diff --git a/.arcconfig b/.arcconfig
index 048706a..b258e7a 100644
--- a/.arcconfig
+++ b/.arcconfig
@@ -1,4 +1,4 @@
 {
-  "project_id" : "clang",
+  "repository.callsign" : "C",
   "conduit_uri" : "https://reviews.llvm.org/"
 }
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 42d5800..a2fcaec 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -132,6 +132,9 @@
     if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
       # Note: path not really used, except for checking if lit was found
       set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
+      if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/llvm-lit)
+        add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit utils/llvm-lit)
+      endif()
       if(NOT LLVM_UTILS_PROVIDED)
         add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/FileCheck utils/FileCheck)
         add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/count utils/count)
@@ -232,17 +235,21 @@
     "Default runtime library to use (\"libgcc\" or \"compiler-rt\", empty for platform default)" FORCE)
 endif()
 
+set(CLANG_DEFAULT_OBJCOPY "objcopy" CACHE STRING
+  "Default objcopy executable to use.")
+
 set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
   "Default OpenMP runtime used by -fopenmp.")
 
-# OpenMP offloading requires at least sm_30 because we use shuffle instructions
-# to generate efficient code for reductions.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+# OpenMP offloading requires at least sm_35 because we use shuffle instructions
+# to generate efficient code for reductions and the atomicMax instruction on
+# 64-bit integers in the implementation of conditional lastprivate.
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
   "Default architecture for OpenMP offloading to Nvidia GPUs.")
 string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30)
-  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30")
-  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
+  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
     "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
 endif()
 
@@ -429,6 +436,7 @@
 # All targets below may depend on all tablegen'd files.
 get_property(CLANG_TABLEGEN_TARGETS GLOBAL PROPERTY CLANG_TABLEGEN_TARGETS)
 add_custom_target(clang-tablegen-targets DEPENDS ${CLANG_TABLEGEN_TARGETS})
+set_target_properties(clang-tablegen-targets PROPERTIES FOLDER "Misc")
 list(APPEND LLVM_COMMON_DEPENDS clang-tablegen-targets)
 
 # Force target to be built as soon as possible. Clang modules builds depend
@@ -533,8 +541,8 @@
     set(NEXT_CLANG_STAGE ${NEXT_CLANG_STAGE}-instrumented)
   endif()
   message(STATUS "Setting next clang stage to: ${NEXT_CLANG_STAGE}")
-  
-  
+
+
   set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/${NEXT_CLANG_STAGE}-stamps/)
   set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${NEXT_CLANG_STAGE}-bins/)
 
@@ -596,7 +604,9 @@
     LLVM_BINUTILS_INCDIR
     CLANG_REPOSITORY_STRING
     CMAKE_MAKE_PROGRAM
-    CMAKE_OSX_ARCHITECTURES)
+    CMAKE_OSX_ARCHITECTURES
+    LLVM_ENABLE_PROJECTS
+    LLVM_ENABLE_RUNTIMES)
 
   # We don't need to depend on compiler-rt if we're building instrumented
   # because the next stage will use the same compiler used to build this stage.
@@ -614,7 +624,8 @@
   set(COMPILER_OPTIONS
     -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/${CXX_COMPILER}
     -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/${C_COMPILER}
-    -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/${C_COMPILER})
+    -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/${C_COMPILER}
+    -DCMAKE_ASM_COMPILER_ID=Clang)
 
   if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
     add_dependencies(clang-bootstrap-deps llvm-profdata)
@@ -646,7 +657,7 @@
   foreach(variableName ${variableNames})
     if(variableName MATCHES "^BOOTSTRAP_")
       string(SUBSTRING ${variableName} 10 -1 varName)
-      string(REPLACE ";" "\;" value "${${variableName}}")
+      string(REPLACE ";" "|" value "${${variableName}}")
       list(APPEND PASSTHROUGH_VARIABLES
         -D${varName}=${value})
     endif()
@@ -662,7 +673,7 @@
       if("${${variableName}}" STREQUAL "")
         set(value "")
       else()
-        string(REPLACE ";" "\;" value ${${variableName}})
+        string(REPLACE ";" "|" value "${${variableName}}")
       endif()
       list(APPEND PASSTHROUGH_VARIABLES
         -D${variableName}=${value})
@@ -690,6 +701,7 @@
     USES_TERMINAL_CONFIGURE 1
     USES_TERMINAL_BUILD 1
     USES_TERMINAL_INSTALL 1
+    LIST_SEPARATOR |
     )
 
   # exclude really-install from main target
diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT
index 0aa156b..2fa9f21 100644
--- a/CODE_OWNERS.TXT
+++ b/CODE_OWNERS.TXT
@@ -25,6 +25,10 @@
 E: echristo@gmail.com
 D: Debug Information, inline assembly
 
+N: Devin Coughlin
+E: dcoughlin@apple.com
+D: Clang Static Analyzer
+
 N: Doug Gregor
 E: dgregor@apple.com
 D: Emeritus owner
@@ -41,10 +45,6 @@
 E: anton@korobeynikov.info
 D: Exception handling, Windows codegen, ARM EABI
 
-N: Anna Zaks
-E: ganna@apple.com
-D: Clang Static Analyzer
-
 N: John McCall
 E: rjmccall@apple.com
 D: Clang LLVM IR generation
diff --git a/README.txt b/README.txt
index 5477b9c..b5f33bb 100644
--- a/README.txt
+++ b/README.txt
@@ -13,10 +13,10 @@
 If you're interested in more (including how to build Clang) it is best to read
 the relevant web sites.  Here are some pointers:
 
-Information on Clang:              http://clang.llvm.org/
-Building and using Clang:          http://clang.llvm.org/get_started.html
-Clang Static Analyzer:             http://clang-analyzer.llvm.org/
-Information on the LLVM project:   http://llvm.org/
+Information on Clang:             http://clang.llvm.org/
+Building and using Clang:         http://clang.llvm.org/get_started.html
+Clang Static Analyzer:            http://clang-analyzer.llvm.org/
+Information on the LLVM project:  http://llvm.org/
 
 If you have questions or comments about Clang, a great place to discuss them is
 on the Clang development mailing list:
diff --git a/bindings/python/README.txt b/bindings/python/README.txt
index 742cf8f..8a0bf99 100644
--- a/bindings/python/README.txt
+++ b/bindings/python/README.txt
@@ -5,11 +5,12 @@
 This directory implements Python bindings for Clang.
 
 You may need to alter LD_LIBRARY_PATH so that the Clang library can be
-found. The unit tests are designed to be run with 'nosetests'. For example:
+found. The unit tests are designed to be run with any standard test
+runner. For example:
 --
 $ env PYTHONPATH=$(echo ~/llvm/tools/clang/bindings/python/) \
       LD_LIBRARY_PATH=$(llvm-config --libdir) \
-  nosetests -v
+  python -m unittest discover -v
 tests.cindex.test_index.test_create ... ok
 ...
 
diff --git a/bindings/python/clang/cindex.py b/bindings/python/clang/cindex.py
index 1dc1760..b53661a 100644
--- a/bindings/python/clang/cindex.py
+++ b/bindings/python/clang/cindex.py
@@ -94,6 +94,9 @@
                 return cls(param)
             if isinstance(param, bytes):
                 return cls(param)
+            if param is None:
+                # Support passing null to C functions expecting char arrays
+                return None
             raise TypeError("Cannot convert '{}' to '{}'".format(type(param).__name__, cls.__name__))
 
         @staticmethod
@@ -1476,6 +1479,12 @@
         """
         return conf.lib.clang_CXXMethod_isVirtual(self)
 
+    def is_abstract_record(self):
+        """Returns True if the cursor refers to a C++ record declaration
+        that has pure virtual member functions.
+        """
+        return conf.lib.clang_CXXRecord_isAbstract(self)
+
     def is_scoped_enum(self):
         """Returns True if the cursor refers to a scoped enum declaration.
         """
@@ -3398,6 +3407,10 @@
    [Cursor],
    bool),
 
+  ("clang_CXXRecord_isAbstract",
+   [Cursor],
+   bool),
+
   ("clang_EnumDecl_isScoped",
    [Cursor],
    bool),
diff --git a/bindings/python/tests/cindex/test_access_specifiers.py b/bindings/python/tests/cindex/test_access_specifiers.py
index cfa04dc..2f6144b 100644
--- a/bindings/python/tests/cindex/test_access_specifiers.py
+++ b/bindings/python/tests/cindex/test_access_specifiers.py
@@ -6,10 +6,14 @@
 from .util import get_cursor
 from .util import get_tu
 
-def test_access_specifiers():
-    """Ensure that C++ access specifiers are available on cursors"""
+import unittest
 
-    tu = get_tu("""
+
+class TestAccessSpecifiers(unittest.TestCase):
+    def test_access_specifiers(self):
+        """Ensure that C++ access specifiers are available on cursors"""
+
+        tu = get_tu("""
 class test_class {
 public:
   void public_member_function();
@@ -20,15 +24,14 @@
 };
 """, lang = 'cpp')
 
-    test_class = get_cursor(tu, "test_class")
-    assert test_class.access_specifier == AccessSpecifier.INVALID;
+        test_class = get_cursor(tu, "test_class")
+        self.assertEqual(test_class.access_specifier, AccessSpecifier.INVALID)
 
-    public = get_cursor(tu.cursor, "public_member_function")
-    assert public.access_specifier == AccessSpecifier.PUBLIC
+        public = get_cursor(tu.cursor, "public_member_function")
+        self.assertEqual(public.access_specifier, AccessSpecifier.PUBLIC)
 
-    protected = get_cursor(tu.cursor, "protected_member_function")
-    assert protected.access_specifier == AccessSpecifier.PROTECTED
+        protected = get_cursor(tu.cursor, "protected_member_function")
+        self.assertEqual(protected.access_specifier, AccessSpecifier.PROTECTED)
 
-    private = get_cursor(tu.cursor, "private_member_function")
-    assert private.access_specifier == AccessSpecifier.PRIVATE
-
+        private = get_cursor(tu.cursor, "private_member_function")
+        self.assertEqual(private.access_specifier, AccessSpecifier.PRIVATE)
diff --git a/bindings/python/tests/cindex/test_cdb.py b/bindings/python/tests/cindex/test_cdb.py
index 35fe3e1..bd6e773 100644
--- a/bindings/python/tests/cindex/test_cdb.py
+++ b/bindings/python/tests/cindex/test_cdb.py
@@ -4,114 +4,116 @@
 from clang.cindex import CompileCommand
 import os
 import gc
+import unittest
+
 
 kInputsDir = os.path.join(os.path.dirname(__file__), 'INPUTS')
 
-def test_create_fail():
-    """Check we fail loading a database with an assertion"""
-    path = os.path.dirname(__file__)
-    try:
-      cdb = CompilationDatabase.fromDirectory(path)
-    except CompilationDatabaseError as e:
-      assert e.cdb_error == CompilationDatabaseError.ERROR_CANNOTLOADDATABASE
-    else:
-      assert False
 
-def test_create():
-    """Check we can load a compilation database"""
-    cdb = CompilationDatabase.fromDirectory(kInputsDir)
+class TestCDB(unittest.TestCase):
+    def test_create_fail(self):
+        """Check we fail loading a database with an assertion"""
+        path = os.path.dirname(__file__)
+        with self.assertRaises(CompilationDatabaseError) as cm:
+            cdb = CompilationDatabase.fromDirectory(path)
+        e = cm.exception
+        self.assertEqual(e.cdb_error,
+            CompilationDatabaseError.ERROR_CANNOTLOADDATABASE)
 
-def test_lookup_fail():
-    """Check file lookup failure"""
-    cdb = CompilationDatabase.fromDirectory(kInputsDir)
-    assert cdb.getCompileCommands('file_do_not_exist.cpp') == None
+    def test_create(self):
+        """Check we can load a compilation database"""
+        cdb = CompilationDatabase.fromDirectory(kInputsDir)
 
-def test_lookup_succeed():
-    """Check we get some results if the file exists in the db"""
-    cdb = CompilationDatabase.fromDirectory(kInputsDir)
-    cmds = cdb.getCompileCommands('/home/john.doe/MyProject/project.cpp')
-    assert len(cmds) != 0
+    def test_lookup_fail(self):
+        """Check file lookup failure"""
+        cdb = CompilationDatabase.fromDirectory(kInputsDir)
+        self.assertIsNone(cdb.getCompileCommands('file_do_not_exist.cpp'))
 
-def test_all_compilecommand():
-    """Check we get all results from the db"""
-    cdb = CompilationDatabase.fromDirectory(kInputsDir)
-    cmds = cdb.getAllCompileCommands()
-    assert len(cmds) == 3
-    expected = [
-        { 'wd': '/home/john.doe/MyProject',
-          'file': '/home/john.doe/MyProject/project.cpp',
-          'line': ['clang++', '-o', 'project.o', '-c',
-                   '/home/john.doe/MyProject/project.cpp']},
-        { 'wd': '/home/john.doe/MyProjectA',
-          'file': '/home/john.doe/MyProject/project2.cpp',
-          'line': ['clang++', '-o', 'project2.o', '-c',
-                   '/home/john.doe/MyProject/project2.cpp']},
-        { 'wd': '/home/john.doe/MyProjectB',
-          'file': '/home/john.doe/MyProject/project2.cpp',
-          'line': ['clang++', '-DFEATURE=1', '-o', 'project2-feature.o', '-c',
-                   '/home/john.doe/MyProject/project2.cpp']},
+    def test_lookup_succeed(self):
+        """Check we get some results if the file exists in the db"""
+        cdb = CompilationDatabase.fromDirectory(kInputsDir)
+        cmds = cdb.getCompileCommands('/home/john.doe/MyProject/project.cpp')
+        self.assertNotEqual(len(cmds), 0)
 
-        ]
-    for i in range(len(cmds)):
-        assert cmds[i].directory == expected[i]['wd']
-        assert cmds[i].filename == expected[i]['file']
-        for arg, exp in zip(cmds[i].arguments, expected[i]['line']):
-            assert arg == exp
+    def test_all_compilecommand(self):
+        """Check we get all results from the db"""
+        cdb = CompilationDatabase.fromDirectory(kInputsDir)
+        cmds = cdb.getAllCompileCommands()
+        self.assertEqual(len(cmds), 3)
+        expected = [
+            { 'wd': '/home/john.doe/MyProject',
+              'file': '/home/john.doe/MyProject/project.cpp',
+              'line': ['clang++', '-o', 'project.o', '-c',
+                       '/home/john.doe/MyProject/project.cpp']},
+            { 'wd': '/home/john.doe/MyProjectA',
+              'file': '/home/john.doe/MyProject/project2.cpp',
+              'line': ['clang++', '-o', 'project2.o', '-c',
+                       '/home/john.doe/MyProject/project2.cpp']},
+            { 'wd': '/home/john.doe/MyProjectB',
+              'file': '/home/john.doe/MyProject/project2.cpp',
+              'line': ['clang++', '-DFEATURE=1', '-o', 'project2-feature.o', '-c',
+                       '/home/john.doe/MyProject/project2.cpp']},
 
-def test_1_compilecommand():
-    """Check file with single compile command"""
-    cdb = CompilationDatabase.fromDirectory(kInputsDir)
-    file = '/home/john.doe/MyProject/project.cpp'
-    cmds = cdb.getCompileCommands(file)
-    assert len(cmds) == 1
-    assert cmds[0].directory == os.path.dirname(file)
-    assert cmds[0].filename == file
-    expected = [ 'clang++', '-o', 'project.o', '-c',
-                 '/home/john.doe/MyProject/project.cpp']
-    for arg, exp in zip(cmds[0].arguments, expected):
-        assert arg == exp
+            ]
+        for i in range(len(cmds)):
+            self.assertEqual(cmds[i].directory, expected[i]['wd'])
+            self.assertEqual(cmds[i].filename, expected[i]['file'])
+            for arg, exp in zip(cmds[i].arguments, expected[i]['line']):
+                self.assertEqual(arg, exp)
 
-def test_2_compilecommand():
-    """Check file with 2 compile commands"""
-    cdb = CompilationDatabase.fromDirectory(kInputsDir)
-    cmds = cdb.getCompileCommands('/home/john.doe/MyProject/project2.cpp')
-    assert len(cmds) == 2
-    expected = [
-        { 'wd': '/home/john.doe/MyProjectA',
-          'line': ['clang++', '-o', 'project2.o', '-c',
-                   '/home/john.doe/MyProject/project2.cpp']},
-        { 'wd': '/home/john.doe/MyProjectB',
-          'line': ['clang++', '-DFEATURE=1', '-o', 'project2-feature.o', '-c',
-                   '/home/john.doe/MyProject/project2.cpp']}
-        ]
-    for i in range(len(cmds)):
-        assert cmds[i].directory == expected[i]['wd']
-        for arg, exp in zip(cmds[i].arguments, expected[i]['line']):
-            assert arg == exp
+    def test_1_compilecommand(self):
+        """Check file with single compile command"""
+        cdb = CompilationDatabase.fromDirectory(kInputsDir)
+        file = '/home/john.doe/MyProject/project.cpp'
+        cmds = cdb.getCompileCommands(file)
+        self.assertEqual(len(cmds), 1)
+        self.assertEqual(cmds[0].directory, os.path.dirname(file))
+        self.assertEqual(cmds[0].filename, file)
+        expected = [ 'clang++', '-o', 'project.o', '-c',
+                     '/home/john.doe/MyProject/project.cpp']
+        for arg, exp in zip(cmds[0].arguments, expected):
+            self.assertEqual(arg, exp)
 
-def test_compilecommand_iterator_stops():
-    """Check that iterator stops after the correct number of elements"""
-    cdb = CompilationDatabase.fromDirectory(kInputsDir)
-    count = 0
-    for cmd in cdb.getCompileCommands('/home/john.doe/MyProject/project2.cpp'):
-        count += 1
-        assert count <= 2
+    def test_2_compilecommand(self):
+        """Check file with 2 compile commands"""
+        cdb = CompilationDatabase.fromDirectory(kInputsDir)
+        cmds = cdb.getCompileCommands('/home/john.doe/MyProject/project2.cpp')
+        self.assertEqual(len(cmds), 2)
+        expected = [
+            { 'wd': '/home/john.doe/MyProjectA',
+              'line': ['clang++', '-o', 'project2.o', '-c',
+                       '/home/john.doe/MyProject/project2.cpp']},
+            { 'wd': '/home/john.doe/MyProjectB',
+              'line': ['clang++', '-DFEATURE=1', '-o', 'project2-feature.o', '-c',
+                       '/home/john.doe/MyProject/project2.cpp']}
+            ]
+        for i in range(len(cmds)):
+            self.assertEqual(cmds[i].directory, expected[i]['wd'])
+            for arg, exp in zip(cmds[i].arguments, expected[i]['line']):
+                self.assertEqual(arg, exp)
 
-def test_compilationDB_references():
-    """Ensure CompilationsCommands are independent of the database"""
-    cdb = CompilationDatabase.fromDirectory(kInputsDir)
-    cmds = cdb.getCompileCommands('/home/john.doe/MyProject/project.cpp')
-    del cdb
-    gc.collect()
-    workingdir = cmds[0].directory
+    def test_compilecommand_iterator_stops(self):
+        """Check that iterator stops after the correct number of elements"""
+        cdb = CompilationDatabase.fromDirectory(kInputsDir)
+        count = 0
+        for cmd in cdb.getCompileCommands('/home/john.doe/MyProject/project2.cpp'):
+            count += 1
+            self.assertLessEqual(count, 2)
 
-def test_compilationCommands_references():
-    """Ensure CompilationsCommand keeps a reference to CompilationCommands"""
-    cdb = CompilationDatabase.fromDirectory(kInputsDir)
-    cmds = cdb.getCompileCommands('/home/john.doe/MyProject/project.cpp')
-    del cdb
-    cmd0 = cmds[0]
-    del cmds
-    gc.collect()
-    workingdir = cmd0.directory
+    def test_compilationDB_references(self):
+        """Ensure CompilationsCommands are independent of the database"""
+        cdb = CompilationDatabase.fromDirectory(kInputsDir)
+        cmds = cdb.getCompileCommands('/home/john.doe/MyProject/project.cpp')
+        del cdb
+        gc.collect()
+        workingdir = cmds[0].directory
 
+    def test_compilationCommands_references(self):
+        """Ensure CompilationsCommand keeps a reference to CompilationCommands"""
+        cdb = CompilationDatabase.fromDirectory(kInputsDir)
+        cmds = cdb.getCompileCommands('/home/john.doe/MyProject/project.cpp')
+        del cdb
+        cmd0 = cmds[0]
+        del cmds
+        gc.collect()
+        workingdir = cmd0.directory
diff --git a/bindings/python/tests/cindex/test_code_completion.py b/bindings/python/tests/cindex/test_code_completion.py
index 357d50d..a56bb30 100644
--- a/bindings/python/tests/cindex/test_code_completion.py
+++ b/bindings/python/tests/cindex/test_code_completion.py
@@ -1,16 +1,20 @@
 from clang.cindex import TranslationUnit
 
-def check_completion_results(cr, expected):
-    assert cr is not None
-    assert len(cr.diagnostics) == 0
+import unittest
 
-    completions = [str(c) for c in cr.results]
 
-    for c in expected:
-        assert c in completions
+class TestCodeCompletion(unittest.TestCase):
+    def check_completion_results(self, cr, expected):
+        self.assertIsNotNone(cr)
+        self.assertEqual(len(cr.diagnostics), 0)
 
-def test_code_complete():
-    files = [('fake.c', """
+        completions = [str(c) for c in cr.results]
+
+        for c in expected:
+            self.assertIn(c, completions)
+
+    def test_code_complete(self):
+        files = [('fake.c', """
 /// Aaa.
 int test1;
 
@@ -22,20 +26,20 @@
 }
 """)]
 
-    tu = TranslationUnit.from_source('fake.c', ['-std=c99'], unsaved_files=files,
-            options=TranslationUnit.PARSE_INCLUDE_BRIEF_COMMENTS_IN_CODE_COMPLETION)
+        tu = TranslationUnit.from_source('fake.c', ['-std=c99'], unsaved_files=files,
+                options=TranslationUnit.PARSE_INCLUDE_BRIEF_COMMENTS_IN_CODE_COMPLETION)
 
-    cr = tu.codeComplete('fake.c', 9, 1, unsaved_files=files, include_brief_comments=True)
+        cr = tu.codeComplete('fake.c', 9, 1, unsaved_files=files, include_brief_comments=True)
 
-    expected = [
-      "{'int', ResultType} | {'test1', TypedText} || Priority: 50 || Availability: Available || Brief comment: Aaa.",
-      "{'void', ResultType} | {'test2', TypedText} | {'(', LeftParen} | {')', RightParen} || Priority: 50 || Availability: Available || Brief comment: Bbb.",
-      "{'return', TypedText} || Priority: 40 || Availability: Available || Brief comment: None"
-    ]
-    check_completion_results(cr, expected)
+        expected = [
+          "{'int', ResultType} | {'test1', TypedText} || Priority: 50 || Availability: Available || Brief comment: Aaa.",
+          "{'void', ResultType} | {'test2', TypedText} | {'(', LeftParen} | {')', RightParen} || Priority: 50 || Availability: Available || Brief comment: Bbb.",
+          "{'return', TypedText} || Priority: 40 || Availability: Available || Brief comment: None"
+        ]
+        self.check_completion_results(cr, expected)
 
-def test_code_complete_availability():
-    files = [('fake.cpp', """
+    def test_code_complete_availability(self):
+        files = [('fake.cpp', """
 class P {
 protected:
   int member;
@@ -52,24 +56,24 @@
 }
 """)]
 
-    tu = TranslationUnit.from_source('fake.cpp', ['-std=c++98'], unsaved_files=files)
+        tu = TranslationUnit.from_source('fake.cpp', ['-std=c++98'], unsaved_files=files)
 
-    cr = tu.codeComplete('fake.cpp', 12, 5, unsaved_files=files)
+        cr = tu.codeComplete('fake.cpp', 12, 5, unsaved_files=files)
 
-    expected = [
-      "{'const', TypedText} || Priority: 40 || Availability: Available || Brief comment: None",
-      "{'volatile', TypedText} || Priority: 40 || Availability: Available || Brief comment: None",
-      "{'operator', TypedText} || Priority: 40 || Availability: Available || Brief comment: None",
-      "{'P', TypedText} | {'::', Text} || Priority: 75 || Availability: Available || Brief comment: None",
-      "{'Q', TypedText} | {'::', Text} || Priority: 75 || Availability: Available || Brief comment: None"
-    ]
-    check_completion_results(cr, expected)
+        expected = [
+          "{'const', TypedText} || Priority: 40 || Availability: Available || Brief comment: None",
+          "{'volatile', TypedText} || Priority: 40 || Availability: Available || Brief comment: None",
+          "{'operator', TypedText} || Priority: 40 || Availability: Available || Brief comment: None",
+          "{'P', TypedText} | {'::', Text} || Priority: 75 || Availability: Available || Brief comment: None",
+          "{'Q', TypedText} | {'::', Text} || Priority: 75 || Availability: Available || Brief comment: None"
+        ]
+        self.check_completion_results(cr, expected)
 
-    cr = tu.codeComplete('fake.cpp', 13, 5, unsaved_files=files)
-    expected = [
-        "{'P', TypedText} | {'::', Text} || Priority: 75 || Availability: Available || Brief comment: None",
-        "{'P &', ResultType} | {'operator=', TypedText} | {'(', LeftParen} | {'const P &', Placeholder} | {')', RightParen} || Priority: 34 || Availability: Available || Brief comment: None",
-        "{'int', ResultType} | {'member', TypedText} || Priority: 35 || Availability: NotAccessible || Brief comment: None",
-        "{'void', ResultType} | {'~P', TypedText} | {'(', LeftParen} | {')', RightParen} || Priority: 34 || Availability: Available || Brief comment: None"
-    ]
-    check_completion_results(cr, expected)
+        cr = tu.codeComplete('fake.cpp', 13, 5, unsaved_files=files)
+        expected = [
+            "{'P', TypedText} | {'::', Text} || Priority: 75 || Availability: Available || Brief comment: None",
+            "{'P &', ResultType} | {'operator=', TypedText} | {'(', LeftParen} | {'const P &', Placeholder} | {')', RightParen} || Priority: 79 || Availability: Available || Brief comment: None",
+            "{'int', ResultType} | {'member', TypedText} || Priority: 35 || Availability: NotAccessible || Brief comment: None",
+            "{'void', ResultType} | {'~P', TypedText} | {'(', LeftParen} | {')', RightParen} || Priority: 79 || Availability: Available || Brief comment: None"
+        ]
+        self.check_completion_results(cr, expected)
diff --git a/bindings/python/tests/cindex/test_comment.py b/bindings/python/tests/cindex/test_comment.py
index d8f3129..d6c6d8e 100644
--- a/bindings/python/tests/cindex/test_comment.py
+++ b/bindings/python/tests/cindex/test_comment.py
@@ -1,8 +1,12 @@
 from clang.cindex import TranslationUnit
 from tests.cindex.util import get_cursor
 
-def test_comment():
-    files = [('fake.c', """
+import unittest
+
+
+class TestComment(unittest.TestCase):
+    def test_comment(self):
+        files = [('fake.c', """
 /// Aaa.
 int test1;
 
@@ -14,27 +18,25 @@
 
 }
 """)]
-    # make a comment-aware TU
-    tu = TranslationUnit.from_source('fake.c', ['-std=c99'], unsaved_files=files,
-            options=TranslationUnit.PARSE_INCLUDE_BRIEF_COMMENTS_IN_CODE_COMPLETION)
-    test1 = get_cursor(tu, 'test1')
-    assert test1 is not None, "Could not find test1."
-    assert test1.type.is_pod()
-    raw = test1.raw_comment
-    brief = test1.brief_comment
-    assert raw == """/// Aaa."""
-    assert brief == """Aaa."""
-    
-    test2 = get_cursor(tu, 'test2')
-    raw = test2.raw_comment
-    brief = test2.brief_comment
-    assert raw == """/// Bbb.\n/// x"""
-    assert brief == """Bbb. x"""
-    
-    f = get_cursor(tu, 'f')
-    raw = f.raw_comment
-    brief = f.brief_comment
-    assert raw is None
-    assert brief is None
+        # make a comment-aware TU
+        tu = TranslationUnit.from_source('fake.c', ['-std=c99'], unsaved_files=files,
+                options=TranslationUnit.PARSE_INCLUDE_BRIEF_COMMENTS_IN_CODE_COMPLETION)
+        test1 = get_cursor(tu, 'test1')
+        self.assertIsNotNone(test1, "Could not find test1.")
+        self.assertTrue(test1.type.is_pod())
+        raw = test1.raw_comment
+        brief = test1.brief_comment
+        self.assertEqual(raw, """/// Aaa.""")
+        self.assertEqual(brief, """Aaa.""")
 
+        test2 = get_cursor(tu, 'test2')
+        raw = test2.raw_comment
+        brief = test2.brief_comment
+        self.assertEqual(raw, """/// Bbb.\n/// x""")
+        self.assertEqual(brief, """Bbb. x""")
 
+        f = get_cursor(tu, 'f')
+        raw = f.raw_comment
+        brief = f.brief_comment
+        self.assertIsNone(raw)
+        self.assertIsNone(brief)
diff --git a/bindings/python/tests/cindex/test_cursor.py b/bindings/python/tests/cindex/test_cursor.py
index 8ff0695..c2a4eb5 100644
--- a/bindings/python/tests/cindex/test_cursor.py
+++ b/bindings/python/tests/cindex/test_cursor.py
@@ -1,5 +1,6 @@
 import ctypes
 import gc
+import unittest
 
 from clang.cindex import AvailabilityKind
 from clang.cindex import CursorKind
@@ -10,6 +11,7 @@
 from .util import get_cursors
 from .util import get_tu
 
+
 kInput = """\
 struct s0 {
   int a;
@@ -30,257 +32,6 @@
 }
 """
 
-def test_get_children():
-    tu = get_tu(kInput)
-
-    it = tu.cursor.get_children()
-    tu_nodes = list(it)
-
-    assert len(tu_nodes) == 3
-    for cursor in tu_nodes:
-        assert cursor.translation_unit is not None
-
-    assert tu_nodes[0] != tu_nodes[1]
-    assert tu_nodes[0].kind == CursorKind.STRUCT_DECL
-    assert tu_nodes[0].spelling == 's0'
-    assert tu_nodes[0].is_definition() == True
-    assert tu_nodes[0].location.file.name == 't.c'
-    assert tu_nodes[0].location.line == 1
-    assert tu_nodes[0].location.column == 8
-    assert tu_nodes[0].hash > 0
-    assert tu_nodes[0].translation_unit is not None
-
-    s0_nodes = list(tu_nodes[0].get_children())
-    assert len(s0_nodes) == 2
-    assert s0_nodes[0].kind == CursorKind.FIELD_DECL
-    assert s0_nodes[0].spelling == 'a'
-    assert s0_nodes[0].type.kind == TypeKind.INT
-    assert s0_nodes[1].kind == CursorKind.FIELD_DECL
-    assert s0_nodes[1].spelling == 'b'
-    assert s0_nodes[1].type.kind == TypeKind.INT
-
-    assert tu_nodes[1].kind == CursorKind.STRUCT_DECL
-    assert tu_nodes[1].spelling == 's1'
-    assert tu_nodes[1].displayname == 's1'
-    assert tu_nodes[1].is_definition() == False
-
-    assert tu_nodes[2].kind == CursorKind.FUNCTION_DECL
-    assert tu_nodes[2].spelling == 'f0'
-    assert tu_nodes[2].displayname == 'f0(int, int)'
-    assert tu_nodes[2].is_definition() == True
-
-def test_references():
-    """Ensure that references to TranslationUnit are kept."""
-    tu = get_tu('int x;')
-    cursors = list(tu.cursor.get_children())
-    assert len(cursors) > 0
-
-    cursor = cursors[0]
-    assert isinstance(cursor.translation_unit, TranslationUnit)
-
-    # Delete reference to TU and perform a full GC.
-    del tu
-    gc.collect()
-    assert isinstance(cursor.translation_unit, TranslationUnit)
-
-    # If the TU was destroyed, this should cause a segfault.
-    parent = cursor.semantic_parent
-
-def test_canonical():
-    source = 'struct X; struct X; struct X { int member; };'
-    tu = get_tu(source)
-
-    cursors = []
-    for cursor in tu.cursor.get_children():
-        if cursor.spelling == 'X':
-            cursors.append(cursor)
-
-    assert len(cursors) == 3
-    assert cursors[1].canonical == cursors[2].canonical
-
-def test_is_const_method():
-    """Ensure Cursor.is_const_method works."""
-    source = 'class X { void foo() const; void bar(); };'
-    tu = get_tu(source, lang='cpp')
-
-    cls = get_cursor(tu, 'X')
-    foo = get_cursor(tu, 'foo')
-    bar = get_cursor(tu, 'bar')
-    assert cls is not None
-    assert foo is not None
-    assert bar is not None
-
-    assert foo.is_const_method()
-    assert not bar.is_const_method()
-
-def test_is_converting_constructor():
-    """Ensure Cursor.is_converting_constructor works."""
-    source = 'class X { explicit X(int); X(double); X(); };'
-    tu = get_tu(source, lang='cpp')
-
-    xs = get_cursors(tu, 'X')
-
-    assert len(xs) == 4
-    assert xs[0].kind == CursorKind.CLASS_DECL
-    cs = xs[1:]
-    assert cs[0].kind == CursorKind.CONSTRUCTOR
-    assert cs[1].kind == CursorKind.CONSTRUCTOR
-    assert cs[2].kind == CursorKind.CONSTRUCTOR
-
-    assert not cs[0].is_converting_constructor()
-    assert cs[1].is_converting_constructor()
-    assert not cs[2].is_converting_constructor()
-
-
-def test_is_copy_constructor():
-    """Ensure Cursor.is_copy_constructor works."""
-    source = 'class X { X(); X(const X&); X(X&&); };'
-    tu = get_tu(source, lang='cpp')
-
-    xs = get_cursors(tu, 'X')
-    assert xs[0].kind == CursorKind.CLASS_DECL
-    cs = xs[1:]
-    assert cs[0].kind == CursorKind.CONSTRUCTOR
-    assert cs[1].kind == CursorKind.CONSTRUCTOR
-    assert cs[2].kind == CursorKind.CONSTRUCTOR
-
-    assert not cs[0].is_copy_constructor()
-    assert cs[1].is_copy_constructor()
-    assert not cs[2].is_copy_constructor()
-
-def test_is_default_constructor():
-    """Ensure Cursor.is_default_constructor works."""
-    source = 'class X { X(); X(int); };'
-    tu = get_tu(source, lang='cpp')
-
-    xs = get_cursors(tu, 'X')
-    assert xs[0].kind == CursorKind.CLASS_DECL
-    cs = xs[1:]
-    assert cs[0].kind == CursorKind.CONSTRUCTOR
-    assert cs[1].kind == CursorKind.CONSTRUCTOR
-
-    assert cs[0].is_default_constructor()
-    assert not cs[1].is_default_constructor()
-
-def test_is_move_constructor():
-    """Ensure Cursor.is_move_constructor works."""
-    source = 'class X { X(); X(const X&); X(X&&); };'
-    tu = get_tu(source, lang='cpp')
-
-    xs = get_cursors(tu, 'X')
-    assert xs[0].kind == CursorKind.CLASS_DECL
-    cs = xs[1:]
-    assert cs[0].kind == CursorKind.CONSTRUCTOR
-    assert cs[1].kind == CursorKind.CONSTRUCTOR
-    assert cs[2].kind == CursorKind.CONSTRUCTOR
-
-    assert not cs[0].is_move_constructor()
-    assert not cs[1].is_move_constructor()
-    assert cs[2].is_move_constructor()
-
-def test_is_default_method():
-    """Ensure Cursor.is_default_method works."""
-    source = 'class X { X() = default; }; class Y { Y(); };'
-    tu = get_tu(source, lang='cpp')
-
-    xs = get_cursors(tu, 'X')
-    ys = get_cursors(tu, 'Y')
-
-    assert len(xs) == 2
-    assert len(ys) == 2
-
-    xc = xs[1]
-    yc = ys[1]
-
-    assert xc.is_default_method()
-    assert not yc.is_default_method()
-
-def test_is_mutable_field():
-    """Ensure Cursor.is_mutable_field works."""
-    source = 'class X { int x_; mutable int y_; };'
-    tu = get_tu(source, lang='cpp')
-
-    cls = get_cursor(tu, 'X')
-    x_ = get_cursor(tu, 'x_')
-    y_ = get_cursor(tu, 'y_')
-    assert cls is not None
-    assert x_ is not None
-    assert y_ is not None
-
-    assert not x_.is_mutable_field()
-    assert y_.is_mutable_field()
-
-def test_is_static_method():
-    """Ensure Cursor.is_static_method works."""
-
-    source = 'class X { static void foo(); void bar(); };'
-    tu = get_tu(source, lang='cpp')
-
-    cls = get_cursor(tu, 'X')
-    foo = get_cursor(tu, 'foo')
-    bar = get_cursor(tu, 'bar')
-    assert cls is not None
-    assert foo is not None
-    assert bar is not None
-
-    assert foo.is_static_method()
-    assert not bar.is_static_method()
-
-def test_is_pure_virtual_method():
-    """Ensure Cursor.is_pure_virtual_method works."""
-    source = 'class X { virtual void foo() = 0; virtual void bar(); };'
-    tu = get_tu(source, lang='cpp')
-
-    cls = get_cursor(tu, 'X')
-    foo = get_cursor(tu, 'foo')
-    bar = get_cursor(tu, 'bar')
-    assert cls is not None
-    assert foo is not None
-    assert bar is not None
-
-    assert foo.is_pure_virtual_method()
-    assert not bar.is_pure_virtual_method()
-
-def test_is_virtual_method():
-    """Ensure Cursor.is_virtual_method works."""
-    source = 'class X { virtual void foo(); void bar(); };'
-    tu = get_tu(source, lang='cpp')
-
-    cls = get_cursor(tu, 'X')
-    foo = get_cursor(tu, 'foo')
-    bar = get_cursor(tu, 'bar')
-    assert cls is not None
-    assert foo is not None
-    assert bar is not None
-
-    assert foo.is_virtual_method()
-    assert not bar.is_virtual_method()
-
-def test_is_scoped_enum():
-    """Ensure Cursor.is_scoped_enum works."""
-    source = 'class X {}; enum RegularEnum {}; enum class ScopedEnum {};'
-    tu = get_tu(source, lang='cpp')
-
-    cls = get_cursor(tu, 'X')
-    regular_enum = get_cursor(tu, 'RegularEnum')
-    scoped_enum = get_cursor(tu, 'ScopedEnum')
-    assert cls is not None
-    assert regular_enum is not None
-    assert scoped_enum is not None
-
-    assert not cls.is_scoped_enum()
-    assert not regular_enum.is_scoped_enum()
-    assert scoped_enum.is_scoped_enum()
-
-def test_underlying_type():
-    tu = get_tu('typedef int foo;')
-    typedef = get_cursor(tu, 'foo')
-    assert typedef is not None
-
-    assert typedef.kind.is_declaration()
-    underlying = typedef.underlying_typedef_type
-    assert underlying.kind == TypeKind.INT
-
 kParentTest = """\
         class C {
             void f();
@@ -288,188 +39,6 @@
 
         void C::f() { }
     """
-def test_semantic_parent():
-    tu = get_tu(kParentTest, 'cpp')
-    curs = get_cursors(tu, 'f')
-    decl = get_cursor(tu, 'C')
-    assert(len(curs) == 2)
-    assert(curs[0].semantic_parent == curs[1].semantic_parent)
-    assert(curs[0].semantic_parent == decl)
-
-def test_lexical_parent():
-    tu = get_tu(kParentTest, 'cpp')
-    curs = get_cursors(tu, 'f')
-    decl = get_cursor(tu, 'C')
-    assert(len(curs) == 2)
-    assert(curs[0].lexical_parent != curs[1].lexical_parent)
-    assert(curs[0].lexical_parent == decl)
-    assert(curs[1].lexical_parent == tu.cursor)
-
-def test_enum_type():
-    tu = get_tu('enum TEST { FOO=1, BAR=2 };')
-    enum = get_cursor(tu, 'TEST')
-    assert enum is not None
-
-    assert enum.kind == CursorKind.ENUM_DECL
-    enum_type = enum.enum_type
-    assert enum_type.kind == TypeKind.UINT
-
-def test_enum_type_cpp():
-    tu = get_tu('enum TEST : long long { FOO=1, BAR=2 };', lang="cpp")
-    enum = get_cursor(tu, 'TEST')
-    assert enum is not None
-
-    assert enum.kind == CursorKind.ENUM_DECL
-    assert enum.enum_type.kind == TypeKind.LONGLONG
-
-def test_objc_type_encoding():
-    tu = get_tu('int i;', lang='objc')
-    i = get_cursor(tu, 'i')
-
-    assert i is not None
-    assert i.objc_type_encoding == 'i'
-
-def test_enum_values():
-    tu = get_tu('enum TEST { SPAM=1, EGG, HAM = EGG * 20};')
-    enum = get_cursor(tu, 'TEST')
-    assert enum is not None
-
-    assert enum.kind == CursorKind.ENUM_DECL
-
-    enum_constants = list(enum.get_children())
-    assert len(enum_constants) == 3
-
-    spam, egg, ham = enum_constants
-
-    assert spam.kind == CursorKind.ENUM_CONSTANT_DECL
-    assert spam.enum_value == 1
-    assert egg.kind == CursorKind.ENUM_CONSTANT_DECL
-    assert egg.enum_value == 2
-    assert ham.kind == CursorKind.ENUM_CONSTANT_DECL
-    assert ham.enum_value == 40
-
-def test_enum_values_cpp():
-    tu = get_tu('enum TEST : long long { SPAM = -1, HAM = 0x10000000000};', lang="cpp")
-    enum = get_cursor(tu, 'TEST')
-    assert enum is not None
-
-    assert enum.kind == CursorKind.ENUM_DECL
-
-    enum_constants = list(enum.get_children())
-    assert len(enum_constants) == 2
-
-    spam, ham = enum_constants
-
-    assert spam.kind == CursorKind.ENUM_CONSTANT_DECL
-    assert spam.enum_value == -1
-    assert ham.kind == CursorKind.ENUM_CONSTANT_DECL
-    assert ham.enum_value == 0x10000000000
-
-def test_annotation_attribute():
-    tu = get_tu('int foo (void) __attribute__ ((annotate("here be annotation attribute")));')
-
-    foo = get_cursor(tu, 'foo')
-    assert foo is not None
-
-    for c in foo.get_children():
-        if c.kind == CursorKind.ANNOTATE_ATTR:
-            assert c.displayname == "here be annotation attribute"
-            break
-    else:
-        assert False, "Couldn't find annotation"
-
-def test_annotation_template():
-    annotation = '__attribute__ ((annotate("annotation")))'
-    for source, kind in [
-            ('int foo (T value) %s;', CursorKind.FUNCTION_TEMPLATE),
-            ('class %s foo {};', CursorKind.CLASS_TEMPLATE),
-    ]:
-        source = 'template<typename T> ' + (source % annotation)
-        tu = get_tu(source, lang="cpp")
-
-        foo = get_cursor(tu, 'foo')
-        assert foo is not None
-        assert foo.kind == kind
-
-        for c in foo.get_children():
-            if c.kind == CursorKind.ANNOTATE_ATTR:
-                assert c.displayname == "annotation"
-                break
-        else:
-            assert False, "Couldn't find annotation for {}".format(kind)
-
-def test_result_type():
-    tu = get_tu('int foo();')
-    foo = get_cursor(tu, 'foo')
-
-    assert foo is not None
-    t = foo.result_type
-    assert t.kind == TypeKind.INT
-
-def test_availability():
-    tu = get_tu('class A { A(A const&) = delete; };', lang='cpp')
-
-    # AvailabilityKind.AVAILABLE
-    cursor = get_cursor(tu, 'A')
-    assert cursor.kind == CursorKind.CLASS_DECL
-    assert cursor.availability == AvailabilityKind.AVAILABLE
-
-    # AvailabilityKind.NOT_AVAILABLE
-    cursors = get_cursors(tu, 'A')
-    for c in cursors:
-        if c.kind == CursorKind.CONSTRUCTOR:
-            assert c.availability == AvailabilityKind.NOT_AVAILABLE
-            break
-    else:
-        assert False, "Could not find cursor for deleted constructor"
-
-    # AvailabilityKind.DEPRECATED
-    tu = get_tu('void test() __attribute__((deprecated));', lang='cpp')
-    cursor = get_cursor(tu, 'test')
-    assert cursor.availability == AvailabilityKind.DEPRECATED
-
-    # AvailabilityKind.NOT_ACCESSIBLE is only used in the code completion results
-
-def test_get_tokens():
-    """Ensure we can map cursors back to tokens."""
-    tu = get_tu('int foo(int i);')
-    foo = get_cursor(tu, 'foo')
-
-    tokens = list(foo.get_tokens())
-    assert len(tokens) == 6
-    assert tokens[0].spelling == 'int'
-    assert tokens[1].spelling == 'foo'
-
-def test_get_token_cursor():
-    """Ensure we can map tokens to cursors."""
-    tu = get_tu('class A {}; int foo(A var = A());', lang='cpp')
-    foo = get_cursor(tu, 'foo')
-
-    for cursor in foo.walk_preorder():
-        if cursor.kind.is_expression() and not cursor.kind.is_statement():
-            break
-    else:
-        assert False, "Could not find default value expression"
-
-    tokens = list(cursor.get_tokens())
-    assert len(tokens) == 4, [t.spelling for t in tokens]
-    assert tokens[0].spelling == '='
-    assert tokens[1].spelling == 'A'
-    assert tokens[2].spelling == '('
-    assert tokens[3].spelling == ')'
-    t_cursor = tokens[1].cursor
-    assert t_cursor.kind == CursorKind.TYPE_REF
-    r_cursor = t_cursor.referenced # should not raise an exception
-    assert r_cursor.kind == CursorKind.CLASS_DECL
-
-def test_get_arguments():
-    tu = get_tu('void foo(int i, int j);')
-    foo = get_cursor(tu, 'foo')
-    arguments = list(foo.get_arguments())
-
-    assert len(arguments) == 2
-    assert arguments[0].spelling == "i"
-    assert arguments[1].spelling == "j"
 
 kTemplateArgTest = """\
         template <int kInt, typename T, bool kBool>
@@ -479,59 +48,505 @@
         void foo<-7, float, true>();
     """
 
-def test_get_num_template_arguments():
-    tu = get_tu(kTemplateArgTest, lang='cpp')
-    foos = get_cursors(tu, 'foo')
+class TestCursor(unittest.TestCase):
+    def test_get_children(self):
+        tu = get_tu(kInput)
 
-    assert foos[1].get_num_template_arguments() == 3
+        it = tu.cursor.get_children()
+        tu_nodes = list(it)
 
-def test_get_template_argument_kind():
-    tu = get_tu(kTemplateArgTest, lang='cpp')
-    foos = get_cursors(tu, 'foo')
+        self.assertEqual(len(tu_nodes), 3)
+        for cursor in tu_nodes:
+            self.assertIsNotNone(cursor.translation_unit)
 
-    assert foos[1].get_template_argument_kind(0) == TemplateArgumentKind.INTEGRAL
-    assert foos[1].get_template_argument_kind(1) == TemplateArgumentKind.TYPE
-    assert foos[1].get_template_argument_kind(2) == TemplateArgumentKind.INTEGRAL
+        self.assertNotEqual(tu_nodes[0], tu_nodes[1])
+        self.assertEqual(tu_nodes[0].kind, CursorKind.STRUCT_DECL)
+        self.assertEqual(tu_nodes[0].spelling, 's0')
+        self.assertEqual(tu_nodes[0].is_definition(), True)
+        self.assertEqual(tu_nodes[0].location.file.name, 't.c')
+        self.assertEqual(tu_nodes[0].location.line, 1)
+        self.assertEqual(tu_nodes[0].location.column, 8)
+        self.assertGreater(tu_nodes[0].hash, 0)
+        self.assertIsNotNone(tu_nodes[0].translation_unit)
 
-def test_get_template_argument_type():
-    tu = get_tu(kTemplateArgTest, lang='cpp')
-    foos = get_cursors(tu, 'foo')
+        s0_nodes = list(tu_nodes[0].get_children())
+        self.assertEqual(len(s0_nodes), 2)
+        self.assertEqual(s0_nodes[0].kind, CursorKind.FIELD_DECL)
+        self.assertEqual(s0_nodes[0].spelling, 'a')
+        self.assertEqual(s0_nodes[0].type.kind, TypeKind.INT)
+        self.assertEqual(s0_nodes[1].kind, CursorKind.FIELD_DECL)
+        self.assertEqual(s0_nodes[1].spelling, 'b')
+        self.assertEqual(s0_nodes[1].type.kind, TypeKind.INT)
 
-    assert foos[1].get_template_argument_type(1).kind == TypeKind.FLOAT
+        self.assertEqual(tu_nodes[1].kind, CursorKind.STRUCT_DECL)
+        self.assertEqual(tu_nodes[1].spelling, 's1')
+        self.assertEqual(tu_nodes[1].displayname, 's1')
+        self.assertEqual(tu_nodes[1].is_definition(), False)
 
-def test_get_template_argument_value():
-    tu = get_tu(kTemplateArgTest, lang='cpp')
-    foos = get_cursors(tu, 'foo')
+        self.assertEqual(tu_nodes[2].kind, CursorKind.FUNCTION_DECL)
+        self.assertEqual(tu_nodes[2].spelling, 'f0')
+        self.assertEqual(tu_nodes[2].displayname, 'f0(int, int)')
+        self.assertEqual(tu_nodes[2].is_definition(), True)
 
-    assert foos[1].get_template_argument_value(0) == -7
-    assert foos[1].get_template_argument_value(2) == True
+    def test_references(self):
+        """Ensure that references to TranslationUnit are kept."""
+        tu = get_tu('int x;')
+        cursors = list(tu.cursor.get_children())
+        self.assertGreater(len(cursors), 0)
 
-def test_get_template_argument_unsigned_value():
-    tu = get_tu(kTemplateArgTest, lang='cpp')
-    foos = get_cursors(tu, 'foo')
+        cursor = cursors[0]
+        self.assertIsInstance(cursor.translation_unit, TranslationUnit)
 
-    assert foos[1].get_template_argument_unsigned_value(0) == 2 ** 32 - 7
-    assert foos[1].get_template_argument_unsigned_value(2) == True
+        # Delete reference to TU and perform a full GC.
+        del tu
+        gc.collect()
+        self.assertIsInstance(cursor.translation_unit, TranslationUnit)
 
-def test_referenced():
-    tu = get_tu('void foo(); void bar() { foo(); }')
-    foo = get_cursor(tu, 'foo')
-    bar = get_cursor(tu, 'bar')
-    for c in bar.get_children():
-        if c.kind == CursorKind.CALL_EXPR:
-            assert c.referenced.spelling == foo.spelling
-            break
+        # If the TU was destroyed, this should cause a segfault.
+        parent = cursor.semantic_parent
 
-def test_mangled_name():
-    kInputForMangling = """\
-    int foo(int, int);
-    """
-    tu = get_tu(kInputForMangling, lang='cpp')
-    foo = get_cursor(tu, 'foo')
+    def test_canonical(self):
+        source = 'struct X; struct X; struct X { int member; };'
+        tu = get_tu(source)
 
-    # Since libclang does not link in targets, we cannot pass a triple to it
-    # and force the target. To enable this test to pass on all platforms, accept
-    # all valid manglings.
-    # [c-index-test handles this by running the source through clang, emitting
-    #  an AST file and running libclang on that AST file]
-    assert foo.mangled_name in ('_Z3fooii', '__Z3fooii', '?foo@@YAHHH')
+        cursors = []
+        for cursor in tu.cursor.get_children():
+            if cursor.spelling == 'X':
+                cursors.append(cursor)
+
+        self.assertEqual(len(cursors), 3)
+        self.assertEqual(cursors[1].canonical, cursors[2].canonical)
+
+    def test_is_const_method(self):
+        """Ensure Cursor.is_const_method works."""
+        source = 'class X { void foo() const; void bar(); };'
+        tu = get_tu(source, lang='cpp')
+
+        cls = get_cursor(tu, 'X')
+        foo = get_cursor(tu, 'foo')
+        bar = get_cursor(tu, 'bar')
+        self.assertIsNotNone(cls)
+        self.assertIsNotNone(foo)
+        self.assertIsNotNone(bar)
+
+        self.assertTrue(foo.is_const_method())
+        self.assertFalse(bar.is_const_method())
+
+    def test_is_converting_constructor(self):
+        """Ensure Cursor.is_converting_constructor works."""
+        source = 'class X { explicit X(int); X(double); X(); };'
+        tu = get_tu(source, lang='cpp')
+
+        xs = get_cursors(tu, 'X')
+
+        self.assertEqual(len(xs), 4)
+        self.assertEqual(xs[0].kind, CursorKind.CLASS_DECL)
+        cs = xs[1:]
+        self.assertEqual(cs[0].kind, CursorKind.CONSTRUCTOR)
+        self.assertEqual(cs[1].kind, CursorKind.CONSTRUCTOR)
+        self.assertEqual(cs[2].kind, CursorKind.CONSTRUCTOR)
+
+        self.assertFalse(cs[0].is_converting_constructor())
+        self.assertTrue(cs[1].is_converting_constructor())
+        self.assertFalse(cs[2].is_converting_constructor())
+
+
+    def test_is_copy_constructor(self):
+        """Ensure Cursor.is_copy_constructor works."""
+        source = 'class X { X(); X(const X&); X(X&&); };'
+        tu = get_tu(source, lang='cpp')
+
+        xs = get_cursors(tu, 'X')
+        self.assertEqual(xs[0].kind, CursorKind.CLASS_DECL)
+        cs = xs[1:]
+        self.assertEqual(cs[0].kind, CursorKind.CONSTRUCTOR)
+        self.assertEqual(cs[1].kind, CursorKind.CONSTRUCTOR)
+        self.assertEqual(cs[2].kind, CursorKind.CONSTRUCTOR)
+
+        self.assertFalse(cs[0].is_copy_constructor())
+        self.assertTrue(cs[1].is_copy_constructor())
+        self.assertFalse(cs[2].is_copy_constructor())
+
+    def test_is_default_constructor(self):
+        """Ensure Cursor.is_default_constructor works."""
+        source = 'class X { X(); X(int); };'
+        tu = get_tu(source, lang='cpp')
+
+        xs = get_cursors(tu, 'X')
+        self.assertEqual(xs[0].kind, CursorKind.CLASS_DECL)
+        cs = xs[1:]
+        self.assertEqual(cs[0].kind, CursorKind.CONSTRUCTOR)
+        self.assertEqual(cs[1].kind, CursorKind.CONSTRUCTOR)
+
+        self.assertTrue(cs[0].is_default_constructor())
+        self.assertFalse(cs[1].is_default_constructor())
+
+    def test_is_move_constructor(self):
+        """Ensure Cursor.is_move_constructor works."""
+        source = 'class X { X(); X(const X&); X(X&&); };'
+        tu = get_tu(source, lang='cpp')
+
+        xs = get_cursors(tu, 'X')
+        self.assertEqual(xs[0].kind, CursorKind.CLASS_DECL)
+        cs = xs[1:]
+        self.assertEqual(cs[0].kind, CursorKind.CONSTRUCTOR)
+        self.assertEqual(cs[1].kind, CursorKind.CONSTRUCTOR)
+        self.assertEqual(cs[2].kind, CursorKind.CONSTRUCTOR)
+
+        self.assertFalse(cs[0].is_move_constructor())
+        self.assertFalse(cs[1].is_move_constructor())
+        self.assertTrue(cs[2].is_move_constructor())
+
+    def test_is_default_method(self):
+        """Ensure Cursor.is_default_method works."""
+        source = 'class X { X() = default; }; class Y { Y(); };'
+        tu = get_tu(source, lang='cpp')
+
+        xs = get_cursors(tu, 'X')
+        ys = get_cursors(tu, 'Y')
+
+        self.assertEqual(len(xs), 2)
+        self.assertEqual(len(ys), 2)
+
+        xc = xs[1]
+        yc = ys[1]
+
+        self.assertTrue(xc.is_default_method())
+        self.assertFalse(yc.is_default_method())
+
+    def test_is_mutable_field(self):
+        """Ensure Cursor.is_mutable_field works."""
+        source = 'class X { int x_; mutable int y_; };'
+        tu = get_tu(source, lang='cpp')
+
+        cls = get_cursor(tu, 'X')
+        x_ = get_cursor(tu, 'x_')
+        y_ = get_cursor(tu, 'y_')
+        self.assertIsNotNone(cls)
+        self.assertIsNotNone(x_)
+        self.assertIsNotNone(y_)
+
+        self.assertFalse(x_.is_mutable_field())
+        self.assertTrue(y_.is_mutable_field())
+
+    def test_is_static_method(self):
+        """Ensure Cursor.is_static_method works."""
+
+        source = 'class X { static void foo(); void bar(); };'
+        tu = get_tu(source, lang='cpp')
+
+        cls = get_cursor(tu, 'X')
+        foo = get_cursor(tu, 'foo')
+        bar = get_cursor(tu, 'bar')
+        self.assertIsNotNone(cls)
+        self.assertIsNotNone(foo)
+        self.assertIsNotNone(bar)
+
+        self.assertTrue(foo.is_static_method())
+        self.assertFalse(bar.is_static_method())
+
+    def test_is_pure_virtual_method(self):
+        """Ensure Cursor.is_pure_virtual_method works."""
+        source = 'class X { virtual void foo() = 0; virtual void bar(); };'
+        tu = get_tu(source, lang='cpp')
+
+        cls = get_cursor(tu, 'X')
+        foo = get_cursor(tu, 'foo')
+        bar = get_cursor(tu, 'bar')
+        self.assertIsNotNone(cls)
+        self.assertIsNotNone(foo)
+        self.assertIsNotNone(bar)
+
+        self.assertTrue(foo.is_pure_virtual_method())
+        self.assertFalse(bar.is_pure_virtual_method())
+
+    def test_is_virtual_method(self):
+        """Ensure Cursor.is_virtual_method works."""
+        source = 'class X { virtual void foo(); void bar(); };'
+        tu = get_tu(source, lang='cpp')
+
+        cls = get_cursor(tu, 'X')
+        foo = get_cursor(tu, 'foo')
+        bar = get_cursor(tu, 'bar')
+        self.assertIsNotNone(cls)
+        self.assertIsNotNone(foo)
+        self.assertIsNotNone(bar)
+
+        self.assertTrue(foo.is_virtual_method())
+        self.assertFalse(bar.is_virtual_method())
+
+    def test_is_abstract_record(self):
+        """Ensure Cursor.is_abstract_record works."""
+        source = 'struct X { virtual void x() = 0; }; struct Y : X { void x(); };'
+        tu = get_tu(source, lang='cpp')
+
+        cls = get_cursor(tu, 'X')
+        self.assertTrue(cls.is_abstract_record())
+
+        cls = get_cursor(tu, 'Y')
+        self.assertFalse(cls.is_abstract_record())
+
+    def test_is_scoped_enum(self):
+        """Ensure Cursor.is_scoped_enum works."""
+        source = 'class X {}; enum RegularEnum {}; enum class ScopedEnum {};'
+        tu = get_tu(source, lang='cpp')
+
+        cls = get_cursor(tu, 'X')
+        regular_enum = get_cursor(tu, 'RegularEnum')
+        scoped_enum = get_cursor(tu, 'ScopedEnum')
+        self.assertIsNotNone(cls)
+        self.assertIsNotNone(regular_enum)
+        self.assertIsNotNone(scoped_enum)
+
+        self.assertFalse(cls.is_scoped_enum())
+        self.assertFalse(regular_enum.is_scoped_enum())
+        self.assertTrue(scoped_enum.is_scoped_enum())
+
+    def test_underlying_type(self):
+        tu = get_tu('typedef int foo;')
+        typedef = get_cursor(tu, 'foo')
+        self.assertIsNotNone(typedef)
+
+        self.assertTrue(typedef.kind.is_declaration())
+        underlying = typedef.underlying_typedef_type
+        self.assertEqual(underlying.kind, TypeKind.INT)
+
+    def test_semantic_parent(self):
+        tu = get_tu(kParentTest, 'cpp')
+        curs = get_cursors(tu, 'f')
+        decl = get_cursor(tu, 'C')
+        self.assertEqual(len(curs), 2)
+        self.assertEqual(curs[0].semantic_parent, curs[1].semantic_parent)
+        self.assertEqual(curs[0].semantic_parent, decl)
+
+    def test_lexical_parent(self):
+        tu = get_tu(kParentTest, 'cpp')
+        curs = get_cursors(tu, 'f')
+        decl = get_cursor(tu, 'C')
+        self.assertEqual(len(curs), 2)
+        self.assertNotEqual(curs[0].lexical_parent, curs[1].lexical_parent)
+        self.assertEqual(curs[0].lexical_parent, decl)
+        self.assertEqual(curs[1].lexical_parent, tu.cursor)
+
+    def test_enum_type(self):
+        tu = get_tu('enum TEST { FOO=1, BAR=2 };')
+        enum = get_cursor(tu, 'TEST')
+        self.assertIsNotNone(enum)
+
+        self.assertEqual(enum.kind, CursorKind.ENUM_DECL)
+        enum_type = enum.enum_type
+        self.assertEqual(enum_type.kind, TypeKind.UINT)
+
+    def test_enum_type_cpp(self):
+        tu = get_tu('enum TEST : long long { FOO=1, BAR=2 };', lang="cpp")
+        enum = get_cursor(tu, 'TEST')
+        self.assertIsNotNone(enum)
+
+        self.assertEqual(enum.kind, CursorKind.ENUM_DECL)
+        self.assertEqual(enum.enum_type.kind, TypeKind.LONGLONG)
+
+    def test_objc_type_encoding(self):
+        tu = get_tu('int i;', lang='objc')
+        i = get_cursor(tu, 'i')
+
+        self.assertIsNotNone(i)
+        self.assertEqual(i.objc_type_encoding, 'i')
+
+    def test_enum_values(self):
+        tu = get_tu('enum TEST { SPAM=1, EGG, HAM = EGG * 20};')
+        enum = get_cursor(tu, 'TEST')
+        self.assertIsNotNone(enum)
+
+        self.assertEqual(enum.kind, CursorKind.ENUM_DECL)
+
+        enum_constants = list(enum.get_children())
+        self.assertEqual(len(enum_constants), 3)
+
+        spam, egg, ham = enum_constants
+
+        self.assertEqual(spam.kind, CursorKind.ENUM_CONSTANT_DECL)
+        self.assertEqual(spam.enum_value, 1)
+        self.assertEqual(egg.kind, CursorKind.ENUM_CONSTANT_DECL)
+        self.assertEqual(egg.enum_value, 2)
+        self.assertEqual(ham.kind, CursorKind.ENUM_CONSTANT_DECL)
+        self.assertEqual(ham.enum_value, 40)
+
+    def test_enum_values_cpp(self):
+        tu = get_tu('enum TEST : long long { SPAM = -1, HAM = 0x10000000000};', lang="cpp")
+        enum = get_cursor(tu, 'TEST')
+        self.assertIsNotNone(enum)
+
+        self.assertEqual(enum.kind, CursorKind.ENUM_DECL)
+
+        enum_constants = list(enum.get_children())
+        self.assertEqual(len(enum_constants), 2)
+
+        spam, ham = enum_constants
+
+        self.assertEqual(spam.kind, CursorKind.ENUM_CONSTANT_DECL)
+        self.assertEqual(spam.enum_value, -1)
+        self.assertEqual(ham.kind, CursorKind.ENUM_CONSTANT_DECL)
+        self.assertEqual(ham.enum_value, 0x10000000000)
+
+    def test_annotation_attribute(self):
+        tu = get_tu('int foo (void) __attribute__ ((annotate("here be annotation attribute")));')
+
+        foo = get_cursor(tu, 'foo')
+        self.assertIsNotNone(foo)
+
+        for c in foo.get_children():
+            if c.kind == CursorKind.ANNOTATE_ATTR:
+                self.assertEqual(c.displayname, "here be annotation attribute")
+                break
+        else:
+            self.fail("Couldn't find annotation")
+
+    def test_annotation_template(self):
+        annotation = '__attribute__ ((annotate("annotation")))'
+        for source, kind in [
+                ('int foo (T value) %s;', CursorKind.FUNCTION_TEMPLATE),
+                ('class %s foo {};', CursorKind.CLASS_TEMPLATE),
+        ]:
+            source = 'template<typename T> ' + (source % annotation)
+            tu = get_tu(source, lang="cpp")
+
+            foo = get_cursor(tu, 'foo')
+            self.assertIsNotNone(foo)
+            self.assertEqual(foo.kind, kind)
+
+            for c in foo.get_children():
+                if c.kind == CursorKind.ANNOTATE_ATTR:
+                    self.assertEqual(c.displayname, "annotation")
+                    break
+            else:
+                self.fail("Couldn't find annotation for {}".format(kind))
+
+    def test_result_type(self):
+        tu = get_tu('int foo();')
+        foo = get_cursor(tu, 'foo')
+
+        self.assertIsNotNone(foo)
+        t = foo.result_type
+        self.assertEqual(t.kind, TypeKind.INT)
+
+    def test_availability(self):
+        tu = get_tu('class A { A(A const&) = delete; };', lang='cpp')
+
+        # AvailabilityKind.AVAILABLE
+        cursor = get_cursor(tu, 'A')
+        self.assertEqual(cursor.kind, CursorKind.CLASS_DECL)
+        self.assertEqual(cursor.availability, AvailabilityKind.AVAILABLE)
+
+        # AvailabilityKind.NOT_AVAILABLE
+        cursors = get_cursors(tu, 'A')
+        for c in cursors:
+            if c.kind == CursorKind.CONSTRUCTOR:
+                self.assertEqual(c.availability, AvailabilityKind.NOT_AVAILABLE)
+                break
+        else:
+            self.fail("Could not find cursor for deleted constructor")
+
+        # AvailabilityKind.DEPRECATED
+        tu = get_tu('void test() __attribute__((deprecated));', lang='cpp')
+        cursor = get_cursor(tu, 'test')
+        self.assertEqual(cursor.availability, AvailabilityKind.DEPRECATED)
+
+        # AvailabilityKind.NOT_ACCESSIBLE is only used in the code completion results
+
+    def test_get_tokens(self):
+        """Ensure we can map cursors back to tokens."""
+        tu = get_tu('int foo(int i);')
+        foo = get_cursor(tu, 'foo')
+
+        tokens = list(foo.get_tokens())
+        self.assertEqual(len(tokens), 6)
+        self.assertEqual(tokens[0].spelling, 'int')
+        self.assertEqual(tokens[1].spelling, 'foo')
+
+    def test_get_token_cursor(self):
+        """Ensure we can map tokens to cursors."""
+        tu = get_tu('class A {}; int foo(A var = A());', lang='cpp')
+        foo = get_cursor(tu, 'foo')
+
+        for cursor in foo.walk_preorder():
+            if cursor.kind.is_expression() and not cursor.kind.is_statement():
+                break
+        else:
+            self.fail("Could not find default value expression")
+
+        tokens = list(cursor.get_tokens())
+        self.assertEqual(len(tokens), 4, [t.spelling for t in tokens])
+        self.assertEqual(tokens[0].spelling, '=')
+        self.assertEqual(tokens[1].spelling, 'A')
+        self.assertEqual(tokens[2].spelling, '(')
+        self.assertEqual(tokens[3].spelling, ')')
+        t_cursor = tokens[1].cursor
+        self.assertEqual(t_cursor.kind, CursorKind.TYPE_REF)
+        r_cursor = t_cursor.referenced # should not raise an exception
+        self.assertEqual(r_cursor.kind, CursorKind.CLASS_DECL)
+
+    def test_get_arguments(self):
+        tu = get_tu('void foo(int i, int j);')
+        foo = get_cursor(tu, 'foo')
+        arguments = list(foo.get_arguments())
+
+        self.assertEqual(len(arguments), 2)
+        self.assertEqual(arguments[0].spelling, "i")
+        self.assertEqual(arguments[1].spelling, "j")
+
+    def test_get_num_template_arguments(self):
+        tu = get_tu(kTemplateArgTest, lang='cpp')
+        foos = get_cursors(tu, 'foo')
+
+        self.assertEqual(foos[1].get_num_template_arguments(), 3)
+
+    def test_get_template_argument_kind(self):
+        tu = get_tu(kTemplateArgTest, lang='cpp')
+        foos = get_cursors(tu, 'foo')
+
+        self.assertEqual(foos[1].get_template_argument_kind(0), TemplateArgumentKind.INTEGRAL)
+        self.assertEqual(foos[1].get_template_argument_kind(1), TemplateArgumentKind.TYPE)
+        self.assertEqual(foos[1].get_template_argument_kind(2), TemplateArgumentKind.INTEGRAL)
+
+    def test_get_template_argument_type(self):
+        tu = get_tu(kTemplateArgTest, lang='cpp')
+        foos = get_cursors(tu, 'foo')
+
+        self.assertEqual(foos[1].get_template_argument_type(1).kind, TypeKind.FLOAT)
+
+    def test_get_template_argument_value(self):
+        tu = get_tu(kTemplateArgTest, lang='cpp')
+        foos = get_cursors(tu, 'foo')
+
+        self.assertEqual(foos[1].get_template_argument_value(0), -7)
+        self.assertEqual(foos[1].get_template_argument_value(2), True)
+
+    def test_get_template_argument_unsigned_value(self):
+        tu = get_tu(kTemplateArgTest, lang='cpp')
+        foos = get_cursors(tu, 'foo')
+
+        self.assertEqual(foos[1].get_template_argument_unsigned_value(0), 2 ** 32 - 7)
+        self.assertEqual(foos[1].get_template_argument_unsigned_value(2), True)
+
+    def test_referenced(self):
+        tu = get_tu('void foo(); void bar() { foo(); }')
+        foo = get_cursor(tu, 'foo')
+        bar = get_cursor(tu, 'bar')
+        for c in bar.get_children():
+            if c.kind == CursorKind.CALL_EXPR:
+                self.assertEqual(c.referenced.spelling, foo.spelling)
+                break
+
+    def test_mangled_name(self):
+        kInputForMangling = """\
+        int foo(int, int);
+        """
+        tu = get_tu(kInputForMangling, lang='cpp')
+        foo = get_cursor(tu, 'foo')
+
+        # Since libclang does not link in targets, we cannot pass a triple to it
+        # and force the target. To enable this test to pass on all platforms, accept
+        # all valid manglings.
+        # [c-index-test handles this by running the source through clang, emitting
+        #  an AST file and running libclang on that AST file]
+        self.assertIn(foo.mangled_name, ('_Z3fooii', '__Z3fooii', '?foo@@YAHHH'))
diff --git a/bindings/python/tests/cindex/test_cursor_kind.py b/bindings/python/tests/cindex/test_cursor_kind.py
index 4d8d88b..f1ee753 100644
--- a/bindings/python/tests/cindex/test_cursor_kind.py
+++ b/bindings/python/tests/cindex/test_cursor_kind.py
@@ -1,49 +1,53 @@
 from clang.cindex import CursorKind
 
-def test_name():
-    assert CursorKind.UNEXPOSED_DECL.name is 'UNEXPOSED_DECL'
+import unittest
 
-def test_get_all_kinds():
-    kinds = CursorKind.get_all_kinds()
-    assert CursorKind.UNEXPOSED_DECL in kinds
-    assert CursorKind.TRANSLATION_UNIT in kinds
-    assert CursorKind.VARIABLE_REF in kinds
-    assert CursorKind.LAMBDA_EXPR in kinds
-    assert CursorKind.OBJ_BOOL_LITERAL_EXPR in kinds
-    assert CursorKind.OBJ_SELF_EXPR in kinds
-    assert CursorKind.MS_ASM_STMT in kinds
-    assert CursorKind.MODULE_IMPORT_DECL in kinds
-    assert CursorKind.TYPE_ALIAS_TEMPLATE_DECL in kinds
 
-def test_kind_groups():
-    """Check that every kind classifies to exactly one group."""
+class TestCursorKind(unittest.TestCase):
+    def test_name(self):
+        self.assertTrue(CursorKind.UNEXPOSED_DECL.name is 'UNEXPOSED_DECL')
 
-    assert CursorKind.UNEXPOSED_DECL.is_declaration()
-    assert CursorKind.TYPE_REF.is_reference()
-    assert CursorKind.DECL_REF_EXPR.is_expression()
-    assert CursorKind.UNEXPOSED_STMT.is_statement()
-    assert CursorKind.INVALID_FILE.is_invalid()
+    def test_get_all_kinds(self):
+        kinds = CursorKind.get_all_kinds()
+        self.assertIn(CursorKind.UNEXPOSED_DECL, kinds)
+        self.assertIn(CursorKind.TRANSLATION_UNIT, kinds)
+        self.assertIn(CursorKind.VARIABLE_REF, kinds)
+        self.assertIn(CursorKind.LAMBDA_EXPR, kinds)
+        self.assertIn(CursorKind.OBJ_BOOL_LITERAL_EXPR, kinds)
+        self.assertIn(CursorKind.OBJ_SELF_EXPR, kinds)
+        self.assertIn(CursorKind.MS_ASM_STMT, kinds)
+        self.assertIn(CursorKind.MODULE_IMPORT_DECL, kinds)
+        self.assertIn(CursorKind.TYPE_ALIAS_TEMPLATE_DECL, kinds)
 
-    assert CursorKind.TRANSLATION_UNIT.is_translation_unit()
-    assert not CursorKind.TYPE_REF.is_translation_unit()
+    def test_kind_groups(self):
+        """Check that every kind classifies to exactly one group."""
 
-    assert CursorKind.PREPROCESSING_DIRECTIVE.is_preprocessing()
-    assert not CursorKind.TYPE_REF.is_preprocessing()
+        self.assertTrue(CursorKind.UNEXPOSED_DECL.is_declaration())
+        self.assertTrue(CursorKind.TYPE_REF.is_reference())
+        self.assertTrue(CursorKind.DECL_REF_EXPR.is_expression())
+        self.assertTrue(CursorKind.UNEXPOSED_STMT.is_statement())
+        self.assertTrue(CursorKind.INVALID_FILE.is_invalid())
 
-    assert CursorKind.UNEXPOSED_DECL.is_unexposed()
-    assert not CursorKind.TYPE_REF.is_unexposed()
+        self.assertTrue(CursorKind.TRANSLATION_UNIT.is_translation_unit())
+        self.assertFalse(CursorKind.TYPE_REF.is_translation_unit())
 
-    for k in CursorKind.get_all_kinds():
-        group = [n for n in ('is_declaration', 'is_reference', 'is_expression',
-                             'is_statement', 'is_invalid', 'is_attribute')
-                 if getattr(k, n)()]
+        self.assertTrue(CursorKind.PREPROCESSING_DIRECTIVE.is_preprocessing())
+        self.assertFalse(CursorKind.TYPE_REF.is_preprocessing())
 
-        if k in (   CursorKind.TRANSLATION_UNIT,
-                    CursorKind.MACRO_DEFINITION,
-                    CursorKind.MACRO_INSTANTIATION,
-                    CursorKind.INCLUSION_DIRECTIVE,
-                    CursorKind.PREPROCESSING_DIRECTIVE,
-                    CursorKind.OVERLOAD_CANDIDATE):
-            assert len(group) == 0
-        else:
-            assert len(group) == 1
+        self.assertTrue(CursorKind.UNEXPOSED_DECL.is_unexposed())
+        self.assertFalse(CursorKind.TYPE_REF.is_unexposed())
+
+        for k in CursorKind.get_all_kinds():
+            group = [n for n in ('is_declaration', 'is_reference', 'is_expression',
+                                 'is_statement', 'is_invalid', 'is_attribute')
+                     if getattr(k, n)()]
+
+            if k in (   CursorKind.TRANSLATION_UNIT,
+                        CursorKind.MACRO_DEFINITION,
+                        CursorKind.MACRO_INSTANTIATION,
+                        CursorKind.INCLUSION_DIRECTIVE,
+                        CursorKind.PREPROCESSING_DIRECTIVE,
+                        CursorKind.OVERLOAD_CANDIDATE):
+                self.assertEqual(len(group), 0)
+            else:
+                self.assertEqual(len(group), 1)
diff --git a/bindings/python/tests/cindex/test_diagnostics.py b/bindings/python/tests/cindex/test_diagnostics.py
index 23cbe89..78b327d 100644
--- a/bindings/python/tests/cindex/test_diagnostics.py
+++ b/bindings/python/tests/cindex/test_diagnostics.py
@@ -1,102 +1,105 @@
 from clang.cindex import *
 from .util import get_tu
 
+import unittest
+
+
 # FIXME: We need support for invalid translation units to test better.
 
-def test_diagnostic_warning():
-    tu = get_tu('int f0() {}\n')
-    assert len(tu.diagnostics) == 1
-    assert tu.diagnostics[0].severity == Diagnostic.Warning
-    assert tu.diagnostics[0].location.line == 1
-    assert tu.diagnostics[0].location.column == 11
-    assert (tu.diagnostics[0].spelling ==
-            'control reaches end of non-void function')
 
-def test_diagnostic_note():
-    # FIXME: We aren't getting notes here for some reason.
-    tu = get_tu('#define A x\nvoid *A = 1;\n')
-    assert len(tu.diagnostics) == 1
-    assert tu.diagnostics[0].severity == Diagnostic.Warning
-    assert tu.diagnostics[0].location.line == 2
-    assert tu.diagnostics[0].location.column == 7
-    assert 'incompatible' in tu.diagnostics[0].spelling
-#    assert tu.diagnostics[1].severity == Diagnostic.Note
-#    assert tu.diagnostics[1].location.line == 1
-#    assert tu.diagnostics[1].location.column == 11
-#    assert tu.diagnostics[1].spelling == 'instantiated from'
+class TestDiagnostics(unittest.TestCase):
+    def test_diagnostic_warning(self):
+        tu = get_tu('int f0() {}\n')
+        self.assertEqual(len(tu.diagnostics), 1)
+        self.assertEqual(tu.diagnostics[0].severity, Diagnostic.Warning)
+        self.assertEqual(tu.diagnostics[0].location.line, 1)
+        self.assertEqual(tu.diagnostics[0].location.column, 11)
+        self.assertEqual(tu.diagnostics[0].spelling,
+                'control reaches end of non-void function')
 
-def test_diagnostic_fixit():
-    tu = get_tu('struct { int f0; } x = { f0 : 1 };')
-    assert len(tu.diagnostics) == 1
-    assert tu.diagnostics[0].severity == Diagnostic.Warning
-    assert tu.diagnostics[0].location.line == 1
-    assert tu.diagnostics[0].location.column == 26
-    assert tu.diagnostics[0].spelling.startswith('use of GNU old-style')
-    assert len(tu.diagnostics[0].fixits) == 1
-    assert tu.diagnostics[0].fixits[0].range.start.line == 1
-    assert tu.diagnostics[0].fixits[0].range.start.column == 26
-    assert tu.diagnostics[0].fixits[0].range.end.line == 1
-    assert tu.diagnostics[0].fixits[0].range.end.column == 30
-    assert tu.diagnostics[0].fixits[0].value == '.f0 = '
+    def test_diagnostic_note(self):
+        # FIXME: We aren't getting notes here for some reason.
+        tu = get_tu('#define A x\nvoid *A = 1;\n')
+        self.assertEqual(len(tu.diagnostics), 1)
+        self.assertEqual(tu.diagnostics[0].severity, Diagnostic.Warning)
+        self.assertEqual(tu.diagnostics[0].location.line, 2)
+        self.assertEqual(tu.diagnostics[0].location.column, 7)
+        self.assertIn('incompatible', tu.diagnostics[0].spelling)
+#       self.assertEqual(tu.diagnostics[1].severity, Diagnostic.Note)
+#       self.assertEqual(tu.diagnostics[1].location.line, 1)
+#       self.assertEqual(tu.diagnostics[1].location.column, 11)
+#       self.assertEqual(tu.diagnostics[1].spelling, 'instantiated from')
 
-def test_diagnostic_range():
-    tu = get_tu('void f() { int i = "a" + 1; }')
-    assert len(tu.diagnostics) == 1
-    assert tu.diagnostics[0].severity == Diagnostic.Warning
-    assert tu.diagnostics[0].location.line == 1
-    assert tu.diagnostics[0].location.column == 16
-    assert tu.diagnostics[0].spelling.startswith('incompatible pointer to')
-    assert len(tu.diagnostics[0].fixits) == 0
-    assert len(tu.diagnostics[0].ranges) == 1
-    assert tu.diagnostics[0].ranges[0].start.line == 1
-    assert tu.diagnostics[0].ranges[0].start.column == 20
-    assert tu.diagnostics[0].ranges[0].end.line == 1
-    assert tu.diagnostics[0].ranges[0].end.column == 27
-    try:
-      tu.diagnostics[0].ranges[1].start.line
-    except IndexError:
-      assert True
-    else:
-      assert False
+    def test_diagnostic_fixit(self):
+        tu = get_tu('struct { int f0; } x = { f0 : 1 };')
+        self.assertEqual(len(tu.diagnostics), 1)
+        self.assertEqual(tu.diagnostics[0].severity, Diagnostic.Warning)
+        self.assertEqual(tu.diagnostics[0].location.line, 1)
+        self.assertEqual(tu.diagnostics[0].location.column, 26)
+        self.assertRegexpMatches(tu.diagnostics[0].spelling,
+            'use of GNU old-style.*')
+        self.assertEqual(len(tu.diagnostics[0].fixits), 1)
+        self.assertEqual(tu.diagnostics[0].fixits[0].range.start.line, 1)
+        self.assertEqual(tu.diagnostics[0].fixits[0].range.start.column, 26)
+        self.assertEqual(tu.diagnostics[0].fixits[0].range.end.line, 1)
+        self.assertEqual(tu.diagnostics[0].fixits[0].range.end.column, 30)
+        self.assertEqual(tu.diagnostics[0].fixits[0].value, '.f0 = ')
 
-def test_diagnostic_category():
-    """Ensure that category properties work."""
-    tu = get_tu('int f(int i) { return 7; }', all_warnings=True)
-    assert len(tu.diagnostics) == 1
-    d = tu.diagnostics[0]
+    def test_diagnostic_range(self):
+        tu = get_tu('void f() { int i = "a" + 1; }')
+        self.assertEqual(len(tu.diagnostics), 1)
+        self.assertEqual(tu.diagnostics[0].severity, Diagnostic.Warning)
+        self.assertEqual(tu.diagnostics[0].location.line, 1)
+        self.assertEqual(tu.diagnostics[0].location.column, 16)
+        self.assertRegexpMatches(tu.diagnostics[0].spelling,
+            'incompatible pointer to.*')
+        self.assertEqual(len(tu.diagnostics[0].fixits), 0)
+        self.assertEqual(len(tu.diagnostics[0].ranges), 1)
+        self.assertEqual(tu.diagnostics[0].ranges[0].start.line, 1)
+        self.assertEqual(tu.diagnostics[0].ranges[0].start.column, 20)
+        self.assertEqual(tu.diagnostics[0].ranges[0].end.line, 1)
+        self.assertEqual(tu.diagnostics[0].ranges[0].end.column, 27)
+        with self.assertRaises(IndexError):
+            tu.diagnostics[0].ranges[1].start.line
 
-    assert d.severity == Diagnostic.Warning
-    assert d.location.line == 1
-    assert d.location.column == 11
+    def test_diagnostic_category(self):
+        """Ensure that category properties work."""
+        tu = get_tu('int f(int i) { return 7; }', all_warnings=True)
+        self.assertEqual(len(tu.diagnostics), 1)
+        d = tu.diagnostics[0]
 
-    assert d.category_number == 2
-    assert d.category_name == 'Semantic Issue'
+        self.assertEqual(d.severity, Diagnostic.Warning)
+        self.assertEqual(d.location.line, 1)
+        self.assertEqual(d.location.column, 11)
 
-def test_diagnostic_option():
-    """Ensure that category option properties work."""
-    tu = get_tu('int f(int i) { return 7; }', all_warnings=True)
-    assert len(tu.diagnostics) == 1
-    d = tu.diagnostics[0]
+        self.assertEqual(d.category_number, 2)
+        self.assertEqual(d.category_name, 'Semantic Issue')
 
-    assert d.option == '-Wunused-parameter'
-    assert d.disable_option == '-Wno-unused-parameter'
+    def test_diagnostic_option(self):
+        """Ensure that category option properties work."""
+        tu = get_tu('int f(int i) { return 7; }', all_warnings=True)
+        self.assertEqual(len(tu.diagnostics), 1)
+        d = tu.diagnostics[0]
 
-def test_diagnostic_children():
-    tu = get_tu('void f(int x) {} void g() { f(); }')
-    assert len(tu.diagnostics) == 1
-    d = tu.diagnostics[0]
+        self.assertEqual(d.option, '-Wunused-parameter')
+        self.assertEqual(d.disable_option, '-Wno-unused-parameter')
 
-    children = d.children
-    assert len(children) == 1
-    assert children[0].severity == Diagnostic.Note
-    assert children[0].spelling.endswith('declared here')
-    assert children[0].location.line == 1
-    assert children[0].location.column == 1
+    def test_diagnostic_children(self):
+        tu = get_tu('void f(int x) {} void g() { f(); }')
+        self.assertEqual(len(tu.diagnostics), 1)
+        d = tu.diagnostics[0]
 
-def test_diagnostic_string_repr():
-    tu = get_tu('struct MissingSemicolon{}')
-    assert len(tu.diagnostics) == 1
-    d = tu.diagnostics[0]
+        children = d.children
+        self.assertEqual(len(children), 1)
+        self.assertEqual(children[0].severity, Diagnostic.Note)
+        self.assertRegexpMatches(children[0].spelling,
+                '.*declared here')
+        self.assertEqual(children[0].location.line, 1)
+        self.assertEqual(children[0].location.column, 1)
 
-    assert repr(d) == '<Diagnostic severity 3, location <SourceLocation file \'t.c\', line 1, column 26>, spelling "expected \';\' after struct">'
-    
+    def test_diagnostic_string_repr(self):
+        tu = get_tu('struct MissingSemicolon{}')
+        self.assertEqual(len(tu.diagnostics), 1)
+        d = tu.diagnostics[0]
+
+        self.assertEqual(repr(d), '<Diagnostic severity 3, location <SourceLocation file \'t.c\', line 1, column 26>, spelling "expected \';\' after struct">')
diff --git a/bindings/python/tests/cindex/test_exception_specification_kind.py b/bindings/python/tests/cindex/test_exception_specification_kind.py
index 543d47f..80b3639 100644
--- a/bindings/python/tests/cindex/test_exception_specification_kind.py
+++ b/bindings/python/tests/cindex/test_exception_specification_kind.py
@@ -2,6 +2,8 @@
 from clang.cindex import ExceptionSpecificationKind
 from .util import get_tu
 
+import unittest
+
 
 def find_function_declarations(node, declarations=[]):
     if node.kind == clang.cindex.CursorKind.FUNCTION_DECL:
@@ -11,17 +13,18 @@
     return declarations
 
 
-def test_exception_specification_kind():
-    source = """int square1(int x);
-                int square2(int x) noexcept;
-                int square3(int x) noexcept(noexcept(x * x));"""
+class TestExceptionSpecificationKind(unittest.TestCase):
+    def test_exception_specification_kind(self):
+        source = """int square1(int x);
+                    int square2(int x) noexcept;
+                    int square3(int x) noexcept(noexcept(x * x));"""
 
-    tu = get_tu(source, lang='cpp', flags=['-std=c++14'])
+        tu = get_tu(source, lang='cpp', flags=['-std=c++14'])
 
-    declarations = find_function_declarations(tu.cursor)
-    expected = [
-        ('square1', ExceptionSpecificationKind.NONE),
-        ('square2', ExceptionSpecificationKind.BASIC_NOEXCEPT),
-        ('square3', ExceptionSpecificationKind.COMPUTED_NOEXCEPT)
-    ]
-    assert declarations == expected
+        declarations = find_function_declarations(tu.cursor)
+        expected = [
+            ('square1', ExceptionSpecificationKind.NONE),
+            ('square2', ExceptionSpecificationKind.BASIC_NOEXCEPT),
+            ('square3', ExceptionSpecificationKind.COMPUTED_NOEXCEPT)
+        ]
+        self.assertListEqual(declarations, expected)
diff --git a/bindings/python/tests/cindex/test_file.py b/bindings/python/tests/cindex/test_file.py
index 146e8c5..98f6575 100644
--- a/bindings/python/tests/cindex/test_file.py
+++ b/bindings/python/tests/cindex/test_file.py
@@ -1,9 +1,13 @@
 from clang.cindex import Index, File
 
-def test_file():
-  index = Index.create()
-  tu = index.parse('t.c', unsaved_files = [('t.c', "")])
-  file = File.from_name(tu, "t.c")
-  assert str(file) == "t.c"
-  assert file.name == "t.c"
-  assert repr(file) == "<File: t.c>"
+import unittest
+
+
+class TestFile(unittest.TestCase):
+    def test_file(self):
+        index = Index.create()
+        tu = index.parse('t.c', unsaved_files = [('t.c', "")])
+        file = File.from_name(tu, "t.c")
+        self.assertEqual(str(file), "t.c")
+        self.assertEqual(file.name, "t.c")
+        self.assertEqual(repr(file), "<File: t.c>")
diff --git a/bindings/python/tests/cindex/test_index.py b/bindings/python/tests/cindex/test_index.py
index dc173f0..cfdf98e 100644
--- a/bindings/python/tests/cindex/test_index.py
+++ b/bindings/python/tests/cindex/test_index.py
@@ -1,15 +1,21 @@
 from clang.cindex import *
 import os
+import unittest
+
 
 kInputsDir = os.path.join(os.path.dirname(__file__), 'INPUTS')
 
-def test_create():
-    index = Index.create()
 
-# FIXME: test Index.read
+class TestIndex(unittest.TestCase):
+    def test_create(self):
+        index = Index.create()
 
-def test_parse():
-    index = Index.create()
-    assert isinstance(index, Index)
-    tu = index.parse(os.path.join(kInputsDir, 'hello.cpp'))
-    assert isinstance(tu, TranslationUnit)
+    # FIXME: test Index.read
+
+    def test_parse(self):
+        index = Index.create()
+        self.assertIsInstance(index, Index)
+        tu = index.parse(os.path.join(kInputsDir, 'hello.cpp'))
+        self.assertIsInstance(tu, TranslationUnit)
+        tu = index.parse(None, ['-c', os.path.join(kInputsDir, 'hello.cpp')])
+        self.assertIsInstance(tu, TranslationUnit)
diff --git a/bindings/python/tests/cindex/test_linkage.py b/bindings/python/tests/cindex/test_linkage.py
index 2f056d5..6b482f8 100644
--- a/bindings/python/tests/cindex/test_linkage.py
+++ b/bindings/python/tests/cindex/test_linkage.py
@@ -1,4 +1,3 @@
-
 from clang.cindex import LinkageKind
 from clang.cindex import Cursor
 from clang.cindex import TranslationUnit
@@ -6,25 +5,29 @@
 from .util import get_cursor
 from .util import get_tu
 
-def test_linkage():
-    """Ensure that linkage specifers are available on cursors"""
+import unittest
 
-    tu = get_tu("""
+
+class TestLinkage(unittest.TestCase):
+    def test_linkage(self):
+        """Ensure that linkage specifers are available on cursors"""
+
+        tu = get_tu("""
 void foo() { int no_linkage; }
 static int internal;
-namespace { extern int unique_external; }
+namespace { struct unique_external_type {} }
+unique_external_type unique_external;
 extern int external;
 """, lang = 'cpp')
 
-    no_linkage = get_cursor(tu.cursor, 'no_linkage')
-    assert no_linkage.linkage == LinkageKind.NO_LINKAGE;
+        no_linkage = get_cursor(tu.cursor, 'no_linkage')
+        self.assertEqual(no_linkage.linkage, LinkageKind.NO_LINKAGE)
 
-    internal = get_cursor(tu.cursor, 'internal')
-    assert internal.linkage == LinkageKind.INTERNAL
+        internal = get_cursor(tu.cursor, 'internal')
+        self.assertEqual(internal.linkage, LinkageKind.INTERNAL)
 
-    unique_external = get_cursor(tu.cursor, 'unique_external')
-    assert unique_external.linkage == LinkageKind.UNIQUE_EXTERNAL
+        unique_external = get_cursor(tu.cursor, 'unique_external')
+        self.assertEqual(unique_external.linkage, LinkageKind.UNIQUE_EXTERNAL)
 
-    external = get_cursor(tu.cursor, 'external')
-    assert external.linkage == LinkageKind.EXTERNAL
-
+        external = get_cursor(tu.cursor, 'external')
+        self.assertEqual(external.linkage, LinkageKind.EXTERNAL)
diff --git a/bindings/python/tests/cindex/test_location.py b/bindings/python/tests/cindex/test_location.py
index 9e9ef48..cbc32de 100644
--- a/bindings/python/tests/cindex/test_location.py
+++ b/bindings/python/tests/cindex/test_location.py
@@ -5,91 +5,96 @@
 from .util import get_cursor
 from .util import get_tu
 
+import unittest
+
+
 baseInput="int one;\nint two;\n"
 
-def assert_location(loc, line, column, offset):
-    assert loc.line == line
-    assert loc.column == column
-    assert loc.offset == offset
 
-def test_location():
-    tu = get_tu(baseInput)
-    one = get_cursor(tu, 'one')
-    two = get_cursor(tu, 'two')
+class TestLocation(unittest.TestCase):
+    def assert_location(self, loc, line, column, offset):
+        self.assertEqual(loc.line, line)
+        self.assertEqual(loc.column, column)
+        self.assertEqual(loc.offset, offset)
 
-    assert one is not None
-    assert two is not None
+    def test_location(self):
+        tu = get_tu(baseInput)
+        one = get_cursor(tu, 'one')
+        two = get_cursor(tu, 'two')
 
-    assert_location(one.location,line=1,column=5,offset=4)
-    assert_location(two.location,line=2,column=5,offset=13)
+        self.assertIsNotNone(one)
+        self.assertIsNotNone(two)
 
-    # adding a linebreak at top should keep columns same
-    tu = get_tu('\n' + baseInput)
-    one = get_cursor(tu, 'one')
-    two = get_cursor(tu, 'two')
+        self.assert_location(one.location,line=1,column=5,offset=4)
+        self.assert_location(two.location,line=2,column=5,offset=13)
 
-    assert one is not None
-    assert two is not None
+        # adding a linebreak at top should keep columns same
+        tu = get_tu('\n' + baseInput)
+        one = get_cursor(tu, 'one')
+        two = get_cursor(tu, 'two')
 
-    assert_location(one.location,line=2,column=5,offset=5)
-    assert_location(two.location,line=3,column=5,offset=14)
+        self.assertIsNotNone(one)
+        self.assertIsNotNone(two)
 
-    # adding a space should affect column on first line only
-    tu = get_tu(' ' + baseInput)
-    one = get_cursor(tu, 'one')
-    two = get_cursor(tu, 'two')
+        self.assert_location(one.location,line=2,column=5,offset=5)
+        self.assert_location(two.location,line=3,column=5,offset=14)
 
-    assert_location(one.location,line=1,column=6,offset=5)
-    assert_location(two.location,line=2,column=5,offset=14)
+        # adding a space should affect column on first line only
+        tu = get_tu(' ' + baseInput)
+        one = get_cursor(tu, 'one')
+        two = get_cursor(tu, 'two')
 
-    # define the expected location ourselves and see if it matches
-    # the returned location
-    tu = get_tu(baseInput)
+        self.assert_location(one.location,line=1,column=6,offset=5)
+        self.assert_location(two.location,line=2,column=5,offset=14)
 
-    file = File.from_name(tu, 't.c')
-    location = SourceLocation.from_position(tu, file, 1, 5)
-    cursor = Cursor.from_location(tu, location)
+        # define the expected location ourselves and see if it matches
+        # the returned location
+        tu = get_tu(baseInput)
 
-    one = get_cursor(tu, 'one')
-    assert one is not None
-    assert one == cursor
+        file = File.from_name(tu, 't.c')
+        location = SourceLocation.from_position(tu, file, 1, 5)
+        cursor = Cursor.from_location(tu, location)
 
-    # Ensure locations referring to the same entity are equivalent.
-    location2 = SourceLocation.from_position(tu, file, 1, 5)
-    assert location == location2
-    location3 = SourceLocation.from_position(tu, file, 1, 4)
-    assert location2 != location3
+        one = get_cursor(tu, 'one')
+        self.assertIsNotNone(one)
+        self.assertEqual(one, cursor)
 
-    offset_location = SourceLocation.from_offset(tu, file, 5)
-    cursor = Cursor.from_location(tu, offset_location)
-    verified = False
-    for n in [n for n in tu.cursor.get_children() if n.spelling == 'one']:
-        assert n == cursor
-        verified = True
+        # Ensure locations referring to the same entity are equivalent.
+        location2 = SourceLocation.from_position(tu, file, 1, 5)
+        self.assertEqual(location, location2)
+        location3 = SourceLocation.from_position(tu, file, 1, 4)
+        self.assertNotEqual(location2, location3)
 
-    assert verified
+        offset_location = SourceLocation.from_offset(tu, file, 5)
+        cursor = Cursor.from_location(tu, offset_location)
+        verified = False
+        for n in [n for n in tu.cursor.get_children() if n.spelling == 'one']:
+            self.assertEqual(n, cursor)
+            verified = True
 
-def test_extent():
-    tu = get_tu(baseInput)
-    one = get_cursor(tu, 'one')
-    two = get_cursor(tu, 'two')
+        self.assertTrue(verified)
 
-    assert_location(one.extent.start,line=1,column=1,offset=0)
-    assert_location(one.extent.end,line=1,column=8,offset=7)
-    assert baseInput[one.extent.start.offset:one.extent.end.offset] == "int one"
+    def test_extent(self):
+        tu = get_tu(baseInput)
+        one = get_cursor(tu, 'one')
+        two = get_cursor(tu, 'two')
 
-    assert_location(two.extent.start,line=2,column=1,offset=9)
-    assert_location(two.extent.end,line=2,column=8,offset=16)
-    assert baseInput[two.extent.start.offset:two.extent.end.offset] == "int two"
+        self.assert_location(one.extent.start,line=1,column=1,offset=0)
+        self.assert_location(one.extent.end,line=1,column=8,offset=7)
+        self.assertEqual(baseInput[one.extent.start.offset:one.extent.end.offset], "int one")
 
-    file = File.from_name(tu, 't.c')
-    location1 = SourceLocation.from_position(tu, file, 1, 1)
-    location2 = SourceLocation.from_position(tu, file, 1, 8)
+        self.assert_location(two.extent.start,line=2,column=1,offset=9)
+        self.assert_location(two.extent.end,line=2,column=8,offset=16)
+        self.assertEqual(baseInput[two.extent.start.offset:two.extent.end.offset], "int two")
 
-    range1 = SourceRange.from_locations(location1, location2)
-    range2 = SourceRange.from_locations(location1, location2)
-    assert range1 == range2
+        file = File.from_name(tu, 't.c')
+        location1 = SourceLocation.from_position(tu, file, 1, 1)
+        location2 = SourceLocation.from_position(tu, file, 1, 8)
 
-    location3 = SourceLocation.from_position(tu, file, 1, 6)
-    range3 = SourceRange.from_locations(location1, location3)
-    assert range1 != range3
+        range1 = SourceRange.from_locations(location1, location2)
+        range2 = SourceRange.from_locations(location1, location2)
+        self.assertEqual(range1, range2)
+
+        location3 = SourceLocation.from_position(tu, file, 1, 6)
+        range3 = SourceRange.from_locations(location1, location3)
+        self.assertNotEqual(range1, range3)
diff --git a/bindings/python/tests/cindex/test_tls_kind.py b/bindings/python/tests/cindex/test_tls_kind.py
index 6a03c0d..fbc3418 100644
--- a/bindings/python/tests/cindex/test_tls_kind.py
+++ b/bindings/python/tests/cindex/test_tls_kind.py
@@ -1,4 +1,3 @@
-
 from clang.cindex import TLSKind
 from clang.cindex import Cursor
 from clang.cindex import TranslationUnit
@@ -6,32 +5,45 @@
 from .util import get_cursor
 from .util import get_tu
 
-def test_tls_kind():
-    """Ensure that thread-local storage kinds are available on cursors."""
+import unittest
 
-    tu = get_tu("""
+
+class TestTLSKind(unittest.TestCase):
+    def test_tls_kind(self):
+        """Ensure that thread-local storage kinds are available on cursors."""
+
+        tu = get_tu("""
 int tls_none;
 thread_local int tls_dynamic;
 _Thread_local int tls_static;
 """, lang = 'cpp')
 
-    tls_none = get_cursor(tu.cursor, 'tls_none')
-    assert tls_none.tls_kind == TLSKind.NONE;
+        tls_none = get_cursor(tu.cursor, 'tls_none')
+        self.assertEqual(tls_none.tls_kind, TLSKind.NONE)
 
-    tls_dynamic = get_cursor(tu.cursor, 'tls_dynamic')
-    assert tls_dynamic.tls_kind == TLSKind.DYNAMIC
+        tls_dynamic = get_cursor(tu.cursor, 'tls_dynamic')
+        self.assertEqual(tls_dynamic.tls_kind, TLSKind.DYNAMIC)
 
-    tls_static = get_cursor(tu.cursor, 'tls_static')
-    assert tls_static.tls_kind == TLSKind.STATIC
+        tls_static = get_cursor(tu.cursor, 'tls_static')
+        self.assertEqual(tls_static.tls_kind, TLSKind.STATIC)
 
-    # The following case tests '__declspec(thread)'.  Since it is a Microsoft
-    # specific extension, specific flags are required for the parser to pick
-    # these up.
-    flags = ['-fms-extensions', '-target', 'x86_64-unknown-windows-win32']
-    tu = get_tu("""
-__declspec(thread) int tls_declspec;
+        # The following case tests '__declspec(thread)'.  Since it is a Microsoft
+        # specific extension, specific flags are required for the parser to pick
+        # these up.
+        flags = ['-fms-extensions', '-target', 'x86_64-unknown-windows-win32',
+                 '-fms-compatibility-version=18']
+        tu = get_tu("""
+__declspec(thread) int tls_declspec_msvc18;
 """, lang = 'cpp', flags=flags)
 
-    tls_declspec = get_cursor(tu.cursor, 'tls_declspec')
-    assert tls_declspec.tls_kind == TLSKind.STATIC
+        tls_declspec_msvc18 = get_cursor(tu.cursor, 'tls_declspec_msvc18')
+        self.assertEqual(tls_declspec_msvc18.tls_kind, TLSKind.STATIC)
 
+        flags = ['-fms-extensions', '-target', 'x86_64-unknown-windows-win32',
+                 '-fms-compatibility-version=19']
+        tu = get_tu("""
+__declspec(thread) int tls_declspec_msvc19;
+""", lang = 'cpp', flags=flags)
+
+        tls_declspec_msvc19 = get_cursor(tu.cursor, 'tls_declspec_msvc19')
+        self.assertEqual(tls_declspec_msvc19.tls_kind, TLSKind.DYNAMIC)
diff --git a/bindings/python/tests/cindex/test_token_kind.py b/bindings/python/tests/cindex/test_token_kind.py
index 62ec63e..700f95a 100644
--- a/bindings/python/tests/cindex/test_token_kind.py
+++ b/bindings/python/tests/cindex/test_token_kind.py
@@ -1,43 +1,44 @@
 from clang.cindex import TokenKind
-from nose.tools import eq_
-from nose.tools import ok_
-from nose.tools import raises
 
-def test_constructor():
-    """Ensure TokenKind constructor works as expected."""
+import unittest
 
-    t = TokenKind(5, 'foo')
 
-    eq_(t.value, 5)
-    eq_(t.name, 'foo')
+class TestTokenKind(unittest.TestCase):
+    def test_constructor(self):
+        """Ensure TokenKind constructor works as expected."""
 
-@raises(ValueError)
-def test_bad_register():
-    """Ensure a duplicate value is rejected for registration."""
+        t = TokenKind(5, 'foo')
 
-    TokenKind.register(2, 'foo')
+        self.assertEqual(t.value, 5)
+        self.assertEqual(t.name, 'foo')
 
-@raises(ValueError)
-def test_unknown_value():
-    """Ensure trying to fetch an unknown value raises."""
+    def test_bad_register(self):
+        """Ensure a duplicate value is rejected for registration."""
 
-    TokenKind.from_value(-1)
+        with self.assertRaises(ValueError):
+            TokenKind.register(2, 'foo')
 
-def test_registration():
-    """Ensure that items registered appear as class attributes."""
-    ok_(hasattr(TokenKind, 'LITERAL'))
-    literal = TokenKind.LITERAL
+    def test_unknown_value(self):
+        """Ensure trying to fetch an unknown value raises."""
 
-    ok_(isinstance(literal, TokenKind))
+        with self.assertRaises(ValueError):
+            TokenKind.from_value(-1)
 
-def test_from_value():
-    """Ensure registered values can be obtained from from_value()."""
-    t = TokenKind.from_value(3)
-    ok_(isinstance(t, TokenKind))
-    eq_(t, TokenKind.LITERAL)
+    def test_registration(self):
+        """Ensure that items registered appear as class attributes."""
+        self.assertTrue(hasattr(TokenKind, 'LITERAL'))
+        literal = TokenKind.LITERAL
 
-def test_repr():
-    """Ensure repr() works."""
+        self.assertIsInstance(literal, TokenKind)
 
-    r = repr(TokenKind.LITERAL)
-    eq_(r, 'TokenKind.LITERAL')
+    def test_from_value(self):
+        """Ensure registered values can be obtained from from_value()."""
+        t = TokenKind.from_value(3)
+        self.assertIsInstance(t, TokenKind)
+        self.assertEqual(t, TokenKind.LITERAL)
+
+    def test_repr(self):
+        """Ensure repr() works."""
+
+        r = repr(TokenKind.LITERAL)
+        self.assertEqual(r, 'TokenKind.LITERAL')
diff --git a/bindings/python/tests/cindex/test_tokens.py b/bindings/python/tests/cindex/test_tokens.py
index 688b5c1..c93353d 100644
--- a/bindings/python/tests/cindex/test_tokens.py
+++ b/bindings/python/tests/cindex/test_tokens.py
@@ -3,50 +3,52 @@
 from clang.cindex import SourceLocation
 from clang.cindex import SourceRange
 from clang.cindex import TokenKind
-from nose.tools import eq_
-from nose.tools import ok_
 
 from .util import get_tu
 
-def test_token_to_cursor():
-    """Ensure we can obtain a Cursor from a Token instance."""
-    tu = get_tu('int i = 5;')
-    r = tu.get_extent('t.c', (0, 9))
-    tokens = list(tu.get_tokens(extent=r))
+import unittest
 
-    assert len(tokens) == 4
-    assert tokens[1].spelling == 'i'
-    assert tokens[1].kind == TokenKind.IDENTIFIER
 
-    cursor = tokens[1].cursor
-    assert cursor.kind == CursorKind.VAR_DECL
-    assert tokens[1].cursor == tokens[2].cursor
+class TestTokens(unittest.TestCase):
+    def test_token_to_cursor(self):
+        """Ensure we can obtain a Cursor from a Token instance."""
+        tu = get_tu('int i = 5;')
+        r = tu.get_extent('t.c', (0, 9))
+        tokens = list(tu.get_tokens(extent=r))
 
-def test_token_location():
-    """Ensure Token.location works."""
+        self.assertEqual(len(tokens), 4)
+        self.assertEqual(tokens[1].spelling, 'i')
+        self.assertEqual(tokens[1].kind, TokenKind.IDENTIFIER)
 
-    tu = get_tu('int foo = 10;')
-    r = tu.get_extent('t.c', (0, 11))
+        cursor = tokens[1].cursor
+        self.assertEqual(cursor.kind, CursorKind.VAR_DECL)
+        self.assertEqual(tokens[1].cursor, tokens[2].cursor)
 
-    tokens = list(tu.get_tokens(extent=r))
-    eq_(len(tokens), 4)
+    def test_token_location(self):
+        """Ensure Token.location works."""
 
-    loc = tokens[1].location
-    ok_(isinstance(loc, SourceLocation))
-    eq_(loc.line, 1)
-    eq_(loc.column, 5)
-    eq_(loc.offset, 4)
+        tu = get_tu('int foo = 10;')
+        r = tu.get_extent('t.c', (0, 11))
 
-def test_token_extent():
-    """Ensure Token.extent works."""
-    tu = get_tu('int foo = 10;')
-    r = tu.get_extent('t.c', (0, 11))
+        tokens = list(tu.get_tokens(extent=r))
+        self.assertEqual(len(tokens), 4)
 
-    tokens = list(tu.get_tokens(extent=r))
-    eq_(len(tokens), 4)
+        loc = tokens[1].location
+        self.assertIsInstance(loc, SourceLocation)
+        self.assertEqual(loc.line, 1)
+        self.assertEqual(loc.column, 5)
+        self.assertEqual(loc.offset, 4)
 
-    extent = tokens[1].extent
-    ok_(isinstance(extent, SourceRange))
+    def test_token_extent(self):
+        """Ensure Token.extent works."""
+        tu = get_tu('int foo = 10;')
+        r = tu.get_extent('t.c', (0, 11))
 
-    eq_(extent.start.offset, 4)
-    eq_(extent.end.offset, 7)
+        tokens = list(tu.get_tokens(extent=r))
+        self.assertEqual(len(tokens), 4)
+
+        extent = tokens[1].extent
+        self.assertIsInstance(extent, SourceRange)
+
+        self.assertEqual(extent.start.offset, 4)
+        self.assertEqual(extent.end.offset, 7)
diff --git a/bindings/python/tests/cindex/test_translation_unit.py b/bindings/python/tests/cindex/test_translation_unit.py
index 65d1ee0..1b3973d 100644
--- a/bindings/python/tests/cindex/test_translation_unit.py
+++ b/bindings/python/tests/cindex/test_translation_unit.py
@@ -1,6 +1,7 @@
 import gc
 import os
 import tempfile
+import unittest
 
 from clang.cindex import CursorKind
 from clang.cindex import Cursor
@@ -14,83 +15,9 @@
 from .util import get_cursor
 from .util import get_tu
 
+
 kInputsDir = os.path.join(os.path.dirname(__file__), 'INPUTS')
 
-def test_spelling():
-    path = os.path.join(kInputsDir, 'hello.cpp')
-    tu = TranslationUnit.from_source(path)
-    assert tu.spelling == path
-
-def test_cursor():
-    path = os.path.join(kInputsDir, 'hello.cpp')
-    tu = get_tu(path)
-    c = tu.cursor
-    assert isinstance(c, Cursor)
-    assert c.kind is CursorKind.TRANSLATION_UNIT
-
-def test_parse_arguments():
-    path = os.path.join(kInputsDir, 'parse_arguments.c')
-    tu = TranslationUnit.from_source(path, ['-DDECL_ONE=hello', '-DDECL_TWO=hi'])
-    spellings = [c.spelling for c in tu.cursor.get_children()]
-    assert spellings[-2] == 'hello'
-    assert spellings[-1] == 'hi'
-
-def test_reparse_arguments():
-    path = os.path.join(kInputsDir, 'parse_arguments.c')
-    tu = TranslationUnit.from_source(path, ['-DDECL_ONE=hello', '-DDECL_TWO=hi'])
-    tu.reparse()
-    spellings = [c.spelling for c in tu.cursor.get_children()]
-    assert spellings[-2] == 'hello'
-    assert spellings[-1] == 'hi'
-
-def test_unsaved_files():
-    tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [
-            ('fake.c', """
-#include "fake.h"
-int x;
-int SOME_DEFINE;
-"""),
-            ('./fake.h', """
-#define SOME_DEFINE y
-""")
-            ])
-    spellings = [c.spelling for c in tu.cursor.get_children()]
-    assert spellings[-2] == 'x'
-    assert spellings[-1] == 'y'
-
-def test_unsaved_files_2():
-    try:
-        from StringIO import StringIO
-    except:
-        from io import StringIO
-    tu = TranslationUnit.from_source('fake.c', unsaved_files = [
-            ('fake.c', StringIO('int x;'))])
-    spellings = [c.spelling for c in tu.cursor.get_children()]
-    assert spellings[-1] == 'x'
-
-def normpaths_equal(path1, path2):
-    """ Compares two paths for equality after normalizing them with
-        os.path.normpath
-    """
-    return os.path.normpath(path1) == os.path.normpath(path2)
-
-def test_includes():
-    def eq(expected, actual):
-        if not actual.is_input_file:
-            return  normpaths_equal(expected[0], actual.source.name) and \
-                    normpaths_equal(expected[1], actual.include.name)
-        else:
-            return normpaths_equal(expected[1], actual.include.name)
-
-    src = os.path.join(kInputsDir, 'include.cpp')
-    h1 = os.path.join(kInputsDir, "header1.h")
-    h2 = os.path.join(kInputsDir, "header2.h")
-    h3 = os.path.join(kInputsDir, "header3.h")
-    inc = [(src, h1), (h1, h3), (src, h2), (h2, h3)]
-
-    tu = TranslationUnit.from_source(src)
-    for i in zip(inc, tu.get_includes()):
-        assert eq(i[0], i[1])
 
 def save_tu(tu):
     """Convenience API to save a TranslationUnit to a file.
@@ -102,153 +29,227 @@
 
     return path
 
-def test_save():
-    """Ensure TranslationUnit.save() works."""
 
-    tu = get_tu('int foo();')
-
-    path = save_tu(tu)
-    assert os.path.exists(path)
-    assert os.path.getsize(path) > 0
-    os.unlink(path)
-
-def test_save_translation_errors():
-    """Ensure that saving to an invalid directory raises."""
-
-    tu = get_tu('int foo();')
-
-    path = '/does/not/exist/llvm-test.ast'
-    assert not os.path.exists(os.path.dirname(path))
-
-    try:
-        tu.save(path)
-        assert False
-    except TranslationUnitSaveError as ex:
-        expected = TranslationUnitSaveError.ERROR_UNKNOWN
-        assert ex.save_error == expected
-
-def test_load():
-    """Ensure TranslationUnits can be constructed from saved files."""
-
-    tu = get_tu('int foo();')
-    assert len(tu.diagnostics) == 0
-    path = save_tu(tu)
-
-    assert os.path.exists(path)
-    assert os.path.getsize(path) > 0
-
-    tu2 = TranslationUnit.from_ast_file(filename=path)
-    assert len(tu2.diagnostics) == 0
-
-    foo = get_cursor(tu2, 'foo')
-    assert foo is not None
-
-    # Just in case there is an open file descriptor somewhere.
-    del tu2
-
-    os.unlink(path)
-
-def test_index_parse():
-    path = os.path.join(kInputsDir, 'hello.cpp')
-    index = Index.create()
-    tu = index.parse(path)
-    assert isinstance(tu, TranslationUnit)
-
-def test_get_file():
-    """Ensure tu.get_file() works appropriately."""
-
-    tu = get_tu('int foo();')
-
-    f = tu.get_file('t.c')
-    assert isinstance(f, File)
-    assert f.name == 't.c'
-
-    try:
-        f = tu.get_file('foobar.cpp')
-    except:
-        pass
-    else:
-        assert False
-
-def test_get_source_location():
-    """Ensure tu.get_source_location() works."""
-
-    tu = get_tu('int foo();')
-
-    location = tu.get_location('t.c', 2)
-    assert isinstance(location, SourceLocation)
-    assert location.offset == 2
-    assert location.file.name == 't.c'
-
-    location = tu.get_location('t.c', (1, 3))
-    assert isinstance(location, SourceLocation)
-    assert location.line == 1
-    assert location.column == 3
-    assert location.file.name == 't.c'
-
-def test_get_source_range():
-    """Ensure tu.get_source_range() works."""
-
-    tu = get_tu('int foo();')
-
-    r = tu.get_extent('t.c', (1,4))
-    assert isinstance(r, SourceRange)
-    assert r.start.offset == 1
-    assert r.end.offset == 4
-    assert r.start.file.name == 't.c'
-    assert r.end.file.name == 't.c'
-
-    r = tu.get_extent('t.c', ((1,2), (1,3)))
-    assert isinstance(r, SourceRange)
-    assert r.start.line == 1
-    assert r.start.column == 2
-    assert r.end.line == 1
-    assert r.end.column == 3
-    assert r.start.file.name == 't.c'
-    assert r.end.file.name == 't.c'
-
-    start = tu.get_location('t.c', 0)
-    end = tu.get_location('t.c', 5)
-
-    r = tu.get_extent('t.c', (start, end))
-    assert isinstance(r, SourceRange)
-    assert r.start.offset == 0
-    assert r.end.offset == 5
-    assert r.start.file.name == 't.c'
-    assert r.end.file.name == 't.c'
-
-def test_get_tokens_gc():
-    """Ensures get_tokens() works properly with garbage collection."""
-
-    tu = get_tu('int foo();')
-    r = tu.get_extent('t.c', (0, 10))
-    tokens = list(tu.get_tokens(extent=r))
-
-    assert tokens[0].spelling == 'int'
-    gc.collect()
-    assert tokens[0].spelling == 'int'
-
-    del tokens[1]
-    gc.collect()
-    assert tokens[0].spelling == 'int'
-
-    # May trigger segfault if we don't do our job properly.
-    del tokens
-    gc.collect()
-    gc.collect() # Just in case.
-
-def test_fail_from_source():
-    path = os.path.join(kInputsDir, 'non-existent.cpp')
-    try:
+class TestTranslationUnit(unittest.TestCase):
+    def test_spelling(self):
+        path = os.path.join(kInputsDir, 'hello.cpp')
         tu = TranslationUnit.from_source(path)
-    except TranslationUnitLoadError:
-        tu = None
-    assert tu == None
+        self.assertEqual(tu.spelling, path)
 
-def test_fail_from_ast_file():
-    path = os.path.join(kInputsDir, 'non-existent.ast')
-    try:
-        tu = TranslationUnit.from_ast_file(path)
-    except TranslationUnitLoadError:
-        tu = None
-    assert tu == None
+    def test_cursor(self):
+        path = os.path.join(kInputsDir, 'hello.cpp')
+        tu = get_tu(path)
+        c = tu.cursor
+        self.assertIsInstance(c, Cursor)
+        self.assertIs(c.kind, CursorKind.TRANSLATION_UNIT)
+
+    def test_parse_arguments(self):
+        path = os.path.join(kInputsDir, 'parse_arguments.c')
+        tu = TranslationUnit.from_source(path, ['-DDECL_ONE=hello', '-DDECL_TWO=hi'])
+        spellings = [c.spelling for c in tu.cursor.get_children()]
+        self.assertEqual(spellings[-2], 'hello')
+        self.assertEqual(spellings[-1], 'hi')
+
+    def test_reparse_arguments(self):
+        path = os.path.join(kInputsDir, 'parse_arguments.c')
+        tu = TranslationUnit.from_source(path, ['-DDECL_ONE=hello', '-DDECL_TWO=hi'])
+        tu.reparse()
+        spellings = [c.spelling for c in tu.cursor.get_children()]
+        self.assertEqual(spellings[-2], 'hello')
+        self.assertEqual(spellings[-1], 'hi')
+
+    def test_unsaved_files(self):
+        tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [
+                ('fake.c', """
+#include "fake.h"
+int x;
+int SOME_DEFINE;
+"""),
+                ('./fake.h', """
+#define SOME_DEFINE y
+""")
+                ])
+        spellings = [c.spelling for c in tu.cursor.get_children()]
+        self.assertEqual(spellings[-2], 'x')
+        self.assertEqual(spellings[-1], 'y')
+
+    def test_unsaved_files_2(self):
+        try:
+            from StringIO import StringIO
+        except:
+            from io import StringIO
+        tu = TranslationUnit.from_source('fake.c', unsaved_files = [
+                ('fake.c', StringIO('int x;'))])
+        spellings = [c.spelling for c in tu.cursor.get_children()]
+        self.assertEqual(spellings[-1], 'x')
+
+    def assert_normpaths_equal(self, path1, path2):
+        """ Compares two paths for equality after normalizing them with
+            os.path.normpath
+        """
+        self.assertEqual(os.path.normpath(path1),
+                         os.path.normpath(path2))
+
+    def test_includes(self):
+        def eq(expected, actual):
+            if not actual.is_input_file:
+                self.assert_normpaths_equal(expected[0], actual.source.name)
+                self.assert_normpaths_equal(expected[1], actual.include.name)
+            else:
+                self.assert_normpaths_equal(expected[1], actual.include.name)
+
+        src = os.path.join(kInputsDir, 'include.cpp')
+        h1 = os.path.join(kInputsDir, "header1.h")
+        h2 = os.path.join(kInputsDir, "header2.h")
+        h3 = os.path.join(kInputsDir, "header3.h")
+        inc = [(src, h1), (h1, h3), (src, h2), (h2, h3)]
+
+        tu = TranslationUnit.from_source(src)
+        for i in zip(inc, tu.get_includes()):
+            eq(i[0], i[1])
+
+    def test_save(self):
+        """Ensure TranslationUnit.save() works."""
+
+        tu = get_tu('int foo();')
+
+        path = save_tu(tu)
+        self.assertTrue(os.path.exists(path))
+        self.assertGreater(os.path.getsize(path), 0)
+        os.unlink(path)
+
+    def test_save_translation_errors(self):
+        """Ensure that saving to an invalid directory raises."""
+
+        tu = get_tu('int foo();')
+
+        path = '/does/not/exist/llvm-test.ast'
+        self.assertFalse(os.path.exists(os.path.dirname(path)))
+
+        with self.assertRaises(TranslationUnitSaveError) as cm:
+            tu.save(path)
+        ex = cm.exception
+        expected = TranslationUnitSaveError.ERROR_UNKNOWN
+        self.assertEqual(ex.save_error, expected)
+
+    def test_load(self):
+        """Ensure TranslationUnits can be constructed from saved files."""
+
+        tu = get_tu('int foo();')
+        self.assertEqual(len(tu.diagnostics), 0)
+        path = save_tu(tu)
+
+        self.assertTrue(os.path.exists(path))
+        self.assertGreater(os.path.getsize(path), 0)
+
+        tu2 = TranslationUnit.from_ast_file(filename=path)
+        self.assertEqual(len(tu2.diagnostics), 0)
+
+        foo = get_cursor(tu2, 'foo')
+        self.assertIsNotNone(foo)
+
+        # Just in case there is an open file descriptor somewhere.
+        del tu2
+
+        os.unlink(path)
+
+    def test_index_parse(self):
+        path = os.path.join(kInputsDir, 'hello.cpp')
+        index = Index.create()
+        tu = index.parse(path)
+        self.assertIsInstance(tu, TranslationUnit)
+
+    def test_get_file(self):
+        """Ensure tu.get_file() works appropriately."""
+
+        tu = get_tu('int foo();')
+
+        f = tu.get_file('t.c')
+        self.assertIsInstance(f, File)
+        self.assertEqual(f.name, 't.c')
+
+        with self.assertRaises(Exception):
+            f = tu.get_file('foobar.cpp')
+
+    def test_get_source_location(self):
+        """Ensure tu.get_source_location() works."""
+
+        tu = get_tu('int foo();')
+
+        location = tu.get_location('t.c', 2)
+        self.assertIsInstance(location, SourceLocation)
+        self.assertEqual(location.offset, 2)
+        self.assertEqual(location.file.name, 't.c')
+
+        location = tu.get_location('t.c', (1, 3))
+        self.assertIsInstance(location, SourceLocation)
+        self.assertEqual(location.line, 1)
+        self.assertEqual(location.column, 3)
+        self.assertEqual(location.file.name, 't.c')
+
+    def test_get_source_range(self):
+        """Ensure tu.get_source_range() works."""
+
+        tu = get_tu('int foo();')
+
+        r = tu.get_extent('t.c', (1,4))
+        self.assertIsInstance(r, SourceRange)
+        self.assertEqual(r.start.offset, 1)
+        self.assertEqual(r.end.offset, 4)
+        self.assertEqual(r.start.file.name, 't.c')
+        self.assertEqual(r.end.file.name, 't.c')
+
+        r = tu.get_extent('t.c', ((1,2), (1,3)))
+        self.assertIsInstance(r, SourceRange)
+        self.assertEqual(r.start.line, 1)
+        self.assertEqual(r.start.column, 2)
+        self.assertEqual(r.end.line, 1)
+        self.assertEqual(r.end.column, 3)
+        self.assertEqual(r.start.file.name, 't.c')
+        self.assertEqual(r.end.file.name, 't.c')
+
+        start = tu.get_location('t.c', 0)
+        end = tu.get_location('t.c', 5)
+
+        r = tu.get_extent('t.c', (start, end))
+        self.assertIsInstance(r, SourceRange)
+        self.assertEqual(r.start.offset, 0)
+        self.assertEqual(r.end.offset, 5)
+        self.assertEqual(r.start.file.name, 't.c')
+        self.assertEqual(r.end.file.name, 't.c')
+
+    def test_get_tokens_gc(self):
+        """Ensures get_tokens() works properly with garbage collection."""
+
+        tu = get_tu('int foo();')
+        r = tu.get_extent('t.c', (0, 10))
+        tokens = list(tu.get_tokens(extent=r))
+
+        self.assertEqual(tokens[0].spelling, 'int')
+        gc.collect()
+        self.assertEqual(tokens[0].spelling, 'int')
+
+        del tokens[1]
+        gc.collect()
+        self.assertEqual(tokens[0].spelling, 'int')
+
+        # May trigger segfault if we don't do our job properly.
+        del tokens
+        gc.collect()
+        gc.collect() # Just in case.
+
+    def test_fail_from_source(self):
+        path = os.path.join(kInputsDir, 'non-existent.cpp')
+        try:
+            tu = TranslationUnit.from_source(path)
+        except TranslationUnitLoadError:
+            tu = None
+        self.assertEqual(tu, None)
+
+    def test_fail_from_ast_file(self):
+        path = os.path.join(kInputsDir, 'non-existent.ast')
+        try:
+            tu = TranslationUnit.from_ast_file(path)
+        except TranslationUnitLoadError:
+            tu = None
+        self.assertEqual(tu, None)
diff --git a/bindings/python/tests/cindex/test_type.py b/bindings/python/tests/cindex/test_type.py
index 6ee0773..4dec058 100644
--- a/bindings/python/tests/cindex/test_type.py
+++ b/bindings/python/tests/cindex/test_type.py
@@ -1,12 +1,13 @@
 import gc
+import unittest
 
 from clang.cindex import CursorKind
 from clang.cindex import TranslationUnit
 from clang.cindex import TypeKind
-from nose.tools import raises
 from .util import get_cursor
 from .util import get_tu
 
+
 kInput = """\
 
 typedef int I;
@@ -24,400 +25,414 @@
 
 """
 
-def test_a_struct():
-    tu = get_tu(kInput)
-
-    teststruct = get_cursor(tu, 'teststruct')
-    assert teststruct is not None, "Could not find teststruct."
-    fields = list(teststruct.get_children())
-    assert all(x.kind == CursorKind.FIELD_DECL for x in fields)
-    assert all(x.translation_unit is not None for x in fields)
-
-    assert fields[0].spelling == 'a'
-    assert not fields[0].type.is_const_qualified()
-    assert fields[0].type.kind == TypeKind.INT
-    assert fields[0].type.get_canonical().kind == TypeKind.INT
-    assert fields[0].type.get_typedef_name() == ''
-
-    assert fields[1].spelling == 'b'
-    assert not fields[1].type.is_const_qualified()
-    assert fields[1].type.kind == TypeKind.TYPEDEF
-    assert fields[1].type.get_canonical().kind == TypeKind.INT
-    assert fields[1].type.get_declaration().spelling == 'I'
-    assert fields[1].type.get_typedef_name() == 'I'
-
-    assert fields[2].spelling == 'c'
-    assert not fields[2].type.is_const_qualified()
-    assert fields[2].type.kind == TypeKind.LONG
-    assert fields[2].type.get_canonical().kind == TypeKind.LONG
-    assert fields[2].type.get_typedef_name() == ''
-
-    assert fields[3].spelling == 'd'
-    assert not fields[3].type.is_const_qualified()
-    assert fields[3].type.kind == TypeKind.ULONG
-    assert fields[3].type.get_canonical().kind == TypeKind.ULONG
-    assert fields[3].type.get_typedef_name() == ''
-
-    assert fields[4].spelling == 'e'
-    assert not fields[4].type.is_const_qualified()
-    assert fields[4].type.kind == TypeKind.LONG
-    assert fields[4].type.get_canonical().kind == TypeKind.LONG
-    assert fields[4].type.get_typedef_name() == ''
-
-    assert fields[5].spelling == 'f'
-    assert fields[5].type.is_const_qualified()
-    assert fields[5].type.kind == TypeKind.INT
-    assert fields[5].type.get_canonical().kind == TypeKind.INT
-    assert fields[5].type.get_typedef_name() == ''
-
-    assert fields[6].spelling == 'g'
-    assert not fields[6].type.is_const_qualified()
-    assert fields[6].type.kind == TypeKind.POINTER
-    assert fields[6].type.get_pointee().kind == TypeKind.INT
-    assert fields[6].type.get_typedef_name() == ''
-
-    assert fields[7].spelling == 'h'
-    assert not fields[7].type.is_const_qualified()
-    assert fields[7].type.kind == TypeKind.POINTER
-    assert fields[7].type.get_pointee().kind == TypeKind.POINTER
-    assert fields[7].type.get_pointee().get_pointee().kind == TypeKind.POINTER
-    assert fields[7].type.get_pointee().get_pointee().get_pointee().kind == TypeKind.INT
-    assert fields[7].type.get_typedef_name() == ''
-
-def test_references():
-    """Ensure that a Type maintains a reference to a TranslationUnit."""
-
-    tu = get_tu('int x;')
-    children = list(tu.cursor.get_children())
-    assert len(children) > 0
-
-    cursor = children[0]
-    t = cursor.type
-
-    assert isinstance(t.translation_unit, TranslationUnit)
-
-    # Delete main TranslationUnit reference and force a GC.
-    del tu
-    gc.collect()
-    assert isinstance(t.translation_unit, TranslationUnit)
-
-    # If the TU was destroyed, this should cause a segfault.
-    decl = t.get_declaration()
 
 constarrayInput="""
 struct teststruct {
   void *A[2];
 };
 """
-def testConstantArray():
-    tu = get_tu(constarrayInput)
 
-    teststruct = get_cursor(tu, 'teststruct')
-    assert teststruct is not None, "Didn't find teststruct??"
-    fields = list(teststruct.get_children())
-    assert fields[0].spelling == 'A'
-    assert fields[0].type.kind == TypeKind.CONSTANTARRAY
-    assert fields[0].type.get_array_element_type() is not None
-    assert fields[0].type.get_array_element_type().kind == TypeKind.POINTER
-    assert fields[0].type.get_array_size() == 2
 
-def test_equal():
-    """Ensure equivalence operators work on Type."""
-    source = 'int a; int b; void *v;'
-    tu = get_tu(source)
+class TestType(unittest.TestCase):
+    def test_a_struct(self):
+        tu = get_tu(kInput)
 
-    a = get_cursor(tu, 'a')
-    b = get_cursor(tu, 'b')
-    v = get_cursor(tu, 'v')
-
-    assert a is not None
-    assert b is not None
-    assert v is not None
-
-    assert a.type == b.type
-    assert a.type != v.type
-
-    assert a.type != None
-    assert a.type != 'foo'
-
-def test_type_spelling():
-    """Ensure Type.spelling works."""
-    tu = get_tu('int c[5]; void f(int i[]); int x; int v[x];')
-    c = get_cursor(tu, 'c')
-    i = get_cursor(tu, 'i')
-    x = get_cursor(tu, 'x')
-    v = get_cursor(tu, 'v')
-    assert c is not None
-    assert i is not None
-    assert x is not None
-    assert v is not None
-    assert c.type.spelling == "int [5]"
-    assert i.type.spelling == "int []"
-    assert x.type.spelling == "int"
-    assert v.type.spelling == "int [x]"
-
-def test_typekind_spelling():
-    """Ensure TypeKind.spelling works."""
-    tu = get_tu('int a;')
-    a = get_cursor(tu, 'a')
-
-    assert a is not None
-    assert a.type.kind.spelling == 'Int'
-
-def test_function_argument_types():
-    """Ensure that Type.argument_types() works as expected."""
-    tu = get_tu('void f(int, int);')
-    f = get_cursor(tu, 'f')
-    assert f is not None
-
-    args = f.type.argument_types()
-    assert args is not None
-    assert len(args) == 2
-
-    t0 = args[0]
-    assert t0 is not None
-    assert t0.kind == TypeKind.INT
-
-    t1 = args[1]
-    assert t1 is not None
-    assert t1.kind == TypeKind.INT
-
-    args2 = list(args)
-    assert len(args2) == 2
-    assert t0 == args2[0]
-    assert t1 == args2[1]
-
-@raises(TypeError)
-def test_argument_types_string_key():
-    """Ensure that non-int keys raise a TypeError."""
-    tu = get_tu('void f(int, int);')
-    f = get_cursor(tu, 'f')
-    assert f is not None
-
-    args = f.type.argument_types()
-    assert len(args) == 2
-
-    args['foo']
-
-@raises(IndexError)
-def test_argument_types_negative_index():
-    """Ensure that negative indexes on argument_types Raises an IndexError."""
-    tu = get_tu('void f(int, int);')
-    f = get_cursor(tu, 'f')
-    args = f.type.argument_types()
-
-    args[-1]
-
-@raises(IndexError)
-def test_argument_types_overflow_index():
-    """Ensure that indexes beyond the length of Type.argument_types() raise."""
-    tu = get_tu('void f(int, int);')
-    f = get_cursor(tu, 'f')
-    args = f.type.argument_types()
-
-    args[2]
-
-@raises(Exception)
-def test_argument_types_invalid_type():
-    """Ensure that obtaining argument_types on a Type without them raises."""
-    tu = get_tu('int i;')
-    i = get_cursor(tu, 'i')
-    assert i is not None
-
-    i.type.argument_types()
-
-def test_is_pod():
-    """Ensure Type.is_pod() works."""
-    tu = get_tu('int i; void f();')
-    i = get_cursor(tu, 'i')
-    f = get_cursor(tu, 'f')
-
-    assert i is not None
-    assert f is not None
-
-    assert i.type.is_pod()
-    assert not f.type.is_pod()
-
-def test_function_variadic():
-    """Ensure Type.is_function_variadic works."""
-
-    source ="""
-#include <stdarg.h>
-
-void foo(int a, ...);
-void bar(int a, int b);
-"""
-
-    tu = get_tu(source)
-    foo = get_cursor(tu, 'foo')
-    bar = get_cursor(tu, 'bar')
-
-    assert foo is not None
-    assert bar is not None
-
-    assert isinstance(foo.type.is_function_variadic(), bool)
-    assert foo.type.is_function_variadic()
-    assert not bar.type.is_function_variadic()
-
-def test_element_type():
-    """Ensure Type.element_type works."""
-    tu = get_tu('int c[5]; void f(int i[]); int x; int v[x];')
-    c = get_cursor(tu, 'c')
-    i = get_cursor(tu, 'i')
-    v = get_cursor(tu, 'v')
-    assert c is not None
-    assert i is not None
-    assert v is not None
-
-    assert c.type.kind == TypeKind.CONSTANTARRAY
-    assert c.type.element_type.kind == TypeKind.INT
-    assert i.type.kind == TypeKind.INCOMPLETEARRAY
-    assert i.type.element_type.kind == TypeKind.INT
-    assert v.type.kind == TypeKind.VARIABLEARRAY
-    assert v.type.element_type.kind == TypeKind.INT
-
-@raises(Exception)
-def test_invalid_element_type():
-    """Ensure Type.element_type raises if type doesn't have elements."""
-    tu = get_tu('int i;')
-    i = get_cursor(tu, 'i')
-    assert i is not None
-    i.element_type
-
-def test_element_count():
-    """Ensure Type.element_count works."""
-    tu = get_tu('int i[5]; int j;')
-    i = get_cursor(tu, 'i')
-    j = get_cursor(tu, 'j')
-
-    assert i is not None
-    assert j is not None
-
-    assert i.type.element_count == 5
-
-    try:
-        j.type.element_count
-        assert False
-    except:
-        assert True
-
-def test_is_volatile_qualified():
-    """Ensure Type.is_volatile_qualified works."""
-
-    tu = get_tu('volatile int i = 4; int j = 2;')
-
-    i = get_cursor(tu, 'i')
-    j = get_cursor(tu, 'j')
-
-    assert i is not None
-    assert j is not None
-
-    assert isinstance(i.type.is_volatile_qualified(), bool)
-    assert i.type.is_volatile_qualified()
-    assert not j.type.is_volatile_qualified()
-
-def test_is_restrict_qualified():
-    """Ensure Type.is_restrict_qualified works."""
-
-    tu = get_tu('struct s { void * restrict i; void * j; };')
-
-    i = get_cursor(tu, 'i')
-    j = get_cursor(tu, 'j')
-
-    assert i is not None
-    assert j is not None
-
-    assert isinstance(i.type.is_restrict_qualified(), bool)
-    assert i.type.is_restrict_qualified()
-    assert not j.type.is_restrict_qualified()
-
-def test_record_layout():
-    """Ensure Cursor.type.get_size, Cursor.type.get_align and
-    Cursor.type.get_offset works."""
-
-    source ="""
-struct a {
-    long a1;
-    long a2:3;
-    long a3:4;
-    long long a4;
-};
-"""
-    tries=[(['-target','i386-linux-gnu'],(4,16,0,32,35,64)),
-           (['-target','nvptx64-unknown-unknown'],(8,24,0,64,67,128)),
-           (['-target','i386-pc-win32'],(8,16,0,32,35,64)),
-           (['-target','msp430-none-none'],(2,14,0,32,35,48))]
-    for flags, values in tries:
-        align,total,a1,a2,a3,a4 = values
-
-        tu = get_tu(source, flags=flags)
-        teststruct = get_cursor(tu, 'a')
+        teststruct = get_cursor(tu, 'teststruct')
+        self.assertIsNotNone(teststruct, "Could not find teststruct.")
         fields = list(teststruct.get_children())
 
-        assert teststruct.type.get_align() == align
-        assert teststruct.type.get_size() == total
-        assert teststruct.type.get_offset(fields[0].spelling) == a1
-        assert teststruct.type.get_offset(fields[1].spelling) == a2
-        assert teststruct.type.get_offset(fields[2].spelling) == a3
-        assert teststruct.type.get_offset(fields[3].spelling) == a4
-        assert fields[0].is_bitfield() == False
-        assert fields[1].is_bitfield() == True
-        assert fields[1].get_bitfield_width() == 3
-        assert fields[2].is_bitfield() == True
-        assert fields[2].get_bitfield_width() == 4
-        assert fields[3].is_bitfield() == False
+        self.assertEqual(fields[0].kind, CursorKind.FIELD_DECL)
+        self.assertIsNotNone(fields[0].translation_unit)
+        self.assertEqual(fields[0].spelling, 'a')
+        self.assertFalse(fields[0].type.is_const_qualified())
+        self.assertEqual(fields[0].type.kind, TypeKind.INT)
+        self.assertEqual(fields[0].type.get_canonical().kind, TypeKind.INT)
+        self.assertEqual(fields[0].type.get_typedef_name(), '')
 
-def test_offset():
-    """Ensure Cursor.get_record_field_offset works in anonymous records"""
-    source="""
-struct Test {
-  struct {int a;} typeanon;
-  struct {
-    int bariton;
-    union {
-      int foo;
-    };
-  };
-  int bar;
-};"""
-    tries=[(['-target','i386-linux-gnu'],(4,16,0,32,64,96)),
-           (['-target','nvptx64-unknown-unknown'],(8,24,0,32,64,96)),
-           (['-target','i386-pc-win32'],(8,16,0,32,64,96)),
-           (['-target','msp430-none-none'],(2,14,0,32,64,96))]
-    for flags, values in tries:
-        align,total,f1,bariton,foo,bar = values
+        self.assertEqual(fields[1].kind, CursorKind.FIELD_DECL)
+        self.assertIsNotNone(fields[1].translation_unit)
+        self.assertEqual(fields[1].spelling, 'b')
+        self.assertFalse(fields[1].type.is_const_qualified())
+        self.assertEqual(fields[1].type.kind, TypeKind.TYPEDEF)
+        self.assertEqual(fields[1].type.get_canonical().kind, TypeKind.INT)
+        self.assertEqual(fields[1].type.get_declaration().spelling, 'I')
+        self.assertEqual(fields[1].type.get_typedef_name(), 'I')
+
+        self.assertEqual(fields[2].kind, CursorKind.FIELD_DECL)
+        self.assertIsNotNone(fields[2].translation_unit)
+        self.assertEqual(fields[2].spelling, 'c')
+        self.assertFalse(fields[2].type.is_const_qualified())
+        self.assertEqual(fields[2].type.kind, TypeKind.LONG)
+        self.assertEqual(fields[2].type.get_canonical().kind, TypeKind.LONG)
+        self.assertEqual(fields[2].type.get_typedef_name(), '')
+
+        self.assertEqual(fields[3].kind, CursorKind.FIELD_DECL)
+        self.assertIsNotNone(fields[3].translation_unit)
+        self.assertEqual(fields[3].spelling, 'd')
+        self.assertFalse(fields[3].type.is_const_qualified())
+        self.assertEqual(fields[3].type.kind, TypeKind.ULONG)
+        self.assertEqual(fields[3].type.get_canonical().kind, TypeKind.ULONG)
+        self.assertEqual(fields[3].type.get_typedef_name(), '')
+
+        self.assertEqual(fields[4].kind, CursorKind.FIELD_DECL)
+        self.assertIsNotNone(fields[4].translation_unit)
+        self.assertEqual(fields[4].spelling, 'e')
+        self.assertFalse(fields[4].type.is_const_qualified())
+        self.assertEqual(fields[4].type.kind, TypeKind.LONG)
+        self.assertEqual(fields[4].type.get_canonical().kind, TypeKind.LONG)
+        self.assertEqual(fields[4].type.get_typedef_name(), '')
+
+        self.assertEqual(fields[5].kind, CursorKind.FIELD_DECL)
+        self.assertIsNotNone(fields[5].translation_unit)
+        self.assertEqual(fields[5].spelling, 'f')
+        self.assertTrue(fields[5].type.is_const_qualified())
+        self.assertEqual(fields[5].type.kind, TypeKind.INT)
+        self.assertEqual(fields[5].type.get_canonical().kind, TypeKind.INT)
+        self.assertEqual(fields[5].type.get_typedef_name(), '')
+
+        self.assertEqual(fields[6].kind, CursorKind.FIELD_DECL)
+        self.assertIsNotNone(fields[6].translation_unit)
+        self.assertEqual(fields[6].spelling, 'g')
+        self.assertFalse(fields[6].type.is_const_qualified())
+        self.assertEqual(fields[6].type.kind, TypeKind.POINTER)
+        self.assertEqual(fields[6].type.get_pointee().kind, TypeKind.INT)
+        self.assertEqual(fields[6].type.get_typedef_name(), '')
+
+        self.assertEqual(fields[7].kind, CursorKind.FIELD_DECL)
+        self.assertIsNotNone(fields[7].translation_unit)
+        self.assertEqual(fields[7].spelling, 'h')
+        self.assertFalse(fields[7].type.is_const_qualified())
+        self.assertEqual(fields[7].type.kind, TypeKind.POINTER)
+        self.assertEqual(fields[7].type.get_pointee().kind, TypeKind.POINTER)
+        self.assertEqual(fields[7].type.get_pointee().get_pointee().kind, TypeKind.POINTER)
+        self.assertEqual(fields[7].type.get_pointee().get_pointee().get_pointee().kind, TypeKind.INT)
+        self.assertEqual(fields[7].type.get_typedef_name(), '')
+
+    def test_references(self):
+        """Ensure that a Type maintains a reference to a TranslationUnit."""
+
+        tu = get_tu('int x;')
+        children = list(tu.cursor.get_children())
+        self.assertGreater(len(children), 0)
+
+        cursor = children[0]
+        t = cursor.type
+
+        self.assertIsInstance(t.translation_unit, TranslationUnit)
+
+        # Delete main TranslationUnit reference and force a GC.
+        del tu
+        gc.collect()
+        self.assertIsInstance(t.translation_unit, TranslationUnit)
+
+        # If the TU was destroyed, this should cause a segfault.
+        decl = t.get_declaration()
+
+    def testConstantArray(self):
+        tu = get_tu(constarrayInput)
+
+        teststruct = get_cursor(tu, 'teststruct')
+        self.assertIsNotNone(teststruct, "Didn't find teststruct??")
+        fields = list(teststruct.get_children())
+        self.assertEqual(fields[0].spelling, 'A')
+        self.assertEqual(fields[0].type.kind, TypeKind.CONSTANTARRAY)
+        self.assertIsNotNone(fields[0].type.get_array_element_type())
+        self.assertEqual(fields[0].type.get_array_element_type().kind, TypeKind.POINTER)
+        self.assertEqual(fields[0].type.get_array_size(), 2)
+
+    def test_equal(self):
+        """Ensure equivalence operators work on Type."""
+        source = 'int a; int b; void *v;'
         tu = get_tu(source)
-        teststruct = get_cursor(tu, 'Test')
-        children = list(teststruct.get_children())
-        fields = list(teststruct.type.get_fields())
-        assert children[0].kind == CursorKind.STRUCT_DECL
-        assert children[0].spelling != "typeanon"
-        assert children[1].spelling == "typeanon"
-        assert fields[0].kind == CursorKind.FIELD_DECL
-        assert fields[1].kind == CursorKind.FIELD_DECL
-        assert fields[1].is_anonymous()
-        assert teststruct.type.get_offset("typeanon") == f1
-        assert teststruct.type.get_offset("bariton") == bariton
-        assert teststruct.type.get_offset("foo") == foo
-        assert teststruct.type.get_offset("bar") == bar
 
+        a = get_cursor(tu, 'a')
+        b = get_cursor(tu, 'b')
+        v = get_cursor(tu, 'v')
 
-def test_decay():
-    """Ensure decayed types are handled as the original type"""
+        self.assertIsNotNone(a)
+        self.assertIsNotNone(b)
+        self.assertIsNotNone(v)
 
-    tu = get_tu("void foo(int a[]);")
-    foo = get_cursor(tu, 'foo')
-    a = foo.type.argument_types()[0]
+        self.assertEqual(a.type, b.type)
+        self.assertNotEqual(a.type, v.type)
 
-    assert a.kind == TypeKind.INCOMPLETEARRAY
-    assert a.element_type.kind == TypeKind.INT
-    assert a.get_canonical().kind == TypeKind.INCOMPLETEARRAY
+        self.assertNotEqual(a.type, None)
+        self.assertNotEqual(a.type, 'foo')
 
-def test_addrspace():
-    """Ensure the address space can be queried"""
-    tu = get_tu('__attribute__((address_space(2))) int testInteger = 3;', 'c')
+    def test_type_spelling(self):
+        """Ensure Type.spelling works."""
+        tu = get_tu('int c[5]; void f(int i[]); int x; int v[x];')
+        c = get_cursor(tu, 'c')
+        i = get_cursor(tu, 'i')
+        x = get_cursor(tu, 'x')
+        v = get_cursor(tu, 'v')
+        self.assertIsNotNone(c)
+        self.assertIsNotNone(i)
+        self.assertIsNotNone(x)
+        self.assertIsNotNone(v)
+        self.assertEqual(c.type.spelling, "int [5]")
+        self.assertEqual(i.type.spelling, "int []")
+        self.assertEqual(x.type.spelling, "int")
+        self.assertEqual(v.type.spelling, "int [x]")
 
-    testInteger = get_cursor(tu, 'testInteger')
+    def test_typekind_spelling(self):
+        """Ensure TypeKind.spelling works."""
+        tu = get_tu('int a;')
+        a = get_cursor(tu, 'a')
 
-    assert testInteger is not None, "Could not find testInteger."
-    assert testInteger.type.get_address_space() == 2
+        self.assertIsNotNone(a)
+        self.assertEqual(a.type.kind.spelling, 'Int')
+
+    def test_function_argument_types(self):
+        """Ensure that Type.argument_types() works as expected."""
+        tu = get_tu('void f(int, int);')
+        f = get_cursor(tu, 'f')
+        self.assertIsNotNone(f)
+
+        args = f.type.argument_types()
+        self.assertIsNotNone(args)
+        self.assertEqual(len(args), 2)
+
+        t0 = args[0]
+        self.assertIsNotNone(t0)
+        self.assertEqual(t0.kind, TypeKind.INT)
+
+        t1 = args[1]
+        self.assertIsNotNone(t1)
+        self.assertEqual(t1.kind, TypeKind.INT)
+
+        args2 = list(args)
+        self.assertEqual(len(args2), 2)
+        self.assertEqual(t0, args2[0])
+        self.assertEqual(t1, args2[1])
+
+    def test_argument_types_string_key(self):
+        """Ensure that non-int keys raise a TypeError."""
+        tu = get_tu('void f(int, int);')
+        f = get_cursor(tu, 'f')
+        self.assertIsNotNone(f)
+
+        args = f.type.argument_types()
+        self.assertEqual(len(args), 2)
+
+        with self.assertRaises(TypeError):
+            args['foo']
+
+    def test_argument_types_negative_index(self):
+        """Ensure that negative indexes on argument_types Raises an IndexError."""
+        tu = get_tu('void f(int, int);')
+        f = get_cursor(tu, 'f')
+        args = f.type.argument_types()
+
+        with self.assertRaises(IndexError):
+            args[-1]
+
+    def test_argument_types_overflow_index(self):
+        """Ensure that indexes beyond the length of Type.argument_types() raise."""
+        tu = get_tu('void f(int, int);')
+        f = get_cursor(tu, 'f')
+        args = f.type.argument_types()
+
+        with self.assertRaises(IndexError):
+            args[2]
+
+    def test_argument_types_invalid_type(self):
+        """Ensure that obtaining argument_types on a Type without them raises."""
+        tu = get_tu('int i;')
+        i = get_cursor(tu, 'i')
+        self.assertIsNotNone(i)
+
+        with self.assertRaises(Exception):
+            i.type.argument_types()
+
+    def test_is_pod(self):
+        """Ensure Type.is_pod() works."""
+        tu = get_tu('int i; void f();')
+        i = get_cursor(tu, 'i')
+        f = get_cursor(tu, 'f')
+
+        self.assertIsNotNone(i)
+        self.assertIsNotNone(f)
+
+        self.assertTrue(i.type.is_pod())
+        self.assertFalse(f.type.is_pod())
+
+    def test_function_variadic(self):
+        """Ensure Type.is_function_variadic works."""
+
+        source ="""
+#include <stdarg.h>
+
+    void foo(int a, ...);
+    void bar(int a, int b);
+    """
+
+        tu = get_tu(source)
+        foo = get_cursor(tu, 'foo')
+        bar = get_cursor(tu, 'bar')
+
+        self.assertIsNotNone(foo)
+        self.assertIsNotNone(bar)
+
+        self.assertIsInstance(foo.type.is_function_variadic(), bool)
+        self.assertTrue(foo.type.is_function_variadic())
+        self.assertFalse(bar.type.is_function_variadic())
+
+    def test_element_type(self):
+        """Ensure Type.element_type works."""
+        tu = get_tu('int c[5]; void f(int i[]); int x; int v[x];')
+        c = get_cursor(tu, 'c')
+        i = get_cursor(tu, 'i')
+        v = get_cursor(tu, 'v')
+        self.assertIsNotNone(c)
+        self.assertIsNotNone(i)
+        self.assertIsNotNone(v)
+
+        self.assertEqual(c.type.kind, TypeKind.CONSTANTARRAY)
+        self.assertEqual(c.type.element_type.kind, TypeKind.INT)
+        self.assertEqual(i.type.kind, TypeKind.INCOMPLETEARRAY)
+        self.assertEqual(i.type.element_type.kind, TypeKind.INT)
+        self.assertEqual(v.type.kind, TypeKind.VARIABLEARRAY)
+        self.assertEqual(v.type.element_type.kind, TypeKind.INT)
+
+    def test_invalid_element_type(self):
+        """Ensure Type.element_type raises if type doesn't have elements."""
+        tu = get_tu('int i;')
+        i = get_cursor(tu, 'i')
+        self.assertIsNotNone(i)
+        with self.assertRaises(Exception):
+            i.element_type
+
+    def test_element_count(self):
+        """Ensure Type.element_count works."""
+        tu = get_tu('int i[5]; int j;')
+        i = get_cursor(tu, 'i')
+        j = get_cursor(tu, 'j')
+
+        self.assertIsNotNone(i)
+        self.assertIsNotNone(j)
+
+        self.assertEqual(i.type.element_count, 5)
+
+        with self.assertRaises(Exception):
+            j.type.element_count
+
+    def test_is_volatile_qualified(self):
+        """Ensure Type.is_volatile_qualified works."""
+
+        tu = get_tu('volatile int i = 4; int j = 2;')
+
+        i = get_cursor(tu, 'i')
+        j = get_cursor(tu, 'j')
+
+        self.assertIsNotNone(i)
+        self.assertIsNotNone(j)
+
+        self.assertIsInstance(i.type.is_volatile_qualified(), bool)
+        self.assertTrue(i.type.is_volatile_qualified())
+        self.assertFalse(j.type.is_volatile_qualified())
+
+    def test_is_restrict_qualified(self):
+        """Ensure Type.is_restrict_qualified works."""
+
+        tu = get_tu('struct s { void * restrict i; void * j; };')
+
+        i = get_cursor(tu, 'i')
+        j = get_cursor(tu, 'j')
+
+        self.assertIsNotNone(i)
+        self.assertIsNotNone(j)
+
+        self.assertIsInstance(i.type.is_restrict_qualified(), bool)
+        self.assertTrue(i.type.is_restrict_qualified())
+        self.assertFalse(j.type.is_restrict_qualified())
+
+    def test_record_layout(self):
+        """Ensure Cursor.type.get_size, Cursor.type.get_align and
+        Cursor.type.get_offset works."""
+
+        source ="""
+    struct a {
+        long a1;
+        long a2:3;
+        long a3:4;
+        long long a4;
+    };
+    """
+        tries=[(['-target','i386-linux-gnu'],(4,16,0,32,35,64)),
+               (['-target','nvptx64-unknown-unknown'],(8,24,0,64,67,128)),
+               (['-target','i386-pc-win32'],(8,16,0,32,35,64)),
+               (['-target','msp430-none-none'],(2,14,0,32,35,48))]
+        for flags, values in tries:
+            align,total,a1,a2,a3,a4 = values
+
+            tu = get_tu(source, flags=flags)
+            teststruct = get_cursor(tu, 'a')
+            fields = list(teststruct.get_children())
+
+            self.assertEqual(teststruct.type.get_align(), align)
+            self.assertEqual(teststruct.type.get_size(), total)
+            self.assertEqual(teststruct.type.get_offset(fields[0].spelling), a1)
+            self.assertEqual(teststruct.type.get_offset(fields[1].spelling), a2)
+            self.assertEqual(teststruct.type.get_offset(fields[2].spelling), a3)
+            self.assertEqual(teststruct.type.get_offset(fields[3].spelling), a4)
+            self.assertEqual(fields[0].is_bitfield(), False)
+            self.assertEqual(fields[1].is_bitfield(), True)
+            self.assertEqual(fields[1].get_bitfield_width(), 3)
+            self.assertEqual(fields[2].is_bitfield(), True)
+            self.assertEqual(fields[2].get_bitfield_width(), 4)
+            self.assertEqual(fields[3].is_bitfield(), False)
+
+    def test_offset(self):
+        """Ensure Cursor.get_record_field_offset works in anonymous records"""
+        source="""
+    struct Test {
+      struct {int a;} typeanon;
+      struct {
+        int bariton;
+        union {
+          int foo;
+        };
+      };
+      int bar;
+    };"""
+        tries=[(['-target','i386-linux-gnu'],(4,16,0,32,64,96)),
+               (['-target','nvptx64-unknown-unknown'],(8,24,0,32,64,96)),
+               (['-target','i386-pc-win32'],(8,16,0,32,64,96)),
+               (['-target','msp430-none-none'],(2,14,0,32,64,96))]
+        for flags, values in tries:
+            align,total,f1,bariton,foo,bar = values
+            tu = get_tu(source)
+            teststruct = get_cursor(tu, 'Test')
+            children = list(teststruct.get_children())
+            fields = list(teststruct.type.get_fields())
+            self.assertEqual(children[0].kind, CursorKind.STRUCT_DECL)
+            self.assertNotEqual(children[0].spelling, "typeanon")
+            self.assertEqual(children[1].spelling, "typeanon")
+            self.assertEqual(fields[0].kind, CursorKind.FIELD_DECL)
+            self.assertEqual(fields[1].kind, CursorKind.FIELD_DECL)
+            self.assertTrue(fields[1].is_anonymous())
+            self.assertEqual(teststruct.type.get_offset("typeanon"), f1)
+            self.assertEqual(teststruct.type.get_offset("bariton"), bariton)
+            self.assertEqual(teststruct.type.get_offset("foo"), foo)
+            self.assertEqual(teststruct.type.get_offset("bar"), bar)
+
+    def test_decay(self):
+        """Ensure decayed types are handled as the original type"""
+
+        tu = get_tu("void foo(int a[]);")
+        foo = get_cursor(tu, 'foo')
+        a = foo.type.argument_types()[0]
+
+        self.assertEqual(a.kind, TypeKind.INCOMPLETEARRAY)
+        self.assertEqual(a.element_type.kind, TypeKind.INT)
+        self.assertEqual(a.get_canonical().kind, TypeKind.INCOMPLETEARRAY)
+
+    def test_addrspace(self):
+        """Ensure the address space can be queried"""
+        tu = get_tu('__attribute__((address_space(2))) int testInteger = 3;', 'c')
+
+        testInteger = get_cursor(tu, 'testInteger')
+
+        self.assertIsNotNone(testInteger, "Could not find testInteger.")
+        self.assertEqual(testInteger.type.get_address_space(), 2)
diff --git a/cmake/caches/Apple-stage2.cmake b/cmake/caches/Apple-stage2.cmake
index 8826fc1..d58e4b6 100644
--- a/cmake/caches/Apple-stage2.cmake
+++ b/cmake/caches/Apple-stage2.cmake
@@ -61,7 +61,7 @@
   LTO
   clang-format
   clang-headers
-  libcxx-headers
+  cxx-headers
   ${LLVM_TOOLCHAIN_TOOLS}
   CACHE STRING "")
 
diff --git a/cmake/caches/Fuchsia-stage2.cmake b/cmake/caches/Fuchsia-stage2.cmake
index 7b2f221..2d36f09 100644
--- a/cmake/caches/Fuchsia-stage2.cmake
+++ b/cmake/caches/Fuchsia-stage2.cmake
@@ -7,8 +7,9 @@
 
 set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "")
 set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "")
-set(LLVM_ENABLE_ZLIB ON CACHE BOOL "")
 set(LLVM_ENABLE_BACKTRACES OFF CACHE BOOL "")
+set(LLVM_ENABLE_TERMINFO OFF CACHE BOOL "")
+set(LLVM_ENABLE_ZLIB ON CACHE BOOL "")
 set(LLVM_EXTERNALIZE_DEBUGINFO ON CACHE BOOL "")
 set(CLANG_PLUGIN_SUPPORT OFF CACHE BOOL "")
 
@@ -16,33 +17,20 @@
 if(NOT APPLE)
   set(LLVM_ENABLE_LLD ON CACHE BOOL "")
   set(CLANG_DEFAULT_LINKER lld CACHE STRING "")
+  set(CLANG_DEFAULT_OBJCOPY llvm-objcopy CACHE STRING "")
 endif()
 
-# This is a "Does your linker support it?" option that only applies
-# to x86-64 ELF targets.  All Fuchsia target linkers do support it.
-# For x86-64 Linux, it's supported by LLD and by GNU linkers since
-# binutils 2.27, so one can hope that all Linux hosts in use handle it.
-# Ideally this would be settable as a per-target option.
-set(ENABLE_X86_RELAX_RELOCATIONS ON CACHE BOOL "")
-
-if(APPLE)
-  set(LLDB_CODESIGN_IDENTITY "" CACHE STRING "")
-endif()
-
+set(LLVM_ENABLE_ASSERTIONS ON CACHE BOOL "")
 set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
 set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -gline-tables-only -DNDEBUG" CACHE STRING "")
 set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -gline-tables-only -DNDEBUG" CACHE STRING "")
 
-set(LLVM_BUILTIN_TARGETS "x86_64-fuchsia;aarch64-fuchsia" CACHE STRING "")
+set(LLVM_BUILTIN_TARGETS "default;x86_64-fuchsia;aarch64-fuchsia" CACHE STRING "")
 foreach(target x86_64;aarch64)
   set(BUILTINS_${target}-fuchsia_CMAKE_SYSROOT ${FUCHSIA_${target}_SYSROOT} CACHE PATH "")
   set(BUILTINS_${target}-fuchsia_CMAKE_SYSTEM_NAME Fuchsia CACHE STRING "")
 endforeach()
 
-if(NOT APPLE)
-  set(LLVM_BUILTIN_TARGETS "default;${LLVM_BUILTIN_TARGETS}" CACHE STRING "" FORCE)
-endif()
-
 set(LLVM_RUNTIME_TARGETS "default;x86_64-fuchsia;aarch64-fuchsia;x86_64-fuchsia-asan:x86_64-fuchsia;aarch64-fuchsia-asan:aarch64-fuchsia" CACHE STRING "")
 foreach(target x86_64;aarch64)
   set(RUNTIMES_${target}-fuchsia_CMAKE_BUILD_WITH_INSTALL_RPATH ON CACHE BOOL "")
@@ -91,12 +79,13 @@
 
 set(LLVM_DISTRIBUTION_COMPONENTS
   clang
+  libclang
   lld
-  lldb
-  liblldb
   LTO
   clang-format
   clang-headers
+  clang-include-fixer
+  clang-refactor
   clang-tidy
   clangd
   builtins
diff --git a/cmake/caches/Fuchsia.cmake b/cmake/caches/Fuchsia.cmake
index 0932c04..0d81592 100644
--- a/cmake/caches/Fuchsia.cmake
+++ b/cmake/caches/Fuchsia.cmake
@@ -8,10 +8,12 @@
 set(LLVM_INCLUDE_TESTS OFF CACHE BOOL "")
 set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "")
 set(CLANG_INCLUDE_TESTS OFF CACHE BOOL "")
-set(LLVM_ENABLE_ZLIB OFF CACHE BOOL "")
 set(LLVM_ENABLE_BACKTRACES OFF CACHE BOOL "")
+set(LLVM_ENABLE_TERMINFO OFF CACHE BOOL "")
+set(LLVM_ENABLE_ZLIB OFF CACHE BOOL "")
 set(CLANG_PLUGIN_SUPPORT OFF CACHE BOOL "")
 
+set(LLVM_ENABLE_ASSERTIONS ON CACHE BOOL "")
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 
 set(BOOTSTRAP_LLVM_ENABLE_LTO ON CACHE BOOL "")
diff --git a/cmake/modules/AddClang.cmake b/cmake/modules/AddClang.cmake
index e657059..c09a842 100644
--- a/cmake/modules/AddClang.cmake
+++ b/cmake/modules/AddClang.cmake
@@ -104,11 +104,9 @@
         RUNTIME DESTINATION bin)
 
       if (${ARG_SHARED} AND NOT CMAKE_CONFIGURATION_TYPES)
-        add_custom_target(install-${name}
-                          DEPENDS ${name}
-                          COMMAND "${CMAKE_COMMAND}"
-                                  -DCMAKE_INSTALL_COMPONENT=${name}
-                                  -P "${CMAKE_BINARY_DIR}/cmake_install.cmake")
+        add_llvm_install_targets(install-${name}
+                                 DEPENDS ${name}
+                                 COMPONENT ${name})
       endif()
     endif()
     set_property(GLOBAL APPEND PROPERTY CLANG_EXPORTS ${name})
@@ -147,11 +145,9 @@
       COMPONENT ${name})
 
     if(NOT CMAKE_CONFIGURATION_TYPES)
-      add_custom_target(install-${name}
-        DEPENDS ${name}
-        COMMAND "${CMAKE_COMMAND}"
-        -DCMAKE_INSTALL_COMPONENT=${name}
-        -P "${CMAKE_BINARY_DIR}/cmake_install.cmake")
+      add_llvm_install_targets(install-${name}
+                               DEPENDS ${name}
+                               COMPONENT ${name})
     endif()
     set_property(GLOBAL APPEND PROPERTY CLANG_EXPORTS ${name})
   endif()
diff --git a/cmake/modules/ProtobufMutator.cmake b/cmake/modules/ProtobufMutator.cmake
index be457b5..5f23f33 100644
--- a/cmake/modules/ProtobufMutator.cmake
+++ b/cmake/modules/ProtobufMutator.cmake
@@ -1,23 +1,18 @@
 set(PBM_PREFIX protobuf_mutator)
 set(PBM_PATH ${CMAKE_CURRENT_BINARY_DIR}/${PBM_PREFIX}/src/${PBM_PREFIX})
-set(PBM_LIB_PATH ${PBM_PATH}/src/libprotobuf-mutator.a)
-set(PBM_FUZZ_LIB_PATH ${PBM_PATH}/src/libfuzzer/libprotobuf-mutator-libfuzzer.a)
+set(PBM_LIB_PATH ${PBM_PATH}-build/src/libprotobuf-mutator.a)
+set(PBM_FUZZ_LIB_PATH ${PBM_PATH}-build/src/libfuzzer/libprotobuf-mutator-libfuzzer.a)
 
 ExternalProject_Add(${PBM_PREFIX}
   PREFIX ${PBM_PREFIX}
   GIT_REPOSITORY https://github.com/google/libprotobuf-mutator.git
   GIT_TAG master
-  CONFIGURE_COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR}
-    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-  BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
+  CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+  CMAKE_CACHE_ARGS -DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER}
+                   -DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER}
   BUILD_BYPRODUCTS ${PBM_LIB_PATH} ${PBM_FUZZ_LIB_PATH}
-  BUILD_IN_SOURCE 1
+  UPDATE_COMMAND ""
   INSTALL_COMMAND ""
-  LOG_DOWNLOAD 1
-  LOG_CONFIGURE 1
-  LOG_BUILD 1
   )
 
 set(ProtobufMutator_INCLUDE_DIRS ${PBM_PATH})
diff --git a/docs/ClangCommandLineReference.rst b/docs/ClangCommandLineReference.rst
index 0cc9c71..0026f1f 100644
--- a/docs/ClangCommandLineReference.rst
+++ b/docs/ClangCommandLineReference.rst
@@ -61,10 +61,10 @@
 Pass <arg> to the target offloading toolchain.
 
 .. program:: clang1
-.. option:: -Xopenmp-target=<arg> <arg2>
+.. option:: -Xopenmp-target=<triple> <arg>
 .. program:: clang
 
-Pass <arg> to the specified target offloading toolchain. The triple that identifies the toolchain must be provided after the equals sign.
+Pass <arg> to the target offloading toolchain identified by <triple>.
 
 .. option:: -Z<arg>
 
@@ -116,10 +116,18 @@
 .. option:: -bundle\_loader <arg>
 .. program:: clang
 
+.. option:: -cfguard
+
+Emit tables required for Windows Control Flow Guard.
+
 .. option:: -client\_name<arg>
 
 .. option:: -compatibility\_version<arg>
 
+.. option:: --config <arg>
+
+Specifies configuration file
+
 .. option:: --constant-cfstrings
 
 .. option:: -coverage, --coverage
@@ -524,10 +532,10 @@
 
 .. option:: -shared, --shared
 
-.. option:: -shared-libasan
-
 .. option:: -shared-libgcc
 
+.. option:: -shared-libsan, -shared-libasan
+
 .. option:: -single\_module
 
 .. option:: -specs=<arg>, --specs=<arg>
@@ -536,6 +544,8 @@
 
 .. option:: -static-libgcc
 
+.. option:: -static-libsan
+
 .. option:: -static-libstdc++
 
 .. option:: -std-default=<arg>
@@ -588,6 +598,8 @@
 
 .. option:: --version
 
+Print version information
+
 .. option:: -w, --no-warnings
 
 Suppress all warnings
@@ -698,10 +710,22 @@
 
 .. option:: -fdiagnostics-show-category=<arg>
 
+.. option:: -fdiscard-value-names, -fno-discard-value-names
+
+Discard value names in LLVM IR
+
+.. option:: -fexperimental-isel, -fno-experimental-isel
+
+Enables the experimental global instruction selector
+
 .. option:: -fexperimental-new-pass-manager, -fno-experimental-new-pass-manager
 
 Enables an experimental new pass manager in LLVM.
 
+.. option:: -ffine-grained-bitfield-accesses, -fno-fine-grained-bitfield-accesses
+
+Use separate accesses for bitfields with legal widths and alignments.
+
 .. option:: -finline-functions, -fno-inline-functions
 
 Inline suitable functions
@@ -728,6 +752,10 @@
 
 Enable linker dead stripping of globals in AddressSanitizer
 
+.. option:: -fsanitize-address-poison-class-member-array-new-cookie, -fno-sanitize-address-poison-class-member-array-new-cookie
+
+Enable poisoning array cookies when using class member operator new\[\] in AddressSanitizer
+
 .. option:: -fsanitize-address-use-after-scope, -fno-sanitize-address-use-after-scope
 
 Enable use-after-scope detection in AddressSanitizer
@@ -740,6 +768,10 @@
 
 Enable control flow integrity (CFI) checks for cross-DSO calls.
 
+.. option:: -fsanitize-cfi-icall-generalize-pointers
+
+Generalize pointers in CFI indirect call type signature checks
+
 .. option:: -fsanitize-coverage=<arg1>,<arg2>..., -fno-sanitize-coverage=<arg1>,<arg2>...
 
 Specify the type of coverage instrumentation for Sanitizers
@@ -756,7 +788,7 @@
 
 Enable origins tracking in MemorySanitizer
 
-.. option:: -fsanitize-memory-use-after-dtor
+.. option:: -fsanitize-memory-use-after-dtor, -fno-sanitize-memory-use-after-dtor
 
 Enable use-after-destroy detection in MemorySanitizer
 
@@ -856,6 +888,10 @@
 
 Restrict all prior -I flags to double-quoted inclusion and remove current directory from include path
 
+.. option:: --cuda-path-ignore-env
+
+Ignore environment variables to detect CUDA installation
+
 .. option:: --cuda-path=<arg>
 
 CUDA installation path
@@ -1163,6 +1199,10 @@
 
 .. option:: -fcaret-diagnostics, -fno-caret-diagnostics
 
+.. option:: -fcf-protection=<arg>, -fcf-protection (equivalent to -fcf-protection=full)
+
+Instrument control-flow architecture protection. Options: return, branch, full, none.
+
 .. option:: -fclasspath=<arg>, --CLASSPATH <arg>, --CLASSPATH=<arg>, --classpath <arg>, --classpath=<arg>
 
 .. option:: -fcolor-diagnostics, -fno-color-diagnostics
@@ -1263,8 +1303,16 @@
 
 Allow '$' in identifiers
 
+.. option:: -fdouble-square-bracket-attributes, -fno-double-square-bracket-attributes
+
+Enable '\[\[\]\]' attributes in all C and C++ language modes
+
 .. option:: -fdwarf-directory-asm, -fno-dwarf-directory-asm
 
+.. option:: -fdwarf-exceptions
+
+Use DWARF style exceptions
+
 .. option:: -felide-constructors, -fno-elide-constructors
 
 .. option:: -feliminate-unused-debug-symbols, -fno-eliminate-unused-debug-symbols
@@ -1341,10 +1389,18 @@
 
 .. option:: -finput-charset=<arg>
 
+.. option:: -finstrument-function-entry-bare
+
+Instrument function entry only, after inlining, without arguments to the instrumentation call
+
 .. option:: -finstrument-functions
 
 Generate calls to instrument function entry and exit
 
+.. option:: -finstrument-functions-after-inlining
+
+Like -finstrument-functions, but insert the calls after inlining
+
 .. option:: -fintegrated-as, -fno-integrated-as, -integrated-as
 
 Enable the integrated assembler
@@ -1467,12 +1523,6 @@
 
 .. option:: -fno-working-directory
 
-.. option:: -fnoopenmp-relocatable-target
-
-Do not compile OpenMP target code as relocatable.
-
-.. option:: -fnoopenmp-use-tls
-
 .. option:: -fobjc-abi-version=<arg>
 
 .. option:: -fobjc-arc, -fno-objc-arc
@@ -1511,13 +1561,11 @@
 
 .. option:: -fopenmp, -fno-openmp
 
-.. option:: -fopenmp-dump-offload-linker-script
+Parse OpenMP pragmas and generate parallel code.
 
-.. option:: -fopenmp-relocatable-target
+.. option:: -fopenmp-simd, -fno-openmp-simd
 
-OpenMP target code is compiled as relocatable using the -c flag. For OpenMP targets the code is relocatable by default.
-
-.. option:: -fopenmp-use-tls
+Emit OpenMP code only for SIMD-based constructs.
 
 .. option:: -fopenmp-version=<arg>
 
@@ -1557,6 +1605,10 @@
 
 .. option:: -fpie, -fno-pie
 
+.. option:: -fplt, -fno-plt
+
+Use the PLT to make function calls
+
 .. option:: -fplugin=<dsopath>
 
 Load the named plugin (dynamic shared object)
@@ -1654,6 +1706,10 @@
 
 Generate a YAML optimization record file
 
+.. option:: -fseh-exceptions
+
+Use SEH style exceptions
+
 .. option:: -fshort-enums, -fno-short-enums
 
 Allocate to an enum type only as many bytes as it needs for the declared range of possible values
@@ -1696,7 +1752,7 @@
 
 .. option:: -fsplit-dwarf-inlining, -fno-split-dwarf-inlining
 
-Place debug types in their own section (ELF Only)
+Provide minimal debug info in the object/executable to facilitate online symbolication/stack traces in the absence of .dwo/.dwp files when using Split DWARF
 
 .. option:: -fsplit-stack
 
@@ -1712,6 +1768,10 @@
 
 Use a strong heuristic to apply stack protectors to functions
 
+.. option:: -fstack-size-section, -fno-stack-size-section
+
+Emit section containing metadata on function stack sizes
+
 .. option:: -fstandalone-debug, -fno-limit-debug-info, -fno-standalone-debug
 
 Emit full debug info for all types used by the program
@@ -1838,6 +1898,10 @@
 
 Store string literals as writable data
 
+.. option:: -fxray-always-emit-customevents, -fno-xray-always-emit-customevents
+
+Determine whether to always emit \_\_xray\_customevent(...) calls even if the function it appears in is not always instrumented.
+
 .. option:: -fxray-always-instrument=<arg>
 
 Filename defining the whitelist for imbuing the 'always instrument' XRay attribute.
@@ -1914,6 +1978,10 @@
 
 OpenCL only. This option is added for compatibility with OpenCL 1.0.
 
+.. option:: -cl-uniform-work-group-size
+
+OpenCL only. Defines that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel
+
 .. option:: -cl-unsafe-math-optimizations
 
 OpenCL only. Allow unsafe floating-point optimizations.  Also implies -cl-no-signed-zeros and -cl-mad-enable.
@@ -1958,7 +2026,7 @@
 
 .. option:: -mconsole<arg>
 
-.. option:: -mcpu=<arg>, -mv4 (equivalent to -mcpu=hexagonv4), -mv5 (equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 (equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62)
+.. option:: -mcpu=<arg>, -mv4 (equivalent to -mcpu=hexagonv4), -mv5 (equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 (equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62), -mv65 (equivalent to -mcpu=hexagonv65)
 
 .. option:: -mdefault-build-attributes<arg>, -mno-default-build-attributes<arg>
 
@@ -2026,6 +2094,10 @@
 
 (integrated-as) Emit an object file which can be used with an incremental linker
 
+.. option:: -mindirect-jump=<arg>
+
+Change indirect jump instructions to inhibit speculation
+
 .. option:: -miphoneos-version-min=<arg>, -mios-version-min=<arg>
 
 .. option:: -mips16
@@ -2080,6 +2152,10 @@
 
 Use copy relocations support for PIE builds
 
+.. option:: -mprefer-vector-width=<arg>
+
+Specifies preferred vector width for auto-vectorization. Defaults to 'none' which allows target specific decisions.
+
 .. option:: -mqdsp6-compat
 
 Enable hexagon-qdsp6 backward compatibility
@@ -2116,6 +2192,10 @@
 
 Set the stack probe size
 
+.. option:: -mstack-arg-probe, -mno-stack-arg-probe
+
+Disable stack probes
+
 .. option:: -mstackrealign, -mno-stackrealign
 
 Force realign the stack at entry to every function
@@ -2162,6 +2242,10 @@
 
 AMDGPU
 ------
+.. option:: -mxnack, -mno-xnack
+
+Enable XNACK (AMDGPU only)
+
 ARM
 ---
 .. option:: -ffixed-r9
@@ -2192,12 +2276,20 @@
 
 Disallow generation of deprecated IT blocks for ARMv8. It is on by default for ARMv8 Thumb mode.
 
+.. option:: -mtp=<arg>
+
+Read thread pointer from coprocessor register (ARM only)
+
 .. option:: -munaligned-access, -mno-unaligned-access
 
 Allow memory accesses to be unaligned (AArch32/AArch64 only)
 
 Hexagon
 -------
+.. option:: -mieee-rnd-near
+
+Hexagon
+-------
 .. option:: -mhvx, -mno-hvx
 
 Enable Hexagon Vector eXtensions
@@ -2206,7 +2298,15 @@
 
 Enable Hexagon Double Vector eXtensions
 
-.. option:: -mieee-rnd-near
+.. option:: -mhvx-length=<arg>
+
+Set Hexagon Vector Length
+
+.. program:: clang1
+.. option:: -mhvx=<arg>
+.. program:: clang
+
+Enable Hexagon Vector eXtensions
 
 PowerPC
 -------
@@ -2246,8 +2346,14 @@
 
 WebAssembly
 -----------
+.. option:: -mnontrapping-fptoint, -mno-nontrapping-fptoint
+
+.. option:: -msign-ext, -mno-sign-ext
+
 .. option:: -msimd128, -mno-simd128
 
+.. option:: -mexception-handling, -mno-exception-handling
+
 X86
 ---
 .. option:: -m3dnow, -mno-3dnow
@@ -2262,6 +2368,8 @@
 
 .. option:: -mavx2, -mno-avx2
 
+.. option:: -mavx512bitalg, -mno-avx512bitalg
+
 .. option:: -mavx512bw, -mno-avx512bw
 
 .. option:: -mavx512cd, -mno-avx512cd
@@ -2278,8 +2386,12 @@
 
 .. option:: -mavx512vbmi, -mno-avx512vbmi
 
+.. option:: -mavx512vbmi2, -mno-avx512vbmi2
+
 .. option:: -mavx512vl, -mno-avx512vl
 
+.. option:: -mavx512vnni, -mno-avx512vnni
+
 .. option:: -mavx512vpopcntdq, -mno-avx512vpopcntdq
 
 .. option:: -mbmi, -mno-bmi
@@ -2304,6 +2416,10 @@
 
 .. option:: -mfxsr, -mno-fxsr
 
+.. option:: -mgfni, -mno-gfni
+
+.. option:: -mibt, -mno-ibt
+
 .. option:: -mlwp, -mno-lwp
 
 .. option:: -mlzcnt, -mno-lzcnt
@@ -2326,16 +2442,26 @@
 
 .. option:: -mprfchw, -mno-prfchw
 
+.. option:: -mrdpid, -mno-rdpid
+
 .. option:: -mrdrnd, -mno-rdrnd
 
 .. option:: -mrdseed, -mno-rdseed
 
+.. option:: -mretpoline, -mno-retpoline
+
+.. option:: -mretpoline-external-thunk, -mno-retpoline-external-thunk
+
 .. option:: -mrtm, -mno-rtm
 
+.. option:: -msahf, -mno-sahf
+
 .. option:: -msgx, -mno-sgx
 
 .. option:: -msha, -mno-sha
 
+.. option:: -mshstk, -mno-shstk
+
 .. option:: -msse, -mno-sse
 
 .. option:: -msse2, -mno-sse2
@@ -2354,6 +2480,10 @@
 
 .. option:: -mtbm, -mno-tbm
 
+.. option:: -mvaes, -mno-vaes
+
+.. option:: -mvpclmulqdq, -mno-vpclmulqdq
+
 .. option:: -mx87, -m80387, -mno-x87
 
 .. option:: -mxop, -mno-xop
@@ -2445,6 +2575,10 @@
 
 .. option:: -gdwarf-aranges
 
+.. option:: -gembed-source
+
+.. option:: -gno-embed-source
+
 .. option:: -ggnu-pubnames
 
 .. option:: -grecord-gcc-switches, -gno-record-gcc-switches
diff --git a/docs/ClangFormat.rst b/docs/ClangFormat.rst
index b4030ea..f53c02a 100644
--- a/docs/ClangFormat.rst
+++ b/docs/ClangFormat.rst
@@ -11,7 +11,7 @@
 ===============
 
 :program:`clang-format` is located in `clang/tools/clang-format` and can be used
-to format C/C++/Obj-C code.
+to format C/C++/Java/JavaScript/Objective-C/Protobuf code.
 
 .. code-block:: console
 
diff --git a/docs/ClangFormatStyleOptions.rst b/docs/ClangFormatStyleOptions.rst
index 0c03c77..816b39f 100644
--- a/docs/ClangFormatStyleOptions.rst
+++ b/docs/ClangFormatStyleOptions.rst
@@ -266,13 +266,9 @@
 
   .. code-block:: c++
 
-    true:
-    int a;     // My comment a
-    int b = 2; // comment  b
-
-    false:
-    int a; // My comment a
-    int b = 2; // comment about b
+    true:                                   false:
+    int a;     // My comment a      vs.     int a; // My comment a
+    int b = 2; // comment  b                int b = 2; // comment about b
 
 **AllowAllParametersOfDeclarationOnNextLine** (``bool``)
   If the function declaration doesn't fit on a line,
@@ -633,7 +629,9 @@
       int bar();
       }
 
-  * ``bool AfterObjCDeclaration`` Wrap ObjC definitions (``@autoreleasepool``, interfaces, ..).
+  * ``bool AfterObjCDeclaration`` Wrap ObjC definitions (interfaces, implementations...).
+    @autoreleasepool and @synchronized blocks are wrapped
+    according to `AfterControlStatement` flag.
 
   * ``bool AfterStruct`` Wrap struct definitions.
 
@@ -1177,6 +1175,46 @@
 
   For example: BOOST_FOREACH.
 
+**IncludeBlocks** (``IncludeBlocksStyle``)
+  Dependent on the value, multiple ``#include`` blocks can be sorted
+  as one and divided based on category.
+
+  Possible values:
+
+  * ``IBS_Preserve`` (in configuration: ``Preserve``)
+    Sort each ``#include`` block separately.
+
+    .. code-block:: c++
+
+       #include "b.h"               into      #include "b.h"
+
+       #include <lib/main.h>                  #include "a.h"
+       #include "a.h"                         #include <lib/main.h>
+
+  * ``IBS_Merge`` (in configuration: ``Merge``)
+    Merge multiple ``#include`` blocks together and sort as one.
+
+    .. code-block:: c++
+
+       #include "b.h"               into      #include "a.h"
+                                              #include "b.h"
+       #include <lib/main.h>                  #include <lib/main.h>
+       #include "a.h"
+
+  * ``IBS_Regroup`` (in configuration: ``Regroup``)
+    Merge multiple ``#include`` blocks together and sort as one.
+    Then split into groups based on category priority. See
+    ``IncludeCategories``.
+
+    .. code-block:: c++
+
+       #include "b.h"               into      #include "a.h"
+                                              #include "b.h"
+       #include <lib/main.h>
+       #include "a.h"                         #include <lib/main.h>
+
+
+
 **IncludeCategories** (``std::vector<IncludeCategory>``)
   Regular expressions denoting the different ``#include`` categories
   used for ordering ``#includes``.
@@ -1472,6 +1510,52 @@
 
 
 
+**ObjCBinPackProtocolList** (``BinPackStyle``)
+  Controls bin-packing Objective-C protocol conformance list
+  items into as few lines as possible when they go over ``ColumnLimit``.
+
+  If ``Auto`` (the default), delegates to the value in
+  ``BinPackParameters``. If that is ``true``, bin-packs Objective-C
+  protocol conformance list items into as few lines as possible
+  whenever they go over ``ColumnLimit``.
+
+  If ``Always``, always bin-packs Objective-C protocol conformance
+  list items into as few lines as possible whenever they go over
+  ``ColumnLimit``.
+
+  If ``Never``, lays out Objective-C protocol conformance list items
+  onto individual lines whenever they go over ``ColumnLimit``.
+
+
+  .. code-block:: c++
+
+     Always (or Auto, if BinPackParameters=true):
+     @interface ccccccccccccc () <
+         ccccccccccccc, ccccccccccccc,
+         ccccccccccccc, ccccccccccccc> {
+     }
+
+     Never (or Auto, if BinPackParameters=false):
+     @interface ddddddddddddd () <
+         ddddddddddddd,
+         ddddddddddddd,
+         ddddddddddddd,
+         ddddddddddddd> {
+     }
+
+  Possible values:
+
+  * ``BPS_Auto`` (in configuration: ``Auto``)
+    Automatically determine parameter bin-packing behavior.
+
+  * ``BPS_Always`` (in configuration: ``Always``)
+    Always bin-pack parameters.
+
+  * ``BPS_Never`` (in configuration: ``Never``)
+    Never bin-pack parameters.
+
+
+
 **ObjCBlockIndentWidth** (``unsigned``)
   The number of characters to use for indentation of ObjC blocks.
 
@@ -1541,6 +1625,44 @@
 
 
 
+**RawStringFormats** (``std::vector<RawStringFormat>``)
+  Defines hints for detecting supported languages code blocks in raw
+  strings.
+
+  A raw string with a matching delimiter or a matching enclosing function
+  name will be reformatted assuming the specified language based on the
+  style for that language defined in the .clang-format file. If no style has
+  been defined in the .clang-format file for the specific language, a
+  predefined style given by 'BasedOnStyle' is used. If 'BasedOnStyle' is not
+  found, the formatting is based on llvm style. A matching delimiter takes
+  precedence over a matching enclosing function name for determining the
+  language of the raw string contents.
+
+  If a canonical delimiter is specified, occurrences of other delimiters for
+  the same language will be updated to the canonical if possible.
+
+  There should be at most one specification per language and each delimiter
+  and enclosing function should not occur in multiple specifications.
+
+  To configure this in the .clang-format file, use:
+
+  .. code-block:: yaml
+
+    RawStringFormats:
+      - Language: TextProto
+          Delimiters:
+            - 'pb'
+            - 'proto'
+          EnclosingFunctions:
+            - 'PARSE_TEXT_PROTO'
+          BasedOnStyle: google
+      - Language: Cpp
+          Delimiters:
+            - 'cc'
+            - 'cpp'
+          BasedOnStyle: llvm
+          CanonicalDelimiter: 'cc'
+
 **ReflowComments** (``bool``)
   If ``true``, clang-format will attempt to re-flow comments.
 
@@ -1568,6 +1690,14 @@
 **SortUsingDeclarations** (``bool``)
   If ``true``, clang-format will sort using declarations.
 
+  The order of using declarations is defined as follows:
+  Split the strings by "::" and discard any initial empty strings. The last
+  element of each list is a non-namespace name; all others are namespace
+  names. Sort the lists of names lexicographically, where the sort order of
+  individual names is that all non-namespace names come before all namespace
+  names, and within those groups, names are in case-insensitive
+  lexicographic order.
+
   .. code-block:: c++
 
      false:                                 true:
@@ -1580,7 +1710,7 @@
   .. code-block:: c++
 
      true:                                  false:
-     (int)i;                        vs.     (int) i;
+     (int) i;                       vs.     (int)i;
 
 **SpaceAfterTemplateKeyword** (``bool``)
   If ``true``, a space will be inserted after the 'template' keyword.
@@ -1599,6 +1729,23 @@
      int a = 5;                     vs.     int a=5;
      a += 42                                a+=42;
 
+**SpaceBeforeCtorInitializerColon** (``bool``)
+  If ``false``, spaces will be removed before constructor initializer
+  colon.
+
+  .. code-block:: c++
+
+     true:                                  false:
+     Foo::Foo() : a(a) {}                   Foo::Foo(): a(a) {}
+
+**SpaceBeforeInheritanceColon** (``bool``)
+  If ``false``, spaces will be removed before inheritance colon.
+
+  .. code-block:: c++
+
+     true:                                  false:
+     class Foo : Bar {}             vs.     class Foo: Bar {}
+
 **SpaceBeforeParens** (``SpaceBeforeParensOptions``)
   Defines in which cases to put a space before opening parentheses.
 
@@ -1643,6 +1790,15 @@
 
 
 
+**SpaceBeforeRangeBasedForLoopColon** (``bool``)
+  If ``false``, spaces will be removed before range-based for loop
+  colon.
+
+  .. code-block:: c++
+
+     true:                                  false:
+     for (auto v : values) {}       vs.     for(auto v: values) {}
+
 **SpaceInEmptyParentheses** (``bool``)
   If ``true``, spaces may be inserted into ``()``.
 
diff --git a/docs/ControlFlowIntegrity.rst b/docs/ControlFlowIntegrity.rst
index 04fb43a..12b4610 100644
--- a/docs/ControlFlowIntegrity.rst
+++ b/docs/ControlFlowIntegrity.rst
@@ -215,6 +215,23 @@
 
 This scheme is currently only supported on the x86 and x86_64 architectures.
 
+``-fsanitize-cfi-icall-generalize-pointers``
+--------------------------------------------
+
+Mismatched pointer types are a common cause of cfi-icall check failures.
+Translation units compiled with the ``-fsanitize-cfi-icall-generalize-pointers``
+flag relax pointer type checking for call sites in that translation unit,
+applied across all functions compiled with ``-fsanitize=cfi-icall``.
+
+Specifically, pointers in return and argument types are treated as equivalent as
+long as the qualifiers for the type they point to match. For example, ``char*``
+``char**`, and ``int*`` are considered equivalent types. However, ``char*`` and
+``const char*`` are considered separate types.
+
+``-fsanitize-cfi-icall-generalize-pointers`` is not compatible with
+``-fsanitize-cfi-cross-dso``.
+
+
 ``-fsanitize=cfi-icall`` and ``-fsanitize=function``
 ----------------------------------------------------
 
diff --git a/docs/DiagnosticsReference.rst b/docs/DiagnosticsReference.rst
index d8c486f..e2b0bd7 100644
--- a/docs/DiagnosticsReference.rst
+++ b/docs/DiagnosticsReference.rst
@@ -244,6 +244,21 @@
 ------------------
 This diagnostic flag exists for GCC compatibility, and has no effect in Clang.
 
+-Waligned-allocation-unavailable
+--------------------------------
+This diagnostic is an error by default, but the flag ``-Wno-aligned-allocation-unavailable`` can be used to disable the error.
+
+**Diagnostic text:**
+
++--------------------------------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|:error:`error:` |nbsp| :diagtext:`aligned` |nbsp| |+------------------------+| |nbsp| :diagtext:`function of type '`:placeholder:`B`:diagtext:`' is only available on` |nbsp| :placeholder:`C` |nbsp| :placeholder:`D` |nbsp| :diagtext:`or newer`|
+|                                                  ||:diagtext:`allocation`  ||                                                                                                                                                                    |
+|                                                  |+------------------------+|                                                                                                                                                                    |
+|                                                  ||:diagtext:`deallocation`||                                                                                                                                                                    |
+|                                                  |+------------------------+|                                                                                                                                                                    |
++--------------------------------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+
 -Wall
 -----
 Some of the diagnostics controlled by this flag are enabled by default.
@@ -745,6 +760,11 @@
 +--------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
 
+-Wbinary-literal
+----------------
+Controls `-Wc++14-binary-literal`_, `-Wc++98-c++11-compat-binary-literal`_, `-Wgnu-binary-literal`_.
+
+
 -Wbind-to-temporary-copy
 ------------------------
 Also controls `-Wc++98-compat-bind-to-temporary-copy`_.
@@ -972,7 +992,7 @@
 --------------
 Some of the diagnostics controlled by this flag are enabled by default.
 
-Also controls `-Wc++11-compat-deprecated-writable-strings`_, `-Wc++11-compat-reserved-user-defined-literal`_, `-Wc++11-narrowing`_, `-Wc++98-c++11-c++14-compat`_, `-Wc++98-c++11-compat`_.
+Also controls `-Wc++11-compat-deprecated-writable-strings`_, `-Wc++11-compat-reserved-user-defined-literal`_, `-Wc++11-narrowing`_, `-Wc++98-c++11-c++14-c++17-compat`_, `-Wc++98-c++11-c++14-compat`_, `-Wc++98-c++11-compat`_.
 
 **Diagnostic text:**
 
@@ -1038,7 +1058,9 @@
 
 -Wc++11-compat-pedantic
 -----------------------
-Controls `-Wc++98-c++11-c++14-compat-pedantic`_, `-Wc++98-c++11-compat-pedantic`_.
+Some of the diagnostics controlled by this flag are enabled by default.
+
+Controls `-Wc++11-compat`_, `-Wc++98-c++11-c++14-c++17-compat-pedantic`_, `-Wc++98-c++11-c++14-compat-pedantic`_, `-Wc++98-c++11-compat-pedantic`_.
 
 
 -Wc++11-compat-reserved-user-defined-literal
@@ -1288,12 +1310,12 @@
 
 -Wc++14-compat
 --------------
-Synonym for `-Wc++98-c++11-c++14-compat`_.
+Controls `-Wc++98-c++11-c++14-c++17-compat`_, `-Wc++98-c++11-c++14-compat`_.
 
 
 -Wc++14-compat-pedantic
 -----------------------
-Synonym for `-Wc++98-c++11-c++14-compat-pedantic`_.
+Controls `-Wc++14-compat`_, `-Wc++98-c++11-c++14-c++17-compat-pedantic`_, `-Wc++98-c++11-c++14-compat-pedantic`_.
 
 
 -Wc++14-extensions
@@ -1349,16 +1371,16 @@
 +-------------------------------------------------------------------------------+
 
 
--Wc++1y-extensions
-------------------
-Synonym for `-Wc++14-extensions`_.
-
-
--Wc++1z-compat
+-Wc++17-compat
 --------------
-This diagnostic is enabled by default.
+Some of the diagnostics controlled by this flag are enabled by default.
 
-Also controls `-Wdeprecated-increment-bool`_, `-Wdeprecated-register`_.
+Controls `-Wc++17-compat-mangling`_, `-Wc++98-c++11-c++14-c++17-compat`_, `-Wdeprecated-increment-bool`_, `-Wdeprecated-register`_.
+
+
+-Wc++17-compat-mangling
+-----------------------
+This diagnostic is enabled by default.
 
 **Diagnostic text:**
 
@@ -1367,42 +1389,49 @@
 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
 
--Wc++1z-extensions
+-Wc++17-compat-pedantic
+-----------------------
+Some of the diagnostics controlled by this flag are enabled by default.
+
+Controls `-Wc++17-compat`_, `-Wc++98-c++11-c++14-c++17-compat-pedantic`_.
+
+
+-Wc++17-extensions
 ------------------
 Some of the diagnostics controlled by this flag are enabled by default.
 
 **Diagnostic text:**
 
 +------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`constexpr if is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`constexpr if is a C++17 extension`|
 +------------------------------------------------------------------------+
 
 +---------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`'constexpr' on lambda expressions is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`'constexpr' on lambda expressions is a C++17 extension`|
 +---------------------------------------------------------------------------------------------+
 
 +---------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`use of the` |nbsp| :placeholder:`A` |nbsp| :diagtext:`attribute is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`use of the` |nbsp| :placeholder:`A` |nbsp| :diagtext:`attribute is a C++17 extension`|
 +---------------------------------------------------------------------------------------------------------------------------+
 
 +---------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`decomposition declarations are a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`decomposition declarations are a C++17 extension`|
 +---------------------------------------------------------------------------------------+
 
 +--------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`pack fold expression is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`pack fold expression is a C++17 extension`|
 +--------------------------------------------------------------------------------+
 
 +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`'begin' and 'end' returning different types (`:placeholder:`A` |nbsp| :diagtext:`and` |nbsp| :placeholder:`B`:diagtext:`) is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`'begin' and 'end' returning different types (`:placeholder:`A` |nbsp| :diagtext:`and` |nbsp| :placeholder:`B`:diagtext:`) is a C++17 extension`|
 +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
 +----------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`hexadecimal floating literals are a C++1z feature`|
+|:warning:`warning:` |nbsp| :diagtext:`hexadecimal floating literals are a C++17 feature`|
 +----------------------------------------------------------------------------------------+
 
 +----------------------------------------+--------------------+-------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`'`|+------------------+|:diagtext:`' initialization statements are a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`'`|+------------------+|:diagtext:`' initialization statements are a C++17 extension`|
 |                                        ||:diagtext:`if`    ||                                                             |
 |                                        |+------------------+|                                                             |
 |                                        ||:diagtext:`switch`||                                                             |
@@ -1410,68 +1439,145 @@
 +----------------------------------------+--------------------+-------------------------------------------------------------+
 
 +-----------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`inline variables are a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`inline variables are a C++17 extension`|
 +-----------------------------------------------------------------------------+
 
 +---------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`use of multiple declarators in a single using declaration is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`use of multiple declarators in a single using declaration is a C++17 extension`|
 +---------------------------------------------------------------------------------------------------------------------+
 
 +-------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`nested namespace definition is a C++1z extension; define each namespace separately`|
+|:warning:`warning:` |nbsp| :diagtext:`nested namespace definition is a C++17 extension; define each namespace separately`|
 +-------------------------------------------------------------------------------------------------------------------------+
 
++------------------------------------------------------------+---------------------------+-----------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`attributes on` |nbsp| |+-------------------------+| |nbsp| :diagtext:`declaration are a C++17 extension`|
+|                                                            ||:diagtext:`a namespace`  ||                                                     |
+|                                                            |+-------------------------+|                                                     |
+|                                                            ||:diagtext:`an enumerator`||                                                     |
+|                                                            |+-------------------------+|                                                     |
++------------------------------------------------------------+---------------------------+-----------------------------------------------------+
+
 +---------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`capture of '\*this' by copy is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`capture of '\*this' by copy is a C++17 extension`|
 +---------------------------------------------------------------------------------------+
 
 +------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`static\_assert with no message is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`static\_assert with no message is a C++17 extension`|
 +------------------------------------------------------------------------------------------+
 
 +--------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`template template parameter using 'typename' is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`template template parameter using 'typename' is a C++17 extension`|
 +--------------------------------------------------------------------------------------------------------+
 
 +--------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`default scope specifier for attributes is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`default scope specifier for attributes is a C++17 extension`|
 +--------------------------------------------------------------------------------------------------+
 
 +-----------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`pack expansion of using declaration is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`pack expansion of using declaration is a C++17 extension`|
 +-----------------------------------------------------------------------------------------------+
 
 
+-Wc++1y-extensions
+------------------
+Synonym for `-Wc++14-extensions`_.
+
+
+-Wc++1z-compat
+--------------
+Synonym for `-Wc++17-compat`_.
+
+
+-Wc++1z-compat-mangling
+-----------------------
+Synonym for `-Wc++17-compat-mangling`_.
+
+
+-Wc++1z-extensions
+------------------
+Synonym for `-Wc++17-extensions`_.
+
+
+-Wc++2a-compat
+--------------
+**Diagnostic text:**
+
++-------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`'`:placeholder:`A`:diagtext:`' is a keyword in C++2a`|
++-------------------------------------------------------------------------------------------+
+
+
+-Wc++2a-compat-pedantic
+-----------------------
+Synonym for `-Wc++2a-compat`_.
+
+
+-Wc++2a-extensions
+------------------
+Some of the diagnostics controlled by this flag are enabled by default.
+
+**Diagnostic text:**
+
++----------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`default member initializer for bit-field is a C++2a extension`|
++----------------------------------------------------------------------------------------------------+
+
++--------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`explicit capture of 'this' with a capture default of '=' is a C++2a extension`|
++--------------------------------------------------------------------------------------------------------------------+
+
++--------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`invoking a pointer to a 'const &' member function on an rvalue is a C++2a extension`|
++--------------------------------------------------------------------------------------------------------------------------+
+
+
+-Wc++98-c++11-c++14-c++17-compat
+--------------------------------
+**Diagnostic text:**
+
++-------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`default member initializer for bit-field is incompatible with C++ standards before C++2a`|
++-------------------------------------------------------------------------------------------------------------------------------+
+
++-----------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`explicit capture of 'this' with a capture default of '=' is incompatible with C++ standards before C++2a`|
++-----------------------------------------------------------------------------------------------------------------------------------------------+
+
+
+-Wc++98-c++11-c++14-c++17-compat-pedantic
+-----------------------------------------
+Also controls `-Wc++98-c++11-c++14-c++17-compat`_.
+
+**Diagnostic text:**
+
++-----------------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`invoking a pointer to a 'const &' member function on an rvalue is incompatible with C++ standards before C++2a`|
++-----------------------------------------------------------------------------------------------------------------------------------------------------+
+
+
 -Wc++98-c++11-c++14-compat
 --------------------------
 **Diagnostic text:**
 
-+------------------------------------------------------------+---------------------------+--------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`attributes on` |nbsp| |+-------------------------+| |nbsp| :diagtext:`declaration are incompatible with C++ standards before C++1z`|
-|                                                            ||:diagtext:`a namespace`  ||                                                                                |
-|                                                            |+-------------------------+|                                                                                |
-|                                                            ||:diagtext:`an enumerator`||                                                                                |
-|                                                            |+-------------------------+|                                                                                |
-+------------------------------------------------------------+---------------------------+--------------------------------------------------------------------------------+
-
 +---------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`constexpr if is incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`constexpr if is incompatible with C++ standards before C++17`|
 +---------------------------------------------------------------------------------------------------+
 
 +----------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`constexpr on lambda expressions is incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`constexpr on lambda expressions is incompatible with C++ standards before C++17`|
 +----------------------------------------------------------------------------------------------------------------------+
 
 +------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`decomposition declarations are incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`decomposition declarations are incompatible with C++ standards before C++17`|
 +------------------------------------------------------------------------------------------------------------------+
 
 +-----------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`pack fold expression is incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`pack fold expression is incompatible with C++ standards before C++17`|
 +-----------------------------------------------------------------------------------------------------------+
 
 +---------------------------+--------------------+----------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| |+------------------+| |nbsp| :diagtext:`initialization statements are incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| |+------------------+| |nbsp| :diagtext:`initialization statements are incompatible with C++ standards before C++17`|
 |                           ||:diagtext:`if`    ||                                                                                              |
 |                           |+------------------+|                                                                                              |
 |                           ||:diagtext:`switch`||                                                                                              |
@@ -1479,47 +1585,47 @@
 +---------------------------+--------------------+----------------------------------------------------------------------------------------------+
 
 +--------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`inline variables are incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`inline variables are incompatible with C++ standards before C++17`|
 +--------------------------------------------------------------------------------------------------------+
 
 +------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`nested namespace definition is incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`nested namespace definition is incompatible with C++ standards before C++17`|
 +------------------------------------------------------------------------------------------------------------------+
 
 +-------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`by value capture of '\*this' is incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`by value capture of '\*this' is incompatible with C++ standards before C++17`|
 +-------------------------------------------------------------------------------------------------------------------+
 
 +---------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`static\_assert with no message is incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`static\_assert with no message is incompatible with C++ standards before C++17`|
 +---------------------------------------------------------------------------------------------------------------------+
 
 +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`non-type template parameters declared with` |nbsp| :placeholder:`A` |nbsp| :diagtext:`are incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`non-type template parameters declared with` |nbsp| :placeholder:`A` |nbsp| :diagtext:`are incompatible with C++ standards before C++17`|
 +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
 +-----------------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`template template parameter using 'typename' is incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`template template parameter using 'typename' is incompatible with C++ standards before C++17`|
 +-----------------------------------------------------------------------------------------------------------------------------------+
 
 +--------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`unicode literals are incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`unicode literals are incompatible with C++ standards before C++17`|
 +--------------------------------------------------------------------------------------------------------+
 
 +-----------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`default scope specifier for attributes is incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`default scope specifier for attributes is incompatible with C++ standards before C++17`|
 +-----------------------------------------------------------------------------------------------------------------------------+
 
 +------------------------------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`use of multiple declarators in a single using declaration is incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`use of multiple declarators in a single using declaration is incompatible with C++ standards before C++17`|
 +------------------------------------------------------------------------------------------------------------------------------------------------+
 
 +-----------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`pack expansion using declaration is incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`pack expansion using declaration is incompatible with C++ standards before C++17`|
 +-----------------------------------------------------------------------------------------------------------------------+
 
 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`'begin' and 'end' returning different types (`:placeholder:`A` |nbsp| :diagtext:`and` |nbsp| :placeholder:`B`:diagtext:`) is incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`'begin' and 'end' returning different types (`:placeholder:`A` |nbsp| :diagtext:`and` |nbsp| :placeholder:`B`:diagtext:`) is incompatible with C++ standards before C++17`|
 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
 
@@ -1529,8 +1635,16 @@
 
 **Diagnostic text:**
 
++------------------------------------------------------------+---------------------------+--------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`attributes on` |nbsp| |+-------------------------+| |nbsp| :diagtext:`declaration are incompatible with C++ standards before C++17`|
+|                                                            ||:diagtext:`a namespace`  ||                                                                                |
+|                                                            |+-------------------------+|                                                                                |
+|                                                            ||:diagtext:`an enumerator`||                                                                                |
+|                                                            |+-------------------------+|                                                                                |
++------------------------------------------------------------+---------------------------+--------------------------------------------------------------------------------+
+
 +---------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`hexadecimal floating literals are incompatible with C++ standards before C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`hexadecimal floating literals are incompatible with C++ standards before C++17`|
 +---------------------------------------------------------------------------------------------------------------------+
 
 
@@ -1587,10 +1701,8 @@
 +----------------------------------------------------------------------------------------------------------+
 
 
--Wc++98-c++11-compat-pedantic
------------------------------
-Also controls `-Wc++98-c++11-compat`_.
-
+-Wc++98-c++11-compat-binary-literal
+-----------------------------------
 **Diagnostic text:**
 
 +---------------------------------------------------------------------------------------------------------------+
@@ -1598,9 +1710,14 @@
 +---------------------------------------------------------------------------------------------------------------+
 
 
+-Wc++98-c++11-compat-pedantic
+-----------------------------
+Controls `-Wc++98-c++11-compat`_, `-Wc++98-c++11-compat-binary-literal`_.
+
+
 -Wc++98-compat
 --------------
-Also controls `-Wc++98-c++11-c++14-compat`_, `-Wc++98-c++11-compat`_, `-Wc++98-compat-local-type-template-args`_, `-Wc++98-compat-unnamed-type-template-args`_.
+Also controls `-Wc++98-c++11-c++14-c++17-compat`_, `-Wc++98-c++11-c++14-compat`_, `-Wc++98-c++11-compat`_, `-Wc++98-compat-local-type-template-args`_, `-Wc++98-compat-unnamed-type-template-args`_.
 
 **Diagnostic text:**
 
@@ -1941,7 +2058,7 @@
 
 -Wc++98-compat-pedantic
 -----------------------
-Also controls `-Wc++98-c++11-c++14-compat-pedantic`_, `-Wc++98-c++11-compat-pedantic`_, `-Wc++98-compat`_, `-Wc++98-compat-bind-to-temporary-copy`_.
+Also controls `-Wc++98-c++11-c++14-c++17-compat-pedantic`_, `-Wc++98-c++11-c++14-compat-pedantic`_, `-Wc++98-c++11-compat-pedantic`_, `-Wc++98-compat`_, `-Wc++98-compat-bind-to-temporary-copy`_.
 
 **Diagnostic text:**
 
@@ -2435,6 +2552,11 @@
 
 -Wcoroutine
 -----------
+Synonym for `-Wcoroutine-missing-unhandled-exception`_.
+
+
+-Wcoroutine-missing-unhandled-exception
+---------------------------------------
 This diagnostic is enabled by default.
 
 **Diagnostic text:**
@@ -2453,6 +2575,11 @@
 +--------------------------------------------------------------------------------------------------+
 
 
+-Wcpp
+-----
+Synonym for `-W#warnings`_.
+
+
 -Wcstring-format-directive
 --------------------------
 **Diagnostic text:**
@@ -2716,6 +2843,10 @@
 
 **Diagnostic text:**
 
++--------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`specifying 'uuid' as an ATL attribute is deprecated; use \_\_declspec instead`|
++--------------------------------------------------------------------------------------------------------------------+
+
 +-----------------------------------------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`use of C-style parameters in Objective-C method declarations is deprecated`|
 +-----------------------------------------------------------------------------------------------------------------+
@@ -2751,7 +2882,7 @@
 **Diagnostic text:**
 
 +----------------------------------------------------------------------+----------------------+
-|:warning:`warning:` |nbsp| :diagtext:`Implementing deprecated` |nbsp| |+--------------------+|
+|:warning:`warning:` |nbsp| :diagtext:`implementing deprecated` |nbsp| |+--------------------+|
 |                                                                      ||:diagtext:`method`  ||
 |                                                                      |+--------------------+|
 |                                                                      ||:diagtext:`class`   ||
@@ -2760,6 +2891,10 @@
 |                                                                      |+--------------------+|
 +----------------------------------------------------------------------+----------------------+
 
++----------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`implementing unavailable method`|
++----------------------------------------------------------------------+
+
 
 -Wdeprecated-increment-bool
 ---------------------------
@@ -2768,7 +2903,7 @@
 **Diagnostic text:**
 
 +---------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`incrementing expression of type bool is deprecated and incompatible with C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`incrementing expression of type bool is deprecated and incompatible with C++17`|
 +---------------------------------------------------------------------------------------------------------------------+
 
 
@@ -2818,7 +2953,7 @@
 **Diagnostic text:**
 
 +-------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`'register' storage class specifier is deprecated and incompatible with C++1z`|
+|:warning:`warning:` |nbsp| :diagtext:`'register' storage class specifier is deprecated and incompatible with C++17`|
 +-------------------------------------------------------------------------------------------------------------------+
 
 
@@ -3227,7 +3362,7 @@
 **Diagnostic text:**
 
 +--------------------------------------------------------------------------------------------+
-|:error:`error:` |nbsp| :diagtext:`ISO C++1z does not allow dynamic exception specifications`|
+|:error:`error:` |nbsp| :diagtext:`ISO C++17 does not allow dynamic exception specifications`|
 +--------------------------------------------------------------------------------------------+
 
 
@@ -3279,7 +3414,7 @@
 **Diagnostic text:**
 
 +-------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`ISO C++1z does not allow a decomposition group to be empty`|
+|:warning:`warning:` |nbsp| :diagtext:`ISO C++17 does not allow a decomposition group to be empty`|
 +-------------------------------------------------------------------------------------------------+
 
 
@@ -3312,6 +3447,8 @@
 --------------
 This diagnostic is enabled by default.
 
+Also controls `-Wenum-compare-switch`_.
+
 **Diagnostic text:**
 
 +------------------------------------------------------------------------------------------------+
@@ -3319,6 +3456,17 @@
 +------------------------------------------------------------------------------------------------+
 
 
+-Wenum-compare-switch
+---------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++--------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`comparison of two values with different enumeration types in switch statement`|
++--------------------------------------------------------------------------------------------------------------------+
+
+
 -Wenum-conversion
 -----------------
 This diagnostic is enabled by default.
@@ -3363,6 +3511,10 @@
 |:warning:`warning:` |nbsp| :diagtext:`exception of type` |nbsp| :placeholder:`A` |nbsp| :diagtext:`will be caught by earlier handler`|
 +-------------------------------------------------------------------------------------------------------------------------------------+
 
++-----------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :placeholder:`A` |nbsp| :diagtext:`has a non-throwing exception specification but can still throw`|
++-----------------------------------------------------------------------------------------------------------------------------+
+
 
 -Wexit-time-destructors
 -----------------------
@@ -3451,7 +3603,7 @@
 -------
 Some of the diagnostics controlled by this flag are enabled by default.
 
-Also controls `-Wignored-qualifiers`_, `-Winitializer-overrides`_, `-Wmissing-field-initializers`_, `-Wmissing-method-return-type`_, `-Wsemicolon-before-method-body`_, `-Wsign-compare`_, `-Wunused-parameter`_.
+Also controls `-Wignored-qualifiers`_, `-Winitializer-overrides`_, `-Wmissing-field-initializers`_, `-Wmissing-method-return-type`_, `-Wnull-pointer-arithmetic`_, `-Wsemicolon-before-method-body`_, `-Wsign-compare`_, `-Wunused-parameter`_.
 
 **Diagnostic text:**
 
@@ -4317,6 +4469,10 @@
 |:warning:`warning:` |nbsp| :placeholder:`A` |nbsp| :diagtext:`attribute ignored`|
 +--------------------------------------------------------------------------------+
 
++--------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :placeholder:`A` |nbsp| :diagtext:`attribute ignored for field of type` |nbsp| :placeholder:`B`|
++--------------------------------------------------------------------------------------------------------------------------+
+
 +---------------------------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :placeholder:`A` |nbsp| :diagtext:`attribute ignored on inline function`|
 +---------------------------------------------------------------------------------------------------+
@@ -4453,6 +4609,8 @@
 |                                                                                                |+----------------------------------------------------------------------------------------------------------------+|
 |                                                                                                ||:diagtext:`methods and properties`                                                                              ||
 |                                                                                                |+----------------------------------------------------------------------------------------------------------------+|
+|                                                                                                ||:diagtext:`functions, methods, and properties`                                                                  ||
+|                                                                                                |+----------------------------------------------------------------------------------------------------------------+|
 |                                                                                                ||:diagtext:`struct or union`                                                                                     ||
 |                                                                                                |+----------------------------------------------------------------------------------------------------------------+|
 |                                                                                                ||:diagtext:`struct, union or class`                                                                              ||
@@ -4645,9 +4803,13 @@
 |:warning:`warning:` |nbsp| :diagtext:`\_\_declspec attribute` |nbsp| :placeholder:`A` |nbsp| :diagtext:`is not supported`|
 +-------------------------------------------------------------------------------------------------------------------------+
 
-+-------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`Ignoring unsupported '`:placeholder:`A`:diagtext:`' in the target attribute string`|
-+-------------------------------------------------------------------------------------------------------------------------+
++-------------------------------------------------------+-------------------------+----------------------------------+---------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`ignoring` |nbsp| |+-----------------------+|+--------------------------------+| |nbsp| :diagtext:`'`:placeholder:`C`:diagtext:`' in the target attribute string`|
+|                                                       ||:diagtext:`unsupported`|||                                ||                                                                                 |
+|                                                       |+-----------------------+|+--------------------------------+|                                                                                 |
+|                                                       ||:diagtext:`duplicate`  ||| |nbsp| :diagtext:`architecture`||                                                                                 |
+|                                                       |+-----------------------+|+--------------------------------+|                                                                                 |
++-------------------------------------------------------+-------------------------+----------------------------------+---------------------------------------------------------------------------------+
 
 
 -Wignored-optimization-argument
@@ -4970,9 +5132,13 @@
 
 **Diagnostic text:**
 
-+------------------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`implicit declaration of function` |nbsp| :placeholder:`A` |nbsp| :diagtext:`is invalid in C99`|
-+------------------------------------------------------------------------------------------------------------------------------------+
++----------------------------------------------------------------------------------------------------------------------------------------+--------------------+
+|:warning:`warning:` |nbsp| :diagtext:`implicit declaration of function` |nbsp| :placeholder:`A` |nbsp| :diagtext:`is invalid in` |nbsp| |+------------------+|
+|                                                                                                                                        ||:diagtext:`C99`   ||
+|                                                                                                                                        |+------------------+|
+|                                                                                                                                        ||:diagtext:`OpenCL`||
+|                                                                                                                                        |+------------------+|
++----------------------------------------------------------------------------------------------------------------------------------------+--------------------+
 
 +---------------------------------------------------------------------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`implicitly declaring library function '`:placeholder:`A`:diagtext:`' with type` |nbsp| :placeholder:`B`|
@@ -5250,6 +5416,10 @@
 |:warning:`warning:` |nbsp| :diagtext:`missing submodule '`:placeholder:`A`:diagtext:`'`|
 +---------------------------------------------------------------------------------------+
 
++--------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`umbrella directory '`:placeholder:`A`:diagtext:`' not found`|
++--------------------------------------------------------------------------------------------------+
+
 +-------------------------------------------------------------------------------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`umbrella header for module '`:placeholder:`A`:diagtext:`' does not include header '`:placeholder:`B`:diagtext:`'`|
 +-------------------------------------------------------------------------------------------------------------------------------------------------------+
@@ -5299,7 +5469,7 @@
 **Diagnostic text:**
 
 +------------------------------------------------------------------------------------------------+
-|:error:`error:` |nbsp| :diagtext:`ISO C++1z does not allow incrementing expression of type bool`|
+|:error:`error:` |nbsp| :diagtext:`ISO C++17 does not allow incrementing expression of type bool`|
 +------------------------------------------------------------------------------------------------+
 
 
@@ -5472,6 +5642,10 @@
 
 **Diagnostic text:**
 
++-----------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`the object size sanitizer has no effect at -O0, but is explicitly enabled:` |nbsp| :placeholder:`A`|
++-----------------------------------------------------------------------------------------------------------------------------------------+
+
 +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`optimization level '`:placeholder:`A`:diagtext:`' is not supported; using '`:placeholder:`B`:placeholder:`C`:diagtext:`' instead`|
 +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
@@ -5522,6 +5696,17 @@
 +--------------------------------------------------------------------------------------------------------------+
 
 
+-Winvalid-ios-deployment-target
+-------------------------------
+This diagnostic is an error by default, but the flag ``-Wno-invalid-ios-deployment-target`` can be used to disable the error.
+
+**Diagnostic text:**
+
++------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|:error:`error:` |nbsp| :diagtext:`invalid iOS deployment version '`:placeholder:`A`:diagtext:`', iOS 10 is the maximum deployment target for 32-bit targets`|
++------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+
 -Winvalid-noreturn
 ------------------
 This diagnostic is enabled by default.
@@ -6013,6 +6198,8 @@
 
 -Wmicrosoft-enum-forward-reference
 ----------------------------------
+This diagnostic is enabled by default.
+
 **Diagnostic text:**
 
 +---------------------------------------------------------------------------------------------------+
@@ -6022,8 +6209,6 @@
 
 -Wmicrosoft-enum-value
 ----------------------
-This diagnostic is enabled by default.
-
 **Diagnostic text:**
 
 +---------------------------------------------------------------------------------------------------------------------------+
@@ -6433,6 +6618,17 @@
 +--------------------------------------------------------------------------------------------+
 
 
+-Wmissing-noescape
+------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++----------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`parameter of overriding method should be annotated with \_\_attribute\_\_((noescape))`|
++----------------------------------------------------------------------------------------------------------------------------+
+
+
 -Wmissing-noreturn
 ------------------
 **Diagnostic text:**
@@ -6596,7 +6792,7 @@
 ------
 Some of the diagnostics controlled by this flag are enabled by default.
 
-Controls `-Wcast-of-sel-type`_, `-Wchar-subscripts`_, `-Wcomment`_, `-Wdelete-non-virtual-dtor`_, `-Wextern-c-compat`_, `-Wfor-loop-analysis`_, `-Wformat`_, `-Wimplicit`_, `-Winfinite-recursion`_, `-Wmismatched-tags`_, `-Wmissing-braces`_, `-Wmove`_, `-Wmultichar`_, `-Wobjc-designated-initializers`_, `-Wobjc-missing-super-calls`_, `-Woverloaded-virtual`_, `-Wprivate-extern`_, `-Wreorder`_, `-Wreturn-type`_, `-Wself-assign`_, `-Wself-move`_, `-Wsizeof-array-argument`_, `-Wsizeof-array-decay`_, `-Wstring-plus-int`_, `-Wtrigraphs`_, `-Wuninitialized`_, `-Wunknown-pragmas`_, `-Wunused`_, `-Wuser-defined-warnings`_, `-Wvolatile-register-var`_.
+Controls `-Wcast-of-sel-type`_, `-Wchar-subscripts`_, `-Wcomment`_, `-Wdelete-non-virtual-dtor`_, `-Wextern-c-compat`_, `-Wfor-loop-analysis`_, `-Wformat`_, `-Wimplicit`_, `-Winfinite-recursion`_, `-Wmismatched-tags`_, `-Wmissing-braces`_, `-Wmove`_, `-Wmultichar`_, `-Wobjc-designated-initializers`_, `-Wobjc-flexible-array`_, `-Wobjc-missing-super-calls`_, `-Woverloaded-virtual`_, `-Wprivate-extern`_, `-Wreorder`_, `-Wreturn-type`_, `-Wself-assign`_, `-Wself-move`_, `-Wsizeof-array-argument`_, `-Wsizeof-array-decay`_, `-Wstring-plus-int`_, `-Wtrigraphs`_, `-Wuninitialized`_, `-Wunknown-pragmas`_, `-Wunused`_, `-Wuser-defined-warnings`_, `-Wvolatile-register-var`_.
 
 
 -Wmove
@@ -6692,6 +6888,11 @@
 +----------------------------------------------------------------+
 
 
+-Wnoexcept-type
+---------------
+Synonym for `-Wc++17-compat-mangling`_.
+
+
 -Wnon-gcc
 ---------
 Some of the diagnostics controlled by this flag are enabled by default.
@@ -6832,6 +7033,32 @@
 +---------------------------------------------------------------------------------------------------------------------+
 
 
+-Wnsconsumed-mismatch
+---------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++---------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`overriding method has mismatched ns\_consumed attribute on its parameter`|
++---------------------------------------------------------------------------------------------------------------+
+
+
+-Wnsreturns-mismatch
+--------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++-------------------------------------------------------------------------------------+---------------------------+------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`overriding method has mismatched ns\_returns\_`|+-------------------------+| |nbsp| :diagtext:`attributes`|
+|                                                                                     ||:diagtext:`not\_retained`||                              |
+|                                                                                     |+-------------------------+|                              |
+|                                                                                     ||:diagtext:`retained`     ||                              |
+|                                                                                     |+-------------------------+|                              |
++-------------------------------------------------------------------------------------+---------------------------+------------------------------+
+
+
 -Wnull-arithmetic
 -----------------
 This diagnostic is enabled by default.
@@ -6904,6 +7131,23 @@
 +---------------------------------------------------------------------------------------------------------+
 
 
+-Wnull-pointer-arithmetic
+-------------------------
+**Diagnostic text:**
+
++--------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`arithmetic on a null pointer treated as a cast from integer to pointer is a GNU extension`|
++--------------------------------------------------------------------------------------------------------------------------------+
+
++-------------------------------------------------------------------------------------------------------------+----------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`performing pointer arithmetic on a null pointer has undefined behavior`|+--------------------------------------------+|
+|                                                                                                             ||                                            ||
+|                                                                                                             |+--------------------------------------------+|
+|                                                                                                             || |nbsp| :diagtext:`if the offset is nonzero`||
+|                                                                                                             |+--------------------------------------------+|
++-------------------------------------------------------------------------------------------------------------+----------------------------------------------+
+
+
 -Wnullability
 -------------
 This diagnostic is enabled by default.
@@ -7077,6 +7321,21 @@
 +----------------------------------------------------------------------------------------------------------+
 
 
+-Wobjc-flexible-array
+---------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`field` |nbsp| :placeholder:`A` |nbsp| :diagtext:`can overwrite instance variable` |nbsp| :placeholder:`B` |nbsp| :diagtext:`with variable sized type` |nbsp| :placeholder:`C` |nbsp| :diagtext:`in superclass` |nbsp| :placeholder:`D`|
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`field` |nbsp| :placeholder:`A` |nbsp| :diagtext:`with variable sized type` |nbsp| :placeholder:`B` |nbsp| :diagtext:`is not visible to subclasses and can conflict with their instance variables`|
++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+
 -Wobjc-forward-class-redefinition
 ---------------------------------
 This diagnostic is enabled by default.
@@ -7152,6 +7411,15 @@
 +-------------------------------------------------------------------------------------------+
 
 
+-Wobjc-messaging-id
+-------------------
+**Diagnostic text:**
+
++---------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`messaging unqualified id`|
++---------------------------------------------------------------+
+
+
 -Wobjc-method-access
 --------------------
 This diagnostic is enabled by default.
@@ -7515,10 +7783,26 @@
 
 **Diagnostic text:**
 
++------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`option '-ffine-grained-bitfield-accesses' cannot be enabled together with a sanitizer; flag ignored`|
++------------------------------------------------------------------------------------------------------------------------------------------+
+
 +----------------------------------------------------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`option '`:placeholder:`A`:diagtext:`' was ignored by the PS4 toolchain, using '-fPIC'`|
 +----------------------------------------------------------------------------------------------------------------------------+
 
++-------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`ignoring '-mabicalls' option as it cannot be used with non position-independent code and the N64 ABI`|
++-------------------------------------------------------------------------------------------------------------------------------------------+
+
++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------+----------------------+
+|:warning:`warning:` |nbsp| :diagtext:`ignoring '-mlong-calls' option as it is not currently supported with` |nbsp| |+-----------------------------------------+|:diagtext:`-mabicalls`|
+|                                                                                                                   ||                                         ||                      |
+|                                                                                                                   |+-----------------------------------------+|                      |
+|                                                                                                                   ||:diagtext:`the implicit usage of` |nbsp| ||                      |
+|                                                                                                                   |+-----------------------------------------+|                      |
++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------+----------------------+
+
 
 -Wout-of-line-declaration
 -------------------------
@@ -7531,6 +7815,21 @@
 +-------------------------------------------------------------------------------------------+
 
 
+-Wout-of-scope-function
+-----------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++-------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`use of out-of-scope declaration of` |nbsp| :placeholder:`A`|+-------------------------------------------------------------------------------------+|
+|                                                                                                 ||                                                                                     ||
+|                                                                                                 |+-------------------------------------------------------------------------------------+|
+|                                                                                                 || |nbsp| :diagtext:`whose type is not compatible with that of an implicit declaration`||
+|                                                                                                 |+-------------------------------------------------------------------------------------+|
++-------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------+
+
+
 -Wover-aligned
 --------------
 **Diagnostic text:**
@@ -7776,7 +8075,7 @@
 
 -Wpedantic
 ----------
-Also controls `-Wc++11-extra-semi`_, `-Wc++11-long-long`_, `-Wc++14-binary-literal`_, `-Wc11-extensions`_, `-Wcomplex-component-init`_, `-Wdeclaration-after-statement`_, `-Wdollar-in-identifier-extension`_, `-Wembedded-directive`_, `-Wempty-translation-unit`_, `-Wextended-offsetof`_, `-Wflexible-array-extensions`_, `-Wformat-pedantic`_, `-Wfour-char-constants`_, `-Wgnu-anonymous-struct`_, `-Wgnu-auto-type`_, `-Wgnu-binary-literal`_, `-Wgnu-case-range`_, `-Wgnu-complex-integer`_, `-Wgnu-compound-literal-initializer`_, `-Wgnu-conditional-omitted-operand`_, `-Wgnu-empty-initializer`_, `-Wgnu-empty-struct`_, `-Wgnu-flexible-array-initializer`_, `-Wgnu-flexible-array-union-member`_, `-Wgnu-folding-constant`_, `-Wgnu-imaginary-constant`_, `-Wgnu-include-next`_, `-Wgnu-label-as-value`_, `-Wgnu-redeclared-enum`_, `-Wgnu-statement-expression`_, `-Wgnu-union-cast`_, `-Wgnu-zero-line-directive`_, `-Wgnu-zero-variadic-macro-arguments`_, `-Wimport-preprocessor-directive-pedantic`_, `-Wkeyword-macro`_, `-Wlanguage-extension-token`_, `-Wlong-long`_, `-Wmicrosoft-charize`_, `-Wmicrosoft-comment-paste`_, `-Wmicrosoft-cpp-macro`_, `-Wmicrosoft-end-of-file`_, `-Wmicrosoft-enum-forward-reference`_, `-Wmicrosoft-fixed-enum`_, `-Wmicrosoft-flexible-array`_, `-Wmicrosoft-redeclare-static`_, `-Wnested-anon-types`_, `-Wnullability-extension`_, `-Woverlength-strings`_, `-Wretained-language-linkage`_, `-Wvariadic-macros`_, `-Wvla-extension`_, `-Wzero-length-array`_.
+Also controls `-Wc++11-extra-semi`_, `-Wc++11-long-long`_, `-Wc++14-binary-literal`_, `-Wc11-extensions`_, `-Wcomplex-component-init`_, `-Wdeclaration-after-statement`_, `-Wdollar-in-identifier-extension`_, `-Wembedded-directive`_, `-Wempty-translation-unit`_, `-Wextended-offsetof`_, `-Wflexible-array-extensions`_, `-Wformat-pedantic`_, `-Wfour-char-constants`_, `-Wgnu-anonymous-struct`_, `-Wgnu-auto-type`_, `-Wgnu-binary-literal`_, `-Wgnu-case-range`_, `-Wgnu-complex-integer`_, `-Wgnu-compound-literal-initializer`_, `-Wgnu-conditional-omitted-operand`_, `-Wgnu-empty-initializer`_, `-Wgnu-empty-struct`_, `-Wgnu-flexible-array-initializer`_, `-Wgnu-flexible-array-union-member`_, `-Wgnu-folding-constant`_, `-Wgnu-imaginary-constant`_, `-Wgnu-include-next`_, `-Wgnu-label-as-value`_, `-Wgnu-redeclared-enum`_, `-Wgnu-statement-expression`_, `-Wgnu-union-cast`_, `-Wgnu-zero-line-directive`_, `-Wgnu-zero-variadic-macro-arguments`_, `-Wimport-preprocessor-directive-pedantic`_, `-Wkeyword-macro`_, `-Wlanguage-extension-token`_, `-Wlong-long`_, `-Wmicrosoft-charize`_, `-Wmicrosoft-comment-paste`_, `-Wmicrosoft-cpp-macro`_, `-Wmicrosoft-end-of-file`_, `-Wmicrosoft-enum-value`_, `-Wmicrosoft-fixed-enum`_, `-Wmicrosoft-flexible-array`_, `-Wmicrosoft-redeclare-static`_, `-Wnested-anon-types`_, `-Wnullability-extension`_, `-Woverlength-strings`_, `-Wretained-language-linkage`_, `-Wundefined-internal-type`_, `-Wvla-extension`_, `-Wzero-length-array`_.
 
 **Diagnostic text:**
 
@@ -7842,6 +8141,10 @@
 |:warning:`warning:` |nbsp| :diagtext:`parameter` |nbsp| :placeholder:`A` |nbsp| :diagtext:`was not declared, defaulting to type 'int'`|
 +--------------------------------------------------------------------------------------------------------------------------------------+
 
++--------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`invoking a pointer to a 'const &' member function on an rvalue is a C++2a extension`|
++--------------------------------------------------------------------------------------------------------------------------+
+
 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`qualifier in explicit instantiation of` |nbsp| :placeholder:`A` |nbsp| :diagtext:`requires a template-id (a typedef is not permitted)`|
 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
@@ -7995,7 +8298,7 @@
 +---------------------------------------------------------------------------------------------------------------------------+
 
 +---------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`use of the` |nbsp| :placeholder:`A` |nbsp| :diagtext:`attribute is a C++1z extension`|
+|:warning:`warning:` |nbsp| :diagtext:`use of the` |nbsp| :placeholder:`A` |nbsp| :diagtext:`attribute is a C++17 extension`|
 +---------------------------------------------------------------------------------------------------------------------------+
 
 +-----------------------------------------------------------------------------+--------------------+---------------------------------------------+
@@ -8080,6 +8383,14 @@
 |:warning:`warning:` |nbsp| :diagtext:`exception specification of '...' is a Microsoft extension`|
 +------------------------------------------------------------------------------------------------+
 
++------------------------------------------------------------+---------------------------+-----------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`attributes on` |nbsp| |+-------------------------+| |nbsp| :diagtext:`declaration are a C++17 extension`|
+|                                                            ||:diagtext:`a namespace`  ||                                                     |
+|                                                            |+-------------------------+|                                                     |
+|                                                            ||:diagtext:`an enumerator`||                                                     |
+|                                                            |+-------------------------+|                                                     |
++------------------------------------------------------------+---------------------------+-----------------------------------------------------+
+
 +-----------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`extern templates are a C++11 extension`|
 +-----------------------------------------------------------------------------+
@@ -8105,7 +8416,7 @@
 +---------------------------------------------------------------------------------------+
 
 +----------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`hexadecimal floating literals are a C++1z feature`|
+|:warning:`warning:` |nbsp| :diagtext:`hexadecimal floating literals are a C++17 feature`|
 +----------------------------------------------------------------------------------------+
 
 +---------------------------------------------------------------------+
@@ -8124,6 +8435,14 @@
 |:warning:`warning:` |nbsp| :diagtext:`\_\_VA\_ARGS\_\_ can only appear in the expansion of a C99 variadic macro`|
 +----------------------------------------------------------------------------------------------------------------+
 
++------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`variadic macros are a C99 feature`|
++------------------------------------------------------------------------+
+
++--------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`named variadic macros are a GNU extension`|
++--------------------------------------------------------------------------------+
+
 +------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`empty macro arguments are a C99 feature`|
 +------------------------------------------------------------------------------+
@@ -8288,6 +8607,17 @@
 +----------------------------------------------------------------------------------------------------------------------------------+
 
 
+-Wpragma-clang-attribute
+------------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++-------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`unused attribute` |nbsp| :placeholder:`A` |nbsp| :diagtext:`in '#pragma clang attribute push' region`|
++-------------------------------------------------------------------------------------------------------------------------------------------+
+
+
 -Wpragma-once-outside-header
 ----------------------------
 This diagnostic is enabled by default.
@@ -8299,6 +8629,32 @@
 +----------------------------------------------------------------+
 
 
+-Wpragma-pack
+-------------
+Some of the diagnostics controlled by this flag are enabled by default.
+
+Also controls `-Wpragma-pack-suspicious-include`_.
+
+**Diagnostic text:**
+
++---------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`the current #pragma pack aligment value is modified in the included file`|
++---------------------------------------------------------------------------------------------------------------+
+
++---------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`unterminated '#pragma pack (push, ...)' at end of file`|
++---------------------------------------------------------------------------------------------+
+
+
+-Wpragma-pack-suspicious-include
+--------------------------------
+**Diagnostic text:**
+
++-------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`non-default #pragma pack value changes the alignment of struct or union members in the included file`|
++-------------------------------------------------------------------------------------------------------------------------------------------+
+
+
 -Wpragma-system-header-outside-header
 -------------------------------------
 This diagnostic is enabled by default.
@@ -8314,7 +8670,7 @@
 ---------
 Some of the diagnostics controlled by this flag are enabled by default.
 
-Also controls `-Wignored-pragmas`_, `-Wunknown-pragmas`_.
+Also controls `-Wignored-pragmas`_, `-Wpragma-clang-attribute`_, `-Wpragma-pack`_, `-Wunknown-pragmas`_.
 
 **Diagnostic text:**
 
@@ -8370,6 +8726,23 @@
 |:warning:`warning:` |nbsp| :diagtext:`top-level module '`:placeholder:`A`:diagtext:`' in private module map, expected a submodule of '`:placeholder:`B`:diagtext:`'`|
 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
++----------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`module '`:placeholder:`A`:diagtext:`' already re-exported as '`:placeholder:`B`:diagtext:`'`|
++----------------------------------------------------------------------------------------------------------------------------------+
+
+
+-Wprofile-instr-missing
+-----------------------
+**Diagnostic text:**
+
++-----------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------+-------------------+---------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`profile data may be incomplete: of` |nbsp| :placeholder:`A` |nbsp| :diagtext:`function`|+-------------+|:diagtext:`,` |nbsp| :placeholder:`B` |nbsp| |+-----------------+| |nbsp| :diagtext:`no data`|
+|                                                                                                                             ||             ||                                             ||:diagtext:`:has` ||                           |
+|                                                                                                                             |+-------------+|                                             |+-----------------+|                           |
+|                                                                                                                             ||:diagtext:`s`||                                             ||:diagtext:`:have`||                           |
+|                                                                                                                             |+-------------+|                                             |+-----------------+|                           |
++-----------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------+-------------------+---------------------------+
+
 
 -Wprofile-instr-out-of-date
 ---------------------------
@@ -8377,13 +8750,13 @@
 
 **Diagnostic text:**
 
-+------------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------+-------------------+---------------------------------------------------------------+-------------------+--------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`profile data may be out of date: of` |nbsp| :placeholder:`A` |nbsp| :diagtext:`function`|+-------------+|:diagtext:`,` |nbsp| :placeholder:`B` |nbsp| |+-----------------+| |nbsp| :diagtext:`no data and` |nbsp| :placeholder:`C` |nbsp| |+-----------------+| |nbsp| :diagtext:`mismatched data that will be ignored`|
-|                                                                                                                              ||             ||                                             ||:diagtext:`:has` ||                                                               ||:diagtext:`:has` ||                                                        |
-|                                                                                                                              |+-------------+|                                             |+-----------------+|                                                               |+-----------------+|                                                        |
-|                                                                                                                              ||:diagtext:`s`||                                             ||:diagtext:`:have`||                                                               ||:diagtext:`:have`||                                                        |
-|                                                                                                                              |+-------------+|                                             |+-----------------+|                                                               |+-----------------+|                                                        |
-+------------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------+-------------------+---------------------------------------------------------------+-------------------+--------------------------------------------------------+
++------------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------+-------------------+--------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`profile data may be out of date: of` |nbsp| :placeholder:`A` |nbsp| :diagtext:`function`|+-------------+|:diagtext:`,` |nbsp| :placeholder:`B` |nbsp| |+-----------------+| |nbsp| :diagtext:`mismatched data that will be ignored`|
+|                                                                                                                              ||             ||                                             ||:diagtext:`:has` ||                                                        |
+|                                                                                                                              |+-------------+|                                             |+-----------------+|                                                        |
+|                                                                                                                              ||:diagtext:`s`||                                             ||:diagtext:`:have`||                                                        |
+|                                                                                                                              |+-------------+|                                             |+-----------------+|                                                        |
++------------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------+-------------------+--------------------------------------------------------+
 
 
 -Wprofile-instr-unprofiled
@@ -8448,9 +8821,29 @@
 
 **Diagnostic text:**
 
-+-----------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`property of type` |nbsp| :placeholder:`A` |nbsp| :diagtext:`was selected for synthesis`|
-+-----------------------------------------------------------------------------------------------------------------------------+
++-------------------------------------------------------+----------------------------------------------------------------+----------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`property` |nbsp| |+--------------------------------------------------------------+| |nbsp| :diagtext:`was selected for synthesis`|
+|                                                       ||+-------------------------------------------+                 ||                                              |
+|                                                       |||:diagtext:`of type` |nbsp| :placeholder:`B`|                 ||                                              |
+|                                                       ||+-------------------------------------------+                 ||                                              |
+|                                                       |+--------------------------------------------------------------+|                                              |
+|                                                       ||+---------------------------------------------------------+   ||                                              |
+|                                                       |||:diagtext:`with attribute '`:placeholder:`B`:diagtext:`'`|   ||                                              |
+|                                                       ||+---------------------------------------------------------+   ||                                              |
+|                                                       |+--------------------------------------------------------------+|                                              |
+|                                                       ||+------------------------------------------------------------+||                                              |
+|                                                       |||:diagtext:`without attribute '`:placeholder:`B`:diagtext:`'`|||                                              |
+|                                                       ||+------------------------------------------------------------+||                                              |
+|                                                       |+--------------------------------------------------------------+|                                              |
+|                                                       ||+-----------------------------------------------+             ||                                              |
+|                                                       |||:diagtext:`with getter` |nbsp| :placeholder:`B`|             ||                                              |
+|                                                       ||+-----------------------------------------------+             ||                                              |
+|                                                       |+--------------------------------------------------------------+|                                              |
+|                                                       ||+-----------------------------------------------+             ||                                              |
+|                                                       |||:diagtext:`with setter` |nbsp| :placeholder:`B`|             ||                                              |
+|                                                       ||+-----------------------------------------------+             ||                                              |
+|                                                       |+--------------------------------------------------------------+|                                              |
++-------------------------------------------------------+----------------------------------------------------------------+----------------------------------------------+
 
 
 -Wqualified-void-return-type
@@ -8542,6 +8935,15 @@
 +-------------------------------------------------------------------------+
 
 
+-Wredundant-parens
+------------------
+**Diagnostic text:**
+
++-----------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`redundant parentheses surrounding declarator`|
++-----------------------------------------------------------------------------------+
+
+
 -Wregister
 ----------
 This diagnostic is enabled by default.
@@ -8551,7 +8953,7 @@
 **Diagnostic text:**
 
 +----------------------------------------------------------------------------------------------+
-|:error:`error:` |nbsp| :diagtext:`ISO C++1z does not allow 'register' storage class specifier`|
+|:error:`error:` |nbsp| :diagtext:`ISO C++17 does not allow 'register' storage class specifier`|
 +----------------------------------------------------------------------------------------------+
 
 
@@ -8695,9 +9097,9 @@
 |                                                   |+--------------------+|                                                                 |
 +---------------------------------------------------+----------------------+-----------------------------------------------------------------+
 
-+--------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`control reaches end of non-void coroutine`|
-+--------------------------------------------------------------------------------+
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`control reaches end of coroutine; which is undefined behavior because the promise type` |nbsp| :placeholder:`A` |nbsp| :diagtext:`does not declare 'return\_void()'`|
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
 +-------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`control reaches end of non-void function`|
@@ -8707,9 +9109,9 @@
 |:warning:`warning:` |nbsp| :diagtext:`control reaches end of non-void lambda`|
 +-----------------------------------------------------------------------------+
 
-+----------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`control may reach end of non-void coroutine`|
-+----------------------------------------------------------------------------------+
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`control may reach end of coroutine; which is undefined behavior because the promise type` |nbsp| :placeholder:`A` |nbsp| :diagtext:`does not declare 'return\_void()'`|
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
 +---------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`control may reach end of non-void function`|
@@ -8789,6 +9191,10 @@
 
 **Diagnostic text:**
 
++--------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`section attribute is specified on redeclared variable`|
++--------------------------------------------------------------------------------------------+
+
 +----------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`section does not match previous declaration`|
 +----------------------------------------------------------------------------------+
@@ -9370,6 +9776,8 @@
 |:warning:`warning:` |nbsp| :diagtext:`this` |nbsp| |+------------------------------------------------------------+| |nbsp| :diagtext:`a prototype`|
 |                                                   ||:diagtext:`function declaration is not`                     ||                               |
 |                                                   |+------------------------------------------------------------+|                               |
+|                                                   ||:diagtext:`block declaration is not`                        ||                               |
+|                                                   |+------------------------------------------------------------+|                               |
 |                                                   ||:diagtext:`old-style function definition is not preceded by`||                               |
 |                                                   |+------------------------------------------------------------+|                               |
 +---------------------------------------------------+--------------------------------------------------------------+-------------------------------+
@@ -9383,6 +9791,8 @@
 |:warning:`warning:` |nbsp| :diagtext:`this` |nbsp| |+------------------------------------------------------------+| |nbsp| :diagtext:`a prototype`|
 |                                                   ||:diagtext:`function declaration is not`                     ||                               |
 |                                                   |+------------------------------------------------------------+|                               |
+|                                                   ||:diagtext:`block declaration is not`                        ||                               |
+|                                                   |+------------------------------------------------------------+|                               |
 |                                                   ||:diagtext:`old-style function definition is not preceded by`||                               |
 |                                                   |+------------------------------------------------------------+|                               |
 +---------------------------------------------------+--------------------------------------------------------------+-------------------------------+
@@ -9576,7 +9986,7 @@
 ----------------------
 Some of the diagnostics controlled by this flag are enabled by default.
 
-Also controls `-Wtautological-constant-out-of-range-compare`_, `-Wtautological-overlap-compare`_, `-Wtautological-pointer-compare`_, `-Wtautological-undefined-compare`_.
+Also controls `-Wtautological-constant-compare`_, `-Wtautological-overlap-compare`_, `-Wtautological-pointer-compare`_, `-Wtautological-undefined-compare`_.
 
 **Diagnostic text:**
 
@@ -9598,21 +10008,22 @@
 |                                                                                     |+-----------------+|
 +-------------------------------------------------------------------------------------+-------------------+
 
-+-------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`comparison of unsigned`|+------------------------+| |nbsp| :diagtext:`expression` |nbsp| :placeholder:`A` |nbsp| :diagtext:`is always` |nbsp| :placeholder:`B`|
-|                                                             ||                        ||                                                                                                           |
-|                                                             |+------------------------+|                                                                                                           |
-|                                                             || |nbsp| :diagtext:`enum`||                                                                                                           |
-|                                                             |+------------------------+|                                                                                                           |
-+-------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------------------------------------------+
 
-+--------------------------------------------------------------------------------------------------------+--------------------------+----------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`comparison of` |nbsp| :placeholder:`A` |nbsp| :diagtext:`unsigned`|+------------------------+| |nbsp| :diagtext:`expression is always` |nbsp| :placeholder:`B`|
-|                                                                                                        ||                        ||                                                                |
-|                                                                                                        |+------------------------+|                                                                |
-|                                                                                                        || |nbsp| :diagtext:`enum`||                                                                |
-|                                                                                                        |+------------------------+|                                                                |
-+--------------------------------------------------------------------------------------------------------+--------------------------+----------------------------------------------------------------+
+-Wtautological-constant-compare
+-------------------------------
+This diagnostic is enabled by default.
+
+Also controls `-Wtautological-constant-out-of-range-compare`_, `-Wtautological-unsigned-enum-zero-compare`_, `-Wtautological-unsigned-zero-compare`_.
+
+**Diagnostic text:**
+
++---------------------------------------------------------+------------------+--------------------------------+------------------+-------------------------------------+-------------------+
+|:warning:`warning:` |nbsp| :diagtext:`comparison` |nbsp| |+----------------+| |nbsp| :placeholder:`C` |nbsp| |+----------------+| |nbsp| :diagtext:`is always` |nbsp| |+-----------------+|
+|                                                         ||:placeholder:`D`||                                ||:placeholder:`B`||                                     ||:diagtext:`false`||
+|                                                         |+----------------+|                                |+----------------+|                                     |+-----------------+|
+|                                                         ||:placeholder:`B`||                                ||:placeholder:`D`||                                     ||:diagtext:`true` ||
+|                                                         |+----------------+|                                |+----------------+|                                     |+-----------------+|
++---------------------------------------------------------+------------------+--------------------------------+------------------+-------------------------------------+-------------------+
 
 
 -Wtautological-constant-out-of-range-compare
@@ -9695,6 +10106,36 @@
 +------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+
 
 
+-Wtautological-unsigned-enum-zero-compare
+-----------------------------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++------------------------------------------------------------+--------------------------------------+--------------------------------+--------------------------------------+-------------------------------------+-------------------+
+|:warning:`warning:` |nbsp| :diagtext:`comparison of` |nbsp| |+------------------------------------+| |nbsp| :placeholder:`C` |nbsp| |+------------------------------------+| |nbsp| :diagtext:`is always` |nbsp| |+-----------------+|
+|                                                            ||:placeholder:`D`                    ||                                ||:diagtext:`unsigned enum expression`||                                     ||:diagtext:`false`||
+|                                                            |+------------------------------------+|                                |+------------------------------------+|                                     |+-----------------+|
+|                                                            ||:diagtext:`unsigned enum expression`||                                ||:placeholder:`D`                    ||                                     ||:diagtext:`true` ||
+|                                                            |+------------------------------------+|                                |+------------------------------------+|                                     |+-----------------+|
++------------------------------------------------------------+--------------------------------------+--------------------------------+--------------------------------------+-------------------------------------+-------------------+
+
+
+-Wtautological-unsigned-zero-compare
+------------------------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++------------------------------------------------------------+---------------------------------+--------------------------------+---------------------------------+-------------------------------------+-------------------+
+|:warning:`warning:` |nbsp| :diagtext:`comparison of` |nbsp| |+-------------------------------+| |nbsp| :placeholder:`C` |nbsp| |+-------------------------------+| |nbsp| :diagtext:`is always` |nbsp| |+-----------------+|
+|                                                            ||:placeholder:`D`               ||                                ||:diagtext:`unsigned expression`||                                     ||:diagtext:`false`||
+|                                                            |+-------------------------------+|                                |+-------------------------------+|                                     |+-----------------+|
+|                                                            ||:diagtext:`unsigned expression`||                                ||:placeholder:`D`               ||                                     ||:diagtext:`true` ||
+|                                                            |+-------------------------------+|                                |+-------------------------------+|                                     |+-----------------+|
++------------------------------------------------------------+---------------------------------+--------------------------------+---------------------------------+-------------------------------------+-------------------+
+
+
 -Wtentative-definition-incomplete-type
 --------------------------------------
 This diagnostic is enabled by default.
@@ -10110,6 +10551,19 @@
 +---------------------------+----------------------+-----------------------------------------------------------------------------------+
 
 
+-Wundefined-internal-type
+-------------------------
+**Diagnostic text:**
+
++---------------------------------------------------------------------------------------------------------+----------------------+----------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`ISO C++ requires a definition in this translation unit for` |nbsp| |+--------------------+| |nbsp| :placeholder:`B` |nbsp| :diagtext:`because its type does not have linkage`|
+|                                                                                                         ||:diagtext:`function`||                                                                                  |
+|                                                                                                         |+--------------------+|                                                                                  |
+|                                                                                                         ||:diagtext:`variable`||                                                                                  |
+|                                                                                                         |+--------------------+|                                                                                  |
++---------------------------------------------------------------------------------------------------------+----------------------+----------------------------------------------------------------------------------+
+
+
 -Wundefined-reinterpret-cast
 ----------------------------
 **Diagnostic text:**
@@ -10149,19 +10603,22 @@
 
 -Wunguarded-availability
 ------------------------
+Some of the diagnostics controlled by this flag are enabled by default.
+
+Also controls `-Wunguarded-availability-new`_.
+
 **Diagnostic text:**
 
-+----------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :placeholder:`A` |nbsp| :diagtext:`is only available conditionally`|
-+----------------------------------------------------------------------------------------------+
++---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :placeholder:`A` |nbsp| :diagtext:`is only available on` |nbsp| :placeholder:`B` |nbsp| :placeholder:`C` |nbsp| :diagtext:`or newer`|
++---------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
-+------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :placeholder:`A` |nbsp| :diagtext:`may be partial because the receiver type is unknown`|
-+------------------------------------------------------------------------------------------------------------------+
 
-+--------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :placeholder:`A` |nbsp| :diagtext:`is partial:` |nbsp| :placeholder:`B`|
-+--------------------------------------------------------------------------------------------------+
+-Wunguarded-availability-new
+----------------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
 
 +---------------------------------------------------------------------------------------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :placeholder:`A` |nbsp| :diagtext:`is only available on` |nbsp| :placeholder:`B` |nbsp| :placeholder:`C` |nbsp| :diagtext:`or newer`|
@@ -10314,10 +10771,6 @@
 |:warning:`warning:` |nbsp| :diagtext:`unexpected token in pragma diagnostic`|
 +----------------------------------------------------------------------------+
 
-+----------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`unknown warning group '`:placeholder:`A`:diagtext:`', ignored`|
-+----------------------------------------------------------------------------------------------------+
-
 +-------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`unknown pragma ignored`|
 +-------------------------------------------------------------+
@@ -10376,6 +10829,10 @@
 
 **Diagnostic text:**
 
++----------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`unknown warning group '`:placeholder:`A`:diagtext:`', ignored`|
++----------------------------------------------------------------------------------------------------+
+
 +------------------------------------------------------+---------------------+---------------------------------------------------------+--------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`unknown` |nbsp| |+-------------------+| |nbsp| :diagtext:`option '`:placeholder:`B`:diagtext:`'`|+------------------------------------------------------------+|
 |                                                      ||:diagtext:`warning`||                                                         ||                                                            ||
@@ -10488,6 +10945,36 @@
 +-----------------------------------------------------------------------------------------------------+
 
 
+-Wunsupported-abs
+-----------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++-----------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`ignoring '-mabs=2008' option because the '`:placeholder:`A`:diagtext:`' architecture does not support it`|
++-----------------------------------------------------------------------------------------------------------------------------------------------+
+
++-------------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`ignoring '-mabs=legacy' option because the '`:placeholder:`A`:diagtext:`' architecture does not support it`|
++-------------------------------------------------------------------------------------------------------------------------------------------------+
+
+
+-Wunsupported-availability-guard
+--------------------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++---------------------------+------------------------------------+--------------------------------------------------------------+------------------------------------+---------------------+
+|:warning:`warning:` |nbsp| |+----------------------------------+| |nbsp| :diagtext:`does not guard availability here; use if (`|+----------------------------------+|:diagtext:`) instead`|
+|                           ||:diagtext:`@available`            ||                                                              ||:diagtext:`@available`            ||                     |
+|                           |+----------------------------------+|                                                              |+----------------------------------+|                     |
+|                           ||:diagtext:`\_\_builtin\_available`||                                                              ||:diagtext:`\_\_builtin\_available`||                     |
+|                           |+----------------------------------+|                                                              |+----------------------------------+|                     |
++---------------------------+------------------------------------+--------------------------------------------------------------+------------------------------------+---------------------+
+
+
 -Wunsupported-cb
 ----------------
 This diagnostic is enabled by default.
@@ -10527,6 +11014,21 @@
 +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
 
+-Wunsupported-gpopt
+-------------------
+This diagnostic is enabled by default.
+
+**Diagnostic text:**
+
++--------------------------------------------------------------------------------------------------+-------------------------------------------+----------------------+
+|:warning:`warning:` |nbsp| :diagtext:`ignoring '-mgpopt' option as it cannot be used with` |nbsp| |+-----------------------------------------+|:diagtext:`-mabicalls`|
+|                                                                                                  ||                                         ||                      |
+|                                                                                                  |+-----------------------------------------+|                      |
+|                                                                                                  ||:diagtext:`the implicit usage of` |nbsp| ||                      |
+|                                                                                                  |+-----------------------------------------+|                      |
++--------------------------------------------------------------------------------------------------+-------------------------------------------+----------------------+
+
+
 -Wunsupported-nan
 -----------------
 This diagnostic is enabled by default.
@@ -10585,14 +11087,14 @@
 
 **Diagnostic text:**
 
++-------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`argument '`:placeholder:`A`:diagtext:`' requires profile-guided optimization information`|
++-------------------------------------------------------------------------------------------------------------------------------+
+
 +---------------------------------------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`joined argument expects additional value: '`:placeholder:`A`:diagtext:`'`|
 +---------------------------------------------------------------------------------------------------------------+
 
-+-----------------------------------------------------------------------------------------------------------------------------+
-|:warning:`warning:` |nbsp| :diagtext:`argument '-fdiagnostics-show-hotness' requires profile-guided optimization information`|
-+-----------------------------------------------------------------------------------------------------------------------------+
-
 +----------------------------------------------------------------------------------------------------+----------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :placeholder:`A`:diagtext:`: '`:placeholder:`B`:diagtext:`' input unused`|+--------------------------------------------------------------------+|
 |                                                                                                    ||+------------------------------------------------------------------+||
@@ -10621,6 +11123,10 @@
 |:warning:`warning:` |nbsp| :diagtext:`argument unused during compilation: '`:placeholder:`A`:diagtext:`'`|
 +---------------------------------------------------------------------------------------------------------+
 
++----------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`the flag '`:placeholder:`A`:diagtext:`' has been deprecated and will be ignored`|
++----------------------------------------------------------------------------------------------------------------------+
+
 
 -Wunused-comparison
 -------------------
@@ -10781,6 +11287,21 @@
 +-------------------------------------------------------------------------------------------------------------------------------------------+
 
 
+-Wunused-template
+-----------------
+Also controls `-Wunneeded-internal-declaration`_.
+
+**Diagnostic text:**
+
++-----------------------------------------------------+----------------------+----------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`unused` |nbsp| |+--------------------+| |nbsp| :diagtext:`template` |nbsp| :placeholder:`B`|
+|                                                     ||:diagtext:`function`||                                                    |
+|                                                     |+--------------------+|                                                    |
+|                                                     ||:diagtext:`variable`||                                                    |
+|                                                     |+--------------------+|                                                    |
++-----------------------------------------------------+----------------------+----------------------------------------------------+
+
+
 -Wunused-value
 --------------
 This diagnostic is enabled by default.
@@ -10888,12 +11409,18 @@
 
 -Wvariadic-macros
 -----------------
+Some of the diagnostics controlled by this flag are enabled by default.
+
 **Diagnostic text:**
 
 +--------------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`named variadic macros are a GNU extension`|
 +--------------------------------------------------------------------------------+
 
++-----------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`\_\_VA\_OPT\_\_ can only appear in the expansion of a variadic macro`|
++-----------------------------------------------------------------------------------------------------------+
+
 +------------------------------------------------------------------------+
 |:warning:`warning:` |nbsp| :diagtext:`variadic macros are a C99 feature`|
 +------------------------------------------------------------------------+
@@ -10952,6 +11479,10 @@
 |:warning:`warning:` |nbsp| :diagtext:`parentheses were disambiguated as a function declaration`|
 +-----------------------------------------------------------------------------------------------+
 
++-----------------------------------------------------------------------------------------------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`parentheses were disambiguated as redundant parentheses around declaration of variable named` |nbsp| :placeholder:`A`|
++-----------------------------------------------------------------------------------------------------------------------------------------------------------+
+
 
 -Wvisibility
 ------------
@@ -11037,6 +11568,15 @@
 Synonym for `-Wwritable-strings`_.
 
 
+-Wzero-as-null-pointer-constant
+-------------------------------
+**Diagnostic text:**
+
++--------------------------------------------------------------------+
+|:warning:`warning:` |nbsp| :diagtext:`zero as null pointer constant`|
++--------------------------------------------------------------------+
+
+
 -Wzero-length-array
 -------------------
 **Diagnostic text:**
diff --git a/docs/HardwareAssistedAddressSanitizerDesign.rst b/docs/HardwareAssistedAddressSanitizerDesign.rst
new file mode 100644
index 0000000..71a9df1
--- /dev/null
+++ b/docs/HardwareAssistedAddressSanitizerDesign.rst
@@ -0,0 +1,160 @@
+=======================================================
+Hardware-assisted AddressSanitizer Design Documentation
+=======================================================
+
+This page is a design document for
+**hardware-assisted AddressSanitizer** (or **HWASAN**)
+a tool similar to :doc:`AddressSanitizer`,
+but based on partial hardware assistance.
+
+The document is a draft, suggestions are welcome.
+
+
+Introduction
+============
+
+:doc:`AddressSanitizer`
+tags every 8 bytes of the application memory with a 1 byte tag (using *shadow memory*),
+uses *redzones* to find buffer-overflows and
+*quarantine* to find use-after-free.
+The redzones, the quarantine, and, to a less extent, the shadow, are the
+sources of AddressSanitizer's memory overhead.
+See the `AddressSanitizer paper`_ for details.
+
+AArch64 has the `Address Tagging`_ (or top-byte-ignore, TBI), a hardware feature that allows
+software to use 8 most significant bits of a 64-bit pointer as
+a tag. HWASAN uses `Address Tagging`_
+to implement a memory safety tool, similar to :doc:`AddressSanitizer`,
+but with smaller memory overhead and slightly different (mostly better)
+accuracy guarantees.
+
+Algorithm
+=========
+* Every heap/stack/global memory object is forcibly aligned by `N` bytes
+  (`N` is e.g. 16 or 64). We call `N` the **granularity** of tagging.
+* For every such object a random `K`-bit tag `T` is chosen (`K` is e.g. 4 or 8)
+* The pointer to the object is tagged with `T`.
+* The memory for the object is also tagged with `T`
+  (using a `N=>1` shadow memory)
+* Every load and store is instrumented to read the memory tag and compare it
+  with the pointer tag, exception is raised on tag mismatch.
+
+Instrumentation
+===============
+
+Memory Accesses
+---------------
+All memory accesses are prefixed with an inline instruction sequence that
+verifies the tags. Currently, the following sequence is used:
+
+
+.. code-block:: asm
+
+  // int foo(int *a) { return *a; }
+  // clang -O2 --target=aarch64-linux -fsanitize=hwaddress -c load.c
+  foo:
+       0:	08 dc 44 d3 	ubfx	x8, x0, #4, #52  // shadow address
+       4:	08 01 40 39 	ldrb	w8, [x8]         // load shadow
+       8:	09 fc 78 d3 	lsr	x9, x0, #56      // address tag
+       c:	3f 01 08 6b 	cmp	w9, w8           // compare tags
+      10:	61 00 00 54 	b.ne	#12              // jump on mismatch
+      14:	00 00 40 b9 	ldr	w0, [x0]         // original load
+      18:	c0 03 5f d6 	ret             
+      1c:	40 20 40 d4 	hlt	#0x102           // halt
+
+
+Alternatively, memory accesses are prefixed with a function call.
+
+Heap
+----
+
+Tagging the heap memory/pointers is done by `malloc`.
+This can be based on any malloc that forces all objects to be N-aligned.
+`free` tags the memory with a different tag.
+
+Stack
+-----
+
+Stack frames are instrumented by aligning all non-promotable allocas
+by `N` and tagging stack memory in function prologue and epilogue.
+
+Tags for different allocas in one function are **not** generated
+independently; doing that in a function with `M` allocas would require
+maintaining `M` live stack pointers, significantly increasing register
+pressure. Instead we generate a single base tag value in the prologue,
+and build the tag for alloca number `M` as `ReTag(BaseTag, M)`, where
+ReTag can be as simple as exclusive-or with constant `M`.
+
+Stack instrumentation is expected to be a major source of overhead,
+but could be optional.
+
+Globals
+-------
+
+TODO: details.
+
+Error reporting
+---------------
+
+Errors are generated by the `HLT` instruction and are handled by a signal handler.
+
+Attribute
+---------
+
+HWASAN uses its own LLVM IR Attribute `sanitize_hwaddress` and a matching
+C function attribute. An alternative would be to re-use ASAN's attribute
+`sanitize_address`. The reasons to use a separate attribute are:
+
+  * Users may need to disable ASAN but not HWASAN, or vise versa,
+    because the tools have different trade-offs and compatibility issues.
+  * LLVM (ideally) does not use flags to decide which pass is being used,
+    ASAN or HWASAN are being applied, based on the function attributes.
+
+This does mean that users of HWASAN may need to add the new attribute
+to the code that already uses the old attribute.
+
+
+Comparison with AddressSanitizer
+================================
+
+HWASAN:
+  * Is less portable than :doc:`AddressSanitizer`
+    as it relies on hardware `Address Tagging`_ (AArch64).
+    Address Tagging can be emulated with compiler instrumentation,
+    but it will require the instrumentation to remove the tags before
+    any load or store, which is infeasible in any realistic environment
+    that contains non-instrumented code.
+  * May have compatibility problems if the target code uses higher
+    pointer bits for other purposes.
+  * May require changes in the OS kernels (e.g. Linux seems to dislike
+    tagged pointers passed from address space:
+    https://www.kernel.org/doc/Documentation/arm64/tagged-pointers.txt).
+  * **Does not require redzones to detect buffer overflows**,
+    but the buffer overflow detection is probabilistic, with roughly
+    `(2**K-1)/(2**K)` probability of catching a bug.
+  * **Does not require quarantine to detect heap-use-after-free,
+    or stack-use-after-return**.
+    The detection is similarly probabilistic.
+
+The memory overhead of HWASAN is expected to be much smaller
+than that of AddressSanitizer:
+`1/N` extra memory for the shadow
+and some overhead due to `N`-aligning all objects.
+
+
+Related Work
+============
+* `SPARC ADI`_ implements a similar tool mostly in hardware.
+* `Effective and Efficient Memory Protection Using Dynamic Tainting`_ discusses
+  similar approaches ("lock & key").
+* `Watchdog`_ discussed a heavier, but still somewhat similar
+  "lock & key" approach.
+* *TODO: add more "related work" links. Suggestions are welcome.*
+
+
+.. _Watchdog: http://www.cis.upenn.edu/acg/papers/isca12_watchdog.pdf
+.. _Effective and Efficient Memory Protection Using Dynamic Tainting: https://www.cc.gatech.edu/~orso/papers/clause.doudalis.orso.prvulovic.pdf
+.. _SPARC ADI: https://lazytyped.blogspot.com/2017/09/getting-started-with-adi.html
+.. _AddressSanitizer paper: https://www.usenix.org/system/files/conference/atc12/atc12-final39.pdf
+.. _Address Tagging: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch12s05s01.html
+
diff --git a/docs/InternalsManual.rst b/docs/InternalsManual.rst
index 058c63f..b23d314 100644
--- a/docs/InternalsManual.rst
+++ b/docs/InternalsManual.rst
@@ -1823,7 +1823,7 @@
 handling, requiring extra implementation efforts to ensure the attribute
 appertains to the appropriate subject, etc.
 
-If the attribute should not be propagated from from a template declaration to an
+If the attribute should not be propagated from a template declaration to an
 instantiation of the template, set the ``Clone`` member to 0. By default, all
 attributes will be cloned to template instantiations.
 
diff --git a/docs/JSONCompilationDatabase.rst b/docs/JSONCompilationDatabase.rst
index 8631e83..1f3441b 100644
--- a/docs/JSONCompilationDatabase.rst
+++ b/docs/JSONCompilationDatabase.rst
@@ -91,3 +91,9 @@
 the top of the build directory. Clang tools are pointed to the top of
 the build directory to detect the file and use the compilation database
 to parse C++ code in the source tree.
+
+Alternatives
+============
+For simple projects, Clang tools also recognize a compile_flags.txt file.
+This should contain one flag per line. The same flags will be used to compile
+any file.
diff --git a/docs/LanguageExtensions.rst b/docs/LanguageExtensions.rst
index 652145f..bb9b4c1 100644
--- a/docs/LanguageExtensions.rst
+++ b/docs/LanguageExtensions.rst
@@ -139,6 +139,35 @@
 the same name.  For instance, ``gnu::__const__`` can be used instead of
 ``gnu::const``.
 
+``__has_c_attribute``
+---------------------
+
+This function-like macro takes a single argument that is the name of an
+attribute exposed with the double square-bracket syntax in C mode. The argument
+can either be a single identifier or a scoped identifier. If the attribute is
+supported, a nonzero value is returned. If the attribute is not supported by the
+current compilation target, this macro evaluates to 0. It can be used like this:
+
+.. code-block:: c
+
+  #ifndef __has_c_attribute         // Optional of course.
+    #define __has_c_attribute(x) 0  // Compatibility with non-clang compilers.
+  #endif
+
+  ...
+  #if __has_c_attribute(fallthrough)
+    #define FALLTHROUGH [[fallthrough]]
+  #else
+    #define FALLTHROUGH
+  #endif
+  ...
+
+The attribute identifier (but not scope) can also be specified with a preceding
+and following ``__`` (double underscore) to avoid interference from a macro with
+the same name.  For instance, ``gnu::__const__`` can be used instead of
+``gnu::const``.
+
+
 ``__has_attribute``
 -------------------
 
@@ -436,6 +465,49 @@
 
 See also :ref:`langext-__builtin_shufflevector`, :ref:`langext-__builtin_convertvector`.
 
+Half-Precision Floating Point
+=============================
+
+Clang supports two half-precision (16-bit) floating point types: ``__fp16`` and
+``_Float16``. ``__fp16`` is defined in the ARM C Language Extensions (`ACLE
+<http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053d/IHI0053D_acle_2_1.pdf>`_)
+and ``_Float16`` in ISO/IEC TS 18661-3:2015.
+
+``__fp16`` is a storage and interchange format only. This means that values of
+``__fp16`` promote to (at least) float when used in arithmetic operations.
+There are two ``__fp16`` formats. Clang supports the IEEE 754-2008 format and
+not the ARM alternative format.
+
+ISO/IEC TS 18661-3:2015 defines C support for additional floating point types.
+``_FloatN`` is defined as a binary floating type, where the N suffix denotes
+the number of bits and is 16, 32, 64, or greater and equal to 128 and a
+multiple of 32. Clang supports ``_Float16``. The difference from ``__fp16`` is
+that arithmetic on ``_Float16`` is performed in half-precision, thus it is not
+a storage-only format. ``_Float16`` is available as a source language type in
+both C and C++ mode.
+
+It is recommended that portable code use the ``_Float16`` type because
+``__fp16`` is an ARM C-Language Extension (ACLE), whereas ``_Float16`` is
+defined by the C standards committee, so using ``_Float16`` will not prevent
+code from being ported to architectures other than Arm.  Also, ``_Float16``
+arithmetic and operations will directly map on half-precision instructions when
+they are available (e.g. Armv8.2-A), avoiding conversions to/from
+single-precision, and thus will result in more performant code. If
+half-precision instructions are unavailable, values will be promoted to
+single-precision, similar to the semantics of ``__fp16`` except that the
+results will be stored in single-precision.
+
+In an arithmetic operation where one operand is of ``__fp16`` type and the
+other is of ``_Float16`` type, the ``_Float16`` type is first converted to
+``__fp16`` type and then the operation is completed as if both operands were of
+``__fp16`` type.
+
+To define a ``_Float16`` literal, suffix ``f16`` can be appended to the compile-time
+constant declaration. There is no default argument promotion for ``_Float16``; this
+applies to the standard floating types only. As a consequence, for example, an
+explicit cast is required for printing a ``_Float16`` value (there is no string
+format specifier for ``_Float16``).
+
 Messages on ``deprecated`` and ``unavailable`` Attributes
 =========================================================
 
@@ -1024,6 +1096,11 @@
 * ``__is_constructible`` (MSVC 2013, clang)
 * ``__is_nothrow_constructible`` (MSVC 2013, clang)
 * ``__is_assignable`` (MSVC 2015, clang)
+* ``__reference_binds_to_temporary(T, U)`` (Clang):  Determines whether a
+  reference of type ``T`` bound to an expression of type ``U`` would bind to a
+  materialized temporary object. If ``T`` is not a reference type the result
+  is false. Note this trait will also return false when the initialization of
+  ``T`` from ``U`` is ill-formed.
 
 Blocks
 ======
@@ -1114,12 +1191,14 @@
 
 Clang provides support for :doc:`automated reference counting
 <AutomaticReferenceCounting>` in Objective-C, which eliminates the need
-for manual ``retain``/``release``/``autorelease`` message sends.  There are two
+for manual ``retain``/``release``/``autorelease`` message sends.  There are three
 feature macros associated with automatic reference counting:
 ``__has_feature(objc_arc)`` indicates the availability of automated reference
 counting in general, while ``__has_feature(objc_arc_weak)`` indicates that
 automated reference counting also includes support for ``__weak`` pointers to
-Objective-C objects.
+Objective-C objects. ``__has_feature(objc_arc_fields)`` indicates that C structs
+are allowed to have fields that are pointers to Objective-C objects managed by
+automatic reference counting.
 
 .. _objc-fixed-enum:
 
@@ -2648,3 +2727,11 @@
 * The decision about which section-kind applies to each global is taken in the back-end.
   Once the section-kind is known, appropriate section name, as specified by the user using
   ``#pragma clang section`` directive, is applied to that global.
+
+Specifying Linker Options on ELF Targets
+========================================
+
+The ``#pragma comment(lib, ...)`` directive is supported on all ELF targets.
+The second parameter is the library name (without the traditional Unix prefix of
+``lib``).  This allows you to provide an implicit link of dependent libraries.
+
diff --git a/docs/LibASTMatchersReference.html b/docs/LibASTMatchersReference.html
index b8d4ed5..0a9fcceb0 100644
--- a/docs/LibASTMatchersReference.html
+++ b/docs/LibASTMatchersReference.html
@@ -346,6 +346,15 @@
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;</td><td class="name" onclick="toggle('objcCategoryImplDecl0')"><a name="objcCategoryImplDecl0Anchor">objcCategoryImplDecl</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCCategoryImplDecl.html">ObjCCategoryImplDecl</a>&gt;...</td></tr>
+<tr><td colspan="4" class="doc" id="objcCategoryImplDecl0"><pre>Matches Objective-C category definitions.
+
+Example matches Foo (Additions)
+  @implementation Foo (Additions)
+  @end
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;</td><td class="name" onclick="toggle('objcImplementationDecl0')"><a name="objcImplementationDecl0Anchor">objcImplementationDecl</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCImplementationDecl.html">ObjCImplementationDecl</a>&gt;...</td></tr>
 <tr><td colspan="4" class="doc" id="objcImplementationDecl0"><pre>Matches Objective-C implementation declarations.
 
@@ -691,7 +700,7 @@
 Given
   switch(a) { case 42: break; default: break; }
 caseStmt()
-  matches 'case 42: break;'.
+  matches 'case 42:'.
 </pre></td></tr>
 
 
@@ -732,7 +741,7 @@
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('compoundStmt0')"><a name="compoundStmt0Anchor">compoundStmt</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CompoundStmt.html">CompoundStmt</a>&gt;...</td></tr>
 <tr><td colspan="4" class="doc" id="compoundStmt0"><pre>Matches compound statements.
 
-Example matches '{}' and '{{}}'in 'for (;;) {{}}'
+Example matches '{}' and '{{}}' in 'for (;;) {{}}'
   for (;;) {{}}
 </pre></td></tr>
 
@@ -1019,7 +1028,7 @@
 Given
   switch(a) { case 42: break; default: break; }
 defaultStmt()
-  matches 'default: break;'.
+  matches 'default:'.
 </pre></td></tr>
 
 
@@ -1188,9 +1197,9 @@
 materializeTemporaryExpr() matches 'f()' in these statements
   T u(f());
   g(f());
+  f().func();
 but does not match
   f();
-  f().func();
 </pre></td></tr>
 
 
@@ -1216,6 +1225,24 @@
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('objcCatchStmt0')"><a name="objcCatchStmt0Anchor">objcCatchStmt</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCAtCatchStmt.html">ObjCAtCatchStmt</a>&gt;...</td></tr>
+<tr><td colspan="4" class="doc" id="objcCatchStmt0"><pre>Matches Objective-C @catch statements.
+
+Example matches @catch
+  @try {}
+  @catch (...) {}
+</pre></td></tr>
+
+
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('objcFinallyStmt0')"><a name="objcFinallyStmt0Anchor">objcFinallyStmt</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCAtFinallyStmt.html">ObjCAtFinallyStmt</a>&gt;...</td></tr>
+<tr><td colspan="4" class="doc" id="objcFinallyStmt0"><pre>Matches Objective-C @finally statements.
+
+Example matches @finally
+  @try {}
+  @finally {}
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('objcMessageExpr0')"><a name="objcMessageExpr0Anchor">objcMessageExpr</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCMessageExpr.html">ObjCMessageExpr</a>&gt;...</td></tr>
 <tr><td colspan="4" class="doc" id="objcMessageExpr0"><pre>Matches ObjectiveC Message invocation expressions.
 
@@ -1227,6 +1254,22 @@
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('objcThrowStmt0')"><a name="objcThrowStmt0Anchor">objcThrowStmt</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCAtThrowStmt.html">ObjCAtThrowStmt</a>&gt;...</td></tr>
+<tr><td colspan="4" class="doc" id="objcThrowStmt0"><pre>Matches Objective-C statements.
+
+Example matches @throw obj;
+</pre></td></tr>
+
+
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('objcTryStmt0')"><a name="objcTryStmt0Anchor">objcTryStmt</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCAtTryStmt.html">ObjCAtTryStmt</a>&gt;...</td></tr>
+<tr><td colspan="4" class="doc" id="objcTryStmt0"><pre>Matches Objective-C @try statements.
+
+Example matches @try
+  @try {}
+  @catch (...) {}
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('opaqueValueExpr0')"><a name="opaqueValueExpr0Anchor">opaqueValueExpr</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1OpaqueValueExpr.html">OpaqueValueExpr</a>&gt;...</td></tr>
 <tr><td colspan="4" class="doc" id="opaqueValueExpr0"><pre>Matches opaque value expressions. They are used as helpers
 to reference another expressions and can be met
@@ -1326,7 +1369,7 @@
 Given
   switch(a) { case 42: break; default: break; }
 switchCase()
-  matches 'case 42: break;' and 'default: break;'.
+  matches 'case 42:' and 'default:'.
 </pre></td></tr>
 
 
@@ -1883,7 +1926,11 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBoolLiteralExpr.html">CXXBoolLiteralExpr</a>&gt;</td><td class="name" onclick="toggle('equals2')"><a name="equals2Anchor">equals</a></td><td>ValueT  Value</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBoolLiteralExpr.html">CXXBoolLiteralExpr</a>&gt;</td><td class="name" onclick="toggle('equals5')"><a name="equals5Anchor">equals</a></td><td>bool Value</td></tr>
+<tr><td colspan="4" class="doc" id="equals5"><pre></pre></td></tr>
+
+
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBoolLiteralExpr.html">CXXBoolLiteralExpr</a>&gt;</td><td class="name" onclick="toggle('equals2')"><a name="equals2Anchor">equals</a></td><td>const ValueT  Value</td></tr>
 <tr><td colspan="4" class="doc" id="equals2"><pre>Matches literals that are equal to the given value of type ValueT.
 
 Given
@@ -1909,10 +1956,6 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBoolLiteralExpr.html">CXXBoolLiteralExpr</a>&gt;</td><td class="name" onclick="toggle('equals5')"><a name="equals5Anchor">equals</a></td><td>bool Value</td></tr>
-<tr><td colspan="4" class="doc" id="equals5"><pre></pre></td></tr>
-
-
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBoolLiteralExpr.html">CXXBoolLiteralExpr</a>&gt;</td><td class="name" onclick="toggle('equals11')"><a name="equals11Anchor">equals</a></td><td>double Value</td></tr>
 <tr><td colspan="4" class="doc" id="equals11"><pre></pre></td></tr>
 
@@ -2232,6 +2275,16 @@
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;</td><td class="name" onclick="toggle('isArray0')"><a name="isArray0Anchor">isArray</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="isArray0"><pre>Matches array new expressions.
+
+Given:
+  MyClass *p1 = new MyClass[10];
+cxxNewExpr(isArray())
+  matches the expression 'new MyClass[10]'.
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXOperatorCallExpr.html">CXXOperatorCallExpr</a>&gt;</td><td class="name" onclick="toggle('hasOverloadedOperatorName1')"><a name="hasOverloadedOperatorName1Anchor">hasOverloadedOperatorName</a></td><td>StringRef Name</td></tr>
 <tr><td colspan="4" class="doc" id="hasOverloadedOperatorName1"><pre>Matches overloaded operator names.
 
@@ -2253,6 +2306,15 @@
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>&gt;</td><td class="name" onclick="toggle('hasDefinition0')"><a name="hasDefinition0Anchor">hasDefinition</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="hasDefinition0"><pre>Matches a class declaration that is defined.
+
+Example matches x (matcher = cxxRecordDecl(hasDefinition()))
+class x {};
+class y;
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>&gt;</td><td class="name" onclick="toggle('isDerivedFrom1')"><a name="isDerivedFrom1Anchor">isDerivedFrom</a></td><td>std::string BaseName</td></tr>
 <tr><td colspan="4" class="doc" id="isDerivedFrom1"><pre>Overloaded method as shortcut for isDerivedFrom(hasName(...)).
 </pre></td></tr>
@@ -2314,6 +2376,8 @@
   template &lt;typename T&gt; class X {}; class A {}; X&lt;A&gt; x;
 or
   template &lt;typename T&gt; class X {}; class A {}; template class X&lt;A&gt;;
+or
+  template &lt;typename T&gt; class X {}; class A {}; extern template class X&lt;A&gt;;
 cxxRecordDecl(hasName("::X"), isTemplateInstantiation())
   matches the template instantiation of X&lt;A&gt;.
 
@@ -2346,7 +2410,11 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CharacterLiteral.html">CharacterLiteral</a>&gt;</td><td class="name" onclick="toggle('equals3')"><a name="equals3Anchor">equals</a></td><td>ValueT  Value</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CharacterLiteral.html">CharacterLiteral</a>&gt;</td><td class="name" onclick="toggle('equals4')"><a name="equals4Anchor">equals</a></td><td>bool Value</td></tr>
+<tr><td colspan="4" class="doc" id="equals4"><pre></pre></td></tr>
+
+
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CharacterLiteral.html">CharacterLiteral</a>&gt;</td><td class="name" onclick="toggle('equals3')"><a name="equals3Anchor">equals</a></td><td>const ValueT  Value</td></tr>
 <tr><td colspan="4" class="doc" id="equals3"><pre>Matches literals that are equal to the given value of type ValueT.
 
 Given
@@ -2372,10 +2440,6 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CharacterLiteral.html">CharacterLiteral</a>&gt;</td><td class="name" onclick="toggle('equals4')"><a name="equals4Anchor">equals</a></td><td>bool Value</td></tr>
-<tr><td colspan="4" class="doc" id="equals4"><pre></pre></td></tr>
-
-
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CharacterLiteral.html">CharacterLiteral</a>&gt;</td><td class="name" onclick="toggle('equals10')"><a name="equals10Anchor">equals</a></td><td>double Value</td></tr>
 <tr><td colspan="4" class="doc" id="equals10"><pre></pre></td></tr>
 
@@ -2582,6 +2646,15 @@
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1EnumDecl.html">EnumDecl</a>&gt;</td><td class="name" onclick="toggle('isScoped0')"><a name="isScoped0Anchor">isScoped</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="isScoped0"><pre>Matches C++11 scoped enum declaration.
+
+Example matches Y (matcher = enumDecl(isScoped()))
+enum X {};
+enum class Y {};
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FieldDecl.html">FieldDecl</a>&gt;</td><td class="name" onclick="toggle('hasBitWidth0')"><a name="hasBitWidth0Anchor">hasBitWidth</a></td><td>unsigned Width</td></tr>
 <tr><td colspan="4" class="doc" id="hasBitWidth0"><pre>Matches non-static data members that are bit-fields of the specified
 bit width.
@@ -2610,7 +2683,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FloatingLiteral.html">FloatingLiteral</a>&gt;</td><td class="name" onclick="toggle('equals1')"><a name="equals1Anchor">equals</a></td><td>ValueT  Value</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FloatingLiteral.html">FloatingLiteral</a>&gt;</td><td class="name" onclick="toggle('equals1')"><a name="equals1Anchor">equals</a></td><td>const ValueT  Value</td></tr>
 <tr><td colspan="4" class="doc" id="equals1"><pre>Matches literals that are equal to the given value of type ValueT.
 
 Given
@@ -2678,6 +2751,15 @@
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;</td><td class="name" onclick="toggle('hasTrailingReturn0')"><a name="hasTrailingReturn0Anchor">hasTrailingReturn</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="hasTrailingReturn0"><pre>Matches a function declared with a trailing return type.
+
+Example matches Y (matcher = functionDecl(hasTrailingReturn()))
+int X() {}
+auto Y() -&gt; int {}
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;</td><td class="name" onclick="toggle('isConstexpr1')"><a name="isConstexpr1Anchor">isConstexpr</a></td><td></td></tr>
 <tr><td colspan="4" class="doc" id="isConstexpr1"><pre>Matches constexpr variable and function declarations.
 
@@ -2702,8 +2784,8 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;</td><td class="name" onclick="toggle('isDefinition2')"><a name="isDefinition2Anchor">isDefinition</a></td><td></td></tr>
-<tr><td colspan="4" class="doc" id="isDefinition2"><pre>Matches if a declaration has a body attached.
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;</td><td class="name" onclick="toggle('isDefinition3')"><a name="isDefinition3Anchor">isDefinition</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="isDefinition3"><pre>Matches if a declaration has a body attached.
 
 Example matches A, va, fa
   class A {};
@@ -2712,8 +2794,15 @@
   extern int vb;  Doesn't match, as it doesn't define the variable.
   void fa() {}
   void fb();  Doesn't match, as it has no body.
+  @interface X
+  - (void)ma; Doesn't match, interface is declaration.
+  @end
+  @implementation X
+  - (void)ma {}
+  @end
 
-Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TagDecl.html">TagDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;
+Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TagDecl.html">TagDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;,
+  Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCMethodDecl.html">ObjCMethodDecl</a>&gt;
 </pre></td></tr>
 
 
@@ -2741,6 +2830,7 @@
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>&gt;
 </pre></td></tr>
 
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;</td><td class="name" onclick="toggle('isExternC0')"><a name="isExternC0Anchor">isExternC</a></td><td></td></tr>
 <tr><td colspan="4" class="doc" id="isExternC0"><pre>Matches extern "C" function or variable declarations.
 
@@ -2757,6 +2847,7 @@
   matches the declaration of x and y, but not the declaration of z.
 </pre></td></tr>
 
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;</td><td class="name" onclick="toggle('isInline1')"><a name="isInline1Anchor">isInline</a></td><td></td></tr>
 <tr><td colspan="4" class="doc" id="isInline1"><pre>Matches function and namespace declarations that are marked with
 the inline keyword.
@@ -2772,6 +2863,20 @@
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;</td><td class="name" onclick="toggle('isNoReturn0')"><a name="isNoReturn0Anchor">isNoReturn</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="isNoReturn0"><pre>Matches FunctionDecls that have a noreturn attribute.
+
+Given
+  void nope();
+  [[noreturn]] void a();
+  __attribute__((noreturn)) void b();
+  struct c { [[noreturn]] c(); };
+functionDecl(isNoReturn())
+  matches all of those except
+  void nope();
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;</td><td class="name" onclick="toggle('isNoThrow0')"><a name="isNoThrow0Anchor">isNoThrow</a></td><td></td></tr>
 <tr><td colspan="4" class="doc" id="isNoThrow0"><pre>Matches functions that have a non-throwing exception specification.
 
@@ -2810,6 +2915,8 @@
   template &lt;typename T&gt; class X {}; class A {}; X&lt;A&gt; x;
 or
   template &lt;typename T&gt; class X {}; class A {}; template class X&lt;A&gt;;
+or
+  template &lt;typename T&gt; class X {}; class A {}; extern template class X&lt;A&gt;;
 cxxRecordDecl(hasName("::X"), isTemplateInstantiation())
   matches the template instantiation of X&lt;A&gt;.
 
@@ -2846,11 +2953,11 @@
   void j(int i);
   void k(int x, int y, int z, ...);
 functionDecl(parameterCountIs(2))
-  matches void g(int i, int j) {}
+  matches g and h
 functionProtoType(parameterCountIs(2))
-  matches void h(int i, int j)
+  matches g and h
 functionProtoType(parameterCountIs(3))
-  matches void k(int x, int y, int z, ...);
+  matches k
 </pre></td></tr>
 
 
@@ -2896,15 +3003,19 @@
   void j(int i);
   void k(int x, int y, int z, ...);
 functionDecl(parameterCountIs(2))
-  matches void g(int i, int j) {}
+  matches g and h
 functionProtoType(parameterCountIs(2))
-  matches void h(int i, int j)
+  matches g and h
 functionProtoType(parameterCountIs(3))
-  matches void k(int x, int y, int z, ...);
+  matches k
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1IntegerLiteral.html">IntegerLiteral</a>&gt;</td><td class="name" onclick="toggle('equals0')"><a name="equals0Anchor">equals</a></td><td>ValueT  Value</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1IntegerLiteral.html">IntegerLiteral</a>&gt;</td><td class="name" onclick="toggle('equals6')"><a name="equals6Anchor">equals</a></td><td>bool Value</td></tr>
+<tr><td colspan="4" class="doc" id="equals6"><pre></pre></td></tr>
+
+
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1IntegerLiteral.html">IntegerLiteral</a>&gt;</td><td class="name" onclick="toggle('equals0')"><a name="equals0Anchor">equals</a></td><td>const ValueT  Value</td></tr>
 <tr><td colspan="4" class="doc" id="equals0"><pre>Matches literals that are equal to the given value of type ValueT.
 
 Given
@@ -2930,10 +3041,6 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1IntegerLiteral.html">IntegerLiteral</a>&gt;</td><td class="name" onclick="toggle('equals6')"><a name="equals6Anchor">equals</a></td><td>bool Value</td></tr>
-<tr><td colspan="4" class="doc" id="equals6"><pre></pre></td></tr>
-
-
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1IntegerLiteral.html">IntegerLiteral</a>&gt;</td><td class="name" onclick="toggle('equals13')"><a name="equals13Anchor">equals</a></td><td>double Value</td></tr>
 <tr><td colspan="4" class="doc" id="equals13"><pre></pre></td></tr>
 
@@ -2979,7 +3086,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1NamedDecl.html">NamedDecl</a>&gt;</td><td class="name" onclick="toggle('hasName0')"><a name="hasName0Anchor">hasName</a></td><td>std::string  Name</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1NamedDecl.html">NamedDecl</a>&gt;</td><td class="name" onclick="toggle('hasName0')"><a name="hasName0Anchor">hasName</a></td><td>const std::string  Name</td></tr>
 <tr><td colspan="4" class="doc" id="hasName0"><pre>Matches NamedDecl nodes that have the specified name.
 
 Supports specifying enclosing namespaces or classes by prefixing the name
@@ -3110,6 +3217,37 @@
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCMethodDecl.html">ObjCMethodDecl</a>&gt;</td><td class="name" onclick="toggle('isDefinition2')"><a name="isDefinition2Anchor">isDefinition</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="isDefinition2"><pre>Matches if a declaration has a body attached.
+
+Example matches A, va, fa
+  class A {};
+  class B;  Doesn't match, as it has no body.
+  int va;
+  extern int vb;  Doesn't match, as it doesn't define the variable.
+  void fa() {}
+  void fb();  Doesn't match, as it has no body.
+  @interface X
+  - (void)ma; Doesn't match, interface is declaration.
+  @end
+  @implementation X
+  - (void)ma {}
+  @end
+
+Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TagDecl.html">TagDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;,
+  Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCMethodDecl.html">ObjCMethodDecl</a>&gt;
+</pre></td></tr>
+
+
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ParmVarDecl.html">ParmVarDecl</a>&gt;</td><td class="name" onclick="toggle('hasDefaultArgument0')"><a name="hasDefaultArgument0Anchor">hasDefaultArgument</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="hasDefaultArgument0"><pre>Matches a declaration that has default arguments.
+
+Example matches y (matcher = parmVarDecl(hasDefaultArgument()))
+void x(int val) {}
+void y(int val = 0) {}
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>&gt;</td><td class="name" onclick="toggle('asString0')"><a name="asString0Anchor">asString</a></td><td>std::string Name</td></tr>
 <tr><td colspan="4" class="doc" id="asString0"><pre>Matches if the matched type is represented by the given string.
 
@@ -3389,8 +3527,15 @@
   extern int vb;  Doesn't match, as it doesn't define the variable.
   void fa() {}
   void fb();  Doesn't match, as it has no body.
+  @interface X
+  - (void)ma; Doesn't match, interface is declaration.
+  @end
+  @implementation X
+  - (void)ma {}
+  @end
 
-Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TagDecl.html">TagDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;
+Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TagDecl.html">TagDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;,
+  Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCMethodDecl.html">ObjCMethodDecl</a>&gt;
 </pre></td></tr>
 
 
@@ -3402,7 +3547,7 @@
 representation of that integral value in base 10.
 
 Given
-  template&lt;int T&gt; struct A {};
+  template&lt;int T&gt; struct C {};
   C&lt;42&gt; c;
 classTemplateSpecializationDecl(
   hasAnyTemplateArgument(equalsIntegralValue("42")))
@@ -3414,7 +3559,7 @@
 <tr><td colspan="4" class="doc" id="isIntegral0"><pre>Matches a TemplateArgument that is an integral value.
 
 Given
-  template&lt;int T&gt; struct A {};
+  template&lt;int T&gt; struct C {};
   C&lt;42&gt; c;
 classTemplateSpecializationDecl(
   hasAnyTemplateArgument(isIntegral()))
@@ -3652,8 +3797,15 @@
   extern int vb;  Doesn't match, as it doesn't define the variable.
   void fa() {}
   void fb();  Doesn't match, as it has no body.
+  @interface X
+  - (void)ma; Doesn't match, interface is declaration.
+  @end
+  @implementation X
+  - (void)ma {}
+  @end
 
-Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TagDecl.html">TagDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;
+Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TagDecl.html">TagDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;,
+  Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ObjCMethodDecl.html">ObjCMethodDecl</a>&gt;
 </pre></td></tr>
 
 
@@ -3683,6 +3835,7 @@
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>&gt;
 </pre></td></tr>
 
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;</td><td class="name" onclick="toggle('isExternC1')"><a name="isExternC1Anchor">isExternC</a></td><td></td></tr>
 <tr><td colspan="4" class="doc" id="isExternC1"><pre>Matches extern "C" function or variable declarations.
 
@@ -3699,6 +3852,7 @@
   matches the declaration of x and y, but not the declaration of z.
 </pre></td></tr>
 
+
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;</td><td class="name" onclick="toggle('isStaticStorageClass1')"><a name="isStaticStorageClass1Anchor">isStaticStorageClass</a></td><td></td></tr>
 <tr><td colspan="4" class="doc" id="isStaticStorageClass1"><pre>Matches variablefunction declarations that have "static" storage
 class specifier ("static" keyword) written in the source.
@@ -3723,6 +3877,8 @@
   template &lt;typename T&gt; class X {}; class A {}; X&lt;A&gt; x;
 or
   template &lt;typename T&gt; class X {}; class A {}; template class X&lt;A&gt;;
+or
+  template &lt;typename T&gt; class X {}; class A {}; extern template class X&lt;A&gt;;
 cxxRecordDecl(hasName("::X"), isTemplateInstantiation())
   matches the template instantiation of X&lt;A&gt;.
 
@@ -3832,10 +3988,11 @@
 <tr><td colspan="4" class="doc" id="forEachDescendant0"><pre>Matches AST nodes that have descendant AST nodes that match the
 provided matcher.
 
-Example matches X, A, B, C
+Example matches X, A, A::X, B, B::C, B::C::X
   (matcher = cxxRecordDecl(forEachDescendant(cxxRecordDecl(hasName("X")))))
-  class X {};  Matches X, because X::X is a class of name X inside X.
-  class A { class X {}; };
+  class X {};
+  class A { class X {}; };  Matches A, because A::X is a class of name
+                            X inside A.
   class B { class C { class X {}; }; };
 
 DescendantT must be an AST base type.
@@ -3858,10 +4015,11 @@
 <tr><td colspan="4" class="doc" id="forEach0"><pre>Matches AST nodes that have child AST nodes that match the
 provided matcher.
 
-Example matches X, Y
+Example matches X, Y, Y::X, Z::Y, Z::Y::X
   (matcher = cxxRecordDecl(forEach(cxxRecordDecl(hasName("X")))
-  class X {};  Matches X, because X::X is a class of name X inside X.
-  class Y { class X {}; };
+  class X {};
+  class Y { class X {}; };  Matches Y, because Y::X is a class of name X
+                            inside Y.
   class Z { class Y { class X {}; }; };  Does not match Z.
 
 ChildT must be an AST base type.
@@ -3964,7 +4122,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration15')"><a name="hasDeclaration15Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration15')"><a name="hasDeclaration15Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration15"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -3975,8 +4133,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -4099,7 +4266,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1BinaryOperator.html">BinaryOperator</a>&gt;</td><td class="name" onclick="toggle('hasEitherOperand0')"><a name="hasEitherOperand0Anchor">hasEitherOperand</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1BinaryOperator.html">BinaryOperator</a>&gt;</td><td class="name" onclick="toggle('hasEitherOperand0')"><a name="hasEitherOperand0Anchor">hasEitherOperand</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasEitherOperand0"><pre>Matches if either the left hand side or the right hand side of a
 binary operator matches.
 </pre></td></tr>
@@ -4196,7 +4363,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration13')"><a name="hasDeclaration13Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration13')"><a name="hasDeclaration13Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration13"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -4207,8 +4374,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -4320,7 +4496,7 @@
 Example matches y.x()
   (matcher = cxxMemberCallExpr(on(hasType(cxxRecordDecl(hasName("Y"))))))
   class Y { public: void x(); };
-  void z() { Y y; y.x(); }",
+  void z() { Y y; y.x(); }
 
 FIXME: Overload to allow directly matching types?
 </pre></td></tr>
@@ -4380,7 +4556,17 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration12')"><a name="hasDeclaration12Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;</td><td class="name" onclick="toggle('hasArraySize0')"><a name="hasArraySize0Anchor">hasArraySize</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>&gt; InnerMatcher</td></tr>
+<tr><td colspan="4" class="doc" id="hasArraySize0"><pre>Matches array new expressions with a given array size.
+
+Given:
+  MyClass *p1 = new MyClass[10];
+cxxNewExpr(hasArraySize(intgerLiteral(equals(10))))
+  matches the expression 'new MyClass[10]'.
+</pre></td></tr>
+
+
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration12')"><a name="hasDeclaration12Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration12"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -4391,8 +4577,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -4515,7 +4710,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration14')"><a name="hasDeclaration14Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration14')"><a name="hasDeclaration14Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration14"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -4526,8 +4721,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -4551,7 +4755,18 @@
 
 
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CastExpr.html">CastExpr</a>&gt;</td><td class="name" onclick="toggle('hasSourceExpression0')"><a name="hasSourceExpression0Anchor">hasSourceExpression</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>&gt; InnerMatcher</td></tr>
-<tr><td colspan="4" class="doc" id="hasSourceExpression0"><pre></pre></td></tr>
+<tr><td colspan="4" class="doc" id="hasSourceExpression0"><pre>Matches if the cast's source expression
+or opaque value's source expression matches the given matcher.
+
+Example 1: matches "a string"
+(matcher = castExpr(hasSourceExpression(cxxConstructExpr())))
+class URL { URL(string); };
+URL url = "a string";
+
+Example 2: matches 'b' (matcher =
+opaqueValueExpr(hasSourceExpression(implicitCastExpr(declRefExpr())))
+int a = b ?: 1;
+</pre></td></tr>
 
 
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1ClassTemplateSpecializationDecl.html">ClassTemplateSpecializationDecl</a>&gt;</td><td class="name" onclick="toggle('hasAnyTemplateArgument0')"><a name="hasAnyTemplateArgument0Anchor">hasAnyTemplateArgument</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateArgument.html">TemplateArgument</a>&gt; InnerMatcher</td></tr>
@@ -4597,7 +4812,7 @@
   A&lt;bool, int&gt; b;
   A&lt;int, bool&gt; c;
 
-  template&lt;typename T&gt; f() {};
+  template&lt;typename T&gt; void f() {}
   void func() { f&lt;int&gt;(); };
 classTemplateSpecializationDecl(hasTemplateArgument(
     1, refersToType(asString("int"))))
@@ -4656,7 +4871,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration11')"><a name="hasDeclaration11Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration11')"><a name="hasDeclaration11Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration11"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -4667,8 +4882,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -4820,7 +5044,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1EnumType.html">EnumType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration10')"><a name="hasDeclaration10Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1EnumType.html">EnumType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration10')"><a name="hasDeclaration10Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration10"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -4831,8 +5055,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -5096,7 +5329,7 @@
   A&lt;bool, int&gt; b;
   A&lt;int, bool&gt; c;
 
-  template&lt;typename T&gt; f() {};
+  template&lt;typename T&gt; void f() {}
   void func() { f&lt;int&gt;(); };
 classTemplateSpecializationDecl(hasTemplateArgument(
     1, refersToType(asString("int"))))
@@ -5168,7 +5401,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1InjectedClassNameType.html">InjectedClassNameType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration9')"><a name="hasDeclaration9Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1InjectedClassNameType.html">InjectedClassNameType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration9')"><a name="hasDeclaration9Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration9"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -5179,8 +5412,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -5192,7 +5434,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1LabelStmt.html">LabelStmt</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration8')"><a name="hasDeclaration8Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1LabelStmt.html">LabelStmt</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration8')"><a name="hasDeclaration8Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration8"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -5203,8 +5445,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -5216,7 +5467,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberExpr.html">MemberExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration7')"><a name="hasDeclaration7Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberExpr.html">MemberExpr</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration7')"><a name="hasDeclaration7Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration7"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -5227,8 +5478,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -5397,7 +5657,18 @@
 
 
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1OpaqueValueExpr.html">OpaqueValueExpr</a>&gt;</td><td class="name" onclick="toggle('hasSourceExpression1')"><a name="hasSourceExpression1Anchor">hasSourceExpression</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>&gt; InnerMatcher</td></tr>
-<tr><td colspan="4" class="doc" id="hasSourceExpression1"><pre></pre></td></tr>
+<tr><td colspan="4" class="doc" id="hasSourceExpression1"><pre>Matches if the cast's source expression
+or opaque value's source expression matches the given matcher.
+
+Example 1: matches "a string"
+(matcher = castExpr(hasSourceExpression(cxxConstructExpr())))
+class URL { URL(string); };
+URL url = "a string";
+
+Example 2: matches 'b' (matcher =
+opaqueValueExpr(hasSourceExpression(implicitCastExpr(declRefExpr())))
+int a = b ?: 1;
+</pre></td></tr>
 
 
 <tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1OverloadExpr.html">OverloadExpr</a>&gt;</td><td class="name" onclick="toggle('hasAnyDeclaration0')"><a name="hasAnyDeclaration0Anchor">hasAnyDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt; InnerMatcher</td></tr>
@@ -5476,7 +5747,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration6')"><a name="hasDeclaration6Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration6')"><a name="hasDeclaration6Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration6"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -5487,8 +5758,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -5548,7 +5828,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1RecordType.html">RecordType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration5')"><a name="hasDeclaration5Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1RecordType.html">RecordType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration5')"><a name="hasDeclaration5Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration5"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -5559,8 +5839,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -5629,7 +5918,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('alignOfExpr0')"><a name="alignOfExpr0Anchor">alignOfExpr</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1UnaryExprOrTypeTraitExpr.html">UnaryExprOrTypeTraitExpr</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('alignOfExpr0')"><a name="alignOfExpr0Anchor">alignOfExpr</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1UnaryExprOrTypeTraitExpr.html">UnaryExprOrTypeTraitExpr</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="alignOfExpr0"><pre>Same as unaryExprOrTypeTraitExpr, but only matching
 alignof.
 </pre></td></tr>
@@ -5649,7 +5938,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('sizeOfExpr0')"><a name="sizeOfExpr0Anchor">sizeOfExpr</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1UnaryExprOrTypeTraitExpr.html">UnaryExprOrTypeTraitExpr</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('sizeOfExpr0')"><a name="sizeOfExpr0Anchor">sizeOfExpr</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1UnaryExprOrTypeTraitExpr.html">UnaryExprOrTypeTraitExpr</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="sizeOfExpr0"><pre>Same as unaryExprOrTypeTraitExpr, but only matching
 sizeof.
 </pre></td></tr>
@@ -5691,7 +5980,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TagType.html">TagType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration4')"><a name="hasDeclaration4Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TagType.html">TagType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration4')"><a name="hasDeclaration4Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration4"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -5702,8 +5991,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -5719,8 +6017,8 @@
 <tr><td colspan="4" class="doc" id="isExpr0"><pre>Matches a sugar TemplateArgument that refers to a certain expression.
 
 Given
-  template&lt;typename T&gt; struct A {};
-  struct B { B* next; };
+  struct B { int next; };
+  template&lt;int(B::*next_ptr)&gt; struct A {};
   A&lt;&amp;B::next&gt; a;
 templateSpecializationType(hasAnyTemplateArgument(
   isExpr(hasDescendant(declRefExpr(to(fieldDecl(hasName("next"))))))))
@@ -5734,11 +6032,11 @@
 declaration.
 
 Given
-  template&lt;typename T&gt; struct A {};
-  struct B { B* next; };
+  struct B { int next; };
+  template&lt;int(B::*next_ptr)&gt; struct A {};
   A&lt;&amp;B::next&gt; a;
 classTemplateSpecializationDecl(hasAnyTemplateArgument(
-    refersToDeclaration(fieldDecl(hasName("next"))))
+    refersToDeclaration(fieldDecl(hasName("next")))))
   matches the specialization A&lt;&amp;B::next&gt; with fieldDecl(...) matching
     B::next
 </pre></td></tr>
@@ -5748,7 +6046,7 @@
 <tr><td colspan="4" class="doc" id="refersToIntegralType0"><pre>Matches a TemplateArgument that referes to an integral type.
 
 Given
-  template&lt;int T&gt; struct A {};
+  template&lt;int T&gt; struct C {};
   C&lt;42&gt; c;
 classTemplateSpecializationDecl(
   hasAnyTemplateArgument(refersToIntegralType(asString("int"))))
@@ -5804,7 +6102,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateSpecializationType.html">TemplateSpecializationType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration3')"><a name="hasDeclaration3Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateSpecializationType.html">TemplateSpecializationType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration3')"><a name="hasDeclaration3Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration3"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -5815,8 +6113,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -5837,7 +6144,7 @@
   A&lt;bool, int&gt; b;
   A&lt;int, bool&gt; c;
 
-  template&lt;typename T&gt; f() {};
+  template&lt;typename T&gt; void f() {}
   void func() { f&lt;int&gt;(); };
 classTemplateSpecializationDecl(hasTemplateArgument(
     1, refersToType(asString("int"))))
@@ -5848,7 +6155,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateTypeParmType.html">TemplateTypeParmType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration2')"><a name="hasDeclaration2Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateTypeParmType.html">TemplateTypeParmType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration2')"><a name="hasDeclaration2Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration2"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -5859,8 +6166,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -5872,7 +6188,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;T&gt;</td><td class="name" onclick="toggle('findAll0')"><a name="findAll0Anchor">findAll</a></td><td>Matcher&lt;T&gt;  Matcher</td></tr>
+<tr><td>Matcher&lt;T&gt;</td><td class="name" onclick="toggle('findAll0')"><a name="findAll0Anchor">findAll</a></td><td>const Matcher&lt;T&gt;  Matcher</td></tr>
 <tr><td colspan="4" class="doc" id="findAll0"><pre>Matches if the node or any descendant matches.
 
 Generates results for each match.
@@ -5901,7 +6217,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration1')"><a name="hasDeclaration1Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration1')"><a name="hasDeclaration1Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration1"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -5912,8 +6228,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
@@ -5932,7 +6257,7 @@
 For example, in:
   class A {};
   using B = A;
-The matcher type(hasUniqualifeidDesugaredType(recordType())) matches
+The matcher type(hasUnqualifeidDesugaredType(recordType())) matches
 both B and A.
 </pre></td></tr>
 
@@ -5956,7 +6281,7 @@
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1UnresolvedUsingType.html">UnresolvedUsingType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration0')"><a name="hasDeclaration0Anchor">hasDeclaration</a></td><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
+<tr><td>Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1UnresolvedUsingType.html">UnresolvedUsingType</a>&gt;</td><td class="name" onclick="toggle('hasDeclaration0')"><a name="hasDeclaration0Anchor">hasDeclaration</a></td><td>const Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;  InnerMatcher</td></tr>
 <tr><td colspan="4" class="doc" id="hasDeclaration0"><pre>Matches a node if the declaration associated with that node
 matches the given matcher.
 
@@ -5967,8 +6292,17 @@
 - for CXXConstructExpr, the declaration of the constructor
 - for CXXNewExpr, the declaration of the operator new
 
-Also usable as Matcher&lt;T&gt; for any T supporting the getDecl() member
-function. e.g. various subtypes of clang::Type and various expressions.
+For type nodes, hasDeclaration will generally match the declaration of the
+sugared type. Given
+  class X {};
+  typedef X Y;
+  Y y;
+in varDecl(hasType(hasDeclaration(decl()))) the decl will match the
+typedefDecl. A common use case is to match the underlying, desugared type.
+This can be achieved by using the hasUnqualifiedDesugaredType matcher:
+  varDecl(hasType(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(decl())))))
+In this matcher, the decl will match the CXXRecordDecl of class X.
 
 Usable as: Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1AddrLabelExpr.html">AddrLabelExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>&gt;,
   Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNewExpr.html">CXXNewExpr</a>&gt;, Matcher&lt;<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>&gt;,
diff --git a/docs/Modules.rst b/docs/Modules.rst
index 757be61..493c54d 100644
--- a/docs/Modules.rst
+++ b/docs/Modules.rst
@@ -430,6 +430,21 @@
 cplusplus11
   C++11 support is available.
 
+cplusplus14
+  C++14 support is available.
+
+cplusplus17
+  C++17 support is available.
+
+c99
+  C99 support is available.
+
+c11
+  C11 support is available.
+
+c17
+  C17 support is available.
+
 freestanding
   A freestanding environment is available.
 
@@ -859,10 +874,12 @@
 
   module Foo {
     header "Foo.h"
-    
-    explicit module Private {
-      header "Foo_Private.h"
-    }
+    ...
+  }
+
+  module Foo_Private {
+    header "Foo_Private.h"
+    ...
   }
 
 
@@ -873,7 +890,7 @@
 
 Private module map files, which are named ``module.private.modulemap``
 (or, for backward compatibility, ``module_private.map``), allow one to
-augment the primary module map file with an additional submodule. For
+augment the primary module map file with an additional modules. For
 example, we would split the module map file above into two module map
 files:
 
@@ -883,9 +900,9 @@
   module Foo {
     header "Foo.h"
   }
-  
+
   /* module.private.modulemap */
-  explicit module Foo.Private {
+  module Foo_Private {
     header "Foo_Private.h"
   }
 
@@ -899,13 +916,12 @@
 
 When writing a private module as part of a *framework*, it's recommended that:
 
-* Headers for this module are present in the ``PrivateHeaders``
-  framework subdirectory.
-* The private module is defined as a *submodule* of the public framework (if
-  there's one), similar to how ``Foo.Private`` is defined in the example above.
-* The ``explicit`` keyword should be used to guarantee that its content will
-  only be available when the submodule itself is explicitly named (through a
-  ``@import`` for example).
+* Headers for this module are present in the ``PrivateHeaders`` framework
+  subdirectory.
+* The private module is defined as a *top level module* with the name of the
+  public framework prefixed, like ``Foo_Private`` above. Clang has extra logic
+  to work with this naming, using ``FooPrivate`` or ``Foo.Private`` (submodule)
+  trigger warnings and might not work as expected.
 
 Modularizing a Platform
 =======================
diff --git a/docs/OpenMPSupport.rst b/docs/OpenMPSupport.rst
new file mode 100644
index 0000000..f340494
--- /dev/null
+++ b/docs/OpenMPSupport.rst
@@ -0,0 +1,68 @@
+.. raw:: html
+
+  <style type="text/css">
+    .none { background-color: #FFCCCC }
+    .partial { background-color: #FFFF99 }
+    .good { background-color: #CCFF99 }
+  </style>
+
+.. role:: none
+.. role:: partial
+.. role:: good
+
+==================
+OpenMP Support
+==================
+
+Clang fully supports OpenMP 3.1 + some elements of OpenMP 4.5. Clang supports offloading to X86_64, AArch64 and PPC64[LE] devices.
+Support for Cuda devices is not ready yet.
+The status of major OpenMP 4.5 features support in Clang.
+
+Standalone directives
+=====================
+
+* #pragma omp [for] simd: :good:`Complete`.
+
+* #pragma omp declare simd: :partial:`Partial`.  We support parsing/semantic
+  analysis + generation of special attributes for X86 target, but still
+  missing the LLVM pass for vectorization.
+
+* #pragma omp taskloop [simd]: :good:`Complete`.
+
+* #pragma omp target [enter|exit] data: :good:`Complete`.
+
+* #pragma omp target update: :good:`Complete`.
+
+* #pragma omp target: :good:`Complete`.
+
+* #pragma omp declare target: :partial:`Partial`.  No full codegen support.
+
+* #pragma omp teams: :good:`Complete`.
+
+* #pragma omp distribute [simd]: :good:`Complete`.
+
+* #pragma omp distribute parallel for [simd]: :good:`Complete`.
+
+Combined directives
+===================
+
+* #pragma omp parallel for simd: :good:`Complete`.
+
+* #pragma omp target parallel: :good:`Complete`.
+
+* #pragma omp target parallel for [simd]: :good:`Complete`.
+
+* #pragma omp target simd: :good:`Complete`.
+
+* #pragma omp target teams: :good:`Complete`.
+
+* #pragma omp teams distribute [simd]: :good:`Complete`.
+
+* #pragma omp target teams distribute [simd]: :good:`Complete`.
+
+* #pragma omp teams distribute parallel for [simd]: :good:`Complete`.
+
+* #pragma omp target teams distribute parallel for [simd]: :good:`Complete`.
+
+Clang does not support any constructs/updates from upcoming OpenMP 5.0 except for `reduction`-based clauses in the `task` and `target`-based directives.
+In addition, the LLVM OpenMP runtime `libomp` supports the OpenMP Tools Interface (OMPT) on x86, x86_64, AArch64, and PPC64 on Linux, Windows, and mac OS.
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 293144a..b29cd06 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -1,5 +1,5 @@
 =======================================
-Clang 6.0.0 (In-Progress) Release Notes
+Clang 7.0.0 (In-Progress) Release Notes
 =======================================
 
 .. contents::
@@ -10,7 +10,7 @@
 
 .. warning::
 
-   These are in-progress notes for the upcoming Clang 6 release.
+   These are in-progress notes for the upcoming Clang 7 release.
    Release notes for previous releases can be found on
    `the Download Page <http://releases.llvm.org/download.html>`_.
 
@@ -18,7 +18,7 @@
 ============
 
 This document contains the release notes for the Clang C/C++/Objective-C
-frontend, part of the LLVM Compiler Infrastructure, release 6.0.0. Here we
+frontend, part of the LLVM Compiler Infrastructure, release 7.0.0. Here we
 describe the status of Clang in some detail, including major
 improvements from the previous release and new feature work. For the
 general LLVM release notes, see `the LLVM
@@ -35,7 +35,7 @@
 the current one. To see the release notes for a specific release, please
 see the `releases page <http://llvm.org/releases/>`_.
 
-What's New in Clang 6.0.0?
+What's New in Clang 7.0.0?
 ==========================
 
 Some of the major new features and improvements to Clang are listed
@@ -51,57 +51,17 @@
 Improvements to Clang's diagnostics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-- ``-Wpragma-pack`` is a new warning that warns in the following cases:
-
-  - When a translation unit is missing terminating ``#pragma pack (pop)``
-    directives.
-
-  - When leaving an included file that changes the current alignment value,
-    i.e. when the alignment before ``#include`` is different to the alignment
-    after ``#include``.
-
-  - ``-Wpragma-pack-suspicious-include`` (disabled by default) warns on an
-    ``#include`` when the included file contains structures or unions affected by
-    a non-default alignment that has been specified using a ``#pragma pack``
-    directive prior to the ``#include``.
-
-- ``-Wobjc-messaging-id`` is a new, non-default warning that warns about
-  message sends to unqualified ``id`` in Objective-C. This warning is useful
-  for projects that would like to avoid any potential future compiler
-  errors/warnings, as the system frameworks might add a method with the same
-  selector which could make the message send to ``id`` ambiguous.
-
-- ``-Wtautological-compare`` now warns when comparing an unsigned integer and 0
-  regardless of whether the constant is signed or unsigned."
-
-- ``-Wtautological-compare`` now warns about comparing a signed integer and 0
-  when the signed integer is coerced to an unsigned type for the comparison.
-  ``-Wsign-compare`` was adjusted not to warn in this case.
-
-- ``-Wtautological-constant-compare`` is a new warning that warns on
-  tautological comparisons between integer variable of the type ``T`` and the
-  largest/smallest possible integer constant of that same type.
-
-- ``-Wnull-pointer-arithmetic`` now warns about performing pointer arithmetic
-  on a null pointer. Such pointer arithmetic has an undefined behavior if the
-  offset is nonzero. It also now warns about arithmetic on a null pointer
-  treated as a cast from integer to pointer (GNU extension).
+- ...
 
 Non-comprehensive list of changes in this release
 -------------------------------------------------
 
-- Bitrig OS was merged back into OpenBSD, so Bitrig support has been
-  removed from Clang/LLVM.
-
-- The default value of _MSC_VER was raised from 1800 to 1911, making it
-  compatible with the Visual Studio 2015 and 2017 C++ standard library headers.
-  Users should generally expect this to be regularly raised to match the most
-  recently released version of the Visual C++ compiler.
+- ...
 
 New Compiler Flags
 ------------------
 
-- --autocomplete was implemented to obtain a list of flags and its arguments. This is used for shell autocompletion.
+- ...
 
 Deprecated Compiler Flags
 -------------------------
@@ -120,9 +80,12 @@
 Attribute Changes in Clang
 --------------------------
 
-- The presence of __attribute__((availability(...))) on a declaration no longer
-  implies default visibility for that declaration on macOS.
-
+- Clang now supports function multiversioning with attribute 'target' on ELF
+  based x86/x86-64 environments by using indirect functions. This implementation
+  has a few minor limitations over the GCC implementation for the sake of AST
+  sanity, however it is otherwise compatible with existing code using this
+  feature for GCC. Consult the documentation for the target attribute for more
+  information.
 - ...
 
 Windows Support
@@ -146,7 +109,7 @@
 C++ Language Changes in Clang
 -----------------------------
 
-...
+- ...
 
 C++1z Feature Support
 ^^^^^^^^^^^^^^^^^^^^^
@@ -166,12 +129,12 @@
 OpenMP Support in Clang
 ----------------------------------
 
-...
+- ...
 
 Internal API Changes
 --------------------
 
-These are major API changes that have happened since the 4.0.0 release of
+These are major API changes that have happened since the 6.0.0 release of
 Clang. If upgrading an external codebase that uses Clang as a library,
 this section should help get you past the largest hurdles of upgrading.
 
@@ -180,54 +143,12 @@
 AST Matchers
 ------------
 
-The hasDeclaration matcher now works the same for Type and QualType and only
-ever looks through one level of sugaring in a limited number of cases.
-
-There are two main patterns affected by this:
-
--  qualType(hasDeclaration(recordDecl(...))): previously, we would look through
-   sugar like TypedefType to get at the underlying recordDecl; now, we need
-   to explicitly remove the sugaring:
-   qualType(hasUnqualifiedDesugaredType(hasDeclaration(recordDecl(...))))
-
--  hasType(recordDecl(...)): hasType internally uses hasDeclaration; previously,
-   this matcher used to match for example TypedefTypes of the RecordType, but
-   after the change they don't; to fix, use:
-
-::
-   hasType(hasUnqualifiedDesugaredType(
-       recordType(hasDeclaration(recordDecl(...)))))
-
--  templateSpecializationType(hasDeclaration(classTemplateDecl(...))):
-   previously, we would directly match the underlying ClassTemplateDecl;
-   now, we can explicitly match the ClassTemplateSpecializationDecl, but that
-   requires to explicitly get the ClassTemplateDecl:
-
-::
-   templateSpecializationType(hasDeclaration(
-       classTemplateSpecializationDecl(
-           hasSpecializedTemplate(classTemplateDecl(...)))))
+- ...
 
 clang-format
 ------------
 
-* Option *IndentPPDirectives* added to indent preprocessor directives on
-  conditionals.
-
-  +----------------------+----------------------+
-  | Before               | After                |
-  +======================+======================+
-  |  .. code-block:: c++ | .. code-block:: c++  |
-  |                      |                      |
-  |    #if FOO           |   #if FOO            |
-  |    #if BAR           |   #  if BAR          |
-  |    #include <foo>    |   #    include <foo> |
-  |    #endif            |   #  endif           |
-  |    #endif            |   #endif             |
-  +----------------------+----------------------+
-
-* Option -verbose added to the command line.
-  Shows the list of processed files.
+- ...
 
 libclang
 --------
@@ -238,15 +159,14 @@
 Static Analyzer
 ---------------
 
+- ...
+
 ...
 
 Undefined Behavior Sanitizer (UBSan)
 ------------------------------------
 
-* A minimal runtime is now available. It is suitable for use in production
-  environments, and has a small attack surface. It only provides very basic
-  issue logging and deduplication, and does not support ``-fsanitize=vptr``
-  checking.
+* ...
 
 Core Analysis Improvements
 ==========================
diff --git a/docs/ThinLTO.rst b/docs/ThinLTO.rst
index 607121f..38873f4 100644
--- a/docs/ThinLTO.rst
+++ b/docs/ThinLTO.rst
@@ -156,7 +156,7 @@
 Possible key-value pairs are:
 
 - ``cache_size=X%``: The maximum size for the cache directory is ``X`` percent
-  of the available space on the the disk. Set to 100 to indicate no limit,
+  of the available space on the disk. Set to 100 to indicate no limit,
   50 to indicate that the cache size will not be left over half the available
   disk space. A value over 100 is invalid. A value of 0 disables the percentage
   size-based pruning. The default is 75%.
@@ -173,6 +173,10 @@
   ``cache_size_bytes=1g`` on its own will cause both the 1GB and default 75%
   policies to be applied unless the default ``cache_size`` is overridden.
 
+- ``cache_size_files=X``:
+  Set the maximum number of files in the cache directory. Set to 0 to indicate
+  no limit. The default is 1000000 files.
+
 - ``prune_after=Xs``, ``prune_after=Xm``, ``prune_after=Xh``: Sets the
   expiration time for cache files to ``X`` seconds (or minutes, hours
   respectively).  When a file hasn't been accessed for ``prune_after`` seconds,
diff --git a/docs/Toolchain.rst b/docs/Toolchain.rst
index e727ccd..06bde35 100644
--- a/docs/Toolchain.rst
+++ b/docs/Toolchain.rst
@@ -106,7 +106,7 @@
 Clang can either use LLVM's integrated assembler or an external system-specific
 tool (for instance, the GNU Assembler on GNU OSes) to produce machine code from
 assembly.
-By default, Clang uses LLVM's integrataed assembler on all targets where it is
+By default, Clang uses LLVM's integrated assembler on all targets where it is
 supported. If you wish to use the system assember instead, use the
 ``-fno-integrated-as`` option.
 
diff --git a/docs/UndefinedBehaviorSanitizer.rst b/docs/UndefinedBehaviorSanitizer.rst
index 0a08a41..e9f85c2 100644
--- a/docs/UndefinedBehaviorSanitizer.rst
+++ b/docs/UndefinedBehaviorSanitizer.rst
@@ -124,8 +124,8 @@
   -  ``-fsanitize=signed-integer-overflow``: Signed integer overflow,
      including all the checks added by ``-ftrapv``, and checking for
      overflow in signed division (``INT_MIN / -1``).
-  -  ``-fsanitize=unreachable``: If control flow reaches
-     ``__builtin_unreachable``.
+  -  ``-fsanitize=unreachable``: If control flow reaches an unreachable
+     program point.
   -  ``-fsanitize=unsigned-integer-overflow``: Unsigned integer
      overflows. Note that unlike signed integer overflow, unsigned integer
      is not undefined behavior. However, while it has well-defined semantics,
diff --git a/docs/UsersManual.rst b/docs/UsersManual.rst
index 2757ced..f19aacc 100644
--- a/docs/UsersManual.rst
+++ b/docs/UsersManual.rst
@@ -694,6 +694,79 @@
 option tells Clang to put double-quotes around the entire filename, which
 is the convention used by NMake and Jom.
 
+Configuration files
+-------------------
+
+Configuration files group command-line options and allow all of them to be
+specified just by referencing the configuration file. They may be used, for
+example, to collect options required to tune compilation for particular
+target, such as -L, -I, -l, --sysroot, codegen options, etc.
+
+The command line option `--config` can be used to specify configuration
+file in a Clang invocation. For example:
+
+::
+
+    clang --config /home/user/cfgs/testing.txt
+    clang --config debug.cfg
+
+If the provided argument contains a directory separator, it is considered as
+a file path, and options are read from that file. Otherwise the argument is
+treated as a file name and is searched for sequentially in the directories:
+
+    - user directory,
+    - system directory,
+    - the directory where Clang executable resides.
+
+Both user and system directories for configuration files are specified during
+clang build using CMake parameters, CLANG_CONFIG_FILE_USER_DIR and
+CLANG_CONFIG_FILE_SYSTEM_DIR respectively. The first file found is used. It is
+an error if the required file cannot be found.
+
+Another way to specify a configuration file is to encode it in executable name.
+For example, if the Clang executable is named `armv7l-clang` (it may be a
+symbolic link to `clang`), then Clang will search for file `armv7l.cfg` in the
+directory where Clang resides.
+
+If a driver mode is specified in invocation, Clang tries to find a file specific
+for the specified mode. For example, if the executable file is named
+`x86_64-clang-cl`, Clang first looks for `x86_64-cl.cfg` and if it is not found,
+looks for `x86_64.cfg`.
+
+If the command line contains options that effectively change target architecture
+(these are -m32, -EL, and some others) and the configuration file starts with an
+architecture name, Clang tries to load the configuration file for the effective
+architecture. For example, invocation:
+
+::
+
+    x86_64-clang -m32 abc.c
+
+causes Clang search for a file `i368.cfg` first, and if no such file is found,
+Clang looks for the file `x86_64.cfg`.
+
+The configuration file consists of command-line options specified on one or
+more lines. Lines composed of whitespace characters only are ignored as well as
+lines in which the first non-blank character is `#`. Long options may be split
+between several lines by a trailing backslash. Here is example of a
+configuration file:
+
+::
+
+    # Several options on line
+    -c --target=x86_64-unknown-linux-gnu
+
+    # Long option split between lines
+    -I/usr/lib/gcc/x86_64-linux-gnu/5.4.0/../../../../\
+    include/c++/5.4.0
+
+    # other config files may be included
+    @linux.options
+
+Files included by `@file` directives in configuration files are resolved
+relative to the including file. For example, if a configuration file
+`~/.llvm/target.cfg` contains the directive `@os/linux.opts`, the file
+`linux.opts` is searched for in the directory `~/.llvm/os`.
 
 Language and Target-Independent Features
 ========================================
@@ -1002,7 +1075,7 @@
 Building a relocatable precompiled header requires two additional
 arguments. First, pass the ``--relocatable-pch`` flag to indicate that
 the resulting PCH file should be relocatable. Second, pass
-`-isysroot /path/to/build`, which makes all includes for your library
+``-isysroot /path/to/build``, which makes all includes for your library
 relative to the build directory. For example:
 
 .. code-block:: console
@@ -1012,9 +1085,9 @@
 When loading the relocatable PCH file, the various headers used in the
 PCH file are found from the system header root. For example, ``mylib.h``
 can be found in ``/usr/include/mylib.h``. If the headers are installed
-in some other system root, the `-isysroot` option can be used provide
+in some other system root, the ``-isysroot`` option can be used provide
 a different system root from which the headers will be based. For
-example, `-isysroot /Developer/SDKs/MacOSX10.4u.sdk` will look for
+example, ``-isysroot /Developer/SDKs/MacOSX10.4u.sdk`` will look for
 ``mylib.h`` in ``/Developer/SDKs/MacOSX10.4u.sdk/usr/include/mylib.h``.
 
 Relocatable precompiled headers are intended to be used in a limited
@@ -1147,6 +1220,11 @@
    the behavior of sanitizers in the ``cfi`` group to allow checking
    of cross-DSO virtual and indirect calls.
 
+.. option:: -fsanitize-cfi-icall-generalize-pointers
+
+   Generalize pointers in return and argument types in function type signatures
+   checked by Control Flow Integrity indirect call checking. See
+   :doc:`ControlFlowIntegrity` for more details.
 
 .. option:: -fstrict-vtable-pointers
 
@@ -1297,7 +1375,8 @@
 Profile information enables better optimization. For example, knowing that a
 branch is taken very frequently helps the compiler make better decisions when
 ordering basic blocks. Knowing that a function ``foo`` is called more
-frequently than another function ``bar`` helps the inliner.
+frequently than another function ``bar`` helps the inliner. Optimization
+levels ``-O2`` and above are recommended for use of profile guided optimization.
 
 Clang supports profile guided optimization with two different kinds of
 profiling. A sampling profiler can generate a profile with very low runtime
@@ -1776,6 +1855,27 @@
   must come first.)
 
 
+Controlling LLVM IR Output
+--------------------------
+
+Controlling Value Names in LLVM IR
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Emitting value names in LLVM IR increases the size and verbosity of the IR.
+By default, value names are only emitted in assertion-enabled builds of Clang.
+However, when reading IR it can be useful to re-enable the emission of value
+names to improve readability.
+
+.. option:: -fdiscard-value-names
+
+  Discard value names when generating LLVM IR.
+
+.. option:: -fno-discard-value-names
+
+  Do not discard value names when generating LLVM IR. This option can be used
+  to re-enable names for release builds of Clang.
+
+
 Comment Parsing Options
 -----------------------
 
@@ -2037,6 +2137,11 @@
 Use `-fopenmp` to enable OpenMP. Support for OpenMP can be disabled with
 `-fno-openmp`.
 
+Use `-fopenmp-simd` to enable OpenMP simd features only, without linking
+the runtime library; for combined constructs
+(e.g. ``#pragma omp parallel for simd``) the non-simd directives and clauses
+will be ignored. This can be disabled with `-fno-openmp-simd`.
+
 Controlling implementation limits
 ---------------------------------
 
@@ -2579,10 +2684,37 @@
 To enable clang-cl to find system headers, libraries, and the linker when run
 from the command-line, it should be executed inside a Visual Studio Native Tools
 Command Prompt or a regular Command Prompt where the environment has been set
-up using e.g. `vcvars32.bat <http://msdn.microsoft.com/en-us/library/f2ccy3wt.aspx>`_.
+up using e.g. `vcvarsall.bat <http://msdn.microsoft.com/en-us/library/f2ccy3wt.aspx>`_.
 
-clang-cl can also be used from inside Visual Studio by using an LLVM Platform
-Toolset.
+clang-cl can also be used from inside Visual Studio by selecting the LLVM
+Platform Toolset. The toolset is installed by the LLVM installer, which can be
+downloaded from the `LLVM release <http://releases.llvm.org/download.html>`_ or
+`snapshot build <http://llvm.org/builds/>`_ web pages. To use the toolset,
+select a project in Solution Explorer, open its Property Page (Alt+F7), and in
+the "General" section of "Configuration Properties" change "Platform Toolset"
+to e.g. LLVM-vs2014.
+
+To use the toolset with MSBuild directly, invoke it with e.g.
+``/p:PlatformToolset=LLVM-vs2014``. This allows trying out the clang-cl
+toolchain without modifying your project files.
+
+It's also possible to point MSBuild at clang-cl without changing toolset by
+passing ``/p:CLToolPath=c:\llvm\bin /p:CLToolExe=clang-cl.exe``.
+
+When using CMake and the Visual Studio generators, the toolset can be set with the ``-T`` flag:
+
+  ::
+
+    cmake -G"Visual Studio 15 2017" -T LLVM-vs2014 ..
+
+When using CMake with the Ninja generator, set the ``CMAKE_C_COMPILER`` and
+``CMAKE_CXX_COMPILER`` variables to clang-cl:
+
+  ::
+
+    cmake -GNinja -DCMAKE_C_COMPILER="c:/Program Files (x86)/LLVM/bin/clang-cl.exe"
+        -DCMAKE_CXX_COMPILER="c:/Program Files (x86)/LLVM/bin/clang-cl.exe" ..
+
 
 Command-Line Options
 --------------------
@@ -2649,6 +2781,7 @@
       /Gd                     Set __cdecl as a default calling convention
       /GF-                    Disable string pooling
       /GR-                    Disable emission of RTTI data
+      /Gregcall               Set __regcall as a default calling convention
       /GR                     Enable emission of RTTI data
       /Gr                     Set __fastcall as a default calling convention
       /GS-                    Disable buffer security check
@@ -2705,7 +2838,7 @@
       /W2                     Enable -Wall
       /W3                     Enable -Wall
       /W4                     Enable -Wall and -Wextra
-      /Wall                   Enable -Wall and -Wextra
+      /Wall                   Enable -Weverything
       /WX-                    Do not treat warnings as errors
       /WX                     Treat warnings as errors
       /w                      Disable all warnings
@@ -2762,6 +2895,8 @@
                               Disable specified features of coverage instrumentation for Sanitizers
       -fno-sanitize-memory-track-origins
                               Disable origins tracking in MemorySanitizer
+      -fno-sanitize-memory-use-after-dtor
+                              Disable use-after-destroy detection in MemorySanitizer
       -fno-sanitize-recover=<value>
                               Disable recovery for specified sanitizers
       -fno-sanitize-stats     Disable sanitizer statistics gathering.
@@ -2792,6 +2927,8 @@
                               Path to blacklist file for sanitizers
       -fsanitize-cfi-cross-dso
                               Enable control flow integrity (CFI) checks for cross-DSO calls.
+      -fsanitize-cfi-icall-generalize-pointers
+                              Generalize pointers in CFI indirect call type signature checks
       -fsanitize-coverage=<value>
                               Specify the type of coverage instrumentation for Sanitizers
       -fsanitize-memory-track-origins=<value>
@@ -2815,6 +2952,7 @@
       -fsanitize=<check>      Turn on runtime checks for various forms of undefined or suspicious
                               behavior. See user manual for available checks
       -fstandalone-debug      Emit full debug info for all types used by the program
+      -fwhole-program-vtables Enables whole-program vtable optimization. Requires -flto
       -gcodeview              Generate CodeView debug information
       -gline-tables-only      Emit debug line number tables only
       -miamcu                 Use Intel MCU ABI
@@ -2823,6 +2961,7 @@
       -Qunused-arguments      Don't emit warning for unused driver arguments
       -R<remark>              Enable the specified remark
       --target=<value>        Generate code for the given target
+      --version               Print version information
       -v                      Show commands to run and use verbose output
       -W<warning>             Enable the specified warning
       -Xclang <arg>           Pass <arg> to the clang compiler
diff --git a/docs/analyzer/DebugChecks.rst b/docs/analyzer/DebugChecks.rst
index e2a8e05..67521b8 100644
--- a/docs/analyzer/DebugChecks.rst
+++ b/docs/analyzer/DebugChecks.rst
@@ -242,6 +242,19 @@
       clang_analyzer_printState(); // Read the stderr!
     }
 
+- ``void clang_analyzer_hashDump(int);``
+
+  The analyzer can generate a hash to identify reports. To debug what information
+  is used to calculate this hash it is possible to dump the hashed string as a
+  warning of an arbitrary expression using the function above.
+
+  Example usage::
+
+    void foo() {
+      int x = 1;
+      clang_analyzer_hashDump(x); // expected-warning{{hashed string for x}}
+    }
+
 Statistics
 ==========
 
diff --git a/docs/conf.py b/docs/conf.py
index a12f99a..b38c93a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -49,9 +49,9 @@
 # built documents.
 #
 # The short version.
-version = '6'
+version = '7'
 # The full version, including alpha/beta/rc tags.
-release = '6'
+release = '7'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/docs/doxygen.cfg.in b/docs/doxygen.cfg.in
index 13ed722..08f4253 100644
--- a/docs/doxygen.cfg.in
+++ b/docs/doxygen.cfg.in
@@ -284,7 +284,7 @@
 
 # When enabled doxygen tries to link words that correspond to documented
 # classes, or namespaces to their corresponding documentation. Such a link can
-# be prevented in individual cases by by putting a % sign in front of the word
+# be prevented in individual cases by putting a % sign in front of the word
 # or globally by setting AUTOLINK_SUPPORT to NO.
 # The default value is: YES.
 
diff --git a/docs/index.rst b/docs/index.rst
index daf943a..ed47953 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -39,6 +39,7 @@
    SourceBasedCodeCoverage
    Modules
    MSVCCompatibility
+   OpenMPSupport
    ThinLTO
    CommandGuide/index
    FAQ
@@ -84,6 +85,7 @@
    PTHInternals
    PCHInternals
    ItaniumMangleAbiTags
+   HardwareAssistedAddressSanitizerDesign.rst
 
 
 Indices and tables
diff --git a/docs/tools/dump_ast_matchers.py b/docs/tools/dump_ast_matchers.py
index 4554040..d389775 100755
--- a/docs/tools/dump_ast_matchers.py
+++ b/docs/tools/dump_ast_matchers.py
@@ -95,7 +95,7 @@
 def unify_arguments(args):
   """Gets rid of anything the user doesn't care about in the argument list."""
   args = re.sub(r'internal::', r'', args)
-  args = re.sub(r'const\s+(.*)&', r'\1 ', args)
+  args = re.sub(r'extern const\s+(.*)&', r'\1 ', args)
   args = re.sub(r'&', r' ', args)
   args = re.sub(r'(^|\s)M\d?(\s)', r'\1Matcher<*>\2', args)
   return args
@@ -150,11 +150,11 @@
                   comment, is_dyncast=True)
       return
 
-    # Parse the various matcher definition macros.
-    m = re.match(""".*AST_TYPE_MATCHER\(
-                       \s*([^\s,]+\s*),
-                       \s*([^\s,]+\s*)
-                     \)\s*;\s*$""", declaration, flags=re.X)
+    # Special case of type matchers:
+    #   AstTypeMatcher<ArgumentType> name
+    m = re.match(r""".*AstTypeMatcher\s*<
+                       \s*([^\s>]+)\s*>
+                       \s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X)
     if m:
       inner, name = m.groups()
       add_matcher('Type', name, 'Matcher<%s>...' % inner,
@@ -165,7 +165,8 @@
       #             comment, is_dyncast=True)
       return
 
-    m = re.match(""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER\(
+    # Parse the various matcher definition macros.
+    m = re.match(""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER(?:_DECL)?\(
                        \s*([^\s,]+\s*),
                        \s*(?:[^\s,]+\s*),
                        \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
@@ -236,7 +237,7 @@
                        (?:,\s*([^\s,]+)\s*
                           ,\s*([^\s,]+)\s*)?
                        (?:,\s*\d+\s*)?
-                      \)\s*{\s*$""", declaration, flags=re.X)
+                      \)\s*{""", declaration, flags=re.X)
     if m:
       p, n, result, name = m.groups()[0:4]
       args = m.groups()[4:]
@@ -256,8 +257,8 @@
 
     # Parse ArgumentAdapting matchers.
     m = re.match(
-        r"""^.*ArgumentAdaptingMatcherFunc<.*>\s*(?:LLVM_ATTRIBUTE_UNUSED\s*)
-              ([a-zA-Z]*)\s*=\s*{};$""",
+        r"""^.*ArgumentAdaptingMatcherFunc<.*>\s*
+              ([a-zA-Z]*);$""",
         declaration, flags=re.X)
     if m:
       name = m.groups()[0]
@@ -267,7 +268,7 @@
     # Parse Variadic functions.
     m = re.match(
         r"""^.*internal::VariadicFunction\s*<\s*([^,]+),\s*([^,]+),\s*[^>]+>\s*
-              ([a-zA-Z]*)\s*=\s*{.*};$""",
+              ([a-zA-Z]*);$""",
         declaration, flags=re.X)
     if m:
       result, arg, name = m.groups()[:3]
@@ -276,15 +277,15 @@
 
     # Parse Variadic operator matchers.
     m = re.match(
-        r"""^.*VariadicOperatorMatcherFunc\s*<\s*([^,]+),\s*([^\s>]+)\s*>\s*
-              ([a-zA-Z]*)\s*=\s*{.*};$""",
+        r"""^.*VariadicOperatorMatcherFunc\s*<\s*([^,]+),\s*([^\s]+)\s*>\s*
+              ([a-zA-Z]*);$""",
         declaration, flags=re.X)
     if m:
       min_args, max_args, name = m.groups()[:3]
       if max_args == '1':
         add_matcher('*', name, 'Matcher<*>', comment)
         return
-      elif max_args == 'UINT_MAX':
+      elif max_args == 'std::numeric_limits<unsigned>::max()':
         add_matcher('*', name, 'Matcher<*>, ..., Matcher<*>', comment)
         return
 
diff --git a/docs/tools/dump_format_style.py b/docs/tools/dump_format_style.py
index e2571f4..1ca050e 100755
--- a/docs/tools/dump_format_style.py
+++ b/docs/tools/dump_format_style.py
@@ -177,7 +177,8 @@
   for option in options:
     if not option.type in ['bool', 'unsigned', 'int', 'std::string',
                            'std::vector<std::string>',
-                           'std::vector<IncludeCategory>']:
+                           'std::vector<IncludeCategory>',
+                           'std::vector<RawStringFormat>']:
       if enums.has_key(option.type):
         option.enum = enums[option.type]
       elif nested_structs.has_key(option.type):
diff --git a/examples/analyzer-plugin/MainCallChecker.cpp b/examples/analyzer-plugin/MainCallChecker.cpp
index 7f08760..74fe663 100644
--- a/examples/analyzer-plugin/MainCallChecker.cpp
+++ b/examples/analyzer-plugin/MainCallChecker.cpp
@@ -16,10 +16,8 @@
 } // end anonymous namespace
 
 void MainCallChecker::checkPreStmt(const CallExpr *CE, CheckerContext &C) const {
-  const ProgramStateRef state = C.getState();
-  const LocationContext *LC = C.getLocationContext();
   const Expr *Callee = CE->getCallee();
-  const FunctionDecl *FD = state->getSVal(Callee, LC).getAsFunctionDecl();
+  const FunctionDecl *FD = C.getSVal(Callee).getAsFunctionDecl();
 
   if (!FD)
     return;
diff --git a/examples/clang-interpreter/CMakeLists.txt b/examples/clang-interpreter/CMakeLists.txt
index e7e59d9..3084238 100644
--- a/examples/clang-interpreter/CMakeLists.txt
+++ b/examples/clang-interpreter/CMakeLists.txt
@@ -17,6 +17,7 @@
   )
 
 target_link_libraries(clang-interpreter
+  PRIVATE
   clangBasic
   clangCodeGen
   clangDriver
diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h
index c72be56..6468d1a 100644
--- a/include/clang-c/Index.h
+++ b/include/clang-c/Index.h
@@ -32,7 +32,7 @@
  * compatible, thus CINDEX_VERSION_MAJOR is expected to remain stable.
  */
 #define CINDEX_VERSION_MAJOR 0
-#define CINDEX_VERSION_MINOR 43
+#define CINDEX_VERSION_MINOR 48
 
 #define CINDEX_VERSION_ENCODE(major, minor) ( \
       ((major) * 10000)                       \
@@ -334,6 +334,16 @@
 CINDEX_LINKAGE unsigned clang_CXIndex_getGlobalOptions(CXIndex);
 
 /**
+ * \brief Sets the invocation emission path option in a CXIndex.
+ *
+ * The invocation emission path specifies a path which will contain log
+ * files for certain libclang invocations. A null value (default) implies that
+ * libclang invocations are not logged..
+ */
+CINDEX_LINKAGE void
+clang_CXIndex_setInvocationEmissionPathOption(CXIndex, const char *Path);
+
+/**
  * \defgroup CINDEX_FILES File manipulation routines
  *
  * @{
@@ -394,6 +404,21 @@
                                     const char *file_name);
 
 /**
+ * \brief Retrieve the buffer associated with the given file.
+ *
+ * \param tu the translation unit
+ *
+ * \param file the file for which to retrieve the buffer.
+ *
+ * \param size [out] if non-NULL, will be set to the size of the buffer.
+ *
+ * \returns a pointer to the buffer in memory that holds the contents of
+ * \p file, or a NULL pointer when the file is not loaded.
+ */
+CINDEX_LINKAGE const char *clang_getFileContents(CXTranslationUnit tu,
+                                                 CXFile file, size_t *size);
+
+/**
  * \brief Returns non-zero if the \c file1 and \c file2 point to the same file,
  * or they are both NULL.
  */
@@ -2617,6 +2642,16 @@
 CINDEX_LINKAGE unsigned clang_isDeclaration(enum CXCursorKind);
 
 /**
+ * \brief Determine whether the given declaration is invalid.
+ *
+ * A declaration is invalid if it could not be parsed successfully.
+ *
+ * \returns non-zero if the cursor represents a declaration and it is
+ * invalid, otherwise NULL.
+ */
+CINDEX_LINKAGE unsigned clang_isInvalidDeclaration(CXCursor);
+
+/**
  * \brief Determine whether the given cursor kind represents a simple
  * reference.
  *
@@ -4057,6 +4092,89 @@
                                                           unsigned options);
 
 /**
+ * \brief Opaque pointer representing a policy that controls pretty printing
+ * for \c clang_getCursorPrettyPrinted.
+ */
+typedef void *CXPrintingPolicy;
+
+/**
+ * \brief Properties for the printing policy.
+ *
+ * See \c clang::PrintingPolicy for more information.
+ */
+enum CXPrintingPolicyProperty {
+  CXPrintingPolicy_Indentation,
+  CXPrintingPolicy_SuppressSpecifiers,
+  CXPrintingPolicy_SuppressTagKeyword,
+  CXPrintingPolicy_IncludeTagDefinition,
+  CXPrintingPolicy_SuppressScope,
+  CXPrintingPolicy_SuppressUnwrittenScope,
+  CXPrintingPolicy_SuppressInitializers,
+  CXPrintingPolicy_ConstantArraySizeAsWritten,
+  CXPrintingPolicy_AnonymousTagLocations,
+  CXPrintingPolicy_SuppressStrongLifetime,
+  CXPrintingPolicy_SuppressLifetimeQualifiers,
+  CXPrintingPolicy_SuppressTemplateArgsInCXXConstructors,
+  CXPrintingPolicy_Bool,
+  CXPrintingPolicy_Restrict,
+  CXPrintingPolicy_Alignof,
+  CXPrintingPolicy_UnderscoreAlignof,
+  CXPrintingPolicy_UseVoidForZeroParams,
+  CXPrintingPolicy_TerseOutput,
+  CXPrintingPolicy_PolishForDeclaration,
+  CXPrintingPolicy_Half,
+  CXPrintingPolicy_MSWChar,
+  CXPrintingPolicy_IncludeNewlines,
+  CXPrintingPolicy_MSVCFormatting,
+  CXPrintingPolicy_ConstantsAsWritten,
+  CXPrintingPolicy_SuppressImplicitBase,
+  CXPrintingPolicy_FullyQualifiedName,
+
+  CXPrintingPolicy_LastProperty = CXPrintingPolicy_FullyQualifiedName
+};
+
+/**
+ * \brief Get a property value for the given printing policy.
+ */
+CINDEX_LINKAGE unsigned
+clang_PrintingPolicy_getProperty(CXPrintingPolicy Policy,
+                                 enum CXPrintingPolicyProperty Property);
+
+/**
+ * \brief Set a property value for the given printing policy.
+ */
+CINDEX_LINKAGE void clang_PrintingPolicy_setProperty(CXPrintingPolicy Policy,
+                                                     enum CXPrintingPolicyProperty Property,
+                                                     unsigned Value);
+
+/**
+ * \brief Retrieve the default policy for the cursor.
+ *
+ * The policy should be released after use with \c
+ * clang_PrintingPolicy_dispose.
+ */
+CINDEX_LINKAGE CXPrintingPolicy clang_getCursorPrintingPolicy(CXCursor);
+
+/**
+ * \brief Release a printing policy.
+ */
+CINDEX_LINKAGE void clang_PrintingPolicy_dispose(CXPrintingPolicy Policy);
+
+/**
+ * \brief Pretty print declarations.
+ *
+ * \param Cursor The cursor representing a declaration.
+ *
+ * \param Policy The policy to control the entities being printed. If
+ * NULL, a default policy is used.
+ *
+ * \returns The pretty printed declaration or the empty string for
+ * other cursors.
+ */
+CINDEX_LINKAGE CXString clang_getCursorPrettyPrinted(CXCursor Cursor,
+                                                     CXPrintingPolicy Policy);
+
+/**
  * \brief Retrieve the display name for the entity referenced by this cursor.
  *
  * The display name contains extra information that helps identify the cursor,
@@ -4442,6 +4560,12 @@
 CINDEX_LINKAGE unsigned clang_CXXMethod_isVirtual(CXCursor C);
 
 /**
+ * \brief Determine if a C++ record is abstract, i.e. whether a class or struct
+ * has a pure virtual member function.
+ */
+CINDEX_LINKAGE unsigned clang_CXXRecord_isAbstract(CXCursor C);
+
+/**
  * \brief Determine if an enum declaration refers to a scoped enum.
  */
 CINDEX_LINKAGE unsigned clang_EnumDecl_isScoped(CXCursor C);
@@ -5120,7 +5244,14 @@
    * \brief Whether to include brief documentation within the set of code
    * completions returned.
    */
-  CXCodeComplete_IncludeBriefComments = 0x04
+  CXCodeComplete_IncludeBriefComments = 0x04,
+
+  /**
+   * Whether to speed up completion by omitting top- or namespace-level entities
+   * defined in the preamble. There's no guarantee any particular entity is
+   * omitted. This may be useful if the headers are indexed externally.
+   */
+  CXCodeComplete_SkipPreamble = 0x08
 };
 
 /**
@@ -5961,6 +6092,9 @@
 
 /**
  * \brief Data for IndexerCallbacks#indexEntityReference.
+ *
+ * This may be deprecated in a future version as this duplicates
+ * the \c CXSymbolRole_Implicit bit in \c CXSymbolRole.
  */
 typedef enum {
   /**
@@ -5975,6 +6109,25 @@
 } CXIdxEntityRefKind;
 
 /**
+ * \brief Roles that are attributed to symbol occurrences.
+ *
+ * Internal: this currently mirrors low 9 bits of clang::index::SymbolRole with
+ * higher bits zeroed. These high bits may be exposed in the future.
+ */
+typedef enum {
+  CXSymbolRole_None = 0,
+  CXSymbolRole_Declaration = 1 << 0,
+  CXSymbolRole_Definition = 1 << 1,
+  CXSymbolRole_Reference = 1 << 2,
+  CXSymbolRole_Read = 1 << 3,
+  CXSymbolRole_Write = 1 << 4,
+  CXSymbolRole_Call = 1 << 5,
+  CXSymbolRole_Dynamic = 1 << 6,
+  CXSymbolRole_AddressOf = 1 << 7,
+  CXSymbolRole_Implicit = 1 << 8
+} CXSymbolRole;
+
+/**
  * \brief Data for IndexerCallbacks#indexEntityReference.
  */
 typedef struct {
@@ -6004,6 +6157,10 @@
    * \brief Lexical container context of the reference.
    */
   const CXIdxContainerInfo *container;
+  /**
+   * \brief Sets of symbol roles of the reference.
+   */
+  CXSymbolRole role;
 } CXIdxEntityRefInfo;
 
 /**
diff --git a/include/clang/AST/ASTContext.h b/include/clang/AST/ASTContext.h
index 09035be..a5b090d 100644
--- a/include/clang/AST/ASTContext.h
+++ b/include/clang/AST/ASTContext.h
@@ -1,4 +1,4 @@
-//===--- ASTContext.h - Context to hold long-lived AST nodes ----*- C++ -*-===//
+//===- ASTContext.h - Context to hold long-lived AST nodes ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,10 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the clang::ASTContext interface.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_AST_ASTCONTEXT_H
@@ -19,8 +19,8 @@
 #include "clang/AST/CanonicalType.h"
 #include "clang/AST/CommentCommandTraits.h"
 #include "clang/AST/Decl.h"
-#include "clang/AST/DeclarationName.h"
 #include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/ExternalASTSource.h"
 #include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/PrettyPrinter.h"
@@ -30,10 +30,9 @@
 #include "clang/AST/Type.h"
 #include "clang/Basic/AddressSpaces.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/Linkage.h"
-#include "clang/Basic/LLVM.h"
-#include "clang/Basic/Module.h"
 #include "clang/Basic/OperatorKinds.h"
 #include "clang/Basic/PartialDiagnostic.h"
 #include "clang/Basic/SanitizerBlacklist.h"
@@ -46,17 +45,17 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/PointerUnion.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
@@ -66,7 +65,6 @@
 #include <cstdint>
 #include <iterator>
 #include <memory>
-#include <new>
 #include <string>
 #include <type_traits>
 #include <utility>
@@ -76,49 +74,72 @@
 
 struct fltSemantics;
 
-} // end namespace llvm
+} // namespace llvm
 
 namespace clang {
 
+class APValue;
 class ASTMutationListener;
 class ASTRecordLayout;
 class AtomicExpr;
 class BlockExpr;
+class BuiltinTemplateDecl;
 class CharUnits;
 class CXXABI;
+class CXXConstructorDecl;
+class CXXMethodDecl;
+class CXXRecordDecl;
 class DiagnosticsEngine;
 class Expr;
+class MangleContext;
 class MangleNumberingContext;
 class MaterializeTemporaryExpr;
-// Decls
-class MangleContext;
+class MemberSpecializationInfo;
+class Module;
+class ObjCCategoryDecl;
+class ObjCCategoryImplDecl;
+class ObjCContainerDecl;
+class ObjCImplDecl;
+class ObjCImplementationDecl;
+class ObjCInterfaceDecl;
 class ObjCIvarDecl;
+class ObjCMethodDecl;
 class ObjCPropertyDecl;
+class ObjCPropertyImplDecl;
+class ObjCProtocolDecl;
+class ObjCTypeParamDecl;
+class Preprocessor;
+class Stmt;
+class StoredDeclsMap;
+class TemplateDecl;
+class TemplateParameterList;
+class TemplateTemplateParmDecl;
+class TemplateTypeParmDecl;
 class UnresolvedSetIterator;
-class UsingDecl;
 class UsingShadowDecl;
+class VarTemplateDecl;
 class VTableContextBase;
 
 namespace Builtin {
 
-  class Context;
+class Context;
 
-} // end namespace Builtin
+} // namespace Builtin
 
 enum BuiltinTemplateKind : int;
 
 namespace comments {
 
-  class FullComment;
+class FullComment;
 
-} // end namespace comments
+} // namespace comments
 
 struct TypeInfo {
-  uint64_t Width;
-  unsigned Align;
+  uint64_t Width = 0;
+  unsigned Align = 0;
   bool AlignIsRequired : 1;
 
-  TypeInfo() : Width(0), Align(0), AlignIsRequired(false) {}
+  TypeInfo() : AlignIsRequired(false) {}
   TypeInfo(uint64_t Width, unsigned Align, bool AlignIsRequired)
       : Width(Width), Align(Align), AlignIsRequired(AlignIsRequired) {}
 };
@@ -126,7 +147,7 @@
 /// \brief Holds long-lived AST nodes (such as types and decls) that can be
 /// referred to throughout the semantic analysis of a file.
 class ASTContext : public RefCountedBase<ASTContext> {
-  ASTContext &this_() { return *this; }
+  friend class NestedNameSpecifier;
 
   mutable SmallVector<Type *, 0> Types;
   mutable llvm::FoldingSet<ExtQuals> ExtQualNodes;
@@ -189,8 +210,7 @@
   ///
   /// This set is managed by the NestedNameSpecifier class.
   mutable llvm::FoldingSet<NestedNameSpecifier> NestedNameSpecifiers;
-  mutable NestedNameSpecifier *GlobalNestedNameSpecifier;
-  friend class NestedNameSpecifier;
+  mutable NestedNameSpecifier *GlobalNestedNameSpecifier = nullptr;
 
   /// \brief A cache mapping from RecordDecls to ASTRecordLayouts.
   ///
@@ -201,7 +221,7 @@
     ObjCLayouts;
 
   /// \brief A cache from types to size and alignment information.
-  typedef llvm::DenseMap<const Type *, struct TypeInfo> TypeInfoMap;
+  using TypeInfoMap = llvm::DenseMap<const Type *, struct TypeInfo>;
   mutable TypeInfoMap MemoizedTypeInfo;
 
   /// \brief A cache mapping from CXXRecordDecls to key functions.
@@ -235,7 +255,7 @@
 
   public:
     CanonicalTemplateTemplateParm(TemplateTemplateParmDecl *Parm)
-      : Parm(Parm) { }
+        : Parm(Parm) {}
 
     TemplateTemplateParmDecl *getParam() const { return Parm; }
 
@@ -251,32 +271,32 @@
     getCanonicalTemplateTemplateParmDecl(TemplateTemplateParmDecl *TTP) const;
 
   /// \brief The typedef for the __int128_t type.
-  mutable TypedefDecl *Int128Decl;
+  mutable TypedefDecl *Int128Decl = nullptr;
 
   /// \brief The typedef for the __uint128_t type.
-  mutable TypedefDecl *UInt128Decl;
+  mutable TypedefDecl *UInt128Decl = nullptr;
 
   /// \brief The typedef for the target specific predefined
   /// __builtin_va_list type.
-  mutable TypedefDecl *BuiltinVaListDecl;
+  mutable TypedefDecl *BuiltinVaListDecl = nullptr;
 
   /// The typedef for the predefined \c __builtin_ms_va_list type.
-  mutable TypedefDecl *BuiltinMSVaListDecl;
+  mutable TypedefDecl *BuiltinMSVaListDecl = nullptr;
 
   /// \brief The typedef for the predefined \c id type.
-  mutable TypedefDecl *ObjCIdDecl;
+  mutable TypedefDecl *ObjCIdDecl = nullptr;
 
   /// \brief The typedef for the predefined \c SEL type.
-  mutable TypedefDecl *ObjCSelDecl;
+  mutable TypedefDecl *ObjCSelDecl = nullptr;
 
   /// \brief The typedef for the predefined \c Class type.
-  mutable TypedefDecl *ObjCClassDecl;
+  mutable TypedefDecl *ObjCClassDecl = nullptr;
 
   /// \brief The typedef for the predefined \c Protocol class in Objective-C.
-  mutable ObjCInterfaceDecl *ObjCProtocolClassDecl;
+  mutable ObjCInterfaceDecl *ObjCProtocolClassDecl = nullptr;
 
   /// \brief The typedef for the predefined 'BOOL' type.
-  mutable TypedefDecl *BOOLDecl;
+  mutable TypedefDecl *BOOLDecl = nullptr;
 
   // Typedefs which may be provided defining the structure of Objective-C
   // pseudo-builtins
@@ -300,42 +320,42 @@
   mutable IdentifierInfo *TypePackElementName = nullptr;
 
   QualType ObjCConstantStringType;
-  mutable RecordDecl *CFConstantStringTagDecl;
-  mutable TypedefDecl *CFConstantStringTypeDecl;
+  mutable RecordDecl *CFConstantStringTagDecl = nullptr;
+  mutable TypedefDecl *CFConstantStringTypeDecl = nullptr;
 
   mutable QualType ObjCSuperType;
 
   QualType ObjCNSStringType;
 
   /// \brief The typedef declaration for the Objective-C "instancetype" type.
-  TypedefDecl *ObjCInstanceTypeDecl;
+  TypedefDecl *ObjCInstanceTypeDecl = nullptr;
 
   /// \brief The type for the C FILE type.
-  TypeDecl *FILEDecl;
+  TypeDecl *FILEDecl = nullptr;
 
   /// \brief The type for the C jmp_buf type.
-  TypeDecl *jmp_bufDecl;
+  TypeDecl *jmp_bufDecl = nullptr;
 
   /// \brief The type for the C sigjmp_buf type.
-  TypeDecl *sigjmp_bufDecl;
+  TypeDecl *sigjmp_bufDecl = nullptr;
 
   /// \brief The type for the C ucontext_t type.
-  TypeDecl *ucontext_tDecl;
+  TypeDecl *ucontext_tDecl = nullptr;
 
   /// \brief Type for the Block descriptor for Blocks CodeGen.
   ///
   /// Since this is only used for generation of debug info, it is not
   /// serialized.
-  mutable RecordDecl *BlockDescriptorType;
+  mutable RecordDecl *BlockDescriptorType = nullptr;
 
   /// \brief Type for the Block descriptor for Blocks CodeGen.
   ///
   /// Since this is only used for generation of debug info, it is not
   /// serialized.
-  mutable RecordDecl *BlockDescriptorExtendedType;
+  mutable RecordDecl *BlockDescriptorExtendedType = nullptr;
 
   /// \brief Declaration for the CUDA cudaConfigureCall function.
-  FunctionDecl *cudaConfigureCallDecl;
+  FunctionDecl *cudaConfigureCallDecl = nullptr;
 
   /// \brief Keeps track of all declaration attributes.
   ///
@@ -365,12 +385,19 @@
   };
   llvm::DenseMap<Module*, PerModuleInitializers*> ModuleInitializers;
 
+  ASTContext &this_() { return *this; }
+
 public:
   /// \brief A type synonym for the TemplateOrInstantiation mapping.
-  typedef llvm::PointerUnion<VarTemplateDecl *, MemberSpecializationInfo *>
-  TemplateOrSpecializationInfo;
+  using TemplateOrSpecializationInfo =
+      llvm::PointerUnion<VarTemplateDecl *, MemberSpecializationInfo *>;
 
 private:
+  friend class ASTDeclReader;
+  friend class ASTReader;
+  friend class ASTWriter;
+  friend class CXXRecordDecl;
+
   /// \brief A mapping to contain the template or declaration that
   /// a variable declaration describes or was instantiated from,
   /// respectively.
@@ -440,7 +467,7 @@
   /// Since most C++ member functions aren't virtual and therefore
   /// don't override anything, we store the overridden functions in
   /// this map on the side rather than within the CXXMethodDecl structure.
-  typedef llvm::TinyPtrVector<const CXXMethodDecl*> CXXMethodVector;
+  using CXXMethodVector = llvm::TinyPtrVector<const CXXMethodDecl *>;
   llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector> OverriddenMethods;
 
   /// \brief Mapping from each declaration context to its corresponding
@@ -456,16 +483,16 @@
 
   /// \brief Mapping that stores parameterIndex values for ParmVarDecls when
   /// that value exceeds the bitfield size of ParmVarDeclBits.ParameterIndex.
-  typedef llvm::DenseMap<const VarDecl *, unsigned> ParameterIndexTable;
+  using ParameterIndexTable = llvm::DenseMap<const VarDecl *, unsigned>;
   ParameterIndexTable ParamIndices;
 
-  ImportDecl *FirstLocalImport;
-  ImportDecl *LastLocalImport;
+  ImportDecl *FirstLocalImport = nullptr;
+  ImportDecl *LastLocalImport = nullptr;
 
   TranslationUnitDecl *TUDecl;
-  mutable ExternCContextDecl *ExternCContext;
-  mutable BuiltinTemplateDecl *MakeIntegerSeqDecl;
-  mutable BuiltinTemplateDecl *TypePackElementDecl;
+  mutable ExternCContextDecl *ExternCContext = nullptr;
+  mutable BuiltinTemplateDecl *MakeIntegerSeqDecl = nullptr;
+  mutable BuiltinTemplateDecl *TypePackElementDecl = nullptr;
 
   /// \brief The associated SourceManager object.
   SourceManager &SourceMgr;
@@ -496,19 +523,14 @@
   CXXABI *createCXXABI(const TargetInfo &T);
 
   /// \brief The logical -> physical address space map.
-  const LangASMap *AddrSpaceMap;
+  const LangASMap *AddrSpaceMap = nullptr;
 
   /// \brief Address space map mangling must be used with language specific
   /// address spaces (e.g. OpenCL/CUDA)
   bool AddrSpaceMapMangling;
 
-  friend class ASTDeclReader;
-  friend class ASTReader;
-  friend class ASTWriter;
-  friend class CXXRecordDecl;
-
-  const TargetInfo *Target;
-  const TargetInfo *AuxTarget;
+  const TargetInfo *Target = nullptr;
+  const TargetInfo *AuxTarget = nullptr;
   clang::PrintingPolicy PrintingPolicy;
 
 public:
@@ -517,31 +539,33 @@
   Builtin::Context &BuiltinInfo;
   mutable DeclarationNameTable DeclarationNames;
   IntrusiveRefCntPtr<ExternalASTSource> ExternalSource;
-  ASTMutationListener *Listener;
+  ASTMutationListener *Listener = nullptr;
 
   /// \brief Contains parents of a node.
-  typedef llvm::SmallVector<ast_type_traits::DynTypedNode, 2> ParentVector;
+  using ParentVector = llvm::SmallVector<ast_type_traits::DynTypedNode, 2>;
 
   /// \brief Maps from a node to its parents. This is used for nodes that have
   /// pointer identity only, which are more common and we can save space by
   /// only storing a unique pointer to them.
-  typedef llvm::DenseMap<const void *,
-                         llvm::PointerUnion4<const Decl *, const Stmt *,
-                                             ast_type_traits::DynTypedNode *,
-                                             ParentVector *>> ParentMapPointers;
+  using ParentMapPointers =
+      llvm::DenseMap<const void *,
+                     llvm::PointerUnion4<const Decl *, const Stmt *,
+                                         ast_type_traits::DynTypedNode *,
+                                         ParentVector *>>;
 
   /// Parent map for nodes without pointer identity. We store a full
   /// DynTypedNode for all keys.
-  typedef llvm::DenseMap<
-      ast_type_traits::DynTypedNode,
-      llvm::PointerUnion4<const Decl *, const Stmt *,
-                          ast_type_traits::DynTypedNode *, ParentVector *>>
-      ParentMapOtherNodes;
+  using ParentMapOtherNodes =
+      llvm::DenseMap<ast_type_traits::DynTypedNode,
+                     llvm::PointerUnion4<const Decl *, const Stmt *,
+                                         ast_type_traits::DynTypedNode *,
+                                         ParentVector *>>;
 
   /// Container for either a single DynTypedNode or for an ArrayRef to
   /// DynTypedNode. For use with ParentMap.
   class DynTypedNodeList {
-    typedef ast_type_traits::DynTypedNode DynTypedNode;
+    using DynTypedNode = ast_type_traits::DynTypedNode;
+
     llvm::AlignedCharArrayUnion<ast_type_traits::DynTypedNode,
                                 ArrayRef<DynTypedNode>> Storage;
     bool IsSingleNode;
@@ -550,6 +574,7 @@
     DynTypedNodeList(const DynTypedNode &N) : IsSingleNode(true) {
       new (Storage.buffer) DynTypedNode(N);
     }
+
     DynTypedNodeList(ArrayRef<DynTypedNode> A) : IsSingleNode(false) {
       new (Storage.buffer) ArrayRef<DynTypedNode>(A);
     }
@@ -628,13 +653,14 @@
   template <typename T> T *Allocate(size_t Num = 1) const {
     return static_cast<T *>(Allocate(Num * sizeof(T), alignof(T)));
   }
-  void Deallocate(void *Ptr) const { }
+  void Deallocate(void *Ptr) const {}
 
   /// Return the total amount of physical memory allocated for representing
   /// AST nodes and type information.
   size_t getASTAllocatedMemory() const {
     return BumpAlloc.getTotalMemory();
   }
+
   /// Return the total memory used for various side tables.
   size_t getSideTableAllocatedMemory() const;
 
@@ -651,6 +677,7 @@
   /// Returns empty type if there is no appropriate target types.
   QualType getIntTypeForBitwidth(unsigned DestWidth,
                                  unsigned Signed) const;
+
   /// getRealTypeForBitwidth -
   /// sets floating point QualTy according to specified bitwidth.
   /// Returns empty type if there is no appropriate target types.
@@ -678,7 +705,7 @@
   RawCommentList Comments;
 
   /// \brief True if comments are already loaded from ExternalASTSource.
-  mutable bool CommentsLoaded;
+  mutable bool CommentsLoaded = false;
 
   class RawCommentAndCacheFlags {
   public:
@@ -757,28 +784,28 @@
   void addComment(const RawComment &RC) {
     assert(LangOpts.RetainCommentsFromSystemHeaders ||
            !SourceMgr.isInSystemHeader(RC.getSourceRange().getBegin()));
-    Comments.addComment(RC, BumpAlloc);
+    Comments.addComment(RC, LangOpts.CommentOpts, BumpAlloc);
   }
 
   /// \brief Return the documentation comment attached to a given declaration.
-  /// Returns NULL if no comment is attached.
+  /// Returns nullptr if no comment is attached.
   ///
-  /// \param OriginalDecl if not NULL, is set to declaration AST node that had
-  /// the comment, if the comment we found comes from a redeclaration.
+  /// \param OriginalDecl if not nullptr, is set to declaration AST node that
+  /// had the comment, if the comment we found comes from a redeclaration.
   const RawComment *
   getRawCommentForAnyRedecl(const Decl *D,
                             const Decl **OriginalDecl = nullptr) const;
 
   /// Return parsed documentation comment attached to a given declaration.
-  /// Returns NULL if no comment is attached.
+  /// Returns nullptr if no comment is attached.
   ///
-  /// \param PP the Preprocessor used with this TU.  Could be NULL if
+  /// \param PP the Preprocessor used with this TU.  Could be nullptr if
   /// preprocessor is not available.
   comments::FullComment *getCommentForDecl(const Decl *D,
                                            const Preprocessor *PP) const;
 
   /// Return parsed documentation comment attached to a given declaration.
-  /// Returns NULL if no comment is attached. Does not look at any
+  /// Returns nullptr if no comment is attached. Does not look at any
   /// redeclarations of the declaration.
   comments::FullComment *getLocalCommentForDeclUncached(const Decl *D) const;
 
@@ -790,16 +817,16 @@
 
   /// \brief Iterator that visits import declarations.
   class import_iterator {
-    ImportDecl *Import;
+    ImportDecl *Import = nullptr;
 
   public:
-    typedef ImportDecl               *value_type;
-    typedef ImportDecl               *reference;
-    typedef ImportDecl               *pointer;
-    typedef int                       difference_type;
-    typedef std::forward_iterator_tag iterator_category;
+    using value_type = ImportDecl *;
+    using reference = ImportDecl *;
+    using pointer = ImportDecl *;
+    using difference_type = int;
+    using iterator_category = std::forward_iterator_tag;
 
-    import_iterator() : Import() {}
+    import_iterator() = default;
     explicit import_iterator(ImportDecl *Import) : Import(Import) {}
 
     reference operator*() const { return Import; }
@@ -878,7 +905,7 @@
   void setInstantiatedFromUnnamedFieldDecl(FieldDecl *Inst, FieldDecl *Tmpl);
 
   // Access to the set of methods overridden by the given C++ method.
-  typedef CXXMethodVector::const_iterator overridden_cxx_method_iterator;
+  using overridden_cxx_method_iterator = CXXMethodVector::const_iterator;
   overridden_cxx_method_iterator
   overridden_methods_begin(const CXXMethodDecl *Method) const;
 
@@ -886,8 +913,10 @@
   overridden_methods_end(const CXXMethodDecl *Method) const;
 
   unsigned overridden_methods_size(const CXXMethodDecl *Method) const;
-  typedef llvm::iterator_range<overridden_cxx_method_iterator>
-      overridden_method_range;
+
+  using overridden_method_range =
+      llvm::iterator_range<overridden_cxx_method_iterator>;
+
   overridden_method_range overridden_methods(const CXXMethodDecl *Method) const;
 
   /// \brief Note that the given C++ \p Method overrides the given \p
@@ -914,7 +943,8 @@
     return Import->NextLocalImport;
   }
 
-  typedef llvm::iterator_range<import_iterator> import_range;
+  using import_range = llvm::iterator_range<import_iterator>;
+
   import_range local_imports() const {
     return import_range(import_iterator(FirstLocalImport), import_iterator());
   }
@@ -931,6 +961,7 @@
   /// and should be visible whenever \p M is visible.
   void mergeDefinitionIntoModule(NamedDecl *ND, Module *M,
                                  bool NotifyListeners = true);
+
   /// \brief Clean up the merged definition list. Call this if you might have
   /// added duplicates into the list.
   void deduplicateMergedDefinitonsFor(NamedDecl *ND);
@@ -1131,6 +1162,13 @@
   /// \brief Change the result type of a function type once it is deduced.
   void adjustDeducedFunctionResultType(FunctionDecl *FD, QualType ResultType);
 
+  /// Get a function type and produce the equivalent function type with the
+  /// specified exception specification. Type sugar that can be present on a
+  /// declaration of a function with an exception specification is permitted
+  /// and preserved. Other type sugar (for instance, typedefs) is not.
+  QualType getFunctionTypeWithExceptionSpec(
+      QualType Orig, const FunctionProtoType::ExceptionSpecInfo &ESI);
+
   /// \brief Determine whether two function types are the same, ignoring
   /// exception specifications in cases where they're part of the type.
   bool hasSameFunctionTypeIgnoringExceptionSpec(QualType T, QualType U);
@@ -1141,6 +1179,10 @@
                            const FunctionProtoType::ExceptionSpecInfo &ESI,
                            bool AsWritten = false);
 
+  /// Determine whether a type is a class that should be detructed in the
+  /// callee function.
+  bool isParamDestroyedInCallee(QualType T) const;
+
   /// \brief Return the uniqued reference to the type for a complex
   /// number with the specified element type.
   QualType getComplexType(QualType T) const;
@@ -1185,6 +1227,7 @@
 
   /// \brief Return a read_only pipe type for the specified type.
   QualType getReadPipeType(QualType T) const;
+
   /// \brief Return a write_only pipe type for the specified type.
   QualType getWritePipeType(QualType T) const;
 
@@ -1192,9 +1235,16 @@
   /// pointer to blocks.
   QualType getBlockDescriptorExtendedType() const;
 
+  /// Map an AST Type to an OpenCLTypeKind enum value.
+  TargetInfo::OpenCLTypeKind getOpenCLTypeKind(const Type *T) const;
+
+  /// Get address space for OpenCL type.
+  LangAS getOpenCLTypeAddrSpace(const Type *T) const;
+
   void setcudaConfigureCallDecl(FunctionDecl *FD) {
     cudaConfigureCallDecl = FD;
   }
+
   FunctionDecl *getcudaConfigureCallDecl() {
     return cudaConfigureCallDecl;
   }
@@ -1202,7 +1252,6 @@
   /// Returns true iff we need copy/dispose helpers for the given type.
   bool BlockRequiresCopying(QualType Ty, const VarDecl *D);
 
-
   /// Returns true, if given type has a known lifetime. HasByrefExtendedLayout is set
   /// to false in this case. If HasByrefExtendedLayout returns true, byref variable
   /// has extended lifetime.
@@ -1410,6 +1459,7 @@
                                 QualType Canonical = QualType()) const;
 
   bool ObjCObjectAdoptsQTypeProtocols(QualType QT, ObjCInterfaceDecl *Decl);
+
   /// QIdProtocolsAdoptObjCObjectProtocols - Checks that protocols in
   /// QT's qualified-id protocol list adopt all protocols in IDecl's list
   /// of protocols.
@@ -1440,7 +1490,7 @@
   /// \brief C++11 deduction pattern for 'auto &&' type.
   QualType getAutoRRefDeductType() const;
 
-  /// \brief C++1z deduced class template specialization type.
+  /// \brief C++17 deduced class template specialization type.
   QualType getDeducedTemplateSpecializationType(TemplateName Template,
                                                 QualType DeducedType,
                                                 bool IsDependent) const;
@@ -1902,10 +1952,17 @@
                                         const TemplateArgument &ArgPack) const;
 
   enum GetBuiltinTypeError {
-    GE_None,              ///< No error
-    GE_Missing_stdio,     ///< Missing a type from <stdio.h>
-    GE_Missing_setjmp,    ///< Missing a type from <setjmp.h>
-    GE_Missing_ucontext   ///< Missing a type from <ucontext.h>
+    /// No error
+    GE_None,
+
+    /// Missing a type from <stdio.h>
+    GE_Missing_stdio,
+
+    /// Missing a type from <setjmp.h>
+    GE_Missing_setjmp,
+
+    /// Missing a type from <ucontext.h>
+    GE_Missing_ucontext
   };
 
   /// \brief Return the type for the specified builtin.
@@ -2056,7 +2113,7 @@
   getASTObjCImplementationLayout(const ObjCImplementationDecl *D) const;
 
   /// \brief Get our current best idea for the key function of the
-  /// given record decl, or NULL if there isn't one.
+  /// given record decl, or nullptr if there isn't one.
   ///
   /// The key function is, according to the Itanium C++ ABI section 5.2.3:
   ///   ...the first non-pure virtual function that is not inline at the
@@ -2109,6 +2166,10 @@
   void CollectInheritedProtocols(const Decl *CDecl,
                           llvm::SmallPtrSet<ObjCProtocolDecl*, 8> &Protocols);
 
+  /// \brief Return true if the specified type has unique object representations
+  /// according to (C++17 [meta.unary.prop]p9)
+  bool hasUniqueObjectRepresentations(QualType Ty) const;
+
   //===--------------------------------------------------------------------===//
   //                            Type Operators
   //===--------------------------------------------------------------------===//
@@ -2140,7 +2201,6 @@
   bool hasSameType(QualType T1, QualType T2) const {
     return getCanonicalType(T1) == getCanonicalType(T2);
   }
-
   bool hasSameType(const Type *T1, const Type *T2) const {
     return getCanonicalType(T1) == getCanonicalType(T2);
   }
@@ -2392,12 +2452,15 @@
   bool isObjCIdType(QualType T) const {
     return T == getObjCIdType();
   }
+
   bool isObjCClassType(QualType T) const {
     return T == getObjCClassType();
   }
+
   bool isObjCSelType(QualType T) const {
     return T == getObjCSelType();
   }
+
   bool ObjCQualifiedIdTypesAreCompatible(QualType LHS, QualType RHS,
                                          bool ForCompare);
 
@@ -2490,12 +2553,13 @@
 
   bool isSentinelNullExpr(const Expr *E);
 
-  /// \brief Get the implementation of the ObjCInterfaceDecl \p D, or NULL if
+  /// \brief Get the implementation of the ObjCInterfaceDecl \p D, or nullptr if
   /// none exists.
   ObjCImplementationDecl *getObjCImplementation(ObjCInterfaceDecl *D);
-  /// \brief Get the implementation of the ObjCCategoryDecl \p D, or NULL if
+
+  /// \brief Get the implementation of the ObjCCategoryDecl \p D, or nullptr if
   /// none exists.
-  ObjCCategoryImplDecl   *getObjCImplementation(ObjCCategoryDecl *D);
+  ObjCCategoryImplDecl *getObjCImplementation(ObjCCategoryDecl *D);
 
   /// \brief Return true if there is at least one \@implementation in the TU.
   bool AnyObjCImplementation() {
@@ -2505,6 +2569,7 @@
   /// \brief Set the implementation of ObjCInterfaceDecl.
   void setObjCImplementation(ObjCInterfaceDecl *IFaceD,
                              ObjCImplementationDecl *ImplD);
+
   /// \brief Set the implementation of ObjCCategoryDecl.
   void setObjCImplementation(ObjCCategoryDecl *CatD,
                              ObjCCategoryImplDecl *ImplD);
@@ -2524,8 +2589,9 @@
 
   /// \brief Set the copy inialization expression of a block var decl.
   void setBlockVarCopyInits(VarDecl*VD, Expr* Init);
+
   /// \brief Get the copy initialization expression of the VarDecl \p VD, or
-  /// NULL if none exists.
+  /// nullptr if none exists.
   Expr *getBlockVarCopyInits(const VarDecl* VD);
 
   /// \brief Allocate an uninitialized TypeSourceInfo.
@@ -2578,6 +2644,12 @@
   /// it is not used.
   bool DeclMustBeEmitted(const Decl *D);
 
+  /// \brief Visits all versions of a multiversioned function with the passed
+  /// predicate.
+  void forEachMultiversionedFunctionVersion(
+      const FunctionDecl *FD,
+      llvm::function_ref<void(const FunctionDecl *)> Pred) const;
+
   const CXXConstructorDecl *
   getCopyConstructorForExceptionObject(CXXRecordDecl *RD);
 
@@ -2694,6 +2766,7 @@
                                        const FieldDecl *Field,
                                        bool includeVBases = true,
                                        QualType *NotEncodedT=nullptr) const;
+
 public:
   // Adds the encoding of a method parameter or return type.
   void getObjCEncodingForMethodParameter(Decl::ObjCDeclQualifier QT,
@@ -2705,11 +2778,19 @@
   bool isMSStaticDataMemberInlineDefinition(const VarDecl *VD) const;
 
   enum class InlineVariableDefinitionKind {
-    None,        ///< Not an inline variable.
-    Weak,        ///< Weak definition of inline variable.
-    WeakUnknown, ///< Weak for now, might become strong later in this TU.
-    Strong       ///< Strong definition.
+    /// Not an inline variable.
+    None,
+
+    /// Weak definition of inline variable.
+    Weak,
+
+    /// Weak for now, might become strong later in this TU.
+    WeakUnknown,
+
+    /// Strong definition.
+    Strong
   };
+
   /// \brief Determine whether a definition of this inline variable should
   /// be treated as a weak or strong definition. For compatibility with
   /// C++14 and before, for a constexpr static data member, if there is an
@@ -2719,6 +2800,9 @@
   getInlineVariableDefinitionKind(const VarDecl *VD) const;
 
 private:
+  friend class DeclarationNameTable;
+  friend class DeclContext;
+
   const ASTRecordLayout &
   getObjCLayout(const ObjCInterfaceDecl *D,
                 const ObjCImplementationDecl *Impl) const;
@@ -2731,26 +2815,23 @@
   // into the datastructures which avoids this mess during deallocation but is
   // wasteful of memory, and here we require a lot of error prone book keeping
   // in order to track and run destructors while we're tearing things down.
-  typedef llvm::SmallVector<std::pair<void (*)(void *), void *>, 16>
-      DeallocationFunctionsAndArguments;
+  using DeallocationFunctionsAndArguments =
+      llvm::SmallVector<std::pair<void (*)(void *), void *>, 16>;
   DeallocationFunctionsAndArguments Deallocations;
 
   // FIXME: This currently contains the set of StoredDeclMaps used
   // by DeclContext objects.  This probably should not be in ASTContext,
   // but we include it here so that ASTContext can quickly deallocate them.
-  llvm::PointerIntPair<StoredDeclsMap*,1> LastSDM;
-
-  friend class DeclContext;
-  friend class DeclarationNameTable;
-
-  void ReleaseDeclContextMaps();
-  void ReleaseParentMapEntries();
+  llvm::PointerIntPair<StoredDeclsMap *, 1> LastSDM;
 
   std::unique_ptr<ParentMapPointers> PointerParents;
   std::unique_ptr<ParentMapOtherNodes> OtherParents;
 
   std::unique_ptr<VTableContextBase> VTContext;
 
+  void ReleaseDeclContextMaps();
+  void ReleaseParentMapEntries();
+
 public:
   enum PragmaSectionFlag : unsigned {
     PSF_None = 0,
@@ -2770,27 +2851,26 @@
     SectionInfo(DeclaratorDecl *Decl,
                 SourceLocation PragmaSectionLocation,
                 int SectionFlags)
-      : Decl(Decl),
-        PragmaSectionLocation(PragmaSectionLocation),
-        SectionFlags(SectionFlags) {}
+        : Decl(Decl), PragmaSectionLocation(PragmaSectionLocation),
+          SectionFlags(SectionFlags) {}
   };
 
   llvm::StringMap<SectionInfo> SectionInfos;
 };
 
 /// \brief Utility function for constructing a nullary selector.
-static inline Selector GetNullarySelector(StringRef name, ASTContext& Ctx) {
+inline Selector GetNullarySelector(StringRef name, ASTContext &Ctx) {
   IdentifierInfo* II = &Ctx.Idents.get(name);
   return Ctx.Selectors.getSelector(0, &II);
 }
 
 /// \brief Utility function for constructing an unary selector.
-static inline Selector GetUnarySelector(StringRef name, ASTContext& Ctx) {
+inline Selector GetUnarySelector(StringRef name, ASTContext &Ctx) {
   IdentifierInfo* II = &Ctx.Idents.get(name);
   return Ctx.Selectors.getSelector(1, &II);
 }
 
-}  // end namespace clang
+} // namespace clang
 
 // operator new and delete aren't allowed inside namespaces.
 
@@ -2821,11 +2901,12 @@
 /// @param C The ASTContext that provides the allocator.
 /// @param Alignment The alignment of the allocated memory (if the underlying
 ///                  allocator supports it).
-/// @return The allocated memory. Could be NULL.
+/// @return The allocated memory. Could be nullptr.
 inline void *operator new(size_t Bytes, const clang::ASTContext &C,
                           size_t Alignment) {
   return C.Allocate(Bytes, Alignment);
 }
+
 /// @brief Placement delete companion to the new above.
 ///
 /// This operator is just a companion to the new above. There is no way of
@@ -2858,7 +2939,7 @@
 /// @param C The ASTContext that provides the allocator.
 /// @param Alignment The alignment of the allocated memory (if the underlying
 ///                  allocator supports it).
-/// @return The allocated memory. Could be NULL.
+/// @return The allocated memory. Could be nullptr.
 inline void *operator new[](size_t Bytes, const clang::ASTContext& C,
                             size_t Alignment = 8) {
   return C.Allocate(Bytes, Alignment);
diff --git a/include/clang/AST/ASTMutationListener.h b/include/clang/AST/ASTMutationListener.h
index ed82b32..9395d36 100644
--- a/include/clang/AST/ASTMutationListener.h
+++ b/include/clang/AST/ASTMutationListener.h
@@ -36,6 +36,7 @@
   class QualType;
   class RecordDecl;
   class TagDecl;
+  class ValueDecl;
   class VarDecl;
   class VarTemplateDecl;
   class VarTemplateSpecializationDecl;
@@ -87,8 +88,13 @@
   /// \brief An implicit member got a definition.
   virtual void CompletedImplicitDefinition(const FunctionDecl *D) {}
 
-  /// \brief A static data member was implicitly instantiated.
-  virtual void StaticDataMemberInstantiated(const VarDecl *D) {}
+  /// \brief The instantiation of a templated function or variable was
+  /// requested. In particular, the point of instantiation and template
+  /// specialization kind of \p D may have changed.
+  virtual void InstantiationRequested(const ValueDecl *D) {}
+
+  /// \brief A templated variable's definition was implicitly instantiated.
+  virtual void VariableDefinitionInstantiated(const VarDecl *D) {}
 
   /// \brief A function template's definition was instantiated.
   virtual void FunctionDefinitionInstantiated(const FunctionDecl *D) {}
diff --git a/include/clang/AST/ASTUnresolvedSet.h b/include/clang/AST/ASTUnresolvedSet.h
index 9078a0e..3693aec 100644
--- a/include/clang/AST/ASTUnresolvedSet.h
+++ b/include/clang/AST/ASTUnresolvedSet.h
@@ -1,4 +1,4 @@
-//===-- ASTUnresolvedSet.h - Unresolved sets of declarations  ---*- C++ -*-===//
+//===- ASTUnresolvedSet.h - Unresolved sets of declarations -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,14 +16,22 @@
 #define LLVM_CLANG_AST_ASTUNRESOLVEDSET_H
 
 #include "clang/AST/ASTVector.h"
+#include "clang/AST/DeclAccessPair.h"
 #include "clang/AST/UnresolvedSet.h"
+#include "clang/Basic/Specifiers.h"
+#include <cassert>
+#include <cstdint>
 
 namespace clang {
 
+class NamedDecl;
+
 /// \brief An UnresolvedSet-like class which uses the ASTContext's allocator.
 class ASTUnresolvedSet {
+  friend class LazyASTUnresolvedSet;
+
   struct DeclsTy : ASTVector<DeclAccessPair> {
-    DeclsTy() {}
+    DeclsTy() = default;
     DeclsTy(ASTContext &C, unsigned N) : ASTVector<DeclAccessPair>(C, N) {}
 
     bool isLazy() const { return getTag(); }
@@ -32,14 +40,12 @@
 
   DeclsTy Decls;
 
-  friend class LazyASTUnresolvedSet;
-
 public:
-  ASTUnresolvedSet() {}
+  ASTUnresolvedSet() = default;
   ASTUnresolvedSet(ASTContext &C, unsigned N) : Decls(C, N) {}
 
-  typedef UnresolvedSetIterator iterator;
-  typedef UnresolvedSetIterator const_iterator;
+  using iterator = UnresolvedSetIterator;
+  using const_iterator = UnresolvedSetIterator;
 
   iterator begin() { return iterator(Decls.begin()); }
   iterator end() { return iterator(Decls.end()); }
@@ -98,13 +104,14 @@
   }
 
   void reserve(ASTContext &C, unsigned N) { Impl.reserve(C, N); }
+
   void addLazyDecl(ASTContext &C, uintptr_t ID, AccessSpecifier AS) {
     assert(Impl.empty() || Impl.Decls.isLazy());
     Impl.Decls.setLazy(true);
-    Impl.addDecl(C, reinterpret_cast<NamedDecl*>(ID << 2), AS);
+    Impl.addDecl(C, reinterpret_cast<NamedDecl *>(ID << 2), AS);
   }
 };
 
 } // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_ASTUNRESOLVEDSET_H
diff --git a/include/clang/AST/ASTVector.h b/include/clang/AST/ASTVector.h
index 717a9e9..80cd6b7 100644
--- a/include/clang/AST/ASTVector.h
+++ b/include/clang/AST/ASTVector.h
@@ -1,4 +1,4 @@
-//===- ASTVector.h - Vector that uses ASTContext for allocation  --*- C++ -*-=//
+//===- ASTVector.h - Vector that uses ASTContext for allocation ---*- C++ -*-=//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,22 +18,26 @@
 #ifndef LLVM_CLANG_AST_ASTVECTOR_H
 #define LLVM_CLANG_AST_ASTVECTOR_H
 
-#include "clang/AST/AttrIterator.h"
 #include "llvm/ADT/PointerIntPair.h"
-#include "llvm/Support/type_traits.h"
 #include <algorithm>
+#include <cassert>
 #include <cstddef>
 #include <cstring>
+#include <iterator>
 #include <memory>
+#include <type_traits>
+#include <utility>
 
 namespace clang {
-  class ASTContext;
+
+class ASTContext;
 
 template<typename T>
 class ASTVector {
 private:
-  T *Begin, *End;
-  llvm::PointerIntPair<T*, 1, bool> Capacity;
+  T *Begin = nullptr;
+  T *End = nullptr;
+  llvm::PointerIntPair<T *, 1, bool> Capacity;
 
   void setEnd(T *P) { this->End = P; }
 
@@ -45,7 +49,7 @@
 
 public:
   // Default ctor - Initialize to empty.
-  ASTVector() : Begin(nullptr), End(nullptr), Capacity(nullptr, false) {}
+  ASTVector() : Capacity(nullptr, false) {}
 
   ASTVector(ASTVector &&O) : Begin(O.Begin), End(O.End), Capacity(O.Capacity) {
     O.Begin = O.End = nullptr;
@@ -53,14 +57,15 @@
     O.Capacity.setInt(false);
   }
 
-  ASTVector(const ASTContext &C, unsigned N)
-      : Begin(nullptr), End(nullptr), Capacity(nullptr, false) {
+  ASTVector(const ASTContext &C, unsigned N) : Capacity(nullptr, false) {
     reserve(C, N);
   }
 
   ASTVector &operator=(ASTVector &&RHS) {
     ASTVector O(std::move(RHS));
+
     using std::swap;
+
     swap(Begin, O.Begin);
     swap(End, O.End);
     swap(Capacity, O.Capacity);
@@ -74,19 +79,19 @@
     }
   }
 
-  typedef size_t size_type;
-  typedef ptrdiff_t difference_type;
-  typedef T value_type;
-  typedef T* iterator;
-  typedef const T* const_iterator;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+  using value_type = T;
+  using iterator = T *;
+  using const_iterator = const T *;
 
-  typedef std::reverse_iterator<const_iterator>  const_reverse_iterator;
-  typedef std::reverse_iterator<iterator>  reverse_iterator;
+  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+  using reverse_iterator = std::reverse_iterator<iterator>;
 
-  typedef T& reference;
-  typedef const T& const_reference;
-  typedef T* pointer;
-  typedef const T* const_pointer;
+  using reference = T &;
+  using const_reference = const T &;
+  using pointer = T *;
+  using const_pointer = const T *;
 
   // forward iterator creation methods.
   iterator begin() { return Begin; }
@@ -175,7 +180,6 @@
   size_t capacity() const { return this->capacity_ptr() - Begin; }
 
   /// append - Add the specified range to the end of the SmallVector.
-  ///
   template<typename in_iter>
   void append(const ASTContext &C, in_iter in_start, in_iter in_end) {
     size_type NumInputs = std::distance(in_start, in_end);
@@ -195,7 +199,6 @@
   }
 
   /// append - Add the specified range to the end of the SmallVector.
-  ///
   void append(const ASTContext &C, size_type NumInputs, const T &Elt) {
     // Grow allocated space if needed.
     if (NumInputs > size_type(this->capacity_ptr()-this->end()))
@@ -368,6 +371,7 @@
   const_iterator capacity_ptr() const {
     return (iterator) Capacity.getPointer();
   }
+
   iterator capacity_ptr() { return (iterator)Capacity.getPointer(); }
 };
 
@@ -401,5 +405,6 @@
   Capacity.setPointer(Begin+NewCapacity);
 }
 
-} // end: clang namespace
-#endif
+} // namespace clang
+
+#endif // LLVM_CLANG_AST_ASTVECTOR_H
diff --git a/include/clang/AST/Attr.h b/include/clang/AST/Attr.h
index bbe320c..38dce44 100644
--- a/include/clang/AST/Attr.h
+++ b/include/clang/AST/Attr.h
@@ -52,8 +52,10 @@
   unsigned Inherited : 1;
   unsigned IsPackExpansion : 1;
   unsigned Implicit : 1;
+  // FIXME: These are properties of the attribute kind, not state for this
+  // instance of the attribute.
   unsigned IsLateParsed : 1;
-  unsigned DuplicatesAllowed : 1;
+  unsigned InheritEvenIfAlreadyPresent : 1;
 
   void *operator new(size_t bytes) noexcept {
     llvm_unreachable("Attrs cannot be allocated with regular 'new'.");
@@ -74,10 +76,10 @@
 
 protected:
   Attr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex,
-       bool IsLateParsed, bool DuplicatesAllowed)
+       bool IsLateParsed)
     : Range(R), AttrKind(AK), SpellingListIndex(SpellingListIndex),
       Inherited(false), IsPackExpansion(false), Implicit(false),
-      IsLateParsed(IsLateParsed), DuplicatesAllowed(DuplicatesAllowed) {}
+      IsLateParsed(IsLateParsed), InheritEvenIfAlreadyPresent(false) {}
 
 public:
 
@@ -109,18 +111,13 @@
 
   // Pretty print this attribute.
   void printPretty(raw_ostream &OS, const PrintingPolicy &Policy) const;
-
-  /// \brief By default, attributes cannot be duplicated when being merged;
-  /// however, an attribute can override this. Returns true if the attribute
-  /// can be duplicated when merging.
-  bool duplicatesAllowed() const { return DuplicatesAllowed; }
 };
 
 class StmtAttr : public Attr {
 protected:
   StmtAttr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex,
-                  bool IsLateParsed, bool DuplicatesAllowed)
-      : Attr(AK, R, SpellingListIndex, IsLateParsed, DuplicatesAllowed) {}
+                  bool IsLateParsed)
+      : Attr(AK, R, SpellingListIndex, IsLateParsed) {}
 
 public:
   static bool classof(const Attr *A) {
@@ -132,12 +129,20 @@
 class InheritableAttr : public Attr {
 protected:
   InheritableAttr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex,
-                  bool IsLateParsed, bool DuplicatesAllowed)
-      : Attr(AK, R, SpellingListIndex, IsLateParsed, DuplicatesAllowed) {}
+                  bool IsLateParsed, bool InheritEvenIfAlreadyPresent)
+      : Attr(AK, R, SpellingListIndex, IsLateParsed) {
+    this->InheritEvenIfAlreadyPresent = InheritEvenIfAlreadyPresent;
+  }
 
 public:
   void setInherited(bool I) { Inherited = I; }
 
+  /// Should this attribute be inherited from a prior declaration even if it's
+  /// explicitly provided in the current declaration?
+  bool shouldInheritEvenIfAlreadyPresent() const {
+    return InheritEvenIfAlreadyPresent;
+  }
+
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Attr *A) {
     return A->getKind() >= attr::FirstInheritableAttr &&
@@ -148,9 +153,9 @@
 class InheritableParamAttr : public InheritableAttr {
 protected:
   InheritableParamAttr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex,
-                       bool IsLateParsed, bool DuplicatesAllowed)
+                       bool IsLateParsed, bool InheritEvenIfAlreadyPresent)
       : InheritableAttr(AK, R, SpellingListIndex, IsLateParsed,
-                        DuplicatesAllowed) {}
+                        InheritEvenIfAlreadyPresent) {}
 
 public:
   // Implement isa/cast/dyncast/etc.
@@ -166,9 +171,9 @@
 protected:
   ParameterABIAttr(attr::Kind AK, SourceRange R,
                    unsigned SpellingListIndex, bool IsLateParsed,
-                   bool DuplicatesAllowed)
+                   bool InheritEvenIfAlreadyPresent)
     : InheritableParamAttr(AK, R, SpellingListIndex, IsLateParsed,
-                           DuplicatesAllowed) {}
+                           InheritEvenIfAlreadyPresent) {}
 
 public:
   ParameterABI getABI() const {
@@ -190,6 +195,120 @@
    }
 };
 
+/// A single parameter index whose accessors require each use to make explicit
+/// the parameter index encoding needed.
+class ParamIdx {
+  // Idx is exposed only via accessors that specify specific encodings.
+  unsigned Idx : 30;
+  unsigned HasThis : 1;
+  unsigned IsValid : 1;
+
+  void assertComparable(const ParamIdx &I) const {
+    assert(isValid() && I.isValid() &&
+           "ParamIdx must be valid to be compared");
+    // It's possible to compare indices from separate functions, but so far
+    // it's not proven useful.  Moreover, it might be confusing because a
+    // comparison on the results of getASTIndex might be inconsistent with a
+    // comparison on the ParamIdx objects themselves.
+    assert(HasThis == I.HasThis &&
+           "ParamIdx must be for the same function to be compared");
+  }
+
+public:
+  /// Construct an invalid parameter index (\c isValid returns false and
+  /// accessors fail an assert).
+  ParamIdx() : Idx(0), HasThis(false), IsValid(false) {}
+
+  /// \param Idx is the parameter index as it is normally specified in
+  /// attributes in the source: one-origin including any C++ implicit this
+  /// parameter.
+  ///
+  /// \param D is the declaration containing the parameters.  It is used to
+  /// determine if there is a C++ implicit this parameter.
+  ParamIdx(unsigned Idx, const Decl *D)
+      : Idx(Idx), HasThis(false), IsValid(true) {
+    if (const auto *FD = dyn_cast<FunctionDecl>(D))
+      HasThis = FD->isCXXInstanceMember();
+  }
+
+  /// \param Idx is the parameter index as it is normally specified in
+  /// attributes in the source: one-origin including any C++ implicit this
+  /// parameter.
+  ///
+  /// \param HasThis specifies whether the function has a C++ implicit this
+  /// parameter.
+  ParamIdx(unsigned Idx, bool HasThis)
+      : Idx(Idx), HasThis(HasThis), IsValid(true) {}
+
+  /// Is this parameter index valid?
+  bool isValid() const { return IsValid; }
+
+  /// Is there a C++ implicit this parameter?
+  bool hasThis() const {
+    assert(isValid() && "ParamIdx must be valid");
+    return HasThis;
+  }
+
+  /// Get the parameter index as it would normally be encoded for attributes at
+  /// the source level of representation: one-origin including any C++ implicit
+  /// this parameter.
+  ///
+  /// This encoding thus makes sense for diagnostics, pretty printing, and
+  /// constructing new attributes from a source-like specification.
+  unsigned getSourceIndex() const {
+    assert(isValid() && "ParamIdx must be valid");
+    return Idx;
+  }
+
+  /// Get the parameter index as it would normally be encoded at the AST level
+  /// of representation: zero-origin not including any C++ implicit this
+  /// parameter.
+  ///
+  /// This is the encoding primarily used in Sema.  However, in diagnostics,
+  /// Sema uses \c getSourceIndex instead.
+  unsigned getASTIndex() const {
+    assert(isValid() && "ParamIdx must be valid");
+    assert(Idx >= 1 + HasThis &&
+           "stored index must be base-1 and not specify C++ implicit this");
+    return Idx - 1 - HasThis;
+  }
+
+  /// Get the parameter index as it would normally be encoded at the LLVM level
+  /// of representation: zero-origin including any C++ implicit this parameter.
+  ///
+  /// This is the encoding primarily used in CodeGen.
+  unsigned getLLVMIndex() const {
+    assert(isValid() && "ParamIdx must be valid");
+    assert(Idx >= 1 && "stored index must be base-1");
+    return Idx - 1;
+  }
+
+  bool operator==(const ParamIdx &I) const {
+    assertComparable(I);
+    return Idx == I.Idx;
+  }
+  bool operator!=(const ParamIdx &I) const {
+    assertComparable(I);
+    return Idx != I.Idx;
+  }
+  bool operator<(const ParamIdx &I) const {
+    assertComparable(I);
+    return Idx < I.Idx;
+  }
+  bool operator>(const ParamIdx &I) const {
+    assertComparable(I);
+    return Idx > I.Idx;
+  }
+  bool operator<=(const ParamIdx &I) const {
+    assertComparable(I);
+    return Idx <= I.Idx;
+  }
+  bool operator>=(const ParamIdx &I) const {
+    assertComparable(I);
+    return Idx >= I.Idx;
+  }
+};
+
 #include "clang/AST/Attrs.inc"
 
 inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
diff --git a/include/clang/AST/AttrIterator.h b/include/clang/AST/AttrIterator.h
index fb9b049..56807b4 100644
--- a/include/clang/AST/AttrIterator.h
+++ b/include/clang/AST/AttrIterator.h
@@ -1,4 +1,4 @@
-//===--- AttrIterator.h - Classes for attribute iteration -------*- C++ -*-===//
+//===- AttrIterator.h - Classes for attribute iteration ---------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,16 +15,23 @@
 #define LLVM_CLANG_AST_ATTRITERATOR_H
 
 #include "clang/Basic/LLVM.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <cstddef>
 #include <iterator>
 
 namespace clang {
-  class ASTContext;
-  class Attr;
-}
+
+class ASTContext;
+class Attr;
+
+} // namespace clang
 
 // Defined in ASTContext.h
 void *operator new(size_t Bytes, const clang::ASTContext &C,
                    size_t Alignment = 8);
+
 // FIXME: Being forced to not have a default argument here due to redeclaration
 //        rules on default arguments sucks
 void *operator new[](size_t Bytes, const clang::ASTContext &C,
@@ -39,13 +46,13 @@
 namespace clang {
 
 /// AttrVec - A vector of Attr, which is how they are stored on the AST.
-typedef SmallVector<Attr *, 4> AttrVec;
+using AttrVec = SmallVector<Attr *, 4>;
 
 /// specific_attr_iterator - Iterates over a subrange of an AttrVec, only
 /// providing attributes that are of a specific type.
 template <typename SpecificAttr, typename Container = AttrVec>
 class specific_attr_iterator {
-  typedef typename Container::const_iterator Iterator;
+  using Iterator = typename Container::const_iterator;
 
   /// Current - The current, underlying iterator.
   /// In order to ensure we don't dereference an invalid iterator unless
@@ -67,14 +74,14 @@
   }
 
 public:
-  typedef SpecificAttr*             value_type;
-  typedef SpecificAttr*             reference;
-  typedef SpecificAttr*             pointer;
-  typedef std::forward_iterator_tag iterator_category;
-  typedef std::ptrdiff_t            difference_type;
+  using value_type = SpecificAttr *;
+  using reference = SpecificAttr *;
+  using pointer = SpecificAttr *;
+  using iterator_category = std::forward_iterator_tag;
+  using difference_type = std::ptrdiff_t;
 
-  specific_attr_iterator() : Current() { }
-  explicit specific_attr_iterator(Iterator i) : Current(i) { }
+  specific_attr_iterator() = default;
+  explicit specific_attr_iterator(Iterator i) : Current(i) {}
 
   reference operator*() const {
     AdvanceToNext();
@@ -136,6 +143,6 @@
     return nullptr;
 }
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_ATTRITERATOR_H
diff --git a/include/clang/AST/BaseSubobject.h b/include/clang/AST/BaseSubobject.h
index 66af023..fdb7e71 100644
--- a/include/clang/AST/BaseSubobject.h
+++ b/include/clang/AST/BaseSubobject.h
@@ -1,4 +1,4 @@
-//===--- BaseSubobject.h - BaseSubobject class ----------------------------===//
+//===- BaseSubobject.h - BaseSubobject class --------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,12 +15,15 @@
 #define LLVM_CLANG_AST_BASESUBOBJECT_H
 
 #include "clang/AST/CharUnits.h"
-#include "clang/AST/DeclCXX.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Support/type_traits.h"
+#include <cstdint>
+#include <utility>
 
 namespace clang {
+
+class CXXRecordDecl;
+
 // BaseSubobject - Uniquely identifies a direct or indirect base class. 
 // Stores both the base class decl and the offset from the most derived class to
 // the base class. Used for vtable and VTT generation.
@@ -32,9 +35,9 @@
   CharUnits BaseOffset;
   
 public:
-  BaseSubobject() { }
+  BaseSubobject() = default;
   BaseSubobject(const CXXRecordDecl *Base, CharUnits BaseOffset)
-    : Base(Base), BaseOffset(BaseOffset) { }
+      : Base(Base), BaseOffset(BaseOffset) {}
   
   /// getBase - Returns the base class declaration.
   const CXXRecordDecl *getBase() const { return Base; }
@@ -47,7 +50,7 @@
  }
 };
 
-} // end namespace clang
+} // namespace clang
 
 namespace llvm {
 
@@ -65,7 +68,8 @@
   }
 
   static unsigned getHashValue(const clang::BaseSubobject &Base) {
-    typedef std::pair<const clang::CXXRecordDecl *, clang::CharUnits> PairTy;
+    using PairTy = std::pair<const clang::CXXRecordDecl *, clang::CharUnits>;
+
     return DenseMapInfo<PairTy>::getHashValue(PairTy(Base.getBase(),
                                                      Base.getBaseOffset()));
   }
@@ -81,6 +85,6 @@
   static const bool value = true;
 };
 
-}
+} // namespace llvm
 
-#endif
+#endif // LLVM_CLANG_AST_BASESUBOBJECT_H
diff --git a/include/clang/AST/CXXInheritance.h b/include/clang/AST/CXXInheritance.h
index 9806085..11fb229 100644
--- a/include/clang/AST/CXXInheritance.h
+++ b/include/clang/AST/CXXInheritance.h
@@ -1,4 +1,4 @@
-//===------ CXXInheritance.h - C++ Inheritance ------------------*- C++ -*-===//
+//===- CXXInheritance.h - C++ Inheritance -----------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,19 +16,23 @@
 
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Type.h"
 #include "clang/AST/TypeOrdering.h"
+#include "clang/Basic/Specifiers.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include <cassert>
+#include "llvm/ADT/iterator_range.h"
 #include <list>
+#include <memory>
+#include <utility>
 
 namespace clang {
-  
-class CXXBaseSpecifier;
-class CXXMethodDecl;
-class CXXRecordDecl;
+
+class ASTContext;
 class NamedDecl;
   
 /// \brief Represents an element in a path from a derived class to a
@@ -66,12 +70,12 @@
 /// subobject is being used.
 class CXXBasePath : public SmallVector<CXXBasePathElement, 4> {
 public:
-  CXXBasePath() : Access(AS_public) {}
-
   /// \brief The access along this inheritance path.  This is only
   /// calculated when recording paths.  AS_none is a special value
   /// used to indicate a path which permits no legal access.
-  AccessSpecifier Access;
+  AccessSpecifier Access = AS_public;
+
+  CXXBasePath() = default;
 
   /// \brief The set of declarations found inside this base class
   /// subobject.
@@ -113,8 +117,10 @@
 /// refer to the same base class subobject of type A (the virtual
 /// one), there is no ambiguity.
 class CXXBasePaths {
+  friend class CXXRecordDecl;
+
   /// \brief The type from which this search originated.
-  CXXRecordDecl *Origin;
+  CXXRecordDecl *Origin = nullptr;
   
   /// Paths - The actual set of paths that can be taken from the
   /// derived class to the same base class.
@@ -152,15 +158,13 @@
   CXXBasePath ScratchPath;
 
   /// DetectedVirtual - The base class that is virtual.
-  const RecordType *DetectedVirtual;
+  const RecordType *DetectedVirtual = nullptr;
   
   /// \brief Array of the declarations that have been found. This
   /// array is constructed only if needed, e.g., to iterate over the
   /// results within LookupResult.
   std::unique_ptr<NamedDecl *[]> DeclsFound;
-  unsigned NumDeclsFound;
-  
-  friend class CXXRecordDecl;
+  unsigned NumDeclsFound = 0;
   
   void ComputeDeclsFound();
 
@@ -169,17 +173,16 @@
                      bool LookupInDependent = false);
 
 public:
-  typedef std::list<CXXBasePath>::iterator paths_iterator;
-  typedef std::list<CXXBasePath>::const_iterator const_paths_iterator;
-  typedef NamedDecl **decl_iterator;
+  using paths_iterator = std::list<CXXBasePath>::iterator;
+  using const_paths_iterator = std::list<CXXBasePath>::const_iterator;
+  using decl_iterator = NamedDecl **;
   
   /// BasePaths - Construct a new BasePaths structure to record the
   /// paths for a derived-to-base search.
   explicit CXXBasePaths(bool FindAmbiguities = true, bool RecordPaths = true,
                         bool DetectVirtual = true)
-      : Origin(), FindAmbiguities(FindAmbiguities), RecordPaths(RecordPaths),
-        DetectVirtual(DetectVirtual), DetectedVirtual(nullptr),
-        NumDeclsFound(0) {}
+      : FindAmbiguities(FindAmbiguities), RecordPaths(RecordPaths),
+        DetectVirtual(DetectVirtual) {}
 
   paths_iterator begin() { return Paths.begin(); }
   paths_iterator end()   { return Paths.end(); }
@@ -189,7 +192,8 @@
   CXXBasePath&       front()       { return Paths.front(); }
   const CXXBasePath& front() const { return Paths.front(); }
   
-  typedef llvm::iterator_range<decl_iterator> decl_range;
+  using decl_range = llvm::iterator_range<decl_iterator>;
+
   decl_range found_decls();
   
   /// \brief Determine whether the path from the most-derived type to the
@@ -231,25 +235,24 @@
 /// \brief Uniquely identifies a virtual method within a class
 /// hierarchy by the method itself and a class subobject number.
 struct UniqueVirtualMethod {
-  UniqueVirtualMethod()
-    : Method(nullptr), Subobject(0), InVirtualSubobject(nullptr) { }
-
-  UniqueVirtualMethod(CXXMethodDecl *Method, unsigned Subobject,
-                      const CXXRecordDecl *InVirtualSubobject)
-    : Method(Method), Subobject(Subobject), 
-      InVirtualSubobject(InVirtualSubobject) { }
-
   /// \brief The overriding virtual method.
-  CXXMethodDecl *Method;
+  CXXMethodDecl *Method = nullptr;
 
   /// \brief The subobject in which the overriding virtual method
   /// resides.
-  unsigned Subobject;
+  unsigned Subobject = 0;
 
   /// \brief The virtual base class subobject of which this overridden
   /// virtual method is a part. Note that this records the closest
   /// derived virtual base class subobject.
-  const CXXRecordDecl *InVirtualSubobject;
+  const CXXRecordDecl *InVirtualSubobject = nullptr;
+
+  UniqueVirtualMethod() = default;
+
+  UniqueVirtualMethod(CXXMethodDecl *Method, unsigned Subobject,
+                      const CXXRecordDecl *InVirtualSubobject)
+      : Method(Method), Subobject(Subobject),
+        InVirtualSubobject(InVirtualSubobject) {}
 
   friend bool operator==(const UniqueVirtualMethod &X,
                          const UniqueVirtualMethod &Y) {
@@ -271,14 +274,16 @@
 /// pair is the virtual method that overrides it (including the
 /// subobject in which that virtual function occurs).
 class OverridingMethods {
-  typedef SmallVector<UniqueVirtualMethod, 4> ValuesT;
-  typedef llvm::MapVector<unsigned, ValuesT> MapType;
+  using ValuesT = SmallVector<UniqueVirtualMethod, 4>;
+  using MapType = llvm::MapVector<unsigned, ValuesT>;
+
   MapType Overrides;
 
 public:
   // Iterate over the set of subobjects that have overriding methods.
-  typedef MapType::iterator iterator;
-  typedef MapType::const_iterator const_iterator;
+  using iterator = MapType::iterator;
+  using const_iterator = MapType::const_iterator;
+
   iterator begin() { return Overrides.begin(); }
   const_iterator begin() const { return Overrides.begin(); }
   iterator end() { return Overrides.end(); }
@@ -287,10 +292,10 @@
 
   // Iterate over the set of overriding virtual methods in a given
   // subobject.
-  typedef SmallVectorImpl<UniqueVirtualMethod>::iterator
-    overriding_iterator;
-  typedef SmallVectorImpl<UniqueVirtualMethod>::const_iterator
-    overriding_const_iterator;
+  using overriding_iterator =
+      SmallVectorImpl<UniqueVirtualMethod>::iterator;
+  using overriding_const_iterator =
+      SmallVectorImpl<UniqueVirtualMethod>::const_iterator;
 
   // Add a new overriding method for a particular subobject.
   void add(unsigned OverriddenSubobject, UniqueVirtualMethod Overriding);
@@ -357,12 +362,12 @@
 /// subobject numbers greater than 0 refer to non-virtual base class
 /// subobjects of that type.
 class CXXFinalOverriderMap
-  : public llvm::MapVector<const CXXMethodDecl *, OverridingMethods> { };
+  : public llvm::MapVector<const CXXMethodDecl *, OverridingMethods> {};
 
 /// \brief A set of all the primary bases for a class.
 class CXXIndirectPrimaryBaseSet
-  : public llvm::SmallSet<const CXXRecordDecl*, 32> { };
+  : public llvm::SmallSet<const CXXRecordDecl*, 32> {};
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_CXXINHERITANCE_H
diff --git a/include/clang/AST/CanonicalType.h b/include/clang/AST/CanonicalType.h
index 023456e..6487613 100644
--- a/include/clang/AST/CanonicalType.h
+++ b/include/clang/AST/CanonicalType.h
@@ -1,4 +1,4 @@
-//===-- CanonicalType.h - C Language Family Type Representation -*- C++ -*-===//
+//===- CanonicalType.h - C Language Family Type Representation --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,13 +16,29 @@
 #define LLVM_CLANG_AST_CANONICALTYPE_H
 
 #include "clang/AST/Type.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include <cassert>
+#include <iterator>
+#include <type_traits>
 
 namespace clang {
 
 template<typename T> class CanProxy;
 template<typename T> struct CanProxyAdaptor;
+class CXXRecordDecl;
+class EnumDecl;
+class Expr;
+class IdentifierInfo;
+class ObjCInterfaceDecl;
+class RecordDecl;
+class TagDecl;
+class TemplateTypeParmDecl;
 
 //----------------------------------------------------------------------------//
 // Canonical, qualified type template
@@ -46,8 +62,6 @@
 /// converted to @c CanQual<ReferenceType>. Note that any @c CanQual type can
 /// be implicitly converted to a QualType, but the reverse operation requires
 /// a call to ASTContext::getCanonicalType().
-///
-///
 template<typename T = Type>
 class CanQual {
   /// \brief The actual, canonical type.
@@ -55,7 +69,7 @@
 
 public:
   /// \brief Constructs a NULL canonical type.
-  CanQual() : Stored() { }
+  CanQual() = default;
 
   /// \brief Converting constructor that permits implicit upcasting of
   /// canonical type pointers.
@@ -66,12 +80,11 @@
   /// \brief Retrieve the underlying type pointer, which refers to a
   /// canonical type.
   ///
-  /// The underlying pointer must not be NULL.
+  /// The underlying pointer must not be nullptr.
   const T *getTypePtr() const { return cast<T>(Stored.getTypePtr()); }
 
   /// \brief Retrieve the underlying type pointer, which refers to a
-  /// canonical type, or NULL.
-  ///
+  /// canonical type, or nullptr.
   const T *getTypePtrOrNull() const { 
     return cast_or_null<T>(Stored.getTypePtrOrNull()); 
   }
@@ -125,9 +138,11 @@
   bool isConstQualified() const {
     return Stored.isLocalConstQualified();
   }
+
   bool isVolatileQualified() const {
     return Stored.isLocalVolatileQualified();
   }
+
   bool isRestrictQualified() const {
     return Stored.isLocalRestrictQualified();
   }
@@ -195,7 +210,7 @@
 }
 
 /// \brief Represents a canonical, potentially-qualified type.
-typedef CanQual<Type> CanQualType;
+using CanQualType = CanQual<Type>;
 
 inline CanQualType Type::getCanonicalTypeUnqualified() const {
   return CanQualType::CreateUnsafe(getCanonicalTypeInternal());
@@ -320,7 +335,7 @@
 /// than the more typical @c QualType, to propagate the notion of "canonical"
 /// through the system.
 template<typename T>
-struct CanProxyAdaptor : CanProxyBase<T> { };
+struct CanProxyAdaptor : CanProxyBase<T> {};
 
 /// \brief Canonical proxy type returned when retrieving the members of a
 /// canonical type or as the result of the @c CanQual<T>::getAs member
@@ -333,7 +348,7 @@
 class CanProxy : public CanProxyAdaptor<T> {
 public:
   /// \brief Build a NULL proxy.
-  CanProxy() { }
+  CanProxy() = default;
 
   /// \brief Build a proxy to the given canonical type.
   CanProxy(CanQual<T> Stored) { this->Stored = Stored; }
@@ -342,7 +357,7 @@
   operator CanQual<T>() const { return this->Stored; }
 };
 
-} // end namespace clang
+} // namespace clang
 
 namespace llvm {
 
@@ -350,8 +365,9 @@
 /// CanQual<T> to a specific Type class. We're prefer isa/dyn_cast/cast/etc.
 /// to return smart pointer (proxies?).
 template<typename T>
-struct simplify_type< ::clang::CanQual<T> > {
-  typedef const T *SimpleType;
+struct simplify_type< ::clang::CanQual<T>> {
+  using SimpleType = const T *;
+
   static SimpleType getSimplifiedValue(::clang::CanQual<T> Val) {
     return Val.getTypePtr();
   }
@@ -359,18 +375,20 @@
 
 // Teach SmallPtrSet that CanQual<T> is "basically a pointer".
 template<typename T>
-struct PointerLikeTypeTraits<clang::CanQual<T> > {
-  static inline void *getAsVoidPointer(clang::CanQual<T> P) {
+struct PointerLikeTypeTraits<clang::CanQual<T>> {
+  static void *getAsVoidPointer(clang::CanQual<T> P) {
     return P.getAsOpaquePtr();
   }
-  static inline clang::CanQual<T> getFromVoidPointer(void *P) {
+
+  static clang::CanQual<T> getFromVoidPointer(void *P) {
     return clang::CanQual<T>::getFromOpaquePtr(P);
   }
+
   // qualifier information is encoded in the low bits.
   enum { NumLowBitsAvailable = 0 };
 };
 
-} // end namespace llvm
+} // namespace llvm
 
 namespace clang {
 
@@ -388,7 +406,7 @@
           CanQualType,
           typename std::iterator_traits<InputIterator>::difference_type,
           CanProxy<Type>, CanQualType> {
-  CanTypeIterator() {}
+  CanTypeIterator() = default;
   explicit CanTypeIterator(InputIterator Iter)
       : CanTypeIterator::iterator_adaptor_base(std::move(Iter)) {}
 
@@ -486,6 +504,7 @@
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, hasExtParameterInfos)
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(
             ArrayRef<FunctionProtoType::ExtParameterInfo>, getExtParameterInfos)
+
   CanQualType getParamType(unsigned i) const {
     return CanQualType::CreateUnsafe(this->getTypePtr()->getParamType(i));
   }
@@ -493,8 +512,8 @@
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isVariadic)
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(unsigned, getTypeQuals)
 
-  typedef CanTypeIterator<FunctionProtoType::param_type_iterator>
-  param_type_iterator;
+  using param_type_iterator =
+      CanTypeIterator<FunctionProtoType::param_type_iterator>;
 
   param_type_iterator param_type_begin() const {
     return param_type_iterator(this->getTypePtr()->param_type_begin());
@@ -566,7 +585,8 @@
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isObjCQualifiedId)
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isObjCQualifiedClass)
 
-  typedef ObjCObjectPointerType::qual_iterator qual_iterator;
+  using qual_iterator = ObjCObjectPointerType::qual_iterator;
+
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(qual_iterator, qual_begin)
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(qual_iterator, qual_end)
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, qual_empty)
@@ -584,7 +604,8 @@
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isObjCQualifiedIdType)
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isObjCQualifiedClassType)
 
-  typedef ObjCObjectPointerType::qual_iterator qual_iterator;
+  using qual_iterator = ObjCObjectPointerType::qual_iterator;
+
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(qual_iterator, qual_begin)
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(qual_iterator, qual_end)
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, qual_empty)
@@ -661,7 +682,6 @@
   return CanProxy<Type>(*this);
 }
 
-}
+} // namespace clang
 
-
-#endif
+#endif // LLVM_CLANG_AST_CANONICALTYPE_H
diff --git a/include/clang/AST/CommentVisitor.h b/include/clang/AST/CommentVisitor.h
index 21641bf..d1cc2d0 100644
--- a/include/clang/AST/CommentVisitor.h
+++ b/include/clang/AST/CommentVisitor.h
@@ -1,4 +1,4 @@
-//===--- CommentVisitor.h - Visitor for Comment subclasses ------*- C++ -*-===//
+//===- CommentVisitor.h - Visitor for Comment subclasses --------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,8 +16,8 @@
 namespace clang {
 namespace comments {
 
-template <typename T> struct make_ptr       { typedef       T *type; };
-template <typename T> struct make_const_ptr { typedef const T *type; };
+template <typename T> struct make_ptr { using type = T *; };
+template <typename T> struct make_const_ptr { using type = const T *; };
 
 template<template <typename> class Ptr, typename ImplClass, typename RetTy=void>
 class CommentVisitorBase {
@@ -64,7 +64,7 @@
 class ConstCommentVisitor :
     public CommentVisitorBase<make_const_ptr, ImplClass, RetTy> {};
 
-} // end namespace comments
-} // end namespace clang
+} // namespace comments
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_COMMENTVISITOR_H
diff --git a/include/clang/AST/Decl.h b/include/clang/AST/Decl.h
index 51d53a4..57c6f45 100644
--- a/include/clang/AST/Decl.h
+++ b/include/clang/AST/Decl.h
@@ -1,4 +1,4 @@
-//===--- Decl.h - Classes for representing declarations ---------*- C++ -*-===//
+//===- Decl.h - Classes for representing declarations -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,74 +18,103 @@
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclarationName.h"
 #include "clang/AST/ExternalASTSource.h"
+#include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/Redeclarable.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/AddressSpaces.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/Linkage.h"
-#include "clang/Basic/Module.h"
 #include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/PartialDiagnostic.h"
 #include "clang/Basic/PragmaKinds.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
+#include "clang/Basic/Visibility.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TrailingObjects.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <utility>
 
 namespace clang {
+
+class ASTContext;
 struct ASTTemplateArgumentListInfo;
-class CXXTemporary;
+class Attr;
 class CompoundStmt;
 class DependentFunctionTemplateSpecializationInfo;
+class EnumDecl;
 class Expr;
 class FunctionTemplateDecl;
 class FunctionTemplateSpecializationInfo;
 class LabelStmt;
 class MemberSpecializationInfo;
-class NestedNameSpecifier;
+class Module;
+class NamespaceDecl;
 class ParmVarDecl;
+class RecordDecl;
 class Stmt;
 class StringLiteral;
+class TagDecl;
 class TemplateArgumentList;
+class TemplateArgumentListInfo;
 class TemplateParameterList;
 class TypeAliasTemplateDecl;
 class TypeLoc;
 class UnresolvedSetImpl;
 class VarTemplateDecl;
 
-/// \brief A container of type source information.
+/// A container of type source information.
 ///
 /// A client can read the relevant info using TypeLoc wrappers, e.g:
 /// @code
 /// TypeLoc TL = TypeSourceInfo->getTypeLoc();
 /// TL.getStartLoc().print(OS, SrcMgr);
 /// @endcode
-///
-class TypeSourceInfo {
-  QualType Ty;
+class LLVM_ALIGNAS(8) TypeSourceInfo {
   // Contains a memory block after the class, used for type source information,
   // allocated by ASTContext.
   friend class ASTContext;
-  TypeSourceInfo(QualType ty) : Ty(ty) { }
+
+  QualType Ty;
+
+  TypeSourceInfo(QualType ty) : Ty(ty) {}
+
 public:
-  /// \brief Return the type wrapped by this type source info.
+  /// Return the type wrapped by this type source info.
   QualType getType() const { return Ty; }
 
-  /// \brief Return the TypeLoc wrapper for the type source info.
+  /// Return the TypeLoc wrapper for the type source info.
   TypeLoc getTypeLoc() const; // implemented in TypeLoc.h
   
-  /// \brief Override the type stored in this TypeSourceInfo. Use with caution!
+  /// Override the type stored in this TypeSourceInfo. Use with caution!
   void overrideType(QualType T) { Ty = T; }
 };
 
-/// TranslationUnitDecl - The top declaration context.
+/// The top declaration context.
 class TranslationUnitDecl : public Decl, public DeclContext {
-  virtual void anchor();
   ASTContext &Ctx;
 
   /// The (most recently entered) anonymous namespace for this
   /// translation unit, if one has been created.
-  NamespaceDecl *AnonymousNamespace;
+  NamespaceDecl *AnonymousNamespace = nullptr;
 
   explicit TranslationUnitDecl(ASTContext &ctx);
+
+  virtual void anchor();
+
 public:
   ASTContext &getASTContext() const { return Ctx; }
 
@@ -93,6 +122,7 @@
   void setAnonymousNamespace(NamespaceDecl *D) { AnonymousNamespace = D; }
 
   static TranslationUnitDecl *Create(ASTContext &C);
+
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == TranslationUnit; }
@@ -104,23 +134,23 @@
   }
 };
 
-/// \brief Represents a `#pragma comment` line. Always a child of
+/// Represents a `#pragma comment` line. Always a child of
 /// TranslationUnitDecl.
 class PragmaCommentDecl final
     : public Decl,
       private llvm::TrailingObjects<PragmaCommentDecl, char> {
-  virtual void anchor();
-
-  PragmaMSCommentKind CommentKind;
-
-  friend TrailingObjects;
   friend class ASTDeclReader;
   friend class ASTDeclWriter;
+  friend TrailingObjects;
+
+  PragmaMSCommentKind CommentKind;
 
   PragmaCommentDecl(TranslationUnitDecl *TU, SourceLocation CommentLoc,
                     PragmaMSCommentKind CommentKind)
       : Decl(PragmaComment, TU, CommentLoc), CommentKind(CommentKind) {}
 
+  virtual void anchor();
+
 public:
   static PragmaCommentDecl *Create(const ASTContext &C, TranslationUnitDecl *DC,
                                    SourceLocation CommentLoc,
@@ -138,23 +168,23 @@
   static bool classofKind(Kind K) { return K == PragmaComment; }
 };
 
-/// \brief Represents a `#pragma detect_mismatch` line. Always a child of
+/// Represents a `#pragma detect_mismatch` line. Always a child of
 /// TranslationUnitDecl.
 class PragmaDetectMismatchDecl final
     : public Decl,
       private llvm::TrailingObjects<PragmaDetectMismatchDecl, char> {
-  virtual void anchor();
-
-  size_t ValueStart;
-
-  friend TrailingObjects;
   friend class ASTDeclReader;
   friend class ASTDeclWriter;
+  friend TrailingObjects;
+
+  size_t ValueStart;
 
   PragmaDetectMismatchDecl(TranslationUnitDecl *TU, SourceLocation Loc,
                            size_t ValueStart)
       : Decl(PragmaDetectMismatch, TU, Loc), ValueStart(ValueStart) {}
 
+  virtual void anchor();
+
 public:
   static PragmaDetectMismatchDecl *Create(const ASTContext &C,
                                           TranslationUnitDecl *DC,
@@ -171,7 +201,7 @@
   static bool classofKind(Kind K) { return K == PragmaDetectMismatch; }
 };
 
-/// \brief Declaration context for names declared as extern "C" in C++. This
+/// Declaration context for names declared as extern "C" in C++. This
 /// is neither the semantic nor lexical context for such declarations, but is
 /// used to check for conflicts with other extern "C" declarations. Example:
 ///
@@ -189,14 +219,16 @@
 /// The declaration at #3 finds it is a redeclaration of \c N::f through
 /// lookup in the extern "C" context.
 class ExternCContextDecl : public Decl, public DeclContext {
-  virtual void anchor();
-
   explicit ExternCContextDecl(TranslationUnitDecl *TU)
     : Decl(ExternCContext, TU, SourceLocation()),
       DeclContext(ExternCContext) {}
+
+  virtual void anchor();
+
 public:
   static ExternCContextDecl *Create(const ASTContext &C,
                                     TranslationUnitDecl *TU);
+
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ExternCContext; }
@@ -208,30 +240,36 @@
   }
 };
 
-/// NamedDecl - This represents a decl with a name.  Many decls have names such
+/// This represents a decl that may have a name.  Many decls have names such
 /// as ObjCMethodDecl, but not \@class, etc.
+///
+/// Note that not every NamedDecl is actually named (e.g., a struct might
+/// be anonymous), and not every name is an identifier.
 class NamedDecl : public Decl {
-  virtual void anchor();
-  /// Name - The name of this declaration, which is typically a normal
+  /// The name of this declaration, which is typically a normal
   /// identifier but may also be a special kind of name (C++
   /// constructor, Objective-C selector, etc.)
   DeclarationName Name;
 
+  virtual void anchor();
+
 private:
   NamedDecl *getUnderlyingDeclImpl() LLVM_READONLY;
 
 protected:
   NamedDecl(Kind DK, DeclContext *DC, SourceLocation L, DeclarationName N)
-    : Decl(DK, DC, L), Name(N) { }
+      : Decl(DK, DC, L), Name(N) {}
 
 public:
-  /// getIdentifier - Get the identifier that names this declaration,
-  /// if there is one. This will return NULL if this declaration has
-  /// no name (e.g., for an unnamed class) or if the name is a special
-  /// name (C++ constructor, Objective-C selector, etc.).
+  /// Get the identifier that names this declaration, if there is one.
+  ///
+  /// This will return NULL if this declaration has no name (e.g., for
+  /// an unnamed class) or if the name is a special name (C++ constructor,
+  /// Objective-C selector, etc.).
   IdentifierInfo *getIdentifier() const { return Name.getAsIdentifierInfo(); }
 
-  /// getName - Get the name of identifier for this declaration as a StringRef.
+  /// Get the name of identifier for this declaration as a StringRef.
+  ///
   /// This requires that the declaration have a name and that it be a simple
   /// identifier.
   StringRef getName() const {
@@ -239,11 +277,12 @@
     return getIdentifier() ? getIdentifier()->getName() : "";
   }
 
-  /// getNameAsString - Get a human-readable name for the declaration, even if
-  /// it is one of the special kinds of names (C++ constructor, Objective-C
-  /// selector, etc).  Creating this name requires expensive string
-  /// manipulation, so it should be called only when performance doesn't matter.
-  /// For simple declarations, getNameAsCString() should suffice.
+  /// Get a human-readable name for the declaration, even if it is one of the
+  /// special kinds of names (C++ constructor, Objective-C selector, etc).
+  ///
+  /// Creating this name requires expensive string manipulation, so it should
+  /// be called only when performance doesn't matter. For simple declarations,
+  /// getNameAsCString() should suffice.
   //
   // FIXME: This function should be renamed to indicate that it is not just an
   // alternate form of getName(), and clients should move as appropriate.
@@ -253,17 +292,19 @@
 
   virtual void printName(raw_ostream &os) const;
 
-  /// getDeclName - Get the actual, stored name of the declaration,
-  /// which may be a special name.
+  /// Get the actual, stored name of the declaration, which may be a special
+  /// name.
   DeclarationName getDeclName() const { return Name; }
 
-  /// \brief Set the name of this declaration.
+  /// Set the name of this declaration.
   void setDeclName(DeclarationName N) { Name = N; }
 
-  /// printQualifiedName - Returns human-readable qualified name for
-  /// declaration, like A::B::i, for i being member of namespace A::B.
-  /// If declaration is not member of context which can be named (record,
-  /// namespace), it will return same result as printName().
+  /// Returns a human-readable qualified name for this declaration, like
+  /// A::B::i, for i being member of namespace A::B.
+  ///
+  /// If the declaration is not a member of context which can be named (record,
+  /// namespace), it will return the same result as printName().
+  ///
   /// Creating this name is expensive, so it should be called only when
   /// performance doesn't matter.
   void printQualifiedName(raw_ostream &OS) const;
@@ -272,8 +313,7 @@
   // FIXME: Remove string version.
   std::string getQualifiedNameAsString() const;
 
-  /// getNameForDiagnostic - Appends a human-readable name for this
-  /// declaration into the given stream.
+  /// Appends a human-readable name for this declaration into the given stream.
   ///
   /// This is the method invoked by Sema when displaying a NamedDecl
   /// in a diagnostic.  It does not necessarily produce the same
@@ -283,25 +323,25 @@
                                     const PrintingPolicy &Policy,
                                     bool Qualified) const;
 
-  /// \brief Determine whether this declaration, if
-  /// known to be well-formed within its context, will replace the
-  /// declaration OldD if introduced into scope. A declaration will
-  /// replace another declaration if, for example, it is a
-  /// redeclaration of the same variable or function, but not if it is
-  /// a declaration of a different kind (function vs. class) or an
-  /// overloaded function.
+  /// Determine whether this declaration, if known to be well-formed within
+  /// its context, will replace the declaration OldD if introduced into scope.
+  ///
+  /// A declaration will replace another declaration if, for example, it is
+  /// a redeclaration of the same variable or function, but not if it is a
+  /// declaration of a different kind (function vs. class) or an overloaded
+  /// function.
   ///
   /// \param IsKnownNewer \c true if this declaration is known to be newer
   /// than \p OldD (for instance, if this declaration is newly-created).
   bool declarationReplaces(NamedDecl *OldD, bool IsKnownNewer = true) const;
 
-  /// \brief Determine whether this declaration has linkage.
+  /// Determine whether this declaration has linkage.
   bool hasLinkage() const;
 
   using Decl::isModulePrivate;
   using Decl::setModulePrivate;
 
-  /// \brief Determine whether this declaration is a C++ class member.
+  /// Determine whether this declaration is a C++ class member.
   bool isCXXClassMember() const {
     const DeclContext *DC = getDeclContext();
 
@@ -314,23 +354,24 @@
     return DC->isRecord();
   }
 
-  /// \brief Determine whether the given declaration is an instance member of
+  /// Determine whether the given declaration is an instance member of
   /// a C++ class.
   bool isCXXInstanceMember() const;
 
-  /// \brief Determine what kind of linkage this entity has.
+  /// Determine what kind of linkage this entity has.
+  ///
   /// This is not the linkage as defined by the standard or the codegen notion
   /// of linkage. It is just an implementation detail that is used to compute
   /// those.
   Linkage getLinkageInternal() const;
 
-  /// \brief Get the linkage from a semantic point of view. Entities in
+  /// Get the linkage from a semantic point of view. Entities in
   /// anonymous namespaces are external (in c++98).
   Linkage getFormalLinkage() const {
     return clang::getFormalLinkage(getLinkageInternal());
   }
 
-  /// \brief True if this decl has external linkage.
+  /// True if this decl has external linkage.
   bool hasExternalFormalLinkage() const {
     return isExternalFormalLinkage(getLinkageInternal());
   }
@@ -345,12 +386,12 @@
     return isExternallyVisible() && !getOwningModuleForLinkage();
   }
 
-  /// \brief Determines the visibility of this entity.
+  /// Determines the visibility of this entity.
   Visibility getVisibility() const {
     return getLinkageAndVisibility().getVisibility();
   }
 
-  /// \brief Determines the linkage and visibility of this entity.
+  /// Determines the linkage and visibility of this entity.
   LinkageInfo getLinkageAndVisibility() const;
 
   /// Kinds of explicit visibility.
@@ -366,16 +407,16 @@
     VisibilityForValue
   };
 
-  /// \brief If visibility was explicitly specified for this
+  /// If visibility was explicitly specified for this
   /// declaration, return that visibility.
   Optional<Visibility>
   getExplicitVisibility(ExplicitVisibilityKind kind) const;
 
-  /// \brief True if the computed linkage is valid. Used for consistency
+  /// True if the computed linkage is valid. Used for consistency
   /// checking. Should always return true.
   bool isLinkageValid() const;
 
-  /// \brief True if something has required us to compute the linkage
+  /// True if something has required us to compute the linkage
   /// of this declaration.
   ///
   /// Language features which can retroactively change linkage (like a
@@ -385,7 +426,7 @@
     return hasCachedLinkage();
   }
 
-  /// \brief Looks through UsingDecls and ObjCCompatibleAliasDecls for
+  /// Looks through UsingDecls and ObjCCompatibleAliasDecls for
   /// the underlying named decl.
   NamedDecl *getUnderlyingDecl() {
     // Fast-path the common case.
@@ -419,27 +460,26 @@
   return OS;
 }
 
-/// LabelDecl - Represents the declaration of a label.  Labels also have a
+/// Represents the declaration of a label.  Labels also have a
 /// corresponding LabelStmt, which indicates the position that the label was
 /// defined at.  For normal labels, the location of the decl is the same as the
 /// location of the statement.  For GNU local labels (__label__), the decl
 /// location is where the __label__ is.
 class LabelDecl : public NamedDecl {
-  void anchor() override;
   LabelStmt *TheStmt;
   StringRef MSAsmName;
-  bool MSAsmNameResolved;
-  /// LocStart - For normal labels, this is the same as the main declaration
+  bool MSAsmNameResolved = false;
+
+  /// For normal labels, this is the same as the main declaration
   /// label, i.e., the location of the identifier; for GNU local labels,
   /// this is the location of the __label__ keyword.
   SourceLocation LocStart;
 
   LabelDecl(DeclContext *DC, SourceLocation IdentL, IdentifierInfo *II,
             LabelStmt *S, SourceLocation StartL)
-    : NamedDecl(Label, DC, IdentL, II),
-      TheStmt(S),
-      MSAsmNameResolved(false),
-      LocStart(StartL) {}
+      : NamedDecl(Label, DC, IdentL, II), TheStmt(S), LocStart(StartL) {}
+
+  void anchor() override;
 
 public:
   static LabelDecl *Create(ASTContext &C, DeclContext *DC,
@@ -459,7 +499,7 @@
     return SourceRange(LocStart, getLocation());
   }
 
-  bool isMSAsmLabel() const { return MSAsmName.size() != 0; }
+  bool isMSAsmLabel() const { return !MSAsmName.empty(); }
   bool isResolvedMSAsmLabel() const { return isMSAsmLabel() && MSAsmNameResolved; }
   void setMSAsmLabel(StringRef Name);
   StringRef getMSAsmLabel() const { return MSAsmName; }
@@ -470,17 +510,18 @@
   static bool classofKind(Kind K) { return K == Label; }
 };
 
-/// NamespaceDecl - Represent a C++ namespace.
+/// Represent a C++ namespace.
 class NamespaceDecl : public NamedDecl, public DeclContext, 
                       public Redeclarable<NamespaceDecl> 
 {
-  /// LocStart - The starting location of the source range, pointing
+  /// The starting location of the source range, pointing
   /// to either the namespace or the inline keyword.
   SourceLocation LocStart;
-  /// RBraceLoc - The ending location of the source range.
+
+  /// The ending location of the source range.
   SourceLocation RBraceLoc;
 
-  /// \brief A pointer to either the anonymous namespace that lives just inside
+  /// A pointer to either the anonymous namespace that lives just inside
   /// this namespace or to the first namespace in the chain (the latter case
   /// only when this is not the first in the chain), along with a 
   /// boolean value indicating whether this is an inline namespace.
@@ -490,12 +531,16 @@
                 SourceLocation StartLoc, SourceLocation IdLoc,
                 IdentifierInfo *Id, NamespaceDecl *PrevDecl);
 
-  typedef Redeclarable<NamespaceDecl> redeclarable_base;
+  using redeclarable_base = Redeclarable<NamespaceDecl>;
+
   NamespaceDecl *getNextRedeclarationImpl() override;
   NamespaceDecl *getPreviousDeclImpl() override;
   NamespaceDecl *getMostRecentDeclImpl() override;
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static NamespaceDecl *Create(ASTContext &C, DeclContext *DC,
                                bool Inline, SourceLocation StartLoc,
                                SourceLocation IdLoc, IdentifierInfo *Id,
@@ -503,8 +548,9 @@
 
   static NamespaceDecl *CreateDeserialized(ASTContext &C, unsigned ID);
 
-  typedef redeclarable_base::redecl_range redecl_range;
-  typedef redeclarable_base::redecl_iterator redecl_iterator;
+  using redecl_range = redeclarable_base::redecl_range;
+  using redecl_iterator = redeclarable_base::redecl_iterator;
+
   using redeclarable_base::redecls_begin;
   using redeclarable_base::redecls_end;
   using redeclarable_base::redecls;
@@ -512,7 +558,7 @@
   using redeclarable_base::getMostRecentDecl;
   using redeclarable_base::isFirstDecl;
 
-  /// \brief Returns true if this is an anonymous namespace declaration.
+  /// Returns true if this is an anonymous namespace declaration.
   ///
   /// For example:
   /// \code
@@ -525,28 +571,28 @@
     return !getIdentifier();
   }
 
-  /// \brief Returns true if this is an inline namespace declaration.
+  /// Returns true if this is an inline namespace declaration.
   bool isInline() const {
     return AnonOrFirstNamespaceAndInline.getInt();
   }
 
-  /// \brief Set whether this is an inline namespace declaration.
+  /// Set whether this is an inline namespace declaration.
   void setInline(bool Inline) {
     AnonOrFirstNamespaceAndInline.setInt(Inline);
   }
 
-  /// \brief Get the original (first) namespace declaration.
+  /// Get the original (first) namespace declaration.
   NamespaceDecl *getOriginalNamespace();
 
-  /// \brief Get the original (first) namespace declaration.
+  /// Get the original (first) namespace declaration.
   const NamespaceDecl *getOriginalNamespace() const;
 
-  /// \brief Return true if this declaration is an original (first) declaration
+  /// Return true if this declaration is an original (first) declaration
   /// of the namespace. This is false for non-original (subsequent) namespace
   /// declarations and anonymous namespaces.
   bool isOriginalNamespace() const;
 
-  /// \brief Retrieve the anonymous namespace nested inside this namespace,
+  /// Retrieve the anonymous namespace nested inside this namespace,
   /// if any.
   NamespaceDecl *getAnonymousNamespace() const {
     return getOriginalNamespace()->AnonOrFirstNamespaceAndInline.getPointer();
@@ -582,27 +628,26 @@
   static NamespaceDecl *castFromDeclContext(const DeclContext *DC) {
     return static_cast<NamespaceDecl *>(const_cast<DeclContext*>(DC));
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
-/// ValueDecl - Represent the declaration of a variable (in which case it is
+/// Represent the declaration of a variable (in which case it is
 /// an lvalue) a function (in which case it is a function designator) or
 /// an enum constant.
 class ValueDecl : public NamedDecl {
-  void anchor() override;
   QualType DeclType;
 
+  void anchor() override;
+
 protected:
   ValueDecl(Kind DK, DeclContext *DC, SourceLocation L,
             DeclarationName N, QualType T)
     : NamedDecl(DK, DC, L, N), DeclType(T) {}
+
 public:
   QualType getType() const { return DeclType; }
   void setType(QualType newType) { DeclType = newType; }
 
-  /// \brief Determine whether this symbol is weakly-imported,
+  /// Determine whether this symbol is weakly-imported,
   ///        or declared with the weak or weak-ref attr.
   bool isWeak() const;
 
@@ -611,40 +656,34 @@
   static bool classofKind(Kind K) { return K >= firstValue && K <= lastValue; }
 };
 
-/// QualifierInfo - A struct with extended info about a syntactic
+/// A struct with extended info about a syntactic
 /// name qualifier, to be used for the case of out-of-line declarations.
 struct QualifierInfo {
   NestedNameSpecifierLoc QualifierLoc;
 
-  /// NumTemplParamLists - The number of "outer" template parameter lists.
+  /// The number of "outer" template parameter lists.
   /// The count includes all of the template parameter lists that were matched
   /// against the template-ids occurring into the NNS and possibly (in the
   /// case of an explicit specialization) a final "template <>".
-  unsigned NumTemplParamLists;
+  unsigned NumTemplParamLists = 0;
 
-  /// TemplParamLists - A new-allocated array of size NumTemplParamLists,
+  /// A new-allocated array of size NumTemplParamLists,
   /// containing pointers to the "outer" template parameter lists.
   /// It includes all of the template parameter lists that were matched
   /// against the template-ids occurring into the NNS and possibly (in the
   /// case of an explicit specialization) a final "template <>".
-  TemplateParameterList** TemplParamLists;
+  TemplateParameterList** TemplParamLists = nullptr;
 
-  /// Default constructor.
-  QualifierInfo()
-    : QualifierLoc(), NumTemplParamLists(0), TemplParamLists(nullptr) {}
+  QualifierInfo() = default;
+  QualifierInfo(const QualifierInfo &) = delete;
+  QualifierInfo& operator=(const QualifierInfo &) = delete;
 
-  /// setTemplateParameterListsInfo - Sets info about "outer" template
-  /// parameter lists.
+  /// Sets info about "outer" template parameter lists.
   void setTemplateParameterListsInfo(ASTContext &Context,
                                      ArrayRef<TemplateParameterList *> TPLists);
-
-private:
-  // Copy constructor and copy assignment are disabled.
-  QualifierInfo(const QualifierInfo&) = delete;
-  QualifierInfo& operator=(const QualifierInfo&) = delete;
 };
 
-/// \brief Represents a ValueDecl that came out of a declarator.
+/// Represents a ValueDecl that came out of a declarator.
 /// Contains type source information through TypeSourceInfo.
 class DeclaratorDecl : public ValueDecl {
   // A struct representing both a TInfo and a syntactic qualifier,
@@ -653,9 +692,9 @@
     TypeSourceInfo *TInfo;
   };
 
-  llvm::PointerUnion<TypeSourceInfo*, ExtInfo*> DeclInfo;
+  llvm::PointerUnion<TypeSourceInfo *, ExtInfo *> DeclInfo;
 
-  /// InnerLocStart - The start of the source range for this declaration,
+  /// The start of the source range for this declaration,
   /// ignoring outer template declarations.
   SourceLocation InnerLocStart;
 
@@ -667,15 +706,18 @@
   DeclaratorDecl(Kind DK, DeclContext *DC, SourceLocation L,
                  DeclarationName N, QualType T, TypeSourceInfo *TInfo,
                  SourceLocation StartL)
-    : ValueDecl(DK, DC, L, N, T), DeclInfo(TInfo), InnerLocStart(StartL) {
-  }
+      : ValueDecl(DK, DC, L, N, T), DeclInfo(TInfo), InnerLocStart(StartL) {}
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   TypeSourceInfo *getTypeSourceInfo() const {
     return hasExtInfo()
       ? getExtInfo()->TInfo
       : DeclInfo.get<TypeSourceInfo*>();
   }
+
   void setTypeSourceInfo(TypeSourceInfo *TI) {
     if (hasExtInfo())
       getExtInfo()->TInfo = TI;
@@ -683,28 +725,28 @@
       DeclInfo = TI;
   }
 
-  /// getInnerLocStart - Return SourceLocation representing start of source
-  /// range ignoring outer template declarations.
+  /// Return start of source range ignoring outer template declarations.
   SourceLocation getInnerLocStart() const { return InnerLocStart; }
   void setInnerLocStart(SourceLocation L) { InnerLocStart = L; }
 
-  /// getOuterLocStart - Return SourceLocation representing start of source
-  /// range taking into account any outer template declarations.
+  /// Return start of source range taking into account any outer template
+  /// declarations.
   SourceLocation getOuterLocStart() const;
 
   SourceRange getSourceRange() const override LLVM_READONLY;
+
   SourceLocation getLocStart() const LLVM_READONLY {
     return getOuterLocStart();
   }
 
-  /// \brief Retrieve the nested-name-specifier that qualifies the name of this
+  /// Retrieve the nested-name-specifier that qualifies the name of this
   /// declaration, if it was present in the source.
   NestedNameSpecifier *getQualifier() const {
     return hasExtInfo() ? getExtInfo()->QualifierLoc.getNestedNameSpecifier()
                         : nullptr;
   }
 
-  /// \brief Retrieve the nested-name-specifier (with source-location
+  /// Retrieve the nested-name-specifier (with source-location
   /// information) that qualifies the name of this declaration, if it was
   /// present in the source.
   NestedNameSpecifierLoc getQualifierLoc() const {
@@ -717,10 +759,12 @@
   unsigned getNumTemplateParameterLists() const {
     return hasExtInfo() ? getExtInfo()->NumTemplParamLists : 0;
   }
+
   TemplateParameterList *getTemplateParameterList(unsigned index) const {
     assert(index < getNumTemplateParameterLists());
     return getExtInfo()->TemplParamLists[index];
   }
+
   void setTemplateParameterListsInfo(ASTContext &Context,
                                      ArrayRef<TemplateParameterList *> TPLists);
 
@@ -731,65 +775,71 @@
   static bool classofKind(Kind K) {
     return K >= firstDeclarator && K <= lastDeclarator;
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
-/// \brief Structure used to store a statement, the constant value to
+/// Structure used to store a statement, the constant value to
 /// which it was evaluated (if any), and whether or not the statement
 /// is an integral constant expression (if known).
 struct EvaluatedStmt {
-  EvaluatedStmt() : WasEvaluated(false), IsEvaluating(false), CheckedICE(false),
-                    CheckingICE(false), IsICE(false) { }
-
-  /// \brief Whether this statement was already evaluated.
+  /// Whether this statement was already evaluated.
   bool WasEvaluated : 1;
 
-  /// \brief Whether this statement is being evaluated.
+  /// Whether this statement is being evaluated.
   bool IsEvaluating : 1;
 
-  /// \brief Whether we already checked whether this statement was an
+  /// Whether we already checked whether this statement was an
   /// integral constant expression.
   bool CheckedICE : 1;
 
-  /// \brief Whether we are checking whether this statement is an
+  /// Whether we are checking whether this statement is an
   /// integral constant expression.
   bool CheckingICE : 1;
 
-  /// \brief Whether this statement is an integral constant expression,
+  /// Whether this statement is an integral constant expression,
   /// or in C++11, whether the statement is a constant expression. Only
   /// valid if CheckedICE is true.
   bool IsICE : 1;
 
   Stmt *Value;
   APValue Evaluated;
+
+  EvaluatedStmt() : WasEvaluated(false), IsEvaluating(false), CheckedICE(false),
+                    CheckingICE(false), IsICE(false) {}
+
 };
 
-/// VarDecl - An instance of this class is created to represent a variable
-/// declaration or definition.
+/// Represents a variable declaration or definition.
 class VarDecl : public DeclaratorDecl, public Redeclarable<VarDecl> {
 public:
-  /// getStorageClassSpecifierString - Return the string used to
-  /// specify the storage class \p SC.
+  /// Initialization styles.
+  enum InitializationStyle {
+    /// C-style initialization with assignment
+    CInit,
+
+    /// Call-style initialization (C++98)
+    CallInit,
+
+    /// Direct list-initialization (C++11)
+    ListInit
+  };
+
+  /// Kinds of thread-local storage.
+  enum TLSKind {
+    /// Not a TLS variable.
+    TLS_None,
+
+    /// TLS with a known-constant initializer.
+    TLS_Static,
+
+    /// TLS with a dynamic initializer.
+    TLS_Dynamic
+  };
+
+  /// Return the string used to specify the storage class \p SC.
   ///
   /// It is illegal to call this function with SC == None.
   static const char *getStorageClassSpecifierString(StorageClass SC);
 
-  /// \brief Initialization styles.
-  enum InitializationStyle {
-    CInit,    ///< C-style initialization with assignment
-    CallInit, ///< Call-style initialization (C++98)
-    ListInit  ///< Direct list-initialization (C++11)
-  };
-
-  /// \brief Kinds of thread-local storage.
-  enum TLSKind {
-    TLS_None,   ///< Not a TLS variable.
-    TLS_Static, ///< TLS with a known-constant initializer.
-    TLS_Dynamic ///< TLS with a dynamic initializer.
-  };
-
 protected:
   // A pointer union of Stmt * and EvaluatedStmt *. When an EvaluatedStmt, we
   // have allocated the auxiliary struct of information there.
@@ -798,16 +848,20 @@
   // this as *many* VarDecls are ParmVarDecls that don't have default
   // arguments. We could save some space by moving this pointer union to be
   // allocated in trailing space when necessary.
-  typedef llvm::PointerUnion<Stmt *, EvaluatedStmt *> InitType;
+  using InitType = llvm::PointerUnion<Stmt *, EvaluatedStmt *>;
 
-  /// \brief The initializer for this variable or, for a ParmVarDecl, the
+  /// The initializer for this variable or, for a ParmVarDecl, the
   /// C++ default argument.
   mutable InitType Init;
 
 private:
+  friend class ASTDeclReader;
+  friend class ASTNodeImporter;
+  friend class StmtIteratorBase;
+
   class VarDeclBitfields {
-    friend class VarDecl;
     friend class ASTDeclReader;
+    friend class VarDecl;
 
     unsigned SClass : 3;
     unsigned TSCSpec : 2;
@@ -815,10 +869,6 @@
   };
   enum { NumVarDeclBits = 7 };
 
-  friend class ASTDeclReader;
-  friend class StmtIteratorBase;
-  friend class ASTNodeImporter;
-
 protected:
   enum { NumParameterIndexBits = 8 };
 
@@ -830,8 +880,8 @@
   };
 
   class ParmVarDeclBitfields {
-    friend class ParmVarDecl;
     friend class ASTDeclReader;
+    friend class ParmVarDecl;
 
     unsigned : NumVarDeclBits;
 
@@ -863,48 +913,48 @@
   };
 
   class NonParmVarDeclBitfields {
-    friend class VarDecl;
-    friend class ImplicitParamDecl;
     friend class ASTDeclReader;
+    friend class ImplicitParamDecl;
+    friend class VarDecl;
 
     unsigned : NumVarDeclBits;
 
     // FIXME: We need something similar to CXXRecordDecl::DefinitionData.
-    /// \brief Whether this variable is a definition which was demoted due to
+    /// Whether this variable is a definition which was demoted due to
     /// module merge.
     unsigned IsThisDeclarationADemotedDefinition : 1;
 
-    /// \brief Whether this variable is the exception variable in a C++ catch
+    /// Whether this variable is the exception variable in a C++ catch
     /// or an Objective-C @catch statement.
     unsigned ExceptionVar : 1;
 
-    /// \brief Whether this local variable could be allocated in the return
+    /// Whether this local variable could be allocated in the return
     /// slot of its function, enabling the named return value optimization
     /// (NRVO).
     unsigned NRVOVariable : 1;
 
-    /// \brief Whether this variable is the for-range-declaration in a C++0x
+    /// Whether this variable is the for-range-declaration in a C++0x
     /// for-range statement.
     unsigned CXXForRangeDecl : 1;
 
-    /// \brief Whether this variable is an ARC pseudo-__strong
+    /// Whether this variable is an ARC pseudo-__strong
     /// variable;  see isARCPseudoStrong() for details.
     unsigned ARCPseudoStrong : 1;
 
-    /// \brief Whether this variable is (C++1z) inline.
+    /// Whether this variable is (C++1z) inline.
     unsigned IsInline : 1;
 
-    /// \brief Whether this variable has (C++1z) inline explicitly specified.
+    /// Whether this variable has (C++1z) inline explicitly specified.
     unsigned IsInlineSpecified : 1;
 
-    /// \brief Whether this variable is (C++0x) constexpr.
+    /// Whether this variable is (C++0x) constexpr.
     unsigned IsConstexpr : 1;
 
-    /// \brief Whether this variable is the implicit variable for a lambda
+    /// Whether this variable is the implicit variable for a lambda
     /// init-capture.
     unsigned IsInitCapture : 1;
 
-    /// \brief Whether this local extern variable's previous declaration was
+    /// Whether this local extern variable's previous declaration was
     /// declared in the same block scope. This controls whether we should merge
     /// the type of this declaration with its previous declaration.
     unsigned PreviousDeclInSameBlockScope : 1;
@@ -925,20 +975,24 @@
           SourceLocation IdLoc, IdentifierInfo *Id, QualType T,
           TypeSourceInfo *TInfo, StorageClass SC);
 
-  typedef Redeclarable<VarDecl> redeclarable_base;
+  using redeclarable_base = Redeclarable<VarDecl>;
+
   VarDecl *getNextRedeclarationImpl() override {
     return getNextRedeclaration();
   }
+
   VarDecl *getPreviousDeclImpl() override {
     return getPreviousDecl();
   }
+
   VarDecl *getMostRecentDeclImpl() override {
     return getMostRecentDecl();
   }
 
 public:
-  typedef redeclarable_base::redecl_range redecl_range;
-  typedef redeclarable_base::redecl_iterator redecl_iterator;
+  using redecl_range = redeclarable_base::redecl_range;
+  using redecl_iterator = redeclarable_base::redecl_iterator;
+
   using redeclarable_base::redecls_begin;
   using redeclarable_base::redecls_end;
   using redeclarable_base::redecls;
@@ -955,7 +1009,7 @@
 
   SourceRange getSourceRange() const override LLVM_READONLY;
 
-  /// \brief Returns the storage class as written in the source. For the
+  /// Returns the storage class as written in the source. For the
   /// computed linkage of symbol, see getLinkage.
   StorageClass getStorageClass() const {
     return (StorageClass) VarDeclBits.SClass;
@@ -971,8 +1025,8 @@
   }
   TLSKind getTLSKind() const;
 
-  /// hasLocalStorage - Returns true if a variable with function scope
-  ///  is a non-static local variable.
+  /// Returns true if a variable with function scope is a non-static local
+  /// variable.
   bool hasLocalStorage() const {
     if (getStorageClass() == SC_None) {
       // OpenCL v1.2 s6.5.3: The __constant or constant address space name is
@@ -995,8 +1049,8 @@
     return getStorageClass() >= SC_Auto;
   }
 
-  /// isStaticLocal - Returns true if a variable with function scope is a
-  /// static local variable.
+  /// Returns true if a variable with function scope is a static local
+  /// variable.
   bool isStaticLocal() const {
     return (getStorageClass() == SC_Static ||
             // C++11 [dcl.stc]p4
@@ -1004,46 +1058,44 @@
       && !isFileVarDecl();
   }
 
-  /// \brief Returns true if a variable has extern or __private_extern__
+  /// Returns true if a variable has extern or __private_extern__
   /// storage.
   bool hasExternalStorage() const {
     return getStorageClass() == SC_Extern ||
            getStorageClass() == SC_PrivateExtern;
   }
 
-  /// \brief Returns true for all variables that do not have local storage.
+  /// Returns true for all variables that do not have local storage.
   ///
   /// This includes all global variables as well as static variables declared
   /// within a function.
   bool hasGlobalStorage() const { return !hasLocalStorage(); }
 
-  /// \brief Get the storage duration of this variable, per C++ [basic.stc].
+  /// Get the storage duration of this variable, per C++ [basic.stc].
   StorageDuration getStorageDuration() const {
     return hasLocalStorage() ? SD_Automatic :
            getTSCSpec() ? SD_Thread : SD_Static;
   }
 
-  /// \brief Compute the language linkage.
+  /// Compute the language linkage.
   LanguageLinkage getLanguageLinkage() const;
 
-  /// \brief Determines whether this variable is a variable with
-  /// external, C linkage.
+  /// Determines whether this variable is a variable with external, C linkage.
   bool isExternC() const;
 
-  /// \brief Determines whether this variable's context is, or is nested within,
+  /// Determines whether this variable's context is, or is nested within,
   /// a C++ extern "C" linkage spec.
   bool isInExternCContext() const;
 
-  /// \brief Determines whether this variable's context is, or is nested within,
+  /// Determines whether this variable's context is, or is nested within,
   /// a C++ extern "C++" linkage spec.
   bool isInExternCXXContext() const;
 
-  /// isLocalVarDecl - Returns true for local variable declarations
-  /// other than parameters.  Note that this includes static variables
-  /// inside of functions. It also includes variables inside blocks.
+  /// Returns true for local variable declarations other than parameters.
+  /// Note that this includes static variables inside of functions. It also
+  /// includes variables inside blocks.
   ///
   ///   void foo() { int x; static int y; extern int z; }
-  ///
   bool isLocalVarDecl() const {
     if (getKind() != Decl::Var && getKind() != Decl::Decomposition)
       return false;
@@ -1052,13 +1104,12 @@
     return false;
   }
 
-  /// \brief Similar to isLocalVarDecl but also includes parameters.
+  /// Similar to isLocalVarDecl but also includes parameters.
   bool isLocalVarDeclOrParm() const {
     return isLocalVarDecl() || getKind() == Decl::ParmVar;
   }
 
-  /// isFunctionOrMethodVarDecl - Similar to isLocalVarDecl, but
-  /// excludes variables declared in blocks.
+  /// Similar to isLocalVarDecl, but excludes variables declared in blocks.
   bool isFunctionOrMethodVarDecl() const {
     if (getKind() != Decl::Var && getKind() != Decl::Decomposition)
       return false;
@@ -1066,7 +1117,7 @@
     return DC->isFunctionOrMethod() && DC->getDeclKind() != Decl::Block;
   }
 
-  /// \brief Determines whether this is a static data member.
+  /// Determines whether this is a static data member.
   ///
   /// This will only be true in C++, and applies to, e.g., the
   /// variable 'x' in:
@@ -1086,12 +1137,17 @@
   }
 
   enum DefinitionKind {
-    DeclarationOnly,      ///< This declaration is only a declaration.
-    TentativeDefinition,  ///< This declaration is a tentative definition.
-    Definition            ///< This declaration is definitely a definition.
+    /// This declaration is only a declaration.
+    DeclarationOnly,
+
+    /// This declaration is a tentative definition.
+    TentativeDefinition,
+
+    /// This declaration is definitely a definition.
+    Definition
   };
 
-  /// \brief Check whether this declaration is a definition. If this could be
+  /// Check whether this declaration is a definition. If this could be
   /// a tentative definition (in C), don't check whether there's an overriding
   /// definition.
   DefinitionKind isThisDeclarationADefinition(ASTContext &) const;
@@ -1099,21 +1155,20 @@
     return isThisDeclarationADefinition(getASTContext());
   }
 
-  /// \brief Check whether this variable is defined in this
-  /// translation unit.
+  /// Check whether this variable is defined in this translation unit.
   DefinitionKind hasDefinition(ASTContext &) const;
   DefinitionKind hasDefinition() const {
     return hasDefinition(getASTContext());
   }
 
-  /// \brief Get the tentative definition that acts as the real definition in
-  /// a TU. Returns null if there is a proper definition available.
+  /// Get the tentative definition that acts as the real definition in a TU.
+  /// Returns null if there is a proper definition available.
   VarDecl *getActingDefinition();
   const VarDecl *getActingDefinition() const {
     return const_cast<VarDecl*>(this)->getActingDefinition();
   }
 
-  /// \brief Get the real (not just tentative) definition for this declaration.
+  /// Get the real (not just tentative) definition for this declaration.
   VarDecl *getDefinition(ASTContext &);
   const VarDecl *getDefinition(ASTContext &C) const {
     return const_cast<VarDecl*>(this)->getDefinition(C);
@@ -1125,11 +1180,11 @@
     return const_cast<VarDecl*>(this)->getDefinition();
   }
 
-  /// \brief Determine whether this is or was instantiated from an out-of-line
+  /// Determine whether this is or was instantiated from an out-of-line
   /// definition of a static data member.
   bool isOutOfLine() const override;
 
-  /// isFileVarDecl - Returns true for file scoped variable declaration.
+  /// Returns true for file scoped variable declaration.
   bool isFileVarDecl() const {
     Kind K = getKind();
     if (K == ParmVar || K == ImplicitParam)
@@ -1144,14 +1199,14 @@
     return false;
   }
 
-  /// getAnyInitializer - Get the initializer for this variable, no matter which
+  /// Get the initializer for this variable, no matter which
   /// declaration it is attached to.
   const Expr *getAnyInitializer() const {
     const VarDecl *D;
     return getAnyInitializer(D);
   }
 
-  /// getAnyInitializer - Get the initializer for this variable, no matter which
+  /// Get the initializer for this variable, no matter which
   /// declaration it is attached to. Also get that declaration.
   const Expr *getAnyInitializer(const VarDecl *&D) const;
 
@@ -1161,12 +1216,12 @@
   }
   Expr *getInit();
 
-  /// \brief Retrieve the address of the initializer expression.
+  /// Retrieve the address of the initializer expression.
   Stmt **getInitAddress();
 
   void setInit(Expr *I);
 
-  /// \brief Determine whether this variable's value can be used in a
+  /// Determine whether this variable's value can be used in a
   /// constant expression, according to the relevant language standard.
   /// This only checks properties of the declaration, and does not check
   /// whether the initializer is in fact a constant expression.
@@ -1218,6 +1273,7 @@
   InitializationStyle getInitStyle() const {
     return static_cast<InitializationStyle>(VarDeclBits.InitStyle);
   }
+
   /// \brief Whether the initializer is a direct-initializer (list or call).
   bool isDirectInit() const {
     return getInitStyle() != CInit;
@@ -1235,8 +1291,8 @@
   /// definitions one of which needs to be demoted to a declaration to keep
   /// the AST invariants.
   void demoteThisDefinitionToDeclaration() {
-    assert (isThisDeclarationADefinition() && "Not a definition!");
-    assert (!isa<ParmVarDecl>(this) && "Cannot demote ParmVarDecls!");
+    assert(isThisDeclarationADefinition() && "Not a definition!");
+    assert(!isa<ParmVarDecl>(this) && "Cannot demote ParmVarDecls!");
     NonParmVarDeclBits.IsThisDeclarationADemotedDefinition = 1;
   }
 
@@ -1400,12 +1456,23 @@
   /// with pointer to 'this', 'self', '_cmd', virtual table pointers, captured
   /// context or something else.
   enum ImplicitParamKind : unsigned {
-    ObjCSelf,        /// Parameter for Objective-C 'self' argument
-    ObjCCmd,         /// Parameter for Objective-C '_cmd' argument
-    CXXThis,         /// Parameter for C++ 'this' argument
-    CXXVTT,          /// Parameter for C++ virtual table pointers
-    CapturedContext, /// Parameter for captured context
-    Other,           /// Other implicit parameter
+    /// Parameter for Objective-C 'self' argument
+    ObjCSelf,
+
+    /// Parameter for Objective-C '_cmd' argument
+    ObjCCmd,
+
+    /// Parameter for C++ 'this' argument
+    CXXThis,
+
+    /// Parameter for C++ virtual table pointers
+    CXXVTT,
+
+    /// Parameter for captured context
+    CapturedContext,
+
+    /// Other implicit parameter
+    Other,
   };
 
   /// Create implicit parameter.
@@ -1438,12 +1505,13 @@
   ImplicitParamKind getParameterKind() const {
     return static_cast<ImplicitParamKind>(NonParmVarDeclBits.ImplicitParamKind);
   }
+
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ImplicitParam; }
 };
 
-/// ParmVarDecl - Represents a parameter to a function.
+/// Represents a parameter to a function.
 class ParmVarDecl : public VarDecl {
 public:
   enum { MaxFunctionScopeDepth = 255 };
@@ -1541,14 +1609,13 @@
     return const_cast<ParmVarDecl *>(this)->getUninstantiatedDefaultArg();
   }
 
-  /// hasDefaultArg - Determines whether this parameter has a default argument,
+  /// Determines whether this parameter has a default argument,
   /// either parsed or not.
   bool hasDefaultArg() const;
 
-  /// hasUnparsedDefaultArg - Determines whether this parameter has a
-  /// default argument that has not yet been parsed. This will occur
-  /// during the processing of a C++ class whose member functions have
-  /// default arguments, e.g.,
+  /// Determines whether this parameter has a default argument that has not
+  /// yet been parsed. This will occur during the processing of a C++ class
+  /// whose member functions have default arguments, e.g.,
   /// @code
   ///   class X {
   ///   public:
@@ -1563,11 +1630,10 @@
     return ParmVarDeclBits.DefaultArgKind == DAK_Uninstantiated;
   }
 
-  /// setUnparsedDefaultArg - Specify that this parameter has an
-  /// unparsed default argument. The argument will be replaced with a
-  /// real default argument via setDefaultArg when the class
-  /// definition enclosing the function declaration that owns this
-  /// default argument is completed.
+  /// Specify that this parameter has an unparsed default argument.
+  /// The argument will be replaced with a real default argument via
+  /// setDefaultArg when the class definition enclosing the function
+  /// declaration that owns this default argument is completed.
   void setUnparsedDefaultArg() {
     ParmVarDeclBits.DefaultArgKind = DAK_Unparsed;
   }
@@ -1586,7 +1652,7 @@
   /// parameter pack.
   bool isParameterPack() const;
 
-  /// setOwningFunction - Sets the function declaration that owns this
+  /// Sets the function declaration that owns this
   /// ParmVarDecl. Since ParmVarDecls are often created before the
   /// FunctionDecls that own them, this routine is required to update
   /// the DeclContext appropriately.
@@ -1617,8 +1683,7 @@
   unsigned getParameterIndexLarge() const;
 };
 
-/// FunctionDecl - An instance of this class is created to represent a
-/// function declaration or definition.
+/// Represents a function declaration or definition.
 ///
 /// Since a given function can be declared several times in a program,
 /// there may be several FunctionDecls that correspond to that
@@ -1641,10 +1706,10 @@
   };
 
 private:
-  /// ParamInfo - new[]'d array of pointers to VarDecls for the formal
+  /// A new[]'d array of pointers to VarDecls for the formal
   /// parameters of this function.  This is null if a prototype or if there are
   /// no formals.
-  ParmVarDecl **ParamInfo;
+  ParmVarDecl **ParamInfo = nullptr;
 
   LazyDeclStmtPtr Body;
 
@@ -1653,10 +1718,12 @@
   unsigned SClass : 3;
   unsigned IsInline : 1;
   unsigned IsInlineSpecified : 1;
+
 protected:
   // This is shared by CXXConstructorDecl, CXXConversionDecl, and
   // CXXDeductionGuideDecl.
   unsigned IsExplicitSpecified : 1;
+
 private:
   unsigned IsVirtualAsWritten : 1;
   unsigned IsPure : 1;
@@ -1664,12 +1731,18 @@
   unsigned HasWrittenPrototype : 1;
   unsigned IsDeleted : 1;
   unsigned IsTrivial : 1; // sunk from CXXMethodDecl
+
+  /// This flag indicates whether this function is trivial for the purpose of
+  /// calls. This is meaningful only when this function is a copy/move
+  /// constructor or a destructor.
+  unsigned IsTrivialForCall : 1;
+
   unsigned IsDefaulted : 1; // sunk from CXXMethoDecl
   unsigned IsExplicitlyDefaulted : 1; //sunk from CXXMethodDecl
   unsigned HasImplicitReturnZero : 1;
   unsigned IsLateTemplateParsed : 1;
   unsigned IsConstexpr : 1;
-  unsigned InstantiationIsPending:1;
+  unsigned InstantiationIsPending : 1;
 
   /// \brief Indicates if the function uses __try.
   unsigned UsesSEHTry : 1;
@@ -1682,6 +1755,24 @@
   /// parsing it.
   unsigned WillHaveBody : 1;
 
+  /// Indicates that this function is a multiversioned function using attribute
+  /// 'target'.
+  unsigned IsMultiVersion : 1;
+
+protected:
+  /// [C++17] Only used by CXXDeductionGuideDecl. Declared here to avoid
+  /// increasing the size of CXXDeductionGuideDecl by the size of an unsigned
+  /// int as opposed to adding a single bit to FunctionDecl.
+  /// Indicates that the Deduction Guide is the implicitly generated 'copy
+  /// deduction candidate' (is used during overload resolution).
+  unsigned IsCopyDeductionCandidate : 1;
+
+private:
+
+  /// Store the ODRHash after first calculation.
+  unsigned HasODRHash : 1;
+  unsigned ODRHash;
+
   /// \brief End part of this FunctionDecl's source range.
   ///
   /// We could compute the full range in getSourceRange(). However, when we're
@@ -1709,8 +1800,8 @@
                       DependentFunctionTemplateSpecializationInfo *>
     TemplateOrSpecialization;
 
-  /// DNLoc - Provides source/type location info for the
-  /// declaration name embedded in the DeclaratorDecl base class.
+  /// Provides source/type location info for the declaration name embedded in
+  /// the DeclaratorDecl base class.
   DeclarationNameLoc DNLoc;
 
   /// \brief Specify that this function declaration is actually a function
@@ -1756,33 +1847,40 @@
                bool isConstexprSpecified)
       : DeclaratorDecl(DK, DC, NameInfo.getLoc(), NameInfo.getName(), T, TInfo,
                        StartLoc),
-        DeclContext(DK), redeclarable_base(C), ParamInfo(nullptr), Body(),
-        SClass(S), IsInline(isInlineSpecified),
-        IsInlineSpecified(isInlineSpecified), IsExplicitSpecified(false),
-        IsVirtualAsWritten(false), IsPure(false),
+        DeclContext(DK), redeclarable_base(C), SClass(S),
+        IsInline(isInlineSpecified), IsInlineSpecified(isInlineSpecified),
+        IsExplicitSpecified(false), IsVirtualAsWritten(false), IsPure(false),
         HasInheritedPrototype(false), HasWrittenPrototype(true),
-        IsDeleted(false), IsTrivial(false), IsDefaulted(false),
+        IsDeleted(false), IsTrivial(false), IsTrivialForCall(false),
+        IsDefaulted(false),
         IsExplicitlyDefaulted(false), HasImplicitReturnZero(false),
         IsLateTemplateParsed(false), IsConstexpr(isConstexprSpecified),
-        InstantiationIsPending(false),
-        UsesSEHTry(false), HasSkippedBody(false), WillHaveBody(false),
-        EndRangeLoc(NameInfo.getEndLoc()), TemplateOrSpecialization(),
-        DNLoc(NameInfo.getInfo()) {}
+        InstantiationIsPending(false), UsesSEHTry(false), HasSkippedBody(false),
+        WillHaveBody(false), IsMultiVersion(false),
+        IsCopyDeductionCandidate(false), HasODRHash(false), ODRHash(0),
+        EndRangeLoc(NameInfo.getEndLoc()), DNLoc(NameInfo.getInfo()) {}
 
-  typedef Redeclarable<FunctionDecl> redeclarable_base;
+  using redeclarable_base = Redeclarable<FunctionDecl>;
+
   FunctionDecl *getNextRedeclarationImpl() override {
     return getNextRedeclaration();
   }
+
   FunctionDecl *getPreviousDeclImpl() override {
     return getPreviousDecl();
   }
+
   FunctionDecl *getMostRecentDeclImpl() override {
     return getMostRecentDecl();
   }
 
 public:
-  typedef redeclarable_base::redecl_range redecl_range;
-  typedef redeclarable_base::redecl_iterator redecl_iterator;
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
+  using redecl_range = redeclarable_base::redecl_range;
+  using redecl_iterator = redeclarable_base::redecl_iterator;
+
   using redeclarable_base::redecls_begin;
   using redeclarable_base::redecls_end;
   using redeclarable_base::redecls;
@@ -1827,11 +1925,25 @@
 
   SourceRange getSourceRange() const override LLVM_READONLY;
 
-  /// \brief Returns true if the function has a body (definition). The
-  /// function body might be in any of the (re-)declarations of this
-  /// function. The variant that accepts a FunctionDecl pointer will
-  /// set that function declaration to the actual declaration
-  /// containing the body (if there is one).
+  // Function definitions.
+  //
+  // A function declaration may be:
+  // - a non defining declaration,
+  // - a definition. A function may be defined because:
+  //   - it has a body, or will have it in the case of late parsing.
+  //   - it has an uninstantiated body. The body does not exist because the
+  //     function is not used yet, but the declaration is considered a
+  //     definition and does not allow other definition of this function.
+  //   - it does not have a user specified body, but it does not allow
+  //     redefinition, because it is deleted/defaulted or is defined through
+  //     some other mechanism (alias, ifunc).
+
+  /// Returns true if the function has a body.
+  ///
+  /// The function body might be in any of the (re-)declarations of this
+  /// function. The variant that accepts a FunctionDecl pointer will set that
+  /// function declaration to the actual declaration containing the body (if
+  /// there is one).
   bool hasBody(const FunctionDecl *&Definition) const;
 
   bool hasBody() const override {
@@ -1839,13 +1951,15 @@
     return hasBody(Definition);
   }
 
-  /// hasTrivialBody - Returns whether the function has a trivial body that does
-  /// not require any specific codegen.
+  /// Returns whether the function has a trivial body that does not require any
+  /// specific codegen.
   bool hasTrivialBody() const;
 
-  /// isDefined - Returns true if the function is defined at all, including
-  /// a deleted definition. Except for the behavior when the function is
-  /// deleted, behaves like hasBody.
+  /// Returns true if the function has a definition that does not need to be
+  /// instantiated.
+  ///
+  /// The variant that accepts a FunctionDecl pointer will set that function
+  /// declaration to the declaration that is a definition (if there is one).
   bool isDefined(const FunctionDecl *&Definition) const;
 
   virtual bool isDefined() const {
@@ -1864,11 +1978,10 @@
     return const_cast<FunctionDecl *>(this)->getDefinition();
   }
 
-  /// getBody - Retrieve the body (definition) of the function. The
-  /// function body might be in any of the (re-)declarations of this
-  /// function. The variant that accepts a FunctionDecl pointer will
-  /// set that function declaration to the actual declaration
-  /// containing the body (if there is one).
+  /// Retrieve the body (definition) of the function. The function body might be
+  /// in any of the (re-)declarations of this function. The variant that accepts
+  /// a FunctionDecl pointer will set that function declaration to the actual
+  /// declaration containing the body (if there is one).
   /// NOTE: For checking if there is a body, use hasBody() instead, to avoid
   /// unnecessary AST de-serialization of the body.
   Stmt *getBody(const FunctionDecl *&Definition) const;
@@ -1883,15 +1996,12 @@
   ///
   /// This does not determine whether the function has been defined (e.g., in a
   /// previous definition); for that information, use isDefined.
-  ///
   bool isThisDeclarationADefinition() const {
-    return IsDeleted || IsDefaulted || Body || IsLateTemplateParsed ||
-      WillHaveBody || hasDefiningAttr();
+    return IsDeleted || IsDefaulted || Body || HasSkippedBody ||
+           IsLateTemplateParsed || WillHaveBody || hasDefiningAttr();
   }
 
-  /// doesThisDeclarationHaveABody - Returns whether this specific
-  /// declaration of the function has a body - that is, if it is a non-
-  /// deleted definition.
+  /// Returns whether this specific declaration of the function has a body.
   bool doesThisDeclarationHaveABody() const {
     return Body || IsLateTemplateParsed;
   }
@@ -1922,6 +2032,9 @@
   bool isTrivial() const { return IsTrivial; }
   void setTrivial(bool IT) { IsTrivial = IT; }
 
+  bool isTrivialForCall() const { return IsTrivialForCall; }
+  void setTrivialForCall(bool IT) { IsTrivialForCall = IT; }
+
   /// Whether this function is defaulted per C++0x. Only valid for
   /// special member functions.
   bool isDefaulted() const { return IsDefaulted; }
@@ -2069,6 +2182,15 @@
   bool willHaveBody() const { return WillHaveBody; }
   void setWillHaveBody(bool V = true) { WillHaveBody = V; }
 
+  /// True if this function is considered a multiversioned function.
+  bool isMultiVersion() const { return getCanonicalDecl()->IsMultiVersion; }
+
+  /// Sets the multiversion state for this declaration and all of its
+  /// redeclarations.
+  void setIsMultiVersion(bool V = true) {
+    getCanonicalDecl()->IsMultiVersion = V;
+  }
+
   void setPreviousDeclaration(FunctionDecl * PrevDecl);
 
   FunctionDecl *getCanonicalDecl() override;
@@ -2087,8 +2209,9 @@
   }
 
   // Iterator access to formal parameters.
-  typedef MutableArrayRef<ParmVarDecl *>::iterator param_iterator;
-  typedef ArrayRef<ParmVarDecl *>::const_iterator param_const_iterator;
+  using param_iterator = MutableArrayRef<ParmVarDecl *>::iterator;
+  using param_const_iterator = ArrayRef<ParmVarDecl *>::const_iterator;
+
   bool param_empty() const { return parameters().empty(); }
   param_iterator param_begin() { return parameters().begin(); }
   param_iterator param_end() { return parameters().end(); }
@@ -2096,9 +2219,9 @@
   param_const_iterator param_end() const { return parameters().end(); }
   size_t param_size() const { return parameters().size(); }
 
-  /// getNumParams - Return the number of parameters this function must have
-  /// based on its FunctionType.  This is the length of the ParamInfo array
-  /// after it has been created.
+  /// Return the number of parameters this function must have based on its
+  /// FunctionType.  This is the length of the ParamInfo array after it has been
+  /// created.
   unsigned getNumParams() const;
 
   const ParmVarDecl *getParamDecl(unsigned i) const {
@@ -2113,10 +2236,9 @@
     setParams(getASTContext(), NewParamInfo);
   }
 
-  /// getMinRequiredArguments - Returns the minimum number of arguments
-  /// needed to call this function. This may be fewer than the number of
-  /// function parameters, if some of the parameters have default
-  /// arguments (in C++).
+  /// Returns the minimum number of arguments needed to call this function. This
+  /// may be fewer than the number of function parameters, if some of the
+  /// parameters have default arguments (in C++).
   unsigned getMinRequiredArguments() const;
 
   QualType getReturnType() const {
@@ -2177,8 +2299,8 @@
 
   bool doesDeclarationForceExternallyVisibleDefinition() const;
 
-  /// isOverloadedOperator - Whether this function declaration
-  /// represents an C++ overloaded operator, e.g., "operator+".
+  /// Whether this function declaration represents an C++ overloaded
+  /// operator, e.g., "operator+".
   bool isOverloadedOperator() const {
     return getOverloadedOperator() != OO_None;
   }
@@ -2360,6 +2482,10 @@
   /// returns 0.
   unsigned getMemoryFunctionKind() const;
 
+  /// \brief Returns ODRHash of the function.  This value is calculated and
+  /// stored on first call, then the stored value returned on the other calls.
+  unsigned getODRHash();
+
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) {
@@ -2371,14 +2497,9 @@
   static FunctionDecl *castFromDeclContext(const DeclContext *DC) {
     return static_cast<FunctionDecl *>(const_cast<DeclContext*>(DC));
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
-
-/// FieldDecl - An instance of this class is created by Sema::ActOnField to
-/// represent a member of a struct/union/class.
+/// Represents a member of a struct/union/class.
 class FieldDecl : public DeclaratorDecl, public Mergeable<FieldDecl> {
   unsigned BitField : 1;
   unsigned Mutable : 1;
@@ -2438,6 +2559,9 @@
   }
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static FieldDecl *Create(const ASTContext &C, DeclContext *DC,
                            SourceLocation StartLoc, SourceLocation IdLoc,
                            IdentifierInfo *Id, QualType T,
@@ -2446,20 +2570,20 @@
 
   static FieldDecl *CreateDeserialized(ASTContext &C, unsigned ID);
   
-  /// getFieldIndex - Returns the index of this field within its record,
+  /// Returns the index of this field within its record,
   /// as appropriate for passing to ASTRecordLayout::getFieldOffset.
   unsigned getFieldIndex() const;
 
-  /// isMutable - Determines whether this field is mutable (C++ only).
+  /// Determines whether this field is mutable (C++ only).
   bool isMutable() const { return Mutable; }
 
-  /// \brief Determines whether this field is a bitfield.
+  /// Determines whether this field is a bitfield.
   bool isBitField() const { return BitField; }
 
-  /// @brief Determines whether this is an unnamed bitfield.
+  /// Determines whether this is an unnamed bitfield.
   bool isUnnamedBitfield() const { return isBitField() && !getDeclName(); }
 
-  /// isAnonymousStructOrUnion - Determines whether this field is a
+  /// Determines whether this field is a
   /// representative for an anonymous struct or union. Such fields are
   /// unnamed and are implicitly generated by the implementation to
   /// store the data for the anonymous union or struct.
@@ -2473,9 +2597,10 @@
       return static_cast<InitAndBitWidth*>(Ptr)->BitWidth;
     return static_cast<Expr*>(Ptr);
   }
+
   unsigned getBitWidthValue(const ASTContext &Ctx) const;
 
-  /// setBitWidth - Set the bit-field width for this member.
+  /// Set the bit-field width for this member.
   // Note: used by some clients (i.e., do not remove it).
   void setBitWidth(Expr *Width) {
     assert(!hasCapturedVLAType() && !BitField &&
@@ -2489,7 +2614,7 @@
     BitField = true;
   }
 
-  /// removeBitWidth - Remove the bit-field width from this member.
+  /// Remove the bit-field width from this member.
   // Note: used by some clients (i.e., do not remove it).
   void removeBitWidth() {
     assert(isBitField() && "no bitfield width to remove");
@@ -2521,8 +2646,7 @@
     return static_cast<Expr*>(Ptr);
   }
 
-  /// setInClassInitializer - Set the C++11 in-class initializer for this
-  /// member.
+  /// Set the C++11 in-class initializer for this member.
   void setInClassInitializer(Expr *Init) {
     assert(hasInClassInitializer() && !getInClassInitializer());
     if (BitField)
@@ -2531,8 +2655,7 @@
       InitStorage.setPointer(Init);
   }
 
-  /// removeInClassInitializer - Remove the C++11 in-class initializer from this
-  /// member.
+  /// Remove the C++11 in-class initializer from this member.
   void removeInClassInitializer() {
     assert(hasInClassInitializer() && "no initializer to remove");
     InitStorage.setPointerAndInt(getBitWidth(), ISK_NoInit);
@@ -2550,10 +2673,11 @@
                                       InitStorage.getPointer())
                                 : nullptr;
   }
+
   /// \brief Set the captured variable length array type for this field.
   void setCapturedVLAType(const VariableArrayType *VLAType);
 
-  /// getParent - Returns the parent of this field declaration, which
+  /// Returns the parent of this field declaration, which
   /// is the struct in which this field is defined.
   const RecordDecl *getParent() const {
     return cast<RecordDecl>(getDeclContext());
@@ -2572,18 +2696,16 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K >= firstField && K <= lastField; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
-/// EnumConstantDecl - An instance of this object exists for each enum constant
+/// An instance of this object exists for each enum constant
 /// that is defined.  For example, in "enum X {a,b}", each of a/b are
 /// EnumConstantDecl's, X is an instance of EnumDecl, and the type of a/b is a
 /// TagType for the X EnumDecl.
 class EnumConstantDecl : public ValueDecl, public Mergeable<EnumConstantDecl> {
   Stmt *Init; // an integer constant expression
   llvm::APSInt Val; // The value.
+
 protected:
   EnumConstantDecl(DeclContext *DC, SourceLocation L,
                    IdentifierInfo *Id, QualType T, Expr *E,
@@ -2591,6 +2713,7 @@
     : ValueDecl(EnumConstant, DC, L, Id, T), Init((Stmt*)E), Val(V) {}
 
 public:
+  friend class StmtIteratorBase;
 
   static EnumConstantDecl *Create(ASTContext &C, EnumDecl *DC,
                                   SourceLocation L, IdentifierInfo *Id,
@@ -2614,16 +2737,12 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == EnumConstant; }
-
-  friend class StmtIteratorBase;
 };
 
-/// IndirectFieldDecl - An instance of this class is created to represent a
-/// field injected from an anonymous union/struct into the parent scope.
-/// IndirectFieldDecl are always implicit.
+/// Represents a field injected from an anonymous union/struct into the parent
+/// scope. These are always implicit.
 class IndirectFieldDecl : public ValueDecl,
                           public Mergeable<IndirectFieldDecl> {
-  void anchor() override;
   NamedDecl **Chaining;
   unsigned ChainingSize;
 
@@ -2631,14 +2750,18 @@
                     DeclarationName N, QualType T,
                     MutableArrayRef<NamedDecl *> CH);
 
+  void anchor() override;
+
 public:
+  friend class ASTDeclReader;
+
   static IndirectFieldDecl *Create(ASTContext &C, DeclContext *DC,
                                    SourceLocation L, IdentifierInfo *Id,
                                    QualType T, llvm::MutableArrayRef<NamedDecl *> CH);
 
   static IndirectFieldDecl *CreateDeserialized(ASTContext &C, unsigned ID);
 
-  typedef ArrayRef<NamedDecl *>::const_iterator chain_iterator;
+  using chain_iterator = ArrayRef<NamedDecl *>::const_iterator;
 
   ArrayRef<NamedDecl *> chain() const {
     return llvm::makeArrayRef(Chaining, ChainingSize);
@@ -2664,26 +2787,27 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == IndirectField; }
-  friend class ASTDeclReader;
 };
 
-/// TypeDecl - Represents a declaration of a type.
-///
+/// Represents a declaration of a type.
 class TypeDecl : public NamedDecl {
-  void anchor() override;
-  /// TypeForDecl - This indicates the Type object that represents
+  friend class ASTContext;
+
+  /// This indicates the Type object that represents
   /// this TypeDecl.  It is a cache maintained by
   /// ASTContext::getTypedefType, ASTContext::getTagDeclType, and
   /// ASTContext::getTemplateTypeParmType, and TemplateTypeParmDecl.
-  mutable const Type *TypeForDecl;
-  /// LocStart - The start of the source range for this declaration.
+  mutable const Type *TypeForDecl = nullptr;
+
+  /// The start of the source range for this declaration.
   SourceLocation LocStart;
-  friend class ASTContext;
+
+  void anchor() override;
 
 protected:
   TypeDecl(Kind DK, DeclContext *DC, SourceLocation L, IdentifierInfo *Id,
            SourceLocation StartL = SourceLocation())
-    : NamedDecl(DK, DC, L, Id), TypeForDecl(nullptr), LocStart(StartL) {}
+    : NamedDecl(DK, DC, L, Id), LocStart(StartL) {}
 
 public:
   // Low-level accessor. If you just want the type defined by this node,
@@ -2707,39 +2831,46 @@
   static bool classofKind(Kind K) { return K >= firstType && K <= lastType; }
 };
 
-
 /// Base class for declarations which introduce a typedef-name.
 class TypedefNameDecl : public TypeDecl, public Redeclarable<TypedefNameDecl> {
-  void anchor() override;
-  typedef std::pair<TypeSourceInfo*, QualType> ModedTInfo;
-  llvm::PointerUnion<TypeSourceInfo*, ModedTInfo*> MaybeModedTInfo;
+  struct LLVM_ALIGNAS(8) ModedTInfo {
+    TypeSourceInfo *first;
+    QualType second;
+  };
 
-  // FIXME: This can be packed into the bitfields in Decl.
-  /// If 0, we have not computed IsTransparentTag.
-  /// Otherwise, IsTransparentTag is (CacheIsTransparentTag >> 1).
-  mutable unsigned CacheIsTransparentTag : 2;
+  /// If int part is 0, we have not computed IsTransparentTag.
+  /// Otherwise, IsTransparentTag is (getInt() >> 1).
+  mutable llvm::PointerIntPair<
+      llvm::PointerUnion<TypeSourceInfo *, ModedTInfo *>, 2>
+      MaybeModedTInfo;
+
+  void anchor() override;
 
 protected:
   TypedefNameDecl(Kind DK, ASTContext &C, DeclContext *DC,
                   SourceLocation StartLoc, SourceLocation IdLoc,
                   IdentifierInfo *Id, TypeSourceInfo *TInfo)
       : TypeDecl(DK, DC, IdLoc, Id, StartLoc), redeclarable_base(C),
-        MaybeModedTInfo(TInfo), CacheIsTransparentTag(0) {}
+        MaybeModedTInfo(TInfo, 0) {}
 
-  typedef Redeclarable<TypedefNameDecl> redeclarable_base;
+  using redeclarable_base = Redeclarable<TypedefNameDecl>;
+
   TypedefNameDecl *getNextRedeclarationImpl() override {
     return getNextRedeclaration();
   }
+
   TypedefNameDecl *getPreviousDeclImpl() override {
     return getPreviousDecl();
   }
+
   TypedefNameDecl *getMostRecentDeclImpl() override {
     return getMostRecentDecl();
   }
 
 public:
-  typedef redeclarable_base::redecl_range redecl_range;
-  typedef redeclarable_base::redecl_iterator redecl_iterator;
+  using redecl_range = redeclarable_base::redecl_range;
+  using redecl_iterator = redeclarable_base::redecl_iterator;
+
   using redeclarable_base::redecls_begin;
   using redeclarable_base::redecls_end;
   using redeclarable_base::redecls;
@@ -2747,23 +2878,29 @@
   using redeclarable_base::getMostRecentDecl;
   using redeclarable_base::isFirstDecl;
 
-  bool isModed() const { return MaybeModedTInfo.is<ModedTInfo*>(); }
+  bool isModed() const {
+    return MaybeModedTInfo.getPointer().is<ModedTInfo *>();
+  }
 
   TypeSourceInfo *getTypeSourceInfo() const {
-    return isModed()
-      ? MaybeModedTInfo.get<ModedTInfo*>()->first
-      : MaybeModedTInfo.get<TypeSourceInfo*>();
+    return isModed() ? MaybeModedTInfo.getPointer().get<ModedTInfo *>()->first
+                     : MaybeModedTInfo.getPointer().get<TypeSourceInfo *>();
   }
+
   QualType getUnderlyingType() const {
-    return isModed()
-      ? MaybeModedTInfo.get<ModedTInfo*>()->second
-      : MaybeModedTInfo.get<TypeSourceInfo*>()->getType();
+    return isModed() ? MaybeModedTInfo.getPointer().get<ModedTInfo *>()->second
+                     : MaybeModedTInfo.getPointer()
+                           .get<TypeSourceInfo *>()
+                           ->getType();
   }
+
   void setTypeSourceInfo(TypeSourceInfo *newType) {
-    MaybeModedTInfo = newType;
+    MaybeModedTInfo.setPointer(newType);
   }
+
   void setModedTypeSourceInfo(TypeSourceInfo *unmodedTSI, QualType modedTy) {
-    MaybeModedTInfo = new (getASTContext()) ModedTInfo(unmodedTSI, modedTy);
+    MaybeModedTInfo.setPointer(new (getASTContext(), 8)
+                                   ModedTInfo({unmodedTSI, modedTy}));
   }
 
   /// Retrieves the canonical declaration of this typedef-name.
@@ -2780,8 +2917,8 @@
   /// Determines if this typedef shares a name and spelling location with its
   /// underlying tag type, as is the case with the NS_ENUM macro.
   bool isTransparentTag() const {
-    if (CacheIsTransparentTag)
-      return CacheIsTransparentTag & 0x2;
+    if (MaybeModedTInfo.getInt())
+      return MaybeModedTInfo.getInt() & 0x2;
     return isTransparentTagSlow();
   }
 
@@ -2795,7 +2932,7 @@
   bool isTransparentTagSlow() const;
 };
 
-/// TypedefDecl - Represents the declaration of a typedef-name via the 'typedef'
+/// Represents the declaration of a typedef-name via the 'typedef'
 /// type specifier.
 class TypedefDecl : public TypedefNameDecl {
   TypedefDecl(ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
@@ -2815,7 +2952,7 @@
   static bool classofKind(Kind K) { return K == Typedef; }
 };
 
-/// TypeAliasDecl - Represents the declaration of a typedef-name via a C++0x
+/// Represents the declaration of a typedef-name via a C++11
 /// alias-declaration.
 class TypeAliasDecl : public TypedefNameDecl {
   /// The template for which this is the pattern, if any.
@@ -2842,34 +2979,33 @@
   static bool classofKind(Kind K) { return K == TypeAlias; }
 };
 
-/// TagDecl - Represents the declaration of a struct/union/class/enum.
+/// Represents the declaration of a struct/union/class/enum.
 class TagDecl
   : public TypeDecl, public DeclContext, public Redeclarable<TagDecl> {
 public:
   // This is really ugly.
-  typedef TagTypeKind TagKind;
+  using TagKind = TagTypeKind;
 
 private:
   // FIXME: This can be packed into the bitfields in Decl.
-  /// TagDeclKind - The TagKind enum.
+  /// The TagKind enum.
   unsigned TagDeclKind : 3;
 
-  /// IsCompleteDefinition - True if this is a definition ("struct foo
-  /// {};"), false if it is a declaration ("struct foo;").  It is not
-  /// a definition until the definition has been fully processed.
+  /// True if this is a definition ("struct foo {};"), false if it is a
+  /// declaration ("struct foo;").  It is not considered a definition
+  /// until the definition has been fully processed.
   unsigned IsCompleteDefinition : 1;
 
 protected:
-  /// IsBeingDefined - True if this is currently being defined.
+  /// True if this is currently being defined.
   unsigned IsBeingDefined : 1;
 
 private:
-  /// IsEmbeddedInDeclarator - True if this tag declaration is
-  /// "embedded" (i.e., defined or declared for the very first time)
-  /// in the syntax of a declarator.
+  /// True if this tag declaration is "embedded" (i.e., defined or declared
+  /// for the very first time) in the syntax of a declarator.
   unsigned IsEmbeddedInDeclarator : 1;
 
-  /// \brief True if this tag is free standing, e.g. "struct foo;".
+  /// True if this tag is free standing, e.g. "struct foo;".
   unsigned IsFreeStanding : 1;
 
 protected:
@@ -2877,20 +3013,21 @@
   unsigned NumPositiveBits : 8;
   unsigned NumNegativeBits : 8;
 
-  /// IsScoped - True if this tag declaration is a scoped enumeration. Only
+  /// True if this tag declaration is a scoped enumeration. Only
   /// possible in C++11 mode.
   unsigned IsScoped : 1;
-  /// IsScopedUsingClassTag - If this tag declaration is a scoped enum,
+
+  /// If this tag declaration is a scoped enum,
   /// then this is true if the scoped enum was declared using the class
   /// tag, false if it was declared with the struct tag. No meaning is
   /// associated if this tag declaration is not a scoped enum.
   unsigned IsScopedUsingClassTag : 1;
 
-  /// IsFixed - True if this is an enumeration with fixed underlying type. Only
+  /// True if this is an enumeration with fixed underlying type. Only
   /// possible in C++11, Microsoft extensions, or Objective C mode.
   unsigned IsFixed : 1;
 
-  /// \brief Indicates whether it is possible for declarations of this kind
+  /// Indicates whether it is possible for declarations of this kind
   /// to have an out-of-date definition.
   ///
   /// This option is only enabled when modules are enabled.
@@ -2899,12 +3036,13 @@
   /// Has the full definition of this type been required by a use somewhere in
   /// the TU.
   unsigned IsCompleteDefinitionRequired : 1;
+
 private:
   SourceRange BraceRange;
 
   // A struct representing syntactic qualifier info,
   // to be used for the (uncommon) case of out-of-line declarations.
-  typedef QualifierInfo ExtInfo;
+  using ExtInfo = QualifierInfo;
 
   /// \brief If the (out-of-line) tag declaration name
   /// is qualified, it points to the qualifier info (nns and range);
@@ -2936,13 +3074,16 @@
     setPreviousDecl(PrevDecl);
   }
 
-  typedef Redeclarable<TagDecl> redeclarable_base;
+  using redeclarable_base = Redeclarable<TagDecl>;
+
   TagDecl *getNextRedeclarationImpl() override {
     return getNextRedeclaration();
   }
+
   TagDecl *getPreviousDeclImpl() override {
     return getPreviousDecl();
   }
+
   TagDecl *getMostRecentDeclImpl() override {
     return getMostRecentDecl();
   }
@@ -2953,8 +3094,12 @@
   void completeDefinition();
 
 public:
-  typedef redeclarable_base::redecl_range redecl_range;
-  typedef redeclarable_base::redecl_iterator redecl_iterator;
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
+  using redecl_range = redeclarable_base::redecl_range;
+  using redecl_iterator = redeclarable_base::redecl_iterator;
+
   using redeclarable_base::redecls_begin;
   using redeclarable_base::redecls_end;
   using redeclarable_base::redecls;
@@ -2965,11 +3110,11 @@
   SourceRange getBraceRange() const { return BraceRange; }
   void setBraceRange(SourceRange R) { BraceRange = R; }
 
-  /// getInnerLocStart - Return SourceLocation representing start of source
+  /// Return SourceLocation representing start of source
   /// range ignoring outer template declarations.
   SourceLocation getInnerLocStart() const { return getLocStart(); }
 
-  /// getOuterLocStart - Return SourceLocation representing start of source
+  /// Return SourceLocation representing start of source
   /// range taking into account any outer template declarations.
   SourceLocation getOuterLocStart() const;
   SourceRange getSourceRange() const override LLVM_READONLY;
@@ -2979,14 +3124,13 @@
     return const_cast<TagDecl*>(this)->getCanonicalDecl();
   }
 
-  /// isThisDeclarationADefinition() - Return true if this declaration
-  /// is a completion definition of the type.  Provided for consistency.
+  /// Return true if this declaration is a completion definition of the type.
+  /// Provided for consistency.
   bool isThisDeclarationADefinition() const {
     return isCompleteDefinition();
   }
 
-  /// isCompleteDefinition - Return true if this decl has its body
-  /// fully specified.
+  /// Return true if this decl has its body fully specified.
   bool isCompleteDefinition() const {
     return IsCompleteDefinition;
   }
@@ -2997,7 +3141,7 @@
     return IsCompleteDefinitionRequired;
   }
 
-  /// isBeingDefined - Return true if this decl is currently being defined.
+  /// Return true if this decl is currently being defined.
   bool isBeingDefined() const {
     return IsBeingDefined;
   }
@@ -3019,14 +3163,14 @@
   /// parameters.
   bool isDependentType() const { return isDependentContext(); }
 
-  /// @brief Starts the definition of this tag declaration.
+  /// Starts the definition of this tag declaration.
   ///
   /// This method should be invoked at the beginning of the definition
   /// of this tag declaration. It will set the tag type into a state
   /// where it is in the process of being defined.
   void startDefinition();
 
-  /// getDefinition - Returns the TagDecl that actually defines this
+  /// Returns the TagDecl that actually defines this
   ///  struct/union/class/enum.  When determining whether or not a
   ///  struct/union/class/enum has a definition, one should use this
   ///  method as opposed to 'isDefinition'.  'isDefinition' indicates
@@ -3104,10 +3248,12 @@
   unsigned getNumTemplateParameterLists() const {
     return hasExtInfo() ? getExtInfo()->NumTemplParamLists : 0;
   }
+
   TemplateParameterList *getTemplateParameterList(unsigned i) const {
     assert(i < getNumTemplateParameterLists());
     return getExtInfo()->TemplParamLists[i];
   }
+
   void setTemplateParameterListsInfo(ASTContext &Context,
                                      ArrayRef<TemplateParameterList *> TPLists);
 
@@ -3118,20 +3264,17 @@
   static DeclContext *castToDeclContext(const TagDecl *D) {
     return static_cast<DeclContext *>(const_cast<TagDecl*>(D));
   }
+
   static TagDecl *castFromDeclContext(const DeclContext *DC) {
     return static_cast<TagDecl *>(const_cast<DeclContext*>(DC));
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
-/// EnumDecl - Represents an enum.  In C++11, enums can be forward-declared
+/// Represents an enum.  In C++11, enums can be forward-declared
 /// with a fixed underlying type, and in C we allow them to be forward-declared
 /// with no underlying type as an extension.
 class EnumDecl : public TagDecl {
-  void anchor() override;
-  /// IntegerType - This represent the integer type that the enum corresponds
+  /// This represent the integer type that the enum corresponds
   /// to for code generation purposes.  Note that the enumerator constants may
   /// have a different type than this does.
   ///
@@ -3145,10 +3288,9 @@
   /// The underlying type of an enumeration never has any qualifiers, so
   /// we can get away with just storing a raw Type*, and thus save an
   /// extra pointer when TypeSourceInfo is needed.
+  llvm::PointerUnion<const Type *, TypeSourceInfo *> IntegerType;
 
-  llvm::PointerUnion<const Type*, TypeSourceInfo*> IntegerType;
-
-  /// PromotionType - The integer type that values of this type should
+  /// The integer type that values of this type should
   /// promote to.  In C, enumerators are generally of an integer type
   /// directly, but gcc-style large enumerators (and all enumerators
   /// in C++) are of the enum type instead.
@@ -3157,13 +3299,12 @@
   /// \brief If this enumeration is an instantiation of a member enumeration
   /// of a class template specialization, this is the member specialization
   /// information.
-  MemberSpecializationInfo *SpecializationInfo;
+  MemberSpecializationInfo *SpecializationInfo = nullptr;
 
   EnumDecl(ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
            SourceLocation IdLoc, IdentifierInfo *Id, EnumDecl *PrevDecl,
            bool Scoped, bool ScopedUsingClassTag, bool Fixed)
-      : TagDecl(Enum, TTK_Enum, C, DC, IdLoc, Id, PrevDecl, StartLoc),
-        SpecializationInfo(nullptr) {
+      : TagDecl(Enum, TTK_Enum, C, DC, IdLoc, Id, PrevDecl, StartLoc) {
     assert(Scoped || !ScopedUsingClassTag);
     IntegerType = (const Type *)nullptr;
     NumNegativeBits = 0;
@@ -3173,9 +3314,13 @@
     IsFixed = Fixed;
   }
 
+  void anchor() override;
+
   void setInstantiationOfMemberEnum(ASTContext &C, EnumDecl *ED,
                                     TemplateSpecializationKind TSK);
 public:
+  friend class ASTDeclReader;
+
   EnumDecl *getCanonicalDecl() override {
     return cast<EnumDecl>(TagDecl::getCanonicalDecl());
   }
@@ -3209,9 +3354,9 @@
                           bool IsFixed);
   static EnumDecl *CreateDeserialized(ASTContext &C, unsigned ID);
 
-  /// completeDefinition - When created, the EnumDecl corresponds to a
+  /// When created, the EnumDecl corresponds to a
   /// forward-declared enum. This method is used to mark the
-  /// declaration as being defined; it's enumerators have already been
+  /// declaration as being defined; its enumerators have already been
   /// added (via DeclContext::addDecl). NewType is the new underlying
   /// type of the enumeration type.
   void completeDefinition(QualType NewType,
@@ -3219,11 +3364,10 @@
                           unsigned NumPositiveBits,
                           unsigned NumNegativeBits);
 
-  // enumerator_iterator - Iterates through the enumerators of this
-  // enumeration.
-  typedef specific_decl_iterator<EnumConstantDecl> enumerator_iterator;
-  typedef llvm::iterator_range<specific_decl_iterator<EnumConstantDecl>>
-    enumerator_range;
+  // Iterates through the enumerators of this enumeration.
+  using enumerator_iterator = specific_decl_iterator<EnumConstantDecl>;
+  using enumerator_range =
+      llvm::iterator_range<specific_decl_iterator<EnumConstantDecl>>;
 
   enumerator_range enumerators() const {
     return enumerator_range(enumerator_begin(), enumerator_end());
@@ -3243,14 +3387,13 @@
     return enumerator_iterator(E->decls_end());
   }
 
-  /// getPromotionType - Return the integer type that enumerators
-  /// should promote to.
+  /// Return the integer type that enumerators should promote to.
   QualType getPromotionType() const { return PromotionType; }
 
-  /// \brief Set the promotion type.
+  /// Set the promotion type.
   void setPromotionType(QualType T) { PromotionType = T; }
 
-  /// getIntegerType - Return the integer type this enum decl corresponds to.
+  /// Return the integer type this enum decl corresponds to.
   /// This returns a null QualType for an enum forward definition with no fixed
   /// underlying type.
   QualType getIntegerType() const {
@@ -3321,7 +3464,9 @@
 
   /// \brief Returns true if this can be considered a complete type.
   bool isComplete() const {
-    return isCompleteDefinition() || isFixed();
+    // IntegerType is set for fixed type enums and non-fixed but implicitly
+    // int-sized Microsoft enums.
+    return isCompleteDefinition() || IntegerType;
   }
 
   /// Returns true if this enum is either annotated with
@@ -3371,41 +3516,42 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == Enum; }
-
-  friend class ASTDeclReader;
 };
 
-
-/// RecordDecl - Represents a struct/union/class.  For example:
+/// Represents a struct/union/class.  For example:
 ///   struct X;                  // Forward declaration, no "body".
 ///   union Y { int A, B; };     // Has body with members A and B (FieldDecls).
 /// This decl will be marked invalid if *any* members are invalid.
-///
 class RecordDecl : public TagDecl {
+  friend class DeclContext;
+
   // FIXME: This can be packed into the bitfields in Decl.
-  /// HasFlexibleArrayMember - This is true if this struct ends with a flexible
+  /// This is true if this struct ends with a flexible
   /// array member (e.g. int X[]) or if this union contains a struct that does.
   /// If so, this cannot be contained in arrays or other structs as a member.
   bool HasFlexibleArrayMember : 1;
 
-  /// AnonymousStructOrUnion - Whether this is the type of an anonymous struct
-  /// or union.
+  /// Whether this is the type of an anonymous struct or union.
   bool AnonymousStructOrUnion : 1;
 
-  /// HasObjectMember - This is true if this struct has at least one member
+  /// This is true if this struct has at least one member
   /// containing an Objective-C object pointer type.
   bool HasObjectMember : 1;
-  
-  /// HasVolatileMember - This is true if struct has at least one member of
+
+  /// This is true if struct has at least one member of
   /// 'volatile' type.
   bool HasVolatileMember : 1;
 
-  /// \brief Whether the field declarations of this record have been loaded
+  /// Whether the field declarations of this record have been loaded
   /// from external storage. To avoid unnecessary deserialization of
   /// methods/nested types we allow deserialization of just the fields
   /// when needed.
   mutable bool LoadedFieldsFromExternalStorage : 1;
-  friend class DeclContext;
+
+  /// Basic properties of non-trivial C structs.
+  bool NonTrivialToPrimitiveDefaultInitialize : 1;
+  bool NonTrivialToPrimitiveCopy : 1;
+  bool NonTrivialToPrimitiveDestroy : 1;
 
 protected:
   RecordDecl(Kind DK, TagKind TK, const ASTContext &C, DeclContext *DC,
@@ -3436,10 +3582,9 @@
   bool hasFlexibleArrayMember() const { return HasFlexibleArrayMember; }
   void setHasFlexibleArrayMember(bool V) { HasFlexibleArrayMember = V; }
 
-  /// isAnonymousStructOrUnion - Whether this is an anonymous struct
-  /// or union. To be an anonymous struct or union, it must have been
-  /// declared without a name and there must be no objects of this
-  /// type declared, e.g.,
+  /// Whether this is an anonymous struct or union. To be an anonymous
+  /// struct or union, it must have been declared without a name and
+  /// there must be no objects of this type declared, e.g.,
   /// @code
   ///   union { int i; float f; };
   /// @endcode
@@ -3466,6 +3611,31 @@
     LoadedFieldsFromExternalStorage = val;
   }
 
+  /// Functions to query basic properties of non-trivial C structs.
+  bool isNonTrivialToPrimitiveDefaultInitialize() const {
+    return NonTrivialToPrimitiveDefaultInitialize;
+  }
+
+  void setNonTrivialToPrimitiveDefaultInitialize() {
+    NonTrivialToPrimitiveDefaultInitialize = true;
+  }
+
+  bool isNonTrivialToPrimitiveCopy() const {
+    return NonTrivialToPrimitiveCopy;
+  }
+
+  void setNonTrivialToPrimitiveCopy() {
+    NonTrivialToPrimitiveCopy = true;
+  }
+
+  bool isNonTrivialToPrimitiveDestroy() const {
+    return NonTrivialToPrimitiveDestroy;
+  }
+
+  void setNonTrivialToPrimitiveDestroy() {
+    NonTrivialToPrimitiveDestroy = true;
+  }
+
   /// \brief Determines whether this declaration represents the
   /// injected class name.
   ///
@@ -3488,11 +3658,12 @@
   /// \brief Determine whether this record is a record for captured variables in
   /// CapturedStmt construct.
   bool isCapturedRecord() const;
+
   /// \brief Mark the record as a record for captured variables in CapturedStmt
   /// construct.
   void setCapturedRecord();
 
-  /// getDefinition - Returns the RecordDecl that actually defines
+  /// Returns the RecordDecl that actually defines
   ///  this struct/union/class.  When determining whether or not a
   ///  struct/union/class is completely defined, one should use this
   ///  method as opposed to 'isCompleteDefinition'.
@@ -3507,8 +3678,8 @@
   // Iterator access to field members. The field iterator only visits
   // the non-static data members of this class, ignoring any static
   // data members, functions, constructors, destructors, etc.
-  typedef specific_decl_iterator<FieldDecl> field_iterator;
-  typedef llvm::iterator_range<specific_decl_iterator<FieldDecl>> field_range;
+  using field_iterator = specific_decl_iterator<FieldDecl>;
+  using field_range = llvm::iterator_range<specific_decl_iterator<FieldDecl>>;
 
   field_range fields() const { return field_range(field_begin(), field_end()); }
   field_iterator field_begin() const;
@@ -3517,14 +3688,12 @@
     return field_iterator(decl_iterator());
   }
 
-  // field_empty - Whether there are any fields (non-static data
-  // members) in this record.
+  // Whether there are any fields (non-static data members) in this record.
   bool field_empty() const {
     return field_begin() == field_end();
   }
 
-  /// completeDefinition - Notes that the definition of this type is
-  /// now complete.
+  /// Note that the definition of this type is now complete.
   virtual void completeDefinition();
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
@@ -3532,7 +3701,7 @@
     return K >= firstRecord && K <= lastRecord;
   }
 
-  /// isMsStrust - Get whether or not this is an ms_struct which can
+  /// \brief Get whether or not this is an ms_struct which can
   /// be turned on with an attribute, pragma, or -mms-bitfields
   /// commandline option.
   bool isMsStruct(const ASTContext &C) const;
@@ -3552,12 +3721,15 @@
 };
 
 class FileScopeAsmDecl : public Decl {
-  virtual void anchor();
   StringLiteral *AsmString;
   SourceLocation RParenLoc;
+
   FileScopeAsmDecl(DeclContext *DC, StringLiteral *asmstring,
                    SourceLocation StartL, SourceLocation EndL)
     : Decl(FileScopeAsm, DC, StartL), AsmString(asmstring), RParenLoc(EndL) {}
+
+  virtual void anchor();
+
 public:
   static FileScopeAsmDecl *Create(ASTContext &C, DeclContext *DC,
                                   StringLiteral *Str, SourceLocation AsmLoc,
@@ -3580,10 +3752,9 @@
   static bool classofKind(Kind K) { return K == FileScopeAsm; }
 };
 
-/// BlockDecl - This represents a block literal declaration, which is like an
+/// Pepresents a block literal declaration, which is like an
 /// unnamed FunctionDecl.  For example:
 /// ^{ statement-body }   or   ^(int arg1, float arg2){ statement-body }
-///
 class BlockDecl : public Decl, public DeclContext {
 public:
   /// A class which contains all the information about a particular
@@ -3630,29 +3801,31 @@
   bool CapturesCXXThis : 1;
   bool BlockMissingReturnType : 1;
   bool IsConversionFromLambda : 1;
-  /// ParamInfo - new[]'d array of pointers to ParmVarDecls for the formal
+
+  /// A bit that indicates this block is passed directly to a function as a
+  /// non-escaping parameter.
+  bool DoesNotEscape : 1;
+
+  /// A new[]'d array of pointers to ParmVarDecls for the formal
   /// parameters of this function.  This is null if a prototype or if there are
   /// no formals.
-  ParmVarDecl **ParamInfo;
-  unsigned NumParams;
+  ParmVarDecl **ParamInfo = nullptr;
+  unsigned NumParams = 0;
 
-  Stmt *Body;
-  TypeSourceInfo *SignatureAsWritten;
+  Stmt *Body = nullptr;
+  TypeSourceInfo *SignatureAsWritten = nullptr;
 
-  const Capture *Captures;
-  unsigned NumCaptures;
+  const Capture *Captures = nullptr;
+  unsigned NumCaptures = 0;
 
-  unsigned ManglingNumber;
-  Decl *ManglingContextDecl;
+  unsigned ManglingNumber = 0;
+  Decl *ManglingContextDecl = nullptr;
 
 protected:
   BlockDecl(DeclContext *DC, SourceLocation CaretLoc)
-    : Decl(Block, DC, CaretLoc), DeclContext(Block),
-      IsVariadic(false), CapturesCXXThis(false),
-      BlockMissingReturnType(true), IsConversionFromLambda(false),
-      ParamInfo(nullptr), NumParams(0), Body(nullptr),
-      SignatureAsWritten(nullptr), Captures(nullptr), NumCaptures(0),
-      ManglingNumber(0), ManglingContextDecl(nullptr) {}
+      : Decl(Block, DC, CaretLoc), DeclContext(Block), IsVariadic(false),
+        CapturesCXXThis(false), BlockMissingReturnType(true),
+        IsConversionFromLambda(false), DoesNotEscape(false) {}
 
 public:
   static BlockDecl *Create(ASTContext &C, DeclContext *DC, SourceLocation L); 
@@ -3679,8 +3852,9 @@
   }
 
   // Iterator access to formal parameters.
-  typedef MutableArrayRef<ParmVarDecl *>::iterator param_iterator;
-  typedef ArrayRef<ParmVarDecl *>::const_iterator param_const_iterator;
+  using param_iterator = MutableArrayRef<ParmVarDecl *>::iterator;
+  using param_const_iterator = ArrayRef<ParmVarDecl *>::const_iterator;
+
   bool param_empty() const { return parameters().empty(); }
   param_iterator param_begin() { return parameters().begin(); }
   param_iterator param_end() { return parameters().end(); }
@@ -3689,6 +3863,7 @@
   size_t param_size() const { return parameters().size(); }
 
   unsigned getNumParams() const { return NumParams; }
+
   const ParmVarDecl *getParamDecl(unsigned i) const {
     assert(i < getNumParams() && "Illegal param #");
     return ParamInfo[i];
@@ -3697,17 +3872,18 @@
     assert(i < getNumParams() && "Illegal param #");
     return ParamInfo[i];
   }
+
   void setParams(ArrayRef<ParmVarDecl *> NewParamInfo);
 
-  /// hasCaptures - True if this block (or its nested blocks) captures
+  /// True if this block (or its nested blocks) captures
   /// anything of local storage from its enclosing scopes.
   bool hasCaptures() const { return NumCaptures != 0 || CapturesCXXThis; }
 
-  /// getNumCaptures - Returns the number of captured variables.
+  /// Returns the number of captured variables.
   /// Does not include an entry for 'this'.
   unsigned getNumCaptures() const { return NumCaptures; }
 
-  typedef ArrayRef<Capture>::const_iterator capture_const_iterator;
+  using capture_const_iterator = ArrayRef<Capture>::const_iterator;
 
   ArrayRef<Capture> captures() const { return {Captures, NumCaptures}; }
 
@@ -3721,6 +3897,9 @@
   bool isConversionFromLambda() const { return IsConversionFromLambda; }
   void setIsConversionFromLambda(bool val) { IsConversionFromLambda = val; }
 
+  bool doesNotEscape() const { return DoesNotEscape; }
+  void setDoesNotEscape() { DoesNotEscape = true; }
+
   bool capturesVariable(const VarDecl *var) const;
 
   void setCaptures(ASTContext &Context, ArrayRef<Capture> Captures,
@@ -3729,6 +3908,7 @@
    unsigned getBlockManglingNumber() const {
      return ManglingNumber;
    }
+
    Decl *getBlockManglingContextDecl() const {
      return ManglingContextDecl;    
    }
@@ -3751,8 +3931,7 @@
   }
 };
 
-/// \brief This represents the body of a CapturedStmt, and serves as its
-/// DeclContext.
+/// Represents the body of a CapturedStmt, and serves as its DeclContext.
 class CapturedDecl final
     : public Decl,
       public DeclContext,
@@ -3765,8 +3944,10 @@
 private:
   /// \brief The number of parameters to the outlined function.
   unsigned NumParams;
+
   /// \brief The position of context parameter in list of parameters.
   unsigned ContextParam;
+
   /// \brief The body of the outlined function.
   llvm::PointerIntPair<Stmt *, 1, bool> BodyAndNothrow;
 
@@ -3781,6 +3962,10 @@
   }
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+  friend TrailingObjects;
+
   static CapturedDecl *Create(ASTContext &C, DeclContext *DC,
                               unsigned NumParams);
   static CapturedDecl *CreateDeserialized(ASTContext &C, unsigned ID,
@@ -3823,8 +4008,8 @@
   }
   unsigned getContextParamPosition() const { return ContextParam; }
 
-  typedef ImplicitParamDecl *const *param_iterator;
-  typedef llvm::iterator_range<param_iterator> param_range;
+  using param_iterator = ImplicitParamDecl *const *;
+  using param_range = llvm::iterator_range<param_iterator>;
 
   /// \brief Retrieve an iterator pointing to the first parameter decl.
   param_iterator param_begin() const { return getParams(); }
@@ -3840,10 +4025,6 @@
   static CapturedDecl *castFromDeclContext(const DeclContext *DC) {
     return static_cast<CapturedDecl *>(const_cast<DeclContext *>(DC));
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
-  friend TrailingObjects;
 };
 
 /// \brief Describes a module import declaration, which makes the contents
@@ -3858,6 +4039,11 @@
 /// \#include/\#import directives.
 class ImportDecl final : public Decl,
                          llvm::TrailingObjects<ImportDecl, SourceLocation> {
+  friend class ASTContext;
+  friend class ASTDeclReader;
+  friend class ASTReader;
+  friend TrailingObjects;
+
   /// \brief The imported module, along with a bit that indicates whether
   /// we have source-location information for each identifier in the module
   /// name. 
@@ -3868,20 +4054,15 @@
   
   /// \brief The next import in the list of imports local to the translation
   /// unit being parsed (not loaded from an AST file).
-  ImportDecl *NextLocalImport;
+  ImportDecl *NextLocalImport = nullptr;
   
-  friend class ASTReader;
-  friend class ASTDeclReader;
-  friend class ASTContext;
-  friend TrailingObjects;
-
   ImportDecl(DeclContext *DC, SourceLocation StartLoc, Module *Imported,
              ArrayRef<SourceLocation> IdentifierLocs);
 
   ImportDecl(DeclContext *DC, SourceLocation StartLoc, Module *Imported,
              SourceLocation EndLoc);
 
-  ImportDecl(EmptyShell Empty) : Decl(Import, Empty), NextLocalImport() { }
+  ImportDecl(EmptyShell Empty) : Decl(Import, Empty) {}
   
 public:
   /// \brief Create a new module import declaration.
@@ -3923,15 +4104,16 @@
 /// \endcode
 class ExportDecl final : public Decl, public DeclContext {
   virtual void anchor();
+
 private:
+  friend class ASTDeclReader;
+
   /// \brief The source location for the right brace (if valid).
   SourceLocation RBraceLoc;
 
   ExportDecl(DeclContext *DC, SourceLocation ExportLoc)
-    : Decl(Export, DC, ExportLoc), DeclContext(Export),
-      RBraceLoc(SourceLocation()) { }
-
-  friend class ASTDeclReader;
+      : Decl(Export, DC, ExportLoc), DeclContext(Export),
+        RBraceLoc(SourceLocation()) {}
 
 public:
   static ExportDecl *Create(ASTContext &C, DeclContext *DC,
@@ -3964,11 +4146,11 @@
   }
 };
 
-/// \brief Represents an empty-declaration.
+/// Represents an empty-declaration.
 class EmptyDecl : public Decl {
+  EmptyDecl(DeclContext *DC, SourceLocation L) : Decl(Empty, DC, L) {}
+
   virtual void anchor();
-  EmptyDecl(DeclContext *DC, SourceLocation L)
-    : Decl(Empty, DC, L) { }
 
 public:
   static EmptyDecl *Create(ASTContext &C, DeclContext *DC,
@@ -4029,7 +4211,7 @@
 
 // Inline function definitions.
 
-/// \brief Check if the given decl is complete.
+/// Check if the given decl is complete.
 ///
 /// We use this function to break a cycle between the inline definitions in
 /// Type.h and Decl.h.
@@ -4037,7 +4219,7 @@
   return ED->isComplete();
 }
 
-/// \brief Check if the given decl is scoped.
+/// Check if the given decl is scoped.
 ///
 /// We use this function to break a cycle between the inline definitions in
 /// Type.h and Decl.h.
@@ -4045,6 +4227,6 @@
   return ED->isScoped();
 }
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_DECL_H
diff --git a/include/clang/AST/DeclBase.h b/include/clang/AST/DeclBase.h
index 47515a8..908894c 100644
--- a/include/clang/AST/DeclBase.h
+++ b/include/clang/AST/DeclBase.h
@@ -1,4 +1,4 @@
-//===-- DeclBase.h - Base Classes for representing declarations -*- C++ -*-===//
+//===- DeclBase.h - Base Classes for representing declarations --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,33 +16,40 @@
 
 #include "clang/AST/AttrIterator.h"
 #include "clang/AST/DeclarationName.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/Specifiers.h"
 #include "clang/Basic/VersionTuple.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/PrettyStackTrace.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <string>
+#include <type_traits>
+#include <utility>
 
 namespace clang {
+
+class ASTContext;
 class ASTMutationListener;
-class BlockDecl;
-class CXXRecordDecl;
-class CompoundStmt;
+class Attr;
 class DeclContext;
-class DeclarationName;
-class DependentDiagnostic;
-class EnumDecl;
-class ExportDecl;
 class ExternalSourceSymbolAttr;
 class FunctionDecl;
 class FunctionType;
+class IdentifierInfo;
 enum Linkage : unsigned char;
-class LinkageComputer;
 class LinkageSpecDecl;
 class Module;
 class NamedDecl;
-class NamespaceDecl;
 class ObjCCategoryDecl;
 class ObjCCategoryImplDecl;
 class ObjCContainerDecl;
@@ -53,23 +60,21 @@
 class ObjCProtocolDecl;
 struct PrintingPolicy;
 class RecordDecl;
+class SourceManager;
 class Stmt;
 class StoredDeclsMap;
 class TemplateDecl;
 class TranslationUnitDecl;
 class UsingDirectiveDecl;
-}
 
-namespace clang {
-
-  /// \brief Captures the result of checking the availability of a
-  /// declaration.
-  enum AvailabilityResult {
-    AR_Available = 0,
-    AR_NotYetIntroduced,
-    AR_Deprecated,
-    AR_Unavailable
-  };
+/// \brief Captures the result of checking the availability of a
+/// declaration.
+enum AvailabilityResult {
+  AR_Available = 0,
+  AR_NotYetIntroduced,
+  AR_Deprecated,
+  AR_Unavailable
+};
 
 /// Decl - This represents one declaration (or definition), e.g. a variable,
 /// typedef, function, struct, etc.
@@ -94,7 +99,7 @@
   /// \brief A placeholder type used to construct an empty shell of a
   /// decl-derived type that will be filled in later (e.g., by some
   /// deserialization method).
-  struct EmptyShell { };
+  struct EmptyShell {};
 
   /// IdentifierNamespace - The different namespaces in which
   /// declarations may appear.  According to C99 6.2.3, there are
@@ -208,15 +213,18 @@
   enum class ModuleOwnershipKind : unsigned {
     /// This declaration is not owned by a module.
     Unowned,
+
     /// This declaration has an owning module, but is globally visible
     /// (typically because its owning module is visible and we know that
     /// modules cannot later become hidden in this compilation).
     /// After serialization and deserialization, this will be converted
     /// to VisibleWhenImported.
     Visible,
+
     /// This declaration has an owning module, and is visible when that
     /// module is imported.
     VisibleWhenImported,
+
     /// This declaration has an owning module, but is only visible to
     /// lookups that occur within that module.
     ModulePrivate
@@ -238,7 +246,6 @@
     DeclContext *LexicalDC;
   };
 
-
   /// DeclCtx - Holds either a DeclContext* or a MultipleDC*.
   /// For declarations that don't contain C++ scope specifiers, it contains
   /// the DeclContext where the Decl was declared.
@@ -254,12 +261,14 @@
   ///                // LexicalDC == global namespace
   llvm::PointerUnion<DeclContext*, MultipleDC*> DeclCtx;
 
-  inline bool isInSemaDC() const    { return DeclCtx.is<DeclContext*>(); }
-  inline bool isOutOfSemaDC() const { return DeclCtx.is<MultipleDC*>(); }
-  inline MultipleDC *getMultipleDC() const {
+  bool isInSemaDC() const { return DeclCtx.is<DeclContext*>(); }
+  bool isOutOfSemaDC() const { return DeclCtx.is<MultipleDC*>(); }
+
+  MultipleDC *getMultipleDC() const {
     return DeclCtx.get<MultipleDC*>();
   }
-  inline DeclContext *getSemanticDC() const {
+
+  DeclContext *getSemanticDC() const {
     return DeclCtx.get<DeclContext*>();
   }
 
@@ -298,10 +307,16 @@
   static bool StatisticsEnabled;
 
 protected:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+  friend class ASTReader;
+  friend class CXXClassMemberWrapper;
+  friend class LinkageComputer;
+  template<typename decl_type> friend class Redeclarable;
+
   /// Access - Used by C++ decls for the access specifier.
   // NOTE: VC++ treats enums as signed, avoid using the AccessSpecifier enum
   unsigned Access : 2;
-  friend class CXXClassMemberWrapper;
 
   /// \brief Whether this declaration was loaded from an AST file.
   unsigned FromASTFile : 1;
@@ -313,13 +328,6 @@
   /// Otherwise, it is the linkage + 1.
   mutable unsigned CacheValidAndLinkage : 3;
 
-  friend class ASTDeclWriter;
-  friend class ASTDeclReader;
-  friend class ASTReader;
-  friend class LinkageComputer;
-
-  template<typename decl_type> friend class Redeclarable;
-
   /// \brief Allocate memory for a deserialized declaration.
   ///
   /// This routine must be used to allocate memory for any declaration that is
@@ -357,7 +365,7 @@
 protected:
   Decl(Kind DK, DeclContext *DC, SourceLocation L)
       : NextInContextAndBits(nullptr, getModuleOwnershipKindForChildOf(DC)),
-        DeclCtx(DC), Loc(L), DeclKind(DK), InvalidDecl(0), HasAttrs(false),
+        DeclCtx(DC), Loc(L), DeclKind(DK), InvalidDecl(false), HasAttrs(false),
         Implicit(false), Used(false), Referenced(false),
         TopLevelDeclInObjCContainer(false), Access(AS_none), FromASTFile(0),
         IdentifierNamespace(getIdentifierNamespaceForKind(DK)),
@@ -366,9 +374,9 @@
   }
 
   Decl(Kind DK, EmptyShell Empty)
-      : NextInContextAndBits(), DeclKind(DK), InvalidDecl(0), HasAttrs(false),
-        Implicit(false), Used(false), Referenced(false),
-        TopLevelDeclInObjCContainer(false), Access(AS_none), FromASTFile(0),
+      : DeclKind(DK), InvalidDecl(false), HasAttrs(false), Implicit(false),
+        Used(false), Referenced(false), TopLevelDeclInObjCContainer(false),
+        Access(AS_none), FromASTFile(0),
         IdentifierNamespace(getIdentifierNamespaceForKind(DK)),
         CacheValidAndLinkage(0) {
     if (StatisticsEnabled) add(DK);
@@ -392,14 +400,15 @@
   }
 
 public:
-
   /// \brief Source range that this declaration covers.
   virtual SourceRange getSourceRange() const LLVM_READONLY {
     return SourceRange(getLocation(), getLocation());
   }
+
   SourceLocation getLocStart() const LLVM_READONLY {
     return getSourceRange().getBegin();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return getSourceRange().getEnd();
   }
@@ -460,12 +469,15 @@
   }
 
   bool hasAttrs() const { return HasAttrs; }
+
   void setAttrs(const AttrVec& Attrs) {
     return setAttrsImpl(Attrs, getASTContext());
   }
+
   AttrVec &getAttrs() {
     return const_cast<AttrVec&>(const_cast<const Decl*>(this)->getAttrs());
   }
+
   const AttrVec &getAttrs() const;
   void dropAttrs();
 
@@ -476,8 +488,8 @@
       setAttrs(AttrVec(1, A));
   }
 
-  typedef AttrVec::const_iterator attr_iterator;
-  typedef llvm::iterator_range<attr_iterator> attr_range;
+  using attr_iterator = AttrVec::const_iterator;
+  using attr_range = llvm::iterator_range<attr_iterator>;
 
   attr_range attrs() const {
     return attr_range(attr_begin(), attr_end());
@@ -510,6 +522,7 @@
   specific_attr_iterator<T> specific_attr_begin() const {
     return specific_attr_iterator<T>(attr_begin());
   }
+
   template <typename T>
   specific_attr_iterator<T> specific_attr_end() const {
     return specific_attr_iterator<T>(attr_end());
@@ -518,6 +531,7 @@
   template<typename T> T *getAttr() const {
     return hasAttrs() ? getSpecificAttr<T>(getAttrs()) : nullptr;
   }
+
   template<typename T> bool hasAttr() const {
     return hasAttrs() && hasSpecificAttr<T>(getAttrs());
   }
@@ -616,7 +630,6 @@
   }
   
 public:
-  
   /// \brief Determine the availability of the given declaration.
   ///
   /// This routine will determine the most restrictive availability of
@@ -698,6 +711,7 @@
 
 private:
   Module *getOwningModuleSlow() const;
+
 protected:
   bool hasLocalOwningModuleStorage() const;
 
@@ -777,14 +791,17 @@
   unsigned getIdentifierNamespace() const {
     return IdentifierNamespace;
   }
+
   bool isInIdentifierNamespace(unsigned NS) const {
     return getIdentifierNamespace() & NS;
   }
+
   static unsigned getIdentifierNamespaceForKind(Kind DK);
 
   bool hasTagIdentifierNamespace() const {
     return isTagIdentifierNamespace(getIdentifierNamespace());
   }
+
   static bool isTagIdentifierNamespace(unsigned NS) {
     // TagDecls have Tag and Type set and may also have TagFriend.
     return (NS & ~IDNS_TagFriend) == (IDNS_Tag | IDNS_Type);
@@ -819,6 +836,10 @@
 
   void setLexicalDeclContext(DeclContext *DC);
 
+  /// Determine whether this declaration is a templated entity (whether it is
+  // within the scope of a template parameter).
+  bool isTemplated() const;
+
   /// isDefinedOutsideFunctionOrMethod - This predicate returns true if this
   /// scoped decl is defined outside the current function or method.  This is
   /// roughly global variables and functions, but also handles enums (which
@@ -872,18 +893,18 @@
   /// \brief Iterates through all the redeclarations of the same decl.
   class redecl_iterator {
     /// Current - The current declaration.
-    Decl *Current;
+    Decl *Current = nullptr;
     Decl *Starter;
 
   public:
-    typedef Decl *value_type;
-    typedef const value_type &reference;
-    typedef const value_type *pointer;
-    typedef std::forward_iterator_tag iterator_category;
-    typedef std::ptrdiff_t difference_type;
+    using value_type = Decl *;
+    using reference = const value_type &;
+    using pointer = const value_type *;
+    using iterator_category = std::forward_iterator_tag;
+    using difference_type = std::ptrdiff_t;
 
-    redecl_iterator() : Current(nullptr) { }
-    explicit redecl_iterator(Decl *C) : Current(C), Starter(C) { }
+    redecl_iterator() = default;
+    explicit redecl_iterator(Decl *C) : Current(C), Starter(C) {}
 
     reference operator*() const { return Current; }
     value_type operator->() const { return Current; }
@@ -906,12 +927,13 @@
     friend bool operator==(redecl_iterator x, redecl_iterator y) {
       return x.Current == y.Current;
     }
+
     friend bool operator!=(redecl_iterator x, redecl_iterator y) {
       return x.Current != y.Current;
     }
   };
 
-  typedef llvm::iterator_range<redecl_iterator> redecl_range;
+  using redecl_range = llvm::iterator_range<redecl_iterator>;
 
   /// \brief Returns an iterator range for all the redeclarations of the same
   /// decl. It will iterate at least once (when this decl is the only one).
@@ -922,6 +944,7 @@
   redecl_iterator redecls_begin() const {
     return redecl_iterator(const_cast<Decl *>(this));
   }
+
   redecl_iterator redecls_end() const { return redecl_iterator(); }
 
   /// \brief Retrieve the previous declaration that declares the same entity
@@ -1103,10 +1126,13 @@
   static void printGroup(Decl** Begin, unsigned NumDecls,
                          raw_ostream &Out, const PrintingPolicy &Policy,
                          unsigned Indentation = 0);
+
   // Debuggers don't usually respect default arguments.
   void dump() const;
+
   // Same as dump(), but forces color printing.
   void dumpColor() const;
+
   void dump(raw_ostream &Out, bool Deserialize = false) const;
 
   /// \brief Looks through the Decl's underlying type to extract a FunctionType
@@ -1141,10 +1167,11 @@
   SourceLocation Loc;
   SourceManager &SM;
   const char *Message;
+
 public:
   PrettyStackTraceDecl(const Decl *theDecl, SourceLocation L,
                        SourceManager &sm, const char *Msg)
-  : TheDecl(theDecl), Loc(L), SM(sm), Message(Msg) {}
+      : TheDecl(theDecl), Loc(L), SM(sm), Message(Msg) {}
 
   void print(raw_ostream &OS) const override;
 };
@@ -1153,30 +1180,35 @@
 /// single result (with no stable storage) or a collection of results (with
 /// stable storage provided by the lookup table).
 class DeclContextLookupResult {
-  typedef ArrayRef<NamedDecl *> ResultTy;
+  using ResultTy = ArrayRef<NamedDecl *>;
+
   ResultTy Result;
+
   // If there is only one lookup result, it would be invalidated by
   // reallocations of the name table, so store it separately.
-  NamedDecl *Single;
+  NamedDecl *Single = nullptr;
 
   static NamedDecl *const SingleElementDummyList;
 
 public:
-  DeclContextLookupResult() : Result(), Single() {}
+  DeclContextLookupResult() = default;
   DeclContextLookupResult(ArrayRef<NamedDecl *> Result)
-      : Result(Result), Single() {}
+      : Result(Result) {}
   DeclContextLookupResult(NamedDecl *Single)
       : Result(SingleElementDummyList), Single(Single) {}
 
   class iterator;
-  typedef llvm::iterator_adaptor_base<iterator, ResultTy::iterator,
-                                      std::random_access_iterator_tag,
-                                      NamedDecl *const> IteratorBase;
+
+  using IteratorBase =
+      llvm::iterator_adaptor_base<iterator, ResultTy::iterator,
+                                  std::random_access_iterator_tag,
+                                  NamedDecl *const>;
+
   class iterator : public IteratorBase {
     value_type SingleElement;
 
   public:
-    iterator() : IteratorBase(), SingleElement() {}
+    iterator() = default;
     explicit iterator(pointer Pos, value_type Single = nullptr)
         : IteratorBase(Pos), SingleElement(Single) {}
 
@@ -1184,9 +1216,10 @@
       return SingleElement ? SingleElement : IteratorBase::operator*();
     }
   };
-  typedef iterator const_iterator;
-  typedef iterator::pointer pointer;
-  typedef iterator::reference reference;
+
+  using const_iterator = iterator;
+  using pointer = iterator::pointer;
+  using reference = iterator::reference;
 
   iterator begin() const { return iterator(Result.begin(), Single); }
   iterator end() const { return iterator(Result.end(), Single); }
@@ -1220,7 +1253,6 @@
 ///   ExportDecl
 ///   BlockDecl
 ///   OMPDeclareReductionDecl
-///
 class DeclContext {
   /// DeclKind - This indicates which class this is.
   unsigned DeclKind : 8;
@@ -1260,22 +1292,22 @@
   /// contains an entry for a DeclarationName (and we haven't lazily
   /// omitted anything), then it contains all relevant entries for that
   /// name (modulo the hasExternalDecls() flag).
-  mutable StoredDeclsMap *LookupPtr;
+  mutable StoredDeclsMap *LookupPtr = nullptr;
 
 protected:
+  friend class ASTDeclReader;
+  friend class ASTWriter;
+  friend class ExternalASTSource;
+
   /// FirstDecl - The first declaration stored within this declaration
   /// context.
-  mutable Decl *FirstDecl;
+  mutable Decl *FirstDecl = nullptr;
 
   /// LastDecl - The last declaration stored within this declaration
   /// context. FIXME: We could probably cache this value somewhere
   /// outside of the DeclContext, to reduce the size of DeclContext by
   /// another pointer.
-  mutable Decl *LastDecl;
-
-  friend class ExternalASTSource;
-  friend class ASTDeclReader;
-  friend class ASTWriter;
+  mutable Decl *LastDecl = nullptr;
 
   /// \brief Build up a chain of declarations.
   ///
@@ -1288,8 +1320,7 @@
         ExternalVisibleStorage(false),
         NeedToReconcileExternalVisibleStorage(false),
         HasLazyLocalLexicalLookups(false), HasLazyExternalLexicalLookups(false),
-        UseQualifiedLookup(false),
-        LookupPtr(nullptr), FirstDecl(nullptr), LastDecl(nullptr) {}
+        UseQualifiedLookup(false) {}
 
 public:
   ~DeclContext();
@@ -1297,6 +1328,7 @@
   Decl::Kind getDeclKind() const {
     return static_cast<Decl::Kind>(DeclKind);
   }
+
   const char *getDeclKindName() const;
 
   /// getParent - Returns the containing DeclContext.
@@ -1504,19 +1536,20 @@
   /// within this context.
   class decl_iterator {
     /// Current - The current declaration.
-    Decl *Current;
+    Decl *Current = nullptr;
 
   public:
-    typedef Decl *value_type;
-    typedef const value_type &reference;
-    typedef const value_type *pointer;
-    typedef std::forward_iterator_tag iterator_category;
-    typedef std::ptrdiff_t            difference_type;
+    using value_type = Decl *;
+    using reference = const value_type &;
+    using pointer = const value_type *;
+    using iterator_category = std::forward_iterator_tag;
+    using difference_type = std::ptrdiff_t;
 
-    decl_iterator() : Current(nullptr) { }
-    explicit decl_iterator(Decl *C) : Current(C) { }
+    decl_iterator() = default;
+    explicit decl_iterator(Decl *C) : Current(C) {}
 
     reference operator*() const { return Current; }
+
     // This doesn't meet the iterator requirements, but it's convenient
     value_type operator->() const { return Current; }
 
@@ -1534,12 +1567,13 @@
     friend bool operator==(decl_iterator x, decl_iterator y) {
       return x.Current == y.Current;
     }
+
     friend bool operator!=(decl_iterator x, decl_iterator y) {
       return x.Current != y.Current;
     }
   };
 
-  typedef llvm::iterator_range<decl_iterator> decl_range;
+  using decl_range = llvm::iterator_range<decl_iterator>;
 
   /// decls_begin/decls_end - Iterate over the declarations stored in
   /// this context.
@@ -1578,16 +1612,16 @@
     }
 
   public:
-    typedef SpecificDecl *value_type;
-    // TODO: Add reference and pointer typedefs (with some appropriate proxy
-    // type) if we ever have a need for them.
-    typedef void reference;
-    typedef void pointer;
-    typedef std::iterator_traits<DeclContext::decl_iterator>::difference_type
-      difference_type;
-    typedef std::forward_iterator_tag iterator_category;
+    using value_type = SpecificDecl *;
+    // TODO: Add reference and pointer types (with some appropriate proxy type)
+    // if we ever have a need for them.
+    using reference = void;
+    using pointer = void;
+    using difference_type =
+        std::iterator_traits<DeclContext::decl_iterator>::difference_type;
+    using iterator_category = std::forward_iterator_tag;
 
-    specific_decl_iterator() : Current() { }
+    specific_decl_iterator() = default;
 
     /// specific_decl_iterator - Construct a new iterator over a
     /// subset of the declarations the range [C,
@@ -1602,6 +1636,7 @@
     }
 
     value_type operator*() const { return cast<SpecificDecl>(*Current); }
+
     // This doesn't meet the iterator requirements, but it's convenient
     value_type operator->() const { return **this; }
 
@@ -1655,16 +1690,16 @@
     }
 
   public:
-    typedef SpecificDecl *value_type;
-    // TODO: Add reference and pointer typedefs (with some appropriate proxy
-    // type) if we ever have a need for them.
-    typedef void reference;
-    typedef void pointer;
-    typedef std::iterator_traits<DeclContext::decl_iterator>::difference_type
-      difference_type;
-    typedef std::forward_iterator_tag iterator_category;
+    using value_type = SpecificDecl *;
+    // TODO: Add reference and pointer types (with some appropriate proxy type)
+    // if we ever have a need for them.
+    using reference = void;
+    using pointer = void;
+    using difference_type =
+        std::iterator_traits<DeclContext::decl_iterator>::difference_type;
+    using iterator_category = std::forward_iterator_tag;
 
-    filtered_decl_iterator() : Current() { }
+    filtered_decl_iterator() = default;
 
     /// filtered_decl_iterator - Construct a new iterator over a
     /// subset of the declarations the range [C,
@@ -1742,8 +1777,8 @@
   /// @brief Checks whether a declaration is in this context.
   bool containsDecl(Decl *D) const;
 
-  typedef DeclContextLookupResult lookup_result;
-  typedef lookup_result::iterator lookup_iterator;
+  using lookup_result = DeclContextLookupResult;
+  using lookup_iterator = lookup_result::iterator;
 
   /// lookup - Find the declarations (if any) with the given Name in
   /// this context. Returns a range of iterators that contains all of
@@ -1789,10 +1824,12 @@
   /// of looking up every possible name.
   class all_lookups_iterator;
 
-  typedef llvm::iterator_range<all_lookups_iterator> lookups_range;
+  using lookups_range = llvm::iterator_range<all_lookups_iterator>;
 
   lookups_range lookups() const;
-  lookups_range noload_lookups() const;
+  // Like lookups(), but avoids loading external declarations.
+  // If PreserveInternalState, avoids building lookup data structures too.
+  lookups_range noload_lookups(bool PreserveInternalState) const;
 
   /// \brief Iterators over all possible lookups within this context.
   all_lookups_iterator lookups_begin() const;
@@ -1805,21 +1842,26 @@
   all_lookups_iterator noload_lookups_end() const;
 
   struct udir_iterator;
-  typedef llvm::iterator_adaptor_base<udir_iterator, lookup_iterator,
-                                      std::random_access_iterator_tag,
-                                      UsingDirectiveDecl *> udir_iterator_base;
+
+  using udir_iterator_base =
+      llvm::iterator_adaptor_base<udir_iterator, lookup_iterator,
+                                  std::random_access_iterator_tag,
+                                  UsingDirectiveDecl *>;
+
   struct udir_iterator : udir_iterator_base {
     udir_iterator(lookup_iterator I) : udir_iterator_base(I) {}
+
     UsingDirectiveDecl *operator*() const;
   };
 
-  typedef llvm::iterator_range<udir_iterator> udir_range;
+  using udir_range = llvm::iterator_range<udir_iterator>;
 
   udir_range using_directives() const;
 
   // These are all defined in DependentDiagnostic.h.
   class ddiag_iterator;
-  typedef llvm::iterator_range<DeclContext::ddiag_iterator> ddiag_range;
+
+  using ddiag_range = llvm::iterator_range<DeclContext::ddiag_iterator>;
 
   inline ddiag_range ddiags() const;
 
@@ -1892,6 +1934,8 @@
                    bool Deserialize = false) const;
 
 private:
+  friend class DependentDiagnostic;
+
   void reconcileExternalVisibleStorage() const;
   bool LoadLexicalDeclsFromExternalStorage() const;
 
@@ -1903,9 +1947,9 @@
   /// use of addDeclInternal().
   void makeDeclVisibleInContextInternal(NamedDecl *D);
 
-  friend class DependentDiagnostic;
   StoredDeclsMap *CreateStoredDeclsMap(ASTContext &C) const;
 
+  void loadLazyLocalLexicalLookups();
   void buildLookupImpl(DeclContext *DCtx, bool Internal);
   void makeDeclVisibleInContextWithFlags(NamedDecl *D, bool Internal,
                                          bool Rediscoverable);
@@ -1942,8 +1986,7 @@
   }
 };
 
-
-} // end clang.
+} // namespace clang
 
 namespace llvm {
 
@@ -1963,12 +2006,14 @@
     return *::clang::cast_convert_decl_context<ToTy>::doit(&Val);
   }
 };
+
 template<class ToTy>
 struct cast_convert_val<ToTy, ::clang::DeclContext, ::clang::DeclContext> {
   static ToTy &doit(::clang::DeclContext &Val) {
     return *::clang::cast_convert_decl_context<ToTy>::doit(&Val);
   }
 };
+
 template<class ToTy>
 struct cast_convert_val<ToTy,
                      const ::clang::DeclContext*, const ::clang::DeclContext*> {
@@ -1976,6 +2021,7 @@
     return ::clang::cast_convert_decl_context<ToTy>::doit(Val);
   }
 };
+
 template<class ToTy>
 struct cast_convert_val<ToTy, ::clang::DeclContext*, ::clang::DeclContext*> {
   static ToTy *doit(::clang::DeclContext *Val) {
@@ -2012,6 +2058,6 @@
   }
 };
 
-} // end namespace llvm
+} // namespace llvm
 
-#endif
+#endif // LLVM_CLANG_AST_DECLBASE_H
diff --git a/include/clang/AST/DeclCXX.h b/include/clang/AST/DeclCXX.h
index 1b9458f..faa86e3 100644
--- a/include/clang/AST/DeclCXX.h
+++ b/include/clang/AST/DeclCXX.h
@@ -1,4 +1,4 @@
-//===-- DeclCXX.h - Classes for representing C++ declarations -*- C++ -*-=====//
+//===- DeclCXX.h - Classes for representing C++ declarations --*- C++ -*-=====//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,11 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the C++ Decl subclasses, other than those for templates
 /// (found in DeclTemplate.h) and friends (in DeclFriend.h).
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_AST_DECLCXX_H
@@ -20,29 +20,56 @@
 #include "clang/AST/ASTUnresolvedSet.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExternalASTSource.h"
 #include "clang/AST/LambdaCapture.h"
+#include "clang/AST/NestedNameSpecifier.h"
+#include "clang/AST/Redeclarable.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/Type.h"
+#include "clang/AST/TypeLoc.h"
+#include "clang/AST/UnresolvedSet.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/Lambda.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include "llvm/Support/TrailingObjects.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <memory>
+#include <vector>
 
 namespace clang {
 
 class ClassTemplateDecl;
-class ClassTemplateSpecializationDecl;
 class ConstructorUsingShadowDecl;
 class CXXBasePath;
 class CXXBasePaths;
 class CXXConstructorDecl;
-class CXXConversionDecl;
 class CXXDestructorDecl;
-class CXXMethodDecl;
-class CXXRecordDecl;
-class CXXMemberLookupCriteria;
 class CXXFinalOverriderMap;
 class CXXIndirectPrimaryBaseSet;
+class CXXMethodDecl;
+class DiagnosticBuilder;
 class FriendDecl;
-class LambdaExpr;
+class FunctionTemplateDecl;
+class IdentifierInfo;
+class MemberSpecializationInfo;
+class TemplateDecl;
+class TemplateParameterList;
 class UsingDecl;
 
 /// \brief Represents any kind of function declaration, whether it is a
@@ -50,10 +77,10 @@
 class AnyFunctionDecl {
   NamedDecl *Function;
 
-  AnyFunctionDecl(NamedDecl *ND) : Function(ND) { }
+  AnyFunctionDecl(NamedDecl *ND) : Function(ND) {}
 
 public:
-  AnyFunctionDecl(FunctionDecl *FD) : Function(FD) { }
+  AnyFunctionDecl(FunctionDecl *FD) : Function(FD) {}
   AnyFunctionDecl(FunctionTemplateDecl *FTD);
 
   /// \brief Implicily converts any function or function template into a
@@ -68,16 +95,18 @@
   }
 };
 
-} // end namespace clang
+} // namespace clang
 
 namespace llvm {
+
   // Provide PointerLikeTypeTraits for non-cvr pointers.
   template<>
   struct PointerLikeTypeTraits< ::clang::AnyFunctionDecl> {
-    static inline void *getAsVoidPointer(::clang::AnyFunctionDecl F) {
+    static void *getAsVoidPointer(::clang::AnyFunctionDecl F) {
       return F.get();
     }
-    static inline ::clang::AnyFunctionDecl getFromVoidPointer(void *P) {
+
+    static ::clang::AnyFunctionDecl getFromVoidPointer(void *P) {
       return ::clang::AnyFunctionDecl::getFromNamedDecl(
                                       static_cast< ::clang::NamedDecl*>(P));
     }
@@ -85,7 +114,7 @@
     enum { NumLowBitsAvailable = 2 };
   };
 
-} // end namespace llvm
+} // namespace llvm
 
 namespace clang {
 
@@ -100,7 +129,6 @@
 /// Also note that this class has nothing to do with so-called
 /// "access declarations" (C++98 11.3 [class.access.dcl]).
 class AccessSpecDecl : public Decl {
-  virtual void anchor();
   /// \brief The location of the ':'.
   SourceLocation ColonLoc;
 
@@ -109,16 +137,21 @@
     : Decl(AccessSpec, DC, ASLoc), ColonLoc(ColonLoc) {
     setAccess(AS);
   }
-  AccessSpecDecl(EmptyShell Empty)
-    : Decl(AccessSpec, Empty) { }
+
+  AccessSpecDecl(EmptyShell Empty) : Decl(AccessSpec, Empty) {}
+
+  virtual void anchor();
+
 public:
   /// \brief The location of the access specifier.
   SourceLocation getAccessSpecifierLoc() const { return getLocation(); }
+
   /// \brief Sets the location of the access specifier.
   void setAccessSpecifierLoc(SourceLocation ASLoc) { setLocation(ASLoc); }
 
   /// \brief The location of the colon following the access specifier.
   SourceLocation getColonLoc() const { return ColonLoc; }
+
   /// \brief Sets the location of the colon.
   void setColonLoc(SourceLocation CLoc) { ColonLoc = CLoc; }
 
@@ -131,6 +164,7 @@
                                 SourceLocation ColonLoc) {
     return new (C, DC) AccessSpecDecl(AS, DC, ASLoc, ColonLoc);
   }
+
   static AccessSpecDecl *CreateDeserialized(ASTContext &C, unsigned ID);
 
   // Implement isa/cast/dyncast/etc.
@@ -190,12 +224,11 @@
   TypeSourceInfo *BaseTypeInfo;
 
 public:
-  CXXBaseSpecifier() { }
-
+  CXXBaseSpecifier() = default;
   CXXBaseSpecifier(SourceRange R, bool V, bool BC, AccessSpecifier A,
                    TypeSourceInfo *TInfo, SourceLocation EllipsisLoc)
     : Range(R), EllipsisLoc(EllipsisLoc), Virtual(V), BaseOfClass(BC),
-      Access(A), InheritConstructors(false), BaseTypeInfo(TInfo) { }
+      Access(A), InheritConstructors(false), BaseTypeInfo(TInfo) {}
 
   /// \brief Retrieves the source range that contains the entire base specifier.
   SourceRange getSourceRange() const LLVM_READONLY { return Range; }
@@ -264,7 +297,16 @@
 
 /// \brief Represents a C++ struct/union/class.
 class CXXRecordDecl : public RecordDecl {
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+  friend class ASTNodeImporter;
+  friend class ASTReader;
+  friend class ASTRecordWriter;
+  friend class ASTWriter;
+  friend class DeclContext;
+  friend class LambdaExpr;
 
+  friend void FunctionDecl::setPure(bool);
   friend void TagDecl::startDefinition();
 
   /// Values used in DefinitionData fields to represent special members.
@@ -279,8 +321,6 @@
   };
 
   struct DefinitionData {
-    DefinitionData(CXXRecordDecl *D);
-
     /// \brief True if this class has any user-declared constructors.
     unsigned UserDeclaredConstructor : 1;
 
@@ -397,6 +437,11 @@
     /// which have been declared but not yet defined.
     unsigned HasTrivialSpecialMembers : 6;
 
+    /// These bits keep track of the triviality of special functions for the
+    /// purpose of calls. Only the bits corresponding to SMF_CopyConstructor,
+    /// SMF_MoveConstructor, and SMF_Destructor are meaningful here.
+    unsigned HasTrivialSpecialMembersForCall : 6;
+
     /// \brief The declared special members of this class which are known to be
     /// non-trivial.
     ///
@@ -405,6 +450,12 @@
     /// members which have not yet been declared.
     unsigned DeclaredNonTrivialSpecialMembers : 6;
 
+    /// These bits keep track of the declared special members that are
+    /// non-trivial for the purpose of calls.
+    /// Only the bits corresponding to SMF_CopyConstructor,
+    /// SMF_MoveConstructor, and SMF_Destructor are meaningful here.
+    unsigned DeclaredNonTrivialSpecialMembersForCall : 6;
+
     /// \brief True when this class has a destructor with no semantic effect.
     unsigned HasIrrelevantDestructor : 1;
 
@@ -474,13 +525,13 @@
     unsigned HasODRHash : 1;
 
     /// \brief A hash of parts of the class to help in ODR checking.
-    unsigned ODRHash;
+    unsigned ODRHash = 0;
 
     /// \brief The number of base class specifiers in Bases.
-    unsigned NumBases;
+    unsigned NumBases = 0;
 
     /// \brief The number of virtual base class specifiers in VBases.
-    unsigned NumVBases;
+    unsigned NumVBases = 0;
 
     /// \brief Base classes of this class.
     ///
@@ -512,6 +563,8 @@
     /// This is actually currently stored in reverse order.
     LazyDeclPtr FirstFriend;
 
+    DefinitionData(CXXRecordDecl *D);
+
     /// \brief Retrieve the set of direct base classes.
     CXXBaseSpecifier *getBases() const {
       if (!Bases.isOffset())
@@ -529,6 +582,7 @@
     ArrayRef<CXXBaseSpecifier> bases() const {
       return llvm::makeArrayRef(getBases(), NumBases);
     }
+
     ArrayRef<CXXBaseSpecifier> vbases() const {
       return llvm::makeArrayRef(getVBases(), NumVBases);
     }
@@ -542,22 +596,7 @@
 
   /// \brief Describes a C++ closure type (generated by a lambda expression).
   struct LambdaDefinitionData : public DefinitionData {
-    typedef LambdaCapture Capture;
-
-    LambdaDefinitionData(CXXRecordDecl *D, TypeSourceInfo *Info, 
-                         bool Dependent, bool IsGeneric, 
-                         LambdaCaptureDefault CaptureDefault) 
-      : DefinitionData(D), Dependent(Dependent), IsGenericLambda(IsGeneric), 
-        CaptureDefault(CaptureDefault), NumCaptures(0), NumExplicitCaptures(0), 
-        ManglingNumber(0), ContextDecl(nullptr), Captures(nullptr),
-        MethodTyInfo(Info) {
-      IsLambda = true;
-
-      // C++1z [expr.prim.lambda]p4:
-      //   This class type is not an aggregate type.
-      Aggregate = false;
-      PlainOldData = false;
-    }
+    using Capture = LambdaCapture;
 
     /// \brief Whether this lambda is known to be dependent, even if its
     /// context isn't dependent.
@@ -583,7 +622,7 @@
 
     /// \brief The number used to indicate this lambda expression for name 
     /// mangling in the Itanium C++ ABI.
-    unsigned ManglingNumber;
+    unsigned ManglingNumber = 0;
     
     /// \brief The declaration that provides context for this lambda, if the
     /// actual DeclContext does not suffice. This is used for lambdas that
@@ -593,11 +632,24 @@
     
     /// \brief The list of captures, both explicit and implicit, for this 
     /// lambda.
-    Capture *Captures;
+    Capture *Captures = nullptr;
 
     /// \brief The type of the call method.
     TypeSourceInfo *MethodTyInfo;
-       
+
+    LambdaDefinitionData(CXXRecordDecl *D, TypeSourceInfo *Info, 
+                         bool Dependent, bool IsGeneric, 
+                         LambdaCaptureDefault CaptureDefault) 
+      : DefinitionData(D), Dependent(Dependent), IsGenericLambda(IsGeneric), 
+        CaptureDefault(CaptureDefault), NumCaptures(0), NumExplicitCaptures(0), 
+        MethodTyInfo(Info) {
+      IsLambda = true;
+
+      // C++1z [expr.prim.lambda]p4:
+      //   This class type is not an aggregate type.
+      Aggregate = false;
+      PlainOldData = false;
+    }
   };
 
   struct DefinitionData *dataPtr() const {
@@ -629,11 +681,8 @@
   /// classes of class template specializations, this will be the
   /// MemberSpecializationInfo referring to the member class that was
   /// instantiated or specialized.
-  llvm::PointerUnion<ClassTemplateDecl*, MemberSpecializationInfo*>
-    TemplateOrInstantiation;
-
-  friend class DeclContext;
-  friend class LambdaExpr;
+  llvm::PointerUnion<ClassTemplateDecl *, MemberSpecializationInfo *>
+      TemplateOrInstantiation;
 
   /// \brief Called from setBases and addedMember to notify the class that a
   /// direct or virtual base class or a member of class type has been added.
@@ -647,9 +696,6 @@
   void addedMember(Decl *D);
 
   void markedVirtualFunctionPure();
-  friend void FunctionDecl::setPure(bool);
-
-  friend class ASTNodeImporter;
 
   /// \brief Get the head of our list of friend declarations, possibly
   /// deserializing the friends from an external AST source.
@@ -662,14 +708,15 @@
 
 public:
   /// \brief Iterator that traverses the base classes of a class.
-  typedef CXXBaseSpecifier*       base_class_iterator;
+  using base_class_iterator = CXXBaseSpecifier *;
 
   /// \brief Iterator that traverses the base classes of a class.
-  typedef const CXXBaseSpecifier* base_class_const_iterator;
+  using base_class_const_iterator = const CXXBaseSpecifier *;
 
   CXXRecordDecl *getCanonicalDecl() override {
     return cast<CXXRecordDecl>(RecordDecl::getCanonicalDecl());
   }
+
   const CXXRecordDecl *getCanonicalDecl() const {
     return const_cast<CXXRecordDecl*>(this)->getCanonicalDecl();
   }
@@ -678,6 +725,7 @@
     return cast_or_null<CXXRecordDecl>(
             static_cast<RecordDecl *>(this)->getPreviousDecl());
   }
+
   const CXXRecordDecl *getPreviousDecl() const {
     return const_cast<CXXRecordDecl*>(this)->getPreviousDecl();
   }
@@ -729,9 +777,9 @@
   /// \brief Retrieves the number of base classes of this class.
   unsigned getNumBases() const { return data().NumBases; }
 
-  typedef llvm::iterator_range<base_class_iterator> base_class_range;
-  typedef llvm::iterator_range<base_class_const_iterator>
-    base_class_const_range;
+  using base_class_range = llvm::iterator_range<base_class_iterator>;
+  using base_class_const_range =
+      llvm::iterator_range<base_class_const_iterator>;
 
   base_class_range bases() {
     return base_class_range(bases_begin(), bases_end());
@@ -771,9 +819,9 @@
   /// Iterator access to method members.  The method iterator visits
   /// all method members of the class, including non-instance methods,
   /// special methods, etc.
-  typedef specific_decl_iterator<CXXMethodDecl> method_iterator;
-  typedef llvm::iterator_range<specific_decl_iterator<CXXMethodDecl>>
-    method_range;
+  using method_iterator = specific_decl_iterator<CXXMethodDecl>;
+  using method_range =
+      llvm::iterator_range<specific_decl_iterator<CXXMethodDecl>>;
 
   method_range methods() const {
     return method_range(method_begin(), method_end());
@@ -784,21 +832,23 @@
   method_iterator method_begin() const {
     return method_iterator(decls_begin());
   }
+
   /// \brief Method past-the-end iterator.
   method_iterator method_end() const {
     return method_iterator(decls_end());
   }
 
   /// Iterator access to constructor members.
-  typedef specific_decl_iterator<CXXConstructorDecl> ctor_iterator;
-  typedef llvm::iterator_range<specific_decl_iterator<CXXConstructorDecl>>
-    ctor_range;
+  using ctor_iterator = specific_decl_iterator<CXXConstructorDecl>;
+  using ctor_range =
+      llvm::iterator_range<specific_decl_iterator<CXXConstructorDecl>>;
 
   ctor_range ctors() const { return ctor_range(ctor_begin(), ctor_end()); }
 
   ctor_iterator ctor_begin() const {
     return ctor_iterator(decls_begin());
   }
+
   ctor_iterator ctor_end() const {
     return ctor_iterator(decls_end());
   }
@@ -806,7 +856,7 @@
   /// An iterator over friend declarations.  All of these are defined
   /// in DeclFriend.h.
   class friend_iterator;
-  typedef llvm::iterator_range<friend_iterator> friend_range;
+  using friend_range = llvm::iterator_range<friend_iterator>;
 
   friend_range friends() const;
   friend_iterator friend_begin() const;
@@ -1155,24 +1205,28 @@
   void getCaptureFields(llvm::DenseMap<const VarDecl *, FieldDecl *> &Captures,
                         FieldDecl *&ThisCapture) const;
 
-  typedef const LambdaCapture *capture_const_iterator;
-  typedef llvm::iterator_range<capture_const_iterator> capture_const_range;
+  using capture_const_iterator = const LambdaCapture *;
+  using capture_const_range = llvm::iterator_range<capture_const_iterator>;
 
   capture_const_range captures() const {
     return capture_const_range(captures_begin(), captures_end());
   }
+
   capture_const_iterator captures_begin() const {
     return isLambda() ? getLambdaData().Captures : nullptr;
   }
+
   capture_const_iterator captures_end() const {
     return isLambda() ? captures_begin() + getLambdaData().NumCaptures
                       : nullptr;
   }
 
-  typedef UnresolvedSetIterator conversion_iterator;
+  using conversion_iterator = UnresolvedSetIterator;
+
   conversion_iterator conversion_begin() const {
     return data().Conversions.get(getASTContext()).begin();
   }
+
   conversion_iterator conversion_end() const {
     return data().Conversions.get(getASTContext()).end();
   }
@@ -1306,6 +1360,10 @@
     return data().HasTrivialSpecialMembers & SMF_CopyConstructor;
   }
 
+  bool hasTrivialCopyConstructorForCall() const {
+    return data().HasTrivialSpecialMembersForCall & SMF_CopyConstructor;
+  }
+
   /// \brief Determine whether this class has a non-trivial copy constructor
   /// (C++ [class.copy]p6, C++11 [class.copy]p12)
   bool hasNonTrivialCopyConstructor() const {
@@ -1313,6 +1371,12 @@
            !hasTrivialCopyConstructor();
   }
 
+  bool hasNonTrivialCopyConstructorForCall() const {
+    return (data().DeclaredNonTrivialSpecialMembersForCall &
+            SMF_CopyConstructor) ||
+           !hasTrivialCopyConstructorForCall();
+  }
+
   /// \brief Determine whether this class has a trivial move constructor
   /// (C++11 [class.copy]p12)
   bool hasTrivialMoveConstructor() const {
@@ -1320,6 +1384,11 @@
            (data().HasTrivialSpecialMembers & SMF_MoveConstructor);
   }
 
+  bool hasTrivialMoveConstructorForCall() const {
+    return hasMoveConstructor() &&
+           (data().HasTrivialSpecialMembersForCall & SMF_MoveConstructor);
+  }
+
   /// \brief Determine whether this class has a non-trivial move constructor
   /// (C++11 [class.copy]p12)
   bool hasNonTrivialMoveConstructor() const {
@@ -1328,6 +1397,13 @@
             !(data().HasTrivialSpecialMembers & SMF_MoveConstructor));
   }
 
+  bool hasNonTrivialMoveConstructorForCall() const {
+    return (data().DeclaredNonTrivialSpecialMembersForCall &
+            SMF_MoveConstructor) ||
+           (needsImplicitMoveConstructor() &&
+            !(data().HasTrivialSpecialMembersForCall & SMF_MoveConstructor));
+  }
+
   /// \brief Determine whether this class has a trivial copy assignment operator
   /// (C++ [class.copy]p11, C++11 [class.copy]p25)
   bool hasTrivialCopyAssignment() const {
@@ -1362,12 +1438,25 @@
     return data().HasTrivialSpecialMembers & SMF_Destructor;
   }
 
+  bool hasTrivialDestructorForCall() const {
+    return data().HasTrivialSpecialMembersForCall & SMF_Destructor;
+  }
+
   /// \brief Determine whether this class has a non-trivial destructor
   /// (C++ [class.dtor]p3)
   bool hasNonTrivialDestructor() const {
     return !(data().HasTrivialSpecialMembers & SMF_Destructor);
   }
 
+  bool hasNonTrivialDestructorForCall() const {
+    return !(data().HasTrivialSpecialMembersForCall & SMF_Destructor);
+  }
+
+  void setHasTrivialSpecialMemberForCall() {
+    data().HasTrivialSpecialMembersForCall =
+        (SMF_CopyConstructor | SMF_MoveConstructor | SMF_Destructor);
+  }
+
   /// \brief Determine whether declaring a const variable with this type is ok
   /// per core issue 253.
   bool allowConstDefaultInit() const {
@@ -1397,6 +1486,13 @@
     data().CanPassInRegisters = CanPass;
   }
 
+  /// Determine whether the triviality for the purpose of calls for this class
+  /// is overridden to be trivial because this class or the type of one of its
+  /// subobjects has attribute "trivial_abi".
+  bool hasTrivialABIOverride() const {
+    return canPassInRegisters() && hasNonTrivialDestructor();
+  }
+
   /// \brief Determine whether this class has a non-literal or/ volatile type
   /// non-static data member or base class.
   bool hasNonLiteralTypeFieldsOrBases() const {
@@ -1444,10 +1540,10 @@
   /// We resolve DR1361 by ignoring the second bullet. We resolve DR1452 by
   /// treating types with trivial default constructors as literal types.
   ///
-  /// Only in C++1z and beyond, are lambdas literal types.
+  /// Only in C++17 and beyond, are lambdas literal types.
   bool isLiteral() const {
     return hasTrivialDestructor() &&
-           (!isLambda() || getASTContext().getLangOpts().CPlusPlus1z) &&
+           (!isLambda() || getASTContext().getLangOpts().CPlusPlus17) &&
            !hasNonLiteralTypeFieldsOrBases() &&
            (isAggregate() || isLambda() ||
             hasConstexprNonCopyMoveConstructor() ||
@@ -1596,8 +1692,8 @@
   /// \param BaseDefinition the definition of the base class
   ///
   /// \returns true if this base matched the search criteria
-  typedef llvm::function_ref<bool(const CXXRecordDecl *BaseDefinition)>
-      ForallBasesCallback;
+  using ForallBasesCallback =
+      llvm::function_ref<bool(const CXXRecordDecl *BaseDefinition)>;
 
   /// \brief Determines if the given callback holds for all the direct
   /// or indirect base classes of this type.
@@ -1625,8 +1721,9 @@
   /// base named by the \p Specifier.
   ///
   /// \returns true if this base matched the search criteria, false otherwise.
-  typedef llvm::function_ref<bool(const CXXBaseSpecifier *Specifier,
-                                  CXXBasePath &Path)> BaseMatchesCallback;
+  using BaseMatchesCallback =
+      llvm::function_ref<bool(const CXXBaseSpecifier *Specifier,
+                              CXXBasePath &Path)>;
 
   /// \brief Look for entities within the base classes of this C++ class,
   /// transitively searching all base class subobjects.
@@ -1753,6 +1850,8 @@
   /// member function is now complete.
   void finishedDefaultedOrDeletedMember(CXXMethodDecl *MD);
 
+  void setTrivialForCallFlags(CXXMethodDecl *MD);
+
   /// \brief Indicates that the definition of this class is now complete.
   void completeDefinition() override;
 
@@ -1805,6 +1904,7 @@
 
   /// \brief Returns the inheritance model used for this record.
   MSInheritanceAttr::Spelling getMSInheritanceModel() const;
+
   /// \brief Calculate what the inheritance model would be for this class.
   MSInheritanceAttr::Spelling calculateInheritanceModel() const;
 
@@ -1851,12 +1951,6 @@
   static bool classofKind(Kind K) {
     return K >= firstCXXRecord && K <= lastCXXRecord;
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
-  friend class ASTRecordWriter;
-  friend class ASTReader;
-  friend class ASTWriter;
 };
 
 /// \brief Represents a C++ deduction guide declaration.
@@ -1871,6 +1965,7 @@
 /// the constructors of \c A.
 class CXXDeductionGuideDecl : public FunctionDecl {
   void anchor() override;
+
 private:
   CXXDeductionGuideDecl(ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
                         bool IsExplicit, const DeclarationNameInfo &NameInfo,
@@ -1884,6 +1979,9 @@
   }
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static CXXDeductionGuideDecl *Create(ASTContext &C, DeclContext *DC,
                                        SourceLocation StartLoc, bool IsExplicit,
                                        const DeclarationNameInfo &NameInfo,
@@ -1903,12 +2001,15 @@
     return getDeclName().getCXXDeductionGuideTemplate();
   }
 
+  void setIsCopyDeductionCandidate() {
+    IsCopyDeductionCandidate = true;
+  }
+
+  bool isCopyDeductionCandidate() const { return IsCopyDeductionCandidate; }
+
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == CXXDeductionGuide; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// \brief Represents a static or instance method of a struct/union/class.
@@ -1917,6 +2018,7 @@
 /// non-static) member functions, whether virtual or not.
 class CXXMethodDecl : public FunctionDecl {
   void anchor() override;
+
 protected:
   CXXMethodDecl(Kind DK, ASTContext &C, CXXRecordDecl *RD,
                 SourceLocation StartLoc, const DeclarationNameInfo &NameInfo,
@@ -1960,15 +2062,14 @@
   bool isVolatile() const { return getType()->castAs<FunctionType>()->isVolatile(); }
 
   bool isVirtual() const {
-    CXXMethodDecl *CD =
-      cast<CXXMethodDecl>(const_cast<CXXMethodDecl*>(this)->getCanonicalDecl());
+    CXXMethodDecl *CD = const_cast<CXXMethodDecl*>(this)->getCanonicalDecl();
 
     // Member function is virtual if it is marked explicitly so, or if it is
     // declared in __interface -- then it is automatically pure virtual.
     if (CD->isVirtualAsWritten() || CD->isPure())
       return true;
 
-    return (CD->begin_overridden_methods() != CD->end_overridden_methods());
+    return CD->size_overridden_methods() != 0;
   }
 
   /// If it's possible to devirtualize a call to this method, return the called
@@ -2014,18 +2115,23 @@
   /// True if this method is user-declared and was not
   /// deleted or defaulted on its first declaration.
   bool isUserProvided() const {
-    return !(isDeleted() || getCanonicalDecl()->isDefaulted());
+    auto *DeclAsWritten = this;
+    if (auto *Pattern = getTemplateInstantiationPattern())
+      DeclAsWritten = cast<CXXMethodDecl>(Pattern);
+    return !(DeclAsWritten->isDeleted() ||
+             DeclAsWritten->getCanonicalDecl()->isDefaulted());
   }
 
-  ///
   void addOverriddenMethod(const CXXMethodDecl *MD);
 
-  typedef const CXXMethodDecl *const* method_iterator;
+  using method_iterator = const CXXMethodDecl *const *;
 
   method_iterator begin_overridden_methods() const;
   method_iterator end_overridden_methods() const;
   unsigned size_overridden_methods() const;
-  typedef ASTContext::overridden_method_range overridden_method_range;
+
+  using overridden_method_range= ASTContext::overridden_method_range;
+
   overridden_method_range overridden_methods() const;
 
   /// Returns the parent of this method declaration, which
@@ -2255,6 +2361,7 @@
       return Initializee.get<FieldDecl*>();
     return nullptr;
   }
+
   FieldDecl *getAnyMember() const {
     if (isMemberInitializer())
       return Initializee.get<FieldDecl*>();
@@ -2316,11 +2423,11 @@
 
 /// Description of a constructor that was inherited from a base class.
 class InheritedConstructor {
-  ConstructorUsingShadowDecl *Shadow;
-  CXXConstructorDecl *BaseCtor;
+  ConstructorUsingShadowDecl *Shadow = nullptr;
+  CXXConstructorDecl *BaseCtor = nullptr;
 
 public:
-  InheritedConstructor() : Shadow(), BaseCtor() {}
+  InheritedConstructor() = default;
   InheritedConstructor(ConstructorUsingShadowDecl *Shadow,
                        CXXConstructorDecl *BaseCtor)
       : Shadow(Shadow), BaseCtor(BaseCtor) {}
@@ -2344,8 +2451,6 @@
 class CXXConstructorDecl final
     : public CXXMethodDecl,
       private llvm::TrailingObjects<CXXConstructorDecl, InheritedConstructor> {
-  void anchor() override;
-
   /// \name Support for base and member initializers.
   /// \{
   /// \brief The arguments used to initialize the base or member.
@@ -2365,15 +2470,20 @@
                      InheritedConstructor Inherited)
     : CXXMethodDecl(CXXConstructor, C, RD, StartLoc, NameInfo, T, TInfo,
                     SC_None, isInline, isConstexpr, SourceLocation()),
-      CtorInitializers(nullptr), NumCtorInitializers(0),
-      IsInheritingConstructor((bool)Inherited) {
+      NumCtorInitializers(0), IsInheritingConstructor((bool)Inherited) {
     setImplicit(isImplicitlyDeclared);
     if (Inherited)
       *getTrailingObjects<InheritedConstructor>() = Inherited;
     IsExplicitSpecified = isExplicitSpecified;
   }
 
+  void anchor() override;
+
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+  friend TrailingObjects;
+
   static CXXConstructorDecl *CreateDeserialized(ASTContext &C, unsigned ID,
                                                 bool InheritsConstructor);
   static CXXConstructorDecl *
@@ -2384,13 +2494,13 @@
          InheritedConstructor Inherited = InheritedConstructor());
 
   /// \brief Iterates through the member/base initializer list.
-  typedef CXXCtorInitializer **init_iterator;
+  using init_iterator = CXXCtorInitializer **;
 
   /// \brief Iterates through the member/base initializer list.
-  typedef CXXCtorInitializer *const *init_const_iterator;
+  using init_const_iterator = CXXCtorInitializer *const *;
 
-  typedef llvm::iterator_range<init_iterator> init_range;
-  typedef llvm::iterator_range<init_const_iterator> init_const_range;
+  using init_range = llvm::iterator_range<init_iterator>;
+  using init_const_range = llvm::iterator_range<init_const_iterator>;
 
   init_range inits() { return init_range(init_begin(), init_end()); }
   init_const_range inits() const {
@@ -2402,6 +2512,7 @@
     const auto *ConstThis = this;
     return const_cast<init_iterator>(ConstThis->init_begin());
   }
+
   /// \brief Retrieve an iterator to the first initializer.
   init_const_iterator init_begin() const;
 
@@ -2409,14 +2520,15 @@
   init_iterator       init_end()       {
     return init_begin() + NumCtorInitializers;
   }
+
   /// \brief Retrieve an iterator past the last initializer.
   init_const_iterator init_end() const {
     return init_begin() + NumCtorInitializers;
   }
 
-  typedef std::reverse_iterator<init_iterator> init_reverse_iterator;
-  typedef std::reverse_iterator<init_const_iterator>
-          init_const_reverse_iterator;
+  using init_reverse_iterator = std::reverse_iterator<init_iterator>;
+  using init_const_reverse_iterator =
+      std::reverse_iterator<init_const_iterator>;
 
   init_reverse_iterator init_rbegin() {
     return init_reverse_iterator(init_end());
@@ -2547,10 +2659,6 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == CXXConstructor; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
-  friend TrailingObjects;
 };
 
 /// \brief Represents a C++ destructor within a class.
@@ -2564,23 +2672,26 @@
 /// };
 /// \endcode
 class CXXDestructorDecl : public CXXMethodDecl {
-  void anchor() override;
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
 
   // FIXME: Don't allocate storage for these except in the first declaration
   // of a virtual destructor.
-  FunctionDecl *OperatorDelete;
-  Expr *OperatorDeleteThisArg;
+  FunctionDecl *OperatorDelete = nullptr;
+  Expr *OperatorDeleteThisArg = nullptr;
 
   CXXDestructorDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
                     const DeclarationNameInfo &NameInfo,
                     QualType T, TypeSourceInfo *TInfo,
                     bool isInline, bool isImplicitlyDeclared)
     : CXXMethodDecl(CXXDestructor, C, RD, StartLoc, NameInfo, T, TInfo,
-                    SC_None, isInline, /*isConstexpr=*/false, SourceLocation()),
-      OperatorDelete(nullptr), OperatorDeleteThisArg(nullptr) {
+                    SC_None, isInline, /*isConstexpr=*/false, SourceLocation())
+  {
     setImplicit(isImplicitlyDeclared);
   }
 
+  void anchor() override;
+
 public:
   static CXXDestructorDecl *Create(ASTContext &C, CXXRecordDecl *RD,
                                    SourceLocation StartLoc,
@@ -2591,9 +2702,11 @@
   static CXXDestructorDecl *CreateDeserialized(ASTContext & C, unsigned ID);
 
   void setOperatorDelete(FunctionDecl *OD, Expr *ThisArg);
+
   const FunctionDecl *getOperatorDelete() const {
     return getCanonicalDecl()->OperatorDelete;
   }
+
   Expr *getOperatorDeleteThisArg() const {
     return getCanonicalDecl()->OperatorDeleteThisArg;
   }
@@ -2608,9 +2721,6 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == CXXDestructor; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// \brief Represents a C++ conversion function within a class.
@@ -2624,8 +2734,6 @@
 /// };
 /// \endcode
 class CXXConversionDecl : public CXXMethodDecl {
-  void anchor() override;
-
   CXXConversionDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
                     const DeclarationNameInfo &NameInfo, QualType T,
                     TypeSourceInfo *TInfo, bool isInline,
@@ -2636,7 +2744,12 @@
     IsExplicitSpecified = isExplicitSpecified;
   }
 
+  void anchor() override;
+
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static CXXConversionDecl *Create(ASTContext &C, CXXRecordDecl *RD,
                                    SourceLocation StartLoc,
                                    const DeclarationNameInfo &NameInfo,
@@ -2673,9 +2786,6 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == CXXConversion; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// \brief Represents a linkage specification. 
@@ -2686,6 +2796,7 @@
 /// \endcode
 class LinkageSpecDecl : public Decl, public DeclContext {
   virtual void anchor();
+
 public:
   /// \brief Represents the language in a linkage specification.
   ///
@@ -2697,25 +2808,29 @@
     lang_c = /* DW_LANG_C */ 0x0002,
     lang_cxx = /* DW_LANG_C_plus_plus */ 0x0004
   };
+
 private:
   /// \brief The language for this linkage specification.
   unsigned Language : 3;
+
   /// \brief True if this linkage spec has braces.
   ///
   /// This is needed so that hasBraces() returns the correct result while the
   /// linkage spec body is being parsed.  Once RBraceLoc has been set this is
   /// not used, so it doesn't need to be serialized.
   unsigned HasBraces : 1;
+
   /// \brief The source location for the extern keyword.
   SourceLocation ExternLoc;
+
   /// \brief The source location for the right brace (if valid).
   SourceLocation RBraceLoc;
 
   LinkageSpecDecl(DeclContext *DC, SourceLocation ExternLoc,
                   SourceLocation LangLoc, LanguageIDs lang, bool HasBraces)
-    : Decl(LinkageSpec, DC, LangLoc), DeclContext(LinkageSpec),
-      Language(lang), HasBraces(HasBraces), ExternLoc(ExternLoc),
-      RBraceLoc(SourceLocation()) { }
+      : Decl(LinkageSpec, DC, LangLoc), DeclContext(LinkageSpec),
+        Language(lang), HasBraces(HasBraces), ExternLoc(ExternLoc),
+        RBraceLoc(SourceLocation()) {}
 
 public:
   static LinkageSpecDecl *Create(ASTContext &C, DeclContext *DC,
@@ -2726,6 +2841,7 @@
   
   /// \brief Return the language specified by this linkage specification.
   LanguageIDs getLanguage() const { return LanguageIDs(Language); }
+
   /// \brief Set the language specified by this linkage specification.
   void setLanguage(LanguageIDs L) { Language = L; }
 
@@ -2758,9 +2874,11 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == LinkageSpec; }
+
   static DeclContext *castToDeclContext(const LinkageSpecDecl *D) {
     return static_cast<DeclContext *>(const_cast<LinkageSpecDecl*>(D));
   }
+
   static LinkageSpecDecl *castFromDeclContext(const DeclContext *DC) {
     return static_cast<LinkageSpecDecl *>(const_cast<DeclContext*>(DC));
   }
@@ -2777,7 +2895,6 @@
 /// artificial names for all using-directives in order to store
 /// them in DeclContext effectively.
 class UsingDirectiveDecl : public NamedDecl {
-  void anchor() override;
   /// \brief The location of the \c using keyword.
   SourceLocation UsingLoc;
 
@@ -2794,6 +2911,16 @@
   /// namespace.
   DeclContext *CommonAncestor;
 
+  UsingDirectiveDecl(DeclContext *DC, SourceLocation UsingLoc,
+                     SourceLocation NamespcLoc,
+                     NestedNameSpecifierLoc QualifierLoc,
+                     SourceLocation IdentLoc,
+                     NamedDecl *Nominated,
+                     DeclContext *CommonAncestor)
+      : NamedDecl(UsingDirective, DC, IdentLoc, getName()), UsingLoc(UsingLoc),
+        NamespaceLoc(NamespcLoc), QualifierLoc(QualifierLoc),
+        NominatedNamespace(Nominated), CommonAncestor(CommonAncestor) {}
+
   /// \brief Returns special DeclarationName used by using-directives.
   ///
   /// This is only used by DeclContext for storing UsingDirectiveDecls in
@@ -2802,17 +2929,14 @@
     return DeclarationName::getUsingDirectiveName();
   }
 
-  UsingDirectiveDecl(DeclContext *DC, SourceLocation UsingLoc,
-                     SourceLocation NamespcLoc,
-                     NestedNameSpecifierLoc QualifierLoc,
-                     SourceLocation IdentLoc,
-                     NamedDecl *Nominated,
-                     DeclContext *CommonAncestor)
-    : NamedDecl(UsingDirective, DC, IdentLoc, getName()), UsingLoc(UsingLoc),
-      NamespaceLoc(NamespcLoc), QualifierLoc(QualifierLoc),
-      NominatedNamespace(Nominated), CommonAncestor(CommonAncestor) { }
+  void anchor() override;
 
 public:
+  friend class ASTDeclReader;
+
+  // Friend for getUsingDirectiveName.
+  friend class DeclContext;
+
   /// \brief Retrieve the nested-name-specifier that qualifies the
   /// name of the namespace, with source-location information.
   NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
@@ -2865,11 +2989,6 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == UsingDirective; }
-
-  // Friend for getUsingDirectiveName.
-  friend class DeclContext;
-
-  friend class ASTDeclReader;
 };
 
 /// \brief Represents a C++ namespace alias.
@@ -2881,7 +3000,7 @@
 /// \endcode
 class NamespaceAliasDecl : public NamedDecl,
                            public Redeclarable<NamespaceAliasDecl> {
-  void anchor() override;
+  friend class ASTDeclReader;
 
   /// \brief The location of the \c namespace keyword.
   SourceLocation NamespaceLoc;
@@ -2906,13 +3025,14 @@
         NamespaceLoc(NamespaceLoc), IdentLoc(IdentLoc),
         QualifierLoc(QualifierLoc), Namespace(Namespace) {}
 
-  typedef Redeclarable<NamespaceAliasDecl> redeclarable_base;
+  void anchor() override;
+
+  using redeclarable_base = Redeclarable<NamespaceAliasDecl>;
+
   NamespaceAliasDecl *getNextRedeclarationImpl() override;
   NamespaceAliasDecl *getPreviousDeclImpl() override;
   NamespaceAliasDecl *getMostRecentDeclImpl() override;
 
-  friend class ASTDeclReader;
-
 public:
   static NamespaceAliasDecl *Create(ASTContext &C, DeclContext *DC,
                                     SourceLocation NamespaceLoc,
@@ -2924,8 +3044,9 @@
 
   static NamespaceAliasDecl *CreateDeserialized(ASTContext &C, unsigned ID);
 
-  typedef redeclarable_base::redecl_range redecl_range;
-  typedef redeclarable_base::redecl_iterator redecl_iterator;
+  using redecl_range = redeclarable_base::redecl_range;
+  using redecl_iterator = redeclarable_base::redecl_iterator;
+
   using redeclarable_base::redecls_begin;
   using redeclarable_base::redecls_end;
   using redeclarable_base::redecls;
@@ -2997,23 +3118,27 @@
 /// }
 /// \endcode
 class UsingShadowDecl : public NamedDecl, public Redeclarable<UsingShadowDecl> {
-  void anchor() override;
+  friend class UsingDecl;
 
   /// The referenced declaration.
-  NamedDecl *Underlying;
+  NamedDecl *Underlying = nullptr;
 
   /// \brief The using declaration which introduced this decl or the next using
   /// shadow declaration contained in the aforementioned using declaration.
-  NamedDecl *UsingOrNextShadow;
-  friend class UsingDecl;
+  NamedDecl *UsingOrNextShadow = nullptr;
 
-  typedef Redeclarable<UsingShadowDecl> redeclarable_base;
+  void anchor() override;
+
+  using redeclarable_base = Redeclarable<UsingShadowDecl>;
+
   UsingShadowDecl *getNextRedeclarationImpl() override {
     return getNextRedeclaration();
   }
+
   UsingShadowDecl *getPreviousDeclImpl() override {
     return getPreviousDecl();
   }
+
   UsingShadowDecl *getMostRecentDeclImpl() override {
     return getMostRecentDecl();
   }
@@ -3024,6 +3149,9 @@
   UsingShadowDecl(Kind K, ASTContext &C, EmptyShell);
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static UsingShadowDecl *Create(ASTContext &C, DeclContext *DC,
                                  SourceLocation Loc, UsingDecl *Using,
                                  NamedDecl *Target) {
@@ -3032,8 +3160,9 @@
 
   static UsingShadowDecl *CreateDeserialized(ASTContext &C, unsigned ID);
 
-  typedef redeclarable_base::redecl_range redecl_range;
-  typedef redeclarable_base::redecl_iterator redecl_iterator;
+  using redecl_range = redeclarable_base::redecl_range;
+  using redecl_iterator = redeclarable_base::redecl_iterator;
+
   using redeclarable_base::redecls_begin;
   using redeclarable_base::redecls_end;
   using redeclarable_base::redecls;
@@ -3054,10 +3183,14 @@
 
   /// \brief Sets the underlying declaration which has been brought into the
   /// local scope.
-  void setTargetDecl(NamedDecl* ND) {
+  void setTargetDecl(NamedDecl *ND) {
     assert(ND && "Target decl is null!");
     Underlying = ND;
-    IdentifierNamespace = ND->getIdentifierNamespace();
+    // A UsingShadowDecl is never a friend or local extern declaration, even
+    // if it is a shadow declaration for one.
+    IdentifierNamespace =
+        ND->getIdentifierNamespace() &
+        ~(IDNS_OrdinaryFriend | IDNS_TagFriend | IDNS_LocalExtern);
   }
 
   /// \brief Gets the using declaration to which this declaration is tied.
@@ -3073,9 +3206,6 @@
   static bool classofKind(Kind K) {
     return K == Decl::UsingShadow || K == Decl::ConstructorUsingShadow;
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// \brief Represents a shadow constructor declaration introduced into a
@@ -3089,18 +3219,16 @@
 /// };
 /// \endcode
 class ConstructorUsingShadowDecl final : public UsingShadowDecl {
-  void anchor() override;
-
   /// \brief If this constructor using declaration inherted the constructor
   /// from an indirect base class, this is the ConstructorUsingShadowDecl
   /// in the named direct base class from which the declaration was inherited.
-  ConstructorUsingShadowDecl *NominatedBaseClassShadowDecl;
+  ConstructorUsingShadowDecl *NominatedBaseClassShadowDecl = nullptr;
 
   /// \brief If this constructor using declaration inherted the constructor
   /// from an indirect base class, this is the ConstructorUsingShadowDecl
   /// that will be used to construct the unique direct or virtual base class
   /// that receives the constructor arguments.
-  ConstructorUsingShadowDecl *ConstructedBaseClassShadowDecl;
+  ConstructorUsingShadowDecl *ConstructedBaseClassShadowDecl = nullptr;
 
   /// \brief \c true if the constructor ultimately named by this using shadow
   /// declaration is within a virtual base class subobject of the class that
@@ -3126,12 +3254,16 @@
       IsVirtual = true;
     }
   }
+
   ConstructorUsingShadowDecl(ASTContext &C, EmptyShell Empty)
-      : UsingShadowDecl(ConstructorUsingShadow, C, Empty),
-        NominatedBaseClassShadowDecl(), ConstructedBaseClassShadowDecl(),
-        IsVirtual(false) {}
+      : UsingShadowDecl(ConstructorUsingShadow, C, Empty), IsVirtual(false) {}
+
+  void anchor() override;
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static ConstructorUsingShadowDecl *Create(ASTContext &C, DeclContext *DC,
                                             SourceLocation Loc,
                                             UsingDecl *Using, NamedDecl *Target,
@@ -3190,9 +3322,6 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ConstructorUsingShadow; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// \brief Represents a C++ using-declaration.
@@ -3202,8 +3331,6 @@
 ///    using someNameSpace::someIdentifier;
 /// \endcode
 class UsingDecl : public NamedDecl, public Mergeable<UsingDecl> {
-  void anchor() override;
-
   /// \brief The source location of the 'using' keyword itself.
   SourceLocation UsingLocation;
 
@@ -3229,7 +3356,12 @@
       DNLoc(NameInfo.getInfo()), FirstUsingShadow(nullptr, HasTypenameKeyword) {
   }
 
+  void anchor() override;
+
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   /// \brief Return the source location of the 'using' keyword.
   SourceLocation getUsingLoc() const { return UsingLocation; }
 
@@ -3262,17 +3394,17 @@
   /// this using declaration.
   class shadow_iterator {
     /// \brief The current using shadow declaration.
-    UsingShadowDecl *Current;
+    UsingShadowDecl *Current = nullptr;
 
   public:
-    typedef UsingShadowDecl*          value_type;
-    typedef UsingShadowDecl*          reference;
-    typedef UsingShadowDecl*          pointer;
-    typedef std::forward_iterator_tag iterator_category;
-    typedef std::ptrdiff_t            difference_type;
+    using value_type = UsingShadowDecl *;
+    using reference = UsingShadowDecl *;
+    using pointer = UsingShadowDecl *;
+    using iterator_category = std::forward_iterator_tag;
+    using difference_type = std::ptrdiff_t;
 
-    shadow_iterator() : Current(nullptr) { }
-    explicit shadow_iterator(UsingShadowDecl *C) : Current(C) { }
+    shadow_iterator() = default;
+    explicit shadow_iterator(UsingShadowDecl *C) : Current(C) {}
 
     reference operator*() const { return Current; }
     pointer operator->() const { return Current; }
@@ -3296,14 +3428,16 @@
     }
   };
 
-  typedef llvm::iterator_range<shadow_iterator> shadow_range;
+  using shadow_range = llvm::iterator_range<shadow_iterator>;
 
   shadow_range shadows() const {
     return shadow_range(shadow_begin(), shadow_end());
   }
+
   shadow_iterator shadow_begin() const {
     return shadow_iterator(FirstUsingShadow.getPointer());
   }
+
   shadow_iterator shadow_end() const { return shadow_iterator(); }
 
   /// \brief Return the number of shadowed declarations associated with this
@@ -3331,9 +3465,6 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == Using; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// Represents a pack of using declarations that a single
@@ -3352,8 +3483,6 @@
 class UsingPackDecl final
     : public NamedDecl, public Mergeable<UsingPackDecl>,
       private llvm::TrailingObjects<UsingPackDecl, NamedDecl *> {
-  void anchor() override;
-
   /// The UnresolvedUsingValueDecl or UnresolvedUsingTypenameDecl from
   /// which this waas instantiated.
   NamedDecl *InstantiatedFrom;
@@ -3373,7 +3502,13 @@
                             getTrailingObjects<NamedDecl *>());
   }
 
+  void anchor() override;
+
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+  friend TrailingObjects;
+
   /// Get the using declaration from which this was instantiated. This will
   /// always be an UnresolvedUsingValueDecl or an UnresolvedUsingTypenameDecl
   /// that is a pack expansion.
@@ -3401,10 +3536,6 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == UsingPack; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
-  friend TrailingObjects;
 };
 
 /// \brief Represents a dependent using declaration which was not marked with
@@ -3420,8 +3551,6 @@
 /// \endcode
 class UnresolvedUsingValueDecl : public ValueDecl,
                                  public Mergeable<UnresolvedUsingValueDecl> {
-  void anchor() override;
-
   /// \brief The source location of the 'using' keyword
   SourceLocation UsingLocation;
 
@@ -3440,13 +3569,17 @@
                            NestedNameSpecifierLoc QualifierLoc,
                            const DeclarationNameInfo &NameInfo,
                            SourceLocation EllipsisLoc)
-    : ValueDecl(UnresolvedUsingValue, DC,
-                NameInfo.getLoc(), NameInfo.getName(), Ty),
-      UsingLocation(UsingLoc), EllipsisLoc(EllipsisLoc),
-      QualifierLoc(QualifierLoc), DNLoc(NameInfo.getInfo())
-  { }
+      : ValueDecl(UnresolvedUsingValue, DC,
+                  NameInfo.getLoc(), NameInfo.getName(), Ty),
+        UsingLocation(UsingLoc), EllipsisLoc(EllipsisLoc),
+        QualifierLoc(QualifierLoc), DNLoc(NameInfo.getInfo()) {}
+
+  void anchor() override;
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   /// \brief Returns the source location of the 'using' keyword.
   SourceLocation getUsingLoc() const { return UsingLocation; }
 
@@ -3499,9 +3632,6 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == UnresolvedUsingValue; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// \brief Represents a dependent using declaration which was marked with
@@ -3518,7 +3648,7 @@
 class UnresolvedUsingTypenameDecl
     : public TypeDecl,
       public Mergeable<UnresolvedUsingTypenameDecl> {
-  void anchor() override;
+  friend class ASTDeclReader;
 
   /// \brief The source location of the 'typename' keyword
   SourceLocation TypenameLocation;
@@ -3538,9 +3668,9 @@
     : TypeDecl(UnresolvedUsingTypename, DC, TargetNameLoc, TargetName,
                UsingLoc),
       TypenameLocation(TypenameLoc), EllipsisLoc(EllipsisLoc),
-      QualifierLoc(QualifierLoc) { }
+      QualifierLoc(QualifierLoc) {}
 
-  friend class ASTDeclReader;
+  void anchor() override;
 
 public:
   /// \brief Returns the source location of the 'using' keyword.
@@ -3595,7 +3725,6 @@
 
 /// \brief Represents a C++11 static_assert declaration.
 class StaticAssertDecl : public Decl {
-  virtual void anchor();
   llvm::PointerIntPair<Expr *, 1, bool> AssertExprAndFailed;
   StringLiteral *Message;
   SourceLocation RParenLoc;
@@ -3603,11 +3732,15 @@
   StaticAssertDecl(DeclContext *DC, SourceLocation StaticAssertLoc,
                    Expr *AssertExpr, StringLiteral *Message,
                    SourceLocation RParenLoc, bool Failed)
-    : Decl(StaticAssert, DC, StaticAssertLoc),
-      AssertExprAndFailed(AssertExpr, Failed), Message(Message),
-      RParenLoc(RParenLoc) { }
+      : Decl(StaticAssert, DC, StaticAssertLoc),
+        AssertExprAndFailed(AssertExpr, Failed), Message(Message),
+        RParenLoc(RParenLoc) {}
+
+  virtual void anchor();
 
 public:
+  friend class ASTDeclReader;
+
   static StaticAssertDecl *Create(ASTContext &C, DeclContext *DC,
                                   SourceLocation StaticAssertLoc,
                                   Expr *AssertExpr, StringLiteral *Message,
@@ -3630,8 +3763,6 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == StaticAssert; }
-
-  friend class ASTDeclReader;
 };
 
 /// A binding in a decomposition declaration. For instance, given:
@@ -3643,18 +3774,20 @@
 /// x[0], x[1], and x[2] respectively, where x is the implicit
 /// DecompositionDecl of type 'int (&)[3]'.
 class BindingDecl : public ValueDecl {
-  void anchor() override;
-
   /// The binding represented by this declaration. References to this
   /// declaration are effectively equivalent to this expression (except
   /// that it is only evaluated once at the point of declaration of the
   /// binding).
-  Expr *Binding;
+  Expr *Binding = nullptr;
 
   BindingDecl(DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id)
-      : ValueDecl(Decl::Binding, DC, IdLoc, Id, QualType()), Binding(nullptr) {}
+      : ValueDecl(Decl::Binding, DC, IdLoc, Id, QualType()) {}
+
+  void anchor() override;
 
 public:
+  friend class ASTDeclReader;
+
   static BindingDecl *Create(ASTContext &C, DeclContext *DC,
                              SourceLocation IdLoc, IdentifierInfo *Id);
   static BindingDecl *CreateDeserialized(ASTContext &C, unsigned ID);
@@ -3678,8 +3811,6 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == Decl::Binding; }
-
-  friend class ASTDeclReader;
 };
 
 /// A decomposition declaration. For instance, given:
@@ -3693,8 +3824,6 @@
 class DecompositionDecl final
     : public VarDecl,
       private llvm::TrailingObjects<DecompositionDecl, BindingDecl *> {
-  void anchor() override;
-
   /// The number of BindingDecl*s following this object.
   unsigned NumBindings;
 
@@ -3709,7 +3838,12 @@
                             getTrailingObjects<BindingDecl *>());
   }
 
+  void anchor() override;
+
 public:
+  friend class ASTDeclReader;
+  friend TrailingObjects;
+
   static DecompositionDecl *Create(ASTContext &C, DeclContext *DC,
                                    SourceLocation StartLoc,
                                    SourceLocation LSquareLoc,
@@ -3727,9 +3861,6 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == Decomposition; }
-
-  friend TrailingObjects;
-  friend class ASTDeclReader;
 };
 
 /// An instance of this class represents the declaration of a property
@@ -3769,6 +3900,8 @@
         GetterId(Getter), SetterId(Setter) {}
 
 public:
+  friend class ASTDeclReader;
+
   static MSPropertyDecl *Create(ASTContext &C, DeclContext *DC,
                                 SourceLocation L, DeclarationName N, QualType T,
                                 TypeSourceInfo *TInfo, SourceLocation StartL,
@@ -3781,8 +3914,6 @@
   IdentifierInfo* getGetterId() const { return GetterId; }
   bool hasSetter() const { return SetterId != nullptr; }
   IdentifierInfo* getSetterId() const { return SetterId; }
-
-  friend class ASTDeclReader;
 };
 
 /// Insertion operator for diagnostics.  This allows sending an AccessSpecifier
@@ -3793,6 +3924,6 @@
 const PartialDiagnostic &operator<<(const PartialDiagnostic &DB,
                                     AccessSpecifier AS);
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_DECLCXX_H
diff --git a/include/clang/AST/DeclContextInternals.h b/include/clang/AST/DeclContextInternals.h
index eb86526..6545f70 100644
--- a/include/clang/AST/DeclContextInternals.h
+++ b/include/clang/AST/DeclContextInternals.h
@@ -1,4 +1,4 @@
-//===-- DeclContextInternals.h - DeclContext Representation -----*- C++ -*-===//
+//===- DeclContextInternals.h - DeclContext Representation ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,10 +11,12 @@
 //  of DeclContext.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_AST_DECLCONTEXTINTERNALS_H
 #define LLVM_CLANG_AST_DECLCONTEXTINTERNALS_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclarationName.h"
 #include "llvm/ADT/DenseMap.h"
@@ -22,6 +24,7 @@
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
 #include <algorithm>
+#include <cassert>
 
 namespace clang {
 
@@ -30,21 +33,20 @@
 /// \brief An array of decls optimized for the common case of only containing
 /// one entry.
 struct StoredDeclsList {
-
   /// \brief When in vector form, this is what the Data pointer points to.
-  typedef SmallVector<NamedDecl *, 4> DeclsTy;
+  using DeclsTy = SmallVector<NamedDecl *, 4>;
 
   /// \brief A collection of declarations, with a flag to indicate if we have
   /// further external declarations.
-  typedef llvm::PointerIntPair<DeclsTy *, 1, bool> DeclsAndHasExternalTy;
+  using DeclsAndHasExternalTy = llvm::PointerIntPair<DeclsTy *, 1, bool>;
 
   /// \brief The stored data, which will be either a pointer to a NamedDecl,
   /// or a pointer to a vector with a flag to indicate if there are further
   /// external declarations.
-  llvm::PointerUnion<NamedDecl*, DeclsAndHasExternalTy> Data;
+  llvm::PointerUnion<NamedDecl *, DeclsAndHasExternalTy> Data;
 
 public:
-  StoredDeclsList() {}
+  StoredDeclsList() = default;
 
   StoredDeclsList(StoredDeclsList &&RHS) : Data(RHS.Data) {
     RHS.Data = (NamedDecl *)nullptr;
@@ -186,7 +188,6 @@
 
   /// AddSubsequentDecl - This is called on the second and later decl when it is
   /// not a redeclaration to merge it into the appropriate place in our list.
-  ///
   void AddSubsequentDecl(NamedDecl *D) {
     assert(!isNull() && "don't AddSubsequentDecl when we have no decls");
 
@@ -237,28 +238,28 @@
 };
 
 class StoredDeclsMap
-  : public llvm::SmallDenseMap<DeclarationName, StoredDeclsList, 4> {
-
+    : public llvm::SmallDenseMap<DeclarationName, StoredDeclsList, 4> {
 public:
   static void DestroyAll(StoredDeclsMap *Map, bool Dependent);
 
 private:
   friend class ASTContext; // walks the chain deleting these
   friend class DeclContext;
+
   llvm::PointerIntPair<StoredDeclsMap*, 1> Previous;
 };
 
 class DependentStoredDeclsMap : public StoredDeclsMap {
 public:
-  DependentStoredDeclsMap() : FirstDiagnostic(nullptr) {}
+  DependentStoredDeclsMap() = default;
 
 private:
-  friend class DependentDiagnostic;
   friend class DeclContext; // iterates over diagnostics
+  friend class DependentDiagnostic;
 
-  DependentDiagnostic *FirstDiagnostic;
+  DependentDiagnostic *FirstDiagnostic = nullptr;
 };
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_DECLCONTEXTINTERNALS_H
diff --git a/include/clang/AST/DeclFriend.h b/include/clang/AST/DeclFriend.h
index 172c46a..5d1c6b8 100644
--- a/include/clang/AST/DeclFriend.h
+++ b/include/clang/AST/DeclFriend.h
@@ -1,4 +1,4 @@
-//===-- DeclFriend.h - Classes for C++ friend declarations -*- C++ -*------===//
+//===- DeclFriend.h - Classes for C++ friend declarations -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,16 +15,30 @@
 #ifndef LLVM_CLANG_AST_DECLFRIEND_H
 #define LLVM_CLANG_AST_DECLFRIEND_H
 
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/ExternalASTSource.h"
 #include "clang/AST/TypeLoc.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/TrailingObjects.h"
+#include <cassert>
+#include <iterator>
 
 namespace clang {
 
+class ASTContext;
+
 /// FriendDecl - Represents the declaration of a friend entity,
 /// which can be a function, a type, or a templated function or type.
-//  For example:
+/// For example:
 ///
 /// @code
 /// template <typename T> class A {
@@ -41,10 +55,14 @@
     : public Decl,
       private llvm::TrailingObjects<FriendDecl, TemplateParameterList *> {
   virtual void anchor();
+
 public:
-  typedef llvm::PointerUnion<NamedDecl*,TypeSourceInfo*> FriendUnion;
+  using FriendUnion = llvm::PointerUnion<NamedDecl *, TypeSourceInfo *>;
 
 private:
+  friend class CXXRecordDecl;
+  friend class CXXRecordDecl::friend_iterator;
+
   // The declaration that's a friend of this class.
   FriendUnion Friend;
 
@@ -64,35 +82,33 @@
   //     template <class T> friend class A<T>::B;
   unsigned NumTPLists : 31;
 
-  friend class CXXRecordDecl::friend_iterator;
-  friend class CXXRecordDecl;
-
   FriendDecl(DeclContext *DC, SourceLocation L, FriendUnion Friend,
              SourceLocation FriendL,
-             ArrayRef<TemplateParameterList*> FriendTypeTPLists)
-    : Decl(Decl::Friend, DC, L),
-      Friend(Friend),
-      NextFriend(),
-      FriendLoc(FriendL),
-      UnsupportedFriend(false),
-      NumTPLists(FriendTypeTPLists.size()) {
+             ArrayRef<TemplateParameterList *> FriendTypeTPLists)
+      : Decl(Decl::Friend, DC, L), Friend(Friend), FriendLoc(FriendL),
+        UnsupportedFriend(false), NumTPLists(FriendTypeTPLists.size()) {
     for (unsigned i = 0; i < NumTPLists; ++i)
       getTrailingObjects<TemplateParameterList *>()[i] = FriendTypeTPLists[i];
   }
 
   FriendDecl(EmptyShell Empty, unsigned NumFriendTypeTPLists)
-    : Decl(Decl::Friend, Empty), NextFriend(),
-      UnsupportedFriend(false),
-      NumTPLists(NumFriendTypeTPLists) { }
+      : Decl(Decl::Friend, Empty), UnsupportedFriend(false),
+        NumTPLists(NumFriendTypeTPLists) {}
 
   FriendDecl *getNextFriend() {
     if (!NextFriend.isOffset())
       return cast_or_null<FriendDecl>(NextFriend.get(nullptr));
     return getNextFriendSlowCase();
   }
+
   FriendDecl *getNextFriendSlowCase();
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+  friend class ASTNodeImporter;
+  friend TrailingObjects;
+
   static FriendDecl *Create(ASTContext &C, DeclContext *DC,
                             SourceLocation L, FriendUnion Friend_,
                             SourceLocation FriendL,
@@ -108,9 +124,11 @@
   TypeSourceInfo *getFriendType() const {
     return Friend.dyn_cast<TypeSourceInfo*>();
   }
+
   unsigned getFriendTypeNumTemplateParameterLists() const {
     return NumTPLists;
   }
+
   TemplateParameterList *getFriendTypeTemplateParameterList(unsigned N) const {
     assert(N < NumTPLists);
     return getTrailingObjects<TemplateParameterList *>()[N];
@@ -119,7 +137,7 @@
   /// If this friend declaration doesn't name a type, return the inner
   /// declaration.
   NamedDecl *getFriendDecl() const {
-    return Friend.dyn_cast<NamedDecl*>();
+    return Friend.dyn_cast<NamedDecl *>();
   }
 
   /// Retrieves the location of the 'friend' keyword.
@@ -164,27 +182,24 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == Decl::Friend; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
-  friend class ASTNodeImporter;
-  friend TrailingObjects;
 };
 
 /// An iterator over the friend declarations of a class.
 class CXXRecordDecl::friend_iterator {
+  friend class CXXRecordDecl;
+
   FriendDecl *Ptr;
 
-  friend class CXXRecordDecl;
   explicit friend_iterator(FriendDecl *Ptr) : Ptr(Ptr) {}
-public:
-  friend_iterator() {}
 
-  typedef FriendDecl *value_type;
-  typedef FriendDecl *reference;
-  typedef FriendDecl *pointer;
-  typedef int difference_type;
-  typedef std::forward_iterator_tag iterator_category;
+public:
+  friend_iterator() = default;
+
+  using value_type = FriendDecl *;
+  using reference = FriendDecl *;
+  using pointer = FriendDecl *;
+  using difference_type = int;
+  using iterator_category = std::forward_iterator_tag;
 
   reference operator*() const { return Ptr; }
 
@@ -240,6 +255,6 @@
   data().FirstFriend = FD;
 }
   
-}
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_DECLFRIEND_H
diff --git a/include/clang/AST/DeclGroup.h b/include/clang/AST/DeclGroup.h
index 628d788..6d5aaad 100644
--- a/include/clang/AST/DeclGroup.h
+++ b/include/clang/AST/DeclGroup.h
@@ -1,4 +1,4 @@
-//===--- DeclGroup.h - Classes for representing groups of Decls -*- C++ -*-===//
+//===- DeclGroup.h - Classes for representing groups of Decls ---*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,26 +14,26 @@
 #ifndef LLVM_CLANG_AST_DECLGROUP_H
 #define LLVM_CLANG_AST_DECLGROUP_H
 
-#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/TrailingObjects.h"
 #include <cassert>
+#include <cstdint>
 
 namespace clang {
 
 class ASTContext;
 class Decl;
-class DeclGroup;
-class DeclGroupIterator;
 
 class DeclGroup final : private llvm::TrailingObjects<DeclGroup, Decl *> {
   // FIXME: Include a TypeSpecifier object.
-  unsigned NumDecls;
+  unsigned NumDecls = 0;
 
 private:
-  DeclGroup() : NumDecls(0) {}
+  DeclGroup() = default;
   DeclGroup(unsigned numdecls, Decl** decls);
 
 public:
+  friend TrailingObjects;
+
   static DeclGroup *Create(ASTContext &C, Decl **Decls, unsigned NumDecls);
 
   unsigned size() const { return NumDecls; }
@@ -47,23 +47,21 @@
     assert (i < NumDecls && "Out-of-bounds access.");
     return getTrailingObjects<Decl *>()[i];
   }
-
-  friend TrailingObjects;
 };
 
 class DeclGroupRef {
   // Note this is not a PointerIntPair because we need the address of the
   // non-group case to be valid as a Decl** for iteration.
   enum Kind { SingleDeclKind=0x0, DeclGroupKind=0x1, Mask=0x1 };
-  Decl* D;
+
+  Decl* D = nullptr;
 
   Kind getKind() const {
     return (Kind) (reinterpret_cast<uintptr_t>(D) & Mask);
   }
 
 public:
-  DeclGroupRef() : D(nullptr) {}
-
+  DeclGroupRef() = default;
   explicit DeclGroupRef(Decl* d) : D(d) {}
   explicit DeclGroupRef(DeclGroup* dg)
     : D((Decl*) (reinterpret_cast<uintptr_t>(dg) | DeclGroupKind)) {}
@@ -76,8 +74,8 @@
     return DeclGroupRef(DeclGroup::Create(C, Decls, NumDecls));
   }
 
-  typedef Decl** iterator;
-  typedef Decl* const * const_iterator;
+  using iterator = Decl **;
+  using const_iterator = Decl * const *;
 
   bool isNull() const { return D == nullptr; }
   bool isSingleDecl() const { return getKind() == SingleDeclKind; }
@@ -133,9 +131,10 @@
   }
 };
 
-} // end clang namespace
+} // namespace clang
 
 namespace llvm {
+
   // DeclGroupRef is "like a pointer", implement PointerLikeTypeTraits.
   template <typename T>
   struct PointerLikeTypeTraits;
@@ -144,10 +143,14 @@
     static inline void *getAsVoidPointer(clang::DeclGroupRef P) {
       return P.getAsOpaquePtr();
     }
+
     static inline clang::DeclGroupRef getFromVoidPointer(void *P) {
       return clang::DeclGroupRef::getFromOpaquePtr(P);
     }
+
     enum { NumLowBitsAvailable = 0 };
   };
-}
-#endif
+
+} // namespace llvm
+
+#endif // LLVM_CLANG_AST_DECLGROUP_H
diff --git a/include/clang/AST/DeclLookups.h b/include/clang/AST/DeclLookups.h
index eba2266..64eb3f2 100644
--- a/include/clang/AST/DeclLookups.h
+++ b/include/clang/AST/DeclLookups.h
@@ -1,4 +1,4 @@
-//===-- DeclLookups.h - Low-level interface to all names in a DC-*- C++ -*-===//
+//===- DeclLookups.h - Low-level interface to all names in a DC -*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,6 +18,9 @@
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclContextInternals.h"
 #include "clang/AST/DeclarationName.h"
+#include "clang/AST/ExternalASTSource.h"
+#include <cstddef>
+#include <iterator>
 
 namespace clang {
 
@@ -25,14 +28,15 @@
 /// of looking up every possible name.
 class DeclContext::all_lookups_iterator {
   StoredDeclsMap::iterator It, End;
-public:
-  typedef lookup_result             value_type;
-  typedef lookup_result             reference;
-  typedef lookup_result             pointer;
-  typedef std::forward_iterator_tag iterator_category;
-  typedef std::ptrdiff_t            difference_type;
 
-  all_lookups_iterator() {}
+public:
+  using value_type = lookup_result;
+  using reference = lookup_result;
+  using pointer = lookup_result;
+  using iterator_category = std::forward_iterator_tag;
+  using difference_type = std::ptrdiff_t;
+
+  all_lookups_iterator() = default;
   all_lookups_iterator(StoredDeclsMap::iterator It,
                        StoredDeclsMap::iterator End)
       : It(It), End(End) {}
@@ -63,6 +67,7 @@
   friend bool operator==(all_lookups_iterator x, all_lookups_iterator y) {
     return x.It == y.It;
   }
+
   friend bool operator!=(all_lookups_iterator x, all_lookups_iterator y) {
     return x.It != y.It;
   }
@@ -81,16 +86,11 @@
   return lookups_range(all_lookups_iterator(), all_lookups_iterator());
 }
 
-inline DeclContext::all_lookups_iterator DeclContext::lookups_begin() const {
-  return lookups().begin();
-}
-
-inline DeclContext::all_lookups_iterator DeclContext::lookups_end() const {
-  return lookups().end();
-}
-
-inline DeclContext::lookups_range DeclContext::noload_lookups() const {
+inline DeclContext::lookups_range
+DeclContext::noload_lookups(bool PreserveInternalState) const {
   DeclContext *Primary = const_cast<DeclContext*>(this)->getPrimaryContext();
+  if (!PreserveInternalState)
+    Primary->loadLazyLocalLexicalLookups();
   if (StoredDeclsMap *Map = Primary->getLookupPtr())
     return lookups_range(all_lookups_iterator(Map->begin(), Map->end()),
                          all_lookups_iterator(Map->end(), Map->end()));
@@ -100,16 +100,6 @@
   return lookups_range(all_lookups_iterator(), all_lookups_iterator());
 }
 
-inline
-DeclContext::all_lookups_iterator DeclContext::noload_lookups_begin() const {
-  return noload_lookups().begin();
-}
+} // namespace clang
 
-inline
-DeclContext::all_lookups_iterator DeclContext::noload_lookups_end() const {
-  return noload_lookups().end();
-}
-
-} // end namespace clang
-
-#endif
+#endif // LLVM_CLANG_AST_DECLLOOKUPS_H
diff --git a/include/clang/AST/DeclObjC.h b/include/clang/AST/DeclObjC.h
index 1cd6e00..cef7d93 100644
--- a/include/clang/AST/DeclObjC.h
+++ b/include/clang/AST/DeclObjC.h
@@ -1,4 +1,4 @@
-//===--- DeclObjC.h - Classes for representing declarations -----*- C++ -*-===//
+//===- DeclObjC.h - Classes for representing declarations -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,33 +15,59 @@
 #define LLVM_CLANG_AST_DECLOBJC_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/ExternalASTSource.h"
+#include "clang/AST/Redeclarable.h"
 #include "clang/AST/SelectorLocationsKind.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/TrailingObjects.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
 
 namespace clang {
+
+class ASTContext;
+class CompoundStmt;
+class CXXCtorInitializer;
 class Expr;
-class Stmt;
-class FunctionDecl;
-class RecordDecl;
-class ObjCIvarDecl;
-class ObjCMethodDecl;
-class ObjCProtocolDecl;
 class ObjCCategoryDecl;
+class ObjCCategoryImplDecl;
+class ObjCImplementationDecl;
+class ObjCInterfaceDecl;
+class ObjCIvarDecl;
 class ObjCPropertyDecl;
 class ObjCPropertyImplDecl;
-class CXXCtorInitializer;
+class ObjCProtocolDecl;
+class Stmt;
 
 class ObjCListBase {
-  ObjCListBase(const ObjCListBase &) = delete;
-  void operator=(const ObjCListBase &) = delete;
 protected:
   /// List is an array of pointers to objects that are not owned by this object.
-  void **List;
-  unsigned NumElts;
+  void **List = nullptr;
+  unsigned NumElts = 0;
 
 public:
-  ObjCListBase() : List(nullptr), NumElts(0) {}
+  ObjCListBase() = default;
+  ObjCListBase(const ObjCListBase &) = delete;
+  ObjCListBase &operator=(const ObjCListBase &) = delete;
+
   unsigned size() const { return NumElts; }
   bool empty() const { return NumElts == 0; }
 
@@ -49,7 +75,6 @@
   void set(void *const* InList, unsigned Elts, ASTContext &Ctx);
 };
 
-
 /// ObjCList - This is a simple template class used to hold various lists of
 /// decls etc, which is heavily used by the ObjC front-end.  This only use case
 /// this supports is setting the list all at once and then reading elements out
@@ -61,7 +86,8 @@
     ObjCListBase::set(reinterpret_cast<void*const*>(InList), Elts, Ctx);
   }
 
-  typedef T* const * iterator;
+  using iterator = T* const *;
+
   iterator begin() const { return (iterator)List; }
   iterator end() const { return (iterator)List+NumElts; }
 
@@ -74,14 +100,15 @@
 /// \brief A list of Objective-C protocols, along with the source
 /// locations at which they were referenced.
 class ObjCProtocolList : public ObjCList<ObjCProtocolDecl> {
-  SourceLocation *Locations;
+  SourceLocation *Locations = nullptr;
 
   using ObjCList<ObjCProtocolDecl>::set;
 
 public:
-  ObjCProtocolList() : ObjCList<ObjCProtocolDecl>(), Locations(nullptr) { }
+  ObjCProtocolList() = default;
 
-  typedef const SourceLocation *loc_iterator;
+  using loc_iterator = const SourceLocation *;
+
   loc_iterator loc_begin() const { return Locations; }
   loc_iterator loc_end() const { return Locations + size(); }
 
@@ -89,7 +116,6 @@
            const SourceLocation *Locs, ASTContext &Ctx);
 };
 
-
 /// ObjCMethodDecl - Represents an instance or class method declaration.
 /// ObjC methods can be declared within 4 contexts: class interfaces,
 /// categories, protocols, and class implementations. While C++ member
@@ -113,6 +139,7 @@
 class ObjCMethodDecl : public NamedDecl, public DeclContext {
 public:
   enum ImplementationControl { None, Required, Optional };
+
 private:
   // The conventional meaning of this method; an ObjCMethodFamily.
   // This is not serialized; instead, it is computed on demand and
@@ -170,8 +197,8 @@
 
   /// \brief Array of ParmVarDecls for the formal parameters of this method
   /// and optionally followed by selector locations.
-  void *ParamsAndSelLocs;
-  unsigned NumParams;
+  void *ParamsAndSelLocs = nullptr;
+  unsigned NumParams = 0;
 
   /// List of attributes for this method declaration.
   SourceLocation DeclEndLoc; // the location of the ';' or '{'.
@@ -181,14 +208,35 @@
 
   /// SelfDecl - Decl for the implicit self parameter. This is lazily
   /// constructed by createImplicitParams.
-  ImplicitParamDecl *SelfDecl;
+  ImplicitParamDecl *SelfDecl = nullptr;
+
   /// CmdDecl - Decl for the implicit _cmd parameter. This is lazily
   /// constructed by createImplicitParams.
-  ImplicitParamDecl *CmdDecl;
+  ImplicitParamDecl *CmdDecl = nullptr;
+
+  ObjCMethodDecl(SourceLocation beginLoc, SourceLocation endLoc,
+                 Selector SelInfo, QualType T, TypeSourceInfo *ReturnTInfo,
+                 DeclContext *contextDecl, bool isInstance = true,
+                 bool isVariadic = false, bool isPropertyAccessor = false,
+                 bool isImplicitlyDeclared = false, bool isDefined = false,
+                 ImplementationControl impControl = None,
+                 bool HasRelatedResultType = false)
+      : NamedDecl(ObjCMethod, contextDecl, beginLoc, SelInfo),
+        DeclContext(ObjCMethod), Family(InvalidObjCMethodFamily),
+        IsInstance(isInstance), IsVariadic(isVariadic),
+        IsPropertyAccessor(isPropertyAccessor), IsDefined(isDefined),
+        IsRedeclaration(0), HasRedeclaration(0), DeclImplementation(impControl),
+        objcDeclQualifier(OBJC_TQ_None),
+        RelatedResultType(HasRelatedResultType),
+        SelLocsKind(SelLoc_StandardNoSpace), IsOverriding(0), HasSkippedBody(0),
+        MethodDeclType(T), ReturnTInfo(ReturnTInfo), DeclEndLoc(endLoc) {
+    setImplicit(isImplicitlyDeclared);
+  }
 
   SelectorLocationsKind getSelLocsKind() const {
     return (SelectorLocationsKind)SelLocsKind;
   }
+
   bool hasStandardSelLocs() const {
     return getSelLocsKind() != SelLoc_NonStandard;
   }
@@ -223,33 +271,15 @@
                            ArrayRef<ParmVarDecl*> Params,
                            ArrayRef<SourceLocation> SelLocs);
 
-  ObjCMethodDecl(SourceLocation beginLoc, SourceLocation endLoc,
-                 Selector SelInfo, QualType T, TypeSourceInfo *ReturnTInfo,
-                 DeclContext *contextDecl, bool isInstance = true,
-                 bool isVariadic = false, bool isPropertyAccessor = false,
-                 bool isImplicitlyDeclared = false, bool isDefined = false,
-                 ImplementationControl impControl = None,
-                 bool HasRelatedResultType = false)
-      : NamedDecl(ObjCMethod, contextDecl, beginLoc, SelInfo),
-        DeclContext(ObjCMethod), Family(InvalidObjCMethodFamily),
-        IsInstance(isInstance), IsVariadic(isVariadic),
-        IsPropertyAccessor(isPropertyAccessor), IsDefined(isDefined),
-        IsRedeclaration(0), HasRedeclaration(0), DeclImplementation(impControl),
-        objcDeclQualifier(OBJC_TQ_None),
-        RelatedResultType(HasRelatedResultType),
-        SelLocsKind(SelLoc_StandardNoSpace), IsOverriding(0), HasSkippedBody(0),
-        MethodDeclType(T), ReturnTInfo(ReturnTInfo), ParamsAndSelLocs(nullptr),
-        NumParams(0), DeclEndLoc(endLoc), Body(), SelfDecl(nullptr),
-        CmdDecl(nullptr) {
-    setImplicit(isImplicitlyDeclared);
-  }
-
   /// \brief A definition will return its interface declaration.
   /// An interface declaration will return its definition.
   /// Otherwise it will return itself.
   ObjCMethodDecl *getNextRedeclarationImpl() override;
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static ObjCMethodDecl *
   Create(ASTContext &C, SourceLocation beginLoc, SourceLocation endLoc,
          Selector SelInfo, QualType T, TypeSourceInfo *ReturnTInfo,
@@ -299,6 +329,7 @@
       return getLocStart();
     return getSelectorLoc(0);
   }
+
   SourceLocation getSelectorLoc(unsigned Index) const {
     assert(Index < getNumSelectorLocs() && "Index out of range!");
     if (hasStandardSelLocs())
@@ -346,17 +377,20 @@
 
   // Iterator access to formal parameters.
   unsigned param_size() const { return NumParams; }
-  typedef const ParmVarDecl *const *param_const_iterator;
-  typedef ParmVarDecl *const *param_iterator;
-  typedef llvm::iterator_range<param_iterator> param_range;
-  typedef llvm::iterator_range<param_const_iterator> param_const_range;
+
+  using param_const_iterator = const ParmVarDecl *const *;
+  using param_iterator = ParmVarDecl *const *;
+  using param_range = llvm::iterator_range<param_iterator>;
+  using param_const_range = llvm::iterator_range<param_const_iterator>;
 
   param_const_iterator param_begin() const {
     return param_const_iterator(getParams());
   }
+
   param_const_iterator param_end() const {
     return param_const_iterator(getParams() + NumParams);
   }
+
   param_iterator param_begin() { return param_iterator(getParams()); }
   param_iterator param_end() { return param_iterator(getParams() + NumParams); }
 
@@ -384,12 +418,14 @@
   struct GetTypeFn {
     QualType operator()(const ParmVarDecl *PD) const { return PD->getType(); }
   };
-  typedef llvm::mapped_iterator<param_const_iterator, GetTypeFn>
-      param_type_iterator;
+
+  using param_type_iterator =
+      llvm::mapped_iterator<param_const_iterator, GetTypeFn>;
 
   param_type_iterator param_type_begin() const {
     return llvm::map_iterator(param_begin(), GetTypeFn());
   }
+
   param_type_iterator param_type_end() const {
     return llvm::map_iterator(param_end(), GetTypeFn());
   }
@@ -462,9 +498,11 @@
   void setDeclImplementation(ImplementationControl ic) {
     DeclImplementation = ic;
   }
+
   ImplementationControl getImplementationControl() const {
     return ImplementationControl(DeclImplementation);
   }
+
   bool isOptional() const {
     return getImplementationControl() == Optional;
   }
@@ -499,24 +537,25 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ObjCMethod; }
+
   static DeclContext *castToDeclContext(const ObjCMethodDecl *D) {
     return static_cast<DeclContext *>(const_cast<ObjCMethodDecl*>(D));
   }
+
   static ObjCMethodDecl *castFromDeclContext(const DeclContext *DC) {
     return static_cast<ObjCMethodDecl *>(const_cast<DeclContext*>(DC));
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// Describes the variance of a given generic parameter.
 enum class ObjCTypeParamVariance : uint8_t {
   /// The parameter is invariant: must match exactly.
   Invariant,
+
   /// The parameter is covariant, e.g., X<T> is a subtype of X<U> when
   /// the type parameter is covariant and T is a subtype of U.
   Covariant,
+
   /// The parameter is contravariant, e.g., X<T> is a subtype of X<U>
   /// when the type parameter is covariant and U is a subtype of T.
   Contravariant,
@@ -535,8 +574,6 @@
 ///
 /// Objective-C type parameters are typedef-names in the grammar,
 class ObjCTypeParamDecl : public TypedefNameDecl {
-  void anchor() override;
-
   /// Index of this type parameter in the type parameter list.
   unsigned Index : 14;
 
@@ -555,12 +592,17 @@
                     unsigned index,
                     SourceLocation nameLoc, IdentifierInfo *name,
                     SourceLocation colonLoc, TypeSourceInfo *boundInfo)
-    : TypedefNameDecl(ObjCTypeParam, ctx, dc, nameLoc, nameLoc, name,
-                      boundInfo),
-      Index(index), Variance(static_cast<unsigned>(variance)),
-      VarianceLoc(varianceLoc), ColonLoc(colonLoc) { }
+      : TypedefNameDecl(ObjCTypeParam, ctx, dc, nameLoc, nameLoc, name,
+                        boundInfo),
+        Index(index), Variance(static_cast<unsigned>(variance)),
+        VarianceLoc(varianceLoc), ColonLoc(colonLoc) {}
+
+  void anchor() override;
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static ObjCTypeParamDecl *Create(ASTContext &ctx, DeclContext *dc,
                                    ObjCTypeParamVariance variance,
                                    SourceLocation varianceLoc,
@@ -600,9 +642,6 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ObjCTypeParam; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// Stores a list of Objective-C type parameters for a parameterized class
@@ -636,6 +675,8 @@
                     SourceLocation rAngleLoc);
 
 public:
+  friend TrailingObjects;
+
   /// Create a new Objective-C type parameter list.
   static ObjCTypeParamList *create(ASTContext &ctx,
                                    SourceLocation lAngleLoc,
@@ -643,7 +684,7 @@
                                    SourceLocation rAngleLoc);
 
   /// Iterate through the type parameters in the list.
-  typedef ObjCTypeParamDecl **iterator;
+  using iterator = ObjCTypeParamDecl **;
 
   iterator begin() { return getTrailingObjects<ObjCTypeParamDecl *>(); }
 
@@ -653,7 +694,7 @@
   unsigned size() const { return NumParams; }
 
   // Iterate through the type parameters in the list.
-  typedef ObjCTypeParamDecl * const *const_iterator;
+  using const_iterator = ObjCTypeParamDecl * const *;
 
   const_iterator begin() const {
     return getTrailingObjects<ObjCTypeParamDecl *>();
@@ -676,9 +717,11 @@
   SourceLocation getLAngleLoc() const {
     return SourceLocation::getFromRawEncoding(Brackets.Begin);
   }
+
   SourceLocation getRAngleLoc() const {
     return SourceLocation::getFromRawEncoding(Brackets.End);
   }
+
   SourceRange getSourceRange() const {
     return SourceRange(getLAngleLoc(), getRAngleLoc());
   }
@@ -686,7 +729,6 @@
   /// Gather the default set of type arguments to be substituted for
   /// these type parameters when dealing with an unspecialized type.
   void gatherDefaultTypeArgs(SmallVectorImpl<QualType> &typeArgs) const;
-  friend TrailingObjects;
 };
 
 enum class ObjCPropertyQueryKind : uint8_t {
@@ -703,6 +745,7 @@
 /// \endcode
 class ObjCPropertyDecl : public NamedDecl {
   void anchor() override;
+
 public:
   enum PropertyAttributeKind {
     OBJC_PR_noattr    = 0x00,
@@ -733,24 +776,42 @@
 
   enum SetterKind { Assign, Retain, Copy, Weak };
   enum PropertyControl { None, Required, Optional };
+
 private:
-  SourceLocation AtLoc;   // location of \@property
-  SourceLocation LParenLoc; // location of '(' starting attribute list or null.
+  // location of \@property
+  SourceLocation AtLoc;
+
+  // location of '(' starting attribute list or null.
+  SourceLocation LParenLoc;
+
   QualType DeclType;
   TypeSourceInfo *DeclTypeSourceInfo;
   unsigned PropertyAttributes : NumPropertyAttrsBits;
   unsigned PropertyAttributesAsWritten : NumPropertyAttrsBits;
+
   // \@required/\@optional
   unsigned PropertyImplementation : 2;
 
-  Selector GetterName;    // getter name of NULL if no getter
-  Selector SetterName;    // setter name of NULL if no setter
-  SourceLocation GetterNameLoc; // location of the getter attribute's value
-  SourceLocation SetterNameLoc; // location of the setter attribute's value
+  // getter name of NULL if no getter
+  Selector GetterName;
 
-  ObjCMethodDecl *GetterMethodDecl; // Declaration of getter instance method
-  ObjCMethodDecl *SetterMethodDecl; // Declaration of setter instance method
-  ObjCIvarDecl *PropertyIvarDecl;   // Synthesize ivar for this property
+  // setter name of NULL if no setter
+  Selector SetterName;
+
+  // location of the getter attribute's value
+  SourceLocation GetterNameLoc;
+
+  // location of the setter attribute's value
+  SourceLocation SetterNameLoc;
+
+  // Declaration of getter instance method
+  ObjCMethodDecl *GetterMethodDecl = nullptr;
+
+  // Declaration of setter instance method
+  ObjCMethodDecl *SetterMethodDecl = nullptr;
+
+  // Synthesize ivar for this property
+  ObjCIvarDecl *PropertyIvarDecl = nullptr;
 
   ObjCPropertyDecl(DeclContext *DC, SourceLocation L, IdentifierInfo *Id,
                    SourceLocation AtLocation,  SourceLocation LParenLocation,
@@ -760,11 +821,8 @@
       LParenLoc(LParenLocation), DeclType(T), DeclTypeSourceInfo(TSI),
       PropertyAttributes(OBJC_PR_noattr),
       PropertyAttributesAsWritten(OBJC_PR_noattr),
-      PropertyImplementation(propControl),
-      GetterName(Selector()),
-      SetterName(Selector()),
-      GetterMethodDecl(nullptr), SetterMethodDecl(nullptr),
-      PropertyIvarDecl(nullptr) {}
+      PropertyImplementation(propControl), GetterName(Selector()),
+      SetterName(Selector()) {}
 
 public:
   static ObjCPropertyDecl *Create(ASTContext &C, DeclContext *DC,
@@ -799,9 +857,11 @@
   PropertyAttributeKind getPropertyAttributes() const {
     return PropertyAttributeKind(PropertyAttributes);
   }
+
   void setPropertyAttributes(PropertyAttributeKind PRVal) {
     PropertyAttributes |= PRVal;
   }
+
   void overwritePropertyAttributes(unsigned PRVal) {
     PropertyAttributes = PRVal;
   }
@@ -834,10 +894,12 @@
 
   bool isInstanceProperty() const { return !isClassProperty(); }
   bool isClassProperty() const { return PropertyAttributes & OBJC_PR_class; }
+
   ObjCPropertyQueryKind getQueryKind() const {
     return isClassProperty() ? ObjCPropertyQueryKind::OBJC_PR_query_class :
                                ObjCPropertyQueryKind::OBJC_PR_query_instance;
   }
+
   static ObjCPropertyQueryKind getQueryKind(bool isClassProperty) {
     return isClassProperty ? ObjCPropertyQueryKind::OBJC_PR_query_class :
                              ObjCPropertyQueryKind::OBJC_PR_query_instance;
@@ -860,6 +922,7 @@
 
   Selector getGetterName() const { return GetterName; }
   SourceLocation getGetterNameLoc() const { return GetterNameLoc; }
+
   void setGetterName(Selector Sel, SourceLocation Loc = SourceLocation()) {
     GetterName = Sel;
     GetterNameLoc = Loc;
@@ -867,6 +930,7 @@
 
   Selector getSetterName() const { return SetterName; }
   SourceLocation getSetterNameLoc() const { return SetterNameLoc; }
+
   void setSetterName(Selector Sel, SourceLocation Loc = SourceLocation()) {
     SetterName = Sel;
     SetterNameLoc = Loc;
@@ -882,9 +946,11 @@
   void setPropertyImplementation(PropertyControl pc) {
     PropertyImplementation = pc;
   }
+
   PropertyControl getPropertyImplementation() const {
     return PropertyControl(PropertyImplementation);
   }
+
   bool isOptional() const {
     return getPropertyImplementation() == PropertyControl::Optional;
   }
@@ -892,6 +958,7 @@
   void setPropertyIvarDecl(ObjCIvarDecl *Ivar) {
     PropertyIvarDecl = Ivar;
   }
+
   ObjCIvarDecl *getPropertyIvarDecl() const {
     return PropertyIvarDecl;
   }
@@ -917,104 +984,116 @@
 /// ObjCProtocolDecl, and ObjCImplDecl.
 ///
 class ObjCContainerDecl : public NamedDecl, public DeclContext {
-  void anchor() override;
-
   SourceLocation AtStart;
 
   // These two locations in the range mark the end of the method container.
   // The first points to the '@' token, and the second to the 'end' token.
   SourceRange AtEnd;
-public:
 
+  void anchor() override;
+
+public:
   ObjCContainerDecl(Kind DK, DeclContext *DC,
                     IdentifierInfo *Id, SourceLocation nameLoc,
                     SourceLocation atStartLoc)
-    : NamedDecl(DK, DC, nameLoc, Id), DeclContext(DK), AtStart(atStartLoc) {}
+      : NamedDecl(DK, DC, nameLoc, Id), DeclContext(DK), AtStart(atStartLoc) {}
 
   // Iterator access to instance/class properties.
-  typedef specific_decl_iterator<ObjCPropertyDecl> prop_iterator;
-  typedef llvm::iterator_range<specific_decl_iterator<ObjCPropertyDecl>>
-    prop_range;
+  using prop_iterator = specific_decl_iterator<ObjCPropertyDecl>;
+  using prop_range =
+      llvm::iterator_range<specific_decl_iterator<ObjCPropertyDecl>>;
 
   prop_range properties() const { return prop_range(prop_begin(), prop_end()); }
+
   prop_iterator prop_begin() const {
     return prop_iterator(decls_begin());
   }
+
   prop_iterator prop_end() const {
     return prop_iterator(decls_end());
   }
 
-  typedef filtered_decl_iterator<ObjCPropertyDecl,
-                                 &ObjCPropertyDecl::isInstanceProperty>
-    instprop_iterator;
-  typedef llvm::iterator_range<instprop_iterator> instprop_range;
+  using instprop_iterator =
+      filtered_decl_iterator<ObjCPropertyDecl,
+                             &ObjCPropertyDecl::isInstanceProperty>;
+  using instprop_range = llvm::iterator_range<instprop_iterator>;
 
   instprop_range instance_properties() const {
     return instprop_range(instprop_begin(), instprop_end());
   }
+
   instprop_iterator instprop_begin() const {
     return instprop_iterator(decls_begin());
   }
+
   instprop_iterator instprop_end() const {
     return instprop_iterator(decls_end());
   }
 
-  typedef filtered_decl_iterator<ObjCPropertyDecl,
-                                 &ObjCPropertyDecl::isClassProperty>
-    classprop_iterator;
-  typedef llvm::iterator_range<classprop_iterator> classprop_range;
+  using classprop_iterator =
+      filtered_decl_iterator<ObjCPropertyDecl,
+                             &ObjCPropertyDecl::isClassProperty>;
+  using classprop_range = llvm::iterator_range<classprop_iterator>;
 
   classprop_range class_properties() const {
     return classprop_range(classprop_begin(), classprop_end());
   }
+
   classprop_iterator classprop_begin() const {
     return classprop_iterator(decls_begin());
   }
+
   classprop_iterator classprop_end() const {
     return classprop_iterator(decls_end());
   }
 
   // Iterator access to instance/class methods.
-  typedef specific_decl_iterator<ObjCMethodDecl> method_iterator;
-  typedef llvm::iterator_range<specific_decl_iterator<ObjCMethodDecl>>
-    method_range;
+  using method_iterator = specific_decl_iterator<ObjCMethodDecl>;
+  using method_range =
+      llvm::iterator_range<specific_decl_iterator<ObjCMethodDecl>>;
 
   method_range methods() const {
     return method_range(meth_begin(), meth_end());
   }
+
   method_iterator meth_begin() const {
     return method_iterator(decls_begin());
   }
+
   method_iterator meth_end() const {
     return method_iterator(decls_end());
   }
 
-  typedef filtered_decl_iterator<ObjCMethodDecl,
-                                 &ObjCMethodDecl::isInstanceMethod>
-    instmeth_iterator;
-  typedef llvm::iterator_range<instmeth_iterator> instmeth_range;
+  using instmeth_iterator =
+      filtered_decl_iterator<ObjCMethodDecl,
+                             &ObjCMethodDecl::isInstanceMethod>;
+  using instmeth_range = llvm::iterator_range<instmeth_iterator>;
 
   instmeth_range instance_methods() const {
     return instmeth_range(instmeth_begin(), instmeth_end());
   }
+
   instmeth_iterator instmeth_begin() const {
     return instmeth_iterator(decls_begin());
   }
+
   instmeth_iterator instmeth_end() const {
     return instmeth_iterator(decls_end());
   }
 
-  typedef filtered_decl_iterator<ObjCMethodDecl,
-                                 &ObjCMethodDecl::isClassMethod>
-    classmeth_iterator;
-  typedef llvm::iterator_range<classmeth_iterator> classmeth_range;
+  using classmeth_iterator =
+      filtered_decl_iterator<ObjCMethodDecl,
+                             &ObjCMethodDecl::isClassMethod>;
+  using classmeth_range = llvm::iterator_range<classmeth_iterator>;
 
   classmeth_range class_methods() const {
     return classmeth_range(classmeth_begin(), classmeth_end());
   }
+
   classmeth_iterator classmeth_begin() const {
     return classmeth_iterator(decls_begin());
   }
+
   classmeth_iterator classmeth_end() const {
     return classmeth_iterator(decls_end());
   }
@@ -1022,13 +1101,16 @@
   // Get the local instance/class method declared in this interface.
   ObjCMethodDecl *getMethod(Selector Sel, bool isInstance,
                             bool AllowHidden = false) const;
+
   ObjCMethodDecl *getInstanceMethod(Selector Sel,
                                     bool AllowHidden = false) const {
     return getMethod(Sel, true/*isInstance*/, AllowHidden);
   }
+
   ObjCMethodDecl *getClassMethod(Selector Sel, bool AllowHidden = false) const {
     return getMethod(Sel, false/*isInstance*/, AllowHidden);
   }
+
   bool HasUserDeclaredSetterMethod(const ObjCPropertyDecl *P) const;
   ObjCIvarDecl *getIvarDecl(IdentifierInfo *Id) const;
 
@@ -1036,13 +1118,11 @@
   FindPropertyDeclaration(const IdentifierInfo *PropertyId,
                           ObjCPropertyQueryKind QueryKind) const;
 
-  typedef llvm::DenseMap<std::pair<IdentifierInfo*,
-                                   unsigned/*isClassProperty*/>,
-                         ObjCPropertyDecl*> PropertyMap;
-
-  typedef llvm::SmallDenseSet<const ObjCProtocolDecl *, 8> ProtocolPropertySet;
-
-  typedef llvm::SmallVector<ObjCPropertyDecl*, 8> PropertyDeclOrder;
+  using PropertyMap =
+      llvm::DenseMap<std::pair<IdentifierInfo *, unsigned/*isClassProperty*/>,
+                     ObjCPropertyDecl *>;
+  using ProtocolPropertySet = llvm::SmallDenseSet<const ObjCProtocolDecl *, 8>;
+  using PropertyDeclOrder = llvm::SmallVector<ObjCPropertyDecl *, 8>;
   
   /// This routine collects list of properties to be implemented in the class.
   /// This includes, class's and its conforming protocols' properties.
@@ -1057,6 +1137,7 @@
   SourceRange getAtEndRange() const {
     return AtEnd;
   }
+
   void setAtEndRange(SourceRange atEnd) {
     AtEnd = atEnd;
   }
@@ -1067,6 +1148,7 @@
 
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
+
   static bool classofKind(Kind K) {
     return K >= firstObjCContainer &&
            K <= lastObjCContainer;
@@ -1075,6 +1157,7 @@
   static DeclContext *castToDeclContext(const ObjCContainerDecl *D) {
     return static_cast<DeclContext *>(const_cast<ObjCContainerDecl*>(D));
   }
+
   static ObjCContainerDecl *castFromDeclContext(const DeclContext *DC) {
     return static_cast<ObjCContainerDecl *>(const_cast<DeclContext*>(DC));
   }
@@ -1107,20 +1190,19 @@
 ///
 class ObjCInterfaceDecl : public ObjCContainerDecl
                         , public Redeclarable<ObjCInterfaceDecl> {
-  void anchor() override;
+  friend class ASTContext;
 
   /// TypeForDecl - This indicates the Type object that represents this
   /// TypeDecl.  It is a cache maintained by ASTContext::getObjCInterfaceType
-  mutable const Type *TypeForDecl;
-  friend class ASTContext;
+  mutable const Type *TypeForDecl = nullptr;
   
   struct DefinitionData {
     /// \brief The definition of this class, for quick access from any 
     /// declaration.
-    ObjCInterfaceDecl *Definition;
+    ObjCInterfaceDecl *Definition = nullptr;
     
     /// When non-null, this is always an ObjCObjectType.
-    TypeSourceInfo *SuperClassTInfo;
+    TypeSourceInfo *SuperClassTInfo = nullptr;
     
     /// Protocols referenced in the \@interface  declaration
     ObjCProtocolList ReferencedProtocols;
@@ -1133,11 +1215,11 @@
     /// Categories are stored as a linked list in the AST, since the categories
     /// and class extensions come long after the initial interface declaration,
     /// and we avoid dynamically-resized arrays in the AST wherever possible.
-    ObjCCategoryDecl *CategoryList;
+    ObjCCategoryDecl *CategoryList = nullptr;
 
     /// IvarList - List of all ivars defined by this class; including class
     /// extensions and implementation. This list is built lazily.
-    ObjCIvarDecl *IvarList;
+    ObjCIvarDecl *IvarList = nullptr;
 
     /// \brief Indicates that the contents of this Objective-C class will be
     /// completed by the external AST source when required.
@@ -1155,11 +1237,14 @@
       /// We didn't calculate whether the designated initializers should be
       /// inherited or not.
       IDI_Unknown = 0,
+
       /// Designated initializers are inherited for the super class.
       IDI_Inherited = 1,
+
       /// The class does not inherit designated initializers.
       IDI_NotInherited = 2
     };
+
     /// One of the \c InheritedDesignatedInitializersState enumeratos.
     mutable unsigned InheritedDesignatedInitializers : 2;
     
@@ -1168,22 +1253,14 @@
     /// identifier, 
     SourceLocation EndLoc; 
 
-    DefinitionData() : Definition(), SuperClassTInfo(), CategoryList(), IvarList(), 
-                       ExternallyCompleted(),
-                       IvarListMissingImplementation(true),
-                       HasDesignatedInitializers(),
-                       InheritedDesignatedInitializers(IDI_Unknown) { }
+    DefinitionData()
+        : ExternallyCompleted(false), IvarListMissingImplementation(true),
+          HasDesignatedInitializers(false),
+          InheritedDesignatedInitializers(IDI_Unknown) {}
   };
 
-  ObjCInterfaceDecl(const ASTContext &C, DeclContext *DC, SourceLocation AtLoc,
-                    IdentifierInfo *Id, ObjCTypeParamList *typeParamList,
-                    SourceLocation CLoc, ObjCInterfaceDecl *PrevDecl,
-                    bool IsInternal);
-
-  void LoadExternalDefinition() const;
-
   /// The type parameters associated with this class, if any.
-  ObjCTypeParamList *TypeParamList;
+  ObjCTypeParamList *TypeParamList = nullptr;
 
   /// \brief Contains a pointer to the data associated with this class,
   /// which will be NULL if this class has not yet been defined.
@@ -1192,6 +1269,15 @@
   /// declarations. It will be set unless modules are enabled.
   llvm::PointerIntPair<DefinitionData *, 1, bool> Data;
 
+  ObjCInterfaceDecl(const ASTContext &C, DeclContext *DC, SourceLocation AtLoc,
+                    IdentifierInfo *Id, ObjCTypeParamList *typeParamList,
+                    SourceLocation CLoc, ObjCInterfaceDecl *PrevDecl,
+                    bool IsInternal);
+
+  void anchor() override;
+
+  void LoadExternalDefinition() const;
+
   DefinitionData &data() const {
     assert(Data.getPointer() && "Declaration has no definition!");
     return *Data.getPointer();
@@ -1200,13 +1286,16 @@
   /// \brief Allocate the definition data for this class.
   void allocateDefinitionData();
   
-  typedef Redeclarable<ObjCInterfaceDecl> redeclarable_base;
+  using redeclarable_base = Redeclarable<ObjCInterfaceDecl>;
+
   ObjCInterfaceDecl *getNextRedeclarationImpl() override {
     return getNextRedeclaration();
   }
+
   ObjCInterfaceDecl *getPreviousDeclImpl() override {
     return getPreviousDecl();
   }
+
   ObjCInterfaceDecl *getMostRecentDeclImpl() override {
     return getMostRecentDecl();
   }
@@ -1284,17 +1373,19 @@
   // Get the local instance/class method declared in a category.
   ObjCMethodDecl *getCategoryInstanceMethod(Selector Sel) const;
   ObjCMethodDecl *getCategoryClassMethod(Selector Sel) const;
+
   ObjCMethodDecl *getCategoryMethod(Selector Sel, bool isInstance) const {
     return isInstance ? getCategoryInstanceMethod(Sel)
                       : getCategoryClassMethod(Sel);
   }
 
-  typedef ObjCProtocolList::iterator protocol_iterator;
-  typedef llvm::iterator_range<protocol_iterator> protocol_range;
+  using protocol_iterator = ObjCProtocolList::iterator;
+  using protocol_range = llvm::iterator_range<protocol_iterator>;
 
   protocol_range protocols() const {
     return protocol_range(protocol_begin(), protocol_end());
   }
+
   protocol_iterator protocol_begin() const {
     // FIXME: Should make sure no callers ever do this.
     if (!hasDefinition())
@@ -1305,6 +1396,7 @@
 
     return data().ReferencedProtocols.begin();
   }
+
   protocol_iterator protocol_end() const {
     // FIXME: Should make sure no callers ever do this.
     if (!hasDefinition())
@@ -1316,12 +1408,13 @@
     return data().ReferencedProtocols.end();
   }
 
-  typedef ObjCProtocolList::loc_iterator protocol_loc_iterator;
-  typedef llvm::iterator_range<protocol_loc_iterator> protocol_loc_range;
+  using protocol_loc_iterator = ObjCProtocolList::loc_iterator;
+  using protocol_loc_range = llvm::iterator_range<protocol_loc_iterator>;
 
   protocol_loc_range protocol_locs() const {
     return protocol_loc_range(protocol_loc_begin(), protocol_loc_end());
   }
+
   protocol_loc_iterator protocol_loc_begin() const {
     // FIXME: Should make sure no callers ever do this.
     if (!hasDefinition())
@@ -1344,13 +1437,14 @@
     return data().ReferencedProtocols.loc_end();
   }
 
-  typedef ObjCList<ObjCProtocolDecl>::iterator all_protocol_iterator;
-  typedef llvm::iterator_range<all_protocol_iterator> all_protocol_range;
+  using all_protocol_iterator = ObjCList<ObjCProtocolDecl>::iterator;
+  using all_protocol_range = llvm::iterator_range<all_protocol_iterator>;
 
   all_protocol_range all_referenced_protocols() const {
     return all_protocol_range(all_referenced_protocol_begin(),
                               all_referenced_protocol_end());
   }
+
   all_protocol_iterator all_referenced_protocol_begin() const {
     // FIXME: Should make sure no callers ever do this.
     if (!hasDefinition())
@@ -1363,6 +1457,7 @@
              ? protocol_begin()
              : data().AllReferencedProtocols.begin();
   }
+
   all_protocol_iterator all_referenced_protocol_end() const {
     // FIXME: Should make sure no callers ever do this.
     if (!hasDefinition())
@@ -1376,10 +1471,11 @@
              : data().AllReferencedProtocols.end();
   }
 
-  typedef specific_decl_iterator<ObjCIvarDecl> ivar_iterator;
-  typedef llvm::iterator_range<specific_decl_iterator<ObjCIvarDecl>> ivar_range;
+  using ivar_iterator = specific_decl_iterator<ObjCIvarDecl>;
+  using ivar_range = llvm::iterator_range<specific_decl_iterator<ObjCIvarDecl>>;
 
   ivar_range ivars() const { return ivar_range(ivar_begin(), ivar_end()); }
+
   ivar_iterator ivar_begin() const { 
     if (const ObjCInterfaceDecl *Def = getDefinition())
       return ivar_iterator(Def->decls_begin()); 
@@ -1387,6 +1483,7 @@
     // FIXME: Should make sure no callers ever do this.
     return ivar_iterator();
   }
+
   ivar_iterator ivar_end() const { 
     if (const ObjCInterfaceDecl *Def = getDefinition())
       return ivar_iterator(Def->decls_end()); 
@@ -1518,21 +1615,20 @@
   /// and extension iterators.
   template<bool (*Filter)(ObjCCategoryDecl *)>
   class filtered_category_iterator {
-    ObjCCategoryDecl *Current;
+    ObjCCategoryDecl *Current = nullptr;
 
     void findAcceptableCategory();
     
   public:
-    typedef ObjCCategoryDecl *      value_type;
-    typedef value_type              reference;
-    typedef value_type              pointer;
-    typedef std::ptrdiff_t          difference_type;
-    typedef std::input_iterator_tag iterator_category;
+    using value_type = ObjCCategoryDecl *;
+    using reference = value_type;
+    using pointer = value_type;
+    using difference_type = std::ptrdiff_t;
+    using iterator_category = std::input_iterator_tag;
 
-    filtered_category_iterator() : Current(nullptr) { }
+    filtered_category_iterator() = default;
     explicit filtered_category_iterator(ObjCCategoryDecl *Current)
-      : Current(Current)
-    {
+        : Current(Current) {
       findAcceptableCategory();
     }
 
@@ -1567,11 +1663,11 @@
 public:
   /// \brief Iterator that walks over the list of categories and extensions
   /// that are visible, i.e., not hidden in a non-imported submodule.
-  typedef filtered_category_iterator<isVisibleCategory>
-    visible_categories_iterator;
+  using visible_categories_iterator =
+      filtered_category_iterator<isVisibleCategory>;
 
-  typedef llvm::iterator_range<visible_categories_iterator>
-    visible_categories_range;
+  using visible_categories_range =
+      llvm::iterator_range<visible_categories_iterator>;
 
   visible_categories_range visible_categories() const {
     return visible_categories_range(visible_categories_begin(),
@@ -1603,9 +1699,9 @@
 public:
   /// \brief Iterator that walks over all of the known categories and
   /// extensions, including those that are hidden.
-  typedef filtered_category_iterator<isKnownCategory> known_categories_iterator;
-  typedef llvm::iterator_range<known_categories_iterator>
-    known_categories_range;
+  using known_categories_iterator = filtered_category_iterator<isKnownCategory>;
+  using known_categories_range =
+     llvm::iterator_range<known_categories_iterator>;
 
   known_categories_range known_categories() const {
     return known_categories_range(known_categories_begin(),
@@ -1637,11 +1733,11 @@
 public:
   /// \brief Iterator that walks over all of the visible extensions, skipping
   /// any that are known but hidden.
-  typedef filtered_category_iterator<isVisibleExtension>
-    visible_extensions_iterator;
+  using visible_extensions_iterator =
+      filtered_category_iterator<isVisibleExtension>;
 
-  typedef llvm::iterator_range<visible_extensions_iterator>
-    visible_extensions_range;
+  using visible_extensions_range =
+      llvm::iterator_range<visible_extensions_iterator>;
 
   visible_extensions_range visible_extensions() const {
     return visible_extensions_range(visible_extensions_begin(),
@@ -1671,11 +1767,15 @@
   static bool isKnownExtension(ObjCCategoryDecl *Cat);
   
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+  friend class ASTReader;
+
   /// \brief Iterator that walks over all of the known extensions.
-  typedef filtered_category_iterator<isKnownExtension>
-    known_extensions_iterator;
-  typedef llvm::iterator_range<known_extensions_iterator>
-    known_extensions_range;
+  using known_extensions_iterator =
+      filtered_category_iterator<isKnownExtension>;
+  using known_extensions_range =
+      llvm::iterator_range<known_extensions_iterator>;
 
   known_extensions_range known_extensions() const {
     return known_extensions_range(known_extensions_begin(),
@@ -1771,6 +1871,7 @@
   ObjCMethodDecl *lookupClassMethod(Selector Sel) const {
     return lookupMethod(Sel, false/*isInstance*/);
   }
+
   ObjCInterfaceDecl *lookupInheritedClass(const IdentifierInfo *ICName);
 
   /// \brief Lookup a method in the classes implementation hierarchy.
@@ -1819,8 +1920,9 @@
                                bool lookupCategory,
                                bool RHSIsQualifiedID = false);
 
-  typedef redeclarable_base::redecl_range redecl_range;
-  typedef redeclarable_base::redecl_iterator redecl_iterator;
+  using redecl_range = redeclarable_base::redecl_range;
+  using redecl_iterator = redeclarable_base::redecl_iterator;
+
   using redeclarable_base::redecls_begin;
   using redeclarable_base::redecls_end;
   using redeclarable_base::redecls;
@@ -1839,10 +1941,6 @@
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ObjCInterface; }
 
-  friend class ASTReader;
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
-
 private:
   const ObjCInterfaceDecl *findInterfaceWithDesignatedInitializers() const;
   bool inheritsDesignatedInitializers() const;
@@ -1876,9 +1974,9 @@
                SourceLocation IdLoc, IdentifierInfo *Id,
                QualType T, TypeSourceInfo *TInfo, AccessControl ac, Expr *BW,
                bool synthesized)
-    : FieldDecl(ObjCIvar, DC, StartLoc, IdLoc, Id, T, TInfo, BW,
-                /*Mutable=*/false, /*HasInit=*/ICIS_NoInit),
-      NextIvar(nullptr), DeclAccess(ac), Synthesized(synthesized) {}
+      : FieldDecl(ObjCIvar, DC, StartLoc, IdLoc, Id, T, TInfo, BW,
+                  /*Mutable=*/false, /*HasInit=*/ICIS_NoInit),
+        DeclAccess(ac), Synthesized(synthesized) {}
 
 public:
   static ObjCIvarDecl *Create(ASTContext &C, ObjCContainerDecl *DC,
@@ -1918,26 +2016,27 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ObjCIvar; }
+
 private:
   /// NextIvar - Next Ivar in the list of ivars declared in class; class's
   /// extensions and class's implementation
-  ObjCIvarDecl *NextIvar;
+  ObjCIvarDecl *NextIvar = nullptr;
 
   // NOTE: VC++ treats enums as signed, avoid using the AccessControl enum
   unsigned DeclAccess : 3;
   unsigned Synthesized : 1;
 };
 
-
 /// \brief Represents a field declaration created by an \@defs(...).
 class ObjCAtDefsFieldDecl : public FieldDecl {
-  void anchor() override;
   ObjCAtDefsFieldDecl(DeclContext *DC, SourceLocation StartLoc,
                       SourceLocation IdLoc, IdentifierInfo *Id,
                       QualType T, Expr *BW)
-    : FieldDecl(ObjCAtDefsField, DC, StartLoc, IdLoc, Id, T,
-                /*TInfo=*/nullptr, // FIXME: Do ObjCAtDefs have declarators ?
-                BW, /*Mutable=*/false, /*HasInit=*/ICIS_NoInit) {}
+      : FieldDecl(ObjCAtDefsField, DC, StartLoc, IdLoc, Id, T,
+                  /*TInfo=*/nullptr, // FIXME: Do ObjCAtDefs have declarators ?
+                  BW, /*Mutable=*/false, /*HasInit=*/ICIS_NoInit) {}
+
+  void anchor() override;
 
 public:
   static ObjCAtDefsFieldDecl *Create(ASTContext &C, DeclContext *DC,
@@ -1981,11 +2080,8 @@
 /// protocols are referenced using angle brackets as follows:
 ///
 /// id \<NSDraggingInfo> anyObjectThatImplementsNSDraggingInfo;
-///
 class ObjCProtocolDecl : public ObjCContainerDecl,
                          public Redeclarable<ObjCProtocolDecl> {
-  void anchor() override;
-
   struct DefinitionData {
     // \brief The declaration that defines this protocol.
     ObjCProtocolDecl *Definition;
@@ -2001,29 +2097,38 @@
   /// declarations. It will be set unless modules are enabled.
   llvm::PointerIntPair<DefinitionData *, 1, bool> Data;
 
+  ObjCProtocolDecl(ASTContext &C, DeclContext *DC, IdentifierInfo *Id,
+                   SourceLocation nameLoc, SourceLocation atStartLoc,
+                   ObjCProtocolDecl *PrevDecl);
+
+  void anchor() override;
+
   DefinitionData &data() const {
     assert(Data.getPointer() && "Objective-C protocol has no definition!");
     return *Data.getPointer();
   }
   
-  ObjCProtocolDecl(ASTContext &C, DeclContext *DC, IdentifierInfo *Id,
-                   SourceLocation nameLoc, SourceLocation atStartLoc,
-                   ObjCProtocolDecl *PrevDecl);
-
   void allocateDefinitionData();
 
-  typedef Redeclarable<ObjCProtocolDecl> redeclarable_base;
+  using redeclarable_base = Redeclarable<ObjCProtocolDecl>;
+
   ObjCProtocolDecl *getNextRedeclarationImpl() override {
     return getNextRedeclaration();
   }
+
   ObjCProtocolDecl *getPreviousDeclImpl() override {
     return getPreviousDecl();
   }
+
   ObjCProtocolDecl *getMostRecentDeclImpl() override {
     return getMostRecentDecl();
   }
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+  friend class ASTReader;
+
   static ObjCProtocolDecl *Create(ASTContext &C, DeclContext *DC,
                                   IdentifierInfo *Id,
                                   SourceLocation nameLoc,
@@ -2036,42 +2141,49 @@
     assert(hasDefinition() && "No definition available!");
     return data().ReferencedProtocols;
   }
-  typedef ObjCProtocolList::iterator protocol_iterator;
-  typedef llvm::iterator_range<protocol_iterator> protocol_range;
+
+  using protocol_iterator = ObjCProtocolList::iterator;
+  using protocol_range = llvm::iterator_range<protocol_iterator>;
 
   protocol_range protocols() const {
     return protocol_range(protocol_begin(), protocol_end());
   }
+
   protocol_iterator protocol_begin() const {
     if (!hasDefinition())
       return protocol_iterator();
     
     return data().ReferencedProtocols.begin();
   }
+
   protocol_iterator protocol_end() const { 
     if (!hasDefinition())
       return protocol_iterator();
     
     return data().ReferencedProtocols.end(); 
   }
-  typedef ObjCProtocolList::loc_iterator protocol_loc_iterator;
-  typedef llvm::iterator_range<protocol_loc_iterator> protocol_loc_range;
+
+  using protocol_loc_iterator = ObjCProtocolList::loc_iterator;
+  using protocol_loc_range = llvm::iterator_range<protocol_loc_iterator>;
 
   protocol_loc_range protocol_locs() const {
     return protocol_loc_range(protocol_loc_begin(), protocol_loc_end());
   }
+
   protocol_loc_iterator protocol_loc_begin() const {
     if (!hasDefinition())
       return protocol_loc_iterator();
     
     return data().ReferencedProtocols.loc_begin();
   }
+
   protocol_loc_iterator protocol_loc_end() const {
     if (!hasDefinition())
       return protocol_loc_iterator();
     
     return data().ReferencedProtocols.loc_end();
   }
+
   unsigned protocol_size() const { 
     if (!hasDefinition())
       return 0;
@@ -2092,9 +2204,11 @@
   // Lookup a method. First, we search locally. If a method isn't
   // found, we search referenced protocols and class categories.
   ObjCMethodDecl *lookupMethod(Selector Sel, bool isInstance) const;
+
   ObjCMethodDecl *lookupInstanceMethod(Selector Sel) const {
     return lookupMethod(Sel, true/*isInstance*/);
   }
+
   ObjCMethodDecl *lookupClassMethod(Selector Sel) const {
     return lookupMethod(Sel, false/*isInstance*/);
   }
@@ -2141,8 +2255,9 @@
     return SourceRange(getAtStartLoc(), getLocation());
   }
    
-  typedef redeclarable_base::redecl_range redecl_range;
-  typedef redeclarable_base::redecl_iterator redecl_iterator;
+  using redecl_range = redeclarable_base::redecl_range;
+  using redecl_iterator = redeclarable_base::redecl_iterator;
+
   using redeclarable_base::redecls_begin;
   using redeclarable_base::redecls_end;
   using redeclarable_base::redecls;
@@ -2163,10 +2278,6 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ObjCProtocol; }
-
-  friend class ASTReader;
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// ObjCCategoryDecl - Represents a category declaration. A category allows
@@ -2185,22 +2296,19 @@
 /// Categories were originally inspired by dynamic languages such as Common
 /// Lisp and Smalltalk.  More traditional class-based languages (C++, Java)
 /// don't support this level of dynamism, which is both powerful and dangerous.
-///
 class ObjCCategoryDecl : public ObjCContainerDecl {
-  void anchor() override;
-
   /// Interface belonging to this category
   ObjCInterfaceDecl *ClassInterface;
 
   /// The type parameters associated with this category, if any.
-  ObjCTypeParamList *TypeParamList;
+  ObjCTypeParamList *TypeParamList = nullptr;
 
   /// referenced protocols in this category.
   ObjCProtocolList ReferencedProtocols;
 
   /// Next category belonging to this class.
   /// FIXME: this should not be a singly-linked list.  Move storage elsewhere.
-  ObjCCategoryDecl *NextClassCategory;
+  ObjCCategoryDecl *NextClassCategory = nullptr;
 
   /// \brief The location of the category name in this declaration.
   SourceLocation CategoryNameLoc;
@@ -2213,10 +2321,14 @@
                    SourceLocation ClassNameLoc, SourceLocation CategoryNameLoc,
                    IdentifierInfo *Id, ObjCInterfaceDecl *IDecl,
                    ObjCTypeParamList *typeParamList,
-                   SourceLocation IvarLBraceLoc=SourceLocation(),
-                   SourceLocation IvarRBraceLoc=SourceLocation());
+                   SourceLocation IvarLBraceLoc = SourceLocation(),
+                   SourceLocation IvarRBraceLoc = SourceLocation());
+
+  void anchor() override;
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
 
   static ObjCCategoryDecl *Create(ASTContext &C, DeclContext *DC,
                                   SourceLocation AtLoc,
@@ -2257,26 +2369,31 @@
     return ReferencedProtocols;
   }
 
-  typedef ObjCProtocolList::iterator protocol_iterator;
-  typedef llvm::iterator_range<protocol_iterator> protocol_range;
+  using protocol_iterator = ObjCProtocolList::iterator;
+  using protocol_range = llvm::iterator_range<protocol_iterator>;
 
   protocol_range protocols() const {
     return protocol_range(protocol_begin(), protocol_end());
   }
+
   protocol_iterator protocol_begin() const {
     return ReferencedProtocols.begin();
   }
+
   protocol_iterator protocol_end() const { return ReferencedProtocols.end(); }
   unsigned protocol_size() const { return ReferencedProtocols.size(); }
-  typedef ObjCProtocolList::loc_iterator protocol_loc_iterator;
-  typedef llvm::iterator_range<protocol_loc_iterator> protocol_loc_range;
+
+  using protocol_loc_iterator = ObjCProtocolList::loc_iterator;
+  using protocol_loc_range = llvm::iterator_range<protocol_loc_iterator>;
 
   protocol_loc_range protocol_locs() const {
     return protocol_loc_range(protocol_loc_begin(), protocol_loc_end());
   }
+
   protocol_loc_iterator protocol_loc_begin() const {
     return ReferencedProtocols.loc_begin();
   }
+
   protocol_loc_iterator protocol_loc_end() const {
     return ReferencedProtocols.loc_end();
   }
@@ -2291,19 +2408,23 @@
 
   bool IsClassExtension() const { return getIdentifier() == nullptr; }
 
-  typedef specific_decl_iterator<ObjCIvarDecl> ivar_iterator;
-  typedef llvm::iterator_range<specific_decl_iterator<ObjCIvarDecl>> ivar_range;
+  using ivar_iterator = specific_decl_iterator<ObjCIvarDecl>;
+  using ivar_range = llvm::iterator_range<specific_decl_iterator<ObjCIvarDecl>>;
 
   ivar_range ivars() const { return ivar_range(ivar_begin(), ivar_end()); }
+
   ivar_iterator ivar_begin() const {
     return ivar_iterator(decls_begin());
   }
+
   ivar_iterator ivar_end() const {
     return ivar_iterator(decls_end());
   }
+
   unsigned ivar_size() const {
     return std::distance(ivar_begin(), ivar_end());
   }
+
   bool ivar_empty() const {
     return ivar_begin() == ivar_end();
   }
@@ -2318,24 +2439,21 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ObjCCategory; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 class ObjCImplDecl : public ObjCContainerDecl {
-  void anchor() override;
-
   /// Class interface for this class/category implementation
   ObjCInterfaceDecl *ClassInterface;
 
+  void anchor() override;
+
 protected:
   ObjCImplDecl(Kind DK, DeclContext *DC,
                ObjCInterfaceDecl *classInterface,
                IdentifierInfo *Id,
                SourceLocation nameLoc, SourceLocation atStartLoc)
-    : ObjCContainerDecl(DK, DC, Id, nameLoc, atStartLoc),
-      ClassInterface(classInterface) {}
+      : ObjCContainerDecl(DK, DC, Id, nameLoc, atStartLoc),
+        ClassInterface(classInterface) {}
 
 public:
   const ObjCInterfaceDecl *getClassInterface() const { return ClassInterface; }
@@ -2347,6 +2465,7 @@
     method->setLexicalDeclContext(this);
     addDecl(method);
   }
+
   void addClassMethod(ObjCMethodDecl *method) {
     // FIXME: Context should be set correctly before we get here.
     method->setLexicalDeclContext(this);
@@ -2360,21 +2479,24 @@
   ObjCPropertyImplDecl *FindPropertyImplIvarDecl(IdentifierInfo *ivarId) const;
 
   // Iterator access to properties.
-  typedef specific_decl_iterator<ObjCPropertyImplDecl> propimpl_iterator;
-  typedef llvm::iterator_range<specific_decl_iterator<ObjCPropertyImplDecl>>
-    propimpl_range;
+  using propimpl_iterator = specific_decl_iterator<ObjCPropertyImplDecl>;
+  using propimpl_range =
+      llvm::iterator_range<specific_decl_iterator<ObjCPropertyImplDecl>>;
 
   propimpl_range property_impls() const {
     return propimpl_range(propimpl_begin(), propimpl_end());
   }
+
   propimpl_iterator propimpl_begin() const {
     return propimpl_iterator(decls_begin());
   }
+
   propimpl_iterator propimpl_end() const {
     return propimpl_iterator(decls_end());
   }
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
+
   static bool classofKind(Kind K) {
     return K >= firstObjCImpl && K <= lastObjCImpl;
   }
@@ -2394,8 +2516,6 @@
 ///
 /// ObjCCategoryImplDecl
 class ObjCCategoryImplDecl : public ObjCImplDecl {
-  void anchor() override;
-
   // Category name location
   SourceLocation CategoryNameLoc;
 
@@ -2403,10 +2523,16 @@
                        ObjCInterfaceDecl *classInterface,
                        SourceLocation nameLoc, SourceLocation atStartLoc,
                        SourceLocation CategoryNameLoc)
-    : ObjCImplDecl(ObjCCategoryImpl, DC, classInterface, Id,
-                   nameLoc, atStartLoc),
-      CategoryNameLoc(CategoryNameLoc) {}
+      : ObjCImplDecl(ObjCCategoryImpl, DC, classInterface, Id,
+                     nameLoc, atStartLoc),
+        CategoryNameLoc(CategoryNameLoc) {}
+
+  void anchor() override;
+
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static ObjCCategoryImplDecl *Create(ASTContext &C, DeclContext *DC,
                                       IdentifierInfo *Id,
                                       ObjCInterfaceDecl *classInterface,
@@ -2421,9 +2547,6 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ObjCCategoryImpl;}
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const ObjCCategoryImplDecl &CID);
@@ -2446,7 +2569,6 @@
 /// we allow instance variables to be specified in the implementation. When
 /// specified, they need to be \em identical to the interface.
 class ObjCImplementationDecl : public ObjCImplDecl {
-  void anchor() override;
   /// Implementation Class's super class.
   ObjCInterfaceDecl *SuperClass;
   SourceLocation SuperLoc;
@@ -2458,7 +2580,7 @@
   /// Support for ivar initialization.
   /// \brief The arguments used to initialize the ivars
   LazyCXXCtorInitializersPtr IvarInitializers;
-  unsigned NumIvarInitializers;
+  unsigned NumIvarInitializers = 0;
 
   /// Do the ivars of this class require initialization other than
   /// zero-initialization?
@@ -2474,15 +2596,20 @@
                          SourceLocation superLoc = SourceLocation(),
                          SourceLocation IvarLBraceLoc=SourceLocation(), 
                          SourceLocation IvarRBraceLoc=SourceLocation())
-    : ObjCImplDecl(ObjCImplementation, DC, classInterface,
-                   classInterface ? classInterface->getIdentifier()
-                                  : nullptr,
-                   nameLoc, atStartLoc),
-       SuperClass(superDecl), SuperLoc(superLoc), IvarLBraceLoc(IvarLBraceLoc),
-       IvarRBraceLoc(IvarRBraceLoc),
-       IvarInitializers(nullptr), NumIvarInitializers(0),
-       HasNonZeroConstructors(false), HasDestructors(false) {}
+      : ObjCImplDecl(ObjCImplementation, DC, classInterface,
+                     classInterface ? classInterface->getIdentifier()
+                                    : nullptr,
+                     nameLoc, atStartLoc),
+         SuperClass(superDecl), SuperLoc(superLoc),
+         IvarLBraceLoc(IvarLBraceLoc), IvarRBraceLoc(IvarRBraceLoc),
+         HasNonZeroConstructors(false), HasDestructors(false) {}
+
+  void anchor() override;
+
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static ObjCImplementationDecl *Create(ASTContext &C, DeclContext *DC,
                                         ObjCInterfaceDecl *classInterface,
                                         ObjCInterfaceDecl *superDecl,
@@ -2495,15 +2622,16 @@
   static ObjCImplementationDecl *CreateDeserialized(ASTContext &C, unsigned ID);
 
   /// init_iterator - Iterates through the ivar initializer list.
-  typedef CXXCtorInitializer **init_iterator;
+  using init_iterator = CXXCtorInitializer **;
 
   /// init_const_iterator - Iterates through the ivar initializer list.
-  typedef CXXCtorInitializer * const * init_const_iterator;
+  using init_const_iterator = CXXCtorInitializer * const *;
 
-  typedef llvm::iterator_range<init_iterator> init_range;
-  typedef llvm::iterator_range<init_const_iterator> init_const_range;
+  using init_range = llvm::iterator_range<init_iterator>;
+  using init_const_range = llvm::iterator_range<init_const_iterator>;
 
   init_range inits() { return init_range(init_begin(), init_end()); }
+
   init_const_range inits() const {
     return init_const_range(init_begin(), init_end());
   }
@@ -2513,6 +2641,7 @@
     const auto *ConstThis = this;
     return const_cast<init_iterator>(ConstThis->init_begin());
   }
+
   /// begin() - Retrieve an iterator to the first initializer.
   init_const_iterator init_begin() const;
 
@@ -2520,10 +2649,12 @@
   init_iterator       init_end()       {
     return init_begin() + NumIvarInitializers;
   }
+
   /// end() - Retrieve an iterator past the last initializer.
   init_const_iterator init_end() const {
     return init_begin() + NumIvarInitializers;
   }
+
   /// getNumArgs - Number of ivars which must be initialized.
   unsigned getNumIvarInitializers() const {
     return NumIvarInitializers;
@@ -2585,28 +2716,29 @@
   void setIvarRBraceLoc(SourceLocation Loc) { IvarRBraceLoc = Loc; }
   SourceLocation getIvarRBraceLoc() const { return IvarRBraceLoc; }
   
-  typedef specific_decl_iterator<ObjCIvarDecl> ivar_iterator;
-  typedef llvm::iterator_range<specific_decl_iterator<ObjCIvarDecl>> ivar_range;
+  using ivar_iterator = specific_decl_iterator<ObjCIvarDecl>;
+  using ivar_range = llvm::iterator_range<specific_decl_iterator<ObjCIvarDecl>>;
 
   ivar_range ivars() const { return ivar_range(ivar_begin(), ivar_end()); }
+
   ivar_iterator ivar_begin() const {
     return ivar_iterator(decls_begin());
   }
+
   ivar_iterator ivar_end() const {
     return ivar_iterator(decls_end());
   }
+
   unsigned ivar_size() const {
     return std::distance(ivar_begin(), ivar_end());
   }
+
   bool ivar_empty() const {
     return ivar_begin() == ivar_end();
   }
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ObjCImplementation; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const ObjCImplementationDecl &ID);
@@ -2614,13 +2746,15 @@
 /// ObjCCompatibleAliasDecl - Represents alias of a class. This alias is
 /// declared as \@compatibility_alias alias class.
 class ObjCCompatibleAliasDecl : public NamedDecl {
-  void anchor() override;
   /// Class that this is an alias of.
   ObjCInterfaceDecl *AliasedClass;
 
   ObjCCompatibleAliasDecl(DeclContext *DC, SourceLocation L, IdentifierInfo *Id,
                           ObjCInterfaceDecl* aliasedClass)
-    : NamedDecl(ObjCCompatibleAlias, DC, L, Id), AliasedClass(aliasedClass) {}
+      : NamedDecl(ObjCCompatibleAlias, DC, L, Id), AliasedClass(aliasedClass) {}
+
+  void anchor() override;
+
 public:
   static ObjCCompatibleAliasDecl *Create(ASTContext &C, DeclContext *DC,
                                          SourceLocation L, IdentifierInfo *Id,
@@ -2635,7 +2769,6 @@
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ObjCCompatibleAlias; }
-
 };
 
 /// ObjCPropertyImplDecl - Represents implementation declaration of a property
@@ -2648,6 +2781,7 @@
     Synthesize,
     Dynamic
   };
+
 private:
   SourceLocation AtLoc;   // location of \@synthesize or \@dynamic
 
@@ -2667,24 +2801,25 @@
 
   /// Null for \@dynamic. Non-null if property must be copy-constructed in
   /// getter.
-  Expr *GetterCXXConstructor;
+  Expr *GetterCXXConstructor = nullptr;
 
   /// Null for \@dynamic. Non-null if property has assignment operator to call
   /// in Setter synthesis.
-  Expr *SetterCXXAssignment;
+  Expr *SetterCXXAssignment = nullptr;
 
   ObjCPropertyImplDecl(DeclContext *DC, SourceLocation atLoc, SourceLocation L,
                        ObjCPropertyDecl *property,
                        Kind PK,
                        ObjCIvarDecl *ivarDecl,
                        SourceLocation ivarLoc)
-    : Decl(ObjCPropertyImpl, DC, L), AtLoc(atLoc),
-      IvarLoc(ivarLoc), PropertyDecl(property), PropertyIvarDecl(ivarDecl),
-      GetterCXXConstructor(nullptr), SetterCXXAssignment(nullptr) {
-    assert (PK == Dynamic || PropertyIvarDecl);
+      : Decl(ObjCPropertyImpl, DC, L), AtLoc(atLoc),
+        IvarLoc(ivarLoc), PropertyDecl(property), PropertyIvarDecl(ivarDecl) {
+    assert(PK == Dynamic || PropertyIvarDecl);
   }
 
 public:
+  friend class ASTDeclReader;
+
   static ObjCPropertyImplDecl *Create(ASTContext &C, DeclContext *DC,
                                       SourceLocation atLoc, SourceLocation L,
                                       ObjCPropertyDecl *property,
@@ -2733,6 +2868,7 @@
   Expr *getGetterCXXConstructor() const {
     return GetterCXXConstructor;
   }
+
   void setGetterCXXConstructor(Expr *getterCXXConstructor) {
     GetterCXXConstructor = getterCXXConstructor;
   }
@@ -2740,14 +2876,13 @@
   Expr *getSetterCXXAssignment() const {
     return SetterCXXAssignment;
   }
+
   void setSetterCXXAssignment(Expr *setterCXXAssignment) {
     SetterCXXAssignment = setterCXXAssignment;
   }
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Decl::Kind K) { return K == ObjCPropertyImpl; }
-
-  friend class ASTDeclReader;
 };
 
 template<bool (*Filter)(ObjCCategoryDecl *)>
@@ -2778,5 +2913,6 @@
   return Cat->IsClassExtension();
 }
 
-}  // end namespace clang
-#endif
+} // namespace clang
+
+#endif // LLVM_CLANG_AST_DECLOBJC_H
diff --git a/include/clang/AST/DeclOpenMP.h b/include/clang/AST/DeclOpenMP.h
index 2a329c3..76fbe01 100644
--- a/include/clang/AST/DeclOpenMP.h
+++ b/include/clang/AST/DeclOpenMP.h
@@ -190,8 +190,8 @@
 
   OMPCapturedExprDecl(ASTContext &C, DeclContext *DC, IdentifierInfo *Id,
                       QualType Type, SourceLocation StartLoc)
-      : VarDecl(OMPCapturedExpr, C, DC, StartLoc, SourceLocation(), Id, Type,
-                nullptr, SC_None) {
+      : VarDecl(OMPCapturedExpr, C, DC, StartLoc, StartLoc, Id, Type, nullptr,
+                SC_None) {
     setImplicit();
   }
 
diff --git a/include/clang/AST/DeclTemplate.h b/include/clang/AST/DeclTemplate.h
index 2879452..7842d70 100644
--- a/include/clang/AST/DeclTemplate.h
+++ b/include/clang/AST/DeclTemplate.h
@@ -1,4 +1,4 @@
-//===-- DeclTemplate.h - Classes for representing C++ templates -*- C++ -*-===//
+//===- DeclTemplate.h - Classes for representing C++ templates --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,42 +6,60 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the C++ template declaration subclasses.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_AST_DECLTEMPLATE_H
 #define LLVM_CLANG_AST_DECLTEMPLATE_H
 
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Redeclarable.h"
 #include "clang/AST/TemplateBase.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/TrailingObjects.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
 #include <utility>
 
 namespace clang {
 
 enum BuiltinTemplateKind : int;
-class TemplateParameterList;
-class TemplateDecl;
-class RedeclarableTemplateDecl;
-class FunctionTemplateDecl;
 class ClassTemplateDecl;
 class ClassTemplatePartialSpecializationDecl;
-class TemplateTypeParmDecl;
+class Expr;
+class FunctionTemplateDecl;
+class IdentifierInfo;
 class NonTypeTemplateParmDecl;
+class TemplateDecl;
 class TemplateTemplateParmDecl;
-class TypeAliasTemplateDecl;
+class TemplateTypeParmDecl;
+class UnresolvedSetImpl;
 class VarTemplateDecl;
 class VarTemplatePartialSpecializationDecl;
 
 /// \brief Stores a template parameter of any kind.
-typedef llvm::PointerUnion3<TemplateTypeParmDecl*, NonTypeTemplateParmDecl*,
-                            TemplateTemplateParmDecl*> TemplateParameter;
+using TemplateParameter =
+    llvm::PointerUnion3<TemplateTypeParmDecl *, NonTypeTemplateParmDecl *,
+                        TemplateTemplateParmDecl *>;
 
 NamedDecl *getAsNamedDecl(TemplateParameter P);
 
@@ -50,7 +68,6 @@
 class TemplateParameterList final
     : private llvm::TrailingObjects<TemplateParameterList, NamedDecl *,
                                     Expr *> {
-
   /// The location of the 'template' keyword.
   SourceLocation TemplateLoc;
 
@@ -69,6 +86,10 @@
   unsigned HasRequiresClause : 1;
 
 protected:
+  TemplateParameterList(SourceLocation TemplateLoc, SourceLocation LAngleLoc,
+                        ArrayRef<NamedDecl *> Params, SourceLocation RAngleLoc,
+                        Expr *RequiresClause);
+
   size_t numTrailingObjects(OverloadToken<NamedDecl *>) const {
     return NumParams;
   }
@@ -77,11 +98,11 @@
     return HasRequiresClause;
   }
 
-  TemplateParameterList(SourceLocation TemplateLoc, SourceLocation LAngleLoc,
-                        ArrayRef<NamedDecl *> Params, SourceLocation RAngleLoc,
-                        Expr *RequiresClause);
-
 public:
+  template <size_t N, bool HasRequiresClause>
+  friend class FixedSizeTemplateParameterListStorage;
+  friend TrailingObjects;
+
   static TemplateParameterList *Create(const ASTContext &C,
                                        SourceLocation TemplateLoc,
                                        SourceLocation LAngleLoc,
@@ -90,10 +111,10 @@
                                        Expr *RequiresClause);
 
   /// \brief Iterates through the template parameters in this list.
-  typedef NamedDecl** iterator;
+  using iterator = NamedDecl **;
 
   /// \brief Iterates through the template parameters in this list.
-  typedef NamedDecl* const* const_iterator;
+  using const_iterator = NamedDecl * const *;
 
   iterator begin() { return getTrailingObjects<NamedDecl *>(); }
   const_iterator begin() const { return getTrailingObjects<NamedDecl *>(); }
@@ -113,7 +134,6 @@
     assert(Idx < size() && "Template parameter index out-of-range");
     return begin()[Idx];
   }
-
   const NamedDecl* getParam(unsigned Idx) const {
     assert(Idx < size() && "Template parameter index out-of-range");
     return begin()[Idx];
@@ -157,11 +177,6 @@
     return SourceRange(TemplateLoc, RAngleLoc);
   }
 
-  friend TrailingObjects;
-
-  template <size_t N, bool HasRequiresClause>
-  friend class FixedSizeTemplateParameterListStorage;
-
 public:
   // FIXME: workaround for MSVC 2013; remove when no longer needed
   using FixedSizeStorageOwner = TrailingObjects::FixedSizeStorageOwner;
@@ -201,14 +216,16 @@
   /// argument list.
   unsigned NumArguments;
 
-  TemplateArgumentList(const TemplateArgumentList &Other) = delete;
-  void operator=(const TemplateArgumentList &Other) = delete;
-
   // Constructs an instance with an internal Argument list, containing
   // a copy of the Args array. (Called by CreateCopy)
   TemplateArgumentList(ArrayRef<TemplateArgument> Args);
 
 public:
+  friend TrailingObjects;
+
+  TemplateArgumentList(const TemplateArgumentList &) = delete;
+  TemplateArgumentList &operator=(const TemplateArgumentList &) = delete;
+
   /// \brief Type used to indicate that the template argument list itself is a
   /// stack object. It does not own its template arguments.
   enum OnStackType { OnStack };
@@ -254,8 +271,6 @@
 
   /// \brief Retrieve a pointer to the template argument list.
   const TemplateArgument *data() const { return Arguments; }
-
-  friend TrailingObjects;
 };
 
 void *allocateDefaultArgStorageChain(const ASTContext &C);
@@ -297,9 +312,11 @@
 
   /// Determine whether there is a default argument for this parameter.
   bool isSet() const { return !ValueOrInherited.isNull(); }
+
   /// Determine whether the default argument for this parameter was inherited
   /// from a previous declaration of the same entity.
   bool isInherited() const { return ValueOrInherited.template is<ParmDecl*>(); }
+
   /// Get the default argument's value. This does not consider whether the
   /// default argument is visible.
   ArgType get() const {
@@ -310,6 +327,7 @@
       return C->Value;
     return Storage->ValueOrInherited.template get<ArgType>();
   }
+
   /// Get the parameter from which we inherit the default argument, if any.
   /// This is the parameter on which the default argument was actually written.
   const ParmDecl *getInheritedFrom() const {
@@ -319,11 +337,13 @@
       return C->PrevDeclWithDefaultArg;
     return nullptr;
   }
+
   /// Set the default argument.
   void set(ArgType Arg) {
     assert(!isSet() && "default argument already set");
     ValueOrInherited = Arg;
   }
+
   /// Set that the default argument was inherited from another parameter.
   void setInherited(const ASTContext &C, ParmDecl *InheritedFrom) {
     assert(!isInherited() && "default argument already inherited");
@@ -334,6 +354,7 @@
       ValueOrInherited = new (allocateDefaultArgStorageChain(C))
           Chain{InheritedFrom, ValueOrInherited.template get<ArgType>()};
   }
+
   /// Remove the default argument, even if it was inherited.
   void clear() {
     ValueOrInherited = ArgType();
@@ -350,7 +371,7 @@
   friend TemplateDecl;
 
 public:
-  ConstrainedTemplateDeclInfo() : TemplateParams(), AssociatedConstraints() {}
+  ConstrainedTemplateDeclInfo() = default;
 
   TemplateParameterList *getTemplateParameters() const {
     return TemplateParams;
@@ -365,8 +386,8 @@
 
   void setAssociatedConstraints(Expr *AC) { AssociatedConstraints = AC; }
 
-  TemplateParameterList *TemplateParams;
-  Expr *AssociatedConstraints;
+  TemplateParameterList *TemplateParams = nullptr;
+  Expr *AssociatedConstraints = nullptr;
 };
 
 
@@ -377,13 +398,14 @@
 /// reference to the templated scoped declaration: the underlying AST node.
 class TemplateDecl : public NamedDecl {
   void anchor() override;
+
 protected:
   // Construct a template decl with the given name and parameters.
   // Used when there is no templated element (e.g., for tt-params).
   TemplateDecl(ConstrainedTemplateDeclInfo *CTDI, Kind DK, DeclContext *DC,
                SourceLocation L, DeclarationName Name,
                TemplateParameterList *Params)
-      : NamedDecl(DK, DC, L, Name), TemplatedDecl(nullptr, false),
+      : NamedDecl(DK, DC, L, Name), TemplatedDecl(nullptr),
         TemplateParams(CTDI) {
     this->setTemplateParameters(Params);
   }
@@ -396,7 +418,7 @@
   TemplateDecl(ConstrainedTemplateDeclInfo *CTDI, Kind DK, DeclContext *DC,
                SourceLocation L, DeclarationName Name,
                TemplateParameterList *Params, NamedDecl *Decl)
-      : NamedDecl(DK, DC, L, Name), TemplatedDecl(Decl, false),
+      : NamedDecl(DK, DC, L, Name), TemplatedDecl(Decl),
         TemplateParams(CTDI) {
     this->setTemplateParameters(Params);
   }
@@ -428,31 +450,22 @@
   }
 
   /// Get the underlying, templated declaration.
-  NamedDecl *getTemplatedDecl() const { return TemplatedDecl.getPointer(); }
+  NamedDecl *getTemplatedDecl() const { return TemplatedDecl; }
 
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
+
   static bool classofKind(Kind K) {
     return K >= firstTemplate && K <= lastTemplate;
   }
 
   SourceRange getSourceRange() const override LLVM_READONLY {
     return SourceRange(getTemplateParameters()->getTemplateLoc(),
-                       TemplatedDecl.getPointer()->getSourceRange().getEnd());
+                       TemplatedDecl->getSourceRange().getEnd());
   }
 
-  /// Whether this is a (C++ Concepts TS) function or variable concept.
-  bool isConcept() const { return TemplatedDecl.getInt(); }
-  void setConcept() { TemplatedDecl.setInt(true); }
-
 protected:
-  /// \brief The named declaration from which this template was instantiated.
-  /// (or null).
-  ///
-  /// The boolean value will be true to indicate that this template
-  /// (function or variable) is a concept.
-  llvm::PointerIntPair<NamedDecl *, 1, bool> TemplatedDecl;
-
+  NamedDecl *TemplatedDecl;
   /// \brief The template parameter list and optional requires-clause
   /// associated with this declaration; alternatively, a
   /// \c ConstrainedTemplateDeclInfo if the associated constraints of the
@@ -481,9 +494,9 @@
   /// \brief Initialize the underlying templated declaration and
   /// template parameters.
   void init(NamedDecl *templatedDecl, TemplateParameterList* templateParams) {
-    assert(!TemplatedDecl.getPointer() && "TemplatedDecl already set!");
+    assert(!TemplatedDecl && "TemplatedDecl already set!");
     assert(!TemplateParams && "TemplateParams already set!");
-    TemplatedDecl.setPointer(templatedDecl);
+    TemplatedDecl = templatedDecl;
     TemplateParams = templateParams;
   }
 };
@@ -498,11 +511,10 @@
                                      const TemplateArgumentList *TemplateArgs,
                        const ASTTemplateArgumentListInfo *TemplateArgsAsWritten,
                                      SourceLocation POI)
-  : Function(FD),
-    Template(Template, TSK - 1),
-    TemplateArguments(TemplateArgs),
-    TemplateArgumentsAsWritten(TemplateArgsAsWritten),
-    PointOfInstantiation(POI) { }
+      : Function(FD), Template(Template, TSK - 1),
+        TemplateArguments(TemplateArgs),
+        TemplateArgumentsAsWritten(TemplateArgsAsWritten),
+        PointOfInstantiation(POI) {}
 
 public:
   static FunctionTemplateSpecializationInfo *
@@ -604,7 +616,7 @@
   explicit
   MemberSpecializationInfo(NamedDecl *IF, TemplateSpecializationKind TSK,
                            SourceLocation POI = SourceLocation())
-    : MemberAndTSK(IF, TSK - 1), PointOfInstantiation(POI) {
+      : MemberAndTSK(IF, TSK - 1), PointOfInstantiation(POI) {
     assert(TSK != TSK_Undeclared &&
            "Cannot encode undeclared template specializations for members");
   }
@@ -681,6 +693,8 @@
                                  const TemplateArgumentListInfo &TemplateArgs);
 
 public:
+  friend TrailingObjects;
+
   static DependentFunctionTemplateSpecializationInfo *
   Create(ASTContext &Context, const UnresolvedSetImpl &Templates,
          const TemplateArgumentListInfo &TemplateArgs);
@@ -716,32 +730,34 @@
   SourceLocation getRAngleLoc() const {
     return AngleLocs.getEnd();
   }
-
-  friend TrailingObjects;
 };
 
 /// Declaration of a redeclarable template.
 class RedeclarableTemplateDecl : public TemplateDecl, 
                                  public Redeclarable<RedeclarableTemplateDecl> 
 {
-  typedef Redeclarable<RedeclarableTemplateDecl> redeclarable_base;
+  using redeclarable_base = Redeclarable<RedeclarableTemplateDecl>;
+
   RedeclarableTemplateDecl *getNextRedeclarationImpl() override {
     return getNextRedeclaration();
   }
+
   RedeclarableTemplateDecl *getPreviousDeclImpl() override {
     return getPreviousDecl();
   }
+
   RedeclarableTemplateDecl *getMostRecentDeclImpl() override {
     return getMostRecentDecl();
   }
 
 protected:
   template <typename EntryType> struct SpecEntryTraits {
-    typedef EntryType DeclType;
+    using DeclType = EntryType;
 
     static DeclType *getDecl(EntryType *D) {
       return D;
     }
+
     static ArrayRef<TemplateArgument> getTemplateArgs(EntryType *D) {
       return D->getTemplateArgs().asArray();
     }
@@ -756,7 +772,7 @@
             typename std::iterator_traits<typename llvm::FoldingSetVector<
                 EntryType>::iterator>::iterator_category,
             DeclType *, ptrdiff_t, DeclType *, DeclType *> {
-    SpecIterator() {}
+    SpecIterator() = default;
     explicit SpecIterator(
         typename llvm::FoldingSetVector<EntryType>::iterator SetIter)
         : SpecIterator::iterator_adaptor_base(std::move(SetIter)) {}
@@ -764,6 +780,7 @@
     DeclType *operator*() const {
       return SETraits::getDecl(&*this->I)->getMostRecentDecl();
     }
+
     DeclType *operator->() const { return **this; }
   };
 
@@ -773,6 +790,8 @@
     return SpecIterator<EntryType>(isEnd ? Specs.end() : Specs.begin());
   }
 
+  void loadLazySpecializationsImpl() const;
+
   template <class EntryType> typename SpecEntryTraits<EntryType>::DeclType*
   findSpecializationImpl(llvm::FoldingSetVector<EntryType> &Specs,
                          ArrayRef<TemplateArgument> Args, void *&InsertPos);
@@ -782,7 +801,7 @@
                              EntryType *Entry, void *InsertPos);
 
   struct CommonBase {
-    CommonBase() : InstantiatedFromMember(nullptr, false) { }
+    CommonBase() : InstantiatedFromMember(nullptr, false) {}
 
     /// \brief The template from which this was most
     /// directly instantiated (or null).
@@ -791,11 +810,18 @@
     /// was explicitly specialized.
     llvm::PointerIntPair<RedeclarableTemplateDecl*, 1, bool>
       InstantiatedFromMember;
+
+    /// \brief If non-null, points to an array of specializations (including
+    /// partial specializations) known only by their external declaration IDs.
+    ///
+    /// The first value in the array is the number of specializations/partial
+    /// specializations that follow.
+    uint32_t *LazySpecializations = nullptr;
   };
 
   /// \brief Pointer to the common data shared by all declarations of this
   /// template.
-  mutable CommonBase *Common;
+  mutable CommonBase *Common = nullptr;
   
   /// \brief Retrieves the "common" pointer shared by all (re-)declarations of
   /// the same template. Calling this routine may implicitly allocate memory
@@ -809,8 +835,8 @@
                            ASTContext &C, DeclContext *DC, SourceLocation L,
                            DeclarationName Name, TemplateParameterList *Params,
                            NamedDecl *Decl)
-      : TemplateDecl(CTDI, DK, DC, L, Name, Params, Decl), redeclarable_base(C),
-        Common() {}
+      : TemplateDecl(CTDI, DK, DC, L, Name, Params, Decl), redeclarable_base(C)
+        {}
 
   RedeclarableTemplateDecl(Kind DK, ASTContext &C, DeclContext *DC,
                            SourceLocation L, DeclarationName Name,
@@ -818,6 +844,9 @@
       : RedeclarableTemplateDecl(nullptr, DK, C, DC, L, Name, Params, Decl) {}
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+  friend class ASTReader;
   template <class decl_type> friend class RedeclarableTemplate;
 
   /// \brief Retrieves the canonical declaration of this template.
@@ -858,7 +887,7 @@
   }
 
   /// \brief Retrieve the member template from which this template was
-  /// instantiated, or NULL if this template was not instantiated from a 
+  /// instantiated, or nullptr if this template was not instantiated from a 
   /// member template.
   ///
   /// A template is instantiated from a member template when the member 
@@ -902,8 +931,9 @@
     getCommonPtr()->InstantiatedFromMember.setPointer(TD);
   }
 
-  typedef redeclarable_base::redecl_range redecl_range;
-  typedef redeclarable_base::redecl_iterator redecl_iterator;
+  using redecl_range = redeclarable_base::redecl_range;
+  using redecl_iterator = redeclarable_base::redecl_iterator;
+
   using redeclarable_base::redecls_begin;
   using redeclarable_base::redecls_end;
   using redeclarable_base::redecls;
@@ -913,22 +943,20 @@
 
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
+
   static bool classofKind(Kind K) {
     return K >= firstRedeclarableTemplate && K <= lastRedeclarableTemplate;
   }
-
-  friend class ASTReader;
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 template <> struct RedeclarableTemplateDecl::
 SpecEntryTraits<FunctionTemplateSpecializationInfo> {
-  typedef FunctionDecl DeclType;
+  using DeclType = FunctionDecl;
 
   static DeclType *getDecl(FunctionTemplateSpecializationInfo *I) {
     return I->Function;
   }
+
   static ArrayRef<TemplateArgument>
   getTemplateArgs(FunctionTemplateSpecializationInfo *I) {
     return I->TemplateArguments->asArray();
@@ -938,11 +966,11 @@
 /// Declaration of a template function.
 class FunctionTemplateDecl : public RedeclarableTemplateDecl {
 protected:
+  friend class FunctionDecl;
+
   /// \brief Data that is common to all of the declarations of a given
   /// function template.
   struct Common : CommonBase {
-    Common() : InjectedArgs(), LazySpecializations() { }
-
     /// \brief The function template specializations for this function
     /// template, including explicit specializations and instantiations.
     llvm::FoldingSetVector<FunctionTemplateSpecializationInfo> Specializations;
@@ -954,14 +982,9 @@
     /// many template arguments as template parameaters) for the function
     /// template, and is allocated lazily, since most function templates do not
     /// require the use of this information.
-    TemplateArgument *InjectedArgs;
+    TemplateArgument *InjectedArgs = nullptr;
 
-    /// \brief If non-null, points to an array of specializations known only
-    /// by their external declaration IDs.
-    ///
-    /// The first value in the array is the number of of specializations
-    /// that follow.
-    uint32_t *LazySpecializations;
+    Common() = default;
   };
 
   FunctionTemplateDecl(ASTContext &C, DeclContext *DC, SourceLocation L,
@@ -976,8 +999,6 @@
     return static_cast<Common *>(RedeclarableTemplateDecl::getCommonPtr());
   }
 
-  friend class FunctionDecl;
-
   /// \brief Retrieve the set of function template specializations of this
   /// function template.
   llvm::FoldingSetVector<FunctionTemplateSpecializationInfo> &
@@ -991,12 +1012,15 @@
                          void *InsertPos);
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   /// \brief Load any lazily-loaded specializations from the external source.
   void LoadLazySpecializations() const;
 
   /// Get the underlying function declaration of the template.
   FunctionDecl *getTemplatedDecl() const {
-    return static_cast<FunctionDecl *>(TemplatedDecl.getPointer());
+    return static_cast<FunctionDecl *>(TemplatedDecl);
   }
 
   /// Returns whether this template declaration defines the primary
@@ -1020,14 +1044,11 @@
   }
 
   /// \brief Retrieve the previous declaration of this function template, or
-  /// NULL if no such declaration exists.
+  /// nullptr if no such declaration exists.
   FunctionTemplateDecl *getPreviousDecl() {
     return cast_or_null<FunctionTemplateDecl>(
              static_cast<RedeclarableTemplateDecl *>(this)->getPreviousDecl());
   }
-
-  /// \brief Retrieve the previous declaration of this function template, or
-  /// NULL if no such declaration exists.
   const FunctionTemplateDecl *getPreviousDecl() const {
     return cast_or_null<FunctionTemplateDecl>(
        static_cast<const RedeclarableTemplateDecl *>(this)->getPreviousDecl());
@@ -1047,12 +1068,13 @@
              RedeclarableTemplateDecl::getInstantiatedFromMemberTemplate());
   }
 
-  typedef SpecIterator<FunctionTemplateSpecializationInfo> spec_iterator;
-  typedef llvm::iterator_range<spec_iterator> spec_range;
+  using spec_iterator = SpecIterator<FunctionTemplateSpecializationInfo>;
+  using spec_range = llvm::iterator_range<spec_iterator>;
 
   spec_range specializations() const {
     return spec_range(spec_begin(), spec_end());
   }
+
   spec_iterator spec_begin() const {
     return makeSpecIterator(getSpecializations(), false);
   }
@@ -1083,9 +1105,6 @@
   // Implement isa/cast/dyncast support
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == FunctionTemplate; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 //===----------------------------------------------------------------------===//
@@ -1102,19 +1121,17 @@
 /// This class is inheritedly privately by different kinds of template
 /// parameters and is not part of the Decl hierarchy. Just a facility.
 class TemplateParmPosition {
-  TemplateParmPosition() = delete;
-
 protected:
-  TemplateParmPosition(unsigned D, unsigned P)
-    : Depth(D), Position(P)
-  { }
-
   // FIXME: These probably don't need to be ints. int:5 for depth, int:8 for
   // position? Maybe?
   unsigned Depth;
   unsigned Position;
 
+  TemplateParmPosition(unsigned D, unsigned P) : Depth(D), Position(P) {}
+
 public:
+  TemplateParmPosition() = delete;
+
   /// Get the nesting depth of the template parameter.
   unsigned getDepth() const { return Depth; }
   void setDepth(unsigned D) { Depth = D; }
@@ -1134,6 +1151,9 @@
 /// template<typename T> class vector;
 /// \endcode
 class TemplateTypeParmDecl : public TypeDecl {
+  /// Sema creates these on the stack during auto type deduction.
+  friend class Sema;
+
   /// \brief Whether this template type parameter was declaration with
   /// the 'typename' keyword.
   ///
@@ -1141,18 +1161,14 @@
   bool Typename : 1;
 
   /// \brief The default template argument, if any.
-  typedef DefaultArgStorage<TemplateTypeParmDecl, TypeSourceInfo *>
-      DefArgStorage;
+  using DefArgStorage =
+      DefaultArgStorage<TemplateTypeParmDecl, TypeSourceInfo *>;
   DefArgStorage DefaultArgument;
 
   TemplateTypeParmDecl(DeclContext *DC, SourceLocation KeyLoc,
                        SourceLocation IdLoc, IdentifierInfo *Id,
                        bool Typename)
-    : TypeDecl(TemplateTypeParm, DC, IdLoc, Id, KeyLoc), Typename(Typename),
-      DefaultArgument() { }
-
-  /// Sema creates these on the stack during auto type deduction.
-  friend class Sema;
+      : TypeDecl(TemplateTypeParm, DC, IdLoc, Id, KeyLoc), Typename(Typename) {}
 
 public:
   static TemplateTypeParmDecl *Create(const ASTContext &C, DeclContext *DC,
@@ -1199,6 +1215,7 @@
   void setDefaultArgument(TypeSourceInfo *DefArg) {
     DefaultArgument.set(DefArg);
   }
+
   /// \brief Set that this default argument was inherited from another
   /// parameter.
   void setInheritedDefaultArgument(const ASTContext &C,
@@ -1241,9 +1258,12 @@
       protected TemplateParmPosition,
       private llvm::TrailingObjects<NonTypeTemplateParmDecl,
                                     std::pair<QualType, TypeSourceInfo *>> {
+  friend class ASTDeclReader;
+  friend TrailingObjects;
+
   /// \brief The default template argument, if any, and whether or not
   /// it was inherited.
-  typedef DefaultArgStorage<NonTypeTemplateParmDecl, Expr*> DefArgStorage;
+  using DefArgStorage = DefaultArgStorage<NonTypeTemplateParmDecl, Expr *>;
   DefArgStorage DefaultArgument;
 
   // FIXME: Collapse this into TemplateParamPosition; or, just move depth/index
@@ -1255,10 +1275,10 @@
   /// \brief Whether this non-type template parameter is an "expanded"
   /// parameter pack, meaning that its type is a pack expansion and we
   /// already know the set of types that expansion expands to.
-  bool ExpandedParameterPack;
+  bool ExpandedParameterPack = false;
 
   /// \brief The number of types in an expanded parameter pack.
-  unsigned NumExpandedTypes;
+  unsigned NumExpandedTypes = 0;
 
   size_t numTrailingObjects(
       OverloadToken<std::pair<QualType, TypeSourceInfo *>>) const {
@@ -1269,10 +1289,8 @@
                           SourceLocation IdLoc, unsigned D, unsigned P,
                           IdentifierInfo *Id, QualType T,
                           bool ParameterPack, TypeSourceInfo *TInfo)
-    : DeclaratorDecl(NonTypeTemplateParm, DC, IdLoc, Id, T, TInfo, StartLoc),
-      TemplateParmPosition(D, P), ParameterPack(ParameterPack),
-      ExpandedParameterPack(false), NumExpandedTypes(0)
-  { }
+      : DeclaratorDecl(NonTypeTemplateParm, DC, IdLoc, Id, T, TInfo, StartLoc),
+        TemplateParmPosition(D, P), ParameterPack(ParameterPack) {}
 
   NonTypeTemplateParmDecl(DeclContext *DC, SourceLocation StartLoc,
                           SourceLocation IdLoc, unsigned D, unsigned P,
@@ -1281,9 +1299,6 @@
                           ArrayRef<QualType> ExpandedTypes,
                           ArrayRef<TypeSourceInfo *> ExpandedTInfos);
 
-  friend class ASTDeclReader;
-  friend TrailingObjects;
-
 public:
   static NonTypeTemplateParmDecl *
   Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
@@ -1428,11 +1443,9 @@
       protected TemplateParmPosition,
       private llvm::TrailingObjects<TemplateTemplateParmDecl,
                                     TemplateParameterList *> {
-  void anchor() override;
-
   /// \brief The default template argument, if any.
-  typedef DefaultArgStorage<TemplateTemplateParmDecl, TemplateArgumentLoc *>
-      DefArgStorage;
+  using DefArgStorage =
+      DefaultArgStorage<TemplateTemplateParmDecl, TemplateArgumentLoc *>;
   DefArgStorage DefaultArgument;
 
   /// \brief Whether this parameter is a parameter pack.
@@ -1441,25 +1454,29 @@
   /// \brief Whether this template template parameter is an "expanded"
   /// parameter pack, meaning that it is a pack expansion and we
   /// already know the set of template parameters that expansion expands to.
-  bool ExpandedParameterPack;
+  bool ExpandedParameterPack = false;
 
   /// \brief The number of parameters in an expanded parameter pack.
-  unsigned NumExpandedParams;
+  unsigned NumExpandedParams = 0;
 
   TemplateTemplateParmDecl(DeclContext *DC, SourceLocation L,
                            unsigned D, unsigned P, bool ParameterPack,
                            IdentifierInfo *Id, TemplateParameterList *Params)
-    : TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
-      TemplateParmPosition(D, P), ParameterPack(ParameterPack),
-      ExpandedParameterPack(false), NumExpandedParams(0)
-    { }
+      : TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
+        TemplateParmPosition(D, P), ParameterPack(ParameterPack) {}
 
   TemplateTemplateParmDecl(DeclContext *DC, SourceLocation L,
                            unsigned D, unsigned P,
                            IdentifierInfo *Id, TemplateParameterList *Params,
                            ArrayRef<TemplateParameterList *> Expansions);
 
+  void anchor() override;
+
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+  friend TrailingObjects;
+
   static TemplateTemplateParmDecl *Create(const ASTContext &C, DeclContext *DC,
                                           SourceLocation L, unsigned D,
                                           unsigned P, bool ParameterPack,
@@ -1579,22 +1596,18 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == TemplateTemplateParm; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
-  friend TrailingObjects;
 };
 
 /// \brief Represents the builtin template declaration which is used to
 /// implement __make_integer_seq and other builtin templates.  It serves
 /// no real purpose beyond existing as a place to hold template parameters.
 class BuiltinTemplateDecl : public TemplateDecl {
-  void anchor() override;
+  BuiltinTemplateKind BTK;
 
   BuiltinTemplateDecl(const ASTContext &C, DeclContext *DC,
                       DeclarationName Name, BuiltinTemplateKind BTK);
 
-  BuiltinTemplateKind BTK;
+  void anchor() override;
 
 public:
   // Implement isa/cast/dyncast support
@@ -1629,7 +1642,6 @@
 /// \endcode
 class ClassTemplateSpecializationDecl
   : public CXXRecordDecl, public llvm::FoldingSetNode {
-
   /// \brief Structure that stores information about a class template
   /// specialization that was instantiated from a class template partial
   /// specialization.
@@ -1650,19 +1662,20 @@
   /// \brief Further info for explicit template specialization/instantiation.
   struct ExplicitSpecializationInfo {
     /// \brief The type-as-written.
-    TypeSourceInfo *TypeAsWritten;
+    TypeSourceInfo *TypeAsWritten = nullptr;
+
     /// \brief The location of the extern keyword.
     SourceLocation ExternLoc;
+
     /// \brief The location of the template keyword.
     SourceLocation TemplateKeywordLoc;
 
-    ExplicitSpecializationInfo()
-      : TypeAsWritten(nullptr), ExternLoc(), TemplateKeywordLoc() {}
+    ExplicitSpecializationInfo() = default;
   };
 
   /// \brief Further info for explicit template specialization/instantiation.
   /// Does not apply to implicit specializations.
-  ExplicitSpecializationInfo *ExplicitInfo;
+  ExplicitSpecializationInfo *ExplicitInfo = nullptr;
 
   /// \brief The template arguments used to describe this specialization.
   const TemplateArgumentList *TemplateArgs;
@@ -1685,6 +1698,9 @@
   explicit ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static ClassTemplateSpecializationDecl *
   Create(ASTContext &Context, TagKind TK, DeclContext *DC,
          SourceLocation StartLoc, SourceLocation IdLoc,
@@ -1828,6 +1844,7 @@
       ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
     ExplicitInfo->TypeAsWritten = T;
   }
+
   /// \brief Gets the type of this specialization as it was written by
   /// the user, if it was so written.
   TypeSourceInfo *getTypeAsWritten() const {
@@ -1838,6 +1855,7 @@
   SourceLocation getExternLoc() const {
     return ExplicitInfo ? ExplicitInfo->ExternLoc : SourceLocation();
   }
+
   /// \brief Sets the location of the extern keyword.
   void setExternLoc(SourceLocation Loc) {
     if (!ExplicitInfo)
@@ -1851,6 +1869,7 @@
       ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
     ExplicitInfo->TemplateKeywordLoc = Loc;
   }
+
   /// \brief Gets the location of the template keyword, if present.
   SourceLocation getTemplateKeywordLoc() const {
     return ExplicitInfo ? ExplicitInfo->TemplateKeywordLoc : SourceLocation();
@@ -1871,25 +1890,21 @@
   }
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
+
   static bool classofKind(Kind K) {
     return K >= firstClassTemplateSpecialization &&
            K <= lastClassTemplateSpecialization;
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 class ClassTemplatePartialSpecializationDecl
   : public ClassTemplateSpecializationDecl {
-  void anchor() override;
-
   /// \brief The list of template parameters
-  TemplateParameterList* TemplateParams;
+  TemplateParameterList* TemplateParams = nullptr;
 
   /// \brief The source info for the template arguments as written.
   /// FIXME: redundant with TypeAsWritten?
-  const ASTTemplateArgumentListInfo *ArgsAsWritten;
+  const ASTTemplateArgumentListInfo *ArgsAsWritten = nullptr;
 
   /// \brief The class template partial specialization from which this
   /// class template partial specialization was instantiated.
@@ -1911,10 +1926,14 @@
 
   ClassTemplatePartialSpecializationDecl(ASTContext &C)
     : ClassTemplateSpecializationDecl(C, ClassTemplatePartialSpecialization),
-      TemplateParams(nullptr), ArgsAsWritten(nullptr),
       InstantiatedFromMember(nullptr, false) {}
 
+  void anchor() override;
+
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static ClassTemplatePartialSpecializationDecl *
   Create(ASTContext &Context, TagKind TK, DeclContext *DC,
          SourceLocation StartLoc, SourceLocation IdLoc,
@@ -2024,12 +2043,10 @@
   // FIXME: Add Profile support!
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
+
   static bool classofKind(Kind K) {
     return K == ClassTemplatePartialSpecialization;
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// Declaration of a class template.
@@ -2038,8 +2055,6 @@
   /// \brief Data that is common to all of the declarations of a given
   /// class template.
   struct Common : CommonBase {
-    Common() : LazySpecializations() { }
-
     /// \brief The class template specializations for this class
     /// template, including explicit specializations and instantiations.
     llvm::FoldingSetVector<ClassTemplateSpecializationDecl> Specializations;
@@ -2052,12 +2067,7 @@
     /// \brief The injected-class-name type for this class template.
     QualType InjectedClassNameType;
 
-    /// \brief If non-null, points to an array of specializations (including
-    /// partial specializations) known only by their external declaration IDs.
-    ///
-    /// The first value in the array is the number of of specializations/
-    /// partial specializations that follow.
-    uint32_t *LazySpecializations;
+    Common() = default;
   };
 
   /// \brief Retrieve the set of specializations of this class template.
@@ -2087,12 +2097,15 @@
   }
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   /// \brief Load any lazily-loaded specializations from the external source.
   void LoadLazySpecializations() const;
 
   /// \brief Get the underlying class declarations of the template.
   CXXRecordDecl *getTemplatedDecl() const {
-    return static_cast<CXXRecordDecl *>(TemplatedDecl.getPointer());
+    return static_cast<CXXRecordDecl *>(TemplatedDecl);
   }
 
   /// \brief Returns whether this template declaration defines the primary
@@ -2132,14 +2145,11 @@
   }
 
   /// \brief Retrieve the previous declaration of this class template, or
-  /// NULL if no such declaration exists.
+  /// nullptr if no such declaration exists.
   ClassTemplateDecl *getPreviousDecl() {
     return cast_or_null<ClassTemplateDecl>(
              static_cast<RedeclarableTemplateDecl *>(this)->getPreviousDecl());
   }
-
-  /// \brief Retrieve the previous declaration of this class template, or
-  /// NULL if no such declaration exists.
   const ClassTemplateDecl *getPreviousDecl() const {
     return cast_or_null<ClassTemplateDecl>(
              static_cast<const RedeclarableTemplateDecl *>(
@@ -2180,7 +2190,7 @@
   /// template.
   ///
   /// \returns the class template partial specialization that exactly matches
-  /// the type \p T, or NULL if no such partial specialization exists.
+  /// the type \p T, or nullptr if no such partial specialization exists.
   ClassTemplatePartialSpecializationDecl *findPartialSpecialization(QualType T);
 
   /// \brief Find a class template partial specialization which was instantiated
@@ -2189,8 +2199,8 @@
   /// \param D a member class template partial specialization.
   ///
   /// \returns the class template partial specialization which was instantiated
-  /// from the given member partial specialization, or NULL if no such partial
-  /// specialization exists.
+  /// from the given member partial specialization, or nullptr if no such
+  /// partial specialization exists.
   ClassTemplatePartialSpecializationDecl *
   findPartialSpecInstantiatedFromMember(
                                      ClassTemplatePartialSpecializationDecl *D);
@@ -2211,8 +2221,8 @@
   /// \endcode
   QualType getInjectedClassNameSpecialization();
 
-  typedef SpecIterator<ClassTemplateSpecializationDecl> spec_iterator;
-  typedef llvm::iterator_range<spec_iterator> spec_range;
+  using spec_iterator = SpecIterator<ClassTemplateSpecializationDecl>;
+  using spec_range = llvm::iterator_range<spec_iterator>;
 
   spec_range specializations() const {
     return spec_range(spec_begin(), spec_end());
@@ -2229,9 +2239,6 @@
   // Implement isa/cast/dyncast support
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == ClassTemplate; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// \brief Declaration of a friend template.
@@ -2249,15 +2256,16 @@
 /// will yield a FriendDecl, not a FriendTemplateDecl.
 class FriendTemplateDecl : public Decl {
   virtual void anchor();
+
 public:
-  typedef llvm::PointerUnion<NamedDecl*,TypeSourceInfo*> FriendUnion;
+  using FriendUnion = llvm::PointerUnion<NamedDecl *,TypeSourceInfo *>;
 
 private:
   // The number of template parameters;  always non-zero.
-  unsigned NumParams;
+  unsigned NumParams = 0;
 
   // The parameter list.
-  TemplateParameterList **Params;
+  TemplateParameterList **Params = nullptr;
 
   // The declaration that's a friend of this class.
   FriendUnion Friend;
@@ -2271,13 +2279,11 @@
       : Decl(Decl::FriendTemplate, DC, Loc), NumParams(Params.size()),
         Params(Params.data()), Friend(Friend), FriendLoc(FriendLoc) {}
 
-  FriendTemplateDecl(EmptyShell Empty)
-    : Decl(Decl::FriendTemplate, Empty),
-      NumParams(0),
-      Params(nullptr)
-  {}
+  FriendTemplateDecl(EmptyShell Empty) : Decl(Decl::FriendTemplate, Empty) {}
 
 public:
+  friend class ASTDeclReader;
+
   static FriendTemplateDecl *
   Create(ASTContext &Context, DeclContext *DC, SourceLocation Loc,
          MutableArrayRef<TemplateParameterList *> Params, FriendUnion Friend,
@@ -2316,8 +2322,6 @@
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == Decl::FriendTemplate; }
-
-  friend class ASTDeclReader;
 };
 
 /// \brief Declaration of an alias template.
@@ -2328,7 +2332,7 @@
 /// \endcode
 class TypeAliasTemplateDecl : public RedeclarableTemplateDecl {
 protected:
-  typedef CommonBase Common;
+  using Common = CommonBase;
 
   TypeAliasTemplateDecl(ASTContext &C, DeclContext *DC, SourceLocation L,
                         DeclarationName Name, TemplateParameterList *Params,
@@ -2343,9 +2347,12 @@
   }
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   /// Get the underlying function declaration of the template.
   TypeAliasDecl *getTemplatedDecl() const {
-    return static_cast<TypeAliasDecl *>(TemplatedDecl.getPointer());
+    return static_cast<TypeAliasDecl *>(TemplatedDecl);
   }
 
 
@@ -2359,14 +2366,11 @@
   }
 
   /// \brief Retrieve the previous declaration of this function template, or
-  /// NULL if no such declaration exists.
+  /// nullptr if no such declaration exists.
   TypeAliasTemplateDecl *getPreviousDecl() {
     return cast_or_null<TypeAliasTemplateDecl>(
              static_cast<RedeclarableTemplateDecl *>(this)->getPreviousDecl());
   }
-
-  /// \brief Retrieve the previous declaration of this function template, or
-  /// NULL if no such declaration exists.
   const TypeAliasTemplateDecl *getPreviousDecl() const {
     return cast_or_null<TypeAliasTemplateDecl>(
              static_cast<const RedeclarableTemplateDecl *>(
@@ -2378,7 +2382,6 @@
              RedeclarableTemplateDecl::getInstantiatedFromMemberTemplate());
   }
 
-
   /// \brief Create a function template node.
   static TypeAliasTemplateDecl *Create(ASTContext &C, DeclContext *DC,
                                        SourceLocation L,
@@ -2392,9 +2395,6 @@
   // Implement isa/cast/dyncast support
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == TypeAliasTemplate; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// \brief Declaration of a function specialization at template class scope.
@@ -2414,7 +2414,9 @@
 /// CXXMethodDecl. Then during an instantiation of class A, it will be
 /// transformed into an actual function specialization.
 class ClassScopeFunctionSpecializationDecl : public Decl {
-  virtual void anchor();
+  CXXMethodDecl *Specialization;
+  bool HasExplicitTemplateArgs;
+  TemplateArgumentListInfo TemplateArgs;
 
   ClassScopeFunctionSpecializationDecl(DeclContext *DC, SourceLocation Loc,
                                        CXXMethodDecl *FD, bool Args,
@@ -2424,13 +2426,14 @@
         TemplateArgs(std::move(TemplArgs)) {}
 
   ClassScopeFunctionSpecializationDecl(EmptyShell Empty)
-    : Decl(Decl::ClassScopeFunctionSpecialization, Empty) {}
+      : Decl(Decl::ClassScopeFunctionSpecialization, Empty) {}
 
-  CXXMethodDecl *Specialization;
-  bool HasExplicitTemplateArgs;
-  TemplateArgumentListInfo TemplateArgs;
+  virtual void anchor();
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   CXXMethodDecl *getSpecialization() const { return Specialization; }
   bool hasExplicitTemplateArgs() const { return HasExplicitTemplateArgs; }
   const TemplateArgumentListInfo& templateArgs() const { return TemplateArgs; }
@@ -2450,17 +2453,15 @@
   
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
+
   static bool classofKind(Kind K) {
     return K == Decl::ClassScopeFunctionSpecialization;
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// Implementation of inline functions that require the template declarations
 inline AnyFunctionDecl::AnyFunctionDecl(FunctionTemplateDecl *FTD)
-  : Function(FTD) { }
+    : Function(FTD) {}
 
 /// \brief Represents a variable template specialization, which refers to
 /// a variable template with a given set of template arguments.
@@ -2498,19 +2499,20 @@
   /// \brief Further info for explicit template specialization/instantiation.
   struct ExplicitSpecializationInfo {
     /// \brief The type-as-written.
-    TypeSourceInfo *TypeAsWritten;
+    TypeSourceInfo *TypeAsWritten = nullptr;
+
     /// \brief The location of the extern keyword.
     SourceLocation ExternLoc;
+
     /// \brief The location of the template keyword.
     SourceLocation TemplateKeywordLoc;
 
-    ExplicitSpecializationInfo()
-        : TypeAsWritten(nullptr), ExternLoc(), TemplateKeywordLoc() {}
+    ExplicitSpecializationInfo() = default;
   };
 
   /// \brief Further info for explicit template specialization/instantiation.
   /// Does not apply to implicit specializations.
-  ExplicitSpecializationInfo *ExplicitInfo;
+  ExplicitSpecializationInfo *ExplicitInfo = nullptr;
 
   /// \brief The template arguments used to describe this specialization.
   const TemplateArgumentList *TemplateArgs;
@@ -2523,6 +2525,12 @@
   /// Really a value of type TemplateSpecializationKind.
   unsigned SpecializationKind : 3;
 
+  /// \brief Whether this declaration is a complete definition of the
+  /// variable template specialization. We can't otherwise tell apart
+  /// an instantiated declaration from an instantiated definition with
+  /// no initializer.
+  unsigned IsCompleteDefinition : 1;
+
 protected:
   VarTemplateSpecializationDecl(Kind DK, ASTContext &Context, DeclContext *DC,
                                 SourceLocation StartLoc, SourceLocation IdLoc,
@@ -2534,6 +2542,10 @@
   explicit VarTemplateSpecializationDecl(Kind DK, ASTContext &Context);
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+  friend class VarDecl;
+
   static VarTemplateSpecializationDecl *
   Create(ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
          SourceLocation IdLoc, VarTemplateDecl *SpecializedTemplate, QualType T,
@@ -2596,6 +2608,8 @@
     PointOfInstantiation = Loc;
   }
 
+  void setCompleteDefinition() { IsCompleteDefinition = true; }
+
   /// \brief If this variable template specialization is an instantiation of
   /// a template (rather than an explicit specialization), return the
   /// variable template or variable template partial specialization from which
@@ -2668,6 +2682,7 @@
       ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
     ExplicitInfo->TypeAsWritten = T;
   }
+
   /// \brief Gets the type of this specialization as it was written by
   /// the user, if it was so written.
   TypeSourceInfo *getTypeAsWritten() const {
@@ -2678,6 +2693,7 @@
   SourceLocation getExternLoc() const {
     return ExplicitInfo ? ExplicitInfo->ExternLoc : SourceLocation();
   }
+
   /// \brief Sets the location of the extern keyword.
   void setExternLoc(SourceLocation Loc) {
     if (!ExplicitInfo)
@@ -2691,6 +2707,7 @@
       ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
     ExplicitInfo->TemplateKeywordLoc = Loc;
   }
+
   /// \brief Gets the location of the template keyword, if present.
   SourceLocation getTemplateKeywordLoc() const {
     return ExplicitInfo ? ExplicitInfo->TemplateKeywordLoc : SourceLocation();
@@ -2709,25 +2726,21 @@
   }
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
+
   static bool classofKind(Kind K) {
     return K >= firstVarTemplateSpecialization &&
            K <= lastVarTemplateSpecialization;
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 class VarTemplatePartialSpecializationDecl
     : public VarTemplateSpecializationDecl {
-  void anchor() override;
-
   /// \brief The list of template parameters
-  TemplateParameterList *TemplateParams;
+  TemplateParameterList *TemplateParams = nullptr;
 
   /// \brief The source info for the template arguments as written.
   /// FIXME: redundant with TypeAsWritten?
-  const ASTTemplateArgumentListInfo *ArgsAsWritten;
+  const ASTTemplateArgumentListInfo *ArgsAsWritten = nullptr;
 
   /// \brief The variable template partial specialization from which this
   /// variable template partial specialization was instantiated.
@@ -2745,11 +2758,16 @@
       const ASTTemplateArgumentListInfo *ArgInfos);
 
   VarTemplatePartialSpecializationDecl(ASTContext &Context)
-    : VarTemplateSpecializationDecl(VarTemplatePartialSpecialization, Context),
-      TemplateParams(nullptr), ArgsAsWritten(nullptr),
-      InstantiatedFromMember(nullptr, false) {}
+      : VarTemplateSpecializationDecl(VarTemplatePartialSpecialization,
+                                      Context),
+        InstantiatedFromMember(nullptr, false) {}
+
+  void anchor() override;
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   static VarTemplatePartialSpecializationDecl *
   Create(ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
          SourceLocation IdLoc, TemplateParameterList *Params,
@@ -2841,12 +2859,10 @@
   }
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
+
   static bool classofKind(Kind K) {
     return K == VarTemplatePartialSpecialization;
   }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// Declaration of a variable template.
@@ -2855,8 +2871,6 @@
   /// \brief Data that is common to all of the declarations of a given
   /// variable template.
   struct Common : CommonBase {
-    Common() : LazySpecializations() {}
-
     /// \brief The variable template specializations for this variable
     /// template, including explicit specializations and instantiations.
     llvm::FoldingSetVector<VarTemplateSpecializationDecl> Specializations;
@@ -2866,12 +2880,7 @@
     llvm::FoldingSetVector<VarTemplatePartialSpecializationDecl>
     PartialSpecializations;
 
-    /// \brief If non-null, points to an array of specializations (including
-    /// partial specializations) known ownly by their external declaration IDs.
-    ///
-    /// The first value in the array is the number of of specializations/
-    /// partial specializations that follow.
-    uint32_t *LazySpecializations;
+    Common() = default;
   };
 
   /// \brief Retrieve the set of specializations of this variable template.
@@ -2895,12 +2904,15 @@
   }
 
 public:
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
   /// \brief Load any lazily-loaded specializations from the external source.
   void LoadLazySpecializations() const;
 
   /// \brief Get the underlying variable declarations of the template.
   VarDecl *getTemplatedDecl() const {
-    return static_cast<VarDecl *>(TemplatedDecl.getPointer());
+    return static_cast<VarDecl *>(TemplatedDecl);
   }
 
   /// \brief Returns whether this template declaration defines the primary
@@ -2937,14 +2949,11 @@
   }
 
   /// \brief Retrieve the previous declaration of this variable template, or
-  /// NULL if no such declaration exists.
+  /// nullptr if no such declaration exists.
   VarTemplateDecl *getPreviousDecl() {
     return cast_or_null<VarTemplateDecl>(
         static_cast<RedeclarableTemplateDecl *>(this)->getPreviousDecl());
   }
-
-  /// \brief Retrieve the previous declaration of this variable template, or
-  /// NULL if no such declaration exists.
   const VarTemplateDecl *getPreviousDecl() const {
     return cast_or_null<VarTemplateDecl>(
             static_cast<const RedeclarableTemplateDecl *>(
@@ -2986,13 +2995,13 @@
   ///
   /// \returns the variable template partial specialization which was
   /// instantiated
-  /// from the given member partial specialization, or NULL if no such partial
-  /// specialization exists.
+  /// from the given member partial specialization, or nullptr if no such
+  /// partial specialization exists.
   VarTemplatePartialSpecializationDecl *findPartialSpecInstantiatedFromMember(
       VarTemplatePartialSpecializationDecl *D);
 
-  typedef SpecIterator<VarTemplateSpecializationDecl> spec_iterator;
-  typedef llvm::iterator_range<spec_iterator> spec_range;
+  using spec_iterator = SpecIterator<VarTemplateSpecializationDecl>;
+  using spec_range = llvm::iterator_range<spec_iterator>;
 
   spec_range specializations() const {
     return spec_range(spec_begin(), spec_end());
@@ -3009,9 +3018,6 @@
   // Implement isa/cast/dyncast support
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == VarTemplate; }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 inline NamedDecl *getAsNamedDecl(TemplateParameter P) {
@@ -3032,6 +3038,6 @@
              : nullptr;
 }
 
-} /* end of namespace clang */
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_DECLTEMPLATE_H
diff --git a/include/clang/AST/DeclVisitor.h b/include/clang/AST/DeclVisitor.h
index 4eaae35..3ff274b 100644
--- a/include/clang/AST/DeclVisitor.h
+++ b/include/clang/AST/DeclVisitor.h
@@ -1,4 +1,4 @@
-//===--- DeclVisitor.h - Visitor for Decl subclasses ------------*- C++ -*-===//
+//===- DeclVisitor.h - Visitor for Decl subclasses --------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,27 +10,30 @@
 //  This file defines the DeclVisitor interface.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_AST_DECLVISITOR_H
 #define LLVM_CLANG_AST_DECLVISITOR_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclFriend.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclOpenMP.h"
 #include "clang/AST/DeclTemplate.h"
+#include "llvm/Support/ErrorHandling.h"
 
 namespace clang {
+
 namespace declvisitor {
 
-template <typename T> struct make_ptr       { typedef       T *type; };
-template <typename T> struct make_const_ptr { typedef const T *type; };
+template <typename T> struct make_ptr { using type = T *; };
+template <typename T> struct make_const_ptr { using type = const T *; };
 
 /// \brief A simple visitor class that helps create declaration visitors.
 template<template <typename> class Ptr, typename ImplClass, typename RetTy=void>
 class Base {
 public:
-
 #define PTR(CLASS) typename Ptr<CLASS>::type
 #define DISPATCH(NAME, CLASS) \
   return static_cast<ImplClass*>(this)->Visit##NAME(static_cast<PTR(CLASS)>(D))
@@ -57,23 +60,23 @@
 #undef DISPATCH
 };
 
-} // end namespace declvisitor
+} // namespace declvisitor
 
 /// \brief A simple visitor class that helps create declaration visitors.
 ///
 /// This class does not preserve constness of Decl pointers (see also
 /// ConstDeclVisitor).
-template<typename ImplClass, typename RetTy=void>
+template<typename ImplClass, typename RetTy = void>
 class DeclVisitor
  : public declvisitor::Base<declvisitor::make_ptr, ImplClass, RetTy> {};
 
 /// \brief A simple visitor class that helps create declaration visitors.
 ///
 /// This class preserves constness of Decl pointers (see also DeclVisitor).
-template<typename ImplClass, typename RetTy=void>
+template<typename ImplClass, typename RetTy = void>
 class ConstDeclVisitor
  : public declvisitor::Base<declvisitor::make_const_ptr, ImplClass, RetTy> {};
 
-}  // end namespace clang
+} // namespace clang
 
 #endif // LLVM_CLANG_AST_DECLVISITOR_H
diff --git a/include/clang/AST/DeclarationName.h b/include/clang/AST/DeclarationName.h
index 5e773c9..467b02c 100644
--- a/include/clang/AST/DeclarationName.h
+++ b/include/clang/AST/DeclarationName.h
@@ -1,4 +1,4 @@
-//===-- DeclarationName.h - Representation of declaration names -*- C++ -*-===//
+//===- DeclarationName.h - Representation of declaration names --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,36 +10,42 @@
 // This file declares the DeclarationName and DeclarationNameTable classes.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_AST_DECLARATIONNAME_H
 #define LLVM_CLANG_AST_DECLARATIONNAME_H
 
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Support/Compiler.h"
-
-namespace llvm {
-  template <typename T> struct DenseMapInfo;
-}
+#include "llvm/Support/type_traits.h"
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <string>
 
 namespace clang {
-  class ASTContext;
-  class CXXDeductionGuideNameExtra;
-  class CXXLiteralOperatorIdName;
-  class CXXOperatorIdName;
-  class CXXSpecialName;
-  class DeclarationNameExtra;
-  class IdentifierInfo;
-  class MultiKeywordSelector;
-  enum OverloadedOperatorKind : int;
-  struct PrintingPolicy;
-  class QualType;
-  class TemplateDecl;
-  class Type;
-  class TypeSourceInfo;
-  class UsingDirectiveDecl;
 
-  template <typename> class CanQual;
-  typedef CanQual<Type> CanQualType;
+class ASTContext;
+template <typename> class CanQual;
+class CXXDeductionGuideNameExtra;
+class CXXLiteralOperatorIdName;
+class CXXOperatorIdName;
+class CXXSpecialName;
+class DeclarationNameExtra;
+class IdentifierInfo;
+class MultiKeywordSelector;
+enum OverloadedOperatorKind : int;
+struct PrintingPolicy;
+class QualType;
+class TemplateDecl;
+class Type;
+class TypeSourceInfo;
+class UsingDirectiveDecl;
+
+using CanQualType = CanQual<Type>;
 
 /// DeclarationName - The name of a declaration. In the common case,
 /// this just stores an IdentifierInfo pointer to a normal
@@ -63,9 +69,13 @@
     CXXLiteralOperatorName,
     CXXUsingDirective
   };
+
   static const unsigned NumNameKinds = CXXUsingDirective + 1;
 
 private:
+  friend class DeclarationNameTable;
+  friend class NamedDecl;
+
   /// StoredNameKind - The kind of name that is actually stored in the
   /// upper bits of the Ptr field. This is only used internally.
   ///
@@ -99,7 +109,18 @@
   ///   DeclarationNameExtra structure, whose first value will tell us
   ///   whether this is an Objective-C selector, C++ operator-id name,
   ///   or special C++ name.
-  uintptr_t Ptr;
+  uintptr_t Ptr = 0;
+
+  // Construct a declaration name from the name of a C++ constructor,
+  // destructor, or conversion function.
+  DeclarationName(DeclarationNameExtra *Name)
+      : Ptr(reinterpret_cast<uintptr_t>(Name)) {
+    assert((Ptr & PtrMask) == 0 && "Improperly aligned DeclarationNameExtra");
+    Ptr |= StoredDeclarationNameExtra;
+  }
+
+  /// Construct a declaration name from a raw pointer.
+  DeclarationName(uintptr_t Ptr) : Ptr(Ptr) {}
 
   /// getStoredNameKind - Return the kind of object that is stored in
   /// Ptr.
@@ -146,36 +167,22 @@
     return nullptr;
   }
 
-  // Construct a declaration name from the name of a C++ constructor,
-  // destructor, or conversion function.
-  DeclarationName(DeclarationNameExtra *Name)
-    : Ptr(reinterpret_cast<uintptr_t>(Name)) {
-    assert((Ptr & PtrMask) == 0 && "Improperly aligned DeclarationNameExtra");
-    Ptr |= StoredDeclarationNameExtra;
-  }
-
-  /// Construct a declaration name from a raw pointer.
-  DeclarationName(uintptr_t Ptr) : Ptr(Ptr) { }
-
-  friend class DeclarationNameTable;
-  friend class NamedDecl;
-
   /// getFETokenInfoAsVoidSlow - Retrieves the front end-specified pointer
   /// for this name as a void pointer if it's not an identifier.
   void *getFETokenInfoAsVoidSlow() const;
 
 public:
   /// DeclarationName - Used to create an empty selector.
-  DeclarationName() : Ptr(0) { }
+  DeclarationName() = default;
 
   // Construct a declaration name from an IdentifierInfo *.
   DeclarationName(const IdentifierInfo *II)
-    : Ptr(reinterpret_cast<uintptr_t>(II)) {
+      : Ptr(reinterpret_cast<uintptr_t>(II)) {
     assert((Ptr & PtrMask) == 0 && "Improperly aligned IdentifierInfo");
   }
 
   // Construct a declaration name from an Objective-C selector.
-  DeclarationName(Selector Sel) : Ptr(Sel.InfoPtr) { }
+  DeclarationName(Selector Sel) : Ptr(Sel.InfoPtr) {}
 
   /// getUsingDirectiveName - Return name for all using-directives.
   static DeclarationName getUsingDirectiveName();
@@ -344,16 +351,24 @@
 /// getCXXConstructorName).
 class DeclarationNameTable {
   const ASTContext &Ctx;
-  void *CXXSpecialNamesImpl; // Actually a FoldingSet<CXXSpecialName> *
-  CXXOperatorIdName *CXXOperatorNames; // Operator names
-  void *CXXLiteralOperatorNames; // Actually a CXXOperatorIdName*
-  void *CXXDeductionGuideNames; // FoldingSet<CXXDeductionGuideNameExtra> *
 
-  DeclarationNameTable(const DeclarationNameTable&) = delete;
-  void operator=(const DeclarationNameTable&) = delete;
+  // Actually a FoldingSet<CXXSpecialName> *
+  void *CXXSpecialNamesImpl;
+
+  // Operator names
+  CXXOperatorIdName *CXXOperatorNames;
+
+  // Actually a CXXOperatorIdName*
+  void *CXXLiteralOperatorNames;
+
+  // FoldingSet<CXXDeductionGuideNameExtra> *
+  void *CXXDeductionGuideNames;
 
 public:
   DeclarationNameTable(const ASTContext &C);
+  DeclarationNameTable(const DeclarationNameTable &) = delete;
+  DeclarationNameTable &operator=(const DeclarationNameTable &) = delete;
+
   ~DeclarationNameTable();
 
   /// getIdentifier - Create a declaration name that is a simple
@@ -428,10 +443,10 @@
   };
 
   DeclarationNameLoc(DeclarationName Name);
+
   // FIXME: this should go away once all DNLocs are properly initialized.
   DeclarationNameLoc() { memset((void*) this, 0, sizeof(*this)); }
-}; // struct DeclarationNameLoc
-
+};
 
 /// DeclarationNameInfo - A collector data type for bundling together
 /// a DeclarationName and the correspnding source/type location info.
@@ -439,29 +454,33 @@
 private:
   /// Name - The declaration name, also encoding name kind.
   DeclarationName Name;
+
   /// Loc - The main source location for the declaration name.
   SourceLocation NameLoc;
+
   /// Info - Further source/type location info for special kinds of names.
   DeclarationNameLoc LocInfo;
 
 public:
   // FIXME: remove it.
-  DeclarationNameInfo() {}
+  DeclarationNameInfo() = default;
 
   DeclarationNameInfo(DeclarationName Name, SourceLocation NameLoc)
-    : Name(Name), NameLoc(NameLoc), LocInfo(Name) {}
+      : Name(Name), NameLoc(NameLoc), LocInfo(Name) {}
 
   DeclarationNameInfo(DeclarationName Name, SourceLocation NameLoc,
                       DeclarationNameLoc LocInfo)
-    : Name(Name), NameLoc(NameLoc), LocInfo(LocInfo) {}
+      : Name(Name), NameLoc(NameLoc), LocInfo(LocInfo) {}
 
   /// getName - Returns the embedded declaration name.
   DeclarationName getName() const { return Name; }
+
   /// setName - Sets the embedded declaration name.
   void setName(DeclarationName N) { Name = N; }
 
   /// getLoc - Returns the main location of the declaration name.
   SourceLocation getLoc() const { return NameLoc; }
+
   /// setLoc - Sets the main location of the declaration name.
   void setLoc(SourceLocation L) { NameLoc = L; }
 
@@ -477,6 +496,7 @@
            Name.getNameKind() == DeclarationName::CXXConversionFunctionName);
     return LocInfo.NamedType.TInfo;
   }
+
   /// setNamedTypeInfo - Sets the source type info associated to
   /// the name. Assumes it is a constructor, destructor or conversion.
   void setNamedTypeInfo(TypeSourceInfo *TInfo) {
@@ -495,6 +515,7 @@
      SourceLocation::getFromRawEncoding(LocInfo.CXXOperatorName.EndOpNameLoc)
                        );
   }
+
   /// setCXXOperatorNameRange - Sets the range of the operator name
   /// (without the operator keyword). Assumes it is a C++ operator.
   void setCXXOperatorNameRange(SourceRange R) {
@@ -511,6 +532,7 @@
     return SourceLocation::
       getFromRawEncoding(LocInfo.CXXLiteralOperatorName.OpNameLoc);
   }
+
   /// setCXXLiteralOperatorNameLoc - Sets the location of the literal
   /// operator name (not the operator keyword).
   /// Assumes it is a literal operator.
@@ -534,15 +556,19 @@
 
   /// getBeginLoc - Retrieve the location of the first token.
   SourceLocation getBeginLoc() const { return NameLoc; }
+
   /// getEndLoc - Retrieve the location of the last token.
   SourceLocation getEndLoc() const;
+
   /// getSourceRange - The range of the declaration name.
   SourceRange getSourceRange() const LLVM_READONLY {
     return SourceRange(getLocStart(), getLocEnd());
   }
+
   SourceLocation getLocStart() const LLVM_READONLY {
     return getBeginLoc();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     SourceLocation EndLoc = getEndLoc();
     return EndLoc.isValid() ? EndLoc : getLocStart();
@@ -573,9 +599,10 @@
   return OS;
 }
 
-}  // end namespace clang
+} // namespace clang
 
 namespace llvm {
+
 /// Define DenseMapInfo so that DeclarationNames can be used as keys
 /// in DenseMap and DenseSets.
 template<>
@@ -601,6 +628,6 @@
 template <>
 struct isPodLike<clang::DeclarationName> { static const bool value = true; };
 
-}  // end namespace llvm
+} // namespace llvm
 
-#endif
+#endif // LLVM_CLANG_AST_DECLARATIONNAME_H
diff --git a/include/clang/AST/DependentDiagnostic.h b/include/clang/AST/DependentDiagnostic.h
index 8e038c8..a514326 100644
--- a/include/clang/AST/DependentDiagnostic.h
+++ b/include/clang/AST/DependentDiagnostic.h
@@ -1,4 +1,4 @@
-//===-- DependentDiagnostic.h - Dependently-generated diagnostics -*- C++ -*-=//
+//==- DependentDiagnostic.h - Dependently-generated diagnostics --*- C++ -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -23,6 +23,9 @@
 #include "clang/AST/Type.h"
 #include "clang/Basic/PartialDiagnostic.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
+#include <cassert>
+#include <iterator>
 
 namespace clang {
 
@@ -94,6 +97,9 @@
   }
 
 private:
+  friend class DeclContext::ddiag_iterator;
+  friend class DependentStoredDeclsMap;
+
   DependentDiagnostic(const PartialDiagnostic &PDiag,
                       PartialDiagnostic::Storage *Storage) 
     : Diag(PDiag, Storage) {}
@@ -102,8 +108,6 @@
                                      DeclContext *Parent,
                                      const PartialDiagnostic &PDiag);
 
-  friend class DependentStoredDeclsMap;
-  friend class DeclContext::ddiag_iterator;
   DependentDiagnostic *NextDiagnostic;
 
   PartialDiagnostic Diag;
@@ -118,19 +122,17 @@
   } AccessData;
 };
 
-/// 
-
 /// An iterator over the dependent diagnostics in a dependent context.
 class DeclContext::ddiag_iterator {
 public:
-  ddiag_iterator() : Ptr(nullptr) {}
+  ddiag_iterator() = default;
   explicit ddiag_iterator(DependentDiagnostic *Ptr) : Ptr(Ptr) {}
 
-  typedef DependentDiagnostic *value_type;
-  typedef DependentDiagnostic *reference;
-  typedef DependentDiagnostic *pointer;
-  typedef int difference_type;
-  typedef std::forward_iterator_tag iterator_category;
+  using value_type = DependentDiagnostic *;
+  using reference = DependentDiagnostic *;
+  using pointer = DependentDiagnostic *;
+  using difference_type = int;
+  using iterator_category = std::forward_iterator_tag;
 
   reference operator*() const { return Ptr; }
 
@@ -168,7 +170,7 @@
   }
 
 private:
-  DependentDiagnostic *Ptr;
+  DependentDiagnostic *Ptr = nullptr;
 };
 
 inline DeclContext::ddiag_range DeclContext::ddiags() const {
@@ -184,6 +186,6 @@
   return ddiag_range(ddiag_iterator(Map->FirstDiagnostic), ddiag_iterator());
 }
 
-}
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_DEPENDENTDIAGNOSTIC_H
diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h
index bef2339..9707a7e 100644
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -1720,19 +1720,19 @@
 
 private:
   unsigned Opc : 5;
+  unsigned CanOverflow : 1;
   SourceLocation Loc;
   Stmt *Val;
 public:
-
-  UnaryOperator(Expr *input, Opcode opc, QualType type,
-                ExprValueKind VK, ExprObjectKind OK, SourceLocation l)
-    : Expr(UnaryOperatorClass, type, VK, OK,
-           input->isTypeDependent() || type->isDependentType(),
-           input->isValueDependent(),
-           (input->isInstantiationDependent() ||
-            type->isInstantiationDependentType()),
-           input->containsUnexpandedParameterPack()),
-      Opc(opc), Loc(l), Val(input) {}
+  UnaryOperator(Expr *input, Opcode opc, QualType type, ExprValueKind VK,
+                ExprObjectKind OK, SourceLocation l, bool CanOverflow)
+      : Expr(UnaryOperatorClass, type, VK, OK,
+             input->isTypeDependent() || type->isDependentType(),
+             input->isValueDependent(),
+             (input->isInstantiationDependent() ||
+              type->isInstantiationDependentType()),
+             input->containsUnexpandedParameterPack()),
+        Opc(opc), CanOverflow(CanOverflow), Loc(l), Val(input) {}
 
   /// \brief Build an empty unary operator.
   explicit UnaryOperator(EmptyShell Empty)
@@ -1748,6 +1748,15 @@
   SourceLocation getOperatorLoc() const { return Loc; }
   void setOperatorLoc(SourceLocation L) { Loc = L; }
 
+  /// Returns true if the unary operator can cause an overflow. For instance,
+  ///   signed int i = INT_MAX; i++;
+  ///   signed char c = CHAR_MAX; c++;
+  /// Due to integer promotions, c++ is promoted to an int before the postfix
+  /// increment, and the result is an int that cannot overflow. However, i++
+  /// can overflow.
+  bool canOverflow() const { return CanOverflow; }
+  void setCanOverflow(bool C) { CanOverflow = C; }
+
   /// isPostfix - Return true if this is a postfix operation, like x++.
   static bool isPostfix(Opcode Op) {
     return Op == UO_PostInc || Op == UO_PostDec;
@@ -2160,19 +2169,19 @@
   void setRHS(Expr *E) { SubExprs[RHS] = E; }
 
   Expr *getBase() {
-    return cast<Expr>(getRHS()->getType()->isIntegerType() ? getLHS():getRHS());
+    return getRHS()->getType()->isIntegerType() ? getLHS() : getRHS();
   }
 
   const Expr *getBase() const {
-    return cast<Expr>(getRHS()->getType()->isIntegerType() ? getLHS():getRHS());
+    return getRHS()->getType()->isIntegerType() ? getLHS() : getRHS();
   }
 
   Expr *getIdx() {
-    return cast<Expr>(getRHS()->getType()->isIntegerType() ? getRHS():getLHS());
+    return getRHS()->getType()->isIntegerType() ? getRHS() : getLHS();
   }
 
   const Expr *getIdx() const {
-    return cast<Expr>(getRHS()->getType()->isIntegerType() ? getRHS():getLHS());
+    return getRHS()->getType()->isIntegerType() ? getRHS() : getLHS();
   }
 
   SourceLocation getLocStart() const LLVM_READONLY {
@@ -2348,6 +2357,10 @@
   SourceLocation getLocStart() const LLVM_READONLY;
   SourceLocation getLocEnd() const LLVM_READONLY;
 
+  /// Return true if this is a call to __assume() or __builtin_assume() with
+  /// a non-value-dependent constant parameter evaluating as false.
+  bool isBuiltinAssumeFalse(const ASTContext &Ctx) const;
+
   bool isCallToStdMove() const {
     const FunctionDecl* FD = getDirectCallee();
     return getNumArgs() == 1 && FD && FD->isInStdNamespace() &&
@@ -2742,7 +2755,6 @@
                ty->containsUnexpandedParameterPack()) ||
               (op && op->containsUnexpandedParameterPack()))),
         Op(op) {
-    assert(kind != CK_Invalid && "creating cast with invalid cast kind");
     CastExprBits.Kind = kind;
     setBasePathSize(BasePathSize);
     assert(CastConsistency());
@@ -3073,7 +3085,7 @@
   static bool isEqualityOp(Opcode Opc) { return Opc == BO_EQ || Opc == BO_NE; }
   bool isEqualityOp() const { return isEqualityOp(getOpcode()); }
 
-  static bool isComparisonOp(Opcode Opc) { return Opc >= BO_LT && Opc<=BO_NE; }
+  static bool isComparisonOp(Opcode Opc) { return Opc >= BO_Cmp && Opc<=BO_NE; }
   bool isComparisonOp() const { return isComparisonOp(getOpcode()); }
 
   static Opcode negateComparisonOp(Opcode Opc) {
diff --git a/include/clang/AST/ExprCXX.h b/include/clang/AST/ExprCXX.h
index a2cf961..e51ffb6 100644
--- a/include/clang/AST/ExprCXX.h
+++ b/include/clang/AST/ExprCXX.h
@@ -1,4 +1,4 @@
-//===--- ExprCXX.h - Classes for representing expressions -------*- C++ -*-===//
+//===- ExprCXX.h - Classes for representing expressions ---------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,31 +6,57 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the clang::Expr interface and subclasses for C++ expressions.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_AST_EXPRCXX_H
 #define LLVM_CLANG_AST_EXPRCXX_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
-#include "clang/AST/LambdaCapture.h"
+#include "clang/AST/NestedNameSpecifier.h"
+#include "clang/AST/OperationKinds.h"
+#include "clang/AST/Stmt.h"
 #include "clang/AST/TemplateBase.h"
+#include "clang/AST/Type.h"
 #include "clang/AST/UnresolvedSet.h"
+#include "clang/Basic/ExceptionSpecificationType.h"
 #include "clang/Basic/ExpressionTraits.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/Lambda.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
 #include "clang/Basic/TypeTraits.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/TrailingObjects.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
 
 namespace clang {
 
-class CXXTemporary;
-class MSPropertyDecl;
-class TemplateArgumentListInfo;
-class UuidAttr;
+class ASTContext;
+class DeclAccessPair;
+class IdentifierInfo;
+class LambdaCapture;
+class NonTypeTemplateParmDecl;
+class TemplateParameterList;
 
 //===--------------------------------------------------------------------===//
 // C++ Expressions.
@@ -52,23 +78,28 @@
 class CXXOperatorCallExpr : public CallExpr {
   /// \brief The overloaded operator.
   OverloadedOperatorKind Operator;
+
   SourceRange Range;
 
   // Only meaningful for floating point types.
   FPOptions FPFeatures;
 
   SourceRange getSourceRangeImpl() const LLVM_READONLY;
+
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+
   CXXOperatorCallExpr(ASTContext& C, OverloadedOperatorKind Op, Expr *fn,
                       ArrayRef<Expr*> args, QualType t, ExprValueKind VK,
                       SourceLocation operatorloc, FPOptions FPFeatures)
-    : CallExpr(C, CXXOperatorCallExprClass, fn, args, t, VK, operatorloc),
-      Operator(Op), FPFeatures(FPFeatures) {
+      : CallExpr(C, CXXOperatorCallExprClass, fn, args, t, VK, operatorloc),
+        Operator(Op), FPFeatures(FPFeatures) {
     Range = getSourceRangeImpl();
   }
-  explicit CXXOperatorCallExpr(ASTContext& C, EmptyShell Empty) :
-    CallExpr(C, CXXOperatorCallExprClass, Empty) { }
 
+  explicit CXXOperatorCallExpr(ASTContext& C, EmptyShell Empty)
+      : CallExpr(C, CXXOperatorCallExprClass, Empty) {}
 
   /// \brief Returns the kind of overloaded operator that this
   /// expression refers to.
@@ -120,9 +151,6 @@
   bool isFPContractableWithinStatement() const {
     return FPFeatures.allowFPContractWithinStatement();
   }
-
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
 };
 
 /// Represents a call to a member function that
@@ -137,10 +165,10 @@
 public:
   CXXMemberCallExpr(ASTContext &C, Expr *fn, ArrayRef<Expr*> args,
                     QualType t, ExprValueKind VK, SourceLocation RP)
-    : CallExpr(C, CXXMemberCallExprClass, fn, args, t, VK, RP) {}
+      : CallExpr(C, CXXMemberCallExprClass, fn, args, t, VK, RP) {}
 
   CXXMemberCallExpr(ASTContext &C, EmptyShell Empty)
-    : CallExpr(C, CXXMemberCallExprClass, Empty) { }
+      : CallExpr(C, CXXMemberCallExprClass, Empty) {}
 
   /// \brief Retrieves the implicit object argument for the member call.
   ///
@@ -183,7 +211,7 @@
       : CallExpr(C, CUDAKernelCallExprClass, fn, Config, args, t, VK, RP) {}
 
   CUDAKernelCallExpr(ASTContext &C, EmptyShell Empty)
-    : CallExpr(C, CUDAKernelCallExprClass, END_PREARG, Empty) { }
+      : CallExpr(C, CUDAKernelCallExprClass, END_PREARG, Empty) {}
 
   const CallExpr *getConfig() const {
     return cast_or_null<CallExpr>(getPreArg(CONFIG));
@@ -217,23 +245,28 @@
 /// reinterpret_cast, and CXXConstCastExpr for \c const_cast.
 class CXXNamedCastExpr : public ExplicitCastExpr {
 private:
-  SourceLocation Loc; // the location of the casting op
-  SourceLocation RParenLoc; // the location of the right parenthesis
-  SourceRange AngleBrackets; // range for '<' '>'
+  // the location of the casting op
+  SourceLocation Loc;
+
+  // the location of the right parenthesis
+  SourceLocation RParenLoc;
+
+  // range for '<' '>'
+  SourceRange AngleBrackets;
 
 protected:
+  friend class ASTStmtReader;
+
   CXXNamedCastExpr(StmtClass SC, QualType ty, ExprValueKind VK,
                    CastKind kind, Expr *op, unsigned PathSize,
                    TypeSourceInfo *writtenTy, SourceLocation l,
                    SourceLocation RParenLoc,
                    SourceRange AngleBrackets)
-    : ExplicitCastExpr(SC, ty, VK, kind, op, PathSize, writtenTy), Loc(l),
-      RParenLoc(RParenLoc), AngleBrackets(AngleBrackets) {}
+      : ExplicitCastExpr(SC, ty, VK, kind, op, PathSize, writtenTy), Loc(l),
+        RParenLoc(RParenLoc), AngleBrackets(AngleBrackets) {}
 
   explicit CXXNamedCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize)
-    : ExplicitCastExpr(SC, Shell, PathSize) { }
-
-  friend class ASTStmtReader;
+      : ExplicitCastExpr(SC, Shell, PathSize) {}
 
 public:
   const char *getCastName() const;
@@ -273,13 +306,16 @@
                     unsigned pathSize, TypeSourceInfo *writtenTy,
                     SourceLocation l, SourceLocation RParenLoc,
                     SourceRange AngleBrackets)
-    : CXXNamedCastExpr(CXXStaticCastExprClass, ty, vk, kind, op, pathSize,
-                       writtenTy, l, RParenLoc, AngleBrackets) {}
+      : CXXNamedCastExpr(CXXStaticCastExprClass, ty, vk, kind, op, pathSize,
+                         writtenTy, l, RParenLoc, AngleBrackets) {}
 
   explicit CXXStaticCastExpr(EmptyShell Empty, unsigned PathSize)
-    : CXXNamedCastExpr(CXXStaticCastExprClass, Empty, PathSize) { }
+      : CXXNamedCastExpr(CXXStaticCastExprClass, Empty, PathSize) {}
 
 public:
+  friend class CastExpr;
+  friend TrailingObjects;
+
   static CXXStaticCastExpr *Create(const ASTContext &Context, QualType T,
                                    ExprValueKind VK, CastKind K, Expr *Op,
                                    const CXXCastPath *Path,
@@ -292,9 +328,6 @@
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == CXXStaticCastExprClass;
   }
-
-  friend TrailingObjects;
-  friend class CastExpr;
 };
 
 /// \brief A C++ @c dynamic_cast expression (C++ [expr.dynamic.cast]).
@@ -309,13 +342,16 @@
                      Expr *op, unsigned pathSize, TypeSourceInfo *writtenTy,
                      SourceLocation l, SourceLocation RParenLoc,
                      SourceRange AngleBrackets)
-    : CXXNamedCastExpr(CXXDynamicCastExprClass, ty, VK, kind, op, pathSize,
-                       writtenTy, l, RParenLoc, AngleBrackets) {}
+      : CXXNamedCastExpr(CXXDynamicCastExprClass, ty, VK, kind, op, pathSize,
+                         writtenTy, l, RParenLoc, AngleBrackets) {}
 
   explicit CXXDynamicCastExpr(EmptyShell Empty, unsigned pathSize)
-    : CXXNamedCastExpr(CXXDynamicCastExprClass, Empty, pathSize) { }
+      : CXXNamedCastExpr(CXXDynamicCastExprClass, Empty, pathSize) {}
 
 public:
+  friend class CastExpr;
+  friend TrailingObjects;
+
   static CXXDynamicCastExpr *Create(const ASTContext &Context, QualType T,
                                     ExprValueKind VK, CastKind Kind, Expr *Op,
                                     const CXXCastPath *Path,
@@ -331,9 +367,6 @@
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == CXXDynamicCastExprClass;
   }
-
-  friend TrailingObjects;
-  friend class CastExpr;
 };
 
 /// \brief A C++ @c reinterpret_cast expression (C++ [expr.reinterpret.cast]).
@@ -353,13 +386,16 @@
                          TypeSourceInfo *writtenTy, SourceLocation l,
                          SourceLocation RParenLoc,
                          SourceRange AngleBrackets)
-    : CXXNamedCastExpr(CXXReinterpretCastExprClass, ty, vk, kind, op,
-                       pathSize, writtenTy, l, RParenLoc, AngleBrackets) {}
+      : CXXNamedCastExpr(CXXReinterpretCastExprClass, ty, vk, kind, op,
+                         pathSize, writtenTy, l, RParenLoc, AngleBrackets) {}
 
   CXXReinterpretCastExpr(EmptyShell Empty, unsigned pathSize)
-    : CXXNamedCastExpr(CXXReinterpretCastExprClass, Empty, pathSize) { }
+      : CXXNamedCastExpr(CXXReinterpretCastExprClass, Empty, pathSize) {}
 
 public:
+  friend class CastExpr;
+  friend TrailingObjects;
+
   static CXXReinterpretCastExpr *Create(const ASTContext &Context, QualType T,
                                         ExprValueKind VK, CastKind Kind,
                                         Expr *Op, const CXXCastPath *Path,
@@ -372,9 +408,6 @@
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == CXXReinterpretCastExprClass;
   }
-
-  friend TrailingObjects;
-  friend class CastExpr;
 };
 
 /// \brief A C++ \c const_cast expression (C++ [expr.const.cast]).
@@ -390,13 +423,16 @@
   CXXConstCastExpr(QualType ty, ExprValueKind VK, Expr *op,
                    TypeSourceInfo *writtenTy, SourceLocation l,
                    SourceLocation RParenLoc, SourceRange AngleBrackets)
-    : CXXNamedCastExpr(CXXConstCastExprClass, ty, VK, CK_NoOp, op,
-                       0, writtenTy, l, RParenLoc, AngleBrackets) {}
+      : CXXNamedCastExpr(CXXConstCastExprClass, ty, VK, CK_NoOp, op,
+                         0, writtenTy, l, RParenLoc, AngleBrackets) {}
 
   explicit CXXConstCastExpr(EmptyShell Empty)
-    : CXXNamedCastExpr(CXXConstCastExprClass, Empty, 0) { }
+      : CXXNamedCastExpr(CXXConstCastExprClass, Empty, 0) {}
 
 public:
+  friend class CastExpr;
+  friend TrailingObjects;
+
   static CXXConstCastExpr *Create(const ASTContext &Context, QualType T,
                                   ExprValueKind VK, Expr *Op,
                                   TypeSourceInfo *WrittenTy, SourceLocation L,
@@ -407,9 +443,6 @@
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == CXXConstCastExprClass;
   }
-
-  friend TrailingObjects;
-  friend class CastExpr;
 };
 
 /// \brief A call to a literal operator (C++11 [over.literal])
@@ -426,22 +459,37 @@
   SourceLocation UDSuffixLoc;
 
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+
   UserDefinedLiteral(const ASTContext &C, Expr *Fn, ArrayRef<Expr*> Args,
                      QualType T, ExprValueKind VK, SourceLocation LitEndLoc,
                      SourceLocation SuffixLoc)
-    : CallExpr(C, UserDefinedLiteralClass, Fn, Args, T, VK, LitEndLoc),
-      UDSuffixLoc(SuffixLoc) {}
+      : CallExpr(C, UserDefinedLiteralClass, Fn, Args, T, VK, LitEndLoc),
+        UDSuffixLoc(SuffixLoc) {}
+
   explicit UserDefinedLiteral(const ASTContext &C, EmptyShell Empty)
-    : CallExpr(C, UserDefinedLiteralClass, Empty) {}
+      : CallExpr(C, UserDefinedLiteralClass, Empty) {}
 
   /// The kind of literal operator which is invoked.
   enum LiteralOperatorKind {
-    LOK_Raw,      ///< Raw form: operator "" X (const char *)
-    LOK_Template, ///< Raw form: operator "" X<cs...> ()
-    LOK_Integer,  ///< operator "" X (unsigned long long)
-    LOK_Floating, ///< operator "" X (long double)
-    LOK_String,   ///< operator "" X (const CharT *, size_t)
-    LOK_Character ///< operator "" X (CharT)
+    /// Raw form: operator "" X (const char *)
+    LOK_Raw,
+
+    /// Raw form: operator "" X<cs...> ()
+    LOK_Template,
+
+    /// operator "" X (unsigned long long)
+    LOK_Integer,
+
+    /// operator "" X (long double)
+    LOK_Floating,
+
+    /// operator "" X (const CharT *, size_t)
+    LOK_String,
+
+    /// operator "" X (CharT)
+    LOK_Character
   };
 
   /// \brief Returns the kind of literal operator invocation
@@ -461,8 +509,8 @@
       return getRParenLoc();
     return getArg(0)->getLocStart();
   }
-  SourceLocation getLocEnd() const { return getRParenLoc(); }
 
+  SourceLocation getLocEnd() const { return getRParenLoc(); }
 
   /// \brief Returns the location of a ud-suffix in the expression.
   ///
@@ -476,24 +524,21 @@
   static bool classof(const Stmt *S) {
     return S->getStmtClass() == UserDefinedLiteralClass;
   }
-
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
 };
 
 /// \brief A boolean literal, per ([C++ lex.bool] Boolean literals).
-///
 class CXXBoolLiteralExpr : public Expr {
   bool Value;
   SourceLocation Loc;
+
 public:
-  CXXBoolLiteralExpr(bool val, QualType Ty, SourceLocation l) :
-    Expr(CXXBoolLiteralExprClass, Ty, VK_RValue, OK_Ordinary, false, false,
-         false, false),
-    Value(val), Loc(l) {}
+  CXXBoolLiteralExpr(bool val, QualType Ty, SourceLocation l)
+      : Expr(CXXBoolLiteralExprClass, Ty, VK_RValue, OK_Ordinary, false, false,
+             false, false),
+        Value(val), Loc(l) {}
 
   explicit CXXBoolLiteralExpr(EmptyShell Empty)
-    : Expr(CXXBoolLiteralExprClass, Empty) { }
+      : Expr(CXXBoolLiteralExprClass, Empty) {}
 
   bool getValue() const { return Value; }
   void setValue(bool V) { Value = V; }
@@ -519,14 +564,15 @@
 /// Introduced in C++11, the only literal of type \c nullptr_t is \c nullptr.
 class CXXNullPtrLiteralExpr : public Expr {
   SourceLocation Loc;
+
 public:
-  CXXNullPtrLiteralExpr(QualType Ty, SourceLocation l) :
-    Expr(CXXNullPtrLiteralExprClass, Ty, VK_RValue, OK_Ordinary, false, false,
-         false, false),
-    Loc(l) {}
+  CXXNullPtrLiteralExpr(QualType Ty, SourceLocation l)
+      : Expr(CXXNullPtrLiteralExprClass, Ty, VK_RValue, OK_Ordinary, false,
+             false, false, false),
+        Loc(l) {}
 
   explicit CXXNullPtrLiteralExpr(EmptyShell Empty)
-    : Expr(CXXNullPtrLiteralExprClass, Empty) { }
+      : Expr(CXXNullPtrLiteralExprClass, Empty) {}
 
   SourceLocation getLocStart() const LLVM_READONLY { return Loc; }
   SourceLocation getLocEnd() const LLVM_READONLY { return Loc; }
@@ -546,18 +592,21 @@
 /// \brief Implicit construction of a std::initializer_list<T> object from an
 /// array temporary within list-initialization (C++11 [dcl.init.list]p5).
 class CXXStdInitializerListExpr : public Expr {
-  Stmt *SubExpr;
+  Stmt *SubExpr = nullptr;
 
   CXXStdInitializerListExpr(EmptyShell Empty)
-    : Expr(CXXStdInitializerListExprClass, Empty), SubExpr(nullptr) {}
+      : Expr(CXXStdInitializerListExprClass, Empty) {}
 
 public:
+  friend class ASTReader;
+  friend class ASTStmtReader;
+
   CXXStdInitializerListExpr(QualType Ty, Expr *SubExpr)
-    : Expr(CXXStdInitializerListExprClass, Ty, VK_RValue, OK_Ordinary,
-           Ty->isDependentType(), SubExpr->isValueDependent(),
-           SubExpr->isInstantiationDependent(),
-           SubExpr->containsUnexpandedParameterPack()),
-      SubExpr(SubExpr) {}
+      : Expr(CXXStdInitializerListExprClass, Ty, VK_RValue, OK_Ordinary,
+             Ty->isDependentType(), SubExpr->isValueDependent(),
+             SubExpr->isInstantiationDependent(),
+             SubExpr->containsUnexpandedParameterPack()),
+        SubExpr(SubExpr) {}
 
   Expr *getSubExpr() { return static_cast<Expr*>(SubExpr); }
   const Expr *getSubExpr() const { return static_cast<const Expr*>(SubExpr); }
@@ -565,9 +614,11 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return SubExpr->getLocStart();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return SubExpr->getLocEnd();
   }
+
   SourceRange getSourceRange() const LLVM_READONLY {
     return SubExpr->getSourceRange();
   }
@@ -577,9 +628,6 @@
   }
 
   child_range children() { return child_range(&SubExpr, &SubExpr + 1); }
-
-  friend class ASTReader;
-  friend class ASTStmtReader;
 };
 
 /// A C++ \c typeid expression (C++ [expr.typeid]), which gets
@@ -594,27 +642,29 @@
 
 public:
   CXXTypeidExpr(QualType Ty, TypeSourceInfo *Operand, SourceRange R)
-    : Expr(CXXTypeidExprClass, Ty, VK_LValue, OK_Ordinary,
-           // typeid is never type-dependent (C++ [temp.dep.expr]p4)
-           false,
-           // typeid is value-dependent if the type or expression are dependent
-           Operand->getType()->isDependentType(),
-           Operand->getType()->isInstantiationDependentType(),
-           Operand->getType()->containsUnexpandedParameterPack()),
-      Operand(Operand), Range(R) { }
+      : Expr(CXXTypeidExprClass, Ty, VK_LValue, OK_Ordinary,
+             // typeid is never type-dependent (C++ [temp.dep.expr]p4)
+             false,
+             // typeid is value-dependent if the type or expression are
+             // dependent
+             Operand->getType()->isDependentType(),
+             Operand->getType()->isInstantiationDependentType(),
+             Operand->getType()->containsUnexpandedParameterPack()),
+        Operand(Operand), Range(R) {}
 
   CXXTypeidExpr(QualType Ty, Expr *Operand, SourceRange R)
-    : Expr(CXXTypeidExprClass, Ty, VK_LValue, OK_Ordinary,
-        // typeid is never type-dependent (C++ [temp.dep.expr]p4)
-           false,
-        // typeid is value-dependent if the type or expression are dependent
-           Operand->isTypeDependent() || Operand->isValueDependent(),
-           Operand->isInstantiationDependent(),
-           Operand->containsUnexpandedParameterPack()),
-      Operand(Operand), Range(R) { }
+      : Expr(CXXTypeidExprClass, Ty, VK_LValue, OK_Ordinary,
+             // typeid is never type-dependent (C++ [temp.dep.expr]p4)
+             false,
+             // typeid is value-dependent if the type or expression are
+             // dependent
+             Operand->isTypeDependent() || Operand->isValueDependent(),
+             Operand->isInstantiationDependent(),
+             Operand->containsUnexpandedParameterPack()),
+        Operand(Operand), Range(R) {}
 
   CXXTypeidExpr(EmptyShell Empty, bool isExpr)
-    : Expr(CXXTypeidExprClass, Empty) {
+      : Expr(CXXTypeidExprClass, Empty) {
     if (isExpr)
       Operand = (Expr*)nullptr;
     else
@@ -683,26 +733,30 @@
   NestedNameSpecifierLoc QualifierLoc;
 
 public:
+  friend class ASTStmtReader;
+
   MSPropertyRefExpr(Expr *baseExpr, MSPropertyDecl *decl, bool isArrow,
                     QualType ty, ExprValueKind VK,
                     NestedNameSpecifierLoc qualifierLoc,
                     SourceLocation nameLoc)
-  : Expr(MSPropertyRefExprClass, ty, VK, OK_Ordinary,
-         /*type-dependent*/ false, baseExpr->isValueDependent(),
-         baseExpr->isInstantiationDependent(),
-         baseExpr->containsUnexpandedParameterPack()),
-    BaseExpr(baseExpr), TheDecl(decl),
-    MemberLoc(nameLoc), IsArrow(isArrow),
-    QualifierLoc(qualifierLoc) {}
+      : Expr(MSPropertyRefExprClass, ty, VK, OK_Ordinary,
+             /*type-dependent*/ false, baseExpr->isValueDependent(),
+             baseExpr->isInstantiationDependent(),
+             baseExpr->containsUnexpandedParameterPack()),
+        BaseExpr(baseExpr), TheDecl(decl),
+        MemberLoc(nameLoc), IsArrow(isArrow),
+        QualifierLoc(qualifierLoc) {}
 
   MSPropertyRefExpr(EmptyShell Empty) : Expr(MSPropertyRefExprClass, Empty) {}
 
   SourceRange getSourceRange() const LLVM_READONLY {
     return SourceRange(getLocStart(), getLocEnd());
   }
+
   bool isImplicitAccess() const {
     return getBaseExpr() && getBaseExpr()->isImplicitCXXThis();
   }
+
   SourceLocation getLocStart() const {
     if (!isImplicitAccess())
       return BaseExpr->getLocStart();
@@ -711,11 +765,13 @@
     else
         return MemberLoc;
   }
+
   SourceLocation getLocEnd() const { return getMemberLoc(); }
 
   child_range children() {
     return child_range((Stmt**)&BaseExpr, (Stmt**)&BaseExpr + 1);
   }
+
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == MSPropertyRefExprClass;
   }
@@ -725,8 +781,6 @@
   bool isArrow() const { return IsArrow; }
   SourceLocation getMemberLoc() const { return MemberLoc; }
   NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
-
-  friend class ASTStmtReader;
 };
 
 /// MS property subscript expression.
@@ -742,7 +796,9 @@
 /// This is a syntactic pseudo-object expression.
 class MSPropertySubscriptExpr : public Expr {
   friend class ASTStmtReader;
+
   enum { BASE_EXPR, IDX_EXPR, NUM_SUBEXPRS = 2 };
+
   Stmt *SubExprs[NUM_SUBEXPRS];
   SourceLocation RBracketLoc;
 
@@ -773,6 +829,7 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return getBase()->getLocStart();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY { return RBracketLoc; }
 
   SourceLocation getRBracketLoc() const { return RBracketLoc; }
@@ -891,13 +948,13 @@
 
 public:
   CXXThisExpr(SourceLocation L, QualType Type, bool isImplicit)
-    : Expr(CXXThisExprClass, Type, VK_RValue, OK_Ordinary,
-           // 'this' is type-dependent if the class type of the enclosing
-           // member function is dependent (C++ [temp.dep.expr]p2)
-           Type->isDependentType(), Type->isDependentType(),
-           Type->isInstantiationDependentType(),
-           /*ContainsUnexpandedParameterPack=*/false),
-      Loc(L), Implicit(isImplicit) { }
+      : Expr(CXXThisExprClass, Type, VK_RValue, OK_Ordinary,
+             // 'this' is type-dependent if the class type of the enclosing
+             // member function is dependent (C++ [temp.dep.expr]p2)
+             Type->isDependentType(), Type->isDependentType(),
+             Type->isInstantiationDependentType(),
+             /*ContainsUnexpandedParameterPack=*/false),
+        Loc(L), Implicit(isImplicit) {}
 
   CXXThisExpr(EmptyShell Empty) : Expr(CXXThisExprClass, Empty) {}
 
@@ -926,23 +983,25 @@
 /// 'throw' assignment-expression.  When assignment-expression isn't
 /// present, Op will be null.
 class CXXThrowExpr : public Expr {
+  friend class ASTStmtReader;
+
   Stmt *Op;
   SourceLocation ThrowLoc;
+
   /// \brief Whether the thrown variable (if any) is in scope.
   unsigned IsThrownVariableInScope : 1;
 
-  friend class ASTStmtReader;
-
 public:
   // \p Ty is the void type which is used as the result type of the
   // expression.  The \p l is the location of the throw keyword.  \p expr
   // can by null, if the optional expression to throw isn't present.
   CXXThrowExpr(Expr *expr, QualType Ty, SourceLocation l,
-               bool IsThrownVariableInScope) :
-    Expr(CXXThrowExprClass, Ty, VK_RValue, OK_Ordinary, false, false,
-         expr && expr->isInstantiationDependent(),
-         expr && expr->containsUnexpandedParameterPack()),
-    Op(expr), ThrowLoc(l), IsThrownVariableInScope(IsThrownVariableInScope) {}
+               bool IsThrownVariableInScope)
+      : Expr(CXXThrowExprClass, Ty, VK_RValue, OK_Ordinary, false, false,
+             expr && expr->isInstantiationDependent(),
+             expr && expr->containsUnexpandedParameterPack()),
+        Op(expr), ThrowLoc(l),
+        IsThrownVariableInScope(IsThrownVariableInScope) {}
   CXXThrowExpr(EmptyShell Empty) : Expr(CXXThrowExprClass, Empty) {}
 
   const Expr *getSubExpr() const { return cast_or_null<Expr>(Op); }
@@ -958,6 +1017,7 @@
   bool isThrownVariableInScope() const { return IsThrownVariableInScope; }
 
   SourceLocation getLocStart() const LLVM_READONLY { return ThrowLoc; }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     if (!getSubExpr())
       return ThrowLoc;
@@ -987,15 +1047,19 @@
   SourceLocation Loc;
 
   CXXDefaultArgExpr(StmtClass SC, SourceLocation Loc, ParmVarDecl *param)
-    : Expr(SC,
-           param->hasUnparsedDefaultArg()
-             ? param->getType().getNonReferenceType()
-             : param->getDefaultArg()->getType(),
-           param->getDefaultArg()->getValueKind(),
-           param->getDefaultArg()->getObjectKind(), false, false, false, false),
-      Param(param), Loc(Loc) { }
+      : Expr(SC,
+             param->hasUnparsedDefaultArg()
+               ? param->getType().getNonReferenceType()
+               : param->getDefaultArg()->getType(),
+             param->getDefaultArg()->getValueKind(),
+             param->getDefaultArg()->getObjectKind(), false, false, false,
+             false),
+        Param(param), Loc(Loc) {}
 
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+
   CXXDefaultArgExpr(EmptyShell Empty) : Expr(CXXDefaultArgExprClass, Empty) {}
 
   // \p Param is the parameter whose default argument is used by this
@@ -1036,9 +1100,6 @@
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
-
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
 };
 
 /// \brief A use of a default initializer in a constructor or in aggregate
@@ -1062,6 +1123,9 @@
   CXXDefaultInitExpr(EmptyShell Empty) : Expr(CXXDefaultInitExprClass, Empty) {}
 
 public:
+  friend class ASTReader;
+  friend class ASTStmtReader;
+
   /// \p Field is the non-static data member whose default initializer is used
   /// by this expression.
   static CXXDefaultInitExpr *Create(const ASTContext &C, SourceLocation Loc,
@@ -1094,9 +1158,6 @@
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
-
-  friend class ASTReader;
-  friend class ASTStmtReader;
 };
 
 /// \brief Represents a C++ temporary.
@@ -1105,13 +1166,14 @@
   const CXXDestructorDecl *Destructor;
 
   explicit CXXTemporary(const CXXDestructorDecl *destructor)
-    : Destructor(destructor) { }
+      : Destructor(destructor) {}
 
 public:
   static CXXTemporary *Create(const ASTContext &C,
                               const CXXDestructorDecl *Destructor);
 
   const CXXDestructorDecl *getDestructor() const { return Destructor; }
+
   void setDestructor(const CXXDestructorDecl *Dtor) {
     Destructor = Dtor;
   }
@@ -1132,21 +1194,20 @@
 ///   }
 /// \endcode
 class CXXBindTemporaryExpr : public Expr {
-  CXXTemporary *Temp;
-
-  Stmt *SubExpr;
+  CXXTemporary *Temp = nullptr;
+  Stmt *SubExpr = nullptr;
 
   CXXBindTemporaryExpr(CXXTemporary *temp, Expr* SubExpr)
-   : Expr(CXXBindTemporaryExprClass, SubExpr->getType(),
-          VK_RValue, OK_Ordinary, SubExpr->isTypeDependent(),
-          SubExpr->isValueDependent(),
-          SubExpr->isInstantiationDependent(),
-          SubExpr->containsUnexpandedParameterPack()),
-     Temp(temp), SubExpr(SubExpr) { }
+      : Expr(CXXBindTemporaryExprClass, SubExpr->getType(),
+             VK_RValue, OK_Ordinary, SubExpr->isTypeDependent(),
+             SubExpr->isValueDependent(),
+             SubExpr->isInstantiationDependent(),
+             SubExpr->containsUnexpandedParameterPack()),
+        Temp(temp), SubExpr(SubExpr) {}
 
 public:
   CXXBindTemporaryExpr(EmptyShell Empty)
-    : Expr(CXXBindTemporaryExprClass, Empty), Temp(nullptr), SubExpr(nullptr) {}
+      : Expr(CXXBindTemporaryExprClass, Empty) {}
 
   static CXXBindTemporaryExpr *Create(const ASTContext &C, CXXTemporary *Temp,
                                       Expr* SubExpr);
@@ -1162,6 +1223,7 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return SubExpr->getLocStart();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY { return SubExpr->getLocEnd();}
 
   // Implement isa/cast/dyncast/etc.
@@ -1184,8 +1246,7 @@
   };
 
 private:
-  CXXConstructorDecl *Constructor;
-
+  CXXConstructorDecl *Constructor = nullptr;
   SourceLocation Loc;
   SourceRange ParenOrBraceRange;
   unsigned NumArgs : 16;
@@ -1195,7 +1256,7 @@
   unsigned StdInitListInitialization : 1;
   unsigned ZeroInitialization : 1;
   unsigned ConstructKind : 2;
-  Stmt **Args;
+  Stmt **Args = nullptr;
 
   void setConstructor(CXXConstructorDecl *C) { Constructor = C; }
 
@@ -1214,15 +1275,16 @@
 
   /// \brief Construct an empty C++ construction expression.
   CXXConstructExpr(StmtClass SC, EmptyShell Empty)
-    : Expr(SC, Empty), Constructor(nullptr), NumArgs(0), Elidable(false),
-      HadMultipleCandidates(false), ListInitialization(false),
-      ZeroInitialization(false), ConstructKind(0), Args(nullptr)
-  { }
+      : Expr(SC, Empty), NumArgs(0), Elidable(false),
+        HadMultipleCandidates(false), ListInitialization(false),
+        ZeroInitialization(false), ConstructKind(0) {}
 
 public:
+  friend class ASTStmtReader;
+
   /// \brief Construct an empty C++ construction expression.
   explicit CXXConstructExpr(EmptyShell Empty)
-    : CXXConstructExpr(CXXConstructExprClass, Empty) {}
+      : CXXConstructExpr(CXXConstructExprClass, Empty) {}
 
   static CXXConstructExpr *Create(const ASTContext &C, QualType T,
                                   SourceLocation Loc,
@@ -1278,10 +1340,10 @@
     ConstructKind = CK;
   }
 
-  typedef ExprIterator arg_iterator;
-  typedef ConstExprIterator const_arg_iterator;
-  typedef llvm::iterator_range<arg_iterator> arg_range;
-  typedef llvm::iterator_range<const_arg_iterator> arg_const_range;
+  using arg_iterator = ExprIterator;
+  using const_arg_iterator = ConstExprIterator;
+  using arg_range = llvm::iterator_range<arg_iterator>;
+  using arg_const_range = llvm::iterator_range<const_arg_iterator>;
 
   arg_range arguments() { return arg_range(arg_begin(), arg_end()); }
   arg_const_range arguments() const {
@@ -1329,8 +1391,6 @@
   child_range children() {
     return child_range(&Args[0], &Args[0]+NumArgs);
   }
-
-  friend class ASTStmtReader;
 };
 
 /// \brief Represents a call to an inherited base class constructor from an
@@ -1339,7 +1399,7 @@
 /// base class constructor.
 class CXXInheritedCtorInitExpr : public Expr {
 private:
-  CXXConstructorDecl *Constructor;
+  CXXConstructorDecl *Constructor = nullptr;
 
   /// The location of the using declaration.
   SourceLocation Loc;
@@ -1352,6 +1412,8 @@
   unsigned InheritedFromVirtualBase : 1;
 
 public:
+  friend class ASTStmtReader;
+
   /// \brief Construct a C++ inheriting construction expression.
   CXXInheritedCtorInitExpr(SourceLocation Loc, QualType T,
                            CXXConstructorDecl *Ctor, bool ConstructsVirtualBase,
@@ -1366,7 +1428,7 @@
 
   /// \brief Construct an empty C++ inheriting construction expression.
   explicit CXXInheritedCtorInitExpr(EmptyShell Empty)
-      : Expr(CXXInheritedCtorInitExprClass, Empty), Constructor(nullptr),
+      : Expr(CXXInheritedCtorInitExprClass, Empty),
         ConstructsVirtualBase(false), InheritedFromVirtualBase(false) {}
 
   /// \brief Get the constructor that this expression will call.
@@ -1393,11 +1455,10 @@
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == CXXInheritedCtorInitExprClass;
   }
+
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
-
-  friend class ASTStmtReader;
 };
 
 /// \brief Represents an explicit C++ type conversion that uses "functional"
@@ -1417,14 +1478,17 @@
                         TypeSourceInfo *writtenTy,
                         CastKind kind, Expr *castExpr, unsigned pathSize,
                         SourceLocation lParenLoc, SourceLocation rParenLoc)
-    : ExplicitCastExpr(CXXFunctionalCastExprClass, ty, VK, kind,
-                       castExpr, pathSize, writtenTy),
-      LParenLoc(lParenLoc), RParenLoc(rParenLoc) {}
+      : ExplicitCastExpr(CXXFunctionalCastExprClass, ty, VK, kind,
+                         castExpr, pathSize, writtenTy),
+        LParenLoc(lParenLoc), RParenLoc(rParenLoc) {}
 
   explicit CXXFunctionalCastExpr(EmptyShell Shell, unsigned PathSize)
-    : ExplicitCastExpr(CXXFunctionalCastExprClass, Shell, PathSize) { }
+      : ExplicitCastExpr(CXXFunctionalCastExprClass, Shell, PathSize) {}
 
 public:
+  friend class CastExpr;
+  friend TrailingObjects;
+
   static CXXFunctionalCastExpr *Create(const ASTContext &Context, QualType T,
                                        ExprValueKind VK,
                                        TypeSourceInfo *Written,
@@ -1440,15 +1504,15 @@
   SourceLocation getRParenLoc() const { return RParenLoc; }
   void setRParenLoc(SourceLocation L) { RParenLoc = L; }
 
+  /// Determine whether this expression models list-initialization.
+  bool isListInitialization() const { return LParenLoc.isInvalid(); }
+
   SourceLocation getLocStart() const LLVM_READONLY;
   SourceLocation getLocEnd() const LLVM_READONLY;
 
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == CXXFunctionalCastExprClass;
   }
-
-  friend TrailingObjects;
-  friend class CastExpr;
 };
 
 /// @brief Represents a C++ functional cast expression that builds a
@@ -1467,9 +1531,11 @@
 /// };
 /// \endcode
 class CXXTemporaryObjectExpr : public CXXConstructExpr {
-  TypeSourceInfo *Type;
+  TypeSourceInfo *Type = nullptr;
 
 public:
+  friend class ASTStmtReader;
+
   CXXTemporaryObjectExpr(const ASTContext &C,
                          CXXConstructorDecl *Cons,
                          QualType Type,
@@ -1481,7 +1547,7 @@
                          bool StdInitListInitialization,
                          bool ZeroInitialization);
   explicit CXXTemporaryObjectExpr(EmptyShell Empty)
-    : CXXConstructExpr(CXXTemporaryObjectExprClass, Empty), Type() { }
+      : CXXConstructExpr(CXXTemporaryObjectExprClass, Empty) {}
 
   TypeSourceInfo *getTypeSourceInfo() const { return Type; }
 
@@ -1491,8 +1557,6 @@
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == CXXTemporaryObjectExprClass;
   }
-
-  friend class ASTStmtReader;
 };
 
 /// \brief A C++ lambda expression, which produces a function object
@@ -1558,9 +1622,9 @@
 
   /// \brief Construct an empty lambda expression.
   LambdaExpr(EmptyShell Empty, unsigned NumCaptures)
-    : Expr(LambdaExprClass, Empty),
-      NumCaptures(NumCaptures), CaptureDefault(LCD_None), ExplicitParams(false),
-      ExplicitResultType(false) { 
+      : Expr(LambdaExprClass, Empty), NumCaptures(NumCaptures),
+        CaptureDefault(LCD_None), ExplicitParams(false),
+        ExplicitResultType(false) {
     getStoredStmts()[NumCaptures] = nullptr;
   }
 
@@ -1569,6 +1633,10 @@
   Stmt *const *getStoredStmts() const { return getTrailingObjects<Stmt *>(); }
 
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+  friend TrailingObjects;
+
   /// \brief Construct a new lambda expression.
   static LambdaExpr *
   Create(const ASTContext &C, CXXRecordDecl *Class, SourceRange IntroducerRange,
@@ -1597,10 +1665,10 @@
 
   /// \brief An iterator that walks over the captures of the lambda,
   /// both implicit and explicit.
-  typedef const LambdaCapture *capture_iterator;
+  using capture_iterator = const LambdaCapture *;
 
   /// \brief An iterator over a range of lambda captures.
-  typedef llvm::iterator_range<capture_iterator> capture_range;
+  using capture_range = llvm::iterator_range<capture_iterator>;
 
   /// \brief Retrieve this lambda's captures.
   capture_range captures() const;
@@ -1639,11 +1707,11 @@
 
   /// \brief Iterator that walks over the capture initialization
   /// arguments.
-  typedef Expr **capture_init_iterator;
+  using capture_init_iterator = Expr **;
 
   /// \brief Const iterator that walks over the capture initialization
   /// arguments.
-  typedef Expr *const *const_capture_init_iterator;
+  using const_capture_init_iterator = Expr *const *;
 
   /// \brief Retrieve the initialization expressions for this lambda's captures.
   llvm::iterator_range<capture_init_iterator> capture_inits() {
@@ -1723,26 +1791,23 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return IntroducerRange.getBegin();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY { return ClosingBrace; }
 
   child_range children() {
     // Includes initialization exprs plus body stmt
     return child_range(getStoredStmts(), getStoredStmts() + NumCaptures + 1);
   }
-
-  friend TrailingObjects;
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
 };
 
 /// An expression "T()" which creates a value-initialized rvalue of type
 /// T, which is a non-class type.  See (C++98 [5.2.3p2]).
 class CXXScalarValueInitExpr : public Expr {
+  friend class ASTStmtReader;
+
   SourceLocation RParenLoc;
   TypeSourceInfo *TypeInfo;
 
-  friend class ASTStmtReader;
-
 public:
   /// \brief Create an explicitly-written scalar-value initialization
   /// expression.
@@ -1754,7 +1819,7 @@
         RParenLoc(rParenLoc), TypeInfo(TypeInfo) {}
 
   explicit CXXScalarValueInitExpr(EmptyShell Shell)
-    : Expr(CXXScalarValueInitExprClass, Shell) { }
+      : Expr(CXXScalarValueInitExprClass, Shell) {}
 
   TypeSourceInfo *getTypeSourceInfo() const {
     return TypeInfo;
@@ -1778,11 +1843,16 @@
 /// \brief Represents a new-expression for memory allocation and constructor
 /// calls, e.g: "new CXXNewExpr(foo)".
 class CXXNewExpr : public Expr {
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+
   /// Contains an optional array size expression, an optional initialization
   /// expression, and any number of optional placement arguments, in that order.
-  Stmt **SubExprs;
+  Stmt **SubExprs = nullptr;
+
   /// \brief Points to the allocation function used.
   FunctionDecl *OperatorNew;
+
   /// \brief Points to the deallocation function used in case of error. May be
   /// null.
   FunctionDecl *OperatorDelete;
@@ -1802,27 +1872,35 @@
 
   /// Was the usage ::new, i.e. is the global new to be used?
   unsigned GlobalNew : 1;
+
   /// Do we allocate an array? If so, the first SubExpr is the size expression.
   unsigned Array : 1;
+
   /// Should the alignment be passed to the allocation function?
   unsigned PassAlignment : 1;
+
   /// If this is an array allocation, does the usual deallocation
   /// function for the allocated type want to know the allocated size?
   unsigned UsualArrayDeleteWantsSize : 1;
+
   /// The number of placement new arguments.
   unsigned NumPlacementArgs : 26;
+
   /// What kind of initializer do we have? Could be none, parens, or braces.
   /// In storage, we distinguish between "none, and no initializer expr", and
   /// "none, but an implicit initializer expr".
   unsigned StoredInitializationStyle : 2;
 
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
 public:
   enum InitializationStyle {
-    NoInit,   ///< New-expression has no initializer as written.
-    CallInit, ///< New-expression has a C++98 paren-delimited initializer.
-    ListInit  ///< New-expression has a C++11 list-initializer.
+    /// New-expression has no initializer as written.
+    NoInit,
+
+    /// New-expression has a C++98 paren-delimited initializer.
+    CallInit,
+
+    /// New-expression has a C++11 list-initializer.
+    ListInit
   };
 
   CXXNewExpr(const ASTContext &C, bool globalNew, FunctionDecl *operatorNew,
@@ -1833,7 +1911,7 @@
              QualType ty, TypeSourceInfo *AllocatedTypeInfo,
              SourceRange Range, SourceRange directInitRange);
   explicit CXXNewExpr(EmptyShell Shell)
-    : Expr(CXXNewExprClass, Shell), SubExprs(nullptr) { }
+      : Expr(CXXNewExprClass, Shell) {}
 
   void AllocateArgsArray(const ASTContext &C, bool isArray,
                          unsigned numPlaceArgs, bool hasInitializer);
@@ -1870,6 +1948,7 @@
   void setOperatorDelete(FunctionDecl *D) { OperatorDelete = D; }
 
   bool isArray() const { return Array; }
+
   Expr *getArraySize() {
     return Array ? cast<Expr>(SubExprs[0]) : nullptr;
   }
@@ -1878,6 +1957,7 @@
   }
 
   unsigned getNumPlacementArgs() const { return NumPlacementArgs; }
+
   Expr **getPlacementArgs() {
     return reinterpret_cast<Expr **>(SubExprs + Array + hasInitializer());
   }
@@ -1932,8 +2012,8 @@
     return UsualArrayDeleteWantsSize;
   }
 
-  typedef ExprIterator arg_iterator;
-  typedef ConstExprIterator const_arg_iterator;
+  using arg_iterator = ExprIterator;
+  using const_arg_iterator = ConstExprIterator;
 
   llvm::iterator_range<arg_iterator> placement_arguments() {
     return llvm::make_range(placement_arg_begin(), placement_arg_end());
@@ -1956,7 +2036,8 @@
     return SubExprs + Array + hasInitializer() + getNumPlacementArgs();
   }
 
-  typedef Stmt **raw_arg_iterator;
+  using raw_arg_iterator = Stmt **;
+
   raw_arg_iterator raw_arg_begin() { return SubExprs; }
   raw_arg_iterator raw_arg_end() {
     return SubExprs + Array + hasInitializer() + getNumPlacementArgs();
@@ -1974,6 +2055,7 @@
   SourceRange getSourceRange() const LLVM_READONLY {
     return Range;
   }
+
   SourceLocation getLocStart() const LLVM_READONLY { return getStartLoc(); }
   SourceLocation getLocEnd() const LLVM_READONLY { return getEndLoc(); }
 
@@ -1991,36 +2073,43 @@
 /// destructor calls, e.g. "delete[] pArray".
 class CXXDeleteExpr : public Expr {
   /// Points to the operator delete overload that is used. Could be a member.
-  FunctionDecl *OperatorDelete;
+  FunctionDecl *OperatorDelete = nullptr;
+
   /// The pointer expression to be deleted.
-  Stmt *Argument;
+  Stmt *Argument = nullptr;
+
   /// Location of the expression.
   SourceLocation Loc;
+
   /// Is this a forced global delete, i.e. "::delete"?
   bool GlobalDelete : 1;
+
   /// Is this the array form of delete, i.e. "delete[]"?
   bool ArrayForm : 1;
+
   /// ArrayFormAsWritten can be different from ArrayForm if 'delete' is applied
   /// to pointer-to-array type (ArrayFormAsWritten will be false while ArrayForm
   /// will be true).
   bool ArrayFormAsWritten : 1;
+
   /// Does the usual deallocation function for the element type require
   /// a size_t argument?
   bool UsualArrayDeleteWantsSize : 1;
+
 public:
+  friend class ASTStmtReader;
+
   CXXDeleteExpr(QualType ty, bool globalDelete, bool arrayForm,
                 bool arrayFormAsWritten, bool usualArrayDeleteWantsSize,
                 FunctionDecl *operatorDelete, Expr *arg, SourceLocation loc)
-    : Expr(CXXDeleteExprClass, ty, VK_RValue, OK_Ordinary, false, false,
-           arg->isInstantiationDependent(),
-           arg->containsUnexpandedParameterPack()),
-      OperatorDelete(operatorDelete), Argument(arg), Loc(loc),
-      GlobalDelete(globalDelete),
-      ArrayForm(arrayForm), ArrayFormAsWritten(arrayFormAsWritten),
-      UsualArrayDeleteWantsSize(usualArrayDeleteWantsSize) { }
-  explicit CXXDeleteExpr(EmptyShell Shell)
-    : Expr(CXXDeleteExprClass, Shell), OperatorDelete(nullptr),
-      Argument(nullptr) {}
+      : Expr(CXXDeleteExprClass, ty, VK_RValue, OK_Ordinary, false, false,
+             arg->isInstantiationDependent(),
+             arg->containsUnexpandedParameterPack()),
+        OperatorDelete(operatorDelete), Argument(arg), Loc(loc),
+        GlobalDelete(globalDelete),
+        ArrayForm(arrayForm), ArrayFormAsWritten(arrayFormAsWritten),
+        UsualArrayDeleteWantsSize(usualArrayDeleteWantsSize) {}
+  explicit CXXDeleteExpr(EmptyShell Shell) : Expr(CXXDeleteExprClass, Shell) {}
 
   bool isGlobalDelete() const { return GlobalDelete; }
   bool isArrayForm() const { return ArrayForm; }
@@ -2054,8 +2143,6 @@
 
   // Iterators
   child_range children() { return child_range(&Argument, &Argument+1); }
-
-  friend class ASTStmtReader;
 };
 
 /// \brief Stores the type being destroyed by a pseudo-destructor expression.
@@ -2068,10 +2155,10 @@
   SourceLocation Location;
 
 public:
-  PseudoDestructorTypeStorage() { }
+  PseudoDestructorTypeStorage() = default;
 
   PseudoDestructorTypeStorage(IdentifierInfo *II, SourceLocation Loc)
-    : Type(II), Location(Loc) { }
+      : Type(II), Location(Loc) {}
 
   PseudoDestructorTypeStorage(TypeSourceInfo *Info);
 
@@ -2111,8 +2198,10 @@
 /// for scalar types. A pseudo-destructor expression has no run-time semantics
 /// beyond evaluating the base expression.
 class CXXPseudoDestructorExpr : public Expr {
+  friend class ASTStmtReader;
+
   /// \brief The base expression (that is being destroyed).
-  Stmt *Base;
+  Stmt *Base = nullptr;
 
   /// \brief Whether the operator was an arrow ('->'); otherwise, it was a
   /// period ('.').
@@ -2126,7 +2215,7 @@
 
   /// \brief The type that precedes the '::' in a qualified pseudo-destructor
   /// expression.
-  TypeSourceInfo *ScopeType;
+  TypeSourceInfo *ScopeType = nullptr;
 
   /// \brief The location of the '::' in a qualified pseudo-destructor
   /// expression.
@@ -2139,8 +2228,6 @@
   /// resolve the name.
   PseudoDestructorTypeStorage DestroyedType;
 
-  friend class ASTStmtReader;
-
 public:
   CXXPseudoDestructorExpr(const ASTContext &Context,
                           Expr *Base, bool isArrow, SourceLocation OperatorLoc,
@@ -2151,8 +2238,7 @@
                           PseudoDestructorTypeStorage DestroyedType);
 
   explicit CXXPseudoDestructorExpr(EmptyShell Shell)
-    : Expr(CXXPseudoDestructorExprClass, Shell),
-      Base(nullptr), IsArrow(false), QualifierLoc(), ScopeType(nullptr) { }
+      : Expr(CXXPseudoDestructorExprClass, Shell), IsArrow(false) {}
 
   Expr *getBase() const { return cast<Expr>(Base); }
 
@@ -2270,13 +2356,17 @@
                 SourceLocation RParenLoc,
                 bool Value);
 
-  TypeTraitExpr(EmptyShell Empty) : Expr(TypeTraitExprClass, Empty) { }
+  TypeTraitExpr(EmptyShell Empty) : Expr(TypeTraitExprClass, Empty) {}
 
   size_t numTrailingObjects(OverloadToken<TypeSourceInfo *>) const {
     return getNumArgs();
   }
 
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+  friend TrailingObjects;
+
   /// \brief Create a new type trait expression.
   static TypeTraitExpr *Create(const ASTContext &C, QualType T,
                                SourceLocation Loc, TypeTrait Kind,
@@ -2323,10 +2413,6 @@
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
-
-  friend TrailingObjects;
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
 };
 
 /// \brief An Embarcadero array type trait, as used in the implementation of
@@ -2338,13 +2424,11 @@
 ///   __array_extent(int, 1)    == 20
 /// \endcode
 class ArrayTypeTraitExpr : public Expr {
-  virtual void anchor();
-
   /// \brief The trait. An ArrayTypeTrait enum in MSVC compat unsigned.
   unsigned ATT : 2;
 
   /// \brief The value of the type trait. Unspecified if dependent.
-  uint64_t Value;
+  uint64_t Value = 0;
 
   /// \brief The array dimension being queried, or -1 if not used.
   Expr *Dimension;
@@ -2356,26 +2440,28 @@
   SourceLocation RParen;
 
   /// \brief The type being queried.
-  TypeSourceInfo *QueriedType;
+  TypeSourceInfo *QueriedType = nullptr;
+
+  virtual void anchor();
 
 public:
+  friend class ASTStmtReader;
+
   ArrayTypeTraitExpr(SourceLocation loc, ArrayTypeTrait att,
                      TypeSourceInfo *queried, uint64_t value,
                      Expr *dimension, SourceLocation rparen, QualType ty)
-    : Expr(ArrayTypeTraitExprClass, ty, VK_RValue, OK_Ordinary,
-           false, queried->getType()->isDependentType(),
-           (queried->getType()->isInstantiationDependentType() ||
-            (dimension && dimension->isInstantiationDependent())),
-           queried->getType()->containsUnexpandedParameterPack()),
-      ATT(att), Value(value), Dimension(dimension),
-      Loc(loc), RParen(rparen), QueriedType(queried) { }
-
+      : Expr(ArrayTypeTraitExprClass, ty, VK_RValue, OK_Ordinary,
+             false, queried->getType()->isDependentType(),
+             (queried->getType()->isInstantiationDependentType() ||
+              (dimension && dimension->isInstantiationDependent())),
+             queried->getType()->containsUnexpandedParameterPack()),
+        ATT(att), Value(value), Dimension(dimension),
+        Loc(loc), RParen(rparen), QueriedType(queried) {}
 
   explicit ArrayTypeTraitExpr(EmptyShell Empty)
-    : Expr(ArrayTypeTraitExprClass, Empty), ATT(0), Value(false),
-      QueriedType() { }
+      : Expr(ArrayTypeTraitExprClass, Empty), ATT(0) {}
 
-  virtual ~ArrayTypeTraitExpr() { }
+  virtual ~ArrayTypeTraitExpr() = default;
 
   SourceLocation getLocStart() const LLVM_READONLY { return Loc; }
   SourceLocation getLocEnd() const LLVM_READONLY { return RParen; }
@@ -2398,8 +2484,6 @@
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
-
-  friend class ASTStmtReader;
 };
 
 /// \brief An expression trait intrinsic.
@@ -2412,6 +2496,7 @@
 class ExpressionTraitExpr : public Expr {
   /// \brief The trait. A ExpressionTrait enum in MSVC compatible unsigned.
   unsigned ET : 31;
+
   /// \brief The value of the type trait. Unspecified if dependent.
   unsigned Value : 1;
 
@@ -2422,23 +2507,25 @@
   SourceLocation RParen;
 
   /// \brief The expression being queried.
-  Expr* QueriedExpression;
+  Expr* QueriedExpression = nullptr;
+
 public:
+  friend class ASTStmtReader;
+
   ExpressionTraitExpr(SourceLocation loc, ExpressionTrait et,
                      Expr *queried, bool value,
                      SourceLocation rparen, QualType resultType)
-    : Expr(ExpressionTraitExprClass, resultType, VK_RValue, OK_Ordinary,
-           false, // Not type-dependent
-           // Value-dependent if the argument is type-dependent.
-           queried->isTypeDependent(),
-           queried->isInstantiationDependent(),
-           queried->containsUnexpandedParameterPack()),
-      ET(et), Value(value), Loc(loc), RParen(rparen),
-      QueriedExpression(queried) { }
+      : Expr(ExpressionTraitExprClass, resultType, VK_RValue, OK_Ordinary,
+             false, // Not type-dependent
+             // Value-dependent if the argument is type-dependent.
+             queried->isTypeDependent(),
+             queried->isInstantiationDependent(),
+             queried->containsUnexpandedParameterPack()),
+        ET(et), Value(value), Loc(loc), RParen(rparen),
+        QueriedExpression(queried) {}
 
   explicit ExpressionTraitExpr(EmptyShell Empty)
-    : Expr(ExpressionTraitExprClass, Empty), ET(0), Value(false),
-      QueriedExpression() { }
+      : Expr(ExpressionTraitExprClass, Empty), ET(0), Value(false) {}
 
   SourceLocation getLocStart() const LLVM_READONLY { return Loc; }
   SourceLocation getLocEnd() const LLVM_READONLY { return RParen; }
@@ -2457,11 +2544,8 @@
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
-
-  friend class ASTStmtReader;
 };
 
-
 /// \brief A reference to an overloaded function set, either an
 /// \c UnresolvedLookupExpr or an \c UnresolvedMemberExpr.
 class OverloadExpr : public Expr {
@@ -2475,13 +2559,26 @@
   /// include UsingShadowDecls.  Access is relative to the naming
   /// class.
   // FIXME: Allocate this data after the OverloadExpr subclass.
-  DeclAccessPair *Results;
-  unsigned NumResults;
+  DeclAccessPair *Results = nullptr;
+
+  unsigned NumResults = 0;
 
 protected:
   /// \brief Whether the name includes info for explicit template
   /// keyword and arguments.
-  bool HasTemplateKWAndArgsInfo;
+  bool HasTemplateKWAndArgsInfo = false;
+
+  OverloadExpr(StmtClass K, const ASTContext &C,
+               NestedNameSpecifierLoc QualifierLoc,
+               SourceLocation TemplateKWLoc,
+               const DeclarationNameInfo &NameInfo,
+               const TemplateArgumentListInfo *TemplateArgs,
+               UnresolvedSetIterator Begin, UnresolvedSetIterator End,
+               bool KnownDependent,
+               bool KnownInstantiationDependent,
+               bool KnownContainsUnexpandedParameterPack);
+
+  OverloadExpr(StmtClass K, EmptyShell Empty) : Expr(K, Empty) {}
 
   /// \brief Return the optional template keyword and arguments info.
   ASTTemplateKWAndArgsInfo *
@@ -2496,25 +2593,14 @@
   /// Return the optional template arguments.
   TemplateArgumentLoc *getTrailingTemplateArgumentLoc(); // defined far below
 
-  OverloadExpr(StmtClass K, const ASTContext &C,
-               NestedNameSpecifierLoc QualifierLoc,
-               SourceLocation TemplateKWLoc,
-               const DeclarationNameInfo &NameInfo,
-               const TemplateArgumentListInfo *TemplateArgs,
-               UnresolvedSetIterator Begin, UnresolvedSetIterator End,
-               bool KnownDependent,
-               bool KnownInstantiationDependent,
-               bool KnownContainsUnexpandedParameterPack);
-
-  OverloadExpr(StmtClass K, EmptyShell Empty)
-    : Expr(K, Empty), QualifierLoc(), Results(nullptr), NumResults(0),
-      HasTemplateKWAndArgsInfo(false) { }
-
   void initializeResults(const ASTContext &C,
                          UnresolvedSetIterator Begin,
                          UnresolvedSetIterator End);
 
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+
   struct FindResult {
     OverloadExpr *Expression;
     bool IsAddressOfOperand;
@@ -2552,7 +2638,8 @@
   /// \brief Gets the naming class of this lookup, if any.
   CXXRecordDecl *getNamingClass() const;
 
-  typedef UnresolvedSetImpl::iterator decls_iterator;
+  using decls_iterator = UnresolvedSetImpl::iterator;
+
   decls_iterator decls_begin() const { return UnresolvedSetIterator(Results); }
   decls_iterator decls_end() const {
     return UnresolvedSetIterator(Results + NumResults);
@@ -2636,9 +2723,6 @@
     return T->getStmtClass() == UnresolvedLookupExprClass ||
            T->getStmtClass() == UnresolvedMemberExprClass;
   }
-
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
 };
 
 /// \brief A reference to a name which we were able to look up during
@@ -2656,25 +2740,25 @@
     : public OverloadExpr,
       private llvm::TrailingObjects<
           UnresolvedLookupExpr, ASTTemplateKWAndArgsInfo, TemplateArgumentLoc> {
+  friend class ASTStmtReader;
+  friend class OverloadExpr;
+  friend TrailingObjects;
+
   /// True if these lookup results should be extended by
   /// argument-dependent lookup if this is the operand of a function
   /// call.
-  bool RequiresADL;
+  bool RequiresADL = false;
 
   /// True if these lookup results are overloaded.  This is pretty
   /// trivially rederivable if we urgently need to kill this field.
-  bool Overloaded;
+  bool Overloaded = false;
 
   /// The naming class (C++ [class.access.base]p5) of the lookup, if
   /// any.  This can generally be recalculated from the context chain,
   /// but that can be fairly expensive for unqualified lookups.  If we
   /// want to improve memory use here, this could go in a union
   /// against the qualified-lookup bits.
-  CXXRecordDecl *NamingClass;
-
-  size_t numTrailingObjects(OverloadToken<ASTTemplateKWAndArgsInfo>) const {
-    return HasTemplateKWAndArgsInfo ? 1 : 0;
-  }
+  CXXRecordDecl *NamingClass = nullptr;
 
   UnresolvedLookupExpr(const ASTContext &C,
                        CXXRecordDecl *NamingClass,
@@ -2684,20 +2768,17 @@
                        bool RequiresADL, bool Overloaded,
                        const TemplateArgumentListInfo *TemplateArgs,
                        UnresolvedSetIterator Begin, UnresolvedSetIterator End)
-    : OverloadExpr(UnresolvedLookupExprClass, C, QualifierLoc, TemplateKWLoc,
-                   NameInfo, TemplateArgs, Begin, End, false, false, false),
-      RequiresADL(RequiresADL),
-      Overloaded(Overloaded), NamingClass(NamingClass)
-  {}
+      : OverloadExpr(UnresolvedLookupExprClass, C, QualifierLoc, TemplateKWLoc,
+                     NameInfo, TemplateArgs, Begin, End, false, false, false),
+        RequiresADL(RequiresADL),
+        Overloaded(Overloaded), NamingClass(NamingClass) {}
 
   UnresolvedLookupExpr(EmptyShell Empty)
-    : OverloadExpr(UnresolvedLookupExprClass, Empty),
-      RequiresADL(false), Overloaded(false), NamingClass(nullptr)
-  {}
+      : OverloadExpr(UnresolvedLookupExprClass, Empty) {}
 
-  friend TrailingObjects;
-  friend class OverloadExpr;
-  friend class ASTStmtReader;
+  size_t numTrailingObjects(OverloadToken<ASTTemplateKWAndArgsInfo>) const {
+    return HasTemplateKWAndArgsInfo ? 1 : 0;
+  }
 
 public:
   static UnresolvedLookupExpr *Create(const ASTContext &C,
@@ -2743,6 +2824,7 @@
       return l.getBeginLoc();
     return getNameInfo().getLocStart();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     if (hasExplicitTemplateArgs())
       return getRAngleLoc();
@@ -2788,17 +2870,21 @@
   /// keyword and arguments.
   bool HasTemplateKWAndArgsInfo;
 
-  size_t numTrailingObjects(OverloadToken<ASTTemplateKWAndArgsInfo>) const {
-    return HasTemplateKWAndArgsInfo ? 1 : 0;
-  }
-
   DependentScopeDeclRefExpr(QualType T,
                             NestedNameSpecifierLoc QualifierLoc,
                             SourceLocation TemplateKWLoc,
                             const DeclarationNameInfo &NameInfo,
                             const TemplateArgumentListInfo *Args);
 
+  size_t numTrailingObjects(OverloadToken<ASTTemplateKWAndArgsInfo>) const {
+    return HasTemplateKWAndArgsInfo ? 1 : 0;
+  }
+
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+  friend TrailingObjects;
+
   static DependentScopeDeclRefExpr *Create(const ASTContext &C,
                                            NestedNameSpecifierLoc QualifierLoc,
                                            SourceLocation TemplateKWLoc,
@@ -2888,6 +2974,7 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return QualifierLoc.getBeginLoc();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     if (hasExplicitTemplateArgs())
       return getRAngleLoc();
@@ -2901,10 +2988,6 @@
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
-
-  friend TrailingObjects;
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
 };
 
 /// Represents an expression -- generally a full-expression -- that
@@ -2925,18 +3008,18 @@
   /// It's useful to remember the set of blocks;  we could also
   /// remember the set of temporaries, but there's currently
   /// no need.
-  typedef BlockDecl *CleanupObject;
+  using CleanupObject = BlockDecl *;
 
 private:
+  friend class ASTStmtReader;
+  friend TrailingObjects;
+
   Stmt *SubExpr;
 
   ExprWithCleanups(EmptyShell, unsigned NumObjects);
   ExprWithCleanups(Expr *SubExpr, bool CleanupsHaveSideEffects,
                    ArrayRef<CleanupObject> Objects);
 
-  friend TrailingObjects;
-  friend class ASTStmtReader;
-
 public:
   static ExprWithCleanups *Create(const ASTContext &C, EmptyShell empty,
                                   unsigned numObjects);
@@ -2959,6 +3042,7 @@
 
   Expr *getSubExpr() { return cast<Expr>(SubExpr); }
   const Expr *getSubExpr() const { return cast<Expr>(SubExpr); }
+
   bool cleanupsHaveSideEffects() const {
     return ExprWithCleanupsBits.CleanupsHaveSideEffects;
   }
@@ -2970,6 +3054,7 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return SubExpr->getLocStart();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY { return SubExpr->getLocEnd();}
 
   // Implement isa/cast/dyncast/etc.
@@ -3005,8 +3090,11 @@
 class CXXUnresolvedConstructExpr final
     : public Expr,
       private llvm::TrailingObjects<CXXUnresolvedConstructExpr, Expr *> {
+  friend class ASTStmtReader;
+  friend TrailingObjects;
+
   /// \brief The type being constructed.
-  TypeSourceInfo *Type;
+  TypeSourceInfo *Type = nullptr;
 
   /// \brief The location of the left parentheses ('(').
   SourceLocation LParenLoc;
@@ -3023,10 +3111,7 @@
                              SourceLocation RParenLoc);
 
   CXXUnresolvedConstructExpr(EmptyShell Empty, unsigned NumArgs)
-    : Expr(CXXUnresolvedConstructExprClass, Empty), Type(), NumArgs(NumArgs) { }
-
-  friend TrailingObjects;
-  friend class ASTStmtReader;
+      : Expr(CXXUnresolvedConstructExprClass, Empty), NumArgs(NumArgs) {}
 
 public:
   static CXXUnresolvedConstructExpr *Create(const ASTContext &C,
@@ -3064,11 +3149,13 @@
   /// \brief Retrieve the number of arguments.
   unsigned arg_size() const { return NumArgs; }
 
-  typedef Expr** arg_iterator;
+  using arg_iterator = Expr **;
+
   arg_iterator arg_begin() { return getTrailingObjects<Expr *>(); }
   arg_iterator arg_end() { return arg_begin() + NumArgs; }
 
-  typedef const Expr* const * const_arg_iterator;
+  using const_arg_iterator = const Expr* const *;
+
   const_arg_iterator arg_begin() const { return getTrailingObjects<Expr *>(); }
   const_arg_iterator arg_end() const {
     return arg_begin() + NumArgs;
@@ -3090,6 +3177,7 @@
   }
 
   SourceLocation getLocStart() const LLVM_READONLY;
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     if (!RParenLoc.isValid() && NumArgs > 0)
       return getArg(NumArgs - 1)->getLocEnd();
@@ -3170,6 +3258,10 @@
                               const TemplateArgumentListInfo *TemplateArgs);
 
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+  friend TrailingObjects;
+
   CXXDependentScopeMemberExpr(const ASTContext &C, Expr *Base,
                               QualType BaseType, bool IsArrow,
                               SourceLocation OperatorLoc,
@@ -3219,7 +3311,6 @@
   /// name, with source location information.
   NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
 
-
   /// \brief Retrieve the first part of the nested-name-specifier that was
   /// found in the scope of the member access expression when the member access
   /// was initially parsed.
@@ -3331,10 +3422,6 @@
       return child_range(child_iterator(), child_iterator());
     return child_range(&Base, &Base + 1);
   }
-
-  friend TrailingObjects;
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
 };
 
 /// \brief Represents a C++ member access expression for which lookup
@@ -3356,6 +3443,10 @@
     : public OverloadExpr,
       private llvm::TrailingObjects<
           UnresolvedMemberExpr, ASTTemplateKWAndArgsInfo, TemplateArgumentLoc> {
+  friend class ASTStmtReader;
+  friend class OverloadExpr;
+  friend TrailingObjects;
+
   /// \brief Whether this member expression used the '->' operator or
   /// the '.' operator.
   bool IsArrow : 1;
@@ -3368,7 +3459,7 @@
   /// e.g., the \c x in x.f.
   ///
   /// This can be null if this is an 'unbased' member expression.
-  Stmt *Base;
+  Stmt *Base = nullptr;
 
   /// \brief The type of the base expression; never null.
   QualType BaseType;
@@ -3376,10 +3467,6 @@
   /// \brief The location of the '->' or '.' operator.
   SourceLocation OperatorLoc;
 
-  size_t numTrailingObjects(OverloadToken<ASTTemplateKWAndArgsInfo>) const {
-    return HasTemplateKWAndArgsInfo ? 1 : 0;
-  }
-
   UnresolvedMemberExpr(const ASTContext &C, bool HasUnresolvedUsing,
                        Expr *Base, QualType BaseType, bool IsArrow,
                        SourceLocation OperatorLoc,
@@ -3390,12 +3477,12 @@
                        UnresolvedSetIterator Begin, UnresolvedSetIterator End);
 
   UnresolvedMemberExpr(EmptyShell Empty)
-    : OverloadExpr(UnresolvedMemberExprClass, Empty), IsArrow(false),
-      HasUnresolvedUsing(false), Base(nullptr) { }
+      : OverloadExpr(UnresolvedMemberExprClass, Empty), IsArrow(false),
+        HasUnresolvedUsing(false) {}
 
-  friend TrailingObjects;
-  friend class OverloadExpr;
-  friend class ASTStmtReader;
+  size_t numTrailingObjects(OverloadToken<ASTTemplateKWAndArgsInfo>) const {
+    return HasTemplateKWAndArgsInfo ? 1 : 0;
+  }
 
 public:
   static UnresolvedMemberExpr *
@@ -3468,6 +3555,7 @@
       return l.getBeginLoc();
     return getMemberNameInfo().getLocStart();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     if (hasExplicitTemplateArgs())
       return getRAngleLoc();
@@ -3513,26 +3601,23 @@
 /// The noexcept expression tests whether a given expression might throw. Its
 /// result is a boolean constant.
 class CXXNoexceptExpr : public Expr {
+  friend class ASTStmtReader;
+
   bool Value : 1;
   Stmt *Operand;
   SourceRange Range;
 
-  friend class ASTStmtReader;
-
 public:
   CXXNoexceptExpr(QualType Ty, Expr *Operand, CanThrowResult Val,
                   SourceLocation Keyword, SourceLocation RParen)
-    : Expr(CXXNoexceptExprClass, Ty, VK_RValue, OK_Ordinary,
-           /*TypeDependent*/false,
-           /*ValueDependent*/Val == CT_Dependent,
-           Val == CT_Dependent || Operand->isInstantiationDependent(),
-           Operand->containsUnexpandedParameterPack()),
-      Value(Val == CT_Cannot), Operand(Operand), Range(Keyword, RParen)
-  { }
+      : Expr(CXXNoexceptExprClass, Ty, VK_RValue, OK_Ordinary,
+             /*TypeDependent*/false,
+             /*ValueDependent*/Val == CT_Dependent,
+             Val == CT_Dependent || Operand->isInstantiationDependent(),
+             Operand->containsUnexpandedParameterPack()),
+        Value(Val == CT_Cannot), Operand(Operand), Range(Keyword, RParen) {}
 
-  CXXNoexceptExpr(EmptyShell Empty)
-    : Expr(CXXNoexceptExprClass, Empty)
-  { }
+  CXXNoexceptExpr(EmptyShell Empty) : Expr(CXXNoexceptExprClass, Empty) {}
 
   Expr *getOperand() const { return static_cast<Expr*>(Operand); }
 
@@ -3568,6 +3653,9 @@
 /// template is instantiated, the pack expansion will instantiate to zero or
 /// or more function arguments to the function object \c f.
 class PackExpansionExpr : public Expr {
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+
   SourceLocation EllipsisLoc;
 
   /// \brief The number of expansions that will be produced by this pack
@@ -3579,21 +3667,18 @@
 
   Stmt *Pattern;
 
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
-
 public:
   PackExpansionExpr(QualType T, Expr *Pattern, SourceLocation EllipsisLoc,
                     Optional<unsigned> NumExpansions)
-    : Expr(PackExpansionExprClass, T, Pattern->getValueKind(),
-           Pattern->getObjectKind(), /*TypeDependent=*/true,
-           /*ValueDependent=*/true, /*InstantiationDependent=*/true,
-           /*ContainsUnexpandedParameterPack=*/false),
-      EllipsisLoc(EllipsisLoc),
-      NumExpansions(NumExpansions? *NumExpansions + 1 : 0),
-      Pattern(Pattern) { }
+      : Expr(PackExpansionExprClass, T, Pattern->getValueKind(),
+             Pattern->getObjectKind(), /*TypeDependent=*/true,
+             /*ValueDependent=*/true, /*InstantiationDependent=*/true,
+             /*ContainsUnexpandedParameterPack=*/false),
+        EllipsisLoc(EllipsisLoc),
+        NumExpansions(NumExpansions ? *NumExpansions + 1 : 0),
+        Pattern(Pattern) {}
 
-  PackExpansionExpr(EmptyShell Empty) : Expr(PackExpansionExprClass, Empty) { }
+  PackExpansionExpr(EmptyShell Empty) : Expr(PackExpansionExprClass, Empty) {}
 
   /// \brief Retrieve the pattern of the pack expansion.
   Expr *getPattern() { return reinterpret_cast<Expr *>(Pattern); }
@@ -3617,6 +3702,7 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return Pattern->getLocStart();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY { return EllipsisLoc; }
 
   static bool classof(const Stmt *T) {
@@ -3629,7 +3715,6 @@
   }
 };
 
-
 /// \brief Represents an expression that computes the length of a parameter
 /// pack.
 ///
@@ -3642,6 +3727,10 @@
 class SizeOfPackExpr final
     : public Expr,
       private llvm::TrailingObjects<SizeOfPackExpr, TemplateArgument> {
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+  friend TrailingObjects;
+
   /// \brief The location of the \c sizeof keyword.
   SourceLocation OperatorLoc;
 
@@ -3664,11 +3753,7 @@
   unsigned Length;
 
   /// \brief The parameter pack.
-  NamedDecl *Pack;
-
-  friend TrailingObjects;
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
+  NamedDecl *Pack = nullptr;
 
   /// \brief Create an expression that computes the length of
   /// the given parameter pack.
@@ -3689,7 +3774,7 @@
 
   /// \brief Create an empty expression.
   SizeOfPackExpr(EmptyShell Empty, unsigned NumPartialArgs)
-      : Expr(SizeOfPackExprClass, Empty), Length(NumPartialArgs), Pack() {}
+      : Expr(SizeOfPackExprClass, Empty), Length(NumPartialArgs) {}
 
 public:
   static SizeOfPackExpr *Create(ASTContext &Context, SourceLocation OperatorLoc,
@@ -3754,6 +3839,9 @@
 /// \brief Represents a reference to a non-type template parameter
 /// that has been substituted with a template argument.
 class SubstNonTypeTemplateParmExpr : public Expr {
+  friend class ASTReader;
+  friend class ASTStmtReader;
+
   /// \brief The replaced parameter.
   NonTypeTemplateParmDecl *Param;
 
@@ -3763,10 +3851,8 @@
   /// \brief The location of the non-type template parameter reference.
   SourceLocation NameLoc;
 
-  friend class ASTReader;
-  friend class ASTStmtReader;
   explicit SubstNonTypeTemplateParmExpr(EmptyShell Empty)
-    : Expr(SubstNonTypeTemplateParmExprClass, Empty) { }
+      : Expr(SubstNonTypeTemplateParmExprClass, Empty) {}
 
 public:
   SubstNonTypeTemplateParmExpr(QualType type,
@@ -3774,11 +3860,11 @@
                                SourceLocation loc,
                                NonTypeTemplateParmDecl *param,
                                Expr *replacement)
-    : Expr(SubstNonTypeTemplateParmExprClass, type, valueKind, OK_Ordinary,
-           replacement->isTypeDependent(), replacement->isValueDependent(),
-           replacement->isInstantiationDependent(),
-           replacement->containsUnexpandedParameterPack()),
-      Param(param), Replacement(replacement), NameLoc(loc) {}
+      : Expr(SubstNonTypeTemplateParmExprClass, type, valueKind, OK_Ordinary,
+             replacement->isTypeDependent(), replacement->isValueDependent(),
+             replacement->isInstantiationDependent(),
+             replacement->containsUnexpandedParameterPack()),
+        Param(param), Replacement(replacement), NameLoc(loc) {}
 
   SourceLocation getNameLoc() const { return NameLoc; }
   SourceLocation getLocStart() const LLVM_READONLY { return NameLoc; }
@@ -3809,6 +3895,9 @@
 /// arguments), this type will be replaced with the appropriate underlying
 /// expression at the current pack substitution index.
 class SubstNonTypeTemplateParmPackExpr : public Expr {
+  friend class ASTReader;
+  friend class ASTStmtReader;
+
   /// \brief The non-type template parameter pack itself.
   NonTypeTemplateParmDecl *Param;
 
@@ -3822,13 +3911,12 @@
   /// \brief The location of the non-type template parameter pack reference.
   SourceLocation NameLoc;
 
-  friend class ASTReader;
-  friend class ASTStmtReader;
   explicit SubstNonTypeTemplateParmPackExpr(EmptyShell Empty)
-    : Expr(SubstNonTypeTemplateParmPackExprClass, Empty) { }
+      : Expr(SubstNonTypeTemplateParmPackExprClass, Empty) {}
 
 public:
   SubstNonTypeTemplateParmPackExpr(QualType T,
+                                   ExprValueKind ValueKind,
                                    NonTypeTemplateParmDecl *Param,
                                    SourceLocation NameLoc,
                                    const TemplateArgument &ArgPack);
@@ -3873,6 +3961,10 @@
 class FunctionParmPackExpr final
     : public Expr,
       private llvm::TrailingObjects<FunctionParmPackExpr, ParmVarDecl *> {
+  friend class ASTReader;
+  friend class ASTStmtReader;
+  friend TrailingObjects;
+
   /// \brief The function parameter pack which was referenced.
   ParmVarDecl *ParamPack;
 
@@ -3886,10 +3978,6 @@
                        SourceLocation NameLoc, unsigned NumParams,
                        ParmVarDecl *const *Params);
 
-  friend TrailingObjects;
-  friend class ASTReader;
-  friend class ASTStmtReader;
-
 public:
   static FunctionParmPackExpr *Create(const ASTContext &Context, QualType T,
                                       ParmVarDecl *ParamPack,
@@ -3906,7 +3994,7 @@
 
   /// \brief Iterators over the parameters which the parameter pack expanded
   /// into.
-  typedef ParmVarDecl * const *iterator;
+  using iterator = ParmVarDecl * const *;
   iterator begin() const { return getTrailingObjects<ParmVarDecl *>(); }
   iterator end() const { return begin() + NumParameters; }
 
@@ -3950,6 +4038,9 @@
 /// declaration which is responsible for the lifetime extension.
 class MaterializeTemporaryExpr : public Expr {
 private:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+
   struct ExtraState {
     /// \brief The temporary-generating expression whose value will be
     /// materialized.
@@ -3963,24 +4054,21 @@
   };
   llvm::PointerUnion<Stmt *, ExtraState *> State;
 
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
-
   void initializeExtraState(const ValueDecl *ExtendedBy,
                             unsigned ManglingNumber);
 
 public:
   MaterializeTemporaryExpr(QualType T, Expr *Temporary,
                            bool BoundToLvalueReference)
-    : Expr(MaterializeTemporaryExprClass, T,
-           BoundToLvalueReference? VK_LValue : VK_XValue, OK_Ordinary,
-           Temporary->isTypeDependent(), Temporary->isValueDependent(),
-           Temporary->isInstantiationDependent(),
-           Temporary->containsUnexpandedParameterPack()),
+      : Expr(MaterializeTemporaryExprClass, T,
+             BoundToLvalueReference? VK_LValue : VK_XValue, OK_Ordinary,
+             Temporary->isTypeDependent(), Temporary->isValueDependent(),
+             Temporary->isInstantiationDependent(),
+             Temporary->containsUnexpandedParameterPack()),
         State(Temporary) {}
 
   MaterializeTemporaryExpr(EmptyShell Empty)
-    : Expr(MaterializeTemporaryExprClass, Empty) { }
+      : Expr(MaterializeTemporaryExprClass, Empty) {}
 
   Stmt *getTemporary() const {
     return State.is<Stmt *>() ? State.get<Stmt *>()
@@ -4031,6 +4119,7 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return getTemporary()->getLocStart();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return getTemporary()->getLocEnd();
   }
@@ -4058,14 +4147,15 @@
 ///    ( ... op expr )
 ///    ( expr op ... op expr )
 class CXXFoldExpr : public Expr {
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+
   SourceLocation LParenLoc;
   SourceLocation EllipsisLoc;
   SourceLocation RParenLoc;
   Stmt *SubExprs[2];
   BinaryOperatorKind Opcode;
 
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
 public:
   CXXFoldExpr(QualType T, SourceLocation LParenLoc, Expr *LHS,
               BinaryOperatorKind Opcode, SourceLocation EllipsisLoc, Expr *RHS,
@@ -4078,6 +4168,7 @@
     SubExprs[0] = LHS;
     SubExprs[1] = RHS;
   }
+
   CXXFoldExpr(EmptyShell Empty) : Expr(CXXFoldExprClass, Empty) {}
 
   Expr *getLHS() const { return static_cast<Expr*>(SubExprs[0]); }
@@ -4087,10 +4178,13 @@
   bool isRightFold() const {
     return getLHS() && getLHS()->containsUnexpandedParameterPack();
   }
+
   /// Does this produce a left-associated sequence of operators?
   bool isLeftFold() const { return !isRightFold(); }
+
   /// Get the pattern, that is, the operand that contains an unexpanded pack.
   Expr *getPattern() const { return isLeftFold() ? getRHS() : getLHS(); }
+
   /// Get the operand that doesn't contain a pack, for a binary fold.
   Expr *getInit() const { return isLeftFold() ? getLHS() : getRHS(); }
 
@@ -4100,6 +4194,7 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return LParenLoc;
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return RParenLoc;
   }
@@ -4126,13 +4221,15 @@
 /// expression is evaluated, and its result is the result of the overall
 /// expression.
 class CoroutineSuspendExpr : public Expr {
+  friend class ASTStmtReader;
+
   SourceLocation KeywordLoc;
 
   enum SubExpr { Common, Ready, Suspend, Resume, Count };
+
   Stmt *SubExprs[SubExpr::Count];
   OpaqueValueExpr *OpaqueValue = nullptr;
 
-  friend class ASTStmtReader;
 public:
   CoroutineSuspendExpr(StmtClass SC, SourceLocation KeywordLoc, Expr *Common,
                        Expr *Ready, Expr *Suspend, Expr *Resume,
@@ -4147,6 +4244,7 @@
     SubExprs[SubExpr::Suspend] = Suspend;
     SubExprs[SubExpr::Resume] = Resume;
   }
+
   CoroutineSuspendExpr(StmtClass SC, SourceLocation KeywordLoc, QualType Ty,
                        Expr *Common)
       : Expr(SC, Ty, VK_RValue, OK_Ordinary, true, true, true,
@@ -4159,6 +4257,7 @@
     SubExprs[SubExpr::Suspend] = nullptr;
     SubExprs[SubExpr::Resume] = nullptr;
   }
+
   CoroutineSuspendExpr(StmtClass SC, EmptyShell Empty) : Expr(SC, Empty) {
     SubExprs[SubExpr::Common] = nullptr;
     SubExprs[SubExpr::Ready] = nullptr;
@@ -4167,18 +4266,22 @@
   }
 
   SourceLocation getKeywordLoc() const { return KeywordLoc; }
+
   Expr *getCommonExpr() const {
     return static_cast<Expr*>(SubExprs[SubExpr::Common]);
   }
+
   /// \brief getOpaqueValue - Return the opaque value placeholder.
   OpaqueValueExpr *getOpaqueValue() const { return OpaqueValue; }
 
   Expr *getReadyExpr() const {
     return static_cast<Expr*>(SubExprs[SubExpr::Ready]);
   }
+
   Expr *getSuspendExpr() const {
     return static_cast<Expr*>(SubExprs[SubExpr::Suspend]);
   }
+
   Expr *getResumeExpr() const {
     return static_cast<Expr*>(SubExprs[SubExpr::Resume]);
   }
@@ -4186,6 +4289,7 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return KeywordLoc;
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return getCommonExpr()->getLocEnd();
   }
@@ -4203,6 +4307,7 @@
 /// \brief Represents a 'co_await' expression.
 class CoawaitExpr : public CoroutineSuspendExpr {
   friend class ASTStmtReader;
+
 public:
   CoawaitExpr(SourceLocation CoawaitLoc, Expr *Operand, Expr *Ready,
               Expr *Suspend, Expr *Resume, OpaqueValueExpr *OpaqueValue,
@@ -4211,11 +4316,13 @@
                              Suspend, Resume, OpaqueValue) {
     CoawaitBits.IsImplicit = IsImplicit;
   }
+
   CoawaitExpr(SourceLocation CoawaitLoc, QualType Ty, Expr *Operand,
               bool IsImplicit = false)
       : CoroutineSuspendExpr(CoawaitExprClass, CoawaitLoc, Ty, Operand) {
     CoawaitBits.IsImplicit = IsImplicit;
   }
+
   CoawaitExpr(EmptyShell Empty)
       : CoroutineSuspendExpr(CoawaitExprClass, Empty) {}
 
@@ -4235,11 +4342,11 @@
 /// \brief Represents a 'co_await' expression while the type of the promise
 /// is dependent.
 class DependentCoawaitExpr : public Expr {
+  friend class ASTStmtReader;
+
   SourceLocation KeywordLoc;
   Stmt *SubExprs[2];
 
-  friend class ASTStmtReader;
-
 public:
   DependentCoawaitExpr(SourceLocation KeywordLoc, QualType Ty, Expr *Op,
                        UnresolvedLookupExpr *OpCoawait)
@@ -4260,12 +4367,15 @@
       : Expr(DependentCoawaitExprClass, Empty) {}
 
   Expr *getOperand() const { return cast<Expr>(SubExprs[0]); }
+
   UnresolvedLookupExpr *getOperatorCoawaitLookup() const {
     return cast<UnresolvedLookupExpr>(SubExprs[1]);
   }
+
   SourceLocation getKeywordLoc() const { return KeywordLoc; }
 
   SourceLocation getLocStart() const LLVM_READONLY { return KeywordLoc; }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return getOperand()->getLocEnd();
   }
@@ -4280,6 +4390,7 @@
 /// \brief Represents a 'co_yield' expression.
 class CoyieldExpr : public CoroutineSuspendExpr {
   friend class ASTStmtReader;
+
 public:
   CoyieldExpr(SourceLocation CoyieldLoc, Expr *Operand, Expr *Ready,
               Expr *Suspend, Expr *Resume, OpaqueValueExpr *OpaqueValue)
@@ -4300,6 +4411,6 @@
   }
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_EXPRCXX_H
diff --git a/include/clang/AST/ExprObjC.h b/include/clang/AST/ExprObjC.h
index cd938175..6959349 100644
--- a/include/clang/AST/ExprObjC.h
+++ b/include/clang/AST/ExprObjC.h
@@ -1,4 +1,4 @@
-//===--- ExprObjC.h - Classes for representing ObjC expressions -*- C++ -*-===//
+//===- ExprObjC.h - Classes for representing ObjC expressions ---*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,28 +14,51 @@
 #ifndef LLVM_CLANG_AST_EXPROBJC_H
 #define LLVM_CLANG_AST_EXPROBJC_H
 
+#include "clang/AST/Decl.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/OperationKinds.h"
 #include "clang/AST/SelectorLocationsKind.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/Type.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
+#include "clang/Basic/VersionTuple.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/TrailingObjects.h"
+#include "llvm/Support/type_traits.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
 
 namespace clang {
-  class IdentifierInfo;
-  class ASTContext;
+
+class ASTContext;
+class CXXBaseSpecifier;
 
 /// ObjCStringLiteral, used for Objective-C string literals
 /// i.e. @"foo".
 class ObjCStringLiteral : public Expr {
   Stmt *String;
   SourceLocation AtLoc;
+
 public:
   ObjCStringLiteral(StringLiteral *SL, QualType T, SourceLocation L)
-    : Expr(ObjCStringLiteralClass, T, VK_RValue, OK_Ordinary, false, false,
-           false, false),
-      String(SL), AtLoc(L) {}
+      : Expr(ObjCStringLiteralClass, T, VK_RValue, OK_Ordinary, false, false,
+             false, false),
+        String(SL), AtLoc(L) {}
   explicit ObjCStringLiteral(EmptyShell Empty)
-    : Expr(ObjCStringLiteralClass, Empty) {}
+      : Expr(ObjCStringLiteralClass, Empty) {}
 
   StringLiteral *getString() { return cast<StringLiteral>(String); }
   const StringLiteral *getString() const { return cast<StringLiteral>(String); }
@@ -47,26 +70,26 @@
   SourceLocation getLocStart() const LLVM_READONLY { return AtLoc; }
   SourceLocation getLocEnd() const LLVM_READONLY { return String->getLocEnd(); }
 
+  // Iterators
+  child_range children() { return child_range(&String, &String+1); }
+
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == ObjCStringLiteralClass;
   }
-
-  // Iterators
-  child_range children() { return child_range(&String, &String+1); }
 };
 
 /// ObjCBoolLiteralExpr - Objective-C Boolean Literal.
-///
 class ObjCBoolLiteralExpr : public Expr {
   bool Value;
   SourceLocation Loc;
+
 public:
-  ObjCBoolLiteralExpr(bool val, QualType Ty, SourceLocation l) :
-  Expr(ObjCBoolLiteralExprClass, Ty, VK_RValue, OK_Ordinary, false, false,
-       false, false), Value(val), Loc(l) {}
-    
+  ObjCBoolLiteralExpr(bool val, QualType Ty, SourceLocation l)
+      : Expr(ObjCBoolLiteralExprClass, Ty, VK_RValue, OK_Ordinary, false, false,
+             false, false),
+        Value(val), Loc(l) {}
   explicit ObjCBoolLiteralExpr(EmptyShell Empty)
-  : Expr(ObjCBoolLiteralExprClass, Empty) { }
+      : Expr(ObjCBoolLiteralExprClass, Empty) {}
     
   bool getValue() const { return Value; }
   void setValue(bool V) { Value = V; }
@@ -77,14 +100,14 @@
   SourceLocation getLocation() const { return Loc; }
   void setLocation(SourceLocation L) { Loc = L; }
     
-  static bool classof(const Stmt *T) {
-    return T->getStmtClass() == ObjCBoolLiteralExprClass;
-  }
-    
   // Iterators
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == ObjCBoolLiteralExprClass;
+  }
 };
 
 /// ObjCBoxedExpr - used for generalized expression boxing.
@@ -95,15 +118,19 @@
   Stmt *SubExpr;
   ObjCMethodDecl *BoxingMethod;
   SourceRange Range;
+
 public:
+  friend class ASTStmtReader;
+
   ObjCBoxedExpr(Expr *E, QualType T, ObjCMethodDecl *method,
                      SourceRange R)
-  : Expr(ObjCBoxedExprClass, T, VK_RValue, OK_Ordinary, 
-         E->isTypeDependent(), E->isValueDependent(), 
-         E->isInstantiationDependent(), E->containsUnexpandedParameterPack()), 
-         SubExpr(E), BoxingMethod(method), Range(R) {}
+      : Expr(ObjCBoxedExprClass, T, VK_RValue, OK_Ordinary, 
+             E->isTypeDependent(), E->isValueDependent(), 
+             E->isInstantiationDependent(),
+             E->containsUnexpandedParameterPack()), 
+        SubExpr(E), BoxingMethod(method), Range(R) {}
   explicit ObjCBoxedExpr(EmptyShell Empty)
-  : Expr(ObjCBoxedExprClass, Empty) {}
+      : Expr(ObjCBoxedExprClass, Empty) {}
   
   Expr *getSubExpr() { return cast<Expr>(SubExpr); }
   const Expr *getSubExpr() const { return cast<Expr>(SubExpr); }
@@ -116,27 +143,27 @@
   
   SourceLocation getLocStart() const LLVM_READONLY { return Range.getBegin(); }
   SourceLocation getLocEnd() const LLVM_READONLY { return Range.getEnd(); }
+
   SourceRange getSourceRange() const LLVM_READONLY {
     return Range;
   }
   
-  static bool classof(const Stmt *T) {
-    return T->getStmtClass() == ObjCBoxedExprClass;
-  }
-  
   // Iterators
   child_range children() { return child_range(&SubExpr, &SubExpr+1); }
 
-  typedef ConstExprIterator const_arg_iterator;
+  using const_arg_iterator = ConstExprIterator;
 
   const_arg_iterator arg_begin() const {
     return reinterpret_cast<Stmt const * const*>(&SubExpr);
   }
+
   const_arg_iterator arg_end() const {
     return reinterpret_cast<Stmt const * const*>(&SubExpr + 1);
   }
-  
-  friend class ASTStmtReader;
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == ObjCBoxedExprClass;
+  }
 };
 
 /// ObjCArrayLiteral - used for objective-c array containers; as in:
@@ -153,9 +180,12 @@
                    SourceRange SR);
 
   explicit ObjCArrayLiteral(EmptyShell Empty, unsigned NumElements)
-    : Expr(ObjCArrayLiteralClass, Empty), NumElements(NumElements) {}
+      : Expr(ObjCArrayLiteralClass, Empty), NumElements(NumElements) {}
 
 public:
+  friend class ASTStmtReader;
+  friend TrailingObjects;
+
   static ObjCArrayLiteral *Create(const ASTContext &C,
                                   ArrayRef<Expr *> Elements,
                                   QualType T, ObjCMethodDecl * Method,
@@ -168,10 +198,6 @@
   SourceLocation getLocEnd() const LLVM_READONLY { return Range.getEnd(); }
   SourceRange getSourceRange() const LLVM_READONLY { return Range; }
 
-  static bool classof(const Stmt *T) {
-      return T->getStmtClass() == ObjCArrayLiteralClass;
-  }
-
   /// \brief Retrieve elements of array of literals.
   Expr **getElements() { return getTrailingObjects<Expr *>(); }
 
@@ -183,14 +209,14 @@
   /// getNumElements - Return number of elements of objective-c array literal.
   unsigned getNumElements() const { return NumElements; }
     
-    /// getExpr - Return the Expr at the specified index.
+  /// getElement - Return the Element at the specified index.
   Expr *getElement(unsigned Index) {
     assert((Index < NumElements) && "Arg access out of range!");
-    return cast<Expr>(getElements()[Index]);
+    return getElements()[Index];
   }
   const Expr *getElement(unsigned Index) const {
     assert((Index < NumElements) && "Arg access out of range!");
-    return cast<Expr>(getElements()[Index]);
+    return getElements()[Index];
   }
     
   ObjCMethodDecl *getArrayWithObjectsMethod() const {
@@ -203,8 +229,9 @@
                        reinterpret_cast<Stmt **>(getElements()) + NumElements);
   }
 
-  friend TrailingObjects;
-  friend class ASTStmtReader;
+  static bool classof(const Stmt *T) {
+      return T->getStmtClass() == ObjCArrayLiteralClass;
+  }
 };
 
 /// \brief An element in an Objective-C dictionary literal.
@@ -226,13 +253,17 @@
   /// \brief Determines whether this dictionary element is a pack expansion.
   bool isPackExpansion() const { return EllipsisLoc.isValid(); }
 };
-} // end namespace clang
+
+} // namespace clang
 
 namespace llvm {
+
 template <> struct isPodLike<clang::ObjCDictionaryElement> : std::true_type {};
-}
+
+} // namespace llvm
 
 namespace clang {
+
 /// \brief Internal struct for storing Key/value pair.
 struct ObjCDictionaryLiteral_KeyValuePair {
   Expr *Key;
@@ -274,12 +305,8 @@
   SourceRange Range;
   ObjCMethodDecl *DictWithObjectsMethod;
 
-  typedef ObjCDictionaryLiteral_KeyValuePair KeyValuePair;
-  typedef ObjCDictionaryLiteral_ExpansionData ExpansionData;
-
-  size_t numTrailingObjects(OverloadToken<KeyValuePair>) const {
-    return NumElements;
-  }
+  using KeyValuePair = ObjCDictionaryLiteral_KeyValuePair;
+  using ExpansionData = ObjCDictionaryLiteral_ExpansionData;
 
   ObjCDictionaryLiteral(ArrayRef<ObjCDictionaryElement> VK, 
                         bool HasPackExpansions,
@@ -288,10 +315,18 @@
 
   explicit ObjCDictionaryLiteral(EmptyShell Empty, unsigned NumElements,
                                  bool HasPackExpansions)
-    : Expr(ObjCDictionaryLiteralClass, Empty), NumElements(NumElements),
-      HasPackExpansions(HasPackExpansions) {}
+      : Expr(ObjCDictionaryLiteralClass, Empty), NumElements(NumElements),
+        HasPackExpansions(HasPackExpansions) {}
+
+  size_t numTrailingObjects(OverloadToken<KeyValuePair>) const {
+    return NumElements;
+  }
 
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+  friend TrailingObjects;
+
   static ObjCDictionaryLiteral *Create(const ASTContext &C,
                                        ArrayRef<ObjCDictionaryElement> VK, 
                                        bool HasPackExpansions,
@@ -320,17 +355,14 @@
     return Result;
   }
     
-  ObjCMethodDecl *getDictWithObjectsMethod() const
-    { return DictWithObjectsMethod; }
+  ObjCMethodDecl *getDictWithObjectsMethod() const {
+    return DictWithObjectsMethod;
+  }
 
   SourceLocation getLocStart() const LLVM_READONLY { return Range.getBegin(); }
   SourceLocation getLocEnd() const LLVM_READONLY { return Range.getEnd(); }
   SourceRange getSourceRange() const LLVM_READONLY { return Range; }
-  
-  static bool classof(const Stmt *T) {
-      return T->getStmtClass() == ObjCDictionaryLiteralClass;
-  }
-    
+
   // Iterators
   child_range children() {
     // Note: we're taking advantage of the layout of the KeyValuePair struct
@@ -342,12 +374,11 @@
         reinterpret_cast<Stmt **>(getTrailingObjects<KeyValuePair>()) +
             NumElements * 2);
   }
-    
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
-  friend TrailingObjects;
-};
 
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == ObjCDictionaryLiteralClass;
+  }
+};
 
 /// ObjCEncodeExpr, used for \@encode in Objective-C.  \@encode has the same
 /// type and behavior as StringLiteral except that the string initializer is
@@ -355,19 +386,19 @@
 class ObjCEncodeExpr : public Expr {
   TypeSourceInfo *EncodedType;
   SourceLocation AtLoc, RParenLoc;
+
 public:
   ObjCEncodeExpr(QualType T, TypeSourceInfo *EncodedType,
                  SourceLocation at, SourceLocation rp)
-    : Expr(ObjCEncodeExprClass, T, VK_LValue, OK_Ordinary,
-           EncodedType->getType()->isDependentType(),
-           EncodedType->getType()->isDependentType(),
-           EncodedType->getType()->isInstantiationDependentType(),
-           EncodedType->getType()->containsUnexpandedParameterPack()), 
-      EncodedType(EncodedType), AtLoc(at), RParenLoc(rp) {}
+      : Expr(ObjCEncodeExprClass, T, VK_LValue, OK_Ordinary,
+             EncodedType->getType()->isDependentType(),
+             EncodedType->getType()->isDependentType(),
+             EncodedType->getType()->isInstantiationDependentType(),
+             EncodedType->getType()->containsUnexpandedParameterPack()), 
+        EncodedType(EncodedType), AtLoc(at), RParenLoc(rp) {}
 
   explicit ObjCEncodeExpr(EmptyShell Empty) : Expr(ObjCEncodeExprClass, Empty){}
 
-
   SourceLocation getAtLoc() const { return AtLoc; }
   void setAtLoc(SourceLocation L) { AtLoc = L; }
   SourceLocation getRParenLoc() const { return RParenLoc; }
@@ -376,6 +407,7 @@
   QualType getEncodedType() const { return EncodedType->getType(); }
 
   TypeSourceInfo *getEncodedTypeSourceInfo() const { return EncodedType; }
+
   void setEncodedTypeSourceInfo(TypeSourceInfo *EncType) { 
     EncodedType = EncType; 
   }
@@ -383,28 +415,29 @@
   SourceLocation getLocStart() const LLVM_READONLY { return AtLoc; }
   SourceLocation getLocEnd() const LLVM_READONLY { return RParenLoc; }
 
-  static bool classof(const Stmt *T) {
-    return T->getStmtClass() == ObjCEncodeExprClass;
-  }
-
   // Iterators
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == ObjCEncodeExprClass;
+  }
 };
 
 /// ObjCSelectorExpr used for \@selector in Objective-C.
 class ObjCSelectorExpr : public Expr {
   Selector SelName;
   SourceLocation AtLoc, RParenLoc;
+
 public:
   ObjCSelectorExpr(QualType T, Selector selInfo,
                    SourceLocation at, SourceLocation rp)
-    : Expr(ObjCSelectorExprClass, T, VK_RValue, OK_Ordinary, false, false, 
-           false, false),
-    SelName(selInfo), AtLoc(at), RParenLoc(rp){}
+      : Expr(ObjCSelectorExprClass, T, VK_RValue, OK_Ordinary, false, false,
+             false, false),
+        SelName(selInfo), AtLoc(at), RParenLoc(rp) {}
   explicit ObjCSelectorExpr(EmptyShell Empty)
-   : Expr(ObjCSelectorExprClass, Empty) {}
+      : Expr(ObjCSelectorExprClass, Empty) {}
 
   Selector getSelector() const { return SelName; }
   void setSelector(Selector S) { SelName = S; }
@@ -420,14 +453,14 @@
   /// getNumArgs - Return the number of actual arguments to this call.
   unsigned getNumArgs() const { return SelName.getNumArgs(); }
 
-  static bool classof(const Stmt *T) {
-    return T->getStmtClass() == ObjCSelectorExprClass;
-  }
-
   // Iterators
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == ObjCSelectorExprClass;
+  }
 };
 
 /// ObjCProtocolExpr used for protocol expression in Objective-C.
@@ -441,14 +474,18 @@
 class ObjCProtocolExpr : public Expr {
   ObjCProtocolDecl *TheProtocol;
   SourceLocation AtLoc, ProtoLoc, RParenLoc;
+
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+
   ObjCProtocolExpr(QualType T, ObjCProtocolDecl *protocol,
                  SourceLocation at, SourceLocation protoLoc, SourceLocation rp)
-    : Expr(ObjCProtocolExprClass, T, VK_RValue, OK_Ordinary, false, false,
-           false, false),
-      TheProtocol(protocol), AtLoc(at), ProtoLoc(protoLoc), RParenLoc(rp) {}
+      : Expr(ObjCProtocolExprClass, T, VK_RValue, OK_Ordinary, false, false,
+             false, false),
+        TheProtocol(protocol), AtLoc(at), ProtoLoc(protoLoc), RParenLoc(rp) {}
   explicit ObjCProtocolExpr(EmptyShell Empty)
-    : Expr(ObjCProtocolExprClass, Empty) {}
+      : Expr(ObjCProtocolExprClass, Empty) {}
 
   ObjCProtocolDecl *getProtocol() const { return TheProtocol; }
   void setProtocol(ObjCProtocolDecl *P) { TheProtocol = P; }
@@ -462,17 +499,14 @@
   SourceLocation getLocStart() const LLVM_READONLY { return AtLoc; }
   SourceLocation getLocEnd() const LLVM_READONLY { return RParenLoc; }
 
-  static bool classof(const Stmt *T) {
-    return T->getStmtClass() == ObjCProtocolExprClass;
-  }
-
   // Iterators
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
 
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == ObjCProtocolExprClass;
+  }
 };
 
 /// ObjCIvarRefExpr - A reference to an ObjC instance variable.
@@ -480,27 +514,31 @@
   ObjCIvarDecl *D;
   Stmt *Base;
   SourceLocation Loc;
+
   /// OpLoc - This is the location of '.' or '->'
   SourceLocation OpLoc;
-  
-  bool IsArrow:1;      // True if this is "X->F", false if this is "X.F".
-  bool IsFreeIvar:1;   // True if ivar reference has no base (self assumed).
+
+  // True if this is "X->F", false if this is "X.F".
+  bool IsArrow : 1;
+
+  // True if ivar reference has no base (self assumed).
+  bool IsFreeIvar : 1;
 
 public:
   ObjCIvarRefExpr(ObjCIvarDecl *d, QualType t,
                   SourceLocation l, SourceLocation oploc,
                   Expr *base,
-                  bool arrow = false, bool freeIvar = false) :
-    Expr(ObjCIvarRefExprClass, t, VK_LValue,
-         d->isBitField() ? OK_BitField : OK_Ordinary,
-         /*TypeDependent=*/false, base->isValueDependent(), 
-         base->isInstantiationDependent(),
-         base->containsUnexpandedParameterPack()), 
-    D(d), Base(base), Loc(l), OpLoc(oploc),
-    IsArrow(arrow), IsFreeIvar(freeIvar) {}
+                  bool arrow = false, bool freeIvar = false)
+      : Expr(ObjCIvarRefExprClass, t, VK_LValue,
+             d->isBitField() ? OK_BitField : OK_Ordinary,
+             /*TypeDependent=*/false, base->isValueDependent(), 
+             base->isInstantiationDependent(),
+             base->containsUnexpandedParameterPack()), 
+        D(d), Base(base), Loc(l), OpLoc(oploc), IsArrow(arrow),
+        IsFreeIvar(freeIvar) {}
 
   explicit ObjCIvarRefExpr(EmptyShell Empty)
-    : Expr(ObjCIvarRefExprClass, Empty) {}
+      : Expr(ObjCIvarRefExprClass, Empty) {}
 
   ObjCIvarDecl *getDecl() { return D; }
   const ObjCIvarDecl *getDecl() const { return D; }
@@ -526,12 +564,12 @@
   SourceLocation getOpLoc() const { return OpLoc; }
   void setOpLoc(SourceLocation L) { OpLoc = L; }
 
+  // Iterators
+  child_range children() { return child_range(&Base, &Base+1); }
+
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == ObjCIvarRefExprClass;
   }
-
-  // Iterators
-  child_range children() { return child_range(&Base, &Base+1); }
 };
 
 /// ObjCPropertyRefExpr - A dot-syntax expression to access an ObjC
@@ -542,7 +580,7 @@
   /// pointer is an (optional) ObjCMethodDecl and Setter may be set.
   /// if the bool is false, this is an explicit property reference;
   /// the pointer is an ObjCPropertyDecl and Setter is always null.
-  llvm::PointerIntPair<NamedDecl*, 1, bool> PropertyOrGetter;
+  llvm::PointerIntPair<NamedDecl *, 1, bool> PropertyOrGetter;
 
   /// \brief Indicates whether the property reference will result in a message
   /// to the getter, the setter, or both.
@@ -567,40 +605,39 @@
   /// the location of the 'super' keyword.  When it's an interface,
   /// this is that interface.
   SourceLocation ReceiverLoc;
-  llvm::PointerUnion3<Stmt*, const Type*, ObjCInterfaceDecl*> Receiver;
+  llvm::PointerUnion3<Stmt *, const Type *, ObjCInterfaceDecl *> Receiver;
   
 public:
   ObjCPropertyRefExpr(ObjCPropertyDecl *PD, QualType t,
                       ExprValueKind VK, ExprObjectKind OK,
                       SourceLocation l, Expr *base)
-    : Expr(ObjCPropertyRefExprClass, t, VK, OK,
-           /*TypeDependent=*/false, base->isValueDependent(),
-           base->isInstantiationDependent(),
-           base->containsUnexpandedParameterPack()),
-      PropertyOrGetter(PD, false), SetterAndMethodRefFlags(),
-      IdLoc(l), ReceiverLoc(), Receiver(base) {
+      : Expr(ObjCPropertyRefExprClass, t, VK, OK,
+             /*TypeDependent=*/false, base->isValueDependent(),
+             base->isInstantiationDependent(),
+             base->containsUnexpandedParameterPack()),
+        PropertyOrGetter(PD, false), IdLoc(l), Receiver(base) {
     assert(t->isSpecificPlaceholderType(BuiltinType::PseudoObject));
   }
   
   ObjCPropertyRefExpr(ObjCPropertyDecl *PD, QualType t,
                       ExprValueKind VK, ExprObjectKind OK,
                       SourceLocation l, SourceLocation sl, QualType st)
-    : Expr(ObjCPropertyRefExprClass, t, VK, OK,
-           /*TypeDependent=*/false, false, st->isInstantiationDependentType(),
-           st->containsUnexpandedParameterPack()),
-      PropertyOrGetter(PD, false), SetterAndMethodRefFlags(),
-      IdLoc(l), ReceiverLoc(sl), Receiver(st.getTypePtr()) {
+      : Expr(ObjCPropertyRefExprClass, t, VK, OK,
+             /*TypeDependent=*/false, false, st->isInstantiationDependentType(),
+             st->containsUnexpandedParameterPack()),
+        PropertyOrGetter(PD, false), IdLoc(l), ReceiverLoc(sl),
+        Receiver(st.getTypePtr()) {
     assert(t->isSpecificPlaceholderType(BuiltinType::PseudoObject));
   }
 
   ObjCPropertyRefExpr(ObjCMethodDecl *Getter, ObjCMethodDecl *Setter,
                       QualType T, ExprValueKind VK, ExprObjectKind OK,
                       SourceLocation IdLoc, Expr *Base)
-    : Expr(ObjCPropertyRefExprClass, T, VK, OK, false,
-           Base->isValueDependent(), Base->isInstantiationDependent(),
-           Base->containsUnexpandedParameterPack()),
-      PropertyOrGetter(Getter, true), SetterAndMethodRefFlags(Setter, 0),
-      IdLoc(IdLoc), ReceiverLoc(), Receiver(Base) {
+      : Expr(ObjCPropertyRefExprClass, T, VK, OK, false,
+             Base->isValueDependent(), Base->isInstantiationDependent(),
+             Base->containsUnexpandedParameterPack()),
+        PropertyOrGetter(Getter, true), SetterAndMethodRefFlags(Setter, 0),
+        IdLoc(IdLoc), Receiver(Base) {
     assert(T->isSpecificPlaceholderType(BuiltinType::PseudoObject));
   }
 
@@ -608,9 +645,9 @@
                       QualType T, ExprValueKind VK, ExprObjectKind OK,
                       SourceLocation IdLoc,
                       SourceLocation SuperLoc, QualType SuperTy)
-    : Expr(ObjCPropertyRefExprClass, T, VK, OK, false, false, false, false),
-      PropertyOrGetter(Getter, true), SetterAndMethodRefFlags(Setter, 0),
-      IdLoc(IdLoc), ReceiverLoc(SuperLoc), Receiver(SuperTy.getTypePtr()) {
+      : Expr(ObjCPropertyRefExprClass, T, VK, OK, false, false, false, false),
+        PropertyOrGetter(Getter, true), SetterAndMethodRefFlags(Setter, 0),
+        IdLoc(IdLoc), ReceiverLoc(SuperLoc), Receiver(SuperTy.getTypePtr()) {
     assert(T->isSpecificPlaceholderType(BuiltinType::PseudoObject));
   }
 
@@ -618,14 +655,14 @@
                       QualType T, ExprValueKind VK, ExprObjectKind OK,
                       SourceLocation IdLoc,
                       SourceLocation ReceiverLoc, ObjCInterfaceDecl *Receiver)
-    : Expr(ObjCPropertyRefExprClass, T, VK, OK, false, false, false, false),
-      PropertyOrGetter(Getter, true), SetterAndMethodRefFlags(Setter, 0),
-      IdLoc(IdLoc), ReceiverLoc(ReceiverLoc), Receiver(Receiver) {
+      : Expr(ObjCPropertyRefExprClass, T, VK, OK, false, false, false, false),
+        PropertyOrGetter(Getter, true), SetterAndMethodRefFlags(Setter, 0),
+        IdLoc(IdLoc), ReceiverLoc(ReceiverLoc), Receiver(Receiver) {
     assert(T->isSpecificPlaceholderType(BuiltinType::PseudoObject));
   }
 
   explicit ObjCPropertyRefExpr(EmptyShell Empty)
-    : Expr(ObjCPropertyRefExprClass, Empty) {}
+      : Expr(ObjCPropertyRefExprClass, Empty) {}
 
   bool isImplicitProperty() const { return PropertyOrGetter.getInt(); }
   bool isExplicitProperty() const { return !PropertyOrGetter.getInt(); }
@@ -689,6 +726,7 @@
   SourceLocation getLocation() const { return IdLoc; }
   
   SourceLocation getReceiverLocation() const { return ReceiverLoc; }
+
   QualType getSuperReceiverType() const { 
     return QualType(Receiver.get<const Type*>(), 0); 
   }
@@ -696,6 +734,7 @@
   ObjCInterfaceDecl *getClassReceiver() const {
     return Receiver.get<ObjCInterfaceDecl*>();
   }
+
   bool isObjectReceiver() const { return Receiver.is<Stmt*>(); }
   bool isSuperReceiver() const { return Receiver.is<const Type*>(); }
   bool isClassReceiver() const { return Receiver.is<ObjCInterfaceDecl*>(); }
@@ -706,11 +745,8 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return isObjectReceiver() ? getBase()->getLocStart() :getReceiverLocation();
   }
-  SourceLocation getLocEnd() const LLVM_READONLY { return IdLoc; }
 
-  static bool classof(const Stmt *T) {
-    return T->getStmtClass() == ObjCPropertyRefExprClass;
-  }
+  SourceLocation getLocEnd() const LLVM_READONLY { return IdLoc; }
 
   // Iterators
   child_range children() {
@@ -721,15 +757,21 @@
     return child_range(child_iterator(), child_iterator());
   }
 
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == ObjCPropertyRefExprClass;
+  }
+
 private:
   friend class ASTStmtReader;
   friend class ASTStmtWriter;
+
   void setExplicitProperty(ObjCPropertyDecl *D, unsigned methRefFlags) {
     PropertyOrGetter.setPointer(D);
     PropertyOrGetter.setInt(false);
     SetterAndMethodRefFlags.setPointer(nullptr);
     SetterAndMethodRefFlags.setInt(methRefFlags);
   }
+
   void setImplicitProperty(ObjCMethodDecl *Getter, ObjCMethodDecl *Setter,
                            unsigned methRefFlags) {
     PropertyOrGetter.setPointer(Getter);
@@ -737,6 +779,7 @@
     SetterAndMethodRefFlags.setPointer(Setter);
     SetterAndMethodRefFlags.setInt(methRefFlags);
   }
+
   void setBase(Expr *Base) { Receiver = Base; }
   void setSuperReceiver(QualType T) { Receiver = T.getTypePtr(); }
   void setClassReceiver(ObjCInterfaceDecl *D) { Receiver = D; }
@@ -756,10 +799,10 @@
   
 /// ObjCSubscriptRefExpr - used for array and dictionary subscripting.
 /// array[4] = array[3]; dictionary[key] = dictionary[alt_key];
-///
 class ObjCSubscriptRefExpr : public Expr {
   // Location of ']' in an indexing expression.
   SourceLocation RBracket;
+
   // array/dictionary base expression.
   // for arrays, this is a numeric expression. For dictionaries, this is
   // an objective-c object pointer expression.
@@ -773,24 +816,24 @@
   ObjCMethodDecl *SetAtIndexMethodDecl;
   
 public:
-  
   ObjCSubscriptRefExpr(Expr *base, Expr *key, QualType T,
                        ExprValueKind VK, ExprObjectKind OK,
                        ObjCMethodDecl *getMethod,
                        ObjCMethodDecl *setMethod, SourceLocation RB)
-    : Expr(ObjCSubscriptRefExprClass, T, VK, OK, 
-           base->isTypeDependent() || key->isTypeDependent(), 
-           base->isValueDependent() || key->isValueDependent(),
-           base->isInstantiationDependent() || key->isInstantiationDependent(),
-           (base->containsUnexpandedParameterPack() ||
-            key->containsUnexpandedParameterPack())),
-      RBracket(RB), 
-  GetAtIndexMethodDecl(getMethod), 
-  SetAtIndexMethodDecl(setMethod) 
-    {SubExprs[BASE] = base; SubExprs[KEY] = key;}
+      : Expr(ObjCSubscriptRefExprClass, T, VK, OK, 
+             base->isTypeDependent() || key->isTypeDependent(), 
+             base->isValueDependent() || key->isValueDependent(),
+             (base->isInstantiationDependent() ||
+              key->isInstantiationDependent()),
+             (base->containsUnexpandedParameterPack() ||
+              key->containsUnexpandedParameterPack())),
+        RBracket(RB), GetAtIndexMethodDecl(getMethod),
+        SetAtIndexMethodDecl(setMethod) {
+    SubExprs[BASE] = base; SubExprs[KEY] = key;
+  }
 
   explicit ObjCSubscriptRefExpr(EmptyShell Empty)
-    : Expr(ObjCSubscriptRefExprClass, Empty) {}
+      : Expr(ObjCSubscriptRefExprClass, Empty) {}
   
   SourceLocation getRBracket() const { return RBracket; }
   void setRBracket(SourceLocation RB) { RBracket = RB; }
@@ -798,11 +841,8 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return SubExprs[BASE]->getLocStart();
   }
-  SourceLocation getLocEnd() const LLVM_READONLY { return RBracket; }
 
-  static bool classof(const Stmt *T) {
-    return T->getStmtClass() == ObjCSubscriptRefExprClass;
-  }
+  SourceLocation getLocEnd() const LLVM_READONLY { return RBracket; }
   
   Expr *getBaseExpr() const { return cast<Expr>(SubExprs[BASE]); }
   void setBaseExpr(Stmt *S) { SubExprs[BASE] = S; }
@@ -825,10 +865,14 @@
   child_range children() {
     return child_range(SubExprs, SubExprs+END_EXPR);
   }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == ObjCSubscriptRefExprClass;
+  }
+
 private:
   friend class ASTStmtReader;
 };
-  
 
 /// \brief An expression that sends a message to the given Objective-C
 /// object or class.
@@ -856,14 +900,13 @@
 /// The "void *" trailing objects are actually ONE void * (the
 /// receiver pointer), and NumArgs Expr *. But due to the
 /// implementation of children(), these must be together contiguously.
-
 class ObjCMessageExpr final
     : public Expr,
       private llvm::TrailingObjects<ObjCMessageExpr, void *, SourceLocation> {
   /// \brief Stores either the selector that this message is sending
   /// to (when \c HasMethod is zero) or an \c ObjCMethodDecl pointer
   /// referring to the method that we type-checked against.
-  uintptr_t SelectorOrMethod;
+  uintptr_t SelectorOrMethod = 0;
 
   enum { NumArgsBitWidth = 16 };
 
@@ -904,16 +947,9 @@
   /// brackets ('[' and ']', respectively).
   SourceLocation LBracLoc, RBracLoc;
 
-  size_t numTrailingObjects(OverloadToken<void *>) const { return NumArgs + 1; }
-
-  void setNumArgs(unsigned Num) {
-    assert((Num >> NumArgsBitWidth) == 0 && "Num of args is out of range!");
-    NumArgs = Num;
-  }
-
   ObjCMessageExpr(EmptyShell Empty, unsigned NumArgs)
-    : Expr(ObjCMessageExprClass, Empty), SelectorOrMethod(0), Kind(0), 
-      HasMethod(0), IsDelegateInitCall(0), IsImplicit(0), SelLocsKind(0) {
+      : Expr(ObjCMessageExprClass, Empty), Kind(0), HasMethod(false),
+        IsDelegateInitCall(false), IsImplicit(false), SelLocsKind(0) {
     setNumArgs(NumArgs);
   }
 
@@ -950,6 +986,13 @@
                   SourceLocation RBracLoc,
                   bool isImplicit);
 
+  size_t numTrailingObjects(OverloadToken<void *>) const { return NumArgs + 1; }
+
+  void setNumArgs(unsigned Num) {
+    assert((Num >> NumArgsBitWidth) == 0 && "Num of args is out of range!");
+    NumArgs = Num;
+  }
+
   void initArgsAndSelLocs(ArrayRef<Expr *> Args,
                           ArrayRef<SourceLocation> SelLocs,
                           SelectorLocationsKind SelLocsK);
@@ -965,6 +1008,7 @@
   SelectorLocationsKind getSelLocsKind() const {
     return (SelectorLocationsKind)SelLocsKind;
   }
+
   bool hasStandardSelLocs() const {
     return getSelLocsKind() != SelLoc_NonStandard;
   }
@@ -997,14 +1041,21 @@
                                 unsigned NumStoredSelLocs);
 
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+  friend TrailingObjects;
+
   /// \brief The kind of receiver this message is sending to.
   enum ReceiverKind {
     /// \brief The receiver is a class.
     Class = 0,
+
     /// \brief The receiver is an object instance.
     Instance,
+
     /// \brief The receiver is a superclass.
     SuperClass,
+
     /// \brief The receiver is the instance of the superclass object.
     SuperInstance
   };
@@ -1029,7 +1080,7 @@
   /// \param Sel The selector used to determine which method gets called.
   ///
   /// \param Method The Objective-C method against which this message
-  /// send was type-checked. May be NULL.
+  /// send was type-checked. May be nullptr.
   ///
   /// \param Args The message send arguments.
   ///
@@ -1065,7 +1116,7 @@
   /// \param Sel The selector used to determine which method gets called.
   ///
   /// \param Method The Objective-C method against which this message
-  /// send was type-checked. May be NULL.
+  /// send was type-checked. May be nullptr.
   ///
   /// \param Args The message send arguments.
   ///
@@ -1099,7 +1150,7 @@
   /// \param Sel The selector used to determine which method gets called.
   ///
   /// \param Method The Objective-C method against which this message
-  /// send was type-checked. May be NULL.
+  /// send was type-checked. May be nullptr.
   ///
   /// \param Args The message send arguments.
   ///
@@ -1175,11 +1226,11 @@
     if (TypeSourceInfo *TSInfo = getClassReceiverTypeInfo())
       return TSInfo->getType();
 
-    return QualType();
+    return {};
   }
 
   /// \brief Returns a type-source information of a class message
-  /// send, or NULL if the message is not a class message.
+  /// send, or nullptr if the message is not a class message.
   TypeSourceInfo *getClassReceiverTypeInfo() const {
     if (getReceiverKind() == Class)
       return reinterpret_cast<TypeSourceInfo *>(getReceiverPointer());
@@ -1220,7 +1271,7 @@
   /// whether the message is a class or an instance method, whether it
   /// is a send to super or not, etc.
   ///
-  /// \returns The Objective-C interface if known, otherwise NULL.
+  /// \returns The Objective-C interface if known, otherwise nullptr.
   ObjCInterfaceDecl *getReceiverInterface() const;
 
   /// \brief Retrieve the type referred to by 'super'. 
@@ -1295,6 +1346,7 @@
     assert(Arg < NumArgs && "Arg access out of range!");
     return getArgs()[Arg];
   }
+
   /// setArg - Set the specified argument.
   void setArg(unsigned Arg, Expr *ArgExpr) {
     assert(Arg < NumArgs && "Arg access out of range!");
@@ -1315,6 +1367,7 @@
       return getLocStart();
     return getSelectorLoc(0);
   }
+
   SourceLocation getSelectorLoc(unsigned Index) const {
     assert(Index < getNumSelectorLocs() && "Index out of range!");
     if (hasStandardSelLocs())
@@ -1341,18 +1394,15 @@
     LBracLoc = R.getBegin();
     RBracLoc = R.getEnd();
   }
+
   SourceLocation getLocStart() const LLVM_READONLY { return LBracLoc; }
   SourceLocation getLocEnd() const LLVM_READONLY { return RBracLoc; }
 
-  static bool classof(const Stmt *T) {
-    return T->getStmtClass() == ObjCMessageExprClass;
-  }
-
   // Iterators
   child_range children();
 
-  typedef ExprIterator arg_iterator;
-  typedef ConstExprIterator const_arg_iterator;
+  using arg_iterator = ExprIterator;
+  using const_arg_iterator = ConstExprIterator;
 
   llvm::iterator_range<arg_iterator> arguments() {
     return llvm::make_range(arg_begin(), arg_end());
@@ -1363,19 +1413,22 @@
   }
 
   arg_iterator arg_begin() { return reinterpret_cast<Stmt **>(getArgs()); }
+
   arg_iterator arg_end()   { 
     return reinterpret_cast<Stmt **>(getArgs() + NumArgs); 
   }
+
   const_arg_iterator arg_begin() const { 
     return reinterpret_cast<Stmt const * const*>(getArgs()); 
   }
+
   const_arg_iterator arg_end() const { 
     return reinterpret_cast<Stmt const * const*>(getArgs() + NumArgs); 
   }
 
-  friend TrailingObjects;
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == ObjCMessageExprClass;
+  }
 };
 
 /// ObjCIsaExpr - Represent X->isa and X.isa when X is an ObjC 'id' type.
@@ -1392,17 +1445,18 @@
 
   /// IsArrow - True if this is "X->F", false if this is "X.F".
   bool IsArrow;
+
 public:
   ObjCIsaExpr(Expr *base, bool isarrow, SourceLocation l, SourceLocation oploc,
               QualType ty)
-    : Expr(ObjCIsaExprClass, ty, VK_LValue, OK_Ordinary,
-           /*TypeDependent=*/false, base->isValueDependent(),
-           base->isInstantiationDependent(),
-           /*ContainsUnexpandedParameterPack=*/false),
-      Base(base), IsaMemberLoc(l), OpLoc(oploc), IsArrow(isarrow) {}
+      : Expr(ObjCIsaExprClass, ty, VK_LValue, OK_Ordinary,
+             /*TypeDependent=*/false, base->isValueDependent(),
+             base->isInstantiationDependent(),
+             /*ContainsUnexpandedParameterPack=*/false),
+        Base(base), IsaMemberLoc(l), OpLoc(oploc), IsArrow(isarrow) {}
 
   /// \brief Build an empty expression.
-  explicit ObjCIsaExpr(EmptyShell Empty) : Expr(ObjCIsaExprClass, Empty) { }
+  explicit ObjCIsaExpr(EmptyShell Empty) : Expr(ObjCIsaExprClass, Empty) {}
 
   void setBase(Expr *E) { Base = E; }
   Expr *getBase() const { return cast<Expr>(Base); }
@@ -1430,15 +1484,14 @@
 
   SourceLocation getExprLoc() const LLVM_READONLY { return IsaMemberLoc; }
 
+  // Iterators
+  child_range children() { return child_range(&Base, &Base+1); }
+
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == ObjCIsaExprClass;
   }
-
-  // Iterators
-  child_range children() { return child_range(&Base, &Base+1); }
 };
 
-
 /// ObjCIndirectCopyRestoreExpr - Represents the passing of a function
 /// argument by indirect copy-restore in ARC.  This is used to support
 /// passing indirect arguments with the wrong lifetime, e.g. when
@@ -1462,27 +1515,27 @@
 /// __autoreleasing;  this qualifier is ignored when initializing
 /// the value.
 class ObjCIndirectCopyRestoreExpr : public Expr {
+  friend class ASTReader;
+  friend class ASTStmtReader;
+
   Stmt *Operand;
 
   // unsigned ObjCIndirectCopyRestoreBits.ShouldCopy : 1;
 
-  friend class ASTReader;
-  friend class ASTStmtReader;
+  explicit ObjCIndirectCopyRestoreExpr(EmptyShell Empty)
+      : Expr(ObjCIndirectCopyRestoreExprClass, Empty) {}
 
   void setShouldCopy(bool shouldCopy) {
     ObjCIndirectCopyRestoreExprBits.ShouldCopy = shouldCopy;
   }
 
-  explicit ObjCIndirectCopyRestoreExpr(EmptyShell Empty)
-    : Expr(ObjCIndirectCopyRestoreExprClass, Empty) { }
-
 public:
   ObjCIndirectCopyRestoreExpr(Expr *operand, QualType type, bool shouldCopy)
-    : Expr(ObjCIndirectCopyRestoreExprClass, type, VK_LValue, OK_Ordinary,
-           operand->isTypeDependent(), operand->isValueDependent(),
-           operand->isInstantiationDependent(),
-           operand->containsUnexpandedParameterPack()),
-      Operand(operand) {
+      : Expr(ObjCIndirectCopyRestoreExprClass, type, VK_LValue, OK_Ordinary,
+             operand->isTypeDependent(), operand->isValueDependent(),
+             operand->isInstantiationDependent(),
+             operand->containsUnexpandedParameterPack()),
+        Operand(operand) {
     setShouldCopy(shouldCopy);
   }
 
@@ -1519,26 +1572,26 @@
 class ObjCBridgedCastExpr final
     : public ExplicitCastExpr,
       private llvm::TrailingObjects<ObjCBridgedCastExpr, CXXBaseSpecifier *> {
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+  friend class CastExpr;
+  friend TrailingObjects;
+
   SourceLocation LParenLoc;
   SourceLocation BridgeKeywordLoc;
   unsigned Kind : 2;
 
-  friend TrailingObjects;
-  friend class CastExpr;
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
-  
 public:
   ObjCBridgedCastExpr(SourceLocation LParenLoc, ObjCBridgeCastKind Kind,
                       CastKind CK, SourceLocation BridgeKeywordLoc,
                       TypeSourceInfo *TSInfo, Expr *Operand)
-    : ExplicitCastExpr(ObjCBridgedCastExprClass, TSInfo->getType(), VK_RValue,
-                       CK, Operand, 0, TSInfo),
-      LParenLoc(LParenLoc), BridgeKeywordLoc(BridgeKeywordLoc), Kind(Kind) { }
+      : ExplicitCastExpr(ObjCBridgedCastExprClass, TSInfo->getType(), VK_RValue,
+                         CK, Operand, 0, TSInfo),
+        LParenLoc(LParenLoc), BridgeKeywordLoc(BridgeKeywordLoc), Kind(Kind) {}
   
   /// \brief Construct an empty Objective-C bridged cast.
   explicit ObjCBridgedCastExpr(EmptyShell Shell)
-    : ExplicitCastExpr(ObjCBridgedCastExprClass, Shell, 0) { }
+      : ExplicitCastExpr(ObjCBridgedCastExprClass, Shell, 0) {}
 
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
@@ -1554,6 +1607,7 @@
   SourceLocation getBridgeKeywordLoc() const { return BridgeKeywordLoc; }
   
   SourceLocation getLocStart() const LLVM_READONLY { return LParenLoc; }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return getSubExpr()->getLocEnd();
   }
@@ -1577,10 +1631,11 @@
 /// expressions.
 ///
 class ObjCAvailabilityCheckExpr : public Expr {
+  friend class ASTStmtReader;
+
   VersionTuple VersionToCheck;
   SourceLocation AtLoc, RParen;
 
-  friend class ASTStmtReader;
 public:
   ObjCAvailabilityCheckExpr(VersionTuple VersionToCheck, SourceLocation AtLoc,
                             SourceLocation RParen, QualType Ty)
@@ -1608,6 +1663,6 @@
   }
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_EXPROBJC_H
diff --git a/include/clang/AST/ExternalASTSource.h b/include/clang/AST/ExternalASTSource.h
index d8dd18e..be013c5 100644
--- a/include/clang/AST/ExternalASTSource.h
+++ b/include/clang/AST/ExternalASTSource.h
@@ -1,4 +1,4 @@
-//===--- ExternalASTSource.h - Abstract External AST Interface --*- C++ -*-===//
+//===- ExternalASTSource.h - Abstract External AST Interface ----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,24 +11,44 @@
 //  construction of AST nodes from some external source.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_AST_EXTERNALASTSOURCE_H
 #define LLVM_CLANG_AST_EXTERNALASTSOURCE_H
 
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/DeclBase.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/Module.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
 
 namespace clang {
 
 class ASTConsumer;
+class ASTContext;
 class CXXBaseSpecifier;
 class CXXCtorInitializer;
+class CXXRecordDecl;
 class DeclarationName;
-class ExternalSemaSource; // layering violation required for downcasting
 class FieldDecl;
-class Module;
+class IdentifierInfo;
 class NamedDecl;
+class ObjCInterfaceDecl;
 class RecordDecl;
 class Selector;
 class Stmt;
@@ -42,30 +62,31 @@
 /// actual type and declaration nodes, and read parts of declaration
 /// contexts.
 class ExternalASTSource : public RefCountedBase<ExternalASTSource> {
+  friend class ExternalSemaSource;
+
   /// Generation number for this external AST source. Must be increased
   /// whenever we might have added new redeclarations for existing decls.
-  uint32_t CurrentGeneration;
+  uint32_t CurrentGeneration = 0;
 
   /// \brief Whether this AST source also provides information for
   /// semantic analysis.
-  bool SemaSource;
-
-  friend class ExternalSemaSource;
+  bool SemaSource = false;
 
 public:
-  ExternalASTSource() : CurrentGeneration(0), SemaSource(false) { }
-
+  ExternalASTSource() = default;
   virtual ~ExternalASTSource();
 
   /// \brief RAII class for safely pairing a StartedDeserializing call
   /// with FinishedDeserializing.
   class Deserializing {
     ExternalASTSource *Source;
+
   public:
     explicit Deserializing(ExternalASTSource *source) : Source(source) {
       assert(Source);
       Source->StartedDeserializing();
     }
+
     ~Deserializing() {
       Source->FinishedDeserializing();
     }
@@ -122,7 +143,7 @@
   virtual CXXBaseSpecifier *GetExternalCXXBaseSpecifiers(uint64_t Offset);
 
   /// \brief Update an out-of-date identifier.
-  virtual void updateOutOfDateIdentifier(IdentifierInfo &II) { }
+  virtual void updateOutOfDateIdentifier(IdentifierInfo &II) {}
 
   /// \brief Find all declarations with the given name in the given context,
   /// and add them to the context by calling SetExternalVisibleDeclsForName
@@ -154,12 +175,13 @@
     const Module *ClangModule = nullptr;
 
   public:
-    ASTSourceDescriptor(){};
+    ASTSourceDescriptor() = default;
     ASTSourceDescriptor(StringRef Name, StringRef Path, StringRef ASTFile,
                         ASTFileSignature Signature)
         : PCHModuleName(std::move(Name)), Path(std::move(Path)),
-          ASTFile(std::move(ASTFile)), Signature(Signature){};
+          ASTFile(std::move(ASTFile)), Signature(Signature) {}
     ASTSourceDescriptor(const Module &M);
+
     std::string getModuleName() const;
     StringRef getPath() const { return Path; }
     StringRef getASTFile() const { return ASTFile; }
@@ -246,7 +268,6 @@
   /// The default implementation of this method is a no-op.
   virtual void PrintStats();
   
-  
   /// \brief Perform layout on the given record.
   ///
   /// This routine allows the external AST source to provide an specific 
@@ -289,7 +310,7 @@
     size_t mmap_bytes;
     
     MemoryBufferSizes(size_t malloc_bytes, size_t mmap_bytes)
-    : malloc_bytes(malloc_bytes), mmap_bytes(mmap_bytes) {}
+        : malloc_bytes(malloc_bytes), mmap_bytes(mmap_bytes) {}
   };
   
   /// Return the amount of memory used by memory buffers, breaking down
@@ -329,12 +350,12 @@
   ///
   /// If the low bit is clear, a pointer to the AST node. If the low
   /// bit is set, the upper 63 bits are the offset.
-  mutable uint64_t Ptr;
+  mutable uint64_t Ptr = 0;
 
 public:
-  LazyOffsetPtr() : Ptr(0) { }
+  LazyOffsetPtr() = default;
+  explicit LazyOffsetPtr(T *Ptr) : Ptr(reinterpret_cast<uint64_t>(Ptr)) {}
 
-  explicit LazyOffsetPtr(T *Ptr) : Ptr(reinterpret_cast<uint64_t>(Ptr)) { }
   explicit LazyOffsetPtr(uint64_t Offset) : Ptr((Offset << 1) | 0x01) {
     assert((Offset << 1 >> 1) == Offset && "Offsets must require < 63 bits");
     if (Offset == 0)
@@ -392,15 +413,16 @@
   /// A cache of the value of this pointer, in the most recent generation in
   /// which we queried it.
   struct LazyData {
-    LazyData(ExternalASTSource *Source, T Value)
-        : ExternalSource(Source), LastGeneration(0), LastValue(Value) {}
     ExternalASTSource *ExternalSource;
-    uint32_t LastGeneration;
+    uint32_t LastGeneration = 0;
     T LastValue;
+
+    LazyData(ExternalASTSource *Source, T Value)
+        : ExternalSource(Source), LastValue(Value) {}
   };
 
   // Our value is represented as simply T if there is no external AST source.
-  typedef llvm::PointerUnion<T, LazyData*> ValueType;
+  using ValueType = llvm::PointerUnion<T, LazyData*>;
   ValueType Value;
 
   LazyGenerationalUpdatePtr(ValueType V) : Value(V) {}
@@ -459,25 +481,31 @@
     return LazyGenerationalUpdatePtr(ValueType::getFromOpaqueValue(Ptr));
   }
 };
-} // end namespace clang
+
+} // namespace clang
 
 /// Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be
 /// placed into a PointerUnion.
 namespace llvm {
+
 template<typename Owner, typename T,
          void (clang::ExternalASTSource::*Update)(Owner)>
 struct PointerLikeTypeTraits<
     clang::LazyGenerationalUpdatePtr<Owner, T, Update>> {
-  typedef clang::LazyGenerationalUpdatePtr<Owner, T, Update> Ptr;
+  using Ptr = clang::LazyGenerationalUpdatePtr<Owner, T, Update>;
+
   static void *getAsVoidPointer(Ptr P) { return P.getOpaqueValue(); }
   static Ptr getFromVoidPointer(void *P) { return Ptr::getFromOpaqueValue(P); }
+
   enum {
     NumLowBitsAvailable = PointerLikeTypeTraits<T>::NumLowBitsAvailable - 1
   };
 };
-}
+
+} // namespace llvm
 
 namespace clang {
+
 /// \brief Represents a lazily-loaded vector of data.
 ///
 /// The lazily-loaded vector of data contains data that is partially loaded
@@ -511,13 +539,14 @@
   class iterator
       : public llvm::iterator_adaptor_base<
             iterator, int, std::random_access_iterator_tag, T, int, T *, T &> {
+    friend class LazyVector;
+
     LazyVector *Self;
 
     iterator(LazyVector *Self, int Position)
         : iterator::iterator_adaptor_base(Position), Self(Self) {}
 
     bool isLoaded() const { return this->I < 0; }
-    friend class LazyVector;
 
   public:
     iterator() : iterator(nullptr, 0) {}
@@ -562,23 +591,23 @@
 };
 
 /// \brief A lazy pointer to a statement.
-typedef LazyOffsetPtr<Stmt, uint64_t, &ExternalASTSource::GetExternalDeclStmt>
-  LazyDeclStmtPtr;
+using LazyDeclStmtPtr =
+    LazyOffsetPtr<Stmt, uint64_t, &ExternalASTSource::GetExternalDeclStmt>;
 
 /// \brief A lazy pointer to a declaration.
-typedef LazyOffsetPtr<Decl, uint32_t, &ExternalASTSource::GetExternalDecl>
-  LazyDeclPtr;
+using LazyDeclPtr =
+    LazyOffsetPtr<Decl, uint32_t, &ExternalASTSource::GetExternalDecl>;
 
 /// \brief A lazy pointer to a set of CXXCtorInitializers.
-typedef LazyOffsetPtr<CXXCtorInitializer *, uint64_t,
-                      &ExternalASTSource::GetExternalCXXCtorInitializers>
-  LazyCXXCtorInitializersPtr;
+using LazyCXXCtorInitializersPtr =
+    LazyOffsetPtr<CXXCtorInitializer *, uint64_t,
+                  &ExternalASTSource::GetExternalCXXCtorInitializers>;
 
 /// \brief A lazy pointer to a set of CXXBaseSpecifiers.
-typedef LazyOffsetPtr<CXXBaseSpecifier, uint64_t,
-                      &ExternalASTSource::GetExternalCXXBaseSpecifiers>
-  LazyCXXBaseSpecifiersPtr;
+using LazyCXXBaseSpecifiersPtr =
+    LazyOffsetPtr<CXXBaseSpecifier, uint64_t,
+                  &ExternalASTSource::GetExternalCXXBaseSpecifiers>;
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_EXTERNALASTSOURCE_H
diff --git a/include/clang/AST/GlobalDecl.h b/include/clang/AST/GlobalDecl.h
index adf63a3..3b3e436 100644
--- a/include/clang/AST/GlobalDecl.h
+++ b/include/clang/AST/GlobalDecl.h
@@ -1,4 +1,4 @@
-//===--- GlobalDecl.h - Global declaration holder ---------------*- C++ -*-===//
+//===- GlobalDecl.h - Global declaration holder -----------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,6 +19,12 @@
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclOpenMP.h"
 #include "clang/Basic/ABI.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/type_traits.h"
+#include <cassert>
 
 namespace clang {
 
@@ -27,7 +33,7 @@
 /// a CXXDestructorDecl and the destructor type (Base, Complete) or
 /// a VarDecl, a FunctionDecl or a BlockDecl.
 class GlobalDecl {
-  llvm::PointerIntPair<const Decl*, 2> Value;
+  llvm::PointerIntPair<const Decl *, 2> Value;
 
   void Init(const Decl *D) {
     assert(!isa<CXXConstructorDecl>(D) && "Use other ctor with ctor decls!");
@@ -37,19 +43,15 @@
   }
 
 public:
-  GlobalDecl() {}
-
+  GlobalDecl() = default;
   GlobalDecl(const VarDecl *D) { Init(D);}
   GlobalDecl(const FunctionDecl *D) { Init(D); }
   GlobalDecl(const BlockDecl *D) { Init(D); }
   GlobalDecl(const CapturedDecl *D) { Init(D); }
   GlobalDecl(const ObjCMethodDecl *D) { Init(D); }
   GlobalDecl(const OMPDeclareReductionDecl *D) { Init(D); }
-
-  GlobalDecl(const CXXConstructorDecl *D, CXXCtorType Type)
-  : Value(D, Type) {}
-  GlobalDecl(const CXXDestructorDecl *D, CXXDtorType Type)
-  : Value(D, Type) {}
+  GlobalDecl(const CXXConstructorDecl *D, CXXCtorType Type) : Value(D, Type) {}
+  GlobalDecl(const CXXDestructorDecl *D, CXXDtorType Type) : Value(D, Type) {}
 
   GlobalDecl getCanonicalDecl() const {
     GlobalDecl CanonGD;
@@ -90,10 +92,9 @@
   }
 };
 
-} // end namespace clang
+} // namespace clang
 
 namespace llvm {
-  template<class> struct DenseMapInfo;
 
   template<> struct DenseMapInfo<clang::GlobalDecl> {
     static inline clang::GlobalDecl getEmptyKey() {
@@ -113,7 +114,6 @@
                         clang::GlobalDecl RHS) {
       return LHS == RHS;
     }
-      
   };
   
   // GlobalDecl isn't *technically* a POD type. However, its copy constructor,
@@ -122,6 +122,7 @@
   struct isPodLike<clang::GlobalDecl> {
     static const bool value = true;
   };
-} // end namespace llvm
 
-#endif
+} // namespace llvm
+
+#endif // LLVM_CLANG_AST_GLOBALDECL_H
diff --git a/include/clang/AST/NestedNameSpecifier.h b/include/clang/AST/NestedNameSpecifier.h
index b1ff9bd..e2cb45c 100644
--- a/include/clang/AST/NestedNameSpecifier.h
+++ b/include/clang/AST/NestedNameSpecifier.h
@@ -1,4 +1,4 @@
-//===--- NestedNameSpecifier.h - C++ nested name specifiers -----*- C++ -*-===//
+//===- NestedNameSpecifier.h - C++ nested name specifiers -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,38 +11,42 @@
 //  a C++ nested-name-specifier.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_AST_NESTEDNAMESPECIFIER_H
 #define LLVM_CLANG_AST_NESTEDNAMESPECIFIER_H
 
 #include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/Support/Compiler.h"
+#include <cstdint>
+#include <cstdlib>
+#include <utility>
 
 namespace clang {
 
 class ASTContext;
 class CXXRecordDecl;
+class IdentifierInfo;
+class LangOptions;
 class NamespaceAliasDecl;
 class NamespaceDecl;
-class IdentifierInfo;
 struct PrintingPolicy;
 class Type;
 class TypeLoc;
-class LangOptions;
 
 /// \brief Represents a C++ nested name specifier, such as
 /// "\::std::vector<int>::".
 ///
 /// C++ nested name specifiers are the prefixes to qualified
-/// namespaces. For example, "foo::" in "foo::x" is a nested name
+/// names. For example, "foo::" in "foo::x" is a nested name
 /// specifier. Nested name specifiers are made up of a sequence of
 /// specifiers, each of which can be a namespace, type, identifier
 /// (for dependent names), decltype specifier, or the global specifier ('::').
 /// The last two specifiers can only appear at the start of a 
 /// nested-namespace-specifier.
 class NestedNameSpecifier : public llvm::FoldingSetNode {
-
   /// \brief Enumeration describing
   enum StoredSpecifierKind {
     StoredIdentifier = 0,
@@ -66,7 +70,7 @@
   /// specifier '::'. Otherwise, the pointer is one of
   /// IdentifierInfo*, Namespace*, or Type*, depending on the kind of
   /// specifier as encoded within the prefix.
-  void* Specifier;
+  void* Specifier = nullptr;
 
 public:
   /// \brief The kind of specifier that completes this nested name
@@ -74,17 +78,23 @@
   enum SpecifierKind {
     /// \brief An identifier, stored as an IdentifierInfo*.
     Identifier,
+
     /// \brief A namespace, stored as a NamespaceDecl*.
     Namespace,
+
     /// \brief A namespace alias, stored as a NamespaceAliasDecl*.
     NamespaceAlias,
+
     /// \brief A type, stored as a Type*.
     TypeSpec,
+
     /// \brief A type that was preceded by the 'template' keyword,
     /// stored as a Type*.
     TypeSpecWithTemplate,
+
     /// \brief The global specifier '::'. There is no stored value.
     Global,
+
     /// \brief Microsoft's '__super' specifier, stored as a CXXRecordDecl* of
     /// the class it appeared in.
     Super
@@ -92,17 +102,11 @@
 
 private:
   /// \brief Builds the global specifier.
-  NestedNameSpecifier()
-    : Prefix(nullptr, StoredIdentifier), Specifier(nullptr) {}
+  NestedNameSpecifier() : Prefix(nullptr, StoredIdentifier) {}
 
   /// \brief Copy constructor used internally to clone nested name
   /// specifiers.
-  NestedNameSpecifier(const NestedNameSpecifier &Other)
-    : llvm::FoldingSetNode(Other), Prefix(Other.Prefix),
-      Specifier(Other.Specifier) {
-  }
-
-  void operator=(const NestedNameSpecifier &) = delete;
+  NestedNameSpecifier(const NestedNameSpecifier &Other) = default;
 
   /// \brief Either find or insert the given nested name specifier
   /// mockup in the given context.
@@ -110,6 +114,8 @@
                                            const NestedNameSpecifier &Mockup);
 
 public:
+  NestedNameSpecifier &operator=(const NestedNameSpecifier &) = delete;
+
   /// \brief Builds a specifier combining a prefix and an identifier.
   ///
   /// The prefix must be dependent, since nested name specifiers
@@ -224,8 +230,8 @@
 /// \brief A C++ nested-name-specifier augmented with source location
 /// information.
 class NestedNameSpecifierLoc {
-  NestedNameSpecifier *Qualifier;
-  void *Data;
+  NestedNameSpecifier *Qualifier = nullptr;
+  void *Data = nullptr;
 
   /// \brief Determines the data length for the last component in the
   /// given nested-name-specifier.
@@ -237,12 +243,12 @@
 
 public:
   /// \brief Construct an empty nested-name-specifier.
-  NestedNameSpecifierLoc() : Qualifier(nullptr), Data(nullptr) { }
+  NestedNameSpecifierLoc() = default;
 
   /// \brief Construct a nested-name-specifier with source location information
   /// from
   NestedNameSpecifierLoc(NestedNameSpecifier *Qualifier, void *Data)
-    : Qualifier(Qualifier), Data(Data) { }
+      : Qualifier(Qualifier), Data(Data) {}
 
   /// \brief Evalutes true when this nested-name-specifier location is
   /// non-empty.
@@ -339,7 +345,7 @@
 class NestedNameSpecifierLocBuilder {
   /// \brief The current representation of the nested-name-specifier we're
   /// building.
-  NestedNameSpecifier *Representation;
+  NestedNameSpecifier *Representation = nullptr;
 
   /// \brief Buffer used to store source-location information for the
   /// nested-name-specifier.
@@ -347,21 +353,18 @@
   /// Note that we explicitly manage the buffer (rather than using a
   /// SmallVector) because \c Declarator expects it to be possible to memcpy()
   /// a \c CXXScopeSpec, and CXXScopeSpec uses a NestedNameSpecifierLocBuilder.
-  char *Buffer;
+  char *Buffer = nullptr;
 
   /// \brief The size of the buffer used to store source-location information
   /// for the nested-name-specifier.
-  unsigned BufferSize;
+  unsigned BufferSize = 0;
 
   /// \brief The capacity of the buffer used to store source-location
   /// information for the nested-name-specifier.
-  unsigned BufferCapacity;
+  unsigned BufferCapacity = 0;
 
 public:
-  NestedNameSpecifierLocBuilder()
-    : Representation(nullptr), Buffer(nullptr), BufferSize(0),
-      BufferCapacity(0) {}
-
+  NestedNameSpecifierLocBuilder() = default;
   NestedNameSpecifierLocBuilder(const NestedNameSpecifierLocBuilder &Other);
 
   NestedNameSpecifierLocBuilder &
@@ -451,6 +454,7 @@
   /// \param ColonColonLoc The location of the trailing '::'.
   void MakeSuper(ASTContext &Context, CXXRecordDecl *RD, 
                  SourceLocation SuperLoc, SourceLocation ColonColonLoc);
+
   /// \brief Make a new nested-name-specifier from incomplete source-location
   /// information.
   ///
@@ -511,6 +515,6 @@
   return DB;
 }
 
-}
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_NESTEDNAMESPECIFIER_H
diff --git a/include/clang/AST/ODRHash.h b/include/clang/AST/ODRHash.h
index e4cc12d..c9e8027 100644
--- a/include/clang/AST/ODRHash.h
+++ b/include/clang/AST/ODRHash.h
@@ -37,8 +37,9 @@
 // Typically, only one Add* call is needed.  clear can be called to reuse the
 // object.
 class ODRHash {
-  // Use DenseMaps to convert between Decl and Type pointers and an index value.
-  llvm::DenseMap<const Decl*, unsigned> DeclMap;
+  // Use DenseMaps to convert from DeclarationName and Type pointers
+  // to an index value.
+  llvm::DenseMap<DeclarationName, unsigned> DeclNameMap;
   llvm::DenseMap<const Type*, unsigned> TypeMap;
 
   // Save space by processing bools at the end.
@@ -53,6 +54,10 @@
   // more information than the AddDecl class.
   void AddCXXRecordDecl(const CXXRecordDecl *Record);
 
+  // Use this for ODR checking functions between modules.  This method compares
+  // more information than the AddDecl class.
+  void AddFunctionDecl(const FunctionDecl *Function);
+
   // Process SubDecls of the main Decl.  This method calls the DeclVisitor
   // while AddDecl does not.
   void AddSubDecl(const Decl *D);
diff --git a/include/clang/AST/OpenMPClause.h b/include/clang/AST/OpenMPClause.h
index 7c24e34..abcd1cb 100644
--- a/include/clang/AST/OpenMPClause.h
+++ b/include/clang/AST/OpenMPClause.h
@@ -6,35 +6,55 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+//
 /// \file
 /// \brief This file defines OpenMP AST classes for clauses.
 /// There are clauses for executable directives, clauses for declarative
 /// directives and clauses which can be used in both kinds of directives.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_AST_OPENMPCLAUSE_H
 #define LLVM_CLANG_AST_OPENMPCLAUSE_H
 
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/Stmt.h"
+#include "clang/AST/StmtIterator.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/TrailingObjects.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <utility>
 
 namespace clang {
 
+class ASTContext;
+
 //===----------------------------------------------------------------------===//
 // AST classes for clauses.
 //===----------------------------------------------------------------------===//
 
 /// \brief This is a basic class for representing single OpenMP clause.
-///
 class OMPClause {
   /// \brief Starting location of the clause (the clause keyword).
   SourceLocation StartLoc;
+
   /// \brief Ending location of the clause.
   SourceLocation EndLoc;
+
   /// \brief Kind of the clause.
   OpenMPClauseKind Kind;
 
@@ -45,11 +65,13 @@
 public:
   /// \brief Returns the starting location of the clause.
   SourceLocation getLocStart() const { return StartLoc; }
+
   /// \brief Returns the ending location of the clause.
   SourceLocation getLocEnd() const { return EndLoc; }
 
   /// \brief Sets the starting location of the clause.
   void setLocStart(SourceLocation Loc) { StartLoc = Loc; }
+
   /// \brief Sets the ending location of the clause.
   void setLocEnd(SourceLocation Loc) { EndLoc = Loc; }
 
@@ -58,16 +80,17 @@
 
   bool isImplicit() const { return StartLoc.isInvalid(); }
 
-  typedef StmtIterator child_iterator;
-  typedef ConstStmtIterator const_child_iterator;
-  typedef llvm::iterator_range<child_iterator> child_range;
-  typedef llvm::iterator_range<const_child_iterator> const_child_range;
+  using child_iterator = StmtIterator;
+  using const_child_iterator = ConstStmtIterator;
+  using child_range = llvm::iterator_range<child_iterator>;
+  using const_child_range = llvm::iterator_range<const_child_iterator>;
 
   child_range children();
   const_child_range children() const {
     auto Children = const_cast<OMPClause *>(this)->children();
     return const_child_range(Children.begin(), Children.end());
   }
+
   static bool classof(const OMPClause *) { return true; }
 };
 
@@ -75,29 +98,34 @@
 /// 'shedule', 'firstprivate' etc.
 class OMPClauseWithPreInit {
   friend class OMPClauseReader;
+
   /// Pre-initialization statement for the clause.
-  Stmt *PreInit;
+  Stmt *PreInit = nullptr;
+
   /// Region that captures the associated stmt.
-  OpenMPDirectiveKind CaptureRegion;
+  OpenMPDirectiveKind CaptureRegion = OMPD_unknown;
 
 protected:
+  OMPClauseWithPreInit(const OMPClause *This) {
+    assert(get(This) && "get is not tuned for pre-init.");
+  }
+
   /// Set pre-initialization statement for the clause.
   void setPreInitStmt(Stmt *S, OpenMPDirectiveKind ThisRegion = OMPD_unknown) {
     PreInit = S;
     CaptureRegion = ThisRegion;
   }
-  OMPClauseWithPreInit(const OMPClause *This)
-      : PreInit(nullptr), CaptureRegion(OMPD_unknown) {
-    assert(get(This) && "get is not tuned for pre-init.");
-  }
 
 public:
   /// Get pre-initialization statement for the clause.
   const Stmt *getPreInitStmt() const { return PreInit; }
+
   /// Get pre-initialization statement for the clause.
   Stmt *getPreInitStmt() { return PreInit; }
+
   /// Get capture region for the stmt in the clause.
   OpenMPDirectiveKind getCaptureRegion() { return CaptureRegion; }
+
   static OMPClauseWithPreInit *get(OMPClause *C);
   static const OMPClauseWithPreInit *get(const OMPClause *C);
 };
@@ -106,21 +134,25 @@
 /// 'lastprivate', 'reduction' etc.
 class OMPClauseWithPostUpdate : public OMPClauseWithPreInit {
   friend class OMPClauseReader;
+
   /// Post-update expression for the clause.
-  Expr *PostUpdate;
+  Expr *PostUpdate = nullptr;
+
 protected:
-  /// Set pre-initialization statement for the clause.
-  void setPostUpdateExpr(Expr *S) { PostUpdate = S; }
-  OMPClauseWithPostUpdate(const OMPClause *This)
-      : OMPClauseWithPreInit(This), PostUpdate(nullptr) {
+  OMPClauseWithPostUpdate(const OMPClause *This) : OMPClauseWithPreInit(This) {
     assert(get(This) && "get is not tuned for post-update.");
   }
 
+  /// Set pre-initialization statement for the clause.
+  void setPostUpdateExpr(Expr *S) { PostUpdate = S; }
+
 public:
   /// Get post-update expression for the clause.
   const Expr *getPostUpdateExpr() const { return PostUpdate; }
+
   /// Get post-update expression for the clause.
   Expr *getPostUpdateExpr() { return PostUpdate; }
+
   static OMPClauseWithPostUpdate *get(OMPClause *C);
   static const OMPClauseWithPostUpdate *get(const OMPClause *C);
 };
@@ -130,12 +162,25 @@
 /// '#pragma omp ...' directives.
 template <class T> class OMPVarListClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Number of variables in the list.
   unsigned NumVars;
 
 protected:
+  /// \brief Build a clause with \a N variables
+  ///
+  /// \param K Kind of the clause.
+  /// \param StartLoc Starting location of the clause (the clause keyword).
+  /// \param LParenLoc Location of '('.
+  /// \param EndLoc Ending location of the clause.
+  /// \param N Number of the variables in the clause.
+  OMPVarListClause(OpenMPClauseKind K, SourceLocation StartLoc,
+                   SourceLocation LParenLoc, SourceLocation EndLoc, unsigned N)
+      : OMPClause(K, StartLoc, EndLoc), LParenLoc(LParenLoc), NumVars(N) {}
+
   /// \brief Fetches list of variables associated with this clause.
   MutableArrayRef<Expr *> getVarRefs() {
     return MutableArrayRef<Expr *>(
@@ -150,23 +195,11 @@
               static_cast<T *>(this)->template getTrailingObjects<Expr *>());
   }
 
-  /// \brief Build a clause with \a N variables
-  ///
-  /// \param K Kind of the clause.
-  /// \param StartLoc Starting location of the clause (the clause keyword).
-  /// \param LParenLoc Location of '('.
-  /// \param EndLoc Ending location of the clause.
-  /// \param N Number of the variables in the clause.
-  ///
-  OMPVarListClause(OpenMPClauseKind K, SourceLocation StartLoc,
-                   SourceLocation LParenLoc, SourceLocation EndLoc, unsigned N)
-      : OMPClause(K, StartLoc, EndLoc), LParenLoc(LParenLoc), NumVars(N) {}
-
 public:
-  typedef MutableArrayRef<Expr *>::iterator varlist_iterator;
-  typedef ArrayRef<const Expr *>::iterator varlist_const_iterator;
-  typedef llvm::iterator_range<varlist_iterator> varlist_range;
-  typedef llvm::iterator_range<varlist_const_iterator> varlist_const_range;
+  using varlist_iterator = MutableArrayRef<Expr *>::iterator;
+  using varlist_const_iterator = ArrayRef<const Expr *>::iterator;
+  using varlist_range = llvm::iterator_range<varlist_iterator>;
+  using varlist_const_range = llvm::iterator_range<varlist_const_iterator>;
 
   unsigned varlist_size() const { return NumVars; }
   bool varlist_empty() const { return NumVars == 0; }
@@ -185,6 +218,7 @@
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
@@ -203,31 +237,34 @@
 /// \endcode
 /// In this example directive '#pragma omp parallel' has simple 'if' clause with
 /// condition 'a > 5' and directive name modifier 'parallel'.
-///
 class OMPIfClause : public OMPClause, public OMPClauseWithPreInit {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Condition of the 'if' clause.
-  Stmt *Condition;
+  Stmt *Condition = nullptr;
+
   /// \brief Location of ':' (if any).
   SourceLocation ColonLoc;
+
   /// \brief Directive name modifier for the clause.
-  OpenMPDirectiveKind NameModifier;
+  OpenMPDirectiveKind NameModifier = OMPD_unknown;
+
   /// \brief Name modifier location.
   SourceLocation NameModifierLoc;
 
   /// \brief Set condition.
-  ///
   void setCondition(Expr *Cond) { Condition = Cond; }
+
   /// \brief Set directive name modifier for the clause.
-  ///
   void setNameModifier(OpenMPDirectiveKind NM) { NameModifier = NM; }
+
   /// \brief Set location of directive name modifier for the clause.
-  ///
   void setNameModifierLoc(SourceLocation Loc) { NameModifierLoc = Loc; }
+
   /// \brief Set location of ':'.
-  ///
   void setColonLoc(SourceLocation Loc) { ColonLoc = Loc; }
 
 public:
@@ -243,7 +280,6 @@
   /// \param NameModifierLoc Location of directive name modifier.
   /// \param ColonLoc [OpenMP 4.1] Location of ':'.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPIfClause(OpenMPDirectiveKind NameModifier, Expr *Cond, Stmt *HelperCond,
               OpenMPDirectiveKind CaptureRegion, SourceLocation StartLoc,
               SourceLocation LParenLoc, SourceLocation NameModifierLoc,
@@ -255,14 +291,13 @@
   }
 
   /// \brief Build an empty clause.
-  ///
   OMPIfClause()
       : OMPClause(OMPC_if, SourceLocation(), SourceLocation()),
-        OMPClauseWithPreInit(this), LParenLoc(), Condition(nullptr), ColonLoc(),
-        NameModifier(OMPD_unknown), NameModifierLoc() {}
+        OMPClauseWithPreInit(this) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
@@ -271,17 +306,18 @@
 
   /// \brief Returns condition.
   Expr *getCondition() const { return cast_or_null<Expr>(Condition); }
+
   /// \brief Return directive name modifier associated with the clause.
   OpenMPDirectiveKind getNameModifier() const { return NameModifier; }
 
   /// \brief Return the location of directive name modifier.
   SourceLocation getNameModifierLoc() const { return NameModifierLoc; }
 
+  child_range children() { return child_range(&Condition, &Condition + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_if;
   }
-
-  child_range children() { return child_range(&Condition, &Condition + 1); }
 };
 
 /// \brief This represents 'final' clause in the '#pragma omp ...' directive.
@@ -291,16 +327,16 @@
 /// \endcode
 /// In this example directive '#pragma omp task' has simple 'final'
 /// clause with condition 'a > 5'.
-///
 class OMPFinalClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Condition of the 'if' clause.
-  Stmt *Condition;
+  Stmt *Condition = nullptr;
 
   /// \brief Set condition.
-  ///
   void setCondition(Expr *Cond) { Condition = Cond; }
 
 public:
@@ -310,31 +346,29 @@
   /// \param LParenLoc Location of '('.
   /// \param Cond Condition of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPFinalClause(Expr *Cond, SourceLocation StartLoc, SourceLocation LParenLoc,
                  SourceLocation EndLoc)
       : OMPClause(OMPC_final, StartLoc, EndLoc), LParenLoc(LParenLoc),
         Condition(Cond) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPFinalClause()
-      : OMPClause(OMPC_final, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), Condition(nullptr) {}
+      : OMPClause(OMPC_final, SourceLocation(), SourceLocation()) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
   /// \brief Returns condition.
   Expr *getCondition() const { return cast_or_null<Expr>(Condition); }
 
+  child_range children() { return child_range(&Condition, &Condition + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_final;
   }
-
-  child_range children() { return child_range(&Condition, &Condition + 1); }
 };
 
 /// \brief This represents 'num_threads' clause in the '#pragma omp ...'
@@ -345,16 +379,16 @@
 /// \endcode
 /// In this example directive '#pragma omp parallel' has simple 'num_threads'
 /// clause with number of threads '6'.
-///
 class OMPNumThreadsClause : public OMPClause, public OMPClauseWithPreInit {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Condition of the 'num_threads' clause.
-  Stmt *NumThreads;
+  Stmt *NumThreads = nullptr;
 
   /// \brief Set condition.
-  ///
   void setNumThreads(Expr *NThreads) { NumThreads = NThreads; }
 
 public:
@@ -367,7 +401,6 @@
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPNumThreadsClause(Expr *NumThreads, Stmt *HelperNumThreads,
                       OpenMPDirectiveKind CaptureRegion,
                       SourceLocation StartLoc, SourceLocation LParenLoc,
@@ -379,25 +412,24 @@
   }
 
   /// \brief Build an empty clause.
-  ///
   OMPNumThreadsClause()
       : OMPClause(OMPC_num_threads, SourceLocation(), SourceLocation()),
-        OMPClauseWithPreInit(this), LParenLoc(SourceLocation()),
-        NumThreads(nullptr) {}
+        OMPClauseWithPreInit(this) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
   /// \brief Returns number of threads.
   Expr *getNumThreads() const { return cast_or_null<Expr>(NumThreads); }
 
+  child_range children() { return child_range(&NumThreads, &NumThreads + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_num_threads;
   }
-
-  child_range children() { return child_range(&NumThreads, &NumThreads + 1); }
 };
 
 /// \brief This represents 'safelen' clause in the '#pragma omp ...'
@@ -412,13 +444,14 @@
 /// concurrently with SIMD instructions can have a greater distance
 /// in the logical iteration space than its value. The parameter of
 /// the safelen clause must be a constant positive integer expression.
-///
 class OMPSafelenClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Safe iteration space distance.
-  Stmt *Safelen;
+  Stmt *Safelen = nullptr;
 
   /// \brief Set safelen.
   void setSafelen(Expr *Len) { Safelen = Len; }
@@ -429,31 +462,29 @@
   /// \param Len Expression associated with this clause.
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPSafelenClause(Expr *Len, SourceLocation StartLoc, SourceLocation LParenLoc,
                    SourceLocation EndLoc)
       : OMPClause(OMPC_safelen, StartLoc, EndLoc), LParenLoc(LParenLoc),
         Safelen(Len) {}
 
   /// \brief Build an empty clause.
-  ///
   explicit OMPSafelenClause()
-      : OMPClause(OMPC_safelen, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), Safelen(nullptr) {}
+      : OMPClause(OMPC_safelen, SourceLocation(), SourceLocation()) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
   /// \brief Return safe iteration space distance.
   Expr *getSafelen() const { return cast_or_null<Expr>(Safelen); }
 
+  child_range children() { return child_range(&Safelen, &Safelen + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_safelen;
   }
-
-  child_range children() { return child_range(&Safelen, &Safelen + 1); }
 };
 
 /// \brief This represents 'simdlen' clause in the '#pragma omp ...'
@@ -467,13 +498,14 @@
 /// If the 'simdlen' clause is used then it specifies the preferred number of
 /// iterations to be executed concurrently. The parameter of the 'simdlen'
 /// clause must be a constant positive integer expression.
-///
 class OMPSimdlenClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Safe iteration space distance.
-  Stmt *Simdlen;
+  Stmt *Simdlen = nullptr;
 
   /// \brief Set simdlen.
   void setSimdlen(Expr *Len) { Simdlen = Len; }
@@ -484,31 +516,29 @@
   /// \param Len Expression associated with this clause.
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPSimdlenClause(Expr *Len, SourceLocation StartLoc, SourceLocation LParenLoc,
                    SourceLocation EndLoc)
       : OMPClause(OMPC_simdlen, StartLoc, EndLoc), LParenLoc(LParenLoc),
         Simdlen(Len) {}
 
   /// \brief Build an empty clause.
-  ///
   explicit OMPSimdlenClause()
-      : OMPClause(OMPC_simdlen, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), Simdlen(nullptr) {}
+      : OMPClause(OMPC_simdlen, SourceLocation(), SourceLocation()) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
   /// \brief Return safe iteration space distance.
   Expr *getSimdlen() const { return cast_or_null<Expr>(Simdlen); }
 
+  child_range children() { return child_range(&Simdlen, &Simdlen + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_simdlen;
   }
-
-  child_range children() { return child_range(&Simdlen, &Simdlen + 1); }
 };
 
 /// \brief This represents 'collapse' clause in the '#pragma omp ...'
@@ -522,13 +552,14 @@
 /// The parameter must be a constant positive integer expression, it specifies
 /// the number of nested loops that should be collapsed into a single iteration
 /// space.
-///
 class OMPCollapseClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Number of for-loops.
-  Stmt *NumForLoops;
+  Stmt *NumForLoops = nullptr;
 
   /// \brief Set the number of associated for-loops.
   void setNumForLoops(Expr *Num) { NumForLoops = Num; }
@@ -540,31 +571,29 @@
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPCollapseClause(Expr *Num, SourceLocation StartLoc,
                     SourceLocation LParenLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_collapse, StartLoc, EndLoc), LParenLoc(LParenLoc),
         NumForLoops(Num) {}
 
   /// \brief Build an empty clause.
-  ///
   explicit OMPCollapseClause()
-      : OMPClause(OMPC_collapse, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), NumForLoops(nullptr) {}
+      : OMPClause(OMPC_collapse, SourceLocation(), SourceLocation()) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
   /// \brief Return the number of associated for-loops.
   Expr *getNumForLoops() const { return cast_or_null<Expr>(NumForLoops); }
 
+  child_range children() { return child_range(&NumForLoops, &NumForLoops + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_collapse;
   }
-
-  child_range children() { return child_range(&NumForLoops, &NumForLoops + 1); }
 };
 
 /// \brief This represents 'default' clause in the '#pragma omp ...' directive.
@@ -574,26 +603,26 @@
 /// \endcode
 /// In this example directive '#pragma omp parallel' has simple 'default'
 /// clause with kind 'shared'.
-///
 class OMPDefaultClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief A kind of the 'default' clause.
-  OpenMPDefaultClauseKind Kind;
+  OpenMPDefaultClauseKind Kind = OMPC_DEFAULT_unknown;
+
   /// \brief Start location of the kind in source code.
   SourceLocation KindKwLoc;
 
   /// \brief Set kind of the clauses.
   ///
   /// \param K Argument of clause.
-  ///
   void setDefaultKind(OpenMPDefaultClauseKind K) { Kind = K; }
 
   /// \brief Set argument location.
   ///
   /// \param KLoc Argument location.
-  ///
   void setDefaultKindKwLoc(SourceLocation KLoc) { KindKwLoc = KLoc; }
 
 public:
@@ -604,7 +633,6 @@
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPDefaultClause(OpenMPDefaultClauseKind A, SourceLocation ALoc,
                    SourceLocation StartLoc, SourceLocation LParenLoc,
                    SourceLocation EndLoc)
@@ -612,14 +640,12 @@
         Kind(A), KindKwLoc(ALoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPDefaultClause()
-      : OMPClause(OMPC_default, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), Kind(OMPC_DEFAULT_unknown),
-        KindKwLoc(SourceLocation()) {}
+      : OMPClause(OMPC_default, SourceLocation(), SourceLocation()) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
@@ -629,13 +655,13 @@
   /// \brief Returns location of clause kind.
   SourceLocation getDefaultKindKwLoc() const { return KindKwLoc; }
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_default;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_default;
+  }
 };
 
 /// \brief This represents 'proc_bind' clause in the '#pragma omp ...'
@@ -646,26 +672,26 @@
 /// \endcode
 /// In this example directive '#pragma omp parallel' has simple 'proc_bind'
 /// clause with kind 'master'.
-///
 class OMPProcBindClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief A kind of the 'proc_bind' clause.
-  OpenMPProcBindClauseKind Kind;
+  OpenMPProcBindClauseKind Kind = OMPC_PROC_BIND_unknown;
+
   /// \brief Start location of the kind in source code.
   SourceLocation KindKwLoc;
 
   /// \brief Set kind of the clause.
   ///
   /// \param K Kind of clause.
-  ///
   void setProcBindKind(OpenMPProcBindClauseKind K) { Kind = K; }
 
   /// \brief Set clause kind location.
   ///
   /// \param KLoc Kind location.
-  ///
   void setProcBindKindKwLoc(SourceLocation KLoc) { KindKwLoc = KLoc; }
 
 public:
@@ -677,7 +703,6 @@
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPProcBindClause(OpenMPProcBindClauseKind A, SourceLocation ALoc,
                     SourceLocation StartLoc, SourceLocation LParenLoc,
                     SourceLocation EndLoc)
@@ -685,14 +710,12 @@
         Kind(A), KindKwLoc(ALoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPProcBindClause()
-      : OMPClause(OMPC_proc_bind, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), Kind(OMPC_PROC_BIND_unknown),
-        KindKwLoc(SourceLocation()) {}
+      : OMPClause(OMPC_proc_bind, SourceLocation(), SourceLocation()) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
@@ -702,13 +725,13 @@
   /// \brief Returns location of clause kind.
   SourceLocation getProcBindKindKwLoc() const { return KindKwLoc; }
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_proc_bind;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_proc_bind;
+  }
 };
 
 /// \brief This represents 'schedule' clause in the '#pragma omp ...' directive.
@@ -718,58 +741,63 @@
 /// \endcode
 /// In this example directive '#pragma omp for' has 'schedule' clause with
 /// arguments 'static' and '3'.
-///
 class OMPScheduleClause : public OMPClause, public OMPClauseWithPreInit {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief A kind of the 'schedule' clause.
-  OpenMPScheduleClauseKind Kind;
+  OpenMPScheduleClauseKind Kind = OMPC_SCHEDULE_unknown;
+
   /// \brief Modifiers for 'schedule' clause.
   enum {FIRST, SECOND, NUM_MODIFIERS};
   OpenMPScheduleClauseModifier Modifiers[NUM_MODIFIERS];
+
   /// \brief Locations of modifiers.
   SourceLocation ModifiersLoc[NUM_MODIFIERS];
+
   /// \brief Start location of the schedule ind in source code.
   SourceLocation KindLoc;
+
   /// \brief Location of ',' (if any).
   SourceLocation CommaLoc;
+
   /// \brief Chunk size.
-  Expr *ChunkSize;
+  Expr *ChunkSize = nullptr;
 
   /// \brief Set schedule kind.
   ///
   /// \param K Schedule kind.
-  ///
   void setScheduleKind(OpenMPScheduleClauseKind K) { Kind = K; }
+
   /// \brief Set the first schedule modifier.
   ///
   /// \param M Schedule modifier.
-  ///
   void setFirstScheduleModifier(OpenMPScheduleClauseModifier M) {
     Modifiers[FIRST] = M;
   }
+
   /// \brief Set the second schedule modifier.
   ///
   /// \param M Schedule modifier.
-  ///
   void setSecondScheduleModifier(OpenMPScheduleClauseModifier M) {
     Modifiers[SECOND] = M;
   }
+
   /// \brief Set location of the first schedule modifier.
-  ///
   void setFirstScheduleModifierLoc(SourceLocation Loc) {
     ModifiersLoc[FIRST] = Loc;
   }
+
   /// \brief Set location of the second schedule modifier.
-  ///
   void setSecondScheduleModifierLoc(SourceLocation Loc) {
     ModifiersLoc[SECOND] = Loc;
   }
+
   /// \brief Set schedule modifier location.
   ///
   /// \param M Schedule modifier location.
-  ///
   void setScheduleModifer(OpenMPScheduleClauseModifier M) {
     if (Modifiers[FIRST] == OMPC_SCHEDULE_MODIFIER_unknown)
       Modifiers[FIRST] = M;
@@ -778,25 +806,25 @@
       Modifiers[SECOND] = M;
     }
   }
+
   /// \brief Sets the location of '('.
   ///
   /// \param Loc Location of '('.
-  ///
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Set schedule kind start location.
   ///
   /// \param KLoc Schedule kind location.
-  ///
   void setScheduleKindLoc(SourceLocation KLoc) { KindLoc = KLoc; }
+
   /// \brief Set location of ','.
   ///
   /// \param Loc Location of ','.
-  ///
   void setCommaLoc(SourceLocation Loc) { CommaLoc = Loc; }
+
   /// \brief Set chunk size.
   ///
   /// \param E Chunk size.
-  ///
   void setChunkSize(Expr *E) { ChunkSize = E; }
 
 public:
@@ -815,7 +843,6 @@
   /// \param M1Loc Location of the first modifier
   /// \param M2 The second modifier applied to 'schedule' clause.
   /// \param M2Loc Location of the second modifier
-  ///
   OMPScheduleClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                     SourceLocation KLoc, SourceLocation CommaLoc,
                     SourceLocation EndLoc, OpenMPScheduleClauseKind Kind,
@@ -833,62 +860,59 @@
   }
 
   /// \brief Build an empty clause.
-  ///
   explicit OMPScheduleClause()
       : OMPClause(OMPC_schedule, SourceLocation(), SourceLocation()),
-        OMPClauseWithPreInit(this), Kind(OMPC_SCHEDULE_unknown),
-        ChunkSize(nullptr) {
+        OMPClauseWithPreInit(this) {
     Modifiers[FIRST] = OMPC_SCHEDULE_MODIFIER_unknown;
     Modifiers[SECOND] = OMPC_SCHEDULE_MODIFIER_unknown;
   }
 
   /// \brief Get kind of the clause.
-  ///
   OpenMPScheduleClauseKind getScheduleKind() const { return Kind; }
+
   /// \brief Get the first modifier of the clause.
-  ///
   OpenMPScheduleClauseModifier getFirstScheduleModifier() const {
     return Modifiers[FIRST];
   }
+
   /// \brief Get the second modifier of the clause.
-  ///
   OpenMPScheduleClauseModifier getSecondScheduleModifier() const {
     return Modifiers[SECOND];
   }
+
   /// \brief Get location of '('.
-  ///
   SourceLocation getLParenLoc() { return LParenLoc; }
+
   /// \brief Get kind location.
-  ///
   SourceLocation getScheduleKindLoc() { return KindLoc; }
+
   /// \brief Get the first modifier location.
-  ///
   SourceLocation getFirstScheduleModifierLoc() const {
     return ModifiersLoc[FIRST];
   }
+
   /// \brief Get the second modifier location.
-  ///
   SourceLocation getSecondScheduleModifierLoc() const {
     return ModifiersLoc[SECOND];
   }
-  /// \brief Get location of ','.
-  ///
-  SourceLocation getCommaLoc() { return CommaLoc; }
-  /// \brief Get chunk size.
-  ///
-  Expr *getChunkSize() { return ChunkSize; }
-  /// \brief Get chunk size.
-  ///
-  const Expr *getChunkSize() const { return ChunkSize; }
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_schedule;
-  }
+  /// \brief Get location of ','.
+  SourceLocation getCommaLoc() { return CommaLoc; }
+
+  /// \brief Get chunk size.
+  Expr *getChunkSize() { return ChunkSize; }
+
+  /// \brief Get chunk size.
+  const Expr *getChunkSize() const { return ChunkSize; }
 
   child_range children() {
     return child_range(reinterpret_cast<Stmt **>(&ChunkSize),
                        reinterpret_cast<Stmt **>(&ChunkSize) + 1);
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_schedule;
+  }
 };
 
 /// \brief This represents 'ordered' clause in the '#pragma omp ...' directive.
@@ -898,13 +922,14 @@
 /// \endcode
 /// In this example directive '#pragma omp for' has 'ordered' clause with
 /// parameter 2.
-///
 class OMPOrderedClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Number of for-loops.
-  Stmt *NumForLoops;
+  Stmt *NumForLoops = nullptr;
 
   /// \brief Set the number of associated for-loops.
   void setNumForLoops(Expr *Num) { NumForLoops = Num; }
@@ -916,31 +941,29 @@
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPOrderedClause(Expr *Num, SourceLocation StartLoc,
                     SourceLocation LParenLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_ordered, StartLoc, EndLoc), LParenLoc(LParenLoc),
         NumForLoops(Num) {}
 
   /// \brief Build an empty clause.
-  ///
   explicit OMPOrderedClause()
-      : OMPClause(OMPC_ordered, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), NumForLoops(nullptr) {}
+      : OMPClause(OMPC_ordered, SourceLocation(), SourceLocation()) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
   /// \brief Return the number of associated for-loops.
   Expr *getNumForLoops() const { return cast_or_null<Expr>(NumForLoops); }
 
+  child_range children() { return child_range(&NumForLoops, &NumForLoops + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_ordered;
   }
-
-  child_range children() { return child_range(&NumForLoops, &NumForLoops + 1); }
 };
 
 /// \brief This represents 'nowait' clause in the '#pragma omp ...' directive.
@@ -949,29 +972,26 @@
 /// #pragma omp for nowait
 /// \endcode
 /// In this example directive '#pragma omp for' has 'nowait' clause.
-///
 class OMPNowaitClause : public OMPClause {
 public:
   /// \brief Build 'nowait' clause.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPNowaitClause(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_nowait, StartLoc, EndLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPNowaitClause()
       : OMPClause(OMPC_nowait, SourceLocation(), SourceLocation()) {}
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_nowait;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_nowait;
+  }
 };
 
 /// \brief This represents 'untied' clause in the '#pragma omp ...' directive.
@@ -980,29 +1000,26 @@
 /// #pragma omp task untied
 /// \endcode
 /// In this example directive '#pragma omp task' has 'untied' clause.
-///
 class OMPUntiedClause : public OMPClause {
 public:
   /// \brief Build 'untied' clause.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPUntiedClause(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_untied, StartLoc, EndLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPUntiedClause()
       : OMPClause(OMPC_untied, SourceLocation(), SourceLocation()) {}
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_untied;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_untied;
+  }
 };
 
 /// \brief This represents 'mergeable' clause in the '#pragma omp ...'
@@ -1012,29 +1029,26 @@
 /// #pragma omp task mergeable
 /// \endcode
 /// In this example directive '#pragma omp task' has 'mergeable' clause.
-///
 class OMPMergeableClause : public OMPClause {
 public:
   /// \brief Build 'mergeable' clause.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPMergeableClause(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_mergeable, StartLoc, EndLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPMergeableClause()
       : OMPClause(OMPC_mergeable, SourceLocation(), SourceLocation()) {}
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_mergeable;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_mergeable;
+  }
 };
 
 /// \brief This represents 'read' clause in the '#pragma omp atomic' directive.
@@ -1043,28 +1057,25 @@
 /// #pragma omp atomic read
 /// \endcode
 /// In this example directive '#pragma omp atomic' has 'read' clause.
-///
 class OMPReadClause : public OMPClause {
 public:
   /// \brief Build 'read' clause.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPReadClause(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_read, StartLoc, EndLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPReadClause() : OMPClause(OMPC_read, SourceLocation(), SourceLocation()) {}
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_read;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_read;
+  }
 };
 
 /// \brief This represents 'write' clause in the '#pragma omp atomic' directive.
@@ -1073,29 +1084,26 @@
 /// #pragma omp atomic write
 /// \endcode
 /// In this example directive '#pragma omp atomic' has 'write' clause.
-///
 class OMPWriteClause : public OMPClause {
 public:
   /// \brief Build 'write' clause.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPWriteClause(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_write, StartLoc, EndLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPWriteClause()
       : OMPClause(OMPC_write, SourceLocation(), SourceLocation()) {}
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_write;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_write;
+  }
 };
 
 /// \brief This represents 'update' clause in the '#pragma omp atomic'
@@ -1105,29 +1113,26 @@
 /// #pragma omp atomic update
 /// \endcode
 /// In this example directive '#pragma omp atomic' has 'update' clause.
-///
 class OMPUpdateClause : public OMPClause {
 public:
   /// \brief Build 'update' clause.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPUpdateClause(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_update, StartLoc, EndLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPUpdateClause()
       : OMPClause(OMPC_update, SourceLocation(), SourceLocation()) {}
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_update;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_update;
+  }
 };
 
 /// \brief This represents 'capture' clause in the '#pragma omp atomic'
@@ -1137,29 +1142,26 @@
 /// #pragma omp atomic capture
 /// \endcode
 /// In this example directive '#pragma omp atomic' has 'capture' clause.
-///
 class OMPCaptureClause : public OMPClause {
 public:
   /// \brief Build 'capture' clause.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPCaptureClause(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_capture, StartLoc, EndLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPCaptureClause()
       : OMPClause(OMPC_capture, SourceLocation(), SourceLocation()) {}
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_capture;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_capture;
+  }
 };
 
 /// \brief This represents 'seq_cst' clause in the '#pragma omp atomic'
@@ -1169,29 +1171,26 @@
 /// #pragma omp atomic seq_cst
 /// \endcode
 /// In this example directive '#pragma omp atomic' has 'seq_cst' clause.
-///
 class OMPSeqCstClause : public OMPClause {
 public:
   /// \brief Build 'seq_cst' clause.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPSeqCstClause(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_seq_cst, StartLoc, EndLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPSeqCstClause()
       : OMPClause(OMPC_seq_cst, SourceLocation(), SourceLocation()) {}
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_seq_cst;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_seq_cst;
+  }
 };
 
 /// \brief This represents clause 'private' in the '#pragma omp ...' directives.
@@ -1201,20 +1200,19 @@
 /// \endcode
 /// In this example directive '#pragma omp parallel' has clause 'private'
 /// with the variables 'a' and 'b'.
-///
 class OMPPrivateClause final
     : public OMPVarListClause<OMPPrivateClause>,
       private llvm::TrailingObjects<OMPPrivateClause, Expr *> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
   friend class OMPClauseReader;
+  friend OMPVarListClause;
+  friend TrailingObjects;
+
   /// \brief Build clause with number of variables \a N.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
   /// \param N Number of the variables in the clause.
-  ///
   OMPPrivateClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                    SourceLocation EndLoc, unsigned N)
       : OMPVarListClause<OMPPrivateClause>(OMPC_private, StartLoc, LParenLoc,
@@ -1223,7 +1221,6 @@
   /// \brief Build an empty clause.
   ///
   /// \param N Number of variables.
-  ///
   explicit OMPPrivateClause(unsigned N)
       : OMPVarListClause<OMPPrivateClause>(OMPC_private, SourceLocation(),
                                            SourceLocation(), SourceLocation(),
@@ -1252,28 +1249,28 @@
   /// \param EndLoc Ending location of the clause.
   /// \param VL List of references to the variables.
   /// \param PrivateVL List of references to private copies with initializers.
-  ///
   static OMPPrivateClause *Create(const ASTContext &C, SourceLocation StartLoc,
                                   SourceLocation LParenLoc,
                                   SourceLocation EndLoc, ArrayRef<Expr *> VL,
                                   ArrayRef<Expr *> PrivateVL);
+
   /// \brief Creates an empty clause with the place for \a N variables.
   ///
   /// \param C AST context.
   /// \param N The number of variables.
-  ///
   static OMPPrivateClause *CreateEmpty(const ASTContext &C, unsigned N);
 
-  typedef MutableArrayRef<Expr *>::iterator private_copies_iterator;
-  typedef ArrayRef<const Expr *>::iterator private_copies_const_iterator;
-  typedef llvm::iterator_range<private_copies_iterator> private_copies_range;
-  typedef llvm::iterator_range<private_copies_const_iterator>
-      private_copies_const_range;
+  using private_copies_iterator = MutableArrayRef<Expr *>::iterator;
+  using private_copies_const_iterator = ArrayRef<const Expr *>::iterator;
+  using private_copies_range = llvm::iterator_range<private_copies_iterator>;
+  using private_copies_const_range =
+      llvm::iterator_range<private_copies_const_iterator>;
 
   private_copies_range private_copies() {
     return private_copies_range(getPrivateCopies().begin(),
                                 getPrivateCopies().end());
   }
+
   private_copies_const_range private_copies() const {
     return private_copies_const_range(getPrivateCopies().begin(),
                                       getPrivateCopies().end());
@@ -1297,14 +1294,13 @@
 /// \endcode
 /// In this example directive '#pragma omp parallel' has clause 'firstprivate'
 /// with the variables 'a' and 'b'.
-///
 class OMPFirstprivateClause final
     : public OMPVarListClause<OMPFirstprivateClause>,
       public OMPClauseWithPreInit,
       private llvm::TrailingObjects<OMPFirstprivateClause, Expr *> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
   friend class OMPClauseReader;
+  friend OMPVarListClause;
+  friend TrailingObjects;
 
   /// \brief Build clause with number of variables \a N.
   ///
@@ -1312,7 +1308,6 @@
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
   /// \param N Number of the variables in the clause.
-  ///
   OMPFirstprivateClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                         SourceLocation EndLoc, unsigned N)
       : OMPVarListClause<OMPFirstprivateClause>(OMPC_firstprivate, StartLoc,
@@ -1322,12 +1317,12 @@
   /// \brief Build an empty clause.
   ///
   /// \param N Number of variables.
-  ///
   explicit OMPFirstprivateClause(unsigned N)
       : OMPVarListClause<OMPFirstprivateClause>(
             OMPC_firstprivate, SourceLocation(), SourceLocation(),
             SourceLocation(), N),
         OMPClauseWithPreInit(this) {}
+
   /// \brief Sets the list of references to private copies with initializers for
   /// new private variables.
   /// \param VL List of references.
@@ -1370,23 +1365,22 @@
   /// of array type.
   /// \param PreInit Statement that must be executed before entering the OpenMP
   /// region with this clause.
-  ///
   static OMPFirstprivateClause *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
          SourceLocation EndLoc, ArrayRef<Expr *> VL, ArrayRef<Expr *> PrivateVL,
          ArrayRef<Expr *> InitVL, Stmt *PreInit);
+
   /// \brief Creates an empty clause with the place for \a N variables.
   ///
   /// \param C AST context.
   /// \param N The number of variables.
-  ///
   static OMPFirstprivateClause *CreateEmpty(const ASTContext &C, unsigned N);
 
-  typedef MutableArrayRef<Expr *>::iterator private_copies_iterator;
-  typedef ArrayRef<const Expr *>::iterator private_copies_const_iterator;
-  typedef llvm::iterator_range<private_copies_iterator> private_copies_range;
-  typedef llvm::iterator_range<private_copies_const_iterator>
-      private_copies_const_range;
+  using private_copies_iterator = MutableArrayRef<Expr *>::iterator;
+  using private_copies_const_iterator = ArrayRef<const Expr *>::iterator;
+  using private_copies_range = llvm::iterator_range<private_copies_iterator>;
+  using private_copies_const_range =
+      llvm::iterator_range<private_copies_const_iterator>;
 
   private_copies_range private_copies() {
     return private_copies_range(getPrivateCopies().begin(),
@@ -1397,10 +1391,10 @@
                                       getPrivateCopies().end());
   }
 
-  typedef MutableArrayRef<Expr *>::iterator inits_iterator;
-  typedef ArrayRef<const Expr *>::iterator inits_const_iterator;
-  typedef llvm::iterator_range<inits_iterator> inits_range;
-  typedef llvm::iterator_range<inits_const_iterator> inits_const_range;
+  using inits_iterator = MutableArrayRef<Expr *>::iterator;
+  using inits_const_iterator = ArrayRef<const Expr *>::iterator;
+  using inits_range = llvm::iterator_range<inits_iterator>;
+  using inits_const_range = llvm::iterator_range<inits_const_iterator>;
 
   inits_range inits() {
     return inits_range(getInits().begin(), getInits().end());
@@ -1447,10 +1441,9 @@
   // \endcode
   // Required for proper codegen of final assignment performed by the
   // lastprivate clause.
-  //
-  friend TrailingObjects;
-  friend OMPVarListClause;
   friend class OMPClauseReader;
+  friend OMPVarListClause;
+  friend TrailingObjects;
 
   /// \brief Build clause with number of variables \a N.
   ///
@@ -1458,7 +1451,6 @@
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
   /// \param N Number of the variables in the clause.
-  ///
   OMPLastprivateClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                        SourceLocation EndLoc, unsigned N)
       : OMPVarListClause<OMPLastprivateClause>(OMPC_lastprivate, StartLoc,
@@ -1468,7 +1460,6 @@
   /// \brief Build an empty clause.
   ///
   /// \param N Number of variables.
-  ///
   explicit OMPLastprivateClause(unsigned N)
       : OMPVarListClause<OMPLastprivateClause>(
             OMPC_lastprivate, SourceLocation(), SourceLocation(),
@@ -1550,24 +1541,23 @@
   /// region with this clause.
   /// \param PostUpdate Expression that must be executed after exit from the
   /// OpenMP region with this clause.
-  ///
   static OMPLastprivateClause *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
          SourceLocation EndLoc, ArrayRef<Expr *> VL, ArrayRef<Expr *> SrcExprs,
          ArrayRef<Expr *> DstExprs, ArrayRef<Expr *> AssignmentOps,
          Stmt *PreInit, Expr *PostUpdate);
+
   /// \brief Creates an empty clause with the place for \a N variables.
   ///
   /// \param C AST context.
   /// \param N The number of variables.
-  ///
   static OMPLastprivateClause *CreateEmpty(const ASTContext &C, unsigned N);
 
-  typedef MutableArrayRef<Expr *>::iterator helper_expr_iterator;
-  typedef ArrayRef<const Expr *>::iterator helper_expr_const_iterator;
-  typedef llvm::iterator_range<helper_expr_iterator> helper_expr_range;
-  typedef llvm::iterator_range<helper_expr_const_iterator>
-      helper_expr_const_range;
+  using helper_expr_iterator = MutableArrayRef<Expr *>::iterator;
+  using helper_expr_const_iterator = ArrayRef<const Expr *>::iterator;
+  using helper_expr_range = llvm::iterator_range<helper_expr_iterator>;
+  using helper_expr_const_range =
+      llvm::iterator_range<helper_expr_const_iterator>;
 
   /// \brief Set list of helper expressions, required for generation of private
   /// copies of original lastprivate variables.
@@ -1577,29 +1567,36 @@
     return helper_expr_const_range(getPrivateCopies().begin(),
                                    getPrivateCopies().end());
   }
+
   helper_expr_range private_copies() {
     return helper_expr_range(getPrivateCopies().begin(),
                              getPrivateCopies().end());
   }
+
   helper_expr_const_range source_exprs() const {
     return helper_expr_const_range(getSourceExprs().begin(),
                                    getSourceExprs().end());
   }
+
   helper_expr_range source_exprs() {
     return helper_expr_range(getSourceExprs().begin(), getSourceExprs().end());
   }
+
   helper_expr_const_range destination_exprs() const {
     return helper_expr_const_range(getDestinationExprs().begin(),
                                    getDestinationExprs().end());
   }
+
   helper_expr_range destination_exprs() {
     return helper_expr_range(getDestinationExprs().begin(),
                              getDestinationExprs().end());
   }
+
   helper_expr_const_range assignment_ops() const {
     return helper_expr_const_range(getAssignmentOps().begin(),
                                    getAssignmentOps().end());
   }
+
   helper_expr_range assignment_ops() {
     return helper_expr_range(getAssignmentOps().begin(),
                              getAssignmentOps().end());
@@ -1622,19 +1619,18 @@
 /// \endcode
 /// In this example directive '#pragma omp parallel' has clause 'shared'
 /// with the variables 'a' and 'b'.
-///
 class OMPSharedClause final
     : public OMPVarListClause<OMPSharedClause>,
       private llvm::TrailingObjects<OMPSharedClause, Expr *> {
-  friend TrailingObjects;
   friend OMPVarListClause;
+  friend TrailingObjects;
+
   /// \brief Build clause with number of variables \a N.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
   /// \param N Number of the variables in the clause.
-  ///
   OMPSharedClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                   SourceLocation EndLoc, unsigned N)
       : OMPVarListClause<OMPSharedClause>(OMPC_shared, StartLoc, LParenLoc,
@@ -1643,7 +1639,6 @@
   /// \brief Build an empty clause.
   ///
   /// \param N Number of variables.
-  ///
   explicit OMPSharedClause(unsigned N)
       : OMPVarListClause<OMPSharedClause>(OMPC_shared, SourceLocation(),
                                           SourceLocation(), SourceLocation(),
@@ -1657,15 +1652,14 @@
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
   /// \param VL List of references to the variables.
-  ///
   static OMPSharedClause *Create(const ASTContext &C, SourceLocation StartLoc,
                                  SourceLocation LParenLoc,
                                  SourceLocation EndLoc, ArrayRef<Expr *> VL);
+
   /// \brief Creates an empty clause with \a N variables.
   ///
   /// \param C AST context.
   /// \param N The number of variables.
-  ///
   static OMPSharedClause *CreateEmpty(const ASTContext &C, unsigned N);
 
   child_range children() {
@@ -1686,18 +1680,20 @@
 /// \endcode
 /// In this example directive '#pragma omp parallel' has clause 'reduction'
 /// with operator '+' and the variables 'a' and 'b'.
-///
 class OMPReductionClause final
     : public OMPVarListClause<OMPReductionClause>,
       public OMPClauseWithPostUpdate,
       private llvm::TrailingObjects<OMPReductionClause, Expr *> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
   friend class OMPClauseReader;
+  friend OMPVarListClause;
+  friend TrailingObjects;
+
   /// \brief Location of ':'.
   SourceLocation ColonLoc;
+
   /// \brief Nested name specifier for C++.
   NestedNameSpecifierLoc QualifierLoc;
+
   /// \brief Name of custom operator.
   DeclarationNameInfo NameInfo;
 
@@ -1710,7 +1706,6 @@
   /// \param N Number of the variables in the clause.
   /// \param QualifierLoc The nested-name qualifier with location information
   /// \param NameInfo The full name info for reduction identifier.
-  ///
   OMPReductionClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                      SourceLocation ColonLoc, SourceLocation EndLoc, unsigned N,
                      NestedNameSpecifierLoc QualifierLoc,
@@ -1723,17 +1718,18 @@
   /// \brief Build an empty clause.
   ///
   /// \param N Number of variables.
-  ///
   explicit OMPReductionClause(unsigned N)
       : OMPVarListClause<OMPReductionClause>(OMPC_reduction, SourceLocation(),
                                              SourceLocation(), SourceLocation(),
                                              N),
-        OMPClauseWithPostUpdate(this), ColonLoc(), QualifierLoc(), NameInfo() {}
+        OMPClauseWithPostUpdate(this) {}
 
   /// \brief Sets location of ':' symbol in clause.
   void setColonLoc(SourceLocation CL) { ColonLoc = CL; }
+
   /// \brief Sets the name info for specified reduction identifier.
   void setNameInfo(DeclarationNameInfo DNI) { NameInfo = DNI; }
+
   /// \brief Sets the nested name specifier.
   void setQualifierLoc(NestedNameSpecifierLoc NSL) { QualifierLoc = NSL; }
 
@@ -1825,7 +1821,6 @@
   /// region with this clause.
   /// \param PostUpdate Expression that must be executed after exit from the
   /// OpenMP region with this clause.
-  ///
   static OMPReductionClause *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
          SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef<Expr *> VL,
@@ -1833,48 +1828,57 @@
          const DeclarationNameInfo &NameInfo, ArrayRef<Expr *> Privates,
          ArrayRef<Expr *> LHSExprs, ArrayRef<Expr *> RHSExprs,
          ArrayRef<Expr *> ReductionOps, Stmt *PreInit, Expr *PostUpdate);
+
   /// \brief Creates an empty clause with the place for \a N variables.
   ///
   /// \param C AST context.
   /// \param N The number of variables.
-  ///
   static OMPReductionClause *CreateEmpty(const ASTContext &C, unsigned N);
 
   /// \brief Gets location of ':' symbol in clause.
   SourceLocation getColonLoc() const { return ColonLoc; }
+
   /// \brief Gets the name info for specified reduction identifier.
   const DeclarationNameInfo &getNameInfo() const { return NameInfo; }
+
   /// \brief Gets the nested name specifier.
   NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
 
-  typedef MutableArrayRef<Expr *>::iterator helper_expr_iterator;
-  typedef ArrayRef<const Expr *>::iterator helper_expr_const_iterator;
-  typedef llvm::iterator_range<helper_expr_iterator> helper_expr_range;
-  typedef llvm::iterator_range<helper_expr_const_iterator>
-      helper_expr_const_range;
+  using helper_expr_iterator = MutableArrayRef<Expr *>::iterator;
+  using helper_expr_const_iterator = ArrayRef<const Expr *>::iterator;
+  using helper_expr_range = llvm::iterator_range<helper_expr_iterator>;
+  using helper_expr_const_range =
+      llvm::iterator_range<helper_expr_const_iterator>;
 
   helper_expr_const_range privates() const {
     return helper_expr_const_range(getPrivates().begin(), getPrivates().end());
   }
+
   helper_expr_range privates() {
     return helper_expr_range(getPrivates().begin(), getPrivates().end());
   }
+
   helper_expr_const_range lhs_exprs() const {
     return helper_expr_const_range(getLHSExprs().begin(), getLHSExprs().end());
   }
+
   helper_expr_range lhs_exprs() {
     return helper_expr_range(getLHSExprs().begin(), getLHSExprs().end());
   }
+
   helper_expr_const_range rhs_exprs() const {
     return helper_expr_const_range(getRHSExprs().begin(), getRHSExprs().end());
   }
+
   helper_expr_range rhs_exprs() {
     return helper_expr_range(getRHSExprs().begin(), getRHSExprs().end());
   }
+
   helper_expr_const_range reduction_ops() const {
     return helper_expr_const_range(getReductionOps().begin(),
                                    getReductionOps().end());
   }
+
   helper_expr_range reduction_ops() {
     return helper_expr_range(getReductionOps().begin(),
                              getReductionOps().end());
@@ -1898,18 +1902,20 @@
 /// \endcode
 /// In this example directive '#pragma omp taskgroup' has clause
 /// 'task_reduction' with operator '+' and the variables 'a' and 'b'.
-///
 class OMPTaskReductionClause final
     : public OMPVarListClause<OMPTaskReductionClause>,
       public OMPClauseWithPostUpdate,
       private llvm::TrailingObjects<OMPTaskReductionClause, Expr *> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
   friend class OMPClauseReader;
+  friend OMPVarListClause;
+  friend TrailingObjects;
+
   /// Location of ':'.
   SourceLocation ColonLoc;
+
   /// Nested name specifier for C++.
   NestedNameSpecifierLoc QualifierLoc;
+
   /// Name of custom operator.
   DeclarationNameInfo NameInfo;
 
@@ -1922,7 +1928,6 @@
   /// \param N Number of the variables in the clause.
   /// \param QualifierLoc The nested-name qualifier with location information
   /// \param NameInfo The full name info for reduction identifier.
-  ///
   OMPTaskReductionClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                          SourceLocation ColonLoc, SourceLocation EndLoc,
                          unsigned N, NestedNameSpecifierLoc QualifierLoc,
@@ -1935,17 +1940,18 @@
   /// Build an empty clause.
   ///
   /// \param N Number of variables.
-  ///
   explicit OMPTaskReductionClause(unsigned N)
       : OMPVarListClause<OMPTaskReductionClause>(
             OMPC_task_reduction, SourceLocation(), SourceLocation(),
             SourceLocation(), N),
-        OMPClauseWithPostUpdate(this), ColonLoc(), QualifierLoc(), NameInfo() {}
+        OMPClauseWithPostUpdate(this) {}
 
   /// Sets location of ':' symbol in clause.
   void setColonLoc(SourceLocation CL) { ColonLoc = CL; }
+
   /// Sets the name info for specified reduction identifier.
   void setNameInfo(DeclarationNameInfo DNI) { NameInfo = DNI; }
+
   /// Sets the nested name specifier.
   void setQualifierLoc(NestedNameSpecifierLoc NSL) { QualifierLoc = NSL; }
 
@@ -2035,7 +2041,6 @@
   /// region with this clause.
   /// \param PostUpdate Expression that must be executed after exit from the
   /// OpenMP region with this clause.
-  ///
   static OMPTaskReductionClause *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
          SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef<Expr *> VL,
@@ -2048,44 +2053,52 @@
   ///
   /// \param C AST context.
   /// \param N The number of variables.
-  ///
   static OMPTaskReductionClause *CreateEmpty(const ASTContext &C, unsigned N);
 
   /// Gets location of ':' symbol in clause.
   SourceLocation getColonLoc() const { return ColonLoc; }
+
   /// Gets the name info for specified reduction identifier.
   const DeclarationNameInfo &getNameInfo() const { return NameInfo; }
+
   /// Gets the nested name specifier.
   NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
 
-  typedef MutableArrayRef<Expr *>::iterator helper_expr_iterator;
-  typedef ArrayRef<const Expr *>::iterator helper_expr_const_iterator;
-  typedef llvm::iterator_range<helper_expr_iterator> helper_expr_range;
-  typedef llvm::iterator_range<helper_expr_const_iterator>
-      helper_expr_const_range;
+  using helper_expr_iterator = MutableArrayRef<Expr *>::iterator;
+  using helper_expr_const_iterator = ArrayRef<const Expr *>::iterator;
+  using helper_expr_range = llvm::iterator_range<helper_expr_iterator>;
+  using helper_expr_const_range =
+      llvm::iterator_range<helper_expr_const_iterator>;
 
   helper_expr_const_range privates() const {
     return helper_expr_const_range(getPrivates().begin(), getPrivates().end());
   }
+
   helper_expr_range privates() {
     return helper_expr_range(getPrivates().begin(), getPrivates().end());
   }
+
   helper_expr_const_range lhs_exprs() const {
     return helper_expr_const_range(getLHSExprs().begin(), getLHSExprs().end());
   }
+
   helper_expr_range lhs_exprs() {
     return helper_expr_range(getLHSExprs().begin(), getLHSExprs().end());
   }
+
   helper_expr_const_range rhs_exprs() const {
     return helper_expr_const_range(getRHSExprs().begin(), getRHSExprs().end());
   }
+
   helper_expr_range rhs_exprs() {
     return helper_expr_range(getRHSExprs().begin(), getRHSExprs().end());
   }
+
   helper_expr_const_range reduction_ops() const {
     return helper_expr_const_range(getReductionOps().begin(),
                                    getReductionOps().end());
   }
+
   helper_expr_range reduction_ops() {
     return helper_expr_range(getReductionOps().begin(),
                              getReductionOps().end());
@@ -2108,18 +2121,20 @@
 /// \endcode
 /// In this example directive '#pragma omp task' has clause 'in_reduction' with
 /// operator '+' and the variables 'a' and 'b'.
-///
 class OMPInReductionClause final
     : public OMPVarListClause<OMPInReductionClause>,
       public OMPClauseWithPostUpdate,
       private llvm::TrailingObjects<OMPInReductionClause, Expr *> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
   friend class OMPClauseReader;
+  friend OMPVarListClause;
+  friend TrailingObjects;
+
   /// Location of ':'.
   SourceLocation ColonLoc;
+
   /// Nested name specifier for C++.
   NestedNameSpecifierLoc QualifierLoc;
+
   /// Name of custom operator.
   DeclarationNameInfo NameInfo;
 
@@ -2132,7 +2147,6 @@
   /// \param N Number of the variables in the clause.
   /// \param QualifierLoc The nested-name qualifier with location information
   /// \param NameInfo The full name info for reduction identifier.
-  ///
   OMPInReductionClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                        SourceLocation ColonLoc, SourceLocation EndLoc,
                        unsigned N, NestedNameSpecifierLoc QualifierLoc,
@@ -2145,17 +2159,18 @@
   /// Build an empty clause.
   ///
   /// \param N Number of variables.
-  ///
   explicit OMPInReductionClause(unsigned N)
       : OMPVarListClause<OMPInReductionClause>(
             OMPC_in_reduction, SourceLocation(), SourceLocation(),
             SourceLocation(), N),
-        OMPClauseWithPostUpdate(this), ColonLoc(), QualifierLoc(), NameInfo() {}
+        OMPClauseWithPostUpdate(this) {}
 
   /// Sets location of ':' symbol in clause.
   void setColonLoc(SourceLocation CL) { ColonLoc = CL; }
+
   /// Sets the name info for specified reduction identifier.
   void setNameInfo(DeclarationNameInfo DNI) { NameInfo = DNI; }
+
   /// Sets the nested name specifier.
   void setQualifierLoc(NestedNameSpecifierLoc NSL) { QualifierLoc = NSL; }
 
@@ -2258,7 +2273,6 @@
   /// region with this clause.
   /// \param PostUpdate Expression that must be executed after exit from the
   /// OpenMP region with this clause.
-  ///
   static OMPInReductionClause *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
          SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef<Expr *> VL,
@@ -2272,52 +2286,62 @@
   ///
   /// \param C AST context.
   /// \param N The number of variables.
-  ///
   static OMPInReductionClause *CreateEmpty(const ASTContext &C, unsigned N);
 
   /// Gets location of ':' symbol in clause.
   SourceLocation getColonLoc() const { return ColonLoc; }
+
   /// Gets the name info for specified reduction identifier.
   const DeclarationNameInfo &getNameInfo() const { return NameInfo; }
+
   /// Gets the nested name specifier.
   NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
 
-  typedef MutableArrayRef<Expr *>::iterator helper_expr_iterator;
-  typedef ArrayRef<const Expr *>::iterator helper_expr_const_iterator;
-  typedef llvm::iterator_range<helper_expr_iterator> helper_expr_range;
-  typedef llvm::iterator_range<helper_expr_const_iterator>
-      helper_expr_const_range;
+  using helper_expr_iterator = MutableArrayRef<Expr *>::iterator;
+  using helper_expr_const_iterator = ArrayRef<const Expr *>::iterator;
+  using helper_expr_range = llvm::iterator_range<helper_expr_iterator>;
+  using helper_expr_const_range =
+      llvm::iterator_range<helper_expr_const_iterator>;
 
   helper_expr_const_range privates() const {
     return helper_expr_const_range(getPrivates().begin(), getPrivates().end());
   }
+
   helper_expr_range privates() {
     return helper_expr_range(getPrivates().begin(), getPrivates().end());
   }
+
   helper_expr_const_range lhs_exprs() const {
     return helper_expr_const_range(getLHSExprs().begin(), getLHSExprs().end());
   }
+
   helper_expr_range lhs_exprs() {
     return helper_expr_range(getLHSExprs().begin(), getLHSExprs().end());
   }
+
   helper_expr_const_range rhs_exprs() const {
     return helper_expr_const_range(getRHSExprs().begin(), getRHSExprs().end());
   }
+
   helper_expr_range rhs_exprs() {
     return helper_expr_range(getRHSExprs().begin(), getRHSExprs().end());
   }
+
   helper_expr_const_range reduction_ops() const {
     return helper_expr_const_range(getReductionOps().begin(),
                                    getReductionOps().end());
   }
+
   helper_expr_range reduction_ops() {
     return helper_expr_range(getReductionOps().begin(),
                              getReductionOps().end());
   }
+
   helper_expr_const_range taskgroup_descriptors() const {
     return helper_expr_const_range(getTaskgroupDescriptors().begin(),
                                    getTaskgroupDescriptors().end());
   }
+
   helper_expr_range taskgroup_descriptors() {
     return helper_expr_range(getTaskgroupDescriptors().begin(),
                              getTaskgroupDescriptors().end());
@@ -2341,18 +2365,20 @@
 /// \endcode
 /// In this example directive '#pragma omp simd' has clause 'linear'
 /// with variables 'a', 'b' and linear step '2'.
-///
 class OMPLinearClause final
     : public OMPVarListClause<OMPLinearClause>,
       public OMPClauseWithPostUpdate,
       private llvm::TrailingObjects<OMPLinearClause, Expr *> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
   friend class OMPClauseReader;
+  friend OMPVarListClause;
+  friend TrailingObjects;
+
   /// \brief Modifier of 'linear' clause.
-  OpenMPLinearClauseKind Modifier;
+  OpenMPLinearClauseKind Modifier = OMPC_LINEAR_val;
+
   /// \brief Location of linear modifier if any.
   SourceLocation ModifierLoc;
+
   /// \brief Location of ':'.
   SourceLocation ColonLoc;
 
@@ -2369,7 +2395,6 @@
   /// \param ColonLoc Location of ':'.
   /// \param EndLoc Ending location of the clause.
   /// \param NumVars Number of variables.
-  ///
   OMPLinearClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                   OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc,
                   SourceLocation ColonLoc, SourceLocation EndLoc,
@@ -2382,13 +2407,11 @@
   /// \brief Build an empty clause.
   ///
   /// \param NumVars Number of variables.
-  ///
   explicit OMPLinearClause(unsigned NumVars)
       : OMPVarListClause<OMPLinearClause>(OMPC_linear, SourceLocation(),
                                           SourceLocation(), SourceLocation(),
                                           NumVars),
-        OMPClauseWithPostUpdate(this), Modifier(OMPC_LINEAR_val), ModifierLoc(),
-        ColonLoc() {}
+        OMPClauseWithPostUpdate(this) {}
 
   /// \brief Gets the list of initial values for linear variables.
   ///
@@ -2402,7 +2425,6 @@
   ///
   /// { Vars[] /* in OMPVarListClause */; Privates[]; Inits[]; Updates[];
   /// Finals[]; Step; CalcStep; }
-  ///
   MutableArrayRef<Expr *> getPrivates() {
     return MutableArrayRef<Expr *>(varlist_end(), varlist_size());
   }
@@ -2472,30 +2494,35 @@
   ///
   /// \param C AST context.
   /// \param NumVars Number of variables.
-  ///
   static OMPLinearClause *CreateEmpty(const ASTContext &C, unsigned NumVars);
 
   /// \brief Set modifier.
   void setModifier(OpenMPLinearClauseKind Kind) { Modifier = Kind; }
+
   /// \brief Return modifier.
   OpenMPLinearClauseKind getModifier() const { return Modifier; }
 
   /// \brief Set modifier location.
   void setModifierLoc(SourceLocation Loc) { ModifierLoc = Loc; }
+
   /// \brief Return modifier location.
   SourceLocation getModifierLoc() const { return ModifierLoc; }
 
   /// \brief Sets the location of ':'.
   void setColonLoc(SourceLocation Loc) { ColonLoc = Loc; }
+
   /// \brief Returns the location of ':'.
   SourceLocation getColonLoc() const { return ColonLoc; }
 
   /// \brief Returns linear step.
   Expr *getStep() { return *(getFinals().end()); }
+
   /// \brief Returns linear step.
   const Expr *getStep() const { return *(getFinals().end()); }
+
   /// \brief Returns expression to calculate linear step.
   Expr *getCalcStep() { return *(getFinals().end() + 1); }
+
   /// \brief Returns expression to calculate linear step.
   const Expr *getCalcStep() const { return *(getFinals().end() + 1); }
 
@@ -2507,50 +2534,54 @@
   /// \param FL List of expressions.
   void setFinals(ArrayRef<Expr *> FL);
 
-  typedef MutableArrayRef<Expr *>::iterator privates_iterator;
-  typedef ArrayRef<const Expr *>::iterator privates_const_iterator;
-  typedef llvm::iterator_range<privates_iterator> privates_range;
-  typedef llvm::iterator_range<privates_const_iterator> privates_const_range;
+  using privates_iterator = MutableArrayRef<Expr *>::iterator;
+  using privates_const_iterator = ArrayRef<const Expr *>::iterator;
+  using privates_range = llvm::iterator_range<privates_iterator>;
+  using privates_const_range = llvm::iterator_range<privates_const_iterator>;
 
   privates_range privates() {
     return privates_range(getPrivates().begin(), getPrivates().end());
   }
+
   privates_const_range privates() const {
     return privates_const_range(getPrivates().begin(), getPrivates().end());
   }
 
-  typedef MutableArrayRef<Expr *>::iterator inits_iterator;
-  typedef ArrayRef<const Expr *>::iterator inits_const_iterator;
-  typedef llvm::iterator_range<inits_iterator> inits_range;
-  typedef llvm::iterator_range<inits_const_iterator> inits_const_range;
+  using inits_iterator = MutableArrayRef<Expr *>::iterator;
+  using inits_const_iterator = ArrayRef<const Expr *>::iterator;
+  using inits_range = llvm::iterator_range<inits_iterator>;
+  using inits_const_range = llvm::iterator_range<inits_const_iterator>;
 
   inits_range inits() {
     return inits_range(getInits().begin(), getInits().end());
   }
+
   inits_const_range inits() const {
     return inits_const_range(getInits().begin(), getInits().end());
   }
 
-  typedef MutableArrayRef<Expr *>::iterator updates_iterator;
-  typedef ArrayRef<const Expr *>::iterator updates_const_iterator;
-  typedef llvm::iterator_range<updates_iterator> updates_range;
-  typedef llvm::iterator_range<updates_const_iterator> updates_const_range;
+  using updates_iterator = MutableArrayRef<Expr *>::iterator;
+  using updates_const_iterator = ArrayRef<const Expr *>::iterator;
+  using updates_range = llvm::iterator_range<updates_iterator>;
+  using updates_const_range = llvm::iterator_range<updates_const_iterator>;
 
   updates_range updates() {
     return updates_range(getUpdates().begin(), getUpdates().end());
   }
+
   updates_const_range updates() const {
     return updates_const_range(getUpdates().begin(), getUpdates().end());
   }
 
-  typedef MutableArrayRef<Expr *>::iterator finals_iterator;
-  typedef ArrayRef<const Expr *>::iterator finals_const_iterator;
-  typedef llvm::iterator_range<finals_iterator> finals_range;
-  typedef llvm::iterator_range<finals_const_iterator> finals_const_range;
+  using finals_iterator = MutableArrayRef<Expr *>::iterator;
+  using finals_const_iterator = ArrayRef<const Expr *>::iterator;
+  using finals_range = llvm::iterator_range<finals_iterator>;
+  using finals_const_range = llvm::iterator_range<finals_const_iterator>;
 
   finals_range finals() {
     return finals_range(getFinals().begin(), getFinals().end());
   }
+
   finals_const_range finals() const {
     return finals_const_range(getFinals().begin(), getFinals().end());
   }
@@ -2573,13 +2604,13 @@
 /// \endcode
 /// In this example directive '#pragma omp simd' has clause 'aligned'
 /// with variables 'a', 'b' and alignment '8'.
-///
 class OMPAlignedClause final
     : public OMPVarListClause<OMPAlignedClause>,
       private llvm::TrailingObjects<OMPAlignedClause, Expr *> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
   friend class OMPClauseReader;
+  friend OMPVarListClause;
+  friend TrailingObjects;
+
   /// \brief Location of ':'.
   SourceLocation ColonLoc;
 
@@ -2593,7 +2624,6 @@
   /// \param ColonLoc Location of ':'.
   /// \param EndLoc Ending location of the clause.
   /// \param NumVars Number of variables.
-  ///
   OMPAlignedClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                    SourceLocation ColonLoc, SourceLocation EndLoc,
                    unsigned NumVars)
@@ -2604,12 +2634,10 @@
   /// \brief Build an empty clause.
   ///
   /// \param NumVars Number of variables.
-  ///
   explicit OMPAlignedClause(unsigned NumVars)
       : OMPVarListClause<OMPAlignedClause>(OMPC_aligned, SourceLocation(),
                                            SourceLocation(), SourceLocation(),
-                                           NumVars),
-        ColonLoc(SourceLocation()) {}
+                                           NumVars) {}
 
 public:
   /// \brief Creates clause with a list of variables \a VL and alignment \a A.
@@ -2631,16 +2659,17 @@
   ///
   /// \param C AST context.
   /// \param NumVars Number of variables.
-  ///
   static OMPAlignedClause *CreateEmpty(const ASTContext &C, unsigned NumVars);
 
   /// \brief Sets the location of ':'.
   void setColonLoc(SourceLocation Loc) { ColonLoc = Loc; }
+
   /// \brief Returns the location of ':'.
   SourceLocation getColonLoc() const { return ColonLoc; }
 
   /// \brief Returns alignment.
   Expr *getAlignment() { return *varlist_end(); }
+
   /// \brief Returns alignment.
   const Expr *getAlignment() const { return *varlist_end(); }
 
@@ -2661,7 +2690,6 @@
 /// \endcode
 /// In this example directive '#pragma omp parallel' has clause 'copyin'
 /// with the variables 'a' and 'b'.
-///
 class OMPCopyinClause final
     : public OMPVarListClause<OMPCopyinClause>,
       private llvm::TrailingObjects<OMPCopyinClause, Expr *> {
@@ -2678,16 +2706,16 @@
   // threadprivate variables to local instances of that variables in other
   // implicit threads.
 
-  friend TrailingObjects;
-  friend OMPVarListClause;
   friend class OMPClauseReader;
+  friend OMPVarListClause;
+  friend TrailingObjects;
+
   /// \brief Build clause with number of variables \a N.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
   /// \param N Number of the variables in the clause.
-  ///
   OMPCopyinClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                   SourceLocation EndLoc, unsigned N)
       : OMPVarListClause<OMPCopyinClause>(OMPC_copyin, StartLoc, LParenLoc,
@@ -2696,7 +2724,6 @@
   /// \brief Build an empty clause.
   ///
   /// \param N Number of variables.
-  ///
   explicit OMPCopyinClause(unsigned N)
       : OMPVarListClause<OMPCopyinClause>(OMPC_copyin, SourceLocation(),
                                           SourceLocation(), SourceLocation(),
@@ -2764,43 +2791,47 @@
   /// Required for proper codegen of propagation of master's thread values of
   /// threadprivate variables to local instances of that variables in other
   /// implicit threads.
-  ///
   static OMPCopyinClause *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
          SourceLocation EndLoc, ArrayRef<Expr *> VL, ArrayRef<Expr *> SrcExprs,
          ArrayRef<Expr *> DstExprs, ArrayRef<Expr *> AssignmentOps);
+
   /// \brief Creates an empty clause with \a N variables.
   ///
   /// \param C AST context.
   /// \param N The number of variables.
-  ///
   static OMPCopyinClause *CreateEmpty(const ASTContext &C, unsigned N);
 
-  typedef MutableArrayRef<Expr *>::iterator helper_expr_iterator;
-  typedef ArrayRef<const Expr *>::iterator helper_expr_const_iterator;
-  typedef llvm::iterator_range<helper_expr_iterator> helper_expr_range;
-  typedef llvm::iterator_range<helper_expr_const_iterator>
-      helper_expr_const_range;
+  using helper_expr_iterator = MutableArrayRef<Expr *>::iterator;
+  using helper_expr_const_iterator = ArrayRef<const Expr *>::iterator;
+  using helper_expr_range = llvm::iterator_range<helper_expr_iterator>;
+  using helper_expr_const_range =
+      llvm::iterator_range<helper_expr_const_iterator>;
 
   helper_expr_const_range source_exprs() const {
     return helper_expr_const_range(getSourceExprs().begin(),
                                    getSourceExprs().end());
   }
+
   helper_expr_range source_exprs() {
     return helper_expr_range(getSourceExprs().begin(), getSourceExprs().end());
   }
+
   helper_expr_const_range destination_exprs() const {
     return helper_expr_const_range(getDestinationExprs().begin(),
                                    getDestinationExprs().end());
   }
+
   helper_expr_range destination_exprs() {
     return helper_expr_range(getDestinationExprs().begin(),
                              getDestinationExprs().end());
   }
+
   helper_expr_const_range assignment_ops() const {
     return helper_expr_const_range(getAssignmentOps().begin(),
                                    getAssignmentOps().end());
   }
+
   helper_expr_range assignment_ops() {
     return helper_expr_range(getAssignmentOps().begin(),
                              getAssignmentOps().end());
@@ -2824,20 +2855,19 @@
 /// \endcode
 /// In this example directive '#pragma omp single' has clause 'copyprivate'
 /// with the variables 'a' and 'b'.
-///
 class OMPCopyprivateClause final
     : public OMPVarListClause<OMPCopyprivateClause>,
       private llvm::TrailingObjects<OMPCopyprivateClause, Expr *> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
   friend class OMPClauseReader;
+  friend OMPVarListClause;
+  friend TrailingObjects;
+
   /// \brief Build clause with number of variables \a N.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
   /// \param N Number of the variables in the clause.
-  ///
   OMPCopyprivateClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                        SourceLocation EndLoc, unsigned N)
       : OMPVarListClause<OMPCopyprivateClause>(OMPC_copyprivate, StartLoc,
@@ -2846,7 +2876,6 @@
   /// \brief Build an empty clause.
   ///
   /// \param N Number of variables.
-  ///
   explicit OMPCopyprivateClause(unsigned N)
       : OMPVarListClause<OMPCopyprivateClause>(
             OMPC_copyprivate, SourceLocation(), SourceLocation(),
@@ -2913,43 +2942,47 @@
   /// \endcode
   /// Required for proper codegen of final assignment performed by the
   /// copyprivate clause.
-  ///
   static OMPCopyprivateClause *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
          SourceLocation EndLoc, ArrayRef<Expr *> VL, ArrayRef<Expr *> SrcExprs,
          ArrayRef<Expr *> DstExprs, ArrayRef<Expr *> AssignmentOps);
+
   /// \brief Creates an empty clause with \a N variables.
   ///
   /// \param C AST context.
   /// \param N The number of variables.
-  ///
   static OMPCopyprivateClause *CreateEmpty(const ASTContext &C, unsigned N);
 
-  typedef MutableArrayRef<Expr *>::iterator helper_expr_iterator;
-  typedef ArrayRef<const Expr *>::iterator helper_expr_const_iterator;
-  typedef llvm::iterator_range<helper_expr_iterator> helper_expr_range;
-  typedef llvm::iterator_range<helper_expr_const_iterator>
-      helper_expr_const_range;
+  using helper_expr_iterator = MutableArrayRef<Expr *>::iterator;
+  using helper_expr_const_iterator = ArrayRef<const Expr *>::iterator;
+  using helper_expr_range = llvm::iterator_range<helper_expr_iterator>;
+  using helper_expr_const_range =
+      llvm::iterator_range<helper_expr_const_iterator>;
 
   helper_expr_const_range source_exprs() const {
     return helper_expr_const_range(getSourceExprs().begin(),
                                    getSourceExprs().end());
   }
+
   helper_expr_range source_exprs() {
     return helper_expr_range(getSourceExprs().begin(), getSourceExprs().end());
   }
+
   helper_expr_const_range destination_exprs() const {
     return helper_expr_const_range(getDestinationExprs().begin(),
                                    getDestinationExprs().end());
   }
+
   helper_expr_range destination_exprs() {
     return helper_expr_range(getDestinationExprs().begin(),
                              getDestinationExprs().end());
   }
+
   helper_expr_const_range assignment_ops() const {
     return helper_expr_const_range(getAssignmentOps().begin(),
                                    getAssignmentOps().end());
   }
+
   helper_expr_range assignment_ops() {
     return helper_expr_range(getAssignmentOps().begin(),
                              getAssignmentOps().end());
@@ -2977,19 +3010,18 @@
 /// \endcode
 /// In this example directive '#pragma omp flush' has implicit clause 'flush'
 /// with the variables 'a' and 'b'.
-///
 class OMPFlushClause final
     : public OMPVarListClause<OMPFlushClause>,
       private llvm::TrailingObjects<OMPFlushClause, Expr *> {
-  friend TrailingObjects;
   friend OMPVarListClause;
+  friend TrailingObjects;
+
   /// \brief Build clause with number of variables \a N.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
   /// \param N Number of the variables in the clause.
-  ///
   OMPFlushClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                  SourceLocation EndLoc, unsigned N)
       : OMPVarListClause<OMPFlushClause>(OMPC_flush, StartLoc, LParenLoc,
@@ -2998,7 +3030,6 @@
   /// \brief Build an empty clause.
   ///
   /// \param N Number of variables.
-  ///
   explicit OMPFlushClause(unsigned N)
       : OMPVarListClause<OMPFlushClause>(OMPC_flush, SourceLocation(),
                                          SourceLocation(), SourceLocation(),
@@ -3012,15 +3043,14 @@
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
   /// \param VL List of references to the variables.
-  ///
   static OMPFlushClause *Create(const ASTContext &C, SourceLocation StartLoc,
                                 SourceLocation LParenLoc, SourceLocation EndLoc,
                                 ArrayRef<Expr *> VL);
+
   /// \brief Creates an empty clause with \a N variables.
   ///
   /// \param C AST context.
   /// \param N The number of variables.
-  ///
   static OMPFlushClause *CreateEmpty(const ASTContext &C, unsigned N);
 
   child_range children() {
@@ -3041,41 +3071,41 @@
 /// \endcode
 /// In this example directive '#pragma omp task' with clause 'depend' with the
 /// variables 'a' and 'b' with dependency 'in'.
-///
 class OMPDependClause final
     : public OMPVarListClause<OMPDependClause>,
       private llvm::TrailingObjects<OMPDependClause, Expr *> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
   friend class OMPClauseReader;
+  friend OMPVarListClause;
+  friend TrailingObjects;
+
   /// \brief Dependency type (one of in, out, inout).
-  OpenMPDependClauseKind DepKind;
+  OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown;
+
   /// \brief Dependency type location.
   SourceLocation DepLoc;
+
   /// \brief Colon location.
   SourceLocation ColonLoc;
+
   /// \brief Build clause with number of variables \a N.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
   /// \param N Number of the variables in the clause.
-  ///
   OMPDependClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                   SourceLocation EndLoc, unsigned N)
       : OMPVarListClause<OMPDependClause>(OMPC_depend, StartLoc, LParenLoc,
-                                          EndLoc, N),
-        DepKind(OMPC_DEPEND_unknown) {}
+                                          EndLoc, N) {}
 
   /// \brief Build an empty clause.
   ///
   /// \param N Number of variables.
-  ///
   explicit OMPDependClause(unsigned N)
       : OMPVarListClause<OMPDependClause>(OMPC_depend, SourceLocation(),
                                           SourceLocation(), SourceLocation(),
-                                          N),
-        DepKind(OMPC_DEPEND_unknown) {}
+                                          N) {}
+
   /// \brief Set dependency kind.
   void setDependencyKind(OpenMPDependClauseKind K) { DepKind = K; }
 
@@ -3100,25 +3130,29 @@
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
          SourceLocation EndLoc, OpenMPDependClauseKind DepKind,
          SourceLocation DepLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VL);
+
   /// \brief Creates an empty clause with \a N variables.
   ///
   /// \param C AST context.
   /// \param N The number of variables.
-  ///
   static OMPDependClause *CreateEmpty(const ASTContext &C, unsigned N);
 
   /// \brief Get dependency type.
   OpenMPDependClauseKind getDependencyKind() const { return DepKind; }
+
   /// \brief Get dependency type location.
   SourceLocation getDependencyLoc() const { return DepLoc; }
+
   /// \brief Get colon location.
   SourceLocation getColonLoc() const { return ColonLoc; }
 
   /// Set the loop counter value for the depend clauses with 'sink|source' kind
   /// of dependency. Required for codegen.
   void setCounterValue(Expr *V);
+
   /// Get the loop counter value.
   Expr *getCounterValue();
+
   /// Get the loop counter value.
   const Expr *getCounterValue() const;
 
@@ -3140,54 +3174,59 @@
 /// \endcode
 /// In this example directive '#pragma omp target' has clause 'device'
 /// with single expression 'a'.
-///
 class OMPDeviceClause : public OMPClause, public OMPClauseWithPreInit {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Device number.
-  Stmt *Device;
+  Stmt *Device = nullptr;
+
   /// \brief Set the device number.
   ///
   /// \param E Device number.
-  ///
   void setDevice(Expr *E) { Device = E; }
 
 public:
   /// \brief Build 'device' clause.
   ///
   /// \param E Expression associated with this clause.
+  /// \param CaptureRegion Innermost OpenMP region where expressions in this
+  /// clause must be captured.
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
-  ///
-  OMPDeviceClause(Expr *E, Stmt *HelperE, SourceLocation StartLoc,
-                  SourceLocation LParenLoc, SourceLocation EndLoc)
+  OMPDeviceClause(Expr *E, Stmt *HelperE, OpenMPDirectiveKind CaptureRegion,
+                  SourceLocation StartLoc, SourceLocation LParenLoc,
+                  SourceLocation EndLoc)
       : OMPClause(OMPC_device, StartLoc, EndLoc), OMPClauseWithPreInit(this),
         LParenLoc(LParenLoc), Device(E) {
-    setPreInitStmt(HelperE);
+    setPreInitStmt(HelperE, CaptureRegion);
   }
 
   /// \brief Build an empty clause.
-  ///
   OMPDeviceClause()
       : OMPClause(OMPC_device, SourceLocation(), SourceLocation()),
-        OMPClauseWithPreInit(this), LParenLoc(SourceLocation()),
-        Device(nullptr) {}
+        OMPClauseWithPreInit(this) {}
+
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
+
   /// \brief Return device number.
   Expr *getDevice() { return cast<Expr>(Device); }
+
   /// \brief Return device number.
   Expr *getDevice() const { return cast<Expr>(Device); }
 
+  child_range children() { return child_range(&Device, &Device + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_device;
   }
-
-  child_range children() { return child_range(&Device, &Device + 1); }
 };
 
 /// \brief This represents 'threads' clause in the '#pragma omp ...' directive.
@@ -3196,29 +3235,26 @@
 /// #pragma omp ordered threads
 /// \endcode
 /// In this example directive '#pragma omp ordered' has simple 'threads' clause.
-///
 class OMPThreadsClause : public OMPClause {
 public:
   /// \brief Build 'threads' clause.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPThreadsClause(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_threads, StartLoc, EndLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPThreadsClause()
       : OMPClause(OMPC_threads, SourceLocation(), SourceLocation()) {}
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_threads;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_threads;
+  }
 };
 
 /// \brief This represents 'simd' clause in the '#pragma omp ...' directive.
@@ -3227,28 +3263,25 @@
 /// #pragma omp ordered simd
 /// \endcode
 /// In this example directive '#pragma omp ordered' has simple 'simd' clause.
-///
 class OMPSIMDClause : public OMPClause {
 public:
   /// \brief Build 'simd' clause.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPSIMDClause(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_simd, StartLoc, EndLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPSIMDClause() : OMPClause(OMPC_simd, SourceLocation(), SourceLocation()) {}
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_simd;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_simd;
+  }
 };
 
 /// \brief Struct that defines common infrastructure to handle mappable
@@ -3264,13 +3297,14 @@
   class MappableComponent {
     // \brief Expression associated with the component.
     Expr *AssociatedExpression = nullptr;
+
     // \brief Declaration associated with the declaration. If the component does
     // not have a declaration (e.g. array subscripts or section), this is set to
     // nullptr.
     ValueDecl *AssociatedDeclaration = nullptr;
 
   public:
-    explicit MappableComponent() {}
+    explicit MappableComponent() = default;
     explicit MappableComponent(Expr *AssociatedExpression,
                                ValueDecl *AssociatedDeclaration)
         : AssociatedExpression(AssociatedExpression),
@@ -3280,6 +3314,7 @@
                   : nullptr) {}
 
     Expr *getAssociatedExpression() const { return AssociatedExpression; }
+
     ValueDecl *getAssociatedDeclaration() const {
       return AssociatedDeclaration;
     }
@@ -3287,14 +3322,14 @@
 
   // \brief List of components of an expression. This first one is the whole
   // expression and the last one is the base expression.
-  typedef SmallVector<MappableComponent, 8> MappableExprComponentList;
-  typedef ArrayRef<MappableComponent> MappableExprComponentListRef;
+  using MappableExprComponentList = SmallVector<MappableComponent, 8>;
+  using MappableExprComponentListRef = ArrayRef<MappableComponent>;
 
   // \brief List of all component lists associated to the same base declaration.
   // E.g. if both 'S.a' and 'S.b' are a mappable expressions, each will have
   // their component list but the same base declaration 'S'.
-  typedef SmallVector<MappableExprComponentList, 8> MappableExprComponentLists;
-  typedef ArrayRef<MappableExprComponentList> MappableExprComponentListsRef;
+  using MappableExprComponentLists = SmallVector<MappableExprComponentList, 8>;
+  using MappableExprComponentListsRef = ArrayRef<MappableExprComponentList>;
 
 protected:
   // \brief Return the total number of elements in a list of component lists.
@@ -3326,6 +3361,28 @@
   unsigned NumComponents;
 
 protected:
+  /// \brief Build a clause for \a NumUniqueDeclarations declarations, \a
+  /// NumComponentLists total component lists, and \a NumComponents total
+  /// components.
+  ///
+  /// \param K Kind of the clause.
+  /// \param StartLoc Starting location of the clause (the clause keyword).
+  /// \param LParenLoc Location of '('.
+  /// \param EndLoc Ending location of the clause.
+  /// \param NumVars Number of expressions listed in the clause.
+  /// \param NumUniqueDeclarations Number of unique base declarations in this
+  /// clause.
+  /// \param NumComponentLists Number of component lists in this clause - one
+  /// list for each expression in the clause.
+  /// \param NumComponents Total number of expression components in the clause.
+  OMPMappableExprListClause(OpenMPClauseKind K, SourceLocation StartLoc,
+                            SourceLocation LParenLoc, SourceLocation EndLoc,
+                            unsigned NumVars, unsigned NumUniqueDeclarations,
+                            unsigned NumComponentLists, unsigned NumComponents)
+      : OMPVarListClause<T>(K, StartLoc, LParenLoc, EndLoc, NumVars),
+        NumUniqueDeclarations(NumUniqueDeclarations),
+        NumComponentLists(NumComponentLists), NumComponents(NumComponents) {}
+
   /// \brief Get the unique declarations that are in the trailing objects of the
   /// class.
   MutableArrayRef<ValueDecl *> getUniqueDeclsRef() {
@@ -3505,34 +3562,13 @@
     }
   }
 
-  /// \brief Build a clause for \a NumUniqueDeclarations declarations, \a
-  /// NumComponentLists total component lists, and \a NumComponents total
-  /// components.
-  ///
-  /// \param K Kind of the clause.
-  /// \param StartLoc Starting location of the clause (the clause keyword).
-  /// \param LParenLoc Location of '('.
-  /// \param EndLoc Ending location of the clause.
-  /// \param NumVars Number of expressions listed in the clause.
-  /// \param NumUniqueDeclarations Number of unique base declarations in this
-  /// clause.
-  /// \param NumComponentLists Number of component lists in this clause - one
-  /// list for each expression in the clause.
-  /// \param NumComponents Total number of expression components in the clause.
-  ///
-  OMPMappableExprListClause(OpenMPClauseKind K, SourceLocation StartLoc,
-                            SourceLocation LParenLoc, SourceLocation EndLoc,
-                            unsigned NumVars, unsigned NumUniqueDeclarations,
-                            unsigned NumComponentLists, unsigned NumComponents)
-      : OMPVarListClause<T>(K, StartLoc, LParenLoc, EndLoc, NumVars),
-        NumUniqueDeclarations(NumUniqueDeclarations),
-        NumComponentLists(NumComponentLists), NumComponents(NumComponents) {}
-
 public:
   /// \brief Return the number of unique base declarations in this clause.
   unsigned getUniqueDeclarationsNum() const { return NumUniqueDeclarations; }
+
   /// \brief Return the number of lists derived from the clause expressions.
   unsigned getTotalComponentListNum() const { return NumComponentLists; }
+
   /// \brief Return the total number of components in all lists derived from the
   /// clause.
   unsigned getTotalComponentsNum() const { return NumComponents; }
@@ -3552,11 +3588,11 @@
     ArrayRef<unsigned>::iterator NumListsCur;
 
     // Remaining lists for the current declaration.
-    unsigned RemainingLists;
+    unsigned RemainingLists = 0;
 
     // The cumulative size of the previous list, or zero if there is no previous
     // list.
-    unsigned PrevListSize;
+    unsigned PrevListSize = 0;
 
     // The cumulative sizes of the current list - it will delimit the remaining
     // range of interest.
@@ -3575,7 +3611,6 @@
         : const_component_lists_iterator::iterator_adaptor_base(
               Components.begin()),
           DeclCur(UniqueDecls.begin()), NumListsCur(DeclsListNum.begin()),
-          RemainingLists(0u), PrevListSize(0u),
           ListSizeCur(CumulativeListSizes.begin()),
           ListSizeEnd(CumulativeListSizes.end()), End(Components.end()) {
       assert(UniqueDecls.size() == DeclsListNum.size() &&
@@ -3592,7 +3627,6 @@
         MappableExprComponentListRef Components)
         : const_component_lists_iterator(UniqueDecls, DeclsListNum,
                                          CumulativeListSizes, Components) {
-
       // Look for the desired declaration. While we are looking for it, we
       // update the state so that we know the component where a given list
       // starts.
@@ -3672,8 +3706,8 @@
     }
   };
 
-  typedef llvm::iterator_range<const_component_lists_iterator>
-      const_component_lists_range;
+  using const_component_lists_range =
+      llvm::iterator_range<const_component_lists_iterator>;
 
   /// \brief Iterators for all component lists.
   const_component_lists_iterator component_lists_begin() const {
@@ -3708,32 +3742,36 @@
 
   /// Iterators to access all the declarations, number of lists, list sizes, and
   /// components.
-  typedef ArrayRef<ValueDecl *>::iterator const_all_decls_iterator;
-  typedef llvm::iterator_range<const_all_decls_iterator> const_all_decls_range;
+  using const_all_decls_iterator = ArrayRef<ValueDecl *>::iterator;
+  using const_all_decls_range = llvm::iterator_range<const_all_decls_iterator>;
+
   const_all_decls_range all_decls() const {
     auto A = getUniqueDeclsRef();
     return const_all_decls_range(A.begin(), A.end());
   }
 
-  typedef ArrayRef<unsigned>::iterator const_all_num_lists_iterator;
-  typedef llvm::iterator_range<const_all_num_lists_iterator>
-      const_all_num_lists_range;
+  using const_all_num_lists_iterator = ArrayRef<unsigned>::iterator;
+  using const_all_num_lists_range =
+      llvm::iterator_range<const_all_num_lists_iterator>;
+
   const_all_num_lists_range all_num_lists() const {
     auto A = getDeclNumListsRef();
     return const_all_num_lists_range(A.begin(), A.end());
   }
 
-  typedef ArrayRef<unsigned>::iterator const_all_lists_sizes_iterator;
-  typedef llvm::iterator_range<const_all_lists_sizes_iterator>
-      const_all_lists_sizes_range;
+  using const_all_lists_sizes_iterator = ArrayRef<unsigned>::iterator;
+  using const_all_lists_sizes_range =
+      llvm::iterator_range<const_all_lists_sizes_iterator>;
+
   const_all_lists_sizes_range all_lists_sizes() const {
     auto A = getComponentListSizesRef();
     return const_all_lists_sizes_range(A.begin(), A.end());
   }
 
-  typedef ArrayRef<MappableComponent>::iterator const_all_components_iterator;
-  typedef llvm::iterator_range<const_all_components_iterator>
-      const_all_components_range;
+  using const_all_components_iterator = ArrayRef<MappableComponent>::iterator;
+  using const_all_components_range =
+      llvm::iterator_range<const_all_components_iterator>;
+
   const_all_components_range all_components() const {
     auto A = getComponentsRef();
     return const_all_components_range(A.begin(), A.end());
@@ -3748,15 +3786,14 @@
 /// \endcode
 /// In this example directive '#pragma omp target' has clause 'map'
 /// with the variables 'a' and 'b'.
-///
 class OMPMapClause final : public OMPMappableExprListClause<OMPMapClause>,
                            private llvm::TrailingObjects<
                                OMPMapClause, Expr *, ValueDecl *, unsigned,
                                OMPClauseMappableExprCommon::MappableComponent> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
-  friend OMPMappableExprListClause;
   friend class OMPClauseReader;
+  friend OMPMappableExprListClause;
+  friend OMPVarListClause;
+  friend TrailingObjects;
 
   /// Define the sizes of each trailing object array except the last one. This
   /// is required for TrailingObjects to work properly.
@@ -3771,37 +3808,20 @@
   }
 
   /// \brief Map type modifier for the 'map' clause.
-  OpenMPMapClauseKind MapTypeModifier;
+  OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
+
   /// \brief Map type for the 'map' clause.
-  OpenMPMapClauseKind MapType;
+  OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
+
   /// \brief Is this an implicit map type or not.
-  bool MapTypeIsImplicit;
+  bool MapTypeIsImplicit = false;
+
   /// \brief Location of the map type.
   SourceLocation MapLoc;
+
   /// \brief Colon location.
   SourceLocation ColonLoc;
 
-  /// \brief Set type modifier for the clause.
-  ///
-  /// \param T Type Modifier for the clause.
-  ///
-  void setMapTypeModifier(OpenMPMapClauseKind T) { MapTypeModifier = T; }
-
-  /// \brief Set type for the clause.
-  ///
-  /// \param T Type for the clause.
-  ///
-  void setMapType(OpenMPMapClauseKind T) { MapType = T; }
-
-  /// \brief Set type location.
-  ///
-  /// \param TLoc Type location.
-  ///
-  void setMapLoc(SourceLocation TLoc) { MapLoc = TLoc; }
-
-  /// \brief Set colon location.
-  void setColonLoc(SourceLocation Loc) { ColonLoc = Loc; }
-
   /// \brief Build a clause for \a NumVars listed expressions, \a
   /// NumUniqueDeclarations declarations, \a NumComponentLists total component
   /// lists, and \a NumComponents total expression components.
@@ -3817,7 +3837,6 @@
   /// clause.
   /// \param NumComponentLists Number of component lists in this clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   explicit OMPMapClause(OpenMPMapClauseKind MapTypeModifier,
                         OpenMPMapClauseKind MapType, bool MapTypeIsImplicit,
                         SourceLocation MapLoc, SourceLocation StartLoc,
@@ -3837,14 +3856,29 @@
   /// clause.
   /// \param NumComponentLists Number of component lists in this clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   explicit OMPMapClause(unsigned NumVars, unsigned NumUniqueDeclarations,
                         unsigned NumComponentLists, unsigned NumComponents)
       : OMPMappableExprListClause(
             OMPC_map, SourceLocation(), SourceLocation(), SourceLocation(),
-            NumVars, NumUniqueDeclarations, NumComponentLists, NumComponents),
-        MapTypeModifier(OMPC_MAP_unknown), MapType(OMPC_MAP_unknown),
-        MapTypeIsImplicit(false), MapLoc() {}
+            NumVars, NumUniqueDeclarations, NumComponentLists, NumComponents) {}
+
+  /// \brief Set type modifier for the clause.
+  ///
+  /// \param T Type Modifier for the clause.
+  void setMapTypeModifier(OpenMPMapClauseKind T) { MapTypeModifier = T; }
+
+  /// \brief Set type for the clause.
+  ///
+  /// \param T Type for the clause.
+  void setMapType(OpenMPMapClauseKind T) { MapType = T; }
+
+  /// \brief Set type location.
+  ///
+  /// \param TLoc Type location.
+  void setMapLoc(SourceLocation TLoc) { MapLoc = TLoc; }
+
+  /// \brief Set colon location.
+  void setColonLoc(SourceLocation Loc) { ColonLoc = Loc; }
 
 public:
   /// \brief Creates clause with a list of variables \a VL.
@@ -3859,7 +3893,6 @@
   /// \param Type Map type.
   /// \param TypeIsImplicit Map type is inferred implicitly.
   /// \param TypeLoc Location of the map type.
-  ///
   static OMPMapClause *Create(const ASTContext &C, SourceLocation StartLoc,
                               SourceLocation LParenLoc, SourceLocation EndLoc,
                               ArrayRef<Expr *> Vars,
@@ -3868,7 +3901,8 @@
                               OpenMPMapClauseKind TypeModifier,
                               OpenMPMapClauseKind Type, bool TypeIsImplicit,
                               SourceLocation TypeLoc);
-  /// \brief Creates an empty clause with the place for for \a NumVars original
+
+  /// \brief Creates an empty clause with the place for \a NumVars original
   /// expressions, \a NumUniqueDeclarations declarations, \NumComponentLists
   /// lists, and \a NumComponents expression components.
   ///
@@ -3879,7 +3913,6 @@
   /// \param NumComponentLists Number of unique base declarations in this
   /// clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   static OMPMapClause *CreateEmpty(const ASTContext &C, unsigned NumVars,
                                    unsigned NumUniqueDeclarations,
                                    unsigned NumComponentLists,
@@ -3906,15 +3939,15 @@
   /// \brief Get colon location.
   SourceLocation getColonLoc() const { return ColonLoc; }
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_map;
-  }
-
   child_range children() {
     return child_range(
         reinterpret_cast<Stmt **>(varlist_begin()),
         reinterpret_cast<Stmt **>(varlist_end()));
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_map;
+  }
 };
 
 /// \brief This represents 'num_teams' clause in the '#pragma omp ...'
@@ -3925,17 +3958,18 @@
 /// \endcode
 /// In this example directive '#pragma omp teams' has clause 'num_teams'
 /// with single expression 'n'.
-///
 class OMPNumTeamsClause : public OMPClause, public OMPClauseWithPreInit {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief NumTeams number.
-  Stmt *NumTeams;
+  Stmt *NumTeams = nullptr;
+
   /// \brief Set the NumTeams number.
   ///
   /// \param E NumTeams number.
-  ///
   void setNumTeams(Expr *E) { NumTeams = E; }
 
 public:
@@ -3948,7 +3982,6 @@
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPNumTeamsClause(Expr *E, Stmt *HelperE, OpenMPDirectiveKind CaptureRegion,
                     SourceLocation StartLoc, SourceLocation LParenLoc,
                     SourceLocation EndLoc)
@@ -3958,25 +3991,27 @@
   }
 
   /// \brief Build an empty clause.
-  ///
   OMPNumTeamsClause()
       : OMPClause(OMPC_num_teams, SourceLocation(), SourceLocation()),
-        OMPClauseWithPreInit(this), LParenLoc(SourceLocation()),
-        NumTeams(nullptr) {}
+        OMPClauseWithPreInit(this) {}
+
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
+
   /// \brief Return NumTeams number.
   Expr *getNumTeams() { return cast<Expr>(NumTeams); }
+
   /// \brief Return NumTeams number.
   Expr *getNumTeams() const { return cast<Expr>(NumTeams); }
 
+  child_range children() { return child_range(&NumTeams, &NumTeams + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_num_teams;
   }
-
-  child_range children() { return child_range(&NumTeams, &NumTeams + 1); }
 };
 
 /// \brief This represents 'thread_limit' clause in the '#pragma omp ...'
@@ -3987,17 +4022,18 @@
 /// \endcode
 /// In this example directive '#pragma omp teams' has clause 'thread_limit'
 /// with single expression 'n'.
-///
 class OMPThreadLimitClause : public OMPClause, public OMPClauseWithPreInit {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief ThreadLimit number.
-  Stmt *ThreadLimit;
+  Stmt *ThreadLimit = nullptr;
+
   /// \brief Set the ThreadLimit number.
   ///
   /// \param E ThreadLimit number.
-  ///
   void setThreadLimit(Expr *E) { ThreadLimit = E; }
 
 public:
@@ -4010,7 +4046,6 @@
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPThreadLimitClause(Expr *E, Stmt *HelperE,
                        OpenMPDirectiveKind CaptureRegion,
                        SourceLocation StartLoc, SourceLocation LParenLoc,
@@ -4021,25 +4056,27 @@
   }
 
   /// \brief Build an empty clause.
-  ///
   OMPThreadLimitClause()
       : OMPClause(OMPC_thread_limit, SourceLocation(), SourceLocation()),
-        OMPClauseWithPreInit(this), LParenLoc(SourceLocation()),
-        ThreadLimit(nullptr) {}
+        OMPClauseWithPreInit(this) {}
+
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
+
   /// \brief Return ThreadLimit number.
   Expr *getThreadLimit() { return cast<Expr>(ThreadLimit); }
+
   /// \brief Return ThreadLimit number.
   Expr *getThreadLimit() const { return cast<Expr>(ThreadLimit); }
 
+  child_range children() { return child_range(&ThreadLimit, &ThreadLimit + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_thread_limit;
   }
-
-  child_range children() { return child_range(&ThreadLimit, &ThreadLimit + 1); }
 };
 
 /// \brief This represents 'priority' clause in the '#pragma omp ...'
@@ -4050,17 +4087,18 @@
 /// \endcode
 /// In this example directive '#pragma omp teams' has clause 'priority' with
 /// single expression 'n'.
-///
 class OMPPriorityClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Priority number.
-  Stmt *Priority;
+  Stmt *Priority = nullptr;
+
   /// \brief Set the Priority number.
   ///
   /// \param E Priority number.
-  ///
   void setPriority(Expr *E) { Priority = E; }
 
 public:
@@ -4070,31 +4108,32 @@
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPPriorityClause(Expr *E, SourceLocation StartLoc, SourceLocation LParenLoc,
                     SourceLocation EndLoc)
       : OMPClause(OMPC_priority, StartLoc, EndLoc), LParenLoc(LParenLoc),
         Priority(E) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPPriorityClause()
-      : OMPClause(OMPC_priority, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), Priority(nullptr) {}
+      : OMPClause(OMPC_priority, SourceLocation(), SourceLocation()) {}
+
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
+
   /// \brief Return Priority number.
   Expr *getPriority() { return cast<Expr>(Priority); }
+
   /// \brief Return Priority number.
   Expr *getPriority() const { return cast<Expr>(Priority); }
 
+  child_range children() { return child_range(&Priority, &Priority + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_priority;
   }
-
-  child_range children() { return child_range(&Priority, &Priority + 1); }
 };
 
 /// \brief This represents 'grainsize' clause in the '#pragma omp ...'
@@ -4105,13 +4144,14 @@
 /// \endcode
 /// In this example directive '#pragma omp taskloop' has clause 'grainsize'
 /// with single expression '4'.
-///
 class OMPGrainsizeClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Safe iteration space distance.
-  Stmt *Grainsize;
+  Stmt *Grainsize = nullptr;
 
   /// \brief Set safelen.
   void setGrainsize(Expr *Size) { Grainsize = Size; }
@@ -4122,31 +4162,29 @@
   /// \param Size Expression associated with this clause.
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPGrainsizeClause(Expr *Size, SourceLocation StartLoc,
                      SourceLocation LParenLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_grainsize, StartLoc, EndLoc), LParenLoc(LParenLoc),
         Grainsize(Size) {}
 
   /// \brief Build an empty clause.
-  ///
   explicit OMPGrainsizeClause()
-      : OMPClause(OMPC_grainsize, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), Grainsize(nullptr) {}
+      : OMPClause(OMPC_grainsize, SourceLocation(), SourceLocation()) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
   /// \brief Return safe iteration space distance.
   Expr *getGrainsize() const { return cast_or_null<Expr>(Grainsize); }
 
+  child_range children() { return child_range(&Grainsize, &Grainsize + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_grainsize;
   }
-
-  child_range children() { return child_range(&Grainsize, &Grainsize + 1); }
 };
 
 /// \brief This represents 'nogroup' clause in the '#pragma omp ...' directive.
@@ -4155,29 +4193,26 @@
 /// #pragma omp taskloop nogroup
 /// \endcode
 /// In this example directive '#pragma omp taskloop' has 'nogroup' clause.
-///
 class OMPNogroupClause : public OMPClause {
 public:
   /// \brief Build 'nogroup' clause.
   ///
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPNogroupClause(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_nogroup, StartLoc, EndLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   OMPNogroupClause()
       : OMPClause(OMPC_nogroup, SourceLocation(), SourceLocation()) {}
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_nogroup;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_nogroup;
+  }
 };
 
 /// \brief This represents 'num_tasks' clause in the '#pragma omp ...'
@@ -4188,13 +4223,14 @@
 /// \endcode
 /// In this example directive '#pragma omp taskloop' has clause 'num_tasks'
 /// with single expression '4'.
-///
 class OMPNumTasksClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Safe iteration space distance.
-  Stmt *NumTasks;
+  Stmt *NumTasks = nullptr;
 
   /// \brief Set safelen.
   void setNumTasks(Expr *Size) { NumTasks = Size; }
@@ -4205,31 +4241,29 @@
   /// \param Size Expression associated with this clause.
   /// \param StartLoc Starting location of the clause.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPNumTasksClause(Expr *Size, SourceLocation StartLoc,
                     SourceLocation LParenLoc, SourceLocation EndLoc)
       : OMPClause(OMPC_num_tasks, StartLoc, EndLoc), LParenLoc(LParenLoc),
         NumTasks(Size) {}
 
   /// \brief Build an empty clause.
-  ///
   explicit OMPNumTasksClause()
-      : OMPClause(OMPC_num_tasks, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), NumTasks(nullptr) {}
+      : OMPClause(OMPC_num_tasks, SourceLocation(), SourceLocation()) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
   /// \brief Return safe iteration space distance.
   Expr *getNumTasks() const { return cast_or_null<Expr>(NumTasks); }
 
+  child_range children() { return child_range(&NumTasks, &NumTasks + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_num_tasks;
   }
-
-  child_range children() { return child_range(&NumTasks, &NumTasks + 1); }
 };
 
 /// \brief This represents 'hint' clause in the '#pragma omp ...' directive.
@@ -4239,16 +4273,16 @@
 /// \endcode
 /// In this example directive '#pragma omp critical' has name 'name' and clause
 /// 'hint' with argument '6'.
-///
 class OMPHintClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Hint expression of the 'hint' clause.
-  Stmt *Hint;
+  Stmt *Hint = nullptr;
 
   /// \brief Set hint expression.
-  ///
   void setHint(Expr *H) { Hint = H; }
 
 public:
@@ -4258,31 +4292,28 @@
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
-  ///
   OMPHintClause(Expr *Hint, SourceLocation StartLoc, SourceLocation LParenLoc,
                 SourceLocation EndLoc)
       : OMPClause(OMPC_hint, StartLoc, EndLoc), LParenLoc(LParenLoc),
         Hint(Hint) {}
 
   /// \brief Build an empty clause.
-  ///
-  OMPHintClause()
-      : OMPClause(OMPC_hint, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), Hint(nullptr) {}
+  OMPHintClause() : OMPClause(OMPC_hint, SourceLocation(), SourceLocation()) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
   /// \brief Returns number of threads.
   Expr *getHint() const { return cast_or_null<Expr>(Hint); }
 
+  child_range children() { return child_range(&Hint, &Hint + 1); }
+
   static bool classof(const OMPClause *T) {
     return T->getClauseKind() == OMPC_hint;
   }
-
-  child_range children() { return child_range(&Hint, &Hint + 1); }
 };
 
 /// \brief This represents 'dist_schedule' clause in the '#pragma omp ...'
@@ -4293,44 +4324,47 @@
 /// \endcode
 /// In this example directive '#pragma omp distribute' has 'dist_schedule'
 /// clause with arguments 'static' and '3'.
-///
 class OMPDistScheduleClause : public OMPClause, public OMPClauseWithPreInit {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief A kind of the 'schedule' clause.
-  OpenMPDistScheduleClauseKind Kind;
+  OpenMPDistScheduleClauseKind Kind = OMPC_DIST_SCHEDULE_unknown;
+
   /// \brief Start location of the schedule kind in source code.
   SourceLocation KindLoc;
+
   /// \brief Location of ',' (if any).
   SourceLocation CommaLoc;
+
   /// \brief Chunk size.
-  Expr *ChunkSize;
+  Expr *ChunkSize = nullptr;
 
   /// \brief Set schedule kind.
   ///
   /// \param K Schedule kind.
-  ///
   void setDistScheduleKind(OpenMPDistScheduleClauseKind K) { Kind = K; }
+
   /// \brief Sets the location of '('.
   ///
   /// \param Loc Location of '('.
-  ///
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Set schedule kind start location.
   ///
   /// \param KLoc Schedule kind location.
-  ///
   void setDistScheduleKindLoc(SourceLocation KLoc) { KindLoc = KLoc; }
+
   /// \brief Set location of ','.
   ///
   /// \param Loc Location of ','.
-  ///
   void setCommaLoc(SourceLocation Loc) { CommaLoc = Loc; }
+
   /// \brief Set chunk size.
   ///
   /// \param E Chunk size.
-  ///
   void setChunkSize(Expr *E) { ChunkSize = E; }
 
 public:
@@ -4345,7 +4379,6 @@
   /// \param Kind DistSchedule kind.
   /// \param ChunkSize Chunk size.
   /// \param HelperChunkSize Helper chunk size for combined directives.
-  ///
   OMPDistScheduleClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                         SourceLocation KLoc, SourceLocation CommaLoc,
                         SourceLocation EndLoc,
@@ -4358,39 +4391,36 @@
   }
 
   /// \brief Build an empty clause.
-  ///
   explicit OMPDistScheduleClause()
       : OMPClause(OMPC_dist_schedule, SourceLocation(), SourceLocation()),
-        OMPClauseWithPreInit(this), Kind(OMPC_DIST_SCHEDULE_unknown),
-        ChunkSize(nullptr) {}
+        OMPClauseWithPreInit(this) {}
 
   /// \brief Get kind of the clause.
-  ///
   OpenMPDistScheduleClauseKind getDistScheduleKind() const { return Kind; }
-  /// \brief Get location of '('.
-  ///
-  SourceLocation getLParenLoc() { return LParenLoc; }
-  /// \brief Get kind location.
-  ///
-  SourceLocation getDistScheduleKindLoc() { return KindLoc; }
-  /// \brief Get location of ','.
-  ///
-  SourceLocation getCommaLoc() { return CommaLoc; }
-  /// \brief Get chunk size.
-  ///
-  Expr *getChunkSize() { return ChunkSize; }
-  /// \brief Get chunk size.
-  ///
-  const Expr *getChunkSize() const { return ChunkSize; }
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_dist_schedule;
-  }
+  /// \brief Get location of '('.
+  SourceLocation getLParenLoc() { return LParenLoc; }
+
+  /// \brief Get kind location.
+  SourceLocation getDistScheduleKindLoc() { return KindLoc; }
+
+  /// \brief Get location of ','.
+  SourceLocation getCommaLoc() { return CommaLoc; }
+
+  /// \brief Get chunk size.
+  Expr *getChunkSize() { return ChunkSize; }
+
+  /// \brief Get chunk size.
+  const Expr *getChunkSize() const { return ChunkSize; }
 
   child_range children() {
     return child_range(reinterpret_cast<Stmt **>(&ChunkSize),
                        reinterpret_cast<Stmt **>(&ChunkSize) + 1);
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_dist_schedule;
+  }
 };
 
 /// \brief This represents 'defaultmap' clause in the '#pragma omp ...' directive.
@@ -4400,46 +4430,49 @@
 /// \endcode
 /// In this example directive '#pragma omp target' has 'defaultmap' clause of kind
 /// 'scalar' with modifier 'tofrom'.
-///
 class OMPDefaultmapClause : public OMPClause {
   friend class OMPClauseReader;
+
   /// \brief Location of '('.
   SourceLocation LParenLoc;
+
   /// \brief Modifiers for 'defaultmap' clause.
-  OpenMPDefaultmapClauseModifier Modifier;
+  OpenMPDefaultmapClauseModifier Modifier = OMPC_DEFAULTMAP_MODIFIER_unknown;
+
   /// \brief Locations of modifiers.
   SourceLocation ModifierLoc;
+
   /// \brief A kind of the 'defaultmap' clause.
-  OpenMPDefaultmapClauseKind Kind;
+  OpenMPDefaultmapClauseKind Kind = OMPC_DEFAULTMAP_unknown;
+
   /// \brief Start location of the defaultmap kind in source code.
   SourceLocation KindLoc;
 
   /// \brief Set defaultmap kind.
   ///
   /// \param K Defaultmap kind.
-  ///
   void setDefaultmapKind(OpenMPDefaultmapClauseKind K) { Kind = K; }
+
   /// \brief Set the defaultmap modifier.
   ///
   /// \param M Defaultmap modifier.
-  ///
   void setDefaultmapModifier(OpenMPDefaultmapClauseModifier M) {
     Modifier = M;
   }
+
   /// \brief Set location of the defaultmap modifier.
-  ///
   void setDefaultmapModifierLoc(SourceLocation Loc) {
     ModifierLoc = Loc;
   }
+
   /// \brief Sets the location of '('.
   ///
   /// \param Loc Location of '('.
-  ///
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
   /// \brief Set defaultmap kind start location.
   ///
   /// \param KLoc Defaultmap kind location.
-  ///
   void setDefaultmapKindLoc(SourceLocation KLoc) { KindLoc = KLoc; }
 
 public:
@@ -4452,7 +4485,6 @@
   /// \param Kind Defaultmap kind.
   /// \param M The modifier applied to 'defaultmap' clause.
   /// \param MLoc Location of the modifier
-  ///
   OMPDefaultmapClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                       SourceLocation MLoc, SourceLocation KLoc,
                       SourceLocation EndLoc, OpenMPDefaultmapClauseKind Kind,
@@ -4461,39 +4493,35 @@
         Modifier(M), ModifierLoc(MLoc), Kind(Kind), KindLoc(KLoc) {}
 
   /// \brief Build an empty clause.
-  ///
   explicit OMPDefaultmapClause()
-      : OMPClause(OMPC_defaultmap, SourceLocation(), SourceLocation()),
-        Modifier(OMPC_DEFAULTMAP_MODIFIER_unknown),
-        Kind(OMPC_DEFAULTMAP_unknown) {}
+      : OMPClause(OMPC_defaultmap, SourceLocation(), SourceLocation()) {}
 
   /// \brief Get kind of the clause.
-  ///
   OpenMPDefaultmapClauseKind getDefaultmapKind() const { return Kind; }
+
   /// \brief Get the modifier of the clause.
-  ///
   OpenMPDefaultmapClauseModifier getDefaultmapModifier() const {
     return Modifier;
   }
+
   /// \brief Get location of '('.
-  ///
   SourceLocation getLParenLoc() { return LParenLoc; }
+
   /// \brief Get kind location.
-  ///
   SourceLocation getDefaultmapKindLoc() { return KindLoc; }
+
   /// \brief Get the modifier location.
-  ///
   SourceLocation getDefaultmapModifierLoc() const {
     return ModifierLoc;
   }
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_defaultmap;
-  }
-
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_defaultmap;
+  }
 };
 
 /// \brief This represents clause 'to' in the '#pragma omp ...'
@@ -4504,27 +4532,14 @@
 /// \endcode
 /// In this example directive '#pragma omp target update' has clause 'to'
 /// with the variables 'a' and 'b'.
-///
 class OMPToClause final : public OMPMappableExprListClause<OMPToClause>,
                           private llvm::TrailingObjects<
                               OMPToClause, Expr *, ValueDecl *, unsigned,
                               OMPClauseMappableExprCommon::MappableComponent> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
-  friend OMPMappableExprListClause;
   friend class OMPClauseReader;
-
-  /// Define the sizes of each trailing object array except the last one. This
-  /// is required for TrailingObjects to work properly.
-  size_t numTrailingObjects(OverloadToken<Expr *>) const {
-    return varlist_size();
-  }
-  size_t numTrailingObjects(OverloadToken<ValueDecl *>) const {
-    return getUniqueDeclarationsNum();
-  }
-  size_t numTrailingObjects(OverloadToken<unsigned>) const {
-    return getUniqueDeclarationsNum() + getTotalComponentListNum();
-  }
+  friend OMPMappableExprListClause;
+  friend OMPVarListClause;
+  friend TrailingObjects;
 
   /// \brief Build clause with number of variables \a NumVars.
   ///
@@ -4535,7 +4550,6 @@
   /// clause.
   /// \param NumComponentLists Number of component lists in this clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   explicit OMPToClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                        SourceLocation EndLoc, unsigned NumVars,
                        unsigned NumUniqueDeclarations,
@@ -4551,13 +4565,24 @@
   /// clause.
   /// \param NumComponentLists Number of component lists in this clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   explicit OMPToClause(unsigned NumVars, unsigned NumUniqueDeclarations,
                        unsigned NumComponentLists, unsigned NumComponents)
       : OMPMappableExprListClause(
             OMPC_to, SourceLocation(), SourceLocation(), SourceLocation(),
             NumVars, NumUniqueDeclarations, NumComponentLists, NumComponents) {}
 
+  /// Define the sizes of each trailing object array except the last one. This
+  /// is required for TrailingObjects to work properly.
+  size_t numTrailingObjects(OverloadToken<Expr *>) const {
+    return varlist_size();
+  }
+  size_t numTrailingObjects(OverloadToken<ValueDecl *>) const {
+    return getUniqueDeclarationsNum();
+  }
+  size_t numTrailingObjects(OverloadToken<unsigned>) const {
+    return getUniqueDeclarationsNum() + getTotalComponentListNum();
+  }
+
 public:
   /// \brief Creates clause with a list of variables \a Vars.
   ///
@@ -4567,7 +4592,6 @@
   /// \param Vars The original expression used in the clause.
   /// \param Declarations Declarations used in the clause.
   /// \param ComponentLists Component lists used in the clause.
-  ///
   static OMPToClause *Create(const ASTContext &C, SourceLocation StartLoc,
                              SourceLocation LParenLoc, SourceLocation EndLoc,
                              ArrayRef<Expr *> Vars,
@@ -4583,20 +4607,19 @@
   /// \param NumComponentLists Number of unique base declarations in this
   /// clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   static OMPToClause *CreateEmpty(const ASTContext &C, unsigned NumVars,
                                   unsigned NumUniqueDeclarations,
                                   unsigned NumComponentLists,
                                   unsigned NumComponents);
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_to;
-  }
-
   child_range children() {
     return child_range(reinterpret_cast<Stmt **>(varlist_begin()),
                        reinterpret_cast<Stmt **>(varlist_end()));
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_to;
+  }
 };
 
 /// \brief This represents clause 'from' in the '#pragma omp ...'
@@ -4607,28 +4630,15 @@
 /// \endcode
 /// In this example directive '#pragma omp target update' has clause 'from'
 /// with the variables 'a' and 'b'.
-///
 class OMPFromClause final
     : public OMPMappableExprListClause<OMPFromClause>,
       private llvm::TrailingObjects<
           OMPFromClause, Expr *, ValueDecl *, unsigned,
           OMPClauseMappableExprCommon::MappableComponent> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
-  friend OMPMappableExprListClause;
   friend class OMPClauseReader;
-
-  /// Define the sizes of each trailing object array except the last one. This
-  /// is required for TrailingObjects to work properly.
-  size_t numTrailingObjects(OverloadToken<Expr *>) const {
-    return varlist_size();
-  }
-  size_t numTrailingObjects(OverloadToken<ValueDecl *>) const {
-    return getUniqueDeclarationsNum();
-  }
-  size_t numTrailingObjects(OverloadToken<unsigned>) const {
-    return getUniqueDeclarationsNum() + getTotalComponentListNum();
-  }
+  friend OMPMappableExprListClause;
+  friend OMPVarListClause;
+  friend TrailingObjects;
 
   /// \brief Build clause with number of variables \a NumVars.
   ///
@@ -4639,7 +4649,6 @@
   /// clause.
   /// \param NumComponentLists Number of component lists in this clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   explicit OMPFromClause(SourceLocation StartLoc, SourceLocation LParenLoc,
                          SourceLocation EndLoc, unsigned NumVars,
                          unsigned NumUniqueDeclarations,
@@ -4655,13 +4664,24 @@
   /// clause.
   /// \param NumComponentLists Number of component lists in this clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   explicit OMPFromClause(unsigned NumVars, unsigned NumUniqueDeclarations,
                          unsigned NumComponentLists, unsigned NumComponents)
       : OMPMappableExprListClause(
             OMPC_from, SourceLocation(), SourceLocation(), SourceLocation(),
             NumVars, NumUniqueDeclarations, NumComponentLists, NumComponents) {}
 
+  /// Define the sizes of each trailing object array except the last one. This
+  /// is required for TrailingObjects to work properly.
+  size_t numTrailingObjects(OverloadToken<Expr *>) const {
+    return varlist_size();
+  }
+  size_t numTrailingObjects(OverloadToken<ValueDecl *>) const {
+    return getUniqueDeclarationsNum();
+  }
+  size_t numTrailingObjects(OverloadToken<unsigned>) const {
+    return getUniqueDeclarationsNum() + getTotalComponentListNum();
+  }
+
 public:
   /// \brief Creates clause with a list of variables \a Vars.
   ///
@@ -4671,7 +4691,6 @@
   /// \param Vars The original expression used in the clause.
   /// \param Declarations Declarations used in the clause.
   /// \param ComponentLists Component lists used in the clause.
-  ///
   static OMPFromClause *Create(const ASTContext &C, SourceLocation StartLoc,
                                SourceLocation LParenLoc, SourceLocation EndLoc,
                                ArrayRef<Expr *> Vars,
@@ -4687,20 +4706,19 @@
   /// \param NumComponentLists Number of unique base declarations in this
   /// clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   static OMPFromClause *CreateEmpty(const ASTContext &C, unsigned NumVars,
                                     unsigned NumUniqueDeclarations,
                                     unsigned NumComponentLists,
                                     unsigned NumComponents);
 
-  static bool classof(const OMPClause *T) {
-    return T->getClauseKind() == OMPC_from;
-  }
-
   child_range children() {
     return child_range(reinterpret_cast<Stmt **>(varlist_begin()),
                        reinterpret_cast<Stmt **>(varlist_end()));
   }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_from;
+  }
 };
 
 /// This represents clause 'use_device_ptr' in the '#pragma omp ...'
@@ -4711,28 +4729,15 @@
 /// \endcode
 /// In this example directive '#pragma omp target data' has clause
 /// 'use_device_ptr' with the variables 'a' and 'b'.
-///
 class OMPUseDevicePtrClause final
     : public OMPMappableExprListClause<OMPUseDevicePtrClause>,
       private llvm::TrailingObjects<
           OMPUseDevicePtrClause, Expr *, ValueDecl *, unsigned,
           OMPClauseMappableExprCommon::MappableComponent> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
-  friend OMPMappableExprListClause;
   friend class OMPClauseReader;
-
-  /// Define the sizes of each trailing object array except the last one. This
-  /// is required for TrailingObjects to work properly.
-  size_t numTrailingObjects(OverloadToken<Expr *>) const {
-    return 3 * varlist_size();
-  }
-  size_t numTrailingObjects(OverloadToken<ValueDecl *>) const {
-    return getUniqueDeclarationsNum();
-  }
-  size_t numTrailingObjects(OverloadToken<unsigned>) const {
-    return getUniqueDeclarationsNum() + getTotalComponentListNum();
-  }
+  friend OMPMappableExprListClause;
+  friend OMPVarListClause;
+  friend TrailingObjects;
 
   /// Build clause with number of variables \a NumVars.
   ///
@@ -4743,7 +4748,6 @@
   /// clause.
   /// \param NumComponentLists Number of component lists in this clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   explicit OMPUseDevicePtrClause(SourceLocation StartLoc,
                                  SourceLocation LParenLoc,
                                  SourceLocation EndLoc, unsigned NumVars,
@@ -4761,7 +4765,6 @@
   /// clause.
   /// \param NumComponentLists Number of component lists in this clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   explicit OMPUseDevicePtrClause(unsigned NumVars,
                                  unsigned NumUniqueDeclarations,
                                  unsigned NumComponentLists,
@@ -4771,6 +4774,18 @@
                                   NumUniqueDeclarations, NumComponentLists,
                                   NumComponents) {}
 
+  /// Define the sizes of each trailing object array except the last one. This
+  /// is required for TrailingObjects to work properly.
+  size_t numTrailingObjects(OverloadToken<Expr *>) const {
+    return 3 * varlist_size();
+  }
+  size_t numTrailingObjects(OverloadToken<ValueDecl *>) const {
+    return getUniqueDeclarationsNum();
+  }
+  size_t numTrailingObjects(OverloadToken<unsigned>) const {
+    return getUniqueDeclarationsNum() + getTotalComponentListNum();
+  }
+
   /// Sets the list of references to private copies with initializers for new
   /// private variables.
   /// \param VL List of references.
@@ -4810,7 +4825,6 @@
   /// \param Inits Expressions referring to private copy initializers.
   /// \param Declarations Declarations used in the clause.
   /// \param ComponentLists Component lists used in the clause.
-  ///
   static OMPUseDevicePtrClause *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
          SourceLocation EndLoc, ArrayRef<Expr *> Vars,
@@ -4827,36 +4841,37 @@
   /// \param NumComponentLists Number of unique base declarations in this
   /// clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   static OMPUseDevicePtrClause *CreateEmpty(const ASTContext &C,
                                             unsigned NumVars,
                                             unsigned NumUniqueDeclarations,
                                             unsigned NumComponentLists,
                                             unsigned NumComponents);
 
-  typedef MutableArrayRef<Expr *>::iterator private_copies_iterator;
-  typedef ArrayRef<const Expr *>::iterator private_copies_const_iterator;
-  typedef llvm::iterator_range<private_copies_iterator> private_copies_range;
-  typedef llvm::iterator_range<private_copies_const_iterator>
-      private_copies_const_range;
+  using private_copies_iterator = MutableArrayRef<Expr *>::iterator;
+  using private_copies_const_iterator = ArrayRef<const Expr *>::iterator;
+  using private_copies_range = llvm::iterator_range<private_copies_iterator>;
+  using private_copies_const_range =
+      llvm::iterator_range<private_copies_const_iterator>;
 
   private_copies_range private_copies() {
     return private_copies_range(getPrivateCopies().begin(),
                                 getPrivateCopies().end());
   }
+
   private_copies_const_range private_copies() const {
     return private_copies_const_range(getPrivateCopies().begin(),
                                       getPrivateCopies().end());
   }
 
-  typedef MutableArrayRef<Expr *>::iterator inits_iterator;
-  typedef ArrayRef<const Expr *>::iterator inits_const_iterator;
-  typedef llvm::iterator_range<inits_iterator> inits_range;
-  typedef llvm::iterator_range<inits_const_iterator> inits_const_range;
+  using inits_iterator = MutableArrayRef<Expr *>::iterator;
+  using inits_const_iterator = ArrayRef<const Expr *>::iterator;
+  using inits_range = llvm::iterator_range<inits_iterator>;
+  using inits_const_range = llvm::iterator_range<inits_const_iterator>;
 
   inits_range inits() {
     return inits_range(getInits().begin(), getInits().end());
   }
+
   inits_const_range inits() const {
     return inits_const_range(getInits().begin(), getInits().end());
   }
@@ -4879,28 +4894,16 @@
 /// \endcode
 /// In this example directive '#pragma omp target' has clause
 /// 'is_device_ptr' with the variables 'a' and 'b'.
-///
 class OMPIsDevicePtrClause final
     : public OMPMappableExprListClause<OMPIsDevicePtrClause>,
       private llvm::TrailingObjects<
           OMPIsDevicePtrClause, Expr *, ValueDecl *, unsigned,
           OMPClauseMappableExprCommon::MappableComponent> {
-  friend TrailingObjects;
-  friend OMPVarListClause;
-  friend OMPMappableExprListClause;
   friend class OMPClauseReader;
+  friend OMPMappableExprListClause;
+  friend OMPVarListClause;
+  friend TrailingObjects;
 
-  /// Define the sizes of each trailing object array except the last one. This
-  /// is required for TrailingObjects to work properly.
-  size_t numTrailingObjects(OverloadToken<Expr *>) const {
-    return varlist_size();
-  }
-  size_t numTrailingObjects(OverloadToken<ValueDecl *>) const {
-    return getUniqueDeclarationsNum();
-  }
-  size_t numTrailingObjects(OverloadToken<unsigned>) const {
-    return getUniqueDeclarationsNum() + getTotalComponentListNum();
-  }
   /// Build clause with number of variables \a NumVars.
   ///
   /// \param StartLoc Starting location of the clause.
@@ -4910,7 +4913,6 @@
   /// clause.
   /// \param NumComponentLists Number of component lists in this clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   explicit OMPIsDevicePtrClause(SourceLocation StartLoc,
                                 SourceLocation LParenLoc, SourceLocation EndLoc,
                                 unsigned NumVars,
@@ -4928,7 +4930,6 @@
   /// clause.
   /// \param NumComponentLists Number of component lists in this clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   explicit OMPIsDevicePtrClause(unsigned NumVars,
                                 unsigned NumUniqueDeclarations,
                                 unsigned NumComponentLists,
@@ -4938,6 +4939,18 @@
                                   NumUniqueDeclarations, NumComponentLists,
                                   NumComponents) {}
 
+  /// Define the sizes of each trailing object array except the last one. This
+  /// is required for TrailingObjects to work properly.
+  size_t numTrailingObjects(OverloadToken<Expr *>) const {
+    return varlist_size();
+  }
+  size_t numTrailingObjects(OverloadToken<ValueDecl *>) const {
+    return getUniqueDeclarationsNum();
+  }
+  size_t numTrailingObjects(OverloadToken<unsigned>) const {
+    return getUniqueDeclarationsNum() + getTotalComponentListNum();
+  }
+
 public:
   /// Creates clause with a list of variables \a Vars.
   ///
@@ -4947,7 +4960,6 @@
   /// \param Vars The original expression used in the clause.
   /// \param Declarations Declarations used in the clause.
   /// \param ComponentLists Component lists used in the clause.
-  ///
   static OMPIsDevicePtrClause *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
          SourceLocation EndLoc, ArrayRef<Expr *> Vars,
@@ -4963,7 +4975,6 @@
   /// \param NumComponentLists Number of unique base declarations in this
   /// clause.
   /// \param NumComponents Total number of expression components in the clause.
-  ///
   static OMPIsDevicePtrClause *CreateEmpty(const ASTContext &C,
                                            unsigned NumVars,
                                            unsigned NumUniqueDeclarations,
@@ -4979,6 +4990,7 @@
     return T->getClauseKind() == OMPC_is_device_ptr;
   }
 };
-} // end namespace clang
+
+} // namespace clang
 
 #endif // LLVM_CLANG_AST_OPENMPCLAUSE_H
diff --git a/include/clang/AST/OperationKinds.def b/include/clang/AST/OperationKinds.def
index 2d48a7d..b13cf3b 100644
--- a/include/clang/AST/OperationKinds.def
+++ b/include/clang/AST/OperationKinds.def
@@ -327,12 +327,13 @@
 // Convert a pointer to a different address space.
 CAST_OPERATION(AddressSpaceConversion)
 
-// Convert an integer initializer to an OpenCL sampler.
+// Convert an integer initializer to an OpenCL sampler.
 CAST_OPERATION(IntToOCLSampler)
 
 //===- Binary Operations  -------------------------------------------------===//
 // Operators listed in order of precedence.
-// Note that additions to this should also update the StmtVisitor class.
+// Note that additions to this should also update the StmtVisitor class and
+// BinaryOperator::getOverloadedOperator.
 
 // [C++ 5.5] Pointer-to-member operators.
 BINARY_OPERATION(PtrMemD, ".*")
@@ -347,6 +348,8 @@
 // [C99 6.5.7] Bitwise shift operators.
 BINARY_OPERATION(Shl, "<<")
 BINARY_OPERATION(Shr, ">>")
+// C++20 [expr.spaceship] Three-way comparison operator.
+BINARY_OPERATION(Cmp, "<=>")
 // [C99 6.5.8] Relational operators.
 BINARY_OPERATION(LT, "<")
 BINARY_OPERATION(GT, ">")
@@ -382,7 +385,8 @@
 
 
 //===- Unary Operations ---------------------------------------------------===//
-// Note that additions to this should also update the StmtVisitor class.
+// Note that additions to this should also update the StmtVisitor class and
+// UnaryOperator::getOverloadedOperator.
 
 // [C99 6.5.2.4] Postfix increment and decrement
 UNARY_OPERATION(PostInc, "++")
diff --git a/include/clang/AST/OperationKinds.h b/include/clang/AST/OperationKinds.h
index 00f060f..e383268 100644
--- a/include/clang/AST/OperationKinds.h
+++ b/include/clang/AST/OperationKinds.h
@@ -23,8 +23,6 @@
 #include "clang/AST/OperationKinds.def"
 };
 
-static const CastKind CK_Invalid = static_cast<CastKind>(-1);
-
 enum BinaryOperatorKind {
 #define BINARY_OPERATION(Name, Spelling) BO_##Name,
 #include "clang/AST/OperationKinds.def"
diff --git a/include/clang/AST/PrettyPrinter.h b/include/clang/AST/PrettyPrinter.h
index 54fe398..e831b90 100644
--- a/include/clang/AST/PrettyPrinter.h
+++ b/include/clang/AST/PrettyPrinter.h
@@ -30,8 +30,8 @@
   virtual bool handledStmt(Stmt* E, raw_ostream& OS) = 0;
 };
 
-/// \brief Describes how types, statements, expressions, and
-/// declarations should be printed.
+/// Describes how types, statements, expressions, and declarations should be
+/// printed.
 ///
 /// This type is intended to be small and suitable for passing by value.
 /// It is very frequently copied.
@@ -51,22 +51,23 @@
       TerseOutput(false), PolishForDeclaration(false),
       Half(LO.Half), MSWChar(LO.MicrosoftExt && !LO.WChar),
       IncludeNewlines(true), MSVCFormatting(false),
-      ConstantsAsWritten(false) { }
+      ConstantsAsWritten(false), SuppressImplicitBase(false),
+      FullyQualifiedName(false) { }
 
-  /// \brief Adjust this printing policy for cases where it's known that
-  /// we're printing C++ code (for instance, if AST dumping reaches a
-  /// C++-only construct). This should not be used if a real LangOptions
-  /// object is available.
+  /// Adjust this printing policy for cases where it's known that we're
+  /// printing C++ code (for instance, if AST dumping reaches a C++-only
+  /// construct). This should not be used if a real LangOptions object is
+  /// available.
   void adjustForCPlusPlus() {
     SuppressTagKeyword = true;
     Bool = true;
     UseVoidForZeroParams = false;
   }
 
-  /// \brief The number of spaces to use to indent each line.
+  /// The number of spaces to use to indent each line.
   unsigned Indentation : 8;
 
-  /// \brief Whether we should suppress printing of the actual specifiers for
+  /// Whether we should suppress printing of the actual specifiers for
   /// the given type or declaration.
   ///
   /// This flag is only used when we are printing declarators beyond
@@ -82,7 +83,7 @@
   /// "const int" type specifier and instead only print the "*y".
   bool SuppressSpecifiers : 1;
 
-  /// \brief Whether type printing should skip printing the tag keyword.
+  /// Whether type printing should skip printing the tag keyword.
   ///
   /// This is used when printing the inner type of elaborated types,
   /// (as the tag keyword is part of the elaborated type):
@@ -92,7 +93,7 @@
   /// \endcode
   bool SuppressTagKeyword : 1;
 
-  /// \brief When true, include the body of a tag definition.
+  /// When true, include the body of a tag definition.
   ///
   /// This is used to place the definition of a struct
   /// in the middle of another declaration as with:
@@ -102,14 +103,14 @@
   /// \endcode
   bool IncludeTagDefinition : 1;
 
-  /// \brief Suppresses printing of scope specifiers.
+  /// Suppresses printing of scope specifiers.
   bool SuppressScope : 1;
 
-  /// \brief Suppress printing parts of scope specifiers that don't need
+  /// Suppress printing parts of scope specifiers that don't need
   /// to be written, e.g., for inline or anonymous namespaces.
   bool SuppressUnwrittenScope : 1;
   
-  /// \brief Suppress printing of variable initializers.
+  /// Suppress printing of variable initializers.
   ///
   /// This flag is used when printing the loop variable in a for-range
   /// statement. For example, given:
@@ -122,8 +123,8 @@
   /// internal initializer constructed for x will not be printed.
   bool SuppressInitializers : 1;
 
-  /// \brief Whether we should print the sizes of constant array expressions
-  /// as written in the sources.
+  /// Whether we should print the sizes of constant array expressions as written
+  /// in the sources.
   ///
   /// This flag determines whether array types declared as
   ///
@@ -140,69 +141,67 @@
   /// \endcode
   bool ConstantArraySizeAsWritten : 1;
   
-  /// \brief When printing an anonymous tag name, also print the location of
-  /// that entity (e.g., "enum <anonymous at t.h:10:5>"). Otherwise, just 
-  /// prints "(anonymous)" for the name.
+  /// When printing an anonymous tag name, also print the location of that
+  /// entity (e.g., "enum <anonymous at t.h:10:5>"). Otherwise, just prints
+  /// "(anonymous)" for the name.
   bool AnonymousTagLocations : 1;
   
-  /// \brief When true, suppress printing of the __strong lifetime qualifier in
-  /// ARC.
+  /// When true, suppress printing of the __strong lifetime qualifier in ARC.
   unsigned SuppressStrongLifetime : 1;
   
-  /// \brief When true, suppress printing of lifetime qualifier in
-  /// ARC.
+  /// When true, suppress printing of lifetime qualifier in ARC.
   unsigned SuppressLifetimeQualifiers : 1;
 
   /// When true, suppresses printing template arguments in names of C++
   /// constructors.
   unsigned SuppressTemplateArgsInCXXConstructors : 1;
 
-  /// \brief Whether we can use 'bool' rather than '_Bool' (even if the language
+  /// Whether we can use 'bool' rather than '_Bool' (even if the language
   /// doesn't actually have 'bool', because, e.g., it is defined as a macro).
   unsigned Bool : 1;
 
-  /// \brief Whether we can use 'restrict' rather than '__restrict'.
+  /// Whether we can use 'restrict' rather than '__restrict'.
   unsigned Restrict : 1;
 
-  /// \brief Whether we can use 'alignof' rather than '__alignof'.
+  /// Whether we can use 'alignof' rather than '__alignof'.
   unsigned Alignof : 1;
 
-  /// \brief Whether we can use '_Alignof' rather than '__alignof'.
+  /// Whether we can use '_Alignof' rather than '__alignof'.
   unsigned UnderscoreAlignof : 1;
 
-  /// \brief Whether we should use '(void)' rather than '()' for a function
-  /// prototype with zero parameters.
+  /// Whether we should use '(void)' rather than '()' for a function prototype
+  /// with zero parameters.
   unsigned UseVoidForZeroParams : 1;
 
-  /// \brief Provide a 'terse' output.
+  /// Provide a 'terse' output.
   ///
   /// For example, in this mode we don't print function bodies, class members,
   /// declarations inside namespaces etc.  Effectively, this should print
   /// only the requested declaration.
   unsigned TerseOutput : 1;
   
-  /// \brief When true, do certain refinement needed for producing proper
-  /// declaration tag; such as, do not print attributes attached to the declaration.
+  /// When true, do certain refinement needed for producing proper declaration
+  /// tag; such as, do not print attributes attached to the declaration.
   ///
   unsigned PolishForDeclaration : 1;
 
-  /// \brief When true, print the half-precision floating-point type as 'half'
+  /// When true, print the half-precision floating-point type as 'half'
   /// instead of '__fp16'
   unsigned Half : 1;
 
-  /// \brief When true, print the built-in wchar_t type as __wchar_t. For use in
+  /// When true, print the built-in wchar_t type as __wchar_t. For use in
   /// Microsoft mode when wchar_t is not available.
   unsigned MSWChar : 1;
 
-  /// \brief When true, include newlines after statements like "break", etc.
+  /// When true, include newlines after statements like "break", etc.
   unsigned IncludeNewlines : 1;
 
-  /// \brief Use whitespace and punctuation like MSVC does. In particular, this
-  /// prints anonymous namespaces as `anonymous namespace' and does not insert
-  /// spaces after template arguments.
+  /// Use whitespace and punctuation like MSVC does. In particular, this prints
+  /// anonymous namespaces as `anonymous namespace' and does not insert spaces
+  /// after template arguments.
   bool MSVCFormatting : 1;
 
-  /// \brief Whether we should print the constant expressions as written in the
+  /// Whether we should print the constant expressions as written in the
   /// sources.
   ///
   /// This flag determines whether constants expressions like
@@ -218,7 +217,14 @@
   /// 0x10
   /// 2.5e3
   /// \endcode
-  bool ConstantsAsWritten;
+  bool ConstantsAsWritten : 1;
+
+  /// When true, don't print the implicit 'self' or 'this' expressions.
+  bool SuppressImplicitBase : 1;
+
+  /// When true, print the fully qualified name of function declarations.
+  /// This is the opposite of SuppressScope and thus overrules it.
+  bool FullyQualifiedName : 1;
 };
 
 } // end namespace clang
diff --git a/include/clang/AST/QualTypeNames.h b/include/clang/AST/QualTypeNames.h
new file mode 100644
index 0000000..86d805f
--- /dev/null
+++ b/include/clang/AST/QualTypeNames.h
@@ -0,0 +1,90 @@
+//===--- QualTypeNames.h - Generate Complete QualType Names ----*- C++ -*-===//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// ===----------------------------------------------------------------------===//
+//
+// \file
+// Functionality to generate the fully-qualified names of QualTypes,
+// including recursively expanding any subtypes and template
+// parameters.
+//
+// More precisely: Generates a name that can be used to name the same
+// type if used at the end of the current translation unit--with
+// certain limitations. See below.
+//
+// This code desugars names only very minimally, so in this code:
+//
+// namespace A {
+//   struct X {};
+// }
+// using A::X;
+// namespace B {
+//   using std::tuple;
+//   typedef tuple<X> TX;
+//   TX t;
+// }
+//
+// B::t's type is reported as "B::TX", rather than std::tuple<A::X>.
+//
+// Also, this code replaces types found via using declarations with
+// their more qualified name, so for the code:
+//
+// using std::tuple;
+// tuple<int> TInt;
+//
+// TInt's type will be named, "std::tuple<int>".
+//
+// Limitations:
+//
+// Some types have ambiguous names at the end of a translation unit,
+// are not namable at all there, or are special cases in other ways.
+//
+// 1) Types with only local scope will have their local names:
+//
+// void foo() {
+//   struct LocalType {} LocalVar;
+// }
+//
+// LocalVar's type will be named, "struct LocalType", without any
+// qualification.
+//
+// 2) Types that have been shadowed are reported normally, but a
+// client using that name at the end of the translation unit will be
+// referring to a different type.
+//
+// ===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_QUALTYPENAMES_H
+#define LLVM_CLANG_AST_QUALTYPENAMES_H
+
+#include "clang/AST/ASTContext.h"
+
+namespace clang {
+namespace TypeName {
+/// \brief Get the fully qualified name for a type. This includes full
+/// qualification of all template parameters etc.
+///
+/// \param[in] QT - the type for which the fully qualified name will be
+/// returned.
+/// \param[in] Ctx - the ASTContext to be used.
+/// \param[in] WithGlobalNsPrefix - If true, then the global namespace
+/// specifier "::" will be prepended to the fully qualified name.
+std::string getFullyQualifiedName(QualType QT, const ASTContext &Ctx,
+                                  bool WithGlobalNsPrefix = false);
+
+/// \brief Generates a QualType that can be used to name the same type
+/// if used at the end of the current translation unit. This ignores
+/// issues such as type shadowing.
+///
+/// \param[in] QT - the type for which the fully qualified type will be
+/// returned.
+/// \param[in] Ctx - the ASTContext to be used.
+/// \param[in] WithGlobalNsPrefix - Indicate whether the global namespace
+/// specifier "::" should be prepended or not.
+QualType getFullyQualifiedType(QualType QT, const ASTContext &Ctx,
+                               bool WithGlobalNsPrefix = false);
+} // end namespace TypeName
+} // end namespace clang
+#endif // LLVM_CLANG_TOOLING_CORE_QUALTYPENAMES_H
diff --git a/include/clang/AST/RawCommentList.h b/include/clang/AST/RawCommentList.h
index 2e005dd..9c931e0 100644
--- a/include/clang/AST/RawCommentList.h
+++ b/include/clang/AST/RawCommentList.h
@@ -41,7 +41,7 @@
   RawComment() : Kind(RCK_Invalid), IsAlmostTrailingComment(false) { }
 
   RawComment(const SourceManager &SourceMgr, SourceRange SR,
-             bool Merged, bool ParseAllComments);
+             const CommentOptions &CommentOpts, bool Merged);
 
   CommentKind getKind() const LLVM_READONLY {
     return (CommentKind) Kind;
@@ -70,7 +70,6 @@
   /// \code /**< stuff */ \endcode
   /// \code /*!< stuff */ \endcode
   bool isTrailingComment() const LLVM_READONLY {
-    assert(isDocumentation());
     return IsTrailingComment;
   }
 
@@ -83,8 +82,7 @@
 
   /// Returns true if this comment is not a documentation comment.
   bool isOrdinary() const LLVM_READONLY {
-    return ((Kind == RCK_OrdinaryBCPL) || (Kind == RCK_OrdinaryC)) &&
-        !ParseAllComments;
+    return ((Kind == RCK_OrdinaryBCPL) || (Kind == RCK_OrdinaryC));
   }
 
   /// Returns true if this comment any kind of a documentation comment.
@@ -92,11 +90,6 @@
     return !isInvalid() && !isOrdinary();
   }
 
-  /// Returns whether we are parsing all comments.
-  bool isParseAllComments() const LLVM_READONLY {
-    return ParseAllComments;
-  }
-
   /// Returns raw comment text with comment markers.
   StringRef getRawText(const SourceManager &SourceMgr) const {
     if (RawTextValid)
@@ -139,18 +132,12 @@
   bool IsTrailingComment : 1;
   bool IsAlmostTrailingComment : 1;
 
-  /// When true, ordinary comments starting with "//" and "/*" will be
-  /// considered as documentation comments.
-  bool ParseAllComments : 1;
-
   /// \brief Constructor for AST deserialization.
   RawComment(SourceRange SR, CommentKind K, bool IsTrailingComment,
-             bool IsAlmostTrailingComment,
-             bool ParseAllComments) :
+             bool IsAlmostTrailingComment) :
     Range(SR), RawTextValid(false), BriefTextValid(false), Kind(K),
     IsAttached(false), IsTrailingComment(IsTrailingComment),
-    IsAlmostTrailingComment(IsAlmostTrailingComment),
-    ParseAllComments(ParseAllComments)
+    IsAlmostTrailingComment(IsAlmostTrailingComment)
   { }
 
   StringRef getRawTextSlow(const SourceManager &SourceMgr) const;
@@ -183,7 +170,8 @@
 public:
   RawCommentList(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
 
-  void addComment(const RawComment &RC, llvm::BumpPtrAllocator &Allocator);
+  void addComment(const RawComment &RC, const CommentOptions &CommentOpts,
+                  llvm::BumpPtrAllocator &Allocator);
 
   ArrayRef<RawComment *> getComments() const {
     return Comments;
diff --git a/include/clang/AST/RecordLayout.h b/include/clang/AST/RecordLayout.h
index 7a39c3b..696d44e 100644
--- a/include/clang/AST/RecordLayout.h
+++ b/include/clang/AST/RecordLayout.h
@@ -1,4 +1,4 @@
-//===--- RecordLayout.h - Layout information for a struct/union -*- C++ -*-===//
+//===- RecordLayout.h - Layout information for a struct/union ---*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,15 +14,20 @@
 #ifndef LLVM_CLANG_AST_RECORDLAYOUT_H
 #define LLVM_CLANG_AST_RECORDLAYOUT_H
 
+#include "clang/AST/ASTVector.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include <cassert>
+#include <cstdint>
 
 namespace clang {
-  class ASTContext;
-  class FieldDecl;
-  class RecordDecl;
-  class CXXRecordDecl;
+
+class ASTContext;
+class CXXRecordDecl;
 
 /// ASTRecordLayout -
 /// This class contains layout information for one RecordDecl,
@@ -42,21 +47,21 @@
     /// Whether this virtual base requires a vtordisp field in the
     /// Microsoft ABI.  These fields are required for certain operations
     /// in constructors and destructors.
-    bool HasVtorDisp;
+    bool HasVtorDisp = false;
 
   public:
+    VBaseInfo() = default;
+    VBaseInfo(CharUnits VBaseOffset, bool hasVtorDisp)
+        : VBaseOffset(VBaseOffset), HasVtorDisp(hasVtorDisp) {}
+
     bool hasVtorDisp() const { return HasVtorDisp; }
-
-    VBaseInfo() : HasVtorDisp(false) {}
-
-    VBaseInfo(CharUnits VBaseOffset, bool hasVtorDisp) :
-     VBaseOffset(VBaseOffset), HasVtorDisp(hasVtorDisp) {}
   };
 
-  typedef llvm::DenseMap<const CXXRecordDecl *, VBaseInfo>
-    VBaseOffsetsMapTy;
+  using VBaseOffsetsMapTy = llvm::DenseMap<const CXXRecordDecl *, VBaseInfo>;
 
 private:
+  friend class ASTContext;
+
   /// Size - Size of record in characters.
   CharUnits Size;
 
@@ -117,7 +122,7 @@
     const CXXRecordDecl *BaseSharingVBPtr;
     
     /// FIXME: This should really use a SmallPtrMap, once we have one in LLVM :)
-    typedef llvm::DenseMap<const CXXRecordDecl *, CharUnits> BaseOffsetsMapTy;
+    using BaseOffsetsMapTy = llvm::DenseMap<const CXXRecordDecl *, CharUnits>;
     
     /// BaseOffsets - Contains a map from base classes to their offset.
     BaseOffsetsMapTy BaseOffsets;
@@ -128,16 +133,15 @@
 
   /// CXXInfo - If the record layout is for a C++ record, this will have
   /// C++ specific information about the record.
-  CXXRecordLayoutInfo *CXXInfo;
-
-  friend class ASTContext;
+  CXXRecordLayoutInfo *CXXInfo = nullptr;
 
   ASTRecordLayout(const ASTContext &Ctx, CharUnits size, CharUnits alignment,
                   CharUnits requiredAlignment, CharUnits datasize,
                   ArrayRef<uint64_t> fieldoffsets);
 
+  using BaseOffsetsMapTy = CXXRecordLayoutInfo::BaseOffsetsMapTy;
+
   // Constructor for C++ records.
-  typedef CXXRecordLayoutInfo::BaseOffsetsMapTy BaseOffsetsMapTy;
   ASTRecordLayout(const ASTContext &Ctx,
                   CharUnits size, CharUnits alignment,
                   CharUnits requiredAlignment,
@@ -159,9 +163,9 @@
 
   void Destroy(ASTContext &Ctx);
   
-  ASTRecordLayout(const ASTRecordLayout &) = delete;
-  void operator=(const ASTRecordLayout &) = delete;
 public:
+  ASTRecordLayout(const ASTRecordLayout &) = delete;
+  ASTRecordLayout &operator=(const ASTRecordLayout &) = delete;
 
   /// getAlignment - Get the record alignment in characters.
   CharUnits getAlignment() const { return Alignment; }
@@ -305,6 +309,6 @@
   }
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_RECORDLAYOUT_H
diff --git a/include/clang/AST/RecursiveASTVisitor.h b/include/clang/AST/RecursiveASTVisitor.h
index 7fb5221..ba76ea0 100644
--- a/include/clang/AST/RecursiveASTVisitor.h
+++ b/include/clang/AST/RecursiveASTVisitor.h
@@ -63,8 +63,8 @@
   OPERATOR(PtrMemD) OPERATOR(PtrMemI) OPERATOR(Mul) OPERATOR(Div)              \
       OPERATOR(Rem) OPERATOR(Add) OPERATOR(Sub) OPERATOR(Shl) OPERATOR(Shr)    \
       OPERATOR(LT) OPERATOR(GT) OPERATOR(LE) OPERATOR(GE) OPERATOR(EQ)         \
-      OPERATOR(NE) OPERATOR(And) OPERATOR(Xor) OPERATOR(Or) OPERATOR(LAnd)     \
-      OPERATOR(LOr) OPERATOR(Assign) OPERATOR(Comma)
+      OPERATOR(NE) OPERATOR(Cmp) OPERATOR(And) OPERATOR(Xor) OPERATOR(Or)      \
+      OPERATOR(LAnd) OPERATOR(LOr) OPERATOR(Assign) OPERATOR(Comma)
 
 // All compound assign operators.
 #define CAO_LIST()                                                             \
diff --git a/include/clang/AST/Redeclarable.h b/include/clang/AST/Redeclarable.h
index 89a9d3c..86b0f35 100644
--- a/include/clang/AST/Redeclarable.h
+++ b/include/clang/AST/Redeclarable.h
@@ -1,4 +1,4 @@
-//===-- Redeclarable.h - Base for Decls that can be redeclared -*- C++ -*-====//
+//===- Redeclarable.h - Base for Decls that can be redeclared --*- C++ -*-====//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,11 +15,18 @@
 #define LLVM_CLANG_AST_REDECLARABLE_H
 
 #include "clang/AST/ExternalASTSource.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
+#include <cassert>
+#include <cstddef>
 #include <iterator>
 
 namespace clang {
+
 class ASTContext;
+class Decl;
 
 // Some notes on redeclarables:
 //
@@ -82,21 +89,21 @@
   class DeclLink {
     /// A pointer to a known latest declaration, either statically known or
     /// generationally updated as decls are added by an external source.
-    typedef LazyGenerationalUpdatePtr<const Decl*, Decl*,
-                                      &ExternalASTSource::CompleteRedeclChain>
-                                          KnownLatest;
+    using KnownLatest =
+        LazyGenerationalUpdatePtr<const Decl *, Decl *,
+                                  &ExternalASTSource::CompleteRedeclChain>;
 
     /// We store a pointer to the ASTContext in the UninitializedLatest
     /// pointer, but to avoid circular type dependencies when we steal the low
     /// bits of this pointer, we use a raw void* here.
-    typedef const void *UninitializedLatest;
+    using UninitializedLatest = const void *;
 
-    typedef Decl *Previous;
+    using Previous = Decl *;
 
     /// A pointer to either an uninitialized latest declaration (where either
     /// we've not yet set the previous decl or there isn't one), or to a known
     /// previous declaration.
-    typedef llvm::PointerUnion<Previous, UninitializedLatest> NotKnownLatest;
+    using NotKnownLatest = llvm::PointerUnion<Previous, UninitializedLatest>;
 
     mutable llvm::PointerUnion<NotKnownLatest, KnownLatest> Next;
 
@@ -106,8 +113,7 @@
 
     DeclLink(LatestTag, const ASTContext &Ctx)
         : Next(NotKnownLatest(reinterpret_cast<UninitializedLatest>(&Ctx))) {}
-    DeclLink(PreviousTag, decl_type *D)
-        : Next(NotKnownLatest(Previous(D))) {}
+    DeclLink(PreviousTag, decl_type *D) : Next(NotKnownLatest(Previous(D))) {}
 
     bool NextIsPrevious() const {
       return Next.is<NotKnownLatest>() &&
@@ -182,6 +188,7 @@
   ///
   /// If there is only one declaration, it is <pointer to self, true>
   DeclLink RedeclLink;
+
   decl_type *First;
 
   decl_type *getNextRedeclaration() const {
@@ -189,8 +196,12 @@
   }
 
 public:
- Redeclarable(const ASTContext &Ctx)
-     : RedeclLink(LatestDeclLink(Ctx)), First(static_cast<decl_type *>(this)) {}
+  friend class ASTDeclReader;
+  friend class ASTDeclWriter;
+
+  Redeclarable(const ASTContext &Ctx)
+      : RedeclLink(LatestDeclLink(Ctx)),
+        First(static_cast<decl_type *>(this)) {}
 
   /// \brief Return the previous declaration of this declaration or NULL if this
   /// is the first declaration.
@@ -232,20 +243,19 @@
   /// \brief Iterates through all the redeclarations of the same decl.
   class redecl_iterator {
     /// Current - The current declaration.
-    decl_type *Current;
+    decl_type *Current = nullptr;
     decl_type *Starter;
-    bool PassedFirst;
+    bool PassedFirst = false;
 
   public:
-    typedef decl_type*                value_type;
-    typedef decl_type*                reference;
-    typedef decl_type*                pointer;
-    typedef std::forward_iterator_tag iterator_category;
-    typedef std::ptrdiff_t            difference_type;
+    using value_type = decl_type *;
+    using reference = decl_type *;
+    using pointer = decl_type *;
+    using iterator_category = std::forward_iterator_tag;
+    using difference_type = std::ptrdiff_t;
 
-    redecl_iterator() : Current(nullptr) { }
-    explicit redecl_iterator(decl_type *C)
-      : Current(C), Starter(C), PassedFirst(false) { }
+    redecl_iterator() = default;
+    explicit redecl_iterator(decl_type *C) : Current(C), Starter(C) {}
 
     reference operator*() const { return Current; }
     pointer operator->() const { return Current; }
@@ -282,7 +292,7 @@
     }
   };
 
-  typedef llvm::iterator_range<redecl_iterator> redecl_range;
+  using redecl_range = llvm::iterator_range<redecl_iterator>;
 
   /// \brief Returns an iterator range for all the redeclarations of the same
   /// decl. It will iterate at least once (when this decl is the only one).
@@ -294,9 +304,6 @@
 
   redecl_iterator redecls_begin() const { return redecls().begin(); }
   redecl_iterator redecls_end() const { return redecls().end(); }
-
-  friend class ASTDeclReader;
-  friend class ASTDeclWriter;
 };
 
 /// \brief Get the primary declaration for a declaration from an AST file. That
@@ -309,7 +316,7 @@
 template<typename decl_type>
 class Mergeable {
 public:
-  Mergeable() {}
+  Mergeable() = default;
 
   /// \brief Return the first declaration of this declaration or itself if this
   /// is the only declaration.
@@ -344,7 +351,7 @@
 /// remember to call getCanonicalDecl() everywhere.
 template <typename decl_type> class CanonicalDeclPtr {
 public:
-  CanonicalDeclPtr() : Ptr(nullptr) {}
+  CanonicalDeclPtr() = default;
   CanonicalDeclPtr(decl_type *Ptr)
       : Ptr(Ptr ? Ptr->getCanonicalDecl() : nullptr) {}
   CanonicalDeclPtr(const CanonicalDeclPtr &) = default;
@@ -362,11 +369,13 @@
 private:
   friend struct llvm::DenseMapInfo<CanonicalDeclPtr<decl_type>>;
 
-  decl_type *Ptr;
+  decl_type *Ptr = nullptr;
 };
+
 } // namespace clang
 
 namespace llvm {
+
 template <typename decl_type>
 struct DenseMapInfo<clang::CanonicalDeclPtr<decl_type>> {
   using CanonicalDeclPtr = clang::CanonicalDeclPtr<decl_type>;
@@ -395,6 +404,7 @@
     return BaseInfo::isEqual(LHS, RHS);
   }
 };
+
 } // namespace llvm
 
-#endif
+#endif // LLVM_CLANG_AST_REDECLARABLE_H
diff --git a/include/clang/AST/Stmt.h b/include/clang/AST/Stmt.h
index 795f4d6..b27dbfa 100644
--- a/include/clang/AST/Stmt.h
+++ b/include/clang/AST/Stmt.h
@@ -1,4 +1,4 @@
-//===--- Stmt.h - Classes for representing statements -----------*- C++ -*-===//
+//===- Stmt.h - Classes for representing statements -------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -22,34 +22,40 @@
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
 #include <string>
 
 namespace llvm {
-  class FoldingSetNodeID;
-}
+
+class FoldingSetNodeID;
+
+} // namespace llvm
 
 namespace clang {
-  class ASTContext;
-  class Attr;
-  class CapturedDecl;
-  class Decl;
-  class Expr;
-  class IdentifierInfo;
-  class LabelDecl;
-  class ODRHash;
-  class ParmVarDecl;
-  class PrinterHelper;
-  struct PrintingPolicy;
-  class QualType;
-  class RecordDecl;
-  class SourceManager;
-  class StringLiteral;
-  class SwitchStmt;
-  class Token;
-  class VarDecl;
+
+class ASTContext;
+class Attr;
+class CapturedDecl;
+class Decl;
+class Expr;
+class LabelDecl;
+class ODRHash;
+class PrinterHelper;
+struct PrintingPolicy;
+class RecordDecl;
+class SourceManager;
+class StringLiteral;
+class Token;
+class VarDecl;
 
 //===----------------------------------------------------------------------===//
 // AST classes for statements.
@@ -72,9 +78,13 @@
 
   // Make vanilla 'new' and 'delete' illegal for Stmts.
 protected:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+
   void *operator new(size_t bytes) noexcept {
     llvm_unreachable("Stmts cannot be allocated with regular 'new'.");
   }
+
   void operator delete(void *data) noexcept {
     llvm_unreachable("Stmts cannot be released with regular 'delete'.");
   }
@@ -89,6 +99,7 @@
 
   class CompoundStmtBitfields {
     friend class CompoundStmt;
+
     unsigned : NumStmtBits;
 
     unsigned NumStmts : 32 - NumStmtBits;
@@ -96,34 +107,36 @@
 
   class IfStmtBitfields {
     friend class IfStmt;
+
     unsigned : NumStmtBits;
 
     unsigned IsConstexpr : 1;
   };
 
   class ExprBitfields {
-    friend class Expr;
-    friend class DeclRefExpr; // computeDependence
-    friend class InitListExpr; // ctor
-    friend class DesignatedInitExpr; // ctor
-    friend class BlockDeclRefExpr; // ctor
     friend class ASTStmtReader; // deserialization
-    friend class CXXNewExpr; // ctor
-    friend class DependentScopeDeclRefExpr; // ctor
-    friend class CXXConstructExpr; // ctor
+    friend class AtomicExpr; // ctor
+    friend class BlockDeclRefExpr; // ctor
     friend class CallExpr; // ctor
-    friend class OffsetOfExpr; // ctor
-    friend class ObjCMessageExpr; // ctor
+    friend class CXXConstructExpr; // ctor
+    friend class CXXDependentScopeMemberExpr; // ctor
+    friend class CXXNewExpr; // ctor
+    friend class CXXUnresolvedConstructExpr; // ctor
+    friend class DeclRefExpr; // computeDependence
+    friend class DependentScopeDeclRefExpr; // ctor
+    friend class DesignatedInitExpr; // ctor
+    friend class Expr;
+    friend class InitListExpr; // ctor
     friend class ObjCArrayLiteral; // ctor
     friend class ObjCDictionaryLiteral; // ctor
-    friend class ShuffleVectorExpr; // ctor
-    friend class ParenListExpr; // ctor
-    friend class CXXUnresolvedConstructExpr; // ctor
-    friend class CXXDependentScopeMemberExpr; // ctor
-    friend class OverloadExpr; // ctor
-    friend class PseudoObjectExpr; // ctor
-    friend class AtomicExpr; // ctor
+    friend class ObjCMessageExpr; // ctor
+    friend class OffsetOfExpr; // ctor
     friend class OpaqueValueExpr; // ctor
+    friend class OverloadExpr; // ctor
+    friend class ParenListExpr; // ctor
+    friend class PseudoObjectExpr; // ctor
+    friend class ShuffleVectorExpr; // ctor
+
     unsigned : NumStmtBits;
 
     unsigned ValueKind : 2;
@@ -137,6 +150,7 @@
 
   class CharacterLiteralBitfields {
     friend class CharacterLiteral;
+
     unsigned : NumExprBits;
 
     unsigned Kind : 3;
@@ -153,6 +167,7 @@
 
   class FloatingLiteralBitfields {
     friend class FloatingLiteral;
+
     unsigned : NumExprBits;
 
     unsigned Semantics : 3; // Provides semantics for APFloat construction
@@ -161,6 +176,7 @@
 
   class UnaryExprOrTypeTraitExprBitfields {
     friend class UnaryExprOrTypeTraitExpr;
+
     unsigned : NumExprBits;
 
     unsigned Kind : 2;
@@ -168,8 +184,9 @@
   };
 
   class DeclRefExprBitfields {
-    friend class DeclRefExpr;
     friend class ASTStmtReader; // deserialization
+    friend class DeclRefExpr;
+
     unsigned : NumExprBits;
 
     unsigned HasQualifier : 1;
@@ -181,6 +198,7 @@
 
   class CastExprBitfields {
     friend class CastExpr;
+
     unsigned : NumExprBits;
 
     unsigned Kind : 6;
@@ -189,14 +207,15 @@
 
   class CallExprBitfields {
     friend class CallExpr;
+
     unsigned : NumExprBits;
 
     unsigned NumPreArgs : 1;
   };
 
   class ExprWithCleanupsBitfields {
-    friend class ExprWithCleanups;
     friend class ASTStmtReader; // deserialization
+    friend class ExprWithCleanups;
 
     unsigned : NumExprBits;
 
@@ -207,8 +226,8 @@
   };
 
   class PseudoObjectExprBitfields {
-    friend class PseudoObjectExpr;
     friend class ASTStmtReader; // deserialization
+    friend class PseudoObjectExpr;
 
     unsigned : NumExprBits;
 
@@ -220,6 +239,7 @@
 
   class ObjCIndirectCopyRestoreExprBitfields {
     friend class ObjCIndirectCopyRestoreExpr;
+
     unsigned : NumExprBits;
 
     unsigned ShouldCopy : 1;
@@ -236,9 +256,9 @@
   };
 
   class TypeTraitExprBitfields {
-    friend class TypeTraitExpr;
     friend class ASTStmtReader;
     friend class ASTStmtWriter;
+    friend class TypeTraitExpr;
     
     unsigned : NumExprBits;
     
@@ -280,9 +300,6 @@
     CoawaitExprBitfields CoawaitBits;
   };
 
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
-
 public:
   // Only allow allocation of Stmts using the allocator in ASTContext
   // or by doing a placement new.
@@ -305,7 +322,7 @@
   /// \brief A placeholder type used to construct an empty shell of a
   /// type, that will be filled in later (e.g., by some
   /// de-serialization).
-  struct EmptyShell { };
+  struct EmptyShell {};
 
 protected:
   /// Iterator for iterating over Stmt * arrays that contain only Expr *
@@ -361,6 +378,7 @@
   StmtClass getStmtClass() const {
     return static_cast<StmtClass>(StmtBits.sClass);
   }
+
   const char *getStmtClassName() const;
 
   /// SourceLocation tokens are not useful in isolation - they are low level
@@ -406,6 +424,9 @@
   /// \brief Skip no-op (attributed, compound) container stmts and skip captured
   /// stmt at the top, if \a IgnoreCaptured is true.
   Stmt *IgnoreContainers(bool IgnoreCaptured = false);
+  const Stmt *IgnoreContainers(bool IgnoreCaptured = false) const {
+    return const_cast<Stmt *>(this)->IgnoreContainers(IgnoreCaptured);
+  }
 
   const Stmt *stripLabelLikeStatements() const;
   Stmt *stripLabelLikeStatements() {
@@ -416,11 +437,11 @@
   /// Child Iterators: All subclasses must implement 'children'
   /// to permit easy iteration over the substatements/subexpessions of an
   /// AST node.  This permits easy iteration over all nodes in the AST.
-  typedef StmtIterator       child_iterator;
-  typedef ConstStmtIterator  const_child_iterator;
+  using child_iterator = StmtIterator;
+  using const_child_iterator = ConstStmtIterator;
 
-  typedef llvm::iterator_range<child_iterator> child_range;
-  typedef llvm::iterator_range<const_child_iterator> const_child_range;
+  using child_range = llvm::iterator_range<child_iterator>;
+  using const_child_range = llvm::iterator_range<const_child_iterator>;
 
   child_range children();
   const_child_range children() const {
@@ -463,18 +484,16 @@
 /// expressions. For example, CompoundStmt mixes statements, expressions
 /// and declarations (variables, types). Another example is ForStmt, where
 /// the first statement can be an expression or a declaration.
-///
 class DeclStmt : public Stmt {
   DeclGroupRef DG;
   SourceLocation StartLoc, EndLoc;
 
 public:
-  DeclStmt(DeclGroupRef dg, SourceLocation startLoc,
-           SourceLocation endLoc) : Stmt(DeclStmtClass), DG(dg),
-                                    StartLoc(startLoc), EndLoc(endLoc) {}
+  DeclStmt(DeclGroupRef dg, SourceLocation startLoc, SourceLocation endLoc)
+      : Stmt(DeclStmtClass), DG(dg), StartLoc(startLoc), EndLoc(endLoc) {}
 
   /// \brief Build an empty declaration statement.
-  explicit DeclStmt(EmptyShell Empty) : Stmt(DeclStmtClass, Empty) { }
+  explicit DeclStmt(EmptyShell Empty) : Stmt(DeclStmtClass, Empty) {}
 
   /// isSingleDecl - This method returns true if this DeclStmt refers
   /// to a single Decl.
@@ -507,10 +526,10 @@
                        child_iterator(DG.end(), DG.end()));
   }
 
-  typedef DeclGroupRef::iterator decl_iterator;
-  typedef DeclGroupRef::const_iterator const_decl_iterator;
-  typedef llvm::iterator_range<decl_iterator> decl_range;
-  typedef llvm::iterator_range<const_decl_iterator> decl_const_range;
+  using decl_iterator = DeclGroupRef::iterator;
+  using const_decl_iterator = DeclGroupRef::const_iterator;
+  using decl_range = llvm::iterator_range<decl_iterator>;
+  using decl_const_range = llvm::iterator_range<const_decl_iterator>;
 
   decl_range decls() { return decl_range(decl_begin(), decl_end()); }
   decl_const_range decls() const {
@@ -521,10 +540,12 @@
   const_decl_iterator decl_begin() const { return DG.begin(); }
   const_decl_iterator decl_end() const { return DG.end(); }
 
-  typedef std::reverse_iterator<decl_iterator> reverse_decl_iterator;
+  using reverse_decl_iterator = std::reverse_iterator<decl_iterator>;
+
   reverse_decl_iterator decl_rbegin() {
     return reverse_decl_iterator(decl_end());
   }
+
   reverse_decl_iterator decl_rend() {
     return reverse_decl_iterator(decl_begin());
   }
@@ -540,15 +561,18 @@
   ///   #define CALL(x)
   ///   CALL(0);
   /// @endcode
-  bool HasLeadingEmptyMacro;
+  bool HasLeadingEmptyMacro = false;
+
 public:
+  friend class ASTStmtReader;
+  friend class ASTStmtWriter;
+
   NullStmt(SourceLocation L, bool hasLeadingEmptyMacro = false)
-    : Stmt(NullStmtClass), SemiLoc(L),
-      HasLeadingEmptyMacro(hasLeadingEmptyMacro) {}
+      : Stmt(NullStmtClass), SemiLoc(L),
+        HasLeadingEmptyMacro(hasLeadingEmptyMacro) {}
 
   /// \brief Build an empty null statement.
-  explicit NullStmt(EmptyShell Empty) : Stmt(NullStmtClass, Empty),
-      HasLeadingEmptyMacro(false) { }
+  explicit NullStmt(EmptyShell Empty) : Stmt(NullStmtClass, Empty) {}
 
   SourceLocation getSemiLoc() const { return SemiLoc; }
   void setSemiLoc(SourceLocation L) { SemiLoc = L; }
@@ -565,79 +589,85 @@
   child_range children() {
     return child_range(child_iterator(), child_iterator());
   }
-
-  friend class ASTStmtReader;
-  friend class ASTStmtWriter;
 };
 
 /// CompoundStmt - This represents a group of statements like { stmt stmt }.
-///
-class CompoundStmt : public Stmt {
-  Stmt** Body;
+class CompoundStmt final : public Stmt,
+                           private llvm::TrailingObjects<CompoundStmt, Stmt *> {
+  friend class ASTStmtReader;
+  friend TrailingObjects;
+
   SourceLocation LBraceLoc, RBraceLoc;
 
-  friend class ASTStmtReader;
+  CompoundStmt(ArrayRef<Stmt *> Stmts, SourceLocation LB, SourceLocation RB);
+  explicit CompoundStmt(EmptyShell Empty) : Stmt(CompoundStmtClass, Empty) {}
+
+  void setStmts(ArrayRef<Stmt *> Stmts);
 
 public:
-  CompoundStmt(const ASTContext &C, ArrayRef<Stmt*> Stmts,
-               SourceLocation LB, SourceLocation RB);
+  static CompoundStmt *Create(const ASTContext &C, ArrayRef<Stmt *> Stmts,
+                              SourceLocation LB, SourceLocation RB);
 
   // \brief Build an empty compound statement with a location.
   explicit CompoundStmt(SourceLocation Loc)
-    : Stmt(CompoundStmtClass), Body(nullptr), LBraceLoc(Loc), RBraceLoc(Loc) {
+      : Stmt(CompoundStmtClass), LBraceLoc(Loc), RBraceLoc(Loc) {
     CompoundStmtBits.NumStmts = 0;
   }
 
   // \brief Build an empty compound statement.
-  explicit CompoundStmt(EmptyShell Empty)
-    : Stmt(CompoundStmtClass, Empty), Body(nullptr) {
-    CompoundStmtBits.NumStmts = 0;
-  }
-
-  void setStmts(const ASTContext &C, ArrayRef<Stmt *> Stmts);
+  static CompoundStmt *CreateEmpty(const ASTContext &C, unsigned NumStmts);
 
   bool body_empty() const { return CompoundStmtBits.NumStmts == 0; }
   unsigned size() const { return CompoundStmtBits.NumStmts; }
 
-  typedef Stmt** body_iterator;
-  typedef llvm::iterator_range<body_iterator> body_range;
+  using body_iterator = Stmt **;
+  using body_range = llvm::iterator_range<body_iterator>;
 
   body_range body() { return body_range(body_begin(), body_end()); }
-  body_iterator body_begin() { return Body; }
-  body_iterator body_end() { return Body + size(); }
-  Stmt *body_front() { return !body_empty() ? Body[0] : nullptr; }
-  Stmt *body_back() { return !body_empty() ? Body[size()-1] : nullptr; }
+  body_iterator body_begin() { return getTrailingObjects<Stmt *>(); }
+  body_iterator body_end() { return body_begin() + size(); }
+  Stmt *body_front() { return !body_empty() ? body_begin()[0] : nullptr; }
+  Stmt *body_back() {
+    return !body_empty() ? body_begin()[size() - 1] : nullptr;
+  }
 
   void setLastStmt(Stmt *S) {
     assert(!body_empty() && "setLastStmt");
-    Body[size()-1] = S;
+    body_begin()[size() - 1] = S;
   }
 
-  typedef Stmt* const * const_body_iterator;
-  typedef llvm::iterator_range<const_body_iterator> body_const_range;
+  using const_body_iterator = Stmt* const *;
+  using body_const_range = llvm::iterator_range<const_body_iterator>;
 
   body_const_range body() const {
     return body_const_range(body_begin(), body_end());
   }
-  const_body_iterator body_begin() const { return Body; }
-  const_body_iterator body_end() const { return Body + size(); }
-  const Stmt *body_front() const {
-    return !body_empty() ? Body[0] : nullptr;
+
+  const_body_iterator body_begin() const {
+    return getTrailingObjects<Stmt *>();
   }
-  const Stmt *body_back() const {
-    return !body_empty() ? Body[size() - 1] : nullptr;
+  const_body_iterator body_end() const { return body_begin() + size(); }
+
+  const Stmt *body_front() const {
+    return !body_empty() ? body_begin()[0] : nullptr;
   }
 
-  typedef std::reverse_iterator<body_iterator> reverse_body_iterator;
+  const Stmt *body_back() const {
+    return !body_empty() ? body_begin()[size() - 1] : nullptr;
+  }
+
+  using reverse_body_iterator = std::reverse_iterator<body_iterator>;
+
   reverse_body_iterator body_rbegin() {
     return reverse_body_iterator(body_end());
   }
+
   reverse_body_iterator body_rend() {
     return reverse_body_iterator(body_begin());
   }
 
-  typedef std::reverse_iterator<const_body_iterator>
-          const_reverse_body_iterator;
+  using const_reverse_body_iterator =
+      std::reverse_iterator<const_body_iterator>;
 
   const_reverse_body_iterator body_rbegin() const {
     return const_reverse_body_iterator(body_end());
@@ -658,13 +688,10 @@
   }
 
   // Iterators
-  child_range children() {
-    return child_range(Body, Body + CompoundStmtBits.NumStmts);
-  }
+  child_range children() { return child_range(body_begin(), body_end()); }
 
   const_child_range children() const {
-    return const_child_range(child_iterator(Body),
-                             child_iterator(Body + CompoundStmtBits.NumStmts));
+    return const_child_range(body_begin(), body_end());
   }
 };
 
@@ -673,16 +700,14 @@
 protected:
   // A pointer to the following CaseStmt or DefaultStmt class,
   // used by SwitchStmt.
-  SwitchCase *NextSwitchCase;
+  SwitchCase *NextSwitchCase = nullptr;
   SourceLocation KeywordLoc;
   SourceLocation ColonLoc;
 
   SwitchCase(StmtClass SC, SourceLocation KWLoc, SourceLocation ColonLoc)
-    : Stmt(SC), NextSwitchCase(nullptr), KeywordLoc(KWLoc), ColonLoc(ColonLoc) {
-  }
+      : Stmt(SC), KeywordLoc(KWLoc), ColonLoc(ColonLoc) {}
 
-  SwitchCase(StmtClass SC, EmptyShell)
-    : Stmt(SC), NextSwitchCase(nullptr) {}
+  SwitchCase(StmtClass SC, EmptyShell) : Stmt(SC) {}
 
 public:
   const SwitchCase *getNextSwitchCase() const { return NextSwitchCase; }
@@ -715,6 +740,7 @@
   enum { LHS, RHS, SUBSTMT, END_EXPR };
   Stmt* SubExprs[END_EXPR];  // The expression for the RHS is Non-null for
                              // GNU "case 1 ... 4" extension
+
 public:
   CaseStmt(Expr *lhs, Expr *rhs, SourceLocation caseLoc,
            SourceLocation ellipsisLoc, SourceLocation colonLoc)
@@ -726,7 +752,7 @@
   }
 
   /// \brief Build an empty switch case statement.
-  explicit CaseStmt(EmptyShell Empty) : SwitchCase(CaseStmtClass, Empty) { }
+  explicit CaseStmt(EmptyShell Empty) : SwitchCase(CaseStmtClass, Empty) {}
 
   SourceLocation getCaseLoc() const { return KeywordLoc; }
   void setCaseLoc(SourceLocation L) { KeywordLoc = L; }
@@ -742,9 +768,11 @@
   const Expr *getLHS() const {
     return reinterpret_cast<const Expr*>(SubExprs[LHS]);
   }
+
   const Expr *getRHS() const {
     return reinterpret_cast<const Expr*>(SubExprs[RHS]);
   }
+
   const Stmt *getSubStmt() const { return SubExprs[SUBSTMT]; }
 
   void setSubStmt(Stmt *S) { SubExprs[SUBSTMT] = S; }
@@ -752,6 +780,7 @@
   void setRHS(Expr *Val) { SubExprs[RHS] = reinterpret_cast<Stmt*>(Val); }
 
   SourceLocation getLocStart() const LLVM_READONLY { return KeywordLoc; }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     // Handle deeply nested case statements with iteration instead of recursion.
     const CaseStmt *CS = this;
@@ -773,13 +802,14 @@
 
 class DefaultStmt : public SwitchCase {
   Stmt* SubStmt;
+
 public:
   DefaultStmt(SourceLocation DL, SourceLocation CL, Stmt *substmt) :
     SwitchCase(DefaultStmtClass, DL, CL), SubStmt(substmt) {}
 
   /// \brief Build an empty default statement.
   explicit DefaultStmt(EmptyShell Empty)
-    : SwitchCase(DefaultStmtClass, Empty) { }
+      : SwitchCase(DefaultStmtClass, Empty) {}
 
   Stmt *getSubStmt() { return SubStmt; }
   const Stmt *getSubStmt() const { return SubStmt; }
@@ -809,7 +839,6 @@
 
 /// LabelStmt - Represents a label, which has a substatement.  For example:
 ///    foo: return;
-///
 class LabelStmt : public Stmt {
   SourceLocation IdentLoc;
   LabelDecl *TheDecl;
@@ -824,7 +853,7 @@
   }
 
   // \brief Build an empty label statement.
-  explicit LabelStmt(EmptyShell Empty) : Stmt(LabelStmtClass, Empty) { }
+  explicit LabelStmt(EmptyShell Empty) : Stmt(LabelStmtClass, Empty) {}
 
   SourceLocation getIdentLoc() const { return IdentLoc; }
   LabelDecl *getDecl() const { return TheDecl; }
@@ -845,19 +874,20 @@
   }
 };
 
-
 /// \brief Represents an attribute applied to a statement.
 ///
 /// Represents an attribute applied to a statement. For example:
 ///   [[omp::for(...)]] for (...) { ... }
-///
-class AttributedStmt : public Stmt {
+class AttributedStmt final
+    : public Stmt,
+      private llvm::TrailingObjects<AttributedStmt, const Attr *> {
+  friend class ASTStmtReader;
+  friend TrailingObjects;
+
   Stmt *SubStmt;
   SourceLocation AttrLoc;
   unsigned NumAttrs;
 
-  friend class ASTStmtReader;
-
   AttributedStmt(SourceLocation Loc, ArrayRef<const Attr*> Attrs, Stmt *SubStmt)
     : Stmt(AttributedStmtClass), SubStmt(SubStmt), AttrLoc(Loc),
       NumAttrs(Attrs.size()) {
@@ -870,15 +900,14 @@
   }
 
   const Attr *const *getAttrArrayPtr() const {
-    return reinterpret_cast<const Attr *const *>(this + 1);
+    return getTrailingObjects<const Attr *>();
   }
-  const Attr **getAttrArrayPtr() {
-    return reinterpret_cast<const Attr **>(this + 1);
-  }
+  const Attr **getAttrArrayPtr() { return getTrailingObjects<const Attr *>(); }
 
 public:
   static AttributedStmt *Create(const ASTContext &C, SourceLocation Loc,
                                 ArrayRef<const Attr*> Attrs, Stmt *SubStmt);
+
   // \brief Build an empty attributed statement.
   static AttributedStmt *CreateEmpty(const ASTContext &C, unsigned NumAttrs);
 
@@ -886,6 +915,7 @@
   ArrayRef<const Attr*> getAttrs() const {
     return llvm::makeArrayRef(getAttrArrayPtr(), NumAttrs);
   }
+
   Stmt *getSubStmt() { return SubStmt; }
   const Stmt *getSubStmt() const { return SubStmt; }
 
@@ -899,9 +929,7 @@
   }
 };
 
-
 /// IfStmt - This represents an if/then/else.
-///
 class IfStmt : public Stmt {
   enum { INIT, VAR, COND, THEN, ELSE, END_EXPR };
   Stmt* SubExprs[END_EXPR];
@@ -916,7 +944,7 @@
          Stmt *elsev = nullptr);
 
   /// \brief Build an empty if/then/else statement
-  explicit IfStmt(EmptyShell Empty) : Stmt(IfStmtClass, Empty) { }
+  explicit IfStmt(EmptyShell Empty) : Stmt(IfStmtClass, Empty) {}
 
   /// \brief Retrieve the variable declared in this "if" statement, if any.
   ///
@@ -960,6 +988,7 @@
   bool isObjCAvailabilityCheck() const;
 
   SourceLocation getLocStart() const LLVM_READONLY { return IfLoc; }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     if (SubExprs[ELSE])
       return SubExprs[ELSE]->getLocEnd();
@@ -979,11 +1008,11 @@
 };
 
 /// SwitchStmt - This represents a 'switch' stmt.
-///
 class SwitchStmt : public Stmt {
   SourceLocation SwitchLoc;
   enum { INIT, VAR, COND, BODY, END_EXPR };
   Stmt* SubExprs[END_EXPR];
+
   // This points to a linked list of case and default statements and, if the
   // SwitchStmt is a switch on an enum value, records whether all the enum
   // values were covered by CaseStmts.  The coverage information value is meant
@@ -994,7 +1023,7 @@
   SwitchStmt(const ASTContext &C, Stmt *Init, VarDecl *Var, Expr *cond);
 
   /// \brief Build a empty switch statement.
-  explicit SwitchStmt(EmptyShell Empty) : Stmt(SwitchStmtClass, Empty) { }
+  explicit SwitchStmt(EmptyShell Empty) : Stmt(SwitchStmtClass, Empty) {}
 
   /// \brief Retrieve the variable declared in this "switch" statement, if any.
   ///
@@ -1037,6 +1066,7 @@
     SubExprs[BODY] = S;
     SwitchLoc = SL;
   }
+
   void addSwitchCase(SwitchCase *SC) {
     assert(!SC->getNextSwitchCase()
            && "case/default already added to a switch");
@@ -1053,6 +1083,7 @@
   bool isAllEnumCasesCovered() const { return FirstCase.getInt(); }
 
   SourceLocation getLocStart() const LLVM_READONLY { return SwitchLoc; }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return SubExprs[BODY] ? SubExprs[BODY]->getLocEnd() : SubExprs[COND]->getLocEnd();
   }
@@ -1067,19 +1098,18 @@
   }
 };
 
-
 /// WhileStmt - This represents a 'while' stmt.
-///
 class WhileStmt : public Stmt {
   SourceLocation WhileLoc;
   enum { VAR, COND, BODY, END_EXPR };
   Stmt* SubExprs[END_EXPR];
+
 public:
   WhileStmt(const ASTContext &C, VarDecl *Var, Expr *cond, Stmt *body,
             SourceLocation WL);
 
   /// \brief Build an empty while statement.
-  explicit WhileStmt(EmptyShell Empty) : Stmt(WhileStmtClass, Empty) { }
+  explicit WhileStmt(EmptyShell Empty) : Stmt(WhileStmtClass, Empty) {}
 
   /// \brief Retrieve the variable declared in this "while" statement, if any.
   ///
@@ -1109,6 +1139,7 @@
   void setWhileLoc(SourceLocation L) { WhileLoc = L; }
 
   SourceLocation getLocStart() const LLVM_READONLY { return WhileLoc; }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return SubExprs[BODY]->getLocEnd();
   }
@@ -1124,7 +1155,6 @@
 };
 
 /// DoStmt - This represents a 'do/while' stmt.
-///
 class DoStmt : public Stmt {
   SourceLocation DoLoc;
   enum { BODY, COND, END_EXPR };
@@ -1141,7 +1171,7 @@
   }
 
   /// \brief Build an empty do-while statement.
-  explicit DoStmt(EmptyShell Empty) : Stmt(DoStmtClass, Empty) { }
+  explicit DoStmt(EmptyShell Empty) : Stmt(DoStmtClass, Empty) {}
 
   Expr *getCond() { return reinterpret_cast<Expr*>(SubExprs[COND]); }
   const Expr *getCond() const { return reinterpret_cast<Expr*>(SubExprs[COND]);}
@@ -1171,11 +1201,9 @@
   }
 };
 
-
 /// ForStmt - This represents a 'for (init;cond;inc)' stmt.  Note that any of
 /// the init/cond/inc parts of the ForStmt will be null if they were not
 /// specified in the source.
-///
 class ForStmt : public Stmt {
   SourceLocation ForLoc;
   enum { INIT, CONDVAR, COND, INC, BODY, END_EXPR };
@@ -1188,7 +1216,7 @@
           SourceLocation RP);
 
   /// \brief Build an empty for statement.
-  explicit ForStmt(EmptyShell Empty) : Stmt(ForStmtClass, Empty) { }
+  explicit ForStmt(EmptyShell Empty) : Stmt(ForStmtClass, Empty) {}
 
   Stmt *getInit() { return SubExprs[INIT]; }
 
@@ -1231,6 +1259,7 @@
   void setRParenLoc(SourceLocation L) { RParenLoc = L; }
 
   SourceLocation getLocStart() const LLVM_READONLY { return ForLoc; }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return SubExprs[BODY]->getLocEnd();
   }
@@ -1246,17 +1275,17 @@
 };
 
 /// GotoStmt - This represents a direct goto.
-///
 class GotoStmt : public Stmt {
   LabelDecl *Label;
   SourceLocation GotoLoc;
   SourceLocation LabelLoc;
+
 public:
   GotoStmt(LabelDecl *label, SourceLocation GL, SourceLocation LL)
-    : Stmt(GotoStmtClass), Label(label), GotoLoc(GL), LabelLoc(LL) {}
+      : Stmt(GotoStmtClass), Label(label), GotoLoc(GL), LabelLoc(LL) {}
 
   /// \brief Build an empty goto statement.
-  explicit GotoStmt(EmptyShell Empty) : Stmt(GotoStmtClass, Empty) { }
+  explicit GotoStmt(EmptyShell Empty) : Stmt(GotoStmtClass, Empty) {}
 
   LabelDecl *getLabel() const { return Label; }
   void setLabel(LabelDecl *D) { Label = D; }
@@ -1280,11 +1309,11 @@
 };
 
 /// IndirectGotoStmt - This represents an indirect goto.
-///
 class IndirectGotoStmt : public Stmt {
   SourceLocation GotoLoc;
   SourceLocation StarLoc;
   Stmt *Target;
+
 public:
   IndirectGotoStmt(SourceLocation gotoLoc, SourceLocation starLoc,
                    Expr *target)
@@ -1293,7 +1322,7 @@
 
   /// \brief Build an empty indirect goto statement.
   explicit IndirectGotoStmt(EmptyShell Empty)
-    : Stmt(IndirectGotoStmtClass, Empty) { }
+      : Stmt(IndirectGotoStmtClass, Empty) {}
 
   void setGotoLoc(SourceLocation L) { GotoLoc = L; }
   SourceLocation getGotoLoc() const { return GotoLoc; }
@@ -1322,16 +1351,15 @@
   child_range children() { return child_range(&Target, &Target+1); }
 };
 
-
 /// ContinueStmt - This represents a continue.
-///
 class ContinueStmt : public Stmt {
   SourceLocation ContinueLoc;
+
 public:
   ContinueStmt(SourceLocation CL) : Stmt(ContinueStmtClass), ContinueLoc(CL) {}
 
   /// \brief Build an empty continue statement.
-  explicit ContinueStmt(EmptyShell Empty) : Stmt(ContinueStmtClass, Empty) { }
+  explicit ContinueStmt(EmptyShell Empty) : Stmt(ContinueStmtClass, Empty) {}
 
   SourceLocation getContinueLoc() const { return ContinueLoc; }
   void setContinueLoc(SourceLocation L) { ContinueLoc = L; }
@@ -1350,7 +1378,6 @@
 };
 
 /// BreakStmt - This represents a break.
-///
 class BreakStmt : public Stmt {
   SourceLocation BreakLoc;
 
@@ -1361,7 +1388,7 @@
   }
 
   /// \brief Build an empty break statement.
-  explicit BreakStmt(EmptyShell Empty) : Stmt(BreakStmtClass, Empty) { }
+  explicit BreakStmt(EmptyShell Empty) : Stmt(BreakStmtClass, Empty) {}
 
   SourceLocation getBreakLoc() const { return BreakLoc; }
   void setBreakLoc(SourceLocation L) { BreakLoc = L; }
@@ -1379,7 +1406,6 @@
   }
 };
 
-
 /// ReturnStmt - This represents a return, optionally of an expression:
 ///   return;
 ///   return 4;
@@ -1388,7 +1414,6 @@
 /// return a value, and it allows returning a value in functions declared to
 /// return void.  We explicitly model this in the AST, which means you can't
 /// depend on the return type of the function and the presence of an argument.
-///
 class ReturnStmt : public Stmt {
   SourceLocation RetLoc;
   Stmt *RetExpr;
@@ -1402,7 +1427,7 @@
         NRVOCandidate(NRVOCandidate) {}
 
   /// \brief Build an empty return expression.
-  explicit ReturnStmt(EmptyShell Empty) : Stmt(ReturnStmtClass, Empty) { }
+  explicit ReturnStmt(EmptyShell Empty) : Stmt(ReturnStmtClass, Empty) {}
 
   const Expr *getRetValue() const;
   Expr *getRetValue();
@@ -1420,6 +1445,7 @@
   void setNRVOCandidate(const VarDecl *Var) { NRVOCandidate = Var; }
 
   SourceLocation getLocStart() const LLVM_READONLY { return RetLoc; }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return RetExpr ? RetExpr->getLocEnd() : RetLoc;
   }
@@ -1436,10 +1462,12 @@
 };
 
 /// AsmStmt is the base class for GCCAsmStmt and MSAsmStmt.
-///
 class AsmStmt : public Stmt {
 protected:
+  friend class ASTStmtReader;
+
   SourceLocation AsmLoc;
+
   /// \brief True if the assembly statement does not have any input or output
   /// operands.
   bool IsSimple;
@@ -1452,19 +1480,17 @@
   unsigned NumInputs;
   unsigned NumClobbers;
 
-  Stmt **Exprs;
+  Stmt **Exprs = nullptr;
 
   AsmStmt(StmtClass SC, SourceLocation asmloc, bool issimple, bool isvolatile,
-          unsigned numoutputs, unsigned numinputs, unsigned numclobbers) :
-    Stmt (SC), AsmLoc(asmloc), IsSimple(issimple), IsVolatile(isvolatile),
-    NumOutputs(numoutputs), NumInputs(numinputs), NumClobbers(numclobbers) { }
-
-  friend class ASTStmtReader;
+          unsigned numoutputs, unsigned numinputs, unsigned numclobbers)
+      : Stmt (SC), AsmLoc(asmloc), IsSimple(issimple), IsVolatile(isvolatile),
+        NumOutputs(numoutputs), NumInputs(numinputs),
+        NumClobbers(numclobbers) {}
 
 public:
   /// \brief Build an empty inline-assembly statement.
-  explicit AsmStmt(StmtClass SC, EmptyShell Empty) :
-    Stmt(SC, Empty), Exprs(nullptr) { }
+  explicit AsmStmt(StmtClass SC, EmptyShell Empty) : Stmt(SC, Empty) {}
 
   SourceLocation getAsmLoc() const { return AsmLoc; }
   void setAsmLoc(SourceLocation L) { AsmLoc = L; }
@@ -1527,10 +1553,10 @@
 
   // Input expr iterators.
 
-  typedef ExprIterator inputs_iterator;
-  typedef ConstExprIterator const_inputs_iterator;
-  typedef llvm::iterator_range<inputs_iterator> inputs_range;
-  typedef llvm::iterator_range<const_inputs_iterator> inputs_const_range;
+  using inputs_iterator = ExprIterator;
+  using const_inputs_iterator = ConstExprIterator;
+  using inputs_range = llvm::iterator_range<inputs_iterator>;
+  using inputs_const_range = llvm::iterator_range<const_inputs_iterator>;
 
   inputs_iterator begin_inputs() {
     return &Exprs[0] + NumOutputs;
@@ -1556,17 +1582,19 @@
 
   // Output expr iterators.
 
-  typedef ExprIterator outputs_iterator;
-  typedef ConstExprIterator const_outputs_iterator;
-  typedef llvm::iterator_range<outputs_iterator> outputs_range;
-  typedef llvm::iterator_range<const_outputs_iterator> outputs_const_range;
+  using outputs_iterator = ExprIterator;
+  using const_outputs_iterator = ConstExprIterator;
+  using outputs_range = llvm::iterator_range<outputs_iterator>;
+  using outputs_const_range = llvm::iterator_range<const_outputs_iterator>;
 
   outputs_iterator begin_outputs() {
     return &Exprs[0];
   }
+
   outputs_iterator end_outputs() {
     return &Exprs[0] + NumOutputs;
   }
+
   outputs_range outputs() {
     return outputs_range(begin_outputs(), end_outputs());
   }
@@ -1574,9 +1602,11 @@
   const_outputs_iterator begin_outputs() const {
     return &Exprs[0];
   }
+
   const_outputs_iterator end_outputs() const {
     return &Exprs[0] + NumOutputs;
   }
+
   outputs_const_range outputs() const {
     return outputs_const_range(begin_outputs(), end_outputs());
   }
@@ -1587,17 +1617,16 @@
 };
 
 /// This represents a GCC inline-assembly statement extension.
-///
 class GCCAsmStmt : public AsmStmt {
+  friend class ASTStmtReader;
+
   SourceLocation RParenLoc;
   StringLiteral *AsmStr;
 
   // FIXME: If we wanted to, we could allocate all of these in one big array.
-  StringLiteral **Constraints;
-  StringLiteral **Clobbers;
-  IdentifierInfo **Names;
-
-  friend class ASTStmtReader;
+  StringLiteral **Constraints = nullptr;
+  StringLiteral **Clobbers = nullptr;
+  IdentifierInfo **Names = nullptr;
 
 public:
   GCCAsmStmt(const ASTContext &C, SourceLocation asmloc, bool issimple,
@@ -1607,8 +1636,7 @@
              StringLiteral **clobbers, SourceLocation rparenloc);
 
   /// \brief Build an empty inline-assembly statement.
-  explicit GCCAsmStmt(EmptyShell Empty) : AsmStmt(GCCAsmStmtClass, Empty),
-    Constraints(nullptr), Clobbers(nullptr), Names(nullptr) { }
+  explicit GCCAsmStmt(EmptyShell Empty) : AsmStmt(GCCAsmStmtClass, Empty) {}
 
   SourceLocation getRParenLoc() const { return RParenLoc; }
   void setRParenLoc(SourceLocation L) { RParenLoc = L; }
@@ -1628,6 +1656,7 @@
       String,  // String in .ll asm string form, "$" -> "$$" and "%%" -> "%".
       Operand  // Operand reference, with optional modifier %c4.
     };
+
   private:
     Kind MyKind;
     std::string Str;
@@ -1635,13 +1664,13 @@
 
     // Source range for operand references.
     CharSourceRange Range;
+
   public:
     AsmStringPiece(const std::string &S) : MyKind(String), Str(S) {}
     AsmStringPiece(unsigned OpNo, const std::string &S, SourceLocation Begin,
                    SourceLocation End)
-      : MyKind(Operand), Str(S), OperandNo(OpNo),
-        Range(CharSourceRange::getCharRange(Begin, End)) {
-    }
+        : MyKind(Operand), Str(S), OperandNo(OpNo),
+          Range(CharSourceRange::getCharRange(Begin, End)) {}
 
     bool isString() const { return MyKind == String; }
     bool isOperand() const { return MyKind == Operand; }
@@ -1742,8 +1771,8 @@
                                       unsigned NumInputs,
                                       StringLiteral **Clobbers,
                                       unsigned NumClobbers);
-public:
 
+public:
   //===--- Other ---===//
 
   /// getNamedOperand - Given a symbolic operand reference like %[foo],
@@ -1752,6 +1781,7 @@
   int getNamedOperand(StringRef SymbolicName) const;
 
   StringRef getClobber(unsigned i) const;
+
   StringLiteral *getClobberStringLiteral(unsigned i) { return Clobbers[i]; }
   const StringLiteral *getClobberStringLiteral(unsigned i) const {
     return Clobbers[i];
@@ -1766,18 +1796,17 @@
 };
 
 /// This represents a Microsoft inline-assembly statement extension.
-///
 class MSAsmStmt : public AsmStmt {
+  friend class ASTStmtReader;
+
   SourceLocation LBraceLoc, EndLoc;
   StringRef AsmStr;
 
-  unsigned NumAsmToks;
+  unsigned NumAsmToks = 0;
 
-  Token *AsmToks;
-  StringRef *Constraints;
-  StringRef *Clobbers;
-
-  friend class ASTStmtReader;
+  Token *AsmToks = nullptr;
+  StringRef *Constraints = nullptr;
+  StringRef *Clobbers = nullptr;
 
 public:
   MSAsmStmt(const ASTContext &C, SourceLocation asmloc,
@@ -1788,8 +1817,7 @@
             ArrayRef<StringRef> clobbers, SourceLocation endloc);
 
   /// \brief Build an empty MS-style inline-assembly statement.
-  explicit MSAsmStmt(EmptyShell Empty) : AsmStmt(MSAsmStmtClass, Empty),
-    NumAsmToks(0), AsmToks(nullptr), Constraints(nullptr), Clobbers(nullptr) { }
+  explicit MSAsmStmt(EmptyShell Empty) : AsmStmt(MSAsmStmtClass, Empty) {}
 
   SourceLocation getLBraceLoc() const { return LBraceLoc; }
   void setLBraceLoc(SourceLocation L) { LBraceLoc = L; }
@@ -1839,9 +1867,11 @@
   ArrayRef<StringRef> getAllConstraints() const {
     return llvm::makeArrayRef(Constraints, NumInputs + NumOutputs);
   }
+
   ArrayRef<StringRef> getClobbers() const {
     return llvm::makeArrayRef(Clobbers, NumClobbers);
   }
+
   ArrayRef<Expr*> getAllExprs() const {
     return llvm::makeArrayRef(reinterpret_cast<Expr**>(Exprs),
                               NumInputs + NumOutputs);
@@ -1853,8 +1883,8 @@
   void initialize(const ASTContext &C, StringRef AsmString,
                   ArrayRef<Token> AsmToks, ArrayRef<StringRef> Constraints,
                   ArrayRef<Expr*> Exprs, ArrayRef<StringRef> Clobbers);
-public:
 
+public:
   SourceLocation getLocStart() const LLVM_READONLY { return AsmLoc; }
   SourceLocation getLocEnd() const LLVM_READONLY { return EndLoc; }
 
@@ -1868,18 +1898,16 @@
 };
 
 class SEHExceptStmt : public Stmt {
+  friend class ASTReader;
+  friend class ASTStmtReader;
+
   SourceLocation  Loc;
-  Stmt           *Children[2];
+  Stmt *Children[2];
 
   enum { FILTER_EXPR, BLOCK };
 
-  SEHExceptStmt(SourceLocation Loc,
-                Expr *FilterExpr,
-                Stmt *Block);
-
-  friend class ASTReader;
-  friend class ASTStmtReader;
-  explicit SEHExceptStmt(EmptyShell E) : Stmt(SEHExceptStmtClass, E) { }
+  SEHExceptStmt(SourceLocation Loc, Expr *FilterExpr, Stmt *Block);
+  explicit SEHExceptStmt(EmptyShell E) : Stmt(SEHExceptStmtClass, E) {}
 
 public:
   static SEHExceptStmt* Create(const ASTContext &C,
@@ -1908,19 +1936,17 @@
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == SEHExceptStmtClass;
   }
-
 };
 
 class SEHFinallyStmt : public Stmt {
-  SourceLocation  Loc;
-  Stmt           *Block;
-
-  SEHFinallyStmt(SourceLocation Loc,
-                 Stmt *Block);
-
   friend class ASTReader;
   friend class ASTStmtReader;
-  explicit SEHFinallyStmt(EmptyShell E) : Stmt(SEHFinallyStmtClass, E) { }
+
+  SourceLocation  Loc;
+  Stmt *Block;
+
+  SEHFinallyStmt(SourceLocation Loc, Stmt *Block);
+  explicit SEHFinallyStmt(EmptyShell E) : Stmt(SEHFinallyStmtClass, E) {}
 
 public:
   static SEHFinallyStmt* Create(const ASTContext &C,
@@ -1942,13 +1968,15 @@
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == SEHFinallyStmtClass;
   }
-
 };
 
 class SEHTryStmt : public Stmt {
-  bool            IsCXXTry;
+  friend class ASTReader;
+  friend class ASTStmtReader;
+
+  bool IsCXXTry;
   SourceLocation  TryLoc;
-  Stmt           *Children[2];
+  Stmt *Children[2];
 
   enum { TRY = 0, HANDLER = 1 };
 
@@ -1957,9 +1985,7 @@
              Stmt *TryBlock,
              Stmt *Handler);
 
-  friend class ASTReader;
-  friend class ASTStmtReader;
-  explicit SEHTryStmt(EmptyShell E) : Stmt(SEHTryStmtClass, E) { }
+  explicit SEHTryStmt(EmptyShell E) : Stmt(SEHTryStmtClass, E) {}
 
 public:
   static SEHTryStmt* Create(const ASTContext &C, bool isCXXTry,
@@ -1994,15 +2020,15 @@
 };
 
 /// Represents a __leave statement.
-///
 class SEHLeaveStmt : public Stmt {
   SourceLocation LeaveLoc;
+
 public:
   explicit SEHLeaveStmt(SourceLocation LL)
       : Stmt(SEHLeaveStmtClass), LeaveLoc(LL) {}
 
   /// \brief Build an empty __leave statement.
-  explicit SEHLeaveStmt(EmptyShell Empty) : Stmt(SEHLeaveStmtClass, Empty) { }
+  explicit SEHLeaveStmt(EmptyShell Empty) : Stmt(SEHLeaveStmtClass, Empty) {}
 
   SourceLocation getLeaveLoc() const { return LeaveLoc; }
   void setLeaveLoc(SourceLocation L) { LeaveLoc = L; }
@@ -2047,6 +2073,8 @@
     SourceLocation Loc;
 
   public:
+    friend class ASTStmtReader;
+
     /// \brief Create a new capture.
     ///
     /// \param Loc The source location associated with this capture.
@@ -2054,7 +2082,6 @@
     /// \param Kind The kind of capture (this, ByRef, ...).
     ///
     /// \param Var The variable being captured, or null if capturing this.
-    ///
     Capture(SourceLocation Loc, VariableCaptureKind Kind,
             VarDecl *Var = nullptr);
 
@@ -2086,8 +2113,6 @@
     ///
     /// This operation is only valid if this capture captures a variable.
     VarDecl *getCapturedVar() const;
-
-    friend class ASTStmtReader;
   };
 
 private:
@@ -2099,7 +2124,7 @@
   llvm::PointerIntPair<CapturedDecl *, 1, CapturedRegionKind> CapDeclAndKind;
 
   /// \brief The record for captured variables, a RecordDecl or CXXRecordDecl.
-  RecordDecl *TheRecordDecl;
+  RecordDecl *TheRecordDecl = nullptr;
 
   /// \brief Construct a captured statement.
   CapturedStmt(Stmt *S, CapturedRegionKind Kind, ArrayRef<Capture> Captures,
@@ -2119,6 +2144,8 @@
   void setCapturedStmt(Stmt *S) { getStoredStmts()[NumCaptures] = S; }
 
 public:
+  friend class ASTStmtReader;
+
   static CapturedStmt *Create(const ASTContext &Context, Stmt *S,
                               CapturedRegionKind Kind,
                               ArrayRef<Capture> Captures,
@@ -2158,10 +2185,10 @@
   bool capturesVariable(const VarDecl *Var) const;
 
   /// \brief An iterator that walks over the captures.
-  typedef Capture *capture_iterator;
-  typedef const Capture *const_capture_iterator;
-  typedef llvm::iterator_range<capture_iterator> capture_range;
-  typedef llvm::iterator_range<const_capture_iterator> capture_const_range;
+  using capture_iterator = Capture *;
+  using const_capture_iterator = const Capture *;
+  using capture_range = llvm::iterator_range<capture_iterator>;
+  using capture_const_range = llvm::iterator_range<const_capture_iterator>;
 
   capture_range captures() {
     return capture_range(capture_begin(), capture_end());
@@ -2184,14 +2211,14 @@
   unsigned capture_size() const { return NumCaptures; }
 
   /// \brief Iterator that walks over the capture initialization arguments.
-  typedef Expr **capture_init_iterator;
-  typedef llvm::iterator_range<capture_init_iterator> capture_init_range;
+  using capture_init_iterator = Expr **;
+  using capture_init_range = llvm::iterator_range<capture_init_iterator>;
 
   /// \brief Const iterator that walks over the capture initialization
   /// arguments.
-  typedef Expr *const *const_capture_init_iterator;
-  typedef llvm::iterator_range<const_capture_init_iterator>
-      const_capture_init_range;
+  using const_capture_init_iterator = Expr *const *;
+  using const_capture_init_range =
+      llvm::iterator_range<const_capture_init_iterator>;
 
   capture_init_range capture_inits() {
     return capture_init_range(capture_init_begin(), capture_init_end());
@@ -2223,9 +2250,11 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return getCapturedStmt()->getLocStart();
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY {
     return getCapturedStmt()->getLocEnd();
   }
+
   SourceRange getSourceRange() const LLVM_READONLY {
     return getCapturedStmt()->getSourceRange();
   }
@@ -2235,10 +2264,8 @@
   }
 
   child_range children();
-
-  friend class ASTStmtReader;
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_STMT_H
diff --git a/include/clang/AST/StmtGraphTraits.h b/include/clang/AST/StmtGraphTraits.h
index 92eb644..02c77b2 100644
--- a/include/clang/AST/StmtGraphTraits.h
+++ b/include/clang/AST/StmtGraphTraits.h
@@ -1,4 +1,4 @@
-//===--- StmtGraphTraits.h - Graph Traits for the class Stmt ----*- C++ -*-===//
+//===- StmtGraphTraits.h - Graph Traits for the class Stmt ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -21,13 +21,10 @@
 
 namespace llvm {
 
-//template <typename T> struct GraphTraits;
-
-
-template <> struct GraphTraits<clang::Stmt*> {
-  typedef clang::Stmt *                     NodeRef;
-  typedef clang::Stmt::child_iterator       ChildIteratorType;
-  typedef llvm::df_iterator<clang::Stmt*>   nodes_iterator;
+template <> struct GraphTraits<clang::Stmt *> {
+  using NodeRef = clang::Stmt *;
+  using ChildIteratorType = clang::Stmt::child_iterator;
+  using nodes_iterator = llvm::df_iterator<clang::Stmt *>;
 
   static NodeRef getEntryNode(clang::Stmt *S) { return S; }
 
@@ -50,11 +47,10 @@
   }
 };
 
-
-template <> struct GraphTraits<const clang::Stmt*> {
-  typedef const clang::Stmt *                     NodeRef;
-  typedef clang::Stmt::const_child_iterator       ChildIteratorType;
-  typedef llvm::df_iterator<const clang::Stmt*>   nodes_iterator;
+template <> struct GraphTraits<const clang::Stmt *> {
+  using NodeRef = const clang::Stmt *;
+  using ChildIteratorType = clang::Stmt::const_child_iterator;
+  using nodes_iterator = llvm::df_iterator<const clang::Stmt *>;
 
   static NodeRef getEntryNode(const clang::Stmt *S) { return S; }
 
@@ -77,7 +73,6 @@
   }
 };
 
+} // namespace llvm
 
-} // end namespace llvm
-
-#endif
+#endif // LLVM_CLANG_AST_STMTGRAPHTRAITS_H
diff --git a/include/clang/AST/StmtIterator.h b/include/clang/AST/StmtIterator.h
index 5d3bce8..33aab08 100644
--- a/include/clang/AST/StmtIterator.h
+++ b/include/clang/AST/StmtIterator.h
@@ -1,4 +1,4 @@
-//===--- StmtIterator.h - Iterators for Statements --------------*- C++ -*-===//
+//===- StmtIterator.h - Iterators for Statements ----------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,31 +14,38 @@
 #ifndef LLVM_CLANG_AST_STMTITERATOR_H
 #define LLVM_CLANG_AST_STMTITERATOR_H
 
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataTypes.h"
 #include <cassert>
 #include <cstddef>
+#include <cstdint>
 #include <iterator>
-#include <utility>
 
 namespace clang {
 
-class Stmt;
 class Decl;
+class Stmt;
 class VariableArrayType;
 
 class StmtIteratorBase {
 protected:
-  enum { StmtMode = 0x0, SizeOfTypeVAMode = 0x1, DeclGroupMode = 0x2,
-         Flags = 0x3 };
+  enum {
+    StmtMode = 0x0,
+    SizeOfTypeVAMode = 0x1,
+    DeclGroupMode = 0x2,
+    Flags = 0x3
+  };
   
   union {
     Stmt **stmt;
     Decl **DGI;
   };
-  uintptr_t RawVAPtr;
+  uintptr_t RawVAPtr = 0;
   Decl **DGE;
   
+  StmtIteratorBase(Stmt **s) : stmt(s) {}
+  StmtIteratorBase(const VariableArrayType *t);
+  StmtIteratorBase(Decl **dgi, Decl **dge);
+  StmtIteratorBase() : stmt(nullptr) {}
+
   bool inDeclGroup() const {
     return (RawVAPtr & Flags) == DeclGroupMode;
   }
@@ -56,7 +63,7 @@
   }
 
   void setVAPtr(const VariableArrayType *P) {
-    assert (inDeclGroup() || inSizeOfTypeVA());
+    assert(inDeclGroup() || inSizeOfTypeVA());
     RawVAPtr = reinterpret_cast<uintptr_t>(P) | (RawVAPtr & Flags);
   }
 
@@ -65,14 +72,8 @@
   void NextVA();
 
   Stmt*& GetDeclExpr() const;
-
-  StmtIteratorBase(Stmt **s) : stmt(s), RawVAPtr(0) {}
-  StmtIteratorBase(const VariableArrayType *t);
-  StmtIteratorBase(Decl **dgi, Decl **dge);
-  StmtIteratorBase() : stmt(nullptr), RawVAPtr(0) {}
 };
 
-
 template <typename DERIVED, typename REFERENCE>
 class StmtIteratorImpl : public StmtIteratorBase,
                          public std::iterator<std::forward_iterator_tag,
@@ -80,8 +81,9 @@
                                               REFERENCE, REFERENCE> {
 protected:
   StmtIteratorImpl(const StmtIteratorBase& RHS) : StmtIteratorBase(RHS) {}
+
 public:
-  StmtIteratorImpl() {}
+  StmtIteratorImpl() = default;
   StmtIteratorImpl(Stmt **s) : StmtIteratorBase(s) {}
   StmtIteratorImpl(Decl **dgi, Decl **dge) : StmtIteratorBase(dgi, dge) {}
   StmtIteratorImpl(const VariableArrayType *t) : StmtIteratorBase(t) {}
@@ -120,16 +122,13 @@
 
 struct ConstStmtIterator;
 
-struct StmtIterator : public StmtIteratorImpl<StmtIterator,Stmt*&> {
-  explicit StmtIterator() : StmtIteratorImpl<StmtIterator,Stmt*&>() {}
-
-  StmtIterator(Stmt** S) : StmtIteratorImpl<StmtIterator,Stmt*&>(S) {}
-
+struct StmtIterator : public StmtIteratorImpl<StmtIterator, Stmt*&> {
+  explicit StmtIterator() = default;
+  StmtIterator(Stmt** S) : StmtIteratorImpl<StmtIterator, Stmt*&>(S) {}
   StmtIterator(Decl** dgi, Decl** dge)
-   : StmtIteratorImpl<StmtIterator,Stmt*&>(dgi, dge) {}
-
+      : StmtIteratorImpl<StmtIterator, Stmt*&>(dgi, dge) {}
   StmtIterator(const VariableArrayType *t)
-    : StmtIteratorImpl<StmtIterator,Stmt*&>(t) {}
+      : StmtIteratorImpl<StmtIterator, Stmt*&>(t) {}
 
 private:
   StmtIterator(const StmtIteratorBase &RHS)
@@ -141,11 +140,9 @@
 
 struct ConstStmtIterator : public StmtIteratorImpl<ConstStmtIterator,
                                                    const Stmt*> {
-  explicit ConstStmtIterator() :
-    StmtIteratorImpl<ConstStmtIterator,const Stmt*>() {}
-
-  ConstStmtIterator(const StmtIterator& RHS) :
-    StmtIteratorImpl<ConstStmtIterator,const Stmt*>(RHS) {}
+  explicit ConstStmtIterator() = default;
+  ConstStmtIterator(const StmtIterator& RHS)
+      : StmtIteratorImpl<ConstStmtIterator, const Stmt*>(RHS) {}
 
   ConstStmtIterator(Stmt * const *S)
       : StmtIteratorImpl<ConstStmtIterator, const Stmt *>(
@@ -155,6 +152,7 @@
 inline StmtIterator cast_away_const(const ConstStmtIterator &RHS) {
   return RHS;
 }
-} // end namespace clang
 
-#endif
+} // namespace clang
+
+#endif // LLVM_CLANG_AST_STMTITERATOR_H
diff --git a/include/clang/AST/StmtOpenMP.h b/include/clang/AST/StmtOpenMP.h
index 5ad118c..8805209 100644
--- a/include/clang/AST/StmtOpenMP.h
+++ b/include/clang/AST/StmtOpenMP.h
@@ -193,16 +193,20 @@
   bool hasAssociatedStmt() const { return NumChildren > 0; }
 
   /// \brief Returns statement associated with the directive.
-  Stmt *getAssociatedStmt() const {
+  const Stmt *getAssociatedStmt() const {
     assert(hasAssociatedStmt() && "no associated statement.");
-    return const_cast<Stmt *>(*child_begin());
+    return *child_begin();
+  }
+  Stmt *getAssociatedStmt() {
+    assert(hasAssociatedStmt() && "no associated statement.");
+    return *child_begin();
   }
 
   /// \brief Returns the captured statement associated with the
   /// component region within the (combined) directive.
   //
   // \param RegionKind Component region kind.
-  CapturedStmt *getCapturedStmt(OpenMPDirectiveKind RegionKind) const {
+  const CapturedStmt *getCapturedStmt(OpenMPDirectiveKind RegionKind) const {
     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
     getOpenMPCaptureRegions(CaptureRegions, getDirectiveKind());
     assert(std::any_of(
@@ -218,6 +222,25 @@
     llvm_unreachable("Incorrect RegionKind specified for directive.");
   }
 
+  /// Get innermost captured statement for the construct.
+  CapturedStmt *getInnermostCapturedStmt() {
+    assert(hasAssociatedStmt() && getAssociatedStmt() &&
+           "Must have associated statement.");
+    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
+    getOpenMPCaptureRegions(CaptureRegions, getDirectiveKind());
+    assert(!CaptureRegions.empty() &&
+           "At least one captured statement must be provided.");
+    auto *CS = cast<CapturedStmt>(getAssociatedStmt());
+    for (unsigned Level = CaptureRegions.size(); Level > 1; --Level)
+      CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    return CS;
+  }
+
+  const CapturedStmt *getInnermostCapturedStmt() const {
+    return const_cast<OMPExecutableDirective *>(this)
+        ->getInnermostCapturedStmt();
+  }
+
   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
 
   static bool classof(const Stmt *S) {
@@ -899,7 +922,8 @@
   }
   const Stmt *getBody() const {
     // This relies on the loop form is already checked by Sema.
-    Stmt *Body = getAssociatedStmt()->IgnoreContainers(true);
+    const Stmt *Body =
+        getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
     Body = cast<ForStmt>(Body)->getBody();
     for (unsigned Cnt = 1; Cnt < CollapsedNum; ++Cnt) {
       Body = Body->IgnoreContainers();
@@ -955,8 +979,15 @@
            T->getStmtClass() == OMPTargetSimdDirectiveClass ||
            T->getStmtClass() == OMPTeamsDistributeDirectiveClass ||
            T->getStmtClass() == OMPTeamsDistributeSimdDirectiveClass ||
-           T->getStmtClass() == OMPTeamsDistributeParallelForSimdDirectiveClass ||
-           T->getStmtClass() == OMPTeamsDistributeParallelForDirectiveClass;
+           T->getStmtClass() ==
+               OMPTeamsDistributeParallelForSimdDirectiveClass ||
+           T->getStmtClass() == OMPTeamsDistributeParallelForDirectiveClass ||
+           T->getStmtClass() ==
+               OMPTargetTeamsDistributeParallelForDirectiveClass ||
+           T->getStmtClass() ==
+               OMPTargetTeamsDistributeParallelForSimdDirectiveClass ||
+           T->getStmtClass() == OMPTargetTeamsDistributeDirectiveClass ||
+           T->getStmtClass() == OMPTargetTeamsDistributeSimdDirectiveClass;
   }
 };
 
@@ -2346,7 +2377,7 @@
                               unsigned NumClauses)
       : OMPExecutableDirective(this, OMPTargetEnterDataDirectiveClass,
                                OMPD_target_enter_data, StartLoc, EndLoc,
-                               NumClauses, /*NumChildren=*/0) {}
+                               NumClauses, /*NumChildren=*/1) {}
 
   /// \brief Build an empty directive.
   ///
@@ -2356,7 +2387,7 @@
       : OMPExecutableDirective(this, OMPTargetEnterDataDirectiveClass,
                                OMPD_target_enter_data, SourceLocation(),
                                SourceLocation(), NumClauses,
-                               /*NumChildren=*/0) {}
+                               /*NumChildren=*/1) {}
 
 public:
   /// \brief Creates directive with a list of \a Clauses.
@@ -2365,11 +2396,11 @@
   /// \param StartLoc Starting location of the directive kind.
   /// \param EndLoc Ending Location of the directive.
   /// \param Clauses List of clauses.
+  /// \param AssociatedStmt Statement, associated with the directive.
   ///
-  static OMPTargetEnterDataDirective *Create(const ASTContext &C,
-                                             SourceLocation StartLoc,
-                                             SourceLocation EndLoc,
-                                             ArrayRef<OMPClause *> Clauses);
+  static OMPTargetEnterDataDirective *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt);
 
   /// \brief Creates an empty directive with the place for \a N clauses.
   ///
@@ -2405,7 +2436,7 @@
                              unsigned NumClauses)
       : OMPExecutableDirective(this, OMPTargetExitDataDirectiveClass,
                                OMPD_target_exit_data, StartLoc, EndLoc,
-                               NumClauses, /*NumChildren=*/0) {}
+                               NumClauses, /*NumChildren=*/1) {}
 
   /// \brief Build an empty directive.
   ///
@@ -2415,7 +2446,7 @@
       : OMPExecutableDirective(this, OMPTargetExitDataDirectiveClass,
                                OMPD_target_exit_data, SourceLocation(),
                                SourceLocation(), NumClauses,
-                               /*NumChildren=*/0) {}
+                               /*NumChildren=*/1) {}
 
 public:
   /// \brief Creates directive with a list of \a Clauses.
@@ -2424,11 +2455,11 @@
   /// \param StartLoc Starting location of the directive kind.
   /// \param EndLoc Ending Location of the directive.
   /// \param Clauses List of clauses.
+  /// \param AssociatedStmt Statement, associated with the directive.
   ///
-  static OMPTargetExitDataDirective *Create(const ASTContext &C,
-                                            SourceLocation StartLoc,
-                                            SourceLocation EndLoc,
-                                            ArrayRef<OMPClause *> Clauses);
+  static OMPTargetExitDataDirective *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt);
 
   /// \brief Creates an empty directive with the place for \a N clauses.
   ///
@@ -2982,7 +3013,7 @@
                            unsigned NumClauses)
       : OMPExecutableDirective(this, OMPTargetUpdateDirectiveClass,
                                OMPD_target_update, StartLoc, EndLoc, NumClauses,
-                               0) {}
+                               1) {}
 
   /// \brief Build an empty directive.
   ///
@@ -2991,7 +3022,7 @@
   explicit OMPTargetUpdateDirective(unsigned NumClauses)
       : OMPExecutableDirective(this, OMPTargetUpdateDirectiveClass,
                                OMPD_target_update, SourceLocation(),
-                               SourceLocation(), NumClauses, 0) {}
+                               SourceLocation(), NumClauses, 1) {}
 
 public:
   /// \brief Creates directive with a list of \a Clauses.
@@ -3000,11 +3031,11 @@
   /// \param StartLoc Starting location of the directive kind.
   /// \param EndLoc Ending Location of the directive.
   /// \param Clauses List of clauses.
+  /// \param AssociatedStmt Statement, associated with the directive.
   ///
-  static OMPTargetUpdateDirective *Create(const ASTContext &C,
-                                          SourceLocation StartLoc,
-                                          SourceLocation EndLoc,
-                                          ArrayRef<OMPClause *> Clauses);
+  static OMPTargetUpdateDirective *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt);
 
   /// \brief Creates an empty directive with the place for \a NumClauses
   /// clauses.
@@ -3031,6 +3062,8 @@
 ///
 class OMPDistributeParallelForDirective : public OMPLoopDirective {
   friend class ASTStmtReader;
+  /// true if the construct has inner cancel directive.
+  bool HasCancel = false;
 
   /// \brief Build directive with the given start and end location.
   ///
@@ -3044,7 +3077,7 @@
                                     unsigned CollapsedNum, unsigned NumClauses)
       : OMPLoopDirective(this, OMPDistributeParallelForDirectiveClass,
                          OMPD_distribute_parallel_for, StartLoc, EndLoc,
-                         CollapsedNum, NumClauses) {}
+                         CollapsedNum, NumClauses), HasCancel(false) {}
 
   /// \brief Build an empty directive.
   ///
@@ -3055,7 +3088,11 @@
                                              unsigned NumClauses)
       : OMPLoopDirective(this, OMPDistributeParallelForDirectiveClass,
                          OMPD_distribute_parallel_for, SourceLocation(),
-                         SourceLocation(), CollapsedNum, NumClauses) {}
+                         SourceLocation(), CollapsedNum, NumClauses),
+        HasCancel(false) {}
+
+  /// Set cancel state.
+  void setHasCancel(bool Has) { HasCancel = Has; }
 
 public:
   /// \brief Creates directive with a list of \a Clauses.
@@ -3067,11 +3104,12 @@
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
   /// \param Exprs Helper expressions for CodeGen.
+  /// \param HasCancel true if this directive has inner cancel directive.
   ///
   static OMPDistributeParallelForDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
          unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
-         Stmt *AssociatedStmt, const HelperExprs &Exprs);
+         Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
 
   /// \brief Creates an empty directive with the place
   /// for \a NumClauses clauses.
@@ -3085,6 +3123,9 @@
                                                         unsigned CollapsedNum,
                                                         EmptyShell);
 
+  /// Return true if current directive has inner cancel directive.
+  bool hasCancel() const { return HasCancel; }
+
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == OMPDistributeParallelForDirectiveClass;
   }
@@ -3581,6 +3622,8 @@
 ///
 class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
   friend class ASTStmtReader;
+  /// true if the construct has inner cancel directive.
+  bool HasCancel = false;
 
   /// Build directive with the given start and end location.
   ///
@@ -3595,7 +3638,7 @@
                                          unsigned NumClauses)
       : OMPLoopDirective(this, OMPTeamsDistributeParallelForDirectiveClass,
                          OMPD_teams_distribute_parallel_for, StartLoc, EndLoc,
-                         CollapsedNum, NumClauses) {}
+                         CollapsedNum, NumClauses), HasCancel(false) {}
 
   /// Build an empty directive.
   ///
@@ -3606,7 +3649,11 @@
                                                   unsigned NumClauses)
       : OMPLoopDirective(this, OMPTeamsDistributeParallelForDirectiveClass,
                          OMPD_teams_distribute_parallel_for, SourceLocation(),
-                         SourceLocation(), CollapsedNum, NumClauses) {}
+                         SourceLocation(), CollapsedNum, NumClauses),
+        HasCancel(false) {}
+
+  /// Set cancel state.
+  void setHasCancel(bool Has) { HasCancel = Has; }
 
 public:
   /// Creates directive with a list of \a Clauses.
@@ -3618,11 +3665,12 @@
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
   /// \param Exprs Helper expressions for CodeGen.
+  /// \param HasCancel true if this directive has inner cancel directive.
   ///
   static OMPTeamsDistributeParallelForDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
          unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
-         Stmt *AssociatedStmt, const HelperExprs &Exprs);
+         Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
 
   /// Creates an empty directive with the place for \a NumClauses clauses.
   ///
@@ -3634,6 +3682,9 @@
   CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
               EmptyShell);
 
+  /// Return true if current directive has inner cancel directive.
+  bool hasCancel() const { return HasCancel; }
+
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == OMPTeamsDistributeParallelForDirectiveClass;
   }
@@ -3777,6 +3828,8 @@
 class OMPTargetTeamsDistributeParallelForDirective final
     : public OMPLoopDirective {
   friend class ASTStmtReader;
+  /// true if the construct has inner cancel directive.
+  bool HasCancel = false;
 
   /// Build directive with the given start and end location.
   ///
@@ -3792,7 +3845,8 @@
       : OMPLoopDirective(this,
                          OMPTargetTeamsDistributeParallelForDirectiveClass,
                          OMPD_target_teams_distribute_parallel_for, StartLoc,
-                         EndLoc, CollapsedNum, NumClauses) {}
+                         EndLoc, CollapsedNum, NumClauses),
+        HasCancel(false) {}
 
   /// Build an empty directive.
   ///
@@ -3804,7 +3858,11 @@
       : OMPLoopDirective(
             this, OMPTargetTeamsDistributeParallelForDirectiveClass,
             OMPD_target_teams_distribute_parallel_for, SourceLocation(),
-            SourceLocation(), CollapsedNum, NumClauses) {}
+            SourceLocation(), CollapsedNum, NumClauses),
+        HasCancel(false) {}
+
+  /// Set cancel state.
+  void setHasCancel(bool Has) { HasCancel = Has; }
 
 public:
   /// Creates directive with a list of \a Clauses.
@@ -3816,11 +3874,12 @@
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
   /// \param Exprs Helper expressions for CodeGen.
+  /// \param HasCancel true if this directive has inner cancel directive.
   ///
   static OMPTargetTeamsDistributeParallelForDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
          unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
-         Stmt *AssociatedStmt, const HelperExprs &Exprs);
+         Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
 
   /// Creates an empty directive with the place for \a NumClauses clauses.
   ///
@@ -3832,6 +3891,9 @@
   CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
               EmptyShell);
 
+  /// Return true if current directive has inner cancel directive.
+  bool hasCancel() const { return HasCancel; }
+
   static bool classof(const Stmt *T) {
     return T->getStmtClass() ==
            OMPTargetTeamsDistributeParallelForDirectiveClass;
diff --git a/include/clang/AST/StmtVisitor.h b/include/clang/AST/StmtVisitor.h
index 470788e..98fa113 100644
--- a/include/clang/AST/StmtVisitor.h
+++ b/include/clang/AST/StmtVisitor.h
@@ -1,4 +1,4 @@
-//===--- StmtVisitor.h - Visitor for Stmt subclasses ------------*- C++ -*-===//
+//===- StmtVisitor.h - Visitor for Stmt subclasses --------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,14 +17,19 @@
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
 #include "clang/AST/ExprOpenMP.h"
+#include "clang/AST/Stmt.h"
 #include "clang/AST/StmtCXX.h"
 #include "clang/AST/StmtObjC.h"
 #include "clang/AST/StmtOpenMP.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <utility>
 
 namespace clang {
 
-template <typename T> struct make_ptr       { typedef       T *type; };
-template <typename T> struct make_const_ptr { typedef const T *type; };
+template <typename T> struct make_ptr { using type = T *; };
+template <typename T> struct make_const_ptr { using type = const T *; };
 
 /// StmtVisitorBase - This class implements a simple visitor for Stmt
 /// subclasses. Since Expr derives from Stmt, this also includes support for
@@ -33,14 +38,12 @@
          class... ParamTys>
 class StmtVisitorBase {
 public:
-
 #define PTR(CLASS) typename Ptr<CLASS>::type
 #define DISPATCH(NAME, CLASS) \
   return static_cast<ImplClass*>(this)->Visit ## NAME( \
     static_cast<PTR(CLASS)>(S), std::forward<ParamTys>(P)...)
 
   RetTy Visit(PTR(Stmt) S, ParamTys... P) {
-
     // If we have a binary expr, dispatch to the subcode of the binop.  A smart
     // optimizer (e.g. LLVM) will fold this comparison into the switch stmt
     // below.
@@ -62,6 +65,7 @@
       case BO_GE:        DISPATCH(BinGE,        BinaryOperator);
       case BO_EQ:        DISPATCH(BinEQ,        BinaryOperator);
       case BO_NE:        DISPATCH(BinNE,        BinaryOperator);
+      case BO_Cmp:       DISPATCH(BinCmp,       BinaryOperator);
 
       case BO_And:       DISPATCH(BinAnd,       BinaryOperator);
       case BO_Xor:       DISPATCH(BinXor,       BinaryOperator);
@@ -129,6 +133,8 @@
 
   BINOP_FALLBACK(LT)    BINOP_FALLBACK(GT)   BINOP_FALLBACK(LE)
   BINOP_FALLBACK(GE)    BINOP_FALLBACK(EQ)   BINOP_FALLBACK(NE)
+  BINOP_FALLBACK(Cmp)
+
   BINOP_FALLBACK(And)   BINOP_FALLBACK(Xor)  BINOP_FALLBACK(Or)
   BINOP_FALLBACK(LAnd)  BINOP_FALLBACK(LOr)
 
@@ -224,6 +230,6 @@
 class ConstOMPClauseVisitor :
       public OMPClauseVisitorBase <ImplClass, make_const_ptr, RetTy> {};
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_STMTVISITOR_H
diff --git a/include/clang/AST/TemplateBase.h b/include/clang/AST/TemplateBase.h
index 84fbcda..850250b 100644
--- a/include/clang/AST/TemplateBase.h
+++ b/include/clang/AST/TemplateBase.h
@@ -1,4 +1,4 @@
-//===-- TemplateBase.h - Core classes for C++ templates ---------*- C++ -*-===//
+//===- TemplateBase.h - Core classes for C++ templates ----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,21 +15,32 @@
 #ifndef LLVM_CLANG_AST_TEMPLATEBASE_H
 #define LLVM_CLANG_AST_TEMPLATEBASE_H
 
+#include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/TemplateName.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TrailingObjects.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
 
 namespace llvm {
-  class FoldingSetNodeID;
-}
+
+class FoldingSetNodeID;
+
+} // namespace llvm
 
 namespace clang {
 
+class ASTContext;
 class DiagnosticBuilder;
 class Expr;
 struct PrintingPolicy;
@@ -44,29 +55,37 @@
     /// \brief Represents an empty template argument, e.g., one that has not
     /// been deduced.
     Null = 0,
+
     /// The template argument is a type.
     Type,
+
     /// The template argument is a declaration that was provided for a pointer,
     /// reference, or pointer to member non-type template parameter.
     Declaration,
+
     /// The template argument is a null pointer or null pointer to member that
     /// was provided for a non-type template parameter.
     NullPtr,
+
     /// The template argument is an integral value stored in an llvm::APSInt
     /// that was provided for an integral non-type template parameter.
     Integral,
+
     /// The template argument is a template name that was provided for a
     /// template template parameter.
     Template,
+
     /// The template argument is a pack expansion of a template name that was
     /// provided for a template template parameter.
     TemplateExpansion,
+
     /// The template argument is an expression, and we've not resolved it to one
     /// of the other forms yet, either because it's dependent or because we're
     /// representing a non-canonical template argument (for instance, in a
     /// TemplateSpecializationType). Also used to represent a non-dependent
     /// __uuidof expression (a Microsoft extension).
     Expression,
+
     /// The template argument is actually a parameter pack. Arguments are stored
     /// in the Args struct.
     Pack
@@ -88,8 +107,11 @@
     unsigned BitWidth : 31;
     unsigned IsUnsigned : 1;
     union {
-      uint64_t VAL;          ///< Used to store the <= 64 bits integer value.
-      const uint64_t *pVal;  ///< Used to store the >64 bits integer value.
+      /// Used to store the <= 64 bits integer value.
+      uint64_t VAL;
+
+      /// Used to store the >64 bits integer value.
+      const uint64_t *pVal;
     };
     void *Type;
   };
@@ -115,8 +137,6 @@
     struct TV TypeOrValue;
   };
 
-  TemplateArgument(TemplateName, bool) = delete;
-  
 public:
   /// \brief Construct an empty, invalid template argument.
   constexpr TemplateArgument() : TypeOrValue({Null, 0}) {}
@@ -202,6 +222,8 @@
     this->Args.NumArgs = Args.size();
   }
 
+  TemplateArgument(TemplateName, bool) = delete;
+
   static TemplateArgument getEmptyPack() { return TemplateArgument(None); }
 
   /// \brief Create a new template argument pack by copying the given set of
@@ -278,7 +300,9 @@
   // FIXME: Provide a way to read the integral data without copying the value.
   llvm::APSInt getAsIntegral() const {
     assert(getKind() == Integral && "Unexpected kind");
+
     using namespace llvm;
+
     if (Integer.BitWidth <= 64)
       return APSInt(APInt(Integer.BitWidth, Integer.VAL), Integer.IsUnsigned);
 
@@ -309,7 +333,7 @@
   }
 
   /// \brief Iterator that traverses the elements of a template argument pack.
-  typedef const TemplateArgument * pack_iterator;
+  using pack_iterator = const TemplateArgument *;
 
   /// \brief Iterator referencing the first argument of a template argument
   /// pack.
@@ -368,7 +392,6 @@
 /// Location information for a TemplateArgument.
 struct TemplateArgumentLocInfo {
 private:
-
   struct T {
     // FIXME: We'd like to just use the qualifier in the TemplateName,
     // but template arguments get canonicalized too quickly.
@@ -393,8 +416,7 @@
   
   TemplateArgumentLocInfo(NestedNameSpecifierLoc QualifierLoc,
                           SourceLocation TemplateNameLoc,
-                          SourceLocation EllipsisLoc)
-  {
+                          SourceLocation EllipsisLoc) {
     Template.Qualifier = QualifierLoc.getNestedNameSpecifier();
     Template.QualifierLocData = QualifierLoc.getOpaqueData();
     Template.TemplateNameLoc = TemplateNameLoc.getRawEncoding();
@@ -434,16 +456,15 @@
 
   TemplateArgumentLoc(const TemplateArgument &Argument,
                       TemplateArgumentLocInfo Opaque)
-    : Argument(Argument), LocInfo(Opaque) {
-  }
+      : Argument(Argument), LocInfo(Opaque) {}
 
   TemplateArgumentLoc(const TemplateArgument &Argument, TypeSourceInfo *TInfo)
-    : Argument(Argument), LocInfo(TInfo) {
+      : Argument(Argument), LocInfo(TInfo) {
     assert(Argument.getKind() == TemplateArgument::Type);
   }
 
   TemplateArgumentLoc(const TemplateArgument &Argument, Expr *E)
-    : Argument(Argument), LocInfo(E) {
+      : Argument(Argument), LocInfo(E) {
     assert(Argument.getKind() == TemplateArgument::Expression);
   }
 
@@ -451,7 +472,8 @@
                       NestedNameSpecifierLoc QualifierLoc,
                       SourceLocation TemplateNameLoc,
                       SourceLocation EllipsisLoc = SourceLocation())
-    : Argument(Argument), LocInfo(QualifierLoc, TemplateNameLoc, EllipsisLoc) {
+      : Argument(Argument),
+        LocInfo(QualifierLoc, TemplateNameLoc, EllipsisLoc) {
     assert(Argument.getKind() == TemplateArgument::Template ||
            Argument.getKind() == TemplateArgument::TemplateExpansion);
   }
@@ -526,16 +548,16 @@
   SourceLocation LAngleLoc;
   SourceLocation RAngleLoc;
 
-  // This can leak if used in an AST node, use ASTTemplateArgumentListInfo
-  // instead.
-  void *operator new(size_t bytes, ASTContext &C) = delete;
-
 public:
-  TemplateArgumentListInfo() {}
+  TemplateArgumentListInfo() = default;
 
   TemplateArgumentListInfo(SourceLocation LAngleLoc,
                            SourceLocation RAngleLoc)
-    : LAngleLoc(LAngleLoc), RAngleLoc(RAngleLoc) {}
+      : LAngleLoc(LAngleLoc), RAngleLoc(RAngleLoc) {}
+
+  // This can leak if used in an AST node, use ASTTemplateArgumentListInfo
+  // instead.
+  void *operator new(size_t bytes, ASTContext &C) = delete;
 
   SourceLocation getLAngleLoc() const { return LAngleLoc; }
   SourceLocation getRAngleLoc() const { return RAngleLoc; }
@@ -574,8 +596,8 @@
     : private llvm::TrailingObjects<ASTTemplateArgumentListInfo,
                                     TemplateArgumentLoc> {
 private:
-  friend TrailingObjects;
   friend class ASTNodeImporter;
+  friend TrailingObjects;
 
   ASTTemplateArgumentListInfo(const TemplateArgumentListInfo &List);
 
@@ -668,6 +690,6 @@
   return getArgs()[Idx];
 }
   
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_TEMPLATEBASE_H
diff --git a/include/clang/AST/TemplateName.h b/include/clang/AST/TemplateName.h
index 45a610e..fb33cf5 100644
--- a/include/clang/AST/TemplateName.h
+++ b/include/clang/AST/TemplateName.h
@@ -1,4 +1,4 @@
-//===--- TemplateName.h - C++ Template Name Representation-------*- C++ -*-===//
+//===- TemplateName.h - C++ Template Name Representation --------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,10 +14,12 @@
 #ifndef LLVM_CLANG_AST_TEMPLATENAME_H
 #define LLVM_CLANG_AST_TEMPLATENAME_H
 
-#include "clang/AST/NestedNameSpecifier.h"
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/PointerUnion.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include <cassert>
 
 namespace clang {
   
@@ -94,7 +96,7 @@
   friend class ASTContext;
 
   OverloadedTemplateStorage(unsigned size) 
-    : UncommonTemplateNameStorage(Overloaded, size) { }
+      : UncommonTemplateNameStorage(Overloaded, size) {}
 
   NamedDecl **getStorage() {
     return reinterpret_cast<NamedDecl **>(this + 1);
@@ -104,7 +106,7 @@
   }
 
 public:
-  typedef NamedDecl *const *iterator;
+  using iterator = NamedDecl *const *;
 
   iterator begin() const { return getStorage(); }
   iterator end() const { return getStorage() + size(); }
@@ -126,8 +128,8 @@
   SubstTemplateTemplateParmPackStorage(TemplateTemplateParmDecl *Parameter,
                                        unsigned Size, 
                                        const TemplateArgument *Arguments)
-    : UncommonTemplateNameStorage(SubstTemplateTemplateParmPack, Size),
-      Parameter(Parameter), Arguments(Arguments) { }
+      : UncommonTemplateNameStorage(SubstTemplateTemplateParmPack, Size),
+        Parameter(Parameter), Arguments(Arguments) {}
   
   /// \brief Retrieve the template template parameter pack being substituted.
   TemplateTemplateParmDecl *getParameterPack() const {
@@ -174,10 +176,9 @@
 /// specifier in the typedef. "apply" is a nested template, and can
 /// only be understood in the context of
 class TemplateName {
-  typedef llvm::PointerUnion4<TemplateDecl *,
-                              UncommonTemplateNameStorage *,
-                              QualifiedTemplateName *,
-                              DependentTemplateName *> StorageType;
+  using StorageType =
+      llvm::PointerUnion4<TemplateDecl *, UncommonTemplateNameStorage *,
+                          QualifiedTemplateName *, DependentTemplateName *>;
 
   StorageType Storage;
 
@@ -188,24 +189,29 @@
   enum NameKind {
     /// \brief A single template declaration.
     Template,
+
     /// \brief A set of overloaded template declarations.
     OverloadedTemplate,
+
     /// \brief A qualified template name, where the qualification is kept 
     /// to describe the source code as written.
     QualifiedTemplate,
+
     /// \brief A dependent template name that has not been resolved to a 
     /// template (or set of templates).
     DependentTemplate,
+
     /// \brief A template template parameter that has been substituted
     /// for some other template name.
     SubstTemplateTemplateParm,
+
     /// \brief A template template parameter pack that has been substituted for 
     /// a template template argument pack, but has not yet been expanded into
     /// individual arguments.
     SubstTemplateTemplateParmPack
   };
 
-  TemplateName() : Storage() { }
+  TemplateName() = default;
   explicit TemplateName(TemplateDecl *Template);
   explicit TemplateName(OverloadedTemplateStorage *Storage);
   explicit TemplateName(SubstTemplateTemplateParmStorage *Storage);
@@ -325,8 +331,8 @@
 
   SubstTemplateTemplateParmStorage(TemplateTemplateParmDecl *parameter,
                                    TemplateName replacement)
-    : UncommonTemplateNameStorage(SubstTemplateTemplateParm, 0),
-      Parameter(parameter), Replacement(replacement) {}
+      : UncommonTemplateNameStorage(SubstTemplateTemplateParm, 0),
+        Parameter(parameter), Replacement(replacement) {}
 
 public:
   TemplateTemplateParmDecl *getParameter() const { return Parameter; }
@@ -358,6 +364,8 @@
 /// manner, it is to TemplateName what ElaboratedType is to Type,
 /// providing extra syntactic sugar for downstream clients.
 class QualifiedTemplateName : public llvm::FoldingSetNode {
+  friend class ASTContext;
+
   /// \brief The nested name specifier that qualifies the template name.
   ///
   /// The bit is used to indicate whether the "template" keyword was
@@ -371,12 +379,9 @@
   /// that this qualified name refers to.
   TemplateDecl *Template;
 
-  friend class ASTContext;
-
   QualifiedTemplateName(NestedNameSpecifier *NNS, bool TemplateKeyword,
                         TemplateDecl *Template)
-    : Qualifier(NNS, TemplateKeyword? 1 : 0),
-      Template(Template) { }
+      : Qualifier(NNS, TemplateKeyword? 1 : 0), Template(Template) {}
 
 public:
   /// \brief Return the nested name specifier that qualifies this name.
@@ -415,6 +420,8 @@
 /// where "MetaFun::" is the nested name specifier and "apply" is the
 /// template name referenced. The "template" keyword is implied.
 class DependentTemplateName : public llvm::FoldingSetNode {
+  friend class ASTContext;
+
   /// \brief The nested name specifier that qualifies the template
   /// name.
   ///
@@ -444,29 +451,27 @@
   /// canonical.
   TemplateName CanonicalTemplateName;
 
-  friend class ASTContext;
-
   DependentTemplateName(NestedNameSpecifier *Qualifier,
                         const IdentifierInfo *Identifier)
-    : Qualifier(Qualifier, false), Identifier(Identifier), 
-      CanonicalTemplateName(this) { }
+      : Qualifier(Qualifier, false), Identifier(Identifier), 
+        CanonicalTemplateName(this) {}
 
   DependentTemplateName(NestedNameSpecifier *Qualifier,
                         const IdentifierInfo *Identifier,
                         TemplateName Canon)
-    : Qualifier(Qualifier, false), Identifier(Identifier), 
-      CanonicalTemplateName(Canon) { }
+      : Qualifier(Qualifier, false), Identifier(Identifier), 
+        CanonicalTemplateName(Canon) {}
 
   DependentTemplateName(NestedNameSpecifier *Qualifier,
                         OverloadedOperatorKind Operator)
-  : Qualifier(Qualifier, true), Operator(Operator), 
-    CanonicalTemplateName(this) { }
+      : Qualifier(Qualifier, true), Operator(Operator), 
+        CanonicalTemplateName(this) {}
   
   DependentTemplateName(NestedNameSpecifier *Qualifier,
                         OverloadedOperatorKind Operator,
                         TemplateName Canon)
-  : Qualifier(Qualifier, true), Operator(Operator), 
-    CanonicalTemplateName(Canon) { }
+       : Qualifier(Qualifier, true), Operator(Operator), 
+         CanonicalTemplateName(Canon) {}
   
 public:
   /// \brief Return the nested name specifier that qualifies this name.
@@ -514,7 +519,7 @@
   }
 };
 
-} // end namespace clang.
+} // namespace clang.
 
 namespace llvm {
 
@@ -533,6 +538,6 @@
   enum { NumLowBitsAvailable = 0 };
 };
 
-} // end namespace llvm.
+} // namespace llvm.
 
-#endif
+#endif // LLVM_CLANG_AST_TEMPLATENAME_H
diff --git a/include/clang/AST/Type.h b/include/clang/AST/Type.h
index 30804b7..4dd67f7 100644
--- a/include/clang/AST/Type.h
+++ b/include/clang/AST/Type.h
@@ -1,4 +1,4 @@
-//===--- Type.h - C Language Family Type Representation ---------*- C++ -*-===//
+//===- Type.h - C Language Family Type Representation -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,12 +6,13 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+//
 /// \file
 /// \brief C Language Family Type Representation
 ///
 /// This file defines the clang::Type interface and subclasses, used to
 /// represent types for languages in the C family.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_AST_TYPE_H
@@ -25,83 +26,118 @@
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/Linkage.h"
 #include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/Specifiers.h"
 #include "clang/Basic/Visibility.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include "llvm/Support/type_traits.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <type_traits>
+#include <utility>
 
 namespace clang {
-  enum {
-    TypeAlignmentInBits = 4,
-    TypeAlignment = 1 << TypeAlignmentInBits
-  };
-  class Type;
-  class ExtQuals;
-  class QualType;
-}
+
+class ExtQuals;
+class QualType;
+class Type;
+
+enum {
+  TypeAlignmentInBits = 4,
+  TypeAlignment = 1 << TypeAlignmentInBits
+};
+
+} // namespace clang
 
 namespace llvm {
+
   template <typename T>
   struct PointerLikeTypeTraits;
   template<>
   struct PointerLikeTypeTraits< ::clang::Type*> {
     static inline void *getAsVoidPointer(::clang::Type *P) { return P; }
+
     static inline ::clang::Type *getFromVoidPointer(void *P) {
       return static_cast< ::clang::Type*>(P);
     }
+
     enum { NumLowBitsAvailable = clang::TypeAlignmentInBits };
   };
+
   template<>
   struct PointerLikeTypeTraits< ::clang::ExtQuals*> {
     static inline void *getAsVoidPointer(::clang::ExtQuals *P) { return P; }
+
     static inline ::clang::ExtQuals *getFromVoidPointer(void *P) {
       return static_cast< ::clang::ExtQuals*>(P);
     }
+
     enum { NumLowBitsAvailable = clang::TypeAlignmentInBits };
   };
 
   template <>
   struct isPodLike<clang::QualType> { static const bool value = true; };
-}
+
+} // namespace llvm
 
 namespace clang {
-  class ASTContext;
-  class TypedefNameDecl;
-  class TemplateDecl;
-  class TemplateTypeParmDecl;
-  class NonTypeTemplateParmDecl;
-  class TemplateTemplateParmDecl;
-  class TagDecl;
-  class RecordDecl;
-  class CXXRecordDecl;
-  class EnumDecl;
-  class FieldDecl;
-  class FunctionDecl;
-  class ObjCInterfaceDecl;
-  class ObjCProtocolDecl;
-  class ObjCMethodDecl;
-  class ObjCTypeParamDecl;
-  class UnresolvedUsingTypenameDecl;
-  class Expr;
-  class Stmt;
-  class SourceLocation;
-  class StmtIteratorBase;
-  class TemplateArgument;
-  class TemplateArgumentLoc;
-  class TemplateArgumentListInfo;
-  class ElaboratedType;
-  class ExtQuals;
-  class ExtQualsTypeCommonBase;
-  struct PrintingPolicy;
 
-  template <typename> class CanQual;
-  typedef CanQual<Type> CanQualType;
+class ArrayType;
+class ASTContext;
+class AttributedType;
+class AutoType;
+class BuiltinType;
+template <typename> class CanQual;
+class ComplexType;
+class CXXRecordDecl;
+class DeclContext;
+class DeducedType;
+class EnumDecl;
+class Expr;
+class ExtQualsTypeCommonBase;
+class FunctionDecl;
+class FunctionNoProtoType;
+class FunctionProtoType;
+class IdentifierInfo;
+class InjectedClassNameType;
+class NamedDecl;
+class ObjCInterfaceDecl;
+class ObjCObjectPointerType;
+class ObjCObjectType;
+class ObjCProtocolDecl;
+class ObjCTypeParamDecl;
+class ParenType;
+struct PrintingPolicy;
+class RecordDecl;
+class RecordType;
+class Stmt;
+class TagDecl;
+class TemplateArgument;
+class TemplateArgumentListInfo;
+class TemplateArgumentLoc;
+class TemplateSpecializationType;
+class TemplateTypeParmDecl;
+class TypedefNameDecl;
+class TypedefType;
+class UnresolvedUsingTypenameDecl;
+
+using CanQualType = CanQual<Type>;
 
   // Provide forward declarations for all of the *Type classes
 #define TYPE(Class, Base) class Class##Type;
@@ -536,7 +572,6 @@
   }
 
 private:
-
   // bits:     |0 1 2|3|4 .. 5|6  ..  8|9   ...   31|
   //           |C R V|U|GCAttr|Lifetime|AddressSpace|
   uint32_t Mask = 0;
@@ -556,12 +591,12 @@
 /// into its local qualifiers and its locally-unqualified type.
 struct SplitQualType {
   /// The locally-unqualified type.
-  const Type *Ty;
+  const Type *Ty = nullptr;
 
   /// The local qualifiers.
   Qualifiers Quals;
 
-  SplitQualType() : Ty(nullptr), Quals() {}
+  SplitQualType() = default;
   SplitQualType(const Type *ty, Qualifiers qs) : Ty(ty), Quals(qs) {}
 
   SplitQualType getSingleStepDesugaredType() const; // end of this file
@@ -587,12 +622,16 @@
 enum class ObjCSubstitutionContext {
   /// An ordinary type.
   Ordinary,
+
   /// The result type of a method or function.
   Result,
+
   /// The parameter type of a method or function.
   Parameter,
+
   /// The type of a property.
   Property,
+
   /// The superclass of a type.
   Superclass,
 };
@@ -612,8 +651,10 @@
 /// indicates whether there are extended qualifiers present, in which
 /// case the pointer points to a special structure.
 class QualType {
+  friend class QualifierCollector;
+
   // Thankfully, these are efficiently composable.
-  llvm::PointerIntPair<llvm::PointerUnion<const Type*,const ExtQuals*>,
+  llvm::PointerIntPair<llvm::PointerUnion<const Type *, const ExtQuals *>,
                        Qualifiers::FastWidth> Value;
 
   const ExtQuals *getExtQualsUnsafe() const {
@@ -632,14 +673,10 @@
     return reinterpret_cast<ExtQualsTypeCommonBase*>(CommonPtrVal);
   }
 
-  friend class QualifierCollector;
 public:
   QualType() = default;
-
-  QualType(const Type *Ptr, unsigned Quals)
-    : Value(Ptr, Quals) {}
-  QualType(const ExtQuals *Ptr, unsigned Quals)
-    : Value(Ptr, Quals) {}
+  QualType(const Type *Ptr, unsigned Quals) : Value(Ptr, Quals) {}
+  QualType(const ExtQuals *Ptr, unsigned Quals) : Value(Ptr, Quals) {}
 
   unsigned getLocalFastQualifiers() const { return Value.getInt(); }
   void setLocalFastQualifiers(unsigned Quals) { Value.setInt(Quals); }
@@ -660,6 +697,7 @@
   SplitQualType split() const;
 
   void *getAsOpaquePtr() const { return Value.getOpaqueValue(); }
+
   static QualType getFromOpaquePtr(const void *Ptr) {
     QualType T;
     T.Value.setFromOpaqueValue(const_cast<void*>(Ptr));
@@ -770,6 +808,11 @@
   /// Return true if this is a trivially copyable type (C++0x [basic.types]p9)
   bool isTriviallyCopyableType(const ASTContext &Context) const;
 
+  /// Determine whether this is a class whose triviality for the purpose of
+  /// calls is overridden to be trivial because the class or the type of one of
+  /// its subobjects has attribute "trivial_abi".
+  bool hasTrivialABIOverride() const;
+
   // Don't promise in the API that anything besides 'const' can be
   // easily added.
 
@@ -937,14 +980,15 @@
   friend bool operator!=(const QualType &LHS, const QualType &RHS) {
     return LHS.Value != RHS.Value;
   }
-  std::string getAsString() const {
-    return getAsString(split());
-  }
-  static std::string getAsString(SplitQualType split) {
-    return getAsString(split.Ty, split.Quals);
-  }
-  static std::string getAsString(const Type *ty, Qualifiers qs);
 
+  static std::string getAsString(SplitQualType split,
+                                 const PrintingPolicy &Policy) {
+    return getAsString(split.Ty, split.Quals, Policy);
+  }
+  static std::string getAsString(const Type *ty, Qualifiers qs,
+                                 const PrintingPolicy &Policy);
+
+  std::string getAsString() const; 
   std::string getAsString(const PrintingPolicy &Policy) const;
 
   void print(raw_ostream &OS, const PrintingPolicy &Policy,
@@ -952,11 +996,13 @@
              unsigned Indentation = 0) const {
     print(split(), OS, Policy, PlaceHolder, Indentation);
   }
+
   static void print(SplitQualType split, raw_ostream &OS,
                     const PrintingPolicy &policy, const Twine &PlaceHolder,
                     unsigned Indentation = 0) {
     return print(split.Ty, split.Quals, OS, policy, PlaceHolder, Indentation);
   }
+
   static void print(const Type *ty, Qualifiers qs,
                     raw_ostream &OS, const PrintingPolicy &policy,
                     const Twine &PlaceHolder,
@@ -966,10 +1012,12 @@
                            const PrintingPolicy &Policy) const {
     return getAsStringInternal(split(), Str, Policy);
   }
+
   static void getAsStringInternal(SplitQualType split, std::string &out,
                                   const PrintingPolicy &policy) {
     return getAsStringInternal(split.Ty, split.Quals, out, policy);
   }
+
   static void getAsStringInternal(const Type *ty, Qualifiers qs,
                                   std::string &out,
                                   const PrintingPolicy &policy);
@@ -979,11 +1027,12 @@
     const PrintingPolicy &Policy;
     const Twine &PlaceHolder;
     unsigned Indentation;
+
   public:
     StreamedQualTypeHelper(const QualType &T, const PrintingPolicy &Policy,
                            const Twine &PlaceHolder, unsigned Indentation)
-      : T(T), Policy(Policy), PlaceHolder(PlaceHolder),
-        Indentation(Indentation) { }
+        : T(T), Policy(Policy), PlaceHolder(PlaceHolder),
+          Indentation(Indentation) {}
 
     friend raw_ostream &operator<<(raw_ostream &OS,
                                    const StreamedQualTypeHelper &SQT) {
@@ -1038,11 +1087,71 @@
   // true when Type is objc's weak and weak is enabled but ARC isn't.
   bool isNonWeakInMRRWithObjCWeak(const ASTContext &Context) const;
 
+  enum PrimitiveDefaultInitializeKind {
+    /// The type does not fall into any of the following categories. Note that
+    /// this case is zero-valued so that values of this enum can be used as a
+    /// boolean condition for non-triviality.
+    PDIK_Trivial,
+
+    /// The type is an Objective-C retainable pointer type that is qualified
+    /// with the ARC __strong qualifier.
+    PDIK_ARCStrong,
+
+    /// The type is a struct containing a field whose type is not PCK_Trivial.
+    PDIK_Struct
+  };
+
+  /// Functions to query basic properties of non-trivial C struct types.
+
+  /// Check if this is a non-trivial type that would cause a C struct
+  /// transitively containing this type to be non-trivial to default initialize
+  /// and return the kind.
+  PrimitiveDefaultInitializeKind
+  isNonTrivialToPrimitiveDefaultInitialize() const;
+
+  enum PrimitiveCopyKind {
+    /// The type does not fall into any of the following categories. Note that
+    /// this case is zero-valued so that values of this enum can be used as a
+    /// boolean condition for non-triviality.
+    PCK_Trivial,
+
+    /// The type would be trivial except that it is volatile-qualified. Types
+    /// that fall into one of the other non-trivial cases may additionally be
+    /// volatile-qualified.
+    PCK_VolatileTrivial,
+
+    /// The type is an Objective-C retainable pointer type that is qualified
+    /// with the ARC __strong qualifier.
+    PCK_ARCStrong,
+
+    /// The type is a struct containing a field whose type is neither
+    /// PCK_Trivial nor PCK_VolatileTrivial.
+    /// Note that a C++ struct type does not necessarily match this; C++ copying
+    /// semantics are too complex to express here, in part because they depend
+    /// on the exact constructor or assignment operator that is chosen by
+    /// overload resolution to do the copy.
+    PCK_Struct
+  };
+
+  /// Check if this is a non-trivial type that would cause a C struct
+  /// transitively containing this type to be non-trivial to copy and return the
+  /// kind.
+  PrimitiveCopyKind isNonTrivialToPrimitiveCopy() const;
+
+  /// Check if this is a non-trivial type that would cause a C struct
+  /// transitively containing this type to be non-trivial to destructively
+  /// move and return the kind. Destructive move in this context is a C++-style
+  /// move in which the source object is placed in a valid but unspecified state
+  /// after it is moved, as opposed to a truly destructive move in which the
+  /// source object is placed in an uninitialized state.
+  PrimitiveCopyKind isNonTrivialToPrimitiveDestructiveMove() const;
+
   enum DestructionKind {
     DK_none,
     DK_cxx_destructor,
     DK_objc_strong_lifetime,
-    DK_objc_weak_lifetime
+    DK_objc_weak_lifetime,
+    DK_nontrivial_c_struct
   };
 
   /// Returns a nonzero value if objects of this type require
@@ -1127,13 +1236,15 @@
   static DestructionKind isDestructedTypeImpl(QualType type);
 };
 
-} // end clang.
+} // namespace clang
 
 namespace llvm {
+
 /// Implement simplify_type for QualType, so that we can dyn_cast from QualType
 /// to a specific Type class.
 template<> struct simplify_type< ::clang::QualType> {
-  typedef const ::clang::Type *SimpleType;
+  using SimpleType = const ::clang::Type *;
+
   static SimpleType getSimplifiedValue(::clang::QualType Val) {
     return Val.getTypePtr();
   }
@@ -1145,24 +1256,26 @@
   static inline void *getAsVoidPointer(clang::QualType P) {
     return P.getAsOpaquePtr();
   }
+
   static inline clang::QualType getFromVoidPointer(void *P) {
     return clang::QualType::getFromOpaquePtr(P);
   }
+
   // Various qualifiers go in low bits.
   enum { NumLowBitsAvailable = 0 };
 };
 
-} // end namespace llvm
+} // namespace llvm
 
 namespace clang {
 
 /// \brief Base class that is common to both the \c ExtQuals and \c Type
 /// classes, which allows \c QualType to access the common fields between the
 /// two.
-///
 class ExtQualsTypeCommonBase {
-  ExtQualsTypeCommonBase(const Type *baseType, QualType canon)
-    : BaseType(baseType), CanonicalType(canon) {}
+  friend class ExtQuals;
+  friend class QualType;
+  friend class Type;
 
   /// \brief The "base" type of an extended qualifiers type (\c ExtQuals) or
   /// a self-referential pointer (for \c Type).
@@ -1174,9 +1287,8 @@
   /// \brief The canonical type of this type.  A QualType.
   QualType CanonicalType;
 
-  friend class QualType;
-  friend class Type;
-  friend class ExtQuals;
+  ExtQualsTypeCommonBase(const Type *baseType, QualType canon)
+      : BaseType(baseType), CanonicalType(canon) {}
 };
 
 /// We can encode up to four bits in the low bits of a
@@ -1211,10 +1323,9 @@
 
 public:
   ExtQuals(const Type *baseType, QualType canon, Qualifiers quals)
-    : ExtQualsTypeCommonBase(baseType,
-                             canon.isNull() ? QualType(this_(), 0) : canon),
-      Quals(quals)
-  {
+      : ExtQualsTypeCommonBase(baseType,
+                               canon.isNull() ? QualType(this_(), 0) : canon),
+        Quals(quals) {
     assert(Quals.hasNonFastQualifiers()
            && "ExtQuals created with no fast qualifiers");
     assert(!Quals.hasFastQualifiers()
@@ -1240,6 +1351,7 @@
   void Profile(llvm::FoldingSetNodeID &ID) const {
     Profile(ID, getBaseType(), Quals);
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID,
                       const Type *BaseType,
                       Qualifiers Quals) {
@@ -1255,8 +1367,10 @@
 enum RefQualifierKind {
   /// \brief No ref-qualifier was provided.
   RQ_None = 0,
+
   /// \brief An lvalue ref-qualifier was provided (\c &).
   RQ_LValue,
+
   /// \brief An rvalue ref-qualifier was provided (\c &&).
   RQ_RValue
 };
@@ -1265,8 +1379,10 @@
 enum class AutoTypeKeyword {
   /// \brief auto
   Auto,
+
   /// \brief decltype(auto)
   DecltypeAuto,
+
   /// \brief __auto_type (GNU extension)
   GNUAutoType
 };
@@ -1308,9 +1424,6 @@
   };
 
 private:
-  Type(const Type &) = delete;
-  void operator=(const Type &) = delete;
-
   /// Bitfields required by the Type class.
   class TypeBitfields {
     friend class Type;
@@ -1349,10 +1462,12 @@
     bool isCacheValid() const {
       return CacheValid;
     }
+
     Linkage getLinkage() const {
       assert(isCacheValid() && "getting linkage from invalid cache");
       return static_cast<Linkage>(CachedLinkage);
     }
+
     bool hasLocalOrUnnamedType() const {
       assert(isCacheValid() && "getting linkage from invalid cache");
       return CachedLocalOrUnnamed;
@@ -1389,8 +1504,8 @@
   };
 
   class FunctionTypeBitfields {
-    friend class FunctionType;
     friend class FunctionProtoType;
+    friend class FunctionType;
 
     unsigned : NumTypeBits;
 
@@ -1426,6 +1541,7 @@
     /// Whether this is a "kindof" type.
     unsigned IsKindOf : 1;
   };
+
   static_assert(NumTypeBits + 7 + 6 + 1 <= 32, "Does not fit in an unsigned");
 
   class ReferenceTypeBitfields {
@@ -1508,21 +1624,21 @@
   };
 
 private:
+  template <class T> friend class TypePropertyCache;
+
   /// \brief Set whether this type comes from an AST file.
   void setFromAST(bool V = true) const {
     TypeBits.FromAST = V;
   }
 
-  template <class T> friend class TypePropertyCache;
-
 protected:
-  // silence VC++ warning C4355: 'this' : used in base member initializer list
-  Type *this_() { return this; }
+  friend class ASTContext;
+
   Type(TypeClass tc, QualType canon, bool Dependent,
        bool InstantiationDependent, bool VariablyModified,
        bool ContainsUnexpandedParameterPack)
-    : ExtQualsTypeCommonBase(this,
-                             canon.isNull() ? QualType(this_(), 0) : canon) {
+      : ExtQualsTypeCommonBase(this,
+                               canon.isNull() ? QualType(this_(), 0) : canon) {
     TypeBits.TC = tc;
     TypeBits.Dependent = Dependent;
     TypeBits.InstantiationDependent = Dependent || InstantiationDependent;
@@ -1533,22 +1649,32 @@
     TypeBits.CachedLinkage = NoLinkage;
     TypeBits.FromAST = false;
   }
-  friend class ASTContext;
+
+  // silence VC++ warning C4355: 'this' : used in base member initializer list
+  Type *this_() { return this; }
 
   void setDependent(bool D = true) {
     TypeBits.Dependent = D;
     if (D)
       TypeBits.InstantiationDependent = true;
   }
+
   void setInstantiationDependent(bool D = true) {
     TypeBits.InstantiationDependent = D; }
-  void setVariablyModified(bool VM = true) { TypeBits.VariablyModified = VM;
-  }
+
+  void setVariablyModified(bool VM = true) { TypeBits.VariablyModified = VM; }
+
   void setContainsUnexpandedParameterPack(bool PP = true) {
     TypeBits.ContainsUnexpandedParameterPack = PP;
   }
 
 public:
+  friend class ASTReader;
+  friend class ASTWriter;
+
+  Type(const Type &) = delete;
+  Type &operator=(const Type &) = delete;
+
   TypeClass getTypeClass() const { return static_cast<TypeClass>(TypeBits.TC); }
 
   /// \brief Whether this type comes from an AST file.
@@ -1655,6 +1781,7 @@
 
   /// Determine whether this type is an integral or enumeration type.
   bool isIntegralOrEnumerationType() const;
+
   /// Determine whether this type is an integral or unscoped enumeration type.
   bool isIntegralOrUnscopedEnumerationType() const;
 
@@ -1666,6 +1793,7 @@
   bool isAnyComplexType() const;   // C99 6.2.5p11 (complex) + Complex Int.
   bool isFloatingType() const;     // C99 6.2.5p11 (real floating + complex)
   bool isHalfType() const;         // OpenCL 6.1.1.1, NEON (IEEE 754-2008 half)
+  bool isFloat16Type() const;      // C11 extension ISO/IEC TS 18661
   bool isRealType() const;         // C99 6.2.5p17 (real floating + integer)
   bool isArithmeticType() const;   // C99 6.2.5p18 (integer + floating)
   bool isVoidType() const;         // C99 6.2.5p19
@@ -1787,6 +1915,7 @@
     STK_IntegralComplex,
     STK_FloatingComplex
   };
+
   /// Given that this is a scalar type, classify it.
   ScalarTypeKind getScalarTypeKind() const;
 
@@ -1857,6 +1986,7 @@
   const RecordType *getAsUnionType() const;
   const ComplexType *getAsComplexIntegerType() const; // GCC complex int type.
   const ObjCObjectType *getAsObjCInterfaceType() const;
+
   // The following is a convenience method that returns an ObjCObjectPointerType
   // for object declared using an interface.
   const ObjCObjectPointerType *getAsObjCInterfacePointerType() const;
@@ -2043,12 +2173,10 @@
   QualType getCanonicalTypeInternal() const {
     return CanonicalType;
   }
+
   CanQualType getCanonicalTypeUnqualified() const; // in CanonicalType.h
   void dump() const;
   void dump(llvm::raw_ostream &OS) const;
-
-  friend class ASTReader;
-  friend class ASTWriter;
 };
 
 /// \brief This will check for a TypedefType by removing any existing sugar
@@ -2076,7 +2204,6 @@
 }
 #include "clang/AST/TypeNodes.def"
 
-
 /// This class is used for builtin types like 'int'.  Builtin
 /// types are always canonical and have a literal name field.
 class BuiltinType : public Type {
@@ -2093,15 +2220,16 @@
 
 public:
   BuiltinType(Kind K)
-    : Type(Builtin, QualType(), /*Dependent=*/(K == Dependent),
-           /*InstantiationDependent=*/(K == Dependent),
-           /*VariablyModified=*/false,
-           /*Unexpanded parameter pack=*/false) {
+      : Type(Builtin, QualType(), /*Dependent=*/(K == Dependent),
+             /*InstantiationDependent=*/(K == Dependent),
+             /*VariablyModified=*/false,
+             /*Unexpanded parameter pack=*/false) {
     BuiltinTypeBits.Kind = K;
   }
 
   Kind getKind() const { return static_cast<Kind>(BuiltinTypeBits.Kind); }
   StringRef getName(const PrintingPolicy &Policy) const;
+
   const char *getNameAsCString(const PrintingPolicy &Policy) const {
     // The StringRef is null-terminated.
     StringRef str = getName(Policy);
@@ -2158,17 +2286,17 @@
 
 /// Complex values, per C99 6.2.5p11.  This supports the C99 complex
 /// types (_Complex float etc) as well as the GCC integer complex extensions.
-///
 class ComplexType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these.
+
   QualType ElementType;
-  ComplexType(QualType Element, QualType CanonicalPtr) :
-    Type(Complex, CanonicalPtr, Element->isDependentType(),
-         Element->isInstantiationDependentType(),
-         Element->isVariablyModifiedType(),
-         Element->containsUnexpandedParameterPack()),
-    ElementType(Element) {
-  }
-  friend class ASTContext;  // ASTContext creates these.
+
+  ComplexType(QualType Element, QualType CanonicalPtr)
+      : Type(Complex, CanonicalPtr, Element->isDependentType(),
+             Element->isInstantiationDependentType(),
+             Element->isVariablyModifiedType(),
+             Element->containsUnexpandedParameterPack()),
+        ElementType(Element) {}
 
 public:
   QualType getElementType() const { return ElementType; }
@@ -2179,6 +2307,7 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getElementType());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, QualType Element) {
     ID.AddPointer(Element.getAsOpaquePtr());
   }
@@ -2187,21 +2316,19 @@
 };
 
 /// Sugar for parentheses used when specifying types.
-///
 class ParenType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these.
+
   QualType Inner;
 
-  ParenType(QualType InnerType, QualType CanonType) :
-    Type(Paren, CanonType, InnerType->isDependentType(),
-         InnerType->isInstantiationDependentType(),
-         InnerType->isVariablyModifiedType(),
-         InnerType->containsUnexpandedParameterPack()),
-    Inner(InnerType) {
-  }
-  friend class ASTContext;  // ASTContext creates these.
+  ParenType(QualType InnerType, QualType CanonType)
+      : Type(Paren, CanonType, InnerType->isDependentType(),
+             InnerType->isInstantiationDependentType(),
+             InnerType->isVariablyModifiedType(),
+             InnerType->containsUnexpandedParameterPack()),
+        Inner(InnerType) {}
 
 public:
-
   QualType getInnerType() const { return Inner; }
 
   bool isSugared() const { return true; }
@@ -2210,6 +2337,7 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getInnerType());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, QualType Inner) {
     Inner.Profile(ID);
   }
@@ -2218,21 +2346,19 @@
 };
 
 /// PointerType - C99 6.7.5.1 - Pointer Declarators.
-///
 class PointerType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these.
+
   QualType PointeeType;
 
-  PointerType(QualType Pointee, QualType CanonicalPtr) :
-    Type(Pointer, CanonicalPtr, Pointee->isDependentType(),
-         Pointee->isInstantiationDependentType(),
-         Pointee->isVariablyModifiedType(),
-         Pointee->containsUnexpandedParameterPack()),
-    PointeeType(Pointee) {
-  }
-  friend class ASTContext;  // ASTContext creates these.
+  PointerType(QualType Pointee, QualType CanonicalPtr)
+      : Type(Pointer, CanonicalPtr, Pointee->isDependentType(),
+             Pointee->isInstantiationDependentType(),
+             Pointee->isVariablyModifiedType(),
+             Pointee->containsUnexpandedParameterPack()),
+        PointeeType(Pointee) {}
 
 public:
-
   QualType getPointeeType() const { return PointeeType; }
 
   /// Returns true if address spaces of pointers overlap.
@@ -2257,6 +2383,7 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getPointeeType());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, QualType Pointee) {
     ID.AddPointer(Pointee.getAsOpaquePtr());
   }
@@ -2272,6 +2399,8 @@
   QualType AdjustedTy;
 
 protected:
+  friend class ASTContext; // ASTContext creates these.
+
   AdjustedType(TypeClass TC, QualType OriginalTy, QualType AdjustedTy,
                QualType CanonicalPtr)
       : Type(TC, CanonicalPtr, OriginalTy->isDependentType(),
@@ -2280,8 +2409,6 @@
              OriginalTy->containsUnexpandedParameterPack()),
         OriginalTy(OriginalTy), AdjustedTy(AdjustedTy) {}
 
-  friend class ASTContext;  // ASTContext creates these.
-
 public:
   QualType getOriginalType() const { return OriginalTy; }
   QualType getAdjustedType() const { return AdjustedTy; }
@@ -2292,6 +2419,7 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, OriginalTy, AdjustedTy);
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, QualType Orig, QualType New) {
     ID.AddPointer(Orig.getAsOpaquePtr());
     ID.AddPointer(New.getAsOpaquePtr());
@@ -2304,12 +2432,11 @@
 
 /// Represents a pointer type decayed from an array or function type.
 class DecayedType : public AdjustedType {
+  friend class ASTContext; // ASTContext creates these.
 
   inline
   DecayedType(QualType OriginalType, QualType Decayed, QualType Canonical);
 
-  friend class ASTContext;  // ASTContext creates these.
-
 public:
   QualType getDecayedType() const { return getAdjustedType(); }
 
@@ -2321,20 +2448,20 @@
 /// Pointer to a block type.
 /// This type is to represent types syntactically represented as
 /// "void (^)(int)", etc. Pointee is required to always be a function type.
-///
 class BlockPointerType : public Type, public llvm::FoldingSetNode {
-  QualType PointeeType;  // Block is some kind of pointer type
-  BlockPointerType(QualType Pointee, QualType CanonicalCls) :
-    Type(BlockPointer, CanonicalCls, Pointee->isDependentType(),
-         Pointee->isInstantiationDependentType(),
-         Pointee->isVariablyModifiedType(),
-         Pointee->containsUnexpandedParameterPack()),
-    PointeeType(Pointee) {
-  }
-  friend class ASTContext;  // ASTContext creates these.
+  friend class ASTContext; // ASTContext creates these.
+
+  // Block is some kind of pointer type
+  QualType PointeeType;
+
+  BlockPointerType(QualType Pointee, QualType CanonicalCls)
+      : Type(BlockPointer, CanonicalCls, Pointee->isDependentType(),
+             Pointee->isInstantiationDependentType(),
+             Pointee->isVariablyModifiedType(),
+             Pointee->containsUnexpandedParameterPack()),
+        PointeeType(Pointee) {}
 
 public:
-
   // Get the pointee type. Pointee is required to always be a function type.
   QualType getPointeeType() const { return PointeeType; }
 
@@ -2344,6 +2471,7 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
       Profile(ID, getPointeeType());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, QualType Pointee) {
       ID.AddPointer(Pointee.getAsOpaquePtr());
   }
@@ -2354,19 +2482,17 @@
 };
 
 /// Base for LValueReferenceType and RValueReferenceType
-///
 class ReferenceType : public Type, public llvm::FoldingSetNode {
   QualType PointeeType;
 
 protected:
   ReferenceType(TypeClass tc, QualType Referencee, QualType CanonicalRef,
-                bool SpelledAsLValue) :
-    Type(tc, CanonicalRef, Referencee->isDependentType(),
-         Referencee->isInstantiationDependentType(),
-         Referencee->isVariablyModifiedType(),
-         Referencee->containsUnexpandedParameterPack()),
-    PointeeType(Referencee)
-  {
+                bool SpelledAsLValue)
+      : Type(tc, CanonicalRef, Referencee->isDependentType(),
+             Referencee->isInstantiationDependentType(),
+             Referencee->isVariablyModifiedType(),
+             Referencee->containsUnexpandedParameterPack()),
+        PointeeType(Referencee) {
     ReferenceTypeBits.SpelledAsLValue = SpelledAsLValue;
     ReferenceTypeBits.InnerRef = Referencee->isReferenceType();
   }
@@ -2376,6 +2502,7 @@
   bool isInnerRef() const { return ReferenceTypeBits.InnerRef; }
 
   QualType getPointeeTypeAsWritten() const { return PointeeType; }
+
   QualType getPointeeType() const {
     // FIXME: this might strip inner qualifiers; okay?
     const ReferenceType *T = this;
@@ -2387,6 +2514,7 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, PointeeType, isSpelledAsLValue());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID,
                       QualType Referencee,
                       bool SpelledAsLValue) {
@@ -2401,13 +2529,14 @@
 };
 
 /// An lvalue reference type, per C++11 [dcl.ref].
-///
 class LValueReferenceType : public ReferenceType {
-  LValueReferenceType(QualType Referencee, QualType CanonicalRef,
-                      bool SpelledAsLValue) :
-    ReferenceType(LValueReference, Referencee, CanonicalRef, SpelledAsLValue)
-  {}
   friend class ASTContext; // ASTContext creates these
+
+  LValueReferenceType(QualType Referencee, QualType CanonicalRef,
+                      bool SpelledAsLValue)
+      : ReferenceType(LValueReference, Referencee, CanonicalRef,
+                      SpelledAsLValue) {}
+
 public:
   bool isSugared() const { return false; }
   QualType desugar() const { return QualType(this, 0); }
@@ -2418,12 +2547,12 @@
 };
 
 /// An rvalue reference type, per C++11 [dcl.ref].
-///
 class RValueReferenceType : public ReferenceType {
-  RValueReferenceType(QualType Referencee, QualType CanonicalRef) :
-    ReferenceType(RValueReference, Referencee, CanonicalRef, false) {
-  }
   friend class ASTContext; // ASTContext creates these
+
+  RValueReferenceType(QualType Referencee, QualType CanonicalRef)
+       : ReferenceType(RValueReference, Referencee, CanonicalRef, false) {}
+
 public:
   bool isSugared() const { return false; }
   QualType desugar() const { return QualType(this, 0); }
@@ -2436,24 +2565,24 @@
 /// A pointer to member type per C++ 8.3.3 - Pointers to members.
 ///
 /// This includes both pointers to data members and pointer to member functions.
-///
 class MemberPointerType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these.
+
   QualType PointeeType;
+
   /// The class of which the pointee is a member. Must ultimately be a
   /// RecordType, but could be a typedef or a template parameter too.
   const Type *Class;
 
-  MemberPointerType(QualType Pointee, const Type *Cls, QualType CanonicalPtr) :
-    Type(MemberPointer, CanonicalPtr,
-         Cls->isDependentType() || Pointee->isDependentType(),
-         (Cls->isInstantiationDependentType() ||
-          Pointee->isInstantiationDependentType()),
-         Pointee->isVariablyModifiedType(),
-         (Cls->containsUnexpandedParameterPack() ||
-          Pointee->containsUnexpandedParameterPack())),
-    PointeeType(Pointee), Class(Cls) {
-  }
-  friend class ASTContext; // ASTContext creates these.
+  MemberPointerType(QualType Pointee, const Type *Cls, QualType CanonicalPtr)
+      : Type(MemberPointer, CanonicalPtr,
+             Cls->isDependentType() || Pointee->isDependentType(),
+             (Cls->isInstantiationDependentType() ||
+              Pointee->isInstantiationDependentType()),
+             Pointee->isVariablyModifiedType(),
+             (Cls->containsUnexpandedParameterPack() ||
+              Pointee->containsUnexpandedParameterPack())),
+             PointeeType(Pointee), Class(Cls) {}
 
 public:
   QualType getPointeeType() const { return PointeeType; }
@@ -2479,6 +2608,7 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getPointeeType(), getClass());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, QualType Pointee,
                       const Type *Class) {
     ID.AddPointer(Pointee.getAsOpaquePtr());
@@ -2491,7 +2621,6 @@
 };
 
 /// Represents an array type, per C99 6.7.5.2 - Array Declarators.
-///
 class ArrayType : public Type, public llvm::FoldingSetNode {
 public:
   /// Capture whether this is a normal array (e.g. int X[4])
@@ -2501,11 +2630,14 @@
   enum ArraySizeModifier {
     Normal, Static, Star
   };
+
 private:
   /// The element type of the array.
   QualType ElementType;
 
 protected:
+  friend class ASTContext; // ASTContext creates these.
+
   // C++ [temp.dep.type]p1:
   //   A type is dependent if it is...
   //     - an array type constructed from any dependent type or whose
@@ -2514,25 +2646,26 @@
   ArrayType(TypeClass tc, QualType et, QualType can,
             ArraySizeModifier sm, unsigned tq,
             bool ContainsUnexpandedParameterPack)
-    : Type(tc, can, et->isDependentType() || tc == DependentSizedArray,
-           et->isInstantiationDependentType() || tc == DependentSizedArray,
-           (tc == VariableArray || et->isVariablyModifiedType()),
-           ContainsUnexpandedParameterPack),
-      ElementType(et) {
+      : Type(tc, can, et->isDependentType() || tc == DependentSizedArray,
+             et->isInstantiationDependentType() || tc == DependentSizedArray,
+             (tc == VariableArray || et->isVariablyModifiedType()),
+             ContainsUnexpandedParameterPack),
+        ElementType(et) {
     ArrayTypeBits.IndexTypeQuals = tq;
     ArrayTypeBits.SizeModifier = sm;
   }
 
-  friend class ASTContext;  // ASTContext creates these.
-
 public:
   QualType getElementType() const { return ElementType; }
+
   ArraySizeModifier getSizeModifier() const {
     return ArraySizeModifier(ArrayTypeBits.SizeModifier);
   }
+
   Qualifiers getIndexTypeQualifiers() const {
     return Qualifiers::fromCVRMask(getIndexTypeCVRQualifiers());
   }
+
   unsigned getIndexTypeCVRQualifiers() const {
     return ArrayTypeBits.IndexTypeQuals;
   }
@@ -2553,21 +2686,23 @@
 
   ConstantArrayType(QualType et, QualType can, const llvm::APInt &size,
                     ArraySizeModifier sm, unsigned tq)
-    : ArrayType(ConstantArray, et, can, sm, tq,
-                et->containsUnexpandedParameterPack()),
-      Size(size) {}
+      : ArrayType(ConstantArray, et, can, sm, tq,
+                  et->containsUnexpandedParameterPack()),
+        Size(size) {}
+
 protected:
+  friend class ASTContext; // ASTContext creates these.
+
   ConstantArrayType(TypeClass tc, QualType et, QualType can,
                     const llvm::APInt &size, ArraySizeModifier sm, unsigned tq)
-    : ArrayType(tc, et, can, sm, tq, et->containsUnexpandedParameterPack()),
-      Size(size) {}
-  friend class ASTContext;  // ASTContext creates these.
+      : ArrayType(tc, et, can, sm, tq, et->containsUnexpandedParameterPack()),
+        Size(size) {}
+
 public:
   const llvm::APInt &getSize() const { return Size; }
   bool isSugared() const { return false; }
   QualType desugar() const { return QualType(this, 0); }
 
-
   /// \brief Determine the number of bits required to address a member of
   // an array with the given element type and number of elements.
   static unsigned getNumAddressingBits(const ASTContext &Context,
@@ -2582,6 +2717,7 @@
     Profile(ID, getElementType(), getSize(),
             getSizeModifier(), getIndexTypeCVRQualifiers());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, QualType ET,
                       const llvm::APInt &ArraySize, ArraySizeModifier SizeMod,
                       unsigned TypeQuals) {
@@ -2590,6 +2726,7 @@
     ID.AddInteger(SizeMod);
     ID.AddInteger(TypeQuals);
   }
+
   static bool classof(const Type *T) {
     return T->getTypeClass() == ConstantArray;
   }
@@ -2599,13 +2736,16 @@
 /// an IncompleteArrayType where the element type is 'int' and the size is
 /// unspecified.
 class IncompleteArrayType : public ArrayType {
+  friend class ASTContext; // ASTContext creates these.
 
   IncompleteArrayType(QualType et, QualType can,
                       ArraySizeModifier sm, unsigned tq)
-    : ArrayType(IncompleteArray, et, can, sm, tq,
-                et->containsUnexpandedParameterPack()) {}
-  friend class ASTContext;  // ASTContext creates these.
+      : ArrayType(IncompleteArray, et, can, sm, tq,
+                  et->containsUnexpandedParameterPack()) {}
+
 public:
+  friend class StmtIteratorBase;
+
   bool isSugared() const { return false; }
   QualType desugar() const { return QualType(this, 0); }
 
@@ -2613,8 +2753,6 @@
     return T->getTypeClass() == IncompleteArray;
   }
 
-  friend class StmtIteratorBase;
-
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getElementType(), getSizeModifier(),
             getIndexTypeCVRQualifiers());
@@ -2642,28 +2780,32 @@
 ///   ++x;
 ///   int Z[x];
 /// }
-///
 class VariableArrayType : public ArrayType {
+  friend class ASTContext; // ASTContext creates these.
+
   /// An assignment-expression. VLA's are only permitted within
   /// a function block.
   Stmt *SizeExpr;
+
   /// The range spanned by the left and right array brackets.
   SourceRange Brackets;
 
   VariableArrayType(QualType et, QualType can, Expr *e,
                     ArraySizeModifier sm, unsigned tq,
                     SourceRange brackets)
-    : ArrayType(VariableArray, et, can, sm, tq,
-                et->containsUnexpandedParameterPack()),
-      SizeExpr((Stmt*) e), Brackets(brackets) {}
-  friend class ASTContext;  // ASTContext creates these.
+      : ArrayType(VariableArray, et, can, sm, tq,
+                  et->containsUnexpandedParameterPack()),
+        SizeExpr((Stmt*) e), Brackets(brackets) {}
 
 public:
+  friend class StmtIteratorBase;
+
   Expr *getSizeExpr() const {
     // We use C-style casts instead of cast<> here because we do not wish
     // to have a dependency of Type.h on Stmt.h/Expr.h.
     return (Expr*) SizeExpr;
   }
+
   SourceRange getBracketsRange() const { return Brackets; }
   SourceLocation getLBracketLoc() const { return Brackets.getBegin(); }
   SourceLocation getRBracketLoc() const { return Brackets.getEnd(); }
@@ -2675,8 +2817,6 @@
     return T->getTypeClass() == VariableArray;
   }
 
-  friend class StmtIteratorBase;
-
   void Profile(llvm::FoldingSetNodeID &ID) {
     llvm_unreachable("Cannot unique VariableArrayTypes.");
   }
@@ -2696,6 +2836,8 @@
 /// until template instantiation occurs, at which point this will
 /// become either a ConstantArrayType or a VariableArrayType.
 class DependentSizedArrayType : public ArrayType {
+  friend class ASTContext; // ASTContext creates these.
+
   const ASTContext &Context;
 
   /// \brief An assignment expression that will instantiate to the
@@ -2712,14 +2854,15 @@
                           Expr *e, ArraySizeModifier sm, unsigned tq,
                           SourceRange brackets);
 
-  friend class ASTContext;  // ASTContext creates these.
-
 public:
+  friend class StmtIteratorBase;
+
   Expr *getSizeExpr() const {
     // We use C-style casts instead of cast<> here because we do not wish
     // to have a dependency of Type.h on Stmt.h/Expr.h.
     return (Expr*) SizeExpr;
   }
+
   SourceRange getBracketsRange() const { return Brackets; }
   SourceLocation getLBracketLoc() const { return Brackets.getBegin(); }
   SourceLocation getRBracketLoc() const { return Brackets.getEnd(); }
@@ -2731,9 +2874,6 @@
     return T->getTypeClass() == DependentSizedArray;
   }
 
-  friend class StmtIteratorBase;
-
-
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, Context, getElementType(),
             getSizeModifier(), getIndexTypeCVRQualifiers(), getSizeExpr());
@@ -2756,6 +2896,8 @@
 /// }
 /// \endcode
 class DependentAddressSpaceType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext;
+
   const ASTContext &Context;
   Expr *AddrSpaceExpr;
   QualType PointeeType;
@@ -2765,8 +2907,6 @@
                             QualType can, Expr *AddrSpaceExpr, 
                             SourceLocation loc);
 
-  friend class ASTContext;
-
 public:
   Expr *getAddrSpaceExpr() const { return AddrSpaceExpr; }
   QualType getPointeeType() const { return PointeeType; }
@@ -2798,17 +2938,19 @@
 /// }
 /// \endcode
 class DependentSizedExtVectorType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext;
+
   const ASTContext &Context;
   Expr *SizeExpr;
+
   /// The element type of the array.
   QualType ElementType;
+
   SourceLocation loc;
 
   DependentSizedExtVectorType(const ASTContext &Context, QualType ElementType,
                               QualType can, Expr *SizeExpr, SourceLocation loc);
 
-  friend class ASTContext;
-
 public:
   Expr *getSizeExpr() const { return SizeExpr; }
   QualType getElementType() const { return ElementType; }
@@ -2838,14 +2980,28 @@
 class VectorType : public Type, public llvm::FoldingSetNode {
 public:
   enum VectorKind {
-    GenericVector,  ///< not a target-specific vector type
-    AltiVecVector,  ///< is AltiVec vector
-    AltiVecPixel,   ///< is AltiVec 'vector Pixel'
-    AltiVecBool,    ///< is AltiVec 'vector bool ...'
-    NeonVector,     ///< is ARM Neon vector
-    NeonPolyVector  ///< is ARM Neon polynomial vector
+    /// not a target-specific vector type
+    GenericVector,
+
+    /// is AltiVec vector
+    AltiVecVector,
+
+    /// is AltiVec 'vector Pixel'
+    AltiVecPixel,
+
+    /// is AltiVec 'vector bool ...'
+    AltiVecBool,
+
+    /// is ARM Neon vector
+    NeonVector,
+
+    /// is ARM Neon polynomial vector
+    NeonPolyVector
   };
+
 protected:
+  friend class ASTContext; // ASTContext creates these.
+
   /// The element type of the vector.
   QualType ElementType;
 
@@ -2855,12 +3011,10 @@
   VectorType(TypeClass tc, QualType vecType, unsigned nElements,
              QualType canonType, VectorKind vecKind);
 
-  friend class ASTContext;  // ASTContext creates these.
-
 public:
-
   QualType getElementType() const { return ElementType; }
   unsigned getNumElements() const { return VectorTypeBits.NumElements; }
+
   static bool isVectorSizeTooLarge(unsigned NumElements) {
     return NumElements > VectorTypeBitfields::MaxNumElements;
   }
@@ -2876,6 +3030,7 @@
     Profile(ID, getElementType(), getNumElements(),
             getTypeClass(), getVectorKind());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, QualType ElementType,
                       unsigned NumElements, TypeClass TypeClass,
                       VectorKind VecKind) {
@@ -2897,9 +3052,11 @@
 /// points (as .xyzw), colors (as .rgba), and textures (modeled after OpenGL
 /// Shading Language).
 class ExtVectorType : public VectorType {
-  ExtVectorType(QualType vecType, unsigned nElements, QualType canonType) :
-    VectorType(ExtVector, vecType, nElements, canonType, GenericVector) {}
-  friend class ASTContext;  // ASTContext creates these.
+  friend class ASTContext; // ASTContext creates these.
+
+  ExtVectorType(QualType vecType, unsigned nElements, QualType canonType)
+      : VectorType(ExtVector, vecType, nElements, canonType, GenericVector) {}
+
 public:
   static int getPointAccessorIdx(char c) {
     switch (c) {
@@ -2910,6 +3067,7 @@
     case 'w': case 'a': return 3;
     }
   }
+
   static int getNumericAccessorIdx(char c) {
     switch (c) {
       default: return -1;
@@ -2950,6 +3108,7 @@
       return unsigned(idx-1) < getNumElements();
     return false;
   }
+
   bool isSugared() const { return false; }
   QualType desugar() const { return QualType(this, 0); }
 
@@ -2960,12 +3119,11 @@
 
 /// FunctionType - C99 6.7.5.3 - Function Declarators.  This is the common base
 /// class of FunctionNoProtoType and FunctionProtoType.
-///
 class FunctionType : public Type {
   // The type returned by the function.
   QualType ResultType;
 
- public:
+public:
   /// A class which abstracts out some details necessary for
   /// making a call.
   ///
@@ -2987,6 +3145,8 @@
   // * AST read and write
   // * Codegen
   class ExtInfo {
+    friend class FunctionType;
+
     // Feel free to rearrange or add bits, but if you go over 11,
     // you'll need to adjust both the Bits field below and
     // Type::FunctionTypeBitfields.
@@ -3005,15 +3165,13 @@
       RegParmOffset = 8
     }; // Assumed to be the last field
 
-    uint16_t Bits;
+    uint16_t Bits = CC_C;
 
     ExtInfo(unsigned Bits) : Bits(static_cast<uint16_t>(Bits)) {}
 
-    friend class FunctionType;
-
    public:
-    // Constructor with no defaults. Use this when you know that you
-    // have all the elements (when reading an AST file for example).
+     // Constructor with no defaults. Use this when you know that you
+     // have all the elements (when reading an AST file for example).
      ExtInfo(bool noReturn, bool hasRegParm, unsigned regParm, CallingConv cc,
              bool producesResult, bool noCallerSavedRegs) {
        assert((!hasRegParm || regParm < 7) && "Invalid regparm value");
@@ -3025,22 +3183,24 @@
 
     // Constructor with all defaults. Use when for example creating a
     // function known to use defaults.
-    ExtInfo() : Bits(CC_C) { }
+    ExtInfo() = default;
 
     // Constructor with just the calling convention, which is an important part
     // of the canonical type.
-    ExtInfo(CallingConv CC) : Bits(CC) { }
+    ExtInfo(CallingConv CC) : Bits(CC) {}
 
     bool getNoReturn() const { return Bits & NoReturnMask; }
     bool getProducesResult() const { return Bits & ProducesResultMask; }
     bool getNoCallerSavedRegs() const { return Bits & NoCallerSavedRegsMask; }
     bool getHasRegParm() const { return (Bits >> RegParmOffset) != 0; }
+
     unsigned getRegParm() const {
       unsigned RegParm = Bits >> RegParmOffset;
       if (RegParm > 0)
         --RegParm;
       return RegParm;
     }
+
     CallingConv getCC() const { return CallingConv(Bits & CallConvMask); }
 
     bool operator==(ExtInfo Other) const {
@@ -3095,11 +3255,12 @@
                bool InstantiationDependent,
                bool VariablyModified, bool ContainsUnexpandedParameterPack,
                ExtInfo Info)
-    : Type(tc, Canonical, Dependent, InstantiationDependent, VariablyModified,
-           ContainsUnexpandedParameterPack),
-      ResultType(res) {
+      : Type(tc, Canonical, Dependent, InstantiationDependent, VariablyModified,
+             ContainsUnexpandedParameterPack),
+        ResultType(res) {
     FunctionTypeBits.ExtInfo = Info.Bits;
   }
+
   unsigned getTypeQuals() const { return FunctionTypeBits.TypeQuals; }
 
 public:
@@ -3107,10 +3268,12 @@
 
   bool getHasRegParm() const { return getExtInfo().getHasRegParm(); }
   unsigned getRegParmType() const { return getExtInfo().getRegParm(); }
+
   /// Determine whether this function type includes the GNU noreturn
   /// attribute. The C++11 [[noreturn]] attribute does not affect the function
   /// type.
   bool getNoReturnAttr() const { return getExtInfo().getNoReturn(); }
+
   CallingConv getCallConv() const { return getExtInfo().getCC(); }
   ExtInfo getExtInfo() const { return ExtInfo(FunctionTypeBits.ExtInfo); }
   bool isConst() const { return getTypeQuals() & Qualifiers::Const; }
@@ -3134,13 +3297,13 @@
 /// Represents a K&R-style 'int foo()' function, which has
 /// no information available about its arguments.
 class FunctionNoProtoType : public FunctionType, public llvm::FoldingSetNode {
-  FunctionNoProtoType(QualType Result, QualType Canonical, ExtInfo Info)
-    : FunctionType(FunctionNoProto, Result, Canonical,
-                   /*Dependent=*/false, /*InstantiationDependent=*/false,
-                   Result->isVariablyModifiedType(),
-                   /*ContainsUnexpandedParameterPack=*/false, Info) {}
+  friend class ASTContext; // ASTContext creates these.
 
-  friend class ASTContext;  // ASTContext creates these.
+  FunctionNoProtoType(QualType Result, QualType Canonical, ExtInfo Info)
+      : FunctionType(FunctionNoProto, Result, Canonical,
+                     /*Dependent=*/false, /*InstantiationDependent=*/false,
+                     Result->isVariablyModifiedType(),
+                     /*ContainsUnexpandedParameterPack=*/false, Info) {}
 
 public:
   // No additional state past what FunctionType provides.
@@ -3151,6 +3314,7 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getReturnType(), getExtInfo());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, QualType ResultType,
                       ExtInfo Info) {
     Info.Profile(ID);
@@ -3195,10 +3359,10 @@
       HasPassObjSize  = 0x20,
       IsNoEscape      = 0x40,
     };
-    unsigned char Data;
+    unsigned char Data = 0;
 
   public:
-    ExtParameterInfo() : Data(0) {}
+    ExtParameterInfo() = default;
 
     /// Return the ABI treatment of this parameter.
     ParameterABI getABI() const {
@@ -3263,54 +3427,54 @@
   };
 
   struct ExceptionSpecInfo {
-    ExceptionSpecInfo()
-        : Type(EST_None), NoexceptExpr(nullptr),
-          SourceDecl(nullptr), SourceTemplate(nullptr) {}
-
-    ExceptionSpecInfo(ExceptionSpecificationType EST)
-        : Type(EST), NoexceptExpr(nullptr), SourceDecl(nullptr),
-          SourceTemplate(nullptr) {}
-
     /// The kind of exception specification this is.
-    ExceptionSpecificationType Type;
+    ExceptionSpecificationType Type = EST_None;
+
     /// Explicitly-specified list of exception types.
     ArrayRef<QualType> Exceptions;
+
     /// Noexcept expression, if this is EST_ComputedNoexcept.
-    Expr *NoexceptExpr;
+    Expr *NoexceptExpr = nullptr;
+
     /// The function whose exception specification this is, for
     /// EST_Unevaluated and EST_Uninstantiated.
-    FunctionDecl *SourceDecl;
+    FunctionDecl *SourceDecl = nullptr;
+
     /// The function template whose exception specification this is instantiated
     /// from, for EST_Uninstantiated.
-    FunctionDecl *SourceTemplate;
+    FunctionDecl *SourceTemplate = nullptr;
+
+    ExceptionSpecInfo() = default;
+
+    ExceptionSpecInfo(ExceptionSpecificationType EST) : Type(EST) {}
   };
 
   /// Extra information about a function prototype.
   struct ExtProtoInfo {
+    FunctionType::ExtInfo ExtInfo;
+    bool Variadic : 1;
+    bool HasTrailingReturn : 1;
+    unsigned char TypeQuals = 0;
+    RefQualifierKind RefQualifier = RQ_None;
+    ExceptionSpecInfo ExceptionSpec;
+    const ExtParameterInfo *ExtParameterInfos = nullptr;
+
     ExtProtoInfo()
-        : Variadic(false), HasTrailingReturn(false), TypeQuals(0),
-          RefQualifier(RQ_None), ExtParameterInfos(nullptr) {}
+        : Variadic(false), HasTrailingReturn(false) {}
 
     ExtProtoInfo(CallingConv CC)
-        : ExtInfo(CC), Variadic(false), HasTrailingReturn(false), TypeQuals(0),
-          RefQualifier(RQ_None), ExtParameterInfos(nullptr) {}
+        : ExtInfo(CC), Variadic(false), HasTrailingReturn(false) {}
 
     ExtProtoInfo withExceptionSpec(const ExceptionSpecInfo &O) {
       ExtProtoInfo Result(*this);
       Result.ExceptionSpec = O;
       return Result;
     }
-
-    FunctionType::ExtInfo ExtInfo;
-    bool Variadic : 1;
-    bool HasTrailingReturn : 1;
-    unsigned char TypeQuals;
-    RefQualifierKind RefQualifier;
-    ExceptionSpecInfo ExceptionSpec;
-    const ExtParameterInfo *ExtParameterInfos;
   };
 
 private:
+  friend class ASTContext; // ASTContext creates these.
+
   /// \brief Determine whether there are any argument types that
   /// contain an unexpanded parameter pack.
   static bool containsAnyUnexpandedParameterPack(const QualType *ArgArray,
@@ -3362,8 +3526,6 @@
   // for each of the parameters.  This only appears if HasExtParameterInfos
   // is true.
 
-  friend class ASTContext;  // ASTContext creates these.
-
   const ExtParameterInfo *getExtParameterInfosBuffer() const {
     assert(hasExtParameterInfos());
 
@@ -3391,10 +3553,12 @@
 
 public:
   unsigned getNumParams() const { return NumParams; }
+
   QualType getParamType(unsigned i) const {
     assert(i < NumParams && "invalid parameter index");
     return param_type_begin()[i];
   }
+
   ArrayRef<QualType> getParamTypes() const {
     return llvm::makeArrayRef(param_type_begin(), param_type_end());
   }
@@ -3426,31 +3590,47 @@
   ExceptionSpecificationType getExceptionSpecType() const {
     return static_cast<ExceptionSpecificationType>(ExceptionSpecType);
   }
+
   /// Return whether this function has any kind of exception spec.
   bool hasExceptionSpec() const {
     return getExceptionSpecType() != EST_None;
   }
+
   /// Return whether this function has a dynamic (throw) exception spec.
   bool hasDynamicExceptionSpec() const {
     return isDynamicExceptionSpec(getExceptionSpecType());
   }
+
   /// Return whether this function has a noexcept exception spec.
   bool hasNoexceptExceptionSpec() const {
     return isNoexceptExceptionSpec(getExceptionSpecType());
   }
+
   /// Return whether this function has a dependent exception spec.
   bool hasDependentExceptionSpec() const;
+
   /// Return whether this function has an instantiation-dependent exception
   /// spec.
   bool hasInstantiationDependentExceptionSpec() const;
+
   /// Result type of getNoexceptSpec().
   enum NoexceptResult {
-    NR_NoNoexcept,  ///< There is no noexcept specifier.
-    NR_BadNoexcept, ///< The noexcept specifier has a bad expression.
-    NR_Dependent,   ///< The noexcept specifier is dependent.
-    NR_Throw,       ///< The noexcept specifier evaluates to false.
-    NR_Nothrow      ///< The noexcept specifier evaluates to true.
+    /// There is no noexcept specifier.
+    NR_NoNoexcept,
+
+    /// The noexcept specifier has a bad expression.
+    NR_BadNoexcept,
+
+    /// The noexcept specifier is dependent.
+    NR_Dependent,
+
+    /// The noexcept specifier evaluates to false.
+    NR_Throw,
+
+    /// The noexcept specifier evaluates to true.
+    NR_Nothrow
   };
+
   /// Get the meaning of the noexcept spec on this function, if any.
   NoexceptResult getNoexceptSpec(const ASTContext &Ctx) const;
   unsigned getNumExceptions() const { return NumExceptions; }
@@ -3464,6 +3644,7 @@
     // NoexceptExpr sits where the arguments end.
     return *reinterpret_cast<Expr *const *>(param_type_end());
   }
+
   /// \brief If this function type has an exception specification which hasn't
   /// been determined yet (either because it has not been evaluated or because
   /// it has not been instantiated), this is the function whose exception
@@ -3474,6 +3655,7 @@
       return nullptr;
     return reinterpret_cast<FunctionDecl *const *>(param_type_end())[0];
   }
+
   /// \brief If this function type has an uninstantiated exception
   /// specification, this is the function whose exception specification
   /// should be instantiated to find the exception specification for
@@ -3483,9 +3665,11 @@
       return nullptr;
     return reinterpret_cast<FunctionDecl *const *>(param_type_end())[1];
   }
+
   /// Determine whether this function type has a non-throwing exception
   /// specification.
   CanThrowResult canThrow(const ASTContext &Ctx) const;
+
   /// Determine whether this function type has a non-throwing exception
   /// specification. If this depends on template arguments, returns
   /// \c ResultIfDependent.
@@ -3508,34 +3692,37 @@
 
   unsigned getTypeQuals() const { return FunctionType::getTypeQuals(); }
 
-
   /// Retrieve the ref-qualifier associated with this function type.
   RefQualifierKind getRefQualifier() const {
     return static_cast<RefQualifierKind>(FunctionTypeBits.RefQualifier);
   }
 
-  typedef const QualType *param_type_iterator;
-  typedef llvm::iterator_range<param_type_iterator> param_type_range;
+  using param_type_iterator = const QualType *;
+  using param_type_range = llvm::iterator_range<param_type_iterator>;
 
   param_type_range param_types() const {
     return param_type_range(param_type_begin(), param_type_end());
   }
+
   param_type_iterator param_type_begin() const {
     return reinterpret_cast<const QualType *>(this+1);
   }
+
   param_type_iterator param_type_end() const {
     return param_type_begin() + NumParams;
   }
 
-  typedef const QualType *exception_iterator;
+  using exception_iterator = const QualType *;
 
   ArrayRef<QualType> exceptions() const {
     return llvm::makeArrayRef(exception_begin(), exception_end());
   }
+
   exception_iterator exception_begin() const {
     // exceptions begin where arguments end
     return param_type_end();
   }
+
   exception_iterator exception_end() const {
     if (getExceptionSpecType() != EST_Dynamic)
       return exception_begin();
@@ -3550,6 +3737,7 @@
     return ArrayRef<ExtParameterInfo>(getExtParameterInfosBuffer(),
                                       getNumParams());
   }
+
   /// Return a pointer to the beginning of the array of extra parameter
   /// information, if present, or else null if none of the parameters
   /// carry it.  This is equivalent to getExtProtoInfo().ExtParameterInfos.
@@ -3603,15 +3791,16 @@
 ///
 /// Template instantiation turns these into the underlying type.
 class UnresolvedUsingType : public Type {
+  friend class ASTContext; // ASTContext creates these.
+
   UnresolvedUsingTypenameDecl *Decl;
 
   UnresolvedUsingType(const UnresolvedUsingTypenameDecl *D)
-    : Type(UnresolvedUsing, QualType(), true, true, false,
-           /*ContainsUnexpandedParameterPack=*/false),
-      Decl(const_cast<UnresolvedUsingTypenameDecl*>(D)) {}
-  friend class ASTContext; // ASTContext creates these.
-public:
+      : Type(UnresolvedUsing, QualType(), true, true, false,
+             /*ContainsUnexpandedParameterPack=*/false),
+        Decl(const_cast<UnresolvedUsingTypenameDecl*>(D)) {}
 
+public:
   UnresolvedUsingTypenameDecl *getDecl() const { return Decl; }
 
   bool isSugared() const { return false; }
@@ -3624,27 +3813,29 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
     return Profile(ID, Decl);
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID,
                       UnresolvedUsingTypenameDecl *D) {
     ID.AddPointer(D);
   }
 };
 
-
 class TypedefType : public Type {
   TypedefNameDecl *Decl;
+
 protected:
+  friend class ASTContext; // ASTContext creates these.
+
   TypedefType(TypeClass tc, const TypedefNameDecl *D, QualType can)
-    : Type(tc, can, can->isDependentType(),
-           can->isInstantiationDependentType(),
-           can->isVariablyModifiedType(),
-           /*ContainsUnexpandedParameterPack=*/false),
-      Decl(const_cast<TypedefNameDecl*>(D)) {
+      : Type(tc, can, can->isDependentType(),
+             can->isInstantiationDependentType(),
+             can->isVariablyModifiedType(),
+             /*ContainsUnexpandedParameterPack=*/false),
+        Decl(const_cast<TypedefNameDecl*>(D)) {
     assert(!isa<TypedefType>(can) && "Invalid canonical type");
   }
-  friend class ASTContext;  // ASTContext creates these.
-public:
 
+public:
   TypedefNameDecl *getDecl() const { return Decl; }
 
   bool isSugared() const { return true; }
@@ -3658,8 +3849,10 @@
   Expr *TOExpr;
 
 protected:
+  friend class ASTContext; // ASTContext creates these.
+
   TypeOfExprType(Expr *E, QualType can = QualType());
-  friend class ASTContext;  // ASTContext creates these.
+
 public:
   Expr *getUnderlyingExpr() const { return TOExpr; }
 
@@ -3684,7 +3877,7 @@
 
 public:
   DependentTypeOfExprType(const ASTContext &Context, Expr *E)
-    : TypeOfExprType(E), Context(Context) { }
+      : TypeOfExprType(E), Context(Context) {}
 
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, Context, getUnderlyingExpr());
@@ -3696,16 +3889,19 @@
 
 /// Represents `typeof(type)`, a GCC extension.
 class TypeOfType : public Type {
+  friend class ASTContext; // ASTContext creates these.
+
   QualType TOType;
+
   TypeOfType(QualType T, QualType can)
-    : Type(TypeOf, can, T->isDependentType(),
-           T->isInstantiationDependentType(),
-           T->isVariablyModifiedType(),
-           T->containsUnexpandedParameterPack()),
-      TOType(T) {
+      : Type(TypeOf, can, T->isDependentType(),
+             T->isInstantiationDependentType(),
+             T->isVariablyModifiedType(),
+             T->containsUnexpandedParameterPack()),
+        TOType(T) {
     assert(!isa<TypedefType>(can) && "Invalid canonical type");
   }
-  friend class ASTContext;  // ASTContext creates these.
+
 public:
   QualType getUnderlyingType() const { return TOType; }
 
@@ -3724,8 +3920,10 @@
   QualType UnderlyingType;
 
 protected:
+  friend class ASTContext; // ASTContext creates these.
+
   DecltypeType(Expr *E, QualType underlyingType, QualType can = QualType());
-  friend class ASTContext;  // ASTContext creates these.
+
 public:
   Expr *getUnderlyingExpr() const { return E; }
   QualType getUnderlyingType() const { return UnderlyingType; }
@@ -3769,14 +3967,18 @@
 private:
   /// The untransformed type.
   QualType BaseType;
+
   /// The transformed type if not dependent, otherwise the same as BaseType.
   QualType UnderlyingType;
 
   UTTKind UKind;
+
 protected:
+  friend class ASTContext;
+
   UnaryTransformType(QualType BaseTy, QualType UnderlyingTy, UTTKind UKind,
                      QualType CanonicalTy);
-  friend class ASTContext;
+
 public:
   bool isSugared() const { return !isDependentType(); }
   QualType desugar() const { return UnderlyingType; }
@@ -3802,6 +4004,7 @@
 public:
   DependentUnaryTransformType(const ASTContext &C, QualType BaseType,
                               UTTKind UKind);
+
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getBaseType(), getUTTKind());
   }
@@ -3814,11 +4017,11 @@
 };
 
 class TagType : public Type {
+  friend class ASTReader;
+
   /// Stores the TagDecl associated with this type. The decl may point to any
   /// TagDecl that declares the entity.
-  TagDecl * decl;
-
-  friend class ASTReader;
+  TagDecl *decl;
 
 protected:
   TagType(TypeClass TC, const TagDecl *D, QualType can);
@@ -3838,13 +4041,14 @@
 /// to detect TagType objects of structs/unions/classes.
 class RecordType : public TagType {
 protected:
-  explicit RecordType(const RecordDecl *D)
-    : TagType(Record, reinterpret_cast<const TagDecl*>(D), QualType()) { }
-  explicit RecordType(TypeClass TC, RecordDecl *D)
-    : TagType(TC, reinterpret_cast<const TagDecl*>(D), QualType()) { }
-  friend class ASTContext;   // ASTContext creates these.
-public:
+  friend class ASTContext; // ASTContext creates these.
 
+  explicit RecordType(const RecordDecl *D)
+      : TagType(Record, reinterpret_cast<const TagDecl*>(D), QualType()) {}
+  explicit RecordType(TypeClass TC, RecordDecl *D)
+      : TagType(TC, reinterpret_cast<const TagDecl*>(D), QualType()) {}
+
+public:
   RecordDecl *getDecl() const {
     return reinterpret_cast<RecordDecl*>(TagType::getDecl());
   }
@@ -3862,11 +4066,12 @@
 /// A helper class that allows the use of isa/cast/dyncast
 /// to detect TagType objects of enums.
 class EnumType : public TagType {
-  explicit EnumType(const EnumDecl *D)
-    : TagType(Enum, reinterpret_cast<const TagDecl*>(D), QualType()) { }
-  friend class ASTContext;   // ASTContext creates these.
-public:
+  friend class ASTContext; // ASTContext creates these.
 
+  explicit EnumType(const EnumDecl *D)
+      : TagType(Enum, reinterpret_cast<const TagDecl*>(D), QualType()) {}
+
+public:
   EnumDecl *getDecl() const {
     return reinterpret_cast<EnumDecl*>(TagType::getDecl());
   }
@@ -3941,11 +4146,11 @@
   };
 
 private:
+  friend class ASTContext; // ASTContext creates these
+
   QualType ModifiedType;
   QualType EquivalentType;
 
-  friend class ASTContext; // creates these
-
   AttributedType(QualType canon, Kind attrKind, QualType modified,
                  QualType equivalent)
       : Type(Attributed, canon, equivalent->isDependentType(),
@@ -4034,6 +4239,8 @@
 };
 
 class TemplateTypeParmType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these
+
   // Helper data collector for canonical types.
   struct CanonicalTTPTInfo {
     unsigned Depth : 15;
@@ -4044,31 +4251,30 @@
   union {
     // Info for the canonical type.
     CanonicalTTPTInfo CanTTPTInfo;
+
     // Info for the non-canonical type.
     TemplateTypeParmDecl *TTPDecl;
   };
 
   /// Build a non-canonical type.
   TemplateTypeParmType(TemplateTypeParmDecl *TTPDecl, QualType Canon)
-    : Type(TemplateTypeParm, Canon, /*Dependent=*/true,
-           /*InstantiationDependent=*/true,
-           /*VariablyModified=*/false,
-           Canon->containsUnexpandedParameterPack()),
-      TTPDecl(TTPDecl) { }
+      : Type(TemplateTypeParm, Canon, /*Dependent=*/true,
+             /*InstantiationDependent=*/true,
+             /*VariablyModified=*/false,
+             Canon->containsUnexpandedParameterPack()),
+        TTPDecl(TTPDecl) {}
 
   /// Build the canonical type.
   TemplateTypeParmType(unsigned D, unsigned I, bool PP)
-    : Type(TemplateTypeParm, QualType(this, 0),
-           /*Dependent=*/true,
-           /*InstantiationDependent=*/true,
-           /*VariablyModified=*/false, PP) {
+      : Type(TemplateTypeParm, QualType(this, 0),
+             /*Dependent=*/true,
+             /*InstantiationDependent=*/true,
+             /*VariablyModified=*/false, PP) {
     CanTTPTInfo.Depth = D;
     CanTTPTInfo.Index = I;
     CanTTPTInfo.ParameterPack = PP;
   }
 
-  friend class ASTContext;  // ASTContext creates these
-
   const CanonicalTTPTInfo& getCanTTPTInfo() const {
     QualType Can = getCanonicalTypeInternal();
     return Can->castAs<TemplateTypeParmType>()->CanTTPTInfo;
@@ -4114,17 +4320,17 @@
 /// type was originally written as a template type parameter;
 /// therefore they are never canonical.
 class SubstTemplateTypeParmType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext;
+
   // The original type parameter.
   const TemplateTypeParmType *Replaced;
 
   SubstTemplateTypeParmType(const TemplateTypeParmType *Param, QualType Canon)
-    : Type(SubstTemplateTypeParm, Canon, Canon->isDependentType(),
-           Canon->isInstantiationDependentType(),
-           Canon->isVariablyModifiedType(),
-           Canon->containsUnexpandedParameterPack()),
-      Replaced(Param) { }
-
-  friend class ASTContext;
+      : Type(SubstTemplateTypeParm, Canon, Canon->isDependentType(),
+             Canon->isInstantiationDependentType(),
+             Canon->isVariablyModifiedType(),
+             Canon->containsUnexpandedParameterPack()),
+        Replaced(Param) {}
 
 public:
   /// Gets the template parameter that was substituted for.
@@ -4144,6 +4350,7 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getReplacedParameter(), getReplacementType());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID,
                       const TemplateTypeParmType *Replaced,
                       QualType Replacement) {
@@ -4169,6 +4376,8 @@
 /// arguments), this type will be replaced with the \c SubstTemplateTypeParmType
 /// at the current pack substitution index.
 class SubstTemplateTypeParmPackType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext;
+
   /// \brief The original type parameter.
   const TemplateTypeParmType *Replaced;
 
@@ -4183,8 +4392,6 @@
                                 QualType Canon,
                                 const TemplateArgument &ArgPack);
 
-  friend class ASTContext;
-
 public:
   IdentifierInfo *getIdentifier() const { return Replaced->getIdentifier(); }
 
@@ -4258,6 +4465,8 @@
 
 /// \brief Represents a C++11 auto or C++14 decltype(auto) type.
 class AutoType : public DeducedType, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these
+
   AutoType(QualType DeducedAsType, AutoTypeKeyword Keyword,
            bool IsDeducedAsDependent)
       : DeducedType(Auto, DeducedAsType, IsDeducedAsDependent,
@@ -4265,12 +4474,11 @@
     AutoTypeBits.Keyword = (unsigned)Keyword;
   }
 
-  friend class ASTContext;  // ASTContext creates these
-
 public:
   bool isDecltypeAuto() const {
     return getKeyword() == AutoTypeKeyword::DecltypeAuto;
   }
+
   AutoTypeKeyword getKeyword() const {
     return (AutoTypeKeyword)AutoTypeBits.Keyword;
   }
@@ -4294,6 +4502,8 @@
 /// \brief Represents a C++17 deduced template specialization type.
 class DeducedTemplateSpecializationType : public DeducedType,
                                           public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these
+
   /// The name of the template whose arguments will be deduced.
   TemplateName Template;
 
@@ -4306,8 +4516,6 @@
                     Template.containsUnexpandedParameterPack()),
         Template(Template) {}
 
-  friend class ASTContext;  // ASTContext creates these
-
 public:
   /// Retrieve the name of the template that we are deducing.
   TemplateName getTemplateName() const { return Template;}
@@ -4351,6 +4559,8 @@
 class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) TemplateSpecializationType
     : public Type,
       public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these
+
   /// The name of the template being specialized.  This is
   /// either a TemplateName::Template (in which case it is a
   /// ClassTemplateDecl*, a TemplateTemplateParmDecl*, or a
@@ -4372,8 +4582,6 @@
                              QualType Canon,
                              QualType Aliased);
 
-  friend class ASTContext;  // ASTContext creates these
-
 public:
   /// Determine whether any of the given template arguments are dependent.
   static bool anyDependentTemplateArguments(ArrayRef<TemplateArgumentLoc> Args,
@@ -4382,21 +4590,6 @@
   static bool anyDependentTemplateArguments(const TemplateArgumentListInfo &,
                                             bool &InstantiationDependent);
 
-  /// \brief Print a template argument list, including the '<' and '>'
-  /// enclosing the template arguments.
-  static void PrintTemplateArgumentList(raw_ostream &OS,
-                                        ArrayRef<TemplateArgument> Args,
-                                        const PrintingPolicy &Policy,
-                                        bool SkipBrackets = false);
-
-  static void PrintTemplateArgumentList(raw_ostream &OS,
-                                        ArrayRef<TemplateArgumentLoc> Args,
-                                        const PrintingPolicy &Policy);
-
-  static void PrintTemplateArgumentList(raw_ostream &OS,
-                                        const TemplateArgumentListInfo &,
-                                        const PrintingPolicy &Policy);
-
   /// True if this template specialization type matches a current
   /// instantiation in the context in which it is found.
   bool isCurrentInstantiation() const {
@@ -4427,7 +4620,7 @@
     return *reinterpret_cast<const QualType*>(end());
   }
 
-  typedef const TemplateArgument * iterator;
+  using iterator = const TemplateArgument *;
 
   iterator begin() const { return getArgs(); }
   iterator end() const; // defined inline in TemplateBase.h
@@ -4454,6 +4647,7 @@
   bool isSugared() const {
     return !isDependentType() || isCurrentInstantiation() || isTypeAlias();
   }
+
   QualType desugar() const { return getCanonicalTypeInternal(); }
 
   void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Ctx) {
@@ -4471,6 +4665,20 @@
   }
 };
 
+/// \brief Print a template argument list, including the '<' and '>'
+/// enclosing the template arguments.
+void printTemplateArgumentList(raw_ostream &OS,
+                               ArrayRef<TemplateArgument> Args,
+                               const PrintingPolicy &Policy);
+
+void printTemplateArgumentList(raw_ostream &OS,
+                               ArrayRef<TemplateArgumentLoc> Args,
+                               const PrintingPolicy &Policy);
+
+void printTemplateArgumentList(raw_ostream &OS,
+                               const TemplateArgumentListInfo &Args,
+                               const PrintingPolicy &Policy);
+
 /// The injected class name of a C++ class template or class
 /// template partial specialization.  Used to record that a type was
 /// spelled with a bare identifier rather than as a template-id; the
@@ -4489,6 +4697,12 @@
 /// will canonicalize to the injected class name (when appropriate
 /// according to the rules of the language).
 class InjectedClassNameType : public Type {
+  friend class ASTContext; // ASTContext creates these.
+  friend class ASTNodeImporter;
+  friend class ASTReader; // FIXME: ASTContext::getInjectedClassNameType is not
+                          // currently suitable for AST reading, too much
+                          // interdependencies.
+
   CXXRecordDecl *Decl;
 
   /// The template specialization which this type represents.
@@ -4502,18 +4716,12 @@
   /// and always dependent.
   QualType InjectedType;
 
-  friend class ASTContext; // ASTContext creates these.
-  friend class ASTReader; // FIXME: ASTContext::getInjectedClassNameType is not
-                          // currently suitable for AST reading, too much
-                          // interdependencies.
-  friend class ASTNodeImporter;
-
   InjectedClassNameType(CXXRecordDecl *D, QualType TST)
-    : Type(InjectedClassName, QualType(), /*Dependent=*/true,
-           /*InstantiationDependent=*/true,
-           /*VariablyModified=*/false,
-           /*ContainsUnexpandedParameterPack=*/false),
-      Decl(D), InjectedType(TST) {
+      : Type(InjectedClassName, QualType(), /*Dependent=*/true,
+             /*InstantiationDependent=*/true,
+             /*VariablyModified=*/false,
+             /*ContainsUnexpandedParameterPack=*/false),
+        Decl(D), InjectedType(TST) {
     assert(isa<TemplateSpecializationType>(TST));
     assert(!TST.hasQualifiers());
     assert(TST->isDependentType());
@@ -4521,9 +4729,11 @@
 
 public:
   QualType getInjectedSpecializationType() const { return InjectedType; }
+
   const TemplateSpecializationType *getInjectedTST() const {
     return cast<TemplateSpecializationType>(InjectedType.getTypePtr());
   }
+
   TemplateName getTemplateName() const {
     return getInjectedTST()->getTemplateName();
   }
@@ -4542,12 +4752,16 @@
 enum TagTypeKind {
   /// \brief The "struct" keyword.
   TTK_Struct,
+
   /// \brief The "__interface" keyword.
   TTK_Interface,
+
   /// \brief The "union" keyword.
   TTK_Union,
+
   /// \brief The "class" keyword.
   TTK_Class,
+
   /// \brief The "enum" keyword.
   TTK_Enum
 };
@@ -4557,17 +4771,23 @@
 enum ElaboratedTypeKeyword {
   /// \brief The "struct" keyword introduces the elaborated-type-specifier.
   ETK_Struct,
+
   /// \brief The "__interface" keyword introduces the elaborated-type-specifier.
   ETK_Interface,
+
   /// \brief The "union" keyword introduces the elaborated-type-specifier.
   ETK_Union,
+
   /// \brief The "class" keyword introduces the elaborated-type-specifier.
   ETK_Class,
+
   /// \brief The "enum" keyword introduces the elaborated-type-specifier.
   ETK_Enum,
+
   /// \brief The "typename" keyword precedes the qualified type name, e.g.,
   /// \c typename T::type.
   ETK_Typename,
+
   /// \brief No keyword precedes the qualified type name.
   ETK_None
 };
@@ -4582,8 +4802,8 @@
                   QualType Canonical, bool Dependent,
                   bool InstantiationDependent, bool VariablyModified,
                   bool ContainsUnexpandedParameterPack)
-  : Type(tc, Canonical, Dependent, InstantiationDependent, VariablyModified,
-         ContainsUnexpandedParameterPack) {
+      : Type(tc, Canonical, Dependent, InstantiationDependent, VariablyModified,
+             ContainsUnexpandedParameterPack) {
     TypeWithKeywordBits.Keyword = Keyword;
   }
 
@@ -4628,6 +4848,7 @@
 /// The type itself is always "sugar", used to express what was written
 /// in the source code but containing no additional semantic information.
 class ElaboratedType : public TypeWithKeyword, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these
 
   /// The nested name specifier containing the qualifier.
   NestedNameSpecifier *NNS;
@@ -4648,8 +4869,6 @@
            "and name qualifier both null.");
   }
 
-  friend class ASTContext;  // ASTContext creates these
-
 public:
   ~ElaboratedType();
 
@@ -4694,6 +4913,7 @@
 /// mode, this type is used with non-dependent names to delay name lookup until
 /// instantiation.
 class DependentNameType : public TypeWithKeyword, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these
 
   /// \brief The nested name specifier containing the qualifier.
   NestedNameSpecifier *NNS;
@@ -4703,13 +4923,11 @@
 
   DependentNameType(ElaboratedTypeKeyword Keyword, NestedNameSpecifier *NNS,
                     const IdentifierInfo *Name, QualType CanonType)
-    : TypeWithKeyword(Keyword, DependentName, CanonType, /*Dependent=*/true,
-                      /*InstantiationDependent=*/true,
-                      /*VariablyModified=*/false,
-                      NNS->containsUnexpandedParameterPack()),
-      NNS(NNS), Name(Name) {}
-
-  friend class ASTContext;  // ASTContext creates these
+      : TypeWithKeyword(Keyword, DependentName, CanonType, /*Dependent=*/true,
+                        /*InstantiationDependent=*/true,
+                        /*VariablyModified=*/false,
+                        NNS->containsUnexpandedParameterPack()),
+        NNS(NNS), Name(Name) {}
 
 public:
   /// Retrieve the qualification on this type.
@@ -4749,6 +4967,7 @@
 class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) DependentTemplateSpecializationType
     : public TypeWithKeyword,
       public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these
 
   /// The nested name specifier containing the qualifier.
   NestedNameSpecifier *NNS;
@@ -4760,20 +4979,19 @@
   /// specialization.
   unsigned NumArgs;
 
-  const TemplateArgument *getArgBuffer() const {
-    return reinterpret_cast<const TemplateArgument*>(this+1);
-  }
-  TemplateArgument *getArgBuffer() {
-    return reinterpret_cast<TemplateArgument*>(this+1);
-  }
-
   DependentTemplateSpecializationType(ElaboratedTypeKeyword Keyword,
                                       NestedNameSpecifier *NNS,
                                       const IdentifierInfo *Name,
                                       ArrayRef<TemplateArgument> Args,
                                       QualType Canon);
 
-  friend class ASTContext;  // ASTContext creates these
+  const TemplateArgument *getArgBuffer() const {
+    return reinterpret_cast<const TemplateArgument*>(this+1);
+  }
+
+  TemplateArgument *getArgBuffer() {
+    return reinterpret_cast<TemplateArgument*>(this+1);
+  }
 
 public:
   NestedNameSpecifier *getQualifier() const { return NNS; }
@@ -4793,7 +5011,8 @@
     return {getArgs(), NumArgs};
   }
 
-  typedef const TemplateArgument * iterator;
+  using iterator = const TemplateArgument *;
+
   iterator begin() const { return getArgs(); }
   iterator end() const; // inline in TemplateBase.h
 
@@ -4839,6 +5058,8 @@
 /// Here, the pack expansion \c Types&... is represented via a
 /// PackExpansionType whose pattern is Types&.
 class PackExpansionType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these
+
   /// \brief The pattern of the pack expansion.
   QualType Pattern;
 
@@ -4852,14 +5073,12 @@
 
   PackExpansionType(QualType Pattern, QualType Canon,
                     Optional<unsigned> NumExpansions)
-    : Type(PackExpansion, Canon, /*Dependent=*/Pattern->isDependentType(),
-           /*InstantiationDependent=*/true,
-           /*VariablyModified=*/Pattern->isVariablyModifiedType(),
-           /*ContainsUnexpandedParameterPack=*/false),
-      Pattern(Pattern),
-      NumExpansions(NumExpansions? *NumExpansions + 1: 0) { }
-
-  friend class ASTContext;  // ASTContext creates these
+      : Type(PackExpansion, Canon, /*Dependent=*/Pattern->isDependentType(),
+             /*InstantiationDependent=*/true,
+             /*VariablyModified=*/Pattern->isVariablyModifiedType(),
+             /*ContainsUnexpandedParameterPack=*/false),
+        Pattern(Pattern),
+        NumExpansions(NumExpansions ? *NumExpansions + 1 : 0) {}
 
 public:
   /// \brief Retrieve the pattern of this pack expansion, which is the
@@ -4901,7 +5120,8 @@
 template <class T>
 class ObjCProtocolQualifiers {
 protected:
-  ObjCProtocolQualifiers() {}
+  ObjCProtocolQualifiers() = default;
+
   ObjCProtocolDecl * const *getProtocolStorage() const {
     return const_cast<ObjCProtocolQualifiers*>(this)->getProtocolStorage();
   }
@@ -4909,9 +5129,11 @@
   ObjCProtocolDecl **getProtocolStorage() {
     return static_cast<T*>(this)->getProtocolStorageImpl();
   }
+
   void setNumProtocols(unsigned N) {
     static_cast<T*>(this)->setNumProtocolsImpl(N);
   }
+
   void initialize(ArrayRef<ObjCProtocolDecl *> protocols) {
     setNumProtocols(protocols.size());
     assert(getNumProtocols() == protocols.size() &&
@@ -4922,8 +5144,8 @@
   }
 
 public:
-  typedef ObjCProtocolDecl * const *qual_iterator;
-  typedef llvm::iterator_range<qual_iterator> qual_range;
+  using qual_iterator = ObjCProtocolDecl * const *;
+  using qual_range = llvm::iterator_range<qual_iterator>;
 
   qual_range quals() const { return qual_range(qual_begin(), qual_end()); }
   qual_iterator qual_begin() const { return getProtocolStorage(); }
@@ -4961,21 +5183,26 @@
   unsigned NumProtocols : 6;
 
   ObjCTypeParamDecl *OTPDecl;
+
   /// The protocols are stored after the ObjCTypeParamType node. In the
   /// canonical type, the list of protocols are sorted alphabetically
   /// and uniqued.
   ObjCProtocolDecl **getProtocolStorageImpl();
+
   /// Return the number of qualifying protocols in this interface type,
   /// or 0 if there are none.
   unsigned getNumProtocolsImpl() const {
     return NumProtocols;
   }
+
   void setNumProtocolsImpl(unsigned N) {
     NumProtocols = N;
   }
+
   ObjCTypeParamType(const ObjCTypeParamDecl *D,
                     QualType can,
                     ArrayRef<ObjCProtocolDecl *> protocols);
+
 public:
   bool isSugared() const { return true; }
   QualType desugar() const { return getCanonicalTypeInternal(); }
@@ -5023,6 +5250,7 @@
 class ObjCObjectType : public Type,
                        public ObjCProtocolQualifiers<ObjCObjectType> {
   friend class ObjCProtocolQualifiers<ObjCObjectType>;
+
   // ObjCObjectType.NumTypeArgs - the number of type arguments stored
   // after the ObjCObjectPointerType node.
   // ObjCObjectType.NumProtocols - the number of protocols stored
@@ -5058,15 +5286,16 @@
   }
 
 protected:
+  enum Nonce_ObjCInterface { Nonce_ObjCInterface };
+
   ObjCObjectType(QualType Canonical, QualType Base,
                  ArrayRef<QualType> typeArgs,
                  ArrayRef<ObjCProtocolDecl *> protocols,
                  bool isKindOf);
 
-  enum Nonce_ObjCInterface { Nonce_ObjCInterface };
   ObjCObjectType(enum Nonce_ObjCInterface)
         : Type(ObjCInterface, QualType(), false, false, false, false),
-      BaseType(QualType(this_(), 0)) {
+          BaseType(QualType(this_(), 0)) {
     ObjCObjectTypeBits.NumProtocols = 0;
     ObjCObjectTypeBits.NumTypeArgs = 0;
     ObjCObjectTypeBits.IsKindOf = 0;
@@ -5086,9 +5315,11 @@
   bool isObjCId() const {
     return getBaseType()->isSpecificBuiltinType(BuiltinType::ObjCId);
   }
+
   bool isObjCClass() const {
     return getBaseType()->isSpecificBuiltinType(BuiltinType::ObjCClass);
   }
+
   bool isObjCUnqualifiedId() const { return qual_empty() && isObjCId(); }
   bool isObjCUnqualifiedClass() const { return qual_empty() && isObjCClass(); }
   bool isObjCUnqualifiedIdOrClass() const {
@@ -5179,7 +5410,7 @@
                      ArrayRef<QualType> typeArgs,
                      ArrayRef<ObjCProtocolDecl *> protocols,
                      bool isKindOf)
-    : ObjCObjectType(Canonical, Base, typeArgs, protocols, isKindOf) {}
+      : ObjCObjectType(Canonical, Base, typeArgs, protocols, isKindOf) {}
 
 public:
   void Profile(llvm::FoldingSetNodeID &ID);
@@ -5217,14 +5448,15 @@
 ///   - It is its own base type.  That is, if T is an ObjCInterfaceType*,
 ///     T->getBaseType() == QualType(T, 0).
 class ObjCInterfaceType : public ObjCObjectType {
+  friend class ASTContext; // ASTContext creates these.
+  friend class ASTReader;
+  friend class ObjCInterfaceDecl;
+
   mutable ObjCInterfaceDecl *Decl;
 
   ObjCInterfaceType(const ObjCInterfaceDecl *D)
-    : ObjCObjectType(Nonce_ObjCInterface),
-      Decl(const_cast<ObjCInterfaceDecl*>(D)) {}
-  friend class ASTContext;  // ASTContext creates these.
-  friend class ASTReader;
-  friend class ObjCInterfaceDecl;
+      : ObjCObjectType(Nonce_ObjCInterface),
+        Decl(const_cast<ObjCInterfaceDecl*>(D)) {}
 
 public:
   /// Get the declaration of this interface.
@@ -5272,16 +5504,17 @@
 /// Pointers to pointers to Objective C objects are still PointerTypes;
 /// only the first level of pointer gets it own type implementation.
 class ObjCObjectPointerType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these.
+
   QualType PointeeType;
 
   ObjCObjectPointerType(QualType Canonical, QualType Pointee)
-    : Type(ObjCObjectPointer, Canonical,
-           Pointee->isDependentType(),
-           Pointee->isInstantiationDependentType(),
-           Pointee->isVariablyModifiedType(),
-           Pointee->containsUnexpandedParameterPack()),
-      PointeeType(Pointee) {}
-  friend class ASTContext;  // ASTContext creates these.
+      : Type(ObjCObjectPointer, Canonical,
+             Pointee->isDependentType(),
+             Pointee->isInstantiationDependentType(),
+             Pointee->isVariablyModifiedType(),
+             Pointee->containsUnexpandedParameterPack()),
+        PointeeType(Pointee) {}
 
 public:
   /// Gets the type pointed to by this ObjC pointer.
@@ -5390,16 +5623,19 @@
   /// An iterator over the qualifiers on the object type.  Provided
   /// for convenience.  This will always iterate over the full set of
   /// protocols on a type, not just those provided directly.
-  typedef ObjCObjectType::qual_iterator qual_iterator;
-  typedef llvm::iterator_range<qual_iterator> qual_range;
+  using qual_iterator = ObjCObjectType::qual_iterator;
+  using qual_range = llvm::iterator_range<qual_iterator>;
 
   qual_range quals() const { return qual_range(qual_begin(), qual_end()); }
+
   qual_iterator qual_begin() const {
     return getObjectType()->qual_begin();
   }
+
   qual_iterator qual_end() const {
     return getObjectType()->qual_end();
   }
+
   bool qual_empty() const { return getObjectType()->qual_empty(); }
 
   /// Return the number of qualifying protocols on the object type.
@@ -5431,26 +5667,29 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getPointeeType());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, QualType T) {
     ID.AddPointer(T.getAsOpaquePtr());
   }
+
   static bool classof(const Type *T) {
     return T->getTypeClass() == ObjCObjectPointer;
   }
 };
 
 class AtomicType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these.
+
   QualType ValueType;
 
   AtomicType(QualType ValTy, QualType Canonical)
-    : Type(Atomic, Canonical, ValTy->isDependentType(),
-           ValTy->isInstantiationDependentType(),
-           ValTy->isVariablyModifiedType(),
-           ValTy->containsUnexpandedParameterPack()),
-      ValueType(ValTy) {}
-  friend class ASTContext;  // ASTContext creates these.
+      : Type(Atomic, Canonical, ValTy->isDependentType(),
+             ValTy->isInstantiationDependentType(),
+             ValTy->isVariablyModifiedType(),
+             ValTy->containsUnexpandedParameterPack()),
+        ValueType(ValTy) {}
 
-  public:
+public:
   /// Gets the type contained by this atomic type, i.e.
   /// the type returned by performing an atomic load of this atomic type.
   QualType getValueType() const { return ValueType; }
@@ -5461,9 +5700,11 @@
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getValueType());
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, QualType T) {
     ID.AddPointer(T.getAsOpaquePtr());
   }
+
   static bool classof(const Type *T) {
     return T->getTypeClass() == Atomic;
   }
@@ -5471,16 +5712,17 @@
 
 /// PipeType - OpenCL20.
 class PipeType : public Type, public llvm::FoldingSetNode {
+  friend class ASTContext; // ASTContext creates these.
+
   QualType ElementType;
   bool isRead;
 
-  PipeType(QualType elemType, QualType CanonicalPtr, bool isRead) :
-    Type(Pipe, CanonicalPtr, elemType->isDependentType(),
-         elemType->isInstantiationDependentType(),
-         elemType->isVariablyModifiedType(),
-         elemType->containsUnexpandedParameterPack()),
-    ElementType(elemType), isRead(isRead) {}
-  friend class ASTContext;  // ASTContext creates these.
+  PipeType(QualType elemType, QualType CanonicalPtr, bool isRead)
+      : Type(Pipe, CanonicalPtr, elemType->isDependentType(),
+             elemType->isInstantiationDependentType(),
+             elemType->isVariablyModifiedType(),
+             elemType->containsUnexpandedParameterPack()),
+        ElementType(elemType), isRead(isRead) {}
 
 public:
   QualType getElementType() const { return ElementType; }
@@ -5530,7 +5772,6 @@
   QualType apply(const ASTContext &Context, const Type* T) const;
 };
 
-
 // Inline function definitions.
 
 inline SplitQualType SplitQualType::getSingleStepDesugaredType() const {
@@ -5763,91 +6004,117 @@
 inline bool Type::isFunctionType() const {
   return isa<FunctionType>(CanonicalType);
 }
+
 inline bool Type::isPointerType() const {
   return isa<PointerType>(CanonicalType);
 }
+
 inline bool Type::isAnyPointerType() const {
   return isPointerType() || isObjCObjectPointerType();
 }
+
 inline bool Type::isBlockPointerType() const {
   return isa<BlockPointerType>(CanonicalType);
 }
+
 inline bool Type::isReferenceType() const {
   return isa<ReferenceType>(CanonicalType);
 }
+
 inline bool Type::isLValueReferenceType() const {
   return isa<LValueReferenceType>(CanonicalType);
 }
+
 inline bool Type::isRValueReferenceType() const {
   return isa<RValueReferenceType>(CanonicalType);
 }
+
 inline bool Type::isFunctionPointerType() const {
   if (const PointerType *T = getAs<PointerType>())
     return T->getPointeeType()->isFunctionType();
   else
     return false;
 }
+
 inline bool Type::isMemberPointerType() const {
   return isa<MemberPointerType>(CanonicalType);
 }
+
 inline bool Type::isMemberFunctionPointerType() const {
   if (const MemberPointerType* T = getAs<MemberPointerType>())
     return T->isMemberFunctionPointer();
   else
     return false;
 }
+
 inline bool Type::isMemberDataPointerType() const {
   if (const MemberPointerType* T = getAs<MemberPointerType>())
     return T->isMemberDataPointer();
   else
     return false;
 }
+
 inline bool Type::isArrayType() const {
   return isa<ArrayType>(CanonicalType);
 }
+
 inline bool Type::isConstantArrayType() const {
   return isa<ConstantArrayType>(CanonicalType);
 }
+
 inline bool Type::isIncompleteArrayType() const {
   return isa<IncompleteArrayType>(CanonicalType);
 }
+
 inline bool Type::isVariableArrayType() const {
   return isa<VariableArrayType>(CanonicalType);
 }
+
 inline bool Type::isDependentSizedArrayType() const {
   return isa<DependentSizedArrayType>(CanonicalType);
 }
+
 inline bool Type::isBuiltinType() const {
   return isa<BuiltinType>(CanonicalType);
 }
+
 inline bool Type::isRecordType() const {
   return isa<RecordType>(CanonicalType);
 }
+
 inline bool Type::isEnumeralType() const {
   return isa<EnumType>(CanonicalType);
 }
+
 inline bool Type::isAnyComplexType() const {
   return isa<ComplexType>(CanonicalType);
 }
+
 inline bool Type::isVectorType() const {
   return isa<VectorType>(CanonicalType);
 }
+
 inline bool Type::isExtVectorType() const {
   return isa<ExtVectorType>(CanonicalType);
 }
+
 inline bool Type::isDependentAddressSpaceType() const {
   return isa<DependentAddressSpaceType>(CanonicalType);
 }
+
 inline bool Type::isObjCObjectPointerType() const {
   return isa<ObjCObjectPointerType>(CanonicalType);
 }
+
 inline bool Type::isObjCObjectType() const {
   return isa<ObjCObjectType>(CanonicalType);
 }
+
 inline bool Type::isObjCObjectOrInterfaceType() const {
   return isa<ObjCInterfaceType>(CanonicalType) ||
     isa<ObjCObjectType>(CanonicalType);
 }
+
 inline bool Type::isAtomicType() const {
   return isa<AtomicType>(CanonicalType);
 }
@@ -5857,26 +6124,31 @@
     return OPT->isObjCQualifiedIdType();
   return false;
 }
+
 inline bool Type::isObjCQualifiedClassType() const {
   if (const ObjCObjectPointerType *OPT = getAs<ObjCObjectPointerType>())
     return OPT->isObjCQualifiedClassType();
   return false;
 }
+
 inline bool Type::isObjCIdType() const {
   if (const ObjCObjectPointerType *OPT = getAs<ObjCObjectPointerType>())
     return OPT->isObjCIdType();
   return false;
 }
+
 inline bool Type::isObjCClassType() const {
   if (const ObjCObjectPointerType *OPT = getAs<ObjCObjectPointerType>())
     return OPT->isObjCClassType();
   return false;
 }
+
 inline bool Type::isObjCSelType() const {
   if (const PointerType *OPT = getAs<PointerType>())
     return OPT->getPointeeType()->isSpecificBuiltinType(BuiltinType::ObjCSel);
   return false;
 }
+
 inline bool Type::isObjCBuiltinType() const {
   return isObjCIdType() || isObjCClassType() || isObjCSelType();
 }
@@ -5911,7 +6183,7 @@
 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) is##Id##Type() ||
   return
 #include "clang/Basic/OpenCLImageTypes.def"
-      0; // end boolean or operation
+      false; // end boolean or operation
 }
 
 inline bool Type::isPipeType() const {
@@ -5973,6 +6245,12 @@
   return false;
 }
 
+inline bool Type::isFloat16Type() const {
+  if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
+    return BT->getKind() == BuiltinType::Float16;
+  return false;
+}
+
 inline bool Type::isNullPtrType() const {
   if (const BuiltinType *BT = getAs<BuiltinType>())
     return BT->getKind() == BuiltinType::NullPtr;
@@ -6190,7 +6468,6 @@
   return cast<PointerType>(Decayed)->getPointeeType();
 }
 
+} // namespace clang
 
-}  // end namespace clang
-
-#endif
+#endif // LLVM_CLANG_AST_TYPE_H
diff --git a/include/clang/AST/TypeLoc.h b/include/clang/AST/TypeLoc.h
index 283c421..b805160 100644
--- a/include/clang/AST/TypeLoc.h
+++ b/include/clang/AST/TypeLoc.h
@@ -1,4 +1,4 @@
-//===--- TypeLoc.h - Type Source Info Wrapper -------------------*- C++ -*-===//
+//===- TypeLoc.h - Type Source Info Wrapper ---------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,26 +6,42 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the clang::TypeLoc interface and its subclasses.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_AST_TYPELOC_H
 #define LLVM_CLANG_AST_TYPELOC_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/TemplateBase.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/Specifiers.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
 
 namespace clang {
-  class ASTContext;
-  class ParmVarDecl;
-  class TypeSourceInfo;
-  class UnqualTypeLoc;
+
+class ASTContext;
+class CXXRecordDecl;
+class Expr;
+class ObjCInterfaceDecl;
+class ObjCProtocolDecl;
+class ObjCTypeParamDecl;
+class TemplateTypeParmDecl;
+class UnqualTypeLoc;
+class UnresolvedUsingTypenameDecl;
 
 // Predeclare all the type nodes.
 #define ABSTRACT_TYPELOC(Class, Base)
@@ -41,10 +57,16 @@
 protected:
   // The correctness of this relies on the property that, for Type *Ty,
   //   QualType(Ty, 0).getAsOpaquePtr() == (void*) Ty
-  const void *Ty;
-  void *Data;
+  const void *Ty = nullptr;
+  void *Data = nullptr;
 
 public:
+  TypeLoc() = default;
+  TypeLoc(QualType ty, void *opaqueData)
+      : Ty(ty.getAsOpaquePtr()), Data(opaqueData) {}
+  TypeLoc(const Type *ty, void *opaqueData)
+      : Ty(ty), Data(opaqueData) {}
+
   /// \brief Convert to the specified TypeLoc type, asserting that this TypeLoc
   /// is of the desired type.
   ///
@@ -88,12 +110,6 @@
     Qualified
   };
 
-  TypeLoc() : Ty(nullptr), Data(nullptr) { }
-  TypeLoc(QualType ty, void *opaqueData)
-    : Ty(ty.getAsOpaquePtr()), Data(opaqueData) { }
-  TypeLoc(const Type *ty, void *opaqueData)
-    : Ty(ty), Data(opaqueData) { }
-
   TypeLocClass getTypeLocClass() const {
     if (getType().hasLocalQualifiers()) return Qualified;
     return (TypeLocClass) getType()->getTypeClass();
@@ -134,6 +150,7 @@
   SourceRange getSourceRange() const LLVM_READONLY {
     return SourceRange(getBeginLoc(), getEndLoc());
   }
+
   SourceLocation getLocStart() const LLVM_READONLY { return getBeginLoc(); }
   SourceLocation getLocEnd() const LLVM_READONLY { return getEndLoc(); }
 
@@ -228,7 +245,7 @@
 /// no direct qualifiers.
 class UnqualTypeLoc : public TypeLoc {
 public:
-  UnqualTypeLoc() {}
+  UnqualTypeLoc() = default;
   UnqualTypeLoc(const Type *Ty, void *Data) : TypeLoc(Ty, Data) {}
 
   const Type *getTypePtr() const {
@@ -241,6 +258,7 @@
 
 private:
   friend class TypeLoc;
+
   static bool isKind(const TypeLoc &TL) {
     return !TL.getType().hasLocalQualifiers();
   }
@@ -253,9 +271,7 @@
 /// type qualifiers.
 class QualifiedTypeLoc : public TypeLoc {
 public:
-  SourceRange getLocalSourceRange() const {
-    return SourceRange();
-  }
+  SourceRange getLocalSourceRange() const { return {}; }
 
   UnqualTypeLoc getUnqualifiedLoc() const {
     unsigned align =
@@ -296,6 +312,7 @@
 
 private:
   friend class TypeLoc;
+
   static bool isKind(const TypeLoc &TL) {
     return TL.getType().hasLocalQualifiers();
   }
@@ -337,12 +354,12 @@
 /// InheritingConcreteTypeLoc instead.
 template <class Base, class Derived, class TypeClass, class LocalData>
 class ConcreteTypeLoc : public Base {
+  friend class TypeLoc;
 
   const Derived *asDerived() const {
     return static_cast<const Derived*>(this);
   }
 
-  friend class TypeLoc;
   static bool isKind(const TypeLoc &TL) {
     return !TL.getType().hasLocalQualifiers() &&
            Derived::classofType(TL.getTypePtr());
@@ -357,6 +374,7 @@
     return std::max(unsigned(alignof(LocalData)),
                     asDerived()->getExtraLocalDataAlignment());
   }
+
   unsigned getLocalDataSize() const {
     unsigned size = sizeof(LocalData);
     unsigned extraAlign = asDerived()->getExtraLocalDataAlignment();
@@ -449,9 +467,7 @@
     return TypeLoc::getLocalAlignmentForType(T);
   }
 
-  TypeLoc getNextTypeLoc(HasNoInnerType _) const {
-    return TypeLoc();
-  }
+  TypeLoc getNextTypeLoc(HasNoInnerType _) const { return {}; }
 
   TypeLoc getNextTypeLoc(QualType T) const {
     return TypeLoc(T, getNonLocalData());
@@ -464,6 +480,7 @@
 template <class Base, class Derived, class TypeClass>
 class InheritingConcreteTypeLoc : public Base {
   friend class TypeLoc;
+
   static bool classofType(const Type *Ty) {
     return TypeClass::classof(Ty);
   }
@@ -482,7 +499,6 @@
   }
 };
 
-
 struct TypeSpecLocInfo {
   SourceLocation NameLoc;
 };
@@ -502,22 +518,25 @@
   SourceLocation getNameLoc() const {
     return this->getLocalData()->NameLoc;
   }
+
   void setNameLoc(SourceLocation Loc) {
     this->getLocalData()->NameLoc = Loc;
   }
+
   SourceRange getLocalSourceRange() const {
     return SourceRange(getNameLoc(), getNameLoc());
   }
+
   void initializeLocal(ASTContext &Context, SourceLocation Loc) {
     setNameLoc(Loc);
   }
 
 private:
   friend class TypeLoc;
+
   static bool isKind(const TypeLoc &TL);
 };
 
-
 struct BuiltinLocInfo {
   SourceRange BuiltinRange;
 };
@@ -531,9 +550,11 @@
   SourceLocation getBuiltinLoc() const {
     return getLocalData()->BuiltinRange.getBegin();
   }
+
   void setBuiltinLoc(SourceLocation Loc) {
     getLocalData()->BuiltinRange = Loc;
   }
+
   void expandBuiltinRange(SourceRange Range) {
     SourceRange &BuiltinRange = getLocalData()->BuiltinRange;
     if (!BuiltinRange.getBegin().isValid()) {
@@ -579,9 +600,11 @@
     else
       return TSS_unspecified;
   }
+
   bool hasWrittenSignSpec() const {
     return getWrittenSignSpec() != TSS_unspecified;
   }
+
   void setWrittenSignSpec(TypeSpecifierSign written) {
     if (needsExtraLocalData())
       getWrittenBuiltinSpecs().Sign = written;
@@ -593,18 +616,22 @@
     else
       return TSW_unspecified;
   }
+
   bool hasWrittenWidthSpec() const {
     return getWrittenWidthSpec() != TSW_unspecified;
   }
+
   void setWrittenWidthSpec(TypeSpecifierWidth written) {
     if (needsExtraLocalData())
       getWrittenBuiltinSpecs().Width = written;
   }
 
   TypeSpecifierType getWrittenTypeSpec() const;
+
   bool hasWrittenTypeSpec() const {
     return getWrittenTypeSpec() != TST_unspecified;
   }
+
   void setWrittenTypeSpec(TypeSpecifierType written) {
     if (needsExtraLocalData())
       getWrittenBuiltinSpecs().Type = written;
@@ -616,6 +643,7 @@
     else
       return false;
   }
+
   void setModeAttr(bool written) {
     if (needsExtraLocalData())
       getWrittenBuiltinSpecs().ModeAttr = written;
@@ -633,7 +661,6 @@
   }
 };
 
-
 /// \brief Wrapper for source info for typedefs.
 class TypedefTypeLoc : public InheritingConcreteTypeLoc<TypeSpecTypeLoc,
                                                         TypedefTypeLoc,
@@ -742,6 +769,7 @@
       *((SourceLocation*)this->getExtraLocalData()) :
       SourceLocation();
   }
+
   void setProtocolLAngleLoc(SourceLocation Loc) {
     *((SourceLocation*)this->getExtraLocalData()) = Loc;
   }
@@ -751,6 +779,7 @@
       *((SourceLocation*)this->getExtraLocalData() + 1) :
       SourceLocation();
   }
+
   void setProtocolRAngleLoc(SourceLocation Loc) {
     *((SourceLocation*)this->getExtraLocalData() + 1) = Loc;
   }
@@ -763,6 +792,7 @@
     assert(i < getNumProtocols() && "Index is out of bounds!");
     return getProtocolLocArray()[i];
   }
+
   void setProtocolLoc(unsigned i, SourceLocation Loc) {
     assert(i < getNumProtocols() && "Index is out of bounds!");
     getProtocolLocArray()[i] = Loc;
@@ -785,9 +815,11 @@
     // as well.
     return (this->getNumProtocols() + 2) * sizeof(SourceLocation) ;
   }
+
   unsigned getExtraLocalDataAlignment() const {
     return alignof(SourceLocation);
   }
+
   SourceRange getLocalSourceRange() const {
     SourceLocation start = getNameLoc();
     SourceLocation end = getProtocolRAngleLoc();
@@ -938,7 +970,6 @@
   }
 };
 
-
 struct ObjCObjectTypeLocInfo {
   SourceLocation TypeArgsLAngleLoc;
   SourceLocation TypeArgsRAngleLoc;
@@ -971,6 +1002,7 @@
   SourceLocation getTypeArgsLAngleLoc() const {
     return this->getLocalData()->TypeArgsLAngleLoc;
   }
+
   void setTypeArgsLAngleLoc(SourceLocation Loc) {
     this->getLocalData()->TypeArgsLAngleLoc = Loc;
   }
@@ -978,6 +1010,7 @@
   SourceLocation getTypeArgsRAngleLoc() const {
     return this->getLocalData()->TypeArgsRAngleLoc;
   }
+
   void setTypeArgsRAngleLoc(SourceLocation Loc) {
     this->getLocalData()->TypeArgsRAngleLoc = Loc;
   }
@@ -999,6 +1032,7 @@
   SourceLocation getProtocolLAngleLoc() const {
     return this->getLocalData()->ProtocolLAngleLoc;
   }
+
   void setProtocolLAngleLoc(SourceLocation Loc) {
     this->getLocalData()->ProtocolLAngleLoc = Loc;
   }
@@ -1006,6 +1040,7 @@
   SourceLocation getProtocolRAngleLoc() const {
     return this->getLocalData()->ProtocolRAngleLoc;
   }
+
   void setProtocolRAngleLoc(SourceLocation Loc) {
     this->getLocalData()->ProtocolRAngleLoc = Loc;
   }
@@ -1018,6 +1053,7 @@
     assert(i < getNumProtocols() && "Index is out of bounds!");
     return getProtocolLocArray()[i];
   }
+
   void setProtocolLoc(unsigned i, SourceLocation Loc) {
     assert(i < getNumProtocols() && "Index is out of bounds!");
     getProtocolLocArray()[i] = Loc;
@@ -1073,7 +1109,6 @@
   }
 };
 
-
 struct ObjCInterfaceLocInfo {
   SourceLocation NameLoc;
   SourceLocation NameEndLoc;
@@ -1127,12 +1162,15 @@
   SourceLocation getLParenLoc() const {
     return this->getLocalData()->LParenLoc;
   }
+
   SourceLocation getRParenLoc() const {
     return this->getLocalData()->RParenLoc;
   }
+
   void setLParenLoc(SourceLocation Loc) {
     this->getLocalData()->LParenLoc = Loc;
   }
+
   void setRParenLoc(SourceLocation Loc) {
     this->getLocalData()->RParenLoc = Loc;
   }
@@ -1161,8 +1199,7 @@
   return *this;
 }
 
-
-struct AdjustedLocInfo { }; // Nothing.
+struct AdjustedLocInfo {}; // Nothing.
 
 class AdjustedTypeLoc : public ConcreteTypeLoc<UnqualTypeLoc, AdjustedTypeLoc,
                                                AdjustedType, AdjustedLocInfo> {
@@ -1181,9 +1218,7 @@
     return getTypePtr()->getOriginalType();
   }
 
-  SourceRange getLocalSourceRange() const {
-    return SourceRange();
-  }
+  SourceRange getLocalSourceRange() const { return {}; }
 
   unsigned getLocalDataSize() const {
     // sizeof(AdjustedLocInfo) is 1, but we don't need its address to be unique
@@ -1210,6 +1245,7 @@
   SourceLocation getSigilLoc() const {
     return this->getLocalData()->StarLoc;
   }
+
   void setSigilLoc(SourceLocation Loc) {
     this->getLocalData()->StarLoc = Loc;
   }
@@ -1231,7 +1267,6 @@
   }
 };
 
-
 /// \brief Wrapper for source info for pointers.
 class PointerTypeLoc : public PointerLikeTypeLoc<PointerTypeLoc,
                                                  PointerType> {
@@ -1239,12 +1274,12 @@
   SourceLocation getStarLoc() const {
     return getSigilLoc();
   }
+
   void setStarLoc(SourceLocation Loc) {
     setSigilLoc(Loc);
   }
 };
 
-
 /// \brief Wrapper for source info for block pointers.
 class BlockPointerTypeLoc : public PointerLikeTypeLoc<BlockPointerTypeLoc,
                                                       BlockPointerType> {
@@ -1252,6 +1287,7 @@
   SourceLocation getCaretLoc() const {
     return getSigilLoc();
   }
+
   void setCaretLoc(SourceLocation Loc) {
     setSigilLoc(Loc);
   }
@@ -1269,6 +1305,7 @@
   SourceLocation getStarLoc() const {
     return getSigilLoc();
   }
+
   void setStarLoc(SourceLocation Loc) {
     setSigilLoc(Loc);
   }
@@ -1276,9 +1313,11 @@
   const Type *getClass() const {
     return getTypePtr()->getClass();
   }
+
   TypeSourceInfo *getClassTInfo() const {
     return getLocalData()->ClassTInfo;
   }
+
   void setClassTInfo(TypeSourceInfo* TI) {
     getLocalData()->ClassTInfo = TI;
   }
@@ -1310,7 +1349,6 @@
   }
 };
 
-
 class ReferenceTypeLoc : public PointerLikeTypeLoc<ReferenceTypeLoc,
                                                    ReferenceType> {
 public:
@@ -1327,6 +1365,7 @@
   SourceLocation getAmpLoc() const {
     return getSigilLoc();
   }
+
   void setAmpLoc(SourceLocation Loc) {
     setSigilLoc(Loc);
   }
@@ -1340,12 +1379,12 @@
   SourceLocation getAmpAmpLoc() const {
     return getSigilLoc();
   }
+
   void setAmpAmpLoc(SourceLocation Loc) {
     setSigilLoc(Loc);
   }
 };
 
-
 struct FunctionLocInfo {
   SourceLocation LocalRangeBegin;
   SourceLocation LParenLoc;
@@ -1371,10 +1410,12 @@
     // exception specification information.
     return (SourceRange *)(getParmArray() + getNumParams());
   }
+
 public:
   SourceLocation getLocalRangeBegin() const {
     return getLocalData()->LocalRangeBegin;
   }
+
   void setLocalRangeBegin(SourceLocation L) {
     getLocalData()->LocalRangeBegin = L;
   }
@@ -1382,6 +1423,7 @@
   SourceLocation getLocalRangeEnd() const {
     return getLocalData()->LocalRangeEnd;
   }
+
   void setLocalRangeEnd(SourceLocation L) {
     getLocalData()->LocalRangeEnd = L;
   }
@@ -1389,6 +1431,7 @@
   SourceLocation getLParenLoc() const {
     return this->getLocalData()->LParenLoc;
   }
+
   void setLParenLoc(SourceLocation Loc) {
     this->getLocalData()->LParenLoc = Loc;
   }
@@ -1396,6 +1439,7 @@
   SourceLocation getRParenLoc() const {
     return this->getLocalData()->RParenLoc;
   }
+
   void setRParenLoc(SourceLocation Loc) {
     this->getLocalData()->RParenLoc = Loc;
   }
@@ -1407,8 +1451,9 @@
   SourceRange getExceptionSpecRange() const {
     if (hasExceptionSpec())
       return *getExceptionSpecRangePtr();
-    return SourceRange();
+    return {};
   }
+
   void setExceptionSpecRange(SourceRange R) {
     if (hasExceptionSpec())
       *getExceptionSpecRangePtr() = R;
@@ -1428,6 +1473,7 @@
       return 0;
     return cast<FunctionProtoType>(getTypePtr())->getNumParams();
   }
+
   ParmVarDecl *getParam(unsigned i) const { return getParmArray()[i]; }
   void setParam(unsigned i, ParmVarDecl *VD) { getParmArray()[i] = VD; }
 
@@ -1474,7 +1520,6 @@
                                      FunctionNoProtoType> {
 };
 
-
 struct ArrayLocInfo {
   SourceLocation LBracketLoc, RBracketLoc;
   Expr *Size;
@@ -1489,6 +1534,7 @@
   SourceLocation getLBracketLoc() const {
     return getLocalData()->LBracketLoc;
   }
+
   void setLBracketLoc(SourceLocation Loc) {
     getLocalData()->LBracketLoc = Loc;
   }
@@ -1496,6 +1542,7 @@
   SourceLocation getRBracketLoc() const {
     return getLocalData()->RBracketLoc;
   }
+
   void setRBracketLoc(SourceLocation Loc) {
     getLocalData()->RBracketLoc = Loc;
   }
@@ -1507,6 +1554,7 @@
   Expr *getSizeExpr() const {
     return getLocalData()->Size;
   }
+
   void setSizeExpr(Expr *Size) {
     getLocalData()->Size = Size;
   }
@@ -1557,7 +1605,6 @@
                                      VariableArrayType> {
 };
 
-
 // Location information for a TemplateName.  Rudimentary for now.
 struct TemplateNameLocInfo {
   SourceLocation NameLoc;
@@ -1578,6 +1625,7 @@
   SourceLocation getTemplateKeywordLoc() const {
     return getLocalData()->TemplateKWLoc;
   }
+
   void setTemplateKeywordLoc(SourceLocation Loc) {
     getLocalData()->TemplateKWLoc = Loc;
   }
@@ -1585,6 +1633,7 @@
   SourceLocation getLAngleLoc() const {
     return getLocalData()->LAngleLoc;
   }
+
   void setLAngleLoc(SourceLocation Loc) {
     getLocalData()->LAngleLoc = Loc;
   }
@@ -1592,6 +1641,7 @@
   SourceLocation getRAngleLoc() const {
     return getLocalData()->RAngleLoc;
   }
+
   void setRAngleLoc(SourceLocation Loc) {
     getLocalData()->RAngleLoc = Loc;
   }
@@ -1599,9 +1649,11 @@
   unsigned getNumArgs() const {
     return getTypePtr()->getNumArgs();
   }
+
   void setArgLocInfo(unsigned i, TemplateArgumentLocInfo AI) {
     getArgInfos()[i] = AI;
   }
+
   TemplateArgumentLocInfo getArgLocInfo(unsigned i) const {
     return getArgInfos()[i];
   }
@@ -1613,6 +1665,7 @@
   SourceLocation getTemplateNameLoc() const {
     return getLocalData()->NameLoc;
   }
+
   void setTemplateNameLoc(SourceLocation Loc) {
     getLocalData()->NameLoc = Loc;
   }
@@ -1675,9 +1728,7 @@
                              DependentAddressSpaceTypeLoc,
                              DependentAddressSpaceType,
                              DependentAddressSpaceLocInfo> {
-
-  public:
-
+public:
   /// The location of the attribute name, i.e.
   ///    int * __attribute__((address_space(11)))
   ///                         ^~~~~~~~~~~~~
@@ -1787,6 +1838,7 @@
   SourceLocation getTypeofLoc() const {
     return this->getLocalData()->TypeofLoc;
   }
+
   void setTypeofLoc(SourceLocation Loc) {
     this->getLocalData()->TypeofLoc = Loc;
   }
@@ -1794,6 +1846,7 @@
   SourceLocation getLParenLoc() const {
     return this->getLocalData()->LParenLoc;
   }
+
   void setLParenLoc(SourceLocation Loc) {
     this->getLocalData()->LParenLoc = Loc;
   }
@@ -1801,6 +1854,7 @@
   SourceLocation getRParenLoc() const {
     return this->getLocalData()->RParenLoc;
   }
+
   void setRParenLoc(SourceLocation Loc) {
     this->getLocalData()->RParenLoc = Loc;
   }
@@ -1808,6 +1862,7 @@
   SourceRange getParensRange() const {
     return SourceRange(getLParenLoc(), getRParenLoc());
   }
+
   void setParensRange(SourceRange range) {
       setLParenLoc(range.getBegin());
       setRParenLoc(range.getEnd());
@@ -1831,6 +1886,7 @@
   Expr* getUnderlyingExpr() const {
     return getTypePtr()->getUnderlyingExpr();
   }
+
   // Reimplemented to account for GNU/C++ extension
   //     typeof unary-expression
   // where there are no parentheses.
@@ -1843,9 +1899,11 @@
   QualType getUnderlyingType() const {
     return this->getTypePtr()->getUnderlyingType();
   }
+
   TypeSourceInfo* getUnderlyingTInfo() const {
     return this->getLocalData()->UnderlyingTInfo;
   }
+
   void setUnderlyingTInfo(TypeSourceInfo* TI) const {
     this->getLocalData()->UnderlyingTInfo = TI;
   }
@@ -1885,6 +1943,7 @@
   TypeSourceInfo* getUnderlyingTInfo() const {
     return getLocalData()->UnderlyingTInfo;
   }
+
   void setUnderlyingTInfo(TypeSourceInfo *TInfo) {
     getLocalData()->UnderlyingTInfo = TInfo;
   }
@@ -1896,16 +1955,13 @@
   SourceRange getParensRange() const {
     return SourceRange(getLParenLoc(), getRParenLoc());
   }
+
   void setParensRange(SourceRange Range) {
     setLParenLoc(Range.getBegin());
     setRParenLoc(Range.getEnd());
   }
 
-  void initializeLocal(ASTContext &Context, SourceLocation Loc) {
-    setKWLoc(Loc);
-    setRParenLoc(Loc);
-    setLParenLoc(Loc);
-  }
+  void initializeLocal(ASTContext &Context, SourceLocation Loc);
 };
 
 class DeducedTypeLoc
@@ -1924,6 +1980,7 @@
   SourceLocation getTemplateNameLoc() const {
     return getNameLoc();
   }
+
   void setTemplateNameLoc(SourceLocation Loc) {
     setNameLoc(Loc);
   }
@@ -1931,6 +1988,7 @@
 
 struct ElaboratedLocInfo {
   SourceLocation ElaboratedKWLoc;
+
   /// \brief Data associated with the nested-name-specifier location.
   void *QualifierData;
 };
@@ -1943,6 +2001,7 @@
   SourceLocation getElaboratedKeywordLoc() const {
     return this->getLocalData()->ElaboratedKWLoc;
   }
+
   void setElaboratedKeywordLoc(SourceLocation Loc) {
     this->getLocalData()->ElaboratedKWLoc = Loc;
   }
@@ -2001,6 +2060,7 @@
   SourceLocation getElaboratedKeywordLoc() const {
     return this->getLocalData()->ElaboratedKWLoc;
   }
+
   void setElaboratedKeywordLoc(SourceLocation Loc) {
     this->getLocalData()->ElaboratedKWLoc = Loc;
   }
@@ -2020,6 +2080,7 @@
   SourceLocation getNameLoc() const {
     return this->getLocalData()->NameLoc;
   }
+
   void setNameLoc(SourceLocation Loc) {
     this->getLocalData()->NameLoc = Loc;
   }
@@ -2056,6 +2117,7 @@
   SourceLocation getElaboratedKeywordLoc() const {
     return this->getLocalData()->ElaboratedKWLoc;
   }
+
   void setElaboratedKeywordLoc(SourceLocation Loc) {
     this->getLocalData()->ElaboratedKWLoc = Loc;
   }
@@ -2087,6 +2149,7 @@
   SourceLocation getTemplateKeywordLoc() const {
     return getLocalData()->TemplateKWLoc;
   }
+
   void setTemplateKeywordLoc(SourceLocation Loc) {
     getLocalData()->TemplateKWLoc = Loc;
   }
@@ -2094,6 +2157,7 @@
   SourceLocation getTemplateNameLoc() const {
     return this->getLocalData()->NameLoc;
   }
+
   void setTemplateNameLoc(SourceLocation Loc) {
     this->getLocalData()->NameLoc = Loc;
   }
@@ -2101,6 +2165,7 @@
   SourceLocation getLAngleLoc() const {
     return this->getLocalData()->LAngleLoc;
   }
+
   void setLAngleLoc(SourceLocation Loc) {
     this->getLocalData()->LAngleLoc = Loc;
   }
@@ -2108,6 +2173,7 @@
   SourceLocation getRAngleLoc() const {
     return this->getLocalData()->RAngleLoc;
   }
+
   void setRAngleLoc(SourceLocation Loc) {
     this->getLocalData()->RAngleLoc = Loc;
   }
@@ -2119,6 +2185,7 @@
   void setArgLocInfo(unsigned i, TemplateArgumentLocInfo AI) {
     getArgInfos()[i] = AI;
   }
+
   TemplateArgumentLocInfo getArgLocInfo(unsigned i) const {
     return getArgInfos()[i];
   }
@@ -2160,7 +2227,6 @@
   }
 };
 
-
 struct PackExpansionTypeLocInfo {
   SourceLocation EllipsisLoc;
 };
@@ -2212,6 +2278,7 @@
   SourceLocation getKWLoc() const {
     return this->getLocalData()->KWLoc;
   }
+
   void setKWLoc(SourceLocation Loc) {
     this->getLocalData()->KWLoc = Loc;
   }
@@ -2219,6 +2286,7 @@
   SourceLocation getLParenLoc() const {
     return this->getLocalData()->LParenLoc;
   }
+
   void setLParenLoc(SourceLocation Loc) {
     this->getLocalData()->LParenLoc = Loc;
   }
@@ -2226,6 +2294,7 @@
   SourceLocation getRParenLoc() const {
     return this->getLocalData()->RParenLoc;
   }
+
   void setRParenLoc(SourceLocation Loc) {
     this->getLocalData()->RParenLoc = Loc;
   }
@@ -2233,6 +2302,7 @@
   SourceRange getParensRange() const {
     return SourceRange(getLParenLoc(), getRParenLoc());
   }
+
   void setParensRange(SourceRange Range) {
     setLParenLoc(Range.getBegin());
     setRParenLoc(Range.getEnd());
@@ -2287,6 +2357,7 @@
   }
   return Cur.getAs<T>();
 }
-}
 
-#endif
+} // namespace clang
+
+#endif // LLVM_CLANG_AST_TYPELOC_H
diff --git a/include/clang/AST/UnresolvedSet.h b/include/clang/AST/UnresolvedSet.h
index b63c6eb..614ff9b 100644
--- a/include/clang/AST/UnresolvedSet.h
+++ b/include/clang/AST/UnresolvedSet.h
@@ -1,4 +1,4 @@
-//===-- UnresolvedSet.h - Unresolved sets of declarations  ------*- C++ -*-===//
+//===- UnresolvedSet.h - Unresolved sets of declarations --------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,20 +17,25 @@
 
 #include "clang/AST/DeclAccessPair.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/Basic/Specifiers.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator.h"
+#include <cstddef>
+#include <iterator>
 
 namespace clang {
 
+class NamedDecl;
+
 /// The iterator over UnresolvedSets.  Serves as both the const and
 /// non-const iterator.
 class UnresolvedSetIterator : public llvm::iterator_adaptor_base<
                                   UnresolvedSetIterator, DeclAccessPair *,
                                   std::random_access_iterator_tag, NamedDecl *,
                                   std::ptrdiff_t, NamedDecl *, NamedDecl *> {
-  friend class UnresolvedSetImpl;
   friend class ASTUnresolvedSet;
   friend class OverloadExpr;
+  friend class UnresolvedSetImpl;
 
   explicit UnresolvedSetIterator(DeclAccessPair *Iter)
       : iterator_adaptor_base(Iter) {}
@@ -54,12 +59,13 @@
 
 /// \brief A set of unresolved declarations.
 class UnresolvedSetImpl {
-  typedef SmallVectorImpl<DeclAccessPair> DeclsTy;
+  using DeclsTy = SmallVectorImpl<DeclAccessPair>;
 
   // Don't allow direct construction, and only permit subclassing by
   // UnresolvedSet.
 private:
   template <unsigned N> friend class UnresolvedSet;
+
   UnresolvedSetImpl() = default;
   UnresolvedSetImpl(const UnresolvedSetImpl &) = default;
   UnresolvedSetImpl &operator=(const UnresolvedSetImpl &) = default;
@@ -71,8 +77,8 @@
 public:
   // We don't currently support assignment through this iterator, so we might
   // as well use the same implementation twice.
-  typedef UnresolvedSetIterator iterator;
-  typedef UnresolvedSetIterator const_iterator;
+  using iterator = UnresolvedSetIterator;
+  using const_iterator = UnresolvedSetIterator;
 
   iterator begin() { return iterator(decls().begin()); }
   iterator end() { return iterator(decls().end()); }
@@ -140,7 +146,7 @@
   SmallVector<DeclAccessPair, InlineCapacity> Decls;
 };
 
-  
+ 
 } // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_UNRESOLVEDSET_H
diff --git a/include/clang/AST/VTTBuilder.h b/include/clang/AST/VTTBuilder.h
index b4a6fe3..1781394 100644
--- a/include/clang/AST/VTTBuilder.h
+++ b/include/clang/AST/VTTBuilder.h
@@ -1,4 +1,4 @@
-//===--- VTTBuilder.h - C++ VTT layout builder --------------------*- C++ -*-=//
+//===- VTTBuilder.h - C++ VTT layout builder --------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,25 +16,31 @@
 #define LLVM_CLANG_AST_VTTBUILDER_H
 
 #include "clang/AST/BaseSubobject.h"
-#include "clang/AST/CXXInheritance.h"
-#include "clang/AST/GlobalDecl.h"
-#include "clang/AST/RecordLayout.h"
-#include "clang/Basic/ABI.h"
-#include <utility>
+#include "clang/AST/CharUnits.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include <cstdint>
 
 namespace clang {
 
+class ASTContext;
+class ASTRecordLayout;
+class CXXRecordDecl;
+
 class VTTVTable {
   llvm::PointerIntPair<const CXXRecordDecl *, 1, bool> BaseAndIsVirtual;
   CharUnits BaseOffset;
 
 public:
-  VTTVTable() {}
+  VTTVTable() = default;
   VTTVTable(const CXXRecordDecl *Base, CharUnits BaseOffset, bool BaseIsVirtual)
-    : BaseAndIsVirtual(Base, BaseIsVirtual), BaseOffset(BaseOffset) {}
+      : BaseAndIsVirtual(Base, BaseIsVirtual), BaseOffset(BaseOffset) {}
   VTTVTable(BaseSubobject Base, bool BaseIsVirtual)
-    : BaseAndIsVirtual(Base.getBase(), BaseIsVirtual),
-      BaseOffset(Base.getBaseOffset()) {}
+      : BaseAndIsVirtual(Base.getBase(), BaseIsVirtual),
+        BaseOffset(Base.getBaseOffset()) {}
 
   const CXXRecordDecl *getBase() const {
     return BaseAndIsVirtual.getPointer();
@@ -57,25 +63,24 @@
   uint64_t VTableIndex;
   BaseSubobject VTableBase;
 
-  VTTComponent() {}
+  VTTComponent() = default;
   VTTComponent(uint64_t VTableIndex, BaseSubobject VTableBase)
-    : VTableIndex(VTableIndex), VTableBase(VTableBase) {}
+     : VTableIndex(VTableIndex), VTableBase(VTableBase) {}
 };
 
 /// \brief Class for building VTT layout information.
 class VTTBuilder {
-  
   ASTContext &Ctx;
 
   /// \brief The most derived class for which we're building this vtable.
   const CXXRecordDecl *MostDerivedClass;
 
-  typedef SmallVector<VTTVTable, 64> VTTVTablesVectorTy;
+  using VTTVTablesVectorTy = SmallVector<VTTVTable, 64>;
   
   /// \brief The VTT vtables.
   VTTVTablesVectorTy VTTVTables;
   
-  typedef SmallVector<VTTComponent, 64> VTTComponentsVectorTy;
+  using VTTComponentsVectorTy = SmallVector<VTTComponent, 64>;
   
   /// \brief The VTT components.
   VTTComponentsVectorTy VTTComponents;
@@ -83,9 +88,9 @@
   /// \brief The AST record layout of the most derived class.
   const ASTRecordLayout &MostDerivedClassLayout;
 
-  typedef llvm::SmallPtrSet<const CXXRecordDecl *, 4> VisitedVirtualBasesSetTy;
+  using VisitedVirtualBasesSetTy = llvm::SmallPtrSet<const CXXRecordDecl *, 4>;
 
-  typedef llvm::DenseMap<BaseSubobject, uint64_t> AddressPointsMapTy;
+  using AddressPointsMapTy = llvm::DenseMap<BaseSubobject, uint64_t>;
 
   /// \brief The sub-VTT indices for the bases of the most derived class.
   llvm::DenseMap<BaseSubobject, uint64_t> SubVTTIndicies;
@@ -153,9 +158,8 @@
   getSecondaryVirtualPointerIndices() const {
     return SecondaryVirtualPointerIndices;
   }
-
 };
 
-}
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_AST_VTTBUILDER_H
diff --git a/include/clang/ASTMatchers/ASTMatchers.h b/include/clang/ASTMatchers/ASTMatchers.h
index 2b03bb3..6279172 100644
--- a/include/clang/ASTMatchers/ASTMatchers.h
+++ b/include/clang/ASTMatchers/ASTMatchers.h
@@ -1,4 +1,4 @@
-//===--- ASTMatchers.h - Structural query framework -------------*- C++ -*-===//
+//===- ASTMatchers.h - Structural query framework ---------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -46,13 +46,48 @@
 #define LLVM_CLANG_ASTMATCHERS_ASTMATCHERS_H
 
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/ASTTypeTraits.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclFriend.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/ExprObjC.h"
+#include "clang/AST/NestedNameSpecifier.h"
+#include "clang/AST/OperationKinds.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/StmtCXX.h"
+#include "clang/AST/StmtObjC.h"
+#include "clang/AST/TemplateBase.h"
+#include "clang/AST/TemplateName.h"
+#include "clang/AST/Type.h"
+#include "clang/AST/TypeLoc.h"
 #include "clang/ASTMatchers/ASTMatchersInternal.h"
 #include "clang/ASTMatchers/ASTMatchersMacros.h"
+#include "clang/Basic/AttrKinds.h"
+#include "clang/Basic/ExceptionSpecificationType.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TypeTraits.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Regex.h"
+#include <cassert>
+#include <cstddef>
 #include <iterator>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
 
 namespace clang {
 namespace ast_matchers {
@@ -78,7 +113,7 @@
   /// \brief Type of mapping from binding identifiers to bound nodes. This type
   /// is an associative container with a key type of \c std::string and a value
   /// type of \c clang::ast_type_traits::DynTypedNode
-  typedef internal::BoundNodesMap::IDToNodeMap IDToNodeMap;
+  using IDToNodeMap = internal::BoundNodesMap::IDToNodeMap;
 
   /// \brief Retrieve mapping from binding identifiers to bound nodes.
   const IDToNodeMap &getMap() const {
@@ -86,13 +121,13 @@
   }
 
 private:
+  friend class internal::BoundNodesTreeBuilder;
+
   /// \brief Create BoundNodes from a pre-filled map of bindings.
   BoundNodes(internal::BoundNodesMap &MyBoundNodes)
       : MyBoundNodes(MyBoundNodes) {}
 
   internal::BoundNodesMap MyBoundNodes;
-
-  friend class internal::BoundNodesTreeBuilder;
 };
 
 /// \brief If the provided matcher matches a node, binds the node to \c ID.
@@ -107,13 +142,13 @@
 /// \brief Types of matchers for the top-level classes in the AST class
 /// hierarchy.
 /// @{
-typedef internal::Matcher<Decl> DeclarationMatcher;
-typedef internal::Matcher<Stmt> StatementMatcher;
-typedef internal::Matcher<QualType> TypeMatcher;
-typedef internal::Matcher<TypeLoc> TypeLocMatcher;
-typedef internal::Matcher<NestedNameSpecifier> NestedNameSpecifierMatcher;
-typedef internal::Matcher<NestedNameSpecifierLoc> NestedNameSpecifierLocMatcher;
-typedef internal::Matcher<CXXCtorInitializer> CXXCtorInitializerMatcher;
+using DeclarationMatcher = internal::Matcher<Decl>;
+using StatementMatcher = internal::Matcher<Stmt>;
+using TypeMatcher = internal::Matcher<QualType>;
+using TypeLocMatcher = internal::Matcher<TypeLoc>;
+using NestedNameSpecifierMatcher = internal::Matcher<NestedNameSpecifier>;
+using NestedNameSpecifierLocMatcher = internal::Matcher<NestedNameSpecifierLoc>;
+using CXXCtorInitializerMatcher = internal::Matcher<CXXCtorInitializer>;
 /// @}
 
 /// \brief Matches any node.
@@ -143,7 +178,7 @@
 /// \endcode
 /// decl(hasDeclContext(translationUnitDecl()))
 ///   matches "int X", but not "int Y".
-const internal::VariadicDynCastAllOfMatcher<Decl, TranslationUnitDecl>
+extern const internal::VariadicDynCastAllOfMatcher<Decl, TranslationUnitDecl>
     translationUnitDecl;
 
 /// \brief Matches typedef declarations.
@@ -155,7 +190,8 @@
 /// \endcode
 /// typedefDecl()
 ///   matches "typedef int X", but not "using Y = int"
-const internal::VariadicDynCastAllOfMatcher<Decl, TypedefDecl> typedefDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, TypedefDecl>
+    typedefDecl;
 
 /// \brief Matches typedef name declarations.
 ///
@@ -166,7 +202,7 @@
 /// \endcode
 /// typedefNameDecl()
 ///   matches "typedef int X" and "using Y = int"
-const internal::VariadicDynCastAllOfMatcher<Decl, TypedefNameDecl>
+extern const internal::VariadicDynCastAllOfMatcher<Decl, TypedefNameDecl>
     typedefNameDecl;
 
 /// \brief Matches type alias declarations.
@@ -178,7 +214,8 @@
 /// \endcode
 /// typeAliasDecl()
 ///   matches "using Y = int", but not "typedef int X"
-const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasDecl> typeAliasDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasDecl>
+    typeAliasDecl;
 
 /// \brief Matches type alias template declarations.
 ///
@@ -187,7 +224,7 @@
 ///   template <typename T>
 ///   using Y = X<T>;
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasTemplateDecl>
+extern const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasTemplateDecl>
     typeAliasTemplateDecl;
 
 /// \brief Matches AST nodes that were expanded within the main-file.
@@ -278,7 +315,7 @@
 ///     friend X;
 ///   };
 /// \endcode
-const internal::VariadicAllOfMatcher<Decl> decl;
+extern const internal::VariadicAllOfMatcher<Decl> decl;
 
 /// \brief Matches a declaration of a linkage specification.
 ///
@@ -288,7 +325,7 @@
 /// \endcode
 /// linkageSpecDecl()
 ///   matches "extern "C" {}"
-const internal::VariadicDynCastAllOfMatcher<Decl, LinkageSpecDecl>
+extern const internal::VariadicDynCastAllOfMatcher<Decl, LinkageSpecDecl>
     linkageSpecDecl;
 
 /// \brief Matches a declaration of anything that could have a name.
@@ -302,7 +339,7 @@
 ///     } U;
 ///   };
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Decl, NamedDecl> namedDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, NamedDecl> namedDecl;
 
 /// \brief Matches a declaration of label.
 ///
@@ -313,7 +350,7 @@
 /// \endcode
 /// labelDecl()
 ///   matches 'FOO:'
-const internal::VariadicDynCastAllOfMatcher<Decl, LabelDecl> labelDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, LabelDecl> labelDecl;
 
 /// \brief Matches a declaration of a namespace.
 ///
@@ -324,7 +361,8 @@
 /// \endcode
 /// namespaceDecl()
 ///   matches "namespace {}" and "namespace test {}"
-const internal::VariadicDynCastAllOfMatcher<Decl, NamespaceDecl> namespaceDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, NamespaceDecl>
+    namespaceDecl;
 
 /// \brief Matches a declaration of a namespace alias.
 ///
@@ -335,7 +373,7 @@
 /// \endcode
 /// namespaceAliasDecl()
 ///   matches "namespace alias" but not "namespace test"
-const internal::VariadicDynCastAllOfMatcher<Decl, NamespaceAliasDecl>
+extern const internal::VariadicDynCastAllOfMatcher<Decl, NamespaceAliasDecl>
     namespaceAliasDecl;
 
 /// \brief Matches class, struct, and union declarations.
@@ -347,9 +385,7 @@
 ///   struct S {};
 ///   union U {};
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  RecordDecl> recordDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, RecordDecl> recordDecl;
 
 /// \brief Matches C++ class declarations.
 ///
@@ -358,9 +394,8 @@
 ///   class X;
 ///   template<class T> class Z {};
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  CXXRecordDecl> cxxRecordDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, CXXRecordDecl>
+    cxxRecordDecl;
 
 /// \brief Matches C++ class template declarations.
 ///
@@ -368,9 +403,8 @@
 /// \code
 ///   template<class T> class Z {};
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  ClassTemplateDecl> classTemplateDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, ClassTemplateDecl>
+    classTemplateDecl;
 
 /// \brief Matches C++ class template specializations.
 ///
@@ -382,9 +416,9 @@
 /// \endcode
 /// classTemplateSpecializationDecl()
 ///   matches the specializations \c A<int> and \c A<double>
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  ClassTemplateSpecializationDecl> classTemplateSpecializationDecl;
+extern const internal::VariadicDynCastAllOfMatcher<
+    Decl, ClassTemplateSpecializationDecl>
+    classTemplateSpecializationDecl;
 
 /// \brief Matches declarator declarations (field, variable, function
 /// and non-type template parameter declarations).
@@ -395,7 +429,7 @@
 /// \endcode
 /// declaratorDecl()
 ///   matches \c int y.
-const internal::VariadicDynCastAllOfMatcher<Decl, DeclaratorDecl>
+extern const internal::VariadicDynCastAllOfMatcher<Decl, DeclaratorDecl>
     declaratorDecl;
 
 /// \brief Matches parameter variable declarations.
@@ -406,7 +440,8 @@
 /// \endcode
 /// parmVarDecl()
 ///   matches \c int x.
-const internal::VariadicDynCastAllOfMatcher<Decl, ParmVarDecl> parmVarDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, ParmVarDecl>
+    parmVarDecl;
 
 /// \brief Matches C++ access specifier declarations.
 ///
@@ -419,9 +454,8 @@
 /// \endcode
 /// accessSpecDecl()
 ///   matches 'public:'
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  AccessSpecDecl> accessSpecDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, AccessSpecDecl>
+    accessSpecDecl;
 
 /// \brief Matches constructor initializers.
 ///
@@ -432,7 +466,8 @@
 ///     int i;
 ///   };
 /// \endcode
-const internal::VariadicAllOfMatcher<CXXCtorInitializer> cxxCtorInitializer;
+extern const internal::VariadicAllOfMatcher<CXXCtorInitializer>
+    cxxCtorInitializer;
 
 /// \brief Matches template arguments.
 ///
@@ -443,7 +478,7 @@
 /// \endcode
 /// templateArgument()
 ///   matches 'int' in C<int>.
-const internal::VariadicAllOfMatcher<TemplateArgument> templateArgument;
+extern const internal::VariadicAllOfMatcher<TemplateArgument> templateArgument;
 
 /// \brief Matches template name.
 ///
@@ -454,7 +489,7 @@
 /// \endcode
 /// templateName()
 ///   matches 'X' in X<int>.
-const internal::VariadicAllOfMatcher<TemplateName> templateName;
+extern const internal::VariadicAllOfMatcher<TemplateName> templateName;
 
 /// \brief Matches non-type template parameter declarations.
 ///
@@ -464,9 +499,9 @@
 /// \endcode
 /// nonTypeTemplateParmDecl()
 ///   matches 'N', but not 'T'.
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  NonTypeTemplateParmDecl> nonTypeTemplateParmDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl,
+                                                   NonTypeTemplateParmDecl>
+    nonTypeTemplateParmDecl;
 
 /// \brief Matches template type parameter declarations.
 ///
@@ -476,9 +511,8 @@
 /// \endcode
 /// templateTypeParmDecl()
 ///   matches 'T', but not 'N'.
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  TemplateTypeParmDecl> templateTypeParmDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, TemplateTypeParmDecl>
+    templateTypeParmDecl;
 
 /// \brief Matches public C++ declarations.
 ///
@@ -766,7 +800,7 @@
 ///   A<bool, int> b;
 ///   A<int, bool> c;
 ///
-///   template<typename T> f() {};
+///   template<typename T> void f() {}
 ///   void func() { f<int>(); };
 /// \endcode
 /// classTemplateSpecializationDecl(hasTemplateArgument(
@@ -846,12 +880,12 @@
 ///
 /// Given
 /// \code
-///   template<typename T> struct A {};
-///   struct B { B* next; };
+///   struct B { int next; };
+///   template<int(B::*next_ptr)> struct A {};
 ///   A<&B::next> a;
 /// \endcode
 /// classTemplateSpecializationDecl(hasAnyTemplateArgument(
-///     refersToDeclaration(fieldDecl(hasName("next"))))
+///     refersToDeclaration(fieldDecl(hasName("next")))))
 ///   matches the specialization \c A<&B::next> with \c fieldDecl(...) matching
 ///     \c B::next
 AST_MATCHER_P(TemplateArgument, refersToDeclaration,
@@ -865,8 +899,8 @@
 ///
 /// Given
 /// \code
-///   template<typename T> struct A {};
-///   struct B { B* next; };
+///   struct B { int next; };
+///   template<int(B::*next_ptr)> struct A {};
 ///   A<&B::next> a;
 /// \endcode
 /// templateSpecializationType(hasAnyTemplateArgument(
@@ -883,7 +917,7 @@
 ///
 /// Given
 /// \code
-///   template<int T> struct A {};
+///   template<int T> struct C {};
 ///   C<42> c;
 /// \endcode
 /// classTemplateSpecializationDecl(
@@ -898,7 +932,7 @@
 ///
 /// Given
 /// \code
-///   template<int T> struct A {};
+///   template<int T> struct C {};
 ///   C<42> c;
 /// \endcode
 /// classTemplateSpecializationDecl(
@@ -919,7 +953,7 @@
 ///
 /// Given
 /// \code
-///   template<int T> struct A {};
+///   template<int T> struct C {};
 ///   C<42> c;
 /// \endcode
 /// classTemplateSpecializationDecl(
@@ -939,7 +973,7 @@
 ///   enum X { A, B, C };
 ///   void F();
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Decl, ValueDecl> valueDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, ValueDecl> valueDecl;
 
 /// \brief Matches C++ constructor declarations.
 ///
@@ -952,9 +986,8 @@
 ///     int DoSomething();
 ///   };
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  CXXConstructorDecl> cxxConstructorDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, CXXConstructorDecl>
+    cxxConstructorDecl;
 
 /// \brief Matches explicit C++ destructor declarations.
 ///
@@ -965,9 +998,8 @@
 ///     virtual ~Foo();
 ///   };
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  CXXDestructorDecl> cxxDestructorDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, CXXDestructorDecl>
+    cxxDestructorDecl;
 
 /// \brief Matches enum declarations.
 ///
@@ -977,7 +1009,7 @@
 ///     A, B, C
 ///   };
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Decl, EnumDecl> enumDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, EnumDecl> enumDecl;
 
 /// \brief Matches enum constants.
 ///
@@ -987,9 +1019,8 @@
 ///     A, B, C
 ///   };
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  EnumConstantDecl> enumConstantDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, EnumConstantDecl>
+    enumConstantDecl;
 
 /// \brief Matches method declarations.
 ///
@@ -997,7 +1028,8 @@
 /// \code
 ///   class X { void y(); };
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Decl, CXXMethodDecl> cxxMethodDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, CXXMethodDecl>
+    cxxMethodDecl;
 
 /// \brief Matches conversion operator declarations.
 ///
@@ -1005,7 +1037,7 @@
 /// \code
 ///   class X { operator int() const; };
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl>
+extern const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl>
     cxxConversionDecl;
 
 /// \brief Matches variable declarations.
@@ -1017,7 +1049,7 @@
 /// \code
 ///   int a;
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Decl, VarDecl> varDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, VarDecl> varDecl;
 
 /// \brief Matches field declarations.
 ///
@@ -1027,7 +1059,7 @@
 /// \endcode
 /// fieldDecl()
 ///   matches 'm'.
-const internal::VariadicDynCastAllOfMatcher<Decl, FieldDecl> fieldDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, FieldDecl> fieldDecl;
 
 /// \brief Matches function declarations.
 ///
@@ -1035,7 +1067,8 @@
 /// \code
 ///   void f();
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Decl, FunctionDecl> functionDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, FunctionDecl>
+    functionDecl;
 
 /// \brief Matches C++ function template declarations.
 ///
@@ -1043,9 +1076,8 @@
 /// \code
 ///   template<class T> void f(T t) {}
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  FunctionTemplateDecl> functionTemplateDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, FunctionTemplateDecl>
+    functionTemplateDecl;
 
 /// \brief Matches friend declarations.
 ///
@@ -1055,7 +1087,7 @@
 /// \endcode
 /// friendDecl()
 ///   matches 'friend void foo()'.
-const internal::VariadicDynCastAllOfMatcher<Decl, FriendDecl> friendDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, FriendDecl> friendDecl;
 
 /// \brief Matches statements.
 ///
@@ -1065,7 +1097,7 @@
 /// \endcode
 /// stmt()
 ///   matches both the compound statement '{ ++a; }' and '++a'.
-const internal::VariadicAllOfMatcher<Stmt> stmt;
+extern const internal::VariadicAllOfMatcher<Stmt> stmt;
 
 /// \brief Matches declaration statements.
 ///
@@ -1075,9 +1107,7 @@
 /// \endcode
 /// declStmt()
 ///   matches 'int a'.
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  DeclStmt> declStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, DeclStmt> declStmt;
 
 /// \brief Matches member expressions.
 ///
@@ -1090,7 +1120,7 @@
 /// \endcode
 /// memberExpr()
 ///   matches this->x, x, y.x, a, this->b
-const internal::VariadicDynCastAllOfMatcher<Stmt, MemberExpr> memberExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, MemberExpr> memberExpr;
 
 /// \brief Matches call expressions.
 ///
@@ -1100,7 +1130,7 @@
 ///   x.y();
 ///   y();
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Stmt, CallExpr> callExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CallExpr> callExpr;
 
 /// \brief Matches lambda expressions.
 ///
@@ -1108,7 +1138,7 @@
 /// \code
 ///   [&](){return 5;}
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Stmt, LambdaExpr> lambdaExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, LambdaExpr> lambdaExpr;
 
 /// \brief Matches member call expressions.
 ///
@@ -1117,9 +1147,8 @@
 ///   X x;
 ///   x.y();
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXMemberCallExpr> cxxMemberCallExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXMemberCallExpr>
+    cxxMemberCallExpr;
 
 /// \brief Matches ObjectiveC Message invocation expressions.
 ///
@@ -1130,9 +1159,8 @@
 /// \code
 ///   [[NSString alloc] initWithString:@"Hello"]
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  ObjCMessageExpr> objcMessageExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ObjCMessageExpr>
+    objcMessageExpr;
 
 /// \brief Matches Objective-C interface declarations.
 ///
@@ -1141,9 +1169,8 @@
 ///   @interface Foo
 ///   @end
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  ObjCInterfaceDecl> objcInterfaceDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, ObjCInterfaceDecl>
+    objcInterfaceDecl;
 
 /// \brief Matches Objective-C implementation declarations.
 ///
@@ -1152,9 +1179,8 @@
 ///   @implementation Foo
 ///   @end
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  ObjCImplementationDecl> objcImplementationDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, ObjCImplementationDecl>
+    objcImplementationDecl;
 
 /// \brief Matches Objective-C protocol declarations.
 ///
@@ -1163,9 +1189,8 @@
 ///   @protocol FooDelegate
 ///   @end
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  ObjCProtocolDecl> objcProtocolDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, ObjCProtocolDecl>
+    objcProtocolDecl;
 
 /// \brief Matches Objective-C category declarations.
 ///
@@ -1174,9 +1199,18 @@
 ///   @interface Foo (Additions)
 ///   @end
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  ObjCCategoryDecl> objcCategoryDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, ObjCCategoryDecl>
+    objcCategoryDecl;
+
+/// \brief Matches Objective-C category definitions.
+///
+/// Example matches Foo (Additions)
+/// \code
+///   @implementation Foo (Additions)
+///   @end
+/// \endcode
+extern const internal::VariadicDynCastAllOfMatcher<Decl, ObjCCategoryImplDecl>
+    objcCategoryImplDecl;
 
 /// \brief Matches Objective-C method declarations.
 ///
@@ -1190,9 +1224,8 @@
 ///   - (void)method {}
 ///   @end
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  ObjCMethodDecl> objcMethodDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, ObjCMethodDecl>
+    objcMethodDecl;
 
 /// \brief Matches Objective-C instance variable declarations.
 ///
@@ -1203,9 +1236,8 @@
 ///   }
 ///   @end
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  ObjCIvarDecl> objcIvarDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, ObjCIvarDecl>
+    objcIvarDecl;
 
 /// \brief Matches Objective-C property declarations.
 ///
@@ -1215,9 +1247,47 @@
 ///   @property BOOL enabled;
 ///   @end
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  ObjCPropertyDecl> objcPropertyDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, ObjCPropertyDecl>
+    objcPropertyDecl;
+
+/// \brief Matches Objective-C \@throw statements.
+///
+/// Example matches \@throw
+/// \code
+///   @throw obj;
+/// \endcode
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ObjCAtThrowStmt>
+    objcThrowStmt;
+
+/// \brief Matches Objective-C @try statements.
+///
+/// Example matches @try
+/// \code
+///   @try {}
+///   @catch (...) {}
+/// \endcode
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ObjCAtTryStmt>
+    objcTryStmt;
+
+/// \brief Matches Objective-C @catch statements.
+///
+/// Example matches @catch
+/// \code
+///   @try {}
+///   @catch (...) {}
+/// \endcode
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ObjCAtCatchStmt>
+    objcCatchStmt;
+
+/// \brief Matches Objective-C @finally statements.
+///
+/// Example matches @finally
+/// \code
+///   @try {}
+///   @finally {}
+/// \endcode
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ObjCAtFinallyStmt>
+    objcFinallyStmt;
 
 /// \brief Matches expressions that introduce cleanups to be run at the end
 /// of the sub-expression's evaluation.
@@ -1226,9 +1296,8 @@
 /// \code
 ///   const std::string str = std::string();
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  ExprWithCleanups> exprWithCleanups;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ExprWithCleanups>
+    exprWithCleanups;
 
 /// \brief Matches init list expressions.
 ///
@@ -1240,7 +1309,8 @@
 /// \endcode
 /// initListExpr()
 ///   matches "{ 1, 2 }" and "{ 5, 6 }"
-const internal::VariadicDynCastAllOfMatcher<Stmt, InitListExpr> initListExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, InitListExpr>
+    initListExpr;
 
 /// \brief Matches the syntactic form of init list expressions
 /// (if expression have it).
@@ -1262,8 +1332,9 @@
 /// \endcode
 /// cxxStdInitializerListExpr()
 ///   matches "{ 1, 2, 3 }" and "{ 4, 5 }"
-const internal::VariadicDynCastAllOfMatcher<Stmt,
-  CXXStdInitializerListExpr> cxxStdInitializerListExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt,
+                                                   CXXStdInitializerListExpr>
+    cxxStdInitializerListExpr;
 
 /// \brief Matches implicit initializers of init list expressions.
 ///
@@ -1273,8 +1344,8 @@
 /// \endcode
 /// implicitValueInitExpr()
 ///   matches "[0].y" (implicitly)
-const internal::VariadicDynCastAllOfMatcher<Stmt, ImplicitValueInitExpr>
-implicitValueInitExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ImplicitValueInitExpr>
+    implicitValueInitExpr;
 
 /// \brief Matches paren list expressions.
 /// ParenListExprs don't have a predefined type and are used for late parsing.
@@ -1291,7 +1362,8 @@
 /// \endcode
 /// parenListExpr() matches "*this" but NOT matches (a, b) because (a, b)
 /// has a predefined type and is a ParenExpr, not a ParenListExpr.
-const internal::VariadicDynCastAllOfMatcher<Stmt, ParenListExpr> parenListExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ParenListExpr>
+    parenListExpr;
 
 /// \brief Matches substitutions of non-type template parameters.
 ///
@@ -1303,9 +1375,9 @@
 /// \endcode
 /// substNonTypeTemplateParmExpr()
 ///   matches "N" in the right-hand side of "static const int n = N;"
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  SubstNonTypeTemplateParmExpr> substNonTypeTemplateParmExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt,
+                                                   SubstNonTypeTemplateParmExpr>
+    substNonTypeTemplateParmExpr;
 
 /// \brief Matches using declarations.
 ///
@@ -1316,7 +1388,7 @@
 /// \endcode
 /// usingDecl()
 ///   matches \code using X::x \endcode
-const internal::VariadicDynCastAllOfMatcher<Decl, UsingDecl> usingDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, UsingDecl> usingDecl;
 
 /// \brief Matches using namespace declarations.
 ///
@@ -1327,9 +1399,8 @@
 /// \endcode
 /// usingDirectiveDecl()
 ///   matches \code using namespace X \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  UsingDirectiveDecl> usingDirectiveDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, UsingDirectiveDecl>
+    usingDirectiveDecl;
 
 /// \brief Matches reference to a name that can be looked up during parsing
 /// but could not be resolved to a specific declaration.
@@ -1345,9 +1416,8 @@
 /// \endcode
 /// unresolvedLookupExpr()
 ///   matches \code foo<T>() \endcode
-const internal::VariadicDynCastAllOfMatcher<
-   Stmt,
-   UnresolvedLookupExpr> unresolvedLookupExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, UnresolvedLookupExpr>
+    unresolvedLookupExpr;
 
 /// \brief Matches unresolved using value declarations.
 ///
@@ -1360,9 +1430,9 @@
 /// \endcode
 /// unresolvedUsingValueDecl()
 ///   matches \code using X::x \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  UnresolvedUsingValueDecl> unresolvedUsingValueDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl,
+                                                   UnresolvedUsingValueDecl>
+    unresolvedUsingValueDecl;
 
 /// \brief Matches unresolved using value declarations that involve the
 /// typename.
@@ -1379,9 +1449,9 @@
 /// \endcode
 /// unresolvedUsingTypenameDecl()
 ///   matches \code using Base<T>::Foo \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  UnresolvedUsingTypenameDecl> unresolvedUsingTypenameDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl,
+                                                   UnresolvedUsingTypenameDecl>
+    unresolvedUsingTypenameDecl;
 
 /// \brief Matches parentheses used in expressions.
 ///
@@ -1390,9 +1460,7 @@
 ///   int foo() { return 1; }
 ///   int a = (foo() + 1);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  ParenExpr> parenExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ParenExpr> parenExpr;
 
 /// \brief Matches constructor call expressions (including implicit ones).
 ///
@@ -1404,9 +1472,8 @@
 ///   int n;
 ///   f(string(ptr, n), ptr);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXConstructExpr> cxxConstructExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXConstructExpr>
+    cxxConstructExpr;
 
 /// \brief Matches unresolved constructor call expressions.
 ///
@@ -1416,9 +1483,9 @@
 ///   template <typename T>
 ///   void f(const T& t) { return T(t); }
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXUnresolvedConstructExpr> cxxUnresolvedConstructExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt,
+                                                   CXXUnresolvedConstructExpr>
+    cxxUnresolvedConstructExpr;
 
 /// \brief Matches implicit and explicit this expressions.
 ///
@@ -1430,7 +1497,8 @@
 ///   int f() { return i; }
 /// };
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Stmt, CXXThisExpr> cxxThisExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXThisExpr>
+    cxxThisExpr;
 
 /// \brief Matches nodes where temporaries are created.
 ///
@@ -1440,9 +1508,8 @@
 ///   FunctionTakesString(GetStringByValue());
 ///   FunctionTakesStringByPointer(GetStringPointer());
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXBindTemporaryExpr> cxxBindTemporaryExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXBindTemporaryExpr>
+    cxxBindTemporaryExpr;
 
 /// \brief Matches nodes where temporaries are materialized.
 ///
@@ -1456,15 +1523,15 @@
 /// \code
 ///   T u(f());
 ///   g(f());
+///   f().func();
 /// \endcode
 /// but does not match
 /// \code
 ///   f();
-///   f().func();
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  MaterializeTemporaryExpr> materializeTemporaryExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt,
+                                                   MaterializeTemporaryExpr>
+    materializeTemporaryExpr;
 
 /// \brief Matches new expressions.
 ///
@@ -1474,7 +1541,7 @@
 /// \endcode
 /// cxxNewExpr()
 ///   matches 'new X'.
-const internal::VariadicDynCastAllOfMatcher<Stmt, CXXNewExpr> cxxNewExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXNewExpr> cxxNewExpr;
 
 /// \brief Matches delete expressions.
 ///
@@ -1484,7 +1551,8 @@
 /// \endcode
 /// cxxDeleteExpr()
 ///   matches 'delete X'.
-const internal::VariadicDynCastAllOfMatcher<Stmt, CXXDeleteExpr> cxxDeleteExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXDeleteExpr>
+    cxxDeleteExpr;
 
 /// \brief Matches array subscript expressions.
 ///
@@ -1494,9 +1562,8 @@
 /// \endcode
 /// arraySubscriptExpr()
 ///   matches "a[1]"
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  ArraySubscriptExpr> arraySubscriptExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ArraySubscriptExpr>
+    arraySubscriptExpr;
 
 /// \brief Matches the value of a default argument at the call site.
 ///
@@ -1507,9 +1574,8 @@
 ///   void f(int x, int y = 0);
 ///   f(42);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXDefaultArgExpr> cxxDefaultArgExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXDefaultArgExpr>
+    cxxDefaultArgExpr;
 
 /// \brief Matches overloaded operator calls.
 ///
@@ -1525,9 +1591,8 @@
 ///   ostream &o; int b = 1, c = 1;
 ///   o << b << c;
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXOperatorCallExpr> cxxOperatorCallExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXOperatorCallExpr>
+    cxxOperatorCallExpr;
 
 /// \brief Matches expressions.
 ///
@@ -1535,7 +1600,7 @@
 /// \code
 ///   void f() { x(); }
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Stmt, Expr> expr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, Expr> expr;
 
 /// \brief Matches expressions that refer to declarations.
 ///
@@ -1544,7 +1609,8 @@
 ///   bool x;
 ///   if (x) {}
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Stmt, DeclRefExpr> declRefExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, DeclRefExpr>
+    declRefExpr;
 
 /// \brief Matches if statements.
 ///
@@ -1552,7 +1618,7 @@
 /// \code
 ///   if (x) {}
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Stmt, IfStmt> ifStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, IfStmt> ifStmt;
 
 /// \brief Matches for statements.
 ///
@@ -1561,7 +1627,7 @@
 ///   for (;;) {}
 ///   int i[] =  {1, 2, 3}; for (auto a : i);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Stmt, ForStmt> forStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ForStmt> forStmt;
 
 /// \brief Matches the increment statement of a for loop.
 ///
@@ -1599,9 +1665,8 @@
 ///   int i[] =  {1, 2, 3}; for (auto a : i);
 ///   for(int j = 0; j < 5; ++j);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXForRangeStmt> cxxForRangeStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXForRangeStmt>
+    cxxForRangeStmt;
 
 /// \brief Matches the initialization statement of a for loop.
 ///
@@ -1639,7 +1704,7 @@
 /// \endcode
 /// whileStmt()
 ///   matches 'while (true) {}'.
-const internal::VariadicDynCastAllOfMatcher<Stmt, WhileStmt> whileStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, WhileStmt> whileStmt;
 
 /// \brief Matches do statements.
 ///
@@ -1649,7 +1714,7 @@
 /// \endcode
 /// doStmt()
 ///   matches 'do {} while(true)'
-const internal::VariadicDynCastAllOfMatcher<Stmt, DoStmt> doStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, DoStmt> doStmt;
 
 /// \brief Matches break statements.
 ///
@@ -1659,7 +1724,7 @@
 /// \endcode
 /// breakStmt()
 ///   matches 'break'
-const internal::VariadicDynCastAllOfMatcher<Stmt, BreakStmt> breakStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, BreakStmt> breakStmt;
 
 /// \brief Matches continue statements.
 ///
@@ -1669,7 +1734,8 @@
 /// \endcode
 /// continueStmt()
 ///   matches 'continue'
-const internal::VariadicDynCastAllOfMatcher<Stmt, ContinueStmt> continueStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ContinueStmt>
+    continueStmt;
 
 /// \brief Matches return statements.
 ///
@@ -1679,7 +1745,7 @@
 /// \endcode
 /// returnStmt()
 ///   matches 'return 1'
-const internal::VariadicDynCastAllOfMatcher<Stmt, ReturnStmt> returnStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ReturnStmt> returnStmt;
 
 /// \brief Matches goto statements.
 ///
@@ -1690,7 +1756,7 @@
 /// \endcode
 /// gotoStmt()
 ///   matches 'goto FOO'
-const internal::VariadicDynCastAllOfMatcher<Stmt, GotoStmt> gotoStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, GotoStmt> gotoStmt;
 
 /// \brief Matches label statements.
 ///
@@ -1701,7 +1767,7 @@
 /// \endcode
 /// labelStmt()
 ///   matches 'FOO:'
-const internal::VariadicDynCastAllOfMatcher<Stmt, LabelStmt> labelStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, LabelStmt> labelStmt;
 
 /// \brief Matches address of label statements (GNU extension).
 ///
@@ -1713,7 +1779,8 @@
 /// \endcode
 /// addrLabelExpr()
 ///   matches '&&FOO'
-const internal::VariadicDynCastAllOfMatcher<Stmt, AddrLabelExpr> addrLabelExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, AddrLabelExpr>
+    addrLabelExpr;
 
 /// \brief Matches switch statements.
 ///
@@ -1723,7 +1790,7 @@
 /// \endcode
 /// switchStmt()
 ///   matches 'switch(a)'.
-const internal::VariadicDynCastAllOfMatcher<Stmt, SwitchStmt> switchStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, SwitchStmt> switchStmt;
 
 /// \brief Matches case and default statements inside switch statements.
 ///
@@ -1732,8 +1799,8 @@
 ///   switch(a) { case 42: break; default: break; }
 /// \endcode
 /// switchCase()
-///   matches 'case 42: break;' and 'default: break;'.
-const internal::VariadicDynCastAllOfMatcher<Stmt, SwitchCase> switchCase;
+///   matches 'case 42:' and 'default:'.
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, SwitchCase> switchCase;
 
 /// \brief Matches case statements inside switch statements.
 ///
@@ -1742,8 +1809,8 @@
 ///   switch(a) { case 42: break; default: break; }
 /// \endcode
 /// caseStmt()
-///   matches 'case 42: break;'.
-const internal::VariadicDynCastAllOfMatcher<Stmt, CaseStmt> caseStmt;
+///   matches 'case 42:'.
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CaseStmt> caseStmt;
 
 /// \brief Matches default statements inside switch statements.
 ///
@@ -1752,16 +1819,18 @@
 ///   switch(a) { case 42: break; default: break; }
 /// \endcode
 /// defaultStmt()
-///   matches 'default: break;'.
-const internal::VariadicDynCastAllOfMatcher<Stmt, DefaultStmt> defaultStmt;
+///   matches 'default:'.
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, DefaultStmt>
+    defaultStmt;
 
 /// \brief Matches compound statements.
 ///
-/// Example matches '{}' and '{{}}'in 'for (;;) {{}}'
+/// Example matches '{}' and '{{}}' in 'for (;;) {{}}'
 /// \code
 ///   for (;;) {{}}
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Stmt, CompoundStmt> compoundStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CompoundStmt>
+    compoundStmt;
 
 /// \brief Matches catch statements.
 ///
@@ -1770,7 +1839,8 @@
 /// \endcode
 /// cxxCatchStmt()
 ///   matches 'catch(int i)'
-const internal::VariadicDynCastAllOfMatcher<Stmt, CXXCatchStmt> cxxCatchStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXCatchStmt>
+    cxxCatchStmt;
 
 /// \brief Matches try statements.
 ///
@@ -1779,7 +1849,7 @@
 /// \endcode
 /// cxxTryStmt()
 ///   matches 'try {}'
-const internal::VariadicDynCastAllOfMatcher<Stmt, CXXTryStmt> cxxTryStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXTryStmt> cxxTryStmt;
 
 /// \brief Matches throw expressions.
 ///
@@ -1788,7 +1858,8 @@
 /// \endcode
 /// cxxThrowExpr()
 ///   matches 'throw 5'
-const internal::VariadicDynCastAllOfMatcher<Stmt, CXXThrowExpr> cxxThrowExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXThrowExpr>
+    cxxThrowExpr;
 
 /// \brief Matches null statements.
 ///
@@ -1797,7 +1868,7 @@
 /// \endcode
 /// nullStmt()
 ///   matches the second ';'
-const internal::VariadicDynCastAllOfMatcher<Stmt, NullStmt> nullStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, NullStmt> nullStmt;
 
 /// \brief Matches asm statements.
 ///
@@ -1807,7 +1878,7 @@
 /// \endcode
 /// asmStmt()
 ///   matches '__asm("mov al, 2")'
-const internal::VariadicDynCastAllOfMatcher<Stmt, AsmStmt> asmStmt;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, AsmStmt> asmStmt;
 
 /// \brief Matches bool literals.
 ///
@@ -1815,9 +1886,8 @@
 /// \code
 ///   true
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXBoolLiteralExpr> cxxBoolLiteral;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXBoolLiteralExpr>
+    cxxBoolLiteral;
 
 /// \brief Matches string literals (also matches wide string literals).
 ///
@@ -1826,9 +1896,8 @@
 ///   char *s = "abcd";
 ///   wchar_t *ws = L"abcd";
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  StringLiteral> stringLiteral;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, StringLiteral>
+    stringLiteral;
 
 /// \brief Matches character literals (also matches wchar_t).
 ///
@@ -1840,17 +1909,15 @@
 ///   char ch = 'a';
 ///   wchar_t chw = L'a';
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CharacterLiteral> characterLiteral;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CharacterLiteral>
+    characterLiteral;
 
 /// \brief Matches integer literals of all sizes / encodings, e.g.
 /// 1, 1L, 0x1 and 1U.
 ///
 /// Does not match character-encoded integers such as L'a'.
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  IntegerLiteral> integerLiteral;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, IntegerLiteral>
+    integerLiteral;
 
 /// \brief Matches float literals of all sizes / encodings, e.g.
 /// 1.0, 1.0f, 1.0L and 1e10.
@@ -1859,16 +1926,14 @@
 /// \code
 ///   float a = 10;
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  FloatingLiteral> floatLiteral;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, FloatingLiteral>
+    floatLiteral;
 
 /// \brief Matches user defined literal operator call.
 ///
 /// Example match: "foo"_suffix
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  UserDefinedLiteral> userDefinedLiteral;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, UserDefinedLiteral>
+    userDefinedLiteral;
 
 /// \brief Matches compound (i.e. non-scalar) literals
 ///
@@ -1877,24 +1942,23 @@
 ///   int array[4] = {1};
 ///   vector int myvec = (vector int)(1, 2);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CompoundLiteralExpr> compoundLiteralExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CompoundLiteralExpr>
+    compoundLiteralExpr;
 
 /// \brief Matches nullptr literal.
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXNullPtrLiteralExpr> cxxNullPtrLiteralExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXNullPtrLiteralExpr>
+    cxxNullPtrLiteralExpr;
 
 /// \brief Matches GNU __null expression.
-const internal::VariadicDynCastAllOfMatcher<Stmt, GNUNullExpr> gnuNullExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, GNUNullExpr>
+    gnuNullExpr;
 
 /// \brief Matches atomic builtins.
 /// Example matches __atomic_load_n(ptr, 1)
 /// \code
 ///   void foo() { int *ptr; __atomic_load_n(ptr, 1); }
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Stmt, AtomicExpr> atomicExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, AtomicExpr> atomicExpr;
 
 /// \brief Matches statement expression (GNU extension).
 ///
@@ -1902,7 +1966,7 @@
 /// \code
 ///   int C = ({ int X = 4; X; });
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Stmt, StmtExpr> stmtExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, StmtExpr> stmtExpr;
 
 /// \brief Matches binary operator expressions.
 ///
@@ -1910,9 +1974,8 @@
 /// \code
 ///   !(a || b)
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  BinaryOperator> binaryOperator;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, BinaryOperator>
+    binaryOperator;
 
 /// \brief Matches unary operator expressions.
 ///
@@ -1920,9 +1983,8 @@
 /// \code
 ///   !a || b
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  UnaryOperator> unaryOperator;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, UnaryOperator>
+    unaryOperator;
 
 /// \brief Matches conditional operator expressions.
 ///
@@ -1930,9 +1992,8 @@
 /// \code
 ///   (a ? b : c) + 42
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  ConditionalOperator> conditionalOperator;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ConditionalOperator>
+    conditionalOperator;
 
 /// \brief Matches binary conditional operator expressions (GNU extension).
 ///
@@ -1940,9 +2001,9 @@
 /// \code
 ///   (a ?: b) + 42;
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  BinaryConditionalOperator> binaryConditionalOperator;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt,
+                                                   BinaryConditionalOperator>
+    binaryConditionalOperator;
 
 /// \brief Matches opaque value expressions. They are used as helpers
 /// to reference another expressions and can be met
@@ -1952,9 +2013,8 @@
 /// \code
 ///   (a ?: c) + 42;
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  OpaqueValueExpr> opaqueValueExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, OpaqueValueExpr>
+    opaqueValueExpr;
 
 /// \brief Matches a C++ static_assert declaration.
 ///
@@ -1969,9 +2029,8 @@
 ///   };
 ///   static_assert(sizeof(S) == sizeof(int));
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Decl,
-  StaticAssertDecl> staticAssertDecl;
+extern const internal::VariadicDynCastAllOfMatcher<Decl, StaticAssertDecl>
+    staticAssertDecl;
 
 /// \brief Matches a reinterpret_cast expression.
 ///
@@ -1983,9 +2042,8 @@
 /// \code
 ///   void* p = reinterpret_cast<char*>(&p);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXReinterpretCastExpr> cxxReinterpretCastExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXReinterpretCastExpr>
+    cxxReinterpretCastExpr;
 
 /// \brief Matches a C++ static_cast expression.
 ///
@@ -2000,9 +2058,8 @@
 /// \code
 ///   long eight(static_cast<long>(8));
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXStaticCastExpr> cxxStaticCastExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXStaticCastExpr>
+    cxxStaticCastExpr;
 
 /// \brief Matches a dynamic_cast expression.
 ///
@@ -2016,9 +2073,8 @@
 ///   B b;
 ///   D* p = dynamic_cast<D*>(&b);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXDynamicCastExpr> cxxDynamicCastExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXDynamicCastExpr>
+    cxxDynamicCastExpr;
 
 /// \brief Matches a const_cast expression.
 ///
@@ -2028,9 +2084,8 @@
 ///   const int &r(n);
 ///   int* p = const_cast<int*>(&r);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXConstCastExpr> cxxConstCastExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXConstCastExpr>
+    cxxConstCastExpr;
 
 /// \brief Matches a C-style cast expression.
 ///
@@ -2038,9 +2093,8 @@
 /// \code
 ///   int i = (int) 2.2f;
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CStyleCastExpr> cStyleCastExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CStyleCastExpr>
+    cStyleCastExpr;
 
 /// \brief Matches explicit cast expressions.
 ///
@@ -2063,17 +2117,15 @@
 /// \code
 ///   long ell = 42;
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  ExplicitCastExpr> explicitCastExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ExplicitCastExpr>
+    explicitCastExpr;
 
 /// \brief Matches the implicit cast nodes of Clang's AST.
 ///
 /// This matches many different places, including function call return value
 /// eliding, as well as any type conversions.
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  ImplicitCastExpr> implicitCastExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, ImplicitCastExpr>
+    implicitCastExpr;
 
 /// \brief Matches any cast nodes of Clang's AST.
 ///
@@ -2088,7 +2140,7 @@
 ///   int i = (0);
 ///   int k = 0;
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<Stmt, CastExpr> castExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CastExpr> castExpr;
 
 /// \brief Matches functional cast expressions
 ///
@@ -2098,9 +2150,8 @@
 ///   Foo g = (Foo) bar;
 ///   Foo h = Foo(bar);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXFunctionalCastExpr> cxxFunctionalCastExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXFunctionalCastExpr>
+    cxxFunctionalCastExpr;
 
 /// \brief Matches functional cast expressions having N != 1 arguments
 ///
@@ -2108,9 +2159,8 @@
 /// \code
 ///   Foo h = Foo(bar, bar);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CXXTemporaryObjectExpr> cxxTemporaryObjectExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CXXTemporaryObjectExpr>
+    cxxTemporaryObjectExpr;
 
 /// \brief Matches predefined identifier expressions [C99 6.4.2.2].
 ///
@@ -2118,9 +2168,8 @@
 /// \code
 ///   printf("%s", __func__);
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  PredefinedExpr> predefinedExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, PredefinedExpr>
+    predefinedExpr;
 
 /// \brief Matches C99 designated initializer expressions [C99 6.7.8].
 ///
@@ -2128,9 +2177,8 @@
 /// \code
 ///   point ptarray[10] = { [2].y = 1.0, [0].x = 1.0 };
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  DesignatedInitExpr> designatedInitExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, DesignatedInitExpr>
+    designatedInitExpr;
 
 /// \brief Matches designated initializer expressions that contain
 /// a specific number of designators.
@@ -2148,13 +2196,13 @@
 }
 
 /// \brief Matches \c QualTypes in the clang AST.
-const internal::VariadicAllOfMatcher<QualType> qualType;
+extern const internal::VariadicAllOfMatcher<QualType> qualType;
 
 /// \brief Matches \c Types in the clang AST.
-const internal::VariadicAllOfMatcher<Type> type;
+extern const internal::VariadicAllOfMatcher<Type> type;
 
 /// \brief Matches \c TypeLocs in the clang AST.
-const internal::VariadicAllOfMatcher<TypeLoc> typeLoc;
+extern const internal::VariadicAllOfMatcher<TypeLoc> typeLoc;
 
 /// \brief Matches if any of the given matchers matches.
 ///
@@ -2175,23 +2223,23 @@
 /// \c b.
 ///
 /// Usable as: Any Matcher
-const internal::VariadicOperatorMatcherFunc<2, UINT_MAX> eachOf = {
-  internal::DynTypedMatcher::VO_EachOf
-};
+extern const internal::VariadicOperatorMatcherFunc<
+    2, std::numeric_limits<unsigned>::max()>
+    eachOf;
 
 /// \brief Matches if any of the given matchers matches.
 ///
 /// Usable as: Any Matcher
-const internal::VariadicOperatorMatcherFunc<2, UINT_MAX> anyOf = {
-  internal::DynTypedMatcher::VO_AnyOf
-};
+extern const internal::VariadicOperatorMatcherFunc<
+    2, std::numeric_limits<unsigned>::max()>
+    anyOf;
 
 /// \brief Matches if all given matchers match.
 ///
 /// Usable as: Any Matcher
-const internal::VariadicOperatorMatcherFunc<2, UINT_MAX> allOf = {
-  internal::DynTypedMatcher::VO_AllOf
-};
+extern const internal::VariadicOperatorMatcherFunc<
+    2, std::numeric_limits<unsigned>::max()>
+    allOf;
 
 /// \brief Matches sizeof (C99), alignof (C++11) and vec_step (OpenCL)
 ///
@@ -2202,9 +2250,9 @@
 /// \endcode
 /// unaryExprOrTypeTraitExpr()
 ///   matches \c sizeof(x) and \c alignof(x)
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  UnaryExprOrTypeTraitExpr> unaryExprOrTypeTraitExpr;
+extern const internal::VariadicDynCastAllOfMatcher<Stmt,
+                                                   UnaryExprOrTypeTraitExpr>
+    unaryExprOrTypeTraitExpr;
 
 /// \brief Matches unary expressions that have a specific type of argument.
 ///
@@ -2280,9 +2328,9 @@
 /// \code
 ///     anyOf(hasName(a), hasName(b), hasName(c))
 /// \endcode
-const internal::VariadicFunction<internal::Matcher<NamedDecl>, StringRef,
-                                 internal::hasAnyNameFunc>
-    hasAnyName = {};
+extern const internal::VariadicFunction<internal::Matcher<NamedDecl>, StringRef,
+                                        internal::hasAnyNameFunc>
+    hasAnyName;
 
 /// \brief Matches NamedDecl nodes whose fully qualified names contain
 /// a substring matched by the given RegExp.
@@ -2431,8 +2479,7 @@
 /// casts and paren casts. If you are matching with expr then you should
 /// probably consider using ignoringParenImpCasts like:
 /// has(ignoringParenImpCasts(expr())).
-const internal::ArgumentAdaptingMatcherFunc<internal::HasMatcher>
-LLVM_ATTRIBUTE_UNUSED has = {};
+extern const internal::ArgumentAdaptingMatcherFunc<internal::HasMatcher> has;
 
 /// \brief Matches AST nodes that have descendant AST nodes that match the
 /// provided matcher.
@@ -2448,17 +2495,19 @@
 /// DescendantT must be an AST base type.
 ///
 /// Usable as: Any Matcher
-const internal::ArgumentAdaptingMatcherFunc<internal::HasDescendantMatcher>
-LLVM_ATTRIBUTE_UNUSED hasDescendant = {};
+extern const internal::ArgumentAdaptingMatcherFunc<
+    internal::HasDescendantMatcher>
+    hasDescendant;
 
 /// \brief Matches AST nodes that have child AST nodes that match the
 /// provided matcher.
 ///
-/// Example matches X, Y
+/// Example matches X, Y, Y::X, Z::Y, Z::Y::X
 ///   (matcher = cxxRecordDecl(forEach(cxxRecordDecl(hasName("X")))
 /// \code
-///   class X {};  // Matches X, because X::X is a class of name X inside X.
-///   class Y { class X {}; };
+///   class X {};
+///   class Y { class X {}; };  // Matches Y, because Y::X is a class of name X
+///                             // inside Y.
 ///   class Z { class Y { class X {}; }; };  // Does not match Z.
 /// \endcode
 ///
@@ -2468,17 +2517,18 @@
 /// matches instead of only on the first one.
 ///
 /// Usable as: Any Matcher
-const internal::ArgumentAdaptingMatcherFunc<internal::ForEachMatcher>
-LLVM_ATTRIBUTE_UNUSED forEach = {};
+extern const internal::ArgumentAdaptingMatcherFunc<internal::ForEachMatcher>
+    forEach;
 
 /// \brief Matches AST nodes that have descendant AST nodes that match the
 /// provided matcher.
 ///
-/// Example matches X, A, B, C
+/// Example matches X, A, A::X, B, B::C, B::C::X
 ///   (matcher = cxxRecordDecl(forEachDescendant(cxxRecordDecl(hasName("X")))))
 /// \code
-///   class X {};  // Matches X, because X::X is a class of name X inside X.
-///   class A { class X {}; };
+///   class X {};
+///   class A { class X {}; };  // Matches A, because A::X is a class of name
+///                             // X inside A.
 ///   class B { class C { class X {}; }; };
 /// \endcode
 ///
@@ -2497,8 +2547,9 @@
 /// \endcode
 ///
 /// Usable as: Any Matcher
-const internal::ArgumentAdaptingMatcherFunc<internal::ForEachDescendantMatcher>
-LLVM_ATTRIBUTE_UNUSED forEachDescendant = {};
+extern const internal::ArgumentAdaptingMatcherFunc<
+    internal::ForEachDescendantMatcher>
+    forEachDescendant;
 
 /// \brief Matches if the node or any descendant matches.
 ///
@@ -2531,11 +2582,11 @@
 /// \c compoundStmt(hasParent(ifStmt())) matches "{ int x = 43; }".
 ///
 /// Usable as: Any Matcher
-const internal::ArgumentAdaptingMatcherFunc<
+extern const internal::ArgumentAdaptingMatcherFunc<
     internal::HasParentMatcher,
     internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>,
     internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>>
-    LLVM_ATTRIBUTE_UNUSED hasParent = {};
+    hasParent;
 
 /// \brief Matches AST nodes that have an ancestor that matches the provided
 /// matcher.
@@ -2548,11 +2599,11 @@
 /// \c expr(integerLiteral(hasAncestor(ifStmt()))) matches \c 42, but not 43.
 ///
 /// Usable as: Any Matcher
-const internal::ArgumentAdaptingMatcherFunc<
+extern const internal::ArgumentAdaptingMatcherFunc<
     internal::HasAncestorMatcher,
     internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>,
     internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>>
-    LLVM_ATTRIBUTE_UNUSED hasAncestor = {};
+    hasAncestor;
 
 /// \brief Matches if the provided matcher does not match.
 ///
@@ -2563,9 +2614,7 @@
 /// \endcode
 ///
 /// Usable as: Any Matcher
-const internal::VariadicOperatorMatcherFunc<1, 1> unless = {
-  internal::DynTypedMatcher::VO_UnaryNot
-};
+extern const internal::VariadicOperatorMatcherFunc<1, 1> unless;
 
 /// \brief Matches a node if the declaration associated with that node
 /// matches the given matcher.
@@ -2634,7 +2683,7 @@
 ///   (matcher = cxxMemberCallExpr(on(hasType(cxxRecordDecl(hasName("Y"))))))
 /// \code
 ///   class Y { public: void x(); };
-///   void z() { Y y; y.x(); }",
+///   void z() { Y y; y.x(); }
 /// \endcode
 ///
 /// FIXME: Overload to allow directly matching types?
@@ -2794,8 +2843,10 @@
 AST_POLYMORPHIC_MATCHER_P_OVERLOAD(
     hasType, AST_POLYMORPHIC_SUPPORTED_TYPES(Expr, TypedefNameDecl, ValueDecl),
     internal::Matcher<QualType>, InnerMatcher, 0) {
-  return InnerMatcher.matches(internal::getUnderlyingType(Node),
-                              Finder, Builder);
+  QualType QT = internal::getUnderlyingType(Node);
+  if (!QT.isNull())
+    return InnerMatcher.matches(QT, Finder, Builder);
+  return false;
 }
 
 /// \brief Overloaded to match the declaration of the expression's or value
@@ -3508,11 +3559,11 @@
 ///   void k(int x, int y, int z, ...);
 /// \endcode
 /// functionDecl(parameterCountIs(2))
-///   matches void g(int i, int j) {}
+///   matches \c g and \c h
 /// functionProtoType(parameterCountIs(2))
-///   matches void h(int i, int j)
+///   matches \c g and \c h
 /// functionProtoType(parameterCountIs(3))
-///   matches void k(int x, int y, int z, ...);
+///   matches \c k
 AST_POLYMORPHIC_MATCHER_P(parameterCountIs,
                           AST_POLYMORPHIC_SUPPORTED_TYPES(FunctionDecl,
                                                           FunctionProtoType),
@@ -3520,6 +3571,22 @@
   return Node.getNumParams() == N;
 }
 
+/// \brief Matches \c FunctionDecls that have a noreturn attribute.
+///
+/// Given
+/// \code
+///   void nope();
+///   [[noreturn]] void a();
+///   __attribute__((noreturn)) void b();
+///   struct c { [[noreturn]] c(); };
+/// \endcode
+/// functionDecl(isNoReturn())
+///   matches all of those except
+/// \code
+///   void nope();
+/// \endcode
+AST_MATCHER(FunctionDecl, isNoReturn) { return Node.isNoReturn(); }
+
 /// \brief Matches the return type of a function declaration.
 ///
 /// Given:
@@ -3993,7 +4060,6 @@
 /// \code
 /// int a = b ?: 1;
 /// \endcode
-
 AST_POLYMORPHIC_MATCHER_P(hasSourceExpression,
                           AST_POLYMORPHIC_SUPPORTED_TYPES(CastExpr,
                                                           OpaqueValueExpr),
@@ -4113,11 +4179,19 @@
 ///   extern int vb;  // Doesn't match, as it doesn't define the variable.
 ///   void fa() {}
 ///   void fb();  // Doesn't match, as it has no body.
+///   @interface X
+///   - (void)ma; // Doesn't match, interface is declaration.
+///   @end
+///   @implementation X
+///   - (void)ma {}
+///   @end
 /// \endcode
 ///
-/// Usable as: Matcher<TagDecl>, Matcher<VarDecl>, Matcher<FunctionDecl>
+/// Usable as: Matcher<TagDecl>, Matcher<VarDecl>, Matcher<FunctionDecl>,
+///   Matcher<ObjCMethodDecl>
 AST_POLYMORPHIC_MATCHER(isDefinition,
                         AST_POLYMORPHIC_SUPPORTED_TYPES(TagDecl, VarDecl,
+                                                        ObjCMethodDecl,
                                                         FunctionDecl)) {
   return Node.isThisDeclarationADefinition();
 }
@@ -4577,6 +4651,10 @@
 /// \code
 ///   template <typename T> class X {}; class A {}; template class X<A>;
 /// \endcode
+/// or
+/// \code
+///   template <typename T> class X {}; class A {}; extern template class X<A>;
+/// \endcode
 /// cxxRecordDecl(hasName("::X"), isTemplateInstantiation())
 ///   matches the template instantiation of X<A>.
 ///
@@ -4594,7 +4672,9 @@
                                                         CXXRecordDecl)) {
   return (Node.getTemplateSpecializationKind() == TSK_ImplicitInstantiation ||
           Node.getTemplateSpecializationKind() ==
-          TSK_ExplicitInstantiationDefinition);
+              TSK_ExplicitInstantiationDefinition ||
+          Node.getTemplateSpecializationKind() ==
+              TSK_ExplicitInstantiationDeclaration);
 }
 
 /// \brief Matches declarations that are template instantiations or are inside
@@ -4684,6 +4764,9 @@
   return Node.isVoidType();
 }
 
+template <typename NodeType>
+using AstTypeMatcher = internal::VariadicDynCastAllOfMatcher<Type, NodeType>;
+
 /// \brief Matches builtin Types.
 ///
 /// Given
@@ -4696,7 +4779,7 @@
 /// \endcode
 /// builtinType()
 ///   matches "int b", "float c" and "bool d"
-AST_TYPE_MATCHER(BuiltinType, builtinType);
+extern const AstTypeMatcher<BuiltinType> builtinType;
 
 /// \brief Matches all kinds of arrays.
 ///
@@ -4708,7 +4791,7 @@
 /// \endcode
 /// arrayType()
 ///   matches "int a[]", "int b[4]" and "int c[a[0]]";
-AST_TYPE_MATCHER(ArrayType, arrayType);
+extern const AstTypeMatcher<ArrayType> arrayType;
 
 /// \brief Matches C99 complex types.
 ///
@@ -4718,7 +4801,7 @@
 /// \endcode
 /// complexType()
 ///   matches "_Complex float f"
-AST_TYPE_MATCHER(ComplexType, complexType);
+extern const AstTypeMatcher<ComplexType> complexType;
 
 /// \brief Matches any real floating-point type (float, double, long double).
 ///
@@ -4746,9 +4829,9 @@
 ///   matches "int b[7]"
 ///
 /// Usable as: Matcher<ArrayType>, Matcher<ComplexType>
-AST_TYPELOC_TRAVERSE_MATCHER(hasElementType, getElement,
-                             AST_POLYMORPHIC_SUPPORTED_TYPES(ArrayType,
-                                                             ComplexType));
+AST_TYPELOC_TRAVERSE_MATCHER_DECL(hasElementType, getElement,
+                                  AST_POLYMORPHIC_SUPPORTED_TYPES(ArrayType,
+                                                                  ComplexType));
 
 /// \brief Matches C arrays with a specified constant size.
 ///
@@ -4762,7 +4845,7 @@
 /// \endcode
 /// constantArrayType()
 ///   matches "int a[2]"
-AST_TYPE_MATCHER(ConstantArrayType, constantArrayType);
+extern const AstTypeMatcher<ConstantArrayType> constantArrayType;
 
 /// \brief Matches nodes that have the specified size.
 ///
@@ -4797,7 +4880,7 @@
 /// \endcode
 /// dependentSizedArrayType
 ///   matches "T data[Size]"
-AST_TYPE_MATCHER(DependentSizedArrayType, dependentSizedArrayType);
+extern const AstTypeMatcher<DependentSizedArrayType> dependentSizedArrayType;
 
 /// \brief Matches C arrays with unspecified size.
 ///
@@ -4809,7 +4892,7 @@
 /// \endcode
 /// incompleteArrayType()
 ///   matches "int a[]" and "int c[]"
-AST_TYPE_MATCHER(IncompleteArrayType, incompleteArrayType);
+extern const AstTypeMatcher<IncompleteArrayType> incompleteArrayType;
 
 /// \brief Matches C arrays with a specified size that is not an
 /// integer-constant-expression.
@@ -4824,7 +4907,7 @@
 /// \endcode
 /// variableArrayType()
 ///   matches "int c[a[0]]"
-AST_TYPE_MATCHER(VariableArrayType, variableArrayType);
+extern const AstTypeMatcher<VariableArrayType> variableArrayType;
 
 /// \brief Matches \c VariableArrayType nodes that have a specific size
 /// expression.
@@ -4851,7 +4934,7 @@
 /// \endcode
 /// atomicType()
 ///   matches "_Atomic(int) i"
-AST_TYPE_MATCHER(AtomicType, atomicType);
+extern const AstTypeMatcher<AtomicType> atomicType;
 
 /// \brief Matches atomic types with a specific value type.
 ///
@@ -4864,8 +4947,8 @@
 ///  matches "_Atomic(int) i"
 ///
 /// Usable as: Matcher<AtomicType>
-AST_TYPELOC_TRAVERSE_MATCHER(hasValueType, getValue,
-                             AST_POLYMORPHIC_SUPPORTED_TYPES(AtomicType));
+AST_TYPELOC_TRAVERSE_MATCHER_DECL(hasValueType, getValue,
+                                  AST_POLYMORPHIC_SUPPORTED_TYPES(AtomicType));
 
 /// \brief Matches types nodes representing C++11 auto types.
 ///
@@ -4877,7 +4960,7 @@
 /// \endcode
 /// autoType()
 ///   matches "auto n" and "auto i"
-AST_TYPE_MATCHER(AutoType, autoType);
+extern const AstTypeMatcher<AutoType> autoType;
 
 /// \brief Matches \c AutoType nodes where the deduced type is a specific type.
 ///
@@ -4905,7 +4988,7 @@
 /// \endcode
 /// functionType()
 ///   matches "int (*f)(int)" and the type of "g".
-AST_TYPE_MATCHER(FunctionType, functionType);
+extern const AstTypeMatcher<FunctionType> functionType;
 
 /// \brief Matches \c FunctionProtoType nodes.
 ///
@@ -4917,7 +5000,7 @@
 /// functionProtoType()
 ///   matches "int (*f)(int)" and the type of "g" in C++ mode.
 ///   In C mode, "g" is not matched because it does not contain a prototype.
-AST_TYPE_MATCHER(FunctionProtoType, functionProtoType);
+extern const AstTypeMatcher<FunctionProtoType> functionProtoType;
 
 /// \brief Matches \c ParenType nodes.
 ///
@@ -4929,7 +5012,7 @@
 ///
 /// \c varDecl(hasType(pointsTo(parenType()))) matches \c ptr_to_array but not
 /// \c array_of_ptrs.
-AST_TYPE_MATCHER(ParenType, parenType);
+extern const AstTypeMatcher<ParenType> parenType;
 
 /// \brief Matches \c ParenType nodes where the inner type is a specific type.
 ///
@@ -4950,7 +5033,7 @@
 /// "void (^)(int)".
 ///
 /// The \c pointee is always required to be a \c FunctionType.
-AST_TYPE_MATCHER(BlockPointerType, blockPointerType);
+extern const AstTypeMatcher<BlockPointerType> blockPointerType;
 
 /// \brief Matches member pointer types.
 /// Given
@@ -4960,7 +5043,7 @@
 /// \endcode
 /// memberPointerType()
 ///   matches "A::* ptr"
-AST_TYPE_MATCHER(MemberPointerType, memberPointerType);
+extern const AstTypeMatcher<MemberPointerType> memberPointerType;
 
 /// \brief Matches pointer types, but does not match Objective-C object pointer
 /// types.
@@ -4977,7 +5060,7 @@
 /// \endcode
 /// pointerType()
 ///   matches "int *a", but does not match "Foo *f".
-AST_TYPE_MATCHER(PointerType, pointerType);
+extern const AstTypeMatcher<PointerType> pointerType;
 
 /// \brief Matches an Objective-C object pointer type, which is different from
 /// a pointer type, despite being syntactically similar.
@@ -4992,7 +5075,7 @@
 /// \endcode
 /// pointerType()
 ///   matches "Foo *f", but does not match "int *a".
-AST_TYPE_MATCHER(ObjCObjectPointerType, objcObjectPointerType);
+extern const AstTypeMatcher<ObjCObjectPointerType> objcObjectPointerType;
 
 /// \brief Matches both lvalue and rvalue reference types.
 ///
@@ -5008,7 +5091,7 @@
 /// \endcode
 ///
 /// \c referenceType() matches the types of \c b, \c c, \c d, \c e, and \c f.
-AST_TYPE_MATCHER(ReferenceType, referenceType);
+extern const AstTypeMatcher<ReferenceType> referenceType;
 
 /// \brief Matches lvalue reference types.
 ///
@@ -5025,7 +5108,7 @@
 ///
 /// \c lValueReferenceType() matches the types of \c b, \c d, and \c e. \c e is
 /// matched since the type is deduced as int& by reference collapsing rules.
-AST_TYPE_MATCHER(LValueReferenceType, lValueReferenceType);
+extern const AstTypeMatcher<LValueReferenceType> lValueReferenceType;
 
 /// \brief Matches rvalue reference types.
 ///
@@ -5042,7 +5125,7 @@
 ///
 /// \c rValueReferenceType() matches the types of \c c and \c f. \c e is not
 /// matched as it is deduced to int& by reference collapsing rules.
-AST_TYPE_MATCHER(RValueReferenceType, rValueReferenceType);
+extern const AstTypeMatcher<RValueReferenceType> rValueReferenceType;
 
 /// \brief Narrows PointerType (and similar) matchers to those where the
 /// \c pointee matches a given matcher.
@@ -5058,11 +5141,10 @@
 ///
 /// Usable as: Matcher<BlockPointerType>, Matcher<MemberPointerType>,
 ///   Matcher<PointerType>, Matcher<ReferenceType>
-AST_TYPELOC_TRAVERSE_MATCHER(pointee, getPointee,
-                             AST_POLYMORPHIC_SUPPORTED_TYPES(BlockPointerType,
-                                                             MemberPointerType,
-                                                             PointerType,
-                                                             ReferenceType));
+AST_TYPELOC_TRAVERSE_MATCHER_DECL(
+    pointee, getPointee,
+    AST_POLYMORPHIC_SUPPORTED_TYPES(BlockPointerType, MemberPointerType,
+                                    PointerType, ReferenceType));
 
 /// \brief Matches typedef types.
 ///
@@ -5072,7 +5154,7 @@
 /// \endcode
 /// typedefType()
 ///   matches "typedef int X"
-AST_TYPE_MATCHER(TypedefType, typedefType);
+extern const AstTypeMatcher<TypedefType> typedefType;
 
 /// \brief Matches enum types.
 ///
@@ -5087,7 +5169,7 @@
 //
 /// \c enumType() matches the type of the variable declarations of both \c c and
 /// \c s.
-AST_TYPE_MATCHER(EnumType, enumType);
+extern const AstTypeMatcher<EnumType> enumType;
 
 /// \brief Matches template specialization types.
 ///
@@ -5102,7 +5184,8 @@
 ///
 /// \c templateSpecializationType() matches the type of the explicit
 /// instantiation in \c A and the type of the variable declaration in \c B.
-AST_TYPE_MATCHER(TemplateSpecializationType, templateSpecializationType);
+extern const AstTypeMatcher<TemplateSpecializationType>
+    templateSpecializationType;
 
 /// \brief Matches types nodes representing unary type transformations.
 ///
@@ -5112,7 +5195,7 @@
 /// \endcode
 /// unaryTransformType()
 ///   matches "__underlying_type(T)"
-AST_TYPE_MATCHER(UnaryTransformType, unaryTransformType);
+extern const AstTypeMatcher<UnaryTransformType> unaryTransformType;
 
 /// \brief Matches record types (e.g. structs, classes).
 ///
@@ -5127,7 +5210,7 @@
 ///
 /// \c recordType() matches the type of the variable declarations of both \c c
 /// and \c s.
-AST_TYPE_MATCHER(RecordType, recordType);
+extern const AstTypeMatcher<RecordType> recordType;
 
 /// \brief Matches tag types (record and enum types).
 ///
@@ -5142,7 +5225,7 @@
 ///
 /// \c tagType() matches the type of the variable declarations of both \c e
 /// and \c c.
-AST_TYPE_MATCHER(TagType, tagType);
+extern const AstTypeMatcher<TagType> tagType;
 
 /// \brief Matches types specified with an elaborated type keyword or with a
 /// qualified name.
@@ -5162,7 +5245,7 @@
 ///
 /// \c elaboratedType() matches the type of the variable declarations of both
 /// \c c and \c d.
-AST_TYPE_MATCHER(ElaboratedType, elaboratedType);
+extern const AstTypeMatcher<ElaboratedType> elaboratedType;
 
 /// \brief Matches ElaboratedTypes whose qualifier, a NestedNameSpecifier,
 /// matches \c InnerMatcher if the qualifier exists.
@@ -5219,7 +5302,8 @@
 /// \endcode
 ///
 /// \c substTemplateTypeParmType() matches the type of 't' but not '1'
-AST_TYPE_MATCHER(SubstTemplateTypeParmType, substTemplateTypeParmType);
+extern const AstTypeMatcher<SubstTemplateTypeParmType>
+    substTemplateTypeParmType;
 
 /// \brief Matches template type parameter substitutions that have a replacement
 /// type that matches the provided matcher.
@@ -5244,7 +5328,7 @@
 /// \code
 ///   template <typename T> void f(int i);
 /// \endcode
-AST_TYPE_MATCHER(TemplateTypeParmType, templateTypeParmType);
+extern const AstTypeMatcher<TemplateTypeParmType> templateTypeParmType;
 
 /// \brief Matches injected class name types.
 ///
@@ -5256,7 +5340,7 @@
 ///     void g(S<T> s);
 ///   };
 /// \endcode
-AST_TYPE_MATCHER(InjectedClassNameType, injectedClassNameType);
+extern const AstTypeMatcher<InjectedClassNameType> injectedClassNameType;
 
 /// \brief Matches decayed type
 /// Example matches i[] in declaration of f.
@@ -5268,7 +5352,7 @@
 ///     i[1] = 0;
 ///   }
 /// \endcode
-AST_TYPE_MATCHER(DecayedType, decayedType);
+extern const AstTypeMatcher<DecayedType> decayedType;
 
 /// \brief Matches the decayed type, whos decayed type matches \c InnerMatcher
 AST_MATCHER_P(DecayedType, hasDecayedType, internal::Matcher<QualType>,
@@ -5309,11 +5393,12 @@
 /// \endcode
 /// nestedNameSpecifier()
 ///   matches "ns::" and both "A::"
-const internal::VariadicAllOfMatcher<NestedNameSpecifier> nestedNameSpecifier;
+extern const internal::VariadicAllOfMatcher<NestedNameSpecifier>
+    nestedNameSpecifier;
 
 /// \brief Same as \c nestedNameSpecifier but matches \c NestedNameSpecifierLoc.
-const internal::VariadicAllOfMatcher<
-  NestedNameSpecifierLoc> nestedNameSpecifierLoc;
+extern const internal::VariadicAllOfMatcher<NestedNameSpecifierLoc>
+    nestedNameSpecifierLoc;
 
 /// \brief Matches \c NestedNameSpecifierLocs for which the given inner
 /// NestedNameSpecifier-matcher matches.
@@ -5666,17 +5751,14 @@
   return false;
 }
 
-
 /// \brief Matches CUDA kernel call expression.
 ///
 /// Example matches,
 /// \code
 ///   kernel<<<i,j>>>();
 /// \endcode
-const internal::VariadicDynCastAllOfMatcher<
-  Stmt,
-  CUDAKernelCallExpr> cudaKernelCallExpr;
-
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, CUDAKernelCallExpr>
+    cudaKernelCallExpr;
 
 /// \brief Matches expressions that resolve to a null pointer constant, such as
 /// GNU's __null, C++11's nullptr, or C's NULL macro.
@@ -5761,7 +5843,78 @@
   return Node.hasExternalFormalLinkage();
 }
 
-} // end namespace ast_matchers
-} // end namespace clang
+/// \brief Matches a declaration that has default arguments.
+///
+/// Example matches y (matcher = parmVarDecl(hasDefaultArgument()))
+/// \code
+/// void x(int val) {}
+/// void y(int val = 0) {}
+/// \endcode
+AST_MATCHER(ParmVarDecl, hasDefaultArgument) { 
+  return Node.hasDefaultArg(); 
+}
 
-#endif
+/// \brief Matches array new expressions.
+///
+/// Given:
+/// \code
+///   MyClass *p1 = new MyClass[10];
+/// \endcode
+/// cxxNewExpr(isArray())
+///   matches the expression 'new MyClass[10]'.
+AST_MATCHER(CXXNewExpr, isArray) {
+  return Node.isArray();
+}
+
+/// \brief Matches array new expressions with a given array size.
+///
+/// Given:
+/// \code
+///   MyClass *p1 = new MyClass[10];
+/// \endcode
+/// cxxNewExpr(hasArraySize(intgerLiteral(equals(10))))
+///   matches the expression 'new MyClass[10]'.
+AST_MATCHER_P(CXXNewExpr, hasArraySize, internal::Matcher<Expr>, InnerMatcher) {
+  return Node.isArray() &&
+    InnerMatcher.matches(*Node.getArraySize(), Finder, Builder);
+}
+
+/// \brief Matches a class declaration that is defined.
+///
+/// Example matches x (matcher = cxxRecordDecl(hasDefinition()))
+/// \code
+/// class x {};
+/// class y;
+/// \endcode
+AST_MATCHER(CXXRecordDecl, hasDefinition) {
+  return Node.hasDefinition();
+}
+
+/// \brief Matches C++11 scoped enum declaration.
+///
+/// Example matches Y (matcher = enumDecl(isScoped()))
+/// \code
+/// enum X {};
+/// enum class Y {};
+/// \endcode
+AST_MATCHER(EnumDecl, isScoped) {
+  return Node.isScoped();
+}
+
+/// \brief Matches a function declared with a trailing return type.
+///
+/// Example matches Y (matcher = functionDecl(hasTrailingReturn()))
+/// \code
+/// int X() {}
+/// auto Y() -> int {}
+/// \endcode
+AST_MATCHER(FunctionDecl, hasTrailingReturn) {
+  if (const auto *F = Node.getType()->getAs<FunctionProtoType>())
+    return F->hasTrailingReturn();
+  return false;
+}
+
+} // namespace ast_matchers
+} // namespace clang
+
+#endif // LLVM_CLANG_ASTMATCHERS_ASTMATCHERS_H
diff --git a/include/clang/ASTMatchers/ASTMatchersInternal.h b/include/clang/ASTMatchers/ASTMatchersInternal.h
index cd59bbc..8bd61a7 100644
--- a/include/clang/ASTMatchers/ASTMatchersInternal.h
+++ b/include/clang/ASTMatchers/ASTMatchersInternal.h
@@ -1,4 +1,4 @@
-//===--- ASTMatchersInternal.h - Structural query framework -----*- C++ -*-===//
+//===- ASTMatchersInternal.h - Structural query framework -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -38,23 +38,42 @@
 #include "clang/AST/ASTTypeTraits.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
-#include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
-#include "clang/AST/ExprObjC.h"
+#include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/Stmt.h"
-#include "clang/AST/StmtCXX.h"
-#include "clang/AST/StmtObjC.h"
+#include "clang/AST/TemplateName.h"
 #include "clang/AST/Type.h"
+#include "clang/AST/TypeLoc.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/OperatorKinds.h"
+#include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/ManagedStatic.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
 #include <map>
 #include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
 #include <vector>
 
 namespace clang {
+
+class ASTContext;
+
 namespace ast_matchers {
 
 class BoundNodes;
@@ -158,7 +177,7 @@
   /// Note that we're using std::map here, as for memoization:
   /// - we need a comparison operator
   /// - we need an assignment operator
-  typedef std::map<std::string, ast_type_traits::DynTypedNode> IDToNodeMap;
+  using IDToNodeMap = std::map<std::string, ast_type_traits::DynTypedNode>;
 
   const IDToNodeMap &getMap() const {
     return NodeMap;
@@ -188,7 +207,7 @@
   /// BoundNodesTree.
   class Visitor {
   public:
-    virtual ~Visitor() {}
+    virtual ~Visitor() = default;
 
     /// \brief Called multiple times during a single call to VisitMatches(...).
     ///
@@ -248,7 +267,7 @@
 class DynMatcherInterface
     : public llvm::ThreadSafeRefCountedBase<DynMatcherInterface> {
 public:
-  virtual ~DynMatcherInterface() {}
+  virtual ~DynMatcherInterface() = default;
 
   /// \brief Returns true if \p DynNode can be matched.
   ///
@@ -317,26 +336,29 @@
   /// \brief Takes ownership of the provided implementation pointer.
   template <typename T>
   DynTypedMatcher(MatcherInterface<T> *Implementation)
-      : AllowBind(false),
-        SupportedKind(ast_type_traits::ASTNodeKind::getFromNodeKind<T>()),
+      : SupportedKind(ast_type_traits::ASTNodeKind::getFromNodeKind<T>()),
         RestrictKind(SupportedKind), Implementation(Implementation) {}
 
   /// \brief Construct from a variadic function.
   enum VariadicOperator {
     /// \brief Matches nodes for which all provided matchers match.
     VO_AllOf,
+
     /// \brief Matches nodes for which at least one of the provided matchers
     /// matches.
     VO_AnyOf,
+
     /// \brief Matches nodes for which at least one of the provided matchers
     /// matches, but doesn't stop at the first match.
     VO_EachOf,
+
     /// \brief Matches nodes that do not match the provided matcher.
     ///
     /// Uses the variadic matcher interface, but fails if
     /// InnerMatchers.size() != 1.
     VO_UnaryNot
   };
+
   static DynTypedMatcher
   constructVariadic(VariadicOperator Op,
                     ast_type_traits::ASTNodeKind SupportedKind,
@@ -382,7 +404,7 @@
   /// include both in the ID to make it unique.
   ///
   /// \c MatcherIDType supports operator< and provides strict weak ordering.
-  typedef std::pair<ast_type_traits::ASTNodeKind, uint64_t> MatcherIDType;
+  using MatcherIDType = std::pair<ast_type_traits::ASTNodeKind, uint64_t>;
   MatcherIDType getID() const {
     /// FIXME: Document the requirements this imposes on matcher
     /// implementations (no new() implementation_ during a Matches()).
@@ -428,13 +450,12 @@
  DynTypedMatcher(ast_type_traits::ASTNodeKind SupportedKind,
                  ast_type_traits::ASTNodeKind RestrictKind,
                  IntrusiveRefCntPtr<DynMatcherInterface> Implementation)
-     : AllowBind(false),
-       SupportedKind(SupportedKind),
-       RestrictKind(RestrictKind),
+     : SupportedKind(SupportedKind), RestrictKind(RestrictKind),
        Implementation(std::move(Implementation)) {}
 
-  bool AllowBind;
+  bool AllowBind = false;
   ast_type_traits::ASTNodeKind SupportedKind;
+
   /// \brief A potentially stricter node kind.
   ///
   /// It allows to perform implicit and dynamic cast of matchers without
@@ -545,6 +566,7 @@
 private:
   // For Matcher<T> <=> Matcher<U> conversions.
   template <typename U> friend class Matcher;
+
   // For DynTypedMatcher::unconditionalConvertTo<T>.
   friend class DynTypedMatcher;
 
@@ -618,8 +640,8 @@
 // Metafunction to determine if type T has a member called getDecl.
 template <typename Ty>
 class has_getDecl {
-  typedef char yes[1];
-  typedef char no[2];
+  using yes = char[1];
+  using no = char[2];
 
   template <typename Inner>
   static yes& test(Inner *I, decltype(I->getDecl()) * = nullptr);
@@ -741,8 +763,7 @@
   /// matcher matches on it.
   bool matchesSpecialized(const Type &Node, ASTMatchFinder *Finder,
                           BoundNodesTreeBuilder *Builder) const {
-
-    // DeducedType does not have declarations of its own, so
+    // DeducedType does not have declarations of its own, so
     // match the deduced type instead.
     const Type *EffectiveType = &Node;
     if (const auto *S = dyn_cast<DeducedType>(&Node)) {
@@ -917,6 +938,7 @@
   enum TraversalKind {
     /// Will traverse any child nodes.
     TK_AsIs,
+
     /// Will not traverse implicit casts and parentheses.
     TK_IgnoreImplicitCastsAndParentheses
   };
@@ -925,6 +947,7 @@
   enum BindKind {
     /// Stop at the first match and only bind the first match.
     BK_First,
+
     /// Create results for all combinations of bindings that match.
     BK_All
   };
@@ -933,11 +956,12 @@
   enum AncestorMatchMode {
     /// All ancestors.
     AMM_All,
+
     /// Direct parent only.
     AMM_ParentOnly
   };
 
-  virtual ~ASTMatchFinder() {}
+  virtual ~ASTMatchFinder() = default;
 
   /// \brief Returns true if the given class is directly or indirectly derived
   /// from a base type matching \c base.
@@ -960,7 +984,7 @@
                   std::is_base_of<TypeLoc, T>::value ||
                   std::is_base_of<QualType, T>::value,
                   "unsupported type for recursive matching");
-   return matchesChildOf(ast_type_traits::DynTypedNode::create(Node),
+    return matchesChildOf(ast_type_traits::DynTypedNode::create(Node),
                           Matcher, Builder, Traverse, Bind);
   }
 
@@ -1023,17 +1047,17 @@
 
 template <typename T1, typename... Ts> struct TypeList<T1, Ts...> {
   /// \brief The first type on the list.
-  typedef T1 head;
+  using head = T1;
 
   /// \brief A sublist with the tail. ie everything but the head.
   ///
   /// This type is used to do recursion. TypeList<>/EmptyTypeList indicates the
   /// end of the list.
-  typedef TypeList<Ts...> tail;
+  using tail = TypeList<Ts...>;
 };
 
 /// \brief The empty type list.
-typedef TypeList<> EmptyTypeList;
+using EmptyTypeList = TypeList<>;
 
 /// \brief Helper meta-function to determine if some type \c T is present or
 ///   a parent type in the list.
@@ -1051,8 +1075,9 @@
 /// \brief A "type list" that contains all types.
 ///
 /// Useful for matchers like \c anything and \c unless.
-typedef TypeList<Decl, Stmt, NestedNameSpecifier, NestedNameSpecifierLoc,
-                 QualType, Type, TypeLoc, CXXCtorInitializer> AllNodeBaseTypes;
+using AllNodeBaseTypes =
+    TypeList<Decl, Stmt, NestedNameSpecifier, NestedNameSpecifierLoc, QualType,
+             Type, TypeLoc, CXXCtorInitializer>;
 
 /// \brief Helper meta-function to extract the argument out of a function of
 ///   type void(Arg).
@@ -1060,21 +1085,22 @@
 /// See AST_POLYMORPHIC_SUPPORTED_TYPES for details.
 template <class T> struct ExtractFunctionArgMeta;
 template <class T> struct ExtractFunctionArgMeta<void(T)> {
-  typedef T type;
+  using type = T;
 };
 
 /// \brief Default type lists for ArgumentAdaptingMatcher matchers.
-typedef AllNodeBaseTypes AdaptativeDefaultFromTypes;
-typedef TypeList<Decl, Stmt, NestedNameSpecifier, NestedNameSpecifierLoc,
-                 TypeLoc, QualType> AdaptativeDefaultToTypes;
+using AdaptativeDefaultFromTypes = AllNodeBaseTypes;
+using AdaptativeDefaultToTypes =
+    TypeList<Decl, Stmt, NestedNameSpecifier, NestedNameSpecifierLoc, TypeLoc,
+             QualType>;
 
 /// \brief All types that are supported by HasDeclarationMatcher above.
-typedef TypeList<CallExpr, CXXConstructExpr, CXXNewExpr, DeclRefExpr, EnumType,
-                 ElaboratedType, InjectedClassNameType, LabelStmt,
-                 AddrLabelExpr, MemberExpr, QualType, RecordType, TagType,
-                 TemplateSpecializationType, TemplateTypeParmType, TypedefType,
-                 UnresolvedUsingType>
-    HasDeclarationSupportedTypes;
+using HasDeclarationSupportedTypes =
+    TypeList<CallExpr, CXXConstructExpr, CXXNewExpr, DeclRefExpr, EnumType,
+             ElaboratedType, InjectedClassNameType, LabelStmt, AddrLabelExpr,
+             MemberExpr, QualType, RecordType, TagType,
+             TemplateSpecializationType, TemplateTypeParmType, TypedefType,
+             UnresolvedUsingType>;
 
 /// \brief Converts a \c Matcher<T> to a matcher of desired type \c To by
 /// "adapting" a \c To into a \c T.
@@ -1098,7 +1124,7 @@
     explicit Adaptor(const Matcher<T> &InnerMatcher)
         : InnerMatcher(InnerMatcher) {}
 
-    typedef ToTypes ReturnTypes;
+    using ReturnTypes = ToTypes;
 
     template <typename To> operator Matcher<To>() const {
       return Matcher<To>(new ArgumentAdapterT<To, T>(InnerMatcher));
@@ -1135,7 +1161,8 @@
           typename ReturnTypesF = void(AllNodeBaseTypes)>
 class PolymorphicMatcherWithParam0 {
 public:
-  typedef typename ExtractFunctionArgMeta<ReturnTypesF>::type ReturnTypes;
+  using ReturnTypes = typename ExtractFunctionArgMeta<ReturnTypesF>::type;
+
   template <typename T>
   operator Matcher<T>() const {
     static_assert(TypeListContainsSuperOf<ReturnTypes, T>::value,
@@ -1152,7 +1179,7 @@
   explicit PolymorphicMatcherWithParam1(const P1 &Param1)
       : Param1(Param1) {}
 
-  typedef typename ExtractFunctionArgMeta<ReturnTypesF>::type ReturnTypes;
+  using ReturnTypes = typename ExtractFunctionArgMeta<ReturnTypesF>::type;
 
   template <typename T>
   operator Matcher<T>() const {
@@ -1173,7 +1200,7 @@
   PolymorphicMatcherWithParam2(const P1 &Param1, const P2 &Param2)
       : Param1(Param1), Param2(Param2) {}
 
-  typedef typename ExtractFunctionArgMeta<ReturnTypesF>::type ReturnTypes;
+  using ReturnTypes = typename ExtractFunctionArgMeta<ReturnTypesF>::type;
 
   template <typename T>
   operator Matcher<T>() const {
@@ -1192,8 +1219,8 @@
 /// This is useful when a matcher syntactically requires a child matcher,
 /// but the context doesn't care. See for example: anything().
 class TrueMatcher {
- public:
-  typedef AllNodeBaseTypes ReturnTypes;
+public:
+  using ReturnTypes = AllNodeBaseTypes;
 
   template <typename T>
   operator Matcher<T>() const {
@@ -1239,7 +1266,6 @@
 /// ChildT must be an AST base type.
 template <typename T, typename ChildT>
 class HasMatcher : public WrapperMatcherInterface<T> {
-
 public:
   explicit HasMatcher(const Matcher<ChildT> &ChildMatcher)
       : HasMatcher::WrapperMatcherInterface(ChildMatcher) {}
@@ -1333,7 +1359,7 @@
 BindableMatcher<T> makeAllOfComposite(
     ArrayRef<const Matcher<T> *> InnerMatchers) {
   // For the size() == 0 case, we return a "true" matcher.
-  if (InnerMatchers.size() == 0) {
+  if (InnerMatchers.empty()) {
     return BindableMatcher<T>(TrueMatcher());
   }
   // For the size() == 1 case, we simply return that one matcher.
@@ -1342,7 +1368,8 @@
     return BindableMatcher<T>(*InnerMatchers[0]);
   }
 
-  typedef llvm::pointee_iterator<const Matcher<T> *const *> PI;
+  using PI = llvm::pointee_iterator<const Matcher<T> *const *>;
+
   std::vector<DynTypedMatcher> DynMatchers(PI(InnerMatchers.begin()),
                                            PI(InnerMatchers.end()));
   return BindableMatcher<T>(
@@ -1635,12 +1662,13 @@
           typename ReturnTypesF>
 class TypeTraversePolymorphicMatcher {
 private:
-  typedef TypeTraversePolymorphicMatcher<InnerTBase, Getter, MatcherImpl,
-                                         ReturnTypesF> Self;
+  using Self = TypeTraversePolymorphicMatcher<InnerTBase, Getter, MatcherImpl,
+                                              ReturnTypesF>;
+
   static Self create(ArrayRef<const Matcher<InnerTBase> *> InnerMatchers);
 
 public:
-  typedef typename ExtractFunctionArgMeta<ReturnTypesF>::type ReturnTypes;
+  using ReturnTypes = typename ExtractFunctionArgMeta<ReturnTypesF>::type;
 
   explicit TypeTraversePolymorphicMatcher(
       ArrayRef<const Matcher<InnerTBase> *> InnerMatchers)
@@ -1667,6 +1695,7 @@
 template <typename Matcher, Matcher (*Func)()> class MemoizedMatcher {
   struct Wrapper {
     Wrapper() : M(Func()) {}
+
     Matcher M;
   };
 
@@ -1712,6 +1741,7 @@
   bool operator()(const internal::BoundNodesMap &Nodes) const {
     return Nodes.getNode(ID) != Node;
   }
+
   std::string ID;
   ast_type_traits::DynTypedNode Node;
 };
@@ -1767,9 +1797,10 @@
   return Node.getSubStmt();
 }
 
+} // namespace internal
 
-} // end namespace internal
-} // end namespace ast_matchers
-} // end namespace clang
+} // namespace ast_matchers
+
+} // namespace clang
 
 #endif // LLVM_CLANG_ASTMATCHERS_ASTMATCHERSINTERNAL_H
diff --git a/include/clang/ASTMatchers/ASTMatchersMacros.h b/include/clang/ASTMatchers/ASTMatchersMacros.h
index ddc4837..1933324 100644
--- a/include/clang/ASTMatchers/ASTMatchersMacros.h
+++ b/include/clang/ASTMatchers/ASTMatchersMacros.h
@@ -359,14 +359,30 @@
               ::clang::ast_matchers::internal::BoundNodesTreeBuilder *Builder) \
           const
 
-/// \brief Creates a variadic matcher for both a specific \c Type as well as
-/// the corresponding \c TypeLoc.
-#define AST_TYPE_MATCHER(NodeType, MatcherName)                                \
-  const ::clang::ast_matchers::internal::VariadicDynCastAllOfMatcher<          \
-      Type, NodeType> MatcherName
 // FIXME: add a matcher for TypeLoc derived classes using its custom casting
 // API (no longer dyn_cast) if/when we need such matching
 
+#define AST_TYPE_TRAVERSE_MATCHER_DECL(MatcherName, FunctionName,              \
+                                       ReturnTypesF)                           \
+  namespace internal {                                                         \
+  template <typename T> struct TypeMatcher##MatcherName##Getter {              \
+    static QualType (T::*value())() const { return &T::FunctionName; }         \
+  };                                                                           \
+  }                                                                            \
+  extern const ::clang::ast_matchers::internal::                               \
+      TypeTraversePolymorphicMatcher<                                          \
+          QualType,                                                            \
+          ::clang::ast_matchers::internal::TypeMatcher##MatcherName##Getter,   \
+          ::clang::ast_matchers::internal::TypeTraverseMatcher,                \
+          ReturnTypesF>::Func MatcherName
+
+#define AST_TYPE_TRAVERSE_MATCHER_DEF(MatcherName, ReturnTypesF)               \
+  const ::clang::ast_matchers::internal::TypeTraversePolymorphicMatcher<       \
+      QualType,                                                                \
+      ::clang::ast_matchers::internal::TypeMatcher##MatcherName##Getter,       \
+      ::clang::ast_matchers::internal::TypeTraverseMatcher,                    \
+      ReturnTypesF>::Func MatcherName
+
 /// \brief AST_TYPE_TRAVERSE_MATCHER(MatcherName, FunctionName) defines
 /// the matcher \c MatcherName that can be used to traverse from one \c Type
 /// to another.
@@ -386,6 +402,30 @@
       ::clang::ast_matchers::internal::TypeTraverseMatcher,                    \
       ReturnTypesF>::Func MatcherName
 
+#define AST_TYPELOC_TRAVERSE_MATCHER_DECL(MatcherName, FunctionName,           \
+                                          ReturnTypesF)                        \
+  namespace internal {                                                         \
+  template <typename T> struct TypeLocMatcher##MatcherName##Getter {           \
+    static TypeLoc (T::*value())() const { return &T::FunctionName##Loc; }     \
+  };                                                                           \
+  }                                                                            \
+  extern const ::clang::ast_matchers::internal::                               \
+      TypeTraversePolymorphicMatcher<                                          \
+          TypeLoc,                                                             \
+          ::clang::ast_matchers::internal::                                    \
+              TypeLocMatcher##MatcherName##Getter,                             \
+          ::clang::ast_matchers::internal::TypeLocTraverseMatcher,             \
+          ReturnTypesF>::Func MatcherName##Loc;                                \
+  AST_TYPE_TRAVERSE_MATCHER_DECL(MatcherName, FunctionName##Type, ReturnTypesF)
+
+#define AST_TYPELOC_TRAVERSE_MATCHER_DEF(MatcherName, ReturnTypesF)            \
+  const ::clang::ast_matchers::internal::TypeTraversePolymorphicMatcher<       \
+      TypeLoc,                                                                 \
+      ::clang::ast_matchers::internal::TypeLocMatcher##MatcherName##Getter,    \
+      ::clang::ast_matchers::internal::TypeLocTraverseMatcher,                 \
+      ReturnTypesF>::Func MatcherName##Loc;                                    \
+  AST_TYPE_TRAVERSE_MATCHER_DEF(MatcherName, ReturnTypesF)
+
 /// \brief AST_TYPELOC_TRAVERSE_MATCHER(MatcherName, FunctionName) works
 /// identical to \c AST_TYPE_TRAVERSE_MATCHER but operates on \c TypeLocs.
 #define AST_TYPELOC_TRAVERSE_MATCHER(MatcherName, FunctionName, ReturnTypesF)  \
diff --git a/include/clang/ASTMatchers/Dynamic/Parser.h b/include/clang/ASTMatchers/Dynamic/Parser.h
index 5ec4a9a..e8fcf0a 100644
--- a/include/clang/ASTMatchers/Dynamic/Parser.h
+++ b/include/clang/ASTMatchers/Dynamic/Parser.h
@@ -1,4 +1,4 @@
-//===--- Parser.h - Matcher expression parser -----*- C++ -*-===//
+//===- Parser.h - Matcher expression parser ---------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,7 +6,7 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Simple matcher expression parser.
 ///
@@ -30,24 +30,28 @@
 /// <Identifier>        := [a-zA-Z]+
 /// <ArgumentList>      := <Expression> | <Expression>,<ArgumentList>
 /// \endcode
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
 #define LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
 
-#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
+#include "clang/ASTMatchers/ASTMatchersInternal.h"
 #include "clang/ASTMatchers/Dynamic/Registry.h"
 #include "clang/ASTMatchers/Dynamic/VariantValue.h"
-#include "clang/Basic/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include <utility>
+#include <vector>
 
 namespace clang {
 namespace ast_matchers {
 namespace dynamic {
 
+class Diagnostics;
+
 /// \brief Matcher expression parser.
 class Parser {
 public:
@@ -124,8 +128,8 @@
   /// \brief Sema implementation that uses the matcher registry to process the
   ///   tokens.
   class RegistrySema : public Parser::Sema {
-   public:
-     ~RegistrySema() override;
+  public:
+    ~RegistrySema() override;
 
     llvm::Optional<MatcherCtor>
     lookupMatcherCtor(StringRef MatcherName) override;
@@ -143,7 +147,7 @@
     getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) override;
   };
 
-  typedef llvm::StringMap<VariantValue> NamedValueMap;
+  using NamedValueMap = llvm::StringMap<VariantValue>;
 
   /// \brief Parse a matcher expression.
   ///
@@ -247,13 +251,14 @@
   const NamedValueMap *const NamedValues;
   Diagnostics *const Error;
 
-  typedef std::vector<std::pair<MatcherCtor, unsigned> > ContextStackTy;
+  using ContextStackTy = std::vector<std::pair<MatcherCtor, unsigned>>;
+
   ContextStackTy ContextStack;
   std::vector<MatcherCompletion> Completions;
 };
 
-}  // namespace dynamic
-}  // namespace ast_matchers
-}  // namespace clang
+} // namespace dynamic
+} // namespace ast_matchers
+} // namespace clang
 
-#endif  // LLVM_CLANG_AST_MATCHERS_DYNAMIC_PARSER_H
+#endif // LLVM_CLANG_AST_MATCHERS_DYNAMIC_PARSER_H
diff --git a/include/clang/ASTMatchers/Dynamic/Registry.h b/include/clang/ASTMatchers/Dynamic/Registry.h
index 7bba95d..2774912 100644
--- a/include/clang/ASTMatchers/Dynamic/Registry.h
+++ b/include/clang/ASTMatchers/Dynamic/Registry.h
@@ -1,4 +1,4 @@
-//===--- Registry.h - Matcher registry --------------------------*- C++ -*-===//
+//===- Registry.h - Matcher registry ----------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,12 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Registry of all known matchers.
 ///
 /// The registry provides a generic interface to construct any matcher by name.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_ASTMATCHERS_DYNAMIC_REGISTRY_H
@@ -34,9 +34,9 @@
 
 class MatcherDescriptor;
 
-} // end namespace internal
+} // namespace internal
 
-typedef const internal::MatcherDescriptor *MatcherCtor;
+using MatcherCtor = const internal::MatcherDescriptor *;
 
 struct MatcherCompletion {
   MatcherCompletion() = default;
@@ -129,8 +129,8 @@
                                               Diagnostics *Error);
 };
 
-} // end namespace dynamic
-} // end namespace ast_matchers
-} // end namespace clang
+} // namespace dynamic
+} // namespace ast_matchers
+} // namespace clang
 
 #endif // LLVM_CLANG_AST_MATCHERS_DYNAMIC_REGISTRY_H
diff --git a/include/clang/Analysis/Analyses/ThreadSafetyTIL.h b/include/clang/Analysis/Analyses/ThreadSafetyTIL.h
index 1735162..0a58d2a 100644
--- a/include/clang/Analysis/Analyses/ThreadSafetyTIL.h
+++ b/include/clang/Analysis/Analyses/ThreadSafetyTIL.h
@@ -92,6 +92,7 @@
   BOP_Neq,          //  !=
   BOP_Lt,           //  <
   BOP_Leq,          //  <=
+  BOP_Cmp,          //  <=>
   BOP_LogicAnd,     //  &&  (no short-circuit)
   BOP_LogicOr       //  ||  (no short-circuit)
 };
diff --git a/include/clang/Analysis/AnalysisDeclContext.h b/include/clang/Analysis/AnalysisDeclContext.h
index ebec0d2..63d4676 100644
--- a/include/clang/Analysis/AnalysisDeclContext.h
+++ b/include/clang/Analysis/AnalysisDeclContext.h
@@ -16,6 +16,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSISDECLCONTEXT_H
 
 #include "clang/AST/Decl.h"
+#include "clang/Analysis/BodyFarm.h"
 #include "clang/Analysis/CFG.h"
 #include "clang/Analysis/CodeInjector.h"
 #include "llvm/ADT/DenseMap.h"
@@ -264,7 +265,11 @@
 
   virtual void Profile(llvm::FoldingSetNodeID &ID) = 0;
 
-  void dumpStack(raw_ostream &OS, StringRef Indent = "") const;
+  void dumpStack(
+      raw_ostream &OS, StringRef Indent = "", const char *NL = "\n",
+      const char *Sep = "",
+      std::function<void(const LocationContext *)> printMoreInfoPerContext =
+          [](const LocationContext *) {}) const;
   void dumpStack() const;
 
 public:
@@ -416,24 +421,26 @@
   /// Pointer to an interface that can provide function bodies for
   /// declarations from external source.
   std::unique_ptr<CodeInjector> Injector;
-  
+
+  /// A factory for creating and caching implementations for common
+  /// methods during the analysis.
+  BodyFarm FunctionBodyFarm;
+
   /// Flag to indicate whether or not bodies should be synthesized
   /// for well-known functions.
   bool SynthesizeBodies;
 
 public:
-  AnalysisDeclContextManager(bool useUnoptimizedCFG = false,
+  AnalysisDeclContextManager(ASTContext &ASTCtx, bool useUnoptimizedCFG = false,
                              bool addImplicitDtors = false,
                              bool addInitializers = false,
                              bool addTemporaryDtors = false,
-                             bool addLifetime = false,
-                             bool addLoopExit = false,
+                             bool addLifetime = false, bool addLoopExit = false,
                              bool synthesizeBodies = false,
                              bool addStaticInitBranches = false,
                              bool addCXXNewAllocator = true,
-                             CodeInjector* injector = nullptr);
-
-  ~AnalysisDeclContextManager();
+                             bool addRichCXXConstructors = true,
+                             CodeInjector *injector = nullptr);
 
   AnalysisDeclContext *getContext(const Decl *D);
 
@@ -472,6 +479,9 @@
     return LocContexts.getStackFrame(getContext(D), Parent, S, Blk, Idx);
   }
 
+  /// Get a reference to {@code BodyFarm} instance.
+  BodyFarm &getBodyFarm();
+
   /// Discard all previously created AnalysisDeclContexts.
   void clear();
 
diff --git a/include/clang/Analysis/BodyFarm.h b/include/clang/Analysis/BodyFarm.h
new file mode 100644
index 0000000..ff0859b
--- /dev/null
+++ b/include/clang/Analysis/BodyFarm.h
@@ -0,0 +1,54 @@
+//== BodyFarm.h - Factory for conjuring up fake bodies -------------*- C++ -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BodyFarm is a factory for creating faux implementations for functions/methods
+// for analysis purposes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H
+#define LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H
+
+#include "clang/AST/DeclBase.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+
+namespace clang {
+
+class ASTContext;
+class FunctionDecl;
+class ObjCMethodDecl;
+class ObjCPropertyDecl;
+class Stmt;
+class CodeInjector;
+
+class BodyFarm {
+public:
+  BodyFarm(ASTContext &C, CodeInjector *injector) : C(C), Injector(injector) {}
+
+  /// Factory method for creating bodies for ordinary functions.
+  Stmt *getBody(const FunctionDecl *D);
+
+  /// Factory method for creating bodies for Objective-C properties.
+  Stmt *getBody(const ObjCMethodDecl *D);
+
+  /// Remove copy constructor to avoid accidental copying.
+  BodyFarm(const BodyFarm &other) = delete;
+
+private:
+  typedef llvm::DenseMap<const Decl *, Optional<Stmt *>> BodyMap;
+
+  ASTContext &C;
+  BodyMap Bodies;
+  CodeInjector *Injector;
+};
+} // namespace clang
+
+#endif
diff --git a/include/clang/Analysis/CFG.h b/include/clang/Analysis/CFG.h
index 8cdd782..1465163 100644
--- a/include/clang/Analysis/CFG.h
+++ b/include/clang/Analysis/CFG.h
@@ -1,4 +1,4 @@
-//===--- CFG.h - Classes for representing and building CFGs------*- C++ -*-===//
+//===- CFG.h - Classes for representing and building CFGs -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,51 +15,56 @@
 #ifndef LLVM_CLANG_ANALYSIS_CFG_H
 #define LLVM_CLANG_ANALYSIS_CFG_H
 
-#include "clang/AST/Stmt.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/Analysis/Support/BumpVector.h"
-#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/LLVM.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/Casting.h"
 #include "llvm/Support/raw_ostream.h"
 #include <bitset>
 #include <cassert>
+#include <cstddef>
 #include <iterator>
 #include <memory>
+#include <vector>
 
 namespace clang {
-  class CXXDestructorDecl;
-  class Decl;
-  class Stmt;
-  class Expr;
-  class FieldDecl;
-  class VarDecl;
-  class CXXCtorInitializer;
-  class CXXBaseSpecifier;
-  class CXXBindTemporaryExpr;
-  class CFG;
-  class PrinterHelper;
-  class LangOptions;
-  class ASTContext;
-  class CXXRecordDecl;
-  class CXXDeleteExpr;
-  class CXXNewExpr;
-  class BinaryOperator;
+
+class ASTContext;
+class BinaryOperator;
+class CFG;
+class ConstructionContext;
+class CXXBaseSpecifier;
+class CXXBindTemporaryExpr;
+class CXXCtorInitializer;
+class CXXDeleteExpr;
+class CXXDestructorDecl;
+class CXXNewExpr;
+class CXXRecordDecl;
+class Decl;
+class FieldDecl;
+class LangOptions;
+class VarDecl;
 
 /// CFGElement - Represents a top-level expression in a basic block.
 class CFGElement {
 public:
   enum Kind {
     // main kind
-    Statement,
     Initializer,
     NewAllocator,
     LifetimeEnds,
     LoopExit,
+    // stmt kind
+    Statement,
+    Constructor,
+    STMT_BEGIN = Statement,
+    STMT_END = Constructor,
     // dtor kind
     AutomaticObjectDtor,
     DeleteDtor,
@@ -76,14 +81,14 @@
   llvm::PointerIntPair<void *, 2> Data2;
 
   CFGElement(Kind kind, const void *Ptr1, const void *Ptr2 = nullptr)
-    : Data1(const_cast<void*>(Ptr1), ((unsigned) kind) & 0x3),
-      Data2(const_cast<void*>(Ptr2), (((unsigned) kind) >> 2) & 0x3) {
+      : Data1(const_cast<void*>(Ptr1), ((unsigned) kind) & 0x3),
+        Data2(const_cast<void*>(Ptr2), (((unsigned) kind) >> 2) & 0x3) {
     assert(getKind() == kind);
   }
 
-  CFGElement() {}
-public:
+  CFGElement() = default;
 
+public:
   /// \brief Convert to the specified CFGElement type, asserting that this
   /// CFGElement is of the desired type.
   template<typename T>
@@ -117,7 +122,9 @@
 
 class CFGStmt : public CFGElement {
 public:
-  CFGStmt(Stmt *S) : CFGElement(Statement, S) {}
+  explicit CFGStmt(Stmt *S, Kind K = Statement) : CFGElement(K, S) {
+    assert(isKind(*this));
+  }
 
   const Stmt *getStmt() const {
     return static_cast<const Stmt *>(Data1.getPointer());
@@ -125,9 +132,41 @@
 
 private:
   friend class CFGElement;
-  CFGStmt() {}
+
   static bool isKind(const CFGElement &E) {
-    return E.getKind() == Statement;
+    return E.getKind() >= STMT_BEGIN && E.getKind() <= STMT_END;
+  }
+
+protected:
+  CFGStmt() = default;
+};
+
+/// CFGConstructor - Represents C++ constructor call. Maintains information
+/// necessary to figure out what memory is being initialized by the
+/// constructor expression. For now this is only used by the analyzer's CFG.
+class CFGConstructor : public CFGStmt {
+public:
+  explicit CFGConstructor(CXXConstructExpr *CE, const ConstructionContext *C)
+      : CFGStmt(CE, Constructor) {
+    assert(C);
+    Data2.setPointer(const_cast<ConstructionContext *>(C));
+  }
+
+  const ConstructionContext *getConstructionContext() const {
+    return static_cast<ConstructionContext *>(Data2.getPointer());
+  }
+
+  QualType getType() const {
+    return cast<CXXConstructExpr>(getStmt())->getType();
+  }
+
+private:
+  friend class CFGElement;
+
+  CFGConstructor() = default;
+
+  static bool isKind(const CFGElement &E) {
+    return E.getKind() == Constructor;
   }
 };
 
@@ -135,7 +174,7 @@
 /// constructor's initialization list.
 class CFGInitializer : public CFGElement {
 public:
-  CFGInitializer(CXXCtorInitializer *initializer)
+  explicit CFGInitializer(CXXCtorInitializer *initializer)
       : CFGElement(Initializer, initializer) {}
 
   CXXCtorInitializer* getInitializer() const {
@@ -144,7 +183,9 @@
 
 private:
   friend class CFGElement;
-  CFGInitializer() {}
+
+  CFGInitializer() = default;
+
   static bool isKind(const CFGElement &E) {
     return E.getKind() == Initializer;
   }
@@ -163,7 +204,9 @@
 
 private:
   friend class CFGElement;
-  CFGNewAllocator() {}
+
+  CFGNewAllocator() = default;
+
   static bool isKind(const CFGElement &elem) {
     return elem.getKind() == NewAllocator;
   }
@@ -177,19 +220,20 @@
 /// entered.
 class CFGLoopExit : public CFGElement {
 public:
-    explicit CFGLoopExit(const Stmt *stmt)
-            : CFGElement(LoopExit, stmt) {}
+  explicit CFGLoopExit(const Stmt *stmt) : CFGElement(LoopExit, stmt) {}
 
-    const Stmt *getLoopStmt() const {
-      return static_cast<Stmt *>(Data1.getPointer());
-    }
+  const Stmt *getLoopStmt() const {
+    return static_cast<Stmt *>(Data1.getPointer());
+  }
 
 private:
-    friend class CFGElement;
-    CFGLoopExit() {}
-    static bool isKind(const CFGElement &elem) {
-      return elem.getKind() == LoopExit;
-    }
+  friend class CFGElement;
+
+  CFGLoopExit() = default;
+
+  static bool isKind(const CFGElement &elem) {
+    return elem.getKind() == LoopExit;
+  }
 };
 
 /// Represents the point where the lifetime of an automatic object ends
@@ -208,7 +252,9 @@
 
 private:
   friend class CFGElement;
-  CFGLifetimeEnds() {}
+
+  CFGLifetimeEnds() = default;
+
   static bool isKind(const CFGElement &elem) {
     return elem.getKind() == LifetimeEnds;
   }
@@ -218,7 +264,8 @@
 /// by compiler on various occasions.
 class CFGImplicitDtor : public CFGElement {
 protected:
-  CFGImplicitDtor() {}
+  CFGImplicitDtor() = default;
+
   CFGImplicitDtor(Kind kind, const void *data1, const void *data2 = nullptr)
     : CFGElement(kind, data1, data2) {
     assert(kind >= DTOR_BEGIN && kind <= DTOR_END);
@@ -230,6 +277,7 @@
 
 private:
   friend class CFGElement;
+
   static bool isKind(const CFGElement &E) {
     Kind kind = E.getKind();
     return kind >= DTOR_BEGIN && kind <= DTOR_END;
@@ -255,7 +303,9 @@
 
 private:
   friend class CFGElement;
-  CFGAutomaticObjDtor() {}
+
+  CFGAutomaticObjDtor() = default;
+
   static bool isKind(const CFGElement &elem) {
     return elem.getKind() == AutomaticObjectDtor;
   }
@@ -279,7 +329,9 @@
 
 private:
   friend class CFGElement;
-  CFGDeleteDtor() {}
+
+  CFGDeleteDtor() = default;
+
   static bool isKind(const CFGElement &elem) {
     return elem.getKind() == DeleteDtor;
   }
@@ -298,7 +350,9 @@
 
 private:
   friend class CFGElement;
-  CFGBaseDtor() {}
+
+  CFGBaseDtor() = default;
+
   static bool isKind(const CFGElement &E) {
     return E.getKind() == BaseDtor;
   }
@@ -317,7 +371,9 @@
 
 private:
   friend class CFGElement;
-  CFGMemberDtor() {}
+
+  CFGMemberDtor() = default;
+
   static bool isKind(const CFGElement &E) {
     return E.getKind() == MemberDtor;
   }
@@ -336,7 +392,9 @@
 
 private:
   friend class CFGElement;
-  CFGTemporaryDtor() {}
+
+  CFGTemporaryDtor() = default;
+
   static bool isKind(const CFGElement &E) {
     return E.getKind() == TemporaryDtor;
   }
@@ -350,8 +408,9 @@
 /// of matching full expression.
 class CFGTerminator {
   llvm::PointerIntPair<Stmt *, 1> Data;
+
 public:
-  CFGTerminator() {}
+  CFGTerminator() = default;
   CFGTerminator(Stmt *S, bool TemporaryDtorsBranch = false)
       : Data(S, TemporaryDtorsBranch) {}
 
@@ -397,21 +456,23 @@
 ///     &&, ||          expression that uses result of && or ||, RHS
 ///
 /// But note that any of that may be NULL in case of optimized-out edges.
-///
 class CFGBlock {
   class ElementList {
-    typedef BumpVector<CFGElement> ImplTy;
+    using ImplTy = BumpVector<CFGElement>;
+
     ImplTy Impl;
+
   public:
     ElementList(BumpVectorContext &C) : Impl(C, 4) {}
 
-    typedef std::reverse_iterator<ImplTy::iterator>       iterator;
-    typedef std::reverse_iterator<ImplTy::const_iterator> const_iterator;
-    typedef ImplTy::iterator                              reverse_iterator;
-    typedef ImplTy::const_iterator                       const_reverse_iterator;
-    typedef ImplTy::const_reference                       const_reference;
+    using iterator = std::reverse_iterator<ImplTy::iterator>;
+    using const_iterator = std::reverse_iterator<ImplTy::const_iterator>;
+    using reverse_iterator = ImplTy::iterator;
+    using const_reverse_iterator = ImplTy::const_iterator;
+    using const_reference = ImplTy::const_reference;
 
     void push_back(CFGElement e, BumpVectorContext &C) { Impl.push_back(e, C); }
+
     reverse_iterator insert(reverse_iterator I, size_t Cnt, CFGElement E,
         BumpVectorContext &C) {
       return Impl.insert(I, Cnt, E, C);
@@ -429,10 +490,10 @@
     const_reverse_iterator rbegin() const { return Impl.begin(); }
     const_reverse_iterator rend() const { return Impl.end(); }
 
-   CFGElement operator[](size_t i) const  {
-     assert(i < Impl.size());
-     return Impl[Impl.size() - 1 - i];
-   }
+    CFGElement operator[](size_t i) const  {
+      assert(i < Impl.size());
+      return Impl[Impl.size() - 1 - i];
+    }
 
     size_t size() const { return Impl.size(); }
     bool empty() const { return Impl.empty(); }
@@ -444,7 +505,7 @@
   /// Label - An (optional) label that prefixes the executable
   ///  statements in the block.  When this variable is non-NULL, it is
   ///  either an instance of LabelStmt, SwitchCase or CXXCatchStmt.
-  Stmt *Label;
+  Stmt *Label = nullptr;
 
   /// Terminator - The terminator for a basic block that
   ///  indicates the type of control-flow that occurs between a block
@@ -454,7 +515,7 @@
   /// LoopTarget - Some blocks are used to represent the "loop edge" to
   ///  the start of a loop from within the loop body.  This Stmt* will be
   ///  refer to the loop statement for such blocks (and be null otherwise).
-  const Stmt *LoopTarget;
+  const Stmt *LoopTarget = nullptr;
 
   /// BlockID - A numerical ID assigned to a CFGBlock during construction
   ///   of the CFG.
@@ -474,7 +535,7 @@
     };
 
     CFGBlock *ReachableBlock;
-    llvm::PointerIntPair<CFGBlock*, 2> UnreachableBlock;
+    llvm::PointerIntPair<CFGBlock *, 2> UnreachableBlock;
 
   public:
     /// Construct an AdjacentBlock with a possibly unreachable block.
@@ -517,7 +578,7 @@
 private:
   /// Predecessors/Successors - Keep track of the predecessor / successor
   /// CFG blocks.
-  typedef BumpVector<AdjacentBlock> AdjacentBlocks;
+  using AdjacentBlocks = BumpVector<AdjacentBlock>;
   AdjacentBlocks Preds;
   AdjacentBlocks Succs;
 
@@ -537,15 +598,14 @@
 
 public:
   explicit CFGBlock(unsigned blockid, BumpVectorContext &C, CFG *parent)
-    : Elements(C), Label(nullptr), Terminator(nullptr), LoopTarget(nullptr), 
-      BlockID(blockid), Preds(C, 1), Succs(C, 1), HasNoReturnElement(false),
-      Parent(parent) {}
+      : Elements(C), Terminator(nullptr), BlockID(blockid), Preds(C, 1),
+        Succs(C, 1), HasNoReturnElement(false), Parent(parent) {}
 
   // Statement iterators
-  typedef ElementList::iterator                      iterator;
-  typedef ElementList::const_iterator                const_iterator;
-  typedef ElementList::reverse_iterator              reverse_iterator;
-  typedef ElementList::const_reverse_iterator        const_reverse_iterator;
+  using iterator = ElementList::iterator;
+  using const_iterator = ElementList::const_iterator;
+  using reverse_iterator = ElementList::reverse_iterator;
+  using const_reverse_iterator = ElementList::const_reverse_iterator;
 
   CFGElement                 front()       const { return Elements.front();   }
   CFGElement                 back()        const { return Elements.back();    }
@@ -566,19 +626,19 @@
   CFGElement operator[](size_t i) const  { return Elements[i]; }
 
   // CFG iterators
-  typedef AdjacentBlocks::iterator                              pred_iterator;
-  typedef AdjacentBlocks::const_iterator                  const_pred_iterator;
-  typedef AdjacentBlocks::reverse_iterator              pred_reverse_iterator;
-  typedef AdjacentBlocks::const_reverse_iterator  const_pred_reverse_iterator;
-  typedef llvm::iterator_range<pred_iterator>                      pred_range;
-  typedef llvm::iterator_range<const_pred_iterator>          pred_const_range;
+  using pred_iterator = AdjacentBlocks::iterator;
+  using const_pred_iterator = AdjacentBlocks::const_iterator;
+  using pred_reverse_iterator = AdjacentBlocks::reverse_iterator;
+  using const_pred_reverse_iterator = AdjacentBlocks::const_reverse_iterator;
+  using pred_range = llvm::iterator_range<pred_iterator>;
+  using pred_const_range = llvm::iterator_range<const_pred_iterator>;
 
-  typedef AdjacentBlocks::iterator                              succ_iterator;
-  typedef AdjacentBlocks::const_iterator                  const_succ_iterator;
-  typedef AdjacentBlocks::reverse_iterator              succ_reverse_iterator;
-  typedef AdjacentBlocks::const_reverse_iterator  const_succ_reverse_iterator;
-  typedef llvm::iterator_range<succ_iterator>                      succ_range;
-  typedef llvm::iterator_range<const_succ_iterator>          succ_const_range;
+  using succ_iterator = AdjacentBlocks::iterator;
+  using const_succ_iterator = AdjacentBlocks::const_iterator;
+  using succ_reverse_iterator = AdjacentBlocks::reverse_iterator;
+  using const_succ_reverse_iterator = AdjacentBlocks::const_reverse_iterator;
+  using succ_range = llvm::iterator_range<succ_iterator>;
+  using succ_const_range = llvm::iterator_range<const_succ_iterator>;
 
   pred_iterator                pred_begin()        { return Preds.begin();   }
   pred_iterator                pred_end()          { return Preds.end();     }
@@ -590,10 +650,11 @@
   const_pred_reverse_iterator  pred_rbegin() const { return Preds.rbegin();  }
   const_pred_reverse_iterator  pred_rend()   const { return Preds.rend();    }
 
-  pred_range                   preds() {
+  pred_range preds() {
     return pred_range(pred_begin(), pred_end());
   }
-  pred_const_range             preds() const {
+
+  pred_const_range preds() const {
     return pred_const_range(pred_begin(), pred_end());
   }
 
@@ -607,10 +668,11 @@
   const_succ_reverse_iterator  succ_rbegin() const { return Succs.rbegin();  }
   const_succ_reverse_iterator  succ_rend()   const { return Succs.rend();    }
 
-  succ_range                   succs() {
+  succ_range succs() {
     return succ_range(succ_begin(), succ_end());
   }
-  succ_const_range             succs() const {
+
+  succ_const_range succs() const {
     return succ_const_range(succ_begin(), succ_end());
   }
 
@@ -623,13 +685,11 @@
 
   class FilterOptions {
   public:
-    FilterOptions() {
-      IgnoreNullPredecessors = 1;
-      IgnoreDefaultsWithCoveredEnums = 0;
-    }
-
     unsigned IgnoreNullPredecessors : 1;
     unsigned IgnoreDefaultsWithCoveredEnums : 1;
+
+    FilterOptions()
+        : IgnoreNullPredecessors(1), IgnoreDefaultsWithCoveredEnums(0) {}
   };
 
   static bool FilterEdge(const FilterOptions &F, const CFGBlock *Src,
@@ -641,6 +701,7 @@
     IMPL I, E;
     const FilterOptions F;
     const CFGBlock *From;
+
   public:
     explicit FilteredCFGBlockIterator(const IMPL &i, const IMPL &e,
                                       const CFGBlock *from,
@@ -658,17 +719,18 @@
     }
 
     const CFGBlock *operator*() const { return *I; }
+
   private:
     bool Filter(const CFGBlock *To) {
       return IsPred ? FilterEdge(F, To, From) : FilterEdge(F, From, To);
     }
   };
 
-  typedef FilteredCFGBlockIterator<const_pred_iterator, true>
-          filtered_pred_iterator;
+  using filtered_pred_iterator =
+      FilteredCFGBlockIterator<const_pred_iterator, true>;
 
-  typedef FilteredCFGBlockIterator<const_succ_iterator, false>
-          filtered_succ_iterator;
+  using filtered_succ_iterator =
+      FilteredCFGBlockIterator<const_succ_iterator, false>;
 
   filtered_pred_iterator filtered_pred_start_end(const FilterOptions &f) const {
     return filtered_pred_iterator(pred_begin(), pred_end(), this, f);
@@ -722,6 +784,11 @@
     Elements.push_back(CFGStmt(statement), C);
   }
 
+  void appendConstructor(CXXConstructExpr *CE, const ConstructionContext *CC,
+                         BumpVectorContext &C) {
+    Elements.push_back(CFGConstructor(CE, CC), C);
+  }
+
   void appendInitializer(CXXCtorInitializer *initializer,
                         BumpVectorContext &C) {
     Elements.push_back(CFGInitializer(initializer), C);
@@ -791,11 +858,12 @@
 /// operator error is found when building the CFG.
 class CFGCallback {
 public:
-  CFGCallback() {}
+  CFGCallback() = default;
+  virtual ~CFGCallback() = default;
+
   virtual void compareAlwaysTrue(const BinaryOperator *B, bool isAlwaysTrue) {}
   virtual void compareBitwiseEquality(const BinaryOperator *B,
                                       bool isAlwaysTrue) {}
-  virtual ~CFGCallback() {}
 };
 
 /// CFG - Represents a source-level, intra-procedural CFG that represents the
@@ -813,20 +881,25 @@
 
   class BuildOptions {
     std::bitset<Stmt::lastStmtConstant> alwaysAddMask;
+
   public:
-    typedef llvm::DenseMap<const Stmt *, const CFGBlock*> ForcedBlkExprs;
-    ForcedBlkExprs **forcedBlkExprs;
-    CFGCallback *Observer;
-    bool PruneTriviallyFalseEdges;
-    bool AddEHEdges;
-    bool AddInitializers;
-    bool AddImplicitDtors;
-    bool AddLifetime;
-    bool AddLoopExit;
-    bool AddTemporaryDtors;
-    bool AddStaticInitBranches;
-    bool AddCXXNewAllocator;
-    bool AddCXXDefaultInitExprInCtors;
+    using ForcedBlkExprs = llvm::DenseMap<const Stmt *, const CFGBlock *>;
+
+    ForcedBlkExprs **forcedBlkExprs = nullptr;
+    CFGCallback *Observer = nullptr;
+    bool PruneTriviallyFalseEdges = true;
+    bool AddEHEdges = false;
+    bool AddInitializers = false;
+    bool AddImplicitDtors = false;
+    bool AddLifetime = false;
+    bool AddLoopExit = false;
+    bool AddTemporaryDtors = false;
+    bool AddStaticInitBranches = false;
+    bool AddCXXNewAllocator = false;
+    bool AddCXXDefaultInitExprInCtors = false;
+    bool AddRichCXXConstructors = false;
+
+    BuildOptions() = default;
 
     bool alwaysAdd(const Stmt *stmt) const {
       return alwaysAddMask[stmt->getStmtClass()];
@@ -841,15 +914,6 @@
       alwaysAddMask.set();
       return *this;
     }
-
-    BuildOptions()
-      : forcedBlkExprs(nullptr), Observer(nullptr),
-        PruneTriviallyFalseEdges(true),
-        AddEHEdges(false),
-        AddInitializers(false), AddImplicitDtors(false),
-        AddLifetime(false), AddLoopExit(false),
-        AddTemporaryDtors(false), AddStaticInitBranches(false),
-        AddCXXNewAllocator(false), AddCXXDefaultInitExprInCtors(false) {}
   };
 
   /// buildCFG - Builds a CFG from an AST.
@@ -873,11 +937,11 @@
   // Block Iterators
   //===--------------------------------------------------------------------===//
 
-  typedef BumpVector<CFGBlock*>                    CFGBlockListTy;
-  typedef CFGBlockListTy::iterator                 iterator;
-  typedef CFGBlockListTy::const_iterator           const_iterator;
-  typedef std::reverse_iterator<iterator>          reverse_iterator;
-  typedef std::reverse_iterator<const_iterator>    const_reverse_iterator;
+  using CFGBlockListTy = BumpVector<CFGBlock *>;
+  using iterator = CFGBlockListTy::iterator;
+  using const_iterator = CFGBlockListTy::const_iterator;
+  using reverse_iterator = std::reverse_iterator<iterator>;
+  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
 
   CFGBlock &                front()                { return *Blocks.front(); }
   CFGBlock &                back()                 { return *Blocks.back(); }
@@ -905,10 +969,12 @@
   CFGBlock *       getIndirectGotoBlock() { return IndirectGotoBlock; }
   const CFGBlock * getIndirectGotoBlock() const { return IndirectGotoBlock; }
 
-  typedef std::vector<const CFGBlock*>::const_iterator try_block_iterator;
+  using try_block_iterator = std::vector<const CFGBlock *>::const_iterator;
+
   try_block_iterator try_blocks_begin() const {
     return TryDispatchBlocks.begin();
   }
+
   try_block_iterator try_blocks_end() const {
     return TryDispatchBlocks.end();
   }
@@ -929,9 +995,9 @@
     SyntheticDeclStmts[Synthetic] = Source;
   }
 
-  typedef llvm::DenseMap<const DeclStmt *, const DeclStmt *>::const_iterator
-    synthetic_stmt_iterator;
-  typedef llvm::iterator_range<synthetic_stmt_iterator> synthetic_stmt_range;
+  using synthetic_stmt_iterator =
+      llvm::DenseMap<const DeclStmt *, const DeclStmt *>::const_iterator;
+  using synthetic_stmt_range = llvm::iterator_range<synthetic_stmt_iterator>;
 
   /// Iterates over synthetic DeclStmts in the CFG.
   ///
@@ -991,9 +1057,7 @@
   // Internal: constructors and data.
   //===--------------------------------------------------------------------===//
 
-  CFG()
-    : Entry(nullptr), Exit(nullptr), IndirectGotoBlock(nullptr), NumBlockIDs(0),
-      Blocks(BlkBVC, 10) {}
+  CFG() : Blocks(BlkBVC, 10) {}
 
   llvm::BumpPtrAllocator& getAllocator() {
     return BlkBVC.getAllocator();
@@ -1004,11 +1068,13 @@
   }
 
 private:
-  CFGBlock *Entry;
-  CFGBlock *Exit;
-  CFGBlock* IndirectGotoBlock;  // Special block to contain collective dispatch
-                                // for indirect gotos
-  unsigned  NumBlockIDs;
+  CFGBlock *Entry = nullptr;
+  CFGBlock *Exit = nullptr;
+
+  // Special block to contain collective dispatch for indirect gotos
+  CFGBlock* IndirectGotoBlock = nullptr;
+
+  unsigned  NumBlockIDs = 0;
 
   BumpVectorContext BlkBVC;
 
@@ -1022,7 +1088,8 @@
   /// source DeclStmt.
   llvm::DenseMap<const DeclStmt *, const DeclStmt *> SyntheticDeclStmts;
 };
-} // end namespace clang
+
+} // namespace clang
 
 //===----------------------------------------------------------------------===//
 // GraphTraits specializations for CFG basic block graphs (source-level CFGs)
@@ -1033,7 +1100,8 @@
 /// Implement simplify_type for CFGTerminator, so that we can dyn_cast from
 /// CFGTerminator to a specific Stmt class.
 template <> struct simplify_type< ::clang::CFGTerminator> {
-  typedef ::clang::Stmt *SimpleType;
+  using SimpleType = ::clang::Stmt *;
+
   static SimpleType getSimplifiedValue(::clang::CFGTerminator Val) {
     return Val.getStmt();
   }
@@ -1042,50 +1110,44 @@
 // Traits for: CFGBlock
 
 template <> struct GraphTraits< ::clang::CFGBlock *> {
-  typedef ::clang::CFGBlock *NodeRef;
-  typedef ::clang::CFGBlock::succ_iterator ChildIteratorType;
+  using NodeRef = ::clang::CFGBlock *;
+  using ChildIteratorType = ::clang::CFGBlock::succ_iterator;
 
   static NodeRef getEntryNode(::clang::CFGBlock *BB) { return BB; }
-
   static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
-
   static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
 };
 
 template <> struct GraphTraits< const ::clang::CFGBlock *> {
-  typedef const ::clang::CFGBlock *NodeRef;
-  typedef ::clang::CFGBlock::const_succ_iterator ChildIteratorType;
+  using NodeRef = const ::clang::CFGBlock *;
+  using ChildIteratorType = ::clang::CFGBlock::const_succ_iterator;
 
   static NodeRef getEntryNode(const clang::CFGBlock *BB) { return BB; }
-
   static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
-
   static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
 };
 
-template <> struct GraphTraits<Inverse< ::clang::CFGBlock*> > {
-  typedef ::clang::CFGBlock *NodeRef;
-  typedef ::clang::CFGBlock::const_pred_iterator ChildIteratorType;
+template <> struct GraphTraits<Inverse< ::clang::CFGBlock *>> {
+  using NodeRef = ::clang::CFGBlock *;
+  using ChildIteratorType = ::clang::CFGBlock::const_pred_iterator;
 
   static NodeRef getEntryNode(Inverse<::clang::CFGBlock *> G) {
     return G.Graph;
   }
 
   static ChildIteratorType child_begin(NodeRef N) { return N->pred_begin(); }
-
   static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); }
 };
 
-template <> struct GraphTraits<Inverse<const ::clang::CFGBlock*> > {
-  typedef const ::clang::CFGBlock *NodeRef;
-  typedef ::clang::CFGBlock::const_pred_iterator ChildIteratorType;
+template <> struct GraphTraits<Inverse<const ::clang::CFGBlock *>> {
+  using NodeRef = const ::clang::CFGBlock *;
+  using ChildIteratorType = ::clang::CFGBlock::const_pred_iterator;
 
   static NodeRef getEntryNode(Inverse<const ::clang::CFGBlock *> G) {
     return G.Graph;
   }
 
   static ChildIteratorType child_begin(NodeRef N) { return N->pred_begin(); }
-
   static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); }
 };
 
@@ -1093,8 +1155,7 @@
 
 template <> struct GraphTraits< ::clang::CFG* >
     : public GraphTraits< ::clang::CFGBlock *>  {
-
-  typedef ::clang::CFG::iterator nodes_iterator;
+  using nodes_iterator = ::clang::CFG::iterator;
 
   static NodeRef getEntryNode(::clang::CFG *F) { return &F->getEntry(); }
   static nodes_iterator nodes_begin(::clang::CFG* F) { return F->nodes_begin();}
@@ -1104,44 +1165,47 @@
 
 template <> struct GraphTraits<const ::clang::CFG* >
     : public GraphTraits<const ::clang::CFGBlock *>  {
-
-  typedef ::clang::CFG::const_iterator nodes_iterator;
+  using nodes_iterator = ::clang::CFG::const_iterator;
 
   static NodeRef getEntryNode(const ::clang::CFG *F) { return &F->getEntry(); }
+
   static nodes_iterator nodes_begin( const ::clang::CFG* F) {
     return F->nodes_begin();
   }
+
   static nodes_iterator nodes_end( const ::clang::CFG* F) {
     return F->nodes_end();
   }
+
   static unsigned size(const ::clang::CFG* F) {
     return F->size();
   }
 };
 
-template <> struct GraphTraits<Inverse< ::clang::CFG*> >
-  : public GraphTraits<Inverse< ::clang::CFGBlock*> > {
-
-  typedef ::clang::CFG::iterator nodes_iterator;
+template <> struct GraphTraits<Inverse< ::clang::CFG *>>
+  : public GraphTraits<Inverse< ::clang::CFGBlock *>> {
+  using nodes_iterator = ::clang::CFG::iterator;
 
   static NodeRef getEntryNode(::clang::CFG *F) { return &F->getExit(); }
   static nodes_iterator nodes_begin( ::clang::CFG* F) {return F->nodes_begin();}
   static nodes_iterator nodes_end( ::clang::CFG* F) { return F->nodes_end(); }
 };
 
-template <> struct GraphTraits<Inverse<const ::clang::CFG*> >
-  : public GraphTraits<Inverse<const ::clang::CFGBlock*> > {
-
-  typedef ::clang::CFG::const_iterator nodes_iterator;
+template <> struct GraphTraits<Inverse<const ::clang::CFG *>>
+  : public GraphTraits<Inverse<const ::clang::CFGBlock *>> {
+  using nodes_iterator = ::clang::CFG::const_iterator;
 
   static NodeRef getEntryNode(const ::clang::CFG *F) { return &F->getExit(); }
+
   static nodes_iterator nodes_begin(const ::clang::CFG* F) {
     return F->nodes_begin();
   }
+
   static nodes_iterator nodes_end(const ::clang::CFG* F) {
     return F->nodes_end();
   }
 };
-} // end llvm namespace
+
+} // namespace llvm
 
 #endif // LLVM_CLANG_ANALYSIS_CFG_H
diff --git a/include/clang/Analysis/CallGraph.h b/include/clang/Analysis/CallGraph.h
index a2a27a8..bdcdfec 100644
--- a/include/clang/Analysis/CallGraph.h
+++ b/include/clang/Analysis/CallGraph.h
@@ -1,4 +1,4 @@
-//== CallGraph.h - AST-based Call graph  ------------------------*- C++ -*--==//
+//===- CallGraph.h - AST-based Call graph -----------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,19 +12,27 @@
 //  A call graph for functions whose definitions/bodies are available in the
 //  current translation unit. The graph has a "virtual" root node that contains
 //  edges to all externally available functions.
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_ANALYSIS_CALLGRAPH_H
 #define LLVM_CLANG_ANALYSIS_CALLGRAPH_H
 
-#include "clang/AST/DeclBase.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include <memory>
 
 namespace clang {
+
 class CallGraphNode;
+class Decl;
+class DeclContext;
+class Stmt;
 
 /// \brief The AST-based call graph.
 ///
@@ -34,8 +42,8 @@
 class CallGraph : public RecursiveASTVisitor<CallGraph> {
   friend class CallGraphNode;
 
-  typedef llvm::DenseMap<const Decl *, std::unique_ptr<CallGraphNode>>
-      FunctionMapTy;
+  using FunctionMapTy =
+      llvm::DenseMap<const Decl *, std::unique_ptr<CallGraphNode>>;
 
   /// FunctionMap owns all CallGraphNodes.
   FunctionMapTy FunctionMap;
@@ -65,10 +73,11 @@
   /// one into the graph.
   CallGraphNode *getOrInsertNode(Decl *);
 
+  using iterator = FunctionMapTy::iterator;
+  using const_iterator = FunctionMapTy::const_iterator;
+
   /// Iterators through all the elements in the graph. Note, this gives
   /// non-deterministic order.
-  typedef FunctionMapTy::iterator iterator;
-  typedef FunctionMapTy::const_iterator const_iterator;
   iterator begin() { return FunctionMap.begin(); }
   iterator end()   { return FunctionMap.end();   }
   const_iterator begin() const { return FunctionMap.begin(); }
@@ -84,8 +93,8 @@
   /// Iterators through all the nodes of the graph that have no parent. These
   /// are the unreachable nodes, which are either unused or are due to us
   /// failing to add a call edge due to the analysis imprecision.
-  typedef llvm::SetVector<CallGraphNode *>::iterator nodes_iterator;
-  typedef llvm::SetVector<CallGraphNode *>::const_iterator const_nodes_iterator;
+  using nodes_iterator = llvm::SetVector<CallGraphNode *>::iterator;
+  using const_nodes_iterator = llvm::SetVector<CallGraphNode *>::const_iterator;
 
   void print(raw_ostream &os) const;
   void dump() const;
@@ -133,7 +142,7 @@
 
 class CallGraphNode {
 public:
-  typedef CallGraphNode* CallRecord;
+  using CallRecord = CallGraphNode *;
 
 private:
   /// \brief The function/method declaration.
@@ -145,17 +154,17 @@
 public:
   CallGraphNode(Decl *D) : FD(D) {}
 
-  typedef SmallVectorImpl<CallRecord>::iterator iterator;
-  typedef SmallVectorImpl<CallRecord>::const_iterator const_iterator;
+  using iterator = SmallVectorImpl<CallRecord>::iterator;
+  using const_iterator = SmallVectorImpl<CallRecord>::const_iterator;
 
   /// Iterators through all the callees/children of the node.
-  inline iterator begin() { return CalledFunctions.begin(); }
-  inline iterator end()   { return CalledFunctions.end(); }
-  inline const_iterator begin() const { return CalledFunctions.begin(); }
-  inline const_iterator end()   const { return CalledFunctions.end();   }
+  iterator begin() { return CalledFunctions.begin(); }
+  iterator end() { return CalledFunctions.end(); }
+  const_iterator begin() const { return CalledFunctions.begin(); }
+  const_iterator end() const { return CalledFunctions.end(); }
 
-  inline bool empty() const {return CalledFunctions.empty(); }
-  inline unsigned size() const {return CalledFunctions.size(); }
+  bool empty() const { return CalledFunctions.empty(); }
+  unsigned size() const { return CalledFunctions.size(); }
 
   void addCallee(CallGraphNode *N) {
     CalledFunctions.push_back(N);
@@ -167,35 +176,33 @@
   void dump() const;
 };
 
-} // end clang namespace
+} // namespace clang
 
 // Graph traits for iteration, viewing.
 namespace llvm {
+
 template <> struct GraphTraits<clang::CallGraphNode*> {
-  typedef clang::CallGraphNode NodeType;
-  typedef clang::CallGraphNode *NodeRef;
-  typedef NodeType::iterator ChildIteratorType;
+  using NodeType = clang::CallGraphNode;
+  using NodeRef = clang::CallGraphNode *;
+  using ChildIteratorType = NodeType::iterator;
 
   static NodeType *getEntryNode(clang::CallGraphNode *CGN) { return CGN; }
-  static inline ChildIteratorType child_begin(NodeType *N) {
-    return N->begin();
-  }
-  static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
+  static ChildIteratorType child_begin(NodeType *N) { return N->begin();  }
+  static ChildIteratorType child_end(NodeType *N) { return N->end(); }
 };
 
 template <> struct GraphTraits<const clang::CallGraphNode*> {
-  typedef const clang::CallGraphNode NodeType;
-  typedef const clang::CallGraphNode *NodeRef;
-  typedef NodeType::const_iterator ChildIteratorType;
+  using NodeType = const clang::CallGraphNode;
+  using NodeRef = const clang::CallGraphNode *;
+  using ChildIteratorType = NodeType::const_iterator;
 
   static NodeType *getEntryNode(const clang::CallGraphNode *CGN) { return CGN; }
-  static inline ChildIteratorType child_begin(NodeType *N) { return N->begin();}
-  static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
+  static ChildIteratorType child_begin(NodeType *N) { return N->begin();}
+  static ChildIteratorType child_end(NodeType *N) { return N->end(); }
 };
 
 template <> struct GraphTraits<clang::CallGraph*>
   : public GraphTraits<clang::CallGraphNode*> {
-
   static NodeType *getEntryNode(clang::CallGraph *CGN) {
     return CGN->getRoot();  // Start at the external node!
   }
@@ -206,19 +213,18 @@
   }
 
   // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
-  typedef mapped_iterator<clang::CallGraph::iterator, decltype(&CGGetValue)>
-      nodes_iterator;
+  using nodes_iterator =
+      mapped_iterator<clang::CallGraph::iterator, decltype(&CGGetValue)>;
 
   static nodes_iterator nodes_begin(clang::CallGraph *CG) {
     return nodes_iterator(CG->begin(), &CGGetValue);
   }
+
   static nodes_iterator nodes_end  (clang::CallGraph *CG) {
     return nodes_iterator(CG->end(), &CGGetValue);
   }
 
-  static unsigned size(clang::CallGraph *CG) {
-    return CG->size();
-  }
+  static unsigned size(clang::CallGraph *CG) { return CG->size(); }
 };
 
 template <> struct GraphTraits<const clang::CallGraph*> :
@@ -233,21 +239,20 @@
   }
 
   // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
-  typedef mapped_iterator<clang::CallGraph::const_iterator,
-                          decltype(&CGGetValue)>
-      nodes_iterator;
+  using nodes_iterator =
+      mapped_iterator<clang::CallGraph::const_iterator, decltype(&CGGetValue)>;
 
   static nodes_iterator nodes_begin(const clang::CallGraph *CG) {
     return nodes_iterator(CG->begin(), &CGGetValue);
   }
+
   static nodes_iterator nodes_end(const clang::CallGraph *CG) {
     return nodes_iterator(CG->end(), &CGGetValue);
   }
-  static unsigned size(const clang::CallGraph *CG) {
-    return CG->size();
-  }
+
+  static unsigned size(const clang::CallGraph *CG) { return CG->size(); }
 };
 
-} // end llvm namespace
+} // namespace llvm
 
-#endif
+#endif // LLVM_CLANG_ANALYSIS_CALLGRAPH_H
diff --git a/include/clang/Analysis/CloneDetection.h b/include/clang/Analysis/CloneDetection.h
index 051b923..5ef4457 100644
--- a/include/clang/Analysis/CloneDetection.h
+++ b/include/clang/Analysis/CloneDetection.h
@@ -351,7 +351,7 @@
 /// Analyzes the pattern of the referenced variables in a statement.
 class VariablePattern {
 
-  /// Describes an occurence of a variable reference in a statement.
+  /// Describes an occurrence of a variable reference in a statement.
   struct VariableOccurence {
     /// The index of the associated VarDecl in the Variables vector.
     size_t KindID;
@@ -362,7 +362,7 @@
         : KindID(KindID), Mention(Mention) {}
   };
 
-  /// All occurences of referenced variables in the order of appearance.
+  /// All occurrences of referenced variables in the order of appearance.
   std::vector<VariableOccurence> Occurences;
   /// List of referenced variables in the order of appearance.
   /// Every item in this list is unique.
diff --git a/include/clang/Analysis/ConstructionContext.h b/include/clang/Analysis/ConstructionContext.h
new file mode 100644
index 0000000..770f1c9
--- /dev/null
+++ b/include/clang/Analysis/ConstructionContext.h
@@ -0,0 +1,245 @@
+//===- ConstructionContext.h - CFG constructor information ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ConstructionContext class and its sub-classes,
+// which represent various different ways of constructing C++ objects
+// with the additional information the users may want to know about
+// the constructor.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_CONSTRUCTIONCONTEXT_H
+#define LLVM_CLANG_ANALYSIS_CONSTRUCTIONCONTEXT_H
+
+#include "clang/Analysis/Support/BumpVector.h"
+#include "clang/AST/ExprCXX.h"
+
+namespace clang {
+
+/// Construction context is a linked list of multiple layers. Layers are
+/// created gradually while traversing the AST, and layers that represent
+/// the outmost AST nodes are built first, while the node that immediately
+/// contains the constructor would be built last and capture the previous
+/// layers as its parents. Construction context captures the last layer
+/// (which has links to the previous layers) and classifies the seemingly
+/// arbitrary chain of layers into one of the possible ways of constructing
+/// an object in C++ for user-friendly experience.
+class ConstructionContextLayer {
+public:
+  typedef llvm::PointerUnion<Stmt *, CXXCtorInitializer *> TriggerTy;
+
+private:
+  /// The construction site - the statement that triggered the construction
+  /// for one of its parts. For instance, stack variable declaration statement
+  /// triggers construction of itself or its elements if it's an array,
+  /// new-expression triggers construction of the newly allocated object(s).
+  TriggerTy Trigger;
+
+  /// Sometimes a single trigger is not enough to describe the construction
+  /// site. In this case we'd have a chain of "partial" construction context
+  /// layers.
+  /// Some examples:
+  /// - A constructor within in an aggregate initializer list within a variable
+  ///   would have a construction context of the initializer list with
+  ///   the parent construction context of a variable.
+  /// - A constructor for a temporary that needs to be both destroyed
+  ///   and materialized into an elidable copy constructor would have a
+  ///   construction context of a CXXBindTemporaryExpr with the parent
+  ///   construction context of a MaterializeTemproraryExpr.
+  /// Not all of these are currently supported.
+  const ConstructionContextLayer *Parent = nullptr;
+
+  ConstructionContextLayer(TriggerTy Trigger,
+                          const ConstructionContextLayer *Parent)
+      : Trigger(Trigger), Parent(Parent) {}
+
+public:
+  static const ConstructionContextLayer *
+  create(BumpVectorContext &C, TriggerTy Trigger,
+         const ConstructionContextLayer *Parent = nullptr);
+
+  const ConstructionContextLayer *getParent() const { return Parent; }
+  bool isLast() const { return !Parent; }
+
+  const Stmt *getTriggerStmt() const {
+    return Trigger.dyn_cast<Stmt *>();
+  }
+
+  const CXXCtorInitializer *getTriggerInit() const {
+    return Trigger.dyn_cast<CXXCtorInitializer *>();
+  }
+
+  /// Returns true if these layers are equal as individual layers, even if
+  /// their parents are different.
+  bool isSameLayer(const ConstructionContextLayer *Other) const {
+    assert(Other);
+    return (Trigger == Other->Trigger);
+  }
+
+  /// See if Other is a proper initial segment of this construction context
+  /// in terms of the parent chain - i.e. a few first parents coincide and
+  /// then the other context terminates but our context goes further - i.e.,
+  /// we are providing the same context that the other context provides,
+  /// and a bit more above that.
+  bool isStrictlyMoreSpecificThan(const ConstructionContextLayer *Other) const;
+};
+
+
+/// ConstructionContext's subclasses describe different ways of constructing
+/// an object in C++. The context re-captures the essential parent AST nodes
+/// of the CXXConstructExpr it is assigned to and presents these nodes
+/// through easy-to-understand accessor methods.
+class ConstructionContext {
+public:
+  enum Kind {
+    SimpleVariableKind,
+    ConstructorInitializerKind,
+    NewAllocatedObjectKind,
+    TemporaryObjectKind,
+    ReturnedValueKind
+  };
+
+protected:
+  Kind K;
+
+protected:
+  // Do not make public! These need to only be constructed
+  // via createFromLayers().
+  explicit ConstructionContext(Kind K) : K(K) {}
+
+public:
+
+  /// Consume the construction context layer, together with its parent layers,
+  /// and wrap it up into a complete construction context.
+  static const ConstructionContext *
+  createFromLayers(BumpVectorContext &C,
+                   const ConstructionContextLayer *TopLayer);
+
+  Kind getKind() const { return K; }
+};
+
+/// Represents construction into a simple local variable, eg. T var(123);.
+class SimpleVariableConstructionContext : public ConstructionContext {
+  const DeclStmt *DS;
+
+public:
+  explicit SimpleVariableConstructionContext(const DeclStmt *DS)
+      : ConstructionContext(ConstructionContext::SimpleVariableKind), DS(DS) {
+    assert(DS);
+  }
+
+  const DeclStmt *getDeclStmt() const { return DS; }
+
+  static bool classof(const ConstructionContext *CC) {
+    return CC->getKind() == SimpleVariableKind;
+  }
+};
+
+/// Represents construction into a field or a base class within a bigger object
+/// via a constructor initializer, eg. T(): field(123) { ... }.
+class ConstructorInitializerConstructionContext : public ConstructionContext {
+  const CXXCtorInitializer *I;
+
+public:
+  explicit ConstructorInitializerConstructionContext(
+      const CXXCtorInitializer *I)
+      : ConstructionContext(ConstructionContext::ConstructorInitializerKind),
+        I(I) {
+    assert(I);
+  }
+
+  const CXXCtorInitializer *getCXXCtorInitializer() const { return I; }
+
+  static bool classof(const ConstructionContext *CC) {
+    return CC->getKind() == ConstructorInitializerKind;
+  }
+};
+
+/// Represents immediate initialization of memory allocated by operator new,
+/// eg. new T(123);.
+class NewAllocatedObjectConstructionContext : public ConstructionContext {
+  const CXXNewExpr *NE;
+
+public:
+  explicit NewAllocatedObjectConstructionContext(const CXXNewExpr *NE)
+      : ConstructionContext(ConstructionContext::NewAllocatedObjectKind),
+        NE(NE) {
+    assert(NE);
+  }
+
+  const CXXNewExpr *getCXXNewExpr() const { return NE; }
+
+  static bool classof(const ConstructionContext *CC) {
+    return CC->getKind() == NewAllocatedObjectKind;
+  }
+};
+
+/// Represents a temporary object, eg. T(123), that does not immediately cross
+/// function boundaries "by value"; constructors that construct function
+/// value-type arguments or values that are immediately returned from the
+/// function that returns a value receive separate construction context kinds.
+class TemporaryObjectConstructionContext : public ConstructionContext {
+  const CXXBindTemporaryExpr *BTE;
+  const MaterializeTemporaryExpr *MTE;
+
+public:
+  explicit TemporaryObjectConstructionContext(
+      const CXXBindTemporaryExpr *BTE, const MaterializeTemporaryExpr *MTE)
+      : ConstructionContext(ConstructionContext::TemporaryObjectKind),
+        BTE(BTE), MTE(MTE) {
+    // Both BTE and MTE can be null here, all combinations possible.
+    // Even though for now at least one should be non-null, we simply haven't
+    // implemented this case yet (this would be a temporary in the middle of
+    // nowhere that doesn't have a non-trivial destructor).
+  }
+
+  /// CXXBindTemporaryExpr here is non-null as long as the temporary has
+  /// a non-trivial destructor.
+  const CXXBindTemporaryExpr *getCXXBindTemporaryExpr() const {
+    return BTE;
+  }
+
+  /// MaterializeTemporaryExpr is non-null as long as the temporary is actually
+  /// used after construction, eg. by binding to a reference (lifetime
+  /// extension), accessing a field, calling a method, or passing it into
+  /// a function (an elidable copy or move constructor would be a common
+  /// example) by reference.
+  const MaterializeTemporaryExpr *getMaterializedTemporaryExpr() const {
+    return MTE;
+  }
+
+  static bool classof(const ConstructionContext *CC) {
+    return CC->getKind() == TemporaryObjectKind;
+  }
+};
+
+/// Represents a temporary object that is being immediately returned from a
+/// function by value, eg. return t; or return T(123);. In this case there is
+/// always going to be a constructor at the return site. However, the usual
+/// temporary-related bureaucracy (CXXBindTemporaryExpr,
+/// MaterializeTemporaryExpr) is normally located in the caller function's AST.
+class ReturnedValueConstructionContext : public ConstructionContext {
+  const ReturnStmt *RS;
+
+public:
+  explicit ReturnedValueConstructionContext(const ReturnStmt *RS)
+      : ConstructionContext(ConstructionContext::ReturnedValueKind), RS(RS) {
+    assert(RS);
+  }
+
+  const ReturnStmt *getReturnStmt() const { return RS; }
+
+  static bool classof(const ConstructionContext *CC) {
+    return CC->getKind() == ReturnedValueKind;
+  }
+};
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_CONSTRUCTIONCONTEXT_H
diff --git a/include/clang/Analysis/ProgramPoint.h b/include/clang/Analysis/ProgramPoint.h
index 2d59dec..f7bd1be 100644
--- a/include/clang/Analysis/ProgramPoint.h
+++ b/include/clang/Analysis/ProgramPoint.h
@@ -73,8 +73,9 @@
               PostStoreKind,
               PostConditionKind,
               PostLValueKind,
+              PostAllocatorCallKind,
               MinPostStmtKind = PostStmtKind,
-              MaxPostStmtKind = PostLValueKind,
+              MaxPostStmtKind = PostAllocatorCallKind,
               PostInitializerKind,
               CallEnterKind,
               CallExitBeginKind,
@@ -97,7 +98,7 @@
   llvm::PointerIntPair<const ProgramPointTag *, 2, unsigned> Tag;
 
 protected:
-  ProgramPoint() {}
+  ProgramPoint() = default;
   ProgramPoint(const void *P,
                Kind k,
                const LocationContext *l,
@@ -233,7 +234,7 @@
   
 private:
   friend class ProgramPoint;
-  BlockEntrance() {}
+  BlockEntrance() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == BlockEntranceKind;
   }
@@ -254,7 +255,7 @@
 
 private:
   friend class ProgramPoint;
-  BlockExit() {}
+  BlockExit() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == BlockExitKind;
   }
@@ -274,7 +275,7 @@
   const T* getStmtAs() const { return dyn_cast<T>(getStmt()); }
 
 protected:
-  StmtPoint() {}
+  StmtPoint() = default;
 private:
   friend class ProgramPoint;
   static bool isKind(const ProgramPoint &Location) {
@@ -294,7 +295,7 @@
 
 private:
   friend class ProgramPoint;
-  PreStmt() {}
+  PreStmt() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == PreStmtKind;
   }
@@ -302,7 +303,7 @@
 
 class PostStmt : public StmtPoint {
 protected:
-  PostStmt() {}
+  PostStmt() = default;
   PostStmt(const Stmt *S, const void *data, Kind k, const LocationContext *L,
            const ProgramPointTag *tag = nullptr)
     : StmtPoint(S, data, k, L, tag) {}
@@ -333,7 +334,7 @@
 
 private:
   friend class ProgramPoint;
-  PostCondition() {}
+  PostCondition() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == PostConditionKind;
   }
@@ -341,7 +342,7 @@
 
 class LocationCheck : public StmtPoint {
 protected:
-  LocationCheck() {}
+  LocationCheck() = default;
   LocationCheck(const Stmt *S, const LocationContext *L,
                 ProgramPoint::Kind K, const ProgramPointTag *tag)
     : StmtPoint(S, nullptr, K, L, tag) {}
@@ -362,7 +363,7 @@
   
 private:
   friend class ProgramPoint;
-  PreLoad() {}
+  PreLoad() = default;
   static bool isKind(const ProgramPoint &location) {
     return location.getKind() == PreLoadKind;
   }
@@ -376,7 +377,7 @@
   
 private:
   friend class ProgramPoint;
-  PreStore() {}
+  PreStore() = default;
   static bool isKind(const ProgramPoint &location) {
     return location.getKind() == PreStoreKind;
   }
@@ -390,7 +391,7 @@
 
 private:
   friend class ProgramPoint;
-  PostLoad() {}
+  PostLoad() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == PostLoadKind;
   }
@@ -417,7 +418,7 @@
 
 private:
   friend class ProgramPoint;
-  PostStore() {}
+  PostStore() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == PostStoreKind;
   }
@@ -431,7 +432,7 @@
 
 private:
   friend class ProgramPoint;
-  PostLValue() {}
+  PostLValue() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == PostLValueKind;
   }
@@ -447,7 +448,7 @@
 
 private:
   friend class ProgramPoint;
-  PreStmtPurgeDeadSymbols() {}
+  PreStmtPurgeDeadSymbols() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == PreStmtPurgeDeadSymbolsKind;
   }
@@ -463,7 +464,7 @@
 
 private:
   friend class ProgramPoint;
-  PostStmtPurgeDeadSymbols() {}
+  PostStmtPurgeDeadSymbols() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == PostStmtPurgeDeadSymbolsKind;
   }
@@ -487,7 +488,7 @@
 
 private:
   friend class ProgramPoint;
-  BlockEdge() {}
+  BlockEdge() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == BlockEdgeKind;
   }
@@ -516,7 +517,7 @@
 
 private:
   friend class ProgramPoint;
-  PostInitializer() {}
+  PostInitializer() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == PostInitializerKind;
   }
@@ -537,7 +538,7 @@
   }
 
 protected:
-  ImplicitCallPoint() {}
+  ImplicitCallPoint() = default;
 private:
   friend class ProgramPoint;
   static bool isKind(const ProgramPoint &Location) {
@@ -557,7 +558,7 @@
 
 private:
   friend class ProgramPoint;
-  PreImplicitCall() {}
+  PreImplicitCall() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == PreImplicitCallKind;
   }
@@ -574,12 +575,26 @@
 
 private:
   friend class ProgramPoint;
-  PostImplicitCall() {}
+  PostImplicitCall() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == PostImplicitCallKind;
   }
 };
 
+class PostAllocatorCall : public StmtPoint {
+public:
+  PostAllocatorCall(const Stmt *S, const LocationContext *L,
+                    const ProgramPointTag *Tag = nullptr)
+      : StmtPoint(S, nullptr, PostAllocatorCallKind, L, Tag) {}
+
+private:
+  friend class ProgramPoint;
+  PostAllocatorCall() = default;
+  static bool isKind(const ProgramPoint &Location) {
+    return Location.getKind() == PostAllocatorCallKind;
+  }
+};
+
 /// Represents a point when we begin processing an inlined call.
 /// CallEnter uses the caller's location context.
 class CallEnter : public ProgramPoint {
@@ -605,7 +620,7 @@
 
 private:
   friend class ProgramPoint;
-  CallEnter() {}
+  CallEnter() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == CallEnterKind;
   }
@@ -626,9 +641,13 @@
   CallExitBegin(const StackFrameContext *L, const ReturnStmt *RS)
     : ProgramPoint(RS, CallExitBeginKind, L, nullptr) { }
 
+  const ReturnStmt *getReturnStmt() const {
+    return static_cast<const ReturnStmt *>(getData1());
+  }
+
 private:
   friend class ProgramPoint;
-  CallExitBegin() {}
+  CallExitBegin() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == CallExitBeginKind;
   }
@@ -649,7 +668,7 @@
 
 private:
   friend class ProgramPoint;
-  CallExitEnd() {}
+  CallExitEnd() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == CallExitEndKind;
   }
@@ -672,7 +691,7 @@
 
 private:
     friend class ProgramPoint;
-    LoopExit() {}
+    LoopExit() = default;
     static bool isKind(const ProgramPoint &Location) {
       return Location.getKind() == LoopExitKind;
     }
@@ -691,7 +710,7 @@
 
 private:
   friend class ProgramPoint;
-  EpsilonPoint() {}
+  EpsilonPoint() = default;
   static bool isKind(const ProgramPoint &Location) {
     return Location.getKind() == EpsilonKind;
   }
diff --git a/include/clang/Analysis/Support/BumpVector.h b/include/clang/Analysis/Support/BumpVector.h
index 591d17b..5940520 100644
--- a/include/clang/Analysis/Support/BumpVector.h
+++ b/include/clang/Analysis/Support/BumpVector.h
@@ -1,4 +1,4 @@
-//===-- BumpVector.h - Vector-like ADT that uses bump allocation --*- C++ -*-=//
+//===- BumpVector.h - Vector-like ADT that uses bump allocation -*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -21,16 +21,18 @@
 
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/type_traits.h"
-#include <algorithm>
+#include <cassert>
+#include <cstddef>
 #include <cstring>
 #include <iterator>
 #include <memory>
+#include <type_traits>
 
 namespace clang {
   
 class BumpVectorContext {
   llvm::PointerIntPair<llvm::BumpPtrAllocator*, 1> Alloc;
+
 public:
   /// Construct a new BumpVectorContext that creates a new BumpPtrAllocator
   /// and destroys it when the BumpVectorContext object is destroyed.
@@ -56,11 +58,13 @@
   
 template<typename T>
 class BumpVector {
-  T *Begin, *End, *Capacity;
+  T *Begin = nullptr;
+  T *End = nullptr;
+  T *Capacity = nullptr;
+
 public:
   // Default ctor - Initialize to empty.
-  explicit BumpVector(BumpVectorContext &C, unsigned N)
-  : Begin(nullptr), End(nullptr), Capacity(nullptr) {
+  explicit BumpVector(BumpVectorContext &C, unsigned N) {
     reserve(C, N);
   }
   
@@ -71,19 +75,19 @@
     }
   }
   
-  typedef size_t size_type;
-  typedef ptrdiff_t difference_type;
-  typedef T value_type;
-  typedef T* iterator;
-  typedef const T* const_iterator;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+  using value_type = T;
+  using iterator = T *;
+  using const_iterator = const T *;
   
-  typedef std::reverse_iterator<const_iterator>  const_reverse_iterator;
-  typedef std::reverse_iterator<iterator>  reverse_iterator;
+  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+  using reverse_iterator = std::reverse_iterator<iterator>;
   
-  typedef T& reference;
-  typedef const T& const_reference;
-  typedef T* pointer;
-  typedef const T* const_pointer;
+  using reference = T &;
+  using const_reference = const T &;
+  using pointer = T *;
+  using const_pointer = const T *;
   
   // forward iterator creation methods.
   iterator begin() { return Begin; }
@@ -92,10 +96,12 @@
   const_iterator end() const { return End; }
   
   // reverse iterator creation methods.
-  reverse_iterator rbegin()            { return reverse_iterator(end()); }
+  reverse_iterator rbegin() { return reverse_iterator(end()); }
   const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
-  reverse_iterator rend()              { return reverse_iterator(begin()); }
-  const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
+  reverse_iterator rend() { return reverse_iterator(begin()); }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(begin());
+  }
     
   bool empty() const { return Begin == End; }
   size_type size() const { return End-Begin; }
@@ -166,7 +172,7 @@
   /// iterator to position after last inserted copy.
   iterator insert(iterator I, size_t Cnt, const_reference E,
       BumpVectorContext &C) {
-    assert (I >= Begin && I <= End && "Iterator out of bounds.");
+    assert(I >= Begin && I <= End && "Iterator out of bounds.");
     if (End + Cnt <= Capacity) {
     Retry:
       move_range_right(I, End, Cnt);
@@ -246,5 +252,6 @@
   Capacity = Begin+NewCapacity;
 }
 
-} // end: clang namespace
-#endif
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_SUPPORT_BUMPVECTOR_H
diff --git a/include/clang/Basic/AddressSpaces.h b/include/clang/Basic/AddressSpaces.h
index 6b00908..67019f6 100644
--- a/include/clang/Basic/AddressSpaces.h
+++ b/include/clang/Basic/AddressSpaces.h
@@ -24,7 +24,7 @@
 /// of QualType.
 ///
 enum class LangAS : unsigned {
-  // The default value 0 is the value used in QualType for the the situation
+  // The default value 0 is the value used in QualType for the situation
   // where there is no address space qualifier.
   Default = 0,
 
diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td
index af3e716..92d1dd6 100644
--- a/include/clang/Basic/Attr.td
+++ b/include/clang/Basic/Attr.td
@@ -69,44 +69,53 @@
 //
 // The code fragment is a boolean expression that will confirm that the subject
 // meets the requirements; the subject will have the name S, and will have the
-// type specified by the base. It should be a simple boolean expression.
-class SubsetSubject<AttrSubject base, code check> : AttrSubject {
+// type specified by the base. It should be a simple boolean expression. The
+// diagnostic string should be a comma-separated list of subject names.
+class SubsetSubject<AttrSubject base, code check, string diag> : AttrSubject {
   AttrSubject Base = base;
   code CheckCode = check;
+  string DiagSpelling = diag;
 }
 
-// This is the type of a variable which C++11 allows alignas(...) to appertain
-// to.
-def NormalVar : SubsetSubject<Var,
-                              [{S->getStorageClass() != VarDecl::Register &&
-                                S->getKind() != Decl::ImplicitParam &&
-                                S->getKind() != Decl::ParmVar &&
-                                S->getKind() != Decl::NonTypeTemplateParm}]>;
+def LocalVar : SubsetSubject<Var,
+                              [{S->hasLocalStorage() && !isa<ParmVarDecl>(S)}],
+                              "local variables">;
 def NonParmVar : SubsetSubject<Var,
-                               [{S->getKind() != Decl::ParmVar}]>;
+                               [{S->getKind() != Decl::ParmVar}],
+                               "variables">;
+def NonLocalVar : SubsetSubject<Var,
+                                [{!S->hasLocalStorage()}],
+                                "variables with non-local storage">;
 def NonBitField : SubsetSubject<Field,
-                                [{!S->isBitField()}]>;
+                                [{!S->isBitField()}],
+                                "non-bit-field non-static data members">;
 
 def ObjCInstanceMethod : SubsetSubject<ObjCMethod,
-                                       [{S->isInstanceMethod()}]>;
+                                       [{S->isInstanceMethod()}],
+                                       "Objective-C instance methods">;
 
 def ObjCInterfaceDeclInitMethod : SubsetSubject<ObjCMethod,
                                [{S->getMethodFamily() == OMF_init &&
                                  (isa<ObjCInterfaceDecl>(S->getDeclContext()) ||
                                   (isa<ObjCCategoryDecl>(S->getDeclContext()) &&
-            cast<ObjCCategoryDecl>(S->getDeclContext())->IsClassExtension()))}]>;
+            cast<ObjCCategoryDecl>(S->getDeclContext())->IsClassExtension()))}],
+            "init methods of interface or class extension declarations">;
 
 def Struct : SubsetSubject<Record,
-                           [{!S->isUnion()}]>;
+                           [{!S->isUnion()}], "structs">;
 
 def TLSVar : SubsetSubject<Var,
-                           [{S->getTLSKind() != 0}]>;
+                           [{S->getTLSKind() != 0}], "thread-local variables">;
 
 def SharedVar : SubsetSubject<Var,
-                              [{S->hasGlobalStorage() && !S->getTLSKind()}]>;
+                              [{S->hasGlobalStorage() && !S->getTLSKind()}],
+                              "global variables">;
 
 def GlobalVar : SubsetSubject<Var,
-                             [{S->hasGlobalStorage()}]>;
+                             [{S->hasGlobalStorage()}], "global variables">;
+
+def InlineFunction : SubsetSubject<Function,
+                             [{S->isInlineSpecified()}], "inline functions">;
 
 // FIXME: this hack is needed because DeclNodes.td defines the base Decl node
 // type to be a class, not a definition. This makes it impossible to create an
@@ -115,11 +124,12 @@
 // the case of a SubsetSubject, there's no way to express it without this hack.
 def DeclBase : AttrSubject;
 def FunctionLike : SubsetSubject<DeclBase,
-                                  [{S->getFunctionType(false) != nullptr}]>;
+                                 [{S->getFunctionType(false) != nullptr}],
+                                 "functions, function pointers">;
 
-def OpenCLKernelFunction : SubsetSubject<Function, [{
-  S->hasAttr<OpenCLKernelAttr>()
-}]>;
+def OpenCLKernelFunction
+    : SubsetSubject<Function, [{S->hasAttr<OpenCLKernelAttr>()}],
+                    "kernel functions">;
 
 // HasFunctionProto is a more strict version of FunctionLike, so it should
 // never be specified in a Subjects list along with FunctionLike (due to the
@@ -128,7 +138,8 @@
                                      [{(S->getFunctionType(true) != nullptr &&
                               isa<FunctionProtoType>(S->getFunctionType())) ||
                                        isa<ObjCMethodDecl>(S) ||
-                                       isa<BlockDecl>(S)}]>;
+                                       isa<BlockDecl>(S)}],
+                                     "non-K&R-style functions">;
 
 // A single argument to an attribute
 class Argument<string name, bit optional, bit fake = 0> {
@@ -158,6 +169,12 @@
 class VariadicExprArgument<string name> : Argument<name, 1>;
 class VariadicStringArgument<string name> : Argument<name, 1>;
 
+// Like VariadicUnsignedArgument except values are ParamIdx.
+class VariadicParamIdxArgument<string name> : Argument<name, 1>;
+
+// Like VariadicParamIdxArgument but for a single function parameter index.
+class ParamIdxArgument<string name, bit opt = 0> : Argument<name, opt>;
+
 // A version of the form major.minor[.subminor].
 class VersionArgument<string name, bit opt = 0> : Argument<name, opt>;
 
@@ -219,13 +236,20 @@
   string Namespace = namespace;
 }
 
-// The GCC spelling implies GNU<name, "GNU"> and CXX11<"gnu", name> and also
-// sets KnownToGCC to 1. This spelling should be used for any GCC-compatible
+// The GCC spelling implies GNU<name> and CXX11<"gnu", name> and also sets
+// KnownToGCC to 1. This spelling should be used for any GCC-compatible
 // attributes.
 class GCC<string name> : Spelling<name, "GCC"> {
   let KnownToGCC = 1;
 }
 
+// The Clang spelling implies GNU<name>, CXX11<"clang", name>, and optionally,
+// C2x<"clang", name>. This spelling should be used for any Clang-specific
+// attributes.
+class Clang<string name, bit allowInC = 1> : Spelling<name, "Clang"> {
+  bit AllowInC = allowInC;
+}
+
 class Accessor<string name, list<Spelling> spellings> {
   string Name = name;
   list<Spelling> Spellings = spellings;
@@ -258,13 +282,24 @@
 def ObjC : LangOpt<"ObjC1">;
 def BlocksSupported : LangOpt<"Blocks">;
 
-// Defines targets for target-specific attributes. The list of strings should
-// specify architectures for which the target applies, based off the ArchType
-// enumeration in Triple.h.
-class TargetArch<list<string> arches> {
-  list<string> Arches = arches;
+// Defines targets for target-specific attributes. Empty lists are unchecked.
+class TargetSpec {
+  // Specifies Architectures for which the target applies, based off the
+  // ArchType enumeration in Triple.h.
+  list<string> Arches = [];
+  // Specifies Operating Systems for which the target applies, based off the
+  // OSType enumeration in Triple.h
   list<string> OSes;
+  // Specifies the C++ ABIs for which the target applies, based off the
+  // TargetCXXABI::Kind in TargetCXXABI.h.
   list<string> CXXABIs;
+  // Specifies Object Formats for which the target applies, based off the
+  // ObjectFormatType enumeration in Triple.h
+  list<string> ObjectFormats;
+}
+
+class TargetArch<list<string> arches> : TargetSpec {
+  let Arches = arches;
 }
 def TargetARM : TargetArch<["arm", "thumb", "armeb", "thumbeb"]>;
 def TargetAVR : TargetArch<["avr"]>;
@@ -279,6 +314,9 @@
 def TargetMicrosoftCXXABI : TargetArch<["x86", "x86_64", "arm", "thumb", "aarch64"]> {
   let CXXABIs = ["Microsoft"];
 }
+def TargetELF : TargetSpec {
+  let ObjectFormats = ["ELF"];
+}
 
 // Attribute subject match rules that are used for #pragma clang attribute.
 //
@@ -415,9 +453,6 @@
   // Set to true if all of the attribute's arguments should be parsed in an
   // unevaluated context.
   bit ParseArgumentsAsUnevaluated = 0;
-  // Set to true if this attribute can be duplicated on a subject when merging
-  // attributes. By default, attributes are not merged.
-  bit DuplicatesAllowedWhileMerging = 0;
   // Set to true if this attribute meaningful when applied to or inherited 
   // in a class template definition.
   bit MeaningfulToClassTemplateDefinition = 0;
@@ -452,12 +487,16 @@
 class StmtAttr : Attr;
 
 /// An inheritable attribute is inherited by later redeclarations.
-class InheritableAttr : Attr;
+class InheritableAttr : Attr {
+  // Set to true if this attribute can be duplicated on a subject when inheriting
+  // attributes from prior declarations.
+  bit InheritEvenIfAlreadyPresent = 0;
+}
 
 /// A target-specific attribute.  This class is meant to be used as a mixin
 /// with InheritableAttr or Attr depending on the attribute's needs.
-class TargetSpecificAttr<TargetArch target> {
-  TargetArch Target = target;
+class TargetSpecificAttr<TargetSpec target> {
+  TargetSpec Target = target;
   // Attributes are generally required to have unique spellings for their names
   // so that the parser can determine what kind of attribute it has parsed.
   // However, target-specific attributes are special in that the attribute only
@@ -498,14 +537,13 @@
 def AbiTag : Attr {
   let Spellings = [GCC<"abi_tag">];
   let Args = [VariadicStringArgument<"Tags">];
-  let Subjects = SubjectList<[Struct, Var, Function, Namespace], ErrorDiag,
-      "ExpectedStructClassVariableFunctionOrInlineNamespace">;
+  let Subjects = SubjectList<[Struct, Var, Function, Namespace], ErrorDiag>;
   let MeaningfulToClassTemplateDefinition = 1;
   let Documentation = [AbiTagsDocs];
 }
 
 def AddressSpace : TypeAttr {
-  let Spellings = [GNU<"address_space">];
+  let Spellings = [Clang<"address_space">];
   let Args = [IntArgument<"AddressSpace">];
   let Documentation = [Undocumented];
 }
@@ -513,15 +551,13 @@
 def Alias : Attr {
   let Spellings = [GCC<"alias">];
   let Args = [StringArgument<"Aliasee">];
-  let Subjects = SubjectList<[Function, GlobalVar], ErrorDiag,
-                             "ExpectedFunctionOrGlobalVar">;
+  let Subjects = SubjectList<[Function, GlobalVar], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
 def Aligned : InheritableAttr {
   let Spellings = [GCC<"aligned">, Declspec<"align">, Keyword<"alignas">,
                    Keyword<"_Alignas">];
-//  let Subjects = SubjectList<[NonBitField, NormalVar, Tag]>;
   let Args = [AlignedArgument<"Alignment", 1>];
   let Accessors = [Accessor<"isGNU", [GCC<"aligned">]>,
                    Accessor<"isC11", [Keyword<"_Alignas">]>,
@@ -539,13 +575,15 @@
     // the future (and a corresponding C++ attribute), but this can be done
     // later once we decide if we also want them to have slightly-different
     // semantics than Intel's align_value.
+    // 
+    // Does not get a [[]] spelling because the attribute is not exposed as such
+    // by Intel.
     GNU<"align_value">
     // Intel's compiler on Windows also supports:
     // , Declspec<"align_value">
   ];
   let Args = [ExprArgument<"Alignment">];
-  let Subjects = SubjectList<[Var, TypedefName], WarnDiag,
-                             "ExpectedVariableOrTypedef">;
+  let Subjects = SubjectList<[Var, TypedefName]>;
   let Documentation = [AlignValueDocs];
 }
 
@@ -562,45 +600,53 @@
   let Documentation = [Undocumented];
 }
 
+def Artificial : InheritableAttr {
+  let Spellings = [GCC<"artificial">];
+  let Subjects = SubjectList<[InlineFunction], WarnDiag>;
+  let Documentation = [ArtificialDocs];
+}
+
 def XRayInstrument : InheritableAttr {
-  let Spellings = [GNU<"xray_always_instrument">,
-                   CXX11<"clang", "xray_always_instrument">,
-                   GNU<"xray_never_instrument">,
-                   CXX11<"clang", "xray_never_instrument">];
-  let Subjects = SubjectList<[CXXMethod, ObjCMethod, Function], WarnDiag,
-                              "ExpectedFunctionOrMethod">;
+  let Spellings = [Clang<"xray_always_instrument">,
+                   Clang<"xray_never_instrument">];
+  let Subjects = SubjectList<[Function, ObjCMethod]>;
   let Accessors = [Accessor<"alwaysXRayInstrument",
-                     [GNU<"xray_always_instrument">,
-                      CXX11<"clang", "xray_always_instrument">]>,
+                     [Clang<"xray_always_instrument">]>,
                    Accessor<"neverXRayInstrument",
-                     [GNU<"xray_never_instrument">,
-                      CXX11<"clang", "xray_never_instrument">]>];
+                     [Clang<"xray_never_instrument">]>];
   let Documentation = [XRayDocs];
 }
 
 def XRayLogArgs : InheritableAttr {
-  let Spellings = [GNU<"xray_log_args">, CXX11<"clang", "xray_log_args">];
-  let Subjects = SubjectList<
-      [CXXMethod, ObjCMethod, Function], WarnDiag, "ExpectedFunctionOrMethod"
-  >;
+  let Spellings = [Clang<"xray_log_args">];
+  let Subjects = SubjectList<[Function, ObjCMethod]>;
+  // This argument is a count not an index, so it has the same encoding (base
+  // 1 including C++ implicit this parameter) at the source and LLVM levels of
+  // representation, so ParamIdxArgument is inappropriate.  It is never used
+  // at the AST level of representation, so it never needs to be adjusted not
+  // to include any C++ implicit this parameter.  Thus, we just store it and
+  // use it as an unsigned that never needs adjustment.
   let Args = [UnsignedArgument<"ArgumentCount">];
   let Documentation = [XRayDocs];
 }
 
 def TLSModel : InheritableAttr {
   let Spellings = [GCC<"tls_model">];
-  let Subjects = SubjectList<[TLSVar], ErrorDiag, "ExpectedTLSVar">;
+  let Subjects = SubjectList<[TLSVar], ErrorDiag>;
   let Args = [StringArgument<"Model">];
   let Documentation = [TLSModelDocs];
 }
 
 def AnalyzerNoReturn : InheritableAttr {
+  // TODO: should this attribute be exposed with a [[]] spelling under the clang
+  // vendor namespace, or should it use a vendor namespace specific to the
+  // analyzer?
   let Spellings = [GNU<"analyzer_noreturn">];
   let Documentation = [Undocumented];
 }
 
 def Annotate : InheritableParamAttr {
-  let Spellings = [GNU<"annotate">];
+  let Spellings = [Clang<"annotate">];
   let Args = [StringArgument<"Annotation">];
   // Ensure that the annotate attribute can be used with
   // '#pragma clang attribute' even though it has no subject list.
@@ -642,7 +688,7 @@
 }
 
 def Availability : InheritableAttr {
-  let Spellings = [GNU<"availability">];
+  let Spellings = [Clang<"availability">];
   let Args = [IdentifierArgument<"platform">, VersionArgument<"introduced">,
               VersionArgument<"deprecated">, VersionArgument<"obsoleted">,
               BoolArgument<"unavailable">, StringArgument<"message">,
@@ -686,14 +732,13 @@
              .Default(Platform);
 } }];
   let HasCustomParsing = 1;
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Subjects = SubjectList<[Named]>;
   let Documentation = [AvailabilityDocs];
 }
 
 def ExternalSourceSymbol : InheritableAttr {
-  let Spellings = [GNU<"external_source_symbol">,
-                   CXX11<"clang", "external_source_symbol">];
+  let Spellings = [Clang<"external_source_symbol">];
   let Args = [StringArgument<"language", 1>,
               StringArgument<"definedIn", 1>,
               BoolArgument<"generatedDeclaration", 1>];
@@ -703,12 +748,13 @@
 }
 
 def Blocks : InheritableAttr {
-  let Spellings = [GNU<"blocks">];
+  let Spellings = [Clang<"blocks">];
   let Args = [EnumArgument<"Type", "BlockType", ["byref"], ["ByRef"]>];
   let Documentation = [Undocumented];
 }
 
 def Bounded : IgnoredAttr {
+  // Does not have a [[]] spelling because the attribute is ignored.
   let Spellings = [GNU<"bounded">];
 }
 
@@ -730,7 +776,7 @@
 // cf_returns_retained attributes.  It is generally applied by
 // '#pragma clang arc_cf_code_audited' rather than explicitly.
 def CFAuditedTransfer : InheritableAttr {
-  let Spellings = [GNU<"cf_audited_transfer">];
+  let Spellings = [Clang<"cf_audited_transfer">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
   let Documentation = [Undocumented];
 }
@@ -739,25 +785,25 @@
 // It indicates that the function has unknown or unautomatable
 // transfer semantics.
 def CFUnknownTransfer : InheritableAttr {
-  let Spellings = [GNU<"cf_unknown_transfer">];
+  let Spellings = [Clang<"cf_unknown_transfer">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
 def CFReturnsRetained : InheritableAttr {
-  let Spellings = [GNU<"cf_returns_retained">];
+  let Spellings = [Clang<"cf_returns_retained">];
 //  let Subjects = SubjectList<[ObjCMethod, ObjCProperty, Function]>;
   let Documentation = [Undocumented];
 }
 
 def CFReturnsNotRetained : InheritableAttr {
-  let Spellings = [GNU<"cf_returns_not_retained">];
+  let Spellings = [Clang<"cf_returns_not_retained">];
 //  let Subjects = SubjectList<[ObjCMethod, ObjCProperty, Function]>;
   let Documentation = [Undocumented];
 }
 
 def CFConsumed : InheritableParamAttr {
-  let Spellings = [GNU<"cf_consumed">];
+  let Spellings = [Clang<"cf_consumed">];
   let Subjects = SubjectList<[ParmVar]>;
   let Documentation = [Undocumented];
 }
@@ -765,7 +811,7 @@
 def Cleanup : InheritableAttr {
   let Spellings = [GCC<"cleanup">];
   let Args = [FunctionArgument<"FunctionDecl">];
-  let Subjects = SubjectList<[Var]>;
+  let Subjects = SubjectList<[LocalVar]>;
   let Documentation = [Undocumented];
 }
 
@@ -793,8 +839,8 @@
   let Documentation = [Undocumented];
 }
 
-// CUDA attributes are spelled __attribute__((attr)) or __declspec(__attr__).
-
+// CUDA attributes are spelled __attribute__((attr)) or __declspec(__attr__),
+// and they do not receive a [[]] spelling.
 def CUDAConstant : InheritableAttr {
   let Spellings = [GNU<"constant">, Declspec<"__constant__">];
   let Subjects = SubjectList<[Var]>;
@@ -856,8 +902,7 @@
   let Spellings = [GNU<"launch_bounds">, Declspec<"__launch_bounds__">];
   let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>];
   let LangOpts = [CUDA];
-  let Subjects = SubjectList<[ObjCMethod, FunctionLike], WarnDiag,
-                             "ExpectedFunctionOrMethod">;
+  let Subjects = SubjectList<[ObjCMethod, FunctionLike]>;
   // An AST node is created for this attribute, but is not used by other parts
   // of the compiler. However, this node needs to exist in the AST because
   // non-LLVM backends may be relying on the attribute's presence.
@@ -879,11 +924,13 @@
 }
 
 def CXX11NoReturn : InheritableAttr {
-  let Spellings = [CXX11<"","noreturn", 200809>];
+  let Spellings = [CXX11<"", "noreturn", 200809>];
   let Subjects = SubjectList<[Function], ErrorDiag>;
   let Documentation = [CXX11NoReturnDocs];
 }
 
+// Similar to CUDA, OpenCL attributes do not receive a [[]] spelling because
+// the specification does not expose them with one currently.
 def OpenCLKernel : InheritableAttr {
   let Spellings = [Keyword<"__kernel">, Keyword<"kernel">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
@@ -909,8 +956,7 @@
   let Spellings = [Keyword<"__read_only">, Keyword<"read_only">,
                    Keyword<"__write_only">, Keyword<"write_only">,
                    Keyword<"__read_write">, Keyword<"read_write">];
-  let Subjects = SubjectList<[ParmVar, TypedefName], ErrorDiag,
-                             "ExpectedParameterOrTypedef">;
+  let Subjects = SubjectList<[ParmVar, TypedefName], ErrorDiag>;
   let Accessors = [Accessor<"isReadOnly", [Keyword<"__read_only">,
                                            Keyword<"read_only">]>,
                    Accessor<"isReadWrite", [Keyword<"__read_write">,
@@ -987,12 +1033,16 @@
 def AllocSize : InheritableAttr {
   let Spellings = [GCC<"alloc_size">];
   let Subjects = SubjectList<[Function]>;
-  let Args = [IntArgument<"ElemSizeParam">, IntArgument<"NumElemsParam", 1>];
+  let Args = [ParamIdxArgument<"ElemSizeParam">,
+              ParamIdxArgument<"NumElemsParam", /*opt*/ 1>];
   let TemplateDependent = 1;
   let Documentation = [AllocSizeDocs];
 }
 
 def EnableIf : InheritableAttr {
+  // Does not have a [[]] spelling because this attribute requires the ability
+  // to parse function arguments but the attribute is not written in the type
+  // position.
   let Spellings = [GNU<"enable_if">];
   let Subjects = SubjectList<[Function]>;
   let Args = [ExprArgument<"Cond">, StringArgument<"Message">];
@@ -1001,6 +1051,7 @@
 }
 
 def ExtVectorType : Attr {
+  // This is an OpenCL-related attribute and does not receive a [[]] spelling.
   let Spellings = [GNU<"ext_vector_type">];
   let Subjects = SubjectList<[TypedefName], ErrorDiag>;
   let Args = [ExprArgument<"NumElements">];
@@ -1035,20 +1086,19 @@
 }
 
 def MinSize : InheritableAttr {
-  let Spellings = [GNU<"minsize">];
+  let Spellings = [Clang<"minsize">];
   let Subjects = SubjectList<[Function, ObjCMethod], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
 def FlagEnum : InheritableAttr {
-  let Spellings = [GNU<"flag_enum">];
+  let Spellings = [Clang<"flag_enum">];
   let Subjects = SubjectList<[Enum]>;
   let Documentation = [FlagEnumDocs];
 }
 
 def EnumExtensibility : InheritableAttr {
-  let Spellings = [GNU<"enum_extensibility">,
-                   CXX11<"clang", "enum_extensibility">];
+  let Spellings = [Clang<"enum_extensibility">];
   let Subjects = SubjectList<[Enum]>;
   let Args = [EnumArgument<"Extensibility", "Kind",
               ["closed", "open"], ["Closed", "Open"]>];
@@ -1065,16 +1115,14 @@
   let Spellings = [GCC<"format">];
   let Args = [IdentifierArgument<"Type">, IntArgument<"FormatIdx">,
               IntArgument<"FirstArg">];
-  let Subjects = SubjectList<[ObjCMethod, Block, HasFunctionProto], WarnDiag,
-                             "ExpectedFunctionWithProtoType">;
+  let Subjects = SubjectList<[ObjCMethod, Block, HasFunctionProto]>;
   let Documentation = [FormatDocs];
 }
 
 def FormatArg : InheritableAttr {
   let Spellings = [GCC<"format_arg">];
-  let Args = [IntArgument<"FormatIdx">];
-  let Subjects = SubjectList<[ObjCMethod, HasFunctionProto], WarnDiag,
-                             "ExpectedFunctionWithProtoType">;
+  let Args = [ParamIdxArgument<"FormatIdx">];
+  let Subjects = SubjectList<[ObjCMethod, HasFunctionProto]>;
   let Documentation = [Undocumented];
 }
 
@@ -1093,9 +1141,8 @@
 }
 
 def IBAction : InheritableAttr {
-  let Spellings = [GNU<"ibaction">];
-  let Subjects = SubjectList<[ObjCInstanceMethod], WarnDiag,
-                             "ExpectedObjCInstanceMethod">;
+  let Spellings = [Clang<"ibaction">];
+  let Subjects = SubjectList<[ObjCInstanceMethod]>;
   // An AST node is created for this attribute, but is not used by other parts
   // of the compiler. However, this node needs to exist in the AST because
   // external tools rely on it.
@@ -1103,19 +1150,19 @@
 }
 
 def IBOutlet : InheritableAttr {
-  let Spellings = [GNU<"iboutlet">];
+  let Spellings = [Clang<"iboutlet">];
 //  let Subjects = [ObjCIvar, ObjCProperty];
   let Documentation = [Undocumented];
 }
 
 def IBOutletCollection : InheritableAttr {
-  let Spellings = [GNU<"iboutletcollection">];
+  let Spellings = [Clang<"iboutletcollection">];
   let Args = [TypeArgument<"Interface", 1>];
 //  let Subjects = [ObjCIvar, ObjCProperty];
   let Documentation = [Undocumented];
 }
 
-def IFunc : Attr {
+def IFunc : Attr, TargetSpecificAttr<TargetELF> {
   let Spellings = [GCC<"ifunc">];
   let Args = [StringArgument<"Resolver">];
   let Subjects = SubjectList<[Function]>;
@@ -1135,6 +1182,15 @@
   let Documentation = [LayoutVersionDocs];
 }
 
+def TrivialABI : InheritableAttr {
+  // This attribute does not have a C [[]] spelling because it requires the
+  // CPlusPlus language option.
+  let Spellings = [Clang<"trivial_abi", 0>];
+  let Subjects = SubjectList<[CXXRecord]>;
+  let Documentation = [TrivialABIDocs];
+  let LangOpts = [CPlusPlus];
+}
+
 def MaxFieldAlignment : InheritableAttr {
   // This attribute has no spellings as it is only ever created implicitly.
   let Spellings = [];
@@ -1207,8 +1263,7 @@
 
 def Mode : Attr {
   let Spellings = [GCC<"mode">];
-  let Subjects = SubjectList<[Var, Enum, TypedefName, Field], ErrorDiag,
-                             "ExpectedVariableEnumFieldOrTypedef">;
+  let Subjects = SubjectList<[Var, Enum, TypedefName, Field], ErrorDiag>;
   let Args = [IdentifierArgument<"Mode">];
   let Documentation = [Undocumented];
 }
@@ -1220,13 +1275,13 @@
 }
 
 def NeonPolyVectorType : TypeAttr {
-  let Spellings = [GNU<"neon_polyvector_type">];
+  let Spellings = [Clang<"neon_polyvector_type">];
   let Args = [IntArgument<"NumElements">];
   let Documentation = [Undocumented];
 }
 
 def NeonVectorType : TypeAttr {
-  let Spellings = [GNU<"neon_vector_type">];
+  let Spellings = [Clang<"neon_vector_type">];
   let Args = [IntArgument<"NumElements">];
   let Documentation = [Undocumented];
 }
@@ -1238,8 +1293,7 @@
 }
 
 def DisableTailCalls : InheritableAttr {
-  let Spellings = [GNU<"disable_tail_calls">,
-                   CXX11<"clang", "disable_tail_calls">];
+  let Spellings = [Clang<"disable_tail_calls">];
   let Subjects = SubjectList<[Function, ObjCMethod]>;
   let Documentation = [DisableTailCallsDocs];
 }
@@ -1258,19 +1312,18 @@
 
 def NoDebug : InheritableAttr {
   let Spellings = [GCC<"nodebug">];
-  let Subjects = SubjectList<[FunctionLike, ObjCMethod, NonParmVar], WarnDiag,
-                              "ExpectedVariableOrFunction">;
+  let Subjects = SubjectList<[FunctionLike, ObjCMethod, NonParmVar]>;
   let Documentation = [NoDebugDocs];
 }
 
 def NoDuplicate : InheritableAttr {
-  let Spellings = [GNU<"noduplicate">, CXX11<"clang", "noduplicate">];
+  let Spellings = [Clang<"noduplicate">];
   let Subjects = SubjectList<[Function]>;
   let Documentation = [NoDuplicateDocs];
 }
 
 def Convergent : InheritableAttr {
-  let Spellings = [GNU<"convergent">, CXX11<"clang", "convergent">];
+  let Spellings = [Clang<"convergent">];
   let Subjects = SubjectList<[Function]>;
   let Documentation = [ConvergentDocs];
 }
@@ -1311,31 +1364,31 @@
 // this should be rejected on non-kernels.
 
 def AMDGPUFlatWorkGroupSize : InheritableAttr {
-  let Spellings = [GNU<"amdgpu_flat_work_group_size">];
+  let Spellings = [Clang<"amdgpu_flat_work_group_size", 0>];
   let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max">];
   let Documentation = [AMDGPUFlatWorkGroupSizeDocs];
-  let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
+  let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
 }
 
 def AMDGPUWavesPerEU : InheritableAttr {
-  let Spellings = [GNU<"amdgpu_waves_per_eu">];
+  let Spellings = [Clang<"amdgpu_waves_per_eu", 0>];
   let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max", 1>];
   let Documentation = [AMDGPUWavesPerEUDocs];
-  let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
+  let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
 }
 
 def AMDGPUNumSGPR : InheritableAttr {
-  let Spellings = [GNU<"amdgpu_num_sgpr">];
+  let Spellings = [Clang<"amdgpu_num_sgpr", 0>];
   let Args = [UnsignedArgument<"NumSGPR">];
   let Documentation = [AMDGPUNumSGPRNumVGPRDocs];
-  let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
+  let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
 }
 
 def AMDGPUNumVGPR : InheritableAttr {
-  let Spellings = [GNU<"amdgpu_num_vgpr">];
+  let Spellings = [Clang<"amdgpu_num_vgpr", 0>];
   let Args = [UnsignedArgument<"NumVGPR">];
   let Documentation = [AMDGPUNumSGPRNumVGPRDocs];
-  let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
+  let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
 }
 
 def NoSplitStack : InheritableAttr {
@@ -1347,33 +1400,32 @@
 def NonNull : InheritableParamAttr {
   let Spellings = [GCC<"nonnull">];
   let Subjects = SubjectList<[ObjCMethod, HasFunctionProto, ParmVar], WarnDiag,
-                             "ExpectedFunctionMethodOrParameter">;
-  let Args = [VariadicUnsignedArgument<"Args">];
-  let AdditionalMembers =
-[{bool isNonNull(unsigned idx) const {
-    if (!args_size())
-      return true;
-    for (const auto &V : args())
-      if (V == idx)
+                             "functions, methods, and parameters">;
+  let Args = [VariadicParamIdxArgument<"Args">];
+  let AdditionalMembers = [{
+    bool isNonNull(unsigned IdxAST) const {
+      if (!args_size())
         return true;
-    return false;
-  } }];
+      return args_end() != std::find_if(
+          args_begin(), args_end(),
+          [=](const ParamIdx &Idx) { return Idx.getASTIndex() == IdxAST; });
+    }
+  }];
   // FIXME: We should merge duplicates into a single nonnull attribute.
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Documentation = [NonNullDocs];
 }
 
 def ReturnsNonNull : InheritableAttr {
   let Spellings = [GCC<"returns_nonnull">];
-  let Subjects = SubjectList<[ObjCMethod, Function], WarnDiag,
-                             "ExpectedFunctionOrMethod">;
+  let Subjects = SubjectList<[ObjCMethod, Function]>;
   let Documentation = [ReturnsNonNullDocs];
 }
 
 // pass_object_size(N) indicates that the parameter should have
 // __builtin_object_size with Type=N evaluated on the parameter at the callsite.
 def PassObjectSize : InheritableParamAttr {
-  let Spellings = [GNU<"pass_object_size">];
+  let Spellings = [Clang<"pass_object_size">];
   let Args = [IntArgument<"Type">];
   let Subjects = SubjectList<[ParmVar]>;
   let Documentation = [PassObjectSizeDocs];
@@ -1401,7 +1453,7 @@
 }
 
 def NoEscape : Attr {
-  let Spellings = [GNU<"noescape">, CXX11<"clang", "noescape">];
+  let Spellings = [Clang<"noescape">];
   let Subjects = SubjectList<[ParmVar]>;
   let Documentation = [NoEscapeDocs];
 }
@@ -1415,9 +1467,8 @@
 
 def AllocAlign : InheritableAttr {
   let Spellings = [GCC<"alloc_align">];
-  let Subjects = SubjectList<[HasFunctionProto], WarnDiag,
-                             "ExpectedFunctionWithProtoType">;
-  let Args = [IntArgument<"ParamIndex">];
+  let Subjects = SubjectList<[HasFunctionProto]>;
+  let Args = [ParamIdxArgument<"ParamIndex">];
   let Documentation = [AllocAlignDocs];
 }
 
@@ -1434,7 +1485,7 @@
 }
 
 def NotTailCalled : InheritableAttr {
-  let Spellings = [GNU<"not_tail_called">, CXX11<"clang", "not_tail_called">];
+  let Spellings = [Clang<"not_tail_called">];
   let Subjects = SubjectList<[Function]>;
   let Documentation = [NotTailCalledDocs];
 }
@@ -1447,74 +1498,74 @@
 
 def NvWeak : IgnoredAttr {
   // No Declspec spelling of this attribute; the CUDA headers use
-  // __attribute__((nv_weak)) unconditionally.
+  // __attribute__((nv_weak)) unconditionally. Does not receive an [[]]
+  // spelling because it is a CUDA attribute.
   let Spellings = [GNU<"nv_weak">];
   let LangOpts = [CUDA];
 }
 
 def ObjCBridge : InheritableAttr {
-  let Spellings = [GNU<"objc_bridge">];
-  let Subjects = SubjectList<[Record, TypedefName], ErrorDiag,
-                             "ExpectedStructOrUnionOrTypedef">;
+  let Spellings = [Clang<"objc_bridge">];
+  let Subjects = SubjectList<[Record, TypedefName], ErrorDiag>;
   let Args = [IdentifierArgument<"BridgedType">];
   let Documentation = [Undocumented];
 }
 
 def ObjCBridgeMutable : InheritableAttr {
-  let Spellings = [GNU<"objc_bridge_mutable">];
+  let Spellings = [Clang<"objc_bridge_mutable">];
   let Subjects = SubjectList<[Record], ErrorDiag>;
   let Args = [IdentifierArgument<"BridgedType">];
   let Documentation = [Undocumented];
 }
 
 def ObjCBridgeRelated : InheritableAttr {
-  let Spellings = [GNU<"objc_bridge_related">];
+  let Spellings = [Clang<"objc_bridge_related">];
   let Subjects = SubjectList<[Record], ErrorDiag>;
   let Args = [IdentifierArgument<"RelatedClass">,
-          IdentifierArgument<"ClassMethod", 1>,
-          IdentifierArgument<"InstanceMethod", 1>];
+          IdentifierArgument<"ClassMethod">,
+          IdentifierArgument<"InstanceMethod">];
   let HasCustomParsing = 1;
   let Documentation = [Undocumented];
 }
 
 def NSReturnsRetained : InheritableAttr {
-  let Spellings = [GNU<"ns_returns_retained">];
+  let Spellings = [Clang<"ns_returns_retained">];
 //  let Subjects = SubjectList<[ObjCMethod, ObjCProperty, Function]>;
   let Documentation = [Undocumented];
 }
 
 def NSReturnsNotRetained : InheritableAttr {
-  let Spellings = [GNU<"ns_returns_not_retained">];
+  let Spellings = [Clang<"ns_returns_not_retained">];
 //  let Subjects = SubjectList<[ObjCMethod, ObjCProperty, Function]>;
   let Documentation = [Undocumented];
 }
 
 def NSReturnsAutoreleased : InheritableAttr {
-  let Spellings = [GNU<"ns_returns_autoreleased">];
+  let Spellings = [Clang<"ns_returns_autoreleased">];
 //  let Subjects = SubjectList<[ObjCMethod, ObjCProperty, Function]>;
   let Documentation = [Undocumented];
 }
 
 def NSConsumesSelf : InheritableAttr {
-  let Spellings = [GNU<"ns_consumes_self">];
+  let Spellings = [Clang<"ns_consumes_self">];
   let Subjects = SubjectList<[ObjCMethod]>;
   let Documentation = [Undocumented];
 }
 
 def NSConsumed : InheritableParamAttr {
-  let Spellings = [GNU<"ns_consumed">];
+  let Spellings = [Clang<"ns_consumed">];
   let Subjects = SubjectList<[ParmVar]>;
   let Documentation = [Undocumented];
 }
 
 def ObjCException : InheritableAttr {
-  let Spellings = [GNU<"objc_exception">];
+  let Spellings = [Clang<"objc_exception">];
   let Subjects = SubjectList<[ObjCInterface], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
 def ObjCMethodFamily : InheritableAttr {
-  let Spellings = [GNU<"objc_method_family">];
+  let Spellings = [Clang<"objc_method_family">];
   let Subjects = SubjectList<[ObjCMethod], ErrorDiag>;
   let Args = [EnumArgument<"Family", "FamilyKind",
                ["none", "alloc", "copy", "init", "mutableCopy", "new"],
@@ -1524,85 +1575,84 @@
 }
 
 def ObjCNSObject : InheritableAttr {
-  let Spellings = [GNU<"NSObject">];
+  let Spellings = [Clang<"NSObject">];
   let Documentation = [Undocumented];
 }
 
 def ObjCIndependentClass : InheritableAttr {
-  let Spellings = [GNU<"objc_independent_class">];
+  let Spellings = [Clang<"objc_independent_class">];
   let Documentation = [Undocumented];
 }
 
 def ObjCPreciseLifetime : InheritableAttr {
-  let Spellings = [GNU<"objc_precise_lifetime">];
+  let Spellings = [Clang<"objc_precise_lifetime">];
   let Subjects = SubjectList<[Var], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
 def ObjCReturnsInnerPointer : InheritableAttr {
-  let Spellings = [GNU<"objc_returns_inner_pointer">];
+  let Spellings = [Clang<"objc_returns_inner_pointer">];
   let Subjects = SubjectList<[ObjCMethod, ObjCProperty], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
 def ObjCRequiresSuper : InheritableAttr {
-  let Spellings = [GNU<"objc_requires_super">];
+  let Spellings = [Clang<"objc_requires_super">];
   let Subjects = SubjectList<[ObjCMethod], ErrorDiag>;
   let Documentation = [ObjCRequiresSuperDocs];
 }
 
 def ObjCRootClass : InheritableAttr {
-  let Spellings = [GNU<"objc_root_class">];
+  let Spellings = [Clang<"objc_root_class">];
   let Subjects = SubjectList<[ObjCInterface], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
 def ObjCSubclassingRestricted : InheritableAttr {
-  let Spellings = [GNU<"objc_subclassing_restricted">];
+  let Spellings = [Clang<"objc_subclassing_restricted">];
   let Subjects = SubjectList<[ObjCInterface], ErrorDiag>;
   let Documentation = [ObjCSubclassingRestrictedDocs];
 }
 
 def ObjCExplicitProtocolImpl : InheritableAttr {
-  let Spellings = [GNU<"objc_protocol_requires_explicit_implementation">];
+  let Spellings = [Clang<"objc_protocol_requires_explicit_implementation">];
   let Subjects = SubjectList<[ObjCProtocol], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
 def ObjCDesignatedInitializer : Attr {
-  let Spellings = [GNU<"objc_designated_initializer">];
-  let Subjects = SubjectList<[ObjCInterfaceDeclInitMethod], ErrorDiag,
-                             "ExpectedObjCInterfaceDeclInitMethod">;
+  let Spellings = [Clang<"objc_designated_initializer">];
+  let Subjects = SubjectList<[ObjCInterfaceDeclInitMethod], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
 def ObjCRuntimeName : Attr {
-  let Spellings = [GNU<"objc_runtime_name">];
+  let Spellings = [Clang<"objc_runtime_name">];
   let Subjects = SubjectList<[ObjCInterface, ObjCProtocol], ErrorDiag>;
   let Args = [StringArgument<"MetadataName">];
   let Documentation = [ObjCRuntimeNameDocs];
 }
 
 def ObjCRuntimeVisible : Attr {
-  let Spellings = [GNU<"objc_runtime_visible">];
+  let Spellings = [Clang<"objc_runtime_visible">];
   let Subjects = SubjectList<[ObjCInterface], ErrorDiag>;
   let Documentation = [ObjCRuntimeVisibleDocs];
 }
 
 def ObjCBoxable : Attr {
-  let Spellings = [GNU<"objc_boxable">];
-  let Subjects = SubjectList<[Record], ErrorDiag, "ExpectedStructOrUnion">;
+  let Spellings = [Clang<"objc_boxable">];
+  let Subjects = SubjectList<[Record], ErrorDiag>;
   let Documentation = [ObjCBoxableDocs];
 }
 
 def OptimizeNone : InheritableAttr {
-  let Spellings = [GNU<"optnone">, CXX11<"clang", "optnone">];
+  let Spellings = [Clang<"optnone">];
   let Subjects = SubjectList<[Function, ObjCMethod]>;
   let Documentation = [OptnoneDocs];
 }
 
 def Overloadable : Attr {
-  let Spellings = [GNU<"overloadable">];
+  let Spellings = [Clang<"overloadable">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
   let Documentation = [OverloadableDocs];
 }
@@ -1614,11 +1664,11 @@
 }
 
 def Ownership : InheritableAttr {
-  let Spellings = [GNU<"ownership_holds">, GNU<"ownership_returns">,
-                   GNU<"ownership_takes">];
-  let Accessors = [Accessor<"isHolds", [GNU<"ownership_holds">]>,
-                   Accessor<"isReturns", [GNU<"ownership_returns">]>,
-                   Accessor<"isTakes", [GNU<"ownership_takes">]>];
+  let Spellings = [Clang<"ownership_holds">, Clang<"ownership_returns">,
+                   Clang<"ownership_takes">];
+  let Accessors = [Accessor<"isHolds", [Clang<"ownership_holds">]>,
+                   Accessor<"isReturns", [Clang<"ownership_returns">]>,
+                   Accessor<"isTakes", [Clang<"ownership_takes">]>];
   let AdditionalMembers = [{
     enum OwnershipKind { Holds, Returns, Takes };
     OwnershipKind getOwnKind() const {
@@ -1627,9 +1677,9 @@
              Returns;
     }
   }];
-  let Args = [IdentifierArgument<"Module">, VariadicUnsignedArgument<"Args">];
-  let Subjects = SubjectList<[HasFunctionProto], WarnDiag,
-                             "ExpectedFunctionWithProtoType">;
+  let Args = [IdentifierArgument<"Module">,
+              VariadicParamIdxArgument<"Args">];
+  let Subjects = SubjectList<[HasFunctionProto]>;
   let Documentation = [Undocumented];
 }
 
@@ -1640,7 +1690,7 @@
 }
 
 def IntelOclBicc : InheritableAttr {
-  let Spellings = [GNU<"intel_ocl_bicc">];
+  let Spellings = [Clang<"intel_ocl_bicc", 0>];
 //  let Subjects = [Function, ObjCMethod];
   let Documentation = [Undocumented];
 }
@@ -1666,6 +1716,7 @@
 }
 
 def ReqdWorkGroupSize : InheritableAttr {
+  // Does not have a [[]] spelling because it is an OpenCL-related attribute.
   let Spellings = [GNU<"reqd_work_group_size">];
   let Args = [UnsignedArgument<"XDim">, UnsignedArgument<"YDim">,
               UnsignedArgument<"ZDim">];
@@ -1674,15 +1725,16 @@
 }
 
 def RequireConstantInit : InheritableAttr {
-  let Spellings = [GNU<"require_constant_initialization">,
-                   CXX11<"clang", "require_constant_initialization">];
-  let Subjects = SubjectList<[GlobalVar], ErrorDiag,
-                              "ExpectedStaticOrTLSVar">;
+  // This attribute does not have a C [[]] spelling because it requires the
+  // CPlusPlus language option.
+  let Spellings = [Clang<"require_constant_initialization", 0>];
+  let Subjects = SubjectList<[GlobalVar], ErrorDiag>;
   let Documentation = [RequireConstantInitDocs];
   let LangOpts = [CPlusPlus];
 }
 
 def WorkGroupSizeHint :  InheritableAttr {
+  // Does not have a [[]] spelling because it is an OpenCL-related attribute.
   let Spellings = [GNU<"work_group_size_hint">];
   let Args = [UnsignedArgument<"XDim">, 
               UnsignedArgument<"YDim">,
@@ -1701,9 +1753,8 @@
 def Section : InheritableAttr {
   let Spellings = [GCC<"section">, Declspec<"allocate">];
   let Args = [StringArgument<"Name">];
-  let Subjects = SubjectList<[Function, GlobalVar,
-                              ObjCMethod, ObjCProperty], ErrorDiag,
-                             "ExpectedFunctionGlobalVarMethodOrProperty">;
+  let Subjects =
+      SubjectList<[ Function, GlobalVar, ObjCMethod, ObjCProperty ], ErrorDiag>;
   let Documentation = [SectionDocs];
 }
 
@@ -1711,8 +1762,7 @@
   // This attribute has no spellings as it is only ever created implicitly.
   let Spellings = [];
   let Args = [StringArgument<"Name">];
-  let Subjects = SubjectList<[GlobalVar], ErrorDiag,
-                             "ExpectedFunctionMethodOrGlobalVar">;
+  let Subjects = SubjectList<[GlobalVar], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
@@ -1720,8 +1770,7 @@
   // This attribute has no spellings as it is only ever created implicitly.
   let Spellings = [];
   let Args = [StringArgument<"Name">];
-  let Subjects = SubjectList<[GlobalVar], ErrorDiag,
-                             "ExpectedFunctionMethodOrGlobalVar">;
+  let Subjects = SubjectList<[GlobalVar], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
@@ -1729,8 +1778,7 @@
   // This attribute has no spellings as it is only ever created implicitly.
   let Spellings = [];
   let Args = [StringArgument<"Name">];
-  let Subjects = SubjectList<[GlobalVar], ErrorDiag,
-                             "ExpectedFunctionMethodOrGlobalVar">;
+  let Subjects = SubjectList<[GlobalVar], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
@@ -1738,8 +1786,7 @@
   // This attribute has no spellings as it is only ever created implicitly.
   let Spellings = [];
   let Args = [StringArgument<"Name">];
-  let Subjects = SubjectList<[Function], ErrorDiag,
-                             "ExpectedFunctionMethodOrGlobalVar">;
+  let Subjects = SubjectList<[Function], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
@@ -1758,23 +1805,23 @@
 }
 
 def SwiftCall : InheritableAttr {
-  let Spellings = [GNU<"swiftcall">];
+  let Spellings = [Clang<"swiftcall">];
 //  let Subjects = SubjectList<[Function]>;
   let Documentation = [SwiftCallDocs];
 }
 
 def SwiftContext : ParameterABIAttr {
-  let Spellings = [GNU<"swift_context">];
+  let Spellings = [Clang<"swift_context">];
   let Documentation = [SwiftContextDocs];
 }
 
 def SwiftErrorResult : ParameterABIAttr {
-  let Spellings = [GNU<"swift_error_result">];
+  let Spellings = [Clang<"swift_error_result">];
   let Documentation = [SwiftErrorResultDocs];
 }
 
 def SwiftIndirectResult : ParameterABIAttr {
-  let Spellings = [GNU<"swift_indirect_result">];
+  let Spellings = [Clang<"swift_indirect_result">];
   let Documentation = [SwiftIndirectResultDocs];
 }
 
@@ -1798,25 +1845,25 @@
 }
 
 def VectorCall : InheritableAttr {
-  let Spellings = [GNU<"vectorcall">, Keyword<"__vectorcall">,
+  let Spellings = [Clang<"vectorcall">, Keyword<"__vectorcall">,
                    Keyword<"_vectorcall">];
 //  let Subjects = [Function, ObjCMethod];
   let Documentation = [VectorCallDocs];
 }
 
 def Pascal : InheritableAttr {
-  let Spellings = [GNU<"pascal">, Keyword<"__pascal">, Keyword<"_pascal">];
+  let Spellings = [Clang<"pascal">, Keyword<"__pascal">, Keyword<"_pascal">];
 //  let Subjects = [Function, ObjCMethod];
   let Documentation = [Undocumented];
 }
 
 def PreserveMost : InheritableAttr {
-  let Spellings = [GNU<"preserve_most">];
+  let Spellings = [Clang<"preserve_most">];
   let Documentation = [PreserveMostDocs];
 }
 
 def PreserveAll : InheritableAttr {
-  let Spellings = [GNU<"preserve_all">];
+  let Spellings = [Clang<"preserve_all">];
   let Documentation = [PreserveAllDocs];
 }
 
@@ -1830,12 +1877,27 @@
       std::vector<std::string> Features;
       StringRef Architecture;
       bool DuplicateArchitecture = false;
+      bool operator ==(const ParsedTargetAttr &Other) const {
+        return DuplicateArchitecture == Other.DuplicateArchitecture &&
+               Architecture == Other.Architecture && Features == Other.Features;
+      }
     };
     ParsedTargetAttr parse() const {
       return parse(getFeaturesStr());
     }
+
+    template<class Compare>
+    ParsedTargetAttr parse(Compare cmp) const {
+      ParsedTargetAttr Attrs = parse();
+      std::sort(std::begin(Attrs.Features), std::end(Attrs.Features), cmp);
+      return Attrs;
+    }
+
+    bool isDefaultVersion() const { return getFeaturesStr() == "default"; }
+
     static ParsedTargetAttr parse(StringRef Features) {
       ParsedTargetAttr Ret;
+      if (Features == "default") return Ret;
       SmallVector<StringRef, 1> AttrFeatures;
       Features.split(AttrFeatures, ",");
 
@@ -1877,7 +1939,7 @@
 }
 
 def Unavailable : InheritableAttr {
-  let Spellings = [GNU<"unavailable">];
+  let Spellings = [Clang<"unavailable">];
   let Args = [StringArgument<"Message", 1>,
               EnumArgument<"ImplicitReason", "ImplicitReason",
                 ["", "", "", ""],
@@ -1891,6 +1953,9 @@
 }
 
 def DiagnoseIf : InheritableAttr {
+  // Does not have a [[]] spelling because this attribute requires the ability
+  // to parse function arguments but the attribute is not written in the type
+  // position.
   let Spellings = [GNU<"diagnose_if">];
   let Subjects = SubjectList<[Function, ObjCMethod, ObjCProperty]>;
   let Args = [ExprArgument<"Cond">, StringArgument<"Message">,
@@ -1900,7 +1965,7 @@
                            ["DT_Error", "DT_Warning"]>,
               BoolArgument<"ArgDependent", 0, /*fake*/ 1>,
               NamedArgument<"Parent", 0, /*fake*/ 1>];
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let LateParsed = 1;
   let AdditionalMembers = [{
     bool isError() const { return diagnosticType == DT_Error; }
@@ -1911,26 +1976,26 @@
 }
 
 def ArcWeakrefUnavailable : InheritableAttr {
-  let Spellings = [GNU<"objc_arc_weak_reference_unavailable">];
+  let Spellings = [Clang<"objc_arc_weak_reference_unavailable">];
   let Subjects = SubjectList<[ObjCInterface], ErrorDiag>;
   let Documentation = [Undocumented];
 }
 
 def ObjCGC : TypeAttr {
-  let Spellings = [GNU<"objc_gc">];
+  let Spellings = [Clang<"objc_gc">];
   let Args = [IdentifierArgument<"Kind">];
   let Documentation = [Undocumented];
 }
 
 def ObjCOwnership : InheritableAttr {
-  let Spellings = [GNU<"objc_ownership">];
+  let Spellings = [Clang<"objc_ownership">];
   let Args = [IdentifierArgument<"Kind">];
   let ASTNode = 0;
   let Documentation = [Undocumented];
 }
 
 def ObjCRequiresPropertyDefs : InheritableAttr {
-  let Spellings = [GNU<"objc_requires_property_definitions">];
+  let Spellings = [Clang<"objc_requires_property_definitions">];
   let Subjects = SubjectList<[ObjCInterface], ErrorDiag>;
   let Documentation = [Undocumented];
 }
@@ -1939,20 +2004,20 @@
   let Spellings = [CXX11<"", "maybe_unused", 201603>, GCC<"unused">,
                    C2x<"", "maybe_unused">];
   let Subjects = SubjectList<[Var, ObjCIvar, Type, Enum, EnumConstant, Label,
-                              Field, ObjCMethod, FunctionLike], WarnDiag,
-                             "ExpectedForMaybeUnused">;
+                              Field, ObjCMethod, FunctionLike]>;
   let Documentation = [WarnMaybeUnusedDocs];
 }
 
 def Used : InheritableAttr {
   let Spellings = [GCC<"used">];
+  let Subjects = SubjectList<[NonLocalVar, Function, ObjCMethod]>;
   let Documentation = [Undocumented];
 }
 
 def Uuid : InheritableAttr {
   let Spellings = [Declspec<"uuid">, Microsoft<"uuid">];
   let Args = [StringArgument<"Guid">];
-  let Subjects = SubjectList<[Record, Enum], WarnDiag, "ExpectedEnumOrClass">;
+  let Subjects = SubjectList<[Record, Enum]>;
   // FIXME: Allow expressing logical AND for LangOpts. Our condition should be:
   // CPlusPlus && (MicrosoftExt || Borland)
   let LangOpts = [MicrosoftExt, Borland];
@@ -1966,6 +2031,7 @@
 }
 
 def VecTypeHint : InheritableAttr {
+  // Does not have a [[]] spelling because it is an OpenCL-related attribute.
   let Spellings = [GNU<"vec_type_hint">];
   let Args = [TypeArgument<"TypeHint">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
@@ -1984,7 +2050,7 @@
 
 def TypeVisibility : InheritableAttr {
   let Clone = 0;
-  let Spellings = [GNU<"type_visibility">, CXX11<"clang", "type_visibility">];
+  let Spellings = [Clang<"type_visibility">];
   let Args = [EnumArgument<"Visibility", "VisibilityType",
                            ["default", "hidden", "internal", "protected"],
                            ["Default", "Hidden", "Hidden", "Protected"]>];
@@ -1993,7 +2059,10 @@
 }
 
 def VecReturn : InheritableAttr {
-  let Spellings = [GNU<"vecreturn">];
+  // This attribute does not have a C [[]] spelling because it only appertains
+  // to C++ struct/class/union.
+  // FIXME: should this attribute have a CPlusPlus language option?
+  let Spellings = [Clang<"vecreturn", 0>];
   let Subjects = SubjectList<[CXXRecord], ErrorDiag>;
   let Documentation = [Undocumented];
 }
@@ -2008,8 +2077,7 @@
   let Spellings = [CXX11<"", "nodiscard", 201603>, C2x<"", "nodiscard">,
                    CXX11<"clang", "warn_unused_result">,
                    GCC<"warn_unused_result">];
-  let Subjects = SubjectList<[ObjCMethod, Enum, Record, FunctionLike],
-                             WarnDiag, "ExpectedFunctionMethodEnumOrClass">;
+  let Subjects = SubjectList<[ObjCMethod, Enum, Record, FunctionLike]>;
   let Documentation = [WarnUnusedResultsDocs];
 }
 
@@ -2020,7 +2088,7 @@
 }
 
 def WeakImport : InheritableAttr {
-  let Spellings = [GNU<"weak_import">];
+  let Spellings = [Clang<"weak_import">];
   let Documentation = [Undocumented];
 }
 
@@ -2033,7 +2101,7 @@
 }
 
 def LTOVisibilityPublic : InheritableAttr {
-  let Spellings = [CXX11<"clang", "lto_visibility_public">];
+  let Spellings = [Clang<"lto_visibility_public">];
   let Subjects = SubjectList<[Record]>;
   let Documentation = [LTOVisibilityDocs];
 }
@@ -2063,10 +2131,9 @@
 }
 
 def NoSanitize : InheritableAttr {
-  let Spellings = [GNU<"no_sanitize">, CXX11<"clang", "no_sanitize">];
+  let Spellings = [Clang<"no_sanitize">];
   let Args = [VariadicStringArgument<"Sanitizers">];
-  let Subjects = SubjectList<[Function, ObjCMethod, GlobalVar], ErrorDiag,
-    "ExpectedFunctionMethodOrGlobalVar">;
+  let Subjects = SubjectList<[Function, ObjCMethod, GlobalVar], ErrorDiag>;
   let Documentation = [NoSanitizeDocs];
   let AdditionalMembers = [{
     SanitizerMask getMask() const {
@@ -2087,27 +2154,28 @@
   let Spellings = [GCC<"no_address_safety_analysis">,
                    GCC<"no_sanitize_address">,
                    GCC<"no_sanitize_thread">,
-                   GNU<"no_sanitize_memory">];
-  let Subjects = SubjectList<[Function, GlobalVar], ErrorDiag,
-        "ExpectedFunctionOrGlobalVar">;
+                   Clang<"no_sanitize_memory">];
+  let Subjects = SubjectList<[Function, GlobalVar], ErrorDiag>;
   let Documentation = [NoSanitizeAddressDocs, NoSanitizeThreadDocs,
                        NoSanitizeMemoryDocs];
   let ASTNode = 0;
 }
 
 // C/C++ Thread safety attributes (e.g. for deadlock, data race checking)
-
+// Not all of these attributes will be given a [[]] spelling. The attributes
+// which require access to function parameter names cannot use the [[]] spelling
+// because they are not written in the type position. Some attributes are given
+// an updated captability-based name and the older name will only be supported
+// under the GNU-style spelling.
 def GuardedVar : InheritableAttr {
-  let Spellings = [GNU<"guarded_var">];
-  let Subjects = SubjectList<[Field, SharedVar], WarnDiag,
-                             "ExpectedFieldOrGlobalVar">;
+  let Spellings = [Clang<"guarded_var", 0>];
+  let Subjects = SubjectList<[Field, SharedVar]>;
   let Documentation = [Undocumented];
 }
 
 def PtGuardedVar : InheritableAttr {
-  let Spellings = [GNU<"pt_guarded_var">];
-  let Subjects = SubjectList<[Field, SharedVar], WarnDiag,
-                             "ExpectedFieldOrGlobalVar">;
+  let Spellings = [Clang<"pt_guarded_var", 0>];
+  let Subjects = SubjectList<[Field, SharedVar]>;
   let Documentation = [Undocumented];
 }
 
@@ -2119,21 +2187,17 @@
 }
 
 def ScopedLockable : InheritableAttr {
-  let Spellings = [GNU<"scoped_lockable">];
+  let Spellings = [Clang<"scoped_lockable", 0>];
   let Subjects = SubjectList<[Record]>;
   let Documentation = [Undocumented];
 }
 
 def Capability : InheritableAttr {
-  let Spellings = [GNU<"capability">, CXX11<"clang", "capability">,
-                   GNU<"shared_capability">,
-                   CXX11<"clang", "shared_capability">];
-  let Subjects = SubjectList<[Record, TypedefName], ErrorDiag,
-                             "ExpectedStructOrUnionOrTypedef">;
+  let Spellings = [Clang<"capability", 0>, Clang<"shared_capability", 0>];
+  let Subjects = SubjectList<[Record, TypedefName], ErrorDiag>;
   let Args = [StringArgument<"Name">];
   let Accessors = [Accessor<"isShared",
-                    [GNU<"shared_capability">,
-                     CXX11<"clang","shared_capability">]>];
+                    [Clang<"shared_capability", 0>]>];
   let Documentation = [Undocumented];
   let AdditionalMembers = [{
     bool isMutex() const { return getName().equals_lower("mutex"); }
@@ -2142,105 +2206,88 @@
 }
 
 def AssertCapability : InheritableAttr {
-  let Spellings = [GNU<"assert_capability">,
-                   CXX11<"clang", "assert_capability">,
-                   GNU<"assert_shared_capability">,
-                   CXX11<"clang", "assert_shared_capability">];
+  let Spellings = [Clang<"assert_capability", 0>,
+                   Clang<"assert_shared_capability", 0>];
   let Subjects = SubjectList<[Function]>;
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Args = [VariadicExprArgument<"Args">];
   let Accessors = [Accessor<"isShared",
-                    [GNU<"assert_shared_capability">,
-                     CXX11<"clang", "assert_shared_capability">]>];
+                    [Clang<"assert_shared_capability", 0>]>];
   let Documentation = [AssertCapabilityDocs];
 }
 
 def AcquireCapability : InheritableAttr {
-  let Spellings = [GNU<"acquire_capability">,
-                   CXX11<"clang", "acquire_capability">,
-                   GNU<"acquire_shared_capability">,
-                   CXX11<"clang", "acquire_shared_capability">,
+  let Spellings = [Clang<"acquire_capability", 0>,
+                   Clang<"acquire_shared_capability", 0>,
                    GNU<"exclusive_lock_function">,
                    GNU<"shared_lock_function">];
   let Subjects = SubjectList<[Function]>;
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Args = [VariadicExprArgument<"Args">];
   let Accessors = [Accessor<"isShared",
-                    [GNU<"acquire_shared_capability">,
-                     CXX11<"clang", "acquire_shared_capability">,
+                    [Clang<"acquire_shared_capability", 0>,
                      GNU<"shared_lock_function">]>];
   let Documentation = [AcquireCapabilityDocs];
 }
 
 def TryAcquireCapability : InheritableAttr {
-  let Spellings = [GNU<"try_acquire_capability">,
-                   CXX11<"clang", "try_acquire_capability">,
-                   GNU<"try_acquire_shared_capability">,
-                   CXX11<"clang", "try_acquire_shared_capability">];
+  let Spellings = [Clang<"try_acquire_capability", 0>,
+                   Clang<"try_acquire_shared_capability", 0>];
   let Subjects = SubjectList<[Function],
                              ErrorDiag>;
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Args = [ExprArgument<"SuccessValue">, VariadicExprArgument<"Args">];
   let Accessors = [Accessor<"isShared",
-                    [GNU<"try_acquire_shared_capability">,
-                     CXX11<"clang", "try_acquire_shared_capability">]>];
+                    [Clang<"try_acquire_shared_capability", 0>]>];
   let Documentation = [TryAcquireCapabilityDocs];
 }
 
 def ReleaseCapability : InheritableAttr {
-  let Spellings = [GNU<"release_capability">,
-                   CXX11<"clang", "release_capability">,
-                   GNU<"release_shared_capability">,
-                   CXX11<"clang", "release_shared_capability">,
-                   GNU<"release_generic_capability">,
-                   CXX11<"clang", "release_generic_capability">,
-                   GNU<"unlock_function">];
+  let Spellings = [Clang<"release_capability", 0>,
+                   Clang<"release_shared_capability", 0>,
+                   Clang<"release_generic_capability", 0>,
+                   Clang<"unlock_function", 0>];
   let Subjects = SubjectList<[Function]>;
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Args = [VariadicExprArgument<"Args">];
   let Accessors = [Accessor<"isShared",
-                    [GNU<"release_shared_capability">,
-                     CXX11<"clang", "release_shared_capability">]>,
+                    [Clang<"release_shared_capability", 0>]>,
                    Accessor<"isGeneric",
-                     [GNU<"release_generic_capability">,
-                      CXX11<"clang", "release_generic_capability">,
-                      GNU<"unlock_function">]>];
+                     [Clang<"release_generic_capability", 0>,
+                      Clang<"unlock_function", 0>]>];
   let Documentation = [ReleaseCapabilityDocs];
 }
 
 def RequiresCapability : InheritableAttr {
-  let Spellings = [GNU<"requires_capability">,
-                   CXX11<"clang", "requires_capability">,
-                   GNU<"exclusive_locks_required">,
-                   GNU<"requires_shared_capability">,
-                   CXX11<"clang", "requires_shared_capability">,
-                   GNU<"shared_locks_required">];
+  let Spellings = [Clang<"requires_capability", 0>,
+                   Clang<"exclusive_locks_required", 0>,
+                   Clang<"requires_shared_capability", 0>,
+                   Clang<"shared_locks_required", 0>];
   let Args = [VariadicExprArgument<"Args">];
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Subjects = SubjectList<[Function]>;
-  let Accessors = [Accessor<"isShared", [GNU<"requires_shared_capability">,
-                                         GNU<"shared_locks_required">,
-                                CXX11<"clang","requires_shared_capability">]>];
+  let Accessors = [Accessor<"isShared", [Clang<"requires_shared_capability", 0>,
+                                         Clang<"shared_locks_required", 0>]>];
   let Documentation = [Undocumented];
 }
 
 def NoThreadSafetyAnalysis : InheritableAttr {
-  let Spellings = [GNU<"no_thread_safety_analysis">];
+  let Spellings = [Clang<"no_thread_safety_analysis">];
   let Subjects = SubjectList<[Function]>;
   let Documentation = [Undocumented];
 }
@@ -2251,9 +2298,8 @@
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
-  let Subjects = SubjectList<[Field, SharedVar], WarnDiag,
-                             "ExpectedFieldOrGlobalVar">;
+  let InheritEvenIfAlreadyPresent = 1;
+  let Subjects = SubjectList<[Field, SharedVar]>;
   let Documentation = [Undocumented];
 }
 
@@ -2263,9 +2309,8 @@
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
-  let Subjects = SubjectList<[Field, SharedVar], WarnDiag,
-                             "ExpectedFieldOrGlobalVar">;
+  let InheritEvenIfAlreadyPresent = 1;
+  let Subjects = SubjectList<[Field, SharedVar]>;
   let Documentation = [Undocumented];
 }
 
@@ -2275,9 +2320,8 @@
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
-  let Subjects = SubjectList<[Field, SharedVar], WarnDiag,
-                             "ExpectedFieldOrGlobalVar">;
+  let InheritEvenIfAlreadyPresent = 1;
+  let Subjects = SubjectList<[Field, SharedVar]>;
   let Documentation = [Undocumented];
 }
 
@@ -2287,9 +2331,8 @@
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
-  let Subjects = SubjectList<[Field, SharedVar], WarnDiag,
-                             "ExpectedFieldOrGlobalVar">;
+  let InheritEvenIfAlreadyPresent = 1;
+  let Subjects = SubjectList<[Field, SharedVar]>;
   let Documentation = [Undocumented];
 }
 
@@ -2299,7 +2342,7 @@
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Subjects = SubjectList<[Function]>;
   let Documentation = [Undocumented];
 }
@@ -2310,7 +2353,7 @@
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Subjects = SubjectList<[Function]>;
   let Documentation = [Undocumented];
 }
@@ -2323,7 +2366,7 @@
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Subjects = SubjectList<[Function]>;
   let Documentation = [Undocumented];
 }
@@ -2336,7 +2379,7 @@
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Subjects = SubjectList<[Function]>;
   let Documentation = [Undocumented];
 }
@@ -2357,7 +2400,7 @@
   let LateParsed = 1;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
-  let DuplicatesAllowedWhileMerging = 1;
+  let InheritEvenIfAlreadyPresent = 1;
   let Subjects = SubjectList<[Function]>;
   let Documentation = [Undocumented];
 }
@@ -2365,7 +2408,10 @@
 // C/C++ consumed attributes.
 
 def Consumable : InheritableAttr {
-  let Spellings = [GNU<"consumable">];
+  // This attribute does not have a C [[]] spelling because it only appertains
+  // to C++ struct/class/union.
+  // FIXME: should this attribute have a CPlusPlus language option?
+  let Spellings = [Clang<"consumable", 0>];
   let Subjects = SubjectList<[CXXRecord]>;
   let Args = [EnumArgument<"DefaultState", "ConsumedState",
                            ["unknown", "consumed", "unconsumed"],
@@ -2374,19 +2420,28 @@
 }
 
 def ConsumableAutoCast : InheritableAttr {
-  let Spellings = [GNU<"consumable_auto_cast_state">];
+  // This attribute does not have a C [[]] spelling because it only appertains
+  // to C++ struct/class/union.
+  // FIXME: should this attribute have a CPlusPlus language option?
+  let Spellings = [Clang<"consumable_auto_cast_state", 0>];
   let Subjects = SubjectList<[CXXRecord]>;
   let Documentation = [Undocumented];
 }
 
 def ConsumableSetOnRead : InheritableAttr {
-  let Spellings = [GNU<"consumable_set_state_on_read">];
+  // This attribute does not have a C [[]] spelling because it only appertains
+  // to C++ struct/class/union.
+  // FIXME: should this attribute have a CPlusPlus language option?
+  let Spellings = [Clang<"consumable_set_state_on_read", 0>];
   let Subjects = SubjectList<[CXXRecord]>;
   let Documentation = [Undocumented];
 }
 
 def CallableWhen : InheritableAttr {
-  let Spellings = [GNU<"callable_when">];
+  // This attribute does not have a C [[]] spelling because it only appertains
+  // to C++ function (but doesn't require it to be a member function).
+  // FIXME: should this attribute have a CPlusPlus language option?
+  let Spellings = [Clang<"callable_when", 0>];
   let Subjects = SubjectList<[CXXMethod]>;
   let Args = [VariadicEnumArgument<"CallableStates", "ConsumedState",
                                    ["unknown", "consumed", "unconsumed"],
@@ -2395,7 +2450,10 @@
 }
 
 def ParamTypestate : InheritableAttr {
-  let Spellings = [GNU<"param_typestate">];
+  // This attribute does not have a C [[]] spelling because it only appertains
+  // to a parameter whose type is a consumable C++ class.
+  // FIXME: should this attribute have a CPlusPlus language option?
+  let Spellings = [Clang<"param_typestate", 0>];
   let Subjects = SubjectList<[ParmVar]>;
   let Args = [EnumArgument<"ParamState", "ConsumedState",
                            ["unknown", "consumed", "unconsumed"],
@@ -2404,7 +2462,10 @@
 }
 
 def ReturnTypestate : InheritableAttr {
-  let Spellings = [GNU<"return_typestate">];
+  // This attribute does not have a C [[]] spelling because it only appertains
+  // to a parameter or function return type that is a consumable C++ class.
+  // FIXME: should this attribute have a CPlusPlus language option?
+  let Spellings = [Clang<"return_typestate", 0>];
   let Subjects = SubjectList<[Function, ParmVar]>;
   let Args = [EnumArgument<"State", "ConsumedState",
                            ["unknown", "consumed", "unconsumed"],
@@ -2413,7 +2474,10 @@
 }
 
 def SetTypestate : InheritableAttr {
-  let Spellings = [GNU<"set_typestate">];
+  // This attribute does not have a C [[]] spelling because it only appertains
+  // to C++ function (but doesn't require it to be a member function).
+  // FIXME: should this attribute have a CPlusPlus language option?
+  let Spellings = [Clang<"set_typestate", 0>];
   let Subjects = SubjectList<[CXXMethod]>;
   let Args = [EnumArgument<"NewState", "ConsumedState",
                            ["unknown", "consumed", "unconsumed"],
@@ -2422,7 +2486,10 @@
 }
 
 def TestTypestate : InheritableAttr {
-  let Spellings = [GNU<"test_typestate">];
+  // This attribute does not have a C [[]] spelling because it only appertains
+  // to C++ function (but doesn't require it to be a member function).
+  // FIXME: should this attribute have a CPlusPlus language option?
+  let Spellings = [Clang<"test_typestate", 0>];
   let Subjects = SubjectList<[CXXMethod]>;
   let Args = [EnumArgument<"TestState", "ConsumedState",
                            ["consumed", "unconsumed"],
@@ -2433,18 +2500,18 @@
 // Type safety attributes for `void *' pointers and type tags.
 
 def ArgumentWithTypeTag : InheritableAttr {
-  let Spellings = [GNU<"argument_with_type_tag">,
-                   GNU<"pointer_with_type_tag">];
+  let Spellings = [Clang<"argument_with_type_tag">,
+                   Clang<"pointer_with_type_tag">];
+  let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>;
   let Args = [IdentifierArgument<"ArgumentKind">,
-              UnsignedArgument<"ArgumentIdx">,
-              UnsignedArgument<"TypeTagIdx">,
-              BoolArgument<"IsPointer">];
-  let HasCustomParsing = 1;
+              ParamIdxArgument<"ArgumentIdx">,
+              ParamIdxArgument<"TypeTagIdx">,
+              BoolArgument<"IsPointer", /*opt*/0, /*fake*/1>];
   let Documentation = [ArgumentWithTypeTagDocs, PointerWithTypeTagDocs];
 }
 
 def TypeTagForDatatype : InheritableAttr {
-  let Spellings = [GNU<"type_tag_for_datatype">];
+  let Spellings = [Clang<"type_tag_for_datatype">];
   let Args = [IdentifierArgument<"ArgumentKind">,
               TypeArgument<"MatchingCType">,
               BoolArgument<"LayoutCompatible">,
@@ -2578,7 +2645,7 @@
   let Documentation = [InitSegDocs];
   let AdditionalMembers = [{
   void printPrettyPragma(raw_ostream &OS, const PrintingPolicy &Policy) const {
-    OS << '(' << getSection() << ')';
+    OS << " (" << getSection() << ')';
   }
   }];
 }
@@ -2633,12 +2700,12 @@
     if (SpellingIndex == Pragma_nounroll)
       return;
     else if (SpellingIndex == Pragma_unroll) {
-      OS << getValueString(Policy);
+      OS << ' ' << getValueString(Policy);
       return;
     }
 
     assert(SpellingIndex == Pragma_clang_loop && "Unexpected spelling");
-    OS << getOptionName(option) << getValueString(Policy);
+    OS << ' ' << getOptionName(option) << getValueString(Policy);
   }
 
   // Return a string containing the loop hint argument including the
@@ -2725,37 +2792,37 @@
     void printPrettyPragma(raw_ostream & OS, const PrintingPolicy &Policy)
         const {
       if (getBranchState() != BS_Undefined)
-        OS << ConvertBranchStateTyToStr(getBranchState()) << " ";
+        OS << ' ' << ConvertBranchStateTyToStr(getBranchState());
       if (auto *E = getSimdlen()) {
-        OS << "simdlen(";
+        OS << " simdlen(";
         E->printPretty(OS, nullptr, Policy);
-        OS << ") ";
+        OS << ")";
       }
       if (uniforms_size() > 0) {
-        OS << "uniform";
+        OS << " uniform";
         StringRef Sep = "(";
         for (auto *E : uniforms()) {
           OS << Sep;
           E->printPretty(OS, nullptr, Policy);
           Sep = ", ";
         }
-        OS << ") ";
+        OS << ")";
       }
       alignments_iterator NI = alignments_begin();
       for (auto *E : aligneds()) {
-        OS << "aligned(";
+        OS << " aligned(";
         E->printPretty(OS, nullptr, Policy);
         if (*NI) {
           OS << ": ";
           (*NI)->printPretty(OS, nullptr, Policy);
         }
-        OS << ") ";
+        OS << ")";
         ++NI;
       }
       steps_iterator I = steps_begin();
       modifiers_iterator MI = modifiers_begin();
       for (auto *E : linears()) {
-        OS << "linear(";
+        OS << " linear(";
         if (*MI != OMPC_LINEAR_unknown)
           OS << getOpenMPSimpleClauseTypeName(OMPC_linear, *MI) << "(";
         E->printPretty(OS, nullptr, Policy);
@@ -2765,7 +2832,7 @@
           OS << ": ";
           (*I)->printPretty(OS, nullptr, Policy);
         }
-        OS << ") ";
+        OS << ")";
         ++I;
         ++MI;
       }
@@ -2786,13 +2853,13 @@
     void printPrettyPragma(raw_ostream &OS, const PrintingPolicy &Policy) const {
       // Use fake syntax because it is for testing and debugging purpose only.
       if (getMapType() != MT_To)
-        OS << ConvertMapTypeTyToStr(getMapType()) << " ";
+        OS << ' ' << ConvertMapTypeTyToStr(getMapType());
     }
   }];
 }
 
 def InternalLinkage : InheritableAttr {
-  let Spellings = [GNU<"internal_linkage">, CXX11<"clang", "internal_linkage">];
+  let Spellings = [Clang<"internal_linkage">];
   let Subjects = SubjectList<[Var, Function, CXXRecord]>;
   let Documentation = [InternalLinkageDocs];
 }
diff --git a/include/clang/Basic/AttrDocs.td b/include/clang/Basic/AttrDocs.td
index 78a40c4..35c0cb3 100644
--- a/include/clang/Basic/AttrDocs.td
+++ b/include/clang/Basic/AttrDocs.td
@@ -142,6 +142,7 @@
 For example:
 
 .. code-block:: c
+
   int *gp;
 
   void nonescapingFunc(__attribute__((noescape)) int *p) {
@@ -156,6 +157,8 @@
 <https://clang.llvm.org/docs/BlockLanguageSpec.html>`, the same restriction
 applies to copies of the block. For example:
 
+.. code-block:: c
+
   typedef void (^BlockTy)();
   BlockTy g0, g1;
 
@@ -350,7 +353,7 @@
 
   int isdigit(int c);
   int isdigit(int c) __attribute__((enable_if(c <= -1 || c > 255, "chosen when 'c' is out of range"))) __attribute__((unavailable("'c' must have the value of an unsigned char or EOF")));
-  
+
   void foo(char c) {
     isdigit(c);
     isdigit(10);
@@ -403,7 +406,7 @@
 
   void f() __attribute__((enable_if(true, "")));  // #1
   void f() __attribute__((enable_if(true, ""))) __attribute__((enable_if(true, "")));  // #2
-  
+
   void g(int i, int j) __attribute__((enable_if(i, "")));  // #1
   void g(int i, int j) __attribute__((enable_if(j, ""))) __attribute__((enable_if(true)));  // #2
 
@@ -915,11 +918,11 @@
 can only be placed before an @protocol or @interface declaration:
         
 .. code-block:: objc
-        
+
   __attribute__((objc_runtime_name("MyLocalName")))
   @interface Message
   @end
-        
+
     }];
 }
 
@@ -1225,7 +1228,7 @@
 .. code-block: c++
   struct [[nodiscard]] error_info { /*...*/ };
   error_info enable_missile_safety_mode();
-  
+
   void launch_missiles();
   void test_missiles() {
     enable_missile_safety_mode(); // diagnoses
@@ -1471,6 +1474,25 @@
 Example "subtarget features" from the x86 backend include: "mmx", "sse", "sse4.2",
 "avx", "xop" and largely correspond to the machine specific options handled by
 the front end.
+
+Additionally, this attribute supports function multiversioning for ELF based
+x86/x86-64 targets, which can be used to create multiple implementations of the
+same function that will be resolved at runtime based on the priority of their
+``target`` attribute strings. A function is considered a multiversioned function
+if either two declarations of the function have different ``target`` attribute
+strings, or if it has a ``target`` attribute string of ``default``.  For
+example:
+
+  .. code-block:: c++
+
+    __attribute__((target("arch=atom")))
+    void foo() {} // will be called on 'atom' processors.
+    __attribute__((target("default")))
+    void foo() {} // will be called on any other processors.
+
+All multiversioned functions must contain a ``default`` (fallback)
+implementation, otherwise usages of the function are considered invalid.
+Additionally, a function may not become multiversioned after its first use.
 }];
 }
 
@@ -2221,6 +2243,48 @@
   }];
 }
 
+def TrivialABIDocs : Documentation {
+  let Category = DocCatVariable;
+  let Content = [{
+The ``trivial_abi`` attribute can be applied to a C++ class, struct, or union.
+It instructs the compiler to pass and return the type using the C ABI for the
+underlying type when the type would otherwise be considered non-trivial for the
+purpose of calls.
+A class annotated with `trivial_abi` can have non-trivial destructors or copy/move constructors without automatically becoming non-trivial for the purposes of calls. For example:
+
+  .. code-block:: c++
+
+    // A is trivial for the purposes of calls because `trivial_abi` makes the
+    // user-provided special functions trivial.
+    struct __attribute__((trivial_abi)) A {
+      ~A();
+      A(const A &);
+      A(A &&);
+      int x;
+    };
+
+    // B's destructor and copy/move constructor are considered trivial for the
+    // purpose of calls because A is trivial.
+    struct B {
+      A a;
+    };
+
+If a type is trivial for the purposes of calls, has a non-trivial destructor,
+and is passed as an argument by value, the convention is that the callee will
+destroy the object before returning.
+
+Attribute ``trivial_abi`` has no effect in the following cases:
+
+- The class directly declares a virtual base or virtual methods.
+- The class has a base class that is non-trivial for the purposes of calls.
+- The class has a non-static data member whose type is non-trivial for the purposes of calls, which includes:
+
+  - classes that are non-trivial for the purposes of calls
+  - __weak-qualified types in Objective-C++
+  - arrays of any of the above
+  }];
+}
+
 def MSInheritanceDocs : Documentation {
   let Category = DocCatType;
   let Heading = "__single_inhertiance, __multiple_inheritance, __virtual_inheritance";
@@ -2638,23 +2702,23 @@
 concurrently.
 The syntax of the `declare simd` construct is as follows:
 
-  .. code-block:: c
+  .. code-block:: none
 
-  #pragma omp declare simd [clause[[,] clause] ...] new-line
-  [#pragma omp declare simd [clause[[,] clause] ...] new-line]
-  [...]
-  function definition or declaration
+    #pragma omp declare simd [clause[[,] clause] ...] new-line
+    [#pragma omp declare simd [clause[[,] clause] ...] new-line]
+    [...]
+    function definition or declaration
 
 where clause is one of the following:
 
-  .. code-block:: c
+  .. code-block:: none
 
-  simdlen(length)
-  linear(argument-list[:constant-linear-step])
-  aligned(argument-list[:alignment])
-  uniform(argument-list)
-  inbranch
-  notinbranch
+    simdlen(length)
+    linear(argument-list[:constant-linear-step])
+    aligned(argument-list[:alignment])
+    uniform(argument-list)
+    inbranch
+    notinbranch
 
   }];
 }
@@ -2670,9 +2734,9 @@
 
   .. code-block:: c
 
-  #pragma omp declare target new-line
-  declarations-definition-seq
-  #pragma omp end declare target new-line
+    #pragma omp declare target new-line
+    declarations-definition-seq
+    #pragma omp end declare target new-line
   }];
 }
 
@@ -3210,3 +3274,13 @@
 or `msvc documentation <https://docs.microsoft.com/pl-pl/cpp/cpp/selectany>`_.
 }];
 }
+
+def ArtificialDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``artificial`` attribute can be applied to an inline function. If such a
+function is inlined, the attribute indicates that debuggers should associate
+the resulting instructions with the call site, rather than with the 
+corresponding line within the inlined callee.
+  }];
+}
diff --git a/include/clang/Basic/Builtins.def b/include/clang/Basic/Builtins.def
index 8e276e8..9b7e09a 100644
--- a/include/clang/Basic/Builtins.def
+++ b/include/clang/Basic/Builtins.def
@@ -48,8 +48,8 @@
 //  . -> "...".  This may only occur at the end of the function list.
 //
 // Types may be prefixed with the following modifiers:
-//  L   -> long (e.g. Li for 'long int')
-//  LL  -> long long
+//  L   -> long (e.g. Li for 'long int', Ld for 'long double')
+//  LL  -> long long (e.g. LLi for 'long long int', LLd for __float128)
 //  LLL -> __int128_t (e.g. LLLi)
 //  W   -> int64_t
 //  N   -> 'int' size if target is LP64, 'L' otherwise.
@@ -103,158 +103,164 @@
 #endif
 
 // Standard libc/libm functions:
-BUILTIN(__builtin_atan2 , "ddd"  , "Fnc")
-BUILTIN(__builtin_atan2f, "fff"  , "Fnc")
-BUILTIN(__builtin_atan2l, "LdLdLd", "Fnc")
+BUILTIN(__builtin_atan2 , "ddd"  , "Fne")
+BUILTIN(__builtin_atan2f, "fff"  , "Fne")
+BUILTIN(__builtin_atan2l, "LdLdLd", "Fne")
 BUILTIN(__builtin_abs  , "ii"  , "ncF")
 BUILTIN(__builtin_copysign, "ddd", "ncF")
 BUILTIN(__builtin_copysignf, "fff", "ncF")
 BUILTIN(__builtin_copysignl, "LdLdLd", "ncF")
+BUILTIN(__builtin_copysignf128, "LLdLLdLLd", "ncF")
 BUILTIN(__builtin_fabs , "dd"  , "ncF")
 BUILTIN(__builtin_fabsf, "ff"  , "ncF")
 BUILTIN(__builtin_fabsl, "LdLd", "ncF")
-BUILTIN(__builtin_fmod , "ddd"  , "Fnc")
-BUILTIN(__builtin_fmodf, "fff"  , "Fnc")
-BUILTIN(__builtin_fmodl, "LdLdLd", "Fnc")
+BUILTIN(__builtin_fabsf128, "LLdLLd", "ncF")
+BUILTIN(__builtin_fmod , "ddd"  , "Fne")
+BUILTIN(__builtin_fmodf, "fff"  , "Fne")
+BUILTIN(__builtin_fmodl, "LdLdLd", "Fne")
 BUILTIN(__builtin_frexp , "ddi*"  , "Fn")
 BUILTIN(__builtin_frexpf, "ffi*"  , "Fn")
 BUILTIN(__builtin_frexpl, "LdLdi*", "Fn")
 BUILTIN(__builtin_huge_val, "d", "nc")
 BUILTIN(__builtin_huge_valf, "f", "nc")
 BUILTIN(__builtin_huge_vall, "Ld", "nc")
+BUILTIN(__builtin_huge_valf128, "LLd", "nc")
 BUILTIN(__builtin_inf  , "d"   , "nc")
 BUILTIN(__builtin_inff , "f"   , "nc")
 BUILTIN(__builtin_infl , "Ld"  , "nc")
+BUILTIN(__builtin_inff128 , "LLd"  , "nc")
 BUILTIN(__builtin_labs , "LiLi"  , "Fnc")
 BUILTIN(__builtin_llabs, "LLiLLi", "Fnc")
-BUILTIN(__builtin_ldexp , "ddi"  , "Fnc")
-BUILTIN(__builtin_ldexpf, "ffi"  , "Fnc")
-BUILTIN(__builtin_ldexpl, "LdLdi", "Fnc")
+BUILTIN(__builtin_ldexp , "ddi"  , "Fne")
+BUILTIN(__builtin_ldexpf, "ffi"  , "Fne")
+BUILTIN(__builtin_ldexpl, "LdLdi", "Fne")
 BUILTIN(__builtin_modf , "ddd*"  , "Fn")
 BUILTIN(__builtin_modff, "fff*"  , "Fn")
 BUILTIN(__builtin_modfl, "LdLdLd*", "Fn")
 BUILTIN(__builtin_nan,  "dcC*" , "ncF")
 BUILTIN(__builtin_nanf, "fcC*" , "ncF")
 BUILTIN(__builtin_nanl, "LdcC*", "ncF")
+BUILTIN(__builtin_nanf128, "LLdcC*", "ncF")
 BUILTIN(__builtin_nans,  "dcC*" , "ncF")
 BUILTIN(__builtin_nansf, "fcC*" , "ncF")
 BUILTIN(__builtin_nansl, "LdcC*", "ncF")
+BUILTIN(__builtin_nansf128, "LLdcC*", "ncF")
 BUILTIN(__builtin_powi , "ddi"  , "Fnc")
 BUILTIN(__builtin_powif, "ffi"  , "Fnc")
 BUILTIN(__builtin_powil, "LdLdi", "Fnc")
-BUILTIN(__builtin_pow , "ddd"  , "Fnc")
-BUILTIN(__builtin_powf, "fff"  , "Fnc")
-BUILTIN(__builtin_powl, "LdLdLd", "Fnc")
+BUILTIN(__builtin_pow , "ddd"  , "Fne")
+BUILTIN(__builtin_powf, "fff"  , "Fne")
+BUILTIN(__builtin_powl, "LdLdLd", "Fne")
 
 // Standard unary libc/libm functions with double/float/long double variants:
-BUILTIN(__builtin_acos , "dd"  , "Fnc")
-BUILTIN(__builtin_acosf, "ff"  , "Fnc")
-BUILTIN(__builtin_acosl, "LdLd", "Fnc")
-BUILTIN(__builtin_acosh , "dd"  , "Fnc")
-BUILTIN(__builtin_acoshf, "ff"  , "Fnc")
-BUILTIN(__builtin_acoshl, "LdLd", "Fnc")
-BUILTIN(__builtin_asin , "dd"  , "Fnc")
-BUILTIN(__builtin_asinf, "ff"  , "Fnc")
-BUILTIN(__builtin_asinl, "LdLd", "Fnc")
-BUILTIN(__builtin_asinh , "dd"  , "Fnc")
-BUILTIN(__builtin_asinhf, "ff"  , "Fnc")
-BUILTIN(__builtin_asinhl, "LdLd", "Fnc")
-BUILTIN(__builtin_atan , "dd"  , "Fnc")
-BUILTIN(__builtin_atanf, "ff"  , "Fnc")
-BUILTIN(__builtin_atanl, "LdLd", "Fnc")
-BUILTIN(__builtin_atanh , "dd", "Fnc")
-BUILTIN(__builtin_atanhf, "ff", "Fnc")
-BUILTIN(__builtin_atanhl, "LdLd", "Fnc")
+BUILTIN(__builtin_acos , "dd"  , "Fne")
+BUILTIN(__builtin_acosf, "ff"  , "Fne")
+BUILTIN(__builtin_acosl, "LdLd", "Fne")
+BUILTIN(__builtin_acosh , "dd"  , "Fne")
+BUILTIN(__builtin_acoshf, "ff"  , "Fne")
+BUILTIN(__builtin_acoshl, "LdLd", "Fne")
+BUILTIN(__builtin_asin , "dd"  , "Fne")
+BUILTIN(__builtin_asinf, "ff"  , "Fne")
+BUILTIN(__builtin_asinl, "LdLd", "Fne")
+BUILTIN(__builtin_asinh , "dd"  , "Fne")
+BUILTIN(__builtin_asinhf, "ff"  , "Fne")
+BUILTIN(__builtin_asinhl, "LdLd", "Fne")
+BUILTIN(__builtin_atan , "dd"  , "Fne")
+BUILTIN(__builtin_atanf, "ff"  , "Fne")
+BUILTIN(__builtin_atanl, "LdLd", "Fne")
+BUILTIN(__builtin_atanh , "dd", "Fne")
+BUILTIN(__builtin_atanhf, "ff", "Fne")
+BUILTIN(__builtin_atanhl, "LdLd", "Fne")
 BUILTIN(__builtin_cbrt , "dd", "Fnc")
 BUILTIN(__builtin_cbrtf, "ff", "Fnc")
 BUILTIN(__builtin_cbrtl, "LdLd", "Fnc")
 BUILTIN(__builtin_ceil , "dd"  , "Fnc")
 BUILTIN(__builtin_ceilf, "ff"  , "Fnc")
 BUILTIN(__builtin_ceill, "LdLd", "Fnc")
-BUILTIN(__builtin_cos , "dd"  , "Fnc")
-BUILTIN(__builtin_cosf, "ff"  , "Fnc")
-BUILTIN(__builtin_cosh , "dd"  , "Fnc")
-BUILTIN(__builtin_coshf, "ff"  , "Fnc")
-BUILTIN(__builtin_coshl, "LdLd", "Fnc")
-BUILTIN(__builtin_cosl, "LdLd", "Fnc")
-BUILTIN(__builtin_erf , "dd", "Fnc")
-BUILTIN(__builtin_erff, "ff", "Fnc")
-BUILTIN(__builtin_erfl, "LdLd", "Fnc")
-BUILTIN(__builtin_erfc , "dd", "Fnc")
-BUILTIN(__builtin_erfcf, "ff", "Fnc")
-BUILTIN(__builtin_erfcl, "LdLd", "Fnc")
-BUILTIN(__builtin_exp , "dd"  , "Fnc")
-BUILTIN(__builtin_expf, "ff"  , "Fnc")
-BUILTIN(__builtin_expl, "LdLd", "Fnc")
-BUILTIN(__builtin_exp2 , "dd"  , "Fnc")
-BUILTIN(__builtin_exp2f, "ff"  , "Fnc")
-BUILTIN(__builtin_exp2l, "LdLd", "Fnc")
-BUILTIN(__builtin_expm1 , "dd", "Fnc")
-BUILTIN(__builtin_expm1f, "ff", "Fnc")
-BUILTIN(__builtin_expm1l, "LdLd", "Fnc")
-BUILTIN(__builtin_fdim, "ddd", "Fnc")
-BUILTIN(__builtin_fdimf, "fff", "Fnc")
-BUILTIN(__builtin_fdiml, "LdLdLd", "Fnc")
+BUILTIN(__builtin_cos , "dd"  , "Fne")
+BUILTIN(__builtin_cosf, "ff"  , "Fne")
+BUILTIN(__builtin_cosh , "dd"  , "Fne")
+BUILTIN(__builtin_coshf, "ff"  , "Fne")
+BUILTIN(__builtin_coshl, "LdLd", "Fne")
+BUILTIN(__builtin_cosl, "LdLd", "Fne")
+BUILTIN(__builtin_erf , "dd", "Fne")
+BUILTIN(__builtin_erff, "ff", "Fne")
+BUILTIN(__builtin_erfl, "LdLd", "Fne")
+BUILTIN(__builtin_erfc , "dd", "Fne")
+BUILTIN(__builtin_erfcf, "ff", "Fne")
+BUILTIN(__builtin_erfcl, "LdLd", "Fne")
+BUILTIN(__builtin_exp , "dd"  , "Fne")
+BUILTIN(__builtin_expf, "ff"  , "Fne")
+BUILTIN(__builtin_expl, "LdLd", "Fne")
+BUILTIN(__builtin_exp2 , "dd"  , "Fne")
+BUILTIN(__builtin_exp2f, "ff"  , "Fne")
+BUILTIN(__builtin_exp2l, "LdLd", "Fne")
+BUILTIN(__builtin_expm1 , "dd", "Fne")
+BUILTIN(__builtin_expm1f, "ff", "Fne")
+BUILTIN(__builtin_expm1l, "LdLd", "Fne")
+BUILTIN(__builtin_fdim, "ddd", "Fne")
+BUILTIN(__builtin_fdimf, "fff", "Fne")
+BUILTIN(__builtin_fdiml, "LdLdLd", "Fne")
 BUILTIN(__builtin_floor , "dd"  , "Fnc")
 BUILTIN(__builtin_floorf, "ff"  , "Fnc")
 BUILTIN(__builtin_floorl, "LdLd", "Fnc")
-BUILTIN(__builtin_fma, "dddd", "Fnc")
-BUILTIN(__builtin_fmaf, "ffff", "Fnc")
-BUILTIN(__builtin_fmal, "LdLdLdLd", "Fnc")
+BUILTIN(__builtin_fma, "dddd", "Fne")
+BUILTIN(__builtin_fmaf, "ffff", "Fne")
+BUILTIN(__builtin_fmal, "LdLdLdLd", "Fne")
 BUILTIN(__builtin_fmax, "ddd", "Fnc")
 BUILTIN(__builtin_fmaxf, "fff", "Fnc")
 BUILTIN(__builtin_fmaxl, "LdLdLd", "Fnc")
 BUILTIN(__builtin_fmin, "ddd", "Fnc")
 BUILTIN(__builtin_fminf, "fff", "Fnc")
 BUILTIN(__builtin_fminl, "LdLdLd", "Fnc")
-BUILTIN(__builtin_hypot , "ddd"  , "Fnc")
-BUILTIN(__builtin_hypotf, "fff"  , "Fnc")
-BUILTIN(__builtin_hypotl, "LdLdLd", "Fnc")
-BUILTIN(__builtin_ilogb , "id", "Fnc")
-BUILTIN(__builtin_ilogbf, "if", "Fnc")
-BUILTIN(__builtin_ilogbl, "iLd", "Fnc")
-BUILTIN(__builtin_lgamma , "dd", "Fnc")
-BUILTIN(__builtin_lgammaf, "ff", "Fnc")
-BUILTIN(__builtin_lgammal, "LdLd", "Fnc")
-BUILTIN(__builtin_llrint, "LLid", "Fnc")
-BUILTIN(__builtin_llrintf, "LLif", "Fnc")
-BUILTIN(__builtin_llrintl, "LLiLd", "Fnc")
-BUILTIN(__builtin_llround , "LLid", "Fnc")
-BUILTIN(__builtin_llroundf, "LLif", "Fnc")
-BUILTIN(__builtin_llroundl, "LLiLd", "Fnc")
-BUILTIN(__builtin_log , "dd"  , "Fnc")
-BUILTIN(__builtin_log10 , "dd"  , "Fnc")
-BUILTIN(__builtin_log10f, "ff"  , "Fnc")
-BUILTIN(__builtin_log10l, "LdLd", "Fnc")
-BUILTIN(__builtin_log1p , "dd"  , "Fnc")
-BUILTIN(__builtin_log1pf, "ff"  , "Fnc")
-BUILTIN(__builtin_log1pl, "LdLd", "Fnc")
-BUILTIN(__builtin_log2, "dd"  , "Fnc")
-BUILTIN(__builtin_log2f, "ff"  , "Fnc")
-BUILTIN(__builtin_log2l, "LdLd"  , "Fnc")
-BUILTIN(__builtin_logb , "dd", "Fnc")
-BUILTIN(__builtin_logbf, "ff", "Fnc")
-BUILTIN(__builtin_logbl, "LdLd", "Fnc")
-BUILTIN(__builtin_logf, "ff"  , "Fnc")
-BUILTIN(__builtin_logl, "LdLd", "Fnc")
-BUILTIN(__builtin_lrint , "Lid", "Fnc")
-BUILTIN(__builtin_lrintf, "Lif", "Fnc")
-BUILTIN(__builtin_lrintl, "LiLd", "Fnc")
-BUILTIN(__builtin_lround , "Lid", "Fnc")
-BUILTIN(__builtin_lroundf, "Lif", "Fnc")
-BUILTIN(__builtin_lroundl, "LiLd", "Fnc")
+BUILTIN(__builtin_hypot , "ddd"  , "Fne")
+BUILTIN(__builtin_hypotf, "fff"  , "Fne")
+BUILTIN(__builtin_hypotl, "LdLdLd", "Fne")
+BUILTIN(__builtin_ilogb , "id", "Fne")
+BUILTIN(__builtin_ilogbf, "if", "Fne")
+BUILTIN(__builtin_ilogbl, "iLd", "Fne")
+BUILTIN(__builtin_lgamma , "dd", "Fn")
+BUILTIN(__builtin_lgammaf, "ff", "Fn")
+BUILTIN(__builtin_lgammal, "LdLd", "Fn")
+BUILTIN(__builtin_llrint, "LLid", "Fne")
+BUILTIN(__builtin_llrintf, "LLif", "Fne")
+BUILTIN(__builtin_llrintl, "LLiLd", "Fne")
+BUILTIN(__builtin_llround , "LLid", "Fne")
+BUILTIN(__builtin_llroundf, "LLif", "Fne")
+BUILTIN(__builtin_llroundl, "LLiLd", "Fne")
+BUILTIN(__builtin_log , "dd"  , "Fne")
+BUILTIN(__builtin_log10 , "dd"  , "Fne")
+BUILTIN(__builtin_log10f, "ff"  , "Fne")
+BUILTIN(__builtin_log10l, "LdLd", "Fne")
+BUILTIN(__builtin_log1p , "dd"  , "Fne")
+BUILTIN(__builtin_log1pf, "ff"  , "Fne")
+BUILTIN(__builtin_log1pl, "LdLd", "Fne")
+BUILTIN(__builtin_log2, "dd"  , "Fne")
+BUILTIN(__builtin_log2f, "ff"  , "Fne")
+BUILTIN(__builtin_log2l, "LdLd"  , "Fne")
+BUILTIN(__builtin_logb , "dd", "Fne")
+BUILTIN(__builtin_logbf, "ff", "Fne")
+BUILTIN(__builtin_logbl, "LdLd", "Fne")
+BUILTIN(__builtin_logf, "ff"  , "Fne")
+BUILTIN(__builtin_logl, "LdLd", "Fne")
+BUILTIN(__builtin_lrint , "Lid", "Fne")
+BUILTIN(__builtin_lrintf, "Lif", "Fne")
+BUILTIN(__builtin_lrintl, "LiLd", "Fne")
+BUILTIN(__builtin_lround , "Lid", "Fne")
+BUILTIN(__builtin_lroundf, "Lif", "Fne")
+BUILTIN(__builtin_lroundl, "LiLd", "Fne")
 BUILTIN(__builtin_nearbyint , "dd", "Fnc")
 BUILTIN(__builtin_nearbyintf, "ff", "Fnc")
 BUILTIN(__builtin_nearbyintl, "LdLd", "Fnc")
-BUILTIN(__builtin_nextafter , "ddd", "Fnc")
-BUILTIN(__builtin_nextafterf, "fff", "Fnc")
-BUILTIN(__builtin_nextafterl, "LdLdLd", "Fnc")
-BUILTIN(__builtin_nexttoward , "ddLd", "Fnc")
-BUILTIN(__builtin_nexttowardf, "ffLd", "Fnc")
-BUILTIN(__builtin_nexttowardl, "LdLdLd", "Fnc")
-BUILTIN(__builtin_remainder , "ddd", "Fnc")
-BUILTIN(__builtin_remainderf, "fff", "Fnc")
-BUILTIN(__builtin_remainderl, "LdLdLd", "Fnc")
+BUILTIN(__builtin_nextafter , "ddd", "Fne")
+BUILTIN(__builtin_nextafterf, "fff", "Fne")
+BUILTIN(__builtin_nextafterl, "LdLdLd", "Fne")
+BUILTIN(__builtin_nexttoward , "ddLd", "Fne")
+BUILTIN(__builtin_nexttowardf, "ffLd", "Fne")
+BUILTIN(__builtin_nexttowardl, "LdLdLd", "Fne")
+BUILTIN(__builtin_remainder , "ddd", "Fne")
+BUILTIN(__builtin_remainderf, "fff", "Fne")
+BUILTIN(__builtin_remainderl, "LdLdLd", "Fne")
 BUILTIN(__builtin_remquo , "dddi*", "Fn")
 BUILTIN(__builtin_remquof, "fffi*", "Fn")
 BUILTIN(__builtin_remquol, "LdLdLdi*", "Fn")
@@ -264,101 +270,101 @@
 BUILTIN(__builtin_round, "dd"  , "Fnc")
 BUILTIN(__builtin_roundf, "ff"  , "Fnc")
 BUILTIN(__builtin_roundl, "LdLd"  , "Fnc")
-BUILTIN(__builtin_scalbln , "ddLi", "Fnc")
-BUILTIN(__builtin_scalblnf, "ffLi", "Fnc")
-BUILTIN(__builtin_scalblnl, "LdLdLi", "Fnc")
-BUILTIN(__builtin_scalbn , "ddi", "Fnc")
-BUILTIN(__builtin_scalbnf, "ffi", "Fnc")
-BUILTIN(__builtin_scalbnl, "LdLdi", "Fnc")
-BUILTIN(__builtin_sin , "dd"  , "Fnc")
-BUILTIN(__builtin_sinf, "ff"  , "Fnc")
-BUILTIN(__builtin_sinh , "dd"  , "Fnc")
-BUILTIN(__builtin_sinhf, "ff"  , "Fnc")
-BUILTIN(__builtin_sinhl, "LdLd", "Fnc")
-BUILTIN(__builtin_sinl, "LdLd", "Fnc")
-BUILTIN(__builtin_sqrt , "dd"  , "Fnc")
-BUILTIN(__builtin_sqrtf, "ff"  , "Fnc")
-BUILTIN(__builtin_sqrtl, "LdLd", "Fnc")
-BUILTIN(__builtin_tan , "dd"  , "Fnc")
-BUILTIN(__builtin_tanf, "ff"  , "Fnc")
-BUILTIN(__builtin_tanh , "dd"  , "Fnc")
-BUILTIN(__builtin_tanhf, "ff"  , "Fnc")
-BUILTIN(__builtin_tanhl, "LdLd", "Fnc")
-BUILTIN(__builtin_tanl, "LdLd", "Fnc")
-BUILTIN(__builtin_tgamma , "dd", "Fnc")
-BUILTIN(__builtin_tgammaf, "ff", "Fnc")
-BUILTIN(__builtin_tgammal, "LdLd", "Fnc")
+BUILTIN(__builtin_scalbln , "ddLi", "Fne")
+BUILTIN(__builtin_scalblnf, "ffLi", "Fne")
+BUILTIN(__builtin_scalblnl, "LdLdLi", "Fne")
+BUILTIN(__builtin_scalbn , "ddi", "Fne")
+BUILTIN(__builtin_scalbnf, "ffi", "Fne")
+BUILTIN(__builtin_scalbnl, "LdLdi", "Fne")
+BUILTIN(__builtin_sin , "dd"  , "Fne")
+BUILTIN(__builtin_sinf, "ff"  , "Fne")
+BUILTIN(__builtin_sinh , "dd"  , "Fne")
+BUILTIN(__builtin_sinhf, "ff"  , "Fne")
+BUILTIN(__builtin_sinhl, "LdLd", "Fne")
+BUILTIN(__builtin_sinl, "LdLd", "Fne")
+BUILTIN(__builtin_sqrt , "dd"  , "Fne")
+BUILTIN(__builtin_sqrtf, "ff"  , "Fne")
+BUILTIN(__builtin_sqrtl, "LdLd", "Fne")
+BUILTIN(__builtin_tan , "dd"  , "Fne")
+BUILTIN(__builtin_tanf, "ff"  , "Fne")
+BUILTIN(__builtin_tanh , "dd"  , "Fne")
+BUILTIN(__builtin_tanhf, "ff"  , "Fne")
+BUILTIN(__builtin_tanhl, "LdLd", "Fne")
+BUILTIN(__builtin_tanl, "LdLd", "Fne")
+BUILTIN(__builtin_tgamma , "dd", "Fne")
+BUILTIN(__builtin_tgammaf, "ff", "Fne")
+BUILTIN(__builtin_tgammal, "LdLd", "Fne")
 BUILTIN(__builtin_trunc , "dd", "Fnc")
 BUILTIN(__builtin_truncf, "ff", "Fnc")
 BUILTIN(__builtin_truncl, "LdLd", "Fnc")
 
 // C99 complex builtins
-BUILTIN(__builtin_cabs, "dXd", "Fnc")
-BUILTIN(__builtin_cabsf, "fXf", "Fnc")
-BUILTIN(__builtin_cabsl, "LdXLd", "Fnc")
-BUILTIN(__builtin_cacos, "XdXd", "Fnc")
-BUILTIN(__builtin_cacosf, "XfXf", "Fnc")
-BUILTIN(__builtin_cacosh, "XdXd", "Fnc")
-BUILTIN(__builtin_cacoshf, "XfXf", "Fnc")
-BUILTIN(__builtin_cacoshl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_cacosl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_carg, "dXd", "Fnc")
-BUILTIN(__builtin_cargf, "fXf", "Fnc")
-BUILTIN(__builtin_cargl, "LdXLd", "Fnc")
-BUILTIN(__builtin_casin, "XdXd", "Fnc")
-BUILTIN(__builtin_casinf, "XfXf", "Fnc")
-BUILTIN(__builtin_casinh, "XdXd", "Fnc")
-BUILTIN(__builtin_casinhf, "XfXf", "Fnc")
-BUILTIN(__builtin_casinhl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_casinl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_catan, "XdXd", "Fnc")
-BUILTIN(__builtin_catanf, "XfXf", "Fnc")
-BUILTIN(__builtin_catanh, "XdXd", "Fnc")
-BUILTIN(__builtin_catanhf, "XfXf", "Fnc")
-BUILTIN(__builtin_catanhl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_catanl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_ccos, "XdXd", "Fnc")
-BUILTIN(__builtin_ccosf, "XfXf", "Fnc")
-BUILTIN(__builtin_ccosl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_ccosh, "XdXd", "Fnc")
-BUILTIN(__builtin_ccoshf, "XfXf", "Fnc")
-BUILTIN(__builtin_ccoshl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_cexp, "XdXd", "Fnc")
-BUILTIN(__builtin_cexpf, "XfXf", "Fnc")
-BUILTIN(__builtin_cexpl, "XLdXLd", "Fnc")
+BUILTIN(__builtin_cabs, "dXd", "Fne")
+BUILTIN(__builtin_cabsf, "fXf", "Fne")
+BUILTIN(__builtin_cabsl, "LdXLd", "Fne")
+BUILTIN(__builtin_cacos, "XdXd", "Fne")
+BUILTIN(__builtin_cacosf, "XfXf", "Fne")
+BUILTIN(__builtin_cacosh, "XdXd", "Fne")
+BUILTIN(__builtin_cacoshf, "XfXf", "Fne")
+BUILTIN(__builtin_cacoshl, "XLdXLd", "Fne")
+BUILTIN(__builtin_cacosl, "XLdXLd", "Fne")
+BUILTIN(__builtin_carg, "dXd", "Fne")
+BUILTIN(__builtin_cargf, "fXf", "Fne")
+BUILTIN(__builtin_cargl, "LdXLd", "Fne")
+BUILTIN(__builtin_casin, "XdXd", "Fne")
+BUILTIN(__builtin_casinf, "XfXf", "Fne")
+BUILTIN(__builtin_casinh, "XdXd", "Fne")
+BUILTIN(__builtin_casinhf, "XfXf", "Fne")
+BUILTIN(__builtin_casinhl, "XLdXLd", "Fne")
+BUILTIN(__builtin_casinl, "XLdXLd", "Fne")
+BUILTIN(__builtin_catan, "XdXd", "Fne")
+BUILTIN(__builtin_catanf, "XfXf", "Fne")
+BUILTIN(__builtin_catanh, "XdXd", "Fne")
+BUILTIN(__builtin_catanhf, "XfXf", "Fne")
+BUILTIN(__builtin_catanhl, "XLdXLd", "Fne")
+BUILTIN(__builtin_catanl, "XLdXLd", "Fne")
+BUILTIN(__builtin_ccos, "XdXd", "Fne")
+BUILTIN(__builtin_ccosf, "XfXf", "Fne")
+BUILTIN(__builtin_ccosl, "XLdXLd", "Fne")
+BUILTIN(__builtin_ccosh, "XdXd", "Fne")
+BUILTIN(__builtin_ccoshf, "XfXf", "Fne")
+BUILTIN(__builtin_ccoshl, "XLdXLd", "Fne")
+BUILTIN(__builtin_cexp, "XdXd", "Fne")
+BUILTIN(__builtin_cexpf, "XfXf", "Fne")
+BUILTIN(__builtin_cexpl, "XLdXLd", "Fne")
 BUILTIN(__builtin_cimag, "dXd", "Fnc")
 BUILTIN(__builtin_cimagf, "fXf", "Fnc")
 BUILTIN(__builtin_cimagl, "LdXLd", "Fnc")
 BUILTIN(__builtin_conj, "XdXd", "Fnc")
 BUILTIN(__builtin_conjf, "XfXf", "Fnc")
 BUILTIN(__builtin_conjl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_clog, "XdXd", "Fnc")
-BUILTIN(__builtin_clogf, "XfXf", "Fnc")
-BUILTIN(__builtin_clogl, "XLdXLd", "Fnc")
+BUILTIN(__builtin_clog, "XdXd", "Fne")
+BUILTIN(__builtin_clogf, "XfXf", "Fne")
+BUILTIN(__builtin_clogl, "XLdXLd", "Fne")
 BUILTIN(__builtin_cproj, "XdXd", "Fnc")
 BUILTIN(__builtin_cprojf, "XfXf", "Fnc")
 BUILTIN(__builtin_cprojl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_cpow, "XdXdXd", "Fnc")
-BUILTIN(__builtin_cpowf, "XfXfXf", "Fnc")
-BUILTIN(__builtin_cpowl, "XLdXLdXLd", "Fnc")
+BUILTIN(__builtin_cpow, "XdXdXd", "Fne")
+BUILTIN(__builtin_cpowf, "XfXfXf", "Fne")
+BUILTIN(__builtin_cpowl, "XLdXLdXLd", "Fne")
 BUILTIN(__builtin_creal, "dXd", "Fnc")
 BUILTIN(__builtin_crealf, "fXf", "Fnc")
 BUILTIN(__builtin_creall, "LdXLd", "Fnc")
-BUILTIN(__builtin_csin, "XdXd", "Fnc")
-BUILTIN(__builtin_csinf, "XfXf", "Fnc")
-BUILTIN(__builtin_csinl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_csinh, "XdXd", "Fnc")
-BUILTIN(__builtin_csinhf, "XfXf", "Fnc")
-BUILTIN(__builtin_csinhl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_csqrt, "XdXd", "Fnc")
-BUILTIN(__builtin_csqrtf, "XfXf", "Fnc")
-BUILTIN(__builtin_csqrtl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_ctan, "XdXd", "Fnc")
-BUILTIN(__builtin_ctanf, "XfXf", "Fnc")
-BUILTIN(__builtin_ctanl, "XLdXLd", "Fnc")
-BUILTIN(__builtin_ctanh, "XdXd", "Fnc")
-BUILTIN(__builtin_ctanhf, "XfXf", "Fnc")
-BUILTIN(__builtin_ctanhl, "XLdXLd", "Fnc")
+BUILTIN(__builtin_csin, "XdXd", "Fne")
+BUILTIN(__builtin_csinf, "XfXf", "Fne")
+BUILTIN(__builtin_csinl, "XLdXLd", "Fne")
+BUILTIN(__builtin_csinh, "XdXd", "Fne")
+BUILTIN(__builtin_csinhf, "XfXf", "Fne")
+BUILTIN(__builtin_csinhl, "XLdXLd", "Fne")
+BUILTIN(__builtin_csqrt, "XdXd", "Fne")
+BUILTIN(__builtin_csqrtf, "XfXf", "Fne")
+BUILTIN(__builtin_csqrtl, "XLdXLd", "Fne")
+BUILTIN(__builtin_ctan, "XdXd", "Fne")
+BUILTIN(__builtin_ctanf, "XfXf", "Fne")
+BUILTIN(__builtin_ctanl, "XLdXLd", "Fne")
+BUILTIN(__builtin_ctanh, "XdXd", "Fne")
+BUILTIN(__builtin_ctanhf, "XfXf", "Fne")
+BUILTIN(__builtin_ctanhl, "XLdXLd", "Fne")
 
 // FP Comparisons.
 BUILTIN(__builtin_isgreater     , "i.", "Fnc")
@@ -796,6 +802,10 @@
 LIBBUILTIN(_setjmpex, "iJ", "fj",   "setjmpex.h", ALL_MS_LANGUAGES)
 
 // C99 library functions
+// C99 stdarg.h
+LIBBUILTIN(va_start, "vA.",       "fn",    "stdarg.h", ALL_LANGUAGES)
+LIBBUILTIN(va_end, "vA",          "fn",    "stdarg.h", ALL_LANGUAGES)
+LIBBUILTIN(va_copy, "vAA",        "fn",    "stdarg.h", ALL_LANGUAGES)
 // C99 stdlib.h
 LIBBUILTIN(abort, "v",            "fr",    "stdlib.h", ALL_LANGUAGES)
 LIBBUILTIN(calloc, "v*zz",        "f",     "stdlib.h", ALL_LANGUAGES)
@@ -1040,9 +1050,9 @@
 LIBBUILTIN(atanhf, "ff", "fne", "math.h", ALL_LANGUAGES)
 LIBBUILTIN(atanhl, "LdLd", "fne", "math.h", ALL_LANGUAGES)
 
-LIBBUILTIN(cbrt, "dd", "fne", "math.h", ALL_LANGUAGES)
-LIBBUILTIN(cbrtf, "ff", "fne", "math.h", ALL_LANGUAGES)
-LIBBUILTIN(cbrtl, "LdLd", "fne", "math.h", ALL_LANGUAGES)
+LIBBUILTIN(cbrt, "dd", "fnc", "math.h", ALL_LANGUAGES)
+LIBBUILTIN(cbrtf, "ff", "fnc", "math.h", ALL_LANGUAGES)
+LIBBUILTIN(cbrtl, "LdLd", "fnc", "math.h", ALL_LANGUAGES)
 
 LIBBUILTIN(ceil, "dd", "fnc", "math.h", ALL_LANGUAGES)
 LIBBUILTIN(ceilf, "ff", "fnc", "math.h", ALL_LANGUAGES)
@@ -1162,6 +1172,10 @@
 LIBBUILTIN(remainderf, "fff", "fne", "math.h", ALL_LANGUAGES)
 LIBBUILTIN(remainderl, "LdLdLd", "fne", "math.h", ALL_LANGUAGES)
 
+LIBBUILTIN(remquo, "dddi*", "fn", "math.h", ALL_LANGUAGES)
+LIBBUILTIN(remquof, "fffi*", "fn", "math.h", ALL_LANGUAGES)
+LIBBUILTIN(remquol, "LdLdLdi*", "fn", "math.h", ALL_LANGUAGES)
+
 LIBBUILTIN(rint, "dd", "fnc", "math.h", ALL_LANGUAGES)
 LIBBUILTIN(rintf, "ff", "fnc", "math.h", ALL_LANGUAGES)
 LIBBUILTIN(rintl, "LdLd", "fnc", "math.h", ALL_LANGUAGES)
@@ -1206,49 +1220,49 @@
 LIBBUILTIN(truncf, "ff", "fnc", "math.h", ALL_LANGUAGES)
 LIBBUILTIN(truncl, "LdLd", "fnc", "math.h", ALL_LANGUAGES)
 
-LIBBUILTIN(cabs, "dXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cabsf, "fXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cabsl, "LdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cabs, "dXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cabsf, "fXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cabsl, "LdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(cacos, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cacosf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cacosl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cacos, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cacosf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cacosl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(cacosh, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cacoshf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cacoshl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cacosh, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cacoshf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cacoshl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(carg, "dXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cargf, "fXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cargl, "LdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(carg, "dXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cargf, "fXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cargl, "LdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(casin, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(casinf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(casinl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(casin, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(casinf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(casinl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(casinh, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(casinhf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(casinhl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(casinh, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(casinhf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(casinhl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(catan, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(catanf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(catanl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(catan, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(catanf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(catanl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(catanh, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(catanhf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(catanhl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(catanh, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(catanhf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(catanhl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(ccos, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(ccosf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(ccosl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ccos, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ccosf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ccosl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(ccosh, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(ccoshf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(ccoshl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ccosh, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ccoshf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ccoshl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(cexp, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cexpf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cexpl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cexp, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cexpf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cexpl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
 LIBBUILTIN(cimag, "dXd", "fnc", "complex.h", ALL_LANGUAGES)
 LIBBUILTIN(cimagf, "fXf", "fnc", "complex.h", ALL_LANGUAGES)
@@ -1258,41 +1272,41 @@
 LIBBUILTIN(conjf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
 LIBBUILTIN(conjl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(clog, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(clogf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(clogl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(clog, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(clogf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(clogl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
 LIBBUILTIN(cproj, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
 LIBBUILTIN(cprojf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
 LIBBUILTIN(cprojl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(cpow, "XdXdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cpowf, "XfXfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(cpowl, "XLdXLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cpow, "XdXdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cpowf, "XfXfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(cpowl, "XLdXLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
 LIBBUILTIN(creal, "dXd", "fnc", "complex.h", ALL_LANGUAGES)
 LIBBUILTIN(crealf, "fXf", "fnc", "complex.h", ALL_LANGUAGES)
 LIBBUILTIN(creall, "LdXLd", "fnc", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(csin, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(csinf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(csinl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(csin, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(csinf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(csinl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(csinh, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(csinhf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(csinhl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(csinh, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(csinhf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(csinhl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(csqrt, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(csqrtf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(csqrtl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(csqrt, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(csqrtf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(csqrtl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(ctan, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(ctanf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(ctanl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ctan, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ctanf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ctanl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
-LIBBUILTIN(ctanh, "XdXd", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(ctanhf, "XfXf", "fnc", "complex.h", ALL_LANGUAGES)
-LIBBUILTIN(ctanhl, "XLdXLd", "fnc", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ctanh, "XdXd", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ctanhf, "XfXf", "fne", "complex.h", ALL_LANGUAGES)
+LIBBUILTIN(ctanhl, "XLdXLd", "fne", "complex.h", ALL_LANGUAGES)
 
 // __sinpi and friends are OS X specific library functions, but otherwise much
 // like the standard (non-complex) sin (etc).
diff --git a/include/clang/Basic/BuiltinsAMDGPU.def b/include/clang/Basic/BuiltinsAMDGPU.def
index ec6a0fb..46687f7 100644
--- a/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,9 +21,9 @@
 // SI+ only builtins.
 //===----------------------------------------------------------------------===//
 
-BUILTIN(__builtin_amdgcn_dispatch_ptr, "Uc*2", "nc")
-BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*2", "nc")
-BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*2", "nc")
+BUILTIN(__builtin_amdgcn_dispatch_ptr, "Uc*4", "nc")
+BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*4", "nc")
+BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*4", "nc")
 
 BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc")
 BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc")
@@ -93,6 +93,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "ffff", "nc")
+BUILTIN(__builtin_amdgcn_ds_fadd, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmin, "ff*3fiib", "n")
+BUILTIN(__builtin_amdgcn_ds_fmax, "ff*3fiib", "n")
 
 //===----------------------------------------------------------------------===//
 // VI+ only builtins.
diff --git a/include/clang/Basic/BuiltinsARM.def b/include/clang/Basic/BuiltinsARM.def
index 4e277f8..941d320 100644
--- a/include/clang/Basic/BuiltinsARM.def
+++ b/include/clang/Basic/BuiltinsARM.def
@@ -36,6 +36,7 @@
 // Saturating arithmetic
 BUILTIN(__builtin_arm_qadd, "iii", "nc")
 BUILTIN(__builtin_arm_qsub, "iii", "nc")
+BUILTIN(__builtin_arm_qdbl, "ii", "nc")
 BUILTIN(__builtin_arm_ssat, "iiUi", "nc")
 BUILTIN(__builtin_arm_usat, "UiiUi", "nc")
 
diff --git a/include/clang/Basic/BuiltinsHexagon.def b/include/clang/Basic/BuiltinsHexagon.def
index 14fc4ad..fda50b5 100644
--- a/include/clang/Basic/BuiltinsHexagon.def
+++ b/include/clang/Basic/BuiltinsHexagon.def
@@ -17,7 +17,6 @@
 // The builtins below are not autogenerated from iset.py.
 // Make sure you do not overwrite these.
 
-BUILTIN(__builtin_SI_to_SXTHI_asrh, "ii", "")
 BUILTIN(__builtin_brev_ldd,   "LLi*LLi*LLi*i", "")
 BUILTIN(__builtin_brev_ldw,   "i*i*i*i", "")
 BUILTIN(__builtin_brev_ldh,   "s*s*s*i", "")
@@ -882,6 +881,7 @@
 BUILTIN(__builtin_HEXAGON_S2_ct1p,"iLLi","")
 BUILTIN(__builtin_HEXAGON_S2_interleave,"LLiLLi","")
 BUILTIN(__builtin_HEXAGON_S2_deinterleave,"LLiLLi","")
+BUILTIN(__builtin_HEXAGON_prefetch,"vv*","")
 BUILTIN(__builtin_HEXAGON_Y2_dccleana,"vv*","")
 BUILTIN(__builtin_HEXAGON_Y2_dccleaninva,"vv*","")
 BUILTIN(__builtin_HEXAGON_Y2_dcinva,"vv*","")
@@ -1470,14 +1470,6 @@
 BUILTIN(__builtin_HEXAGON_V6_vassign_128B,"V32iV32i","v:60:")
 BUILTIN(__builtin_HEXAGON_V6_vcombine,"V32iV16iV16i","v:60:")
 BUILTIN(__builtin_HEXAGON_V6_vcombine_128B,"V64iV32iV32i","v:60:")
-BUILTIN(__builtin_HEXAGON_V6_vlutb,"V16iV16iLLii","v:60:")
-BUILTIN(__builtin_HEXAGON_V6_vlutb_128B,"V32iV32iLLii","v:60:")
-BUILTIN(__builtin_HEXAGON_V6_vlutb_acc,"V16iV16iV16iLLii","v:60:")
-BUILTIN(__builtin_HEXAGON_V6_vlutb_acc_128B,"V32iV32iV32iLLii","v:60:")
-BUILTIN(__builtin_HEXAGON_V6_vlutb_dv,"V32iV32iLLii","v:60:")
-BUILTIN(__builtin_HEXAGON_V6_vlutb_dv_128B,"V64iV64iLLii","v:60:")
-BUILTIN(__builtin_HEXAGON_V6_vlutb_dv_acc,"V32iV32iV32iLLii","v:60:")
-BUILTIN(__builtin_HEXAGON_V6_vlutb_dv_acc_128B,"V64iV64iV64iLLii","v:60:")
 BUILTIN(__builtin_HEXAGON_V6_vdelta,"V16iV16iV16i","v:60:")
 BUILTIN(__builtin_HEXAGON_V6_vdelta_128B,"V32iV32iV32i","v:60:")
 BUILTIN(__builtin_HEXAGON_V6_vrdelta,"V16iV16iV16i","v:60:")
@@ -1508,4 +1500,216 @@
 BUILTIN(__builtin_HEXAGON_V6_vassignp,"V32iV32i","v:60:")
 BUILTIN(__builtin_HEXAGON_V6_vassignp_128B,"V64iV64i","v:60:")
 
+BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai,"vV16iv*V16i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai,"vV16iv*V16i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai,"vV16iv*V16i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai,"vV16iv*V16i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai_128B,"vV32iv*V32i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai_128B,"vV32iv*V32i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B,"vV32iv*V32i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B,"vV32iv*V32i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq,"vV16iv*V16i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq,"vV16iv*V16i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq,"vV16iv*V16i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq,"vV16iv*V16i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq_128B,"vV32iv*V32i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq_128B,"vV32iv*V32i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq_128B,"vV32iv*V32i","v:60:")
+BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq_128B,"vV32iv*V32i","v:60:")
+
+BUILTIN(__builtin_HEXAGON_M6_vabsdiffb,"LLiLLiLLi","v:62:")
+BUILTIN(__builtin_HEXAGON_M6_vabsdiffub,"LLiLLiLLi","v:62:")
+BUILTIN(__builtin_HEXAGON_A6_vminub_RdP,"LLiLLiLLi","v:62:")
+BUILTIN(__builtin_HEXAGON_S6_vsplatrbp,"LLii","v:62:")
+BUILTIN(__builtin_HEXAGON_S6_vtrunehb_ppp,"LLiLLiLLi","v:62:")
+BUILTIN(__builtin_HEXAGON_S6_vtrunohb_ppp,"LLiLLiLLi","v:62:")
+
+BUILTIN(__builtin_HEXAGON_V6_vlsrb,"V16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlsrb_128B,"V32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vasrwuhrndsat,"V16iV16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vasrwuhrndsat_128B,"V32iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vasruwuhrndsat,"V16iV16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vasruwuhrndsat_128B,"V32iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vasrhbsat,"V16iV16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vasrhbsat_128B,"V32iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vrounduwuh,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vrounduwuh_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vrounduhub,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vrounduhub_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vadduwsat,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vadduwsat_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vadduwsat_dv,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vadduwsat_dv_128B,"V64iV64iV64i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubuwsat,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubuwsat_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubuwsat_dv,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubuwsat_dv_128B,"V64iV64iV64i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddbsat,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddbsat_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddbsat_dv,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddbsat_dv_128B,"V64iV64iV64i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubbsat,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubbsat_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubbsat_dv,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubbsat_dv_128B,"V64iV64iV64i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddububb_sat,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddububb_sat_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubububb_sat,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubububb_sat_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddhw_acc,"V32iV32iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddhw_acc_128B,"V64iV64iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vadduhw_acc,"V32iV32iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vadduhw_acc_128B,"V64iV64iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddubh_acc,"V32iV32iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddubh_acc_128B,"V64iV64iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyewuh_64,"V32iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyewuh_64_128B,"V64iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyowh_64_acc,"V32iV32iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyowh_64_acc_128B,"V64iV64iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpauhb,"V32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpauhb_128B,"V64iV64ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpauhb_acc,"V32iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpauhb_acc_128B,"V64iV64iV64ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyiwub,"V16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_128B,"V32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_acc,"V16iV16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_acc_128B,"V32iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vandnqrt,"V16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vandnqrt_128B,"V32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc,"V16iV16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc_128B,"V32iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vandvqv,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vandvqv_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vandvnqv,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vandvnqv_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2,"V16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2_128B,"V32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_shuffeqw,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_shuffeqw_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_shuffeqh,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_shuffeqh_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmaxb,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vmaxb_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vminb,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vminb_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsatuwuh,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsatuwuh_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_lvsplath,"V16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_lvsplath_128B,"V32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_lvsplatb,"V16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_lvsplatb_128B,"V32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddclbw,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddclbw_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddclbh,"V16iV16iV16i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddclbh_128B,"V32iV32iV32i","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvvbi,"V16iV16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvvbi_128B,"V32iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvvb_oracci,"V16iV16iV16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvvb_oracci_128B,"V32iV32iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvwhi,"V32iV16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvwhi_128B,"V64iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvwh_oracci,"V32iV32iV16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvwh_oracci_128B,"V64iV64iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvvb_nm,"V16iV16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvvb_nm_128B,"V32iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvwh_nm,"V32iV16iV16ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vlutvwh_nm_128B,"V64iV32iV32ii","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddcarry,"V16iV16iV16iv*","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vaddcarry_128B,"V32iV32iV32iv*","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubcarry,"V16iV16iV16iv*","v:62:")
+BUILTIN(__builtin_HEXAGON_V6_vsubcarry_128B,"V32iV32iV32iv*","v:62:")
+
+BUILTIN(__builtin_HEXAGON_A6_vcmpbeq_notany,"iLLiLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_A6_vcmpbeq_notany_128B,"iLLiLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vrmpyub_rtt,"V32iV16iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vrmpyub_rtt_128B,"V64iV32iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vrmpyub_rtt_acc,"V32iV32iV16iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vrmpyub_rtt_acc_128B,"V64iV64iV32iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vrmpybub_rtt,"V32iV16iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vrmpybub_rtt_128B,"V64iV32iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vrmpybub_rtt_acc,"V32iV32iV16iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vrmpybub_rtt_acc_128B,"V64iV64iV32iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vasruwuhsat,"V16iV16iV16ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vasruwuhsat_128B,"V32iV32iV32ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vasruhubsat,"V16iV16iV16ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vasruhubsat_128B,"V32iV32iV32ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vasruhubrndsat,"V16iV16iV16ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vasruhubrndsat_128B,"V32iV32iV32ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vaslh_acc,"V16iV16iV16ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vaslh_acc_128B,"V32iV32iV32ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vasrh_acc,"V16iV16iV16ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vasrh_acc_128B,"V32iV32iV32ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vavguw,"V16iV16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vavguw_128B,"V32iV32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vavguwrnd,"V16iV16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vavguwrnd_128B,"V32iV32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vavgb,"V16iV16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vavgb_128B,"V32iV32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vavgbrnd,"V16iV16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vavgbrnd_128B,"V32iV32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vnavgb,"V16iV16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vnavgb_128B,"V32iV32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vabsb,"V16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vabsb_128B,"V32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vabsb_sat,"V16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vabsb_sat_128B,"V32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpabuu,"V32iV32ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpabuu_128B,"V64iV64ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpabuu_acc,"V32iV32iV32ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpabuu_acc_128B,"V64iV64iV64ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyh_acc,"V32iV32iV16ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyh_acc_128B,"V64iV64iV32ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpahhsat,"V16iV16iV16iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpahhsat_128B,"V32iV32iV32iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpauhuhsat,"V16iV16iV16iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpauhuhsat_128B,"V32iV32iV32iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpsuhuhsat,"V16iV16iV16iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpsuhuhsat_128B,"V32iV32iV32iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vlut4,"V16iV16iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vlut4_128B,"V32iV32iLLi","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyuhe,"V16iV16ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyuhe_128B,"V32iV32ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyuhe_acc,"V16iV16iV16ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vmpyuhe_acc_128B,"V32iV32iV32ii","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermw,"vv*iiV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermw_128B,"vv*iiV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermh,"vv*iiV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermh_128B,"vv*iiV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermhw,"vv*iiV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermhw_128B,"vv*iiV64i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermwq,"vv*V16iiiV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermwq_128B,"vv*V32iiiV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermhq,"vv*V16iiiV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermhq_128B,"vv*V32iiiV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermhwq,"vv*V16iiiV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vgathermhwq_128B,"vv*V32iiiV64i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermw,"viiV16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermw_128B,"viiV32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermh,"viiV16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermh_128B,"viiV32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermw_add,"viiV16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermw_add_128B,"viiV32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermh_add,"viiV16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermh_add_128B,"viiV32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermwq,"vV16iiiV16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermwq_128B,"vV32iiiV32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermhq,"vV16iiiV16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermhq_128B,"vV32iiiV32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermhw,"viiV32iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermhw_128B,"viiV64iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermhwq,"vV16iiiV32iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermhwq_128B,"vV32iiiV64iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermhw_add,"viiV32iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vscattermhw_add_128B,"viiV64iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vprefixqb,"V16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vprefixqb_128B,"V32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vprefixqh,"V16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vprefixqh_128B,"V32iV32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vprefixqw,"V16iV16i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vprefixqw_128B,"V32iV32i","v:65:")
+
+BUILTIN(__builtin_HEXAGON_V6_vdd0,"V32i","v:65:")
+BUILTIN(__builtin_HEXAGON_V6_vdd0_128B,"V64i","v:65:")
+
+
 #undef BUILTIN
diff --git a/include/clang/Basic/BuiltinsNEON.def b/include/clang/Basic/BuiltinsNEON.def
index 7800ae6..241b93a 100644
--- a/include/clang/Basic/BuiltinsNEON.def
+++ b/include/clang/Basic/BuiltinsNEON.def
@@ -16,6 +16,7 @@
 
 #define GET_NEON_BUILTINS
 #include "clang/Basic/arm_neon.inc"
+#include "clang/Basic/arm_fp16.inc"
 #undef GET_NEON_BUILTINS
 
 #undef BUILTIN
diff --git a/include/clang/Basic/BuiltinsNVPTX.def b/include/clang/Basic/BuiltinsNVPTX.def
index caa8604..7bab73a 100644
--- a/include/clang/Basic/BuiltinsNVPTX.def
+++ b/include/clang/Basic/BuiltinsNVPTX.def
@@ -371,6 +371,9 @@
 BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
 BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
 
+// FNS
+TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", "ptx60")
+
 // Sync
 
 BUILTIN(__syncthreads, "v", "")
@@ -481,7 +484,7 @@
 TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", "satom")
 BUILTIN(__nvvm_atom_add_g_d, "ddD*1d", "n")
 BUILTIN(__nvvm_atom_add_s_d, "ddD*3d", "n")
-BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n")
+TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", "satom")
 TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", "satom")
 TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", "satom")
 
diff --git a/include/clang/Basic/BuiltinsWebAssembly.def b/include/clang/Basic/BuiltinsWebAssembly.def
index 19318dc..d698151 100644
--- a/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/include/clang/Basic/BuiltinsWebAssembly.def
@@ -16,8 +16,16 @@
 
 // The format of this database matches clang/Basic/Builtins.def.
 
-// Note that current_memory is not "c" (readnone) because it must be sequenced
+// Query the current memory size, and increase the current memory size.
+// Note that mem.size is not "c" (readnone) because it must be sequenced
 // with respect to grow_memory calls.
+// These are the new proposed names, which aren't yet official. Use at your own
+// risk.
+BUILTIN(__builtin_wasm_mem_size, "zIi", "n")
+BUILTIN(__builtin_wasm_mem_grow, "zIiz", "n")
+
+// These are the existing names, which are currently official, but expected
+// to be deprecated in the future. They also lack the immediate field.
 BUILTIN(__builtin_wasm_current_memory, "z", "n")
 BUILTIN(__builtin_wasm_grow_memory, "zz", "n")
 
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index d57ea8f..44592d1 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -429,9 +429,34 @@
 TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "", "aes")
 TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "", "aes")
 
+// VAES
+TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")
+
+// GFNI
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "", "gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "", "avx,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "", "avx512bw,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "", "gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "", "avx,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "", "avx512bw,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "", "gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "", "avx,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "", "avx512bw,gfni")
+
 // CLMUL
 TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "", "pclmul")
 
+// VPCLMULQDQ
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "", "vpclmulqdq")
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "", "avx512f,vpclmulqdq")
+
 // AVX
 TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "", "avx")
@@ -638,6 +663,16 @@
 TARGET_BUILTIN(__builtin_ia32_xsavec, "vv*ULLi", "", "xsavec")
 TARGET_BUILTIN(__builtin_ia32_xsaves, "vv*ULLi", "", "xsaves")
 
+// SHSTK
+TARGET_BUILTIN(__builtin_ia32_incsspd, "vUi", "u", "shstk")
+TARGET_BUILTIN(__builtin_ia32_rdsspd, "UiUi", "Un", "shstk")
+TARGET_BUILTIN(__builtin_ia32_saveprevssp, "v", "", "shstk")
+TARGET_BUILTIN(__builtin_ia32_rstorssp, "vv*", "", "shstk")
+TARGET_BUILTIN(__builtin_ia32_wrssd, "vUiv*", "", "shstk")
+TARGET_BUILTIN(__builtin_ia32_wrussd, "vUiv*", "", "shstk")
+TARGET_BUILTIN(__builtin_ia32_setssbsy, "v", "", "shstk")
+TARGET_BUILTIN(__builtin_ia32_clrssbsy, "vv*", "", "shstk")
+
 //CLFLUSHOPT
 TARGET_BUILTIN(__builtin_ia32_clflushopt, "vvC*", "", "clflushopt")
 
@@ -682,36 +717,18 @@
 // FMA
 TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubps, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubss, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubsd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddps, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddss, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddsd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubps, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubss, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubsd, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "", "fma")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "", "fma")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "", "fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "", "fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddps, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubps256, "V8fV8fV8fV8f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubpd256, "V4dV4dV4dV4d", "", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfnmaddps256, "V8fV8fV8fV8f", "", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfnmaddpd256, "V4dV4dV4dV4d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubps256, "V8fV8fV8fV8f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubpd256, "V4dV4dV4dV4d", "", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddps256, "V8fV8fV8fV8f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd256, "V4dV4dV4dV4d", "", "fma|fma4")
 
 TARGET_BUILTIN(__builtin_ia32_vfmaddpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vfmaddpd128_mask3, "V2dV2dV2dV2dUc", "", "avx512vl")
@@ -877,6 +894,9 @@
 BUILTIN(__builtin_ia32_rdtsc, "ULLi", "")
 BUILTIN(__rdtsc, "ULLi", "")
 BUILTIN(__builtin_ia32_rdtscp, "ULLiUi*", "")
+
+TARGET_BUILTIN(__builtin_ia32_rdpid, "Ui", "", "rdpid")
+
 // PKU
 TARGET_BUILTIN(__builtin_ia32_rdpkru, "Ui", "", "pku")
 TARGET_BUILTIN(__builtin_ia32_wrpkru, "vUi", "", "pku")
@@ -910,39 +930,10 @@
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcIi", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cmpps512_mask,   "UsV16fV16fIiUsIi", "", "avx512f")
 
+TARGET_BUILTIN(__builtin_ia32_cmpps512_mask,   "UsV16fV16fIiUsIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpps256_mask,   "UcV8fV8fIiUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpps128_mask,   "UcV4fV4fIiUc", "", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_pcmpeqb512_mask, "LLiV64cV64cLLi", "", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqd512_mask, "sV16iV16is", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqq512_mask, "cV8LLiV8LLic", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqw512_mask, "iV32sV32si", "", "avx512bw")
-
-TARGET_BUILTIN(__builtin_ia32_pcmpeqb256_mask, "iV32cV32ci", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqd256_mask, "cV8iV8ic", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqq256_mask, "cV4LLiV4LLic", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqw256_mask, "sV16sV16ss", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqb128_mask, "sV16cV16cs", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqd128_mask, "cV4iV4ic", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqq128_mask, "cV2LLiV2LLic", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqw128_mask, "cV8sV8sc", "", "avx512vl,avx512bw")
-
-TARGET_BUILTIN(__builtin_ia32_pcmpgtb512_mask, "LLiV64cV64cLLi", "", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtd512_mask, "sV16iV16is", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtq512_mask, "cV8LLiV8LLic", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtw512_mask, "iV32sV32si", "", "avx512bw")
-
-TARGET_BUILTIN(__builtin_ia32_pcmpgtb256_mask, "iV32cV32ci", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtd256_mask, "cV8iV8ic", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtq256_mask, "cV4LLiV4LLic", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtw256_mask, "sV16sV16ss", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtb128_mask, "sV16cV16cs", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtd128_mask, "cV4iV4ic", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtq128_mask, "cV2LLiV2LLic", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtw128_mask, "cV8sV8sc", "", "avx512vl,avx512bw")
-
 TARGET_BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIiUcIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmppd256_mask, "UcV4dV4dIiUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmppd128_mask, "UcV2dV2dIiUc", "", "avx512vl")
@@ -974,8 +965,6 @@
 TARGET_BUILTIN(__builtin_ia32_pminuq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8LLiV16iV16i", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_ptestmd512, "UsV16iV16iUs", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_ptestmq512, "UcV8LLiV8LLiUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLiLLiC*V8LLiUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "", "avx512f")
@@ -993,6 +982,31 @@
 TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "", "avx512f")
 
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
+
 TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi","","avx512vl")
@@ -1099,9 +1113,24 @@
 TARGET_BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "", "avx512cd")
 TARGET_BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "", "avx512cd")
 
+TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "", "avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "", "avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "", "avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "", "avx512vpopcntdq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq")
 TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq")
 
+TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "", "avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "", "avx512bitalg")
+
+TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "", "avx512bitalg")
+
 TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
@@ -1109,9 +1138,9 @@
 TARGET_BUILTIN(__builtin_ia32_vpermt2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varhi256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
 
-TARGET_BUILTIN(__builtin_ia32_pmulhrsw512_mask, "V32sV32sV32sV32sUi", "", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmulhuw512_mask, "V32sV32sV32sV32sUi", "", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmulhw512_mask, "V32sV32sV32sV32sUi", "", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "", "avx512bw")
 
 TARGET_BUILTIN(__builtin_ia32_addpd512_mask, "V8dV8dV8dV8dUcIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_addps512_mask, "V16fV16fV16fV16fUsIi", "", "avx512f")
@@ -1142,6 +1171,12 @@
 TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2LLiV2LLiV2LLiUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4LLiV4LLiV4LLiUc", "", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc","","avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs","","avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressqi128_mask, "V16cV16cV16cUs","","avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressqi256_mask, "V32cV32cV32cUi","","avx512vl,avx512vbmi2")
+
 TARGET_BUILTIN(__builtin_ia32_compresssf128_mask, "V4fV4fV4fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compresssf256_mask, "V8fV8fV8fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "", "avx512vl")
@@ -1150,30 +1185,25 @@
 TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2LLi*V2LLiUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4LLi*V4LLiUc", "", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressstoreqi128_mask, "vV16c*V16cUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressstoreqi256_mask, "vV32c*V32cUi", "", "avx512vl,avx512vbmi2")
+
 TARGET_BUILTIN(__builtin_ia32_compressstoresf128_mask, "vV4f*V4fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256_mask, "V8fV8iV8fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256_mask, "V4iV4dV4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps_mask, "V4fV2dV4fUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256_mask, "V4fV4dV4fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2udq128_mask, "V4iV2dV4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2udq256_mask, "V4iV4dV4iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq128_mask, "V4iV4fV4iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq256_mask, "V8iV8fV8iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pd128_mask, "V2dV4fV2dUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pd256_mask, "V4dV4fV4dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtps2udq128_mask, "V4iV4fV4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtps2udq256_mask, "V8iV8fV8iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2dq128_mask, "V4iV2dV4iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256_mask, "V4iV4dV4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2udq128_mask, "V4iV2dV4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq128_mask, "V4iV4fV4iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq256_mask, "V8iV8fV8iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtudq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl")
@@ -1182,10 +1212,22 @@
 TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4LLiV4LLiV4LLiUc", "", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandqi128_mask, "V16cV16cV16cUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "", "avx512vl,avx512vbmi2")
+
 TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2d*V2dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4d*V4dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2LLi*V2LLiUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4LLiV4LLi*V4LLiUc", "", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandloadqi128_mask, "V16cV16cC*V16cUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandloadqi256_mask, "V32cV32cC*V32cUi", "", "avx512vl,avx512vbmi2")
+
 TARGET_BUILTIN(__builtin_ia32_expandloadsf128_mask, "V4fV4f*V4fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expandloadsf256_mask, "V8fV8f*V8fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expandloadsi128_mask, "V4iV4i*V4iUc", "", "avx512vl")
@@ -1258,6 +1300,65 @@
 TARGET_BUILTIN(__builtin_ia32_vpermt2varq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_vpshldd128_mask, "V4iV4iV4iIiV4iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldd256_mask, "V8iV8iV8iIiV8iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldd512_mask, "V16iV16iV16iIiV16iUs", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldw128_mask, "V8sV8sV8sIiV8sUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldw256_mask, "V16sV16sV16sIiV16sUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldw512_mask, "V32sV32sV32sIiV32sUi", "", "avx512vbmi2")
+
+TARGET_BUILTIN(__builtin_ia32_vpshldvd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvd512_mask, "V16iV16iV16iV16iUs", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw512_mask, "V32sV32sV32sV32sUi", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw128_maskz, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw512_maskz, "V32sV32sV32sV32sUi", "", "avx512vbmi2")
+
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_mask, "V16iV16iV16iV16iUs", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_mask, "V32sV32sV32sV32sUi", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "", "avx512vbmi2")
+
+TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iIiV4iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iIiV8iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iIiV16iUs", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8sIiV8sUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16sIiV16sUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32sIiV32sUi", "", "avx512vbmi2")
+
 TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "", "avx512bw")
@@ -1487,28 +1588,6 @@
 TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_maskz, "V8dV8LLiV8dV8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_maskz, "V16fV16iV16fV16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_ptestmb512, "ULLiV64cV64cULLi","","avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ptestmw512, "UiV32sV32sUi","","avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ptestnmb512, "ULLiV64cV64cULLi","","avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ptestnmw512, "UiV32sV32sUi","","avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ptestmb128, "UsV16cV16cUs","","avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestmb256, "UiV32cV32cUi","","avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestmw128, "UcV8sV8sUc","","avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestmw256, "UsV16sV16sUs","","avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestnmb128, "UsV16cV16cUs","","avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestnmb256, "UiV32cV32cUi","","avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestnmw128, "UcV8sV8sUc","","avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestnmw256, "UsV16sV16sUs","","avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestmd128, "UcV4iV4iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestmd256, "UcV8iV8iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestmq128, "UcV2LLiV2LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestmq256, "UcV4LLiV4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestnmd128, "UcV4iV4iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestnmd256, "UcV8iV8iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestnmq128, "UcV2LLiV2LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestnmq256, "UcV4LLiV4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ptestnmd512, "UsV16iV16iUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_ptestnmq512, "UcV8LLiV8LLiUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, "V2dV2dV2dV2dUcIiIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi","","avx512f")
@@ -1580,12 +1659,6 @@
 TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4LLiUc","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2LLi","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4LLi","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastmb512, "V8LLiUc","","avx512cd")
-TARGET_BUILTIN(__builtin_ia32_broadcastmw512, "V16iUs","","avx512cd")
-TARGET_BUILTIN(__builtin_ia32_broadcastmb128, "V2LLiUc","","avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi","","avx512bw")
@@ -1715,8 +1788,6 @@
 TARGET_BUILTIN(__builtin_ia32_permvarhi256_mask, "V16sV16sV16sV16sUs","","avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_permvardf256_mask, "V4dV4dV4LLiV4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_permvardi256_mask, "V4LLiV4LLiV4LLiV4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarsf256_mask, "V8fV8fV8iV8fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarsi256_mask, "V8iV8iV8iV8iUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc","","avx512dq,avx512vl")
@@ -1740,14 +1811,20 @@
 TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c","","avx512bw")
 TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi","","avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cULLi","","avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi","","avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cULLi","","avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLiV8LLiC*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi","","avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cULLi","","avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs","","avx512f")
@@ -1755,6 +1832,8 @@
 TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi","","avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cULLi","","avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc","","avx512vl")
diff --git a/include/clang/Basic/BuiltinsX86_64.def b/include/clang/Basic/BuiltinsX86_64.def
index 6e12067..fe2c887 100644
--- a/include/clang/Basic/BuiltinsX86_64.def
+++ b/include/clang/Basic/BuiltinsX86_64.def
@@ -40,6 +40,7 @@
 TARGET_HEADER_BUILTIN(_InterlockedIncrement64,   "LLiLLiD*",    "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_InterlockedOr64,          "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_InterlockedXor64,         "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_InterlockedCompareExchange128, "UcLLiD*LLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "cx16")
 
 TARGET_BUILTIN(__builtin_ia32_readeflags_u64, "ULLi", "n", "")
 TARGET_BUILTIN(__builtin_ia32_writeeflags_u64, "vULLi", "n", "")
@@ -60,6 +61,10 @@
 TARGET_BUILTIN(__builtin_ia32_xrstors64, "vv*ULLi", "", "xsaves")
 TARGET_BUILTIN(__builtin_ia32_xsavec64, "vv*ULLi", "", "xsavec")
 TARGET_BUILTIN(__builtin_ia32_xsaves64, "vv*ULLi", "", "xsaves")
+TARGET_BUILTIN(__builtin_ia32_incsspq, "vULLi", "u", "shstk")
+TARGET_BUILTIN(__builtin_ia32_rdsspq, "ULLiULLi", "Un", "shstk")
+TARGET_BUILTIN(__builtin_ia32_wrssq, "vULLiv*", "", "shstk")
+TARGET_BUILTIN(__builtin_ia32_wrussq, "vULLiv*", "", "shstk")
 TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcULLiULLiULLi*", "", "adx")
 TARGET_BUILTIN(__builtin_ia32_addcarry_u64, "UcUcULLiULLiULLi*", "", "")
 TARGET_BUILTIN(__builtin_ia32_subborrow_u64, "UcUcULLiULLiULLi*", "", "")
diff --git a/include/clang/Basic/CMakeLists.txt b/include/clang/Basic/CMakeLists.txt
index 821c405..be61ffa 100644
--- a/include/clang/Basic/CMakeLists.txt
+++ b/include/clang/Basic/CMakeLists.txt
@@ -46,3 +46,7 @@
   -I ${CMAKE_CURRENT_SOURCE_DIR}/../../
   SOURCE arm_neon.td
   TARGET ClangARMNeon)
+clang_tablegen(arm_fp16.inc -gen-arm-neon-sema
+  -I ${CMAKE_CURRENT_SOURCE_DIR}/../../
+  SOURCE arm_fp16.td
+  TARGET ClangARMFP16)
diff --git a/include/clang/Basic/CharInfo.h b/include/clang/Basic/CharInfo.h
index dd9c554..cc27bbb 100644
--- a/include/clang/Basic/CharInfo.h
+++ b/include/clang/Basic/CharInfo.h
@@ -40,14 +40,14 @@
 } // end namespace charinfo
 
 /// Returns true if this is an ASCII character.
-LLVM_READNONE static inline bool isASCII(char c) {
+LLVM_READNONE inline bool isASCII(char c) {
   return static_cast<unsigned char>(c) <= 127;
 }
 
 /// Returns true if this is a valid first character of a C identifier,
 /// which is [a-zA-Z_].
-LLVM_READONLY static inline bool isIdentifierHead(unsigned char c,
-                                                  bool AllowDollar = false) {
+LLVM_READONLY inline bool isIdentifierHead(unsigned char c,
+                                           bool AllowDollar = false) {
   using namespace charinfo;
   if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_UNDER))
     return true;
@@ -56,8 +56,8 @@
 
 /// Returns true if this is a body character of a C identifier,
 /// which is [a-zA-Z0-9_].
-LLVM_READONLY static inline bool isIdentifierBody(unsigned char c,
-                                                  bool AllowDollar = false) {
+LLVM_READONLY inline bool isIdentifierBody(unsigned char c,
+                                           bool AllowDollar = false) {
   using namespace charinfo;
   if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER))
     return true;
@@ -68,7 +68,7 @@
 /// ' ', '\\t', '\\f', '\\v'.
 ///
 /// Note that this returns false for '\\0'.
-LLVM_READONLY static inline bool isHorizontalWhitespace(unsigned char c) {
+LLVM_READONLY inline bool isHorizontalWhitespace(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0;
 }
@@ -76,7 +76,7 @@
 /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'.
 ///
 /// Note that this returns false for '\\0'.
-LLVM_READONLY static inline bool isVerticalWhitespace(unsigned char c) {
+LLVM_READONLY inline bool isVerticalWhitespace(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & CHAR_VERT_WS) != 0;
 }
@@ -85,43 +85,43 @@
 /// ' ', '\\t', '\\f', '\\v', '\\n', '\\r'.
 ///
 /// Note that this returns false for '\\0'.
-LLVM_READONLY static inline bool isWhitespace(unsigned char c) {
+LLVM_READONLY inline bool isWhitespace(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_VERT_WS|CHAR_SPACE)) != 0;
 }
 
 /// Return true if this character is an ASCII digit: [0-9]
-LLVM_READONLY static inline bool isDigit(unsigned char c) {
+LLVM_READONLY inline bool isDigit(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & CHAR_DIGIT) != 0;
 }
 
 /// Return true if this character is a lowercase ASCII letter: [a-z]
-LLVM_READONLY static inline bool isLowercase(unsigned char c) {
+LLVM_READONLY inline bool isLowercase(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & CHAR_LOWER) != 0;
 }
 
 /// Return true if this character is an uppercase ASCII letter: [A-Z]
-LLVM_READONLY static inline bool isUppercase(unsigned char c) {
+LLVM_READONLY inline bool isUppercase(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & CHAR_UPPER) != 0;
 }
 
 /// Return true if this character is an ASCII letter: [a-zA-Z]
-LLVM_READONLY static inline bool isLetter(unsigned char c) {
+LLVM_READONLY inline bool isLetter(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER)) != 0;
 }
 
 /// Return true if this character is an ASCII letter or digit: [a-zA-Z0-9]
-LLVM_READONLY static inline bool isAlphanumeric(unsigned char c) {
+LLVM_READONLY inline bool isAlphanumeric(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & (CHAR_DIGIT|CHAR_UPPER|CHAR_LOWER)) != 0;
 }
 
 /// Return true if this character is an ASCII hex digit: [0-9a-fA-F]
-LLVM_READONLY static inline bool isHexDigit(unsigned char c) {
+LLVM_READONLY inline bool isHexDigit(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & (CHAR_DIGIT|CHAR_XLETTER)) != 0;
 }
@@ -129,7 +129,7 @@
 /// Return true if this character is an ASCII punctuation character.
 ///
 /// Note that '_' is both a punctuation character and an identifier character!
-LLVM_READONLY static inline bool isPunctuation(unsigned char c) {
+LLVM_READONLY inline bool isPunctuation(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & (CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL|CHAR_PUNCT)) != 0;
 }
@@ -137,7 +137,7 @@
 /// Return true if this character is an ASCII printable character; that is, a
 /// character that should take exactly one column to print in a fixed-width
 /// terminal.
-LLVM_READONLY static inline bool isPrintable(unsigned char c) {
+LLVM_READONLY inline bool isPrintable(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT|
                           CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0;
@@ -145,14 +145,14 @@
 
 /// Return true if this is the body character of a C preprocessing number,
 /// which is [a-zA-Z0-9_.].
-LLVM_READONLY static inline bool isPreprocessingNumberBody(unsigned char c) {
+LLVM_READONLY inline bool isPreprocessingNumberBody(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] &
           (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER|CHAR_PERIOD)) != 0;
 }
 
 /// Return true if this is the body character of a C++ raw string delimiter.
-LLVM_READONLY static inline bool isRawStringDelimBody(unsigned char c) {
+LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) {
   using namespace charinfo;
   return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|
                           CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0;
@@ -162,7 +162,7 @@
 /// Converts the given ASCII character to its lowercase equivalent.
 ///
 /// If the character is not an uppercase character, it is returned as is.
-LLVM_READONLY static inline char toLowercase(char c) {
+LLVM_READONLY inline char toLowercase(char c) {
   if (isUppercase(c))
     return c + 'a' - 'A';
   return c;
@@ -171,7 +171,7 @@
 /// Converts the given ASCII character to its uppercase equivalent.
 ///
 /// If the character is not a lowercase character, it is returned as is.
-LLVM_READONLY static inline char toUppercase(char c) {
+LLVM_READONLY inline char toUppercase(char c) {
   if (isLowercase(c))
     return c + 'A' - 'a';
   return c;
@@ -182,7 +182,7 @@
 ///
 /// Note that this is a very simple check; it does not accept '$' or UCNs as
 /// valid identifier characters.
-LLVM_READONLY static inline bool isValidIdentifier(StringRef S) {
+LLVM_READONLY inline bool isValidIdentifier(StringRef S) {
   if (S.empty() || !isIdentifierHead(S[0]))
     return false;
 
diff --git a/include/clang/Basic/Cuda.h b/include/clang/Basic/Cuda.h
index b273365..8ce3807 100644
--- a/include/clang/Basic/Cuda.h
+++ b/include/clang/Basic/Cuda.h
@@ -22,6 +22,8 @@
   CUDA_75,
   CUDA_80,
   CUDA_90,
+  CUDA_91,
+  LATEST = CUDA_91,
 };
 const char *CudaVersionToString(CudaVersion V);
 
@@ -43,6 +45,8 @@
   SM_61,
   SM_62,
   SM_70,
+  SM_72,
+  LAST,
 };
 const char *CudaArchToString(CudaArch A);
 
@@ -63,6 +67,7 @@
   COMPUTE_61,
   COMPUTE_62,
   COMPUTE_70,
+  COMPUTE_72,
 };
 const char *CudaVirtualArchToString(CudaVirtualArch A);
 
@@ -75,6 +80,9 @@
 /// Get the earliest CudaVersion that supports the given CudaArch.
 CudaVersion MinVersionForCudaArch(CudaArch A);
 
+/// Get the latest CudaVersion that supports the given CudaArch.
+CudaVersion MaxVersionForCudaArch(CudaArch A);
+
 } // namespace clang
 
 #endif
diff --git a/include/clang/Basic/DeclNodes.td b/include/clang/Basic/DeclNodes.td
index 3298a80..67ca9e5 100644
--- a/include/clang/Basic/DeclNodes.td
+++ b/include/clang/Basic/DeclNodes.td
@@ -1,66 +1,68 @@
 class AttrSubject;
 
-class Decl<bit abstract = 0> : AttrSubject {
+class Decl<string diagSpelling = "", bit abstract = 0> : AttrSubject {
   bit Abstract = abstract;
+  string DiagSpelling = diagSpelling;
 }
 
-class DDecl<Decl base, bit abstract = 0> : Decl<abstract> {
+class DDecl<Decl base, string diagSpelling = "", bit abstract = 0>
+    : Decl<diagSpelling, abstract> {
   Decl Base = base;
 }
 
-class DeclContext { }
+class DeclContext {}
 
 def TranslationUnit : Decl, DeclContext;
 def PragmaComment : Decl;
 def PragmaDetectMismatch : Decl;
 def ExternCContext : Decl, DeclContext;
-def Named : Decl<1>;
-  def Namespace : DDecl<Named>, DeclContext;
+def Named : Decl<"named declarations", 1>;
+  def Namespace : DDecl<Named, "namespaces">, DeclContext;
   def UsingDirective : DDecl<Named>;
   def NamespaceAlias : DDecl<Named>;
-  def Label : DDecl<Named>;
-  def Type : DDecl<Named, 1>;
-    def TypedefName : DDecl<Type, 1>;
+  def Label : DDecl<Named, "labels">;
+  def Type : DDecl<Named, "types", 1>;
+    def TypedefName : DDecl<Type, "typedefs", 1>;
       def Typedef : DDecl<TypedefName>;
       def TypeAlias : DDecl<TypedefName>;
       def ObjCTypeParam : DDecl<TypedefName>;
     def UnresolvedUsingTypename : DDecl<Type>;
-    def Tag : DDecl<Type, 1>, DeclContext;
-      def Enum : DDecl<Tag>;
-      def Record : DDecl<Tag>;
-        def CXXRecord : DDecl<Record>;
+    def Tag : DDecl<Type, "tag types", 1>, DeclContext;
+      def Enum : DDecl<Tag, "enums">;
+      def Record : DDecl<Tag, "structs, unions, classes">;
+        def CXXRecord : DDecl<Record, "classes">;
           def ClassTemplateSpecialization : DDecl<CXXRecord>;
             def ClassTemplatePartialSpecialization
               : DDecl<ClassTemplateSpecialization>;
     def TemplateTypeParm : DDecl<Type>;
-  def Value : DDecl<Named, 1>;
-    def EnumConstant : DDecl<Value>;
+  def Value : DDecl<Named, "value declarations", 1>;
+    def EnumConstant : DDecl<Value, "enumerators">;
     def UnresolvedUsingValue : DDecl<Value>;
     def IndirectField : DDecl<Value>;
     def Binding : DDecl<Value>;
     def OMPDeclareReduction : DDecl<Value>, DeclContext;
-    def Declarator : DDecl<Value, 1>;
-      def Field : DDecl<Declarator>;
+    def Declarator : DDecl<Value, "declarators", 1>;
+      def Field : DDecl<Declarator, "non-static data members">;
         def ObjCIvar : DDecl<Field>;
         def ObjCAtDefsField : DDecl<Field>;
       def MSProperty : DDecl<Declarator>;
-      def Function : DDecl<Declarator>, DeclContext;
+      def Function : DDecl<Declarator, "functions">, DeclContext;
         def CXXDeductionGuide : DDecl<Function>;
         def CXXMethod : DDecl<Function>;
           def CXXConstructor : DDecl<CXXMethod>;
           def CXXDestructor : DDecl<CXXMethod>;
           def CXXConversion : DDecl<CXXMethod>;
-      def Var : DDecl<Declarator>;
+      def Var : DDecl<Declarator, "variables">;
         def VarTemplateSpecialization : DDecl<Var>;
           def VarTemplatePartialSpecialization
             : DDecl<VarTemplateSpecialization>;
         def ImplicitParam : DDecl<Var>;
-        def ParmVar : DDecl<Var>;
+        def ParmVar : DDecl<Var, "parameters">;
         def Decomposition : DDecl<Var>;
         def OMPCapturedExpr : DDecl<Var>;
       def NonTypeTemplateParm : DDecl<Declarator>;
-  def Template : DDecl<Named, 1>;
-    def RedeclarableTemplate : DDecl<Template, 1>;
+  def Template : DDecl<Named, "templates", 1>;
+    def RedeclarableTemplate : DDecl<Template, "redeclarable templates", 1>;
       def FunctionTemplate : DDecl<RedeclarableTemplate>;
       def ClassTemplate : DDecl<RedeclarableTemplate>;
       def VarTemplate : DDecl<RedeclarableTemplate>;
@@ -71,15 +73,16 @@
   def UsingPack : DDecl<Named>;
   def UsingShadow : DDecl<Named>;
     def ConstructorUsingShadow : DDecl<UsingShadow>;
-  def ObjCMethod : DDecl<Named>, DeclContext;
-  def ObjCContainer : DDecl<Named, 1>, DeclContext;
+  def ObjCMethod : DDecl<Named, "Objective-C methods">, DeclContext;
+  def ObjCContainer : DDecl<Named, "Objective-C containers", 1>, DeclContext;
     def ObjCCategory : DDecl<ObjCContainer>;
-    def ObjCProtocol : DDecl<ObjCContainer>;
-    def ObjCInterface : DDecl<ObjCContainer>;
-    def ObjCImpl : DDecl<ObjCContainer, 1>;
+    def ObjCProtocol : DDecl<ObjCContainer, "Objective-C protocols">;
+    def ObjCInterface : DDecl<ObjCContainer, "Objective-C interfaces">;
+    def ObjCImpl
+        : DDecl<ObjCContainer, "Objective-C implementation declarations", 1>;
       def ObjCCategoryImpl : DDecl<ObjCImpl>;
       def ObjCImplementation : DDecl<ObjCImpl>;
-  def ObjCProperty : DDecl<Named>;
+  def ObjCProperty : DDecl<Named, "Objective-C properties">;
   def ObjCCompatibleAlias : DDecl<Named>;
 def LinkageSpec : Decl, DeclContext;
 def Export : Decl, DeclContext;
@@ -89,7 +92,7 @@
 def Friend : Decl;
 def FriendTemplate : Decl;
 def StaticAssert : Decl;
-def Block : Decl, DeclContext;
+def Block : Decl<"blocks">, DeclContext;
 def Captured : Decl, DeclContext;
 def ClassScopeFunctionSpecialization : Decl;
 def Import : Decl;
diff --git a/include/clang/Basic/Diagnostic.h b/include/clang/Basic/Diagnostic.h
index 22cded2..8bf0fd1 100644
--- a/include/clang/Basic/Diagnostic.h
+++ b/include/clang/Basic/Diagnostic.h
@@ -1,4 +1,4 @@
-//===--- Diagnostic.h - C Language Family Diagnostic Handling ---*- C++ -*-===//
+//===- Diagnostic.h - C Language Family Diagnostic Handling -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,10 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the Diagnostic-related interfaces.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_BASIC_DIAGNOSTIC_H
@@ -22,12 +22,13 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
-#include <algorithm>
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Compiler.h" 
 #include <cassert>
 #include <cstdint>
+#include <limits>
 #include <list>
 #include <map>
 #include <memory>
@@ -44,13 +45,14 @@
 class IdentifierInfo;
 class LangOptions;
 class Preprocessor;
+class SourceManager;
 class StoredDiagnostic;
 
 namespace tok {
 
-  enum TokenKind : unsigned short;
+enum TokenKind : unsigned short;
 
-} // end namespace tok
+} // namespace tok
 
 /// \brief Annotates a diagnostic with some code that should be
 /// inserted, removed, or replaced to fix the problem.
@@ -75,11 +77,11 @@
   /// string.
   std::string CodeToInsert;
 
-  bool BeforePreviousInsertions;
+  bool BeforePreviousInsertions = false;
 
   /// \brief Empty code modification hint, indicating that no code
   /// modification is known.
-  FixItHint() : BeforePreviousInsertions(false) { }
+  FixItHint() = default;
 
   bool isNull() const {
     return !RemoveRange.isValid();
@@ -157,43 +159,86 @@
   };
 
   enum ArgumentKind {
-    ak_std_string,      ///< std::string
-    ak_c_string,        ///< const char *
-    ak_sint,            ///< int
-    ak_uint,            ///< unsigned
-    ak_tokenkind,       ///< enum TokenKind : unsigned
-    ak_identifierinfo,  ///< IdentifierInfo
-    ak_qualtype,        ///< QualType
-    ak_declarationname, ///< DeclarationName
-    ak_nameddecl,       ///< NamedDecl *
-    ak_nestednamespec,  ///< NestedNameSpecifier *
-    ak_declcontext,     ///< DeclContext *
-    ak_qualtype_pair,   ///< pair<QualType, QualType>
-    ak_attr             ///< Attr *
+    /// std::string
+    ak_std_string,
+
+    /// const char *
+    ak_c_string,
+
+    /// int
+    ak_sint,
+
+    /// unsigned
+    ak_uint,
+
+    /// enum TokenKind : unsigned
+    ak_tokenkind,
+
+    /// IdentifierInfo
+    ak_identifierinfo,
+
+    /// QualType
+    ak_qualtype,
+
+    /// DeclarationName
+    ak_declarationname,
+
+    /// NamedDecl *
+    ak_nameddecl,
+
+    /// NestedNameSpecifier *
+    ak_nestednamespec,
+
+    /// DeclContext *
+    ak_declcontext,
+
+    /// pair<QualType, QualType>
+    ak_qualtype_pair,
+
+    /// Attr *
+    ak_attr
   };
 
   /// \brief Represents on argument value, which is a union discriminated
   /// by ArgumentKind, with a value.
-  typedef std::pair<ArgumentKind, intptr_t> ArgumentValue;
+  using ArgumentValue = std::pair<ArgumentKind, intptr_t>;
 
 private:
-  unsigned char AllExtensionsSilenced; // Used by __extension__
-  bool SuppressAfterFatalError;  // Suppress diagnostics after a fatal error?
-  bool SuppressAllDiagnostics;   // Suppress all diagnostics.
-  bool ElideType;                // Elide common types of templates.
-  bool PrintTemplateTree;        // Print a tree when comparing templates.
-  bool ShowColors;               // Color printing is enabled.
-  OverloadsShown ShowOverloads;  // Which overload candidates to show.
-  unsigned ErrorLimit;           // Cap of # errors emitted, 0 -> no limit.
-  unsigned TemplateBacktraceLimit; // Cap on depth of template backtrace stack,
-                                   // 0 -> no limit.
-  unsigned ConstexprBacktraceLimit; // Cap on depth of constexpr evaluation
-                                    // backtrace stack, 0 -> no limit.
+  // Used by __extension__
+  unsigned char AllExtensionsSilenced = 0;
+
+  // Suppress diagnostics after a fatal error?
+  bool SuppressAfterFatalError = true;
+
+  // Suppress all diagnostics.
+  bool SuppressAllDiagnostics = false;
+
+  // Elide common types of templates.
+  bool ElideType = true;
+
+  // Print a tree when comparing templates.
+  bool PrintTemplateTree = false;
+
+  // Color printing is enabled.
+  bool ShowColors = false;
+
+  // Which overload candidates to show.
+  OverloadsShown ShowOverloads = Ovl_All;
+
+  // Cap of # errors emitted, 0 -> no limit.
+  unsigned ErrorLimit = 0;
+
+  // Cap on depth of template backtrace stack, 0 -> no limit.
+  unsigned TemplateBacktraceLimit = 0;
+
+  // Cap on depth of constexpr evaluation backtrace stack, 0 -> no limit.
+  unsigned ConstexprBacktraceLimit = 0;
+
   IntrusiveRefCntPtr<DiagnosticIDs> Diags;
   IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts;
-  DiagnosticConsumer *Client;
+  DiagnosticConsumer *Client = nullptr;
   std::unique_ptr<DiagnosticConsumer> Owner;
-  SourceManager *SourceMgr;
+  SourceManager *SourceMgr = nullptr;
 
   /// \brief Mapping information for diagnostics.
   ///
@@ -211,25 +256,38 @@
 
   public:
     // "Global" configuration state that can actually vary between modules.
-    unsigned IgnoreAllWarnings : 1;      // Ignore all warnings: -w
-    unsigned EnableAllWarnings : 1;      // Enable all warnings.
-    unsigned WarningsAsErrors : 1;       // Treat warnings like errors.
-    unsigned ErrorsAsFatal : 1;          // Treat errors like fatal errors.
-    unsigned SuppressSystemWarnings : 1; // Suppress warnings in system headers.
-    diag::Severity ExtBehavior;         // Map extensions to warnings or errors?
+
+    // Ignore all warnings: -w
+    unsigned IgnoreAllWarnings : 1;
+
+    // Enable all warnings.
+    unsigned EnableAllWarnings : 1;
+
+    // Treat warnings like errors.
+    unsigned WarningsAsErrors : 1;
+
+    // Treat errors like fatal errors.
+    unsigned ErrorsAsFatal : 1;
+
+    // Suppress warnings in system headers.
+    unsigned SuppressSystemWarnings : 1;
+
+    // Map extensions to warnings or errors?
+    diag::Severity ExtBehavior = diag::Severity::Ignored;
 
     DiagState()
         : IgnoreAllWarnings(false), EnableAllWarnings(false),
           WarningsAsErrors(false), ErrorsAsFatal(false),
-          SuppressSystemWarnings(false), ExtBehavior(diag::Severity::Ignored) {}
+          SuppressSystemWarnings(false) {}
 
-    typedef llvm::DenseMap<unsigned, DiagnosticMapping>::iterator iterator;
-    typedef llvm::DenseMap<unsigned, DiagnosticMapping>::const_iterator
-    const_iterator;
+    using iterator = llvm::DenseMap<unsigned, DiagnosticMapping>::iterator;
+    using const_iterator =
+        llvm::DenseMap<unsigned, DiagnosticMapping>::const_iterator;
 
     void setMapping(diag::kind Diag, DiagnosticMapping Info) {
       DiagMap[Diag] = Info;
     }
+
     DiagnosticMapping lookupMapping(diag::kind Diag) const {
       return DiagMap.lookup(Diag);
     }
@@ -249,12 +307,16 @@
   public:
     /// Add an initial diagnostic state.
     void appendFirst(DiagState *State);
+
     /// Add a new latest state point.
     void append(SourceManager &SrcMgr, SourceLocation Loc, DiagState *State);
+
     /// Look up the diagnostic state at a given source location.
     DiagState *lookup(SourceManager &SrcMgr, SourceLocation Loc) const;
+
     /// Determine whether this map is empty.
     bool empty() const { return Files.empty(); }
+
     /// Clear out this map.
     void clear() {
       Files.clear();
@@ -262,12 +324,20 @@
       CurDiagStateLoc = SourceLocation();
     }
 
+    /// Produce a debugging dump of the diagnostic state.
+    LLVM_DUMP_METHOD void dump(SourceManager &SrcMgr,
+                               StringRef DiagName = StringRef()) const;
+
     /// Grab the most-recently-added state point.
     DiagState *getCurDiagState() const { return CurDiagState; }
+
     /// Get the location at which a diagnostic state was last added.
     SourceLocation getCurDiagStateLoc() const { return CurDiagStateLoc; }
 
   private:
+    friend class ASTReader;
+    friend class ASTWriter;
+
     /// \brief Represents a point in source where the diagnostic state was
     /// modified because of a pragma.
     ///
@@ -276,8 +346,9 @@
     struct DiagStatePoint {
       DiagState *State;
       unsigned Offset;
+
       DiagStatePoint(DiagState *State, unsigned Offset)
-        : State(State), Offset(Offset) { } 
+          : State(State), Offset(Offset) {} 
     };
 
     /// Description of the diagnostic states and state transitions for a
@@ -287,11 +358,14 @@
       /// as looking up the DecomposedIncludedLoc for the FileID in the Files
       /// map would give us this, but we cache it here for performance.
       File *Parent = nullptr;
+
       /// The offset of this file within its parent.
       unsigned ParentOffset = 0;
+
       /// Whether this file has any local (not imported from an AST file)
       /// diagnostic state transitions.
       bool HasLocalTransitions = false;
+
       /// The points within the file where the state changes. There will always
       /// be at least one of these (the state on entry to the file).
       llvm::SmallVector<DiagStatePoint, 4> StateTransitions;
@@ -304,16 +378,15 @@
 
     /// The initial diagnostic state.
     DiagState *FirstDiagState;
+
     /// The current diagnostic state.
     DiagState *CurDiagState;
+
     /// The location at which the current diagnostic state was established.
     SourceLocation CurDiagStateLoc;
 
     /// Get the diagnostic state information for a file.
     File *getFile(SourceManager &SrcMgr, FileID ID) const;
-
-    friend class ASTReader;
-    friend class ASTWriter;
   };
 
   DiagStateMap DiagStatesByLoc;
@@ -359,8 +432,11 @@
   /// diagnostic that they follow.
   DiagnosticIDs::Level LastDiagLevel;
 
-  unsigned NumWarnings;         ///< Number of warnings reported
-  unsigned NumErrors;           ///< Number of errors reported
+  /// Number of warnings reported
+  unsigned NumWarnings;
+
+  /// Number of errors reported
+  unsigned NumErrors;
 
   /// \brief A function pointer that converts an opaque diagnostic
   /// argument to a strings.
@@ -372,14 +448,15 @@
   /// avoid redundancy across arguments.
   ///
   /// This is a hack to avoid a layering violation between libbasic and libsema.
-  typedef void (*ArgToStringFnTy)(
+  using ArgToStringFnTy = void (*)(
       ArgumentKind Kind, intptr_t Val,
       StringRef Modifier, StringRef Argument,
       ArrayRef<ArgumentValue> PrevArgs,
       SmallVectorImpl<char> &Output,
       void *Cookie,
       ArrayRef<intptr_t> QualTypeVals);
-  void *ArgToStringCookie;
+
+  void *ArgToStringCookie = nullptr;
   ArgToStringFnTy ArgToStringFn;
 
   /// \brief ID of the "delayed" diagnostic, which is a (typically
@@ -402,13 +479,18 @@
 
 public:
   explicit DiagnosticsEngine(IntrusiveRefCntPtr<DiagnosticIDs> Diags,
-                             DiagnosticOptions *DiagOpts,
+                             IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts,
                              DiagnosticConsumer *client = nullptr,
                              bool ShouldOwnClient = true);
   DiagnosticsEngine(const DiagnosticsEngine &) = delete;
   DiagnosticsEngine &operator=(const DiagnosticsEngine &) = delete;
   ~DiagnosticsEngine();
 
+  LLVM_DUMP_METHOD void dump() const { DiagStatesByLoc.dump(*SourceMgr); }
+  LLVM_DUMP_METHOD void dump(StringRef DiagName) const {
+    DiagStatesByLoc.dump(*SourceMgr, DiagName);
+  }
+
   const IntrusiveRefCntPtr<DiagnosticIDs> &getDiagnosticIDs() const {
     return Diags;
   }
@@ -416,7 +498,7 @@
   /// \brief Retrieve the diagnostic options.
   DiagnosticOptions &getDiagnosticOptions() const { return *DiagOpts; }
 
-  typedef llvm::iterator_range<DiagState::const_iterator> diag_mapping_range;
+  using diag_mapping_range = llvm::iterator_range<DiagState::const_iterator>;
 
   /// \brief Get the current set of diagnostic mappings.
   diag_mapping_range getDiagnosticMappings() const {
@@ -435,10 +517,12 @@
   std::unique_ptr<DiagnosticConsumer> takeClient() { return std::move(Owner); }
 
   bool hasSourceManager() const { return SourceMgr != nullptr; }
+
   SourceManager &getSourceManager() const {
     assert(SourceMgr && "SourceManager not set!");
     return *SourceMgr;
   }
+
   void setSourceManager(SourceManager *SrcMgr) {
     assert(DiagStatesByLoc.empty() &&
            "Leftover diag state from a different SourceManager.");
@@ -575,13 +659,15 @@
   OverloadsShown getShowOverloads() const { return ShowOverloads; }
   
   /// \brief Pretend that the last diagnostic issued was ignored, so any
-  /// subsequent notes will be suppressed.
+  /// subsequent notes will be suppressed, or restore a prior ignoring
+  /// state after ignoring some diagnostics and their notes, possibly in
+  /// the middle of another diagnostic.
   ///
   /// This can be used by clients who suppress diagnostics themselves.
-  void setLastDiagnosticIgnored() {
+  void setLastDiagnosticIgnored(bool Ignored = true) {
     if (LastDiagLevel == DiagnosticIDs::Fatal)
       FatalErrorOccurred = true;
-    LastDiagLevel = DiagnosticIDs::Ignored;
+    LastDiagLevel = Ignored ? DiagnosticIDs::Ignored : DiagnosticIDs::Warning;
   }
 
   /// \brief Determine whether the previous diagnostic was ignored. This can
@@ -762,7 +848,9 @@
   void Report(const StoredDiagnostic &storedDiag);
 
   /// \brief Determine whethere there is already a diagnostic in flight.
-  bool isDiagnosticInFlight() const { return CurDiagID != ~0U; }
+  bool isDiagnosticInFlight() const {
+    return CurDiagID != std::numeric_limits<unsigned>::max();
+  }
 
   /// \brief Set the "delayed" diagnostic that will be emitted once
   /// the current diagnostic completes.
@@ -789,33 +877,34 @@
                             StringRef Arg2 = "");
   
   /// \brief Clear out the current diagnostic.
-  void Clear() { CurDiagID = ~0U; }
+  void Clear() { CurDiagID = std::numeric_limits<unsigned>::max(); }
 
   /// \brief Return the value associated with this diagnostic flag.
   StringRef getFlagValue() const { return FlagValue; }
 
 private:
-  /// \brief Report the delayed diagnostic.
-  void ReportDelayed();
-
   // This is private state used by DiagnosticBuilder.  We put it here instead of
   // in DiagnosticBuilder in order to keep DiagnosticBuilder a small lightweight
   // object.  This implementation choice means that we can only have one
   // diagnostic "in flight" at a time, but this seems to be a reasonable
   // tradeoff to keep these objects small.  Assertions verify that only one
   // diagnostic is in flight at a time.
-  friend class DiagnosticIDs;
-  friend class DiagnosticBuilder;
   friend class Diagnostic;
-  friend class PartialDiagnostic;
+  friend class DiagnosticBuilder;
   friend class DiagnosticErrorTrap;
+  friend class DiagnosticIDs;
+  friend class PartialDiagnostic;
   
+  /// \brief Report the delayed diagnostic.
+  void ReportDelayed();
+
   /// \brief The location of the current diagnostic that is in flight.
   SourceLocation CurDiagLoc;
 
   /// \brief The ID of the current diagnostic that is in flight.
   ///
-  /// This is set to ~0U when there is no diagnostic in flight.
+  /// This is set to std::numeric_limits<unsigned>::max() when there is no
+  /// diagnostic in flight.
   unsigned CurDiagID;
 
   enum {
@@ -882,6 +971,9 @@
   /// @name Diagnostic Emission
   /// @{
 protected:
+  friend class ASTReader;
+  friend class ASTWriter;
+
   // Sema requires access to the following functions because the current design
   // of SFINAE requires it to use its own SemaDiagnosticBuilder, which needs to
   // access us directly to ensure we minimize the emitted code for the common
@@ -898,9 +990,6 @@
   SourceLocation getCurrentDiagLoc() const { return CurDiagLoc; }
 
   /// @}
-
-  friend class ASTReader;
-  friend class ASTWriter;
 };
 
 /// \brief RAII class that determines when any errors have occurred
@@ -913,7 +1002,7 @@
 
 public:
   explicit DiagnosticErrorTrap(DiagnosticsEngine &Diag)
-    : Diag(Diag) { reset(); }
+      : Diag(Diag) { reset(); }
 
   /// \brief Determine whether any errors have occurred since this
   /// object instance was created.
@@ -951,6 +1040,9 @@
 /// the common fields to registers, eliminating increments of the NumArgs field,
 /// for example.
 class DiagnosticBuilder {
+  friend class DiagnosticsEngine;
+  friend class PartialDiagnostic;
+  
   mutable DiagnosticsEngine *DiagObj = nullptr;
   mutable unsigned NumArgs = 0;
 
@@ -965,8 +1057,6 @@
   /// call to ForceEmit.
   mutable bool IsForceEmit = false;
 
-  friend class DiagnosticsEngine;
-
   DiagnosticBuilder() = default;
 
   explicit DiagnosticBuilder(DiagnosticsEngine *diagObj)
@@ -976,8 +1066,6 @@
     diagObj->DiagFixItHints.clear();
   }
 
-  friend class PartialDiagnostic;
-  
 protected:
   void FlushCounts() {
     DiagObj->NumDiagArgs = NumArgs;
@@ -1038,7 +1126,7 @@
 
   /// \brief Retrieve an empty diagnostic builder.
   static DiagnosticBuilder getEmpty() {
-    return DiagnosticBuilder();
+    return {};
   }
 
   /// \brief Forces the diagnostic to be emitted.
@@ -1087,8 +1175,9 @@
 };
 
 struct AddFlagValue {
-  explicit AddFlagValue(StringRef V) : Val(V) {}
   StringRef Val;
+
+  explicit AddFlagValue(StringRef V) : Val(V) {}
 };
 
 /// \brief Register a value for the flag in the current diagnostic. This
@@ -1197,14 +1286,15 @@
 
 /// A nullability kind paired with a bit indicating whether it used a
 /// context-sensitive keyword.
-typedef std::pair<NullabilityKind, bool> DiagNullabilityKind;
+using DiagNullabilityKind = std::pair<NullabilityKind, bool>;
 
 const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
                                     DiagNullabilityKind nullability);
 
 inline DiagnosticBuilder DiagnosticsEngine::Report(SourceLocation Loc,
                                                    unsigned DiagID) {
-  assert(CurDiagID == ~0U && "Multiple diagnostics in flight at once!");
+  assert(CurDiagID == std::numeric_limits<unsigned>::max() &&
+         "Multiple diagnostics in flight at once!");
   CurDiagLoc = Loc;
   CurDiagID = DiagID;
   FlagValue.clear();
@@ -1229,7 +1319,7 @@
 public:
   explicit Diagnostic(const DiagnosticsEngine *DO) : DiagObj(DO) {}
   Diagnostic(const DiagnosticsEngine *DO, StringRef storedDiagMessage)
-    : DiagObj(DO), StoredDiagMessage(storedDiagMessage) {}
+      : DiagObj(DO), StoredDiagMessage(storedDiagMessage) {}
 
   const DiagnosticsEngine *getDiags() const { return DiagObj; }
   unsigned getID() const { return DiagObj->CurDiagID; }
@@ -1371,7 +1461,8 @@
 
   void setLocation(FullSourceLoc Loc) { this->Loc = Loc; }
 
-  typedef std::vector<CharSourceRange>::const_iterator range_iterator;
+  using range_iterator = std::vector<CharSourceRange>::const_iterator;
+
   range_iterator range_begin() const { return Ranges.begin(); }
   range_iterator range_end() const { return Ranges.end(); }
   unsigned range_size() const { return Ranges.size(); }
@@ -1380,7 +1471,8 @@
     return llvm::makeArrayRef(Ranges);
   }
 
-  typedef std::vector<FixItHint>::const_iterator fixit_iterator;
+  using fixit_iterator = std::vector<FixItHint>::const_iterator;
+
   fixit_iterator fixit_begin() const { return FixIts.begin(); }
   fixit_iterator fixit_end() const { return FixIts.end(); }
   unsigned fixit_size() const { return FixIts.size(); }
@@ -1399,7 +1491,6 @@
   
 public:
   DiagnosticConsumer() = default;
-
   virtual ~DiagnosticConsumer();
 
   unsigned getNumErrors() const { return NumErrors; }
@@ -1465,7 +1556,6 @@
 
 public:
   ForwardingDiagnosticConsumer(DiagnosticConsumer &Target) : Target(Target) {}
-
   ~ForwardingDiagnosticConsumer() override;
 
   void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
@@ -1483,6 +1573,7 @@
   unsigned PrintFromType : 1;
   unsigned ElideType : 1;
   unsigned ShowColors : 1;
+
   // The printer sets this variable to true if the template diff was used.
   unsigned TemplateDiffUsed : 1;
 };
@@ -1491,13 +1582,12 @@
 /// attribute.  The character itself will be not be printed.
 const char ToggleHighlight = 127;
 
-
 /// ProcessWarningOptions - Initialize the diagnostic client and process the
 /// warning options specified on the command line.
 void ProcessWarningOptions(DiagnosticsEngine &Diags,
                            const DiagnosticOptions &Opts,
                            bool ReportDiags = true);
 
-} // end namespace clang
+} // namespace clang
 
 #endif // LLVM_CLANG_BASIC_DIAGNOSTIC_H
diff --git a/include/clang/Basic/DiagnosticASTKinds.td b/include/clang/Basic/DiagnosticASTKinds.td
index b3cba20..215580b 100644
--- a/include/clang/Basic/DiagnosticASTKinds.td
+++ b/include/clang/Basic/DiagnosticASTKinds.td
@@ -127,6 +127,10 @@
 def note_constexpr_access_past_end : Note<
   "%select{read of|assignment to|increment of|decrement of}0 "
   "dereferenced one-past-the-end pointer is not allowed in a constant expression">;
+def note_constexpr_access_unsized_array : Note<
+  "%select{read of|assignment to|increment of|decrement of}0 "
+  "pointer to element of array without known bound "
+  "is not allowed in a constant expression">;
 def note_constexpr_access_inactive_union_member : Note<
   "%select{read of|assignment to|increment of|decrement of}0 "
   "member %1 of union with %select{active member %3|no active member}2 "
@@ -154,6 +158,11 @@
 def note_constexpr_baa_value_insufficient_alignment : Note<
   "value of the aligned pointer (%0) is not a multiple of the asserted %1 "
   "%plural{1:byte|:bytes}1">;
+def note_constexpr_unsupported_unsized_array : Note<
+  "array-to-pointer decay of array member without known bound is not supported">;
+def note_constexpr_unsized_array_indexed : Note<
+  "indexing of array without known bound is not allowed "
+  "in a constant expression">;
 
 def warn_integer_constant_overflow : Warning<
   "overflow in expression; result is %0 with type %1">,
diff --git a/include/clang/Basic/DiagnosticCommonKinds.td b/include/clang/Basic/DiagnosticCommonKinds.td
index 98fd3c4..9c019cb 100644
--- a/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/include/clang/Basic/DiagnosticCommonKinds.td
@@ -94,6 +94,9 @@
   "could not acquire lock file for module '%0': %1">, InGroup<ModuleBuild>;
 def remark_module_lock_timeout : Remark<
   "timed out waiting to acquire lock file for module '%0'">, InGroup<ModuleBuild>;
+def err_module_shadowed : Error<"import of shadowed module '%0'">, DefaultFatal;
+def err_module_build_shadowed_submodule : Error<
+  "build a shadowed submodule '%0'">, DefaultFatal;
 def err_module_cycle : Error<"cyclic dependency in module '%0': %1">, 
   DefaultFatal;
 def err_module_prebuilt : Error<
@@ -185,6 +188,9 @@
 def err_target_unknown_triple : Error<
   "unknown target triple '%0', please use -triple or -arch">;
 def err_target_unknown_cpu : Error<"unknown target CPU '%0'">;
+def note_valid_options : Note<"valid target CPU values are: %0">;
+def err_target_unsupported_cpu_for_micromips : Error<
+  "micromips is not supported for target CPU '%0'">;
 def err_target_unknown_abi : Error<"unknown target ABI '%0'">;
 def err_target_unsupported_abi : Error<"ABI '%0' is not supported on CPU '%1'">;
 def err_target_unsupported_abi_for_triple : Error<
@@ -198,6 +204,8 @@
   "execute only is not supported for the %0 sub-architecture">;
 def err_opt_not_valid_with_opt : Error<
   "option '%0' cannot be specified with '%1'">;
+def err_opt_not_valid_without_opt : Error<
+  "option '%0' cannot be specified without '%1'">;
 
 // Source manager
 def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
diff --git a/include/clang/Basic/DiagnosticDriverKinds.td b/include/clang/Basic/DiagnosticDriverKinds.td
index 67472a5..182136f 100644
--- a/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/include/clang/Basic/DiagnosticDriverKinds.td
@@ -11,6 +11,8 @@
 
 def err_drv_no_such_file : Error<"no such file or directory: '%0'">;
 def err_drv_unsupported_opt : Error<"unsupported option '%0'">;
+def err_drv_unsupported_opt_with_suggestion
+  : Error<"unsupported option '%0', did you mean '%1'?">;
 def err_drv_unsupported_opt_for_target : Error<
   "unsupported option '%0' for target '%1'">;
 def err_drv_unsupported_option_argument : Error<
@@ -29,9 +31,10 @@
 def err_drv_no_cuda_libdevice : Error<
   "cannot find libdevice for %0. Provide path to different CUDA installation "
   "via --cuda-path, or pass -nocudalib to build without linking with libdevice.">;
-def err_drv_cuda_version_too_low : Error<
-  "GPU arch %1 requires CUDA version at least %3, but installation at %0 is %2. "
-  "Use --cuda-path to specify a different CUDA install, or pass "
+def err_drv_cuda_version_unsupported : Error<
+  "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), "
+  "but installation at %3 is %4.  Use --cuda-path to specify a different CUDA "
+  "install, pass a different GPU arch with --cuda-gpu-arch, or pass "
   "--no-cuda-version-check.">;
 def err_drv_cuda_nvptx_host : Error<"unsupported use of NVPTX for host compilation.">;
 def err_drv_invalid_thread_model_for_target : Error<
@@ -111,6 +114,18 @@
   "invalid argument '%0' to -fdebug-prefix-map">;
 def err_drv_malformed_sanitizer_blacklist : Error<
   "malformed sanitizer blacklist: '%0'">;
+def err_drv_duplicate_config : Error<
+  "no more than one option '--config' is allowed">;
+def err_drv_config_file_not_exist : Error<
+  "configuration file '%0' does not exist">;
+def err_drv_config_file_not_found : Error<
+  "configuration file '%0' cannot be found">;
+def note_drv_config_file_searched_in : Note<
+  "was searched for in the directory: %0">;
+def err_drv_cannot_read_config_file : Error<
+  "cannot read configuration file '%0'">;
+def err_drv_nested_config_file: Error<
+  "option '--config' is not allowed inside configuration file">;
 
 def err_target_unsupported_arch
   : Error<"the target architecture '%0' is not supported by the target '%1'">;
@@ -122,9 +137,14 @@
 def err_drv_I_dash_not_supported : Error<
   "'%0' not supported, please use -iquote instead">;
 def err_drv_unknown_argument : Error<"unknown argument: '%0'">;
+def err_drv_unknown_argument_with_suggestion
+  : Error<"unknown argument '%0', did you mean '%1'?">;
 def warn_drv_unknown_argument_clang_cl : Warning<
   "unknown argument ignored in clang-cl: '%0'">,
   InGroup<UnknownArgument>;
+def warn_drv_unknown_argument_clang_cl_with_suggestion : Warning<
+  "unknown argument ignored in clang-cl '%0' (did you mean '%1'?)">,
+  InGroup<UnknownArgument>;
 
 def warn_drv_ycyu_no_arg_clang_cl : Warning<
   "support for '%0' without a filename not implemented yet; flag ignored">,
@@ -313,6 +333,10 @@
   "ignoring '-mabicalls' option as it cannot be used with "
   "non position-independent code and the N64 ABI">,
   InGroup<OptionIgnored>;
+def err_drv_unsupported_indirect_jump_opt : Error<
+  "'-mindirect-jump=%0' is unsupported with the '%1' architecture">;
+def err_drv_unknown_indirect_jump_opt : Error<
+  "unknown '-mindirect-jump=' option '%0'">;
 
 def warn_drv_unable_to_find_directory_expected : Warning<
   "unable to find %0 directory, expected to be in '%1'">,
@@ -337,4 +361,16 @@
 def warn_drv_fine_grained_bitfield_accesses_ignored : Warning<
   "option '-ffine-grained-bitfield-accesses' cannot be enabled together with a sanitizer; flag ignored">,
   InGroup<OptionIgnored>;
+
+def note_drv_verify_prefix_spelling : Note<
+  "-verify prefixes must start with a letter and contain only alphanumeric"
+  " characters, hyphens, and underscores">;
+
+def warn_drv_experimental_isel_incomplete : Warning<
+  "-fexperimental-isel support for the '%0' architecture is incomplete">,
+  InGroup<ExperimentalISel>;
+
+def warn_drv_experimental_isel_incomplete_opt : Warning<
+  "-fexperimental-isel support is incomplete for this architecture at the current optimization level">,
+  InGroup<ExperimentalISel>;
 }
diff --git a/include/clang/Basic/DiagnosticFrontendKinds.td b/include/clang/Basic/DiagnosticFrontendKinds.td
index 392a340..b25181f 100644
--- a/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -198,6 +198,11 @@
 def err_no_submodule : Error<"no submodule named %0 in module '%1'">;
 def err_no_submodule_suggest : Error<
   "no submodule named %0 in module '%1'; did you mean '%2'?">;
+def warn_no_priv_submodule_use_toplevel : Warning<
+  "no submodule named %0 in module '%1'; using top level '%2'">,
+  InGroup<PrivateModule>;
+def note_private_top_level_defined : Note<
+  "module defined here">;
 def warn_missing_submodule : Warning<"missing submodule '%0'">,
   InGroup<IncompleteUmbrella>;
 def note_module_import_here : Note<"module imported here">;
diff --git a/include/clang/Basic/DiagnosticGroups.td b/include/clang/Basic/DiagnosticGroups.td
index 1896280..2d471f1 100644
--- a/include/clang/Basic/DiagnosticGroups.td
+++ b/include/clang/Basic/DiagnosticGroups.td
@@ -374,6 +374,7 @@
 def ObjCPointerIntrospectPerformSelector : DiagGroup<"deprecated-objc-pointer-introspection-performSelector">;
 def ObjCPointerIntrospect : DiagGroup<"deprecated-objc-pointer-introspection", [ObjCPointerIntrospectPerformSelector]>;
 def ObjCMultipleMethodNames : DiagGroup<"objc-multiple-method-names">;
+def ObjCFlexibleArray : DiagGroup<"objc-flexible-array">;
 def OpenCLUnsupportedRGBA: DiagGroup<"opencl-unsupported-rgba">;
 def DeprecatedObjCIsaUsage : DiagGroup<"deprecated-objc-isa-usage">;
 def ExplicitInitializeCall : DiagGroup<"explicit-initialize-call">;
@@ -434,13 +435,16 @@
 def StringPlusInt : DiagGroup<"string-plus-int">;
 def StringPlusChar : DiagGroup<"string-plus-char">;
 def StrncatSize : DiagGroup<"strncat-size">;
+def TautologicalTypeLimitCompare : DiagGroup<"tautological-type-limit-compare">;
 def TautologicalUnsignedZeroCompare : DiagGroup<"tautological-unsigned-zero-compare">;
 def TautologicalUnsignedEnumZeroCompare : DiagGroup<"tautological-unsigned-enum-zero-compare">;
+def TautologicalInRangeCompare : DiagGroup<"tautological-constant-in-range-compare",
+                                           [TautologicalTypeLimitCompare,
+                                            TautologicalUnsignedZeroCompare,
+                                            TautologicalUnsignedEnumZeroCompare]>;
 def TautologicalOutOfRangeCompare : DiagGroup<"tautological-constant-out-of-range-compare">;
 def TautologicalConstantCompare : DiagGroup<"tautological-constant-compare",
-                                            [TautologicalUnsignedZeroCompare,
-                                             TautologicalUnsignedEnumZeroCompare,
-                                             TautologicalOutOfRangeCompare]>;
+                                            [TautologicalOutOfRangeCompare]>;
 def TautologicalPointerCompare : DiagGroup<"tautological-pointer-compare">;
 def TautologicalOverlapCompare : DiagGroup<"tautological-overlap-compare">;
 def TautologicalUndefinedCompare : DiagGroup<"tautological-undefined-compare">;
@@ -750,6 +754,7 @@
     VolatileRegisterVar,
     ObjCMissingSuperCalls,
     ObjCDesignatedInit,
+    ObjCFlexibleArray,
     OverloadedVirtual,
     PrivateExtern,
     SelTypeCast,
@@ -888,6 +893,7 @@
 def MicrosoftAnonTag : DiagGroup<"microsoft-anon-tag">;
 def MicrosoftCommentPaste : DiagGroup<"microsoft-comment-paste">;
 def MicrosoftEndOfFile : DiagGroup<"microsoft-end-of-file">;
+def MicrosoftInaccessibleBase : DiagGroup<"microsoft-inaccessible-base">;
 // Aliases.
 def : DiagGroup<"msvc-include", [MicrosoftInclude]>;
                 // -Wmsvc-include = -Wmicrosoft-include
@@ -979,3 +985,6 @@
 // A warning group for warnings about code that clang accepts when
 // compiling OpenCL C/C++ but which is not compatible with the SPIR spec.
 def SpirCompat : DiagGroup<"spir-compat">;
+
+// Warning for the experimental-isel options.
+def ExperimentalISel : DiagGroup<"experimental-isel">;
diff --git a/include/clang/Basic/DiagnosticIDs.h b/include/clang/Basic/DiagnosticIDs.h
index ca04cca..b4ea85b 100644
--- a/include/clang/Basic/DiagnosticIDs.h
+++ b/include/clang/Basic/DiagnosticIDs.h
@@ -34,7 +34,7 @@
       DIAG_SIZE_SERIALIZATION =  120,
       DIAG_SIZE_LEX           =  400,
       DIAG_SIZE_PARSE         =  500,
-      DIAG_SIZE_AST           =  110,
+      DIAG_SIZE_AST           =  150,
       DIAG_SIZE_COMMENT       =  100,
       DIAG_SIZE_CROSSTU       =  100,
       DIAG_SIZE_SEMA          = 3500,
@@ -296,8 +296,8 @@
                              SmallVectorImpl<diag::kind> &Diags) const;
 
   /// \brief Get the set of all diagnostic IDs.
-  void getAllDiagnostics(diag::Flavor Flavor,
-                         SmallVectorImpl<diag::kind> &Diags) const;
+  static void getAllDiagnostics(diag::Flavor Flavor,
+                                std::vector<diag::kind> &Diags);
 
   /// \brief Get the diagnostic option with the closest edit distance to the
   /// given group name.
diff --git a/include/clang/Basic/DiagnosticLexKinds.td b/include/clang/Basic/DiagnosticLexKinds.td
index 8bc6d77..bf13b5b 100644
--- a/include/clang/Basic/DiagnosticLexKinds.td
+++ b/include/clang/Basic/DiagnosticLexKinds.td
@@ -31,6 +31,14 @@
   "'<::' is treated as digraph '<:' (aka '[') followed by ':' in C++98">,
   InGroup<CXX98Compat>, DefaultIgnore;
 
+def warn_cxx17_compat_spaceship : Warning<
+  "'<=>' operator is incompatible with C++ standards before C++2a">,
+  InGroup<CXXPre2aCompat>, DefaultIgnore;
+def warn_cxx2a_compat_spaceship : Warning<
+  "'<=>' is a single token in C++2a; "
+  "add a space to avoid a change in behavior">,
+  InGroup<CXX2aCompat>;
+
 // Trigraphs.
 def trigraph_ignored : Warning<"trigraph ignored">, InGroup<Trigraphs>;
 def trigraph_ignored_block_comment : Warning<
@@ -111,6 +119,9 @@
 def ext_unicode_whitespace : ExtWarn<
   "treating Unicode character as whitespace">,
   InGroup<DiagGroup<"unicode-whitespace">>;
+def warn_utf8_symbol_homoglyph : Warning<
+  "treating Unicode character <U+%0> as identifier character rather than "
+  "as '%1' symbol">, InGroup<DiagGroup<"unicode-homoglyph">>;
 
 def err_hex_escape_no_digits : Error<
   "\\%0 used with no following hex digits">;
@@ -494,17 +505,12 @@
 def err_pragma_message : Error<"%0">;
 def warn_pragma_ignored : Warning<"unknown pragma ignored">,
    InGroup<UnknownPragmas>, DefaultIgnore;
-def ext_stdc_pragma_ignored : ExtWarn<"unknown pragma in STDC namespace">,
-   InGroup<UnknownPragmas>;
 def ext_on_off_switch_syntax :
    ExtWarn<"expected 'ON' or 'OFF' or 'DEFAULT' in pragma">,
    InGroup<UnknownPragmas>;
 def ext_pragma_syntax_eod :
    ExtWarn<"expected end of directive in pragma">,
    InGroup<UnknownPragmas>;
-def warn_stdc_fenv_access_not_supported :
-   Warning<"pragma STDC FENV_ACCESS ON is not supported, ignoring pragma">,
-   InGroup<UnknownPragmas>;
 def warn_pragma_diagnostic_invalid :
    ExtWarn<"pragma diagnostic expected 'error', 'warning', 'ignored', 'fatal',"
             " 'push', or 'pop'">,
@@ -680,11 +686,15 @@
 def err_mmap_expected_attribute : Error<"expected an attribute name">;
 def warn_mmap_unknown_attribute : Warning<"unknown attribute '%0'">,
   InGroup<IgnoredAttributes>;
-def warn_mmap_mismatched_top_level_private : Warning<
-  "top-level module '%0' in private module map, expected a submodule of '%1'">,
+def warn_mmap_mismatched_private_submodule : Warning<
+  "private submodule '%0' in private module map, expected top-level module">,
   InGroup<PrivateModule>;
-def note_mmap_rename_top_level_private_as_submodule : Note<
-  "make '%0' a submodule of '%1' to ensure it can be found by name">;
+def warn_mmap_mismatched_private_module_name : Warning<
+  "expected canonical name for private module '%0'">,
+  InGroup<PrivateModule>;
+def note_mmap_rename_top_level_private_module : Note<
+  "rename '%0' to ensure it can be found by name">;
+
 def err_mmap_duplicate_header_attribute : Error<
   "header attribute '%0' specified multiple times">;
 def err_mmap_invalid_header_attribute_value : Error<
diff --git a/include/clang/Basic/DiagnosticOptions.h b/include/clang/Basic/DiagnosticOptions.h
index c195003..404d0c5 100644
--- a/include/clang/Basic/DiagnosticOptions.h
+++ b/include/clang/Basic/DiagnosticOptions.h
@@ -1,4 +1,4 @@
-//===--- DiagnosticOptions.h ------------------------------------*- C++ -*-===//
+//===- DiagnosticOptions.h --------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -21,8 +21,11 @@
 /// \brief Specifies which overload candidates to display when overload
 /// resolution fails.
 enum OverloadsShown : unsigned {
-  Ovl_All,  ///< Show all overloads.
-  Ovl_Best  ///< Show just the "best" overload candidates.
+  /// Show all overloads.
+  Ovl_All,
+
+  /// Show just the "best" overload candidates.
+  Ovl_Best
 };
 
 /// \brief A bitmask representing the diagnostic levels used by
@@ -100,6 +103,10 @@
   /// prefixes removed.
   std::vector<std::string> Remarks;
 
+  /// The prefixes for comment directives sought by -verify ("expected" by
+  /// default).
+  std::vector<std::string> VerifyPrefixes;
+
 public:
   // Define accessors/mutators for diagnostic options of enumeration type.
 #define DIAGOPT(Name, Bits, Default)
@@ -115,8 +122,8 @@
   }
 };
 
-typedef DiagnosticOptions::TextDiagnosticFormat TextDiagnosticFormat;
+using TextDiagnosticFormat = DiagnosticOptions::TextDiagnosticFormat;
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_BASIC_DIAGNOSTICOPTIONS_H
diff --git a/include/clang/Basic/DiagnosticParseKinds.td b/include/clang/Basic/DiagnosticParseKinds.td
index a8d6955..e60f3ae 100644
--- a/include/clang/Basic/DiagnosticParseKinds.td
+++ b/include/clang/Basic/DiagnosticParseKinds.td
@@ -587,6 +587,7 @@
 def err_using_attribute_ns_conflict : Error<
   "attribute with scope specifier cannot follow default scope specifier">;
 def err_attributes_not_allowed : Error<"an attribute list cannot appear here">;
+def err_attributes_misplaced : Error<"misplaced attributes; expected attributes here">;
 def err_l_square_l_square_not_attribute : Error<
   "C++11 only allows consecutive left square brackets when "
   "introducing an attribute">;
@@ -972,6 +973,12 @@
 def err_pragma_fp_contract_scope : Error<
   "'#pragma fp_contract' can only appear at file scope or at the start of a "
   "compound statement">; 
+// - #pragma stdc unknown
+def ext_stdc_pragma_ignored : ExtWarn<"unknown pragma in STDC namespace">,
+   InGroup<UnknownPragmas>;
+def warn_stdc_fenv_access_not_supported :
+   Warning<"pragma STDC FENV_ACCESS ON is not supported, ignoring pragma">,
+   InGroup<UnknownPragmas>;
 // - #pragma comment
 def err_pragma_comment_malformed : Error<
   "pragma comment requires parenthesized identifier and optional string">;
@@ -1065,7 +1072,7 @@
 def err_omp_unknown_directive : Error<
   "expected an OpenMP directive">;
 def err_omp_unexpected_directive : Error<
-  "unexpected OpenMP directive '#pragma omp %0'">;
+  "unexpected OpenMP directive %select{|'#pragma omp %1'}0">;
 def err_omp_expected_punc : Error<
   "expected ',' or ')' in '%0' %select{clause|directive}1">;
 def err_omp_unexpected_clause : Error<
@@ -1155,6 +1162,9 @@
 
 def err_objc_type_args_after_protocols : Error<
   "protocol qualifiers must precede type arguments">;
+
+def note_meant_to_use_typename : Note<
+  "did you mean to use 'typename'?">;
 }
 
 let CategoryName = "Coroutines Issue" in {
diff --git a/include/clang/Basic/DiagnosticRefactoringKinds.td b/include/clang/Basic/DiagnosticRefactoringKinds.td
index ba9c830..ee396b9 100644
--- a/include/clang/Basic/DiagnosticRefactoringKinds.td
+++ b/include/clang/Basic/DiagnosticRefactoringKinds.td
@@ -19,6 +19,15 @@
   "without a selection">;
 def err_refactor_selection_no_symbol : Error<"there is no symbol at the given "
   "location">;
+def err_refactor_selection_invalid_ast : Error<"the provided selection does "
+  "not overlap with the AST nodes of interest">;
+
+def err_refactor_code_outside_of_function : Error<"the selected code is not a "
+  "part of a function's / method's body">;
+def err_refactor_extract_simple_expression : Error<"the selected expression "
+  "is too simple to extract">;
+def err_refactor_extract_prohibited_expression : Error<"the selected "
+  "expression can't be extracted">;
 
 }
 
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index ce79ac2..fd0b408 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -141,6 +141,10 @@
   "variable length array declaration cannot have 'extern' linkage">;
 def ext_vla_folded_to_constant : Extension<
   "variable length array folded to constant array as an extension">, InGroup<GNUFoldingConstant>;
+def err_vla_unsupported : Error<
+  "variable length arrays are not supported for the current target">;
+def note_vla_unsupported : Note<
+  "variable length arrays are not supported for the current target">;
 
 // C99 variably modified types
 def err_variably_modified_template_arg : Error<
@@ -410,6 +414,9 @@
   "C++ standards before C++17">, DefaultIgnore, InGroup<CXXPre17Compat>;
 def ext_decomp_decl : ExtWarn<
   "decomposition declarations are a C++17 extension">, InGroup<CXX17>;
+def ext_decomp_decl_cond : ExtWarn<
+  "ISO C++17 does not permit structured binding declaration in a condition">,
+  InGroup<DiagGroup<"binding-in-condition">>;
 def err_decomp_decl_spec : Error<
   "decomposition declaration cannot be declared "
   "%plural{1:'%1'|:with '%1' specifiers}0">;
@@ -752,6 +759,10 @@
   Warning<"ms_struct may not produce Microsoft-compatible layouts for classes "
           "with base classes or virtual functions">,
   DefaultError, InGroup<IncompatibleMSStruct>;
+def warn_npot_ms_struct :
+  Warning<"ms_struct may not produce Microsoft-compatible layouts with fundamental "
+          "data types with sizes that aren't a power of two">,
+  DefaultError, InGroup<IncompatibleMSStruct>;
 def err_section_conflict : Error<"%0 causes a section type conflict with %1">;
 def err_no_base_classes : Error<"invalid use of '__super', %0 has no base classes">;
 def err_invalid_super_scope : Error<"invalid use of '__super', "
@@ -1582,12 +1593,20 @@
 def ext_pure_function_definition : ExtWarn<
   "function definition with pure-specifier is a Microsoft extension">,
   InGroup<MicrosoftPureDefinition>;
-def err_implicit_object_parameter_init : Error<
-  "cannot initialize object parameter of type %0 with an expression "
-  "of type %1">;
 def err_qualified_member_of_unrelated : Error<
   "%q0 is not a member of class %1">;
 
+def err_member_function_call_bad_cvr : Error<
+  "'this' argument to member function %0 has type %1, but function is not marked "
+  "%select{const|restrict|const or restrict|volatile|const or volatile|"
+  "volatile or restrict|const, volatile, or restrict}2">;
+def err_member_function_call_bad_ref : Error<
+  "'this' argument to member function %0 is an %select{lvalue|rvalue}1, "
+  "but function has %select{non-const lvalue|rvalue}2 ref-qualifier">;
+def err_member_function_call_bad_type : Error<
+  "cannot initialize object parameter of type %0 with an expression "
+  "of type %1">;
+
 def warn_call_to_pure_virtual_member_function_from_ctor_dtor : Warning<
   "call to pure virtual member function %0 has undefined behavior; "
   "overrides of %0 in subclasses are not available in the "
@@ -1808,10 +1827,6 @@
 def err_init_list_bad_dest_type : Error<
   "%select{|non-aggregate }0type %1 cannot be initialized with an initializer "
   "list">;
-def err_member_function_call_bad_cvr : Error<"member function %0 not viable: "
-    "'this' argument has type %1, but function is not marked "
-    "%select{const|restrict|const or restrict|volatile|const or volatile|"
-    "volatile or restrict|const, volatile, or restrict}2">;
 
 def err_reference_bind_to_bitfield : Error<
   "%select{non-const|volatile}0 reference cannot bind to "
@@ -1981,8 +1996,9 @@
   "declaration of variable %0 with deduced type %1 requires an initializer">;
 def err_auto_new_requires_ctor_arg : Error<
   "new expression for type %0 requires a constructor argument">;
-def err_auto_new_list_init : Error<
-  "new expression for type %0 cannot use list-initialization">;
+def ext_auto_new_list_init : Extension<
+  "ISO C++ standards before C++17 do not allow new expression for "
+  "type %0 to use list-initialization">, InGroup<CXX17>;
 def err_auto_var_init_no_expression : Error<
   "initializer for variable %0 with type %1 is empty">;
 def err_auto_var_init_multiple_expressions : Error<
@@ -2433,8 +2449,8 @@
 def err_attribute_requires_opencl_version : Error<
   "%0 attribute requires OpenCL version %1%select{| or above}2">;
 def warn_unsupported_target_attribute
-    : Warning<"ignoring %select{unsupported|duplicate}0"
-              "%select{| architecture}1 '%2' in the target attribute string">,
+    : Warning<"%select{unsupported|duplicate}0%select{| architecture}1 '%2' in"
+              " the 'target' attribute string; 'target' attribute ignored">,
       InGroup<IgnoredAttributes>;
 def err_attribute_unsupported
     : Error<"%0 attribute is not supported for this target">;
@@ -2778,7 +2794,7 @@
 def err_attribute_weakref_without_alias : Error<
   "weakref declaration of %0 must also have an alias attribute">;
 def err_alias_not_supported_on_darwin : Error <
-  "only weak aliases are supported on darwin">;
+  "aliases are not supported on darwin">;
 def err_alias_to_undefined : Error<
   "%select{alias|ifunc}0 must point to a defined %select{variable or |}1function">;
 def warn_alias_to_weak_alias : Warning<
@@ -2795,55 +2811,26 @@
   "ifunc resolver function must return a pointer">;
 def err_ifunc_resolver_params : Error<
   "ifunc resolver function must have no parameters">;
+def warn_attribute_wrong_decl_type_str : Warning<
+  "%0 attribute only applies to %1">, InGroup<IgnoredAttributes>;
+def err_attribute_wrong_decl_type_str : Error<
+  warn_attribute_wrong_decl_type_str.Text>;
 def warn_attribute_wrong_decl_type : Warning<
   "%0 attribute only applies to %select{"
   "functions"
   "|unions"
   "|variables and functions"
-  "|functions and global variables"
-  "|functions, variables, and Objective-C interfaces"
   "|functions and methods"
-  "|parameters"
   "|functions, methods and blocks"
-  "|functions, methods, and classes"
   "|functions, methods, and parameters"
-  "|functions, methods, and global variables"
-  "|classes"
-  "|enums"
   "|variables"
-  "|methods"
-  "|fields and global variables"
-  "|structs"
-  "|parameters and typedefs"
-  "|variables and typedefs"
-  "|thread-local variables"
   "|variables and fields"
   "|variables, data members and tag types"
   "|types and namespaces"
-  "|Objective-C interfaces"
-  "|methods and properties"
-  "|functions, methods, and properties"
-  "|struct or union"
-  "|struct, union or class"
-  "|types"
-  "|Objective-C instance methods"
-  "|init methods of interface or class extension declarations"
   "|variables, functions and classes"
-  "|functions, variables, classes, and Objective-C interfaces"
-  "|Objective-C protocols"
-  "|variables with static or thread storage duration"
-  "|functions, methods, properties, and global variables"
-  "|structs, unions, and typedefs"
-  "|structs and typedefs"
-  "|interface or protocol declarations"
   "|kernel functions"
   "|non-K&R-style functions"
-  "|variables, enums, fields and typedefs"
-  "|functions, methods, enums, and classes"
-  "|structs, classes, variables, functions, and inline namespaces"
-  "|variables, functions, methods, types, enumerations, enumerators, labels, and non-static data members"
-  "|classes and enumerations"
-  "|named declarations}1">,
+  "|variables, functions, methods, types, enumerations, enumerators, labels, and non-static data members}1">,
   InGroup<IgnoredAttributes>;
 def err_attribute_wrong_decl_type : Error<warn_attribute_wrong_decl_type.Text>;
 def warn_type_attribute_wrong_type : Warning<
@@ -2898,6 +2885,9 @@
 def err_invalid_attribute_on_virtual_function : Error<
   "%0 attribute cannot be applied to virtual functions">;
 
+def ext_cannot_use_trivial_abi : ExtWarn<
+  "'trivial_abi' cannot be applied to %0">, InGroup<IgnoredAttributes>;
+
 // Availability attribute
 def warn_availability_unknown_platform : Warning<
   "unknown platform %0 in availability macro">, InGroup<Availability>;
@@ -3932,6 +3922,9 @@
   "%select{|template parameter }0redeclaration">;
 def note_template_param_different_kind : Note<
   "template parameter has a different kind in template argument">;
+
+def err_invalid_decl_specifier_in_nontype_parm : Error<
+  "invalid declaration specifier in template non-type parameter">;
   
 def err_template_nontype_parm_different_type : Error<
   "template non-type parameter has a different type %0 in template "
@@ -5130,12 +5123,17 @@
   "jump bypasses initialization of __strong variable">;
 def note_protected_by_objc_weak_init : Note<
   "jump bypasses initialization of __weak variable">;
+def note_protected_by_non_trivial_c_struct_init : Note<
+  "jump bypasses initialization of variable of non-trivial C struct type">;
 def note_enters_block_captures_cxx_obj : Note<
   "jump enters lifetime of block which captures a destructible C++ object">;
 def note_enters_block_captures_strong : Note<
   "jump enters lifetime of block which strongly captures a variable">;
 def note_enters_block_captures_weak : Note<
   "jump enters lifetime of block which weakly captures a variable">;
+def note_enters_block_captures_non_trivial_c_struct : Note<
+  "jump enters lifetime of block which captures a C struct that is non-trivial "
+  "to destroy">;
 
 def note_exits_cleanup : Note<
   "jump exits scope of variable with __attribute__((cleanup))">;
@@ -5176,6 +5174,9 @@
   "jump exits lifetime of block which strongly captures a variable">;
 def note_exits_block_captures_weak : Note<
   "jump exits lifetime of block which weakly captures a variable">;
+def note_exits_block_captures_non_trivial_c_struct : Note<
+  "jump exits lifetime of block which captures a C struct that is non-trivial "
+  "to destroy">;
 
 def err_func_returning_qualified_void : ExtWarn<
   "function cannot return qualified void type %0">,
@@ -5222,6 +5223,28 @@
 def ext_flexible_array_union_gnu : Extension<
   "flexible array member %0 in a union is a GNU extension">, InGroup<GNUFlexibleArrayUnionMember>;
 
+def err_flexible_array_not_at_end : Error<
+  "flexible array member %0 with type %1 is not at the end of"
+  " %select{struct|interface|union|class|enum}2">;
+def err_objc_variable_sized_type_not_at_end : Error<
+  "field %0 with variable sized type %1 is not at the end of class">;
+def note_next_field_declaration : Note<
+  "next field declaration is here">;
+def note_next_ivar_declaration : Note<
+  "next %select{instance variable declaration|synthesized instance variable}0"
+  " is here">;
+def err_synthesize_variable_sized_ivar : Error<
+  "synthesized property with variable size type %0"
+  " requires an existing instance variable">;
+def err_flexible_array_arc_retainable : Error<
+  "ARC forbids flexible array members with retainable object type">;
+def warn_variable_sized_ivar_visibility : Warning<
+  "field %0 with variable sized type %1 is not visible to subclasses and"
+  " can conflict with their instance variables">, InGroup<ObjCFlexibleArray>;
+def warn_superclass_variable_sized_type_not_at_end : Warning<
+  "field %0 can overwrite instance variable %1 with variable sized type %2"
+  " in superclass %3">, InGroup<ObjCFlexibleArray>;
+
 let CategoryName = "ARC Semantic Issue" in {
 
 // ARC-mode diagnostics.
@@ -5520,9 +5543,6 @@
 def err_offsetof_record_type : Error<
   "offsetof requires struct, union, or class type, %0 invalid">;
 def err_offsetof_array_type : Error<"offsetof requires array type, %0 invalid">;
-def ext_offsetof_extended_field_designator : Extension<
-  "using extended field designator is an extension">,
-  InGroup<DiagGroup<"extended-offsetof">>;
 def ext_offsetof_non_pod_type : ExtWarn<"offset of on non-POD type %0">,
   InGroup<InvalidOffsetof>;
 def ext_offsetof_non_standardlayout_type : ExtWarn<
@@ -5939,25 +5959,27 @@
   "%select{|nested }1data member %2 declared const here}0">;
 
 def warn_unsigned_always_true_comparison : Warning<
-  "comparison of %select{%3|unsigned expression}0 %2 "
-  "%select{unsigned expression|%3}0 is always %select{false|true}4">,
-  InGroup<TautologicalUnsignedZeroCompare>;
+  "result of comparison of %select{%3|unsigned expression}0 %2 "
+  "%select{unsigned expression|%3}0 is always %4">,
+  InGroup<TautologicalUnsignedZeroCompare>, DefaultIgnore;
 def warn_unsigned_enum_always_true_comparison : Warning<
-  "comparison of %select{%3|unsigned enum expression}0 %2 "
-  "%select{unsigned enum expression|%3}0 is always %select{false|true}4">,
-  InGroup<TautologicalUnsignedEnumZeroCompare>;
+  "result of comparison of %select{%3|unsigned enum expression}0 %2 "
+  "%select{unsigned enum expression|%3}0 is always %4">,
+  InGroup<TautologicalUnsignedEnumZeroCompare>, DefaultIgnore;
 def warn_tautological_constant_compare : Warning<
-  "comparison %select{%3|%1}0 %2 "
-  "%select{%1|%3}0 is always %select{false|true}4">,
-  InGroup<TautologicalConstantCompare>;
+  "result of comparison %select{%3|%1}0 %2 "
+  "%select{%1|%3}0 is always %4">,
+  InGroup<TautologicalTypeLimitCompare>, DefaultIgnore;
 
 def warn_mixed_sign_comparison : Warning<
   "comparison of integers of different signs: %0 and %1">,
   InGroup<SignCompare>, DefaultIgnore;
 def warn_out_of_range_compare : Warning<
-  "comparison of %select{constant %0|true|false}1 with " 
-  "%select{expression of type %2|boolean expression}3 is always "
-  "%select{false|true}4">, InGroup<TautologicalOutOfRangeCompare>;
+  "result of comparison of %select{constant %0|true|false}1 with " 
+  "%select{expression of type %2|boolean expression}3 is always %4">,
+  InGroup<TautologicalOutOfRangeCompare>;
+def warn_tautological_bool_compare : Warning<warn_out_of_range_compare.Text>,
+  InGroup<TautologicalConstantCompare>;
 def warn_comparison_of_mixed_enum_types : Warning<
   "comparison of two values with different enumeration types"
   "%diff{ ($ and $)|}0,1">,
@@ -7173,6 +7195,10 @@
   "cannot pass object with interface type %1 by value to variadic "
   "%select{function|block|method|constructor}2; expected type from format "
   "string was %3">;
+def err_cannot_pass_non_trivial_c_struct_to_vararg : Error<
+  "cannot pass non-trivial C object of type %0 by value to variadic "
+  "%select{function|block|method|constructor}1">;
+
 
 def err_cannot_pass_objc_interface_to_vararg : Error<
   "cannot pass object with interface type %0 by value through variadic "
@@ -7289,7 +7315,7 @@
   "use of the %0 attribute is a C++17 extension">, InGroup<CXX17>;
 
 def warn_unused_comparison : Warning<
-  "%select{%select{|in}1equality|relational}0 comparison result unused">,
+  "%select{equality|inequality|relational|three-way}0 comparison result unused">,
   InGroup<UnusedComparison>;
 def note_inequality_comparison_to_or_assign : Note<
   "use '|=' to turn this inequality comparison into an or-assignment">;
@@ -7543,6 +7569,9 @@
 def err_ambiguous_memptr_conv : Error<
   "ambiguous conversion from pointer to member of %select{base|derived}0 "
   "class %1 to pointer to member of %select{derived|base}0 class %2:%3">;
+def ext_ms_ambiguous_direct_base : ExtWarn<
+  "accessing inaccessible direct base %0 of %1 is a Microsoft extension">,
+  InGroup<MicrosoftInaccessibleBase>;
 
 def err_memptr_conv_via_virtual : Error<
   "conversion from pointer to member of class %0 to pointer to member "
@@ -7896,7 +7925,7 @@
 // should result in a warning, since these always evaluate to a constant.
 // Array comparisons have similar warnings
 def warn_comparison_always : Warning<
-  "%select{self-|array }0comparison always evaluates to %select{false|true|a constant}1">,
+  "%select{self-|array }0comparison always evaluates to %select{a constant|%2}1">,
   InGroup<TautologicalCompare>;
 def warn_comparison_bitwise_always : Warning<
   "bitwise comparison always evaluates to %select{false|true}0">,
@@ -7922,6 +7951,8 @@
   "'type_tag_for_datatype' attribute requires the initializer to be "
   "an %select{integer|integral}0 constant expression "
   "that can be represented by a 64 bit integer">;
+def err_tag_index_out_of_range : Error<
+  "%select{type tag|argument}0 index %1 is greater than the number of arguments specified">;
 def warn_type_tag_for_datatype_wrong_kind : Warning<
   "this type tag was not designed to be used with this function">,
   InGroup<TypeSafety>;
@@ -8288,12 +8319,16 @@
   "feature, not permitted in C++">;
 def err_type_unsupported : Error<
   "%0 is not supported on this target">;
-def warn_nsconsumed_attribute_mismatch : Warning<
+def err_nsconsumed_attribute_mismatch : Error<
   "overriding method has mismatched ns_consumed attribute on its"
-  " parameter">, InGroup<NSConsumedMismatch>;
-def warn_nsreturns_retained_attribute_mismatch : Warning<
+  " parameter">;
+def err_nsreturns_retained_attribute_mismatch : Error<
   "overriding method has mismatched ns_returns_%select{not_retained|retained}0"
-  " attributes">, InGroup<NSReturnsMismatch>;
+  " attributes">;
+def warn_nsconsumed_attribute_mismatch : Warning<
+  err_nsconsumed_attribute_mismatch.Text>, InGroup<NSConsumedMismatch>;
+def warn_nsreturns_retained_attribute_mismatch : Warning<
+  err_nsreturns_retained_attribute_mismatch.Text>, InGroup<NSReturnsMismatch>;
   
 def note_getter_unavailable : Note<
   "or because setter is declared here, but no getter method %0 is found">;
@@ -8618,7 +8653,7 @@
 def err_array_section_does_not_specify_contiguous_storage : Error<
   "array section does not specify contiguous storage">;
 def err_omp_union_type_not_allowed : Error<
-  "mapped storage cannot be derived from a union">;
+  "mapping of union members is not allowed">;
 def err_omp_expected_access_to_data_field : Error<
   "expected access to data field">;
 def err_omp_multiple_array_items_in_map_clause : Error<
@@ -8627,8 +8662,8 @@
   "pointer cannot be mapped along with a section derived from itself">;
 def err_omp_original_storage_is_shared_and_does_not_contain : Error<
   "original storage of expression in data environment is shared but data environment do not fully contain mapped expression storage">;
-def err_omp_same_pointer_derreferenced : Error<
-  "same pointer derreferenced in multiple different ways in map clause expressions">;
+def err_omp_same_pointer_dereferenced : Error<
+  "same pointer dereferenced in multiple different ways in map clause expressions">;
 def note_omp_task_predetermined_firstprivate_here : Note<
   "predetermined as a firstprivate in a task construct here">;
 def err_omp_threadprivate_incomplete_type : Error<
@@ -8705,6 +8740,8 @@
 def warn_omp_not_in_target_context : Warning<
   "declaration is not declared in any declare target region">,
   InGroup<OpenMPTarget>;
+def err_omp_function_in_link_clause : Error<
+  "function name is not allowed in 'link' clause">;
 def err_omp_aligned_expected_array_or_ptr : Error<
   "argument of aligned clause should be array"
   "%select{ or pointer|, pointer, reference to array or reference to pointer}1"
@@ -8845,9 +8882,9 @@
 def err_omp_wrong_cancel_region : Error<
   "one of 'for', 'parallel', 'sections' or 'taskgroup' is expected">;
 def err_omp_parent_cancel_region_nowait : Error<
-  "parent region for 'omp %select{cancellation point/cancel}0' construct cannot be nowait">;
+  "parent region for 'omp %select{cancellation point|cancel}0' construct cannot be nowait">;
 def err_omp_parent_cancel_region_ordered : Error<
-  "parent region for 'omp %select{cancellation point/cancel}0' construct cannot be ordered">;
+  "parent region for 'omp %select{cancellation point|cancel}0' construct cannot be ordered">;
 def err_omp_reduction_wrong_type : Error<"reduction type cannot be %select{qualified with 'const', 'volatile' or 'restrict'|a function|a reference|an array}0 type">;
 def err_omp_wrong_var_in_declare_reduction : Error<"only %select{'omp_priv' or 'omp_orig'|'omp_in' or 'omp_out'}0 variables are allowed in %select{initializer|combiner}0 expression">;
 def err_omp_declare_reduction_redefinition : Error<"redefinition of user-defined reduction for type %0">;
@@ -8958,6 +8995,13 @@
   "expected addressable reduction item for the task-based directives">;
 def err_omp_reduction_with_nogroup : Error<
   "'reduction' clause cannot be used with 'nogroup' clause">;
+def err_omp_reduction_vla_unsupported : Error<
+  "cannot generate code for reduction on %select{|array section, which requires a }0variable length array">;
+def err_omp_linear_distribute_var_non_loop_iteration : Error<
+  "only loop iteration variables are allowed in 'linear' clause in distribute directives">;
+def warn_omp_non_trivial_type_mapped : Warning<
+  "Non-trivial type %0 is mapped, only trivial types are guaranteed to be mapped correctly">,
+  InGroup<OpenMPTarget>;
 } // end of OpenMP category
 
 let CategoryName = "Related Result Type Issue" in {
@@ -9310,4 +9354,35 @@
   InGroup<ShadowField>, DefaultIgnore;
 def note_shadow_field : Note<"declared here">;
 
+def err_target_required_in_redecl : Error<
+  "function declaration is missing 'target' attribute in a multiversioned "
+  "function">;
+def note_multiversioning_caused_here : Note<
+  "function multiversioning caused by this declaration">;
+def err_multiversion_after_used : Error<
+  "function declaration cannot become a multiversioned function after first "
+  "usage">;
+def err_bad_multiversion_option : Error<
+  "function multiversioning doesn't support %select{feature|architecture}0 "
+  "'%1'">;
+def err_multiversion_duplicate : Error<
+  "multiversioned function redeclarations require identical target attributes">;
+def err_multiversion_noproto : Error<
+  "multiversioned function must have a prototype">;
+def err_multiversion_no_other_attrs : Error<
+  "attribute 'target' multiversioning cannot be combined with other "
+  "attributes">;
+def err_multiversion_diff : Error<
+  "multiversioned function declaration has a different %select{calling convention"
+  "|return type|constexpr specification|inline specification|storage class|"
+  "linkage}0">;
+def err_multiversion_doesnt_support : Error<
+  "multiversioned functions do not yet support %select{function templates|"
+  "virtual functions|deduced return types|constructors|destructors|"
+  "deleted functions|defaulted functions}0">;
+def err_multiversion_not_allowed_on_main : Error<
+  "'main' cannot be a multiversioned function">;
+def err_multiversion_not_supported : Error<
+ "function multiversioning is not supported on the current target">;
+
 } // end of sema component.
diff --git a/include/clang/Basic/DiagnosticSerializationKinds.td b/include/clang/Basic/DiagnosticSerializationKinds.td
index 3f0a14c..250b49f 100644
--- a/include/clang/Basic/DiagnosticSerializationKinds.td
+++ b/include/clang/Basic/DiagnosticSerializationKinds.td
@@ -77,6 +77,8 @@
     "module '%0' in AST file '%1' (imported by AST file '%2') "
     "is not defined in any loaded module map file; "
     "maybe you need to load '%3'?">, DefaultFatal;
+def note_imported_by_pch_module_not_found : Note<
+    "consider adding '%0' to the header search path">;
 def err_imported_module_modmap_changed : Error<
     "module '%0' imported by AST file '%1' found in a different module map file"
     " (%2) than when the importing AST file was built (%3)">, DefaultFatal;
@@ -268,6 +270,29 @@
   "friend function %2|"
   "}1">;
 
+def err_module_odr_violation_function : Error<
+  "%q0 has different definitions in different modules; "
+  "%select{definition in module '%2'|defined here}1 "
+  "first difference is "
+  "%select{"
+  "return type is %4|"
+  "%ordinal4 parameter with name %5|"
+  "%ordinal4 parameter with type %5%select{| decayed from %7}6|"
+  "%ordinal4 parameter with%select{out|}5 a default argument|"
+  "%ordinal4 parameter with a default argument|"
+  "function body"
+  "}3">;
+
+def note_module_odr_violation_function : Note<"but in '%0' found "
+  "%select{"
+  "different return type %2|"
+  "%ordinal2 parameter with name %3|"
+  "%ordinal2 parameter with type %3%select{| decayed from %5}4|"
+  "%ordinal2 parameter with%select{out|}3 a default argument|"
+  "%ordinal2 parameter with a different default argument|"
+  "a different body"
+  "}1">;
+
 def err_module_odr_violation_mismatch_decl_unknown : Error<
   "%q0 %select{with definition in module '%2'|defined here}1 has different "
   "definitions in different modules; first difference is this "
diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h
index 3938e09..8e3c15a 100644
--- a/include/clang/Basic/IdentifierTable.h
+++ b/include/clang/Basic/IdentifierTable.h
@@ -1,4 +1,4 @@
-//===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===//
+//===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,11 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the clang::IdentifierInfo, clang::IdentifierTable, and
 /// clang::Selector interfaces.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
@@ -18,35 +18,29 @@
 
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/TokenKinds.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include "llvm/Support/type_traits.h"
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <new>
 #include <string>
 #include <utility>
 
-namespace llvm {
-
-  template <typename T> struct DenseMapInfo;
-
-} // end namespace llvm
-
 namespace clang {
 
-  class LangOptions;
-  class IdentifierInfo;
-  class IdentifierTable;
-  class SourceLocation;
-  class MultiKeywordSelector; // private class used by Selector
-  class DeclarationName;      // AST class that stores declaration names
+class IdentifierInfo;
+class LangOptions;
+class MultiKeywordSelector;
+class SourceLocation;
 
-  /// \brief A simple pair of identifier info and location.
-  typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair;
+/// \brief A simple pair of identifier info and location.
+using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;
 
 /// One of these records is kept for each identifier that
 /// is lexed.  This contains information about whether the token was \#define'd,
@@ -85,8 +79,10 @@
                                    // keyword.
   // 29 bit left in 64-bit word.
 
-  void *FETokenInfo;               // Managed by the language front-end.
-  llvm::StringMapEntry<IdentifierInfo*> *Entry;
+  // Managed by the language front-end.
+  void *FETokenInfo = nullptr;
+
+  llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
 
 public:
   IdentifierInfo();
@@ -104,7 +100,6 @@
 
   /// \brief Return the beginning of the actual null-terminated string for this
   /// identifier.
-  ///
   const char *getNameStart() const {
     if (Entry) return Entry->getKeyData();
     // FIXME: This is gross. It would be best not to embed specific details
@@ -112,12 +107,12 @@
     // The 'this' pointer really points to a
     // std::pair<IdentifierInfo, const char*>, where internal pointer
     // points to the external string data.
-    typedef std::pair<IdentifierInfo, const char*> actualtype;
+    using actualtype = std::pair<IdentifierInfo, const char *>;
+
     return ((const actualtype*) this)->second;
   }
 
   /// \brief Efficiently return the length of this identifier info.
-  ///
   unsigned getLength() const {
     if (Entry) return Entry->getKeyLength();
     // FIXME: This is gross. It would be best not to embed specific details
@@ -125,7 +120,8 @@
     // The 'this' pointer really points to a
     // std::pair<IdentifierInfo, const char*>, where internal pointer
     // points to the external string data.
-    typedef std::pair<IdentifierInfo, const char*> actualtype;
+    using actualtype = std::pair<IdentifierInfo, const char *>;
+
     const char* p = ((const actualtype*) this)->second - 2;
     return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1;
   }
@@ -465,7 +461,7 @@
 class IdentifierTable {
   // Shark shows that using MallocAllocator is *much* slower than using this
   // BumpPtrAllocator!
-  typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy;
+  using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
   HashTableTy HashTable;
 
   IdentifierInfoLookup* ExternalLookup;
@@ -551,8 +547,8 @@
     return *II;
   }
 
-  typedef HashTableTy::const_iterator iterator;
-  typedef HashTableTy::const_iterator const_iterator;
+  using iterator = HashTableTy::const_iterator;
+  using const_iterator = HashTableTy::const_iterator;
 
   iterator begin() const { return HashTable.begin(); }
   iterator end() const   { return HashTable.end(); }
@@ -654,7 +650,9 @@
     MultiArg = 0x3,
     ArgFlags = ZeroArg|OneArg
   };
-  uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo.
+
+  // a pointer to the MultiKeywordSelector or IdentifierInfo.
+  uintptr_t InfoPtr = 0;
 
   Selector(IdentifierInfo *II, unsigned nArgs) {
     InfoPtr = reinterpret_cast<uintptr_t>(II);
@@ -662,6 +660,7 @@
     assert(nArgs < 2 && "nArgs not equal to 0/1");
     InfoPtr |= nArgs+1;
   }
+
   Selector(MultiKeywordSelector *SI) {
     InfoPtr = reinterpret_cast<uintptr_t>(SI);
     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
@@ -692,7 +691,7 @@
 
   /// The default ctor should only be used when creating data structures that
   ///  will contain selectors.
-  Selector() : InfoPtr(0) {}
+  Selector() = default;
   Selector(uintptr_t V) : InfoPtr(V) {}
 
   /// operator==/!= - Indicate whether the specified selectors are identical.
@@ -776,7 +775,8 @@
 /// \brief This table allows us to fully hide how we implement
 /// multi-keyword caching.
 class SelectorTable {
-  void *Impl;  // Actually a SelectorTableImpl
+  // Actually a SelectorTableImpl
+  void *Impl;
 
 public:
   SelectorTable();
@@ -793,6 +793,7 @@
   Selector getUnarySelector(IdentifierInfo *ID) {
     return Selector(ID, 1);
   }
+
   Selector getNullarySelector(IdentifierInfo *ID) {
     return Selector(ID, 0);
   }
@@ -848,7 +849,7 @@
   unsigned ExtraKindOrNumArgs;
 };
 
-}  // end namespace clang
+}  // namespace clang
 
 namespace llvm {
 
@@ -856,11 +857,11 @@
 /// DenseSets.
 template <>
 struct DenseMapInfo<clang::Selector> {
-  static inline clang::Selector getEmptyKey() {
+  static clang::Selector getEmptyKey() {
     return clang::Selector::getEmptyMarker();
   }
 
-  static inline clang::Selector getTombstoneKey() {
+  static clang::Selector getTombstoneKey() {
     return clang::Selector::getTombstoneMarker();
   }
 
@@ -874,15 +875,13 @@
 template <>
 struct isPodLike<clang::Selector> { static const bool value = true; };
 
-template <typename T> struct PointerLikeTypeTraits;
-
 template<>
 struct PointerLikeTypeTraits<clang::Selector> {
-  static inline const void *getAsVoidPointer(clang::Selector P) {
+  static const void *getAsVoidPointer(clang::Selector P) {
     return P.getAsOpaquePtr();
   }
 
-  static inline clang::Selector getFromVoidPointer(const void *P) {
+  static clang::Selector getFromVoidPointer(const void *P) {
     return clang::Selector(reinterpret_cast<uintptr_t>(P));
   }
 
@@ -893,11 +892,11 @@
 // are not guaranteed to be 8-byte aligned.
 template<>
 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
-  static inline void *getAsVoidPointer(clang::IdentifierInfo* P) {
+  static void *getAsVoidPointer(clang::IdentifierInfo* P) {
     return P;
   }
 
-  static inline clang::IdentifierInfo *getFromVoidPointer(void *P) {
+  static clang::IdentifierInfo *getFromVoidPointer(void *P) {
     return static_cast<clang::IdentifierInfo*>(P);
   }
 
@@ -906,17 +905,17 @@
 
 template<>
 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
-  static inline const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
+  static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
     return P;
   }
 
-  static inline const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
+  static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
     return static_cast<const clang::IdentifierInfo*>(P);
   }
 
   enum { NumLowBitsAvailable = 1 };
 };
 
-} // end namespace llvm
+} // namespace llvm
 
 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
diff --git a/include/clang/Basic/LangOptions.def b/include/clang/Basic/LangOptions.def
index 78a7437..68edee2 100644
--- a/include/clang/Basic/LangOptions.def
+++ b/include/clang/Basic/LangOptions.def
@@ -82,6 +82,7 @@
 // FIXME: A lot of the BENIGN_ options should be COMPATIBLE_ instead.
 LANGOPT(C99               , 1, 0, "C99")
 LANGOPT(C11               , 1, 0, "C11")
+LANGOPT(C17               , 1, 0, "C17")
 LANGOPT(MSVCCompat        , 1, 0, "Microsoft Visual C++ full compatibility mode")
 LANGOPT(MicrosoftExt      , 1, 0, "Microsoft C++ extensions")
 LANGOPT(AsmBlocks         , 1, 0, "Microsoft inline asm blocks")
@@ -89,7 +90,7 @@
 LANGOPT(CPlusPlus         , 1, 0, "C++")
 LANGOPT(CPlusPlus11       , 1, 0, "C++11")
 LANGOPT(CPlusPlus14       , 1, 0, "C++14")
-LANGOPT(CPlusPlus1z       , 1, 0, "C++17")
+LANGOPT(CPlusPlus17       , 1, 0, "C++17")
 LANGOPT(CPlusPlus2a       , 1, 0, "C++2a")
 LANGOPT(ObjC1             , 1, 0, "Objective-C 1")
 LANGOPT(ObjC2             , 1, 0, "Objective-C 2")
@@ -124,7 +125,9 @@
 LANGOPT(Exceptions        , 1, 0, "exception handling")
 LANGOPT(ObjCExceptions    , 1, 0, "Objective-C exceptions")
 LANGOPT(CXXExceptions     , 1, 0, "C++ exceptions")
+LANGOPT(DWARFExceptions   , 1, 0, "dwarf exception handling")
 LANGOPT(SjLjExceptions    , 1, 0, "setjmp-longjump exception handling")
+LANGOPT(SEHExceptions     , 1, 0, "SEH .xdata exception handling")
 LANGOPT(ExternCNoUnwind   , 1, 0, "Assume extern C functions don't unwind")
 LANGOPT(TraditionalCPP    , 1, 0, "traditional CPP emulation")
 LANGOPT(RTTI              , 1, 1, "run-time type information")
@@ -191,8 +194,10 @@
 LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
 LANGOPT(CUDA              , 1, 0, "CUDA")
 LANGOPT(OpenMP            , 32, 0, "OpenMP support and version of OpenMP (31, 40 or 45)")
+LANGOPT(OpenMPSimd        , 1, 0, "Use SIMD only OpenMP support.")
 LANGOPT(OpenMPUseTLS      , 1, 0, "Use TLS for threadprivates or runtime calls")
 LANGOPT(OpenMPIsDevice    , 1, 0, "Generate code only for OpenMP target device")
+LANGOPT(OpenMPCUDAMode    , 1, 0, "Generate code for OpenMP pragmas in SIMT/SPMD mode")
 LANGOPT(RenderScript      , 1, 0, "RenderScript")
 
 LANGOPT(CUDAIsDevice      , 1, 0, "compiling for CUDA device")
@@ -200,6 +205,7 @@
 LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__")
 LANGOPT(CUDADeviceFlushDenormalsToZero, 1, 0, "flushing denormals to zero")
 LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions")
+LANGOPT(CUDARelocatableDeviceCode, 1, 0, "generate relocatable device code")
 
 LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
 LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
@@ -270,6 +276,9 @@
                                            "aggressive, 2: more aggressive)")
 
 LANGOPT(XRayInstrument, 1, 0, "controls whether to do XRay instrumentation")
+LANGOPT(XRayAlwaysEmitCustomEvents, 1, 0,
+        "controls whether to always emit intrinsic calls to "
+        "__xray_customevent(...) builtin.")
 
 BENIGN_LANGOPT(AllowEditorPlaceholders, 1, 0,
                "allow editor placeholders in source")
diff --git a/include/clang/Basic/LangOptions.h b/include/clang/Basic/LangOptions.h
index 51fb5ad..b16392d 100644
--- a/include/clang/Basic/LangOptions.h
+++ b/include/clang/Basic/LangOptions.h
@@ -1,4 +1,4 @@
-//===--- LangOptions.h - C Language Family Language Options -----*- C++ -*-===//
+//===- LangOptions.h - C Language Family Language Options -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,10 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the clang::LangOptions interface.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_BASIC_LANGOPTIONS_H
@@ -20,6 +20,8 @@
 #include "clang/Basic/ObjCRuntime.h"
 #include "clang/Basic/Sanitizers.h"
 #include "clang/Basic/Visibility.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
 #include <string>
 #include <vector>
 
@@ -47,22 +49,32 @@
 /// enabled, which controls the dialect of C or C++ that is accepted.
 class LangOptions : public LangOptionsBase {
 public:
-  typedef clang::Visibility Visibility;
+  using Visibility = clang::Visibility;
   
   enum GCMode { NonGC, GCOnly, HybridGC };
   enum StackProtectorMode { SSPOff, SSPOn, SSPStrong, SSPReq };
   
   enum SignedOverflowBehaviorTy {
-    SOB_Undefined,  // Default C standard behavior.
-    SOB_Defined,    // -fwrapv
-    SOB_Trapping    // -ftrapv
+    // Default C standard behavior.
+    SOB_Undefined,
+
+    // -fwrapv
+    SOB_Defined,
+
+    // -ftrapv
+    SOB_Trapping
   };
 
   // FIXME: Unify with TUKind.
   enum CompilingModuleKind {
-    CMK_None,           ///< Not compiling a module interface at all.
-    CMK_ModuleMap,      ///< Compiling a module from a module map.
-    CMK_ModuleInterface ///< Compiling a C++ modules TS module interface unit.
+    /// Not compiling a module interface at all.
+    CMK_None,
+
+    /// Compiling a module from a module map.
+    CMK_ModuleMap,
+
+    /// Compiling a C++ modules TS module interface unit.
+    CMK_ModuleInterface
   };
 
   enum PragmaMSPointersToMembersKind {
@@ -77,7 +89,8 @@
     DCC_CDecl,
     DCC_FastCall,
     DCC_StdCall,
-    DCC_VectorCall
+    DCC_VectorCall,
+    DCC_RegCall
   };
 
   enum AddrSpaceMapMangling { ASMM_Target, ASMM_On, ASMM_Off };
@@ -90,9 +103,14 @@
   };
 
   enum FPContractModeKind {
-    FPC_Off,        // Form fused FP ops only where result will not be affected.
-    FPC_On,         // Form fused FP ops according to FP_CONTRACT rules.
-    FPC_Fast        // Aggressively fuse FP ops (E.g. FMA).
+    // Form fused FP ops only where result will not be affected.
+    FPC_Off,
+
+    // Form fused FP ops according to FP_CONTRACT rules.
+    FPC_On,
+
+    // Aggressively fuse FP ops (E.g. FMA).
+    FPC_Fast
   };
 
 public:
@@ -151,7 +169,7 @@
 
   /// \brief Indicates whether the front-end is explicitly told that the
   /// input is a header file (i.e. -x c-header).
-  bool IsHeaderFile;
+  bool IsHeaderFile = false;
 
   LangOptions();
 
@@ -218,15 +236,19 @@
   bool allowFPContractWithinStatement() const {
     return fp_contract == LangOptions::FPC_On;
   }
+
   bool allowFPContractAcrossStatement() const {
     return fp_contract == LangOptions::FPC_Fast;
   }
+
   void setAllowFPContractWithinStatement() {
     fp_contract = LangOptions::FPC_On;
   }
+
   void setAllowFPContractAcrossStatement() {
     fp_contract = LangOptions::FPC_Fast;
   }
+
   void setDisallowFPContract() { fp_contract = LangOptions::FPC_Off; }
 
   /// Used to serialize this.
@@ -241,13 +263,15 @@
 enum TranslationUnitKind {
   /// \brief The translation unit is a complete translation unit.
   TU_Complete,
+
   /// \brief The translation unit is a prefix to a translation unit, and is
   /// not complete.
   TU_Prefix,
+
   /// \brief The translation unit is a module.
   TU_Module
 };
   
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_BASIC_LANGOPTIONS_H
diff --git a/include/clang/Basic/Module.h b/include/clang/Basic/Module.h
index 0b2a665..3725fcb 100644
--- a/include/clang/Basic/Module.h
+++ b/include/clang/Basic/Module.h
@@ -1,4 +1,4 @@
-//===--- Module.h - Describe a module ---------------------------*- C++ -*-===//
+//===- Module.h - Describe a module -----------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,12 +6,13 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the clang::Module class, which describes a module in the
 /// source code.
-///
+//
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_BASIC_MODULE_H
 #define LLVM_CLANG_BASIC_MODULE_H
 
@@ -19,6 +20,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SetVector.h"
@@ -26,22 +28,28 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include <array>
+#include <cassert>
+#include <cstdint>
+#include <ctime>
 #include <string>
 #include <utility>
 #include <vector>
 
 namespace llvm {
-  class raw_ostream;
-}
+
+class raw_ostream;
+
+} // namespace llvm
 
 namespace clang {
   
 class LangOptions;
 class TargetInfo;
-class IdentifierInfo;
-  
+
 /// \brief Describes the name of a module.
-typedef SmallVector<std::pair<std::string, SourceLocation>, 2> ModuleId;
+using ModuleId = SmallVector<std::pair<std::string, SourceLocation>, 2>;
 
 /// The signature of a module, which is a hash of the AST content.
 struct ASTFileSignature : std::array<uint32_t, 5> {
@@ -85,7 +93,7 @@
   /// \brief The build directory of this module. This is the directory in
   /// which the module is notionally built, and relative to which its headers
   /// are found.
-  const DirectoryEntry *Directory;
+  const DirectoryEntry *Directory = nullptr;
 
   /// \brief The presumed file name for the module map defining this module.
   /// Only non-empty when building from preprocessed source.
@@ -114,7 +122,7 @@
 
   /// \brief The AST file if this is a top-level module which has a
   /// corresponding serialized AST file, or null otherwise.
-  const FileEntry *ASTFile;
+  const FileEntry *ASTFile = nullptr;
 
   /// \brief The top-level headers associated with this module.
   llvm::SmallSetVector<const FileEntry *, 2> TopHeaders;
@@ -181,7 +189,7 @@
 
   /// \brief An individual requirement: a feature name and a flag indicating
   /// the required state of that feature.
-  typedef std::pair<std::string, bool> Requirement;
+  using Requirement = std::pair<std::string, bool>;
 
   /// \brief The set of language features required to use this module.
   ///
@@ -189,6 +197,9 @@
   /// will be false to indicate that this (sub)module is not available.
   SmallVector<Requirement, 2> Requirements;
 
+  /// \brief A module with the same name that shadows this module.
+  Module *ShadowingModule = nullptr;
+
   /// \brief Whether this module is missing a feature from \c Requirements.
   unsigned IsMissingRequirement : 1;
 
@@ -270,7 +281,7 @@
   ///
   /// The pointer is the module being re-exported, while the bit will be true
   /// to indicate that this is a wildcard export.
-  typedef llvm::PointerIntPair<Module *, 1, bool> ExportDecl;
+  using ExportDecl = llvm::PointerIntPair<Module *, 1, bool>;
   
   /// \brief The set of export declarations.
   SmallVector<ExportDecl, 2> Exports;
@@ -302,9 +313,9 @@
   /// \brief A library or framework to link against when an entity from this
   /// module is used.
   struct LinkLibrary {
-    LinkLibrary() : IsFramework(false) { }
+    LinkLibrary() = default;
     LinkLibrary(const std::string &Library, bool IsFramework)
-      : Library(Library), IsFramework(IsFramework) { }
+        : Library(Library), IsFramework(IsFramework) {}
     
     /// \brief The library to link against.
     ///
@@ -313,7 +324,7 @@
     std::string Library;
 
     /// \brief Whether this is a framework rather than a library.
-    bool IsFramework;
+    bool IsFramework = false;
   };
 
   /// \brief The set of libraries or frameworks to link against when
@@ -367,13 +378,20 @@
   ///
   /// \param Target The target options used for the current translation unit.
   ///
-  /// \param Req If this module is unavailable, this parameter
-  /// will be set to one of the requirements that is not met for use of
-  /// this module.
+  /// \param Req If this module is unavailable because of a missing requirement,
+  /// this parameter will be set to one of the requirements that is not met for
+  /// use of this module.
+  ///
+  /// \param MissingHeader If this module is unavailable because of a missing
+  /// header, this parameter will be set to one of the missing headers.
+  ///
+  /// \param ShadowingModule If this module is unavailable because it is
+  /// shadowed, this parameter will be set to the shadowing module.
   bool isAvailable(const LangOptions &LangOpts, 
                    const TargetInfo &Target,
                    Requirement &Req,
-                   UnresolvedHeaderDirective &MissingHeader) const;
+                   UnresolvedHeaderDirective &MissingHeader,
+                   Module *&ShadowingModule) const;
 
   /// \brief Determine whether this module is a submodule.
   bool isSubModule() const { return Parent != nullptr; }
@@ -432,7 +450,6 @@
   const Module *getTopLevelModule() const;
   
   /// \brief Retrieve the name of the top-level module.
-  ///
   StringRef getTopLevelModuleName() const {
     return getTopLevelModule()->Name;
   }
@@ -525,8 +542,8 @@
 
   unsigned getVisibilityID() const { return VisibilityID; }
 
-  typedef std::vector<Module *>::iterator submodule_iterator;
-  typedef std::vector<Module *>::const_iterator submodule_const_iterator;
+  using submodule_iterator = std::vector<Module *>::iterator;
+  using submodule_const_iterator = std::vector<Module *>::const_iterator;
   
   submodule_iterator submodule_begin() { return SubModules.begin(); }
   submodule_const_iterator submodule_begin() const {return SubModules.begin();}
@@ -551,7 +568,6 @@
   }
 
   /// \brief Print the module map for this module to the given stream. 
-  ///
   void print(raw_ostream &OS, unsigned Indent = 0) const;
   
   /// \brief Dump the contents of this module to the given output stream.
@@ -564,7 +580,7 @@
 /// \brief A set of visible modules.
 class VisibleModuleSet {
 public:
-  VisibleModuleSet() : Generation(0) {}
+  VisibleModuleSet() = default;
   VisibleModuleSet(VisibleModuleSet &&O)
       : ImportLocs(std::move(O.ImportLocs)), Generation(O.Generation ? 1 : 0) {
     O.ImportLocs.clear();
@@ -599,13 +615,15 @@
 
   /// \brief A callback to call when a module is made visible (directly or
   /// indirectly) by a call to \ref setVisible.
-  typedef llvm::function_ref<void(Module *M)> VisibleCallback;
+  using VisibleCallback = llvm::function_ref<void(Module *M)>;
+
   /// \brief A callback to call when a module conflict is found. \p Path
   /// consists of a sequence of modules from the conflicting module to the one
   /// made visible, where each was exported by the next.
-  typedef llvm::function_ref<void(ArrayRef<Module *> Path,
-                                  Module *Conflict, StringRef Message)>
-      ConflictCallback;
+  using ConflictCallback =
+      llvm::function_ref<void(ArrayRef<Module *> Path, Module *Conflict,
+                         StringRef Message)>;
+
   /// \brief Make a specific module visible.
   void setVisible(Module *M, SourceLocation Loc,
                   VisibleCallback Vis = [](Module *) {},
@@ -616,11 +634,11 @@
   /// Import locations for each visible module. Indexed by the module's
   /// VisibilityID.
   std::vector<SourceLocation> ImportLocs;
+
   /// Visibility generation, bumped every time the visibility state changes.
-  unsigned Generation;
+  unsigned Generation = 0;
 };
 
-} // end namespace clang
-
+} // namespace clang
 
 #endif // LLVM_CLANG_BASIC_MODULE_H
diff --git a/include/clang/Basic/ObjCRuntime.h b/include/clang/Basic/ObjCRuntime.h
index 8dc259c..3926c0c 100644
--- a/include/clang/Basic/ObjCRuntime.h
+++ b/include/clang/Basic/ObjCRuntime.h
@@ -207,8 +207,8 @@
   bool hasSubscripting() const {
     switch (getKind()) {
     case FragileMacOSX: return false;
-    case MacOSX: return getVersion() >= VersionTuple(10, 8);
-    case iOS: return getVersion() >= VersionTuple(6);
+    case MacOSX: return getVersion() >= VersionTuple(10, 11);
+    case iOS: return getVersion() >= VersionTuple(9);
     case WatchOS: return true;
 
     // This is really a lie, because some implementations and versions
diff --git a/include/clang/Basic/OpenCLExtensions.def b/include/clang/Basic/OpenCLExtensions.def
index 360fec4..13cb12e 100644
--- a/include/clang/Basic/OpenCLExtensions.def
+++ b/include/clang/Basic/OpenCLExtensions.def
@@ -53,6 +53,9 @@
 OPENCLEXT_INTERNAL(cl_khr_gl_event, 110, ~0U)
 OPENCLEXT_INTERNAL(cl_khr_d3d10_sharing, 110, ~0U)
 
+// EMBEDDED_PROFILE
+OPENCLEXT_INTERNAL(cles_khr_int64, 110, ~0U)
+
 // OpenCL 1.2.
 OPENCLEXT_INTERNAL(cl_khr_context_abort, 120, ~0U)
 OPENCLEXT_INTERNAL(cl_khr_d3d11_sharing, 120, ~0U)
@@ -79,6 +82,10 @@
 OPENCLEXT_INTERNAL(cl_amd_media_ops, 100, ~0U)
 OPENCLEXT_INTERNAL(cl_amd_media_ops2, 100, ~0U)
 
+// Intel OpenCL extensions
+OPENCLEXT_INTERNAL(cl_intel_subgroups, 120, ~0U)
+OPENCLEXT_INTERNAL(cl_intel_subgroups_short, 120, ~0U)
+
 #undef OPENCLEXT_INTERNAL
 
 #ifdef OPENCLEXT
diff --git a/include/clang/Basic/OpenMPKinds.def b/include/clang/Basic/OpenMPKinds.def
index 3fedaf7..6a0bed7 100644
--- a/include/clang/Basic/OpenMPKinds.def
+++ b/include/clang/Basic/OpenMPKinds.def
@@ -454,6 +454,7 @@
 OPENMP_TARGET_CLAUSE(defaultmap)
 OPENMP_TARGET_CLAUSE(firstprivate)
 OPENMP_TARGET_CLAUSE(is_device_ptr)
+OPENMP_TARGET_CLAUSE(reduction)
 
 // Clauses allowed for OpenMP directive 'target data'.
 OPENMP_TARGET_DATA_CLAUSE(if)
@@ -745,9 +746,9 @@
 OPENMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(shared)
 OPENMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(reduction)
 OPENMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(schedule)
-OPENMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear)
 OPENMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(num_teams)
 OPENMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(thread_limit)
+OPENMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(copyin)
 
 // Clauses allowed for OpenMP directive 'target teams'.
 OPENMP_TARGET_TEAMS_CLAUSE(if)
@@ -805,7 +806,6 @@
 OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(num_threads)
 OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(proc_bind)
 OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(schedule)
-OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear)
 
 // Clauses allowed for OpenMP directive
 // 'target teams distribute parallel for simd'.
diff --git a/include/clang/Basic/OperatorKinds.def b/include/clang/Basic/OperatorKinds.def
index 34ad764..d86294b 100644
--- a/include/clang/Basic/OperatorKinds.def
+++ b/include/clang/Basic/OperatorKinds.def
@@ -89,6 +89,7 @@
 OVERLOADED_OPERATOR(ExclaimEqual         , "!="  , exclaimequal       , false, true , false)
 OVERLOADED_OPERATOR(LessEqual            , "<="  , lessequal          , false, true , false)
 OVERLOADED_OPERATOR(GreaterEqual         , ">="  , greaterequal       , false, true , false)
+OVERLOADED_OPERATOR(Spaceship            , "<=>" , spaceship          , false, true , false)
 OVERLOADED_OPERATOR(AmpAmp               , "&&"  , ampamp             , false, true , false)
 OVERLOADED_OPERATOR(PipePipe             , "||"  , pipepipe           , false, true , false)
 OVERLOADED_OPERATOR(PlusPlus             , "++"  , plusplus           , true , true , false)
diff --git a/include/clang/Basic/OperatorPrecedence.h b/include/clang/Basic/OperatorPrecedence.h
index 640749f..94978f8 100644
--- a/include/clang/Basic/OperatorPrecedence.h
+++ b/include/clang/Basic/OperatorPrecedence.h
@@ -36,10 +36,11 @@
     And             = 8,    // &
     Equality        = 9,    // ==, !=
     Relational      = 10,   //  >=, <=, >, <
-    Shift           = 11,   // <<, >>
-    Additive        = 12,   // -, +
-    Multiplicative  = 13,   // *, /, %
-    PointerToMember = 14    // .*, ->*
+    Spaceship       = 11,   // <=>
+    Shift           = 12,   // <<, >>
+    Additive        = 13,   // -, +
+    Multiplicative  = 14,   // *, /, %
+    PointerToMember = 15    // .*, ->*
   };
 }
 
diff --git a/include/clang/Basic/PartialDiagnostic.h b/include/clang/Basic/PartialDiagnostic.h
index b2f14af..dffeaee 100644
--- a/include/clang/Basic/PartialDiagnostic.h
+++ b/include/clang/Basic/PartialDiagnostic.h
@@ -1,4 +1,4 @@
-//===--- PartialDiagnostic.h - Diagnostic "closures" ------------*- C++ -*-===//
+//===- PartialDiagnostic.h - Diagnostic "closures" --------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,25 +6,32 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Implements a partial diagnostic that can be emitted anwyhere
 /// in a DiagnosticBuilder stream.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_BASIC_PARTIALDIAGNOSTIC_H
 #define LLVM_CLANG_BASIC_PARTIALDIAGNOSTIC_H
 
 #include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/SourceLocation.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include <cassert>
+#include <cstdint>
+#include <string>
+#include <type_traits>
+#include <utility>
 
 namespace clang {
 
+class DeclContext;
+class IdentifierInfo;
+
 class PartialDiagnostic {
 public:
   enum {
@@ -36,8 +43,6 @@
   };
 
   struct Storage {
-    Storage() : NumDiagArgs(0) { }
-
     enum {
         /// \brief The maximum number of arguments we can hold. We
         /// currently only support up to 10 arguments (%0-%9).
@@ -48,7 +53,7 @@
     };
 
     /// \brief The number of entries in Arguments.
-    unsigned char NumDiagArgs;
+    unsigned char NumDiagArgs = 0;
 
     /// \brief Specifies for each argument whether it is in DiagArgumentsStr
     /// or in DiagArguments.
@@ -71,6 +76,8 @@
     /// \brief If valid, provides a hint with some code to insert, remove, or
     /// modify at a particular position.
     SmallVector<FixItHint, 6>  FixItHints;
+
+    Storage() = default;
   };
 
   /// \brief An allocator for Storage objects, which uses a small cache to
@@ -114,13 +121,13 @@
   // in the new location.
 
   /// \brief The diagnostic ID.
-  mutable unsigned DiagID;
+  mutable unsigned DiagID = 0;
 
   /// \brief Storage for args and ranges.
-  mutable Storage *DiagStorage;
+  mutable Storage *DiagStorage = nullptr;
 
   /// \brief Allocator used to allocate storage for this diagnostic.
-  StorageAllocator *Allocator;
+  StorageAllocator *Allocator = nullptr;
 
   /// \brief Retrieve storage for this particular diagnostic.
   Storage *getStorage() const {
@@ -176,17 +183,16 @@
 
 public:
   struct NullDiagnostic {};
+
   /// \brief Create a null partial diagnostic, which cannot carry a payload,
   /// and only exists to be swapped with a real partial diagnostic.
-  PartialDiagnostic(NullDiagnostic)
-    : DiagID(0), DiagStorage(nullptr), Allocator(nullptr) { }
+  PartialDiagnostic(NullDiagnostic) {}
 
   PartialDiagnostic(unsigned DiagID, StorageAllocator &Allocator)
-    : DiagID(DiagID), DiagStorage(nullptr), Allocator(&Allocator) { }
+      : DiagID(DiagID), Allocator(&Allocator) {}
 
   PartialDiagnostic(const PartialDiagnostic &Other)
-    : DiagID(Other.DiagID), DiagStorage(nullptr), Allocator(Other.Allocator)
-  {
+      : DiagID(Other.DiagID), Allocator(Other.Allocator) {
     if (Other.DiagStorage) {
       DiagStorage = getStorage();
       *DiagStorage = *Other.DiagStorage;
@@ -194,22 +200,20 @@
   }
 
   PartialDiagnostic(PartialDiagnostic &&Other)
-    : DiagID(Other.DiagID), DiagStorage(Other.DiagStorage),
-      Allocator(Other.Allocator) {
+      : DiagID(Other.DiagID), DiagStorage(Other.DiagStorage),
+        Allocator(Other.Allocator) {
     Other.DiagStorage = nullptr;
   }
 
   PartialDiagnostic(const PartialDiagnostic &Other, Storage *DiagStorage)
-    : DiagID(Other.DiagID), DiagStorage(DiagStorage),
-      Allocator(reinterpret_cast<StorageAllocator *>(~uintptr_t(0)))
-  {
+      : DiagID(Other.DiagID), DiagStorage(DiagStorage),
+        Allocator(reinterpret_cast<StorageAllocator *>(~uintptr_t(0))) {
     if (Other.DiagStorage)
       *this->DiagStorage = *Other.DiagStorage;
   }
 
   PartialDiagnostic(const Diagnostic &Other, StorageAllocator &Allocator)
-    : DiagID(Other.getID()), DiagStorage(nullptr), Allocator(&Allocator)
-  {
+      : DiagID(Other.getID()), Allocator(&Allocator) {
     // Copy arguments.
     for (unsigned I = 0, N = Other.getNumArgs(); I != N; ++I) {
       if (Other.getArgKind(I) == DiagnosticsEngine::ak_std_string)
@@ -402,7 +406,6 @@
     PD.AddFixItHint(Hint);
     return PD;
   }
-
 };
 
 inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
@@ -413,7 +416,8 @@
 
 /// \brief A partial diagnostic along with the source location where this
 /// diagnostic occurs.
-typedef std::pair<SourceLocation, PartialDiagnostic> PartialDiagnosticAt;
+using PartialDiagnosticAt = std::pair<SourceLocation, PartialDiagnostic>;
 
-}  // end namespace clang
-#endif
+} // namespace clang
+
+#endif // LLVM_CLANG_BASIC_PARTIALDIAGNOSTIC_H
diff --git a/include/clang/Basic/PlistSupport.h b/include/clang/Basic/PlistSupport.h
index 61de824..be92bbf 100644
--- a/include/clang/Basic/PlistSupport.h
+++ b/include/clang/Basic/PlistSupport.h
@@ -1,4 +1,4 @@
-//===---------- PlistSupport.h - Plist Output Utilities ---------*- C++ -*-===//
+//===- PlistSupport.h - Plist Output Utilities ------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,12 +10,20 @@
 #ifndef LLVM_CLANG_BASIC_PLISTSUPPORT_H
 #define LLVM_CLANG_BASIC_PLISTSUPPORT_H
 
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
 
 namespace clang {
 namespace markup {
-typedef llvm::DenseMap<FileID, unsigned> FIDMap;
+
+using FIDMap = llvm::DenseMap<FileID, unsigned>;
 
 inline void AddFID(FIDMap &FIDs, SmallVectorImpl<FileID> &V,
                    const SourceManager &SM, SourceLocation L) {
@@ -112,7 +120,8 @@
   EmitLocation(o, SM, R.getEnd(), FM, indent + 1);
   Indent(o, indent) << "</array>\n";
 }
-}
-}
 
-#endif
+} // namespace markup
+} // namespace clang
+
+#endif // LLVM_CLANG_BASIC_PLISTSUPPORT_H
diff --git a/include/clang/Basic/Sanitizers.def b/include/clang/Basic/Sanitizers.def
index d6df617..30d5cc8 100644
--- a/include/clang/Basic/Sanitizers.def
+++ b/include/clang/Basic/Sanitizers.def
@@ -44,6 +44,8 @@
 // Kernel AddressSanitizer (KASan)
 SANITIZER("kernel-address", KernelAddress)
 
+SANITIZER("hwaddress", HWAddress)
+
 // MemorySanitizer
 SANITIZER("memory", Memory)
 
@@ -135,6 +137,9 @@
 SANITIZER_GROUP("efficiency-all", Efficiency,
                 EfficiencyCacheFrag | EfficiencyWorkingSet)
 
+// Scudo hardened allocator
+SANITIZER("scudo", Scudo)
+
 // Magic group, containing all sanitizers. For example, "-fno-sanitize=all"
 // can be used to disable all the sanitizers.
 SANITIZER_GROUP("all", All, ~0ULL)
diff --git a/include/clang/Basic/Sanitizers.h b/include/clang/Basic/Sanitizers.h
index 5317720..1b936c7 100644
--- a/include/clang/Basic/Sanitizers.h
+++ b/include/clang/Basic/Sanitizers.h
@@ -80,7 +80,7 @@
 SanitizerMask expandSanitizerGroups(SanitizerMask Kinds);
 
 /// Return the sanitizers which do not affect preprocessing.
-static inline SanitizerMask getPPTransparentSanitizers() {
+inline SanitizerMask getPPTransparentSanitizers() {
   return SanitizerKind::CFI | SanitizerKind::Integer |
          SanitizerKind::Nullability | SanitizerKind::Undefined;
 }
diff --git a/include/clang/Basic/SourceLocation.h b/include/clang/Basic/SourceLocation.h
index 7418b50..a83f95e 100644
--- a/include/clang/Basic/SourceLocation.h
+++ b/include/clang/Basic/SourceLocation.h
@@ -1,4 +1,4 @@
-//===--- SourceLocation.h - Compact identifier for Source Files -*- C++ -*-===//
+//===- SourceLocation.h - Compact identifier for Source Files ---*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,28 +6,29 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the clang::SourceLocation class and associated facilities.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_BASIC_SOURCELOCATION_H
 #define LLVM_CLANG_BASIC_SOURCELOCATION_H
 
 #include "clang/Basic/LLVM.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
 #include <cassert>
-#include <functional>
+#include <cstdint>
 #include <string>
 #include <utility>
 
 namespace llvm {
-  class MemoryBuffer;
-  template <typename T> struct DenseMapInfo;
-  template <typename T> struct isPodLike;
-}
+
+template <typename T> struct DenseMapInfo;
+template <typename T> struct isPodLike;
+
+} // namespace llvm
 
 namespace clang {
 
@@ -56,19 +57,19 @@
   unsigned getHashValue() const { return static_cast<unsigned>(ID); }
 
 private:
-  friend class SourceManager;
   friend class ASTWriter;
   friend class ASTReader;
+  friend class SourceManager;
   
   static FileID get(int V) {
     FileID F;
     F.ID = V;
     return F;
   }
+
   int getOpaqueValue() const { return ID; }
 };
 
-
 /// \brief Encodes a location in the source. The SourceManager can decode this
 /// to get at the full include stack, line and column information.
 ///
@@ -85,10 +86,12 @@
 ///
 /// It is important that this type remains small. It is currently 32 bits wide.
 class SourceLocation {
-  unsigned ID = 0;
-  friend class SourceManager;
   friend class ASTReader;
   friend class ASTWriter;
+  friend class SourceManager;
+
+  unsigned ID = 0;
+
   enum : unsigned {
     MacroIDBit = 1U << 31
   };
@@ -124,8 +127,8 @@
     L.ID = MacroIDBit | ID;
     return L;
   }
-public:
 
+public:
   /// \brief Return a source location with the specified offset from this
   /// SourceLocation.
   SourceLocation getLocWithOffset(int Offset) const {
@@ -245,6 +248,7 @@
   static CharSourceRange getTokenRange(SourceLocation B, SourceLocation E) {
     return getTokenRange(SourceRange(B, E));
   }
+
   static CharSourceRange getCharRange(SourceLocation B, SourceLocation E) {
     return getCharRange(SourceRange(B, E));
   }
@@ -274,12 +278,12 @@
 ///
 /// You can get a PresumedLoc from a SourceLocation with SourceManager.
 class PresumedLoc {
-  const char *Filename;
+  const char *Filename = nullptr;
   unsigned Line, Col;
   SourceLocation IncludeLoc;
 
 public:
-  PresumedLoc() : Filename(nullptr) {}
+  PresumedLoc() = default;
   PresumedLoc(const char *FN, unsigned Ln, unsigned Co, SourceLocation IL)
       : Filename(FN), Line(Ln), Col(Co), IncludeLoc(IL) {}
 
@@ -336,7 +340,7 @@
   FullSourceLoc() = default;
 
   explicit FullSourceLoc(SourceLocation Loc, const SourceManager &SM)
-    : SourceLocation(Loc), SrcMgr(&SM) {}
+      : SourceLocation(Loc), SrcMgr(&SM) {}
 
   bool hasManager() const {
       bool hasSrcMgr =  SrcMgr != nullptr;
@@ -415,32 +419,31 @@
   /// This is useful for debugging.
   void dump() const;
 
-  friend inline bool
+  friend bool
   operator==(const FullSourceLoc &LHS, const FullSourceLoc &RHS) {
     return LHS.getRawEncoding() == RHS.getRawEncoding() &&
           LHS.SrcMgr == RHS.SrcMgr;
   }
 
-  friend inline bool
+  friend bool
   operator!=(const FullSourceLoc &LHS, const FullSourceLoc &RHS) {
     return !(LHS == RHS);
   }
-
 };
 
-
-
-}  // end namespace clang
+} // namespace clang
 
 namespace llvm {
+
   /// Define DenseMapInfo so that FileID's can be used as keys in DenseMap and
   /// DenseSets.
   template <>
   struct DenseMapInfo<clang::FileID> {
-    static inline clang::FileID getEmptyKey() {
-      return clang::FileID();
+    static clang::FileID getEmptyKey() {
+      return {};
     }
-    static inline clang::FileID getTombstoneKey() {
+
+    static clang::FileID getTombstoneKey() {
       return clang::FileID::getSentinel();
     }
 
@@ -461,15 +464,17 @@
   // Teach SmallPtrSet how to handle SourceLocation.
   template<>
   struct PointerLikeTypeTraits<clang::SourceLocation> {
-    static inline void *getAsVoidPointer(clang::SourceLocation L) {
+    enum { NumLowBitsAvailable = 0 };
+
+    static void *getAsVoidPointer(clang::SourceLocation L) {
       return L.getPtrEncoding();
     }
-    static inline clang::SourceLocation getFromVoidPointer(void *P) {
+
+    static clang::SourceLocation getFromVoidPointer(void *P) {
       return clang::SourceLocation::getFromRawEncoding((unsigned)(uintptr_t)P);
     }
-    enum { NumLowBitsAvailable = 0 };
   };
 
-}  // end namespace llvm
+} // namespace llvm
 
-#endif
+#endif // LLVM_CLANG_BASIC_SOURCELOCATION_H
diff --git a/include/clang/Basic/SourceManager.h b/include/clang/Basic/SourceManager.h
index 16bd561..17601b8 100644
--- a/include/clang/Basic/SourceManager.h
+++ b/include/clang/Basic/SourceManager.h
@@ -1,4 +1,4 @@
-//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
+//===- SourceManager.h - Track and cache source files -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,7 +6,7 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the SourceManager interface.
 ///
@@ -29,14 +29,13 @@
 /// location in the source where the macro was originally defined,
 /// and the presumed location is where the line directive states that
 /// the line is 17, or any other line.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_BASIC_SOURCEMANAGER_H
 #define LLVM_CLANG_BASIC_SOURCEMANAGER_H
 
 #include "clang/Basic/FileManager.h"
-#include "clang/Basic/LLVM.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
@@ -49,10 +48,8 @@
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include <algorithm>
 #include <cassert>
 #include <cstddef>
-#include <cstdint>
 #include <map>
 #include <memory>
 #include <string>
@@ -69,7 +66,6 @@
 
 /// \brief Public enums and private classes that are part of the
 /// SourceManager implementation.
-///
 namespace SrcMgr {
 
   /// \brief Indicates whether a file or directory holds normal user code,
@@ -100,6 +96,7 @@
     enum CCFlags {
       /// \brief Whether the buffer is invalid.
       InvalidFlag = 0x01,
+
       /// \brief Whether the buffer should not be freed on destruction.
       DoNotFreeFlag = 0x02
     };
@@ -130,12 +127,12 @@
     ///
     /// This is lazily computed.  This is owned by the SourceManager
     /// BumpPointerAllocator object.
-    unsigned *SourceLineCache;
+    unsigned *SourceLineCache = nullptr;
 
     /// \brief The number of lines in this ContentCache.
     ///
     /// This is only valid if SourceLineCache is non-null.
-    unsigned NumLines;
+    unsigned NumLines = 0;
 
     /// \brief Indicates whether the buffer itself was provided to override
     /// the actual file contents.
@@ -157,15 +154,14 @@
 
     ContentCache(const FileEntry *Ent, const FileEntry *contentEnt)
       : Buffer(nullptr, false), OrigEntry(Ent), ContentsEntry(contentEnt),
-        SourceLineCache(nullptr), NumLines(0), BufferOverridden(false),
-        IsSystemFile(false), IsTransient(false) {}
+        BufferOverridden(false), IsSystemFile(false), IsTransient(false) {}
     
     /// The copy ctor does not allow copies where source object has either
     /// a non-NULL Buffer or SourceLineCache.  Ownership of allocated memory
     /// is not transferred, so this is a logical error.
     ContentCache(const ContentCache &RHS)
-      : Buffer(nullptr, false), SourceLineCache(nullptr),
-        BufferOverridden(false), IsSystemFile(false), IsTransient(false) {
+      : Buffer(nullptr, false), BufferOverridden(false), IsSystemFile(false),
+        IsTransient(false) {
       OrigEntry = RHS.OrigEntry;
       ContentsEntry = RHS.ContentsEntry;
 
@@ -246,6 +242,10 @@
   /// FileInfos contain a "ContentCache *", with the contents of the file.
   ///
   class FileInfo {
+    friend class clang::SourceManager;
+    friend class clang::ASTWriter;
+    friend class clang::ASTReader;
+
     /// \brief The location of the \#include that brought in this file.
     ///
     /// This is an invalid SLOC for the main file (top of the \#include chain).
@@ -264,10 +264,6 @@
     llvm::PointerIntPair<const ContentCache*, 3, CharacteristicKind>
         ContentAndKind;
 
-    friend class clang::SourceManager;
-    friend class clang::ASTWriter;
-    friend class clang::ASTReader;
-
   public:
     /// \brief Return a FileInfo object.
     static FileInfo get(SourceLocation IL, const ContentCache *Con,
@@ -448,7 +444,7 @@
     }
   };
 
-}  // end SrcMgr namespace.
+} // namespace SrcMgr
 
 /// \brief External source of source location entries.
 class ExternalSLocEntrySource {
@@ -546,7 +542,7 @@
 /// \brief The stack used when building modules on demand, which is used
 /// to provide a link between the source managers of the different compiler
 /// instances.
-typedef ArrayRef<std::pair<std::string, FullSourceLoc>> ModuleBuildStack;
+using ModuleBuildStack = ArrayRef<std::pair<std::string, FullSourceLoc>>;
 
 /// \brief This class handles loading and caching of source files into memory.
 ///
@@ -578,7 +574,7 @@
 
   /// \brief True if the ContentCache for files that are overridden by other
   /// files, should report the original file name. Defaults to true.
-  bool OverridenFilesKeepOriginalName;
+  bool OverridenFilesKeepOriginalName = true;
 
   /// \brief True if non-system source files should be treated as volatile
   /// (likely to change while trying to use them). Defaults to false.
@@ -587,12 +583,13 @@
   /// \brief True if all files read during this compilation should be treated
   /// as transient (may not be present in later compilations using a module
   /// file created from this compilation). Defaults to false.
-  bool FilesAreTransient;
+  bool FilesAreTransient = false;
 
   struct OverriddenFilesInfoTy {
     /// \brief Files that have been overridden with the contents from another
     /// file.
     llvm::DenseMap<const FileEntry *, const FileEntry *> OverriddenFiles;
+
     /// \brief Files that were overridden with a memory buffer.
     llvm::DenseSet<const FileEntry *> OverriddenFilesWithBuffer;
   };
@@ -647,7 +644,7 @@
   llvm::BitVector SLocEntryLoaded;
 
   /// \brief An external source for source location entries.
-  ExternalSLocEntrySource *ExternalSLocEntries;
+  ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
 
   /// \brief A one-entry cache to speed up getFileID.
   ///
@@ -658,7 +655,7 @@
   /// \brief Holds information for \#line directives.
   ///
   /// This is referenced by indices from SLocEntryTable.
-  LineTableInfo *LineTable;
+  LineTableInfo *LineTable = nullptr;
 
   /// \brief These ivars serve as a cache used in the getLineNumber
   /// method which is used to speedup getLineNumber calls to nearby locations.
@@ -674,7 +671,8 @@
   FileID PreambleFileID;
 
   // Statistics for -print-stats.
-  mutable unsigned NumLinearScans, NumBinaryProbes;
+  mutable unsigned NumLinearScans = 0;
+  mutable unsigned NumBinaryProbes = 0;
 
   /// \brief Associates a FileID with its "included/expanded in" decomposed
   /// location.
@@ -684,12 +682,12 @@
   mutable llvm::DenseMap<FileID, std::pair<FileID, unsigned>> IncludedLocMap;
 
   /// The key value into the IsBeforeInTUCache table.
-  typedef std::pair<FileID, FileID> IsBeforeInTUCacheKey;
+  using IsBeforeInTUCacheKey = std::pair<FileID, FileID>;
 
   /// The IsBeforeInTranslationUnitCache is a mapping from FileID pairs
   /// to cache results.
-  typedef llvm::DenseMap<IsBeforeInTUCacheKey, InBeforeInTUCacheEntry>
-          InBeforeInTUCache;
+  using InBeforeInTUCache =
+      llvm::DenseMap<IsBeforeInTUCacheKey, InBeforeInTUCacheEntry>;
 
   /// Cache results for the isBeforeInTranslationUnit method.
   mutable InBeforeInTUCache IBTUCache;
@@ -706,7 +704,7 @@
 
   /// \brief Lazily computed map of macro argument chunks to their expanded
   /// source location.
-  typedef std::map<unsigned, SourceLocation> MacroArgsMap;
+  using MacroArgsMap = std::map<unsigned, SourceLocation>;
 
   mutable llvm::DenseMap<FileID, std::unique_ptr<MacroArgsMap>>
       MacroArgsCacheMap;
@@ -1139,6 +1137,18 @@
   /// be used by clients.
   SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const;
 
+  /// \brief Form a SourceLocation from a FileID and Offset pair.
+  SourceLocation getComposedLoc(FileID FID, unsigned Offset) const {
+    bool Invalid = false;
+    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &Invalid);
+    if (Invalid)
+      return SourceLocation();
+
+    unsigned GlobalOffset = Entry.getOffset() + Offset;
+    return Entry.isFile() ? SourceLocation::getFileLoc(GlobalOffset)
+                          : SourceLocation::getMacroLoc(GlobalOffset);
+  }
+
   /// \brief Decompose the specified location into a raw FileID + Offset pair.
   ///
   /// The first element is the FileID, the second is the offset from the
@@ -1417,7 +1427,6 @@
   //===--------------------------------------------------------------------===//
 
   /// \brief Return the uniqued ID for the specified filename.
-  ///
   unsigned getLineTableFilenameID(StringRef Str);
 
   /// \brief Add a line note to the line table for the FileID and offset
@@ -1538,8 +1547,9 @@
   }
 
   // Iterators over FileInfos.
-  typedef llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>
-      ::const_iterator fileinfo_iterator;
+  using fileinfo_iterator =
+      llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::const_iterator;
+
   fileinfo_iterator fileinfo_begin() const { return FileInfos.begin(); }
   fileinfo_iterator fileinfo_end() const { return FileInfos.end(); }
   bool hasFileInfo(const FileEntry *File) const {
@@ -1547,7 +1557,6 @@
   }
 
   /// \brief Print statistics to stderr.
-  ///
   void PrintStats() const;
 
   void dump() const;
@@ -1637,7 +1646,13 @@
     return getImmediateExpansionRange(Loc).first;
   }
 
+  /// \return Location of the top-level macro caller.
+  SourceLocation getTopMacroCallerLoc(SourceLocation Loc) const;
+
 private:
+  friend class ASTReader;
+  friend class ASTWriter;
+
   llvm::MemoryBuffer *getFakeBufferForRecovery() const;
   const SrcMgr::ContentCache *getFakeContentCacheForRecovery() const;
 
@@ -1729,8 +1744,6 @@
                                          SourceLocation SpellLoc,
                                          SourceLocation ExpansionLoc,
                                          unsigned ExpansionLength) const;
-  friend class ASTReader;
-  friend class ASTWriter;
 };
 
 /// \brief Comparison function object.
@@ -1743,7 +1756,7 @@
   SourceManager &SM;
 
 public:
-  explicit BeforeThanCompare(SourceManager &SM) : SM(SM) { }
+  explicit BeforeThanCompare(SourceManager &SM) : SM(SM) {}
 
   bool operator()(SourceLocation LHS, SourceLocation RHS) const {
     return SM.isBeforeInTranslationUnit(LHS, RHS);
@@ -1756,13 +1769,13 @@
   SourceManager &SM;
 
 public:
-  explicit BeforeThanCompare(SourceManager &SM) : SM(SM) { }
+  explicit BeforeThanCompare(SourceManager &SM) : SM(SM) {}
 
   bool operator()(SourceRange LHS, SourceRange RHS) const {
     return SM.isBeforeInTranslationUnit(LHS.getBegin(), RHS.getBegin());
   }
 };
 
-} // end namespace clang
+} // namespace clang
 
 #endif // LLVM_CLANG_BASIC_SOURCEMANAGER_H
diff --git a/include/clang/Basic/SourceManagerInternals.h b/include/clang/Basic/SourceManagerInternals.h
index 9403dea..edd910e 100644
--- a/include/clang/Basic/SourceManagerInternals.h
+++ b/include/clang/Basic/SourceManagerInternals.h
@@ -1,4 +1,4 @@
-//===--- SourceManagerInternals.h - SourceManager Internals -----*- C++ -*-===//
+//===- SourceManagerInternals.h - SourceManager Internals -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,10 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines implementation details of the clang::SourceManager class.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_BASIC_SOURCEMANAGERINTERNALS_H
@@ -18,7 +18,11 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include <cassert>
 #include <map>
+#include <vector>
 
 namespace clang {
 
@@ -86,7 +90,8 @@
 
   /// \brief Map from FileIDs to a list of line entries (sorted by the offset
   /// at which they occur in the file).
-  std::map<FileID, std::vector<LineEntry> > LineEntries;
+  std::map<FileID, std::vector<LineEntry>> LineEntries;
+
 public:
   void clear() {
     FilenameIDs.clear();
@@ -95,10 +100,12 @@
   }
 
   unsigned getLineTableFilenameID(StringRef Str);
+
   StringRef getFilename(unsigned ID) const {
     assert(ID < FilenamesByID.size() && "Invalid FilenameID");
     return FilenamesByID[ID]->getKey();
   }
+
   unsigned getNumFilenames() const { return FilenamesByID.size(); }
 
   void AddLineNote(FileID FID, unsigned Offset,
@@ -112,7 +119,8 @@
   const LineEntry *FindNearestLineEntry(FileID FID, unsigned Offset);
 
   // Low-level access
-  typedef std::map<FileID, std::vector<LineEntry> >::iterator iterator;
+  using iterator = std::map<FileID, std::vector<LineEntry>>::iterator;
+
   iterator begin() { return LineEntries.begin(); }
   iterator end() { return LineEntries.end(); }
 
@@ -121,6 +129,6 @@
   void AddEntry(FileID FID, const std::vector<LineEntry> &Entries);
 };
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_BASIC_SOURCEMANAGERINTERNALS_H
diff --git a/include/clang/Basic/TargetInfo.h b/include/clang/Basic/TargetInfo.h
index 046c331..96e51c9 100644
--- a/include/clang/Basic/TargetInfo.h
+++ b/include/clang/Basic/TargetInfo.h
@@ -46,7 +46,6 @@
 class QualType;
 class SourceLocation;
 class SourceManager;
-class Type;
 
 namespace Builtin { struct Info; }
 
@@ -60,6 +59,7 @@
   // values are specified by the TargetInfo constructor.
   bool BigEndian;
   bool TLSSupported;
+  bool VLASupported;
   bool NoAsmVariants;  // True if {|} are normal characters.
   bool HasFloat128;
   unsigned char PointerWidth, PointerAlign;
@@ -358,7 +358,7 @@
 
   /// \brief Determine whether the __int128 type is supported on this target.
   virtual bool hasInt128Type() const {
-    return getPointerWidth(0) >= 64;
+    return (getPointerWidth(0) >= 64) || getTargetOpts().ForceEnableInt128;
   } // FIXME
 
   /// \brief Determine whether the __float128 type is supported on this target.
@@ -559,6 +559,14 @@
     return ComplexLongDoubleUsesFP2Ret;
   }
 
+  /// Check whether llvm intrinsics such as llvm.convert.to.fp16 should be used
+  /// to convert to and from __fp16.
+  /// FIXME: This function should be removed once all targets stop using the
+  /// conversion intrinsics.
+  virtual bool useFP16ConversionIntrinsics() const {
+    return true;
+  }
+
   /// \brief Specify if mangling based on address space map should be used or
   /// not for language specific address spaces
   bool useAddressSpaceMapMangling() const {
@@ -615,9 +623,9 @@
   /// ReturnCanonical = true and Name = "rax", will return "ax".
   StringRef getNormalizedGCCRegisterName(StringRef Name,
                                          bool ReturnCanonical = false) const;
- 
-  virtual StringRef getConstraintRegister(const StringRef &Constraint,
-                                          const StringRef &Expression) const {
+
+  virtual StringRef getConstraintRegister(StringRef Constraint,
+                                          StringRef Expression) const {
     return "";
   }
 
@@ -859,9 +867,12 @@
     return false;
   }
 
+  /// Fill a SmallVectorImpl with the valid values to setCPU.
+  virtual void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {}
+
   /// brief Determine whether this TargetInfo supports the given CPU name.
   virtual bool isValidCPUName(StringRef Name) const {
-    return false;
+    return true;
   }
 
   /// \brief Use the specified ABI.
@@ -888,7 +899,7 @@
 
   /// \brief Determine whether this TargetInfo supports the given feature.
   virtual bool isValidFeatureName(StringRef Feature) const {
-    return false;
+    return true;
   }
 
   /// \brief Perform initialization based on the user configured
@@ -912,10 +923,20 @@
     return false;
   }
 
+  /// \brief Identify whether this taret supports multiversioning of functions,
+  /// which requires support for cpu_supports and cpu_is functionality.
+  virtual bool supportsMultiVersioning() const { return false; }
+
   // \brief Validate the contents of the __builtin_cpu_supports(const char*)
   // argument.
   virtual bool validateCpuSupports(StringRef Name) const { return false; }
 
+  // \brief Return the target-specific priority for features/cpus/vendors so
+  // that they can be properly sorted for checking.
+  virtual unsigned multiVersionSortPriority(StringRef Name) const {
+    return 0;
+  }
+
   // \brief Validate the contents of the __builtin_cpu_is(const char*)
   // argument.
   virtual bool validateCpuIs(StringRef Name) const { return false; }
@@ -939,6 +960,9 @@
     return MaxTLSAlign;
   }
 
+  /// \brief Whether target supports variable-length arrays.
+  bool isVLASupported() const { return VLASupported; }
+
   /// \brief Whether the target supports SEH __try.
   bool isSEHTrySupported() const {
     return getTriple().isOSWindows() &&
@@ -1030,6 +1054,18 @@
     return false;
   }
 
+  /// Check if the target supports CFProtection branch.
+  virtual bool
+  checkCFProtectionBranchSupported(DiagnosticsEngine &Diags) const {
+    return false;
+  }
+
+  /// Check if the target supports CFProtection branch.
+  virtual bool
+  checkCFProtectionReturnSupported(DiagnosticsEngine &Diags) const {
+    return false;
+  }
+
   /// \brief Whether target allows to overalign ABI-specified preferred alignment
   virtual bool allowsLargerPreferedTypeAlignment() const { return true; }
 
@@ -1053,8 +1089,19 @@
       return getTargetOpts().SupportedOpenCLOptions;
   }
 
+  enum OpenCLTypeKind {
+    OCLTK_Default,
+    OCLTK_ClkEvent,
+    OCLTK_Event,
+    OCLTK_Image,
+    OCLTK_Pipe,
+    OCLTK_Queue,
+    OCLTK_ReserveID,
+    OCLTK_Sampler,
+  };
+
   /// \brief Get address space for OpenCL type.
-  virtual LangAS getOpenCLTypeAddrSpace(const Type *T) const;
+  virtual LangAS getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const;
 
   /// \returns Target specific vtbl ptr address space.
   virtual unsigned getVtblPtrAddressSpace() const {
diff --git a/include/clang/Basic/TargetOptions.h b/include/clang/Basic/TargetOptions.h
index 9bb19c7..bd57393 100644
--- a/include/clang/Basic/TargetOptions.h
+++ b/include/clang/Basic/TargetOptions.h
@@ -54,14 +54,15 @@
   /// be a list of strings starting with by '+' or '-'.
   std::vector<std::string> Features;
 
-  std::vector<std::string> Reciprocals;
-
   /// Supported OpenCL extensions and optional core features.
   OpenCLOptions SupportedOpenCLOptions;
 
   /// \brief The list of OpenCL extensions to enable or disable, as written on
   /// the command line.
   std::vector<std::string> OpenCLExtensionsAsWritten;
+
+  /// \brief If given, enables support for __int128_t and __uint128_t types.
+  bool ForceEnableInt128 = false;
 };
 
 }  // end namespace clang
diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def
index 4f5e515..f18e06f 100644
--- a/include/clang/Basic/TokenKinds.def
+++ b/include/clang/Basic/TokenKinds.def
@@ -194,6 +194,7 @@
 PUNCTUATOR(lessless,            "<<")
 PUNCTUATOR(lessequal,           "<=")
 PUNCTUATOR(lesslessequal,       "<<=")
+PUNCTUATOR(spaceship,           "<=>")
 PUNCTUATOR(greater,             ">")
 PUNCTUATOR(greatergreater,      ">>")
 PUNCTUATOR(greaterequal,        ">=")
@@ -455,11 +456,14 @@
 TYPE_TRAIT_1(__is_polymorphic, IsPolymorphic, KEYCXX)
 TYPE_TRAIT_1(__is_trivial, IsTrivial, KEYCXX)
 TYPE_TRAIT_1(__is_union, IsUnion, KEYCXX)
+TYPE_TRAIT_1(__has_unique_object_representations,
+             HasUniqueObjectRepresentations, KEYCXX)
 
 // Clang-only C++ Type Traits
 TYPE_TRAIT_N(__is_trivially_constructible, IsTriviallyConstructible, KEYCXX)
 TYPE_TRAIT_1(__is_trivially_copyable, IsTriviallyCopyable, KEYCXX)
 TYPE_TRAIT_2(__is_trivially_assignable, IsTriviallyAssignable, KEYCXX)
+TYPE_TRAIT_2(__reference_binds_to_temporary, ReferenceBindsToTemporary, KEYCXX)
 KEYWORD(__underlying_type           , KEYCXX)
 
 // Embarcadero Expression Traits
diff --git a/include/clang/Basic/TypeTraits.h b/include/clang/Basic/TypeTraits.h
index 6aadf795..509e8b4 100644
--- a/include/clang/Basic/TypeTraits.h
+++ b/include/clang/Basic/TypeTraits.h
@@ -70,7 +70,8 @@
     UTT_IsUnsigned,
     UTT_IsVoid,
     UTT_IsVolatile,
-    UTT_Last = UTT_IsVolatile,
+    UTT_HasUniqueObjectRepresentations,
+    UTT_Last = UTT_HasUniqueObjectRepresentations,
     BTT_IsBaseOf,
     BTT_IsConvertible,
     BTT_IsConvertibleTo,
@@ -79,7 +80,8 @@
     BTT_IsAssignable,
     BTT_IsNothrowAssignable,
     BTT_IsTriviallyAssignable,
-    BTT_Last = BTT_IsTriviallyAssignable,
+    BTT_ReferenceBindsToTemporary,
+    BTT_Last = BTT_ReferenceBindsToTemporary,
     TT_IsConstructible,
     TT_IsNothrowConstructible,
     TT_IsTriviallyConstructible
diff --git a/include/clang/Basic/VirtualFileSystem.h b/include/clang/Basic/VirtualFileSystem.h
index e52b345..245452d 100644
--- a/include/clang/Basic/VirtualFileSystem.h
+++ b/include/clang/Basic/VirtualFileSystem.h
@@ -319,16 +319,30 @@
   explicit InMemoryFileSystem(bool UseNormalizedPaths = true);
   ~InMemoryFileSystem() override;
 
-  /// Add a buffer to the VFS with a path. The VFS owns the buffer.
-  /// \return true if the file was successfully added, false if the file already
-  /// exists in the file system with different contents.
+  /// Add a file containing a buffer or a directory to the VFS with a
+  /// path. The VFS owns the buffer.  If present, User, Group, Type
+  /// and Perms apply to the newly-created file or directory.
+  /// \return true if the file or directory was successfully added,
+  /// false if the file or directory already exists in the file system with
+  /// different contents.
   bool addFile(const Twine &Path, time_t ModificationTime,
-               std::unique_ptr<llvm::MemoryBuffer> Buffer);
+               std::unique_ptr<llvm::MemoryBuffer> Buffer,
+               Optional<uint32_t> User = None, Optional<uint32_t> Group = None,
+               Optional<llvm::sys::fs::file_type> Type = None,
+               Optional<llvm::sys::fs::perms> Perms = None);
   /// Add a buffer to the VFS with a path. The VFS does not own the buffer.
-  /// \return true if the file was successfully added, false if the file already
-  /// exists in the file system with different contents.
+  /// If present, User, Group, Type and Perms apply to the newly-created file
+  /// or directory.
+  /// \return true if the file or directory was successfully added,
+  /// false if the file or directory already exists in the file system with
+  /// different contents.
   bool addFileNoOwn(const Twine &Path, time_t ModificationTime,
-                    llvm::MemoryBuffer *Buffer);
+                    llvm::MemoryBuffer *Buffer,
+                    Optional<uint32_t> User = None,
+                    Optional<uint32_t> Group = None,
+                    Optional<llvm::sys::fs::file_type> Type = None,
+                    Optional<llvm::sys::fs::perms> Perms = None);
+
   std::string toString() const;
   /// Return true if this file system normalizes . and .. in paths.
   bool useNormalizedPaths() const { return UseNormalizedPaths; }
diff --git a/include/clang/Basic/X86Target.def b/include/clang/Basic/X86Target.def
new file mode 100644
index 0000000..0bb7589
--- /dev/null
+++ b/include/clang/Basic/X86Target.def
@@ -0,0 +1,277 @@
+//===--- X86Target.def - X86 Feature/Processor Database ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86-specific Features and Processors, as used by
+// the X86 Targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PROC_WITH_FEAT
+#define PROC_WITH_FEAT(ENUM, STRING, IS64BIT, KEYFEATURE)                      \
+  PROC(ENUM, STRING, IS64BIT)
+#endif
+
+#ifndef PROC
+#define PROC(ENUM, STRING, IS64BIT)
+#endif
+
+#ifndef PROC_ALIAS
+#define PROC_ALIAS(ENUM, ALIAS)
+#endif
+
+#ifndef FEATURE
+#define FEATURE(ENUM)
+#endif
+
+#define PROC_64_BIT true
+#define PROC_32_BIT false
+
+/// \name i386
+/// i386-generation processors.
+//@{
+PROC(i386, "i386", PROC_32_BIT)
+//@}
+
+/// \name i486
+/// i486-generation processors.
+//@{
+PROC(i486, "i486", PROC_32_BIT)
+PROC(WinChipC6, "winchip-c6", PROC_32_BIT)
+PROC(WinChip2, "winchip2", PROC_32_BIT)
+PROC(C3, "c3", PROC_32_BIT)
+//@}
+
+/// \name i586
+/// i586-generation processors, P5 microarchitecture based.
+//@{
+PROC(i586, "i586", PROC_32_BIT)
+PROC(Pentium, "pentium", PROC_32_BIT)
+PROC(PentiumMMX, "pentium-mmx", PROC_32_BIT)
+//@}
+
+/// \name i686
+/// i686-generation processors, P6 / Pentium M microarchitecture based.
+//@{
+PROC(PentiumPro, "pentiumpro", PROC_32_BIT)
+PROC_ALIAS(PentiumPro, "i686")
+PROC(Pentium2, "pentium2", PROC_32_BIT)
+PROC(Pentium3, "pentium3", PROC_32_BIT)
+PROC_ALIAS(Pentium3, "pentium3m")
+PROC(PentiumM, "pentium-m", PROC_32_BIT)
+PROC(C3_2, "c3-2", PROC_32_BIT)
+
+/// This enumerator is a bit odd, as GCC no longer accepts -march=yonah.
+/// Clang however has some logic to support this.
+// FIXME: Warn, deprecate, and potentially remove this.
+PROC(Yonah, "yonah", PROC_32_BIT)
+//@}
+
+/// \name Netburst
+/// Netburst microarchitecture based processors.
+//@{
+PROC(Pentium4, "pentium4", PROC_32_BIT)
+PROC_ALIAS(Pentium4, "pentium4m")
+
+PROC(Prescott, "prescott", PROC_32_BIT)
+PROC(Nocona, "nocona", PROC_64_BIT)
+//@}
+
+/// \name Core
+/// Core microarchitecture based processors.
+//@{
+PROC_WITH_FEAT(Core2, "core2", PROC_64_BIT, FEATURE_SSSE3)
+
+/// This enumerator, like Yonah, is a bit odd. It is another
+/// codename which GCC no longer accepts as an option to -march, but Clang
+/// has some logic for recognizing it.
+// FIXME: Warn, deprecate, and potentially remove this.
+PROC(Penryn, "penryn", PROC_64_BIT)
+//@}
+
+/// \name Atom
+/// Atom processors
+//@{
+PROC_WITH_FEAT(Bonnell, "bonnell", PROC_64_BIT, FEATURE_SSSE3)
+PROC_ALIAS(Bonnell, "atom")
+
+PROC_WITH_FEAT(Silvermont, "silvermont", PROC_64_BIT, FEATURE_SSE4_2)
+PROC_ALIAS(Silvermont, "slm")
+
+PROC(Goldmont, "goldmont", PROC_64_BIT)
+//@}
+
+/// \name Nehalem
+/// Nehalem microarchitecture based processors.
+PROC_WITH_FEAT(Nehalem, "nehalem", PROC_64_BIT, FEATURE_SSE4_2)
+PROC_ALIAS(Nehalem, "corei7")
+
+/// \name Westmere
+/// Westmere microarchitecture based processors.
+PROC_WITH_FEAT(Westmere, "westmere", PROC_64_BIT, FEATURE_PCLMUL)
+
+/// \name Sandy Bridge
+/// Sandy Bridge microarchitecture based processors.
+PROC_WITH_FEAT(SandyBridge, "sandybridge", PROC_64_BIT, FEATURE_AVX)
+PROC_ALIAS(SandyBridge, "corei7-avx")
+
+/// \name Ivy Bridge
+/// Ivy Bridge microarchitecture based processors.
+PROC_WITH_FEAT(IvyBridge, "ivybridge", PROC_64_BIT, FEATURE_AVX)
+PROC_ALIAS(IvyBridge, "core-avx-i")
+
+/// \name Haswell
+/// Haswell microarchitecture based processors.
+PROC_WITH_FEAT(Haswell, "haswell", PROC_64_BIT, FEATURE_AVX2)
+PROC_ALIAS(Haswell, "core-avx2")
+
+/// \name Broadwell
+/// Broadwell microarchitecture based processors.
+PROC_WITH_FEAT(Broadwell, "broadwell", PROC_64_BIT, FEATURE_AVX2)
+
+/// \name Skylake Client
+/// Skylake client microarchitecture based processors.
+PROC_WITH_FEAT(SkylakeClient, "skylake", PROC_64_BIT, FEATURE_AVX2)
+
+/// \name Skylake Server
+/// Skylake server microarchitecture based processors.
+PROC_WITH_FEAT(SkylakeServer, "skylake-avx512", PROC_64_BIT, FEATURE_AVX512F)
+PROC_ALIAS(SkylakeServer, "skx")
+
+/// \name Cannonlake Client
+/// Cannonlake client microarchitecture based processors.
+PROC_WITH_FEAT(Cannonlake, "cannonlake", PROC_64_BIT, FEATURE_AVX512VBMI)
+
+/// \name Icelake Client
+/// Icelake client microarchitecture based processors.
+PROC(Icelake, "icelake", PROC_64_BIT)
+
+/// \name Knights Landing
+/// Knights Landing processor.
+PROC_WITH_FEAT(KNL, "knl", PROC_64_BIT, FEATURE_AVX512F)
+
+/// \name Knights Mill
+/// Knights Mill processor.
+PROC_WITH_FEAT(KNM, "knm", PROC_64_BIT, FEATURE_AVX5124FMAPS)
+
+/// \name Lakemont
+/// Lakemont microarchitecture based processors.
+PROC(Lakemont, "lakemont", PROC_32_BIT)
+
+/// \name K6
+/// K6 architecture processors.
+//@{
+PROC(K6, "k6", PROC_32_BIT)
+PROC(K6_2, "k6-2", PROC_32_BIT)
+PROC(K6_3, "k6-3", PROC_32_BIT)
+//@}
+
+/// \name K7
+/// K7 architecture processors.
+//@{
+PROC(Athlon, "athlon", PROC_32_BIT)
+PROC_ALIAS(Athlon, "athlon-tbird")
+
+PROC(AthlonXP, "athlon-xp", PROC_32_BIT)
+PROC_ALIAS(AthlonXP, "athlon-mp")
+PROC_ALIAS(AthlonXP, "athlon-4")
+//@}
+
+/// \name K8
+/// K8 architecture processors.
+//@{
+PROC(K8, "k8", PROC_64_BIT)
+PROC_ALIAS(K8, "athlon64")
+PROC_ALIAS(K8, "athlon-fx")
+PROC_ALIAS(K8, "opteron")
+
+PROC(K8SSE3, "k8-sse3", PROC_64_BIT)
+PROC_ALIAS(K8SSE3, "athlon64-sse3")
+PROC_ALIAS(K8SSE3, "opteron-sse3")
+
+PROC_WITH_FEAT(AMDFAM10, "amdfam10", PROC_64_BIT, FEATURE_SSE4_A)
+PROC_ALIAS(AMDFAM10, "barcelona")
+//@}
+
+/// \name Bobcat
+/// Bobcat architecture processors.
+//@{
+PROC_WITH_FEAT(BTVER1, "btver1", PROC_64_BIT, FEATURE_SSE4_A)
+PROC_WITH_FEAT(BTVER2, "btver2", PROC_64_BIT, FEATURE_BMI)
+//@}
+
+/// \name Bulldozer
+/// Bulldozer architecture processors.
+//@{
+PROC_WITH_FEAT(BDVER1, "bdver1", PROC_64_BIT, FEATURE_XOP)
+PROC_WITH_FEAT(BDVER2, "bdver2", PROC_64_BIT, FEATURE_FMA)
+PROC_WITH_FEAT(BDVER3, "bdver3", PROC_64_BIT, FEATURE_FMA)
+PROC_WITH_FEAT(BDVER4, "bdver4", PROC_64_BIT, FEATURE_AVX2)
+//@}
+
+/// \name zen
+/// Zen architecture processors.
+//@{
+PROC_WITH_FEAT(ZNVER1, "znver1", PROC_64_BIT, FEATURE_AVX2)
+//@}
+
+/// This specification is deprecated and will be removed in the future.
+/// Users should prefer K8.
+// FIXME: Warn on this when the CPU is set to it.
+//@{
+PROC(x86_64, "x86-64", PROC_64_BIT)
+//@}
+
+/// \name Geode
+/// Geode processors.
+//@{
+PROC(Geode, "geode", PROC_32_BIT)
+//@}
+
+// List of CPU Supports features in order.  These need to remain in the order
+// required by attribute 'target' checking.  Note that not all are supported/
+// prioritized by GCC, so synchronization with GCC's implementation may require
+// changing some existing values.
+FEATURE(FEATURE_CMOV)
+FEATURE(FEATURE_MMX)
+FEATURE(FEATURE_SSE)
+FEATURE(FEATURE_SSE2)
+FEATURE(FEATURE_SSE3)
+FEATURE(FEATURE_SSSE3)
+FEATURE(FEATURE_SSE4_A)
+FEATURE(FEATURE_SSE4_1)
+FEATURE(FEATURE_SSE4_2)
+FEATURE(FEATURE_POPCNT)
+FEATURE(FEATURE_AES)
+FEATURE(FEATURE_PCLMUL)
+FEATURE(FEATURE_AVX)
+FEATURE(FEATURE_BMI)
+FEATURE(FEATURE_FMA4)
+FEATURE(FEATURE_XOP)
+FEATURE(FEATURE_FMA)
+FEATURE(FEATURE_BMI2)
+FEATURE(FEATURE_AVX2)
+FEATURE(FEATURE_AVX512F)
+FEATURE(FEATURE_AVX512VL)
+FEATURE(FEATURE_AVX512BW)
+FEATURE(FEATURE_AVX512DQ)
+FEATURE(FEATURE_AVX512CD)
+FEATURE(FEATURE_AVX512ER)
+FEATURE(FEATURE_AVX512PF)
+FEATURE(FEATURE_AVX512VBMI)
+FEATURE(FEATURE_AVX512IFMA)
+FEATURE(FEATURE_AVX5124VNNIW)
+FEATURE(FEATURE_AVX5124FMAPS)
+FEATURE(FEATURE_AVX512VPOPCNTDQ)
+
+#undef PROC_64_BIT
+#undef PROC_32_BIT
+#undef FEATURE
+#undef PROC
+#undef PROC_ALIAS
+#undef PROC_WITH_FEAT
diff --git a/include/clang/Basic/arm_fp16.td b/include/clang/Basic/arm_fp16.td
new file mode 100644
index 0000000..5c7e437
--- /dev/null
+++ b/include/clang/Basic/arm_fp16.td
@@ -0,0 +1,131 @@
+//===--- arm_fp16.td - ARM FP16 compiler interface ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the TableGen definitions from which the ARM FP16 header
+//  file will be generated.
+//
+//===----------------------------------------------------------------------===//
+
+include "arm_neon_incl.td"
+
+// ARMv8.2-A FP16 intrinsics.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in {
+
+  // Negate
+  def VNEGSH          : SInst<"vneg", "ss", "Sh">;
+
+  // Reciprocal/Sqrt
+  def SCALAR_FRECPSH  : IInst<"vrecps", "sss", "Sh">;
+  def FSQRTSH         : SInst<"vsqrt", "ss", "Sh">;
+  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;
+
+  // Reciprocal Estimate
+  def SCALAR_FRECPEH  : IInst<"vrecpe", "ss", "Sh">;
+
+  // Reciprocal Exponent
+  def SCALAR_FRECPXH  : IInst<"vrecpx", "ss", "Sh">;
+
+  // Reciprocal Square Root Estimate
+  def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">;
+
+  // Rounding
+  def FRINTZ_S64H     : SInst<"vrnd", "ss", "Sh">;
+  def FRINTA_S64H     : SInst<"vrnda", "ss", "Sh">;
+  def FRINTI_S64H     : SInst<"vrndi", "ss", "Sh">;
+  def FRINTM_S64H     : SInst<"vrndm", "ss", "Sh">;
+  def FRINTN_S64H     : SInst<"vrndn", "ss", "Sh">;
+  def FRINTP_S64H     : SInst<"vrndp", "ss", "Sh">;
+  def FRINTX_S64H     : SInst<"vrndx", "ss", "Sh">;
+
+  // Conversion
+  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "Ys", "silUsUiUl">;
+  def SCALAR_FCVTZSH  : SInst<"vcvt_s16", "$s", "Sh">;
+  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
+  def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">;
+  def SCALAR_FCVTZUH  : SInst<"vcvt_u16", "bs", "Sh">;
+  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
+  def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">;
+  def SCALAR_FCVTASH  : SInst<"vcvta_s16", "$s", "Sh">;
+  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
+  def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">;
+  def SCALAR_FCVTAUH  : SInst<"vcvta_u16", "bs", "Sh">;
+  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
+  def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">;
+  def SCALAR_FCVTMSH  : SInst<"vcvtm_s16", "$s", "Sh">;
+  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
+  def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">;
+  def SCALAR_FCVTMUH  : SInst<"vcvtm_u16", "bs", "Sh">;
+  def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">;
+  def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">;
+  def SCALAR_FCVTNSH  : SInst<"vcvtn_s16", "$s", "Sh">;
+  def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">;
+  def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">;
+  def SCALAR_FCVTNUH  : SInst<"vcvtn_u16", "bs", "Sh">;
+  def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">;
+  def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">;
+  def SCALAR_FCVTPSH  : SInst<"vcvtp_s16", "$s", "Sh">;
+  def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">;
+  def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">;
+  def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "bs", "Sh">;
+  def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
+  def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">;
+
+  def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "silUsUiUl">;
+  def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">;
+  def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
+  def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">;
+  def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">;
+  def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
+  def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">;
+
+  // Comparison
+  def SCALAR_CMEQRH   : SInst<"vceq", "bss", "Sh">;
+  def SCALAR_CMEQZH   : SInst<"vceqz", "bs", "Sh">;
+  def SCALAR_CMGERH   : SInst<"vcge", "bss", "Sh">;
+  def SCALAR_CMGEZH   : SInst<"vcgez", "bs", "Sh">;
+  def SCALAR_CMGTRH   : SInst<"vcgt", "bss", "Sh">;
+  def SCALAR_CMGTZH   : SInst<"vcgtz", "bs", "Sh">;
+  def SCALAR_CMLERH   : SInst<"vcle", "bss", "Sh">;
+  def SCALAR_CMLEZH   : SInst<"vclez", "bs", "Sh">;
+  def SCALAR_CMLTH    : SInst<"vclt", "bss", "Sh">;
+  def SCALAR_CMLTZH   : SInst<"vcltz", "bs", "Sh">;
+
+  // Absolute Compare Mask Greater Than Or Equal
+  def SCALAR_FACGEH   : IInst<"vcage", "bss", "Sh">;
+  def SCALAR_FACLEH   : IInst<"vcale", "bss", "Sh">;
+
+  // Absolute Compare Mask Greater Than
+  def SCALAR_FACGT    : IInst<"vcagt", "bss", "Sh">;
+  def SCALAR_FACLT    : IInst<"vcalt", "bss", "Sh">;
+
+  // Scalar Absolute Value
+  def SCALAR_ABSH     : SInst<"vabs", "ss", "Sh">;
+
+  // Scalar Absolute Difference
+  def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">;
+
+  // Add/Sub
+  def VADDSH          : SInst<"vadd", "sss", "Sh">;
+  def VSUBHS          : SInst<"vsub", "sss", "Sh">;
+
+  // Max/Min
+  def VMAXHS          : SInst<"vmax", "sss", "Sh">;
+  def VMINHS          : SInst<"vmin", "sss", "Sh">;
+  def FMAXNMHS        : SInst<"vmaxnm", "sss", "Sh">;
+  def FMINNMHS        : SInst<"vminnm", "sss", "Sh">;
+
+  // Multiplication/Division
+  def VMULHS          : SInst<"vmul", "sss", "Sh">;
+  def MULXHS          : SInst<"vmulx", "sss", "Sh">;
+  def FDIVHS          : SInst<"vdiv", "sss",  "Sh">;
+
+  // Vector fused multiply-add operations
+  def VFMAHS          : SInst<"vfma", "ssss", "Sh">;
+  def VFMSHS          : SInst<"vfms", "ssss", "Sh">;
+}
diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td
index ad8d679..37aac58 100644
--- a/include/clang/Basic/arm_neon.td
+++ b/include/clang/Basic/arm_neon.td
@@ -11,308 +11,8 @@
 //  file will be generated.  See ARM document DUI0348B.
 //
 //===----------------------------------------------------------------------===//
-//
-// Each intrinsic is a subclass of the Inst class. An intrinsic can either
-// generate a __builtin_* call or it can expand to a set of generic operations.
-//
-// The operations are subclasses of Operation providing a list of DAGs, the
-// last of which is the return value. The available DAG nodes are documented
-// below.
-//
-//===----------------------------------------------------------------------===//
 
-// The base Operation class. All operations must subclass this.
-class Operation<list<dag> ops=[]> {
-  list<dag> Ops = ops;
-  bit Unavailable = 0;
-}
-// An operation that only contains a single DAG.
-class Op<dag op> : Operation<[op]>;
-// A shorter version of Operation - takes a list of DAGs. The last of these will
-// be the return value.
-class LOp<list<dag> ops> : Operation<ops>;
-
-// These defs and classes are used internally to implement the SetTheory
-// expansion and should be ignored.
-foreach Index = 0-63 in
-  def sv##Index;
-class MaskExpand;
-
-//===----------------------------------------------------------------------===//
-// Available operations
-//===----------------------------------------------------------------------===//
-
-// DAG arguments can either be operations (documented below) or variables.
-// Variables are prefixed with '$'. There are variables for each input argument,
-// with the name $pN, where N starts at zero. So the zero'th argument will be
-// $p0, the first $p1 etc.
-
-// op - Binary or unary operator, depending on the number of arguments. The
-//      operator itself is just treated as a raw string and is not checked.
-// example: (op "+", $p0, $p1) -> "__p0 + __p1".
-//          (op "-", $p0)      -> "-__p0"
-def op;
-// call - Invoke another intrinsic. The input types are type checked and
-//        disambiguated. If there is no intrinsic defined that takes
-//        the given types (or if there is a type ambiguity) an error is
-//        generated at tblgen time. The name of the intrinsic is the raw
-//        name as given to the Inst class (not mangled).
-// example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)"
-//            (assuming $p0 has type int16x8_t).
-def call;
-// cast - Perform a cast to a different type. This gets emitted as a static
-//        C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use
-//        "bitcast".
-//
-//        The syntax is (cast MOD* VAL). The last argument is the value to
-//        cast, preceded by a sequence of type modifiers. The target type
-//        starts off as the type of VAL, and is modified by MOD in sequence.
-//        The available modifiers are:
-//          - $X  - Take the type of parameter/variable X. For example:
-//                  (cast $p0, $p1) would cast $p1 to the type of $p0.
-//          - "R" - The type of the return type.
-//          - A typedef string - A NEON or stdint.h type that is then parsed.
-//                               for example: (cast "uint32x4_t", $p0).
-//          - "U" - Make the type unsigned.
-//          - "S" - Make the type signed.
-//          - "H" - Halve the number of lanes in the type.
-//          - "D" - Double the number of lanes in the type.
-//          - "8" - Convert type to an equivalent vector of 8-bit signed
-//                  integers.
-// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
-//           value is of type "int32x4_t".
-//          (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
-//           has type float64x1_t or any other vector type of 64 bits).
-//          (cast "int32_t", $p2) -> "(int32_t)__p2"
-def cast;
-// bitcast - Same as "cast", except a reinterpret-cast is produced:
-//             (bitcast "T", $p0) -> "*(T*)&__p0".
-//           The VAL argument is saved to a temporary so it can be used
-//           as an l-value.
-def bitcast;
-// dup - Take a scalar argument and create a vector by duplicating it into
-//       all lanes. The type of the vector is the base type of the intrinsic.
-// example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type
-//          is uint32x2_t).
-def dup;
-// splat - Take a vector and a lane index, and return a vector of the same type
-//         containing repeated instances of the source vector at the lane index.
-// example: (splat $p0, $p1) ->
-//            "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)"
-//          (assuming __p0 has four elements).
-def splat;
-// save_temp - Create a temporary (local) variable. The variable takes a name
-//             based on the zero'th parameter and can be referenced using
-//             using that name in subsequent DAGs in the same
-//             operation. The scope of a temp is the operation. If a variable
-//             with the given name already exists, an error will be given at
-//             tblgen time.
-// example: [(save_temp $var, (call "foo", $p0)),
-//           (op "+", $var, $p1)] ->
-//              "int32x2_t __var = foo(__p0); return __var + __p1;"
-def save_temp;
-// name_replace - Return the name of the current intrinsic with the first
-//                argument replaced by the second argument. Raises an error if
-//                the first argument does not exist in the intrinsic name.
-// example: (call (name_replace "_high_", "_"), $p0) (to call the non-high
-//            version of this intrinsic).
-def name_replace;
-// literal - Create a literal piece of code. The code is treated as a raw
-//           string, and must be given a type. The type is a stdint.h or
-//           NEON intrinsic type as given to (cast).
-// example: (literal "int32_t", "0")
-def literal;
-// shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK).
-//           The MASK argument is a set of elements. The elements are generated
-//           from the two special defs "mask0" and "mask1". "mask0" expands to
-//           the lane indices in sequence for ARG0, and "mask1" expands to
-//           the lane indices in sequence for ARG1. They can be used as-is, e.g.
-//
-//             (shuffle $p0, $p1, mask0) -> $p0
-//             (shuffle $p0, $p1, mask1) -> $p1
-//
-//           or, more usefully, they can be manipulated using the SetTheory
-//           operators plus some extra operators defined in the NEON emitter.
-//           The operators are described below.
-// example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) ->
-//            A concatenation of the high halves of the input vectors.
-def shuffle;
-
-// add, interleave, decimate: These set operators are vanilla SetTheory
-// operators and take their normal definition.
-def add;
-def interleave;
-def decimate;
-// rotl - Rotate set left by a number of elements.
-// example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2]
-def rotl;
-// rotl - Rotate set right by a number of elements.
-// example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3]
-def rotr;
-// highhalf - Take only the high half of the input.
-// example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements)
-def highhalf;
-// highhalf - Take only the low half of the input.
-// example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements)
-def lowhalf;
-// rev - Perform a variable-width reversal of the elements. The zero'th argument
-//       is a width in bits to reverse. The lanes this maps to is determined
-//       based on the element width of the underlying type.
-// example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements)
-// example: (rev 32, mask0) -> [1, 0, 3, 2]             (if 16-bit elements)
-def rev;
-// mask0 - The initial sequence of lanes for shuffle ARG0
-def mask0 : MaskExpand;
-// mask0 - The initial sequence of lanes for shuffle ARG1
-def mask1 : MaskExpand;
-
-def OP_NONE  : Operation;
-def OP_UNAVAILABLE : Operation {
-  let Unavailable = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// Instruction definitions
-//===----------------------------------------------------------------------===//
-
-// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
-// a sequence of typespecs.
-//
-// The name is the base name of the intrinsic, for example "vget_lane". This is
-// then mangled by the tblgen backend to add type information ("vget_lane_s16").
-//
-// A typespec is a sequence of uppercase characters (modifiers) followed by one
-// lowercase character. A typespec encodes a particular "base type" of the
-// intrinsic.
-//
-// An example typespec is "Qs" - quad-size short - uint16x8_t. The available
-// typespec codes are given below.
-//
-// The string given to an Inst class is a sequence of typespecs. The intrinsic
-// is instantiated for every typespec in the sequence. For example "sdQsQd".
-//
-// The prototype is a string that defines the return type of the intrinsic
-// and the type of each argument. The return type and every argument gets a
-// "modifier" that can change in some way the "base type" of the intrinsic.
-//
-// The modifier 'd' means "default" and does not modify the base type in any
-// way. The available modifiers are given below.
-//
-// Typespecs
-// ---------
-// c: char
-// s: short
-// i: int
-// l: long
-// k: 128-bit long
-// f: float
-// h: half-float
-// d: double
-//
-// Typespec modifiers
-// ------------------
-// S: scalar, only used for function mangling.
-// U: unsigned
-// Q: 128b
-// H: 128b without mangling 'q'
-// P: polynomial
-//
-// Prototype modifiers
-// -------------------
-// prototype: return (arg, arg, ...)
-//
-// v: void
-// t: best-fit integer (int/poly args)
-// x: signed integer   (int/float args)
-// u: unsigned integer (int/float args)
-// f: float (int args)
-// F: double (int args)
-// d: default
-// g: default, ignore 'Q' size modifier.
-// j: default, force 'Q' size modifier.
-// w: double width elements, same num elts
-// n: double width elements, half num elts
-// h: half width elements, double num elts
-// q: half width elements, quad num elts
-// e: half width elements, double num elts, unsigned
-// m: half width elements, same num elts
-// i: constant int
-// l: constant uint64
-// s: scalar of element type
-// z: scalar of half width element type, signed
-// r: scalar of double width element type, signed
-// a: scalar of element type (splat to vector type)
-// b: scalar of unsigned integer/long type (int/float args)
-// $: scalar of signed integer/long type (int/float args)
-// y: scalar of float
-// o: scalar of double
-// k: default elt width, double num elts
-// 2,3,4: array of default vectors
-// B,C,D: array of default elts, force 'Q' size modifier.
-// p: pointer type
-// c: const pointer type
-
-// Every intrinsic subclasses Inst.
-class Inst <string n, string p, string t, Operation o> {
-  string Name = n;
-  string Prototype = p;
-  string Types = t;
-  string ArchGuard = "";
-
-  Operation Operation = o;
-  bit CartesianProductOfTypes = 0;
-  bit BigEndianSafe = 0;
-  bit isShift = 0;
-  bit isScalarShift = 0;
-  bit isScalarNarrowShift = 0;
-  bit isVCVT_N = 0;
-  // For immediate checks: the immediate will be assumed to specify the lane of
-  // a Q register. Only used for intrinsics which end up calling polymorphic
-  // builtins.
-  bit isLaneQ = 0;
-
-  // Certain intrinsics have different names than their representative
-  // instructions. This field allows us to handle this correctly when we
-  // are generating tests.
-  string InstName = "";
-
-  // Certain intrinsics even though they are not a WOpInst or LOpInst,
-  // generate a WOpInst/LOpInst instruction (see below for definition
-  // of a WOpInst/LOpInst). For testing purposes we need to know
-  // this. Ex: vset_lane which outputs vmov instructions.
-  bit isHiddenWInst = 0;
-  bit isHiddenLInst = 0;
-}
-
-// The following instruction classes are implemented via builtins.
-// These declarations are used to generate Builtins.def:
-//
-// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
-// IInst: Instruction with generic integer suffix (e.g., "i8")
-// WInst: Instruction with only bit size suffix (e.g., "8")
-class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
-class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
-class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
-
-// The following instruction classes are implemented via operators
-// instead of builtins. As such these declarations are only used for
-// the purpose of generating tests.
-//
-// SOpInst:       Instruction with signed/unsigned suffix (e.g., "s8",
-//                "u8", "p8").
-// IOpInst:       Instruction with generic integer suffix (e.g., "i8").
-// WOpInst:       Instruction with bit size only suffix (e.g., "8").
-// LOpInst:       Logical instruction with no bit size suffix.
-// NoTestOpInst:  Intrinsic that has no corresponding instruction.
-class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
-class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
-class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
-class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
-class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
-
-//===----------------------------------------------------------------------===//
-// Operations
-//===----------------------------------------------------------------------===//
+include "arm_neon_incl.td"
 
 def OP_ADD      : Op<(op "+", $p0, $p1)>;
 def OP_ADDL     : Op<(op "+", (call "vmovl", $p0), (call "vmovl", $p1))>;
@@ -345,6 +45,7 @@
                                          (call "vget_high", $p2))>;
 def OP_MLSLHi_N : Op<(call "vmlsl_n", $p0, (call "vget_high", $p1), $p2)>;
 def OP_MUL_N    : Op<(op "*", $p0, (dup $p1))>;
+def OP_MULX_N   : Op<(call "vmulx", $p0, (dup $p1))>;
 def OP_MLA_N    : Op<(op "+", $p0, (op "*", $p1, (dup $p2)))>;
 def OP_MLS_N    : Op<(op "-", $p0, (op "*", $p1, (dup $p2)))>;
 def OP_FMLA_N   : Op<(call "vfma", $p0, $p1, (dup $p2))>;
@@ -1661,3 +1362,186 @@
 def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
 def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
 }
+
+// ARMv8.2-A FP16 intrinsics.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in {
+
+  // ARMv8.2-A FP16 one-operand vector intrinsics.
+
+  // Comparison
+  def CMEQH    : SInst<"vceqz", "ud", "hQh">;
+  def CMGEH    : SInst<"vcgez", "ud", "hQh">;
+  def CMGTH    : SInst<"vcgtz", "ud", "hQh">;
+  def CMLEH    : SInst<"vclez", "ud", "hQh">;
+  def CMLTH    : SInst<"vcltz", "ud", "hQh">;
+
+  // Vector conversion
+  def VCVT_F16     : SInst<"vcvt_f16", "Hd",  "sUsQsQUs">;
+  def VCVT_S16     : SInst<"vcvt_s16", "xd",  "hQh">;
+  def VCVT_U16     : SInst<"vcvt_u16", "ud",  "hQh">;
+  def VCVTA_S16    : SInst<"vcvta_s16", "xd", "hQh">;
+  def VCVTA_U16    : SInst<"vcvta_u16", "ud", "hQh">;
+  def VCVTM_S16    : SInst<"vcvtm_s16", "xd", "hQh">;
+  def VCVTM_U16    : SInst<"vcvtm_u16", "ud", "hQh">;
+  def VCVTN_S16    : SInst<"vcvtn_s16", "xd", "hQh">;
+  def VCVTN_U16    : SInst<"vcvtn_u16", "ud", "hQh">;
+  def VCVTP_S16    : SInst<"vcvtp_s16", "xd", "hQh">;
+  def VCVTP_U16    : SInst<"vcvtp_u16", "ud", "hQh">;
+
+  // Vector rounding
+  def FRINTZH      : SInst<"vrnd",  "dd", "hQh">;
+  def FRINTNH      : SInst<"vrndn", "dd", "hQh">;
+  def FRINTAH      : SInst<"vrnda", "dd", "hQh">;
+  def FRINTPH      : SInst<"vrndp", "dd", "hQh">;
+  def FRINTMH      : SInst<"vrndm", "dd", "hQh">;
+  def FRINTXH      : SInst<"vrndx", "dd", "hQh">;
+  def FRINTIH      : SInst<"vrndi", "dd", "hQh">;
+
+  // Misc.
+  def VABSH        : SInst<"vabs", "dd", "hQh">;
+  def VNEGH        : SOpInst<"vneg", "dd", "hQh", OP_NEG>;
+  def VRECPEH      : SInst<"vrecpe", "dd", "hQh">;
+  def FRSQRTEH     : SInst<"vrsqrte", "dd", "hQh">;
+  def FSQRTH       : SInst<"vsqrt", "dd", "hQh">;
+
+  // ARMv8.2-A FP16 two-operands vector intrinsics.
+
+  // Misc.
+  def VADDH        : SOpInst<"vadd", "ddd", "hQh", OP_ADD>;
+  def VABDH        : SInst<"vabd", "ddd",  "hQh">;
+  def VSUBH         : SOpInst<"vsub", "ddd", "hQh", OP_SUB>;
+
+  // Comparison
+  let InstName = "vacge" in {
+	  def VCAGEH     : SInst<"vcage", "udd", "hQh">;
+	  def VCALEH     : SInst<"vcale", "udd", "hQh">;
+  }
+  let InstName = "vacgt" in {
+    def VCAGTH     : SInst<"vcagt", "udd", "hQh">;
+	  def VCALTH     : SInst<"vcalt", "udd", "hQh">;
+  }
+  def VCEQH        : SOpInst<"vceq", "udd", "hQh", OP_EQ>;
+  def VCGEH        : SOpInst<"vcge", "udd", "hQh", OP_GE>;
+  def VCGTH        : SOpInst<"vcgt", "udd", "hQh", OP_GT>;
+  let InstName = "vcge" in
+    def VCLEH      : SOpInst<"vcle", "udd", "hQh", OP_LE>;
+  let InstName = "vcgt" in
+    def VCLTH      : SOpInst<"vclt", "udd", "hQh", OP_LT>;
+
+  // Vector conversion
+  let isVCVT_N = 1 in {
+    def VCVT_N_F16 : SInst<"vcvt_n_f16", "Hdi", "sUsQsQUs">;
+    def VCVT_N_S16 : SInst<"vcvt_n_s16", "xdi", "hQh">;
+    def VCVT_N_U16 : SInst<"vcvt_n_u16", "udi", "hQh">;
+  }
+
+  // Max/Min
+  def VMAXH         : SInst<"vmax", "ddd", "hQh">;
+  def VMINH         : SInst<"vmin", "ddd", "hQh">;
+  def FMAXNMH       : SInst<"vmaxnm", "ddd", "hQh">;
+  def FMINNMH       : SInst<"vminnm", "ddd", "hQh">;
+
+  // Multiplication/Division
+  def VMULH         : SOpInst<"vmul", "ddd", "hQh", OP_MUL>;
+  def MULXH         : SInst<"vmulx", "ddd", "hQh">;
+  def FDIVH         : IOpInst<"vdiv", "ddd",  "hQh", OP_DIV>;
+
+  // Pairwise addition
+  def VPADDH        : SInst<"vpadd", "ddd", "hQh">;
+
+  // Pairwise Max/Min
+  def VPMAXH        : SInst<"vpmax", "ddd", "hQh">;
+  def VPMINH        : SInst<"vpmin", "ddd", "hQh">;
+  // Pairwise MaxNum/MinNum
+  def FMAXNMPH      : SInst<"vpmaxnm", "ddd", "hQh">;
+  def FMINNMPH      : SInst<"vpminnm", "ddd", "hQh">;
+
+  // Reciprocal/Sqrt
+  def VRECPSH       : SInst<"vrecps", "ddd", "hQh">;
+  def VRSQRTSH      : SInst<"vrsqrts", "ddd", "hQh">;
+
+  // ARMv8.2-A FP16 three-operands vector intrinsics.
+
+  // Vector fused multiply-add operations
+  def VFMAH        : SInst<"vfma", "dddd", "hQh">;
+  def VFMSH        : SOpInst<"vfms", "dddd", "hQh", OP_FMLS>;
+
+  // ARMv8.2-A FP16 lane vector intrinsics.
+
+  // FMA lane
+  def VFMA_LANEH   : IInst<"vfma_lane", "dddgi", "hQh">;
+  def VFMA_LANEQH  : IInst<"vfma_laneq", "dddji", "hQh">;
+
+  // FMA lane with scalar argument
+  def FMLA_NH      : SOpInst<"vfma_n", "ddds", "hQh", OP_FMLA_N>;
+  // Scalar floating point fused multiply-add (scalar, by element)
+  def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "sssdi", "Sh">;
+  def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "sssji", "Sh">;
+
+  // FMS lane
+  def VFMS_LANEH   : IOpInst<"vfms_lane", "dddgi", "hQh", OP_FMS_LN>;
+  def VFMS_LANEQH  : IOpInst<"vfms_laneq", "dddji", "hQh", OP_FMS_LNQ>;
+  // FMS lane with scalar argument
+  def FMLS_NH      : SOpInst<"vfms_n", "ddds", "hQh", OP_FMLS_N>;
+  // Scalar floating foint fused multiply-subtract (scalar, by element)
+  def SCALAR_FMLS_LANEH  : IOpInst<"vfms_lane", "sssdi", "Sh", OP_FMS_LN>;
+  def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "sssji", "Sh", OP_FMS_LNQ>;
+
+  // Mul lane
+  def VMUL_LANEH    : IOpInst<"vmul_lane", "ddgi", "hQh", OP_MUL_LN>;
+  def VMUL_LANEQH   : IOpInst<"vmul_laneq", "ddji", "hQh", OP_MUL_LN>;
+  def VMUL_NH       : IOpInst<"vmul_n", "dds", "hQh", OP_MUL_N>;
+  // Scalar floating point  multiply (scalar, by element)
+  def SCALAR_FMUL_LANEH  : IOpInst<"vmul_lane", "ssdi", "Sh", OP_SCALAR_MUL_LN>;
+  def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "ssji", "Sh", OP_SCALAR_MUL_LN>;
+
+  // Mulx lane
+  def VMULX_LANEH   : IOpInst<"vmulx_lane", "ddgi", "hQh", OP_MULX_LN>;
+  def VMULX_LANEQH  : IOpInst<"vmulx_laneq", "ddji", "hQh", OP_MULX_LN>;
+  def VMULX_NH      : IOpInst<"vmulx_n", "dds", "hQh", OP_MULX_N>;
+  // TODO: Scalar floating point multiply extended (scalar, by element)
+  // Below ones are commented out because they need vmulx_f16(float16_t, float16_t)
+  // which will be implemented later with fp16 scalar intrinsic (arm_fp16.h)
+  //def SCALAR_FMULX_LANEH : IOpInst<"vmulx_lane", "ssdi", "Sh", OP_SCALAR_MUL_LN>;
+  //def SCALAR_FMULX_LANEQH : IOpInst<"vmulx_laneq", "ssji", "Sh", OP_SCALAR_MUL_LN>;
+
+  // ARMv8.2-A FP16 reduction vector intrinsics.
+  def VMAXVH   : SInst<"vmaxv", "sd", "hQh">;
+  def VMINVH   : SInst<"vminv", "sd", "hQh">;
+  def FMAXNMVH : SInst<"vmaxnmv", "sd", "hQh">;
+  def FMINNMVH : SInst<"vminnmv", "sd", "hQh">;
+
+  // Data processing intrinsics - section 5
+
+  // Logical operations
+  let isHiddenLInst = 1 in
+  def VBSLH    : SInst<"vbsl", "dudd", "hQh">;
+
+  // Transposition operations
+  def VZIPH    : WInst<"vzip", "2dd", "hQh">;
+  def VUZPH    : WInst<"vuzp", "2dd", "hQh">;
+  def VTRNH    : WInst<"vtrn", "2dd", "hQh">;
+
+  // Set all lanes to same value.
+  /* Already implemented prior to ARMv8.2-A.
+  def VMOV_NH  : WOpInst<"vmov_n", "ds", "hQh", OP_DUP>;
+  def VDUP_NH  : WOpInst<"vdup_n", "ds", "hQh", OP_DUP>;
+  def VDUP_LANE1H : WOpInst<"vdup_lane", "dgi", "hQh", OP_DUP_LN>;*/
+
+  // Vector Extract
+  def VEXTH      : WInst<"vext", "dddi", "hQh">;
+
+  // Reverse vector elements
+  def VREV64H    : WOpInst<"vrev64", "dd", "hQh", OP_REV64>;
+
+  // Permutation
+  def VTRN1H     : SOpInst<"vtrn1", "ddd", "hQh", OP_TRN1>;
+  def VZIP1H     : SOpInst<"vzip1", "ddd", "hQh", OP_ZIP1>;
+  def VUZP1H     : SOpInst<"vuzp1", "ddd", "hQh", OP_UZP1>;
+  def VTRN2H     : SOpInst<"vtrn2", "ddd", "hQh", OP_TRN2>;
+  def VZIP2H     : SOpInst<"vzip2", "ddd", "hQh", OP_ZIP2>;
+  def VUZP2H     : SOpInst<"vuzp2", "ddd", "hQh", OP_UZP2>;
+
+  def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "sdi", "Sh">;
+  def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "sji", "Sh">;
+}
diff --git a/include/clang/Basic/arm_neon_incl.td b/include/clang/Basic/arm_neon_incl.td
new file mode 100644
index 0000000..e6a6d25
--- /dev/null
+++ b/include/clang/Basic/arm_neon_incl.td
@@ -0,0 +1,313 @@
+//===--- arm_neon_incl.td - ARM NEON compiler interface ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines data structures shared by arm_neon.td and arm_fp16.td.
+//  It constains base operation classes, operations, instructions, instruction
+//  modifiers, etc.  
+//
+//===----------------------------------------------------------------------===//
+//
+// Each intrinsic is a subclass of the Inst class. An intrinsic can either
+// generate a __builtin_* call or it can expand to a set of generic operations.
+//
+// The operations are subclasses of Operation providing a list of DAGs, the
+// last of which is the return value. The available DAG nodes are documented
+// below.
+//
+//===----------------------------------------------------------------------===//
+
+// The base Operation class. All operations must subclass this.
+class Operation<list<dag> ops=[]> {
+  list<dag> Ops = ops;
+  bit Unavailable = 0;
+}
+// An operation that only contains a single DAG.
+class Op<dag op> : Operation<[op]>;
+// A shorter version of Operation - takes a list of DAGs. The last of these will
+// be the return value.
+class LOp<list<dag> ops> : Operation<ops>;
+
+// These defs and classes are used internally to implement the SetTheory
+// expansion and should be ignored.
+foreach Index = 0-63 in
+  def sv##Index;
+class MaskExpand;
+
+//===----------------------------------------------------------------------===//
+// Available operations
+//===----------------------------------------------------------------------===//
+
+// DAG arguments can either be operations (documented below) or variables.
+// Variables are prefixed with '$'. There are variables for each input argument,
+// with the name $pN, where N starts at zero. So the zero'th argument will be
+// $p0, the first $p1 etc.
+
+// op - Binary or unary operator, depending on the number of arguments. The
+//      operator itself is just treated as a raw string and is not checked.
+// example: (op "+", $p0, $p1) -> "__p0 + __p1".
+//          (op "-", $p0)      -> "-__p0"
+def op;
+// call - Invoke another intrinsic. The input types are type checked and
+//        disambiguated. If there is no intrinsic defined that takes
+//        the given types (or if there is a type ambiguity) an error is
+//        generated at tblgen time. The name of the intrinsic is the raw
+//        name as given to the Inst class (not mangled).
+// example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)"
+//            (assuming $p0 has type int16x8_t).
+def call;
+// cast - Perform a cast to a different type. This gets emitted as a static
+//        C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use
+//        "bitcast".
+//
+//        The syntax is (cast MOD* VAL). The last argument is the value to
+//        cast, preceded by a sequence of type modifiers. The target type
+//        starts off as the type of VAL, and is modified by MOD in sequence.
+//        The available modifiers are:
+//          - $X  - Take the type of parameter/variable X. For example:
+//                  (cast $p0, $p1) would cast $p1 to the type of $p0.
+//          - "R" - The type of the return type.
+//          - A typedef string - A NEON or stdint.h type that is then parsed.
+//                               for example: (cast "uint32x4_t", $p0).
+//          - "U" - Make the type unsigned.
+//          - "S" - Make the type signed.
+//          - "H" - Halve the number of lanes in the type.
+//          - "D" - Double the number of lanes in the type.
+//          - "8" - Convert type to an equivalent vector of 8-bit signed
+//                  integers.
+// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
+//           value is of type "int32x4_t".
+//          (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
+//           has type float64x1_t or any other vector type of 64 bits).
+//          (cast "int32_t", $p2) -> "(int32_t)__p2"
+def cast;
+// bitcast - Same as "cast", except a reinterpret-cast is produced:
+//             (bitcast "T", $p0) -> "*(T*)&__p0".
+//           The VAL argument is saved to a temporary so it can be used
+//           as an l-value.
+def bitcast;
+// dup - Take a scalar argument and create a vector by duplicating it into
+//       all lanes. The type of the vector is the base type of the intrinsic.
+// example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type
+//          is uint32x2_t).
+def dup;
+// splat - Take a vector and a lane index, and return a vector of the same type
+//         containing repeated instances of the source vector at the lane index.
+// example: (splat $p0, $p1) ->
+//            "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)"
+//          (assuming __p0 has four elements).
+def splat;
+// save_temp - Create a temporary (local) variable. The variable takes a name
+//             based on the zero'th parameter and can be referenced using
+//             using that name in subsequent DAGs in the same
+//             operation. The scope of a temp is the operation. If a variable
+//             with the given name already exists, an error will be given at
+//             tblgen time.
+// example: [(save_temp $var, (call "foo", $p0)),
+//           (op "+", $var, $p1)] ->
+//              "int32x2_t __var = foo(__p0); return __var + __p1;"
+def save_temp;
+// name_replace - Return the name of the current intrinsic with the first
+//                argument replaced by the second argument. Raises an error if
+//                the first argument does not exist in the intrinsic name.
+// example: (call (name_replace "_high_", "_"), $p0) (to call the non-high
+//            version of this intrinsic).
+def name_replace;
+// literal - Create a literal piece of code. The code is treated as a raw
+//           string, and must be given a type. The type is a stdint.h or
+//           NEON intrinsic type as given to (cast).
+// example: (literal "int32_t", "0")
+def literal;
+// shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK).
+//           The MASK argument is a set of elements. The elements are generated
+//           from the two special defs "mask0" and "mask1". "mask0" expands to
+//           the lane indices in sequence for ARG0, and "mask1" expands to
+//           the lane indices in sequence for ARG1. They can be used as-is, e.g.
+//
+//             (shuffle $p0, $p1, mask0) -> $p0
+//             (shuffle $p0, $p1, mask1) -> $p1
+//
+//           or, more usefully, they can be manipulated using the SetTheory
+//           operators plus some extra operators defined in the NEON emitter.
+//           The operators are described below.
+// example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) ->
+//            A concatenation of the high halves of the input vectors.
+def shuffle;
+
+// add, interleave, decimate: These set operators are vanilla SetTheory
+// operators and take their normal definition.
+def add;
+def interleave;
+def decimate;
+// rotl - Rotate set left by a number of elements.
+// example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2]
+def rotl;
+// rotl - Rotate set right by a number of elements.
+// example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3]
+def rotr;
+// highhalf - Take only the high half of the input.
+// example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements)
+def highhalf;
+// highhalf - Take only the low half of the input.
+// example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements)
+def lowhalf;
+// rev - Perform a variable-width reversal of the elements. The zero'th argument
+//       is a width in bits to reverse. The lanes this maps to is determined
+//       based on the element width of the underlying type.
+// example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements)
+// example: (rev 32, mask0) -> [1, 0, 3, 2]             (if 16-bit elements)
+def rev;
+// mask0 - The initial sequence of lanes for shuffle ARG0
+def mask0 : MaskExpand;
+// mask0 - The initial sequence of lanes for shuffle ARG1
+def mask1 : MaskExpand;
+
+def OP_NONE  : Operation;
+def OP_UNAVAILABLE : Operation {
+  let Unavailable = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions
+//===----------------------------------------------------------------------===//
+
+// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
+// a sequence of typespecs.
+//
+// The name is the base name of the intrinsic, for example "vget_lane". This is
+// then mangled by the tblgen backend to add type information ("vget_lane_s16").
+//
+// A typespec is a sequence of uppercase characters (modifiers) followed by one
+// lowercase character. A typespec encodes a particular "base type" of the
+// intrinsic.
+//
+// An example typespec is "Qs" - quad-size short - uint16x8_t. The available
+// typespec codes are given below.
+//
+// The string given to an Inst class is a sequence of typespecs. The intrinsic
+// is instantiated for every typespec in the sequence. For example "sdQsQd".
+//
+// The prototype is a string that defines the return type of the intrinsic
+// and the type of each argument. The return type and every argument gets a
+// "modifier" that can change in some way the "base type" of the intrinsic.
+//
+// The modifier 'd' means "default" and does not modify the base type in any
+// way. The available modifiers are given below.
+//
+// Typespecs
+// ---------
+// c: char
+// s: short
+// i: int
+// l: long
+// k: 128-bit long
+// f: float
+// h: half-float
+// d: double
+//
+// Typespec modifiers
+// ------------------
+// S: scalar, only used for function mangling.
+// U: unsigned
+// Q: 128b
+// H: 128b without mangling 'q'
+// P: polynomial
+//
+// Prototype modifiers
+// -------------------
+// prototype: return (arg, arg, ...)
+//
+// v: void
+// t: best-fit integer (int/poly args)
+// x: signed integer   (int/float args)
+// u: unsigned integer (int/float args)
+// f: float (int args)
+// F: double (int args)
+// H: half (int args)
+// d: default
+// g: default, ignore 'Q' size modifier.
+// j: default, force 'Q' size modifier.
+// w: double width elements, same num elts
+// n: double width elements, half num elts
+// h: half width elements, double num elts
+// q: half width elements, quad num elts
+// e: half width elements, double num elts, unsigned
+// m: half width elements, same num elts
+// i: constant int
+// l: constant uint64
+// s: scalar of element type
+// z: scalar of half width element type, signed
+// r: scalar of double width element type, signed
+// a: scalar of element type (splat to vector type)
+// b: scalar of unsigned integer/long type (int/float args)
+// $: scalar of signed integer/long type (int/float args)
+// y: scalar of float
+// o: scalar of double
+// k: default elt width, double num elts
+// 2,3,4: array of default vectors
+// B,C,D: array of default elts, force 'Q' size modifier.
+// p: pointer type
+// c: const pointer type
+
+// Every intrinsic subclasses Inst.
+class Inst <string n, string p, string t, Operation o> {
+  string Name = n;
+  string Prototype = p;
+  string Types = t;
+  string ArchGuard = "";
+
+  Operation Operation = o;
+  bit CartesianProductOfTypes = 0;
+  bit BigEndianSafe = 0;
+  bit isShift = 0;
+  bit isScalarShift = 0;
+  bit isScalarNarrowShift = 0;
+  bit isVCVT_N = 0;
+  // For immediate checks: the immediate will be assumed to specify the lane of
+  // a Q register. Only used for intrinsics which end up calling polymorphic
+  // builtins.
+  bit isLaneQ = 0;
+
+  // Certain intrinsics have different names than their representative
+  // instructions. This field allows us to handle this correctly when we
+  // are generating tests.
+  string InstName = "";
+
+  // Certain intrinsics even though they are not a WOpInst or LOpInst,
+  // generate a WOpInst/LOpInst instruction (see below for definition
+  // of a WOpInst/LOpInst). For testing purposes we need to know
+  // this. Ex: vset_lane which outputs vmov instructions.
+  bit isHiddenWInst = 0;
+  bit isHiddenLInst = 0;
+}
+
+// The following instruction classes are implemented via builtins.
+// These declarations are used to generate Builtins.def:
+//
+// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
+// IInst: Instruction with generic integer suffix (e.g., "i8")
+// WInst: Instruction with only bit size suffix (e.g., "8")
+class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
+class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
+class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
+
+// The following instruction classes are implemented via operators
+// instead of builtins. As such these declarations are only used for
+// the purpose of generating tests.
+//
+// SOpInst:       Instruction with signed/unsigned suffix (e.g., "s8",
+//                "u8", "p8").
+// IOpInst:       Instruction with generic integer suffix (e.g., "i8").
+// WOpInst:       Instruction with bit size only suffix (e.g., "8").
+// LOpInst:       Logical instruction with no bit size suffix.
+// NoTestOpInst:  Intrinsic that has no corresponding instruction.
+class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
+class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
+class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
+class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
+class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
diff --git a/include/clang/CodeGen/BackendUtil.h b/include/clang/CodeGen/BackendUtil.h
index bb105cb..3d1221a 100644
--- a/include/clang/CodeGen/BackendUtil.h
+++ b/include/clang/CodeGen/BackendUtil.h
@@ -49,6 +49,8 @@
 
   llvm::Expected<llvm::BitcodeModule>
   FindThinLTOModule(llvm::MemoryBufferRef MBRef);
+  llvm::BitcodeModule *
+  FindThinLTOModule(llvm::MutableArrayRef<llvm::BitcodeModule> BMs);
 }
 
 #endif
diff --git a/include/clang/CodeGen/CGFunctionInfo.h b/include/clang/CodeGen/CGFunctionInfo.h
index d1f0512..42f963b 100644
--- a/include/clang/CodeGen/CGFunctionInfo.h
+++ b/include/clang/CodeGen/CGFunctionInfo.h
@@ -95,6 +95,7 @@
   bool SRetAfterThis : 1;   // isIndirect()
   bool InReg : 1;           // isDirect() || isExtend() || isIndirect()
   bool CanBeFlattened: 1;   // isDirect()
+  bool SignExt : 1;         // isExtend()
 
   bool canHavePaddingType() const {
     return isDirect() || isExtend() || isIndirect() || isExpand();
@@ -133,15 +134,38 @@
     AI.setInReg(true);
     return AI;
   }
-  static ABIArgInfo getExtend(llvm::Type *T = nullptr) {
+
+  static ABIArgInfo getSignExtend(QualType Ty, llvm::Type *T = nullptr) {
+    assert(Ty->isIntegralOrEnumerationType() && "Unexpected QualType");
     auto AI = ABIArgInfo(Extend);
     AI.setCoerceToType(T);
     AI.setPaddingType(nullptr);
     AI.setDirectOffset(0);
+    AI.setSignExt(true);
     return AI;
   }
-  static ABIArgInfo getExtendInReg(llvm::Type *T = nullptr) {
-    auto AI = getExtend(T);
+
+  static ABIArgInfo getZeroExtend(QualType Ty, llvm::Type *T = nullptr) {
+    assert(Ty->isIntegralOrEnumerationType() && "Unexpected QualType");
+    auto AI = ABIArgInfo(Extend);
+    AI.setCoerceToType(T);
+    AI.setPaddingType(nullptr);
+    AI.setDirectOffset(0);
+    AI.setSignExt(false);
+    return AI;
+  }
+
+  // ABIArgInfo will record the argument as being extended based on the sign
+  // of its type.
+  static ABIArgInfo getExtend(QualType Ty, llvm::Type *T = nullptr) {
+    assert(Ty->isIntegralOrEnumerationType() && "Unexpected QualType");
+    if (Ty->hasSignedIntegerRepresentation())
+      return getSignExtend(Ty, T);
+    return getZeroExtend(Ty, T);
+  }
+
+  static ABIArgInfo getExtendInReg(QualType Ty, llvm::Type *T = nullptr) {
+    auto AI = getExtend(Ty, T);
     AI.setInReg(true);
     return AI;
   }
@@ -254,6 +278,15 @@
     DirectOffset = Offset;
   }
 
+  bool isSignExt() const {
+    assert(isExtend() && "Invalid kind!");
+    return SignExt;
+  }
+  void setSignExt(bool SExt) {
+    assert(isExtend() && "Invalid kind!");
+    SignExt = SExt;
+  }
+
   llvm::Type *getPaddingType() const {
     return (canHavePaddingType() ? PaddingType : nullptr);
   }
diff --git a/include/clang/CodeGen/ConstantInitBuilder.h b/include/clang/CodeGen/ConstantInitBuilder.h
index 113d86d..d1388f0 100644
--- a/include/clang/CodeGen/ConstantInitBuilder.h
+++ b/include/clang/CodeGen/ConstantInitBuilder.h
@@ -295,7 +295,7 @@
     slot = value;
   }
 
-  /// Produce an address which will eventually point to the the next
+  /// Produce an address which will eventually point to the next
   /// position to be filled.  This is computed with an indexed
   /// getelementptr rather than by computing offsets.
   ///
diff --git a/include/clang/CodeGen/SwiftCallingConv.h b/include/clang/CodeGen/SwiftCallingConv.h
index 23db43e..8ea2b9d 100644
--- a/include/clang/CodeGen/SwiftCallingConv.h
+++ b/include/clang/CodeGen/SwiftCallingConv.h
@@ -116,6 +116,12 @@
   void splitVectorEntry(unsigned index);
 };
 
+/// Should an aggregate which expands to the given type sequence
+/// be passed/returned indirectly under swiftcall?
+bool shouldPassIndirectly(CodeGenModule &CGM,
+                          ArrayRef<llvm::Type*> types,
+                          bool asReturnValue);
+
 /// Return the maximum voluntary integer size for the current target.
 CharUnits getMaximumVoluntaryIntegerSize(CodeGenModule &CGM);
 
diff --git a/include/clang/Config/config.h.cmake b/include/clang/Config/config.h.cmake
index 10074aa..5f42019 100644
--- a/include/clang/Config/config.h.cmake
+++ b/include/clang/Config/config.h.cmake
@@ -17,6 +17,9 @@
 /* Default runtime library to use. */
 #define CLANG_DEFAULT_RTLIB "${CLANG_DEFAULT_RTLIB}"
 
+/* Default objcopy to use */
+#define CLANG_DEFAULT_OBJCOPY "${CLANG_DEFAULT_OBJCOPY}"
+
 /* Default OpenMP runtime used by -fopenmp. */
 #define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}"
 
@@ -32,6 +35,10 @@
 /* Directories clang will search for headers */
 #define C_INCLUDE_DIRS "${C_INCLUDE_DIRS}"
 
+/* Directories clang will search for configuration files */
+#cmakedefine CLANG_CONFIG_FILE_SYSTEM_DIR "${CLANG_CONFIG_FILE_SYSTEM_DIR}"
+#cmakedefine CLANG_CONFIG_FILE_USER_DIR "${CLANG_CONFIG_FILE_USER_DIR}"
+
 /* Default <path> to all compiler invocations for --sysroot=<path>. */
 #define DEFAULT_SYSROOT "${DEFAULT_SYSROOT}"
 
diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td
index bca51b5..97c996d 100644
--- a/include/clang/Driver/CC1Options.td
+++ b/include/clang/Driver/CC1Options.td
@@ -244,6 +244,8 @@
   HelpText<"Turn off Type Based Alias Analysis">;
 def no_struct_path_tbaa : Flag<["-"], "no-struct-path-tbaa">,
   HelpText<"Turn off struct-path aware Type Based Alias Analysis">;
+def new_struct_path_tbaa : Flag<["-"], "new-struct-path-tbaa">,
+  HelpText<"Enable enhanced struct-path aware Type Based Alias Analysis">;
 def masm_verbose : Flag<["-"], "masm-verbose">,
   HelpText<"Generate verbose assembly output">;
 def mcode_model : Separate<["-"], "mcode-model">,
@@ -261,6 +263,8 @@
 def menable_unsafe_fp_math : Flag<["-"], "menable-unsafe-fp-math">,
   HelpText<"Allow unsafe floating-point math optimizations which may decrease "
            "precision">;
+def mreassociate : Flag<["-"], "mreassociate">,
+  HelpText<"Allow reassociation transformations for floating-point instructions">;
 def mfloat_abi : Separate<["-"], "mfloat-abi">,
   HelpText<"The float ABI to use">;
 def mtp : Separate<["-"], "mtp">,
@@ -398,8 +402,12 @@
   HelpText<"Set the maximum number of source lines to show in a caret diagnostic">;
 def fmessage_length : Separate<["-"], "fmessage-length">, MetaVarName<"<N>">,
   HelpText<"Format message diagnostics so that they fit within N columns or fewer, when possible.">;
+def verify_EQ : CommaJoined<["-"], "verify=">,
+  MetaVarName<"<prefixes>">,
+  HelpText<"Verify diagnostic output using comment directives that start with"
+           " prefixes in the comma-separated sequence <prefixes>">;
 def verify : Flag<["-"], "verify">,
-  HelpText<"Verify diagnostic output using comment directives">;
+  HelpText<"Equivalent to -verify=expected">;
 def verify_ignore_unexpected : Flag<["-"], "verify-ignore-unexpected">,
   HelpText<"Ignore unexpected diagnostic messages">;
 def verify_ignore_unexpected_EQ : CommaJoined<["-"], "verify-ignore-unexpected=">,
@@ -435,6 +443,8 @@
   HelpText<"Include code patterns in code-completion results">;
 def no_code_completion_globals : Flag<["-"], "no-code-completion-globals">,
   HelpText<"Do not include global declarations in code-completion results.">;
+def no_code_completion_ns_level_decls : Flag<["-"], "no-code-completion-ns-level-decls">,
+  HelpText<"Do not include declarations inside namespaces (incl. global namespace) in the code-completion results.">;
 def code_completion_brief_comments : Flag<["-"], "code-completion-brief-comments">,
   HelpText<"Include brief documentation comments in code-completion results.">;
 def disable_free : Flag<["-"], "disable-free">,
@@ -523,6 +533,8 @@
   HelpText<"Build ASTs and then debug dump them">;
 def ast_dump_all : Flag<["-"], "ast-dump-all">,
   HelpText<"Build ASTs and then debug dump them, forcing deserialization">;
+def templight_dump : Flag<["-"], "templight-dump">,
+  HelpText<"Dump templight information to stdout">;
 def ast_dump_lookups : Flag<["-"], "ast-dump-lookups">,
   HelpText<"Build ASTs and then debug dump their name lookup tables">;
 def ast_view : Flag<["-"], "ast-view">,
@@ -695,8 +707,6 @@
   HelpText<"Allow Objective-C array and dictionary subscripting in legacy runtime">;
 def vtordisp_mode_EQ : Joined<["-"], "vtordisp-mode=">,
   HelpText<"Control vtordisp placement on win32 targets">;
-def fno_rtti_data : Flag<["-"], "fno-rtti-data">,
-  HelpText<"Control emission of RTTI data">;
 def fnative_half_type: Flag<["-"], "fnative-half-type">,
   HelpText<"Use the native half type for __fp16 instead of promoting to float">;
 def fnative_half_arguments_and_returns : Flag<["-"], "fnative-half-arguments-and-returns">,
@@ -704,7 +714,7 @@
 def fallow_half_arguments_and_returns : Flag<["-"], "fallow-half-arguments-and-returns">,
   HelpText<"Allow function arguments and returns of type half">;
 def fdefault_calling_conv_EQ : Joined<["-"], "fdefault-calling-conv=">,
-  HelpText<"Set default MS calling convention">, Values<"cdecl,fastcall,stdcall,vectorcall">;
+  HelpText<"Set default calling convention">, Values<"cdecl,fastcall,stdcall,vectorcall,regcall">;
 def finclude_default_header : Flag<["-"], "finclude-default-header">,
   HelpText<"Include the default header file for OpenCL">;
 def fpreserve_vec3_type : Flag<["-"], "fpreserve-vec3-type">,
diff --git a/include/clang/Driver/CLCompatOptions.td b/include/clang/Driver/CLCompatOptions.td
index aebb36e..fb9eb59 100644
--- a/include/clang/Driver/CLCompatOptions.td
+++ b/include/clang/Driver/CLCompatOptions.td
@@ -61,6 +61,8 @@
 def _SLASH_C : CLFlag<"C">,
   HelpText<"Don't discard comments when preprocessing">, Alias<C>;
 def _SLASH_c : CLFlag<"c">, HelpText<"Compile only">, Alias<c>;
+def _SLASH_d1PP : CLFlag<"d1PP">,
+  HelpText<"Retain macro definitions in /E mode">, Alias<dD>;
 def _SLASH_d1reportAllClassLayout : CLFlag<"d1reportAllClassLayout">,
   HelpText<"Dump record layout information">, Alias<fdump_record_layouts>;
 def _SLASH_diagnostics_caret : CLFlag<"diagnostics:caret">,
@@ -145,7 +147,8 @@
 def _SLASH_W2 : CLFlag<"W2">, HelpText<"Enable -Wall">, Alias<Wall>;
 def _SLASH_W3 : CLFlag<"W3">, HelpText<"Enable -Wall">, Alias<Wall>;
 def _SLASH_W4 : CLFlag<"W4">, HelpText<"Enable -Wall and -Wextra">, Alias<WCL4>;
-def _SLASH_Wall : CLFlag<"Wall">, HelpText<"Enable -Wall and -Wextra">, Alias<WCL4>;
+def _SLASH_Wall : CLFlag<"Wall">, HelpText<"Enable -Weverything">,
+  Alias<W_Joined>, AliasArgs<["everything"]>;
 def _SLASH_WX : CLFlag<"WX">, HelpText<"Treat warnings as errors">,
   Alias<W_Joined>, AliasArgs<["error"]>;
 def _SLASH_WX_ : CLFlag<"WX-">, HelpText<"Do not treat warnings as errors">,
@@ -153,6 +156,8 @@
 def _SLASH_w_flag : CLFlag<"w">, HelpText<"Disable all warnings">, Alias<w>;
 def _SLASH_wd4005 : CLFlag<"wd4005">, Alias<W_Joined>,
   AliasArgs<["no-macro-redefined"]>;
+def _SLASH_wd4018 : CLFlag<"wd4018">, Alias<W_Joined>,
+  AliasArgs<["no-sign-compare"]>;
 def _SLASH_wd4100 : CLFlag<"wd4100">, Alias<W_Joined>,
   AliasArgs<["no-unused-parameter"]>;
 def _SLASH_wd4910 : CLFlag<"wd4910">, Alias<W_Joined>,
@@ -161,6 +166,9 @@
   AliasArgs<["no-deprecated-declarations"]>;
 def _SLASH_vd : CLJoined<"vd">, HelpText<"Control vtordisp placement">,
   Alias<vtordisp_mode_EQ>;
+def _SLASH_X : CLFlag<"X">,
+  HelpText<"Don't add %INCLUDE% to the include search path">,
+  Alias<nostdlibinc>;
 def _SLASH_Zc_sizedDealloc : CLFlag<"Zc:sizedDealloc">,
   HelpText<"Enable C++14 sized global deallocation functions">,
   Alias<fsized_deallocation>;
@@ -232,6 +240,8 @@
 def _SLASH_Fo : CLCompileJoined<"Fo">,
   HelpText<"Set output object file, or directory (ends in / or \\) (with /c)">,
   MetaVarName<"<file or directory>">;
+def _SLASH_Guard : CLJoined<"guard:">,
+  HelpText<"Enable Control Flow Guard with /guard:cf">;
 def _SLASH_GX : CLFlag<"GX">,
   HelpText<"Enable exception handling">;
 def _SLASH_GX_ : CLFlag<"GX-">,
@@ -302,6 +312,8 @@
   HelpText<"Set __stdcall as a default calling convention">;
 def _SLASH_Gv : CLFlag<"Gv">,
   HelpText<"Set __vectorcall as a default calling convention">;
+def _SLASH_Gregcall : CLFlag<"Gregcall">,
+  HelpText<"Set __regcall as a default calling convention">;
 
 // Ignored:
 
@@ -319,6 +331,7 @@
 def _SLASH_nologo : CLIgnoredFlag<"nologo">;
 def _SLASH_Og : CLIgnoredFlag<"Og">;
 def _SLASH_openmp_ : CLIgnoredFlag<"openmp-">;
+def _SLASH_permissive_ : CLIgnoredFlag<"permissive-">;
 def _SLASH_RTC : CLIgnoredJoined<"RTC">;
 def _SLASH_sdl : CLIgnoredFlag<"sdl">;
 def _SLASH_sdl_ : CLIgnoredFlag<"sdl-">;
@@ -339,6 +352,8 @@
 // Unsupported:
 
 def _SLASH_AI : CLJoined<"AI">;
+def _SLASH_Bt : CLFlag<"Bt">;
+def _SLASH_Bt_plus : CLFlag<"Bt+">;
 def _SLASH_clr : CLJoined<"clr">;
 def _SLASH_doc : CLJoined<"doc">;
 def _SLASH_FA_joined : CLJoined<"FA">;
@@ -359,7 +374,6 @@
 def _SLASH_Gm : CLFlag<"Gm">;
 def _SLASH_Gm_ : CLFlag<"Gm-">;
 def _SLASH_GT : CLFlag<"GT">;
-def _SLASH_Guard : CLJoined<"guard:">;
 def _SLASH_GZ : CLFlag<"GZ">;
 def _SLASH_H : CLFlag<"H">;
 def _SLASH_homeparams : CLFlag<"homeparams">;
@@ -377,7 +391,6 @@
 def _SLASH_V : CLFlag<"V">;
 def _SLASH_WL : CLFlag<"WL">;
 def _SLASH_Wp64 : CLFlag<"Wp64">;
-def _SLASH_X : CLFlag<"X">;
 def _SLASH_Yd : CLFlag<"Yd">;
 def _SLASH_Yl : CLJoined<"Yl">;
 def _SLASH_Za : CLFlag<"Za">;
diff --git a/include/clang/Driver/Distro.h b/include/clang/Driver/Distro.h
index fab4986..4ab4e2a 100644
--- a/include/clang/Driver/Distro.h
+++ b/include/clang/Driver/Distro.h
@@ -26,12 +26,14 @@
     // NB: Releases of a particular Linux distro should be kept together
     // in this enum, because some tests are done by integer comparison against
     // the first and last known member in the family, e.g. IsRedHat().
+    AlpineLinux,
     ArchLinux,
     DebianLenny,
     DebianSqueeze,
     DebianWheezy,
     DebianJessie,
     DebianStretch,
+    DebianBuster,
     Exherbo,
     RHEL5,
     RHEL6,
@@ -58,6 +60,7 @@
     UbuntuYakkety,
     UbuntuZesty,
     UbuntuArtful,
+    UbuntuBionic,
     UnknownDistro
   };
 
@@ -107,11 +110,15 @@
   }
 
   bool IsDebian() const {
-    return DistroVal >= DebianLenny && DistroVal <= DebianStretch;
+    return DistroVal >= DebianLenny && DistroVal <= DebianBuster;
   }
 
   bool IsUbuntu() const {
-    return DistroVal >= UbuntuHardy && DistroVal <= UbuntuArtful;
+    return DistroVal >= UbuntuHardy && DistroVal <= UbuntuBionic;
+  }
+
+  bool IsAlpineLinux() const {
+    return DistroVal == AlpineLinux;
   }
 
   /// @}
diff --git a/include/clang/Driver/Driver.h b/include/clang/Driver/Driver.h
index a366287..0e55dab 100644
--- a/include/clang/Driver/Driver.h
+++ b/include/clang/Driver/Driver.h
@@ -19,6 +19,8 @@
 #include "clang/Driver/Util.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/StringSaver.h"
 
 #include <list>
 #include <map>
@@ -26,14 +28,6 @@
 
 namespace llvm {
 class Triple;
-
-namespace opt {
-  class Arg;
-  class ArgList;
-  class DerivedArgList;
-  class InputArgList;
-  class OptTable;
-}
 }
 
 namespace clang {
@@ -138,6 +132,12 @@
   /// The path to the compiler resource directory.
   std::string ResourceDir;
 
+  /// System directory for config files.
+  std::string SystemConfigDir;
+
+  /// User directory for config files.
+  std::string UserConfigDir;
+
   /// A prefix directory used to emulate a limited subset of GCC's '-Bprefix'
   /// functionality.
   /// FIXME: This type of customization should be removed in favor of the
@@ -208,6 +208,21 @@
   /// Name to use when invoking gcc/g++.
   std::string CCCGenericGCCName;
 
+  /// Name of configuration file if used.
+  std::string ConfigFile;
+
+  /// Allocator for string saver.
+  llvm::BumpPtrAllocator Alloc;
+
+  /// Object that stores strings read from configuration file.
+  llvm::StringSaver Saver;
+
+  /// Arguments originated from configuration file.
+  std::unique_ptr<llvm::opt::InputArgList> CfgOptions;
+
+  /// Arguments originated from command line.
+  std::unique_ptr<llvm::opt::InputArgList> CLOptions;
+
   /// Whether to check that input files exist when constructing compilation
   /// jobs.
   unsigned CheckInputsExist : 1;
@@ -277,6 +292,8 @@
   /// Name to use when invoking gcc/g++.
   const std::string &getCCCGenericGCCName() const { return CCCGenericGCCName; }
 
+  const std::string &getConfigFile() const { return ConfigFile; }
+
   const llvm::opt::OptTable &getOpts() const { return *Opts; }
 
   const DiagnosticsEngine &getDiags() const { return Diags; }
@@ -425,9 +442,9 @@
   // FIXME: This should be in CompilationInfo.
   std::string GetProgramPath(StringRef Name, const ToolChain &TC) const;
 
-  /// handleAutocompletions - Handle --autocomplete by searching and printing
+  /// HandleAutocompletions - Handle --autocomplete by searching and printing
   /// possible flags, descriptions, and its arguments.
-  void handleAutocompletions(StringRef PassedFlags) const;
+  void HandleAutocompletions(StringRef PassedFlags) const;
 
   /// HandleImmediateArgs - Handle any arguments which should be
   /// treated before building actions or binding tools.
@@ -493,6 +510,18 @@
   LTOKind getLTOMode() const { return LTOMode; }
 
 private:
+
+  /// Tries to load options from configuration file.
+  ///
+  /// \returns true if error occurred.
+  bool loadConfigFile();
+
+  /// Read options from the specified file.
+  ///
+  /// \param [in] FileName File to read.
+  /// \returns true, if error occurred while reading.
+  bool readConfigFile(StringRef FileName);
+
   /// Set the driver mode (cl, gcc, etc) from an option string of the form
   /// --driver-mode=<mode>.
   void setDriverModeFromOption(StringRef Opt);
@@ -543,6 +572,8 @@
   /// no extra characters remaining at the end.
   static bool GetReleaseVersion(StringRef Str,
                                 MutableArrayRef<unsigned> Digits);
+  /// Compute the default -fmodule-cache-path.
+  static void getDefaultModuleCachePath(SmallVectorImpl<char> &Result);
 };
 
 /// \return True if the last defined optimization level is -Ofast.
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td
index 526b72f..6925157 100644
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -466,7 +466,8 @@
 def Xopenmp_target : Separate<["-"], "Xopenmp-target">,
   HelpText<"Pass <arg> to the target offloading toolchain.">, MetaVarName<"<arg>">;
 def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">,
-  HelpText<"Pass <arg> to the specified target offloading toolchain. The triple that identifies the toolchain must be provided after the equals sign.">, MetaVarName<"<arg>">;
+  HelpText<"Pass <arg> to the target offloading toolchain identified by <triple>.">,
+  MetaVarName<"<triple> <arg>">;
 def z : Separate<["-"], "z">, Flags<[LinkerInput, RenderAsInput]>,
   HelpText<"Pass -z <arg> to the linker">, MetaVarName<"<arg>">,
   Group<Link_Group>;
@@ -492,6 +493,8 @@
 def bundle__loader : Separate<["-"], "bundle_loader">;
 def bundle : Flag<["-"], "bundle">;
 def b : JoinedOrSeparate<["-"], "b">, Flags<[Unsupported]>;
+def cfguard : Flag<["-"], "cfguard">, Flags<[CC1Option]>,
+  HelpText<"Emit tables required for Windows Control Flow Guard.">;
 def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>, Flags<[CC1Option]>,
   HelpText<"OpenCL only. This option disables all optimizations. By default optimizations are enabled.">;
 def cl_strict_aliasing : Flag<["-"], "cl-strict-aliasing">, Group<opencl_Group>, Flags<[CC1Option]>,
@@ -516,10 +519,18 @@
   HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
 def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
   HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">;
+def cl_uniform_work_group_size : Flag<["-"], "cl-uniform-work-group-size">, Group<opencl_Group>, Flags<[CC1Option]>,
+  HelpText<"OpenCL only. Defines that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel">;
 def client__name : JoinedOrSeparate<["-"], "client_name">;
 def combine : Flag<["-", "--"], "combine">, Flags<[DriverOption, Unsupported]>;
 def compatibility__version : JoinedOrSeparate<["-"], "compatibility_version">;
-def coverage : Flag<["-", "--"], "coverage">;
+def config : Separate<["--"], "config">, Flags<[DriverOption]>,
+  HelpText<"Specifies configuration file">;
+def config_system_dir_EQ : Joined<["--"], "config-system-dir=">, Flags<[DriverOption, HelpHidden]>,
+  HelpText<"System directory for configuration files">;
+def config_user_dir_EQ : Joined<["--"], "config-user-dir=">, Flags<[DriverOption, HelpHidden]>,
+  HelpText<"User directory for configuration files">;
+def coverage : Flag<["-", "--"], "coverage">, Flags<[CoreOption]>;
 def cpp_precomp : Flag<["-"], "cpp-precomp">, Group<clang_ignored_f_Group>;
 def current__version : JoinedOrSeparate<["-"], "current_version">;
 def cxx_isystem : JoinedOrSeparate<["-"], "cxx-isystem">, Group<clang_i_Group>,
@@ -548,6 +559,8 @@
 def no_cuda_noopt_device_debug : Flag<["--"], "no-cuda-noopt-device-debug">;
 def cuda_path_EQ : Joined<["--"], "cuda-path=">, Group<i_Group>,
   HelpText<"CUDA installation path">;
+def cuda_path_ignore_env : Flag<["--"], "cuda-path-ignore-env">, Group<i_Group>,
+  HelpText<"Ignore environment variables to detect CUDA installation">;
 def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Group<i_Group>,
   HelpText<"Path to ptxas (used for compiling CUDA code)">;
 def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">,
@@ -556,6 +569,9 @@
 def fcuda_approx_transcendentals : Flag<["-"], "fcuda-approx-transcendentals">,
   Flags<[CC1Option]>, HelpText<"Use approximate transcendental functions">;
 def fno_cuda_approx_transcendentals : Flag<["-"], "fno-cuda-approx-transcendentals">;
+def fcuda_rdc : Flag<["-"], "fcuda-rdc">, Flags<[CC1Option, HelpHidden]>,
+  HelpText<"Generate relocatable device code, also known as separate compilation mode.">;
+def fno_cuda_rdc : Flag<["-"], "fno-cuda-rdc">;
 def dA : Flag<["-"], "dA">, Group<d_Group>;
 def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
   HelpText<"Print macro definitions in -E mode in addition to normal output">;
@@ -613,8 +629,8 @@
 def fdouble_square_bracket_attributes : Flag<[ "-" ], "fdouble-square-bracket-attributes">,
   Group<f_Group>, Flags<[DriverOption, CC1Option]>,
   HelpText<"Enable '[[]]' attributes in all C and C++ language modes">;
-def fno_double_square_bracket_attributes : Flag<[ "-" ], "fno-fdouble-square-bracket-attributes">,
-  Group<f_Group>, Flags<[DriverOption]>,
+def fno_double_square_bracket_attributes : Flag<[ "-" ], "fno-double-square-bracket-attributes">,
+  Group<f_Group>, Flags<[DriverOption, CC1Option]>,
   HelpText<"Disable '[[]]' attributes in all C and C++ language modes">;
 
 def fautolink : Flag <["-"], "fautolink">, Group<f_Group>;
@@ -780,6 +796,10 @@
     HelpText<"Print a template comparison tree for differing templates">;
 def fdeclspec : Flag<["-"], "fdeclspec">, Group<f_clang_Group>,
   HelpText<"Allow __declspec as a keyword">, Flags<[CC1Option]>;
+def fdiscard_value_names : Flag<["-"], "fdiscard-value-names">, Group<f_clang_Group>,
+  HelpText<"Discard value names in LLVM IR">, Flags<[DriverOption]>;
+def fno_discard_value_names : Flag<["-"], "fno-discard-value-names">, Group<f_clang_Group>,
+  HelpText<"Do not discard value names in LLVM IR">, Flags<[DriverOption]>;
 def fdollars_in_identifiers : Flag<["-"], "fdollars-in-identifiers">, Group<f_Group>,
   HelpText<"Allow '$' in identifiers">, Flags<[CC1Option]>;
 def fdwarf2_cfi_asm : Flag<["-"], "fdwarf2-cfi-asm">, Group<clang_ignored_f_Group>;
@@ -795,13 +815,17 @@
   HelpText<"Emit all declarations, even if unused">;
 def femulated_tls : Flag<["-"], "femulated-tls">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Use emutls functions to access thread_local variables">;
-def fno_emulated_tls : Flag<["-"], "fno-emulated-tls">, Group<f_Group>;
+def fno_emulated_tls : Flag<["-"], "fno-emulated-tls">, Group<f_Group>, Flags<[CC1Option]>;
 def fencoding_EQ : Joined<["-"], "fencoding=">, Group<f_Group>;
 def ferror_limit_EQ : Joined<["-"], "ferror-limit=">, Group<f_Group>, Flags<[CoreOption]>;
 def fexceptions : Flag<["-"], "fexceptions">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Enable support for exception handling">;
+def fdwarf_exceptions : Flag<["-"], "fdwarf-exceptions">, Group<f_Group>,
+  Flags<[CC1Option]>, HelpText<"Use DWARF style exceptions">;
 def fsjlj_exceptions : Flag<["-"], "fsjlj-exceptions">, Group<f_Group>,
   Flags<[CC1Option]>, HelpText<"Use SjLj style exceptions">;
+def fseh_exceptions : Flag<["-"], "fseh-exceptions">, Group<f_Group>,
+  Flags<[CC1Option]>, HelpText<"Use SEH style exceptions">;
 def fexcess_precision_EQ : Joined<["-"], "fexcess-precision=">,
     Group<clang_ignored_gcc_optimization_f_Group>;
 def : Flag<["-"], "fexpensive-optimizations">, Group<clang_ignored_gcc_optimization_f_Group>;
@@ -825,6 +849,12 @@
 def fjump_tables : Flag<["-"], "fjump-tables">, Group<f_Group>;
 def fno_jump_tables : Flag<["-"], "fno-jump-tables">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Do not use jump tables for lowering switches">;
+def fforce_enable_int128 : Flag<["-"], "fforce-enable-int128">,
+  Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Enable support for int128_t type">;
+def fno_force_enable_int128 : Flag<["-"], "fno-force-enable-int128">,
+  Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Disable support for int128_t type">;
 
 // Begin sanitizer flags. These should all be core options exposed in all driver
 // modes.
@@ -877,6 +907,14 @@
                                            Group<f_clang_Group>,
                                            Flags<[CoreOption, DriverOption]>,
                                            HelpText<"Disable use-after-scope detection in AddressSanitizer">;
+def fsanitize_address_poison_class_member_array_new_cookie
+    : Flag<[ "-" ], "fsanitize-address-poison-class-member-array-new-cookie">,
+      Group<f_clang_Group>,
+      HelpText<"Enable poisoning array cookies when using class member operator new[] in AddressSanitizer">;
+def fno_sanitize_address_poison_class_member_array_new_cookie
+    : Flag<[ "-" ], "fno-sanitize-address-poison-class-member-array-new-cookie">,
+      Group<f_clang_Group>,
+      HelpText<"Disable poisoning array cookies when using class member operator new[] in AddressSanitizer">;
 def fsanitize_address_globals_dead_stripping : Flag<["-"], "fsanitize-address-globals-dead-stripping">,
                                         Group<f_clang_Group>,
                                         HelpText<"Enable linker dead stripping of globals in AddressSanitizer">;
@@ -914,6 +952,9 @@
                                  Flags<[CoreOption, DriverOption]>,
                                  Group<f_clang_Group>,
                                  HelpText<"Disable control flow integrity (CFI) checks for cross-DSO calls.">;
+def fsanitize_cfi_icall_generalize_pointers : Flag<["-"], "fsanitize-cfi-icall-generalize-pointers">,
+                                              Group<f_clang_Group>,
+                                              HelpText<"Generalize pointers in CFI indirect call type signature checks">;
 def fsanitize_stats : Flag<["-"], "fsanitize-stats">,
                               Group<f_clang_Group>,
                               HelpText<"Enable sanitizer statistics gathering.">;
@@ -1018,6 +1059,8 @@
 def finline_hint_functions: Flag<["-"], "finline-hint-functions">, Group<f_clang_Group>, Flags<[CC1Option]>,
   HelpText<"Inline functions which are (explicitly or implicitly) marked inline">;
 def finline : Flag<["-"], "finline">, Group<clang_ignored_f_Group>;
+def fexperimental_isel : Flag<["-"], "fexperimental-isel">, Group<f_clang_Group>,
+  HelpText<"Enables the experimental global instruction selector">;
 def fexperimental_new_pass_manager : Flag<["-"], "fexperimental-new-pass-manager">,
   Group<f_clang_Group>, Flags<[CC1Option]>,
   HelpText<"Enables an experimental new pass manager in LLVM.">;
@@ -1025,6 +1068,15 @@
 def fexec_charset_EQ : Joined<["-"], "fexec-charset=">, Group<f_Group>;
 def finstrument_functions : Flag<["-"], "finstrument-functions">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Generate calls to instrument function entry and exit">;
+def finstrument_functions_after_inlining : Flag<["-"], "finstrument-functions-after-inlining">, Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Like -finstrument-functions, but insert the calls after inlining">;
+def finstrument_function_entry_bare : Flag<["-"], "finstrument-function-entry-bare">, Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Instrument function entry only, after inlining, without arguments to the instrumentation call">;
+def fcf_protection_EQ : Joined<["-"], "fcf-protection=">, Flags<[CoreOption, CC1Option]>, Group<f_Group>,
+  HelpText<"Instrument control-flow architecture protection. Options: return, branch, full, none.">, Values<"return,branch,full,none">;
+def fcf_protection : Flag<["-"], "fcf-protection">, Group<f_Group>, Flags<[CoreOption, CC1Option]>,
+  Alias<fcf_protection_EQ>, AliasArgs<["full"]>,
+  HelpText<"Enable cf-protection in 'full' mode">;
 
 def fxray_instrument : Flag<["-"], "fxray-instrument">, Group<f_Group>,
   Flags<[CC1Option]>,
@@ -1049,6 +1101,12 @@
   Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Filename defining the whitelist for imbuing the 'never instrument' XRay attribute.">;
 
+def fxray_always_emit_customevents : Flag<["-"], "fxray-always-emit-customevents">, Group<f_Group>,
+  Flags<[CC1Option]>,
+  HelpText<"Determine whether to always emit __xray_customevent(...) calls even if the function it appears in is not always instrumented.">;
+def fnoxray_always_emit_customevents : Flag<["-"], "fno-xray-always-emit-customevents">, Group<f_Group>,
+  Flags<[CC1Option]>;
+
 def ffine_grained_bitfield_accesses : Flag<["-"],
   "ffine-grained-bitfield-accesses">, Group<f_clang_Group>, Flags<[CC1Option]>,
   HelpText<"Use separate accesses for bitfields with legal widths and alignments.">;
@@ -1214,6 +1272,8 @@
 def fno_gnu_keywords : Flag<["-"], "fno-gnu-keywords">, Group<f_Group>, Flags<[CC1Option]>;
 def fno_inline_functions : Flag<["-"], "fno-inline-functions">, Group<f_clang_Group>, Flags<[CC1Option]>;
 def fno_inline : Flag<["-"], "fno-inline">, Group<f_clang_Group>, Flags<[CC1Option]>;
+def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group<f_clang_Group>,
+  HelpText<"Disables the experimental global instruction selector">;
 def fno_experimental_new_pass_manager : Flag<["-"], "fno-experimental-new-pass-manager">,
   Group<f_clang_Group>, Flags<[CC1Option]>,
   HelpText<"Disables an experimental new pass manager in LLVM.">;
@@ -1255,6 +1315,8 @@
 def fno_pascal_strings : Flag<["-"], "fno-pascal-strings">, Group<f_Group>;
 def fno_rtti : Flag<["-"], "fno-rtti">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Disable generation of rtti information">;
+def fno_rtti_data : Flag<["-"], "fno-rtti-data">, Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Control emission of RTTI data">;
 def fno_short_enums : Flag<["-"], "fno-short-enums">, Group<f_Group>;
 def fno_show_column : Flag<["-"], "fno-show-column">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Do not include column number on diagnostics">;
@@ -1344,22 +1406,32 @@
 
 def fobjc_sender_dependent_dispatch : Flag<["-"], "fobjc-sender-dependent-dispatch">, Group<f_Group>;
 def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group<f_Group>;
-def fopenmp : Flag<["-"], "fopenmp">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fopenmp : Flag<["-"], "fopenmp">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
+  HelpText<"Parse OpenMP pragmas and generate parallel code.">;
 def fno_openmp : Flag<["-"], "fno-openmp">, Group<f_Group>, Flags<[NoArgumentUnused]>;
 def fopenmp_version_EQ : Joined<["-"], "fopenmp-version=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
 def fopenmp_EQ : Joined<["-"], "fopenmp=">, Group<f_Group>;
-def fopenmp_use_tls : Flag<["-"], "fopenmp-use-tls">, Group<f_Group>, Flags<[NoArgumentUnused]>;
-def fnoopenmp_use_tls : Flag<["-"], "fnoopenmp-use-tls">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fopenmp_use_tls : Flag<["-"], "fopenmp-use-tls">, Group<f_Group>,
+  Flags<[NoArgumentUnused, HelpHidden]>;
+def fnoopenmp_use_tls : Flag<["-"], "fnoopenmp-use-tls">, Group<f_Group>,
+  Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fopenmp_targets_EQ : CommaJoined<["-"], "fopenmp-targets=">, Flags<[DriverOption, CC1Option]>,
   HelpText<"Specify comma-separated list of triples OpenMP offloading targets to be supported">;
-def fopenmp_dump_offload_linker_script : Flag<["-"], "fopenmp-dump-offload-linker-script">, Group<f_Group>, 
-  Flags<[NoArgumentUnused]>;
-def fopenmp_relocatable_target : Flag<["-"], "fopenmp-relocatable-target">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
-  HelpText<"OpenMP target code is compiled as relocatable using the -c flag. For OpenMP targets the code is relocatable by default.">;
-def fnoopenmp_relocatable_target : Flag<["-"], "fnoopenmp-relocatable-target">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
-  HelpText<"Do not compile OpenMP target code as relocatable.">;
+def fopenmp_dump_offload_linker_script : Flag<["-"], "fopenmp-dump-offload-linker-script">,
+  Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>;
+def fopenmp_relocatable_target : Flag<["-"], "fopenmp-relocatable-target">,
+  Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fnoopenmp_relocatable_target : Flag<["-"], "fnoopenmp-relocatable-target">,
+  Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
+  HelpText<"Emit OpenMP code only for SIMD-based constructs.">;
+def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>, Flags<[NoArgumentUnused]>;
 def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
 def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
+def fno_escaping_block_tail_calls : Flag<["-"], "fno-escaping-block-tail-calls">, Group<f_Group>, Flags<[CC1Option]>;
+def fescaping_block_tail_calls : Flag<["-"], "fescaping-block-tail-calls">, Group<f_Group>;
 def force__cpusubtype__ALL : Flag<["-"], "force_cpusubtype_ALL">;
 def force__flat__namespace : Flag<["-"], "force_flat_namespace">;
 def force__load : Separate<["-"], "force_load">;
@@ -1381,6 +1453,10 @@
 def fno_pic : Flag<["-"], "fno-pic">, Group<f_Group>;
 def fpie : Flag<["-"], "fpie">, Group<f_Group>;
 def fno_pie : Flag<["-"], "fno-pie">, Group<f_Group>;
+def fplt : Flag<["-"], "fplt">, Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Use the PLT to make function calls">;
+def fno_plt : Flag<["-"], "fno-plt">, Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Do not use the PLT to make function calls">;
 def fropi : Flag<["-"], "fropi">, Group<f_Group>;
 def fno_ropi : Flag<["-"], "fno-ropi">, Group<f_Group>;
 def frwpi : Flag<["-"], "frwpi">, Group<f_Group>;
@@ -1543,6 +1619,10 @@
  Flags<[CC1Option]>, HelpText<"Place each data in its own section (ELF Only)">;
 def fno_data_sections : Flag <["-"], "fno-data-sections">, Group<f_Group>,
   Flags<[CC1Option]>;
+def fstack_size_section : Flag<["-"], "fstack-size-section">, Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Emit section containing metadata on function stack sizes">;
+def fno_stack_size_section : Flag<["-"], "fno-stack-size-section">, Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Don't emit section containing metadata on function stack sizes">;
 
 def funique_section_names : Flag <["-"], "funique-section-names">,
   Group<f_Group>, Flags<[CC1Option]>,
@@ -1568,7 +1648,7 @@
 def fno_debug_types_section: Flag<["-"], "fno-debug-types-section">, Group<f_Group>,
   Flags<[CC1Option]>;
 def fsplit_dwarf_inlining: Flag <["-"], "fsplit-dwarf-inlining">, Group<f_Group>,
-  Flags<[CC1Option]>, HelpText<"Place debug types in their own section (ELF Only)">;
+  Flags<[CC1Option]>, HelpText<"Provide minimal debug info in the object/executable to facilitate online symbolication/stack traces in the absence of .dwo/.dwp files when using Split DWARF">;
 def fno_split_dwarf_inlining: Flag<["-"], "fno-split-dwarf-inlining">, Group<f_Group>,
   Flags<[CC1Option]>;
 def fdebug_prefix_map_EQ
@@ -1629,6 +1709,11 @@
     HelpText<"DWARF debug sections compression type">;
 def gz_EQ : Joined<["-"], "gz=">, Group<g_flags_Group>,
     HelpText<"DWARF debug sections compression type">;
+def gembed_source : Flag<["-"], "gembed-source">, Group<g_flags_Group>, Flags<[CC1Option]>,
+    HelpText<"Embed source text in DWARF debug sections">;
+def gno_embed_source : Flag<["-"], "gno-embed-source">, Group<g_flags_Group>,
+    Flags<[DriverOption]>,
+    HelpText<"Restore the default behavior of not embedding source text in DWARF debug sections">;
 def headerpad__max__install__names : Joined<["-"], "headerpad_max_install_names">;
 def help : Flag<["-", "--"], "help">, Flags<[CC1Option,CC1AsOption]>,
   HelpText<"Display available options">;
@@ -1679,7 +1764,7 @@
   Flags<[CC1Option]>;
 def ivfsoverlay : JoinedOrSeparate<["-"], "ivfsoverlay">, Group<clang_i_Group>, Flags<[CC1Option]>,
   HelpText<"Overlay the virtual filesystem described by file over the real file system">;
-def i : Joined<["-"], "i">, Group<i_Group>;
+def imultilib : Separate<["-"], "imultilib">, Group<gfortran_Group>;
 def keep__private__externs : Flag<["-"], "keep_private_externs">;
 def l : JoinedOrSeparate<["-"], "l">, Flags<[LinkerInput, RenderJoined]>,
         Group<Link_Group>;
@@ -1693,8 +1778,6 @@
 def m32 : Flag<["-"], "m32">, Group<m_Group>, Flags<[DriverOption, CoreOption]>;
 def mqdsp6_compat : Flag<["-"], "mqdsp6-compat">, Group<m_Group>, Flags<[DriverOption,CC1Option]>,
   HelpText<"Enable hexagon-qdsp6 backward compatibility">;
-def m3dnowa : Flag<["-"], "m3dnowa">, Group<m_x86_Features_Group>;
-def m3dnow : Flag<["-"], "m3dnow">, Group<m_x86_Features_Group>;
 def m64 : Flag<["-"], "m64">, Group<m_Group>, Flags<[DriverOption, CoreOption]>;
 def mx32 : Flag<["-"], "mx32">, Group<m_Group>, Flags<[DriverOption, CoreOption]>;
 def mabi_EQ : Joined<["-"], "mabi=">, Group<m_Group>;
@@ -1776,18 +1859,18 @@
   HelpText<"Set the stack alignment">;
 def mstack_probe_size : Joined<["-"], "mstack-probe-size=">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Set the stack probe size">;
+def mstack_arg_probe : Flag<["-"], "mstack-arg-probe">, Group<m_Group>,
+  HelpText<"Enable stack probes">;
+def mno_stack_arg_probe : Flag<["-"], "mno-stack-arg-probe">, Group<m_Group>, Flags<[CC1Option]>,
+  HelpText<"Disable stack probes which are enabled by default">;
 def mthread_model : Separate<["-"], "mthread-model">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"The thread model to use, e.g. posix, single (posix by default)">, Values<"posix,single">;
 def meabi : Separate<["-"], "meabi">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Set EABI type, e.g. 4, 5 or gnu (default depends on triple)">, Values<"default,4,5,gnu">;
 
-def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>;
-def mno_3dnowa : Flag<["-"], "mno-3dnowa">, Group<m_x86_Features_Group>;
-def mno_3dnow : Flag<["-"], "mno-3dnow">, Group<m_x86_Features_Group>;
 def mno_constant_cfstrings : Flag<["-"], "mno-constant-cfstrings">, Group<m_Group>;
 def mno_global_merge : Flag<["-"], "mno-global-merge">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Disable merging of globals">;
-def mno_mmx : Flag<["-"], "mno-mmx">, Group<m_x86_Features_Group>;
 def mno_pascal_strings : Flag<["-"], "mno-pascal-strings">,
   Alias<fno_pascal_strings>;
 def mno_red_zone : Flag<["-"], "mno-red-zone">, Group<m_Group>;
@@ -1795,65 +1878,6 @@
 def mno_rtd: Flag<["-"], "mno-rtd">, Group<m_Group>;
 def mno_soft_float : Flag<["-"], "mno-soft-float">, Group<m_Group>;
 def mno_stackrealign : Flag<["-"], "mno-stackrealign">, Group<m_Group>;
-def mno_x87 : Flag<["-"], "mno-x87">, Group<m_x86_Features_Group>;
-def mno_80387 : Flag<["-"], "mno-80387">, Alias<mno_x87>;
-def mno_sse2 : Flag<["-"], "mno-sse2">, Group<m_x86_Features_Group>;
-def mno_sse3 : Flag<["-"], "mno-sse3">, Group<m_x86_Features_Group>;
-def mno_sse4a : Flag<["-"], "mno-sse4a">, Group<m_x86_Features_Group>;
-def mno_sse4_1 : Flag<["-"], "mno-sse4.1">, Group<m_x86_Features_Group>;
-def mno_sse4_2 : Flag<["-"], "mno-sse4.2">, Group<m_x86_Features_Group>;
-// -mno-sse4 turns off sse4.1 which has the effect of turning off everything
-// later than 4.1. -msse4 turns on 4.2 which has the effect of turning on
-// everything earlier than 4.2.
-def mno_sse4 : Flag<["-"], "mno-sse4">, Alias<mno_sse4_1>;
-def mno_sse : Flag<["-"], "mno-sse">, Group<m_x86_Features_Group>;
-def mno_ssse3 : Flag<["-"], "mno-ssse3">, Group<m_x86_Features_Group>;
-def mno_aes : Flag<["-"], "mno-aes">, Group<m_x86_Features_Group>;
-def mno_avx : Flag<["-"], "mno-avx">, Group<m_x86_Features_Group>;
-def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
-def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
-def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
-def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>;
-def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
-def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
-def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
-def mno_avx512bw : Flag<["-"], "mno-avx512bw">, Group<m_x86_Features_Group>;
-def mno_avx512vl : Flag<["-"], "mno-avx512vl">, Group<m_x86_Features_Group>;
-def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>;
-def mno_avx512ifma : Flag<["-"], "mno-avx512ifma">, Group<m_x86_Features_Group>;
-def mno_pclmul : Flag<["-"], "mno-pclmul">, Group<m_x86_Features_Group>;
-def mno_lzcnt : Flag<["-"], "mno-lzcnt">, Group<m_x86_Features_Group>;
-def mno_rdrnd : Flag<["-"], "mno-rdrnd">, Group<m_x86_Features_Group>;
-def mno_fsgsbase : Flag<["-"], "mno-fsgsbase">, Group<m_x86_Features_Group>;
-def mno_bmi : Flag<["-"], "mno-bmi">, Group<m_x86_Features_Group>;
-def mno_bmi2 : Flag<["-"], "mno-bmi2">, Group<m_x86_Features_Group>;
-def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
-def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>;
-def mno_lwp : Flag<["-"], "mno-lwp">, Group<m_x86_Features_Group>;
-def mno_fma4 : Flag<["-"], "mno-fma4">, Group<m_x86_Features_Group>;
-def mno_fma : Flag<["-"], "mno-fma">, Group<m_x86_Features_Group>;
-def mno_xop : Flag<["-"], "mno-xop">, Group<m_x86_Features_Group>;
-def mno_f16c : Flag<["-"], "mno-f16c">, Group<m_x86_Features_Group>;
-def mno_rtm : Flag<["-"], "mno-rtm">, Group<m_x86_Features_Group>;
-def mno_prfchw : Flag<["-"], "mno-prfchw">, Group<m_x86_Features_Group>;
-def mno_rdseed : Flag<["-"], "mno-rdseed">, Group<m_x86_Features_Group>;
-def mno_adx : Flag<["-"], "mno-adx">, Group<m_x86_Features_Group>;
-def mno_sha : Flag<["-"], "mno-sha">, Group<m_x86_Features_Group>;
-def mno_cx16 : Flag<["-"], "mno-cx16">, Group<m_x86_Features_Group>;
-def mno_fxsr : Flag<["-"], "mno-fxsr">, Group<m_x86_Features_Group>;
-def mno_xsave : Flag<["-"], "mno-xsave">, Group<m_x86_Features_Group>;
-def mno_xsaveopt : Flag<["-"], "mno-xsaveopt">, Group<m_x86_Features_Group>;
-def mno_xsavec : Flag<["-"], "mno-xsavec">, Group<m_x86_Features_Group>;
-def mno_xsaves : Flag<["-"], "mno-xsaves">, Group<m_x86_Features_Group>;
-def mno_mwaitx : Flag<["-"], "mno-mwaitx">, Group<m_x86_Features_Group>;
-def mno_clzero : Flag<["-"], "mno-clzero">, Group<m_x86_Features_Group>;
-def mno_pku : Flag<["-"], "mno-pku">, Group<m_x86_Features_Group>;
-def mno_clflushopt : Flag<["-"], "mno-clflushopt">, Group<m_x86_Features_Group>;
-def mno_clwb : Flag<["-"], "mno-clwb">, Group<m_x86_Features_Group>;
-def mno_movbe : Flag<["-"], "mno-movbe">, Group<m_x86_Features_Group>;
-def mno_mpx : Flag<["-"], "mno-mpx">, Group<m_x86_Features_Group>;
-def mno_sgx : Flag<["-"], "mno-sgx">, Group<m_x86_Features_Group>;
-def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group<m_x86_Features_Group>;
 
 def munaligned_access : Flag<["-"], "munaligned-access">, Group<m_arm_Features_Group>,
   HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64 only)">;
@@ -1880,7 +1904,6 @@
 
 def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group<m_aarch64_Features_Group>,
   HelpText<"Generate code which only uses the general purpose registers (AArch64 only)">;
-
 def mfix_cortex_a53_835769 : Flag<["-"], "mfix-cortex-a53-835769">,
   Group<m_aarch64_Features_Group>,
   HelpText<"Workaround Cortex-A53 erratum 835769 (AArch64 only)">;
@@ -1892,12 +1915,22 @@
 
 def msimd128 : Flag<["-"], "msimd128">, Group<m_wasm_Features_Group>;
 def mno_simd128 : Flag<["-"], "mno-simd128">, Group<m_wasm_Features_Group>;
+def mnontrapping_fptoint : Flag<["-"], "mnontrapping-fptoint">, Group<m_wasm_Features_Group>;
+def mno_nontrapping_fptoint : Flag<["-"], "mno-nontrapping-fptoint">, Group<m_wasm_Features_Group>;
+def msign_ext : Flag<["-"], "msign-ext">, Group<m_wasm_Features_Group>;
+def mno_sign_ext : Flag<["-"], "mno-sign-ext">, Group<m_wasm_Features_Group>;
+def mexception_handing : Flag<["-"], "mexception-handling">, Group<m_wasm_Features_Group>;
+def mno_exception_handing : Flag<["-"], "mno-exception-handling">, Group<m_wasm_Features_Group>;
 
 def mamdgpu_debugger_abi : Joined<["-"], "mamdgpu-debugger-abi=">,
   Flags<[HelpHidden]>,
   Group<m_Group>,
   HelpText<"Generate additional code for specified <version> of debugger ABI (AMDGPU only)">,
   MetaVarName<"<version>">;
+def mxnack : Flag<["-"], "mxnack">, Group<m_amdgpu_Features_Group>,
+  HelpText<"Enable XNACK (AMDGPU only)">;
+def mno_xnack : Flag<["-"], "mno-xnack">, Group<m_amdgpu_Features_Group>,
+  HelpText<"Disable XNACK (AMDGPU only)">;
 
 def faltivec : Flag<["-"], "faltivec">, Group<f_Group>, Flags<[DriverOption]>;
 def fno_altivec : Flag<["-"], "fno-altivec">, Group<f_Group>, Flags<[DriverOption]>;
@@ -1993,68 +2026,14 @@
 def mimplicit_float : Flag<["-"], "mimplicit-float">, Group<m_Group>;
 def mrecip : Flag<["-"], "mrecip">, Group<m_Group>;
 def mrecip_EQ : CommaJoined<["-"], "mrecip=">, Group<m_Group>, Flags<[CC1Option]>;
+def mprefer_vector_width_EQ : Joined<["-"], "mprefer-vector-width=">, Group<m_Group>, Flags<[CC1Option]>,
+  HelpText<"Specifies preferred vector width for auto-vectorization. Defaults to 'none' which allows target specific decisions.">;
 def mpie_copy_relocations : Flag<["-"], "mpie-copy-relocations">, Group<m_Group>,
   Flags<[CC1Option]>,
   HelpText<"Use copy relocations support for PIE builds">;
 def mno_pie_copy_relocations : Flag<["-"], "mno-pie-copy-relocations">, Group<m_Group>;
 def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86 only)">,
   Flags<[CC1Option]>, Group<m_Group>;
-def mx87 : Flag<["-"], "mx87">, Group<m_x86_Features_Group>;
-def m80387 : Flag<["-"], "m80387">, Alias<mx87>;
-def msse2 : Flag<["-"], "msse2">, Group<m_x86_Features_Group>;
-def msse3 : Flag<["-"], "msse3">, Group<m_x86_Features_Group>;
-def msse4a : Flag<["-"], "msse4a">, Group<m_x86_Features_Group>;
-def msse4_1 : Flag<["-"], "msse4.1">, Group<m_x86_Features_Group>;
-def msse4_2 : Flag<["-"], "msse4.2">, Group<m_x86_Features_Group>;
-def msse4 : Flag<["-"], "msse4">, Alias<msse4_2>;
-def msse : Flag<["-"], "msse">, Group<m_x86_Features_Group>;
-def mssse3 : Flag<["-"], "mssse3">, Group<m_x86_Features_Group>;
-def maes : Flag<["-"], "maes">, Group<m_x86_Features_Group>;
-def mavx : Flag<["-"], "mavx">, Group<m_x86_Features_Group>;
-def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
-def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
-def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
-def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Group>;
-def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
-def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
-def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
-def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>;
-def mavx512vl : Flag<["-"], "mavx512vl">, Group<m_x86_Features_Group>;
-def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>;
-def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>;
-def mpclmul : Flag<["-"], "mpclmul">, Group<m_x86_Features_Group>;
-def mlzcnt : Flag<["-"], "mlzcnt">, Group<m_x86_Features_Group>;
-def mrdrnd : Flag<["-"], "mrdrnd">, Group<m_x86_Features_Group>;
-def mfsgsbase : Flag<["-"], "mfsgsbase">, Group<m_x86_Features_Group>;
-def mbmi : Flag<["-"], "mbmi">, Group<m_x86_Features_Group>;
-def mbmi2 : Flag<["-"], "mbmi2">, Group<m_x86_Features_Group>;
-def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>;
-def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>;
-def mlwp : Flag<["-"], "mlwp">, Group<m_x86_Features_Group>;
-def mfma4 : Flag<["-"], "mfma4">, Group<m_x86_Features_Group>;
-def mfma : Flag<["-"], "mfma">, Group<m_x86_Features_Group>;
-def mxop : Flag<["-"], "mxop">, Group<m_x86_Features_Group>;
-def mf16c : Flag<["-"], "mf16c">, Group<m_x86_Features_Group>;
-def mrtm : Flag<["-"], "mrtm">, Group<m_x86_Features_Group>;
-def mprfchw : Flag<["-"], "mprfchw">, Group<m_x86_Features_Group>;
-def mrdseed : Flag<["-"], "mrdseed">, Group<m_x86_Features_Group>;
-def mpku : Flag<["-"], "mpku">, Group<m_x86_Features_Group>;
-def madx : Flag<["-"], "madx">, Group<m_x86_Features_Group>;
-def msha : Flag<["-"], "msha">, Group<m_x86_Features_Group>;
-def mcx16 : Flag<["-"], "mcx16">, Group<m_x86_Features_Group>;
-def mfxsr : Flag<["-"], "mfxsr">, Group<m_x86_Features_Group>;
-def mxsave : Flag<["-"], "mxsave">, Group<m_x86_Features_Group>;
-def mxsaveopt : Flag<["-"], "mxsaveopt">, Group<m_x86_Features_Group>;
-def mxsavec : Flag<["-"], "mxsavec">, Group<m_x86_Features_Group>;
-def mxsaves : Flag<["-"], "mxsaves">, Group<m_x86_Features_Group>;
-def mmwaitx : Flag<["-"], "mmwaitx">, Group<m_x86_Features_Group>;
-def mclzero : Flag<["-"], "mclzero">, Group<m_x86_Features_Group>;
-def mclflushopt : Flag<["-"], "mclflushopt">, Group<m_x86_Features_Group>;
-def mclwb : Flag<["-"], "mclwb">, Group<m_x86_Features_Group>;
-def mmovbe : Flag<["-"], "mmovbe">, Group<m_x86_Features_Group>;
-def mmpx : Flag<["-"], "mmpx">, Group<m_x86_Features_Group>;
-def msgx : Flag<["-"], "msgx">, Group<m_x86_Features_Group>;
-def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group<m_x86_Features_Group>;
 def mips16 : Flag<["-"], "mips16">, Group<m_Group>;
 def mno_mips16 : Flag<["-"], "mno-mips16">, Group<m_Group>;
 def mmicromips : Flag<["-"], "mmicromips">, Group<m_Group>;
@@ -2071,6 +2050,9 @@
   IgnoredGCCCompat;
 def mno_branch_likely : Flag<["-"], "mno-branch-likely">, Group<m_Group>,
   IgnoredGCCCompat;
+def mindirect_jump_EQ : Joined<["-"], "mindirect-jump=">,
+  Group<m_Group>,
+  HelpText<"Change indirect jump instructions to inhibit speculation">;
 def mdsp : Flag<["-"], "mdsp">, Group<m_Group>;
 def mno_dsp : Flag<["-"], "mno-dsp">, Group<m_Group>;
 def mdspr2 : Flag<["-"], "mdspr2">, Group<m_Group>;
@@ -2469,7 +2451,8 @@
 def _serialize_diags : Separate<["-", "--"], "serialize-diagnostics">, Flags<[DriverOption]>,
   HelpText<"Serialize compiler diagnostics to a file">;
 // We give --version different semantics from -version.
-def _version : Flag<["--"], "version">,  Flags<[CC1Option]>;
+def _version : Flag<["--"], "version">, Flags<[CoreOption, CC1Option]>,
+  HelpText<"Print version information">;
 def _signed_char : Flag<["--"], "signed-char">, Alias<fsigned_char>;
 def _std : Separate<["--"], "std">, Alias<std_EQ>;
 def _stdlib : Separate<["--"], "stdlib">, Alias<stdlib_EQ>;
@@ -2499,6 +2482,8 @@
            Alias<mcpu_EQ>, AliasArgs<["hexagonv60"]>;
 def mv62 : Flag<["-"], "mv62">, Group<m_hexagon_Features_Group>,
            Alias<mcpu_EQ>, AliasArgs<["hexagonv62"]>;
+def mv65 : Flag<["-"], "mv65">, Group<m_hexagon_Features_Group>,
+           Alias<mcpu_EQ>, AliasArgs<["hexagonv65"]>;
 def mhexagon_hvx : Flag<[ "-" ], "mhvx">,
                    Group<m_hexagon_Features_HVX_Group>,
                    HelpText<"Enable Hexagon Vector eXtensions">;
@@ -2519,6 +2504,156 @@
     : Flag<[ "-" ], "mno-hvx-double">,
       Group<m_hexagon_Features_HVX_Group>,
       HelpText<"Disable Hexagon Double Vector eXtensions">;
+def ffixed_r19: Flag<["-"], "ffixed-r19">,
+  HelpText<"Reserve the r19 register (Hexagon only)">;
+
+
+// X86 feature flags
+def mx87 : Flag<["-"], "mx87">, Group<m_x86_Features_Group>;
+def mno_x87 : Flag<["-"], "mno-x87">, Group<m_x86_Features_Group>;
+def m80387 : Flag<["-"], "m80387">, Alias<mx87>;
+def mno_80387 : Flag<["-"], "mno-80387">, Alias<mno_x87>;
+def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>;
+def mno_mmx : Flag<["-"], "mno-mmx">, Group<m_x86_Features_Group>;
+def m3dnow : Flag<["-"], "m3dnow">, Group<m_x86_Features_Group>;
+def mno_3dnow : Flag<["-"], "mno-3dnow">, Group<m_x86_Features_Group>;
+def m3dnowa : Flag<["-"], "m3dnowa">, Group<m_x86_Features_Group>;
+def mno_3dnowa : Flag<["-"], "mno-3dnowa">, Group<m_x86_Features_Group>;
+def msse : Flag<["-"], "msse">, Group<m_x86_Features_Group>;
+def mno_sse : Flag<["-"], "mno-sse">, Group<m_x86_Features_Group>;
+def msse2 : Flag<["-"], "msse2">, Group<m_x86_Features_Group>;
+def mno_sse2 : Flag<["-"], "mno-sse2">, Group<m_x86_Features_Group>;
+def msse3 : Flag<["-"], "msse3">, Group<m_x86_Features_Group>;
+def mno_sse3 : Flag<["-"], "mno-sse3">, Group<m_x86_Features_Group>;
+def mssse3 : Flag<["-"], "mssse3">, Group<m_x86_Features_Group>;
+def mno_ssse3 : Flag<["-"], "mno-ssse3">, Group<m_x86_Features_Group>;
+def msse4_1 : Flag<["-"], "msse4.1">, Group<m_x86_Features_Group>;
+def mno_sse4_1 : Flag<["-"], "mno-sse4.1">, Group<m_x86_Features_Group>;
+def msse4_2 : Flag<["-"], "msse4.2">, Group<m_x86_Features_Group>;
+def mno_sse4_2 : Flag<["-"], "mno-sse4.2">, Group<m_x86_Features_Group>;
+def msse4 : Flag<["-"], "msse4">, Alias<msse4_2>;
+// -mno-sse4 turns off sse4.1 which has the effect of turning off everything
+// later than 4.1. -msse4 turns on 4.2 which has the effect of turning on
+// everything earlier than 4.2.
+def mno_sse4 : Flag<["-"], "mno-sse4">, Alias<mno_sse4_1>;
+def msse4a : Flag<["-"], "msse4a">, Group<m_x86_Features_Group>;
+def mno_sse4a : Flag<["-"], "mno-sse4a">, Group<m_x86_Features_Group>;
+def mavx : Flag<["-"], "mavx">, Group<m_x86_Features_Group>;
+def mno_avx : Flag<["-"], "mno-avx">, Group<m_x86_Features_Group>;
+def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
+def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
+def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
+def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
+def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group<m_x86_Features_Group>;
+def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group<m_x86_Features_Group>;
+def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>;
+def mno_avx512bw : Flag<["-"], "mno-avx512bw">, Group<m_x86_Features_Group>;
+def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
+def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
+def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
+def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
+def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
+def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
+def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>;
+def mno_avx512ifma : Flag<["-"], "mno-avx512ifma">, Group<m_x86_Features_Group>;
+def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
+def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
+def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>;
+def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>;
+def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group<m_x86_Features_Group>;
+def mno_avx512vbmi2 : Flag<["-"], "mno-avx512vbmi2">, Group<m_x86_Features_Group>;
+def mavx512vl : Flag<["-"], "mavx512vl">, Group<m_x86_Features_Group>;
+def mno_avx512vl : Flag<["-"], "mno-avx512vl">, Group<m_x86_Features_Group>;
+def mavx512vnni : Flag<["-"], "mavx512vnni">, Group<m_x86_Features_Group>;
+def mno_avx512vnni : Flag<["-"], "mno-avx512vnni">, Group<m_x86_Features_Group>;
+def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Group>;
+def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>;
+def madx : Flag<["-"], "madx">, Group<m_x86_Features_Group>;
+def mno_adx : Flag<["-"], "mno-adx">, Group<m_x86_Features_Group>;
+def maes : Flag<["-"], "maes">, Group<m_x86_Features_Group>;
+def mno_aes : Flag<["-"], "mno-aes">, Group<m_x86_Features_Group>;
+def mbmi : Flag<["-"], "mbmi">, Group<m_x86_Features_Group>;
+def mno_bmi : Flag<["-"], "mno-bmi">, Group<m_x86_Features_Group>;
+def mbmi2 : Flag<["-"], "mbmi2">, Group<m_x86_Features_Group>;
+def mno_bmi2 : Flag<["-"], "mno-bmi2">, Group<m_x86_Features_Group>;
+def mclflushopt : Flag<["-"], "mclflushopt">, Group<m_x86_Features_Group>;
+def mno_clflushopt : Flag<["-"], "mno-clflushopt">, Group<m_x86_Features_Group>;
+def mclwb : Flag<["-"], "mclwb">, Group<m_x86_Features_Group>;
+def mno_clwb : Flag<["-"], "mno-clwb">, Group<m_x86_Features_Group>;
+def mclzero : Flag<["-"], "mclzero">, Group<m_x86_Features_Group>;
+def mno_clzero : Flag<["-"], "mno-clzero">, Group<m_x86_Features_Group>;
+def mcx16 : Flag<["-"], "mcx16">, Group<m_x86_Features_Group>;
+def mno_cx16 : Flag<["-"], "mno-cx16">, Group<m_x86_Features_Group>;
+def mf16c : Flag<["-"], "mf16c">, Group<m_x86_Features_Group>;
+def mno_f16c : Flag<["-"], "mno-f16c">, Group<m_x86_Features_Group>;
+def mfma : Flag<["-"], "mfma">, Group<m_x86_Features_Group>;
+def mno_fma : Flag<["-"], "mno-fma">, Group<m_x86_Features_Group>;
+def mfma4 : Flag<["-"], "mfma4">, Group<m_x86_Features_Group>;
+def mno_fma4 : Flag<["-"], "mno-fma4">, Group<m_x86_Features_Group>;
+def mfsgsbase : Flag<["-"], "mfsgsbase">, Group<m_x86_Features_Group>;
+def mno_fsgsbase : Flag<["-"], "mno-fsgsbase">, Group<m_x86_Features_Group>;
+def mfxsr : Flag<["-"], "mfxsr">, Group<m_x86_Features_Group>;
+def mno_fxsr : Flag<["-"], "mno-fxsr">, Group<m_x86_Features_Group>;
+def mgfni : Flag<["-"], "mgfni">, Group<m_x86_Features_Group>;
+def mno_gfni : Flag<["-"], "mno-gfni">, Group<m_x86_Features_Group>;
+def mlwp : Flag<["-"], "mlwp">, Group<m_x86_Features_Group>;
+def mno_lwp : Flag<["-"], "mno-lwp">, Group<m_x86_Features_Group>;
+def mlzcnt : Flag<["-"], "mlzcnt">, Group<m_x86_Features_Group>;
+def mno_lzcnt : Flag<["-"], "mno-lzcnt">, Group<m_x86_Features_Group>;
+def mmovbe : Flag<["-"], "mmovbe">, Group<m_x86_Features_Group>;
+def mno_movbe : Flag<["-"], "mno-movbe">, Group<m_x86_Features_Group>;
+def mmpx : Flag<["-"], "mmpx">, Group<m_x86_Features_Group>;
+def mno_mpx : Flag<["-"], "mno-mpx">, Group<m_x86_Features_Group>;
+def mmwaitx : Flag<["-"], "mmwaitx">, Group<m_x86_Features_Group>;
+def mno_mwaitx : Flag<["-"], "mno-mwaitx">, Group<m_x86_Features_Group>;
+def mpku : Flag<["-"], "mpku">, Group<m_x86_Features_Group>;
+def mno_pku : Flag<["-"], "mno-pku">, Group<m_x86_Features_Group>;
+def mpclmul : Flag<["-"], "mpclmul">, Group<m_x86_Features_Group>;
+def mno_pclmul : Flag<["-"], "mno-pclmul">, Group<m_x86_Features_Group>;
+def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>;
+def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
+def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group<m_x86_Features_Group>;
+def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group<m_x86_Features_Group>;
+def mprfchw : Flag<["-"], "mprfchw">, Group<m_x86_Features_Group>;
+def mno_prfchw : Flag<["-"], "mno-prfchw">, Group<m_x86_Features_Group>;
+def mrdpid : Flag<["-"], "mrdpid">, Group<m_x86_Features_Group>;
+def mno_rdpid : Flag<["-"], "mno-rdpid">, Group<m_x86_Features_Group>;
+def mrdrnd : Flag<["-"], "mrdrnd">, Group<m_x86_Features_Group>;
+def mno_rdrnd : Flag<["-"], "mno-rdrnd">, Group<m_x86_Features_Group>;
+def mrtm : Flag<["-"], "mrtm">, Group<m_x86_Features_Group>;
+def mno_rtm : Flag<["-"], "mno-rtm">, Group<m_x86_Features_Group>;
+def mrdseed : Flag<["-"], "mrdseed">, Group<m_x86_Features_Group>;
+def mno_rdseed : Flag<["-"], "mno-rdseed">, Group<m_x86_Features_Group>;
+def msahf : Flag<["-"], "msahf">, Group<m_x86_Features_Group>;
+def mno_sahf : Flag<["-"], "mno-sahf">, Group<m_x86_Features_Group>;
+def msgx : Flag<["-"], "msgx">, Group<m_x86_Features_Group>;
+def mno_sgx : Flag<["-"], "mno-sgx">, Group<m_x86_Features_Group>;
+def msha : Flag<["-"], "msha">, Group<m_x86_Features_Group>;
+def mno_sha : Flag<["-"], "mno-sha">, Group<m_x86_Features_Group>;
+def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>;
+def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>;
+def mvaes : Flag<["-"], "mvaes">, Group<m_x86_Features_Group>;
+def mno_vaes : Flag<["-"], "mno-vaes">, Group<m_x86_Features_Group>;
+def mvpclmulqdq : Flag<["-"], "mvpclmulqdq">, Group<m_x86_Features_Group>;
+def mno_vpclmulqdq : Flag<["-"], "mno-vpclmulqdq">, Group<m_x86_Features_Group>;
+def mxop : Flag<["-"], "mxop">, Group<m_x86_Features_Group>;
+def mno_xop : Flag<["-"], "mno-xop">, Group<m_x86_Features_Group>;
+def mxsave : Flag<["-"], "mxsave">, Group<m_x86_Features_Group>;
+def mno_xsave : Flag<["-"], "mno-xsave">, Group<m_x86_Features_Group>;
+def mxsavec : Flag<["-"], "mxsavec">, Group<m_x86_Features_Group>;
+def mno_xsavec : Flag<["-"], "mno-xsavec">, Group<m_x86_Features_Group>;
+def mxsaveopt : Flag<["-"], "mxsaveopt">, Group<m_x86_Features_Group>;
+def mno_xsaveopt : Flag<["-"], "mno-xsaveopt">, Group<m_x86_Features_Group>;
+def mxsaves : Flag<["-"], "mxsaves">, Group<m_x86_Features_Group>;
+def mno_xsaves : Flag<["-"], "mno-xsaves">, Group<m_x86_Features_Group>;
+def mshstk : Flag<["-"], "mshstk">, Group<m_x86_Features_Group>;
+def mno_shstk : Flag<["-"], "mno-shstk">, Group<m_x86_Features_Group>;
+def mibt : Flag<["-"], "mibt">, Group<m_x86_Features_Group>;
+def mno_ibt : Flag<["-"], "mno-ibt">, Group<m_x86_Features_Group>;
+def mretpoline : Flag<["-"], "mretpoline">, Group<m_x86_Features_Group>;
+def mno_retpoline : Flag<["-"], "mno-retpoline">, Group<m_x86_Features_Group>;
+def mretpoline_external_thunk : Flag<["-"], "mretpoline-external-thunk">, Group<m_x86_Features_Group>;
+def mno_retpoline_external_thunk : Flag<["-"], "mno-retpoline-external-thunk">, Group<m_x86_Features_Group>;
 
 // These are legacy user-facing driver-level option spellings. They are always
 // aliases for options that are spelled using the more common Unix / GNU flag
diff --git a/include/clang/Driver/SanitizerArgs.h b/include/clang/Driver/SanitizerArgs.h
index 52b6698..d144e48 100644
--- a/include/clang/Driver/SanitizerArgs.h
+++ b/include/clang/Driver/SanitizerArgs.h
@@ -30,8 +30,9 @@
   std::vector<std::string> ExtraDeps;
   int CoverageFeatures = 0;
   int MsanTrackOrigins = 0;
-  bool MsanUseAfterDtor = false;
+  bool MsanUseAfterDtor = true;
   bool CfiCrossDso = false;
+  bool CfiICallGeneralizePointers = false;
   int AsanFieldPadding = 0;
   bool SharedRuntime = false;
   bool AsanUseAfterScope = true;
@@ -54,12 +55,14 @@
   bool needsSharedRt() const { return SharedRuntime; }
 
   bool needsAsanRt() const { return Sanitizers.has(SanitizerKind::Address); }
+  bool needsHwasanRt() const { return Sanitizers.has(SanitizerKind::HWAddress); }
   bool needsTsanRt() const { return Sanitizers.has(SanitizerKind::Thread); }
   bool needsMsanRt() const { return Sanitizers.has(SanitizerKind::Memory); }
   bool needsFuzzer() const { return Sanitizers.has(SanitizerKind::Fuzzer); }
   bool needsLsanRt() const {
     return Sanitizers.has(SanitizerKind::Leak) &&
-           !Sanitizers.has(SanitizerKind::Address);
+           !Sanitizers.has(SanitizerKind::Address) &&
+           !Sanitizers.has(SanitizerKind::HWAddress);
   }
   bool needsUbsanRt() const;
   bool requiresMinimalRuntime() const { return MinimalRuntime; }
@@ -71,6 +74,7 @@
   bool needsEsanRt() const {
     return Sanitizers.hasOneOf(SanitizerKind::Efficiency);
   }
+  bool needsScudoRt() const { return Sanitizers.has(SanitizerKind::Scudo); }
 
   bool requiresPIE() const;
   bool needsUnwindTables() const;
diff --git a/include/clang/Driver/ToolChain.h b/include/clang/Driver/ToolChain.h
index 51ff2b2..293fcf0 100644
--- a/include/clang/Driver/ToolChain.h
+++ b/include/clang/Driver/ToolChain.h
@@ -40,6 +40,7 @@
   class Compilation;
   class CudaInstallationDetector;
   class Driver;
+  class InputInfo;
   class JobAction;
   class RegisterEffectiveTriple;
   class SanitizerArgs;
@@ -92,7 +93,7 @@
 
 private:
   const Driver &D;
-  const llvm::Triple Triple;
+  llvm::Triple Triple;
   const llvm::opt::ArgList &Args;
   // We need to initialize CachedRTTIArg before CachedRTTIMode
   const llvm::opt::Arg *const CachedRTTIArg;
@@ -135,6 +136,8 @@
   ToolChain(const Driver &D, const llvm::Triple &T,
             const llvm::opt::ArgList &Args);
 
+  void setTripleEnvironment(llvm::Triple::EnvironmentType Env);
+
   virtual Tool *buildAssembler() const;
   virtual Tool *buildLinker() const;
   virtual Tool *getTool(Action::ActionClass AC) const;
@@ -172,6 +175,11 @@
   /// while the aux triple is the host (CPU) toolchain, e.g. x86-linux-gnu.
   virtual const llvm::Triple *getAuxTriple() const { return nullptr; }
 
+  /// Some toolchains need to modify the file name, for example to replace the
+  /// extension for object files with .cubin for OpenMP offloading to Nvidia
+  /// GPUs.
+  virtual std::string getInputFilename(const InputInfo &Input) const;
+
   llvm::Triple::ArchType getArch() const { return Triple.getArch(); }
   StringRef getArchName() const { return Triple.getArchName(); }
   StringRef getPlatform() const { return Triple.getVendorName(); }
@@ -310,6 +318,9 @@
   /// mixed dispatch method be used?
   virtual bool UseObjCMixedDispatch() const { return false; }
 
+  /// \brief Check whether to enable x86 relax relocations by default.
+  virtual bool useRelaxRelocations() const;
+
   /// GetDefaultStackProtectorLevel - Get the default stack protector level for
   /// this tool chain (0=off, 1=on, 2=strong, 3=all).
   virtual unsigned GetDefaultStackProtectorLevel(bool KernelOrKext) const {
@@ -344,6 +355,9 @@
   // as OpenMP) to find arch-specific libraries.
   std::string getArchSpecificLibPath() const;
 
+  // Returns <OSname> part of above.
+  StringRef getOSLibName() const;
+
   /// needsProfileRT - returns true if instrumentation profile is on.
   static bool needsProfileRT(const llvm::opt::ArgList &Args);
 
@@ -366,9 +380,6 @@
   /// SupportsProfiling - Does this tool chain support -pg.
   virtual bool SupportsProfiling() const { return true; }
 
-  /// Does this tool chain support Objective-C garbage collection.
-  virtual bool SupportsObjCGC() const { return true; }
-
   /// Complain if this tool chain doesn't support Objective-C ARC.
   virtual void CheckObjCARC() const {}
 
@@ -391,10 +402,9 @@
     return llvm::DebuggerKind::GDB;
   }
 
-  /// UseSjLjExceptions - Does this tool chain use SjLj exceptions.
-  virtual bool UseSjLjExceptions(const llvm::opt::ArgList &Args) const {
-    return false;
-  }
+  /// GetExceptionModel - Return the tool chain exception model.
+  virtual llvm::ExceptionHandling
+  GetExceptionModel(const llvm::opt::ArgList &Args) const;
 
   /// SupportsEmbeddedBitcode - Does this tool chain support embedded bitcode.
   virtual bool SupportsEmbeddedBitcode() const {
diff --git a/include/clang/Driver/XRayArgs.h b/include/clang/Driver/XRayArgs.h
index 83210d1..e5b7616 100644
--- a/include/clang/Driver/XRayArgs.h
+++ b/include/clang/Driver/XRayArgs.h
@@ -24,6 +24,7 @@
   std::vector<std::string> ExtraDeps;
   bool XRayInstrument = false;
   int InstructionThreshold = 200;
+  bool XRayAlwaysEmitCustomEvents = false;
 
 public:
   /// Parses the XRay arguments from an argument list.
diff --git a/include/clang/Format/Format.h b/include/clang/Format/Format.h
index 302ced3..0a8b6cf 100644
--- a/include/clang/Format/Format.h
+++ b/include/clang/Format/Format.h
@@ -390,6 +390,17 @@
   /// \endcode
   bool BinPackParameters;
 
+  /// \brief The style of wrapping parameters on the same line (bin-packed) or
+  /// on one line each.
+  enum BinPackStyle {
+    /// Automatically determine parameter bin-packing behavior.
+    BPS_Auto,
+    /// Always bin-pack parameters.
+    BPS_Always,
+    /// Never bin-pack parameters.
+    BPS_Never,
+  };
+
   /// \brief The style of breaking before or after binary operators.
   enum BinaryOperatorStyle {
     /// Break after operators.
@@ -651,7 +662,9 @@
     ///   }
     /// \endcode
     bool AfterNamespace;
-    /// \brief Wrap ObjC definitions (``@autoreleasepool``, interfaces, ..).
+    /// \brief Wrap ObjC definitions (interfaces, implementations...).
+    /// \note @autoreleasepool and @synchronized blocks are wrapped
+    /// according to `AfterControlStatement` flag.
     bool AfterObjCDeclaration;
     /// \brief Wrap struct definitions.
     /// \code
@@ -985,6 +998,40 @@
   /// For example: BOOST_FOREACH.
   std::vector<std::string> ForEachMacros;
 
+  /// \brief Styles for sorting multiple ``#include`` blocks.
+  enum IncludeBlocksStyle {
+    /// \brief Sort each ``#include`` block separately.
+    /// \code
+    ///    #include "b.h"               into      #include "b.h"
+    ///
+    ///    #include <lib/main.h>                  #include "a.h"
+    ///    #include "a.h"                         #include <lib/main.h>
+    /// \endcode
+    IBS_Preserve,
+    /// \brief Merge multiple ``#include`` blocks together and sort as one.
+    /// \code
+    ///    #include "b.h"               into      #include "a.h"
+    ///                                           #include "b.h"
+    ///    #include <lib/main.h>                  #include <lib/main.h>
+    ///    #include "a.h"
+    /// \endcode
+    IBS_Merge,
+    /// \brief Merge multiple ``#include`` blocks together and sort as one.
+    /// Then split into groups based on category priority. See
+    /// ``IncludeCategories``.
+    /// \code
+    ///    #include "b.h"               into      #include "a.h"
+    ///                                           #include "b.h"
+    ///    #include <lib/main.h>
+    ///    #include "a.h"                         #include <lib/main.h>
+    /// \endcode
+    IBS_Regroup,
+  };
+
+  /// \brief Dependent on the value, multiple ``#include`` blocks can be sorted
+  /// as one and divided based on category.
+  IncludeBlocksStyle IncludeBlocks;
+
   /// \brief See documentation of ``IncludeCategories``.
   struct IncludeCategory {
     /// \brief The regular expression that this category matches.
@@ -1265,6 +1312,38 @@
   /// \brief The indentation used for namespaces.
   NamespaceIndentationKind NamespaceIndentation;
 
+  /// \brief Controls bin-packing Objective-C protocol conformance list
+  /// items into as few lines as possible when they go over ``ColumnLimit``.
+  ///
+  /// If ``Auto`` (the default), delegates to the value in
+  /// ``BinPackParameters``. If that is ``true``, bin-packs Objective-C
+  /// protocol conformance list items into as few lines as possible
+  /// whenever they go over ``ColumnLimit``.
+  ///
+  /// If ``Always``, always bin-packs Objective-C protocol conformance
+  /// list items into as few lines as possible whenever they go over
+  /// ``ColumnLimit``.
+  ///
+  /// If ``Never``, lays out Objective-C protocol conformance list items
+  /// onto individual lines whenever they go over ``ColumnLimit``.
+  ///
+  /// \code
+  ///    Always (or Auto, if BinPackParameters=true):
+  ///    @interface ccccccccccccc () <
+  ///        ccccccccccccc, ccccccccccccc,
+  ///        ccccccccccccc, ccccccccccccc> {
+  ///    }
+  ///
+  ///    Never (or Auto, if BinPackParameters=false):
+  ///    @interface ddddddddddddd () <
+  ///        ddddddddddddd,
+  ///        ddddddddddddd,
+  ///        ddddddddddddd,
+  ///        ddddddddddddd> {
+  ///    }
+  /// \endcode
+  BinPackStyle ObjCBinPackProtocolList;
+
   /// \brief The number of characters to use for indentation of ObjC blocks.
   /// \code{.objc}
   ///    ObjCBlockIndentWidth: 4
@@ -1327,6 +1406,65 @@
   /// \brief Pointer and reference alignment style.
   PointerAlignmentStyle PointerAlignment;
 
+  /// See documentation of ``RawStringFormats``.
+  struct RawStringFormat {
+    /// \brief The language of this raw string.
+    LanguageKind Language;
+    /// \brief A list of raw string delimiters that match this language.
+    std::vector<std::string> Delimiters;
+    /// \brief A list of enclosing function names that match this language.
+    std::vector<std::string> EnclosingFunctions;
+    /// \brief The canonical delimiter for this language.
+    std::string CanonicalDelimiter;
+    /// \brief The style name on which this raw string format is based on.
+    /// If not specified, the raw string format is based on the style that this
+    /// format is based on.
+    std::string BasedOnStyle;
+    bool operator==(const RawStringFormat &Other) const {
+      return Language == Other.Language && Delimiters == Other.Delimiters &&
+             EnclosingFunctions == Other.EnclosingFunctions &&
+             CanonicalDelimiter == Other.CanonicalDelimiter &&
+             BasedOnStyle == Other.BasedOnStyle;
+    }
+  };
+
+  /// \brief Defines hints for detecting supported languages code blocks in raw
+  /// strings.
+  ///
+  /// A raw string with a matching delimiter or a matching enclosing function
+  /// name will be reformatted assuming the specified language based on the
+  /// style for that language defined in the .clang-format file. If no style has
+  /// been defined in the .clang-format file for the specific language, a
+  /// predefined style given by 'BasedOnStyle' is used. If 'BasedOnStyle' is not
+  /// found, the formatting is based on llvm style. A matching delimiter takes
+  /// precedence over a matching enclosing function name for determining the
+  /// language of the raw string contents.
+  ///
+  /// If a canonical delimiter is specified, occurrences of other delimiters for
+  /// the same language will be updated to the canonical if possible.
+  ///
+  /// There should be at most one specification per language and each delimiter
+  /// and enclosing function should not occur in multiple specifications.
+  ///
+  /// To configure this in the .clang-format file, use:
+  /// \code{.yaml}
+  ///   RawStringFormats:
+  ///     - Language: TextProto
+  ///         Delimiters:
+  ///           - 'pb'
+  ///           - 'proto'
+  ///         EnclosingFunctions:
+  ///           - 'PARSE_TEXT_PROTO'
+  ///         BasedOnStyle: google
+  ///     - Language: Cpp
+  ///         Delimiters:
+  ///           - 'cc'
+  ///           - 'cpp'
+  ///         BasedOnStyle: llvm
+  ///         CanonicalDelimiter: 'cc'
+  /// \endcode
+  std::vector<RawStringFormat> RawStringFormats;
+
   /// \brief If ``true``, clang-format will attempt to re-flow comments.
   /// \code
   ///    false:
@@ -1350,6 +1488,14 @@
   bool SortIncludes;
 
   /// \brief If ``true``, clang-format will sort using declarations.
+  ///
+  /// The order of using declarations is defined as follows:
+  /// Split the strings by "::" and discard any initial empty strings. The last
+  /// element of each list is a non-namespace name; all others are namespace
+  /// names. Sort the lists of names lexicographically, where the sort order of
+  /// individual names is that all non-namespace names come before all namespace
+  /// names, and within those groups, names are in case-insensitive
+  /// lexicographic order.
   /// \code
   ///    false:                                 true:
   ///    using std::cout;               vs.     using std::cin;
@@ -1360,7 +1506,7 @@
   /// \brief If ``true``, a space is inserted after C style casts.
   /// \code
   ///    true:                                  false:
-  ///    (int)i;                        vs.     (int) i;
+  ///    (int) i;                       vs.     (int)i;
   /// \endcode
   bool SpaceAfterCStyleCast;
 
@@ -1379,6 +1525,21 @@
   /// \endcode
   bool SpaceBeforeAssignmentOperators;
 
+  /// \brief If ``false``, spaces will be removed before constructor initializer
+  /// colon.
+  /// \code
+  ///    true:                                  false:
+  ///    Foo::Foo() : a(a) {}                   Foo::Foo(): a(a) {}
+  /// \endcode
+  bool SpaceBeforeCtorInitializerColon;
+
+  /// \brief If ``false``, spaces will be removed before inheritance colon.
+  /// \code
+  ///    true:                                  false:
+  ///    class Foo : Bar {}             vs.     class Foo: Bar {}
+  /// \endcode
+  bool SpaceBeforeInheritanceColon;
+
   /// \brief Different ways to put a space before opening parentheses.
   enum SpaceBeforeParensOptions {
     /// Never put a space before opening parentheses.
@@ -1417,6 +1578,14 @@
   /// \brief Defines in which cases to put a space before opening parentheses.
   SpaceBeforeParensOptions SpaceBeforeParens;
 
+  /// \brief If ``false``, spaces will be removed before range-based for loop
+  /// colon.
+  /// \code
+  ///    true:                                  false:
+  ///    for (auto v : values) {}       vs.     for(auto v: values) {}
+  /// \endcode
+  bool SpaceBeforeRangeBasedForLoopColon;
+
   /// \brief If ``true``, spaces may be inserted into ``()``.
   /// \code
   ///    true:                                false:
@@ -1566,6 +1735,7 @@
                R.ExperimentalAutoDetectBinPacking &&
            FixNamespaceComments == R.FixNamespaceComments &&
            ForEachMacros == R.ForEachMacros &&
+           IncludeBlocks == R.IncludeBlocks &&
            IncludeCategories == R.IncludeCategories &&
            IndentCaseLabels == R.IndentCaseLabels &&
            IndentPPDirectives == R.IndentPPDirectives &&
@@ -1579,6 +1749,7 @@
            MacroBlockEnd == R.MacroBlockEnd &&
            MaxEmptyLinesToKeep == R.MaxEmptyLinesToKeep &&
            NamespaceIndentation == R.NamespaceIndentation &&
+           ObjCBinPackProtocolList == R.ObjCBinPackProtocolList &&
            ObjCBlockIndentWidth == R.ObjCBlockIndentWidth &&
            ObjCSpaceAfterProperty == R.ObjCSpaceAfterProperty &&
            ObjCSpaceBeforeProtocolList == R.ObjCSpaceBeforeProtocolList &&
@@ -1592,10 +1763,16 @@
            PenaltyExcessCharacter == R.PenaltyExcessCharacter &&
            PenaltyReturnTypeOnItsOwnLine == R.PenaltyReturnTypeOnItsOwnLine &&
            PointerAlignment == R.PointerAlignment &&
+           RawStringFormats == R.RawStringFormats &&
            SpaceAfterCStyleCast == R.SpaceAfterCStyleCast &&
            SpaceAfterTemplateKeyword == R.SpaceAfterTemplateKeyword &&
            SpaceBeforeAssignmentOperators == R.SpaceBeforeAssignmentOperators &&
+           SpaceBeforeCtorInitializerColon ==
+               R.SpaceBeforeCtorInitializerColon &&
+           SpaceBeforeInheritanceColon == R.SpaceBeforeInheritanceColon &&
            SpaceBeforeParens == R.SpaceBeforeParens &&
+           SpaceBeforeRangeBasedForLoopColon ==
+               R.SpaceBeforeRangeBasedForLoopColon &&
            SpaceInEmptyParentheses == R.SpaceInEmptyParentheses &&
            SpacesBeforeTrailingComments == R.SpacesBeforeTrailingComments &&
            SpacesInAngles == R.SpacesInAngles &&
@@ -1606,6 +1783,43 @@
            Standard == R.Standard && TabWidth == R.TabWidth &&
            UseTab == R.UseTab;
   }
+
+  llvm::Optional<FormatStyle> GetLanguageStyle(LanguageKind Language) const;
+
+  // Stores per-language styles. A FormatStyle instance inside has an empty
+  // StyleSet. A FormatStyle instance returned by the Get method has its
+  // StyleSet set to a copy of the originating StyleSet, effectively keeping the
+  // internal representation of that StyleSet alive.
+  //
+  // The memory management and ownership reminds of a birds nest: chicks
+  // leaving the nest take photos of the nest with them.
+  struct FormatStyleSet {
+    typedef std::map<FormatStyle::LanguageKind, FormatStyle> MapType;
+
+    llvm::Optional<FormatStyle> Get(FormatStyle::LanguageKind Language) const;
+
+    // Adds \p Style to this FormatStyleSet. Style must not have an associated
+    // FormatStyleSet.
+    // Style.Language should be different than LK_None. If this FormatStyleSet
+    // already contains an entry for Style.Language, that gets replaced with the
+    // passed Style.
+    void Add(FormatStyle Style);
+
+    // Clears this FormatStyleSet.
+    void Clear();
+
+  private:
+    std::shared_ptr<MapType> Styles;
+  };
+
+  static FormatStyleSet BuildStyleSetFromConfiguration(
+      const FormatStyle &MainStyle,
+      const std::vector<FormatStyle> &ConfigurationStyles);
+
+private:
+  FormatStyleSet StyleSet;
+
+  friend std::error_code parseConfiguration(StringRef Text, FormatStyle *Style);
 };
 
 /// \brief Returns a format style complying with the LLVM coding standards:
@@ -1651,6 +1865,8 @@
 /// Style->Language is used to get the base style, if the ``BasedOnStyle``
 /// option is present.
 ///
+/// The FormatStyleSet of Style is reset.
+///
 /// When ``BasedOnStyle`` is not present, options not present in the YAML
 /// document, are retained in \p Style.
 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style);
@@ -1795,6 +2011,10 @@
                                      StringRef Code = "",
                                      vfs::FileSystem *FS = nullptr);
 
+// \brief Guesses the language from the ``FileName`` and ``Code`` to be formatted.
+// Defaults to FormatStyle::LK_Cpp.
+FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code);
+
 // \brief Returns a string representation of ``Language``.
 inline StringRef getLanguageName(FormatStyle::LanguageKind Language) {
   switch (Language) {
diff --git a/include/clang/Frontend/CodeGenOptions.def b/include/clang/Frontend/CodeGenOptions.def
index 8cac8e4..edf266f 100644
--- a/include/clang/Frontend/CodeGenOptions.def
+++ b/include/clang/Frontend/CodeGenOptions.def
@@ -38,6 +38,7 @@
 CODEGENOPT(Autolink          , 1, 1) ///< -fno-autolink
 CODEGENOPT(ObjCAutoRefCountExceptions , 1, 0) ///< Whether ARC should be EH-safe.
 CODEGENOPT(Backchain         , 1, 0) ///< -mbackchain
+CODEGENOPT(ControlFlowGuard  , 1, 0) ///< -cfguard
 CODEGENOPT(CoverageExtraChecksum, 1, 0) ///< Whether we need a second checksum for functions in GCNO files.
 CODEGENOPT(CoverageNoFunctionNamesInData, 1, 0) ///< Do not include function names in GCDA files.
 CODEGENOPT(CoverageExitBlockBeforeBody, 1, 0) ///< Whether to emit the exit block before the body blocks in GCNO files.
@@ -61,6 +62,8 @@
                                    ///< pass manager.
 CODEGENOPT(DisableRedZone    , 1, 0) ///< Set when -mno-red-zone is enabled.
 CODEGENOPT(DisableTailCalls  , 1, 0) ///< Do not emit tail calls.
+CODEGENOPT(NoEscapingBlockTailCalls, 1, 0) ///< Do not emit tail calls from
+                                           ///< escaping blocks.
 CODEGENOPT(EmitDeclMetadata  , 1, 0) ///< Emit special metadata indicating what
                                      ///< Decl* various IR entities came from.
                                      ///< Only useful when running CodeGen as a
@@ -68,7 +71,8 @@
 CODEGENOPT(EmitGcovArcs      , 1, 0) ///< Emit coverage data files, aka. GCDA.
 CODEGENOPT(EmitGcovNotes     , 1, 0) ///< Emit coverage "notes" files, aka GCNO.
 CODEGENOPT(EmitOpenCLArgMetadata , 1, 0) ///< Emit OpenCL kernel arg metadata.
-CODEGENOPT(EmulatedTLS       , 1, 0) ///< Set when -femulated-tls is enabled.
+CODEGENOPT(EmulatedTLS       , 1, 0) ///< Set by default or -f[no-]emulated-tls.
+CODEGENOPT(ExplicitEmulatedTLS , 1, 0) ///< Set if -f[no-]emulated-tls is used.
 /// \brief Embed Bitcode mode (off/all/bitcode/marker).
 ENUM_CODEGENOPT(EmbedBitcode, EmbedBitcodeKind, 2, Embed_Off)
 CODEGENOPT(ForbidGuardVariables , 1, 0) ///< Issue errors if C++ guard variables
@@ -76,9 +80,20 @@
 CODEGENOPT(FunctionSections  , 1, 0) ///< Set when -ffunction-sections is enabled.
 CODEGENOPT(InstrumentFunctions , 1, 0) ///< Set when -finstrument-functions is
                                        ///< enabled.
-
+CODEGENOPT(InstrumentFunctionsAfterInlining , 1, 0) ///< Set when
+                          ///< -finstrument-functions-after-inlining is enabled.
+CODEGENOPT(InstrumentFunctionEntryBare , 1, 0) ///< Set when
+                               ///< -finstrument-function-entry-bare is enabled.
+CODEGENOPT(CFProtectionReturn , 1, 0) ///< if -fcf-protection is
+                                      ///< set to full or return.
+CODEGENOPT(CFProtectionBranch , 1, 0) ///< if -fcf-protection is
+                                      ///< set to full or branch.
 CODEGENOPT(XRayInstrumentFunctions , 1, 0) ///< Set when -fxray-instrument is
                                            ///< enabled.
+CODEGENOPT(StackSizeSection  , 1, 0) ///< Set when -fstack-size-section is enabled.
+
+///< Set when -fxray-always-emit-customevents is enabled.
+CODEGENOPT(XRayAlwaysEmitCustomEvents , 1, 0)
 
 ///< Set the minimum number of instructions in a function to determine selective
 ///< XRay instrumentation.
@@ -110,11 +125,13 @@
 CODEGENOPT(NoImplicitFloat   , 1, 0) ///< Set when -mno-implicit-float is enabled.
 CODEGENOPT(NoInfsFPMath      , 1, 0) ///< Assume FP arguments, results not +-Inf.
 CODEGENOPT(NoSignedZeros     , 1, 0) ///< Allow ignoring the signedness of FP zero
+CODEGENOPT(Reassociate       , 1, 0) ///< Allow reassociation of FP math ops
 CODEGENOPT(ReciprocalMath    , 1, 0) ///< Allow FP divisions to be reassociated.
 CODEGENOPT(NoTrappingMath    , 1, 0) ///< Set when -fno-trapping-math is enabled.
 CODEGENOPT(NoNaNsFPMath      , 1, 0) ///< Assume FP arguments, results not NaN.
 CODEGENOPT(FlushDenorm       , 1, 0) ///< Allow FP denorm numbers to be flushed to zero
 CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
+CODEGENOPT(UniformWGSize     , 1, 0) ///< -cl-uniform-work-group-size
 CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
 /// \brief Method of Objective-C dispatch to use.
 ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy)
@@ -142,9 +159,13 @@
 CODEGENOPT(RelaxAll          , 1, 0) ///< Relax all machine code instructions.
 CODEGENOPT(RelaxedAliasing   , 1, 0) ///< Set when -fno-strict-aliasing is enabled.
 CODEGENOPT(StructPathTBAA    , 1, 0) ///< Whether or not to use struct-path TBAA.
+CODEGENOPT(NewStructPathTBAA , 1, 0) ///< Whether or not to use enhanced struct-path TBAA.
 CODEGENOPT(SaveTempLabels    , 1, 0) ///< Save temporary labels.
 CODEGENOPT(SanitizeAddressUseAfterScope , 1, 0) ///< Enable use-after-scope detection
                                                 ///< in AddressSanitizer
+CODEGENOPT(SanitizeAddressPoisonClassMemberArrayNewCookie, 1,
+           0) ///< Enable poisoning operator new[] which is not a replaceable
+              ///< global allocation function in AddressSanitizer
 CODEGENOPT(SanitizeAddressGlobalsDeadStripping, 1, 0) ///< Enable linker dead stripping
                                                       ///< of globals in AddressSanitizer
 CODEGENOPT(SanitizeMemoryTrackOrigins, 2, 0) ///< Enable tracking origins in
@@ -154,6 +175,8 @@
 CODEGENOPT(SanitizeCfiCrossDso, 1, 0) ///< Enable cross-dso support in CFI.
 CODEGENOPT(SanitizeMinimalRuntime, 1, 0) ///< Use "_minimal" sanitizer runtime for
                                          ///< diagnostics.
+CODEGENOPT(SanitizeCfiICallGeneralizePointers, 1, 0) ///< Generalize pointer types in
+                                                     ///< CFI icall function signatures
 CODEGENOPT(SanitizeCoverageType, 2, 0) ///< Type of sanitizer coverage
                                        ///< instrumentation.
 CODEGENOPT(SanitizeCoverageIndirectCalls, 1, 0) ///< Enable sanitizer coverage
@@ -207,6 +230,7 @@
                                             ///< alignment, if not 0.
 VALUE_CODEGENOPT(StackProbeSize    , 32, 4096) ///< Overrides default stack
                                                ///< probe size, even if 0.
+CODEGENOPT(NoStackArgProbe, 1, 0) ///< Set when -mno-stack-arg-probe is used
 CODEGENOPT(DebugColumnInfo, 1, 0) ///< Whether or not to use column information
                                   ///< in debug info.
 
@@ -295,6 +319,11 @@
 /// Whether to emit .debug_gnu_pubnames section instead of .debug_pubnames.
 CODEGENOPT(GnuPubnames, 1, 0)
 
+CODEGENOPT(NoPLT, 1, 0)
+
+/// Whether to embed source in DWARF debug line section.
+CODEGENOPT(EmbedSource, 1, 0)
+
 #undef CODEGENOPT
 #undef ENUM_CODEGENOPT
 #undef VALUE_CODEGENOPT
diff --git a/include/clang/Frontend/CodeGenOptions.h b/include/clang/Frontend/CodeGenOptions.h
index 71730a2..dd2e9bd 100644
--- a/include/clang/Frontend/CodeGenOptions.h
+++ b/include/clang/Frontend/CodeGenOptions.h
@@ -16,6 +16,7 @@
 
 #include "clang/Basic/DebugInfoOptions.h"
 #include "clang/Basic/Sanitizers.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Target/TargetOptions.h"
 #include <map>
@@ -167,7 +168,7 @@
   std::string SplitDwarfFile;
 
   /// The name of the relocation model to use.
-  std::string RelocationModel;
+  llvm::Reloc::Model RelocationModel;
 
   /// The thread model to use
   std::string ThreadModel;
@@ -204,10 +205,9 @@
   /// the summary and module symbol table (and not, e.g. any debug metadata).
   std::string ThinLinkBitcodeFile;
 
-  /// A list of file names passed with -fcuda-include-gpubinary options to
-  /// forward to CUDA runtime back-end for incorporating them into host-side
-  /// object file.
-  std::vector<std::string> CudaGpuBinaryFileNames;
+  /// Name of file passed with -fcuda-include-gpubinary option to forward to
+  /// CUDA runtime back-end for incorporating them into host-side object file.
+  std::string CudaGpuBinaryFileName;
 
   /// The name of the file to which the backend should save YAML optimization
   /// records.
@@ -251,6 +251,13 @@
   /// \brief A list of all -fno-builtin-* function names (e.g., memset).
   std::vector<std::string> NoBuiltinFuncs;
 
+  std::vector<std::string> Reciprocals;
+
+  /// The preferred width for auto-vectorization transforms. This is intended to
+  /// override default transforms based on the width of the architected vector
+  /// registers.
+  std::string PreferVectorWidth;
+
 public:
   // Define accessors/mutators for code generation options of enumeration type.
 #define CODEGENOPT(Name, Bits, Default)
diff --git a/include/clang/Frontend/CompilerInstance.h b/include/clang/Frontend/CompilerInstance.h
index 90a9501..bd0170d 100644
--- a/include/clang/Frontend/CompilerInstance.h
+++ b/include/clang/Frontend/CompilerInstance.h
@@ -183,6 +183,9 @@
   /// The list of active output files.
   std::list<OutputFile> OutputFiles;
 
+  /// Force an output buffer.
+  std::unique_ptr<llvm::raw_pwrite_stream> OutputStream;
+
   CompilerInstance(const CompilerInstance &) = delete;
   void operator=(const CompilerInstance &) = delete;
 public:
@@ -773,6 +776,14 @@
 
   /// }
 
+  void setOutputStream(std::unique_ptr<llvm::raw_pwrite_stream> OutStream) {
+    OutputStream = std::move(OutStream);
+  }
+
+  std::unique_ptr<llvm::raw_pwrite_stream> takeOutputStream() {
+    return std::move(OutputStream);
+  }
+
   // Create module manager.
   void createModuleManager();
 
diff --git a/include/clang/Frontend/FrontendActions.h b/include/clang/Frontend/FrontendActions.h
index c45aeaa..df0b7a6 100644
--- a/include/clang/Frontend/FrontendActions.h
+++ b/include/clang/Frontend/FrontendActions.h
@@ -86,10 +86,15 @@
   /// \brief Compute the AST consumer arguments that will be used to
   /// create the PCHGenerator instance returned by CreateASTConsumer.
   ///
-  /// \returns true if an error occurred, false otherwise.
-  static std::unique_ptr<raw_pwrite_stream>
-  ComputeASTConsumerArguments(CompilerInstance &CI, StringRef InFile,
-                              std::string &Sysroot, std::string &OutputFile);
+  /// \returns false if an error occurred, true otherwise.
+  static bool ComputeASTConsumerArguments(CompilerInstance &CI,
+                                          std::string &Sysroot);
+
+  /// \brief Creates file to write the PCH into and returns a stream to write it
+  /// into. On error, returns null.
+  static std::unique_ptr<llvm::raw_pwrite_stream>
+  CreateOutputFile(CompilerInstance &CI, StringRef InFile,
+                   std::string &OutputFile);
 
   bool BeginSourceFileAction(CompilerInstance &CI) override;
 };
@@ -162,6 +167,14 @@
   bool hasCodeCompletionSupport() const override { return false; }
 };
 
+class TemplightDumpAction : public ASTFrontendAction {
+protected:
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                 StringRef InFile) override;
+
+  void ExecuteAction() override;
+};
+
 /**
  * \brief Frontend action adaptor that merges ASTs together.
  *
diff --git a/include/clang/Frontend/FrontendOptions.h b/include/clang/Frontend/FrontendOptions.h
index 5192a37..c609500 100644
--- a/include/clang/Frontend/FrontendOptions.h
+++ b/include/clang/Frontend/FrontendOptions.h
@@ -58,6 +58,7 @@
     RewriteObjC,            ///< ObjC->C Rewriter.
     RewriteTest,            ///< Rewriter playground
     RunAnalysis,            ///< Run one or more source code analyses.
+    TemplightDump,          ///< Dump template instantiations
     MigrateSource,          ///< Run migrator.
     RunPreprocessorOnly     ///< Just lex, no output.
   };
diff --git a/include/clang/Frontend/LangStandard.h b/include/clang/Frontend/LangStandard.h
index 6731e08..83e452d 100644
--- a/include/clang/Frontend/LangStandard.h
+++ b/include/clang/Frontend/LangStandard.h
@@ -22,16 +22,17 @@
   LineComment = (1 << 0),
   C99 = (1 << 1),
   C11 = (1 << 2),
-  CPlusPlus = (1 << 3),
-  CPlusPlus11 = (1 << 4),
-  CPlusPlus14 = (1 << 5),
-  CPlusPlus1z = (1 << 6),
-  CPlusPlus2a = (1 << 7),
-  Digraphs = (1 << 8),
-  GNUMode = (1 << 9),
-  HexFloat = (1 << 10),
-  ImplicitInt = (1 << 11),
-  OpenCL = (1 << 12)
+  C17 = (1 << 3),
+  CPlusPlus = (1 << 4),
+  CPlusPlus11 = (1 << 5),
+  CPlusPlus14 = (1 << 6),
+  CPlusPlus17 = (1 << 7),
+  CPlusPlus2a = (1 << 8),
+  Digraphs = (1 << 9),
+  GNUMode = (1 << 10),
+  HexFloat = (1 << 11),
+  ImplicitInt = (1 << 12),
+  OpenCL = (1 << 13)
 };
 
 }
@@ -70,6 +71,9 @@
   /// isC11 - Language is a superset of C11.
   bool isC11() const { return Flags & frontend::C11; }
 
+  /// isC17 - Language is a superset of C17.
+  bool isC17() const { return Flags & frontend::C17; }
+
   /// isCPlusPlus - Language is a C++ variant.
   bool isCPlusPlus() const { return Flags & frontend::CPlusPlus; }
 
@@ -79,8 +83,8 @@
   /// isCPlusPlus14 - Language is a C++14 variant (or later).
   bool isCPlusPlus14() const { return Flags & frontend::CPlusPlus14; }
 
-  /// isCPlusPlus1z - Language is a C++17 variant (or later).
-  bool isCPlusPlus1z() const { return Flags & frontend::CPlusPlus1z; }
+  /// isCPlusPlus17 - Language is a C++17 variant (or later).
+  bool isCPlusPlus17() const { return Flags & frontend::CPlusPlus17; }
 
   /// isCPlusPlus2a - Language is a post-C++17 variant (or later).
   bool isCPlusPlus2a() const { return Flags & frontend::CPlusPlus2a; }
diff --git a/include/clang/Frontend/LangStandards.def b/include/clang/Frontend/LangStandards.def
index a019d63..e7a081d 100644
--- a/include/clang/Frontend/LangStandards.def
+++ b/include/clang/Frontend/LangStandards.def
@@ -77,6 +77,15 @@
              LineComment | C99 | C11 | Digraphs | GNUMode | HexFloat)
 LANGSTANDARD_ALIAS_DEPR(gnu11, "gnu1x")
 
+// C17 modes
+LANGSTANDARD(c17, "c17",
+             C, "ISO C 2017",
+             LineComment | C99 | C11 | C17 | Digraphs | HexFloat)
+LANGSTANDARD_ALIAS(c17, "iso9899:2017")
+LANGSTANDARD(gnu17, "gnu17",
+             C, "ISO C 2017 with GNU extensions",
+             LineComment | C99 | C11 | C17 | Digraphs | GNUMode | HexFloat)
+
 // C++ modes
 LANGSTANDARD(cxx98, "c++98",
              CXX, "ISO C++ 1998 with amendments",
@@ -111,24 +120,24 @@
 
 LANGSTANDARD(cxx17, "c++17",
              CXX, "ISO C++ 2017 with amendments",
-             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus1z |
+             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
              Digraphs | HexFloat)
 LANGSTANDARD_ALIAS_DEPR(cxx17, "c++1z")
 
 LANGSTANDARD(gnucxx17, "gnu++17",
              CXX, "ISO C++ 2017 with amendments and GNU extensions",
-             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus1z |
+             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
              Digraphs | HexFloat | GNUMode)
 LANGSTANDARD_ALIAS_DEPR(gnucxx17, "gnu++1z")
 
 LANGSTANDARD(cxx2a, "c++2a",
              CXX, "Working draft for ISO C++ 2020",
-             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus1z |
+             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
              CPlusPlus2a | Digraphs | HexFloat)
 
 LANGSTANDARD(gnucxx2a, "gnu++2a",
              CXX, "Working draft for ISO C++ 2020 with GNU extensions",
-             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus1z |
+             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
              CPlusPlus2a | Digraphs | HexFloat | GNUMode)
 
 // OpenCL
diff --git a/include/clang/Frontend/PrecompiledPreamble.h b/include/clang/Frontend/PrecompiledPreamble.h
index 6b0b626..f1898f5 100644
--- a/include/clang/Frontend/PrecompiledPreamble.h
+++ b/include/clang/Frontend/PrecompiledPreamble.h
@@ -17,7 +17,9 @@
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/Support/AlignOf.h"
 #include "llvm/Support/MD5.h"
+#include <cstddef>
 #include <memory>
 #include <system_error>
 #include <type_traits>
@@ -47,7 +49,7 @@
 /// reuse the PCH for the subsequent runs. Use BuildPreamble to create PCH and
 /// CanReusePreamble + AddImplicitPreamble to make use of it.
 class PrecompiledPreamble {
-  class TempPCHFile;
+  class PCHStorage;
   struct PreambleFileHash;
 
 public:
@@ -70,6 +72,9 @@
   ///
   /// \param PCHContainerOps An instance of PCHContainerOperations.
   ///
+  /// \param StoreInMemory Store PCH in memory. If false, PCH will be stored in
+  /// a temporary file.
+  ///
   /// \param Callbacks A set of callbacks to be executed when building
   /// the preamble.
   static llvm::ErrorOr<PrecompiledPreamble>
@@ -77,7 +82,7 @@
         const llvm::MemoryBuffer *MainFileBuffer, PreambleBounds Bounds,
         DiagnosticsEngine &Diagnostics, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
         std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-        PreambleCallbacks &Callbacks);
+        bool StoreInMemory, PreambleCallbacks &Callbacks);
 
   PrecompiledPreamble(PrecompiledPreamble &&) = default;
   PrecompiledPreamble &operator=(PrecompiledPreamble &&) = default;
@@ -85,8 +90,10 @@
   /// PreambleBounds used to build the preamble.
   PreambleBounds getBounds() const;
 
-  /// The temporary file path at which the preamble PCH was placed.
-  StringRef GetPCHPath() const { return PCHFile.getFilePath(); }
+  /// Returns the size, in bytes, that preamble takes on disk or in memory.
+  /// For on-disk preambles returns 0 if filesystem operations fail. Intended to
+  /// be used for logging and debugging purposes only.
+  std::size_t getSize() const;
 
   /// Check whether PrecompiledPreamble can be reused for the new contents(\p
   /// MainFileBuffer) of the main file.
@@ -95,12 +102,26 @@
                 vfs::FileSystem *VFS) const;
 
   /// Changes options inside \p CI to use PCH from this preamble. Also remaps
-  /// main file to \p MainFileBuffer.
+  /// main file to \p MainFileBuffer and updates \p VFS to ensure the preamble
+  /// is accessible.
+  /// Requires that CanReuse() is true.
+  /// For in-memory preambles, PrecompiledPreamble instance continues to own the
+  /// MemoryBuffer with the Preamble after this method returns. The caller is
+  /// reponsible for making sure the PrecompiledPreamble instance outlives the
+  /// compiler run and the AST that will be using the PCH.
   void AddImplicitPreamble(CompilerInvocation &CI,
+                           IntrusiveRefCntPtr<vfs::FileSystem> &VFS,
                            llvm::MemoryBuffer *MainFileBuffer) const;
 
+  /// Configure \p CI to use this preamble.
+  /// Like AddImplicitPreamble, but doesn't assume CanReuse() is true.
+  /// If this preamble does not match the file, it may parse differently.
+  void OverridePreamble(CompilerInvocation &CI,
+                        IntrusiveRefCntPtr<vfs::FileSystem> &VFS,
+                        llvm::MemoryBuffer *MainFileBuffer) const;
+
 private:
-  PrecompiledPreamble(TempPCHFile PCHFile, std::vector<char> PreambleBytes,
+  PrecompiledPreamble(PCHStorage Storage, std::vector<char> PreambleBytes,
                       bool PreambleEndsAtStartOfLine,
                       llvm::StringMap<PreambleFileHash> FilesInPreamble);
 
@@ -142,6 +163,44 @@
     llvm::Optional<std::string> FilePath;
   };
 
+  class InMemoryPreamble {
+  public:
+    std::string Data;
+  };
+
+  class PCHStorage {
+  public:
+    enum class Kind { Empty, InMemory, TempFile };
+
+    PCHStorage() = default;
+    PCHStorage(TempPCHFile File);
+    PCHStorage(InMemoryPreamble Memory);
+
+    PCHStorage(const PCHStorage &) = delete;
+    PCHStorage &operator=(const PCHStorage &) = delete;
+
+    PCHStorage(PCHStorage &&Other);
+    PCHStorage &operator=(PCHStorage &&Other);
+
+    ~PCHStorage();
+
+    Kind getKind() const;
+
+    TempPCHFile &asFile();
+    const TempPCHFile &asFile() const;
+
+    InMemoryPreamble &asMemory();
+    const InMemoryPreamble &asMemory() const;
+
+  private:
+    void destroy();
+    void setEmpty();
+
+  private:
+    Kind StorageKind = Kind::Empty;
+    llvm::AlignedCharArrayUnion<TempPCHFile, InMemoryPreamble> Storage = {};
+  };
+
   /// Data used to determine if a file used in the preamble has been changed.
   struct PreambleFileHash {
     /// All files have size set.
@@ -171,8 +230,21 @@
     }
   };
 
-  /// Manages the lifetime of temporary file that stores a PCH.
-  TempPCHFile PCHFile;
+  /// Helper function to set up PCH for the preamble into \p CI and \p VFS to
+  /// with the specified \p Bounds.
+  void configurePreamble(PreambleBounds Bounds, CompilerInvocation &CI,
+                         IntrusiveRefCntPtr<vfs::FileSystem> &VFS,
+                         llvm::MemoryBuffer *MainFileBuffer) const;
+
+  /// Sets up the PreprocessorOptions and changes VFS, so that PCH stored in \p
+  /// Storage is accessible to clang. This method is an implementation detail of
+  /// AddImplicitPreamble.
+  static void setupPreambleStorage(const PCHStorage &Storage,
+                                   PreprocessorOptions &PreprocessorOpts,
+                                   IntrusiveRefCntPtr<vfs::FileSystem> &VFS);
+
+  /// Manages the memory buffer or temporary file that stores the PCH.
+  PCHStorage Storage;
   /// Keeps track of the files that were used when computing the
   /// preamble, with both their buffer size and their modification time.
   ///
@@ -192,6 +264,11 @@
 public:
   virtual ~PreambleCallbacks() = default;
 
+  /// Called before FrontendAction::BeginSourceFile.
+  /// Can be used to store references to various CompilerInstance fields
+  /// (e.g. SourceManager) that may be interesting to the consumers of other
+  /// callbacks.
+  virtual void BeforeExecute(CompilerInstance &CI);
   /// Called after FrontendAction::Execute(), but before
   /// FrontendAction::EndSourceFile(). Can be used to transfer ownership of
   /// various CompilerInstance fields before they are destroyed.
@@ -203,11 +280,9 @@
   /// NOTE: To allow more flexibility a custom ASTConsumer could probably be
   /// used instead, but having only this method allows a simpler API.
   virtual void HandleTopLevelDecl(DeclGroupRef DG);
-  /// Called for each macro defined in the Preamble.
-  /// NOTE: To allow more flexibility a custom PPCallbacks could probably be
-  /// used instead, but having only this method allows a simpler API.
-  virtual void HandleMacroDefined(const Token &MacroNameTok,
-                                  const MacroDirective *MD);
+  /// Creates wrapper class for PPCallbacks so we can also process information
+  /// about includes that are inside of a preamble
+  virtual std::unique_ptr<PPCallbacks> createPPCallbacks();
 };
 
 enum class BuildPreambleError {
diff --git a/include/clang/Frontend/SerializedDiagnosticPrinter.h b/include/clang/Frontend/SerializedDiagnosticPrinter.h
index 4c57e9d..36ef014 100644
--- a/include/clang/Frontend/SerializedDiagnosticPrinter.h
+++ b/include/clang/Frontend/SerializedDiagnosticPrinter.h
@@ -29,7 +29,7 @@
 ///  a bitcode file.
 ///
 /// The created DiagnosticConsumer is designed for quick and lightweight
-/// transfer of of diagnostics to the enclosing build system (e.g., an IDE).
+/// transfer of diagnostics to the enclosing build system (e.g., an IDE).
 /// This allows wrapper tools for Clang to get diagnostics from Clang
 /// (via libclang) without needing to parse Clang's command line output.
 ///
diff --git a/include/clang/Frontend/TextDiagnosticBuffer.h b/include/clang/Frontend/TextDiagnosticBuffer.h
index 3bcf824..23f168e 100644
--- a/include/clang/Frontend/TextDiagnosticBuffer.h
+++ b/include/clang/Frontend/TextDiagnosticBuffer.h
@@ -29,6 +29,11 @@
   typedef DiagList::const_iterator const_iterator;
 private:
   DiagList Errors, Warnings, Remarks, Notes;
+  /// All - All diagnostics in the order in which they were generated.  That
+  /// order likely doesn't correspond to user input order, but it at least
+  /// keeps notes in the right places.  Each pair in the vector is a diagnostic
+  /// level and an index into the corresponding DiagList above.
+  std::vector<std::pair<DiagnosticsEngine::Level, size_t>> All;
 public:
   const_iterator err_begin() const  { return Errors.begin(); }
   const_iterator err_end() const    { return Errors.end(); }
diff --git a/include/clang/FrontendTool/Utils.h b/include/clang/FrontendTool/Utils.h
index 031ee7d..2e6b7b5 100644
--- a/include/clang/FrontendTool/Utils.h
+++ b/include/clang/FrontendTool/Utils.h
@@ -15,9 +15,18 @@
 #ifndef LLVM_CLANG_FRONTENDTOOL_UTILS_H
 #define LLVM_CLANG_FRONTENDTOOL_UTILS_H
 
+#include <memory>
+
 namespace clang {
 
 class CompilerInstance;
+class FrontendAction;
+
+/// Construct the FrontendAction of a compiler invocation based on the
+/// options specified for the compiler invocation.
+///
+/// \return - The created FrontendAction object
+std::unique_ptr<FrontendAction> CreateFrontendAction(CompilerInstance &CI);
 
 /// ExecuteCompilerInvocation - Execute the given actions described by the
 /// compiler invocation object in the given compiler instance.
diff --git a/include/clang/Index/IndexDataConsumer.h b/include/clang/Index/IndexDataConsumer.h
index 3154370..080f4cb 100644
--- a/include/clang/Index/IndexDataConsumer.h
+++ b/include/clang/Index/IndexDataConsumer.h
@@ -11,6 +11,7 @@
 #define LLVM_CLANG_INDEX_INDEXDATACONSUMER_H
 
 #include "clang/Index/IndexSymbol.h"
+#include "clang/Lex/Preprocessor.h"
 
 namespace clang {
   class ASTContext;
@@ -36,6 +37,8 @@
 
   virtual void initialize(ASTContext &Ctx) {}
 
+  virtual void setPreprocessor(std::shared_ptr<Preprocessor> PP) {}
+
   /// \returns true to continue indexing, or false to abort.
   virtual bool handleDeclOccurence(const Decl *D, SymbolRoleSet Roles,
                                    ArrayRef<SymbolRelation> Relations,
diff --git a/include/clang/Index/IndexSymbol.h b/include/clang/Index/IndexSymbol.h
index ae59136..3355dd2 100644
--- a/include/clang/Index/IndexSymbol.h
+++ b/include/clang/Index/IndexSymbol.h
@@ -56,7 +56,7 @@
   Using,
 };
 
-enum class SymbolLanguage {
+enum class SymbolLanguage : uint8_t {
   C,
   ObjC,
   CXX,
@@ -64,7 +64,7 @@
 };
 
 /// Language specific sub-kinds.
-enum class SymbolSubKind {
+enum class SymbolSubKind : uint8_t {
   None,
   CXXCopyConstructor,
   CXXMoveConstructor,
@@ -74,8 +74,9 @@
   UsingValue,
 };
 
+typedef uint8_t SymbolPropertySet;
 /// Set of properties that provide additional info about a symbol.
-enum class SymbolProperty : uint8_t {
+enum class SymbolProperty : SymbolPropertySet {
   Generic                       = 1 << 0,
   TemplatePartialSpecialization = 1 << 1,
   TemplateSpecialization        = 1 << 2,
@@ -86,9 +87,10 @@
   Local                         = 1 << 7,
 };
 static const unsigned SymbolPropertyBitNum = 8;
-typedef unsigned SymbolPropertySet;
 
 /// Set of roles that are attributed to symbol occurrences.
+///
+/// Low 9 bits of clang-c/include/Index.h CXSymbolRole mirrors this enum.
 enum class SymbolRole : uint32_t {
   Declaration = 1 << 0,
   Definition  = 1 << 1,
@@ -127,8 +129,8 @@
 struct SymbolInfo {
   SymbolKind Kind;
   SymbolSubKind SubKind;
-  SymbolPropertySet Properties;
   SymbolLanguage Lang;
+  SymbolPropertySet Properties;
 };
 
 SymbolInfo getSymbolInfo(const Decl *D);
diff --git a/include/clang/Lex/HeaderSearch.h b/include/clang/Lex/HeaderSearch.h
index 126bbdf..4e8d9cb 100644
--- a/include/clang/Lex/HeaderSearch.h
+++ b/include/clang/Lex/HeaderSearch.h
@@ -1,4 +1,4 @@
-//===--- HeaderSearch.h - Resolve Header File Locations ---------*- C++ -*-===//
+//===- HeaderSearch.h - Resolve Header File Locations -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,25 +14,37 @@
 #ifndef LLVM_CLANG_LEX_HEADERSEARCH_H
 #define LLVM_CLANG_LEX_HEADERSEARCH_H
 
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Lex/DirectoryLookup.h"
 #include "clang/Lex/ModuleMap.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Allocator.h"
+#include <cassert>
+#include <cstddef>
 #include <memory>
+#include <string>
+#include <utility>
 #include <vector>
 
 namespace clang {
-  
-class DiagnosticsEngine;  
+
+class DiagnosticsEngine;
+class DirectoryEntry;
 class ExternalPreprocessorSource;
 class FileEntry;
 class FileManager;
+class HeaderMap;
 class HeaderSearchOptions;
 class IdentifierInfo;
+class LangOptions;
+class Module;
 class Preprocessor;
+class TargetInfo;
 
 /// \brief The preprocessor keeps track of this information for each
 /// file that is \#included.
@@ -76,14 +88,14 @@
   unsigned IsValid : 1;
   
   /// \brief The number of times the file has been included already.
-  unsigned short NumIncludes;
+  unsigned short NumIncludes = 0;
 
   /// \brief The ID number of the controlling macro.
   ///
   /// This ID number will be non-zero when there is a controlling
   /// macro whose IdentifierInfo may not yet have been loaded from
   /// external storage.
-  unsigned ControllingMacroID;
+  unsigned ControllingMacroID = 0;
 
   /// If this file has a \#ifndef XXX (or equivalent) guard that
   /// protects the entire contents of the file, this is the identifier
@@ -93,17 +105,16 @@
   /// the controlling macro of this header, since
   /// getControllingMacro() is able to load a controlling macro from
   /// external storage.
-  const IdentifierInfo *ControllingMacro;
+  const IdentifierInfo *ControllingMacro = nullptr;
 
   /// \brief If this header came from a framework include, this is the name
   /// of the framework.
   StringRef Framework;
   
   HeaderFileInfo()
-    : isImport(false), isPragmaOnce(false), DirInfo(SrcMgr::C_User), 
-      External(false), isModuleHeader(false), isCompilingModuleHeader(false),
-      Resolved(false), IndexHeaderMapHeader(false), IsValid(0),
-      NumIncludes(0), ControllingMacroID(0), ControllingMacro(nullptr)  {}
+      : isImport(false), isPragmaOnce(false), DirInfo(SrcMgr::C_User), 
+        External(false), isModuleHeader(false), isCompilingModuleHeader(false),
+        Resolved(false), IndexHeaderMapHeader(false), IsValid(false)  {}
 
   /// \brief Retrieve the controlling macro for this header file, if
   /// any.
@@ -135,6 +146,8 @@
 /// \brief Encapsulates the information needed to find the file referenced
 /// by a \#include or \#include_next, (sub-)framework lookup, etc.
 class HeaderSearch {
+  friend class DirectoryLookup;
+
   /// This structure is used to record entries in our framework cache.
   struct FrameworkCacheEntry {
     /// The directory entry which should be used for the cached framework.
@@ -151,6 +164,7 @@
 
   DiagnosticsEngine &Diags;
   FileManager &FileMgr;
+
   /// \#include search path information.  Requests for \#include "x" search the
   /// directory of the \#including file first, then each directory in SearchDirs
   /// consecutively. Requests for <x> search the current dir first, then each
@@ -158,9 +172,9 @@
   /// NoCurDirSearch is true, then the check for the file in the current
   /// directory is suppressed.
   std::vector<DirectoryLookup> SearchDirs;
-  unsigned AngledDirIdx;
-  unsigned SystemDirIdx;
-  bool NoCurDirSearch;
+  unsigned AngledDirIdx = 0;
+  unsigned SystemDirIdx = 0;
+  bool NoCurDirSearch = false;
 
   /// \brief \#include prefixes for which the 'system header' property is
   /// overridden.
@@ -168,7 +182,7 @@
   /// For a \#include "x" or \#include \<x> directive, the last string in this
   /// list which is a prefix of 'x' determines whether the file is treated as
   /// a system header.
-  std::vector<std::pair<std::string, bool> > SystemHeaderPrefixes;
+  std::vector<std::pair<std::string, bool>> SystemHeaderPrefixes;
 
   /// \brief The path to the module cache.
   std::string ModuleCachePath;
@@ -182,15 +196,17 @@
     /// Starting index in SearchDirs that the cached search was performed from.
     /// If there is a hit and this value doesn't match the current query, the
     /// cache has to be ignored.
-    unsigned StartIdx;
+    unsigned StartIdx = 0;
+
     /// The entry in SearchDirs that satisfied the query.
-    unsigned HitIdx;
+    unsigned HitIdx = 0;
+
     /// This is non-null if the original filename was mapped to a framework
     /// include via a headermap.
-    const char *MappedName;
+    const char *MappedName = nullptr;
 
     /// Default constructor -- Initialize all members with zero.
-    LookupFileCacheInfo(): StartIdx(0), HitIdx(0), MappedName(nullptr) {}
+    LookupFileCacheInfo() = default;
 
     void reset(unsigned StartIdx) {
       this->StartIdx = StartIdx;
@@ -206,13 +222,13 @@
   /// IncludeAliases - maps include file names (including the quotes or
   /// angle brackets) to other include file names.  This is used to support the
   /// include_alias pragma for Microsoft compatibility.
-  typedef llvm::StringMap<std::string, llvm::BumpPtrAllocator>
-    IncludeAliasMap;
+  using IncludeAliasMap =
+      llvm::StringMap<std::string, llvm::BumpPtrAllocator>;
   std::unique_ptr<IncludeAliasMap> IncludeAliases;
 
   /// HeaderMaps - This is a mapping from FileEntry -> HeaderMap, uniquing
   /// headermaps.  This vector owns the headermap.
-  std::vector<std::pair<const FileEntry*, const HeaderMap*> > HeaderMaps;
+  std::vector<std::pair<const FileEntry *, const HeaderMap *>> HeaderMaps;
 
   /// \brief The mapping between modules and headers.
   mutable ModuleMap ModMap;
@@ -231,26 +247,23 @@
   /// \brief Entity used to resolve the identifier IDs of controlling
   /// macros into IdentifierInfo pointers, and keep the identifire up to date,
   /// as needed.
-  ExternalPreprocessorSource *ExternalLookup;
+  ExternalPreprocessorSource *ExternalLookup = nullptr;
 
   /// \brief Entity used to look up stored header file information.
-  ExternalHeaderFileInfoSource *ExternalSource;
+  ExternalHeaderFileInfoSource *ExternalSource = nullptr;
   
   // Various statistics we track for performance analysis.
-  unsigned NumIncluded;
-  unsigned NumMultiIncludeFileOptzn;
-  unsigned NumFrameworkLookups, NumSubFrameworkLookups;
+  unsigned NumIncluded = 0;
+  unsigned NumMultiIncludeFileOptzn = 0;
+  unsigned NumFrameworkLookups = 0;
+  unsigned NumSubFrameworkLookups = 0;
 
-  // HeaderSearch doesn't support default or copy construction.
-  HeaderSearch(const HeaderSearch&) = delete;
-  void operator=(const HeaderSearch&) = delete;
-
-  friend class DirectoryLookup;
-  
 public:
   HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts,
                SourceManager &SourceMgr, DiagnosticsEngine &Diags,
                const LangOptions &LangOpts, const TargetInfo *Target);
+  HeaderSearch(const HeaderSearch &) = delete;
+  HeaderSearch &operator=(const HeaderSearch &) = delete;
   ~HeaderSearch();
 
   /// \brief Retrieve the header-search options with which this header search
@@ -282,7 +295,7 @@
   }
 
   /// \brief Set the list of system header prefixes.
-  void SetSystemHeaderPrefixes(ArrayRef<std::pair<std::string, bool> > P) {
+  void SetSystemHeaderPrefixes(ArrayRef<std::pair<std::string, bool>> P) {
     SystemHeaderPrefixes.assign(P.begin(), P.end());
   }
 
@@ -310,7 +323,7 @@
     IncludeAliasMap::const_iterator Iter = IncludeAliases->find(Source);
     if (Iter != IncludeAliases->end())
       return Iter->second;
-    return StringRef();
+    return {};
   }
 
   /// \brief Set the path to the module cache.
@@ -403,7 +416,7 @@
     return FrameworkMap[FWName];
   }
 
-  /// \brief Mark the specified file as a target of of a \#include,
+  /// \brief Mark the specified file as a target of a \#include,
   /// \#include_next, or \#import directive.
   ///
   /// \return false if \#including the file will have no effect or true
@@ -493,7 +506,6 @@
   std::string getPrebuiltModuleFileName(StringRef ModuleName,
                                         bool FileMapOnly = false);
 
-
   /// \brief Retrieve the name of the (to-be-)cached module file that should
   /// be used to load a module with the given name.
   ///
@@ -572,7 +584,6 @@
   void loadTopLevelSystemModules();
 
 private:
-
   /// \brief Lookup a module with the given module name and search-name.
   ///
   /// \param ModuleName The name of the module we're looking for.
@@ -652,7 +663,8 @@
                                             bool WantExternal = true) const;
 
   // Used by external tools
-  typedef std::vector<DirectoryLookup>::const_iterator search_dir_iterator;
+  using search_dir_iterator = std::vector<DirectoryLookup>::const_iterator;
+
   search_dir_iterator search_dir_begin() const { return SearchDirs.begin(); }
   search_dir_iterator search_dir_end() const { return SearchDirs.end(); }
   unsigned search_dir_size() const { return SearchDirs.size(); }
@@ -660,6 +672,7 @@
   search_dir_iterator quoted_dir_begin() const {
     return SearchDirs.begin();
   }
+
   search_dir_iterator quoted_dir_end() const {
     return SearchDirs.begin() + AngledDirIdx;
   }
@@ -667,6 +680,7 @@
   search_dir_iterator angled_dir_begin() const {
     return SearchDirs.begin() + AngledDirIdx;
   }
+
   search_dir_iterator angled_dir_end() const {
     return SearchDirs.begin() + SystemDirIdx;
   }
@@ -674,11 +688,12 @@
   search_dir_iterator system_dir_begin() const {
     return SearchDirs.begin() + SystemDirIdx;
   }
+
   search_dir_iterator system_dir_end() const { return SearchDirs.end(); }
 
   /// \brief Retrieve a uniqued framework name.
   StringRef getUniqueFrameworkName(StringRef Framework);
-  
+
   /// \brief Suggest a path by which the specified file could be found, for
   /// use in diagnostics to suggest a #include.
   ///
@@ -687,6 +702,15 @@
   std::string suggestPathToFileForDiagnostics(const FileEntry *File,
                                               bool *IsSystem = nullptr);
 
+  /// \brief Suggest a path by which the specified file could be found, for
+  /// use in diagnostics to suggest a #include.
+  ///
+  /// \param WorkingDir If non-empty, this will be prepended to search directory
+  /// paths that are relative.
+  std::string suggestPathToFileForDiagnostics(llvm::StringRef File,
+                                              llvm::StringRef WorkingDir,
+                                              bool *IsSystem = nullptr);
+
   void PrintStats();
   
   size_t getTotalMemory() const;
@@ -696,10 +720,13 @@
   enum LoadModuleMapResult {
     /// \brief The module map file had already been loaded.
     LMM_AlreadyLoaded,
+
     /// \brief The module map file was loaded by this invocation.
     LMM_NewlyLoaded,
+
     /// \brief There is was directory with the given name.
     LMM_NoDirectory,
+
     /// \brief There was either no module map file or the module map file was
     /// invalid.
     LMM_InvalidModuleMap
@@ -735,6 +762,6 @@
                                         bool IsSystem, bool IsFramework);
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_LEX_HEADERSEARCH_H
diff --git a/include/clang/Lex/HeaderSearchOptions.h b/include/clang/Lex/HeaderSearchOptions.h
index 356f500..937ad98 100644
--- a/include/clang/Lex/HeaderSearchOptions.h
+++ b/include/clang/Lex/HeaderSearchOptions.h
@@ -1,4 +1,4 @@
-//===--- HeaderSearchOptions.h ----------------------------------*- C++ -*-===//
+//===- HeaderSearchOptions.h ------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,9 +12,9 @@
 
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/CachedHashString.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringRef.h"
+#include <cstdint> 
 #include <string>
 #include <vector>
 #include <map>
@@ -22,26 +22,45 @@
 namespace clang {
 
 namespace frontend {
-  /// IncludeDirGroup - Identifies the group an include Entry belongs to,
-  /// representing its relative positive in the search list.
-  /// \#include directives whose paths are enclosed by string quotes ("")
-  /// start searching at the Quoted group (specified by '-iquote'),
-  /// then search the Angled group, then the System group, etc.
-  enum IncludeDirGroup {
-    Quoted = 0,     ///< '\#include ""' paths, added by 'gcc -iquote'.
-    Angled,         ///< Paths for '\#include <>' added by '-I'.
-    IndexHeaderMap, ///< Like Angled, but marks header maps used when
-                       ///  building frameworks.
-    System,         ///< Like Angled, but marks system directories.
-    ExternCSystem,  ///< Like System, but headers are implicitly wrapped in
-                    ///  extern "C".
-    CSystem,        ///< Like System, but only used for C.
-    CXXSystem,      ///< Like System, but only used for C++.
-    ObjCSystem,     ///< Like System, but only used for ObjC.
-    ObjCXXSystem,   ///< Like System, but only used for ObjC++.
-    After           ///< Like System, but searched after the system directories.
-  };
-}
+
+/// IncludeDirGroup - Identifies the group an include Entry belongs to,
+/// representing its relative positive in the search list.
+/// \#include directives whose paths are enclosed by string quotes ("")
+/// start searching at the Quoted group (specified by '-iquote'),
+/// then search the Angled group, then the System group, etc.
+enum IncludeDirGroup {
+  /// '\#include ""' paths, added by 'gcc -iquote'.
+  Quoted = 0,
+
+  /// Paths for '\#include <>' added by '-I'.
+  Angled,
+
+  /// Like Angled, but marks header maps used when building frameworks.
+  IndexHeaderMap, 
+
+  /// Like Angled, but marks system directories.
+  System,
+
+  /// Like System, but headers are implicitly wrapped in extern "C".
+  ExternCSystem,
+
+  /// Like System, but only used for C.
+  CSystem,
+
+  /// Like System, but only used for C++.
+  CXXSystem,
+
+  /// Like System, but only used for ObjC.
+  ObjCSystem,
+
+  /// Like System, but only used for ObjC++.
+  ObjCXXSystem,
+
+  /// Like System, but searched after the system directories.
+  After
+};
+
+} // namespace frontend
 
 /// HeaderSearchOptions - Helper class for storing options related to the
 /// initialization of the HeaderSearch object.
@@ -59,8 +78,8 @@
 
     Entry(StringRef path, frontend::IncludeDirGroup group, bool isFramework,
           bool ignoreSysRoot)
-      : Path(path), Group(group), IsFramework(isFramework),
-        IgnoreSysRoot(ignoreSysRoot) {}
+        : Path(path), Group(group), IsFramework(isFramework),
+          IgnoreSysRoot(ignoreSysRoot) {}
   };
 
   struct SystemHeaderPrefix {
@@ -72,7 +91,7 @@
     bool IsSystemHeader;
 
     SystemHeaderPrefix(StringRef Prefix, bool IsSystemHeader)
-      : Prefix(Prefix), IsSystemHeader(IsSystemHeader) {}
+        : Prefix(Prefix), IsSystemHeader(IsSystemHeader) {}
   };
 
   /// If non-empty, the directory to use as a "virtual system root" for include
@@ -130,7 +149,7 @@
   /// files.
   ///
   /// The default value is large, e.g., the operation runs once a week.
-  unsigned ModuleCachePruneInterval;
+  unsigned ModuleCachePruneInterval = 7 * 24 * 60 * 60;
 
   /// \brief The time (in seconds) after which an unused module file will be
   /// considered unused and will, therefore, be pruned.
@@ -139,13 +158,13 @@
   /// accessed in this many seconds will be removed. The default value is
   /// large, e.g., a month, to avoid forcing infrequently-used modules to be
   /// regenerated often.
-  unsigned ModuleCachePruneAfter;
+  unsigned ModuleCachePruneAfter = 31 * 24 * 60 * 60;
 
   /// \brief The time in seconds when the build session started.
   ///
   /// This time is used by other optimizations in header search and module
   /// loading.
-  uint64_t BuildSessionTimestamp;
+  uint64_t BuildSessionTimestamp = 0;
 
   /// \brief The set of macro names that should be ignored for the purposes
   /// of computing the module hash.
@@ -185,10 +204,8 @@
   unsigned ModulesHashContent : 1;
 
   HeaderSearchOptions(StringRef _Sysroot = "/")
-      : Sysroot(_Sysroot), ModuleFormat("raw"), DisableModuleHash(0),
-        ImplicitModuleMaps(0), ModuleMapFileHomeIsCwd(0),
-        ModuleCachePruneInterval(7 * 24 * 60 * 60),
-        ModuleCachePruneAfter(31 * 24 * 60 * 60), BuildSessionTimestamp(0),
+      : Sysroot(_Sysroot), ModuleFormat("raw"), DisableModuleHash(false),
+        ImplicitModuleMaps(false), ModuleMapFileHomeIsCwd(false),
         UseBuiltinIncludes(true), UseStandardSystemIncludes(true),
         UseStandardCXXIncludes(true), UseLibcxx(false), Verbose(false),
         ModulesValidateOncePerBuildSession(false),
@@ -217,6 +234,6 @@
   }
 };
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_LEX_HEADERSEARCHOPTIONS_H
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 603ce10..d588496 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -1,4 +1,4 @@
-//===--- Lexer.h - C Language Family Lexer ----------------------*- C++ -*-===//
+//===- Lexer.h - C Language Family Lexer ------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,25 +15,39 @@
 #define LLVM_CLANG_LEX_LEXER_H
 
 #include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/PreprocessorLexer.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include <cassert>
+#include <cstdint>
 #include <string>
 
+namespace llvm {
+
+class MemoryBuffer;
+
+} // namespace llvm
+
 namespace clang {
-class DiagnosticsEngine;
-class SourceManager;
-class Preprocessor;
+
 class DiagnosticBuilder;
+class Preprocessor;
+class SourceManager;
 
 /// ConflictMarkerKind - Kinds of conflict marker which the lexer might be
 /// recovering from.
 enum ConflictMarkerKind {
   /// Not within a conflict marker.
   CMK_None,
+
   /// A normal or diff3 conflict marker, initiated by at least 7 "<"s,
   /// separated by at least 7 "="s or "|"s, and terminated by at least 7 ">"s.
   CMK_Normal,
+
   /// A Perforce-style conflict marker, initiated by 4 ">"s,
   /// separated by 4 "="s, and terminated by 4 "<"s.
   CMK_Perforce
@@ -43,17 +57,17 @@
 /// PreprocessorOptions::PrecompiledPreambleBytes.
 /// The preamble includes the BOM, if any.
 struct PreambleBounds {
-  PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
-    : Size(Size),
-      PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
-
   /// \brief Size of the preamble in bytes.
   unsigned Size;
+
   /// \brief Whether the preamble ends at the start of a new line.
   ///
   /// Used to inform the lexer as to whether it's starting at the beginning of
   /// a line after skipping the preamble.
   bool PreambleEndsAtStartOfLine;
+
+  PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
+      : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
 };
 
 /// Lexer - This provides a simple interface that turns a text buffer into a
@@ -61,16 +75,28 @@
 /// or buffering/seeking of tokens, only forward lexing is supported.  It relies
 /// on the specified Preprocessor object to handle preprocessor directives, etc.
 class Lexer : public PreprocessorLexer {
+  friend class Preprocessor;
+
   void anchor() override;
 
   //===--------------------------------------------------------------------===//
   // Constant configuration values for this lexer.
-  const char *BufferStart;       // Start of the buffer.
-  const char *BufferEnd;         // End of the buffer.
-  SourceLocation FileLoc;        // Location for start of file.
-  LangOptions LangOpts;          // LangOpts enabled by this language (cache).
-  bool Is_PragmaLexer;           // True if lexer for _Pragma handling.
-  
+
+  // Start of the buffer.
+  const char *BufferStart;
+
+  // End of the buffer.
+  const char *BufferEnd;
+
+  // Location for start of file.
+  SourceLocation FileLoc;
+
+  // LangOpts enabled by this language (cache).
+  LangOptions LangOpts;
+
+  // True if lexer for _Pragma handling.
+  bool Is_PragmaLexer;
+
   //===--------------------------------------------------------------------===//
   // Context-specific lexing flags set by the preprocessor.
   //
@@ -106,13 +132,9 @@
   // CurrentConflictMarkerState - The kind of conflict marker we are handling.
   ConflictMarkerKind CurrentConflictMarkerState;
 
-  Lexer(const Lexer &) = delete;
-  void operator=(const Lexer &) = delete;
-  friend class Preprocessor;
-
   void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);
-public:
 
+public:
   /// Lexer constructor - Create a new lexer object for the specified buffer
   /// with the specified preprocessor managing the lexing process.  This lexer
   /// assumes that the associated file buffer and Preprocessor objects will
@@ -131,6 +153,9 @@
   Lexer(FileID FID, const llvm::MemoryBuffer *InputBuffer,
         const SourceManager &SM, const LangOptions &LangOpts);
 
+  Lexer(const Lexer &) = delete;
+  Lexer &operator=(const Lexer &) = delete;
+
   /// Create_PragmaLexer: Lexer constructor - Create a new lexer object for
   /// _Pragma expansion.  This has a variety of magic semantics that this method
   /// sets up.  It returns a new'd Lexer that must be delete'd when done.
@@ -139,7 +164,6 @@
                                    SourceLocation ExpansionLocEnd,
                                    unsigned TokLen, Preprocessor &PP);
 
-
   /// getLangOpts - Return the language features currently enabled.
   /// NOTE: this lexer modifies features as a file is parsed!
   const LangOptions &getLangOpts() const { return LangOpts; }
@@ -241,17 +265,16 @@
 
   /// \brief Return the current location in the buffer.
   const char *getBufferLocation() const { return BufferPtr; }
-  
-  /// Stringify - Convert the specified string into a C string by escaping '\'
-  /// and " characters.  This does not add surrounding ""'s to the string.
+
+  /// Stringify - Convert the specified string into a C string by i) escaping
+  /// '\\' and " characters and ii) replacing newline character(s) with "\\n".
   /// If Charify is true, this escapes the ' character instead of ".
   static std::string Stringify(StringRef Str, bool Charify = false);
 
-  /// Stringify - Convert the specified string into a C string by escaping '\'
-  /// and " characters.  This does not add surrounding ""'s to the string.
+  /// Stringify - Convert the specified string into a C string by i) escaping
+  /// '\\' and " characters and ii) replacing newline character(s) with "\\n".
   static void Stringify(SmallVectorImpl<char> &Str);
 
-  
   /// getSpelling - This method is used to get the spelling of a token into a
   /// preallocated buffer, instead of as an std::string.  The caller is required
   /// to allocate enough space for the token, which is guaranteed to be at least
@@ -262,11 +285,11 @@
   /// to point to a constant buffer with the data already in it (avoiding a
   /// copy).  The caller is not allowed to modify the returned buffer pointer
   /// if an internal buffer is returned.
-  static unsigned getSpelling(const Token &Tok, const char *&Buffer, 
+  static unsigned getSpelling(const Token &Tok, const char *&Buffer,
                               const SourceManager &SourceMgr,
                               const LangOptions &LangOpts,
                               bool *Invalid = nullptr);
-  
+
   /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
   /// token is the characters used to represent the token in the source file
   /// after trigraph expansion and escaped-newline folding.  In particular, this
@@ -274,7 +297,7 @@
   /// UCNs, etc.
   static std::string getSpelling(const Token &Tok,
                                  const SourceManager &SourceMgr,
-                                 const LangOptions &LangOpts, 
+                                 const LangOptions &LangOpts,
                                  bool *Invalid = nullptr);
 
   /// getSpelling - This method is used to get the spelling of the
@@ -290,7 +313,7 @@
                                const SourceManager &SourceMgr,
                                const LangOptions &LangOpts,
                                bool *invalid = nullptr);
-  
+
   /// MeasureTokenLength - Relex the token at the specified location and return
   /// its length in bytes in the input file.  If the token needs cleaning (e.g.
   /// includes a trigraph or an escaped newline) then this count includes bytes
@@ -312,7 +335,7 @@
   static SourceLocation GetBeginningOfToken(SourceLocation Loc,
                                             const SourceManager &SM,
                                             const LangOptions &LangOpts);
-  
+
   /// AdvanceToTokenCharacter - If the current SourceLocation specifies a
   /// location at the start of a token, return a new location that specifies a
   /// character within the token.  This handles trigraphs and escaped newlines.
@@ -320,7 +343,7 @@
                                                 unsigned Character,
                                                 const SourceManager &SM,
                                                 const LangOptions &LangOpts);
-  
+
   /// \brief Computes the source location just past the end of the
   /// token at this source location.
   ///
@@ -466,6 +489,13 @@
                                         const LangOptions &LangOpts,
                                         unsigned MaxLines = 0);
 
+  /// Finds the token that comes right after the given location.
+  ///
+  /// Returns the next token, or none if the location is inside a macro.
+  static Optional<Token> findNextToken(SourceLocation Loc,
+                                       const SourceManager &SM,
+                                       const LangOptions &LangOpts);
+
   /// \brief Checks that the given token is the first token that occurs after
   /// the given location (this excludes comments and whitespace). Returns the
   /// location immediately after the specified token. If the token is not found
@@ -504,9 +534,9 @@
   static StringRef getIndentationForLine(SourceLocation Loc,
                                          const SourceManager &SM);
 
+private:
   //===--------------------------------------------------------------------===//
   // Internal implementation interfaces.
-private:
 
   /// LexTokenInternal - Internal interface to lex a preprocessing token. Called
   /// by Lex.
@@ -660,7 +690,7 @@
   bool SkipBlockComment      (Token &Result, const char *CurPtr,
                               bool &TokAtPhysicalStartOfLine);
   bool SaveLineComment       (Token &Result, const char *CurPtr);
-  
+
   bool IsStartOfConflictMarker(const char *CurPtr);
   bool HandleEndOfConflictMarker(const char *CurPtr);
 
@@ -679,7 +709,7 @@
   ///               valid), this parameter will be updated to point to the
   ///               character after the UCN.
   /// \param SlashLoc The position in the source buffer of the '\'.
-  /// \param Tok The token being formed. Pass \c NULL to suppress diagnostics
+  /// \param Tok The token being formed. Pass \c nullptr to suppress diagnostics
   ///            and handle token formation in the caller.
   ///
   /// \return The Unicode codepoint specified by the UCN, or 0 if the UCN is
@@ -708,6 +738,6 @@
   bool tryConsumeIdentifierUTF8Char(const char *&CurPtr);
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_LEX_LEXER_H
diff --git a/include/clang/Lex/MacroInfo.h b/include/clang/Lex/MacroInfo.h
index 47f10d6..3029294 100644
--- a/include/clang/Lex/MacroInfo.h
+++ b/include/clang/Lex/MacroInfo.h
@@ -1,4 +1,4 @@
-//===--- MacroInfo.h - Information about #defined identifiers ---*- C++ -*-===//
+//===- MacroInfo.h - Information about #defined identifiers -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,27 +6,33 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the clang::MacroInfo and clang::MacroDirective classes.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_LEX_MACROINFO_H
 #define LLVM_CLANG_LEX_MACROINFO_H
 
 #include "clang/Lex/Token.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Allocator.h"
+#include <algorithm>
 #include <cassert>
 
 namespace clang {
+
+class DefMacroDirective;
+class IdentifierInfo;
 class Module;
-class ModuleMacro;
 class Preprocessor;
+class SourceManager;
 
 /// \brief Encapsulates the data about a macro definition (e.g. its tokens).
 ///
@@ -37,6 +43,7 @@
 
   /// \brief The location the macro is defined.
   SourceLocation Location;
+
   /// \brief The location of the last token in the macro.
   SourceLocation EndLocation;
 
@@ -46,10 +53,10 @@
   ///
   /// This can be empty, for, e.g. "#define X()".  In a C99-style variadic
   /// macro, this includes the \c __VA_ARGS__ identifier on the list.
-  IdentifierInfo **ParameterList;
+  IdentifierInfo **ParameterList = nullptr;
 
   /// \see ParameterList
-  unsigned NumParameters;
+  unsigned NumParameters = 0;
 
   /// \brief This is the list of tokens that the macro is defined to.
   SmallVector<Token, 8> ReplacementTokens;
@@ -169,7 +176,7 @@
 
   /// Parameters - The list of parameters for a function-like macro.  This can 
   /// be empty, for, e.g. "#define X()".
-  typedef IdentifierInfo *const *param_iterator;
+  using param_iterator = IdentifierInfo *const *;
   bool param_empty() const { return NumParameters == 0; }
   param_iterator param_begin() const { return ParameterList; }
   param_iterator param_end() const { return ParameterList + NumParameters; }
@@ -224,7 +231,6 @@
   bool isWarnIfUnused() const { return IsWarnIfUnused; }
 
   /// \brief Return the number of tokens that this macro expands to.
-  ///
   unsigned getNumTokens() const { return ReplacementTokens.size(); }
 
   const Token &getReplacementToken(unsigned Tok) const {
@@ -232,7 +238,8 @@
     return ReplacementTokens[Tok];
   }
 
-  typedef SmallVectorImpl<Token>::const_iterator tokens_iterator;
+  using tokens_iterator = SmallVectorImpl<Token>::const_iterator;
+
   tokens_iterator tokens_begin() const { return ReplacementTokens.begin(); }
   tokens_iterator tokens_end() const { return ReplacementTokens.end(); }
   bool tokens_empty() const { return ReplacementTokens.empty(); }
@@ -269,12 +276,10 @@
   void dump() const;
 
 private:
-  unsigned getDefinitionLengthSlow(const SourceManager &SM) const;
-
   friend class Preprocessor;
-};
 
-class DefMacroDirective;
+  unsigned getDefinitionLengthSlow(const SourceManager &SM) const;
+};
 
 /// \brief Encapsulates changes to the "macros namespace" (the location where
 /// the macro name became active, the location where it was undefined, etc.).
@@ -285,11 +290,15 @@
 /// create additional DefMacroDirectives for the same MacroInfo.
 class MacroDirective {
 public:
-  enum Kind { MD_Define, MD_Undefine, MD_Visibility };
+  enum Kind {
+    MD_Define,
+    MD_Undefine,
+    MD_Visibility
+  };
 
 protected:
-  /// \brief Previous macro directive for the same identifier, or NULL.
-  MacroDirective *Previous;
+  /// \brief Previous macro directive for the same identifier, or nullptr.
+  MacroDirective *Previous = nullptr;
 
   SourceLocation Loc;
 
@@ -306,8 +315,7 @@
   unsigned IsPublic : 1;
 
   MacroDirective(Kind K, SourceLocation Loc)
-      : Previous(nullptr), Loc(Loc), MDKind(K), IsFromPCH(false),
-        IsPublic(true) {}
+      : Loc(Loc), MDKind(K), IsFromPCH(false), IsPublic(true) {}
 
 public:
   Kind getKind() const { return Kind(MDKind); }
@@ -329,13 +337,12 @@
   void setIsFromPCH() { IsFromPCH = true; }
 
   class DefInfo {
-    DefMacroDirective *DefDirective;
+    DefMacroDirective *DefDirective = nullptr;
     SourceLocation UndefLoc;
-    bool IsPublic;
+    bool IsPublic = true;
 
   public:
-    DefInfo() : DefDirective(nullptr), IsPublic(true) {}
-
+    DefInfo() = default;
     DefInfo(DefMacroDirective *DefDirective, SourceLocation UndefLoc,
             bool isPublic)
         : DefDirective(DefDirective), UndefLoc(UndefLoc), IsPublic(isPublic) {}
@@ -345,6 +352,7 @@
 
     inline SourceLocation getLocation() const;
     inline MacroInfo *getMacroInfo();
+
     const MacroInfo *getMacroInfo() const {
       return const_cast<DefInfo *>(this)->getMacroInfo();
     }
@@ -360,6 +368,7 @@
     explicit operator bool() const { return isValid(); }
 
     inline DefInfo getPreviousDefinition();
+
     const DefInfo getPreviousDefinition() const {
       return const_cast<DefInfo *>(this)->getPreviousDefinition();
     }
@@ -412,6 +421,7 @@
   static bool classof(const MacroDirective *MD) {
     return MD->getKind() == MD_Define;
   }
+
   static bool classof(const DefMacroDirective *) { return true; }
 };
 
@@ -426,6 +436,7 @@
   static bool classof(const MacroDirective *MD) {
     return MD->getKind() == MD_Undefine;
   }
+
   static bool classof(const UndefMacroDirective *) { return true; }
 };
 
@@ -444,12 +455,13 @@
   static bool classof(const MacroDirective *MD) {
     return MD->getKind() == MD_Visibility;
   }
+
   static bool classof(const VisibilityMacroDirective *) { return true; }
 };
 
 inline SourceLocation MacroDirective::DefInfo::getLocation() const {
   if (isInvalid())
-    return SourceLocation();
+    return {};
   return DefDirective->getLocation();
 }
 
@@ -462,7 +474,7 @@
 inline MacroDirective::DefInfo
 MacroDirective::DefInfo::getPreviousDefinition() {
   if (isInvalid() || DefDirective->getPrevious() == nullptr)
-    return DefInfo();
+    return {};
   return DefDirective->getPrevious()->getDefinition();
 }
 
@@ -474,23 +486,26 @@
 ///
 /// These are stored in a FoldingSet in the preprocessor.
 class ModuleMacro : public llvm::FoldingSetNode {
+  friend class Preprocessor;
+
   /// The name defined by the macro.
   IdentifierInfo *II;
+
   /// The body of the #define, or nullptr if this is a #undef.
   MacroInfo *Macro;
+
   /// The module that exports this macro.
   Module *OwningModule;
+
   /// The number of module macros that override this one.
-  unsigned NumOverriddenBy;
+  unsigned NumOverriddenBy = 0;
+
   /// The number of modules whose macros are directly overridden by this one.
   unsigned NumOverrides;
-  // ModuleMacro *OverriddenMacros[NumOverrides];
-
-  friend class Preprocessor;
 
   ModuleMacro(Module *OwningModule, IdentifierInfo *II, MacroInfo *Macro,
               ArrayRef<ModuleMacro *> Overrides)
-      : II(II), Macro(Macro), OwningModule(OwningModule), NumOverriddenBy(0),
+      : II(II), Macro(Macro), OwningModule(OwningModule),
         NumOverrides(Overrides.size()) {
     std::copy(Overrides.begin(), Overrides.end(),
               reinterpret_cast<ModuleMacro **>(this + 1));
@@ -504,6 +519,7 @@
   void Profile(llvm::FoldingSetNodeID &ID) const {
     return Profile(ID, OwningModule, II);
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID, Module *OwningModule,
                       IdentifierInfo *II) {
     ID.AddPointer(OwningModule);
@@ -522,13 +538,16 @@
 
   /// Iterators over the overridden module IDs.
   /// \{
-  typedef ModuleMacro *const *overrides_iterator;
+  using overrides_iterator = ModuleMacro *const *;
+
   overrides_iterator overrides_begin() const {
     return reinterpret_cast<overrides_iterator>(this + 1);
   }
+
   overrides_iterator overrides_end() const {
     return overrides_begin() + NumOverrides;
   }
+
   ArrayRef<ModuleMacro *> overrides() const {
     return llvm::makeArrayRef(overrides_begin(), overrides_end());
   }
@@ -547,7 +566,7 @@
   ArrayRef<ModuleMacro *> ModuleMacros;
 
 public:
-  MacroDefinition() : LatestLocalAndAmbiguous(), ModuleMacros() {}
+  MacroDefinition() = default;
   MacroDefinition(DefMacroDirective *MD, ArrayRef<ModuleMacro *> MMs,
                   bool IsAmbiguous)
       : LatestLocalAndAmbiguous(MD, IsAmbiguous), ModuleMacros(MMs) {}
@@ -586,6 +605,6 @@
   }
 };
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_LEX_MACROINFO_H
diff --git a/include/clang/Lex/ModuleLoader.h b/include/clang/Lex/ModuleLoader.h
index ee0638b..30ea583 100644
--- a/include/clang/Lex/ModuleLoader.h
+++ b/include/clang/Lex/ModuleLoader.h
@@ -1,4 +1,4 @@
-//===--- ModuleLoader.h - Module Loader Interface ---------------*- C++ -*-===//
+//===- ModuleLoader.h - Module Loader Interface -----------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,23 +11,26 @@
 //  loading named modules.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_LEX_MODULELOADER_H
 #define LLVM_CLANG_LEX_MODULELOADER_H
 
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/Module.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/StringRef.h"
+#include <utility>
 
 namespace clang {
 
 class GlobalModuleIndex;
 class IdentifierInfo;
-class Module;
 
 /// \brief A sequence of identifier/location pairs used to describe a particular
 /// module or submodule, e.g., std.vector.
-typedef ArrayRef<std::pair<IdentifierInfo *, SourceLocation> > ModuleIdPath;
+using ModuleIdPath = ArrayRef<std::pair<IdentifierInfo *, SourceLocation>>;
 
 /// \brief Describes the result of attempting to load a module.
 class ModuleLoadResult {
@@ -35,16 +38,18 @@
   enum LoadResultKind {
     // We either succeeded or failed to load the named module.
     Normal,
+
     // The module exists, but does not actually contain the named submodule.
     // This should only happen if the named submodule was inferred from an
     // umbrella directory, but not actually part of the umbrella header.
     MissingExpected,
+
     // The module exists but cannot be imported due to a configuration mismatch.
     ConfigMismatch
   };
   llvm::PointerIntPair<Module *, 2, LoadResultKind> Storage;
 
-  ModuleLoadResult() : Storage() { }
+  ModuleLoadResult() = default;
   ModuleLoadResult(Module *M) : Storage(M, Normal) {}
   ModuleLoadResult(LoadResultKind Kind) : Storage(nullptr, Kind) {}
 
@@ -69,10 +74,10 @@
 class ModuleLoader {
   // Building a module if true.
   bool BuildingModule;
+
 public:
-  explicit ModuleLoader(bool BuildingModule = false) :
-    BuildingModule(BuildingModule),
-    HadFatalFailure(false) {}
+  explicit ModuleLoader(bool BuildingModule = false)
+      : BuildingModule(BuildingModule) {}
 
   virtual ~ModuleLoader();
   
@@ -80,6 +85,7 @@
   bool buildingModule() const {
     return BuildingModule;
   }
+
   /// \brief Flag indicating whether this instance is building a module.
   void setBuildingModule(bool BuildingModuleFlag) {
     BuildingModule = BuildingModuleFlag;
@@ -144,7 +150,7 @@
   virtual bool lookupMissingImports(StringRef Name,
                                     SourceLocation TriggerLoc) = 0;
 
-  bool HadFatalFailure;
+  bool HadFatalFailure = false;
 };
 
 /// A module loader that doesn't know how to load modules.
@@ -153,7 +159,7 @@
   ModuleLoadResult loadModule(SourceLocation ImportLoc, ModuleIdPath Path,
                               Module::NameVisibilityKind Visibility,
                               bool IsInclusionDirective) override {
-    return ModuleLoadResult();
+    return {};
   }
 
   void loadModuleFromSource(SourceLocation ImportLoc, StringRef ModuleName,
@@ -165,12 +171,13 @@
   GlobalModuleIndex *loadGlobalModuleIndex(SourceLocation TriggerLoc) override {
     return nullptr;
   }
+
   bool lookupMissingImports(StringRef Name,
                             SourceLocation TriggerLoc) override {
-    return 0;
+    return false;
   }
 };
   
-}
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_LEX_MODULELOADER_H
diff --git a/include/clang/Lex/ModuleMap.h b/include/clang/Lex/ModuleMap.h
index 4e600ce..b4ca320 100644
--- a/include/clang/Lex/ModuleMap.h
+++ b/include/clang/Lex/ModuleMap.h
@@ -1,4 +1,4 @@
-//===--- ModuleMap.h - Describe the layout of modules -----------*- C++ -*-===//
+//===- ModuleMap.h - Describe the layout of modules -------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,7 +18,6 @@
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/Module.h"
 #include "clang/Basic/SourceLocation.h"
-#include "clang/Basic/SourceManager.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PointerIntPair.h"
@@ -28,20 +27,19 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/ADT/Twine.h"
-#include <algorithm>
+#include <ctime>
 #include <memory>
 #include <string>
 #include <utility>
 
 namespace clang {
 
+class DiagnosticsEngine;
 class DirectoryEntry;
 class FileEntry;
 class FileManager;
-class DiagnosticConsumer;
-class DiagnosticsEngine;
 class HeaderSearch;
-class ModuleMapParser;
+class SourceManager;
 
 /// \brief A mechanism to observe the actions of the module map parser as it
 /// reads module map files.
@@ -100,6 +98,9 @@
   /// \brief The top-level modules that are known.
   llvm::StringMap<Module *> Modules;
 
+  /// Shadow modules created while building this module map.
+  llvm::SmallVector<Module*, 2> ShadowModules;
+
   /// \brief The number of modules we have created in total.
   unsigned NumCreatedModules = 0;
 
@@ -108,11 +109,14 @@
   enum ModuleHeaderRole {
     /// \brief This header is normally included in the module.
     NormalHeader  = 0x0,
+
     /// \brief This header is included but private.
     PrivateHeader = 0x1,
+
     /// \brief This header is part of the module (for layering purposes) but
     /// should be textually included.
     TextualHeader = 0x2,
+
     // Caution: Adding an enumerator needs other changes.
     // Adjust the number of bits for KnownHeader::Storage.
     // Adjust the bitfield HeaderFileInfo::HeaderRole size.
@@ -123,6 +127,7 @@
 
   /// Convert a header kind to a role. Requires Kind to not be HK_Excluded.
   static ModuleHeaderRole headerKindToRole(Module::HeaderKind Kind);
+
   /// Convert a header role to a kind.
   static Module::HeaderKind headerRoleToKind(ModuleHeaderRole Role);
 
@@ -132,8 +137,8 @@
     llvm::PointerIntPair<Module *, 2, ModuleHeaderRole> Storage;
 
   public:
-    KnownHeader() : Storage(nullptr, NormalHeader) { }
-    KnownHeader(Module *M, ModuleHeaderRole Role) : Storage(M, Role) { }
+    KnownHeader() : Storage(nullptr, NormalHeader) {}
+    KnownHeader(Module *M, ModuleHeaderRole Role) : Storage(M, Role) {}
 
     friend bool operator==(const KnownHeader &A, const KnownHeader &B) {
       return A.Storage == B.Storage;
@@ -166,11 +171,13 @@
     }
   };
 
-  typedef llvm::SmallPtrSet<const FileEntry *, 1> AdditionalModMapsSet;
+  using AdditionalModMapsSet = llvm::SmallPtrSet<const FileEntry *, 1>;
 
 private:
-  typedef llvm::DenseMap<const FileEntry *, SmallVector<KnownHeader, 1>>
-  HeadersMap;
+  friend class ModuleMapParser;
+
+  using HeadersMap =
+      llvm::DenseMap<const FileEntry *, SmallVector<KnownHeader, 1>>;
 
   /// \brief Mapping from each header to the module that owns the contents of
   /// that header.
@@ -178,6 +185,7 @@
 
   /// Map from file sizes to modules with lazy header directives of that size.
   mutable llvm::DenseMap<off_t, llvm::TinyPtrVector<Module*>> LazyHeadersBySize;
+
   /// Map from mtimes to modules with lazy header directives with those mtimes.
   mutable llvm::DenseMap<time_t, llvm::TinyPtrVector<Module*>>
               LazyHeadersByModTime;
@@ -190,11 +198,17 @@
   /// header.
   llvm::DenseMap<const DirectoryEntry *, Module *> UmbrellaDirs;
 
+  /// \brief A generation counter that is used to test whether modules of the
+  /// same name may shadow or are illegal redefintions.
+  ///
+  /// Modules from earlier scopes may shadow modules from later ones.
+  /// Modules from the same scope may not have the same name.
+  unsigned CurrentModuleScopeID = 0;
+
+  llvm::DenseMap<Module *, unsigned> ModuleScopeIDs;
+
   /// \brief The set of attributes that can be attached to a module.
   struct Attributes {
-    Attributes()
-        : IsSystem(), IsExternC(), IsExhaustive(), NoUndeclaredIncludes() {}
-
     /// \brief Whether this is a system module.
     unsigned IsSystem : 1;
 
@@ -207,12 +221,14 @@
     /// \brief Whether files in this module can only include non-modular headers
     /// and headers from used modules.
     unsigned NoUndeclaredIncludes : 1;
+
+    Attributes()
+        : IsSystem(false), IsExternC(false), IsExhaustive(false),
+          NoUndeclaredIncludes(false) {}
   };
 
   /// \brief A directory for which framework modules can be inferred.
   struct InferredDirectory {
-    InferredDirectory() : InferModules() {}
-
     /// \brief Whether to infer modules from this directory.
     unsigned InferModules : 1;
 
@@ -226,6 +242,8 @@
     /// \brief The names of modules that cannot be inferred within this
     /// directory.
     SmallVector<std::string, 2> ExcludedModules;
+
+    InferredDirectory() : InferModules(false) {}
   };
 
   /// \brief A mapping from directories to information about inferring
@@ -242,8 +260,6 @@
   /// map.
   llvm::DenseMap<const FileEntry *, bool> ParsedModuleMap;
 
-  friend class ModuleMapParser;
-  
   /// \brief Resolve the given export declaration into an actual export
   /// declaration.
   ///
@@ -345,7 +361,6 @@
             HeaderSearch &HeaderInfo);
 
   /// \brief Destroy the module map.
-  ///
   ~ModuleMap();
 
   /// \brief Set the target information.
@@ -464,7 +479,7 @@
   /// \param Name The name of the module to find or create.
   ///
   /// \param Parent The module that will act as the parent of this submodule,
-  /// or NULL to indicate that this is a top-level module.
+  /// or nullptr to indicate that this is a top-level module.
   ///
   /// \param IsFramework Whether this is a framework module.
   ///
@@ -499,13 +514,31 @@
   Module *inferFrameworkModule(const DirectoryEntry *FrameworkDir,
                                bool IsSystem, Module *Parent);
 
+  /// \brief Create a new top-level module that is shadowed by
+  /// \p ShadowingModule.
+  Module *createShadowedModule(StringRef Name, bool IsFramework,
+                               Module *ShadowingModule);
+
+  /// \brief Creates a new declaration scope for module names, allowing
+  /// previously defined modules to shadow definitions from the new scope.
+  ///
+  /// \note Module names from earlier scopes will shadow names from the new
+  /// scope, which is the opposite of how shadowing works for variables.
+  void finishModuleDeclarationScope() { CurrentModuleScopeID += 1; }
+
+  bool mayShadowNewModule(Module *ExistingModule) {
+    assert(!ExistingModule->Parent && "expected top-level module");
+    assert(ModuleScopeIDs.count(ExistingModule) && "unknown module");
+    return ModuleScopeIDs[ExistingModule] < CurrentModuleScopeID;
+  }
+
   /// \brief Retrieve the module map file containing the definition of the given
   /// module.
   ///
   /// \param Module The module whose module map file will be returned, if known.
   ///
   /// \returns The file entry for the module map file containing the given
-  /// module, or NULL if the module definition was inferred.
+  /// module, or nullptr if the module definition was inferred.
   const FileEntry *getContainingModuleMapFile(const Module *Module) const;
 
   /// \brief Get the module map file that (along with the module name) uniquely
@@ -605,18 +638,19 @@
   ///
   /// \returns true if an error occurred, false otherwise.
   bool parseModuleMapFile(const FileEntry *File, bool IsSystem,
-                          const DirectoryEntry *HomeDir, FileID ID = FileID(),
-                          unsigned *Offset = nullptr,
+                          const DirectoryEntry *HomeDir,
+                          FileID ID = FileID(), unsigned *Offset = nullptr,
                           SourceLocation ExternModuleLoc = SourceLocation());
 
   /// \brief Dump the contents of the module map, for debugging purposes.
   void dump();
   
-  typedef llvm::StringMap<Module *>::const_iterator module_iterator;
+  using module_iterator = llvm::StringMap<Module *>::const_iterator;
+
   module_iterator module_begin() const { return Modules.begin(); }
   module_iterator module_end()   const { return Modules.end(); }
 };
   
-} // end namespace clang
+} // namespace clang
 
 #endif // LLVM_CLANG_LEX_MODULEMAP_H
diff --git a/include/clang/Lex/MultipleIncludeOpt.h b/include/clang/Lex/MultipleIncludeOpt.h
index 3967f86..00d3047 100644
--- a/include/clang/Lex/MultipleIncludeOpt.h
+++ b/include/clang/Lex/MultipleIncludeOpt.h
@@ -119,7 +119,7 @@
   /// Note, we don't care about the input value of 'ReadAnyTokens'.  The caller
   /// ensures that this is only called if there are no tokens read before the
   /// \#ifndef.  The caller is required to do this, because reading the \#if
-  /// line obviously reads in in tokens.
+  /// line obviously reads in tokens.
   void EnterTopLevelIfndef(const IdentifierInfo *M, SourceLocation Loc) {
     // If the macro is already set, this is after the top-level #endif.
     if (TheMacro)
diff --git a/include/clang/Lex/PPCallbacks.h b/include/clang/Lex/PPCallbacks.h
index 8cb1656..19bce4d 100644
--- a/include/clang/Lex/PPCallbacks.h
+++ b/include/clang/Lex/PPCallbacks.h
@@ -410,6 +410,11 @@
     Second->PragmaDetectMismatch(Loc, Name, Value);
   }
 
+  void PragmaDebug(SourceLocation Loc, StringRef DebugType) override {
+    First->PragmaDebug(Loc, DebugType);
+    Second->PragmaDebug(Loc, DebugType);
+  }
+
   void PragmaMessage(SourceLocation Loc, StringRef Namespace,
                      PragmaMessageKind Kind, StringRef Str) override {
     First->PragmaMessage(Loc, Namespace, Kind, Str);
diff --git a/include/clang/Lex/PTHLexer.h b/include/clang/Lex/PTHLexer.h
index f96af66..f122a00 100644
--- a/include/clang/Lex/PTHLexer.h
+++ b/include/clang/Lex/PTHLexer.h
@@ -1,4 +1,4 @@
-//===--- PTHLexer.h - Lexer based on Pre-tokenized input --------*- C++ -*-===//
+//===- PTHLexer.h - Lexer based on Pre-tokenized input ----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,12 +14,15 @@
 #ifndef LLVM_CLANG_LEX_PTHLEXER_H
 #define LLVM_CLANG_LEX_PTHLEXER_H
 
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/PreprocessorLexer.h"
+#include "clang/Lex/Token.h"
 
 namespace clang {
 
+class Preprocessor;
 class PTHManager;
-class PTHSpellingSearch;
 
 class PTHLexer : public PreprocessorLexer {
   SourceLocation FileStartLoc;
@@ -33,7 +36,7 @@
 
   /// LastHashTokPtr - Pointer into TokBuf of the last processed '#'
   ///  token that appears at the start of a line.
-  const unsigned char* LastHashTokPtr;
+  const unsigned char* LastHashTokPtr = nullptr;
 
   /// PPCond - Pointer to a side table in the PTH file that provides a
   ///  a concise summary of the preprocessor conditional block structure.
@@ -44,11 +47,8 @@
   ///  to process when doing quick skipping of preprocessor blocks.
   const unsigned char* CurPPCondPtr;
 
-  PTHLexer(const PTHLexer &) = delete;
-  void operator=(const PTHLexer &) = delete;
-
   /// ReadToken - Used by PTHLexer to read tokens TokBuf.
-  void ReadToken(Token& T);
+  void ReadToken(Token &T);
   
   bool LexEndOfFile(Token &Result);
 
@@ -61,10 +61,13 @@
   friend class PTHManager;
 
   /// Create a PTHLexer for the specified token stream.
-  PTHLexer(Preprocessor& pp, FileID FID, const unsigned char *D,
+  PTHLexer(Preprocessor &pp, FileID FID, const unsigned char *D,
            const unsigned char* ppcond, PTHManager &PM);
+
 public:
-  ~PTHLexer() override {}
+  PTHLexer(const PTHLexer &) = delete;
+  PTHLexer &operator=(const PTHLexer &) = delete;
+  ~PTHLexer() override = default;
 
   /// Lex - Return the next token.
   bool Lex(Token &Tok);
@@ -99,6 +102,6 @@
   bool SkipBlock();
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_LEX_PTHLEXER_H
diff --git a/include/clang/Lex/PTHManager.h b/include/clang/Lex/PTHManager.h
index f4e4774..483b69f 100644
--- a/include/clang/Lex/PTHManager.h
+++ b/include/clang/Lex/PTHManager.h
@@ -1,4 +1,4 @@
-//===--- PTHManager.h - Manager object for PTH processing -------*- C++ -*-===//
+//===- PTHManager.h - Manager object for PTH processing ---------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,30 +17,33 @@
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/OnDiskHashTable.h"
+#include <memory>
 
 namespace llvm {
-  class MemoryBuffer;
-}
+
+class MemoryBuffer;
+
+} // namespace llvm
 
 namespace clang {
 
-class FileEntry;
-class Preprocessor;
-class PTHLexer;
 class DiagnosticsEngine;
 class FileSystemStatCache;
+class Preprocessor;
+class PTHLexer;
 
 class PTHManager : public IdentifierInfoLookup {
   friend class PTHLexer;
-
   friend class PTHStatCache;
 
-  class PTHStringLookupTrait;
   class PTHFileLookupTrait;
-  typedef llvm::OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup;
-  typedef llvm::OnDiskChainedHashTable<PTHFileLookupTrait> PTHFileLookup;
+  class PTHStringLookupTrait;
+
+  using PTHStringIdLookup = llvm::OnDiskChainedHashTable<PTHStringLookupTrait>;
+  using PTHFileLookup = llvm::OnDiskChainedHashTable<PTHFileLookupTrait>;
 
   /// The memory mapped PTH file.
   std::unique_ptr<const llvm::MemoryBuffer> Buf;
@@ -70,7 +73,7 @@
 
   /// PP - The Preprocessor object that will use this PTHManager to create
   ///  PTHLexer objects.
-  Preprocessor* PP;
+  Preprocessor* PP = nullptr;
 
   /// SpellingBase - The base offset within the PTH memory buffer that
   ///  contains the cached spellings for literals.
@@ -89,16 +92,13 @@
              std::unique_ptr<PTHStringIdLookup> stringIdLookup, unsigned numIds,
              const unsigned char *spellingBase, const char *originalSourceFile);
 
-  PTHManager(const PTHManager &) = delete;
-  void operator=(const PTHManager &) = delete;
-
   /// getSpellingAtPTHOffset - Used by PTHLexer classes to get the cached
   ///  spelling for a token.
   unsigned getSpellingAtPTHOffset(unsigned PTHOffset, const char*& Buffer);
 
   /// GetIdentifierInfo - Used to reconstruct IdentifierInfo objects from the
   ///  PTH file.
-  inline IdentifierInfo* GetIdentifierInfo(unsigned PersistentID) {
+  IdentifierInfo *GetIdentifierInfo(unsigned PersistentID) {
     // Check if the IdentifierInfo has already been resolved.
     if (IdentifierInfo* II = PerIDCache[PersistentID])
       return II;
@@ -110,6 +110,8 @@
   // The current PTH version.
   enum { Version = 10 };
 
+  PTHManager(const PTHManager &) = delete;
+  PTHManager &operator=(const PTHManager &) = delete;
   ~PTHManager() override;
 
   /// getOriginalSourceFile - Return the full path to the original header
@@ -120,18 +122,18 @@
 
   /// get - Return the identifier token info for the specified named identifier.
   ///  Unlike the version in IdentifierTable, this returns a pointer instead
-  ///  of a reference.  If the pointer is NULL then the IdentifierInfo cannot
+  ///  of a reference.  If the pointer is nullptr then the IdentifierInfo cannot
   ///  be found.
   IdentifierInfo *get(StringRef Name) override;
 
   /// Create - This method creates PTHManager objects.  The 'file' argument
-  ///  is the name of the PTH file.  This method returns NULL upon failure.
+  ///  is the name of the PTH file.  This method returns nullptr upon failure.
   static PTHManager *Create(StringRef file, DiagnosticsEngine &Diags);
 
   void setPreprocessor(Preprocessor *pp) { PP = pp; }
 
   /// CreateLexer - Return a PTHLexer that "lexes" the cached tokens for the
-  ///  specified file.  This method returns NULL if no cached tokens exist.
+  ///  specified file.  This method returns nullptr if no cached tokens exist.
   ///  It is the responsibility of the caller to 'delete' the returned object.
   PTHLexer *CreateLexer(FileID FID);
 
@@ -142,6 +144,6 @@
   std::unique_ptr<FileSystemStatCache> createStatCache();
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_LEX_PTHMANAGER_H
diff --git a/include/clang/Lex/Pragma.h b/include/clang/Lex/Pragma.h
index 274f0da..090ae2a 100644
--- a/include/clang/Lex/Pragma.h
+++ b/include/clang/Lex/Pragma.h
@@ -1,4 +1,4 @@
-//===--- Pragma.h - Pragma registration and handling ------------*- C++ -*-===//
+//===- Pragma.h - Pragma registration and handling --------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,13 +17,13 @@
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include <cassert>
+#include <string>
 
 namespace clang {
-  class Preprocessor;
-  class Token;
-  class IdentifierInfo;
-  class PragmaNamespace;
+
+class PragmaNamespace;
+class Preprocessor;
+class Token;
 
   /**
    * \brief Describes how the pragma was introduced, e.g., with \#pragma,
@@ -58,9 +58,10 @@
 /// pragmas.
 class PragmaHandler {
   std::string Name;
+
 public:
+  PragmaHandler() = default;
   explicit PragmaHandler(StringRef name) : Name(name) {}
-  PragmaHandler() {}
   virtual ~PragmaHandler();
 
   StringRef getName() const { return Name; }
@@ -89,8 +90,8 @@
 class PragmaNamespace : public PragmaHandler {
   /// Handlers - This is a map of the handlers in this namespace with their name
   /// as key.
-  ///
-  llvm::StringMap<PragmaHandler*> Handlers;
+  llvm::StringMap<PragmaHandler *> Handlers;
+
 public:
   explicit PragmaNamespace(StringRef Name) : PragmaHandler(Name) {}
   ~PragmaNamespace() override;
@@ -103,16 +104,13 @@
                              bool IgnoreNull = true) const;
 
   /// AddPragma - Add a pragma to this namespace.
-  ///
   void AddPragma(PragmaHandler *Handler);
 
   /// RemovePragmaHandler - Remove the given handler from the
   /// namespace.
   void RemovePragmaHandler(PragmaHandler *Handler);
 
-  bool IsEmpty() {
-    return Handlers.empty();
-  }
+  bool IsEmpty() const { return Handlers.empty(); }
 
   void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
                     Token &FirstToken) override;
@@ -120,7 +118,6 @@
   PragmaNamespace *getIfNamespace() override { return this; }
 };
 
+} // namespace clang
 
-}  // end namespace clang
-
-#endif
+#endif // LLVM_CLANG_LEX_PRAGMA_H
diff --git a/include/clang/Lex/PreprocessingRecord.h b/include/clang/Lex/PreprocessingRecord.h
index 882b8ae..77da1eb 100644
--- a/include/clang/Lex/PreprocessingRecord.h
+++ b/include/clang/Lex/PreprocessingRecord.h
@@ -1,4 +1,4 @@
-//===--- PreprocessingRecord.h - Record of Preprocessing --------*- C++ -*-===//
+//===- PreprocessingRecord.h - Record of Preprocessing ----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,24 +11,33 @@
 //  of what occurred during preprocessing.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_LEX_PREPROCESSINGRECORD_H
 #define LLVM_CLANG_LEX_PREPROCESSINGRECORD_H
 
-#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <utility>
 #include <vector>
 
 namespace clang {
-  class IdentifierInfo;
-  class MacroInfo;
-  class PreprocessingRecord;
-}
+
+class PreprocessingRecord;
+
+} // namespace clang
 
 /// \brief Allocates memory within a Clang preprocessing record.
 void *operator new(size_t bytes, clang::PreprocessingRecord &PR,
@@ -39,8 +48,12 @@
                      unsigned) noexcept;
 
 namespace clang {
-  class MacroDefinitionRecord;
-  class FileEntry;
+
+class FileEntry;
+class IdentifierInfo;
+class MacroInfo;
+class SourceManager;
+class Token;
 
   /// \brief Base class that describes a preprocessed entity, which may be a
   /// preprocessor directive or macro expansion.
@@ -78,11 +91,11 @@
     SourceRange Range;
     
   protected:
-    PreprocessedEntity(EntityKind Kind, SourceRange Range)
-      : Kind(Kind), Range(Range) { }
-
     friend class PreprocessingRecord;
 
+    PreprocessedEntity(EntityKind Kind, SourceRange Range)
+        : Kind(Kind), Range(Range) {}
+
   public:
     /// \brief Retrieve the kind of preprocessed entity stored in this object.
     EntityKind getKind() const { return Kind; }
@@ -122,7 +135,7 @@
   class PreprocessingDirective : public PreprocessedEntity {
   public:
     PreprocessingDirective(EntityKind Kind, SourceRange Range) 
-      : PreprocessedEntity(Kind, Range) { }
+        : PreprocessedEntity(Kind, Range) {}
     
     // Implement isa/cast/dyncast/etc.
     static bool classof(const PreprocessedEntity *PD) { 
@@ -199,10 +212,13 @@
     enum InclusionKind {
       /// \brief An \c \#include directive.
       Include,
+
       /// \brief An Objective-C \c \#import directive.
       Import,
+
       /// \brief A GNU \c \#include_next directive.
       IncludeNext,
+
       /// \brief A Clang \c \#__include_macros directive.
       IncludeMacros
     };
@@ -281,6 +297,9 @@
                                                         FileID FID) {
       return None;
     }
+
+    /// \brief Read a preallocated skipped range from the external source.
+    virtual SourceRange ReadSkippedRange(unsigned Index) = 0;
   };
   
   /// \brief A record of the steps taken while preprocessing a source file,
@@ -306,6 +325,8 @@
     /// \brief The set of ranges that were skipped by the preprocessor,
     std::vector<SourceRange> SkippedRanges;
 
+    bool SkippedRangesAllLoaded = true;
+
     /// \brief Global (loaded or local) ID for a preprocessed entity.
     /// Negative values are used to indicate preprocessed entities
     /// loaded from the external source while non-negative values are used to
@@ -316,11 +337,14 @@
     /// value 1 corresponds to element 0 in the local entities vector,
     /// value 2 corresponds to element 1 in the local entities vector, etc.
     class PPEntityID {
-      int ID;
-      explicit PPEntityID(int ID) : ID(ID) {}
       friend class PreprocessingRecord;
+
+      int ID = 0;
+
+      explicit PPEntityID(int ID) : ID(ID) {}
+
     public:
-      PPEntityID() : ID(0) {}
+      PPEntityID() = default;
     };
 
     static PPEntityID getPPEntityID(unsigned Index, bool isLoaded) {
@@ -331,7 +355,7 @@
     llvm::DenseMap<const MacroInfo *, MacroDefinitionRecord *> MacroDefinitions;
 
     /// \brief External source of preprocessed entities.
-    ExternalPreprocessingRecordSource *ExternalSource;
+    ExternalPreprocessingRecordSource *ExternalSource = nullptr;
 
     /// \brief Retrieve the preprocessed entity at the given ID.
     PreprocessedEntity *getPreprocessedEntity(PPEntityID PPID);
@@ -358,6 +382,16 @@
     /// corresponds to the first newly-allocated entity.
     unsigned allocateLoadedEntities(unsigned NumEntities);
 
+    /// \brief Allocate space for a new set of loaded preprocessed skipped
+    /// ranges.
+    ///
+    /// \returns The index into the set of loaded preprocessed ranges, which
+    /// corresponds to the first newly-allocated range.
+    unsigned allocateSkippedRanges(unsigned NumRanges);
+
+    /// \brief Ensures that all external skipped ranges have been loaded.
+    void ensureSkippedRangesLoaded();
+
     /// \brief Register a new macro definition.
     void RegisterMacroDefinition(MacroInfo *Macro, MacroDefinitionRecord *Def);
 
@@ -371,7 +405,7 @@
     }
     
     /// \brief Deallocate memory in the preprocessing record.
-    void Deallocate(void *Ptr) { }
+    void Deallocate(void *Ptr) {}
 
     size_t getTotalMemory() const;
 
@@ -397,11 +431,12 @@
                          iterator, int, std::random_access_iterator_tag,
                          PreprocessedEntity *, int, PreprocessedEntity *,
                          PreprocessedEntity *> {
+      friend class PreprocessingRecord;
+
       PreprocessingRecord *Self;
 
       iterator(PreprocessingRecord *Self, int Position)
           : iterator::iterator_adaptor_base(Position), Self(Self) {}
-      friend class PreprocessingRecord;
 
     public:
       iterator() : iterator(nullptr, 0) {}
@@ -451,7 +486,6 @@
     /// encompasses.
     ///
     /// \param R the range to look for preprocessed entities.
-    ///
     llvm::iterator_range<iterator>
     getPreprocessedEntitiesInRange(SourceRange R);
 
@@ -480,11 +514,15 @@
     MacroDefinitionRecord *findMacroDefinition(const MacroInfo *MI);
 
     /// \brief Retrieve all ranges that got skipped while preprocessing.
-    const std::vector<SourceRange> &getSkippedRanges() const {
+    const std::vector<SourceRange> &getSkippedRanges() {
+      ensureSkippedRangesLoaded();
       return SkippedRanges;
     }
         
   private:
+    friend class ASTReader;
+    friend class ASTWriter;
+
     void MacroExpands(const Token &Id, const MacroDefinition &MD,
                       SourceRange Range, const MacroArgs *Args) override;
     void MacroDefined(const Token &Id, const MacroDirective *MD) override;
@@ -500,6 +538,7 @@
                const MacroDefinition &MD) override;
     void Ifndef(SourceLocation Loc, const Token &MacroNameTok,
                 const MacroDefinition &MD) override;
+
     /// \brief Hook called whenever the 'defined' operator is seen.
     void Defined(const Token &MacroNameTok, const MacroDefinition &MD,
                  SourceRange Range) override;
@@ -518,11 +557,9 @@
     } CachedRangeQuery;
 
     std::pair<int, int> getPreprocessedEntitiesInRangeSlow(SourceRange R);
-
-    friend class ASTReader;
-    friend class ASTWriter;
   };
-} // end namespace clang
+
+} // namespace clang
 
 inline void *operator new(size_t bytes, clang::PreprocessingRecord &PR,
                           unsigned alignment) noexcept {
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index 4f211a3..485600f 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -1,4 +1,4 @@
-//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
+//===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,10 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the clang::Preprocessor interface.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
@@ -18,48 +18,70 @@
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Module.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/ModuleLoader.h"
 #include "clang/Lex/ModuleMap.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/PTHLexer.h"
+#include "clang/Lex/Token.h"
 #include "clang/Lex/TokenLexer.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Registry.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
 #include <memory>
+#include <map>
+#include <string>
+#include <utility>
 #include <vector>
 
 namespace llvm {
-  template<unsigned InternalLen> class SmallString;
-}
+
+template<unsigned InternalLen> class SmallString;
+
+} // namespace llvm
 
 namespace clang {
 
-class SourceManager;
-class ExternalPreprocessorSource;
-class FileManager;
-class FileEntry;
-class HeaderSearch;
-class MemoryBufferCache;
-class PragmaNamespace;
-class PragmaHandler;
+class CodeCompletionHandler;
 class CommentHandler;
+class DirectoryEntry;
+class DirectoryLookup;
+class ExternalPreprocessorSource;
+class FileEntry;
+class FileManager;
+class HeaderSearch;
+class MacroArgs;
+class MemoryBufferCache;
+class PragmaHandler;
+class PragmaNamespace;
+class PreprocessingRecord;
+class PreprocessorLexer;
+class PreprocessorOptions;
+class PTHManager;
 class ScratchBuffer;
 class TargetInfo;
-class PPCallbacks;
-class CodeCompletionHandler;
-class DirectoryLookup;
-class PreprocessingRecord;
-class ModuleLoader;
-class PTHManager;
-class PreprocessorOptions;
 
 /// \brief Stores token information for comparing actual tokens with
 /// predefined values.  Only handles simple tokens and identifiers.
@@ -75,7 +97,9 @@
     assert(!tok::isLiteral(Kind) && "Literals are not supported.");
     assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
   }
+
   TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
+
   bool operator==(const Token &Tok) const {
     return Tok.getKind() == Kind &&
         (!II || II == Tok.getIdentifierInfo());
@@ -84,9 +108,14 @@
 
 /// \brief Context in which macro name is used.
 enum MacroUse {
-  MU_Other  = 0,  // other than #define or #undef
-  MU_Define = 1,  // macro name specified in #define
-  MU_Undef  = 2   // macro name specified in #undef
+  // other than #define or #undef
+  MU_Other  = 0,
+
+  // macro name specified in #define
+  MU_Define = 1,
+
+  // macro name specified in #undef
+  MU_Undef  = 2
 };
 
 /// \brief Engages in a tight little dance with the lexer to efficiently
@@ -96,13 +125,14 @@
 /// know anything about preprocessor-level issues like the \#include stack,
 /// token expansion, etc.
 class Preprocessor {
-  friend class VariadicMacroScopeGuard;
   friend class VAOptDefinitionContext;
+  friend class VariadicMacroScopeGuard;
+
   std::shared_ptr<PreprocessorOptions> PPOpts;
   DiagnosticsEngine        *Diags;
   LangOptions       &LangOpts;
-  const TargetInfo  *Target;
-  const TargetInfo  *AuxTarget;
+  const TargetInfo *Target = nullptr;
+  const TargetInfo *AuxTarget = nullptr;
   FileManager       &FileMgr;
   SourceManager     &SourceMgr;
   MemoryBufferCache &PCMCache;
@@ -113,7 +143,6 @@
   /// \brief External source of macros.
   ExternalPreprocessorSource *ExternalSource;
 
-
   /// An optional PTHManager object used for getting tokens from
   /// a token cache rather than lexing the original source file.
   std::unique_ptr<PTHManager> PTH;
@@ -144,10 +173,17 @@
   IdentifierInfo *Ident__building_module;          // __building_module
   IdentifierInfo *Ident__MODULE__;                 // __MODULE__
   IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
+  IdentifierInfo *Ident__has_c_attribute;          // __has_c_attribute
   IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
+  IdentifierInfo *Ident__is_target_arch;           // __is_target_arch
+  IdentifierInfo *Ident__is_target_vendor;         // __is_target_vendor
+  IdentifierInfo *Ident__is_target_os;             // __is_target_os
+  IdentifierInfo *Ident__is_target_environment;    // __is_target_environment
 
   SourceLocation DATELoc, TIMELoc;
-  unsigned CounterValue;  // Next __COUNTER__ value.
+
+  // Next __COUNTER__ value, starts at 0.
+  unsigned CounterValue = 0;
 
   enum {
     /// \brief Maximum depth of \#includes.
@@ -221,19 +257,19 @@
 
   /// \brief True if we want to ignore EOF token and continue later on (thus 
   /// avoid tearing the Lexer and etc. down).
-  bool IncrementalProcessing;
+  bool IncrementalProcessing = false;
 
   /// The kind of translation unit we are processing.
   TranslationUnitKind TUKind;
 
   /// \brief The code-completion handler.
-  CodeCompletionHandler *CodeComplete;
+  CodeCompletionHandler *CodeComplete = nullptr;
 
   /// \brief The file that we're performing code-completion for, if any.
-  const FileEntry *CodeCompletionFile;
+  const FileEntry *CodeCompletionFile = nullptr;
 
   /// \brief The offset in file for the code-completion point.
-  unsigned CodeCompletionOffset;
+  unsigned CodeCompletionOffset = 0;
 
   /// \brief The location for the code-completion point. This gets instantiated
   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
@@ -253,11 +289,11 @@
   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
 
   /// \brief Whether the last token we lexed was an '@'.
-  bool LastTokenWasAt;
+  bool LastTokenWasAt = false;
 
   /// \brief Whether the module import expects an identifier next. Otherwise,
   /// it expects a '.' or ';'.
-  bool ModuleImportExpectsIdentifier;
+  bool ModuleImportExpectsIdentifier = false;
   
   /// \brief The source location of the currently-active
   /// \#pragma clang arc_cf_code_audited begin.
@@ -268,16 +304,16 @@
   SourceLocation PragmaAssumeNonNullLoc;
 
   /// \brief True if we hit the code-completion point.
-  bool CodeCompletionReached;
+  bool CodeCompletionReached = false;
 
   /// \brief The code completion token containing the information
   /// on the stem that is to be code completed.
-  IdentifierInfo *CodeCompletionII;
+  IdentifierInfo *CodeCompletionII = nullptr;
 
   /// \brief The directory that the main file should be considered to occupy,
   /// if it does not correspond to a real file (as happens when building a
   /// module).
-  const DirectoryEntry *MainFileDir;
+  const DirectoryEntry *MainFileDir = nullptr;
 
   /// \brief The number of bytes that we will initially skip when entering the
   /// main file, along with a flag that indicates whether skipping this number
@@ -286,6 +322,26 @@
   /// This is used when loading a precompiled preamble.
   std::pair<int, bool> SkipMainFilePreamble;
 
+public:
+  struct PreambleSkipInfo {
+    SourceLocation HashTokenLoc;
+    SourceLocation IfTokenLoc;
+    bool FoundNonSkipPortion;
+    bool FoundElse;
+    SourceLocation ElseLoc;
+
+    PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
+                     bool FoundNonSkipPortion, bool FoundElse,
+                     SourceLocation ElseLoc)
+        : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
+          FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
+          ElseLoc(ElseLoc) {}
+  };
+
+private:
+  friend class ASTReader;
+  friend class MacroArgs;
+
   class PreambleConditionalStackStore {
     enum State {
       Off = 0,
@@ -294,7 +350,7 @@
     };
 
   public:
-    PreambleConditionalStackStore() : ConditionalStackState(Off) {}
+    PreambleConditionalStackStore() = default;
 
     void startRecording() { ConditionalStackState = Recording; }
     void startReplaying() { ConditionalStackState = Replaying; }
@@ -319,9 +375,15 @@
 
     bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
 
+    bool reachedEOFWhileSkipping() const { return SkipInfo.hasValue(); }
+
+    void clearSkipInfo() { SkipInfo.reset(); }
+
+    llvm::Optional<PreambleSkipInfo> SkipInfo;
+
   private:
     SmallVector<PPConditionalInfo, 4> ConditionalStack;
-    State ConditionalStackState;
+    State ConditionalStackState = Off;
   } PreambleConditionalStack;
 
   /// \brief The current top of the stack that we're lexing from if
@@ -340,14 +402,14 @@
   /// if not expanding a macro.
   ///
   /// This is an alias for either CurLexer or  CurPTHLexer.
-  PreprocessorLexer *CurPPLexer;
+  PreprocessorLexer *CurPPLexer = nullptr;
 
   /// \brief Used to find the current FileEntry, if CurLexer is non-null
   /// and if applicable.
   ///
   /// This allows us to implement \#include_next and find directory-specific
   /// properties.
-  const DirectoryLookup *CurDirLookup;
+  const DirectoryLookup *CurDirLookup = nullptr;
 
   /// \brief The current macro we are expanding, if we are expanding a macro.
   ///
@@ -361,11 +423,11 @@
     CLK_TokenLexer,
     CLK_CachingLexer,
     CLK_LexAfterModuleImport
-  } CurLexerKind;
+  } CurLexerKind = CLK_Lexer;
 
   /// \brief If the current lexer is for a submodule that is being built, this
   /// is that submodule.
-  Module *CurLexerSubmodule;
+  Module *CurLexerSubmodule = nullptr;
 
   /// \brief Keeps track of the stack of files currently
   /// \#included, and macros currently being expanded from, not counting
@@ -404,27 +466,31 @@
     Token Tok;
     MacroDefinition MD;
     SourceRange Range;
+
     MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
-      : Tok(Tok), MD(MD), Range(Range) { }
+        : Tok(Tok), MD(MD), Range(Range) {}
   };
   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
 
   /// Information about a name that has been used to define a module macro.
   struct ModuleMacroInfo {
-    ModuleMacroInfo(MacroDirective *MD)
-        : MD(MD), ActiveModuleMacrosGeneration(0), IsAmbiguous(false) {}
-
     /// The most recent macro directive for this identifier.
     MacroDirective *MD;
+
     /// The active module macros for this identifier.
-    llvm::TinyPtrVector<ModuleMacro*> ActiveModuleMacros;
+    llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
+
     /// The generation number at which we last updated ActiveModuleMacros.
     /// \see Preprocessor::VisibleModules.
-    unsigned ActiveModuleMacrosGeneration;
+    unsigned ActiveModuleMacrosGeneration = 0;
+
     /// Whether this macro name is ambiguous.
-    bool IsAmbiguous;
+    bool IsAmbiguous = false;
+
     /// The module macros that are overridden by this macro.
-    llvm::TinyPtrVector<ModuleMacro*> OverriddenMacros;
+    llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
+
+    ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
   };
 
   /// The state of a macro for an identifier.
@@ -459,15 +525,18 @@
   public:
     MacroState() : MacroState(nullptr) {}
     MacroState(MacroDirective *MD) : State(MD) {}
+
     MacroState(MacroState &&O) noexcept : State(O.State) {
       O.State = (MacroDirective *)nullptr;
     }
+
     MacroState &operator=(MacroState &&O) noexcept {
       auto S = O.State;
       O.State = (MacroDirective *)nullptr;
       State = S;
       return *this;
     }
+
     ~MacroState() {
       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
         Info->~ModuleMacroInfo();
@@ -478,6 +547,7 @@
         return Info->MD;
       return State.get<MacroDirective*>();
     }
+
     void setLatest(MacroDirective *MD) {
       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
         Info->MD = MD;
@@ -489,6 +559,7 @@
       auto *Info = getModuleInfo(PP, II);
       return Info ? Info->IsAmbiguous : false;
     }
+
     ArrayRef<ModuleMacro *>
     getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
       if (auto *Info = getModuleInfo(PP, II))
@@ -501,7 +572,7 @@
       // FIXME: Incorporate module macros into the result of this.
       if (auto *Latest = getLatest())
         return Latest->findDirectiveAtLoc(Loc, SourceMgr);
-      return MacroDirective::DefInfo();
+      return {};
     }
 
     void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
@@ -513,11 +584,13 @@
         Info->IsAmbiguous = false;
       }
     }
+
     ArrayRef<ModuleMacro*> getOverriddenMacros() const {
       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
         return Info->OverriddenMacros;
       return None;
     }
+
     void setOverriddenMacros(Preprocessor &PP,
                              ArrayRef<ModuleMacro *> Overrides) {
       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
@@ -540,31 +613,33 @@
   /// the reverse order (the latest one is in the head of the list).
   ///
   /// This mapping lives within the \p CurSubmoduleState.
-  typedef llvm::DenseMap<const IdentifierInfo *, MacroState> MacroMap;
-
-  friend class ASTReader;
+  using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
 
   struct SubmoduleState;
 
   /// \brief Information about a submodule that we're currently building.
   struct BuildingSubmoduleInfo {
+    /// The module that we are building.
+    Module *M;
+
+    /// The location at which the module was included.
+    SourceLocation ImportLoc;
+
+    /// Whether we entered this submodule via a pragma.
+    bool IsPragma;
+
+    /// The previous SubmoduleState.
+    SubmoduleState *OuterSubmoduleState;
+
+    /// The number of pending module macro names when we started building this.
+    unsigned OuterPendingModuleMacroNames;
+
     BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
                           SubmoduleState *OuterSubmoduleState,
                           unsigned OuterPendingModuleMacroNames)
         : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
           OuterSubmoduleState(OuterSubmoduleState),
           OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
-
-    /// The module that we are building.
-    Module *M;
-    /// The location at which the module was included.
-    SourceLocation ImportLoc;
-    /// Whether we entered this submodule via a pragma.
-    bool IsPragma;
-    /// The previous SubmoduleState.
-    SubmoduleState *OuterSubmoduleState;
-    /// The number of pending module macro names when we started building this.
-    unsigned OuterPendingModuleMacroNames;
   };
   SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
 
@@ -572,12 +647,14 @@
   struct SubmoduleState {
     /// The macros for the submodule.
     MacroMap Macros;
+
     /// The set of modules that are visible within the submodule.
     VisibleModuleSet VisibleModules;
+
     // FIXME: CounterValue?
     // FIXME: PragmaPushMacroInfo?
   };
-  std::map<Module*, SubmoduleState> Submodules;
+  std::map<Module *, SubmoduleState> Submodules;
 
   /// The preprocessor state for preprocessing outside of any submodule.
   SubmoduleState NullSubmoduleState;
@@ -590,11 +667,11 @@
   llvm::FoldingSet<ModuleMacro> ModuleMacros;
 
   /// The names of potential module macros that we've not yet processed.
-  llvm::SmallVector<const IdentifierInfo*, 32> PendingModuleMacroNames;
+  llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;
 
   /// The list of module macros, for each identifier, that are not overridden by
   /// any other module macro.
-  llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro*>>
+  llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
       LeafModuleMacros;
 
   /// \brief Macros that we want to warn because they are not used at the end
@@ -606,25 +683,35 @@
   /// just so that we can report that they are unused, we just warn using
   /// the SourceLocations of this set (that will be filled by the ASTReader).
   /// We are using SmallPtrSet instead of a vector for faster removal.
-  typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
+  using WarnUnusedMacroLocsTy = llvm::SmallPtrSet<SourceLocation, 32>;
   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
 
   /// \brief A "freelist" of MacroArg objects that can be
   /// reused for quick allocation.
-  MacroArgs *MacroArgCache;
-  friend class MacroArgs;
+  MacroArgs *MacroArgCache = nullptr;
 
   /// For each IdentifierInfo used in a \#pragma push_macro directive,
   /// we keep a MacroInfo stack used to restore the previous macro value.
-  llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
+  llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
+      PragmaPushMacroInfo;
 
   // Various statistics we track for performance analysis.
-  unsigned NumDirectives, NumDefined, NumUndefined, NumPragma;
-  unsigned NumIf, NumElse, NumEndif;
-  unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
-  unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
-  unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
-  unsigned NumSkipped;
+  unsigned NumDirectives = 0;
+  unsigned NumDefined = 0;
+  unsigned NumUndefined = 0;
+  unsigned NumPragma = 0;
+  unsigned NumIf = 0;
+  unsigned NumElse = 0;
+  unsigned NumEndif = 0;
+  unsigned NumEnteredSourceFiles = 0;
+  unsigned MaxIncludeStackDepth = 0;
+  unsigned NumMacroExpanded = 0;
+  unsigned NumFnMacroExpanded = 0;
+  unsigned NumBuiltinMacroExpanded = 0;
+  unsigned NumFastMacroExpanded = 0;
+  unsigned NumTokenPaste = 0;
+  unsigned NumFastTokenPaste = 0;
+  unsigned NumSkipped = 0;
 
   /// \brief The predefined macros that preprocessor should use from the
   /// command line etc.
@@ -646,17 +733,17 @@
   /// going to lex in the cache and when it finishes the tokens are removed
   /// from the end of the cache.
   SmallVector<Token, 16> MacroExpandedTokens;
-  std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
+  std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
 
   /// \brief A record of the macro definitions and expansions that
   /// occurred during preprocessing.
   ///
   /// This is an optional side structure that can be enabled with
   /// \c createPreprocessingRecord() prior to preprocessing.
-  PreprocessingRecord *Record;
+  PreprocessingRecord *Record = nullptr;
 
   /// Cached tokens state.
-  typedef SmallVector<Token, 1> CachedTokensTy;
+  using CachedTokensTy = SmallVector<Token, 1>;
 
   /// \brief Cached tokens are stored here when we do backtracking or
   /// lookahead. They are "lexed" by the CachingLex() method.
@@ -667,7 +754,7 @@
   ///
   /// If it points beyond the CachedTokens vector, it means that a normal
   /// Lex() should be invoked.
-  CachedTokensTy::size_type CachedLexPos;
+  CachedTokensTy::size_type CachedLexPos = 0;
 
   /// \brief Stack of backtrack positions, allowing nested backtracks.
   ///
@@ -683,7 +770,7 @@
 
   /// MacroInfos are managed as a chain for easy disposal.  This is the head
   /// of that list.
-  MacroInfoChain *MIChainHead;
+  MacroInfoChain *MIChainHead = nullptr;
 
   void updateOutOfDateIdentifier(IdentifierInfo &II) const;
 
@@ -856,7 +943,7 @@
 
   MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
     if (!II->hasMacroDefinition())
-      return MacroDefinition();
+      return {};
 
     MacroState &S = CurSubmoduleState->Macros[II];
     auto *MD = S.getLatest();
@@ -870,7 +957,7 @@
   MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
                                           SourceLocation Loc) {
     if (!II->hadMacroDefinition())
-      return MacroDefinition();
+      return {};
 
     MacroState &S = CurSubmoduleState->Macros[II];
     MacroDirective::DefInfo DI;
@@ -926,6 +1013,7 @@
                                              MacroInfo *MI) {
     return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
   }
+
   /// \brief Set a MacroDirective that was loaded from a PCH file.
   void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
                                MacroDirective *MD);
@@ -949,10 +1037,12 @@
   /// Iterators for the macro history table. Currently defined macros have
   /// IdentifierInfo::hasMacroDefinition() set and an empty
   /// MacroInfo::getUndefLoc() at the head of the list.
-  typedef MacroMap::const_iterator macro_iterator;
+  using macro_iterator = MacroMap::const_iterator;
+
   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
   llvm::iterator_range<macro_iterator>
+
   macros(bool IncludeExternalMacros = true) const {
     return llvm::make_range(macro_begin(IncludeExternalMacros),
                             macro_end(IncludeExternalMacros));
@@ -966,6 +1056,7 @@
                                      ArrayRef<TokenValue> Tokens) const;
 
   const std::string &getPredefines() const { return Predefines; }
+
   /// \brief Set the predefines for this Preprocessor.
   ///
   /// These predefines are automatically injected when parsing the main file.
@@ -1090,6 +1181,7 @@
                         bool DisableMacroExpansion) {
     EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true);
   }
+
   void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion) {
     EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false);
   }
@@ -1608,7 +1700,6 @@
   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
 
 public:
-
   /// \brief Specifies the reason for poisoning an identifier.
   ///
   /// If that identifier is accessed while poisoned, then this reason will be
@@ -1658,7 +1749,6 @@
   /// lex again.
   bool HandleIdentifier(Token &Identifier);
 
-
   /// \brief Callback invoked when the lexer hits the end of the current file.
   ///
   /// This either returns the EOF token and returns true, or
@@ -1763,6 +1853,8 @@
   Module *LeaveSubmodule(bool ForPragma);
 
 private:
+  friend void TokenLexer::ExpandFunctionArguments();
+
   void PushIncludeMacroStack() {
     assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
     IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
@@ -1821,7 +1913,6 @@
   /// 
   ///  Either returns a pointer to a MacroInfo object OR emits a diagnostic and
   ///  returns a nullptr if an invalid sequence of tokens is encountered.
-
   MacroInfo *ReadOptionalMacroParameterListAndBody(
       const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
 
@@ -1839,7 +1930,7 @@
   /// \p FoundElse is false, then \#else directives are ok, if not, then we have
   /// already seen one so a \#else directive is a duplicate.  When this returns,
   /// the caller can lex the first valid token.
-  void SkipExcludedConditionalBlock(const Token &HashToken,
+  void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
                                     SourceLocation IfTokenLoc,
                                     bool FoundNonSkipPortion, bool FoundElse,
                                     SourceLocation ElseLoc = SourceLocation());
@@ -1852,6 +1943,7 @@
   struct DirectiveEvalResult {
     /// Whether the expression was evaluated as true or not.
     bool Conditional;
+
     /// True if the expression contained identifiers that were undefined.
     bool IncludedUndefinedIds;
   };
@@ -1881,8 +1973,8 @@
   /// from the end of the cache.
   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
                                   ArrayRef<Token> tokens);
+
   void removeCachedMacroExpandedTokensOfLastLexer();
-  friend void TokenLexer::ExpandFunctionArguments();
 
   /// Determine whether the next preprocessor token to be
   /// lexed is a '('.  If so, consume the token and return true, if not, this
@@ -1938,17 +2030,21 @@
   //===--------------------------------------------------------------------===//
   // Caching stuff.
   void CachingLex(Token &Result);
+
   bool InCachingLexMode() const {
     // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
     // that we are past EOF, not that we are in CachingLex mode.
     return !CurPPLexer && !CurTokenLexer && !CurPTHLexer &&
            !IncludeMacroStack.empty();
   }
+
   void EnterCachingLexMode();
+
   void ExitCachingLexMode() {
     if (InCachingLexMode())
       RemoveTopOfLexerStack();
   }
+
   const Token &PeekAhead(unsigned N);
   void AnnotatePreviousCachedTokens(const Token &Tok);
 
@@ -2019,9 +2115,15 @@
     PreambleConditionalStack.setStack(s);
   }
 
-  void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
+  void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,
+                                             llvm::Optional<PreambleSkipInfo> SkipInfo) {
     PreambleConditionalStack.startReplaying();
     PreambleConditionalStack.setStack(s);
+    PreambleConditionalStack.SkipInfo = SkipInfo;
+  }
+
+  llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const {
+    return PreambleConditionalStack.SkipInfo;
   }
 
 private:
@@ -2045,6 +2147,7 @@
   // Pragmas.
   void HandlePragmaDirective(SourceLocation IntroducerLoc,
                              PragmaIntroducerKind Introducer);
+
 public:
   void HandlePragmaOnce(Token &OnceTok);
   void HandlePragmaMark();
@@ -2078,8 +2181,8 @@
 };
 
 /// \brief Registry of pragma handlers added by plugins
-typedef llvm::Registry<PragmaHandler> PragmaHandlerRegistry;
+using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_LEX_PREPROCESSOR_H
diff --git a/include/clang/Lex/PreprocessorLexer.h b/include/clang/Lex/PreprocessorLexer.h
index 65021fc..ff71d11 100644
--- a/include/clang/Lex/PreprocessorLexer.h
+++ b/include/clang/Lex/PreprocessorLexer.h
@@ -1,4 +1,4 @@
-//===--- PreprocessorLexer.h - C Language Family Lexer ----------*- C++ -*-===//
+//===- PreprocessorLexer.h - C Language Family Lexer ------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,10 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the PreprocessorLexer interface.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_LEX_PREPROCESSORLEXER_H
@@ -17,8 +17,10 @@
 
 #include "clang/Lex/MultipleIncludeOpt.h"
 #include "clang/Lex/Token.h"
+#include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include <cassert>
 
 namespace clang {
 
@@ -27,25 +29,29 @@
 
 class PreprocessorLexer {
   virtual void anchor();
+
 protected:
-  Preprocessor *PP;              // Preprocessor object controlling lexing.
+  friend class Preprocessor;
+
+  // Preprocessor object controlling lexing.
+  Preprocessor *PP = nullptr;
 
   /// The SourceManager FileID corresponding to the file being lexed.
   const FileID FID;
 
   /// \brief Number of SLocEntries before lexing the file.
-  unsigned InitialNumSLocEntries;
+  unsigned InitialNumSLocEntries = 0;
 
   //===--------------------------------------------------------------------===//
   // Context-specific lexing flags set by the preprocessor.
   //===--------------------------------------------------------------------===//
 
   /// \brief True when parsing \#XXX; turns '\\n' into a tok::eod token.
-  bool ParsingPreprocessorDirective;
+  bool ParsingPreprocessorDirective = false;
 
   /// \brief True after \#include; turns \<xx> into a tok::angle_string_literal
   /// token.
-  bool ParsingFilename;
+  bool ParsingFilename = false;
 
   /// \brief True if in raw mode.
   ///
@@ -60,7 +66,7 @@
   ///  5. No callbacks are made into the preprocessor.
   ///
   /// Note that in raw mode that the PP pointer may be null.
-  bool LexingRawMode;
+  bool LexingRawMode = false;
 
   /// \brief A state machine that detects the \#ifndef-wrapping a file
   /// idiom for the multiple-include optimization.
@@ -70,19 +76,9 @@
   /// we are currently in.
   SmallVector<PPConditionalInfo, 4> ConditionalStack;
 
-  PreprocessorLexer(const PreprocessorLexer &) = delete;
-  void operator=(const PreprocessorLexer &) = delete;
-  friend class Preprocessor;
-
+  PreprocessorLexer() : FID() {}
   PreprocessorLexer(Preprocessor *pp, FileID fid);
-
-  PreprocessorLexer()
-    : PP(nullptr), FID(), InitialNumSLocEntries(0),
-      ParsingPreprocessorDirective(false),
-      ParsingFilename(false),
-      LexingRawMode(false) {}
-
-  virtual ~PreprocessorLexer() {}
+  virtual ~PreprocessorLexer() = default;
 
   virtual void IndirectLex(Token& Result) = 0;
 
@@ -128,6 +124,8 @@
   unsigned getConditionalStackDepth() const { return ConditionalStack.size(); }
 
 public:
+  PreprocessorLexer(const PreprocessorLexer &) = delete;
+  PreprocessorLexer &operator=(const PreprocessorLexer &) = delete;
 
   //===--------------------------------------------------------------------===//
   // Misc. lexing methods.
@@ -168,12 +166,13 @@
 
   /// \brief Iterator that traverses the current stack of preprocessor
   /// conditional directives (\#if/\#ifdef/\#ifndef).
-  typedef SmallVectorImpl<PPConditionalInfo>::const_iterator 
-    conditional_iterator;
+  using conditional_iterator =
+      SmallVectorImpl<PPConditionalInfo>::const_iterator;
 
   conditional_iterator conditional_begin() const { 
     return ConditionalStack.begin(); 
   }
+
   conditional_iterator conditional_end() const { 
     return ConditionalStack.end(); 
   }
@@ -184,6 +183,6 @@
   }
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_LEX_PREPROCESSORLEXER_H
diff --git a/include/clang/Lex/PreprocessorOptions.h b/include/clang/Lex/PreprocessorOptions.h
index 760b308..55fc305 100644
--- a/include/clang/Lex/PreprocessorOptions.h
+++ b/include/clang/Lex/PreprocessorOptions.h
@@ -1,4 +1,4 @@
-//===--- PreprocessorOptions.h ----------------------------------*- C++ -*-===//
+//===- PreprocessorOptions.h ------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,30 +10,30 @@
 #ifndef LLVM_CLANG_LEX_PREPROCESSOROPTIONS_H_
 #define LLVM_CLANG_LEX_PREPROCESSOROPTIONS_H_
 
-#include "clang/Basic/SourceLocation.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "clang/Basic/LLVM.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
-#include <cassert>
+#include <memory> 
 #include <set>
 #include <string>
 #include <utility>
 #include <vector>
 
 namespace llvm {
-  class MemoryBuffer;
-}
+
+class MemoryBuffer;
+
+} // namespace llvm
 
 namespace clang {
 
-class Preprocessor;
-class LangOptions;
-
 /// \brief Enumerate the kinds of standard library that 
 enum ObjCXXARCStandardLibraryKind {
   ARCXX_nolib,
+
   /// \brief libc++
   ARCXX_libcxx,
+
   /// \brief libstdc++
   ARCXX_libstdcxx
 };
@@ -42,17 +42,17 @@
 /// used in preprocessor initialization to InitializePreprocessor().
 class PreprocessorOptions {
 public:
-  std::vector<std::pair<std::string, bool/*isUndef*/> > Macros;
+  std::vector<std::pair<std::string, bool/*isUndef*/>> Macros;
   std::vector<std::string> Includes;
   std::vector<std::string> MacroIncludes;
 
   /// \brief Initialize the preprocessor with the compiler and target specific
   /// predefines.
-  unsigned UsePredefines : 1;
+  bool UsePredefines = true;
 
   /// \brief Whether we should maintain a detailed record of all macro
   /// definitions and expansions.
-  unsigned DetailedRecord : 1;
+  bool DetailedRecord = false;
 
   /// The implicit PCH included at the start of the translation unit, or empty.
   std::string ImplicitPCHInclude;
@@ -62,13 +62,13 @@
 
   /// \brief When true, disables most of the normal validation performed on
   /// precompiled headers.
-  bool DisablePCHValidation;
+  bool DisablePCHValidation = false;
 
   /// \brief When true, a PCH with compiler errors will not be rejected.
-  bool AllowPCHWithCompilerErrors;
+  bool AllowPCHWithCompilerErrors = false;
 
   /// \brief Dump declarations that are deserialized from PCH, for testing.
-  bool DumpDeserializedPCHDecls;
+  bool DumpDeserializedPCHDecls = false;
 
   /// \brief This is a set of names for decls that we do not want to be
   /// deserialized, and we emit an error if they are; for testing purposes.
@@ -86,7 +86,7 @@
   /// When the lexer is done, one of the things that need to be preserved is the
   /// conditional #if stack, so the ASTWriter/ASTReader can save/restore it when
   /// processing the rest of the file.
-  bool GeneratePreamble;
+  bool GeneratePreamble = false;
 
   /// The implicit PTH input included at the start of the translation unit, or
   /// empty.
@@ -107,7 +107,7 @@
 
   /// \brief True if the SourceManager should report the original file name for
   /// contents of files that were remapped to other files. Defaults to true.
-  bool RemappedFilesKeepOriginalName;
+  bool RemappedFilesKeepOriginalName = true;
 
   /// \brief The set of file remappings, which take existing files on
   /// the system (the first part of each pair) and gives them the
@@ -126,12 +126,12 @@
   /// This flag defaults to false; it can be set true only through direct
   /// manipulation of the compiler invocation object, in cases where the 
   /// compiler invocation and its buffers will be reused.
-  bool RetainRemappedFileBuffers;
+  bool RetainRemappedFileBuffers = false;
   
   /// \brief The Objective-C++ ARC standard library that we should support,
   /// by providing appropriate definitions to retrofit the standard library
   /// with support for lifetime-qualified pointers.
-  ObjCXXARCStandardLibraryKind ObjCXXARCStandardLibrary;
+  ObjCXXARCStandardLibraryKind ObjCXXARCStandardLibrary = ARCXX_nolib;
     
   /// \brief Records the set of modules
   class FailedModulesSet {
@@ -156,18 +156,11 @@
   std::shared_ptr<FailedModulesSet> FailedModules;
 
 public:
-  PreprocessorOptions() : UsePredefines(true), DetailedRecord(false),
-                          DisablePCHValidation(false),
-                          AllowPCHWithCompilerErrors(false),
-                          DumpDeserializedPCHDecls(false),
-                          PrecompiledPreambleBytes(0, false),
-                          GeneratePreamble(false),
-                          RemappedFilesKeepOriginalName(true),
-                          RetainRemappedFileBuffers(false),
-                          ObjCXXARCStandardLibrary(ARCXX_nolib) { }
+  PreprocessorOptions() : PrecompiledPreambleBytes(0, false) {}
 
   void addMacroDef(StringRef Name) { Macros.emplace_back(Name, false); }
   void addMacroUndef(StringRef Name) { Macros.emplace_back(Name, true); }
+
   void addRemappedFile(StringRef From, StringRef To) {
     RemappedFiles.emplace_back(From, To);
   }
@@ -199,6 +192,6 @@
   }
 };
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_LEX_PREPROCESSOROPTIONS_H_
diff --git a/include/clang/Lex/TokenLexer.h b/include/clang/Lex/TokenLexer.h
index 1730510..b8b0bea 100644
--- a/include/clang/Lex/TokenLexer.h
+++ b/include/clang/Lex/TokenLexer.h
@@ -1,4 +1,4 @@
-//===--- TokenLexer.h - Lex from a token buffer -----------------*- C++ -*-===//
+//===- TokenLexer.h - Lex from a token buffer -------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,28 +18,28 @@
 #include "llvm/ADT/ArrayRef.h"
 
 namespace clang {
-  class MacroInfo;
-  class Preprocessor;
-  class Token;
-  class MacroArgs;
-  class VAOptExpansionContext;
+
+class MacroArgs;
+class MacroInfo;
+class Preprocessor;
+class Token;
+class VAOptExpansionContext;
 
 /// TokenLexer - This implements a lexer that returns tokens from a macro body
 /// or token stream instead of lexing from a character buffer.  This is used for
 /// macro expansion and _Pragma handling, for example.
-///
 class TokenLexer {
+  friend class Preprocessor;
+
   /// Macro - The macro we are expanding from.  This is null if expanding a
   /// token stream.
-  ///
-  MacroInfo *Macro;
+  MacroInfo *Macro = nullptr;
 
   /// ActualArgs - The actual arguments specified for a function-like macro, or
   /// null.  The TokenLexer owns the pointed-to object.
-  MacroArgs *ActualArgs;
+  MacroArgs *ActualArgs = nullptr;
 
   /// PP - The current preprocessor object we are expanding for.
-  ///
   Preprocessor &PP;
 
   /// Tokens - This is the pointer to an array of tokens that the macro is
@@ -51,14 +51,11 @@
   /// Note that if it points into Preprocessor's cache buffer, the Preprocessor
   /// may update the pointer as needed.
   const Token *Tokens;
-  friend class Preprocessor;
 
   /// NumTokens - This is the length of the Tokens array.
-  ///
   unsigned NumTokens;
 
   /// This is the index of the next token that Lex will return.
-  ///
   unsigned CurTokenIdx;
 
   /// ExpandLocStart/End - The source location range where this macro was
@@ -75,6 +72,7 @@
 
   /// \brief Location of the macro definition.
   SourceLocation MacroDefStart;
+
   /// \brief Length of the macro definition.
   unsigned MacroDefLength;
 
@@ -101,8 +99,6 @@
   /// should not be subject to further macro expansion.
   bool DisableMacroExpansion : 1;
 
-  TokenLexer(const TokenLexer &) = delete;
-  void operator=(const TokenLexer &) = delete;
 public:
   /// Create a TokenLexer for the specified macro with the specified actual
   /// arguments.  Note that this ctor takes ownership of the ActualArgs pointer.
@@ -110,10 +106,23 @@
   /// identifier for an object-like macro.
   TokenLexer(Token &Tok, SourceLocation ILEnd, MacroInfo *MI,
              MacroArgs *ActualArgs, Preprocessor &pp)
-    : Macro(nullptr), ActualArgs(nullptr), PP(pp), OwnsTokens(false) {
+      : PP(pp), OwnsTokens(false) {
     Init(Tok, ILEnd, MI, ActualArgs);
   }
 
+  /// Create a TokenLexer for the specified token stream.  If 'OwnsTokens' is
+  /// specified, this takes ownership of the tokens and delete[]'s them when
+  /// the token lexer is empty.
+  TokenLexer(const Token *TokArray, unsigned NumToks, bool DisableExpansion,
+             bool ownsTokens, Preprocessor &pp)
+      : PP(pp), OwnsTokens(false) {
+    Init(TokArray, NumToks, DisableExpansion, ownsTokens);
+  }
+
+  TokenLexer(const TokenLexer &) = delete;
+  TokenLexer &operator=(const TokenLexer &) = delete;
+  ~TokenLexer() { destroy(); }
+
   /// Init - Initialize this TokenLexer to expand from the specified macro
   /// with the specified argument information.  Note that this ctor takes
   /// ownership of the ActualArgs pointer.  ILEnd specifies the location of the
@@ -121,15 +130,6 @@
   void Init(Token &Tok, SourceLocation ILEnd, MacroInfo *MI,
             MacroArgs *ActualArgs);
 
-  /// Create a TokenLexer for the specified token stream.  If 'OwnsTokens' is
-  /// specified, this takes ownership of the tokens and delete[]'s them when
-  /// the token lexer is empty.
-  TokenLexer(const Token *TokArray, unsigned NumToks, bool DisableExpansion,
-             bool ownsTokens, Preprocessor &pp)
-    : Macro(nullptr), ActualArgs(nullptr), PP(pp), OwnsTokens(false) {
-    Init(TokArray, NumToks, DisableExpansion, ownsTokens);
-  }
-
   /// Init - Initialize this TokenLexer with the specified token stream.
   /// This does not take ownership of the specified token vector.
   ///
@@ -138,8 +138,6 @@
   void Init(const Token *TokArray, unsigned NumToks,
             bool DisableMacroExpansion, bool OwnsTokens);
 
-  ~TokenLexer() { destroy(); }
-
   /// isNextTokenLParen - If the next token lexed will pop this macro off the
   /// expansion stack, return 2.  If the next unexpanded token is a '(', return
   /// 1, otherwise return 0.
@@ -182,7 +180,6 @@
   ///
   /// \returns If this returns true, the caller should immediately return the
   /// token.
-
   bool pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
                    unsigned int &CurIdx);
 
@@ -204,7 +201,6 @@
   /// \param[in] VCtx - contains relevent contextual information about the
   /// state of the tokens around and including the __VA_OPT__ token, necessary
   /// for stringification.
-
   void stringifyVAOPTContents(SmallVectorImpl<Token> &ReplacementToks,
                               const VAOptExpansionContext &VCtx,
                               SourceLocation VAOPTClosingParenLoc);
@@ -243,6 +239,6 @@
   void PropagateLineStartLeadingSpaceInfo(Token &Result);
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_LEX_TOKENLEXER_H
diff --git a/include/clang/Lex/VariadicMacroSupport.h b/include/clang/Lex/VariadicMacroSupport.h
index cebaf15..db1ce0e 100644
--- a/include/clang/Lex/VariadicMacroSupport.h
+++ b/include/clang/Lex/VariadicMacroSupport.h
@@ -55,7 +55,7 @@
 
     /// Client code should call this function as soon as the Preprocessor has
     /// either completed lexing the macro's definition tokens, or an error
-    /// occured and the context is being exited.  This function is idempotent
+    /// occurred and the context is being exited.  This function is idempotent
     /// (might be explicitly called, and then reinvoked via the destructor).
     void exitScope() {
       Ident__VA_ARGS__->setIsPoisoned(true);
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h
index a2eef24..3bf3855 100644
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -185,6 +185,9 @@
   std::unique_ptr<PragmaHandler> UnrollHintHandler;
   std::unique_ptr<PragmaHandler> NoUnrollHintHandler;
   std::unique_ptr<PragmaHandler> FPHandler;
+  std::unique_ptr<PragmaHandler> STDCFENVHandler;
+  std::unique_ptr<PragmaHandler> STDCCXLIMITHandler;
+  std::unique_ptr<PragmaHandler> STDCUnknownHandler;
   std::unique_ptr<PragmaHandler> AttributePragmaHandler;
 
   std::unique_ptr<CommentHandler> CommentSemaHandler;
@@ -376,15 +379,15 @@
 
   /// isTokenParen - Return true if the cur token is '(' or ')'.
   bool isTokenParen() const {
-    return Tok.getKind() == tok::l_paren || Tok.getKind() == tok::r_paren;
+    return Tok.isOneOf(tok::l_paren, tok::r_paren);
   }
   /// isTokenBracket - Return true if the cur token is '[' or ']'.
   bool isTokenBracket() const {
-    return Tok.getKind() == tok::l_square || Tok.getKind() == tok::r_square;
+    return Tok.isOneOf(tok::l_square, tok::r_square);
   }
   /// isTokenBrace - Return true if the cur token is '{' or '}'.
   bool isTokenBrace() const {
-    return Tok.getKind() == tok::l_brace || Tok.getKind() == tok::r_brace;
+    return Tok.isOneOf(tok::l_brace, tok::r_brace);
   }
   /// isTokenStringLiteral - True if this token is a string-literal.
   bool isTokenStringLiteral() const {
@@ -506,6 +509,12 @@
            Kind == tok::annot_module_end || Kind == tok::annot_module_include;
   }
 
+  /// \brief Checks if the \p Level is valid for use in a fold expression.
+  bool isFoldOperator(prec::Level Level) const;
+
+  /// \brief Checks if the \p Kind is a valid operator for fold expressions.
+  bool isFoldOperator(tok::TokenKind Kind) const;
+
   /// \brief Initialize all pragma handlers.
   void initializePragmaHandlers();
 
@@ -1331,7 +1340,7 @@
 
   // Objective-C External Declarations
   void MaybeSkipAttributes(tok::ObjCKeywordKind Kind);
-  DeclGroupPtrTy ParseObjCAtDirectives();
+  DeclGroupPtrTy ParseObjCAtDirectives(ParsedAttributesWithRange &Attrs);
   DeclGroupPtrTy ParseObjCAtClassDeclaration(SourceLocation atLoc);
   Decl *ParseObjCAtInterfaceDeclaration(SourceLocation AtLoc,
                                         ParsedAttributes &prefixAttrs);
@@ -1441,7 +1450,7 @@
 
   bool isTokIdentifier_in() const;
 
-  ParsedType ParseObjCTypeName(ObjCDeclSpec &DS, Declarator::TheContext Ctx,
+  ParsedType ParseObjCTypeName(ObjCDeclSpec &DS, DeclaratorContext Ctx,
                                ParsedAttributes *ParamAttrs);
   void ParseObjCMethodRequirement();
   Decl *ParseObjCMethodPrototype(
@@ -1635,7 +1644,8 @@
 
   //===--------------------------------------------------------------------===//
   // C++0x 8: Function declaration trailing-return-type
-  TypeResult ParseTrailingReturnType(SourceRange &Range);
+  TypeResult ParseTrailingReturnType(SourceRange &Range,
+                                     bool MayBeFollowedByDirectInit);
 
   //===--------------------------------------------------------------------===//
   // C++ 2.13.5: C++ Boolean Literals
@@ -1850,7 +1860,7 @@
   /// A context for parsing declaration specifiers.  TODO: flesh this
   /// out, there are other significant restrictions on specifiers than
   /// would be best implemented in the parser.
-  enum DeclSpecContext {
+  enum class DeclSpecContext {
     DSC_normal, // normal context
     DSC_class,  // class context, enables 'friend'
     DSC_type_specifier, // C++ type-specifier-seq or C specifier-qualifier-list
@@ -1867,18 +1877,18 @@
   /// trailing-type-specifier)?
   static bool isTypeSpecifier(DeclSpecContext DSC) {
     switch (DSC) {
-    case DSC_normal:
-    case DSC_template_param:
-    case DSC_class:
-    case DSC_top_level:
-    case DSC_objc_method_result:
-    case DSC_condition:
+    case DeclSpecContext::DSC_normal:
+    case DeclSpecContext::DSC_template_param:
+    case DeclSpecContext::DSC_class:
+    case DeclSpecContext::DSC_top_level:
+    case DeclSpecContext::DSC_objc_method_result:
+    case DeclSpecContext::DSC_condition:
       return false;
 
-    case DSC_template_type_arg:
-    case DSC_type_specifier:
-    case DSC_trailing:
-    case DSC_alias_declaration:
+    case DeclSpecContext::DSC_template_type_arg:
+    case DeclSpecContext::DSC_type_specifier:
+    case DeclSpecContext::DSC_trailing:
+    case DeclSpecContext::DSC_alias_declaration:
       return true;
     }
     llvm_unreachable("Missing DeclSpecContext case");
@@ -1888,18 +1898,18 @@
   /// deduction?
   static bool isClassTemplateDeductionContext(DeclSpecContext DSC) {
     switch (DSC) {
-    case DSC_normal:
-    case DSC_template_param:
-    case DSC_class:
-    case DSC_top_level:
-    case DSC_condition:
-    case DSC_type_specifier:
+    case DeclSpecContext::DSC_normal:
+    case DeclSpecContext::DSC_template_param:
+    case DeclSpecContext::DSC_class:
+    case DeclSpecContext::DSC_top_level:
+    case DeclSpecContext::DSC_condition:
+    case DeclSpecContext::DSC_type_specifier:
       return true;
 
-    case DSC_objc_method_result:
-    case DSC_template_type_arg:
-    case DSC_trailing:
-    case DSC_alias_declaration:
+    case DeclSpecContext::DSC_objc_method_result:
+    case DeclSpecContext::DSC_template_type_arg:
+    case DeclSpecContext::DSC_trailing:
+    case DeclSpecContext::DSC_alias_declaration:
       return false;
     }
     llvm_unreachable("Missing DeclSpecContext case");
@@ -1914,15 +1924,16 @@
     bool ParsedForRangeDecl() { return !ColonLoc.isInvalid(); }
   };
 
-  DeclGroupPtrTy ParseDeclaration(unsigned Context, SourceLocation &DeclEnd,
+  DeclGroupPtrTy ParseDeclaration(DeclaratorContext Context,
+                                  SourceLocation &DeclEnd,
                                   ParsedAttributesWithRange &attrs);
-  DeclGroupPtrTy ParseSimpleDeclaration(unsigned Context,
+  DeclGroupPtrTy ParseSimpleDeclaration(DeclaratorContext Context,
                                         SourceLocation &DeclEnd,
                                         ParsedAttributesWithRange &attrs,
                                         bool RequireSemi,
                                         ForRangeInit *FRI = nullptr);
-  bool MightBeDeclarator(unsigned Context);
-  DeclGroupPtrTy ParseDeclGroup(ParsingDeclSpec &DS, unsigned Context,
+  bool MightBeDeclarator(DeclaratorContext Context);
+  DeclGroupPtrTy ParseDeclGroup(ParsingDeclSpec &DS, DeclaratorContext Context,
                                 SourceLocation *DeclEnd = nullptr,
                                 ForRangeInit *FRI = nullptr);
   Decl *ParseDeclarationAfterDeclarator(Declarator &D,
@@ -1945,21 +1956,24 @@
                         const ParsedTemplateInfo &TemplateInfo,
                         AccessSpecifier AS, DeclSpecContext DSC, 
                         ParsedAttributesWithRange &Attrs);
-  DeclSpecContext getDeclSpecContextFromDeclaratorContext(unsigned Context);
-  void ParseDeclarationSpecifiers(DeclSpec &DS,
-                const ParsedTemplateInfo &TemplateInfo = ParsedTemplateInfo(),
-                                  AccessSpecifier AS = AS_none,
-                                  DeclSpecContext DSC = DSC_normal,
-                                  LateParsedAttrList *LateAttrs = nullptr);
-  bool DiagnoseMissingSemiAfterTagDefinition(DeclSpec &DS, AccessSpecifier AS,
-                                       DeclSpecContext DSContext,
-                                       LateParsedAttrList *LateAttrs = nullptr);
+  DeclSpecContext
+  getDeclSpecContextFromDeclaratorContext(DeclaratorContext Context);
+  void ParseDeclarationSpecifiers(
+      DeclSpec &DS,
+      const ParsedTemplateInfo &TemplateInfo = ParsedTemplateInfo(),
+      AccessSpecifier AS = AS_none,
+      DeclSpecContext DSC = DeclSpecContext::DSC_normal,
+      LateParsedAttrList *LateAttrs = nullptr);
+  bool DiagnoseMissingSemiAfterTagDefinition(
+      DeclSpec &DS, AccessSpecifier AS, DeclSpecContext DSContext,
+      LateParsedAttrList *LateAttrs = nullptr);
 
-  void ParseSpecifierQualifierList(DeclSpec &DS, AccessSpecifier AS = AS_none,
-                                   DeclSpecContext DSC = DSC_normal);
+  void ParseSpecifierQualifierList(
+      DeclSpec &DS, AccessSpecifier AS = AS_none,
+      DeclSpecContext DSC = DeclSpecContext::DSC_normal);
 
   void ParseObjCTypeQualifierList(ObjCDeclSpec &DS,
-                                  Declarator::TheContext Context);
+                                  DeclaratorContext Context);
 
   void ParseEnumSpecifier(SourceLocation TagLoc, DeclSpec &DS,
                           const ParsedTemplateInfo &TemplateInfo,
@@ -2145,7 +2159,8 @@
   TPResult TryParsePtrOperatorSeq();
   TPResult TryParseOperatorId();
   TPResult TryParseInitDeclaratorList();
-  TPResult TryParseDeclarator(bool mayBeAbstract, bool mayHaveIdentifier=true);
+  TPResult TryParseDeclarator(bool mayBeAbstract, bool mayHaveIdentifier = true,
+                              bool mayHaveDirectInit = false);
   TPResult
   TryParseParameterDeclarationClause(bool *InvalidAsDeclaration = nullptr,
                                      bool VersusTemplateArg = false);
@@ -2155,8 +2170,8 @@
 
 public:
   TypeResult ParseTypeName(SourceRange *Range = nullptr,
-                           Declarator::TheContext Context
-                             = Declarator::TypeNameContext,
+                           DeclaratorContext Context
+                             = DeclaratorContext::TypeNameContext,
                            AccessSpecifier AS = AS_none,
                            Decl **OwnedType = nullptr,
                            ParsedAttributes *Attrs = nullptr);
@@ -2194,13 +2209,16 @@
 
   void stripTypeAttributesOffDeclSpec(ParsedAttributesWithRange &Attrs,
                                       DeclSpec &DS, Sema::TagUseKind TUK);
-
-  void ProhibitAttributes(ParsedAttributesWithRange &attrs) {
+  
+  // FixItLoc = possible correct location for the attributes
+  void ProhibitAttributes(ParsedAttributesWithRange &attrs,
+                          SourceLocation FixItLoc = SourceLocation()) {
     if (!attrs.Range.isValid()) return;
-    DiagnoseProhibitedAttributes(attrs);
+    DiagnoseProhibitedAttributes(attrs, FixItLoc);
     attrs.clear();
   }
-  void DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs);
+  void DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs, 
+    SourceLocation FixItLoc);
 
   // Forbid C++11 and C2x attributes that appear on certain syntactic locations
   // which standard permits but we don't supported yet, for example, attributes
@@ -2503,20 +2521,21 @@
 
   void DiagnoseUnexpectedNamespace(NamedDecl *Context);
 
-  DeclGroupPtrTy ParseNamespace(unsigned Context, SourceLocation &DeclEnd,
+  DeclGroupPtrTy ParseNamespace(DeclaratorContext Context,
+                                SourceLocation &DeclEnd,
                                 SourceLocation InlineLoc = SourceLocation());
-  void ParseInnerNamespace(std::vector<SourceLocation>& IdentLoc,
-                           std::vector<IdentifierInfo*>& Ident,
-                           std::vector<SourceLocation>& NamespaceLoc,
-                           unsigned int index, SourceLocation& InlineLoc,
-                           ParsedAttributes& attrs,
+  void ParseInnerNamespace(std::vector<SourceLocation> &IdentLoc,
+                           std::vector<IdentifierInfo *> &Ident,
+                           std::vector<SourceLocation> &NamespaceLoc,
+                           unsigned int index, SourceLocation &InlineLoc,
+                           ParsedAttributes &attrs,
                            BalancedDelimiterTracker &Tracker);
-  Decl *ParseLinkage(ParsingDeclSpec &DS, unsigned Context);
+  Decl *ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context);
   Decl *ParseExportDeclaration();
   DeclGroupPtrTy ParseUsingDirectiveOrDeclaration(
-      unsigned Context, const ParsedTemplateInfo &TemplateInfo,
+      DeclaratorContext Context, const ParsedTemplateInfo &TemplateInfo,
       SourceLocation &DeclEnd, ParsedAttributesWithRange &attrs);
-  Decl *ParseUsingDirective(unsigned Context,
+  Decl *ParseUsingDirective(DeclaratorContext Context,
                             SourceLocation UsingLoc,
                             SourceLocation &DeclEnd,
                             ParsedAttributes &attrs);
@@ -2535,8 +2554,8 @@
     }
   };
 
-  bool ParseUsingDeclarator(unsigned Context, UsingDeclarator &D);
-  DeclGroupPtrTy ParseUsingDeclaration(unsigned Context,
+  bool ParseUsingDeclarator(DeclaratorContext Context, UsingDeclarator &D);
+  DeclGroupPtrTy ParseUsingDeclaration(DeclaratorContext Context,
                                        const ParsedTemplateInfo &TemplateInfo,
                                        SourceLocation UsingLoc,
                                        SourceLocation &DeclEnd,
@@ -2658,30 +2677,42 @@
   /// \brief Parses clause with a single expression of a kind \a Kind.
   ///
   /// \param Kind Kind of current clause.
+  /// \param ParseOnly true to skip the clause's semantic actions and return
+  /// nullptr.
   ///
-  OMPClause *ParseOpenMPSingleExprClause(OpenMPClauseKind Kind);
+  OMPClause *ParseOpenMPSingleExprClause(OpenMPClauseKind Kind,
+                                         bool ParseOnly);
   /// \brief Parses simple clause of a kind \a Kind.
   ///
   /// \param Kind Kind of current clause.
+  /// \param ParseOnly true to skip the clause's semantic actions and return
+  /// nullptr.
   ///
-  OMPClause *ParseOpenMPSimpleClause(OpenMPClauseKind Kind);
+  OMPClause *ParseOpenMPSimpleClause(OpenMPClauseKind Kind, bool ParseOnly);
   /// \brief Parses clause with a single expression and an additional argument
   /// of a kind \a Kind.
   ///
   /// \param Kind Kind of current clause.
+  /// \param ParseOnly true to skip the clause's semantic actions and return
+  /// nullptr.
   ///
-  OMPClause *ParseOpenMPSingleExprWithArgClause(OpenMPClauseKind Kind);
+  OMPClause *ParseOpenMPSingleExprWithArgClause(OpenMPClauseKind Kind,
+                                                bool ParseOnly);
   /// \brief Parses clause without any additional arguments.
   ///
   /// \param Kind Kind of current clause.
+  /// \param ParseOnly true to skip the clause's semantic actions and return
+  /// nullptr.
   ///
-  OMPClause *ParseOpenMPClause(OpenMPClauseKind Kind);
+  OMPClause *ParseOpenMPClause(OpenMPClauseKind Kind, bool ParseOnly = false);
   /// \brief Parses clause with the list of variables of a kind \a Kind.
   ///
   /// \param Kind Kind of current clause.
+  /// \param ParseOnly true to skip the clause's semantic actions and return
+  /// nullptr.
   ///
   OMPClause *ParseOpenMPVarListClause(OpenMPDirectiveKind DKind,
-                                      OpenMPClauseKind Kind);
+                                      OpenMPClauseKind Kind, bool ParseOnly);
 
 public:
   /// Parses simple expression in parens for single-expression clauses of OpenMP
@@ -2720,16 +2751,16 @@
   // C++ 14: Templates [temp]
 
   // C++ 14.1: Template Parameters [temp.param]
-  Decl *ParseDeclarationStartingWithTemplate(unsigned Context,
+  Decl *ParseDeclarationStartingWithTemplate(DeclaratorContext Context,
                                           SourceLocation &DeclEnd,
                                           AccessSpecifier AS = AS_none,
                                           AttributeList *AccessAttrs = nullptr);
-  Decl *ParseTemplateDeclarationOrSpecialization(unsigned Context,
+  Decl *ParseTemplateDeclarationOrSpecialization(DeclaratorContext Context,
                                                  SourceLocation &DeclEnd,
                                                  AccessSpecifier AS,
                                                  AttributeList *AccessAttrs);
   Decl *ParseSingleDeclarationAfterTemplate(
-                                       unsigned Context,
+                                       DeclaratorContext Context,
                                        const ParsedTemplateInfo &TemplateInfo,
                                        ParsingDeclRAIIObject &DiagsFromParams,
                                        SourceLocation &DeclEnd,
@@ -2742,10 +2773,10 @@
   bool ParseTemplateParameterList(unsigned Depth,
                                   SmallVectorImpl<NamedDecl*> &TemplateParams);
   bool isStartOfTemplateTypeParameter();
-  Decl *ParseTemplateParameter(unsigned Depth, unsigned Position);
-  Decl *ParseTypeParameter(unsigned Depth, unsigned Position);
-  Decl *ParseTemplateTemplateParameter(unsigned Depth, unsigned Position);
-  Decl *ParseNonTypeTemplateParameter(unsigned Depth, unsigned Position);
+  NamedDecl *ParseTemplateParameter(unsigned Depth, unsigned Position);
+  NamedDecl *ParseTypeParameter(unsigned Depth, unsigned Position);
+  NamedDecl *ParseTemplateTemplateParameter(unsigned Depth, unsigned Position);
+  NamedDecl *ParseNonTypeTemplateParameter(unsigned Depth, unsigned Position);
   void DiagnoseMisplacedEllipsis(SourceLocation EllipsisLoc,
                                  SourceLocation CorrectLoc,
                                  bool AlreadyHasEllipsis,
@@ -2773,7 +2804,7 @@
   bool ParseTemplateArgumentList(TemplateArgList &TemplateArgs);
   ParsedTemplateArgument ParseTemplateTemplateArgument();
   ParsedTemplateArgument ParseTemplateArgument();
-  Decl *ParseExplicitInstantiation(unsigned Context,
+  Decl *ParseExplicitInstantiation(DeclaratorContext Context,
                                    SourceLocation ExternLoc,
                                    SourceLocation TemplateLoc,
                                    SourceLocation &DeclEnd,
@@ -2782,7 +2813,7 @@
   //===--------------------------------------------------------------------===//
   // Modules
   DeclGroupPtrTy ParseModuleDecl();
-  DeclGroupPtrTy ParseModuleImport(SourceLocation AtLoc);
+  Decl *ParseModuleImport(SourceLocation AtLoc);
   bool parseMisplacedModuleImport();
   bool tryParseMisplacedModuleImport() {
     tok::TokenKind Kind = Tok.getKind();
diff --git a/include/clang/Parse/RAIIObjectsForParser.h b/include/clang/Parse/RAIIObjectsForParser.h
index 0422b03..acaeb44 100644
--- a/include/clang/Parse/RAIIObjectsForParser.h
+++ b/include/clang/Parse/RAIIObjectsForParser.h
@@ -202,7 +202,7 @@
     ParsingDeclRAIIObject ParsingRAII;
 
   public:
-    ParsingDeclarator(Parser &P, const ParsingDeclSpec &DS, TheContext C)
+    ParsingDeclarator(Parser &P, const ParsingDeclSpec &DS, DeclaratorContext C)
       : Declarator(DS, C), ParsingRAII(P, &DS.getDelayedDiagnosticPool()) {
     }
 
diff --git a/include/clang/Sema/AttributeList.h b/include/clang/Sema/AttributeList.h
index 0580ff8..6b5bb1f 100644
--- a/include/clang/Sema/AttributeList.h
+++ b/include/clang/Sema/AttributeList.h
@@ -1,4 +1,4 @@
-//===--- AttributeList.h - Parsed attribute sets ----------------*- C++ -*-===//
+//===- AttributeList.h - Parsed attribute sets ------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -24,11 +24,17 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Allocator.h"
 #include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <utility>
 
 namespace clang {
-  class ASTContext;
-  class IdentifierInfo;
-  class Expr;
+
+class ASTContext;
+class Decl;
+class Expr;
+class IdentifierInfo;
+class LangOptions;
 
 /// \brief Represents information about a change in availability for
 /// an entity, which is part of the encoding of the 'availability'
@@ -48,6 +54,7 @@
 };
 
 namespace {
+
 enum AvailabilitySlot {
   IntroducedSlot, DeprecatedSlot, ObsoletedSlot, NumAvailabilitySlots
 };
@@ -57,6 +64,7 @@
   AvailabilityChange Changes[NumAvailabilitySlots];
   SourceLocation StrictLoc;
   const Expr *Replacement;
+
   AvailabilityData(const AvailabilityChange &Introduced,
                    const AvailabilityChange &Deprecated,
                    const AvailabilityChange &Obsoleted,
@@ -67,7 +75,8 @@
     Changes[ObsoletedSlot] = Obsoleted;
   }
 };
-}
+
+} // namespace
 
 /// \brief Wraps an identifier and optional source location for the identifier.
 struct IdentifierLoc {
@@ -80,8 +89,8 @@
 
 /// \brief A union of the various pointer types that can be passed to an
 /// AttributeList as an argument.
-typedef llvm::PointerUnion<Expr*, IdentifierLoc*> ArgsUnion;
-typedef llvm::SmallVector<ArgsUnion, 12U> ArgsVector;
+using ArgsUnion = llvm::PointerUnion<Expr *, IdentifierLoc *>;
+using ArgsVector = llvm::SmallVector<ArgsUnion, 12U>;
 
 /// AttributeList - Represents a syntactic attribute.
 ///
@@ -98,18 +107,25 @@
   enum Syntax {
     /// __attribute__((...))
     AS_GNU,
+
     /// [[...]]
     AS_CXX11,
+
     /// [[...]]
     AS_C2x,
+
     /// __declspec(...)
     AS_Declspec,
+
     /// [uuid("...")] class Foo
     AS_Microsoft,
+
     /// __ptr16, alignas(...), etc.
     AS_Keyword,
+
     /// #pragma ...
     AS_Pragma,
+
     // Note TableGen depends on the order above.  Do not add or change the order
     // without adding related code to TableGen/ClangAttrEmitter.cpp.
     /// Context-sensitive version of a keyword attribute.
@@ -166,10 +182,10 @@
   const Expr *MessageExpr;
 
   /// The next attribute in the current position.
-  AttributeList *NextInPosition;
+  AttributeList *NextInPosition = nullptr;
 
   /// The next attribute allocated in the current Pool.
-  AttributeList *NextInPool;
+  AttributeList *NextInPool = nullptr;
 
   /// Arguments, if any, are stored immediately following the object.
   ArgsUnion *getArgsBuffer() { return reinterpret_cast<ArgsUnion *>(this + 1); }
@@ -194,63 +210,25 @@
   };
   struct PropertyData {
     IdentifierInfo *GetterId, *SetterId;
+
     PropertyData(IdentifierInfo *getterId, IdentifierInfo *setterId)
-    : GetterId(getterId), SetterId(setterId) {}
+        : GetterId(getterId), SetterId(setterId) {}
   };
 
 private:
-  /// Type tag information is stored immediately following the arguments, if
-  /// any, at the end of the object.  They are mutually exlusive with
-  /// availability slots.
-  TypeTagForDatatypeData &getTypeTagForDatatypeDataSlot() {
-    return *reinterpret_cast<TypeTagForDatatypeData*>(getArgsBuffer()+NumArgs);
-  }
-
-  const TypeTagForDatatypeData &getTypeTagForDatatypeDataSlot() const {
-    return *reinterpret_cast<const TypeTagForDatatypeData*>(getArgsBuffer()
-                                                            + NumArgs);
-  }
-
-  /// The type buffer immediately follows the object and are mutually exclusive
-  /// with arguments.
-  ParsedType &getTypeBuffer() {
-    return *reinterpret_cast<ParsedType *>(this + 1);
-  }
-
-  const ParsedType &getTypeBuffer() const {
-    return *reinterpret_cast<const ParsedType *>(this + 1);
-  }
-
-  /// The property data immediately follows the object is is mutually exclusive
-  /// with arguments.
-  PropertyData &getPropertyDataBuffer() {
-    assert(IsProperty);
-    return *reinterpret_cast<PropertyData*>(this + 1);
-  }
-
-  const PropertyData &getPropertyDataBuffer() const {
-    assert(IsProperty);
-    return *reinterpret_cast<const PropertyData*>(this + 1);
-  }
-
-  AttributeList(const AttributeList &) = delete;
-  void operator=(const AttributeList &) = delete;
-  void operator delete(void *) = delete;
-  ~AttributeList() = delete;
-
-  size_t allocated_size() const;
+  friend class AttributeFactory;
+  friend class AttributePool;
 
   /// Constructor for attributes with expression arguments.
   AttributeList(IdentifierInfo *attrName, SourceRange attrRange,
                 IdentifierInfo *scopeName, SourceLocation scopeLoc,
                 ArgsUnion *args, unsigned numArgs,
                 Syntax syntaxUsed, SourceLocation ellipsisLoc)
-    : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange),
-      ScopeLoc(scopeLoc), EllipsisLoc(ellipsisLoc), NumArgs(numArgs),
-      SyntaxUsed(syntaxUsed), Invalid(false), UsedAsTypeAttr(false),
-      IsAvailability(false), IsTypeTagForDatatype(false), IsProperty(false),
-      HasParsedType(false), HasProcessingCache(false),
-      NextInPosition(nullptr), NextInPool(nullptr) {
+      : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange),
+        ScopeLoc(scopeLoc), EllipsisLoc(ellipsisLoc), NumArgs(numArgs),
+        SyntaxUsed(syntaxUsed), Invalid(false), UsedAsTypeAttr(false),
+        IsAvailability(false), IsTypeTagForDatatype(false), IsProperty(false),
+        HasParsedType(false), HasProcessingCache(false) {
     if (numArgs) memcpy(getArgsBuffer(), args, numArgs * sizeof(ArgsUnion));
     AttrKind = getKind(getName(), getScopeName(), syntaxUsed);
   }
@@ -265,12 +243,12 @@
                 const Expr *messageExpr,
                 Syntax syntaxUsed, SourceLocation strict,
                 const Expr *replacementExpr)
-    : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange),
-      ScopeLoc(scopeLoc), EllipsisLoc(), NumArgs(1), SyntaxUsed(syntaxUsed),
-      Invalid(false), UsedAsTypeAttr(false), IsAvailability(true),
-      IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(false),
-      HasProcessingCache(false), UnavailableLoc(unavailable),
-      MessageExpr(messageExpr), NextInPosition(nullptr), NextInPool(nullptr) {
+      : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange),
+        ScopeLoc(scopeLoc), NumArgs(1), SyntaxUsed(syntaxUsed), Invalid(false),
+        UsedAsTypeAttr(false), IsAvailability(true),
+        IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(false),
+        HasProcessingCache(false), UnavailableLoc(unavailable),
+        MessageExpr(messageExpr) {
     ArgsUnion PVal(Parm);
     memcpy(getArgsBuffer(), &PVal, sizeof(ArgsUnion));
     new (getAvailabilityData()) AvailabilityData(
@@ -285,11 +263,11 @@
                 IdentifierLoc *Parm2,
                 IdentifierLoc *Parm3,
                 Syntax syntaxUsed)
-  : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange),
-    ScopeLoc(scopeLoc), EllipsisLoc(), NumArgs(3), SyntaxUsed(syntaxUsed),
-    Invalid(false), UsedAsTypeAttr(false), IsAvailability(false),
-    IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(false),
-    HasProcessingCache(false), NextInPosition(nullptr), NextInPool(nullptr) {
+      : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange),
+        ScopeLoc(scopeLoc), NumArgs(3), SyntaxUsed(syntaxUsed), Invalid(false),
+        UsedAsTypeAttr(false), IsAvailability(false),
+        IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(false),
+        HasProcessingCache(false) {
     ArgsUnion *Args = getArgsBuffer();
     Args[0] = Parm1;
     Args[1] = Parm2;
@@ -302,11 +280,11 @@
                 IdentifierInfo *scopeName, SourceLocation scopeLoc,
                 IdentifierLoc *ArgKind, ParsedType matchingCType,
                 bool layoutCompatible, bool mustBeNull, Syntax syntaxUsed)
-    : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange),
-      ScopeLoc(scopeLoc), EllipsisLoc(), NumArgs(1), SyntaxUsed(syntaxUsed),
-      Invalid(false), UsedAsTypeAttr(false), IsAvailability(false),
-      IsTypeTagForDatatype(true), IsProperty(false), HasParsedType(false),
-      HasProcessingCache(false), NextInPosition(nullptr), NextInPool(nullptr) {
+      : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange),
+        ScopeLoc(scopeLoc), NumArgs(1), SyntaxUsed(syntaxUsed), Invalid(false),
+        UsedAsTypeAttr(false), IsAvailability(false),
+        IsTypeTagForDatatype(true), IsProperty(false), HasParsedType(false),
+        HasProcessingCache(false) {
     ArgsUnion PVal(ArgKind);
     memcpy(getArgsBuffer(), &PVal, sizeof(ArgsUnion));
     TypeTagForDatatypeData &ExtraData = getTypeTagForDatatypeDataSlot();
@@ -321,10 +299,10 @@
                 IdentifierInfo *scopeName, SourceLocation scopeLoc,
                 ParsedType typeArg, Syntax syntaxUsed)
       : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange),
-        ScopeLoc(scopeLoc), EllipsisLoc(), NumArgs(0), SyntaxUsed(syntaxUsed),
-        Invalid(false), UsedAsTypeAttr(false), IsAvailability(false),
+        ScopeLoc(scopeLoc), NumArgs(0), SyntaxUsed(syntaxUsed), Invalid(false),
+        UsedAsTypeAttr(false), IsAvailability(false),
         IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(true),
-        HasProcessingCache(false), NextInPosition(nullptr), NextInPool(nullptr){
+        HasProcessingCache(false) {
     new (&getTypeBuffer()) ParsedType(typeArg);
     AttrKind = getKind(getName(), getScopeName(), syntaxUsed);
   }
@@ -334,19 +312,55 @@
                 IdentifierInfo *scopeName, SourceLocation scopeLoc,
                 IdentifierInfo *getterId, IdentifierInfo *setterId,
                 Syntax syntaxUsed)
-    : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange),
-      ScopeLoc(scopeLoc), EllipsisLoc(), NumArgs(0), SyntaxUsed(syntaxUsed),
-      Invalid(false), UsedAsTypeAttr(false), IsAvailability(false),
-      IsTypeTagForDatatype(false), IsProperty(true), HasParsedType(false),
-      HasProcessingCache(false), NextInPosition(nullptr), NextInPool(nullptr) {
+      : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange),
+        ScopeLoc(scopeLoc), NumArgs(0), SyntaxUsed(syntaxUsed),
+        Invalid(false), UsedAsTypeAttr(false), IsAvailability(false),
+        IsTypeTagForDatatype(false), IsProperty(true), HasParsedType(false),
+        HasProcessingCache(false) {
     new (&getPropertyDataBuffer()) PropertyData(getterId, setterId);
     AttrKind = getKind(getName(), getScopeName(), syntaxUsed);
   }
 
-  friend class AttributePool;
-  friend class AttributeFactory;
+  /// Type tag information is stored immediately following the arguments, if
+  /// any, at the end of the object.  They are mutually exlusive with
+  /// availability slots.
+  TypeTagForDatatypeData &getTypeTagForDatatypeDataSlot() {
+    return *reinterpret_cast<TypeTagForDatatypeData*>(getArgsBuffer()+NumArgs);
+  }
+  const TypeTagForDatatypeData &getTypeTagForDatatypeDataSlot() const {
+    return *reinterpret_cast<const TypeTagForDatatypeData*>(getArgsBuffer()
+                                                            + NumArgs);
+  }
+
+  /// The type buffer immediately follows the object and are mutually exclusive
+  /// with arguments.
+  ParsedType &getTypeBuffer() {
+    return *reinterpret_cast<ParsedType *>(this + 1);
+  }
+  const ParsedType &getTypeBuffer() const {
+    return *reinterpret_cast<const ParsedType *>(this + 1);
+  }
+
+  /// The property data immediately follows the object is is mutually exclusive
+  /// with arguments.
+  PropertyData &getPropertyDataBuffer() {
+    assert(IsProperty);
+    return *reinterpret_cast<PropertyData*>(this + 1);
+  }
+  const PropertyData &getPropertyDataBuffer() const {
+    assert(IsProperty);
+    return *reinterpret_cast<const PropertyData*>(this + 1);
+  }
+
+  size_t allocated_size() const;
 
 public:
+  AttributeList(const AttributeList &) = delete;
+  AttributeList &operator=(const AttributeList &) = delete;
+  ~AttributeList() = delete;
+
+  void operator delete(void *) = delete;
+
   enum Kind {           
     #define PARSED_ATTR(NAME) AT_##NAME,
     #include "clang/Sema/AttrParsedAttrList.inc"
@@ -377,12 +391,15 @@
 
   bool isDeclspecAttribute() const { return SyntaxUsed == AS_Declspec; }
   bool isMicrosoftAttribute() const { return SyntaxUsed == AS_Microsoft; }
+
   bool isCXX11Attribute() const {
     return SyntaxUsed == AS_CXX11 || isAlignasAttribute();
   }
+
   bool isC2xAttribute() const {
     return SyntaxUsed == AS_C2x;
   }
+
   bool isKeywordAttribute() const {
     return SyntaxUsed == AS_Keyword || SyntaxUsed == AS_ContextSensitiveKeyword;
   }
@@ -395,10 +412,12 @@
   void setInvalid(bool b = true) const { Invalid = b; }
 
   bool hasProcessingCache() const { return HasProcessingCache; }
+
   unsigned getProcessingCache() const {
     assert(hasProcessingCache());
     return ProcessingCache;
   }
+
   void setProcessingCache(unsigned value) const {
     ProcessingCache = value;
     HasProcessingCache = true;
@@ -429,6 +448,7 @@
   bool isArgExpr(unsigned Arg) const {
     return Arg < NumArgs && getArg(Arg).is<Expr*>();
   }
+
   Expr *getArgAsExpr(unsigned Arg) const {
     return getArg(Arg).get<Expr*>();
   }
@@ -436,6 +456,7 @@
   bool isArgIdent(unsigned Arg) const {
     return Arg < NumArgs && getArg(Arg).is<IdentifierLoc*>();
   }
+
   IdentifierLoc *getArgAsIdent(unsigned Arg) const {
     return getArg(Arg).get<IdentifierLoc*>();
   }
@@ -596,7 +617,7 @@
 
 class AttributePool {
   AttributeFactory &Factory;
-  AttributeList *Head;
+  AttributeList *Head = nullptr;
 
   void *allocate(size_t size) {
     return Factory.allocate(size);
@@ -613,10 +634,14 @@
 
 public:
   /// Create a new pool for a factory.
-  AttributePool(AttributeFactory &factory) : Factory(factory), Head(nullptr) {}
+  AttributePool(AttributeFactory &factory) : Factory(factory) {}
 
   AttributePool(const AttributePool &) = delete;
 
+  ~AttributePool() {
+    if (Head) Factory.reclaimPool(Head);
+  }
+
   /// Move the given pool's allocations to this pool.
   AttributePool(AttributePool &&pool) : Factory(pool.Factory), Head(pool.Head) {
     pool.Head = nullptr;
@@ -639,10 +664,6 @@
     }
   }
 
-  ~AttributePool() {
-    if (Head) Factory.reclaimPool(Head);
-  }
-
   AttributeList *create(IdentifierInfo *attrName, SourceRange attrRange,
                         IdentifierInfo *scopeName, SourceLocation scopeLoc,
                         ArgsUnion *args, unsigned numArgs,
@@ -733,10 +754,7 @@
 /// is that this will become significantly more serious.
 class ParsedAttributes {
 public:
-  ParsedAttributes(AttributeFactory &factory)
-    : pool(factory), list(nullptr) {
-  }
-
+  ParsedAttributes(AttributeFactory &factory) : pool(factory) {}
   ParsedAttributes(const ParsedAttributes &) = delete;
 
   AttributePool &getPool() const { return pool; }
@@ -882,7 +900,7 @@
 
 private:
   mutable AttributePool pool;
-  AttributeList *list;
+  AttributeList *list = nullptr;
 };
 
 /// These constants match the enumerated choices of
@@ -900,52 +918,19 @@
   ExpectedFunction,
   ExpectedUnion,
   ExpectedVariableOrFunction,
-  ExpectedFunctionOrGlobalVar,
-  ExpectedFunctionVariableOrObjCInterface,
   ExpectedFunctionOrMethod,
-  ExpectedParameter,
   ExpectedFunctionMethodOrBlock,
-  ExpectedFunctionMethodOrClass,
   ExpectedFunctionMethodOrParameter,
-  ExpectedFunctionMethodOrGlobalVar,
-  ExpectedClass,
-  ExpectedEnum,
   ExpectedVariable,
-  ExpectedMethod,
-  ExpectedFieldOrGlobalVar,
-  ExpectedStruct,
-  ExpectedParameterOrTypedef,
-  ExpectedVariableOrTypedef,
-  ExpectedTLSVar,
   ExpectedVariableOrField,
   ExpectedVariableFieldOrTag,
   ExpectedTypeOrNamespace,
-  ExpectedObjectiveCInterface,
-  ExpectedMethodOrProperty,
-  ExpectedFunctionOrMethodOrProperty,
-  ExpectedStructOrUnion,
-  ExpectedStructOrUnionOrClass,
-  ExpectedType,
-  ExpectedObjCInstanceMethod,
-  ExpectedObjCInterfaceDeclInitMethod,
   ExpectedFunctionVariableOrClass,
-  ExpectedFunctionVariableClassOrObjCInterface,
-  ExpectedObjectiveCProtocol,
-  ExpectedStaticOrTLSVar,
-  ExpectedFunctionGlobalVarMethodOrProperty,
-  ExpectedStructOrUnionOrTypedef,
-  ExpectedStructOrTypedef,
-  ExpectedObjectiveCInterfaceOrProtocol,
   ExpectedKernelFunction,
   ExpectedFunctionWithProtoType,
-  ExpectedVariableEnumFieldOrTypedef,
-  ExpectedFunctionMethodEnumOrClass,
-  ExpectedStructClassVariableFunctionOrInlineNamespace,
   ExpectedForMaybeUnused,
-  ExpectedEnumOrClass,
-  ExpectedNamedDecl,
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_SEMA_ATTRIBUTELIST_H
diff --git a/include/clang/Sema/CodeCompleteConsumer.h b/include/clang/Sema/CodeCompleteConsumer.h
index dee53dc..48e8129 100644
--- a/include/clang/Sema/CodeCompleteConsumer.h
+++ b/include/clang/Sema/CodeCompleteConsumer.h
@@ -1,4 +1,4 @@
-//===---- CodeCompleteConsumer.h - Code Completion Interface ----*- C++ -*-===//
+//===- CodeCompleteConsumer.h - Code Completion Interface -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,24 +10,42 @@
 //  This file defines the CodeCompleteConsumer class.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_SEMA_CODECOMPLETECONSUMER_H
 #define LLVM_CLANG_SEMA_CODECOMPLETECONSUMER_H
 
 #include "clang-c/Index.h"
-#include "clang/AST/CanonicalType.h"
-#include "clang/AST/DeclBase.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Sema/CodeCompleteOptions.h"
+#include "clang/Sema/DeclSpec.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/type_traits.h"
+#include <cassert>
+#include <memory>
 #include <string>
 #include <utility>
 
 namespace clang {
 
+class ASTContext;
 class Decl;
+class DeclContext;
+class FunctionDecl;
+class FunctionTemplateDecl;
+class IdentifierInfo;
+class LangOptions;
+class NamedDecl;
+class NestedNameSpecifier;
+class Preprocessor;
+class Sema;
 
 /// \brief Default priority values for code-completion results based
 /// on their kind.
@@ -35,31 +53,43 @@
   /// \brief Priority for the next initialization in a constructor initializer
   /// list.
   CCP_NextInitializer = 7,
+
   /// \brief Priority for an enumeration constant inside a switch whose
   /// condition is of the enumeration type.
   CCP_EnumInCase = 7,
+
   /// \brief Priority for a send-to-super completion.
   CCP_SuperCompletion = 20,
+
   /// \brief Priority for a declaration that is in the local scope.
   CCP_LocalDeclaration = 34,
+
   /// \brief Priority for a member declaration found from the current
   /// method or member function.
   CCP_MemberDeclaration = 35,
+
   /// \brief Priority for a language keyword (that isn't any of the other
   /// categories).
   CCP_Keyword = 40,
+
   /// \brief Priority for a code pattern.
   CCP_CodePattern = 40,
+
   /// \brief Priority for a non-type declaration.
   CCP_Declaration = 50,
+
   /// \brief Priority for a type.
   CCP_Type = CCP_Declaration,
+
   /// \brief Priority for a constant value (e.g., enumerator).
   CCP_Constant = 65,
+
   /// \brief Priority for a preprocessor macro.
   CCP_Macro = 70,
+
   /// \brief Priority for a nested-name-specifier.
   CCP_NestedNameSpecifier = 75,
+
   /// \brief Priority for a result that isn't likely to be what the user wants,
   /// but is included for completeness.
   CCP_Unlikely = 80,
@@ -73,9 +103,11 @@
 enum {
   /// \brief The result is in a base class.
   CCD_InBaseClass = 2,
+
   /// \brief The result is a C++ non-static member function whose qualifiers
   /// exactly match the object type on which the member function can be called.
   CCD_ObjectQualifierMatch = -1,
+
   /// \brief The selector of the given message exactly matches the selector
   /// of the current method, which might imply that some kind of delegation
   /// is occurring.
@@ -103,6 +135,7 @@
   /// \brief Divide by this factor when a code-completion result's type exactly
   /// matches the type we expect.
   CCF_ExactTypeMatch = 4,
+
   /// \brief Divide by this factor when a code-completion result's type is
   /// similar to the type we expect (e.g., both arithmetic types, both
   /// Objective-C object pointer types).
@@ -147,14 +180,6 @@
 /// declaration.
 CXCursorKind getCursorKindForDecl(const Decl *D);
 
-class FunctionDecl;
-class FunctionType;
-class FunctionTemplateDecl;
-class IdentifierInfo;
-class NamedDecl;
-class NestedNameSpecifier;
-class Sema;
-
 /// \brief The context in which code completion occurred, so that the
 /// code-completion consumer can process the results accordingly.
 class CodeCompletionContext {
@@ -162,31 +187,41 @@
   enum Kind {
     /// \brief An unspecified code-completion context.
     CCC_Other,
+
     /// \brief An unspecified code-completion context where we should also add
     /// macro completions.
     CCC_OtherWithMacros,
+
     /// \brief Code completion occurred within a "top-level" completion context,
     /// e.g., at namespace or global scope.
     CCC_TopLevel,
+
     /// \brief Code completion occurred within an Objective-C interface,
     /// protocol, or category interface.
     CCC_ObjCInterface,
+
     /// \brief Code completion occurred within an Objective-C implementation
     /// or category implementation.
     CCC_ObjCImplementation,
+
     /// \brief Code completion occurred within the instance variable list of
     /// an Objective-C interface, implementation, or category implementation.
     CCC_ObjCIvarList,
+
     /// \brief Code completion occurred within a class, struct, or union.
     CCC_ClassStructUnion,
+
     /// \brief Code completion occurred where a statement (or declaration) is
     /// expected in a function, method, or block.
     CCC_Statement,
+
     /// \brief Code completion occurred where an expression is expected.
     CCC_Expression,
+
     /// \brief Code completion occurred where an Objective-C message receiver
     /// is expected.
     CCC_ObjCMessageReceiver,
+
     /// \brief Code completion occurred on the right-hand side of a member
     /// access expression using the dot operator.
     ///
@@ -194,6 +229,7 @@
     /// accessed. The type itself is available via
     /// \c CodeCompletionContext::getType().
     CCC_DotMemberAccess,
+
     /// \brief Code completion occurred on the right-hand side of a member
     /// access expression using the arrow operator.
     ///
@@ -201,6 +237,7 @@
     /// accessed. The type itself is available via
     /// \c CodeCompletionContext::getType().
     CCC_ArrowMemberAccess,
+
     /// \brief Code completion occurred on the right-hand side of an Objective-C
     /// property access expression.
     ///
@@ -208,67 +245,90 @@
     /// accessed. The type itself is available via
     /// \c CodeCompletionContext::getType().
     CCC_ObjCPropertyAccess,
+
     /// \brief Code completion occurred after the "enum" keyword, to indicate
     /// an enumeration name.
     CCC_EnumTag,
+
     /// \brief Code completion occurred after the "union" keyword, to indicate
     /// a union name.
     CCC_UnionTag,
+
     /// \brief Code completion occurred after the "struct" or "class" keyword,
     /// to indicate a struct or class name.
     CCC_ClassOrStructTag,
+
     /// \brief Code completion occurred where a protocol name is expected.
     CCC_ObjCProtocolName,
+
     /// \brief Code completion occurred where a namespace or namespace alias
     /// is expected.
     CCC_Namespace,
+
     /// \brief Code completion occurred where a type name is expected.
     CCC_Type,
+
     /// \brief Code completion occurred where a new name is expected.
     CCC_Name,
+
     /// \brief Code completion occurred where a new name is expected and a
     /// qualified name is permissible.
     CCC_PotentiallyQualifiedName,
+
     /// \brief Code completion occurred where an macro is being defined.
     CCC_MacroName,
+
     /// \brief Code completion occurred where a macro name is expected
     /// (without any arguments, in the case of a function-like macro).
     CCC_MacroNameUse,
+
     /// \brief Code completion occurred within a preprocessor expression.
     CCC_PreprocessorExpression,
+
     /// \brief Code completion occurred where a preprocessor directive is
     /// expected.
     CCC_PreprocessorDirective,
+
     /// \brief Code completion occurred in a context where natural language is
     /// expected, e.g., a comment or string literal.
     ///
     /// This context usually implies that no completions should be added,
     /// unless they come from an appropriate natural-language dictionary.
     CCC_NaturalLanguage,
+
     /// \brief Code completion for a selector, as in an \@selector expression.
     CCC_SelectorName,
+
     /// \brief Code completion within a type-qualifier list.
     CCC_TypeQualifiers,
+
     /// \brief Code completion in a parenthesized expression, which means that
     /// we may also have types here in C and Objective-C (as well as in C++).
     CCC_ParenthesizedExpression,
+
     /// \brief Code completion where an Objective-C instance message is
     /// expected.
     CCC_ObjCInstanceMessage,
+
     /// \brief Code completion where an Objective-C class message is expected.
     CCC_ObjCClassMessage,
+
     /// \brief Code completion where the name of an Objective-C class is
     /// expected.
     CCC_ObjCInterfaceName,
+
     /// \brief Code completion where an Objective-C category name is expected.
     CCC_ObjCCategoryName,
+
     /// \brief An unknown context, in which we are recovering from a parsing
     /// error and don't know which completions we should give.
     CCC_Recovery
   };
 
+  using VisitedContextSet = llvm::SmallPtrSet<DeclContext *, 8>;
+
 private:
-  enum Kind Kind;
+  Kind CCKind;
 
   /// \brief The type that would prefer to see at this point (e.g., the type
   /// of an initializer or function parameter).
@@ -280,25 +340,32 @@
   /// \brief The identifiers for Objective-C selector parts.
   ArrayRef<IdentifierInfo *> SelIdents;
 
+  /// \brief The scope specifier that comes before the completion token e.g.
+  /// "a::b::"
+  llvm::Optional<CXXScopeSpec> ScopeSpecifier;
+
+  /// \brief A set of declaration contexts visited by Sema when doing lookup for
+  /// code completion.
+  VisitedContextSet VisitedContexts;
+
 public:
   /// \brief Construct a new code-completion context of the given kind.
-  CodeCompletionContext(enum Kind Kind) : Kind(Kind), SelIdents(None) { }
+  CodeCompletionContext(Kind CCKind) : CCKind(CCKind), SelIdents(None) {}
 
   /// \brief Construct a new code-completion context of the given kind.
-  CodeCompletionContext(enum Kind Kind, QualType T,
+  CodeCompletionContext(Kind CCKind, QualType T,
                         ArrayRef<IdentifierInfo *> SelIdents = None)
-                        : Kind(Kind),
-                          SelIdents(SelIdents) {
-    if (Kind == CCC_DotMemberAccess || Kind == CCC_ArrowMemberAccess ||
-        Kind == CCC_ObjCPropertyAccess || Kind == CCC_ObjCClassMessage ||
-        Kind == CCC_ObjCInstanceMessage)
+      : CCKind(CCKind), SelIdents(SelIdents) {
+    if (CCKind == CCC_DotMemberAccess || CCKind == CCC_ArrowMemberAccess ||
+        CCKind == CCC_ObjCPropertyAccess || CCKind == CCC_ObjCClassMessage ||
+        CCKind == CCC_ObjCInstanceMessage)
       BaseType = T;
     else
       PreferredType = T;
   }
 
   /// \brief Retrieve the kind of code-completion context.
-  enum Kind getKind() const { return Kind; }
+  Kind getKind() const { return CCKind; }
 
   /// \brief Retrieve the type that this expression would prefer to have, e.g.,
   /// if the expression is a variable initializer or a function argument, the
@@ -315,8 +382,33 @@
   /// \brief Determines whether we want C++ constructors as results within this
   /// context.
   bool wantConstructorResults() const;
+
+  /// \brief Sets the scope specifier that comes before the completion token.
+  /// This is expected to be set in code completions on qualfied specifiers
+  /// (e.g. "a::b::").
+  void setCXXScopeSpecifier(CXXScopeSpec SS) {
+    this->ScopeSpecifier = std::move(SS);
+  }
+
+  /// \brief Adds a visited context.
+  void addVisitedContext(DeclContext *Ctx) {
+    VisitedContexts.insert(Ctx);
+  }
+
+  /// \brief Retrieves all visited contexts.
+  const VisitedContextSet &getVisitedContexts() const {
+    return VisitedContexts;
+  }
+
+  llvm::Optional<const CXXScopeSpec *> getCXXScopeSpecifier() {
+    if (ScopeSpecifier)
+      return ScopeSpecifier.getPointer();
+    return llvm::None;
+  }
 };
 
+/// \brief Get string representation of \p Kind, useful for for debugging.
+llvm::StringRef getCompletionKindString(CodeCompletionContext::Kind Kind);
 
 /// \brief A "string" used to describe how code completion can
 /// be performed for an entity.
@@ -334,52 +426,71 @@
     /// match the code-completion string, typically a keyword or the name of a
     /// declarator or macro.
     CK_TypedText,
+
     /// \brief A piece of text that should be placed in the buffer, e.g.,
     /// parentheses or a comma in a function call.
     CK_Text,
+
     /// \brief A code completion string that is entirely optional. For example,
     /// an optional code completion string that describes the default arguments
     /// in a function call.
     CK_Optional,
+
     /// \brief A string that acts as a placeholder for, e.g., a function
     /// call argument.
     CK_Placeholder,
+
     /// \brief A piece of text that describes something about the result but
     /// should not be inserted into the buffer.
     CK_Informative,
     /// \brief A piece of text that describes the type of an entity or, for
     /// functions and methods, the return type.
     CK_ResultType,
+
     /// \brief A piece of text that describes the parameter that corresponds
     /// to the code-completion location within a function call, message send,
     /// macro invocation, etc.
     CK_CurrentParameter,
+
     /// \brief A left parenthesis ('(').
     CK_LeftParen,
+
     /// \brief A right parenthesis (')').
     CK_RightParen,
+
     /// \brief A left bracket ('[').
     CK_LeftBracket,
+
     /// \brief A right bracket (']').
     CK_RightBracket,
+
     /// \brief A left brace ('{').
     CK_LeftBrace,
+
     /// \brief A right brace ('}').
     CK_RightBrace,
+
     /// \brief A left angle bracket ('<').
     CK_LeftAngle,
+
     /// \brief A right angle bracket ('>').
     CK_RightAngle,
+
     /// \brief A comma separator (',').
     CK_Comma,
+
     /// \brief A colon (':').
     CK_Colon,
+
     /// \brief A semicolon (';').
     CK_SemiColon,
+
     /// \brief An '=' sign.
     CK_Equal,
+
     /// \brief Horizontal whitespace (' ').
     CK_HorizontalSpace,
+
     /// \brief Vertical whitespace ('\\n' or '\\r\\n', depending on the
     /// platform).
     CK_VerticalSpace
@@ -389,7 +500,7 @@
   struct Chunk {
     /// \brief The kind of data stored in this piece of the code completion
     /// string.
-    ChunkKind Kind;
+    ChunkKind Kind = CK_Text;
 
     union {
       /// \brief The text string associated with a CK_Text, CK_Placeholder,
@@ -404,7 +515,7 @@
       CodeCompletionString *Optional;
     };
 
-    Chunk() : Kind(CK_Text), Text(nullptr) { }
+    Chunk() : Text(nullptr) {}
 
     explicit Chunk(ChunkKind Kind, const char *Text = "");
 
@@ -428,6 +539,9 @@
   };
 
 private:
+  friend class CodeCompletionBuilder;
+  friend class CodeCompletionResult;
+
   /// \brief The number of chunks stored in this string.
   unsigned NumChunks : 16;
 
@@ -447,9 +561,6 @@
   /// entity being completed by this result.
   const char *BriefComment;
   
-  CodeCompletionString(const CodeCompletionString &) = delete;
-  void operator=(const CodeCompletionString &) = delete;
-
   CodeCompletionString(const Chunk *Chunks, unsigned NumChunks,
                        unsigned Priority, CXAvailabilityKind Availability,
                        const char **Annotations, unsigned NumAnnotations,
@@ -457,11 +568,12 @@
                        const char *BriefComment);
   ~CodeCompletionString() = default;
 
-  friend class CodeCompletionBuilder;
-  friend class CodeCompletionResult;
-
 public:
-  typedef const Chunk *iterator;
+  CodeCompletionString(const CodeCompletionString &) = delete;
+  CodeCompletionString &operator=(const CodeCompletionString &) = delete;
+
+  using iterator = const Chunk *;
+
   iterator begin() const { return reinterpret_cast<const Chunk *>(this + 1); }
   iterator end() const { return begin() + NumChunks; }
   bool empty() const { return NumChunks == 0; }
@@ -523,6 +635,7 @@
   std::shared_ptr<GlobalCodeCompletionAllocator> getAllocatorRef() const {
     return AllocatorRef;
   }
+
   CodeCompletionAllocator &getAllocator() const {
     assert(AllocatorRef);
     return *AllocatorRef;
@@ -531,28 +644,30 @@
   StringRef getParentName(const DeclContext *DC);
 };
 
-} // end namespace clang
+} // namespace clang
 
 namespace llvm {
-  template <> struct isPodLike<clang::CodeCompletionString::Chunk> {
-    static const bool value = true;
-  };
-}
+
+template <> struct isPodLike<clang::CodeCompletionString::Chunk> {
+  static const bool value = true;
+};
+
+} // namespace llvm
 
 namespace clang {
 
 /// \brief A builder class used to construct new code-completion strings.
 class CodeCompletionBuilder {
 public:
-  typedef CodeCompletionString::Chunk Chunk;
+  using Chunk = CodeCompletionString::Chunk;
 
 private:
   CodeCompletionAllocator &Allocator;
   CodeCompletionTUInfo &CCTUInfo;
-  unsigned Priority;
-  CXAvailabilityKind Availability;
+  unsigned Priority = 0;
+  CXAvailabilityKind Availability = CXAvailability_Available;
   StringRef ParentName;
-  const char *BriefComment;
+  const char *BriefComment = nullptr;
   
   /// \brief The chunks stored in this string.
   SmallVector<Chunk, 4> Chunks;
@@ -562,16 +677,13 @@
 public:
   CodeCompletionBuilder(CodeCompletionAllocator &Allocator,
                         CodeCompletionTUInfo &CCTUInfo)
-    : Allocator(Allocator), CCTUInfo(CCTUInfo),
-      Priority(0), Availability(CXAvailability_Available),
-      BriefComment(nullptr) { }
+      : Allocator(Allocator), CCTUInfo(CCTUInfo) {}
 
   CodeCompletionBuilder(CodeCompletionAllocator &Allocator,
                         CodeCompletionTUInfo &CCTUInfo,
                         unsigned Priority, CXAvailabilityKind Availability)
-    : Allocator(Allocator), CCTUInfo(CCTUInfo),
-      Priority(Priority), Availability(Availability),
-      BriefComment(nullptr) { }
+      : Allocator(Allocator), CCTUInfo(CCTUInfo), Priority(Priority),
+        Availability(Availability) {}
 
   /// \brief Retrieve the allocator into which the code completion
   /// strings should be allocated.
@@ -624,15 +736,22 @@
 public:
   /// \brief Describes the kind of result generated.
   enum ResultKind {
-    RK_Declaration = 0, ///< Refers to a declaration
-    RK_Keyword,         ///< Refers to a keyword or symbol.
-    RK_Macro,           ///< Refers to a macro
-    RK_Pattern          ///< Refers to a precomputed pattern.
+    /// Refers to a declaration.
+    RK_Declaration = 0,
+
+    /// Refers to a keyword or symbol.
+    RK_Keyword,
+
+    /// Refers to a macro.
+    RK_Macro,
+
+    /// Refers to a precomputed pattern.
+    RK_Pattern
   };
 
   /// \brief When Kind == RK_Declaration or RK_Pattern, the declaration we are
   /// referring to. In the latter case, the declaration might be NULL.
-  const NamedDecl *Declaration;
+  const NamedDecl *Declaration = nullptr;
 
   union {
     /// \brief When Kind == RK_Keyword, the string representing the keyword
@@ -652,7 +771,7 @@
 
   /// \brief Specifies which parameter (of a function, Objective-C method,
   /// macro, etc.) we should start with when formatting the result.
-  unsigned StartParameter;
+  unsigned StartParameter = 0;
 
   /// \brief The kind of result stored here.
   ResultKind Kind;
@@ -661,7 +780,7 @@
   CXCursorKind CursorKind;
 
   /// \brief The availability of this result.
-  CXAvailabilityKind Availability;
+  CXAvailabilityKind Availability = CXAvailability_Available;
 
   /// \brief Whether this result is hidden by another name.
   bool Hidden : 1;
@@ -684,7 +803,7 @@
   /// \brief If the result should have a nested-name-specifier, this is it.
   /// When \c QualifierIsInformative, the nested-name-specifier is
   /// informative rather than required.
-  NestedNameSpecifier *Qualifier;
+  NestedNameSpecifier *Qualifier = nullptr;
 
   /// \brief Build a result that refers to a declaration.
   CodeCompletionResult(const NamedDecl *Declaration,
@@ -692,33 +811,27 @@
                        NestedNameSpecifier *Qualifier = nullptr,
                        bool QualifierIsInformative = false,
                        bool Accessible = true)
-    : Declaration(Declaration), Priority(Priority),
-      StartParameter(0), Kind(RK_Declaration),
-      Availability(CXAvailability_Available), Hidden(false),
-      QualifierIsInformative(QualifierIsInformative),
-      StartsNestedNameSpecifier(false), AllParametersAreInformative(false),
-      DeclaringEntity(false), Qualifier(Qualifier) {
+      : Declaration(Declaration), Priority(Priority), Kind(RK_Declaration),
+        Hidden(false), QualifierIsInformative(QualifierIsInformative),
+        StartsNestedNameSpecifier(false), AllParametersAreInformative(false),
+        DeclaringEntity(false), Qualifier(Qualifier) {
     computeCursorKindAndAvailability(Accessible);
   }
 
   /// \brief Build a result that refers to a keyword or symbol.
   CodeCompletionResult(const char *Keyword, unsigned Priority = CCP_Keyword)
-    : Declaration(nullptr), Keyword(Keyword), Priority(Priority),
-      StartParameter(0), Kind(RK_Keyword), CursorKind(CXCursor_NotImplemented),
-      Availability(CXAvailability_Available), Hidden(false),
-      QualifierIsInformative(0), StartsNestedNameSpecifier(false),
-      AllParametersAreInformative(false), DeclaringEntity(false),
-      Qualifier(nullptr) {}
+      : Keyword(Keyword), Priority(Priority), Kind(RK_Keyword),
+        CursorKind(CXCursor_NotImplemented), Hidden(false),
+        QualifierIsInformative(false), StartsNestedNameSpecifier(false),
+        AllParametersAreInformative(false), DeclaringEntity(false) {}
 
   /// \brief Build a result that refers to a macro.
   CodeCompletionResult(const IdentifierInfo *Macro,
                        unsigned Priority = CCP_Macro)
-    : Declaration(nullptr), Macro(Macro), Priority(Priority), StartParameter(0),
-      Kind(RK_Macro), CursorKind(CXCursor_MacroDefinition),
-      Availability(CXAvailability_Available), Hidden(false),
-      QualifierIsInformative(0), StartsNestedNameSpecifier(false),
-      AllParametersAreInformative(false), DeclaringEntity(false),
-      Qualifier(nullptr) {}
+      : Macro(Macro), Priority(Priority), Kind(RK_Macro),
+        CursorKind(CXCursor_MacroDefinition), Hidden(false),
+        QualifierIsInformative(false), StartsNestedNameSpecifier(false),
+        AllParametersAreInformative(false), DeclaringEntity(false) {}
 
   /// \brief Build a result that refers to a pattern.
   CodeCompletionResult(CodeCompletionString *Pattern,
@@ -726,23 +839,19 @@
                        CXCursorKind CursorKind = CXCursor_NotImplemented,
                    CXAvailabilityKind Availability = CXAvailability_Available,
                        const NamedDecl *D = nullptr)
-    : Declaration(D), Pattern(Pattern), Priority(Priority), StartParameter(0),
-      Kind(RK_Pattern), CursorKind(CursorKind), Availability(Availability),
-      Hidden(false), QualifierIsInformative(0),
-      StartsNestedNameSpecifier(false), AllParametersAreInformative(false),
-      DeclaringEntity(false), Qualifier(nullptr)
-  {
-  }
+      : Declaration(D), Pattern(Pattern), Priority(Priority), Kind(RK_Pattern),
+        CursorKind(CursorKind), Availability(Availability), Hidden(false),
+        QualifierIsInformative(false), StartsNestedNameSpecifier(false),
+        AllParametersAreInformative(false), DeclaringEntity(false) {}
 
   /// \brief Build a result that refers to a pattern with an associated
   /// declaration.
   CodeCompletionResult(CodeCompletionString *Pattern, const NamedDecl *D,
                        unsigned Priority)
-    : Declaration(D), Pattern(Pattern), Priority(Priority), StartParameter(0),
-      Kind(RK_Pattern), Availability(CXAvailability_Available), Hidden(false),
-      QualifierIsInformative(false), StartsNestedNameSpecifier(false),
-      AllParametersAreInformative(false), DeclaringEntity(false),
-      Qualifier(nullptr) {
+      : Declaration(D), Pattern(Pattern), Priority(Priority), Kind(RK_Pattern),
+        Hidden(false), QualifierIsInformative(false),
+        StartsNestedNameSpecifier(false), AllParametersAreInformative(false),
+        DeclaringEntity(false) {
     computeCursorKindAndAvailability();
   }  
   
@@ -777,6 +886,12 @@
                                            CodeCompletionTUInfo &CCTUInfo,
                                            bool IncludeBriefComments);
 
+  /// \brief Retrieve the name that should be used to order a result.
+  ///
+  /// If the name needs to be constructed as a string, that string will be
+  /// saved into Saved and the returned StringRef will refer to it.
+  StringRef getOrderedName(std::string &Saved) const;
+
 private:
   void computeCursorKindAndAvailability(bool Accessible = true);
 };
@@ -798,7 +913,6 @@
   return !(X < Y);
 }
 
-
 raw_ostream &operator<<(raw_ostream &OS,
                               const CodeCompletionString &CCS);
 
@@ -819,8 +933,10 @@
     enum CandidateKind {
       /// \brief The candidate is a function declaration.
       CK_Function,
+
       /// \brief The candidate is a function template.
       CK_FunctionTemplate,
+
       /// \brief The "candidate" is actually a variable, expression, or block
       /// for which we only have a function prototype.
       CK_FunctionType
@@ -846,13 +962,13 @@
 
   public:
     OverloadCandidate(FunctionDecl *Function)
-      : Kind(CK_Function), Function(Function) { }
+        : Kind(CK_Function), Function(Function) {}
 
     OverloadCandidate(FunctionTemplateDecl *FunctionTemplateDecl)
-      : Kind(CK_FunctionTemplate), FunctionTemplate(FunctionTemplateDecl) { }
+        : Kind(CK_FunctionTemplate), FunctionTemplate(FunctionTemplateDecl) {}
 
     OverloadCandidate(const FunctionType *Type)
-      : Kind(CK_FunctionType), Type(Type) { }
+        : Kind(CK_FunctionType), Type(Type) {}
 
     /// \brief Determine the kind of overload candidate.
     CandidateKind getKind() const { return Kind; }
@@ -882,8 +998,7 @@
 
   CodeCompleteConsumer(const CodeCompleteOptions &CodeCompleteOpts,
                        bool OutputIsBinary)
-    : CodeCompleteOpts(CodeCompleteOpts), OutputIsBinary(OutputIsBinary)
-  { }
+      : CodeCompleteOpts(CodeCompleteOpts), OutputIsBinary(OutputIsBinary) {}
 
   /// \brief Whether the code-completion consumer wants to see macros.
   bool includeMacros() const {
@@ -896,8 +1011,13 @@
   }
 
   /// \brief Whether to include global (top-level) declaration results.
-  bool includeGlobals() const {
-    return CodeCompleteOpts.IncludeGlobals;
+  bool includeGlobals() const { return CodeCompleteOpts.IncludeGlobals; }
+
+  /// \brief Whether to include declarations in namespace contexts (including
+  /// the global namespace). If this is false, `includeGlobals()` will be
+  /// ignored.
+  bool includeNamespaceLevelDecls() const {
+    return CodeCompleteOpts.IncludeNamespaceLevelDecls;
   }
 
   /// \brief Whether to include brief documentation comments within the set of
@@ -906,6 +1026,12 @@
     return CodeCompleteOpts.IncludeBriefComments;
   }
 
+  /// \brief Hint whether to load data from the external AST in order to provide
+  /// full results. If false, declarations from the preamble may be omitted.
+  bool loadExternal() const {
+    return CodeCompleteOpts.LoadExternal;
+  }
+
   /// \brief Determine whether the output of this consumer is binary.
   bool isOutputBinary() const { return OutputIsBinary; }
 
@@ -925,7 +1051,7 @@
   virtual void ProcessCodeCompleteResults(Sema &S,
                                           CodeCompletionContext Context,
                                           CodeCompletionResult *Results,
-                                          unsigned NumResults) { }
+                                          unsigned NumResults) {}
 
   /// \param S the semantic-analyzer object for which code-completion is being
   /// done.
@@ -937,7 +1063,7 @@
   /// \param NumCandidates the number of overload candidates
   virtual void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg,
                                          OverloadCandidate *Candidates,
-                                         unsigned NumCandidates) { }
+                                         unsigned NumCandidates) {}
   //@}
 
   /// \brief Retrieve the allocator that will be used to allocate
@@ -981,6 +1107,6 @@
   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
 };
 
-} // end namespace clang
+} // namespace clang
 
 #endif // LLVM_CLANG_SEMA_CODECOMPLETECONSUMER_H
diff --git a/include/clang/Sema/CodeCompleteOptions.h b/include/clang/Sema/CodeCompleteOptions.h
index fc7713c..bdd4732 100644
--- a/include/clang/Sema/CodeCompleteOptions.h
+++ b/include/clang/Sema/CodeCompleteOptions.h
@@ -24,15 +24,25 @@
   /// Show top-level decls in code completion results.
   unsigned IncludeGlobals : 1;
 
+  /// Show decls in namespace (including the global namespace) in code
+  /// completion results. If this is 0, `IncludeGlobals` will be ignored.
+  ///
+  /// Currently, this only works when completing qualified IDs (i.e.
+  /// `Sema::CodeCompleteQualifiedId`).
+  /// FIXME: consider supporting more completion cases with this option.
+  unsigned IncludeNamespaceLevelDecls : 1;
+
   /// Show brief documentation comments in code completion results.
   unsigned IncludeBriefComments : 1;
 
-  CodeCompleteOptions() :
-      IncludeMacros(0),
-      IncludeCodePatterns(0),
-      IncludeGlobals(1),
-      IncludeBriefComments(0)
-  { }
+  /// Hint whether to load data from the external AST to provide full results.
+  /// If false, namespace-level declarations from the preamble may be omitted.
+  unsigned LoadExternal : 1;
+
+  CodeCompleteOptions()
+      : IncludeMacros(0), IncludeCodePatterns(0), IncludeGlobals(1),
+        IncludeNamespaceLevelDecls(1), IncludeBriefComments(0),
+        LoadExternal(1) {}
 };
 
 } // namespace clang
diff --git a/include/clang/Sema/DeclSpec.h b/include/clang/Sema/DeclSpec.h
index 4fc55a1..e9b116f 100644
--- a/include/clang/Sema/DeclSpec.h
+++ b/include/clang/Sema/DeclSpec.h
@@ -326,6 +326,7 @@
     PQ_TypeSpecifier         = 2,
     PQ_TypeQualifier         = 4,
     PQ_FunctionSpecifier     = 8
+    // FIXME: Attributes should be included here.
   };
 
 private:
@@ -361,9 +362,6 @@
   // constexpr-specifier
   unsigned Constexpr_specified : 1;
 
-  // concept-specifier
-  unsigned Concept_specified : 1;
-
   union {
     UnionParsedType TypeRep;
     Decl *DeclRep;
@@ -393,7 +391,7 @@
       TQ_unalignedLoc;
   SourceLocation FS_inlineLoc, FS_virtualLoc, FS_explicitLoc, FS_noreturnLoc;
   SourceLocation FS_forceinlineLoc;
-  SourceLocation FriendLoc, ModulePrivateLoc, ConstexprLoc, ConceptLoc;
+  SourceLocation FriendLoc, ModulePrivateLoc, ConstexprLoc;
   SourceLocation TQ_pipeLoc;
 
   WrittenBuiltinSpecs writtenBS;
@@ -439,7 +437,6 @@
       FS_noreturn_specified(false),
       Friend_specified(false),
       Constexpr_specified(false),
-      Concept_specified(false),
       Attrs(attrFactory),
       writtenBS(),
       ObjCQualifiers(nullptr) {
@@ -697,8 +694,6 @@
                             unsigned &DiagID);
   bool SetConstexprSpec(SourceLocation Loc, const char *&PrevSpec,
                         unsigned &DiagID);
-  bool SetConceptSpec(SourceLocation Loc, const char *&PrevSpec,
-                      unsigned &DiagID);
 
   bool isFriendSpecified() const { return Friend_specified; }
   SourceLocation getFriendSpecLoc() const { return FriendLoc; }
@@ -709,19 +704,11 @@
   bool isConstexprSpecified() const { return Constexpr_specified; }
   SourceLocation getConstexprSpecLoc() const { return ConstexprLoc; }
 
-  bool isConceptSpecified() const { return Concept_specified; }
-  SourceLocation getConceptSpecLoc() const { return ConceptLoc; }
-
   void ClearConstexprSpec() {
     Constexpr_specified = false;
     ConstexprLoc = SourceLocation();
   }
 
-  void ClearConceptSpec() {
-    Concept_specified = false;
-    ConceptLoc = SourceLocation();
-  }
-
   AttributePool &getAttributePool() const {
     return Attrs.getPool();
   }
@@ -896,6 +883,30 @@
 
 };
 
+/// \brief Describes the kind of unqualified-id parsed.
+enum class UnqualifiedIdKind {
+  /// \brief An identifier.
+  IK_Identifier,
+  /// \brief An overloaded operator name, e.g., operator+.
+  IK_OperatorFunctionId,
+  /// \brief A conversion function name, e.g., operator int.
+  IK_ConversionFunctionId,
+  /// \brief A user-defined literal name, e.g., operator "" _i.
+  IK_LiteralOperatorId,
+  /// \brief A constructor name.
+  IK_ConstructorName,
+  /// \brief A constructor named via a template-id.
+  IK_ConstructorTemplateId,
+  /// \brief A destructor name.
+  IK_DestructorName,
+  /// \brief A template-id, e.g., f<int>.
+  IK_TemplateId,
+  /// \brief An implicit 'self' parameter
+  IK_ImplicitSelfParam,
+  /// \brief A deduction-guide name (a template-name)
+  IK_DeductionGuideName
+};
+
 /// \brief Represents a C++ unqualified-id that has been parsed. 
 class UnqualifiedId {
 private:
@@ -904,28 +915,7 @@
 
 public:
   /// \brief Describes the kind of unqualified-id parsed.
-  enum IdKind {
-    /// \brief An identifier.
-    IK_Identifier,
-    /// \brief An overloaded operator name, e.g., operator+.
-    IK_OperatorFunctionId,
-    /// \brief A conversion function name, e.g., operator int.
-    IK_ConversionFunctionId,
-    /// \brief A user-defined literal name, e.g., operator "" _i.
-    IK_LiteralOperatorId,
-    /// \brief A constructor name.
-    IK_ConstructorName,
-    /// \brief A constructor named via a template-id.
-    IK_ConstructorTemplateId,
-    /// \brief A destructor name.
-    IK_DestructorName,
-    /// \brief A template-id, e.g., f<int>.
-    IK_TemplateId,
-    /// \brief An implicit 'self' parameter
-    IK_ImplicitSelfParam,
-    /// \brief A deduction-guide name (a template-name)
-    IK_DeductionGuideName
-  } Kind;
+  UnqualifiedIdKind Kind;
 
   struct OFI {
     /// \brief The kind of overloaded operator.
@@ -980,13 +970,14 @@
   
   /// \brief The location of the last token that describes this unqualified-id.
   SourceLocation EndLocation;
-  
-  UnqualifiedId() : Kind(IK_Identifier), Identifier(nullptr) { }
+
+  UnqualifiedId()
+      : Kind(UnqualifiedIdKind::IK_Identifier), Identifier(nullptr) {}
 
   /// \brief Clear out this unqualified-id, setting it to default (invalid) 
   /// state.
   void clear() {
-    Kind = IK_Identifier;
+    Kind = UnqualifiedIdKind::IK_Identifier;
     Identifier = nullptr;
     StartLocation = SourceLocation();
     EndLocation = SourceLocation();
@@ -999,15 +990,15 @@
   bool isInvalid() const { return !isValid(); }
   
   /// \brief Determine what kind of name we have.
-  IdKind getKind() const { return Kind; }
-  void setKind(IdKind kind) { Kind = kind; } 
+  UnqualifiedIdKind getKind() const { return Kind; }
+  void setKind(UnqualifiedIdKind kind) { Kind = kind; } 
   
   /// \brief Specify that this unqualified-id was parsed as an identifier.
   ///
   /// \param Id the parsed identifier.
   /// \param IdLoc the location of the parsed identifier.
   void setIdentifier(const IdentifierInfo *Id, SourceLocation IdLoc) {
-    Kind = IK_Identifier;
+    Kind = UnqualifiedIdKind::IK_Identifier;
     Identifier = const_cast<IdentifierInfo *>(Id);
     StartLocation = EndLocation = IdLoc;
   }
@@ -1036,7 +1027,7 @@
   void setConversionFunctionId(SourceLocation OperatorLoc, 
                                ParsedType Ty,
                                SourceLocation EndLoc) {
-    Kind = IK_ConversionFunctionId;
+    Kind = UnqualifiedIdKind::IK_ConversionFunctionId;
     StartLocation = OperatorLoc;
     EndLocation = EndLoc;
     ConversionFunctionId = Ty;
@@ -1052,7 +1043,7 @@
   /// \param IdLoc the location of the identifier.
   void setLiteralOperatorId(const IdentifierInfo *Id, SourceLocation OpLoc,
                               SourceLocation IdLoc) {
-    Kind = IK_LiteralOperatorId;
+    Kind = UnqualifiedIdKind::IK_LiteralOperatorId;
     Identifier = const_cast<IdentifierInfo *>(Id);
     StartLocation = OpLoc;
     EndLocation = IdLoc;
@@ -1068,7 +1059,7 @@
   void setConstructorName(ParsedType ClassType, 
                           SourceLocation ClassNameLoc,
                           SourceLocation EndLoc) {
-    Kind = IK_ConstructorName;
+    Kind = UnqualifiedIdKind::IK_ConstructorName;
     StartLocation = ClassNameLoc;
     EndLocation = EndLoc;
     ConstructorName = ClassType;
@@ -1091,7 +1082,7 @@
   void setDestructorName(SourceLocation TildeLoc,
                          ParsedType ClassType,
                          SourceLocation EndLoc) {
-    Kind = IK_DestructorName;
+    Kind = UnqualifiedIdKind::IK_DestructorName;
     StartLocation = TildeLoc;
     EndLocation = EndLoc;
     DestructorName = ClassType;
@@ -1111,7 +1102,7 @@
   /// \param TemplateLoc The location of the parsed template-name.
   void setDeductionGuideName(ParsedTemplateTy Template,
                              SourceLocation TemplateLoc) {
-    Kind = IK_DeductionGuideName;
+    Kind = UnqualifiedIdKind::IK_DeductionGuideName;
     TemplateName = Template;
     StartLocation = EndLocation = TemplateLoc;
   }
@@ -1719,21 +1710,7 @@
   FDK_Deleted
 };
 
-/// \brief Information about one declarator, including the parsed type
-/// information and the identifier.
-///
-/// When the declarator is fully formed, this is turned into the appropriate
-/// Decl object.
-///
-/// Declarators come in two types: normal declarators and abstract declarators.
-/// Abstract declarators are used when parsing types, and don't have an
-/// identifier.  Normal declarators do have ID's.
-///
-/// Instances of this class should be a transient object that lives on the
-/// stack, not objects that are allocated in large quantities on the heap.
-class Declarator {
-public:
-  enum TheContext {
+enum class DeclaratorContext {
     FileContext,         // File scope declaration.
     PrototypeContext,    // Within a function prototype.
     ObjCResultContext,   // An ObjC method result type.
@@ -1755,11 +1732,28 @@
     LambdaExprParameterContext, // Lambda-expression parameter declarator.
     ConversionIdContext, // C++ conversion-type-id.
     TrailingReturnContext, // C++11 trailing-type-specifier.
-    TemplateTypeArgContext, // Template type argument.
+    TrailingReturnVarContext, // C++11 trailing-type-specifier for variable.
+    TemplateArgContext,  // Any template argument (in template argument list).
+    TemplateTypeArgContext, // Template type argument (in default argument).
     AliasDeclContext,    // C++11 alias-declaration.
     AliasTemplateContext // C++11 alias-declaration template.
-  };
+};
 
+
+/// \brief Information about one declarator, including the parsed type
+/// information and the identifier.
+///
+/// When the declarator is fully formed, this is turned into the appropriate
+/// Decl object.
+///
+/// Declarators come in two types: normal declarators and abstract declarators.
+/// Abstract declarators are used when parsing types, and don't have an
+/// identifier.  Normal declarators do have ID's.
+///
+/// Instances of this class should be a transient object that lives on the
+/// stack, not objects that are allocated in large quantities on the heap.
+class Declarator {
+  
 private:
   const DeclSpec &DS;
   CXXScopeSpec SS;
@@ -1767,7 +1761,7 @@
   SourceRange Range;
 
   /// \brief Where we are parsing this declarator.
-  TheContext Context;
+  DeclaratorContext Context;
 
   /// The C++17 structured binding, if any. This is an alternative to a Name.
   DecompositionDeclarator BindingGroup;
@@ -1834,7 +1828,7 @@
   friend struct DeclaratorChunk;
 
 public:
-  Declarator(const DeclSpec &ds, TheContext C)
+  Declarator(const DeclSpec &ds, DeclaratorContext C)
       : DS(ds), Range(ds.getSourceRange()), Context(C),
         InvalidType(DS.getTypeSpecType() == DeclSpec::TST_error),
         GroupingParens(false), FunctionDefinition(FDK_Declaration),
@@ -1872,13 +1866,13 @@
     return BindingGroup;
   }
   
-  TheContext getContext() const { return Context; }
+  DeclaratorContext getContext() const { return Context; }
 
   bool isPrototypeContext() const {
-    return (Context == PrototypeContext ||
-            Context == ObjCParameterContext ||
-            Context == ObjCResultContext ||
-            Context == LambdaExprParameterContext);
+    return (Context == DeclaratorContext::PrototypeContext ||
+            Context == DeclaratorContext::ObjCParameterContext ||
+            Context == DeclaratorContext::ObjCResultContext ||
+            Context == DeclaratorContext::LambdaExprParameterContext);
   }
 
   /// \brief Get the source range that spans this declarator.
@@ -1933,32 +1927,34 @@
   /// parameter lists.
   bool mayOmitIdentifier() const {
     switch (Context) {
-    case FileContext:
-    case KNRTypeListContext:
-    case MemberContext:
-    case BlockContext:
-    case ForContext:
-    case InitStmtContext:
-    case ConditionContext:
+    case DeclaratorContext::FileContext:
+    case DeclaratorContext::KNRTypeListContext:
+    case DeclaratorContext::MemberContext:
+    case DeclaratorContext::BlockContext:
+    case DeclaratorContext::ForContext:
+    case DeclaratorContext::InitStmtContext:
+    case DeclaratorContext::ConditionContext:
       return false;
 
-    case TypeNameContext:
-    case FunctionalCastContext:
-    case AliasDeclContext:
-    case AliasTemplateContext:
-    case PrototypeContext:
-    case LambdaExprParameterContext:
-    case ObjCParameterContext:
-    case ObjCResultContext:
-    case TemplateParamContext:
-    case CXXNewContext:
-    case CXXCatchContext:
-    case ObjCCatchContext:
-    case BlockLiteralContext:
-    case LambdaExprContext:
-    case ConversionIdContext:
-    case TemplateTypeArgContext:
-    case TrailingReturnContext:
+    case DeclaratorContext::TypeNameContext:
+    case DeclaratorContext::FunctionalCastContext:
+    case DeclaratorContext::AliasDeclContext:
+    case DeclaratorContext::AliasTemplateContext:
+    case DeclaratorContext::PrototypeContext:
+    case DeclaratorContext::LambdaExprParameterContext:
+    case DeclaratorContext::ObjCParameterContext:
+    case DeclaratorContext::ObjCResultContext:
+    case DeclaratorContext::TemplateParamContext:
+    case DeclaratorContext::CXXNewContext:
+    case DeclaratorContext::CXXCatchContext:
+    case DeclaratorContext::ObjCCatchContext:
+    case DeclaratorContext::BlockLiteralContext:
+    case DeclaratorContext::LambdaExprContext:
+    case DeclaratorContext::ConversionIdContext:
+    case DeclaratorContext::TemplateArgContext:
+    case DeclaratorContext::TemplateTypeArgContext:
+    case DeclaratorContext::TrailingReturnContext:
+    case DeclaratorContext::TrailingReturnVarContext:
       return true;
     }
     llvm_unreachable("unknown context kind!");
@@ -1969,32 +1965,34 @@
   /// typenames.
   bool mayHaveIdentifier() const {
     switch (Context) {
-    case FileContext:
-    case KNRTypeListContext:
-    case MemberContext:
-    case BlockContext:
-    case ForContext:
-    case InitStmtContext:
-    case ConditionContext:
-    case PrototypeContext:
-    case LambdaExprParameterContext:
-    case TemplateParamContext:
-    case CXXCatchContext:
-    case ObjCCatchContext:
+    case DeclaratorContext::FileContext:
+    case DeclaratorContext::KNRTypeListContext:
+    case DeclaratorContext::MemberContext:
+    case DeclaratorContext::BlockContext:
+    case DeclaratorContext::ForContext:
+    case DeclaratorContext::InitStmtContext:
+    case DeclaratorContext::ConditionContext:
+    case DeclaratorContext::PrototypeContext:
+    case DeclaratorContext::LambdaExprParameterContext:
+    case DeclaratorContext::TemplateParamContext:
+    case DeclaratorContext::CXXCatchContext:
+    case DeclaratorContext::ObjCCatchContext:
       return true;
 
-    case TypeNameContext:
-    case FunctionalCastContext:
-    case CXXNewContext:
-    case AliasDeclContext:
-    case AliasTemplateContext:
-    case ObjCParameterContext:
-    case ObjCResultContext:
-    case BlockLiteralContext:
-    case LambdaExprContext:
-    case ConversionIdContext:
-    case TemplateTypeArgContext:
-    case TrailingReturnContext:
+    case DeclaratorContext::TypeNameContext:
+    case DeclaratorContext::FunctionalCastContext:
+    case DeclaratorContext::CXXNewContext:
+    case DeclaratorContext::AliasDeclContext:
+    case DeclaratorContext::AliasTemplateContext:
+    case DeclaratorContext::ObjCParameterContext:
+    case DeclaratorContext::ObjCResultContext:
+    case DeclaratorContext::BlockLiteralContext:
+    case DeclaratorContext::LambdaExprContext:
+    case DeclaratorContext::ConversionIdContext:
+    case DeclaratorContext::TemplateArgContext:
+    case DeclaratorContext::TemplateTypeArgContext:
+    case DeclaratorContext::TrailingReturnContext:
+    case DeclaratorContext::TrailingReturnVarContext:
       return false;
     }
     llvm_unreachable("unknown context kind!");
@@ -2003,38 +2001,40 @@
   /// Return true if the context permits a C++17 decomposition declarator.
   bool mayHaveDecompositionDeclarator() const {
     switch (Context) {
-    case FileContext:
+    case DeclaratorContext::FileContext:
       // FIXME: It's not clear that the proposal meant to allow file-scope
       // structured bindings, but it does.
-    case BlockContext:
-    case ForContext:
-    case InitStmtContext:
+    case DeclaratorContext::BlockContext:
+    case DeclaratorContext::ForContext:
+    case DeclaratorContext::InitStmtContext:
+    case DeclaratorContext::ConditionContext:
       return true;
 
-    case ConditionContext:
-    case MemberContext:
-    case PrototypeContext:
-    case TemplateParamContext:
+    case DeclaratorContext::MemberContext:
+    case DeclaratorContext::PrototypeContext:
+    case DeclaratorContext::TemplateParamContext:
       // Maybe one day...
       return false;
 
     // These contexts don't allow any kind of non-abstract declarator.
-    case KNRTypeListContext:
-    case TypeNameContext:
-    case FunctionalCastContext:
-    case AliasDeclContext:
-    case AliasTemplateContext:
-    case LambdaExprParameterContext:
-    case ObjCParameterContext:
-    case ObjCResultContext:
-    case CXXNewContext:
-    case CXXCatchContext:
-    case ObjCCatchContext:
-    case BlockLiteralContext:
-    case LambdaExprContext:
-    case ConversionIdContext:
-    case TemplateTypeArgContext:
-    case TrailingReturnContext:
+    case DeclaratorContext::KNRTypeListContext:
+    case DeclaratorContext::TypeNameContext:
+    case DeclaratorContext::FunctionalCastContext:
+    case DeclaratorContext::AliasDeclContext:
+    case DeclaratorContext::AliasTemplateContext:
+    case DeclaratorContext::LambdaExprParameterContext:
+    case DeclaratorContext::ObjCParameterContext:
+    case DeclaratorContext::ObjCResultContext:
+    case DeclaratorContext::CXXNewContext:
+    case DeclaratorContext::CXXCatchContext:
+    case DeclaratorContext::ObjCCatchContext:
+    case DeclaratorContext::BlockLiteralContext:
+    case DeclaratorContext::LambdaExprContext:
+    case DeclaratorContext::ConversionIdContext:
+    case DeclaratorContext::TemplateArgContext:
+    case DeclaratorContext::TemplateTypeArgContext:
+    case DeclaratorContext::TrailingReturnContext:
+    case DeclaratorContext::TrailingReturnVarContext:
       return false;
     }
     llvm_unreachable("unknown context kind!");
@@ -2049,45 +2049,47 @@
       return false;
 
     if (getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_extern &&
-        Context != FileContext)
+        Context != DeclaratorContext::FileContext)
       return false;
 
     // Special names can't have direct initializers.
-    if (Name.getKind() != UnqualifiedId::IK_Identifier)
+    if (Name.getKind() != UnqualifiedIdKind::IK_Identifier)
       return false;
 
     switch (Context) {
-    case FileContext:
-    case BlockContext:
-    case ForContext:
-    case InitStmtContext:
+    case DeclaratorContext::FileContext:
+    case DeclaratorContext::BlockContext:
+    case DeclaratorContext::ForContext:
+    case DeclaratorContext::InitStmtContext:
+    case DeclaratorContext::TrailingReturnVarContext:
       return true;
 
-    case ConditionContext:
+    case DeclaratorContext::ConditionContext:
       // This may not be followed by a direct initializer, but it can't be a
       // function declaration either, and we'd prefer to perform a tentative
       // parse in order to produce the right diagnostic.
       return true;
 
-    case KNRTypeListContext:
-    case MemberContext:
-    case PrototypeContext:
-    case LambdaExprParameterContext:
-    case ObjCParameterContext:
-    case ObjCResultContext:
-    case TemplateParamContext:
-    case CXXCatchContext:
-    case ObjCCatchContext:
-    case TypeNameContext:
-    case FunctionalCastContext: // FIXME
-    case CXXNewContext:
-    case AliasDeclContext:
-    case AliasTemplateContext:
-    case BlockLiteralContext:
-    case LambdaExprContext:
-    case ConversionIdContext:
-    case TemplateTypeArgContext:
-    case TrailingReturnContext:
+    case DeclaratorContext::KNRTypeListContext:
+    case DeclaratorContext::MemberContext:
+    case DeclaratorContext::PrototypeContext:
+    case DeclaratorContext::LambdaExprParameterContext:
+    case DeclaratorContext::ObjCParameterContext:
+    case DeclaratorContext::ObjCResultContext:
+    case DeclaratorContext::TemplateParamContext:
+    case DeclaratorContext::CXXCatchContext:
+    case DeclaratorContext::ObjCCatchContext:
+    case DeclaratorContext::TypeNameContext:
+    case DeclaratorContext::FunctionalCastContext: // FIXME
+    case DeclaratorContext::CXXNewContext:
+    case DeclaratorContext::AliasDeclContext:
+    case DeclaratorContext::AliasTemplateContext:
+    case DeclaratorContext::BlockLiteralContext:
+    case DeclaratorContext::LambdaExprContext:
+    case DeclaratorContext::ConversionIdContext:
+    case DeclaratorContext::TemplateArgContext:
+    case DeclaratorContext::TemplateTypeArgContext:
+    case DeclaratorContext::TrailingReturnContext:
       return false;
     }
     llvm_unreachable("unknown context kind!");
@@ -2103,8 +2105,8 @@
   /// special C++ name (constructor, destructor, etc.), or a structured
   /// binding (which is not exactly a name, but occupies the same position).
   bool hasName() const {
-    return Name.getKind() != UnqualifiedId::IK_Identifier || Name.Identifier ||
-           isDecompositionDeclarator();
+    return Name.getKind() != UnqualifiedIdKind::IK_Identifier ||
+           Name.Identifier || isDecompositionDeclarator();
   }
 
   /// Return whether this declarator is a decomposition declarator.
@@ -2113,7 +2115,7 @@
   }
 
   IdentifierInfo *getIdentifier() const { 
-    if (Name.getKind() == UnqualifiedId::IK_Identifier)
+    if (Name.getKind() == UnqualifiedIdKind::IK_Identifier)
       return Name.Identifier;
     
     return nullptr;
@@ -2271,32 +2273,34 @@
       return false;
 
     switch (Context) {
-    case FileContext:
-    case MemberContext:
-    case BlockContext:
-    case ForContext:
-    case InitStmtContext:
+    case DeclaratorContext::FileContext:
+    case DeclaratorContext::MemberContext:
+    case DeclaratorContext::BlockContext:
+    case DeclaratorContext::ForContext:
+    case DeclaratorContext::InitStmtContext:
       return true;
 
-    case ConditionContext:
-    case KNRTypeListContext:
-    case TypeNameContext:
-    case FunctionalCastContext:
-    case AliasDeclContext:
-    case AliasTemplateContext:
-    case PrototypeContext:
-    case LambdaExprParameterContext:
-    case ObjCParameterContext:
-    case ObjCResultContext:
-    case TemplateParamContext:
-    case CXXNewContext:
-    case CXXCatchContext:
-    case ObjCCatchContext:
-    case BlockLiteralContext:
-    case LambdaExprContext:
-    case ConversionIdContext:
-    case TemplateTypeArgContext:
-    case TrailingReturnContext:
+    case DeclaratorContext::ConditionContext:
+    case DeclaratorContext::KNRTypeListContext:
+    case DeclaratorContext::TypeNameContext:
+    case DeclaratorContext::FunctionalCastContext:
+    case DeclaratorContext::AliasDeclContext:
+    case DeclaratorContext::AliasTemplateContext:
+    case DeclaratorContext::PrototypeContext:
+    case DeclaratorContext::LambdaExprParameterContext:
+    case DeclaratorContext::ObjCParameterContext:
+    case DeclaratorContext::ObjCResultContext:
+    case DeclaratorContext::TemplateParamContext:
+    case DeclaratorContext::CXXNewContext:
+    case DeclaratorContext::CXXCatchContext:
+    case DeclaratorContext::ObjCCatchContext:
+    case DeclaratorContext::BlockLiteralContext:
+    case DeclaratorContext::LambdaExprContext:
+    case DeclaratorContext::ConversionIdContext:
+    case DeclaratorContext::TemplateArgContext:
+    case DeclaratorContext::TemplateTypeArgContext:
+    case DeclaratorContext::TrailingReturnContext:
+    case DeclaratorContext::TrailingReturnVarContext:
       return false;
     }
     llvm_unreachable("unknown context kind!");
@@ -2306,32 +2310,37 @@
   /// expression could appear.
   bool isExpressionContext() const {
     switch (Context) {
-    case FileContext:
-    case KNRTypeListContext:
-    case MemberContext:
-    case TypeNameContext: // FIXME: sizeof(...) permits an expression.
-    case FunctionalCastContext:
-    case AliasDeclContext:
-    case AliasTemplateContext:
-    case PrototypeContext:
-    case LambdaExprParameterContext:
-    case ObjCParameterContext:
-    case ObjCResultContext:
-    case TemplateParamContext:
-    case CXXNewContext:
-    case CXXCatchContext:
-    case ObjCCatchContext:
-    case BlockLiteralContext:
-    case LambdaExprContext:
-    case ConversionIdContext:
-    case TrailingReturnContext:
+    case DeclaratorContext::FileContext:
+    case DeclaratorContext::KNRTypeListContext:
+    case DeclaratorContext::MemberContext:
+
+    // FIXME: sizeof(...) permits an expression.
+    case DeclaratorContext::TypeNameContext: 
+    
+    case DeclaratorContext::FunctionalCastContext:
+    case DeclaratorContext::AliasDeclContext:
+    case DeclaratorContext::AliasTemplateContext:
+    case DeclaratorContext::PrototypeContext:
+    case DeclaratorContext::LambdaExprParameterContext:
+    case DeclaratorContext::ObjCParameterContext:
+    case DeclaratorContext::ObjCResultContext:
+    case DeclaratorContext::TemplateParamContext:
+    case DeclaratorContext::CXXNewContext:
+    case DeclaratorContext::CXXCatchContext:
+    case DeclaratorContext::ObjCCatchContext:
+    case DeclaratorContext::BlockLiteralContext:
+    case DeclaratorContext::LambdaExprContext:
+    case DeclaratorContext::ConversionIdContext:
+    case DeclaratorContext::TrailingReturnContext:
+    case DeclaratorContext::TrailingReturnVarContext:
+    case DeclaratorContext::TemplateTypeArgContext:
       return false;
 
-    case BlockContext:
-    case ForContext:
-    case InitStmtContext:
-    case ConditionContext:
-    case TemplateTypeArgContext:
+    case DeclaratorContext::BlockContext:
+    case DeclaratorContext::ForContext:
+    case DeclaratorContext::InitStmtContext:
+    case DeclaratorContext::ConditionContext:
+    case DeclaratorContext::TemplateArgContext:
       return true;
     }
 
@@ -2444,7 +2453,8 @@
 
   /// Returns true if this declares a real member and not a friend.
   bool isFirstDeclarationOfMember() {
-    return getContext() == MemberContext && !getDeclSpec().isFriendSpecified();
+    return getContext() == DeclaratorContext::MemberContext &&
+           !getDeclSpec().isFriendSpecified();
   }
 
   /// Returns true if this declares a static member.  This cannot be called on a
@@ -2465,7 +2475,8 @@
   Declarator D;
   Expr *BitfieldSize;
   explicit FieldDeclarator(const DeclSpec &DS)
-    : D(DS, Declarator::MemberContext), BitfieldSize(nullptr) { }
+      : D(DS, DeclaratorContext::MemberContext),
+        BitfieldSize(nullptr) {}
 };
 
 /// \brief Represents a C++11 virt-specifier-seq.
diff --git a/include/clang/Sema/DelayedDiagnostic.h b/include/clang/Sema/DelayedDiagnostic.h
index d65dbf0..c4db8e2 100644
--- a/include/clang/Sema/DelayedDiagnostic.h
+++ b/include/clang/Sema/DelayedDiagnostic.h
@@ -1,4 +1,4 @@
-//===--- DelayedDiagnostic.h - Delayed declarator diagnostics ---*- C++ -*-===//
+//===- DelayedDiagnostic.h - Delayed declarator diagnostics -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,7 +6,7 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Defines the classes clang::DelayedDiagnostic and 
 /// clang::AccessedEntity.
@@ -16,15 +16,33 @@
 /// diagnostics -- notably deprecation and access control -- are suppressed
 /// based on semantic properties of the parsed declaration that aren't known
 /// until it is fully parsed.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_SEMA_DELAYEDDIAGNOSTIC_H
 #define LLVM_CLANG_SEMA_DELAYEDDIAGNOSTIC_H
 
+#include "clang/AST/DeclAccessPair.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
 #include "clang/Sema/Sema.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <cstddef>
+#include <utility>
 
 namespace clang {
+
+class ObjCInterfaceDecl;
+class ObjCPropertyDecl;
+
 namespace sema {
 
 /// A declaration being accessed, together with information about how
@@ -39,16 +57,14 @@
   /// The target is the base class.
   enum BaseNonce { Base };
 
-  bool isMemberAccess() const { return IsMember; }
-
   AccessedEntity(PartialDiagnostic::StorageAllocator &Allocator,
                  MemberNonce _,
                  CXXRecordDecl *NamingClass,
                  DeclAccessPair FoundDecl,
                  QualType BaseObjectType)
-    : Access(FoundDecl.getAccess()), IsMember(true),
-      Target(FoundDecl.getDecl()), NamingClass(NamingClass),
-      BaseObjectType(BaseObjectType), Diag(0, Allocator) {
+      : Access(FoundDecl.getAccess()), IsMember(true),
+        Target(FoundDecl.getDecl()), NamingClass(NamingClass),
+        BaseObjectType(BaseObjectType), Diag(0, Allocator) {
   }
 
   AccessedEntity(PartialDiagnostic::StorageAllocator &Allocator,
@@ -56,11 +72,10 @@
                  CXXRecordDecl *BaseClass,
                  CXXRecordDecl *DerivedClass,
                  AccessSpecifier Access)
-    : Access(Access), IsMember(false),
-      Target(BaseClass),
-      NamingClass(DerivedClass),
-      Diag(0, Allocator) {
-  }
+      : Access(Access), IsMember(false), Target(BaseClass),
+        NamingClass(DerivedClass), Diag(0, Allocator) {}
+
+  bool isMemberAccess() const { return IsMember; }
 
   bool isQuiet() const { return Diag.getDiagID() == 0; }
 
@@ -216,7 +231,6 @@
   }
 
 private:
-
   struct AD {
     const NamedDecl *ReferringDecl;
     const NamedDecl *OffendingDecl;
@@ -248,20 +262,17 @@
   const DelayedDiagnosticPool *Parent;
   SmallVector<DelayedDiagnostic, 4> Diagnostics;
 
-  DelayedDiagnosticPool(const DelayedDiagnosticPool &) = delete;
-  void operator=(const DelayedDiagnosticPool &) = delete;
 public:
   DelayedDiagnosticPool(const DelayedDiagnosticPool *parent) : Parent(parent) {}
-  ~DelayedDiagnosticPool() {
-    for (SmallVectorImpl<DelayedDiagnostic>::iterator
-           i = Diagnostics.begin(), e = Diagnostics.end(); i != e; ++i)
-      i->Destroy();
-  }
+
+  DelayedDiagnosticPool(const DelayedDiagnosticPool &) = delete;
+  DelayedDiagnosticPool &operator=(const DelayedDiagnosticPool &) = delete;
 
   DelayedDiagnosticPool(DelayedDiagnosticPool &&Other)
-    : Parent(Other.Parent), Diagnostics(std::move(Other.Diagnostics)) {
+      : Parent(Other.Parent), Diagnostics(std::move(Other.Diagnostics)) {
     Other.Diagnostics.clear();
   }
+
   DelayedDiagnosticPool &operator=(DelayedDiagnosticPool &&Other) {
     Parent = Other.Parent;
     Diagnostics = std::move(Other.Diagnostics);
@@ -269,6 +280,12 @@
     return *this;
   }
 
+  ~DelayedDiagnosticPool() {
+    for (SmallVectorImpl<DelayedDiagnostic>::iterator
+           i = Diagnostics.begin(), e = Diagnostics.end(); i != e; ++i)
+      i->Destroy();
+  }
+
   const DelayedDiagnosticPool *getParent() const { return Parent; }
 
   /// Does this pool, or any of its ancestors, contain any diagnostics?
@@ -293,13 +310,14 @@
     pool.Diagnostics.clear();
   }
 
-  typedef SmallVectorImpl<DelayedDiagnostic>::const_iterator pool_iterator;
+  using pool_iterator = SmallVectorImpl<DelayedDiagnostic>::const_iterator;
+
   pool_iterator pool_begin() const { return Diagnostics.begin(); }
   pool_iterator pool_end() const { return Diagnostics.end(); }
   bool pool_empty() const { return Diagnostics.empty(); }
 };
 
-}
+} // namespace clang
 
 /// Add a diagnostic to the current delay pool.
 inline void Sema::DelayedDiagnostics::add(const sema::DelayedDiagnostic &diag) {
@@ -307,7 +325,6 @@
   CurPool->add(diag);
 }
 
+} // namespace clang
 
-}
-
-#endif
+#endif // LLVM_CLANG_SEMA_DELAYEDDIAGNOSTIC_H
diff --git a/include/clang/Sema/IdentifierResolver.h b/include/clang/Sema/IdentifierResolver.h
index 382fe80..2647766 100644
--- a/include/clang/Sema/IdentifierResolver.h
+++ b/include/clang/Sema/IdentifierResolver.h
@@ -15,16 +15,20 @@
 #ifndef LLVM_CLANG_SEMA_IDENTIFIERRESOLVER_H
 #define LLVM_CLANG_SEMA_IDENTIFIERRESOLVER_H
 
-#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
 #include "llvm/ADT/SmallVector.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
 
 namespace clang {
 
-class ASTContext;
 class Decl;
-class DeclContext;
 class DeclarationName;
-class ExternalPreprocessorSource;
+class DeclContext;
+class IdentifierInfo;
+class LangOptions;
 class NamedDecl;
 class Preprocessor;
 class Scope;
@@ -33,17 +37,16 @@
 /// scopes.  It manages the shadowing chains of declaration names and
 /// implements efficient decl lookup based on a declaration name.
 class IdentifierResolver {
-
   /// IdDeclInfo - Keeps track of information about decls associated
   /// to a particular declaration name. IdDeclInfos are lazily
   /// constructed and assigned to a declaration name the first time a
   /// decl with that declaration name is shadowed in some scope.
   class IdDeclInfo {
   public:
-    typedef SmallVector<NamedDecl*, 2> DeclsTy;
+    using DeclsTy = SmallVector<NamedDecl *, 2>;
 
-    inline DeclsTy::iterator decls_begin() { return Decls.begin(); }
-    inline DeclsTy::iterator decls_end() { return Decls.end(); }
+    DeclsTy::iterator decls_begin() { return Decls.begin(); }
+    DeclsTy::iterator decls_end() { return Decls.end(); }
 
     void AddDecl(NamedDecl *D) { Decls.push_back(D); }
 
@@ -61,30 +64,32 @@
   };
 
 public:
-
   /// iterator - Iterate over the decls of a specified declaration name.
   /// It will walk or not the parent declaration contexts depending on how
   /// it was instantiated.
   class iterator {
   public:
-    typedef NamedDecl *             value_type;
-    typedef NamedDecl *             reference;
-    typedef NamedDecl *             pointer;
-    typedef std::input_iterator_tag iterator_category;
-    typedef std::ptrdiff_t          difference_type;
+    friend class IdentifierResolver;
+
+    using value_type = NamedDecl *;
+    using reference = NamedDecl *;
+    using pointer = NamedDecl *;
+    using iterator_category = std::input_iterator_tag;
+    using difference_type = std::ptrdiff_t;
 
     /// Ptr - There are 2 forms that 'Ptr' represents:
     /// 1) A single NamedDecl. (Ptr & 0x1 == 0)
     /// 2) A IdDeclInfo::DeclsTy::iterator that traverses only the decls of the
     ///    same declaration context. (Ptr & 0x1 == 0x1)
-    uintptr_t Ptr;
-    typedef IdDeclInfo::DeclsTy::iterator BaseIter;
+    uintptr_t Ptr = 0;
+    using BaseIter = IdDeclInfo::DeclsTy::iterator;
 
     /// A single NamedDecl. (Ptr & 0x1 == 0)
     iterator(NamedDecl *D) {
       Ptr = reinterpret_cast<uintptr_t>(D);
       assert((Ptr & 0x1) == 0 && "Invalid Ptr!");
     }
+
     /// A IdDeclInfo::DeclsTy::iterator that walks or not the parent declaration
     /// contexts depending on 'LookInParentCtx'.
     iterator(BaseIter I) {
@@ -98,11 +103,10 @@
       return reinterpret_cast<BaseIter>(Ptr & ~0x1);
     }
 
-    friend class IdentifierResolver;
-
     void incrementSlowCase();
+
   public:
-    iterator() : Ptr(0) {}
+    iterator() = default;
 
     NamedDecl *operator*() const {
       if (isIterator())
@@ -128,6 +132,9 @@
     }
   };
 
+  explicit IdentifierResolver(Preprocessor &PP);
+  ~IdentifierResolver();
+
   /// begin - Returns an iterator for decls with the name 'Name'.
   iterator begin(DeclarationName Name);
 
@@ -170,9 +177,6 @@
   /// \returns true if the declaration was added, false otherwise.
   bool tryAddTopLevelDecl(NamedDecl *D, DeclarationName Name);
   
-  explicit IdentifierResolver(Preprocessor &PP);
-  ~IdentifierResolver();
-
 private:
   const LangOptions &LangOpt;
   Preprocessor &PP;
@@ -193,11 +197,10 @@
     assert((reinterpret_cast<uintptr_t>(Ptr) & 0x1) == 1
           && "Ptr not a IdDeclInfo* !");
     return reinterpret_cast<IdDeclInfo*>(
-                    reinterpret_cast<uintptr_t>(Ptr) & ~0x1
-                                                            );
+                    reinterpret_cast<uintptr_t>(Ptr) & ~0x1);
   }
 };
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_SEMA_IDENTIFIERRESOLVER_H
diff --git a/include/clang/Sema/Initialization.h b/include/clang/Sema/Initialization.h
index bd07b9e..d11c1ee 100644
--- a/include/clang/Sema/Initialization.h
+++ b/include/clang/Sema/Initialization.h
@@ -1,4 +1,4 @@
-//===--- Initialization.h - Semantic Analysis for Initializers --*- C++ -*-===//
+//===- Initialization.h - Semantic Analysis for Initializers ----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,32 +10,41 @@
 // This file provides supporting data types for initialization of objects.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_SEMA_INITIALIZATION_H
 #define LLVM_CLANG_SEMA_INITIALIZATION_H
 
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclAccessPair.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Type.h"
-#include "clang/AST/UnresolvedSet.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
 #include "clang/Sema/Overload.h"
 #include "clang/Sema/Ownership.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
 #include <cassert>
+#include <cstdint>
+#include <string>
 
 namespace clang {
-  
+
+class APValue;
 class CXXBaseSpecifier;
-class DeclaratorDecl;
-class DeclaratorInfo;
-class FieldDecl;
-class FunctionDecl;
-class ParmVarDecl;
-class Sema;
-class TypeLoc;
-class VarDecl;
+class CXXConstructorDecl;
 class ObjCMethodDecl;
-  
+class Sema;
+
 /// \brief Describes an entity that is being initialized.
 class InitializedEntity {
 public:
@@ -43,51 +52,69 @@
   enum EntityKind {
     /// \brief The entity being initialized is a variable.
     EK_Variable,
+
     /// \brief The entity being initialized is a function parameter.
     EK_Parameter,
+
     /// \brief The entity being initialized is the result of a function call.
     EK_Result,
+
     /// \brief The entity being initialized is an exception object that
     /// is being thrown.
     EK_Exception,
+
     /// \brief The entity being initialized is a non-static data member 
     /// subobject.
     EK_Member,
+
     /// \brief The entity being initialized is an element of an array.
     EK_ArrayElement,
+
     /// \brief The entity being initialized is an object (or array of
     /// objects) allocated via new.
     EK_New,
+
     /// \brief The entity being initialized is a temporary object.
     EK_Temporary,
+
     /// \brief The entity being initialized is a base member subobject.
     EK_Base,
+
     /// \brief The initialization is being done by a delegating constructor.
     EK_Delegating,
+
     /// \brief The entity being initialized is an element of a vector.
     /// or vector.
     EK_VectorElement,
+
     /// \brief The entity being initialized is a field of block descriptor for
     /// the copied-in c++ object.
     EK_BlockElement,
+
     /// The entity being initialized is a field of block descriptor for the
     /// copied-in lambda object that's used in the lambda to block conversion.
     EK_LambdaToBlockConversionBlockElement,
+
     /// \brief The entity being initialized is the real or imaginary part of a
     /// complex number.
     EK_ComplexElement,
+
     /// \brief The entity being initialized is the field that captures a 
     /// variable in a lambda.
     EK_LambdaCapture,
+
     /// \brief The entity being initialized is the initializer for a compound
     /// literal.
     EK_CompoundLiteralInit,
+
     /// \brief The entity being implicitly initialized back to the formal
     /// result type.
     EK_RelatedResult,
+
     /// \brief The entity being initialized is a function parameter; function
     /// is member of group of audited CF APIs.
     EK_Parameter_CF_Audited,
+
     /// \brief The entity being initialized is a structured binding of a
     /// decomposition declaration.
     EK_Binding,
@@ -103,13 +130,13 @@
 
   /// \brief If non-NULL, the parent entity in which this
   /// initialization occurs.
-  const InitializedEntity *Parent;
+  const InitializedEntity *Parent = nullptr;
 
   /// \brief The type of the object or reference being initialized.
   QualType Type;
 
   /// \brief The mangling number for the next reference temporary to be created.
-  mutable unsigned ManglingNumber;
+  mutable unsigned ManglingNumber = 0;
 
   struct LN {
     /// \brief When Kind == EK_Result, EK_Exception, EK_New, the
@@ -172,20 +199,18 @@
     struct C Capture;
   };
 
-  InitializedEntity() : ManglingNumber(0) {}
+  InitializedEntity() = default;
 
   /// \brief Create the initialization entity for a variable.
   InitializedEntity(VarDecl *Var, EntityKind EK = EK_Variable)
-    : Kind(EK), Parent(nullptr), Type(Var->getType()),
-      ManglingNumber(0), Variable{Var, false} { }
+      : Kind(EK), Type(Var->getType()), Variable{Var, false} {}
   
   /// \brief Create the initialization entity for the result of a
   /// function, throwing an object, performing an explicit cast, or
   /// initializing a parameter for which there is no declaration.
   InitializedEntity(EntityKind Kind, SourceLocation Loc, QualType Type,
                     bool NRVO = false)
-    : Kind(Kind), Parent(nullptr), Type(Type), ManglingNumber(0)
-  {
+      : Kind(Kind), Type(Type) {
     LocAndNRVO.Location = Loc.getRawEncoding();
     LocAndNRVO.NRVO = NRVO;
   }
@@ -193,9 +218,8 @@
   /// \brief Create the initialization entity for a member subobject.
   InitializedEntity(FieldDecl *Member, const InitializedEntity *Parent,
                     bool Implicit) 
-    : Kind(EK_Member), Parent(Parent), Type(Member->getType()),
-      ManglingNumber(0), Variable{Member, Implicit} {
-  }
+      : Kind(EK_Member), Parent(Parent), Type(Member->getType()),
+        Variable{Member, Implicit} {}
   
   /// \brief Create the initialization entity for an array element.
   InitializedEntity(ASTContext &Context, unsigned Index, 
@@ -203,9 +227,7 @@
 
   /// \brief Create the initialization entity for a lambda capture.
   InitializedEntity(IdentifierInfo *VarID, QualType FieldType, SourceLocation Loc)
-    : Kind(EK_LambdaCapture), Parent(nullptr), Type(FieldType),
-      ManglingNumber(0)
-  {
+      : Kind(EK_LambdaCapture), Type(FieldType) {
     Capture.VarID = VarID;
     Capture.Location = Loc.getRawEncoding();
   }
@@ -307,7 +329,6 @@
     return Result;
   }
 
-
   /// \brief Create the initialization entity for a base class subobject.
   static InitializedEntity
   InitializeBase(ASTContext &Context, const CXXBaseSpecifier *Base,
@@ -362,7 +383,6 @@
     return Result;
   }
 
-
   /// \brief Determine the kind of initialization.
   EntityKind getKind() const { return Kind; }
   
@@ -401,6 +421,7 @@
     return (getKind() == EK_Parameter  ||
             getKind() == EK_Parameter_CF_Audited);
   }
+
   /// \brief Determine whether this initialization consumes the
   /// parameter.
   bool isParameterConsumed() const {
@@ -453,6 +474,7 @@
            getKind() == EK_ComplexElement);
     return Index;
   }
+
   /// \brief If this is already the initializer for an array or vector
   /// element, sets the element index.
   void setElementIndex(unsigned Index) {
@@ -460,11 +482,13 @@
            getKind() == EK_ComplexElement);
     this->Index = Index;
   }
+
   /// \brief For a lambda capture, return the capture's name.
   StringRef getCapturedVarName() const {
     assert(getKind() == EK_LambdaCapture && "Not a lambda capture!");
     return Capture.VarID->getName();
   }
+
   /// \brief Determine the location of the capture when initializing
   /// field from a captured variable in a lambda.
   SourceLocation getCaptureLoc() const {
@@ -493,22 +517,42 @@
 public:
   /// \brief The kind of initialization being performed.
   enum InitKind {
-    IK_Direct,       ///< Direct initialization
-    IK_DirectList,   ///< Direct list-initialization
-    IK_Copy,         ///< Copy initialization
-    IK_Default,      ///< Default initialization
-    IK_Value         ///< Value initialization
+    /// Direct initialization
+    IK_Direct,
+
+    /// Direct list-initialization
+    IK_DirectList,
+
+    /// Copy initialization
+    IK_Copy,
+
+    /// Default initialization
+    IK_Default,
+
+    /// Value initialization
+    IK_Value
   };
   
 private:
   /// \brief The context of the initialization.
   enum InitContext {
-    IC_Normal,         ///< Normal context
-    IC_ExplicitConvs,  ///< Normal context, but allows explicit conversion funcs
-    IC_Implicit,       ///< Implicit context (value initialization)
-    IC_StaticCast,     ///< Static cast context
-    IC_CStyleCast,     ///< C-style cast context
-    IC_FunctionalCast  ///< Functional cast context
+    /// Normal context
+    IC_Normal,
+
+    /// Normal context, but allows explicit conversion functionss
+    IC_ExplicitConvs,
+
+    /// Implicit context (value initialization)
+    IC_Implicit,
+
+    /// Static cast context
+    IC_StaticCast,
+
+    /// C-style cast context
+    IC_CStyleCast,
+
+    /// Functional cast context
+    IC_FunctionalCast
   };
   
   /// \brief The kind of initialization being performed.
@@ -522,8 +566,7 @@
   
   InitializationKind(InitKind Kind, InitContext Context, SourceLocation Loc1, 
                      SourceLocation Loc2, SourceLocation Loc3)
-    : Kind(Kind), Context(Context)
-  {
+      : Kind(Kind), Context(Context) {
     Locations[0] = Loc1;
     Locations[1] = Loc2;
     Locations[2] = Loc3;
@@ -539,8 +582,15 @@
   }
 
   static InitializationKind CreateDirectList(SourceLocation InitLoc) {
-    return InitializationKind(IK_DirectList, IC_Normal,
-                              InitLoc, InitLoc, InitLoc);
+    return InitializationKind(IK_DirectList, IC_Normal, InitLoc, InitLoc,
+                              InitLoc);
+  }
+
+  static InitializationKind CreateDirectList(SourceLocation InitLoc,
+                                             SourceLocation LBraceLoc,
+                                             SourceLocation RBraceLoc) {
+    return InitializationKind(IK_DirectList, IC_Normal, InitLoc, LBraceLoc,
+                              RBraceLoc);
   }
 
   /// \brief Create a direct initialization due to a cast that isn't a C-style 
@@ -598,7 +648,8 @@
                                           Expr *Init) {
     if (!Init) return CreateDefault(Loc);
     if (!DirectInit) return CreateCopy(Loc, Init->getLocStart());
-    if (isa<InitListExpr>(Init)) return CreateDirectList(Loc);
+    if (isa<InitListExpr>(Init))
+      return CreateDirectList(Loc, Init->getLocStart(), Init->getLocEnd());
     return CreateDirect(Loc, Init->getLocStart(), Init->getLocEnd());
   }
   
@@ -660,12 +711,20 @@
   bool allowExplicitConversionFunctionsInRefBinding() const {
     return !isCopyInit() || Context == IC_ExplicitConvs;
   }
+
+  /// Determine whether this initialization has a source range containing the
+  /// locations of open and closing parentheses or braces.
+  bool hasParenOrBraceRange() const {
+    return Kind == IK_Direct || Kind == IK_Value || Kind == IK_DirectList;
+  }
   
   /// \brief Retrieve the source range containing the locations of the open
-  /// and closing parentheses for value and direct initializations.
-  SourceRange getParenRange() const {
-    assert((Kind == IK_Direct || Kind == IK_Value) &&
-           "Only direct- and value-initialization have parentheses");
+  /// and closing parentheses or braces for value, direct, and direct list
+  /// initializations.
+  SourceRange getParenOrBraceRange() const {
+    assert(hasParenOrBraceRange() && "Only direct, value, and direct-list "
+                                     "initialization have parentheses or "
+                                     "braces");
     return SourceRange(Locations[1], Locations[2]);
   }
 };
@@ -695,86 +754,123 @@
     /// \brief Resolve the address of an overloaded function to a specific
     /// function declaration.
     SK_ResolveAddressOfOverloadedFunction,
+
     /// \brief Perform a derived-to-base cast, producing an rvalue.
     SK_CastDerivedToBaseRValue,
+
     /// \brief Perform a derived-to-base cast, producing an xvalue.
     SK_CastDerivedToBaseXValue,
+
     /// \brief Perform a derived-to-base cast, producing an lvalue.
     SK_CastDerivedToBaseLValue,
+
     /// \brief Reference binding to an lvalue.
     SK_BindReference,
+
     /// \brief Reference binding to a temporary.
     SK_BindReferenceToTemporary,
+
     /// \brief An optional copy of a temporary object to another
     /// temporary object, which is permitted (but not required) by
     /// C++98/03 but not C++0x.
     SK_ExtraneousCopyToTemporary,
+
     /// \brief Direct-initialization from a reference-related object in the
     /// final stage of class copy-initialization.
     SK_FinalCopy,
+
     /// \brief Perform a user-defined conversion, either via a conversion
     /// function or via a constructor.
     SK_UserConversion,
+
     /// \brief Perform a qualification conversion, producing an rvalue.
     SK_QualificationConversionRValue,
+
     /// \brief Perform a qualification conversion, producing an xvalue.
     SK_QualificationConversionXValue,
+
     /// \brief Perform a qualification conversion, producing an lvalue.
     SK_QualificationConversionLValue,
+
     /// \brief Perform a conversion adding _Atomic to a type.
     SK_AtomicConversion,
+
     /// \brief Perform a load from a glvalue, producing an rvalue.
     SK_LValueToRValue,
+
     /// \brief Perform an implicit conversion sequence.
     SK_ConversionSequence,
+
     /// \brief Perform an implicit conversion sequence without narrowing.
     SK_ConversionSequenceNoNarrowing,
+
     /// \brief Perform list-initialization without a constructor.
     SK_ListInitialization,
+
     /// \brief Unwrap the single-element initializer list for a reference.
     SK_UnwrapInitList,
+
     /// \brief Rewrap the single-element initializer list for a reference.
     SK_RewrapInitList,
+
     /// \brief Perform initialization via a constructor.
     SK_ConstructorInitialization,
+
     /// \brief Perform initialization via a constructor, taking arguments from
     /// a single InitListExpr.
     SK_ConstructorInitializationFromList,
+
     /// \brief Zero-initialize the object
     SK_ZeroInitialization,
+
     /// \brief C assignment
     SK_CAssignment,
+
     /// \brief Initialization by string
     SK_StringInit,
+
     /// \brief An initialization that "converts" an Objective-C object
     /// (not a point to an object) to another Objective-C object type.
     SK_ObjCObjectConversion,
+
     /// \brief Array indexing for initialization by elementwise copy.
     SK_ArrayLoopIndex,
+
     /// \brief Array initialization by elementwise copy.
     SK_ArrayLoopInit,
+
     /// \brief Array initialization (from an array rvalue).
     SK_ArrayInit,
+
     /// \brief Array initialization (from an array rvalue) as a GNU extension.
     SK_GNUArrayInit,
+
     /// \brief Array initialization from a parenthesized initializer list.
     /// This is a GNU C++ extension.
     SK_ParenthesizedArrayInit,
+
     /// \brief Pass an object by indirect copy-and-restore.
     SK_PassByIndirectCopyRestore,
+
     /// \brief Pass an object by indirect restore.
     SK_PassByIndirectRestore,
+
     /// \brief Produce an Objective-C object pointer.
     SK_ProduceObjCObject,
+
     /// \brief Construct a std::initializer_list from an initializer list.
     SK_StdInitializerList,
+
     /// \brief Perform initialization via a constructor taking a single
     /// std::initializer_list argument.
     SK_StdInitializerListConstructorCall,
+
     /// \brief Initialize an OpenCL sampler from an integer.
     SK_OCLSamplerInit,
+
     /// \brief Initialize queue_t from 0.
     SK_OCLZeroQueue,
+
     /// \brief Passing zero to a function where OpenCL event_t is expected.
     SK_OCLZeroEvent
   };
@@ -831,79 +927,113 @@
   enum FailureKind {
     /// \brief Too many initializers provided for a reference.
     FK_TooManyInitsForReference,
+
     /// \brief Reference initialized from a parenthesized initializer list.
     FK_ParenthesizedListInitForReference,
+
     /// \brief Array must be initialized with an initializer list.
     FK_ArrayNeedsInitList,
+
     /// \brief Array must be initialized with an initializer list or a 
     /// string literal.
     FK_ArrayNeedsInitListOrStringLiteral,
+
     /// \brief Array must be initialized with an initializer list or a
     /// wide string literal.
     FK_ArrayNeedsInitListOrWideStringLiteral,
+
     /// \brief Initializing a wide char array with narrow string literal.
     FK_NarrowStringIntoWideCharArray,
+
     /// \brief Initializing char array with wide string literal.
     FK_WideStringIntoCharArray,
+
     /// \brief Initializing wide char array with incompatible wide string
     /// literal.
     FK_IncompatWideStringIntoWideChar,
+
     /// \brief Array type mismatch.
     FK_ArrayTypeMismatch,
+
     /// \brief Non-constant array initializer
     FK_NonConstantArrayInit,
+
     /// \brief Cannot resolve the address of an overloaded function.
     FK_AddressOfOverloadFailed,
+
     /// \brief Overloading due to reference initialization failed.
     FK_ReferenceInitOverloadFailed,
+
     /// \brief Non-const lvalue reference binding to a temporary.
     FK_NonConstLValueReferenceBindingToTemporary,
+
     /// \brief Non-const lvalue reference binding to a bit-field.
     FK_NonConstLValueReferenceBindingToBitfield,
+
     /// \brief Non-const lvalue reference binding to a vector element.
     FK_NonConstLValueReferenceBindingToVectorElement,
+
     /// \brief Non-const lvalue reference binding to an lvalue of unrelated
     /// type.
     FK_NonConstLValueReferenceBindingToUnrelated,
+
     /// \brief Rvalue reference binding to an lvalue.
     FK_RValueReferenceBindingToLValue,
+
     /// \brief Reference binding drops qualifiers.
     FK_ReferenceInitDropsQualifiers,
+
     /// \brief Reference binding failed.
     FK_ReferenceInitFailed,
+
     /// \brief Implicit conversion failed.
     FK_ConversionFailed,
+
     /// \brief Implicit conversion failed.
     FK_ConversionFromPropertyFailed,
+
     /// \brief Too many initializers for scalar
     FK_TooManyInitsForScalar,
+
     /// \brief Scalar initialized from a parenthesized initializer list.
     FK_ParenthesizedListInitForScalar,
+
     /// \brief Reference initialization from an initializer list
     FK_ReferenceBindingToInitList,
+
     /// \brief Initialization of some unused destination type with an
     /// initializer list.
     FK_InitListBadDestinationType,
+
     /// \brief Overloading for a user-defined conversion failed.
     FK_UserConversionOverloadFailed,
+
     /// \brief Overloading for initialization by constructor failed.
     FK_ConstructorOverloadFailed,
+
     /// \brief Overloading for list-initialization by constructor failed.
     FK_ListConstructorOverloadFailed,
+
     /// \brief Default-initialization of a 'const' object.
     FK_DefaultInitOfConst,
+
     /// \brief Initialization of an incomplete type.
     FK_Incomplete,
+
     /// \brief Variable-length array must not have an initializer.
     FK_VariableLengthArrayHasInitializer,
+
     /// \brief List initialization failed at some point.
     FK_ListInitializationFailed,
+
     /// \brief Initializer has a placeholder type which cannot be
     /// resolved by initialization.
     FK_PlaceholderType,
+
     /// \brief Trying to take the address of a function that doesn't support
     /// having its address taken.
     FK_AddressOfUnaddressableFunction,
+
     /// \brief List-copy-initialization chose an explicit constructor.
     FK_ExplicitConstructor,
   };
@@ -935,7 +1065,6 @@
   }
 
 private:
-  
   /// \brief Prints a follow-up note that highlights the location of
   /// the initialized entity, if it's remote.
   void PrintInitLocationNote(Sema &S, const InitializedEntity &Entity);
@@ -1020,11 +1149,13 @@
   /// \brief Determine whether the initialization sequence is invalid.
   bool Failed() const { return SequenceKind == FailedSequence; }
 
-  typedef SmallVectorImpl<Step>::const_iterator step_iterator;
+  using step_iterator = SmallVectorImpl<Step>::const_iterator;
+
   step_iterator step_begin() const { return Steps.begin(); }
   step_iterator step_end()   const { return Steps.end(); }
 
-  typedef llvm::iterator_range<step_iterator> step_range;
+  using step_range = llvm::iterator_range<step_iterator>;
+
   step_range steps() const { return {step_begin(), step_end()}; }
 
   /// \brief Determine whether this initialization is a direct reference 
@@ -1229,6 +1360,6 @@
   void dump() const;
 };
   
-} // end namespace clang
+} // namespace clang
 
 #endif // LLVM_CLANG_SEMA_INITIALIZATION_H
diff --git a/include/clang/Sema/Lookup.h b/include/clang/Sema/Lookup.h
index 546df88..a151bda 100644
--- a/include/clang/Sema/Lookup.h
+++ b/include/clang/Sema/Lookup.h
@@ -1,4 +1,4 @@
-//===--- Lookup.h - Classes for name lookup ---------------------*- C++ -*-===//
+//===- Lookup.h - Classes for name lookup -----------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,13 +15,28 @@
 #ifndef LLVM_CLANG_SEMA_LOOKUP_H
 #define LLVM_CLANG_SEMA_LOOKUP_H
 
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/AST/Type.h"
+#include "clang/AST/UnresolvedSet.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
 #include "clang/Sema/Sema.h"
-
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <utility>
 
 namespace clang {
 
+class CXXBasePaths;
+
 /// @brief Represents the results of name lookup.
 ///
 /// An instance of the LookupResult class captures the results of a
@@ -126,25 +141,15 @@
     Temporary
   };
 
-  typedef UnresolvedSetImpl::iterator iterator;
+  using iterator = UnresolvedSetImpl::iterator;
 
   LookupResult(Sema &SemaRef, const DeclarationNameInfo &NameInfo,
                Sema::LookupNameKind LookupKind,
                Sema::RedeclarationKind Redecl = Sema::NotForRedeclaration)
-    : ResultKind(NotFound),
-      Paths(nullptr),
-      NamingClass(nullptr),
-      SemaPtr(&SemaRef),
-      NameInfo(NameInfo),
-      LookupKind(LookupKind),
-      IDNS(0),
-      Redecl(Redecl != Sema::NotForRedeclaration),
-      ExternalRedecl(Redecl == Sema::ForExternalRedeclaration),
-      HideTags(true),
-      Diagnose(Redecl == Sema::NotForRedeclaration),
-      AllowHidden(false),
-      Shadowed(false)
-  {
+      : SemaPtr(&SemaRef), NameInfo(NameInfo), LookupKind(LookupKind),
+        Redecl(Redecl != Sema::NotForRedeclaration),
+        ExternalRedecl(Redecl == Sema::ForExternalRedeclaration),
+        Diagnose(Redecl == Sema::NotForRedeclaration) {
     configure();
   }
 
@@ -154,20 +159,10 @@
   LookupResult(Sema &SemaRef, DeclarationName Name,
                SourceLocation NameLoc, Sema::LookupNameKind LookupKind,
                Sema::RedeclarationKind Redecl = Sema::NotForRedeclaration)
-    : ResultKind(NotFound),
-      Paths(nullptr),
-      NamingClass(nullptr),
-      SemaPtr(&SemaRef),
-      NameInfo(Name, NameLoc),
-      LookupKind(LookupKind),
-      IDNS(0),
-      Redecl(Redecl != Sema::NotForRedeclaration),
-      ExternalRedecl(Redecl == Sema::ForExternalRedeclaration),
-      HideTags(true),
-      Diagnose(Redecl == Sema::NotForRedeclaration),
-      AllowHidden(false),
-      Shadowed(false)
-  {
+      : SemaPtr(&SemaRef), NameInfo(Name, NameLoc), LookupKind(LookupKind),
+        Redecl(Redecl != Sema::NotForRedeclaration),
+        ExternalRedecl(Redecl == Sema::ForExternalRedeclaration),
+        Diagnose(Redecl == Sema::NotForRedeclaration) {
     configure();
   }
 
@@ -175,20 +170,10 @@
   /// using the information from another result.  Diagnostics are always
   /// disabled.
   LookupResult(TemporaryToken _, const LookupResult &Other)
-    : ResultKind(NotFound),
-      Paths(nullptr),
-      NamingClass(nullptr),
-      SemaPtr(Other.SemaPtr),
-      NameInfo(Other.NameInfo),
-      LookupKind(Other.LookupKind),
-      IDNS(Other.IDNS),
-      Redecl(Other.Redecl),
-      ExternalRedecl(Other.ExternalRedecl),
-      HideTags(Other.HideTags),
-      Diagnose(false),
-      AllowHidden(Other.AllowHidden),
-      Shadowed(false)
-  {}
+      : SemaPtr(Other.SemaPtr), NameInfo(Other.NameInfo),
+        LookupKind(Other.LookupKind), IDNS(Other.IDNS), Redecl(Other.Redecl),
+        ExternalRedecl(Other.ExternalRedecl), HideTags(Other.HideTags),
+        AllowHidden(Other.AllowHidden) {}
 
   // FIXME: Remove these deleted methods once the default build includes
   // -Wdeprecated.
@@ -213,6 +198,7 @@
     Other.Paths = nullptr;
     Other.Diagnose = false;
   }
+
   LookupResult &operator=(LookupResult &&Other) {
     ResultKind = std::move(Other.ResultKind);
     Ambiguity = std::move(Other.Ambiguity);
@@ -618,15 +604,14 @@
   /// filtering out results.  The results returned are possibly
   /// sugared.
   class Filter {
+    friend class LookupResult;
+
     LookupResult &Results;
     LookupResult::iterator I;
-    bool Changed;
-    bool CalledDone;
+    bool Changed = false;
+    bool CalledDone = false;
     
-    friend class LookupResult;
-    Filter(LookupResult &Results)
-      : Results(Results), I(Results.begin()), Changed(false), CalledDone(false)
-    {}
+    Filter(LookupResult &Results) : Results(Results), I(Results.begin()) {}
 
   public:
     Filter(Filter &&F)
@@ -634,6 +619,7 @@
           CalledDone(F.CalledDone) {
       F.CalledDone = true;
     }
+
     ~Filter() {
       assert(CalledDone &&
              "LookupResult::Filter destroyed without done() call");
@@ -722,11 +708,11 @@
   static void deletePaths(CXXBasePaths *);
 
   // Results.
-  LookupResultKind ResultKind;
+  LookupResultKind ResultKind = NotFound;
   AmbiguityKind Ambiguity; // ill-defined unless ambiguous
   UnresolvedSet<8> Decls;
-  CXXBasePaths *Paths;
-  CXXRecordDecl *NamingClass;
+  CXXBasePaths *Paths = nullptr;
+  CXXRecordDecl *NamingClass = nullptr;
   QualType BaseObjectType;
 
   // Parameters.
@@ -734,24 +720,24 @@
   DeclarationNameInfo NameInfo;
   SourceRange NameContextRange;
   Sema::LookupNameKind LookupKind;
-  unsigned IDNS; // set by configure()
+  unsigned IDNS = 0; // set by configure()
 
   bool Redecl;
   bool ExternalRedecl;
 
   /// \brief True if tag declarations should be hidden if non-tags
   ///   are present
-  bool HideTags;
+  bool HideTags = true;
 
-  bool Diagnose;
+  bool Diagnose = false;
 
   /// \brief True if we should allow hidden declarations to be 'visible'.
-  bool AllowHidden;
+  bool AllowHidden = false;
 
   /// \brief True if the found declarations were shadowed by some other
   /// declaration that we skipped. This only happens when \c LookupKind
   /// is \c LookupRedeclarationWithLinkage.
-  bool Shadowed;
+  bool Shadowed = false;
 };
 
 /// \brief Consumes visible declarations found when searching for
@@ -784,6 +770,12 @@
   /// class of the context we searched.
   virtual void FoundDecl(NamedDecl *ND, NamedDecl *Hiding, DeclContext *Ctx,
                          bool InBaseClass) = 0;
+
+  /// \brief Callback to inform the client that Sema entered into a new context
+  /// to find a visible declaration.
+  //
+  /// \param Ctx the context which Sema entered.
+  virtual void EnteredContext(DeclContext *Ctx) {}
 };
 
 /// \brief A class for storing results from argument-dependent lookup.
@@ -807,13 +799,13 @@
     Decls.erase(cast<NamedDecl>(D->getCanonicalDecl()));
   }
 
-  typedef llvm::mapped_iterator<decltype(Decls)::iterator, select_second>
-      iterator;
+  using iterator =
+      llvm::mapped_iterator<decltype(Decls)::iterator, select_second>;
 
   iterator begin() { return iterator(Decls.begin(), select_second()); }
   iterator end() { return iterator(Decls.end(), select_second()); }
 };
 
-}
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_SEMA_LOOKUP_H
diff --git a/include/clang/Sema/MultiplexExternalSemaSource.h b/include/clang/Sema/MultiplexExternalSemaSource.h
index 1d681a0..4de36af 100644
--- a/include/clang/Sema/MultiplexExternalSemaSource.h
+++ b/include/clang/Sema/MultiplexExternalSemaSource.h
@@ -148,8 +148,10 @@
   /// \brief Print any statistics that have been gathered regarding
   /// the external AST source.
   void PrintStats() override;
-  
-  
+
+  /// \brief Retrieve the module that corresponds to the given module ID.
+  Module *getModule(unsigned ID) override;
+
   /// \brief Perform layout on the given record.
   ///
   /// This routine allows the external AST source to provide an specific 
diff --git a/include/clang/Sema/Overload.h b/include/clang/Sema/Overload.h
index 05cfe53..28ae40b 100644
--- a/include/clang/Sema/Overload.h
+++ b/include/clang/Sema/Overload.h
@@ -1,4 +1,4 @@
-//===--- Overload.h - C++ Overloading ---------------------------*- C++ -*-===//
+//===- Overload.h - C++ Overloading -----------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,32 +16,51 @@
 #define LLVM_CLANG_SEMA_OVERLOAD_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclAccessPair.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/Expr.h"
-#include "clang/AST/TemplateBase.h"
 #include "clang/AST/Type.h"
-#include "clang/AST/UnresolvedSet.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Sema/SemaFixItUtils.h"
 #include "clang/Sema/TemplateDeduction.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
 
 namespace clang {
-  class ASTContext;
-  class CXXConstructorDecl;
-  class CXXConversionDecl;
-  class FunctionDecl;
-  class Sema;
+
+class APValue;
+class ASTContext;
+class Sema;
 
   /// OverloadingResult - Capture the result of performing overload
   /// resolution.
   enum OverloadingResult {
-    OR_Success,             ///< Overload resolution succeeded.
-    OR_No_Viable_Function,  ///< No viable function found.
-    OR_Ambiguous,           ///< Ambiguous candidates found.
-    OR_Deleted              ///< Succeeded, but refers to a deleted function.
+    /// Overload resolution succeeded.
+    OR_Success,
+
+    /// No viable function found.
+    OR_No_Viable_Function,
+
+    /// Ambiguous candidates found.
+    OR_Ambiguous,
+
+    /// Succeeded, but refers to a deleted function.
+    OR_Deleted
   };
   
   enum OverloadCandidateDisplayKind {
@@ -58,36 +77,92 @@
   /// match with Table 9 of (C++ 13.3.3.1.1) and are listed such that
   /// better conversion kinds have smaller values.
   enum ImplicitConversionKind {
-    ICK_Identity = 0,          ///< Identity conversion (no conversion)
-    ICK_Lvalue_To_Rvalue,      ///< Lvalue-to-rvalue conversion (C++ 4.1)
-    ICK_Array_To_Pointer,      ///< Array-to-pointer conversion (C++ 4.2)
-    ICK_Function_To_Pointer,   ///< Function-to-pointer (C++ 4.3)
-    ICK_Function_Conversion,   ///< Function pointer conversion (C++17 4.13)
-    ICK_Qualification,         ///< Qualification conversions (C++ 4.4)
-    ICK_Integral_Promotion,    ///< Integral promotions (C++ 4.5)
-    ICK_Floating_Promotion,    ///< Floating point promotions (C++ 4.6)
-    ICK_Complex_Promotion,     ///< Complex promotions (Clang extension)
-    ICK_Integral_Conversion,   ///< Integral conversions (C++ 4.7)
-    ICK_Floating_Conversion,   ///< Floating point conversions (C++ 4.8)
-    ICK_Complex_Conversion,    ///< Complex conversions (C99 6.3.1.6)
-    ICK_Floating_Integral,     ///< Floating-integral conversions (C++ 4.9)
-    ICK_Pointer_Conversion,    ///< Pointer conversions (C++ 4.10)
-    ICK_Pointer_Member,        ///< Pointer-to-member conversions (C++ 4.11)
-    ICK_Boolean_Conversion,    ///< Boolean conversions (C++ 4.12)
-    ICK_Compatible_Conversion, ///< Conversions between compatible types in C99
-    ICK_Derived_To_Base,       ///< Derived-to-base (C++ [over.best.ics])
-    ICK_Vector_Conversion,     ///< Vector conversions
-    ICK_Vector_Splat,          ///< A vector splat from an arithmetic type
-    ICK_Complex_Real,          ///< Complex-real conversions (C99 6.3.1.7)
-    ICK_Block_Pointer_Conversion,    ///< Block Pointer conversions 
-    ICK_TransparentUnionConversion, ///< Transparent Union Conversions
-    ICK_Writeback_Conversion,  ///< Objective-C ARC writeback conversion
-    ICK_Zero_Event_Conversion, ///< Zero constant to event (OpenCL1.2 6.12.10)
-    ICK_Zero_Queue_Conversion, ///< Zero constant to queue
-    ICK_C_Only_Conversion,     ///< Conversions allowed in C, but not C++
-    ICK_Incompatible_Pointer_Conversion, ///< C-only conversion between pointers
-                                         ///  with incompatible types
-    ICK_Num_Conversion_Kinds,  ///< The number of conversion kinds
+    /// Identity conversion (no conversion)
+    ICK_Identity = 0,
+
+    /// Lvalue-to-rvalue conversion (C++ 4.1)
+    ICK_Lvalue_To_Rvalue,
+
+    /// Array-to-pointer conversion (C++ 4.2)
+    ICK_Array_To_Pointer,
+
+    /// Function-to-pointer (C++ 4.3)
+    ICK_Function_To_Pointer,
+
+    /// Function pointer conversion (C++17 4.13)
+    ICK_Function_Conversion,
+
+    /// Qualification conversions (C++ 4.4)
+    ICK_Qualification,
+
+    /// Integral promotions (C++ 4.5)
+    ICK_Integral_Promotion,
+
+    /// Floating point promotions (C++ 4.6)
+    ICK_Floating_Promotion,
+
+    /// Complex promotions (Clang extension)
+    ICK_Complex_Promotion,
+
+    /// Integral conversions (C++ 4.7)
+    ICK_Integral_Conversion,
+
+    /// Floating point conversions (C++ 4.8)
+    ICK_Floating_Conversion,
+
+    /// Complex conversions (C99 6.3.1.6)
+    ICK_Complex_Conversion,
+
+    /// Floating-integral conversions (C++ 4.9)
+    ICK_Floating_Integral,
+
+    /// Pointer conversions (C++ 4.10)
+    ICK_Pointer_Conversion,
+
+    /// Pointer-to-member conversions (C++ 4.11)
+    ICK_Pointer_Member,
+
+    /// Boolean conversions (C++ 4.12)
+    ICK_Boolean_Conversion,
+
+    /// Conversions between compatible types in C99
+    ICK_Compatible_Conversion,
+
+    /// Derived-to-base (C++ [over.best.ics])
+    ICK_Derived_To_Base,
+
+    /// Vector conversions
+    ICK_Vector_Conversion,
+
+    /// A vector splat from an arithmetic type
+    ICK_Vector_Splat,
+
+    /// Complex-real conversions (C99 6.3.1.7)
+    ICK_Complex_Real,
+
+    /// Block Pointer conversions
+    ICK_Block_Pointer_Conversion,
+
+    /// Transparent Union Conversions
+    ICK_TransparentUnionConversion,
+
+    /// Objective-C ARC writeback conversion
+    ICK_Writeback_Conversion,
+
+    /// Zero constant to event (OpenCL1.2 6.12.10)
+    ICK_Zero_Event_Conversion,
+
+    /// Zero constant to queue
+    ICK_Zero_Queue_Conversion,
+
+    /// Conversions allowed in C, but not C++
+    ICK_C_Only_Conversion,
+
+    /// C-only conversion between pointers with incompatible types
+    ICK_Incompatible_Pointer_Conversion,
+
+    /// The number of conversion kinds
+    ICK_Num_Conversion_Kinds,
   };
 
   /// ImplicitConversionRank - The rank of an implicit conversion
@@ -95,16 +170,30 @@
   /// 13.3.3.1.1) and are listed such that better conversion ranks
   /// have smaller values.
   enum ImplicitConversionRank {
-    ICR_Exact_Match = 0,         ///< Exact Match
-    ICR_Promotion,               ///< Promotion
-    ICR_Conversion,              ///< Conversion
-    ICR_OCL_Scalar_Widening,     ///< OpenCL Scalar Widening
-    ICR_Complex_Real_Conversion, ///< Complex <-> Real conversion
-    ICR_Writeback_Conversion,    ///< ObjC ARC writeback conversion
-    ICR_C_Conversion,            ///< Conversion only allowed in the C standard.
-                                 ///  (e.g. void* to char*)
-    ICR_C_Conversion_Extension   ///< Conversion not allowed by the C standard,
-                                 ///  but that we accept as an extension anyway.
+    /// Exact Match
+    ICR_Exact_Match = 0,
+
+    /// Promotion
+    ICR_Promotion,
+
+    /// Conversion
+    ICR_Conversion,
+
+    /// OpenCL Scalar Widening
+    ICR_OCL_Scalar_Widening,
+
+    /// Complex <-> Real conversion
+    ICR_Complex_Real_Conversion,
+
+    /// ObjC ARC writeback conversion
+    ICR_Writeback_Conversion,
+
+    /// Conversion only allowed in the C standard (e.g. void* to char*).
+    ICR_C_Conversion,
+
+    /// Conversion not allowed by the C standard, but that we accept as an
+    /// extension anyway.
+    ICR_C_Conversion_Extension
   };
 
   ImplicitConversionRank GetConversionRank(ImplicitConversionKind Kind);
@@ -213,10 +302,12 @@
     DeclAccessPair FoundCopyConstructor;
 
     void setFromType(QualType T) { FromTypePtr = T.getAsOpaquePtr(); }
+
     void setToType(unsigned Idx, QualType T) { 
       assert(Idx < 3 && "To type index is out of range");
       ToTypePtrs[Idx] = T.getAsOpaquePtr(); 
     }
+
     void setAllToTypes(QualType T) {
       ToTypePtrs[0] = T.getAsOpaquePtr(); 
       ToTypePtrs[1] = ToTypePtrs[0];
@@ -226,6 +317,7 @@
     QualType getFromType() const {
       return QualType::getFromOpaquePtr(FromTypePtr);
     }
+
     QualType getToType(unsigned Idx) const {
       assert(Idx < 3 && "To type index is out of range");
       return QualType::getFromOpaquePtr(ToTypePtrs[Idx]);
@@ -294,7 +386,8 @@
 
   /// Represents an ambiguous user-defined conversion sequence.
   struct AmbiguousConversionSequence {
-    typedef SmallVector<std::pair<NamedDecl*, FunctionDecl*>, 4> ConversionSet;
+    using ConversionSet =
+        SmallVector<std::pair<NamedDecl *, FunctionDecl *>, 4>;
 
     void *FromTypePtr;
     void *ToTypePtr;
@@ -303,9 +396,11 @@
     QualType getFromType() const {
       return QualType::getFromOpaquePtr(FromTypePtr);
     }
+
     QualType getToType() const {
       return QualType::getFromOpaquePtr(ToTypePtr);
     }
+
     void setFromType(QualType T) { FromTypePtr = T.getAsOpaquePtr(); }
     void setToType(QualType T) { ToTypePtr = T.getAsOpaquePtr(); }
 
@@ -321,11 +416,13 @@
       conversions().push_back(std::make_pair(Found, D));
     }
 
-    typedef ConversionSet::iterator iterator;
+    using iterator = ConversionSet::iterator;
+
     iterator begin() { return conversions().begin(); }
     iterator end() { return conversions().end(); }
 
-    typedef ConversionSet::const_iterator const_iterator;
+    using const_iterator = ConversionSet::const_iterator;
+
     const_iterator begin() const { return conversions().begin(); }
     const_iterator end() const { return conversions().end(); }
 
@@ -362,6 +459,7 @@
       init(K, From->getType(), To);
       FromExpr = From;
     }
+
     void init(FailureKind K, QualType From, QualType To) {
       Kind = K;
       FromExpr = nullptr;
@@ -376,6 +474,7 @@
       FromExpr = E;
       setFromType(E->getType());
     }
+
     void setFromType(QualType T) { FromTy = T.getAsOpaquePtr(); }
     void setToType(QualType T) { ToTy = T.getAsOpaquePtr(); }
   };
@@ -442,13 +541,10 @@
         : ConversionKind(Uninitialized), StdInitializerListElement(false) {
       Standard.setAsIdentityConversion();
     }
-    ~ImplicitConversionSequence() {
-      destruct();
-    }
+
     ImplicitConversionSequence(const ImplicitConversionSequence &Other)
-      : ConversionKind(Other.ConversionKind),
-        StdInitializerListElement(Other.StdInitializerListElement)
-    {
+        : ConversionKind(Other.ConversionKind),
+          StdInitializerListElement(Other.StdInitializerListElement) {
       switch (ConversionKind) {
       case Uninitialized: break;
       case StandardConversion: Standard = Other.Standard; break;
@@ -460,12 +556,16 @@
     }
 
     ImplicitConversionSequence &
-        operator=(const ImplicitConversionSequence &Other) {
+    operator=(const ImplicitConversionSequence &Other) {
       destruct();
       new (this) ImplicitConversionSequence(Other);
       return *this;
     }
     
+    ~ImplicitConversionSequence() {
+      destruct();
+    }
+
     Kind getKind() const {
       assert(isInitialized() && "querying uninitialized conversion");
       return Kind(ConversionKind);
@@ -526,6 +626,7 @@
     void setStandard() { setKind(StandardConversion); }
     void setEllipsis() { setKind(EllipsisConversion); }
     void setUserDefined() { setKind(UserDefinedConversion); }
+
     void setAmbiguous() {
       if (ConversionKind == AmbiguousConversion) return;
       ConversionKind = AmbiguousConversion;
@@ -613,12 +714,16 @@
     /// This inherited constructor is not viable because it would slice the
     /// argument.
     ovl_fail_inhctor_slice,
+
+    /// This candidate was not viable because it is a non-default multiversioned
+    /// function.
+    ovl_non_default_multiversion_function,
   };
 
   /// A list of implicit conversion sequences for the arguments of an
   /// OverloadCandidate.
-  typedef llvm::MutableArrayRef<ImplicitConversionSequence>
-      ConversionSequenceList;
+  using ConversionSequenceList =
+      llvm::MutableArrayRef<ImplicitConversionSequence>;
 
   /// OverloadCandidate - A single candidate in an overload set (C++ 13.3).
   struct OverloadCandidate {
@@ -726,16 +831,19 @@
     enum CandidateSetKind {
       /// Normal lookup.
       CSK_Normal,
+
       /// C++ [over.match.oper]:
       /// Lookup of operator function candidates in a call using operator
       /// syntax. Candidates that have no parameters of class type will be
       /// skipped unless there is a parameter of (reference to) enum type and
       /// the corresponding argument is of the same enum type.
       CSK_Operator,
+
       /// C++ [over.match.copy]:
       /// Copy-initialization of an object of class type by user-defined
       /// conversion.
       CSK_InitByUserDefinedConversion,
+
       /// C++ [over.match.ctor], [over.match.list]
       /// Initialization of an object of class type by constructor,
       /// using either a parenthesized or braced list of arguments.
@@ -755,7 +863,7 @@
 
     constexpr static unsigned NumInlineBytes =
         24 * sizeof(ImplicitConversionSequence);
-    unsigned NumInlineBytesUsed;
+    unsigned NumInlineBytesUsed = 0;
     llvm::AlignedCharArray<alignof(void *), NumInlineBytes> InlineSpace;
 
     /// If we have space, allocates from inline storage. Otherwise, allocates
@@ -784,14 +892,13 @@
       return reinterpret_cast<T *>(FreeSpaceStart);
     }
 
-    OverloadCandidateSet(const OverloadCandidateSet &) = delete;
-    void operator=(const OverloadCandidateSet &) = delete;
-
     void destroyCandidates();
 
   public:
     OverloadCandidateSet(SourceLocation Loc, CandidateSetKind CSK)
-        : Loc(Loc), Kind(CSK), NumInlineBytesUsed(0) {}
+        : Loc(Loc), Kind(CSK) {}
+    OverloadCandidateSet(const OverloadCandidateSet &) = delete;
+    OverloadCandidateSet &operator=(const OverloadCandidateSet &) = delete;
     ~OverloadCandidateSet() { destroyCandidates(); }
 
     SourceLocation getLocation() const { return Loc; }
@@ -806,7 +913,8 @@
     /// \brief Clear out all of the candidates.
     void clear(CandidateSetKind CSK);
 
-    typedef SmallVectorImpl<OverloadCandidate>::iterator iterator;
+    using iterator = SmallVectorImpl<OverloadCandidate>::iterator;
+
     iterator begin() { return Candidates.begin(); }
     iterator end() { return Candidates.end(); }
 
@@ -865,8 +973,10 @@
     DeclAccessPair FoundDecl;
     CXXConstructorDecl *Constructor;
     FunctionTemplateDecl *ConstructorTmpl;
+
     explicit operator bool() const { return Constructor; }
   };
+
   // FIXME: Add an AddOverloadCandidate / AddTemplateOverloadCandidate overload
   // that takes one of these.
   inline ConstructorInfo getConstructorInfo(NamedDecl *ND) {
@@ -884,6 +994,7 @@
     Info.Constructor = dyn_cast<CXXConstructorDecl>(D);
     return Info;
   }
-} // end namespace clang
+
+} // namespace clang
 
 #endif // LLVM_CLANG_SEMA_OVERLOAD_H
diff --git a/include/clang/Sema/Ownership.h b/include/clang/Sema/Ownership.h
index 1e35316..11382c3 100644
--- a/include/clang/Sema/Ownership.h
+++ b/include/clang/Sema/Ownership.h
@@ -1,4 +1,4 @@
-//===--- Ownership.h - Parser ownership helpers -----------------*- C++ -*-===//
+//===- Ownership.h - Parser ownership helpers -------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,21 +17,27 @@
 #include "clang/AST/Expr.h"
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include "llvm/Support/type_traits.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
 
 //===----------------------------------------------------------------------===//
 // OpaquePtr
 //===----------------------------------------------------------------------===//
 
 namespace clang {
-  class CXXCtorInitializer;
-  class CXXBaseSpecifier;
-  class Decl;
-  class Expr;
-  class ParsedTemplateArgument;
-  class QualType;
-  class Stmt;
-  class TemplateName;
-  class TemplateParameterList;
+
+class CXXBaseSpecifier;
+class CXXCtorInitializer;
+class Decl;
+class Expr;
+class ParsedTemplateArgument;
+class QualType;
+class Stmt;
+class TemplateName;
+class TemplateParameterList;
 
   /// \brief Wrapper for void* pointer.
   /// \tparam PtrTy Either a pointer type like 'T*' or a type that behaves like
@@ -44,9 +50,10 @@
   template <class PtrTy>
   class OpaquePtr {
     void *Ptr = nullptr;
+
     explicit OpaquePtr(void *Ptr) : Ptr(Ptr) {}
 
-    typedef llvm::PointerLikeTypeTraits<PtrTy> Traits;
+    using Traits = llvm::PointerLikeTypeTraits<PtrTy>;
 
   public:
     OpaquePtr(std::nullptr_t = nullptr) {}
@@ -103,26 +110,32 @@
       return *this;
     }
   };
-}
+
+} // namespace clang
 
 namespace llvm {
+
   template <class T>
-  struct PointerLikeTypeTraits<clang::OpaquePtr<T> > {
+  struct PointerLikeTypeTraits<clang::OpaquePtr<T>> {
+    enum { NumLowBitsAvailable = 0 };
+
     static inline void *getAsVoidPointer(clang::OpaquePtr<T> P) {
       // FIXME: Doesn't work? return P.getAs< void >();
       return P.getAsOpaquePtr();
     }
+
     static inline clang::OpaquePtr<T> getFromVoidPointer(void *P) {
       return clang::OpaquePtr<T>::getFromOpaquePtr(P);
     }
-    enum { NumLowBitsAvailable = 0 };
   };
 
   template <class T>
-  struct isPodLike<clang::OpaquePtr<T> > { static const bool value = true; };
-}
+  struct isPodLike<clang::OpaquePtr<T>> { static const bool value = true; };
+
+} // namespace llvm
 
 namespace clang {
+
   // Basic
   class DiagnosticBuilder;
 
@@ -146,8 +159,7 @@
     bool Invalid;
 
   public:
-    ActionResult(bool Invalid = false)
-      : Val(PtrTy()), Invalid(Invalid) {}
+    ActionResult(bool Invalid = false) : Val(PtrTy()), Invalid(Invalid) {}
     ActionResult(PtrTy val) : Val(val), Invalid(false) {}
     ActionResult(const DiagnosticBuilder &) : Val(PtrTy()), Invalid(true) {}
 
@@ -178,17 +190,20 @@
     // A pointer whose low bit is 1 if this result is invalid, 0
     // otherwise.
     uintptr_t PtrWithInvalid;
-    typedef llvm::PointerLikeTypeTraits<PtrTy> PtrTraits;
+
+    using PtrTraits = llvm::PointerLikeTypeTraits<PtrTy>;
+
   public:
     ActionResult(bool Invalid = false)
-      : PtrWithInvalid(static_cast<uintptr_t>(Invalid)) { }
+        : PtrWithInvalid(static_cast<uintptr_t>(Invalid)) {}
 
     ActionResult(PtrTy V) {
       void *VP = PtrTraits::getAsVoidPointer(V);
       PtrWithInvalid = reinterpret_cast<uintptr_t>(VP);
       assert((PtrWithInvalid & 0x01) == 0 && "Badly aligned pointer");
     }
-    ActionResult(const DiagnosticBuilder &) : PtrWithInvalid(0x01) { }
+
+    ActionResult(const DiagnosticBuilder &) : PtrWithInvalid(0x01) {}
 
     // These two overloads prevent void* -> bool conversions.
     ActionResult(const void *) = delete;
@@ -202,6 +217,7 @@
       void *VP = reinterpret_cast<void *>(PtrWithInvalid & ~0x01);
       return PtrTraits::getFromVoidPointer(VP);
     }
+
     template <typename T> T *getAs() { return static_cast<T*>(get()); }
 
     void set(PtrTy V) {
@@ -229,8 +245,8 @@
 
   /// An opaque type for threading parsed type information through the
   /// parser.
-  typedef OpaquePtr<QualType> ParsedType;
-  typedef UnionOpaquePtr<QualType> UnionParsedType;
+  using ParsedType = OpaquePtr<QualType>;
+  using UnionParsedType = UnionOpaquePtr<QualType>;
 
   // We can re-use the low bit of expression, statement, base, and
   // member-initializer pointers for the "invalid" flag of
@@ -248,21 +264,21 @@
     static const bool value = true;
   };
 
-  typedef ActionResult<Expr*> ExprResult;
-  typedef ActionResult<Stmt*> StmtResult;
-  typedef ActionResult<ParsedType> TypeResult;
-  typedef ActionResult<CXXBaseSpecifier*> BaseResult;
-  typedef ActionResult<CXXCtorInitializer*> MemInitResult;
+  using ExprResult = ActionResult<Expr *>;
+  using StmtResult = ActionResult<Stmt *>;
+  using TypeResult = ActionResult<ParsedType>;
+  using BaseResult = ActionResult<CXXBaseSpecifier *>;
+  using MemInitResult = ActionResult<CXXCtorInitializer *>;
 
-  typedef ActionResult<Decl*> DeclResult;
-  typedef OpaquePtr<TemplateName> ParsedTemplateTy;
-  typedef UnionOpaquePtr<TemplateName> UnionParsedTemplateTy;
+  using DeclResult = ActionResult<Decl *>;
+  using ParsedTemplateTy = OpaquePtr<TemplateName>;
+  using UnionParsedTemplateTy = UnionOpaquePtr<TemplateName>;
 
-  typedef MutableArrayRef<Expr*> MultiExprArg;
-  typedef MutableArrayRef<Stmt*> MultiStmtArg;
-  typedef MutableArrayRef<ParsedTemplateArgument> ASTTemplateArgsPtr;
-  typedef MutableArrayRef<ParsedType> MultiTypeArg;
-  typedef MutableArrayRef<TemplateParameterList*> MultiTemplateParamsArg;
+  using MultiExprArg = MutableArrayRef<Expr *>;
+  using MultiStmtArg = MutableArrayRef<Stmt *>;
+  using ASTTemplateArgsPtr = MutableArrayRef<ParsedTemplateArgument>;
+  using MultiTypeArg = MutableArrayRef<ParsedType>;
+  using MultiTemplateParamsArg = MutableArrayRef<TemplateParameterList *>;
 
   inline ExprResult ExprError() { return ExprResult(true); }
   inline StmtResult StmtError() { return StmtResult(true); }
@@ -282,6 +298,7 @@
     assert(!R.isInvalid() && "operation was asserted to never fail!");
     return R.get();
   }
-}
 
-#endif
+} // namespace clang
+
+#endif // LLVM_CLANG_SEMA_OWNERSHIP_H
diff --git a/include/clang/Sema/ParsedTemplate.h b/include/clang/Sema/ParsedTemplate.h
index 01a4ab3..e980e4d 100644
--- a/include/clang/Sema/ParsedTemplate.h
+++ b/include/clang/Sema/ParsedTemplate.h
@@ -199,8 +199,7 @@
            SourceLocation LAngleLoc, SourceLocation RAngleLoc,
            ArrayRef<ParsedTemplateArgument> TemplateArgs,
            SmallVectorImpl<TemplateIdAnnotation *> &CleanupList) {
-
-      TemplateIdAnnotation *TemplateId = new (std::malloc(
+      TemplateIdAnnotation *TemplateId = new (llvm::safe_malloc(
           totalSizeToAlloc<ParsedTemplateArgument>(TemplateArgs.size())))
           TemplateIdAnnotation(SS, TemplateKWLoc, TemplateNameLoc, Name,
                                OperatorKind, OpaqueTemplateName, TemplateKind,
diff --git a/include/clang/Sema/Scope.h b/include/clang/Sema/Scope.h
index e892d82..ba32784 100644
--- a/include/clang/Sema/Scope.h
+++ b/include/clang/Sema/Scope.h
@@ -1,4 +1,4 @@
-//===--- Scope.h - Scope interface ------------------------------*- C++ -*-===//
+//===- Scope.h - Scope interface --------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,28 +14,29 @@
 #ifndef LLVM_CLANG_SEMA_SCOPE_H
 #define LLVM_CLANG_SEMA_SCOPE_H
 
-#include "clang/AST/Decl.h"
 #include "clang/Basic/Diagnostic.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include <cassert>
 
 namespace llvm {
 
 class raw_ostream;
 
-}
+} // namespace llvm
 
 namespace clang {
 
 class Decl;
+class DeclContext;
 class UsingDirectiveDecl;
 class VarDecl;
 
 /// Scope - A scope is a transient data structure that is used while parsing the
 /// program.  It assists with resolving identifiers to the appropriate
 /// declaration.
-///
 class Scope {
 public:
   /// ScopeFlags - These are bitfields that are or'd together when creating a
@@ -127,7 +128,11 @@
 
     /// This is a compound statement scope.
     CompoundStmtScope = 0x400000,
+
+    /// We are between inheritance colon and the real class/struct definition scope.
+    ClassInheritanceScope = 0x800000,
   };
+
 private:
   /// The parent scope for this scope.  This is null for the translation-unit
   /// scope.
@@ -182,7 +187,7 @@
   /// popped, these declarations are removed from the IdentifierTable's notion
   /// of current declaration.  It is up to the current Action implementation to
   /// implement these semantics.
-  typedef llvm::SmallPtrSet<Decl *, 32> DeclSetTy;
+  using DeclSetTy = llvm::SmallPtrSet<Decl *, 32>;
   DeclSetTy DeclsInScope;
 
   /// The DeclContext with which this scope is associated. For
@@ -190,7 +195,7 @@
   /// entity of a function scope is a function, etc.
   DeclContext *Entity;
 
-  typedef SmallVector<UsingDirectiveDecl *, 2> UsingDirectivesTy;
+  using UsingDirectivesTy = SmallVector<UsingDirectiveDecl *, 2>;
   UsingDirectivesTy UsingDirectives;
 
   /// \brief Used to determine if errors occurred in this scope.
@@ -204,25 +209,23 @@
 
 public:
   Scope(Scope *Parent, unsigned ScopeFlags, DiagnosticsEngine &Diag)
-    : ErrorTrap(Diag) {
+      : ErrorTrap(Diag) {
     Init(Parent, ScopeFlags);
   }
 
   /// getFlags - Return the flags for this scope.
-  ///
   unsigned getFlags() const { return Flags; }
+
   void setFlags(unsigned F) { setFlags(getParent(), F); }
 
   /// isBlockScope - Return true if this scope correspond to a closure.
   bool isBlockScope() const { return Flags & BlockScope; }
 
   /// getParent - Return the scope that this is nested in.
-  ///
   const Scope *getParent() const { return AnyParent; }
   Scope *getParent() { return AnyParent; }
 
   /// getFnParent - Return the closest scope that is a function body.
-  ///
   const Scope *getFnParent() const { return FnParent; }
   Scope *getFnParent() { return FnParent; }
 
@@ -256,6 +259,9 @@
   Scope *getTemplateParamParent() { return TemplateParamParent; }
   const Scope *getTemplateParamParent() const { return TemplateParamParent; }
 
+  /// Returns the depth of this scope. The translation-unit has scope depth 0.
+  unsigned getDepth() const { return Depth; }
+
   /// Returns the number of function prototype scopes in this scope
   /// chain.
   unsigned getFunctionPrototypeDepth() const {
@@ -269,10 +275,12 @@
     return PrototypeIndex++;
   }
 
-  typedef llvm::iterator_range<DeclSetTy::iterator> decl_range;
+  using decl_range = llvm::iterator_range<DeclSetTy::iterator>;
+
   decl_range decls() const {
     return decl_range(DeclsInScope.begin(), DeclsInScope.end());
   }
+
   bool decl_empty() const { return DeclsInScope.empty(); }
 
   void AddDecl(Decl *D) {
@@ -362,7 +370,6 @@
     return false;
   }
 
-  
   /// isTemplateParamScope - Return true if this scope is a C++
   /// template parameter scope.
   bool isTemplateParamScope() const {
@@ -451,8 +458,8 @@
     UsingDirectives.push_back(UDir);
   }
 
-  typedef llvm::iterator_range<UsingDirectivesTy::iterator>
-    using_directives_range;
+  using using_directives_range =
+      llvm::iterator_range<UsingDirectivesTy::iterator>;
 
   using_directives_range using_directives() {
     return using_directives_range(UsingDirectives.begin(),
@@ -471,25 +478,23 @@
   }
 
   void setNoNRVO() {
-    NRVO.setInt(1);
+    NRVO.setInt(true);
     NRVO.setPointer(nullptr);
   }
 
   void mergeNRVOIntoParent();
 
   /// Init - This is used by the parser to implement scope caching.
-  ///
   void Init(Scope *parent, unsigned flags);
 
   /// \brief Sets up the specified scope flags and adjusts the scope state
   /// variables accordingly.
-  ///
   void AddFlags(unsigned Flags);
 
   void dumpImpl(raw_ostream &OS) const;
   void dump() const;
 };
 
-}  // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_SEMA_SCOPE_H
diff --git a/include/clang/Sema/ScopeInfo.h b/include/clang/Sema/ScopeInfo.h
index 6bb667d..35fdd74 100644
--- a/include/clang/Sema/ScopeInfo.h
+++ b/include/clang/Sema/ScopeInfo.h
@@ -1,4 +1,4 @@
-//===--- ScopeInfo.h - Information about a semantic context -----*- C++ -*-===//
+//===- ScopeInfo.h - Information about a semantic context -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,35 +18,46 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/CapturedStmt.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Sema/CleanupInfo.h"
-#include "clang/Sema/Ownership.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
+#include <cassert>
+#include <utility>
 
 namespace clang {
 
-class Decl;
 class BlockDecl;
 class CapturedDecl;
 class CXXMethodDecl;
-class FieldDecl;
-class ObjCPropertyDecl;
-class IdentifierInfo;
+class CXXRecordDecl;
 class ImplicitParamDecl;
-class LabelDecl;
+class NamedDecl;
+class ObjCIvarRefExpr;
+class ObjCMessageExpr;
+class ObjCPropertyDecl;
+class ObjCPropertyRefExpr;
+class ParmVarDecl;
+class RecordDecl;
 class ReturnStmt;
 class Scope;
+class Stmt;
 class SwitchStmt;
-class TemplateTypeParmDecl;
 class TemplateParameterList;
+class TemplateTypeParmDecl;
 class VarDecl;
-class ObjCIvarRefExpr;
-class ObjCPropertyRefExpr;
-class ObjCMessageExpr;
 
 namespace sema {
 
@@ -54,12 +65,15 @@
 /// parsed.
 class CompoundScopeInfo {
 public:
-  CompoundScopeInfo()
-    : HasEmptyLoopBodies(false) { }
-
   /// \brief Whether this compound stamement contains `for' or `while' loops
   /// with empty bodies.
-  bool HasEmptyLoopBodies;
+  bool HasEmptyLoopBodies = false;
+
+  /// \brief Whether this compound statement corresponds to a GNU statement
+  /// expression.
+  bool IsStmtExpr;
+
+  CompoundScopeInfo(bool IsStmtExpr) : IsStmtExpr(IsStmtExpr) {}
 
   void setHasEmptyLoopBodies() {
     HasEmptyLoopBodies = true;
@@ -74,7 +88,7 @@
   
   PossiblyUnreachableDiag(const PartialDiagnostic &PD, SourceLocation Loc,
                           const Stmt *stmt)
-    : PD(PD), Loc(Loc), stmt(stmt) {}
+      : PD(PD), Loc(Loc), stmt(stmt) {}
 };
     
 /// \brief Retains information about a function, method, or block that is
@@ -90,7 +104,6 @@
   
 public:
   /// \brief What kind of scope we are describing.
-  ///
   ScopeKind Kind : 3;
 
   /// \brief Whether this function contains a VLA, \@try, try, C++
@@ -123,6 +136,7 @@
 
   /// True when this is a method marked as a designated initializer.
   bool ObjCIsDesignatedInit : 1;
+
   /// This starts true for a method marked as designated initializer and will
   /// be set to false if there is an invocation to a designated initializer of
   /// the super class.
@@ -132,6 +146,7 @@
   /// initializer within a class that has at least one initializer marked as a
   /// designated initializer.
   bool ObjCIsSecondaryInit : 1;
+
   /// This starts true for a secondary initializer method and will be set to
   /// false if there is an invocation of an initializer on 'self'.
   bool ObjCWarnForNoInitDelegation : 1;
@@ -172,6 +187,10 @@
   /// \brief The promise object for this coroutine, if any.
   VarDecl *CoroutinePromise = nullptr;
 
+  /// \brief A mapping between the coroutine function parameters that were moved
+  /// to the coroutine frame, and their move statements.
+  llvm::SmallMapVector<ParmVarDecl *, Stmt *, 4> CoroutineParameterMoves;
+
   /// \brief The initial and final coroutine suspend points.
   std::pair<Stmt *, Stmt *> CoroutineSuspends;
 
@@ -186,7 +205,7 @@
   
   /// \brief A list of parameters which have the nonnull attribute and are
   /// modified in the function.
-  llvm::SmallPtrSet<const ParmVarDecl*, 8> ModifiedNonNullParams;
+  llvm::SmallPtrSet<const ParmVarDecl *, 8> ModifiedNonNullParams;
 
 public:
   /// Represents a simple identification of a weak object.
@@ -218,14 +237,14 @@
     /// identify the object in memory.
     ///
     /// \sa isExactProfile()
-    typedef llvm::PointerIntPair<const NamedDecl *, 1, bool> BaseInfoTy;
+    using BaseInfoTy = llvm::PointerIntPair<const NamedDecl *, 1, bool>;
     BaseInfoTy Base;
 
     /// The "property" decl, as described in the class documentation.
     ///
     /// Note that this may not actually be an ObjCPropertyDecl, e.g. in the
     /// case of "implicit" properties (regular methods accessed via dot syntax).
-    const NamedDecl *Property;
+    const NamedDecl *Property = nullptr;
 
     /// Used to find the proper base profile for a given base expression.
     static BaseInfoTy getBaseInfo(const Expr *BaseE);
@@ -270,12 +289,14 @@
       static inline WeakObjectProfileTy getEmptyKey() {
         return WeakObjectProfileTy();
       }
+
       static inline WeakObjectProfileTy getTombstoneKey() {
         return WeakObjectProfileTy::getSentinel();
       }
 
       static unsigned getHashValue(const WeakObjectProfileTy &Val) {
-        typedef std::pair<BaseInfoTy, const NamedDecl *> Pair;
+        using Pair = std::pair<BaseInfoTy, const NamedDecl *>;
+
         return llvm::DenseMapInfo<Pair>::getHashValue(Pair(Val.Base,
                                                            Val.Property));
       }
@@ -295,6 +316,7 @@
   /// Part of the implementation of -Wrepeated-use-of-weak.
   class WeakUseTy {
     llvm::PointerIntPair<const Expr *, 1, bool> Rep;
+
   public:
     WeakUseTy(const Expr *Use, bool IsRead) : Rep(Use, IsRead) {}
 
@@ -310,14 +332,14 @@
   /// Used to collect uses of a particular weak object in a function body.
   ///
   /// Part of the implementation of -Wrepeated-use-of-weak.
-  typedef SmallVector<WeakUseTy, 4> WeakUseVector;
+  using WeakUseVector = SmallVector<WeakUseTy, 4>;
 
   /// Used to collect all uses of weak objects in a function body.
   ///
   /// Part of the implementation of -Wrepeated-use-of-weak.
-  typedef llvm::SmallDenseMap<WeakObjectProfileTy, WeakUseVector, 8,
-                              WeakObjectProfileTy::DenseMapInfo>
-          WeakObjectUseMap;
+  using WeakObjectUseMap =
+      llvm::SmallDenseMap<WeakObjectProfileTy, WeakUseVector, 8,
+                          WeakObjectProfileTy::DenseMapInfo>;
 
 private:
   /// Used to collect all uses of weak objects in this function body.
@@ -329,6 +351,18 @@
   FunctionScopeInfo(const FunctionScopeInfo&) = default;
 
 public:
+  FunctionScopeInfo(DiagnosticsEngine &Diag)
+      : Kind(SK_Function), HasBranchProtectedScope(false),
+        HasBranchIntoScope(false), HasIndirectGoto(false),
+        HasDroppedStmt(false), HasOMPDeclareReductionCombiner(false),
+        HasFallthroughStmt(false), HasPotentialAvailabilityViolations(false),
+        ObjCShouldCallSuper(false), ObjCIsDesignatedInit(false),
+        ObjCWarnForNoDesignatedInitChain(false), ObjCIsSecondaryInit(false),
+        ObjCWarnForNoInitDelegation(false), NeedsCoroutineSuspends(true),
+        ErrorTrap(Diag) {}
+
+  virtual ~FunctionScopeInfo();
+
   /// Record that a weak object was accessed.
   ///
   /// Part of the implementation of -Wrepeated-use-of-weak.
@@ -430,25 +464,6 @@
     CoroutineSuspends.second = Final;
   }
 
-  FunctionScopeInfo(DiagnosticsEngine &Diag)
-    : Kind(SK_Function),
-      HasBranchProtectedScope(false),
-      HasBranchIntoScope(false),
-      HasIndirectGoto(false),
-      HasDroppedStmt(false),
-      HasOMPDeclareReductionCombiner(false),
-      HasFallthroughStmt(false),
-      HasPotentialAvailabilityViolations(false),
-      ObjCShouldCallSuper(false),
-      ObjCIsDesignatedInit(false),
-      ObjCWarnForNoDesignatedInitChain(false),
-      ObjCIsSecondaryInit(false),
-      ObjCWarnForNoInitDelegation(false),
-      NeedsCoroutineSuspends(true),
-      ErrorTrap(Diag) { }
-
-  virtual ~FunctionScopeInfo();
-
   /// \brief Clear out the information in this function scope, making it
   /// suitable for reuse.
   void Clear();
@@ -487,9 +502,11 @@
       IsNestedCapture = 0x1,
       IsThisCaptured = 0x2
     };
+
     /// The variable being captured (if we are not capturing 'this') and whether
     /// this is a nested capture, and whether we are capturing 'this'
     llvm::PointerIntPair<VarDecl*, 2> VarAndNestedAndThis;
+
     /// Expression to initialize a field of the given type, and the kind of
     /// capture (if this is a capture and not an init-capture). The expression
     /// is only required if we are capturing ByVal and the variable's type has
@@ -508,11 +525,11 @@
 
     /// \brief Whether an explicit capture has been odr-used in the body of the
     /// lambda.
-    bool ODRUsed;
+    bool ODRUsed = false;
 
     /// \brief Whether an explicit capture has been non-odr-used in the body of
     /// the lambda.
-    bool NonODRUsed;
+    bool NonODRUsed = false;
 
   public:
     Capture(VarDecl *Var, bool Block, bool ByRef, bool IsNested,
@@ -522,8 +539,7 @@
           InitExprAndCaptureKind(
               Cpy, !Var ? Cap_VLA : Block ? Cap_Block : ByRef ? Cap_ByRef
                                                               : Cap_ByCopy),
-          Loc(Loc), EllipsisLoc(EllipsisLoc), CaptureType(CaptureType),
-          ODRUsed(false), NonODRUsed(false) {}
+          Loc(Loc), EllipsisLoc(EllipsisLoc), CaptureType(CaptureType) {}
 
     enum IsThisCapture { ThisCapture };
     Capture(IsThisCapture, bool IsNested, SourceLocation Loc,
@@ -531,35 +547,42 @@
         : VarAndNestedAndThis(
               nullptr, (IsThisCaptured | (IsNested ? IsNestedCapture : 0))),
           InitExprAndCaptureKind(Cpy, ByCopy ? Cap_ByCopy : Cap_ByRef),
-          Loc(Loc), EllipsisLoc(), CaptureType(CaptureType), ODRUsed(false),
-          NonODRUsed(false) {}
+          Loc(Loc), CaptureType(CaptureType) {}
 
     bool isThisCapture() const {
       return VarAndNestedAndThis.getInt() & IsThisCaptured;
     }
+
     bool isVariableCapture() const {
       return !isThisCapture() && !isVLATypeCapture();
     }
+
     bool isCopyCapture() const {
       return InitExprAndCaptureKind.getInt() == Cap_ByCopy;
     }
+
     bool isReferenceCapture() const {
       return InitExprAndCaptureKind.getInt() == Cap_ByRef;
     }
+
     bool isBlockCapture() const {
       return InitExprAndCaptureKind.getInt() == Cap_Block;
     }
+
     bool isVLATypeCapture() const {
       return InitExprAndCaptureKind.getInt() == Cap_VLA;
     }
+
     bool isNested() const {
       return VarAndNestedAndThis.getInt() & IsNestedCapture;
     }
+
     bool isODRUsed() const { return ODRUsed; }
     bool isNonODRUsed() const { return NonODRUsed; }
     void markUsed(bool IsODRUse) { (IsODRUse ? ODRUsed : NonODRUsed) = true; }
 
     VarDecl *getVariable() const {
+      assert(isVariableCapture());
       return VarAndNestedAndThis.getPointer();
     }
     
@@ -585,23 +608,21 @@
   };
 
   CapturingScopeInfo(DiagnosticsEngine &Diag, ImplicitCaptureStyle Style)
-    : FunctionScopeInfo(Diag), ImpCaptureStyle(Style), CXXThisCaptureIndex(0),
-      HasImplicitReturnType(false)
-     {}
+      : FunctionScopeInfo(Diag), ImpCaptureStyle(Style) {}
 
   /// CaptureMap - A map of captured variables to (index+1) into Captures.
   llvm::DenseMap<VarDecl*, unsigned> CaptureMap;
 
   /// CXXThisCaptureIndex - The (index+1) of the capture of 'this';
   /// zero if 'this' is not captured.
-  unsigned CXXThisCaptureIndex;
+  unsigned CXXThisCaptureIndex = 0;
 
   /// Captures - The captures.
   SmallVector<Capture, 4> Captures;
 
   /// \brief - Whether the target type of return statements in this context
   /// is deduced (e.g. a lambda or block with omitted return type).
-  bool HasImplicitReturnType;
+  bool HasImplicitReturnType = false;
 
   /// ReturnType - The target type of return statements in this context,
   /// or null if unknown.
@@ -679,9 +700,8 @@
   QualType FunctionType;
 
   BlockScopeInfo(DiagnosticsEngine &Diag, Scope *BlockScope, BlockDecl *Block)
-    : CapturingScopeInfo(Diag, ImpCap_Block), TheDecl(Block),
-      TheScope(BlockScope)
-  {
+      : CapturingScopeInfo(Diag, ImpCap_Block), TheDecl(Block),
+        TheScope(BlockScope) {
     Kind = SK_Block;
   }
 
@@ -697,23 +717,27 @@
 public:
   /// \brief The CapturedDecl for this statement.
   CapturedDecl *TheCapturedDecl;
+
   /// \brief The captured record type.
   RecordDecl *TheRecordDecl;
+
   /// \brief This is the enclosing scope of the captured region.
   Scope *TheScope;
+
   /// \brief The implicit parameter for the captured variables.
   ImplicitParamDecl *ContextParam;
+
   /// \brief The kind of captured region.
   unsigned short CapRegionKind;
+
   unsigned short OpenMPLevel;
 
   CapturedRegionScopeInfo(DiagnosticsEngine &Diag, Scope *S, CapturedDecl *CD,
                           RecordDecl *RD, ImplicitParamDecl *Context,
                           CapturedRegionKind K, unsigned OpenMPLevel)
-    : CapturingScopeInfo(Diag, ImpCap_CapturedRegion),
-      TheCapturedDecl(CD), TheRecordDecl(RD), TheScope(S),
-      ContextParam(Context), CapRegionKind(K), OpenMPLevel(OpenMPLevel)
-  {
+      : CapturingScopeInfo(Diag, ImpCap_CapturedRegion),
+        TheCapturedDecl(CD), TheRecordDecl(RD), TheScope(S),
+        ContextParam(Context), CapRegionKind(K), OpenMPLevel(OpenMPLevel) {
     Kind = SK_CapturedRegion;
   }
 
@@ -738,10 +762,10 @@
 class LambdaScopeInfo final : public CapturingScopeInfo {
 public:
   /// \brief The class that describes the lambda.
-  CXXRecordDecl *Lambda;
+  CXXRecordDecl *Lambda = nullptr;
 
   /// \brief The lambda's compiler-generated \c operator().
-  CXXMethodDecl *CallOperator;
+  CXXMethodDecl *CallOperator = nullptr;
 
   /// \brief Source range covering the lambda introducer [...].
   SourceRange IntroducerRange;
@@ -752,23 +776,23 @@
 
   /// \brief The number of captures in the \c Captures list that are
   /// explicit captures.
-  unsigned NumExplicitCaptures;
+  unsigned NumExplicitCaptures = 0;
 
   /// \brief Whether this is a mutable lambda.
-  bool Mutable;
+  bool Mutable = false;
 
   /// \brief Whether the (empty) parameter list is explicit.
-  bool ExplicitParams;
+  bool ExplicitParams = false;
 
   /// \brief Whether any of the capture expressions requires cleanups.
   CleanupInfo Cleanup;
 
   /// \brief Whether the lambda contains an unexpanded parameter pack.
-  bool ContainsUnexpandedParameterPack;
+  bool ContainsUnexpandedParameterPack = false;
 
   /// \brief If this is a generic lambda, use this as the depth of 
   /// each 'auto' parameter, during initial AST construction.
-  unsigned AutoTemplateParameterDepth;
+  unsigned AutoTemplateParameterDepth = 0;
 
   /// \brief Store the list of the auto parameters for a generic lambda.
   /// If this is a generic lambda, store the list of the auto 
@@ -780,7 +804,7 @@
   /// If this is a generic lambda, and the template parameter
   /// list has been created (from the AutoTemplateParams) then
   /// store a reference to it (cache it to avoid reconstructing it).
-  TemplateParameterList *GLTemplateParameterList;
+  TemplateParameterList *GLTemplateParameterList = nullptr;
   
   /// \brief Contains all variable-referring-expressions (i.e. DeclRefExprs
   ///  or MemberExprs) that refer to local variables in a generic lambda
@@ -789,13 +813,12 @@
   ///  Potentially capturable variables of a nested lambda that might need 
   ///   to be captured by the lambda are housed here.  
   ///  This is specifically useful for generic lambdas or
-  ///  lambdas within a a potentially evaluated-if-used context.
+  ///  lambdas within a potentially evaluated-if-used context.
   ///  If an enclosing variable is named in an expression of a lambda nested
   ///  within a generic lambda, we don't always know know whether the variable 
   ///  will truly be odr-used (i.e. need to be captured) by that nested lambda,
   ///  until its instantiation. But we still need to capture it in the 
   ///  enclosing lambda if all intervening lambdas can capture the variable.
-
   llvm::SmallVector<Expr*, 4> PotentiallyCapturingExprs;
 
   /// \brief Contains all variable-referring-expressions that refer
@@ -816,11 +839,7 @@
   SourceLocation PotentialThisCaptureLocation;
 
   LambdaScopeInfo(DiagnosticsEngine &Diag)
-    : CapturingScopeInfo(Diag, ImpCap_None), Lambda(nullptr),
-      CallOperator(nullptr), NumExplicitCaptures(0), Mutable(false),
-      ExplicitParams(false), Cleanup{},
-      ContainsUnexpandedParameterPack(false), AutoTemplateParameterDepth(0),
-      GLTemplateParameterList(nullptr) {
+      : CapturingScopeInfo(Diag, ImpCap_None) {
     Kind = SK_Lambda;
   }
 
@@ -839,7 +858,6 @@
     return !AutoTemplateParams.empty() || GLTemplateParameterList;
   }
 
-  ///
   /// \brief Add a variable that might potentially be captured by the 
   /// lambda and therefore the enclosing lambdas. 
   /// 
@@ -865,6 +883,7 @@
   void addPotentialThisCapture(SourceLocation Loc) {
     PotentialThisCaptureLocation = Loc;
   }
+
   bool hasPotentialThisCapture() const { 
     return PotentialThisCaptureLocation.isValid(); 
   }
@@ -908,7 +927,6 @@
   ///  seemingly harmless change elsewhere in Sema could cause us to start or stop
   ///  building such a node. So we need a rule that anyone can implement and get
   ///  exactly the same result".
-  ///    
   void markVariableExprAsNonODRUsed(Expr *CapturingVarExpr) {
     assert(isa<DeclRefExpr>(CapturingVarExpr) 
         || isa<MemberExpr>(CapturingVarExpr));
@@ -944,7 +962,7 @@
 };
 
 FunctionScopeInfo::WeakObjectProfileTy::WeakObjectProfileTy()
-  : Base(nullptr, false), Property(nullptr) {}
+    : Base(nullptr, false) {}
 
 FunctionScopeInfo::WeakObjectProfileTy
 FunctionScopeInfo::WeakObjectProfileTy::getSentinel() {
@@ -969,7 +987,8 @@
   CXXThisCaptureIndex = Captures.size();
 }
 
-} // end namespace sema
-} // end namespace clang
+} // namespace sema
 
-#endif
+} // namespace clang
+
+#endif // LLVM_CLANG_SEMA_SCOPEINFO_H
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index 368ae18..35599bf 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -172,6 +172,7 @@
   class TemplateArgumentList;
   class TemplateArgumentLoc;
   class TemplateDecl;
+  class TemplateInstantiationCallback;
   class TemplateParameterList;
   class TemplatePartialOrderingContext;
   class TemplateTemplateParmDecl;
@@ -1339,7 +1340,7 @@
       getCurFunction()->recordUseOfWeak(E, IsRead);
   }
 
-  void PushCompoundScope();
+  void PushCompoundScope(bool IsStmtExpr);
   void PopCompoundScope();
 
   sema::CompoundScopeInfo &getCurCompoundScope() const;
@@ -1459,6 +1460,7 @@
       const PartialDiagnostic &DiagID, const PartialDiagnostic & NoteID,
       const FunctionProtoType *Old, SourceLocation OldLoc,
       const FunctionProtoType *New, SourceLocation NewLoc);
+  bool handlerCanCatch(QualType HandlerType, QualType ExceptionType);
   bool CheckExceptionSpecSubset(const PartialDiagnostic &DiagID,
                                 const PartialDiagnostic &NestedDiagID,
                                 const PartialDiagnostic &NoteID,
@@ -1555,7 +1557,7 @@
   /// visible at the specified location.
   void makeMergedDefinitionVisible(NamedDecl *ND);
 
-  bool isModuleVisible(Module *M) { return VisibleModules.isVisible(M); }
+  bool isModuleVisible(const Module *M) { return VisibleModules.isVisible(M); }
 
   /// Determine whether a declaration is visible to name lookup.
   bool isVisible(const NamedDecl *D) {
@@ -1724,7 +1726,6 @@
     ExprResult Expr;
     TemplateName Template;
     ParsedType Type;
-    const IdentifierInfo *Keyword;
 
     explicit NameClassification(NameClassificationKind Kind) : Kind(Kind) {}
 
@@ -1733,8 +1734,7 @@
 
     NameClassification(ParsedType Type) : Kind(NC_Type), Type(Type) {}
 
-    NameClassification(const IdentifierInfo *Keyword)
-      : Kind(NC_Keyword), Keyword(Keyword) { }
+    NameClassification(const IdentifierInfo *Keyword) : Kind(NC_Keyword) {}
 
     static NameClassification Error() {
       return NameClassification(NC_Error);
@@ -2237,7 +2237,17 @@
 
   bool CheckNontrivialField(FieldDecl *FD);
   void DiagnoseNontrivial(const CXXRecordDecl *Record, CXXSpecialMember CSM);
+
+  enum TrivialABIHandling {
+    /// The triviality of a method unaffected by "trivial_abi".
+    TAH_IgnoreTrivialABI,
+
+    /// The triviality of a method affected by "trivial_abi".
+    TAH_ConsiderTrivialABI
+  };
+
   bool SpecialMemberIsTrivial(CXXMethodDecl *MD, CXXSpecialMember CSM,
+                              TrivialABIHandling TAH = TAH_IgnoreTrivialABI,
                               bool Diagnose = false);
   CXXSpecialMember getSpecialMember(const CXXMethodDecl *MD);
   void ActOnLastBitfield(SourceLocation DeclStart,
@@ -2305,8 +2315,7 @@
                                       Expr *val);
   bool CheckEnumUnderlyingType(TypeSourceInfo *TI);
   bool CheckEnumRedeclaration(SourceLocation EnumLoc, bool IsScoped,
-                              QualType EnumUnderlyingTy,
-                              bool EnumUnderlyingIsImplicit,
+                              QualType EnumUnderlyingTy, bool IsFixed,
                               const EnumDecl *Prev);
 
   /// Determine whether the body of an anonymous enumeration should be skipped.
@@ -2707,7 +2716,8 @@
                       OverloadCandidateSet &CandidateSet,
                       TemplateArgumentListInfo *ExplicitTemplateArgs = nullptr,
                       bool SuppressUserConversions = false,
-                      bool PartialOverloading = false);
+                      bool PartialOverloading = false,
+                      bool FirstArgumentIsBase = false);
   void AddMethodCandidate(DeclAccessPair FoundDecl,
                           QualType ObjectType,
                           Expr::Classification ObjectClassification,
@@ -3200,11 +3210,13 @@
 
   void LookupVisibleDecls(Scope *S, LookupNameKind Kind,
                           VisibleDeclConsumer &Consumer,
-                          bool IncludeGlobalScope = true);
+                          bool IncludeGlobalScope = true,
+                          bool LoadExternal = true);
   void LookupVisibleDecls(DeclContext *Ctx, LookupNameKind Kind,
                           VisibleDeclConsumer &Consumer,
                           bool IncludeGlobalScope = true,
-                          bool IncludeDependentBases = false);
+                          bool IncludeDependentBases = false,
+                          bool LoadExternal = true);
 
   enum CorrectTypoKind {
     CTK_NonError,     // CorrectTypo used in a non error recovery situation.
@@ -3665,7 +3677,7 @@
   StmtResult ActOnNullStmt(SourceLocation SemiLoc,
                            bool HasLeadingEmptyMacro = false);
 
-  void ActOnStartOfCompoundStmt();
+  void ActOnStartOfCompoundStmt(bool IsStmtExpr);
   void ActOnFinishOfCompoundStmt();
   StmtResult ActOnCompoundStmt(SourceLocation L, SourceLocation R,
                                ArrayRef<Stmt *> Elts, bool isStmtExpr);
@@ -3673,8 +3685,8 @@
   /// \brief A RAII object to enter scope of a compound statement.
   class CompoundScopeRAII {
   public:
-    CompoundScopeRAII(Sema &S): S(S) {
-      S.ActOnStartOfCompoundStmt();
+    CompoundScopeRAII(Sema &S, bool IsStmtExpr = false) : S(S) {
+      S.ActOnStartOfCompoundStmt(IsStmtExpr);
     }
 
     ~CompoundScopeRAII() {
@@ -5104,14 +5116,16 @@
   /// or class type construction ("ClassType(x,y,z)")
   /// or creation of a value-initialized type ("int()").
   ExprResult ActOnCXXTypeConstructExpr(ParsedType TypeRep,
-                                       SourceLocation LParenLoc,
+                                       SourceLocation LParenOrBraceLoc,
                                        MultiExprArg Exprs,
-                                       SourceLocation RParenLoc);
+                                       SourceLocation RParenOrBraceLoc,
+                                       bool ListInitialization);
 
   ExprResult BuildCXXTypeConstructExpr(TypeSourceInfo *Type,
                                        SourceLocation LParenLoc,
                                        MultiExprArg Exprs,
-                                       SourceLocation RParenLoc);
+                                       SourceLocation RParenLoc,
+                                       bool ListInitialization);
 
   /// ActOnCXXNew - Parsed a C++ 'new' expression.
   ExprResult ActOnCXXNew(SourceLocation StartLoc, bool UseGlobal,
@@ -5137,7 +5151,8 @@
                                bool UseGlobal, QualType AllocType, bool IsArray,
                                bool &PassAlignment, MultiExprArg PlaceArgs,
                                FunctionDecl *&OperatorNew,
-                               FunctionDecl *&OperatorDelete);
+                               FunctionDecl *&OperatorDelete,
+                               bool Diagnose = true);
   void DeclareGlobalNewDelete();
   void DeclareGlobalAllocationFunction(DeclarationName Name, QualType Return,
                                        ArrayRef<QualType> Params);
@@ -5548,6 +5563,11 @@
   ExprResult BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
                              sema::LambdaScopeInfo *LSI);
 
+  /// Get the return type to use for a lambda's conversion function(s) to
+  /// function pointer type, given the type of the call operator.
+  QualType
+  getLambdaConversionFunctionResultType(const FunctionProtoType *CallOpType);
+
   /// \brief Define the "body" of the conversion from a lambda object to a
   /// function pointer.
   ///
@@ -5787,6 +5807,11 @@
       SourceLocation BaseLoc);
 
   void CheckCompletedCXXClass(CXXRecordDecl *Record);
+
+  /// Check that the C++ class annoated with "trivial_abi" satisfies all the
+  /// conditions that are needed for the attribute to have an effect.
+  void checkIllFormedTrivialABIStruct(CXXRecordDecl &RD);
+
   void ActOnFinishCXXMemberSpecification(Scope* S, SourceLocation RLoc,
                                          Decl *TagDecl,
                                          SourceLocation LBrac,
@@ -6065,7 +6090,7 @@
   void DiagnoseTemplateParameterShadow(SourceLocation Loc, Decl *PrevDecl);
   TemplateDecl *AdjustDeclIfTemplate(Decl *&Decl);
 
-  Decl *ActOnTypeParameter(Scope *S, bool Typename,
+  NamedDecl *ActOnTypeParameter(Scope *S, bool Typename,
                            SourceLocation EllipsisLoc,
                            SourceLocation KeyLoc,
                            IdentifierInfo *ParamName,
@@ -6078,12 +6103,12 @@
                                              SourceLocation Loc);
   QualType CheckNonTypeTemplateParameterType(QualType T, SourceLocation Loc);
 
-  Decl *ActOnNonTypeTemplateParameter(Scope *S, Declarator &D,
+  NamedDecl *ActOnNonTypeTemplateParameter(Scope *S, Declarator &D,
                                       unsigned Depth,
                                       unsigned Position,
                                       SourceLocation EqualLoc,
                                       Expr *DefaultArg);
-  Decl *ActOnTemplateTemplateParameter(Scope *S,
+  NamedDecl *ActOnTemplateTemplateParameter(Scope *S,
                                        SourceLocation TmpLoc,
                                        TemplateParameterList *Params,
                                        SourceLocation EllipsisLoc,
@@ -6143,6 +6168,8 @@
   void translateTemplateArguments(const ASTTemplateArgsPtr &In,
                                   TemplateArgumentListInfo &Out);
 
+  ParsedTemplateArgument ActOnTemplateTypeArgument(TypeResult ParsedType);
+
   void NoteAllFoundTemplates(TemplateName Name);
 
   QualType CheckTemplateIdType(TemplateName Template,
@@ -7103,6 +7130,12 @@
       /// We are defining a synthesized function (such as a defaulted special
       /// member).
       DefiningSynthesizedFunction,
+
+      /// Added for Template instantiation observation.
+      /// Memoization means we are _not_ instantiating a template because
+      /// it is already instantiated (but we entered a context where we
+      /// would have had to if it was not already instantiated).
+      Memoization
     } Kind;
 
     /// \brief Was the enclosing context a non-instantiation SFINAE context?
@@ -7211,6 +7244,14 @@
   // FIXME: Does this belong in Sema? It's tough to implement it anywhere else.
   unsigned LastEmittedCodeSynthesisContextDepth = 0;
 
+  /// \brief The template instantiation callbacks to trace or track
+  /// instantiations (objects can be chained).
+  ///
+  /// This callbacks is used to print, trace or track template
+  /// instantiations as they are being constructed.
+  std::vector<std::unique_ptr<TemplateInstantiationCallback>>
+      TemplateInstCallbacks;
+
   /// \brief The current index into pack expansion arguments that will be
   /// used for substitution of parameter packs.
   ///
@@ -7429,13 +7470,16 @@
     unsigned PrevSFINAEErrors;
     bool PrevInNonInstantiationSFINAEContext;
     bool PrevAccessCheckingSFINAE;
+    bool PrevLastDiagnosticIgnored;
 
   public:
     explicit SFINAETrap(Sema &SemaRef, bool AccessCheckingSFINAE = false)
       : SemaRef(SemaRef), PrevSFINAEErrors(SemaRef.NumSFINAEErrors),
         PrevInNonInstantiationSFINAEContext(
                                       SemaRef.InNonInstantiationSFINAEContext),
-        PrevAccessCheckingSFINAE(SemaRef.AccessCheckingSFINAE)
+        PrevAccessCheckingSFINAE(SemaRef.AccessCheckingSFINAE),
+        PrevLastDiagnosticIgnored(
+            SemaRef.getDiagnostics().isLastDiagnosticIgnored())
     {
       if (!SemaRef.isSFINAEContext())
         SemaRef.InNonInstantiationSFINAEContext = true;
@@ -7447,6 +7491,8 @@
       SemaRef.InNonInstantiationSFINAEContext
         = PrevInNonInstantiationSFINAEContext;
       SemaRef.AccessCheckingSFINAE = PrevAccessCheckingSFINAE;
+      SemaRef.getDiagnostics().setLastDiagnosticIgnored(
+          PrevLastDiagnosticIgnored);
     }
 
     /// \brief Determine whether any SFINAE errors have been trapped.
@@ -7756,6 +7802,9 @@
 
   void InstantiateExceptionSpec(SourceLocation PointOfInstantiation,
                                 FunctionDecl *Function);
+  FunctionDecl *InstantiateFunctionDeclaration(FunctionTemplateDecl *FTD,
+                                               const TemplateArgumentList *Args,
+                                               SourceLocation Loc);
   void InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
                                      FunctionDecl *Function,
                                      bool Recursive = false,
@@ -7786,11 +7835,6 @@
                                      VarDecl *Var, bool Recursive = false,
                                      bool DefinitionRequired = false,
                                      bool AtEndOfTU = false);
-  void InstantiateStaticDataMemberDefinition(
-                                     SourceLocation PointOfInstantiation,
-                                     VarDecl *Var,
-                                     bool Recursive = false,
-                                     bool DefinitionRequired = false);
 
   void InstantiateMemInitializers(CXXConstructorDecl *New,
                                   const CXXConstructorDecl *Tmpl,
@@ -8466,6 +8510,7 @@
   StmtResult BuildCoreturnStmt(SourceLocation KwLoc, Expr *E,
                                bool IsImplicit = false);
   StmtResult BuildCoroutineBodyStmt(CoroutineBodyStmt::CtorArgs);
+  bool buildCoroutineParameterMoves(SourceLocation Loc);
   VarDecl *buildCoroutinePromise(SourceLocation Loc);
   void CheckCompletedCoroutineBody(FunctionDecl *FD, Stmt *&Body);
 
@@ -8537,6 +8582,10 @@
   /// Returns OpenMP nesting level for current directive.
   unsigned getOpenMPNestingLevel() const;
 
+  /// Adjusts the function scopes index for the target-based regions.
+  void adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex,
+                                    unsigned Level) const;
+
   /// Push new OpenMP function region for non-capturing function.
   void pushOpenMPFunctionRegion();
 
@@ -8653,11 +8702,20 @@
                                     OMPDeclareTargetDeclAttr::MapTypeTy MT,
                                     NamedDeclSetType &SameDirectiveDecls);
   /// Check declaration inside target region.
-  void checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D);
-  /// Return true inside OpenMP target region.
+  void checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D,
+                                        SourceLocation IdLoc = SourceLocation());
+  /// Return true inside OpenMP declare target region.
   bool isInOpenMPDeclareTargetContext() const {
     return IsInOpenMPDeclareTargetContext;
   }
+  /// Return true inside OpenMP target region.
+  bool isInOpenMPTargetExecutionDirective() const;
+  /// Return true if (un)supported features for the current target should be
+  /// diagnosed if OpenMP (offloading) is enabled.
+  bool shouldDiagnoseTargetSupportFromOpenMP() const {
+    return !getLangOpts().OpenMPIsDevice || isInOpenMPDeclareTargetContext() ||
+      isInOpenMPTargetExecutionDirective();
+  }
 
   /// Return the number of captured regions created for an OpenMP directive.
   static int getOpenMPCaptureLevels(OpenMPDirectiveKind Kind);
@@ -8787,12 +8845,14 @@
   /// parsing of the associated statement.
   StmtResult ActOnOpenMPTargetEnterDataDirective(ArrayRef<OMPClause *> Clauses,
                                                  SourceLocation StartLoc,
-                                                 SourceLocation EndLoc);
+                                                 SourceLocation EndLoc,
+                                                 Stmt *AStmt);
   /// \brief Called on well-formed '\#pragma omp target exit data' after
   /// parsing of the associated statement.
   StmtResult ActOnOpenMPTargetExitDataDirective(ArrayRef<OMPClause *> Clauses,
                                                 SourceLocation StartLoc,
-                                                SourceLocation EndLoc);
+                                                SourceLocation EndLoc,
+                                                Stmt *AStmt);
   /// \brief Called on well-formed '\#pragma omp target parallel' after
   /// parsing of the associated statement.
   StmtResult ActOnOpenMPTargetParallelDirective(ArrayRef<OMPClause *> Clauses,
@@ -8841,7 +8901,8 @@
   /// \brief Called on well-formed '\#pragma omp target update'.
   StmtResult ActOnOpenMPTargetUpdateDirective(ArrayRef<OMPClause *> Clauses,
                                               SourceLocation StartLoc,
-                                              SourceLocation EndLoc);
+                                              SourceLocation EndLoc,
+                                              Stmt *AStmt);
   /// \brief Called on well-formed '\#pragma omp distribute parallel for' after
   /// parsing of the associated statement.
   StmtResult ActOnOpenMPDistributeParallelForDirective(
@@ -9557,7 +9618,8 @@
                                bool AllowBothBool, bool AllowBoolConversion);
   QualType GetSignedVectorType(QualType V);
   QualType CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS,
-                                      SourceLocation Loc, bool isRelational);
+                                      SourceLocation Loc,
+                                      BinaryOperatorKind Opc);
   QualType CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS,
                                       SourceLocation Loc);
 
@@ -10210,8 +10272,7 @@
   void CodeCompleteObjCPropertyDefinition(Scope *S);
   void CodeCompleteObjCPropertySynthesizeIvar(Scope *S,
                                               IdentifierInfo *PropertyName);
-  void CodeCompleteObjCMethodDecl(Scope *S,
-                                  bool IsInstanceMethod,
+  void CodeCompleteObjCMethodDecl(Scope *S, Optional<bool> IsInstanceMethod,
                                   ParsedType ReturnType);
   void CodeCompleteObjCMethodDeclSelector(Scope *S,
                                           bool IsInstanceMethod,
@@ -10382,7 +10443,10 @@
                           const AttrVec *Attrs = nullptr,
                           const FunctionDecl *FD = nullptr);
 
-  void CheckFloatComparison(SourceLocation Loc, Expr* LHS, Expr* RHS);
+public:
+  void CheckFloatComparison(SourceLocation Loc, Expr *LHS, Expr *RHS);
+
+private:
   void CheckImplicitConversions(Expr *E, SourceLocation CC = SourceLocation());
   void CheckBoolLikeConversion(Expr *E, SourceLocation CC);
   void CheckForIntOverflow(Expr *E);
@@ -10446,7 +10510,8 @@
   /// \brief Peform checks on a call of a function with argument_with_type_tag
   /// or pointer_with_type_tag attributes.
   void CheckArgumentWithTypeTag(const ArgumentWithTypeTagAttr *Attr,
-                                const Expr * const *ExprArgs);
+                                const ArrayRef<const Expr *> ExprArgs,
+                                SourceLocation CallSiteLoc);
 
   /// \brief Check if we are taking the address of a packed field
   /// as this may be a problem if the pointer value is dereferenced.
diff --git a/include/clang/Sema/Template.h b/include/clang/Sema/Template.h
index 644d55b..4dbb947 100644
--- a/include/clang/Sema/Template.h
+++ b/include/clang/Sema/Template.h
@@ -1,25 +1,48 @@
-//===------- SemaTemplate.h - C++ Templates ---------------------*- C++ -*-===/
+//===- SemaTemplate.h - C++ Templates ---------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-//===----------------------------------------------------------------------===/
+//===----------------------------------------------------------------------===//
 //
-//  This file provides types used in the semantic analysis of C++ templates.
+// This file provides types used in the semantic analysis of C++ templates.
 //
-//===----------------------------------------------------------------------===/
+//===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_SEMA_TEMPLATE_H
 #define LLVM_CLANG_SEMA_TEMPLATE_H
 
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/DeclVisitor.h"
+#include "clang/AST/TemplateBase.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Sema/Sema.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
 #include <cassert>
 #include <utility>
 
 namespace clang {
+
+class ASTContext;
+class BindingDecl;
+class CXXMethodDecl;
+class Decl;
+class DeclaratorDecl;
+class DeclContext;
+class EnumDecl;
+class FunctionDecl;
+class NamedDecl;
+class ParmVarDecl;
+class TagDecl;
+class TypedefNameDecl;
+class TypeSourceInfo;
+class VarDecl;
+
   /// \brief Data structure that captures multiple levels of template argument
   /// lists for use in template instantiation.
   ///
@@ -41,7 +64,7 @@
   /// template argument list (17) at depth 1.
   class MultiLevelTemplateArgumentList {
     /// \brief The template argument list at a certain template depth 
-    typedef ArrayRef<TemplateArgument> ArgList;
+    using ArgList = ArrayRef<TemplateArgument>;
 
     /// \brief The template argument lists, stored from the innermost template
     /// argument list (first) to the outermost template argument list (last).
@@ -53,7 +76,7 @@
     
   public:
     /// \brief Construct an empty set of template argument lists.
-    MultiLevelTemplateArgumentList() { }
+    MultiLevelTemplateArgumentList() = default;
     
     /// \brief Construct a single-level template argument list.
     explicit 
@@ -129,7 +152,7 @@
     }
 
     /// \brief Retrieve the innermost template argument list.
-    const ArgList &getInnermost() const { 
+    const ArgList &getInnermost() const {
       return TemplateArgumentLists.front(); 
     }
   };
@@ -138,9 +161,11 @@
   enum TPOC {
     /// \brief Partial ordering of function templates for a function call.
     TPOC_Call,
+
     /// \brief Partial ordering of function templates for a call to a 
     /// conversion function.
     TPOC_Conversion,
+
     /// \brief Partial ordering of function templates in other contexts, e.g.,
     /// taking the address of a function template or matching a function 
     /// template specialization to a function template.
@@ -153,8 +178,10 @@
   // making Sema.h declare things as enums).
   class TemplatePartialOrderingContext {
     TPOC Value;
+
   public:
     TemplatePartialOrderingContext(TPOC Value) : Value(Value) {}
+
     operator TPOC() const { return Value; }
   };
 
@@ -163,15 +190,14 @@
   class DeducedTemplateArgument : public TemplateArgument {
     /// \brief For a non-type template argument, whether the value was
     /// deduced from an array bound.
-    bool DeducedFromArrayBound;
+    bool DeducedFromArrayBound = false;
 
   public:
-    DeducedTemplateArgument()
-      : TemplateArgument(), DeducedFromArrayBound(false) { }
+    DeducedTemplateArgument() = default;
 
     DeducedTemplateArgument(const TemplateArgument &Arg,
                             bool DeducedFromArrayBound = false)
-      : TemplateArgument(Arg), DeducedFromArrayBound(DeducedFromArrayBound) { }
+        : TemplateArgument(Arg), DeducedFromArrayBound(DeducedFromArrayBound) {}
 
     /// \brief Construct an integral non-type template argument that
     /// has been deduced, possibly from an array bound.
@@ -179,8 +205,8 @@
                             const llvm::APSInt &Value,
                             QualType ValueType,
                             bool DeducedFromArrayBound)
-      : TemplateArgument(Ctx, Value, ValueType),
-        DeducedFromArrayBound(DeducedFromArrayBound) { }
+        : TemplateArgument(Ctx, Value, ValueType),
+          DeducedFromArrayBound(DeducedFromArrayBound) {}
 
     /// \brief For a non-type template argument, determine whether the
     /// template argument was deduced from an array bound.
@@ -202,16 +228,16 @@
   class LocalInstantiationScope {
   public:
     /// \brief A set of declarations.
-    typedef SmallVector<ParmVarDecl *, 4> DeclArgumentPack;
+    using DeclArgumentPack = SmallVector<ParmVarDecl *, 4>;
 
   private:
     /// \brief Reference to the semantic analysis that is performing
     /// this template instantiation.
     Sema &SemaRef;
 
-    typedef llvm::SmallDenseMap<
-        const Decl *, llvm::PointerUnion<Decl *, DeclArgumentPack *>, 4>
-    LocalDeclsMap;
+    using LocalDeclsMap =
+        llvm::SmallDenseMap<const Decl *,
+                            llvm::PointerUnion<Decl *, DeclArgumentPack *>, 4>;
 
     /// \brief A mapping from local declarations that occur
     /// within a template to their instantiations.
@@ -242,7 +268,7 @@
     LocalInstantiationScope *Outer;
 
     /// \brief Whether we have already exited this scope.
-    bool Exited;
+    bool Exited = false;
 
     /// \brief Whether to combine this scope with the outer scope, such that
     /// lookup will search our outer scope.
@@ -250,7 +276,7 @@
     
     /// \brief If non-NULL, the template parameter pack that has been
     /// partially substituted per C++0x [temp.arg.explicit]p9.
-    NamedDecl *PartiallySubstitutedPack;
+    NamedDecl *PartiallySubstitutedPack = nullptr;
     
     /// \brief If \c PartiallySubstitutedPack is non-null, the set of
     /// explicitly-specified template arguments in that pack.
@@ -261,20 +287,17 @@
     /// ArgsInPartiallySubstitutedPack.
     unsigned NumArgsInPartiallySubstitutedPack;
 
-    // This class is non-copyable
-    LocalInstantiationScope(
-      const LocalInstantiationScope &) = delete;
-    void operator=(const LocalInstantiationScope &) = delete;
-
   public:
     LocalInstantiationScope(Sema &SemaRef, bool CombineWithOuterScope = false)
-      : SemaRef(SemaRef), Outer(SemaRef.CurrentInstantiationScope),
-        Exited(false), CombineWithOuterScope(CombineWithOuterScope),
-        PartiallySubstitutedPack(nullptr)
-    {
+        : SemaRef(SemaRef), Outer(SemaRef.CurrentInstantiationScope),
+          CombineWithOuterScope(CombineWithOuterScope) {
       SemaRef.CurrentInstantiationScope = this;
     }
 
+    LocalInstantiationScope(const LocalInstantiationScope &) = delete;
+    LocalInstantiationScope &
+    operator=(const LocalInstantiationScope &) = delete;
+
     ~LocalInstantiationScope() {
       Exit();
     }
@@ -399,8 +422,8 @@
     Sema::ArgumentPackSubstitutionIndexRAII SubstIndex;
     DeclContext *Owner;
     const MultiLevelTemplateArgumentList &TemplateArgs;
-    Sema::LateInstantiatedAttrVec* LateAttrs;
-    LocalInstantiationScope *StartingScope;
+    Sema::LateInstantiatedAttrVec* LateAttrs = nullptr;
+    LocalInstantiationScope *StartingScope = nullptr;
 
     /// \brief A list of out-of-line class template partial
     /// specializations that will need to be instantiated after the
@@ -420,10 +443,9 @@
   public:
     TemplateDeclInstantiator(Sema &SemaRef, DeclContext *Owner,
                              const MultiLevelTemplateArgumentList &TemplateArgs)
-      : SemaRef(SemaRef),
-        SubstIndex(SemaRef, SemaRef.ArgumentPackSubstitutionIndex),
-        Owner(Owner), TemplateArgs(TemplateArgs), LateAttrs(nullptr),
-        StartingScope(nullptr) {}
+        : SemaRef(SemaRef),
+          SubstIndex(SemaRef, SemaRef.ArgumentPackSubstitutionIndex),
+          Owner(Owner), TemplateArgs(TemplateArgs) {}
 
 // Define all the decl visitors using DeclNodes.inc
 #define DECL(DERIVED, BASE) \
@@ -476,15 +498,11 @@
 
     LocalInstantiationScope *getStartingScope() const { return StartingScope; }
 
-    typedef 
-      SmallVectorImpl<std::pair<ClassTemplateDecl *,
-                                     ClassTemplatePartialSpecializationDecl *> >
-        ::iterator
-      delayed_partial_spec_iterator;
+    using delayed_partial_spec_iterator = SmallVectorImpl<std::pair<
+      ClassTemplateDecl *, ClassTemplatePartialSpecializationDecl *>>::iterator;
 
-    typedef SmallVectorImpl<std::pair<
-        VarTemplateDecl *, VarTemplatePartialSpecializationDecl *> >::iterator
-    delayed_var_partial_spec_iterator;
+    using delayed_var_partial_spec_iterator = SmallVectorImpl<std::pair<
+        VarTemplateDecl *, VarTemplatePartialSpecializationDecl *>>::iterator;
 
     /// \brief Return an iterator to the beginning of the set of
     /// "delayed" partial specializations, which must be passed to
@@ -545,6 +563,7 @@
     Decl *instantiateUnresolvedUsingDecl(T *D,
                                          bool InstantiatingPackElement = false);
   };  
-}
+
+} // namespace clang
 
 #endif // LLVM_CLANG_SEMA_TEMPLATE_H
diff --git a/include/clang/Sema/TemplateDeduction.h b/include/clang/Sema/TemplateDeduction.h
index cd9ed6a..41e175b 100644
--- a/include/clang/Sema/TemplateDeduction.h
+++ b/include/clang/Sema/TemplateDeduction.h
@@ -1,26 +1,35 @@
-//===- TemplateDeduction.h - C++ template argument deduction ----*- C++ -*-===/
+//===- TemplateDeduction.h - C++ template argument deduction ----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-//===----------------------------------------------------------------------===/
 //
-//  This file provides types used with Sema's template argument deduction
+//===----------------------------------------------------------------------===//
+//
+// This file provides types used with Sema's template argument deduction
 // routines.
 //
-//===----------------------------------------------------------------------===/
+//===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_SEMA_TEMPLATEDEDUCTION_H
 #define LLVM_CLANG_SEMA_TEMPLATEDEDUCTION_H
 
+#include "clang/AST/DeclAccessPair.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/TemplateBase.h"
 #include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
+#include <cassert>
+#include <cstddef>
+#include <utility>
 
 namespace clang {
 
+class Decl;
 struct DeducedPack;
-class TemplateArgumentList;
 class Sema;
 
 namespace sema {
@@ -30,15 +39,14 @@
 /// TemplateDeductionResult value.
 class TemplateDeductionInfo {
   /// \brief The deduced template argument list.
-  ///
-  TemplateArgumentList *Deduced;
+  TemplateArgumentList *Deduced = nullptr;
 
   /// \brief The source location at which template argument
   /// deduction is occurring.
   SourceLocation Loc;
 
   /// \brief Have we suppressed an error during deduction?
-  bool HasSFINAEDiagnostic;
+  bool HasSFINAEDiagnostic = false;
 
   /// \brief The template parameter depth for which we're performing deduction.
   unsigned DeducedDepth;
@@ -47,13 +55,11 @@
   /// SFINAE while performing template argument deduction.
   SmallVector<PartialDiagnosticAt, 4> SuppressedDiagnostics;
 
-  TemplateDeductionInfo(const TemplateDeductionInfo &) = delete;
-  void operator=(const TemplateDeductionInfo &) = delete;
-
 public:
   TemplateDeductionInfo(SourceLocation Loc, unsigned DeducedDepth = 0)
-    : Deduced(nullptr), Loc(Loc), HasSFINAEDiagnostic(false),
-      DeducedDepth(DeducedDepth), CallArgIndex(0) {}
+      : Loc(Loc), DeducedDepth(DeducedDepth) {}
+  TemplateDeductionInfo(const TemplateDeductionInfo &) = delete;
+  TemplateDeductionInfo &operator=(const TemplateDeductionInfo &) = delete;
 
   /// \brief Returns the location at which template argument is
   /// occurring.
@@ -124,8 +130,7 @@
   }
 
   /// \brief Iterator over the set of suppressed diagnostics.
-  typedef SmallVectorImpl<PartialDiagnosticAt>::const_iterator
-    diag_iterator;
+  using diag_iterator = SmallVectorImpl<PartialDiagnosticAt>::const_iterator;
 
   /// \brief Returns an iterator at the beginning of the sequence of suppressed
   /// diagnostics.
@@ -186,7 +191,7 @@
   ///
   ///   TDK_DeducedMismatch: this is the index of the argument that had a
   ///   different argument type from its substituted parameter type.
-  unsigned CallArgIndex;
+  unsigned CallArgIndex = 0;
 
   /// \brief Information on packs that we're currently expanding.
   ///
@@ -194,7 +199,7 @@
   SmallVector<DeducedPack *, 8> PendingDeducedPacks;
 };
 
-} // end namespace sema
+} // namespace sema
 
 /// A structure used to record information about a failed
 /// template argument deduction, for diagnosis.
@@ -276,20 +281,20 @@
 class TemplateSpecCandidateSet {
   SmallVector<TemplateSpecCandidate, 16> Candidates;
   SourceLocation Loc;
+
   // Stores whether we're taking the address of these candidates. This helps us
   // produce better error messages when dealing with the pass_object_size
   // attribute on parameters.
   bool ForTakingAddress;
 
-  TemplateSpecCandidateSet(
-      const TemplateSpecCandidateSet &) = delete;
-  void operator=(const TemplateSpecCandidateSet &) = delete;
-
   void destroyCandidates();
 
 public:
   TemplateSpecCandidateSet(SourceLocation Loc, bool ForTakingAddress = false)
       : Loc(Loc), ForTakingAddress(ForTakingAddress) {}
+  TemplateSpecCandidateSet(const TemplateSpecCandidateSet &) = delete;
+  TemplateSpecCandidateSet &
+  operator=(const TemplateSpecCandidateSet &) = delete;
   ~TemplateSpecCandidateSet() { destroyCandidates(); }
 
   SourceLocation getLocation() const { return Loc; }
@@ -298,7 +303,8 @@
   /// TODO: This may be unnecessary.
   void clear();
 
-  typedef SmallVector<TemplateSpecCandidate, 16>::iterator iterator;
+  using iterator = SmallVector<TemplateSpecCandidate, 16>::iterator;
+
   iterator begin() { return Candidates.begin(); }
   iterator end() { return Candidates.end(); }
 
@@ -319,6 +325,6 @@
   }
 };
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_SEMA_TEMPLATEDEDUCTION_H
diff --git a/include/clang/Sema/TemplateInstCallback.h b/include/clang/Sema/TemplateInstCallback.h
new file mode 100644
index 0000000..14a3d0b
--- /dev/null
+++ b/include/clang/Sema/TemplateInstCallback.h
@@ -0,0 +1,83 @@
+//===- TemplateInstCallback.h - Template Instantiation Callback - C++ --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file defines the TemplateInstantiationCallback class, which is the
+// base class for callbacks that will be notified at template instantiations.
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TEMPLATE_INST_CALLBACK_H
+#define LLVM_CLANG_TEMPLATE_INST_CALLBACK_H
+
+#include "clang/Sema/Sema.h"
+
+namespace clang {
+
+/// \brief This is a base class for callbacks that will be notified at every
+/// template instantiation.
+class TemplateInstantiationCallback {
+public:
+  virtual ~TemplateInstantiationCallback() = default;
+
+  /// \brief Called before doing AST-parsing.
+  virtual void initialize(const Sema &TheSema) = 0;
+
+  /// \brief Called after AST-parsing is completed.
+  virtual void finalize(const Sema &TheSema) = 0;
+
+  /// \brief Called when instantiation of a template just began.
+  virtual void atTemplateBegin(const Sema &TheSema,
+                               const Sema::CodeSynthesisContext &Inst) = 0;
+
+  /// \brief Called when instantiation of a template is just about to end.
+  virtual void atTemplateEnd(const Sema &TheSema,
+                             const Sema::CodeSynthesisContext &Inst) = 0;
+};
+
+template <class TemplateInstantiationCallbackPtrs>
+void initialize(TemplateInstantiationCallbackPtrs &Callbacks,
+                const Sema &TheSema) {
+  for (auto &C : Callbacks) {
+    if (C)
+      C->initialize(TheSema);
+  }
+}
+
+template <class TemplateInstantiationCallbackPtrs>
+void finalize(TemplateInstantiationCallbackPtrs &Callbacks,
+              const Sema &TheSema) {
+  for (auto &C : Callbacks) {
+    if (C)
+      C->finalize(TheSema);
+  }
+}
+
+template <class TemplateInstantiationCallbackPtrs>
+void atTemplateBegin(TemplateInstantiationCallbackPtrs &Callbacks,
+                     const Sema &TheSema,
+                     const Sema::CodeSynthesisContext &Inst) {
+  for (auto &C : Callbacks) {
+    if (C)
+      C->atTemplateBegin(TheSema, Inst);
+  }
+}
+
+template <class TemplateInstantiationCallbackPtrs>
+void atTemplateEnd(TemplateInstantiationCallbackPtrs &Callbacks,
+                   const Sema &TheSema,
+                   const Sema::CodeSynthesisContext &Inst) {
+  for (auto &C : Callbacks) {
+    if (C)
+      C->atTemplateEnd(TheSema, Inst);
+  }
+}
+
+} // namespace clang
+
+#endif
diff --git a/include/clang/Sema/TypoCorrection.h b/include/clang/Sema/TypoCorrection.h
index 5bebe52..a5cd5f9 100644
--- a/include/clang/Sema/TypoCorrection.h
+++ b/include/clang/Sema/TypoCorrection.h
@@ -1,4 +1,4 @@
-//===--- TypoCorrection.h - Class for typo correction results ---*- C++ -*-===//
+//===- TypoCorrection.h - Class for typo correction results -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,18 +15,36 @@
 #ifndef LLVM_CLANG_SEMA_TYPOCORRECTION_H
 #define LLVM_CLANG_SEMA_TYPOCORRECTION_H
 
-#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Sema/DeclSpec.h"
-#include "clang/Sema/Ownership.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+#include <cstddef>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
 
 namespace clang {
 
+class DeclContext;
+class IdentifierInfo;
+class LangOptions;
+class MemberExpr;
+class NestedNameSpecifier;
+class Sema;
+
 /// @brief Simple class containing the result of Sema::CorrectTypo
 class TypoCorrection {
 public:
   // "Distance" for unusable corrections
-  static const unsigned InvalidDistance = ~0U;
+  static const unsigned InvalidDistance = std::numeric_limits<unsigned>::max();
+
   // The largest distance still considered valid (larger edit distances are
   // mapped to InvalidDistance by getEditDistance).
   static const unsigned MaximumDistance = 10000U;
@@ -43,9 +61,7 @@
                  NestedNameSpecifier *NNS = nullptr, unsigned CharDistance = 0,
                  unsigned QualifierDistance = 0)
       : CorrectionName(Name), CorrectionNameSpec(NNS),
-        CharDistance(CharDistance), QualifierDistance(QualifierDistance),
-        CallbackDistance(0), ForceSpecifierReplacement(false),
-        RequiresImport(false) {
+        CharDistance(CharDistance), QualifierDistance(QualifierDistance) {
     if (NameDecl)
       CorrectionDecls.push_back(NameDecl);
   }
@@ -53,8 +69,7 @@
   TypoCorrection(NamedDecl *Name, NestedNameSpecifier *NNS = nullptr,
                  unsigned CharDistance = 0)
       : CorrectionName(Name->getDeclName()), CorrectionNameSpec(NNS),
-        CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0),
-        ForceSpecifierReplacement(false), RequiresImport(false) {
+        CharDistance(CharDistance) {
     if (Name)
       CorrectionDecls.push_back(Name);
   }
@@ -62,16 +77,13 @@
   TypoCorrection(DeclarationName Name, NestedNameSpecifier *NNS = nullptr,
                  unsigned CharDistance = 0)
       : CorrectionName(Name), CorrectionNameSpec(NNS),
-        CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0),
-        ForceSpecifierReplacement(false), RequiresImport(false) {}
+        CharDistance(CharDistance) {}
 
-  TypoCorrection()
-      : CorrectionNameSpec(nullptr), CharDistance(0), QualifierDistance(0),
-        CallbackDistance(0), ForceSpecifierReplacement(false),
-        RequiresImport(false) {}
+  TypoCorrection() = default;
 
   /// \brief Gets the DeclarationName of the typo correction
   DeclarationName getCorrection() const { return CorrectionName; }
+
   IdentifierInfo *getCorrectionAsIdentifierInfo() const {
     return CorrectionName.getAsIdentifierInfo();
   }
@@ -80,6 +92,7 @@
   NestedNameSpecifier *getCorrectionSpecifier() const {
     return CorrectionNameSpec;
   }
+
   void setCorrectionSpecifier(NestedNameSpecifier *NNS) {
     CorrectionNameSpec = NNS;
     ForceSpecifierReplacement = (NNS != nullptr);
@@ -167,6 +180,7 @@
   void addCorrectionDecl(NamedDecl *CDecl);
 
   std::string getAsString(const LangOptions &LO) const;
+
   std::string getQuoted(const LangOptions &LO) const {
     return "'" + getAsString(LO) + "'";
   }
@@ -214,15 +228,20 @@
     return CorrectionRange;
   }
 
-  typedef SmallVectorImpl<NamedDecl *>::iterator decl_iterator;
+  using decl_iterator = SmallVectorImpl<NamedDecl *>::iterator;
+
   decl_iterator begin() {
     return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin();
   }
+
   decl_iterator end() { return CorrectionDecls.end(); }
-  typedef SmallVectorImpl<NamedDecl *>::const_iterator const_decl_iterator;
+
+  using const_decl_iterator = SmallVectorImpl<NamedDecl *>::const_iterator;
+
   const_decl_iterator begin() const {
     return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin();
   }
+
   const_decl_iterator end() const { return CorrectionDecls.end(); }
 
   /// \brief Returns whether this typo correction is correcting to a
@@ -246,14 +265,14 @@
 
   // Results.
   DeclarationName CorrectionName;
-  NestedNameSpecifier *CorrectionNameSpec;
+  NestedNameSpecifier *CorrectionNameSpec = nullptr;
   SmallVector<NamedDecl *, 1> CorrectionDecls;
-  unsigned CharDistance;
-  unsigned QualifierDistance;
-  unsigned CallbackDistance;
+  unsigned CharDistance = 0;
+  unsigned QualifierDistance = 0;
+  unsigned CallbackDistance = 0;
   SourceRange CorrectionRange;
-  bool ForceSpecifierReplacement;
-  bool RequiresImport;
+  bool ForceSpecifierReplacement = false;
+  bool RequiresImport = false;
 
   std::vector<PartialDiagnostic> ExtraDiagnostics;
 };
@@ -266,13 +285,9 @@
 
   explicit CorrectionCandidateCallback(IdentifierInfo *Typo = nullptr,
                                        NestedNameSpecifier *TypoNNS = nullptr)
-      : WantTypeSpecifiers(true), WantExpressionKeywords(true),
-        WantCXXNamedCasts(true), WantFunctionLikeCasts(true),
-        WantRemainingKeywords(true), WantObjCSuper(false),
-        IsObjCIvarLookup(false), IsAddressOfOperand(false), Typo(Typo),
-        TypoNNS(TypoNNS) {}
+      : Typo(Typo), TypoNNS(TypoNNS) {}
 
-  virtual ~CorrectionCandidateCallback() {}
+  virtual ~CorrectionCandidateCallback() = default;
 
   /// \brief Simple predicate used by the default RankCandidate to
   /// determine whether to return an edit distance of 0 or InvalidDistance.
@@ -304,16 +319,16 @@
   // Flags for context-dependent keywords. WantFunctionLikeCasts is only
   // used/meaningful when WantCXXNamedCasts is false.
   // TODO: Expand these to apply to non-keywords or possibly remove them.
-  bool WantTypeSpecifiers;
-  bool WantExpressionKeywords;
-  bool WantCXXNamedCasts;
-  bool WantFunctionLikeCasts;
-  bool WantRemainingKeywords;
-  bool WantObjCSuper;
+  bool WantTypeSpecifiers = true;
+  bool WantExpressionKeywords = true;
+  bool WantCXXNamedCasts = true;
+  bool WantFunctionLikeCasts = true;
+  bool WantRemainingKeywords = true;
+  bool WantObjCSuper = false;
   // Temporary hack for the one case where a CorrectTypoContext enum is used
   // when looking up results.
-  bool IsObjCIvarLookup;
-  bool IsAddressOfOperand;
+  bool IsObjCIvarLookup = false;
+  bool IsAddressOfOperand = false;
 
 protected:
   bool MatchesTypo(const TypoCorrection &candidate) {
@@ -349,7 +364,7 @@
 
   bool ValidateCandidate(const TypoCorrection &candidate) override;
 
- private:
+private:
   unsigned NumArgs;
   bool HasExplicitTemplateArgs;
   DeclContext *CurContext;
@@ -372,6 +387,6 @@
   }
 };
 
-}
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_SEMA_TYPOCORRECTION_H
diff --git a/include/clang/Serialization/ASTBitCodes.h b/include/clang/Serialization/ASTBitCodes.h
index 42c6225..1f4e034 100644
--- a/include/clang/Serialization/ASTBitCodes.h
+++ b/include/clang/Serialization/ASTBitCodes.h
@@ -14,17 +14,23 @@
 // respective lists.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_SERIALIZATION_ASTBITCODES_H
 #define LLVM_CLANG_SERIALIZATION_ASTBITCODES_H
 
 #include "clang/AST/DeclarationName.h"
 #include "clang/AST/Type.h"
-#include "llvm/ADT/DenseMap.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Bitcode/BitCodes.h"
-#include "llvm/Support/DataTypes.h"
+#include <cassert>
+#include <cstdint>
 
 namespace clang {
-  namespace serialization {
+namespace serialization {
+
     /// \brief AST file major version number supported by this version of
     /// Clang.
     ///
@@ -52,7 +58,7 @@
     /// 
     /// The ID numbers of identifiers are consecutive (in order of discovery)
     /// and start at 1. 0 is reserved for NULL.
-    typedef uint32_t IdentifierID;
+    using IdentifierID = uint32_t;
     
     /// \brief An ID number that refers to a declaration in an AST file.
     ///
@@ -60,12 +66,12 @@
     /// discovery), with values below NUM_PREDEF_DECL_IDS being reserved. 
     /// At the start of a chain of precompiled headers, declaration ID 1 is 
     /// used for the translation unit declaration.
-    typedef uint32_t DeclID;
+    using DeclID = uint32_t;
 
     // FIXME: Turn these into classes so we can have some type safety when
     // we go from local ID to global and vice-versa.
-    typedef DeclID LocalDeclID;
-    typedef DeclID GlobalDeclID;
+    using LocalDeclID = DeclID;
+    using GlobalDeclID = DeclID;
 
     /// \brief An ID number that refers to a type in an AST file.
     ///
@@ -77,22 +83,25 @@
     /// IDs (based on the PREDEF_TYPE_*_ID constants), with 0 as a
     /// placeholder for "no type". Values from NUM_PREDEF_TYPE_IDs are
     /// other types that have serialized representations.
-    typedef uint32_t TypeID;
+    using TypeID = uint32_t;
 
     /// \brief A type index; the type ID with the qualifier bits removed.
     class TypeIdx {
-      uint32_t Idx;
+      uint32_t Idx = 0;
+
     public:
-      TypeIdx() : Idx(0) { }
-      explicit TypeIdx(uint32_t index) : Idx(index) { }
+      TypeIdx() = default;
+      explicit TypeIdx(uint32_t index) : Idx(index) {}
 
       uint32_t getIndex() const { return Idx; }
+
       TypeID asTypeID(unsigned FastQuals) const {
         if (Idx == uint32_t(-1))
           return TypeID(-1);
         
         return (Idx << Qualifiers::FastWidth) | FastQuals;
       }
+
       static TypeIdx fromTypeID(TypeID ID) {
         if (ID == TypeID(-1))
           return TypeIdx(-1);
@@ -104,14 +113,17 @@
     /// A structure for putting "fast"-unqualified QualTypes into a
     /// DenseMap.  This uses the standard pointer hash function.
     struct UnsafeQualTypeDenseMapInfo {
-      static inline bool isEqual(QualType A, QualType B) { return A == B; }
-      static inline QualType getEmptyKey() {
+      static bool isEqual(QualType A, QualType B) { return A == B; }
+
+      static QualType getEmptyKey() {
         return QualType::getFromOpaquePtr((void*) 1);
       }
-      static inline QualType getTombstoneKey() {
+
+      static QualType getTombstoneKey() {
         return QualType::getFromOpaquePtr((void*) 2);
       }
-      static inline unsigned getHashValue(QualType T) {
+
+      static unsigned getHashValue(QualType T) {
         assert(!T.getLocalFastQualifiers() && 
                "hash invalid for types with fast quals");
         uintptr_t v = reinterpret_cast<uintptr_t>(T.getAsOpaquePtr());
@@ -120,44 +132,44 @@
     };
 
     /// \brief An ID number that refers to an identifier in an AST file.
-    typedef uint32_t IdentID;
+    using IdentID = uint32_t;
 
     /// \brief The number of predefined identifier IDs.
     const unsigned int NUM_PREDEF_IDENT_IDS = 1;
 
     /// \brief An ID number that refers to a macro in an AST file.
-    typedef uint32_t MacroID;
+    using MacroID = uint32_t;
 
     /// \brief A global ID number that refers to a macro in an AST file.
-    typedef uint32_t GlobalMacroID;
+    using GlobalMacroID = uint32_t;
 
     /// \brief A local to a module ID number that refers to a macro in an
     /// AST file.
-    typedef uint32_t LocalMacroID;
+    using LocalMacroID = uint32_t;
 
     /// \brief The number of predefined macro IDs.
     const unsigned int NUM_PREDEF_MACRO_IDS = 1;
 
     /// \brief An ID number that refers to an ObjC selector in an AST file.
-    typedef uint32_t SelectorID;
+    using SelectorID = uint32_t;
 
     /// \brief The number of predefined selector IDs.
     const unsigned int NUM_PREDEF_SELECTOR_IDS = 1;
     
     /// \brief An ID number that refers to a set of CXXBaseSpecifiers in an 
     /// AST file.
-    typedef uint32_t CXXBaseSpecifiersID;
+    using CXXBaseSpecifiersID = uint32_t;
 
     /// \brief An ID number that refers to a list of CXXCtorInitializers in an
     /// AST file.
-    typedef uint32_t CXXCtorInitializersID;
+    using CXXCtorInitializersID = uint32_t;
 
     /// \brief An ID number that refers to an entity in the detailed
     /// preprocessing record.
-    typedef uint32_t PreprocessedEntityID;
+    using PreprocessedEntityID = uint32_t;
 
     /// \brief An ID number that refers to a submodule in a module file.
-    typedef uint32_t SubmoduleID;
+    using SubmoduleID = uint32_t;
     
     /// \brief The number of predefined submodule IDs.
     const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;
@@ -166,15 +178,37 @@
     struct PPEntityOffset {
       /// \brief Raw source location of beginning of range.
       unsigned Begin;
+
       /// \brief Raw source location of end of range.
       unsigned End;
+
       /// \brief Offset in the AST file.
       uint32_t BitOffset;
 
       PPEntityOffset(SourceRange R, uint32_t BitOffset)
         : Begin(R.getBegin().getRawEncoding()),
-          End(R.getEnd().getRawEncoding()),
-          BitOffset(BitOffset) { }
+          End(R.getEnd().getRawEncoding()), BitOffset(BitOffset) {}
+
+      SourceLocation getBegin() const {
+        return SourceLocation::getFromRawEncoding(Begin);
+      }
+
+      SourceLocation getEnd() const {
+        return SourceLocation::getFromRawEncoding(End);
+      }
+    };
+
+    /// \brief Source range of a skipped preprocessor region
+    struct PPSkippedRange {
+      /// \brief Raw source location of beginning of range.
+      unsigned Begin;
+      /// \brief Raw source location of end of range.
+      unsigned End;
+
+      PPSkippedRange(SourceRange R)
+        : Begin(R.getBegin().getRawEncoding()),
+          End(R.getEnd().getRawEncoding()) { }
+
       SourceLocation getBegin() const {
         return SourceLocation::getFromRawEncoding(Begin);
       }
@@ -186,17 +220,19 @@
     /// \brief Source range/offset of a preprocessed entity.
     struct DeclOffset {
       /// \brief Raw source location.
-      unsigned Loc;
-      /// \brief Offset in the AST file.
-      uint32_t BitOffset;
+      unsigned Loc = 0;
 
-      DeclOffset() : Loc(0), BitOffset(0) { }
+      /// \brief Offset in the AST file.
+      uint32_t BitOffset = 0;
+
+      DeclOffset() = default;
       DeclOffset(SourceLocation Loc, uint32_t BitOffset)
-        : Loc(Loc.getRawEncoding()),
-          BitOffset(BitOffset) { }
+        : Loc(Loc.getRawEncoding()), BitOffset(BitOffset) {}
+
       void setLocation(SourceLocation L) {
         Loc = L.getRawEncoding();
       }
+
       SourceLocation getLocation() const {
         return SourceLocation::getFromRawEncoding(Loc);
       }
@@ -610,6 +646,9 @@
 
       /// \brief The stack of open #ifs/#ifdefs recorded in a preamble.
       PP_CONDITIONAL_STACK = 62,
+
+      /// \brief A table of skipped ranges within the preprocessing record.
+      PPD_SKIPPED_RANGES = 63
     };
 
     /// \brief Record types used within a source manager block.
@@ -617,17 +656,21 @@
       /// \brief Describes a source location entry (SLocEntry) for a
       /// file.
       SM_SLOC_FILE_ENTRY = 1,
+
       /// \brief Describes a source location entry (SLocEntry) for a
       /// buffer.
       SM_SLOC_BUFFER_ENTRY = 2,
+
       /// \brief Describes a blob that contains the data for a buffer
       /// entry. This kind of record always directly follows a
       /// SM_SLOC_BUFFER_ENTRY record or a SM_SLOC_FILE_ENTRY with an
       /// overridden buffer.
       SM_SLOC_BUFFER_BLOB = 3,
+
       /// \brief Describes a zlib-compressed blob that contains the data for
       /// a buffer entry.
       SM_SLOC_BUFFER_BLOB_COMPRESSED = 4,
+
       /// \brief Describes a source location entry (SLocEntry) for a
       /// macro expansion.
       SM_SLOC_EXPANSION_ENTRY = 5
@@ -676,46 +719,63 @@
     enum SubmoduleRecordTypes {
       /// \brief Metadata for submodules as a whole.
       SUBMODULE_METADATA = 0,
+
       /// \brief Defines the major attributes of a submodule, including its
       /// name and parent.
       SUBMODULE_DEFINITION = 1,
+
       /// \brief Specifies the umbrella header used to create this module,
       /// if any.
       SUBMODULE_UMBRELLA_HEADER = 2,
+
       /// \brief Specifies a header that falls into this (sub)module.
       SUBMODULE_HEADER = 3,
+
       /// \brief Specifies a top-level header that falls into this (sub)module.
       SUBMODULE_TOPHEADER = 4,
+
       /// \brief Specifies an umbrella directory.
       SUBMODULE_UMBRELLA_DIR = 5,
+
       /// \brief Specifies the submodules that are imported by this 
       /// submodule.
       SUBMODULE_IMPORTS = 6,
+
       /// \brief Specifies the submodules that are re-exported from this 
       /// submodule.
       SUBMODULE_EXPORTS = 7,
+
       /// \brief Specifies a required feature.
       SUBMODULE_REQUIRES = 8,
+
       /// \brief Specifies a header that has been explicitly excluded
       /// from this submodule.
       SUBMODULE_EXCLUDED_HEADER = 9,
+
       /// \brief Specifies a library or framework to link against.
       SUBMODULE_LINK_LIBRARY = 10,
+
       /// \brief Specifies a configuration macro for this module.
       SUBMODULE_CONFIG_MACRO = 11,
+
       /// \brief Specifies a conflict with another module.
       SUBMODULE_CONFLICT = 12,
+
       /// \brief Specifies a header that is private to this submodule.
       SUBMODULE_PRIVATE_HEADER = 13,
+
       /// \brief Specifies a header that is part of the module but must be
       /// textually included.
       SUBMODULE_TEXTUAL_HEADER = 14,
+
       /// \brief Specifies a header that is private to this submodule but
       /// must be textually included.
       SUBMODULE_PRIVATE_TEXTUAL_HEADER = 15,
+
       /// \brief Specifies some declarations with initializers that must be
       /// emitted to initialize the module.
       SUBMODULE_INITIALIZERS = 16,
+
       /// \brief Specifies the name of the module that will eventually
       /// re-export the entities in this module.
       SUBMODULE_EXPORT_AS = 17,
@@ -743,94 +803,139 @@
     enum PredefinedTypeIDs {
       /// \brief The NULL type.
       PREDEF_TYPE_NULL_ID       = 0,
+
       /// \brief The void type.
       PREDEF_TYPE_VOID_ID       = 1,
+
       /// \brief The 'bool' or '_Bool' type.
       PREDEF_TYPE_BOOL_ID       = 2,
+
       /// \brief The 'char' type, when it is unsigned.
       PREDEF_TYPE_CHAR_U_ID     = 3,
+
       /// \brief The 'unsigned char' type.
       PREDEF_TYPE_UCHAR_ID      = 4,
+
       /// \brief The 'unsigned short' type.
       PREDEF_TYPE_USHORT_ID     = 5,
+
       /// \brief The 'unsigned int' type.
       PREDEF_TYPE_UINT_ID       = 6,
+
       /// \brief The 'unsigned long' type.
       PREDEF_TYPE_ULONG_ID      = 7,
+
       /// \brief The 'unsigned long long' type.
       PREDEF_TYPE_ULONGLONG_ID  = 8,
+
       /// \brief The 'char' type, when it is signed.
       PREDEF_TYPE_CHAR_S_ID     = 9,
+
       /// \brief The 'signed char' type.
       PREDEF_TYPE_SCHAR_ID      = 10,
+
       /// \brief The C++ 'wchar_t' type.
       PREDEF_TYPE_WCHAR_ID      = 11,
+
       /// \brief The (signed) 'short' type.
       PREDEF_TYPE_SHORT_ID      = 12,
+
       /// \brief The (signed) 'int' type.
       PREDEF_TYPE_INT_ID        = 13,
+
       /// \brief The (signed) 'long' type.
       PREDEF_TYPE_LONG_ID       = 14,
+
       /// \brief The (signed) 'long long' type.
       PREDEF_TYPE_LONGLONG_ID   = 15,
+
       /// \brief The 'float' type.
       PREDEF_TYPE_FLOAT_ID      = 16,
+
       /// \brief The 'double' type.
       PREDEF_TYPE_DOUBLE_ID     = 17,
+
       /// \brief The 'long double' type.
       PREDEF_TYPE_LONGDOUBLE_ID = 18,
+
       /// \brief The placeholder type for overloaded function sets.
       PREDEF_TYPE_OVERLOAD_ID   = 19,
+
       /// \brief The placeholder type for dependent types.
       PREDEF_TYPE_DEPENDENT_ID  = 20,
+
       /// \brief The '__uint128_t' type.
       PREDEF_TYPE_UINT128_ID    = 21,
+
       /// \brief The '__int128_t' type.
       PREDEF_TYPE_INT128_ID     = 22,
+
       /// \brief The type of 'nullptr'.
       PREDEF_TYPE_NULLPTR_ID    = 23,
+
       /// \brief The C++ 'char16_t' type.
       PREDEF_TYPE_CHAR16_ID     = 24,
+
       /// \brief The C++ 'char32_t' type.
       PREDEF_TYPE_CHAR32_ID     = 25,
+
       /// \brief The ObjC 'id' type.
       PREDEF_TYPE_OBJC_ID       = 26,
+
       /// \brief The ObjC 'Class' type.
       PREDEF_TYPE_OBJC_CLASS    = 27,
+
       /// \brief The ObjC 'SEL' type.
       PREDEF_TYPE_OBJC_SEL      = 28,
+
       /// \brief The 'unknown any' placeholder type.
       PREDEF_TYPE_UNKNOWN_ANY   = 29,
+
       /// \brief The placeholder type for bound member functions.
       PREDEF_TYPE_BOUND_MEMBER  = 30,
+
       /// \brief The "auto" deduction type.
       PREDEF_TYPE_AUTO_DEDUCT   = 31,
+
       /// \brief The "auto &&" deduction type.
       PREDEF_TYPE_AUTO_RREF_DEDUCT = 32,
+
       /// \brief The OpenCL 'half' / ARM NEON __fp16 type.
       PREDEF_TYPE_HALF_ID       = 33,
+
       /// \brief ARC's unbridged-cast placeholder type.
       PREDEF_TYPE_ARC_UNBRIDGED_CAST = 34,
+
       /// \brief The pseudo-object placeholder type.
       PREDEF_TYPE_PSEUDO_OBJECT = 35,
+
       /// \brief The placeholder type for builtin functions.
       PREDEF_TYPE_BUILTIN_FN = 36,
+
       /// \brief OpenCL event type.
       PREDEF_TYPE_EVENT_ID      = 37,
+
       /// \brief OpenCL clk event type.
       PREDEF_TYPE_CLK_EVENT_ID  = 38,
+
       /// \brief OpenCL sampler type.
       PREDEF_TYPE_SAMPLER_ID    = 39,
+
       /// \brief OpenCL queue type.
       PREDEF_TYPE_QUEUE_ID      = 40,
+
       /// \brief OpenCL reserve_id type.
       PREDEF_TYPE_RESERVE_ID_ID = 41,
+
       /// \brief The placeholder type for OpenMP array section.
       PREDEF_TYPE_OMP_ARRAY_SECTION = 42,
+
       /// \brief The '__float128' type
       PREDEF_TYPE_FLOAT128_ID = 43,
+
       /// \brief The '_Float16' type
       PREDEF_TYPE_FLOAT16_ID = 44,
+
       /// \brief OpenCL image types with auto numeration
 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
       PREDEF_TYPE_##Id##_ID,
@@ -853,94 +958,139 @@
     enum TypeCode {
       /// \brief An ExtQualType record.
       TYPE_EXT_QUAL                 = 1,
+
       /// \brief A ComplexType record.
       TYPE_COMPLEX                  = 3,
+
       /// \brief A PointerType record.
       TYPE_POINTER                  = 4,
+
       /// \brief A BlockPointerType record.
       TYPE_BLOCK_POINTER            = 5,
+
       /// \brief An LValueReferenceType record.
       TYPE_LVALUE_REFERENCE         = 6,
+
       /// \brief An RValueReferenceType record.
       TYPE_RVALUE_REFERENCE         = 7,
+
       /// \brief A MemberPointerType record.
       TYPE_MEMBER_POINTER           = 8,
+
       /// \brief A ConstantArrayType record.
       TYPE_CONSTANT_ARRAY           = 9,
+
       /// \brief An IncompleteArrayType record.
       TYPE_INCOMPLETE_ARRAY         = 10,
+
       /// \brief A VariableArrayType record.
       TYPE_VARIABLE_ARRAY           = 11,
+
       /// \brief A VectorType record.
       TYPE_VECTOR                   = 12,
+
       /// \brief An ExtVectorType record.
       TYPE_EXT_VECTOR               = 13,
+
       /// \brief A FunctionNoProtoType record.
       TYPE_FUNCTION_NO_PROTO        = 14,
+
       /// \brief A FunctionProtoType record.
       TYPE_FUNCTION_PROTO           = 15,
+
       /// \brief A TypedefType record.
       TYPE_TYPEDEF                  = 16,
+
       /// \brief A TypeOfExprType record.
       TYPE_TYPEOF_EXPR              = 17,
+
       /// \brief A TypeOfType record.
       TYPE_TYPEOF                   = 18,
+
       /// \brief A RecordType record.
       TYPE_RECORD                   = 19,
+
       /// \brief An EnumType record.
       TYPE_ENUM                     = 20,
+
       /// \brief An ObjCInterfaceType record.
       TYPE_OBJC_INTERFACE           = 21,
+
       /// \brief An ObjCObjectPointerType record.
       TYPE_OBJC_OBJECT_POINTER      = 22,
+
       /// \brief a DecltypeType record.
       TYPE_DECLTYPE                 = 23,
+
       /// \brief An ElaboratedType record.
       TYPE_ELABORATED               = 24,
+
       /// \brief A SubstTemplateTypeParmType record.
       TYPE_SUBST_TEMPLATE_TYPE_PARM = 25,
+
       /// \brief An UnresolvedUsingType record.
       TYPE_UNRESOLVED_USING         = 26,
+
       /// \brief An InjectedClassNameType record.
       TYPE_INJECTED_CLASS_NAME      = 27,
+
       /// \brief An ObjCObjectType record.
       TYPE_OBJC_OBJECT              = 28,
+
       /// \brief An TemplateTypeParmType record.
       TYPE_TEMPLATE_TYPE_PARM       = 29,
+
       /// \brief An TemplateSpecializationType record.
       TYPE_TEMPLATE_SPECIALIZATION  = 30,
+
       /// \brief A DependentNameType record.
       TYPE_DEPENDENT_NAME           = 31,
+
       /// \brief A DependentTemplateSpecializationType record.
       TYPE_DEPENDENT_TEMPLATE_SPECIALIZATION = 32,
+
       /// \brief A DependentSizedArrayType record.
       TYPE_DEPENDENT_SIZED_ARRAY    = 33,
+
       /// \brief A ParenType record.
       TYPE_PAREN                    = 34,
+
       /// \brief A PackExpansionType record.
       TYPE_PACK_EXPANSION           = 35,
+
       /// \brief An AttributedType record.
       TYPE_ATTRIBUTED               = 36,
+
       /// \brief A SubstTemplateTypeParmPackType record.
       TYPE_SUBST_TEMPLATE_TYPE_PARM_PACK = 37,
+
       /// \brief A AutoType record.
       TYPE_AUTO                  = 38,
+
       /// \brief A UnaryTransformType record.
       TYPE_UNARY_TRANSFORM       = 39,
+
       /// \brief An AtomicType record.
       TYPE_ATOMIC                = 40,
+
       /// \brief A DecayedType record.
       TYPE_DECAYED               = 41,
+
       /// \brief An AdjustedType record.
       TYPE_ADJUSTED              = 42,
+
       /// \brief A PipeType record.
       TYPE_PIPE                  = 43,
+
       /// \brief An ObjCTypeParamType record.
       TYPE_OBJC_TYPE_PARAM       = 44,
+
       /// \brief A DeducedTemplateSpecializationType record.
       TYPE_DEDUCED_TEMPLATE_SPECIALIZATION = 45,
+
       /// \brief A DependentSizedExtVectorType record.
       TYPE_DEPENDENT_SIZED_EXT_VECTOR = 46,
+
       /// \brief A DependentAddressSpaceType record.
       TYPE_DEPENDENT_ADDRESS_SPACE = 47
     };
@@ -953,18 +1103,25 @@
     enum SpecialTypeIDs {
       /// \brief CFConstantString type
       SPECIAL_TYPE_CF_CONSTANT_STRING          = 0,
+
       /// \brief C FILE typedef type
       SPECIAL_TYPE_FILE                        = 1,
+
       /// \brief C jmp_buf typedef type
       SPECIAL_TYPE_JMP_BUF                     = 2,
+
       /// \brief C sigjmp_buf typedef type
       SPECIAL_TYPE_SIGJMP_BUF                  = 3,
+
       /// \brief Objective-C "id" redefinition type
       SPECIAL_TYPE_OBJC_ID_REDEFINITION        = 4,
+
       /// \brief Objective-C "Class" redefinition type
       SPECIAL_TYPE_OBJC_CLASS_REDEFINITION     = 5,
+
       /// \brief Objective-C "SEL" redefinition type
       SPECIAL_TYPE_OBJC_SEL_REDEFINITION       = 6,
+
       /// \brief C ucontext_t typedef type
       SPECIAL_TYPE_UCONTEXT_T                  = 7
     };
@@ -1055,57 +1212,84 @@
       /// \brief A TypedefDecl record.
       DECL_TYPEDEF = 51,
       /// \brief A TypeAliasDecl record.
+
       DECL_TYPEALIAS,
+
       /// \brief An EnumDecl record.
       DECL_ENUM,
+
       /// \brief A RecordDecl record.
       DECL_RECORD,
+
       /// \brief An EnumConstantDecl record.
       DECL_ENUM_CONSTANT,
+
       /// \brief A FunctionDecl record.
       DECL_FUNCTION,
+
       /// \brief A ObjCMethodDecl record.
       DECL_OBJC_METHOD,
+
       /// \brief A ObjCInterfaceDecl record.
       DECL_OBJC_INTERFACE,
+
       /// \brief A ObjCProtocolDecl record.
       DECL_OBJC_PROTOCOL,
+
       /// \brief A ObjCIvarDecl record.
       DECL_OBJC_IVAR,
+
       /// \brief A ObjCAtDefsFieldDecl record.
       DECL_OBJC_AT_DEFS_FIELD,
+
       /// \brief A ObjCCategoryDecl record.
       DECL_OBJC_CATEGORY,
+
       /// \brief A ObjCCategoryImplDecl record.
       DECL_OBJC_CATEGORY_IMPL,
+
       /// \brief A ObjCImplementationDecl record.
       DECL_OBJC_IMPLEMENTATION,
+
       /// \brief A ObjCCompatibleAliasDecl record.
       DECL_OBJC_COMPATIBLE_ALIAS,
+
       /// \brief A ObjCPropertyDecl record.
       DECL_OBJC_PROPERTY,
+
       /// \brief A ObjCPropertyImplDecl record.
       DECL_OBJC_PROPERTY_IMPL,
+
       /// \brief A FieldDecl record.
       DECL_FIELD,
+
       /// \brief A MSPropertyDecl record.
       DECL_MS_PROPERTY,
+
       /// \brief A VarDecl record.
       DECL_VAR,
+
       /// \brief An ImplicitParamDecl record.
       DECL_IMPLICIT_PARAM,
+
       /// \brief A ParmVarDecl record.
       DECL_PARM_VAR,
+
       /// \brief A DecompositionDecl record.
       DECL_DECOMPOSITION,
+
       /// \brief A BindingDecl record.
       DECL_BINDING,
+
       /// \brief A FileScopeAsmDecl record.
       DECL_FILE_SCOPE_ASM,
+
       /// \brief A BlockDecl record.
       DECL_BLOCK,
+
       /// \brief A CapturedDecl record.
       DECL_CAPTURED,
+
       /// \brief A record that stores the set of declarations that are
       /// lexically stored within a given DeclContext.
       ///
@@ -1115,6 +1299,7 @@
       /// the contents of a DeclContext, e.g., via
       /// DeclContext::decls_begin() and DeclContext::decls_end().
       DECL_CONTEXT_LEXICAL,
+
       /// \brief A record that stores the set of declarations that are
       /// visible from a given DeclContext.
       ///
@@ -1123,104 +1308,151 @@
       /// IDs. This data is used when performing qualified name lookup
       /// into a DeclContext via DeclContext::lookup.
       DECL_CONTEXT_VISIBLE,
+
       /// \brief A LabelDecl record.
       DECL_LABEL,
+
       /// \brief A NamespaceDecl record.
       DECL_NAMESPACE,
+
       /// \brief A NamespaceAliasDecl record.
       DECL_NAMESPACE_ALIAS,
+
       /// \brief A UsingDecl record.
       DECL_USING,
+
       /// \brief A UsingPackDecl record.
       DECL_USING_PACK,
+
       /// \brief A UsingShadowDecl record.
       DECL_USING_SHADOW,
+
       /// \brief A ConstructorUsingShadowDecl record.
       DECL_CONSTRUCTOR_USING_SHADOW,
+
       /// \brief A UsingDirecitveDecl record.
       DECL_USING_DIRECTIVE,
+
       /// \brief An UnresolvedUsingValueDecl record.
       DECL_UNRESOLVED_USING_VALUE,
+
       /// \brief An UnresolvedUsingTypenameDecl record.
       DECL_UNRESOLVED_USING_TYPENAME,
+
       /// \brief A LinkageSpecDecl record.
       DECL_LINKAGE_SPEC,
+
       /// \brief An ExportDecl record.
       DECL_EXPORT,
+
       /// \brief A CXXRecordDecl record.
       DECL_CXX_RECORD,
+
       /// \brief A CXXDeductionGuideDecl record.
       DECL_CXX_DEDUCTION_GUIDE,
+
       /// \brief A CXXMethodDecl record.
       DECL_CXX_METHOD,
+
       /// \brief A CXXConstructorDecl record.
       DECL_CXX_CONSTRUCTOR,
+
       /// \brief A CXXConstructorDecl record for an inherited constructor.
       DECL_CXX_INHERITED_CONSTRUCTOR,
+
       /// \brief A CXXDestructorDecl record.
       DECL_CXX_DESTRUCTOR,
+
       /// \brief A CXXConversionDecl record.
       DECL_CXX_CONVERSION,
+
       /// \brief An AccessSpecDecl record.
       DECL_ACCESS_SPEC,
 
       /// \brief A FriendDecl record.
       DECL_FRIEND,
+
       /// \brief A FriendTemplateDecl record.
       DECL_FRIEND_TEMPLATE,
+
       /// \brief A ClassTemplateDecl record.
       DECL_CLASS_TEMPLATE,
+
       /// \brief A ClassTemplateSpecializationDecl record.
       DECL_CLASS_TEMPLATE_SPECIALIZATION,
+
       /// \brief A ClassTemplatePartialSpecializationDecl record.
       DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION,
+
       /// \brief A VarTemplateDecl record.
       DECL_VAR_TEMPLATE,
+
       /// \brief A VarTemplateSpecializationDecl record.
       DECL_VAR_TEMPLATE_SPECIALIZATION,
+
       /// \brief A VarTemplatePartialSpecializationDecl record.
       DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION,
+
       /// \brief A FunctionTemplateDecl record.
       DECL_FUNCTION_TEMPLATE,
+
       /// \brief A TemplateTypeParmDecl record.
       DECL_TEMPLATE_TYPE_PARM,
+
       /// \brief A NonTypeTemplateParmDecl record.
       DECL_NON_TYPE_TEMPLATE_PARM,
+
       /// \brief A TemplateTemplateParmDecl record.
       DECL_TEMPLATE_TEMPLATE_PARM,
+
       /// \brief A TypeAliasTemplateDecl record.
       DECL_TYPE_ALIAS_TEMPLATE,
+
       /// \brief A StaticAssertDecl record.
       DECL_STATIC_ASSERT,
+
       /// \brief A record containing CXXBaseSpecifiers.
       DECL_CXX_BASE_SPECIFIERS,
+
       /// \brief A record containing CXXCtorInitializers.
       DECL_CXX_CTOR_INITIALIZERS,
+
       /// \brief A IndirectFieldDecl record.
       DECL_INDIRECTFIELD,
+
       /// \brief A NonTypeTemplateParmDecl record that stores an expanded
       /// non-type template parameter pack.
       DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK,
+
       /// \brief A TemplateTemplateParmDecl record that stores an expanded
       /// template template parameter pack.
       DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK,
+
       /// \brief A ClassScopeFunctionSpecializationDecl record a class scope
       /// function specialization. (Microsoft extension).
       DECL_CLASS_SCOPE_FUNCTION_SPECIALIZATION,
+
       /// \brief An ImportDecl recording a module import.
       DECL_IMPORT,
+
       /// \brief An OMPThreadPrivateDecl record.
       DECL_OMP_THREADPRIVATE,
+
       /// \brief An EmptyDecl record.
       DECL_EMPTY,
+
       /// \brief An ObjCTypeParamDecl record.
       DECL_OBJC_TYPE_PARAM,
+
       /// \brief An OMPCapturedExprDecl record.
       DECL_OMP_CAPTUREDEXPR,
+
       /// \brief A PragmaCommentDecl record.
       DECL_PRAGMA_COMMENT,
+
       /// \brief A PragmaDetectMismatchDecl record.
       DECL_PRAGMA_DETECT_MISMATCH,
+
       /// \brief An OMPDeclareReductionDecl record.
       DECL_OMP_DECLARE_REDUCTION,
     };
@@ -1236,128 +1468,190 @@
       /// \brief A marker record that indicates that we are at the end
       /// of an expression.
       STMT_STOP = 128,
+
       /// \brief A NULL expression.
       STMT_NULL_PTR,
+
       /// \brief A reference to a previously [de]serialized Stmt record.
       STMT_REF_PTR,
+
       /// \brief A NullStmt record.
       STMT_NULL,
+
       /// \brief A CompoundStmt record.
       STMT_COMPOUND,
+
       /// \brief A CaseStmt record.
       STMT_CASE,
+
       /// \brief A DefaultStmt record.
       STMT_DEFAULT,
+
       /// \brief A LabelStmt record.
       STMT_LABEL,
+
       /// \brief An AttributedStmt record.
       STMT_ATTRIBUTED,
+
       /// \brief An IfStmt record.
       STMT_IF,
+
       /// \brief A SwitchStmt record.
       STMT_SWITCH,
+
       /// \brief A WhileStmt record.
       STMT_WHILE,
+
       /// \brief A DoStmt record.
       STMT_DO,
+
       /// \brief A ForStmt record.
       STMT_FOR,
+
       /// \brief A GotoStmt record.
       STMT_GOTO,
+
       /// \brief An IndirectGotoStmt record.
       STMT_INDIRECT_GOTO,
+
       /// \brief A ContinueStmt record.
       STMT_CONTINUE,
+
       /// \brief A BreakStmt record.
       STMT_BREAK,
+
       /// \brief A ReturnStmt record.
       STMT_RETURN,
+
       /// \brief A DeclStmt record.
       STMT_DECL,
+
       /// \brief A CapturedStmt record.
       STMT_CAPTURED,
+
       /// \brief A GCC-style AsmStmt record.
       STMT_GCCASM,
+
       /// \brief A MS-style AsmStmt record.
       STMT_MSASM,
+
       /// \brief A PredefinedExpr record.
       EXPR_PREDEFINED,
+
       /// \brief A DeclRefExpr record.
       EXPR_DECL_REF,
+
       /// \brief An IntegerLiteral record.
       EXPR_INTEGER_LITERAL,
+
       /// \brief A FloatingLiteral record.
       EXPR_FLOATING_LITERAL,
+
       /// \brief An ImaginaryLiteral record.
       EXPR_IMAGINARY_LITERAL,
+
       /// \brief A StringLiteral record.
       EXPR_STRING_LITERAL,
+
       /// \brief A CharacterLiteral record.
       EXPR_CHARACTER_LITERAL,
+
       /// \brief A ParenExpr record.
       EXPR_PAREN,
+
       /// \brief A ParenListExpr record.
       EXPR_PAREN_LIST,
+
       /// \brief A UnaryOperator record.
       EXPR_UNARY_OPERATOR,
+
       /// \brief An OffsetOfExpr record.
       EXPR_OFFSETOF,
+
       /// \brief A SizefAlignOfExpr record.
       EXPR_SIZEOF_ALIGN_OF,
+
       /// \brief An ArraySubscriptExpr record.
       EXPR_ARRAY_SUBSCRIPT,
+
       /// \brief A CallExpr record.
       EXPR_CALL,
+
       /// \brief A MemberExpr record.
       EXPR_MEMBER,
+
       /// \brief A BinaryOperator record.
       EXPR_BINARY_OPERATOR,
+
       /// \brief A CompoundAssignOperator record.
       EXPR_COMPOUND_ASSIGN_OPERATOR,
+
       /// \brief A ConditionOperator record.
       EXPR_CONDITIONAL_OPERATOR,
+
       /// \brief An ImplicitCastExpr record.
       EXPR_IMPLICIT_CAST,
+
       /// \brief A CStyleCastExpr record.
       EXPR_CSTYLE_CAST,
+
       /// \brief A CompoundLiteralExpr record.
       EXPR_COMPOUND_LITERAL,
+
       /// \brief An ExtVectorElementExpr record.
       EXPR_EXT_VECTOR_ELEMENT,
+
       /// \brief An InitListExpr record.
       EXPR_INIT_LIST,
+
       /// \brief A DesignatedInitExpr record.
       EXPR_DESIGNATED_INIT,
+
       /// \brief A DesignatedInitUpdateExpr record.
       EXPR_DESIGNATED_INIT_UPDATE,
+
       /// \brief An NoInitExpr record.
       EXPR_NO_INIT,
+
       /// \brief An ArrayInitLoopExpr record.
       EXPR_ARRAY_INIT_LOOP,
+
       /// \brief An ArrayInitIndexExpr record.
       EXPR_ARRAY_INIT_INDEX,
+
       /// \brief An ImplicitValueInitExpr record.
       EXPR_IMPLICIT_VALUE_INIT,
+
       /// \brief A VAArgExpr record.
       EXPR_VA_ARG,
+
       /// \brief An AddrLabelExpr record.
       EXPR_ADDR_LABEL,
+
       /// \brief A StmtExpr record.
       EXPR_STMT,
+
       /// \brief A ChooseExpr record.
       EXPR_CHOOSE,
+
       /// \brief A GNUNullExpr record.
       EXPR_GNU_NULL,
+
       /// \brief A ShuffleVectorExpr record.
       EXPR_SHUFFLE_VECTOR,
+
       /// \brief A ConvertVectorExpr record.
       EXPR_CONVERT_VECTOR,
+
       /// \brief BlockExpr
       EXPR_BLOCK,
+
       /// \brief A GenericSelectionExpr record.
       EXPR_GENERIC_SELECTION,
+
       /// \brief A PseudoObjectExpr record.
       EXPR_PSEUDO_OBJECT,
+
       /// \brief An AtomicExpr record.
       EXPR_ATOMIC,
 
@@ -1369,45 +1663,61 @@
       EXPR_OBJC_BOXED_EXPRESSION,
       EXPR_OBJC_ARRAY_LITERAL,
       EXPR_OBJC_DICTIONARY_LITERAL,
-
-    
+   
       /// \brief An ObjCEncodeExpr record.
       EXPR_OBJC_ENCODE,
+
       /// \brief An ObjCSelectorExpr record.
       EXPR_OBJC_SELECTOR_EXPR,
+
       /// \brief An ObjCProtocolExpr record.
       EXPR_OBJC_PROTOCOL_EXPR,
+
       /// \brief An ObjCIvarRefExpr record.
       EXPR_OBJC_IVAR_REF_EXPR,
+
       /// \brief An ObjCPropertyRefExpr record.
       EXPR_OBJC_PROPERTY_REF_EXPR,
+
       /// \brief An ObjCSubscriptRefExpr record.
       EXPR_OBJC_SUBSCRIPT_REF_EXPR,
+
       /// \brief UNUSED
       EXPR_OBJC_KVC_REF_EXPR,
+
       /// \brief An ObjCMessageExpr record.
       EXPR_OBJC_MESSAGE_EXPR,
+
       /// \brief An ObjCIsa Expr record.
       EXPR_OBJC_ISA,
+
       /// \brief An ObjCIndirectCopyRestoreExpr record.
       EXPR_OBJC_INDIRECT_COPY_RESTORE,
 
       /// \brief An ObjCForCollectionStmt record.
       STMT_OBJC_FOR_COLLECTION,
+
       /// \brief An ObjCAtCatchStmt record.
       STMT_OBJC_CATCH,
+
       /// \brief An ObjCAtFinallyStmt record.
       STMT_OBJC_FINALLY,
+
       /// \brief An ObjCAtTryStmt record.
       STMT_OBJC_AT_TRY,
+
       /// \brief An ObjCAtSynchronizedStmt record.
       STMT_OBJC_AT_SYNCHRONIZED,
+
       /// \brief An ObjCAtThrowStmt record.
       STMT_OBJC_AT_THROW,
+
       /// \brief An ObjCAutoreleasePoolStmt record.
       STMT_OBJC_AUTORELEASE_POOL,
+
       /// \brief An ObjCBoolLiteralExpr record.
       EXPR_OBJC_BOOL_LITERAL,
+
       /// \brief An ObjCAvailabilityCheckExpr record.
       EXPR_OBJC_AVAILABILITY_CHECK,
 
@@ -1415,37 +1725,52 @@
       
       /// \brief A CXXCatchStmt record.
       STMT_CXX_CATCH,
+
       /// \brief A CXXTryStmt record.
       STMT_CXX_TRY,
       /// \brief A CXXForRangeStmt record.
+
       STMT_CXX_FOR_RANGE,
 
       /// \brief A CXXOperatorCallExpr record.
       EXPR_CXX_OPERATOR_CALL,
+
       /// \brief A CXXMemberCallExpr record.
       EXPR_CXX_MEMBER_CALL,
+
       /// \brief A CXXConstructExpr record.
       EXPR_CXX_CONSTRUCT,
+
       /// \brief A CXXInheritedCtorInitExpr record.
       EXPR_CXX_INHERITED_CTOR_INIT,
+
       /// \brief A CXXTemporaryObjectExpr record.
       EXPR_CXX_TEMPORARY_OBJECT,
+
       /// \brief A CXXStaticCastExpr record.
       EXPR_CXX_STATIC_CAST,
+
       /// \brief A CXXDynamicCastExpr record.
       EXPR_CXX_DYNAMIC_CAST,
+
       /// \brief A CXXReinterpretCastExpr record.
       EXPR_CXX_REINTERPRET_CAST,
+
       /// \brief A CXXConstCastExpr record.
       EXPR_CXX_CONST_CAST,
+
       /// \brief A CXXFunctionalCastExpr record.
       EXPR_CXX_FUNCTIONAL_CAST,
+
       /// \brief A UserDefinedLiteral record.
       EXPR_USER_DEFINED_LITERAL,
+
       /// \brief A CXXStdInitializerListExpr record.
       EXPR_CXX_STD_INITIALIZER_LIST,
+
       /// \brief A CXXBoolLiteralExpr record.
       EXPR_CXX_BOOL_LITERAL,
+
       EXPR_CXX_NULL_PTR_LITERAL,  // CXXNullPtrLiteralExpr
       EXPR_CXX_TYPEID_EXPR,       // CXXTypeidExpr (of expr).
       EXPR_CXX_TYPEID_TYPE,       // CXXTypeidExpr (of type).
@@ -1567,11 +1892,14 @@
     enum DesignatorTypes {
       /// \brief Field designator where only the field name is known.
       DESIG_FIELD_NAME  = 0,
+
       /// \brief Field designator where the field has been resolved to
       /// a declaration.
       DESIG_FIELD_DECL  = 1,
+
       /// \brief Array designator.
       DESIG_ARRAY       = 2,
+
       /// \brief GNU array range designator.
       DESIG_ARRAY_RANGE = 3
     };
@@ -1587,8 +1915,11 @@
 
     /// \brief Describes the redeclarations of a declaration.
     struct LocalRedeclarationsInfo {
-      DeclID FirstID;      // The ID of the first declaration
-      unsigned Offset;     // Offset into the array of redeclaration chains.
+      // The ID of the first declaration
+      DeclID FirstID;
+
+      // Offset into the array of redeclaration chains.
+      unsigned Offset;
       
       friend bool operator<(const LocalRedeclarationsInfo &X,
                             const LocalRedeclarationsInfo &Y) {
@@ -1613,8 +1944,11 @@
 
     /// \brief Describes the categories of an Objective-C class.
     struct ObjCCategoriesInfo {
-      DeclID DefinitionID; // The ID of the definition
-      unsigned Offset;     // Offset into the array of category lists.
+      // The ID of the definition
+      DeclID DefinitionID;
+
+      // Offset into the array of category lists.
+      unsigned Offset;
       
       friend bool operator<(const ObjCCategoriesInfo &X,
                             const ObjCCategoriesInfo &Y) {
@@ -1643,15 +1977,14 @@
     /// same key can occasionally represent multiple names (for names that
     /// contain types, in particular).
     class DeclarationNameKey {
-      typedef unsigned NameKind;
+      using NameKind = unsigned;
 
-      NameKind Kind;
-      uint64_t Data;
+      NameKind Kind = 0;
+      uint64_t Data = 0;
 
     public:
-      DeclarationNameKey() : Kind(), Data() {}
+      DeclarationNameKey() = default;
       DeclarationNameKey(DeclarationName Name);
-
       DeclarationNameKey(NameKind Kind, uint64_t Data)
           : Kind(Kind), Data(Data) {}
 
@@ -1663,12 +1996,14 @@
                Kind == DeclarationName::CXXDeductionGuideName);
         return (IdentifierInfo *)Data;
       }
+
       Selector getSelector() const {
         assert(Kind == DeclarationName::ObjCZeroArgSelector ||
                Kind == DeclarationName::ObjCOneArgSelector ||
                Kind == DeclarationName::ObjCMultiArgSelector);
         return Selector(Data);
       }
+
       OverloadedOperatorKind getOperatorKind() const {
         assert(Kind == DeclarationName::CXXOperatorName);
         return (OverloadedOperatorKind)Data;
@@ -1684,26 +2019,32 @@
     };
 
     /// @}
-  }
-} // end namespace clang
+
+} // namespace serialization
+} // namespace clang
 
 namespace llvm {
+
   template <> struct DenseMapInfo<clang::serialization::DeclarationNameKey> {
     static clang::serialization::DeclarationNameKey getEmptyKey() {
       return clang::serialization::DeclarationNameKey(-1, 1);
     }
+
     static clang::serialization::DeclarationNameKey getTombstoneKey() {
       return clang::serialization::DeclarationNameKey(-1, 2);
     }
+
     static unsigned
     getHashValue(const clang::serialization::DeclarationNameKey &Key) {
       return Key.getHash();
     }
+
     static bool isEqual(const clang::serialization::DeclarationNameKey &L,
                         const clang::serialization::DeclarationNameKey &R) {
       return L == R;
     }
   };
-}
 
-#endif
+} // namespace llvm
+
+#endif // LLVM_CLANG_SERIALIZATION_ASTBITCODES_H
diff --git a/include/clang/Serialization/ASTReader.h b/include/clang/Serialization/ASTReader.h
index 9c5ad00..01cd055 100644
--- a/include/clang/Serialization/ASTReader.h
+++ b/include/clang/Serialization/ASTReader.h
@@ -1,4 +1,4 @@
-//===--- ASTReader.h - AST File Reader --------------------------*- C++ -*-===//
+//===- ASTReader.h - AST File Reader ----------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,16 +14,25 @@
 #ifndef LLVM_CLANG_SERIALIZATION_ASTREADER_H
 #define LLVM_CLANG_SERIALIZATION_ASTREADER_H
 
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclarationName.h"
+#include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/TemplateBase.h"
+#include "clang/AST/TemplateName.h"
+#include "clang/AST/Type.h"
 #include "clang/Basic/Diagnostic.h"
-#include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/OpenCLOptions.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/Version.h"
+#include "clang/Basic/VersionTuple.h"
 #include "clang/Lex/ExternalPreprocessorSource.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/PreprocessingRecord.h"
+#include "clang/Lex/Token.h"
 #include "clang/Sema/ExternalSemaSource.h"
 #include "clang/Sema/IdentifierResolver.h"
 #include "clang/Serialization/ASTBitCodes.h"
@@ -31,70 +40,82 @@
 #include "clang/Serialization/Module.h"
 #include "clang/Serialization/ModuleFileExtension.h"
 #include "clang/Serialization/ModuleManager.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Timer.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <ctime>
 #include <deque>
 #include <memory>
+#include <set>
 #include <string>
 #include <utility>
 #include <vector>
 
-namespace llvm {
-  class BitstreamCursor;
-  class MemoryBuffer;
-  class APInt;
-  class APSInt;
-  class APFloat;
-}
-
 namespace clang {
 
-class SourceManager;
-class HeaderSearchOptions;
-class FileManager;
-class AddrLabelExpr;
 class ASTConsumer;
 class ASTContext;
-class ASTIdentifierIterator;
-class ASTUnit; // FIXME: Layering violation and egregious hack.
-class Attr;
+class ASTDeserializationListener;
+class ASTReader;
+class ASTRecordReader;
+class CXXTemporary;
 class Decl;
+class DeclaratorDecl;
 class DeclContext;
-class DefMacroDirective;
-class DiagnosticOptions;
-class NestedNameSpecifier;
-class CXXBaseSpecifier;
-class CXXConstructorDecl;
-class CXXCtorInitializer;
+class EnumDecl;
+class Expr;
+class FieldDecl;
+class FileEntry;
+class FileManager;
+class FileSystemOptions;
+class FunctionDecl;
 class GlobalModuleIndex;
-class GotoStmt;
-class MacroDefinition;
-class MacroDirective;
-class ModuleMacro;
+struct HeaderFileInfo;
+class HeaderSearchOptions;
+class LangOptions;
+class LazyASTUnresolvedSet;
+class MacroInfo;
+class MemoryBufferCache;
 class NamedDecl;
-class OpaqueValueExpr;
+class NamespaceDecl;
+class ObjCCategoryDecl;
+class ObjCInterfaceDecl;
+class PCHContainerReader;
 class Preprocessor;
 class PreprocessorOptions;
+struct QualifierInfo;
 class Sema;
+class SourceManager;
+class Stmt;
 class SwitchCase;
-class ASTDeserializationListener;
-class ASTWriter;
-class ASTReader;
-class ASTDeclReader;
-class ASTStmtReader;
-class ASTRecordReader;
-class TypeLocReader;
-struct HeaderFileInfo;
-class VersionTuple;
 class TargetOptions;
-class LazyASTUnresolvedSet;
+class TemplateParameterList;
+class TypedefNameDecl;
+class TypeSourceInfo;
+class ValueDecl;
+class VarDecl;
 
 /// \brief Abstract interface for callback invocations by the ASTReader.
 ///
@@ -189,9 +210,11 @@
   /// \brief Returns true if this \c ASTReaderListener wants to receive the
   /// input files of the AST file via \c visitInputFile, false otherwise.
   virtual bool needsInputFileVisitation() { return false; }
+
   /// \brief Returns true if this \c ASTReaderListener wants to receive the
   /// system input files of the AST file via \c visitInputFile, false otherwise.
   virtual bool needsSystemInputFileVisitation() { return false; }
+
   /// \brief if \c needsInputFileVisitation returns true, this is called for
   /// each non-system input file of the AST File. If
   /// \c needsSystemInputFileVisitation is true, then it is called for all
@@ -206,6 +229,7 @@
   /// \brief Returns true if this \c ASTReaderListener wants to receive the
   /// imports of the AST file via \c visitImport, false otherwise.
   virtual bool needsImportVisitation() const { return false; }
+
   /// \brief If needsImportVisitation returns \c true, this is called for each
   /// AST file imported by this AST file.
   virtual void visitImport(StringRef Filename) {}
@@ -267,7 +291,7 @@
 
 public:
   PCHValidator(Preprocessor &PP, ASTReader &Reader)
-    : PP(PP), Reader(Reader) {}
+      : PP(PP), Reader(Reader) {}
 
   bool ReadLanguageOptions(const LangOptions &LangOpts, bool Complain,
                            bool AllowCompatibleDifferences) override;
@@ -294,8 +318,7 @@
   Preprocessor &PP;
 
 public:
-  SimpleASTReaderListener(Preprocessor &PP)
-    : PP(PP) {}
+  SimpleASTReaderListener(Preprocessor &PP) : PP(PP) {}
 
   bool ReadPreprocessorOptions(const PreprocessorOptions &PPOpts, bool Complain,
                                std::string &SuggestedPredefines) override;
@@ -306,12 +329,15 @@
 class ReadMethodPoolVisitor;
 
 namespace reader {
-  class ASTIdentifierLookupTrait;
-  /// \brief The on-disk hash table(s) used for DeclContext name lookup.
-  struct DeclContextLookupTable;
-}
 
-} // end namespace serialization
+class ASTIdentifierLookupTrait;
+
+/// \brief The on-disk hash table(s) used for DeclContext name lookup.
+struct DeclContextLookupTable;
+
+} // namespace reader
+
+} // namespace serialization
 
 /// \brief Reads an AST files chain containing the contents of a translation
 /// unit.
@@ -334,8 +360,20 @@
     public ExternalSLocEntrySource
 {
 public:
-  typedef SmallVector<uint64_t, 64> RecordData;
-  typedef SmallVectorImpl<uint64_t> RecordDataImpl;
+  /// \brief Types of AST files.
+  friend class ASTDeclReader;
+  friend class ASTIdentifierIterator;
+  friend class ASTRecordReader;
+  friend class ASTStmtReader;
+  friend class ASTUnit; // ASTUnit needs to remap source locations.
+  friend class ASTWriter;
+  friend class PCHValidator;
+  friend class serialization::reader::ASTIdentifierLookupTrait;
+  friend class serialization::ReadMethodPoolVisitor;
+  friend class TypeLocReader;
+
+  using RecordData = SmallVector<uint64_t, 64>;
+  using RecordDataImpl = SmallVectorImpl<uint64_t>;
 
   /// \brief The result of reading the control block of an AST file, which
   /// can fail for various reasons.
@@ -343,41 +381,34 @@
     /// \brief The control block was read successfully. Aside from failures,
     /// the AST file is safe to read into the current context.
     Success,
+
     /// \brief The AST file itself appears corrupted.
     Failure,
+
     /// \brief The AST file was missing.
     Missing,
+
     /// \brief The AST file is out-of-date relative to its input files,
     /// and needs to be regenerated.
     OutOfDate,
+
     /// \brief The AST file was written by a different version of Clang.
     VersionMismatch,
+
     /// \brief The AST file was writtten with a different language/target
     /// configuration.
     ConfigurationMismatch,
+
     /// \brief The AST file has errors.
     HadErrors
   };
 
-  /// \brief Types of AST files.
-  friend class PCHValidator;
-  friend class ASTDeclReader;
-  friend class ASTStmtReader;
-  friend class ASTIdentifierIterator;
-  friend class serialization::reader::ASTIdentifierLookupTrait;
-  friend class TypeLocReader;
-  friend class ASTRecordReader;
-  friend class ASTWriter;
-  friend class ASTUnit; // ASTUnit needs to remap source locations.
-  friend class serialization::ReadMethodPoolVisitor;
-
-  typedef serialization::ModuleFile ModuleFile;
-  typedef serialization::ModuleKind ModuleKind;
-  typedef serialization::ModuleManager ModuleManager;
-
-  typedef ModuleManager::ModuleIterator ModuleIterator;
-  typedef ModuleManager::ModuleConstIterator ModuleConstIterator;
-  typedef ModuleManager::ModuleReverseIterator ModuleReverseIterator;
+  using ModuleFile = serialization::ModuleFile;
+  using ModuleKind = serialization::ModuleKind;
+  using ModuleManager = serialization::ModuleManager;
+  using ModuleIterator = ModuleManager::ModuleIterator;
+  using ModuleConstIterator = ModuleManager::ModuleConstIterator;
+  using ModuleReverseIterator = ModuleManager::ModuleReverseIterator;
 
 private:
   /// \brief The receiver of some callbacks invoked by ASTReader.
@@ -385,6 +416,7 @@
 
   /// \brief The receiver of deserialization events.
   ASTDeserializationListener *DeserializationListener = nullptr;
+
   bool OwnsDeserializationListener = false;
 
   SourceManager &SourceMgr;
@@ -436,7 +468,8 @@
   /// \brief A map of negated SLocEntryIDs to the modules containing them.
   ContinuousRangeMap<unsigned, ModuleFile*, 64> GlobalSLocEntryMap;
 
-  typedef ContinuousRangeMap<unsigned, ModuleFile*, 64> GlobalSLocOffsetMapType;
+  using GlobalSLocOffsetMapType =
+      ContinuousRangeMap<unsigned, ModuleFile *, 64>;
 
   /// \brief A map of reversed (SourceManager::MaxLoadedOffset - SLocOffset)
   /// SourceLocation offsets to the modules containing them.
@@ -448,8 +481,8 @@
   /// ID = (I + 1) << FastQual::Width has already been loaded
   std::vector<QualType> TypesLoaded;
 
-  typedef ContinuousRangeMap<serialization::TypeID, ModuleFile *, 4>
-    GlobalTypeMapType;
+  using GlobalTypeMapType =
+      ContinuousRangeMap<serialization::TypeID, ModuleFile *, 4>;
 
   /// \brief Mapping from global type IDs to the module in which the
   /// type resides along with the offset that should be added to the
@@ -462,17 +495,17 @@
   /// = I + 1 has already been loaded.
   std::vector<Decl *> DeclsLoaded;
 
-  typedef ContinuousRangeMap<serialization::DeclID, ModuleFile *, 4>
-    GlobalDeclMapType;
+  using GlobalDeclMapType =
+      ContinuousRangeMap<serialization::DeclID, ModuleFile *, 4>;
 
   /// \brief Mapping from global declaration IDs to the module in which the
   /// declaration resides.
   GlobalDeclMapType GlobalDeclMap;
 
-  typedef std::pair<ModuleFile *, uint64_t> FileOffset;
-  typedef SmallVector<FileOffset, 2> FileOffsetsTy;
-  typedef llvm::DenseMap<serialization::DeclID, FileOffsetsTy>
-      DeclUpdateOffsetsMap;
+  using FileOffset = std::pair<ModuleFile *, uint64_t>;
+  using FileOffsetsTy = SmallVector<FileOffset, 2>;
+  using DeclUpdateOffsetsMap =
+      llvm::DenseMap<serialization::DeclID, FileOffsetsTy>;
 
   /// \brief Declarations that have modifications residing in a later file
   /// in the chain.
@@ -481,12 +514,15 @@
   struct PendingUpdateRecord {
     Decl *D;
     serialization::GlobalDeclID ID;
+
     // Whether the declaration was just deserialized.
     bool JustLoaded;
+
     PendingUpdateRecord(serialization::GlobalDeclID ID, Decl *D,
                         bool JustLoaded)
         : D(D), ID(ID), JustLoaded(JustLoaded) {}
   };
+
   /// \brief Declaration updates for already-loaded declarations that we need
   /// to apply once we finish processing an import.
   llvm::SmallVector<PendingUpdateRecord, 16> PendingUpdateRecords;
@@ -505,7 +541,7 @@
 
   /// \brief Declarations that have been imported and have typedef names for
   /// linkage purposes.
-  llvm::DenseMap<std::pair<DeclContext*, IdentifierInfo*>, NamedDecl*>
+  llvm::DenseMap<std::pair<DeclContext *, IdentifierInfo *>, NamedDecl *>
       ImportedTypedefNamesForLinkage;
 
   /// \brief Mergeable declaration contexts that have anonymous declarations
@@ -514,12 +550,12 @@
     AnonymousDeclarationsForMerging;
 
   struct FileDeclsInfo {
-    ModuleFile *Mod;
+    ModuleFile *Mod = nullptr;
     ArrayRef<serialization::LocalDeclID> Decls;
 
-    FileDeclsInfo() : Mod(nullptr) {}
+    FileDeclsInfo() = default;
     FileDeclsInfo(ModuleFile *Mod, ArrayRef<serialization::LocalDeclID> Decls)
-      : Mod(Mod), Decls(Decls) {}
+        : Mod(Mod), Decls(Decls) {}
   };
 
   /// \brief Map from a FileID to the file-level declarations that it contains.
@@ -527,7 +563,7 @@
 
   /// \brief An array of lexical contents of a declaration context, as a sequence of
   /// Decl::Kind, DeclID pairs.
-  typedef ArrayRef<llvm::support::unaligned_uint32_t> LexicalContents;
+  using LexicalContents = ArrayRef<llvm::support::unaligned_uint32_t>;
 
   /// \brief Map from a DeclContext to its lexical contents.
   llvm::DenseMap<const DeclContext*, std::pair<ModuleFile*, LexicalContents>>
@@ -548,7 +584,7 @@
     ModuleFile *Mod;
     const unsigned char *Data;
   };
-  typedef SmallVector<PendingVisibleUpdate, 1> DeclContextVisibleUpdates;
+  using DeclContextVisibleUpdates = SmallVector<PendingVisibleUpdate, 1>;
 
   /// \brief Updates to the visible declarations of declaration contexts that
   /// haven't been loaded yet.
@@ -559,22 +595,23 @@
   /// declarations that have not yet been linked to their definitions.
   llvm::SmallPtrSet<Decl *, 4> PendingDefinitions;
 
-  typedef llvm::MapVector<Decl *, uint64_t,
-                          llvm::SmallDenseMap<Decl *, unsigned, 4>,
-                          SmallVector<std::pair<Decl *, uint64_t>, 4> >
-    PendingBodiesMap;
+  using PendingBodiesMap =
+      llvm::MapVector<Decl *, uint64_t,
+                      llvm::SmallDenseMap<Decl *, unsigned, 4>,
+                      SmallVector<std::pair<Decl *, uint64_t>, 4>>;
 
   /// \brief Functions or methods that have bodies that will be attached.
   PendingBodiesMap PendingBodies;
 
   /// \brief Definitions for which we have added merged definitions but not yet
   /// performed deduplication.
-  llvm::SetVector<NamedDecl*> PendingMergedDefinitionsToDeduplicate;
+  llvm::SetVector<NamedDecl *> PendingMergedDefinitionsToDeduplicate;
 
   /// \brief Read the record that describes the lexical contents of a DC.
   bool ReadLexicalDeclContextStorage(ModuleFile &M,
                                      llvm::BitstreamCursor &Cursor,
                                      uint64_t Offset, DeclContext *DC);
+
   /// \brief Read the record that describes the visible contents of a DC.
   bool ReadVisibleDeclContextStorage(ModuleFile &M,
                                      llvm::BitstreamCursor &Cursor,
@@ -588,8 +625,8 @@
   /// been loaded.
   std::vector<IdentifierInfo *> IdentifiersLoaded;
 
-  typedef ContinuousRangeMap<serialization::IdentID, ModuleFile *, 4>
-    GlobalIdentifierMapType;
+  using GlobalIdentifierMapType =
+      ContinuousRangeMap<serialization::IdentID, ModuleFile *, 4>;
 
   /// \brief Mapping from global identifier IDs to the module in which the
   /// identifier resides along with the offset that should be added to the
@@ -604,16 +641,16 @@
   /// been loaded.
   std::vector<MacroInfo *> MacrosLoaded;
 
-  typedef std::pair<IdentifierInfo *, serialization::SubmoduleID>
-      LoadedMacroInfo;
+  using LoadedMacroInfo =
+      std::pair<IdentifierInfo *, serialization::SubmoduleID>;
 
   /// \brief A set of #undef directives that we have loaded; used to
   /// deduplicate the same #undef information coming from multiple module
   /// files.
   llvm::DenseSet<LoadedMacroInfo> LoadedUndefs;
 
-  typedef ContinuousRangeMap<serialization::MacroID, ModuleFile *, 4>
-    GlobalMacroMapType;
+  using GlobalMacroMapType =
+      ContinuousRangeMap<serialization::MacroID, ModuleFile *, 4>;
 
   /// \brief Mapping from global macro IDs to the module in which the
   /// macro resides along with the offset that should be added to the
@@ -626,8 +663,8 @@
   /// indicate that the particular submodule ID has not yet been loaded.
   SmallVector<Module *, 2> SubmodulesLoaded;
   
-  typedef ContinuousRangeMap<serialization::SubmoduleID, ModuleFile *, 4>
-    GlobalSubmoduleMapType;
+  using GlobalSubmoduleMapType =
+      ContinuousRangeMap<serialization::SubmoduleID, ModuleFile *, 4>;
   
   /// \brief Mapping from global submodule IDs to the module file in which the
   /// submodule resides along with the offset that should be added to the
@@ -635,14 +672,13 @@
   GlobalSubmoduleMapType GlobalSubmoduleMap;
 
   /// \brief A set of hidden declarations.
-  typedef SmallVector<Decl*, 2> HiddenNames;
-  typedef llvm::DenseMap<Module *, HiddenNames> HiddenNamesMapType;
+  using HiddenNames = SmallVector<Decl *, 2>;
+  using HiddenNamesMapType = llvm::DenseMap<Module *, HiddenNames>;
 
   /// \brief A mapping from each of the hidden submodules to the deserialized
   /// declarations in that submodule that could be made visible.
   HiddenNamesMapType HiddenNamesMap;
   
-  
   /// \brief A module import, export, or conflict that hasn't yet been resolved.
   struct UnresolvedModuleRef {
     /// \brief The file in which this module resides.
@@ -675,11 +711,10 @@
   /// been loaded.
   SmallVector<Selector, 16> SelectorsLoaded;
 
-  typedef ContinuousRangeMap<serialization::SelectorID, ModuleFile *, 4>
-    GlobalSelectorMapType;
+  using GlobalSelectorMapType =
+      ContinuousRangeMap<serialization::SelectorID, ModuleFile *, 4>;
 
   /// \brief Mapping from global selector IDs to the module in which the
-
   /// global selector ID to produce a local ID.
   GlobalSelectorMapType GlobalSelectorMap;
 
@@ -699,21 +734,28 @@
         : M(M), MacroDirectivesOffset(MacroDirectivesOffset) {}
   };
 
-  typedef llvm::MapVector<IdentifierInfo *, SmallVector<PendingMacroInfo, 2> >
-    PendingMacroIDsMap;
+  using PendingMacroIDsMap =
+      llvm::MapVector<IdentifierInfo *, SmallVector<PendingMacroInfo, 2>>;
 
   /// \brief Mapping from identifiers that have a macro history to the global
   /// IDs have not yet been deserialized to the global IDs of those macros.
   PendingMacroIDsMap PendingMacroIDs;
 
-  typedef ContinuousRangeMap<unsigned, ModuleFile *, 4>
-    GlobalPreprocessedEntityMapType;
+  using GlobalPreprocessedEntityMapType =
+      ContinuousRangeMap<unsigned, ModuleFile *, 4>;
 
   /// \brief Mapping from global preprocessing entity IDs to the module in
   /// which the preprocessed entity resides along with the offset that should be
   /// added to the global preprocessing entity ID to produce a local ID.
   GlobalPreprocessedEntityMapType GlobalPreprocessedEntityMap;
 
+  using GlobalSkippedRangeMapType =
+      ContinuousRangeMap<unsigned, ModuleFile *, 4>;
+
+  /// \brief Mapping from global skipped range base IDs to the module in which
+  /// the skipped ranges reside.
+  GlobalSkippedRangeMapType GlobalSkippedRangeMap;
+
   /// \name CodeGen-relevant special data
   /// \brief Fields containing data that is relevant to CodeGen.
   //@{
@@ -859,7 +901,7 @@
     SourceLocation ImportLoc;
 
     ImportedSubmodule(serialization::SubmoduleID ID, SourceLocation ImportLoc)
-      : ID(ID), ImportLoc(ImportLoc) {}
+        : ID(ID), ImportLoc(ImportLoc) {}
   };
 
 private:
@@ -895,7 +937,8 @@
   ///\brief Whether we are currently processing update records.
   bool ProcessingUpdateRecords = false;
 
-  typedef llvm::DenseMap<unsigned, SwitchCase *> SwitchCaseMapTy;
+  using SwitchCaseMapTy = llvm::DenseMap<unsigned, SwitchCase *>;
+
   /// \brief Mapping from switch-case IDs in the chain to switch-case statements
   ///
   /// Statements usually don't have IDs, but switch cases need them, so that the
@@ -979,7 +1022,7 @@
   ///
   /// The declarations on the identifier chain for these identifiers will be
   /// loaded once the recursive loading has completed.
-  llvm::MapVector<IdentifierInfo *, SmallVector<uint32_t, 4> >
+  llvm::MapVector<IdentifierInfo *, SmallVector<uint32_t, 4>>
     PendingIdentifierInfos;
 
   /// \brief The set of lookup results that we have faked in order to support
@@ -998,7 +1041,9 @@
   public:
     InterestingDecl(Decl *D, bool HasBody)
         : D(D), DeclHasPendingBody(HasBody) {}
+
     Decl *getDecl() { return D; }
+
     /// Whether the declaration has a pending body.
     bool hasPendingBody() { return DeclHasPendingBody; }
   };
@@ -1050,6 +1095,10 @@
   llvm::SmallDenseMap<CXXRecordDecl *, llvm::SmallVector<DataPointers, 2>, 2>
       PendingOdrMergeFailures;
 
+  /// \brief Function definitions in which we found an ODR violation.
+  llvm::SmallDenseMap<FunctionDecl *, llvm::SmallVector<FunctionDecl *, 2>, 2>
+      PendingFunctionOdrMergeFailures;
+
   /// \brief DeclContexts in which we have diagnosed an ODR violation.
   llvm::SmallPtrSet<DeclContext*, 2> DiagnosedOdrMergeFailures;
 
@@ -1062,8 +1111,8 @@
   /// module is loaded.
   SmallVector<ObjCInterfaceDecl *, 16> ObjCClassesLoaded;
 
-  typedef llvm::DenseMap<Decl *, SmallVector<serialization::DeclID, 2> >
-    KeyDeclsMap;
+  using KeyDeclsMap =
+      llvm::DenseMap<Decl *, SmallVector<serialization::DeclID, 2>>;
     
   /// \brief A mapping from canonical declarations to the set of global
   /// declaration IDs for key declaration that have been merged with that
@@ -1097,15 +1146,14 @@
     ASTReader &Reader;
     enum ReadingKind PrevKind;
 
-    ReadingKindTracker(const ReadingKindTracker &) = delete;
-    void operator=(const ReadingKindTracker &) = delete;
-
   public:
     ReadingKindTracker(enum ReadingKind newKind, ASTReader &reader)
-      : Reader(reader), PrevKind(Reader.ReadingKind) {
+        : Reader(reader), PrevKind(Reader.ReadingKind) {
       Reader.ReadingKind = newKind;
     }
 
+    ReadingKindTracker(const ReadingKindTracker &) = delete;
+    ReadingKindTracker &operator=(const ReadingKindTracker &) = delete;
     ~ReadingKindTracker() { Reader.ReadingKind = PrevKind; }
   };
 
@@ -1114,15 +1162,15 @@
     ASTReader &Reader;
     bool PrevState;
 
-    ProcessingUpdatesRAIIObj(const ProcessingUpdatesRAIIObj &) = delete;
-    void operator=(const ProcessingUpdatesRAIIObj &) = delete;
-
   public:
     ProcessingUpdatesRAIIObj(ASTReader &reader)
-      : Reader(reader), PrevState(Reader.ProcessingUpdateRecords) {
+        : Reader(reader), PrevState(Reader.ProcessingUpdateRecords) {
       Reader.ProcessingUpdateRecords = true;
     }
 
+    ProcessingUpdatesRAIIObj(const ProcessingUpdatesRAIIObj &) = delete;
+    ProcessingUpdatesRAIIObj &
+    operator=(const ProcessingUpdatesRAIIObj &) = delete;
     ~ProcessingUpdatesRAIIObj() { Reader.ProcessingUpdateRecords = PrevState; }
   };
 
@@ -1205,7 +1253,7 @@
     ImportedModule(ModuleFile *Mod,
                    ModuleFile *ImportedBy,
                    SourceLocation ImportLoc)
-      : Mod(Mod), ImportedBy(ImportedBy), ImportLoc(ImportLoc) { }
+        : Mod(Mod), ImportedBy(ImportedBy), ImportLoc(ImportLoc) {}
   };
 
   ASTReadResult ReadASTCore(StringRef FileName, ModuleKind Type,
@@ -1266,10 +1314,10 @@
                                        std::string &SuggestedPredefines);
 
   struct RecordLocation {
-    RecordLocation(ModuleFile *M, uint64_t O)
-      : F(M), Offset(O) {}
     ModuleFile *F;
     uint64_t Offset;
+
+    RecordLocation(ModuleFile *M, uint64_t O) : F(M), Offset(O) {}
   };
 
   QualType readTypeRecord(unsigned Index);
@@ -1328,12 +1376,11 @@
             ModuleDeclIterator, const serialization::LocalDeclID *,
             std::random_access_iterator_tag, const Decl *, ptrdiff_t,
             const Decl *, const Decl *> {
-    ASTReader *Reader;
-    ModuleFile *Mod;
+    ASTReader *Reader = nullptr;
+    ModuleFile *Mod = nullptr;
 
   public:
-    ModuleDeclIterator()
-        : iterator_adaptor_base(nullptr), Reader(nullptr), Mod(nullptr) {}
+    ModuleDeclIterator() : iterator_adaptor_base(nullptr) {}
 
     ModuleDeclIterator(ASTReader *Reader, ModuleFile *Mod,
                        const serialization::LocalDeclID *Pos)
@@ -1342,6 +1389,7 @@
     value_type operator*() const {
       return Reader->GetDecl(Reader->getGlobalDeclID(*Mod, *I));
     }
+
     value_type operator->() const { return **this; }
 
     bool operator==(const ModuleDeclIterator &RHS) const {
@@ -1378,8 +1426,6 @@
   void Error(unsigned DiagID, StringRef Arg1 = StringRef(),
              StringRef Arg2 = StringRef()) const;
 
-  ASTReader(const ASTReader &) = delete;
-  void operator=(const ASTReader &) = delete;
 public:
   /// \brief Load the AST file and validate its contents against the given
   /// Preprocessor.
@@ -1428,7 +1474,8 @@
             bool AllowConfigurationMismatch = false,
             bool ValidateSystemInputs = false, bool UseGlobalIndex = true,
             std::unique_ptr<llvm::Timer> ReadTimer = {});
-
+  ASTReader(const ASTReader &) = delete;
+  ASTReader &operator=(const ASTReader &) = delete;
   ~ASTReader() override;
 
   SourceManager &getSourceManager() const { return SourceMgr; }
@@ -1443,15 +1490,19 @@
   enum LoadFailureCapabilities {
     /// \brief The client can't handle any AST loading failures.
     ARR_None = 0,
+
     /// \brief The client can handle an AST file that cannot load because it
     /// is missing.
     ARR_Missing = 0x1,
+
     /// \brief The client can handle an AST file that cannot load because it
     /// is out-of-date relative to its input files.
     ARR_OutOfDate = 0x2,
+
     /// \brief The client can handle an AST file that cannot load because it
     /// was built with a different version of Clang.
     ARR_VersionMismatch = 0x4,
+
     /// \brief The client can handle an AST file that cannot load because it's
     /// compiled configuration doesn't match that of the context it was
     /// loaded into.
@@ -1522,11 +1573,11 @@
   /// RAII object to temporarily add an AST callback listener.
   class ListenerScope {
     ASTReader &Reader;
-    bool Chained;
+    bool Chained = false;
 
   public:
     ListenerScope(ASTReader &Reader, std::unique_ptr<ASTReaderListener> L)
-        : Reader(Reader), Chained(false) {
+        : Reader(Reader) {
       auto Old = Reader.takeListener();
       if (Old) {
         Chained = true;
@@ -1535,6 +1586,7 @@
       }
       Reader.setListener(std::move(L));
     }
+
     ~ListenerScope() {
       auto New = Reader.takeListener();
       if (Chained)
@@ -1643,6 +1695,9 @@
   Optional<bool> isPreprocessedEntityInFileID(unsigned Index,
                                               FileID FID) override;
 
+  /// \brief Read a preallocated skipped range from the external source.
+  SourceRange ReadSkippedRange(unsigned Index) override;
+
   /// \brief Read the header file information for the given file entry.
   HeaderFileInfo GetHeaderFileInfo(const FileEntry *FE) override;
 
@@ -1725,7 +1780,7 @@
   /// was read from the given AST file.
   QualType readType(ModuleFile &F, const RecordData &Record, unsigned &Idx) {
     if (Idx >= Record.size())
-      return QualType();
+      return {};
 
     return getLocalType(F, Record[Idx++]);
   }
@@ -1933,16 +1988,16 @@
       llvm::SmallSetVector<const TypedefNameDecl *, 4> &Decls) override;
 
   void ReadReferencedSelectors(
-          SmallVectorImpl<std::pair<Selector, SourceLocation> > &Sels) override;
+           SmallVectorImpl<std::pair<Selector, SourceLocation>> &Sels) override;
 
   void ReadWeakUndeclaredIdentifiers(
-          SmallVectorImpl<std::pair<IdentifierInfo *, WeakInfo> > &WI) override;
+           SmallVectorImpl<std::pair<IdentifierInfo *, WeakInfo>> &WI) override;
 
   void ReadUsedVTables(SmallVectorImpl<ExternalVTableUse> &VTables) override;
 
   void ReadPendingInstantiations(
-                 SmallVectorImpl<std::pair<ValueDecl *,
-                                           SourceLocation> > &Pending) override;
+                  SmallVectorImpl<std::pair<ValueDecl *,
+                                            SourceLocation>> &Pending) override;
 
   void ReadLateParsedTemplates(
       llvm::MapVector<const FunctionDecl *, std::unique_ptr<LateParsedTemplate>>
@@ -2275,20 +2330,19 @@
 
 /// \brief An object for streaming information from a record.
 class ASTRecordReader {
-  typedef serialization::ModuleFile ModuleFile;
+  using ModuleFile = serialization::ModuleFile;
 
   ASTReader *Reader;
   ModuleFile *F;
   unsigned Idx = 0;
   ASTReader::RecordData Record;
 
-  typedef ASTReader::RecordData RecordData;
-  typedef ASTReader::RecordDataImpl RecordDataImpl;
+  using RecordData = ASTReader::RecordData;
+  using RecordDataImpl = ASTReader::RecordDataImpl;
 
 public:
   /// Construct an ASTRecordReader that uses the default encoding scheme.
-  ASTRecordReader(ASTReader &Reader, ModuleFile &F)
-      : Reader(&Reader), F(&F) {}
+  ASTRecordReader(ASTReader &Reader, ModuleFile &F) : Reader(&Reader), F(&F) {}
 
   /// \brief Reads a record with id AbbrevID from Cursor, resetting the
   /// internal state.
@@ -2302,17 +2356,20 @@
 
   /// \brief The current position in this record.
   unsigned getIdx() const { return Idx; }
+
   /// \brief The length of this record.
   size_t size() const { return Record.size(); }
 
   /// \brief An arbitrary index in this record.
   const uint64_t &operator[](size_t N) { return Record[N]; }
+
   /// \brief The last element in this record.
   const uint64_t &back() const { return Record.back(); }
 
   /// \brief Returns the current value in this record, and advances to the
   /// next value.
   const uint64_t &readInt() { return Record[Idx++]; }
+
   /// \brief Returns the current value in this record, without advancing.
   const uint64_t &peekInt() { return Record[Idx]; }
 
@@ -2566,7 +2623,7 @@
 /// then restores it when destroyed.
 struct SavedStreamPosition {
   explicit SavedStreamPosition(llvm::BitstreamCursor &Cursor)
-    : Cursor(Cursor), Offset(Cursor.GetCurrentBitNo()) { }
+      : Cursor(Cursor), Offset(Cursor.GetCurrentBitNo()) {}
 
   ~SavedStreamPosition() {
     Cursor.JumpToBit(Offset);
@@ -2581,6 +2638,6 @@
   Reader.Error(Msg);
 }
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_SERIALIZATION_ASTREADER_H
diff --git a/include/clang/Serialization/ASTWriter.h b/include/clang/Serialization/ASTWriter.h
index 392a465..9437bf7 100644
--- a/include/clang/Serialization/ASTWriter.h
+++ b/include/clang/Serialization/ASTWriter.h
@@ -1,4 +1,4 @@
-//===--- ASTWriter.h - AST File Writer --------------------------*- C++ -*-===//
+//===- ASTWriter.h - AST File Writer ----------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,67 +11,89 @@
 //  containing a serialized representation of a translation unit.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_SERIALIZATION_ASTWRITER_H
 #define LLVM_CLANG_SERIALIZATION_ASTWRITER_H
 
 #include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/TemplateBase.h"
+#include "clang/AST/TemplateName.h"
+#include "clang/AST/Type.h"
+#include "clang/AST/TypeLoc.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Frontend/PCHContainerOperations.h"
 #include "clang/Sema/SemaConsumer.h"
 #include "clang/Serialization/ASTBitCodes.h"
 #include "clang/Serialization/ASTDeserializationListener.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Bitcode/BitstreamWriter.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <ctime>
+#include <memory>
 #include <queue>
+#include <string>
+#include <utility>
 #include <vector>
 
 namespace llvm {
-  class APFloat;
-  class APInt;
-}
+
+class APFloat;
+class APInt;
+class APSInt;
+
+} // namespace llvm
 
 namespace clang {
 
-class DeclarationName;
 class ASTContext;
+class ASTReader;
+class ASTUnresolvedSet;
 class Attr;
-class NestedNameSpecifier;
 class CXXBaseSpecifier;
 class CXXCtorInitializer;
+class CXXRecordDecl;
+class CXXTemporary;
 class FileEntry;
 class FPOptions;
+class FunctionDecl;
 class HeaderSearch;
 class HeaderSearchOptions;
 class IdentifierResolver;
+class LangOptions;
 class MacroDefinitionRecord;
-class MacroDirective;
 class MacroInfo;
-class OpaqueValueExpr;
-class OpenCLOptions;
-class ASTReader;
 class MemoryBufferCache;
 class Module;
 class ModuleFileExtension;
 class ModuleFileExtensionWriter;
-class PreprocessedEntity;
+class NamedDecl;
+class NestedNameSpecifier;
+class ObjCInterfaceDecl;
 class PreprocessingRecord;
 class Preprocessor;
+struct QualifierInfo;
 class RecordDecl;
 class Sema;
 class SourceManager;
+class Stmt;
 struct StoredDeclsList;
 class SwitchCase;
-class TargetInfo;
+class TemplateParameterList;
 class Token;
+class TypeSourceInfo;
 class VersionTuple;
-class ASTUnresolvedSet;
-
-namespace SrcMgr { class SLocEntry; }
 
 /// \brief Writes an AST file containing the contents of a translation unit.
 ///
@@ -82,14 +104,15 @@
 class ASTWriter : public ASTDeserializationListener,
                   public ASTMutationListener {
 public:
-  typedef SmallVector<uint64_t, 64> RecordData;
-  typedef SmallVectorImpl<uint64_t> RecordDataImpl;
-  typedef ArrayRef<uint64_t> RecordDataRef;
-
   friend class ASTDeclWriter;
+  friend class ASTRecordWriter;
   friend class ASTStmtWriter;
   friend class ASTTypeWriter;
-  friend class ASTRecordWriter;
+
+  using RecordData = SmallVector<uint64_t, 64>;
+  using RecordDataImpl = SmallVectorImpl<uint64_t>;
+  using RecordDataRef = ArrayRef<uint64_t>;
+
 private:
   /// \brief Map that provides the ID numbers of each type within the
   /// output stream, plus those deserialized from a chained PCH.
@@ -100,9 +123,8 @@
   /// allow for the const/volatile qualifiers.
   ///
   /// Keys in the map never have const/volatile qualifiers.
-  typedef llvm::DenseMap<QualType, serialization::TypeIdx,
-                         serialization::UnsafeQualTypeDenseMapInfo>
-    TypeIdxMap;
+  using TypeIdxMap = llvm::DenseMap<QualType, serialization::TypeIdx,
+                                    serialization::UnsafeQualTypeDenseMapInfo>;
 
   /// \brief The bitstream writer used to emit this precompiled header.
   llvm::BitstreamWriter &Stream;
@@ -152,8 +174,8 @@
   /// \brief Stores a declaration or a type to be written to the AST file.
   class DeclOrType {
   public:
-    DeclOrType(Decl *D) : Stored(D), IsType(false) { }
-    DeclOrType(QualType T) : Stored(T.getAsOpaquePtr()), IsType(true) { }
+    DeclOrType(Decl *D) : Stored(D), IsType(false) {}
+    DeclOrType(QualType T) : Stored(T.getAsOpaquePtr()), IsType(true) {}
 
     bool isType() const { return IsType; }
     bool isDecl() const { return !IsType; }
@@ -195,15 +217,16 @@
   std::vector<serialization::DeclOffset> DeclOffsets;
 
   /// \brief Sorted (by file offset) vector of pairs of file offset/DeclID.
-  typedef SmallVector<std::pair<unsigned, serialization::DeclID>, 64>
-    LocDeclIDsTy;
+  using LocDeclIDsTy =
+      SmallVector<std::pair<unsigned, serialization::DeclID>, 64>;
   struct DeclIDInFileInfo {
     LocDeclIDsTy DeclIDs;
+
     /// \brief Set when the DeclIDs vectors from all files are joined, this
     /// indicates the index that this particular vector has in the global one.
     unsigned FirstDeclIndex;
   };
-  typedef llvm::DenseMap<FileID, DeclIDInFileInfo *> FileDeclIDsTy;
+  using FileDeclIDsTy = llvm::DenseMap<FileID, DeclIDInFileInfo *>;
 
   /// \brief Map from file SLocEntries to info about the file-level declarations
   /// that it contains.
@@ -260,6 +283,7 @@
     MacroInfo *MI;
     serialization::MacroID ID;
   };
+
   /// \brief The macro infos to emit.
   std::vector<MacroInfoToEmitData> MacroInfosToEmit;
 
@@ -331,31 +355,33 @@
         : Kind(Kind), Type(Type.getAsOpaquePtr()) {}
     DeclUpdate(unsigned Kind, SourceLocation Loc)
         : Kind(Kind), Loc(Loc.getRawEncoding()) {}
-    DeclUpdate(unsigned Kind, unsigned Val)
-        : Kind(Kind), Val(Val) {}
-    DeclUpdate(unsigned Kind, Module *M)
-          : Kind(Kind), Mod(M) {}
+    DeclUpdate(unsigned Kind, unsigned Val) : Kind(Kind), Val(Val) {}
+    DeclUpdate(unsigned Kind, Module *M) : Kind(Kind), Mod(M) {}
     DeclUpdate(unsigned Kind, const Attr *Attribute)
           : Kind(Kind), Attribute(Attribute) {}
 
     unsigned getKind() const { return Kind; }
     const Decl *getDecl() const { return Dcl; }
     QualType getType() const { return QualType::getFromOpaquePtr(Type); }
+
     SourceLocation getLoc() const {
       return SourceLocation::getFromRawEncoding(Loc);
     }
+
     unsigned getNumber() const { return Val; }
     Module *getModule() const { return Mod; }
     const Attr *getAttr() const { return Attribute; }
   };
 
-  typedef SmallVector<DeclUpdate, 1> UpdateRecord;
-  typedef llvm::MapVector<const Decl *, UpdateRecord> DeclUpdateMap;
+  using UpdateRecord = SmallVector<DeclUpdate, 1>;
+  using DeclUpdateMap = llvm::MapVector<const Decl *, UpdateRecord>;
+
   /// \brief Mapping from declarations that came from a chained PCH to the
   /// record containing modifications to them.
   DeclUpdateMap DeclUpdates;
 
-  typedef llvm::DenseMap<Decl *, Decl *> FirstLatestDeclMap;
+  using FirstLatestDeclMap = llvm::DenseMap<Decl *, Decl *>;
+
   /// \brief Map of first declarations from a chained PCH that point to the
   /// most recent declarations in another PCH.
   FirstLatestDeclMap FirstLatestDecls;
@@ -600,7 +626,6 @@
   /// \brief Emit a reference to a declaration.
   void AddDeclRef(const Decl *D, RecordDataImpl &Record);
 
-
   /// \brief Force a declaration to be emitted and get its ID.
   serialization::DeclID GetDeclRef(const Decl *D);
 
@@ -651,6 +676,7 @@
   unsigned getTypeExtQualAbbrev() const {
     return TypeExtQualAbbrev;
   }
+
   unsigned getTypeFunctionProtoAbbrev() const {
     return TypeFunctionProtoAbbrev;
   }
@@ -701,10 +727,11 @@
                               const FunctionDecl *Delete,
                               Expr *ThisArg) override;
   void CompletedImplicitDefinition(const FunctionDecl *D) override;
-  void StaticDataMemberInstantiated(const VarDecl *D) override;
+  void InstantiationRequested(const ValueDecl *D) override;
+  void VariableDefinitionInstantiated(const VarDecl *D) override;
+  void FunctionDefinitionInstantiated(const FunctionDecl *D) override;
   void DefaultArgumentInstantiated(const ParmVarDecl *D) override;
   void DefaultMemberInitializerInstantiated(const FieldDecl *D) override;
-  void FunctionDefinitionInstantiated(const FunctionDecl *D) override;
   void AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD,
                                     const ObjCInterfaceDecl *IFD) override;
   void DeclarationMarkedUsed(const Decl *D) override;
@@ -757,8 +784,8 @@
       : Writer(Parent.Writer), Record(&Record) {}
 
   /// Copying an ASTRecordWriter is almost certainly a bug.
-  ASTRecordWriter(const ASTRecordWriter&) = delete;
-  void operator=(const ASTRecordWriter&) = delete;
+  ASTRecordWriter(const ASTRecordWriter &) = delete;
+  ASTRecordWriter &operator=(const ASTRecordWriter &) = delete;
 
   /// \brief Extract the underlying record storage.
   ASTWriter::RecordDataImpl &getRecordData() const { return *Record; }
@@ -910,7 +937,7 @@
   void AddUnresolvedSet(const ASTUnresolvedSet &Set);
 
   /// \brief Emit a CXXCtorInitializer array.
-  void AddCXXCtorInitializers(ArrayRef<CXXCtorInitializer*> CtorInits);
+  void AddCXXCtorInitializers(ArrayRef<CXXCtorInitializer *> CtorInits);
 
   void AddCXXDefinitionData(const CXXRecordDecl *D);
 
@@ -956,6 +983,7 @@
                ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
                bool AllowASTWithErrors = false, bool IncludeTimestamps = true);
   ~PCHGenerator() override;
+
   void InitializeSema(Sema &S) override { SemaPtr = &S; }
   void HandleTranslationUnit(ASTContext &Ctx) override;
   ASTMutationListener *GetASTMutationListener() override;
@@ -963,6 +991,6 @@
   bool hasEmittedPCH() const { return Buffer->IsComplete; }
 };
 
-} // end namespace clang
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_SERIALIZATION_ASTWRITER_H
diff --git a/include/clang/Serialization/ContinuousRangeMap.h b/include/clang/Serialization/ContinuousRangeMap.h
index 244b01b..24bfadd 100644
--- a/include/clang/Serialization/ContinuousRangeMap.h
+++ b/include/clang/Serialization/ContinuousRangeMap.h
@@ -1,4 +1,4 @@
-//===--- ContinuousRangeMap.h - Map with int range as key -------*- C++ -*-===//
+//===- ContinuousRangeMap.h - Map with int range as key ---------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,6 +18,7 @@
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/SmallVector.h"
 #include <algorithm>
+#include <cassert>
 #include <utility>
 
 namespace clang {
@@ -35,14 +36,15 @@
 template <typename Int, typename V, unsigned InitialCapacity>
 class ContinuousRangeMap {
 public:
-  typedef std::pair<Int, V> value_type;
-  typedef value_type &reference;
-  typedef const value_type &const_reference;
-  typedef value_type *pointer;
-  typedef const value_type *const_pointer;
+  using value_type = std::pair<Int, V>;
+  using reference = value_type &;
+  using const_reference = const value_type &;
+  using pointer = value_type *;
+  using const_pointer = const value_type *;
 
 private:
-  typedef SmallVector<value_type, InitialCapacity> Representation;
+  using Representation = SmallVector<value_type, InitialCapacity>;
+
   Representation Rep;
 
   struct Compare {
@@ -52,7 +54,7 @@
     bool operator ()(Int L, const_reference R) const {
       return L < R.first;
     }
-    bool operator ()(Int L, Int R) const { 
+    bool operator ()(Int L, Int R) const {
       return L < R;
     }
     bool operator ()(const_reference L, const_reference R) const {
@@ -80,8 +82,8 @@
     Rep.insert(I, Val);
   }
 
-  typedef typename Representation::iterator iterator;
-  typedef typename Representation::const_iterator const_iterator;
+  using iterator = typename Representation::iterator;
+  using const_iterator = typename Representation::const_iterator;
 
   iterator begin() { return Rep.begin(); }
   iterator end() { return Rep.end(); }
@@ -108,13 +110,12 @@
   /// from a set of values.
   class Builder {
     ContinuousRangeMap &Self;
-    
+
+  public:
+    explicit Builder(ContinuousRangeMap &Self) : Self(Self) {}
     Builder(const Builder&) = delete;
     Builder &operator=(const Builder&) = delete;
     
-  public:
-    explicit Builder(ContinuousRangeMap &Self) : Self(Self) { }
-    
     ~Builder() {
       std::sort(Self.Rep.begin(), Self.Rep.end(), Compare());
       std::unique(Self.Rep.begin(), Self.Rep.end(),
@@ -131,9 +132,10 @@
       Self.Rep.push_back(Val);
     }
   };
+
   friend class Builder;
 };
 
-}
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_SERIALIZATION_CONTINUOUSRANGEMAP_H
diff --git a/include/clang/Serialization/Module.h b/include/clang/Serialization/Module.h
index 4e4bf44..26b3bce 100644
--- a/include/clang/Serialization/Module.h
+++ b/include/clang/Serialization/Module.h
@@ -324,6 +324,12 @@
   const PPEntityOffset *PreprocessedEntityOffsets = nullptr;
   unsigned NumPreprocessedEntities = 0;
 
+  /// \brief Base ID for preprocessed skipped ranges local to this module.
+  unsigned BasePreprocessedSkippedRangeID = 0;
+
+  const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr;
+  unsigned NumPreprocessedSkippedRanges = 0;
+
   // === Header search information ===
 
   /// \brief The number of local HeaderFileInfo structures.
diff --git a/include/clang/Serialization/ModuleManager.h b/include/clang/Serialization/ModuleManager.h
index ffc42ce..147a691 100644
--- a/include/clang/Serialization/ModuleManager.h
+++ b/include/clang/Serialization/ModuleManager.h
@@ -1,4 +1,4 @@
-//===--- ModuleManager.cpp - Module Manager ---------------------*- C++ -*-===//
+//===- ModuleManager.cpp - Module Manager -----------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,19 +15,33 @@
 #ifndef LLVM_CLANG_SERIALIZATION_MODULEMANAGER_H
 #define LLVM_CLANG_SERIALIZATION_MODULEMANAGER_H
 
-#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Serialization/Module.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include <cstdint>
+#include <ctime>
+#include <memory>
+#include <string>
+#include <utility>
 
-namespace clang { 
+namespace clang {
 
+class FileEntry;
+class FileManager;
 class GlobalModuleIndex;
+class HeaderSearch;
 class MemoryBufferCache;
 class ModuleMap;
 class PCHContainerReader;
-class HeaderSearch;
 
 namespace serialization {
 
@@ -83,14 +97,12 @@
   ///
   /// The global module index will actually be owned by the ASTReader; this is
   /// just an non-owning pointer.
-  GlobalModuleIndex *GlobalIndex;
+  GlobalModuleIndex *GlobalIndex = nullptr;
 
   /// \brief State used by the "visit" operation to avoid malloc traffic in
   /// calls to visit().
   struct VisitState {
-    explicit VisitState(unsigned N)
-      : VisitNumber(N, 0), NextVisitNumber(1), NextState(nullptr)
-    {
+    explicit VisitState(unsigned N) : VisitNumber(N, 0) {
       Stack.reserve(N);
     }
 
@@ -107,29 +119,26 @@
     SmallVector<unsigned, 4> VisitNumber;
 
     /// \brief The next visit number to use to mark visited module files.
-    unsigned NextVisitNumber;
+    unsigned NextVisitNumber = 1;
 
     /// \brief The next visit state.
-    VisitState *NextState;
+    VisitState *NextState = nullptr;
   };
 
   /// \brief The first visit() state in the chain.
-  VisitState *FirstVisitState;
+  VisitState *FirstVisitState = nullptr;
 
   VisitState *allocateVisitState();
   void returnVisitState(VisitState *State);
 
 public:
-  typedef llvm::pointee_iterator<
-      SmallVectorImpl<std::unique_ptr<ModuleFile>>::iterator>
-      ModuleIterator;
-  typedef llvm::pointee_iterator<
-      SmallVectorImpl<std::unique_ptr<ModuleFile>>::const_iterator>
-      ModuleConstIterator;
-  typedef llvm::pointee_iterator<
-      SmallVectorImpl<std::unique_ptr<ModuleFile>>::reverse_iterator>
-      ModuleReverseIterator;
-  typedef std::pair<uint32_t, StringRef> ModuleOffset;
+  using ModuleIterator = llvm::pointee_iterator<
+      SmallVectorImpl<std::unique_ptr<ModuleFile>>::iterator>;
+  using ModuleConstIterator = llvm::pointee_iterator<
+      SmallVectorImpl<std::unique_ptr<ModuleFile>>::const_iterator>;
+  using ModuleReverseIterator = llvm::pointee_iterator<
+      SmallVectorImpl<std::unique_ptr<ModuleFile>>::reverse_iterator>;
+  using ModuleOffset = std::pair<uint32_t, StringRef>;
 
   explicit ModuleManager(FileManager &FileMgr, MemoryBufferCache &PCMCache,
                          const PCHContainerReader &PCHContainerRdr,
@@ -138,16 +147,19 @@
 
   /// \brief Forward iterator to traverse all loaded modules.
   ModuleIterator begin() { return Chain.begin(); }
+
   /// \brief Forward iterator end-point to traverse all loaded modules
   ModuleIterator end() { return Chain.end(); }
   
   /// \brief Const forward iterator to traverse all loaded modules.
   ModuleConstIterator begin() const { return Chain.begin(); }
+
   /// \brief Const forward iterator end-point to traverse all loaded modules
   ModuleConstIterator end() const { return Chain.end(); }
   
   /// \brief Reverse iterator to traverse all loaded modules.
   ModuleReverseIterator rbegin() { return Chain.rbegin(); }
+
   /// \brief Reverse iterator end-point to traverse all loaded modules.
   ModuleReverseIterator rend() { return Chain.rend(); }
 
@@ -187,15 +199,18 @@
   enum AddModuleResult {
     /// \brief The module file had already been loaded.
     AlreadyLoaded,
+
     /// \brief The module file was just loaded in response to this call.
     NewlyLoaded,
+
     /// \brief The module file is missing.
     Missing,
+
     /// \brief The module file is out-of-date.
     OutOfDate
   };
 
-  typedef ASTFileSignature(*ASTFileSignatureReader)(StringRef);
+  using ASTFileSignatureReader = ASTFileSignature (*)(StringRef);
 
   /// \brief Attempts to create a new module and add it to the list of known
   /// modules.
@@ -306,6 +321,8 @@
   MemoryBufferCache &getPCMCache() const { return *PCMCache; }
 };
 
-} } // end namespace clang::serialization
+} // namespace serialization
 
-#endif
+} // namespace clang
+
+#endif // LLVM_CLANG_SERIALIZATION_MODULEMANAGER_H
diff --git a/include/clang/StaticAnalyzer/Checkers/Checkers.td b/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 9eaf55f..3b94a9b 100644
--- a/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -86,7 +86,7 @@
 
 // The APIModeling package is for checkers that model APIs and don't perform
 // any diagnostics. These checkers are always turned on; this package is
-// intended for API modeling that is not controlled by the the target triple.
+// intended for API modeling that is not controlled by the target triple.
 def APIModeling : Package<"apiModeling">, Hidden;
 def GoogleAPIModeling : Package<"google">, InPackage<APIModeling>;
 
@@ -188,6 +188,10 @@
   HelpText<"Check for cases where the dynamic and the static type of an object are unrelated.">,
   DescFile<"DynamicTypeChecker.cpp">;
 
+def StackAddrAsyncEscapeChecker : Checker<"StackAddressAsyncEscape">,
+  HelpText<"Check that addresses to stack memory do not escape the function">,
+  DescFile<"StackAddrEscapeChecker.cpp">;
+
 } // end "alpha.core"
 
 let ParentPackage = Nullability in {
@@ -410,6 +414,13 @@
   HelpText<"Check for overflows in the arguments to malloc()">,
   DescFile<"MallocOverflowSecurityChecker.cpp">;
 
+// Operating systems specific PROT_READ/PROT_WRITE values is not implemented,
+// the defaults are correct for several common operating systems though, 
+// but may need to be overridden via the related analyzer-config flags.
+def MmapWriteExecChecker : Checker<"MmapWriteExec">,
+  HelpText<"Warn on mmap() calls that are both writable and executable">,
+  DescFile<"MmapWriteExecChecker.cpp">;
+
 } // end "alpha.security"
 
 //===----------------------------------------------------------------------===//
@@ -599,6 +610,13 @@
 
 } // end "osx.cocoa"
 
+let ParentPackage = OSXAlpha in {
+
+def GCDAsyncSemaphoreChecker : Checker<"GCDAsyncSemaphore">,
+  HelpText<"Checker for performance anti-pattern when using semaphors from async API">,
+  DescFile<"GCDAsyncSemaphoreChecker.cpp">;
+} // end "alpha.osx"
+
 let ParentPackage = CocoaAlpha in {
 
 def InstanceVariableInvalidation : Checker<"InstanceVariableInvalidation">,
@@ -749,10 +767,6 @@
   HelpText<"View Exploded Graphs using GraphViz">,
   DescFile<"DebugCheckers.cpp">;
 
-def BugHashDumper : Checker<"DumpBugHash">,
-  HelpText<"Dump the bug hash for all statements.">,
-  DescFile<"DebugCheckers.cpp">;
-
 } // end "debug"
 
 
diff --git a/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
index ce50cc5..767ec02 100644
--- a/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
+++ b/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
@@ -139,6 +139,10 @@
   AnalysisPurgeMode AnalysisPurgeOpt;
   
   std::string AnalyzeSpecificFunction;
+
+  /// Store full compiler invocation for reproducible instructions in the
+  /// generated report.
+  std::string FullCompilerInvocation;
   
   /// \brief The maximum number of times the analyzer visits a block.
   unsigned maxBlockVisitOnPath;
@@ -184,7 +188,19 @@
   /// \brief The mode of function selection used during inlining.
   AnalysisInliningMode InliningMode;
 
+  enum class ExplorationStrategyKind {
+    DFS,
+    BFS,
+    UnexploredFirst,
+    UnexploredFirstQueue,
+    BFSBlockDFSContents,
+    NotSet
+  };
+
 private:
+
+  ExplorationStrategyKind ExplorationStrategy;
+
   /// \brief Describes the kinds for high-level analyzer mode.
   enum UserModeKind {
     UMK_NotSet = 0,
@@ -217,6 +233,9 @@
   /// \sa IncludeLoopExitInCFG
   Optional<bool> IncludeLoopExitInCFG;
 
+  /// \sa IncludeRichConstructorsInCFG
+  Optional<bool> IncludeRichConstructorsInCFG;
+
   /// \sa mayInlineCXXStandardLibrary
   Optional<bool> InlineCXXStandardLibrary;
   
@@ -232,6 +251,9 @@
   /// \sa mayInlineCXXSharedPtrDtor
   Optional<bool> InlineCXXSharedPtrDtor;
 
+  /// \sa mayInlineCXXTemporaryDtors
+  Optional<bool> InlineCXXTemporaryDtors;
+
   /// \sa mayInlineObjCMethod
   Optional<bool> ObjCInliningMode;
 
@@ -260,6 +282,8 @@
   /// \sa StableReportFilename
   Optional<bool> StableReportFilename;
 
+  Optional<bool> SerializeStats;
+
   /// \sa getGraphTrimInterval
   Optional<unsigned> GraphTrimInterval;
 
@@ -284,6 +308,16 @@
   /// \sa shouldDisplayNotesAsEvents
   Optional<bool> DisplayNotesAsEvents;
 
+  /// \sa getCTUDir
+  Optional<StringRef> CTUDir;
+
+  /// \sa getCTUIndexName
+  Optional<StringRef> CTUIndexName;
+
+  /// \sa naiveCTUEnabled
+  Optional<bool> NaiveCTU;
+
+
   /// A helper function that retrieves option for a given full-qualified
   /// checker name.
   /// Options for checkers can be specified via 'analyzer-config' command-line
@@ -386,6 +420,8 @@
   /// outside of AnalyzerOptions.
   UserModeKind getUserMode();
 
+  ExplorationStrategyKind getExplorationStrategy();
+
   /// \brief Returns the inter-procedural analysis mode.
   IPAKind getIPAMode();
 
@@ -428,6 +464,13 @@
   /// the values "true" and "false".
   bool includeLoopExitInCFG();
 
+  /// Returns whether or not construction site information should be included
+  /// in the CFG C++ constructor elements.
+  ///
+  /// This is controlled by the 'cfg-rich-constructors' config options,
+  /// which accepts the values "true" and "false".
+  bool includeRichConstructorsInCFG();
+
   /// Returns whether or not C++ standard library functions may be considered
   /// for inlining.
   ///
@@ -464,6 +507,17 @@
   /// accepts the values "true" and "false".
   bool mayInlineCXXSharedPtrDtor();
 
+  /// Returns true if C++ temporary destructors should be inlined during
+  /// analysis.
+  ///
+  /// If temporary destructors are disabled in the CFG via the
+  /// 'cfg-temporary-dtors' option, temporary destructors would not be
+  /// inlined anyway.
+  ///
+  /// This is controlled by the 'c++-temp-dtor-inlining' config option, which
+  /// accepts the values "true" and "false".
+  bool mayInlineCXXTemporaryDtors();
+
   /// Returns whether or not paths that go through null returns should be
   /// suppressed.
   ///
@@ -512,6 +566,14 @@
   /// which accepts the values "true" and "false". Default = false
   bool shouldWriteStableReportFilename();
 
+  /// \return Whether the analyzer should
+  /// serialize statistics to plist output.
+  /// Statistics would be serialized in JSON format inside the main dictionary
+  /// under the \c statistics key.
+  /// Available only if compiled in assert mode or with LLVM statistics
+  /// explicitly enabled.
+  bool shouldSerializeStats();
+
   /// Returns whether irrelevant parts of a bug report path should be pruned
   /// out of the final output.
   ///
@@ -585,6 +647,17 @@
   /// to false when unset.
   bool shouldDisplayNotesAsEvents();
 
+  /// Returns the directory containing the CTU related files.
+  StringRef getCTUDir();
+
+  /// Returns the name of the file containing the CTU index of functions.
+  StringRef getCTUIndexName();
+
+  /// Returns true when naive cross translation unit analysis is enabled.
+  /// This is an experimental feature to inline functions from another
+  /// translation units.
+  bool naiveCTUEnabled();
+
 public:
   AnalyzerOptions() :
     AnalysisStoreOpt(RegionStoreModel),
@@ -607,6 +680,7 @@
     // Cap the stack depth at 4 calls (5 stack frames, base + 4 calls).
     InlineMaxStackDepth(5),
     InliningMode(NoRedundancy),
+    ExplorationStrategy(ExplorationStrategyKind::NotSet),
     UserMode(UMK_NotSet),
     IPAMode(IPAK_NotSet),
     CXXMemberInliningMode() {}
diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
index 0f1eb09..a5febd8 100644
--- a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
+++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
@@ -17,7 +17,7 @@
 
 #include "clang/Basic/SourceLocation.h"
 #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
-#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitor.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
@@ -470,8 +470,6 @@
     return true;
   }
 
-  bool RemoveUnneededCalls(PathPieces &pieces, BugReport *R);
-
   void Register(BugType *BT);
 
   /// \brief Add the given report to the set of reports tracked by BugReporter.
diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitor.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitor.h
deleted file mode 100644
index b72bce5..0000000
--- a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitor.h
+++ /dev/null
@@ -1,379 +0,0 @@
-//===---  BugReporterVisitor.h - Generate PathDiagnostics -------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This file declares BugReporterVisitors, which are used to generate enhanced
-//  diagnostic traces.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_BUGREPORTERVISITOR_H
-#define LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_BUGREPORTERVISITOR_H
-
-#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
-#include "llvm/ADT/FoldingSet.h"
-
-namespace clang {
-class CFGBlock;
-
-namespace ento {
-
-class BugReport;
-class BugReporterContext;
-class ExplodedNode;
-class MemRegion;
-class PathDiagnosticPiece;
-
-/// \brief BugReporterVisitors are used to add custom diagnostics along a path.
-///
-/// Custom visitors should subclass the BugReporterVisitorImpl class for a
-/// default implementation of the clone() method.
-/// (Warning: if you have a deep subclass of BugReporterVisitorImpl, the
-/// default implementation of clone() will NOT do the right thing, and you
-/// will have to provide your own implementation.)
-class BugReporterVisitor : public llvm::FoldingSetNode {
-public:
-  BugReporterVisitor() = default;
-  BugReporterVisitor(const BugReporterVisitor &) = default;
-  BugReporterVisitor(BugReporterVisitor &&) {}
-  virtual ~BugReporterVisitor();
-
-  /// \brief Returns a copy of this BugReporter.
-  ///
-  /// Custom BugReporterVisitors should not override this method directly.
-  /// Instead, they should inherit from BugReporterVisitorImpl and provide
-  /// a protected or public copy constructor.
-  ///
-  /// (Warning: if you have a deep subclass of BugReporterVisitorImpl, the
-  /// default implementation of clone() will NOT do the right thing, and you
-  /// will have to provide your own implementation.)
-  virtual std::unique_ptr<BugReporterVisitor> clone() const = 0;
-
-  /// \brief Return a diagnostic piece which should be associated with the
-  /// given node.
-  ///
-  /// The last parameter can be used to register a new visitor with the given
-  /// BugReport while processing a node.
-  virtual std::shared_ptr<PathDiagnosticPiece>
-  VisitNode(const ExplodedNode *Succ, const ExplodedNode *Pred,
-            BugReporterContext &BRC, BugReport &BR) = 0;
-
-  /// \brief Provide custom definition for the final diagnostic piece on the
-  /// path - the piece, which is displayed before the path is expanded.
-  ///
-  /// If returns NULL the default implementation will be used.
-  /// Also note that at most one visitor of a BugReport should generate a
-  /// non-NULL end of path diagnostic piece.
-  virtual std::unique_ptr<PathDiagnosticPiece>
-  getEndPath(BugReporterContext &BRC, const ExplodedNode *N, BugReport &BR);
-
-  virtual void Profile(llvm::FoldingSetNodeID &ID) const = 0;
-
-  /// \brief Generates the default final diagnostic piece.
-  static std::unique_ptr<PathDiagnosticPiece>
-  getDefaultEndPath(BugReporterContext &BRC, const ExplodedNode *N,
-                    BugReport &BR);
-};
-
-/// This class provides a convenience implementation for clone() using the
-/// Curiously-Recurring Template Pattern. If you are implementing a custom
-/// BugReporterVisitor, subclass BugReporterVisitorImpl and provide a public
-/// or protected copy constructor.
-///
-/// (Warning: if you have a deep subclass of BugReporterVisitorImpl, the
-/// default implementation of clone() will NOT do the right thing, and you
-/// will have to provide your own implementation.)
-template <class DERIVED>
-class BugReporterVisitorImpl : public BugReporterVisitor {
-  std::unique_ptr<BugReporterVisitor> clone() const override {
-    return llvm::make_unique<DERIVED>(*static_cast<const DERIVED *>(this));
-  }
-};
-
-class FindLastStoreBRVisitor final
-    : public BugReporterVisitorImpl<FindLastStoreBRVisitor> {
-  const MemRegion *R;
-  SVal V;
-  bool Satisfied;
-
-  /// If the visitor is tracking the value directly responsible for the
-  /// bug, we are going to employ false positive suppression.
-  bool EnableNullFPSuppression;
-
-public:
-  /// Creates a visitor for every VarDecl inside a Stmt and registers it with
-  /// the BugReport.
-  static void registerStatementVarDecls(BugReport &BR, const Stmt *S,
-                                        bool EnableNullFPSuppression);
-
-  FindLastStoreBRVisitor(KnownSVal V, const MemRegion *R,
-                         bool InEnableNullFPSuppression)
-  : R(R),
-    V(V),
-    Satisfied(false),
-    EnableNullFPSuppression(InEnableNullFPSuppression) {}
-
-  void Profile(llvm::FoldingSetNodeID &ID) const override;
-
-  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
-                                                 const ExplodedNode *PrevN,
-                                                 BugReporterContext &BRC,
-                                                 BugReport &BR) override;
-};
-
-class TrackConstraintBRVisitor final
-    : public BugReporterVisitorImpl<TrackConstraintBRVisitor> {
-  DefinedSVal Constraint;
-  bool Assumption;
-  bool IsSatisfied;
-  bool IsZeroCheck;
-
-  /// We should start tracking from the last node along the path in which the
-  /// value is constrained.
-  bool IsTrackingTurnedOn;
-
-public:
-  TrackConstraintBRVisitor(DefinedSVal constraint, bool assumption)
-  : Constraint(constraint), Assumption(assumption), IsSatisfied(false),
-    IsZeroCheck(!Assumption && Constraint.getAs<Loc>()),
-    IsTrackingTurnedOn(false) {}
-
-  void Profile(llvm::FoldingSetNodeID &ID) const override;
-
-  /// Return the tag associated with this visitor.  This tag will be used
-  /// to make all PathDiagnosticPieces created by this visitor.
-  static const char *getTag();
-
-  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
-                                                 const ExplodedNode *PrevN,
-                                                 BugReporterContext &BRC,
-                                                 BugReport &BR) override;
-
-private:
-  /// Checks if the constraint is valid in the current state.
-  bool isUnderconstrained(const ExplodedNode *N) const;
-
-};
-
-/// \class NilReceiverBRVisitor
-/// \brief Prints path notes when a message is sent to a nil receiver.
-class NilReceiverBRVisitor final
-    : public BugReporterVisitorImpl<NilReceiverBRVisitor> {
-public:
-
-  void Profile(llvm::FoldingSetNodeID &ID) const override {
-    static int x = 0;
-    ID.AddPointer(&x);
-  }
-
-  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
-                                                 const ExplodedNode *PrevN,
-                                                 BugReporterContext &BRC,
-                                                 BugReport &BR) override;
-
-  /// If the statement is a message send expression with nil receiver, returns
-  /// the receiver expression. Returns NULL otherwise.
-  static const Expr *getNilReceiver(const Stmt *S, const ExplodedNode *N);
-};
-
-/// Visitor that tries to report interesting diagnostics from conditions.
-class ConditionBRVisitor final
-    : public BugReporterVisitorImpl<ConditionBRVisitor> {
-
-  // FIXME: constexpr initialization isn't supported by MSVC2013.
-  static const char *const GenericTrueMessage;
-  static const char *const GenericFalseMessage;
-
-public:
-  void Profile(llvm::FoldingSetNodeID &ID) const override {
-    static int x = 0;
-    ID.AddPointer(&x);
-  }
-
-  /// Return the tag associated with this visitor.  This tag will be used
-  /// to make all PathDiagnosticPieces created by this visitor.
-  static const char *getTag();
-
-  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
-                                                 const ExplodedNode *Prev,
-                                                 BugReporterContext &BRC,
-                                                 BugReport &BR) override;
-
-  std::shared_ptr<PathDiagnosticPiece> VisitNodeImpl(const ExplodedNode *N,
-                                                     const ExplodedNode *Prev,
-                                                     BugReporterContext &BRC,
-                                                     BugReport &BR);
-
-  std::shared_ptr<PathDiagnosticPiece>
-  VisitTerminator(const Stmt *Term, const ExplodedNode *N,
-                  const CFGBlock *srcBlk, const CFGBlock *dstBlk, BugReport &R,
-                  BugReporterContext &BRC);
-
-  std::shared_ptr<PathDiagnosticPiece>
-  VisitTrueTest(const Expr *Cond, bool tookTrue, BugReporterContext &BRC,
-                BugReport &R, const ExplodedNode *N);
-
-  std::shared_ptr<PathDiagnosticPiece>
-  VisitTrueTest(const Expr *Cond, const DeclRefExpr *DR, const bool tookTrue,
-                BugReporterContext &BRC, BugReport &R, const ExplodedNode *N);
-
-  std::shared_ptr<PathDiagnosticPiece>
-  VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr,
-                const bool tookTrue, BugReporterContext &BRC, BugReport &R,
-                const ExplodedNode *N);
-
-  std::shared_ptr<PathDiagnosticPiece>
-  VisitConditionVariable(StringRef LhsString, const Expr *CondVarExpr,
-                         const bool tookTrue, BugReporterContext &BRC,
-                         BugReport &R, const ExplodedNode *N);
-
-  bool patternMatch(const Expr *Ex,
-                    const Expr *ParentEx,
-                    raw_ostream &Out,
-                    BugReporterContext &BRC,
-                    BugReport &R,
-                    const ExplodedNode *N,
-                    Optional<bool> &prunable);
-
-  static bool isPieceMessageGeneric(const PathDiagnosticPiece *Piece);
-};
-
-/// \brief Suppress reports that might lead to known false positives.
-///
-/// Currently this suppresses reports based on locations of bugs.
-class LikelyFalsePositiveSuppressionBRVisitor final
-    : public BugReporterVisitorImpl<LikelyFalsePositiveSuppressionBRVisitor> {
-public:
-  static void *getTag() {
-    static int Tag = 0;
-    return static_cast<void *>(&Tag);
-  }
-
-  void Profile(llvm::FoldingSetNodeID &ID) const override {
-    ID.AddPointer(getTag());
-  }
-
-  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
-                                                 const ExplodedNode *Prev,
-                                                 BugReporterContext &BRC,
-                                                 BugReport &BR) override {
-    return nullptr;
-  }
-
-  std::unique_ptr<PathDiagnosticPiece> getEndPath(BugReporterContext &BRC,
-                                                  const ExplodedNode *N,
-                                                  BugReport &BR) override;
-};
-
-/// \brief When a region containing undefined value or '0' value is passed 
-/// as an argument in a call, marks the call as interesting.
-///
-/// As a result, BugReporter will not prune the path through the function even
-/// if the region's contents are not modified/accessed by the call.
-class UndefOrNullArgVisitor final
-    : public BugReporterVisitorImpl<UndefOrNullArgVisitor> {
-
-  /// The interesting memory region this visitor is tracking.
-  const MemRegion *R;
-
-public:
-  UndefOrNullArgVisitor(const MemRegion *InR) : R(InR) {}
-
-  void Profile(llvm::FoldingSetNodeID &ID) const override {
-    static int Tag = 0;
-    ID.AddPointer(&Tag);
-    ID.AddPointer(R);
-  }
-
-  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
-                                                 const ExplodedNode *PrevN,
-                                                 BugReporterContext &BRC,
-                                                 BugReport &BR) override;
-};
-
-class SuppressInlineDefensiveChecksVisitor final
-    : public BugReporterVisitorImpl<SuppressInlineDefensiveChecksVisitor> {
-  /// The symbolic value for which we are tracking constraints.
-  /// This value is constrained to null in the end of path.
-  DefinedSVal V;
-
-  /// Track if we found the node where the constraint was first added.
-  bool IsSatisfied;
-
-  /// Since the visitors can be registered on nodes previous to the last
-  /// node in the BugReport, but the path traversal always starts with the last
-  /// node, the visitor invariant (that we start with a node in which V is null)
-  /// might not hold when node visitation starts. We are going to start tracking
-  /// from the last node in which the value is null.
-  bool IsTrackingTurnedOn;
-
-public:
-  SuppressInlineDefensiveChecksVisitor(DefinedSVal Val, const ExplodedNode *N);
-
-  void Profile(llvm::FoldingSetNodeID &ID) const override;
-
-  /// Return the tag associated with this visitor.  This tag will be used
-  /// to make all PathDiagnosticPieces created by this visitor.
-  static const char *getTag();
-
-  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *Succ,
-                                                 const ExplodedNode *Pred,
-                                                 BugReporterContext &BRC,
-                                                 BugReport &BR) override;
-};
-
-class CXXSelfAssignmentBRVisitor final
-  : public BugReporterVisitorImpl<CXXSelfAssignmentBRVisitor> {
-  
-  bool Satisfied;
-
-public:
-  CXXSelfAssignmentBRVisitor() : Satisfied(false) {}
-
-  void Profile(llvm::FoldingSetNodeID &ID) const override {}
-
-  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *Succ,
-                                                 const ExplodedNode *Pred,
-                                                 BugReporterContext &BRC,
-                                                 BugReport &BR) override;
-};
-
-namespace bugreporter {
-
-/// Attempts to add visitors to trace a null or undefined value back to its
-/// point of origin, whether it is a symbol constrained to null or an explicit
-/// assignment.
-///
-/// \param N A node "downstream" from the evaluation of the statement.
-/// \param S The statement whose value is null or undefined.
-/// \param R The bug report to which visitors should be attached.
-/// \param IsArg Whether the statement is an argument to an inlined function.
-///              If this is the case, \p N \em must be the CallEnter node for
-///              the function.
-/// \param EnableNullFPSuppression Whether we should employ false positive
-///         suppression (inlined defensive checks, returned null).
-///
-/// \return Whether or not the function was able to add visitors for this
-///         statement. Note that returning \c true does not actually imply
-///         that any visitors were added.
-bool trackNullOrUndefValue(const ExplodedNode *N, const Stmt *S, BugReport &R,
-                           bool IsArg = false,
-                           bool EnableNullFPSuppression = true);
-
-const Expr *getDerefExpr(const Stmt *S);
-const Stmt *GetDenomExpr(const ExplodedNode *N);
-const Stmt *GetRetValExpr(const ExplodedNode *N);
-bool isDeclRefExprToReference(const Expr *E);
-
-
-} // end namespace clang
-} // end namespace ento
-} // end namespace bugreporter
-
-
-#endif
diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
new file mode 100644
index 0000000..2043896
--- /dev/null
+++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
@@ -0,0 +1,379 @@
+//===---  BugReporterVisitors.h - Generate PathDiagnostics -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file declares BugReporterVisitors, which are used to generate enhanced
+//  diagnostic traces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_BUGREPORTERVISITORS_H
+#define LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_BUGREPORTERVISITORS_H
+
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "llvm/ADT/FoldingSet.h"
+
+namespace clang {
+class CFGBlock;
+
+namespace ento {
+
+class BugReport;
+class BugReporterContext;
+class ExplodedNode;
+class MemRegion;
+class PathDiagnosticPiece;
+
+/// \brief BugReporterVisitors are used to add custom diagnostics along a path.
+///
+/// Custom visitors should subclass the BugReporterVisitorImpl class for a
+/// default implementation of the clone() method.
+/// (Warning: if you have a deep subclass of BugReporterVisitorImpl, the
+/// default implementation of clone() will NOT do the right thing, and you
+/// will have to provide your own implementation.)
+class BugReporterVisitor : public llvm::FoldingSetNode {
+public:
+  BugReporterVisitor() = default;
+  BugReporterVisitor(const BugReporterVisitor &) = default;
+  BugReporterVisitor(BugReporterVisitor &&) {}
+  virtual ~BugReporterVisitor();
+
+  /// \brief Returns a copy of this BugReporter.
+  ///
+  /// Custom BugReporterVisitors should not override this method directly.
+  /// Instead, they should inherit from BugReporterVisitorImpl and provide
+  /// a protected or public copy constructor.
+  ///
+  /// (Warning: if you have a deep subclass of BugReporterVisitorImpl, the
+  /// default implementation of clone() will NOT do the right thing, and you
+  /// will have to provide your own implementation.)
+  virtual std::unique_ptr<BugReporterVisitor> clone() const = 0;
+
+  /// \brief Return a diagnostic piece which should be associated with the
+  /// given node.
+  ///
+  /// The last parameter can be used to register a new visitor with the given
+  /// BugReport while processing a node.
+  virtual std::shared_ptr<PathDiagnosticPiece>
+  VisitNode(const ExplodedNode *Succ, const ExplodedNode *Pred,
+            BugReporterContext &BRC, BugReport &BR) = 0;
+
+  /// \brief Provide custom definition for the final diagnostic piece on the
+  /// path - the piece, which is displayed before the path is expanded.
+  ///
+  /// If returns NULL the default implementation will be used.
+  /// Also note that at most one visitor of a BugReport should generate a
+  /// non-NULL end of path diagnostic piece.
+  virtual std::unique_ptr<PathDiagnosticPiece>
+  getEndPath(BugReporterContext &BRC, const ExplodedNode *N, BugReport &BR);
+
+  virtual void Profile(llvm::FoldingSetNodeID &ID) const = 0;
+
+  /// \brief Generates the default final diagnostic piece.
+  static std::unique_ptr<PathDiagnosticPiece>
+  getDefaultEndPath(BugReporterContext &BRC, const ExplodedNode *N,
+                    BugReport &BR);
+};
+
+/// This class provides a convenience implementation for clone() using the
+/// Curiously-Recurring Template Pattern. If you are implementing a custom
+/// BugReporterVisitor, subclass BugReporterVisitorImpl and provide a public
+/// or protected copy constructor.
+///
+/// (Warning: if you have a deep subclass of BugReporterVisitorImpl, the
+/// default implementation of clone() will NOT do the right thing, and you
+/// will have to provide your own implementation.)
+template <class DERIVED>
+class BugReporterVisitorImpl : public BugReporterVisitor {
+  std::unique_ptr<BugReporterVisitor> clone() const override {
+    return llvm::make_unique<DERIVED>(*static_cast<const DERIVED *>(this));
+  }
+};
+
+class FindLastStoreBRVisitor final
+    : public BugReporterVisitorImpl<FindLastStoreBRVisitor> {
+  const MemRegion *R;
+  SVal V;
+  bool Satisfied;
+
+  /// If the visitor is tracking the value directly responsible for the
+  /// bug, we are going to employ false positive suppression.
+  bool EnableNullFPSuppression;
+
+public:
+  /// Creates a visitor for every VarDecl inside a Stmt and registers it with
+  /// the BugReport.
+  static void registerStatementVarDecls(BugReport &BR, const Stmt *S,
+                                        bool EnableNullFPSuppression);
+
+  FindLastStoreBRVisitor(KnownSVal V, const MemRegion *R,
+                         bool InEnableNullFPSuppression)
+  : R(R),
+    V(V),
+    Satisfied(false),
+    EnableNullFPSuppression(InEnableNullFPSuppression) {}
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override;
+
+  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
+                                                 const ExplodedNode *PrevN,
+                                                 BugReporterContext &BRC,
+                                                 BugReport &BR) override;
+};
+
+class TrackConstraintBRVisitor final
+    : public BugReporterVisitorImpl<TrackConstraintBRVisitor> {
+  DefinedSVal Constraint;
+  bool Assumption;
+  bool IsSatisfied;
+  bool IsZeroCheck;
+
+  /// We should start tracking from the last node along the path in which the
+  /// value is constrained.
+  bool IsTrackingTurnedOn;
+
+public:
+  TrackConstraintBRVisitor(DefinedSVal constraint, bool assumption)
+  : Constraint(constraint), Assumption(assumption), IsSatisfied(false),
+    IsZeroCheck(!Assumption && Constraint.getAs<Loc>()),
+    IsTrackingTurnedOn(false) {}
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override;
+
+  /// Return the tag associated with this visitor.  This tag will be used
+  /// to make all PathDiagnosticPieces created by this visitor.
+  static const char *getTag();
+
+  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
+                                                 const ExplodedNode *PrevN,
+                                                 BugReporterContext &BRC,
+                                                 BugReport &BR) override;
+
+private:
+  /// Checks if the constraint is valid in the current state.
+  bool isUnderconstrained(const ExplodedNode *N) const;
+
+};
+
+/// \class NilReceiverBRVisitor
+/// \brief Prints path notes when a message is sent to a nil receiver.
+class NilReceiverBRVisitor final
+    : public BugReporterVisitorImpl<NilReceiverBRVisitor> {
+public:
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override {
+    static int x = 0;
+    ID.AddPointer(&x);
+  }
+
+  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
+                                                 const ExplodedNode *PrevN,
+                                                 BugReporterContext &BRC,
+                                                 BugReport &BR) override;
+
+  /// If the statement is a message send expression with nil receiver, returns
+  /// the receiver expression. Returns NULL otherwise.
+  static const Expr *getNilReceiver(const Stmt *S, const ExplodedNode *N);
+};
+
+/// Visitor that tries to report interesting diagnostics from conditions.
+class ConditionBRVisitor final
+    : public BugReporterVisitorImpl<ConditionBRVisitor> {
+
+  // FIXME: constexpr initialization isn't supported by MSVC2013.
+  static const char *const GenericTrueMessage;
+  static const char *const GenericFalseMessage;
+
+public:
+  void Profile(llvm::FoldingSetNodeID &ID) const override {
+    static int x = 0;
+    ID.AddPointer(&x);
+  }
+
+  /// Return the tag associated with this visitor.  This tag will be used
+  /// to make all PathDiagnosticPieces created by this visitor.
+  static const char *getTag();
+
+  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
+                                                 const ExplodedNode *Prev,
+                                                 BugReporterContext &BRC,
+                                                 BugReport &BR) override;
+
+  std::shared_ptr<PathDiagnosticPiece> VisitNodeImpl(const ExplodedNode *N,
+                                                     const ExplodedNode *Prev,
+                                                     BugReporterContext &BRC,
+                                                     BugReport &BR);
+
+  std::shared_ptr<PathDiagnosticPiece>
+  VisitTerminator(const Stmt *Term, const ExplodedNode *N,
+                  const CFGBlock *srcBlk, const CFGBlock *dstBlk, BugReport &R,
+                  BugReporterContext &BRC);
+
+  std::shared_ptr<PathDiagnosticPiece>
+  VisitTrueTest(const Expr *Cond, bool tookTrue, BugReporterContext &BRC,
+                BugReport &R, const ExplodedNode *N);
+
+  std::shared_ptr<PathDiagnosticPiece>
+  VisitTrueTest(const Expr *Cond, const DeclRefExpr *DR, const bool tookTrue,
+                BugReporterContext &BRC, BugReport &R, const ExplodedNode *N);
+
+  std::shared_ptr<PathDiagnosticPiece>
+  VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr,
+                const bool tookTrue, BugReporterContext &BRC, BugReport &R,
+                const ExplodedNode *N);
+
+  std::shared_ptr<PathDiagnosticPiece>
+  VisitConditionVariable(StringRef LhsString, const Expr *CondVarExpr,
+                         const bool tookTrue, BugReporterContext &BRC,
+                         BugReport &R, const ExplodedNode *N);
+
+  bool patternMatch(const Expr *Ex,
+                    const Expr *ParentEx,
+                    raw_ostream &Out,
+                    BugReporterContext &BRC,
+                    BugReport &R,
+                    const ExplodedNode *N,
+                    Optional<bool> &prunable);
+
+  static bool isPieceMessageGeneric(const PathDiagnosticPiece *Piece);
+};
+
+/// \brief Suppress reports that might lead to known false positives.
+///
+/// Currently this suppresses reports based on locations of bugs.
+class LikelyFalsePositiveSuppressionBRVisitor final
+    : public BugReporterVisitorImpl<LikelyFalsePositiveSuppressionBRVisitor> {
+public:
+  static void *getTag() {
+    static int Tag = 0;
+    return static_cast<void *>(&Tag);
+  }
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override {
+    ID.AddPointer(getTag());
+  }
+
+  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
+                                                 const ExplodedNode *Prev,
+                                                 BugReporterContext &BRC,
+                                                 BugReport &BR) override {
+    return nullptr;
+  }
+
+  std::unique_ptr<PathDiagnosticPiece> getEndPath(BugReporterContext &BRC,
+                                                  const ExplodedNode *N,
+                                                  BugReport &BR) override;
+};
+
+/// \brief When a region containing undefined value or '0' value is passed 
+/// as an argument in a call, marks the call as interesting.
+///
+/// As a result, BugReporter will not prune the path through the function even
+/// if the region's contents are not modified/accessed by the call.
+class UndefOrNullArgVisitor final
+    : public BugReporterVisitorImpl<UndefOrNullArgVisitor> {
+
+  /// The interesting memory region this visitor is tracking.
+  const MemRegion *R;
+
+public:
+  UndefOrNullArgVisitor(const MemRegion *InR) : R(InR) {}
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override {
+    static int Tag = 0;
+    ID.AddPointer(&Tag);
+    ID.AddPointer(R);
+  }
+
+  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
+                                                 const ExplodedNode *PrevN,
+                                                 BugReporterContext &BRC,
+                                                 BugReport &BR) override;
+};
+
+class SuppressInlineDefensiveChecksVisitor final
+    : public BugReporterVisitorImpl<SuppressInlineDefensiveChecksVisitor> {
+  /// The symbolic value for which we are tracking constraints.
+  /// This value is constrained to null in the end of path.
+  DefinedSVal V;
+
+  /// Track if we found the node where the constraint was first added.
+  bool IsSatisfied;
+
+  /// Since the visitors can be registered on nodes previous to the last
+  /// node in the BugReport, but the path traversal always starts with the last
+  /// node, the visitor invariant (that we start with a node in which V is null)
+  /// might not hold when node visitation starts. We are going to start tracking
+  /// from the last node in which the value is null.
+  bool IsTrackingTurnedOn;
+
+public:
+  SuppressInlineDefensiveChecksVisitor(DefinedSVal Val, const ExplodedNode *N);
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override;
+
+  /// Return the tag associated with this visitor.  This tag will be used
+  /// to make all PathDiagnosticPieces created by this visitor.
+  static const char *getTag();
+
+  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *Succ,
+                                                 const ExplodedNode *Pred,
+                                                 BugReporterContext &BRC,
+                                                 BugReport &BR) override;
+};
+
+class CXXSelfAssignmentBRVisitor final
+  : public BugReporterVisitorImpl<CXXSelfAssignmentBRVisitor> {
+  
+  bool Satisfied;
+
+public:
+  CXXSelfAssignmentBRVisitor() : Satisfied(false) {}
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override {}
+
+  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *Succ,
+                                                 const ExplodedNode *Pred,
+                                                 BugReporterContext &BRC,
+                                                 BugReport &BR) override;
+};
+
+namespace bugreporter {
+
+/// Attempts to add visitors to trace a null or undefined value back to its
+/// point of origin, whether it is a symbol constrained to null or an explicit
+/// assignment.
+///
+/// \param N A node "downstream" from the evaluation of the statement.
+/// \param S The statement whose value is null or undefined.
+/// \param R The bug report to which visitors should be attached.
+/// \param IsArg Whether the statement is an argument to an inlined function.
+///              If this is the case, \p N \em must be the CallEnter node for
+///              the function.
+/// \param EnableNullFPSuppression Whether we should employ false positive
+///         suppression (inlined defensive checks, returned null).
+///
+/// \return Whether or not the function was able to add visitors for this
+///         statement. Note that returning \c true does not actually imply
+///         that any visitors were added.
+bool trackNullOrUndefValue(const ExplodedNode *N, const Stmt *S, BugReport &R,
+                           bool IsArg = false,
+                           bool EnableNullFPSuppression = true);
+
+const Expr *getDerefExpr(const Stmt *S);
+const Stmt *GetDenomExpr(const ExplodedNode *N);
+const Stmt *GetRetValExpr(const ExplodedNode *N);
+bool isDeclRefExprToReference(const Expr *E);
+
+
+} // end namespace clang
+} // end namespace ento
+} // end namespace bugreporter
+
+
+#endif
diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h
index 18fa85c..d3163ef 100644
--- a/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h
+++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h
@@ -32,27 +32,39 @@
   const CheckName Check;
   const std::string Name;
   const std::string Category;
-  bool SuppressonSink;
+  const CheckerBase *Checker;
+  bool SuppressOnSink;
 
   virtual void anchor();
-public:
-  BugType(class CheckName check, StringRef name, StringRef cat)
-      : Check(check), Name(name), Category(cat), SuppressonSink(false) {}
-  BugType(const CheckerBase *checker, StringRef name, StringRef cat)
-      : Check(checker->getCheckName()), Name(name), Category(cat),
-        SuppressonSink(false) {}
-  virtual ~BugType() {}
 
-  // FIXME: Should these be made strings as well?
+public:
+  BugType(CheckName Check, StringRef Name, StringRef Cat)
+      : Check(Check), Name(Name), Category(Cat), Checker(nullptr),
+        SuppressOnSink(false) {}
+  BugType(const CheckerBase *Checker, StringRef Name, StringRef Cat)
+      : Check(Checker->getCheckName()), Name(Name), Category(Cat),
+        Checker(Checker), SuppressOnSink(false) {}
+  virtual ~BugType() = default;
+
   StringRef getName() const { return Name; }
   StringRef getCategory() const { return Category; }
-  StringRef getCheckName() const { return Check.getName(); }
+  StringRef getCheckName() const {
+    // FIXME: This is a workaround to ensure that the correct check name is used
+    // The check names are set after the constructors are run.
+    // In case the BugType object is initialized in the checker's ctor
+    // the Check field will be empty. To circumvent this problem we use 
+    // CheckerBase whenever it is possible.
+    StringRef CheckName =
+        Checker ? Checker->getCheckName().getName() : Check.getName();
+    assert(!CheckName.empty() && "Check name is not set properly.");
+    return CheckName;
+  }
 
   /// isSuppressOnSink - Returns true if bug reports associated with this bug
   ///  type should be suppressed if the end node of the report is post-dominated
   ///  by a sink node.
-  bool isSuppressOnSink() const { return SuppressonSink; }
-  void setSuppressOnSink(bool x) { SuppressonSink = x; }
+  bool isSuppressOnSink() const { return SuppressOnSink; }
+  void setSuppressOnSink(bool x) { SuppressOnSink = x; }
 
   virtual void FlushReports(BugReporter& BR);
 };
@@ -74,7 +86,7 @@
   StringRef getDescription() const { return desc; }
 };
 
-} // end GR namespace
+} // end ento namespace
 
 } // end clang namespace
 #endif
diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h b/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h
index a07cd88..2adc09c 100644
--- a/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h
+++ b/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h
@@ -23,6 +23,8 @@
 #include <deque>
 #include <iterator>
 #include <list>
+#include <map>
+#include <set>
 #include <string>
 #include <vector>
 
@@ -334,7 +336,7 @@
 // Path "pieces" for path-sensitive diagnostics.
 //===----------------------------------------------------------------------===//
 
-class PathDiagnosticPiece {
+class PathDiagnosticPiece: public llvm::FoldingSetNode {
 public:
   enum Kind { ControlFlow, Event, Macro, Call, Note };
   enum DisplayHint { Above, Below };
@@ -733,6 +735,9 @@
   void Profile(llvm::FoldingSetNodeID &ID) const override;
 };
 
+/// File IDs mapped to sets of line numbers.
+typedef std::map<unsigned, std::set<unsigned>> FilesToLineNumsMap;
+
 /// PathDiagnostic - PathDiagnostic objects represent a single path-sensitive
 ///  diagnostic.  It represents an ordered-collection of PathDiagnosticPieces,
 ///  each which represent the pieces of the path.
@@ -756,12 +761,16 @@
   PathDiagnosticLocation UniqueingLoc;
   const Decl *UniqueingDecl;
 
+  /// Lines executed in the path.
+  std::unique_ptr<FilesToLineNumsMap> ExecutedLines;
+
   PathDiagnostic() = delete;
 public:
   PathDiagnostic(StringRef CheckName, const Decl *DeclWithIssue,
                  StringRef bugtype, StringRef verboseDesc, StringRef shortDesc,
                  StringRef category, PathDiagnosticLocation LocationToUnique,
-                 const Decl *DeclToUnique);
+                 const Decl *DeclToUnique,
+                 std::unique_ptr<FilesToLineNumsMap> ExecutedLines);
 
   ~PathDiagnostic();
   
@@ -830,6 +839,12 @@
   meta_iterator meta_end() const { return OtherDesc.end(); }
   void addMeta(StringRef s) { OtherDesc.push_back(s); }
 
+  typedef FilesToLineNumsMap::const_iterator filesmap_iterator;
+  filesmap_iterator executedLines_begin() const {
+    return ExecutedLines->begin();
+  }
+  filesmap_iterator executedLines_end() const { return ExecutedLines->end(); }
+
   PathDiagnosticLocation getLocation() const {
     assert(Loc.isValid() && "No report location set yet!");
     return Loc;
diff --git a/include/clang/StaticAnalyzer/Core/Checker.h b/include/clang/StaticAnalyzer/Core/Checker.h
index f947776..305abea 100644
--- a/include/clang/StaticAnalyzer/Core/Checker.h
+++ b/include/clang/StaticAnalyzer/Core/Checker.h
@@ -283,6 +283,22 @@
   }
 };
 
+class NewAllocator {
+  template <typename CHECKER>
+  static void _checkNewAllocator(void *checker, const CXXNewExpr *NE,
+                                 SVal Target, CheckerContext &C) {
+    ((const CHECKER *)checker)->checkNewAllocator(NE, Target, C);
+  }
+
+public:
+  template <typename CHECKER>
+  static void _register(CHECKER *checker, CheckerManager &mgr) {
+    mgr._registerForNewAllocator(
+        CheckerManager::CheckNewAllocatorFunc(checker,
+                                              _checkNewAllocator<CHECKER>));
+  }
+};
+
 class LiveSymbols {
   template <typename CHECKER>
   static void _checkLiveSymbols(void *checker, ProgramStateRef state,
diff --git a/include/clang/StaticAnalyzer/Core/CheckerManager.h b/include/clang/StaticAnalyzer/Core/CheckerManager.h
index 88cb08a..081718e 100644
--- a/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -1,4 +1,4 @@
-//===--- CheckerManager.h - Static Analyzer Checker Manager -----*- C++ -*-===//
+//===- CheckerManager.h - Static Analyzer Checker Manager -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,45 +16,56 @@
 
 #include "clang/Analysis/ProgramPoint.h"
 #include "clang/Basic/LangOptions.h"
-#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
-#include <utility>
+#include "llvm/ADT/StringRef.h"
 #include <vector>
 
 namespace clang {
-  class Decl;
-  class Stmt;
-  class CallExpr;
+
+class AnalyzerOptions;
+class CallExpr;
+class CXXNewExpr;
+class Decl;
+class LocationContext;
+class Stmt;
+class TranslationUnitDecl;
 
 namespace ento {
-  class CheckerBase;
-  class CheckerRegistry;
-  class ExprEngine;
-  class AnalysisManager;
-  class BugReporter;
-  class CheckerContext;
-  class ObjCMethodCall;
-  class SVal;
-  class ExplodedNode;
-  class ExplodedNodeSet;
-  class ExplodedGraph;
-  class ProgramState;
-  class NodeBuilder;
-  struct NodeBuilderContext;
-  class MemRegion;
-  class SymbolReaper;
+
+class AnalysisManager;
+class BugReporter;
+class CallEvent;
+class CheckerBase;
+class CheckerContext;
+class CheckerRegistry;
+class ExplodedGraph;
+class ExplodedNode;
+class ExplodedNodeSet;
+class ExprEngine;
+class MemRegion;
+struct NodeBuilderContext;
+class ObjCMethodCall;
+class RegionAndSymbolInvalidationTraits;
+class SVal;
+class SymbolReaper;
 
 template <typename T> class CheckerFn;
 
 template <typename RET, typename... Ps>
 class CheckerFn<RET(Ps...)> {
-  typedef RET (*Func)(void *, Ps...);
+  using Func = RET (*)(void *, Ps...);
+
   Func Fn;
+
 public:
   CheckerBase *Checker;
-  CheckerFn(CheckerBase *checker, Func fn) : Fn(fn), Checker(checker) { }
+
+  CheckerFn(CheckerBase *checker, Func fn) : Fn(fn), Checker(checker) {}
+
   RET operator()(Ps... ps) const {
     return Fn(Checker, ps...);
   }
@@ -85,12 +96,15 @@
 // name strings have a lifetime that keeps them alive at least until the path
 // diagnostics have been processed.
 class CheckName {
-  StringRef Name;
   friend class ::clang::ento::CheckerRegistry;
+
+  StringRef Name;
+
   explicit CheckName(StringRef Name) : Name(Name) {}
 
 public:
   CheckName() = default;
+
   StringRef getName() const { return Name; }
 };
 
@@ -121,9 +135,9 @@
   const LangOptions &getLangOpts() const { return LangOpts; }
   AnalyzerOptions &getAnalyzerOptions() { return AOptions; }
 
-  typedef CheckerBase *CheckerRef;
-  typedef const void *CheckerTag;
-  typedef CheckerFn<void ()> CheckerDtor;
+  using CheckerRef = CheckerBase *;
+  using CheckerTag = const void *;
+  using CheckerDtor = CheckerFn<void ()>;
 
 //===----------------------------------------------------------------------===//
 // registerChecker
@@ -238,7 +252,6 @@
                               Eng);
   }
 
-
   /// \brief Run checkers for visiting obj-c messages.
   void runCheckersForObjCMessage(ObjCMessageVisitKind visitKind,
                                  ExplodedNodeSet &Dst,
@@ -303,6 +316,13 @@
                                      ExplodedNodeSet &Dst, ExplodedNode *Pred,
                                      ExprEngine &Eng);
 
+  /// \brief Run checkers between C++ operator new and constructor calls.
+  void runCheckersForNewAllocator(const CXXNewExpr *NE, SVal Target,
+                                  ExplodedNodeSet &Dst,
+                                  ExplodedNode *Pred,
+                                  ExprEngine &Eng,
+                                  bool wasInlined = false);
+
   /// \brief Run checkers for live symbols.
   ///
   /// Allows modifying SymbolReaper object. For example, checkers can explicitly
@@ -361,7 +381,7 @@
                               const InvalidatedSymbols &Escaped,
                               const CallEvent *Call,
                               PointerEscapeKind Kind,
-                             RegionAndSymbolInvalidationTraits *ITraits);
+                              RegionAndSymbolInvalidationTraits *ITraits);
 
   /// \brief Run checkers for handling assumptions on symbolic values.
   ProgramStateRef runCheckersForEvalAssume(ProgramStateRef state,
@@ -397,10 +417,11 @@
   // Functions used by the registration mechanism, checkers should not touch
   // these directly.
 
-  typedef CheckerFn<void (const Decl *, AnalysisManager&, BugReporter &)>
-      CheckDeclFunc;
+  using CheckDeclFunc =
+      CheckerFn<void (const Decl *, AnalysisManager&, BugReporter &)>;
 
-  typedef bool (*HandlesDeclFunc)(const Decl *D);
+  using HandlesDeclFunc = bool (*)(const Decl *D);
+
   void _registerForDecl(CheckDeclFunc checkfn, HandlesDeclFunc isForDeclFn);
 
   void _registerForBody(CheckDeclFunc checkfn);
@@ -409,67 +430,66 @@
 // Internal registration functions for path-sensitive checking.
 //===----------------------------------------------------------------------===//
 
-  typedef CheckerFn<void (const Stmt *, CheckerContext &)> CheckStmtFunc;
+  using CheckStmtFunc = CheckerFn<void (const Stmt *, CheckerContext &)>;
   
-  typedef CheckerFn<void (const ObjCMethodCall &, CheckerContext &)>
-      CheckObjCMessageFunc;
+  using CheckObjCMessageFunc =
+      CheckerFn<void (const ObjCMethodCall &, CheckerContext &)>;
 
-  typedef CheckerFn<void (const CallEvent &, CheckerContext &)>
-      CheckCallFunc;
+  using CheckCallFunc =
+      CheckerFn<void (const CallEvent &, CheckerContext &)>;
   
-  typedef CheckerFn<void (const SVal &location, bool isLoad,
-                          const Stmt *S,
-                          CheckerContext &)>
-      CheckLocationFunc;
+  using CheckLocationFunc =
+      CheckerFn<void (const SVal &location, bool isLoad, const Stmt *S,
+                      CheckerContext &)>;
   
-  typedef CheckerFn<void (const SVal &location, const SVal &val, 
-                          const Stmt *S, CheckerContext &)> 
-      CheckBindFunc;
+  using CheckBindFunc =
+      CheckerFn<void (const SVal &location, const SVal &val, const Stmt *S,
+                      CheckerContext &)>;
   
-  typedef CheckerFn<void (ExplodedGraph &, BugReporter &, ExprEngine &)>
-      CheckEndAnalysisFunc;
+  using CheckEndAnalysisFunc =
+      CheckerFn<void (ExplodedGraph &, BugReporter &, ExprEngine &)>;
 
-  typedef CheckerFn<void (CheckerContext &)>
-      CheckBeginFunctionFunc;
+  using CheckBeginFunctionFunc = CheckerFn<void (CheckerContext &)>;
 
-  typedef CheckerFn<void (CheckerContext &)>
-      CheckEndFunctionFunc;
+  using CheckEndFunctionFunc = CheckerFn<void (CheckerContext &)>;
   
-  typedef CheckerFn<void (const Stmt *, CheckerContext &)>
-      CheckBranchConditionFunc;
-  
-  typedef CheckerFn<void (SymbolReaper &, CheckerContext &)>
-      CheckDeadSymbolsFunc;
-  
-  typedef CheckerFn<void (ProgramStateRef,SymbolReaper &)> CheckLiveSymbolsFunc;
-  
-  typedef CheckerFn<ProgramStateRef (ProgramStateRef,
-                                     const InvalidatedSymbols *symbols,
-                                     ArrayRef<const MemRegion *> ExplicitRegions,
-                                     ArrayRef<const MemRegion *> Regions,
-                                     const LocationContext *LCtx,
-                                     const CallEvent *Call)>
-      CheckRegionChangesFunc;
-  
-  typedef CheckerFn<ProgramStateRef (ProgramStateRef,
-                                     const InvalidatedSymbols &Escaped,
-                                     const CallEvent *Call,
-                                     PointerEscapeKind Kind,
-                                     RegionAndSymbolInvalidationTraits *ITraits)>
-      CheckPointerEscapeFunc;
-  
-  typedef CheckerFn<ProgramStateRef (ProgramStateRef,
-                                          const SVal &cond, bool assumption)>
-      EvalAssumeFunc;
-  
-  typedef CheckerFn<bool (const CallExpr *, CheckerContext &)>
-      EvalCallFunc;
+  using CheckBranchConditionFunc =
+      CheckerFn<void (const Stmt *, CheckerContext &)>;
 
-  typedef CheckerFn<void (const TranslationUnitDecl *,
-                          AnalysisManager&, BugReporter &)>
-      CheckEndOfTranslationUnit;
+  using CheckNewAllocatorFunc =
+      CheckerFn<void (const CXXNewExpr *, SVal, CheckerContext &)>;
+  
+  using CheckDeadSymbolsFunc =
+      CheckerFn<void (SymbolReaper &, CheckerContext &)>;
+  
+  using CheckLiveSymbolsFunc = CheckerFn<void (ProgramStateRef,SymbolReaper &)>;
+  
+  using CheckRegionChangesFunc =
+      CheckerFn<ProgramStateRef (ProgramStateRef,
+                                 const InvalidatedSymbols *symbols,
+                                 ArrayRef<const MemRegion *> ExplicitRegions,
+                                 ArrayRef<const MemRegion *> Regions,
+                                 const LocationContext *LCtx,
+                                 const CallEvent *Call)>;
+  
+  using CheckPointerEscapeFunc =
+      CheckerFn<ProgramStateRef (ProgramStateRef,
+                                 const InvalidatedSymbols &Escaped,
+                                 const CallEvent *Call, PointerEscapeKind Kind,
+                                 RegionAndSymbolInvalidationTraits *ITraits)>;
+  
+  using EvalAssumeFunc =
+      CheckerFn<ProgramStateRef (ProgramStateRef, const SVal &cond,
+                                 bool assumption)>;
+  
+  using EvalCallFunc = CheckerFn<bool (const CallExpr *, CheckerContext &)>;
 
-  typedef bool (*HandlesStmtFunc)(const Stmt *D);
+  using CheckEndOfTranslationUnit =
+      CheckerFn<void (const TranslationUnitDecl *, AnalysisManager &,
+                      BugReporter &)>;
+
+  using HandlesStmtFunc = bool (*)(const Stmt *D);
+
   void _registerForPreStmt(CheckStmtFunc checkfn,
                            HandlesStmtFunc isForStmtFn);
   void _registerForPostStmt(CheckStmtFunc checkfn,
@@ -494,6 +514,8 @@
 
   void _registerForBranchCondition(CheckBranchConditionFunc checkfn);
 
+  void _registerForNewAllocator(CheckNewAllocatorFunc checkfn);
+
   void _registerForLiveSymbols(CheckLiveSymbolsFunc checkfn);
 
   void _registerForDeadSymbols(CheckDeadSymbolsFunc checkfn);
@@ -514,8 +536,8 @@
 // Internal registration functions for events.
 //===----------------------------------------------------------------------===//
 
-  typedef void *EventTag;
-  typedef CheckerFn<void (const void *event)> CheckEventFunc;
+  using EventTag = void *;
+  using CheckEventFunc = CheckerFn<void (const void *event)>;
 
   template <typename EVENT>
   void _registerListenerForEvent(CheckEventFunc checkfn) {
@@ -535,8 +557,8 @@
     if (I == Events.end())
       return;
     const EventInfo &info = I->second;
-    for (unsigned i = 0, e = info.Checkers.size(); i != e; ++i)
-      info.Checkers[i](&event);
+    for (const auto Checker : info.Checkers)
+      Checker(&event);
   }
 
 //===----------------------------------------------------------------------===//
@@ -562,8 +584,8 @@
 
   std::vector<CheckDeclFunc> BodyCheckers;
 
-  typedef SmallVector<CheckDeclFunc, 4> CachedDeclCheckers;
-  typedef llvm::DenseMap<unsigned, CachedDeclCheckers> CachedDeclCheckersMapTy;
+  using CachedDeclCheckers = SmallVector<CheckDeclFunc, 4>;
+  using CachedDeclCheckersMapTy = llvm::DenseMap<unsigned, CachedDeclCheckers>;
   CachedDeclCheckersMapTy CachedDeclCheckersMap;
 
   struct StmtCheckerInfo {
@@ -573,8 +595,8 @@
   };
   std::vector<StmtCheckerInfo> StmtCheckers;
 
-  typedef SmallVector<CheckStmtFunc, 4> CachedStmtCheckers;
-  typedef llvm::DenseMap<unsigned, CachedStmtCheckers> CachedStmtCheckersMapTy;
+  using CachedStmtCheckers = SmallVector<CheckStmtFunc, 4>;
+  using CachedStmtCheckersMapTy = llvm::DenseMap<unsigned, CachedStmtCheckers>;
   CachedStmtCheckersMapTy CachedStmtCheckersMap;
 
   const CachedStmtCheckers &getCachedStmtCheckersFor(const Stmt *S,
@@ -603,6 +625,8 @@
 
   std::vector<CheckBranchConditionFunc> BranchConditionCheckers;
 
+  std::vector<CheckNewAllocatorFunc> NewAllocatorCheckers;
+
   std::vector<CheckLiveSymbolsFunc> LiveSymbolsCheckers;
 
   std::vector<CheckDeadSymbolsFunc> DeadSymbolsCheckers;
@@ -619,16 +643,17 @@
 
   struct EventInfo {
     SmallVector<CheckEventFunc, 4> Checkers;
-    bool HasDispatcher;
-    EventInfo() : HasDispatcher(false) { }
+    bool HasDispatcher = false;
+
+    EventInfo() = default;
   };
   
-  typedef llvm::DenseMap<EventTag, EventInfo> EventsTy;
+  using EventsTy = llvm::DenseMap<EventTag, EventInfo>;
   EventsTy Events;
 };
 
-} // end ento namespace
+} // namespace ento
 
-} // end clang namespace
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_CHECKERMANAGER_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h b/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h
index 4aa8744..0bbc650 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h
@@ -1,4 +1,4 @@
-//=== BasicValueFactory.h - Basic values for Path Sens analysis --*- C++ -*---//
+//==- BasicValueFactory.h - Basic values for Path Sens analysis --*- C++ -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,12 +17,26 @@
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_BASICVALUEFACTORY_H
 
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/Type.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/ImmutableList.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Allocator.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
 
 namespace clang {
+
+class CXXBaseSpecifier;
+class DeclaratorDecl;
+
 namespace ento {
 
 class CompoundValData : public llvm::FoldingSetNode {
@@ -34,7 +48,8 @@
     assert(NonLoc::isCompoundType(t));
   }
 
-  typedef llvm::ImmutableList<SVal>::iterator iterator;
+  using iterator = llvm::ImmutableList<SVal>::iterator;
+
   iterator begin() const { return L.begin(); }
   iterator end() const { return L.end(); }
 
@@ -47,6 +62,7 @@
 class LazyCompoundValData : public llvm::FoldingSetNode {
   StoreRef store;
   const TypedValueRegion *region;
+
 public:
   LazyCompoundValData(const StoreRef &st, const TypedValueRegion *r)
       : store(st), region(r) {
@@ -63,16 +79,17 @@
   void Profile(llvm::FoldingSetNodeID& ID) { Profile(ID, store, region); }
 };
 
-class PointerToMemberData: public llvm::FoldingSetNode {
+class PointerToMemberData : public llvm::FoldingSetNode {
   const DeclaratorDecl *D;
   llvm::ImmutableList<const CXXBaseSpecifier *> L;
 
 public:
   PointerToMemberData(const DeclaratorDecl *D,
                       llvm::ImmutableList<const CXXBaseSpecifier *> L)
-    : D(D), L(L) {}
+      : D(D), L(L) {}
 
-  typedef llvm::ImmutableList<const CXXBaseSpecifier *>::iterator iterator;
+  using iterator = llvm::ImmutableList<const CXXBaseSpecifier *>::iterator;
+
   iterator begin() const { return L.begin(); }
   iterator end() const { return L.end(); }
 
@@ -81,24 +98,25 @@
 
   void Profile(llvm::FoldingSetNodeID& ID) { Profile(ID, D, L); }
   const DeclaratorDecl *getDeclaratorDecl() const {return D;}
+
   llvm::ImmutableList<const CXXBaseSpecifier *> getCXXBaseList() const {
     return L;
   }
 };
 
 class BasicValueFactory {
-  typedef llvm::FoldingSet<llvm::FoldingSetNodeWrapper<llvm::APSInt> >
-          APSIntSetTy;
+  using APSIntSetTy =
+      llvm::FoldingSet<llvm::FoldingSetNodeWrapper<llvm::APSInt>>;
 
   ASTContext &Ctx;
   llvm::BumpPtrAllocator& BPAlloc;
 
-  APSIntSetTy   APSIntSet;
-  void *        PersistentSVals;
-  void *        PersistentSValPairs;
+  APSIntSetTy APSIntSet;
+  void *PersistentSVals = nullptr;
+  void *PersistentSValPairs = nullptr;
 
   llvm::ImmutableList<SVal>::Factory SValListFactory;
-  llvm::ImmutableList<const CXXBaseSpecifier*>::Factory CXXBaseListFactory;
+  llvm::ImmutableList<const CXXBaseSpecifier *>::Factory CXXBaseListFactory;
   llvm::FoldingSet<CompoundValData>  CompoundValDataSet;
   llvm::FoldingSet<LazyCompoundValData> LazyCompoundValDataSet;
   llvm::FoldingSet<PointerToMemberData> PointerToMemberDataSet;
@@ -109,9 +127,8 @@
 
 public:
   BasicValueFactory(ASTContext &ctx, llvm::BumpPtrAllocator &Alloc)
-    : Ctx(ctx), BPAlloc(Alloc), PersistentSVals(nullptr),
-      PersistentSValPairs(nullptr), SValListFactory(Alloc),
-      CXXBaseListFactory(Alloc) {}
+      : Ctx(ctx), BPAlloc(Alloc), SValListFactory(Alloc),
+        CXXBaseListFactory(Alloc) {}
 
   ~BasicValueFactory();
 
@@ -147,57 +164,57 @@
     return getValue(TargetType.convert(From));
   }
 
-  const llvm::APSInt& getIntValue(uint64_t X, bool isUnsigned) {
+  const llvm::APSInt &getIntValue(uint64_t X, bool isUnsigned) {
     QualType T = isUnsigned ? Ctx.UnsignedIntTy : Ctx.IntTy;
     return getValue(X, T);
   }
 
-  inline const llvm::APSInt& getMaxValue(const llvm::APSInt &v) {
+  const llvm::APSInt &getMaxValue(const llvm::APSInt &v) {
     return getValue(APSIntType(v).getMaxValue());
   }
 
-  inline const llvm::APSInt& getMinValue(const llvm::APSInt &v) {
+  const llvm::APSInt &getMinValue(const llvm::APSInt &v) {
     return getValue(APSIntType(v).getMinValue());
   }
 
-  inline const llvm::APSInt& getMaxValue(QualType T) {
+  const llvm::APSInt &getMaxValue(QualType T) {
     return getValue(getAPSIntType(T).getMaxValue());
   }
 
-  inline const llvm::APSInt& getMinValue(QualType T) {
+  const llvm::APSInt &getMinValue(QualType T) {
     return getValue(getAPSIntType(T).getMinValue());
   }
 
-  inline const llvm::APSInt& Add1(const llvm::APSInt& V) {
+  const llvm::APSInt &Add1(const llvm::APSInt &V) {
     llvm::APSInt X = V;
     ++X;
     return getValue(X);
   }
 
-  inline const llvm::APSInt& Sub1(const llvm::APSInt& V) {
+  const llvm::APSInt &Sub1(const llvm::APSInt &V) {
     llvm::APSInt X = V;
     --X;
     return getValue(X);
   }
 
-  inline const llvm::APSInt& getZeroWithTypeSize(QualType T) {
+  const llvm::APSInt &getZeroWithTypeSize(QualType T) {
     assert(T->isScalarType());
     return getValue(0, Ctx.getTypeSize(T), true);
   }
 
-  inline const llvm::APSInt& getZeroWithPtrWidth(bool isUnsigned = true) {
+  const llvm::APSInt &getZeroWithPtrWidth(bool isUnsigned = true) {
     return getValue(0, Ctx.getTypeSize(Ctx.VoidPtrTy), isUnsigned);
   }
 
-  inline const llvm::APSInt &getIntWithPtrWidth(uint64_t X, bool isUnsigned) {
+  const llvm::APSInt &getIntWithPtrWidth(uint64_t X, bool isUnsigned) {
     return getValue(X, Ctx.getTypeSize(Ctx.VoidPtrTy), isUnsigned);
   }
 
-  inline const llvm::APSInt& getTruthValue(bool b, QualType T) {
-    return getValue(b ? 1 : 0, Ctx.getTypeSize(T), false);
+  const llvm::APSInt &getTruthValue(bool b, QualType T) {
+    return getValue(b ? 1 : 0, Ctx.getIntWidth(T), true);
   }
 
-  inline const llvm::APSInt& getTruthValue(bool b) {
+  const llvm::APSInt &getTruthValue(bool b) {
     return getTruthValue(b, Ctx.getLogicalOperationType());
   }
 
@@ -229,7 +246,7 @@
     return CXXBaseListFactory.add(CBS, L);
   }
 
-  const clang::ento::PointerToMemberData *accumCXXBase(
+  const PointerToMemberData *accumCXXBase(
       llvm::iterator_range<CastExpr::path_const_iterator> PathRange,
       const nonloc::PointerToMember &PTM);
 
@@ -246,8 +263,8 @@
   const SVal* getPersistentSVal(SVal X);
 };
 
-} // end GR namespace
+} // namespace ento
 
-} // end clang namespace
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_BASICVALUEFACTORY_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h b/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
index 9fec217..5e7b3ef 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
@@ -1,4 +1,4 @@
-//===- CallEvent.h - Wrapper for all function and method calls ----*- C++ -*--//
+//===- CallEvent.h - Wrapper for all function and method calls --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,19 +16,42 @@
 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLEVENT_H
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLEVENT_H
 
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
-#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <limits>
 #include <utility>
 
 namespace clang {
+
+class LocationContext;
 class ProgramPoint;
 class ProgramPointTag;
+class StackFrameContext;
 
 namespace ento {
 
@@ -48,19 +71,20 @@
 };
 
 class CallEvent;
-class CallEventManager;
 
 /// This class represents a description of a function call using the number of
 /// arguments and the name of the function.
 class CallDescription {
   friend CallEvent;
-  mutable IdentifierInfo *II;
-  mutable bool IsLookupDone;
+
+  mutable IdentifierInfo *II = nullptr;
+  mutable bool IsLookupDone = false;
   StringRef FuncName;
   unsigned RequiredArgs;
 
 public:
-  const static unsigned NoArgRequirement = ~0;
+  const static unsigned NoArgRequirement = std::numeric_limits<unsigned>::max();
+
   /// \brief Constructs a CallDescription object.
   ///
   /// @param FuncName The name of the function that will be matched.
@@ -69,8 +93,7 @@
   /// call. Omit this parameter to match every occurance of call with a given
   /// name regardless the number of arguments.
   CallDescription(StringRef FuncName, unsigned RequiredArgs = NoArgRequirement)
-      : II(nullptr), IsLookupDone(false), FuncName(FuncName),
-        RequiredArgs(RequiredArgs) {}
+      : FuncName(FuncName), RequiredArgs(RequiredArgs) {}
 
   /// \brief Get the name of the function that this object matches.
   StringRef getFunctionName() const { return FuncName; }
@@ -104,18 +127,19 @@
 class RuntimeDefinition {
   /// The Declaration of the function which could be called at runtime.
   /// NULL if not available.
-  const Decl *D;
+  const Decl *D = nullptr;
 
   /// The region representing an object (ObjC/C++) on which the method is
   /// called. With dynamic dispatch, the method definition depends on the
   /// runtime type of this object. NULL when the DynamicTypeInfo is
   /// precise.
-  const MemRegion *R;
+  const MemRegion *R = nullptr;
 
 public:
-  RuntimeDefinition(): D(nullptr), R(nullptr) {}
-  RuntimeDefinition(const Decl *InD): D(InD), R(nullptr) {}
+  RuntimeDefinition() = default;
+  RuntimeDefinition(const Decl *InD): D(InD) {}
   RuntimeDefinition(const Decl *InD, const MemRegion *InR): D(InD), R(InR) {}
+
   const Decl *getDecl() { return D; }
     
   /// \brief Check if the definition we have is precise. 
@@ -139,15 +163,13 @@
 /// Use the "Data" and "Location" fields instead.
 class CallEvent {
 public:
-  typedef CallEventKind Kind;
+  using Kind = CallEventKind;
 
 private:
   ProgramStateRef State;
   const LocationContext *LCtx;
   llvm::PointerUnion<const Expr *, const Decl *> Origin;
 
-  void operator=(const CallEvent &) = delete;
-
 protected:
   // This is user data for subclasses.
   const void *Data;
@@ -158,9 +180,10 @@
   SourceLocation Location;
 
 private:
-  mutable unsigned RefCount;
-
   template <typename T> friend struct llvm::IntrusiveRefCntPtrInfo;
+
+  mutable unsigned RefCount = 0;
+
   void Retain() const { ++RefCount; }
   void Release() const;
 
@@ -168,15 +191,15 @@
   friend class CallEventManager;
 
   CallEvent(const Expr *E, ProgramStateRef state, const LocationContext *lctx)
-      : State(std::move(state)), LCtx(lctx), Origin(E), RefCount(0) {}
+      : State(std::move(state)), LCtx(lctx), Origin(E) {}
 
   CallEvent(const Decl *D, ProgramStateRef state, const LocationContext *lctx)
-      : State(std::move(state)), LCtx(lctx), Origin(D), RefCount(0) {}
+      : State(std::move(state)), LCtx(lctx), Origin(D) {}
 
   // DO NOT MAKE PUBLIC
   CallEvent(const CallEvent &Original)
-    : State(Original.State), LCtx(Original.LCtx), Origin(Original.Origin),
-      Data(Original.Data), Location(Original.Location), RefCount(0) {}
+      : State(Original.State), LCtx(Original.LCtx), Origin(Original.Origin),
+        Data(Original.Data), Location(Original.Location) {}
 
   /// Copies this CallEvent, with vtable intact, into a new block of memory.
   virtual void cloneTo(void *Dest) const = 0;
@@ -186,8 +209,7 @@
     return getState()->getSVal(S, getLocationContext());
   }
 
-
-  typedef SmallVectorImpl<SVal> ValueList;
+  using ValueList = SmallVectorImpl<SVal>;
 
   /// \brief Used to specify non-argument regions that will be invalidated as a
   /// result of this call.
@@ -195,7 +217,8 @@
                  RegionAndSymbolInvalidationTraits *ETraits) const {}
 
 public:
-  virtual ~CallEvent() {}
+  CallEvent &operator=(const CallEvent &) = delete;
+  virtual ~CallEvent() = default;
 
   /// \brief Returns the kind of call this is.
   virtual Kind getKind() const = 0;
@@ -248,7 +271,7 @@
 
     // Special case for implicitly-declared global operator new/delete.
     // These should be considered system functions.
-    if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
+    if (const auto *FD = dyn_cast<FunctionDecl>(D))
       return FD->isOverloadedOperator() && FD->isImplicit() && FD->isGlobal();
 
     return false;
@@ -333,7 +356,7 @@
   // FIXME: Move this down to AnyFunctionCall once checkers have more
   // precise callbacks.
   const IdentifierInfo *getCalleeIdentifier() const {
-    const NamedDecl *ND = dyn_cast_or_null<NamedDecl>(getDecl());
+    const auto *ND = dyn_cast_or_null<NamedDecl>(getDecl());
     if (!ND)
       return nullptr;
     return ND->getIdentifier();
@@ -350,8 +373,8 @@
   ProgramStateRef invalidateRegions(unsigned BlockCount,
                                     ProgramStateRef Orig = nullptr) const;
 
-  typedef std::pair<Loc, SVal> FrameBindingTy;
-  typedef SmallVectorImpl<FrameBindingTy> BindingsTy;
+  using FrameBindingTy = std::pair<Loc, SVal>;
+  using BindingsTy = SmallVectorImpl<FrameBindingTy>;
 
   /// Populates the given SmallVector with the bindings in the callee's stack
   /// frame at the start of this call.
@@ -393,10 +416,10 @@
   /// Remember that the number of formal parameters may not match the number
   /// of arguments for all calls. However, the first parameter will always
   /// correspond with the argument value returned by \c getArgSVal(0).
-  virtual ArrayRef<ParmVarDecl*> parameters() const = 0;
+  virtual ArrayRef<ParmVarDecl *> parameters() const = 0;
 
-  typedef llvm::mapped_iterator<ArrayRef<ParmVarDecl*>::iterator, GetTypeFn>
-    param_type_iterator;
+  using param_type_iterator =
+      llvm::mapped_iterator<ArrayRef<ParmVarDecl *>::iterator, GetTypeFn>;
 
   /// Returns an iterator over the types of the call's formal parameters.
   ///
@@ -416,18 +439,17 @@
   void dump() const;
 };
 
-
 /// \brief Represents a call to any sort of function that might have a
 /// FunctionDecl.
 class AnyFunctionCall : public CallEvent {
 protected:
   AnyFunctionCall(const Expr *E, ProgramStateRef St,
                   const LocationContext *LCtx)
-    : CallEvent(E, St, LCtx) {}
+      : CallEvent(E, St, LCtx) {}
   AnyFunctionCall(const Decl *D, ProgramStateRef St,
                   const LocationContext *LCtx)
-    : CallEvent(D, St, LCtx) {}
-  AnyFunctionCall(const AnyFunctionCall &Other) : CallEvent(Other) {}
+      : CallEvent(D, St, LCtx) {}
+  AnyFunctionCall(const AnyFunctionCall &Other) = default;
 
 public:
   // This function is overridden by subclasses, but they must return
@@ -460,9 +482,9 @@
 protected:
   SimpleFunctionCall(const CallExpr *CE, ProgramStateRef St,
                      const LocationContext *LCtx)
-    : AnyFunctionCall(CE, St, LCtx) {}
-  SimpleFunctionCall(const SimpleFunctionCall &Other)
-    : AnyFunctionCall(Other) {}
+      : AnyFunctionCall(CE, St, LCtx) {}
+  SimpleFunctionCall(const SimpleFunctionCall &Other) = default;
+
   void cloneTo(void *Dest) const override {
     new (Dest) SimpleFunctionCall(*this);
   }
@@ -496,9 +518,9 @@
 protected:
   BlockCall(const CallExpr *CE, ProgramStateRef St,
             const LocationContext *LCtx)
-    : CallEvent(CE, St, LCtx) {}
+      : CallEvent(CE, St, LCtx) {}
+  BlockCall(const BlockCall &Other) = default;
 
-  BlockCall(const BlockCall &Other) : CallEvent(Other) {}
   void cloneTo(void *Dest) const override { new (Dest) BlockCall(*this); }
 
   void getExtraInvalidatedValues(ValueList &Values,
@@ -598,18 +620,16 @@
 /// it is written.
 class CXXInstanceCall : public AnyFunctionCall {
 protected:
-  void getExtraInvalidatedValues(ValueList &Values, 
-         RegionAndSymbolInvalidationTraits *ETraits) const override;
-
   CXXInstanceCall(const CallExpr *CE, ProgramStateRef St,
                   const LocationContext *LCtx)
-    : AnyFunctionCall(CE, St, LCtx) {}
+      : AnyFunctionCall(CE, St, LCtx) {}
   CXXInstanceCall(const FunctionDecl *D, ProgramStateRef St,
                   const LocationContext *LCtx)
-    : AnyFunctionCall(D, St, LCtx) {}
+      : AnyFunctionCall(D, St, LCtx) {}
+  CXXInstanceCall(const CXXInstanceCall &Other) = default;
 
-
-  CXXInstanceCall(const CXXInstanceCall &Other) : AnyFunctionCall(Other) {}
+  void getExtraInvalidatedValues(ValueList &Values, 
+         RegionAndSymbolInvalidationTraits *ETraits) const override;
 
 public:
   /// \brief Returns the expression representing the implicit 'this' object.
@@ -640,9 +660,9 @@
 protected:
   CXXMemberCall(const CXXMemberCallExpr *CE, ProgramStateRef St,
                 const LocationContext *LCtx)
-    : CXXInstanceCall(CE, St, LCtx) {}
+      : CXXInstanceCall(CE, St, LCtx) {}
+  CXXMemberCall(const CXXMemberCall &Other) = default;
 
-  CXXMemberCall(const CXXMemberCall &Other) : CXXInstanceCall(Other) {}
   void cloneTo(void *Dest) const override { new (Dest) CXXMemberCall(*this); }
 
 public:
@@ -681,10 +701,9 @@
 protected:
   CXXMemberOperatorCall(const CXXOperatorCallExpr *CE, ProgramStateRef St,
                         const LocationContext *LCtx)
-    : CXXInstanceCall(CE, St, LCtx) {}
+      : CXXInstanceCall(CE, St, LCtx) {}
+  CXXMemberOperatorCall(const CXXMemberOperatorCall &Other) = default;
 
-  CXXMemberOperatorCall(const CXXMemberOperatorCall &Other)
-    : CXXInstanceCall(Other) {}
   void cloneTo(void *Dest) const override {
     new (Dest) CXXMemberOperatorCall(*this);
   }
@@ -697,6 +716,7 @@
   unsigned getNumArgs() const override {
     return getOriginExpr()->getNumArgs() - 1;
   }
+
   const Expr *getArgExpr(unsigned Index) const override {
     return getOriginExpr()->getArg(Index + 1);
   }
@@ -718,7 +738,7 @@
   friend class CallEventManager;
 
 protected:
-  typedef llvm::PointerIntPair<const MemRegion *, 1, bool> DtorDataTy;
+  using DtorDataTy = llvm::PointerIntPair<const MemRegion *, 1, bool>;
 
   /// Creates an implicit destructor.
   ///
@@ -730,12 +750,13 @@
   CXXDestructorCall(const CXXDestructorDecl *DD, const Stmt *Trigger,
                     const MemRegion *Target, bool IsBaseDestructor,
                     ProgramStateRef St, const LocationContext *LCtx)
-    : CXXInstanceCall(DD, St, LCtx) {
+      : CXXInstanceCall(DD, St, LCtx) {
     Data = DtorDataTy(Target, IsBaseDestructor).getOpaqueValue();
     Location = Trigger->getLocEnd();
   }
 
-  CXXDestructorCall(const CXXDestructorCall &Other) : CXXInstanceCall(Other) {}
+  CXXDestructorCall(const CXXDestructorCall &Other) = default;
+
   void cloneTo(void *Dest) const override {new (Dest) CXXDestructorCall(*this);}
 
 public:
@@ -775,11 +796,12 @@
   /// \param LCtx The location context at this point in the program.
   CXXConstructorCall(const CXXConstructExpr *CE, const MemRegion *Target,
                      ProgramStateRef St, const LocationContext *LCtx)
-    : AnyFunctionCall(CE, St, LCtx) {
+      : AnyFunctionCall(CE, St, LCtx) {
     Data = Target;
   }
 
-  CXXConstructorCall(const CXXConstructorCall &Other) : AnyFunctionCall(Other){}
+  CXXConstructorCall(const CXXConstructorCall &Other) = default;
+
   void cloneTo(void *Dest) const override { new (Dest) CXXConstructorCall(*this); }
 
   void getExtraInvalidatedValues(ValueList &Values,
@@ -822,9 +844,9 @@
 protected:
   CXXAllocatorCall(const CXXNewExpr *E, ProgramStateRef St,
                    const LocationContext *LCtx)
-    : AnyFunctionCall(E, St, LCtx) {}
+      : AnyFunctionCall(E, St, LCtx) {}
+  CXXAllocatorCall(const CXXAllocatorCall &Other) = default;
 
-  CXXAllocatorCall(const CXXAllocatorCall &Other) : AnyFunctionCall(Other) {}
   void cloneTo(void *Dest) const override { new (Dest) CXXAllocatorCall(*this); }
 
 public:
@@ -875,11 +897,12 @@
 protected:
   ObjCMethodCall(const ObjCMessageExpr *Msg, ProgramStateRef St,
                  const LocationContext *LCtx)
-    : CallEvent(Msg, St, LCtx) {
+      : CallEvent(Msg, St, LCtx) {
     Data = nullptr;
   }
 
-  ObjCMethodCall(const ObjCMethodCall &Other) : CallEvent(Other) {}
+  ObjCMethodCall(const ObjCMethodCall &Other) = default;
+
   void cloneTo(void *Dest) const override { new (Dest) ObjCMethodCall(*this); }
 
   void getExtraInvalidatedValues(ValueList &Values,
@@ -893,12 +916,15 @@
   virtual const ObjCMessageExpr *getOriginExpr() const {
     return cast<ObjCMessageExpr>(CallEvent::getOriginExpr());
   }
+
   const ObjCMethodDecl *getDecl() const override {
     return getOriginExpr()->getMethodDecl();
   }
+
   unsigned getNumArgs() const override {
     return getOriginExpr()->getNumArgs();
   }
+
   const Expr *getArgExpr(unsigned Index) const override {
     return getOriginExpr()->getArg(Index);
   }
@@ -906,9 +932,11 @@
   bool isInstanceMessage() const {
     return getOriginExpr()->isInstanceMessage();
   }
+
   ObjCMethodFamily getMethodFamily() const {
     return getOriginExpr()->getMethodFamily();
   }
+
   Selector getSelector() const {
     return getOriginExpr()->getSelector();
   }
@@ -971,7 +999,6 @@
   }
 };
 
-
 /// \brief Manages the lifetime of CallEvent objects.
 ///
 /// CallEventManager provides a way to create arbitrary CallEvents "on the
@@ -984,7 +1011,8 @@
 
   llvm::BumpPtrAllocator &Alloc;
   SmallVector<void *, 8> Cache;
-  typedef SimpleFunctionCall CallEventTemplateTy;
+
+  using CallEventTemplateTy = SimpleFunctionCall;
 
   void reclaim(const void *Memory) {
     Cache.push_back(const_cast<void *>(Memory));
@@ -1032,11 +1060,9 @@
 public:
   CallEventManager(llvm::BumpPtrAllocator &alloc) : Alloc(alloc) {}
 
-
   CallEventRef<>
   getCaller(const StackFrameContext *CalleeCtx, ProgramStateRef State);
 
-
   CallEventRef<>
   getSimpleCall(const CallExpr *E, ProgramStateRef State,
                 const LocationContext *LCtx);
@@ -1067,7 +1093,6 @@
   }
 };
 
-
 template <typename T>
 CallEventRef<T> CallEvent::cloneWithState(ProgramStateRef NewState) const {
   assert(isa<T>(*this) && "Cloning to unrelated type");
@@ -1099,19 +1124,22 @@
   this->~CallEvent();
 }
 
-} // end namespace ento
-} // end namespace clang
+} // namespace ento
+
+} // namespace clang
 
 namespace llvm {
-  // Support isa<>, cast<>, and dyn_cast<> for CallEventRef.
-  template<class T> struct simplify_type< clang::ento::CallEventRef<T> > {
-    typedef const T *SimpleType;
 
-    static SimpleType
-    getSimplifiedValue(clang::ento::CallEventRef<T> Val) {
-      return Val.get();
-    }
-  };
-}
+// Support isa<>, cast<>, and dyn_cast<> for CallEventRef.
+template<class T> struct simplify_type< clang::ento::CallEventRef<T>> {
+  using SimpleType = const T *;
 
-#endif
+  static SimpleType
+  getSimplifiedValue(clang::ento::CallEventRef<T> Val) {
+    return Val.get();
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLEVENT_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h b/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
index 78d38a3..2ca564d 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
@@ -193,7 +193,7 @@
 
   /// \brief Get the value of arbitrary expressions at this point in the path.
   SVal getSVal(const Stmt *S) const {
-    return getState()->getSVal(S, getLocationContext());
+    return Pred->getSVal(S);
   }
 
   /// \brief Returns true if the value of \p E is greater than or equal to \p
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h b/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
index c01600d..e21be43 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
@@ -1,4 +1,4 @@
-//== ConstraintManager.h - Constraints on symbolic values.-------*- C++ -*--==//
+//===- ConstraintManager.h - Constraints on symbolic values. ----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,28 +14,44 @@
 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CONSTRAINTMANAGER_H
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CONSTRAINTMANAGER_H
 
+#include "clang/Basic/LLVM.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/Support/SaveAndRestore.h"
+#include <memory>
+#include <utility>
 
 namespace llvm {
+
 class APSInt;
-}
+
+} // namespace llvm
 
 namespace clang {
 namespace ento {
 
+class ProgramStateManager;
 class SubEngine;
+class SymbolReaper;
 
 class ConditionTruthVal {
   Optional<bool> Val;
+
 public:
   /// Construct a ConditionTruthVal indicating the constraint is constrained
   /// to either true or false, depending on the boolean value provided.
   ConditionTruthVal(bool constraint) : Val(constraint) {}
 
   /// Construct a ConstraintVal indicating the constraint is underconstrained.
-  ConditionTruthVal() {}
+  ConditionTruthVal() = default;
+
+  /// \return Stored value, assuming that the value is known.
+  /// Crashes otherwise.
+  bool getValue() const {
+    return *Val;
+  }
 
   /// Return true if the constraint is perfectly constrained to 'true'.
   bool isConstrainedTrue() const {
@@ -61,14 +77,14 @@
 
 class ConstraintManager {
 public:
-  ConstraintManager() : NotifyAssumeClients(true) {}
-
+  ConstraintManager() = default;
   virtual ~ConstraintManager();
+
   virtual ProgramStateRef assume(ProgramStateRef state,
                                  DefinedSVal Cond,
                                  bool Assumption) = 0;
 
-  typedef std::pair<ProgramStateRef, ProgramStateRef> ProgramStatePair;
+  using ProgramStatePair = std::pair<ProgramStateRef, ProgramStateRef>;
 
   /// Returns a pair of states (StTrue, StFalse) where the given condition is
   /// assumed to be true or false, respectively.
@@ -166,7 +182,7 @@
   ///
   /// Note that this flag allows the ConstraintManager to be re-entrant,
   /// but not thread-safe.
-  bool NotifyAssumeClients;
+  bool NotifyAssumeClients = true;
 
   /// canReasonAbout - Not all ConstraintManagers can accurately reason about
   ///  all SVal values.  This method returns true if the ConstraintManager can
@@ -187,8 +203,7 @@
 std::unique_ptr<ConstraintManager>
 CreateZ3ConstraintManager(ProgramStateManager &statemgr, SubEngine *subengine);
 
-} // end GR namespace
+} // namespace ento
+} // namespace clang
 
-} // end clang namespace
-
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CONSTRAINTMANAGER_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h b/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
index 7472a71..6d479d6 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
@@ -1,4 +1,4 @@
-//==- CoreEngine.h - Path-Sensitive Dataflow Engine ----------------*- C++ -*-//
+//===- CoreEngine.h - Path-Sensitive Dataflow Engine ------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,21 +15,33 @@
 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
 
-#include "clang/AST/Expr.h"
+#include "clang/AST/Stmt.h"
 #include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/ProgramPoint.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/BlockCounter.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/WorkList.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
 #include <memory>
+#include <utility>
+#include <vector>
 
 namespace clang {
 
-class ProgramPointTag;
-  
+class AnalyzerOptions;
+class CXXBindTemporaryExpr;
+class Expr;
+class LabelDecl;
+
 namespace ento {
 
-class NodeBuilder;
+class FunctionSummariesTy;
+class SubEngine;
 
 //===----------------------------------------------------------------------===//
 /// CoreEngine - Implements the core logic of the graph-reachability
@@ -41,23 +53,23 @@
 ///   at the statement and block-level.  The analyses themselves must implement
 ///   any transfer function logic and the sub-expression level (if any).
 class CoreEngine {
-  friend struct NodeBuilderContext;
-  friend class NodeBuilder;
-  friend class ExprEngine;
   friend class CommonNodeBuilder;
-  friend class IndirectGotoNodeBuilder;
-  friend class SwitchNodeBuilder;
   friend class EndOfFunctionNodeBuilder;
+  friend class ExprEngine;
+  friend class IndirectGotoNodeBuilder;
+  friend class NodeBuilder;
+  friend struct NodeBuilderContext;
+  friend class SwitchNodeBuilder;
+
 public:
-  typedef std::vector<std::pair<BlockEdge, const ExplodedNode*> >
-            BlocksExhausted;
+  using BlocksExhausted =
+      std::vector<std::pair<BlockEdge, const ExplodedNode *>>;
   
-  typedef std::vector<std::pair<const CFGBlock*, const ExplodedNode*> >
-            BlocksAborted;
+  using BlocksAborted =
+      std::vector<std::pair<const CFGBlock *, const ExplodedNode *>>;
 
 private:
-
-  SubEngine& SubEng;
+  SubEngine &SubEng;
 
   /// G - The simulation graph.  Each node is a (location,state) pair.
   mutable ExplodedGraph G;
@@ -106,17 +118,17 @@
                         ExplodedNode *Pred);
 
 private:
-  CoreEngine(const CoreEngine &) = delete;
-  void operator=(const CoreEngine &) = delete;
-
   ExplodedNode *generateCallExitBeginNode(ExplodedNode *N,
                                           const ReturnStmt *RS);
 
 public:
   /// Construct a CoreEngine object to analyze the provided CFG.
-  CoreEngine(SubEngine &subengine, FunctionSummariesTy *FS)
-      : SubEng(subengine), WList(WorkList::makeDFS()),
-        BCounterFactory(G.getAllocator()), FunctionSummaries(FS) {}
+  CoreEngine(SubEngine &subengine,
+             FunctionSummariesTy *FS,
+             AnalyzerOptions &Opts);
+
+  CoreEngine(const CoreEngine &) = delete;
+  CoreEngine &operator=(const CoreEngine &) = delete;
 
   /// getGraph - Returns the exploded graph.
   ExplodedGraph &getGraph() { return G; }
@@ -125,6 +137,7 @@
   ///  steps.  Returns true if there is still simulation state on the worklist.
   bool ExecuteWorkList(const LocationContext *L, unsigned Steps,
                        ProgramStateRef InitState);
+
   /// Returns true if there is still simulation state on the worklist.
   bool ExecuteWorkListWithInitialState(const LocationContext *L,
                                        unsigned Steps,
@@ -154,12 +167,15 @@
   BlocksExhausted::const_iterator blocks_exhausted_begin() const {
     return blocksExhausted.begin();
   }
+
   BlocksExhausted::const_iterator blocks_exhausted_end() const {
     return blocksExhausted.end();
   }
+
   BlocksAborted::const_iterator blocks_aborted_begin() const {
     return blocksAborted.begin();
   }
+
   BlocksAborted::const_iterator blocks_aborted_end() const {
     return blocksAborted.end();
   }
@@ -184,8 +200,9 @@
   const CoreEngine &Eng;
   const CFGBlock *Block;
   const LocationContext *LC;
+
   NodeBuilderContext(const CoreEngine &E, const CFGBlock *B, ExplodedNode *N)
-    : Eng(E), Block(B), LC(N->getLocationContext()) { assert(B); }
+      : Eng(E), Block(B), LC(N->getLocationContext()) { assert(B); }
 
   /// \brief Return the CFGBlock associated with this builder.
   const CFGBlock *getBlock() const { return Block; }
@@ -210,29 +227,29 @@
 /// constructed nodes) but did not have any outgoing transitions added.
 class NodeBuilder {
   virtual void anchor();
+
 protected:
   const NodeBuilderContext &C;
 
   /// Specifies if the builder results have been finalized. For example, if it
   /// is set to false, autotransitions are yet to be generated.
   bool Finalized;
-  bool HasGeneratedNodes;
+
+  bool HasGeneratedNodes = false;
+
   /// \brief The frontier set - a set of nodes which need to be propagated after
   /// the builder dies.
   ExplodedNodeSet &Frontier;
 
   /// Checkes if the results are ready.
   virtual bool checkResults() {
-    if (!Finalized)
-      return false;
-    return true;
+    return Finalized;
   }
 
   bool hasNoSinksInFrontier() {
-    for (iterator I = Frontier.begin(), E = Frontier.end(); I != E; ++I) {
-      if ((*I)->isSink())
+    for (const auto  I : Frontier)
+      if (I->isSink())
         return false;
-    }
     return true;
   }
 
@@ -247,18 +264,18 @@
 public:
   NodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
               const NodeBuilderContext &Ctx, bool F = true)
-    : C(Ctx), Finalized(F), HasGeneratedNodes(false), Frontier(DstSet) {
+      : C(Ctx), Finalized(F), Frontier(DstSet) {
     Frontier.Add(SrcNode);
   }
 
   NodeBuilder(const ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
               const NodeBuilderContext &Ctx, bool F = true)
-    : C(Ctx), Finalized(F), HasGeneratedNodes(false), Frontier(DstSet) {
+      : C(Ctx), Finalized(F), Frontier(DstSet) {
     Frontier.insert(SrcSet);
     assert(hasNoSinksInFrontier());
   }
 
-  virtual ~NodeBuilder() {}
+  virtual ~NodeBuilder() = default;
 
   /// \brief Generates a node in the ExplodedGraph.
   ExplodedNode *generateNode(const ProgramPoint &PP,
@@ -284,14 +301,16 @@
     return Frontier;
   }
 
-  typedef ExplodedNodeSet::iterator iterator;
+  using iterator = ExplodedNodeSet::iterator;
+
   /// \brief Iterators through the results frontier.
-  inline iterator begin() {
+  iterator begin() {
     finalizeResults();
     assert(checkResults());
     return Frontier.begin();
   }
-  inline iterator end() {
+
+  iterator end() {
     finalizeResults();
     return Frontier.end();
   }
@@ -300,9 +319,10 @@
   bool hasGeneratedNodes() { return HasGeneratedNodes; }
 
   void takeNodes(const ExplodedNodeSet &S) {
-    for (ExplodedNodeSet::iterator I = S.begin(), E = S.end(); I != E; ++I )
-      Frontier.erase(*I);
+    for (const auto I : S)
+      Frontier.erase(I);
   }
+
   void takeNodes(ExplodedNode *N) { Frontier.erase(N); }
   void addNodes(const ExplodedNodeSet &S) { Frontier.insert(S); }
   void addNodes(ExplodedNode *N) { Frontier.Add(N); }
@@ -312,6 +332,7 @@
 /// \brief This node builder keeps track of the generated sink nodes.
 class NodeBuilderWithSinks: public NodeBuilder {
   void anchor() override;
+
 protected:
   SmallVector<ExplodedNode*, 2> sinksGenerated;
   ProgramPoint &Location;
@@ -319,7 +340,7 @@
 public:
   NodeBuilderWithSinks(ExplodedNode *Pred, ExplodedNodeSet &DstSet,
                        const NodeBuilderContext &Ctx, ProgramPoint &L)
-    : NodeBuilder(Pred, DstSet, Ctx), Location(L) {}
+      : NodeBuilder(Pred, DstSet, Ctx), Location(L) {}
 
   ExplodedNode *generateNode(ProgramStateRef State,
                              ExplodedNode *Pred,
@@ -348,15 +369,15 @@
 /// that it creates a statement specific ProgramPoint.
 class StmtNodeBuilder: public NodeBuilder {
   NodeBuilder *EnclosingBldr;
-public:
 
+public:
   /// \brief Constructs a StmtNodeBuilder. If the builder is going to process
   /// nodes currently owned by another builder(with larger scope), use
   /// Enclosing builder to transfer ownership.
   StmtNodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
                   const NodeBuilderContext &Ctx,
                   NodeBuilder *Enclosing = nullptr)
-    : NodeBuilder(SrcNode, DstSet, Ctx), EnclosingBldr(Enclosing) {
+      : NodeBuilder(SrcNode, DstSet, Ctx), EnclosingBldr(Enclosing) {
     if (EnclosingBldr)
       EnclosingBldr->takeNodes(SrcNode);
   }
@@ -364,11 +385,10 @@
   StmtNodeBuilder(ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
                   const NodeBuilderContext &Ctx,
                   NodeBuilder *Enclosing = nullptr)
-    : NodeBuilder(SrcSet, DstSet, Ctx), EnclosingBldr(Enclosing) {
+      : NodeBuilder(SrcSet, DstSet, Ctx), EnclosingBldr(Enclosing) {
     if (EnclosingBldr)
-      for (ExplodedNodeSet::iterator I = SrcSet.begin(),
-                                     E = SrcSet.end(); I != E; ++I )
-        EnclosingBldr->takeNodes(*I);
+      for (const auto I : SrcSet)
+        EnclosingBldr->takeNodes(I);
   }
 
   ~StmtNodeBuilder() override;
@@ -400,19 +420,20 @@
 /// \brief BranchNodeBuilder is responsible for constructing the nodes
 /// corresponding to the two branches of the if statement - true and false.
 class BranchNodeBuilder: public NodeBuilder {
-  void anchor() override;
   const CFGBlock *DstT;
   const CFGBlock *DstF;
 
   bool InFeasibleTrue;
   bool InFeasibleFalse;
 
+  void anchor() override;
+
 public:
   BranchNodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
                     const NodeBuilderContext &C,
                     const CFGBlock *dstT, const CFGBlock *dstF)
-  : NodeBuilder(SrcNode, DstSet, C), DstT(dstT), DstF(dstF),
-    InFeasibleTrue(!DstT), InFeasibleFalse(!DstF) {
+      : NodeBuilder(SrcNode, DstSet, C), DstT(dstT), DstF(dstF),
+        InFeasibleTrue(!DstT), InFeasibleFalse(!DstF) {
     // The branch node builder does not generate autotransitions.
     // If there are no successors it means that both branches are infeasible.
     takeNodes(SrcNode);
@@ -421,8 +442,8 @@
   BranchNodeBuilder(const ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
                     const NodeBuilderContext &C,
                     const CFGBlock *dstT, const CFGBlock *dstF)
-  : NodeBuilder(SrcSet, DstSet, C), DstT(dstT), DstF(dstF),
-    InFeasibleTrue(!DstT), InFeasibleFalse(!DstF) {
+      : NodeBuilder(SrcSet, DstSet, C), DstT(dstT), DstF(dstF),
+        InFeasibleTrue(!DstT), InFeasibleFalse(!DstF) {
     takeNodes(SrcSet);
   }
 
@@ -455,15 +476,16 @@
 public:
   IndirectGotoNodeBuilder(ExplodedNode *pred, const CFGBlock *src, 
                     const Expr *e, const CFGBlock *dispatch, CoreEngine* eng)
-    : Eng(*eng), Src(src), DispatchBlock(*dispatch), E(e), Pred(pred) {}
+      : Eng(*eng), Src(src), DispatchBlock(*dispatch), E(e), Pred(pred) {}
 
   class iterator {
+    friend class IndirectGotoNodeBuilder;
+
     CFGBlock::const_succ_iterator I;
 
-    friend class IndirectGotoNodeBuilder;
     iterator(CFGBlock::const_succ_iterator i) : I(i) {}
-  public:
 
+  public:
     iterator &operator++() { ++I; return *this; }
     bool operator!=(const iterator &X) const { return I != X.I; }
 
@@ -501,12 +523,13 @@
 public:
   SwitchNodeBuilder(ExplodedNode *pred, const CFGBlock *src,
                     const Expr *condition, CoreEngine* eng)
-  : Eng(*eng), Src(src), Condition(condition), Pred(pred) {}
+      : Eng(*eng), Src(src), Condition(condition), Pred(pred) {}
 
   class iterator {
+    friend class SwitchNodeBuilder;
+
     CFGBlock::const_succ_reverse_iterator I;
 
-    friend class SwitchNodeBuilder;
     iterator(CFGBlock::const_succ_reverse_iterator i) : I(i) {}
 
   public:
@@ -545,7 +568,8 @@
   }
 };
 
-} // end ento namespace
-} // end clang namespace
+} // namespace ento
 
-#endif
+} // namespace clang
+
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h b/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h
index 555191d..0dc4ef1 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h
@@ -1,4 +1,4 @@
-//== DynamicTypeMap.h - Dynamic type map ----------------------- -*- C++ -*--=//
+//===- DynamicTypeMap.h - Dynamic type map ----------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,19 +13,26 @@
 
 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPEMAP_H
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPEMAP_H
-#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
+
+#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "llvm/ADT/ImmutableMap.h"
+#include "clang/AST/Type.h"
 
 namespace clang {
 namespace ento {
 
+class MemRegion;
+
 /// The GDM component containing the dynamic type info. This is a map from a
 /// symbol to its most likely type.
 struct DynamicTypeMap {};
-typedef llvm::ImmutableMap<const MemRegion *, DynamicTypeInfo>
-    DynamicTypeMapImpl;
+
+using DynamicTypeMapImpl =
+    llvm::ImmutableMap<const MemRegion *, DynamicTypeInfo>;
+
 template <>
 struct ProgramStateTrait<DynamicTypeMap>
     : public ProgramStatePartialTrait<DynamicTypeMapImpl> {
@@ -51,7 +58,10 @@
                             DynamicTypeInfo(NewTy, CanBeSubClassed));
 }
 
-} // ento
-} // clang
+void printDynamicTypeInfo(ProgramStateRef State, raw_ostream &Out,
+                          const char *NL, const char *Sep);
+
+} // namespace ento
+} // namespace clang
 
 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPEMAP_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/Environment.h b/include/clang/StaticAnalyzer/Core/PathSensitive/Environment.h
index c63ed4a..d49aa81 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/Environment.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/Environment.h
@@ -1,4 +1,4 @@
-//== Environment.h - Map from Stmt* to Locations/Values ---------*- C++ -*--==//
+//===- Environment.h - Map from Stmt* to Locations/Values -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,16 +15,17 @@
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_ENVIRONMENT_H
 
 #include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
 #include "llvm/ADT/ImmutableMap.h"
+#include <utility>
 
 namespace clang {
 
-class LiveVariables;
+class Stmt;
 
 namespace ento {
 
-class EnvironmentManager;
 class SValBuilder;
 class SymbolReaper;
 
@@ -32,7 +33,7 @@
 /// This allows the environment to manage context-sensitive bindings,
 /// which is essentially for modeling recursive function analysis, among
 /// other things.
-class EnvironmentEntry : public std::pair<const Stmt*,
+class EnvironmentEntry : public std::pair<const Stmt *,
                                           const StackFrameContext *> {
 public:
   EnvironmentEntry(const Stmt *s, const LocationContext *L);
@@ -57,19 +58,17 @@
 private:
   friend class EnvironmentManager;
 
-  // Type definitions.
-  typedef llvm::ImmutableMap<EnvironmentEntry, SVal> BindingsTy;
+  using BindingsTy = llvm::ImmutableMap<EnvironmentEntry, SVal>;
 
-  // Data.
   BindingsTy ExprBindings;
 
-  Environment(BindingsTy eb)
-    : ExprBindings(eb) {}
+  Environment(BindingsTy eb) : ExprBindings(eb) {}
 
   SVal lookupExpr(const EnvironmentEntry &E) const;
 
 public:
-  typedef BindingsTy::iterator iterator;
+  using iterator = BindingsTy::iterator;
+
   iterator begin() const { return ExprBindings.begin(); }
   iterator end() const { return ExprBindings.end(); }
 
@@ -92,21 +91,19 @@
   bool operator==(const Environment& RHS) const {
     return ExprBindings == RHS.ExprBindings;
   }
-  
-  void print(raw_ostream &Out, const char *NL, const char *Sep) const;
-  
-private:
-  void printAux(raw_ostream &Out, bool printLocations,
-                const char *NL, const char *Sep) const;
+
+  void print(raw_ostream &Out, const char *NL, const char *Sep,
+             const LocationContext *WithLC = nullptr) const;
 };
 
 class EnvironmentManager {
 private:
-  typedef Environment::BindingsTy::Factory FactoryTy;
+  using FactoryTy = Environment::BindingsTy::Factory;
+
   FactoryTy F;
 
 public:
-  EnvironmentManager(llvm::BumpPtrAllocator& Allocator) : F(Allocator) {}
+  EnvironmentManager(llvm::BumpPtrAllocator &Allocator) : F(Allocator) {}
 
   Environment getInitialEnvironment() {
     return Environment(F.getEmptyMap());
@@ -121,8 +118,8 @@
                                  ProgramStateRef state);
 };
 
-} // end GR namespace
+} // namespace ento
 
-} // end clang namespace
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_ENVIRONMENT_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h b/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
index dcea5e4..a55c43d 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
@@ -1,4 +1,4 @@
-//=-- ExplodedGraph.h - Local, Path-Sens. "Exploded Graph" -*- C++ -*-------==//
+//===- ExplodedGraph.h - Local, Path-Sens. "Exploded Graph" -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,17 +19,25 @@
 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
 
-#include "clang/AST/Decl.h"
 #include "clang/Analysis/AnalysisDeclContext.h"
 #include "clang/Analysis/ProgramPoint.h"
 #include "clang/Analysis/Support/BumpVector.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <cstdint>
 #include <memory>
 #include <utility>
 #include <vector>
@@ -37,6 +45,10 @@
 namespace clang {
 
 class CFG;
+class Decl;
+class Expr;
+class ParentMap;
+class Stmt;
 
 namespace ento {
 
@@ -52,13 +64,13 @@
 // successors to it at any time after creating it.
 
 class ExplodedNode : public llvm::FoldingSetNode {
-  friend class ExplodedGraph;
-  friend class CoreEngine;
-  friend class NodeBuilder;
   friend class BranchNodeBuilder;
-  friend class IndirectGotoNodeBuilder;
-  friend class SwitchNodeBuilder;
+  friend class CoreEngine;
   friend class EndOfFunctionNodeBuilder;
+  friend class ExplodedGraph;
+  friend class IndirectGotoNodeBuilder;
+  friend class NodeBuilder;
+  friend class SwitchNodeBuilder;
 
   /// Efficiently stores a list of ExplodedNodes, or an optional flag.
   ///
@@ -156,6 +168,11 @@
     return Location.getAs<T>();
   }
 
+  /// Get the value of an arbitrary expression at this node.
+  SVal getSVal(const Stmt *S) const {
+    return getState()->getSVal(S, getLocationContext());
+  }
+
   static void Profile(llvm::FoldingSetNodeID &ID,
                       const ProgramPoint &Loc,
                       const ProgramStateRef &state,
@@ -198,10 +215,10 @@
   }
 
   // Iterators over successor and predecessor vertices.
-  typedef ExplodedNode*       const *       succ_iterator;
-  typedef const ExplodedNode* const * const_succ_iterator;
-  typedef ExplodedNode*       const *       pred_iterator;
-  typedef const ExplodedNode* const * const_pred_iterator;
+  using succ_iterator = ExplodedNode * const *;
+  using const_succ_iterator = const ExplodedNode * const *;
+  using pred_iterator = ExplodedNode * const *;
+  using const_pred_iterator = const ExplodedNode * const *;
 
   pred_iterator pred_begin() { return Preds.begin(); }
   pred_iterator pred_end() { return Preds.end(); }
@@ -226,10 +243,10 @@
   // For debugging.
 
 public:
-
   class Auditor {
   public:
     virtual ~Auditor();
+
     virtual void AddEdge(ExplodedNode *Src, ExplodedNode *Dst) = 0;
   };
 
@@ -240,15 +257,15 @@
   void replacePredecessor(ExplodedNode *node) { Preds.replaceNode(node); }
 };
 
-typedef llvm::DenseMap<const ExplodedNode *, const ExplodedNode *>
-        InterExplodedGraphMap;
+using InterExplodedGraphMap =
+    llvm::DenseMap<const ExplodedNode *, const ExplodedNode *>;
 
 class ExplodedGraph {
 protected:
   friend class CoreEngine;
 
   // Type definitions.
-  typedef std::vector<ExplodedNode *> NodeVector;
+  using NodeVector = std::vector<ExplodedNode *>;
 
   /// The roots of the simulation graph. Usually there will be only
   /// one, but clients are free to establish multiple subgraphs within a single
@@ -268,7 +285,7 @@
   BumpVectorContext BVC;
 
   /// NumNodes - The number of nodes in the graph.
-  unsigned NumNodes;
+  unsigned NumNodes = 0;
   
   /// A list of recently allocated nodes that can potentially be recycled.
   NodeVector ChangedNodes;
@@ -279,12 +296,14 @@
   /// Determines how often nodes are reclaimed.
   ///
   /// If this is 0, nodes will never be reclaimed.
-  unsigned ReclaimNodeInterval;
+  unsigned ReclaimNodeInterval = 0;
   
   /// Counter to determine when to reclaim nodes.
   unsigned ReclaimCounter;
 
 public:
+  ExplodedGraph();
+  ~ExplodedGraph();
 
   /// \brief Retrieve the node associated with a (Location,State) pair,
   ///  where the 'Location' is a ProgramPoint in the CFG.  If no node for
@@ -318,10 +337,6 @@
     return V;
   }
 
-  ExplodedGraph();
-
-  ~ExplodedGraph();
-  
   unsigned num_roots() const { return Roots.size(); }
   unsigned num_eops() const { return EndNodes.size(); }
 
@@ -331,14 +346,14 @@
   void reserve(unsigned NodeCount) { Nodes.reserve(NodeCount); }
 
   // Iterators.
-  typedef ExplodedNode                        NodeTy;
-  typedef llvm::FoldingSet<ExplodedNode>      AllNodesTy;
-  typedef NodeVector::iterator                roots_iterator;
-  typedef NodeVector::const_iterator          const_roots_iterator;
-  typedef NodeVector::iterator                eop_iterator;
-  typedef NodeVector::const_iterator          const_eop_iterator;
-  typedef AllNodesTy::iterator                node_iterator;
-  typedef AllNodesTy::const_iterator          const_node_iterator;
+  using NodeTy = ExplodedNode;
+  using AllNodesTy = llvm::FoldingSet<ExplodedNode>;
+  using roots_iterator = NodeVector::iterator;
+  using const_roots_iterator = NodeVector::const_iterator;
+  using eop_iterator = NodeVector::iterator;
+  using const_eop_iterator = NodeVector::const_iterator;
+  using node_iterator = AllNodesTy::iterator;
+  using const_node_iterator = AllNodesTy::const_iterator;
 
   node_iterator nodes_begin() { return Nodes.begin(); }
 
@@ -367,7 +382,7 @@
   llvm::BumpPtrAllocator & getAllocator() { return BVC.getAllocator(); }
   BumpVectorContext &getNodeAllocator() { return BVC; }
 
-  typedef llvm::DenseMap<const ExplodedNode*, ExplodedNode*> NodeMap;
+  using NodeMap = llvm::DenseMap<const ExplodedNode *, ExplodedNode *>;
 
   /// Creates a trimmed version of the graph that only contains paths leading
   /// to the given nodes.
@@ -404,29 +419,30 @@
 };
 
 class ExplodedNodeSet {
-  typedef llvm::SmallSetVector<ExplodedNode*, 4> ImplTy;
+  using ImplTy = llvm::SmallSetVector<ExplodedNode *, 4>;
   ImplTy Impl;
 
 public:
   ExplodedNodeSet(ExplodedNode *N) {
-    assert (N && !static_cast<ExplodedNode*>(N)->isSink());
+    assert(N && !static_cast<ExplodedNode*>(N)->isSink());
     Impl.insert(N);
   }
 
-  ExplodedNodeSet() {}
+  ExplodedNodeSet() = default;
 
-  inline void Add(ExplodedNode *N) {
+  void Add(ExplodedNode *N) {
     if (N && !static_cast<ExplodedNode*>(N)->isSink()) Impl.insert(N);
   }
 
-  typedef ImplTy::iterator       iterator;
-  typedef ImplTy::const_iterator const_iterator;
+  using iterator = ImplTy::iterator;
+  using const_iterator = ImplTy::const_iterator;
 
   unsigned size() const { return Impl.size();  }
   bool empty()    const { return Impl.empty(); }
   bool erase(ExplodedNode *N) { return Impl.remove(N); }
 
   void clear() { Impl.clear(); }
+
   void insert(const ExplodedNodeSet &S) {
     assert(&S != this);
     if (empty())
@@ -435,24 +451,25 @@
       Impl.insert(S.begin(), S.end());
   }
 
-  inline iterator begin() { return Impl.begin(); }
-  inline iterator end()   { return Impl.end();   }
+  iterator begin() { return Impl.begin(); }
+  iterator end() { return Impl.end(); }
 
-  inline const_iterator begin() const { return Impl.begin(); }
-  inline const_iterator end()   const { return Impl.end();   }
+  const_iterator begin() const { return Impl.begin(); }
+  const_iterator end() const { return Impl.end(); }
 };
 
-} // end GR namespace
+} // namespace ento
 
-} // end clang namespace
+} // namespace clang
 
 // GraphTraits
 
 namespace llvm {
+
   template<> struct GraphTraits<clang::ento::ExplodedNode*> {
-    typedef clang::ento::ExplodedNode *NodeRef;
-    typedef clang::ento::ExplodedNode::succ_iterator ChildIteratorType;
-    typedef llvm::df_iterator<NodeRef> nodes_iterator;
+    using NodeRef = clang::ento::ExplodedNode *;
+    using ChildIteratorType = clang::ento::ExplodedNode::succ_iterator;
+    using nodes_iterator = llvm::df_iterator<NodeRef>;
 
     static NodeRef getEntryNode(NodeRef N) { return N; }
 
@@ -466,9 +483,9 @@
   };
 
   template<> struct GraphTraits<const clang::ento::ExplodedNode*> {
-    typedef const clang::ento::ExplodedNode *NodeRef;
-    typedef clang::ento::ExplodedNode::const_succ_iterator ChildIteratorType;
-    typedef llvm::df_iterator<NodeRef> nodes_iterator;
+    using NodeRef = const clang::ento::ExplodedNode *;
+    using ChildIteratorType = clang::ento::ExplodedNode::const_succ_iterator;
+    using nodes_iterator = llvm::df_iterator<NodeRef>;
 
     static NodeRef getEntryNode(NodeRef N) { return N; }
 
@@ -481,6 +498,6 @@
     static nodes_iterator nodes_end(NodeRef N) { return df_end(N); }
   };
 
-} // end llvm namespace
+} // namespace llvm
 
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index ecff5c6..e7f79ac 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -1,4 +1,4 @@
-//===-- ExprEngine.h - Path-Sensitive Expression-Level Dataflow ---*- C++ -*-=//
+//===- ExprEngine.h - Path-Sensitive Expression-Level Dataflow --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,32 +18,68 @@
 
 #include "clang/AST/Expr.h"
 #include "clang/AST/Type.h"
+#include "clang/Analysis/CFG.h"
 #include "clang/Analysis/DomainSpecific/ObjCNoReturn.h"
+#include "clang/Analysis/ProgramPoint.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/WorkList.h"
+#include "llvm/ADT/ArrayRef.h"
+#include <cassert>
+#include <utility>
 
 namespace clang {
 
 class AnalysisDeclContextManager;
+class AnalyzerOptions;
+class ASTContext;
+class ConstructionContext;
+class CXXBindTemporaryExpr;
 class CXXCatchStmt;
 class CXXConstructExpr;
 class CXXDeleteExpr;
 class CXXNewExpr;
-class CXXTemporaryObjectExpr;
 class CXXThisExpr;
+class Decl;
+class DeclStmt;
+class GCCAsmStmt;
+class LambdaExpr;
+class LocationContext;
 class MaterializeTemporaryExpr;
+class MSAsmStmt;
+class NamedDecl;
 class ObjCAtSynchronizedStmt;
 class ObjCForCollectionStmt;
+class ObjCIvarRefExpr;
+class ObjCMessageExpr;
+class ReturnStmt;
+class Stmt;
+
+namespace cross_tu {
+
+class CrossTranslationUnitContext;
+
+} // namespace cross_tu
   
 namespace ento {
 
-class AnalysisManager;
+class BasicValueFactory;
 class CallEvent;
-class CXXConstructorCall;
+class CheckerManager;
+class ConstraintManager;
+class CXXTempObjectRegion;
+class MemRegion;
+class RegionAndSymbolInvalidationTraits;
+class SymbolManager;
 
 class ExprEngine : public SubEngine {
 public:
@@ -51,11 +87,35 @@
   enum InliningModes {
     /// Follow the default settings for inlining callees.
     Inline_Regular = 0,
+
     /// Do minimal inlining of callees.
     Inline_Minimal = 0x1
   };
 
+  /// Hints for figuring out of a call should be inlined during evalCall().
+  struct EvalCallOptions {
+    /// This call is a constructor or a destructor for which we do not currently
+    /// compute the this-region correctly.
+    bool IsCtorOrDtorWithImproperlyModeledTargetRegion = false;
+
+    /// This call is a constructor or a destructor for a single element within
+    /// an array, a part of array construction or destruction.
+    bool IsArrayCtorOrDtor = false;
+
+    /// This call is a constructor or a destructor of a temporary value.
+    bool IsTemporaryCtorOrDtor = false;
+
+    /// This call is a constructor for a temporary that is lifetime-extended
+    /// by binding a smaller object within it to a reference, for example
+    /// 'const int &x = C().x;'.
+    bool IsTemporaryLifetimeExtendedViaSubobject = false;
+
+    EvalCallOptions() {}
+  };
+
 private:
+  cross_tu::CrossTranslationUnitContext &CTU;
+
   AnalysisManager &AMgr;
   
   AnalysisDeclContextManager &AnalysisDeclContexts;
@@ -63,19 +123,19 @@
   CoreEngine Engine;
 
   /// G - the simulation graph.
-  ExplodedGraph& G;
+  ExplodedGraph &G;
 
   /// StateMgr - Object that manages the data for all created states.
   ProgramStateManager StateMgr;
 
   /// SymMgr - Object that manages the symbol information.
-  SymbolManager& SymMgr;
+  SymbolManager &SymMgr;
 
   /// svalBuilder - SValBuilder object that creates SVals from expressions.
   SValBuilder &svalBuilder;
 
-  unsigned int currStmtIdx;
-  const NodeBuilderContext *currBldrCtx;
+  unsigned int currStmtIdx = 0;
+  const NodeBuilderContext *currBldrCtx = nullptr;
   
   /// Helper object to determine if an Objective-C message expression
   /// implicitly never returns.
@@ -97,10 +157,9 @@
   InliningModes HowToInline;
 
 public:
-  ExprEngine(AnalysisManager &mgr, bool gcEnabled,
-             SetOfConstDecls *VisitedCalleesIn,
-             FunctionSummariesTy *FS,
-             InliningModes HowToInlineIn);
+  ExprEngine(cross_tu::CrossTranslationUnitContext &CTU, AnalysisManager &mgr,
+             bool gcEnabled, SetOfConstDecls *VisitedCalleesIn,
+             FunctionSummariesTy *FS, InliningModes HowToInlineIn);
 
   ~ExprEngine() override;
 
@@ -130,7 +189,12 @@
 
   SValBuilder &getSValBuilder() { return svalBuilder; }
 
-  BugReporter& getBugReporter() { return BR; }
+  BugReporter &getBugReporter() { return BR; }
+
+  cross_tu::CrossTranslationUnitContext *
+  getCrossTranslationUnitContext() override {
+    return &CTU;
+  }
 
   const NodeBuilderContext &getBuilderContext() {
     assert(currBldrCtx);
@@ -150,14 +214,14 @@
 
   /// Visualize a trimmed ExplodedGraph that only contains paths to the given
   /// nodes.
-  void ViewGraph(ArrayRef<const ExplodedNode*> Nodes);
+  void ViewGraph(ArrayRef<const ExplodedNode *> Nodes);
 
   /// getInitialState - Return the initial state used for the root vertex
   ///  in the ExplodedGraph.
   ProgramStateRef getInitialState(const LocationContext *InitLoc) override;
 
-  ExplodedGraph& getGraph() { return G; }
-  const ExplodedGraph& getGraph() const { return G; }
+  ExplodedGraph &getGraph() { return G; }
+  const ExplodedGraph &getGraph() const { return G; }
 
   /// \brief Run the analyzer's garbage collection - remove dead symbols and
   /// bindings from the state.
@@ -194,7 +258,7 @@
   void processCFGElement(const CFGElement E, ExplodedNode *Pred,
                          unsigned StmtIdx, NodeBuilderContext *Ctx) override;
 
-  void ProcessStmt(const CFGStmt S, ExplodedNode *Pred);
+  void ProcessStmt(const Stmt *S, ExplodedNode *Pred);
 
   void ProcessLoopExit(const Stmt* S, ExplodedNode *Pred);
 
@@ -299,25 +363,26 @@
                        const CallEvent *Call) override;
 
   /// printState - Called by ProgramStateManager to print checker-specific data.
-  void printState(raw_ostream &Out, ProgramStateRef State,
-                  const char *NL, const char *Sep) override;
+  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
+                  const char *Sep,
+                  const LocationContext *LCtx = nullptr) override;
 
-  ProgramStateManager& getStateManager() override { return StateMgr; }
+  ProgramStateManager &getStateManager() override { return StateMgr; }
 
-  StoreManager& getStoreManager() { return StateMgr.getStoreManager(); }
+  StoreManager &getStoreManager() { return StateMgr.getStoreManager(); }
 
-  ConstraintManager& getConstraintManager() {
+  ConstraintManager &getConstraintManager() {
     return StateMgr.getConstraintManager();
   }
 
   // FIXME: Remove when we migrate over to just using SValBuilder.
-  BasicValueFactory& getBasicVals() {
+  BasicValueFactory &getBasicVals() {
     return StateMgr.getBasicVals();
   }
 
   // FIXME: Remove when we migrate over to just using ValueManager.
-  SymbolManager& getSymbolManager() { return SymMgr; }
-  const SymbolManager& getSymbolManager() const { return SymMgr; }
+  SymbolManager &getSymbolManager() { return SymMgr; }
+  const SymbolManager &getSymbolManager() const { return SymMgr; }
 
   // Functions for external checking of whether we have unfinished work
   bool wasBlocksExhausted() const { return Engine.wasBlocksExhausted(); }
@@ -332,9 +397,9 @@
   void Visit(const Stmt *S, ExplodedNode *Pred, ExplodedNodeSet &Dst);
 
   /// VisitArraySubscriptExpr - Transfer function for array accesses.
-  void VisitLvalArraySubscriptExpr(const ArraySubscriptExpr *Ex,
-                                   ExplodedNode *Pred,
-                                   ExplodedNodeSet &Dst);
+  void VisitArraySubscriptExpr(const ArraySubscriptExpr *Ex,
+                               ExplodedNode *Pred,
+                               ExplodedNodeSet &Dst);
 
   /// VisitGCCAsmStmt - Transfer function logic for inline asm.
   void VisitGCCAsmStmt(const GCCAsmStmt *A, ExplodedNode *Pred,
@@ -363,7 +428,7 @@
 
   /// VisitCast - Transfer function logic for all casts (implicit and explicit).
   void VisitCast(const CastExpr *CastE, const Expr *Ex, ExplodedNode *Pred,
-                ExplodedNodeSet &Dst);
+                 ExplodedNodeSet &Dst);
 
   /// VisitCompoundLiteralExpr - Transfer function logic for compound literals.
   void VisitCompoundLiteralExpr(const CompoundLiteralExpr *CL, 
@@ -390,9 +455,9 @@
 
   /// VisitMemberExpr - Transfer function for member expressions.
   void VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred, 
-                           ExplodedNodeSet &Dst);
+                       ExplodedNodeSet &Dst);
 
-  /// VisitMemberExpr - Transfer function for builtin atomic expressions
+  /// VisitAtomicExpr - Transfer function for builtin atomic expressions
   void VisitAtomicExpr(const AtomicExpr *E, ExplodedNode *Pred,
                        ExplodedNodeSet &Dst);
 
@@ -422,7 +487,7 @@
 
   /// VisitUnaryExprOrTypeTraitExpr - Transfer function for sizeof.
   void VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *Ex,
-                              ExplodedNode *Pred, ExplodedNodeSet &Dst);
+                                     ExplodedNode *Pred, ExplodedNodeSet &Dst);
 
   /// VisitUnaryOperator - Transfer function logic for unary operators.
   void VisitUnaryOperator(const UnaryOperator* B, ExplodedNode *Pred, 
@@ -448,7 +513,8 @@
 
   void VisitCXXDestructor(QualType ObjectType, const MemRegion *Dest,
                           const Stmt *S, bool IsBaseDtor,
-                          ExplodedNode *Pred, ExplodedNodeSet &Dst);
+                          ExplodedNode *Pred, ExplodedNodeSet &Dst,
+                          const EvalCallOptions &Options);
 
   void VisitCXXNewAllocatorCall(const CXXNewExpr *CNE,
                                 ExplodedNode *Pred,
@@ -471,7 +537,7 @@
   void evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst, ExplodedNodeSet &Src, 
                          const Expr *Ex);
   
-  std::pair<const ProgramPointTag *, const ProgramPointTag*>
+  std::pair<const ProgramPointTag *, const ProgramPointTag *>
     geteagerlyAssumeBinOpBifurcationTags();
 
   SVal evalMinus(SVal X) {
@@ -499,7 +565,6 @@
                          StmtNodeBuilder &Bldr);
 
 public:
-
   SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op,
                  NonLoc L, NonLoc R, QualType T) {
     return svalBuilder.evalBinOpNN(state, op, L, R, T);
@@ -561,6 +626,12 @@
                  ExplodedNode *Pred, ProgramStateRef St, SVal TargetLV, SVal Val,
                  const ProgramPointTag *tag = nullptr);
 
+  /// Return the CFG element corresponding to the worklist element
+  /// that is currently being processed by ExprEngine.
+  CFGElement getCurrentCFGElement() {
+    return (*currBldrCtx->getBlock())[currStmtIdx];
+  }
+
   /// \brief Create a new state in which the call return value is binded to the
   /// call origin expression.
   ProgramStateRef bindReturnValue(const CallEvent &Call,
@@ -574,7 +645,9 @@
 
   /// \brief Default implementation of call evaluation.
   void defaultEvalCall(NodeBuilder &B, ExplodedNode *Pred,
-                       const CallEvent &Call);
+                       const CallEvent &Call,
+                       const EvalCallOptions &CallOpts = {});
+
 private:
   void evalLoadCommon(ExplodedNodeSet &Dst,
                       const Expr *NodeEx,  /* Eventually will be a CFGStmt */
@@ -598,9 +671,23 @@
   void examineStackFrames(const Decl *D, const LocationContext *LCtx,
                           bool &IsRecursive, unsigned &StackDepth);
 
+  enum CallInlinePolicy {
+    CIP_Allowed,
+    CIP_DisallowedOnce,
+    CIP_DisallowedAlways
+  };
+
+  /// \brief See if a particular call should be inlined, by only looking
+  /// at the call event and the current state of analysis.
+  CallInlinePolicy mayInlineCallKind(const CallEvent &Call,
+                                     const ExplodedNode *Pred,
+                                     AnalyzerOptions &Opts,
+                                     const EvalCallOptions &CallOpts);
+
   /// Checks our policies and decides weither the given call should be inlined.
   bool shouldInlineCall(const CallEvent &Call, const Decl *D,
-                        const ExplodedNode *Pred);
+                        const ExplodedNode *Pred,
+                        const EvalCallOptions &CallOpts = {});
 
   bool inlineCall(const CallEvent &Call, const Decl *D, NodeBuilder &Bldr,
                   ExplodedNode *Pred, ProgramStateRef State);
@@ -634,6 +721,17 @@
                                                 const Expr *InitWithAdjustments,
                                                 const Expr *Result = nullptr);
 
+  /// Returns a region representing the first element of a (possibly
+  /// multi-dimensional) array, for the purposes of element construction or
+  /// destruction.
+  ///
+  /// On return, \p Ty will be set to the base type of the array.
+  ///
+  /// If the type is not an array type at all, the original value is returned.
+  /// Otherwise the "IsArray" flag is set.
+  static SVal makeZeroElementRegion(ProgramStateRef State, SVal LValue,
+                                    QualType &Ty, bool &IsArray);
+
   /// For a DeclStmt or CXXInitCtorInitializer, walk backward in the current CFG
   /// block to find the constructor expression that directly constructed into
   /// the storage for this statement. Returns null if the constructor for this
@@ -641,20 +739,64 @@
   /// constructing into an existing region.
   const CXXConstructExpr *findDirectConstructorForCurrentCFGElement();
 
-  /// For a CXXConstructExpr, walk forward in the current CFG block to find the
-  /// CFGElement for the DeclStmt or CXXInitCtorInitializer for which is
-  /// directly constructed by this constructor. Returns None if the current
-  /// constructor expression did not directly construct into an existing
-  /// region.
-  Optional<CFGElement> findElementDirectlyInitializedByCurrentConstructor();
-
   /// For a given constructor, look forward in the current CFG block to
   /// determine the region into which an object will be constructed by \p CE.
-  /// Returns either a field or local variable region if the object will be
-  /// directly constructed in an existing region or a temporary object region
-  /// if not.
+  /// When the lookahead fails, a temporary region is returned, and the
+  /// IsConstructorWithImproperlyModeledTargetRegion flag is set in \p CallOpts.
   const MemRegion *getRegionForConstructedObject(const CXXConstructExpr *CE,
-                                                 ExplodedNode *Pred);
+                                                 ExplodedNode *Pred,
+                                                 const ConstructionContext *CC,
+                                                 EvalCallOptions &CallOpts);
+
+  /// Store the region of a C++ temporary object corresponding to a
+  /// CXXBindTemporaryExpr for later destruction.
+  static ProgramStateRef addInitializedTemporary(
+      ProgramStateRef State, const CXXBindTemporaryExpr *BTE,
+      const LocationContext *LC, const CXXTempObjectRegion *R);
+
+  /// Check if all initialized temporary regions are clear for the given
+  /// context range (including FromLC, not including ToLC).
+  /// This is useful for assertions.
+  static bool areInitializedTemporariesClear(ProgramStateRef State,
+                                             const LocationContext *FromLC,
+                                             const LocationContext *ToLC);
+
+  /// Store the region of a C++ temporary object corresponding to a
+  /// CXXBindTemporaryExpr for later destruction.
+  static ProgramStateRef addTemporaryMaterialization(
+      ProgramStateRef State, const MaterializeTemporaryExpr *MTE,
+      const LocationContext *LC, const CXXTempObjectRegion *R);
+
+  /// Check if all temporary materialization regions are clear for the given
+  /// context range (including FromLC, not including ToLC).
+  /// This is useful for assertions.
+  static bool areTemporaryMaterializationsClear(ProgramStateRef State,
+                                                const LocationContext *FromLC,
+                                                const LocationContext *ToLC);
+
+  /// Store the region returned by operator new() so that the constructor
+  /// that follows it knew what location to initialize. The value should be
+  /// cleared once the respective CXXNewExpr CFGStmt element is processed.
+  static ProgramStateRef
+  setCXXNewAllocatorValue(ProgramStateRef State, const CXXNewExpr *CNE,
+                          const LocationContext *CallerLC, SVal V);
+
+  /// Retrieve the location returned by the current operator new().
+  static SVal
+  getCXXNewAllocatorValue(ProgramStateRef State, const CXXNewExpr *CNE,
+                          const LocationContext *CallerLC);
+
+  /// Clear the location returned by the respective operator new(). This needs
+  /// to be done as soon as CXXNewExpr CFG block is evaluated.
+  static ProgramStateRef
+  clearCXXNewAllocatorValue(ProgramStateRef State, const CXXNewExpr *CNE,
+                            const LocationContext *CallerLC);
+
+  /// Check if all allocator values are clear for the given context range
+  /// (including FromLC, not including ToLC). This is useful for assertions.
+  static bool areCXXNewAllocatorValuesClear(ProgramStateRef State,
+                                            const LocationContext *FromLC,
+                                            const LocationContext *ToLC);
 };
 
 /// Traits for storing the call processing policy inside GDM.
@@ -668,8 +810,8 @@
   static void *GDMIndex() { static int index = 0; return &index; }
 };
 
-} // end ento namespace
+} // namespace ento
 
-} // end clang namespace
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPRENGINE_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h b/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h
index ce81c98..b70faa1 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h
@@ -1,4 +1,4 @@
-//== FunctionSummary.h - Stores summaries of functions. ------------*- C++ -*-//
+//===- FunctionSummary.h - Stores summaries of functions. -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,15 +18,18 @@
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include <cassert>
 #include <deque>
+#include <utility>
 
 namespace clang {
-
 namespace ento {
-typedef std::deque<Decl*> SetOfDecls;
-typedef llvm::DenseSet<const Decl*> SetOfConstDecls;
+
+using SetOfDecls = std::deque<Decl *>;
+using SetOfConstDecls = llvm::DenseSet<const Decl *>;
 
 class FunctionSummariesTy {
   class FunctionSummary {
@@ -47,13 +50,12 @@
     /// The number of times the function has been inlined.
     unsigned TimesInlined : 32;
 
-    FunctionSummary() :
-      TotalBasicBlocks(0),
-      InlineChecked(0),
-      TimesInlined(0) {}
+    FunctionSummary()
+        : TotalBasicBlocks(0), InlineChecked(0), MayInline(0),
+          TimesInlined(0) {}
   };
 
-  typedef llvm::DenseMap<const Decl *, FunctionSummary> MapTy;
+  using MapTy = llvm::DenseMap<const Decl *, FunctionSummary>;
   MapTy Map;
 
 public:
@@ -62,7 +64,8 @@
     if (I != Map.end())
       return I;
 
-    typedef std::pair<const Decl *, FunctionSummary> KVPair;
+    using KVPair = std::pair<const Decl *, FunctionSummary>;
+
     I = Map.insert(KVPair(D, FunctionSummary())).first;
     assert(I != Map.end());
     return I;
@@ -132,9 +135,9 @@
 
   unsigned getTotalNumBasicBlocks();
   unsigned getTotalNumVisitedBasicBlocks();
-
 };
 
-}} // end clang ento namespaces
+} // namespace ento
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_FUNCTIONSUMMARY_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
index a827a21..14909e3 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
@@ -103,6 +103,7 @@
   const MemRegion *getBaseRegion() const;
 
   /// Check if the region is a subregion of the given region.
+  /// Each region is a subregion of itself.
   virtual bool isSubRegionOf(const MemRegion *R) const;
 
   const MemRegion *StripCasts(bool StripBaseCasts = true) const;
@@ -961,7 +962,10 @@
   CXXThisRegion(const PointerType *thisPointerTy,
                 const StackArgumentsSpaceRegion *sReg)
       : TypedValueRegion(sReg, CXXThisRegionKind),
-        ThisPointerTy(thisPointerTy) {}
+        ThisPointerTy(thisPointerTy) {
+    assert(ThisPointerTy->getPointeeType()->getAsCXXRecordDecl() &&
+           "Invalid region type!");
+  }
 
   static void ProfileRegion(llvm::FoldingSetNodeID &ID,
                             const PointerType *PT,
@@ -1075,6 +1079,8 @@
     assert((!Idx.getAs<nonloc::ConcreteInt>() ||
             Idx.castAs<nonloc::ConcreteInt>().getValue().isSigned()) &&
            "The index must be signed");
+    assert(!elementType.isNull() && !elementType->isVoidType() &&
+           "Invalid region type!");
   }
 
   static void ProfileRegion(llvm::FoldingSetNodeID& ID, QualType elementType,
@@ -1412,21 +1418,18 @@
   bool hasTrait(SymbolRef Sym, InvalidationKinds IK) const;
   bool hasTrait(const MemRegion *MR, InvalidationKinds IK) const;
 };
-  
-} // end GR namespace
-
-} // end clang namespace
 
 //===----------------------------------------------------------------------===//
 // Pretty-printing regions.
 //===----------------------------------------------------------------------===//
-
-namespace llvm {
-static inline raw_ostream &operator<<(raw_ostream &os,
-                                      const clang::ento::MemRegion* R) {
+inline raw_ostream &operator<<(raw_ostream &os,
+                               const clang::ento::MemRegion *R) {
   R->dumpToStream(os);
   return os;
 }
-} // end llvm namespace
+
+} // namespace ento
+
+} // namespace clang
 
 #endif
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
index e3a2164..ef40634 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
@@ -212,10 +212,17 @@
   assumeInclusiveRange(DefinedOrUnknownSVal Val, const llvm::APSInt &From,
                        const llvm::APSInt &To) const;
 
+  /// \brief Check if the given SVal is not constrained to zero and is not
+  ///        a zero constant.
+  ConditionTruthVal isNonNull(SVal V) const;
+
   /// \brief Check if the given SVal is constrained to zero or is a zero
   ///        constant.
   ConditionTruthVal isNull(SVal V) const;
 
+  /// \return Whether values \p Lhs and \p Rhs are equal.
+  ConditionTruthVal areEqual(SVal Lhs, SVal Rhs) const;
+
   /// Utility method for getting regions.
   const VarRegion* getRegion(const VarDecl *D, const LocationContext *LC) const;
 
@@ -308,8 +315,12 @@
 
   /// \brief Return the value bound to the specified location.
   /// Returns UnknownVal() if none found.
-  SVal getSVal(const MemRegion* R) const;
+  SVal getSVal(const MemRegion* R, QualType T = QualType()) const;
 
+  /// \brief Return the value bound to the specified location, assuming
+  /// that the value is a scalar integer or an enumeration or a pointer.
+  /// Returns UnknownVal() if none found or the region is not known to hold
+  /// a value of such type.
   SVal getSValAsScalarOrLoc(const MemRegion *R) const;
 
   /// \brief Visits the symbols reachable from the given SVal using the provided
@@ -424,9 +435,10 @@
   }
 
   // Pretty-printing.
-  void print(raw_ostream &Out, const char *nl = "\n",
-             const char *sep = "") const;
-  void printDOT(raw_ostream &Out) const;
+  void print(raw_ostream &Out, const char *nl = "\n", const char *sep = "",
+             const LocationContext *CurrentLC = nullptr) const;
+  void printDOT(raw_ostream &Out,
+                const LocationContext *CurrentLC = nullptr) const;
   void printTaint(raw_ostream &Out, const char *nl = "\n",
                   const char *sep = "") const;
 
@@ -758,9 +770,10 @@
   return getStateManager().StoreMgr->getBinding(getStore(), LV, T);
 }
 
-inline SVal ProgramState::getSVal(const MemRegion* R) const {
+inline SVal ProgramState::getSVal(const MemRegion* R, QualType T) const {
   return getStateManager().StoreMgr->getBinding(getStore(),
-                                                loc::MemRegionVal(R));
+                                                loc::MemRegionVal(R),
+                                                T);
 }
 
 inline BasicValueFactory &ProgramState::getBasicVals() const {
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h b/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h
index a04fa90..5555b29 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h
@@ -14,33 +14,28 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_PROGRAMSTATETRAIT_H
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_PROGRAMSTATETRAIT_H
 
+#include "llvm/ADT/ImmutableList.h"
+#include "llvm/ADT/ImmutableMap.h"
+#include "llvm/ADT/ImmutableSet.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-  template <typename K, typename D, typename I> class ImmutableMap;
-  template <typename K, typename I> class ImmutableSet;
-  template <typename T> class ImmutableList;
-  template <typename T> class ImmutableListImpl;
-}
+#include <cstdint>
 
 namespace clang {
-
 namespace ento {
+
   template <typename T> struct ProgramStatePartialTrait;
 
   /// Declares a program state trait for type \p Type called \p Name, and
-  /// introduce a typedef named \c NameTy.
+  /// introduce a type named \c NameTy.
   /// The macro should not be used inside namespaces, or for traits that must
   /// be accessible from more than one translation unit.
   #define REGISTER_TRAIT_WITH_PROGRAMSTATE(Name, Type) \
     namespace { \
       class Name {}; \
-      typedef Type Name ## Ty; \
+      using Name ## Ty = Type; \
     } \
     namespace clang { \
     namespace ento { \
@@ -52,28 +47,30 @@
     } \
     }
 
-
   // Partial-specialization for ImmutableMap.
-
   template <typename Key, typename Data, typename Info>
-  struct ProgramStatePartialTrait< llvm::ImmutableMap<Key,Data,Info> > {
-    typedef llvm::ImmutableMap<Key,Data,Info> data_type;
-    typedef typename data_type::Factory&      context_type;
-    typedef Key                               key_type;
-    typedef Data                              value_type;
-    typedef const value_type*                 lookup_type;
+  struct ProgramStatePartialTrait<llvm::ImmutableMap<Key, Data, Info>> {
+    using data_type = llvm::ImmutableMap<Key, Data, Info>;
+    using context_type = typename data_type::Factory &;
+    using key_type = Key;
+    using value_type = Data;
+    using lookup_type = const value_type *;
 
-    static inline data_type MakeData(void *const* p) {
-      return p ? data_type((typename data_type::TreeTy*) *p)
+    static data_type MakeData(void *const *p) {
+      return p ? data_type((typename data_type::TreeTy *) *p)
                : data_type(nullptr);
     }
-    static inline void *MakeVoidPtr(data_type B) {
+
+    static void *MakeVoidPtr(data_type B) {
       return B.getRoot();
     }
+
     static lookup_type Lookup(data_type B, key_type K) {
       return B.lookup(K);
     }
-    static data_type Set(data_type B, key_type K, value_type E,context_type F){
+
+    static data_type Set(data_type B, key_type K, value_type E,
+                         context_type F) {
       return F.add(B, K, E);
     }
 
@@ -85,8 +82,8 @@
       return B.contains(K);
     }
 
-    static inline context_type MakeContext(void *p) {
-      return *((typename data_type::Factory*) p);
+    static context_type MakeContext(void *p) {
+      return *((typename data_type::Factory *) p);
     }
 
     static void *CreateContext(llvm::BumpPtrAllocator& Alloc) {
@@ -94,7 +91,7 @@
     }
 
     static void DeleteContext(void *Ctx) {
-      delete (typename data_type::Factory*) Ctx;
+      delete (typename data_type::Factory *) Ctx;
     }
   };
 
@@ -107,21 +104,19 @@
   /// can deal with.
   #define CLANG_ENTO_PROGRAMSTATE_MAP(Key, Value) llvm::ImmutableMap<Key, Value>
 
-
   // Partial-specialization for ImmutableSet.
-
   template <typename Key, typename Info>
-  struct ProgramStatePartialTrait< llvm::ImmutableSet<Key,Info> > {
-    typedef llvm::ImmutableSet<Key,Info>      data_type;
-    typedef typename data_type::Factory&      context_type;
-    typedef Key                               key_type;
+  struct ProgramStatePartialTrait<llvm::ImmutableSet<Key, Info>> {
+    using data_type = llvm::ImmutableSet<Key, Info>;
+    using context_type = typename data_type::Factory &;
+    using key_type = Key;
 
-    static inline data_type MakeData(void *const* p) {
-      return p ? data_type((typename data_type::TreeTy*) *p)
+    static data_type MakeData(void *const *p) {
+      return p ? data_type((typename data_type::TreeTy *) *p)
                : data_type(nullptr);
     }
 
-    static inline void *MakeVoidPtr(data_type B) {
+    static void *MakeVoidPtr(data_type B) {
       return B.getRoot();
     }
 
@@ -137,27 +132,25 @@
       return B.contains(K);
     }
 
-    static inline context_type MakeContext(void *p) {
-      return *((typename data_type::Factory*) p);
+    static context_type MakeContext(void *p) {
+      return *((typename data_type::Factory *) p);
     }
 
-    static void *CreateContext(llvm::BumpPtrAllocator& Alloc) {
+    static void *CreateContext(llvm::BumpPtrAllocator &Alloc) {
       return new typename data_type::Factory(Alloc);
     }
 
     static void DeleteContext(void *Ctx) {
-      delete (typename data_type::Factory*) Ctx;
+      delete (typename data_type::Factory *) Ctx;
     }
   };
 
-
   // Partial-specialization for ImmutableList.
-
   template <typename T>
-  struct ProgramStatePartialTrait< llvm::ImmutableList<T> > {
-    typedef llvm::ImmutableList<T>            data_type;
-    typedef T                                 key_type;
-    typedef typename data_type::Factory&      context_type;
+  struct ProgramStatePartialTrait<llvm::ImmutableList<T>> {
+    using data_type = llvm::ImmutableList<T>;
+    using key_type = T;
+    using context_type = typename data_type::Factory &;
 
     static data_type Add(data_type L, key_type K, context_type F) {
       return F.add(K, L);
@@ -167,83 +160,84 @@
       return L.contains(K);
     }
 
-    static inline data_type MakeData(void *const* p) {
-      return p ? data_type((const llvm::ImmutableListImpl<T>*) *p)
+    static data_type MakeData(void *const *p) {
+      return p ? data_type((const llvm::ImmutableListImpl<T> *) *p)
                : data_type(nullptr);
     }
 
-    static inline void *MakeVoidPtr(data_type D) {
+    static void *MakeVoidPtr(data_type D) {
       return const_cast<llvm::ImmutableListImpl<T> *>(D.getInternalPointer());
     }
 
-    static inline context_type MakeContext(void *p) {
-      return *((typename data_type::Factory*) p);
+    static context_type MakeContext(void *p) {
+      return *((typename data_type::Factory *) p);
     }
 
-    static void *CreateContext(llvm::BumpPtrAllocator& Alloc) {
+    static void *CreateContext(llvm::BumpPtrAllocator &Alloc) {
       return new typename data_type::Factory(Alloc);
     }
 
     static void DeleteContext(void *Ctx) {
-      delete (typename data_type::Factory*) Ctx;
+      delete (typename data_type::Factory *) Ctx;
     }
   };
 
-
   // Partial specialization for bool.
   template <> struct ProgramStatePartialTrait<bool> {
-    typedef bool data_type;
+    using data_type = bool;
 
-    static inline data_type MakeData(void *const* p) {
+    static data_type MakeData(void *const *p) {
       return p ? (data_type) (uintptr_t) *p
                : data_type();
     }
-    static inline void *MakeVoidPtr(data_type d) {
-      return (void*) (uintptr_t) d;
+
+    static void *MakeVoidPtr(data_type d) {
+      return (void *) (uintptr_t) d;
     }
   };
 
   // Partial specialization for unsigned.
   template <> struct ProgramStatePartialTrait<unsigned> {
-    typedef unsigned data_type;
+    using data_type = unsigned;
 
-    static inline data_type MakeData(void *const* p) {
+    static data_type MakeData(void *const *p) {
       return p ? (data_type) (uintptr_t) *p
                : data_type();
     }
-    static inline void *MakeVoidPtr(data_type d) {
-      return (void*) (uintptr_t) d;
+
+    static void *MakeVoidPtr(data_type d) {
+      return (void *) (uintptr_t) d;
     }
   };
 
   // Partial specialization for void*.
-  template <> struct ProgramStatePartialTrait<void*> {
-    typedef void *data_type;
+  template <> struct ProgramStatePartialTrait<void *> {
+    using data_type = void *;
 
-    static inline data_type MakeData(void *const* p) {
+    static data_type MakeData(void *const *p) {
       return p ? *p
                : data_type();
     }
-    static inline void *MakeVoidPtr(data_type d) {
+
+    static void *MakeVoidPtr(data_type d) {
       return d;
     }
   };
 
   // Partial specialization for const void *.
   template <> struct ProgramStatePartialTrait<const void *> {
-    typedef const void *data_type;
+    using data_type = const void *;
 
-    static inline data_type MakeData(void * const *p) {
+    static data_type MakeData(void *const *p) {
       return p ? *p : data_type();
     }
 
-    static inline void *MakeVoidPtr(data_type d) {
+    static void *MakeVoidPtr(data_type d) {
       return const_cast<void *>(d);
     }
   };
 
-} // end ento namespace
+} // namespace ento
+} // namespace clang
 
-} // end clang namespace
-
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_PROGRAMSTATETRAIT_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
index d58d0a6..92acf73 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
@@ -16,21 +16,43 @@
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALBUILDER_H
 
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprObjC.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+#include "llvm/ADT/ImmutableList.h"
+#include "llvm/ADT/Optional.h"
+#include <cstdint>
 
 namespace clang {
 
+class BlockDecl;
 class CXXBoolLiteralExpr;
+class CXXMethodDecl;
+class CXXRecordDecl;
+class DeclaratorDecl;
+class FunctionDecl;
+class LocationContext;
+class StackFrameContext;
+class Stmt;
 
 namespace ento {
 
+class ConditionTruthVal;
+class ProgramStateManager;
+class StoreRef;
+
 class SValBuilder {
   virtual void anchor();
+
 protected:
   ASTContext &Context;
   
@@ -62,14 +84,12 @@
 public:
   SValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
               ProgramStateManager &stateMgr)
-    : Context(context), BasicVals(context, alloc),
-      SymMgr(context, BasicVals, alloc),
-      MemMgr(context, alloc),
-      StateMgr(stateMgr),
-      ArrayIndexTy(context.LongLongTy),
-      ArrayIndexWidth(context.getTypeSize(ArrayIndexTy)) {}
+      : Context(context), BasicVals(context, alloc),
+        SymMgr(context, BasicVals, alloc), MemMgr(context, alloc),
+        StateMgr(stateMgr), ArrayIndexTy(context.LongLongTy),
+        ArrayIndexWidth(context.getTypeSize(ArrayIndexTy)) {}
 
-  virtual ~SValBuilder() {}
+  virtual ~SValBuilder() = default;
 
   bool haveSameType(const SymExpr *Sym1, const SymExpr *Sym2) {
     return haveSameType(Sym1->getType(), Sym2->getType());
@@ -124,7 +144,12 @@
 
   SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op,
                  SVal lhs, SVal rhs, QualType type);
-  
+
+  /// \return Whether values in \p lhs and \p rhs are equal at \p state.
+  ConditionTruthVal areEqual(ProgramStateRef state, SVal lhs, SVal rhs);
+
+  SVal evalEQ(ProgramStateRef state, SVal lhs, SVal rhs);
+
   DefinedOrUnknownSVal evalEQ(ProgramStateRef state, DefinedOrUnknownSVal lhs,
                               DefinedOrUnknownSVal rhs);
 
@@ -188,11 +213,11 @@
                                         const LocationContext *LCtx,
                                         QualType type,
                                         unsigned count);
-  
   DefinedOrUnknownSVal conjureSymbolVal(const Stmt *stmt,
                                         const LocationContext *LCtx,
                                         QualType type,
                                         unsigned visitCount);
+
   /// \brief Conjure a symbol representing heap allocated memory region.
   ///
   /// Note, the expression should represent a location.
@@ -355,8 +380,8 @@
                                      ASTContext &context,
                                      ProgramStateManager &stateMgr);
 
-} // end GR namespace
+} // namespace ento
 
-} // end clang namespace
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALBUILDER_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
index 935f001..29e44b3 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
@@ -1,4 +1,4 @@
-//== SVals.h - Abstract Values for Static Analysis ---------*- C++ -*--==//
+//===- SVals.h - Abstract Values for Static Analysis ------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,11 +16,18 @@
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
 
 #include "clang/AST/Expr.h"
+#include "clang/AST/Type.h"
 #include "clang/Basic/LLVM.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableList.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
 
 //==------------------------------------------------------------------------==//
 //  Base SVal types.
@@ -28,34 +35,40 @@
 
 namespace clang {
 
+class CXXBaseSpecifier;
+class DeclaratorDecl;
+class FunctionDecl;
+class LabelDecl;
+
 namespace ento {
 
+class BasicValueFactory;
 class CompoundValData;
 class LazyCompoundValData;
-class PointerToMemberData;
-class ProgramState;
-class BasicValueFactory;
 class MemRegion;
-class TypedValueRegion;
-class MemRegionManager;
-class ProgramStateManager;
+class PointerToMemberData;
 class SValBuilder;
+class TypedValueRegion;
 
 namespace nonloc {
+
 /// Sub-kinds for NonLoc values.
 enum Kind {
 #define NONLOC_SVAL(Id, Parent) Id ## Kind,
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
 };
-}
+
+} // namespace nonloc
 
 namespace loc {
+
 /// Sub-kinds for Loc values.
 enum Kind {
 #define LOC_SVAL(Id, Parent) Id ## Kind,
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
 };
-}
+
+} // namespace loc
 
 /// SVal - This represents a symbolic expression, which can be either
 ///  an L-value or an R-value.
@@ -71,20 +84,19 @@
   enum { BaseBits = 2, BaseMask = 0x3 };
 
 protected:
-  const void *Data;
+  const void *Data = nullptr;
 
   /// The lowest 2 bits are a BaseKind (0 -- 3).
   ///  The higher bits are an unsigned "kind" value.
-  unsigned Kind;
+  unsigned Kind = 0;
 
   explicit SVal(const void *d, bool isLoc, unsigned ValKind)
-  : Data(d), Kind((isLoc ? LocKind : NonLocKind) | (ValKind << BaseBits)) {}
+      : Data(d), Kind((isLoc ? LocKind : NonLocKind) | (ValKind << BaseBits)) {}
 
-  explicit SVal(BaseKind k, const void *D = nullptr)
-    : Data(D), Kind(k) {}
+  explicit SVal(BaseKind k, const void *D = nullptr) : Data(D), Kind(k) {}
 
 public:
-  explicit SVal() : Data(nullptr), Kind(0) {}
+  explicit SVal() = default;
 
   /// \brief Convert to the specified SVal type, asserting that this SVal is of
   /// the desired type.
@@ -103,41 +115,38 @@
     return *static_cast<const T *>(this);
   }
 
-  /// BufferTy - A temporary buffer to hold a set of SVals.
-  typedef SmallVector<SVal,5> BufferTy;
-
-  inline unsigned getRawKind() const { return Kind; }
-  inline BaseKind getBaseKind() const { return (BaseKind) (Kind & BaseMask); }
-  inline unsigned getSubKind() const { return (Kind & ~BaseMask) >> BaseBits; }
+  unsigned getRawKind() const { return Kind; }
+  BaseKind getBaseKind() const { return (BaseKind) (Kind & BaseMask); }
+  unsigned getSubKind() const { return (Kind & ~BaseMask) >> BaseBits; }
 
   // This method is required for using SVal in a FoldingSetNode.  It
   // extracts a unique signature for this SVal object.
-  inline void Profile(llvm::FoldingSetNodeID& ID) const {
+  void Profile(llvm::FoldingSetNodeID &ID) const {
     ID.AddInteger((unsigned) getRawKind());
     ID.AddPointer(Data);
   }
 
-  inline bool operator==(const SVal& R) const {
+  bool operator==(const SVal &R) const {
     return getRawKind() == R.getRawKind() && Data == R.Data;
   }
 
-  inline bool operator!=(const SVal& R) const {
+  bool operator!=(const SVal &R) const {
     return !(*this == R);
   }
 
-  inline bool isUnknown() const {
+  bool isUnknown() const {
     return getRawKind() == UnknownValKind;
   }
 
-  inline bool isUndef() const {
+  bool isUndef() const {
     return getRawKind() == UndefinedValKind;
   }
 
-  inline bool isUnknownOrUndef() const {
+  bool isUnknownOrUndef() const {
     return getRawKind() <= UnknownValKind;
   }
 
-  inline bool isValid() const {
+  bool isValid() const {
     return getRawKind() > UnknownValKind;
   }
 
@@ -178,7 +187,7 @@
   ///  return that expression.  Otherwise return NULL.
   const SymExpr *getAsSymbolicExpression() const;
 
-  const SymExpr* getAsSymExpr() const;
+  const SymExpr *getAsSymExpr() const;
 
   const MemRegion *getAsRegion() const;
 
@@ -198,6 +207,10 @@
   }
 };
 
+inline raw_ostream &operator<<(raw_ostream &os, clang::ento::SVal V) {
+  V.dumpToStream(os);
+  return os;
+}
 
 class UndefinedVal : public SVal {
 public:
@@ -205,28 +218,28 @@
 
 private:
   friend class SVal;
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == UndefinedValKind;
   }
 };
 
 class DefinedOrUnknownSVal : public SVal {
-private:
+public:
   // We want calling these methods to be a compiler error since they are
   // tautologically false.
   bool isUndef() const = delete;
   bool isValid() const = delete;
   
 protected:
-  DefinedOrUnknownSVal() {}
+  DefinedOrUnknownSVal() = default;
   explicit DefinedOrUnknownSVal(const void *d, bool isLoc, unsigned ValKind)
-    : SVal(d, isLoc, ValKind) {}
-  
-  explicit DefinedOrUnknownSVal(BaseKind k, void *D = nullptr)
-    : SVal(k, D) {}
+      : SVal(d, isLoc, ValKind) {}
+  explicit DefinedOrUnknownSVal(BaseKind k, void *D = nullptr) : SVal(k, D) {}
   
 private:
   friend class SVal;
+
   static bool isKind(const SVal& V) {
     return !V.isUndef();
   }
@@ -238,37 +251,43 @@
   
 private:
   friend class SVal;
+
   static bool isKind(const SVal &V) {
     return V.getBaseKind() == UnknownValKind;
   }
 };
 
 class DefinedSVal : public DefinedOrUnknownSVal {
-private:
+public:
   // We want calling these methods to be a compiler error since they are
   // tautologically true/false.
   bool isUnknown() const = delete;
   bool isUnknownOrUndef() const = delete;
   bool isValid() const = delete;
+
 protected:
-  DefinedSVal() {}
+  DefinedSVal() = default;
   explicit DefinedSVal(const void *d, bool isLoc, unsigned ValKind)
-    : DefinedOrUnknownSVal(d, isLoc, ValKind) {}
+      : DefinedOrUnknownSVal(d, isLoc, ValKind) {}
+
 private:
   friend class SVal;
+
   static bool isKind(const SVal& V) {
     return !V.isUnknownOrUndef();
   }
 };
 
-
 /// \brief Represents an SVal that is guaranteed to not be UnknownVal.
 class KnownSVal : public SVal {
-  KnownSVal() {}
   friend class SVal;
+
+  KnownSVal() = default;
+
   static bool isKind(const SVal &V) {
     return !V.isUnknown();
   }
+
 public:
   KnownSVal(const DefinedSVal &V) : SVal(V) {}
   KnownSVal(const UndefinedVal &V) : SVal(V) {}
@@ -276,20 +295,21 @@
 
 class NonLoc : public DefinedSVal {
 protected:
-  NonLoc() {}
+  NonLoc() = default;
   explicit NonLoc(unsigned SubKind, const void *d)
-    : DefinedSVal(d, false, SubKind) {}
+      : DefinedSVal(d, false, SubKind) {}
 
 public:
   void dumpToStream(raw_ostream &Out) const;
 
-  static inline bool isCompoundType(QualType T) {
+  static bool isCompoundType(QualType T) {
     return T->isArrayType() || T->isRecordType() ||
            T->isComplexType() || T->isVectorType();
   }
 
 private:
   friend class SVal;
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == NonLocKind;
   }
@@ -297,20 +317,21 @@
 
 class Loc : public DefinedSVal {
 protected:
-  Loc() {}
+  Loc() = default;
   explicit Loc(unsigned SubKind, const void *D)
-  : DefinedSVal(const_cast<void*>(D), true, SubKind) {}
+      : DefinedSVal(const_cast<void *>(D), true, SubKind) {}
 
 public:
   void dumpToStream(raw_ostream &Out) const;
 
-  static inline bool isLocType(QualType T) {
+  static bool isLocType(QualType T) {
     return T->isAnyPointerType() || T->isBlockPointerType() || 
            T->isReferenceType() || T->isNullPtrType();
   }
 
 private:
   friend class SVal;
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == LocKind;
   }
@@ -329,7 +350,7 @@
   SymbolVal(SymbolRef sym) : NonLoc(SymbolValKind, sym) { assert(sym); }
 
   SymbolRef getSymbol() const {
-    return (const SymExpr*) Data;
+    return (const SymExpr *) Data;
   }
 
   bool isExpression() const {
@@ -338,6 +359,7 @@
 
 private:
   friend class SVal;
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == NonLocKind &&
            V.getSubKind() == SymbolValKind;
@@ -354,7 +376,7 @@
   explicit ConcreteInt(const llvm::APSInt& V) : NonLoc(ConcreteIntKind, &V) {}
 
   const llvm::APSInt& getValue() const {
-    return *static_cast<const llvm::APSInt*>(Data);
+    return *static_cast<const llvm::APSInt *>(Data);
   }
 
   // Transfer functions for binary/unary operations on ConcreteInts.
@@ -367,7 +389,9 @@
 
 private:
   friend class SVal;
-  ConcreteInt() {}
+
+  ConcreteInt() = default;
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == NonLocKind &&
            V.getSubKind() == ConcreteIntKind;
@@ -391,7 +415,6 @@
   }
 
 public:
-
   Loc getLoc() const {
     const std::pair<SVal, uintptr_t> *D =
       static_cast<const std::pair<SVal, uintptr_t> *>(Data);
@@ -413,7 +436,9 @@
 
 private:
   friend class SVal;
-  LocAsInteger() {}
+
+  LocAsInteger() = default;
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == NonLocKind &&
            V.getSubKind() == LocAsIntegerKind;
@@ -431,16 +456,19 @@
 
 public:
   const CompoundValData* getValue() const {
-    return static_cast<const CompoundValData*>(Data);
+    return static_cast<const CompoundValData *>(Data);
   }
 
-  typedef llvm::ImmutableList<SVal>::iterator iterator;
+  using iterator = llvm::ImmutableList<SVal>::iterator;
+
   iterator begin() const;
   iterator end() const;
 
 private:
   friend class SVal;
-  CompoundVal() {}
+
+  CompoundVal() = default;
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == NonLocKind && V.getSubKind() == CompoundValKind;
   }
@@ -454,21 +482,26 @@
   friend class ento::SValBuilder;
 
   explicit LazyCompoundVal(const LazyCompoundValData *D)
-    : NonLoc(LazyCompoundValKind, D) {}
+      : NonLoc(LazyCompoundValKind, D) {}
+
 public:
   const LazyCompoundValData *getCVData() const {
-    return static_cast<const LazyCompoundValData*>(Data);
+    return static_cast<const LazyCompoundValData *>(Data);
   }
+
   const void *getStore() const;
   const TypedValueRegion *getRegion() const;
 
 private:
   friend class SVal;
-  LazyCompoundVal() {}
+
+  LazyCompoundVal() = default;
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == NonLocKind &&
            V.getSubKind() == LazyCompoundValKind;
   }
+
   static bool isKind(const NonLoc& V) {
     return V.getSubKind() == LazyCompoundValKind;
   }
@@ -487,28 +520,36 @@
   friend class ento::SValBuilder;
 
 public:
-  typedef llvm::PointerUnion<const DeclaratorDecl *,
-                             const PointerToMemberData *> PTMDataType;
+  using PTMDataType =
+      llvm::PointerUnion<const DeclaratorDecl *, const PointerToMemberData *>;
+
   const PTMDataType getPTMData() const {
     return PTMDataType::getFromOpaqueValue(const_cast<void *>(Data));
   }
+
   bool isNullMemberPointer() const {
     return getPTMData().isNull();
   }
+
   const DeclaratorDecl *getDecl() const;
+
   template<typename AdjustedDecl>
-  const AdjustedDecl* getDeclAs() const {
+  const AdjustedDecl *getDeclAs() const {
     return dyn_cast_or_null<AdjustedDecl>(getDecl());
   }
-  typedef llvm::ImmutableList<const CXXBaseSpecifier *>::iterator iterator;
+
+  using iterator = llvm::ImmutableList<const CXXBaseSpecifier *>::iterator;
+
   iterator begin() const;
   iterator end() const;
 
 private:
-  explicit PointerToMember(const PTMDataType D)
-    : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {}
   friend class SVal;
-  PointerToMember() {}
+
+  PointerToMember() = default;
+  explicit PointerToMember(const PTMDataType D)
+      : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {}
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == NonLocKind &&
            V.getSubKind() == PointerToMemberKind;
@@ -519,7 +560,7 @@
   }
 };
 
-} // end namespace ento::nonloc
+} // namespace nonloc
 
 //==------------------------------------------------------------------------==//
 //  Subclasses of Loc.
@@ -534,12 +575,14 @@
   }
 
   const LabelDecl *getLabel() const {
-    return static_cast<const LabelDecl*>(Data);
+    return static_cast<const LabelDecl *>(Data);
   }
 
 private:
   friend class SVal;
-  GotoLabel() {}
+
+  GotoLabel() = default;
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == LocKind && V.getSubKind() == GotoLabelKind;
   }
@@ -549,7 +592,6 @@
   }
 };
 
-
 class MemRegionVal : public Loc {
 public:
   explicit MemRegionVal(const MemRegion* r) : Loc(MemRegionValKind, r) {
@@ -557,8 +599,8 @@
   }
 
   /// \brief Get the underlining region.
-  const MemRegion* getRegion() const {
-    return static_cast<const MemRegion*>(Data);
+  const MemRegion *getRegion() const {
+    return static_cast<const MemRegion *>(Data);
   }
 
   /// \brief Get the underlining region and strip casts.
@@ -569,17 +611,19 @@
     return dyn_cast<REGION>(getRegion());
   }
 
-  inline bool operator==(const MemRegionVal& R) const {
+  bool operator==(const MemRegionVal &R) const {
     return getRegion() == R.getRegion();
   }
 
-  inline bool operator!=(const MemRegionVal& R) const {
+  bool operator!=(const MemRegionVal &R) const {
     return getRegion() != R.getRegion();
   }
 
 private:
   friend class SVal;
-  MemRegionVal() {}
+
+  MemRegionVal() = default;
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == LocKind &&
            V.getSubKind() == MemRegionValKind;
@@ -594,8 +638,8 @@
 public:
   explicit ConcreteInt(const llvm::APSInt& V) : Loc(ConcreteIntKind, &V) {}
 
-  const llvm::APSInt& getValue() const {
-    return *static_cast<const llvm::APSInt*>(Data);
+  const llvm::APSInt &getValue() const {
+    return *static_cast<const llvm::APSInt *>(Data);
   }
 
   // Transfer functions for binary/unary operations on ConcreteInts.
@@ -604,7 +648,9 @@
 
 private:
   friend class SVal;
-  ConcreteInt() {}
+
+  ConcreteInt() = default;
+
   static bool isKind(const SVal& V) {
     return V.getBaseKind() == LocKind &&
            V.getSubKind() == ConcreteIntKind;
@@ -615,24 +661,19 @@
   }
 };
 
-} // end ento::loc namespace
+} // namespace loc 
 
-} // end ento namespace
+} // namespace ento
 
-} // end clang namespace
+} // namespace clang
 
 namespace llvm {
-static inline raw_ostream &operator<<(raw_ostream &os,
-                                            clang::ento::SVal V) {
-  V.dumpToStream(os);
-  return os;
-}
 
 template <typename T> struct isPodLike;
 template <> struct isPodLike<clang::ento::SVal> {
   static const bool value = true;
 };
 
-} // end llvm namespace
+} // namespace llvm
 
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h b/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
index 25d20a1..84afb08 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
@@ -1,4 +1,4 @@
-//== Store.h - Interface for maps from Locations to Values ------*- C++ -*--==//
+//===- Store.h - Interface for maps from Locations to Values ----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,27 +14,42 @@
 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_STORE_H
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_STORE_H
 
+#include "clang/AST/Type.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
 
 namespace clang {
 
-class Stmt;
-class Expr;
-class ObjCIvarDecl;
+class ASTContext;
+class CastExpr;
+class CompoundLiteralExpr;
 class CXXBasePath;
+class Decl;
+class Expr;
+class LocationContext;
+class ObjCIvarDecl;
 class StackFrameContext;
 
 namespace ento {
 
 class CallEvent;
-class ProgramState;
 class ProgramStateManager;
 class ScanReachableSymbols;
+class SymbolReaper;
 
-typedef llvm::DenseSet<SymbolRef> InvalidatedSymbols;
+using InvalidatedSymbols = llvm::DenseSet<SymbolRef>;
 
 class StoreManager {
 protected:
@@ -48,7 +63,7 @@
   StoreManager(ProgramStateManager &stateMgr);
 
 public:
-  virtual ~StoreManager() {}
+  virtual ~StoreManager() = default;
 
   /// Return the value bound to specified location in a given state.
   /// \param[in] store The store in which to make the lookup.
@@ -168,7 +183,7 @@
   const MemRegion *castRegion(const MemRegion *region, QualType CastToTy);
 
   virtual StoreRef removeDeadBindings(Store store, const StackFrameContext *LCtx,
-                                      SymbolReaper& SymReaper) = 0;
+                                      SymbolReaper &SymReaper) = 0;
 
   virtual bool includedInBindings(Store store,
                                   const MemRegion *region) const = 0;
@@ -182,7 +197,7 @@
   /// associated with the object is recycled.
   virtual void decrementReferenceCount(Store store) {}
 
-  typedef SmallVector<const MemRegion *, 8> InvalidatedRegions;
+  using InvalidatedRegions = SmallVector<const MemRegion *, 8>;
 
   /// invalidateRegions - Clears out the specified regions from the store,
   ///  marking their values as unknown. Depending on the store, this may also
@@ -235,6 +250,7 @@
   class BindingsHandler {
   public:
     virtual ~BindingsHandler();
+
     virtual bool HandleBinding(StoreManager& SMgr, Store store,
                                const MemRegion *region, SVal val) = 0;
   };
@@ -242,16 +258,16 @@
   class FindUniqueBinding :
   public BindingsHandler {
     SymbolRef Sym;
-    const MemRegion* Binding;
-    bool First;
+    const MemRegion* Binding = nullptr;
+    bool First = true;
 
   public:
-    FindUniqueBinding(SymbolRef sym)
-      : Sym(sym), Binding(nullptr), First(true) {}
+    FindUniqueBinding(SymbolRef sym) : Sym(sym) {}
+
+    explicit operator bool() { return First && Binding; }
 
     bool HandleBinding(StoreManager& SMgr, Store store, const MemRegion* R,
                        SVal val) override;
-    explicit operator bool() { return First && Binding; }
     const MemRegion *getRegion() { return Binding; }
   };
 
@@ -273,15 +289,14 @@
   SVal getLValueFieldOrIvar(const Decl *decl, SVal base);
 };
 
-
 inline StoreRef::StoreRef(Store store, StoreManager & smgr)
-  : store(store), mgr(smgr) {
+    : store(store), mgr(smgr) {
   if (store)
     mgr.incrementReferenceCount(store);
 }
 
-inline StoreRef::StoreRef(const StoreRef &sr) 
-  : store(sr.store), mgr(sr.mgr)
+inline StoreRef::StoreRef(const StoreRef &sr)
+    : store(sr.store), mgr(sr.mgr)
 { 
   if (store)
     mgr.incrementReferenceCount(store);
@@ -308,8 +323,8 @@
 std::unique_ptr<StoreManager>
 CreateFieldsOnlyRegionStoreManager(ProgramStateManager &StMgr);
 
-} // end GR namespace
+} // namespace ento
 
-} // end clang namespace
+} // namespace clang
 
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_STORE_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h b/include/clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h
index 958c8c3..c5b4e50 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h
@@ -1,4 +1,4 @@
-//== StoreRef.h - Smart pointer for store objects ---------------*- C++ -*--==//
+//===- StoreRef.h - Smart pointer for store objects -------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,33 +19,36 @@
 namespace clang {
 namespace ento {
   
+class StoreManager;
+  
 /// Store - This opaque type encapsulates an immutable mapping from
 ///  locations to values.  At a high-level, it represents the symbolic
 ///  memory model.  Different subclasses of StoreManager may choose
 ///  different types to represent the locations and values.
-typedef const void *Store;
-  
-class StoreManager;
+using Store = const void *;
   
 class StoreRef {
   Store store;
   StoreManager &mgr;
+
 public:
-  StoreRef(Store, StoreManager &);
-  StoreRef(const StoreRef &);
-  StoreRef &operator=(StoreRef const &);
+  StoreRef(Store store, StoreManager &smgr);
+  StoreRef(const StoreRef &sr);
+  StoreRef &operator=(StoreRef const &newStore);
+  ~StoreRef();
   
   bool operator==(const StoreRef &x) const {
     assert(&mgr == &x.mgr);
     return x.store == store;
   }
+
   bool operator!=(const StoreRef &x) const { return !operator==(x); }
   
-  ~StoreRef();
-  
   Store getStore() const { return store; }
   const StoreManager &getStoreManager() const { return mgr; }
 };
 
-}}
-#endif
+} // namespace ento
+} // namespace clang
+
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_STOREREF_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h b/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h
index 8ccd347..e574ffd 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h
@@ -24,6 +24,10 @@
 class LocationContext;
 class Stmt;
 
+namespace cross_tu {
+class CrossTranslationUnitContext;
+}
+
 namespace ento {
   
 struct NodeBuilderContext;
@@ -49,6 +53,9 @@
 
   virtual AnalysisManager &getAnalysisManager() = 0;
 
+  virtual cross_tu::CrossTranslationUnitContext *
+  getCrossTranslationUnitContext() = 0;
+
   virtual ProgramStateManager &getStateManager() = 0;
 
   /// Called by CoreEngine. Used to generate new successor
@@ -155,7 +162,8 @@
 
   /// printState - Called by ProgramStateManager to print checker-specific data.
   virtual void printState(raw_ostream &Out, ProgramStateRef State,
-                          const char *NL, const char *Sep) = 0;
+                          const char *NL, const char *Sep,
+                          const LocationContext *LCtx = nullptr) = 0;
 
   /// Called by CoreEngine when the analysis worklist is either empty or the
   //  maximum number of analysis steps have been reached.
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h b/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
index f720339..85551f9 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
@@ -1,4 +1,4 @@
-//== SymExpr.h - Management of Symbolic Values ------------------*- C++ -*--==//
+//===- SymExpr.h - Management of Symbolic Values ----------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,9 +15,10 @@
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMEXPR_H
 
 #include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/raw_ostream.h"
+#include <cassert>
 
 namespace clang {
 namespace ento {
@@ -49,7 +50,7 @@
   }
 
 public:
-  virtual ~SymExpr() {}
+  virtual ~SymExpr() = default;
 
   Kind getKind() const { return K; }
 
@@ -67,10 +68,11 @@
   /// treated as SymbolData - the iterator will NOT visit the parent region.
   class symbol_iterator {
     SmallVector<const SymExpr *, 5> itr;
+
     void expand();
 
   public:
-    symbol_iterator() {}
+    symbol_iterator() = default;
     symbol_iterator(const SymExpr *SE);
 
     symbol_iterator &operator++();
@@ -98,23 +100,30 @@
   virtual const MemRegion *getOriginRegion() const { return nullptr; }
 };
 
-typedef const SymExpr *SymbolRef;
-typedef SmallVector<SymbolRef, 2> SymbolRefSmallVectorTy;
+inline raw_ostream &operator<<(raw_ostream &os,
+                               const clang::ento::SymExpr *SE) {
+  SE->dumpToStream(os);
+  return os;
+}
 
-typedef unsigned SymbolID;
+using SymbolRef = const SymExpr *;
+using SymbolRefSmallVectorTy = SmallVector<SymbolRef, 2>;
+using SymbolID = unsigned;
+
 /// \brief A symbol representing data which can be stored in a memory location
 /// (region).
 class SymbolData : public SymExpr {
-  void anchor() override;
   const SymbolID Sym;
 
+  void anchor() override;
+
 protected:
   SymbolData(Kind k, SymbolID sym) : SymExpr(k), Sym(sym) {
     assert(classof(this));
   }
 
 public:
-  ~SymbolData() override {}
+  ~SymbolData() override = default;
 
   SymbolID getSymbolID() const { return Sym; }
 
@@ -128,4 +137,4 @@
 } // namespace ento
 } // namespace clang
 
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMEXPR_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
index 34112f8..f222e83 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
@@ -1,4 +1,4 @@
-//== SymbolManager.h - Management of Symbolic Values ------------*- C++ -*--==//
+//===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,8 +15,8 @@
 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
 
-#include "clang/AST/Decl.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/Type.h"
 #include "clang/Analysis/AnalysisDeclContext.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
@@ -26,17 +26,17 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/DataTypes.h"
+#include <cassert>
 
 namespace clang {
-  class ASTContext;
-  class StackFrameContext;
+
+class ASTContext;
+class Stmt;
 
 namespace ento {
-  class BasicValueFactory;
-  class SubRegion;
-  class TypedValueRegion;
-  class VarRegion;
+
+class BasicValueFactory;
+class StoreManager;
 
 ///\brief A symbol representing the value stored at a MemRegion.
 class SymbolRegionValue : public SymbolData {
@@ -66,7 +66,7 @@
   QualType getType() const override;
 
   // Implement isa<T> support.
-  static inline bool classof(const SymExpr *SE) {
+  static bool classof(const SymExpr *SE) {
     return SE->getKind() == SymbolRegionValueKind;
   }
 };
@@ -118,7 +118,7 @@
   }
 
   // Implement isa<T> support.
-  static inline bool classof(const SymExpr *SE) {
+  static bool classof(const SymExpr *SE) {
     return SE->getKind() == SymbolConjuredKind;
   }
 };
@@ -157,7 +157,7 @@
   }
 
   // Implement isa<T> support.
-  static inline bool classof(const SymExpr *SE) {
+  static bool classof(const SymExpr *SE) {
     return SE->getKind() == SymbolDerivedKind;
   }
 };
@@ -190,7 +190,7 @@
   }
 
   // Implement isa<T> support.
-  static inline bool classof(const SymExpr *SE) {
+  static bool classof(const SymExpr *SE) {
     return SE->getKind() == SymbolExtentKind;
   }
 };
@@ -206,11 +206,12 @@
   const LocationContext *LCtx;
   unsigned Count;
   const void *Tag;
+
 public:
   SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
                  const LocationContext *LCtx, unsigned count, const void *tag)
-  : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
-    Count(count), Tag(tag) {
+      : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
+        Count(count), Tag(tag) {
       assert(r);
       assert(s);
       assert(isValidTypeForSymbol(t));
@@ -245,7 +246,7 @@
   }
 
   // Implement isa<T> support.
-  static inline bool classof(const SymExpr *SE) {
+  static bool classof(const SymExpr *SE) {
     return SE->getKind() == SymbolMetadataKind;
   }
 };
@@ -253,8 +254,10 @@
 /// \brief Represents a cast expression.
 class SymbolCast : public SymExpr {
   const SymExpr *Operand;
+
   /// Type of the operand.
   QualType FromTy;
+
   /// The type of the result.
   QualType ToTy;
 
@@ -286,7 +289,7 @@
   }
 
   // Implement isa<T> support.
-  static inline bool classof(const SymExpr *SE) {
+  static bool classof(const SymExpr *SE) {
     return SE->getKind() == SymbolCastKind;
   }
 };
@@ -311,7 +314,7 @@
   BinaryOperator::Opcode getOpcode() const { return Op; }
 
   // Implement isa<T> support.
-  static inline bool classof(const SymExpr *SE) {
+  static bool classof(const SymExpr *SE) {
     Kind k = SE->getKind();
     return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS;
   }
@@ -349,7 +352,7 @@
   }
 
   // Implement isa<T> support.
-  static inline bool classof(const SymExpr *SE) {
+  static bool classof(const SymExpr *SE) {
     return SE->getKind() == SymIntExprKind;
   }
 };
@@ -386,7 +389,7 @@
   }
 
   // Implement isa<T> support.
-  static inline bool classof(const SymExpr *SE) {
+  static bool classof(const SymExpr *SE) {
     return SE->getKind() == IntSymExprKind;
   }
 };
@@ -423,20 +426,22 @@
   }
 
   // Implement isa<T> support.
-  static inline bool classof(const SymExpr *SE) {
+  static bool classof(const SymExpr *SE) {
     return SE->getKind() == SymSymExprKind;
   }
 };
 
 class SymbolManager {
-  typedef llvm::FoldingSet<SymExpr> DataSetTy;
-  typedef llvm::DenseMap<SymbolRef, SymbolRefSmallVectorTy*> SymbolDependTy;
+  using DataSetTy = llvm::FoldingSet<SymExpr>;
+  using SymbolDependTy = llvm::DenseMap<SymbolRef, SymbolRefSmallVectorTy *>;
 
   DataSetTy DataSet;
+
   /// Stores the extra dependencies between symbols: the data should be kept
   /// alive as long as the key is live.
   SymbolDependTy SymbolDependencies;
-  unsigned SymbolCounter;
+
+  unsigned SymbolCounter = 0;
   llvm::BumpPtrAllocator& BPAlloc;
   BasicValueFactory &BV;
   ASTContext &Ctx;
@@ -444,9 +449,7 @@
 public:
   SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
                 llvm::BumpPtrAllocator& bpalloc)
-    : SymbolDependencies(16), SymbolCounter(0),
-      BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
-
+      : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
   ~SymbolManager();
 
   static bool canSymbolicate(QualType T);
@@ -522,9 +525,9 @@
     HaveMarkedDependents
   };
 
-  typedef llvm::DenseSet<SymbolRef> SymbolSetTy;
-  typedef llvm::DenseMap<SymbolRef, SymbolStatus> SymbolMapTy;
-  typedef llvm::DenseSet<const MemRegion *> RegionSetTy;
+  using SymbolSetTy = llvm::DenseSet<SymbolRef>;
+  using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>;
+  using RegionSetTy = llvm::DenseSet<const MemRegion *>;
 
   SymbolMapTy TheLiving;
   SymbolSetTy MetadataInUse;
@@ -546,10 +549,9 @@
   /// considered live.
   /// If the stack frame context is NULL, everything on stack is considered
   /// dead.
-  SymbolReaper(const StackFrameContext *Ctx, const Stmt *s, SymbolManager& symmgr,
-               StoreManager &storeMgr)
-   : LCtx(Ctx), Loc(s), SymMgr(symmgr),
-     reapedStore(nullptr, storeMgr) {}
+  SymbolReaper(const StackFrameContext *Ctx, const Stmt *s,
+               SymbolManager &symmgr, StoreManager &storeMgr)
+      : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {}
 
   const LocationContext *getLocationContext() const { return LCtx; }
 
@@ -580,7 +582,8 @@
   ///  Returns true if the symbol is dead, false if live.
   bool maybeDead(SymbolRef sym);
 
-  typedef SymbolSetTy::const_iterator dead_iterator;
+  using dead_iterator = SymbolSetTy::const_iterator;
+
   dead_iterator dead_begin() const { return TheDead.begin(); }
   dead_iterator dead_end() const { return TheDead.end(); }
 
@@ -588,7 +591,8 @@
     return !TheDead.empty();
   }
   
-  typedef RegionSetTy::const_iterator region_iterator;
+  using region_iterator = RegionSetTy::const_iterator;
+
   region_iterator region_begin() const { return RegionRoots.begin(); }
   region_iterator region_end() const { return RegionRoots.end(); }
 
@@ -629,15 +633,8 @@
   virtual bool VisitMemRegion(const MemRegion *region) { return true; }
 };
 
-} // end GR namespace
+} // namespace ento
 
-} // end clang namespace
+} // namespace clang
 
-namespace llvm {
-static inline raw_ostream &operator<<(raw_ostream &os,
-                                      const clang::ento::SymExpr *SE) {
-  SE->dumpToStream(os);
-  return os;
-}
-} // end llvm namespace
-#endif
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/WorkList.h b/include/clang/StaticAnalyzer/Core/PathSensitive/WorkList.h
index 4f1a60e..3169791 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/WorkList.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/WorkList.h
@@ -80,17 +80,11 @@
   void setBlockCounter(BlockCounter C) { CurrentCounter = C; }
   BlockCounter getBlockCounter() const { return CurrentCounter; }
 
-  class Visitor {
-  public:
-    Visitor() {}
-    virtual ~Visitor();
-    virtual bool visit(const WorkListUnit &U) = 0;
-  };
-  virtual bool visitItemsInWorkList(Visitor &V) = 0;
-  
-  static WorkList *makeDFS();
-  static WorkList *makeBFS();
-  static WorkList *makeBFSBlockDFSContents();
+  static std::unique_ptr<WorkList> makeDFS();
+  static std::unique_ptr<WorkList> makeBFS();
+  static std::unique_ptr<WorkList> makeBFSBlockDFSContents();
+  static std::unique_ptr<WorkList> makeUnexploredFirst();
+  static std::unique_ptr<WorkList> makeUnexploredFirstPriorityQueue();
 };
 
 } // end GR namespace
diff --git a/include/clang/Tooling/AllTUsExecution.h b/include/clang/Tooling/AllTUsExecution.h
new file mode 100644
index 0000000..0417aea
--- /dev/null
+++ b/include/clang/Tooling/AllTUsExecution.h
@@ -0,0 +1,76 @@
+//===--- AllTUsExecution.h - Execute actions on all TUs. -*- C++ --------*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines a tool executor that runs given actions on all TUs in the
+//  compilation database. Tool results are deuplicated by the result key.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_ALLTUSEXECUTION_H
+#define LLVM_CLANG_TOOLING_ALLTUSEXECUTION_H
+
+#include "clang/Tooling/ArgumentsAdjusters.h"
+#include "clang/Tooling/Execution.h"
+
+namespace clang {
+namespace tooling {
+
+/// \brief Executes given frontend actions on all files/TUs in the compilation
+/// database.
+class AllTUsToolExecutor : public ToolExecutor {
+public:
+  static const char *ExecutorName;
+
+  /// \brief Init with \p CompilationDatabase.
+  /// This uses \p ThreadCount threads to exececute the actions on all files in
+  /// parallel. If \p ThreadCount is 0, this uses `llvm::hardware_concurrency`.
+  AllTUsToolExecutor(const CompilationDatabase &Compilations,
+                     unsigned ThreadCount,
+                     std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+                         std::make_shared<PCHContainerOperations>());
+
+  /// \brief Init with \p CommonOptionsParser. This is expected to be used by
+  /// `createExecutorFromCommandLineArgs` based on commandline options.
+  ///
+  /// The executor takes ownership of \p Options.
+  AllTUsToolExecutor(CommonOptionsParser Options, unsigned ThreadCount,
+                     std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+                         std::make_shared<PCHContainerOperations>());
+
+  StringRef getExecutorName() const override { return ExecutorName; }
+
+  using ToolExecutor::execute;
+
+  llvm::Error
+  execute(llvm::ArrayRef<
+          std::pair<std::unique_ptr<FrontendActionFactory>, ArgumentsAdjuster>>
+              Actions) override;
+
+  ExecutionContext *getExecutionContext() override { return &Context; };
+
+  ToolResults *getToolResults() override { return Results.get(); }
+
+  void mapVirtualFile(StringRef FilePath, StringRef Content) override {
+    OverlayFiles[FilePath] = Content;
+  }
+
+private:
+  // Used to store the parser when the executor is initialized with parser.
+  llvm::Optional<CommonOptionsParser> OptionsParser;
+  const CompilationDatabase &Compilations;
+  std::unique_ptr<ToolResults> Results;
+  ExecutionContext Context;
+  llvm::StringMap<std::string> OverlayFiles;
+  unsigned ThreadCount;
+};
+
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_ALLTUSEXECUTION_H
diff --git a/include/clang/Tooling/CommonOptionsParser.h b/include/clang/Tooling/CommonOptionsParser.h
index 70f5e80..15e8161 100644
--- a/include/clang/Tooling/CommonOptionsParser.h
+++ b/include/clang/Tooling/CommonOptionsParser.h
@@ -30,6 +30,7 @@
 #include "clang/Tooling/ArgumentsAdjusters.h"
 #include "clang/Tooling/CompilationDatabase.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
 
 namespace clang {
 namespace tooling {
@@ -86,13 +87,18 @@
   /// All options not belonging to \p Category become hidden.
   ///
   /// It also allows calls to set the required number of positional parameters.
-  ///
-  /// This constructor exits program in case of error.
   CommonOptionsParser(int &argc, const char **argv,
                       llvm::cl::OptionCategory &Category,
                       llvm::cl::NumOccurrencesFlag OccurrencesFlag,
                       const char *Overview = nullptr);
 
+  /// \brief A factory method that is similar to the above constructor, except
+  /// this returns an error instead exiting the program on error.
+  static llvm::Expected<CommonOptionsParser>
+  create(int &argc, const char **argv, llvm::cl::OptionCategory &Category,
+         llvm::cl::NumOccurrencesFlag OccurrencesFlag,
+         const char *Overview = nullptr);
+
   /// Returns a reference to the loaded compilations database.
   CompilationDatabase &getCompilations() {
     return *Compilations;
@@ -103,11 +109,23 @@
     return SourcePathList;
   }
 
+  /// Returns the argument adjuster calculated from "--extra-arg" and
+  //"--extra-arg-before" options.
+  ArgumentsAdjuster getArgumentsAdjuster() { return Adjuster; }
+
   static const char *const HelpMessage;
 
 private:
+  CommonOptionsParser() = default;
+
+  llvm::Error init(int &argc, const char **argv,
+                   llvm::cl::OptionCategory &Category,
+                   llvm::cl::NumOccurrencesFlag OccurrencesFlag,
+                   const char *Overview);
+
   std::unique_ptr<CompilationDatabase> Compilations;
   std::vector<std::string> SourcePathList;
+  ArgumentsAdjuster Adjuster;
 };
 
 class ArgumentsAdjustingCompilations : public CompilationDatabase {
diff --git a/include/clang/Tooling/CompilationDatabase.h b/include/clang/Tooling/CompilationDatabase.h
index 28af33a..bc3e67b 100644
--- a/include/clang/Tooling/CompilationDatabase.h
+++ b/include/clang/Tooling/CompilationDatabase.h
@@ -64,10 +64,12 @@
 
 /// \brief Interface for compilation databases.
 ///
-/// A compilation database allows the user to retrieve all compile command lines
-/// that a specified file is compiled with in a project.
-/// The retrieved compile command lines can be used to run clang tools over
-/// a subset of the files in a project.
+/// A compilation database allows the user to retrieve compile command lines
+/// for the files in a project.
+///
+/// Many implementations are enumerable, allowing all command lines to be
+/// retrieved. These can be used to run clang tools over a subset of the files
+/// in a project.
 class CompilationDatabase {
 public:
   virtual ~CompilationDatabase();
@@ -114,7 +116,10 @@
     StringRef FilePath) const = 0;
 
   /// \brief Returns the list of all files available in the compilation database.
-  virtual std::vector<std::string> getAllFiles() const = 0;
+  ///
+  /// By default, returns nothing. Implementations should override this if they
+  /// can enumerate their source files.
+  virtual std::vector<std::string> getAllFiles() const { return {}; }
 
   /// \brief Returns all compile commands for all the files in the compilation
   /// database.
@@ -122,7 +127,10 @@
   /// FIXME: Add a layer in Tooling that provides an interface to run a tool
   /// over all files in a compilation database. Not all build systems have the
   /// ability to provide a feasible implementation for \c getAllCompileCommands.
-  virtual std::vector<CompileCommand> getAllCompileCommands() const = 0;
+  ///
+  /// By default, this is implemented in terms of getAllFiles() and
+  /// getCompileCommands(). Subclasses may override this for efficiency.
+  virtual std::vector<CompileCommand> getAllCompileCommands() const;
 };
 
 /// \brief Interface for compilation database plugins.
@@ -149,6 +157,7 @@
 /// \brief A compilation database that returns a single compile command line.
 ///
 /// Useful when we want a tool to behave more like a compiler invocation.
+/// This compilation database is not enumerable: getAllFiles() returns {}.
 class FixedCompilationDatabase : public CompilationDatabase {
 public:
   /// \brief Creates a FixedCompilationDatabase from the arguments after "--".
@@ -182,6 +191,11 @@
       int &Argc, const char *const *Argv, std::string &ErrorMsg,
       Twine Directory = ".");
 
+  /// Reads flags from the given file, one-per line.
+  /// Returns nullptr and sets ErrorMessage if we can't read the file.
+  static std::unique_ptr<FixedCompilationDatabase>
+  loadFromFile(StringRef Path, std::string &ErrorMsg);
+
   /// \brief Constructs a compilation data base from a specified directory
   /// and command line.
   FixedCompilationDatabase(Twine Directory, ArrayRef<std::string> CommandLine);
@@ -194,17 +208,6 @@
   std::vector<CompileCommand>
   getCompileCommands(StringRef FilePath) const override;
 
-  /// \brief Returns the list of all files available in the compilation database.
-  ///
-  /// Note: This is always an empty list for the fixed compilation database.
-  std::vector<std::string> getAllFiles() const override;
-
-  /// \brief Returns all compile commands for all the files in the compilation
-  /// database.
-  ///
-  /// Note: This is always an empty list for the fixed compilation database.
-  std::vector<CompileCommand> getAllCompileCommands() const override;
-
 private:
   /// This is built up to contain a single entry vector to be returned from
   /// getCompileCommands after adding the positional argument.
diff --git a/include/clang/Tooling/Core/Diagnostic.h b/include/clang/Tooling/Core/Diagnostic.h
index b4920d4..c8307c8 100644
--- a/include/clang/Tooling/Core/Diagnostic.h
+++ b/include/clang/Tooling/Core/Diagnostic.h
@@ -33,7 +33,7 @@
   DiagnosticMessage(llvm::StringRef Message = "");
 
   /// \brief Constructs a diagnostic message with anoffset to the diagnostic
-  /// within the file where the problem occured.
+  /// within the file where the problem occurred.
   ///
   /// \param Loc Should be a file location, it is not meaningful for a macro
   /// location.
diff --git a/include/clang/Tooling/Core/QualTypeNames.h b/include/clang/Tooling/Core/QualTypeNames.h
deleted file mode 100644
index 7248356..0000000
--- a/include/clang/Tooling/Core/QualTypeNames.h
+++ /dev/null
@@ -1,79 +0,0 @@
-//===--- QualTypeNames.h - Generate Complete QualType Names ----*- C++ -*-===//
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-// ===----------------------------------------------------------------------===//
-//
-// \file
-// Functionality to generate the fully-qualified names of QualTypes,
-// including recursively expanding any subtypes and template
-// parameters.
-//
-// More precisely: Generates a name that can be used to name the same
-// type if used at the end of the current translation unit--with
-// certain limitations. See below.
-//
-// This code desugars names only very minimally, so in this code:
-//
-// namespace A {
-//   struct X {};
-// }
-// using A::X;
-// namespace B {
-//   using std::tuple;
-//   typedef tuple<X> TX;
-//   TX t;
-// }
-//
-// B::t's type is reported as "B::TX", rather than std::tuple<A::X>.
-//
-// Also, this code replaces types found via using declarations with
-// their more qualified name, so for the code:
-//
-// using std::tuple;
-// tuple<int> TInt;
-//
-// TInt's type will be named, "std::tuple<int>".
-//
-// Limitations:
-//
-// Some types have ambiguous names at the end of a translation unit,
-// are not namable at all there, or are special cases in other ways.
-//
-// 1) Types with only local scope will have their local names:
-//
-// void foo() {
-//   struct LocalType {} LocalVar;
-// }
-//
-// LocalVar's type will be named, "struct LocalType", without any
-// qualification.
-//
-// 2) Types that have been shadowed are reported normally, but a
-// client using that name at the end of the translation unit will be
-// referring to a different type.
-//
-// ===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_TOOLING_CORE_QUALTYPENAMES_H
-#define LLVM_CLANG_TOOLING_CORE_QUALTYPENAMES_H
-
-#include "clang/AST/ASTContext.h"
-
-namespace clang {
-namespace TypeName {
-/// \brief Get the fully qualified name for a type. This includes full
-/// qualification of all template parameters etc.
-///
-/// \param[in] QT - the type for which the fully qualified name will be
-/// returned.
-/// \param[in] Ctx - the ASTContext to be used.
-/// \param[in] WithGlobalNsPrefix - If true, then the global namespace
-/// specifier "::" will be prepended to the fully qualified name.
-std::string getFullyQualifiedName(QualType QT,
-                                  const ASTContext &Ctx,
-                                  bool WithGlobalNsPrefix = false);
-}  // end namespace TypeName
-}  // end namespace clang
-#endif  // LLVM_CLANG_TOOLING_CORE_QUALTYPENAMES_H
diff --git a/include/clang/Tooling/Execution.h b/include/clang/Tooling/Execution.h
new file mode 100644
index 0000000..1a44c47
--- /dev/null
+++ b/include/clang/Tooling/Execution.h
@@ -0,0 +1,175 @@
+//===--- Execution.h - Executing clang frontend actions -*- C++ ---------*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines framework for executing clang frontend actions.
+//
+//  The framework can be extended to support different execution plans including
+//  standalone execution on the given TUs or parallel execution on all TUs in
+//  the codebase.
+//
+//  In order to enable multiprocessing execution, tool actions are expected to
+//  output result into the ToolResults provided by the executor. The
+//  `ToolResults` is an interface that abstracts how results are stored e.g.
+//  in-memory for standalone execution or on-disk for large-scale execution.
+//
+//  New executors can be registered as ToolExecutorPlugins via the
+//  `ToolExecutorPluginRegistry`. CLI tools can use
+//  `createExecutorFromCommandLineArgs` to create a specific registered executor
+//  according to the command-line arguments.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_EXECUTION_H
+#define LLVM_CLANG_TOOLING_EXECUTION_H
+
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Registry.h"
+
+namespace clang {
+namespace tooling {
+
+/// \brief An abstraction for the result of a tool execution. For example, the
+/// underlying result can be in-memory or on-disk.
+///
+/// Results should be string key-value pairs. For example, a refactoring tool
+/// can use source location as key and a replacement in YAML format as value.
+class ToolResults {
+public:
+  virtual ~ToolResults() = default;
+  virtual void addResult(StringRef Key, StringRef Value) = 0;
+  virtual std::vector<std::pair<std::string, std::string>> AllKVResults() = 0;
+  virtual void forEachResult(
+      llvm::function_ref<void(StringRef Key, StringRef Value)> Callback) = 0;
+};
+
+class InMemoryToolResults : public ToolResults {
+public:
+  void addResult(StringRef Key, StringRef Value) override;
+  std::vector<std::pair<std::string, std::string>> AllKVResults() override;
+  void forEachResult(llvm::function_ref<void(StringRef Key, StringRef Value)>
+                         Callback) override;
+
+private:
+  std::vector<std::pair<std::string, std::string>> KVResults;
+};
+
+/// \brief The context of an execution, including the information about
+/// compilation and results.
+class ExecutionContext {
+public:
+  virtual ~ExecutionContext() {}
+
+  /// \brief Initializes a context. This does not take ownership of `Results`.
+  explicit ExecutionContext(ToolResults *Results) : Results(Results) {}
+
+  /// \brief Adds a KV pair to the result container of this execution.
+  void reportResult(StringRef Key, StringRef Value);
+
+  // Returns the source control system's revision number if applicable.
+  // Otherwise returns an empty string.
+  virtual std::string getRevision() { return ""; }
+
+  // Returns the corpus being analyzed, e.g. "llvm" for the LLVM codebase, if
+  // applicable.
+  virtual std::string getCorpus() { return ""; }
+
+  // Returns the currently processed compilation unit if available.
+  virtual std::string getCurrentCompilationUnit() { return ""; }
+
+private:
+  ToolResults *Results;
+};
+
+/// \brief Interface for executing clang frontend actions.
+///
+/// This can be extended to support running tool actions in different
+/// execution mode, e.g. on a specific set of TUs or many TUs in parallel.
+///
+///  New executors can be registered as ToolExecutorPlugins via the
+///  `ToolExecutorPluginRegistry`. CLI tools can use
+///  `createExecutorFromCommandLineArgs` to create a specific registered
+///  executor according to the command-line arguments.
+class ToolExecutor {
+public:
+  virtual ~ToolExecutor() {}
+
+  /// \brief Returns the name of a specific executor.
+  virtual StringRef getExecutorName() const = 0;
+
+  /// \brief Executes each action with a corresponding arguments adjuster.
+  virtual llvm::Error
+  execute(llvm::ArrayRef<
+          std::pair<std::unique_ptr<FrontendActionFactory>, ArgumentsAdjuster>>
+              Actions) = 0;
+
+  /// \brief Convenient functions for the above `execute`.
+  llvm::Error execute(std::unique_ptr<FrontendActionFactory> Action);
+  /// Executes an action with an argument adjuster.
+  llvm::Error execute(std::unique_ptr<FrontendActionFactory> Action,
+                      ArgumentsAdjuster Adjuster);
+
+  /// \brief Returns a reference to the execution context.
+  ///
+  /// This should be passed to tool callbacks, and tool callbacks should report
+  /// results via the returned context.
+  virtual ExecutionContext *getExecutionContext() = 0;
+
+  /// \brief Returns a reference to the result container.
+  ///
+  /// NOTE: This should only be used after the execution finishes. Tool
+  /// callbacks should report results via `ExecutionContext` instead.
+  virtual ToolResults *getToolResults() = 0;
+
+  /// \brief Map a virtual file to be used while running the tool.
+  ///
+  /// \param FilePath The path at which the content will be mapped.
+  /// \param Content A buffer of the file's content.
+  virtual void mapVirtualFile(StringRef FilePath, StringRef Content) = 0;
+};
+
+/// \brief Interface for factories that create specific executors. This is also
+/// used as a plugin to be registered into ToolExecutorPluginRegistry.
+class ToolExecutorPlugin {
+public:
+  virtual ~ToolExecutorPlugin() {}
+
+  /// \brief Create an `ToolExecutor`.
+  ///
+  /// `OptionsParser` can be consumed (e.g. moved) if the creation succeeds.
+  virtual llvm::Expected<std::unique_ptr<ToolExecutor>>
+  create(CommonOptionsParser &OptionsParser) = 0;
+};
+
+/// \brief This creates a ToolExecutor that is in the global registry based on
+/// commandline arguments.
+///
+/// This picks the right executor based on the `--executor` option. This parses
+/// the commandline arguments with `CommonOptionsParser`, so caller does not
+/// need to parse again.
+///
+/// By default, this creates a `StandaloneToolExecutor` ("standalone") if
+/// `--executor` is not provided.
+llvm::Expected<std::unique_ptr<ToolExecutor>>
+createExecutorFromCommandLineArgs(int &argc, const char **argv,
+                                  llvm::cl::OptionCategory &Category,
+                                  const char *Overview = nullptr);
+
+namespace internal {
+llvm::Expected<std::unique_ptr<ToolExecutor>>
+createExecutorFromCommandLineArgsImpl(int &argc, const char **argv,
+                                      llvm::cl::OptionCategory &Category,
+                                      const char *Overview = nullptr);
+} // end namespace internal
+
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_EXECUTION_H
diff --git a/include/clang/Tooling/Refactoring/ASTSelection.h b/include/clang/Tooling/Refactoring/ASTSelection.h
index 17a97d2..aa02a68 100644
--- a/include/clang/Tooling/Refactoring/ASTSelection.h
+++ b/include/clang/Tooling/Refactoring/ASTSelection.h
@@ -115,6 +115,21 @@
     return SelectedNode.get().Children[I].Node.get<Stmt>();
   }
 
+  /// Returns true when a selected code range is in a function-like body
+  /// of code, like a function, method or a block.
+  ///
+  /// This function can be used to test against selected expressions that are
+  /// located outside of a function, e.g. global variable initializers, default
+  /// argument values, or even template arguments.
+  ///
+  /// Use the \c getFunctionLikeNearestParent to get the function-like parent
+  /// declaration.
+  bool isInFunctionLikeBodyOfCode() const;
+
+  /// Returns the nearest function-like parent declaration or null if such
+  /// declaration doesn't exist.
+  const Decl *getFunctionLikeNearestParent() const;
+
   static Optional<CodeRangeASTSelection>
   create(SourceRange SelectionRange, const SelectedASTNode &ASTSelection);
 
diff --git a/include/clang/Tooling/Refactoring/Extract/Extract.h b/include/clang/Tooling/Refactoring/Extract/Extract.h
new file mode 100644
index 0000000..2fd76d2
--- /dev/null
+++ b/include/clang/Tooling/Refactoring/Extract/Extract.h
@@ -0,0 +1,53 @@
+//===--- Extract.h - Clang refactoring library ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_REFACTOR_EXTRACT_EXTRACT_H
+#define LLVM_CLANG_TOOLING_REFACTOR_EXTRACT_EXTRACT_H
+
+#include "clang/Tooling/Refactoring/ASTSelection.h"
+#include "clang/Tooling/Refactoring/RefactoringActionRules.h"
+
+namespace clang {
+namespace tooling {
+
+/// An "Extract Function" refactoring moves code into a new function that's
+/// then called from the place where the original code was.
+class ExtractFunction final : public SourceChangeRefactoringRule {
+public:
+  /// Initiates the extract function refactoring operation.
+  ///
+  /// \param Code     The selected set of statements.
+  /// \param DeclName The name name of the extract function. If None,
+  ///                 "extracted" is used.
+  static Expected<ExtractFunction> initiate(RefactoringRuleContext &Context,
+                                            CodeRangeASTSelection Code,
+                                            Optional<std::string> DeclName);
+
+  static const RefactoringDescriptor &describe();
+
+private:
+  ExtractFunction(CodeRangeASTSelection Code, Optional<std::string> DeclName)
+      : Code(std::move(Code)),
+        DeclName(DeclName ? std::move(*DeclName) : "extracted") {}
+
+  Expected<AtomicChanges>
+  createSourceReplacements(RefactoringRuleContext &Context) override;
+
+  CodeRangeASTSelection Code;
+
+  // FIXME: Account for naming collisions:
+  //  - error when name is specified by user.
+  //  - rename to "extractedN" when name is implicit.
+  std::string DeclName;
+};
+
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_REFACTOR_EXTRACT_EXTRACT_H
diff --git a/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h b/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h
index 8b01a61..d96ad78 100644
--- a/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h
+++ b/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h
@@ -70,6 +70,18 @@
     return visit(Expr->getFoundDecl().getDecl(), Expr->getMemberLoc());
   }
 
+  bool VisitOffsetOfExpr(const OffsetOfExpr *S) {
+    for (unsigned I = 0, E = S->getNumComponents(); I != E; ++I) {
+      const OffsetOfNode &Component = S->getComponent(I);
+      if (Component.getKind() == OffsetOfNode::Field) {
+        if (!visit(Component.getField(), Component.getLocEnd()))
+          return false;
+      }
+      // FIXME: Try to resolve dependent field references.
+    }
+    return true;
+  }
+
   // Other visitors:
 
   bool VisitTypeLoc(const TypeLoc Loc) {
diff --git a/include/clang/Tooling/Refactoring/RefactoringActionRegistry.def b/include/clang/Tooling/Refactoring/RefactoringActionRegistry.def
deleted file mode 100644
index a1191f1..0000000
--- a/include/clang/Tooling/Refactoring/RefactoringActionRegistry.def
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef REFACTORING_ACTION
-#define REFACTORING_ACTION(Name)
-#endif
-
-REFACTORING_ACTION(LocalRename)
-
-#undef REFACTORING_ACTION
diff --git a/include/clang/Tooling/Refactoring/RefactoringActionRule.h b/include/clang/Tooling/Refactoring/RefactoringActionRule.h
index 294ccc3..4130e29 100644
--- a/include/clang/Tooling/Refactoring/RefactoringActionRule.h
+++ b/include/clang/Tooling/Refactoring/RefactoringActionRule.h
@@ -11,6 +11,8 @@
 #define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULE_H
 
 #include "clang/Basic/LLVM.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
 #include <vector>
 
 namespace clang {
@@ -20,6 +22,15 @@
 class RefactoringResultConsumer;
 class RefactoringRuleContext;
 
+struct RefactoringDescriptor {
+  /// A unique identifier for the specific refactoring.
+  StringRef Name;
+  /// A human readable title for the refactoring.
+  StringRef Title;
+  /// A human readable description of what the refactoring does.
+  StringRef Description;
+};
+
 /// A common refactoring action rule interface that defines the 'invoke'
 /// function that performs the refactoring operation (either fully or
 /// partially).
@@ -33,6 +44,9 @@
   /// consumer to propagate the result of the refactoring action.
   virtual void invoke(RefactoringResultConsumer &Consumer,
                       RefactoringRuleContext &Context) = 0;
+
+  /// Returns the structure that describes the refactoring.
+  // static const RefactoringDescriptor &describe() = 0;
 };
 
 /// A refactoring action rule is a wrapper class around a specific refactoring
diff --git a/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h b/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h
index 36c982b..355a6a5 100644
--- a/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h
+++ b/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h
@@ -11,6 +11,7 @@
 #define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULE_REQUIREMENTS_H
 
 #include "clang/Basic/LLVM.h"
+#include "clang/Tooling/Refactoring/ASTSelection.h"
 #include "clang/Tooling/Refactoring/RefactoringDiagnostic.h"
 #include "clang/Tooling/Refactoring/RefactoringOption.h"
 #include "clang/Tooling/Refactoring/RefactoringRuleContext.h"
@@ -52,6 +53,31 @@
   }
 };
 
+/// An AST selection requirement is satisfied when any portion of the AST
+/// overlaps with the selection range.
+///
+/// The requirement will be evaluated only once during the initiation and
+/// search of matching refactoring action rules.
+class ASTSelectionRequirement : public SourceRangeSelectionRequirement {
+public:
+  Expected<SelectedASTNode> evaluate(RefactoringRuleContext &Context) const;
+};
+
+/// A selection requirement that is satisfied when the selection range overlaps
+/// with a number of neighbouring statements in the AST. The statemenst must be
+/// contained in declaration like a function. The selection range must be a
+/// non-empty source selection (i.e. cursors won't be accepted).
+///
+/// The requirement will be evaluated only once during the initiation and search
+/// of matching refactoring action rules.
+///
+/// \see CodeRangeASTSelection
+class CodeRangeASTSelectionRequirement : public ASTSelectionRequirement {
+public:
+  Expected<CodeRangeASTSelection>
+  evaluate(RefactoringRuleContext &Context) const;
+};
+
 /// A base class for any requirement that requires some refactoring options.
 class RefactoringOptionsRequirement : public RefactoringActionRuleRequirement {
 public:
diff --git a/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h b/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h
index cf6147c..75b6c8f 100644
--- a/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h
+++ b/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h
@@ -57,7 +57,11 @@
     return Consumer.handleError(std::move(Err));
   // Construct the target action rule by extracting the evaluated
   // requirements from Expected<> wrappers and then run it.
-  RuleType((*std::get<Is>(Values))...).invoke(Consumer, Context);
+  auto Rule =
+      RuleType::initiate(Context, std::move((*std::get<Is>(Values)))...);
+  if (!Rule)
+    return Consumer.handleError(Rule.takeError());
+  Rule->invoke(Consumer, Context);
 }
 
 inline void visitRefactoringOptionsImpl(RefactoringOptionVisitor &) {}
@@ -141,7 +145,6 @@
           Visitor, Requirements,
           llvm::index_sequence_for<RequirementTypes...>());
     }
-
   private:
     std::tuple<RequirementTypes...> Requirements;
   };
diff --git a/include/clang/Tooling/Refactoring/RefactoringRuleContext.h b/include/clang/Tooling/Refactoring/RefactoringRuleContext.h
index 54ea06d..882ab82 100644
--- a/include/clang/Tooling/Refactoring/RefactoringRuleContext.h
+++ b/include/clang/Tooling/Refactoring/RefactoringRuleContext.h
@@ -12,6 +12,7 @@
 
 #include "clang/Basic/DiagnosticError.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Tooling/Refactoring/ASTSelection.h"
 
 namespace clang {
 
@@ -62,6 +63,10 @@
     return createDiagnosticError(SourceLocation(), DiagID);
   }
 
+  void setASTSelection(std::unique_ptr<SelectedASTNode> Node) {
+    ASTNodeSelection = std::move(Node);
+  }
+
 private:
   /// The source manager for the translation unit / file on which a refactoring
   /// action might operate on.
@@ -74,6 +79,9 @@
   ASTContext *AST = nullptr;
   /// The allocator for diagnostics.
   PartialDiagnostic::StorageAllocator DiagStorage;
+
+  // FIXME: Remove when memoized.
+  std::unique_ptr<SelectedASTNode> ASTNodeSelection;
 };
 
 } // end namespace tooling
diff --git a/include/clang/Tooling/Refactoring/Rename/RenamingAction.h b/include/clang/Tooling/Refactoring/Rename/RenamingAction.h
index f2d9a7b..d32ed64 100644
--- a/include/clang/Tooling/Refactoring/Rename/RenamingAction.h
+++ b/include/clang/Tooling/Refactoring/Rename/RenamingAction.h
@@ -17,6 +17,7 @@
 
 #include "clang/Tooling/Refactoring.h"
 #include "clang/Tooling/Refactoring/AtomicChange.h"
+#include "clang/Tooling/Refactoring/RefactoringActionRules.h"
 #include "clang/Tooling/Refactoring/RefactoringOptions.h"
 #include "clang/Tooling/Refactoring/Rename/SymbolOccurrences.h"
 #include "llvm/Support/Error.h"
@@ -46,12 +47,45 @@
   bool PrintLocations;
 };
 
-class NewNameOption : public RequiredRefactoringOption<std::string> {
+class RenameOccurrences final : public SourceChangeRefactoringRule {
 public:
-  StringRef getName() const override { return "new-name"; }
-  StringRef getDescription() const override {
-    return "The new name to change the symbol to";
-  }
+  static Expected<RenameOccurrences> initiate(RefactoringRuleContext &Context,
+                                              SourceRange SelectionRange,
+                                              std::string NewName);
+
+  static const RefactoringDescriptor &describe();
+
+private:
+  RenameOccurrences(const NamedDecl *ND, std::string NewName)
+      : ND(ND), NewName(std::move(NewName)) {}
+
+  Expected<AtomicChanges>
+  createSourceReplacements(RefactoringRuleContext &Context) override;
+
+  const NamedDecl *ND;
+  std::string NewName;
+};
+
+class QualifiedRenameRule final : public SourceChangeRefactoringRule {
+public:
+  static Expected<QualifiedRenameRule> initiate(RefactoringRuleContext &Context,
+                                                std::string OldQualifiedName,
+                                                std::string NewQualifiedName);
+
+  static const RefactoringDescriptor &describe();
+
+private:
+  QualifiedRenameRule(const NamedDecl *ND,
+                      std::string NewQualifiedName)
+      : ND(ND), NewQualifiedName(std::move(NewQualifiedName)) {}
+
+  Expected<AtomicChanges>
+  createSourceReplacements(RefactoringRuleContext &Context) override;
+
+  // A NamedDecl which indentifies the symbol being renamed.
+  const NamedDecl *ND;
+  // The new qualified name to change the symbol to.
+  std::string NewQualifiedName;
 };
 
 /// Returns source replacements that correspond to the rename of the given
diff --git a/include/clang/Tooling/StandaloneExecution.h b/include/clang/Tooling/StandaloneExecution.h
new file mode 100644
index 0000000..f5f32d7
--- /dev/null
+++ b/include/clang/Tooling/StandaloneExecution.h
@@ -0,0 +1,97 @@
+//===--- StandaloneExecution.h - Standalone execution. -*- C++ ----------*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines standalone execution of clang tools.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_STANDALONEEXECUTION_H
+#define LLVM_CLANG_TOOLING_STANDALONEEXECUTION_H
+
+#include "clang/Tooling/ArgumentsAdjusters.h"
+#include "clang/Tooling/Execution.h"
+
+namespace clang {
+namespace tooling {
+
+/// \brief A standalone executor that runs FrontendActions on a given set of
+/// TUs in sequence.
+///
+/// By default, this executor uses the following arguments adjusters (as defined
+/// in `clang/Tooling/ArgumentsAdjusters.h`):
+///   - `getClangStripOutputAdjuster()`
+///   - `getClangSyntaxOnlyAdjuster()`
+///   - `getClangStripDependencyFileAdjuster()`
+class StandaloneToolExecutor : public ToolExecutor {
+public:
+  static const char *ExecutorName;
+
+  /// \brief Init with \p CompilationDatabase and the paths of all files to be
+  /// proccessed.
+  StandaloneToolExecutor(
+      const CompilationDatabase &Compilations,
+      llvm::ArrayRef<std::string> SourcePaths,
+      std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+          std::make_shared<PCHContainerOperations>());
+
+  /// \brief Init with \p CommonOptionsParser. This is expected to be used by
+  /// `createExecutorFromCommandLineArgs` based on commandline options.
+  ///
+  /// The executor takes ownership of \p Options.
+  StandaloneToolExecutor(
+      CommonOptionsParser Options,
+      std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+          std::make_shared<PCHContainerOperations>());
+
+  StringRef getExecutorName() const override { return ExecutorName; }
+
+  using ToolExecutor::execute;
+
+  llvm::Error
+  execute(llvm::ArrayRef<
+          std::pair<std::unique_ptr<FrontendActionFactory>, ArgumentsAdjuster>>
+              Actions) override;
+
+  /// \brief Set a \c DiagnosticConsumer to use during parsing.
+  void setDiagnosticConsumer(DiagnosticConsumer *DiagConsumer) {
+    Tool.setDiagnosticConsumer(DiagConsumer);
+  }
+
+  ExecutionContext *getExecutionContext() override { return &Context; };
+
+  ToolResults *getToolResults() override { return &Results; }
+
+  llvm::ArrayRef<std::string> getSourcePaths() const {
+    return Tool.getSourcePaths();
+  }
+
+  void mapVirtualFile(StringRef FilePath, StringRef Content) override {
+    Tool.mapVirtualFile(FilePath, Content);
+  }
+
+  /// \brief Returns the file manager used in the tool.
+  ///
+  /// The file manager is shared between all translation units.
+  FileManager &getFiles() { return Tool.getFiles(); }
+
+private:
+  // Used to store the parser when the executor is initialized with parser.
+  llvm::Optional<CommonOptionsParser> OptionsParser;
+  // FIXME: The standalone executor is currently just a wrapper of `ClangTool`.
+  // Merge `ClangTool` implementation into the this.
+  ClangTool Tool;
+  ExecutionContext Context;
+  InMemoryToolResults Results;
+  ArgumentsAdjuster ArgsAdjuster;
+};
+
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_STANDALONEEXECUTION_H
diff --git a/include/clang/Tooling/ToolExecutorPluginRegistry.h b/include/clang/Tooling/ToolExecutorPluginRegistry.h
new file mode 100644
index 0000000..11ba895
--- /dev/null
+++ b/include/clang/Tooling/ToolExecutorPluginRegistry.h
@@ -0,0 +1,24 @@
+//===--- ToolExecutorPluginRegistry.h - -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_TOOLEXECUTORPLUGINREGISTRY_H
+#define LLVM_CLANG_TOOLING_TOOLEXECUTORPLUGINREGISTRY_H
+
+#include "clang/Tooling/Execution.h"
+#include "llvm/Support/Registry.h"
+
+namespace clang {
+namespace tooling {
+
+typedef llvm::Registry<ToolExecutorPlugin> ToolExecutorPluginRegistry;
+
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_TOOLEXECUTORPLUGINREGISTRY_H
diff --git a/include/clang/Tooling/Tooling.h b/include/clang/Tooling/Tooling.h
index 6f9bc9e..8ddfef3 100644
--- a/include/clang/Tooling/Tooling.h
+++ b/include/clang/Tooling/Tooling.h
@@ -31,12 +31,12 @@
 #define LLVM_CLANG_TOOLING_TOOLING_H
 
 #include "clang/AST/ASTConsumer.h"
-#include "clang/Frontend/PCHContainerOperations.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Driver/Util.h"
 #include "clang/Frontend/FrontendAction.h"
+#include "clang/Frontend/PCHContainerOperations.h"
 #include "clang/Lex/ModuleLoader.h"
 #include "clang/Tooling/ArgumentsAdjusters.h"
 #include "clang/Tooling/CompilationDatabase.h"
@@ -296,10 +296,14 @@
   ///        not found in Compilations, it is skipped.
   /// \param PCHContainerOps The PCHContainerOperations for loading and creating
   /// clang modules.
+  /// \param BaseFS VFS used for all underlying file accesses when running the
+  /// tool.
   ClangTool(const CompilationDatabase &Compilations,
             ArrayRef<std::string> SourcePaths,
             std::shared_ptr<PCHContainerOperations> PCHContainerOps =
-                std::make_shared<PCHContainerOperations>());
+                std::make_shared<PCHContainerOperations>(),
+            IntrusiveRefCntPtr<vfs::FileSystem> BaseFS =
+                vfs::getRealFileSystem());
 
   ~ClangTool();
 
@@ -326,6 +330,9 @@
   /// Runs an action over all files specified in the command line.
   ///
   /// \param Action Tool action.
+  ///
+  /// \returns 0 on success; 1 if any error occured; 2 if there is no error but
+  /// some files are skipped due to missing compile commands.
   int run(ToolAction *Action);
 
   /// \brief Create an AST for each file specified in the command line and
@@ -337,7 +344,9 @@
   /// The file manager is shared between all translation units.
   FileManager &getFiles() { return *Files; }
 
- private:
+  llvm::ArrayRef<std::string> getSourcePaths() const { return SourcePaths; }
+
+private:
   const CompilationDatabase &Compilations;
   std::vector<std::string> SourcePaths;
   std::shared_ptr<PCHContainerOperations> PCHContainerOps;
diff --git a/include/clang/module.modulemap b/include/clang/module.modulemap
index 1e32621..4097ad2 100644
--- a/include/clang/module.modulemap
+++ b/include/clang/module.modulemap
@@ -49,6 +49,7 @@
   textual header "Basic/OperatorKinds.def"
   textual header "Basic/Sanitizers.def"
   textual header "Basic/TokenKinds.def"
+  textual header "Basic/X86Target.def"
 
   module * { export * }
 }
@@ -146,8 +147,6 @@
   // importing the AST matchers library gives a link dependency on the AST
   // matchers (and thus the AST), which clang-format should not have.
   exclude header "Tooling/RefactoringCallbacks.h"
-
-  textual header "Tooling/Refactoring/RefactoringActionRegistry.def"
 }
 
 module Clang_ToolingCore {
diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp
index 15aa453..5039be3 100644
--- a/lib/AST/ASTContext.cpp
+++ b/lib/AST/ASTContext.cpp
@@ -1,4 +1,4 @@
-//===--- ASTContext.cpp - Context to hold long-lived AST nodes ------------===//
+//===- ASTContext.cpp - Context to hold long-lived AST nodes --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,33 +13,83 @@
 
 #include "clang/AST/ASTContext.h"
 #include "CXXABI.h"
+#include "clang/AST/APValue.h"
 #include "clang/AST/ASTMutationListener.h"
+#include "clang/AST/ASTTypeTraits.h"
 #include "clang/AST/Attr.h"
+#include "clang/AST/AttrIterator.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/Comment.h"
-#include "clang/AST/CommentCommandTraits.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclContextInternals.h"
 #include "clang/AST/DeclObjC.h"
+#include "clang/AST/DeclOpenMP.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExternalASTSource.h"
 #include "clang/AST/Mangle.h"
 #include "clang/AST/MangleNumberingContext.h"
+#include "clang/AST/NestedNameSpecifier.h"
+#include "clang/AST/RawCommentList.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/TemplateBase.h"
+#include "clang/AST/TemplateName.h"
+#include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
+#include "clang/AST/UnresolvedSet.h"
 #include "clang/AST/VTableBuilder.h"
+#include "clang/Basic/AddressSpaces.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/Basic/CommentOptions.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Linkage.h"
+#include "clang/Basic/ObjCRuntime.h"
+#include "clang/Basic/SanitizerBlacklist.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/XRayLists.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/Capacity.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
 #include <map>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
 
 using namespace clang;
 
@@ -176,8 +226,7 @@
     // for is usually among the last two comments we parsed -- check them
     // first.
     RawComment CommentAtDeclLoc(
-        SourceMgr, SourceRange(DeclLoc), false,
-        LangOpts.CommentOpts.ParseAllComments);
+        SourceMgr, SourceRange(DeclLoc), LangOpts.CommentOpts, false);
     BeforeThanCompare<RawComment> Compare(SourceMgr);
     ArrayRef<RawComment *>::iterator MaybeBeforeDecl = RawComments.end() - 1;
     bool Found = Compare(*MaybeBeforeDecl, &CommentAtDeclLoc);
@@ -203,7 +252,8 @@
 
   // First check whether we have a trailing comment.
   if (Comment != RawComments.end() &&
-      (*Comment)->isDocumentation() && (*Comment)->isTrailingComment() &&
+      ((*Comment)->isDocumentation() || LangOpts.CommentOpts.ParseAllComments)
+      && (*Comment)->isTrailingComment() &&
       (isa<FieldDecl>(D) || isa<EnumConstantDecl>(D) || isa<VarDecl>(D) ||
        isa<ObjCMethodDecl>(D) || isa<ObjCPropertyDecl>(D))) {
     std::pair<FileID, unsigned> CommentBeginDecomp
@@ -225,7 +275,9 @@
   --Comment;
 
   // Check that we actually have a non-member Doxygen comment.
-  if (!(*Comment)->isDocumentation() || (*Comment)->isTrailingComment())
+  if (!((*Comment)->isDocumentation() ||
+        LangOpts.CommentOpts.ParseAllComments) ||
+      (*Comment)->isTrailingComment())
     return nullptr;
 
   // Decompose the end of the comment.
@@ -256,11 +308,10 @@
   return *Comment;
 }
 
-namespace {
 /// If we have a 'templated' declaration for a template, adjust 'D' to
 /// refer to the actual template.
 /// If we have an implicit instantiation, adjust 'D' to refer to template.
-const Decl *adjustDeclToTemplate(const Decl *D) {
+static const Decl *adjustDeclToTemplate(const Decl *D) {
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
     // Is this function declaration part of a function template?
     if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate())
@@ -327,7 +378,6 @@
   // FIXME: Adjust alias templates?
   return D;
 }
-} // anonymous namespace
 
 const RawComment *ASTContext::getRawCommentForAnyRedecl(
                                                 const Decl *D,
@@ -380,7 +430,7 @@
   }
 
   // If we found a comment, it should be a documentation comment.
-  assert(!RC || RC->isDocumentation());
+  assert(!RC || RC->isDocumentation() || LangOpts.CommentOpts.ParseAllComments);
 
   if (OriginalDecl)
     *OriginalDecl = OriginalDeclForRC;
@@ -737,25 +787,12 @@
                        Builtin::Context &builtins)
     : FunctionProtoTypes(this_()), TemplateSpecializationTypes(this_()),
       DependentTemplateSpecializationTypes(this_()),
-      SubstTemplateTemplateParmPacks(this_()),
-      GlobalNestedNameSpecifier(nullptr), Int128Decl(nullptr),
-      UInt128Decl(nullptr), BuiltinVaListDecl(nullptr),
-      BuiltinMSVaListDecl(nullptr), ObjCIdDecl(nullptr), ObjCSelDecl(nullptr),
-      ObjCClassDecl(nullptr), ObjCProtocolClassDecl(nullptr), BOOLDecl(nullptr),
-      CFConstantStringTagDecl(nullptr), CFConstantStringTypeDecl(nullptr),
-      ObjCInstanceTypeDecl(nullptr), FILEDecl(nullptr), jmp_bufDecl(nullptr),
-      sigjmp_bufDecl(nullptr), ucontext_tDecl(nullptr),
-      BlockDescriptorType(nullptr), BlockDescriptorExtendedType(nullptr),
-      cudaConfigureCallDecl(nullptr), FirstLocalImport(), LastLocalImport(),
-      ExternCContext(nullptr), MakeIntegerSeqDecl(nullptr),
-      TypePackElementDecl(nullptr), SourceMgr(SM), LangOpts(LOpts),
+      SubstTemplateTemplateParmPacks(this_()), SourceMgr(SM), LangOpts(LOpts),
       SanitizerBL(new SanitizerBlacklist(LangOpts.SanitizerBlacklistFiles, SM)),
       XRayFilter(new XRayFunctionFilter(LangOpts.XRayAlwaysInstrumentFiles,
                                         LangOpts.XRayNeverInstrumentFiles, SM)),
-      AddrSpaceMap(nullptr), Target(nullptr), AuxTarget(nullptr),
       PrintingPolicy(LOpts), Idents(idents), Selectors(sels),
-      BuiltinInfo(builtins), DeclarationNames(*this), ExternalSource(nullptr),
-      Listener(nullptr), Comments(SM), CommentsLoaded(false),
+      BuiltinInfo(builtins), DeclarationNames(*this), Comments(SM),
       CommentCommandTraits(BumpAlloc, LOpts.CommentOpts), LastSDM(nullptr, 0) {
   TUDecl = TranslationUnitDecl::Create(*this);
 }
@@ -1220,7 +1257,7 @@
 }
 
 /// \brief Erase the attributes corresponding to the given declaration.
-void ASTContext::eraseDeclAttrs(const Decl *D) { 
+void ASTContext::eraseDeclAttrs(const Decl *D) {
   llvm::DenseMap<const Decl*, AttrVec*>::iterator Pos = DeclAttrs.find(D);
   if (Pos != DeclAttrs.end()) {
     Pos->second->~AttrVec();
@@ -1343,35 +1380,27 @@
 
 ASTContext::overridden_cxx_method_iterator
 ASTContext::overridden_methods_begin(const CXXMethodDecl *Method) const {
-  llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos =
-      OverriddenMethods.find(Method->getCanonicalDecl());
-  if (Pos == OverriddenMethods.end())
-    return nullptr;
-  return Pos->second.begin();
+  return overridden_methods(Method).begin();
 }
 
 ASTContext::overridden_cxx_method_iterator
 ASTContext::overridden_methods_end(const CXXMethodDecl *Method) const {
-  llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos =
-      OverriddenMethods.find(Method->getCanonicalDecl());
-  if (Pos == OverriddenMethods.end())
-    return nullptr;
-  return Pos->second.end();
+  return overridden_methods(Method).end();
 }
 
 unsigned
 ASTContext::overridden_methods_size(const CXXMethodDecl *Method) const {
-  llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos =
-      OverriddenMethods.find(Method->getCanonicalDecl());
-  if (Pos == OverriddenMethods.end())
-    return 0;
-  return Pos->second.size();
+  auto Range = overridden_methods(Method);
+  return Range.end() - Range.begin();
 }
 
 ASTContext::overridden_method_range
 ASTContext::overridden_methods(const CXXMethodDecl *Method) const {
-  return overridden_method_range(overridden_methods_begin(Method),
-                                 overridden_methods_end(Method));
+  llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos =
+      OverriddenMethods.find(Method->getCanonicalDecl());
+  if (Pos == OverriddenMethods.end())
+    return overridden_method_range(nullptr, nullptr);
+  return overridden_method_range(Pos->second.begin(), Pos->second.end());
 }
 
 void ASTContext::addOverriddenMethod(const CXXMethodDecl *Method, 
@@ -1463,7 +1492,6 @@
   // else about the declaration and its type.
   if (UseAlignAttrOnly) {
     // do nothing
-
   } else if (const ValueDecl *VD = dyn_cast<ValueDecl>(D)) {
     QualType T = VD->getType();
     if (const ReferenceType *RT = T->getAs<ReferenceType>()) {
@@ -1704,7 +1732,6 @@
       Width = 0;
       Align = 8;
       break;
-
     case BuiltinType::Bool:
       Width = Target->getBoolWidth();
       Align = Target->getBoolAlign();
@@ -1793,7 +1820,8 @@
 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
     case BuiltinType::Id:
 #include "clang/Basic/OpenCLImageTypes.def"
-      AS = getTargetAddressSpace(Target->getOpenCLTypeAddrSpace(T));
+      AS = getTargetAddressSpace(
+          Target->getOpenCLTypeAddrSpace(getOpenCLTypeKind(T)));
       Width = Target->getPointerWidth(AS);
       Align = Target->getPointerAlign(AS);
       break;
@@ -1803,30 +1831,29 @@
     Width = Target->getPointerWidth(0);
     Align = Target->getPointerAlign(0);
     break;
-  case Type::BlockPointer: {
+  case Type::BlockPointer:
     AS = getTargetAddressSpace(cast<BlockPointerType>(T)->getPointeeType());
     Width = Target->getPointerWidth(AS);
     Align = Target->getPointerAlign(AS);
     break;
-  }
   case Type::LValueReference:
-  case Type::RValueReference: {
+  case Type::RValueReference:
     // alignof and sizeof should never enter this code path here, so we go
     // the pointer route.
     AS = getTargetAddressSpace(cast<ReferenceType>(T)->getPointeeType());
     Width = Target->getPointerWidth(AS);
     Align = Target->getPointerAlign(AS);
     break;
-  }
-  case Type::Pointer: {
+  case Type::Pointer:
     AS = getTargetAddressSpace(cast<PointerType>(T)->getPointeeType());
     Width = Target->getPointerWidth(AS);
     Align = Target->getPointerAlign(AS);
     break;
-  }
   case Type::MemberPointer: {
     const MemberPointerType *MPT = cast<MemberPointerType>(T);
-    std::tie(Width, Align) = ABI->getMemberPointerWidthAndAlign(MPT);
+    CXXABI::MemberPointerInfo MPI = ABI->getMemberPointerInfo(MPT);
+    Width = MPI.Width;
+    Align = MPI.Align;
     break;
   }
   case Type::Complex: {
@@ -1941,11 +1968,10 @@
   }
   break;
 
-  case Type::Pipe: {
+  case Type::Pipe:
     Width = Target->getPointerWidth(getTargetAddressSpace(LangAS::opencl_global));
     Align = Target->getPointerAlign(getTargetAddressSpace(LangAS::opencl_global));
-  }
-
+    break;
   }
 
   assert(llvm::isPowerOf2_32(Align) && "Alignment must be power of 2");
@@ -2058,7 +2084,6 @@
 /// super class and then collects all ivars, including those synthesized for
 /// current class. This routine is used for implementation of current class
 /// when all ivars, declared and synthesized are known.
-///
 void ASTContext::DeepCollectObjCIvars(const ObjCInterfaceDecl *OI,
                                       bool leafClass,
                             SmallVectorImpl<const ObjCIvarDecl*> &Ivars) const {
@@ -2110,6 +2135,171 @@
   }
 }
 
+static bool unionHasUniqueObjectRepresentations(const ASTContext &Context,
+                                                const RecordDecl *RD) {
+  assert(RD->isUnion() && "Must be union type");
+  CharUnits UnionSize = Context.getTypeSizeInChars(RD->getTypeForDecl());
+
+  for (const auto *Field : RD->fields()) {
+    if (!Context.hasUniqueObjectRepresentations(Field->getType()))
+      return false;
+    CharUnits FieldSize = Context.getTypeSizeInChars(Field->getType());
+    if (FieldSize != UnionSize)
+      return false;
+  }
+  return !RD->field_empty();
+}
+
+static bool isStructEmpty(QualType Ty) {
+  const RecordDecl *RD = Ty->castAs<RecordType>()->getDecl();
+
+  if (!RD->field_empty())
+    return false;
+
+  if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RD))
+    return ClassDecl->isEmpty();
+
+  return true;
+}
+
+static llvm::Optional<int64_t>
+structHasUniqueObjectRepresentations(const ASTContext &Context,
+                                     const RecordDecl *RD) {
+  assert(!RD->isUnion() && "Must be struct/class type");
+  const auto &Layout = Context.getASTRecordLayout(RD);
+
+  int64_t CurOffsetInBits = 0;
+  if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RD)) {
+    if (ClassDecl->isDynamicClass())
+      return llvm::None;
+
+    SmallVector<std::pair<QualType, int64_t>, 4> Bases;
+    for (const auto Base : ClassDecl->bases()) {
+      // Empty types can be inherited from, and non-empty types can potentially
+      // have tail padding, so just make sure there isn't an error.
+      if (!isStructEmpty(Base.getType())) {
+        llvm::Optional<int64_t> Size = structHasUniqueObjectRepresentations(
+            Context, Base.getType()->getAs<RecordType>()->getDecl());
+        if (!Size)
+          return llvm::None;
+        Bases.emplace_back(Base.getType(), Size.getValue());
+      }
+    }
+
+    std::sort(
+        Bases.begin(), Bases.end(), [&](const std::pair<QualType, int64_t> &L,
+                                        const std::pair<QualType, int64_t> &R) {
+          return Layout.getBaseClassOffset(L.first->getAsCXXRecordDecl()) <
+                 Layout.getBaseClassOffset(R.first->getAsCXXRecordDecl());
+        });
+
+    for (const auto Base : Bases) {
+      int64_t BaseOffset = Context.toBits(
+          Layout.getBaseClassOffset(Base.first->getAsCXXRecordDecl()));
+      int64_t BaseSize = Base.second;
+      if (BaseOffset != CurOffsetInBits)
+        return llvm::None;
+      CurOffsetInBits = BaseOffset + BaseSize;
+    }
+  }
+
+  for (const auto *Field : RD->fields()) {
+    if (!Field->getType()->isReferenceType() &&
+        !Context.hasUniqueObjectRepresentations(Field->getType()))
+      return llvm::None;
+
+    int64_t FieldSizeInBits =
+        Context.toBits(Context.getTypeSizeInChars(Field->getType()));
+    if (Field->isBitField()) {
+      int64_t BitfieldSize = Field->getBitWidthValue(Context);
+
+      if (BitfieldSize > FieldSizeInBits)
+        return llvm::None;
+      FieldSizeInBits = BitfieldSize;
+    }
+
+    int64_t FieldOffsetInBits = Context.getFieldOffset(Field);
+
+    if (FieldOffsetInBits != CurOffsetInBits)
+      return llvm::None;
+
+    CurOffsetInBits = FieldSizeInBits + FieldOffsetInBits;
+  }
+
+  return CurOffsetInBits;
+}
+
+bool ASTContext::hasUniqueObjectRepresentations(QualType Ty) const {
+  // C++17 [meta.unary.prop]:
+  //   The predicate condition for a template specialization
+  //   has_unique_object_representations<T> shall be
+  //   satisfied if and only if:
+  //     (9.1) - T is trivially copyable, and
+  //     (9.2) - any two objects of type T with the same value have the same
+  //     object representation, where two objects
+  //   of array or non-union class type are considered to have the same value
+  //   if their respective sequences of
+  //   direct subobjects have the same values, and two objects of union type
+  //   are considered to have the same
+  //   value if they have the same active member and the corresponding members
+  //   have the same value.
+  //   The set of scalar types for which this condition holds is
+  //   implementation-defined. [ Note: If a type has padding
+  //   bits, the condition does not hold; otherwise, the condition holds true
+  //   for unsigned integral types. -- end note ]
+  assert(!Ty.isNull() && "Null QualType sent to unique object rep check");
+
+  // Arrays are unique only if their element type is unique.
+  if (Ty->isArrayType())
+    return hasUniqueObjectRepresentations(getBaseElementType(Ty));
+
+  // (9.1) - T is trivially copyable...
+  if (!Ty.isTriviallyCopyableType(*this))
+    return false;
+
+  // All integrals and enums are unique.
+  if (Ty->isIntegralOrEnumerationType())
+    return true;
+
+  // All other pointers are unique.
+  if (Ty->isPointerType())
+    return true;
+
+  if (Ty->isMemberPointerType()) {
+    const MemberPointerType *MPT = Ty->getAs<MemberPointerType>();
+    return !ABI->getMemberPointerInfo(MPT).HasPadding;
+  }
+
+  if (Ty->isRecordType()) {
+    const RecordDecl *Record = Ty->getAs<RecordType>()->getDecl();
+
+    if (Record->isInvalidDecl())
+      return false;
+
+    if (Record->isUnion())
+      return unionHasUniqueObjectRepresentations(*this, Record);
+
+    Optional<int64_t> StructSize =
+        structHasUniqueObjectRepresentations(*this, Record);
+
+    return StructSize &&
+           StructSize.getValue() == static_cast<int64_t>(getTypeSize(Ty));
+  }
+
+  // FIXME: More cases to handle here (list by rsmith):
+  // vectors (careful about, eg, vector of 3 foo)
+  // _Complex int and friends
+  // _Atomic T
+  // Obj-C block pointers
+  // Obj-C object pointers
+  // and perhaps OpenCL's various builtin types (pipe, sampler_t, event_t,
+  // clk_event_t, queue_t, reserve_id_t)
+  // There're also Obj-C class types and the Obj-C selector type, but I think it
+  // makes sense for those to return false here.
+
+  return false;
+}
+
 unsigned ASTContext::CountNonClassIvars(const ObjCInterfaceDecl *OI) const {
   unsigned count = 0;  
   // Count ivars declared in class extension.
@@ -2142,7 +2332,8 @@
   return false;
 }
 
-/// \brief Get the implementation of ObjCInterfaceDecl,or NULL if none exists.
+/// \brief Get the implementation of ObjCInterfaceDecl, or nullptr if none
+/// exists.
 ObjCImplementationDecl *ASTContext::getObjCImplementation(ObjCInterfaceDecl *D) {
   llvm::DenseMap<ObjCContainerDecl*, ObjCImplDecl*>::iterator
     I = ObjCImpls.find(D);
@@ -2150,7 +2341,9 @@
     return cast<ObjCImplementationDecl>(I->second);
   return nullptr;
 }
-/// \brief Get the implementation of ObjCCategoryDecl, or NULL if none exists.
+
+/// \brief Get the implementation of ObjCCategoryDecl, or nullptr if none
+/// exists.
 ObjCCategoryImplDecl *ASTContext::getObjCImplementation(ObjCCategoryDecl *D) {
   llvm::DenseMap<ObjCContainerDecl*, ObjCImplDecl*>::iterator
     I = ObjCImpls.find(D);
@@ -2165,6 +2358,7 @@
   assert(IFaceD && ImplD && "Passed null params");
   ObjCImpls[IFaceD] = ImplD;
 }
+
 /// \brief Set the implementation of ObjCCategoryDecl.
 void ASTContext::setObjCImplementation(ObjCCategoryDecl *CatD,
                            ObjCCategoryImplDecl *ImplD) {
@@ -2198,7 +2392,7 @@
   return nullptr;
 }
 
-/// \brief Get the copy initialization expression of VarDecl,or NULL if 
+/// \brief Get the copy initialization expression of VarDecl, or nullptr if
 /// none exists.
 Expr *ASTContext::getBlockVarCopyInits(const VarDecl*VD) {
   assert(VD && "Passed null params");
@@ -2206,7 +2400,7 @@
          "getBlockVarCopyInits - not __block var");
   llvm::DenseMap<const VarDecl*, Expr*>::iterator
     I = BlockVarCopyInits.find(VD);
-  return (I != BlockVarCopyInits.end()) ? cast<Expr>(I->second) : nullptr;
+  return (I != BlockVarCopyInits.end()) ? I->second : nullptr;
 }
 
 /// \brief Set the copy inialization expression of a block var decl.
@@ -2390,26 +2584,24 @@
 /// specified exception specification. Type sugar that can be present on a
 /// declaration of a function with an exception specification is permitted
 /// and preserved. Other type sugar (for instance, typedefs) is not.
-static QualType getFunctionTypeWithExceptionSpec(
-    ASTContext &Context, QualType Orig,
-    const FunctionProtoType::ExceptionSpecInfo &ESI) {
+QualType ASTContext::getFunctionTypeWithExceptionSpec(
+    QualType Orig, const FunctionProtoType::ExceptionSpecInfo &ESI) {
   // Might have some parens.
   if (auto *PT = dyn_cast<ParenType>(Orig))
-    return Context.getParenType(
-        getFunctionTypeWithExceptionSpec(Context, PT->getInnerType(), ESI));
+    return getParenType(
+        getFunctionTypeWithExceptionSpec(PT->getInnerType(), ESI));
 
   // Might have a calling-convention attribute.
   if (auto *AT = dyn_cast<AttributedType>(Orig))
-    return Context.getAttributedType(
+    return getAttributedType(
         AT->getAttrKind(),
-        getFunctionTypeWithExceptionSpec(Context, AT->getModifiedType(), ESI),
-        getFunctionTypeWithExceptionSpec(Context, AT->getEquivalentType(),
-                                         ESI));
+        getFunctionTypeWithExceptionSpec(AT->getModifiedType(), ESI),
+        getFunctionTypeWithExceptionSpec(AT->getEquivalentType(), ESI));
 
   // Anything else must be a function type. Rebuild it with the new exception
   // specification.
   const FunctionProtoType *Proto = cast<FunctionProtoType>(Orig);
-  return Context.getFunctionType(
+  return getFunctionType(
       Proto->getReturnType(), Proto->getParamTypes(),
       Proto->getExtProtoInfo().withExceptionSpec(ESI));
 }
@@ -2417,9 +2609,9 @@
 bool ASTContext::hasSameFunctionTypeIgnoringExceptionSpec(QualType T,
                                                           QualType U) {
   return hasSameType(T, U) ||
-         (getLangOpts().CPlusPlus1z &&
-          hasSameType(getFunctionTypeWithExceptionSpec(*this, T, EST_None),
-                      getFunctionTypeWithExceptionSpec(*this, U, EST_None)));
+         (getLangOpts().CPlusPlus17 &&
+          hasSameType(getFunctionTypeWithExceptionSpec(T, EST_None),
+                      getFunctionTypeWithExceptionSpec(U, EST_None)));
 }
 
 void ASTContext::adjustExceptionSpec(
@@ -2427,7 +2619,7 @@
     bool AsWritten) {
   // Update the type.
   QualType Updated =
-      getFunctionTypeWithExceptionSpec(*this, FD->getType(), ESI);
+      getFunctionTypeWithExceptionSpec(FD->getType(), ESI);
   FD->setType(Updated);
 
   if (!AsWritten)
@@ -2438,7 +2630,7 @@
     // If the type and the type-as-written differ, we may need to update
     // the type-as-written too.
     if (TSInfo->getType() != FD->getType())
-      Updated = getFunctionTypeWithExceptionSpec(*this, TSInfo->getType(), ESI);
+      Updated = getFunctionTypeWithExceptionSpec(TSInfo->getType(), ESI);
 
     // FIXME: When we get proper type location information for exceptions,
     // we'll also have to rebuild the TypeSourceInfo. For now, we just patch
@@ -2450,6 +2642,12 @@
   }
 }
 
+bool ASTContext::isParamDestroyedInCallee(QualType T) const {
+  return getTargetInfo().getCXXABI().areArgsDestroyedLeftToRightInCallee() ||
+         T.hasTrivialABIOverride() ||
+         T.isDestructedType() == QualType::DK_nontrivial_c_struct;
+}
+
 /// getComplexType - Return the uniqued reference to the type for a complex
 /// number with the specified element type.
 QualType ASTContext::getComplexType(QualType T) const {
@@ -3166,7 +3364,6 @@
 }
 
 /// getFunctionNoProtoType - Return a K&R style C function type like 'int()'.
-///
 QualType
 ASTContext::getFunctionNoProtoType(QualType ResultTy,
                                    const FunctionType::ExtInfo &Info) const {
@@ -3282,7 +3479,7 @@
     Unique = true;
   }
 
-  bool NoexceptInType = getLangOpts().CPlusPlus1z;
+  bool NoexceptInType = getLangOpts().CPlusPlus17;
   bool IsCanonicalExceptionSpec =
       isCanonicalExceptionSpecification(EPI.ExceptionSpec, NoexceptInType);
 
@@ -3418,7 +3615,7 @@
   llvm::FoldingSetNodeID ID;
   PipeType::Profile(ID, T, ReadOnly);
 
-  void *InsertPos = 0;
+  void *InsertPos = nullptr;
   if (PipeType *PT = PipeTypes.FindNodeOrInsertPos(ID, InsertPos))
     return QualType(PT, 0);
 
@@ -4046,7 +4243,7 @@
 QualType ASTContext::getObjCObjectType(QualType BaseType,
                                        ObjCProtocolDecl * const *Protocols,
                                        unsigned NumProtocols) const {
-  return getObjCObjectType(BaseType, { },
+  return getObjCObjectType(BaseType, {},
                            llvm::makeArrayRef(Protocols, NumProtocols),
                            /*isKindOf=*/false);
 }
@@ -4183,13 +4380,13 @@
 
     // FIXME: Check for protocols to which the class type is already
     // known to conform.
-    return getObjCObjectType(type, { }, protocols, false);
+    return getObjCObjectType(type, {}, protocols, false);
   }
 
   // id<protocol-list>
   if (type->isObjCIdType()) {
     const ObjCObjectPointerType *objPtr = type->castAs<ObjCObjectPointerType>();
-    type = getObjCObjectType(ObjCBuiltinIdTy, { }, protocols,
+    type = getObjCObjectType(ObjCBuiltinIdTy, {}, protocols,
                                  objPtr->isKindOfType());
     return getObjCObjectPointerType(type);
   }
@@ -4197,7 +4394,7 @@
   // Class<protocol-list>
   if (type->isObjCClassType()) {
     const ObjCObjectPointerType *objPtr = type->castAs<ObjCObjectPointerType>();
-    type = getObjCObjectType(ObjCBuiltinClassTy, { }, protocols,
+    type = getObjCObjectType(ObjCBuiltinClassTy, {}, protocols,
                                  objPtr->isKindOfType());
     return getObjCObjectPointerType(type);
   }
@@ -4572,7 +4769,7 @@
 /// getTagDeclType - Return the unique reference to the type for the
 /// specified TagDecl (struct/union/class/enum) decl.
 QualType ASTContext::getTagDeclType(const TagDecl *Decl) const {
-  assert (Decl);
+  assert(Decl);
   // FIXME: What is the design on getTagDeclType when it requires casting
   // away const?  mutable?
   return getTypeDeclType(const_cast<TagDecl*>(Decl));
@@ -5525,6 +5722,46 @@
   return getTagDeclType(BlockDescriptorExtendedType);
 }
 
+TargetInfo::OpenCLTypeKind ASTContext::getOpenCLTypeKind(const Type *T) const {
+  auto BT = dyn_cast<BuiltinType>(T);
+
+  if (!BT) {
+    if (isa<PipeType>(T))
+      return TargetInfo::OCLTK_Pipe;
+
+    return TargetInfo::OCLTK_Default;
+  }
+
+  switch (BT->getKind()) {
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix)                   \
+  case BuiltinType::Id:                                                        \
+    return TargetInfo::OCLTK_Image;
+#include "clang/Basic/OpenCLImageTypes.def"
+
+  case BuiltinType::OCLClkEvent:
+    return TargetInfo::OCLTK_ClkEvent;
+
+  case BuiltinType::OCLEvent:
+    return TargetInfo::OCLTK_Event;
+
+  case BuiltinType::OCLQueue:
+    return TargetInfo::OCLTK_Queue;
+
+  case BuiltinType::OCLReserveID:
+    return TargetInfo::OCLTK_ReserveID;
+
+  case BuiltinType::OCLSampler:
+    return TargetInfo::OCLTK_Sampler;
+
+  default:
+    return TargetInfo::OCLTK_Default;
+  }
+}
+
+LangAS ASTContext::getOpenCLTypeAddrSpace(const Type *T) const {
+  return Target->getOpenCLTypeAddrSpace(getOpenCLTypeKind(T));
+}
+
 /// BlockRequiresCopying - Returns true if byref variable "D" of type "Ty"
 /// requires copy/dispose. Note that this must match the logic
 /// in buildByrefHelpers.
@@ -5537,6 +5774,11 @@
     return true;
   }
   
+  // The block needs copy/destroy helpers if Ty is non-trivial to destructively
+  // move or destroy.
+  if (Ty.isNonTrivialToPrimitiveDestructiveMove() || Ty.isDestructedType())
+    return true;
+
   if (!Ty->isObjCRetainableType()) return false;
   
   Qualifiers qs = Ty.getQualifiers();
@@ -5550,13 +5792,12 @@
       case Qualifiers::OCL_ExplicitNone:
       case Qualifiers::OCL_Autoreleasing:
         return false;
-        
-      // Tell the runtime that this is ARC __weak, called by the
-      // byref routines.
+
+      // These cases should have been taken care of when checking the type's
+      // non-triviality.
       case Qualifiers::OCL_Weak:
-      // ARC __strong __block variables need to be retained.
       case Qualifiers::OCL_Strong:
-        return true;
+        llvm_unreachable("impossible");
     }
     llvm_unreachable("fell out of lifetime switch!");
   }
@@ -5567,7 +5808,6 @@
 bool ASTContext::getByrefLifetime(QualType Ty,
                               Qualifiers::ObjCLifetime &LifeTime,
                               bool &HasByrefExtendedLayout) const {
-  
   if (!getLangOpts().ObjC1 ||
       getLangOpts().getGC() != LangOptions::NonGC)
     return false;
@@ -5635,14 +5875,14 @@
 
   // In almost all cases, it's a weak definition.
   auto *First = VD->getFirstDecl();
-  if (!First->isConstexpr() || First->isInlineSpecified() ||
-      !VD->isStaticDataMember())
+  if (First->isInlineSpecified() || !First->isStaticDataMember())
     return InlineVariableDefinitionKind::Weak;
 
   // If there's a file-context declaration in this translation unit, it's a
   // non-discardable definition.
   for (auto *D : VD->redecls())
-    if (D->getLexicalDeclContext()->isFileContext())
+    if (D->getLexicalDeclContext()->isFileContext() &&
+        !D->isInlineSpecified() && (D->isConstexpr() || First->isConstexpr()))
       return InlineVariableDefinitionKind::Strong;
 
   // If we've not seen one yet, we don't know.
@@ -5679,7 +5919,7 @@
     CharUnits sz = getObjCEncodingTypeSize(PType);
     if (sz.isZero())
       continue;
-    assert (sz.isPositive() && "BlockExpr - Incomplete param type");
+    assert(sz.isPositive() && "BlockExpr - Incomplete param type");
     ParmOffset += sz;
   }
   // Size of the argument frame
@@ -5790,8 +6030,8 @@
     if (sz.isZero())
       continue;
  
-    assert (sz.isPositive() && 
-        "getObjCEncodingForMethodDecl - Incomplete param type");
+    assert(sz.isPositive() && 
+           "getObjCEncodingForMethodDecl - Incomplete param type");
     ParmOffset += sz;
   }
   S += charUnitsToString(ParmOffset);
@@ -5938,7 +6178,6 @@
 /// Another legacy compatibility encoding: 32-bit longs are encoded as
 /// 'l' or 'L' , but not always.  For typedefs, we need to use
 /// 'i' or 'I' instead if encoding a struct field, or a pointer!
-///
 void ASTContext::getLegacyIntegralTypeEncoding (QualType &PointeeTy) const {
   if (isa<TypedefType>(PointeeTy.getTypePtr())) {
     if (const BuiltinType *BT = PointeeTy->getAs<BuiltinType>()) {
@@ -6245,9 +6484,8 @@
           = dyn_cast<ClassTemplateSpecializationDecl>(RDecl)) {
         const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
         llvm::raw_string_ostream OS(S);
-        TemplateSpecializationType::PrintTemplateArgumentList(OS,
-                                            TemplateArgs.asArray(),
-                                            (*this).getPrintingPolicy());
+        printTemplateArgumentList(OS, TemplateArgs.asArray(),
+                                  getPrintingPolicy());
       }
     } else {
       S += '?';
@@ -6339,7 +6577,7 @@
       SmallVector<const ObjCIvarDecl*, 32> Ivars;
       DeepCollectObjCIvars(OI, true, Ivars);
       for (unsigned i = 0, e = Ivars.size(); i != e; ++i) {
-        const FieldDecl *Field = cast<FieldDecl>(Ivars[i]);
+        const FieldDecl *Field = Ivars[i];
         if (Field->isBitField())
           getObjCEncodingForTypeImpl(Field->getType(), S, false, true, Field);
         else
@@ -6400,7 +6638,7 @@
         SmallVector<const ObjCIvarDecl*, 32> Ivars;
         DeepCollectObjCIvars(OI, true, Ivars);
         for (unsigned i = 0, e = Ivars.size(); i != e; ++i) {
-          if (cast<FieldDecl>(Ivars[i]) == FD) {
+          if (Ivars[i] == FD) {
             S += '{';
             S += OI->getObjCRuntimeNameAsString();
             S += '}';
@@ -6439,10 +6677,9 @@
   case Type::Vector:
   case Type::ExtVector:
   // Until we have a coherent encoding of these three types, issue warning.
-    { if (NotEncodedT)
-        *NotEncodedT = T;
-      return;
-    }
+    if (NotEncodedT)
+      *NotEncodedT = T;
+    return;
       
   // We could see an undeduced auto type here during error recovery.
   // Just ignore it.
@@ -6626,7 +6863,7 @@
 
 TypedefDecl *ASTContext::getObjCIdDecl() const {
   if (!ObjCIdDecl) {
-    QualType T = getObjCObjectType(ObjCBuiltinIdTy, { }, { });
+    QualType T = getObjCObjectType(ObjCBuiltinIdTy, {}, {});
     T = getObjCObjectPointerType(T);
     ObjCIdDecl = buildImplicitTypedef(T, "id");
   }
@@ -6643,7 +6880,7 @@
 
 TypedefDecl *ASTContext::getObjCClassDecl() const {
   if (!ObjCClassDecl) {
-    QualType T = getObjCObjectType(ObjCBuiltinClassTy, { }, { });
+    QualType T = getObjCObjectType(ObjCBuiltinClassTy, {}, {});
     T = getObjCObjectPointerType(T);
     ObjCClassDecl = buildImplicitTypedef(T, "Class");
   }
@@ -7305,7 +7542,7 @@
                                                       QualType rhs) {
   const ObjCObjectPointerType *lhsQID = lhs->getAs<ObjCObjectPointerType>();
   const ObjCObjectPointerType *rhsOPT = rhs->getAs<ObjCObjectPointerType>();
-  assert ((lhsQID && rhsOPT) && "ObjCQualifiedClassTypesAreCompatible");
+  assert((lhsQID && rhsOPT) && "ObjCQualifiedClassTypesAreCompatible");
   
   for (auto *lhsProto : lhsQID->quals()) {
     bool match = false;
@@ -7443,7 +7680,6 @@
 /// canAssignObjCInterfaces - Return true if the two interface types are
 /// compatible for assignment from RHS to LHS.  This handles validation of any
 /// protocol qualifiers on the LHS or RHS.
-///
 bool ASTContext::canAssignObjCInterfaces(const ObjCObjectPointerType *LHSOPT,
                                          const ObjCObjectPointerType *RHSOPT) {
   const ObjCObjectType* LHS = LHSOPT->getObjectType();
@@ -7549,7 +7785,6 @@
 static int compareObjCProtocolsByName(ObjCProtocolDecl * const *lhs,
                                       ObjCProtocolDecl * const *rhs) {
   return (*lhs)->getName().compare((*rhs)->getName());
-
 }
 
 /// getIntersectionOfProtocols - This routine finds the intersection of set
@@ -7718,7 +7953,7 @@
       } else if (LHS->isSpecialized() != RHS->isSpecialized()) {
         // If only one has type arguments, the result will not have type
         // arguments.
-        LHSTypeArgs = { };
+        LHSTypeArgs = {};
         anyChanges = true;
       }
 
@@ -7769,7 +8004,7 @@
       } else if (LHS->isSpecialized() != RHS->isSpecialized()) {
         // If only one has type arguments, the result will not have type
         // arguments.
-        RHSTypeArgs = { };
+        RHSTypeArgs = {};
         anyChanges = true;
       }
 
@@ -8407,7 +8642,7 @@
 
     return QualType();
   }
-  case Type::ObjCObjectPointer: {
+  case Type::ObjCObjectPointer:
     if (OfBlockPointer) {
       if (canAssignObjCInterfacesInBlockPointer(
                                           LHS->getAs<ObjCObjectPointerType>(),
@@ -8421,14 +8656,11 @@
       return LHS;
 
     return QualType();
-  }
   case Type::Pipe:
-  {
     assert(LHS != RHS &&
            "Equivalent pipe types should have already been handled!");
     return QualType();
   }
-  }
 
   llvm_unreachable("Invalid Type::Class!");
 }
@@ -8605,7 +8837,7 @@
   }
 }
 
-ASTMutationListener::~ASTMutationListener() { }
+ASTMutationListener::~ASTMutationListener() = default;
 
 void ASTMutationListener::DeducedReturnType(const FunctionDecl *FD,
                                             QualType ReturnType) {}
@@ -8657,7 +8889,7 @@
       assert(HowLong <= 2 && "Can't have LLLL modifier");
       ++HowLong;
       break;
-    case 'N': {
+    case 'N':
       // 'N' behaves like 'L' for all non LP64 targets and 'int' otherwise.
       assert(!IsSpecialLong && "Can't use two 'N' or 'W' modifiers!");
       assert(HowLong == 0 && "Can't use both 'L' and 'N' modifiers!");
@@ -8667,7 +8899,6 @@
       if (Context.getTargetInfo().getLongWidth() == 32)
         ++HowLong;
       break;
-    }
     case 'W':
       // This modifier represents int64 type.
       assert(!IsSpecialLong && "Can't use two 'N' or 'W' modifiers!");
@@ -8710,10 +8941,12 @@
     Type = Context.FloatTy;
     break;
   case 'd':
-    assert(HowLong < 2 && !Signed && !Unsigned &&
+    assert(HowLong < 3 && !Signed && !Unsigned &&
            "Bad modifiers used with 'd'!");
-    if (HowLong)
+    if (HowLong == 1)
       Type = Context.LongDoubleTy;
+    else if (HowLong == 2)
+      Type = Context.Float128Ty;
     else
       Type = Context.DoubleTy;
     break;
@@ -8822,10 +9055,9 @@
     Type = Context.getComplexType(ElementType);
     break;
   }  
-  case 'Y' : {
+  case 'Y':
     Type = Context.getPointerDiffType();
     break;
-  }
   case 'P':
     Type = Context.getFILEType();
     if (Type.isNull()) {
@@ -8963,6 +9195,12 @@
   if (!FD->isExternallyVisible())
     return GVA_Internal;
 
+  // Non-user-provided functions get emitted as weak definitions with every
+  // use, no matter whether they've been explicitly instantiated etc.
+  if (auto *MD = dyn_cast<CXXMethodDecl>(FD))
+    if (!MD->isUserProvided())
+      return GVA_DiscardableODR;
+
   GVALinkage External;
   switch (FD->getTemplateSpecializationKind()) {
   case TSK_Undeclared:
@@ -9257,6 +9495,23 @@
   return false;
 }
 
+void ASTContext::forEachMultiversionedFunctionVersion(
+    const FunctionDecl *FD,
+    llvm::function_ref<void(const FunctionDecl *)> Pred) const {
+  assert(FD->isMultiVersion() && "Only valid for multiversioned functions");
+  llvm::SmallDenseSet<const FunctionDecl*, 4> SeenDecls;
+  FD = FD->getCanonicalDecl();
+  for (auto *CurDecl :
+       FD->getDeclContext()->getRedeclContext()->lookup(FD->getDeclName())) {
+    FunctionDecl *CurFD = CurDecl->getAsFunction()->getCanonicalDecl();
+    if (CurFD && hasSameType(CurFD->getType(), FD->getType()) &&
+        std::end(SeenDecls) == llvm::find(SeenDecls, CurFD)) {
+      SeenDecls.insert(CurFD);
+      Pred(CurFD);
+    }
+  }
+}
+
 CallingConv ASTContext::getDefaultCallingConvention(bool IsVariadic,
                                                     bool IsCXXMethod) const {
   // Pass through to the C++ ABI object
@@ -9269,7 +9524,7 @@
   case LangOptions::DCC_CDecl:
     return CC_C;
   case LangOptions::DCC_FastCall:
-    if (getTargetInfo().hasFeature("sse2"))
+    if (getTargetInfo().hasFeature("sse2") && !IsVariadic)
       return CC_X86FastCall;
     break;
   case LangOptions::DCC_StdCall:
@@ -9281,6 +9536,11 @@
     if (!IsVariadic)
       return CC_X86VectorCall;
     break;
+  case LangOptions::DCC_RegCall:
+    // __regcall cannot be applied to variadic functions.
+    if (!IsVariadic)
+      return CC_X86RegCall;
+    break;
   }
   return Target->getDefaultCallingConv(TargetInfo::CCMT_Unknown);
 }
@@ -9317,7 +9577,7 @@
   llvm_unreachable("Unsupported ABI");
 }
 
-CXXABI::~CXXABI() {}
+CXXABI::~CXXABI() = default;
 
 size_t ASTContext::getSideTableAllocatedMemory() const {
   return ASTRecordLayouts.getMemorySize() +
@@ -9480,9 +9740,7 @@
   return (Size != Align || toBits(sizeChars) > MaxInlineWidthInBits);
 }
 
-namespace {
-
-ast_type_traits::DynTypedNode getSingleDynTypedNodeFromParentMap(
+static ast_type_traits::DynTypedNode getSingleDynTypedNodeFromParentMap(
     ASTContext::ParentMapPointers::mapped_type U) {
   if (const auto *D = U.dyn_cast<const Decl *>())
     return ast_type_traits::DynTypedNode::create(*D);
@@ -9491,6 +9749,8 @@
   return *U.get<ast_type_traits::DynTypedNode *>();
 }
 
+namespace {
+
 /// Template specializations to abstract away from pointers and TypeLocs.
 /// @{
 template <typename T>
@@ -9531,7 +9791,9 @@
     }
 
   private:
-    typedef RecursiveASTVisitor<ParentMapASTVisitor> VisitorBase;
+    friend class RecursiveASTVisitor<ParentMapASTVisitor>;
+
+    using VisitorBase = RecursiveASTVisitor<ParentMapASTVisitor>;
 
     ParentMapASTVisitor(ASTContext::ParentMapPointers *Parents,
                         ASTContext::ParentMapOtherNodes *OtherParents)
@@ -9540,6 +9802,7 @@
     bool shouldVisitTemplateInstantiations() const {
       return true;
     }
+
     bool shouldVisitImplicitCode() const {
       return true;
     }
@@ -9629,11 +9892,9 @@
     ASTContext::ParentMapPointers *Parents;
     ASTContext::ParentMapOtherNodes *OtherParents;
     llvm::SmallVector<ast_type_traits::DynTypedNode, 16> ParentStack;
-
-    friend class RecursiveASTVisitor<ParentMapASTVisitor>;
   };
 
-} // anonymous namespace
+} // namespace
 
 template <typename NodeTy, typename MapTy>
 static ASTContext::DynTypedNodeList getDynNodeFromMap(const NodeTy &Node,
@@ -9689,8 +9950,8 @@
     if (!hasSameType(DeclVar->getType(), ImplVar->getType()))
       return false;
   }
+
   return (MethodDecl->isVariadic() == MethodImpl->isVariadic());
-  
 }
 
 uint64_t ASTContext::getTargetNullPointerValue(QualType QT) const {
diff --git a/lib/AST/ASTDumper.cpp b/lib/AST/ASTDumper.cpp
index e74aee0..e264d05 100644
--- a/lib/AST/ASTDumper.cpp
+++ b/lib/AST/ASTDumper.cpp
@@ -99,6 +99,9 @@
     const CommandTraits *Traits;
     const SourceManager *SM;
 
+    /// The policy to use for printing; can be defaulted.
+    PrintingPolicy PrintPolicy;
+
     /// Pending[i] is an action to dump an entity at level i.
     llvm::SmallVector<std::function<void(bool isLastChild)>, 32> Pending;
 
@@ -207,12 +210,17 @@
   public:
     ASTDumper(raw_ostream &OS, const CommandTraits *Traits,
               const SourceManager *SM)
-      : OS(OS), Traits(Traits), SM(SM),
-        ShowColors(SM && SM->getDiagnostics().getShowColors()) { }
+        : ASTDumper(OS, Traits, SM,
+                    SM && SM->getDiagnostics().getShowColors()) {}
 
     ASTDumper(raw_ostream &OS, const CommandTraits *Traits,
               const SourceManager *SM, bool ShowColors)
-      : OS(OS), Traits(Traits), SM(SM), ShowColors(ShowColors) {}
+        : ASTDumper(OS, Traits, SM, ShowColors, LangOptions()) {}
+    ASTDumper(raw_ostream &OS, const CommandTraits *Traits,
+              const SourceManager *SM, bool ShowColors,
+              const PrintingPolicy &PrintPolicy)
+        : OS(OS), Traits(Traits), SM(SM), PrintPolicy(PrintPolicy),
+          ShowColors(ShowColors) {}
 
     void setDeserialize(bool D) { Deserialize = D; }
 
@@ -531,6 +539,7 @@
     void VisitAddrLabelExpr(const AddrLabelExpr *Node);
     void VisitBlockExpr(const BlockExpr *Node);
     void VisitOpaqueValueExpr(const OpaqueValueExpr *Node);
+    void VisitGenericSelectionExpr(const GenericSelectionExpr *E);
 
     // C++
     void VisitCXXNamedCastExpr(const CXXNamedCastExpr *Node);
@@ -646,13 +655,13 @@
   ColorScope Color(*this, TypeColor);
 
   SplitQualType T_split = T.split();
-  OS << "'" << QualType::getAsString(T_split) << "'";
+  OS << "'" << QualType::getAsString(T_split, PrintPolicy) << "'";
 
   if (Desugar && !T.isNull()) {
     // If the type is sugared, also dump a (shallow) desugared type.
     SplitQualType D_split = T.getSplitDesugaredType();
     if (T_split != D_split)
-      OS << ":'" << QualType::getAsString(D_split) << "'";
+      OS << ":'" << QualType::getAsString(D_split, PrintPolicy) << "'";
   }
 }
 
@@ -800,11 +809,10 @@
 
     bool HasUndeserializedLookups = Primary->hasExternalVisibleStorage();
 
-    for (auto I = Deserialize ? Primary->lookups_begin()
-                              : Primary->noload_lookups_begin(),
-              E = Deserialize ? Primary->lookups_end()
-                              : Primary->noload_lookups_end();
-         I != E; ++I) {
+    auto Range = Deserialize
+                     ? Primary->lookups()
+                     : Primary->noload_lookups(/*PreserveInternalState=*/true);
+    for (auto I = Range.begin(), E = Range.end(); I != E; ++I) {
       DeclarationName Name = I.getLookupName();
       DeclContextLookupResult R = *I;
 
@@ -1187,20 +1195,19 @@
 
   if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D)) {
     if (MD->size_overridden_methods() != 0) {
-      auto dumpOverride =
-        [=](const CXXMethodDecl *D) {
-          SplitQualType T_split = D->getType().split();
-          OS << D << " " << D->getParent()->getName() << "::"
-             << D->getNameAsString() << " '" << QualType::getAsString(T_split) << "'";
-        };
+      auto dumpOverride = [=](const CXXMethodDecl *D) {
+        SplitQualType T_split = D->getType().split();
+        OS << D << " " << D->getParent()->getName()
+           << "::" << D->getNameAsString() << " '"
+           << QualType::getAsString(T_split, PrintPolicy) << "'";
+      };
 
       dumpChild([=] {
-        auto FirstOverrideItr = MD->begin_overridden_methods();
+        auto Overrides = MD->overridden_methods();
         OS << "Overrides: [ ";
-        dumpOverride(*FirstOverrideItr);
+        dumpOverride(*Overrides.begin());
         for (const auto *Override :
-               llvm::make_range(FirstOverrideItr + 1,
-                                MD->end_overridden_methods())) {
+             llvm::make_range(Overrides.begin() + 1, Overrides.end())) {
           OS << ", ";
           dumpOverride(Override);
         }
@@ -1538,7 +1545,7 @@
     case TSK_ExplicitInstantiationDefinition:
       if (!DumpExplicitInst)
         break;
-      // Fall through.
+      LLVM_FALLTHROUGH;
     case TSK_Undeclared:
     case TSK_ImplicitInstantiation:
       if (DumpRefOnly)
@@ -1939,10 +1946,15 @@
       return;
     }
 
+    // Some statements have custom mechanisms for dumping their children.
     if (const DeclStmt *DS = dyn_cast<DeclStmt>(S)) {
       VisitDeclStmt(DS);
       return;
     }
+    if (const GenericSelectionExpr *GSE = dyn_cast<GenericSelectionExpr>(S)) {
+      VisitGenericSelectionExpr(GSE);
+      return;
+    }
 
     ConstStmtVisitor<ASTDumper>::Visit(S);
 
@@ -2201,12 +2213,14 @@
 }
 
 void ASTDumper::VisitUnaryOperator(const UnaryOperator *Node) {
-  VisitExpr(Node);
-  OS << " " << (Node->isPostfix() ? "postfix" : "prefix")
-     << " '" << UnaryOperator::getOpcodeStr(Node->getOpcode()) << "'";
-}
-
-void ASTDumper::VisitUnaryExprOrTypeTraitExpr(
+  VisitExpr(Node);
+  OS << " " << (Node->isPostfix() ? "postfix" : "prefix")
+     << " '" << UnaryOperator::getOpcodeStr(Node->getOpcode()) << "'";
+  if (!Node->canOverflow())
+    OS << " cannot overflow";
+}
+
+void ASTDumper::VisitUnaryExprOrTypeTraitExpr(
     const UnaryExprOrTypeTraitExpr *Node) {
   VisitExpr(Node);
   switch(Node->getKind()) {
@@ -2265,6 +2279,32 @@
     dumpStmt(Source);
 }
 
+void ASTDumper::VisitGenericSelectionExpr(const GenericSelectionExpr *E) {
+  VisitExpr(E);
+  if (E->isResultDependent())
+    OS << " result_dependent";
+  dumpStmt(E->getControllingExpr());
+  dumpTypeAsChild(E->getControllingExpr()->getType()); // FIXME: remove
+
+  for (unsigned I = 0, N = E->getNumAssocs(); I != N; ++I) {
+    dumpChild([=] {
+      if (const TypeSourceInfo *TSI = E->getAssocTypeSourceInfo(I)) {
+        OS << "case ";
+        dumpType(TSI->getType());
+      } else {
+        OS << "default";
+      }
+
+      if (!E->isResultDependent() && E->getResultIndex() == I)
+        OS << " selected";
+
+      if (const TypeSourceInfo *TSI = E->getAssocTypeSourceInfo(I))
+        dumpTypeAsChild(TSI->getType());
+      dumpStmt(E->getAssocExpr(I));
+    });
+  }
+}
+
 // GNU extensions.
 
 void ASTDumper::VisitAddrLabelExpr(const AddrLabelExpr *Node) {
@@ -2683,15 +2723,19 @@
 LLVM_DUMP_METHOD void Decl::dump() const { dump(llvm::errs()); }
 
 LLVM_DUMP_METHOD void Decl::dump(raw_ostream &OS, bool Deserialize) const {
-  ASTDumper P(OS, &getASTContext().getCommentCommandTraits(),
-              &getASTContext().getSourceManager());
+  const ASTContext &Ctx = getASTContext();
+  const SourceManager &SM = Ctx.getSourceManager();
+  ASTDumper P(OS, &Ctx.getCommentCommandTraits(), &SM,
+              SM.getDiagnostics().getShowColors(), Ctx.getPrintingPolicy());
   P.setDeserialize(Deserialize);
   P.dumpDecl(this);
 }
 
 LLVM_DUMP_METHOD void Decl::dumpColor() const {
-  ASTDumper P(llvm::errs(), &getASTContext().getCommentCommandTraits(),
-              &getASTContext().getSourceManager(), /*ShowColors*/true);
+  const ASTContext &Ctx = getASTContext();
+  ASTDumper P(llvm::errs(), &Ctx.getCommentCommandTraits(),
+              &Ctx.getSourceManager(), /*ShowColors*/ true,
+              Ctx.getPrintingPolicy());
   P.dumpDecl(this);
 }
 
@@ -2706,7 +2750,9 @@
   while (!DC->isTranslationUnit())
     DC = DC->getParent();
   ASTContext &Ctx = cast<TranslationUnitDecl>(DC)->getASTContext();
-  ASTDumper P(OS, &Ctx.getCommentCommandTraits(), &Ctx.getSourceManager());
+  const SourceManager &SM = Ctx.getSourceManager();
+  ASTDumper P(OS, &Ctx.getCommentCommandTraits(), &Ctx.getSourceManager(),
+              SM.getDiagnostics().getShowColors(), Ctx.getPrintingPolicy());
   P.setDeserialize(Deserialize);
   P.dumpLookups(this, DumpDecls);
 }
diff --git a/lib/AST/ASTImporter.cpp b/lib/AST/ASTImporter.cpp
index c926477..ac262e7 100644
--- a/lib/AST/ASTImporter.cpp
+++ b/lib/AST/ASTImporter.cpp
@@ -23,7 +23,6 @@
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include <deque>
 
 namespace clang {
   class ASTNodeImporter : public TypeVisitor<ASTNodeImporter, QualType>,
@@ -52,13 +51,13 @@
     QualType VisitConstantArrayType(const ConstantArrayType *T);
     QualType VisitIncompleteArrayType(const IncompleteArrayType *T);
     QualType VisitVariableArrayType(const VariableArrayType *T);
-    // FIXME: DependentSizedArrayType
+    QualType VisitDependentSizedArrayType(const DependentSizedArrayType *T);
     // FIXME: DependentSizedExtVectorType
     QualType VisitVectorType(const VectorType *T);
     QualType VisitExtVectorType(const ExtVectorType *T);
     QualType VisitFunctionNoProtoType(const FunctionNoProtoType *T);
     QualType VisitFunctionProtoType(const FunctionProtoType *T);
-    // FIXME: UnresolvedUsingType
+    QualType VisitUnresolvedUsingType(const UnresolvedUsingType *T);
     QualType VisitParenType(const ParenType *T);
     QualType VisitTypedefType(const TypedefType *T);
     QualType VisitTypeOfExprType(const TypeOfExprType *T);
@@ -77,7 +76,9 @@
     QualType VisitTemplateSpecializationType(const TemplateSpecializationType *T);
     QualType VisitElaboratedType(const ElaboratedType *T);
     // FIXME: DependentNameType
-    // FIXME: DependentTemplateSpecializationType
+    QualType VisitPackExpansionType(const PackExpansionType *T);
+    QualType VisitDependentTemplateSpecializationType(
+        const DependentTemplateSpecializationType *T);
     QualType VisitObjCInterfaceType(const ObjCInterfaceType *T);
     QualType VisitObjCObjectType(const ObjCObjectType *T);
     QualType VisitObjCObjectPointerType(const ObjCObjectPointerType *T);
@@ -96,6 +97,8 @@
     typedef DesignatedInitExpr::Designator Designator;
     Designator ImportDesignator(const Designator &D);
 
+    Optional<LambdaCapture> ImportLambdaCapture(const LambdaCapture &From);
+
                         
     /// \brief What we should import from the definition.
     enum ImportDefinitionKind { 
@@ -126,29 +129,47 @@
     bool ImportDefinition(ObjCProtocolDecl *From, ObjCProtocolDecl *To,
                           ImportDefinitionKind Kind = IDK_Default);
     TemplateParameterList *ImportTemplateParameterList(
-                                                 TemplateParameterList *Params);
+        TemplateParameterList *Params);
     TemplateArgument ImportTemplateArgument(const TemplateArgument &From);
-    TemplateArgumentLoc ImportTemplateArgumentLoc(
-        const TemplateArgumentLoc &TALoc, bool &Error);
+    Optional<TemplateArgumentLoc> ImportTemplateArgumentLoc(
+        const TemplateArgumentLoc &TALoc);
     bool ImportTemplateArguments(const TemplateArgument *FromArgs,
                                  unsigned NumFromArgs,
-                               SmallVectorImpl<TemplateArgument> &ToArgs);
+                                 SmallVectorImpl<TemplateArgument> &ToArgs);
+
+    template <typename InContainerTy>
+    bool ImportTemplateArgumentListInfo(const InContainerTy &Container,
+                                        TemplateArgumentListInfo &ToTAInfo);
+
+    template<typename InContainerTy>
+    bool ImportTemplateArgumentListInfo(SourceLocation FromLAngleLoc,
+                                        SourceLocation FromRAngleLoc,
+                                        const InContainerTy &Container,
+                                        TemplateArgumentListInfo &Result);
+
+    bool ImportTemplateInformation(FunctionDecl *FromFD, FunctionDecl *ToFD);
+
     bool IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord,
                            bool Complain = true);
     bool IsStructuralMatch(VarDecl *FromVar, VarDecl *ToVar,
                            bool Complain = true);
     bool IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToRecord);
     bool IsStructuralMatch(EnumConstantDecl *FromEC, EnumConstantDecl *ToEC);
+    bool IsStructuralMatch(FunctionTemplateDecl *From,
+                           FunctionTemplateDecl *To);
     bool IsStructuralMatch(ClassTemplateDecl *From, ClassTemplateDecl *To);
     bool IsStructuralMatch(VarTemplateDecl *From, VarTemplateDecl *To);
     Decl *VisitDecl(Decl *D);
+    Decl *VisitEmptyDecl(EmptyDecl *D);
     Decl *VisitAccessSpecDecl(AccessSpecDecl *D);
     Decl *VisitStaticAssertDecl(StaticAssertDecl *D);
     Decl *VisitTranslationUnitDecl(TranslationUnitDecl *D);
     Decl *VisitNamespaceDecl(NamespaceDecl *D);
+    Decl *VisitNamespaceAliasDecl(NamespaceAliasDecl *D);
     Decl *VisitTypedefNameDecl(TypedefNameDecl *D, bool IsAlias);
     Decl *VisitTypedefDecl(TypedefDecl *D);
     Decl *VisitTypeAliasDecl(TypeAliasDecl *D);
+    Decl *VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D);
     Decl *VisitLabelDecl(LabelDecl *D);
     Decl *VisitEnumDecl(EnumDecl *D);
     Decl *VisitRecordDecl(RecordDecl *D);
@@ -170,6 +191,12 @@
     Decl *VisitObjCCategoryDecl(ObjCCategoryDecl *D);
     Decl *VisitObjCProtocolDecl(ObjCProtocolDecl *D);
     Decl *VisitLinkageSpecDecl(LinkageSpecDecl *D);
+    Decl *VisitUsingDecl(UsingDecl *D);
+    Decl *VisitUsingShadowDecl(UsingShadowDecl *D);
+    Decl *VisitUsingDirectiveDecl(UsingDirectiveDecl *D);
+    Decl *VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D);
+    Decl *VisitUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *D);
+
 
     ObjCTypeParamList *ImportObjCTypeParamList(ObjCTypeParamList *list);
     Decl *VisitObjCInterfaceDecl(ObjCInterfaceDecl *D);
@@ -185,6 +212,7 @@
                                             ClassTemplateSpecializationDecl *D);
     Decl *VisitVarTemplateDecl(VarTemplateDecl *D);
     Decl *VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D);
+    Decl *VisitFunctionTemplateDecl(FunctionTemplateDecl *D);
 
     // Importing statements
     DeclGroupRef ImportDeclGroup(DeclGroupRef DG);
@@ -265,21 +293,30 @@
     Expr *VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E);
     Expr *VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *CE);
     Expr *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E);
+    Expr *VisitPackExpansionExpr(PackExpansionExpr *E);
+    Expr *VisitSizeOfPackExpr(SizeOfPackExpr *E);
     Expr *VisitCXXNewExpr(CXXNewExpr *CE);
     Expr *VisitCXXDeleteExpr(CXXDeleteExpr *E);
     Expr *VisitCXXConstructExpr(CXXConstructExpr *E);
     Expr *VisitCXXMemberCallExpr(CXXMemberCallExpr *E);
+    Expr *VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E);
+    Expr *VisitCXXUnresolvedConstructExpr(CXXUnresolvedConstructExpr *CE);
+    Expr *VisitUnresolvedLookupExpr(UnresolvedLookupExpr *E);
     Expr *VisitExprWithCleanups(ExprWithCleanups *EWC);
     Expr *VisitCXXThisExpr(CXXThisExpr *E);
     Expr *VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E);
+    Expr *VisitCXXPseudoDestructorExpr(CXXPseudoDestructorExpr *E);
     Expr *VisitMemberExpr(MemberExpr *E);
     Expr *VisitCallExpr(CallExpr *E);
+    Expr *VisitLambdaExpr(LambdaExpr *LE);
     Expr *VisitInitListExpr(InitListExpr *E);
     Expr *VisitArrayInitLoopExpr(ArrayInitLoopExpr *E);
     Expr *VisitArrayInitIndexExpr(ArrayInitIndexExpr *E);
     Expr *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E);
     Expr *VisitCXXNamedCastExpr(CXXNamedCastExpr *E);
     Expr *VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *E);
+    Expr *VisitTypeTraitExpr(TypeTraitExpr *E);
+    Expr *VisitCXXTypeidExpr(CXXTypeidExpr *E);
 
 
     template<typename IIter, typename OIter>
@@ -323,8 +360,37 @@
     // Importing overrides.
     void ImportOverrides(CXXMethodDecl *ToMethod, CXXMethodDecl *FromMethod);
   };
+
+
+template <typename InContainerTy>
+bool ASTNodeImporter::ImportTemplateArgumentListInfo(
+    SourceLocation FromLAngleLoc, SourceLocation FromRAngleLoc,
+    const InContainerTy &Container, TemplateArgumentListInfo &Result) {
+  TemplateArgumentListInfo ToTAInfo(Importer.Import(FromLAngleLoc),
+                                    Importer.Import(FromRAngleLoc));
+  if (ImportTemplateArgumentListInfo(Container, ToTAInfo))
+    return true;
+  Result = ToTAInfo;
+  return false;
 }
 
+template <>
+bool ASTNodeImporter::ImportTemplateArgumentListInfo<TemplateArgumentListInfo>(
+    const TemplateArgumentListInfo &From, TemplateArgumentListInfo &Result) {
+  return ImportTemplateArgumentListInfo(
+      From.getLAngleLoc(), From.getRAngleLoc(), From.arguments(), Result);
+}
+
+template <>
+bool ASTNodeImporter::ImportTemplateArgumentListInfo<
+    ASTTemplateArgumentListInfo>(const ASTTemplateArgumentListInfo &From,
+                                 TemplateArgumentListInfo &Result) {
+  return ImportTemplateArgumentListInfo(From.LAngleLoc, From.RAngleLoc,
+                                        From.arguments(), Result);
+}
+
+} // end namespace clang
+
 //----------------------------------------------------------------------------
 // Import Types
 //----------------------------------------------------------------------------
@@ -492,6 +558,24 @@
                                                       Brackets);
 }
 
+QualType ASTNodeImporter::VisitDependentSizedArrayType(
+    const DependentSizedArrayType *T) {
+  QualType ToElementType = Importer.Import(T->getElementType());
+  if (ToElementType.isNull())
+    return QualType();
+
+  // SizeExpr may be null if size is not specified directly.
+  // For example, 'int a[]'.
+  Expr *Size = Importer.Import(T->getSizeExpr());
+  if (!Size && T->getSizeExpr())
+    return QualType();
+
+  SourceRange Brackets = Importer.Import(T->getBracketsRange());
+  return Importer.getToContext().getDependentSizedArrayType(
+      ToElementType, Size, T->getSizeModifier(), T->getIndexTypeCVRQualifiers(),
+      Brackets);
+}
+
 QualType ASTNodeImporter::VisitVectorType(const VectorType *T) {
   QualType ToElementType = Importer.Import(T->getElementType());
   if (ToElementType.isNull())
@@ -566,6 +650,22 @@
   return Importer.getToContext().getFunctionType(ToResultType, ArgTypes, ToEPI);
 }
 
+QualType ASTNodeImporter::VisitUnresolvedUsingType(
+    const UnresolvedUsingType *T) {
+  UnresolvedUsingTypenameDecl *ToD = cast_or_null<UnresolvedUsingTypenameDecl>(
+        Importer.Import(T->getDecl()));
+  if (!ToD)
+    return QualType();
+
+  UnresolvedUsingTypenameDecl *ToPrevD =
+      cast_or_null<UnresolvedUsingTypenameDecl>(
+        Importer.Import(T->getDecl()->getPreviousDecl()));
+  if (!ToPrevD && T->getDecl()->getPreviousDecl())
+    return QualType();
+
+  return Importer.getToContext().getTypeDeclType(ToD, ToPrevD);
+}
+
 QualType ASTNodeImporter::VisitParenType(const ParenType *T) {
   QualType ToInnerType = Importer.Import(T->getInnerType());
   if (ToInnerType.isNull())
@@ -767,6 +867,34 @@
                                                    ToQualifier, ToNamedType);
 }
 
+QualType ASTNodeImporter::VisitPackExpansionType(const PackExpansionType *T) {
+  QualType Pattern = Importer.Import(T->getPattern());
+  if (Pattern.isNull())
+    return QualType();
+
+  return Importer.getToContext().getPackExpansionType(Pattern,
+                                                      T->getNumExpansions());
+}
+
+QualType ASTNodeImporter::VisitDependentTemplateSpecializationType(
+    const DependentTemplateSpecializationType *T) {
+  NestedNameSpecifier *Qualifier = Importer.Import(T->getQualifier());
+  if (!Qualifier && T->getQualifier())
+    return QualType();
+
+  IdentifierInfo *Name = Importer.Import(T->getIdentifier());
+  if (!Name && T->getIdentifier())
+    return QualType();
+
+  SmallVector<TemplateArgument, 2> ToPack;
+  ToPack.reserve(T->getNumArgs());
+  if (ImportTemplateArguments(T->getArgs(), T->getNumArgs(), ToPack))
+    return QualType();
+
+  return Importer.getToContext().getDependentTemplateSpecializationType(
+        T->getKeyword(), Qualifier, Name, ToPack);
+}
+
 QualType ASTNodeImporter::VisitObjCInterfaceType(const ObjCInterfaceType *T) {
   ObjCInterfaceDecl *Class
     = dyn_cast_or_null<ObjCInterfaceDecl>(Importer.Import(T->getDecl()));
@@ -998,7 +1126,6 @@
       = FromData.HasDeclaredCopyConstructorWithConstParam;
     ToData.HasDeclaredCopyAssignmentWithConstParam
       = FromData.HasDeclaredCopyAssignmentWithConstParam;
-    ToData.IsLambda = FromData.IsLambda;
 
     SmallVector<CXXBaseSpecifier *, 4> Bases;
     for (const auto &Base1 : FromCXX->bases()) {
@@ -1171,9 +1298,8 @@
   llvm_unreachable("Invalid template argument kind");
 }
 
-TemplateArgumentLoc ASTNodeImporter::ImportTemplateArgumentLoc(
-    const TemplateArgumentLoc &TALoc, bool &Error) {
-  Error = false;
+Optional<TemplateArgumentLoc>
+ASTNodeImporter::ImportTemplateArgumentLoc(const TemplateArgumentLoc &TALoc) {
   TemplateArgument Arg = ImportTemplateArgument(TALoc.getArgument());
   TemplateArgumentLocInfo FromInfo = TALoc.getLocInfo();
   TemplateArgumentLocInfo ToInfo;
@@ -1181,12 +1307,12 @@
     Expr *E = Importer.Import(FromInfo.getAsExpr());
     ToInfo = TemplateArgumentLocInfo(E);
     if (!E)
-      Error = true;
+      return None;
   } else if (Arg.getKind() == TemplateArgument::Type) {
     if (TypeSourceInfo *TSI = Importer.Import(FromInfo.getAsTypeSourceInfo()))
       ToInfo = TemplateArgumentLocInfo(TSI);
     else
-      Error = true;
+      return None;
   } else {
     ToInfo = TemplateArgumentLocInfo(
           Importer.Import(FromInfo.getTemplateQualifierLoc()),
@@ -1210,6 +1336,21 @@
   return false;
 }
 
+// We cannot use Optional<> pattern here and below because
+// TemplateArgumentListInfo's operator new is declared as deleted so it cannot
+// be stored in Optional.
+template <typename InContainerTy>
+bool ASTNodeImporter::ImportTemplateArgumentListInfo(
+    const InContainerTy &Container, TemplateArgumentListInfo &ToTAInfo) {
+  for (const auto &FromLoc : Container) {
+    if (auto ToLoc = ImportTemplateArgumentLoc(FromLoc))
+      ToTAInfo.addArgument(*ToLoc);
+    else
+      return true;
+  }
+  return false;
+}
+
 bool ASTNodeImporter::IsStructuralMatch(RecordDecl *FromRecord, 
                                         RecordDecl *ToRecord, bool Complain) {
   // Eliminate a potential failure point where we attempt to re-import
@@ -1243,6 +1384,14 @@
   return Ctx.IsStructurallyEquivalent(FromEnum, ToEnum);
 }
 
+bool ASTNodeImporter::IsStructuralMatch(FunctionTemplateDecl *From,
+                                        FunctionTemplateDecl *To) {
+  StructuralEquivalenceContext Ctx(
+      Importer.getFromContext(), Importer.getToContext(),
+      Importer.getNonEquivalentDecls(), false, false);
+  return Ctx.IsStructurallyEquivalent(From, To);
+}
+
 bool ASTNodeImporter::IsStructuralMatch(EnumConstantDecl *FromEC,
                                         EnumConstantDecl *ToEC)
 {
@@ -1276,6 +1425,29 @@
   return nullptr;
 }
 
+Decl *ASTNodeImporter::VisitEmptyDecl(EmptyDecl *D) {
+  // Import the context of this declaration.
+  DeclContext *DC = Importer.ImportContext(D->getDeclContext());
+  if (!DC)
+    return nullptr;
+
+  DeclContext *LexicalDC = DC;
+  if (D->getDeclContext() != D->getLexicalDeclContext()) {
+    LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
+    if (!LexicalDC)
+      return nullptr;
+  }
+
+  // Import the location of this declaration.
+  SourceLocation Loc = Importer.Import(D->getLocation());
+
+  EmptyDecl *ToD = EmptyDecl::Create(Importer.getToContext(), DC, Loc);
+  ToD->setLexicalDeclContext(LexicalDC);
+  Importer.Imported(D, ToD);
+  LexicalDC->addDeclInternal(ToD);
+  return ToD;
+}
+
 Decl *ASTNodeImporter::VisitTranslationUnitDecl(TranslationUnitDecl *D) {
   TranslationUnitDecl *ToD = 
     Importer.getToContext().getTranslationUnitDecl();
@@ -1410,6 +1582,44 @@
   return ToNamespace;
 }
 
+Decl *ASTNodeImporter::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) {
+  // Import the major distinguishing characteristics of this namespace.
+  DeclContext *DC, *LexicalDC;
+  DeclarationName Name;
+  SourceLocation Loc;
+  NamedDecl *LookupD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, LookupD, Loc))
+    return nullptr;
+  if (LookupD)
+    return LookupD;
+
+  // NOTE: No conflict resolution is done for namespace aliases now.
+
+  NamespaceDecl *TargetDecl = cast_or_null<NamespaceDecl>(
+        Importer.Import(D->getNamespace()));
+  if (!TargetDecl)
+    return nullptr;
+
+  IdentifierInfo *ToII = Importer.Import(D->getIdentifier());
+  if (!ToII)
+    return nullptr;
+
+  NestedNameSpecifierLoc ToQLoc = Importer.Import(D->getQualifierLoc());
+  if (D->getQualifierLoc() && !ToQLoc)
+    return nullptr;
+
+  NamespaceAliasDecl *ToD = NamespaceAliasDecl::Create(
+        Importer.getToContext(), DC, Importer.Import(D->getNamespaceLoc()),
+        Importer.Import(D->getAliasLoc()), ToII, ToQLoc,
+        Importer.Import(D->getTargetNameLoc()), TargetDecl);
+
+  ToD->setLexicalDeclContext(LexicalDC);
+  Importer.Imported(D, ToD);
+  LexicalDC->addDeclInternal(ToD);
+
+  return ToD;
+}
+
 Decl *ASTNodeImporter::VisitTypedefNameDecl(TypedefNameDecl *D, bool IsAlias) {
   // Import the major distinguishing characteristics of this typedef.
   DeclContext *DC, *LexicalDC;
@@ -1461,10 +1671,8 @@
   SourceLocation StartL = Importer.Import(D->getLocStart());
   TypedefNameDecl *ToTypedef;
   if (IsAlias)
-    ToTypedef = TypeAliasDecl::Create(Importer.getToContext(), DC,
-                                      StartL, Loc,
-                                      Name.getAsIdentifierInfo(),
-                                      TInfo);
+    ToTypedef = TypeAliasDecl::Create(Importer.getToContext(), DC, StartL, Loc,
+                                      Name.getAsIdentifierInfo(), TInfo);
   else
     ToTypedef = TypedefDecl::Create(Importer.getToContext(), DC,
                                     StartL, Loc,
@@ -1474,7 +1682,11 @@
   ToTypedef->setAccess(D->getAccess());
   ToTypedef->setLexicalDeclContext(LexicalDC);
   Importer.Imported(D, ToTypedef);
-  LexicalDC->addDeclInternal(ToTypedef);
+
+  // Templated declarations should not appear in DeclContext.
+  TypeAliasDecl *FromAlias = IsAlias ? cast<TypeAliasDecl>(D) : nullptr;
+  if (!FromAlias || !FromAlias->getDescribedAliasTemplate())
+    LexicalDC->addDeclInternal(ToTypedef);
 
   return ToTypedef;
 }
@@ -1487,6 +1699,65 @@
   return VisitTypedefNameDecl(D, /*IsAlias=*/true);
 }
 
+Decl *ASTNodeImporter::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
+  // Import the major distinguishing characteristics of this typedef.
+  DeclContext *DC, *LexicalDC;
+  DeclarationName Name;
+  SourceLocation Loc;
+  NamedDecl *FoundD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, FoundD, Loc))
+    return nullptr;
+  if (FoundD)
+    return FoundD;
+
+  // If this typedef is not in block scope, determine whether we've
+  // seen a typedef with the same name (that we can merge with) or any
+  // other entity by that name (which name lookup could conflict with).
+  if (!DC->isFunctionOrMethod()) {
+    SmallVector<NamedDecl *, 4> ConflictingDecls;
+    unsigned IDNS = Decl::IDNS_Ordinary;
+    SmallVector<NamedDecl *, 2> FoundDecls;
+    DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
+    for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
+      if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
+        continue;
+      if (auto *FoundAlias =
+            dyn_cast<TypeAliasTemplateDecl>(FoundDecls[I]))
+          return Importer.Imported(D, FoundAlias);
+      ConflictingDecls.push_back(FoundDecls[I]);
+    }
+
+    if (!ConflictingDecls.empty()) {
+      Name = Importer.HandleNameConflict(Name, DC, IDNS,
+                                         ConflictingDecls.data(),
+                                         ConflictingDecls.size());
+      if (!Name)
+        return nullptr;
+    }
+  }
+
+  TemplateParameterList *Params = ImportTemplateParameterList(
+        D->getTemplateParameters());
+  if (!Params)
+    return nullptr;
+
+  auto *TemplDecl = cast_or_null<TypeAliasDecl>(
+        Importer.Import(D->getTemplatedDecl()));
+  if (!TemplDecl)
+    return nullptr;
+
+  TypeAliasTemplateDecl *ToAlias = TypeAliasTemplateDecl::Create(
+        Importer.getToContext(), DC, Loc, Name, Params, TemplDecl);
+
+  TemplDecl->setDescribedAliasTemplate(ToAlias);
+
+  ToAlias->setAccess(D->getAccess());
+  ToAlias->setLexicalDeclContext(LexicalDC);
+  Importer.Imported(D, ToAlias);
+  LexicalDC->addDeclInternal(ToAlias);
+  return ToAlias;
+}
+
 Decl *ASTNodeImporter::VisitLabelDecl(LabelDecl *D) {
   // Import the major distinguishing characteristics of this label.
   DeclContext *DC, *LexicalDC;
@@ -1734,16 +2005,16 @@
         if (DCXX->getLambdaContextDecl() && !CDecl)
           return nullptr;
         D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), CDecl);
-      } else if (DCXX->isInjectedClassName()) {                                                 
-        // We have to be careful to do a similar dance to the one in                            
-        // Sema::ActOnStartCXXMemberDeclarations                                                
-        CXXRecordDecl *const PrevDecl = nullptr;                                                
-        const bool DelayTypeCreation = true;                                                    
-        D2CXX = CXXRecordDecl::Create(                                                          
-            Importer.getToContext(), D->getTagKind(), DC, StartLoc, Loc,                        
-            Name.getAsIdentifierInfo(), PrevDecl, DelayTypeCreation);                           
-        Importer.getToContext().getTypeDeclType(                                                
-            D2CXX, llvm::dyn_cast<CXXRecordDecl>(DC));                                          
+      } else if (DCXX->isInjectedClassName()) {
+        // We have to be careful to do a similar dance to the one in
+        // Sema::ActOnStartCXXMemberDeclarations
+        CXXRecordDecl *const PrevDecl = nullptr;
+        const bool DelayTypeCreation = true;
+        D2CXX = CXXRecordDecl::Create(
+            Importer.getToContext(), D->getTagKind(), DC, StartLoc, Loc,
+            Name.getAsIdentifierInfo(), PrevDecl, DelayTypeCreation);
+        Importer.getToContext().getTypeDeclType(
+            D2CXX, llvm::dyn_cast<CXXRecordDecl>(DC));
       } else {
         D2CXX = CXXRecordDecl::Create(Importer.getToContext(),
                                       D->getTagKind(),
@@ -1752,14 +2023,42 @@
       }
       D2 = D2CXX;
       D2->setAccess(D->getAccess());
+      D2->setLexicalDeclContext(LexicalDC);
+      if (!DCXX->getDescribedClassTemplate())
+        LexicalDC->addDeclInternal(D2);
+
+      Importer.Imported(D, D2);
+
+      if (ClassTemplateDecl *FromDescribed =
+          DCXX->getDescribedClassTemplate()) {
+        ClassTemplateDecl *ToDescribed = cast_or_null<ClassTemplateDecl>(
+              Importer.Import(FromDescribed));
+        if (!ToDescribed)
+          return nullptr;
+        D2CXX->setDescribedClassTemplate(ToDescribed);
+
+      } else if (MemberSpecializationInfo *MemberInfo =
+                   DCXX->getMemberSpecializationInfo()) {
+        TemplateSpecializationKind SK =
+            MemberInfo->getTemplateSpecializationKind();
+        CXXRecordDecl *FromInst = DCXX->getInstantiatedFromMemberClass();
+        CXXRecordDecl *ToInst =
+            cast_or_null<CXXRecordDecl>(Importer.Import(FromInst));
+        if (FromInst && !ToInst)
+          return nullptr;
+        D2CXX->setInstantiationOfMemberClass(ToInst, SK);
+        D2CXX->getMemberSpecializationInfo()->setPointOfInstantiation(
+              Importer.Import(MemberInfo->getPointOfInstantiation()));
+      }
+
     } else {
       D2 = RecordDecl::Create(Importer.getToContext(), D->getTagKind(),
                               DC, StartLoc, Loc, Name.getAsIdentifierInfo());
+      D2->setLexicalDeclContext(LexicalDC);
+      LexicalDC->addDeclInternal(D2);
     }
     
     D2->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
-    D2->setLexicalDeclContext(LexicalDC);
-    LexicalDC->addDeclInternal(D2);
     if (D->isAnonymousStructOrUnion())
       D2->setAnonymousStructOrUnion(true);
     if (PrevDecl) {
@@ -1835,6 +2134,88 @@
   return ToEnumerator;
 }
 
+bool ASTNodeImporter::ImportTemplateInformation(FunctionDecl *FromFD,
+                                                FunctionDecl *ToFD) {
+  switch (FromFD->getTemplatedKind()) {
+  case FunctionDecl::TK_NonTemplate:
+  case FunctionDecl::TK_FunctionTemplate:
+    return false;
+
+  case FunctionDecl::TK_MemberSpecialization: {
+    auto *InstFD = cast_or_null<FunctionDecl>(
+          Importer.Import(FromFD->getInstantiatedFromMemberFunction()));
+    if (!InstFD)
+      return true;
+
+    TemplateSpecializationKind TSK = FromFD->getTemplateSpecializationKind();
+    SourceLocation POI = Importer.Import(
+          FromFD->getMemberSpecializationInfo()->getPointOfInstantiation());
+    ToFD->setInstantiationOfMemberFunction(InstFD, TSK);
+    ToFD->getMemberSpecializationInfo()->setPointOfInstantiation(POI);
+    return false;
+  }
+
+  case FunctionDecl::TK_FunctionTemplateSpecialization: {
+    auto *FTSInfo = FromFD->getTemplateSpecializationInfo();
+    auto *Template = cast_or_null<FunctionTemplateDecl>(
+        Importer.Import(FTSInfo->getTemplate()));
+    if (!Template)
+      return true;
+    TemplateSpecializationKind TSK = FTSInfo->getTemplateSpecializationKind();
+
+    // Import template arguments.
+    auto TemplArgs = FTSInfo->TemplateArguments->asArray();
+    SmallVector<TemplateArgument, 8> ToTemplArgs;
+    if (ImportTemplateArguments(TemplArgs.data(), TemplArgs.size(),
+                                ToTemplArgs))
+      return true;
+
+    TemplateArgumentList *ToTAList = TemplateArgumentList::CreateCopy(
+          Importer.getToContext(), ToTemplArgs);
+
+    TemplateArgumentListInfo ToTAInfo;
+    const auto *FromTAArgsAsWritten = FTSInfo->TemplateArgumentsAsWritten;
+    if (FromTAArgsAsWritten)
+      if (ImportTemplateArgumentListInfo(*FromTAArgsAsWritten, ToTAInfo))
+        return true;
+
+    SourceLocation POI = Importer.Import(FTSInfo->getPointOfInstantiation());
+
+    ToFD->setFunctionTemplateSpecialization(
+        Template, ToTAList, /* InsertPos= */ nullptr,
+        TSK, FromTAArgsAsWritten ? &ToTAInfo : nullptr, POI);
+    return false;
+  }
+
+  case FunctionDecl::TK_DependentFunctionTemplateSpecialization: {
+    auto *FromInfo = FromFD->getDependentSpecializationInfo();
+    UnresolvedSet<8> TemplDecls;
+    unsigned NumTemplates = FromInfo->getNumTemplates();
+    for (unsigned I = 0; I < NumTemplates; I++) {
+      if (auto *ToFTD = cast_or_null<FunctionTemplateDecl>(
+              Importer.Import(FromInfo->getTemplate(I))))
+        TemplDecls.addDecl(ToFTD);
+      else
+        return true;
+    }
+
+    // Import TemplateArgumentListInfo.
+    TemplateArgumentListInfo ToTAInfo;
+    if (ImportTemplateArgumentListInfo(
+            FromInfo->getLAngleLoc(), FromInfo->getRAngleLoc(),
+            llvm::makeArrayRef(FromInfo->getTemplateArgs(),
+                               FromInfo->getNumTemplateArgs()),
+            ToTAInfo))
+      return true;
+
+    ToFD->setDependentTemplateSpecialization(Importer.getToContext(),
+                                             TemplDecls, ToTAInfo);
+    return false;
+  }
+  }
+  llvm_unreachable("All cases should be covered!");
+}
+
 Decl *ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
   // Import the major distinguishing characteristics of this function.
   DeclContext *DC, *LexicalDC;
@@ -1942,15 +2323,18 @@
     Parameters.push_back(ToP);
   }
   
-  // Create the imported function.
   TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
+  if (D->getTypeSourceInfo() && !TInfo)
+    return nullptr;
+
+  // Create the imported function.
   FunctionDecl *ToFunction = nullptr;
   SourceLocation InnerLocStart = Importer.Import(D->getInnerLocStart());
   if (CXXConstructorDecl *FromConstructor = dyn_cast<CXXConstructorDecl>(D)) {
     ToFunction = CXXConstructorDecl::Create(Importer.getToContext(),
                                             cast<CXXRecordDecl>(DC),
                                             InnerLocStart,
-                                            NameInfo, T, TInfo, 
+                                            NameInfo, T, TInfo,
                                             FromConstructor->isExplicit(),
                                             D->isInlineSpecified(), 
                                             D->isImplicit(),
@@ -2016,9 +2400,9 @@
   Importer.Imported(D, ToFunction);
 
   // Set the parameters.
-  for (unsigned I = 0, N = Parameters.size(); I != N; ++I) {
-    Parameters[I]->setOwningFunction(ToFunction);
-    ToFunction->addDeclInternal(Parameters[I]);
+  for (ParmVarDecl *Param : Parameters) {
+    Param->setOwningFunction(ToFunction);
+    ToFunction->addDeclInternal(Param);
   }
   ToFunction->setParams(Parameters);
 
@@ -2028,6 +2412,16 @@
     ToFunction->setPreviousDecl(Recent);
   }
 
+  // We need to complete creation of FunctionProtoTypeLoc manually with setting
+  // params it refers to.
+  if (TInfo) {
+    if (auto ProtoLoc =
+        TInfo->getTypeLoc().IgnoreParens().getAs<FunctionProtoTypeLoc>()) {
+      for (unsigned I = 0, N = Parameters.size(); I != N; ++I)
+        ProtoLoc.setParam(I, Parameters[I]);
+    }
+  }
+
   if (usedDifferentExceptionSpec) {
     // Update FunctionProtoType::ExtProtoInfo.
     QualType T = Importer.Import(D->getType());
@@ -2045,8 +2439,17 @@
 
   // FIXME: Other bits to merge?
 
+  // If it is a template, import all related things.
+  if (ImportTemplateInformation(D, ToFunction))
+    return nullptr;
+
   // Add this function to the lexical context.
-  LexicalDC->addDeclInternal(ToFunction);
+  // NOTE: If the function is templated declaration, it should be not added into
+  // LexicalDC. But described template is imported during import of
+  // FunctionTemplateDecl (it happens later). So, we use source declaration
+  // to determine if we should add the result function.
+  if (!D->getDescribedFunctionTemplate())
+    LexicalDC->addDeclInternal(ToFunction);
 
   if (auto *FromCXXMethod = dyn_cast<CXXMethodDecl>(D))
     ImportOverrides(cast<CXXMethodDecl>(ToFunction), FromCXXMethod);
@@ -2453,7 +2856,10 @@
   ToVar->setAccess(D->getAccess());
   ToVar->setLexicalDeclContext(LexicalDC);
   Importer.Imported(D, ToVar);
-  LexicalDC->addDeclInternal(ToVar);
+
+  // Templated declarations should never appear in the enclosing DeclContext.
+  if (!D->getDescribedVarTemplate())
+    LexicalDC->addDeclInternal(ToVar);
 
   if (!D->isFileVarDecl() &&
       D->isUsed())
@@ -2540,6 +2946,14 @@
   if (FromDefArg && !ToDefArg)
     return nullptr;
 
+  if (D->isObjCMethodParameter()) {
+    ToParm->setObjCMethodScopeInfo(D->getFunctionScopeIndex());
+    ToParm->setObjCDeclQualifier(D->getObjCDeclQualifier());
+  } else {
+    ToParm->setScopeInfo(D->getFunctionScopeDepth(),
+                         D->getFunctionScopeIndex());
+  }
+
   if (D->isUsed())
     ToParm->setIsUsed();
 
@@ -2650,9 +3064,13 @@
     ToParams[I]->setOwningFunction(ToMethod);
     ToMethod->addDeclInternal(ToParams[I]);
   }
+
   SmallVector<SourceLocation, 12> SelLocs;
   D->getSelectorLocs(SelLocs);
-  ToMethod->setMethodParams(Importer.getToContext(), ToParams, SelLocs); 
+  for (SourceLocation &Loc : SelLocs)
+    Loc = Importer.Import(Loc);
+
+  ToMethod->setMethodParams(Importer.getToContext(), ToParams, SelLocs);
 
   ToMethod->setLexicalDeclContext(LexicalDC);
   Importer.Imported(D, ToMethod);
@@ -2891,6 +3309,178 @@
   return ToLinkageSpec;
 }
 
+Decl *ASTNodeImporter::VisitUsingDecl(UsingDecl *D) {
+  DeclContext *DC, *LexicalDC;
+  DeclarationName Name;
+  SourceLocation Loc;
+  NamedDecl *ToD = nullptr;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
+    return nullptr;
+  if (ToD)
+    return ToD;
+
+  DeclarationNameInfo NameInfo(Name,
+                               Importer.Import(D->getNameInfo().getLoc()));
+  ImportDeclarationNameLoc(D->getNameInfo(), NameInfo);
+
+  UsingDecl *ToUsing = UsingDecl::Create(Importer.getToContext(), DC,
+                                         Importer.Import(D->getUsingLoc()),
+                                         Importer.Import(D->getQualifierLoc()),
+                                         NameInfo, D->hasTypename());
+  ToUsing->setLexicalDeclContext(LexicalDC);
+  LexicalDC->addDeclInternal(ToUsing);
+  Importer.Imported(D, ToUsing);
+
+  if (NamedDecl *FromPattern =
+      Importer.getFromContext().getInstantiatedFromUsingDecl(D)) {
+    if (NamedDecl *ToPattern =
+        dyn_cast_or_null<NamedDecl>(Importer.Import(FromPattern)))
+      Importer.getToContext().setInstantiatedFromUsingDecl(ToUsing, ToPattern);
+    else
+      return nullptr;
+  }
+
+  for (UsingShadowDecl *FromShadow : D->shadows()) {
+    if (UsingShadowDecl *ToShadow =
+        dyn_cast_or_null<UsingShadowDecl>(Importer.Import(FromShadow)))
+      ToUsing->addShadowDecl(ToShadow);
+    else
+      // FIXME: We return a nullptr here but the definition is already created
+      // and available with lookups. How to fix this?..
+      return nullptr;
+  }
+  return ToUsing;
+}
+
+Decl *ASTNodeImporter::VisitUsingShadowDecl(UsingShadowDecl *D) {
+  DeclContext *DC, *LexicalDC;
+  DeclarationName Name;
+  SourceLocation Loc;
+  NamedDecl *ToD = nullptr;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
+    return nullptr;
+  if (ToD)
+    return ToD;
+
+  UsingDecl *ToUsing = dyn_cast_or_null<UsingDecl>(
+        Importer.Import(D->getUsingDecl()));
+  if (!ToUsing)
+    return nullptr;
+
+  NamedDecl *ToTarget = dyn_cast_or_null<NamedDecl>(
+        Importer.Import(D->getTargetDecl()));
+  if (!ToTarget)
+    return nullptr;
+
+  UsingShadowDecl *ToShadow = UsingShadowDecl::Create(
+        Importer.getToContext(), DC, Loc, ToUsing, ToTarget);
+
+  ToShadow->setLexicalDeclContext(LexicalDC);
+  ToShadow->setAccess(D->getAccess());
+  Importer.Imported(D, ToShadow);
+
+  if (UsingShadowDecl *FromPattern =
+      Importer.getFromContext().getInstantiatedFromUsingShadowDecl(D)) {
+    if (UsingShadowDecl *ToPattern =
+        dyn_cast_or_null<UsingShadowDecl>(Importer.Import(FromPattern)))
+      Importer.getToContext().setInstantiatedFromUsingShadowDecl(ToShadow,
+                                                                 ToPattern);
+    else
+      // FIXME: We return a nullptr here but the definition is already created
+      // and available with lookups. How to fix this?..
+      return nullptr;
+  }
+
+  LexicalDC->addDeclInternal(ToShadow);
+
+  return ToShadow;
+}
+
+
+Decl *ASTNodeImporter::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) {
+  DeclContext *DC, *LexicalDC;
+  DeclarationName Name;
+  SourceLocation Loc;
+  NamedDecl *ToD = nullptr;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
+    return nullptr;
+  if (ToD)
+    return ToD;
+
+  DeclContext *ToComAncestor = Importer.ImportContext(D->getCommonAncestor());
+  if (!ToComAncestor)
+    return nullptr;
+
+  NamespaceDecl *ToNominated = cast_or_null<NamespaceDecl>(
+        Importer.Import(D->getNominatedNamespace()));
+  if (!ToNominated)
+    return nullptr;
+
+  UsingDirectiveDecl *ToUsingDir = UsingDirectiveDecl::Create(
+        Importer.getToContext(), DC, Importer.Import(D->getUsingLoc()),
+        Importer.Import(D->getNamespaceKeyLocation()),
+        Importer.Import(D->getQualifierLoc()),
+        Importer.Import(D->getIdentLocation()), ToNominated, ToComAncestor);
+  ToUsingDir->setLexicalDeclContext(LexicalDC);
+  LexicalDC->addDeclInternal(ToUsingDir);
+  Importer.Imported(D, ToUsingDir);
+
+  return ToUsingDir;
+}
+
+Decl *ASTNodeImporter::VisitUnresolvedUsingValueDecl(
+    UnresolvedUsingValueDecl *D) {
+  DeclContext *DC, *LexicalDC;
+  DeclarationName Name;
+  SourceLocation Loc;
+  NamedDecl *ToD = nullptr;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
+    return nullptr;
+  if (ToD)
+    return ToD;
+
+  DeclarationNameInfo NameInfo(Name, Importer.Import(D->getNameInfo().getLoc()));
+  ImportDeclarationNameLoc(D->getNameInfo(), NameInfo);
+
+  UnresolvedUsingValueDecl *ToUsingValue = UnresolvedUsingValueDecl::Create(
+        Importer.getToContext(), DC, Importer.Import(D->getUsingLoc()),
+        Importer.Import(D->getQualifierLoc()), NameInfo,
+        Importer.Import(D->getEllipsisLoc()));
+
+  Importer.Imported(D, ToUsingValue);
+  ToUsingValue->setAccess(D->getAccess());
+  ToUsingValue->setLexicalDeclContext(LexicalDC);
+  LexicalDC->addDeclInternal(ToUsingValue);
+
+  return ToUsingValue;
+}
+
+Decl *ASTNodeImporter::VisitUnresolvedUsingTypenameDecl(
+    UnresolvedUsingTypenameDecl *D) {
+  DeclContext *DC, *LexicalDC;
+  DeclarationName Name;
+  SourceLocation Loc;
+  NamedDecl *ToD = nullptr;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
+    return nullptr;
+  if (ToD)
+    return ToD;
+
+  UnresolvedUsingTypenameDecl *ToUsing = UnresolvedUsingTypenameDecl::Create(
+        Importer.getToContext(), DC, Importer.Import(D->getUsingLoc()),
+        Importer.Import(D->getTypenameLoc()),
+        Importer.Import(D->getQualifierLoc()), Loc, Name,
+        Importer.Import(D->getEllipsisLoc()));
+
+  Importer.Imported(D, ToUsing);
+  ToUsing->setAccess(D->getAccess());
+  ToUsing->setLexicalDeclContext(LexicalDC);
+  LexicalDC->addDeclInternal(ToUsing);
+
+  return ToUsing;
+}
+
+
 bool ASTNodeImporter::ImportDefinition(ObjCInterfaceDecl *From, 
                                        ObjCInterfaceDecl *To,
                                        ImportDefinitionKind Kind) {
@@ -3465,13 +4055,12 @@
       return nullptr;
   }
 
-  CXXRecordDecl *DTemplated = D->getTemplatedDecl();
-  
+  CXXRecordDecl *FromTemplated = D->getTemplatedDecl();
+
   // Create the declaration that is being templated.
-  // Create the declaration that is being templated.
-  CXXRecordDecl *D2Templated = cast_or_null<CXXRecordDecl>(
-        Importer.Import(DTemplated));
-  if (!D2Templated)
+  CXXRecordDecl *ToTemplated = cast_or_null<CXXRecordDecl>(
+        Importer.Import(FromTemplated));
+  if (!ToTemplated)
     return nullptr;
 
   // Resolve possible cyclic import.
@@ -3479,15 +4068,15 @@
     return AlreadyImported;
 
   // Create the class template declaration itself.
-  TemplateParameterList *TemplateParams
-    = ImportTemplateParameterList(D->getTemplateParameters());
+  TemplateParameterList *TemplateParams =
+      ImportTemplateParameterList(D->getTemplateParameters());
   if (!TemplateParams)
     return nullptr;
 
   ClassTemplateDecl *D2 = ClassTemplateDecl::Create(Importer.getToContext(), DC, 
                                                     Loc, Name, TemplateParams, 
-                                                    D2Templated);
-  D2Templated->setDescribedClassTemplate(D2);    
+                                                    ToTemplated);
+  ToTemplated->setDescribedClassTemplate(D2);
   
   D2->setAccess(D->getAccess());
   D2->setLexicalDeclContext(LexicalDC);
@@ -3495,10 +4084,10 @@
   
   // Note the relationship between the class templates.
   Importer.Imported(D, D2);
-  Importer.Imported(DTemplated, D2Templated);
+  Importer.Imported(FromTemplated, ToTemplated);
 
-  if (DTemplated->isCompleteDefinition() &&
-      !D2Templated->isCompleteDefinition()) {
+  if (FromTemplated->isCompleteDefinition() &&
+      !ToTemplated->isCompleteDefinition()) {
     // FIXME: Import definition!
   }
   
@@ -3573,14 +4162,9 @@
 
       // Import TemplateArgumentListInfo
       TemplateArgumentListInfo ToTAInfo;
-      auto &ASTTemplateArgs = *PartialSpec->getTemplateArgsAsWritten();
-      for (unsigned I = 0, E = ASTTemplateArgs.NumTemplateArgs; I < E; ++I) {
-        bool Error = false;
-        auto ToLoc = ImportTemplateArgumentLoc(ASTTemplateArgs[I], Error);
-        if (Error)
-          return nullptr;
-        ToTAInfo.addArgument(ToLoc);
-      }
+      const auto &ASTTemplateArgs = *PartialSpec->getTemplateArgsAsWritten();
+      if (ImportTemplateArgumentListInfo(ASTTemplateArgs, ToTAInfo))
+        return nullptr;
 
       QualType CanonInjType = Importer.Import(
             PartialSpec->getInjectedSpecializationType());
@@ -3711,21 +4295,8 @@
     return nullptr;
 
   // Create the declaration that is being templated.
-  SourceLocation StartLoc = Importer.Import(DTemplated->getLocStart());
-  SourceLocation IdLoc = Importer.Import(DTemplated->getLocation());
-  TypeSourceInfo *TInfo = Importer.Import(DTemplated->getTypeSourceInfo());
-  VarDecl *D2Templated = VarDecl::Create(Importer.getToContext(), DC, StartLoc,
-                                         IdLoc, Name.getAsIdentifierInfo(), T,
-                                         TInfo, DTemplated->getStorageClass());
-  D2Templated->setAccess(DTemplated->getAccess());
-  D2Templated->setQualifierInfo(Importer.Import(DTemplated->getQualifierLoc()));
-  D2Templated->setLexicalDeclContext(LexicalDC);
-
-  // Importer.Imported(DTemplated, D2Templated);
-  // LexicalDC->addDeclInternal(D2Templated);
-
-  // Merge the initializer.
-  if (ImportDefinition(DTemplated, D2Templated))
+  auto *ToTemplated = dyn_cast_or_null<VarDecl>(Importer.Import(DTemplated));
+  if (!ToTemplated)
     return nullptr;
 
   // Create the variable template declaration itself.
@@ -3734,24 +4305,24 @@
   if (!TemplateParams)
     return nullptr;
 
-  VarTemplateDecl *D2 = VarTemplateDecl::Create(
-      Importer.getToContext(), DC, Loc, Name, TemplateParams, D2Templated);
-  D2Templated->setDescribedVarTemplate(D2);
+  VarTemplateDecl *ToVarTD = VarTemplateDecl::Create(
+      Importer.getToContext(), DC, Loc, Name, TemplateParams, ToTemplated);
+  ToTemplated->setDescribedVarTemplate(ToVarTD);
 
-  D2->setAccess(D->getAccess());
-  D2->setLexicalDeclContext(LexicalDC);
-  LexicalDC->addDeclInternal(D2);
+  ToVarTD->setAccess(D->getAccess());
+  ToVarTD->setLexicalDeclContext(LexicalDC);
+  LexicalDC->addDeclInternal(ToVarTD);
 
   // Note the relationship between the variable templates.
-  Importer.Imported(D, D2);
-  Importer.Imported(DTemplated, D2Templated);
+  Importer.Imported(D, ToVarTD);
+  Importer.Imported(DTemplated, ToTemplated);
 
   if (DTemplated->isThisDeclarationADefinition() &&
-      !D2Templated->isThisDeclarationADefinition()) {
+      !ToTemplated->isThisDeclarationADefinition()) {
     // FIXME: Import definition!
   }
 
-  return D2;
+  return ToVarTD;
 }
 
 Decl *ASTNodeImporter::VisitVarTemplateSpecializationDecl(
@@ -3820,14 +4391,57 @@
     QualType T = Importer.Import(D->getType());
     if (T.isNull())
       return nullptr;
-    TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
 
+    TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
+    if (D->getTypeSourceInfo() && !TInfo)
+      return nullptr;
+
+    TemplateArgumentListInfo ToTAInfo;
+    if (ImportTemplateArgumentListInfo(D->getTemplateArgsInfo(), ToTAInfo))
+      return nullptr;
+
+    using PartVarSpecDecl = VarTemplatePartialSpecializationDecl;
     // Create a new specialization.
-    D2 = VarTemplateSpecializationDecl::Create(
-        Importer.getToContext(), DC, StartLoc, IdLoc, VarTemplate, T, TInfo,
-        D->getStorageClass(), TemplateArgs);
+    if (auto *FromPartial = dyn_cast<PartVarSpecDecl>(D)) {
+      // Import TemplateArgumentListInfo
+      TemplateArgumentListInfo ArgInfos;
+      const auto *FromTAArgsAsWritten = FromPartial->getTemplateArgsAsWritten();
+      // NOTE: FromTAArgsAsWritten and template parameter list are non-null.
+      if (ImportTemplateArgumentListInfo(*FromTAArgsAsWritten, ArgInfos))
+        return nullptr;
+
+      TemplateParameterList *ToTPList = ImportTemplateParameterList(
+            FromPartial->getTemplateParameters());
+      if (!ToTPList)
+        return nullptr;
+
+      auto *ToPartial = PartVarSpecDecl::Create(
+          Importer.getToContext(), DC, StartLoc, IdLoc, ToTPList, VarTemplate,
+          T, TInfo, D->getStorageClass(), TemplateArgs, ArgInfos);
+
+      auto *FromInst = FromPartial->getInstantiatedFromMember();
+      auto *ToInst = cast_or_null<PartVarSpecDecl>(Importer.Import(FromInst));
+      if (FromInst && !ToInst)
+        return nullptr;
+
+      ToPartial->setInstantiatedFromMember(ToInst);
+      if (FromPartial->isMemberSpecialization())
+        ToPartial->setMemberSpecialization();
+
+      D2 = ToPartial;
+
+    } else { // Full specialization
+      D2 = VarTemplateSpecializationDecl::Create(
+          Importer.getToContext(), DC, StartLoc, IdLoc, VarTemplate, T, TInfo,
+          D->getStorageClass(), TemplateArgs);
+    }
+
+    SourceLocation POI = D->getPointOfInstantiation();
+    if (POI.isValid())
+      D2->setPointOfInstantiation(Importer.Import(POI));
+
     D2->setSpecializationKind(D->getSpecializationKind());
-    D2->setTemplateArgsInfo(D->getTemplateArgsInfo());
+    D2->setTemplateArgsInfo(ToTAInfo);
 
     // Add this specialization to the class template.
     VarTemplate->AddSpecialization(D2, InsertPos);
@@ -3835,18 +4449,87 @@
     // Import the qualifier, if any.
     D2->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
 
+    if (D->isConstexpr())
+      D2->setConstexpr(true);
+
     // Add the specialization to this context.
     D2->setLexicalDeclContext(LexicalDC);
     LexicalDC->addDeclInternal(D2);
+
+    D2->setAccess(D->getAccess());
   }
+
   Importer.Imported(D, D2);
 
-  if (D->isThisDeclarationADefinition() && ImportDefinition(D, D2))
+  // NOTE: isThisDeclarationADefinition() can return DeclarationOnly even if
+  // declaration has initializer. Should this be fixed in the AST?.. Anyway,
+  // we have to check the declaration for initializer - otherwise, it won't be
+  // imported.
+  if ((D->isThisDeclarationADefinition() || D->hasInit()) &&
+      ImportDefinition(D, D2))
     return nullptr;
 
   return D2;
 }
 
+Decl *ASTNodeImporter::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
+  DeclContext *DC, *LexicalDC;
+  DeclarationName Name;
+  SourceLocation Loc;
+  NamedDecl *ToD;
+
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
+    return nullptr;
+
+  if (ToD)
+    return ToD;
+
+  // Try to find a function in our own ("to") context with the same name, same
+  // type, and in the same context as the function we're importing.
+  if (!LexicalDC->isFunctionOrMethod()) {
+    unsigned IDNS = Decl::IDNS_Ordinary;
+    SmallVector<NamedDecl *, 2> FoundDecls;
+    DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
+    for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
+      if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
+        continue;
+
+      if (FunctionTemplateDecl *FoundFunction =
+              dyn_cast<FunctionTemplateDecl>(FoundDecls[I])) {
+        if (FoundFunction->hasExternalFormalLinkage() &&
+            D->hasExternalFormalLinkage()) {
+          if (IsStructuralMatch(D, FoundFunction)) {
+            Importer.Imported(D, FoundFunction);
+            // FIXME: Actually try to merge the body and other attributes.
+            return FoundFunction;
+          }
+        }
+      }
+    }
+  }
+
+  TemplateParameterList *Params =
+      ImportTemplateParameterList(D->getTemplateParameters());
+  if (!Params)
+    return nullptr;
+
+  FunctionDecl *TemplatedFD =
+      cast_or_null<FunctionDecl>(Importer.Import(D->getTemplatedDecl()));
+  if (!TemplatedFD)
+    return nullptr;
+
+  FunctionTemplateDecl *ToFunc = FunctionTemplateDecl::Create(
+      Importer.getToContext(), DC, Loc, Name, Params, TemplatedFD);
+
+  TemplatedFD->setDescribedFunctionTemplate(ToFunc);
+  ToFunc->setAccess(D->getAccess());
+  ToFunc->setLexicalDeclContext(LexicalDC);
+  Importer.Imported(D, ToFunc);
+
+  LexicalDC->addDeclInternal(ToFunc);
+  return ToFunc;
+}
+
 //----------------------------------------------------------------------------
 // Import Statements
 //----------------------------------------------------------------------------
@@ -3971,9 +4654,8 @@
 
   SourceLocation ToLBraceLoc = Importer.Import(S->getLBracLoc());
   SourceLocation ToRBraceLoc = Importer.Import(S->getRBracLoc());
-  return new (Importer.getToContext()) CompoundStmt(Importer.getToContext(),
-                                                    ToStmts,
-                                                    ToLBraceLoc, ToRBraceLoc);
+  return CompoundStmt::Create(Importer.getToContext(), ToStmts, ToLBraceLoc,
+                              ToRBraceLoc);
 }
 
 Stmt *ASTNodeImporter::VisitCaseStmt(CaseStmt *S) {
@@ -4461,13 +5143,8 @@
   TemplateArgumentListInfo ToTAInfo;
   TemplateArgumentListInfo *ResInfo = nullptr;
   if (E->hasExplicitTemplateArgs()) {
-    for (const auto &FromLoc : E->template_arguments()) {
-      bool Error = false;
-      TemplateArgumentLoc ToTALoc = ImportTemplateArgumentLoc(FromLoc, Error);
-      if (Error)
-        return nullptr;
-      ToTAInfo.addArgument(ToTALoc);
-    }
+    if (ImportTemplateArgumentListInfo(E->template_arguments(), ToTAInfo))
+      return nullptr;
     ResInfo = &ToTAInfo;
   }
 
@@ -4687,14 +5364,13 @@
   if (!SubExpr)
     return nullptr;
 
-  return new (Importer.getToContext()) UnaryOperator(SubExpr, E->getOpcode(),
-                                                     T, E->getValueKind(),
-                                                     E->getObjectKind(),
-                                         Importer.Import(E->getOperatorLoc()));                                        
+  return new (Importer.getToContext()) UnaryOperator(
+      SubExpr, E->getOpcode(), T, E->getValueKind(), E->getObjectKind(),
+      Importer.Import(E->getOperatorLoc()), E->canOverflow());
 }
 
-Expr *ASTNodeImporter::VisitUnaryExprOrTypeTraitExpr(
-                                            UnaryExprOrTypeTraitExpr *E) {
+Expr *
+ASTNodeImporter::VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E) {
   QualType ResultType = Importer.Import(E->getType());
   
   if (E->isArgumentType()) {
@@ -5134,6 +5810,11 @@
   if (T.isNull())
     return nullptr;
 
+
+  TypeSourceInfo *TInfo = Importer.Import(CE->getTypeSourceInfo());
+  if (!TInfo)
+    return nullptr;
+
   SmallVector<Expr *, 8> Args(CE->getNumArgs());
   if (ImportContainerChecked(CE->arguments(), Args))
     return nullptr;
@@ -5143,18 +5824,11 @@
   if (!Ctor)
     return nullptr;
 
-  return CXXTemporaryObjectExpr::Create(
-        Importer.getToContext(), T,
-        Importer.Import(CE->getLocStart()),
-        Ctor,
-        CE->isElidable(),
-        Args,
-        CE->hadMultipleCandidates(),
-        CE->isListInitialization(),
-        CE->isStdInitListInitialization(),
-        CE->requiresZeroInitialization(),
-        CE->getConstructionKind(),
-        Importer.Import(CE->getParenOrBraceRange()));
+  return new (Importer.getToContext()) CXXTemporaryObjectExpr(
+      Importer.getToContext(), Ctor, T, TInfo, Args,
+      Importer.Import(CE->getParenOrBraceRange()), CE->hadMultipleCandidates(),
+      CE->isListInitialization(), CE->isStdInitListInitialization(),
+      CE->requiresZeroInitialization());
 }
 
 Expr *
@@ -5180,6 +5854,45 @@
   return ToMTE;
 }
 
+Expr *ASTNodeImporter::VisitPackExpansionExpr(PackExpansionExpr *E) {
+  QualType T = Importer.Import(E->getType());
+  if (T.isNull())
+    return nullptr;
+
+  Expr *Pattern = Importer.Import(E->getPattern());
+  if (!Pattern)
+    return nullptr;
+
+  return new (Importer.getToContext()) PackExpansionExpr(
+        T, Pattern, Importer.Import(E->getEllipsisLoc()),
+        E->getNumExpansions());
+}
+
+Expr *ASTNodeImporter::VisitSizeOfPackExpr(SizeOfPackExpr *E) {
+  auto *Pack = cast_or_null<NamedDecl>(Importer.Import(E->getPack()));
+  if (!Pack)
+    return nullptr;
+
+  Optional<unsigned> Length;
+
+  if (!E->isValueDependent())
+    Length = E->getPackLength();
+
+  SmallVector<TemplateArgument, 8> PartialArguments;
+  if (E->isPartiallySubstituted()) {
+    if (ImportTemplateArguments(E->getPartialArguments().data(),
+                                E->getPartialArguments().size(),
+                                PartialArguments))
+      return nullptr;
+  }
+
+  return SizeOfPackExpr::Create(
+      Importer.getToContext(), Importer.Import(E->getOperatorLoc()), Pack,
+      Importer.Import(E->getPackLoc()), Importer.Import(E->getRParenLoc()),
+      Length, PartialArguments);
+}
+
+
 Expr *ASTNodeImporter::VisitCXXNewExpr(CXXNewExpr *CE) {
   QualType T = Importer.Import(CE->getType());
   if (T.isNull())
@@ -5363,6 +6076,137 @@
                             E->getObjectKind());
 }
 
+Expr *ASTNodeImporter::VisitCXXPseudoDestructorExpr(
+    CXXPseudoDestructorExpr *E) {
+
+  Expr *BaseE = Importer.Import(E->getBase());
+  if (!BaseE)
+    return nullptr;
+
+  TypeSourceInfo *ScopeInfo = Importer.Import(E->getScopeTypeInfo());
+  if (!ScopeInfo && E->getScopeTypeInfo())
+    return nullptr;
+
+  PseudoDestructorTypeStorage Storage;
+  if (IdentifierInfo *FromII = E->getDestroyedTypeIdentifier()) {
+    IdentifierInfo *ToII = Importer.Import(FromII);
+    if (!ToII)
+      return nullptr;
+    Storage = PseudoDestructorTypeStorage(
+          ToII, Importer.Import(E->getDestroyedTypeLoc()));
+  } else {
+    TypeSourceInfo *TI = Importer.Import(E->getDestroyedTypeInfo());
+    if (!TI)
+      return nullptr;
+    Storage = PseudoDestructorTypeStorage(TI);
+  }
+
+  return new (Importer.getToContext()) CXXPseudoDestructorExpr(
+        Importer.getToContext(), BaseE, E->isArrow(),
+        Importer.Import(E->getOperatorLoc()),
+        Importer.Import(E->getQualifierLoc()),
+        ScopeInfo, Importer.Import(E->getColonColonLoc()),
+        Importer.Import(E->getTildeLoc()), Storage);
+}
+
+Expr *ASTNodeImporter::VisitCXXDependentScopeMemberExpr(
+    CXXDependentScopeMemberExpr *E) {
+  Expr *Base = nullptr;
+  if (!E->isImplicitAccess()) {
+    Base = Importer.Import(E->getBase());
+    if (!Base)
+      return nullptr;
+  }
+
+  QualType BaseType = Importer.Import(E->getBaseType());
+  if (BaseType.isNull())
+    return nullptr;
+
+  TemplateArgumentListInfo ToTAInfo, *ResInfo = nullptr;
+  if (E->hasExplicitTemplateArgs()) {
+    if (ImportTemplateArgumentListInfo(E->getLAngleLoc(), E->getRAngleLoc(),
+                                       E->template_arguments(), ToTAInfo))
+      return nullptr;
+    ResInfo = &ToTAInfo;
+  }
+
+  DeclarationName Name = Importer.Import(E->getMember());
+  if (!E->getMember().isEmpty() && Name.isEmpty())
+    return nullptr;
+
+  DeclarationNameInfo MemberNameInfo(Name, Importer.Import(E->getMemberLoc()));
+  // Import additional name location/type info.
+  ImportDeclarationNameLoc(E->getMemberNameInfo(), MemberNameInfo);
+  auto ToFQ = Importer.Import(E->getFirstQualifierFoundInScope());
+  if (!ToFQ && E->getFirstQualifierFoundInScope())
+    return nullptr;
+
+  return CXXDependentScopeMemberExpr::Create(
+      Importer.getToContext(), Base, BaseType, E->isArrow(),
+      Importer.Import(E->getOperatorLoc()),
+      Importer.Import(E->getQualifierLoc()),
+      Importer.Import(E->getTemplateKeywordLoc()),
+      cast_or_null<NamedDecl>(ToFQ), MemberNameInfo, ResInfo);
+}
+
+Expr *ASTNodeImporter::VisitCXXUnresolvedConstructExpr(
+    CXXUnresolvedConstructExpr *CE) {
+
+  unsigned NumArgs = CE->arg_size();
+
+  llvm::SmallVector<Expr *, 8> ToArgs(NumArgs);
+  if (ImportArrayChecked(CE->arg_begin(), CE->arg_end(), ToArgs.begin()))
+    return nullptr;
+
+  return CXXUnresolvedConstructExpr::Create(
+      Importer.getToContext(), Importer.Import(CE->getTypeSourceInfo()),
+      Importer.Import(CE->getLParenLoc()), llvm::makeArrayRef(ToArgs),
+      Importer.Import(CE->getRParenLoc()));
+}
+
+Expr *ASTNodeImporter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *E) {
+  CXXRecordDecl *NamingClass =
+      cast_or_null<CXXRecordDecl>(Importer.Import(E->getNamingClass()));
+  if (E->getNamingClass() && !NamingClass)
+    return nullptr;
+
+  DeclarationName Name = Importer.Import(E->getName());
+  if (E->getName() && !Name)
+    return nullptr;
+
+  DeclarationNameInfo NameInfo(Name, Importer.Import(E->getNameLoc()));
+  // Import additional name location/type info.
+  ImportDeclarationNameLoc(E->getNameInfo(), NameInfo);
+
+  UnresolvedSet<8> ToDecls;
+  for (Decl *D : E->decls()) {
+    if (NamedDecl *To = cast_or_null<NamedDecl>(Importer.Import(D)))
+      ToDecls.addDecl(To);
+    else
+      return nullptr;
+  }
+
+  TemplateArgumentListInfo ToTAInfo, *ResInfo = nullptr;
+  if (E->hasExplicitTemplateArgs()) {
+    if (ImportTemplateArgumentListInfo(E->getLAngleLoc(), E->getRAngleLoc(),
+                                       E->template_arguments(), ToTAInfo))
+      return nullptr;
+    ResInfo = &ToTAInfo;
+  }
+
+  if (ResInfo || E->getTemplateKeywordLoc().isValid())
+    return UnresolvedLookupExpr::Create(
+        Importer.getToContext(), NamingClass,
+        Importer.Import(E->getQualifierLoc()),
+        Importer.Import(E->getTemplateKeywordLoc()), NameInfo, E->requiresADL(),
+        ResInfo, ToDecls.begin(), ToDecls.end());
+
+  return UnresolvedLookupExpr::Create(
+      Importer.getToContext(), NamingClass,
+      Importer.Import(E->getQualifierLoc()), NameInfo, E->requiresADL(),
+      E->isOverloaded(), ToDecls.begin(), ToDecls.end());
+}
+
 Expr *ASTNodeImporter::VisitCallExpr(CallExpr *E) {
   QualType T = Importer.Import(E->getType());
   if (T.isNull())
@@ -5373,16 +6217,9 @@
     return nullptr;
 
   unsigned NumArgs = E->getNumArgs();
-
   llvm::SmallVector<Expr *, 2> ToArgs(NumArgs);
-
-  for (unsigned ai = 0, ae = NumArgs; ai != ae; ++ai) {
-    Expr *FromArg = E->getArg(ai);
-    Expr *ToArg = Importer.Import(FromArg);
-    if (!ToArg)
-      return nullptr;
-    ToArgs[ai] = ToArg;
-  }
+  if (ImportContainerChecked(E->arguments(), ToArgs))
+     return nullptr;
 
   Expr **ToArgs_Copied = new (Importer.getToContext()) 
     Expr*[NumArgs];
@@ -5390,12 +6227,86 @@
   for (unsigned ai = 0, ae = NumArgs; ai != ae; ++ai)
     ToArgs_Copied[ai] = ToArgs[ai];
 
+  if (const auto *OCE = dyn_cast<CXXOperatorCallExpr>(E)) {
+    return new (Importer.getToContext()) CXXOperatorCallExpr(
+          Importer.getToContext(), OCE->getOperator(), ToCallee, ToArgs, T,
+          OCE->getValueKind(), Importer.Import(OCE->getRParenLoc()),
+          OCE->getFPFeatures());
+  }
+
   return new (Importer.getToContext())
     CallExpr(Importer.getToContext(), ToCallee, 
              llvm::makeArrayRef(ToArgs_Copied, NumArgs), T, E->getValueKind(),
              Importer.Import(E->getRParenLoc()));
 }
 
+Optional<LambdaCapture>
+ASTNodeImporter::ImportLambdaCapture(const LambdaCapture &From) {
+  VarDecl *Var = nullptr;
+  if (From.capturesVariable()) {
+    Var = cast_or_null<VarDecl>(Importer.Import(From.getCapturedVar()));
+    if (!Var)
+      return None;
+  }
+
+  return LambdaCapture(Importer.Import(From.getLocation()), From.isImplicit(),
+                       From.getCaptureKind(), Var,
+                       From.isPackExpansion()
+                         ? Importer.Import(From.getEllipsisLoc())
+                         : SourceLocation());
+}
+
+Expr *ASTNodeImporter::VisitLambdaExpr(LambdaExpr *LE) {
+  CXXRecordDecl *FromClass = LE->getLambdaClass();
+  auto *ToClass = dyn_cast_or_null<CXXRecordDecl>(Importer.Import(FromClass));
+  if (!ToClass)
+    return nullptr;
+
+  // NOTE: lambda classes are created with BeingDefined flag set up.
+  // It means that ImportDefinition doesn't work for them and we should fill it
+  // manually.
+  if (ToClass->isBeingDefined()) {
+    for (auto FromField : FromClass->fields()) {
+      auto *ToField = cast_or_null<FieldDecl>(Importer.Import(FromField));
+      if (!ToField)
+        return nullptr;
+    }
+  }
+
+  auto *ToCallOp = dyn_cast_or_null<CXXMethodDecl>(
+        Importer.Import(LE->getCallOperator()));
+  if (!ToCallOp)
+    return nullptr;
+
+  ToClass->completeDefinition();
+
+  unsigned NumCaptures = LE->capture_size();
+  SmallVector<LambdaCapture, 8> Captures;
+  Captures.reserve(NumCaptures);
+  for (const auto &FromCapture : LE->captures()) {
+    if (auto ToCapture = ImportLambdaCapture(FromCapture))
+      Captures.push_back(*ToCapture);
+    else
+      return nullptr;
+  }
+
+  SmallVector<Expr *, 8> InitCaptures(NumCaptures);
+  if (ImportContainerChecked(LE->capture_inits(), InitCaptures))
+    return nullptr;
+
+  return LambdaExpr::Create(Importer.getToContext(), ToClass,
+                            Importer.Import(LE->getIntroducerRange()),
+                            LE->getCaptureDefault(),
+                            Importer.Import(LE->getCaptureDefaultLoc()),
+                            Captures,
+                            LE->hasExplicitParameters(),
+                            LE->hasExplicitResultType(),
+                            InitCaptures,
+                            Importer.Import(LE->getLocEnd()),
+                            LE->containsUnexpandedParameterPack());
+}
+
+
 Expr *ASTNodeImporter::VisitInitListExpr(InitListExpr *ILE) {
   QualType T = Importer.Import(ILE->getType());
   if (T.isNull())
@@ -5529,6 +6440,48 @@
         Replacement);
 }
 
+Expr *ASTNodeImporter::VisitTypeTraitExpr(TypeTraitExpr *E) {
+  QualType ToType = Importer.Import(E->getType());
+  if (ToType.isNull())
+    return nullptr;
+
+  SmallVector<TypeSourceInfo *, 4> ToArgs(E->getNumArgs());
+  if (ImportContainerChecked(E->getArgs(), ToArgs))
+    return nullptr;
+
+  // According to Sema::BuildTypeTrait(), if E is value-dependent,
+  // Value is always false.
+  bool ToValue = false;
+  if (!E->isValueDependent())
+    ToValue = E->getValue();
+
+  return TypeTraitExpr::Create(
+      Importer.getToContext(), ToType, Importer.Import(E->getLocStart()),
+      E->getTrait(), ToArgs, Importer.Import(E->getLocEnd()), ToValue);
+}
+
+Expr *ASTNodeImporter::VisitCXXTypeidExpr(CXXTypeidExpr *E) {
+  QualType ToType = Importer.Import(E->getType());
+  if (ToType.isNull())
+    return nullptr;
+
+  if (E->isTypeOperand()) {
+    TypeSourceInfo *TSI = Importer.Import(E->getTypeOperandSourceInfo());
+    if (!TSI)
+      return nullptr;
+
+    return new (Importer.getToContext())
+        CXXTypeidExpr(ToType, TSI, Importer.Import(E->getSourceRange()));
+  }
+
+  Expr *Op = Importer.Import(E->getExprOperand());
+  if (!Op)
+    return nullptr;
+
+  return new (Importer.getToContext())
+      CXXTypeidExpr(ToType, Op, Importer.Import(E->getSourceRange()));
+}
+
 void ASTNodeImporter::ImportOverrides(CXXMethodDecl *ToMethod,
                                       CXXMethodDecl *FromMethod) {
   for (auto *FromOverriddenMethod : FromMethod->overridden_methods())
diff --git a/lib/AST/ASTStructuralEquivalence.cpp b/lib/AST/ASTStructuralEquivalence.cpp
index 0df8e56..a563c7e 100644
--- a/lib/AST/ASTStructuralEquivalence.cpp
+++ b/lib/AST/ASTStructuralEquivalence.cpp
@@ -1153,12 +1153,22 @@
                                   D2->getTemplateParameters());
 }
 
+static bool IsTemplateDeclCommonStructurallyEquivalent(
+    StructuralEquivalenceContext &Ctx, TemplateDecl *D1, TemplateDecl *D2) {
+  if (!IsStructurallyEquivalent(D1->getIdentifier(), D2->getIdentifier()))
+    return false;
+  if (!D1->getIdentifier()) // Special name
+    if (D1->getNameAsString() != D2->getNameAsString())
+      return false;
+  return IsStructurallyEquivalent(Ctx, D1->getTemplateParameters(),
+                                  D2->getTemplateParameters());
+}
+
 static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
                                      ClassTemplateDecl *D1,
                                      ClassTemplateDecl *D2) {
   // Check template parameters.
-  if (!IsStructurallyEquivalent(Context, D1->getTemplateParameters(),
-                                D2->getTemplateParameters()))
+  if (!IsTemplateDeclCommonStructurallyEquivalent(Context, D1, D2))
     return false;
 
   // Check the templated declaration.
@@ -1166,6 +1176,18 @@
                                           D2->getTemplatedDecl());
 }
 
+static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
+                                     FunctionTemplateDecl *D1,
+                                     FunctionTemplateDecl *D2) {
+  // Check template parameters.
+  if (!IsTemplateDeclCommonStructurallyEquivalent(Context, D1, D2))
+    return false;
+
+  // Check the templated declaration.
+  return Context.IsStructurallyEquivalent(D1->getTemplatedDecl()->getType(),
+                                          D2->getTemplatedDecl()->getType());
+}
+
 /// Determine structural equivalence of two declarations.
 static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
                                      Decl *D1, Decl *D2) {
@@ -1293,6 +1315,7 @@
         // Record/non-record mismatch.
         Equivalent = false;
       }
+
     } else if (EnumDecl *Enum1 = dyn_cast<EnumDecl>(D1)) {
       if (EnumDecl *Enum2 = dyn_cast<EnumDecl>(D2)) {
         // Check for equivalent enum names.
@@ -1309,6 +1332,7 @@
         // Enum/non-enum mismatch
         Equivalent = false;
       }
+
     } else if (TypedefNameDecl *Typedef1 = dyn_cast<TypedefNameDecl>(D1)) {
       if (TypedefNameDecl *Typedef2 = dyn_cast<TypedefNameDecl>(D2)) {
         if (!::IsStructurallyEquivalent(Typedef1->getIdentifier(),
@@ -1320,17 +1344,30 @@
         // Typedef/non-typedef mismatch.
         Equivalent = false;
       }
+
     } else if (ClassTemplateDecl *ClassTemplate1 =
                    dyn_cast<ClassTemplateDecl>(D1)) {
       if (ClassTemplateDecl *ClassTemplate2 = dyn_cast<ClassTemplateDecl>(D2)) {
-        if (!::IsStructurallyEquivalent(ClassTemplate1->getIdentifier(),
-                                        ClassTemplate2->getIdentifier()) ||
-            !::IsStructurallyEquivalent(*this, ClassTemplate1, ClassTemplate2))
+        if (!::IsStructurallyEquivalent(*this, ClassTemplate1,
+                                        ClassTemplate2))
           Equivalent = false;
       } else {
         // Class template/non-class-template mismatch.
         Equivalent = false;
       }
+
+    } else if (FunctionTemplateDecl *FunctionTemplate1 =
+               dyn_cast<FunctionTemplateDecl>(D1)) {
+      if (FunctionTemplateDecl *FunctionTemplate2 =
+          dyn_cast<FunctionTemplateDecl>(D2)) {
+        if (!::IsStructurallyEquivalent(*this, FunctionTemplate1,
+                                        FunctionTemplate2))
+          Equivalent = false;
+      } else {
+        // Class template/non-class-template mismatch.
+        Equivalent = false;
+      }
+
     } else if (TemplateTypeParmDecl *TTP1 =
                    dyn_cast<TemplateTypeParmDecl>(D1)) {
       if (TemplateTypeParmDecl *TTP2 = dyn_cast<TemplateTypeParmDecl>(D2)) {
@@ -1350,6 +1387,7 @@
         // Kind mismatch.
         Equivalent = false;
       }
+
     } else if (TemplateTemplateParmDecl *TTP1 =
                    dyn_cast<TemplateTemplateParmDecl>(D1)) {
       if (TemplateTemplateParmDecl *TTP2 =
diff --git a/lib/AST/CMakeLists.txt b/lib/AST/CMakeLists.txt
index db771b5..a6f1027 100644
--- a/lib/AST/CMakeLists.txt
+++ b/lib/AST/CMakeLists.txt
@@ -49,6 +49,7 @@
   ODRHash.cpp
   OpenMPClause.cpp
   ParentMap.cpp
+  QualTypeNames.cpp
   RawCommentList.cpp
   RecordLayout.cpp
   RecordLayoutBuilder.cpp
diff --git a/lib/AST/CXXABI.h b/lib/AST/CXXABI.h
index 924ef00..06295b5 100644
--- a/lib/AST/CXXABI.h
+++ b/lib/AST/CXXABI.h
@@ -31,9 +31,16 @@
 public:
   virtual ~CXXABI();
 
-  /// Returns the width and alignment of a member pointer in bits.
-  virtual std::pair<uint64_t, unsigned>
-  getMemberPointerWidthAndAlign(const MemberPointerType *MPT) const = 0;
+  struct MemberPointerInfo {
+    uint64_t Width;
+    unsigned Align;
+    bool HasPadding;
+  };
+
+  /// Returns the width and alignment of a member pointer in bits, as well as
+  /// whether it has padding.
+  virtual MemberPointerInfo
+  getMemberPointerInfo(const MemberPointerType *MPT) const = 0;
 
   /// Returns the default calling convention for C++ methods.
   virtual CallingConv getDefaultMethodCallConv(bool isVariadic) const = 0;
diff --git a/lib/AST/CXXInheritance.cpp b/lib/AST/CXXInheritance.cpp
index fc4d8b1..9314cfd 100644
--- a/lib/AST/CXXInheritance.cpp
+++ b/lib/AST/CXXInheritance.cpp
@@ -1,4 +1,4 @@
-//===------ CXXInheritance.cpp - C++ Inheritance ----------------*- C++ -*-===//
+//===- CXXInheritance.cpp - C++ Inheritance -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,13 +10,27 @@
 // This file provides routines that help analyzing C++ inheritance hierarchies.
 //
 //===----------------------------------------------------------------------===//
+
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/RecordLayout.h"
+#include "clang/AST/TemplateName.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
 #include <algorithm>
+#include <utility>
+#include <cassert>
+#include <vector>
 
 using namespace clang;
 
@@ -26,7 +40,7 @@
   assert(NumDeclsFound == 0 && !DeclsFound &&
          "Already computed the set of declarations");
 
-  llvm::SetVector<NamedDecl *, SmallVector<NamedDecl *, 8> > Decls;
+  llvm::SetVector<NamedDecl *, SmallVector<NamedDecl *, 8>> Decls;
   for (paths_iterator Path = begin(), PathEnd = end(); Path != PathEnd; ++Path)
     Decls.insert(Path->Decls.front());
 
@@ -419,8 +433,8 @@
 
 static bool findOrdinaryMember(RecordDecl *BaseRecord, CXXBasePath &Path,
                                DeclarationName Name) {
-  const unsigned IDNS = clang::Decl::IDNS_Ordinary | clang::Decl::IDNS_Tag |
-                        clang::Decl::IDNS_Member;
+  const unsigned IDNS = Decl::IDNS_Ordinary | Decl::IDNS_Tag |
+                        Decl::IDNS_Member;
   for (Path.Decls = BaseRecord->lookup(Name);
        !Path.Decls.empty();
        Path.Decls = Path.Decls.slice(1)) {
@@ -550,26 +564,27 @@
   }
 }
 
-
 namespace {
-  class FinalOverriderCollector {
-    /// \brief The number of subobjects of a given class type that
-    /// occur within the class hierarchy.
-    llvm::DenseMap<const CXXRecordDecl *, unsigned> SubobjectCount;
 
-    /// \brief Overriders for each virtual base subobject.
-    llvm::DenseMap<const CXXRecordDecl *, CXXFinalOverriderMap *> VirtualOverriders;
+class FinalOverriderCollector {
+  /// \brief The number of subobjects of a given class type that
+  /// occur within the class hierarchy.
+  llvm::DenseMap<const CXXRecordDecl *, unsigned> SubobjectCount;
 
-    CXXFinalOverriderMap FinalOverriders;
+  /// \brief Overriders for each virtual base subobject.
+  llvm::DenseMap<const CXXRecordDecl *, CXXFinalOverriderMap *> VirtualOverriders;
 
-  public:
-    ~FinalOverriderCollector();
+  CXXFinalOverriderMap FinalOverriders;
 
-    void Collect(const CXXRecordDecl *RD, bool VirtualBase,
-                 const CXXRecordDecl *InVirtualSubobject,
-                 CXXFinalOverriderMap &Overriders);
-  };
-}
+public:
+  ~FinalOverriderCollector();
+
+  void Collect(const CXXRecordDecl *RD, bool VirtualBase,
+               const CXXRecordDecl *InVirtualSubobject,
+               CXXFinalOverriderMap &Overriders);
+};
+
+} // namespace
 
 void FinalOverriderCollector::Collect(const CXXRecordDecl *RD, 
                                       bool VirtualBase,
@@ -622,8 +637,7 @@
                                OMEnd = BaseOverriders->end();
            OM != OMEnd;
            ++OM) {
-        const CXXMethodDecl *CanonOM
-          = cast<CXXMethodDecl>(OM->first->getCanonicalDecl());
+        const CXXMethodDecl *CanonOM = OM->first->getCanonicalDecl();
         Overriders[CanonOM].add(OM->second);
       }
     }
@@ -634,10 +648,12 @@
     if (!M->isVirtual())
       continue;
 
-    CXXMethodDecl *CanonM = cast<CXXMethodDecl>(M->getCanonicalDecl());
+    CXXMethodDecl *CanonM = M->getCanonicalDecl();
+    using OverriddenMethodsRange =
+        llvm::iterator_range<CXXMethodDecl::method_iterator>;
+    OverriddenMethodsRange OverriddenMethods = CanonM->overridden_methods();
 
-    if (CanonM->begin_overridden_methods()
-                                       == CanonM->end_overridden_methods()) {
+    if (OverriddenMethods.begin() == OverriddenMethods.end()) {
       // This is a new virtual function that does not override any
       // other virtual function. Add it to the map of virtual
       // functions for which we are tracking overridders. 
@@ -656,11 +672,7 @@
     // overrider. To do so, we dig down to the original virtual
     // functions using data recursion and update all of the methods it
     // overrides.
-    typedef llvm::iterator_range<CXXMethodDecl::method_iterator>
-        OverriddenMethods;
-    SmallVector<OverriddenMethods, 4> Stack;
-    Stack.push_back(llvm::make_range(CanonM->begin_overridden_methods(),
-                                     CanonM->end_overridden_methods()));
+    SmallVector<OverriddenMethodsRange, 4> Stack(1, OverriddenMethods);
     while (!Stack.empty()) {
       for (const CXXMethodDecl *OM : Stack.pop_back_val()) {
         const CXXMethodDecl *CanonOM = OM->getCanonicalDecl();
@@ -678,14 +690,13 @@
                                UniqueVirtualMethod(CanonM, SubobjectNumber,
                                                    InVirtualSubobject));
 
-        if (CanonOM->begin_overridden_methods()
-                                       == CanonOM->end_overridden_methods())
+        auto OverriddenMethods = CanonOM->overridden_methods();
+        if (OverriddenMethods.begin() == OverriddenMethods.end())
           continue;
 
         // Continue recursion to the methods that this virtual method
         // overrides.
-        Stack.push_back(llvm::make_range(CanonOM->begin_overridden_methods(),
-                                         CanonOM->end_overridden_methods()));
+        Stack.push_back(OverriddenMethods);
       }
     }
 
diff --git a/lib/AST/Decl.cpp b/lib/AST/Decl.cpp
index f27df1d..b640dcc 100644
--- a/lib/AST/Decl.cpp
+++ b/lib/AST/Decl.cpp
@@ -1,4 +1,4 @@
-//===--- Decl.cpp - Declaration AST Node Implementation -------------------===//
+//===- Decl.cpp - Declaration AST Node Implementation ---------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,24 +16,59 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTLambda.h"
 #include "clang/AST/ASTMutationListener.h"
-#include "clang/AST/Attr.h"
+#include "clang/AST/CanonicalType.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclOpenMP.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/ExternalASTSource.h"
+#include "clang/AST/ODRHash.h"
 #include "clang/AST/PrettyPrinter.h"
+#include "clang/AST/Redeclarable.h"
 #include "clang/AST/Stmt.h"
+#include "clang/AST/TemplateBase.h"
+#include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Linkage.h"
 #include "clang/Basic/Module.h"
+#include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SanitizerBlacklist.h"
+#include "clang/Basic/Sanitizers.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/Visibility.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <type_traits>
 
 using namespace clang;
 
@@ -48,7 +83,7 @@
 
 TranslationUnitDecl::TranslationUnitDecl(ASTContext &ctx)
     : Decl(TranslationUnit, nullptr, SourceLocation()),
-      DeclContext(TranslationUnit), Ctx(ctx), AnonymousNamespace(nullptr) {}
+      DeclContext(TranslationUnit), Ctx(ctx) {}
 
 //===----------------------------------------------------------------------===//
 // NamedDecl Implementation
@@ -277,12 +312,12 @@
       LV.merge(getLVForType(*Arg.getAsType(), computation));
       continue;
 
-    case TemplateArgument::Declaration:
-      if (const auto *ND = dyn_cast<NamedDecl>(Arg.getAsDecl())) {
-        assert(!usesTypeVisibility(ND));
-        LV.merge(getLVForDecl(ND, computation));
-      }
+    case TemplateArgument::Declaration: {
+      const NamedDecl *ND = Arg.getAsDecl();
+      assert(!usesTypeVisibility(ND));
+      LV.merge(getLVForDecl(ND, computation));
       continue;
+    }
 
     case TemplateArgument::NullPtr:
       LV.merge(getTypeLinkageAndVisibility(Arg.getNullPtrType()));
@@ -744,7 +779,7 @@
     // unique-external linkage, it's not legally usable from outside
     // this translation unit.  However, we should use the C linkage
     // rules instead for extern "C" declarations.
-    if (Context.getLangOpts().CPlusPlus && !Function->isInExternCContext()) {
+    if (Context.getLangOpts().CPlusPlus && !isFirstInExternCContext(Function)) {
       // Only look at the type-as-written. Otherwise, deducing the return type
       // of a function could change its linkage.
       QualType TypeAsWritten = Function->getType();
@@ -980,7 +1015,7 @@
   return LV;
 }
 
-void NamedDecl::anchor() { }
+void NamedDecl::anchor() {}
 
 bool NamedDecl::isLinkageValid() const {
   if (!hasCachedLinkage())
@@ -1130,7 +1165,7 @@
                                                LVComputationKind computation) {
   if (const auto *Function = dyn_cast<FunctionDecl>(D)) {
     if (Function->isInAnonymousNamespace() &&
-        !Function->isInExternCContext())
+        !isFirstInExternCContext(Function))
       return getInternalLinkageFor(Function);
 
     // This is a "void f();" which got merged with a file static.
@@ -1153,7 +1188,7 @@
 
   if (const auto *Var = dyn_cast<VarDecl>(D)) {
     if (Var->hasExternalStorage()) {
-      if (Var->isInAnonymousNamespace() && !Var->isInExternCContext())
+      if (Var->isInAnonymousNamespace() && !isFirstInExternCContext(Var))
         return getInternalLinkageFor(Var);
 
       LinkageInfo LV;
@@ -1408,7 +1443,7 @@
   case Module::ModuleInterfaceUnit:
     return M;
 
-  case Module::GlobalModuleFragment:
+  case Module::GlobalModuleFragment: {
     // External linkage declarations in the global module have no owning module
     // for linkage purposes. But internal linkage declarations in the global
     // module fragment of a particular module are owned by that module for
@@ -1425,6 +1460,7 @@
     }
     return InternalLinkage ? M->Parent : nullptr;
   }
+  }
 
   llvm_unreachable("unknown module kind");
 }
@@ -1458,21 +1494,21 @@
     return;
   }
 
-  typedef SmallVector<const DeclContext *, 8> ContextsTy;
+  using ContextsTy = SmallVector<const DeclContext *, 8>;
   ContextsTy Contexts;
 
-  // Collect contexts.
-  while (Ctx && isa<NamedDecl>(Ctx)) {
-    Contexts.push_back(Ctx);
+  // Collect named contexts.
+  while (Ctx) {
+    if (isa<NamedDecl>(Ctx))
+      Contexts.push_back(Ctx);
     Ctx = Ctx->getParent();
   }
 
-  for (const DeclContext *DC : reverse(Contexts)) {
+  for (const DeclContext *DC : llvm::reverse(Contexts)) {
     if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
       OS << Spec->getName();
       const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
-      TemplateSpecializationType::PrintTemplateArgumentList(
-          OS, TemplateArgs.asArray(), P);
+      printTemplateArgumentList(OS, TemplateArgs.asArray(), P);
     } else if (const auto *ND = dyn_cast<NamespaceDecl>(DC)) {
       if (P.SuppressUnwrittenScope &&
           (ND->isAnonymousNamespace() || ND->isInline()))
@@ -1514,7 +1550,10 @@
       // enumerator is declared in the scope that immediately contains
       // the enum-specifier. Each scoped enumerator is declared in the
       // scope of the enumeration.
-      if (ED->isScoped() || ED->getIdentifier())
+      // For the case of unscoped enumerator, do not include in the qualified
+      // name any information about its enum enclosing scope, as its visibility
+      // is global.
+      if (ED->isScoped())
         OS << *ED;
       else
         continue;
@@ -1597,14 +1636,6 @@
                         cast<UnresolvedUsingValueDecl>(OldD)->getQualifier());
   }
 
-  // UsingDirectiveDecl's are not really NamedDecl's, and all have same name.
-  // They can be replaced if they nominate the same namespace.
-  // FIXME: Is this true even if they have different module visibility?
-  if (auto *UD = dyn_cast<UsingDirectiveDecl>(this))
-    return UD->getNominatedNamespace()->getOriginalNamespace() ==
-           cast<UsingDirectiveDecl>(OldD)->getNominatedNamespace()
-               ->getOriginalNamespace();
-
   if (isRedeclarable(getKind())) {
     if (getCanonicalDecl() != OldD->getCanonicalDecl())
       return false;
@@ -1739,11 +1770,9 @@
   return getTemplateOrInnerLocStart(this);
 }
 
-namespace {
-
 // Helper function: returns true if QT is or contains a type
 // having a postfix component.
-bool typeIsPostfix(clang::QualType QT) {
+static bool typeIsPostfix(QualType QT) {
   while (true) {
     const Type* T = QT.getTypePtr();
     switch (T->getTypeClass()) {
@@ -1777,8 +1806,6 @@
   }
 }
 
-} // namespace
-
 SourceRange DeclaratorDecl::getSourceRange() const {
   SourceLocation RangeEnd = getLocation();
   if (TypeSourceInfo *TInfo = getTypeSourceInfo()) {
@@ -1828,7 +1855,7 @@
                  IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo,
                  StorageClass SC)
     : DeclaratorDecl(DK, DC, IdLoc, Id, T, TInfo, StartLoc),
-      redeclarable_base(C), Init() {
+      redeclarable_base(C) {
   static_assert(sizeof(VarDeclBitfields) <= sizeof(unsigned),
                 "VarDeclBitfields too large!");
   static_assert(sizeof(ParmVarDeclBitfields) <= sizeof(unsigned),
@@ -1952,6 +1979,9 @@
 
 VarDecl::DefinitionKind
 VarDecl::isThisDeclarationADefinition(ASTContext &C) const {
+  if (isThisDeclarationADemotedDefinition())
+    return DeclarationOnly;
+
   // C++ [basic.def]p2:
   //   A declaration is a definition unless [...] it contains the 'extern'
   //   specifier or a linkage-specification and neither an initializer [...],
@@ -1965,9 +1995,6 @@
   //
   // FIXME: How do you declare (but not define) a partial specialization of
   // a static data member template outside the containing class?
-  if (isThisDeclarationADemotedDefinition())
-    return DeclarationOnly;
-
   if (isStaticDataMember()) {
     if (isOutOfLine() &&
         !(getCanonicalDecl()->isInline() &&
@@ -2007,9 +2034,12 @@
   // A variable template specialization (other than a static data member
   // template or an explicit specialization) is a declaration until we
   // instantiate its initializer.
-  if (isa<VarTemplateSpecializationDecl>(this) &&
-      getTemplateSpecializationKind() != TSK_ExplicitSpecialization)
-    return DeclarationOnly;
+  if (auto *VTSD = dyn_cast<VarTemplateSpecializationDecl>(this)) {
+    if (VTSD->getTemplateSpecializationKind() != TSK_ExplicitSpecialization &&
+        !isa<VarTemplatePartialSpecializationDecl>(VTSD) &&
+        !VTSD->IsCompleteDefinition)
+      return DeclarationOnly;
+  }
 
   if (hasExternalStorage())
     return DeclarationOnly;
@@ -2393,15 +2423,21 @@
           dyn_cast<VarTemplateSpecializationDecl>(this)) {
     Spec->setSpecializationKind(TSK);
     if (TSK != TSK_ExplicitSpecialization && PointOfInstantiation.isValid() &&
-        Spec->getPointOfInstantiation().isInvalid())
+        Spec->getPointOfInstantiation().isInvalid()) {
       Spec->setPointOfInstantiation(PointOfInstantiation);
+      if (ASTMutationListener *L = getASTContext().getASTMutationListener())
+        L->InstantiationRequested(this);
+    }
   }
 
   if (MemberSpecializationInfo *MSI = getMemberSpecializationInfo()) {
     MSI->setTemplateSpecializationKind(TSK);
     if (TSK != TSK_ExplicitSpecialization && PointOfInstantiation.isValid() &&
-        MSI->getPointOfInstantiation().isInvalid())
+        MSI->getPointOfInstantiation().isInvalid()) {
       MSI->setPointOfInstantiation(PointOfInstantiation);
+      if (ASTMutationListener *L = getASTContext().getASTMutationListener())
+        L->InstantiationRequested(this);
+    }
   }
 }
 
@@ -2533,8 +2569,7 @@
   NamedDecl::getNameForDiagnostic(OS, Policy, Qualified);
   const TemplateArgumentList *TemplateArgs = getTemplateSpecializationArgs();
   if (TemplateArgs)
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, TemplateArgs->asArray(), Policy);
+    printTemplateArgumentList(OS, TemplateArgs->asArray(), Policy);
 }
 
 bool FunctionDecl::isVariadic() const {
@@ -2867,10 +2902,16 @@
       Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
     return 0;
 
+  // CUDA does not have device-side standard library. printf and malloc are the
+  // only special cases that are supported by device-side runtime.
+  if (Context.getLangOpts().CUDA && hasAttr<CUDADeviceAttr>() &&
+      !hasAttr<CUDAHostAttr>() &&
+      !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc))
+    return 0;
+
   return BuiltinID;
 }
 
-
 /// getNumParams - Return the number of parameters this function must have
 /// based on its FunctionType.  This is the length of the ParamInfo array
 /// after it has been created.
@@ -3382,7 +3423,6 @@
 DependentFunctionTemplateSpecializationInfo(const UnresolvedSetImpl &Ts,
                                       const TemplateArgumentListInfo &TArgs)
   : AngleLocs(TArgs.getLAngleLoc(), TArgs.getRAngleLoc()) {
-
   NumTemplates = Ts.size();
   NumArgs = TArgs.size();
 
@@ -3420,15 +3460,21 @@
     FTSInfo->setTemplateSpecializationKind(TSK);
     if (TSK != TSK_ExplicitSpecialization &&
         PointOfInstantiation.isValid() &&
-        FTSInfo->getPointOfInstantiation().isInvalid())
+        FTSInfo->getPointOfInstantiation().isInvalid()) {
       FTSInfo->setPointOfInstantiation(PointOfInstantiation);
+      if (ASTMutationListener *L = getASTContext().getASTMutationListener())
+        L->InstantiationRequested(this);
+    }
   } else if (MemberSpecializationInfo *MSInfo
              = TemplateOrSpecialization.dyn_cast<MemberSpecializationInfo*>()) {
     MSInfo->setTemplateSpecializationKind(TSK);
     if (TSK != TSK_ExplicitSpecialization &&
         PointOfInstantiation.isValid() &&
-        MSInfo->getPointOfInstantiation().isInvalid())
+        MSInfo->getPointOfInstantiation().isInvalid()) {
       MSInfo->setPointOfInstantiation(PointOfInstantiation);
+      if (ASTMutationListener *L = getASTContext().getASTMutationListener())
+        L->InstantiationRequested(this);
+    }
   } else
     llvm_unreachable("Function cannot have a template specialization kind");
 }
@@ -3567,6 +3613,31 @@
   return 0;
 }
 
+unsigned FunctionDecl::getODRHash() {
+  if (HasODRHash)
+    return ODRHash;
+
+  if (FunctionDecl *Definition = getDefinition()) {
+    if (Definition != this) {
+      HasODRHash = true;
+      ODRHash = Definition->getODRHash();
+      return ODRHash;
+    }
+  }
+
+  if (auto *FT = getInstantiatedFromMemberFunction()) {
+    HasODRHash = true;
+    ODRHash = FT->getODRHash();
+    return ODRHash;
+  }
+
+  class ODRHash Hash;
+  Hash.AddFunctionDecl(this);
+  HasODRHash = true;
+  ODRHash = Hash.CalculateHash();
+  return ODRHash;
+}
+
 //===----------------------------------------------------------------------===//
 // FieldDecl Implementation
 //===----------------------------------------------------------------------===//
@@ -3609,7 +3680,8 @@
   if (CachedFieldIndex) return CachedFieldIndex - 1;
 
   unsigned Index = 0;
-  const RecordDecl *RD = getParent();
+  const RecordDecl *RD = getParent()->getDefinition();
+  assert(RD && "requested index for field of struct with no definition");
 
   for (auto *Field : RD->fields()) {
     Field->getCanonicalDecl()->CachedFieldIndex = Index + 1;
@@ -3745,7 +3817,7 @@
 // EnumDecl Implementation
 //===----------------------------------------------------------------------===//
 
-void EnumDecl::anchor() { }
+void EnumDecl::anchor() {}
 
 EnumDecl *EnumDecl::Create(ASTContext &C, DeclContext *DC,
                            SourceLocation StartLoc, SourceLocation IdLoc,
@@ -3854,12 +3926,12 @@
                        DeclContext *DC, SourceLocation StartLoc,
                        SourceLocation IdLoc, IdentifierInfo *Id,
                        RecordDecl *PrevDecl)
-    : TagDecl(DK, TK, C, DC, IdLoc, Id, PrevDecl, StartLoc) {
-  HasFlexibleArrayMember = false;
-  AnonymousStructOrUnion = false;
-  HasObjectMember = false;
-  HasVolatileMember = false;
-  LoadedFieldsFromExternalStorage = false;
+    : TagDecl(DK, TK, C, DC, IdLoc, Id, PrevDecl, StartLoc),
+      HasFlexibleArrayMember(false), AnonymousStructOrUnion(false),
+      HasObjectMember(false), HasVolatileMember(false),
+      LoadedFieldsFromExternalStorage(false),
+      NonTrivialToPrimitiveDefaultInitialize(false),
+      NonTrivialToPrimitiveCopy(false), NonTrivialToPrimitiveDestroy(false) {
   assert(classof(static_cast<Decl*>(this)) && "Invalid Kind!");
 }
 
@@ -4008,7 +4080,6 @@
   return nullptr;
 }
 
-
 //===----------------------------------------------------------------------===//
 // BlockDecl Implementation
 //===----------------------------------------------------------------------===//
@@ -4054,13 +4125,13 @@
 // Other Decl Allocation/Deallocation Method Implementations
 //===----------------------------------------------------------------------===//
 
-void TranslationUnitDecl::anchor() { }
+void TranslationUnitDecl::anchor() {}
 
 TranslationUnitDecl *TranslationUnitDecl::Create(ASTContext &C) {
   return new (C, (DeclContext *)nullptr) TranslationUnitDecl(C);
 }
 
-void PragmaCommentDecl::anchor() { }
+void PragmaCommentDecl::anchor() {}
 
 PragmaCommentDecl *PragmaCommentDecl::Create(const ASTContext &C,
                                              TranslationUnitDecl *DC,
@@ -4082,7 +4153,7 @@
       PragmaCommentDecl(nullptr, SourceLocation(), PCK_Unknown);
 }
 
-void PragmaDetectMismatchDecl::anchor() { }
+void PragmaDetectMismatchDecl::anchor() {}
 
 PragmaDetectMismatchDecl *
 PragmaDetectMismatchDecl::Create(const ASTContext &C, TranslationUnitDecl *DC,
@@ -4107,14 +4178,14 @@
       PragmaDetectMismatchDecl(nullptr, SourceLocation(), 0);
 }
 
-void ExternCContextDecl::anchor() { }
+void ExternCContextDecl::anchor() {}
 
 ExternCContextDecl *ExternCContextDecl::Create(const ASTContext &C,
                                                TranslationUnitDecl *DC) {
   return new (C, DC) ExternCContextDecl(DC);
 }
 
-void LabelDecl::anchor() { }
+void LabelDecl::anchor() {}
 
 LabelDecl *LabelDecl::Create(ASTContext &C, DeclContext *DC,
                              SourceLocation IdentL, IdentifierInfo *II) {
@@ -4140,7 +4211,7 @@
   MSAsmName = Buffer;
 }
 
-void ValueDecl::anchor() { }
+void ValueDecl::anchor() {}
 
 bool ValueDecl::isWeak() const {
   for (const auto *I : attrs())
@@ -4150,7 +4221,7 @@
   return isWeakImported();
 }
 
-void ImplicitParamDecl::anchor() { }
+void ImplicitParamDecl::anchor() {}
 
 ImplicitParamDecl *ImplicitParamDecl::Create(ASTContext &C, DeclContext *DC,
                                              SourceLocation IdLoc,
@@ -4233,7 +4304,7 @@
                                       QualType(), nullptr, llvm::APSInt());
 }
 
-void IndirectFieldDecl::anchor() { }
+void IndirectFieldDecl::anchor() {}
 
 IndirectFieldDecl::IndirectFieldDecl(ASTContext &C, DeclContext *DC,
                                      SourceLocation L, DeclarationName N,
@@ -4267,7 +4338,7 @@
   return SourceRange(getLocation(), End);
 }
 
-void TypeDecl::anchor() { }
+void TypeDecl::anchor() {}
 
 TypedefDecl *TypedefDecl::Create(ASTContext &C, DeclContext *DC,
                                  SourceLocation StartLoc, SourceLocation IdLoc,
@@ -4275,7 +4346,7 @@
   return new (C, DC) TypedefDecl(C, DC, StartLoc, IdLoc, Id, TInfo);
 }
 
-void TypedefNameDecl::anchor() { }
+void TypedefNameDecl::anchor() {}
 
 TagDecl *TypedefNameDecl::getAnonDeclWithTypedefName(bool AnyRedecl) const {
   if (auto *TT = getTypeSourceInfo()->getType()->getAs<TagType>()) {
@@ -4310,9 +4381,7 @@
   };
 
   bool isTransparent = determineIsTransparent();
-  CacheIsTransparentTag = 1;
-  if (isTransparent)
-    CacheIsTransparentTag |= 0x2;
+  MaybeModedTInfo.setInt((isTransparent << 1) | 1);
   return isTransparent;
 }
 
@@ -4349,7 +4418,7 @@
   return SourceRange(getLocStart(), RangeEnd);
 }
 
-void FileScopeAsmDecl::anchor() { }
+void FileScopeAsmDecl::anchor() {}
 
 FileScopeAsmDecl *FileScopeAsmDecl::Create(ASTContext &C, DeclContext *DC,
                                            StringLiteral *Str,
@@ -4392,9 +4461,7 @@
 ImportDecl::ImportDecl(DeclContext *DC, SourceLocation StartLoc, 
                        Module *Imported,
                        ArrayRef<SourceLocation> IdentifierLocs)
-  : Decl(Import, DC, StartLoc), ImportedAndComplete(Imported, true),
-    NextLocalImport()
-{
+  : Decl(Import, DC, StartLoc), ImportedAndComplete(Imported, true) {
   assert(getNumModuleIdentifiers(Imported) == IdentifierLocs.size());
   auto *StoredLocs = getTrailingObjects<SourceLocation>();
   std::uninitialized_copy(IdentifierLocs.begin(), IdentifierLocs.end(),
@@ -4403,9 +4470,7 @@
 
 ImportDecl::ImportDecl(DeclContext *DC, SourceLocation StartLoc, 
                        Module *Imported, SourceLocation EndLoc)
-  : Decl(Import, DC, StartLoc), ImportedAndComplete(Imported, false),
-    NextLocalImport()
-{
+  : Decl(Import, DC, StartLoc), ImportedAndComplete(Imported, false) {
   *getTrailingObjects<SourceLocation>() = EndLoc;
 }
 
diff --git a/lib/AST/DeclBase.cpp b/lib/AST/DeclBase.cpp
index 1e15ee9..04fd62f 100644
--- a/lib/AST/DeclBase.cpp
+++ b/lib/AST/DeclBase.cpp
@@ -1,4 +1,4 @@
-//===--- DeclBase.cpp - Declaration AST Node Implementation ---------------===//
+//===- DeclBase.cpp - Declaration AST Node Implementation -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,6 +15,7 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/Attr.h"
+#include "clang/AST/AttrIterator.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclContextInternals.h"
@@ -25,11 +26,30 @@
 #include "clang/AST/DependentDiagnostic.h"
 #include "clang/AST/ExternalASTSource.h"
 #include "clang/AST/Stmt.h"
-#include "clang/AST/StmtCXX.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/ObjCRuntime.h"
+#include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/VersionTuple.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <string>
+#include <tuple>
+#include <utility>
+
 using namespace clang;
 
 //===----------------------------------------------------------------------===//
@@ -216,10 +236,23 @@
     return RD->getDescribedClassTemplate();
   else if (auto *VD = dyn_cast<VarDecl>(this))
     return VD->getDescribedVarTemplate();
+  else if (auto *AD = dyn_cast<TypeAliasDecl>(this))
+    return AD->getDescribedAliasTemplate();
 
   return nullptr;
 }
 
+bool Decl::isTemplated() const {
+  // A declaration is dependent if it is a template or a template pattern, or
+  // is within (lexcially for a friend, semantically otherwise) a dependent
+  // context.
+  // FIXME: Should local extern declarations be treated like friends?
+  if (auto *AsDC = dyn_cast<DeclContext>(this))
+    return AsDC->isDependentContext();
+  auto *DC = getFriendObjectKind() ? getLexicalDeclContext() : getDeclContext();
+  return DC->isDependentContext() || isTemplateDecl() || getDescribedTemplate();
+}
+
 const DeclContext *Decl::getParentFunctionOrMethod() const {
   for (const DeclContext *DC = getDeclContext();
        DC && !DC->isTranslationUnit() && !DC->isNamespace(); 
@@ -230,7 +263,6 @@
   return nullptr;
 }
 
-
 //===----------------------------------------------------------------------===//
 // PrettyStackTraceDecl Implementation
 //===----------------------------------------------------------------------===//
@@ -260,7 +292,7 @@
 //===----------------------------------------------------------------------===//
 
 // Out-of-line virtual method providing a home for Decl.
-Decl::~Decl() { }
+Decl::~Decl() = default;
 
 void Decl::setDeclContext(DeclContext *DC) {
   DeclCtx = DC;
@@ -872,12 +904,14 @@
   // 4. the context is not a record
   // 5. it's invalid
   // 6. it's a C++0x static_assert.
+  // 7. it's a block literal declaration
   if (isa<TranslationUnitDecl>(this) ||
       isa<TemplateTypeParmDecl>(this) ||
       isa<NonTypeTemplateParmDecl>(this) ||
       !isa<CXXRecordDecl>(getDeclContext()) ||
       isInvalidDecl() ||
       isa<StaticAssertDecl>(this) ||
+      isa<BlockDecl>(this) ||
       // FIXME: a ParmVarDecl can have ClassTemplateSpecialization
       // as DeclContext (?).
       isa<ParmVarDecl>(this) ||
@@ -913,7 +947,6 @@
   return Ty->getAs<FunctionType>();
 }
 
-
 /// Starting at a given context (a Decl or DeclContext), look for a
 /// code context that is not a closure (a lambda, block, etc.).
 template <class T> static Decl *getNonClosureContext(T *D) {
@@ -966,7 +999,7 @@
   }
 }
 
-DeclContext::~DeclContext() { }
+DeclContext::~DeclContext() = default;
 
 /// \brief Find the parent context of this context that will be
 /// used for unqualified name lookup.
@@ -1057,15 +1090,14 @@
 }
 
 bool DeclContext::isExternCContext() const {
-  return isLinkageSpecContext(this, clang::LinkageSpecDecl::lang_c);
+  return isLinkageSpecContext(this, LinkageSpecDecl::lang_c);
 }
 
 const LinkageSpecDecl *DeclContext::getExternCContext() const {
   const DeclContext *DC = this;
   while (DC->getDeclKind() != Decl::TranslationUnit) {
     if (DC->getDeclKind() == Decl::LinkageSpec &&
-        cast<LinkageSpecDecl>(DC)->getLanguage() ==
-            clang::LinkageSpecDecl::lang_c)
+        cast<LinkageSpecDecl>(DC)->getLanguage() == LinkageSpecDecl::lang_c)
       return cast<LinkageSpecDecl>(DC);
     DC = DC->getLexicalParent();
   }
@@ -1073,7 +1105,7 @@
 }
 
 bool DeclContext::isExternCXXContext() const {
-  return isLinkageSpecContext(this, clang::LinkageSpecDecl::lang_cxx);
+  return isLinkageSpecContext(this, LinkageSpecDecl::lang_cxx);
 }
 
 bool DeclContext::Encloses(const DeclContext *DC) const {
@@ -1108,13 +1140,11 @@
   case Decl::ObjCInterface:
     if (ObjCInterfaceDecl *Def = cast<ObjCInterfaceDecl>(this)->getDefinition())
       return Def;
-      
     return this;
       
   case Decl::ObjCProtocol:
     if (ObjCProtocolDecl *Def = cast<ObjCProtocolDecl>(this)->getDefinition())
       return Def;
-    
     return this;
       
   case Decl::ObjCCategory:
@@ -1573,17 +1603,7 @@
   if (PrimaryContext != this)
     return PrimaryContext->noload_lookup(Name);
 
-  // If we have any lazy lexical declarations not in our lookup map, add them
-  // now. Don't import any external declarations, not even if we know we have
-  // some missing from the external visible lookups.
-  if (HasLazyLocalLexicalLookups) {
-    SmallVector<DeclContext *, 2> Contexts;
-    collectAllContexts(Contexts);
-    for (unsigned I = 0, N = Contexts.size(); I != N; ++I)
-      buildLookupImpl(Contexts[I], hasExternalVisibleStorage());
-    HasLazyLocalLexicalLookups = false;
-  }
-
+  loadLazyLocalLexicalLookups();
   StoredDeclsMap *Map = LookupPtr;
   if (!Map)
     return lookup_result();
@@ -1593,6 +1613,19 @@
                          : lookup_result();
 }
 
+// If we have any lazy lexical declarations not in our lookup map, add them
+// now. Don't import any external declarations, not even if we know we have
+// some missing from the external visible lookups.
+void DeclContext::loadLazyLocalLexicalLookups() {
+  if (HasLazyLocalLexicalLookups) {
+    SmallVector<DeclContext *, 2> Contexts;
+    collectAllContexts(Contexts);
+    for (unsigned I = 0, N = Contexts.size(); I != N; ++I)
+      buildLookupImpl(Contexts[I], hasExternalVisibleStorage());
+    HasLazyLocalLexicalLookups = false;
+  }
+}
+
 void DeclContext::localUncachedLookup(DeclarationName Name,
                                       SmallVectorImpl<NamedDecl *> &Results) {
   Results.clear();
diff --git a/lib/AST/DeclCXX.cpp b/lib/AST/DeclCXX.cpp
index bd8a150..487386f 100644
--- a/lib/AST/DeclCXX.cpp
+++ b/lib/AST/DeclCXX.cpp
@@ -1,4 +1,4 @@
-//===--- DeclCXX.cpp - C++ Declaration AST Node Implementation ------------===//
+//===- DeclCXX.cpp - C++ Declaration AST Node Implementation --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,26 +10,51 @@
 // This file implements the C++ related Decl classes.
 //
 //===----------------------------------------------------------------------===//
+
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTLambda.h"
 #include "clang/AST/ASTMutationListener.h"
+#include "clang/AST/ASTUnresolvedSet.h"
 #include "clang/AST/CXXInheritance.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/LambdaCapture.h"
+#include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/ODRHash.h"
+#include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
+#include "clang/AST/UnresolvedSet.h"
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/IdentifierTable.h"
-#include "llvm/ADT/STLExtras.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+
 using namespace clang;
 
 //===----------------------------------------------------------------------===//
 // Decl Allocation/Deallocation Method Implementations
 //===----------------------------------------------------------------------===//
 
-void AccessSpecDecl::anchor() { }
+void AccessSpecDecl::anchor() {}
 
 AccessSpecDecl *AccessSpecDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
   return new (C, ID) AccessSpecDecl(EmptyShell());
@@ -63,7 +88,9 @@
       DefaultedMoveConstructorIsDeleted(false),
       DefaultedMoveAssignmentIsDeleted(false),
       DefaultedDestructorIsDeleted(false), HasTrivialSpecialMembers(SMF_All),
-      DeclaredNonTrivialSpecialMembers(0), HasIrrelevantDestructor(true),
+      HasTrivialSpecialMembersForCall(SMF_All),
+      DeclaredNonTrivialSpecialMembers(0),
+      DeclaredNonTrivialSpecialMembersForCall(0), HasIrrelevantDestructor(true),
       HasConstexprNonCopyMoveConstructor(false),
       HasDefaultedDefaultConstructor(false),
       CanPassInRegisters(true),
@@ -76,9 +103,7 @@
       ImplicitCopyAssignmentHasConstParam(true),
       HasDeclaredCopyConstructorWithConstParam(false),
       HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false),
-      IsParsingBaseSpecifiers(false), HasODRHash(false), ODRHash(0),
-      NumBases(0), NumVBases(0), Bases(), VBases(), Definition(D),
-      FirstFriend() {}
+      IsParsingBaseSpecifiers(false), HasODRHash(false), Definition(D) {}
 
 CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const {
   return Bases.get(Definition->getASTContext().getExternalSource());
@@ -94,8 +119,7 @@
                              CXXRecordDecl *PrevDecl)
     : RecordDecl(K, TK, C, DC, StartLoc, IdLoc, Id, PrevDecl),
       DefinitionData(PrevDecl ? PrevDecl->DefinitionData
-                              : nullptr),
-      TemplateOrInstantiation() {}
+                              : nullptr) {}
 
 CXXRecordDecl *CXXRecordDecl::Create(const ASTContext &C, TagKind TK,
                                      DeclContext *DC, SourceLocation StartLoc,
@@ -148,7 +172,7 @@
     C.Deallocate(data().getBases());
 
   if (NumBases) {
-    if (!C.getLangOpts().CPlusPlus1z) {
+    if (!C.getLangOpts().CPlusPlus17) {
       // C++ [dcl.init.aggr]p1:
       //   An aggregate is [...] a class with [...] no base classes [...].
       data().Aggregate = false;
@@ -259,6 +283,7 @@
       //   operator for a class X] is trivial [...] if:
       //    -- class X has [...] no virtual base classes
       data().HasTrivialSpecialMembers &= SMF_Destructor;
+      data().HasTrivialSpecialMembersForCall &= SMF_Destructor;
 
       // C++0x [class]p7:
       //   A standard-layout class is a class that: [...]
@@ -292,6 +317,10 @@
       //       subobject is trivial, and
       if (!BaseClassDecl->hasTrivialCopyConstructor())
         data().HasTrivialSpecialMembers &= ~SMF_CopyConstructor;
+
+      if (!BaseClassDecl->hasTrivialCopyConstructorForCall())
+        data().HasTrivialSpecialMembersForCall &= ~SMF_CopyConstructor;
+
       // If the base class doesn't have a simple move constructor, we'll eagerly
       // declare it and perform overload resolution to determine which function
       // it actually calls. If it does have a simple move constructor, this
@@ -299,6 +328,9 @@
       if (!BaseClassDecl->hasTrivialMoveConstructor())
         data().HasTrivialSpecialMembers &= ~SMF_MoveConstructor;
 
+      if (!BaseClassDecl->hasTrivialMoveConstructorForCall())
+        data().HasTrivialSpecialMembersForCall &= ~SMF_MoveConstructor;
+
       // C++0x [class.copy]p27:
       //   A copy/move assignment operator for class X is trivial if [...]
       //    [...]
@@ -335,6 +367,9 @@
     if (!BaseClassDecl->hasTrivialDestructor())
       data().HasTrivialSpecialMembers &= ~SMF_Destructor;
 
+    if (!BaseClassDecl->hasTrivialDestructorForCall())
+      data().HasTrivialSpecialMembersForCall &= ~SMF_Destructor;
+
     if (!BaseClassDecl->hasIrrelevantDestructor())
       data().HasIrrelevantDestructor = false;
 
@@ -403,7 +438,6 @@
   return DefinitionData->ODRHash;
 }
 
-
 void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) {
   // C++11 [class.copy]p11:
   //   A defaulted copy/move constructor for a class X is defined as
@@ -518,6 +552,7 @@
       //   assignment operator for a class X] is trivial [...] if:
       //    -- class X has no virtual functions [...]
       data().HasTrivialSpecialMembers &= SMF_Destructor;
+      data().HasTrivialSpecialMembersForCall &= SMF_Destructor;
 
       // C++0x [class]p7:
       //   A standard-layout class is a class that: [...]
@@ -602,8 +637,10 @@
 
     // C++11 [class.dtor]p5:
     //   A destructor is trivial if [...] the destructor is not virtual.
-    if (DD->isVirtual())
+    if (DD->isVirtual()) {
       data().HasTrivialSpecialMembers &= ~SMF_Destructor;
+      data().HasTrivialSpecialMembersForCall &= ~SMF_Destructor;
+    }
   }
 
   // Handle member functions.
@@ -649,16 +686,30 @@
       // If this is the first declaration of a special member, we no longer have
       // an implicit trivial special member.
       data().HasTrivialSpecialMembers &=
-        data().DeclaredSpecialMembers | ~SMKind;
+          data().DeclaredSpecialMembers | ~SMKind;
+      data().HasTrivialSpecialMembersForCall &=
+          data().DeclaredSpecialMembers | ~SMKind;
 
       if (!Method->isImplicit() && !Method->isUserProvided()) {
         // This method is user-declared but not user-provided. We can't work out
         // whether it's trivial yet (not until we get to the end of the class).
         // We'll handle this method in finishedDefaultedOrDeletedMember.
-      } else if (Method->isTrivial())
+      } else if (Method->isTrivial()) {
         data().HasTrivialSpecialMembers |= SMKind;
-      else
+        data().HasTrivialSpecialMembersForCall |= SMKind;
+      } else if (Method->isTrivialForCall()) {
+        data().HasTrivialSpecialMembersForCall |= SMKind;
         data().DeclaredNonTrivialSpecialMembers |= SMKind;
+      } else {
+        data().DeclaredNonTrivialSpecialMembers |= SMKind;
+        // If this is a user-provided function, do not set
+        // DeclaredNonTrivialSpecialMembersForCall here since we don't know
+        // yet whether the method would be considered non-trivial for the
+        // purpose of calls (attribute "trivial_abi" can be dropped from the
+        // class later, which can change the special method's triviality).
+        if (!Method->isUserProvided())
+          data().DeclaredNonTrivialSpecialMembersForCall |= SMKind;
+      }
 
       // Note when we have declared a declared special member, and suppress the
       // implicit declaration of this special member.
@@ -751,6 +802,7 @@
         struct DefinitionData &Data = data();
         Data.PlainOldData = false;
         Data.HasTrivialSpecialMembers = 0;
+        Data.HasTrivialSpecialMembersForCall = 0;
         Data.HasIrrelevantDestructor = false;
       } else if (!Context.getLangOpts().ObjCAutoRefCount) {
         setHasObjectMember(true);
@@ -878,12 +930,19 @@
         //       member is trivial;
         if (!FieldRec->hasTrivialCopyConstructor())
           data().HasTrivialSpecialMembers &= ~SMF_CopyConstructor;
+
+        if (!FieldRec->hasTrivialCopyConstructorForCall())
+          data().HasTrivialSpecialMembersForCall &= ~SMF_CopyConstructor;
+
         // If the field doesn't have a simple move constructor, we'll eagerly
         // declare the move constructor for this class and we'll decide whether
         // it's trivial then.
         if (!FieldRec->hasTrivialMoveConstructor())
           data().HasTrivialSpecialMembers &= ~SMF_MoveConstructor;
 
+        if (!FieldRec->hasTrivialMoveConstructorForCall())
+          data().HasTrivialSpecialMembersForCall &= ~SMF_MoveConstructor;
+
         // C++0x [class.copy]p27:
         //   A copy/move assignment operator for class X is trivial if [...]
         //    [...]
@@ -900,6 +959,8 @@
 
         if (!FieldRec->hasTrivialDestructor())
           data().HasTrivialSpecialMembers &= ~SMF_Destructor;
+        if (!FieldRec->hasTrivialDestructorForCall())
+          data().HasTrivialSpecialMembersForCall &= ~SMF_Destructor;
         if (!FieldRec->hasIrrelevantDestructor())
           data().HasIrrelevantDestructor = false;
         if (FieldRec->hasObjectMember())
@@ -1082,6 +1143,23 @@
     data().DeclaredNonTrivialSpecialMembers |= SMKind;
 }
 
+void CXXRecordDecl::setTrivialForCallFlags(CXXMethodDecl *D) {
+  unsigned SMKind = 0;
+
+  if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(D)) {
+    if (Constructor->isCopyConstructor())
+      SMKind = SMF_CopyConstructor;
+    else if (Constructor->isMoveConstructor())
+      SMKind = SMF_MoveConstructor;
+  } else if (isa<CXXDestructorDecl>(D))
+    SMKind = SMF_Destructor;
+
+  if (D->isTrivialForCall())
+    data().HasTrivialSpecialMembersForCall |= SMKind;
+  else
+    data().DeclaredNonTrivialSpecialMembersForCall |= SMKind;
+}
+
 bool CXXRecordDecl::isCLike() const {
   if (getTagKind() == TTK_Class || getTagKind() == TTK_Interface ||
       !TemplateOrInstantiation.isNull())
@@ -1590,7 +1668,7 @@
   return false;
 }
 
-void CXXDeductionGuideDecl::anchor() { }
+void CXXDeductionGuideDecl::anchor() {}
 
 CXXDeductionGuideDecl *CXXDeductionGuideDecl::Create(
     ASTContext &C, DeclContext *DC, SourceLocation StartLoc, bool IsExplicit,
@@ -1607,7 +1685,7 @@
                                            nullptr, SourceLocation());
 }
 
-void CXXMethodDecl::anchor() { }
+void CXXMethodDecl::anchor() {}
 
 bool CXXMethodDecl::isStatic() const {
   const CXXMethodDecl *MD = getCanonicalDecl();
@@ -1621,9 +1699,7 @@
 
 static bool recursivelyOverrides(const CXXMethodDecl *DerivedMD,
                                  const CXXMethodDecl *BaseMD) {
-  for (CXXMethodDecl::method_iterator I = DerivedMD->begin_overridden_methods(),
-         E = DerivedMD->end_overridden_methods(); I != E; ++I) {
-    const CXXMethodDecl *MD = *I;
+  for (const CXXMethodDecl *MD : DerivedMD->overridden_methods()) {
     if (MD->getCanonicalDecl() == BaseMD->getCanonicalDecl())
       return true;
     if (recursivelyOverrides(MD, BaseMD))
@@ -1751,9 +1827,10 @@
   // We can devirtualize calls on an object accessed by a class member access
   // expression, since by C++11 [basic.life]p6 we know that it can't refer to
   // a derived class object constructed in the same location.
-  if (const MemberExpr *ME = dyn_cast<MemberExpr>(Base))
-    if (const ValueDecl *VD = dyn_cast<ValueDecl>(ME->getMemberDecl()))
-      return VD->getType()->isRecordType() ? DevirtualizedMethod : nullptr;
+  if (const MemberExpr *ME = dyn_cast<MemberExpr>(Base)) {
+    const ValueDecl *VD = ME->getMemberDecl();
+    return VD->getType()->isRecordType() ? DevirtualizedMethod : nullptr;
+  }
 
   // Likewise for calls on an object accessed by a (non-reference) pointer to
   // member access.
@@ -1954,43 +2031,34 @@
                                        SourceLocation L, Expr *Init,
                                        SourceLocation R,
                                        SourceLocation EllipsisLoc)
-  : Initializee(TInfo), MemberOrEllipsisLocation(EllipsisLoc), Init(Init), 
-    LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(IsVirtual), 
-    IsWritten(false), SourceOrder(0)
-{
-}
+    : Initializee(TInfo), MemberOrEllipsisLocation(EllipsisLoc), Init(Init),
+      LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(IsVirtual),
+      IsWritten(false), SourceOrder(0) {}
 
 CXXCtorInitializer::CXXCtorInitializer(ASTContext &Context,
                                        FieldDecl *Member,
                                        SourceLocation MemberLoc,
                                        SourceLocation L, Expr *Init,
                                        SourceLocation R)
-  : Initializee(Member), MemberOrEllipsisLocation(MemberLoc), Init(Init),
-    LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(false),
-    IsWritten(false), SourceOrder(0)
-{
-}
+    : Initializee(Member), MemberOrEllipsisLocation(MemberLoc), Init(Init),
+      LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(false),
+      IsWritten(false), SourceOrder(0) {}
 
 CXXCtorInitializer::CXXCtorInitializer(ASTContext &Context,
                                        IndirectFieldDecl *Member,
                                        SourceLocation MemberLoc,
                                        SourceLocation L, Expr *Init,
                                        SourceLocation R)
-  : Initializee(Member), MemberOrEllipsisLocation(MemberLoc), Init(Init),
-    LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(false),
-    IsWritten(false), SourceOrder(0)
-{
-}
+    : Initializee(Member), MemberOrEllipsisLocation(MemberLoc), Init(Init),
+      LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(false),
+      IsWritten(false), SourceOrder(0) {}
 
 CXXCtorInitializer::CXXCtorInitializer(ASTContext &Context,
                                        TypeSourceInfo *TInfo,
                                        SourceLocation L, Expr *Init, 
                                        SourceLocation R)
-  : Initializee(TInfo), MemberOrEllipsisLocation(), Init(Init),
-    LParenLoc(L), RParenLoc(R), IsDelegating(true), IsVirtual(false),
-    IsWritten(false), SourceOrder(0)
-{
-}
+    : Initializee(TInfo), Init(Init), LParenLoc(L), RParenLoc(R),
+      IsDelegating(true), IsVirtual(false), IsWritten(false), SourceOrder(0) {}
 
 TypeLoc CXXCtorInitializer::getBaseClassLoc() const {
   if (isBaseInitializer())
@@ -2030,7 +2098,7 @@
   return SourceRange(getSourceLocation(), getRParenLoc());
 }
 
-void CXXConstructorDecl::anchor() { }
+void CXXConstructorDecl::anchor() {}
 
 CXXConstructorDecl *CXXConstructorDecl::CreateDeserialized(ASTContext &C,
                                                            unsigned ID,
@@ -2173,7 +2241,7 @@
   return true;  
 }
 
-void CXXDestructorDecl::anchor() { }
+void CXXDestructorDecl::anchor() {}
 
 CXXDestructorDecl *
 CXXDestructorDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
@@ -2205,7 +2273,7 @@
   }
 }
 
-void CXXConversionDecl::anchor() { }
+void CXXConversionDecl::anchor() {}
 
 CXXConversionDecl *
 CXXConversionDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
@@ -2235,7 +2303,7 @@
          getConversionType()->isBlockPointerType();
 }
 
-void LinkageSpecDecl::anchor() { }
+void LinkageSpecDecl::anchor() {}
 
 LinkageSpecDecl *LinkageSpecDecl::Create(ASTContext &C,
                                          DeclContext *DC,
@@ -2252,7 +2320,7 @@
                                      SourceLocation(), lang_c, false);
 }
 
-void UsingDirectiveDecl::anchor() { }
+void UsingDirectiveDecl::anchor() {}
 
 UsingDirectiveDecl *UsingDirectiveDecl::Create(ASTContext &C, DeclContext *DC,
                                                SourceLocation L,
@@ -2286,7 +2354,7 @@
                              SourceLocation StartLoc, SourceLocation IdLoc,
                              IdentifierInfo *Id, NamespaceDecl *PrevDecl)
     : NamedDecl(Namespace, DC, IdLoc, Id), DeclContext(Namespace),
-      redeclarable_base(C), LocStart(StartLoc), RBraceLoc(),
+      redeclarable_base(C), LocStart(StartLoc),
       AnonOrFirstNamespaceAndInline(nullptr, Inline) {
   setPreviousDecl(PrevDecl);
 
@@ -2326,21 +2394,25 @@
 NamespaceDecl *NamespaceDecl::getNextRedeclarationImpl() {
   return getNextRedeclaration();
 }
+
 NamespaceDecl *NamespaceDecl::getPreviousDeclImpl() {
   return getPreviousDecl();
 }
+
 NamespaceDecl *NamespaceDecl::getMostRecentDeclImpl() {
   return getMostRecentDecl();
 }
 
-void NamespaceAliasDecl::anchor() { }
+void NamespaceAliasDecl::anchor() {}
 
 NamespaceAliasDecl *NamespaceAliasDecl::getNextRedeclarationImpl() {
   return getNextRedeclaration();
 }
+
 NamespaceAliasDecl *NamespaceAliasDecl::getPreviousDeclImpl() {
   return getPreviousDecl();
 }
+
 NamespaceAliasDecl *NamespaceAliasDecl::getMostRecentDeclImpl() {
   return getMostRecentDecl();
 }
@@ -2367,22 +2439,22 @@
                                         SourceLocation(), nullptr);
 }
 
-void UsingShadowDecl::anchor() { }
+void UsingShadowDecl::anchor() {}
 
 UsingShadowDecl::UsingShadowDecl(Kind K, ASTContext &C, DeclContext *DC,
                                  SourceLocation Loc, UsingDecl *Using,
                                  NamedDecl *Target)
     : NamedDecl(K, DC, Loc, Using ? Using->getDeclName() : DeclarationName()),
-      redeclarable_base(C), Underlying(Target),
+      redeclarable_base(C), Underlying(),
       UsingOrNextShadow(cast<NamedDecl>(Using)) {
   if (Target)
-    IdentifierNamespace = Target->getIdentifierNamespace();
+    setTargetDecl(Target);
   setImplicit();
 }
 
 UsingShadowDecl::UsingShadowDecl(Kind K, ASTContext &C, EmptyShell Empty)
     : NamedDecl(K, nullptr, SourceLocation(), DeclarationName()),
-      redeclarable_base(C), Underlying(), UsingOrNextShadow() {}
+      redeclarable_base(C) {}
 
 UsingShadowDecl *
 UsingShadowDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
@@ -2397,7 +2469,7 @@
   return cast<UsingDecl>(Shadow->UsingOrNextShadow);
 }
 
-void ConstructorUsingShadowDecl::anchor() { }
+void ConstructorUsingShadowDecl::anchor() {}
 
 ConstructorUsingShadowDecl *
 ConstructorUsingShadowDecl::Create(ASTContext &C, DeclContext *DC,
@@ -2416,7 +2488,7 @@
   return getUsingDecl()->getQualifier()->getAsRecordDecl();
 }
 
-void UsingDecl::anchor() { }
+void UsingDecl::anchor() {}
 
 void UsingDecl::addShadowDecl(UsingShadowDecl *S) {
   assert(std::find(shadow_begin(), shadow_end(), S) == shadow_end() &&
@@ -2468,7 +2540,7 @@
   return SourceRange(Begin, getNameInfo().getEndLoc());
 }
 
-void UsingPackDecl::anchor() { }
+void UsingPackDecl::anchor() {}
 
 UsingPackDecl *UsingPackDecl::Create(ASTContext &C, DeclContext *DC,
                                      NamedDecl *InstantiatedFrom,
@@ -2488,7 +2560,7 @@
   return Result;
 }
 
-void UnresolvedUsingValueDecl::anchor() { }
+void UnresolvedUsingValueDecl::anchor() {}
 
 UnresolvedUsingValueDecl *
 UnresolvedUsingValueDecl::Create(ASTContext &C, DeclContext *DC,
@@ -2516,7 +2588,7 @@
   return SourceRange(Begin, getNameInfo().getEndLoc());
 }
 
-void UnresolvedUsingTypenameDecl::anchor() { }
+void UnresolvedUsingTypenameDecl::anchor() {}
 
 UnresolvedUsingTypenameDecl *
 UnresolvedUsingTypenameDecl::Create(ASTContext &C, DeclContext *DC,
@@ -2538,7 +2610,7 @@
       SourceLocation(), nullptr, SourceLocation());
 }
 
-void StaticAssertDecl::anchor() { }
+void StaticAssertDecl::anchor() {}
 
 StaticAssertDecl *StaticAssertDecl::Create(ASTContext &C, DeclContext *DC,
                                            SourceLocation StaticAssertLoc,
diff --git a/lib/AST/DeclFriend.cpp b/lib/AST/DeclFriend.cpp
index 121403b..461bf36 100644
--- a/lib/AST/DeclFriend.cpp
+++ b/lib/AST/DeclFriend.cpp
@@ -1,4 +1,4 @@
-//===--- DeclFriend.cpp - C++ Friend Declaration AST Node Implementation --===//
+//===- DeclFriend.cpp - C++ Friend Declaration AST Node Implementation ----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,12 +12,20 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclFriend.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <cstddef>
+
 using namespace clang;
 
-void FriendDecl::anchor() { }
+void FriendDecl::anchor() {}
 
 FriendDecl *FriendDecl::getNextFriendSlowCase() {
   return cast_or_null<FriendDecl>(
@@ -28,9 +36,9 @@
                                SourceLocation L,
                                FriendUnion Friend,
                                SourceLocation FriendL,
-                        ArrayRef<TemplateParameterList*> FriendTypeTPLists) {
+                          ArrayRef<TemplateParameterList *> FriendTypeTPLists) {
 #ifndef NDEBUG
-  if (Friend.is<NamedDecl*>()) {
+  if (Friend.is<NamedDecl *>()) {
     NamedDecl *D = Friend.get<NamedDecl*>();
     assert(isa<FunctionDecl>(D) ||
            isa<CXXRecordDecl>(D) ||
@@ -42,7 +50,7 @@
     assert(D->getFriendObjectKind() ||
            (cast<CXXRecordDecl>(DC)->getTemplateSpecializationKind()));
     // These template parameters are for friend types only.
-    assert(FriendTypeTPLists.size() == 0);
+    assert(FriendTypeTPLists.empty());
   }
 #endif
 
diff --git a/lib/AST/DeclGroup.cpp b/lib/AST/DeclGroup.cpp
index 2f95e1f..f74ef9b 100644
--- a/lib/AST/DeclGroup.cpp
+++ b/lib/AST/DeclGroup.cpp
@@ -1,4 +1,4 @@
-//===--- DeclGroup.cpp - Classes for representing groups of Decls -*- C++ -*-==//
+//===- DeclGroup.cpp - Classes for representing groups of Decls -----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,7 +13,9 @@
 
 #include "clang/AST/DeclGroup.h"
 #include "clang/AST/ASTContext.h"
-#include "clang/AST/Decl.h"
+#include <cassert>
+#include <memory>
+
 using namespace clang;
 
 DeclGroup* DeclGroup::Create(ASTContext &C, Decl **Decls, unsigned NumDecls) {
diff --git a/lib/AST/DeclObjC.cpp b/lib/AST/DeclObjC.cpp
index d8bdb63..f95d5de 100644
--- a/lib/AST/DeclObjC.cpp
+++ b/lib/AST/DeclObjC.cpp
@@ -1,4 +1,4 @@
-//===--- DeclObjC.cpp - ObjC Declaration AST Node Implementation ----------===//
+//===- DeclObjC.cpp - ObjC Declaration AST Node Implementation ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,9 +15,27 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/Attr.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/Stmt.h"
-#include "llvm/ADT/STLExtras.h"
+#include "clang/AST/Type.h"
+#include "clang/AST/TypeLoc.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <utility>
+
 using namespace clang;
 
 //===----------------------------------------------------------------------===//
@@ -28,7 +46,6 @@
   List = nullptr;
   if (Elts == 0) return;  // Setting to an empty list is a noop.
 
-
   List = new (Ctx) void*[Elts];
   NumElts = Elts;
   memcpy(List, InList, sizeof(void*)*Elts);
@@ -48,7 +65,7 @@
 // ObjCInterfaceDecl
 //===----------------------------------------------------------------------===//
 
-void ObjCContainerDecl::anchor() { }
+void ObjCContainerDecl::anchor() {}
 
 /// getIvarDecl - This method looks up an ivar in this ContextDecl.
 ///
@@ -82,7 +99,6 @@
   // - (int) class_method;
   // + (float) class_method;
   // @end
-  //
   lookup_result R = lookup(Sel);
   for (lookup_iterator Meth = R.begin(), MethEnd = R.end();
        Meth != MethEnd; ++Meth) {
@@ -280,7 +296,7 @@
   return nullptr;
 }
 
-void ObjCInterfaceDecl::anchor() { }
+void ObjCInterfaceDecl::anchor() {}
 
 ObjCTypeParamList *ObjCInterfaceDecl::getTypeParamList() const {
   // If this particular declaration has a type parameter list, return it.
@@ -341,7 +357,6 @@
 /// FindPropertyVisibleInPrimaryClass - Finds declaration of the property
 /// with name 'PropertyId' in the primary class; including those in protocols
 /// (direct or indirect) used by the primary class.
-///
 ObjCPropertyDecl *
 ObjCInterfaceDecl::FindPropertyVisibleInPrimaryClass(
                        IdentifierInfo *PropertyId,
@@ -409,8 +424,7 @@
 
 void ObjCInterfaceDecl::mergeClassExtensionProtocolList(
                               ObjCProtocolDecl *const* ExtList, unsigned ExtNum,
-                              ASTContext &C)
-{
+                              ASTContext &C) {
   if (data().ExternallyCompleted)
     LoadExternalDefinition();
 
@@ -488,7 +502,7 @@
     return true;
   case DefinitionData::IDI_NotInherited:
     return false;
-  case DefinitionData::IDI_Unknown: {
+  case DefinitionData::IDI_Unknown:
     // If the class introduced initializers we conservatively assume that we
     // don't know if any of them is a designated initializer to avoid possible
     // misleading warnings.
@@ -510,7 +524,6 @@
     return data().InheritedDesignatedInitializers ==
         DefinitionData::IDI_Inherited;
   }
-  }
 
   llvm_unreachable("unexpected InheritedDesignatedInitializers value");
 }
@@ -902,7 +915,6 @@
       if (ObjCMethodDecl *MD = IFD->getMethod(getSelector(),
                                               isInstanceMethod()))
         return MD;
-
   } else if (ObjCCategoryImplDecl *CImplD =
                dyn_cast<ObjCCategoryImplDecl>(CtxD)) {
     if (ObjCCategoryDecl *CatD = CImplD->getCategoryDecl())
@@ -1312,7 +1324,8 @@
   if (!CheckOverrides)
     return nullptr;
 
-  typedef SmallVector<const ObjCMethodDecl *, 8> OverridesTy;
+  using OverridesTy = SmallVector<const ObjCMethodDecl *, 8>;
+
   OverridesTy Overrides;
   getOverriddenMethods(Overrides);
   for (OverridesTy::const_iterator I = Overrides.begin(), E = Overrides.end();
@@ -1328,7 +1341,7 @@
 // ObjCTypeParamDecl
 //===----------------------------------------------------------------------===//
 
-void ObjCTypeParamDecl::anchor() { }
+void ObjCTypeParamDecl::anchor() {}
 
 ObjCTypeParamDecl *ObjCTypeParamDecl::Create(ASTContext &ctx, DeclContext *dc,
                                              ObjCTypeParamVariance variance,
@@ -1373,14 +1386,12 @@
 ObjCTypeParamList::ObjCTypeParamList(SourceLocation lAngleLoc,
                                      ArrayRef<ObjCTypeParamDecl *> typeParams,
                                      SourceLocation rAngleLoc)
-  : NumParams(typeParams.size())
-{
+    : NumParams(typeParams.size()) {
   Brackets.Begin = lAngleLoc.getRawEncoding();
   Brackets.End = rAngleLoc.getRawEncoding();
   std::copy(typeParams.begin(), typeParams.end(), begin());
 }
 
-
 ObjCTypeParamList *ObjCTypeParamList::create(
                      ASTContext &ctx,
                      SourceLocation lAngleLoc,
@@ -1438,8 +1449,7 @@
                                      ObjCInterfaceDecl *PrevDecl,
                                      bool IsInternal)
     : ObjCContainerDecl(ObjCInterface, DC, Id, CLoc, AtLoc),
-      redeclarable_base(C), TypeForDecl(nullptr), TypeParamList(nullptr),
-      Data() {
+      redeclarable_base(C) {
   setPreviousDecl(PrevDecl);
   
   // Copy the 'data' pointer over.
@@ -1518,19 +1528,22 @@
 }
 
 namespace {
-  struct SynthesizeIvarChunk {
-    uint64_t Size;
-    ObjCIvarDecl *Ivar;
-    SynthesizeIvarChunk(uint64_t size, ObjCIvarDecl *ivar)
-      : Size(size), Ivar(ivar) {}
-  };
 
-  bool operator<(const SynthesizeIvarChunk & LHS,
-                 const SynthesizeIvarChunk &RHS) {
-      return LHS.Size < RHS.Size;
-  }
+struct SynthesizeIvarChunk {
+  uint64_t Size;
+  ObjCIvarDecl *Ivar;
+
+  SynthesizeIvarChunk(uint64_t size, ObjCIvarDecl *ivar)
+      : Size(size), Ivar(ivar) {}
+};
+
+bool operator<(const SynthesizeIvarChunk & LHS,
+               const SynthesizeIvarChunk &RHS) {
+    return LHS.Size < RHS.Size;
 }
 
+} // namespace
+
 /// all_declared_ivar_begin - return first ivar declared in this class,
 /// its extensions and its implementation. Lazily build the list on first
 /// access.
@@ -1694,7 +1707,7 @@
 // ObjCIvarDecl
 //===----------------------------------------------------------------------===//
 
-void ObjCIvarDecl::anchor() { }
+void ObjCIvarDecl::anchor() {}
 
 ObjCIvarDecl *ObjCIvarDecl::Create(ASTContext &C, ObjCContainerDecl *DC,
                                    SourceLocation StartLoc,
@@ -1771,7 +1784,7 @@
 // ObjCAtDefsFieldDecl
 //===----------------------------------------------------------------------===//
 
-void ObjCAtDefsFieldDecl::anchor() { }
+void ObjCAtDefsFieldDecl::anchor() {}
 
 ObjCAtDefsFieldDecl
 *ObjCAtDefsFieldDecl::Create(ASTContext &C, DeclContext *DC,
@@ -1791,14 +1804,14 @@
 // ObjCProtocolDecl
 //===----------------------------------------------------------------------===//
 
-void ObjCProtocolDecl::anchor() { }
+void ObjCProtocolDecl::anchor() {}
 
 ObjCProtocolDecl::ObjCProtocolDecl(ASTContext &C, DeclContext *DC,
                                    IdentifierInfo *Id, SourceLocation nameLoc,
                                    SourceLocation atStartLoc,
                                    ObjCProtocolDecl *PrevDecl)
     : ObjCContainerDecl(ObjCProtocol, DC, Id, nameLoc, atStartLoc),
-      redeclarable_base(C), Data() {
+      redeclarable_base(C) {
   setPreviousDecl(PrevDecl);
   if (PrevDecl)
     Data = PrevDecl->Data;
@@ -1874,7 +1887,6 @@
 
 void ObjCProtocolDecl::collectPropertiesToImplement(PropertyMap &PM,
                                                     PropertyDeclOrder &PO) const {
-  
   if (const ObjCProtocolDecl *PDecl = getDefinition()) {
     for (auto *Prop : PDecl->properties()) {
       // Insert into PM if not there already.
@@ -1921,7 +1933,7 @@
 // ObjCCategoryDecl
 //===----------------------------------------------------------------------===//
 
-void ObjCCategoryDecl::anchor() { }
+void ObjCCategoryDecl::anchor() {}
 
 ObjCCategoryDecl::ObjCCategoryDecl(DeclContext *DC, SourceLocation AtLoc,
                                    SourceLocation ClassNameLoc, 
@@ -1930,11 +1942,9 @@
                                    ObjCTypeParamList *typeParamList,
                                    SourceLocation IvarLBraceLoc,
                                    SourceLocation IvarRBraceLoc)
-  : ObjCContainerDecl(ObjCCategory, DC, Id, ClassNameLoc, AtLoc),
-    ClassInterface(IDecl), TypeParamList(nullptr),
-    NextClassCategory(nullptr), CategoryNameLoc(CategoryNameLoc),
-    IvarLBraceLoc(IvarLBraceLoc), IvarRBraceLoc(IvarRBraceLoc) 
-{
+    : ObjCContainerDecl(ObjCCategory, DC, Id, ClassNameLoc, AtLoc),
+      ClassInterface(IDecl), CategoryNameLoc(CategoryNameLoc),
+      IvarLBraceLoc(IvarLBraceLoc), IvarRBraceLoc(IvarRBraceLoc) {
   setTypeParamList(typeParamList);
 }
 
@@ -1994,7 +2004,7 @@
 // ObjCCategoryImplDecl
 //===----------------------------------------------------------------------===//
 
-void ObjCCategoryImplDecl::anchor() { }
+void ObjCCategoryImplDecl::anchor() {}
 
 ObjCCategoryImplDecl *
 ObjCCategoryImplDecl::Create(ASTContext &C, DeclContext *DC,
@@ -2023,8 +2033,7 @@
   return nullptr;
 }
 
-
-void ObjCImplDecl::anchor() { }
+void ObjCImplDecl::anchor() {}
 
 void ObjCImplDecl::addPropertyImplementation(ObjCPropertyImplDecl *property) {
   // FIXME: The context should be correct before we get here.
@@ -2052,7 +2061,6 @@
 /// FindPropertyImplIvarDecl - This method lookup the ivar in the list of
 /// properties implemented in this \@implementation block and returns
 /// the implemented property that uses it.
-///
 ObjCPropertyImplDecl *ObjCImplDecl::
 FindPropertyImplIvarDecl(IdentifierInfo *ivarId) const {
   for (auto *PID : property_impls())
@@ -2065,7 +2073,6 @@
 /// FindPropertyImplDecl - This method looks up a previous ObjCPropertyImplDecl
 /// added to the list of those properties \@synthesized/\@dynamic in this
 /// category \@implementation block.
-///
 ObjCPropertyImplDecl *ObjCImplDecl::
 FindPropertyImplDecl(IdentifierInfo *Id,
                      ObjCPropertyQueryKind QueryKind) const {
@@ -2103,7 +2110,7 @@
 // ObjCImplementationDecl
 //===----------------------------------------------------------------------===//
 
-void ObjCImplementationDecl::anchor() { }
+void ObjCImplementationDecl::anchor() {}
 
 ObjCImplementationDecl *
 ObjCImplementationDecl::Create(ASTContext &C, DeclContext *DC,
@@ -2155,7 +2162,7 @@
 // ObjCCompatibleAliasDecl
 //===----------------------------------------------------------------------===//
 
-void ObjCCompatibleAliasDecl::anchor() { }
+void ObjCCompatibleAliasDecl::anchor() {}
 
 ObjCCompatibleAliasDecl *
 ObjCCompatibleAliasDecl::Create(ASTContext &C, DeclContext *DC,
@@ -2175,7 +2182,7 @@
 // ObjCPropertyDecl
 //===----------------------------------------------------------------------===//
 
-void ObjCPropertyDecl::anchor() { }
+void ObjCPropertyDecl::anchor() {}
 
 ObjCPropertyDecl *ObjCPropertyDecl::Create(ASTContext &C, DeclContext *DC,
                                            SourceLocation L,
diff --git a/lib/AST/DeclPrinter.cpp b/lib/AST/DeclPrinter.cpp
index 68adfc7..87cee79 100644
--- a/lib/AST/DeclPrinter.cpp
+++ b/lib/AST/DeclPrinter.cpp
@@ -37,6 +37,8 @@
     void ProcessDeclGroup(SmallVectorImpl<Decl*>& Decls);
 
     void Print(AccessSpecifier AS);
+    void PrintConstructorInitializers(CXXConstructorDecl *CDecl,
+                                      std::string &Proto);
 
     /// Print an Objective-C method type in parentheses.
     ///
@@ -126,9 +128,7 @@
   // FIXME: This should be on the Type class!
   QualType BaseType = T;
   while (!BaseType->isSpecifierType()) {
-    if (isa<TypedefType>(BaseType))
-      break;
-    else if (const PointerType* PTy = BaseType->getAs<PointerType>())
+    if (const PointerType *PTy = BaseType->getAs<PointerType>())
       BaseType = PTy->getPointeeType();
     else if (const BlockPointerType *BPy = BaseType->getAs<BlockPointerType>())
       BaseType = BPy->getPointeeType();
@@ -142,8 +142,11 @@
       BaseType = RTy->getPointeeType();
     else if (const AutoType *ATy = BaseType->getAs<AutoType>())
       BaseType = ATy->getDeducedType();
+    else if (const ParenType *PTy = BaseType->getAs<ParenType>())
+      BaseType = PTy->desugar();
     else
-      llvm_unreachable("Unknown declarator!");
+      // This must be a syntax error.
+      break;
   }
   return BaseType;
 }
@@ -274,6 +277,71 @@
   }
 }
 
+void DeclPrinter::PrintConstructorInitializers(CXXConstructorDecl *CDecl,
+                                               std::string &Proto) {
+  bool HasInitializerList = false;
+  for (const auto *BMInitializer : CDecl->inits()) {
+    if (BMInitializer->isInClassMemberInitializer())
+      continue;
+
+    if (!HasInitializerList) {
+      Proto += " : ";
+      Out << Proto;
+      Proto.clear();
+      HasInitializerList = true;
+    } else
+      Out << ", ";
+
+    if (BMInitializer->isAnyMemberInitializer()) {
+      FieldDecl *FD = BMInitializer->getAnyMember();
+      Out << *FD;
+    } else {
+      Out << QualType(BMInitializer->getBaseClass(), 0).getAsString(Policy);
+    }
+
+    Out << "(";
+    if (!BMInitializer->getInit()) {
+      // Nothing to print
+    } else {
+      Expr *Init = BMInitializer->getInit();
+      if (ExprWithCleanups *Tmp = dyn_cast<ExprWithCleanups>(Init))
+        Init = Tmp->getSubExpr();
+
+      Init = Init->IgnoreParens();
+
+      Expr *SimpleInit = nullptr;
+      Expr **Args = nullptr;
+      unsigned NumArgs = 0;
+      if (ParenListExpr *ParenList = dyn_cast<ParenListExpr>(Init)) {
+        Args = ParenList->getExprs();
+        NumArgs = ParenList->getNumExprs();
+      } else if (CXXConstructExpr *Construct =
+                     dyn_cast<CXXConstructExpr>(Init)) {
+        Args = Construct->getArgs();
+        NumArgs = Construct->getNumArgs();
+      } else
+        SimpleInit = Init;
+
+      if (SimpleInit)
+        SimpleInit->printPretty(Out, nullptr, Policy, Indentation);
+      else {
+        for (unsigned I = 0; I != NumArgs; ++I) {
+          assert(Args[I] != nullptr && "Expected non-null Expr");
+          if (isa<CXXDefaultArgExpr>(Args[I]))
+            break;
+
+          if (I)
+            Out << ", ";
+          Args[I]->printPretty(Out, nullptr, Policy, Indentation);
+        }
+      }
+    }
+    Out << ")";
+    if (BMInitializer->isPackExpansion())
+      Out << "...";
+  }
+}
+
 //----------------------------------------------------------------------------
 // Common C declarations
 //----------------------------------------------------------------------------
@@ -513,13 +581,19 @@
   PrintingPolicy SubPolicy(Policy);
   SubPolicy.SuppressSpecifiers = false;
   std::string Proto;
-  if (!Policy.SuppressScope) {
-    if (const NestedNameSpecifier *NS = D->getQualifier()) {
-      llvm::raw_string_ostream OS(Proto);
-      NS->print(OS, Policy);
+
+  if (Policy.FullyQualifiedName) {
+    Proto += D->getQualifiedNameAsString();
+  } else {
+    if (!Policy.SuppressScope) {
+      if (const NestedNameSpecifier *NS = D->getQualifier()) {
+        llvm::raw_string_ostream OS(Proto);
+        NS->print(OS, Policy);
+      }
     }
+    Proto += D->getNameInfo().getAsString();
   }
-  Proto += D->getNameInfo().getAsString();
+
   if (GuideDecl)
     Proto = GuideDecl->getDeducedTemplate()->getDeclName().getAsString();
   if (const TemplateArgumentList *TArgs = D->getTemplateSpecializationArgs()) {
@@ -608,67 +682,8 @@
     }
 
     if (CDecl) {
-      bool HasInitializerList = false;
-      for (const auto *BMInitializer : CDecl->inits()) {
-        if (BMInitializer->isInClassMemberInitializer())
-          continue;
-
-        if (!HasInitializerList) {
-          Proto += " : ";
-          Out << Proto;
-          Proto.clear();
-          HasInitializerList = true;
-        } else
-          Out << ", ";
-
-        if (BMInitializer->isAnyMemberInitializer()) {
-          FieldDecl *FD = BMInitializer->getAnyMember();
-          Out << *FD;
-        } else {
-          Out << QualType(BMInitializer->getBaseClass(), 0).getAsString(Policy);
-        }
-        
-        Out << "(";
-        if (!BMInitializer->getInit()) {
-          // Nothing to print
-        } else {
-          Expr *Init = BMInitializer->getInit();
-          if (ExprWithCleanups *Tmp = dyn_cast<ExprWithCleanups>(Init))
-            Init = Tmp->getSubExpr();
-          
-          Init = Init->IgnoreParens();
-
-          Expr *SimpleInit = nullptr;
-          Expr **Args = nullptr;
-          unsigned NumArgs = 0;
-          if (ParenListExpr *ParenList = dyn_cast<ParenListExpr>(Init)) {
-            Args = ParenList->getExprs();
-            NumArgs = ParenList->getNumExprs();
-          } else if (CXXConstructExpr *Construct
-                                        = dyn_cast<CXXConstructExpr>(Init)) {
-            Args = Construct->getArgs();
-            NumArgs = Construct->getNumArgs();
-          } else
-            SimpleInit = Init;
-          
-          if (SimpleInit)
-            SimpleInit->printPretty(Out, nullptr, Policy, Indentation);
-          else {
-            for (unsigned I = 0; I != NumArgs; ++I) {
-              assert(Args[I] != nullptr && "Expected non-null Expr");
-              if (isa<CXXDefaultArgExpr>(Args[I]))
-                break;
-              
-              if (I)
-                Out << ", ";
-              Args[I]->printPretty(Out, nullptr, Policy, Indentation);
-            }
-          }
-        }
-        Out << ")";
-        if (BMInitializer->isPackExpansion())
-          Out << "...";
-      }
+      if (!Policy.TerseOutput)
+        PrintConstructorInitializers(CDecl, Proto);
     } else if (!ConversionDecl && !isa<CXXDestructorDecl>(D)) {
       if (FT && FT->hasTrailingReturn()) {
         if (!GuideDecl)
@@ -712,7 +727,7 @@
       if (D->getBody())
         D->getBody()->printPretty(Out, nullptr, SubPolicy, Indentation);
     } else {
-      if (isa<CXXConstructorDecl>(*D))
+      if (!Policy.TerseOutput && isa<CXXConstructorDecl>(*D))
         Out << " {}";
     }
   }
@@ -1512,7 +1527,7 @@
                                                 E = D->varlist_end();
                                                 I != E; ++I) {
       Out << (I == D->varlist_begin() ? '(' : ',');
-      NamedDecl *ND = cast<NamedDecl>(cast<DeclRefExpr>(*I)->getDecl());
+      NamedDecl *ND = cast<DeclRefExpr>(*I)->getDecl();
       ND->printQualifiedName(Out);
     }
     Out << ")";
diff --git a/lib/AST/DeclTemplate.cpp b/lib/AST/DeclTemplate.cpp
index 00a6739..a7949b3 100644
--- a/lib/AST/DeclTemplate.cpp
+++ b/lib/AST/DeclTemplate.cpp
@@ -1,4 +1,4 @@
-//===--- DeclTemplate.cpp - Template Declaration AST Node Implementation --===//
+//===- DeclTemplate.cpp - Template Declaration AST Node Implementation ----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,13 +15,28 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
-#include "clang/AST/ExprCXX.h"
+#include "clang/AST/TemplateBase.h"
+#include "clang/AST/TemplateName.h"
+#include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/Builtins.h"
-#include "clang/Basic/IdentifierTable.h"
-#include "llvm/ADT/STLExtras.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
 #include <memory>
+#include <utility>
+
 using namespace clang;
 
 //===----------------------------------------------------------------------===//
@@ -33,9 +48,9 @@
                                              ArrayRef<NamedDecl *> Params,
                                              SourceLocation RAngleLoc,
                                              Expr *RequiresClause)
-  : TemplateLoc(TemplateLoc), LAngleLoc(LAngleLoc), RAngleLoc(RAngleLoc),
-    NumParams(Params.size()), ContainsUnexpandedParameterPack(false),
-    HasRequiresClause(static_cast<bool>(RequiresClause)) {
+    : TemplateLoc(TemplateLoc), LAngleLoc(LAngleLoc), RAngleLoc(RAngleLoc),
+      NumParams(Params.size()), ContainsUnexpandedParameterPack(false),
+      HasRequiresClause(static_cast<bool>(RequiresClause)) {
   for (unsigned Idx = 0; Idx < NumParams; ++Idx) {
     NamedDecl *P = Params[Idx];
     begin()[Idx] = P;
@@ -124,10 +139,12 @@
 }
 
 namespace clang {
+
 void *allocateDefaultArgStorageChain(const ASTContext &C) {
   return new (C) char[sizeof(void*) * 2];
 }
-}
+
+} // namespace clang
 
 //===----------------------------------------------------------------------===//
 // RedeclarableTemplateDecl Implementation
@@ -165,14 +182,28 @@
   return Common;
 }
 
+void RedeclarableTemplateDecl::loadLazySpecializationsImpl() const {
+  // Grab the most recent declaration to ensure we've loaded any lazy
+  // redeclarations of this template.
+  CommonBase *CommonBasePtr = getMostRecentDecl()->getCommonPtr();
+  if (CommonBasePtr->LazySpecializations) {
+    ASTContext &Context = getASTContext();
+    uint32_t *Specs = CommonBasePtr->LazySpecializations;
+    CommonBasePtr->LazySpecializations = nullptr;
+    for (uint32_t I = 0, N = *Specs++; I != N; ++I)
+      (void)Context.getExternalSource()->GetExternalDecl(Specs[I]);
+  }
+}
+
 template<class EntryType>
 typename RedeclarableTemplateDecl::SpecEntryTraits<EntryType>::DeclType *
 RedeclarableTemplateDecl::findSpecializationImpl(
     llvm::FoldingSetVector<EntryType> &Specs, ArrayRef<TemplateArgument> Args,
     void *&InsertPos) {
-  typedef SpecEntryTraits<EntryType> SETraits;
+  using SETraits = SpecEntryTraits<EntryType>;
+
   llvm::FoldingSetNodeID ID;
-  EntryType::Profile(ID,Args, getASTContext());
+  EntryType::Profile(ID, Args, getASTContext());
   EntryType *Entry = Specs.FindNodeOrInsertPos(ID, InsertPos);
   return Entry ? SETraits::getDecl(Entry)->getMostRecentDecl() : nullptr;
 }
@@ -181,7 +212,8 @@
 void RedeclarableTemplateDecl::addSpecializationImpl(
     llvm::FoldingSetVector<EntryType> &Specializations, EntryType *Entry,
     void *InsertPos) {
-  typedef SpecEntryTraits<EntryType> SETraits;
+  using SETraits = SpecEntryTraits<EntryType>;
+
   if (InsertPos) {
 #ifndef NDEBUG
     void *CorrectInsertPos;
@@ -232,18 +264,7 @@
 }
 
 void FunctionTemplateDecl::LoadLazySpecializations() const {
-  // Grab the most recent declaration to ensure we've loaded any lazy
-  // redeclarations of this template.
-  //
-  // FIXME: Avoid walking the entire redeclaration chain here.
-  Common *CommonPtr = getMostRecentDecl()->getCommonPtr();
-  if (CommonPtr->LazySpecializations) {
-    ASTContext &Context = getASTContext();
-    uint32_t *Specs = CommonPtr->LazySpecializations;
-    CommonPtr->LazySpecializations = nullptr;
-    for (uint32_t I = 0, N = *Specs++; I != N; ++I)
-      (void)Context.getExternalSource()->GetExternalDecl(Specs[I]);
-  }
+  loadLazySpecializationsImpl();
 }
 
 llvm::FoldingSetVector<FunctionTemplateSpecializationInfo> &
@@ -311,18 +332,7 @@
 }
 
 void ClassTemplateDecl::LoadLazySpecializations() const {
-  // Grab the most recent declaration to ensure we've loaded any lazy
-  // redeclarations of this template.
-  //
-  // FIXME: Avoid walking the entire redeclaration chain here.
-  Common *CommonPtr = getMostRecentDecl()->getCommonPtr();
-  if (CommonPtr->LazySpecializations) {
-    ASTContext &Context = getASTContext();
-    uint32_t *Specs = CommonPtr->LazySpecializations;
-    CommonPtr->LazySpecializations = nullptr;
-    for (uint32_t I = 0, N = *Specs++; I != N; ++I)
-      (void)Context.getExternalSource()->GetExternalDecl(Specs[I]);
-  }
+  loadLazySpecializationsImpl();
 }
 
 llvm::FoldingSetVector<ClassTemplateSpecializationDecl> &
@@ -562,7 +572,7 @@
 // TemplateTemplateParmDecl Method Implementations
 //===----------------------------------------------------------------------===//
 
-void TemplateTemplateParmDecl::anchor() { }
+void TemplateTemplateParmDecl::anchor() {}
 
 TemplateTemplateParmDecl::TemplateTemplateParmDecl(
     DeclContext *DC, SourceLocation L, unsigned D, unsigned P,
@@ -665,11 +675,12 @@
 // TemplateDecl Implementation
 //===----------------------------------------------------------------------===//
 
-void TemplateDecl::anchor() { }
+void TemplateDecl::anchor() {}
 
 //===----------------------------------------------------------------------===//
 // ClassTemplateSpecializationDecl Implementation
 //===----------------------------------------------------------------------===//
+
 ClassTemplateSpecializationDecl::
 ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK,
                                 DeclContext *DC, SourceLocation StartLoc,
@@ -677,11 +688,9 @@
                                 ClassTemplateDecl *SpecializedTemplate,
                                 ArrayRef<TemplateArgument> Args,
                                 ClassTemplateSpecializationDecl *PrevDecl)
-  : CXXRecordDecl(DK, TK, Context, DC, StartLoc, IdLoc,
-                  SpecializedTemplate->getIdentifier(),
-                  PrevDecl),
+    : CXXRecordDecl(DK, TK, Context, DC, StartLoc, IdLoc,
+                    SpecializedTemplate->getIdentifier(), PrevDecl),
     SpecializedTemplate(SpecializedTemplate),
-    ExplicitInfo(nullptr),
     TemplateArgs(TemplateArgumentList::CreateCopy(Context, Args)),
     SpecializationKind(TSK_Undeclared) {
 }
@@ -690,7 +699,7 @@
                                                                  Kind DK)
     : CXXRecordDecl(DK, TTK_Struct, C, nullptr, SourceLocation(),
                     SourceLocation(), nullptr, nullptr),
-      ExplicitInfo(nullptr), SpecializationKind(TSK_Undeclared) {}
+      SpecializationKind(TSK_Undeclared) {}
 
 ClassTemplateSpecializationDecl *
 ClassTemplateSpecializationDecl::Create(ASTContext &Context, TagKind TK,
@@ -726,12 +735,10 @@
   auto *PS = dyn_cast<ClassTemplatePartialSpecializationDecl>(this);
   if (const ASTTemplateArgumentListInfo *ArgsAsWritten =
           PS ? PS->getTemplateArgsAsWritten() : nullptr) {
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, ArgsAsWritten->arguments(), Policy);
+    printTemplateArgumentList(OS, ArgsAsWritten->arguments(), Policy);
   } else {
     const TemplateArgumentList &TemplateArgs = getTemplateArgs();
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, TemplateArgs.asArray(), Policy);
+    printTemplateArgumentList(OS, TemplateArgs.asArray(), Policy);
   }
 }
 
@@ -762,7 +769,7 @@
     // An implicit instantiation of a class template partial specialization
     // uses ExplicitInfo to record the TypeAsWritten, but the source
     // locations should be retrieved from the instantiation pattern.
-    typedef ClassTemplatePartialSpecializationDecl CTPSDecl;
+    using CTPSDecl = ClassTemplatePartialSpecializationDecl;
     CTPSDecl *ctpsd = const_cast<CTPSDecl*>(cast<CTPSDecl>(this));
     CTPSDecl *inst_from = ctpsd->getInstantiatedFromMember();
     assert(inst_from != nullptr);
@@ -785,7 +792,7 @@
 //===----------------------------------------------------------------------===//
 // ClassTemplatePartialSpecializationDecl Implementation
 //===----------------------------------------------------------------------===//
-void ClassTemplatePartialSpecializationDecl::anchor() { }
+void ClassTemplatePartialSpecializationDecl::anchor() {}
 
 ClassTemplatePartialSpecializationDecl::
 ClassTemplatePartialSpecializationDecl(ASTContext &Context, TagKind TK,
@@ -797,14 +804,12 @@
                                        ArrayRef<TemplateArgument> Args,
                                const ASTTemplateArgumentListInfo *ArgInfos,
                                ClassTemplatePartialSpecializationDecl *PrevDecl)
-  : ClassTemplateSpecializationDecl(Context,
-                                    ClassTemplatePartialSpecialization,
-                                    TK, DC, StartLoc, IdLoc,
-                                    SpecializedTemplate,
-                                    Args, PrevDecl),
-    TemplateParams(Params), ArgsAsWritten(ArgInfos),
-    InstantiatedFromMember(nullptr, false)
-{
+    : ClassTemplateSpecializationDecl(Context,
+                                      ClassTemplatePartialSpecialization,
+                                      TK, DC, StartLoc, IdLoc,
+                                      SpecializedTemplate, Args, PrevDecl),
+      TemplateParams(Params), ArgsAsWritten(ArgInfos),
+      InstantiatedFromMember(nullptr, false) {
   AdoptTemplateParameterList(Params, this);
 }
 
@@ -845,7 +850,7 @@
 // FriendTemplateDecl Implementation
 //===----------------------------------------------------------------------===//
 
-void FriendTemplateDecl::anchor() { }
+void FriendTemplateDecl::anchor() {}
 
 FriendTemplateDecl *
 FriendTemplateDecl::Create(ASTContext &Context, DeclContext *DC,
@@ -891,7 +896,7 @@
 // ClassScopeFunctionSpecializationDecl Implementation
 //===----------------------------------------------------------------------===//
 
-void ClassScopeFunctionSpecializationDecl::anchor() { }
+void ClassScopeFunctionSpecializationDecl::anchor() {}
 
 ClassScopeFunctionSpecializationDecl *
 ClassScopeFunctionSpecializationDecl::CreateDeserialized(ASTContext &C,
@@ -927,21 +932,8 @@
                                      DeclarationName(), nullptr, nullptr);
 }
 
-// TODO: Unify across class, function and variable templates?
-//       May require moving this and Common to RedeclarableTemplateDecl.
 void VarTemplateDecl::LoadLazySpecializations() const {
-  // Grab the most recent declaration to ensure we've loaded any lazy
-  // redeclarations of this template.
-  //
-  // FIXME: Avoid walking the entire redeclaration chain here.
-  Common *CommonPtr = getMostRecentDecl()->getCommonPtr();
-  if (CommonPtr->LazySpecializations) {
-    ASTContext &Context = getASTContext();
-    uint32_t *Specs = CommonPtr->LazySpecializations;
-    CommonPtr->LazySpecializations = nullptr;
-    for (uint32_t I = 0, N = *Specs++; I != N; ++I)
-      (void)Context.getExternalSource()->GetExternalDecl(Specs[I]);
-  }
+  loadLazySpecializationsImpl();
 }
 
 llvm::FoldingSetVector<VarTemplateSpecializationDecl> &
@@ -1020,21 +1012,22 @@
 //===----------------------------------------------------------------------===//
 // VarTemplateSpecializationDecl Implementation
 //===----------------------------------------------------------------------===//
+
 VarTemplateSpecializationDecl::VarTemplateSpecializationDecl(
     Kind DK, ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
     SourceLocation IdLoc, VarTemplateDecl *SpecializedTemplate, QualType T,
     TypeSourceInfo *TInfo, StorageClass S, ArrayRef<TemplateArgument> Args)
     : VarDecl(DK, Context, DC, StartLoc, IdLoc,
               SpecializedTemplate->getIdentifier(), T, TInfo, S),
-      SpecializedTemplate(SpecializedTemplate), ExplicitInfo(nullptr),
+      SpecializedTemplate(SpecializedTemplate),
       TemplateArgs(TemplateArgumentList::CreateCopy(Context, Args)),
-      SpecializationKind(TSK_Undeclared) {}
+      SpecializationKind(TSK_Undeclared), IsCompleteDefinition(false) {}
 
 VarTemplateSpecializationDecl::VarTemplateSpecializationDecl(Kind DK,
                                                              ASTContext &C)
     : VarDecl(DK, C, nullptr, SourceLocation(), SourceLocation(), nullptr,
               QualType(), nullptr, SC_None),
-      ExplicitInfo(nullptr), SpecializationKind(TSK_Undeclared) {}
+      SpecializationKind(TSK_Undeclared), IsCompleteDefinition(false) {}
 
 VarTemplateSpecializationDecl *VarTemplateSpecializationDecl::Create(
     ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
@@ -1058,12 +1051,10 @@
   auto *PS = dyn_cast<VarTemplatePartialSpecializationDecl>(this);
   if (const ASTTemplateArgumentListInfo *ArgsAsWritten =
           PS ? PS->getTemplateArgsAsWritten() : nullptr) {
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, ArgsAsWritten->arguments(), Policy);
+    printTemplateArgumentList(OS, ArgsAsWritten->arguments(), Policy);
   } else {
     const TemplateArgumentList &TemplateArgs = getTemplateArgs();
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, TemplateArgs.asArray(), Policy);
+    printTemplateArgumentList(OS, TemplateArgs.asArray(), Policy);
   }
 }
 
@@ -1085,6 +1076,7 @@
 //===----------------------------------------------------------------------===//
 // VarTemplatePartialSpecializationDecl Implementation
 //===----------------------------------------------------------------------===//
+
 void VarTemplatePartialSpecializationDecl::anchor() {}
 
 VarTemplatePartialSpecializationDecl::VarTemplatePartialSpecializationDecl(
diff --git a/lib/AST/DeclarationName.cpp b/lib/AST/DeclarationName.cpp
index 1f8e26d..31bae93 100644
--- a/lib/AST/DeclarationName.cpp
+++ b/lib/AST/DeclarationName.cpp
@@ -1,4 +1,4 @@
-//===-- DeclarationName.cpp - Declaration names implementation --*- C++ -*-===//
+//===- DeclarationName.cpp - Declaration names implementation -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,20 +11,36 @@
 // classes.
 //
 //===----------------------------------------------------------------------===//
+
 #include "clang/AST/DeclarationName.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/PrettyPrinter.h"
 #include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/AST/TypeOrdering.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <string>
+
 using namespace clang;
 
 namespace clang {
+
 /// CXXSpecialName - Records the type associated with one of the
 /// "special" kinds of declaration names in C++, e.g., constructors,
 /// destructors, and conversion functions.
@@ -89,6 +105,8 @@
   }
 };
 
+} // namespace clang
+
 static int compareInt(unsigned A, unsigned B) {
   return (A < B ? -1 : (A > B ? 1 : 0));
 }
@@ -197,10 +215,9 @@
   case DeclarationName::CXXConstructorName:
     return printCXXConstructorDestructorName(N.getCXXNameType(), OS, Policy);
 
-  case DeclarationName::CXXDestructorName: {
+  case DeclarationName::CXXDestructorName:
     OS << '~';
     return printCXXConstructorDestructorName(N.getCXXNameType(), OS, Policy);
-  }
 
   case DeclarationName::CXXDeductionGuideName:
     OS << "<deduction guide for ";
@@ -250,13 +267,15 @@
   llvm_unreachable("Unexpected declaration name kind");
 }
 
+namespace clang {
+
 raw_ostream &operator<<(raw_ostream &OS, DeclarationName N) {
   LangOptions LO;
   N.print(OS, PrintingPolicy(LO));
   return OS;
 }
 
-} // end namespace clang
+} // namespace clang
 
 DeclarationName::NameKind DeclarationName::getNameKind() const {
   switch (getStoredNameKind()) {
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index 6cda82b..1813dfb 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -1780,6 +1780,7 @@
   case OO_Amp: return BO_And;
   case OO_Pipe: return BO_Or;
   case OO_Equal: return BO_Assign;
+  case OO_Spaceship: return BO_Cmp;
   case OO_Less: return BO_LT;
   case OO_Greater: return BO_GT;
   case OO_PlusEqual: return BO_AddAssign;
@@ -1811,6 +1812,7 @@
     OO_Star, OO_Slash, OO_Percent,
     OO_Plus, OO_Minus,
     OO_LessLess, OO_GreaterGreater,
+    OO_Spaceship,
     OO_Less, OO_Greater, OO_LessEqual, OO_GreaterEqual,
     OO_EqualEqual, OO_ExclaimEqual,
     OO_Amp,
@@ -2915,6 +2917,18 @@
   return false;
 }
 
+bool CallExpr::isBuiltinAssumeFalse(const ASTContext &Ctx) const {
+  const FunctionDecl* FD = getDirectCallee();
+  if (!FD || (FD->getBuiltinID() != Builtin::BI__assume &&
+              FD->getBuiltinID() != Builtin::BI__builtin_assume))
+    return false;
+  
+  const Expr* Arg = getArg(0);
+  bool ArgVal;
+  return !Arg->isValueDependent() &&
+         Arg->EvaluateAsBooleanCondition(ArgVal, Ctx) && !ArgVal;
+}
+
 namespace {
   /// \brief Look for any side effects within a Stmt.
   class SideEffectFinder : public ConstEvaluatedExprVisitor<SideEffectFinder> {
@@ -3114,7 +3128,8 @@
     if (DCE->getTypeAsWritten()->isReferenceType() &&
         DCE->getCastKind() == CK_Dynamic)
       return true;
-  } // Fall through.
+    }
+    LLVM_FALLTHROUGH;
   case ImplicitCastExprClass:
   case CStyleCastExprClass:
   case CXXStaticCastExprClass:
@@ -3443,10 +3458,11 @@
       if (Field->isBitField())
         return Field;
 
-  if (ObjCIvarRefExpr *IvarRef = dyn_cast<ObjCIvarRefExpr>(E))
-    if (FieldDecl *Ivar = dyn_cast<FieldDecl>(IvarRef->getDecl()))
-      if (Ivar->isBitField())
-        return Ivar;
+  if (ObjCIvarRefExpr *IvarRef = dyn_cast<ObjCIvarRefExpr>(E)) {
+    FieldDecl *Ivar = IvarRef->getDecl();
+    if (Ivar->isBitField())
+      return Ivar;
+  }
 
   if (DeclRefExpr *DeclRef = dyn_cast<DeclRefExpr>(E)) {
     if (FieldDecl *Field = dyn_cast<FieldDecl>(DeclRef->getDecl()))
diff --git a/lib/AST/ExprCXX.cpp b/lib/AST/ExprCXX.cpp
index 182a8b4..b8a837f 100644
--- a/lib/AST/ExprCXX.cpp
+++ b/lib/AST/ExprCXX.cpp
@@ -1,4 +1,4 @@
-//===--- ExprCXX.cpp - (C++) Expression AST Node Implementation -----------===//
+//===- ExprCXX.cpp - (C++) Expression AST Node Implementation -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,15 +11,33 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclAccessPair.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
-#include "clang/AST/DeclTemplate.h"
-#include "clang/AST/ExprCXX.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/LambdaCapture.h"
+#include "clang/AST/NestedNameSpecifier.h"
+#include "clang/AST/TemplateBase.h"
+#include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
-#include "clang/Basic/IdentifierTable.h"
-using namespace clang;
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <memory>
 
+using namespace clang;
 
 //===----------------------------------------------------------------------===//
 //  Child Iterators for iterating over subexpressions/substatements
@@ -85,15 +103,14 @@
                        Expr *initializer, QualType ty,
                        TypeSourceInfo *allocatedTypeInfo,
                        SourceRange Range, SourceRange directInitRange)
-  : Expr(CXXNewExprClass, ty, VK_RValue, OK_Ordinary,
-         ty->isDependentType(), ty->isDependentType(),
-         ty->isInstantiationDependentType(),
-         ty->containsUnexpandedParameterPack()),
-    SubExprs(nullptr), OperatorNew(operatorNew), OperatorDelete(operatorDelete),
-    AllocatedTypeInfo(allocatedTypeInfo), TypeIdParens(typeIdParens),
-    Range(Range), DirectInitRange(directInitRange),
-    GlobalNew(globalNew), PassAlignment(PassAlignment),
-    UsualArrayDeleteWantsSize(usualArrayDeleteWantsSize) {
+    : Expr(CXXNewExprClass, ty, VK_RValue, OK_Ordinary, ty->isDependentType(),
+           ty->isDependentType(), ty->isInstantiationDependentType(),
+           ty->containsUnexpandedParameterPack()),
+      OperatorNew(operatorNew), OperatorDelete(operatorDelete),
+      AllocatedTypeInfo(allocatedTypeInfo), TypeIdParens(typeIdParens),
+      Range(Range), DirectInitRange(directInitRange), GlobalNew(globalNew),
+      PassAlignment(PassAlignment),
+      UsualArrayDeleteWantsSize(usualArrayDeleteWantsSize) {
   assert((initializer != nullptr || initializationStyle == NoInit) &&
          "Only NoInit can have no initializer.");
   StoredInitializationStyle = initializer ? initializationStyle + 1 : 0;
@@ -187,8 +204,7 @@
 
 // CXXPseudoDestructorExpr
 PseudoDestructorTypeStorage::PseudoDestructorTypeStorage(TypeSourceInfo *Info)
- : Type(Info) 
-{
+    : Type(Info) {
   Location = Info->getTypeLoc().getLocalSourceRange().getBegin();
 }
 
@@ -225,7 +241,7 @@
     Base(static_cast<Stmt *>(Base)), IsArrow(isArrow),
     OperatorLoc(OperatorLoc), QualifierLoc(QualifierLoc),
     ScopeType(ScopeType), ColonColonLoc(ColonColonLoc), TildeLoc(TildeLoc),
-    DestroyedType(DestroyedType) { }
+    DestroyedType(DestroyedType) {}
 
 QualType CXXPseudoDestructorExpr::getDestroyedType() const {
   if (TypeSourceInfo *TInfo = DestroyedType.getTypeSourceInfo())
@@ -251,8 +267,7 @@
                              bool ADL,
                              const TemplateArgumentListInfo *Args,
                              UnresolvedSetIterator Begin,
-                             UnresolvedSetIterator End)
-{
+                             UnresolvedSetIterator End) {
   assert(Args || TemplateKWLoc.isValid());
   unsigned num_args = Args ? Args->size() : 0;
 
@@ -290,21 +305,20 @@
                            bool KnownDependent,
                            bool KnownInstantiationDependent,
                            bool KnownContainsUnexpandedParameterPack)
-  : Expr(K, C.OverloadTy, VK_LValue, OK_Ordinary, KnownDependent, 
-         KnownDependent,
-         (KnownInstantiationDependent ||
-          NameInfo.isInstantiationDependent() ||
-          (QualifierLoc &&
-           QualifierLoc.getNestedNameSpecifier()->isInstantiationDependent())),
-         (KnownContainsUnexpandedParameterPack ||
-          NameInfo.containsUnexpandedParameterPack() ||
-          (QualifierLoc && 
-           QualifierLoc.getNestedNameSpecifier()
-                                      ->containsUnexpandedParameterPack()))),
-    NameInfo(NameInfo), QualifierLoc(QualifierLoc),
-    Results(nullptr), NumResults(End - Begin),
-    HasTemplateKWAndArgsInfo(TemplateArgs != nullptr ||
-                             TemplateKWLoc.isValid()) {
+    : Expr(K, C.OverloadTy, VK_LValue, OK_Ordinary, KnownDependent,
+           KnownDependent,
+           (KnownInstantiationDependent ||
+            NameInfo.isInstantiationDependent() ||
+            (QualifierLoc &&
+            QualifierLoc.getNestedNameSpecifier()->isInstantiationDependent())),
+           (KnownContainsUnexpandedParameterPack ||
+            NameInfo.containsUnexpandedParameterPack() ||
+            (QualifierLoc &&
+             QualifierLoc.getNestedNameSpecifier()
+                                        ->containsUnexpandedParameterPack()))),
+      NameInfo(NameInfo), QualifierLoc(QualifierLoc), NumResults(End - Begin),
+      HasTemplateKWAndArgsInfo(TemplateArgs != nullptr ||
+                               TemplateKWLoc.isValid()) {
   NumResults = End - Begin;
   if (NumResults) {
     // Determine whether this expression is type-dependent.
@@ -509,7 +523,6 @@
   return nullptr;
 }
 
-
 CXXRecordDecl *CXXMemberCallExpr::getRecordDecl() const {
   Expr* ThisArg = getImplicitObjectArgument();
   if (!ThisArg)
@@ -521,7 +534,6 @@
   return ThisArg->getType()->getAsCXXRecordDecl();
 }
 
-
 //===----------------------------------------------------------------------===//
 //  Named casts
 //===----------------------------------------------------------------------===//
@@ -758,16 +770,12 @@
                                                bool ListInitialization,
                                                bool StdInitListInitialization,
                                                bool ZeroInitialization)
-  : CXXConstructExpr(C, CXXTemporaryObjectExprClass, Type,
-                     TSI->getTypeLoc().getBeginLoc(),
-                     Cons, false, Args,
-                     HadMultipleCandidates,
-                     ListInitialization,
-                     StdInitListInitialization,
-                     ZeroInitialization,
-                     CXXConstructExpr::CK_Complete, ParenOrBraceRange),
-    Type(TSI) {
-}
+    : CXXConstructExpr(C, CXXTemporaryObjectExprClass, Type,
+                       TSI->getTypeLoc().getBeginLoc(), Cons, false, Args,
+                       HadMultipleCandidates, ListInitialization,
+                       StdInitListInitialization,  ZeroInitialization,
+                       CXXConstructExpr::CK_Complete, ParenOrBraceRange),
+      Type(TSI) {}
 
 SourceLocation CXXTemporaryObjectExpr::getLocStart() const {
   return Type->getTypeLoc().getBeginLoc();
@@ -810,18 +818,16 @@
                                    bool ZeroInitialization,
                                    ConstructionKind ConstructKind,
                                    SourceRange ParenOrBraceRange)
-  : Expr(SC, T, VK_RValue, OK_Ordinary,
-         T->isDependentType(), T->isDependentType(),
-         T->isInstantiationDependentType(),
-         T->containsUnexpandedParameterPack()),
-    Constructor(Ctor), Loc(Loc), ParenOrBraceRange(ParenOrBraceRange),
-    NumArgs(Args.size()),
-    Elidable(Elidable), HadMultipleCandidates(HadMultipleCandidates),
-    ListInitialization(ListInitialization),
-    StdInitListInitialization(StdInitListInitialization),
-    ZeroInitialization(ZeroInitialization),
-    ConstructKind(ConstructKind), Args(nullptr)
-{
+    : Expr(SC, T, VK_RValue, OK_Ordinary,
+           T->isDependentType(), T->isDependentType(),
+           T->isInstantiationDependentType(),
+           T->containsUnexpandedParameterPack()),
+      Constructor(Ctor), Loc(Loc), ParenOrBraceRange(ParenOrBraceRange),
+      NumArgs(Args.size()), Elidable(Elidable),
+      HadMultipleCandidates(HadMultipleCandidates),
+      ListInitialization(ListInitialization),
+      StdInitListInitialization(StdInitListInitialization),
+      ZeroInitialization(ZeroInitialization), ConstructKind(ConstructKind) {
   if (NumArgs) {
     this->Args = new (C) Stmt*[Args.size()];
     
@@ -843,8 +849,7 @@
 LambdaCapture::LambdaCapture(SourceLocation Loc, bool Implicit,
                              LambdaCaptureKind Kind, VarDecl *Var,
                              SourceLocation EllipsisLoc)
-  : DeclAndBits(Var, 0), Loc(Loc), EllipsisLoc(EllipsisLoc)
-{
+    : DeclAndBits(Var, 0), Loc(Loc), EllipsisLoc(EllipsisLoc) {
   unsigned Bits = 0;
   if (Implicit)
     Bits |= Capture_Implicit;
@@ -852,7 +857,7 @@
   switch (Kind) {
   case LCK_StarThis:
     Bits |= Capture_ByCopy;
-    // Fall through
+    LLVM_FALLTHROUGH;
   case LCK_This:
     assert(!Var && "'this' capture cannot have a variable!");
     Bits |= Capture_This;
@@ -860,7 +865,7 @@
 
   case LCK_ByCopy:
     Bits |= Capture_ByCopy;
-    // Fall through 
+    LLVM_FALLTHROUGH;
   case LCK_ByRef:
     assert(Var && "capture must have a variable!");
     break;
@@ -1011,7 +1016,7 @@
   // initialized in ASTStmtReader::VisitLambdaExpr, but for reasons I
   // don't understand, that doesn't work.
   if (!getStoredStmts()[NumCaptures])
-    *const_cast<clang::Stmt **>(&getStoredStmts()[NumCaptures]) =
+    *const_cast<Stmt **>(&getStoredStmts()[NumCaptures]) =
         getCallOperator()->getBody();
 
   return static_cast<CompoundStmt *>(getStoredStmts()[NumCaptures]);
@@ -1024,12 +1029,12 @@
 ExprWithCleanups::ExprWithCleanups(Expr *subexpr,
                                    bool CleanupsHaveSideEffects,
                                    ArrayRef<CleanupObject> objects)
-  : Expr(ExprWithCleanupsClass, subexpr->getType(),
-         subexpr->getValueKind(), subexpr->getObjectKind(),
-         subexpr->isTypeDependent(), subexpr->isValueDependent(),
-         subexpr->isInstantiationDependent(),
-         subexpr->containsUnexpandedParameterPack()),
-    SubExpr(subexpr) {
+    : Expr(ExprWithCleanupsClass, subexpr->getType(),
+           subexpr->getValueKind(), subexpr->getObjectKind(),
+           subexpr->isTypeDependent(), subexpr->isValueDependent(),
+           subexpr->isInstantiationDependent(),
+           subexpr->containsUnexpandedParameterPack()),
+      SubExpr(subexpr) {
   ExprWithCleanupsBits.CleanupsHaveSideEffects = CleanupsHaveSideEffects;
   ExprWithCleanupsBits.NumObjects = objects.size();
   for (unsigned i = 0, e = objects.size(); i != e; ++i)
@@ -1046,7 +1051,7 @@
 }
 
 ExprWithCleanups::ExprWithCleanups(EmptyShell empty, unsigned numObjects)
-  : Expr(ExprWithCleanupsClass, empty) {
+    : Expr(ExprWithCleanupsClass, empty) {
   ExprWithCleanupsBits.NumObjects = numObjects;
 }
 
@@ -1062,20 +1067,18 @@
                                                  SourceLocation LParenLoc,
                                                  ArrayRef<Expr*> Args,
                                                  SourceLocation RParenLoc)
-  : Expr(CXXUnresolvedConstructExprClass, 
-         Type->getType().getNonReferenceType(),
-         (Type->getType()->isLValueReferenceType() ? VK_LValue
-          :Type->getType()->isRValueReferenceType()? VK_XValue
-          :VK_RValue),
-         OK_Ordinary,
-         Type->getType()->isDependentType() ||
-             Type->getType()->getContainedDeducedType(),
-         true, true,
-         Type->getType()->containsUnexpandedParameterPack()),
-    Type(Type),
-    LParenLoc(LParenLoc),
-    RParenLoc(RParenLoc),
-    NumArgs(Args.size()) {
+    : Expr(CXXUnresolvedConstructExprClass,
+           Type->getType().getNonReferenceType(),
+           (Type->getType()->isLValueReferenceType()
+                ? VK_LValue
+                : Type->getType()->isRValueReferenceType() ? VK_XValue
+                                                           : VK_RValue),
+           OK_Ordinary,
+           Type->getType()->isDependentType() ||
+               Type->getType()->getContainedDeducedType(),
+           true, true, Type->getType()->containsUnexpandedParameterPack()),
+      Type(Type), LParenLoc(LParenLoc), RParenLoc(RParenLoc),
+      NumArgs(Args.size()) {
   Expr **StoredArgs = getTrailingObjects<Expr *>();
   for (unsigned I = 0; I != Args.size(); ++I) {
     if (Args[I]->containsUnexpandedParameterPack())
@@ -1175,7 +1178,7 @@
   void *Mem = C.Allocate(Size, alignof(CXXDependentScopeMemberExpr));
   CXXDependentScopeMemberExpr *E
     =  new (Mem) CXXDependentScopeMemberExpr(C, nullptr, QualType(),
-                                             0, SourceLocation(),
+                                             false, SourceLocation(),
                                              NestedNameSpecifierLoc(),
                                              SourceLocation(), nullptr,
                                              DeclarationNameInfo(), nullptr);
@@ -1218,19 +1221,18 @@
                                    const TemplateArgumentListInfo *TemplateArgs,
                                            UnresolvedSetIterator Begin, 
                                            UnresolvedSetIterator End)
-  : OverloadExpr(UnresolvedMemberExprClass, C, QualifierLoc, TemplateKWLoc,
-                 MemberNameInfo, TemplateArgs, Begin, End,
-                 // Dependent
-                 ((Base && Base->isTypeDependent()) ||
-                  BaseType->isDependentType()),
-                 ((Base && Base->isInstantiationDependent()) ||
-                   BaseType->isInstantiationDependentType()),
-                 // Contains unexpanded parameter pack
-                 ((Base && Base->containsUnexpandedParameterPack()) ||
-                  BaseType->containsUnexpandedParameterPack())),
-    IsArrow(IsArrow), HasUnresolvedUsing(HasUnresolvedUsing),
-    Base(Base), BaseType(BaseType), OperatorLoc(OperatorLoc) {
-
+    : OverloadExpr(
+          UnresolvedMemberExprClass, C, QualifierLoc, TemplateKWLoc,
+          MemberNameInfo, TemplateArgs, Begin, End,
+          // Dependent
+          ((Base && Base->isTypeDependent()) || BaseType->isDependentType()),
+          ((Base && Base->isInstantiationDependent()) ||
+           BaseType->isInstantiationDependentType()),
+          // Contains unexpanded parameter pack
+          ((Base && Base->containsUnexpandedParameterPack()) ||
+           BaseType->containsUnexpandedParameterPack())),
+      IsArrow(IsArrow), HasUnresolvedUsing(HasUnresolvedUsing), Base(Base),
+      BaseType(BaseType), OperatorLoc(OperatorLoc) {
   // Check whether all of the members are non-static member functions,
   // and if so, mark give this bound-member type instead of overload type.
   if (hasOnlyNonStaticMemberFunctions(Begin, End))
@@ -1328,13 +1330,14 @@
 
 SubstNonTypeTemplateParmPackExpr::
 SubstNonTypeTemplateParmPackExpr(QualType T, 
+                                 ExprValueKind ValueKind,
                                  NonTypeTemplateParmDecl *Param,
                                  SourceLocation NameLoc,
                                  const TemplateArgument &ArgPack)
-  : Expr(SubstNonTypeTemplateParmPackExprClass, T, VK_RValue, OK_Ordinary, 
-         true, true, true, true),
-    Param(Param), Arguments(ArgPack.pack_begin()), 
-    NumArguments(ArgPack.pack_size()), NameLoc(NameLoc) { }
+    : Expr(SubstNonTypeTemplateParmPackExprClass, T, ValueKind, OK_Ordinary,
+           true, true, true, true),
+      Param(Param), Arguments(ArgPack.pack_begin()),
+      NumArguments(ArgPack.pack_size()), NameLoc(NameLoc) {}
 
 TemplateArgument SubstNonTypeTemplateParmPackExpr::getArgumentPack() const {
   return TemplateArgument(llvm::makeArrayRef(Arguments, NumArguments));
@@ -1390,13 +1393,12 @@
                              ArrayRef<TypeSourceInfo *> Args,
                              SourceLocation RParenLoc,
                              bool Value)
-  : Expr(TypeTraitExprClass, T, VK_RValue, OK_Ordinary,
-         /*TypeDependent=*/false,
-         /*ValueDependent=*/false,
-         /*InstantiationDependent=*/false,
-         /*ContainsUnexpandedParameterPack=*/false),
-    Loc(Loc), RParenLoc(RParenLoc)
-{
+    : Expr(TypeTraitExprClass, T, VK_RValue, OK_Ordinary,
+           /*TypeDependent=*/false,
+           /*ValueDependent=*/false,
+           /*InstantiationDependent=*/false,
+           /*ContainsUnexpandedParameterPack=*/false),
+      Loc(Loc), RParenLoc(RParenLoc) {
   TypeTraitExprBits.Kind = Kind;
   TypeTraitExprBits.Value = Value;
   TypeTraitExprBits.NumArgs = Args.size();
@@ -1431,4 +1433,4 @@
   return new (Mem) TypeTraitExpr(EmptyShell());
 }
 
-void ArrayTypeTraitExpr::anchor() { }
+void ArrayTypeTraitExpr::anchor() {}
diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp
index ecce50e..135e70e 100644
--- a/lib/AST/ExprConstant.cpp
+++ b/lib/AST/ExprConstant.cpp
@@ -48,6 +48,8 @@
 #include <cstring>
 #include <functional>
 
+#define DEBUG_TYPE "exprconstant"
+
 using namespace clang;
 using llvm::APSInt;
 using llvm::APFloat;
@@ -62,7 +64,13 @@
   static QualType getType(APValue::LValueBase B) {
     if (!B) return QualType();
     if (const ValueDecl *D = B.dyn_cast<const ValueDecl*>())
-      return D->getType();
+      // FIXME: It's unclear where we're supposed to take the type from, and
+      // this actually matters for arrays of unknown bound. Using the type of
+      // the most recent declaration isn't clearly correct in general. Eg:
+      //
+      // extern int arr[]; void f() { extern int arr[3]; };
+      // constexpr int *p = &arr[1]; // valid?
+      return cast<ValueDecl>(D->getMostRecentDecl())->getType();
 
     const Expr *Base = B.get<const Expr*>();
 
@@ -125,7 +133,11 @@
 
     E = E->IgnoreParens();
     // If we're doing a variable assignment from e.g. malloc(N), there will
-    // probably be a cast of some kind. Ignore it.
+    // probably be a cast of some kind. In exotic cases, we might also see a
+    // top-level ExprWithCleanups. Ignore them either way.
+    if (const auto *EC = dyn_cast<ExprWithCleanups>(E))
+      E = EC->getSubExpr()->IgnoreParens();
+
     if (const auto *Cast = dyn_cast<CastExpr>(E))
       E = Cast->getSubExpr()->IgnoreParens();
 
@@ -141,6 +153,12 @@
     return E && E->getType()->isPointerType() && tryUnwrapAllocSizeCall(E);
   }
 
+  /// The bound to claim that an array of unknown bound has.
+  /// The value in MostDerivedArraySize is undefined in this case. So, set it
+  /// to an arbitrary value that's likely to loudly break things if it's used.
+  static const uint64_t AssumedSizeForUnsizedArray =
+      std::numeric_limits<uint64_t>::max() / 2;
+
   /// Determines if an LValue with the given LValueBase will have an unsized
   /// array in its designator.
   /// Find the path length and type of the most-derived subobject in the given
@@ -148,7 +166,8 @@
   static unsigned
   findMostDerivedSubobject(ASTContext &Ctx, APValue::LValueBase Base,
                            ArrayRef<APValue::LValuePathEntry> Path,
-                           uint64_t &ArraySize, QualType &Type, bool &IsArray) {
+                           uint64_t &ArraySize, QualType &Type, bool &IsArray,
+                           bool &FirstEntryIsUnsizedArray) {
     // This only accepts LValueBases from APValues, and APValues don't support
     // arrays that lack size info.
     assert(!isBaseAnAllocSizeCall(Base) &&
@@ -158,12 +177,18 @@
 
     for (unsigned I = 0, N = Path.size(); I != N; ++I) {
       if (Type->isArrayType()) {
-        const ConstantArrayType *CAT =
-            cast<ConstantArrayType>(Ctx.getAsArrayType(Type));
-        Type = CAT->getElementType();
-        ArraySize = CAT->getSize().getZExtValue();
+        const ArrayType *AT = Ctx.getAsArrayType(Type);
+        Type = AT->getElementType();
         MostDerivedLength = I + 1;
         IsArray = true;
+
+        if (auto *CAT = dyn_cast<ConstantArrayType>(AT)) {
+          ArraySize = CAT->getSize().getZExtValue();
+        } else {
+          assert(I == 0 && "unexpected unsized array designator");
+          FirstEntryIsUnsizedArray = true;
+          ArraySize = AssumedSizeForUnsizedArray;
+        }
       } else if (Type->isAnyComplexType()) {
         const ComplexType *CT = Type->castAs<ComplexType>();
         Type = CT->getElementType();
@@ -246,10 +271,12 @@
         Entries.insert(Entries.end(), VEntries.begin(), VEntries.end());
         if (V.getLValueBase()) {
           bool IsArray = false;
+          bool FirstIsUnsizedArray = false;
           MostDerivedPathLength = findMostDerivedSubobject(
               Ctx, V.getLValueBase(), V.getLValuePath(), MostDerivedArraySize,
-              MostDerivedType, IsArray);
+              MostDerivedType, IsArray, FirstIsUnsizedArray);
           MostDerivedIsArrayElement = IsArray;
+          FirstEntryIsAnUnsizedArray = FirstIsUnsizedArray;
         }
       }
     }
@@ -318,7 +345,7 @@
       // The value in MostDerivedArraySize is undefined in this case. So, set it
       // to an arbitrary value that's likely to loudly break things if it's
       // used.
-      MostDerivedArraySize = std::numeric_limits<uint64_t>::max() / 2;
+      MostDerivedArraySize = AssumedSizeForUnsizedArray;
       MostDerivedPathLength = Entries.size();
     }
     /// Update this designator to refer to the given base or member of this
@@ -350,6 +377,7 @@
       MostDerivedArraySize = 2;
       MostDerivedPathLength = Entries.size();
     }
+    void diagnoseUnsizedArrayPointerArithmetic(EvalInfo &Info, const Expr *E);
     void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E,
                                    const APSInt &N);
     /// Add N to the address of this subobject.
@@ -357,6 +385,7 @@
       if (Invalid || !N) return;
       uint64_t TruncatedN = N.extOrTrunc(64).getZExtValue();
       if (isMostDerivedAnUnsizedArray()) {
+        diagnoseUnsizedArrayPointerArithmetic(Info, E);
         // Can't verify -- trust that the user is doing the right thing (or if
         // not, trust that the caller will catch the bad behavior).
         // FIXME: Should we reject if this overflows, at least?
@@ -1094,9 +1123,19 @@
     setInvalid();
     return false;
   }
+  // Note, we do not diagnose if isMostDerivedAnUnsizedArray(), because there
+  // must actually be at least one array element; even a VLA cannot have a
+  // bound of zero. And if our index is nonzero, we already had a CCEDiag.
   return true;
 }
 
+void SubobjectDesignator::diagnoseUnsizedArrayPointerArithmetic(EvalInfo &Info,
+                                                                const Expr *E) {
+  Info.CCEDiag(E, diag::note_constexpr_unsized_array_indexed);
+  // Do not set the designator as invalid: we can represent this situation,
+  // and correct handling of __builtin_object_size requires us to do so.
+}
+
 void SubobjectDesignator::diagnosePointerArithmetic(EvalInfo &Info,
                                                     const Expr *E,
                                                     const APSInt &N) {
@@ -1240,8 +1279,6 @@
                     IsNullPtr);
       else {
         assert(!InvalidBase && "APValues can't handle invalid LValue bases");
-        assert(!Designator.FirstEntryIsAnUnsizedArray &&
-               "Unsized array with a valid base?");
         V = APValue(Base, Offset, Designator.Entries,
                     Designator.IsOnePastTheEnd, CallIndex, IsNullPtr);
       }
@@ -1314,12 +1351,17 @@
       if (checkSubobject(Info, E, isa<FieldDecl>(D) ? CSK_Field : CSK_Base))
         Designator.addDeclUnchecked(D, Virtual);
     }
-    void addUnsizedArray(EvalInfo &Info, QualType ElemTy) {
-      assert(Designator.Entries.empty() && getType(Base)->isPointerType());
-      assert(isBaseAnAllocSizeCall(Base) &&
-             "Only alloc_size bases can have unsized arrays");
-      Designator.FirstEntryIsAnUnsizedArray = true;
-      Designator.addUnsizedArrayUnchecked(ElemTy);
+    void addUnsizedArray(EvalInfo &Info, const Expr *E, QualType ElemTy) {
+      if (!Designator.Entries.empty()) {
+        Info.CCEDiag(E, diag::note_constexpr_unsupported_unsized_array);
+        Designator.setInvalid();
+        return;
+      }
+      if (checkSubobject(Info, E, CSK_ArrayToPointer)) {
+        assert(getType(Base)->isPointerType() || getType(Base)->isArrayType());
+        Designator.FirstEntryIsAnUnsizedArray = true;
+        Designator.addUnsizedArrayUnchecked(ElemTy);
+      }
     }
     void addArray(EvalInfo &Info, const Expr *E, const ConstantArrayType *CAT) {
       if (checkSubobject(Info, E, CSK_ArrayToPointer))
@@ -1783,6 +1825,9 @@
       }
     }
     for (const auto *I : RD->fields()) {
+      if (I->isUnnamedBitfield())
+        continue;
+
       if (!CheckConstantExpression(Info, DiagLoc, I->getType(),
                                    Value.getStructField(I->getFieldIndex())))
         return false;
@@ -2605,10 +2650,13 @@
   APValue *Value;
   /// The type of the complete object.
   QualType Type;
+  bool LifetimeStartedInEvaluation;
 
   CompleteObject() : Value(nullptr) {}
-  CompleteObject(APValue *Value, QualType Type)
-      : Value(Value), Type(Type) {
+  CompleteObject(APValue *Value, QualType Type,
+                 bool LifetimeStartedInEvaluation)
+      : Value(Value), Type(Type),
+        LifetimeStartedInEvaluation(LifetimeStartedInEvaluation) {
     assert(Value && "missing value for complete object");
   }
 
@@ -2624,10 +2672,12 @@
   if (Sub.Invalid)
     // A diagnostic will have already been produced.
     return handler.failed();
-  if (Sub.isOnePastTheEnd()) {
+  if (Sub.isOnePastTheEnd() || Sub.isMostDerivedAnUnsizedArray()) {
     if (Info.getLangOpts().CPlusPlus11)
-      Info.FFDiag(E, diag::note_constexpr_access_past_end)
-        << handler.AccessKind;
+      Info.FFDiag(E, Sub.isOnePastTheEnd()
+                         ? diag::note_constexpr_access_past_end
+                         : diag::note_constexpr_access_unsized_array)
+          << handler.AccessKind;
     else
       Info.FFDiag(E);
     return handler.failed();
@@ -2636,6 +2686,8 @@
   APValue *O = Obj.Value;
   QualType ObjType = Obj.Type;
   const FieldDecl *LastField = nullptr;
+  const bool MayReadMutableMembers =
+      Obj.LifetimeStartedInEvaluation && Info.getLangOpts().CPlusPlus14;
 
   // Walk the designator's path to find the subobject.
   for (unsigned I = 0, N = Sub.Entries.size(); /**/; ++I) {
@@ -2651,7 +2703,7 @@
       // cannot perform this read. (This only happens when performing a trivial
       // copy or assignment.)
       if (ObjType->isRecordType() && handler.AccessKind == AK_Read &&
-          diagnoseUnreadableFields(Info, E, ObjType))
+          !MayReadMutableMembers && diagnoseUnreadableFields(Info, E, ObjType))
         return handler.failed();
 
       if (!handler.found(*O, ObjType))
@@ -2731,7 +2783,11 @@
                                    : O->getComplexFloatReal(), ObjType);
       }
     } else if (const FieldDecl *Field = getAsField(Sub.Entries[I])) {
-      if (Field->isMutable() && handler.AccessKind == AK_Read) {
+      // In C++14 onwards, it is permitted to read a mutable member whose
+      // lifetime began within the evaluation.
+      // FIXME: Should we also allow this in C++11?
+      if (Field->isMutable() && handler.AccessKind == AK_Read &&
+          !MayReadMutableMembers) {
         Info.FFDiag(E, diag::note_constexpr_ltor_mutable, 1)
           << Field;
         Info.Note(Field->getLocation(), diag::note_declared_at);
@@ -2977,6 +3033,7 @@
   // Compute value storage location and type of base object.
   APValue *BaseVal = nullptr;
   QualType BaseType = getType(LVal.Base);
+  bool LifetimeStartedInEvaluation = Frame;
 
   if (const ValueDecl *D = LVal.Base.dyn_cast<const ValueDecl*>()) {
     // In C++98, const, non-volatile integers initialized with ICEs are ICEs.
@@ -3088,7 +3145,7 @@
         //   int &&r = 1;
         //   int x = ++r;
         //   constexpr int k = r;
-        // Therefore we use the C++1y rules in C++11 too.
+        // Therefore we use the C++14 rules in C++11 too.
         const ValueDecl *VD = Info.EvaluatingDecl.dyn_cast<const ValueDecl*>();
         const ValueDecl *ED = MTE->getExtendingDecl();
         if (!(BaseType.isConstQualified() &&
@@ -3101,6 +3158,7 @@
 
         BaseVal = Info.Ctx.getMaterializedTemporaryValue(MTE, false);
         assert(BaseVal && "got reference to unevaluated temporary");
+        LifetimeStartedInEvaluation = true;
       } else {
         Info.FFDiag(E);
         return CompleteObject();
@@ -3129,9 +3187,10 @@
   if (Info.isEvaluatingConstructor(LVal.getLValueBase(), LVal.CallIndex)) {
     BaseType = Info.Ctx.getCanonicalType(BaseType);
     BaseType.removeLocalConst();
+    LifetimeStartedInEvaluation = true;
   }
 
-  // In C++1y, we can't safely access any mutable state when we might be
+  // In C++14, we can't safely access any mutable state when we might be
   // evaluating after an unmodeled side effect.
   //
   // FIXME: Not all local state is mutable. Allow local constant subobjects
@@ -3141,7 +3200,7 @@
       (AK != AK_Read && Info.IsSpeculativelyEvaluating))
     return CompleteObject();
 
-  return CompleteObject(BaseVal, BaseType);
+  return CompleteObject(BaseVal, BaseType, LifetimeStartedInEvaluation);
 }
 
 /// \brief Perform an lvalue-to-rvalue conversion on the given glvalue. This
@@ -3175,14 +3234,14 @@
       APValue Lit;
       if (!Evaluate(Lit, Info, CLE->getInitializer()))
         return false;
-      CompleteObject LitObj(&Lit, Base->getType());
+      CompleteObject LitObj(&Lit, Base->getType(), false);
       return extractSubobject(Info, Conv, LitObj, LVal.Designator, RVal);
     } else if (isa<StringLiteral>(Base) || isa<PredefinedExpr>(Base)) {
       // We represent a string literal array as an lvalue pointing at the
       // corresponding expression, rather than building an array of chars.
       // FIXME: Support ObjCEncodeExpr, MakeStringConstant
       APValue Str(Base, CharUnits::Zero(), APValue::NoLValuePath(), 0);
-      CompleteObject StrObj(&Str, Base->getType());
+      CompleteObject StrObj(&Str, Base->getType(), false);
       return extractSubobject(Info, Conv, StrObj, LVal.Designator, RVal);
     }
   }
@@ -3203,17 +3262,12 @@
   }
 
   CompleteObject Obj = findCompleteObject(Info, E, AK_Assign, LVal, LValType);
-  return Obj && modifySubobject(Info, E, Obj, LVal.Designator, Val);
-}
-
-static bool isOverflowingIntegerType(ASTContext &Ctx, QualType T) {
-  return T->isSignedIntegerType() &&
-         Ctx.getIntWidth(T) >= Ctx.getIntWidth(Ctx.IntTy);
-}
-
-namespace {
-struct CompoundAssignSubobjectHandler {
-  EvalInfo &Info;
+  return Obj && modifySubobject(Info, E, Obj, LVal.Designator, Val);
+}
+
+namespace {
+struct CompoundAssignSubobjectHandler {
+  EvalInfo &Info;
   const Expr *E;
   QualType PromotedLHSType;
   BinaryOperatorKind Opcode;
@@ -3329,13 +3383,13 @@
   return Obj && findSubobject(Info, E, Obj, LVal.Designator, Handler);
 }
 
-namespace {
-struct IncDecSubobjectHandler {
-  EvalInfo &Info;
-  const Expr *E;
-  AccessKinds AccessKind;
-  APValue *Old;
-
+namespace {
+struct IncDecSubobjectHandler {
+  EvalInfo &Info;
+  const UnaryOperator *E;
+  AccessKinds AccessKind;
+  APValue *Old;
+
   typedef bool result_type;
 
   bool checkConst(QualType QT) {
@@ -3401,22 +3455,20 @@
     }
 
     bool WasNegative = Value.isNegative();
-    if (AccessKind == AK_Increment) {
-      ++Value;
-
-      if (!WasNegative && Value.isNegative() &&
-          isOverflowingIntegerType(Info.Ctx, SubobjType)) {
-        APSInt ActualValue(Value, /*IsUnsigned*/true);
-        return HandleOverflow(Info, E, ActualValue, SubobjType);
-      }
-    } else {
-      --Value;
-
-      if (WasNegative && !Value.isNegative() &&
-          isOverflowingIntegerType(Info.Ctx, SubobjType)) {
-        unsigned BitWidth = Value.getBitWidth();
-        APSInt ActualValue(Value.sext(BitWidth + 1), /*IsUnsigned*/false);
-        ActualValue.setBit(BitWidth);
+    if (AccessKind == AK_Increment) {
+      ++Value;
+
+      if (!WasNegative && Value.isNegative() && E->canOverflow()) {
+        APSInt ActualValue(Value, /*IsUnsigned*/true);
+        return HandleOverflow(Info, E, ActualValue, SubobjType);
+      }
+    } else {
+      --Value;
+
+      if (WasNegative && !Value.isNegative() && E->canOverflow()) {
+        unsigned BitWidth = Value.getBitWidth();
+        APSInt ActualValue(Value.sext(BitWidth + 1), /*IsUnsigned*/false);
+        ActualValue.setBit(BitWidth);
         return HandleOverflow(Info, E, ActualValue, SubobjType);
       }
     }
@@ -3471,13 +3523,13 @@
     Info.FFDiag(E);
     return false;
   }
-
-  AccessKinds AK = IsIncrement ? AK_Increment : AK_Decrement;
-  CompleteObject Obj = findCompleteObject(Info, E, AK, LVal, LValType);
-  IncDecSubobjectHandler Handler = { Info, E, AK, Old };
-  return Obj && findSubobject(Info, E, Obj, LVal.Designator, Handler);
-}
-
+
+  AccessKinds AK = IsIncrement ? AK_Increment : AK_Decrement;
+  CompleteObject Obj = findCompleteObject(Info, E, AK, LVal, LValType);
+  IncDecSubobjectHandler Handler = {Info, cast<UnaryOperator>(E), AK, Old};
+  return Obj && findSubobject(Info, E, Obj, LVal.Designator, Handler);
+}
+
 /// Build an lvalue for the object argument of a member function call.
 static bool EvaluateObjectArgument(EvalInfo &Info, const Expr *Object,
                                    LValue &This) {
@@ -4335,6 +4387,7 @@
 #endif
   for (const auto *I : Definition->inits()) {
     LValue Subobject = This;
+    LValue SubobjectParent = This;
     APValue *Value = &Result;
 
     // Determine the subobject to initialize.
@@ -4365,7 +4418,8 @@
     } else if (IndirectFieldDecl *IFD = I->getIndirectMember()) {
       // Walk the indirect field decl's chain to find the object to initialize,
       // and make sure we've initialized every step along it.
-      for (auto *C : IFD->chain()) {
+      auto IndirectFieldChain = IFD->chain();
+      for (auto *C : IndirectFieldChain) {
         FD = cast<FieldDecl>(C);
         CXXRecordDecl *CD = cast<CXXRecordDecl>(FD->getParent());
         // Switch the union field if it differs. This happens if we had
@@ -4381,6 +4435,10 @@
             *Value = APValue(APValue::UninitStruct(), CD->getNumBases(),
                              std::distance(CD->field_begin(), CD->field_end()));
         }
+        // Store Subobject as its parent before updating it for the last element
+        // in the chain.
+        if (C == IndirectFieldChain.back())
+          SubobjectParent = Subobject;
         if (!HandleLValueMember(Info, I->getInit(), Subobject, FD))
           return false;
         if (CD->isUnion())
@@ -4392,10 +4450,16 @@
       llvm_unreachable("unknown base initializer kind");
     }
 
+    // Need to override This for implicit field initializers as in this case
+    // This refers to innermost anonymous struct/union containing initializer,
+    // not to currently constructed class.
+    const Expr *Init = I->getInit();
+    ThisOverrideRAII ThisOverride(*Info.CurrentCall, &SubobjectParent,
+                                  isa<CXXDefaultInitExpr>(Init));
     FullExpressionRAII InitScope(Info);
-    if (!EvaluateInPlace(*Value, Info, Subobject, I->getInit()) ||
-        (FD && FD->isBitField() && !truncateBitfieldValue(Info, I->getInit(),
-                                                          *Value, FD))) {
+    if (!EvaluateInPlace(*Value, Info, Subobject, Init) ||
+        (FD && FD->isBitField() &&
+         !truncateBitfieldValue(Info, Init, *Value, FD))) {
       // If we're checking for a potential constant expression, evaluate all
       // initializers even if some of them fail.
       if (!Info.noteFailure())
@@ -4787,7 +4851,7 @@
     assert(BaseTy->castAs<RecordType>()->getDecl()->getCanonicalDecl() ==
            FD->getParent()->getCanonicalDecl() && "record / field mismatch");
 
-    CompleteObject Obj(&Val, BaseTy);
+    CompleteObject Obj(&Val, BaseTy, true);
     SubobjectDesignator Designator(BaseTy);
     Designator.addDeclUnchecked(FD);
 
@@ -5403,9 +5467,8 @@
                                             llvm::APInt &Result) {
   const AllocSizeAttr *AllocSize = getAllocSizeAttr(Call);
 
-  // alloc_size args are 1-indexed, 0 means not present.
-  assert(AllocSize && AllocSize->getElemSizeParam() != 0);
-  unsigned SizeArgNo = AllocSize->getElemSizeParam() - 1;
+  assert(AllocSize && AllocSize->elemSizeParam().isValid());
+  unsigned SizeArgNo = AllocSize->elemSizeParam().getASTIndex();
   unsigned BitsInSizeT = Ctx.getTypeSize(Ctx.getSizeType());
   if (Call->getNumArgs() <= SizeArgNo)
     return false;
@@ -5423,14 +5486,13 @@
   if (!EvaluateAsSizeT(Call->getArg(SizeArgNo), SizeOfElem))
     return false;
 
-  if (!AllocSize->getNumElemsParam()) {
+  if (!AllocSize->numElemsParam().isValid()) {
     Result = std::move(SizeOfElem);
     return true;
   }
 
   APSInt NumberOfElems;
-  // Argument numbers start at 1
-  unsigned NumArgNo = AllocSize->getNumElemsParam() - 1;
+  unsigned NumArgNo = AllocSize->numElemsParam().getASTIndex();
   if (!EvaluateAsSizeT(Call->getArg(NumArgNo), NumberOfElems))
     return false;
 
@@ -5487,7 +5549,7 @@
   Result.setInvalid(E);
 
   QualType Pointee = E->getType()->castAs<PointerType>()->getPointeeType();
-  Result.addUnsizedArray(Info, Pointee);
+  Result.addUnsizedArray(Info, E, Pointee);
   return true;
 }
 
@@ -5697,7 +5759,8 @@
       return true;
     }
   }
-  case CK_ArrayToPointerDecay:
+
+  case CK_ArrayToPointerDecay: {
     if (SubExpr->isGLValue()) {
       if (!evaluateLValue(SubExpr, Result))
         return false;
@@ -5708,12 +5771,13 @@
         return false;
     }
     // The result is a pointer to the first element of the array.
-    if (const ConstantArrayType *CAT
-          = Info.Ctx.getAsConstantArrayType(SubExpr->getType()))
+    auto *AT = Info.Ctx.getAsArrayType(SubExpr->getType());
+    if (auto *CAT = dyn_cast<ConstantArrayType>(AT))
       Result.addArray(Info, E, CAT);
     else
-      Result.Designator.setInvalid();
+      Result.addUnsizedArray(Info, E, AT->getElementType());
     return true;
+  }
 
   case CK_FunctionToPointerDecay:
     return evaluateLValue(SubExpr, Result);
@@ -5780,7 +5844,7 @@
 
   Result.setInvalid(E);
   QualType PointeeTy = E->getType()->castAs<PointerType>()->getPointeeType();
-  Result.addUnsizedArray(Info, PointeeTy);
+  Result.addUnsizedArray(Info, E, PointeeTy);
   return true;
 }
 
@@ -5870,7 +5934,7 @@
         << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'");
     else
       Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case Builtin::BI__builtin_strchr:
   case Builtin::BI__builtin_wcschr:
   case Builtin::BI__builtin_memchr:
@@ -5909,7 +5973,7 @@
                                Desired))
         return ZeroInitialization(E);
       StopAtNull = true;
-      // Fall through.
+      LLVM_FALLTHROUGH;
     case Builtin::BImemchr:
     case Builtin::BI__builtin_memchr:
     case Builtin::BI__builtin_char_memchr:
@@ -5922,7 +5986,7 @@
     case Builtin::BIwcschr:
     case Builtin::BI__builtin_wcschr:
       StopAtNull = true;
-      // Fall through.
+      LLVM_FALLTHROUGH;
     case Builtin::BIwmemchr:
     case Builtin::BI__builtin_wmemchr:
       // wcschr and wmemchr are given a wchar_t to look for. Just use it.
@@ -6744,6 +6808,22 @@
   return ArrayExprEvaluator(Info, This, Result).Visit(E);
 }
 
+// Return true iff the given array filler may depend on the element index.
+static bool MaybeElementDependentArrayFiller(const Expr *FillerExpr) {
+  // For now, just whitelist non-class value-initialization and initialization
+  // lists comprised of them.
+  if (isa<ImplicitValueInitExpr>(FillerExpr))
+    return false;
+  if (const InitListExpr *ILE = dyn_cast<InitListExpr>(FillerExpr)) {
+    for (unsigned I = 0, E = ILE->getNumInits(); I != E; ++I) {
+      if (MaybeElementDependentArrayFiller(ILE->getInit(I)))
+        return true;
+    }
+    return false;
+  }
+  return true;
+}
+
 bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
   const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(E->getType());
   if (!CAT)
@@ -6773,10 +6853,13 @@
   const Expr *FillerExpr = E->hasArrayFiller() ? E->getArrayFiller() : nullptr;
 
   // If the initializer might depend on the array index, run it for each
-  // array element. For now, just whitelist non-class value-initialization.
-  if (NumEltsToInit != NumElts && !isa<ImplicitValueInitExpr>(FillerExpr))
+  // array element.
+  if (NumEltsToInit != NumElts && MaybeElementDependentArrayFiller(FillerExpr))
     NumEltsToInit = NumElts;
 
+  DEBUG(llvm::dbgs() << "The number of elements to initialize: " <<
+        NumEltsToInit << ".\n");
+
   Result = APValue(APValue::UninitArray(), NumEltsToInit, NumElts);
 
   // If the array was previously zero-initialized, preserve the
@@ -7166,6 +7249,7 @@
     case BuiltinType::Dependent:
       llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type");
     };
+    break;
 
   case Type::Enum:
     return LangOpts.CPlusPlus ? enumeral_type_class : integer_type_class;
@@ -7341,7 +7425,8 @@
 /// Please note: this function is specialized for how __builtin_object_size
 /// views "objects".
 ///
-/// If this encounters an invalid RecordDecl, it will always return true.
+/// If this encounters an invalid RecordDecl or otherwise cannot determine the
+/// correct result, it will always return true.
 static bool isDesignatorAtObjectEnd(const ASTContext &Ctx, const LValue &LVal) {
   assert(!LVal.Designator.Invalid);
 
@@ -7372,11 +7457,13 @@
   unsigned I = 0;
   QualType BaseType = getType(Base);
   if (LVal.Designator.FirstEntryIsAnUnsizedArray) {
-    assert(isBaseAnAllocSizeCall(Base) &&
-           "Unsized array in non-alloc_size call?");
-    // If this is an alloc_size base, we should ignore the initial array index
+    // If we don't know the array bound, conservatively assume we're looking at
+    // the final array element.
     ++I;
-    BaseType = BaseType->castAs<PointerType>()->getPointeeType();
+    if (BaseType->isIncompleteArrayType())
+      BaseType = Ctx.getAsArrayType(BaseType)->getElementType();
+    else
+      BaseType = BaseType->castAs<PointerType>()->getPointeeType();
   }
 
   for (unsigned E = LVal.Designator.Entries.size(); I != E; ++I) {
@@ -7778,7 +7865,7 @@
         << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'");
     else
       Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case Builtin::BI__builtin_strlen:
   case Builtin::BI__builtin_wcslen: {
     // As an extension, we support __builtin_strlen() as a constant expression,
@@ -7838,7 +7925,7 @@
         << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'");
     else
       Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case Builtin::BI__builtin_strcmp:
   case Builtin::BI__builtin_wcscmp:
   case Builtin::BI__builtin_strncmp:
@@ -8828,13 +8915,13 @@
     return Visit(E->getSubExpr());
   case UO_Minus: {
     if (!Visit(E->getSubExpr()))
-      return false;
-    if (!Result.isInt()) return Error(E);
-    const APSInt &Value = Result.getInt();
-    if (Value.isSigned() && Value.isMinSignedValue() &&
-        !HandleOverflow(Info, E, -Value.extend(Value.getBitWidth() + 1),
-                        E->getType()))
-      return false;
+      return false;
+    if (!Result.isInt()) return Error(E);
+    const APSInt &Value = Result.getInt();
+    if (Value.isSigned() && Value.isMinSignedValue() && E->canOverflow() &&
+        !HandleOverflow(Info, E, -Value.extend(Value.getBitWidth() + 1),
+                        E->getType()))
+      return false;
     return Success(-Value, E);
   }
   case UO_Not: {
@@ -9123,9 +9210,11 @@
   case Builtin::BI__builtin_huge_val:
   case Builtin::BI__builtin_huge_valf:
   case Builtin::BI__builtin_huge_vall:
+  case Builtin::BI__builtin_huge_valf128:
   case Builtin::BI__builtin_inf:
   case Builtin::BI__builtin_inff:
-  case Builtin::BI__builtin_infl: {
+  case Builtin::BI__builtin_infl:
+  case Builtin::BI__builtin_inff128: {
     const llvm::fltSemantics &Sem =
       Info.Ctx.getFloatTypeSemantics(E->getType());
     Result = llvm::APFloat::getInf(Sem);
@@ -9135,6 +9224,7 @@
   case Builtin::BI__builtin_nans:
   case Builtin::BI__builtin_nansf:
   case Builtin::BI__builtin_nansl:
+  case Builtin::BI__builtin_nansf128:
     if (!TryEvaluateBuiltinNaN(Info.Ctx, E->getType(), E->getArg(0),
                                true, Result))
       return Error(E);
@@ -9143,6 +9233,7 @@
   case Builtin::BI__builtin_nan:
   case Builtin::BI__builtin_nanf:
   case Builtin::BI__builtin_nanl:
+  case Builtin::BI__builtin_nanf128:
     // If this is __builtin_nan() turn this into a nan, otherwise we
     // can't constant fold it.
     if (!TryEvaluateBuiltinNaN(Info.Ctx, E->getType(), E->getArg(0),
@@ -9153,6 +9244,7 @@
   case Builtin::BI__builtin_fabs:
   case Builtin::BI__builtin_fabsf:
   case Builtin::BI__builtin_fabsl:
+  case Builtin::BI__builtin_fabsf128:
     if (!EvaluateFloat(E->getArg(0), Result, Info))
       return false;
 
@@ -9166,7 +9258,8 @@
 
   case Builtin::BI__builtin_copysign:
   case Builtin::BI__builtin_copysignf:
-  case Builtin::BI__builtin_copysignl: {
+  case Builtin::BI__builtin_copysignl:
+  case Builtin::BI__builtin_copysignf128: {
     APFloat RHS(0.);
     if (!EvaluateFloat(E->getArg(0), Result, Info) ||
         !EvaluateFloat(E->getArg(1), RHS, Info))
@@ -10342,7 +10435,7 @@
   case Expr::DeclRefExprClass: {
     if (isa<EnumConstantDecl>(cast<DeclRefExpr>(E)->getDecl()))
       return NoDiag();
-    const ValueDecl *D = dyn_cast<ValueDecl>(cast<DeclRefExpr>(E)->getDecl());
+    const ValueDecl *D = cast<DeclRefExpr>(E)->getDecl();
     if (Ctx.getLangOpts().CPlusPlus &&
         D && IsConstNonVolatile(D->getType())) {
       // Parameter variables are never constants.  Without this check,
@@ -10428,6 +10521,7 @@
     case BO_AndAssign:
     case BO_XorAssign:
     case BO_OrAssign:
+    case BO_Cmp: // FIXME: Re-enable once we can evaluate this.
       // C99 6.6/3 allows assignments within unevaluated subexpressions of
       // constant expressions, but they can never be ICEs because an ICE cannot
       // contain an lvalue operand.
diff --git a/lib/AST/ExprObjC.cpp b/lib/AST/ExprObjC.cpp
index 31c1b3f..e1a23f5 100644
--- a/lib/AST/ExprObjC.cpp
+++ b/lib/AST/ExprObjC.cpp
@@ -1,4 +1,4 @@
-//===--- ExprObjC.cpp - (ObjC) Expression AST Node Implementation ---------===//
+//===- ExprObjC.cpp - (ObjC) Expression AST Node Implementation -----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,8 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/AST/ExprObjC.h"
-
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/SelectorLocationsKind.h"
+#include "clang/AST/Type.h"
+#include "clang/AST/TypeLoc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
 
 using namespace clang;
 
@@ -45,7 +52,6 @@
 
 ObjCArrayLiteral *ObjCArrayLiteral::CreateEmpty(const ASTContext &C,
                                                 unsigned NumElements) {
-
   void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(NumElements));
   return new (Mem) ObjCArrayLiteral(EmptyShell(), NumElements);
 }
diff --git a/lib/AST/ExternalASTMerger.cpp b/lib/AST/ExternalASTMerger.cpp
index 6b75c51..edf1310 100644
--- a/lib/AST/ExternalASTMerger.cpp
+++ b/lib/AST/ExternalASTMerger.cpp
@@ -16,6 +16,7 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclObjC.h"
+#include "clang/AST/DeclTemplate.h"
 #include "clang/AST/ExternalASTMerger.h"
 
 using namespace clang;
@@ -351,6 +352,27 @@
   }
 }
 
+template <typename DeclTy>
+static bool importSpecializations(DeclTy *D, ASTImporter *Importer) {
+  for (auto *Spec : D->specializations())
+    if (!Importer->Import(Spec))
+      return true;
+  return false;
+}
+
+/// Imports specializations from template declarations that can be specialized.
+static bool importSpecializationsIfNeeded(Decl *D, ASTImporter *Importer) {
+  if (!isa<TemplateDecl>(D))
+    return false;
+  if (auto *FunctionTD = dyn_cast<FunctionTemplateDecl>(D))
+    return importSpecializations(FunctionTD, Importer);
+  else if (auto *ClassTD = dyn_cast<ClassTemplateDecl>(D))
+    return importSpecializations(ClassTD, Importer);
+  else if (auto *VarTD = dyn_cast<VarTemplateDecl>(D))
+    return importSpecializations(VarTD, Importer);
+  return false;
+}
+
 bool ExternalASTMerger::FindExternalVisibleDeclsByName(const DeclContext *DC,
                                                        DeclarationName Name) {
   llvm::SmallVector<NamedDecl *, 1> Decls;
@@ -376,9 +398,18 @@
 
   Decls.reserve(Candidates.size());
   for (const Candidate &C : Candidates) {
-    NamedDecl *d = cast<NamedDecl>(C.second->Import(C.first.get()));
-    assert(d);
-    Decls.push_back(d);
+    Decl *LookupRes = C.first.get();
+    ASTImporter *Importer = C.second;
+    NamedDecl *ND = cast_or_null<NamedDecl>(Importer->Import(LookupRes));
+    assert(ND);
+    // If we don't import specialization, they are not available via lookup
+    // because the lookup result is imported TemplateDecl and it does not
+    // reference its specializations until they are imported explicitly.
+    bool IsSpecImportFailed =
+        importSpecializationsIfNeeded(LookupRes, Importer);
+    assert(!IsSpecImportFailed);
+    (void)IsSpecImportFailed;
+    Decls.push_back(ND);
   }
   SetExternalVisibleDeclsForName(DC, Name, Decls);
   return true;
diff --git a/lib/AST/ExternalASTSource.cpp b/lib/AST/ExternalASTSource.cpp
index 182d382..198ba9d 100644
--- a/lib/AST/ExternalASTSource.cpp
+++ b/lib/AST/ExternalASTSource.cpp
@@ -1,4 +1,4 @@
-//===- ExternalASTSource.cpp - Abstract External AST Interface --*- C++ -*-===//
+//===- ExternalASTSource.cpp - Abstract External AST Interface ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,12 +16,16 @@
 #include "clang/AST/ExternalASTSource.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclarationName.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/Module.h"
+#include "llvm/ADT/None.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <cstdint>
 
 using namespace clang;
 
-ExternalASTSource::~ExternalASTSource() { }
+ExternalASTSource::~ExternalASTSource() = default;
 
 llvm::Optional<ExternalASTSource::ASTSourceDescriptor>
 ExternalASTSource::getSourceDescriptor(unsigned ID) {
@@ -66,7 +70,7 @@
 
 void ExternalASTSource::StartTranslationUnit(ASTConsumer *Consumer) {}
 
-void ExternalASTSource::PrintStats() { }
+void ExternalASTSource::PrintStats() {}
 
 bool ExternalASTSource::layoutRecordType(
     const RecordDecl *Record, uint64_t &Size, uint64_t &Alignment,
diff --git a/lib/AST/ItaniumCXXABI.cpp b/lib/AST/ItaniumCXXABI.cpp
index 692a455..d6bc16b 100644
--- a/lib/AST/ItaniumCXXABI.cpp
+++ b/lib/AST/ItaniumCXXABI.cpp
@@ -101,15 +101,17 @@
 public:
   ItaniumCXXABI(ASTContext &Ctx) : Context(Ctx) { }
 
-  std::pair<uint64_t, unsigned>
-  getMemberPointerWidthAndAlign(const MemberPointerType *MPT) const override {
+  MemberPointerInfo
+  getMemberPointerInfo(const MemberPointerType *MPT) const override {
     const TargetInfo &Target = Context.getTargetInfo();
     TargetInfo::IntType PtrDiff = Target.getPtrDiffType(0);
-    uint64_t Width = Target.getTypeWidth(PtrDiff);
-    unsigned Align = Target.getTypeAlign(PtrDiff);
+    MemberPointerInfo MPI;
+    MPI.Width = Target.getTypeWidth(PtrDiff);
+    MPI.Align = Target.getTypeAlign(PtrDiff);
+    MPI.HasPadding = false;
     if (MPT->isMemberFunctionPointer())
-      Width = 2 * Width;
-    return std::make_pair(Width, Align);
+      MPI.Width *= 2;
+    return MPI;
   }
 
   CallingConv getDefaultMethodCallConv(bool isVariadic) const override {
diff --git a/lib/AST/ItaniumMangle.cpp b/lib/AST/ItaniumMangle.cpp
index 58e2bc8..d6d3792 100644
--- a/lib/AST/ItaniumMangle.cpp
+++ b/lib/AST/ItaniumMangle.cpp
@@ -1324,8 +1324,7 @@
 
     if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) {
       // We must have an anonymous union or struct declaration.
-      const RecordDecl *RD =
-        cast<RecordDecl>(VD->getType()->getAs<RecordType>()->getDecl());
+      const RecordDecl *RD = VD->getType()->getAs<RecordType>()->getDecl();
 
       // Itanium C++ ABI 5.1.2:
       //
@@ -1468,7 +1467,7 @@
         if (!MD->isStatic())
           Arity++;
     }
-  // FALLTHROUGH
+    LLVM_FALLTHROUGH;
   case DeclarationName::CXXConversionFunctionName:
   case DeclarationName::CXXLiteralOperatorName:
     mangleOperatorName(Name, Arity);
@@ -2195,6 +2194,9 @@
   // Proposal on cxx-abi-dev, 2015-10-21.
   //              ::= aw        # co_await
   case OO_Coawait: Out << "aw"; break;
+  // Proposed in cxx-abi github issue 43.
+  //              ::= ss        # <=>
+  case OO_Spaceship: Out << "ss"; break;
 
   case OO_None:
   case NUM_OVERLOADED_OPERATORS:
diff --git a/lib/AST/MicrosoftCXXABI.cpp b/lib/AST/MicrosoftCXXABI.cpp
index 73324e4..6d73716 100644
--- a/lib/AST/MicrosoftCXXABI.cpp
+++ b/lib/AST/MicrosoftCXXABI.cpp
@@ -76,8 +76,8 @@
 public:
   MicrosoftCXXABI(ASTContext &Ctx) : Context(Ctx) { }
 
-  std::pair<uint64_t, unsigned>
-  getMemberPointerWidthAndAlign(const MemberPointerType *MPT) const override;
+  MemberPointerInfo
+  getMemberPointerInfo(const MemberPointerType *MPT) const override;
 
   CallingConv getDefaultMethodCallConv(bool isVariadic) const override {
     if (!isVariadic &&
@@ -106,7 +106,7 @@
   void addTypedefNameForUnnamedTagDecl(TagDecl *TD,
                                        TypedefNameDecl *DD) override {
     TD = TD->getCanonicalDecl();
-    DD = cast<TypedefNameDecl>(DD->getCanonicalDecl());
+    DD = DD->getCanonicalDecl();
     TypedefNameDecl *&I = UnnamedTagDeclToTypedefNameDecl[TD];
     if (!I)
       I = DD;
@@ -227,7 +227,7 @@
   return std::make_pair(Ptrs, Ints);
 }
 
-std::pair<uint64_t, unsigned> MicrosoftCXXABI::getMemberPointerWidthAndAlign(
+CXXABI::MemberPointerInfo MicrosoftCXXABI::getMemberPointerInfo(
     const MemberPointerType *MPT) const {
   // The nominal struct is laid out with pointers followed by ints and aligned
   // to a pointer width if any are present and an int width otherwise.
@@ -237,22 +237,25 @@
 
   unsigned Ptrs, Ints;
   std::tie(Ptrs, Ints) = getMSMemberPointerSlots(MPT);
-  uint64_t Width = Ptrs * PtrSize + Ints * IntSize;
-  unsigned Align;
+  MemberPointerInfo MPI;
+  MPI.HasPadding = false;
+  MPI.Width = Ptrs * PtrSize + Ints * IntSize;
 
   // When MSVC does x86_32 record layout, it aligns aggregate member pointers to
   // 8 bytes.  However, __alignof usually returns 4 for data memptrs and 8 for
   // function memptrs.
   if (Ptrs + Ints > 1 && Target.getTriple().isArch32Bit())
-    Align = 64;
+    MPI.Align = 64;
   else if (Ptrs)
-    Align = Target.getPointerAlign(0);
+    MPI.Align = Target.getPointerAlign(0);
   else
-    Align = Target.getIntAlign();
+    MPI.Align = Target.getIntAlign();
 
-  if (Target.getTriple().isArch64Bit())
-    Width = llvm::alignTo(Width, Align);
-  return std::make_pair(Width, Align);
+  if (Target.getTriple().isArch64Bit()) {
+    MPI.Width = llvm::alignTo(MPI.Width, MPI.Align);
+    MPI.HasPadding = MPI.Width != (Ptrs * PtrSize + Ints * IntSize);
+  }
+  return MPI;
 }
 
 CXXABI *clang::CreateMicrosoftCXXABI(ASTContext &Ctx) {
diff --git a/lib/AST/MicrosoftMangle.cpp b/lib/AST/MicrosoftMangle.cpp
index c804854..068254a 100644
--- a/lib/AST/MicrosoftMangle.cpp
+++ b/lib/AST/MicrosoftMangle.cpp
@@ -362,6 +362,10 @@
                           const TemplateArgumentList &TemplateArgs);
   void mangleTemplateArg(const TemplateDecl *TD, const TemplateArgument &TA,
                          const NamedDecl *Parm);
+
+  void mangleObjCProtocol(const ObjCProtocolDecl *PD);
+  void mangleObjCLifetime(const QualType T, Qualifiers Quals,
+                          SourceRange Range);
 };
 }
 
@@ -950,11 +954,10 @@
   }
 }
 
+// <postfix> ::= <unqualified-name> [<postfix>]
+//           ::= <substitution> [<postfix>]
 void MicrosoftCXXNameMangler::mangleNestedName(const NamedDecl *ND) {
-  // <postfix> ::= <unqualified-name> [<postfix>]
-  //           ::= <substitution> [<postfix>]
   const DeclContext *DC = getEffectiveDeclContext(ND);
-
   while (!DC->isTranslationUnit()) {
     if (isa<TagDecl>(ND) || isa<VarDecl>(ND)) {
       unsigned Disc;
@@ -1192,6 +1195,15 @@
   // <operator-name> ::= ?__L # co_await
   case OO_Coawait: Out << "?__L"; break;
 
+  case OO_Spaceship: {
+    // FIXME: Once MS picks a mangling, use it.
+    DiagnosticsEngine &Diags = Context.getDiags();
+    unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
+      "cannot mangle this three-way comparison operator yet");
+    Diags.Report(Loc, DiagID);
+    break;
+  }
+
   case OO_Conditional: {
     DiagnosticsEngine &Diags = Context.getDiags();
     unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
@@ -1356,7 +1368,7 @@
     break;
   }
   case TemplateArgument::Declaration: {
-    const NamedDecl *ND = cast<NamedDecl>(TA.getAsDecl());
+    const NamedDecl *ND = TA.getAsDecl();
     if (isa<FieldDecl>(ND) || isa<IndirectFieldDecl>(ND)) {
       mangleMemberDataPointer(
           cast<CXXRecordDecl>(ND->getDeclContext())->getMostRecentDecl(),
@@ -1448,6 +1460,47 @@
   }
 }
 
+void MicrosoftCXXNameMangler::mangleObjCProtocol(const ObjCProtocolDecl *PD) {
+  llvm::SmallString<64> TemplateMangling;
+  llvm::raw_svector_ostream Stream(TemplateMangling);
+  MicrosoftCXXNameMangler Extra(Context, Stream);
+
+  Stream << "?$";
+  Extra.mangleSourceName("Protocol");
+  Extra.mangleArtificalTagType(TTK_Struct, PD->getName());
+
+  mangleArtificalTagType(TTK_Struct, TemplateMangling, {"__ObjC"});
+}
+
+void MicrosoftCXXNameMangler::mangleObjCLifetime(const QualType Type,
+                                                 Qualifiers Quals,
+                                                 SourceRange Range) {
+  llvm::SmallString<64> TemplateMangling;
+  llvm::raw_svector_ostream Stream(TemplateMangling);
+  MicrosoftCXXNameMangler Extra(Context, Stream);
+
+  Stream << "?$";
+  switch (Quals.getObjCLifetime()) {
+  case Qualifiers::OCL_None:
+  case Qualifiers::OCL_ExplicitNone:
+    break;
+  case Qualifiers::OCL_Autoreleasing:
+    Extra.mangleSourceName("Autoreleasing");
+    break;
+  case Qualifiers::OCL_Strong:
+    Extra.mangleSourceName("Strong");
+    break;
+  case Qualifiers::OCL_Weak:
+    Extra.mangleSourceName("Weak");
+    break;
+  }
+  Extra.manglePointerCVQualifiers(Quals);
+  Extra.manglePointerExtQualifiers(Quals, Type);
+  Extra.mangleType(Type, Range);
+
+  mangleArtificalTagType(TTK_Struct, TemplateMangling, {"__ObjC"});
+}
+
 void MicrosoftCXXNameMangler::mangleQualifiers(Qualifiers Quals,
                                                bool IsMember) {
   // <cvr-qualifiers> ::= [E] [F] [I] <base-cvr-qualifiers>
@@ -1550,12 +1603,11 @@
 
 void MicrosoftCXXNameMangler::manglePointerExtQualifiers(Qualifiers Quals,
                                                          QualType PointeeType) {
-  bool HasRestrict = Quals.hasRestrict();
   if (PointersAre64Bit &&
       (PointeeType.isNull() || !PointeeType->isFunctionType()))
     Out << 'E';
 
-  if (HasRestrict)
+  if (Quals.hasRestrict())
     Out << 'I';
 
   if (Quals.hasUnaligned() ||
@@ -1676,6 +1728,8 @@
 
   switch (QMM) {
   case QMM_Drop:
+    if (Quals.hasObjCLifetime())
+      Quals = Quals.withoutObjCLifetime();
     break;
   case QMM_Mangle:
     if (const FunctionType *FT = dyn_cast<FunctionType>(T)) {
@@ -1694,6 +1748,8 @@
   case QMM_Result:
     // Presence of __unaligned qualifier shouldn't affect mangling here.
     Quals.removeUnaligned();
+    if (Quals.hasObjCLifetime())
+      Quals = Quals.withoutObjCLifetime();
     if ((!IsPointer && Quals) || isa<TagType>(T)) {
       Out << '?';
       mangleQualifiers(Quals, false);
@@ -1824,15 +1880,12 @@
     llvm_unreachable("placeholder types shouldn't get to name mangling");
 
   case BuiltinType::ObjCId:
-    Out << "PA";
     mangleArtificalTagType(TTK_Struct, "objc_object");
     break;
   case BuiltinType::ObjCClass:
-    Out << "PA";
     mangleArtificalTagType(TTK_Struct, "objc_class");
     break;
   case BuiltinType::ObjCSel:
-    Out << "PA";
     mangleArtificalTagType(TTK_Struct, "objc_selector");
     break;
 
@@ -2131,6 +2184,7 @@
     case CC_X86StdCall: Out << 'G'; break;
     case CC_X86FastCall: Out << 'I'; break;
     case CC_X86VectorCall: Out << 'Q'; break;
+    case CC_Swift: Out << 'S'; break;
     case CC_X86RegCall: Out << 'w'; break;
   }
 }
@@ -2328,10 +2382,16 @@
 
 void MicrosoftCXXNameMangler::mangleType(const ObjCObjectPointerType *T,
                                          Qualifiers Quals, SourceRange Range) {
-  if (T->isObjCIdType() || T->isObjCClassType())
-    return mangleType(T->getPointeeType(), Range, QMM_Drop);
-
   QualType PointeeType = T->getPointeeType();
+  switch (Quals.getObjCLifetime()) {
+  case Qualifiers::OCL_None:
+  case Qualifiers::OCL_ExplicitNone:
+    break;
+  case Qualifiers::OCL_Autoreleasing:
+  case Qualifiers::OCL_Strong:
+  case Qualifiers::OCL_Weak:
+    return mangleObjCLifetime(PointeeType, Quals, Range);
+  }
   manglePointerCVQualifiers(Quals);
   manglePointerExtQualifiers(Quals, PointeeType);
   mangleType(PointeeType, Range);
@@ -2448,9 +2508,33 @@
 
 void MicrosoftCXXNameMangler::mangleType(const ObjCObjectType *T, Qualifiers,
                                          SourceRange Range) {
-  // We don't allow overloading by different protocol qualification,
-  // so mangling them isn't necessary.
-  mangleType(T->getBaseType(), Range, QMM_Drop);
+  if (T->qual_empty())
+    return mangleType(T->getBaseType(), Range, QMM_Drop);
+
+  ArgBackRefMap OuterArgsContext;
+  BackRefVec OuterTemplateContext;
+
+  TypeBackReferences.swap(OuterArgsContext);
+  NameBackReferences.swap(OuterTemplateContext);
+
+  mangleTagTypeKind(TTK_Struct);
+
+  Out << "?$";
+  if (T->isObjCId())
+    mangleSourceName("objc_object");
+  else if (T->isObjCClass())
+    mangleSourceName("objc_class");
+  else
+    mangleSourceName(T->getInterface()->getName());
+
+  for (const auto &Q : T->quals())
+    mangleObjCProtocol(Q);
+  Out << '@';
+
+  Out << '@';
+
+  TypeBackReferences.swap(OuterArgsContext);
+  NameBackReferences.swap(OuterTemplateContext);
 }
 
 void MicrosoftCXXNameMangler::mangleType(const BlockPointerType *T,
diff --git a/lib/AST/NestedNameSpecifier.cpp b/lib/AST/NestedNameSpecifier.cpp
index e2e0dbe..f46552e 100644
--- a/lib/AST/NestedNameSpecifier.cpp
+++ b/lib/AST/NestedNameSpecifier.cpp
@@ -1,4 +1,4 @@
-//===--- NestedNameSpecifier.cpp - C++ nested name specifiers -----*- C++ -*-=//
+//===- NestedNameSpecifier.cpp - C++ nested name specifiers ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,16 +11,28 @@
 //  a C++ nested-name-specifier.
 //
 //===----------------------------------------------------------------------===//
+
 #include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/PrettyPrinter.h"
+#include "clang/AST/TemplateName.h"
 #include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
-#include "llvm/Support/AlignOf.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
 #include <cassert>
+#include <cstdlib>
+#include <cstring>
 
 using namespace clang;
 
@@ -314,8 +326,8 @@
       SpecType->getTemplateName().print(OS, InnerPolicy, true);
 
       // Print the template argument list.
-      TemplateSpecializationType::PrintTemplateArgumentList(
-          OS, SpecType->template_arguments(), InnerPolicy);
+      printTemplateArgumentList(OS, SpecType->template_arguments(),
+                                InnerPolicy);
     } else {
       // Print the type normally
       QualType(T, 0).print(OS, InnerPolicy);
@@ -375,22 +387,20 @@
   return Length;
 }
 
-namespace {
-  /// \brief Load a (possibly unaligned) source location from a given address
-  /// and offset.
-  SourceLocation LoadSourceLocation(void *Data, unsigned Offset) {
-    unsigned Raw;
-    memcpy(&Raw, static_cast<char *>(Data) + Offset, sizeof(unsigned));
-    return SourceLocation::getFromRawEncoding(Raw);
-  }
+/// \brief Load a (possibly unaligned) source location from a given address
+/// and offset.
+static SourceLocation LoadSourceLocation(void *Data, unsigned Offset) {
+  unsigned Raw;
+  memcpy(&Raw, static_cast<char *>(Data) + Offset, sizeof(unsigned));
+  return SourceLocation::getFromRawEncoding(Raw);
+}
   
-  /// \brief Load a (possibly unaligned) pointer from a given address and
-  /// offset.
-  void *LoadPointer(void *Data, unsigned Offset) {
-    void *Result;
-    memcpy(&Result, static_cast<char *>(Data) + Offset, sizeof(void*));
-    return Result;
-  }
+/// \brief Load a (possibly unaligned) pointer from a given address and
+/// offset.
+static void *LoadPointer(void *Data, unsigned Offset) {
+  void *Result;
+  memcpy(&Result, static_cast<char *>(Data) + Offset, sizeof(void*));
+  return Result;
 }
 
 SourceRange NestedNameSpecifierLoc::getSourceRange() const {
@@ -446,53 +456,49 @@
   return TypeLoc(Qualifier->getAsType(), TypeData);
 }
 
-namespace {
-  void Append(char *Start, char *End, char *&Buffer, unsigned &BufferSize,
+static void Append(char *Start, char *End, char *&Buffer, unsigned &BufferSize,
               unsigned &BufferCapacity) {
-    if (Start == End)
-      return;
+  if (Start == End)
+    return;
 
-    if (BufferSize + (End - Start) > BufferCapacity) {
-      // Reallocate the buffer.
-      unsigned NewCapacity = std::max(
-          (unsigned)(BufferCapacity ? BufferCapacity * 2 : sizeof(void *) * 2),
-          (unsigned)(BufferSize + (End - Start)));
-      char *NewBuffer = static_cast<char *>(malloc(NewCapacity));
-      if (BufferCapacity) {
-        memcpy(NewBuffer, Buffer, BufferSize);
-        free(Buffer);
-      }
-      Buffer = NewBuffer;
-      BufferCapacity = NewCapacity;
+  if (BufferSize + (End - Start) > BufferCapacity) {
+    // Reallocate the buffer.
+    unsigned NewCapacity = std::max(
+        (unsigned)(BufferCapacity ? BufferCapacity * 2 : sizeof(void *) * 2),
+        (unsigned)(BufferSize + (End - Start)));
+    char *NewBuffer = static_cast<char *>(llvm::safe_malloc(NewCapacity));
+    if (BufferCapacity) {
+      memcpy(NewBuffer, Buffer, BufferSize);
+      free(Buffer);
     }
-    
-    memcpy(Buffer + BufferSize, Start, End - Start);
-    BufferSize += End-Start;
+    Buffer = NewBuffer;
+    BufferCapacity = NewCapacity;
   }
+
+  memcpy(Buffer + BufferSize, Start, End - Start);
+  BufferSize += End-Start;
+}
   
-  /// \brief Save a source location to the given buffer.
-  void SaveSourceLocation(SourceLocation Loc, char *&Buffer,
-                          unsigned &BufferSize, unsigned &BufferCapacity) {
-    unsigned Raw = Loc.getRawEncoding();
-    Append(reinterpret_cast<char *>(&Raw),
-           reinterpret_cast<char *>(&Raw) + sizeof(unsigned),
-           Buffer, BufferSize, BufferCapacity);
-  }
+/// \brief Save a source location to the given buffer.
+static void SaveSourceLocation(SourceLocation Loc, char *&Buffer,
+                               unsigned &BufferSize, unsigned &BufferCapacity) {
+  unsigned Raw = Loc.getRawEncoding();
+  Append(reinterpret_cast<char *>(&Raw),
+         reinterpret_cast<char *>(&Raw) + sizeof(unsigned),
+         Buffer, BufferSize, BufferCapacity);
+}
   
-  /// \brief Save a pointer to the given buffer.
-  void SavePointer(void *Ptr, char *&Buffer, unsigned &BufferSize,
-                   unsigned &BufferCapacity) {
-    Append(reinterpret_cast<char *>(&Ptr),
-           reinterpret_cast<char *>(&Ptr) + sizeof(void *),
-           Buffer, BufferSize, BufferCapacity);
-  }
+/// \brief Save a pointer to the given buffer.
+static void SavePointer(void *Ptr, char *&Buffer, unsigned &BufferSize,
+                        unsigned &BufferCapacity) {
+  Append(reinterpret_cast<char *>(&Ptr),
+         reinterpret_cast<char *>(&Ptr) + sizeof(void *),
+         Buffer, BufferSize, BufferCapacity);
 }
 
 NestedNameSpecifierLocBuilder::
 NestedNameSpecifierLocBuilder(const NestedNameSpecifierLocBuilder &Other) 
-  : Representation(Other.Representation), Buffer(nullptr),
-    BufferSize(0), BufferCapacity(0)
-{
+    : Representation(Other.Representation) {
   if (!Other.Buffer)
     return;
   
diff --git a/lib/AST/ODRHash.cpp b/lib/AST/ODRHash.cpp
index 17c95f2..528e32a 100644
--- a/lib/AST/ODRHash.cpp
+++ b/lib/AST/ODRHash.cpp
@@ -33,6 +33,15 @@
 }
 
 void ODRHash::AddDeclarationName(DeclarationName Name) {
+  // Index all DeclarationName and use index numbers to refer to them.
+  auto Result = DeclNameMap.insert(std::make_pair(Name, DeclNameMap.size()));
+  ID.AddInteger(Result.first->second);
+  if (!Result.second) {
+    // If found in map, the the DeclarationName has previously been processed.
+    return;
+  }
+
+  // First time processing each DeclarationName, also process its details.
   AddBoolean(Name.isEmpty());
   if (Name.isEmpty())
     return;
@@ -168,7 +177,7 @@
 }
 
 void ODRHash::clear() {
-  DeclMap.clear();
+  DeclNameMap.clear();
   TypeMap.clear();
   Bools.clear();
   ID.clear();
@@ -418,7 +427,6 @@
 
 void ODRHash::AddSubDecl(const Decl *D) {
   assert(D && "Expecting non-null pointer.");
-  AddDecl(D);
 
   ODRDeclVisitor(ID, *this).Visit(D);
 }
@@ -466,21 +474,47 @@
   }
 }
 
+void ODRHash::AddFunctionDecl(const FunctionDecl *Function) {
+  assert(Function && "Expecting non-null pointer.");
+
+  // Skip hashing these kinds of function.
+  if (Function->isImplicit()) return;
+  if (Function->isDefaulted()) return;
+  if (Function->isDeleted()) return;
+  if (!Function->hasBody()) return;
+  if (!Function->getBody()) return;
+
+  // Skip functions that are specializations or in specialization context.
+  const DeclContext *DC = Function;
+  while (DC) {
+    if (isa<ClassTemplateSpecializationDecl>(DC)) return;
+    if (auto *F = dyn_cast<FunctionDecl>(DC))
+      if (F->isFunctionTemplateSpecialization()) return;
+    DC = DC->getParent();
+  }
+
+  AddDecl(Function);
+
+  AddQualType(Function->getReturnType());
+
+  ID.AddInteger(Function->param_size());
+  for (auto Param : Function->parameters())
+    AddSubDecl(Param);
+
+  AddStmt(Function->getBody());
+}
+
 void ODRHash::AddDecl(const Decl *D) {
   assert(D && "Expecting non-null pointer.");
-  auto Result = DeclMap.insert(std::make_pair(D, DeclMap.size()));
-  ID.AddInteger(Result.first->second);
-  // On first encounter of a Decl pointer, process it.  Every time afterwards,
-  // only the index value is needed.
-  if (!Result.second) {
+  D = D->getCanonicalDecl();
+
+  if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) {
+    AddDeclarationName(ND->getDeclName());
     return;
   }
 
   ID.AddInteger(D->getKind());
-
-  if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) {
-    AddDeclarationName(ND->getDeclName());
-  }
+  // TODO: Handle non-NamedDecl here.
 }
 
 namespace {
diff --git a/lib/AST/OpenMPClause.cpp b/lib/AST/OpenMPClause.cpp
index a4fa4b0..4feac0c 100644
--- a/lib/AST/OpenMPClause.cpp
+++ b/lib/AST/OpenMPClause.cpp
@@ -1,4 +1,4 @@
-//===--- OpenMPClause.cpp - Classes for OpenMP clauses --------------------===//
+//===- OpenMPClause.cpp - Classes for OpenMP clauses ----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,8 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/AST/OpenMPClause.h"
-
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
 
 using namespace clang;
 
@@ -716,7 +722,6 @@
                      MappableExprComponentListsRef ComponentLists,
                      OpenMPMapClauseKind TypeModifier, OpenMPMapClauseKind Type,
                      bool TypeIsImplicit, SourceLocation TypeLoc) {
-
   unsigned NumVars = Vars.size();
   unsigned NumUniqueDeclarations =
       getUniqueDeclarationsTotalNumber(Declarations);
diff --git a/lib/AST/QualTypeNames.cpp b/lib/AST/QualTypeNames.cpp
new file mode 100644
index 0000000..86c0eff
--- /dev/null
+++ b/lib/AST/QualTypeNames.cpp
@@ -0,0 +1,466 @@
+//===------- QualTypeNames.cpp - Generate Complete QualType Names ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/AST/GlobalDecl.h"
+#include "clang/AST/Mangle.h"
+#include "clang/AST/QualTypeNames.h"
+
+#include <stdio.h>
+#include <memory>
+
+namespace clang {
+
+namespace TypeName {
+
+/// \brief Create a NestedNameSpecifier for Namesp and its enclosing
+/// scopes.
+///
+/// \param[in] Ctx - the AST Context to be used.
+/// \param[in] Namesp - the NamespaceDecl for which a NestedNameSpecifier
+/// is requested.
+/// \param[in] WithGlobalNsPrefix - Indicate whether the global namespace
+/// specifier "::" should be prepended or not.
+static NestedNameSpecifier *createNestedNameSpecifier(
+    const ASTContext &Ctx,
+    const NamespaceDecl *Namesp,
+    bool WithGlobalNsPrefix);
+
+/// \brief Create a NestedNameSpecifier for TagDecl and its enclosing
+/// scopes.
+///
+/// \param[in] Ctx - the AST Context to be used.
+/// \param[in] TD - the TagDecl for which a NestedNameSpecifier is
+/// requested.
+/// \param[in] FullyQualify - Convert all template arguments into fully
+/// qualified names.
+/// \param[in] WithGlobalNsPrefix - Indicate whether the global namespace
+/// specifier "::" should be prepended or not.
+static NestedNameSpecifier *createNestedNameSpecifier(
+    const ASTContext &Ctx, const TypeDecl *TD,
+    bool FullyQualify, bool WithGlobalNsPrefix);
+
+static NestedNameSpecifier *createNestedNameSpecifierForScopeOf(
+    const ASTContext &Ctx, const Decl *decl,
+    bool FullyQualified, bool WithGlobalNsPrefix);
+
+static NestedNameSpecifier *getFullyQualifiedNestedNameSpecifier(
+    const ASTContext &Ctx, NestedNameSpecifier *scope, bool WithGlobalNsPrefix);
+
+static bool getFullyQualifiedTemplateName(const ASTContext &Ctx,
+                                          TemplateName &TName,
+                                          bool WithGlobalNsPrefix) {
+  bool Changed = false;
+  NestedNameSpecifier *NNS = nullptr;
+
+  TemplateDecl *ArgTDecl = TName.getAsTemplateDecl();
+  // ArgTDecl won't be NULL because we asserted that this isn't a
+  // dependent context very early in the call chain.
+  assert(ArgTDecl != nullptr);
+  QualifiedTemplateName *QTName = TName.getAsQualifiedTemplateName();
+
+  if (QTName && !QTName->hasTemplateKeyword()) {
+    NNS = QTName->getQualifier();
+    NestedNameSpecifier *QNNS = getFullyQualifiedNestedNameSpecifier(
+        Ctx, NNS, WithGlobalNsPrefix);
+    if (QNNS != NNS) {
+      Changed = true;
+      NNS = QNNS;
+    } else {
+      NNS = nullptr;
+    }
+  } else {
+    NNS = createNestedNameSpecifierForScopeOf(
+        Ctx, ArgTDecl, true, WithGlobalNsPrefix);
+  }
+  if (NNS) {
+    TName = Ctx.getQualifiedTemplateName(NNS,
+                                         /*TemplateKeyword=*/false, ArgTDecl);
+    Changed = true;
+  }
+  return Changed;
+}
+
+static bool getFullyQualifiedTemplateArgument(const ASTContext &Ctx,
+                                              TemplateArgument &Arg,
+                                              bool WithGlobalNsPrefix) {
+  bool Changed = false;
+
+  // Note: we do not handle TemplateArgument::Expression, to replace it
+  // we need the information for the template instance decl.
+
+  if (Arg.getKind() == TemplateArgument::Template) {
+    TemplateName TName = Arg.getAsTemplate();
+    Changed = getFullyQualifiedTemplateName(Ctx, TName, WithGlobalNsPrefix);
+    if (Changed) {
+      Arg = TemplateArgument(TName);
+    }
+  } else if (Arg.getKind() == TemplateArgument::Type) {
+    QualType SubTy = Arg.getAsType();
+    // Check if the type needs more desugaring and recurse.
+    QualType QTFQ = getFullyQualifiedType(SubTy, Ctx, WithGlobalNsPrefix);
+    if (QTFQ != SubTy) {
+      Arg = TemplateArgument(QTFQ);
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
+static const Type *getFullyQualifiedTemplateType(const ASTContext &Ctx,
+                                                 const Type *TypePtr,
+                                                 bool WithGlobalNsPrefix) {
+  // DependentTemplateTypes exist within template declarations and
+  // definitions. Therefore we shouldn't encounter them at the end of
+  // a translation unit. If we do, the caller has made an error.
+  assert(!isa<DependentTemplateSpecializationType>(TypePtr));
+  // In case of template specializations, iterate over the arguments
+  // and fully qualify them as well.
+  if (const auto *TST = dyn_cast<const TemplateSpecializationType>(TypePtr)) {
+    bool MightHaveChanged = false;
+    SmallVector<TemplateArgument, 4> FQArgs;
+    for (TemplateSpecializationType::iterator I = TST->begin(), E = TST->end();
+         I != E; ++I) {
+      // Cheap to copy and potentially modified by
+      // getFullyQualifedTemplateArgument.
+      TemplateArgument Arg(*I);
+      MightHaveChanged |= getFullyQualifiedTemplateArgument(
+          Ctx, Arg, WithGlobalNsPrefix);
+      FQArgs.push_back(Arg);
+    }
+
+    // If a fully qualified arg is different from the unqualified arg,
+    // allocate new type in the AST.
+    if (MightHaveChanged) {
+      QualType QT = Ctx.getTemplateSpecializationType(
+          TST->getTemplateName(), FQArgs,
+          TST->getCanonicalTypeInternal());
+      // getTemplateSpecializationType returns a fully qualified
+      // version of the specialization itself, so no need to qualify
+      // it.
+      return QT.getTypePtr();
+    }
+  } else if (const auto *TSTRecord = dyn_cast<const RecordType>(TypePtr)) {
+    // We are asked to fully qualify and we have a Record Type,
+    // which can point to a template instantiation with no sugar in any of
+    // its template argument, however we still need to fully qualify them.
+
+    if (const auto *TSTDecl =
+        dyn_cast<ClassTemplateSpecializationDecl>(TSTRecord->getDecl())) {
+      const TemplateArgumentList &TemplateArgs = TSTDecl->getTemplateArgs();
+
+      bool MightHaveChanged = false;
+      SmallVector<TemplateArgument, 4> FQArgs;
+      for (unsigned int I = 0, E = TemplateArgs.size(); I != E; ++I) {
+        // cheap to copy and potentially modified by
+        // getFullyQualifedTemplateArgument
+        TemplateArgument Arg(TemplateArgs[I]);
+        MightHaveChanged |= getFullyQualifiedTemplateArgument(
+            Ctx, Arg, WithGlobalNsPrefix);
+        FQArgs.push_back(Arg);
+      }
+
+      // If a fully qualified arg is different from the unqualified arg,
+      // allocate new type in the AST.
+      if (MightHaveChanged) {
+        TemplateName TN(TSTDecl->getSpecializedTemplate());
+        QualType QT = Ctx.getTemplateSpecializationType(
+            TN, FQArgs,
+            TSTRecord->getCanonicalTypeInternal());
+        // getTemplateSpecializationType returns a fully qualified
+        // version of the specialization itself, so no need to qualify
+        // it.
+        return QT.getTypePtr();
+      }
+    }
+  }
+  return TypePtr;
+}
+
+static NestedNameSpecifier *createOuterNNS(const ASTContext &Ctx, const Decl *D,
+                                           bool FullyQualify,
+                                           bool WithGlobalNsPrefix) {
+  const DeclContext *DC = D->getDeclContext();
+  if (const auto *NS = dyn_cast<NamespaceDecl>(DC)) {
+    while (NS && NS->isInline()) {
+      // Ignore inline namespace;
+      NS = dyn_cast<NamespaceDecl>(NS->getDeclContext());
+    }
+    if (NS->getDeclName()) {
+      return createNestedNameSpecifier(Ctx, NS, WithGlobalNsPrefix);
+    }
+    return nullptr;  // no starting '::', no anonymous
+  } else if (const auto *TD = dyn_cast<TagDecl>(DC)) {
+    return createNestedNameSpecifier(Ctx, TD, FullyQualify, WithGlobalNsPrefix);
+  } else if (const auto *TDD = dyn_cast<TypedefNameDecl>(DC)) {
+    return createNestedNameSpecifier(
+        Ctx, TDD, FullyQualify, WithGlobalNsPrefix);
+  } else if (WithGlobalNsPrefix && DC->isTranslationUnit()) {
+    return NestedNameSpecifier::GlobalSpecifier(Ctx);
+  }
+  return nullptr;  // no starting '::' if |WithGlobalNsPrefix| is false
+}
+
+/// \brief Return a fully qualified version of this name specifier.
+static NestedNameSpecifier *getFullyQualifiedNestedNameSpecifier(
+    const ASTContext &Ctx, NestedNameSpecifier *Scope,
+    bool WithGlobalNsPrefix) {
+  switch (Scope->getKind()) {
+    case NestedNameSpecifier::Global:
+      // Already fully qualified
+      return Scope;
+    case NestedNameSpecifier::Namespace:
+      return TypeName::createNestedNameSpecifier(
+          Ctx, Scope->getAsNamespace(), WithGlobalNsPrefix);
+    case NestedNameSpecifier::NamespaceAlias:
+      // Namespace aliases are only valid for the duration of the
+      // scope where they were introduced, and therefore are often
+      // invalid at the end of the TU.  So use the namespace name more
+      // likely to be valid at the end of the TU.
+      return TypeName::createNestedNameSpecifier(
+          Ctx,
+          Scope->getAsNamespaceAlias()->getNamespace()->getCanonicalDecl(),
+          WithGlobalNsPrefix);
+    case NestedNameSpecifier::Identifier:
+      // A function or some other construct that makes it un-namable
+      // at the end of the TU. Skip the current component of the name,
+      // but use the name of it's prefix.
+      return getFullyQualifiedNestedNameSpecifier(
+          Ctx, Scope->getPrefix(), WithGlobalNsPrefix);
+    case NestedNameSpecifier::Super:
+    case NestedNameSpecifier::TypeSpec:
+    case NestedNameSpecifier::TypeSpecWithTemplate: {
+      const Type *Type = Scope->getAsType();
+      // Find decl context.
+      const TagDecl *TD = nullptr;
+      if (const TagType *TagDeclType = Type->getAs<TagType>()) {
+        TD = TagDeclType->getDecl();
+      } else {
+        TD = Type->getAsCXXRecordDecl();
+      }
+      if (TD) {
+        return TypeName::createNestedNameSpecifier(Ctx, TD,
+                                                   true /*FullyQualified*/,
+                                                   WithGlobalNsPrefix);
+      } else if (const auto *TDD = dyn_cast<TypedefType>(Type)) {
+        return TypeName::createNestedNameSpecifier(Ctx, TDD->getDecl(),
+                                                   true /*FullyQualified*/,
+                                                   WithGlobalNsPrefix);
+      }
+      return Scope;
+    }
+  }
+  llvm_unreachable("bad NNS kind");
+}
+
+/// \brief Create a nested name specifier for the declaring context of
+/// the type.
+static NestedNameSpecifier *createNestedNameSpecifierForScopeOf(
+    const ASTContext &Ctx, const Decl *Decl,
+    bool FullyQualified, bool WithGlobalNsPrefix) {
+  assert(Decl);
+
+  const DeclContext *DC = Decl->getDeclContext()->getRedeclContext();
+  const auto *Outer = dyn_cast_or_null<NamedDecl>(DC);
+  const auto *OuterNS = dyn_cast_or_null<NamespaceDecl>(DC);
+  if (Outer && !(OuterNS && OuterNS->isAnonymousNamespace())) {
+    if (const auto *CxxDecl = dyn_cast<CXXRecordDecl>(DC)) {
+      if (ClassTemplateDecl *ClassTempl =
+              CxxDecl->getDescribedClassTemplate()) {
+        // We are in the case of a type(def) that was declared in a
+        // class template but is *not* type dependent.  In clang, it
+        // gets attached to the class template declaration rather than
+        // any specific class template instantiation.  This result in
+        // 'odd' fully qualified typename:
+        //
+        //    vector<_Tp,_Alloc>::size_type
+        //
+        // Make the situation is 'useable' but looking a bit odd by
+        // picking a random instance as the declaring context.
+        if (ClassTempl->spec_begin() != ClassTempl->spec_end()) {
+          Decl = *(ClassTempl->spec_begin());
+          Outer = dyn_cast<NamedDecl>(Decl);
+          OuterNS = dyn_cast<NamespaceDecl>(Decl);
+        }
+      }
+    }
+
+    if (OuterNS) {
+      return createNestedNameSpecifier(Ctx, OuterNS, WithGlobalNsPrefix);
+    } else if (const auto *TD = dyn_cast<TagDecl>(Outer)) {
+      return createNestedNameSpecifier(
+          Ctx, TD, FullyQualified, WithGlobalNsPrefix);
+    } else if (dyn_cast<TranslationUnitDecl>(Outer)) {
+      // Context is the TU. Nothing needs to be done.
+      return nullptr;
+    } else {
+      // Decl's context was neither the TU, a namespace, nor a
+      // TagDecl, which means it is a type local to a scope, and not
+      // accessible at the end of the TU.
+      return nullptr;
+    }
+  } else if (WithGlobalNsPrefix && DC->isTranslationUnit()) {
+    return NestedNameSpecifier::GlobalSpecifier(Ctx);
+  }
+  return nullptr;
+}
+
+/// \brief Create a nested name specifier for the declaring context of
+/// the type.
+static NestedNameSpecifier *createNestedNameSpecifierForScopeOf(
+    const ASTContext &Ctx, const Type *TypePtr,
+    bool FullyQualified, bool WithGlobalNsPrefix) {
+  if (!TypePtr) return nullptr;
+
+  Decl *Decl = nullptr;
+  // There are probably other cases ...
+  if (const auto *TDT = dyn_cast<TypedefType>(TypePtr)) {
+    Decl = TDT->getDecl();
+  } else if (const auto *TagDeclType = dyn_cast<TagType>(TypePtr)) {
+    Decl = TagDeclType->getDecl();
+  } else if (const auto *TST = dyn_cast<TemplateSpecializationType>(TypePtr)) {
+    Decl = TST->getTemplateName().getAsTemplateDecl();
+  } else {
+    Decl = TypePtr->getAsCXXRecordDecl();
+  }
+
+  if (!Decl) return nullptr;
+
+  return createNestedNameSpecifierForScopeOf(
+      Ctx, Decl, FullyQualified, WithGlobalNsPrefix);
+}
+
+NestedNameSpecifier *createNestedNameSpecifier(const ASTContext &Ctx,
+                                               const NamespaceDecl *Namespace,
+                                               bool WithGlobalNsPrefix) {
+  while (Namespace && Namespace->isInline()) {
+    // Ignore inline namespace;
+    Namespace = dyn_cast<NamespaceDecl>(Namespace->getDeclContext());
+  }
+  if (!Namespace) return nullptr;
+
+  bool FullyQualified = true;  // doesn't matter, DeclContexts are namespaces
+  return NestedNameSpecifier::Create(
+      Ctx,
+      createOuterNNS(Ctx, Namespace, FullyQualified, WithGlobalNsPrefix),
+      Namespace);
+}
+
+NestedNameSpecifier *createNestedNameSpecifier(const ASTContext &Ctx,
+                                               const TypeDecl *TD,
+                                               bool FullyQualify,
+                                               bool WithGlobalNsPrefix) {
+  return NestedNameSpecifier::Create(
+      Ctx,
+      createOuterNNS(Ctx, TD, FullyQualify, WithGlobalNsPrefix),
+      false /*No TemplateKeyword*/,
+      TD->getTypeForDecl());
+}
+
+/// \brief Return the fully qualified type, including fully-qualified
+/// versions of any template parameters.
+QualType getFullyQualifiedType(QualType QT, const ASTContext &Ctx,
+                               bool WithGlobalNsPrefix) {
+  // In case of myType* we need to strip the pointer first, fully
+  // qualify and attach the pointer once again.
+  if (isa<PointerType>(QT.getTypePtr())) {
+    // Get the qualifiers.
+    Qualifiers Quals = QT.getQualifiers();
+    QT = getFullyQualifiedType(QT->getPointeeType(), Ctx, WithGlobalNsPrefix);
+    QT = Ctx.getPointerType(QT);
+    // Add back the qualifiers.
+    QT = Ctx.getQualifiedType(QT, Quals);
+    return QT;
+  }
+
+  // In case of myType& we need to strip the reference first, fully
+  // qualify and attach the reference once again.
+  if (isa<ReferenceType>(QT.getTypePtr())) {
+    // Get the qualifiers.
+    bool IsLValueRefTy = isa<LValueReferenceType>(QT.getTypePtr());
+    Qualifiers Quals = QT.getQualifiers();
+    QT = getFullyQualifiedType(QT->getPointeeType(), Ctx, WithGlobalNsPrefix);
+    // Add the r- or l-value reference type back to the fully
+    // qualified one.
+    if (IsLValueRefTy)
+      QT = Ctx.getLValueReferenceType(QT);
+    else
+      QT = Ctx.getRValueReferenceType(QT);
+    // Add back the qualifiers.
+    QT = Ctx.getQualifiedType(QT, Quals);
+    return QT;
+  }
+
+  // Remove the part of the type related to the type being a template
+  // parameter (we won't report it as part of the 'type name' and it
+  // is actually make the code below to be more complex (to handle
+  // those)
+  while (isa<SubstTemplateTypeParmType>(QT.getTypePtr())) {
+    // Get the qualifiers.
+    Qualifiers Quals = QT.getQualifiers();
+
+    QT = dyn_cast<SubstTemplateTypeParmType>(QT.getTypePtr())->desugar();
+
+    // Add back the qualifiers.
+    QT = Ctx.getQualifiedType(QT, Quals);
+  }
+
+  NestedNameSpecifier *Prefix = nullptr;
+  // Local qualifiers are attached to the QualType outside of the
+  // elaborated type.  Retrieve them before descending into the
+  // elaborated type.
+  Qualifiers PrefixQualifiers = QT.getLocalQualifiers();
+  QT = QualType(QT.getTypePtr(), 0);
+  ElaboratedTypeKeyword Keyword = ETK_None;
+  if (const auto *ETypeInput = dyn_cast<ElaboratedType>(QT.getTypePtr())) {
+    QT = ETypeInput->getNamedType();
+    assert(!QT.hasLocalQualifiers());
+    Keyword = ETypeInput->getKeyword();
+  }
+  // Create a nested name specifier if needed.
+  Prefix = createNestedNameSpecifierForScopeOf(Ctx, QT.getTypePtr(),
+                                               true /*FullyQualified*/,
+                                               WithGlobalNsPrefix);
+
+  // In case of template specializations iterate over the arguments and
+  // fully qualify them as well.
+  if (isa<const TemplateSpecializationType>(QT.getTypePtr()) ||
+      isa<const RecordType>(QT.getTypePtr())) {
+    // We are asked to fully qualify and we have a Record Type (which
+    // may point to a template specialization) or Template
+    // Specialization Type. We need to fully qualify their arguments.
+
+    const Type *TypePtr = getFullyQualifiedTemplateType(
+        Ctx, QT.getTypePtr(), WithGlobalNsPrefix);
+    QT = QualType(TypePtr, 0);
+  }
+  if (Prefix || Keyword != ETK_None) {
+    QT = Ctx.getElaboratedType(Keyword, Prefix, QT);
+  }
+  QT = Ctx.getQualifiedType(QT, PrefixQualifiers);
+  return QT;
+}
+
+std::string getFullyQualifiedName(QualType QT,
+                                  const ASTContext &Ctx,
+                                  bool WithGlobalNsPrefix) {
+  PrintingPolicy Policy(Ctx.getPrintingPolicy());
+  Policy.SuppressScope = false;
+  Policy.AnonymousTagLocations = false;
+  Policy.PolishForDeclaration = true;
+  Policy.SuppressUnwrittenScope = true;
+  QualType FQQT = getFullyQualifiedType(QT, Ctx, WithGlobalNsPrefix);
+  return FQQT.getAsString(Policy);
+}
+
+}  // end namespace TypeName
+}  // end namespace clang
diff --git a/lib/AST/RawCommentList.cpp b/lib/AST/RawCommentList.cpp
index 881a7d9..73a4d9d 100644
--- a/lib/AST/RawCommentList.cpp
+++ b/lib/AST/RawCommentList.cpp
@@ -107,10 +107,10 @@
 }
 
 RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
-                       bool Merged, bool ParseAllComments) :
+                       const CommentOptions &CommentOpts, bool Merged) :
     Range(SR), RawTextValid(false), BriefTextValid(false),
-    IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false),
-    ParseAllComments(ParseAllComments) {
+    IsAttached(false), IsTrailingComment(false),
+    IsAlmostTrailingComment(false) {
   // Extract raw comment text, if possible.
   if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
     Kind = RCK_Invalid;
@@ -118,10 +118,11 @@
   }
 
   // Guess comment kind.
-  std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
+  std::pair<CommentKind, bool> K =
+      getCommentKind(RawText, CommentOpts.ParseAllComments);
 
   // Guess whether an ordinary comment is trailing.
-  if (ParseAllComments && isOrdinaryKind(K.first)) {
+  if (CommentOpts.ParseAllComments && isOrdinaryKind(K.first)) {
     FileID BeginFileID;
     unsigned BeginOffset;
     std::tie(BeginFileID, BeginOffset) =
@@ -270,6 +271,7 @@
 }
 
 void RawCommentList::addComment(const RawComment &RC,
+                                const CommentOptions &CommentOpts,
                                 llvm::BumpPtrAllocator &Allocator) {
   if (RC.isInvalid())
     return;
@@ -284,7 +286,7 @@
   }
 
   // Ordinary comments are not interesting for us.
-  if (RC.isOrdinary())
+  if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
     return;
 
   // If this is the first Doxygen comment, save it (because there isn't
@@ -317,8 +319,7 @@
       onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(),
                             /*MaxNewlinesAllowed=*/1)) {
     SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd());
-    *Comments.back() = RawComment(SourceMgr, MergedRange, true,
-                                  RC.isParseAllComments());
+    *Comments.back() = RawComment(SourceMgr, MergedRange, CommentOpts, true);
   } else {
     Comments.push_back(new (Allocator) RawComment(RC));
   }
diff --git a/lib/AST/RecordLayout.cpp b/lib/AST/RecordLayout.cpp
index 299fd11..37fe029 100644
--- a/lib/AST/RecordLayout.cpp
+++ b/lib/AST/RecordLayout.cpp
@@ -1,4 +1,4 @@
-//===-- RecordLayout.cpp - Layout information for a struct/union -*- C++ -*-==//
+//===- RecordLayout.cpp - Layout information for a struct/union -----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,9 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/AST/ASTContext.h"
 #include "clang/AST/RecordLayout.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
+#include <cassert>
 
 using namespace clang;
 
@@ -32,7 +34,7 @@
                                  CharUnits datasize,
                                  ArrayRef<uint64_t> fieldoffsets)
     : Size(size), DataSize(datasize), Alignment(alignment),
-      RequiredAlignment(requiredAlignment), CXXInfo(nullptr) {
+      RequiredAlignment(requiredAlignment) {
   FieldOffsets.append(Ctx, fieldoffsets.begin(), fieldoffsets.end());
 }
 
@@ -73,7 +75,6 @@
   CXXInfo->EndsWithZeroSizedObject = EndsWithZeroSizedObject;
   CXXInfo->LeadsWithZeroSizedBase = LeadsWithZeroSizedBase;
 
-
 #ifndef NDEBUG
     if (const CXXRecordDecl *PrimaryBase = getPrimaryBase()) {
       if (isPrimaryBaseVirtual()) {
diff --git a/lib/AST/RecordLayoutBuilder.cpp b/lib/AST/RecordLayoutBuilder.cpp
index 0083e39..e95bc91 100644
--- a/lib/AST/RecordLayoutBuilder.cpp
+++ b/lib/AST/RecordLayoutBuilder.cpp
@@ -1504,9 +1504,10 @@
     FieldAlign = TypeSize;
 
     // If the previous field was not a bitfield, or was a bitfield
-    // with a different storage unit size, we're done with that
-    // storage unit.
-    if (LastBitfieldTypeSize != TypeSize) {
+    // with a different storage unit size, or if this field doesn't fit into
+    // the current storage unit, we're done with that storage unit.
+    if (LastBitfieldTypeSize != TypeSize ||
+        UnfilledBitsInLastUnit < FieldSize) {
       // Also, ignore zero-length bitfields after non-bitfields.
       if (!LastBitfieldTypeSize && !FieldSize)
         FieldAlign = 1;
@@ -1751,7 +1752,34 @@
       QualType T = Context.getBaseElementType(D->getType());
       if (const BuiltinType *BTy = T->getAs<BuiltinType>()) {
         CharUnits TypeSize = Context.getTypeSizeInChars(BTy);
-        if (TypeSize > FieldAlign)
+
+        if (!llvm::isPowerOf2_64(TypeSize.getQuantity())) {
+          assert(
+              !Context.getTargetInfo().getTriple().isWindowsMSVCEnvironment() &&
+              "Non PowerOf2 size in MSVC mode");
+          // Base types with sizes that aren't a power of two don't work
+          // with the layout rules for MS structs. This isn't an issue in
+          // MSVC itself since there are no such base data types there.
+          // On e.g. x86_32 mingw and linux, long double is 12 bytes though.
+          // Any structs involving that data type obviously can't be ABI
+          // compatible with MSVC regardless of how it is laid out.
+
+          // Since ms_struct can be mass enabled (via a pragma or via the
+          // -mms-bitfields command line parameter), this can trigger for
+          // structs that don't actually need MSVC compatibility, so we
+          // need to be able to sidestep the ms_struct layout for these types.
+
+          // Since the combination of -mms-bitfields together with structs
+          // like max_align_t (which contains a long double) for mingw is
+          // quite comon (and GCC handles it silently), just handle it
+          // silently there. For other targets that have ms_struct enabled
+          // (most probably via a pragma or attribute), trigger a diagnostic
+          // that defaults to an error.
+          if (!Context.getTargetInfo().getTriple().isWindowsGNUEnvironment())
+            Diag(D->getLocation(), diag::warn_npot_ms_struct);
+        }
+        if (TypeSize > FieldAlign &&
+            llvm::isPowerOf2_64(TypeSize.getQuantity()))
           FieldAlign = TypeSize;
       }
     }
@@ -2895,13 +2923,12 @@
       Work.insert(MD);
   while (!Work.empty()) {
     const CXXMethodDecl *MD = *Work.begin();
-    CXXMethodDecl::method_iterator i = MD->begin_overridden_methods(),
-                                   e = MD->end_overridden_methods();
+    auto MethodRange = MD->overridden_methods();
     // If a virtual method has no-overrides it lives in its parent's vtable.
-    if (i == e)
+    if (MethodRange.begin() == MethodRange.end())
       BasesWithOverriddenMethods.insert(MD->getParent());
     else
-      Work.insert(i, e);
+      Work.insert(MethodRange.begin(), MethodRange.end());
     // We've finished processing this element, remove it from the working set.
     Work.erase(MD);
   }
@@ -3011,7 +3038,7 @@
     return nullptr;
 
   assert(RD->getDefinition() && "Cannot get key function for forward decl!");
-  RD = cast<CXXRecordDecl>(RD->getDefinition());
+  RD = RD->getDefinition();
 
   // Beware:
   //  1) computing the key function might trigger deserialization, which might
diff --git a/lib/AST/Stmt.cpp b/lib/AST/Stmt.cpp
index 922ff1f..982fd45 100644
--- a/lib/AST/Stmt.cpp
+++ b/lib/AST/Stmt.cpp
@@ -1,4 +1,4 @@
-//===--- Stmt.cpp - Statement AST Node Implementation ---------------------===//
+//===- Stmt.cpp - Statement AST Node Implementation -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,6 +13,8 @@
 
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTDiagnostic.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclGroup.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
 #include "clang/AST/ExprOpenMP.h"
@@ -22,10 +24,24 @@
 #include "clang/AST/StmtOpenMP.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/CharInfo.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Lex/Token.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <string>
+#include <utility>
+
 using namespace clang;
 
 static struct StmtClassNameTable {
@@ -150,6 +166,7 @@
 }
 
 namespace {
+
   struct good {};
   struct bad {};
 
@@ -191,7 +208,8 @@
   (void) is_good(implements_getLocStart(&type::getLocStart))
 #define ASSERT_IMPLEMENTS_getLocEnd(type) \
   (void) is_good(implements_getLocEnd(&type::getLocEnd))
-}
+
+} // namespace
 
 /// Check whether the various Stmt classes implement their member
 /// functions.
@@ -222,6 +240,7 @@
 //
 // See also Expr.cpp:getExprLoc().
 namespace {
+
   /// This implementation is used when a class provides a custom
   /// implementation of getSourceRange.
   template <class S, class T>
@@ -240,7 +259,8 @@
     return SourceRange(static_cast<const S*>(stmt)->getLocStart(),
                        static_cast<const S*>(stmt)->getLocEnd());
   }
-}
+
+} // namespace
 
 SourceRange Stmt::getSourceRange() const {
   switch (getStmtClass()) {
@@ -279,31 +299,34 @@
   llvm_unreachable("unknown statement kind");
 }
 
-CompoundStmt::CompoundStmt(const ASTContext &C, ArrayRef<Stmt*> Stmts,
-                           SourceLocation LB, SourceLocation RB)
-  : Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) {
+CompoundStmt::CompoundStmt(ArrayRef<Stmt *> Stmts, SourceLocation LB,
+                           SourceLocation RB)
+    : Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) {
   CompoundStmtBits.NumStmts = Stmts.size();
-  assert(CompoundStmtBits.NumStmts == Stmts.size() &&
-         "NumStmts doesn't fit in bits of CompoundStmtBits.NumStmts!");
-
-  if (Stmts.size() == 0) {
-    Body = nullptr;
-    return;
-  }
-
-  Body = new (C) Stmt*[Stmts.size()];
-  std::copy(Stmts.begin(), Stmts.end(), Body);
+  setStmts(Stmts);
 }
 
-void CompoundStmt::setStmts(const ASTContext &C, ArrayRef<Stmt *> Stmts) {
-  if (Body)
-    C.Deallocate(Body);
-  CompoundStmtBits.NumStmts = Stmts.size();
+void CompoundStmt::setStmts(ArrayRef<Stmt *> Stmts) {
   assert(CompoundStmtBits.NumStmts == Stmts.size() &&
          "NumStmts doesn't fit in bits of CompoundStmtBits.NumStmts!");
 
-  Body = new (C) Stmt*[Stmts.size()];
-  std::copy(Stmts.begin(), Stmts.end(), Body);
+  std::copy(Stmts.begin(), Stmts.end(), body_begin());
+}
+
+CompoundStmt *CompoundStmt::Create(const ASTContext &C, ArrayRef<Stmt *> Stmts,
+                                   SourceLocation LB, SourceLocation RB) {
+  void *Mem =
+      C.Allocate(totalSizeToAlloc<Stmt *>(Stmts.size()), alignof(CompoundStmt));
+  return new (Mem) CompoundStmt(Stmts, LB, RB);
+}
+
+CompoundStmt *CompoundStmt::CreateEmpty(const ASTContext &C,
+                                        unsigned NumStmts) {
+  void *Mem =
+      C.Allocate(totalSizeToAlloc<Stmt *>(NumStmts), alignof(CompoundStmt));
+  CompoundStmt *New = new (Mem) CompoundStmt(EmptyShell());
+  New->CompoundStmtBits.NumStmts = NumStmts;
+  return New;
 }
 
 const char *LabelStmt::getName() const {
@@ -314,7 +337,7 @@
                                        ArrayRef<const Attr*> Attrs,
                                        Stmt *SubStmt) {
   assert(!Attrs.empty() && "Attrs should not be empty");
-  void *Mem = C.Allocate(sizeof(AttributedStmt) + sizeof(Attr *) * Attrs.size(),
+  void *Mem = C.Allocate(totalSizeToAlloc<const Attr *>(Attrs.size()),
                          alignof(AttributedStmt));
   return new (Mem) AttributedStmt(Loc, Attrs, SubStmt);
 }
@@ -322,7 +345,7 @@
 AttributedStmt *AttributedStmt::CreateEmpty(const ASTContext &C,
                                             unsigned NumAttrs) {
   assert(NumAttrs > 0 && "NumAttrs should be greater than zero");
-  void *Mem = C.Allocate(sizeof(AttributedStmt) + sizeof(Attr *) * NumAttrs,
+  void *Mem = C.Allocate(totalSizeToAlloc<const Attr *>(NumAttrs),
                          alignof(AttributedStmt));
   return new (Mem) AttributedStmt(EmptyShell(), NumAttrs);
 }
@@ -408,6 +431,7 @@
 Expr *GCCAsmStmt::getInputExpr(unsigned i) {
   return cast<Expr>(Exprs[i + NumOutputs]);
 }
+
 void GCCAsmStmt::setInputExpr(unsigned i, Expr *E) {
   Exprs[i + NumOutputs] = E;
 }
@@ -506,7 +530,7 @@
   unsigned LastAsmStringToken = 0;
   unsigned LastAsmStringOffset = 0;
 
-  while (1) {
+  while (true) {
     // Done with the string?
     if (CurPtr == StrEnd) {
       if (!CurStringPiece.empty())
@@ -682,6 +706,7 @@
 Expr *MSAsmStmt::getInputExpr(unsigned i) {
   return cast<Expr>(Exprs[i + NumOutputs]);
 }
+
 void MSAsmStmt::setInputExpr(unsigned i, Expr *E) {
   Exprs[i + NumOutputs] = E;
 }
@@ -696,9 +721,8 @@
                        StringLiteral **constraints, Expr **exprs,
                        StringLiteral *asmstr, unsigned numclobbers,
                        StringLiteral **clobbers, SourceLocation rparenloc)
-  : AsmStmt(GCCAsmStmtClass, asmloc, issimple, isvolatile, numoutputs,
-            numinputs, numclobbers), RParenLoc(rparenloc), AsmStr(asmstr) {
-
+    : AsmStmt(GCCAsmStmtClass, asmloc, issimple, isvolatile, numoutputs,
+              numinputs, numclobbers), RParenLoc(rparenloc), AsmStr(asmstr) {
   unsigned NumExprs = NumOutputs + NumInputs;
 
   Names = new (C) IdentifierInfo*[NumExprs];
@@ -721,10 +745,9 @@
                      ArrayRef<StringRef> constraints, ArrayRef<Expr*> exprs,
                      StringRef asmstr, ArrayRef<StringRef> clobbers,
                      SourceLocation endloc)
-  : AsmStmt(MSAsmStmtClass, asmloc, issimple, isvolatile, numoutputs,
-            numinputs, clobbers.size()), LBraceLoc(lbraceloc),
-            EndLoc(endloc), NumAsmToks(asmtoks.size()) {
-
+    : AsmStmt(MSAsmStmtClass, asmloc, issimple, isvolatile, numoutputs,
+              numinputs, clobbers.size()), LBraceLoc(lbraceloc),
+              EndLoc(endloc), NumAsmToks(asmtoks.size()) {
   initialize(C, asmstr, asmtoks, constraints, exprs, clobbers);
 }
 
@@ -909,14 +932,9 @@
   return cast_or_null<Expr>(RetExpr);
 }
 
-SEHTryStmt::SEHTryStmt(bool IsCXXTry,
-                       SourceLocation TryLoc,
-                       Stmt *TryBlock,
+SEHTryStmt::SEHTryStmt(bool IsCXXTry, SourceLocation TryLoc, Stmt *TryBlock,
                        Stmt *Handler)
-  : Stmt(SEHTryStmtClass),
-    IsCXXTry(IsCXXTry),
-    TryLoc(TryLoc)
-{
+    : Stmt(SEHTryStmtClass), IsCXXTry(IsCXXTry), TryLoc(TryLoc) {
   Children[TRY]     = TryBlock;
   Children[HANDLER] = Handler;
 }
@@ -935,12 +953,8 @@
   return dyn_cast<SEHFinallyStmt>(getHandler());
 }
 
-SEHExceptStmt::SEHExceptStmt(SourceLocation Loc,
-                             Expr *FilterExpr,
-                             Stmt *Block)
-  : Stmt(SEHExceptStmtClass),
-    Loc(Loc)
-{
+SEHExceptStmt::SEHExceptStmt(SourceLocation Loc, Expr *FilterExpr, Stmt *Block)
+    : Stmt(SEHExceptStmtClass), Loc(Loc) {
   Children[FILTER_EXPR] = FilterExpr;
   Children[BLOCK]       = Block;
 }
@@ -950,12 +964,8 @@
   return new(C) SEHExceptStmt(Loc,FilterExpr,Block);
 }
 
-SEHFinallyStmt::SEHFinallyStmt(SourceLocation Loc,
-                               Stmt *Block)
-  : Stmt(SEHFinallyStmtClass),
-    Loc(Loc),
-    Block(Block)
-{}
+SEHFinallyStmt::SEHFinallyStmt(SourceLocation Loc, Stmt *Block)
+    : Stmt(SEHFinallyStmtClass), Loc(Loc), Block(Block) {}
 
 SEHFinallyStmt* SEHFinallyStmt::Create(const ASTContext &C, SourceLocation Loc,
                                        Stmt *Block) {
@@ -1037,7 +1047,7 @@
 
 CapturedStmt::CapturedStmt(EmptyShell Empty, unsigned NumCaptures)
   : Stmt(CapturedStmtClass, Empty), NumCaptures(NumCaptures),
-    CapDeclAndKind(nullptr, CR_Default), TheRecordDecl(nullptr) {
+    CapDeclAndKind(nullptr, CR_Default) {
   getStoredStmts()[NumCaptures] = nullptr;
 }
 
@@ -1090,6 +1100,7 @@
 CapturedDecl *CapturedStmt::getCapturedDecl() {
   return CapDeclAndKind.getPointer();
 }
+
 const CapturedDecl *CapturedStmt::getCapturedDecl() const {
   return CapDeclAndKind.getPointer();
 }
diff --git a/lib/AST/StmtIterator.cpp b/lib/AST/StmtIterator.cpp
index 861d090..00056e8 100644
--- a/lib/AST/StmtIterator.cpp
+++ b/lib/AST/StmtIterator.cpp
@@ -1,4 +1,4 @@
-//===--- StmtIterator.cpp - Iterators for Statements ------------------------===//
+//===- StmtIterator.cpp - Iterators for Statements ------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,6 +13,11 @@
 
 #include "clang/AST/StmtIterator.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <cstdint>
 
 using namespace clang;
 
@@ -31,7 +36,7 @@
 }
 
 void StmtIteratorBase::NextVA() {
-  assert (getVAPtr());
+  assert(getVAPtr());
 
   const VariableArrayType *p = getVAPtr();
   p = FindVA(p->getElementType().getTypePtr());
@@ -93,22 +98,22 @@
 }
 
 StmtIteratorBase::StmtIteratorBase(Decl** dgi, Decl** dge)
-  : DGI(dgi), RawVAPtr(DeclGroupMode), DGE(dge) {
+    : DGI(dgi), RawVAPtr(DeclGroupMode), DGE(dge) {
   NextDecl(false);
 }
 
 StmtIteratorBase::StmtIteratorBase(const VariableArrayType* t)
-  : DGI(nullptr), RawVAPtr(SizeOfTypeVAMode) {
+    : DGI(nullptr), RawVAPtr(SizeOfTypeVAMode) {
   RawVAPtr |= reinterpret_cast<uintptr_t>(t);
 }
 
 Stmt*& StmtIteratorBase::GetDeclExpr() const {
   if (const VariableArrayType* VAPtr = getVAPtr()) {
-    assert (VAPtr->SizeExpr);
+    assert(VAPtr->SizeExpr);
     return const_cast<Stmt*&>(VAPtr->SizeExpr);
   }
 
-  assert (inDeclGroup());
+  assert(inDeclGroup());
   VarDecl* VD = cast<VarDecl>(*DGI);
   return *VD->getInitAddress();
 }
diff --git a/lib/AST/StmtOpenMP.cpp b/lib/AST/StmtOpenMP.cpp
index b4e5795..87bf5aa 100644
--- a/lib/AST/StmtOpenMP.cpp
+++ b/lib/AST/StmtOpenMP.cpp
@@ -795,13 +795,14 @@
 
 OMPTargetEnterDataDirective *OMPTargetEnterDataDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-    ArrayRef<OMPClause *> Clauses) {
+    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
   void *Mem = C.Allocate(
       llvm::alignTo(sizeof(OMPTargetEnterDataDirective), alignof(OMPClause *)) +
-      sizeof(OMPClause *) * Clauses.size());
+      sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
   OMPTargetEnterDataDirective *Dir =
       new (Mem) OMPTargetEnterDataDirective(StartLoc, EndLoc, Clauses.size());
   Dir->setClauses(Clauses);
+  Dir->setAssociatedStmt(AssociatedStmt);
   return Dir;
 }
 
@@ -810,20 +811,20 @@
                                          EmptyShell) {
   void *Mem = C.Allocate(
       llvm::alignTo(sizeof(OMPTargetEnterDataDirective), alignof(OMPClause *)) +
-      sizeof(OMPClause *) * N);
+      sizeof(OMPClause *) * N + sizeof(Stmt *));
   return new (Mem) OMPTargetEnterDataDirective(N);
 }
 
-OMPTargetExitDataDirective *
-OMPTargetExitDataDirective::Create(const ASTContext &C, SourceLocation StartLoc,
-                                   SourceLocation EndLoc,
-                                   ArrayRef<OMPClause *> Clauses) {
+OMPTargetExitDataDirective *OMPTargetExitDataDirective::Create(
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
   void *Mem = C.Allocate(
       llvm::alignTo(sizeof(OMPTargetExitDataDirective), alignof(OMPClause *)) +
-      sizeof(OMPClause *) * Clauses.size());
+      sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
   OMPTargetExitDataDirective *Dir =
       new (Mem) OMPTargetExitDataDirective(StartLoc, EndLoc, Clauses.size());
   Dir->setClauses(Clauses);
+  Dir->setAssociatedStmt(AssociatedStmt);
   return Dir;
 }
 
@@ -832,7 +833,7 @@
                                         EmptyShell) {
   void *Mem = C.Allocate(
       llvm::alignTo(sizeof(OMPTargetExitDataDirective), alignof(OMPClause *)) +
-      sizeof(OMPClause *) * N);
+      sizeof(OMPClause *) * N + sizeof(Stmt *));
   return new (Mem) OMPTargetExitDataDirective(N);
 }
 
@@ -1007,16 +1008,17 @@
   return new (Mem) OMPDistributeDirective(CollapsedNum, NumClauses);
 }
 
-OMPTargetUpdateDirective *
-OMPTargetUpdateDirective::Create(const ASTContext &C, SourceLocation StartLoc,
-                                 SourceLocation EndLoc,
-                                 ArrayRef<OMPClause *> Clauses) {
+OMPTargetUpdateDirective *OMPTargetUpdateDirective::Create(
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
   unsigned Size =
       llvm::alignTo(sizeof(OMPTargetUpdateDirective), alignof(OMPClause *));
-  void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size());
+  void *Mem =
+      C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
   OMPTargetUpdateDirective *Dir =
       new (Mem) OMPTargetUpdateDirective(StartLoc, EndLoc, Clauses.size());
   Dir->setClauses(Clauses);
+  Dir->setAssociatedStmt(AssociatedStmt);
   return Dir;
 }
 
@@ -1025,14 +1027,15 @@
                                       EmptyShell) {
   unsigned Size =
       llvm::alignTo(sizeof(OMPTargetUpdateDirective), alignof(OMPClause *));
-  void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses);
+  void *Mem =
+      C.Allocate(Size + sizeof(OMPClause *) * NumClauses + sizeof(Stmt *));
   return new (Mem) OMPTargetUpdateDirective(NumClauses);
 }
 
 OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs) {
+    const HelperExprs &Exprs, bool HasCancel) {
   unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForDirective),
                                 alignof(OMPClause *));
   void *Mem = C.Allocate(
@@ -1076,6 +1079,7 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->HasCancel = HasCancel;
   return Dir;
 }
 
@@ -1476,7 +1480,7 @@
 OMPTeamsDistributeParallelForDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs) {
+    const HelperExprs &Exprs, bool HasCancel) {
   auto Size = llvm::alignTo(sizeof(OMPTeamsDistributeParallelForDirective),
                             alignof(OMPClause *));
   void *Mem = C.Allocate(
@@ -1520,6 +1524,7 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->HasCancel = HasCancel;
   return Dir;
 }
 
@@ -1619,7 +1624,7 @@
 OMPTargetTeamsDistributeParallelForDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs) {
+    const HelperExprs &Exprs, bool HasCancel) {
   auto Size =
       llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForDirective),
                     alignof(OMPClause *));
@@ -1665,6 +1670,7 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->HasCancel = HasCancel;
   return Dir;
 }
 
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index 9cbd1ef..3022483 100644
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -75,7 +75,8 @@
     void PrintCallArgs(CallExpr *E);
     void PrintRawSEHExceptHandler(SEHExceptStmt *S);
     void PrintRawSEHFinallyStmt(SEHFinallyStmt *S);
-    void PrintOMPExecutableDirective(OMPExecutableDirective *S);
+    void PrintOMPExecutableDirective(OMPExecutableDirective *S,
+                                     bool ForceNoStmt = false);
 
     void PrintExpr(Expr *E) {
       if (E)
@@ -1022,46 +1023,43 @@
 //  OpenMP directives printing methods
 //===----------------------------------------------------------------------===//
 
-void StmtPrinter::PrintOMPExecutableDirective(OMPExecutableDirective *S) {
+void StmtPrinter::PrintOMPExecutableDirective(OMPExecutableDirective *S,
+                                              bool ForceNoStmt) {
   OMPClausePrinter Printer(OS, Policy);
   ArrayRef<OMPClause *> Clauses = S->clauses();
   for (ArrayRef<OMPClause *>::iterator I = Clauses.begin(), E = Clauses.end();
        I != E; ++I)
     if (*I && !(*I)->isImplicit()) {
-      Printer.Visit(*I);
       OS << ' ';
+      Printer.Visit(*I);
     }
   OS << "\n";
-  if (S->hasAssociatedStmt() && S->getAssociatedStmt()) {
-    assert(isa<CapturedStmt>(S->getAssociatedStmt()) &&
-           "Expected captured statement!");
-    Stmt *CS = cast<CapturedStmt>(S->getAssociatedStmt())->getCapturedStmt();
-    PrintStmt(CS);
-  }
+  if (!ForceNoStmt && S->hasAssociatedStmt())
+    PrintStmt(S->getInnermostCapturedStmt()->getCapturedStmt());
 }
 
 void StmtPrinter::VisitOMPParallelDirective(OMPParallelDirective *Node) {
-  Indent() << "#pragma omp parallel ";
+  Indent() << "#pragma omp parallel";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPSimdDirective(OMPSimdDirective *Node) {
-  Indent() << "#pragma omp simd ";
+  Indent() << "#pragma omp simd";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) {
-  Indent() << "#pragma omp for ";
+  Indent() << "#pragma omp for";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPForSimdDirective(OMPForSimdDirective *Node) {
-  Indent() << "#pragma omp for simd ";
+  Indent() << "#pragma omp for simd";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPSectionsDirective(OMPSectionsDirective *Node) {
-  Indent() << "#pragma omp sections ";
+  Indent() << "#pragma omp sections";
   PrintOMPExecutableDirective(Node);
 }
 
@@ -1071,7 +1069,7 @@
 }
 
 void StmtPrinter::VisitOMPSingleDirective(OMPSingleDirective *Node) {
-  Indent() << "#pragma omp single ";
+  Indent() << "#pragma omp single";
   PrintOMPExecutableDirective(Node);
 }
 
@@ -1087,29 +1085,28 @@
     Node->getDirectiveName().printName(OS);
     OS << ")";
   }
-  OS << " ";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPParallelForDirective(OMPParallelForDirective *Node) {
-  Indent() << "#pragma omp parallel for ";
+  Indent() << "#pragma omp parallel for";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPParallelForSimdDirective(
     OMPParallelForSimdDirective *Node) {
-  Indent() << "#pragma omp parallel for simd ";
+  Indent() << "#pragma omp parallel for simd";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPParallelSectionsDirective(
     OMPParallelSectionsDirective *Node) {
-  Indent() << "#pragma omp parallel sections ";
+  Indent() << "#pragma omp parallel sections";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTaskDirective(OMPTaskDirective *Node) {
-  Indent() << "#pragma omp task ";
+  Indent() << "#pragma omp task";
   PrintOMPExecutableDirective(Node);
 }
 
@@ -1129,61 +1126,61 @@
 }
 
 void StmtPrinter::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *Node) {
-  Indent() << "#pragma omp taskgroup ";
+  Indent() << "#pragma omp taskgroup";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPFlushDirective(OMPFlushDirective *Node) {
-  Indent() << "#pragma omp flush ";
+  Indent() << "#pragma omp flush";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPOrderedDirective(OMPOrderedDirective *Node) {
-  Indent() << "#pragma omp ordered ";
-  PrintOMPExecutableDirective(Node);
+  Indent() << "#pragma omp ordered";
+  PrintOMPExecutableDirective(Node, Node->hasClausesOfKind<OMPDependClause>());
 }
 
 void StmtPrinter::VisitOMPAtomicDirective(OMPAtomicDirective *Node) {
-  Indent() << "#pragma omp atomic ";
+  Indent() << "#pragma omp atomic";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetDirective(OMPTargetDirective *Node) {
-  Indent() << "#pragma omp target ";
+  Indent() << "#pragma omp target";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetDataDirective(OMPTargetDataDirective *Node) {
-  Indent() << "#pragma omp target data ";
+  Indent() << "#pragma omp target data";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetEnterDataDirective(
     OMPTargetEnterDataDirective *Node) {
-  Indent() << "#pragma omp target enter data ";
-  PrintOMPExecutableDirective(Node);
+  Indent() << "#pragma omp target enter data";
+  PrintOMPExecutableDirective(Node, /*ForceNoStmt=*/true);
 }
 
 void StmtPrinter::VisitOMPTargetExitDataDirective(
     OMPTargetExitDataDirective *Node) {
-  Indent() << "#pragma omp target exit data ";
-  PrintOMPExecutableDirective(Node);
+  Indent() << "#pragma omp target exit data";
+  PrintOMPExecutableDirective(Node, /*ForceNoStmt=*/true);
 }
 
 void StmtPrinter::VisitOMPTargetParallelDirective(
     OMPTargetParallelDirective *Node) {
-  Indent() << "#pragma omp target parallel ";
+  Indent() << "#pragma omp target parallel";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetParallelForDirective(
     OMPTargetParallelForDirective *Node) {
-  Indent() << "#pragma omp target parallel for ";
+  Indent() << "#pragma omp target parallel for";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTeamsDirective(OMPTeamsDirective *Node) {
-  Indent() << "#pragma omp teams ";
+  Indent() << "#pragma omp teams";
   PrintOMPExecutableDirective(Node);
 }
 
@@ -1196,111 +1193,111 @@
 
 void StmtPrinter::VisitOMPCancelDirective(OMPCancelDirective *Node) {
   Indent() << "#pragma omp cancel "
-           << getOpenMPDirectiveName(Node->getCancelRegion()) << " ";
+           << getOpenMPDirectiveName(Node->getCancelRegion());
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTaskLoopDirective(OMPTaskLoopDirective *Node) {
-  Indent() << "#pragma omp taskloop ";
+  Indent() << "#pragma omp taskloop";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTaskLoopSimdDirective(
     OMPTaskLoopSimdDirective *Node) {
-  Indent() << "#pragma omp taskloop simd ";
+  Indent() << "#pragma omp taskloop simd";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPDistributeDirective(OMPDistributeDirective *Node) {
-  Indent() << "#pragma omp distribute ";
+  Indent() << "#pragma omp distribute";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetUpdateDirective(
     OMPTargetUpdateDirective *Node) {
-  Indent() << "#pragma omp target update ";
-  PrintOMPExecutableDirective(Node);
+  Indent() << "#pragma omp target update";
+  PrintOMPExecutableDirective(Node, /*ForceNoStmt=*/true);
 }
 
 void StmtPrinter::VisitOMPDistributeParallelForDirective(
     OMPDistributeParallelForDirective *Node) {
-  Indent() << "#pragma omp distribute parallel for ";
+  Indent() << "#pragma omp distribute parallel for";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPDistributeParallelForSimdDirective(
     OMPDistributeParallelForSimdDirective *Node) {
-  Indent() << "#pragma omp distribute parallel for simd ";
+  Indent() << "#pragma omp distribute parallel for simd";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPDistributeSimdDirective(
     OMPDistributeSimdDirective *Node) {
-  Indent() << "#pragma omp distribute simd ";
+  Indent() << "#pragma omp distribute simd";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetParallelForSimdDirective(
     OMPTargetParallelForSimdDirective *Node) {
-  Indent() << "#pragma omp target parallel for simd ";
+  Indent() << "#pragma omp target parallel for simd";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetSimdDirective(OMPTargetSimdDirective *Node) {
-  Indent() << "#pragma omp target simd ";
+  Indent() << "#pragma omp target simd";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTeamsDistributeDirective(
     OMPTeamsDistributeDirective *Node) {
-  Indent() << "#pragma omp teams distribute ";
+  Indent() << "#pragma omp teams distribute";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTeamsDistributeSimdDirective(
     OMPTeamsDistributeSimdDirective *Node) {
-  Indent() << "#pragma omp teams distribute simd ";
+  Indent() << "#pragma omp teams distribute simd";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTeamsDistributeParallelForSimdDirective(
     OMPTeamsDistributeParallelForSimdDirective *Node) {
-  Indent() << "#pragma omp teams distribute parallel for simd ";
+  Indent() << "#pragma omp teams distribute parallel for simd";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTeamsDistributeParallelForDirective(
     OMPTeamsDistributeParallelForDirective *Node) {
-  Indent() << "#pragma omp teams distribute parallel for ";
+  Indent() << "#pragma omp teams distribute parallel for";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetTeamsDirective(OMPTargetTeamsDirective *Node) {
-  Indent() << "#pragma omp target teams ";
+  Indent() << "#pragma omp target teams";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetTeamsDistributeDirective(
     OMPTargetTeamsDistributeDirective *Node) {
-  Indent() << "#pragma omp target teams distribute ";
+  Indent() << "#pragma omp target teams distribute";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForDirective(
     OMPTargetTeamsDistributeParallelForDirective *Node) {
-  Indent() << "#pragma omp target teams distribute parallel for ";
+  Indent() << "#pragma omp target teams distribute parallel for";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForSimdDirective(
     OMPTargetTeamsDistributeParallelForSimdDirective *Node) {
-  Indent() << "#pragma omp target teams distribute parallel for simd ";
+  Indent() << "#pragma omp target teams distribute parallel for simd";
   PrintOMPExecutableDirective(Node);
 }
 
 void StmtPrinter::VisitOMPTargetTeamsDistributeSimdDirective(
     OMPTargetTeamsDistributeSimdDirective *Node) {
-  Indent() << "#pragma omp target teams distribute simd ";
+  Indent() << "#pragma omp target teams distribute simd";
   PrintOMPExecutableDirective(Node);
 }
 
@@ -1319,8 +1316,7 @@
     OS << "template ";
   OS << Node->getNameInfo();
   if (Node->hasExplicitTemplateArgs())
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, Node->template_arguments(), Policy);
+    printTemplateArgumentList(OS, Node->template_arguments(), Policy);
 }
 
 void StmtPrinter::VisitDependentScopeDeclRefExpr(
@@ -1331,8 +1327,7 @@
     OS << "template ";
   OS << Node->getNameInfo();
   if (Node->hasExplicitTemplateArgs())
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, Node->template_arguments(), Policy);
+    printTemplateArgumentList(OS, Node->template_arguments(), Policy);
 }
 
 void StmtPrinter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *Node) {
@@ -1342,14 +1337,28 @@
     OS << "template ";
   OS << Node->getNameInfo();
   if (Node->hasExplicitTemplateArgs())
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, Node->template_arguments(), Policy);
+    printTemplateArgumentList(OS, Node->template_arguments(), Policy);
+}
+
+static bool isImplicitSelf(const Expr *E) {
+  if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) {
+    if (const ImplicitParamDecl *PD =
+            dyn_cast<ImplicitParamDecl>(DRE->getDecl())) {
+      if (PD->getParameterKind() == ImplicitParamDecl::ObjCSelf &&
+          DRE->getLocStart().isInvalid())
+        return true;
+    }
+  }
+  return false;
 }
 
 void StmtPrinter::VisitObjCIvarRefExpr(ObjCIvarRefExpr *Node) {
   if (Node->getBase()) {
-    PrintExpr(Node->getBase());
-    OS << (Node->isArrow() ? "->" : ".");
+    if (!Policy.SuppressImplicitBase ||
+        !isImplicitSelf(Node->getBase()->IgnoreImpCasts())) {
+      PrintExpr(Node->getBase());
+      OS << (Node->isArrow() ? "->" : ".");
+    }
   }
   OS << *Node->getDecl();
 }
@@ -1670,16 +1679,25 @@
   PrintCallArgs(Call);
   OS << ")";
 }
+
+static bool isImplicitThis(const Expr *E) {
+  if (const auto *TE = dyn_cast<CXXThisExpr>(E))
+    return TE->isImplicit();
+  return false;
+}
+
 void StmtPrinter::VisitMemberExpr(MemberExpr *Node) {
-  // FIXME: Suppress printing implicit bases (like "this")
-  PrintExpr(Node->getBase());
+  if (!Policy.SuppressImplicitBase || !isImplicitThis(Node->getBase())) {
+    PrintExpr(Node->getBase());
 
-  MemberExpr *ParentMember = dyn_cast<MemberExpr>(Node->getBase());
-  FieldDecl  *ParentDecl   = ParentMember
-    ? dyn_cast<FieldDecl>(ParentMember->getMemberDecl()) : nullptr;
+    MemberExpr *ParentMember = dyn_cast<MemberExpr>(Node->getBase());
+    FieldDecl *ParentDecl =
+        ParentMember ? dyn_cast<FieldDecl>(ParentMember->getMemberDecl())
+                     : nullptr;
 
-  if (!ParentDecl || !ParentDecl->isAnonymousStructOrUnion())
-    OS << (Node->isArrow() ? "->" : ".");
+    if (!ParentDecl || !ParentDecl->isAnonymousStructOrUnion())
+      OS << (Node->isArrow() ? "->" : ".");
+  }
 
   if (FieldDecl *FD = dyn_cast<FieldDecl>(Node->getMemberDecl()))
     if (FD->isAnonymousStructOrUnion())
@@ -1691,8 +1709,7 @@
     OS << "template ";
   OS << Node->getMemberNameInfo();
   if (Node->hasExplicitTemplateArgs())
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, Node->template_arguments(), Policy);
+    printTemplateArgumentList(OS, Node->template_arguments(), Policy);
 }
 void StmtPrinter::VisitObjCIsaExpr(ObjCIsaExpr *Node) {
   PrintExpr(Node->getBase());
@@ -2085,8 +2102,7 @@
 
     if (Args->size() != 1) {
       OS << "operator\"\"" << Node->getUDSuffix()->getName();
-      TemplateSpecializationType::PrintTemplateArgumentList(
-          OS, Args->asArray(), Policy);
+      printTemplateArgumentList(OS, Args->asArray(), Policy);
       OS << "()";
       return;
     }
@@ -2208,6 +2224,9 @@
                                  CEnd = Node->explicit_capture_end();
        C != CEnd;
        ++C) {
+    if (C->capturesVLAType())
+      continue;
+
     if (NeedComma)
       OS << ", ";
     NeedComma = true;
@@ -2410,8 +2429,7 @@
     OS << "template ";
   OS << Node->getMemberNameInfo();
   if (Node->hasExplicitTemplateArgs())
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, Node->template_arguments(), Policy);
+    printTemplateArgumentList(OS, Node->template_arguments(), Policy);
 }
 
 void StmtPrinter::VisitUnresolvedMemberExpr(UnresolvedMemberExpr *Node) {
@@ -2425,8 +2443,7 @@
     OS << "template ";
   OS << Node->getMemberNameInfo();
   if (Node->hasExplicitTemplateArgs())
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, Node->template_arguments(), Policy);
+    printTemplateArgumentList(OS, Node->template_arguments(), Policy);
 }
 
 static const char *getTypeTraitName(TypeTrait TT) {
diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp
index 9acd79b..a12f4bc 100644
--- a/lib/AST/StmtProfile.cpp
+++ b/lib/AST/StmtProfile.cpp
@@ -38,6 +38,8 @@
 
     void VisitStmt(const Stmt *S);
 
+    virtual void HandleStmtClass(Stmt::StmtClass SC) = 0;
+
 #define STMT(Node, Base) void Visit##Node(const Node *S);
 #include "clang/AST/StmtNodes.inc"
 
@@ -50,7 +52,7 @@
     virtual void VisitType(QualType T) = 0;
 
     /// \brief Visit a name that occurs within an expression or statement.
-    virtual void VisitName(DeclarationName Name) = 0;
+    virtual void VisitName(DeclarationName Name, bool TreatAsDecl = false) = 0;
 
     /// \brief Visit identifiers that are not in Decl's or Type's.
     virtual void VisitIdentifierInfo(IdentifierInfo *II) = 0;
@@ -80,6 +82,10 @@
                              const ASTContext &Context, bool Canonical)
         : StmtProfiler(ID, Canonical), Context(Context) {}
   private:
+    void HandleStmtClass(Stmt::StmtClass SC) override {
+      ID.AddInteger(SC);
+    }
+
     void VisitDecl(const Decl *D) override {
       ID.AddInteger(D ? D->getKind() : 0);
 
@@ -134,7 +140,7 @@
       ID.AddPointer(T.getAsOpaquePtr());
     }
 
-    void VisitName(DeclarationName Name) override {
+    void VisitName(DeclarationName Name, bool /*TreatAsDecl*/) override {
       ID.AddPointer(Name.getAsOpaquePtr());
     }
 
@@ -163,11 +169,26 @@
         : StmtProfiler(ID, false), Hash(Hash) {}
 
   private:
+    void HandleStmtClass(Stmt::StmtClass SC) override {
+      if (SC == Stmt::UnresolvedLookupExprClass) {
+        // Pretend that the name looked up is a Decl due to how templates
+        // handle some Decl lookups.
+        ID.AddInteger(Stmt::DeclRefExprClass);
+      } else {
+        ID.AddInteger(SC);
+      }
+    }
+
     void VisitType(QualType T) override {
       Hash.AddQualType(T);
     }
 
-    void VisitName(DeclarationName Name) override {
+    void VisitName(DeclarationName Name, bool TreatAsDecl) override {
+      if (TreatAsDecl) {
+        // A Decl can be null, so each Decl is preceded by a boolean to
+        // store its nullness.  Add a boolean here to match.
+        ID.AddBoolean(true);
+      }
       Hash.AddDeclarationName(Name);
     }
     void VisitIdentifierInfo(IdentifierInfo *II) override {
@@ -196,7 +217,9 @@
 
 void StmtProfiler::VisitStmt(const Stmt *S) {
   assert(S && "Requires non-null Stmt pointer");
-  ID.AddInteger(S->getStmtClass());
+
+  HandleStmtClass(S->getStmtClass());
+
   for (const Stmt *SubStmt : S->children()) {
     if (SubStmt)
       Visit(SubStmt);
@@ -966,8 +989,11 @@
   if (!Canonical)
     VisitNestedNameSpecifier(S->getQualifier());
   VisitDecl(S->getDecl());
-  if (!Canonical)
-    VisitTemplateArguments(S->getTemplateArgs(), S->getNumTemplateArgs());
+  if (!Canonical) {
+    ID.AddBoolean(S->hasExplicitTemplateArgs());
+    if (S->hasExplicitTemplateArgs())
+      VisitTemplateArguments(S->getTemplateArgs(), S->getNumTemplateArgs());
+  }
 }
 
 void StmtProfiler::VisitPredefinedExpr(const PredefinedExpr *S) {
@@ -1384,6 +1410,10 @@
   case OO_GreaterEqual:
     BinaryOp = BO_GE;
     return Stmt::BinaryOperatorClass;
+
+  case OO_Spaceship:
+    // FIXME: Update this once we support <=> expressions.
+    llvm_unreachable("<=> expressions not supported yet");
       
   case OO_AmpAmp:
     BinaryOp = BO_LAnd;
@@ -1590,6 +1620,9 @@
   for (LambdaExpr::capture_iterator C = S->explicit_capture_begin(),
                                  CEnd = S->explicit_capture_end();
        C != CEnd; ++C) {
+    if (C->capturesVLAType())
+      continue;
+
     ID.AddInteger(C->getCaptureKind());
     switch (C->getCaptureKind()) {
     case LCK_StarThis:
@@ -1652,7 +1685,7 @@
 void StmtProfiler::VisitOverloadExpr(const OverloadExpr *S) {
   VisitExpr(S);
   VisitNestedNameSpecifier(S->getQualifier());
-  VisitName(S->getName());
+  VisitName(S->getName(), /*TreatAsDecl*/ true);
   ID.AddBoolean(S->hasExplicitTemplateArgs());
   if (S->hasExplicitTemplateArgs())
     VisitTemplateArguments(S->getTemplateArgs(), S->getNumTemplateArgs());
diff --git a/lib/AST/TemplateBase.cpp b/lib/AST/TemplateBase.cpp
index e4998c3..7ed6bf1 100644
--- a/lib/AST/TemplateBase.cpp
+++ b/lib/AST/TemplateBase.cpp
@@ -1,4 +1,4 @@
-//===--- TemplateBase.cpp - Common template AST class implementation ------===//
+//===- TemplateBase.cpp - Common template AST class implementation --------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,17 +14,32 @@
 
 #include "clang/AST/TemplateBase.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/PrettyPrinter.h"
+#include "clang/AST/TemplateName.h"
 #include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
 
 using namespace clang;
 
@@ -37,7 +52,7 @@
 /// \param Policy the printing policy for EnumConstantDecl printing.
 static void printIntegral(const TemplateArgument &TemplArg,
                           raw_ostream &Out, const PrintingPolicy& Policy) {
-  const ::clang::Type *T = TemplArg.getIntegralType().getTypePtr();
+  const Type *T = TemplArg.getIntegralType().getTypePtr();
   const llvm::APSInt &Val = TemplArg.getAsIntegral();
 
   if (const EnumType *ET = T->getAs<EnumType>()) {
@@ -391,7 +406,7 @@
   }
     
   case Declaration: {
-    NamedDecl *ND = cast<NamedDecl>(getAsDecl());
+    NamedDecl *ND = getAsDecl();
     Out << '&';
     if (ND->getDeclName()) {
       // FIXME: distinguish between pointer and reference args?
@@ -415,10 +430,9 @@
     Out << "...";
     break;
       
-  case Integral: {
+  case Integral:
     printIntegral(*this, Out, Policy);
     break;
-  }
     
   case Expression:
     getAsExpr()->printPretty(Out, nullptr, Policy);
diff --git a/lib/AST/TemplateName.cpp b/lib/AST/TemplateName.cpp
index 9dbe827..548468e 100644
--- a/lib/AST/TemplateName.cpp
+++ b/lib/AST/TemplateName.cpp
@@ -1,4 +1,4 @@
-//===--- TemplateName.cpp - C++ Template Name Representation---------------===//
+//===- TemplateName.cpp - C++ Template Name Representation ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,15 +12,24 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/AST/TemplateName.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/PrettyPrinter.h"
 #include "clang/AST/TemplateBase.h"
 #include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
+#include "clang/Basic/OperatorKinds.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <string>
+
 using namespace clang;
-using namespace llvm;
 
 TemplateArgument 
 SubstTemplateTemplateParmPackStorage::getArgumentPack() const {
@@ -176,6 +185,11 @@
 }
 
 bool TemplateName::containsUnexpandedParameterPack() const {
+  if (QualifiedTemplateName *QTN = getAsQualifiedTemplateName()) {
+    if (QTN->getQualifier()->containsUnexpandedParameterPack())
+      return true;
+  }
+
   if (TemplateDecl *Template = getAsTemplateDecl()) {
     if (TemplateTemplateParmDecl *TTP 
                                   = dyn_cast<TemplateTemplateParmDecl>(Template))
@@ -226,7 +240,7 @@
 const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB,
                                            TemplateName N) {
   std::string NameStr;
-  raw_string_ostream OS(NameStr);
+  llvm::raw_string_ostream OS(NameStr);
   LangOptions LO;
   LO.CPlusPlus = true;
   LO.Bool = true;
diff --git a/lib/AST/Type.cpp b/lib/AST/Type.cpp
index cc5a00b..da7fdb0 100644
--- a/lib/AST/Type.cpp
+++ b/lib/AST/Type.cpp
@@ -1,4 +1,4 @@
-//===--- Type.cpp - Type representation and manipulation ------------------===//
+//===- Type.cpp - Type representation and manipulation --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,17 +16,40 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/CharUnits.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/PrettyPrinter.h"
+#include "clang/AST/TemplateBase.h"
+#include "clang/AST/TemplateName.h"
 #include "clang/AST/TypeVisitor.h"
+#include "clang/Basic/AddressSpaces.h"
+#include "clang/Basic/ExceptionSpecificationType.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Linkage.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/Visibility.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+
 using namespace clang;
 
 bool Qualifiers::isStrictSupersetOf(Qualifiers Other) const {
@@ -126,12 +149,10 @@
                                                  Expr *e, ArraySizeModifier sm,
                                                  unsigned tq,
                                                  SourceRange brackets)
-    : ArrayType(DependentSizedArray, et, can, sm, tq, 
+    : ArrayType(DependentSizedArray, et, can, sm, tq,
                 (et->containsUnexpandedParameterPack() ||
                  (e && e->containsUnexpandedParameterPack()))),
-      Context(Context), SizeExpr((Stmt*) e), Brackets(brackets) 
-{
-}
+      Context(Context), SizeExpr((Stmt*) e), Brackets(brackets) {}
 
 void DependentSizedArrayType::Profile(llvm::FoldingSetNodeID &ID,
                                       const ASTContext &Context,
@@ -153,13 +174,11 @@
                                                          SourceLocation loc)
     : Type(DependentSizedExtVector, can, /*Dependent=*/true,
            /*InstantiationDependent=*/true,
-           ElementType->isVariablyModifiedType(), 
+           ElementType->isVariablyModifiedType(),
            (ElementType->containsUnexpandedParameterPack() ||
             (SizeExpr && SizeExpr->containsUnexpandedParameterPack()))),
       Context(Context), SizeExpr(SizeExpr), ElementType(ElementType),
-      loc(loc) 
-{
-}
+      loc(loc) {}
 
 void
 DependentSizedExtVectorType::Profile(llvm::FoldingSetNodeID &ID,
@@ -195,12 +214,11 @@
 
 VectorType::VectorType(TypeClass tc, QualType vecType, unsigned nElements,
                        QualType canonType, VectorKind vecKind)
-  : Type(tc, canonType, vecType->isDependentType(),
-         vecType->isInstantiationDependentType(),
-         vecType->isVariablyModifiedType(),
-         vecType->containsUnexpandedParameterPack()), 
-    ElementType(vecType) 
-{
+    : Type(tc, canonType, vecType->isDependentType(),
+           vecType->isInstantiationDependentType(),
+           vecType->isVariablyModifiedType(),
+           vecType->containsUnexpandedParameterPack()), 
+      ElementType(vecType) {
   VectorTypeBits.VecKind = vecKind;
   VectorTypeBits.NumElements = nElements;
 }
@@ -375,11 +393,13 @@
     }
   }
 }
+
 bool Type::isClassType() const {
   if (const RecordType *RT = getAs<RecordType>())
     return RT->getDecl()->isClass();
   return false;
 }
+
 bool Type::isStructureType() const {
   if (const RecordType *RT = getAs<RecordType>())
     return RT->getDecl()->isStruct();
@@ -402,6 +422,7 @@
   }
   return false;
 }
+
 bool Type::isVoidPointerType() const {
   if (const PointerType *PT = getAs<PointerType>())
     return PT->getPointeeType()->isVoidType();
@@ -555,12 +576,11 @@
 ObjCTypeParamType::ObjCTypeParamType(const ObjCTypeParamDecl *D,
                                      QualType can,
                                      ArrayRef<ObjCProtocolDecl *> protocols)
-  : Type(ObjCTypeParam, can, can->isDependentType(),
-         can->isInstantiationDependentType(),
-         can->isVariablyModifiedType(),
-         /*ContainsUnexpandedParameterPack=*/false),
-    OTPDecl(const_cast<ObjCTypeParamDecl*>(D))
-{
+    : Type(ObjCTypeParam, can, can->isDependentType(),
+           can->isInstantiationDependentType(),
+           can->isVariablyModifiedType(),
+           /*ContainsUnexpandedParameterPack=*/false),
+      OTPDecl(const_cast<ObjCTypeParamDecl*>(D)) {
   initialize(protocols);
 }
 
@@ -568,12 +588,11 @@
                                ArrayRef<QualType> typeArgs,
                                ArrayRef<ObjCProtocolDecl *> protocols,
                                bool isKindOf)
-  : Type(ObjCObject, Canonical, Base->isDependentType(), 
-         Base->isInstantiationDependentType(), 
-         Base->isVariablyModifiedType(), 
-         Base->containsUnexpandedParameterPack()),
-    BaseType(Base) 
-{
+    : Type(ObjCObject, Canonical, Base->isDependentType(), 
+           Base->isInstantiationDependentType(), 
+           Base->isVariablyModifiedType(), 
+           Base->containsUnexpandedParameterPack()),
+      BaseType(Base) {
   ObjCObjectTypeBits.IsKindOf = isKindOf;
 
   ObjCObjectTypeBits.NumTypeArgs = typeArgs.size();
@@ -624,13 +643,13 @@
   if (auto objcObject = getBaseType()->getAs<ObjCObjectType>()) {
     // Terminate when we reach an interface type.
     if (isa<ObjCInterfaceType>(objcObject))
-      return { };
+      return {};
 
     return objcObject->getTypeArgs();
   }
 
   // No type arguments.
-  return { };
+  return {};
 }
 
 bool ObjCObjectType::isKindOfType() const {
@@ -666,7 +685,7 @@
   return ctx.getObjCObjectType(ctx.getQualifiedType(baseType,
                                                     splitBaseType.Quals),
                                getTypeArgsAsWritten(),
-                               /*protocols=*/{ },
+                               /*protocols=*/{},
                                /*isKindOf=*/false);
 }
 
@@ -679,10 +698,10 @@
   return ctx.getObjCObjectPointerType(obj)->castAs<ObjCObjectPointerType>();
 }
 
-namespace {
-
 template<typename F>
-QualType simpleTransform(ASTContext &ctx, QualType type, F &&f);
+static QualType simpleTransform(ASTContext &ctx, QualType type, F &&f);
+
+namespace {
 
 /// Visitor used by simpleTransform() to perform the transformation.
 template<typename F>
@@ -696,7 +715,8 @@
   }
 
 public:
-  SimpleTransformVisitor(ASTContext &ctx, F &&f) : Ctx(ctx), TheFunc(std::move(f)) { }
+  SimpleTransformVisitor(ASTContext &ctx, F &&f)
+      : Ctx(ctx), TheFunc(std::move(f)) {}
 
   // None of the clients of this transformation can occur where
   // there are dependent types, so skip dependent types.
@@ -1065,10 +1085,12 @@
 #undef TRIVIAL_TYPE_CLASS
 };
 
+} // namespace
+
 /// Perform a simple type transformation that does not change the
 /// semantics of the type.
 template<typename F>
-QualType simpleTransform(ASTContext &ctx, QualType type, F &&f) {
+static QualType simpleTransform(ASTContext &ctx, QualType type, F &&f) {
   // Transform the type. If it changed, return the transformed result.
   QualType transformed = f(type);
   if (transformed.getAsOpaquePtr() != type.getAsOpaquePtr())
@@ -1088,8 +1110,6 @@
   return ctx.getQualifiedType(result, splitType.Quals);
 }
 
-} // end anonymous namespace
-
 /// Substitute the given type arguments for Objective-C type
 /// parameters within the given type, recursively.
 QualType QualType::substObjCTypeArgs(
@@ -1103,57 +1123,56 @@
     // Replace an Objective-C type parameter reference with the corresponding
     // type argument.
     if (const auto *OTPTy = dyn_cast<ObjCTypeParamType>(splitType.Ty)) {
-      if (auto *typeParam = dyn_cast<ObjCTypeParamDecl>(OTPTy->getDecl())) {
-        // If we have type arguments, use them.
-        if (!typeArgs.empty()) {
-          QualType argType = typeArgs[typeParam->getIndex()];
-          if (OTPTy->qual_empty())
-            return ctx.getQualifiedType(argType, splitType.Quals);
+      ObjCTypeParamDecl *typeParam = OTPTy->getDecl();
+      // If we have type arguments, use them.
+      if (!typeArgs.empty()) {
+        QualType argType = typeArgs[typeParam->getIndex()];
+        if (OTPTy->qual_empty())
+          return ctx.getQualifiedType(argType, splitType.Quals);
 
-          // Apply protocol lists if exists.
-          bool hasError;
-          SmallVector<ObjCProtocolDecl*, 8> protocolsVec;
-          protocolsVec.append(OTPTy->qual_begin(),
-                              OTPTy->qual_end());
-          ArrayRef<ObjCProtocolDecl *> protocolsToApply = protocolsVec;
-          QualType resultTy = ctx.applyObjCProtocolQualifiers(argType,
-              protocolsToApply, hasError, true/*allowOnPointerType*/);
+        // Apply protocol lists if exists.
+        bool hasError;
+        SmallVector<ObjCProtocolDecl*, 8> protocolsVec;
+        protocolsVec.append(OTPTy->qual_begin(),
+                            OTPTy->qual_end());
+        ArrayRef<ObjCProtocolDecl *> protocolsToApply = protocolsVec;
+        QualType resultTy = ctx.applyObjCProtocolQualifiers(argType,
+            protocolsToApply, hasError, true/*allowOnPointerType*/);
 
-          return ctx.getQualifiedType(resultTy, splitType.Quals);
-        }
+        return ctx.getQualifiedType(resultTy, splitType.Quals);
+      }
 
-        switch (context) {
-        case ObjCSubstitutionContext::Ordinary:
-        case ObjCSubstitutionContext::Parameter:
-        case ObjCSubstitutionContext::Superclass:
-          // Substitute the bound.
+      switch (context) {
+      case ObjCSubstitutionContext::Ordinary:
+      case ObjCSubstitutionContext::Parameter:
+      case ObjCSubstitutionContext::Superclass:
+        // Substitute the bound.
+        return ctx.getQualifiedType(typeParam->getUnderlyingType(),
+                                    splitType.Quals);
+
+      case ObjCSubstitutionContext::Result:
+      case ObjCSubstitutionContext::Property: {
+        // Substitute the __kindof form of the underlying type.
+        const auto *objPtr = typeParam->getUnderlyingType()
+          ->castAs<ObjCObjectPointerType>();
+
+        // __kindof types, id, and Class don't need an additional
+        // __kindof.
+        if (objPtr->isKindOfType() || objPtr->isObjCIdOrClassType())
           return ctx.getQualifiedType(typeParam->getUnderlyingType(),
                                       splitType.Quals);
 
-        case ObjCSubstitutionContext::Result:
-        case ObjCSubstitutionContext::Property: {
-          // Substitute the __kindof form of the underlying type.
-          const auto *objPtr = typeParam->getUnderlyingType()
-            ->castAs<ObjCObjectPointerType>();
+        // Add __kindof.
+        const auto *obj = objPtr->getObjectType();
+        QualType resultTy = ctx.getObjCObjectType(obj->getBaseType(),
+                                                  obj->getTypeArgsAsWritten(),
+                                                  obj->getProtocols(),
+                                                  /*isKindOf=*/true);
 
-          // __kindof types, id, and Class don't need an additional
-          // __kindof.
-          if (objPtr->isKindOfType() || objPtr->isObjCIdOrClassType())
-            return ctx.getQualifiedType(typeParam->getUnderlyingType(),
-                                        splitType.Quals);
-
-          // Add __kindof.
-          const auto *obj = objPtr->getObjectType();
-          QualType resultTy = ctx.getObjCObjectType(obj->getBaseType(),
-                                                    obj->getTypeArgsAsWritten(),
-                                                    obj->getProtocols(),
-                                                    /*isKindOf=*/true);
-
-          // Rebuild object pointer type.
-          resultTy = ctx.getObjCObjectPointerType(resultTy);
-          return ctx.getQualifiedType(resultTy, splitType.Quals);
-        }
-        }
+        // Rebuild object pointer type.
+        resultTy = ctx.getObjCObjectPointerType(resultTy);
+        return ctx.getQualifiedType(resultTy, splitType.Quals);
+      }
       }
     }
 
@@ -1254,7 +1273,7 @@
             if (typeArgs.empty() &&
                 context != ObjCSubstitutionContext::Superclass) {
               return ctx.getObjCObjectType(
-                       objcObjectType->getBaseType(), { },
+                       objcObjectType->getBaseType(), {},
                        protocols,
                        objcObjectType->isKindOfTypeAsWritten());
             }
@@ -1364,7 +1383,7 @@
     objectType = objectPointerType->getObjectType();
   } else if (getAs<BlockPointerType>()) {
     ASTContext &ctx = dc->getParentASTContext();
-    objectType = ctx.getObjCObjectType(ctx.ObjCBuiltinIdTy, { }, { })
+    objectType = ctx.getObjCObjectType(ctx.ObjCBuiltinIdTy, {}, {})
                    ->castAs<ObjCObjectType>();
   } else {
     objectType = getAs<ObjCObjectType>();
@@ -1543,6 +1562,7 @@
   }
   return nullptr;
 }
+
 const ObjCObjectPointerType *Type::getAsObjCInterfacePointerType() const {
   if (const ObjCObjectPointerType *OPT = getAs<ObjCObjectPointerType>()) {
     if (OPT->getInterfaceType())
@@ -1572,7 +1592,7 @@
 
 TagDecl *Type::getAsTagDecl() const {
   if (const auto *TT = getAs<TagType>())
-    return cast<TagDecl>(TT->getDecl());
+    return TT->getDecl();
   if (const auto *Injected = getAs<InjectedClassNameType>())
     return Injected->getDecl();
 
@@ -1580,14 +1600,17 @@
 }
 
 namespace {
+
   class GetContainedDeducedTypeVisitor :
     public TypeVisitor<GetContainedDeducedTypeVisitor, Type*> {
     bool Syntactic;
+
   public:
     GetContainedDeducedTypeVisitor(bool Syntactic = false)
         : Syntactic(Syntactic) {}
 
     using TypeVisitor<GetContainedDeducedTypeVisitor, Type*>::Visit;
+
     Type *Visit(QualType T) {
       if (T.isNull())
         return nullptr;
@@ -1600,50 +1623,64 @@
     }
 
     // Only these types can contain the desired 'auto' type.
+
     Type *VisitElaboratedType(const ElaboratedType *T) {
       return Visit(T->getNamedType());
     }
+
     Type *VisitPointerType(const PointerType *T) {
       return Visit(T->getPointeeType());
     }
+
     Type *VisitBlockPointerType(const BlockPointerType *T) {
       return Visit(T->getPointeeType());
     }
+
     Type *VisitReferenceType(const ReferenceType *T) {
       return Visit(T->getPointeeTypeAsWritten());
     }
+
     Type *VisitMemberPointerType(const MemberPointerType *T) {
       return Visit(T->getPointeeType());
     }
+
     Type *VisitArrayType(const ArrayType *T) {
       return Visit(T->getElementType());
     }
+
     Type *VisitDependentSizedExtVectorType(
       const DependentSizedExtVectorType *T) {
       return Visit(T->getElementType());
     }
+
     Type *VisitVectorType(const VectorType *T) {
       return Visit(T->getElementType());
     }
+
     Type *VisitFunctionProtoType(const FunctionProtoType *T) {
       if (Syntactic && T->hasTrailingReturn())
         return const_cast<FunctionProtoType*>(T);
       return VisitFunctionType(T);
     }
+
     Type *VisitFunctionType(const FunctionType *T) {
       return Visit(T->getReturnType());
     }
+
     Type *VisitParenType(const ParenType *T) {
       return Visit(T->getInnerType());
     }
+
     Type *VisitAttributedType(const AttributedType *T) {
       return Visit(T->getModifiedType());
     }
+
     Type *VisitAdjustedType(const AdjustedType *T) {
       return Visit(T->getOriginalType());
     }
   };
-}
+
+} // namespace
 
 DeducedType *Type::getContainedDeducedType() const {
   return cast_or_null<DeducedType>(
@@ -1694,7 +1731,6 @@
   return false;
 }
 
-
 bool Type::isIntegralOrUnscopedEnumerationType() const {
   if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
     return BT->getKind() >= BuiltinType::Bool &&
@@ -1710,8 +1746,6 @@
   return false;
 }
 
-
-
 bool Type::isCharType() const {
   if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
     return BT->getKind() == BuiltinType::Char_U ||
@@ -1962,12 +1996,7 @@
     EnumDecl *EnumD = cast<EnumType>(CanonicalType)->getDecl();
     if (Def)
       *Def = EnumD;
-    
-    // An enumeration with fixed underlying type is complete (C++0x 7.2p3).
-    if (EnumD->isFixed())
-      return false;
-    
-    return !EnumD->isCompleteDefinition();
+    return !EnumD->isComplete();
   }
   case Record: {
     // A tagged type (struct/union/enum/class) is incomplete if the decl is a
@@ -2036,7 +2065,7 @@
   // We return false for that case. Except for incomplete arrays of PODs, which
   // are PODs according to the standard.
   if (isNull())
-    return 0;
+    return false;
   
   if ((*this)->isIncompleteArrayType())
     return Context.getBaseElementType(*this).isCXX98PODType(Context);
@@ -2084,7 +2113,7 @@
   // We return false for that case. Except for incomplete arrays of PODs, which
   // are PODs according to the standard.
   if (isNull())
-    return 0;
+    return false;
   
   if ((*this)->isArrayType())
     return Context.getBaseElementType(*this).isTrivialType(Context);
@@ -2166,12 +2195,50 @@
   return false;
 }
 
+bool QualType::hasTrivialABIOverride() const {
+  if (const auto *RD = getTypePtr()->getAsCXXRecordDecl())
+    return RD->hasTrivialABIOverride();
+  return false;
+}
+
 bool QualType::isNonWeakInMRRWithObjCWeak(const ASTContext &Context) const {
   return !Context.getLangOpts().ObjCAutoRefCount &&
          Context.getLangOpts().ObjCWeak &&
          getObjCLifetime() != Qualifiers::OCL_Weak;
 }
 
+QualType::PrimitiveDefaultInitializeKind
+QualType::isNonTrivialToPrimitiveDefaultInitialize() const {
+  if (const auto *RT =
+          getTypePtr()->getBaseElementTypeUnsafe()->getAs<RecordType>())
+    if (RT->getDecl()->isNonTrivialToPrimitiveDefaultInitialize())
+      return PDIK_Struct;
+
+  Qualifiers::ObjCLifetime Lifetime = getQualifiers().getObjCLifetime();
+  if (Lifetime == Qualifiers::OCL_Strong)
+    return PDIK_ARCStrong;
+
+  return PDIK_Trivial;
+}
+
+QualType::PrimitiveCopyKind QualType::isNonTrivialToPrimitiveCopy() const {
+  if (const auto *RT =
+          getTypePtr()->getBaseElementTypeUnsafe()->getAs<RecordType>())
+    if (RT->getDecl()->isNonTrivialToPrimitiveCopy())
+      return PCK_Struct;
+
+  Qualifiers Qs = getQualifiers();
+  if (Qs.getObjCLifetime() == Qualifiers::OCL_Strong)
+    return PCK_ARCStrong;
+
+  return Qs.hasVolatile() ? PCK_VolatileTrivial : PCK_Trivial;
+}
+
+QualType::PrimitiveCopyKind
+QualType::isNonTrivialToPrimitiveDestructiveMove() const {
+  return isNonTrivialToPrimitiveCopy();
+}
+
 bool Type::isLiteralType(const ASTContext &Ctx) const {
   if (isDependentType())
     return false;
@@ -2858,7 +2925,6 @@
                                 const QualType *ArgTys, unsigned NumParams,
                                 const ExtProtoInfo &epi,
                                 const ASTContext &Context, bool Canonical) {
-
   // We have to be careful not to get ambiguous profile encodings.
   // Note that valid type pointers are never ambiguous with anything else.
   //
@@ -2920,12 +2986,11 @@
 }
 
 TypeOfExprType::TypeOfExprType(Expr *E, QualType can)
-  : Type(TypeOfExpr, can, E->isTypeDependent(), 
-         E->isInstantiationDependent(),
-         E->getType()->isVariablyModifiedType(),
-         E->containsUnexpandedParameterPack()), 
-    TOExpr(E) {
-}
+    : Type(TypeOfExpr, can, E->isTypeDependent(),
+           E->isInstantiationDependent(),
+           E->getType()->isVariablyModifiedType(),
+           E->containsUnexpandedParameterPack()),
+      TOExpr(E) {}
 
 bool TypeOfExprType::isSugared() const {
   return !TOExpr->isTypeDependent();
@@ -2947,13 +3012,11 @@
   // C++11 [temp.type]p2: "If an expression e involves a template parameter,
   // decltype(e) denotes a unique dependent type." Hence a decltype type is
   // type-dependent even if its expression is only instantiation-dependent.
-  : Type(Decltype, can, E->isInstantiationDependent(),
-         E->isInstantiationDependent(),
-         E->getType()->isVariablyModifiedType(), 
-         E->containsUnexpandedParameterPack()), 
-    E(E),
-  UnderlyingType(underlyingType) {
-}
+    : Type(Decltype, can, E->isInstantiationDependent(),
+           E->isInstantiationDependent(),
+           E->getType()->isVariablyModifiedType(),
+           E->containsUnexpandedParameterPack()),
+      E(E), UnderlyingType(underlyingType) {}
 
 bool DecltypeType::isSugared() const { return !E->isInstantiationDependent(); }
 
@@ -2965,7 +3028,7 @@
 }
 
 DependentDecltypeType::DependentDecltypeType(const ASTContext &Context, Expr *E)
-  : DecltypeType(E, Context.DependentTy), Context(Context) { }
+    : DecltypeType(E, Context.DependentTy), Context(Context) {}
 
 void DependentDecltypeType::Profile(llvm::FoldingSetNodeID &ID,
                                     const ASTContext &Context, Expr *E) {
@@ -2976,26 +3039,23 @@
                                        QualType UnderlyingType,
                                        UTTKind UKind,
                                        QualType CanonicalType)
-  : Type(UnaryTransform, CanonicalType, BaseType->isDependentType(),
-         BaseType->isInstantiationDependentType(),
-         BaseType->isVariablyModifiedType(),
-         BaseType->containsUnexpandedParameterPack())
-  , BaseType(BaseType), UnderlyingType(UnderlyingType), UKind(UKind)
-{}
+    : Type(UnaryTransform, CanonicalType, BaseType->isDependentType(),
+           BaseType->isInstantiationDependentType(),
+           BaseType->isVariablyModifiedType(),
+           BaseType->containsUnexpandedParameterPack()),
+      BaseType(BaseType), UnderlyingType(UnderlyingType), UKind(UKind) {}
 
 DependentUnaryTransformType::DependentUnaryTransformType(const ASTContext &C,
                                                          QualType BaseType,
                                                          UTTKind UKind)
-   : UnaryTransformType(BaseType, C.DependentTy, UKind, QualType())
-{}
-
+     : UnaryTransformType(BaseType, C.DependentTy, UKind, QualType()) {}
 
 TagType::TagType(TypeClass TC, const TagDecl *D, QualType can)
-  : Type(TC, can, D->isDependentType(), 
-         /*InstantiationDependent=*/D->isDependentType(),
-         /*VariablyModified=*/false, 
-         /*ContainsUnexpandedParameterPack=*/false),
-    decl(const_cast<TagDecl*>(D)) {}
+    : Type(TC, can, D->isDependentType(),
+           /*InstantiationDependent=*/D->isDependentType(),
+           /*VariablyModified=*/false,
+           /*ContainsUnexpandedParameterPack=*/false),
+      decl(const_cast<TagDecl*>(D)) {}
 
 static TagDecl *getInterestingTagDecl(TagDecl *decl) {
   for (auto I : decl->redecls()) {
@@ -3140,11 +3200,9 @@
 SubstTemplateTypeParmPackType(const TemplateTypeParmType *Param, 
                               QualType Canon,
                               const TemplateArgument &ArgPack)
-  : Type(SubstTemplateTypeParmPack, Canon, true, true, false, true), 
-    Replaced(Param), 
-    Arguments(ArgPack.pack_begin()), NumArguments(ArgPack.pack_size()) 
-{ 
-}
+    : Type(SubstTemplateTypeParmPack, Canon, true, true, false, true),
+      Replaced(Param),
+      Arguments(ArgPack.pack_begin()), NumArguments(ArgPack.pack_size()) {}
 
 TemplateArgument SubstTemplateTypeParmPackType::getArgumentPack() const {
   return TemplateArgument(llvm::makeArrayRef(Arguments, NumArguments));
@@ -3311,11 +3369,13 @@
                          L.hasLocalOrUnnamedType() | R.hasLocalOrUnnamedType());
   }
 };
-}
+
+} // namespace
 
 static CachedProperties computeCachedProperties(const Type *T);
 
 namespace clang {
+
 /// The type-property cache.  This is templated so as to be
 /// instantiated at an internal type to prevent unnecessary symbol
 /// leakage.
@@ -3353,13 +3413,19 @@
     T->TypeBits.CachedLocalOrUnnamed = Result.hasLocalOrUnnamedType();
   }
 };
-}
+
+} // namespace clang
 
 // Instantiate the friend template at a private class.  In a
 // reasonable implementation, these symbols will be internal.
 // It is terrible that this is the best way to accomplish this.
-namespace { class Private {}; }
-typedef TypePropertyCache<Private> Cache;
+namespace {
+
+class Private {};
+
+} // namespace
+
+using Cache = TypePropertyCache<Private>;
 
 static CachedProperties computeCachedProperties(const Type *T) {
   switch (T->getTypeClass()) {
@@ -3786,11 +3852,13 @@
     return typedefType->getDecl()->hasAttr<ObjCIndependentClassAttr>();
   return false;
 }
+
 bool Type::isObjCRetainableType() const {
   return isObjCObjectPointerType() ||
          isBlockPointerType() ||
          isObjCNSObjectType();
 }
+
 bool Type::isObjCIndirectLifetimeType() const {
   if (isObjCLifetimeType())
     return true;
@@ -3859,12 +3927,20 @@
     return DK_objc_weak_lifetime;
   }
 
-  /// Currently, the only destruction kind we recognize is C++ objects
-  /// with non-trivial destructors.
-  const CXXRecordDecl *record =
-    type->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
-  if (record && record->hasDefinition() && !record->hasTrivialDestructor())
-    return DK_cxx_destructor;
+  if (const auto *RT =
+          type->getBaseElementTypeUnsafe()->getAs<RecordType>()) {
+    const RecordDecl *RD = RT->getDecl();
+    if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+      /// Check if this is a C++ object with a non-trivial destructor.
+      if (CXXRD->hasDefinition() && !CXXRD->hasTrivialDestructor())
+        return DK_cxx_destructor;
+    } else {
+      /// Check if this is a C struct that is non-trivial to destroy or an array
+      /// that contains such a struct.
+      if (RD->isNonTrivialToPrimitiveDestroy())
+        return DK_nontrivial_c_struct;
+    }
+  }
 
   return DK_none;
 }
diff --git a/lib/AST/TypeLoc.cpp b/lib/AST/TypeLoc.cpp
index 9ff437b..0ac50b3 100644
--- a/lib/AST/TypeLoc.cpp
+++ b/lib/AST/TypeLoc.cpp
@@ -1,4 +1,4 @@
-//===--- TypeLoc.cpp - Type Source Info Wrapper -----------------*- C++ -*-===//
+//===- TypeLoc.cpp - Type Source Info Wrapper -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,8 +14,19 @@
 #include "clang/AST/TypeLoc.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/NestedNameSpecifier.h"
+#include "clang/AST/TemplateBase.h"
+#include "clang/AST/TemplateName.h"
 #include "clang/AST/TypeLocVisitor.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+
 using namespace clang;
 
 static const unsigned TypeLocMaxDataAlign = alignof(void *);
@@ -25,16 +36,18 @@
 //===----------------------------------------------------------------------===//
 
 namespace {
-  class TypeLocRanger : public TypeLocVisitor<TypeLocRanger, SourceRange> {
-  public:
+
+class TypeLocRanger : public TypeLocVisitor<TypeLocRanger, SourceRange> {
+public:
 #define ABSTRACT_TYPELOC(CLASS, PARENT)
 #define TYPELOC(CLASS, PARENT) \
-    SourceRange Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
-      return TyLoc.getLocalSourceRange(); \
-    }
+  SourceRange Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
+    return TyLoc.getLocalSourceRange(); \
+  }
 #include "clang/AST/TypeLocNodes.def"
-  };
-}
+};
+
+} // namespace
 
 SourceRange TypeLoc::getLocalSourceRangeImpl(TypeLoc TL) {
   if (TL.isNull()) return SourceRange();
@@ -42,16 +55,18 @@
 }
 
 namespace {
-  class TypeAligner : public TypeLocVisitor<TypeAligner, unsigned> {
-  public:
+
+class TypeAligner : public TypeLocVisitor<TypeAligner, unsigned> {
+public:
 #define ABSTRACT_TYPELOC(CLASS, PARENT)
 #define TYPELOC(CLASS, PARENT) \
-    unsigned Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
-      return TyLoc.getLocalDataAlignment(); \
-    }
+  unsigned Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
+    return TyLoc.getLocalDataAlignment(); \
+  }
 #include "clang/AST/TypeLocNodes.def"
-  };
-}
+};
+
+} // namespace
 
 /// \brief Returns the alignment of the type source info data block.
 unsigned TypeLoc::getLocalAlignmentForType(QualType Ty) {
@@ -60,16 +75,18 @@
 }
 
 namespace {
-  class TypeSizer : public TypeLocVisitor<TypeSizer, unsigned> {
-  public:
+
+class TypeSizer : public TypeLocVisitor<TypeSizer, unsigned> {
+public:
 #define ABSTRACT_TYPELOC(CLASS, PARENT)
 #define TYPELOC(CLASS, PARENT) \
-    unsigned Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
-      return TyLoc.getLocalDataSize(); \
-    }
+  unsigned Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
+    return TyLoc.getLocalDataSize(); \
+  }
 #include "clang/AST/TypeLocNodes.def"
-  };
-}
+};
+
+} // namespace
 
 /// \brief Returns the size of the type source info data block.
 unsigned TypeLoc::getFullDataSizeForType(QualType Ty) {
@@ -88,16 +105,18 @@
 }
 
 namespace {
-  class NextLoc : public TypeLocVisitor<NextLoc, TypeLoc> {
-  public:
+
+class NextLoc : public TypeLocVisitor<NextLoc, TypeLoc> {
+public:
 #define ABSTRACT_TYPELOC(CLASS, PARENT)
 #define TYPELOC(CLASS, PARENT) \
-    TypeLoc Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
-      return TyLoc.getNextTypeLoc(); \
-    }
+  TypeLoc Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
+    return TyLoc.getNextTypeLoc(); \
+  }
 #include "clang/AST/TypeLocNodes.def"
-  };
-}
+};
+
+} // namespace
 
 /// \brief Get the next TypeLoc pointed by this TypeLoc, e.g for "int*" the
 /// TypeLoc is a PointerLoc and next TypeLoc is for "int".
@@ -127,20 +146,22 @@
 }
 
 namespace {
-  class TypeLocCopier : public TypeLocVisitor<TypeLocCopier> {
-    TypeLoc Source;
-  public:
-    TypeLocCopier(TypeLoc source) : Source(source) { }
+
+class TypeLocCopier : public TypeLocVisitor<TypeLocCopier> {
+  TypeLoc Source;
+
+public:
+  TypeLocCopier(TypeLoc source) : Source(source) {}
 
 #define ABSTRACT_TYPELOC(CLASS, PARENT)
 #define TYPELOC(CLASS, PARENT)                          \
-    void Visit##CLASS##TypeLoc(CLASS##TypeLoc dest) {   \
-      dest.copyLocal(Source.castAs<CLASS##TypeLoc>());  \
-    }
+  void Visit##CLASS##TypeLoc(CLASS##TypeLoc dest) {   \
+    dest.copyLocal(Source.castAs<CLASS##TypeLoc>());  \
+  }
 #include "clang/AST/TypeLocNodes.def"
-  };
-}
+};
 
+} // namespace
 
 void TypeLoc::copy(TypeLoc other) {
   assert(getFullDataSize() == other.getFullDataSize());
@@ -180,7 +201,7 @@
         LeftMost = Cur;
         break;
       }
-      /* Fall through */
+      LLVM_FALLTHROUGH;
     case FunctionNoProto:
     case ConstantArray:
     case DependentSizedArray:
@@ -243,22 +264,22 @@
   }
 }
 
-
 namespace {
-  struct TSTChecker : public TypeLocVisitor<TSTChecker, bool> {
-    // Overload resolution does the real work for us.
-    static bool isTypeSpec(TypeSpecTypeLoc _) { return true; }
-    static bool isTypeSpec(TypeLoc _) { return false; }
+
+struct TSTChecker : public TypeLocVisitor<TSTChecker, bool> {
+  // Overload resolution does the real work for us.
+  static bool isTypeSpec(TypeSpecTypeLoc _) { return true; }
+  static bool isTypeSpec(TypeLoc _) { return false; }
 
 #define ABSTRACT_TYPELOC(CLASS, PARENT)
 #define TYPELOC(CLASS, PARENT) \
-    bool Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
-      return isTypeSpec(TyLoc); \
-    }
+  bool Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc) { \
+    return isTypeSpec(TyLoc); \
+  }
 #include "clang/AST/TypeLocNodes.def"
-  };
-}
+};
 
+} // namespace
 
 /// \brief Determines if the given type loc corresponds to a
 /// TypeSpecTypeLoc.  Since there is not actually a TypeSpecType in
@@ -364,7 +385,7 @@
       return attributedLoc.getAttrNameLoc();
   }
 
-  return SourceLocation();
+  return {};
 }
 
 TypeLoc TypeLoc::findExplicitQualifierLoc() const {
@@ -385,7 +406,7 @@
     return atomic;
   }
 
-  return TypeLoc();
+  return {};
 }
 
 void ObjCTypeParamTypeLoc::initializeLocal(ASTContext &Context,
@@ -423,6 +444,15 @@
       getUnderlyingType(), Loc);
 }
 
+void UnaryTransformTypeLoc::initializeLocal(ASTContext &Context,
+                                       SourceLocation Loc) {
+    setKWLoc(Loc);
+    setRParenLoc(Loc);
+    setLParenLoc(Loc);
+    this->setUnderlyingTInfo(
+        Context.getTrivialTypeSourceInfo(getTypePtr()->getBaseType(), Loc));
+}
+
 void ElaboratedTypeLoc::initializeLocal(ASTContext &Context, 
                                         SourceLocation Loc) {
   setElaboratedKeywordLoc(Loc);
diff --git a/lib/AST/TypePrinter.cpp b/lib/AST/TypePrinter.cpp
index 7351d97..c28ada7 100644
--- a/lib/AST/TypePrinter.cpp
+++ b/lib/AST/TypePrinter.cpp
@@ -213,7 +213,7 @@
     case Type::VariableArray:
     case Type::DependentSizedArray:
       NeedARCStrongQualifier = true;
-      // Fall through
+      LLVM_FALLTHROUGH;
       
     case Type::Adjusted:
     case Type::Decayed:
@@ -982,8 +982,7 @@
     IncludeStrongLifetimeRAII Strong(Policy);
     OS << Spec->getIdentifier()->getName();
     const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, TemplateArgs.asArray(), Policy);
+    printTemplateArgumentList(OS, TemplateArgs.asArray(), Policy);
     OS << "::";
   } else if (TagDecl *Tag = dyn_cast<TagDecl>(DC)) {
     if (TypedefNameDecl *Typedef = Tag->getTypedefNameForAnonDecl())
@@ -1070,7 +1069,7 @@
       Args = TemplateArgs.asArray();
     }
     IncludeStrongLifetimeRAII Strong(Policy);
-    TemplateSpecializationType::PrintTemplateArgumentList(OS, Args, Policy);
+    printTemplateArgumentList(OS, Args, Policy);
   }
 
   spaceBeforePlaceHolder(OS);
@@ -1129,8 +1128,7 @@
   IncludeStrongLifetimeRAII Strong(Policy);
   T->getTemplateName().print(OS, Policy);
 
-  TemplateSpecializationType::PrintTemplateArgumentList(
-      OS, T->template_arguments(), Policy);
+  printTemplateArgumentList(OS, T->template_arguments(), Policy);
   spaceBeforePlaceHolder(OS);
 }
 void TypePrinter::printTemplateSpecializationAfter(
@@ -1198,19 +1196,18 @@
 void TypePrinter::printDependentTemplateSpecializationBefore(
         const DependentTemplateSpecializationType *T, raw_ostream &OS) { 
   IncludeStrongLifetimeRAII Strong(Policy);
-  
+
   OS << TypeWithKeyword::getKeywordName(T->getKeyword());
   if (T->getKeyword() != ETK_None)
     OS << " ";
-  
+
   if (T->getQualifier())
-    T->getQualifier()->print(OS, Policy);    
+    T->getQualifier()->print(OS, Policy);
   OS << T->getIdentifier()->getName();
-  TemplateSpecializationType::PrintTemplateArgumentList(OS,
-                                                        T->template_arguments(),
-                                                        Policy);
+  printTemplateArgumentList(OS, T->template_arguments(), Policy);
   spaceBeforePlaceHolder(OS);
 }
+
 void TypePrinter::printDependentTemplateSpecializationAfter(
         const DependentTemplateSpecializationType *T, raw_ostream &OS) { } 
 
@@ -1521,41 +1518,39 @@
     OS << '*';
   }
 }
+
 void TypePrinter::printObjCObjectPointerAfter(const ObjCObjectPointerType *T, 
                                               raw_ostream &OS) { }
 
-void TemplateSpecializationType::
-  PrintTemplateArgumentList(raw_ostream &OS,
-                            const TemplateArgumentListInfo &Args,
-                            const PrintingPolicy &Policy) {
-  return PrintTemplateArgumentList(OS,
-                                   Args.arguments(),
-                                   Policy);
+static
+const TemplateArgument &getArgument(const TemplateArgument &A) { return A; }
+
+static const TemplateArgument &getArgument(const TemplateArgumentLoc &A) {
+  return A.getArgument();
 }
 
-void TemplateSpecializationType::PrintTemplateArgumentList(
-    raw_ostream &OS, ArrayRef<TemplateArgument> Args,
-    const PrintingPolicy &Policy, bool SkipBrackets) {
+template<typename TA>
+static void printTo(raw_ostream &OS, ArrayRef<TA> Args,
+                    const PrintingPolicy &Policy, bool SkipBrackets) {
   const char *Comma = Policy.MSVCFormatting ? "," : ", ";
   if (!SkipBrackets)
     OS << '<';
 
-  bool needSpace = false;
+  bool NeedSpace = false;
   bool FirstArg = true;
-  for (const TemplateArgument &Arg : Args) {
+  for (const auto &Arg : Args) {
     // Print the argument into a string.
     SmallString<128> Buf;
     llvm::raw_svector_ostream ArgOS(Buf);
-    if (Arg.getKind() == TemplateArgument::Pack) {
-      if (Arg.pack_size() && !FirstArg)
+    const TemplateArgument &Argument = getArgument(Arg);
+    if (Argument.getKind() == TemplateArgument::Pack) {
+      if (Argument.pack_size() && !FirstArg)
         OS << Comma;
-      PrintTemplateArgumentList(ArgOS,
-                                Arg.getPackAsArray(),
-                                Policy, true);
+      printTo(ArgOS, Argument.getPackAsArray(), Policy, true);
     } else {
       if (!FirstArg)
         OS << Comma;
-      Arg.print(Policy, ArgOS);
+      Argument.print(Policy, ArgOS);
     }
     StringRef ArgString = ArgOS.str();
 
@@ -1567,65 +1562,36 @@
 
     OS << ArgString;
 
-    needSpace = (!ArgString.empty() && ArgString.back() == '>');
+    NeedSpace = (!ArgString.empty() && ArgString.back() == '>');
     FirstArg = false;
   }
 
   // If the last character of our string is '>', add another space to
   // keep the two '>''s separate tokens. We don't *have* to do this in
   // C++0x, but it's still good hygiene.
-  if (needSpace)
+  if (NeedSpace)
     OS << ' ';
 
   if (!SkipBrackets)
     OS << '>';
 }
 
-// Sadly, repeat all that with TemplateArgLoc.
-void TemplateSpecializationType::
-PrintTemplateArgumentList(raw_ostream &OS,
-                          ArrayRef<TemplateArgumentLoc> Args,
-                          const PrintingPolicy &Policy) {
-  OS << '<';
-  const char *Comma = Policy.MSVCFormatting ? "," : ", ";
+void clang::printTemplateArgumentList(raw_ostream &OS,
+                                      const TemplateArgumentListInfo &Args,
+                                      const PrintingPolicy &Policy) {
+  return printTo(OS, Args.arguments(), Policy, false);
+}
 
-  bool needSpace = false;
-  bool FirstArg = true;
-  for (const TemplateArgumentLoc &Arg : Args) {
-    if (!FirstArg)
-      OS << Comma;
+void clang::printTemplateArgumentList(raw_ostream &OS,
+                                      ArrayRef<TemplateArgument> Args,
+                                      const PrintingPolicy &Policy) {
+  printTo(OS, Args, Policy, false);
+}
 
-    // Print the argument into a string.
-    SmallString<128> Buf;
-    llvm::raw_svector_ostream ArgOS(Buf);
-    if (Arg.getArgument().getKind() == TemplateArgument::Pack) {
-      PrintTemplateArgumentList(ArgOS,
-                                Arg.getArgument().getPackAsArray(),
-                                Policy, true);
-    } else {
-      Arg.getArgument().print(Policy, ArgOS);
-    }
-    StringRef ArgString = ArgOS.str();
-
-    // If this is the first argument and its string representation
-    // begins with the global scope specifier ('::foo'), add a space
-    // to avoid printing the diagraph '<:'.
-    if (FirstArg && !ArgString.empty() && ArgString[0] == ':')
-      OS << ' ';
-
-    OS << ArgString;
-
-    needSpace = (!ArgString.empty() && ArgString.back() == '>');
-    FirstArg = false;
-  }
-
-  // If the last character of our string is '>', add another space to
-  // keep the two '>''s separate tokens. We don't *have* to do this in
-  // C++0x, but it's still good hygiene.
-  if (needSpace)
-    OS << ' ';
-
-  OS << '>';
+void clang::printTemplateArgumentList(raw_ostream &OS,
+                                      ArrayRef<TemplateArgumentLoc> Args,
+                                      const PrintingPolicy &Policy) {
+  printTo(OS, Args, Policy, false);
 }
 
 std::string Qualifiers::getAsString() const {
@@ -1746,16 +1712,20 @@
     OS << ' ';
 }
 
+std::string QualType::getAsString() const {
+  return getAsString(split(), LangOptions());
+}
+
 std::string QualType::getAsString(const PrintingPolicy &Policy) const {
   std::string S;
   getAsStringInternal(S, Policy);
   return S;
 }
 
-std::string QualType::getAsString(const Type *ty, Qualifiers qs) {
+std::string QualType::getAsString(const Type *ty, Qualifiers qs,
+                                  const PrintingPolicy &Policy) {
   std::string buffer;
-  LangOptions options;
-  getAsStringInternal(ty, qs, buffer, PrintingPolicy(options));
+  getAsStringInternal(ty, qs, buffer, Policy);
   return buffer;
 }
 
diff --git a/lib/AST/VTTBuilder.cpp b/lib/AST/VTTBuilder.cpp
index 53461eb..b946f10 100644
--- a/lib/AST/VTTBuilder.cpp
+++ b/lib/AST/VTTBuilder.cpp
@@ -1,4 +1,4 @@
-//===--- VTTBuilder.cpp - C++ VTT layout builder --------------------------===//
+//===- VTTBuilder.cpp - C++ VTT layout builder ----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,12 +14,16 @@
 
 #include "clang/AST/VTTBuilder.h"
 #include "clang/AST/ASTContext.h"
-#include "clang/AST/CXXInheritance.h"
+#include "clang/AST/BaseSubobject.h"
+#include "clang/AST/CharUnits.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/RecordLayout.h"
-#include "clang/Basic/TargetInfo.h"
-#include "llvm/Support/Format.h"
-#include <algorithm>
-#include <cstdio>
+#include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <cstdint>
 
 using namespace clang;
 
@@ -28,9 +32,9 @@
 VTTBuilder::VTTBuilder(ASTContext &Ctx,
                        const CXXRecordDecl *MostDerivedClass,
                        bool GenerateDefinition)
-  : Ctx(Ctx), MostDerivedClass(MostDerivedClass), 
-  MostDerivedClassLayout(Ctx.getASTRecordLayout(MostDerivedClass)),
-    GenerateDefinition(GenerateDefinition) {
+    : Ctx(Ctx), MostDerivedClass(MostDerivedClass), 
+      MostDerivedClassLayout(Ctx.getASTRecordLayout(MostDerivedClass)),
+      GenerateDefinition(GenerateDefinition) {
   // Lay out this VTT.
   LayoutVTT(BaseSubobject(MostDerivedClass, CharUnits::Zero()), 
             /*BaseIsVirtual=*/false);
@@ -56,7 +60,7 @@
 void VTTBuilder::LayoutSecondaryVTTs(BaseSubobject Base) {
   const CXXRecordDecl *RD = Base.getBase();
 
-  for (const auto &I : RD->bases()) {    
+  for (const auto &I : RD->bases()) {
     // Don't layout virtual bases.
     if (I.isVirtual())
         continue;
diff --git a/lib/AST/VTableBuilder.cpp b/lib/AST/VTableBuilder.cpp
index ae8f630..347c516 100644
--- a/lib/AST/VTableBuilder.cpp
+++ b/lib/AST/VTableBuilder.cpp
@@ -1079,9 +1079,7 @@
 visitAllOverriddenMethods(const CXXMethodDecl *MD, VisitorTy &Visitor) {
   assert(MD->isVirtual() && "Method is not virtual!");
 
-  for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
-       E = MD->end_overridden_methods(); I != E; ++I) {
-    const CXXMethodDecl *OverriddenMD = *I;
+  for (const CXXMethodDecl *OverriddenMD : MD->overridden_methods()) {
     if (!Visitor(OverriddenMD))
       continue;
     visitAllOverriddenMethods(OverriddenMD, Visitor);
@@ -1329,11 +1327,8 @@
     ItaniumVTableBuilder::PrimaryBasesSetVectorTy &Bases) {
   if (Bases.count(MD->getParent()))
     return true;
-  
-  for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
-       E = MD->end_overridden_methods(); I != E; ++I) {
-    const CXXMethodDecl *OverriddenMD = *I;
-    
+
+  for (const CXXMethodDecl *OverriddenMD : MD->overridden_methods()) {
     // Check "indirect overriders".
     if (OverridesIndirectMethodInBases(OverriddenMD, Bases))
       return true;
diff --git a/lib/ASTMatchers/ASTMatchFinder.cpp b/lib/ASTMatchers/ASTMatchFinder.cpp
index 02aee4b..c06508a 100644
--- a/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -145,17 +145,22 @@
     ScopedIncrement ScopedDepth(&CurrentDepth);
     return (DeclNode == nullptr) || traverse(*DeclNode);
   }
-  bool TraverseStmt(Stmt *StmtNode) {
+  bool TraverseStmt(Stmt *StmtNode, DataRecursionQueue *Queue = nullptr) {
+    // If we need to keep track of the depth, we can't perform data recursion.
+    if (CurrentDepth == 0 || (CurrentDepth <= MaxDepth && MaxDepth < INT_MAX))
+      Queue = nullptr;
+
     ScopedIncrement ScopedDepth(&CurrentDepth);
-    const Stmt *StmtToTraverse = StmtNode;
-    if (Traversal ==
-        ASTMatchFinder::TK_IgnoreImplicitCastsAndParentheses) {
-      const Expr *ExprNode = dyn_cast_or_null<Expr>(StmtNode);
-      if (ExprNode) {
+    Stmt *StmtToTraverse = StmtNode;
+    if (Traversal == ASTMatchFinder::TK_IgnoreImplicitCastsAndParentheses) {
+      if (Expr *ExprNode = dyn_cast_or_null<Expr>(StmtNode))
         StmtToTraverse = ExprNode->IgnoreParenImpCasts();
-      }
     }
-    return (StmtToTraverse == nullptr) || traverse(*StmtToTraverse);
+    if (!StmtToTraverse)
+      return true;
+    if (!match(*StmtToTraverse))
+      return false;
+    return VisitorBase::TraverseStmt(StmtToTraverse, Queue);
   }
   // We assume that the QualType and the contained type are on the same
   // hierarchy level. Thus, we try to match either of them.
@@ -378,7 +383,7 @@
   }
 
   bool TraverseDecl(Decl *DeclNode);
-  bool TraverseStmt(Stmt *StmtNode);
+  bool TraverseStmt(Stmt *StmtNode, DataRecursionQueue *Queue = nullptr);
   bool TraverseType(QualType TypeNode);
   bool TraverseTypeLoc(TypeLoc TypeNode);
   bool TraverseNestedNameSpecifier(NestedNameSpecifier *NNS);
@@ -841,12 +846,12 @@
   return RecursiveASTVisitor<MatchASTVisitor>::TraverseDecl(DeclNode);
 }
 
-bool MatchASTVisitor::TraverseStmt(Stmt *StmtNode) {
+bool MatchASTVisitor::TraverseStmt(Stmt *StmtNode, DataRecursionQueue *Queue) {
   if (!StmtNode) {
     return true;
   }
   match(*StmtNode);
-  return RecursiveASTVisitor<MatchASTVisitor>::TraverseStmt(StmtNode);
+  return RecursiveASTVisitor<MatchASTVisitor>::TraverseStmt(StmtNode, Queue);
 }
 
 bool MatchASTVisitor::TraverseType(QualType TypeNode) {
diff --git a/lib/ASTMatchers/ASTMatchersInternal.cpp b/lib/ASTMatchers/ASTMatchersInternal.cpp
index f0bfbf9..0bcdd8e 100644
--- a/lib/ASTMatchers/ASTMatchersInternal.cpp
+++ b/lib/ASTMatchers/ASTMatchersInternal.cpp
@@ -1,4 +1,4 @@
-//===--- ASTMatchersInternal.cpp - Structural query framework -------------===//
+//===- ASTMatchersInternal.cpp - Structural query framework ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,11 +11,30 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/ASTMatchers/ASTMatchersInternal.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/ASTTypeTraits.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/PrettyPrinter.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <string>
+#include <utility>
+#include <vector>
 
 namespace clang {
 namespace ast_matchers {
@@ -40,7 +59,6 @@
                            BoundNodesTreeBuilder *Builder,
                            ArrayRef<DynTypedMatcher> InnerMatchers);
 
-
 void BoundNodesTreeBuilder::visitMatches(Visitor *ResultVisitor) {
   if (Bindings.empty())
     Bindings.push_back(BoundNodesMap());
@@ -51,7 +69,7 @@
 
 namespace {
 
-typedef bool (*VariadicOperatorFunction)(
+using VariadicOperatorFunction = bool (*)(
     const ast_type_traits::DynTypedNode &DynNode, ASTMatchFinder *Finder,
     BoundNodesTreeBuilder *Builder, ArrayRef<DynTypedMatcher> InnerMatchers);
 
@@ -100,20 +118,22 @@
   TrueMatcherImpl() {
     Retain(); // Reference count will never become zero.
   }
+
   bool dynMatches(const ast_type_traits::DynTypedNode &, ASTMatchFinder *,
                   BoundNodesTreeBuilder *) const override {
     return true;
   }
 };
-static llvm::ManagedStatic<TrueMatcherImpl> TrueMatcherInstance;
 
-}  // namespace
+} // namespace
+
+static llvm::ManagedStatic<TrueMatcherImpl> TrueMatcherInstance;
 
 DynTypedMatcher DynTypedMatcher::constructVariadic(
     DynTypedMatcher::VariadicOperator Op,
     ast_type_traits::ASTNodeKind SupportedKind,
     std::vector<DynTypedMatcher> InnerMatchers) {
-  assert(InnerMatchers.size() > 0 && "Array must not be empty.");
+  assert(!InnerMatchers.empty() && "Array must not be empty.");
   assert(std::all_of(InnerMatchers.begin(), InnerMatchers.end(),
                      [SupportedKind](const DynTypedMatcher &M) {
                        return M.canConvertTo(SupportedKind);
@@ -314,9 +334,7 @@
 #endif
 }
 
-namespace {
-
-bool consumeNameSuffix(StringRef &FullName, StringRef Suffix) {
+static bool consumeNameSuffix(StringRef &FullName, StringRef Suffix) {
   StringRef Name = FullName;
   if (!Name.endswith(Suffix))
     return false;
@@ -330,7 +348,8 @@
   return true;
 }
 
-StringRef getNodeName(const NamedDecl &Node, llvm::SmallString<128> &Scratch) {
+static StringRef getNodeName(const NamedDecl &Node,
+                             llvm::SmallString<128> &Scratch) {
   // Simple name.
   if (Node.getIdentifier())
     return Node.getName();
@@ -346,7 +365,8 @@
   return "(anonymous)";
 }
 
-StringRef getNodeName(const RecordDecl &Node, llvm::SmallString<128> &Scratch) {
+static StringRef getNodeName(const RecordDecl &Node,
+                             llvm::SmallString<128> &Scratch) {
   if (Node.getIdentifier()) {
     return Node.getName();
   }
@@ -354,11 +374,12 @@
   return ("(anonymous " + Node.getKindName() + ")").toStringRef(Scratch);
 }
 
-StringRef getNodeName(const NamespaceDecl &Node,
-                      llvm::SmallString<128> &Scratch) {
+static StringRef getNodeName(const NamespaceDecl &Node,
+                             llvm::SmallString<128> &Scratch) {
   return Node.isAnonymousNamespace() ? "(anonymous namespace)" : Node.getName();
 }
 
+namespace {
 
 class PatternSet {
 public:
@@ -397,10 +418,11 @@
     StringRef P;
     bool IsFullyQualified;
   };
+
   llvm::SmallVector<Pattern, 8> Patterns;
 };
 
-}  // namespace
+} // namespace
 
 bool HasNameMatcher::matchesNodeUnqualified(const NamedDecl &Node) const {
   assert(UseUnqualifiedMatch);
@@ -504,5 +526,282 @@
 }
 
 } // end namespace internal
+
+const internal::VariadicDynCastAllOfMatcher<Decl, TranslationUnitDecl>
+    translationUnitDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, TypedefDecl> typedefDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, TypedefNameDecl>
+    typedefNameDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasDecl> typeAliasDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasTemplateDecl>
+    typeAliasTemplateDecl;
+const internal::VariadicAllOfMatcher<Decl> decl;
+const internal::VariadicDynCastAllOfMatcher<Decl, LinkageSpecDecl>
+    linkageSpecDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, NamedDecl> namedDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, LabelDecl> labelDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, NamespaceDecl> namespaceDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, NamespaceAliasDecl>
+    namespaceAliasDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, RecordDecl> recordDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, CXXRecordDecl> cxxRecordDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, ClassTemplateDecl>
+    classTemplateDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl,
+                                            ClassTemplateSpecializationDecl>
+    classTemplateSpecializationDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, DeclaratorDecl>
+    declaratorDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, ParmVarDecl> parmVarDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, AccessSpecDecl>
+    accessSpecDecl;
+const internal::VariadicAllOfMatcher<CXXCtorInitializer> cxxCtorInitializer;
+const internal::VariadicAllOfMatcher<TemplateArgument> templateArgument;
+const internal::VariadicAllOfMatcher<TemplateName> templateName;
+const internal::VariadicDynCastAllOfMatcher<Decl, NonTypeTemplateParmDecl>
+    nonTypeTemplateParmDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, TemplateTypeParmDecl>
+    templateTypeParmDecl;
+const internal::VariadicAllOfMatcher<QualType> qualType;
+const internal::VariadicAllOfMatcher<Type> type;
+const internal::VariadicAllOfMatcher<TypeLoc> typeLoc;
+const internal::VariadicDynCastAllOfMatcher<Stmt, UnaryExprOrTypeTraitExpr>
+    unaryExprOrTypeTraitExpr;
+const internal::VariadicDynCastAllOfMatcher<Decl, ValueDecl> valueDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, CXXConstructorDecl>
+    cxxConstructorDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, CXXDestructorDecl>
+    cxxDestructorDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, EnumDecl> enumDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, EnumConstantDecl>
+    enumConstantDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, CXXMethodDecl> cxxMethodDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl>
+    cxxConversionDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, VarDecl> varDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, FieldDecl> fieldDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, FunctionDecl> functionDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, FunctionTemplateDecl>
+    functionTemplateDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, FriendDecl> friendDecl;
+const internal::VariadicAllOfMatcher<Stmt> stmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, DeclStmt> declStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, MemberExpr> memberExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CallExpr> callExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, LambdaExpr> lambdaExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXMemberCallExpr>
+    cxxMemberCallExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ObjCMessageExpr>
+    objcMessageExpr;
+const internal::VariadicDynCastAllOfMatcher<Decl, ObjCInterfaceDecl>
+    objcInterfaceDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, ObjCImplementationDecl>
+    objcImplementationDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, ObjCProtocolDecl>
+    objcProtocolDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, ObjCCategoryDecl>
+    objcCategoryDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, ObjCCategoryImplDecl>
+    objcCategoryImplDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, ObjCMethodDecl>
+    objcMethodDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, ObjCIvarDecl> objcIvarDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, ObjCPropertyDecl>
+    objcPropertyDecl;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ObjCAtThrowStmt>
+    objcThrowStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ObjCAtTryStmt> objcTryStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ObjCAtCatchStmt>
+    objcCatchStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ObjCAtFinallyStmt>
+    objcFinallyStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ExprWithCleanups>
+    exprWithCleanups;
+const internal::VariadicDynCastAllOfMatcher<Stmt, InitListExpr> initListExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXStdInitializerListExpr>
+    cxxStdInitializerListExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ImplicitValueInitExpr>
+    implicitValueInitExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ParenListExpr> parenListExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, SubstNonTypeTemplateParmExpr>
+    substNonTypeTemplateParmExpr;
+const internal::VariadicDynCastAllOfMatcher<Decl, UsingDecl> usingDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, UsingDirectiveDecl>
+    usingDirectiveDecl;
+const internal::VariadicDynCastAllOfMatcher<Stmt, UnresolvedLookupExpr>
+    unresolvedLookupExpr;
+const internal::VariadicDynCastAllOfMatcher<Decl, UnresolvedUsingValueDecl>
+    unresolvedUsingValueDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, UnresolvedUsingTypenameDecl>
+    unresolvedUsingTypenameDecl;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ParenExpr> parenExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXConstructExpr>
+    cxxConstructExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXUnresolvedConstructExpr>
+    cxxUnresolvedConstructExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXThisExpr> cxxThisExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXBindTemporaryExpr>
+    cxxBindTemporaryExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, MaterializeTemporaryExpr>
+    materializeTemporaryExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXNewExpr> cxxNewExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXDeleteExpr> cxxDeleteExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ArraySubscriptExpr>
+    arraySubscriptExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXDefaultArgExpr>
+    cxxDefaultArgExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXOperatorCallExpr>
+    cxxOperatorCallExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, Expr> expr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, DeclRefExpr> declRefExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, IfStmt> ifStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ForStmt> forStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXForRangeStmt>
+    cxxForRangeStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, WhileStmt> whileStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, DoStmt> doStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, BreakStmt> breakStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ContinueStmt> continueStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ReturnStmt> returnStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, GotoStmt> gotoStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, LabelStmt> labelStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, AddrLabelExpr> addrLabelExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, SwitchStmt> switchStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, SwitchCase> switchCase;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CaseStmt> caseStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, DefaultStmt> defaultStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CompoundStmt> compoundStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXCatchStmt> cxxCatchStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXTryStmt> cxxTryStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXThrowExpr> cxxThrowExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, NullStmt> nullStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, AsmStmt> asmStmt;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXBoolLiteralExpr>
+    cxxBoolLiteral;
+const internal::VariadicDynCastAllOfMatcher<Stmt, StringLiteral> stringLiteral;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CharacterLiteral>
+    characterLiteral;
+const internal::VariadicDynCastAllOfMatcher<Stmt, IntegerLiteral>
+    integerLiteral;
+const internal::VariadicDynCastAllOfMatcher<Stmt, FloatingLiteral> floatLiteral;
+const internal::VariadicDynCastAllOfMatcher<Stmt, UserDefinedLiteral>
+    userDefinedLiteral;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CompoundLiteralExpr>
+    compoundLiteralExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXNullPtrLiteralExpr>
+    cxxNullPtrLiteralExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, GNUNullExpr> gnuNullExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, AtomicExpr> atomicExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, StmtExpr> stmtExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, BinaryOperator>
+    binaryOperator;
+const internal::VariadicDynCastAllOfMatcher<Stmt, UnaryOperator> unaryOperator;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ConditionalOperator>
+    conditionalOperator;
+const internal::VariadicDynCastAllOfMatcher<Stmt, BinaryConditionalOperator>
+    binaryConditionalOperator;
+const internal::VariadicDynCastAllOfMatcher<Stmt, OpaqueValueExpr>
+    opaqueValueExpr;
+const internal::VariadicDynCastAllOfMatcher<Decl, StaticAssertDecl>
+    staticAssertDecl;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXReinterpretCastExpr>
+    cxxReinterpretCastExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXStaticCastExpr>
+    cxxStaticCastExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXDynamicCastExpr>
+    cxxDynamicCastExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXConstCastExpr>
+    cxxConstCastExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CStyleCastExpr>
+    cStyleCastExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ExplicitCastExpr>
+    explicitCastExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, ImplicitCastExpr>
+    implicitCastExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CastExpr> castExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXFunctionalCastExpr>
+    cxxFunctionalCastExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXTemporaryObjectExpr>
+    cxxTemporaryObjectExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, PredefinedExpr>
+    predefinedExpr;
+const internal::VariadicDynCastAllOfMatcher<Stmt, DesignatedInitExpr>
+    designatedInitExpr;
+const internal::VariadicOperatorMatcherFunc<
+    2, std::numeric_limits<unsigned>::max()>
+    eachOf = {internal::DynTypedMatcher::VO_EachOf};
+const internal::VariadicOperatorMatcherFunc<
+    2, std::numeric_limits<unsigned>::max()>
+    anyOf = {internal::DynTypedMatcher::VO_AnyOf};
+const internal::VariadicOperatorMatcherFunc<
+    2, std::numeric_limits<unsigned>::max()>
+    allOf = {internal::DynTypedMatcher::VO_AllOf};
+const internal::VariadicFunction<internal::Matcher<NamedDecl>, StringRef,
+                                 internal::hasAnyNameFunc>
+    hasAnyName = {};
+const internal::ArgumentAdaptingMatcherFunc<internal::HasMatcher> has = {};
+const internal::ArgumentAdaptingMatcherFunc<internal::HasDescendantMatcher>
+    hasDescendant = {};
+const internal::ArgumentAdaptingMatcherFunc<internal::ForEachMatcher> forEach =
+    {};
+const internal::ArgumentAdaptingMatcherFunc<internal::ForEachDescendantMatcher>
+    forEachDescendant = {};
+const internal::ArgumentAdaptingMatcherFunc<
+    internal::HasParentMatcher,
+    internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>,
+    internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>>
+    hasParent = {};
+const internal::ArgumentAdaptingMatcherFunc<
+    internal::HasAncestorMatcher,
+    internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>,
+    internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>>
+    hasAncestor = {};
+const internal::VariadicOperatorMatcherFunc<1, 1> unless = {
+    internal::DynTypedMatcher::VO_UnaryNot};
+const internal::VariadicAllOfMatcher<NestedNameSpecifier> nestedNameSpecifier;
+const internal::VariadicAllOfMatcher<NestedNameSpecifierLoc>
+    nestedNameSpecifierLoc;
+const internal::VariadicDynCastAllOfMatcher<Stmt, CUDAKernelCallExpr>
+    cudaKernelCallExpr;
+const AstTypeMatcher<BuiltinType> builtinType;
+const AstTypeMatcher<ArrayType> arrayType;
+const AstTypeMatcher<ComplexType> complexType;
+const AstTypeMatcher<ConstantArrayType> constantArrayType;
+const AstTypeMatcher<DependentSizedArrayType> dependentSizedArrayType;
+const AstTypeMatcher<IncompleteArrayType> incompleteArrayType;
+const AstTypeMatcher<VariableArrayType> variableArrayType;
+const AstTypeMatcher<AtomicType> atomicType;
+const AstTypeMatcher<AutoType> autoType;
+const AstTypeMatcher<FunctionType> functionType;
+const AstTypeMatcher<FunctionProtoType> functionProtoType;
+const AstTypeMatcher<ParenType> parenType;
+const AstTypeMatcher<BlockPointerType> blockPointerType;
+const AstTypeMatcher<MemberPointerType> memberPointerType;
+const AstTypeMatcher<PointerType> pointerType;
+const AstTypeMatcher<ObjCObjectPointerType> objcObjectPointerType;
+const AstTypeMatcher<ReferenceType> referenceType;
+const AstTypeMatcher<LValueReferenceType> lValueReferenceType;
+const AstTypeMatcher<RValueReferenceType> rValueReferenceType;
+const AstTypeMatcher<TypedefType> typedefType;
+const AstTypeMatcher<EnumType> enumType;
+const AstTypeMatcher<TemplateSpecializationType> templateSpecializationType;
+const AstTypeMatcher<UnaryTransformType> unaryTransformType;
+const AstTypeMatcher<RecordType> recordType;
+const AstTypeMatcher<TagType> tagType;
+const AstTypeMatcher<ElaboratedType> elaboratedType;
+const AstTypeMatcher<SubstTemplateTypeParmType> substTemplateTypeParmType;
+const AstTypeMatcher<TemplateTypeParmType> templateTypeParmType;
+const AstTypeMatcher<InjectedClassNameType> injectedClassNameType;
+const AstTypeMatcher<DecayedType> decayedType;
+AST_TYPELOC_TRAVERSE_MATCHER_DEF(hasElementType,
+                                 AST_POLYMORPHIC_SUPPORTED_TYPES(ArrayType,
+                                                                 ComplexType));
+AST_TYPELOC_TRAVERSE_MATCHER_DEF(hasValueType,
+                                 AST_POLYMORPHIC_SUPPORTED_TYPES(AtomicType));
+AST_TYPELOC_TRAVERSE_MATCHER_DEF(
+    pointee,
+    AST_POLYMORPHIC_SUPPORTED_TYPES(BlockPointerType, MemberPointerType,
+                                    PointerType, ReferenceType));
+
 } // end namespace ast_matchers
 } // end namespace clang
diff --git a/lib/ASTMatchers/Dynamic/Marshallers.h b/lib/ASTMatchers/Dynamic/Marshallers.h
index c557ff1..af90e2c 100644
--- a/lib/ASTMatchers/Dynamic/Marshallers.h
+++ b/lib/ASTMatchers/Dynamic/Marshallers.h
@@ -1,4 +1,4 @@
-//===--- Marshallers.h - Generic matcher function marshallers ---*- C++ -*-===//
+//===- Marshallers.h - Generic matcher function marshallers -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,7 +6,7 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-///
+//
 /// \file
 /// \brief Functions templates and classes to wrap matcher construct functions.
 ///
@@ -14,18 +14,33 @@
 /// marshalling layer on top of matcher construct functions.
 /// These are used by the registry to export all marshaller constructors with
 /// the same generic interface.
-///
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_LIB_ASTMATCHERS_DYNAMIC_MARSHALLERS_H
 #define LLVM_CLANG_LIB_ASTMATCHERS_DYNAMIC_MARSHALLERS_H
 
-#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/AST/ASTTypeTraits.h"
+#include "clang/AST/OperationKinds.h"
+#include "clang/ASTMatchers/ASTMatchersInternal.h"
 #include "clang/ASTMatchers/Dynamic/Diagnostics.h"
 #include "clang/ASTMatchers/Dynamic/VariantValue.h"
+#include "clang/Basic/AttrKinds.h"
 #include "clang/Basic/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <limits>
+#include <memory>
 #include <string>
+#include <utility>
+#include <vector>
 
 namespace clang {
 namespace ast_matchers {
@@ -41,9 +56,11 @@
 
 template <> struct ArgTypeTraits<std::string> {
   static bool is(const VariantValue &Value) { return Value.isString(); }
+
   static const std::string &get(const VariantValue &Value) {
     return Value.getString();
   }
+
   static ArgKind getKind() {
     return ArgKind(ArgKind::AK_String);
   }
@@ -53,13 +70,15 @@
 struct ArgTypeTraits<StringRef> : public ArgTypeTraits<std::string> {
 };
 
-template <class T> struct ArgTypeTraits<ast_matchers::internal::Matcher<T> > {
+template <class T> struct ArgTypeTraits<ast_matchers::internal::Matcher<T>> {
   static bool is(const VariantValue &Value) {
     return Value.isMatcher() && Value.getMatcher().hasTypedMatcher<T>();
   }
+
   static ast_matchers::internal::Matcher<T> get(const VariantValue &Value) {
     return Value.getMatcher().getTypedMatcher<T>();
   }
+
   static ArgKind getKind() {
     return ArgKind(ast_type_traits::ASTNodeKind::getFromNodeKind<T>());
   }
@@ -67,9 +86,11 @@
 
 template <> struct ArgTypeTraits<bool> {
   static bool is(const VariantValue &Value) { return Value.isBoolean(); }
+
   static bool get(const VariantValue &Value) {
     return Value.getBoolean();
   }
+
   static ArgKind getKind() {
     return ArgKind(ArgKind::AK_Boolean);
   }
@@ -77,9 +98,11 @@
 
 template <> struct ArgTypeTraits<double> {
   static bool is(const VariantValue &Value) { return Value.isDouble(); }
+
   static double get(const VariantValue &Value) {
     return Value.getDouble();
   }
+
   static ArgKind getKind() {
     return ArgKind(ArgKind::AK_Double);
   }
@@ -87,9 +110,11 @@
 
 template <> struct ArgTypeTraits<unsigned> {
   static bool is(const VariantValue &Value) { return Value.isUnsigned(); }
+
   static unsigned get(const VariantValue &Value) {
     return Value.getUnsigned();
   }
+
   static ArgKind getKind() {
     return ArgKind(ArgKind::AK_Unsigned);
   }
@@ -97,42 +122,45 @@
 
 template <> struct ArgTypeTraits<attr::Kind> {
 private:
-  static attr::Kind getAttrKind(llvm::StringRef AttrKind) {
-    return llvm::StringSwitch<attr::Kind>(AttrKind)
+  static Optional<attr::Kind> getAttrKind(llvm::StringRef AttrKind) {
+    return llvm::StringSwitch<Optional<attr::Kind>>(AttrKind)
 #define ATTR(X) .Case("attr::" #X, attr:: X)
 #include "clang/Basic/AttrList.inc"
-        .Default(attr::Kind(-1));
+        .Default(llvm::None);
   }
+
 public:
   static bool is(const VariantValue &Value) {
-    return Value.isString() &&
-        getAttrKind(Value.getString()) != attr::Kind(-1);
+    return Value.isString() && getAttrKind(Value.getString());
   }
+
   static attr::Kind get(const VariantValue &Value) {
-    return getAttrKind(Value.getString());
+    return *getAttrKind(Value.getString());
   }
+
   static ArgKind getKind() {
     return ArgKind(ArgKind::AK_String);
   }
 };
 
-template <> struct ArgTypeTraits<clang::CastKind> {
+template <> struct ArgTypeTraits<CastKind> {
 private:
-  static clang::CastKind getCastKind(llvm::StringRef AttrKind) {
-    return llvm::StringSwitch<clang::CastKind>(AttrKind)
+  static Optional<CastKind> getCastKind(llvm::StringRef AttrKind) {
+    return llvm::StringSwitch<Optional<CastKind>>(AttrKind)
 #define CAST_OPERATION(Name) .Case( #Name, CK_##Name)
 #include "clang/AST/OperationKinds.def"
-        .Default(CK_Invalid);
+        .Default(llvm::None);
   }
 
 public:
   static bool is(const VariantValue &Value) {
-    return Value.isString() &&  
-        getCastKind(Value.getString()) != CK_Invalid;
+    return Value.isString() && getCastKind(Value.getString());
   }
-  static clang::CastKind get(const VariantValue &Value) {
-    return getCastKind(Value.getString());
+
+  static CastKind get(const VariantValue &Value) {
+    return *getCastKind(Value.getString());
   }
+
   static ArgKind getKind() {
     return ArgKind(ArgKind::AK_String);
   }
@@ -144,7 +172,8 @@
 /// arguments, and various other methods for type introspection.
 class MatcherDescriptor {
 public:
-  virtual ~MatcherDescriptor() {}
+  virtual ~MatcherDescriptor() = default;
+
   virtual VariantMatcher create(SourceRange NameRange,
                                 ArrayRef<ParserValue> Args,
                                 Diagnostics *Error) const = 0;
@@ -201,11 +230,11 @@
 /// their types, unpacking them and calling the underlying function.
 class FixedArgCountMatcherDescriptor : public MatcherDescriptor {
 public:
-  typedef VariantMatcher (*MarshallerType)(void (*Func)(),
-                                           StringRef MatcherName,
-                                           SourceRange NameRange,
-                                           ArrayRef<ParserValue> Args,
-                                           Diagnostics *Error);
+  using MarshallerType = VariantMatcher (*)(void (*Func)(),
+                                            StringRef MatcherName,
+                                            SourceRange NameRange,
+                                            ArrayRef<ParserValue> Args,
+                                            Diagnostics *Error);
 
   /// \param Marshaller Function to unpack the arguments and call \c Func
   /// \param Func Matcher construct function. This is the function that
@@ -229,10 +258,12 @@
 
   bool isVariadic() const override { return false; }
   unsigned getNumArgs() const override { return ArgKinds.size(); }
+
   void getArgKinds(ast_type_traits::ASTNodeKind ThisKind, unsigned ArgNo,
                    std::vector<ArgKind> &Kinds) const override {
     Kinds.push_back(ArgKinds[ArgNo]);
   }
+
   bool isConvertibleTo(
       ast_type_traits::ASTNodeKind Kind, unsigned *Specificity,
       ast_type_traits::ASTNodeKind *LeastDerivedKind) const override {
@@ -303,14 +334,14 @@
 };
 
 template <typename T>
-struct BuildReturnTypeVector<ast_matchers::internal::Matcher<T> > {
+struct BuildReturnTypeVector<ast_matchers::internal::Matcher<T>> {
   static void build(std::vector<ast_type_traits::ASTNodeKind> &RetTypes) {
     RetTypes.push_back(ast_type_traits::ASTNodeKind::getFromNodeKind<T>());
   }
 };
 
 template <typename T>
-struct BuildReturnTypeVector<ast_matchers::internal::BindableMatcher<T> > {
+struct BuildReturnTypeVector<ast_matchers::internal::BindableMatcher<T>> {
   static void build(std::vector<ast_type_traits::ASTNodeKind> &RetTypes) {
     RetTypes.push_back(ast_type_traits::ASTNodeKind::getFromNodeKind<T>());
   }
@@ -326,7 +357,8 @@
 
   bool HasError = false;
   for (size_t i = 0, e = Args.size(); i != e; ++i) {
-    typedef ArgTypeTraits<ArgT> ArgTraits;
+    using ArgTraits = ArgTypeTraits<ArgT>;
+
     const ParserValue &Arg = Args[i];
     const VariantValue &Value = Arg.Value;
     if (!ArgTraits::is(Value)) {
@@ -360,10 +392,10 @@
 /// object file.
 class VariadicFuncMatcherDescriptor : public MatcherDescriptor {
 public:
-  typedef VariantMatcher (*RunFunc)(StringRef MatcherName,
-                                    SourceRange NameRange,
-                                    ArrayRef<ParserValue> Args,
-                                    Diagnostics *Error);
+  using RunFunc = VariantMatcher (*)(StringRef MatcherName,
+                                     SourceRange NameRange,
+                                     ArrayRef<ParserValue> Args,
+                                     Diagnostics *Error);
 
   template <typename ResultT, typename ArgT,
             ResultT (*F)(ArrayRef<const ArgT *>)>
@@ -384,10 +416,12 @@
 
   bool isVariadic() const override { return true; }
   unsigned getNumArgs() const override { return 0; }
+
   void getArgKinds(ast_type_traits::ASTNodeKind ThisKind, unsigned ArgNo,
                    std::vector<ArgKind> &Kinds) const override {
     Kinds.push_back(ArgsKind);
   }
+
   bool isConvertibleTo(
       ast_type_traits::ASTNodeKind Kind, unsigned *Specificity,
       ast_type_traits::ASTNodeKind *LeastDerivedKind) const override {
@@ -458,7 +492,7 @@
                                        SourceRange NameRange,
                                        ArrayRef<ParserValue> Args,
                                        Diagnostics *Error) {
-  typedef ReturnType (*FuncType)();
+  using FuncType = ReturnType (*)();
   CHECK_ARG_COUNT(0);
   return outvalueToVariantMatcher(reinterpret_cast<FuncType>(Func)());
 }
@@ -469,7 +503,7 @@
                                        SourceRange NameRange,
                                        ArrayRef<ParserValue> Args,
                                        Diagnostics *Error) {
-  typedef ReturnType (*FuncType)(ArgType1);
+  using FuncType = ReturnType (*)(ArgType1);
   CHECK_ARG_COUNT(1);
   CHECK_ARG_TYPE(0, ArgType1);
   return outvalueToVariantMatcher(reinterpret_cast<FuncType>(Func)(
@@ -482,7 +516,7 @@
                                        SourceRange NameRange,
                                        ArrayRef<ParserValue> Args,
                                        Diagnostics *Error) {
-  typedef ReturnType (*FuncType)(ArgType1, ArgType2);
+  using FuncType = ReturnType (*)(ArgType1, ArgType2);
   CHECK_ARG_COUNT(2);
   CHECK_ARG_TYPE(0, ArgType1);
   CHECK_ARG_TYPE(1, ArgType2);
@@ -507,8 +541,8 @@
   }
 
 private:
-  typedef ast_matchers::internal::ArgumentAdaptingMatcherFunc<
-      ArgumentAdapterT, FromTypes, ToTypes> AdaptativeFunc;
+  using AdaptativeFunc = ast_matchers::internal::ArgumentAdaptingMatcherFunc<
+      ArgumentAdapterT, FromTypes, ToTypes>;
 
   /// \brief End case for the recursion
   static void collect(ast_matchers::internal::EmptyTypeList) {}
@@ -534,7 +568,7 @@
       : Overloads(std::make_move_iterator(Callbacks.begin()),
                   std::make_move_iterator(Callbacks.end())) {}
 
-  ~OverloadedMatcherDescriptor() override {}
+  ~OverloadedMatcherDescriptor() override = default;
 
   VariantMatcher create(SourceRange NameRange,
                         ArrayRef<ParserValue> Args,
@@ -604,7 +638,8 @@
 /// \brief Variadic operator marshaller function.
 class VariadicOperatorMatcherDescriptor : public MatcherDescriptor {
 public:
-  typedef DynTypedMatcher::VariadicOperator VarOp;
+  using VarOp = DynTypedMatcher::VariadicOperator;
+
   VariadicOperatorMatcherDescriptor(unsigned MinCount, unsigned MaxCount,
                                     VarOp Op, StringRef MatcherName)
       : MinCount(MinCount), MaxCount(MaxCount), Op(Op),
@@ -615,7 +650,9 @@
                         Diagnostics *Error) const override {
     if (Args.size() < MinCount || MaxCount < Args.size()) {
       const std::string MaxStr =
-          (MaxCount == UINT_MAX ? "" : Twine(MaxCount)).str();
+          (MaxCount == std::numeric_limits<unsigned>::max() ? ""
+                                                            : Twine(MaxCount))
+              .str();
       Error->addError(NameRange, Error->ET_RegistryWrongArgCount)
           << ("(" + Twine(MinCount) + ", " + MaxStr + ")") << Args.size();
       return VariantMatcher();
@@ -637,10 +674,12 @@
 
   bool isVariadic() const override { return true; }
   unsigned getNumArgs() const override { return 0; }
+
   void getArgKinds(ast_type_traits::ASTNodeKind ThisKind, unsigned ArgNo,
                    std::vector<ArgKind> &Kinds) const override {
     Kinds.push_back(ThisKind);
   }
+
   bool isConvertibleTo(ast_type_traits::ASTNodeKind Kind, unsigned *Specificity,
                        ast_type_traits::ASTNodeKind *LeastDerivedKind) const override {
     if (Specificity)
@@ -649,6 +688,7 @@
       *LeastDerivedKind = Kind;
     return true;
   }
+
   bool isPolymorphic() const override { return true; }
 
 private:
diff --git a/lib/ASTMatchers/Dynamic/Parser.cpp b/lib/ASTMatchers/Dynamic/Parser.cpp
index f5bd296..89e1a26 100644
--- a/lib/ASTMatchers/Dynamic/Parser.cpp
+++ b/lib/ASTMatchers/Dynamic/Parser.cpp
@@ -1,4 +1,4 @@
-//===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
+//===- Parser.cpp - Matcher expression parser -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,11 +13,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/ASTMatchers/Dynamic/Parser.h"
+#include "clang/ASTMatchers/ASTMatchersInternal.h"
+#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
 #include "clang/ASTMatchers/Dynamic/Registry.h"
 #include "clang/Basic/CharInfo.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
+#include <algorithm>
+#include <cassert>
+#include <cerrno>
+#include <cstddef>
+#include <cstdlib>
 #include <string>
+#include <utility>
 #include <vector>
 
 namespace clang {
@@ -43,10 +53,10 @@
   /// \brief Some known identifiers.
   static const char* const ID_Bind;
 
-  TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
+  TokenInfo() = default;
 
   StringRef Text;
-  TokenKind Kind;
+  TokenKind Kind = TK_Eof;
   SourceRange Range;
   VariantValue Value;
 };
@@ -57,14 +67,13 @@
 class Parser::CodeTokenizer {
 public:
   explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
-      : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
-        CodeCompletionLocation(nullptr) {
+      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
     NextToken = getNextToken();
   }
 
   CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
                 unsigned CodeCompletionOffset)
-      : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
+      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
         CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
     NextToken = getNextToken();
   }
@@ -138,7 +147,7 @@
       if (isAlphanumeric(Code[0])) {
         // Parse an identifier
         size_t TokenLength = 1;
-        while (1) {
+        while (true) {
           // A code completion location in/immediately after an identifier will
           // cause the portion of the identifier before the code completion
           // location to become a code completion token.
@@ -283,22 +292,22 @@
 
   StringRef Code;
   StringRef StartOfLine;
-  unsigned Line;
+  unsigned Line = 1;
   Diagnostics *Error;
   TokenInfo NextToken;
-  const char *CodeCompletionLocation;
+  const char *CodeCompletionLocation = nullptr;
 };
 
-Parser::Sema::~Sema() {}
+Parser::Sema::~Sema() = default;
 
 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
     llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
-  return std::vector<ArgKind>();
+  return {};
 }
 
 std::vector<MatcherCompletion>
 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
-  return std::vector<MatcherCompletion>();
+  return {};
 }
 
 struct Parser::ScopedContextEntry {
@@ -384,7 +393,7 @@
         EndToken = Tokenizer->consumeNextToken();
         break;
       }
-      if (Args.size() > 0) {
+      if (!Args.empty()) {
         // We must find a , token to continue.
         const TokenInfo CommaToken = Tokenizer->consumeNextToken();
         if (CommaToken.Kind != TokenInfo::TK_Comma) {
@@ -558,7 +567,7 @@
     : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
       NamedValues(NamedValues), Error(Error) {}
 
-Parser::RegistrySema::~RegistrySema() {}
+Parser::RegistrySema::~RegistrySema() = default;
 
 llvm::Optional<MatcherCtor>
 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
@@ -640,6 +649,6 @@
   return Result;
 }
 
-}  // namespace dynamic
-}  // namespace ast_matchers
-}  // namespace clang
+} // namespace dynamic
+} // namespace ast_matchers
+} // namespace clang
diff --git a/lib/ASTMatchers/Dynamic/Registry.cpp b/lib/ASTMatchers/Dynamic/Registry.cpp
index 4bd7a52..79aa4e6 100644
--- a/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -1,37 +1,49 @@
-//===--- Registry.cpp - Matcher registry -------------------------===//
+//===- Registry.cpp - Matcher registry ------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
-//===------------------------------------------------------------===//
-///
+//===----------------------------------------------------------------------===//
+//
 /// \file
 /// \brief Registry map populated at static initialization time.
-///
-//===------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
 
 #include "clang/ASTMatchers/Dynamic/Registry.h"
 #include "Marshallers.h"
+#include "clang/AST/ASTTypeTraits.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
+#include "clang/ASTMatchers/Dynamic/VariantValue.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <iterator>
+#include <memory>
 #include <set>
+#include <string>
 #include <utility>
+#include <vector>
 
 using namespace clang::ast_type_traits;
 
 namespace clang {
 namespace ast_matchers {
 namespace dynamic {
+
 namespace {
 
 using internal::MatcherDescriptor;
 
-typedef llvm::StringMap<std::unique_ptr<const MatcherDescriptor>> ConstructorMap;
+using ConstructorMap = llvm::StringMap<std::unique_ptr<const MatcherDescriptor>>;
+
 class RegistryMaps {
 public:
   RegistryMaps();
@@ -46,6 +58,8 @@
   ConstructorMap Constructors;
 };
 
+} // namespace
+
 void RegistryMaps::registerMatcher(
     StringRef MatcherName, std::unique_ptr<MatcherDescriptor> Callback) {
   assert(Constructors.find(MatcherName) == Constructors.end());
@@ -74,7 +88,7 @@
         MATCHER_OVERLOAD_ENTRY(name, 0),                                       \
         MATCHER_OVERLOAD_ENTRY(name, 1)};                                      \
     REGISTER_MATCHER_OVERLOAD(name);                                           \
-  } while (0)
+  } while (false)
 
 /// \brief Generate a registry map with all the known matchers.
 RegistryMaps::RegistryMaps() {
@@ -196,6 +210,7 @@
   REGISTER_MATCHER(forEachArgumentWithParam);
   REGISTER_MATCHER(forEachConstructorInitializer);
   REGISTER_MATCHER(forEachDescendant);
+  REGISTER_MATCHER(forEachOverridden);
   REGISTER_MATCHER(forEachSwitchCase);
   REGISTER_MATCHER(forField);
   REGISTER_MATCHER(forFunction);
@@ -219,6 +234,7 @@
   REGISTER_MATCHER(hasAnyUsingShadowDecl);
   REGISTER_MATCHER(hasArgument);
   REGISTER_MATCHER(hasArgumentOfType);
+  REGISTER_MATCHER(hasArraySize);
   REGISTER_MATCHER(hasAttr);
   REGISTER_MATCHER(hasAutomaticStorageDuration);
   REGISTER_MATCHER(hasBase);
@@ -233,6 +249,8 @@
   REGISTER_MATCHER(hasDeclaration);
   REGISTER_MATCHER(hasDeclContext);
   REGISTER_MATCHER(hasDeducedType);
+  REGISTER_MATCHER(hasDefaultArgument);
+  REGISTER_MATCHER(hasDefinition);
   REGISTER_MATCHER(hasDescendant);
   REGISTER_MATCHER(hasDestinationType);
   REGISTER_MATCHER(hasDynamicExceptionSpec);
@@ -278,6 +296,7 @@
   REGISTER_MATCHER(hasTemplateArgument);
   REGISTER_MATCHER(hasThen);
   REGISTER_MATCHER(hasThreadStorageDuration);
+  REGISTER_MATCHER(hasTrailingReturn);
   REGISTER_MATCHER(hasTrueExpression);
   REGISTER_MATCHER(hasTypeLoc);
   REGISTER_MATCHER(hasUnaryOperand);
@@ -301,6 +320,7 @@
   REGISTER_MATCHER(isAnonymous);
   REGISTER_MATCHER(isAnyCharacter);
   REGISTER_MATCHER(isAnyPointer);
+  REGISTER_MATCHER(isArray);
   REGISTER_MATCHER(isArrow);
   REGISTER_MATCHER(isBaseInitializer);
   REGISTER_MATCHER(isBitField);
@@ -335,12 +355,14 @@
   REGISTER_MATCHER(isMemberInitializer);
   REGISTER_MATCHER(isMoveAssignmentOperator);
   REGISTER_MATCHER(isMoveConstructor);
+  REGISTER_MATCHER(isNoReturn);
   REGISTER_MATCHER(isNoThrow);
   REGISTER_MATCHER(isOverride);
   REGISTER_MATCHER(isPrivate);
   REGISTER_MATCHER(isProtected);
   REGISTER_MATCHER(isPublic);
   REGISTER_MATCHER(isPure);
+  REGISTER_MATCHER(isScoped);
   REGISTER_MATCHER(isSignedInteger);
   REGISTER_MATCHER(isStaticStorageClass);
   REGISTER_MATCHER(isStruct);
@@ -373,7 +395,10 @@
   REGISTER_MATCHER(nullStmt);
   REGISTER_MATCHER(numSelectorArgs);
   REGISTER_MATCHER(ofClass);
+  REGISTER_MATCHER(objcCatchStmt);
   REGISTER_MATCHER(objcCategoryDecl);
+  REGISTER_MATCHER(objcCategoryImplDecl);
+  REGISTER_MATCHER(objcFinallyStmt);
   REGISTER_MATCHER(objcImplementationDecl);
   REGISTER_MATCHER(objcInterfaceDecl);
   REGISTER_MATCHER(objcIvarDecl);
@@ -382,6 +407,8 @@
   REGISTER_MATCHER(objcObjectPointerType);
   REGISTER_MATCHER(objcPropertyDecl);
   REGISTER_MATCHER(objcProtocolDecl);
+  REGISTER_MATCHER(objcThrowStmt);
+  REGISTER_MATCHER(objcTryStmt);
   REGISTER_MATCHER(on);
   REGISTER_MATCHER(onImplicitObjectArgument);
   REGISTER_MATCHER(opaqueValueExpr);
@@ -452,12 +479,10 @@
   REGISTER_MATCHER(withInitializer);
 }
 
-RegistryMaps::~RegistryMaps() {}
+RegistryMaps::~RegistryMaps() = default;
 
 static llvm::ManagedStatic<RegistryMaps> RegistryData;
 
-} // anonymous namespace
-
 // static
 llvm::Optional<MatcherCtor> Registry::lookupMatcherCtor(StringRef MatcherName) {
   auto it = RegistryData->constructors().find(MatcherName);
@@ -466,10 +491,8 @@
              : it->second.get();
 }
 
-namespace {
-
-llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
-                              const std::set<ASTNodeKind> &KS) {
+static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                                     const std::set<ASTNodeKind> &KS) {
   unsigned Count = 0;
   for (std::set<ASTNodeKind>::const_iterator I = KS.begin(), E = KS.end();
        I != E; ++I) {
@@ -484,8 +507,6 @@
   return OS;
 }
 
-}  // namespace
-
 std::vector<ArgKind> Registry::getAcceptedCompletionTypes(
     ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
   ASTNodeKind InitialTypes[] = {
@@ -595,7 +616,6 @@
   return Completions;
 }
 
-// static
 VariantMatcher Registry::constructMatcher(MatcherCtor Ctor,
                                           SourceRange NameRange,
                                           ArrayRef<ParserValue> Args,
@@ -603,7 +623,6 @@
   return Ctor->create(NameRange, Args, Error);
 }
 
-// static
 VariantMatcher Registry::constructBoundMatcher(MatcherCtor Ctor,
                                                SourceRange NameRange,
                                                StringRef BindID,
@@ -623,6 +642,6 @@
   return VariantMatcher();
 }
 
-}  // namespace dynamic
-}  // namespace ast_matchers
-}  // namespace clang
+} // namespace dynamic
+} // namespace ast_matchers
+} // namespace clang
diff --git a/lib/Analysis/AnalysisDeclContext.cpp b/lib/Analysis/AnalysisDeclContext.cpp
index 73eb142..2b8259b 100644
--- a/lib/Analysis/AnalysisDeclContext.cpp
+++ b/lib/Analysis/AnalysisDeclContext.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Analysis/AnalysisDeclContext.h"
-#include "BodyFarm.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclObjC.h"
@@ -23,6 +22,7 @@
 #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h"
 #include "clang/Analysis/Analyses/LiveVariables.h"
 #include "clang/Analysis/Analyses/PseudoConstantAnalysis.h"
+#include "clang/Analysis/BodyFarm.h"
 #include "clang/Analysis/CFG.h"
 #include "clang/Analysis/CFGStmtMap.h"
 #include "clang/Analysis/Support/BumpVector.h"
@@ -63,18 +63,14 @@
   cfgBuildOptions.forcedBlkExprs = &forcedBlkExprs;
 }
 
-AnalysisDeclContextManager::AnalysisDeclContextManager(bool useUnoptimizedCFG,
-                                                       bool addImplicitDtors,
-                                                       bool addInitializers,
-                                                       bool addTemporaryDtors,
-                                                       bool addLifetime,
-                                                       bool addLoopExit,
-                                                       bool synthesizeBodies,
-                                                       bool addStaticInitBranch,
-                                                       bool addCXXNewAllocator,
-                                                       CodeInjector *injector)
-  : Injector(injector), SynthesizeBodies(synthesizeBodies)
-{
+AnalysisDeclContextManager::AnalysisDeclContextManager(
+    ASTContext &ASTCtx, bool useUnoptimizedCFG, bool addImplicitDtors,
+    bool addInitializers, bool addTemporaryDtors, bool addLifetime,
+    bool addLoopExit, bool synthesizeBodies, bool addStaticInitBranch,
+    bool addCXXNewAllocator, bool addRichCXXConstructors,
+    CodeInjector *injector)
+    : Injector(injector), FunctionBodyFarm(ASTCtx, injector),
+      SynthesizeBodies(synthesizeBodies) {
   cfgBuildOptions.PruneTriviallyFalseEdges = !useUnoptimizedCFG;
   cfgBuildOptions.AddImplicitDtors = addImplicitDtors;
   cfgBuildOptions.AddInitializers = addInitializers;
@@ -83,15 +79,11 @@
   cfgBuildOptions.AddLoopExit = addLoopExit;
   cfgBuildOptions.AddStaticInitBranches = addStaticInitBranch;
   cfgBuildOptions.AddCXXNewAllocator = addCXXNewAllocator;
+  cfgBuildOptions.AddRichCXXConstructors = addRichCXXConstructors;
 }
 
 void AnalysisDeclContextManager::clear() { Contexts.clear(); }
 
-static BodyFarm &getBodyFarm(ASTContext &C, CodeInjector *injector = nullptr) {
-  static BodyFarm *BF = new BodyFarm(C, injector);
-  return *BF;
-}
-
 Stmt *AnalysisDeclContext::getBody(bool &IsAutosynthesized) const {
   IsAutosynthesized = false;
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
@@ -99,8 +91,7 @@
     if (auto *CoroBody = dyn_cast_or_null<CoroutineBodyStmt>(Body))
       Body = CoroBody->getBody();
     if (Manager && Manager->synthesizeBodies()) {
-      Stmt *SynthesizedBody =
-          getBodyFarm(getASTContext(), Manager->Injector.get()).getBody(FD);
+      Stmt *SynthesizedBody = Manager->getBodyFarm().getBody(FD);
       if (SynthesizedBody) {
         Body = SynthesizedBody;
         IsAutosynthesized = true;
@@ -111,8 +102,7 @@
   else if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D)) {
     Stmt *Body = MD->getBody();
     if (Manager && Manager->synthesizeBodies()) {
-      Stmt *SynthesizedBody =
-          getBodyFarm(getASTContext(), Manager->Injector.get()).getBody(MD);
+      Stmt *SynthesizedBody = Manager->getBodyFarm().getBody(MD);
       if (SynthesizedBody) {
         Body = SynthesizedBody;
         IsAutosynthesized = true;
@@ -317,6 +307,8 @@
   return AC.get();
 }
 
+BodyFarm &AnalysisDeclContextManager::getBodyFarm() { return FunctionBodyFarm; }
+
 const StackFrameContext *
 AnalysisDeclContext::getStackFrame(LocationContext const *Parent, const Stmt *S,
                                const CFGBlock *Blk, unsigned Idx) {
@@ -473,28 +465,54 @@
   return false;
 }
 
-void LocationContext::dumpStack(raw_ostream &OS, StringRef Indent) const {
+static void printLocation(raw_ostream &OS, const SourceManager &SM,
+                          SourceLocation SLoc) {
+  if (SLoc.isFileID() && SM.isInMainFile(SLoc))
+    OS << "line " << SM.getExpansionLineNumber(SLoc);
+  else
+    SLoc.print(OS, SM);
+}
+
+void LocationContext::dumpStack(
+    raw_ostream &OS, StringRef Indent, const char *NL, const char *Sep,
+    std::function<void(const LocationContext *)> printMoreInfoPerContext) const {
   ASTContext &Ctx = getAnalysisDeclContext()->getASTContext();
   PrintingPolicy PP(Ctx.getLangOpts());
   PP.TerseOutput = 1;
 
+  const SourceManager &SM =
+      getAnalysisDeclContext()->getASTContext().getSourceManager();
+
   unsigned Frame = 0;
   for (const LocationContext *LCtx = this; LCtx; LCtx = LCtx->getParent()) {
+
     switch (LCtx->getKind()) {
     case StackFrame:
-      OS << Indent << '#' << Frame++ << ' ';
-      cast<StackFrameContext>(LCtx)->getDecl()->print(OS, PP);
-      OS << '\n';
+      OS << Indent << '#' << Frame << ' ';
+      ++Frame;
+      if (const NamedDecl *D = dyn_cast<NamedDecl>(LCtx->getDecl()))
+        OS << "Calling " << D->getQualifiedNameAsString();
+      else
+        OS << "Calling anonymous code";
+      if (const Stmt *S = cast<StackFrameContext>(LCtx)->getCallSite()) {
+        OS << " at ";
+        printLocation(OS, SM, S->getLocStart());
+      }
       break;
     case Scope:
-      OS << Indent << "    (scope)\n";
+      OS << "Entering scope";
       break;
     case Block:
-      OS << Indent << "    (block context: "
-                   << cast<BlockInvocationContext>(LCtx)->getContextData()
-                   << ")\n";
+      OS << "Invoking block";
+      if (const Decl *D = cast<BlockInvocationContext>(LCtx)->getDecl()) {
+        OS << " defined at ";
+        printLocation(OS, SM, D->getLocStart());
+      }
       break;
     }
+    OS << NL;
+
+    printMoreInfoPerContext(LCtx);
   }
 }
 
@@ -610,8 +628,6 @@
   }
 }
 
-AnalysisDeclContextManager::~AnalysisDeclContextManager() {}
-
 LocationContext::~LocationContext() {}
 
 LocationContextManager::~LocationContextManager() {
diff --git a/lib/Analysis/BodyFarm.cpp b/lib/Analysis/BodyFarm.cpp
index 9568ec7..f4bea4a 100644
--- a/lib/Analysis/BodyFarm.cpp
+++ b/lib/Analysis/BodyFarm.cpp
@@ -12,7 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BodyFarm.h"
+#include "clang/Analysis/BodyFarm.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/Decl.h"
@@ -96,8 +96,8 @@
   /// Create a Return statement.
   ReturnStmt *makeReturn(const Expr *RetVal);
   
-  /// Create an integer literal.
-  IntegerLiteral *makeIntegerLiteral(uint64_t value);
+  /// Create an integer literal expression of the given type.
+  IntegerLiteral *makeIntegerLiteral(uint64_t Value, QualType Ty);
 
   /// Create a member expression.
   MemberExpr *makeMemberExpression(Expr *base, ValueDecl *MemberDecl,
@@ -133,7 +133,7 @@
 }
 
 CompoundStmt *ASTMaker::makeCompound(ArrayRef<Stmt *> Stmts) {
-  return new (C) CompoundStmt(C, Stmts, SourceLocation(), SourceLocation());
+  return CompoundStmt::Create(C, Stmts, SourceLocation(), SourceLocation());
 }
 
 DeclRefExpr *ASTMaker::makeDeclRefExpr(
@@ -149,7 +149,8 @@
 
 UnaryOperator *ASTMaker::makeDereference(const Expr *Arg, QualType Ty) {
   return new (C) UnaryOperator(const_cast<Expr*>(Arg), UO_Deref, Ty,
-                               VK_LValue, OK_Ordinary, SourceLocation());
+                               VK_LValue, OK_Ordinary, SourceLocation(),
+                              /*CanOverflow*/ false);
 }
 
 ImplicitCastExpr *ASTMaker::makeLvalueToRvalue(const Expr *Arg, QualType Ty) {
@@ -168,10 +169,10 @@
 ImplicitCastExpr *ASTMaker::makeImplicitCast(const Expr *Arg, QualType Ty,
                                              CastKind CK) {
   return ImplicitCastExpr::Create(C, Ty,
-                                  /* CastKind= */ CK,
-                                  /* Expr= */ const_cast<Expr *>(Arg),
-                                  /* CXXCastPath= */ nullptr,
-                                  /* ExprValueKind= */ VK_RValue);
+                                  /* CastKind=*/ CK,
+                                  /* Expr=*/ const_cast<Expr *>(Arg),
+                                  /* CXXCastPath=*/ nullptr,
+                                  /* ExprValueKind=*/ VK_RValue);
 }
 
 Expr *ASTMaker::makeIntegralCast(const Expr *Arg, QualType Ty) {
@@ -206,11 +207,9 @@
                             nullptr);
 }
 
-IntegerLiteral *ASTMaker::makeIntegerLiteral(uint64_t value) {
-  return IntegerLiteral::Create(C,
-                                llvm::APInt(
-                                    /*numBits=*/C.getTypeSize(C.IntTy), value),
-                                /*QualType=*/C.IntTy, SourceLocation());
+IntegerLiteral *ASTMaker::makeIntegerLiteral(uint64_t Value, QualType Ty) {
+  llvm::APInt APValue = llvm::APInt(C.getTypeSize(Ty), Value);
+  return IntegerLiteral::Create(C, APValue, Ty, SourceLocation());
 }
 
 MemberExpr *ASTMaker::makeMemberExpression(Expr *base, ValueDecl *MemberDecl,
@@ -222,7 +221,7 @@
       C, base, IsArrow, SourceLocation(), NestedNameSpecifierLoc(),
       SourceLocation(), MemberDecl, FoundDecl,
       DeclarationNameInfo(MemberDecl->getDeclName(), SourceLocation()),
-      /* TemplateArgumentListInfo= */ nullptr, MemberDecl->getType(), ValueKind,
+      /* TemplateArgumentListInfo=*/ nullptr, MemberDecl->getType(), ValueKind,
       OK_Ordinary);
 }
 
@@ -231,7 +230,7 @@
   CXXBasePaths Paths(
       /* FindAmbiguities=*/false,
       /* RecordPaths=*/false,
-      /* DetectVirtual= */ false);
+      /* DetectVirtual=*/ false);
   const IdentifierInfo &II = C.Idents.get(Name);
   DeclarationName DeclName = C.DeclarationNames.getIdentifier(&II);
 
@@ -253,36 +252,43 @@
                                                const ParmVarDecl *Callback,
                                                ArrayRef<Expr *> CallArgs) {
 
-  return new (C) CallExpr(
-      /*ASTContext=*/C,
-      /*StmtClass=*/M.makeLvalueToRvalue(/*Expr=*/Callback),
-      /*args=*/CallArgs,
-      /*QualType=*/C.VoidTy,
-      /*ExprValueType=*/VK_RValue,
-      /*SourceLocation=*/SourceLocation());
+  QualType Ty = Callback->getType();
+  DeclRefExpr *Call = M.makeDeclRefExpr(Callback);
+  CastKind CK;
+  if (Ty->isRValueReferenceType()) {
+    CK = CK_LValueToRValue;
+  } else {
+    assert(Ty->isLValueReferenceType());
+    CK = CK_FunctionToPointerDecay;
+    Ty = C.getPointerType(Ty.getNonReferenceType());
+  }
+
+  return new (C)
+      CallExpr(C, M.makeImplicitCast(Call, Ty.getNonReferenceType(), CK),
+               /*args=*/CallArgs,
+               /*QualType=*/C.VoidTy,
+               /*ExprValueType=*/VK_RValue,
+               /*SourceLocation=*/SourceLocation());
 }
 
 static CallExpr *create_call_once_lambda_call(ASTContext &C, ASTMaker M,
                                               const ParmVarDecl *Callback,
-                                              QualType CallbackType,
+                                              CXXRecordDecl *CallbackDecl,
                                               ArrayRef<Expr *> CallArgs) {
-
-  CXXRecordDecl *CallbackDecl = CallbackType->getAsCXXRecordDecl();
-
   assert(CallbackDecl != nullptr);
   assert(CallbackDecl->isLambda());
   FunctionDecl *callOperatorDecl = CallbackDecl->getLambdaCallOperator();
   assert(callOperatorDecl != nullptr);
 
   DeclRefExpr *callOperatorDeclRef =
-      DeclRefExpr::Create(/* Ctx = */ C,
-                          /* QualifierLoc = */ NestedNameSpecifierLoc(),
-                          /* TemplateKWLoc = */ SourceLocation(),
+      DeclRefExpr::Create(/* Ctx =*/ C,
+                          /* QualifierLoc =*/ NestedNameSpecifierLoc(),
+                          /* TemplateKWLoc =*/ SourceLocation(),
                           const_cast<FunctionDecl *>(callOperatorDecl),
-                          /* RefersToEnclosingVariableOrCapture= */ false,
-                          /* NameLoc = */ SourceLocation(),
-                          /* T = */ callOperatorDecl->getType(),
-                          /* VK = */ VK_LValue);
+                          /* RefersToEnclosingVariableOrCapture=*/ false,
+                          /* NameLoc =*/ SourceLocation(),
+                          /* T =*/ callOperatorDecl->getType(),
+                          /* VK =*/ VK_LValue);
 
   return new (C)
       CXXOperatorCallExpr(/*AstContext=*/C, OO_Call, callOperatorDeclRef,
@@ -318,7 +324,20 @@
 
   const ParmVarDecl *Flag = D->getParamDecl(0);
   const ParmVarDecl *Callback = D->getParamDecl(1);
+
+  if (!Callback->getType()->isReferenceType()) {
+    llvm::dbgs() << "libcxx03 std::call_once implementation, skipping.\n";
+    return nullptr;
+  }
+  if (!Flag->getType()->isReferenceType()) {
+    llvm::dbgs() << "unknown std::call_once implementation, skipping.\n";
+    return nullptr;
+  }
+
   QualType CallbackType = Callback->getType().getNonReferenceType();
+
+  // Nullable pointer, non-null iff function is a CXXRecordDecl.
+  CXXRecordDecl *CallbackRecordDecl = CallbackType->getAsCXXRecordDecl();
   QualType FlagType = Flag->getType().getNonReferenceType();
   auto *FlagRecordDecl = dyn_cast_or_null<RecordDecl>(FlagType->getAsTagDecl());
 
@@ -336,40 +355,80 @@
   // Otherwise, try libstdc++ implementation, with a field
   // `_M_once`
   if (!FlagFieldDecl) {
-    DEBUG(llvm::dbgs() << "No field __state_ found on std::once_flag struct, "
-                       << "assuming libstdc++ implementation\n");
     FlagFieldDecl = M.findMemberField(FlagRecordDecl, "_M_once");
   }
 
   if (!FlagFieldDecl) {
-    DEBUG(llvm::dbgs() << "No field _M_once found on std::once flag struct: "
-                       << "unknown std::call_once implementation, "
-                       << "ignoring the call");
+    DEBUG(llvm::dbgs() << "No field _M_once or __state_ found on "
+                       << "std::once_flag struct: unknown std::call_once "
+                       << "implementation, ignoring the call.");
     return nullptr;
   }
 
-  bool isLambdaCall = CallbackType->getAsCXXRecordDecl() &&
-                      CallbackType->getAsCXXRecordDecl()->isLambda();
+  bool isLambdaCall = CallbackRecordDecl && CallbackRecordDecl->isLambda();
+  if (CallbackRecordDecl && !isLambdaCall) {
+    DEBUG(llvm::dbgs() << "Not supported: synthesizing body for functors when "
+                       << "body farming std::call_once, ignoring the call.");
+    return nullptr;
+  }
 
   SmallVector<Expr *, 5> CallArgs;
+  const FunctionProtoType *CallbackFunctionType;
+  if (isLambdaCall) {
 
-  if (isLambdaCall)
     // Lambda requires callback itself inserted as a first parameter.
     CallArgs.push_back(
         M.makeDeclRefExpr(Callback,
-                          /* RefersToEnclosingVariableOrCapture= */ true));
+                          /* RefersToEnclosingVariableOrCapture=*/ true));
+    CallbackFunctionType = CallbackRecordDecl->getLambdaCallOperator()
+                               ->getType()
+                               ->getAs<FunctionProtoType>();
+  } else if (!CallbackType->getPointeeType().isNull()) {
+    CallbackFunctionType =
+        CallbackType->getPointeeType()->getAs<FunctionProtoType>();
+  } else {
+    CallbackFunctionType = CallbackType->getAs<FunctionProtoType>();
+  }
 
-  // All arguments past first two ones are passed to the callback.
-  for (unsigned int i = 2; i < D->getNumParams(); i++)
-    CallArgs.push_back(
-        M.makeLvalueToRvalue(D->getParamDecl(i),
-                             /* RefersToEnclosingVariableOrCapture= */ false));
+  if (!CallbackFunctionType)
+    return nullptr;
+
+  // First two arguments are used for the flag and for the callback.
+  if (D->getNumParams() != CallbackFunctionType->getNumParams() + 2) {
+    DEBUG(llvm::dbgs() << "Types of params of the callback do not match "
+                       << "params passed to std::call_once, "
+                       << "ignoring the call\n");
+    return nullptr;
+  }
+
+  // All arguments past first two ones are passed to the callback,
+  // and we turn lvalues into rvalues if the argument is not passed by
+  // reference.
+  for (unsigned int ParamIdx = 2; ParamIdx < D->getNumParams(); ParamIdx++) {
+    const ParmVarDecl *PDecl = D->getParamDecl(ParamIdx);
+    if (PDecl &&
+        CallbackFunctionType->getParamType(ParamIdx - 2)
+                .getNonReferenceType()
+                .getCanonicalType() !=
+            PDecl->getType().getNonReferenceType().getCanonicalType()) {
+      DEBUG(llvm::dbgs() << "Types of params of the callback do not match "
+                         << "params passed to std::call_once, "
+                         << "ignoring the call\n");
+      return nullptr;
+    }
+    Expr *ParamExpr = M.makeDeclRefExpr(PDecl);
+    if (!CallbackFunctionType->getParamType(ParamIdx - 2)->isReferenceType()) {
+      QualType PTy = PDecl->getType().getNonReferenceType();
+      ParamExpr = M.makeLvalueToRvalue(ParamExpr, PTy);
+    }
+    CallArgs.push_back(ParamExpr);
+  }
 
   CallExpr *CallbackCall;
   if (isLambdaCall) {
 
-    CallbackCall =
-        create_call_once_lambda_call(C, M, Callback, CallbackType, CallArgs);
+    CallbackCall = create_call_once_lambda_call(C, M, Callback,
+                                                CallbackRecordDecl, CallArgs);
   } else {
 
     // Function pointer case.
@@ -387,25 +446,27 @@
 
   // Negation predicate.
   UnaryOperator *FlagCheck = new (C) UnaryOperator(
-      /* input= */
+      /* input=*/
       M.makeImplicitCast(M.makeLvalueToRvalue(Deref, DerefType), DerefType,
                          CK_IntegralToBoolean),
-      /* opc= */ UO_LNot,
-      /* QualType= */ C.IntTy,
-      /* ExprValueKind= */ VK_RValue,
-      /* ExprObjectKind= */ OK_Ordinary, SourceLocation());
+      /* opc=*/ UO_LNot,
+      /* QualType=*/ C.IntTy,
+      /* ExprValueKind=*/ VK_RValue,
+      /* ExprObjectKind=*/ OK_Ordinary, SourceLocation(),
+      /* CanOverflow*/ false);
 
   // Create assignment.
   BinaryOperator *FlagAssignment = M.makeAssignment(
-      Deref, M.makeIntegralCast(M.makeIntegerLiteral(1), DerefType), DerefType);
+      Deref, M.makeIntegralCast(M.makeIntegerLiteral(1, C.IntTy), DerefType),
+      DerefType);
 
   IfStmt *Out = new (C)
       IfStmt(C, SourceLocation(),
-             /* IsConstexpr= */ false,
-             /* init= */ nullptr,
-             /* var= */ nullptr,
-             /* cond= */ FlagCheck,
-             /* then= */ M.makeCompound({CallbackCall, FlagAssignment}));
+             /* IsConstexpr=*/ false,
+             /* init=*/ nullptr,
+             /* var=*/ nullptr,
+             /* cond=*/ FlagCheck,
+             /* then=*/ M.makeCompound({CallbackCall, FlagAssignment}));
 
   return Out;
 }
@@ -436,8 +497,8 @@
   // sets it, and calls the block.  Basically, an AST dump of:
   //
   // void dispatch_once(dispatch_once_t *predicate, dispatch_block_t block) {
-  //  if (!*predicate) {
-  //    *predicate = 1;
+  //  if (*predicate != ~0l) {
+  //    *predicate = ~0l;
   //    block();
   //  }
   // }
@@ -454,7 +515,10 @@
       /*SourceLocation=*/SourceLocation());
 
   // (2) Create the assignment to the predicate.
-  IntegerLiteral *IL = M.makeIntegerLiteral(1);
+  Expr *DoneValue =
+      new (C) UnaryOperator(M.makeIntegerLiteral(0, C.LongTy), UO_Not, C.LongTy,
+                            VK_RValue, OK_Ordinary, SourceLocation(),
+                            /*CanOverflow*/false);
 
   BinaryOperator *B =
     M.makeAssignment(
@@ -462,7 +526,7 @@
           M.makeLvalueToRvalue(
             M.makeDeclRefExpr(Predicate), PredicateQPtrTy),
             PredicateTy),
-       M.makeIntegralCast(IL, PredicateTy),
+       M.makeIntegralCast(DoneValue, PredicateTy),
        PredicateTy);
   
   // (3) Create the compound statement.
@@ -478,21 +542,15 @@
           PredicateQPtrTy),
         PredicateTy),
     PredicateTy);
-  
-  UnaryOperator *UO = new (C) UnaryOperator(
-      /* input= */ LValToRval,
-      /* opc= */ UO_LNot,
-      /* QualType= */ C.IntTy,
-      /* ExprValueKind= */ VK_RValue,
-      /* ExprObjectKind= */ OK_Ordinary, SourceLocation());
-  
+
+  Expr *GuardCondition = M.makeComparison(LValToRval, DoneValue, BO_NE);
   // (5) Create the 'if' statement.
   IfStmt *If = new (C) IfStmt(C, SourceLocation(),
-                              /* IsConstexpr= */ false,
-                              /* init= */ nullptr,
-                              /* var= */ nullptr,
-                              /* cond= */ UO,
-                              /* then= */ CS);
+                              /* IsConstexpr=*/ false,
+                              /* init=*/ nullptr,
+                              /* var=*/ nullptr,
+                              /* cond=*/ GuardCondition,
+                              /* then=*/ CS);
   return If;
 }
 
@@ -768,4 +826,3 @@
 
   return Val.getValue();
 }
-
diff --git a/lib/Analysis/BodyFarm.h b/lib/Analysis/BodyFarm.h
deleted file mode 100644
index edbe996..0000000
--- a/lib/Analysis/BodyFarm.h
+++ /dev/null
@@ -1,51 +0,0 @@
-//== BodyFarm.h - Factory for conjuring up fake bodies -------------*- C++ -*-//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// BodyFarm is a factory for creating faux implementations for functions/methods
-// for analysis purposes.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H
-#define LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H
-
-#include "clang/AST/DeclBase.h"
-#include "clang/Basic/LLVM.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
-
-namespace clang {
-
-class ASTContext;
-class FunctionDecl;
-class ObjCMethodDecl;
-class ObjCPropertyDecl;
-class Stmt;
-class CodeInjector;
-  
-class BodyFarm {
-public:
-  BodyFarm(ASTContext &C, CodeInjector *injector) : C(C), Injector(injector) {}
-  
-  /// Factory method for creating bodies for ordinary functions.
-  Stmt *getBody(const FunctionDecl *D);
-
-  /// Factory method for creating bodies for Objective-C properties.
-  Stmt *getBody(const ObjCMethodDecl *D);
-
-private:
-  typedef llvm::DenseMap<const Decl *, Optional<Stmt *> > BodyMap;
-
-  ASTContext &C;
-  BodyMap Bodies;
-  CodeInjector *Injector;
-};
-}
-
-#endif
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 164d964..ae18278 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -1,4 +1,4 @@
-//===--- CFG.cpp - Classes for representing and building CFGs----*- C++ -*-===//
+//===- CFG.cpp - Classes for representing and building CFGs ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,23 +15,54 @@
 #include "clang/Analysis/CFG.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
-#include "clang/AST/CharUnits.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclGroup.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/OperationKinds.h"
 #include "clang/AST/PrettyPrinter.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/StmtCXX.h"
+#include "clang/AST/StmtObjC.h"
 #include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/Support/BumpVector.h"
+#include "clang/Analysis/ConstructionContext.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/Basic/ExceptionSpecificationType.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include <memory>
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
 
 using namespace clang;
 
-namespace {
-
 static SourceLocation GetEndLoc(Decl *D) {
   if (VarDecl *VD = dyn_cast<VarDecl>(D))
     if (Expr *Ex = VD->getInit())
@@ -41,7 +72,7 @@
 
 /// Helper for tryNormalizeBinaryOperator. Attempts to extract an IntegerLiteral
 /// or EnumConstantDecl from the given Expr. If it fails, returns nullptr.
-const Expr *tryTransformToIntOrEnumConstant(const Expr *E) {
+static const Expr *tryTransformToIntOrEnumConstant(const Expr *E) {
   E = E->IgnoreParens();
   if (isa<IntegerLiteral>(E))
     return E;
@@ -111,6 +142,8 @@
   return DC1 == DC2;
 }
 
+namespace {
+
 class CFGBuilder;
   
 /// The CFG builder uses a recursive algorithm to build the CFG.  When
@@ -125,7 +158,6 @@
 ///  contextual information.  If AddStmtChoice is 'NotAlwaysAdd', then
 ///  the builder has an option not to add a subexpression as a
 ///  block-level expression.
-///
 class AddStmtChoice {
 public:
   enum Kind { NotAlwaysAdd = 0, AlwaysAdd = 1 };
@@ -168,23 +200,24 @@
 ///
 class LocalScope {
 public:
-  typedef BumpVector<VarDecl*> AutomaticVarsTy;
+  friend class const_iterator;
+
+  using AutomaticVarsTy = BumpVector<VarDecl *>;
 
   /// const_iterator - Iterates local scope backwards and jumps to previous
   /// scope on reaching the beginning of currently iterated scope.
   class const_iterator {
-    const LocalScope* Scope;
+    const LocalScope* Scope = nullptr;
 
     /// VarIter is guaranteed to be greater then 0 for every valid iterator.
     /// Invalid iterator (with null Scope) has VarIter equal to 0.
-    unsigned VarIter;
+    unsigned VarIter = 0;
 
   public:
     /// Create invalid iterator. Dereferencing invalid iterator is not allowed.
     /// Incrementing invalid iterator is allowed and will result in invalid
     /// iterator.
-    const_iterator()
-        : Scope(nullptr), VarIter(0) {}
+    const_iterator() = default;
 
     /// Create valid iterator. In case when S.Prev is an invalid iterator and
     /// I is equal to 0, this will create invalid iterator.
@@ -197,8 +230,8 @@
     }
 
     VarDecl *const* operator->() const {
-      assert (Scope && "Dereferencing invalid iterator is not allowed");
-      assert (VarIter != 0 && "Iterator has invalid value of VarIter member");
+      assert(Scope && "Dereferencing invalid iterator is not allowed");
+      assert(VarIter != 0 && "Iterator has invalid value of VarIter member");
       return &Scope->Vars[VarIter - 1];
     }
     VarDecl *operator*() const {
@@ -209,7 +242,7 @@
       if (!Scope)
         return *this;
 
-      assert (VarIter != 0 && "Iterator has invalid value of VarIter member");
+      assert(VarIter != 0 && "Iterator has invalid value of VarIter member");
       --VarIter;
       if (VarIter == 0)
         *this = Scope->Prev;
@@ -236,13 +269,12 @@
     const_iterator shared_parent(const_iterator L);
   };
 
-  friend class const_iterator;
-
 private:
   BumpVectorContext ctx;
   
   /// Automatic variables in order of declaration.
   AutomaticVarsTy Vars;
+
   /// Iterator to variable in previous scope that was declared just before
   /// begin of this scope.
   const_iterator Prev;
@@ -260,6 +292,8 @@
   }
 };
 
+} // namespace
+
 /// distance - Calculates distance from this to L. L must be reachable from this
 /// (with use of ++ operator). Cost of calculating the distance is linear w.r.t.
 /// number of scopes between this and L.
@@ -267,8 +301,8 @@
   int D = 0;
   const_iterator F = *this;
   while (F.Scope != L.Scope) {
-    assert (F != const_iterator()
-        && "L iterator is not reachable from F iterator.");
+    assert(F != const_iterator() &&
+           "L iterator is not reachable from F iterator.");
     D += F.VarIter;
     F = F.Scope->Prev;
   }
@@ -300,16 +334,18 @@
   }
 }
 
+namespace {
+
 /// Structure for specifying position in CFG during its build process. It
 /// consists of CFGBlock that specifies position in CFG and
 /// LocalScope::const_iterator that specifies position in LocalScope graph.
 struct BlockScopePosPair {
-  BlockScopePosPair() : block(nullptr) {}
+  CFGBlock *block = nullptr;
+  LocalScope::const_iterator scopePosition;
+
+  BlockScopePosPair() = default;
   BlockScopePosPair(CFGBlock *b, LocalScope::const_iterator scopePos)
       : block(b), scopePosition(scopePos) {}
-
-  CFGBlock *block;
-  LocalScope::const_iterator scopePosition;
 };
 
 /// TryResult - a class representing a variant over the values
@@ -317,37 +353,46 @@
 ///  and is used by the CFGBuilder to decide if a branch condition
 ///  can be decided up front during CFG construction.
 class TryResult {
-  int X;
+  int X = -1;
+
 public:
+  TryResult() = default;
   TryResult(bool b) : X(b ? 1 : 0) {}
-  TryResult() : X(-1) {}
   
   bool isTrue() const { return X == 1; }
   bool isFalse() const { return X == 0; }
   bool isKnown() const { return X >= 0; }
+
   void negate() {
     assert(isKnown());
     X ^= 0x1;
   }
 };
 
-TryResult bothKnownTrue(TryResult R1, TryResult R2) {
+} // namespace
+
+static TryResult bothKnownTrue(TryResult R1, TryResult R2) {
   if (!R1.isKnown() || !R2.isKnown())
     return TryResult();
   return TryResult(R1.isTrue() && R2.isTrue());
 }
 
+namespace {
+
 class reverse_children {
   llvm::SmallVector<Stmt *, 12> childrenBuf;
-  ArrayRef<Stmt*> children;
+  ArrayRef<Stmt *> children;
+
 public:
   reverse_children(Stmt *S);
 
-  typedef ArrayRef<Stmt*>::reverse_iterator iterator;
+  using iterator = ArrayRef<Stmt *>::reverse_iterator;
+
   iterator begin() const { return children.rbegin(); }
   iterator end() const { return children.rend(); }
 };
 
+} // namespace
 
 reverse_children::reverse_children(Stmt *S) {
   if (CallExpr *CE = dyn_cast<CallExpr>(S)) {
@@ -374,6 +419,8 @@
   children = childrenBuf;
 }
 
+namespace {
+
 /// CFGBuilder - This class implements CFG construction from an AST.
 ///   The builder is stateful: an instance of the builder should be used to only
 ///   construct a single CFG.
@@ -387,65 +434,72 @@
 ///  the AST in reverse order so that the successor of a basic block is
 ///  constructed prior to its predecessor.  This allows us to nicely capture
 ///  implicit fall-throughs without extra basic blocks.
-///
 class CFGBuilder {
-  typedef BlockScopePosPair JumpTarget;
-  typedef BlockScopePosPair JumpSource;
+  using JumpTarget = BlockScopePosPair;
+  using JumpSource = BlockScopePosPair;
 
   ASTContext *Context;
   std::unique_ptr<CFG> cfg;
 
-  CFGBlock *Block;  // Current block.
-  CFGBlock *Succ;  // Block after the current block.
+  // Current block.
+  CFGBlock *Block = nullptr;
+
+  // Block after the current block.
+  CFGBlock *Succ = nullptr;
+
   JumpTarget ContinueJumpTarget;
   JumpTarget BreakJumpTarget;
   JumpTarget SEHLeaveJumpTarget;
-  CFGBlock *SwitchTerminatedBlock;
-  CFGBlock *DefaultCaseBlock;
+  CFGBlock *SwitchTerminatedBlock = nullptr;
+  CFGBlock *DefaultCaseBlock = nullptr;
 
   // This can point either to a try or a __try block. The frontend forbids
   // mixing both kinds in one function, so having one for both is enough.
-  CFGBlock *TryTerminatedBlock;
+  CFGBlock *TryTerminatedBlock = nullptr;
 
   // Current position in local scope.
   LocalScope::const_iterator ScopePos;
 
   // LabelMap records the mapping from Label expressions to their jump targets.
-  typedef llvm::DenseMap<LabelDecl*, JumpTarget> LabelMapTy;
+  using LabelMapTy = llvm::DenseMap<LabelDecl *, JumpTarget>;
   LabelMapTy LabelMap;
 
   // A list of blocks that end with a "goto" that must be backpatched to their
   // resolved targets upon completion of CFG construction.
-  typedef std::vector<JumpSource> BackpatchBlocksTy;
+  using BackpatchBlocksTy = std::vector<JumpSource>;
   BackpatchBlocksTy BackpatchBlocks;
 
   // A list of labels whose address has been taken (for indirect gotos).
-  typedef llvm::SmallPtrSet<LabelDecl*, 5> LabelSetTy;
+  using LabelSetTy = llvm::SmallSetVector<LabelDecl *, 8>;
   LabelSetTy AddressTakenLabels;
 
-  bool badCFG;
+  // Information about the currently visited C++ object construction site.
+  // This is set in the construction trigger and read when the constructor
+  // itself is being visited.
+  llvm::DenseMap<CXXConstructExpr *, const ConstructionContextLayer *>
+      ConstructionContextMap;
+
+  bool badCFG = false;
   const CFG::BuildOptions &BuildOpts;
   
   // State to track for building switch statements.
-  bool switchExclusivelyCovered;
-  Expr::EvalResult *switchCond;
+  bool switchExclusivelyCovered = false;
+  Expr::EvalResult *switchCond = nullptr;
   
-  CFG::BuildOptions::ForcedBlkExprs::value_type *cachedEntry;
-  const Stmt *lastLookup;
+  CFG::BuildOptions::ForcedBlkExprs::value_type *cachedEntry = nullptr;
+  const Stmt *lastLookup = nullptr;
 
   // Caches boolean evaluations of expressions to avoid multiple re-evaluations
   // during construction of branches for chained logical operators.
-  typedef llvm::DenseMap<Expr *, TryResult> CachedBoolEvalsTy;
+  using CachedBoolEvalsTy = llvm::DenseMap<Expr *, TryResult>;
   CachedBoolEvalsTy CachedBoolEvals;
 
 public:
   explicit CFGBuilder(ASTContext *astContext,
                       const CFG::BuildOptions &buildOpts)
       : Context(astContext), cfg(new CFG()), // crew a new CFG
-        Block(nullptr), Succ(nullptr), SwitchTerminatedBlock(nullptr),
-        DefaultCaseBlock(nullptr), TryTerminatedBlock(nullptr), badCFG(false),
-        BuildOpts(buildOpts), switchExclusivelyCovered(false),
-        switchCond(nullptr), cachedEntry(nullptr), lastLookup(nullptr) {}
+        ConstructionContextMap(), BuildOpts(buildOpts) {}
+
 
   // buildCFG - Used by external clients to construct the CFG.
   std::unique_ptr<CFG> buildCFG(const Decl *D, Stmt *Statement);
@@ -495,6 +549,8 @@
                                                          Stmt *Term,
                                                          CFGBlock *TrueBlock,
                                                          CFGBlock *FalseBlock);
+  CFGBlock *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE,
+                                          AddStmtChoice asc);
   CFGBlock *VisitMemberExpr(MemberExpr *M, AddStmtChoice asc);
   CFGBlock *VisitObjCAtCatchStmt(ObjCAtCatchStmt *S);
   CFGBlock *VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *S);
@@ -549,13 +605,9 @@
   ///     if the CXXBindTemporaryExpr was marked executed, and otherwise
   ///     branches to the stored successor.
   struct TempDtorContext {
-    TempDtorContext()
-        : IsConditional(false), KnownExecuted(true), Succ(nullptr),
-          TerminatorExpr(nullptr) {}
-
+    TempDtorContext() = default;
     TempDtorContext(TryResult KnownExecuted)
-        : IsConditional(true), KnownExecuted(KnownExecuted), Succ(nullptr),
-          TerminatorExpr(nullptr) {}
+        : IsConditional(true), KnownExecuted(KnownExecuted) {}
 
     /// Returns whether we need to start a new branch for a temporary destructor
     /// call. This is the case when the temporary destructor is
@@ -574,10 +626,10 @@
       TerminatorExpr = E;
     }
 
-    const bool IsConditional;
-    const TryResult KnownExecuted;
-    CFGBlock *Succ;
-    CXXBindTemporaryExpr *TerminatorExpr;
+    const bool IsConditional = false;
+    const TryResult KnownExecuted = true;
+    CFGBlock *Succ = nullptr;
+    CXXBindTemporaryExpr *TerminatorExpr = nullptr;
   };
 
   // Visitors to walk an AST and generate destructors of temporaries in
@@ -601,6 +653,24 @@
     return Block;
   }
 
+  // Remember to apply the construction context based on the current \p Layer
+  // when constructing the CFG element for \p CE.
+  void consumeConstructionContext(const ConstructionContextLayer *Layer,
+                                  CXXConstructExpr *CE);
+
+  // Scan \p Child statement to find constructors in it, while keeping in mind
+  // that its parent statement is providing a partial construction context
+  // described by \p Layer. If a constructor is found, it would be assigned
+  // the context based on the layer. If an additional construction context layer
+  // is found, the function recurses into that.
+  void findConstructionContexts(const ConstructionContextLayer *Layer,
+                                Stmt *Child);
+
+  // Unset the construction context after consuming it. This is done immediately
+  // after adding the CFGConstructor element, so there's no need to
+  // do this manually in every Visit... function.
+  void cleanupConstructionContext(CXXConstructExpr *CE);
+
   void autoCreateBlock() { if (!Block) Block = createBlock(); }
   CFGBlock *createBlock(bool add_successor = true);
   CFGBlock *createNoReturnBlock();
@@ -608,6 +678,7 @@
   CFGBlock *addStmt(Stmt *S) {
     return Visit(S, AddStmtChoice::AlwaysAdd);
   }
+
   CFGBlock *addInitializer(CXXCtorInitializer *I);
   void addLoopExit(const Stmt *LoopStmt);
   void addAutomaticObjDtors(LocalScope::const_iterator B,
@@ -629,6 +700,7 @@
   void addLocalScopeAndDtors(Stmt *S);
 
   // Interface to CFGBlock - adding CFGElements.
+
   void appendStmt(CFGBlock *B, const Stmt *S) {
     if (alwaysAdd(S) && cachedEntry)
       cachedEntry->second = B;
@@ -637,21 +709,44 @@
     assert(!isa<Expr>(S) || cast<Expr>(S)->IgnoreParens() == S);
     B->appendStmt(const_cast<Stmt*>(S), cfg->getBumpVectorContext());
   }
+
+  void appendConstructor(CFGBlock *B, CXXConstructExpr *CE) {
+    if (BuildOpts.AddRichCXXConstructors) {
+      if (const ConstructionContextLayer *Layer =
+              ConstructionContextMap.lookup(CE)) {
+        const ConstructionContext *CC =
+            ConstructionContext::createFromLayers(cfg->getBumpVectorContext(),
+                                                  Layer);
+        B->appendConstructor(CE, CC, cfg->getBumpVectorContext());
+        cleanupConstructionContext(CE);
+        return;
+      }
+    }
+
+    // No valid construction context found. Fall back to statement.
+    B->appendStmt(CE, cfg->getBumpVectorContext());
+  }
+
   void appendInitializer(CFGBlock *B, CXXCtorInitializer *I) {
     B->appendInitializer(I, cfg->getBumpVectorContext());
   }
+
   void appendNewAllocator(CFGBlock *B, CXXNewExpr *NE) {
     B->appendNewAllocator(NE, cfg->getBumpVectorContext());
   }
+
   void appendBaseDtor(CFGBlock *B, const CXXBaseSpecifier *BS) {
     B->appendBaseDtor(BS, cfg->getBumpVectorContext());
   }
+
   void appendMemberDtor(CFGBlock *B, FieldDecl *FD) {
     B->appendMemberDtor(FD, cfg->getBumpVectorContext());
   }
+
   void appendTemporaryDtor(CFGBlock *B, CXXBindTemporaryExpr *E) {
     B->appendTemporaryDtor(E, cfg->getBumpVectorContext());
   }
+
   void appendAutomaticObjDtor(CFGBlock *B, VarDecl *VD, Stmt *S) {
     B->appendAutomaticObjDtor(VD, S, cfg->getBumpVectorContext());
   }
@@ -811,10 +906,10 @@
     const BinaryOperator *RHS =
         dyn_cast<BinaryOperator>(B->getRHS()->IgnoreParens());
     if (!LHS || !RHS)
-      return TryResult();
+      return {};
 
     if (!LHS->isComparisonOp() || !RHS->isComparisonOp())
-      return TryResult();
+      return {};
 
     const DeclRefExpr *Decl1;
     const Expr *Expr1;
@@ -822,7 +917,7 @@
     std::tie(Decl1, BO1, Expr1) = tryNormalizeBinaryOperator(LHS);
 
     if (!Decl1 || !Expr1)
-      return TryResult();
+      return {};
 
     const DeclRefExpr *Decl2;
     const Expr *Expr2;
@@ -830,26 +925,26 @@
     std::tie(Decl2, BO2, Expr2) = tryNormalizeBinaryOperator(RHS);
 
     if (!Decl2 || !Expr2)
-      return TryResult();
+      return {};
 
     // Check that it is the same variable on both sides.
     if (Decl1->getDecl() != Decl2->getDecl())
-      return TryResult();
+      return {};
 
     // Make sure the user's intent is clear (e.g. they're comparing against two
     // int literals, or two things from the same enum)
     if (!areExprTypesCompatible(Expr1, Expr2))
-      return TryResult();
+      return {};
 
     llvm::APSInt L1, L2;
 
     if (!Expr1->EvaluateAsInt(L1, *Context) ||
         !Expr2->EvaluateAsInt(L2, *Context))
-      return TryResult();
+      return {};
 
     // Can't compare signed with unsigned or with different bit width.
     if (L1.isSigned() != L2.isSigned() || L1.getBitWidth() != L2.getBitWidth())
-      return TryResult();
+      return {};
 
     // Values that will be used to determine if result of logical
     // operator is always true/false
@@ -880,7 +975,7 @@
       Res2 = analyzeLogicOperatorCondition(BO2, Value, L2);
 
       if (!Res1.isKnown() || !Res2.isKnown())
-        return TryResult();
+        return {};
 
       if (B->getOpcode() == BO_LAnd) {
         AlwaysTrue &= (Res1.isTrue() && Res2.isTrue());
@@ -896,7 +991,7 @@
         BuildOpts.Observer->compareAlwaysTrue(B, AlwaysTrue);
       return TryResult(AlwaysTrue);
     }
-    return TryResult();
+    return {};
   }
 
   /// Try and evaluate an expression to an integer constant.
@@ -913,7 +1008,7 @@
   TryResult tryEvaluateBool(Expr *S) {
     if (!BuildOpts.PruneTriviallyFalseEdges ||
         S->isTypeDependent() || S->isValueDependent())
-      return TryResult();
+      return {};
 
     if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(S)) {
       if (Bop->isLogicalOp()) {
@@ -988,7 +1083,7 @@
           }
         }
 
-        return TryResult();
+        return {};
       } else if (Bop->isEqualityOp()) {
           TryResult BopRes = checkIncorrectEqualityOperator(Bop);
           if (BopRes.isKnown())
@@ -1004,12 +1099,14 @@
     if (E->EvaluateAsBooleanCondition(Result, *Context))
       return Result;
 
-    return TryResult();
+    return {};
   }
 
   bool hasTrivialDestructor(VarDecl *VD);
 };
 
+} // namespace
+
 inline bool AddStmtChoice::alwaysAdd(CFGBuilder &builder,
                                      const Stmt *stmt) const {
   return builder.alwaysAdd(stmt) || kind == AlwaysAdd;
@@ -1064,6 +1161,79 @@
   return nullptr;
 }
 
+void CFGBuilder::consumeConstructionContext(
+    const ConstructionContextLayer *Layer, CXXConstructExpr *CE) {
+  if (const ConstructionContextLayer *PreviouslyStoredLayer =
+          ConstructionContextMap.lookup(CE)) {
+    (void)PreviouslyStoredLayer;
+    // We might have visited this child when we were finding construction
+    // contexts within its parents.
+    assert(PreviouslyStoredLayer->isStrictlyMoreSpecificThan(Layer) &&
+           "Already within a different construction context!");
+  } else {
+    ConstructionContextMap[CE] = Layer;
+  }
+}
+
+void CFGBuilder::findConstructionContexts(
+    const ConstructionContextLayer *Layer, Stmt *Child) {
+  if (!BuildOpts.AddRichCXXConstructors)
+    return;
+
+  if (!Child)
+    return;
+
+  switch(Child->getStmtClass()) {
+  case Stmt::CXXConstructExprClass:
+  case Stmt::CXXTemporaryObjectExprClass: {
+    consumeConstructionContext(Layer, cast<CXXConstructExpr>(Child));
+    break;
+  }
+  case Stmt::ExprWithCleanupsClass: {
+    auto *Cleanups = cast<ExprWithCleanups>(Child);
+    findConstructionContexts(Layer, Cleanups->getSubExpr());
+    break;
+  }
+  case Stmt::CXXFunctionalCastExprClass: {
+    auto *Cast = cast<CXXFunctionalCastExpr>(Child);
+    findConstructionContexts(Layer, Cast->getSubExpr());
+    break;
+  }
+  case Stmt::ImplicitCastExprClass: {
+    auto *Cast = cast<ImplicitCastExpr>(Child);
+    // TODO: We need to support CK_ConstructorConversion, maybe other kinds?
+    if (Cast->getCastKind() == CK_NoOp)
+      findConstructionContexts(Layer, Cast->getSubExpr());
+    break;
+  }
+  case Stmt::CXXBindTemporaryExprClass: {
+    auto *BTE = cast<CXXBindTemporaryExpr>(Child);
+    findConstructionContexts(
+        ConstructionContextLayer::create(cfg->getBumpVectorContext(),
+                                         BTE, Layer),
+        BTE->getSubExpr());
+    break;
+  }
+  case Stmt::ConditionalOperatorClass: {
+    auto *CO = cast<ConditionalOperator>(Child);
+    findConstructionContexts(Layer, CO->getLHS());
+    findConstructionContexts(Layer, CO->getRHS());
+    break;
+  }
+  default:
+    break;
+  }
+}
+
+void CFGBuilder::cleanupConstructionContext(CXXConstructExpr *CE) {
+  assert(BuildOpts.AddRichCXXConstructors &&
+         "We should not be managing construction contexts!");
+  assert(ConstructionContextMap.count(CE) &&
+         "Cannot exit construction context without the context!");
+  ConstructionContextMap.erase(CE);
+}
+
+
 /// BuildCFG - Constructs a CFG from an AST (a Stmt*).  The AST can represent an
 ///  arbitrary statement.  Examples include a single expression or a function
 ///  body (compound statement).  The ownership of the returned CFG is
@@ -1131,7 +1301,6 @@
   if (CFGBlock *B = cfg->getIndirectGotoBlock())
     for (LabelSetTy::iterator I = AddressTakenLabels.begin(),
                               E = AddressTakenLabels.end(); I != E; ++I ) {
-      
       // Lookup the target block.
       LabelMapTy::iterator LI = LabelMap.find(*I);
 
@@ -1145,6 +1314,10 @@
   // Create an empty entry block that has no predecessors.
   cfg->setEntry(createBlock());
 
+  if (BuildOpts.AddRichCXXConstructors)
+    assert(ConstructionContextMap.empty() &&
+           "Not all construction contexts were cleaned up!");
+
   return std::move(cfg);
 }
 
@@ -1192,6 +1365,10 @@
   appendInitializer(Block, I);
 
   if (Init) {
+    findConstructionContexts(
+        ConstructionContextLayer::create(cfg->getBumpVectorContext(), I),
+        Init);
+
     if (HasTemporaries) {
       // For expression with temporaries go directly to subexpression to omit
       // generating destructors for the second time.
@@ -1265,7 +1442,6 @@
   return Init->getType();
 }
 
-
 // TODO: Support adding LoopExit element to the CFG in case where the loop is
 // ended by ReturnStmt, GotoStmt or ThrowExpr.
 void CFGBuilder::addLoopExit(const Stmt *LoopStmt){
@@ -1373,8 +1549,8 @@
 /// addImplicitDtorsForDestructor - Add implicit destructors generated for
 /// base and member objects in destructor.
 void CFGBuilder::addImplicitDtorsForDestructor(const CXXDestructorDecl *DD) {
-  assert (BuildOpts.AddImplicitDtors
-      && "Can be called only when dtors should be added");
+  assert(BuildOpts.AddImplicitDtors &&
+         "Can be called only when dtors should be added");
   const CXXRecordDecl *RD = DD->getParent();
 
   // At the end destroy virtual base objects.
@@ -1577,6 +1753,7 @@
   for (LocalScope::const_iterator I = B; I != E; ++I)
     InsertPos = Blk->insertLifetimeEnds(InsertPos, *I, Blk->getTerminator());
 }
+
 /// Visit - Walk the subtree of a statement and add extra
 ///   blocks for ternary operators, &&, and ||.  We also process "," and
 ///   DeclStmts (which may contain nested control-flow).
@@ -1705,6 +1882,10 @@
     case Stmt::LambdaExprClass:
       return VisitLambdaExpr(cast<LambdaExpr>(S), asc);
 
+    case Stmt::MaterializeTemporaryExprClass:
+      return VisitMaterializeTemporaryExpr(cast<MaterializeTemporaryExpr>(S),
+                                           asc);
+
     case Stmt::MemberExprClass:
       return VisitMemberExpr(cast<MemberExpr>(S), asc);
 
@@ -1831,7 +2012,6 @@
                                  Stmt *Term,
                                  CFGBlock *TrueBlock,
                                  CFGBlock *FalseBlock) {
-
   // Introspect the RHS.  If it is a nested logical operation, we recursively
   // build the CFG using this function.  Otherwise, resort to default
   // CFG construction behavior.
@@ -1920,7 +2100,6 @@
   return std::make_pair(EntryLHSBlock, ExitBlock);
 }
 
-
 CFGBlock *CFGBuilder::VisitBinaryOperator(BinaryOperator *B,
                                           AddStmtChoice asc) {
    // && or ||
@@ -1982,7 +2161,6 @@
   } else
     badCFG = true;
 
-
   return Block;
 }
 
@@ -2030,7 +2208,7 @@
   bool OmitArguments = false;
 
   if (FunctionDecl *FD = C->getDirectCallee()) {
-    if (FD->isNoReturn())
+    if (FD->isNoReturn() || C->isBuiltinAssumeFalse(*Context))
       NoReturn = true;
     if (FD->hasAttr<NoThrowAttr>())
       AddEHEdge = false;
@@ -2106,7 +2284,6 @@
   return addStmt(C->getCond());
 }
 
-
 CFGBlock *CFGBuilder::VisitCompoundStmt(CompoundStmt *C) {
   LocalScope::const_iterator scopeBeginPos = ScopePos;
   addLocalScopeForStmt(C);
@@ -2279,7 +2456,11 @@
 
   autoCreateBlock();
   appendStmt(Block, DS);
-  
+
+  findConstructionContexts(
+      ConstructionContextLayer::create(cfg->getBumpVectorContext(), DS),
+      Init);
+
   // Keep track of the last non-null block, as 'Block' can be nulled out
   // if the initializer expression is something like a 'while' in a
   // statement-expression.
@@ -2457,7 +2638,6 @@
   return LastBlock;
 }
 
-
 CFGBlock *CFGBuilder::VisitReturnStmt(ReturnStmt *R) {
   // If we were in the middle of a block we stop processing that block.
   //
@@ -2471,6 +2651,10 @@
 
   addAutomaticObjHandling(ScopePos, LocalScope::const_iterator(), R);
 
+  findConstructionContexts(
+      ConstructionContextLayer::create(cfg->getBumpVectorContext(), R),
+      R->getRetValue());
+
   // If the one of the destructors does not return, we already have the Exit
   // block as a successor.
   if (!Block->hasNoReturnElement())
@@ -2815,7 +2999,6 @@
     // false branch).
     addSuccessor(ExitConditionBlock,
                  KnownVal.isTrue() ? nullptr : LoopSuccessor);
-
   } while (false);
 
   // Link up the loop-back block to the entry condition block.
@@ -2838,6 +3021,16 @@
   return EntryConditionBlock;
 }
 
+CFGBlock *
+CFGBuilder::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE,
+                                          AddStmtChoice asc) {
+  findConstructionContexts(
+      ConstructionContextLayer::create(cfg->getBumpVectorContext(), MTE),
+      MTE->getTemporary());
+
+  return VisitStmt(MTE, asc);
+}
+
 CFGBlock *CFGBuilder::VisitMemberExpr(MemberExpr *M, AddStmtChoice asc) {
   if (asc.alwaysAdd(*this, M)) {
     autoCreateBlock();
@@ -2877,7 +3070,6 @@
   //   the same with newVariable replaced with existingItem; the binding works
   //   the same except that for one ObjCForCollectionStmt::getElement() returns
   //   a DeclStmt and the other returns a DeclRefExpr.
-  //
 
   CFGBlock *LoopSuccessor = nullptr;
 
@@ -3129,7 +3321,6 @@
     // false branch).
     addSuccessor(ExitConditionBlock,
                  KnownVal.isTrue() ? nullptr : LoopSuccessor);
-
   } while(false);
 
   // Link up the loop-back block to the entry condition block.
@@ -3144,7 +3335,6 @@
   return EntryConditionBlock;
 }
 
-
 CFGBlock *CFGBuilder::VisitObjCAtCatchStmt(ObjCAtCatchStmt *S) {
   // FIXME: For now we pretend that @catch and the code it contains does not
   //  exit.
@@ -3317,7 +3507,6 @@
 
 CFGBlock *CFGBuilder::VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E,
                                                     AddStmtChoice asc) {
-
   if (asc.alwaysAdd(*this, E)) {
     autoCreateBlock();
     appendStmt(Block, E);
@@ -3822,6 +4011,10 @@
     autoCreateBlock();
     appendStmt(Block, E);
 
+    findConstructionContexts(
+        ConstructionContextLayer::create(cfg->getBumpVectorContext(), E),
+        E->getSubExpr());
+
     // We do not want to propagate the AlwaysAdd property.
     asc = asc.withAlwaysAdd(false);
   }
@@ -3831,26 +4024,33 @@
 CFGBlock *CFGBuilder::VisitCXXConstructExpr(CXXConstructExpr *C,
                                             AddStmtChoice asc) {
   autoCreateBlock();
-  appendStmt(Block, C);
+  appendConstructor(Block, C);
 
   return VisitChildren(C);
 }
 
 CFGBlock *CFGBuilder::VisitCXXNewExpr(CXXNewExpr *NE,
                                       AddStmtChoice asc) {
-
   autoCreateBlock();
   appendStmt(Block, NE);
 
+  findConstructionContexts(
+      ConstructionContextLayer::create(cfg->getBumpVectorContext(), NE),
+      const_cast<CXXConstructExpr *>(NE->getConstructExpr()));
+
   if (NE->getInitializer())
     Block = Visit(NE->getInitializer());
+
   if (BuildOpts.AddCXXNewAllocator)
     appendNewAllocator(Block, NE);
+
   if (NE->isArray())
     Block = Visit(NE->getArraySize());
+
   for (CXXNewExpr::arg_iterator I = NE->placement_arg_begin(),
        E = NE->placement_arg_end(); I != E; ++I)
     Block = Visit(*I);
+
   return Block;
 }
 
@@ -3885,7 +4085,7 @@
 CFGBlock *CFGBuilder::VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *C,
                                                   AddStmtChoice asc) {
   autoCreateBlock();
-  appendStmt(Block, C);
+  appendConstructor(Block, C);
   return VisitChildren(C);
 }
 
@@ -4141,8 +4341,6 @@
   return Block;
 }
 
-} // end anonymous namespace
-
 /// createBlock - Constructs and adds a new CFGBlock to the CFG.  The block has
 ///  no successors or predecessors.  If this is the first block created in the
 ///  CFG, it is automatically set to be the Entry and Exit of the CFG.
@@ -4172,11 +4370,12 @@
 const CXXDestructorDecl *
 CFGImplicitDtor::getDestructorDecl(ASTContext &astContext) const {
   switch (getKind()) {
-    case CFGElement::Statement:
     case CFGElement::Initializer:
     case CFGElement::NewAllocator:
     case CFGElement::LoopExit:
     case CFGElement::LifetimeEnds:
+    case CFGElement::Statement:
+    case CFGElement::Constructor:
       llvm_unreachable("getDestructorDecl should only be used with "
                        "ImplicitDtors");
     case CFGElement::AutomaticObjectDtor: {
@@ -4217,7 +4416,6 @@
     }
     case CFGElement::BaseDtor:
     case CFGElement::MemberDtor:
-
       // Not yet supported.
       return nullptr;
   }
@@ -4235,14 +4433,14 @@
 //===----------------------------------------------------------------------===//
 
 CFGBlock::AdjacentBlock::AdjacentBlock(CFGBlock *B, bool IsReachable)
-  : ReachableBlock(IsReachable ? B : nullptr),
-    UnreachableBlock(!IsReachable ? B : nullptr,
-                     B && IsReachable ? AB_Normal : AB_Unreachable) {}
+    : ReachableBlock(IsReachable ? B : nullptr),
+      UnreachableBlock(!IsReachable ? B : nullptr,
+                       B && IsReachable ? AB_Normal : AB_Unreachable) {}
 
 CFGBlock::AdjacentBlock::AdjacentBlock(CFGBlock *B, CFGBlock *AlternateBlock)
-  : ReachableBlock(B),
-    UnreachableBlock(B == AlternateBlock ? nullptr : AlternateBlock,
-                     B == AlternateBlock ? AB_Alternate : AB_Normal) {}
+    : ReachableBlock(B),
+      UnreachableBlock(B == AlternateBlock ? nullptr : AlternateBlock,
+                       B == AlternateBlock ? AB_Alternate : AB_Normal) {}
 
 void CFGBlock::addSuccessor(AdjacentBlock Succ,
                             BumpVectorContext &C) {
@@ -4257,7 +4455,6 @@
 
 bool CFGBlock::FilterEdge(const CFGBlock::FilterOptions &F,
         const CFGBlock *From, const CFGBlock *To) {
-
   if (F.IgnoreNullPredecessors && !From)
     return true;
 
@@ -4284,18 +4481,18 @@
 namespace {
 
 class StmtPrinterHelper : public PrinterHelper  {
-  typedef llvm::DenseMap<const Stmt*,std::pair<unsigned,unsigned> > StmtMapTy;
-  typedef llvm::DenseMap<const Decl*,std::pair<unsigned,unsigned> > DeclMapTy;
+  using StmtMapTy = llvm::DenseMap<const Stmt *, std::pair<unsigned, unsigned>>;
+  using DeclMapTy = llvm::DenseMap<const Decl *, std::pair<unsigned, unsigned>>;
+
   StmtMapTy StmtMap;
   DeclMapTy DeclMap;
-  signed currentBlock;
-  unsigned currStmt;
+  signed currentBlock = 0;
+  unsigned currStmt = 0;
   const LangOptions &LangOpts;
-public:
 
+public:
   StmtPrinterHelper(const CFG* cfg, const LangOptions &LO)
-    : currentBlock(0), currStmt(0), LangOpts(LO)
-  {
+      : LangOpts(LO) {
     for (CFG::const_iterator I = cfg->begin(), E = cfg->end(); I != E; ++I ) {
       unsigned j = 1;
       for (CFGBlock::const_iterator BI = (*I)->begin(), BEnd = (*I)->end() ;
@@ -4307,8 +4504,8 @@
 
           switch (stmt->getStmtClass()) {
             case Stmt::DeclStmtClass:
-                DeclMap[cast<DeclStmt>(stmt)->getSingleDecl()] = P;
-                break;
+              DeclMap[cast<DeclStmt>(stmt)->getSingleDecl()] = P;
+              break;
             case Stmt::IfStmtClass: {
               const VarDecl *var = cast<IfStmt>(stmt)->getConditionVariable();
               if (var)
@@ -4350,7 +4547,7 @@
     }
   }
 
-  ~StmtPrinterHelper() override {}
+  ~StmtPrinterHelper() override = default;
 
   const LangOptions &getLangOpts() const { return LangOpts; }
   void setBlockID(signed i) { currentBlock = i; }
@@ -4386,20 +4583,17 @@
     return true;
   }
 };
-} // end anonymous namespace
 
-
-namespace {
 class CFGBlockTerminatorPrint
-  : public StmtVisitor<CFGBlockTerminatorPrint,void> {
-
+    : public StmtVisitor<CFGBlockTerminatorPrint,void> {
   raw_ostream &OS;
   StmtPrinterHelper* Helper;
   PrintingPolicy Policy;
+
 public:
   CFGBlockTerminatorPrint(raw_ostream &os, StmtPrinterHelper* helper,
                           const PrintingPolicy &Policy)
-    : OS(os), Helper(helper), Policy(Policy) {
+      : OS(os), Helper(helper), Policy(Policy) {
     this->Policy.IncludeNewlines = false;
   }
 
@@ -4508,7 +4702,29 @@
     Visit(T.getStmt());
   }
 };
-} // end anonymous namespace
+
+} // namespace
+
+static void print_initializer(raw_ostream &OS, StmtPrinterHelper &Helper,
+                              const CXXCtorInitializer *I) {
+  if (I->isBaseInitializer())
+    OS << I->getBaseClass()->getAsCXXRecordDecl()->getName();
+  else if (I->isDelegatingInitializer())
+    OS << I->getTypeSourceInfo()->getType()->getAsCXXRecordDecl()->getName();
+  else
+    OS << I->getAnyMember()->getName();
+  OS << "(";
+  if (Expr *IE = I->getInit())
+    IE->printPretty(OS, &Helper, PrintingPolicy(Helper.getLangOpts()));
+  OS << ")";
+
+  if (I->isBaseInitializer())
+    OS << " (Base initializer)";
+  else if (I->isDelegatingInitializer())
+    OS << " (Delegating initializer)";
+  else
+    OS << " (Member initializer)";
+}
 
 static void print_elem(raw_ostream &OS, StmtPrinterHelper &Helper,
                        const CFGElement &E) {
@@ -4541,14 +4757,53 @@
 
     if (isa<CXXOperatorCallExpr>(S)) {
       OS << " (OperatorCall)";
-    }
-    else if (isa<CXXBindTemporaryExpr>(S)) {
+    } else if (isa<CXXBindTemporaryExpr>(S)) {
       OS << " (BindTemporary)";
-    }
-    else if (const CXXConstructExpr *CCE = dyn_cast<CXXConstructExpr>(S)) {
-      OS << " (CXXConstructExpr, " << CCE->getType().getAsString() << ")";
-    }
-    else if (const CastExpr *CE = dyn_cast<CastExpr>(S)) {
+    } else if (const CXXConstructExpr *CCE = dyn_cast<CXXConstructExpr>(S)) {
+      OS << " (CXXConstructExpr, ";
+      if (Optional<CFGConstructor> CE = E.getAs<CFGConstructor>()) {
+        const ConstructionContext *CC = CE->getConstructionContext();
+        const Stmt *S1 = nullptr, *S2 = nullptr;
+        switch (CC->getKind()) {
+        case ConstructionContext::ConstructorInitializerKind: {
+          const auto *ICC = cast<ConstructorInitializerConstructionContext>(CC);
+          print_initializer(OS, Helper, ICC->getCXXCtorInitializer());
+          OS << ", ";
+          break;
+        }
+        case ConstructionContext::SimpleVariableKind: {
+          const auto *DSCC = cast<SimpleVariableConstructionContext>(CC);
+          S1 = DSCC->getDeclStmt();
+          break;
+        }
+        case ConstructionContext::NewAllocatedObjectKind: {
+          const auto *NECC = cast<NewAllocatedObjectConstructionContext>(CC);
+          S1 = NECC->getCXXNewExpr();
+          break;
+        }
+        case ConstructionContext::ReturnedValueKind: {
+          const auto *RSCC = cast<ReturnedValueConstructionContext>(CC);
+          S1 = RSCC->getReturnStmt();
+          break;
+        }
+        case ConstructionContext::TemporaryObjectKind: {
+          const auto *TOCC = cast<TemporaryObjectConstructionContext>(CC);
+          S1 = TOCC->getCXXBindTemporaryExpr();
+          S2 = TOCC->getMaterializedTemporaryExpr();
+          break;
+        }
+        }
+        if (S1) {
+          Helper.handledStmt(const_cast<Stmt *>(S1), OS);
+          OS << ", ";
+        }
+        if (S2) {
+          Helper.handledStmt(const_cast<Stmt *>(S2), OS);
+          OS << ", ";
+        }
+      }
+      OS << CCE->getType().getAsString() << ")";
+    } else if (const CastExpr *CE = dyn_cast<CastExpr>(S)) {
       OS << " (" << CE->getStmtClassName() << ", "
          << CE->getCastKindName()
          << ", " << CE->getType().getAsString()
@@ -4558,26 +4813,9 @@
     // Expressions need a newline.
     if (isa<Expr>(S))
       OS << '\n';
-
   } else if (Optional<CFGInitializer> IE = E.getAs<CFGInitializer>()) {
-    const CXXCtorInitializer *I = IE->getInitializer();
-    if (I->isBaseInitializer())
-      OS << I->getBaseClass()->getAsCXXRecordDecl()->getName();
-    else if (I->isDelegatingInitializer())
-      OS << I->getTypeSourceInfo()->getType()->getAsCXXRecordDecl()->getName();
-    else OS << I->getAnyMember()->getName();
-
-    OS << "(";
-    if (Expr *IE = I->getInit())
-      IE->printPretty(OS, &Helper, PrintingPolicy(Helper.getLangOpts()));
-    OS << ")";
-
-    if (I->isBaseInitializer())
-      OS << " (Base initializer)\n";
-    else if (I->isDelegatingInitializer())
-      OS << " (Delegating initializer)\n";
-    else OS << " (Member initializer)\n";
-
+    print_initializer(OS, Helper, IE->getInitializer());
+    OS << '\n';
   } else if (Optional<CFGAutomaticObjDtor> DE =
                  E.getAs<CFGAutomaticObjDtor>()) {
     const VarDecl *VD = DE->getVarDecl();
@@ -4590,13 +4828,11 @@
 
     OS << ".~" << T->getAsCXXRecordDecl()->getName().str() << "()";
     OS << " (Implicit destructor)\n";
-
   } else if (Optional<CFGLifetimeEnds> DE = E.getAs<CFGLifetimeEnds>()) {
     const VarDecl *VD = DE->getVarDecl();
     Helper.handleDecl(VD, OS);
 
     OS << " (Lifetime ends)\n";
-
   } else if (Optional<CFGLoopExit> LE = E.getAs<CFGLoopExit>()) {
     const Stmt *LoopStmt = LE->getLoopStmt();
     OS << LoopStmt->getStmtClassName() << " (LoopExit)\n";
@@ -4618,14 +4854,12 @@
     const CXXBaseSpecifier *BS = BE->getBaseSpecifier();
     OS << "~" << BS->getType()->getAsCXXRecordDecl()->getName() << "()";
     OS << " (Base object destructor)\n";
-
   } else if (Optional<CFGMemberDtor> ME = E.getAs<CFGMemberDtor>()) {
     const FieldDecl *FD = ME->getFieldDecl();
     const Type *T = FD->getType()->getBaseElementTypeUnsafe();
     OS << "this->" << FD->getName();
     OS << ".~" << T->getAsCXXRecordDecl()->getName() << "()";
     OS << " (Member object destructor)\n";
-
   } else if (Optional<CFGTemporaryDtor> TE = E.getAs<CFGTemporaryDtor>()) {
     const CXXBindTemporaryExpr *BT = TE->getBindTemporaryExpr();
     OS << "~";
@@ -4638,7 +4872,6 @@
                         const CFGBlock &B,
                         StmtPrinterHelper &Helper, bool print_edges,
                         bool ShowColors) {
-
   Helper.setBlockID(B.getBlockID());
 
   // Print the header.
@@ -4663,7 +4896,6 @@
 
   // Print the label of this block.
   if (Stmt *Label = const_cast<Stmt*>(B.getLabel())) {
-
     if (print_edges)
       OS << "  ";
 
@@ -4705,7 +4937,6 @@
 
   for (CFGBlock::const_iterator I = B.begin(), E = B.end() ;
        I != E ; ++I, ++j ) {
-
     // Print the statement # in the basic block and the statement itself.
     if (print_edges)
       OS << " ";
@@ -4752,7 +4983,6 @@
       
       for (CFGBlock::const_pred_iterator I = B.pred_begin(), E = B.pred_end();
            I != E; ++I, ++i) {
-
         if (i % 10 == 8)
           OS << "\n     ";
 
@@ -4790,7 +5020,6 @@
 
       for (CFGBlock::const_succ_iterator I = B.succ_begin(), E = B.succ_end();
            I != E; ++I, ++i) {
-
         if (i % 10 == 8)
           OS << "\n    ";
 
@@ -4819,7 +5048,6 @@
   }
 }
 
-
 /// dump - A simple pretty printer of a CFG that outputs to stderr.
 void CFG::dump(const LangOptions &LO, bool ShowColors) const {
   print(llvm::errs(), LO, ShowColors);
@@ -4942,7 +5170,6 @@
 // CFG Graphviz Visualization
 //===----------------------------------------------------------------------===//
 
-
 #ifndef NDEBUG
 static StmtPrinterHelper* GraphHelper;
 #endif
@@ -4957,13 +5184,12 @@
 }
 
 namespace llvm {
+
 template<>
 struct DOTGraphTraits<const CFG*> : public DefaultDOTGraphTraits {
-
-  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
 
   static std::string getNodeLabel(const CFGBlock *Node, const CFG* Graph) {
-
 #ifndef NDEBUG
     std::string OutSStr;
     llvm::raw_string_ostream Out(OutSStr);
@@ -4981,8 +5207,9 @@
 
     return OutStr;
 #else
-    return "";
+    return {};
 #endif
   }
 };
-} // end namespace llvm
+
+} // namespace llvm
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index fdc9e6c..432067d 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -11,6 +11,7 @@
   CallGraph.cpp
   CloneDetection.cpp
   CocoaConventions.cpp
+  ConstructionContext.cpp
   Consumed.cpp
   CodeInjector.cpp
   Dominators.cpp
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 6d9530b..fb6d7e8 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -1,4 +1,4 @@
-//== CallGraph.cpp - AST-based Call graph  ----------------------*- C++ -*--==//
+//===- CallGraph.cpp - AST-based Call graph -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,13 +10,28 @@
 //  This file defines the AST-based CallGraph.
 //
 //===----------------------------------------------------------------------===//
+
 #include "clang/Analysis/CallGraph.h"
-#include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprObjC.h"
+#include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <memory>
+#include <string>
 
 using namespace clang;
 
@@ -26,6 +41,7 @@
 STATISTIC(NumBlockCallEdges, "Number of block call edges");
 
 namespace {
+
 /// A helper class, which walks the AST and locates all the call sites in the
 /// given function body.
 class CGBuilder : public StmtVisitor<CGBuilder> {
@@ -33,8 +49,7 @@
   CallGraphNode *CallerNode;
 
 public:
-  CGBuilder(CallGraph *g, CallGraphNode *N)
-    : G(g), CallerNode(N) {}
+  CGBuilder(CallGraph *g, CallGraphNode *N) : G(g), CallerNode(N) {}
 
   void VisitStmt(Stmt *S) { VisitChildren(S); }
 
@@ -90,7 +105,7 @@
   }
 };
 
-} // end anonymous namespace
+} // namespace
 
 void CallGraph::addNodesForBlocks(DeclContext *D) {
   if (BlockDecl *BD = dyn_cast<BlockDecl>(D))
@@ -105,7 +120,7 @@
   Root = getOrInsertNode(nullptr);
 }
 
-CallGraph::~CallGraph() {}
+CallGraph::~CallGraph() = default;
 
 bool CallGraph::includeInGraph(const Decl *D) {
   assert(D);
@@ -164,8 +179,8 @@
 
   // We are going to print the graph in reverse post order, partially, to make
   // sure the output is deterministic.
-  llvm::ReversePostOrderTraversal<const clang::CallGraph*> RPOT(this);
-  for (llvm::ReversePostOrderTraversal<const clang::CallGraph*>::rpo_iterator
+  llvm::ReversePostOrderTraversal<const CallGraph *> RPOT(this);
+  for (llvm::ReversePostOrderTraversal<const CallGraph *>::rpo_iterator
          I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
     const CallGraphNode *N = *I;
 
@@ -209,8 +224,7 @@
 
 template <>
 struct DOTGraphTraits<const CallGraph*> : public DefaultDOTGraphTraits {
-
-  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+  DOTGraphTraits (bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
 
   static std::string getNodeLabel(const CallGraphNode *Node,
                                   const CallGraph *CG) {
@@ -222,6 +236,6 @@
     else
       return "< >";
   }
-
 };
-}
+
+} // namespace llvm
diff --git a/lib/Analysis/CloneDetection.cpp b/lib/Analysis/CloneDetection.cpp
index 098803f..3c64f14 100644
--- a/lib/Analysis/CloneDetection.cpp
+++ b/lib/Analysis/CloneDetection.cpp
@@ -534,14 +534,14 @@
   // First check if we already reference this variable
   for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
     if (Variables[KindIndex] == VarDecl) {
-      // If yes, add a new occurence that points to the existing entry in
+      // If yes, add a new occurrence that points to the existing entry in
       // the Variables vector.
       Occurences.emplace_back(KindIndex, Mention);
       return;
     }
   }
   // If this variable wasn't already referenced, add it to the list of
-  // referenced variables and add a occurence that points to this new entry.
+  // referenced variables and add a occurrence that points to this new entry.
   Occurences.emplace_back(Variables.size(), Mention);
   Variables.push_back(VarDecl);
 }
diff --git a/lib/Analysis/CocoaConventions.cpp b/lib/Analysis/CocoaConventions.cpp
index be1262d..4d57623 100644
--- a/lib/Analysis/CocoaConventions.cpp
+++ b/lib/Analysis/CocoaConventions.cpp
@@ -47,12 +47,19 @@
   return Name.startswith(Prefix);
 }
 
+/// Returns true when the passed-in type is a CF-style reference-counted
+/// type from the DiskArbitration framework.
+static bool isDiskArbitrationAPIRefType(QualType T) {
+  return cocoa::isRefType(T, "DADisk") ||
+      cocoa::isRefType(T, "DADissenter") ||
+      cocoa::isRefType(T, "DASessionRef");
+}
+
 bool coreFoundation::isCFObjectRef(QualType T) {
   return cocoa::isRefType(T, "CF") || // Core Foundation.
          cocoa::isRefType(T, "CG") || // Core Graphics.
-         cocoa::isRefType(T, "DADisk") || // Disk Arbitration API.
-         cocoa::isRefType(T, "DADissenter") ||
-         cocoa::isRefType(T, "DASessionRef");
+         cocoa::isRefType(T, "CM") || // Core Media.
+         isDiskArbitrationAPIRefType(T);
 }
 
 
diff --git a/lib/Analysis/ConstructionContext.cpp b/lib/Analysis/ConstructionContext.cpp
new file mode 100644
index 0000000..aea329b
--- /dev/null
+++ b/lib/Analysis/ConstructionContext.cpp
@@ -0,0 +1,92 @@
+//===- ConstructionContext.cpp - CFG constructor information --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ConstructionContext class and its sub-classes,
+// which represent various different ways of constructing C++ objects
+// with the additional information the users may want to know about
+// the constructor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/ConstructionContext.h"
+
+using namespace clang;
+
+const ConstructionContextLayer *
+ConstructionContextLayer::create(BumpVectorContext &C, TriggerTy Trigger,
+                                 const ConstructionContextLayer *Parent) {
+  ConstructionContextLayer *CC =
+      C.getAllocator().Allocate<ConstructionContextLayer>();
+  return new (CC) ConstructionContextLayer(Trigger, Parent);
+}
+
+bool ConstructionContextLayer::isStrictlyMoreSpecificThan(
+    const ConstructionContextLayer *Other) const {
+  const ConstructionContextLayer *Self = this;
+  while (true) {
+    if (!Other)
+      return Self;
+    if (!Self || !Self->isSameLayer(Other))
+      return false;
+    Self = Self->getParent();
+    Other = Other->getParent();
+  }
+  llvm_unreachable("The above loop can only be terminated via return!");
+}
+
+const ConstructionContext *ConstructionContext::createFromLayers(
+    BumpVectorContext &C, const ConstructionContextLayer *TopLayer) {
+  // Before this point all we've had was a stockpile of arbitrary layers.
+  // Now validate that it is shaped as one of the finite amount of expected
+  // patterns.
+  if (const Stmt *S = TopLayer->getTriggerStmt()) {
+    if (const auto *DS = dyn_cast<DeclStmt>(S)) {
+      assert(TopLayer->isLast());
+      auto *CC =
+          C.getAllocator().Allocate<SimpleVariableConstructionContext>();
+      return new (CC) SimpleVariableConstructionContext(DS);
+    } else if (const auto *NE = dyn_cast<CXXNewExpr>(S)) {
+      assert(TopLayer->isLast());
+      auto *CC =
+          C.getAllocator().Allocate<NewAllocatedObjectConstructionContext>();
+      return new (CC) NewAllocatedObjectConstructionContext(NE);
+    } else if (const auto *BTE = dyn_cast<CXXBindTemporaryExpr>(S)) {
+      const MaterializeTemporaryExpr *MTE = nullptr;
+      assert(BTE->getType().getCanonicalType()
+                ->getAsCXXRecordDecl()->hasNonTrivialDestructor());
+      // For temporaries with destructors, there may or may not be
+      // lifetime extension on the parent layer.
+      if (const ConstructionContextLayer *ParentLayer = TopLayer->getParent()) {
+        assert(ParentLayer->isLast());
+        MTE = cast<MaterializeTemporaryExpr>(ParentLayer->getTriggerStmt());
+      }
+      auto *CC =
+          C.getAllocator().Allocate<TemporaryObjectConstructionContext>();
+      return new (CC) TemporaryObjectConstructionContext(BTE, MTE);
+    } else if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(S)) {
+      assert(MTE->getType().getCanonicalType()
+                ->getAsCXXRecordDecl()->hasTrivialDestructor());
+      assert(TopLayer->isLast());
+      auto *CC =
+          C.getAllocator().Allocate<TemporaryObjectConstructionContext>();
+      return new (CC) TemporaryObjectConstructionContext(nullptr, MTE);
+    } else if (const auto *RS = dyn_cast<ReturnStmt>(S)) {
+      assert(TopLayer->isLast());
+      auto *CC =
+          C.getAllocator().Allocate<ReturnedValueConstructionContext>();
+      return new (CC) ReturnedValueConstructionContext(RS);
+    }
+  } else if (const CXXCtorInitializer *I = TopLayer->getTriggerInit()) {
+    assert(TopLayer->isLast());
+    auto *CC =
+        C.getAllocator().Allocate<ConstructorInitializerConstructionContext>();
+    return new (CC) ConstructorInitializerConstructionContext(I);
+  }
+  llvm_unreachable("Unexpected construction context!");
+}
diff --git a/lib/Analysis/ReachableCode.cpp b/lib/Analysis/ReachableCode.cpp
index 7e72795..f644d50 100644
--- a/lib/Analysis/ReachableCode.cpp
+++ b/lib/Analysis/ReachableCode.cpp
@@ -66,6 +66,21 @@
   return false;
 }
 
+static bool isBuiltinAssumeFalse(const CFGBlock *B, const Stmt *S,
+                                 ASTContext &C) {
+  if (B->empty())  {
+    // Happens if S is B's terminator and B contains nothing else
+    // (e.g. a CFGBlock containing only a goto).
+    return false;
+  }
+  if (Optional<CFGStmt> CS = B->back().getAs<CFGStmt>()) {
+    if (const auto *CE = dyn_cast<CallExpr>(CS->getStmt())) {
+      return CE->getCallee()->IgnoreCasts() == S && CE->isBuiltinAssumeFalse(C);
+    }
+  }
+  return false;
+}
+
 static bool isDeadReturn(const CFGBlock *B, const Stmt *S) {
   // Look to see if the current control flow ends with a 'return', and see if
   // 'S' is a substatement. The 'return' may not be the last element in the
@@ -372,6 +387,7 @@
     llvm::BitVector &Reachable;
     SmallVector<const CFGBlock *, 10> WorkList;
     Preprocessor &PP;
+    ASTContext &C;
 
     typedef SmallVector<std::pair<const CFGBlock *, const Stmt *>, 12>
     DeferredLocsTy;
@@ -379,10 +395,10 @@
     DeferredLocsTy DeferredLocs;
 
   public:
-    DeadCodeScan(llvm::BitVector &reachable, Preprocessor &PP)
+    DeadCodeScan(llvm::BitVector &reachable, Preprocessor &PP, ASTContext &C)
     : Visited(reachable.size()),
       Reachable(reachable),
-      PP(PP) {}
+      PP(PP), C(C) {}
 
     void enqueue(const CFGBlock *block);
     unsigned scanBackwards(const CFGBlock *Start,
@@ -600,7 +616,8 @@
 
   if (isa<BreakStmt>(S)) {
     UK = reachable_code::UK_Break;
-  } else if (isTrivialDoWhile(B, S) || isBuiltinUnreachable(S)) {
+  } else if (isTrivialDoWhile(B, S) || isBuiltinUnreachable(S) ||
+             isBuiltinAssumeFalse(B, S, C)) {
     return;
   }
   else if (isDeadReturn(B, S)) {
@@ -693,7 +710,7 @@
     if (reachable[block->getBlockID()])
       continue;
     
-    DeadCodeScan DS(reachable, PP);
+    DeadCodeScan DS(reachable, PP, AC.getASTContext());
     numReachable += DS.scanBackwards(block, CB);
     
     if (numReachable == cfg->getNumBlockIDs())
diff --git a/lib/Analysis/ThreadSafety.cpp b/lib/Analysis/ThreadSafety.cpp
index 6a9c9a0..f81d916 100644
--- a/lib/Analysis/ThreadSafety.cpp
+++ b/lib/Analysis/ThreadSafety.cpp
@@ -1689,7 +1689,7 @@
   CapExprSet ScopedExclusiveReqs, ScopedSharedReqs;
   StringRef CapDiagKind = "mutex";
 
-  // Figure out if we're calling the constructor of scoped lockable class
+  // Figure out if we're constructing an object of scoped lockable class
   bool isScopedVar = false;
   if (VD) {
     if (const CXXConstructorDecl *CD = dyn_cast<const CXXConstructorDecl>(D)) {
@@ -1991,6 +1991,33 @@
   // FIXME -- only handles constructors in DeclStmt below.
 }
 
+static CXXConstructorDecl *
+findConstructorForByValueReturn(const CXXRecordDecl *RD) {
+  // Prefer a move constructor over a copy constructor. If there's more than
+  // one copy constructor or more than one move constructor, we arbitrarily
+  // pick the first declared such constructor rather than trying to guess which
+  // one is more appropriate.
+  CXXConstructorDecl *CopyCtor = nullptr;
+  for (CXXConstructorDecl *Ctor : RD->ctors()) {
+    if (Ctor->isDeleted())
+      continue;
+    if (Ctor->isMoveConstructor())
+      return Ctor;
+    if (!CopyCtor && Ctor->isCopyConstructor())
+      CopyCtor = Ctor;
+  }
+  return CopyCtor;
+}
+
+static Expr *buildFakeCtorCall(CXXConstructorDecl *CD, ArrayRef<Expr *> Args,
+                               SourceLocation Loc) {
+  ASTContext &Ctx = CD->getASTContext();
+  return CXXConstructExpr::Create(Ctx, Ctx.getRecordType(CD->getParent()), Loc,
+                                  CD, true, Args, false, false, false, false,
+                                  CXXConstructExpr::CK_Complete,
+                                  SourceRange(Loc, Loc));
+}
+
 void BuildLockset::VisitDeclStmt(DeclStmt *S) {
   // adjust the context
   LVarCtx = Analyzer->LocalVarMap.getNextContext(CtxIndex, S, LVarCtx);
@@ -1998,15 +2025,34 @@
   for (auto *D : S->getDeclGroup()) {
     if (VarDecl *VD = dyn_cast_or_null<VarDecl>(D)) {
       Expr *E = VD->getInit();
-      // handle constructors that involve temporaries
-      if (ExprWithCleanups *EWC = dyn_cast_or_null<ExprWithCleanups>(E))
-        E = EWC->getSubExpr();
+      if (!E)
+        continue;
+      E = E->IgnoreParens();
 
-      if (CXXConstructExpr *CE = dyn_cast_or_null<CXXConstructExpr>(E)) {
+      // handle constructors that involve temporaries
+      if (auto *EWC = dyn_cast<ExprWithCleanups>(E))
+        E = EWC->getSubExpr();
+      if (auto *BTE = dyn_cast<CXXBindTemporaryExpr>(E))
+        E = BTE->getSubExpr();
+
+      if (CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(E)) {
         NamedDecl *CtorD = dyn_cast_or_null<NamedDecl>(CE->getConstructor());
         if (!CtorD || !CtorD->hasAttrs())
-          return;
-        handleCall(CE, CtorD, VD);
+          continue;
+        handleCall(E, CtorD, VD);
+      } else if (isa<CallExpr>(E) && E->isRValue()) {
+        // If the object is initialized by a function call that returns a
+        // scoped lockable by value, use the attributes on the copy or move
+        // constructor to figure out what effect that should have on the
+        // lockset.
+        // FIXME: Is this really the best way to handle this situation?
+        auto *RD = E->getType()->getAsCXXRecordDecl();
+        if (!RD || !RD->hasAttr<ScopedLockableAttr>())
+          continue;
+        CXXConstructorDecl *CtorD = findConstructorForByValueReturn(RD);
+        if (!CtorD || !CtorD->hasAttrs())
+          continue;
+        handleCall(buildFakeCtorCall(CtorD, {E}, E->getLocStart()), CtorD, VD);
       }
     }
   }
diff --git a/lib/Analysis/ThreadSafetyCommon.cpp b/lib/Analysis/ThreadSafetyCommon.cpp
index 875f28c..99284f0 100644
--- a/lib/Analysis/ThreadSafetyCommon.cpp
+++ b/lib/Analysis/ThreadSafetyCommon.cpp
@@ -319,11 +319,11 @@
 static const CXXMethodDecl *getFirstVirtualDecl(const CXXMethodDecl *D) {
   while (true) {
     D = D->getCanonicalDecl();
-    CXXMethodDecl::method_iterator I = D->begin_overridden_methods(),
-                                   E = D->end_overridden_methods();
-    if (I == E)
+    auto OverriddenMethods = D->overridden_methods();
+    if (OverriddenMethods.begin() == OverriddenMethods.end())
       return D;  // Method does not override anything
-    D = *I;      // FIXME: this does not work with multiple inheritance.
+    // FIXME: this does not work with multiple inheritance.
+    D = *OverriddenMethods.begin();
   }
   return nullptr;
 }
@@ -505,6 +505,7 @@
   case BO_GE:   return translateBinOp(til::BOP_Leq, BO, Ctx, true);
   case BO_EQ:   return translateBinOp(til::BOP_Eq,  BO, Ctx);
   case BO_NE:   return translateBinOp(til::BOP_Neq, BO, Ctx);
+  case BO_Cmp:  return translateBinOp(til::BOP_Cmp, BO, Ctx);
   case BO_And:  return translateBinOp(til::BOP_BitAnd,   BO, Ctx);
   case BO_Xor:  return translateBinOp(til::BOP_BitXor,   BO, Ctx);
   case BO_Or:   return translateBinOp(til::BOP_BitOr,    BO, Ctx);
diff --git a/lib/Analysis/ThreadSafetyTIL.cpp b/lib/Analysis/ThreadSafetyTIL.cpp
index 83aa904..cd7cdc6 100644
--- a/lib/Analysis/ThreadSafetyTIL.cpp
+++ b/lib/Analysis/ThreadSafetyTIL.cpp
@@ -38,6 +38,7 @@
     case BOP_Neq:      return "!=";
     case BOP_Lt:       return "<";
     case BOP_Leq:      return "<=";
+    case BOP_Cmp:      return "<=>";
     case BOP_LogicAnd: return "&&";
     case BOP_LogicOr:  return "||";
   }
diff --git a/lib/Basic/CMakeLists.txt b/lib/Basic/CMakeLists.txt
index d0c9b90..823b908 100644
--- a/lib/Basic/CMakeLists.txt
+++ b/lib/Basic/CMakeLists.txt
@@ -83,6 +83,7 @@
   Targets/OSTargets.cpp
   Targets/PNaCl.cpp
   Targets/PPC.cpp
+  Targets/RISCV.cpp
   Targets/SPIR.cpp
   Targets/Sparc.cpp
   Targets/SystemZ.cpp
diff --git a/lib/Basic/Cuda.cpp b/lib/Basic/Cuda.cpp
index 3a5297b..b7e04bf 100644
--- a/lib/Basic/Cuda.cpp
+++ b/lib/Basic/Cuda.cpp
@@ -18,12 +18,16 @@
     return "8.0";
   case CudaVersion::CUDA_90:
     return "9.0";
+  case CudaVersion::CUDA_91:
+    return "9.1";
   }
   llvm_unreachable("invalid enum");
 }
 
 const char *CudaArchToString(CudaArch A) {
   switch (A) {
+  case CudaArch::LAST:
+    break;
   case CudaArch::UNKNOWN:
     return "unknown";
   case CudaArch::SM_20:
@@ -52,6 +56,8 @@
     return "sm_62";
   case CudaArch::SM_70:
     return "sm_70";
+  case CudaArch::SM_72:
+    return "sm_72";
   }
   llvm_unreachable("invalid enum");
 }
@@ -71,6 +77,7 @@
       .Case("sm_61", CudaArch::SM_61)
       .Case("sm_62", CudaArch::SM_62)
       .Case("sm_70", CudaArch::SM_70)
+      .Case("sm_72", CudaArch::SM_72)
       .Default(CudaArch::UNKNOWN);
 }
 
@@ -102,6 +109,8 @@
     return "compute_62";
   case CudaVirtualArch::COMPUTE_70:
     return "compute_70";
+  case CudaVirtualArch::COMPUTE_72:
+    return "compute_72";
   }
   llvm_unreachable("invalid enum");
 }
@@ -120,11 +129,14 @@
       .Case("compute_61", CudaVirtualArch::COMPUTE_61)
       .Case("compute_62", CudaVirtualArch::COMPUTE_62)
       .Case("compute_70", CudaVirtualArch::COMPUTE_70)
+      .Case("compute_72", CudaVirtualArch::COMPUTE_72)
       .Default(CudaVirtualArch::UNKNOWN);
 }
 
 CudaVirtualArch VirtualArchForCudaArch(CudaArch A) {
   switch (A) {
+  case CudaArch::LAST:
+    break;
   case CudaArch::UNKNOWN:
     return CudaVirtualArch::UNKNOWN;
   case CudaArch::SM_20:
@@ -152,12 +164,16 @@
     return CudaVirtualArch::COMPUTE_62;
   case CudaArch::SM_70:
     return CudaVirtualArch::COMPUTE_70;
+  case CudaArch::SM_72:
+    return CudaVirtualArch::COMPUTE_72;
   }
   llvm_unreachable("invalid enum");
 }
 
 CudaVersion MinVersionForCudaArch(CudaArch A) {
   switch (A) {
+  case CudaArch::LAST:
+    break;
   case CudaArch::UNKNOWN:
     return CudaVersion::UNKNOWN;
   case CudaArch::SM_20:
@@ -176,8 +192,22 @@
     return CudaVersion::CUDA_80;
   case CudaArch::SM_70:
     return CudaVersion::CUDA_90;
+  case CudaArch::SM_72:
+    return CudaVersion::CUDA_91;
   }
   llvm_unreachable("invalid enum");
 }
 
+CudaVersion MaxVersionForCudaArch(CudaArch A) {
+  switch (A) {
+  case CudaArch::UNKNOWN:
+    return CudaVersion::UNKNOWN;
+  case CudaArch::SM_20:
+  case CudaArch::SM_21:
+    return CudaVersion::CUDA_80;
+  default:
+    return CudaVersion::LATEST;
+  }
+}
+
 } // namespace clang
diff --git a/lib/Basic/Diagnostic.cpp b/lib/Basic/Diagnostic.cpp
index 47f6a13..cb38161 100644
--- a/lib/Basic/Diagnostic.cpp
+++ b/lib/Basic/Diagnostic.cpp
@@ -1,4 +1,4 @@
-//===--- Diagnostic.cpp - C Language Family Diagnostic Handling -----------===//
+//===- Diagnostic.cpp - C Language Family Diagnostic Handling -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,15 +14,30 @@
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/DiagnosticError.h"
+#include "clang/Basic/DiagnosticIDs.h"
 #include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/Locale.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
 
 using namespace clang;
 
@@ -57,27 +72,13 @@
   Output.append(Str.begin(), Str.end());
 }
 
-DiagnosticsEngine::DiagnosticsEngine(IntrusiveRefCntPtr<DiagnosticIDs> diags,
-                                     DiagnosticOptions *DiagOpts,
-                                     DiagnosticConsumer *client,
-                                     bool ShouldOwnClient)
-    : Diags(std::move(diags)), DiagOpts(DiagOpts), Client(nullptr),
-      SourceMgr(nullptr) {
+DiagnosticsEngine::DiagnosticsEngine(
+    IntrusiveRefCntPtr<DiagnosticIDs> diags,
+    IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts, DiagnosticConsumer *client,
+    bool ShouldOwnClient)
+    : Diags(std::move(diags)), DiagOpts(std::move(DiagOpts)) {
   setClient(client, ShouldOwnClient);
   ArgToStringFn = DummyArgToStringFn;
-  ArgToStringCookie = nullptr;
-
-  AllExtensionsSilenced = 0;
-  SuppressAfterFatalError = true;
-  SuppressAllDiagnostics = false;
-  ElideType = true;
-  PrintTemplateTree = false;
-  ShowColors = false;
-  ShowOverloads = Ovl_All;
-
-  ErrorLimit = 0;
-  TemplateBacktraceLimit = 0;
-  ConstexprBacktraceLimit = 0;
 
   Reset();
 }
@@ -121,7 +122,7 @@
   TrapNumErrorsOccurred = 0;
   TrapNumUnrecoverableErrorsOccurred = 0;
   
-  CurDiagID = ~0U;
+  CurDiagID = std::numeric_limits<unsigned>::max();
   LastDiagLevel = DiagnosticIDs::Ignored;
   DelayedDiagID = 0;
 
@@ -152,8 +153,7 @@
   Report(ID) << DelayedDiagArg1 << DelayedDiagArg2;
 }
 
-void DiagnosticsEngine::DiagStateMap::appendFirst(
-                                             DiagState *State) {
+void DiagnosticsEngine::DiagStateMap::appendFirst(DiagState *State) {
   assert(Files.empty() && "not first");
   FirstDiagState = CurDiagState = State;
   CurDiagStateLoc = SourceLocation();
@@ -236,6 +236,96 @@
   return &F;
 }
 
+void DiagnosticsEngine::DiagStateMap::dump(SourceManager &SrcMgr,
+                                           StringRef DiagName) const {
+  llvm::errs() << "diagnostic state at ";
+  CurDiagStateLoc.dump(SrcMgr);
+  llvm::errs() << ": " << CurDiagState << "\n";
+
+  for (auto &F : Files) {
+    FileID ID = F.first;
+    File &File = F.second;
+
+    bool PrintedOuterHeading = false;
+    auto PrintOuterHeading = [&] {
+      if (PrintedOuterHeading) return;
+      PrintedOuterHeading = true;
+
+      llvm::errs() << "File " << &File << " <FileID " << ID.getHashValue()
+                   << ">: " << SrcMgr.getBuffer(ID)->getBufferIdentifier();
+      if (F.second.Parent) {
+        std::pair<FileID, unsigned> Decomp =
+            SrcMgr.getDecomposedIncludedLoc(ID);
+        assert(File.ParentOffset == Decomp.second);
+        llvm::errs() << " parent " << File.Parent << " <FileID "
+                     << Decomp.first.getHashValue() << "> ";
+        SrcMgr.getLocForStartOfFile(Decomp.first)
+              .getLocWithOffset(Decomp.second)
+              .dump(SrcMgr);
+      }
+      if (File.HasLocalTransitions)
+        llvm::errs() << " has_local_transitions";
+      llvm::errs() << "\n";
+    };
+
+    if (DiagName.empty())
+      PrintOuterHeading();
+
+    for (DiagStatePoint &Transition : File.StateTransitions) {
+      bool PrintedInnerHeading = false;
+      auto PrintInnerHeading = [&] {
+        if (PrintedInnerHeading) return;
+        PrintedInnerHeading = true;
+
+        PrintOuterHeading();
+        llvm::errs() << "  ";
+        SrcMgr.getLocForStartOfFile(ID)
+              .getLocWithOffset(Transition.Offset)
+              .dump(SrcMgr);
+        llvm::errs() << ": state " << Transition.State << ":\n";
+      };
+
+      if (DiagName.empty())
+        PrintInnerHeading();
+
+      for (auto &Mapping : *Transition.State) {
+        StringRef Option =
+            DiagnosticIDs::getWarningOptionForDiag(Mapping.first);
+        if (!DiagName.empty() && DiagName != Option)
+          continue;
+
+        PrintInnerHeading();
+        llvm::errs() << "    ";
+        if (Option.empty())
+          llvm::errs() << "<unknown " << Mapping.first << ">";
+        else
+          llvm::errs() << Option;
+        llvm::errs() << ": ";
+
+        switch (Mapping.second.getSeverity()) {
+        case diag::Severity::Ignored: llvm::errs() << "ignored"; break;
+        case diag::Severity::Remark: llvm::errs() << "remark"; break;
+        case diag::Severity::Warning: llvm::errs() << "warning"; break;
+        case diag::Severity::Error: llvm::errs() << "error"; break;
+        case diag::Severity::Fatal: llvm::errs() << "fatal"; break;
+        }
+
+        if (!Mapping.second.isUser())
+          llvm::errs() << " default";
+        if (Mapping.second.isPragma())
+          llvm::errs() << " pragma";
+        if (Mapping.second.hasNoWarningAsError())
+          llvm::errs() << " no-error";
+        if (Mapping.second.hasNoErrorAsFatal())
+          llvm::errs() << " no-fatal";
+        if (Mapping.second.wasUpgradedFromWarning())
+          llvm::errs() << " overruled";
+        llvm::errs() << "\n";
+      }
+    }
+  }
+}
+
 void DiagnosticsEngine::PushDiagStatePoint(DiagState *State,
                                            SourceLocation Loc) {
   assert(Loc.isValid() && "Adding invalid loc point");
@@ -363,8 +453,8 @@
                                           diag::Severity Map,
                                           SourceLocation Loc) {
   // Get all the diagnostics.
-  SmallVector<diag::kind, 64> AllDiags;
-  Diags->getAllDiagnostics(Flavor, AllDiags);
+  std::vector<diag::kind> AllDiags;
+  DiagnosticIDs::getAllDiagnostics(Flavor, AllDiags);
 
   // Set the mapping.
   for (diag::kind Diag : AllDiags)
@@ -373,7 +463,8 @@
 }
 
 void DiagnosticsEngine::Report(const StoredDiagnostic &storedDiag) {
-  assert(CurDiagID == ~0U && "Multiple diagnostics in flight at once!");
+  assert(CurDiagID == std::numeric_limits<unsigned>::max() &&
+         "Multiple diagnostics in flight at once!");
 
   CurDiagLoc = storedDiag.getLocation();
   CurDiagID = storedDiag.getID();
@@ -394,7 +485,7 @@
       ++NumWarnings;
   }
 
-  CurDiagID = ~0U;
+  CurDiagID = std::numeric_limits<unsigned>::max();
 }
 
 bool DiagnosticsEngine::EmitCurrentDiagnostic(bool Force) {
@@ -429,8 +520,7 @@
   return Emitted;
 }
 
-
-DiagnosticConsumer::~DiagnosticConsumer() {}
+DiagnosticConsumer::~DiagnosticConsumer() = default;
 
 void DiagnosticConsumer::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
                                         const Diagnostic &Info) {
@@ -447,7 +537,7 @@
 template <std::size_t StrLen>
 static bool ModifierIs(const char *Modifier, unsigned ModifierLen,
                        const char (&Str)[StrLen]) {
-  return StrLen-1 == ModifierLen && !memcmp(Modifier, Str, StrLen-1);
+  return StrLen-1 == ModifierLen && memcmp(Modifier, Str, StrLen-1) == 0;
 }
 
 /// ScanForward - Scans forward, looking for the given character, skipping
@@ -527,7 +617,6 @@
   Out << ValNo << llvm::getOrdinalSuffix(ValNo);
 }
 
-
 /// PluralNumber - Parse an unsigned integer and advance Start.
 static unsigned PluralNumber(const char *&Start, const char *End) {
   // Programming 101: Parse a decimal number :-)
@@ -563,7 +652,7 @@
   if (*Start == ':')
     return true;
 
-  while (1) {
+  while (true) {
     char C = *Start;
     if (C == '%') {
       // Modulo expression
@@ -628,7 +717,7 @@
                                  const char *Argument, unsigned ArgumentLen,
                                  SmallVectorImpl<char> &OutStr) {
   const char *ArgumentEnd = Argument + ArgumentLen;
-  while (1) {
+  while (true) {
     assert(Argument < ArgumentEnd && "Plural expression didn't match.");
     const char *ExprEnd = Argument;
     while (*ExprEnd != ':') {
@@ -679,7 +768,6 @@
 void Diagnostic::
 FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
                  SmallVectorImpl<char> &OutStr) const {
-
   // When the diagnostic string is only "%0", the entire string is being given
   // by an outside source.  Remove unprintable characters from this string
   // and skip all the other string processing.
@@ -899,7 +987,7 @@
                                      FormattedArgs,
                                      OutStr, QualTypeVals);
       break;
-    case DiagnosticsEngine::ak_qualtype_pair:
+    case DiagnosticsEngine::ak_qualtype_pair: {
       // Create a struct with all the info needed for printing.
       TemplateDiffTypes TDT;
       TDT.FromType = getRawArg(ArgNo);
@@ -967,6 +1055,7 @@
       FormatDiagnostic(SecondDollar + 1, Pipe, OutStr);
       break;
     }
+    }
     
     // Remember this argument info for subsequent formatting operations.  Turn
     // std::strings into a null terminated string to make it be the same case as
@@ -978,7 +1067,6 @@
     else
       FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_c_string,
                                         (intptr_t)getArgStdStr(ArgNo).c_str()));
-    
   }
 
   // Append the type tree to the end of the diagnostics.
@@ -987,12 +1075,11 @@
 
 StoredDiagnostic::StoredDiagnostic(DiagnosticsEngine::Level Level, unsigned ID,
                                    StringRef Message)
-  : ID(ID), Level(Level), Loc(), Message(Message) { }
+    : ID(ID), Level(Level), Message(Message) {}
 
 StoredDiagnostic::StoredDiagnostic(DiagnosticsEngine::Level Level, 
                                    const Diagnostic &Info)
-  : ID(Info.getID()), Level(Level) 
-{
+    : ID(Info.getID()), Level(Level) {
   assert((Info.getLocation().isInvalid() || Info.hasSourceManager()) &&
        "Valid source location without setting a source manager for diagnostic");
   if (Info.getLocation().isValid())
@@ -1008,8 +1095,8 @@
                                    StringRef Message, FullSourceLoc Loc,
                                    ArrayRef<CharSourceRange> Ranges,
                                    ArrayRef<FixItHint> FixIts)
-  : ID(ID), Level(Level), Loc(Loc), Message(Message), 
-    Ranges(Ranges.begin(), Ranges.end()), FixIts(FixIts.begin(), FixIts.end())
+    : ID(ID), Level(Level), Loc(Loc), Message(Message),
+      Ranges(Ranges.begin(), Ranges.end()), FixIts(FixIts.begin(), FixIts.end())
 {
 }
 
@@ -1019,9 +1106,9 @@
 ///  reported by DiagnosticsEngine.
 bool DiagnosticConsumer::IncludeInDiagnosticCounts() const { return true; }
 
-void IgnoringDiagConsumer::anchor() { }
+void IgnoringDiagConsumer::anchor() {}
 
-ForwardingDiagnosticConsumer::~ForwardingDiagnosticConsumer() {}
+ForwardingDiagnosticConsumer::~ForwardingDiagnosticConsumer() = default;
 
 void ForwardingDiagnosticConsumer::HandleDiagnostic(
        DiagnosticsEngine::Level DiagLevel,
diff --git a/lib/Basic/DiagnosticIDs.cpp b/lib/Basic/DiagnosticIDs.cpp
index 4f59e55..c4c425d 100644
--- a/lib/Basic/DiagnosticIDs.cpp
+++ b/lib/Basic/DiagnosticIDs.cpp
@@ -583,7 +583,7 @@
 }
 
 void DiagnosticIDs::getAllDiagnostics(diag::Flavor Flavor,
-                                     SmallVectorImpl<diag::kind> &Diags) const {
+                                      std::vector<diag::kind> &Diags) {
   for (unsigned i = 0; i != StaticDiagInfoSize; ++i)
     if (StaticDiagInfo[i].getFlavor() == Flavor)
       Diags.push_back(StaticDiagInfo[i].DiagID);
diff --git a/lib/Basic/DiagnosticOptions.cpp b/lib/Basic/DiagnosticOptions.cpp
index 93c2196..ebd9bb4 100644
--- a/lib/Basic/DiagnosticOptions.cpp
+++ b/lib/Basic/DiagnosticOptions.cpp
@@ -1,4 +1,4 @@
-//===--- DiagnosticOptions.cpp - C Language Family Diagnostic Handling ----===//
+//===- DiagnosticOptions.cpp - C Language Family Diagnostic Handling ------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,6 +13,7 @@
 
 #include "clang/Basic/DiagnosticOptions.h"
 #include "llvm/Support/raw_ostream.h"
+#include <type_traits>
 
 namespace clang {
 
@@ -21,4 +22,4 @@
   return Out << static_cast<UT>(M);
 }
 
-} // end namespace clang
+} // namespace clang
diff --git a/lib/Basic/IdentifierTable.cpp b/lib/Basic/IdentifierTable.cpp
index fe7829e..2bed531 100644
--- a/lib/Basic/IdentifierTable.cpp
+++ b/lib/Basic/IdentifierTable.cpp
@@ -1,4 +1,4 @@
-//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
+//===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,17 +12,24 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/OperatorKinds.h"
 #include "clang/Basic/Specifiers.h"
-#include "llvm/ADT/DenseMap.h"
+#include "clang/Basic/TokenKinds.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cassert>
 #include <cstdio>
+#include <cstring>
+#include <string>
 
 using namespace clang;
 
@@ -46,27 +53,27 @@
   RevertedTokenID = false;
   OutOfDate = false;
   IsModulesImport = false;
-  FETokenInfo = nullptr;
-  Entry = nullptr;
 }
 
 //===----------------------------------------------------------------------===//
 // IdentifierTable Implementation
 //===----------------------------------------------------------------------===//
 
-IdentifierIterator::~IdentifierIterator() { }
+IdentifierIterator::~IdentifierIterator() = default;
 
-IdentifierInfoLookup::~IdentifierInfoLookup() {}
+IdentifierInfoLookup::~IdentifierInfoLookup() = default;
 
 namespace {
-  /// \brief A simple identifier lookup iterator that represents an
-  /// empty sequence of identifiers.
-  class EmptyLookupIterator : public IdentifierIterator
-  {
-  public:
-    StringRef Next() override { return StringRef(); }
-  };
-}
+
+/// \brief A simple identifier lookup iterator that represents an
+/// empty sequence of identifiers.
+class EmptyLookupIterator : public IdentifierIterator
+{
+public:
+  StringRef Next() override { return StringRef(); }
+};
+
+} // namespace
 
 IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
   return new EmptyLookupIterator();
@@ -76,11 +83,9 @@
                                  IdentifierInfoLookup* externalLookup)
   : HashTable(8192), // Start with space for 8K identifiers.
     ExternalLookup(externalLookup) {
-
   // Populate the identifier table with info about keywords for the current
   // language.
   AddKeywords(LangOpts);
-      
 
   // Add the '_experimental_modules_import' contextual keyword.
   get("import").setModulesImport(true);
@@ -92,6 +97,7 @@
 
 // Constants for TokenKinds.def
 namespace {
+
   enum {
     KEYC99 = 0x1,
     KEYCXX = 0x2,
@@ -127,7 +133,8 @@
     KS_Enabled,     // Enabled
     KS_Future       // Is a keyword in future standard
   };
-}
+
+} // namespace
 
 /// \brief Translates flags as specified in TokenKinds.def into keyword status
 /// in the given language standard.
@@ -366,6 +373,7 @@
 }
 
 namespace clang {
+
 /// MultiKeywordSelector - One of these variable length records is kept for each
 /// selector containing more than one keyword. We use a folding set
 /// to unique aggregate names (keyword selectors in ObjC parlance). Access to
@@ -375,6 +383,7 @@
   MultiKeywordSelector(unsigned nKeys) {
     ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys;
   }
+
 public:
   // Constructor for keyword selectors.
   MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
@@ -392,28 +401,34 @@
 
   unsigned getNumArgs() const { return ExtraKindOrNumArgs - NUM_EXTRA_KINDS; }
 
-  typedef IdentifierInfo *const *keyword_iterator;
+  using keyword_iterator = IdentifierInfo *const *;
+
   keyword_iterator keyword_begin() const {
     return reinterpret_cast<keyword_iterator>(this+1);
   }
+
   keyword_iterator keyword_end() const {
     return keyword_begin()+getNumArgs();
   }
+
   IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
     assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
     return keyword_begin()[i];
   }
+
   static void Profile(llvm::FoldingSetNodeID &ID,
                       keyword_iterator ArgTys, unsigned NumArgs) {
     ID.AddInteger(NumArgs);
     for (unsigned i = 0; i != NumArgs; ++i)
       ID.AddPointer(ArgTys[i]);
   }
+
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, keyword_begin(), getNumArgs());
   }
 };
-} // end namespace clang.
+
+} // namespace clang.
 
 unsigned Selector::getNumArgs() const {
   unsigned IIF = getIdentifierInfoFlag();
@@ -431,6 +446,7 @@
     assert(argIndex == 0 && "illegal keyword index");
     return getAsIdentifierInfo();
   }
+
   // We point to a MultiKeywordSelector.
   MultiKeywordSelector *SI = getMultiKeywordSelector();
   return SI->getIdentifierInfoForSlot(argIndex);
@@ -592,11 +608,13 @@
 }
 
 namespace {
-  struct SelectorTableImpl {
-    llvm::FoldingSet<MultiKeywordSelector> Table;
-    llvm::BumpPtrAllocator Allocator;
-  };
-} // end anonymous namespace.
+
+struct SelectorTableImpl {
+  llvm::FoldingSet<MultiKeywordSelector> Table;
+  llvm::BumpPtrAllocator Allocator;
+};
+
+} // namespace
 
 static SelectorTableImpl &getSelectorTableImpl(void *P) {
   return *static_cast<SelectorTableImpl*>(P);
diff --git a/lib/Basic/LangOptions.cpp b/lib/Basic/LangOptions.cpp
index db81507..f6b45cb 100644
--- a/lib/Basic/LangOptions.cpp
+++ b/lib/Basic/LangOptions.cpp
@@ -1,4 +1,4 @@
-//===--- LangOptions.cpp - C Language Family Language Options ---*- C++ -*-===//
+//===- LangOptions.cpp - C Language Family Language Options ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,13 +10,12 @@
 //  This file defines the LangOptions class.
 //
 //===----------------------------------------------------------------------===//
+
 #include "clang/Basic/LangOptions.h"
-#include "llvm/ADT/StringRef.h"
 
 using namespace clang;
 
-LangOptions::LangOptions()
-  : IsHeaderFile(false) {
+LangOptions::LangOptions() {
 #define LANGOPT(Name, Bits, Default, Description) Name = Default;
 #define ENUM_LANGOPT(Name, Type, Bits, Default, Description) set##Name(Default);
 #include "clang/Basic/LangOptions.def"
diff --git a/lib/Basic/Module.cpp b/lib/Basic/Module.cpp
index 621b1b2..c990b27 100644
--- a/lib/Basic/Module.cpp
+++ b/lib/Basic/Module.cpp
@@ -1,4 +1,4 @@
-//===--- Module.cpp - Describe a module -----------------------------------===//
+//===- Module.cpp - Describe a module -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,23 +16,33 @@
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
 
 using namespace clang;
 
 Module::Module(StringRef Name, SourceLocation DefinitionLoc, Module *Parent,
                bool IsFramework, bool IsExplicit, unsigned VisibilityID)
-    : Name(Name), DefinitionLoc(DefinitionLoc), Parent(Parent), Directory(),
-      Umbrella(), ASTFile(nullptr), VisibilityID(VisibilityID),
-      IsMissingRequirement(false), HasIncompatibleModuleFile(false),
-      IsAvailable(true), IsFromModuleFile(false), IsFramework(IsFramework),
-      IsExplicit(IsExplicit), IsSystem(false), IsExternC(false),
-      IsInferred(false), InferSubmodules(false), InferExplicitSubmodules(false),
+    : Name(Name), DefinitionLoc(DefinitionLoc), Parent(Parent),
+      VisibilityID(VisibilityID), IsMissingRequirement(false),
+      HasIncompatibleModuleFile(false), IsAvailable(true),
+      IsFromModuleFile(false), IsFramework(IsFramework), IsExplicit(IsExplicit),
+      IsSystem(false), IsExternC(false), IsInferred(false),
+      InferSubmodules(false), InferExplicitSubmodules(false),
       InferExportWildcard(false), ConfigMacrosExhaustive(false),
       NoUndeclaredIncludes(false), NameVisibility(Hidden) {
   if (Parent) {
@@ -68,6 +78,11 @@
                         .Case("coroutines", LangOpts.CoroutinesTS)
                         .Case("cplusplus", LangOpts.CPlusPlus)
                         .Case("cplusplus11", LangOpts.CPlusPlus11)
+                        .Case("cplusplus14", LangOpts.CPlusPlus14)
+                        .Case("cplusplus17", LangOpts.CPlusPlus17)
+                        .Case("c99", LangOpts.C99)
+                        .Case("c11", LangOpts.C11)
+                        .Case("c17", LangOpts.C17)
                         .Case("freestanding", LangOpts.Freestanding)
                         .Case("gnuinlineasm", LangOpts.GNUAsm)
                         .Case("objc", LangOpts.ObjC1)
@@ -85,11 +100,16 @@
 
 bool Module::isAvailable(const LangOptions &LangOpts, const TargetInfo &Target,
                          Requirement &Req,
-                         UnresolvedHeaderDirective &MissingHeader) const {
+                         UnresolvedHeaderDirective &MissingHeader,
+                         Module *&ShadowingModule) const {
   if (IsAvailable)
     return true;
 
   for (const Module *Current = this; Current; Current = Current->Parent) {
+    if (Current->ShadowingModule) {
+      ShadowingModule = Current->ShadowingModule;
+      return false;
+    }
     for (unsigned I = 0, N = Current->Requirements.size(); I != N; ++I) {
       if (hasFeature(Current->Requirements[I].first, LangOpts, Target) !=
               Current->Requirements[I].second) {
@@ -130,6 +150,7 @@
     const std::pair<std::string, SourceLocation> &IdComponent) {
   return IdComponent.first;
 }
+
 static StringRef getModuleNameFromComponent(StringRef R) { return R; }
 
 template<typename InputIter>
diff --git a/lib/Basic/OpenMPKinds.cpp b/lib/Basic/OpenMPKinds.cpp
index b5f66e0..c4398d6 100644
--- a/lib/Basic/OpenMPKinds.cpp
+++ b/lib/Basic/OpenMPKinds.cpp
@@ -793,7 +793,7 @@
 
 bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) {
   return DKind == OMPD_target || DKind == OMPD_target_parallel ||
-         DKind == OMPD_target_parallel_for || 
+         DKind == OMPD_target_parallel_for ||
          DKind == OMPD_target_parallel_for_simd || DKind == OMPD_target_simd ||
          DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute ||
          DKind == OMPD_target_teams_distribute_parallel_for ||
@@ -829,7 +829,8 @@
          DKind == OMPD_teams_distribute_simd ||
          DKind == OMPD_teams_distribute_parallel_for_simd ||
          DKind == OMPD_target_teams_distribute_parallel_for_simd ||
-         DKind == OMPD_target_teams_distribute_simd;
+         DKind == OMPD_target_teams_distribute_simd ||
+         DKind == OMPD_target_parallel_for_simd;
 }
 
 bool clang::isOpenMPNestingDistributeDirective(OpenMPDirectiveKind Kind) {
@@ -884,16 +885,55 @@
   case OMPD_parallel_for_simd:
   case OMPD_parallel_sections:
   case OMPD_distribute_parallel_for:
+  case OMPD_distribute_parallel_for_simd:
     CaptureRegions.push_back(OMPD_parallel);
     break;
   case OMPD_target_teams:
+  case OMPD_target_teams_distribute:
+  case OMPD_target_teams_distribute_simd:
+    CaptureRegions.push_back(OMPD_task);
     CaptureRegions.push_back(OMPD_target);
     CaptureRegions.push_back(OMPD_teams);
     break;
+  case OMPD_teams:
   case OMPD_teams_distribute:
+  case OMPD_teams_distribute_simd:
     CaptureRegions.push_back(OMPD_teams);
     break;
-  case OMPD_teams:
+  case OMPD_target:
+  case OMPD_target_simd:
+    CaptureRegions.push_back(OMPD_task);
+    CaptureRegions.push_back(OMPD_target);
+    break;
+  case OMPD_teams_distribute_parallel_for:
+  case OMPD_teams_distribute_parallel_for_simd:
+    CaptureRegions.push_back(OMPD_teams);
+    CaptureRegions.push_back(OMPD_parallel);
+    break;
+  case OMPD_target_parallel:
+  case OMPD_target_parallel_for:
+  case OMPD_target_parallel_for_simd:
+    CaptureRegions.push_back(OMPD_task);
+    CaptureRegions.push_back(OMPD_target);
+    CaptureRegions.push_back(OMPD_parallel);
+    break;
+  case OMPD_task:
+  case OMPD_target_enter_data:
+  case OMPD_target_exit_data:
+  case OMPD_target_update:
+    CaptureRegions.push_back(OMPD_task);
+    break;
+  case OMPD_taskloop:
+  case OMPD_taskloop_simd:
+    CaptureRegions.push_back(OMPD_taskloop);
+    break;
+  case OMPD_target_teams_distribute_parallel_for:
+  case OMPD_target_teams_distribute_parallel_for_simd:
+    CaptureRegions.push_back(OMPD_task);
+    CaptureRegions.push_back(OMPD_target);
+    CaptureRegions.push_back(OMPD_teams);
+    CaptureRegions.push_back(OMPD_parallel);
+    break;
   case OMPD_simd:
   case OMPD_for:
   case OMPD_for_simd:
@@ -907,27 +947,8 @@
   case OMPD_ordered:
   case OMPD_atomic:
   case OMPD_target_data:
-  case OMPD_target:
-  case OMPD_target_parallel_for:
-  case OMPD_target_parallel_for_simd:
-  case OMPD_target_simd:
-  case OMPD_task:
-  case OMPD_taskloop:
-  case OMPD_taskloop_simd:
-  case OMPD_distribute_parallel_for_simd:
   case OMPD_distribute_simd:
-  case OMPD_teams_distribute_simd:
-  case OMPD_teams_distribute_parallel_for_simd:
-  case OMPD_teams_distribute_parallel_for:
-  case OMPD_target_teams_distribute:
-  case OMPD_target_teams_distribute_parallel_for:
-  case OMPD_target_teams_distribute_parallel_for_simd:
-  case OMPD_target_teams_distribute_simd:
-    CaptureRegions.push_back(DKind);
-    break;
-  case OMPD_target_parallel:
-    CaptureRegions.push_back(OMPD_target);
-    CaptureRegions.push_back(OMPD_parallel);
+    CaptureRegions.push_back(OMPD_unknown);
     break;
   case OMPD_threadprivate:
   case OMPD_taskyield:
@@ -936,13 +957,10 @@
   case OMPD_cancellation_point:
   case OMPD_cancel:
   case OMPD_flush:
-  case OMPD_target_enter_data:
-  case OMPD_target_exit_data:
   case OMPD_declare_reduction:
   case OMPD_declare_simd:
   case OMPD_declare_target:
   case OMPD_end_declare_target:
-  case OMPD_target_update:
     llvm_unreachable("OpenMP Directive is not allowed");
   case OMPD_unknown:
     llvm_unreachable("Unknown OpenMP directive");
diff --git a/lib/Basic/OperatorPrecedence.cpp b/lib/Basic/OperatorPrecedence.cpp
index 384d23c..3743b6a 100644
--- a/lib/Basic/OperatorPrecedence.cpp
+++ b/lib/Basic/OperatorPrecedence.cpp
@@ -63,6 +63,7 @@
   case tok::lessequal:
   case tok::less:
   case tok::greaterequal:         return prec::Relational;
+  case tok::spaceship:            return prec::Spaceship;
   case tok::lessless:             return prec::Shift;
   case tok::plus:
   case tok::minus:                return prec::Additive;
diff --git a/lib/Basic/SanitizerSpecialCaseList.cpp b/lib/Basic/SanitizerSpecialCaseList.cpp
index 4dd52ee..ee8feec 100644
--- a/lib/Basic/SanitizerSpecialCaseList.cpp
+++ b/lib/Basic/SanitizerSpecialCaseList.cpp
@@ -57,7 +57,7 @@
                                          StringRef Category) const {
   for (auto &S : SanitizerSections)
     if ((S.Mask & Mask) &&
-        SpecialCaseList::inSection(S.Entries, Prefix, Query, Category))
+        SpecialCaseList::inSectionBlame(S.Entries, Prefix, Query, Category))
       return true;
 
   return false;
diff --git a/lib/Basic/SourceLocation.cpp b/lib/Basic/SourceLocation.cpp
index 89ddbc9..71f74e4 100644
--- a/lib/Basic/SourceLocation.cpp
+++ b/lib/Basic/SourceLocation.cpp
@@ -1,4 +1,4 @@
-//==--- SourceLocation.cpp - Compact identifier for Source Files -*- C++ -*-==//
+//===- SourceLocation.cpp - Compact identifier for Source Files -----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,10 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/PrettyStackTrace.h"
 #include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
-#include <cstdio>
+#include <cassert>
+#include <string>
+#include <utility>
+
 using namespace clang;
 
 //===----------------------------------------------------------------------===//
@@ -81,7 +88,6 @@
   return SrcMgr->getFileID(*this);
 }
 
-
 FullSourceLoc FullSourceLoc::getExpansionLoc() const {
   assert(isValid());
   return FullSourceLoc(SrcMgr->getExpansionLoc(*this), *SrcMgr);
diff --git a/lib/Basic/SourceManager.cpp b/lib/Basic/SourceManager.cpp
index 620f05c..d4fad30 100644
--- a/lib/Basic/SourceManager.cpp
+++ b/lib/Basic/SourceManager.cpp
@@ -1,4 +1,4 @@
-//===--- SourceManager.cpp - Track and cache source files -----------------===//
+//===- SourceManager.cpp - Track and cache source files -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,17 +14,33 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/FileManager.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManagerInternals.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
-#include <cstring>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <tuple>
+#include <utility>
+#include <vector>
 
 using namespace clang;
 using namespace SrcMgr;
@@ -109,11 +125,12 @@
   // possible.
   if (!BufferOrError) {
     StringRef FillStr("<<<MISSING SOURCE FILE>>>\n");
-    Buffer.setPointer(MemoryBuffer::getNewUninitMemBuffer(
-                          ContentsEntry->getSize(), "<invalid>").release());
-    char *Ptr = const_cast<char*>(Buffer.getPointer()->getBufferStart());
+    auto BackupBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
+        ContentsEntry->getSize(), "<invalid>");
+    char *Ptr = BackupBuffer->getBufferStart();
     for (unsigned i = 0, e = ContentsEntry->getSize(); i != e; ++i)
       Ptr[i] = FillStr[i % FillStr.size()];
+    Buffer.setPointer(BackupBuffer.release());
 
     if (Diag.isDiagnosticInFlight())
       Diag.SetDelayedDiagnostic(diag::err_cannot_open_file,
@@ -153,8 +170,10 @@
   const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr)
     .StartsWith("\xFE\xFF", "UTF-16 (BE)")
     .StartsWith("\xFF\xFE", "UTF-16 (LE)")
-    .StartsWith("\x00\x00\xFE\xFF", "UTF-32 (BE)")
-    .StartsWith("\xFF\xFE\x00\x00", "UTF-32 (LE)")
+    .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
+                                                  "UTF-32 (BE)")
+    .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
+                                                  "UTF-32 (LE)")
     .StartsWith("\x2B\x2F\x76", "UTF-7")
     .StartsWith("\xF7\x64\x4C", "UTF-1")
     .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
@@ -222,7 +241,6 @@
                                    IncludeOffset));
 }
 
-
 /// FindNearestLineEntry - Find the line entry nearest to FID that is before
 /// it.  If there is no line entry before Offset in FID, return null.
 const LineEntry *LineTableInfo::FindNearestLineEntry(FileID FID,
@@ -250,7 +268,6 @@
 }
 
 /// getLineTableFilenameID - Return the uniqued ID for the specified filename.
-///
 unsigned SourceManager::getLineTableFilenameID(StringRef Name) {
   return getLineTable().getLineTableFilenameID(Name);
 }
@@ -298,10 +315,7 @@
 
 SourceManager::SourceManager(DiagnosticsEngine &Diag, FileManager &FileMgr,
                              bool UserFilesAreVolatile)
-  : Diag(Diag), FileMgr(FileMgr), OverridenFilesKeepOriginalName(true),
-    UserFilesAreVolatile(UserFilesAreVolatile), FilesAreTransient(false),
-    ExternalSLocEntries(nullptr), LineTable(nullptr), NumLinearScans(0),
-    NumBinaryProbes(0) {
+  : Diag(Diag), FileMgr(FileMgr), UserFilesAreVolatile(UserFilesAreVolatile) {
   clearIDTables();
   Diag.setSourceManager(this);
 }
@@ -342,7 +356,7 @@
   // Use up FileID #0 as an invalid expansion.
   NextLocalOffset = 0;
   CurrentLoadedOffset = MaxLoadedOffset;
-  createExpansionLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
+  createExpansionLoc(SourceLocation(), SourceLocation(), SourceLocation(), 1);
 }
 
 void SourceManager::initializeForReplay(const SourceManager &Old) {
@@ -408,7 +422,6 @@
   return Entry;
 }
 
-
 /// Create a new ContentCache for the specified memory buffer.
 /// This does no caching.
 const ContentCache *
@@ -716,7 +729,7 @@
   // Find the FileID that contains this.  "I" is an iterator that points to a
   // FileID whose offset is known to be larger than SLocOffset.
   unsigned NumProbes = 0;
-  while (1) {
+  while (true) {
     --I;
     if (I->getOffset() <= SLocOffset) {
       FileID Res = FileID::get(int(I - LocalSLocEntryTable.begin()));
@@ -740,7 +753,7 @@
   // SLocOffset.
   unsigned LessIndex = 0;
   NumProbes = 0;
-  while (1) {
+  while (true) {
     bool Invalid = false;
     unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
     unsigned MidOffset = getLocalSLocEntry(MiddleIndex, &Invalid).getOffset();
@@ -817,7 +830,7 @@
   unsigned GreaterIndex = I;
   unsigned LessIndex = LoadedSLocEntryTable.size();
   NumProbes = 0;
-  while (1) {
+  while (true) {
     ++NumProbes;
     unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex;
     const SrcMgr::SLocEntry &E = getLoadedSLocEntry(MiddleIndex);
@@ -935,7 +948,6 @@
   return Loc.getLocWithOffset(LocInfo.second);
 }
 
-
 /// getImmediateExpansionRange - Loc is required to be an expansion location.
 /// Return the start/end of the expansion information.
 std::pair<SourceLocation,SourceLocation>
@@ -945,6 +957,12 @@
   return Expansion.getExpansionLocRange();
 }
 
+SourceLocation SourceManager::getTopMacroCallerLoc(SourceLocation Loc) const {
+  while (isMacroArgExpansion(Loc))
+    Loc = getImmediateSpellingLoc(Loc);
+  return Loc;
+}
+
 /// getExpansionRange - Given a SourceLocation object, return the range of
 /// tokens covered by the expansion in the ultimate file.
 std::pair<SourceLocation,SourceLocation>
@@ -1055,7 +1073,6 @@
   return true;
 }
 
-
 //===----------------------------------------------------------------------===//
 // Queries about the code at a SourceLocation.
 //===----------------------------------------------------------------------===//
@@ -1084,7 +1101,6 @@
   return Buffer->getBufferStart() + (CharDataInvalid? 0 : LocInfo.second);
 }
 
-
 /// getColumnNumber - Return the column # for the specified file position.
 /// this is significantly cheaper to compute than the line number.
 unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos,
@@ -1189,7 +1205,7 @@
   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
   unsigned Offs = 0;
-  while (1) {
+  while (true) {
     // Skip over the contents of the line.
     const unsigned char *NextBuf = (const unsigned char *)Buf;
 
@@ -1419,7 +1435,6 @@
   return getBuffer(getFileID(Loc), Invalid)->getBufferIdentifier();
 }
 
-
 /// getPresumedLoc - This method returns the "presumed" location of a
 /// SourceLocation specifies.  A "presumed location" can be modified by \#line
 /// or GNU line marker directives.  This provides a view on the data that a
@@ -1767,7 +1782,7 @@
   MacroArgsCache.insert(std::make_pair(0, SourceLocation()));
 
   int ID = FID.ID;
-  while (1) {
+  while (true) {
     ++ID;
     // Stop if there are no more FileIDs to check.
     if (ID > 0) {
@@ -1830,7 +1845,7 @@
     FileID SpellFID; // Current FileID in the spelling range.
     unsigned SpellRelativeOffs;
     std::tie(SpellFID, SpellRelativeOffs) = getDecomposedLoc(SpellLoc);
-    while (1) {
+    while (true) {
       const SLocEntry &Entry = getSLocEntry(SpellFID);
       unsigned SpellFIDBeginOffs = Entry.getOffset();
       unsigned SpellFIDSize = getFileIDSize(SpellFID);
@@ -1857,7 +1872,6 @@
       ++SpellFID.ID;
       SpellRelativeOffs = 0;
     }
-
   }
 
   assert(SpellLoc.isFileID());
@@ -1937,8 +1951,8 @@
 
   // Uses IncludedLocMap to retrieve/cache the decomposed loc.
 
-  typedef std::pair<FileID, unsigned> DecompTy;
-  typedef llvm::DenseMap<FileID, DecompTy> MapTy;
+  using DecompTy = std::pair<FileID, unsigned>;
+  using MapTy = llvm::DenseMap<FileID, DecompTy>;
   std::pair<MapTy::iterator, bool>
     InsertOp = IncludedLocMap.insert(std::make_pair(FID, DecompTy()));
   DecompTy &DecompLoc = InsertOp.first->second;
@@ -2085,7 +2099,7 @@
   // of the other looking for a match.
   // We use a map from FileID to Offset to store the chain. Easier than writing
   // a custom set hash info that only depends on the first part of a pair.
-  typedef llvm::SmallDenseMap<FileID, unsigned, 16> LocSet;
+  using LocSet = llvm::SmallDenseMap<FileID, unsigned, 16>;
   LocSet LChain;
   do {
     LChain.insert(LOffs);
@@ -2197,7 +2211,7 @@
   }
 }
 
-ExternalSLocEntrySource::~ExternalSLocEntrySource() { }
+ExternalSLocEntrySource::~ExternalSLocEntrySource() = default;
 
 /// Return the amount of memory used by memory buffers, breaking down
 /// by heap-backed versus mmap'ed memory.
diff --git a/lib/Basic/TargetInfo.cpp b/lib/Basic/TargetInfo.cpp
index 9ae466b..ddd292c 100644
--- a/lib/Basic/TargetInfo.cpp
+++ b/lib/Basic/TargetInfo.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Basic/TargetInfo.h"
-#include "clang/AST/Type.h"
 #include "clang/Basic/AddressSpaces.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/LangOptions.h"
@@ -31,6 +30,7 @@
   // SPARC.  These should be overridden by concrete targets as needed.
   BigEndian = !T.isLittleEndian();
   TLSSupported = true;
+  VLASupported = true;
   NoAsmVariants = false;
   HasFloat128 = false;
   PointerWidth = PointerAlign = 32;
@@ -44,7 +44,7 @@
   // From the glibc documentation, on GNU systems, malloc guarantees 16-byte
   // alignment on 64-bit systems and 8-byte alignment on 32-bit systems. See
   // https://www.gnu.org/software/libc/manual/html_node/Malloc-Examples.html
-  if (T.isGNUEnvironment())
+  if (T.isGNUEnvironment() || T.isWindowsMSVCEnvironment())
     NewAlign = Triple.isArch64Bit() ? 128 : Triple.isArch32Bit() ? 64 : 0;
   else
     NewAlign = 0; // Infer from basic type alignment.
@@ -356,23 +356,13 @@
   return true;
 }
 
-LangAS TargetInfo::getOpenCLTypeAddrSpace(const Type *T) const {
-  auto BT = dyn_cast<BuiltinType>(T);
-
-  if (!BT) {
-    if (isa<PipeType>(T))
-      return LangAS::opencl_global;
-
-    return LangAS::Default;
-  }
-
-  switch (BT->getKind()) {
-#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix)                   \
-  case BuiltinType::Id:                                                        \
+LangAS TargetInfo::getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const {
+  switch (TK) {
+  case OCLTK_Image:
+  case OCLTK_Pipe:
     return LangAS::opencl_global;
-#include "clang/Basic/OpenCLImageTypes.def"
 
-  case BuiltinType::OCLSampler:
+  case OCLTK_Sampler:
     return LangAS::opencl_constant;
 
   default:
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index a9a5f4d..e325403 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -29,6 +29,7 @@
 #include "Targets/OSTargets.h"
 #include "Targets/PNaCl.h"
 #include "Targets/PPC.h"
+#include "Targets/RISCV.h"
 #include "Targets/SPIR.h"
 #include "Targets/Sparc.h"
 #include "Targets/SystemZ.h"
@@ -37,6 +38,7 @@
 #include "Targets/X86.h"
 #include "Targets/XCore.h"
 #include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 
 using namespace clang;
@@ -97,7 +99,14 @@
   }
 }
 
-void addMinGWDefines(const LangOptions &Opts, MacroBuilder &Builder) {
+void addMinGWDefines(const llvm::Triple &Triple, const LangOptions &Opts,
+                     MacroBuilder &Builder) {
+  DefineStd(Builder, "WIN32", Opts);
+  DefineStd(Builder, "WINNT", Opts);
+  if (Triple.isArch64Bit()) {
+    DefineStd(Builder, "WIN64", Opts);
+    Builder.defineMacro("__MINGW64__");
+  }
   Builder.defineMacro("__MSVCRT__");
   Builder.defineMacro("__MINGW32__");
   addCygMingDefines(Opts, Builder);
@@ -363,6 +372,17 @@
   case llvm::Triple::r600:
     return new AMDGPUTargetInfo(Triple, Opts);
 
+  case llvm::Triple::riscv32:
+    // TODO: add cases for FreeBSD, NetBSD, RTEMS once tested.
+    if (os == llvm::Triple::Linux)
+      return new LinuxTargetInfo<RISCV32TargetInfo>(Triple, Opts);
+    return new RISCV32TargetInfo(Triple, Opts);
+  case llvm::Triple::riscv64:
+    // TODO: add cases for FreeBSD, NetBSD, RTEMS once tested.
+    if (os == llvm::Triple::Linux)
+      return new LinuxTargetInfo<RISCV64TargetInfo>(Triple, Opts);
+    return new RISCV64TargetInfo(Triple, Opts);
+
   case llvm::Triple::sparc:
     switch (os) {
     case llvm::Triple::Linux:
@@ -588,6 +608,10 @@
   // Set the target CPU if specified.
   if (!Opts->CPU.empty() && !Target->setCPU(Opts->CPU)) {
     Diags.Report(diag::err_target_unknown_cpu) << Opts->CPU;
+    SmallVector<StringRef, 32> ValidList;
+    Target->fillValidCPUList(ValidList);
+    if (!ValidList.empty())
+      Diags.Report(diag::note_valid_options) << llvm::join(ValidList, ", ");
     return nullptr;
   }
 
diff --git a/lib/Basic/Targets.h b/lib/Basic/Targets.h
index 12a23fa..6fc967d 100644
--- a/lib/Basic/Targets.h
+++ b/lib/Basic/Targets.h
@@ -40,7 +40,7 @@
                      bool Tuning = true);
 
 LLVM_LIBRARY_VISIBILITY
-void addMinGWDefines(const clang::LangOptions &Opts,
+void addMinGWDefines(const llvm::Triple &Triple, const clang::LangOptions &Opts,
                      clang::MacroBuilder &Builder);
 
 LLVM_LIBRARY_VISIBILITY
diff --git a/lib/Basic/Targets/AArch64.cpp b/lib/Basic/Targets/AArch64.cpp
index f58bf9f..51e6830 100644
--- a/lib/Basic/Targets/AArch64.cpp
+++ b/lib/Basic/Targets/AArch64.cpp
@@ -101,6 +101,11 @@
   return isValidCPUName(Name);
 }
 
+void AArch64TargetInfo::fillValidCPUList(
+    SmallVectorImpl<StringRef> &Values) const {
+  llvm::AArch64::fillValidCPUArchList(Values);
+}
+
 void AArch64TargetInfo::getTargetDefinesARMV81A(const LangOptions &Opts,
                                                 MacroBuilder &Builder) const {
   Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
@@ -159,7 +164,7 @@
     Builder.defineMacro("__ARM_FP_FAST", "1");
 
   Builder.defineMacro("__ARM_SIZEOF_WCHAR_T",
-                      llvm::utostr(Opts.WCharSize ? Opts.WCharSize : 4));
+                      Twine(Opts.WCharSize ? Opts.WCharSize : 4));
 
   Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4");
 
@@ -181,6 +186,11 @@
   if (Unaligned)
     Builder.defineMacro("__ARM_FEATURE_UNALIGNED", "1");
 
+  if ((FPU & NeonMode) && HasFullFP16)
+    Builder.defineMacro("__ARM_FEATURE_FP16_VECTOR_ARITHMETIC", "1");
+  if (HasFullFP16)
+   Builder.defineMacro("__ARM_FEATURE_FP16_SCALAR_ARITHMETIC", "1");
+
   switch (ArchKind) {
   default:
     break;
@@ -474,8 +484,6 @@
 void MicrosoftARM64TargetInfo::getVisualStudioDefines(
     const LangOptions &Opts, MacroBuilder &Builder) const {
   WindowsTargetInfo<AArch64leTargetInfo>::getVisualStudioDefines(Opts, Builder);
-  Builder.defineMacro("_WIN32", "1");
-  Builder.defineMacro("_WIN64", "1");
   Builder.defineMacro("_M_ARM64", "1");
 }
 
@@ -491,17 +499,6 @@
   TheCXXABI.set(TargetCXXABI::GenericAArch64);
 }
 
-void MinGWARM64TargetInfo::getTargetDefines(const LangOptions &Opts,
-                                            MacroBuilder &Builder) const {
-  WindowsTargetInfo::getTargetDefines(Opts, Builder);
-  Builder.defineMacro("_WIN32", "1");
-  Builder.defineMacro("_WIN64", "1");
-  Builder.defineMacro("WIN32", "1");
-  Builder.defineMacro("WIN64", "1");
-  addMinGWDefines(Opts, Builder);
-}
-
-
 DarwinAArch64TargetInfo::DarwinAArch64TargetInfo(const llvm::Triple &Triple,
                                                  const TargetOptions &Opts)
     : DarwinTargetInfo<AArch64leTargetInfo>(Triple, Opts) {
diff --git a/lib/Basic/Targets/AArch64.h b/lib/Basic/Targets/AArch64.h
index 8391b73..8feb832 100644
--- a/lib/Basic/Targets/AArch64.h
+++ b/lib/Basic/Targets/AArch64.h
@@ -46,8 +46,13 @@
   bool setABI(const std::string &Name) override;
 
   bool isValidCPUName(StringRef Name) const override;
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
   bool setCPU(const std::string &Name) override;
 
+  bool useFP16ConversionIntrinsics() const override {
+    return false;
+  }
+
   void getTargetDefinesARMV81A(const LangOptions &Opts,
                                MacroBuilder &Builder) const;
   void getTargetDefinesARMV82A(const LangOptions &Opts,
@@ -122,9 +127,6 @@
     : public WindowsARM64TargetInfo {
 public:
   MinGWARM64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts);
-
-  void getTargetDefines(const LangOptions &Opts,
-                        MacroBuilder &Builder) const override;
 };
 
 class LLVM_LIBRARY_VISIBILITY AArch64beTargetInfo : public AArch64TargetInfo {
diff --git a/lib/Basic/Targets/AMDGPU.cpp b/lib/Basic/Targets/AMDGPU.cpp
index b8740ca..a926722 100644
--- a/lib/Basic/Targets/AMDGPU.cpp
+++ b/lib/Basic/Targets/AMDGPU.cpp
@@ -30,64 +30,35 @@
 
 static const char *const DataLayoutStringR600 =
     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
-    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
+    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
 
-static const char *const DataLayoutStringSIPrivateIsZero =
-    "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
-    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
-    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
-
-static const char *const DataLayoutStringSIGenericIsZero =
-    "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
+static const char *const DataLayoutStringAMDGCN =
+    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
 
-static const LangASMap AMDGPUPrivIsZeroDefIsGenMap = {
-    4, // Default
-    1, // opencl_global
-    3, // opencl_local
-    2, // opencl_constant
-    0, // opencl_private
-    4, // opencl_generic
-    1, // cuda_device
-    2, // cuda_constant
-    3  // cuda_shared
+const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
+    Generic,  // Default
+    Global,   // opencl_global
+    Local,    // opencl_local
+    Constant, // opencl_constant
+    Private,  // opencl_private
+    Generic,  // opencl_generic
+    Global,   // cuda_device
+    Constant, // cuda_constant
+    Local     // cuda_shared
 };
 
-static const LangASMap AMDGPUGenIsZeroDefIsGenMap = {
-    0, // Default
-    1, // opencl_global
-    3, // opencl_local
-    2, // opencl_constant
-    5, // opencl_private
-    0, // opencl_generic
-    1, // cuda_device
-    2, // cuda_constant
-    3  // cuda_shared
-};
-
-static const LangASMap AMDGPUPrivIsZeroDefIsPrivMap = {
-    0, // Default
-    1, // opencl_global
-    3, // opencl_local
-    2, // opencl_constant
-    0, // opencl_private
-    4, // opencl_generic
-    1, // cuda_device
-    2, // cuda_constant
-    3  // cuda_shared
-};
-
-static const LangASMap AMDGPUGenIsZeroDefIsPrivMap = {
-    5, // Default
-    1, // opencl_global
-    3, // opencl_local
-    2, // opencl_constant
-    5, // opencl_private
-    0, // opencl_generic
-    1, // cuda_device
-    2, // cuda_constant
-    3  // cuda_shared
+const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
+    Private,  // Default
+    Global,   // opencl_global
+    Local,    // opencl_local
+    Constant, // opencl_constant
+    Private,  // opencl_private
+    Generic,  // opencl_generic
+    Global,   // cuda_device
+    Constant, // cuda_constant
+    Local     // cuda_shared
 };
 } // namespace targets
 } // namespace clang
@@ -144,7 +115,7 @@
   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
-  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 
+  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
   "flat_scratch_lo", "flat_scratch_hi"
 };
 
@@ -157,49 +128,62 @@
     const std::vector<std::string> &FeatureVec) const {
 
   // XXX - What does the member GPU mean if device name string passed here?
-  if (getTriple().getArch() == llvm::Triple::amdgcn) {
+  if (isAMDGCN(getTriple())) {
     if (CPU.empty())
-      CPU = "tahiti";
+      CPU = "gfx600";
 
-    switch (parseAMDGCNName(CPU)) {
-    case GK_GFX6:
-    case GK_GFX7:
-      break;
-
-    case GK_GFX9:
+    switch (parseAMDGCNName(CPU).Kind) {
+    case GK_GFX902:
+    case GK_GFX900:
       Features["gfx9-insts"] = true;
       LLVM_FALLTHROUGH;
-    case GK_GFX8:
-      Features["s-memrealtime"] = true;
+    case GK_GFX810:
+    case GK_GFX803:
+    case GK_GFX802:
+    case GK_GFX801:
       Features["16-bit-insts"] = true;
       Features["dpp"] = true;
+      Features["s-memrealtime"] = true;
       break;
-
+    case GK_GFX704:
+    case GK_GFX703:
+    case GK_GFX702:
+    case GK_GFX701:
+    case GK_GFX700:
+    case GK_GFX601:
+    case GK_GFX600:
+      break;
     case GK_NONE:
       return false;
     default:
-      llvm_unreachable("unhandled subtarget");
+      llvm_unreachable("Unhandled GPU!");
     }
   } else {
     if (CPU.empty())
       CPU = "r600";
 
-    switch (parseR600Name(CPU)) {
-    case GK_R600:
-    case GK_R700:
-    case GK_EVERGREEN:
-    case GK_NORTHERN_ISLANDS:
-      break;
-    case GK_R600_DOUBLE_OPS:
-    case GK_R700_DOUBLE_OPS:
-    case GK_EVERGREEN_DOUBLE_OPS:
+    switch (parseR600Name(CPU).Kind) {
     case GK_CAYMAN:
-      Features["fp64"] = true;
+    case GK_CYPRESS:
+    case GK_RV770:
+    case GK_RV670:
+      // TODO: Add fp64 when implemented.
       break;
-    case GK_NONE:
-      return false;
+    case GK_TURKS:
+    case GK_CAICOS:
+    case GK_BARTS:
+    case GK_SUMO:
+    case GK_REDWOOD:
+    case GK_JUNIPER:
+    case GK_CEDAR:
+    case GK_RV730:
+    case GK_RV710:
+    case GK_RS880:
+    case GK_R630:
+    case GK_R600:
+      break;
     default:
-      llvm_unreachable("unhandled subtarget");
+      llvm_unreachable("Unhandled GPU!");
     }
   }
 
@@ -210,6 +194,7 @@
                                            TargetOptions &TargetOpts) const {
   bool hasFP32Denormals = false;
   bool hasFP64Denormals = false;
+  GPUInfo CGOptsGPU = parseGPUName(TargetOpts.CPU);
   for (auto &I : TargetOpts.FeaturesAsWritten) {
     if (I == "+fp32-denormals" || I == "-fp32-denormals")
       hasFP32Denormals = true;
@@ -218,120 +203,68 @@
   }
   if (!hasFP32Denormals)
     TargetOpts.Features.push_back(
-        (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) && !CGOpts.FlushDenorm
+        (Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm
                    ? '+'
                    : '-') +
          Twine("fp32-denormals"))
             .str());
   // Always do not flush fp64 or fp16 denorms.
-  if (!hasFP64Denormals && hasFP64)
+  if (!hasFP64Denormals && CGOptsGPU.HasFP64)
     TargetOpts.Features.push_back("+fp64-fp16-denormals");
 }
 
-AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseR600Name(StringRef Name) {
-  return llvm::StringSwitch<GPUKind>(Name)
-      .Case("r600", GK_R600)
-      .Case("rv610", GK_R600)
-      .Case("rv620", GK_R600)
-      .Case("rv630", GK_R600)
-      .Case("rv635", GK_R600)
-      .Case("rs780", GK_R600)
-      .Case("rs880", GK_R600)
-      .Case("rv670", GK_R600_DOUBLE_OPS)
-      .Case("rv710", GK_R700)
-      .Case("rv730", GK_R700)
-      .Case("rv740", GK_R700_DOUBLE_OPS)
-      .Case("rv770", GK_R700_DOUBLE_OPS)
-      .Case("palm", GK_EVERGREEN)
-      .Case("cedar", GK_EVERGREEN)
-      .Case("sumo", GK_EVERGREEN)
-      .Case("sumo2", GK_EVERGREEN)
-      .Case("redwood", GK_EVERGREEN)
-      .Case("juniper", GK_EVERGREEN)
-      .Case("hemlock", GK_EVERGREEN_DOUBLE_OPS)
-      .Case("cypress", GK_EVERGREEN_DOUBLE_OPS)
-      .Case("barts", GK_NORTHERN_ISLANDS)
-      .Case("turks", GK_NORTHERN_ISLANDS)
-      .Case("caicos", GK_NORTHERN_ISLANDS)
-      .Case("cayman", GK_CAYMAN)
-      .Case("aruba", GK_CAYMAN)
-      .Default(GK_NONE);
+constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::InvalidGPU;
+constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::R600GPUs[];
+constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::AMDGCNGPUs[];
+
+AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseR600Name(StringRef Name) {
+  const auto *Result = llvm::find_if(
+      R600GPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
+
+  if (Result == std::end(R600GPUs))
+    return InvalidGPU;
+  return *Result;
 }
 
-AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
-  return llvm::StringSwitch<GPUKind>(Name)
-      .Case("gfx600", GK_GFX6)
-      .Case("tahiti", GK_GFX6)
-      .Case("gfx601", GK_GFX6)
-      .Case("pitcairn", GK_GFX6)
-      .Case("verde", GK_GFX6)
-      .Case("oland", GK_GFX6)
-      .Case("hainan", GK_GFX6)
-      .Case("gfx700", GK_GFX7)
-      .Case("bonaire", GK_GFX7)
-      .Case("kaveri", GK_GFX7)
-      .Case("gfx701", GK_GFX7)
-      .Case("hawaii", GK_GFX7)
-      .Case("gfx702", GK_GFX7)
-      .Case("gfx703", GK_GFX7)
-      .Case("kabini", GK_GFX7)
-      .Case("mullins", GK_GFX7)
-      .Case("gfx800", GK_GFX8)
-      .Case("iceland", GK_GFX8)
-      .Case("gfx801", GK_GFX8)
-      .Case("carrizo", GK_GFX8)
-      .Case("gfx802", GK_GFX8)
-      .Case("tonga", GK_GFX8)
-      .Case("gfx803", GK_GFX8)
-      .Case("fiji", GK_GFX8)
-      .Case("polaris10", GK_GFX8)
-      .Case("polaris11", GK_GFX8)
-      .Case("gfx804", GK_GFX8)
-      .Case("gfx810", GK_GFX8)
-      .Case("stoney", GK_GFX8)
-      .Case("gfx900", GK_GFX9)
-      .Case("gfx901", GK_GFX9)
-      .Case("gfx902", GK_GFX9)
-      .Case("gfx903", GK_GFX9)
-      .Default(GK_NONE);
+AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
+  const auto *Result = llvm::find_if(
+      AMDGCNGPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
+
+  if (Result == std::end(AMDGCNGPUs))
+    return InvalidGPU;
+  return *Result;
+}
+
+AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseGPUName(StringRef Name) const {
+  if (isAMDGCN(getTriple()))
+    return parseAMDGCNName(Name);
+  else
+    return parseR600Name(Name);
+}
+
+void AMDGPUTargetInfo::fillValidCPUList(
+    SmallVectorImpl<StringRef> &Values) const {
+  if (isAMDGCN(getTriple()))
+    llvm::for_each(AMDGCNGPUs, [&Values](const GPUInfo &GPU) {
+                   Values.emplace_back(GPU.Name);});
+  else
+    llvm::for_each(R600GPUs, [&Values](const GPUInfo &GPU) {
+                   Values.emplace_back(GPU.Name);});
 }
 
 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
-  if (isGenericZero(getTriple())) {
-    AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap
-                                    : &AMDGPUGenIsZeroDefIsGenMap;
-  } else {
-    AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap
-                                    : &AMDGPUPrivIsZeroDefIsGenMap;
-  }
+  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
 }
 
 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
                                    const TargetOptions &Opts)
     : TargetInfo(Triple),
-      GPU(isAMDGCN(Triple) ? GK_GFX6 : parseR600Name(Opts.CPU)),
-      hasFP64(false), hasFMAF(false), hasLDEXPF(false),
-      AS(isGenericZero(Triple)) {
-  if (getTriple().getArch() == llvm::Triple::amdgcn) {
-    hasFP64 = true;
-    hasFMAF = true;
-    hasLDEXPF = true;
-  }
-  if (getTriple().getArch() == llvm::Triple::r600) {
-    if (GPU == GK_EVERGREEN_DOUBLE_OPS || GPU == GK_CAYMAN) {
-      hasFMAF = true;
-    }
-  }
-  auto IsGenericZero = isGenericZero(Triple);
-  resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn
-                      ? (IsGenericZero ? DataLayoutStringSIGenericIsZero
-                                       : DataLayoutStringSIPrivateIsZero)
-                      : DataLayoutStringR600);
-  assert(DataLayout->getAllocaAddrSpace() == AS.Private);
+      GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) {
+  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
+                                        : DataLayoutStringR600);
+  assert(DataLayout->getAllocaAddrSpace() == Private);
 
   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
-                     Triple.getEnvironment() == llvm::Triple::OpenCL ||
-                     Triple.getEnvironmentName() == "amdgizcl" ||
                      !isAMDGCN(Triple));
   UseAddrSpaceMapMangling = true;
 
@@ -349,7 +282,11 @@
 
 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
   TargetInfo::adjust(Opts);
-  setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple()));
+  // ToDo: There are still a few places using default address space as private
+  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
+  // can be removed from the following line.
+  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
+                     !isAMDGCN(getTriple()));
 }
 
 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
@@ -359,15 +296,27 @@
 
 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
                                         MacroBuilder &Builder) const {
-  if (getTriple().getArch() == llvm::Triple::amdgcn)
+  Builder.defineMacro("__AMD__");
+  Builder.defineMacro("__AMDGPU__");
+
+  if (isAMDGCN(getTriple()))
     Builder.defineMacro("__AMDGCN__");
   else
     Builder.defineMacro("__R600__");
 
-  if (hasFMAF)
+  if (GPU.Kind != GK_NONE)
+    Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__"));
+
+  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
+  // removed in the near future.
+  if (GPU.HasFMAF)
     Builder.defineMacro("__HAS_FMAF__");
-  if (hasLDEXPF)
+  if (GPU.HasFastFMAF)
+    Builder.defineMacro("FP_FAST_FMAF");
+  if (GPU.HasLDEXPF)
     Builder.defineMacro("__HAS_LDEXPF__");
-  if (hasFP64)
+  if (GPU.HasFP64)
     Builder.defineMacro("__HAS_FP64__");
+  if (GPU.HasFastFMA)
+    Builder.defineMacro("FP_FAST_FMA");
 }
diff --git a/lib/Basic/Targets/AMDGPU.h b/lib/Basic/Targets/AMDGPU.h
index 4cbf0f2..603c9b2 100644
--- a/lib/Basic/Targets/AMDGPU.h
+++ b/lib/Basic/Targets/AMDGPU.h
@@ -14,7 +14,6 @@
 #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
 #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
 
-#include "clang/AST/Type.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
 #include "llvm/ADT/StringSet.h"
@@ -29,60 +28,153 @@
   static const Builtin::Info BuiltinInfo[];
   static const char *const GCCRegNames[];
 
-  struct LLVM_LIBRARY_VISIBILITY AddrSpace {
-    unsigned Generic, Global, Local, Constant, Private;
-    AddrSpace(bool IsGenericZero_ = false) {
-      if (IsGenericZero_) {
-        Generic = 0;
-        Global = 1;
-        Local = 3;
-        Constant = 2;
-        Private = 5;
-      } else {
-        Generic = 4;
-        Global = 1;
-        Local = 3;
-        Constant = 2;
-        Private = 0;
-      }
-    }
+  enum AddrSpace {
+    Generic = 0,
+    Global = 1,
+    Local = 3,
+    Constant = 4,
+    Private = 5
+  };
+  static const LangASMap AMDGPUDefIsGenMap;
+  static const LangASMap AMDGPUDefIsPrivMap;
+
+  /// \brief GPU kinds supported by the AMDGPU target.
+  enum GPUKind : uint32_t {
+    // Not specified processor.
+    GK_NONE = 0,
+
+    // R600-based processors.
+    GK_R600,
+    GK_R630,
+    GK_RS880,
+    GK_RV670,
+    GK_RV710,
+    GK_RV730,
+    GK_RV770,
+    GK_CEDAR,
+    GK_CYPRESS,
+    GK_JUNIPER,
+    GK_REDWOOD,
+    GK_SUMO,
+    GK_BARTS,
+    GK_CAICOS,
+    GK_CAYMAN,
+    GK_TURKS,
+
+    GK_R600_FIRST = GK_R600,
+    GK_R600_LAST = GK_TURKS,
+
+    // AMDGCN-based processors.
+    GK_GFX600,
+    GK_GFX601,
+    GK_GFX700,
+    GK_GFX701,
+    GK_GFX702,
+    GK_GFX703,
+    GK_GFX704,
+    GK_GFX801,
+    GK_GFX802,
+    GK_GFX803,
+    GK_GFX810,
+    GK_GFX900,
+    GK_GFX902,
+
+    GK_AMDGCN_FIRST = GK_GFX600,
+    GK_AMDGCN_LAST = GK_GFX902,
   };
 
-  /// \brief The GPU profiles supported by the AMDGPU target.
-  enum GPUKind {
-    GK_NONE,
-    GK_R600,
-    GK_R600_DOUBLE_OPS,
-    GK_R700,
-    GK_R700_DOUBLE_OPS,
-    GK_EVERGREEN,
-    GK_EVERGREEN_DOUBLE_OPS,
-    GK_NORTHERN_ISLANDS,
-    GK_CAYMAN,
-    GK_GFX6,
-    GK_GFX7,
-    GK_GFX8,
-    GK_GFX9
-  } GPU;
+  struct GPUInfo {
+    llvm::StringLiteral Name;
+    llvm::StringLiteral CanonicalName;
+    AMDGPUTargetInfo::GPUKind Kind;
+    bool HasFMAF;
+    bool HasFastFMAF;
+    bool HasLDEXPF;
+    bool HasFP64;
+    bool HasFastFMA;
+  };
 
-  bool hasFP64 : 1;
-  bool hasFMAF : 1;
-  bool hasLDEXPF : 1;
-  const AddrSpace AS;
+  static constexpr GPUInfo InvalidGPU =
+    {{""}, {""}, GK_NONE, false, false, false, false, false};
+  static constexpr GPUInfo R600GPUs[26] = {
+  // Name         Canonical    Kind        Has    Has    Has    Has    Has
+  //              Name                     FMAF   Fast   LDEXPF FP64   Fast
+  //                                              FMAF                 FMA
+    {{"r600"},    {"r600"},    GK_R600,    false, false, false, false, false},
+    {{"rv630"},   {"r600"},    GK_R600,    false, false, false, false, false},
+    {{"rv635"},   {"r600"},    GK_R600,    false, false, false, false, false},
+    {{"r630"},    {"r630"},    GK_R630,    false, false, false, false, false},
+    {{"rs780"},   {"rs880"},   GK_RS880,   false, false, false, false, false},
+    {{"rs880"},   {"rs880"},   GK_RS880,   false, false, false, false, false},
+    {{"rv610"},   {"rs880"},   GK_RS880,   false, false, false, false, false},
+    {{"rv620"},   {"rs880"},   GK_RS880,   false, false, false, false, false},
+    {{"rv670"},   {"rv670"},   GK_RV670,   false, false, false, false, false},
+    {{"rv710"},   {"rv710"},   GK_RV710,   false, false, false, false, false},
+    {{"rv730"},   {"rv730"},   GK_RV730,   false, false, false, false, false},
+    {{"rv740"},   {"rv770"},   GK_RV770,   false, false, false, false, false},
+    {{"rv770"},   {"rv770"},   GK_RV770,   false, false, false, false, false},
+    {{"cedar"},   {"cedar"},   GK_CEDAR,   false, false, false, false, false},
+    {{"palm"},    {"cedar"},   GK_CEDAR,   false, false, false, false, false},
+    {{"cypress"}, {"cypress"}, GK_CYPRESS, true,  false, false, false, false},
+    {{"hemlock"}, {"cypress"}, GK_CYPRESS, true,  false, false, false, false},
+    {{"juniper"}, {"juniper"}, GK_JUNIPER, false, false, false, false, false},
+    {{"redwood"}, {"redwood"}, GK_REDWOOD, false, false, false, false, false},
+    {{"sumo"},    {"sumo"},    GK_SUMO,    false, false, false, false, false},
+    {{"sumo2"},   {"sumo"},    GK_SUMO,    false, false, false, false, false},
+    {{"barts"},   {"barts"},   GK_BARTS,   false, false, false, false, false},
+    {{"caicos"},  {"caicos"},  GK_BARTS,   false, false, false, false, false},
+    {{"aruba"},   {"cayman"},  GK_CAYMAN,  true,  false, false, false, false},
+    {{"cayman"},  {"cayman"},  GK_CAYMAN,  true,  false, false, false, false},
+    {{"turks"},   {"turks"},   GK_TURKS,   false, false, false, false, false},
+  };
+  static constexpr GPUInfo AMDGCNGPUs[30] = {
+  // Name           Canonical    Kind        Has   Has    Has    Has   Has
+  //                Name                     FMAF  Fast   LDEXPF FP64  Fast
+  //                                               FMAF                FMA
+    {{"gfx600"},    {"gfx600"},  GK_GFX600,  true, true,  true,  true, true},
+    {{"tahiti"},    {"gfx600"},  GK_GFX600,  true, true,  true,  true, true},
+    {{"gfx601"},    {"gfx601"},  GK_GFX601,  true, false, true,  true, true},
+    {{"hainan"},    {"gfx601"},  GK_GFX601,  true, false, true,  true, true},
+    {{"oland"},     {"gfx601"},  GK_GFX601,  true, false, true,  true, true},
+    {{"pitcairn"},  {"gfx601"},  GK_GFX601,  true, false, true,  true, true},
+    {{"verde"},     {"gfx601"},  GK_GFX601,  true, false, true,  true, true},
+    {{"gfx700"},    {"gfx700"},  GK_GFX700,  true, false, true,  true, true},
+    {{"kaveri"},    {"gfx700"},  GK_GFX700,  true, false, true,  true, true},
+    {{"gfx701"},    {"gfx701"},  GK_GFX701,  true, true,  true,  true, true},
+    {{"hawaii"},    {"gfx701"},  GK_GFX701,  true, true,  true,  true, true},
+    {{"gfx702"},    {"gfx702"},  GK_GFX702,  true, true,  true,  true, true},
+    {{"gfx703"},    {"gfx703"},  GK_GFX703,  true, false, true,  true, true},
+    {{"kabini"},    {"gfx703"},  GK_GFX703,  true, false, true,  true, true},
+    {{"mullins"},   {"gfx703"},  GK_GFX703,  true, false, true,  true, true},
+    {{"gfx704"},    {"gfx704"},  GK_GFX704,  true, false, true,  true, true},
+    {{"bonaire"},   {"gfx704"},  GK_GFX704,  true, false, true,  true, true},
+    {{"gfx801"},    {"gfx801"},  GK_GFX801,  true, true,  true,  true, true},
+    {{"carrizo"},   {"gfx801"},  GK_GFX801,  true, true,  true,  true, true},
+    {{"gfx802"},    {"gfx802"},  GK_GFX802,  true, false, true,  true, true},
+    {{"iceland"},   {"gfx802"},  GK_GFX802,  true, false, true,  true, true},
+    {{"tonga"},     {"gfx802"},  GK_GFX802,  true, false, true,  true, true},
+    {{"gfx803"},    {"gfx803"},  GK_GFX803,  true, false, true,  true, true},
+    {{"fiji"},      {"gfx803"},  GK_GFX803,  true, false, true,  true, true},
+    {{"polaris10"}, {"gfx803"},  GK_GFX803,  true, false, true,  true, true},
+    {{"polaris11"}, {"gfx803"},  GK_GFX803,  true, false, true,  true, true},
+    {{"gfx810"},    {"gfx810"},  GK_GFX810,  true, false, true,  true, true},
+    {{"stoney"},    {"gfx810"},  GK_GFX810,  true, false, true,  true, true},
+    {{"gfx900"},    {"gfx900"},  GK_GFX900,  true, true,  true,  true, true},
+    {{"gfx902"},    {"gfx902"},  GK_GFX900,  true, true,  true,  true, true},
+  };
 
-  static bool hasFullSpeedFMAF32(StringRef GPUName) {
-    return parseAMDGCNName(GPUName) >= GK_GFX9;
-  }
+  static GPUInfo parseR600Name(StringRef Name);
+
+  static GPUInfo parseAMDGCNName(StringRef Name);
+
+  GPUInfo parseGPUName(StringRef Name) const;
+
+  GPUInfo GPU;
 
   static bool isAMDGCN(const llvm::Triple &TT) {
     return TT.getArch() == llvm::Triple::amdgcn;
   }
 
-  static bool isGenericZero(const llvm::Triple &TT) {
-    return TT.getEnvironmentName() == "amdgiz" ||
-           TT.getEnvironmentName() == "amdgizcl";
-  }
-
 public:
   AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts);
 
@@ -91,12 +183,10 @@
   void adjust(LangOptions &Opts) override;
 
   uint64_t getPointerWidthV(unsigned AddrSpace) const override {
-    if (GPU <= GK_CAYMAN)
+    if (GPU.Kind <= GK_R600_LAST)
       return 32;
-
-    if (AddrSpace == AS.Private || AddrSpace == AS.Local) {
+    if (AddrSpace == Private || AddrSpace == Local)
       return 32;
-    }
     return 64;
   }
 
@@ -212,24 +302,22 @@
     return TargetInfo::CharPtrBuiltinVaList;
   }
 
-  static GPUKind parseR600Name(StringRef Name);
-
-  static GPUKind parseAMDGCNName(StringRef Name);
-
   bool isValidCPUName(StringRef Name) const override {
     if (getTriple().getArch() == llvm::Triple::amdgcn)
-      return GK_NONE != parseAMDGCNName(Name);
+      return GK_NONE != parseAMDGCNName(Name).Kind;
     else
-      return GK_NONE != parseR600Name(Name);
+      return GK_NONE != parseR600Name(Name).Kind;
   }
 
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
+
   bool setCPU(const std::string &Name) override {
     if (getTriple().getArch() == llvm::Triple::amdgcn)
       GPU = parseAMDGCNName(Name);
     else
       GPU = parseR600Name(Name);
 
-    return GPU != GK_NONE;
+    return GK_NONE != GPU.Kind;
   }
 
   void setSupportedOpenCLOpts() override {
@@ -237,16 +325,16 @@
     Opts.support("cl_clang_storage_class_specifiers");
     Opts.support("cl_khr_icd");
 
-    if (hasFP64)
+    if (GPU.HasFP64)
       Opts.support("cl_khr_fp64");
-    if (GPU >= GK_EVERGREEN) {
+    if (GPU.Kind >= GK_CEDAR) {
       Opts.support("cl_khr_byte_addressable_store");
       Opts.support("cl_khr_global_int32_base_atomics");
       Opts.support("cl_khr_global_int32_extended_atomics");
       Opts.support("cl_khr_local_int32_base_atomics");
       Opts.support("cl_khr_local_int32_extended_atomics");
     }
-    if (GPU >= GK_GFX6) {
+    if (GPU.Kind >= GK_AMDGCN_FIRST) {
       Opts.support("cl_khr_fp16");
       Opts.support("cl_khr_int64_base_atomics");
       Opts.support("cl_khr_int64_extended_atomics");
@@ -258,33 +346,29 @@
     }
   }
 
-  LangAS getOpenCLTypeAddrSpace(const Type *T) const override {
-    auto BT = dyn_cast<BuiltinType>(T);
+  LangAS getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const override {
+    switch (TK) {
+    case OCLTK_Image:
+      return LangAS::opencl_constant;
 
-    if (!BT)
-      return TargetInfo::getOpenCLTypeAddrSpace(T);
-
-    switch (BT->getKind()) {
-#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix)                   \
-  case BuiltinType::Id:                                                        \
-    return LangAS::opencl_constant;
-#include "clang/Basic/OpenCLImageTypes.def"
-    case BuiltinType::OCLClkEvent:
-    case BuiltinType::OCLQueue:
-    case BuiltinType::OCLReserveID:
+    case OCLTK_ClkEvent:
+    case OCLTK_Queue:
+    case OCLTK_ReserveID:
       return LangAS::opencl_global;
 
     default:
-      return TargetInfo::getOpenCLTypeAddrSpace(T);
+      return TargetInfo::getOpenCLTypeAddrSpace(TK);
     }
   }
 
   llvm::Optional<LangAS> getConstantAddressSpace() const override {
-    return getLangASFromTargetAS(AS.Constant);
+    return getLangASFromTargetAS(Constant);
   }
 
   /// \returns Target specific vtbl ptr address space.
-  unsigned getVtblPtrAddressSpace() const override { return AS.Constant; }
+  unsigned getVtblPtrAddressSpace() const override {
+    return static_cast<unsigned>(Constant);
+  }
 
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return the
@@ -296,9 +380,9 @@
   getDWARFAddressSpace(unsigned AddressSpace) const override {
     const unsigned DWARF_Private = 1;
     const unsigned DWARF_Local = 2;
-    if (AddressSpace == AS.Private) {
+    if (AddressSpace == Private) {
       return DWARF_Private;
-    } else if (AddressSpace == AS.Local) {
+    } else if (AddressSpace == Local) {
       return DWARF_Local;
     } else {
       return None;
diff --git a/lib/Basic/Targets/ARM.cpp b/lib/Basic/Targets/ARM.cpp
index 992cfbd..3f2d31d 100644
--- a/lib/Basic/Targets/ARM.cpp
+++ b/lib/Basic/Targets/ARM.cpp
@@ -28,14 +28,6 @@
   DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 64;
   const llvm::Triple &T = getTriple();
 
-  // size_t is unsigned long on MachO-derived environments, NetBSD, and
-  // OpenBSD.
-  if (T.isOSBinFormatMachO() || T.getOS() == llvm::Triple::NetBSD ||
-      T.getOS() == llvm::Triple::OpenBSD)
-    SizeType = UnsignedLong;
-  else
-    SizeType = UnsignedInt;
-
   bool IsNetBSD = T.getOS() == llvm::Triple::NetBSD;
   bool IsOpenBSD = T.getOS() == llvm::Triple::OpenBSD;
   if (!T.isOSWindows() && !IsNetBSD && !IsOpenBSD)
@@ -83,12 +75,6 @@
   else
     DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 32;
 
-  // size_t is unsigned int on FreeBSD.
-  if (T.getOS() == llvm::Triple::FreeBSD)
-    SizeType = UnsignedInt;
-  else
-    SizeType = UnsignedLong;
-
   WCharType = SignedInt;
 
   // Do not respect the alignment of bit-field types when laying out
@@ -225,20 +211,27 @@
                              const TargetOptions &Opts)
     : TargetInfo(Triple), FPMath(FP_Default), IsAAPCS(true), LDREX(0),
       HW_FP(0) {
+  bool IsOpenBSD = Triple.getOS() == llvm::Triple::OpenBSD;
+  bool IsNetBSD = Triple.getOS() == llvm::Triple::NetBSD;
 
-  switch (getTriple().getOS()) {
-  case llvm::Triple::NetBSD:
-  case llvm::Triple::OpenBSD:
-    PtrDiffType = SignedLong;
-    break;
-  default:
+  // FIXME: the isOSBinFormatMachO is a workaround for identifying a Darwin-like
+  // environment where size_t is `unsigned long` rather than `unsigned int`
+
+  PtrDiffType = IntPtrType =
+      (Triple.isOSDarwin() || Triple.isOSBinFormatMachO() || IsOpenBSD ||
+       IsNetBSD)
+          ? SignedLong
+          : SignedInt;
+
+  SizeType = (Triple.isOSDarwin() || Triple.isOSBinFormatMachO() || IsOpenBSD ||
+              IsNetBSD)
+                 ? UnsignedLong
+                 : UnsignedInt;
+
+  // ptrdiff_t is inconsistent on Darwin
+  if ((Triple.isOSDarwin() || Triple.isOSBinFormatMachO()) &&
+      !Triple.isWatchABI())
     PtrDiffType = SignedInt;
-    break;
-  }
-
-  IntPtrType = (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::OpenBSD)
-                   ? SignedLong
-                   : SignedInt;
 
   // Cache arch related info.
   setArchInfo();
@@ -485,6 +478,10 @@
          llvm::ARM::parseCPUArch(Name) != llvm::ARM::ArchKind::INVALID;
 }
 
+void ARMTargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
+  llvm::ARM::fillValidCPUArchList(Values);
+}
+
 bool ARMTargetInfo::setCPU(const std::string &Name) {
   if (Name != "generic")
     setArchInfo(llvm::ARM::parseCPUArch(Name));
@@ -589,7 +586,7 @@
 
   // ACLE 6.4.4 LDREX/STREX
   if (LDREX)
-    Builder.defineMacro("__ARM_FEATURE_LDREX", "0x" + llvm::utohexstr(LDREX));
+    Builder.defineMacro("__ARM_FEATURE_LDREX", "0x" + Twine::utohexstr(LDREX));
 
   // ACLE 6.4.5 CLZ
   if (ArchVersion == 5 || (ArchVersion == 6 && CPUProfile != "M") ||
@@ -598,7 +595,7 @@
 
   // ACLE 6.5.1 Hardware Floating Point
   if (HW_FP)
-    Builder.defineMacro("__ARM_FP", "0x" + llvm::utohexstr(HW_FP));
+    Builder.defineMacro("__ARM_FP", "0x" + Twine::utohexstr(HW_FP));
 
   // ACLE predefines.
   Builder.defineMacro("__ARM_ACLE", "200");
@@ -679,11 +676,11 @@
     // current AArch32 NEON implementations do not support double-precision
     // floating-point even when it is present in VFP.
     Builder.defineMacro("__ARM_NEON_FP",
-                        "0x" + llvm::utohexstr(HW_FP & ~HW_FP_DP));
+                        "0x" + Twine::utohexstr(HW_FP & ~HW_FP_DP));
   }
 
   Builder.defineMacro("__ARM_SIZEOF_WCHAR_T",
-                      llvm::utostr(Opts.WCharSize ? Opts.WCharSize : 4));
+                      Twine(Opts.WCharSize ? Opts.WCharSize : 4));
 
   Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4");
 
@@ -926,7 +923,6 @@
 WindowsARMTargetInfo::WindowsARMTargetInfo(const llvm::Triple &Triple,
                                            const TargetOptions &Opts)
     : WindowsTargetInfo<ARMleTargetInfo>(Triple, Opts), Triple(Triple) {
-  SizeType = UnsignedInt;
 }
 
 void WindowsARMTargetInfo::getVisualStudioDefines(const LangOptions &Opts,
@@ -1007,10 +1003,7 @@
 void MinGWARMTargetInfo::getTargetDefines(const LangOptions &Opts,
                                           MacroBuilder &Builder) const {
   WindowsARMTargetInfo::getTargetDefines(Opts, Builder);
-  DefineStd(Builder, "WIN32", Opts);
-  DefineStd(Builder, "WINNT", Opts);
   Builder.defineMacro("_ARM_");
-  addMinGWDefines(Opts, Builder);
 }
 
 CygwinARMTargetInfo::CygwinARMTargetInfo(const llvm::Triple &Triple,
@@ -1046,10 +1039,6 @@
     // Darwin on iOS uses a variant of the ARM C++ ABI.
     TheCXXABI.set(TargetCXXABI::WatchOS);
 
-    // The 32-bit ABI is silent on what ptrdiff_t should be, but given that
-    // size_t is long, it's a bit weird for it to be int.
-    PtrDiffType = SignedLong;
-
     // BOOL should be a real boolean on the new ABI
     UseSignedCharForObjCBool = false;
   } else
diff --git a/lib/Basic/Targets/ARM.h b/lib/Basic/Targets/ARM.h
index 232b23c..af197fd 100644
--- a/lib/Basic/Targets/ARM.h
+++ b/lib/Basic/Targets/ARM.h
@@ -122,10 +122,16 @@
   bool hasFeature(StringRef Feature) const override;
 
   bool isValidCPUName(StringRef Name) const override;
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
+
   bool setCPU(const std::string &Name) override;
 
   bool setFPMath(StringRef Name) override;
 
+  bool useFP16ConversionIntrinsics() const override {
+    return false;
+  }
+
   void getTargetDefinesARMV81A(const LangOptions &Opts,
                                MacroBuilder &Builder) const;
 
diff --git a/lib/Basic/Targets/AVR.cpp b/lib/Basic/Targets/AVR.cpp
index 3022fe3..9b66449 100644
--- a/lib/Basic/Targets/AVR.cpp
+++ b/lib/Basic/Targets/AVR.cpp
@@ -28,7 +28,7 @@
 };
 
 // This list should be kept up-to-date with AVRDevices.td in LLVM.
-static ArrayRef<MCUInfo> AVRMcus = {
+static MCUInfo AVRMcus[] = {
     {"at90s1200", "__AVR_AT90S1200__"},
     {"attiny11", "__AVR_ATtiny11__"},
     {"attiny12", "__AVR_ATtiny12__"},
@@ -273,35 +273,29 @@
 } // namespace targets
 } // namespace clang
 
+static constexpr llvm::StringLiteral ValidFamilyNames[] = {
+    "avr1",      "avr2",      "avr25",     "avr3",      "avr31",
+    "avr35",     "avr4",      "avr5",      "avr51",     "avr6",
+    "avrxmega1", "avrxmega2", "avrxmega3", "avrxmega4", "avrxmega5",
+    "avrxmega6", "avrxmega7", "avrtiny"};
+
 bool AVRTargetInfo::isValidCPUName(StringRef Name) const {
-  bool IsFamily = llvm::StringSwitch<bool>(Name)
-                      .Case("avr1", true)
-                      .Case("avr2", true)
-                      .Case("avr25", true)
-                      .Case("avr3", true)
-                      .Case("avr31", true)
-                      .Case("avr35", true)
-                      .Case("avr4", true)
-                      .Case("avr5", true)
-                      .Case("avr51", true)
-                      .Case("avr6", true)
-                      .Case("avrxmega1", true)
-                      .Case("avrxmega2", true)
-                      .Case("avrxmega3", true)
-                      .Case("avrxmega4", true)
-                      .Case("avrxmega5", true)
-                      .Case("avrxmega6", true)
-                      .Case("avrxmega7", true)
-                      .Case("avrtiny", true)
-                      .Default(false);
+  bool IsFamily =
+      llvm::find(ValidFamilyNames, Name) != std::end(ValidFamilyNames);
 
   bool IsMCU =
-      std::find_if(AVRMcus.begin(), AVRMcus.end(), [&](const MCUInfo &Info) {
+      llvm::find_if(AVRMcus, [&](const MCUInfo &Info) {
         return Info.Name == Name;
-      }) != AVRMcus.end();
+      }) != std::end(AVRMcus);
   return IsFamily || IsMCU;
 }
 
+void AVRTargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
+  Values.append(std::begin(ValidFamilyNames), std::end(ValidFamilyNames));
+  for (const MCUInfo &Info : AVRMcus)
+    Values.push_back(Info.Name);
+}
+
 void AVRTargetInfo::getTargetDefines(const LangOptions &Opts,
                                      MacroBuilder &Builder) const {
   Builder.defineMacro("AVR");
@@ -309,12 +303,10 @@
   Builder.defineMacro("__AVR__");
 
   if (!this->CPU.empty()) {
-    auto It =
-        std::find_if(AVRMcus.begin(), AVRMcus.end(), [&](const MCUInfo &Info) {
-          return Info.Name == this->CPU;
-        });
+    auto It = llvm::find_if(
+        AVRMcus, [&](const MCUInfo &Info) { return Info.Name == this->CPU; });
 
-    if (It != AVRMcus.end())
+    if (It != std::end(AVRMcus))
       Builder.defineMacro(It->DefineName);
   }
 }
diff --git a/lib/Basic/Targets/AVR.h b/lib/Basic/Targets/AVR.h
index 3dfb84f..d595f48 100644
--- a/lib/Basic/Targets/AVR.h
+++ b/lib/Basic/Targets/AVR.h
@@ -55,7 +55,7 @@
     WIntType = SignedInt;
     Char32Type = UnsignedLong;
     SigAtomicType = SignedChar;
-    resetDataLayout("e-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8");
+    resetDataLayout("e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8");
   }
 
   void getTargetDefines(const LangOptions &Opts,
@@ -167,6 +167,7 @@
   }
 
   bool isValidCPUName(StringRef Name) const override;
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
   bool setCPU(const std::string &Name) override {
     bool isValid = isValidCPUName(Name);
     if (isValid)
diff --git a/lib/Basic/Targets/BPF.cpp b/lib/Basic/Targets/BPF.cpp
index 54e34f1..cf41a09 100644
--- a/lib/Basic/Targets/BPF.cpp
+++ b/lib/Basic/Targets/BPF.cpp
@@ -14,6 +14,7 @@
 #include "BPF.h"
 #include "Targets.h"
 #include "clang/Basic/MacroBuilder.h"
+#include "llvm/ADT/StringRef.h"
 
 using namespace clang;
 using namespace clang::targets;
@@ -23,3 +24,14 @@
   DefineStd(Builder, "bpf", Opts);
   Builder.defineMacro("__BPF__");
 }
+
+static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2",
+                                                        "probe"};
+
+bool BPFTargetInfo::isValidCPUName(StringRef Name) const {
+  return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);
+}
+
+void BPFTargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
+  Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames));
+}
diff --git a/lib/Basic/Targets/BPF.h b/lib/Basic/Targets/BPF.h
index 4dd9cbd..36d2da4 100644
--- a/lib/Basic/Targets/BPF.h
+++ b/lib/Basic/Targets/BPF.h
@@ -46,7 +46,14 @@
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  bool hasFeature(StringRef Feature) const override { return Feature == "bpf"; }
+  bool hasFeature(StringRef Feature) const override {
+    return Feature == "bpf" || Feature == "alu32";
+  }
+
+  void setFeatureEnabled(llvm::StringMap<bool> &Features, StringRef Name,
+                         bool Enabled) const override {
+    Features[Name] = Enabled;
+  }
 
   ArrayRef<Builtin::Info> getTargetBuiltins() const override { return None; }
 
@@ -77,12 +84,9 @@
     }
   }
 
-  bool isValidCPUName(StringRef Name) const override {
-    if (Name == "generic" || Name == "v1" ||
-        Name == "v2" || Name == "probe")
-      return true;
-    return false;
-  }
+  bool isValidCPUName(StringRef Name) const override;
+
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
 
   bool setCPU(const std::string &Name) override {
     StringRef CPUName(Name);
diff --git a/lib/Basic/Targets/Hexagon.cpp b/lib/Basic/Targets/Hexagon.cpp
index a43f579..20d0c5c 100644
--- a/lib/Basic/Targets/Hexagon.cpp
+++ b/lib/Basic/Targets/Hexagon.cpp
@@ -52,6 +52,9 @@
   } else if (CPU == "hexagonv62") {
     Builder.defineMacro("__HEXAGON_V62__");
     Builder.defineMacro("__HEXAGON_ARCH__", "62");
+  } else if (CPU == "hexagonv65") {
+    Builder.defineMacro("__HEXAGON_V65__");
+    Builder.defineMacro("__HEXAGON_ARCH__", "65");
   }
 
   if (hasFeature("hvx-length64b")) {
@@ -138,14 +141,29 @@
       .Default(false);
 }
 
+struct CPUSuffix {
+  llvm::StringLiteral Name;
+  llvm::StringLiteral Suffix;
+};
+
+static constexpr CPUSuffix Suffixes[] = {
+    {{"hexagonv4"}, {"4"}},   {{"hexagonv5"}, {"5"}},
+    {{"hexagonv55"}, {"55"}}, {{"hexagonv60"}, {"60"}},
+    {{"hexagonv62"}, {"62"}}, {{"hexagonv65"}, {"65"}},
+};
+
 const char *HexagonTargetInfo::getHexagonCPUSuffix(StringRef Name) {
-  return llvm::StringSwitch<const char *>(Name)
-      .Case("hexagonv4", "4")
-      .Case("hexagonv5", "5")
-      .Case("hexagonv55", "55")
-      .Case("hexagonv60", "60")
-      .Case("hexagonv62", "62")
-      .Default(nullptr);
+  const CPUSuffix *Item = llvm::find_if(
+      Suffixes, [Name](const CPUSuffix &S) { return S.Name == Name; });
+  if (Item == std::end(Suffixes))
+    return nullptr;
+  return Item->Suffix.data();
+}
+
+void HexagonTargetInfo::fillValidCPUList(
+    SmallVectorImpl<StringRef> &Values) const {
+  for (const CPUSuffix &Suffix : Suffixes)
+    Values.push_back(Suffix.Name);
 }
 
 ArrayRef<Builtin::Info> HexagonTargetInfo::getTargetBuiltins() const {
diff --git a/lib/Basic/Targets/Hexagon.h b/lib/Basic/Targets/Hexagon.h
index 7b09664..fb4956a 100644
--- a/lib/Basic/Targets/Hexagon.h
+++ b/lib/Basic/Targets/Hexagon.h
@@ -112,6 +112,8 @@
     return getHexagonCPUSuffix(Name);
   }
 
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
+
   bool setCPU(const std::string &Name) override {
     if (!isValidCPUName(Name))
       return false;
diff --git a/lib/Basic/Targets/Lanai.cpp b/lib/Basic/Targets/Lanai.cpp
index 1d8314a..0e8030c 100644
--- a/lib/Basic/Targets/Lanai.cpp
+++ b/lib/Basic/Targets/Lanai.cpp
@@ -40,6 +40,10 @@
 bool LanaiTargetInfo::isValidCPUName(StringRef Name) const {
   return llvm::StringSwitch<bool>(Name).Case("v11", true).Default(false);
 }
+void LanaiTargetInfo::fillValidCPUList(
+    SmallVectorImpl<StringRef> &Values) const {
+  Values.emplace_back("v11");
+}
 
 bool LanaiTargetInfo::setCPU(const std::string &Name) {
   CPU = llvm::StringSwitch<CPUKind>(Name).Case("v11", CK_V11).Default(CK_NONE);
diff --git a/lib/Basic/Targets/Lanai.h b/lib/Basic/Targets/Lanai.h
index 5f99c17..b9e6dbe 100644
--- a/lib/Basic/Targets/Lanai.h
+++ b/lib/Basic/Targets/Lanai.h
@@ -65,6 +65,8 @@
 
   bool isValidCPUName(StringRef Name) const override;
 
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
+
   bool setCPU(const std::string &Name) override;
 
   bool hasFeature(StringRef Feature) const override;
diff --git a/lib/Basic/Targets/Mips.cpp b/lib/Basic/Targets/Mips.cpp
index 3eb39b3..77fba48 100644
--- a/lib/Basic/Targets/Mips.cpp
+++ b/lib/Basic/Targets/Mips.cpp
@@ -44,26 +44,19 @@
   return false;
 }
 
+static constexpr llvm::StringLiteral ValidCPUNames[] = {
+    {"mips1"},  {"mips2"},    {"mips3"},    {"mips4"},    {"mips5"},
+    {"mips32"}, {"mips32r2"}, {"mips32r3"}, {"mips32r5"}, {"mips32r6"},
+    {"mips64"}, {"mips64r2"}, {"mips64r3"}, {"mips64r5"}, {"mips64r6"},
+    {"octeon"}, {"p5600"}};
+
 bool MipsTargetInfo::isValidCPUName(StringRef Name) const {
-  return llvm::StringSwitch<bool>(Name)
-      .Case("mips1", true)
-      .Case("mips2", true)
-      .Case("mips3", true)
-      .Case("mips4", true)
-      .Case("mips5", true)
-      .Case("mips32", true)
-      .Case("mips32r2", true)
-      .Case("mips32r3", true)
-      .Case("mips32r5", true)
-      .Case("mips32r6", true)
-      .Case("mips64", true)
-      .Case("mips64r2", true)
-      .Case("mips64r3", true)
-      .Case("mips64r5", true)
-      .Case("mips64r6", true)
-      .Case("octeon", true)
-      .Case("p5600", true)
-      .Default(false);
+  return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);
+}
+
+void MipsTargetInfo::fillValidCPUList(
+    SmallVectorImpl<StringRef> &Values) const {
+  Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames));
 }
 
 void MipsTargetInfo::getTargetDefines(const LangOptions &Opts,
@@ -206,6 +199,13 @@
 }
 
 bool MipsTargetInfo::validateTarget(DiagnosticsEngine &Diags) const {
+  // microMIPS64R6 backend was removed.
+  if ((getTriple().getArch() == llvm::Triple::mips64 ||
+       getTriple().getArch() == llvm::Triple::mips64el) &&
+       IsMicromips && (ABI == "n32" || ABI == "n64")) {
+    Diags.Report(diag::err_target_unsupported_cpu_for_micromips) << CPU;
+    return false;
+  }
   // FIXME: It's valid to use O32 on a 64-bit CPU but the backend can't handle
   //        this yet. It's better to fail here than on the backend assertion.
   if (processorSupportsGPR64() && ABI == "o32") {
diff --git a/lib/Basic/Targets/Mips.h b/lib/Basic/Targets/Mips.h
index 28900f2..ff9e790 100644
--- a/lib/Basic/Targets/Mips.h
+++ b/lib/Basic/Targets/Mips.h
@@ -54,6 +54,7 @@
   enum DspRevEnum { NoDSP, DSP1, DSP2 } DspRev;
   bool HasMSA;
   bool DisableMadd4;
+  bool UseIndirectJumpHazard;
 
 protected:
   bool HasFP64;
@@ -64,7 +65,8 @@
       : TargetInfo(Triple), IsMips16(false), IsMicromips(false),
         IsNan2008(false), IsAbs2008(false), IsSingleFloat(false),
         IsNoABICalls(false), CanUseBSDABICalls(false), FloatABI(HardFloat),
-        DspRev(NoDSP), HasMSA(false), DisableMadd4(false), HasFP64(false) {
+        DspRev(NoDSP), HasMSA(false), DisableMadd4(false),
+        UseIndirectJumpHazard(false), HasFP64(false) {
     TheCXXABI.set(TargetCXXABI::GenericMIPS);
 
     setABI((getTriple().getArch() == llvm::Triple::mips ||
@@ -161,6 +163,7 @@
   }
 
   bool isValidCPUName(StringRef Name) const override;
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
 
   bool setCPU(const std::string &Name) override {
     CPU = Name;
@@ -338,6 +341,8 @@
         IsAbs2008 = false;
       else if (Feature == "+noabicalls")
         IsNoABICalls = true;
+      else if (Feature == "+use-indirect-jump-hazard")
+        UseIndirectJumpHazard = true;
     }
 
     setDataLayout();
@@ -387,7 +392,9 @@
     return llvm::makeArrayRef(NewABIRegAliases);
   }
 
-  bool hasInt128Type() const override { return ABI == "n32" || ABI == "n64"; }
+  bool hasInt128Type() const override {
+    return (ABI == "n32" || ABI == "n64") || getTargetOpts().ForceEnableInt128;
+  }
 
   bool validateTarget(DiagnosticsEngine &Diags) const override;
 };
diff --git a/lib/Basic/Targets/NVPTX.cpp b/lib/Basic/Targets/NVPTX.cpp
index 3889f09..e8e6d2f 100644
--- a/lib/Basic/Targets/NVPTX.cpp
+++ b/lib/Basic/Targets/NVPTX.cpp
@@ -41,6 +41,7 @@
          "NVPTX only supports 32- and 64-bit modes.");
 
   TLSSupported = false;
+  VLASupported = false;
   AddrSpaceMap = &NVPTXAddrSpaceMap;
   UseAddrSpaceMapMangling = true;
 
@@ -156,6 +157,8 @@
     // Set __CUDA_ARCH__ for the GPU specified.
     std::string CUDAArchCode = [this] {
       switch (GPU) {
+      case CudaArch::LAST:
+        break;
       case CudaArch::UNKNOWN:
         assert(false && "No GPU arch when compiling CUDA device code.");
         return "";
@@ -185,6 +188,8 @@
         return "620";
       case CudaArch::SM_70:
         return "700";
+      case CudaArch::SM_72:
+        return "720";
       }
       llvm_unreachable("unhandled CudaArch");
     }();
diff --git a/lib/Basic/Targets/NVPTX.h b/lib/Basic/Targets/NVPTX.h
index a848707..83d7dfb 100644
--- a/lib/Basic/Targets/NVPTX.h
+++ b/lib/Basic/Targets/NVPTX.h
@@ -98,6 +98,12 @@
     return StringToCudaArch(Name) != CudaArch::UNKNOWN;
   }
 
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override {
+    for (int i = static_cast<int>(CudaArch::SM_20);
+         i < static_cast<int>(CudaArch::LAST); ++i)
+      Values.emplace_back(CudaArchToString(static_cast<CudaArch>(i)));
+  }
+
   bool setCPU(const std::string &Name) override {
     GPU = StringToCudaArch(Name);
     return GPU != CudaArch::UNKNOWN;
diff --git a/lib/Basic/Targets/Nios2.h b/lib/Basic/Targets/Nios2.h
index aa02f8f..ffeb414 100644
--- a/lib/Basic/Targets/Nios2.h
+++ b/lib/Basic/Targets/Nios2.h
@@ -56,6 +56,10 @@
     return Name == "nios2r1" || Name == "nios2r2";
   }
 
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override {
+    Values.append({"nios2r1", "nios2r2"});
+  }
+
   bool setCPU(const std::string &Name) override {
     if (isValidCPUName(Name)) {
       CPU = Name;
diff --git a/lib/Basic/Targets/OSTargets.cpp b/lib/Basic/Targets/OSTargets.cpp
index b01e682..50abd4c 100644
--- a/lib/Basic/Targets/OSTargets.cpp
+++ b/lib/Basic/Targets/OSTargets.cpp
@@ -133,10 +133,6 @@
   if (Triple.isOSDarwin())
     Builder.defineMacro("__MACH__");
 
-  // The Watch ABI uses Dwarf EH.
-  if (Triple.isWatchABI())
-    Builder.defineMacro("__ARM_DWARF_EH__");
-
   PlatformMinVersion = VersionTuple(Maj, Min, Rev);
 }
 } // namespace targets
diff --git a/lib/Basic/Targets/OSTargets.h b/lib/Basic/Targets/OSTargets.h
index c775fe3..05fcd93 100644
--- a/lib/Basic/Targets/OSTargets.h
+++ b/lib/Basic/Targets/OSTargets.h
@@ -95,16 +95,22 @@
     if (Triple.isMacOSX())
       this->TLSSupported = !Triple.isMacOSXVersionLT(10, 7);
     else if (Triple.isiOS()) {
-      // 64-bit iOS supported it from 8 onwards, 32-bit from 9 onwards.
-      if (Triple.getArch() == llvm::Triple::x86_64 ||
-          Triple.getArch() == llvm::Triple::aarch64)
+      // 64-bit iOS supported it from 8 onwards, 32-bit device from 9 onwards,
+      // 32-bit simulator from 10 onwards.
+      if (Triple.isArch64Bit())
         this->TLSSupported = !Triple.isOSVersionLT(8);
-      else if (Triple.getArch() == llvm::Triple::x86 ||
-               Triple.getArch() == llvm::Triple::arm ||
-               Triple.getArch() == llvm::Triple::thumb)
-        this->TLSSupported = !Triple.isOSVersionLT(9);
-    } else if (Triple.isWatchOS())
-      this->TLSSupported = !Triple.isOSVersionLT(2);
+      else if (Triple.isArch32Bit()) {
+        if (!Triple.isSimulatorEnvironment())
+          this->TLSSupported = !Triple.isOSVersionLT(9);
+        else
+          this->TLSSupported = !Triple.isOSVersionLT(10);
+      }
+    } else if (Triple.isWatchOS()) {
+      if (!Triple.isSimulatorEnvironment())
+        this->TLSSupported = !Triple.isOSVersionLT(2);
+      else
+        this->TLSSupported = !Triple.isOSVersionLT(3);
+    }
 
     this->MCountName = "\01mcount";
   }
@@ -358,17 +364,6 @@
     Builder.defineMacro("__ELF__");
     if (Opts.POSIXThreads)
       Builder.defineMacro("_REENTRANT");
-
-    switch (Triple.getArch()) {
-    default:
-      break;
-    case llvm::Triple::arm:
-    case llvm::Triple::armeb:
-    case llvm::Triple::thumb:
-    case llvm::Triple::thumbeb:
-      Builder.defineMacro("__ARM_DWARF_EH__");
-      break;
-    }
   }
 
 public:
@@ -572,6 +567,11 @@
   void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
                     MacroBuilder &Builder) const override {
     Builder.defineMacro("_WIN32");
+    if (Triple.isArch64Bit())
+      Builder.defineMacro("_WIN64");
+    if (Triple.isWindowsGNUEnvironment())
+      addMinGWDefines(Triple, Opts, Builder);
+
   }
   void getVisualStudioDefines(const LangOptions &Opts,
                               MacroBuilder &Builder) const {
@@ -605,7 +605,7 @@
         Builder.defineMacro("_HAS_CHAR16_T_LANGUAGE_SUPPORT", Twine(1));
 
       if (Opts.isCompatibleWithMSVC(LangOptions::MSVC2015)) {
-        if (Opts.CPlusPlus1z)
+        if (Opts.CPlusPlus17)
           Builder.defineMacro("_MSVC_LANG", "201403L");
         else if (Opts.CPlusPlus14)
           Builder.defineMacro("_MSVC_LANG", "201402L");
@@ -717,11 +717,6 @@
       Builder.defineMacro("_GNU_SOURCE");
   }
 
-  // As an optimization, group static init code together in a section.
-  const char *getStaticInitSectionSpecifier() const final {
-    return ".text.__startup";
-  }
-
 public:
   explicit WebAssemblyOSTargetInfo(const llvm::Triple &Triple,
                                    const TargetOptions &Opts)
diff --git a/lib/Basic/Targets/PPC.cpp b/lib/Basic/Targets/PPC.cpp
index a44aa0c..3516a73 100644
--- a/lib/Basic/Targets/PPC.cpp
+++ b/lib/Basic/Targets/PPC.cpp
@@ -479,57 +479,25 @@
   return llvm::makeArrayRef(GCCRegAliases);
 }
 
+static constexpr llvm::StringLiteral ValidCPUNames[] = {
+    {"generic"}, {"440"},         {"450"},     {"601"},    {"602"},
+    {"603"},     {"603e"},        {"603ev"},   {"604"},    {"604e"},
+    {"620"},     {"630"},         {"g3"},      {"7400"},   {"g4"},
+    {"7450"},    {"g4+"},         {"750"},     {"970"},    {"g5"},
+    {"a2"},      {"a2q"},         {"e500mc"},  {"e5500"},  {"power3"},
+    {"pwr3"},    {"power4"},      {"pwr4"},    {"power5"}, {"pwr5"},
+    {"power5x"}, {"pwr5x"},       {"power6"},  {"pwr6"},   {"power6x"},
+    {"pwr6x"},   {"power7"},      {"pwr7"},    {"power8"}, {"pwr8"},
+    {"power9"},  {"pwr9"},        {"powerpc"}, {"ppc"},    {"powerpc64"},
+    {"ppc64"},   {"powerpc64le"}, {"ppc64le"},
+};
+
 bool PPCTargetInfo::isValidCPUName(StringRef Name) const {
-  return llvm::StringSwitch<bool>(Name)
-      .Case("generic", true)
-      .Case("440", true)
-      .Case("450", true)
-      .Case("601", true)
-      .Case("602", true)
-      .Case("603", true)
-      .Case("603e", true)
-      .Case("603ev", true)
-      .Case("604", true)
-      .Case("604e", true)
-      .Case("620", true)
-      .Case("630", true)
-      .Case("g3", true)
-      .Case("7400", true)
-      .Case("g4", true)
-      .Case("7450", true)
-      .Case("g4+", true)
-      .Case("750", true)
-      .Case("970", true)
-      .Case("g5", true)
-      .Case("a2", true)
-      .Case("a2q", true)
-      .Case("e500mc", true)
-      .Case("e5500", true)
-      .Case("power3", true)
-      .Case("pwr3", true)
-      .Case("power4", true)
-      .Case("pwr4", true)
-      .Case("power5", true)
-      .Case("pwr5", true)
-      .Case("power5x", true)
-      .Case("pwr5x", true)
-      .Case("power6", true)
-      .Case("pwr6", true)
-      .Case("power6x", true)
-      .Case("pwr6x", true)
-      .Case("power7", true)
-      .Case("pwr7", true)
-      .Case("power8", true)
-      .Case("pwr8", true)
-      .Case("power9", true)
-      .Case("pwr9", true)
-      .Case("powerpc", true)
-      .Case("ppc", true)
-      .Case("powerpc64", true)
-      .Case("ppc64", true)
-      .Case("powerpc64le", true)
-      .Case("ppc64le", true)
-      .Default(false);
+  return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);
+}
+
+void PPCTargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
+  Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames));
 }
 
 void PPCTargetInfo::adjust(LangOptions &Opts) {
diff --git a/lib/Basic/Targets/PPC.h b/lib/Basic/Targets/PPC.h
index 04bef25..364a4f6 100644
--- a/lib/Basic/Targets/PPC.h
+++ b/lib/Basic/Targets/PPC.h
@@ -86,6 +86,7 @@
   //  821, 823, 8540, 8548, e300c2, e300c3, e500mc64, e6500, 860, cell,
   //  titan, rs64.
   bool isValidCPUName(StringRef Name) const override;
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
 
   bool setCPU(const std::string &Name) override {
     bool CPUKnown = isValidCPUName(Name);
diff --git a/lib/Basic/Targets/RISCV.cpp b/lib/Basic/Targets/RISCV.cpp
new file mode 100644
index 0000000..8b2338b
--- /dev/null
+++ b/lib/Basic/Targets/RISCV.cpp
@@ -0,0 +1,52 @@
+//===--- RISCV.cpp - Implement RISCV target feature support ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements RISCV TargetInfo objects.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "clang/Basic/MacroBuilder.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace clang;
+using namespace clang::targets;
+
+ArrayRef<const char *> RISCVTargetInfo::getGCCRegNames() const {
+  static const char *const GCCRegNames[] = {
+      "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
+      "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
+      "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+      "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31"};
+  return llvm::makeArrayRef(GCCRegNames);
+}
+
+ArrayRef<TargetInfo::GCCRegAlias> RISCVTargetInfo::getGCCRegAliases() const {
+  static const TargetInfo::GCCRegAlias GCCRegAliases[] = {
+      {{"zero"}, "x0"}, {{"ra"}, "x1"},  {{"sp"}, "x2"},   {{"gp"}, "x3"},
+      {{"tp"}, "x4"},   {{"t0"}, "x5"},  {{"t1"}, "x6"},   {{"t2"}, "x7"},
+      {{"s0"}, "x8"},   {{"s1"}, "x9"},  {{"a0"}, "x10"},  {{"a1"}, "x11"},
+      {{"a2"}, "x12"},  {{"a3"}, "x13"}, {{"a4"}, "x15"},  {{"a5"}, "x15"},
+      {{"a6"}, "x16"},  {{"a7"}, "x17"}, {{"s2"}, "x18"},  {{"s3"}, "x19"},
+      {{"s4"}, "x20"},  {{"s5"}, "x21"}, {{"s6"}, "x22"},  {{"s7"}, "x23"},
+      {{"s8"}, "x24"},  {{"s9"}, "x25"}, {{"s10"}, "x26"}, {{"s11"}, "x27"},
+      {{"t3"}, "x28"},  {{"t4"}, "x29"}, {{"t5"}, "x30"},  {{"t6"}, "x31"}};
+  return llvm::makeArrayRef(GCCRegAliases);
+}
+
+void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
+                                       MacroBuilder &Builder) const {
+  Builder.defineMacro("__ELF__");
+  Builder.defineMacro("__riscv");
+  bool Is64Bit = getTriple().getArch() == llvm::Triple::riscv64;
+  Builder.defineMacro("__riscv_xlen", Is64Bit ? "64" : "32");
+  // TODO: modify when more code models and ABIs are supported.
+  Builder.defineMacro("__riscv_cmodel_medlow");
+  Builder.defineMacro("__riscv_float_abi_soft");
+}
diff --git a/lib/Basic/Targets/RISCV.h b/lib/Basic/Targets/RISCV.h
new file mode 100644
index 0000000..b35afd7
--- /dev/null
+++ b/lib/Basic/Targets/RISCV.h
@@ -0,0 +1,103 @@
+//===--- RISCV.h - Declare RISCV target feature support ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares RISCV TargetInfo objects.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_RISCV_H
+#define LLVM_CLANG_LIB_BASIC_TARGETS_RISCV_H
+
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Compiler.h"
+
+namespace clang {
+namespace targets {
+
+// RISC-V Target
+class RISCVTargetInfo : public TargetInfo {
+protected:
+  std::string ABI;
+
+public:
+  RISCVTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+      : TargetInfo(Triple) {
+    TLSSupported = false;
+    LongDoubleWidth = 128;
+    LongDoubleAlign = 128;
+    LongDoubleFormat = &llvm::APFloat::IEEEquad();
+    SuitableAlign = 128;
+    WCharType = SignedInt;
+    WIntType = UnsignedInt;
+  }
+
+  StringRef getABI() const override { return ABI; }
+  void getTargetDefines(const LangOptions &Opts,
+                        MacroBuilder &Builder) const override;
+
+  ArrayRef<Builtin::Info> getTargetBuiltins() const override { return None; }
+
+  BuiltinVaListKind getBuiltinVaListKind() const override {
+    return TargetInfo::VoidPtrBuiltinVaList;
+  }
+
+  const char *getClobbers() const override { return ""; }
+
+  ArrayRef<const char *> getGCCRegNames() const override;
+
+  ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override;
+
+  bool validateAsmConstraint(const char *&Name,
+                             TargetInfo::ConstraintInfo &Info) const override {
+    return false;
+  }
+};
+class LLVM_LIBRARY_VISIBILITY RISCV32TargetInfo : public RISCVTargetInfo {
+public:
+  RISCV32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+      : RISCVTargetInfo(Triple, Opts) {
+    IntPtrType = SignedInt;
+    PtrDiffType = SignedInt;
+    SizeType = UnsignedInt;
+    resetDataLayout("e-m:e-p:32:32-i64:64-n32-S128");
+  }
+
+  bool setABI(const std::string &Name) override {
+    // TODO: support ilp32f and ilp32d ABIs.
+    if (Name == "ilp32") {
+      ABI = Name;
+      return true;
+    }
+    return false;
+  }
+};
+class LLVM_LIBRARY_VISIBILITY RISCV64TargetInfo : public RISCVTargetInfo {
+public:
+  RISCV64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+      : RISCVTargetInfo(Triple, Opts) {
+    LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
+    IntMaxType = Int64Type = SignedLong;
+    resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n64-S128");
+  }
+
+  bool setABI(const std::string &Name) override {
+    // TODO: support lp64f and lp64d ABIs.
+    if (Name == "lp64") {
+      ABI = Name;
+      return true;
+    }
+    return false;
+  }
+};
+} // namespace targets
+} // namespace clang
+
+#endif // LLVM_CLANG_LIB_BASIC_TARGETS_RISCV_H
diff --git a/lib/Basic/Targets/SPIR.h b/lib/Basic/Targets/SPIR.h
index e64ad54..c384d42 100644
--- a/lib/Basic/Targets/SPIR.h
+++ b/lib/Basic/Targets/SPIR.h
@@ -43,6 +43,7 @@
     assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment &&
            "SPIR target must use unknown environment type");
     TLSSupported = false;
+    VLASupported = false;
     LongWidth = LongAlign = 64;
     AddrSpaceMap = &SPIRAddrSpaceMap;
     UseAddrSpaceMapMangling = true;
diff --git a/lib/Basic/Targets/Sparc.cpp b/lib/Basic/Targets/Sparc.cpp
index 429c1ee..1e34f6b 100644
--- a/lib/Basic/Targets/Sparc.cpp
+++ b/lib/Basic/Targets/Sparc.cpp
@@ -51,49 +51,81 @@
       .Default(false);
 }
 
+struct SparcCPUInfo {
+  llvm::StringLiteral Name;
+  SparcTargetInfo::CPUKind Kind;
+  SparcTargetInfo::CPUGeneration Generation;
+};
+
+static constexpr SparcCPUInfo CPUInfo[] = {
+    {{"v8"}, SparcTargetInfo::CK_V8, SparcTargetInfo::CG_V8},
+    {{"supersparc"}, SparcTargetInfo::CK_SUPERSPARC, SparcTargetInfo::CG_V8},
+    {{"sparclite"}, SparcTargetInfo::CK_SPARCLITE, SparcTargetInfo::CG_V8},
+    {{"f934"}, SparcTargetInfo::CK_F934, SparcTargetInfo::CG_V8},
+    {{"hypersparc"}, SparcTargetInfo::CK_HYPERSPARC, SparcTargetInfo::CG_V8},
+    {{"sparclite86x"},
+     SparcTargetInfo::CK_SPARCLITE86X,
+     SparcTargetInfo::CG_V8},
+    {{"sparclet"}, SparcTargetInfo::CK_SPARCLET, SparcTargetInfo::CG_V8},
+    {{"tsc701"}, SparcTargetInfo::CK_TSC701, SparcTargetInfo::CG_V8},
+    {{"v9"}, SparcTargetInfo::CK_V9, SparcTargetInfo::CG_V9},
+    {{"ultrasparc"}, SparcTargetInfo::CK_ULTRASPARC, SparcTargetInfo::CG_V9},
+    {{"ultrasparc3"}, SparcTargetInfo::CK_ULTRASPARC3, SparcTargetInfo::CG_V9},
+    {{"niagara"}, SparcTargetInfo::CK_NIAGARA, SparcTargetInfo::CG_V9},
+    {{"niagara2"}, SparcTargetInfo::CK_NIAGARA2, SparcTargetInfo::CG_V9},
+    {{"niagara3"}, SparcTargetInfo::CK_NIAGARA3, SparcTargetInfo::CG_V9},
+    {{"niagara4"}, SparcTargetInfo::CK_NIAGARA4, SparcTargetInfo::CG_V9},
+    {{"ma2100"}, SparcTargetInfo::CK_MYRIAD2100, SparcTargetInfo::CG_V8},
+    {{"ma2150"}, SparcTargetInfo::CK_MYRIAD2150, SparcTargetInfo::CG_V8},
+    {{"ma2155"}, SparcTargetInfo::CK_MYRIAD2155, SparcTargetInfo::CG_V8},
+    {{"ma2450"}, SparcTargetInfo::CK_MYRIAD2450, SparcTargetInfo::CG_V8},
+    {{"ma2455"}, SparcTargetInfo::CK_MYRIAD2455, SparcTargetInfo::CG_V8},
+    {{"ma2x5x"}, SparcTargetInfo::CK_MYRIAD2x5x, SparcTargetInfo::CG_V8},
+    {{"ma2080"}, SparcTargetInfo::CK_MYRIAD2080, SparcTargetInfo::CG_V8},
+    {{"ma2085"}, SparcTargetInfo::CK_MYRIAD2085, SparcTargetInfo::CG_V8},
+    {{"ma2480"}, SparcTargetInfo::CK_MYRIAD2480, SparcTargetInfo::CG_V8},
+    {{"ma2485"}, SparcTargetInfo::CK_MYRIAD2485, SparcTargetInfo::CG_V8},
+    {{"ma2x8x"}, SparcTargetInfo::CK_MYRIAD2x8x, SparcTargetInfo::CG_V8},
+    // FIXME: the myriad2[.n] spellings are obsolete,
+    // but a grace period is needed to allow updating dependent builds.
+    {{"myriad2"}, SparcTargetInfo::CK_MYRIAD2x5x, SparcTargetInfo::CG_V8},
+    {{"myriad2.1"}, SparcTargetInfo::CK_MYRIAD2100, SparcTargetInfo::CG_V8},
+    {{"myriad2.2"}, SparcTargetInfo::CK_MYRIAD2x5x, SparcTargetInfo::CG_V8},
+    {{"myriad2.3"}, SparcTargetInfo::CK_MYRIAD2x8x, SparcTargetInfo::CG_V8},
+    {{"leon2"}, SparcTargetInfo::CK_LEON2, SparcTargetInfo::CG_V8},
+    {{"at697e"}, SparcTargetInfo::CK_LEON2_AT697E, SparcTargetInfo::CG_V8},
+    {{"at697f"}, SparcTargetInfo::CK_LEON2_AT697F, SparcTargetInfo::CG_V8},
+    {{"leon3"}, SparcTargetInfo::CK_LEON3, SparcTargetInfo::CG_V8},
+    {{"ut699"}, SparcTargetInfo::CK_LEON3_UT699, SparcTargetInfo::CG_V8},
+    {{"gr712rc"}, SparcTargetInfo::CK_LEON3_GR712RC, SparcTargetInfo::CG_V8},
+    {{"leon4"}, SparcTargetInfo::CK_LEON4, SparcTargetInfo::CG_V8},
+    {{"gr740"}, SparcTargetInfo::CK_LEON4_GR740, SparcTargetInfo::CG_V8},
+};
+
+SparcTargetInfo::CPUGeneration
+SparcTargetInfo::getCPUGeneration(CPUKind Kind) const {
+  if (Kind == CK_GENERIC)
+    return CG_V8;
+  const SparcCPUInfo *Item = llvm::find_if(
+      CPUInfo, [Kind](const SparcCPUInfo &Info) { return Info.Kind == Kind; });
+  if (Item == std::end(CPUInfo))
+    llvm_unreachable("Unexpected CPU kind");
+  return Item->Generation;
+}
+
 SparcTargetInfo::CPUKind SparcTargetInfo::getCPUKind(StringRef Name) const {
-  return llvm::StringSwitch<CPUKind>(Name)
-      .Case("v8", CK_V8)
-      .Case("supersparc", CK_SUPERSPARC)
-      .Case("sparclite", CK_SPARCLITE)
-      .Case("f934", CK_F934)
-      .Case("hypersparc", CK_HYPERSPARC)
-      .Case("sparclite86x", CK_SPARCLITE86X)
-      .Case("sparclet", CK_SPARCLET)
-      .Case("tsc701", CK_TSC701)
-      .Case("v9", CK_V9)
-      .Case("ultrasparc", CK_ULTRASPARC)
-      .Case("ultrasparc3", CK_ULTRASPARC3)
-      .Case("niagara", CK_NIAGARA)
-      .Case("niagara2", CK_NIAGARA2)
-      .Case("niagara3", CK_NIAGARA3)
-      .Case("niagara4", CK_NIAGARA4)
-      .Case("ma2100", CK_MYRIAD2100)
-      .Case("ma2150", CK_MYRIAD2150)
-      .Case("ma2155", CK_MYRIAD2155)
-      .Case("ma2450", CK_MYRIAD2450)
-      .Case("ma2455", CK_MYRIAD2455)
-      .Case("ma2x5x", CK_MYRIAD2x5x)
-      .Case("ma2080", CK_MYRIAD2080)
-      .Case("ma2085", CK_MYRIAD2085)
-      .Case("ma2480", CK_MYRIAD2480)
-      .Case("ma2485", CK_MYRIAD2485)
-      .Case("ma2x8x", CK_MYRIAD2x8x)
-      // FIXME: the myriad2[.n] spellings are obsolete,
-      // but a grace period is needed to allow updating dependent builds.
-      .Case("myriad2", CK_MYRIAD2x5x)
-      .Case("myriad2.1", CK_MYRIAD2100)
-      .Case("myriad2.2", CK_MYRIAD2x5x)
-      .Case("myriad2.3", CK_MYRIAD2x8x)
-      .Case("leon2", CK_LEON2)
-      .Case("at697e", CK_LEON2_AT697E)
-      .Case("at697f", CK_LEON2_AT697F)
-      .Case("leon3", CK_LEON3)
-      .Case("ut699", CK_LEON3_UT699)
-      .Case("gr712rc", CK_LEON3_GR712RC)
-      .Case("leon4", CK_LEON4)
-      .Case("gr740", CK_LEON4_GR740)
-      .Default(CK_GENERIC);
+  const SparcCPUInfo *Item = llvm::find_if(
+      CPUInfo, [Name](const SparcCPUInfo &Info) { return Info.Name == Name; });
+
+  if (Item == std::end(CPUInfo))
+    return CK_GENERIC;
+  return Item->Kind;
+}
+
+void SparcTargetInfo::fillValidCPUList(
+    SmallVectorImpl<StringRef> &Values) const {
+  for (const SparcCPUInfo &Info : CPUInfo)
+    Values.push_back(Info.Name);
 }
 
 void SparcTargetInfo::getTargetDefines(const LangOptions &Opts,
@@ -178,6 +210,13 @@
       Builder.defineMacro(MyriadArchValue, "1");
       Builder.defineMacro(MyriadArchValue + "__", "1");
     }
+    if (Myriad2Value == "2") {
+      Builder.defineMacro("__ma2x5x", "1");
+      Builder.defineMacro("__ma2x5x__", "1");
+    } else if (Myriad2Value == "3") {
+      Builder.defineMacro("__ma2x8x", "1");
+      Builder.defineMacro("__ma2x8x__", "1");
+    }
     Builder.defineMacro("__myriad2__", Myriad2Value);
     Builder.defineMacro("__myriad2", Myriad2Value);
   }
@@ -195,3 +234,10 @@
     Builder.defineMacro("__sparcv9__");
   }
 }
+
+void SparcV9TargetInfo::fillValidCPUList(
+    SmallVectorImpl<StringRef> &Values) const {
+  for (const SparcCPUInfo &Info : CPUInfo)
+    if (Info.Generation == CG_V9)
+      Values.push_back(Info.Name);
+}
diff --git a/lib/Basic/Targets/Sparc.h b/lib/Basic/Targets/Sparc.h
index aacc261..674a9cc 100644
--- a/lib/Basic/Targets/Sparc.h
+++ b/lib/Basic/Targets/Sparc.h
@@ -131,48 +131,7 @@
     CG_V9,
   };
 
-  CPUGeneration getCPUGeneration(CPUKind Kind) const {
-    switch (Kind) {
-    case CK_GENERIC:
-    case CK_V8:
-    case CK_SUPERSPARC:
-    case CK_SPARCLITE:
-    case CK_F934:
-    case CK_HYPERSPARC:
-    case CK_SPARCLITE86X:
-    case CK_SPARCLET:
-    case CK_TSC701:
-    case CK_MYRIAD2100:
-    case CK_MYRIAD2150:
-    case CK_MYRIAD2155:
-    case CK_MYRIAD2450:
-    case CK_MYRIAD2455:
-    case CK_MYRIAD2x5x:
-    case CK_MYRIAD2080:
-    case CK_MYRIAD2085:
-    case CK_MYRIAD2480:
-    case CK_MYRIAD2485:
-    case CK_MYRIAD2x8x:
-    case CK_LEON2:
-    case CK_LEON2_AT697E:
-    case CK_LEON2_AT697F:
-    case CK_LEON3:
-    case CK_LEON3_UT699:
-    case CK_LEON3_GR712RC:
-    case CK_LEON4:
-    case CK_LEON4_GR740:
-      return CG_V8;
-    case CK_V9:
-    case CK_ULTRASPARC:
-    case CK_ULTRASPARC3:
-    case CK_NIAGARA:
-    case CK_NIAGARA2:
-    case CK_NIAGARA3:
-    case CK_NIAGARA4:
-      return CG_V9;
-    }
-    llvm_unreachable("Unexpected CPU kind");
-  }
+  CPUGeneration getCPUGeneration(CPUKind Kind) const;
 
   CPUKind getCPUKind(StringRef Name) const;
 
@@ -180,6 +139,8 @@
     return getCPUKind(Name) != CK_GENERIC;
   }
 
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
+
   bool setCPU(const std::string &Name) override {
     CPU = getCPUKind(Name);
     return CPU != CK_GENERIC;
@@ -259,6 +220,8 @@
     return getCPUGeneration(SparcTargetInfo::getCPUKind(Name)) == CG_V9;
   }
 
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
+
   bool setCPU(const std::string &Name) override {
     if (!SparcTargetInfo::setCPU(Name))
       return false;
diff --git a/lib/Basic/Targets/SystemZ.cpp b/lib/Basic/Targets/SystemZ.cpp
index 98f3ae2..6f06f1f 100644
--- a/lib/Basic/Targets/SystemZ.cpp
+++ b/lib/Basic/Targets/SystemZ.cpp
@@ -30,15 +30,30 @@
 };
 
 const char *const SystemZTargetInfo::GCCRegNames[] = {
-    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6", "r7",  "r8",  "r9", "r10",
-    "r11", "r12", "r13", "r14", "r15", "f0",  "f2", "f4",  "f6",  "f1", "f3",
-    "f5",  "f7",  "f8",  "f10", "f12", "f14", "f9", "f11", "f13", "f15"
+    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+    "f0",  "f2",  "f4",  "f6",  "f1",  "f3",  "f5",  "f7",
+    "f8",  "f10", "f12", "f14", "f9",  "f11", "f13", "f15",
+    /*ap*/"", "cc", /*fp*/"", /*rp*/"", "a0",  "a1",
+    "v16", "v18", "v20", "v22", "v17", "v19", "v21", "v23",
+    "v24", "v26", "v28", "v30", "v25", "v27", "v29", "v31"
+};
+
+const TargetInfo::AddlRegName GCCAddlRegNames[] = {
+    {{"v0"}, 16}, {{"v2"},  17}, {{"v4"},  18}, {{"v6"},  19},
+    {{"v1"}, 20}, {{"v3"},  21}, {{"v5"},  22}, {{"v7"},  23},
+    {{"v8"}, 24}, {{"v10"}, 25}, {{"v12"}, 26}, {{"v14"}, 27},
+    {{"v9"}, 28}, {{"v11"}, 29}, {{"v13"}, 30}, {{"v15"}, 31}
 };
 
 ArrayRef<const char *> SystemZTargetInfo::getGCCRegNames() const {
   return llvm::makeArrayRef(GCCRegNames);
 }
 
+ArrayRef<TargetInfo::AddlRegName> SystemZTargetInfo::getGCCAddlRegNames() const {
+  return llvm::makeArrayRef(GCCAddlRegNames);
+}
+
 bool SystemZTargetInfo::validateAsmConstraint(
     const char *&Name, TargetInfo::ConstraintInfo &Info) const {
   switch (*Name) {
@@ -48,6 +63,7 @@
   case 'a': // Address register
   case 'd': // Data register (equivalent to 'r')
   case 'f': // Floating-point register
+  case 'v': // Vector register
     Info.setAllowsRegister();
     return true;
 
@@ -67,14 +83,32 @@
   }
 }
 
-int SystemZTargetInfo::getISARevision(const StringRef &Name) const {
-  return llvm::StringSwitch<int>(Name)
-      .Cases("arch8", "z10", 8)
-      .Cases("arch9", "z196", 9)
-      .Cases("arch10", "zEC12", 10)
-      .Cases("arch11", "z13", 11)
-      .Cases("arch12", "z14", 12)
-      .Default(-1);
+struct ISANameRevision {
+  llvm::StringLiteral Name;
+  int ISARevisionID;
+};
+static constexpr ISANameRevision ISARevisions[] = {
+  {{"arch8"}, 8}, {{"z10"}, 8},
+  {{"arch9"}, 9}, {{"z196"}, 9},
+  {{"arch10"}, 10}, {{"zEC12"}, 10},
+  {{"arch11"}, 11}, {{"z13"}, 11},
+  {{"arch12"}, 12}, {{"z14"}, 12}
+};
+
+int SystemZTargetInfo::getISARevision(StringRef Name) const {
+  const auto Rev =
+      llvm::find_if(ISARevisions, [Name](const ISANameRevision &CR) {
+        return CR.Name == Name;
+      });
+  if (Rev == std::end(ISARevisions))
+    return -1;
+  return Rev->ISARevisionID;
+}
+
+void SystemZTargetInfo::fillValidCPUList(
+    SmallVectorImpl<StringRef> &Values) const {
+  for (const ISANameRevision &Rev : ISARevisions)
+    Values.push_back(Rev.Name);
 }
 
 bool SystemZTargetInfo::hasFeature(StringRef Feature) const {
diff --git a/lib/Basic/Targets/SystemZ.h b/lib/Basic/Targets/SystemZ.h
index 3023c1d..8423160 100644
--- a/lib/Basic/Targets/SystemZ.h
+++ b/lib/Basic/Targets/SystemZ.h
@@ -62,6 +62,8 @@
     return None;
   }
 
+  ArrayRef<TargetInfo::AddlRegName> getGCCAddlRegNames() const override;
+
   bool validateAsmConstraint(const char *&Name,
                              TargetInfo::ConstraintInfo &info) const override;
 
@@ -74,12 +76,14 @@
     return TargetInfo::SystemZBuiltinVaList;
   }
 
-  int getISARevision(const StringRef &Name) const;
+  int getISARevision(StringRef Name) const;
 
   bool isValidCPUName(StringRef Name) const override {
     return getISARevision(Name) != -1;
   }
 
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
+
   bool setCPU(const std::string &Name) override {
     CPU = Name;
     ISARevision = getISARevision(CPU);
diff --git a/lib/Basic/Targets/WebAssembly.cpp b/lib/Basic/Targets/WebAssembly.cpp
index 9eaaf8d..b8a2a09 100644
--- a/lib/Basic/Targets/WebAssembly.cpp
+++ b/lib/Basic/Targets/WebAssembly.cpp
@@ -29,18 +29,25 @@
 #include "clang/Basic/BuiltinsWebAssembly.def"
 };
 
+static constexpr llvm::StringLiteral ValidCPUNames[] = {
+    {"mvp"}, {"bleeding-edge"}, {"generic"}};
+
 bool WebAssemblyTargetInfo::hasFeature(StringRef Feature) const {
   return llvm::StringSwitch<bool>(Feature)
       .Case("simd128", SIMDLevel >= SIMD128)
+      .Case("nontrapping-fptoint", HasNontrappingFPToInt)
+      .Case("sign-ext", HasSignExt)
+      .Case("exception-handling", HasExceptionHandling)
       .Default(false);
 }
 
 bool WebAssemblyTargetInfo::isValidCPUName(StringRef Name) const {
-  return llvm::StringSwitch<bool>(Name)
-      .Case("mvp", true)
-      .Case("bleeding-edge", true)
-      .Case("generic", true)
-      .Default(false);
+  return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);
+}
+
+void WebAssemblyTargetInfo::fillValidCPUList(
+    SmallVectorImpl<StringRef> &Values) const {
+  Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames));
 }
 
 void WebAssemblyTargetInfo::getTargetDefines(const LangOptions &Opts,
@@ -61,6 +68,30 @@
       SIMDLevel = std::min(SIMDLevel, SIMDEnum(SIMD128 - 1));
       continue;
     }
+    if (Feature == "+nontrapping-fptoint") {
+      HasNontrappingFPToInt = true;
+      continue;
+    }
+    if (Feature == "-nontrapping-fptoint") {
+      HasNontrappingFPToInt = false;
+      continue;
+    }
+    if (Feature == "+sign-ext") {
+      HasSignExt = true;
+      continue;
+    }
+    if (Feature == "-sign-ext") {
+      HasSignExt = false;
+      continue;
+    }
+    if (Feature == "+exception-handling") {
+      HasExceptionHandling = true;
+      continue;
+    }
+    if (Feature == "-exception-handling") {
+      HasExceptionHandling = false;
+      continue;
+    }
 
     Diags.Report(diag::err_opt_not_valid_with_opt)
         << Feature << "-target-feature";
diff --git a/lib/Basic/Targets/WebAssembly.h b/lib/Basic/Targets/WebAssembly.h
index 20f5750..b0e466b 100644
--- a/lib/Basic/Targets/WebAssembly.h
+++ b/lib/Basic/Targets/WebAssembly.h
@@ -30,9 +30,14 @@
     SIMD128,
   } SIMDLevel;
 
+  bool HasNontrappingFPToInt;
+  bool HasSignExt;
+  bool HasExceptionHandling;
+
 public:
   explicit WebAssemblyTargetInfo(const llvm::Triple &T, const TargetOptions &)
-      : TargetInfo(T), SIMDLevel(NoSIMD) {
+      : TargetInfo(T), SIMDLevel(NoSIMD), HasNontrappingFPToInt(false),
+        HasSignExt(false), HasExceptionHandling(false) {
     NoAsmVariants = true;
     SuitableAlign = 128;
     LargeArrayMinWidth = 128;
@@ -41,6 +46,7 @@
     SigAtomicType = SignedLong;
     LongDoubleWidth = LongDoubleAlign = 128;
     LongDoubleFormat = &llvm::APFloat::IEEEquad();
+    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
     SizeType = UnsignedInt;
     PtrDiffType = SignedInt;
     IntPtrType = SignedInt;
@@ -55,8 +61,11 @@
   initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
                  StringRef CPU,
                  const std::vector<std::string> &FeaturesVec) const override {
-    if (CPU == "bleeding-edge")
+    if (CPU == "bleeding-edge") {
       Features["simd128"] = true;
+      Features["nontrapping-fptoint"] = true;
+      Features["sign-ext"] = true;
+    }
     return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
   }
 
@@ -66,6 +75,7 @@
                             DiagnosticsEngine &Diags) final;
 
   bool isValidCPUName(StringRef Name) const final;
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const final;
 
   bool setCPU(const std::string &Name) final { return isValidCPUName(Name); }
 
@@ -111,7 +121,6 @@
   explicit WebAssembly32TargetInfo(const llvm::Triple &T,
                                    const TargetOptions &Opts)
       : WebAssemblyTargetInfo(T, Opts) {
-    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
     resetDataLayout("e-m:e-p:32:32-i64:64-n32:64-S128");
   }
 
@@ -128,7 +137,6 @@
       : WebAssemblyTargetInfo(T, Opts) {
     LongAlign = LongWidth = 64;
     PointerAlign = PointerWidth = 64;
-    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
     SizeType = UnsignedLong;
     PtrDiffType = SignedLong;
     IntPtrType = SignedLong;
diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp
index 6eee2fa..ab76c77 100644
--- a/lib/Basic/Targets/X86.cpp
+++ b/lib/Basic/Targets/X86.cpp
@@ -15,8 +15,10 @@
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/TargetParser.h"
 
 namespace clang {
 namespace targets {
@@ -60,6 +62,7 @@
     "k2",    "k3",    "k4",    "k5",    "k6",      "k7",
     "cr0",   "cr2",   "cr3",   "cr4",   "cr8",
     "dr0",   "dr1",   "dr2",   "dr3",   "dr6",     "dr7",
+    "bnd0",  "bnd1",  "bnd2",  "bnd3",
 };
 
 const TargetInfo::AddlRegName AddlRegNames[] = {
@@ -99,6 +102,26 @@
   return false;
 }
 
+bool X86TargetInfo::checkCFProtectionReturnSupported(
+    DiagnosticsEngine &Diags) const {
+  if (HasSHSTK)
+    return true;
+
+  Diags.Report(diag::err_opt_not_valid_without_opt) << "cf-protection=return"
+                                                    << "-mshstk";
+  return false;
+}
+
+bool X86TargetInfo::checkCFProtectionBranchSupported(
+    DiagnosticsEngine &Diags) const {
+  if (HasIBT)
+    return true;
+
+  Diags.Report(diag::err_opt_not_valid_without_opt) << "cf-protection=branch"
+                                                    << "-mibt";
+  return false;
+}
+
 bool X86TargetInfo::initFeatureMap(
     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
     const std::vector<std::string> &FeaturesVec) const {
@@ -119,7 +142,6 @@
   case CK_i486:
   case CK_i586:
   case CK_Pentium:
-  case CK_i686:
   case CK_PentiumPro:
   case CK_Lakemont:
     break;
@@ -131,6 +153,16 @@
     setFeatureEnabledImpl(Features, "mmx", true);
     break;
 
+  case CK_Icelake:
+    setFeatureEnabledImpl(Features, "vaes", true);
+    setFeatureEnabledImpl(Features, "gfni", true);
+    setFeatureEnabledImpl(Features, "vpclmulqdq", true);
+    setFeatureEnabledImpl(Features, "avx512bitalg", true);
+    setFeatureEnabledImpl(Features, "avx512vnni", true);
+    setFeatureEnabledImpl(Features, "avx512vbmi2", true);
+    setFeatureEnabledImpl(Features, "avx512vpopcntdq", true);
+    setFeatureEnabledImpl(Features, "rdpid", true);
+    LLVM_FALLTHROUGH;
   case CK_Cannonlake:
     setFeatureEnabledImpl(Features, "avx512ifma", true);
     setFeatureEnabledImpl(Features, "avx512vbmi", true);
@@ -143,7 +175,8 @@
     setFeatureEnabledImpl(Features, "avx512bw", true);
     setFeatureEnabledImpl(Features, "avx512vl", true);
     setFeatureEnabledImpl(Features, "pku", true);
-    setFeatureEnabledImpl(Features, "clwb", true);
+    if (Kind != CK_Cannonlake) // CNL inherits all SKX features, except CLWB
+      setFeatureEnabledImpl(Features, "clwb", true);
     LLVM_FALLTHROUGH;
   case CK_SkylakeClient:
     setFeatureEnabledImpl(Features, "xsavec", true);
@@ -156,6 +189,7 @@
   case CK_Broadwell:
     setFeatureEnabledImpl(Features, "rdseed", true);
     setFeatureEnabledImpl(Features, "adx", true);
+    setFeatureEnabledImpl(Features, "prfchw", true);
     LLVM_FALLTHROUGH;
   case CK_Haswell:
     setFeatureEnabledImpl(Features, "avx2", true);
@@ -187,6 +221,7 @@
     LLVM_FALLTHROUGH;
   case CK_Core2:
     setFeatureEnabledImpl(Features, "ssse3", true);
+    setFeatureEnabledImpl(Features, "sahf", true);
     LLVM_FALLTHROUGH;
   case CK_Yonah:
   case CK_Prescott:
@@ -207,7 +242,6 @@
 
   case CK_Goldmont:
     setFeatureEnabledImpl(Features, "sha", true);
-    setFeatureEnabledImpl(Features, "rdrnd", true);
     setFeatureEnabledImpl(Features, "rdseed", true);
     setFeatureEnabledImpl(Features, "xsave", true);
     setFeatureEnabledImpl(Features, "xsaveopt", true);
@@ -218,24 +252,30 @@
     setFeatureEnabledImpl(Features, "fsgsbase", true);
     LLVM_FALLTHROUGH;
   case CK_Silvermont:
+    setFeatureEnabledImpl(Features, "rdrnd", true);
     setFeatureEnabledImpl(Features, "aes", true);
     setFeatureEnabledImpl(Features, "pclmul", true);
     setFeatureEnabledImpl(Features, "sse4.2", true);
+    setFeatureEnabledImpl(Features, "prfchw", true);
     LLVM_FALLTHROUGH;
   case CK_Bonnell:
     setFeatureEnabledImpl(Features, "movbe", true);
     setFeatureEnabledImpl(Features, "ssse3", true);
     setFeatureEnabledImpl(Features, "fxsr", true);
     setFeatureEnabledImpl(Features, "cx16", true);
+    setFeatureEnabledImpl(Features, "sahf", true);
     break;
 
   case CK_KNM:
     // TODO: Add avx5124fmaps/avx5124vnniw.
+    setFeatureEnabledImpl(Features, "avx512vpopcntdq", true);
+    LLVM_FALLTHROUGH;
   case CK_KNL:
     setFeatureEnabledImpl(Features, "avx512f", true);
     setFeatureEnabledImpl(Features, "avx512cd", true);
     setFeatureEnabledImpl(Features, "avx512er", true);
     setFeatureEnabledImpl(Features, "avx512pf", true);
+    setFeatureEnabledImpl(Features, "prfchw", true);
     setFeatureEnabledImpl(Features, "prefetchwt1", true);
     setFeatureEnabledImpl(Features, "fxsr", true);
     setFeatureEnabledImpl(Features, "rdseed", true);
@@ -254,6 +294,7 @@
     setFeatureEnabledImpl(Features, "xsaveopt", true);
     setFeatureEnabledImpl(Features, "xsave", true);
     setFeatureEnabledImpl(Features, "movbe", true);
+    setFeatureEnabledImpl(Features, "sahf", true);
     break;
 
   case CK_K6_2:
@@ -267,6 +308,7 @@
     setFeatureEnabledImpl(Features, "sse4a", true);
     setFeatureEnabledImpl(Features, "lzcnt", true);
     setFeatureEnabledImpl(Features, "popcnt", true);
+    setFeatureEnabledImpl(Features, "sahf", true);
     LLVM_FALLTHROUGH;
   case CK_K8SSE3:
     setFeatureEnabledImpl(Features, "sse3", true);
@@ -300,6 +342,7 @@
     setFeatureEnabledImpl(Features, "prfchw", true);
     setFeatureEnabledImpl(Features, "cx16", true);
     setFeatureEnabledImpl(Features, "fxsr", true);
+    setFeatureEnabledImpl(Features, "sahf", true);
     break;
 
   case CK_ZNVER1:
@@ -323,6 +366,7 @@
     setFeatureEnabledImpl(Features, "prfchw", true);
     setFeatureEnabledImpl(Features, "rdrnd", true);
     setFeatureEnabledImpl(Features, "rdseed", true);
+    setFeatureEnabledImpl(Features, "sahf", true);
     setFeatureEnabledImpl(Features, "sha", true);
     setFeatureEnabledImpl(Features, "sse4a", true);
     setFeatureEnabledImpl(Features, "xsave", true);
@@ -357,6 +401,7 @@
     setFeatureEnabledImpl(Features, "cx16", true);
     setFeatureEnabledImpl(Features, "fxsr", true);
     setFeatureEnabledImpl(Features, "xsave", true);
+    setFeatureEnabledImpl(Features, "sahf", true);
     break;
   }
   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec))
@@ -395,7 +440,7 @@
   if (Enabled) {
     switch (Level) {
     case AVX512F:
-      Features["avx512f"] = true;
+      Features["avx512f"] = Features["fma"] = Features["f16c"] = true;
       LLVM_FALLTHROUGH;
     case AVX2:
       Features["avx2"] = true;
@@ -435,7 +480,7 @@
     LLVM_FALLTHROUGH;
   case SSE2:
     Features["sse2"] = Features["pclmul"] = Features["aes"] = Features["sha"] =
-        false;
+        Features["gfni"] = false;
     LLVM_FALLTHROUGH;
   case SSE3:
     Features["sse3"] = false;
@@ -452,7 +497,7 @@
     LLVM_FALLTHROUGH;
   case AVX:
     Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] =
-        Features["xsaveopt"] = false;
+        Features["xsaveopt"] = Features["vaes"] = Features["vpclmulqdq"] = false;
     setXOPLevel(Features, FMA4, false);
     LLVM_FALLTHROUGH;
   case AVX2:
@@ -462,7 +507,9 @@
     Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
         Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
             Features["avx512vl"] = Features["avx512vbmi"] =
-                Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
+                Features["avx512ifma"] = Features["avx512vpopcntdq"] =
+                    Features["avx512bitalg"] = Features["avx512vnni"] =
+                        Features["avx512vbmi2"] = false;
     break;
   }
 }
@@ -564,9 +611,26 @@
   } else if (Name == "aes") {
     if (Enabled)
       setSSELevel(Features, SSE2, Enabled);
+    else
+      Features["vaes"] = false;
+  } else if (Name == "vaes") {
+    if (Enabled) {
+      setSSELevel(Features, AVX, Enabled);
+      Features["aes"] = true;
+    }
   } else if (Name == "pclmul") {
     if (Enabled)
       setSSELevel(Features, SSE2, Enabled);
+    else
+      Features["vpclmulqdq"] = false;
+  } else if (Name == "vpclmulqdq") {
+    if (Enabled) {
+      setSSELevel(Features, AVX, Enabled);
+      Features["pclmul"] = true;
+    }
+  } else if (Name == "gfni") {
+     if (Enabled)
+      setSSELevel(Features, SSE2, Enabled);
   } else if (Name == "avx") {
     setSSELevel(Features, AVX, Enabled);
   } else if (Name == "avx2") {
@@ -576,18 +640,22 @@
   } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
              Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
              Name == "avx512vbmi" || Name == "avx512ifma" ||
-             Name == "avx512vpopcntdq") {
+             Name == "avx512vpopcntdq" || Name == "avx512bitalg" ||
+             Name == "avx512vnni" || Name == "avx512vbmi2") {
     if (Enabled)
       setSSELevel(Features, AVX512F, Enabled);
-    // Enable BWI instruction if VBMI is being enabled.
-    if (Name == "avx512vbmi" && Enabled)
+    // Enable BWI instruction if VBMI/VBMI2/BITALG is being enabled.
+    if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled)
       Features["avx512bw"] = true;
-    // Also disable VBMI if BWI is being disabled.
+    // Also disable VBMI/VBMI2/BITALG if BWI is being disabled.
     if (Name == "avx512bw" && !Enabled)
-      Features["avx512vbmi"] = false;
+      Features["avx512vbmi"] = Features["avx512vbmi2"] =
+      Features["avx512bitalg"] = false;
   } else if (Name == "fma") {
     if (Enabled)
       setSSELevel(Features, AVX, Enabled);
+    else
+      setSSELevel(Features, AVX512F, Enabled);
   } else if (Name == "fma4") {
     setXOPLevel(Features, FMA4, Enabled);
   } else if (Name == "xop") {
@@ -597,6 +665,8 @@
   } else if (Name == "f16c") {
     if (Enabled)
       setSSELevel(Features, AVX, Enabled);
+    else
+      setSSELevel(Features, AVX512F, Enabled);
   } else if (Name == "sha") {
     if (Enabled)
       setSSELevel(Features, SSE2, Enabled);
@@ -628,8 +698,12 @@
 
     if (Feature == "+aes") {
       HasAES = true;
+    } else if (Feature == "+vaes") {
+      HasVAES = true;
     } else if (Feature == "+pclmul") {
       HasPCLMUL = true;
+    } else if (Feature == "+vpclmulqdq") {
+      HasVPCLMULQDQ = true;
     } else if (Feature == "+lzcnt") {
       HasLZCNT = true;
     } else if (Feature == "+rdrnd") {
@@ -658,28 +732,40 @@
       HasFMA = true;
     } else if (Feature == "+f16c") {
       HasF16C = true;
+    } else if (Feature == "+gfni") {
+      HasGFNI = true;
     } else if (Feature == "+avx512cd") {
       HasAVX512CD = true;
     } else if (Feature == "+avx512vpopcntdq") {
       HasAVX512VPOPCNTDQ = true;
+    } else if (Feature == "+avx512vnni") {
+      HasAVX512VNNI = true;
     } else if (Feature == "+avx512er") {
       HasAVX512ER = true;
     } else if (Feature == "+avx512pf") {
       HasAVX512PF = true;
     } else if (Feature == "+avx512dq") {
       HasAVX512DQ = true;
+    } else if (Feature == "+avx512bitalg") {
+      HasAVX512BITALG = true;
     } else if (Feature == "+avx512bw") {
       HasAVX512BW = true;
     } else if (Feature == "+avx512vl") {
       HasAVX512VL = true;
     } else if (Feature == "+avx512vbmi") {
       HasAVX512VBMI = true;
+    } else if (Feature == "+avx512vbmi2") {
+      HasAVX512VBMI2 = true;
     } else if (Feature == "+avx512ifma") {
       HasAVX512IFMA = true;
     } else if (Feature == "+sha") {
       HasSHA = true;
     } else if (Feature == "+mpx") {
       HasMPX = true;
+    } else if (Feature == "+shstk") {
+      HasSHSTK = true;
+    } else if (Feature == "+ibt") {
+      HasIBT = true;
     } else if (Feature == "+movbe") {
       HasMOVBE = true;
     } else if (Feature == "+sgx") {
@@ -708,6 +794,14 @@
       HasPREFETCHWT1 = true;
     } else if (Feature == "+clzero") {
       HasCLZERO = true;
+    } else if (Feature == "+rdpid") {
+      HasRDPID = true;
+    } else if (Feature == "+retpoline") {
+      HasRetpoline = true;
+    } else if (Feature == "+retpoline-external-thunk") {
+      HasRetpolineExternalThunk = true;
+    } else if (Feature == "+sahf") {
+      HasLAHFSAHF = true;
     }
 
     X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
@@ -804,15 +898,8 @@
     Builder.defineMacro("__tune_pentium2__");
     LLVM_FALLTHROUGH;
   case CK_PentiumPro:
-    Builder.defineMacro("__tune_i686__");
-    Builder.defineMacro("__tune_pentiumpro__");
-    LLVM_FALLTHROUGH;
-  case CK_i686:
-    Builder.defineMacro("__i686");
-    Builder.defineMacro("__i686__");
-    // Strangely, __tune_i686__ isn't defined by GCC when CPU == i686.
-    Builder.defineMacro("__pentiumpro");
-    Builder.defineMacro("__pentiumpro__");
+    defineCPUMacros(Builder, "i686");
+    defineCPUMacros(Builder, "pentiumpro");
     break;
   case CK_Pentium4:
     defineCPUMacros(Builder, "pentium4");
@@ -842,22 +929,22 @@
   case CK_Haswell:
   case CK_Broadwell:
   case CK_SkylakeClient:
+  case CK_SkylakeServer:
+  case CK_Cannonlake:
+  case CK_Icelake:
     // FIXME: Historically, we defined this legacy name, it would be nice to
     // remove it at some point. We've never exposed fine-grained names for
     // recent primary x86 CPUs, and we should keep it that way.
     defineCPUMacros(Builder, "corei7");
     break;
-  case CK_SkylakeServer:
-    defineCPUMacros(Builder, "skx");
-    break;
-  case CK_Cannonlake:
-    break;
   case CK_KNL:
     defineCPUMacros(Builder, "knl");
     break;
   case CK_KNM:
     break;
   case CK_Lakemont:
+    defineCPUMacros(Builder, "i586", /*Tuning*/false);
+    defineCPUMacros(Builder, "pentium", /*Tuning*/false);
     Builder.defineMacro("__tune_lakemont__");
     break;
   case CK_K6_2:
@@ -929,9 +1016,15 @@
   if (HasAES)
     Builder.defineMacro("__AES__");
 
+  if (HasVAES)
+    Builder.defineMacro("__VAES__");
+
   if (HasPCLMUL)
     Builder.defineMacro("__PCLMUL__");
 
+  if (HasVPCLMULQDQ)
+    Builder.defineMacro("__VPCLMULQDQ__");
+
   if (HasLZCNT)
     Builder.defineMacro("__LZCNT__");
 
@@ -991,22 +1084,31 @@
   if (HasF16C)
     Builder.defineMacro("__F16C__");
 
+  if (HasGFNI)
+    Builder.defineMacro("__GFNI__");
+
   if (HasAVX512CD)
     Builder.defineMacro("__AVX512CD__");
   if (HasAVX512VPOPCNTDQ)
     Builder.defineMacro("__AVX512VPOPCNTDQ__");
+  if (HasAVX512VNNI)
+    Builder.defineMacro("__AVX512VNNI__");
   if (HasAVX512ER)
     Builder.defineMacro("__AVX512ER__");
   if (HasAVX512PF)
     Builder.defineMacro("__AVX512PF__");
   if (HasAVX512DQ)
     Builder.defineMacro("__AVX512DQ__");
+  if (HasAVX512BITALG)
+    Builder.defineMacro("__AVX512BITALG__");
   if (HasAVX512BW)
     Builder.defineMacro("__AVX512BW__");
   if (HasAVX512VL)
     Builder.defineMacro("__AVX512VL__");
   if (HasAVX512VBMI)
     Builder.defineMacro("__AVX512VBMI__");
+  if (HasAVX512VBMI2)
+    Builder.defineMacro("__AVX512VBMI2__");
   if (HasAVX512IFMA)
     Builder.defineMacro("__AVX512IFMA__");
 
@@ -1025,20 +1127,24 @@
     Builder.defineMacro("__XSAVES__");
   if (HasPKU)
     Builder.defineMacro("__PKU__");
-  if (HasCX16)
-    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
   if (HasCLFLUSHOPT)
     Builder.defineMacro("__CLFLUSHOPT__");
   if (HasCLWB)
     Builder.defineMacro("__CLWB__");
   if (HasMPX)
     Builder.defineMacro("__MPX__");
+  if (HasSHSTK)
+    Builder.defineMacro("__SHSTK__");
+  if (HasIBT)
+    Builder.defineMacro("__IBT__");
   if (HasSGX)
     Builder.defineMacro("__SGX__");
   if (HasPREFETCHWT1)
     Builder.defineMacro("__PREFETCHWT1__");
   if (HasCLZERO)
     Builder.defineMacro("__CLZERO__");
+  if (HasRDPID)
+    Builder.defineMacro("__RDPID__");
 
   // Each case falls through to the previous one here.
   switch (SSELevel) {
@@ -1118,6 +1224,8 @@
   }
   if (CPU >= CK_i586)
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
+  if (HasCX16)
+    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
 
   if (HasFloat128)
     Builder.defineMacro("__SIZEOF_FLOAT128__", "16");
@@ -1125,18 +1233,24 @@
 
 bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
   return llvm::StringSwitch<bool>(Name)
+      .Case("3dnow", true)
+      .Case("3dnowa", true)
+      .Case("adx", true)
       .Case("aes", true)
       .Case("avx", true)
       .Case("avx2", true)
       .Case("avx512f", true)
       .Case("avx512cd", true)
       .Case("avx512vpopcntdq", true)
+      .Case("avx512vnni", true)
       .Case("avx512er", true)
       .Case("avx512pf", true)
       .Case("avx512dq", true)
+      .Case("avx512bitalg", true)
       .Case("avx512bw", true)
       .Case("avx512vl", true)
       .Case("avx512vbmi", true)
+      .Case("avx512vbmi2", true)
       .Case("avx512ifma", true)
       .Case("bmi", true)
       .Case("bmi2", true)
@@ -1149,34 +1263,38 @@
       .Case("fma4", true)
       .Case("fsgsbase", true)
       .Case("fxsr", true)
+      .Case("gfni", true)
       .Case("lwp", true)
       .Case("lzcnt", true)
-      .Case("mm3dnow", true)
-      .Case("mm3dnowa", true)
       .Case("mmx", true)
       .Case("movbe", true)
       .Case("mpx", true)
+      .Case("mwaitx", true)
       .Case("pclmul", true)
       .Case("pku", true)
       .Case("popcnt", true)
       .Case("prefetchwt1", true)
       .Case("prfchw", true)
+      .Case("rdpid", true)
       .Case("rdrnd", true)
       .Case("rdseed", true)
       .Case("rtm", true)
+      .Case("sahf", true)
       .Case("sgx", true)
       .Case("sha", true)
+      .Case("shstk", true)
       .Case("sse", true)
       .Case("sse2", true)
       .Case("sse3", true)
       .Case("ssse3", true)
+      .Case("sse4", true)
       .Case("sse4.1", true)
       .Case("sse4.2", true)
       .Case("sse4a", true)
       .Case("tbm", true)
-      .Case("x86", true)
-      .Case("x86_32", true)
-      .Case("x86_64", true)
+      .Case("vaes", true)
+      .Case("vpclmulqdq", true)
+      .Case("x87", true)
       .Case("xop", true)
       .Case("xsave", true)
       .Case("xsavec", true)
@@ -1187,18 +1305,22 @@
 
 bool X86TargetInfo::hasFeature(StringRef Feature) const {
   return llvm::StringSwitch<bool>(Feature)
+      .Case("adx", HasADX)
       .Case("aes", HasAES)
       .Case("avx", SSELevel >= AVX)
       .Case("avx2", SSELevel >= AVX2)
       .Case("avx512f", SSELevel >= AVX512F)
       .Case("avx512cd", HasAVX512CD)
       .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
+      .Case("avx512vnni", HasAVX512VNNI)
       .Case("avx512er", HasAVX512ER)
       .Case("avx512pf", HasAVX512PF)
       .Case("avx512dq", HasAVX512DQ)
+      .Case("avx512bitalg", HasAVX512BITALG)
       .Case("avx512bw", HasAVX512BW)
       .Case("avx512vl", HasAVX512VL)
       .Case("avx512vbmi", HasAVX512VBMI)
+      .Case("avx512vbmi2", HasAVX512VBMI2)
       .Case("avx512ifma", HasAVX512IFMA)
       .Case("bmi", HasBMI)
       .Case("bmi2", HasBMI2)
@@ -1211,6 +1333,8 @@
       .Case("fma4", XOPLevel >= FMA4)
       .Case("fsgsbase", HasFSGSBASE)
       .Case("fxsr", HasFXSR)
+      .Case("gfni", HasGFNI)
+      .Case("ibt", HasIBT)
       .Case("lwp", HasLWP)
       .Case("lzcnt", HasLZCNT)
       .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow)
@@ -1218,16 +1342,22 @@
       .Case("mmx", MMX3DNowLevel >= MMX)
       .Case("movbe", HasMOVBE)
       .Case("mpx", HasMPX)
+      .Case("mwaitx", HasMWAITX)
       .Case("pclmul", HasPCLMUL)
       .Case("pku", HasPKU)
       .Case("popcnt", HasPOPCNT)
       .Case("prefetchwt1", HasPREFETCHWT1)
       .Case("prfchw", HasPRFCHW)
+      .Case("rdpid", HasRDPID)
       .Case("rdrnd", HasRDRND)
       .Case("rdseed", HasRDSEED)
+      .Case("retpoline", HasRetpoline)
+      .Case("retpoline-external-thunk", HasRetpolineExternalThunk)
       .Case("rtm", HasRTM)
+      .Case("sahf", HasLAHFSAHF)
       .Case("sgx", HasSGX)
       .Case("sha", HasSHA)
+      .Case("shstk", HasSHSTK)
       .Case("sse", SSELevel >= SSE1)
       .Case("sse2", SSELevel >= SSE2)
       .Case("sse3", SSELevel >= SSE3)
@@ -1236,6 +1366,8 @@
       .Case("sse4.2", SSELevel >= SSE42)
       .Case("sse4a", XOPLevel >= SSE4A)
       .Case("tbm", HasTBM)
+      .Case("vaes", HasVAES)
+      .Case("vpclmulqdq", HasVPCLMULQDQ)
       .Case("x86", true)
       .Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
       .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
@@ -1254,76 +1386,80 @@
 // X86TargetInfo::hasFeature for a somewhat comprehensive list).
 bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
   return llvm::StringSwitch<bool>(FeatureStr)
-      .Case("cmov", true)
-      .Case("mmx", true)
-      .Case("popcnt", true)
-      .Case("sse", true)
-      .Case("sse2", true)
-      .Case("sse3", true)
-      .Case("ssse3", true)
-      .Case("sse4.1", true)
-      .Case("sse4.2", true)
-      .Case("avx", true)
-      .Case("avx2", true)
-      .Case("sse4a", true)
-      .Case("fma4", true)
-      .Case("xop", true)
-      .Case("fma", true)
-      .Case("avx512f", true)
-      .Case("bmi", true)
-      .Case("bmi2", true)
-      .Case("aes", true)
-      .Case("pclmul", true)
-      .Case("avx512vl", true)
-      .Case("avx512bw", true)
-      .Case("avx512dq", true)
-      .Case("avx512cd", true)
-      .Case("avx512er", true)
-      .Case("avx512pf", true)
-      .Case("avx512vbmi", true)
-      .Case("avx512ifma", true)
-      .Case("avx5124vnniw", true)
-      .Case("avx5124fmaps", true)
-      .Case("avx512vpopcntdq", true)
+#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, true)
+#include "llvm/Support/X86TargetParser.def"
       .Default(false);
 }
 
+static llvm::X86::ProcessorFeatures getFeature(StringRef Name) {
+  return llvm::StringSwitch<llvm::X86::ProcessorFeatures>(Name)
+#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, llvm::X86::ENUM)
+#include "llvm/Support/X86TargetParser.def"
+      ;
+  // Note, this function should only be used after ensuring the value is
+  // correct, so it asserts if the value is out of range.
+}
+
+static unsigned getFeaturePriority(llvm::X86::ProcessorFeatures Feat) {
+  enum class FeatPriority {
+#define FEATURE(FEAT) FEAT,
+#include "clang/Basic/X86Target.def"
+  };
+  switch (Feat) {
+#define FEATURE(FEAT)                                                          \
+  case llvm::X86::FEAT:                                                        \
+    return static_cast<unsigned>(FeatPriority::FEAT);
+#include "clang/Basic/X86Target.def"
+  default:
+    llvm_unreachable("No Feature Priority for non-CPUSupports Features");
+  }
+}
+
+unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const {
+  // Valid CPUs have a 'key feature' that compares just better than its key
+  // feature.
+  CPUKind Kind = getCPUKind(Name);
+  if (Kind != CK_Generic) {
+    switch (Kind) {
+    default:
+      llvm_unreachable(
+          "CPU Type without a key feature used in 'target' attribute");
+#define PROC_WITH_FEAT(ENUM, STR, IS64, KEY_FEAT)                              \
+  case CK_##ENUM:                                                              \
+    return (getFeaturePriority(llvm::X86::KEY_FEAT) << 1) + 1;
+#include "clang/Basic/X86Target.def"
+    }
+  }
+
+  // Now we know we have a feature, so get its priority and shift it a few so
+  // that we have sufficient room for the CPUs (above).
+  return getFeaturePriority(getFeature(Name)) << 1;
+}
+
+std::string X86TargetInfo::getCPUKindCanonicalName(CPUKind Kind) const {
+  switch (Kind) {
+  case CK_Generic:
+    return "";
+#define PROC(ENUM, STRING, IS64BIT)                                            \
+  case CK_##ENUM:                                                              \
+    return STRING;
+#include "clang/Basic/X86Target.def"
+  }
+  llvm_unreachable("Invalid CPUKind");
+}
+
 // We can't use a generic validation scheme for the cpus accepted here
 // versus subtarget cpus accepted in the target attribute because the
 // variables intitialized by the runtime only support the below currently
 // rather than the full range of cpus.
 bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const {
   return llvm::StringSwitch<bool>(FeatureStr)
-      .Case("amd", true)
-      .Case("amdfam10h", true)
-      .Case("amdfam15h", true)
-      .Case("amdfam17h", true)
-      .Case("atom", true)
-      .Case("barcelona", true)
-      .Case("bdver1", true)
-      .Case("bdver2", true)
-      .Case("bdver3", true)
-      .Case("bdver4", true)
-      .Case("bonnell", true)
-      .Case("broadwell", true)
-      .Case("btver1", true)
-      .Case("btver2", true)
-      .Case("core2", true)
-      .Case("corei7", true)
-      .Case("haswell", true)
-      .Case("intel", true)
-      .Case("istanbul", true)
-      .Case("ivybridge", true)
-      .Case("knl", true)
-      .Case("nehalem", true)
-      .Case("sandybridge", true)
-      .Case("shanghai", true)
-      .Case("silvermont", true)
-      .Case("skylake", true)
-      .Case("skylake-avx512", true)
-      .Case("slm", true)
-      .Case("westmere", true)
-      .Case("znver1", true)
+#define X86_VENDOR(ENUM, STRING) .Case(STRING, true)
+#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS)             \
+  .Cases(STR, ALIAS, true)
+#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true)
+#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true)
+#include "llvm/Support/X86TargetParser.def"
       .Default(false);
 }
 
@@ -1522,60 +1658,38 @@
   }
 }
 
+bool X86TargetInfo::checkCPUKind(CPUKind Kind) const {
+  // Perform any per-CPU checks necessary to determine if this CPU is
+  // acceptable.
+  switch (Kind) {
+  case CK_Generic:
+    // No processor selected!
+    return false;
+#define PROC(ENUM, STRING, IS64BIT)                                            \
+  case CK_##ENUM:                                                              \
+    return IS64BIT || getTriple().getArch() == llvm::Triple::x86;
+#include "clang/Basic/X86Target.def"
+  }
+  llvm_unreachable("Unhandled CPU kind");
+}
+
+void X86TargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
+#define PROC(ENUM, STRING, IS64BIT)                                            \
+  if (IS64BIT || getTriple().getArch() == llvm::Triple::x86)                   \
+    Values.emplace_back(STRING);
+  // Go through CPUKind checking to ensure that the alias is de-aliased and 
+  // 64 bit-ness is checked.
+#define PROC_ALIAS(ENUM, ALIAS)                                                \
+  if (checkCPUKind(getCPUKind(ALIAS)))                                         \
+    Values.emplace_back(ALIAS);
+#include "clang/Basic/X86Target.def"
+}
+
 X86TargetInfo::CPUKind X86TargetInfo::getCPUKind(StringRef CPU) const {
   return llvm::StringSwitch<CPUKind>(CPU)
-      .Case("i386", CK_i386)
-      .Case("i486", CK_i486)
-      .Case("winchip-c6", CK_WinChipC6)
-      .Case("winchip2", CK_WinChip2)
-      .Case("c3", CK_C3)
-      .Case("i586", CK_i586)
-      .Case("pentium", CK_Pentium)
-      .Case("pentium-mmx", CK_PentiumMMX)
-      .Case("i686", CK_i686)
-      .Case("pentiumpro", CK_PentiumPro)
-      .Case("pentium2", CK_Pentium2)
-      .Cases("pentium3", "pentium3m", CK_Pentium3)
-      .Case("pentium-m", CK_PentiumM)
-      .Case("c3-2", CK_C3_2)
-      .Case("yonah", CK_Yonah)
-      .Cases("pentium4", "pentium4m", CK_Pentium4)
-      .Case("prescott", CK_Prescott)
-      .Case("nocona", CK_Nocona)
-      .Case("core2", CK_Core2)
-      .Case("penryn", CK_Penryn)
-      .Cases("bonnell", "atom", CK_Bonnell)
-      .Cases("silvermont", "slm", CK_Silvermont)
-      .Case("goldmont", CK_Goldmont)
-      .Cases("nehalem", "corei7", CK_Nehalem)
-      .Case("westmere", CK_Westmere)
-      .Cases("sandybridge", "corei7-avx", CK_SandyBridge)
-      .Cases("ivybridge", "core-avx-i", CK_IvyBridge)
-      .Cases("haswell", "core-avx2", CK_Haswell)
-      .Case("broadwell", CK_Broadwell)
-      .Case("skylake", CK_SkylakeClient)
-      .Cases("skylake-avx512", "skx", CK_SkylakeServer)
-      .Case("cannonlake", CK_Cannonlake)
-      .Case("knl", CK_KNL)
-      .Case("knm", CK_KNM)
-      .Case("lakemont", CK_Lakemont)
-      .Case("k6", CK_K6)
-      .Case("k6-2", CK_K6_2)
-      .Case("k6-3", CK_K6_3)
-      .Cases("athlon", "athlon-tbird", CK_Athlon)
-      .Cases("athlon-xp", "athlon-mp", "athlon-4", CK_AthlonXP)
-      .Cases("k8", "athlon64", "athlon-fx", "opteron", CK_K8)
-      .Cases("k8-sse3", "athlon64-sse3", "opteron-sse3", CK_K8SSE3)
-      .Cases("amdfam10", "barcelona", CK_AMDFAM10)
-      .Case("btver1", CK_BTVER1)
-      .Case("btver2", CK_BTVER2)
-      .Case("bdver1", CK_BDVER1)
-      .Case("bdver2", CK_BDVER2)
-      .Case("bdver3", CK_BDVER3)
-      .Case("bdver4", CK_BDVER4)
-      .Case("znver1", CK_ZNVER1)
-      .Case("x86-64", CK_x86_64)
-      .Case("geode", CK_Geode)
+#define PROC(ENUM, STRING, IS64BIT) .Case(STRING, CK_##ENUM)
+#define PROC_ALIAS(ENUM, ALIAS) .Case(ALIAS, CK_##ENUM)
+#include "clang/Basic/X86Target.def"
       .Default(CK_Generic);
 }
 
diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h
index 3b405dc..660e584 100644
--- a/lib/Basic/Targets/X86.h
+++ b/lib/Basic/Targets/X86.h
@@ -48,7 +48,10 @@
   enum XOPEnum { NoXOP, SSE4A, FMA4, XOP } XOPLevel = NoXOP;
 
   bool HasAES = false;
+  bool HasVAES = false;
   bool HasPCLMUL = false;
+  bool HasVPCLMULQDQ = false;
+  bool HasGFNI = false;
   bool HasLZCNT = false;
   bool HasRDRND = false;
   bool HasFSGSBASE = false;
@@ -65,15 +68,20 @@
   bool HasF16C = false;
   bool HasAVX512CD = false;
   bool HasAVX512VPOPCNTDQ = false;
+  bool HasAVX512VNNI = false;
   bool HasAVX512ER = false;
   bool HasAVX512PF = false;
   bool HasAVX512DQ = false;
+  bool HasAVX512BITALG = false;
   bool HasAVX512BW = false;
   bool HasAVX512VL = false;
   bool HasAVX512VBMI = false;
+  bool HasAVX512VBMI2 = false;
   bool HasAVX512IFMA = false;
   bool HasSHA = false;
   bool HasMPX = false;
+  bool HasSHSTK = false;
+  bool HasIBT = false;
   bool HasSGX = false;
   bool HasCX16 = false;
   bool HasFXSR = false;
@@ -88,262 +96,28 @@
   bool HasCLWB = false;
   bool HasMOVBE = false;
   bool HasPREFETCHWT1 = false;
+  bool HasRDPID = false;
+  bool HasRetpoline = false;
+  bool HasRetpolineExternalThunk = false;
+  bool HasLAHFSAHF = false;
 
+protected:
   /// \brief Enumeration of all of the X86 CPUs supported by Clang.
   ///
   /// Each enumeration represents a particular CPU supported by Clang. These
   /// loosely correspond to the options passed to '-march' or '-mtune' flags.
   enum CPUKind {
     CK_Generic,
-
-    /// \name i386
-    /// i386-generation processors.
-    //@{
-    CK_i386,
-    //@}
-
-    /// \name i486
-    /// i486-generation processors.
-    //@{
-    CK_i486,
-    CK_WinChipC6,
-    CK_WinChip2,
-    CK_C3,
-    //@}
-
-    /// \name i586
-    /// i586-generation processors, P5 microarchitecture based.
-    //@{
-    CK_i586,
-    CK_Pentium,
-    CK_PentiumMMX,
-    //@}
-
-    /// \name i686
-    /// i686-generation processors, P6 / Pentium M microarchitecture based.
-    //@{
-    CK_i686,
-    CK_PentiumPro,
-    CK_Pentium2,
-    CK_Pentium3,
-    CK_PentiumM,
-    CK_C3_2,
-
-    /// This enumerator is a bit odd, as GCC no longer accepts -march=yonah.
-    /// Clang however has some logic to support this.
-    // FIXME: Warn, deprecate, and potentially remove this.
-    CK_Yonah,
-    //@}
-
-    /// \name Netburst
-    /// Netburst microarchitecture based processors.
-    //@{
-    CK_Pentium4,
-    CK_Prescott,
-    CK_Nocona,
-    //@}
-
-    /// \name Core
-    /// Core microarchitecture based processors.
-    //@{
-    CK_Core2,
-
-    /// This enumerator, like \see CK_Yonah, is a bit odd. It is another
-    /// codename which GCC no longer accepts as an option to -march, but Clang
-    /// has some logic for recognizing it.
-    // FIXME: Warn, deprecate, and potentially remove this.
-    CK_Penryn,
-    //@}
-
-    /// \name Atom
-    /// Atom processors
-    //@{
-    CK_Bonnell,
-    CK_Silvermont,
-    CK_Goldmont,
-    //@}
-
-    /// \name Nehalem
-    /// Nehalem microarchitecture based processors.
-    CK_Nehalem,
-
-    /// \name Westmere
-    /// Westmere microarchitecture based processors.
-    CK_Westmere,
-
-    /// \name Sandy Bridge
-    /// Sandy Bridge microarchitecture based processors.
-    CK_SandyBridge,
-
-    /// \name Ivy Bridge
-    /// Ivy Bridge microarchitecture based processors.
-    CK_IvyBridge,
-
-    /// \name Haswell
-    /// Haswell microarchitecture based processors.
-    CK_Haswell,
-
-    /// \name Broadwell
-    /// Broadwell microarchitecture based processors.
-    CK_Broadwell,
-
-    /// \name Skylake Client
-    /// Skylake client microarchitecture based processors.
-    CK_SkylakeClient,
-
-    /// \name Skylake Server
-    /// Skylake server microarchitecture based processors.
-    CK_SkylakeServer,
-
-    /// \name Cannonlake Client
-    /// Cannonlake client microarchitecture based processors.
-    CK_Cannonlake,
-
-    /// \name Knights Landing
-    /// Knights Landing processor.
-    CK_KNL,
-
-    /// \name Knights Mill
-    /// Knights Mill processor.
-    CK_KNM,
-
-    /// \name Lakemont
-    /// Lakemont microarchitecture based processors.
-    CK_Lakemont,
-
-    /// \name K6
-    /// K6 architecture processors.
-    //@{
-    CK_K6,
-    CK_K6_2,
-    CK_K6_3,
-    //@}
-
-    /// \name K7
-    /// K7 architecture processors.
-    //@{
-    CK_Athlon,
-    CK_AthlonXP,
-    //@}
-
-    /// \name K8
-    /// K8 architecture processors.
-    //@{
-    CK_K8,
-    CK_K8SSE3,
-    CK_AMDFAM10,
-    //@}
-
-    /// \name Bobcat
-    /// Bobcat architecture processors.
-    //@{
-    CK_BTVER1,
-    CK_BTVER2,
-    //@}
-
-    /// \name Bulldozer
-    /// Bulldozer architecture processors.
-    //@{
-    CK_BDVER1,
-    CK_BDVER2,
-    CK_BDVER3,
-    CK_BDVER4,
-    //@}
-
-    /// \name zen
-    /// Zen architecture processors.
-    //@{
-    CK_ZNVER1,
-    //@}
-
-    /// This specification is deprecated and will be removed in the future.
-    /// Users should prefer \see CK_K8.
-    // FIXME: Warn on this when the CPU is set to it.
-    //@{
-    CK_x86_64,
-    //@}
-
-    /// \name Geode
-    /// Geode processors.
-    //@{
-    CK_Geode
-    //@}
+#define PROC(ENUM, STRING, IS64BIT) CK_##ENUM,
+#include "clang/Basic/X86Target.def"
   } CPU = CK_Generic;
 
-  bool checkCPUKind(CPUKind Kind) const {
-    // Perform any per-CPU checks necessary to determine if this CPU is
-    // acceptable.
-    // FIXME: This results in terrible diagnostics. Clang just says the CPU is
-    // invalid without explaining *why*.
-    switch (Kind) {
-    case CK_Generic:
-      // No processor selected!
-      return false;
-
-    case CK_i386:
-    case CK_i486:
-    case CK_WinChipC6:
-    case CK_WinChip2:
-    case CK_C3:
-    case CK_i586:
-    case CK_Pentium:
-    case CK_PentiumMMX:
-    case CK_i686:
-    case CK_PentiumPro:
-    case CK_Pentium2:
-    case CK_Pentium3:
-    case CK_PentiumM:
-    case CK_Yonah:
-    case CK_C3_2:
-    case CK_Pentium4:
-    case CK_Lakemont:
-    case CK_Prescott:
-    case CK_K6:
-    case CK_K6_2:
-    case CK_K6_3:
-    case CK_Athlon:
-    case CK_AthlonXP:
-    case CK_Geode:
-      // Only accept certain architectures when compiling in 32-bit mode.
-      if (getTriple().getArch() != llvm::Triple::x86)
-        return false;
-
-      LLVM_FALLTHROUGH;
-    case CK_Nocona:
-    case CK_Core2:
-    case CK_Penryn:
-    case CK_Bonnell:
-    case CK_Silvermont:
-    case CK_Goldmont:
-    case CK_Nehalem:
-    case CK_Westmere:
-    case CK_SandyBridge:
-    case CK_IvyBridge:
-    case CK_Haswell:
-    case CK_Broadwell:
-    case CK_SkylakeClient:
-    case CK_SkylakeServer:
-    case CK_Cannonlake:
-    case CK_KNL:
-    case CK_KNM:
-    case CK_K8:
-    case CK_K8SSE3:
-    case CK_AMDFAM10:
-    case CK_BTVER1:
-    case CK_BTVER2:
-    case CK_BDVER1:
-    case CK_BDVER2:
-    case CK_BDVER3:
-    case CK_BDVER4:
-    case CK_ZNVER1:
-    case CK_x86_64:
-      return true;
-    }
-    llvm_unreachable("Unhandled CPU kind");
-  }
+  bool checkCPUKind(CPUKind Kind) const;
 
   CPUKind getCPUKind(StringRef CPU) const;
 
+  std::string getCPUKindCanonicalName(CPUKind Kind) const;
+
   enum FPMathKind { FP_Default, FP_SSE, FP_387 } FPMath = FP_Default;
 
 public:
@@ -389,6 +163,12 @@
 
   bool validateInputSize(StringRef Constraint, unsigned Size) const override;
 
+  virtual bool
+  checkCFProtectionReturnSupported(DiagnosticsEngine &Diags) const override;
+
+  virtual bool
+  checkCFProtectionBranchSupported(DiagnosticsEngine &Diags) const override;
+
   virtual bool validateOperandSize(StringRef Constraint, unsigned Size) const;
 
   std::string convertConstraint(const char *&Constraint) const override;
@@ -396,8 +176,8 @@
     return "~{dirflag},~{fpsr},~{flags}";
   }
 
-  StringRef getConstraintRegister(const StringRef &Constraint,
-                                  const StringRef &Expression) const override {
+  StringRef getConstraintRegister(StringRef Constraint,
+                                  StringRef Expression) const override {
     StringRef::iterator I, E;
     for (I = Constraint.begin(), E = Constraint.end(); I != E; ++I) {
       if (isalpha(*I))
@@ -432,6 +212,10 @@
     return "";
   }
 
+  bool useFP16ConversionIntrinsics() const override {
+    return false;
+  }
+
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
   
@@ -481,10 +265,17 @@
     return checkCPUKind(getCPUKind(Name));
   }
 
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
+
   bool setCPU(const std::string &Name) override {
     return checkCPUKind(CPU = getCPUKind(Name));
   }
 
+  bool supportsMultiVersioning() const override {
+    return getTriple().isOSBinFormatELF();
+  }
+  unsigned multiVersionSortPriority(StringRef Name) const override;
+
   bool setFPMath(StringRef Name) override;
 
   CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
@@ -538,9 +329,11 @@
          (1 << TargetInfo::LongDouble));
 
     // x86-32 has atomics up to 8 bytes
-    // FIXME: Check that we actually have cmpxchg8b before setting
-    // MaxAtomicInlineWidth. (cmpxchg8b is an i586 instruction.)
-    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
+    CPUKind Kind = getCPUKind(Opts.CPU);
+    if (Kind >= CK_i586 || Kind == CK_Generic)
+      MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
+    else if (Kind >= CK_i486)
+      MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;
   }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
@@ -651,11 +444,6 @@
                         ? "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
                         : "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32");
   }
-
-  void getTargetDefines(const LangOptions &Opts,
-                        MacroBuilder &Builder) const override {
-    WindowsTargetInfo<X86_32TargetInfo>::getTargetDefines(Opts, Builder);
-  }
 };
 
 // x86-32 Windows Visual Studio target
@@ -692,10 +480,7 @@
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
     WindowsX86_32TargetInfo::getTargetDefines(Opts, Builder);
-    DefineStd(Builder, "WIN32", Opts);
-    DefineStd(Builder, "WINNT", Opts);
     Builder.defineMacro("_X86_");
-    addMinGWDefines(Opts, Builder);
   }
 };
 
@@ -899,12 +684,6 @@
     IntPtrType = SignedLongLong;
   }
 
-  void getTargetDefines(const LangOptions &Opts,
-                        MacroBuilder &Builder) const override {
-    WindowsTargetInfo<X86_64TargetInfo>::getTargetDefines(Opts, Builder);
-    Builder.defineMacro("_WIN64");
-  }
-
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::CharPtrBuiltinVaList;
   }
@@ -963,18 +742,6 @@
     LongDoubleFormat = &llvm::APFloat::x87DoubleExtended();
     HasFloat128 = true;
   }
-
-  void getTargetDefines(const LangOptions &Opts,
-                        MacroBuilder &Builder) const override {
-    WindowsX86_64TargetInfo::getTargetDefines(Opts, Builder);
-    DefineStd(Builder, "WIN64", Opts);
-    Builder.defineMacro("__MINGW64__");
-    addMinGWDefines(Opts, Builder);
-
-    // GCC defines this macro when it is using __gxx_personality_seh0.
-    if (!Opts.SjLjExceptions)
-      Builder.defineMacro("__SEH__");
-  }
 };
 
 // x86-64 Cygwin target
@@ -996,10 +763,6 @@
     DefineStd(Builder, "unix", Opts);
     if (Opts.CPlusPlus)
       Builder.defineMacro("_GNU_SOURCE");
-
-    // GCC defines this macro when it is using __gxx_personality_seh0.
-    if (!Opts.SjLjExceptions)
-      Builder.defineMacro("__SEH__");
   }
 };
 
diff --git a/lib/Basic/VirtualFileSystem.cpp b/lib/Basic/VirtualFileSystem.cpp
index debe5d9..9d44597 100644
--- a/lib/Basic/VirtualFileSystem.cpp
+++ b/lib/Basic/VirtualFileSystem.cpp
@@ -493,7 +493,11 @@
 }
 
 bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
-                                 std::unique_ptr<llvm::MemoryBuffer> Buffer) {
+                                 std::unique_ptr<llvm::MemoryBuffer> Buffer,
+                                 Optional<uint32_t> User,
+                                 Optional<uint32_t> Group,
+                                 Optional<llvm::sys::fs::file_type> Type,
+                                 Optional<llvm::sys::fs::perms> Perms) {
   SmallString<128> Path;
   P.toVector(Path);
 
@@ -509,32 +513,42 @@
     return false;
 
   detail::InMemoryDirectory *Dir = Root.get();
-  auto I = llvm::sys::path::begin(Path), E = llvm::sys::path::end(Path);
+  auto I = llvm::sys::path::begin(Path), E = sys::path::end(Path);
+  const auto ResolvedUser = User.getValueOr(0);
+  const auto ResolvedGroup = Group.getValueOr(0);
+  const auto ResolvedType = Type.getValueOr(sys::fs::file_type::regular_file);
+  const auto ResolvedPerms = Perms.getValueOr(sys::fs::all_all);
+  // Any intermediate directories we create should be accessible by
+  // the owner, even if Perms says otherwise for the final path.
+  const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all;
   while (true) {
     StringRef Name = *I;
     detail::InMemoryNode *Node = Dir->getChild(Name);
     ++I;
     if (!Node) {
       if (I == E) {
-        // End of the path, create a new file.
-        // FIXME: expose the status details in the interface.
+        // End of the path, create a new file or directory.
         Status Stat(P.str(), getNextVirtualUniqueID(),
-                    llvm::sys::toTimePoint(ModificationTime), 0, 0,
-                    Buffer->getBufferSize(),
-                    llvm::sys::fs::file_type::regular_file,
-                    llvm::sys::fs::all_all);
-        Dir->addChild(Name, llvm::make_unique<detail::InMemoryFile>(
-                                std::move(Stat), std::move(Buffer)));
+                    llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
+                    ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
+                    ResolvedPerms);
+        std::unique_ptr<detail::InMemoryNode> Child;
+        if (ResolvedType == sys::fs::file_type::directory_file) {
+          Child.reset(new detail::InMemoryDirectory(std::move(Stat)));
+        } else {
+          Child.reset(new detail::InMemoryFile(std::move(Stat),
+                                               std::move(Buffer)));
+        }
+        Dir->addChild(Name, std::move(Child));
         return true;
       }
 
       // Create a new directory. Use the path up to here.
-      // FIXME: expose the status details in the interface.
       Status Stat(
           StringRef(Path.str().begin(), Name.end() - Path.str().begin()),
-          getNextVirtualUniqueID(), llvm::sys::toTimePoint(ModificationTime), 0,
-          0, Buffer->getBufferSize(), llvm::sys::fs::file_type::directory_file,
-          llvm::sys::fs::all_all);
+          getNextVirtualUniqueID(), llvm::sys::toTimePoint(ModificationTime),
+          ResolvedUser, ResolvedGroup, Buffer->getBufferSize(),
+          sys::fs::file_type::directory_file, NewDirectoryPerms);
       Dir = cast<detail::InMemoryDirectory>(Dir->addChild(
           Name, llvm::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
       continue;
@@ -558,10 +572,16 @@
 }
 
 bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime,
-                                      llvm::MemoryBuffer *Buffer) {
+                                      llvm::MemoryBuffer *Buffer,
+                                      Optional<uint32_t> User,
+                                      Optional<uint32_t> Group,
+                                      Optional<llvm::sys::fs::file_type> Type,
+                                      Optional<llvm::sys::fs::perms> Perms) {
   return addFile(P, ModificationTime,
                  llvm::MemoryBuffer::getMemBuffer(
-                     Buffer->getBuffer(), Buffer->getBufferIdentifier()));
+                     Buffer->getBuffer(), Buffer->getBufferIdentifier()),
+                 std::move(User), std::move(Group), std::move(Type),
+                 std::move(Perms));
 }
 
 static ErrorOr<detail::InMemoryNode *>
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h
index 575506d..fa0461f 100644
--- a/lib/CodeGen/ABIInfo.h
+++ b/lib/CodeGen/ABIInfo.h
@@ -108,8 +108,6 @@
     virtual bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                                    uint64_t Members) const;
 
-    virtual bool shouldSignExtUnsignedType(QualType Ty) const;
-
     bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
                                 uint64_t &Members) const;
 
@@ -137,8 +135,7 @@
 
     bool supportsSwift() const final override { return true; }
 
-    virtual bool shouldPassIndirectlyForSwift(CharUnits totalSize,
-                                              ArrayRef<llvm::Type*> types,
+    virtual bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> types,
                                               bool asReturnValue) const = 0;
 
     virtual bool isLegalVectorTypeForSwift(CharUnits totalSize,
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 893967a..f2733ce 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Bitcode/BitcodeWriterPass.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -44,13 +45,14 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Coroutines.h"
+#include "llvm/Transforms/GCOVProfiler.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
 #include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
@@ -168,7 +170,7 @@
 
 static void addBoundsCheckingPass(const PassManagerBuilder &Builder,
                                   legacy::PassManagerBase &PM) {
-  PM.add(createBoundsCheckingPass());
+  PM.add(createBoundsCheckingLegacyPass());
 }
 
 static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
@@ -236,6 +238,15 @@
                                           /*Recover*/true));
 }
 
+static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
+                                            legacy::PassManagerBase &PM) {
+  const PassManagerBuilderWrapper &BuilderWrapper =
+      static_cast<const PassManagerBuilderWrapper &>(Builder);
+  const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
+  bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::HWAddress);
+  PM.add(createHWAddressSanitizerPass(Recover));
+}
+
 static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
                                    legacy::PassManagerBase &PM) {
   const PassManagerBuilderWrapper &BuilderWrapper =
@@ -351,21 +362,6 @@
   return static_cast<llvm::CodeModel::Model>(CodeModel);
 }
 
-static llvm::Reloc::Model getRelocModel(const CodeGenOptions &CodeGenOpts) {
-  // Keep this synced with the equivalent code in
-  // lib/Frontend/CompilerInvocation.cpp
-  llvm::Optional<llvm::Reloc::Model> RM;
-  RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel)
-      .Case("static", llvm::Reloc::Static)
-      .Case("pic", llvm::Reloc::PIC_)
-      .Case("ropi", llvm::Reloc::ROPI)
-      .Case("rwpi", llvm::Reloc::RWPI)
-      .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI)
-      .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC);
-  assert(RM.hasValue() && "invalid PIC model!");
-  return *RM;
-}
-
 static TargetMachine::CodeGenFileType getCodeGenFileType(BackendAction Action) {
   if (Action == Backend_EmitObj)
     return TargetMachine::CGFT_ObjectFile;
@@ -423,6 +419,10 @@
 
   if (LangOpts.SjLjExceptions)
     Options.ExceptionModel = llvm::ExceptionHandling::SjLj;
+  if (LangOpts.SEHExceptions)
+    Options.ExceptionModel = llvm::ExceptionHandling::WinEH;
+  if (LangOpts.DWARFExceptions)
+    Options.ExceptionModel = llvm::ExceptionHandling::DwarfCFI;
 
   Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath;
   Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath;
@@ -433,7 +433,9 @@
   Options.DataSections = CodeGenOpts.DataSections;
   Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames;
   Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
+  Options.ExplicitEmulatedTLS = CodeGenOpts.ExplicitEmulatedTLS;
   Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
+  Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection;
 
   if (CodeGenOpts.EnableSplitDwarf)
     Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
@@ -456,6 +458,23 @@
       Options.MCOptions.IASSearchPaths.push_back(
           Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path);
 }
+static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) {
+  if (CodeGenOpts.DisableGCov)
+    return None;
+  if (!CodeGenOpts.EmitGcovArcs && !CodeGenOpts.EmitGcovNotes)
+    return None;
+  // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if
+  // LLVM's -default-gcov-version flag is set to something invalid.
+  GCOVOptions Options;
+  Options.EmitNotes = CodeGenOpts.EmitGcovNotes;
+  Options.EmitData = CodeGenOpts.EmitGcovArcs;
+  llvm::copy(CodeGenOpts.CoverageVersion, std::begin(Options.Version));
+  Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum;
+  Options.NoRedZone = CodeGenOpts.DisableRedZone;
+  Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData;
+  Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
+  return Options;
+}
 
 void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
                                       legacy::FunctionPassManager &FPM) {
@@ -551,6 +570,13 @@
                            addKernelAddressSanitizerPasses);
   }
 
+  if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) {
+    PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+                           addHWAddressSanitizerPasses);
+    PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+                           addHWAddressSanitizerPasses);
+  }
+
   if (LangOpts.Sanitize.has(SanitizerKind::Memory)) {
     PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
                            addMemorySanitizerPass);
@@ -591,20 +617,8 @@
   if (!CodeGenOpts.RewriteMapFiles.empty())
     addSymbolRewriterPass(CodeGenOpts, &MPM);
 
-  if (!CodeGenOpts.DisableGCov &&
-      (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) {
-    // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if
-    // LLVM's -default-gcov-version flag is set to something invalid.
-    GCOVOptions Options;
-    Options.EmitNotes = CodeGenOpts.EmitGcovNotes;
-    Options.EmitData = CodeGenOpts.EmitGcovArcs;
-    memcpy(Options.Version, CodeGenOpts.CoverageVersion, 4);
-    Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum;
-    Options.NoRedZone = CodeGenOpts.DisableRedZone;
-    Options.FunctionNamesInData =
-        !CodeGenOpts.CoverageNoFunctionNamesInData;
-    Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
-    MPM.add(createGCOVProfilerPass(Options));
+  if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) {
+    MPM.add(createGCOVProfilerPass(*Options));
     if (CodeGenOpts.getDebugInfo() == codegenoptions::NoDebugInfo)
       MPM.add(createStripSymbolsPass(true));
   }
@@ -664,7 +678,7 @@
   Optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts);
   std::string FeaturesStr =
       llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
-  llvm::Reloc::Model RM = getRelocModel(CodeGenOpts);
+  llvm::Reloc::Model RM = CodeGenOpts.RelocationModel;
   CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts);
 
   llvm::TargetOptions Options;
@@ -866,10 +880,10 @@
 
   PassBuilder PB(TM.get(), PGOOpt);
 
-  LoopAnalysisManager LAM;
-  FunctionAnalysisManager FAM;
-  CGSCCAnalysisManager CGAM;
-  ModuleAnalysisManager MAM;
+  LoopAnalysisManager LAM(CodeGenOpts.DebugPassManager);
+  FunctionAnalysisManager FAM(CodeGenOpts.DebugPassManager);
+  CGSCCAnalysisManager CGAM(CodeGenOpts.DebugPassManager);
+  ModuleAnalysisManager MAM(CodeGenOpts.DebugPassManager);
 
   // Register the AA manager first so that our version is the one used.
   FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
@@ -896,9 +910,18 @@
     bool IsLTO = CodeGenOpts.PrepareForLTO;
 
     if (CodeGenOpts.OptimizationLevel == 0) {
+      if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts))
+        MPM.addPass(GCOVProfilerPass(*Options));
+
       // Build a minimal pipeline based on the semantics required by Clang,
       // which is just that always inlining occurs.
       MPM.addPass(AlwaysInlinerPass());
+
+      // At -O0 we directly run necessary sanitizer passes.
+      if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
+        MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass()));
+
+      // Lastly, add a semantically necessary pass for ThinLTO.
       if (IsThinLTO)
         MPM.addPass(NameAnonGlobalPass());
     } else {
@@ -906,6 +929,18 @@
       // configure the pipeline.
       PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts);
 
+      // Register callbacks to schedule sanitizer passes at the appropriate part of
+      // the pipeline.
+      if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
+        PB.registerScalarOptimizerLateEPCallback(
+            [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
+              FPM.addPass(BoundsCheckingPass());
+            });
+      if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts))
+        PB.registerPipelineStartEPCallback([Options](ModulePassManager &MPM) {
+          MPM.addPass(GCOVProfilerPass(*Options));
+        });
+
       if (IsThinLTO) {
         MPM = PB.buildThinLTOPreLinkDefaultPipeline(
             Level, CodeGenOpts.DebugPassManager);
@@ -991,16 +1026,22 @@
 
   // The bitcode file may contain multiple modules, we want the one that is
   // marked as being the ThinLTO module.
-  for (BitcodeModule &BM : *BMsOrErr) {
-    Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
-    if (LTOInfo && LTOInfo->IsThinLTO)
-      return BM;
-  }
+  if (const BitcodeModule *Bm = FindThinLTOModule(*BMsOrErr))
+    return *Bm;
 
   return make_error<StringError>("Could not find module summary",
                                  inconvertibleErrorCode());
 }
 
+BitcodeModule *clang::FindThinLTOModule(MutableArrayRef<BitcodeModule> BMs) {
+  for (BitcodeModule &BM : BMs) {
+    Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
+    if (LTOInfo && LTOInfo->IsThinLTO)
+      return &BM;
+  }
+  return nullptr;
+}
+
 static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
                               const HeaderSearchOptions &HeaderOpts,
                               const CodeGenOptions &CGOpts,
@@ -1068,11 +1109,12 @@
   Conf.CPU = TOpts.CPU;
   Conf.CodeModel = getCodeModel(CGOpts);
   Conf.MAttrs = TOpts.Features;
-  Conf.RelocModel = getRelocModel(CGOpts);
+  Conf.RelocModel = CGOpts.RelocationModel;
   Conf.CGOptLevel = getCGOptLevel(CGOpts);
   initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
   Conf.SampleProfile = std::move(SampleProfile);
   Conf.UseNewPM = CGOpts.ExperimentalNewPassManager;
+  Conf.DebugPassManager = CGOpts.DebugPassManager;
   switch (Action) {
   case Backend_EmitNothing:
     Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) {
@@ -1087,7 +1129,7 @@
     break;
   case Backend_EmitBC:
     Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) {
-      WriteBitcodeToFile(M, *OS, CGOpts.EmitLLVMUseLists);
+      WriteBitcodeToFile(*M, *OS, CGOpts.EmitLLVMUseLists);
       return false;
     };
     break;
@@ -1112,6 +1154,7 @@
                               const llvm::DataLayout &TDesc, Module *M,
                               BackendAction Action,
                               std::unique_ptr<raw_pwrite_stream> OS) {
+  std::unique_ptr<llvm::Module> EmptyModule;
   if (!CGOpts.ThinLTOIndexFile.empty()) {
     // If we are performing a ThinLTO importing compile, load the function index
     // into memory and pass it into runThinLTOBackend, which will run the
@@ -1129,11 +1172,22 @@
     // A null CombinedIndex means we should skip ThinLTO compilation
     // (LLVM will optionally ignore empty index files, returning null instead
     // of an error).
-    bool DoThinLTOBackend = CombinedIndex != nullptr;
-    if (DoThinLTOBackend) {
-      runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
-                        LOpts, std::move(OS), CGOpts.SampleProfileFile, Action);
-      return;
+    if (CombinedIndex) {
+      if (!CombinedIndex->skipModuleByDistributedBackend()) {
+        runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
+                          LOpts, std::move(OS), CGOpts.SampleProfileFile,
+                          Action);
+        return;
+      }
+      // Distributed indexing detected that nothing from the module is needed
+      // for the final linking. So we can skip the compilation. We sill need to
+      // output an empty object file to make sure that a linker does not fail
+      // trying to read it. Also for some features, like CFI, we must skip
+      // the compilation as CombinedIndex does not contain all required
+      // information.
+      EmptyModule = llvm::make_unique<llvm::Module>("empty", M->getContext());
+      EmptyModule->setTargetTriple(M->getTargetTriple());
+      M = EmptyModule.get();
     }
   }
 
@@ -1217,7 +1271,7 @@
       // If the input is LLVM Assembly, bitcode is produced by serializing
       // the module. Use-lists order need to be perserved in this case.
       llvm::raw_string_ostream OS(Data);
-      llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true);
+      llvm::WriteBitcodeToFile(*M, OS, /* ShouldPreserveUseListOrder */ true);
       ModuleData =
           ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size());
     } else
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index d90c3a5..3d24e1f 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -573,7 +573,7 @@
 
   case AtomicExpr::AO__atomic_add_fetch:
     PostOp = llvm::Instruction::Add;
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case AtomicExpr::AO__c11_atomic_fetch_add:
   case AtomicExpr::AO__opencl_atomic_fetch_add:
   case AtomicExpr::AO__atomic_fetch_add:
@@ -582,7 +582,7 @@
 
   case AtomicExpr::AO__atomic_sub_fetch:
     PostOp = llvm::Instruction::Sub;
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case AtomicExpr::AO__c11_atomic_fetch_sub:
   case AtomicExpr::AO__opencl_atomic_fetch_sub:
   case AtomicExpr::AO__atomic_fetch_sub:
@@ -601,7 +601,7 @@
 
   case AtomicExpr::AO__atomic_and_fetch:
     PostOp = llvm::Instruction::And;
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case AtomicExpr::AO__c11_atomic_fetch_and:
   case AtomicExpr::AO__opencl_atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_and:
@@ -610,7 +610,7 @@
 
   case AtomicExpr::AO__atomic_or_fetch:
     PostOp = llvm::Instruction::Or;
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__opencl_atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_or:
@@ -619,7 +619,7 @@
 
   case AtomicExpr::AO__atomic_xor_fetch:
     PostOp = llvm::Instruction::Xor;
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case AtomicExpr::AO__c11_atomic_fetch_xor:
   case AtomicExpr::AO__opencl_atomic_fetch_xor:
   case AtomicExpr::AO__atomic_fetch_xor:
@@ -628,7 +628,7 @@
 
   case AtomicExpr::AO__atomic_nand_fetch:
     PostOp = llvm::Instruction::And; // the NOT is special cased below
-  // Fall through.
+    LLVM_FALLTHROUGH;
   case AtomicExpr::AO__atomic_fetch_nand:
     Op = llvm::AtomicRMWInst::Nand;
     break;
@@ -828,7 +828,7 @@
       EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Temp, Val1Ty));
       break;
     }
-    // Fall through.
+      LLVM_FALLTHROUGH;
   case AtomicExpr::AO__atomic_fetch_add:
   case AtomicExpr::AO__atomic_fetch_sub:
   case AtomicExpr::AO__atomic_add_fetch:
@@ -1035,7 +1035,7 @@
     // T __atomic_fetch_add_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_add_fetch:
       PostOp = llvm::Instruction::Add;
-    // Fall through.
+      LLVM_FALLTHROUGH;
     case AtomicExpr::AO__c11_atomic_fetch_add:
     case AtomicExpr::AO__opencl_atomic_fetch_add:
     case AtomicExpr::AO__atomic_fetch_add:
@@ -1047,7 +1047,7 @@
     // T __atomic_fetch_and_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_and_fetch:
       PostOp = llvm::Instruction::And;
-    // Fall through.
+      LLVM_FALLTHROUGH;
     case AtomicExpr::AO__c11_atomic_fetch_and:
     case AtomicExpr::AO__opencl_atomic_fetch_and:
     case AtomicExpr::AO__atomic_fetch_and:
@@ -1059,7 +1059,7 @@
     // T __atomic_fetch_or_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_or_fetch:
       PostOp = llvm::Instruction::Or;
-    // Fall through.
+      LLVM_FALLTHROUGH;
     case AtomicExpr::AO__c11_atomic_fetch_or:
     case AtomicExpr::AO__opencl_atomic_fetch_or:
     case AtomicExpr::AO__atomic_fetch_or:
@@ -1071,7 +1071,7 @@
     // T __atomic_fetch_sub_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_sub_fetch:
       PostOp = llvm::Instruction::Sub;
-    // Fall through.
+      LLVM_FALLTHROUGH;
     case AtomicExpr::AO__c11_atomic_fetch_sub:
     case AtomicExpr::AO__opencl_atomic_fetch_sub:
     case AtomicExpr::AO__atomic_fetch_sub:
@@ -1083,7 +1083,7 @@
     // T __atomic_fetch_xor_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_xor_fetch:
       PostOp = llvm::Instruction::Xor;
-    // Fall through.
+      LLVM_FALLTHROUGH;
     case AtomicExpr::AO__c11_atomic_fetch_xor:
     case AtomicExpr::AO__opencl_atomic_fetch_xor:
     case AtomicExpr::AO__atomic_fetch_xor:
@@ -1109,7 +1109,7 @@
     // T __atomic_fetch_nand_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_nand_fetch:
       PostOp = llvm::Instruction::And; // the NOT is special cased below
-    // Fall through.
+      LLVM_FALLTHROUGH;
     case AtomicExpr::AO__atomic_fetch_nand:
       LibCallName = "__atomic_fetch_nand";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -1508,11 +1508,12 @@
   // which means that the caller is responsible for having zeroed
   // any padding.  Just do an aggregate copy of that type.
   if (rvalue.isAggregate()) {
-    CGF.EmitAggregateCopy(getAtomicAddress(),
-                          rvalue.getAggregateAddress(),
-                          getAtomicType(),
-                          (rvalue.isVolatileQualified()
-                           || LVal.isVolatileQualified()));
+    LValue Dest = CGF.MakeAddrLValue(getAtomicAddress(), getAtomicType());
+    LValue Src = CGF.MakeAddrLValue(rvalue.getAggregateAddress(),
+                                    getAtomicType());
+    bool IsVolatile = rvalue.isVolatileQualified() ||
+                      LVal.isVolatileQualified();
+    CGF.EmitAggregateCopy(Dest, Src, getAtomicType(), IsVolatile);
     return;
   }
 
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index 2915dab..6695860 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -477,6 +477,14 @@
       info.NeedsCopyDispose = true;
       info.HasCXXObject = true;
 
+    // So do C structs that require non-trivial copy construction or
+    // destruction.
+    } else if (variable->getType().isNonTrivialToPrimitiveCopy() ==
+                   QualType::PCK_Struct ||
+               variable->getType().isDestructedType() ==
+                   QualType::DK_nontrivial_c_struct) {
+      info.NeedsCopyDispose = true;
+
     // And so do types with destructors.
     } else if (CGM.getLangOpts().CPlusPlus) {
       if (const CXXRecordDecl *record =
@@ -705,11 +713,8 @@
 /// kind of cleanup object is a BlockDecl*.
 void CodeGenFunction::enterNonTrivialFullExpression(const ExprWithCleanups *E) {
   assert(E->getNumObjects() != 0);
-  ArrayRef<ExprWithCleanups::CleanupObject> cleanups = E->getObjects();
-  for (ArrayRef<ExprWithCleanups::CleanupObject>::iterator
-         i = cleanups.begin(), e = cleanups.end(); i != e; ++i) {
-    enterBlockScope(*this, *i);
-  }
+  for (const ExprWithCleanups::CleanupObject &C : E->getObjects())
+    enterBlockScope(*this, C);
 }
 
 /// Find the layout for the given block in a linked list and remove it.
@@ -740,27 +745,19 @@
 }
 
 /// Emit a block literal expression in the current function.
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
-                                               llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
   // If the block has no captures, we won't have a pre-computed
   // layout for it.
   if (!blockExpr->getBlockDecl()->hasCaptures()) {
     // The block literal is emitted as a global variable, and the block invoke
     // function has to be extracted from its initializer.
     if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) {
-      if (InvokeF) {
-        auto *GV = cast<llvm::GlobalVariable>(
-            cast<llvm::Constant>(Block)->stripPointerCasts());
-        auto *BlockInit = cast<llvm::ConstantStruct>(GV->getInitializer());
-        *InvokeF = cast<llvm::Function>(
-            BlockInit->getAggregateElement(2)->stripPointerCasts());
-      }
       return Block;
     }
     CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
     computeBlockInfo(CGM, this, blockInfo);
     blockInfo.BlockExpression = blockExpr;
-    return EmitBlockLiteral(blockInfo, InvokeF);
+    return EmitBlockLiteral(blockInfo);
   }
 
   // Find the block info for this block and take ownership of it.
@@ -769,11 +766,10 @@
                                          blockExpr->getBlockDecl()));
 
   blockInfo->BlockExpression = blockExpr;
-  return EmitBlockLiteral(*blockInfo, InvokeF);
+  return EmitBlockLiteral(*blockInfo);
 }
 
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
-                                               llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
   bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
   auto GenVoidPtrTy =
       IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
@@ -784,10 +780,10 @@
       8);
   // Using the computed layout, generate the actual block function.
   bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
-  auto *InvokeFn = CodeGenFunction(CGM, true).GenerateBlockFunction(
+  CodeGenFunction BlockCGF{CGM, true};
+  BlockCGF.SanOpts = SanOpts;
+  auto *InvokeFn = BlockCGF.GenerateBlockFunction(
       CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
-  if (InvokeF)
-    *InvokeF = InvokeFn;
   auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
 
   // If there is nothing to capture, we can emit this as a global block.
@@ -1022,6 +1018,11 @@
   llvm::Value *result = Builder.CreatePointerCast(
       blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType()));
 
+  if (IsOpenCL) {
+    CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn,
+                                           result);
+  }
+
   return result;
 }
 
@@ -1189,8 +1190,8 @@
                                  variable->getName());
   }
 
-  if (auto refType = capture.fieldType()->getAs<ReferenceType>())
-    addr = EmitLoadOfReference(addr, refType);
+  if (capture.fieldType()->isReferenceType())
+    addr = EmitLoadOfReference(MakeAddrLValue(addr, capture.fieldType()));
 
   return addr;
 }
@@ -1285,6 +1286,10 @@
   llvm::Constant *Result =
       llvm::ConstantExpr::getPointerCast(literal, RequiredType);
   CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result);
+  if (CGM.getContext().getLangOpts().OpenCL)
+    CGM.getOpenCLRuntime().recordBlockInfo(
+        blockInfo.BlockExpression,
+        cast<llvm::Function>(blockFn->stripPointerCasts()), Result);
   return Result;
 }
 
@@ -1293,20 +1298,17 @@
                                                llvm::Value *arg) {
   assert(BlockInfo && "not emitting prologue of block invocation function?!");
 
-  llvm::Value *localAddr = nullptr;
-  if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
-    // Allocate a stack slot to let the debug info survive the RA.
-    Address alloc = CreateMemTemp(D->getType(), D->getName() + ".addr");
-    Builder.CreateStore(arg, alloc);
-    localAddr = Builder.CreateLoad(alloc);
-  }
-
+  // Allocate a stack slot like for any local variable to guarantee optimal
+  // debug info at -O0. The mem2reg pass will eliminate it when optimizing.
+  Address alloc = CreateMemTemp(D->getType(), D->getName() + ".addr");
+  Builder.CreateStore(arg, alloc);
   if (CGDebugInfo *DI = getDebugInfo()) {
     if (CGM.getCodeGenOpts().getDebugInfo() >=
         codegenoptions::LimitedDebugInfo) {
       DI->setLocation(D->getLocation());
-      DI->EmitDeclareOfBlockLiteralArgVariable(*BlockInfo, arg, argNum,
-                                               localAddr, Builder);
+      DI->EmitDeclareOfBlockLiteralArgVariable(
+          *BlockInfo, D->getName(), argNum,
+          cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
     }
   }
 
@@ -1480,8 +1482,8 @@
         const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
         if (capture.isConstant()) {
           auto addr = LocalDeclMap.find(variable)->second;
-          DI->EmitDeclareOfAutoVariable(variable, addr.getPointer(),
-                                        Builder);
+          (void)DI->EmitDeclareOfAutoVariable(variable, addr.getPointer(),
+                                              Builder);
           continue;
         }
 
@@ -1514,6 +1516,7 @@
   CXXRecord, // Copy or destroy
   ARCWeak,
   ARCStrong,
+  NonTrivialCStruct,
   BlockObject, // Assign or release
   None
 };
@@ -1549,39 +1552,47 @@
       Flags |= BLOCK_FIELD_IS_WEAK;
     return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
   }
-  if (!T->isObjCRetainableType())
-    // For all other types, the memcpy is fine.
-    return std::make_pair(BlockCaptureEntityKind::None, Flags);
 
   Flags = BLOCK_FIELD_IS_OBJECT;
   bool isBlockPointer = T->isBlockPointerType();
   if (isBlockPointer)
     Flags = BLOCK_FIELD_IS_BLOCK;
 
-  // Special rules for ARC captures:
-  Qualifiers QS = T.getQualifiers();
-
-  // We need to register __weak direct captures with the runtime.
-  if (QS.getObjCLifetime() == Qualifiers::OCL_Weak)
-    return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags);
-
-  // We need to retain the copied value for __strong direct captures.
-  if (QS.getObjCLifetime() == Qualifiers::OCL_Strong) {
-    // If it's a block pointer, we have to copy the block and
-    // assign that to the destination pointer, so we might as
-    // well use _Block_object_assign.  Otherwise we can avoid that.
+  switch (T.isNonTrivialToPrimitiveCopy()) {
+  case QualType::PCK_Struct:
+    return std::make_pair(BlockCaptureEntityKind::NonTrivialCStruct,
+                          BlockFieldFlags());
+  case QualType::PCK_ARCStrong:
+    // We need to retain the copied value for __strong direct captures.
+    // If it's a block pointer, we have to copy the block and assign that to
+    // the destination pointer, so we might as well use _Block_object_assign.
+    // Otherwise we can avoid that.
     return std::make_pair(!isBlockPointer ? BlockCaptureEntityKind::ARCStrong
                                           : BlockCaptureEntityKind::BlockObject,
                           Flags);
+  case QualType::PCK_Trivial:
+  case QualType::PCK_VolatileTrivial: {
+    if (!T->isObjCRetainableType())
+      // For all other types, the memcpy is fine.
+      return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
+
+    // Special rules for ARC captures:
+    Qualifiers QS = T.getQualifiers();
+
+    // We need to register __weak direct captures with the runtime.
+    if (QS.getObjCLifetime() == Qualifiers::OCL_Weak)
+      return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags);
+
+    // Non-ARC captures of retainable pointers are strong and
+    // therefore require a call to _Block_object_assign.
+    if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount)
+      return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+
+    // Otherwise the memcpy is fine.
+    return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
   }
-
-  // Non-ARC captures of retainable pointers are strong and
-  // therefore require a call to _Block_object_assign.
-  if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount)
-    return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
-
-  // Otherwise the memcpy is fine.
-  return std::make_pair(BlockCaptureEntityKind::None, Flags);
+  }
+  llvm_unreachable("after exhaustive PrimitiveCopyKind switch");
 }
 
 /// Find the set of block captures that need to be explicitly copied or destroy.
@@ -1645,12 +1656,10 @@
                                           false,
                                           false);
 
-  CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
 
-  auto NL = ApplyDebugLocation::CreateEmpty(*this);
   StartFunction(FD, C.VoidTy, Fn, FI, args);
-  // Create a scope with an artificial location for the body of this function.
-  auto AL = ApplyDebugLocation::CreateArtificial(*this);
+  ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
   llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
 
   Address src = GetAddrOfLocalVar(&SrcDecl);
@@ -1680,6 +1689,13 @@
       EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr());
     } else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) {
       EmitARCCopyWeak(dstField, srcField);
+    // If this is a C struct that requires non-trivial copy construction, emit a
+    // call to its copy constructor.
+    } else if (CopiedCapture.Kind ==
+               BlockCaptureEntityKind::NonTrivialCStruct) {
+      QualType varType = CI.getVariable()->getType();
+      callCStructCopyConstructor(MakeAddrLValue(dstField, varType),
+                                 MakeAddrLValue(srcField, varType));
     } else {
       llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src");
       if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
@@ -1735,50 +1751,51 @@
   return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
 }
 
+static BlockFieldFlags
+getBlockFieldFlagsForObjCObjectPointer(const BlockDecl::Capture &CI,
+                                       QualType T) {
+  BlockFieldFlags Flags = BLOCK_FIELD_IS_OBJECT;
+  if (T->isBlockPointerType())
+    Flags = BLOCK_FIELD_IS_BLOCK;
+  return Flags;
+}
+
 static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
 computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
                                   const LangOptions &LangOpts) {
-  BlockFieldFlags Flags;
   if (CI.isByRef()) {
-    Flags = BLOCK_FIELD_IS_BYREF;
+    BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
     if (T.isObjCGCWeak())
       Flags |= BLOCK_FIELD_IS_WEAK;
     return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
   }
 
-  if (const CXXRecordDecl *Record = T->getAsCXXRecordDecl()) {
-    if (Record->hasTrivialDestructor())
-      return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
+  switch (T.isDestructedType()) {
+  case QualType::DK_cxx_destructor:
     return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags());
+  case QualType::DK_objc_strong_lifetime:
+    // Use objc_storeStrong for __strong direct captures; the
+    // dynamic tools really like it when we do this.
+    return std::make_pair(BlockCaptureEntityKind::ARCStrong,
+                          getBlockFieldFlagsForObjCObjectPointer(CI, T));
+  case QualType::DK_objc_weak_lifetime:
+    // Support __weak direct captures.
+    return std::make_pair(BlockCaptureEntityKind::ARCWeak,
+                          getBlockFieldFlagsForObjCObjectPointer(CI, T));
+  case QualType::DK_nontrivial_c_struct:
+    return std::make_pair(BlockCaptureEntityKind::NonTrivialCStruct,
+                          BlockFieldFlags());
+  case QualType::DK_none: {
+    // Non-ARC captures are strong, and we need to use _Block_object_dispose.
+    if (T->isObjCRetainableType() && !T.getQualifiers().hasObjCLifetime() &&
+        !LangOpts.ObjCAutoRefCount)
+      return std::make_pair(BlockCaptureEntityKind::BlockObject,
+                            getBlockFieldFlagsForObjCObjectPointer(CI, T));
+    // Otherwise, we have nothing to do.
+    return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
   }
-
-  // Other types don't need to be destroy explicitly.
-  if (!T->isObjCRetainableType())
-    return std::make_pair(BlockCaptureEntityKind::None, Flags);
-
-  Flags = BLOCK_FIELD_IS_OBJECT;
-  if (T->isBlockPointerType())
-    Flags = BLOCK_FIELD_IS_BLOCK;
-
-  // Special rules for ARC captures.
-  Qualifiers QS = T.getQualifiers();
-
-  // Use objc_storeStrong for __strong direct captures; the
-  // dynamic tools really like it when we do this.
-  if (QS.getObjCLifetime() == Qualifiers::OCL_Strong)
-    return std::make_pair(BlockCaptureEntityKind::ARCStrong, Flags);
-
-  // Support __weak direct captures.
-  if (QS.getObjCLifetime() == Qualifiers::OCL_Weak)
-    return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags);
-
-  // Non-ARC captures are strong, and we need to use
-  // _Block_object_dispose.
-  if (!QS.hasObjCLifetime() && !LangOpts.ObjCAutoRefCount)
-    return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
-
-  // Otherwise, we have nothing to do.
-  return std::make_pair(BlockCaptureEntityKind::None, Flags);
+  }
+  llvm_unreachable("after exhaustive DestructionKind switch");
 }
 
 /// Generate the destroy-helper function for a block closure object:
@@ -1817,12 +1834,10 @@
                                           nullptr, SC_Static,
                                           false, false);
 
-  CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
 
-  // Create a scope with an artificial location for the body of this function.
-  auto NL = ApplyDebugLocation::CreateEmpty(*this);
   StartFunction(FD, C.VoidTy, Fn, FI, args);
-  auto AL = ApplyDebugLocation::CreateArtificial(*this);
+  ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
 
   llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
 
@@ -1858,6 +1873,13 @@
     } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
       EmitARCDestroyStrong(srcField, ARCImpreciseLifetime);
 
+    // If this is a C struct that requires non-trivial destruction, emit a call
+    // to its destructor.
+    } else if (DestroyedCapture.Kind ==
+               BlockCaptureEntityKind::NonTrivialCStruct) {
+      QualType varType = CI.getVariable()->getType();
+      pushDestroy(varType.isDestructedType(), srcField, varType);
+
     // Otherwise we call _Block_object_dispose.  It wouldn't be too
     // hard to just emit this as a cleanup if we wanted to make sure
     // that things were done in reverse.
@@ -2025,6 +2047,36 @@
     id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr());
   }
 };
+
+/// Emits the copy/dispose helpers for a __block variable that is a non-trivial
+/// C struct.
+class NonTrivialCStructByrefHelpers final : public BlockByrefHelpers {
+  QualType VarType;
+
+public:
+  NonTrivialCStructByrefHelpers(CharUnits alignment, QualType type)
+    : BlockByrefHelpers(alignment), VarType(type) {}
+
+  void emitCopy(CodeGenFunction &CGF, Address destField,
+                Address srcField) override {
+    CGF.callCStructMoveConstructor(CGF.MakeAddrLValue(destField, VarType),
+                                   CGF.MakeAddrLValue(srcField, VarType));
+  }
+
+  bool needsDispose() const override {
+    return VarType.isDestructedType();
+  }
+
+  void emitDispose(CodeGenFunction &CGF, Address field) override {
+    EHScopeStack::stable_iterator cleanupDepth = CGF.EHStack.stable_begin();
+    CGF.pushDestroy(VarType.isDestructedType(), field, VarType);
+    CGF.PopCleanupBlocks(cleanupDepth);
+  }
+
+  void profileImpl(llvm::FoldingSetNodeID &id) const override {
+    id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr());
+  }
+};
 } // end anonymous namespace
 
 static llvm::Constant *
@@ -2064,7 +2116,7 @@
                                           SC_Static,
                                           false, false);
 
-  CGF.CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+  CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
 
   CGF.StartFunction(FD, R, Fn, FI, args);
 
@@ -2138,7 +2190,7 @@
                                           SC_Static,
                                           false, false);
 
-  CGF.CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+  CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
 
   CGF.StartFunction(FD, R, Fn, FI, args);
 
@@ -2210,6 +2262,13 @@
         CGM, byrefInfo, CXXByrefHelpers(valueAlignment, type, copyExpr));
   }
 
+  // If type is a non-trivial C struct type that is non-trivial to
+  // destructly move or destroy, build the copy and dispose helpers.
+  if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct ||
+      type.isDestructedType() == QualType::DK_nontrivial_c_struct)
+    return ::buildByrefHelpers(
+        CGM, byrefInfo, NonTrivialCStructByrefHelpers(valueAlignment, type));
+
   // Otherwise, if we don't have a retainable type, there's nothing to do.
   // that the runtime does extra copies.
   if (!type->isObjCRetainableType()) return nullptr;
diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h
index 61fe4aa..3a42248 100644
--- a/lib/CodeGen/CGBuilder.h
+++ b/lib/CodeGen/CGBuilder.h
@@ -258,23 +258,23 @@
   using CGBuilderBaseTy::CreateMemCpy;
   llvm::CallInst *CreateMemCpy(Address Dest, Address Src, llvm::Value *Size,
                                bool IsVolatile = false) {
-    auto Align = std::min(Dest.getAlignment(), Src.getAlignment());
-    return CreateMemCpy(Dest.getPointer(), Src.getPointer(), Size,
-                        Align.getQuantity(), IsVolatile);
+    return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(),
+                        Src.getPointer(), Src.getAlignment().getQuantity(),
+                        Size,IsVolatile);
   }
   llvm::CallInst *CreateMemCpy(Address Dest, Address Src, uint64_t Size,
                                bool IsVolatile = false) {
-    auto Align = std::min(Dest.getAlignment(), Src.getAlignment());
-    return CreateMemCpy(Dest.getPointer(), Src.getPointer(), Size,
-                        Align.getQuantity(), IsVolatile);
+    return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(),
+                        Src.getPointer(), Src.getAlignment().getQuantity(),
+                        Size, IsVolatile);
   }
 
   using CGBuilderBaseTy::CreateMemMove;
   llvm::CallInst *CreateMemMove(Address Dest, Address Src, llvm::Value *Size,
                                 bool IsVolatile = false) {
-    auto Align = std::min(Dest.getAlignment(), Src.getAlignment());
-    return CreateMemMove(Dest.getPointer(), Src.getPointer(), Size,
-                         Align.getQuantity(), IsVolatile);
+    return CreateMemMove(Dest.getPointer(), Dest.getAlignment().getQuantity(),
+                         Src.getPointer(), Src.getAlignment().getQuantity(),
+                         Size, IsVolatile);
   }
 
   using CGBuilderBaseTy::CreateMemSet;
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 1de7c6e..49dfc60 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -30,8 +30,9 @@
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
-#include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/TargetParser.h"
 #include <sstream>
 
 using namespace clang;
@@ -838,6 +839,97 @@
   return RValue::get(BufAddr.getPointer());
 }
 
+/// Determine if a binop is a checked mixed-sign multiply we can specialize.
+static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
+                                       WidthAndSignedness Op1Info,
+                                       WidthAndSignedness Op2Info,
+                                       WidthAndSignedness ResultInfo) {
+  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
+         Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width &&
+         Op1Info.Signed != Op2Info.Signed;
+}
+
+/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
+/// the generic checked-binop irgen.
+static RValue
+EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
+                             WidthAndSignedness Op1Info, const clang::Expr *Op2,
+                             WidthAndSignedness Op2Info,
+                             const clang::Expr *ResultArg, QualType ResultQTy,
+                             WidthAndSignedness ResultInfo) {
+  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
+                                    Op2Info, ResultInfo) &&
+         "Not a mixed-sign multipliction we can specialize");
+
+  // Emit the signed and unsigned operands.
+  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
+  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
+  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
+  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
+
+  llvm::Type *OpTy = Signed->getType();
+  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
+  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
+  llvm::Type *ResTy = ResultPtr.getElementType();
+
+  // Take the absolute value of the signed operand.
+  llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
+  llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
+  llvm::Value *AbsSigned =
+      CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
+
+  // Perform a checked unsigned multiplication.
+  llvm::Value *UnsignedOverflow;
+  llvm::Value *UnsignedResult =
+      EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
+                            Unsigned, UnsignedOverflow);
+
+  llvm::Value *Overflow, *Result;
+  if (ResultInfo.Signed) {
+    // Signed overflow occurs if the result is greater than INT_MAX or lesser
+    // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
+    auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width)
+                      .zextOrSelf(Op1Info.Width);
+    llvm::Value *MaxResult =
+        CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
+                              CGF.Builder.CreateZExt(IsNegative, OpTy));
+    llvm::Value *SignedOverflow =
+        CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
+    Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
+
+    // Prepare the signed result (possibly by negating it).
+    llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
+    llvm::Value *SignedResult =
+        CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
+    Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
+  } else {
+    // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
+    llvm::Value *Underflow = CGF.Builder.CreateAnd(
+        IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
+    Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
+    if (ResultInfo.Width < Op1Info.Width) {
+      auto IntMax =
+          llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width);
+      llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
+          UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
+      Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
+    }
+
+    // Negate the product if it would be negative in infinite precision.
+    Result = CGF.Builder.CreateSelect(
+        IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
+
+    Result = CGF.Builder.CreateTrunc(Result, ResTy);
+  }
+  assert(Overflow && Result && "Missing overflow or result");
+
+  bool isVolatile =
+      ResultArg->getType()->getPointeeType().isVolatileQualified();
+  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
+                          isVolatile);
+  return RValue::get(Overflow);
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                         unsigned BuiltinID, const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -853,8 +945,195 @@
                                                Result.Val.getFloat()));
   }
 
+  // There are LLVM math intrinsics/instructions corresponding to math library
+  // functions except the LLVM op will never set errno while the math library
+  // might. Also, math builtins have the same semantics as their math library
+  // twins. Thus, we can transform math library and builtin calls to their
+  // LLVM counterparts if the call is marked 'const' (known to never set errno).
+  if (FD->hasAttr<ConstAttr>()) {
+    switch (BuiltinID) {
+    case Builtin::BIceil:
+    case Builtin::BIceilf:
+    case Builtin::BIceill:
+    case Builtin::BI__builtin_ceil:
+    case Builtin::BI__builtin_ceilf:
+    case Builtin::BI__builtin_ceill:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
+
+    case Builtin::BIcopysign:
+    case Builtin::BIcopysignf:
+    case Builtin::BIcopysignl:
+    case Builtin::BI__builtin_copysign:
+    case Builtin::BI__builtin_copysignf:
+    case Builtin::BI__builtin_copysignl:
+    case Builtin::BI__builtin_copysignf128:
+      return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
+
+    case Builtin::BIcos:
+    case Builtin::BIcosf:
+    case Builtin::BIcosl:
+    case Builtin::BI__builtin_cos:
+    case Builtin::BI__builtin_cosf:
+    case Builtin::BI__builtin_cosl:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
+
+    case Builtin::BIexp:
+    case Builtin::BIexpf:
+    case Builtin::BIexpl:
+    case Builtin::BI__builtin_exp:
+    case Builtin::BI__builtin_expf:
+    case Builtin::BI__builtin_expl:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
+
+    case Builtin::BIexp2:
+    case Builtin::BIexp2f:
+    case Builtin::BIexp2l:
+    case Builtin::BI__builtin_exp2:
+    case Builtin::BI__builtin_exp2f:
+    case Builtin::BI__builtin_exp2l:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
+
+    case Builtin::BIfabs:
+    case Builtin::BIfabsf:
+    case Builtin::BIfabsl:
+    case Builtin::BI__builtin_fabs:
+    case Builtin::BI__builtin_fabsf:
+    case Builtin::BI__builtin_fabsl:
+    case Builtin::BI__builtin_fabsf128:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
+
+    case Builtin::BIfloor:
+    case Builtin::BIfloorf:
+    case Builtin::BIfloorl:
+    case Builtin::BI__builtin_floor:
+    case Builtin::BI__builtin_floorf:
+    case Builtin::BI__builtin_floorl:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
+
+    case Builtin::BIfma:
+    case Builtin::BIfmaf:
+    case Builtin::BIfmal:
+    case Builtin::BI__builtin_fma:
+    case Builtin::BI__builtin_fmaf:
+    case Builtin::BI__builtin_fmal:
+      return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
+
+    case Builtin::BIfmax:
+    case Builtin::BIfmaxf:
+    case Builtin::BIfmaxl:
+    case Builtin::BI__builtin_fmax:
+    case Builtin::BI__builtin_fmaxf:
+    case Builtin::BI__builtin_fmaxl:
+      return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
+
+    case Builtin::BIfmin:
+    case Builtin::BIfminf:
+    case Builtin::BIfminl:
+    case Builtin::BI__builtin_fmin:
+    case Builtin::BI__builtin_fminf:
+    case Builtin::BI__builtin_fminl:
+      return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
+
+    // fmod() is a special-case. It maps to the frem instruction rather than an
+    // LLVM intrinsic.
+    case Builtin::BIfmod:
+    case Builtin::BIfmodf:
+    case Builtin::BIfmodl:
+    case Builtin::BI__builtin_fmod:
+    case Builtin::BI__builtin_fmodf:
+    case Builtin::BI__builtin_fmodl: {
+      Value *Arg1 = EmitScalarExpr(E->getArg(0));
+      Value *Arg2 = EmitScalarExpr(E->getArg(1));
+      return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
+    }
+
+    case Builtin::BIlog:
+    case Builtin::BIlogf:
+    case Builtin::BIlogl:
+    case Builtin::BI__builtin_log:
+    case Builtin::BI__builtin_logf:
+    case Builtin::BI__builtin_logl:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
+
+    case Builtin::BIlog10:
+    case Builtin::BIlog10f:
+    case Builtin::BIlog10l:
+    case Builtin::BI__builtin_log10:
+    case Builtin::BI__builtin_log10f:
+    case Builtin::BI__builtin_log10l:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
+
+    case Builtin::BIlog2:
+    case Builtin::BIlog2f:
+    case Builtin::BIlog2l:
+    case Builtin::BI__builtin_log2:
+    case Builtin::BI__builtin_log2f:
+    case Builtin::BI__builtin_log2l:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
+
+    case Builtin::BInearbyint:
+    case Builtin::BInearbyintf:
+    case Builtin::BInearbyintl:
+    case Builtin::BI__builtin_nearbyint:
+    case Builtin::BI__builtin_nearbyintf:
+    case Builtin::BI__builtin_nearbyintl:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
+
+    case Builtin::BIpow:
+    case Builtin::BIpowf:
+    case Builtin::BIpowl:
+    case Builtin::BI__builtin_pow:
+    case Builtin::BI__builtin_powf:
+    case Builtin::BI__builtin_powl:
+      return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
+
+    case Builtin::BIrint:
+    case Builtin::BIrintf:
+    case Builtin::BIrintl:
+    case Builtin::BI__builtin_rint:
+    case Builtin::BI__builtin_rintf:
+    case Builtin::BI__builtin_rintl:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
+
+    case Builtin::BIround:
+    case Builtin::BIroundf:
+    case Builtin::BIroundl:
+    case Builtin::BI__builtin_round:
+    case Builtin::BI__builtin_roundf:
+    case Builtin::BI__builtin_roundl:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
+
+    case Builtin::BIsin:
+    case Builtin::BIsinf:
+    case Builtin::BIsinl:
+    case Builtin::BI__builtin_sin:
+    case Builtin::BI__builtin_sinf:
+    case Builtin::BI__builtin_sinl:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
+
+    case Builtin::BIsqrt:
+    case Builtin::BIsqrtf:
+    case Builtin::BIsqrtl:
+    case Builtin::BI__builtin_sqrt:
+    case Builtin::BI__builtin_sqrtf:
+    case Builtin::BI__builtin_sqrtl:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
+
+    case Builtin::BItrunc:
+    case Builtin::BItruncf:
+    case Builtin::BItruncl:
+    case Builtin::BI__builtin_trunc:
+    case Builtin::BI__builtin_truncf:
+    case Builtin::BI__builtin_truncl:
+      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
+
+    default:
+      break;
+    }
+  }
+
   switch (BuiltinID) {
-  default: break;  // Handle intrinsics and libm functions below.
+  default: break;
   case Builtin::BI__builtin___CFStringMakeConstantString:
   case Builtin::BI__builtin___NSStringMakeConstantString:
     return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
@@ -893,64 +1172,6 @@
 
     return RValue::get(Result);
   }
-  case Builtin::BI__builtin_fabs:
-  case Builtin::BI__builtin_fabsf:
-  case Builtin::BI__builtin_fabsl: {
-    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
-  }
-  case Builtin::BI__builtin_fmod:
-  case Builtin::BI__builtin_fmodf:
-  case Builtin::BI__builtin_fmodl: {
-    Value *Arg1 = EmitScalarExpr(E->getArg(0));
-    Value *Arg2 = EmitScalarExpr(E->getArg(1));
-    Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
-    return RValue::get(Result);
-  }
-  case Builtin::BI__builtin_copysign:
-  case Builtin::BI__builtin_copysignf:
-  case Builtin::BI__builtin_copysignl: {
-    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
-  }
-  case Builtin::BI__builtin_ceil:
-  case Builtin::BI__builtin_ceilf:
-  case Builtin::BI__builtin_ceill: {
-    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
-  }
-  case Builtin::BI__builtin_floor:
-  case Builtin::BI__builtin_floorf:
-  case Builtin::BI__builtin_floorl: {
-    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
-  }
-  case Builtin::BI__builtin_trunc:
-  case Builtin::BI__builtin_truncf:
-  case Builtin::BI__builtin_truncl: {
-    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
-  }
-  case Builtin::BI__builtin_rint:
-  case Builtin::BI__builtin_rintf:
-  case Builtin::BI__builtin_rintl: {
-    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
-  }
-  case Builtin::BI__builtin_nearbyint:
-  case Builtin::BI__builtin_nearbyintf:
-  case Builtin::BI__builtin_nearbyintl: {
-    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
-  }
-  case Builtin::BI__builtin_round:
-  case Builtin::BI__builtin_roundf:
-  case Builtin::BI__builtin_roundl: {
-    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
-  }
-  case Builtin::BI__builtin_fmin:
-  case Builtin::BI__builtin_fminf:
-  case Builtin::BI__builtin_fminl: {
-    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
-  }
-  case Builtin::BI__builtin_fmax:
-  case Builtin::BI__builtin_fmaxf:
-  case Builtin::BI__builtin_fmaxl: {
-    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
-  }
   case Builtin::BI__builtin_conj:
   case Builtin::BI__builtin_conjf:
   case Builtin::BI__builtin_conjl: {
@@ -1217,14 +1438,7 @@
   case Builtin::BI__debugbreak:
     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
   case Builtin::BI__builtin_unreachable: {
-    if (SanOpts.has(SanitizerKind::Unreachable)) {
-      SanitizerScope SanScope(this);
-      EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
-                               SanitizerKind::Unreachable),
-                SanitizerHandler::BuiltinUnreachable,
-                EmitCheckSourceLocation(E->getExprLoc()), None);
-    } else
-      Builder.CreateUnreachable();
+    EmitUnreachable(E->getExprLoc());
 
     // We do need to preserve an insertion point.
     EmitBlock(createBasicBlock("unreachable.cont"));
@@ -1431,7 +1645,7 @@
     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                         E->getArg(0)->getExprLoc(), FD, 0);
     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
-    return RValue::get(Dest.getPointer());
+    return RValue::get(nullptr);
   }
   case Builtin::BImemcpy:
   case Builtin::BI__builtin_memcpy: {
@@ -1527,6 +1741,63 @@
     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
     return RValue::get(Dest.getPointer());
   }
+  case Builtin::BI__builtin_wmemcmp: {
+    // The MSVC runtime library does not provide a definition of wmemcmp, so we
+    // need an inline implementation.
+    if (!getTarget().getTriple().isOSMSVCRT())
+      break;
+
+    llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
+
+    Value *Dst = EmitScalarExpr(E->getArg(0));
+    Value *Src = EmitScalarExpr(E->getArg(1));
+    Value *Size = EmitScalarExpr(E->getArg(2));
+
+    BasicBlock *Entry = Builder.GetInsertBlock();
+    BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
+    BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
+    BasicBlock *Next = createBasicBlock("wmemcmp.next");
+    BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
+    Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
+    Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
+
+    EmitBlock(CmpGT);
+    PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
+    DstPhi->addIncoming(Dst, Entry);
+    PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
+    SrcPhi->addIncoming(Src, Entry);
+    PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
+    SizePhi->addIncoming(Size, Entry);
+    CharUnits WCharAlign =
+        getContext().getTypeAlignInChars(getContext().WCharTy);
+    Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
+    Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
+    Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
+    Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
+
+    EmitBlock(CmpLT);
+    Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
+    Builder.CreateCondBr(DstLtSrc, Exit, Next);
+
+    EmitBlock(Next);
+    Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
+    Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
+    Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
+    Value *NextSizeEq0 =
+        Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
+    Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
+    DstPhi->addIncoming(NextDst, Next);
+    SrcPhi->addIncoming(NextSrc, Next);
+    SizePhi->addIncoming(NextSize, Next);
+
+    EmitBlock(Exit);
+    PHINode *Ret = Builder.CreatePHI(IntTy, 4);
+    Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
+    Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
+    Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
+    Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
+    return RValue::get(Ret);
+  }
   case Builtin::BI__builtin_dwarf_cfa: {
     // The offset in bytes from the first argument to the CFA.
     //
@@ -2072,56 +2343,6 @@
     return RValue::get(nullptr);
   }
 
-    // Library functions with special handling.
-  case Builtin::BIsqrt:
-  case Builtin::BIsqrtf:
-  case Builtin::BIsqrtl: {
-    // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
-    // in finite- or unsafe-math mode (the intrinsic has different semantics
-    // for handling negative numbers compared to the library function, so
-    // -fmath-errno=0 is not enough).
-    if (!FD->hasAttr<ConstAttr>())
-      break;
-    if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
-          CGM.getCodeGenOpts().NoNaNsFPMath))
-      break;
-    Value *Arg0 = EmitScalarExpr(E->getArg(0));
-    llvm::Type *ArgType = Arg0->getType();
-    Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
-    return RValue::get(Builder.CreateCall(F, Arg0));
-  }
-
-  case Builtin::BI__builtin_pow:
-  case Builtin::BI__builtin_powf:
-  case Builtin::BI__builtin_powl:
-  case Builtin::BIpow:
-  case Builtin::BIpowf:
-  case Builtin::BIpowl: {
-    // Transform a call to pow* into a @llvm.pow.* intrinsic call.
-    if (!FD->hasAttr<ConstAttr>())
-      break;
-    Value *Base = EmitScalarExpr(E->getArg(0));
-    Value *Exponent = EmitScalarExpr(E->getArg(1));
-    llvm::Type *ArgType = Base->getType();
-    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
-    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
-  }
-
-  case Builtin::BIfma:
-  case Builtin::BIfmaf:
-  case Builtin::BIfmal:
-  case Builtin::BI__builtin_fma:
-  case Builtin::BI__builtin_fmaf:
-  case Builtin::BI__builtin_fmal: {
-    // Rewrite fma to intrinsic.
-    Value *FirstArg = EmitScalarExpr(E->getArg(0));
-    llvm::Type *ArgType = FirstArg->getType();
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
-    return RValue::get(
-        Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
-                               EmitScalarExpr(E->getArg(2))}));
-  }
-
   case Builtin::BI__builtin_signbit:
   case Builtin::BI__builtin_signbitf:
   case Builtin::BI__builtin_signbitl: {
@@ -2245,6 +2466,14 @@
         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
     WidthAndSignedness ResultInfo =
         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
+
+    // Handle mixed-sign multiplication as a special case, because adding
+    // runtime or backend support for our generic irgen would be too expensive.
+    if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
+      return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
+                                          RightInfo, ResultArg, ResultQTy,
+                                          ResultInfo);
+
     WidthAndSignedness EncompassingInfo =
         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
 
@@ -3009,10 +3238,10 @@
   case Builtin::BI__xray_customevent: {
     if (!ShouldXRayInstrumentFunction())
       return RValue::getIgnored();
-    if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
-      if (XRayAttr->neverXRayInstrument())
+    if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
+      if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
         return RValue::getIgnored();
-    }
+
     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
     auto FTy = F->getFunctionType();
     auto Arg0 = E->getArg(0);
@@ -3168,10 +3397,10 @@
   case llvm::Triple::armeb:
   case llvm::Triple::thumb:
   case llvm::Triple::thumbeb:
-    return CGF->EmitARMBuiltinExpr(BuiltinID, E);
+    return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
   case llvm::Triple::aarch64:
   case llvm::Triple::aarch64_be:
-    return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
+    return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
   case llvm::Triple::x86:
   case llvm::Triple::x86_64:
     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
@@ -3190,6 +3419,8 @@
   case llvm::Triple::wasm32:
   case llvm::Triple::wasm64:
     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
+  case llvm::Triple::hexagon:
+    return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
   default:
     return nullptr;
   }
@@ -3210,6 +3441,7 @@
 
 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
                                      NeonTypeFlags TypeFlags,
+                                     llvm::Triple::ArchType Arch,
                                      bool V1Ty=false) {
   int IsQuad = TypeFlags.isQuad();
   switch (TypeFlags.getEltType()) {
@@ -3218,8 +3450,14 @@
     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
   case NeonTypeFlags::Int16:
   case NeonTypeFlags::Poly16:
-  case NeonTypeFlags::Float16:
     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
+  case NeonTypeFlags::Float16:
+    // FIXME: Only AArch64 backend can so far properly handle half types.
+    // Remove else part once ARM backend support for half is complete.
+    if (Arch == llvm::Triple::aarch64)
+      return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
+    else
+      return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
   case NeonTypeFlags::Int32:
     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
   case NeonTypeFlags::Int64:
@@ -3242,6 +3480,8 @@
                                           NeonTypeFlags IntTypeFlags) {
   int IsQuad = IntTypeFlags.isQuad();
   switch (IntTypeFlags.getEltType()) {
+  case NeonTypeFlags::Int16:
+    return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
   case NeonTypeFlags::Int32:
     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
   case NeonTypeFlags::Int64:
@@ -3389,55 +3629,80 @@
   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
   NEONMAP0(vcvt_f32_v),
+  NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+  NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+  NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+  NEONMAP0(vcvt_s16_v),
   NEONMAP0(vcvt_s32_v),
   NEONMAP0(vcvt_s64_v),
+  NEONMAP0(vcvt_u16_v),
   NEONMAP0(vcvt_u32_v),
   NEONMAP0(vcvt_u64_v),
+  NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
+  NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
+  NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
+  NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
+  NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
+  NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
+  NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
+  NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
+  NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
+  NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
+  NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
+  NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
+  NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
+  NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
+  NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
   NEONMAP0(vcvtq_f32_v),
+  NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+  NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+  NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+  NEONMAP0(vcvtq_s16_v),
   NEONMAP0(vcvtq_s32_v),
   NEONMAP0(vcvtq_s64_v),
+  NEONMAP0(vcvtq_u16_v),
   NEONMAP0(vcvtq_u32_v),
   NEONMAP0(vcvtq_u64_v),
   NEONMAP0(vext_v),
@@ -3600,19 +3865,27 @@
   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
+  NEONMAP0(vcvt_f16_v),
   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
   NEONMAP0(vcvt_f32_v),
+  NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+  NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+  NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
+  NEONMAP0(vcvtq_f16_v),
   NEONMAP0(vcvtq_f32_v),
+  NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+  NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+  NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
@@ -3885,6 +4158,37 @@
   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
+  // FP16 scalar intrinisics go here.
+  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
+  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
+  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
+  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
+  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
+  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
 };
 
 #undef NEONMAP0
@@ -4022,7 +4326,8 @@
 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
     const char *NameHint, unsigned Modifier, const CallExpr *E,
-    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
+    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
+    llvm::Triple::ArchType Arch) {
   // Get the last argument, which specifies the vector type.
   llvm::APSInt NeonTypeConst;
   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
@@ -4034,7 +4339,7 @@
   bool Usgn = Type.isUnsigned();
   bool Quad = Type.isQuad();
 
-  llvm::VectorType *VTy = GetNeonType(this, Type);
+  llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
   llvm::Type *Ty = VTy;
   if (!Ty)
     return nullptr;
@@ -4081,9 +4386,20 @@
   case NEON::BI__builtin_neon_vcageq_v:
   case NEON::BI__builtin_neon_vcagt_v:
   case NEON::BI__builtin_neon_vcagtq_v: {
-    llvm::Type *VecFlt = llvm::VectorType::get(
-        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
-        VTy->getNumElements());
+    llvm::Type *Ty;
+    switch (VTy->getScalarSizeInBits()) {
+    default: llvm_unreachable("unexpected type");
+    case 32:
+      Ty = FloatTy;
+      break;
+    case 64:
+      Ty = DoubleTy;
+      break;
+    case 16:
+      Ty = HalfTy;
+      break;
+    }
+    llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
     llvm::Type *Tys[] = { VTy, VecFlt };
     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
     return EmitNeonCall(F, Ops, NameHint);
@@ -4097,11 +4413,19 @@
   case NEON::BI__builtin_neon_vcvt_f32_v:
   case NEON::BI__builtin_neon_vcvtq_f32_v:
     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
+    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), Arch);
     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
+  case NEON::BI__builtin_neon_vcvt_f16_v:
+  case NEON::BI__builtin_neon_vcvtq_f16_v:
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), Arch);
+    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
+                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
+  case NEON::BI__builtin_neon_vcvt_n_f16_v:
   case NEON::BI__builtin_neon_vcvt_n_f32_v:
   case NEON::BI__builtin_neon_vcvt_n_f64_v:
+  case NEON::BI__builtin_neon_vcvtq_n_f16_v:
   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
@@ -4109,11 +4433,15 @@
     Function *F = CGM.getIntrinsic(Int, Tys);
     return EmitNeonCall(F, Ops, "vcvt_n");
   }
+  case NEON::BI__builtin_neon_vcvt_n_s16_v:
   case NEON::BI__builtin_neon_vcvt_n_s32_v:
+  case NEON::BI__builtin_neon_vcvt_n_u16_v:
   case NEON::BI__builtin_neon_vcvt_n_u32_v:
   case NEON::BI__builtin_neon_vcvt_n_s64_v:
   case NEON::BI__builtin_neon_vcvt_n_u64_v:
+  case NEON::BI__builtin_neon_vcvtq_n_s16_v:
   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
+  case NEON::BI__builtin_neon_vcvtq_n_u16_v:
   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
@@ -4125,44 +4453,63 @@
   case NEON::BI__builtin_neon_vcvt_u32_v:
   case NEON::BI__builtin_neon_vcvt_s64_v:
   case NEON::BI__builtin_neon_vcvt_u64_v:
+  case NEON::BI__builtin_neon_vcvt_s16_v:
+  case NEON::BI__builtin_neon_vcvt_u16_v:
   case NEON::BI__builtin_neon_vcvtq_s32_v:
   case NEON::BI__builtin_neon_vcvtq_u32_v:
   case NEON::BI__builtin_neon_vcvtq_s64_v:
-  case NEON::BI__builtin_neon_vcvtq_u64_v: {
+  case NEON::BI__builtin_neon_vcvtq_u64_v:
+  case NEON::BI__builtin_neon_vcvtq_s16_v:
+  case NEON::BI__builtin_neon_vcvtq_u16_v: {
     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
   }
+  case NEON::BI__builtin_neon_vcvta_s16_v:
   case NEON::BI__builtin_neon_vcvta_s32_v:
   case NEON::BI__builtin_neon_vcvta_s64_v:
   case NEON::BI__builtin_neon_vcvta_u32_v:
   case NEON::BI__builtin_neon_vcvta_u64_v:
+  case NEON::BI__builtin_neon_vcvtaq_s16_v:
   case NEON::BI__builtin_neon_vcvtaq_s32_v:
   case NEON::BI__builtin_neon_vcvtaq_s64_v:
+  case NEON::BI__builtin_neon_vcvtaq_u16_v:
   case NEON::BI__builtin_neon_vcvtaq_u32_v:
   case NEON::BI__builtin_neon_vcvtaq_u64_v:
+  case NEON::BI__builtin_neon_vcvtn_s16_v:
   case NEON::BI__builtin_neon_vcvtn_s32_v:
   case NEON::BI__builtin_neon_vcvtn_s64_v:
+  case NEON::BI__builtin_neon_vcvtn_u16_v:
   case NEON::BI__builtin_neon_vcvtn_u32_v:
   case NEON::BI__builtin_neon_vcvtn_u64_v:
+  case NEON::BI__builtin_neon_vcvtnq_s16_v:
   case NEON::BI__builtin_neon_vcvtnq_s32_v:
   case NEON::BI__builtin_neon_vcvtnq_s64_v:
+  case NEON::BI__builtin_neon_vcvtnq_u16_v:
   case NEON::BI__builtin_neon_vcvtnq_u32_v:
   case NEON::BI__builtin_neon_vcvtnq_u64_v:
+  case NEON::BI__builtin_neon_vcvtp_s16_v:
   case NEON::BI__builtin_neon_vcvtp_s32_v:
   case NEON::BI__builtin_neon_vcvtp_s64_v:
+  case NEON::BI__builtin_neon_vcvtp_u16_v:
   case NEON::BI__builtin_neon_vcvtp_u32_v:
   case NEON::BI__builtin_neon_vcvtp_u64_v:
+  case NEON::BI__builtin_neon_vcvtpq_s16_v:
   case NEON::BI__builtin_neon_vcvtpq_s32_v:
   case NEON::BI__builtin_neon_vcvtpq_s64_v:
+  case NEON::BI__builtin_neon_vcvtpq_u16_v:
   case NEON::BI__builtin_neon_vcvtpq_u32_v:
   case NEON::BI__builtin_neon_vcvtpq_u64_v:
+  case NEON::BI__builtin_neon_vcvtm_s16_v:
   case NEON::BI__builtin_neon_vcvtm_s32_v:
   case NEON::BI__builtin_neon_vcvtm_s64_v:
+  case NEON::BI__builtin_neon_vcvtm_u16_v:
   case NEON::BI__builtin_neon_vcvtm_u32_v:
   case NEON::BI__builtin_neon_vcvtm_u64_v:
+  case NEON::BI__builtin_neon_vcvtmq_s16_v:
   case NEON::BI__builtin_neon_vcvtmq_s32_v:
   case NEON::BI__builtin_neon_vcvtmq_s64_v:
+  case NEON::BI__builtin_neon_vcvtmq_u16_v:
   case NEON::BI__builtin_neon_vcvtmq_u32_v:
   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
@@ -4633,15 +4980,16 @@
   case NEON::BI__builtin_neon_vsha1cq_u32:
   case NEON::BI__builtin_neon_vsha1pq_u32:
   case NEON::BI__builtin_neon_vsha1mq_u32:
-  case ARM::BI_MoveToCoprocessor:
-  case ARM::BI_MoveToCoprocessor2:
+  case clang::ARM::BI_MoveToCoprocessor:
+  case clang::ARM::BI_MoveToCoprocessor2:
     return false;
   }
   return true;
 }
 
 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
-                                           const CallExpr *E) {
+                                           const CallExpr *E,
+                                           llvm::Triple::ArchType Arch) {
   if (auto Hint = GetValueForARMHint(BuiltinID))
     return Hint;
 
@@ -5180,7 +5528,7 @@
   bool usgn = Type.isUnsigned();
   bool rightShift = false;
 
-  llvm::VectorType *VTy = GetNeonType(this, Type);
+  llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
   llvm::Type *Ty = VTy;
   if (!Ty)
     return nullptr;
@@ -5193,7 +5541,7 @@
   if (Builtin)
     return EmitCommonNeonBuiltinExpr(
         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
-        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
+        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
 
   unsigned Int;
   switch (BuiltinID) {
@@ -5218,7 +5566,7 @@
       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
     }
-    // fall through
+    LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vld1_lane_v: {
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
@@ -5343,7 +5691,7 @@
       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
                                                  Tys), Ops);
     }
-    // fall through
+    LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vst1_lane_v: {
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
@@ -5380,7 +5728,8 @@
 
 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
                                       const CallExpr *E,
-                                      SmallVectorImpl<Value *> &Ops) {
+                                      SmallVectorImpl<Value *> &Ops,
+                                      llvm::Triple::ArchType Arch) {
   unsigned int Int = 0;
   const char *s = nullptr;
 
@@ -5425,7 +5774,7 @@
 
   // Determine the type of this overloaded NEON intrinsic.
   NeonTypeFlags Type(Result.getZExtValue());
-  llvm::VectorType *Ty = GetNeonType(&CGF, Type);
+  llvm::VectorType *Ty = GetNeonType(&CGF, Type, Arch);
   if (!Ty)
     return nullptr;
 
@@ -5535,7 +5884,8 @@
 }
 
 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
-                                               const CallExpr *E) {
+                                               const CallExpr *E,
+                                               llvm::Triple::ArchType Arch) {
   unsigned HintID = static_cast<unsigned>(-1);
   switch (BuiltinID) {
   default: break;
@@ -5821,6 +6171,9 @@
   // Handle non-overloaded intrinsics first.
   switch (BuiltinID) {
   default: break;
+  case NEON::BI__builtin_neon_vabsh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
   case NEON::BI__builtin_neon_vldrq_p128: {
     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
@@ -5836,7 +6189,7 @@
   case NEON::BI__builtin_neon_vcvts_u32_f32:
   case NEON::BI__builtin_neon_vcvtd_u64_f64:
     usgn = true;
-    // FALL THROUGH
+    LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vcvts_s32_f32:
   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
@@ -5851,7 +6204,7 @@
   case NEON::BI__builtin_neon_vcvts_f32_u32:
   case NEON::BI__builtin_neon_vcvtd_f64_u64:
     usgn = true;
-    // FALL THROUGH
+    LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vcvts_f32_s32:
   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
@@ -5863,6 +6216,153 @@
       return Builder.CreateUIToFP(Ops[0], FTy);
     return Builder.CreateSIToFP(Ops[0], FTy);
   }
+  case NEON::BI__builtin_neon_vcvth_f16_u16:
+  case NEON::BI__builtin_neon_vcvth_f16_u32:
+  case NEON::BI__builtin_neon_vcvth_f16_u64:
+    usgn = true;
+    // FALL THROUGH
+  case NEON::BI__builtin_neon_vcvth_f16_s16:
+  case NEON::BI__builtin_neon_vcvth_f16_s32:
+  case NEON::BI__builtin_neon_vcvth_f16_s64: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    llvm::Type *FTy = HalfTy;
+    llvm::Type *InTy;
+    if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
+      InTy = Int64Ty;
+    else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
+      InTy = Int32Ty;
+    else
+      InTy = Int16Ty;
+    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
+    if (usgn)
+      return Builder.CreateUIToFP(Ops[0], FTy);
+    return Builder.CreateSIToFP(Ops[0], FTy);
+  }
+  case NEON::BI__builtin_neon_vcvth_u16_f16:
+    usgn = true;
+    // FALL THROUGH
+  case NEON::BI__builtin_neon_vcvth_s16_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+    if (usgn)
+      return Builder.CreateFPToUI(Ops[0], Int16Ty);
+    return Builder.CreateFPToSI(Ops[0], Int16Ty);
+  }
+  case NEON::BI__builtin_neon_vcvth_u32_f16:
+    usgn = true;
+    // FALL THROUGH
+  case NEON::BI__builtin_neon_vcvth_s32_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+    if (usgn)
+      return Builder.CreateFPToUI(Ops[0], Int32Ty);
+    return Builder.CreateFPToSI(Ops[0], Int32Ty);
+  }
+  case NEON::BI__builtin_neon_vcvth_u64_f16:
+    usgn = true;
+    // FALL THROUGH
+  case NEON::BI__builtin_neon_vcvth_s64_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+    if (usgn)
+      return Builder.CreateFPToUI(Ops[0], Int64Ty);
+    return Builder.CreateFPToSI(Ops[0], Int64Ty);
+  }
+  case NEON::BI__builtin_neon_vcvtah_u16_f16:
+  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
+  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
+  case NEON::BI__builtin_neon_vcvtph_u16_f16:
+  case NEON::BI__builtin_neon_vcvtah_s16_f16:
+  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
+  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
+  case NEON::BI__builtin_neon_vcvtph_s16_f16: {
+    unsigned Int;
+    llvm::Type* InTy = Int32Ty;
+    llvm::Type* FTy  = HalfTy;
+    llvm::Type *Tys[2] = {InTy, FTy};
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    switch (BuiltinID) {
+    default: llvm_unreachable("missing builtin ID in switch!");
+    case NEON::BI__builtin_neon_vcvtah_u16_f16:
+      Int = Intrinsic::aarch64_neon_fcvtau; break;
+    case NEON::BI__builtin_neon_vcvtmh_u16_f16:
+      Int = Intrinsic::aarch64_neon_fcvtmu; break;
+    case NEON::BI__builtin_neon_vcvtnh_u16_f16:
+      Int = Intrinsic::aarch64_neon_fcvtnu; break;
+    case NEON::BI__builtin_neon_vcvtph_u16_f16:
+      Int = Intrinsic::aarch64_neon_fcvtpu; break;
+    case NEON::BI__builtin_neon_vcvtah_s16_f16:
+      Int = Intrinsic::aarch64_neon_fcvtas; break;
+    case NEON::BI__builtin_neon_vcvtmh_s16_f16:
+      Int = Intrinsic::aarch64_neon_fcvtms; break;
+    case NEON::BI__builtin_neon_vcvtnh_s16_f16:
+      Int = Intrinsic::aarch64_neon_fcvtns; break;
+    case NEON::BI__builtin_neon_vcvtph_s16_f16:
+      Int = Intrinsic::aarch64_neon_fcvtps; break;
+    }
+    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
+    return Builder.CreateTrunc(Ops[0], Int16Ty);
+  }
+  case NEON::BI__builtin_neon_vcaleh_f16:
+  case NEON::BI__builtin_neon_vcalth_f16:
+  case NEON::BI__builtin_neon_vcageh_f16:
+  case NEON::BI__builtin_neon_vcagth_f16: {
+    unsigned Int;
+    llvm::Type* InTy = Int32Ty;
+    llvm::Type* FTy  = HalfTy;
+    llvm::Type *Tys[2] = {InTy, FTy};
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    switch (BuiltinID) {
+    default: llvm_unreachable("missing builtin ID in switch!");
+    case NEON::BI__builtin_neon_vcageh_f16:
+      Int = Intrinsic::aarch64_neon_facge; break;
+    case NEON::BI__builtin_neon_vcagth_f16:
+      Int = Intrinsic::aarch64_neon_facgt; break;
+    case NEON::BI__builtin_neon_vcaleh_f16:
+      Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
+    case NEON::BI__builtin_neon_vcalth_f16:
+      Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
+    }
+    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
+    return Builder.CreateTrunc(Ops[0], Int16Ty);
+  }
+  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
+  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
+    unsigned Int;
+    llvm::Type* InTy = Int32Ty;
+    llvm::Type* FTy  = HalfTy;
+    llvm::Type *Tys[2] = {InTy, FTy};
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    switch (BuiltinID) {
+    default: llvm_unreachable("missing builtin ID in switch!");
+    case NEON::BI__builtin_neon_vcvth_n_s16_f16:
+      Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
+    case NEON::BI__builtin_neon_vcvth_n_u16_f16:
+      Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
+    }
+    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
+    return Builder.CreateTrunc(Ops[0], Int16Ty);
+  }
+  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
+  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
+    unsigned Int;
+    llvm::Type* FTy  = HalfTy;
+    llvm::Type* InTy = Int32Ty;
+    llvm::Type *Tys[2] = {FTy, InTy};
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    switch (BuiltinID) {
+    default: llvm_unreachable("missing builtin ID in switch!");
+    case NEON::BI__builtin_neon_vcvth_n_f16_s16:
+      Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
+      Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
+      break;
+    case NEON::BI__builtin_neon_vcvth_n_f16_u16:
+      Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
+      Ops[0] = Builder.CreateZExt(Ops[0], InTy);
+      break;
+    }
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
+  }
   case NEON::BI__builtin_neon_vpaddd_s64: {
     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
     Value *Vec = EmitScalarExpr(E->getArg(0));
@@ -5904,6 +6404,7 @@
   case NEON::BI__builtin_neon_vceqzd_s64:
   case NEON::BI__builtin_neon_vceqzd_f64:
   case NEON::BI__builtin_neon_vceqzs_f32:
+  case NEON::BI__builtin_neon_vceqzh_f16:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
         Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -5911,6 +6412,7 @@
   case NEON::BI__builtin_neon_vcgezd_s64:
   case NEON::BI__builtin_neon_vcgezd_f64:
   case NEON::BI__builtin_neon_vcgezs_f32:
+  case NEON::BI__builtin_neon_vcgezh_f16:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
         Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -5918,6 +6420,7 @@
   case NEON::BI__builtin_neon_vclezd_s64:
   case NEON::BI__builtin_neon_vclezd_f64:
   case NEON::BI__builtin_neon_vclezs_f32:
+  case NEON::BI__builtin_neon_vclezh_f16:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
         Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -5925,6 +6428,7 @@
   case NEON::BI__builtin_neon_vcgtzd_s64:
   case NEON::BI__builtin_neon_vcgtzd_f64:
   case NEON::BI__builtin_neon_vcgtzs_f32:
+  case NEON::BI__builtin_neon_vcgtzh_f16:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
         Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -5932,6 +6436,7 @@
   case NEON::BI__builtin_neon_vcltzd_s64:
   case NEON::BI__builtin_neon_vcltzd_f64:
   case NEON::BI__builtin_neon_vcltzs_f32:
+  case NEON::BI__builtin_neon_vcltzh_f16:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
         Ops[0], ConvertType(E->getCallReturnType(getContext())),
@@ -5984,6 +6489,26 @@
     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
   }
+  case NEON::BI__builtin_neon_vceqh_f16:
+  case NEON::BI__builtin_neon_vcleh_f16:
+  case NEON::BI__builtin_neon_vclth_f16:
+  case NEON::BI__builtin_neon_vcgeh_f16:
+  case NEON::BI__builtin_neon_vcgth_f16: {
+    llvm::CmpInst::Predicate P;
+    switch (BuiltinID) {
+    default: llvm_unreachable("missing builtin ID in switch!");
+    case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
+    case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
+    case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
+    case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
+    case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
+    }
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
+    Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
+    Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
+    return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
+  }
   case NEON::BI__builtin_neon_vceqd_s64:
   case NEON::BI__builtin_neon_vceqd_u64:
   case NEON::BI__builtin_neon_vcgtd_s64:
@@ -6121,6 +6646,31 @@
         llvm::VectorType::get(DoubleTy, 2));
     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                         "vgetq_lane");
+  case NEON::BI__builtin_neon_vaddh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
+  case NEON::BI__builtin_neon_vsubh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
+  case NEON::BI__builtin_neon_vmulh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
+  case NEON::BI__builtin_neon_vdivh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
+  case NEON::BI__builtin_neon_vfmah_f16: {
+    Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+    return Builder.CreateCall(F,
+      {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
+  }
+  case NEON::BI__builtin_neon_vfmsh_f16: {
+    Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
+    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
+    Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
+    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+    return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
+  }
   case NEON::BI__builtin_neon_vaddd_s64:
   case NEON::BI__builtin_neon_vaddd_u64:
     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
@@ -6278,7 +6828,7 @@
   }
   }
 
-  llvm::VectorType *VTy = GetNeonType(this, Type);
+  llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
   llvm::Type *Ty = VTy;
   if (!Ty)
     return nullptr;
@@ -6292,9 +6842,9 @@
     return EmitCommonNeonBuiltinExpr(
         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
-        /*never use addresses*/ Address::invalid(), Address::invalid());
+        /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
 
-  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
+  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
     return V;
 
   unsigned Int;
@@ -6343,7 +6893,7 @@
       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
       llvm::Type *VTy = GetNeonType(this,
-        NeonTypeFlags(NeonTypeFlags::Float64, false, true));
+        NeonTypeFlags(NeonTypeFlags::Float64, false, true), Arch);
       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
@@ -6372,7 +6922,9 @@
     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
   }
+  case NEON::BI__builtin_neon_vfmah_lane_f16:
   case NEON::BI__builtin_neon_vfmas_lane_f32:
+  case NEON::BI__builtin_neon_vfmah_laneq_f16:
   case NEON::BI__builtin_neon_vfmas_laneq_f32:
   case NEON::BI__builtin_neon_vfmad_lane_f64:
   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
@@ -6393,12 +6945,22 @@
     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
+  case NEON::BI__builtin_neon_vmaxh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    Int = Intrinsic::aarch64_neon_fmax;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
+  }
   case NEON::BI__builtin_neon_vmin_v:
   case NEON::BI__builtin_neon_vminq_v:
     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
+  case NEON::BI__builtin_neon_vminh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    Int = Intrinsic::aarch64_neon_fmin;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
+  }
   case NEON::BI__builtin_neon_vabd_v:
   case NEON::BI__builtin_neon_vabdq_v:
     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
@@ -6437,20 +6999,31 @@
   case NEON::BI__builtin_neon_vminnmq_v:
     Int = Intrinsic::aarch64_neon_fminnm;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
+  case NEON::BI__builtin_neon_vminnmh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    Int = Intrinsic::aarch64_neon_fminnm;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
   case NEON::BI__builtin_neon_vmaxnm_v:
   case NEON::BI__builtin_neon_vmaxnmq_v:
     Int = Intrinsic::aarch64_neon_fmaxnm;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
+  case NEON::BI__builtin_neon_vmaxnmh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    Int = Intrinsic::aarch64_neon_fmaxnm;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
   case NEON::BI__builtin_neon_vrecpss_f32: {
     Ops.push_back(EmitScalarExpr(E->getArg(1)));
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
                         Ops, "vrecps");
   }
-  case NEON::BI__builtin_neon_vrecpsd_f64: {
+  case NEON::BI__builtin_neon_vrecpsd_f64:
     Ops.push_back(EmitScalarExpr(E->getArg(1)));
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
                         Ops, "vrecps");
-  }
+  case NEON::BI__builtin_neon_vrecpsh_f16:
+    Ops.push_back(EmitScalarExpr(E->getArg(1)));
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
+                        Ops, "vrecps");
   case NEON::BI__builtin_neon_vqshrun_n_v:
     Int = Intrinsic::aarch64_neon_sqshrun;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
@@ -6466,36 +7039,71 @@
   case NEON::BI__builtin_neon_vqrshrn_n_v:
     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
+  case NEON::BI__builtin_neon_vrndah_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::round;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
+  }
   case NEON::BI__builtin_neon_vrnda_v:
   case NEON::BI__builtin_neon_vrndaq_v: {
     Int = Intrinsic::round;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
   }
+  case NEON::BI__builtin_neon_vrndih_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::nearbyint;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
+  }
   case NEON::BI__builtin_neon_vrndi_v:
   case NEON::BI__builtin_neon_vrndiq_v: {
     Int = Intrinsic::nearbyint;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
   }
+  case NEON::BI__builtin_neon_vrndmh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::floor;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
+  }
   case NEON::BI__builtin_neon_vrndm_v:
   case NEON::BI__builtin_neon_vrndmq_v: {
     Int = Intrinsic::floor;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
   }
+  case NEON::BI__builtin_neon_vrndnh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::aarch64_neon_frintn;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
+  }
   case NEON::BI__builtin_neon_vrndn_v:
   case NEON::BI__builtin_neon_vrndnq_v: {
     Int = Intrinsic::aarch64_neon_frintn;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
   }
+  case NEON::BI__builtin_neon_vrndph_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::ceil;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
+  }
   case NEON::BI__builtin_neon_vrndp_v:
   case NEON::BI__builtin_neon_vrndpq_v: {
     Int = Intrinsic::ceil;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
   }
+  case NEON::BI__builtin_neon_vrndxh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::rint;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
+  }
   case NEON::BI__builtin_neon_vrndx_v:
   case NEON::BI__builtin_neon_vrndxq_v: {
     Int = Intrinsic::rint;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
   }
+  case NEON::BI__builtin_neon_vrndh_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::trunc;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
+  }
   case NEON::BI__builtin_neon_vrnd_v:
   case NEON::BI__builtin_neon_vrndq_v: {
     Int = Intrinsic::trunc;
@@ -6524,14 +7132,14 @@
   case NEON::BI__builtin_neon_vcvt_f64_v:
   case NEON::BI__builtin_neon_vcvtq_f64_v:
     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
+    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad), Arch);
     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
   case NEON::BI__builtin_neon_vcvt_f64_f32: {
     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
            "unexpected vcvt_f64_f32 builtin");
     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
-    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
+    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
 
     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
   }
@@ -6539,7 +7147,7 @@
     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
            "unexpected vcvt_f32_f64 builtin");
     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
-    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
+    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
 
     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
   }
@@ -6547,18 +7155,25 @@
   case NEON::BI__builtin_neon_vcvt_u32_v:
   case NEON::BI__builtin_neon_vcvt_s64_v:
   case NEON::BI__builtin_neon_vcvt_u64_v:
+	case NEON::BI__builtin_neon_vcvt_s16_v:
+	case NEON::BI__builtin_neon_vcvt_u16_v:
   case NEON::BI__builtin_neon_vcvtq_s32_v:
   case NEON::BI__builtin_neon_vcvtq_u32_v:
   case NEON::BI__builtin_neon_vcvtq_s64_v:
-  case NEON::BI__builtin_neon_vcvtq_u64_v: {
+  case NEON::BI__builtin_neon_vcvtq_u64_v:
+	case NEON::BI__builtin_neon_vcvtq_s16_v:
+	case NEON::BI__builtin_neon_vcvtq_u16_v: {
     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
     if (usgn)
       return Builder.CreateFPToUI(Ops[0], Ty);
     return Builder.CreateFPToSI(Ops[0], Ty);
   }
+  case NEON::BI__builtin_neon_vcvta_s16_v:
   case NEON::BI__builtin_neon_vcvta_s32_v:
+  case NEON::BI__builtin_neon_vcvtaq_s16_v:
   case NEON::BI__builtin_neon_vcvtaq_s32_v:
   case NEON::BI__builtin_neon_vcvta_u32_v:
+  case NEON::BI__builtin_neon_vcvtaq_u16_v:
   case NEON::BI__builtin_neon_vcvtaq_u32_v:
   case NEON::BI__builtin_neon_vcvta_s64_v:
   case NEON::BI__builtin_neon_vcvtaq_s64_v:
@@ -6568,9 +7183,13 @@
     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
   }
+  case NEON::BI__builtin_neon_vcvtm_s16_v:
   case NEON::BI__builtin_neon_vcvtm_s32_v:
+  case NEON::BI__builtin_neon_vcvtmq_s16_v:
   case NEON::BI__builtin_neon_vcvtmq_s32_v:
+  case NEON::BI__builtin_neon_vcvtm_u16_v:
   case NEON::BI__builtin_neon_vcvtm_u32_v:
+  case NEON::BI__builtin_neon_vcvtmq_u16_v:
   case NEON::BI__builtin_neon_vcvtmq_u32_v:
   case NEON::BI__builtin_neon_vcvtm_s64_v:
   case NEON::BI__builtin_neon_vcvtmq_s64_v:
@@ -6580,9 +7199,13 @@
     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
   }
+  case NEON::BI__builtin_neon_vcvtn_s16_v:
   case NEON::BI__builtin_neon_vcvtn_s32_v:
+  case NEON::BI__builtin_neon_vcvtnq_s16_v:
   case NEON::BI__builtin_neon_vcvtnq_s32_v:
+  case NEON::BI__builtin_neon_vcvtn_u16_v:
   case NEON::BI__builtin_neon_vcvtn_u32_v:
+  case NEON::BI__builtin_neon_vcvtnq_u16_v:
   case NEON::BI__builtin_neon_vcvtnq_u32_v:
   case NEON::BI__builtin_neon_vcvtn_s64_v:
   case NEON::BI__builtin_neon_vcvtnq_s64_v:
@@ -6592,9 +7215,13 @@
     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
   }
+  case NEON::BI__builtin_neon_vcvtp_s16_v:
   case NEON::BI__builtin_neon_vcvtp_s32_v:
+  case NEON::BI__builtin_neon_vcvtpq_s16_v:
   case NEON::BI__builtin_neon_vcvtpq_s32_v:
+  case NEON::BI__builtin_neon_vcvtp_u16_v:
   case NEON::BI__builtin_neon_vcvtp_u32_v:
+  case NEON::BI__builtin_neon_vcvtpq_u16_v:
   case NEON::BI__builtin_neon_vcvtpq_u32_v:
   case NEON::BI__builtin_neon_vcvtp_s64_v:
   case NEON::BI__builtin_neon_vcvtpq_s64_v:
@@ -6617,7 +7244,7 @@
       Quad = true;
     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
     llvm::Type *VTy = GetNeonType(this,
-      NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
+      NeonTypeFlags(NeonTypeFlags::Float64, false, Quad), Arch);
     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
@@ -6625,6 +7252,8 @@
   }
   case NEON::BI__builtin_neon_vnegd_s64:
     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
+  case NEON::BI__builtin_neon_vnegh_f16:
+    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
   case NEON::BI__builtin_neon_vpmaxnm_v:
   case NEON::BI__builtin_neon_vpmaxnmq_v: {
     Int = Intrinsic::aarch64_neon_fmaxnmp;
@@ -6635,6 +7264,11 @@
     Int = Intrinsic::aarch64_neon_fminnmp;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
   }
+  case NEON::BI__builtin_neon_vsqrth_f16: {
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Int = Intrinsic::sqrt;
+    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
+  }
   case NEON::BI__builtin_neon_vsqrt_v:
   case NEON::BI__builtin_neon_vsqrtq_v: {
     Int = Intrinsic::sqrt;
@@ -6649,7 +7283,7 @@
   case NEON::BI__builtin_neon_vaddv_u8:
     // FIXME: These are handled by the AArch64 scalar code.
     usgn = true;
-    // FALLTHROUGH
+    LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vaddv_s8: {
     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
     Ty = Int32Ty;
@@ -6661,7 +7295,7 @@
   }
   case NEON::BI__builtin_neon_vaddv_u16:
     usgn = true;
-    // FALLTHROUGH
+    LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vaddv_s16: {
     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
     Ty = Int32Ty;
@@ -6673,7 +7307,7 @@
   }
   case NEON::BI__builtin_neon_vaddvq_u8:
     usgn = true;
-    // FALLTHROUGH
+    LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vaddvq_s8: {
     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
     Ty = Int32Ty;
@@ -6685,7 +7319,7 @@
   }
   case NEON::BI__builtin_neon_vaddvq_u16:
     usgn = true;
-    // FALLTHROUGH
+    LLVM_FALLTHROUGH;
   case NEON::BI__builtin_neon_vaddvq_s16: {
     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
     Ty = Int32Ty;
@@ -6767,6 +7401,24 @@
     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
     return Builder.CreateTrunc(Ops[0], Int16Ty);
   }
+  case NEON::BI__builtin_neon_vmaxv_f16: {
+    Int = Intrinsic::aarch64_neon_fmaxv;
+    Ty = HalfTy;
+    VTy = llvm::VectorType::get(HalfTy, 4);
+    llvm::Type *Tys[2] = { Ty, VTy };
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+    return Builder.CreateTrunc(Ops[0], HalfTy);
+  }
+  case NEON::BI__builtin_neon_vmaxvq_f16: {
+    Int = Intrinsic::aarch64_neon_fmaxv;
+    Ty = HalfTy;
+    VTy = llvm::VectorType::get(HalfTy, 8);
+    llvm::Type *Tys[2] = { Ty, VTy };
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+    return Builder.CreateTrunc(Ops[0], HalfTy);
+  }
   case NEON::BI__builtin_neon_vminv_u8: {
     Int = Intrinsic::aarch64_neon_uminv;
     Ty = Int32Ty;
@@ -6839,6 +7491,60 @@
     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
     return Builder.CreateTrunc(Ops[0], Int16Ty);
   }
+  case NEON::BI__builtin_neon_vminv_f16: {
+    Int = Intrinsic::aarch64_neon_fminv;
+    Ty = HalfTy;
+    VTy = llvm::VectorType::get(HalfTy, 4);
+    llvm::Type *Tys[2] = { Ty, VTy };
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+    return Builder.CreateTrunc(Ops[0], HalfTy);
+  }
+  case NEON::BI__builtin_neon_vminvq_f16: {
+    Int = Intrinsic::aarch64_neon_fminv;
+    Ty = HalfTy;
+    VTy = llvm::VectorType::get(HalfTy, 8);
+    llvm::Type *Tys[2] = { Ty, VTy };
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+    return Builder.CreateTrunc(Ops[0], HalfTy);
+  }
+  case NEON::BI__builtin_neon_vmaxnmv_f16: {
+    Int = Intrinsic::aarch64_neon_fmaxnmv;
+    Ty = HalfTy;
+    VTy = llvm::VectorType::get(HalfTy, 4);
+    llvm::Type *Tys[2] = { Ty, VTy };
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
+    return Builder.CreateTrunc(Ops[0], HalfTy);
+  }
+  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
+    Int = Intrinsic::aarch64_neon_fmaxnmv;
+    Ty = HalfTy;
+    VTy = llvm::VectorType::get(HalfTy, 8);
+    llvm::Type *Tys[2] = { Ty, VTy };
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
+    return Builder.CreateTrunc(Ops[0], HalfTy);
+  }
+  case NEON::BI__builtin_neon_vminnmv_f16: {
+    Int = Intrinsic::aarch64_neon_fminnmv;
+    Ty = HalfTy;
+    VTy = llvm::VectorType::get(HalfTy, 4);
+    llvm::Type *Tys[2] = { Ty, VTy };
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
+    return Builder.CreateTrunc(Ops[0], HalfTy);
+  }
+  case NEON::BI__builtin_neon_vminnmvq_f16: {
+    Int = Intrinsic::aarch64_neon_fminnmv;
+    Ty = HalfTy;
+    VTy = llvm::VectorType::get(HalfTy, 8);
+    llvm::Type *Tys[2] = { Ty, VTy };
+    Ops.push_back(EmitScalarExpr(E->getArg(0)));
+    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
+    return Builder.CreateTrunc(Ops[0], HalfTy);
+  }
   case NEON::BI__builtin_neon_vmul_n_f64: {
     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
@@ -7389,6 +8095,19 @@
   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
 }
 
+static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
+                              unsigned NumElts, SmallVectorImpl<Value *> &Ops,
+                              bool InvertLHS = false) {
+  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
+  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
+
+  if (InvertLHS)
+    LHS = CGF.Builder.CreateNot(LHS);
+
+  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
+                                  CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
+}
+
 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
                                         SmallVectorImpl<Value *> &Ops,
                                         llvm::Type *DstTy,
@@ -7422,8 +8141,33 @@
   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
 }
 
+static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
+                                         unsigned NumElts, Value *MaskIn) {
+  if (MaskIn) {
+    const auto *C = dyn_cast<Constant>(MaskIn);
+    if (!C || !C->isAllOnesValue())
+      Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
+  }
+
+  if (NumElts < 8) {
+    uint32_t Indices[8];
+    for (unsigned i = 0; i != NumElts; ++i)
+      Indices[i] = i;
+    for (unsigned i = NumElts; i != 8; ++i)
+      Indices[i] = i % NumElts + NumElts;
+    Cmp = CGF.Builder.CreateShuffleVector(
+        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
+  }
+
+  return CGF.Builder.CreateBitCast(Cmp,
+                                   IntegerType::get(CGF.getLLVMContext(),
+                                                    std::max(NumElts, 8U)));
+}
+
 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
-                                   bool Signed, SmallVectorImpl<Value *> &Ops) {
+                                   bool Signed, ArrayRef<Value *> Ops) {
+  assert((Ops.size() == 2 || Ops.size() == 4) &&
+         "Unexpected number of arguments");
   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
   Value *Cmp;
 
@@ -7447,22 +8191,16 @@
     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
   }
 
-  const auto *C = dyn_cast<Constant>(Ops.back());
-  if (!C || !C->isAllOnesValue())
-    Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
+  Value *MaskIn = nullptr;
+  if (Ops.size() == 4)
+    MaskIn = Ops[3];
 
-  if (NumElts < 8) {
-    uint32_t Indices[8];
-    for (unsigned i = 0; i != NumElts; ++i)
-      Indices[i] = i;
-    for (unsigned i = NumElts; i != 8; ++i)
-      Indices[i] = i % NumElts + NumElts;
-    Cmp = CGF.Builder.CreateShuffleVector(
-        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
-  }
-  return CGF.Builder.CreateBitCast(Cmp,
-                                   IntegerType::get(CGF.getLLVMContext(),
-                                                    std::max(NumElts, 8U)));
+  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
+}
+
+static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
+  Value *Zero = Constant::getNullValue(In->getType());
+  return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
 }
 
 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
@@ -7504,78 +8242,6 @@
 
 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
 
-  // This enum contains the vendor, type, and subtype enums from the
-  // runtime library concatenated together. The _START labels mark
-  // the start and are used to adjust the value into the correct
-  // encoding space.
-  enum X86CPUs {
-    INTEL = 1,
-    AMD,
-    CPU_TYPE_START,
-    INTEL_BONNELL,
-    INTEL_CORE2,
-    INTEL_COREI7,
-    AMDFAM10H,
-    AMDFAM15H,
-    INTEL_SILVERMONT,
-    INTEL_KNL,
-    AMD_BTVER1,
-    AMD_BTVER2,
-    AMDFAM17H,
-    CPU_SUBTYPE_START,
-    INTEL_COREI7_NEHALEM,
-    INTEL_COREI7_WESTMERE,
-    INTEL_COREI7_SANDYBRIDGE,
-    AMDFAM10H_BARCELONA,
-    AMDFAM10H_SHANGHAI,
-    AMDFAM10H_ISTANBUL,
-    AMDFAM15H_BDVER1,
-    AMDFAM15H_BDVER2,
-    AMDFAM15H_BDVER3,
-    AMDFAM15H_BDVER4,
-    AMDFAM17H_ZNVER1,
-    INTEL_COREI7_IVYBRIDGE,
-    INTEL_COREI7_HASWELL,
-    INTEL_COREI7_BROADWELL,
-    INTEL_COREI7_SKYLAKE,
-    INTEL_COREI7_SKYLAKE_AVX512,
-  };
-
-  X86CPUs CPU =
-    StringSwitch<X86CPUs>(CPUStr)
-      .Case("amd", AMD)
-      .Case("amdfam10h", AMDFAM10H)
-      .Case("amdfam10", AMDFAM10H)
-      .Case("amdfam15h", AMDFAM15H)
-      .Case("amdfam15", AMDFAM15H)
-      .Case("amdfam17h", AMDFAM17H)
-      .Case("atom", INTEL_BONNELL)
-      .Case("barcelona", AMDFAM10H_BARCELONA)
-      .Case("bdver1", AMDFAM15H_BDVER1)
-      .Case("bdver2", AMDFAM15H_BDVER2)
-      .Case("bdver3", AMDFAM15H_BDVER3)
-      .Case("bdver4", AMDFAM15H_BDVER4)
-      .Case("bonnell", INTEL_BONNELL)
-      .Case("broadwell", INTEL_COREI7_BROADWELL)
-      .Case("btver1", AMD_BTVER1)
-      .Case("btver2", AMD_BTVER2)
-      .Case("core2", INTEL_CORE2)
-      .Case("corei7", INTEL_COREI7)
-      .Case("haswell", INTEL_COREI7_HASWELL)
-      .Case("intel", INTEL)
-      .Case("istanbul", AMDFAM10H_ISTANBUL)
-      .Case("ivybridge", INTEL_COREI7_IVYBRIDGE)
-      .Case("knl", INTEL_KNL)
-      .Case("nehalem", INTEL_COREI7_NEHALEM)
-      .Case("sandybridge", INTEL_COREI7_SANDYBRIDGE)
-      .Case("shanghai", AMDFAM10H_SHANGHAI)
-      .Case("silvermont", INTEL_SILVERMONT)
-      .Case("skylake", INTEL_COREI7_SKYLAKE)
-      .Case("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512)
-      .Case("slm", INTEL_SILVERMONT)
-      .Case("westmere", INTEL_COREI7_WESTMERE)
-      .Case("znver1", AMDFAM17H_ZNVER1);
-
   llvm::Type *Int32Ty = Builder.getInt32Ty();
 
   // Matching the struct layout from the compiler-rt/libgcc structure that is
@@ -7594,22 +8260,22 @@
   // range. Also adjust the expected value.
   unsigned Index;
   unsigned Value;
-  if (CPU > CPU_SUBTYPE_START) {
-    Index = 2;
-    Value = CPU - CPU_SUBTYPE_START;
-  } else if (CPU > CPU_TYPE_START) {
-    Index = 1;
-    Value = CPU - CPU_TYPE_START;
-  } else {
-    Index = 0;
-    Value = CPU;
-  }
+  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
+#define X86_VENDOR(ENUM, STRING)                                               \
+  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
+#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS)             \
+  .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
+#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR)                               \
+  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
+#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR)                            \
+  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
+#include "llvm/Support/X86TargetParser.def"
+                               .Default({0, 0});
+  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
 
   // Grab the appropriate field from __cpu_model.
-  llvm::Value *Idxs[] = {
-    ConstantInt::get(Int32Ty, 0),
-    ConstantInt::get(Int32Ty, Index)
-  };
+  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
+                         ConstantInt::get(Int32Ty, Index)};
   llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
   CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
 
@@ -7625,82 +8291,16 @@
 }
 
 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
-  // TODO: When/if this becomes more than x86 specific then use a TargetInfo
-  // based mapping.
   // Processor features and mapping to processor feature value.
-  enum X86Features {
-    CMOV = 0,
-    MMX,
-    POPCNT,
-    SSE,
-    SSE2,
-    SSE3,
-    SSSE3,
-    SSE4_1,
-    SSE4_2,
-    AVX,
-    AVX2,
-    SSE4_A,
-    FMA4,
-    XOP,
-    FMA,
-    AVX512F,
-    BMI,
-    BMI2,
-    AES,
-    PCLMUL,
-    AVX512VL,
-    AVX512BW,
-    AVX512DQ,
-    AVX512CD,
-    AVX512ER,
-    AVX512PF,
-    AVX512VBMI,
-    AVX512IFMA,
-    AVX5124VNNIW,
-    AVX5124FMAPS,
-    AVX512VPOPCNTDQ,
-    MAX
-  };
 
   uint32_t FeaturesMask = 0;
 
   for (const StringRef &FeatureStr : FeatureStrs) {
-    X86Features Feature =
-        StringSwitch<X86Features>(FeatureStr)
-            .Case("cmov", X86Features::CMOV)
-            .Case("mmx", X86Features::MMX)
-            .Case("popcnt", X86Features::POPCNT)
-            .Case("sse", X86Features::SSE)
-            .Case("sse2", X86Features::SSE2)
-            .Case("sse3", X86Features::SSE3)
-            .Case("ssse3", X86Features::SSSE3)
-            .Case("sse4.1", X86Features::SSE4_1)
-            .Case("sse4.2", X86Features::SSE4_2)
-            .Case("avx", X86Features::AVX)
-            .Case("avx2", X86Features::AVX2)
-            .Case("sse4a", X86Features::SSE4_A)
-            .Case("fma4", X86Features::FMA4)
-            .Case("xop", X86Features::XOP)
-            .Case("fma", X86Features::FMA)
-            .Case("avx512f", X86Features::AVX512F)
-            .Case("bmi", X86Features::BMI)
-            .Case("bmi2", X86Features::BMI2)
-            .Case("aes", X86Features::AES)
-            .Case("pclmul", X86Features::PCLMUL)
-            .Case("avx512vl", X86Features::AVX512VL)
-            .Case("avx512bw", X86Features::AVX512BW)
-            .Case("avx512dq", X86Features::AVX512DQ)
-            .Case("avx512cd", X86Features::AVX512CD)
-            .Case("avx512er", X86Features::AVX512ER)
-            .Case("avx512pf", X86Features::AVX512PF)
-            .Case("avx512vbmi", X86Features::AVX512VBMI)
-            .Case("avx512ifma", X86Features::AVX512IFMA)
-            .Case("avx5124vnniw", X86Features::AVX5124VNNIW)
-            .Case("avx5124fmaps", X86Features::AVX5124FMAPS)
-            .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
-            .Default(X86Features::MAX);
-    assert(Feature != X86Features::MAX && "Invalid feature!");
+    unsigned Feature =
+        StringSwitch<unsigned>(FeatureStr)
+#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
+#include "llvm/Support/X86TargetParser.def"
+        ;
     FeaturesMask |= (1U << Feature);
   }
 
@@ -7798,8 +8398,9 @@
   default: return nullptr;
   case X86::BI_mm_prefetch: {
     Value *Address = Ops[0];
-    Value *RW = ConstantInt::get(Int32Ty, 0);
-    Value *Locality = Ops[1];
+    ConstantInt *C = cast<ConstantInt>(Ops[1]);
+    Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
+    Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
     Value *Data = ConstantInt::get(Int32Ty, 1);
     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
     return Builder.CreateCall(F, {Address, RW, Locality, Data});
@@ -7918,8 +8519,18 @@
   case X86::BI__builtin_ia32_storesd128_mask: {
     return EmitX86MaskedStore(*this, Ops, 16);
   }
+  case X86::BI__builtin_ia32_vpopcntb_128:
+  case X86::BI__builtin_ia32_vpopcntd_128:
+  case X86::BI__builtin_ia32_vpopcntq_128:
+  case X86::BI__builtin_ia32_vpopcntw_128:
+  case X86::BI__builtin_ia32_vpopcntb_256:
+  case X86::BI__builtin_ia32_vpopcntd_256:
+  case X86::BI__builtin_ia32_vpopcntq_256:
+  case X86::BI__builtin_ia32_vpopcntw_256:
+  case X86::BI__builtin_ia32_vpopcntb_512:
   case X86::BI__builtin_ia32_vpopcntd_512:
-  case X86::BI__builtin_ia32_vpopcntq_512: {
+  case X86::BI__builtin_ia32_vpopcntq_512:
+  case X86::BI__builtin_ia32_vpopcntw_512: {
     llvm::Type *ResultType = ConvertType(E->getType());
     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
     return Builder.CreateCall(F, Ops);
@@ -7938,6 +8549,20 @@
   case X86::BI__builtin_ia32_cvtmask2q512:
     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
 
+  case X86::BI__builtin_ia32_cvtb2mask128:
+  case X86::BI__builtin_ia32_cvtb2mask256:
+  case X86::BI__builtin_ia32_cvtb2mask512:
+  case X86::BI__builtin_ia32_cvtw2mask128:
+  case X86::BI__builtin_ia32_cvtw2mask256:
+  case X86::BI__builtin_ia32_cvtw2mask512:
+  case X86::BI__builtin_ia32_cvtd2mask128:
+  case X86::BI__builtin_ia32_cvtd2mask256:
+  case X86::BI__builtin_ia32_cvtd2mask512:
+  case X86::BI__builtin_ia32_cvtq2mask128:
+  case X86::BI__builtin_ia32_cvtq2mask256:
+  case X86::BI__builtin_ia32_cvtq2mask512:
+    return EmitX86ConvertToMask(*this, Ops[0]);
+
   case X86::BI__builtin_ia32_movdqa32store128_mask:
   case X86::BI__builtin_ia32_movdqa64store128_mask:
   case X86::BI__builtin_ia32_storeaps128_mask:
@@ -8145,32 +8770,6 @@
   case X86::BI__builtin_ia32_selectpd_256:
   case X86::BI__builtin_ia32_selectpd_512:
     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
-  case X86::BI__builtin_ia32_pcmpeqb128_mask:
-  case X86::BI__builtin_ia32_pcmpeqb256_mask:
-  case X86::BI__builtin_ia32_pcmpeqb512_mask:
-  case X86::BI__builtin_ia32_pcmpeqw128_mask:
-  case X86::BI__builtin_ia32_pcmpeqw256_mask:
-  case X86::BI__builtin_ia32_pcmpeqw512_mask:
-  case X86::BI__builtin_ia32_pcmpeqd128_mask:
-  case X86::BI__builtin_ia32_pcmpeqd256_mask:
-  case X86::BI__builtin_ia32_pcmpeqd512_mask:
-  case X86::BI__builtin_ia32_pcmpeqq128_mask:
-  case X86::BI__builtin_ia32_pcmpeqq256_mask:
-  case X86::BI__builtin_ia32_pcmpeqq512_mask:
-    return EmitX86MaskedCompare(*this, 0, false, Ops);
-  case X86::BI__builtin_ia32_pcmpgtb128_mask:
-  case X86::BI__builtin_ia32_pcmpgtb256_mask:
-  case X86::BI__builtin_ia32_pcmpgtb512_mask:
-  case X86::BI__builtin_ia32_pcmpgtw128_mask:
-  case X86::BI__builtin_ia32_pcmpgtw256_mask:
-  case X86::BI__builtin_ia32_pcmpgtw512_mask:
-  case X86::BI__builtin_ia32_pcmpgtd128_mask:
-  case X86::BI__builtin_ia32_pcmpgtd256_mask:
-  case X86::BI__builtin_ia32_pcmpgtd512_mask:
-  case X86::BI__builtin_ia32_pcmpgtq128_mask:
-  case X86::BI__builtin_ia32_pcmpgtq256_mask:
-  case X86::BI__builtin_ia32_pcmpgtq512_mask:
-    return EmitX86MaskedCompare(*this, 6, true, Ops);
   case X86::BI__builtin_ia32_cmpb128_mask:
   case X86::BI__builtin_ia32_cmpb256_mask:
   case X86::BI__builtin_ia32_cmpb512_mask:
@@ -8202,6 +8801,57 @@
     return EmitX86MaskedCompare(*this, CC, false, Ops);
   }
 
+  case X86::BI__builtin_ia32_kortestchi:
+  case X86::BI__builtin_ia32_kortestzhi: {
+    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+    Value *C;
+    if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
+      C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
+    else
+      C = llvm::Constant::getNullValue(Builder.getInt16Ty());
+    Value *Cmp = Builder.CreateICmpEQ(Or, C);
+    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
+  }
+
+  case X86::BI__builtin_ia32_kandhi:
+    return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
+  case X86::BI__builtin_ia32_kandnhi:
+    return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
+  case X86::BI__builtin_ia32_korhi:
+    return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+  case X86::BI__builtin_ia32_kxnorhi:
+    return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
+  case X86::BI__builtin_ia32_kxorhi:
+    return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
+  case X86::BI__builtin_ia32_knothi: {
+    Ops[0] = getMaskVecValue(*this, Ops[0], 16);
+    return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
+                                 Builder.getInt16Ty());
+  }
+
+  case X86::BI__builtin_ia32_kunpckdi:
+  case X86::BI__builtin_ia32_kunpcksi:
+  case X86::BI__builtin_ia32_kunpckhi: {
+    unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits();
+    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+    uint32_t Indices[64];
+    for (unsigned i = 0; i != NumElts; ++i)
+      Indices[i] = i;
+
+    // First extract half of each vector. This gives better codegen than
+    // doing it in a single shuffle.
+    LHS = Builder.CreateShuffleVector(LHS, LHS,
+                                      makeArrayRef(Indices, NumElts / 2));
+    RHS = Builder.CreateShuffleVector(RHS, RHS,
+                                      makeArrayRef(Indices, NumElts / 2));
+    // Concat the vectors.
+    // NOTE: Operands are swapped to match the intrinsic definition.
+    Value *Res = Builder.CreateShuffleVector(RHS, LHS,
+                                             makeArrayRef(Indices, NumElts));
+    return Builder.CreateBitCast(Res, Ops[0]->getType());
+  }
+
   case X86::BI__builtin_ia32_vplzcntd_128_mask:
   case X86::BI__builtin_ia32_vplzcntd_256_mask:
   case X86::BI__builtin_ia32_vplzcntd_512_mask:
@@ -8324,6 +8974,43 @@
     return Builder.CreateExtractValue(Call, 1);
   }
 
+  case X86::BI__builtin_ia32_cmpps128_mask:
+  case X86::BI__builtin_ia32_cmpps256_mask:
+  case X86::BI__builtin_ia32_cmpps512_mask:
+  case X86::BI__builtin_ia32_cmppd128_mask:
+  case X86::BI__builtin_ia32_cmppd256_mask:
+  case X86::BI__builtin_ia32_cmppd512_mask: {
+    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+    Value *MaskIn = Ops[3];
+    Ops.erase(&Ops[3]);
+
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_cmpps128_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_ps_128;
+      break;
+    case X86::BI__builtin_ia32_cmpps256_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_ps_256;
+      break;
+    case X86::BI__builtin_ia32_cmpps512_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_ps_512;
+      break;
+    case X86::BI__builtin_ia32_cmppd128_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_pd_128;
+      break;
+    case X86::BI__builtin_ia32_cmppd256_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_pd_256;
+      break;
+    case X86::BI__builtin_ia32_cmppd512_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_pd_512;
+      break;
+    }
+
+    Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+    return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
+  }
+
   // SSE packed comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqps:
   case X86::BI__builtin_ia32_cmpeqpd:
@@ -8516,6 +9203,45 @@
     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
   case X86::BI_InterlockedIncrement64:
     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
+  case X86::BI_InterlockedCompareExchange128: {
+    // InterlockedCompareExchange128 doesn't directly refer to 128bit ints,
+    // instead it takes pointers to 64bit ints for Destination and
+    // ComparandResult, and exchange is taken as two 64bit ints (high & low).
+    // The previous value is written to ComparandResult, and success is
+    // returned.
+
+    llvm::Type *Int128Ty = Builder.getInt128Ty();
+    llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
+
+    Value *Destination =
+        Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy);
+    Value *ExchangeHigh128 =
+        Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty);
+    Value *ExchangeLow128 =
+        Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty);
+    Address ComparandResult(
+        Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy),
+        getContext().toCharUnitsFromBits(128));
+
+    Value *Exchange = Builder.CreateOr(
+        Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
+        ExchangeLow128);
+
+    Value *Comparand = Builder.CreateLoad(ComparandResult);
+
+    AtomicCmpXchgInst *CXI =
+        Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+                                    AtomicOrdering::SequentiallyConsistent,
+                                    AtomicOrdering::SequentiallyConsistent);
+    CXI->setVolatile(true);
+
+    // Write the result back to the inout pointer.
+    Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
+
+    // Get the success boolean and zero extend it to i8.
+    Value *Success = Builder.CreateExtractValue(CXI, 1);
+    return Builder.CreateZExt(Success, ConvertType(E->getType()));
+  }
 
   case X86::BI_AddressOfReturnAddress: {
     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
@@ -9583,6 +10309,16 @@
     return Builder.CreateCall(FnALAF32, {Ptr, Val});
   }
 
+  case NVPTX::BI__nvvm_atom_add_gen_d: {
+    Value *Ptr = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
+    // atomicrmw only deals with integer arguments, so we need to use
+    // LLVM's nvvm_atomic_load_add_f64 intrinsic.
+    Value *FnALAF64 =
+        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
+    return Builder.CreateCall(FnALAF64, {Ptr, Val});
+  }
+
   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
     Value *Ptr = EmitScalarExpr(E->getArg(0));
     Value *Val = EmitScalarExpr(E->getArg(1));
@@ -9957,6 +10693,21 @@
 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                                    const CallExpr *E) {
   switch (BuiltinID) {
+  case WebAssembly::BI__builtin_wasm_mem_size: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *I = EmitScalarExpr(E->getArg(0));
+    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_size, ResultType);
+    return Builder.CreateCall(Callee, I);
+  }
+  case WebAssembly::BI__builtin_wasm_mem_grow: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *Args[] = {
+      EmitScalarExpr(E->getArg(0)),
+      EmitScalarExpr(E->getArg(1))
+    };
+    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_grow, ResultType);
+    return Builder.CreateCall(Callee, Args);
+  }
   case WebAssembly::BI__builtin_wasm_current_memory: {
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
@@ -9982,3 +10733,58 @@
     return nullptr;
   }
 }
+
+Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
+                                               const CallExpr *E) {
+  SmallVector<llvm::Value *, 4> Ops;
+  Intrinsic::ID ID = Intrinsic::not_intrinsic;
+
+  switch (BuiltinID) {
+  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
+  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
+    Address Dest = EmitPointerWithAlignment(E->getArg(2));
+    unsigned Size;
+    if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
+      Size = 512;
+      ID = Intrinsic::hexagon_V6_vaddcarry;
+    } else {
+      Size = 1024;
+      ID = Intrinsic::hexagon_V6_vaddcarry_128B;
+    }
+    Dest = Builder.CreateBitCast(Dest,
+        llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
+    LoadInst *QLd = Builder.CreateLoad(Dest);
+    Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
+    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+    llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
+    llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
+                                              Vprd->getType()->getPointerTo(0));
+    Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
+    return Builder.CreateExtractValue(Result, 0);
+  }
+  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
+  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
+    Address Dest = EmitPointerWithAlignment(E->getArg(2));
+    unsigned Size;
+    if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
+      Size = 512;
+      ID = Intrinsic::hexagon_V6_vsubcarry;
+    } else {
+      Size = 1024;
+      ID = Intrinsic::hexagon_V6_vsubcarry_128B;
+    }
+    Dest = Builder.CreateBitCast(Dest,
+        llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
+    LoadInst *QLd = Builder.CreateLoad(Dest);
+    Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
+    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+    llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
+    llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
+                                              Vprd->getType()->getPointerTo(0));
+    Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
+    return Builder.CreateExtractValue(Result, 0);
+  }
+  } // switch
+
+  return nullptr;
+}
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index d24ef0a..4272aef 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -41,10 +41,10 @@
   /// Keeps track of kernel launch stubs emitted in this module
   llvm::SmallVector<llvm::Function *, 16> EmittedKernels;
   llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars;
-  /// Keeps track of variables containing handles of GPU binaries. Populated by
+  /// Keeps track of variable containing handle of GPU binary. Populated by
   /// ModuleCtorFunction() and used to create corresponding cleanup calls in
   /// ModuleDtorFunction()
-  llvm::SmallVector<llvm::GlobalVariable *, 16> GpuBinaryHandles;
+  llvm::GlobalVariable *GpuBinaryHandle = nullptr;
 
   llvm::Constant *getSetupArgumentFn() const;
   llvm::Constant *getLaunchFn() const;
@@ -245,16 +245,14 @@
 /// Creates a global constructor function for the module:
 /// \code
 /// void __cuda_module_ctor(void*) {
-///     Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0);
-///     __cuda_register_globals(Handle0);
-///     ...
-///     HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN);
-///     __cuda_register_globals(HandleN);
+///     Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
+///     __cuda_register_globals(Handle);
 /// }
 /// \endcode
 llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
-  // No need to generate ctors/dtors if there are no GPU binaries.
-  if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
+  // No need to generate ctors/dtors if there is no GPU binary.
+  std::string GpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
+  if (GpuBinaryFileName.empty())
     return nullptr;
 
   // void __cuda_register_globals(void* handle);
@@ -267,6 +265,18 @@
   llvm::StructType *FatbinWrapperTy =
       llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
 
+  // Register GPU binary with the CUDA runtime, store returned handle in a
+  // global variable and save a reference in GpuBinaryHandle to be cleaned up
+  // in destructor on exit. Then associate all known kernels with the GPU binary
+  // handle so CUDA runtime can figure out what to call on the GPU side.
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr =
+      llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
+  if (std::error_code EC = GpuBinaryOrErr.getError()) {
+    CGM.getDiags().Report(diag::err_cannot_open_file)
+        << GpuBinaryFileName << EC.message();
+    return nullptr;
+  }
+
   llvm::Function *ModuleCtorFunc = llvm::Function::Create(
       llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
       llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor", &TheModule);
@@ -276,79 +286,56 @@
 
   CtorBuilder.SetInsertPoint(CtorEntryBB);
 
-  // For each GPU binary, register it with the CUDA runtime and store returned
-  // handle in a global variable and save the handle in GpuBinaryHandles vector
-  // to be cleaned up in destructor on exit. Then associate all known kernels
-  // with the GPU binary handle so CUDA runtime can figure out what to call on
-  // the GPU side.
-  for (const std::string &GpuBinaryFileName :
-       CGM.getCodeGenOpts().CudaGpuBinaryFileNames) {
-    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr =
-        llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
-    if (std::error_code EC = GpuBinaryOrErr.getError()) {
-      CGM.getDiags().Report(diag::err_cannot_open_file) << GpuBinaryFileName
-                                                        << EC.message();
-      continue;
-    }
+  const char *FatbinConstantName =
+      CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
+  // NVIDIA's cuobjdump looks for fatbins in this section.
+  const char *FatbinSectionName =
+      CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
 
-    const char *FatbinConstantName =
-        CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
-    // NVIDIA's cuobjdump looks for fatbins in this section.
-    const char *FatbinSectionName =
-        CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
+  // Create initialized wrapper structure that points to the loaded GPU binary
+  ConstantInitBuilder Builder(CGM);
+  auto Values = Builder.beginStruct(FatbinWrapperTy);
+  // Fatbin wrapper magic.
+  Values.addInt(IntTy, 0x466243b1);
+  // Fatbin version.
+  Values.addInt(IntTy, 1);
+  // Data.
+  Values.add(makeConstantString(GpuBinaryOrErr.get()->getBuffer(), "",
+                                FatbinConstantName, 8));
+  // Unused in fatbin v1.
+  Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
+  llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
+      "__cuda_fatbin_wrapper", CGM.getPointerAlign(),
+      /*constant*/ true);
+  FatbinWrapper->setSection(FatbinSectionName);
 
-    // Create initialized wrapper structure that points to the loaded GPU binary
-    ConstantInitBuilder Builder(CGM);
-    auto Values = Builder.beginStruct(FatbinWrapperTy);
-    // Fatbin wrapper magic.
-    Values.addInt(IntTy, 0x466243b1);
-    // Fatbin version.
-    Values.addInt(IntTy, 1);
-    // Data.
-    Values.add(makeConstantString(GpuBinaryOrErr.get()->getBuffer(), 
-                                  "", FatbinConstantName, 8));
-    // Unused in fatbin v1.
-    Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
-    llvm::GlobalVariable *FatbinWrapper =
-      Values.finishAndCreateGlobal("__cuda_fatbin_wrapper",
-                                   CGM.getPointerAlign(),
-                                   /*constant*/ true);
-    FatbinWrapper->setSection(FatbinSectionName);
+  // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
+  llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
+      RegisterFatbinFunc, CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
+  GpuBinaryHandle = new llvm::GlobalVariable(
+      TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
+      llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
+  CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
+                                 CGM.getPointerAlign());
 
-    // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
-    llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
-        RegisterFatbinFunc,
-        CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
-    llvm::GlobalVariable *GpuBinaryHandle = new llvm::GlobalVariable(
-        TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
-        llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
-    CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
-                                   CGM.getPointerAlign());
-
-    // Call __cuda_register_globals(GpuBinaryHandle);
-    if (RegisterGlobalsFunc)
-      CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
-
-    // Save GpuBinaryHandle so we can unregister it in destructor.
-    GpuBinaryHandles.push_back(GpuBinaryHandle);
-  }
+  // Call __cuda_register_globals(GpuBinaryHandle);
+  if (RegisterGlobalsFunc)
+    CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
 
   CtorBuilder.CreateRetVoid();
   return ModuleCtorFunc;
 }
 
-/// Creates a global destructor function that unregisters all GPU code blobs
+/// Creates a global destructor function that unregisters the GPU code blob
 /// registered by constructor.
 /// \code
 /// void __cuda_module_dtor(void*) {
-///     __cudaUnregisterFatBinary(Handle0);
-///     ...
-///     __cudaUnregisterFatBinary(HandleN);
+///     __cudaUnregisterFatBinary(Handle);
 /// }
 /// \endcode
 llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
-  // No need for destructor if we don't have handles to unregister.
-  if (GpuBinaryHandles.empty())
+  // No need for destructor if we don't have a handle to unregister.
+  if (!GpuBinaryHandle)
     return nullptr;
 
   // void __cudaUnregisterFatBinary(void ** handle);
@@ -364,11 +351,9 @@
   CGBuilderTy DtorBuilder(CGM, Context);
   DtorBuilder.SetInsertPoint(DtorEntryBB);
 
-  for (llvm::GlobalVariable *GpuBinaryHandle : GpuBinaryHandles) {
-    auto HandleValue =
+  auto HandleValue =
       DtorBuilder.CreateAlignedLoad(GpuBinaryHandle, CGM.getPointerAlign());
-    DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
-  }
+  DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
 
   DtorBuilder.CreateRetVoid();
   return ModuleDtorFunc;
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index 5ef4dc4..738c635 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -205,7 +205,7 @@
   }
 
   // Finally, set up the alias with its proper name and attributes.
-  setAliasAttributes(cast<NamedDecl>(AliasDecl.getDecl()), Alias);
+  SetCommonAttributes(AliasDecl, Alias);
 
   return false;
 }
@@ -227,10 +227,9 @@
   }
 
   setFunctionLinkage(GD, Fn);
-  setFunctionDLLStorageClass(GD, Fn);
 
   CodeGenFunction(*this).GenerateCode(GD, Fn, FnInfo);
-  setFunctionDefinitionAttributes(MD, Fn);
+  setNonAliasAttributes(GD, Fn);
   SetLLVMFunctionAttributesForDefinition(MD, Fn);
   return Fn;
 }
diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp
index 0332586..a27c3e9 100644
--- a/lib/CodeGen/CGCXXABI.cpp
+++ b/lib/CodeGen/CGCXXABI.cpp
@@ -149,12 +149,15 @@
   }
 }
 
-void CGCXXABI::EmitThisParam(CodeGenFunction &CGF) {
+llvm::Value *CGCXXABI::loadIncomingCXXThis(CodeGenFunction &CGF) {
+  return CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(getThisDecl(CGF)),
+                                "this");
+}
+
+void CGCXXABI::setCXXABIThisValue(CodeGenFunction &CGF, llvm::Value *ThisPtr) {
   /// Initialize the 'this' slot.
   assert(getThisDecl(CGF) && "no 'this' variable for function");
-  CGF.CXXABIThisValue
-    = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(getThisDecl(CGF)),
-                             "this");
+  CGF.CXXABIThisValue = ThisPtr;
 }
 
 void CGCXXABI::EmitReturnFromThunk(CodeGenFunction &CGF,
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index 7b912e3..65a4c3d 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -73,9 +73,10 @@
     return CGF.CXXStructorImplicitParamValue;
   }
 
-  /// Perform prolog initialization of the parameter variable suitable
-  /// for 'this' emitted by buildThisParam.
-  void EmitThisParam(CodeGenFunction &CGF);
+  /// Loads the incoming C++ this pointer as it was passed by the caller.
+  llvm::Value *loadIncomingCXXThis(CodeGenFunction &CGF);
+
+  void setCXXABIThisValue(CodeGenFunction &CGF, llvm::Value *ThisPtr);
 
   ASTContext &getContext() const { return CGM.getContext(); }
 
@@ -358,13 +359,6 @@
     return CharUnits::Zero();
   }
 
-  /// Perform ABI-specific "this" parameter adjustment in a virtual function
-  /// prologue.
-  virtual llvm::Value *adjustThisParameterInVirtualFunctionPrologue(
-      CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) {
-    return This;
-  }
-
   /// Emit the ABI-specific prolog for the function.
   virtual void EmitInstanceFunctionProlog(CodeGenFunction &CGF) = 0;
 
@@ -420,8 +414,7 @@
 
   /// Build a virtual function pointer in the ABI-specific way.
   virtual CGCallee getVirtualFunctionPointer(CodeGenFunction &CGF,
-                                             GlobalDecl GD,
-                                             Address This,
+                                             GlobalDecl GD, Address This,
                                              llvm::Type *Ty,
                                              SourceLocation Loc) = 0;
 
@@ -440,6 +433,7 @@
   /// base tables.
   virtual void emitVirtualInheritanceTables(const CXXRecordDecl *RD) = 0;
 
+  virtual bool exportThunk() = 0;
   virtual void setThunkLinkage(llvm::Function *Thunk, bool ForVTable,
                                GlobalDecl GD, bool ReturnAdjustment) = 0;
 
@@ -588,6 +582,13 @@
   /// Emit a single constructor/destructor with the given type from a C++
   /// constructor Decl.
   virtual void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) = 0;
+
+  /// Load a vtable from This, an object of polymorphic type RD, or from one of
+  /// its virtual bases if it does not have its own vtable. Returns the vtable
+  /// and the class from which the vtable was loaded.
+  virtual std::pair<llvm::Value *, const CXXRecordDecl *>
+  LoadVTablePtr(CodeGenFunction &CGF, Address This,
+                const CXXRecordDecl *RD) = 0;
 };
 
 // Create an instance of a C++ ABI class:
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index d022c99..ebba250 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -1234,8 +1234,8 @@
 
   // Otherwise do coercion through memory. This is stupid, but simple.
   Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment());
-  Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.Int8PtrTy);
-  Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.Int8PtrTy);
+  Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
+  Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy);
   CGF.Builder.CreateMemCpy(Casted, SrcCasted,
       llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
       false);
@@ -1316,8 +1316,8 @@
     // to that information.
     Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
     CGF.Builder.CreateStore(Src, Tmp);
-    Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.Int8PtrTy);
-    Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.Int8PtrTy);
+    Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
+    Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy);
     CGF.Builder.CreateMemCpy(DstCasted, Casted,
         llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
         false);
@@ -1479,7 +1479,8 @@
 /***/
 
 bool CodeGenModule::ReturnTypeUsesSRet(const CGFunctionInfo &FI) {
-  return FI.getReturnInfo().isIndirect();
+  const auto &RI = FI.getReturnInfo();
+  return RI.isIndirect() || (RI.isInAlloca() && RI.getInAllocaSRet());
 }
 
 bool CodeGenModule::ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI) {
@@ -1739,10 +1740,15 @@
         llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
 
     // TODO: Reciprocal estimate codegen options should apply to instructions?
-    std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals;
+    const std::vector<std::string> &Recips = CodeGenOpts.Reciprocals;
     if (!Recips.empty())
       FuncAttrs.addAttribute("reciprocal-estimates",
-                             llvm::join(Recips.begin(), Recips.end(), ","));
+                             llvm::join(Recips, ","));
+
+    if (!CodeGenOpts.PreferVectorWidth.empty() &&
+        CodeGenOpts.PreferVectorWidth != "none")
+      FuncAttrs.addAttribute("prefer-vector-width",
+                             CodeGenOpts.PreferVectorWidth);
 
     if (CodeGenOpts.StackRealignment)
       FuncAttrs.addAttribute("stackrealign");
@@ -1841,10 +1847,9 @@
     HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
     if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
       Optional<unsigned> NumElemsParam;
-      // alloc_size args are base-1, 0 means not present.
-      if (unsigned N = AllocSize->getNumElemsParam())
-        NumElemsParam = N - 1;
-      FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam() - 1,
+      if (AllocSize->numElemsParam().isValid())
+        NumElemsParam = AllocSize->numElemsParam().getLLVMIndex();
+      FuncAttrs.addAllocSizeAttr(AllocSize->elemSizeParam().getLLVMIndex(),
                                  NumElemsParam);
     }
   }
@@ -1855,72 +1860,63 @@
       !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>()))
     FuncAttrs.addAttribute("split-stack");
 
-  if (!AttrOnCallSite) {
-    bool DisableTailCalls =
-        CodeGenOpts.DisableTailCalls ||
-        (TargetDecl && (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
-                        TargetDecl->hasAttr<AnyX86InterruptAttr>()));
-    FuncAttrs.addAttribute("disable-tail-calls",
-                           llvm::toStringRef(DisableTailCalls));
-
-    // Add target-cpu and target-features attributes to functions. If
-    // we have a decl for the function and it has a target attribute then
-    // parse that and add it to the feature set.
-    StringRef TargetCPU = getTarget().getTargetOpts().CPU;
-    const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
-    if (FD && FD->hasAttr<TargetAttr>()) {
-      llvm::StringMap<bool> FeatureMap;
-      getFunctionFeatureMap(FeatureMap, FD);
-
-      // Produce the canonical string for this set of features.
-      std::vector<std::string> Features;
-      for (llvm::StringMap<bool>::const_iterator it = FeatureMap.begin(),
-                                                 ie = FeatureMap.end();
-           it != ie; ++it)
-        Features.push_back((it->second ? "+" : "-") + it->first().str());
-
-      // Now add the target-cpu and target-features to the function.
-      // While we populated the feature map above, we still need to
-      // get and parse the target attribute so we can get the cpu for
-      // the function.
-      const auto *TD = FD->getAttr<TargetAttr>();
-      TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
-      if (ParsedAttr.Architecture != "")
-        TargetCPU = ParsedAttr.Architecture;
-      if (TargetCPU != "")
-        FuncAttrs.addAttribute("target-cpu", TargetCPU);
-      if (!Features.empty()) {
-        std::sort(Features.begin(), Features.end());
-        FuncAttrs.addAttribute(
-            "target-features",
-            llvm::join(Features.begin(), Features.end(), ","));
-      }
-    } else {
-      // Otherwise just add the existing target cpu and target features to the
-      // function.
-      std::vector<std::string> &Features = getTarget().getTargetOpts().Features;
-      if (TargetCPU != "")
-        FuncAttrs.addAttribute("target-cpu", TargetCPU);
-      if (!Features.empty()) {
-        std::sort(Features.begin(), Features.end());
-        FuncAttrs.addAttribute(
-            "target-features",
-            llvm::join(Features.begin(), Features.end(), ","));
+  // Add NonLazyBind attribute to function declarations when -fno-plt
+  // is used.
+  if (TargetDecl && CodeGenOpts.NoPLT) {
+    if (auto *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
+      if (!Fn->isDefined() && !AttrOnCallSite) {
+        FuncAttrs.addAttribute(llvm::Attribute::NonLazyBind);
       }
     }
   }
 
+  if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>()) {
+    if (getLangOpts().OpenCLVersion <= 120) {
+      // OpenCL v1.2 Work groups are always uniform
+      FuncAttrs.addAttribute("uniform-work-group-size", "true");
+    } else {
+      // OpenCL v2.0 Work groups may be whether uniform or not.
+      // '-cl-uniform-work-group-size' compile option gets a hint
+      // to the compiler that the global work-size be a multiple of
+      // the work-group size specified to clEnqueueNDRangeKernel
+      // (i.e. work groups are uniform).
+      FuncAttrs.addAttribute("uniform-work-group-size",
+                             llvm::toStringRef(CodeGenOpts.UniformWGSize));
+    }
+  }
+
+  if (!AttrOnCallSite) {
+    bool DisableTailCalls = false;
+
+    if (CodeGenOpts.DisableTailCalls)
+      DisableTailCalls = true;
+    else if (TargetDecl) {
+      if (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
+          TargetDecl->hasAttr<AnyX86InterruptAttr>())
+        DisableTailCalls = true;
+      else if (CodeGenOpts.NoEscapingBlockTailCalls) {
+        if (const auto *BD = dyn_cast<BlockDecl>(TargetDecl))
+          if (!BD->doesNotEscape())
+            DisableTailCalls = true;
+      }
+    }
+
+    FuncAttrs.addAttribute("disable-tail-calls",
+                           llvm::toStringRef(DisableTailCalls));
+    GetCPUAndFeaturesAttributes(TargetDecl, FuncAttrs);
+  }
+
   ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
 
   QualType RetTy = FI.getReturnType();
   const ABIArgInfo &RetAI = FI.getReturnInfo();
   switch (RetAI.getKind()) {
   case ABIArgInfo::Extend:
-    if (RetTy->hasSignedIntegerRepresentation())
+    if (RetAI.isSignExt())
       RetAttrs.addAttribute(llvm::Attribute::SExt);
-    else if (RetTy->hasUnsignedIntegerRepresentation())
+    else
       RetAttrs.addAttribute(llvm::Attribute::ZExt);
-    // FALL THROUGH
+    LLVM_FALLTHROUGH;
   case ABIArgInfo::Direct:
     if (RetAI.getInReg())
       RetAttrs.addAttribute(llvm::Attribute::InReg);
@@ -1997,15 +1993,11 @@
     // sense to do it here because parameters are so messed up.
     switch (AI.getKind()) {
     case ABIArgInfo::Extend:
-      if (ParamType->isSignedIntegerOrEnumerationType())
+      if (AI.isSignExt())
         Attrs.addAttribute(llvm::Attribute::SExt);
-      else if (ParamType->isUnsignedIntegerOrEnumerationType()) {
-        if (getTypes().getABIInfo().shouldSignExtUnsignedType(ParamType))
-          Attrs.addAttribute(llvm::Attribute::SExt);
-        else
-          Attrs.addAttribute(llvm::Attribute::ZExt);
-      }
-      // FALL THROUGH
+      else
+        Attrs.addAttribute(llvm::Attribute::ZExt);
+      LLVM_FALLTHROUGH;
     case ABIArgInfo::Direct:
       if (ArgNo == 0 && FI.isChainCall())
         Attrs.addAttribute(llvm::Attribute::Nest);
@@ -2246,11 +2238,16 @@
   for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end();
        i != e; ++i, ++info_it, ++ArgNo) {
     const VarDecl *Arg = *i;
-    QualType Ty = info_it->type;
     const ABIArgInfo &ArgI = info_it->info;
 
     bool isPromoted =
       isa<ParmVarDecl>(Arg) && cast<ParmVarDecl>(Arg)->isKNRPromoted();
+    // We are converting from ABIArgInfo type to VarDecl type directly, unless
+    // the parameter is promoted. In this case we convert to
+    // CGFunctionInfo::ArgInfo type with subsequent argument demotion.
+    QualType Ty = isPromoted ? info_it->type : Arg->getType();
+    assert(hasScalarEvaluationKind(Ty) ==
+           hasScalarEvaluationKind(Arg->getType()));
 
     unsigned FirstIRArg, NumIRArgs;
     std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
@@ -2749,6 +2746,12 @@
 void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
                                          bool EmitRetDbgLoc,
                                          SourceLocation EndLoc) {
+  if (FI.isNoReturn()) {
+    // Noreturn functions don't return.
+    EmitUnreachable(EndLoc);
+    return;
+  }
+
   if (CurCodeDecl && CurCodeDecl->hasAttr<NakedAttr>()) {
     // Naked functions don't have epilogues.
     Builder.CreateUnreachable();
@@ -3128,7 +3131,6 @@
 
 static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF,
                                             const CallArgList &CallArgs) {
-  assert(CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee());
   ArrayRef<CallArgList::CallArgCleanup> Cleanups =
     CallArgs.getCleanupsToDeactivate();
   // Iterate in reverse to increase the likelihood of popping the cleanup.
@@ -3441,10 +3443,15 @@
   QualType Ty;
 
   void Emit(CodeGenFunction &CGF, Flags flags) override {
-    const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
-    assert(!Dtor->isTrivial());
-    CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
-                              /*Delegating=*/false, Addr);
+    QualType::DestructionKind DtorKind = Ty.isDestructedType();
+    if (DtorKind == QualType::DK_cxx_destructor) {
+      const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
+      assert(!Dtor->isTrivial());
+      CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
+                                /*Delegating=*/false, Addr);
+    } else {
+      CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Ty));
+    }
   }
 };
 
@@ -3485,8 +3492,7 @@
   // In the Microsoft C++ ABI, aggregate arguments are destructed by the callee.
   // However, we still have to push an EH-only cleanup in case we unwind before
   // we make it to the call.
-  if (HasAggregateEvalKind &&
-      CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
+  if (HasAggregateEvalKind && getContext().isParamDestroyedInCallee(type)) {
     // If we're using inalloca, use the argument memory.  Otherwise, use a
     // temporary.
     AggValueSlot Slot;
@@ -3495,10 +3501,16 @@
     else
       Slot = CreateAggTemp(type, "agg.tmp");
 
-    const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
-    bool DestroyedInCallee =
-        RD && RD->hasNonTrivialDestructor() &&
-        CGM.getCXXABI().getRecordArgABI(RD) != CGCXXABI::RAA_Default;
+    bool DestroyedInCallee = true, NeedsEHCleanup = true;
+    if (const auto *RD = type->getAsCXXRecordDecl()) {
+      DestroyedInCallee =
+          RD && RD->hasNonTrivialDestructor() &&
+          (CGM.getCXXABI().getRecordArgABI(RD) != CGCXXABI::RAA_Default ||
+           RD->hasTrivialABIOverride());
+    } else {
+      NeedsEHCleanup = needsEHCleanup(type.isDestructedType());
+    }
+
     if (DestroyedInCallee)
       Slot.setExternallyDestructed();
 
@@ -3506,7 +3518,7 @@
     RValue RV = Slot.asRValue();
     args.add(RV, type);
 
-    if (DestroyedInCallee) {
+    if (DestroyedInCallee && NeedsEHCleanup) {
       // Create a no-op GEP between the placeholder and the cleanup so we can
       // RAUW it successfully.  It also serves as a marker of the first
       // instruction where the cleanup is active.
@@ -3528,9 +3540,9 @@
     } else {
       // We can't represent a misaligned lvalue in the CallArgList, so copy
       // to an aligned temporary now.
-      Address tmp = CreateMemTemp(type);
-      EmitAggregateCopy(tmp, L.getAddress(), type, L.isVolatile());
-      args.add(RValue::getAggregate(tmp), type);
+      LValue Dest = MakeAddrLValue(CreateMemTemp(type), type);
+      EmitAggregateCopy(Dest, L, type, L.isVolatile());
+      args.add(RValue::getAggregate(Dest.getAddress()), type);
     }
     return;
   }
@@ -3593,20 +3605,21 @@
 
 // Calls which may throw must have operand bundles indicating which funclet
 // they are nested within.
-static void
-getBundlesForFunclet(llvm::Value *Callee, llvm::Instruction *CurrentFuncletPad,
-                     SmallVectorImpl<llvm::OperandBundleDef> &BundleList) {
+SmallVector<llvm::OperandBundleDef, 1>
+CodeGenFunction::getBundlesForFunclet(llvm::Value *Callee) {
+  SmallVector<llvm::OperandBundleDef, 1> BundleList;
   // There is no need for a funclet operand bundle if we aren't inside a
   // funclet.
   if (!CurrentFuncletPad)
-    return;
+    return BundleList;
 
   // Skip intrinsics which cannot throw.
   auto *CalleeFn = dyn_cast<llvm::Function>(Callee->stripPointerCasts());
   if (CalleeFn && CalleeFn->isIntrinsic() && CalleeFn->doesNotThrow())
-    return;
+    return BundleList;
 
   BundleList.emplace_back("funclet", CurrentFuncletPad);
+  return BundleList;
 }
 
 /// Emits a simple call (never an invoke) to the given runtime function.
@@ -3614,10 +3627,8 @@
 CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
                                  ArrayRef<llvm::Value*> args,
                                  const llvm::Twine &name) {
-  SmallVector<llvm::OperandBundleDef, 1> BundleList;
-  getBundlesForFunclet(callee, CurrentFuncletPad, BundleList);
-
-  llvm::CallInst *call = Builder.CreateCall(callee, args, BundleList, name);
+  llvm::CallInst *call =
+      Builder.CreateCall(callee, args, getBundlesForFunclet(callee), name);
   call->setCallingConv(getRuntimeCC());
   return call;
 }
@@ -3625,8 +3636,8 @@
 /// Emits a call or invoke to the given noreturn runtime function.
 void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee,
                                                ArrayRef<llvm::Value*> args) {
-  SmallVector<llvm::OperandBundleDef, 1> BundleList;
-  getBundlesForFunclet(callee, CurrentFuncletPad, BundleList);
+  SmallVector<llvm::OperandBundleDef, 1> BundleList =
+      getBundlesForFunclet(callee);
 
   if (getInvokeDest()) {
     llvm::InvokeInst *invoke = 
@@ -3669,8 +3680,8 @@
                                   ArrayRef<llvm::Value *> Args,
                                   const Twine &Name) {
   llvm::BasicBlock *InvokeDest = getInvokeDest();
-  SmallVector<llvm::OperandBundleDef, 1> BundleList;
-  getBundlesForFunclet(Callee, CurrentFuncletPad, BundleList);
+  SmallVector<llvm::OperandBundleDef, 1> BundleList =
+      getBundlesForFunclet(Callee);
 
   llvm::Instruction *Inst;
   if (!InvokeDest)
@@ -3709,10 +3720,11 @@
                                  const CGCallee &Callee,
                                  ReturnValueSlot ReturnValue,
                                  const CallArgList &CallArgs,
-                                 llvm::Instruction **callOrInvoke) {
+                                 llvm::Instruction **callOrInvoke,
+                                 SourceLocation Loc) {
   // FIXME: We no longer need the types from CallArgs; lift up and simplify.
 
-  assert(Callee.isOrdinary());
+  assert(Callee.isOrdinary() || Callee.isVirtual());
 
   // Handle struct-return functions by passing a pointer to the
   // location that we would like to return into.
@@ -3867,7 +3879,9 @@
           Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
                                      "byval-temp", false);
           IRCallArgs[FirstIRArg] = AI.getPointer();
-          EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
+          LValue Dest = MakeAddrLValue(AI, I->Ty);
+          LValue Src = MakeAddrLValue(Addr, I->Ty);
+          EmitAggregateCopy(Dest, Src, I->Ty, RV.isVolatileQualified());
         } else {
           // Skip the extra memcpy call.
           IRCallArgs[FirstIRArg] = Addr.getPointer();
@@ -4034,7 +4048,8 @@
     }
   }
 
-  llvm::Value *CalleePtr = Callee.getFunctionPointer();
+  const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this);
+  llvm::Value *CalleePtr = ConcreteCallee.getFunctionPointer();
 
   // If we're using inalloca, set up that argument.
   if (ArgMemory.isValid()) {
@@ -4177,8 +4192,8 @@
   }
   llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest();
 
-  SmallVector<llvm::OperandBundleDef, 1> BundleList;
-  getBundlesForFunclet(CalleePtr, CurrentFuncletPad, BundleList);
+  SmallVector<llvm::OperandBundleDef, 1> BundleList =
+      getBundlesForFunclet(CalleePtr);
 
   // Emit the actual call/invoke instruction.
   llvm::CallSite CS;
@@ -4232,7 +4247,15 @@
       EmitLifetimeEnd(llvm::ConstantInt::get(Int64Ty, UnusedReturnSize),
                       SRetPtr.getPointer());
 
-    Builder.CreateUnreachable();
+    // Strip away the noreturn attribute to better diagnose unreachable UB.
+    if (SanOpts.has(SanitizerKind::Unreachable)) {
+      if (auto *F = CS.getCalledFunction())
+        F->removeFnAttr(llvm::Attribute::NoReturn);
+      CS.removeAttribute(llvm::AttributeList::FunctionIndex,
+                         llvm::Attribute::NoReturn);
+    }
+
+    EmitUnreachable(Loc);
     Builder.ClearInsertionPoint();
 
     // FIXME: For now, emit a dummy basic block because expr emitters in
@@ -4371,7 +4394,7 @@
                               OffsetValue);
     } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) {
       llvm::Value *ParamVal =
-          CallArgs[AA->getParamIndex() - 1].RV.getScalarVal();
+          CallArgs[AA->paramIndex().getLLVMIndex()].RV.getScalarVal();
       EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal);
     }
   }
@@ -4379,6 +4402,17 @@
   return Ret;
 }
 
+CGCallee CGCallee::prepareConcreteCallee(CodeGenFunction &CGF) const {
+  if (isVirtual()) {
+    const CallExpr *CE = getVirtualCallExpr();
+    return CGF.CGM.getCXXABI().getVirtualFunctionPointer(
+        CGF, getVirtualMethodDecl(), getThisAddress(),
+        getFunctionType(), CE ? CE->getLocStart() : SourceLocation());
+  }
+
+  return *this;
+}
+
 /* VarArg handling */
 
 Address CodeGenFunction::EmitVAArg(VAArgExpr *VE, Address &VAListAddr) {
diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h
index 7e10407..495baf0 100644
--- a/lib/CodeGen/CGCall.h
+++ b/lib/CodeGen/CGCall.h
@@ -18,6 +18,7 @@
 #include "CGValue.h"
 #include "EHScopeStack.h"
 #include "clang/AST/CanonicalType.h"
+#include "clang/AST/GlobalDecl.h"
 #include "clang/AST/Type.h"
 #include "llvm/IR/Value.h"
 
@@ -68,8 +69,9 @@
       Invalid,
       Builtin,
       PseudoDestructor,
+      Virtual,
 
-      Last = PseudoDestructor
+      Last = Virtual
     };
 
     struct BuiltinInfoStorage {
@@ -79,12 +81,19 @@
     struct PseudoDestructorInfoStorage {
       const CXXPseudoDestructorExpr *Expr;
     };
+    struct VirtualInfoStorage {
+      const CallExpr *CE;
+      GlobalDecl MD;
+      Address Addr;
+      llvm::FunctionType *FTy;
+    };
 
     SpecialKind KindOrFunctionPointer;
     union {
       CGCalleeInfo AbstractInfo;
       BuiltinInfoStorage BuiltinInfo;
       PseudoDestructorInfoStorage PseudoDestructorInfo;
+      VirtualInfoStorage VirtualInfo;
     };
 
     explicit CGCallee(SpecialKind kind) : KindOrFunctionPointer(kind) {}
@@ -127,6 +136,16 @@
       return CGCallee(abstractInfo, functionPtr);
     }
 
+    static CGCallee forVirtual(const CallExpr *CE, GlobalDecl MD, Address Addr,
+                               llvm::FunctionType *FTy) {
+      CGCallee result(SpecialKind::Virtual);
+      result.VirtualInfo.CE = CE;
+      result.VirtualInfo.MD = MD;
+      result.VirtualInfo.Addr = Addr;
+      result.VirtualInfo.FTy = FTy;
+      return result;
+    }
+
     bool isBuiltin() const {
       return KindOrFunctionPointer == SpecialKind::Builtin;
     }
@@ -150,7 +169,9 @@
     bool isOrdinary() const {
       return uintptr_t(KindOrFunctionPointer) > uintptr_t(SpecialKind::Last);
     }
-    const CGCalleeInfo &getAbstractInfo() const {
+    CGCalleeInfo getAbstractInfo() const {
+      if (isVirtual())
+        return VirtualInfo.MD.getDecl();
       assert(isOrdinary());
       return AbstractInfo;
     }
@@ -158,14 +179,37 @@
       assert(isOrdinary());
       return reinterpret_cast<llvm::Value*>(uintptr_t(KindOrFunctionPointer));
     }
-    llvm::FunctionType *getFunctionType() const {
-      return cast<llvm::FunctionType>(
-                    getFunctionPointer()->getType()->getPointerElementType());
-    }
     void setFunctionPointer(llvm::Value *functionPtr) {
       assert(isOrdinary());
       KindOrFunctionPointer = SpecialKind(uintptr_t(functionPtr));
     }
+
+    bool isVirtual() const {
+      return KindOrFunctionPointer == SpecialKind::Virtual;
+    }
+    const CallExpr *getVirtualCallExpr() const {
+      assert(isVirtual());
+      return VirtualInfo.CE;
+    }
+    GlobalDecl getVirtualMethodDecl() const {
+      assert(isVirtual());
+      return VirtualInfo.MD;
+    }
+    Address getThisAddress() const {
+      assert(isVirtual());
+      return VirtualInfo.Addr;
+    }
+
+    llvm::FunctionType *getFunctionType() const {
+      if (isVirtual())
+        return VirtualInfo.FTy;
+      return cast<llvm::FunctionType>(
+          getFunctionPointer()->getType()->getPointerElementType());
+    }
+
+    /// If this is a delayed callee computation of some sort, prepare
+    /// a concrete callee.
+    CGCallee prepareConcreteCallee(CodeGenFunction &CGF) const;
   };
 
   struct CallArg {
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index 1cd59d1..56bd250 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -406,8 +406,8 @@
 
   // Apply the offset.
   llvm::Value *Value = Builder.CreateBitCast(BaseAddr.getPointer(), Int8PtrTy);
-  Value = Builder.CreateGEP(Value, Builder.CreateNeg(NonVirtualOffset),
-                            "sub.ptr");
+  Value = Builder.CreateInBoundsGEP(Value, Builder.CreateNeg(NonVirtualOffset),
+                                    "sub.ptr");
 
   // Just cast.
   Value = Builder.CreateBitCast(Value, DerivedPtrTy);
@@ -615,7 +615,14 @@
 
   llvm::Value *ThisPtr = CGF.LoadCXXThis();
   QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl);
-  LValue LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
+  LValue LHS;
+
+  // If a base constructor is being emitted, create an LValue that has the
+  // non-virtual alignment.
+  if (CGF.CurGD.getCtorType() == Ctor_Base)
+    LHS = CGF.MakeNaturalAlignPointeeAddrLValue(ThisPtr, RecordTy);
+  else
+    LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
 
   EmitLValueForAnyFieldInitialization(CGF, MemberInit, LHS);
 
@@ -640,8 +647,7 @@
       LValue Src = CGF.EmitLValueForFieldInitialization(ThisRHSLV, Field);
 
       // Copy the aggregate.
-      CGF.EmitAggregateCopy(LHS.getAddress(), Src.getAddress(), FieldType,
-                            LHS.isVolatileQualified());
+      CGF.EmitAggregateCopy(LHS, Src, FieldType, LHS.isVolatileQualified());
       // Ensure that we destroy the objects if an exception is thrown later in
       // the constructor.
       QualType::DestructionKind dtorKind = FieldType.isDestructedType();
@@ -2002,10 +2008,10 @@
     assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
 
     const Expr *Arg = E->getArg(0);
-    QualType SrcTy = Arg->getType();
-    Address Src = EmitLValue(Arg).getAddress();
+    LValue Src = EmitLValue(Arg);
     QualType DestTy = getContext().getTypeDeclType(D->getParent());
-    EmitAggregateCopyCtor(This, Src, DestTy, SrcTy);
+    LValue Dest = MakeAddrLValue(This, DestTy);
+    EmitAggregateCopyCtor(Dest, Src);
     return;
   }
 
@@ -2072,8 +2078,10 @@
 
     QualType SrcTy = D->getParamDecl(0)->getType().getNonReferenceType();
     Address Src(Args[1].RV.getScalarVal(), getNaturalTypeAlignment(SrcTy));
+    LValue SrcLVal = MakeAddrLValue(Src, SrcTy);
     QualType DestTy = getContext().getTypeDeclType(ClassDecl);
-    EmitAggregateCopyCtor(This, Src, DestTy, SrcTy);
+    LValue DestLVal = MakeAddrLValue(This, DestTy);
+    EmitAggregateCopyCtor(DestLVal, SrcLVal);
     return;
   }
 
@@ -2423,7 +2431,8 @@
   VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy);
 
   llvm::StoreInst *Store = Builder.CreateStore(VTableAddressPoint, VTableField);
-  CGM.DecorateInstructionWithTBAA(Store, CGM.getTBAAVTablePtrAccessInfo());
+  TBAAAccessInfo TBAAInfo = CGM.getTBAAVTablePtrAccessInfo(VTablePtrTy);
+  CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
   if (CGM.getCodeGenOpts().OptimizationLevel > 0 &&
       CGM.getCodeGenOpts().StrictVTablePointers)
     CGM.DecorateInstructionWithInvariantGroup(Store, Vptr.VTableClass);
@@ -2517,7 +2526,8 @@
                                            const CXXRecordDecl *RD) {
   Address VTablePtrSrc = Builder.CreateElementBitCast(This, VTableTy);
   llvm::Instruction *VTable = Builder.CreateLoad(VTablePtrSrc, "vtable");
-  CGM.DecorateInstructionWithTBAA(VTable, CGM.getTBAAVTablePtrAccessInfo());
+  TBAAAccessInfo TBAAInfo = CGM.getTBAAVTablePtrAccessInfo(VTableTy);
+  CGM.DecorateInstructionWithTBAA(VTable, TBAAInfo);
 
   if (CGM.getCodeGenOpts().OptimizationLevel > 0 &&
       CGM.getCodeGenOpts().StrictVTablePointers)
@@ -2625,8 +2635,9 @@
     EmitBlock(CheckBlock);
   }
 
-  llvm::Value *VTable =
-    GetVTablePtr(Address(Derived, getPointerAlign()), Int8PtrTy, ClassDecl);
+  llvm::Value *VTable;
+  std::tie(VTable, ClassDecl) = CGM.getCXXABI().LoadVTablePtr(
+      *this, Address(Derived, getPointerAlign()), ClassDecl);
 
   EmitVTablePtrCheck(ClassDecl, VTable, TCK, Loc);
 
@@ -2774,9 +2785,12 @@
   RValue RV = EmitCall(calleeFnInfo, callee, returnSlot, callArgs);
 
   // If necessary, copy the returned value into the slot.
-  if (!resultType->isVoidType() && returnSlot.isNull())
+  if (!resultType->isVoidType() && returnSlot.isNull()) {
+    if (getLangOpts().ObjCAutoRefCount && resultType->isObjCRetainableType()) {
+      RV = RValue::get(EmitARCRetainAutoreleasedReturnValue(RV.getScalarVal()));
+    }
     EmitReturnOfRValue(RV, resultType);
-  else
+  } else
     EmitBranchThroughCleanup(ReturnBlock);
 }
 
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 22055b2..526def2 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -624,7 +624,7 @@
       si->eraseFromParent();
 
       // Destroy the load.
-      assert(condition->getOperand(0) == CGF.NormalCleanupDest);
+      assert(condition->getOperand(0) == CGF.NormalCleanupDest.getPointer());
       assert(condition->use_empty());
       condition->eraseFromParent();
     }
@@ -833,7 +833,7 @@
         if (NormalCleanupDestSlot->hasOneUse()) {
           NormalCleanupDestSlot->user_back()->eraseFromParent();
           NormalCleanupDestSlot->eraseFromParent();
-          NormalCleanupDest = nullptr;
+          NormalCleanupDest = Address::invalid();
         }
 
         llvm::BasicBlock *BranchAfter = Scope.getBranchAfterBlock(0);
@@ -1250,10 +1250,10 @@
 }
 
 Address CodeGenFunction::getNormalCleanupDestSlot() {
-  if (!NormalCleanupDest)
+  if (!NormalCleanupDest.isValid())
     NormalCleanupDest =
-      CreateTempAlloca(Builder.getInt32Ty(), "cleanup.dest.slot");
-  return Address(NormalCleanupDest, CharUnits::fromQuantity(4));
+      CreateDefaultAlignTempAlloca(Builder.getInt32Ty(), "cleanup.dest.slot");
+  return NormalCleanupDest;
 }
 
 /// Emits all the code to cause the given temporary to be cleaned up.
diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp
index 5842e7b..74c158c 100644
--- a/lib/CodeGen/CGCoroutine.cpp
+++ b/lib/CodeGen/CGCoroutine.cpp
@@ -315,7 +315,7 @@
       GetParamRef Visitor;
       Visitor.Visit(const_cast<Expr*>(InitExpr));
       assert(Visitor.Expr);
-      auto *DREOrig = cast<DeclRefExpr>(Visitor.Expr);
+      DeclRefExpr *DREOrig = Visitor.Expr;
       auto *PD = DREOrig->getDecl();
 
       auto it = LocalDeclMap.find(PD);
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index a73e4a9..4cb9b2d 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -265,8 +265,7 @@
   // Add any template specialization args.
   if (Info) {
     const TemplateArgumentList *TArgs = Info->TemplateArguments;
-    TemplateSpecializationType::PrintTemplateArgumentList(OS, TArgs->asArray(),
-                                                          getPrintingPolicy());
+    printTemplateArgumentList(OS, TArgs->asArray(), getPrintingPolicy());
   }
 
   // Copy this name on the side and use its reference.
@@ -362,18 +361,19 @@
   return StringRef();
 }
 
-llvm::DIFile::ChecksumKind
+Optional<llvm::DIFile::ChecksumKind>
 CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const {
   Checksum.clear();
 
-  if (!CGM.getCodeGenOpts().EmitCodeView)
-    return llvm::DIFile::CSK_None;
+  if (!CGM.getCodeGenOpts().EmitCodeView &&
+      CGM.getCodeGenOpts().DwarfVersion < 5)
+    return None;
 
   SourceManager &SM = CGM.getContext().getSourceManager();
   bool Invalid;
   llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid);
   if (Invalid)
-    return llvm::DIFile::CSK_None;
+    return None;
 
   llvm::MD5 Hash;
   llvm::MD5::MD5Result Result;
@@ -385,23 +385,30 @@
   return llvm::DIFile::CSK_MD5;
 }
 
+Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM, FileID FID) {
+  if (!CGM.getCodeGenOpts().EmbedSource)
+    return None;
+
+  bool SourceInvalid = false;
+  StringRef Source = SM.getBufferData(FID, &SourceInvalid);
+
+  if (SourceInvalid)
+    return None;
+
+  return Source;
+}
+
 llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
   if (!Loc.isValid())
     // If Location is not valid then use main input file.
-    return DBuilder.createFile(remapDIPath(TheCU->getFilename()),
-                               remapDIPath(TheCU->getDirectory()),
-                               TheCU->getFile()->getChecksumKind(),
-                               TheCU->getFile()->getChecksum());
+    return getOrCreateMainFile();
 
   SourceManager &SM = CGM.getContext().getSourceManager();
   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
 
   if (PLoc.isInvalid() || StringRef(PLoc.getFilename()).empty())
     // If the location is not valid then use main input file.
-    return DBuilder.createFile(remapDIPath(TheCU->getFilename()),
-                               remapDIPath(TheCU->getDirectory()),
-                               TheCU->getFile()->getChecksumKind(),
-                               TheCU->getFile()->getChecksum());
+    return getOrCreateMainFile();
 
   // Cache the results.
   const char *fname = PLoc.getFilename();
@@ -414,22 +421,27 @@
   }
 
   SmallString<32> Checksum;
-  llvm::DIFile::ChecksumKind CSKind =
+  Optional<llvm::DIFile::ChecksumKind> CSKind =
       computeChecksum(SM.getFileID(Loc), Checksum);
+  Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
+  if (CSKind)
+    CSInfo.emplace(*CSKind, Checksum);
 
   llvm::DIFile *F = DBuilder.createFile(remapDIPath(PLoc.getFilename()),
                                         remapDIPath(getCurrentDirname()),
-                                        CSKind, Checksum);
+                                        CSInfo,
+                                        getSource(SM, SM.getFileID(Loc)));
 
   DIFileCache[fname].reset(F);
   return F;
 }
 
 llvm::DIFile *CGDebugInfo::getOrCreateMainFile() {
-  return DBuilder.createFile(remapDIPath(TheCU->getFilename()),
-                             remapDIPath(TheCU->getDirectory()),
-                             TheCU->getFile()->getChecksumKind(),
-                             TheCU->getFile()->getChecksum());
+  return DBuilder.createFile(
+      remapDIPath(TheCU->getFilename()),
+      remapDIPath(TheCU->getDirectory()),
+      TheCU->getFile()->getChecksum(),
+      CGM.getCodeGenOpts().EmbedSource ? TheCU->getSource() : None);
 }
 
 std::string CGDebugInfo::remapDIPath(StringRef Path) const {
@@ -473,7 +485,8 @@
 
 void CGDebugInfo::CreateCompileUnit() {
   SmallString<32> Checksum;
-  llvm::DIFile::ChecksumKind CSKind = llvm::DIFile::CSK_None;
+  Optional<llvm::DIFile::ChecksumKind> CSKind;
+  Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
 
   // Should we be asking the SourceManager for the main file name, instead of
   // accepting it as an argument? This just causes the main file name to
@@ -552,13 +565,18 @@
     break;
   }
 
+  if (CSKind)
+    CSInfo.emplace(*CSKind, Checksum);
+
   // Create new compile unit.
   // FIXME - Eliminate TheCU.
   auto &CGOpts = CGM.getCodeGenOpts();
   TheCU = DBuilder.createCompileUnit(
       LangTag,
       DBuilder.createFile(remapDIPath(MainFileName),
-                          remapDIPath(getCurrentDirname()), CSKind, Checksum),
+                          remapDIPath(getCurrentDirname()),
+                          CSInfo,
+                          getSource(SM, SM.getMainFileID())),
       Producer, LO.Optimize || CGOpts.PrepareForLTO || CGOpts.EmitSummaryIndex,
       CGOpts.DwarfDebugFlags, RuntimeVers,
       CGOpts.EnableSplitDwarf ? "" : CGOpts.SplitDwarfFile, EmissionKind,
@@ -941,11 +959,8 @@
 
   SmallString<128> NS;
   llvm::raw_svector_ostream OS(NS);
-  Ty->getTemplateName().print(OS, getPrintingPolicy(),
-                              /*qualified*/ false);
-
-  TemplateSpecializationType::PrintTemplateArgumentList(
-      OS, Ty->template_arguments(), getPrintingPolicy());
+  Ty->getTemplateName().print(OS, getPrintingPolicy(), /*qualified*/ false);
+  printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy());
 
   auto *AliasDecl = cast<TypeAliasTemplateDecl>(
       Ty->getTemplateName().getAsTemplateDecl())->getTemplatedDecl();
@@ -1399,7 +1414,7 @@
       // C++ ABI does not include all virtual methods from non-primary bases in
       // the vtable for the most derived class. For example, if C inherits from
       // A and B, C's primary vftable will not include B's virtual methods.
-      if (Method->begin_overridden_methods() == Method->end_overridden_methods())
+      if (Method->size_overridden_methods() == 0)
         Flags |= llvm::DINode::FlagIntroducedVirtual;
 
       // The 'this' adjustment accounts for both the virtual and non-virtual
@@ -2111,6 +2126,7 @@
             : ~1ULL;
     llvm::DIBuilder DIB(CGM.getModule());
     DIB.createCompileUnit(TheCU->getSourceLanguage(),
+                          // TODO: Support "Source" from external AST providers?
                           DIB.createFile(Mod.getModuleName(), Mod.getPath()),
                           TheCU->getProducer(), true, StringRef(), 0,
                           Mod.getASTFile(), llvm::DICompileUnit::FullDebug,
@@ -2295,12 +2311,14 @@
                                       llvm::DIFile *Unit) {
   llvm::DIType *ElementTy = getOrCreateType(Ty->getElementType(), Unit);
   int64_t Count = Ty->getNumElements();
-  if (Count == 0)
-    // If number of elements are not known then this is an unbounded array.
-    // Use Count == -1 to express such arrays.
-    Count = -1;
 
-  llvm::Metadata *Subscript = DBuilder.getOrCreateSubrange(0, Count);
+  llvm::Metadata *Subscript;
+  QualType QTy(Ty, 0);
+  auto SizeExpr = SizeExprCache.find(QTy);
+  if (SizeExpr != SizeExprCache.end())
+    Subscript = DBuilder.getOrCreateSubrange(0, SizeExpr->getSecond());
+  else
+    Subscript = DBuilder.getOrCreateSubrange(0, Count ? Count : -1);
   llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscript);
 
   uint64_t Size = CGM.getContext().getTypeSize(Ty);
@@ -2357,8 +2375,12 @@
       }
     }
 
-    // FIXME: Verify this is right for VLAs.
-    Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count));
+    auto SizeNode = SizeExprCache.find(EltTy);
+    if (SizeNode != SizeExprCache.end())
+      Subscripts.push_back(
+          DBuilder.getOrCreateSubrange(0, SizeNode->getSecond()));
+    else
+      Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count));
     EltTy = Ty->getElementType();
   }
 
@@ -2489,9 +2511,12 @@
   // Create elements for each enumerator.
   SmallVector<llvm::Metadata *, 16> Enumerators;
   ED = ED->getDefinition();
+  bool IsSigned = ED->getIntegerType()->isSignedIntegerType();
   for (const auto *Enum : ED->enumerators()) {
-    Enumerators.push_back(DBuilder.createEnumerator(
-        Enum->getName(), Enum->getInitVal().getSExtValue()));
+    const auto &InitVal = Enum->getInitVal();
+    auto Value = IsSigned ? InitVal.getSExtValue() : InitVal.getZExtValue();
+    Enumerators.push_back(
+        DBuilder.createEnumerator(Enum->getName(), Value, !IsSigned));
   }
 
   // Return a CompositeType for the enum itself.
@@ -2500,11 +2525,10 @@
   llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation());
   unsigned Line = getLineNumber(ED->getLocation());
   llvm::DIScope *EnumContext = getDeclContextDescriptor(ED);
-  llvm::DIType *ClassTy =
-      ED->isFixed() ? getOrCreateType(ED->getIntegerType(), DefUnit) : nullptr;
+  llvm::DIType *ClassTy = getOrCreateType(ED->getIntegerType(), DefUnit);
   return DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit,
                                         Line, Size, Align, EltArray, ClassTy,
-                                        FullName);
+                                        FullName, ED->isFixed());
 }
 
 llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent,
@@ -2657,7 +2681,6 @@
     // file where the type's definition is located, so it might be
     // best to make this behavior a command line or debugger tuning
     // option.
-    FullSourceLoc Loc(D->getLocation(), CGM.getContext().getSourceManager());
     if (Module *M = D->getOwningModule()) {
       // This is a (sub-)module.
       auto Info = ExternalASTSource::ASTSourceDescriptor(*M);
@@ -2808,9 +2831,18 @@
 
   SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
 
+  // Explicitly record the calling convention for C++ records.
+  auto Flags = llvm::DINode::FlagZero;
+  if (auto CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+    if (CGM.getCXXABI().getRecordArgABI(CXXRD) == CGCXXABI::RAA_Indirect)
+      Flags |= llvm::DINode::FlagTypePassByReference;
+    else
+      Flags |= llvm::DINode::FlagTypePassByValue;
+  }
+
   llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType(
       getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align,
-      llvm::DINode::FlagZero, FullName);
+      Flags, FullName);
 
   // Elements of composite types usually have back to the type, creating
   // uniquing cycles.  Distinct nodes are more efficient.
@@ -3004,7 +3036,7 @@
       !FD->isExternallyVisible(),
       /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize,
       TParamsArray.get(), getFunctionDeclaration(FD));
-  const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl());
+  const FunctionDecl *CanonDecl = FD->getCanonicalDecl();
   FwdDeclReplaceMap.emplace_back(std::piecewise_construct,
                                  std::make_tuple(CanonDecl),
                                  std::make_tuple(SP));
@@ -3218,7 +3250,7 @@
   if (Name.startswith("\01"))
     Name = Name.substr(1);
 
-  if (!HasDecl || D->isImplicit()) {
+  if (!HasDecl || D->isImplicit() || D->hasAttr<ArtificialAttr>()) {
     Flags |= llvm::DINode::FlagArtificial;
     // Artificial functions should not silently reuse CurLoc.
     CurLoc = SourceLocation();
@@ -3468,13 +3500,14 @@
                                    nullptr, Elements);
 }
 
-void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
-                              llvm::Optional<unsigned> ArgNo,
-                              CGBuilderTy &Builder) {
+llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
+                                                llvm::Value *Storage,
+                                                llvm::Optional<unsigned> ArgNo,
+                                                CGBuilderTy &Builder) {
   assert(DebugKind >= codegenoptions::LimitedDebugInfo);
   assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
   if (VD->hasAttr<NoDebugAttr>())
-    return;
+    return nullptr;
 
   bool Unwritten =
       VD->isImplicit() || (isa<Decl>(VD->getDeclContext()) &&
@@ -3492,7 +3525,7 @@
   // If there is no debug info for this type then do not emit debug info
   // for this variable.
   if (!Ty)
-    return;
+    return nullptr;
 
   // Get location information.
   unsigned Line = 0;
@@ -3543,7 +3576,7 @@
   } else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) {
     // If VD is an anonymous union then Storage represents value for
     // all union fields.
-    const auto *RD = cast<RecordDecl>(RT->getDecl());
+    const RecordDecl *RD = RT->getDecl();
     if (RD->isUnion() && RD->isAnonymousStructOrUnion()) {
       // GDB has trouble finding local variables in anonymous unions, so we emit
       // artifical local variables for each of the members.
@@ -3588,13 +3621,15 @@
   DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
                          llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt),
                          Builder.GetInsertBlock());
+
+  return D;
 }
 
-void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD,
-                                            llvm::Value *Storage,
-                                            CGBuilderTy &Builder) {
+llvm::DILocalVariable *
+CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage,
+                                       CGBuilderTy &Builder) {
   assert(DebugKind >= codegenoptions::LimitedDebugInfo);
-  EmitDeclare(VD, Storage, llvm::None, Builder);
+  return EmitDeclare(VD, Storage, llvm::None, Builder);
 }
 
 llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy,
@@ -3694,9 +3729,9 @@
 }
 
 void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
-                                                       llvm::Value *Arg,
+                                                       StringRef Name,
                                                        unsigned ArgNo,
-                                                       llvm::Value *LocalAddr,
+                                                       llvm::AllocaInst *Alloca,
                                                        CGBuilderTy &Builder) {
   assert(DebugKind >= codegenoptions::LimitedDebugInfo);
   ASTContext &C = CGM.getContext();
@@ -3828,19 +3863,11 @@
 
   // Create the descriptor for the parameter.
   auto *debugVar = DBuilder.createParameterVariable(
-      scope, Arg->getName(), ArgNo, tunit, line, type,
+      scope, Name, ArgNo, tunit, line, type,
       CGM.getLangOpts().Optimize, flags);
 
-  if (LocalAddr) {
-    // Insert an llvm.dbg.value into the current block.
-    DBuilder.insertDbgValueIntrinsic(
-        LocalAddr, debugVar, DBuilder.createExpression(),
-        llvm::DebugLoc::get(line, column, scope, CurInlinedAt),
-        Builder.GetInsertBlock());
-  }
-
   // Insert an llvm.dbg.declare into the current block.
-  DBuilder.insertDeclare(Arg, debugVar, DBuilder.createExpression(),
+  DBuilder.insertDeclare(Alloca, debugVar, DBuilder.createExpression(),
                          llvm::DebugLoc::get(line, column, scope, CurInlinedAt),
                          Builder.GetInsertBlock());
 }
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index 36de231..459d7e0 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -19,6 +19,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExternalASTSource.h"
 #include "clang/AST/Type.h"
+#include "clang/AST/TypeOrdering.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Frontend/CodeGenOptions.h"
 #include "llvm/ADT/DenseMap.h"
@@ -81,6 +82,10 @@
 
   llvm::SmallDenseMap<llvm::StringRef, llvm::StringRef> DebugPrefixMap;
 
+  /// Cache that maps VLA types to size expressions for that type,
+  /// represented by instantiated Metadata nodes.
+  llvm::SmallDenseMap<QualType, llvm::Metadata *> SizeExprCache;
+
   struct ObjCInterfaceCacheEntry {
     const ObjCInterfaceType *Type;
     llvm::DIType *Decl;
@@ -309,6 +314,11 @@
 
   void finalize();
 
+  /// Register VLA size expression debug node with the qualified type.
+  void registerVLASizeExpression(QualType Ty, llvm::Metadata *SizeExpr) {
+    SizeExprCache[Ty] = SizeExpr;
+  }
+
   /// Module debugging: Support for building PCMs.
   /// @{
   /// Set the main CU's DwoId field to \p Signature.
@@ -379,8 +389,11 @@
 
   /// Emit call to \c llvm.dbg.declare for an automatic variable
   /// declaration.
-  void EmitDeclareOfAutoVariable(const VarDecl *Decl, llvm::Value *AI,
-                                 CGBuilderTy &Builder);
+  /// Returns a pointer to the DILocalVariable associated with the
+  /// llvm.dbg.declare, or nullptr otherwise.
+  llvm::DILocalVariable *EmitDeclareOfAutoVariable(const VarDecl *Decl,
+                                                   llvm::Value *AI,
+                                                   CGBuilderTy &Builder);
 
   /// Emit call to \c llvm.dbg.declare for an imported variable
   /// declaration in a block.
@@ -398,8 +411,8 @@
   /// Emit call to \c llvm.dbg.declare for the block-literal argument
   /// to a block invocation function.
   void EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
-                                            llvm::Value *Arg, unsigned ArgNo,
-                                            llvm::Value *LocalAddr,
+                                            StringRef Name, unsigned ArgNo,
+                                            llvm::AllocaInst *LocalAddr,
                                             CGBuilderTy &Builder);
 
   /// Emit information about a global variable.
@@ -451,10 +464,14 @@
   llvm::DIMacroFile *CreateTempMacroFile(llvm::DIMacroFile *Parent,
                                          SourceLocation LineLoc,
                                          SourceLocation FileLoc);
+
 private:
   /// Emit call to llvm.dbg.declare for a variable declaration.
-  void EmitDeclare(const VarDecl *decl, llvm::Value *AI,
-                   llvm::Optional<unsigned> ArgNo, CGBuilderTy &Builder);
+  /// Returns a pointer to the DILocalVariable associated with the
+  /// llvm.dbg.declare, or nullptr otherwise.
+  llvm::DILocalVariable *EmitDeclare(const VarDecl *decl, llvm::Value *AI,
+                                     llvm::Optional<unsigned> ArgNo,
+                                     CGBuilderTy &Builder);
 
   /// Build up structure info for the byref.  See \a BuildByRefType.
   llvm::DIType *EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
@@ -482,8 +499,11 @@
   std::string remapDIPath(StringRef) const;
 
   /// Compute the file checksum debug info for input file ID.
-  llvm::DIFile::ChecksumKind computeChecksum(FileID FID,
-                                             SmallString<32> &Checksum) const;
+  Optional<llvm::DIFile::ChecksumKind>
+  computeChecksum(FileID FID, SmallString<32> &Checksum) const;
+
+  /// Get the source of the given file ID.
+  Optional<StringRef> getSource(const SourceManager &SM, FileID FID);
 
   /// Get the file debug info descriptor for the input location.
   llvm::DIFile *getOrCreateFile(SourceLocation Loc);
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 707f8a5..52294c2 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -162,6 +162,10 @@
   // needs to be emitted like a static variable, e.g. a function-scope
   // variable in constant address space in OpenCL.
   if (D.getStorageDuration() != SD_Automatic) {
+    // Static sampler variables translated to function calls.
+    if (D.getType()->isSamplerT())
+      return;
+
     llvm::GlobalValue::LinkageTypes Linkage =
         CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false);
 
@@ -236,7 +240,6 @@
       getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name,
       nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
   GV->setAlignment(getContext().getDeclAlign(&D).getQuantity());
-  setGlobalVisibility(GV, &D);
 
   if (supportsCOMDAT() && GV->isWeakForLinker())
     GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
@@ -244,12 +247,7 @@
   if (D.getTLSKind())
     setTLSMode(GV, D);
 
-  if (D.isExternallyVisible()) {
-    if (D.hasAttr<DLLImportAttr>())
-      GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
-    else if (D.hasAttr<DLLExportAttr>())
-      GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
-  }
+  setGVProperties(GV, &D);
 
   // Make sure the result is of the correct type.
   LangAS ExpectedAS = Ty.getAddressSpace();
@@ -340,6 +338,7 @@
                                   OldGV->getThreadLocalMode(),
                            CGM.getContext().getTargetAddressSpace(D.getType()));
     GV->setVisibility(OldGV->getVisibility());
+    GV->setDSOLocal(OldGV->isDSOLocal());
     GV->setComdat(OldGV->getComdat());
 
     // Steal the name of the old global
@@ -951,6 +950,59 @@
   C->setDoesNotThrow();
 }
 
+void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
+    CGDebugInfo *DI, const VarDecl &D, bool EmitDebugInfo) {
+  // For each dimension stores its QualType and corresponding
+  // size-expression Value.
+  SmallVector<CodeGenFunction::VlaSizePair, 4> Dimensions;
+
+  // Break down the array into individual dimensions.
+  QualType Type1D = D.getType();
+  while (getContext().getAsVariableArrayType(Type1D)) {
+    auto VlaSize = getVLAElements1D(Type1D);
+    if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
+      Dimensions.emplace_back(C, Type1D.getUnqualifiedType());
+    else {
+      auto SizeExprAddr = CreateDefaultAlignTempAlloca(
+          VlaSize.NumElts->getType(), "__vla_expr");
+      Builder.CreateStore(VlaSize.NumElts, SizeExprAddr);
+      Dimensions.emplace_back(SizeExprAddr.getPointer(),
+                              Type1D.getUnqualifiedType());
+    }
+    Type1D = VlaSize.Type;
+  }
+
+  if (!EmitDebugInfo)
+    return;
+
+  // Register each dimension's size-expression with a DILocalVariable,
+  // so that it can be used by CGDebugInfo when instantiating a DISubrange
+  // to describe this array.
+  for (auto &VlaSize : Dimensions) {
+    llvm::Metadata *MD;
+    if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
+      MD = llvm::ConstantAsMetadata::get(C);
+    else {
+      // Create an artificial VarDecl to generate debug info for.
+      IdentifierInfo &NameIdent = getContext().Idents.getOwn(
+          cast<llvm::AllocaInst>(VlaSize.NumElts)->getName());
+      auto VlaExprTy = VlaSize.NumElts->getType()->getPointerElementType();
+      auto QT = getContext().getIntTypeForBitwidth(
+          VlaExprTy->getScalarSizeInBits(), false);
+      auto *ArtificialDecl = VarDecl::Create(
+          getContext(), const_cast<DeclContext *>(D.getDeclContext()),
+          D.getLocation(), D.getLocation(), &NameIdent, QT,
+          getContext().CreateTypeSourceInfo(QT), SC_Auto);
+      ArtificialDecl->setImplicit();
+
+      MD = DI->EmitDeclareOfAutoVariable(ArtificialDecl, VlaSize.NumElts,
+                                         Builder);
+    }
+    assert(MD && "No Size expression debug node created");
+    DI->registerVLASizeExpression(VlaSize.Type, MD);
+  }
+}
+
 /// EmitAutoVarAlloca - Emit the alloca and debug information for a
 /// local variable.  Does not emit initialization or destruction.
 CodeGenFunction::AutoVarEmission
@@ -971,6 +1023,10 @@
   if (Ty->isVariablyModifiedType())
     EmitVariablyModifiedType(Ty);
 
+  auto *DI = getDebugInfo();
+  bool EmitDebugInfo = DI && CGM.getCodeGenOpts().getDebugInfo() >=
+                                 codegenoptions::LimitedDebugInfo;
+
   Address address = Address::invalid();
   if (Ty->isConstantSizeType()) {
     bool NRVO = getLangOpts().ElideConstructors &&
@@ -1104,28 +1160,26 @@
       pushStackRestore(NormalCleanup, Stack);
     }
 
-    llvm::Value *elementCount;
-    QualType elementType;
-    std::tie(elementCount, elementType) = getVLASize(Ty);
-
-    llvm::Type *llvmTy = ConvertTypeForMem(elementType);
+    auto VlaSize = getVLASize(Ty);
+    llvm::Type *llvmTy = ConvertTypeForMem(VlaSize.Type);
 
     // Allocate memory for the array.
-    address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount);
+    address = CreateTempAlloca(llvmTy, alignment, "vla", VlaSize.NumElts);
+
+    // If we have debug info enabled, properly describe the VLA dimensions for
+    // this type by registering the vla size expression for each of the
+    // dimensions.
+    EmitAndRegisterVariableArrayDimensions(DI, D, EmitDebugInfo);
   }
 
   setAddrOfLocalVar(&D, address);
   emission.Addr = address;
 
   // Emit debug info for local var declaration.
-  if (HaveInsertPoint())
-    if (CGDebugInfo *DI = getDebugInfo()) {
-      if (CGM.getCodeGenOpts().getDebugInfo() >=
-          codegenoptions::LimitedDebugInfo) {
-        DI->setLocation(D.getLocation());
-        DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder);
-      }
-    }
+  if (EmitDebugInfo && HaveInsertPoint()) {
+    DI->setLocation(D.getLocation());
+    (void)DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder);
+  }
 
   if (D.hasAttr<AnnotateAttr>())
     EmitVarAnnotations(&D, address.getPointer());
@@ -1228,6 +1282,19 @@
   if (emission.IsByRef)
     emitByrefStructureInit(emission);
 
+  // Initialize the variable here if it doesn't have a initializer and it is a
+  // C struct that is non-trivial to initialize or an array containing such a
+  // struct.
+  if (!Init &&
+      type.isNonTrivialToPrimitiveDefaultInitialize() ==
+          QualType::PDIK_Struct) {
+    LValue Dst = MakeAddrLValue(emission.getAllocatedAddress(), type);
+    if (emission.IsByRef)
+      drillIntoBlockVariable(*this, Dst, &D);
+    defaultInitNonTrivialCStructVar(Dst);
+    return;
+  }
+
   if (isTrivialInitializer(Init))
     return;
 
@@ -1266,7 +1333,7 @@
     llvm::ConstantInt::get(IntPtrTy,
                            getContext().getTypeSizeInChars(type).getQuantity());
 
-  llvm::Type *BP = Int8PtrTy;
+  llvm::Type *BP = AllocaInt8PtrTy;
   if (Loc.getType() != BP)
     Loc = Builder.CreateBitCast(Loc, BP);
 
@@ -1278,7 +1345,8 @@
                          isVolatile);
     // Zero and undef don't require a stores.
     if (!constant->isNullValue() && !isa<llvm::UndefValue>(constant)) {
-      Loc = Builder.CreateBitCast(Loc, constant->getType()->getPointerTo());
+      Loc = Builder.CreateBitCast(Loc,
+        constant->getType()->getPointerTo(Loc.getAddressSpace()));
       emitStoresForInitAfterMemset(constant, Loc.getPointer(),
                                    isVolatile, Builder);
     }
@@ -1402,6 +1470,10 @@
 
   case QualType::DK_objc_weak_lifetime:
     break;
+
+  case QualType::DK_nontrivial_c_struct:
+    destroyer = CodeGenFunction::destroyNonTrivialCStruct;
+    break;
   }
 
   // If we haven't chosen a more specific destroyer, use the default.
@@ -1463,6 +1535,8 @@
     return destroyARCStrongPrecise;
   case QualType::DK_objc_weak_lifetime:
     return destroyARCWeak;
+  case QualType::DK_nontrivial_c_struct:
+    return destroyNonTrivialCStruct;
   }
   llvm_unreachable("Unknown DestructionKind");
 }
@@ -1787,29 +1861,14 @@
   // Use better IR generation for certain implicit parameters.
   if (auto IPD = dyn_cast<ImplicitParamDecl>(&D)) {
     // The only implicit argument a block has is its literal.
-    // We assume this is always passed directly.
+    // This may be passed as an inalloca'ed value on Windows x86.
     if (BlockInfo) {
-      setBlockContextParameter(IPD, ArgNo, Arg.getDirectValue());
+      llvm::Value *V = Arg.isIndirect()
+                           ? Builder.CreateLoad(Arg.getIndirectAddress())
+                           : Arg.getDirectValue();
+      setBlockContextParameter(IPD, ArgNo, V);
       return;
     }
-
-    // Apply any prologue 'this' adjustments required by the ABI. Be careful to
-    // handle the case where 'this' is passed indirectly as part of an inalloca
-    // struct.
-    if (const CXXMethodDecl *MD =
-            dyn_cast_or_null<CXXMethodDecl>(CurCodeDecl)) {
-      if (MD->isVirtual() && IPD == CXXABIThisDecl) {
-        llvm::Value *This = Arg.isIndirect()
-                                ? Builder.CreateLoad(Arg.getIndirectAddress())
-                                : Arg.getDirectValue();
-        This = CGM.getCXXABI().adjustThisParameterInVirtualFunctionPrologue(
-            *this, CurGD, This);
-        if (Arg.isIndirect())
-          Builder.CreateStore(This, Arg.getIndirectAddress());
-        else
-          Arg = ParamValue::forDirect(This);
-      }
-    }
   }
 
   Address DeclPtr = Address::invalid();
@@ -1828,10 +1887,13 @@
     // Don't push a cleanup in a thunk for a method that will also emit a
     // cleanup.
     if (!IsScalar && !CurFuncIsThunk &&
-        getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
-      const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
-      if (RD && RD->hasNonTrivialDestructor())
-        pushDestroy(QualType::DK_cxx_destructor, DeclPtr, Ty);
+        getContext().isParamDestroyedInCallee(Ty)) {
+      if (QualType::DestructionKind DtorKind = Ty.isDestructedType()) {
+        assert((DtorKind == QualType::DK_cxx_destructor ||
+                DtorKind == QualType::DK_nontrivial_c_struct) &&
+               "unexpected destructor type");
+        pushDestroy(DtorKind, DeclPtr, Ty);
+      }
     }
   } else {
     // Otherwise, create a temporary to hold the value.
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index e3437eb..4050499 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -79,6 +79,7 @@
 
   case QualType::DK_objc_strong_lifetime:
   case QualType::DK_objc_weak_lifetime:
+  case QualType::DK_nontrivial_c_struct:
     // We don't care about releasing objects during process teardown.
     assert(!D.getTLSKind() && "should have rejected this");
     return;
@@ -173,10 +174,12 @@
   ConstantAddress DeclAddr(DeclPtr, getContext().getDeclAlign(&D));
 
   if (!T->isReferenceType()) {
-    if (getLangOpts().OpenMP && D.hasAttr<OMPThreadPrivateDeclAttr>())
+    if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd &&
+        D.hasAttr<OMPThreadPrivateDeclAttr>()) {
       (void)CGM.getOpenMPRuntime().emitThreadPrivateVarDefinition(
           &D, DeclAddr, D.getAttr<OMPThreadPrivateDeclAttr>()->getLocation(),
           PerformInit, this);
+    }
     if (PerformInit)
       EmitDeclInit(*this, D, DeclAddr);
     if (CGM.isTypeConstant(D.getType(), true))
@@ -309,7 +312,7 @@
       Fn->setSection(Section);
   }
 
-  SetInternalFunctionAttributes(nullptr, Fn, FI);
+  SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
 
   Fn->setCallingConv(getRuntimeCC());
 
@@ -324,6 +327,10 @@
       !isInSanitizerBlacklist(SanitizerKind::KernelAddress, Fn, Loc))
     Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
 
+  if (getLangOpts().Sanitize.has(SanitizerKind::HWAddress) &&
+      !isInSanitizerBlacklist(SanitizerKind::HWAddress, Fn, Loc))
+    Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
+
   if (getLangOpts().Sanitize.has(SanitizerKind::Thread) &&
       !isInSanitizerBlacklist(SanitizerKind::Thread, Fn, Loc))
     Fn->addFnAttr(llvm::Attribute::SanitizeThread);
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index 1bff649..1978d27 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -112,17 +112,11 @@
 const EHPersonality
 EHPersonality::MSVC_CxxFrameHandler3 = { "__CxxFrameHandler3", nullptr };
 
-/// On Win64, use libgcc's SEH personality function. We fall back to dwarf on
-/// other platforms, unless the user asked for SjLj exceptions.
-static bool useLibGCCSEHPersonality(const llvm::Triple &T) {
-  return T.isOSWindows() && T.getArch() == llvm::Triple::x86_64;
-}
-
 static const EHPersonality &getCPersonality(const llvm::Triple &T,
                                             const LangOptions &L) {
   if (L.SjLjExceptions)
     return EHPersonality::GNU_C_SJLJ;
-  else if (useLibGCCSEHPersonality(T))
+  if (L.SEHExceptions)
     return EHPersonality::GNU_C_SEH;
   return EHPersonality::GNU_C;
 }
@@ -139,12 +133,12 @@
   case ObjCRuntime::GNUstep:
     if (L.ObjCRuntime.getVersion() >= VersionTuple(1, 7))
       return EHPersonality::GNUstep_ObjC;
-    // fallthrough
+    LLVM_FALLTHROUGH;
   case ObjCRuntime::GCC:
   case ObjCRuntime::ObjFW:
     if (L.SjLjExceptions)
       return EHPersonality::GNU_ObjC_SJLJ;
-    else if (useLibGCCSEHPersonality(T))
+    if (L.SEHExceptions)
       return EHPersonality::GNU_ObjC_SEH;
     return EHPersonality::GNU_ObjC;
   }
@@ -155,7 +149,7 @@
                                               const LangOptions &L) {
   if (L.SjLjExceptions)
     return EHPersonality::GNU_CPlusPlus_SJLJ;
-  else if (useLibGCCSEHPersonality(T))
+  if (L.SEHExceptions)
     return EHPersonality::GNU_CPlusPlus_SEH;
   return EHPersonality::GNU_CPlusPlus;
 }
@@ -165,26 +159,27 @@
 static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T,
                                                  const LangOptions &L) {
   switch (L.ObjCRuntime.getKind()) {
+  // In the fragile ABI, just use C++ exception handling and hope
+  // they're not doing crazy exception mixing.
+  case ObjCRuntime::FragileMacOSX:
+    return getCXXPersonality(T, L);
+
   // The ObjC personality defers to the C++ personality for non-ObjC
   // handlers.  Unlike the C++ case, we use the same personality
   // function on targets using (backend-driven) SJLJ EH.
   case ObjCRuntime::MacOSX:
   case ObjCRuntime::iOS:
   case ObjCRuntime::WatchOS:
-    return EHPersonality::NeXT_ObjC;
+    return getObjCPersonality(T, L);
 
-  // In the fragile ABI, just use C++ exception handling and hope
-  // they're not doing crazy exception mixing.
-  case ObjCRuntime::FragileMacOSX:
-    return getCXXPersonality(T, L);
+  case ObjCRuntime::GNUstep:
+    return EHPersonality::GNU_ObjCXX;
 
   // The GCC runtime's personality function inherently doesn't support
-  // mixed EH.  Use the C++ personality just to avoid returning null.
+  // mixed EH.  Use the ObjC personality just to avoid returning null.
   case ObjCRuntime::GCC:
   case ObjCRuntime::ObjFW:
     return getObjCPersonality(T, L);
-  case ObjCRuntime::GNUstep:
-    return EHPersonality::GNU_ObjCXX;
   }
   llvm_unreachable("bad runtime kind");
 }
@@ -210,8 +205,9 @@
   if (T.isWindowsMSVCEnvironment() && !L.ObjC1) {
     if (L.SjLjExceptions)
       return EHPersonality::GNU_CPlusPlus_SJLJ;
-    else
-      return EHPersonality::MSVC_CxxFrameHandler3;
+    if (L.DWARFExceptions)
+      return EHPersonality::GNU_CPlusPlus;
+    return EHPersonality::MSVC_CxxFrameHandler3;
   }
 
   if (L.CPlusPlus && L.ObjC1)
@@ -650,7 +646,7 @@
     return DispatchBlock;
 
   if (EHS.getKind() == EHScope::Terminate)
-    DispatchBlock = getTerminateHandler();
+    DispatchBlock = getTerminateFunclet();
   else
     DispatchBlock = createBasicBlock();
   CGBuilderTy Builder(*this, DispatchBlock);
@@ -1338,24 +1334,15 @@
   if (TerminateHandler)
     return TerminateHandler;
 
-  CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
-
   // Set up the terminate handler.  This block is inserted at the very
   // end of the function by FinishFunction.
   TerminateHandler = createBasicBlock("terminate.handler");
+  CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
   Builder.SetInsertPoint(TerminateHandler);
+
   llvm::Value *Exn = nullptr;
-  SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
-      CurrentFuncletPad);
-  if (EHPersonality::get(*this).usesFuncletPads()) {
-    llvm::Value *ParentPad = CurrentFuncletPad;
-    if (!ParentPad)
-      ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext());
-    CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad);
-  } else {
-    if (getLangOpts().CPlusPlus)
-      Exn = getExceptionFromSlot();
-  }
+  if (getLangOpts().CPlusPlus)
+    Exn = getExceptionFromSlot();
   llvm::CallInst *terminateCall =
       CGM.getCXXABI().emitTerminateForUnexpectedException(*this, Exn);
   terminateCall->setDoesNotReturn();
@@ -1367,6 +1354,42 @@
   return TerminateHandler;
 }
 
+llvm::BasicBlock *CodeGenFunction::getTerminateFunclet() {
+  assert(EHPersonality::get(*this).usesFuncletPads() &&
+         "use getTerminateLandingPad for non-funclet EH");
+
+  llvm::BasicBlock *&TerminateFunclet = TerminateFunclets[CurrentFuncletPad];
+  if (TerminateFunclet)
+    return TerminateFunclet;
+
+  CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
+
+  // Set up the terminate handler.  This block is inserted at the very
+  // end of the function by FinishFunction.
+  TerminateFunclet = createBasicBlock("terminate.handler");
+  Builder.SetInsertPoint(TerminateFunclet);
+
+  // Create the cleanuppad using the current parent pad as its token. Use 'none'
+  // if this is a top-level terminate scope, which is the common case.
+  SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
+      CurrentFuncletPad);
+  llvm::Value *ParentPad = CurrentFuncletPad;
+  if (!ParentPad)
+    ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext());
+  CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad);
+
+  // Emit the __std_terminate call.
+  llvm::CallInst *terminateCall =
+      CGM.getCXXABI().emitTerminateForUnexpectedException(*this, nullptr);
+  terminateCall->setDoesNotReturn();
+  Builder.CreateUnreachable();
+
+  // Restore the saved insertion state.
+  Builder.restoreIP(SavedIP);
+
+  return TerminateFunclet;
+}
+
 llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) {
   if (EHResumeBlock) return EHResumeBlock;
 
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 158baf7..d8fc270 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -74,12 +74,15 @@
   // cast alloca to the default address space when necessary.
   if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) {
     auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
-    auto CurIP = Builder.saveIP();
-    Builder.SetInsertPoint(AllocaInsertPt);
+    llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
+    // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt,
+    // otherwise alloca is inserted at the current insertion point of the
+    // builder.
+    if (!ArraySize)
+      Builder.SetInsertPoint(AllocaInsertPt);
     V = getTargetHooks().performAddrSpaceCast(
         *this, V, getASTAllocaAddressSpace(), LangAS::Default,
         Ty->getPointerTo(DestAddrSpace), /*non-null*/ true);
-    Builder.restoreIP(CurIP);
   }
 
   return Address(V, Align);
@@ -567,7 +570,7 @@
 
 bool CodeGenFunction::isNullPointerAllowed(TypeCheckKind TCK) {
   return TCK == TCK_DowncastPointer || TCK == TCK_Upcast ||
-         TCK == TCK_UpcastToVirtualBase;
+         TCK == TCK_UpcastToVirtualBase || TCK == TCK_DynamicOperation;
 }
 
 bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) {
@@ -575,7 +578,7 @@
   return (RD && RD->hasDefinition() && RD->isDynamicClass()) &&
          (TCK == TCK_MemberAccess || TCK == TCK_MemberCall ||
           TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference ||
-          TCK == TCK_UpcastToVirtualBase);
+          TCK == TCK_UpcastToVirtualBase || TCK == TCK_DynamicOperation);
 }
 
 bool CodeGenFunction::sanitizePerformTypeCheck() const {
@@ -811,6 +814,45 @@
   return false;
 }
 
+llvm::Value *CodeGenFunction::LoadPassedObjectSize(const Expr *E,
+                                                   QualType EltTy) {
+  ASTContext &C = getContext();
+  uint64_t EltSize = C.getTypeSizeInChars(EltTy).getQuantity();
+  if (!EltSize)
+    return nullptr;
+
+  auto *ArrayDeclRef = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts());
+  if (!ArrayDeclRef)
+    return nullptr;
+
+  auto *ParamDecl = dyn_cast<ParmVarDecl>(ArrayDeclRef->getDecl());
+  if (!ParamDecl)
+    return nullptr;
+
+  auto *POSAttr = ParamDecl->getAttr<PassObjectSizeAttr>();
+  if (!POSAttr)
+    return nullptr;
+
+  // Don't load the size if it's a lower bound.
+  int POSType = POSAttr->getType();
+  if (POSType != 0 && POSType != 1)
+    return nullptr;
+
+  // Find the implicit size parameter.
+  auto PassedSizeIt = SizeArguments.find(ParamDecl);
+  if (PassedSizeIt == SizeArguments.end())
+    return nullptr;
+
+  const ImplicitParamDecl *PassedSizeDecl = PassedSizeIt->second;
+  assert(LocalDeclMap.count(PassedSizeDecl) && "Passed size not loadable");
+  Address AddrOfSize = LocalDeclMap.find(PassedSizeDecl)->second;
+  llvm::Value *SizeInBytes = EmitLoadOfScalar(AddrOfSize, /*Volatile=*/false,
+                                              C.getSizeType(), E->getExprLoc());
+  llvm::Value *SizeOfElement =
+      llvm::ConstantInt::get(SizeInBytes->getType(), EltSize);
+  return Builder.CreateUDiv(SizeInBytes, SizeOfElement);
+}
+
 /// If Base is known to point to the start of an array, return the length of
 /// that array. Return 0 if the length cannot be determined.
 static llvm::Value *getArrayIndexingBound(
@@ -831,10 +873,17 @@
       if (const auto *CAT = dyn_cast<ConstantArrayType>(AT))
         return CGF.Builder.getInt(CAT->getSize());
       else if (const auto *VAT = dyn_cast<VariableArrayType>(AT))
-        return CGF.getVLASize(VAT).first;
+        return CGF.getVLASize(VAT).NumElts;
+      // Ignore pass_object_size here. It's not applicable on decayed pointers.
     }
   }
 
+  QualType EltTy{Base->getType()->getPointeeOrArrayElementType(), 0};
+  if (llvm::Value *POS = CGF.LoadPassedObjectSize(Base, EltTy)) {
+    IndexedType = Base->getType();
+    return POS;
+  }
+
   return nullptr;
 }
 
@@ -985,8 +1034,12 @@
     // Derived-to-base conversions.
     case CK_UncheckedDerivedToBase:
     case CK_DerivedToBase: {
-      Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo,
-                                              TBAAInfo);
+      // TODO: Support accesses to members of base classes in TBAA. For now, we
+      // conservatively pretend that the complete object is of the base class
+      // type.
+      if (TBAAInfo)
+        *TBAAInfo = CGM.getTBAAAccessInfo(E->getType());
+      Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo);
       auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl();
       return GetAddressOfBaseClass(Addr, Derived,
                                    CE->path_begin(), CE->path_end(),
@@ -1535,8 +1588,6 @@
     Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
   }
 
-  if (BaseInfo.getMayAlias())
-    TBAAInfo = CGM.getTBAAMayAliasAccessInfo();
   CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
 
   if (EmitScalarRangeCheck(Load, Ty, Loc)) {
@@ -1619,8 +1670,6 @@
     Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
   }
 
-  if (BaseInfo.getMayAlias())
-    TBAAInfo = CGM.getTBAAMayAliasAccessInfo();
   CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
 }
 
@@ -2161,22 +2210,27 @@
   return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
 }
 
-Address CodeGenFunction::EmitLoadOfReference(Address Addr,
-                                             const ReferenceType *RefTy,
-                                             LValueBaseInfo *BaseInfo,
-                                             TBAAAccessInfo *TBAAInfo) {
-  llvm::Value *Ptr = Builder.CreateLoad(Addr);
-  return Address(Ptr, getNaturalTypeAlignment(RefTy->getPointeeType(),
-                                              BaseInfo, TBAAInfo,
-                                              /* forPointeeType= */ true));
+Address
+CodeGenFunction::EmitLoadOfReference(LValue RefLVal,
+                                     LValueBaseInfo *PointeeBaseInfo,
+                                     TBAAAccessInfo *PointeeTBAAInfo) {
+  llvm::LoadInst *Load = Builder.CreateLoad(RefLVal.getAddress(),
+                                            RefLVal.isVolatile());
+  CGM.DecorateInstructionWithTBAA(Load, RefLVal.getTBAAInfo());
+
+  CharUnits Align = getNaturalTypeAlignment(RefLVal.getType()->getPointeeType(),
+                                            PointeeBaseInfo, PointeeTBAAInfo,
+                                            /* forPointeeType= */ true);
+  return Address(Load, Align);
 }
 
-LValue CodeGenFunction::EmitLoadOfReferenceLValue(Address RefAddr,
-                                                  const ReferenceType *RefTy) {
-  LValueBaseInfo BaseInfo;
-  TBAAAccessInfo TBAAInfo;
-  Address Addr = EmitLoadOfReference(RefAddr, RefTy, &BaseInfo, &TBAAInfo);
-  return MakeAddrLValue(Addr, RefTy->getPointeeType(), BaseInfo, TBAAInfo);
+LValue CodeGenFunction::EmitLoadOfReferenceLValue(LValue RefLVal) {
+  LValueBaseInfo PointeeBaseInfo;
+  TBAAAccessInfo PointeeTBAAInfo;
+  Address PointeeAddr = EmitLoadOfReference(RefLVal, &PointeeBaseInfo,
+                                            &PointeeTBAAInfo);
+  return MakeAddrLValue(PointeeAddr, RefLVal.getType()->getPointeeType(),
+                        PointeeBaseInfo, PointeeTBAAInfo);
 }
 
 Address CodeGenFunction::EmitLoadOfPointer(Address Ptr,
@@ -2211,17 +2265,17 @@
   V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
   CharUnits Alignment = CGF.getContext().getDeclAlign(VD);
   Address Addr(V, Alignment);
-  LValue LV;
   // Emit reference to the private copy of the variable if it is an OpenMP
   // threadprivate variable.
-  if (CGF.getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>())
+  if (CGF.getLangOpts().OpenMP && !CGF.getLangOpts().OpenMPSimd &&
+      VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
     return EmitThreadPrivateVarDeclLValue(CGF, VD, T, Addr, RealVarTy,
                                           E->getExprLoc());
-  if (auto RefTy = VD->getType()->getAs<ReferenceType>()) {
-    LV = CGF.EmitLoadOfReferenceLValue(Addr, RefTy);
-  } else {
-    LV = CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
   }
+  LValue LV = VD->getType()->isReferenceType() ?
+      CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
+                                    AlignmentSource::Decl) :
+      CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
   setObjCGCLValueClass(CGF.getContext(), E, LV);
   return LV;
 }
@@ -2339,18 +2393,18 @@
       else if (CapturedStmtInfo) {
         auto I = LocalDeclMap.find(VD);
         if (I != LocalDeclMap.end()) {
-          if (auto RefTy = VD->getType()->getAs<ReferenceType>())
-            return EmitLoadOfReferenceLValue(I->second, RefTy);
+          if (VD->getType()->isReferenceType())
+            return EmitLoadOfReferenceLValue(I->second, VD->getType(),
+                                             AlignmentSource::Decl);
           return MakeAddrLValue(I->second, T);
         }
         LValue CapLVal =
             EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD),
                                     CapturedStmtInfo->getContextValue());
-        bool MayAlias = CapLVal.getBaseInfo().getMayAlias();
         return MakeAddrLValue(
             Address(CapLVal.getPointer(), getContext().getDeclAlign(VD)),
-            CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl, MayAlias),
-            CGM.getTBAAAccessInfo(CapLVal.getType()));
+            CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl),
+            CapLVal.getTBAAInfo());
       }
 
       assert(isa<BlockDecl>(CurCodeDecl));
@@ -2398,7 +2452,8 @@
 
 
     // Check for OpenMP threadprivate variables.
-    if (getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
+    if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd &&
+        VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
       return EmitThreadPrivateVarDeclLValue(
           *this, VD, T, addr, getTypes().ConvertTypeForMem(VD->getType()),
           E->getExprLoc());
@@ -2411,12 +2466,9 @@
     }
 
     // Drill into reference types.
-    LValue LV;
-    if (auto RefTy = VD->getType()->getAs<ReferenceType>()) {
-      LV = EmitLoadOfReferenceLValue(addr, RefTy);
-    } else {
-      LV = MakeAddrLValue(addr, T, AlignmentSource::Decl);
-    }
+    LValue LV = VD->getType()->isReferenceType() ?
+        EmitLoadOfReferenceLValue(addr, VD->getType(), AlignmentSource::Decl) :
+        MakeAddrLValue(addr, T, AlignmentSource::Decl);
 
     bool isLocalStorage = VD->hasLocalStorage();
 
@@ -2497,7 +2549,7 @@
          ? emitAddrOfRealComponent(LV.getAddress(), LV.getType())
          : emitAddrOfImagComponent(LV.getAddress(), LV.getType()));
     LValue ElemLV = MakeAddrLValue(Component, T, LV.getBaseInfo(),
-                                   CGM.getTBAAAccessInfo(T));
+                                   CGM.getTBAAInfoForSubobject(LV, T));
     ElemLV.getQuals().addQualifiers(LV.getQuals());
     return ElemLV;
   }
@@ -2780,7 +2832,7 @@
   assert(IsSanitizerScope);
   assert(Checked.size() > 0);
   assert(CheckHandler >= 0 &&
-         CheckHandler < sizeof(SanitizerHandlers) / sizeof(*SanitizerHandlers));
+         size_t(CheckHandler) < llvm::array_lengthof(SanitizerHandlers));
   const StringRef CheckName = SanitizerHandlers[CheckHandler].Name;
 
   llvm::Value *FatalCond = nullptr;
@@ -3031,6 +3083,17 @@
   CGM.addUsedGlobal(F);
 }
 
+void CodeGenFunction::EmitUnreachable(SourceLocation Loc) {
+  if (SanOpts.has(SanitizerKind::Unreachable)) {
+    SanitizerScope SanScope(this);
+    EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
+                             SanitizerKind::Unreachable),
+              SanitizerHandler::BuiltinUnreachable,
+              EmitCheckSourceLocation(Loc), None);
+  }
+  Builder.CreateUnreachable();
+}
+
 void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked) {
   llvm::BasicBlock *Cont = createBasicBlock("cont");
 
@@ -3072,8 +3135,6 @@
   // Expressions of array type can't be bitfields or vector elements.
   LValue LV = EmitLValue(E);
   Address Addr = LV.getAddress();
-  if (BaseInfo) *BaseInfo = LV.getBaseInfo();
-  if (TBAAInfo) *TBAAInfo = LV.getTBAAInfo();
 
   // If the array type was an incomplete type, we need to make sure
   // the decay ends up being the right type.
@@ -3088,7 +3149,15 @@
     Addr = Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), "arraydecay");
   }
 
+  // The result of this decay conversion points to an array element within the
+  // base lvalue. However, since TBAA currently does not support representing
+  // accesses to elements of member arrays, we conservatively represent accesses
+  // to the pointee object as if it had no any base lvalue specified.
+  // TODO: Support TBAA for member arrays.
   QualType EltType = E->getType()->castAsArrayTypeUnsafe()->getElementType();
+  if (BaseInfo) *BaseInfo = LV.getBaseInfo();
+  if (TBAAInfo) *TBAAInfo = CGM.getTBAAAccessInfo(EltType);
+
   return Builder.CreateElementBitCast(Addr, ConvertTypeForMem(EltType));
 }
 
@@ -3229,22 +3298,22 @@
     Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true,
                                  SignedIndices, E->getExprLoc());
     return MakeAddrLValue(Addr, EltType, LV.getBaseInfo(),
-                          CGM.getTBAAAccessInfo(EltType));
+                          CGM.getTBAAInfoForSubobject(LV, EltType));
   }
 
-  LValueBaseInfo BaseInfo;
-  TBAAAccessInfo TBAAInfo;
+  LValueBaseInfo EltBaseInfo;
+  TBAAAccessInfo EltTBAAInfo;
   Address Addr = Address::invalid();
   if (const VariableArrayType *vla =
            getContext().getAsVariableArrayType(E->getType())) {
     // The base must be a pointer, which is not an aggregate.  Emit
     // it.  It needs to be emitted first in case it's what captures
     // the VLA bounds.
-    Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo);
+    Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
 
     // The element count here is the total number of non-VLA elements.
-    llvm::Value *numElements = getVLASize(vla).first;
+    llvm::Value *numElements = getVLASize(vla).NumElts;
 
     // Effectively, the multiply by the VLA size is part of the GEP.
     // GEP indexes are signed, and scaling an index isn't permitted to
@@ -3264,7 +3333,7 @@
     // Indexing over an interface, as in "NSString *P; P[4];"
 
     // Emit the base pointer.
-    Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo);
+    Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
 
     CharUnits InterfaceSize = getContext().getTypeSizeInChars(OIT);
@@ -3311,18 +3380,18 @@
         *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx},
         E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices,
         E->getExprLoc());
-    BaseInfo = ArrayLV.getBaseInfo();
-    TBAAInfo = CGM.getTBAAAccessInfo(E->getType());
+    EltBaseInfo = ArrayLV.getBaseInfo();
+    EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
   } else {
     // The base must be a pointer; emit it with an estimate of its alignment.
-    Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo);
+    Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
     Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(),
                                  !getLangOpts().isSignedOverflowDefined(),
                                  SignedIndices, E->getExprLoc());
   }
 
-  LValue LV = MakeAddrLValue(Addr, E->getType(), BaseInfo, TBAAInfo);
+  LValue LV = MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo);
 
   if (getLangOpts().ObjC1 &&
       getLangOpts().getGC() != LangOptions::NonGC) {
@@ -3361,9 +3430,12 @@
       return CGF.Builder.CreateElementBitCast(Addr,
                                               CGF.ConvertTypeForMem(ElTy));
     }
-    LValueBaseInfo TypeInfo;
-    CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &TypeInfo);
-    BaseInfo.mergeForCast(TypeInfo);
+    LValueBaseInfo TypeBaseInfo;
+    TBAAAccessInfo TypeTBAAInfo;
+    CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &TypeBaseInfo,
+                                                  &TypeTBAAInfo);
+    BaseInfo.mergeForCast(TypeBaseInfo);
+    TBAAInfo = CGF.CGM.mergeTBAAInfoForCast(TBAAInfo, TypeTBAAInfo);
     return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align);
   }
   return CGF.EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo);
@@ -3475,7 +3547,7 @@
         emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, TBAAInfo,
                                 BaseTy, VLA->getElementType(), IsLowerBound);
     // The element count here is the total number of non-VLA elements.
-    llvm::Value *NumElements = getVLASize(VLA).first;
+    llvm::Value *NumElements = getVLASize(VLA).NumElts;
 
     // Effectively, the multiply by the VLA size is part of the GEP.
     // GEP indexes are signed, and scaling an index isn't permitted to
@@ -3509,7 +3581,7 @@
         ResultExprTy, !getLangOpts().isSignedOverflowDefined(),
         /*SignedIndices=*/false, E->getExprLoc());
     BaseInfo = ArrayLV.getBaseInfo();
-    TBAAInfo = CGM.getTBAAAccessInfo(ResultExprTy);
+    TBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, ResultExprTy);
   } else {
     Address Base = emitOMPArraySectionBase(*this, E->getBase(), BaseInfo,
                                            TBAAInfo, BaseTy, ResultExprTy,
@@ -3699,7 +3771,7 @@
     QualType fieldType =
       field->getType().withCVRQualifiers(base.getVRQualifiers());
     // TODO: Support TBAA for bit fields.
-    LValueBaseInfo FieldBaseInfo(BaseInfo.getAlignmentSource(), false);
+    LValueBaseInfo FieldBaseInfo(BaseInfo.getAlignmentSource());
     return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo,
                                 TBAAAccessInfo());
   }
@@ -3710,16 +3782,14 @@
   QualType FieldType = field->getType();
   const RecordDecl *rec = field->getParent();
   AlignmentSource BaseAlignSource = BaseInfo.getAlignmentSource();
-  LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(BaseAlignSource), false);
+  LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(BaseAlignSource));
   TBAAAccessInfo FieldTBAAInfo;
-  if (BaseInfo.getMayAlias() || rec->hasAttr<MayAliasAttr>() ||
-          FieldType->isVectorType()) {
-    FieldBaseInfo.setMayAlias(true);
-    FieldTBAAInfo = CGM.getTBAAMayAliasAccessInfo();
+  if (base.getTBAAInfo().isMayAlias() ||
+          rec->hasAttr<MayAliasAttr>() || FieldType->isVectorType()) {
+    FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
   } else if (rec->isUnion()) {
     // TODO: Support TBAA for unions.
-    FieldBaseInfo.setMayAlias(true);
-    FieldTBAAInfo = CGM.getTBAAMayAliasAccessInfo();
+    FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
   } else {
     // If no base type been assigned for the base access, then try to generate
     // one for this base lvalue.
@@ -3738,12 +3808,14 @@
       FieldTBAAInfo.Offset +=
           Layout.getFieldOffset(field->getFieldIndex()) / CharWidth;
 
-    // Update the final access type.
+    // Update the final access type and size.
     FieldTBAAInfo.AccessType = CGM.getTBAATypeInfo(FieldType);
+    FieldTBAAInfo.Size =
+        getContext().getTypeSizeInChars(FieldType).getQuantity();
   }
 
   Address addr = base.getAddress();
-  unsigned cvr = base.getVRQualifiers();
+  unsigned RecordCVR = base.getVRQualifiers();
   if (rec->isUnion()) {
     // For unions, there is no pointer adjustment.
     assert(!FieldType->isReferenceType() && "union has reference member");
@@ -3758,22 +3830,16 @@
     addr = emitAddrOfFieldStorage(*this, addr, field);
 
     // If this is a reference field, load the reference right now.
-    if (const ReferenceType *refType = FieldType->getAs<ReferenceType>()) {
-      llvm::LoadInst *load = Builder.CreateLoad(addr, "ref");
-      if (cvr & Qualifiers::Volatile) load->setVolatile(true);
+    if (FieldType->isReferenceType()) {
+      LValue RefLVal = MakeAddrLValue(addr, FieldType, FieldBaseInfo,
+                                      FieldTBAAInfo);
+      if (RecordCVR & Qualifiers::Volatile)
+        RefLVal.getQuals().setVolatile(true);
+      addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo);
 
-      CGM.DecorateInstructionWithTBAA(load, FieldTBAAInfo);
-
-      FieldType = refType->getPointeeType();
-      CharUnits Align = getNaturalTypeAlignment(FieldType, &FieldBaseInfo,
-                                                &FieldTBAAInfo,
-                                                /* forPointeeType= */ true);
-      addr = Address(load, Align);
-
-      // Qualifiers on the struct don't apply to the referencee, and
-      // we'll pick up CVR from the actual type later, so reset these
-      // additional qualifiers now.
-      cvr = 0;
+      // Qualifiers on the struct don't apply to the referencee.
+      RecordCVR = 0;
+      FieldType = FieldType->getPointeeType();
     }
   }
 
@@ -3788,7 +3854,7 @@
     addr = EmitFieldAnnotations(field, addr);
 
   LValue LV = MakeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo);
-  LV.getQuals().addCVRQualifiers(cvr);
+  LV.getQuals().addCVRQualifiers(RecordCVR);
 
   // __weak attribute on a field is ignored.
   if (LV.getQuals().getObjCGCAttr() == Qualifiers::Weak)
@@ -3811,13 +3877,14 @@
   llvm::Type *llvmType = ConvertTypeForMem(FieldType);
   V = Builder.CreateElementBitCast(V, llvmType, Field->getName());
 
-  // TODO: access-path TBAA?
+  // TODO: Generate TBAA information that describes this access as a structure
+  // member access and not just an access to an object of the field's type. This
+  // should be similar to what we do in EmitLValueForField().
   LValueBaseInfo BaseInfo = Base.getBaseInfo();
-  LValueBaseInfo FieldBaseInfo(
-      getFieldAlignmentSource(BaseInfo.getAlignmentSource()),
-      BaseInfo.getMayAlias());
+  AlignmentSource FieldAlignSource = BaseInfo.getAlignmentSource();
+  LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(FieldAlignSource));
   return MakeAddrLValue(V, FieldType, FieldBaseInfo,
-                        CGM.getTBAAAccessInfo(FieldType));
+                        CGM.getTBAAInfoForSubobject(Base, FieldType));
 }
 
 LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){
@@ -3930,11 +3997,10 @@
     AlignmentSource alignSource =
       std::max(lhs->getBaseInfo().getAlignmentSource(),
                rhs->getBaseInfo().getAlignmentSource());
-    bool MayAlias = lhs->getBaseInfo().getMayAlias() ||
-                    rhs->getBaseInfo().getMayAlias();
-    return MakeAddrLValue(result, expr->getType(),
-                          LValueBaseInfo(alignSource, MayAlias),
-                          CGM.getTBAAAccessInfo(expr->getType()));
+    TBAAAccessInfo TBAAInfo = CGM.mergeTBAAInfoForConditionalOperator(
+        lhs->getTBAAInfo(), rhs->getTBAAInfo());
+    return MakeAddrLValue(result, expr->getType(), LValueBaseInfo(alignSource),
+                          TBAAInfo);
   } else {
     assert((lhs || rhs) &&
            "both operands of glvalue conditional are throw-expressions?");
@@ -4032,8 +4098,11 @@
         This, DerivedClassDecl, E->path_begin(), E->path_end(),
         /*NullCheckValue=*/false, E->getExprLoc());
 
+    // TODO: Support accesses to members of base classes in TBAA. For now, we
+    // conservatively pretend that the complete object is of the base class
+    // type.
     return MakeAddrLValue(Base, E->getType(), LV.getBaseInfo(),
-                          CGM.getTBAAAccessInfo(E->getType()));
+                          CGM.getTBAAInfoForSubobject(LV, E->getType()));
   }
   case CK_ToUnion:
     return EmitAggExprToLValue(E);
@@ -4061,7 +4130,7 @@
                                 CFITCK_DerivedCast, E->getLocStart());
 
     return MakeAddrLValue(Derived, E->getType(), LV.getBaseInfo(),
-                          CGM.getTBAAAccessInfo(E->getType()));
+                          CGM.getTBAAInfoForSubobject(LV, E->getType()));
   }
   case CK_LValueBitCast: {
     // This must be a reinterpret_cast (or c-style equivalent).
@@ -4078,14 +4147,14 @@
                                 CFITCK_UnrelatedCast, E->getLocStart());
 
     return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(),
-                          CGM.getTBAAAccessInfo(E->getType()));
+                          CGM.getTBAAInfoForSubobject(LV, E->getType()));
   }
   case CK_ObjCObjectLValueCast: {
     LValue LV = EmitLValue(E->getSubExpr());
     Address V = Builder.CreateElementBitCast(LV.getAddress(),
                                              ConvertType(E->getType()));
     return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(),
-                          CGM.getTBAAAccessInfo(E->getType()));
+                          CGM.getTBAAInfoForSubobject(LV, E->getType()));
   }
   case CK_ZeroToOCLQueue:
     llvm_unreachable("NULL to OpenCL queue lvalue cast is not valid");
@@ -4414,8 +4483,7 @@
 
   CalleeType = getContext().getCanonicalType(CalleeType);
 
-  const auto *FnType =
-      cast<FunctionType>(cast<PointerType>(CalleeType)->getPointeeType());
+  auto PointeeType = cast<PointerType>(CalleeType)->getPointeeType();
 
   CGCallee Callee = OrigCallee;
 
@@ -4424,8 +4492,12 @@
     if (llvm::Constant *PrefixSig =
             CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) {
       SanitizerScope SanScope(this);
+      // Remove any (C++17) exception specifications, to allow calling e.g. a
+      // noexcept function through a non-noexcept pointer.
+      auto ProtoTy =
+        getContext().getFunctionTypeWithExceptionSpec(PointeeType, EST_None);
       llvm::Constant *FTRTTIConst =
-          CGM.GetAddrOfRTTIDescriptor(QualType(FnType, 0), /*ForEH=*/true);
+          CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true);
       llvm::Type *PrefixStructTyElems[] = {PrefixSig->getType(), Int32Ty};
       llvm::StructType *PrefixStructTy = llvm::StructType::get(
           CGM.getLLVMContext(), PrefixStructTyElems, /*isPacked=*/true);
@@ -4465,6 +4537,8 @@
     }
   }
 
+  const auto *FnType = cast<FunctionType>(PointeeType);
+
   // If we are checking indirect calls and this call is indirect, check that the
   // function pointer is a member of the bit set for the function type.
   if (SanOpts.has(SanitizerKind::CFIICall) &&
@@ -4472,7 +4546,12 @@
     SanitizerScope SanScope(this);
     EmitSanitizerStatReport(llvm::SanStat_CFI_ICall);
 
-    llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(FnType, 0));
+    llvm::Metadata *MD;
+    if (CGM.getCodeGenOpts().SanitizeCfiICallGeneralizePointers)
+      MD = CGM.CreateMetadataIdentifierGeneralized(QualType(FnType, 0));
+    else
+      MD = CGM.CreateMetadataIdentifierForType(QualType(FnType, 0));
+
     llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD);
 
     llvm::Value *CalleePtr = Callee.getFunctionPointer();
@@ -4562,7 +4641,7 @@
     Callee.setFunctionPointer(CalleePtr);
   }
 
-  return EmitCall(FnInfo, Callee, ReturnValue, Args);
+  return EmitCall(FnInfo, Callee, ReturnValue, Args, nullptr, E->getExprLoc());
 }
 
 LValue CodeGenFunction::
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 1ab8433..394336e 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -14,6 +14,7 @@
 #include "CodeGenFunction.h"
 #include "CGObjCRuntime.h"
 #include "CodeGenModule.h"
+#include "ConstantEmitter.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclTemplate.h"
@@ -76,8 +77,15 @@
   /// then loads the result into DestPtr.
   void EmitAggLoadOfLValue(const Expr *E);
 
+  enum ExprValueKind {
+    EVK_RValue,
+    EVK_NonRValue
+  };
+
   /// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired.
-  void EmitFinalDestCopy(QualType type, const LValue &src);
+  /// SrcIsRValue is true if source comes from an RValue.
+  void EmitFinalDestCopy(QualType type, const LValue &src,
+                         ExprValueKind SrcValueKind = EVK_NonRValue);
   void EmitFinalDestCopy(QualType type, RValue src);
   void EmitCopy(QualType type, const AggValueSlot &dest,
                 const AggValueSlot &src);
@@ -85,7 +93,7 @@
   void EmitMoveFromReturnSlot(const Expr *E, RValue Src);
 
   void EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
-                     QualType elementType, InitListExpr *E);
+                     QualType ArrayQTy, InitListExpr *E);
 
   AggValueSlot::NeedsGCBarriers_t needsGC(QualType T) {
     if (CGF.getLangOpts().getGC() && TypeRequiresGCollection(T))
@@ -245,6 +253,13 @@
 /// directly into the return value slot.  Otherwise, a final move
 /// will be performed.
 void AggExprEmitter::EmitMoveFromReturnSlot(const Expr *E, RValue src) {
+  // Push destructor if the result is ignored and the type is a C struct that
+  // is non-trivial to destroy.
+  QualType Ty = E->getType();
+  if (Dest.isIgnored() &&
+      Ty.isDestructedType() == QualType::DK_nontrivial_c_struct)
+    CGF.pushDestroy(Ty.isDestructedType(), src.getAggregateAddress(), Ty);
+
   if (shouldUseDestForReturnSlot()) {
     // Logically, Dest.getAddr() should equal Src.getAggregateAddr().
     // The possibility of undef rvalues complicates that a lot,
@@ -261,11 +276,12 @@
 void AggExprEmitter::EmitFinalDestCopy(QualType type, RValue src) {
   assert(src.isAggregate() && "value must be aggregate value!");
   LValue srcLV = CGF.MakeAddrLValue(src.getAggregateAddress(), type);
-  EmitFinalDestCopy(type, srcLV);
+  EmitFinalDestCopy(type, srcLV, EVK_RValue);
 }
 
 /// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired.
-void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src) {
+void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src,
+                                       ExprValueKind SrcValueKind) {
   // If Dest is ignored, then we're evaluating an aggregate expression
   // in a context that doesn't care about the result.  Note that loads
   // from volatile l-values force the existence of a non-ignored
@@ -273,6 +289,28 @@
   if (Dest.isIgnored())
     return;
 
+  // Copy non-trivial C structs here.
+  LValue DstLV = CGF.MakeAddrLValue(
+      Dest.getAddress(), Dest.isVolatile() ? type.withVolatile() : type);
+
+  if (SrcValueKind == EVK_RValue) {
+    if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct) {
+      if (Dest.isPotentiallyAliased())
+        CGF.callCStructMoveAssignmentOperator(DstLV, src);
+      else
+        CGF.callCStructMoveConstructor(DstLV, src);
+      return;
+    }
+  } else {
+    if (type.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) {
+      if (Dest.isPotentiallyAliased())
+        CGF.callCStructCopyAssignmentOperator(DstLV, src);
+      else
+        CGF.callCStructCopyConstructor(DstLV, src);
+      return;
+    }
+  }
+
   AggValueSlot srcAgg =
     AggValueSlot::forLValue(src, AggValueSlot::IsDestructed,
                             needsGC(type), AggValueSlot::IsAliased);
@@ -298,7 +336,9 @@
   // If the result of the assignment is used, copy the LHS there also.
   // It's volatile if either side is.  Use the minimum alignment of
   // the two sides.
-  CGF.EmitAggregateCopy(dest.getAddress(), src.getAddress(), type,
+  LValue DestLV = CGF.MakeAddrLValue(dest.getAddress(), type);
+  LValue SrcLV = CGF.MakeAddrLValue(src.getAddress(), type);
+  CGF.EmitAggregateCopy(DestLV, SrcLV, type,
                         dest.isVolatile() || src.isVolatile());
 }
 
@@ -392,12 +432,15 @@
 
 /// \brief Emit initialization of an array from an initializer list.
 void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
-                                   QualType elementType, InitListExpr *E) {
+                                   QualType ArrayQTy, InitListExpr *E) {
   uint64_t NumInitElements = E->getNumInits();
 
   uint64_t NumArrayElements = AType->getNumElements();
   assert(NumInitElements <= NumArrayElements);
 
+  QualType elementType =
+      CGF.getContext().getAsArrayType(ArrayQTy)->getElementType();
+
   // DestPtr is an array*.  Construct an elementType* by drilling
   // down a level.
   llvm::Value *zero = llvm::ConstantInt::get(CGF.SizeTy, 0);
@@ -409,6 +452,29 @@
   CharUnits elementAlign =
     DestPtr.getAlignment().alignmentOfArrayElement(elementSize);
 
+  // Consider initializing the array by copying from a global. For this to be
+  // more efficient than per-element initialization, the size of the elements
+  // with explicit initializers should be large enough.
+  if (NumInitElements * elementSize.getQuantity() > 16 &&
+      elementType.isTriviallyCopyableType(CGF.getContext())) {
+    CodeGen::CodeGenModule &CGM = CGF.CGM;
+    ConstantEmitter Emitter(CGM);
+    LangAS AS = ArrayQTy.getAddressSpace();
+    if (llvm::Constant *C = Emitter.tryEmitForInitializer(E, AS, ArrayQTy)) {
+      auto GV = new llvm::GlobalVariable(
+          CGM.getModule(), C->getType(),
+          CGM.isTypeConstant(ArrayQTy, /* ExcludeCtorDtor= */ true),
+          llvm::GlobalValue::PrivateLinkage, C, "constinit",
+          /* InsertBefore= */ nullptr, llvm::GlobalVariable::NotThreadLocal,
+          CGM.getContext().getTargetAddressSpace(AS));
+      Emitter.finalize(GV);
+      CharUnits Align = CGM.getContext().getTypeAlignInChars(ArrayQTy);
+      GV->setAlignment(Align.getQuantity());
+      EmitFinalDestCopy(ArrayQTy, CGF.MakeAddrLValue(GV, ArrayQTy, Align));
+      return;
+    }
+  }
+
   // Exception safety requires us to destroy all the
   // already-constructed members if an initializer throws.
   // For that, we'll need an EH cleanup.
@@ -692,7 +758,7 @@
       return Visit(E->getSubExpr());
     }
 
-    // fallthrough
+    LLVM_FALLTHROUGH;
 
   case CK_NoOp:
   case CK_UserDefinedConversion:
@@ -1156,11 +1222,8 @@
 
   // Handle initialization of an array.
   if (E->getType()->isArrayType()) {
-    QualType elementType =
-        CGF.getContext().getAsArrayType(E->getType())->getElementType();
-
     auto AType = cast<llvm::ArrayType>(Dest.getAddress().getElementType());
-    EmitArrayInit(Dest.getAddress(), AType, elementType, E);
+    EmitArrayInit(Dest.getAddress(), AType, E->getType(), E);
     return;
   }
 
@@ -1541,12 +1604,14 @@
   return LV;
 }
 
-void CodeGenFunction::EmitAggregateCopy(Address DestPtr,
-                                        Address SrcPtr, QualType Ty,
-                                        bool isVolatile,
+void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src,
+                                        QualType Ty, bool isVolatile,
                                         bool isAssignment) {
   assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex");
 
+  Address DestPtr = Dest.getAddress();
+  Address SrcPtr = Src.getAddress();
+
   if (getLangOpts().CPlusPlus) {
     if (const RecordType *RT = Ty->getAs<RecordType>()) {
       CXXRecordDecl *Record = cast<CXXRecordDecl>(RT->getDecl());
@@ -1562,7 +1627,7 @@
         return;
     }
   }
-  
+
   // Aggregate assignment turns into llvm.memcpy.  This is almost valid per
   // C99 6.5.16.1p3, which states "If the value being stored in an object is
   // read from another object that overlaps in anyway the storage of the first
@@ -1657,4 +1722,10 @@
   // the optimizer wishes to expand it in to scalar memory operations.
   if (llvm::MDNode *TBAAStructTag = CGM.getTBAAStructInfo(Ty))
     Inst->setMetadata(llvm::LLVMContext::MD_tbaa_struct, TBAAStructTag);
+
+  if (CGM.getCodeGenOpts().NewStructPathTBAA) {
+    TBAAAccessInfo TBAAInfo = CGM.mergeTBAAInfoForMemoryTransfer(
+        Dest.getTBAAInfo(), Src.getTBAAInfo());
+    CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo);
+  }
 }
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index 3eb4bd6..9f70ef3 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -89,7 +89,8 @@
       *this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args, RtlArgs);
   auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall(
       Args, FPT, CallInfo.ReqArgs, CallInfo.PrefixSize);
-  return EmitCall(FnInfo, Callee, ReturnValue, Args);
+  return EmitCall(FnInfo, Callee, ReturnValue, Args, nullptr,
+                  CE ? CE->getExprLoc() : SourceLocation());
 }
 
 RValue CodeGenFunction::EmitCXXDestructorCall(
@@ -241,11 +242,15 @@
     }
   }
 
-  Address This = Address::invalid();
-  if (IsArrow)
-    This = EmitPointerWithAlignment(Base);
-  else
-    This = EmitLValue(Base).getAddress();
+  LValue This;
+  if (IsArrow) {
+    LValueBaseInfo BaseInfo;
+    TBAAAccessInfo TBAAInfo;
+    Address ThisValue = EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo);
+    This = MakeAddrLValue(ThisValue, Base->getType(), BaseInfo, TBAAInfo);
+  } else {
+    This = EmitLValue(Base);
+  }
 
 
   if (MD->isTrivial() || (MD->isDefaulted() && MD->getParent()->isUnion())) {
@@ -263,7 +268,7 @@
                                (*RtlArgs)[0].RV.getScalarVal(),
                                (*(CE->arg_begin() + 1))->getType())
                          : EmitLValue(*CE->arg_begin());
-        EmitAggregateAssign(This, RHS.getAddress(), CE->getType());
+        EmitAggregateAssign(This, RHS, CE->getType());
         return RValue::get(This.getPointer());
       }
 
@@ -271,8 +276,10 @@
           cast<CXXConstructorDecl>(MD)->isCopyOrMoveConstructor()) {
         // Trivial move and copy ctor are the same.
         assert(CE->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
-        Address RHS = EmitLValue(*CE->arg_begin()).getAddress();
-        EmitAggregateCopy(This, RHS, (*CE->arg_begin())->getType());
+        const Expr *Arg = *CE->arg_begin();
+        LValue RHS = EmitLValue(Arg);
+        LValue Dest = MakeAddrLValue(This.getAddress(), Arg->getType());
+        EmitAggregateCopy(Dest, RHS, Arg->getType());
         return RValue::get(This.getPointer());
       }
       llvm_unreachable("unknown trivial member function");
@@ -334,7 +341,8 @@
     assert(ReturnValue.isNull() && "Destructor shouldn't have return value");
     if (UseVirtualCall) {
       CGM.getCXXABI().EmitVirtualDestructorCall(
-          *this, Dtor, Dtor_Complete, This, cast<CXXMemberCallExpr>(CE));
+          *this, Dtor, Dtor_Complete, This.getAddress(),
+          cast<CXXMemberCallExpr>(CE));
     } else {
       CGCallee Callee;
       if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
@@ -363,14 +371,16 @@
                   CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty),
                                  Ctor);
   } else if (UseVirtualCall) {
-    Callee = CGM.getCXXABI().getVirtualFunctionPointer(*this, MD, This, Ty,
-                                                       CE->getLocStart());
+    Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty);
   } else {
     if (SanOpts.has(SanitizerKind::CFINVCall) &&
         MD->getParent()->isDynamicClass()) {
-      llvm::Value *VTable = GetVTablePtr(This, Int8PtrTy, MD->getParent());
-      EmitVTablePtrCheckForCall(MD->getParent(), VTable, CFITCK_NVCall,
-                                CE->getLocStart());
+      llvm::Value *VTable;
+      const CXXRecordDecl *RD;
+      std::tie(VTable, RD) =
+          CGM.getCXXABI().LoadVTablePtr(*this, This.getAddress(),
+                                        MD->getParent());
+      EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getLocStart());
     }
 
     if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
@@ -385,8 +395,10 @@
   }
 
   if (MD->isVirtual()) {
-    This = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall(
-        *this, CalleeDecl, This, UseVirtualCall);
+    Address NewThisAddr =
+        CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall(
+            *this, CalleeDecl, This.getAddress(), UseVirtualCall);
+    This.setAddress(NewThisAddr);
   }
 
   return EmitCXXMemberOrOperatorCall(
@@ -444,7 +456,7 @@
   EmitCallArgs(Args, FPT, E->arguments());
   return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required,
                                                       /*PrefixSize=*/0),
-                  Callee, ReturnValue, Args);
+                  Callee, ReturnValue, Args, nullptr, E->getExprLoc());
 }
 
 RValue
@@ -611,7 +623,7 @@
 
      case CXXConstructExpr::CK_VirtualBase:
       ForVirtualBase = true;
-      // fall-through
+      LLVM_FALLTHROUGH;
 
      case CXXConstructExpr::CK_NonVirtualBase:
       Type = Ctor_Base;
@@ -2053,6 +2065,15 @@
   // Get the vtable pointer.
   Address ThisPtr = CGF.EmitLValue(E).getAddress();
 
+  QualType SrcRecordTy = E->getType();
+
+  // C++ [class.cdtor]p4:
+  //   If the operand of typeid refers to the object under construction or
+  //   destruction and the static type of the operand is neither the constructor
+  //   or destructor’s class nor one of its bases, the behavior is undefined.
+  CGF.EmitTypeCheck(CodeGenFunction::TCK_DynamicOperation, E->getExprLoc(),
+                    ThisPtr.getPointer(), SrcRecordTy);
+
   // C++ [expr.typeid]p2:
   //   If the glvalue expression is obtained by applying the unary * operator to
   //   a pointer and the pointer is a null pointer value, the typeid expression
@@ -2061,7 +2082,6 @@
   // However, this paragraph's intent is not clear.  We choose a very generous
   // interpretation which implores us to consider comma operators, conditional
   // operators, parentheses and other such constructs.
-  QualType SrcRecordTy = E->getType();
   if (CGF.CGM.getCXXABI().shouldTypeidBeNullChecked(
           isGLValueFromPointerDeref(E), SrcRecordTy)) {
     llvm::BasicBlock *BadTypeidBlock =
@@ -2124,10 +2144,6 @@
   CGM.EmitExplicitCastExprType(DCE, this);
   QualType DestTy = DCE->getTypeAsWritten();
 
-  if (DCE->isAlwaysNull())
-    if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy))
-      return T;
-
   QualType SrcTy = DCE->getSubExpr()->getType();
 
   // C++ [expr.dynamic.cast]p7:
@@ -2148,6 +2164,18 @@
     DestRecordTy = DestTy->castAs<ReferenceType>()->getPointeeType();
   }
 
+  // C++ [class.cdtor]p5:
+  //   If the operand of the dynamic_cast refers to the object under
+  //   construction or destruction and the static type of the operand is not a
+  //   pointer to or object of the constructor or destructor’s own class or one
+  //   of its bases, the dynamic_cast results in undefined behavior.
+  EmitTypeCheck(TCK_DynamicOperation, DCE->getExprLoc(), ThisAddr.getPointer(),
+                SrcRecordTy);
+
+  if (DCE->isAlwaysNull())
+    if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy))
+      return T;
+
   assert(SrcRecordTy->isRecordType() && "source type must be a record type!");
 
   // C++ [expr.dynamic.cast]p4: 
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index e860b30..9094d3f 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -761,15 +761,16 @@
   llvm::Value *LHSr = Op.LHS.first, *LHSi = Op.LHS.second;
   llvm::Value *RHSr = Op.RHS.first, *RHSi = Op.RHS.second;
 
-
   llvm::Value *DSTr, *DSTi;
   if (LHSr->getType()->isFloatingPointTy()) {
-    // If we have a complex operand on the RHS, we delegate to a libcall to
-    // handle all of the complexities and minimize underflow/overflow cases.
+    // If we have a complex operand on the RHS and FastMath is not allowed, we
+    // delegate to a libcall to handle all of the complexities and minimize
+    // underflow/overflow cases. When FastMath is allowed we construct the
+    // divide inline using the same algorithm as for integer operands.
     //
     // FIXME: We would be able to avoid the libcall in many places if we
     // supported imaginary types in addition to complex types.
-    if (RHSi) {
+    if (RHSi && !CGF.getLangOpts().FastMath) {
       BinOpInfo LibCallOp = Op;
       // If LHS was a real, supply a null imaginary part.
       if (!LHSi)
@@ -791,11 +792,31 @@
       case llvm::Type::FP128TyID:
         return EmitComplexBinOpLibCall("__divtc3", LibCallOp);
       }
-    }
-    assert(LHSi && "Can have at most one non-complex operand!");
+    } else if (RHSi) {
+      if (!LHSi)
+        LHSi = llvm::Constant::getNullValue(RHSi->getType());
 
-    DSTr = Builder.CreateFDiv(LHSr, RHSr);
-    DSTi = Builder.CreateFDiv(LHSi, RHSr);
+      // (a+ib) / (c+id) = ((ac+bd)/(cc+dd)) + i((bc-ad)/(cc+dd))
+      llvm::Value *AC = Builder.CreateFMul(LHSr, RHSr); // a*c
+      llvm::Value *BD = Builder.CreateFMul(LHSi, RHSi); // b*d
+      llvm::Value *ACpBD = Builder.CreateFAdd(AC, BD); // ac+bd
+
+      llvm::Value *CC = Builder.CreateFMul(RHSr, RHSr); // c*c
+      llvm::Value *DD = Builder.CreateFMul(RHSi, RHSi); // d*d
+      llvm::Value *CCpDD = Builder.CreateFAdd(CC, DD); // cc+dd
+
+      llvm::Value *BC = Builder.CreateFMul(LHSi, RHSr); // b*c
+      llvm::Value *AD = Builder.CreateFMul(LHSr, RHSi); // a*d
+      llvm::Value *BCmAD = Builder.CreateFSub(BC, AD); // bc-ad
+
+      DSTr = Builder.CreateFDiv(ACpBD, CCpDD);
+      DSTi = Builder.CreateFDiv(BCmAD, CCpDD);
+    } else {
+      assert(LHSi && "Can have at most one non-complex operand!");
+
+      DSTr = Builder.CreateFDiv(LHSr, RHSr);
+      DSTi = Builder.CreateFDiv(LHSi, RHSr);
+    }
   } else {
     assert(Op.LHS.second && Op.RHS.second &&
            "Both operands of integer complex operators must be complex!");
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 3e35d4c..7b076ea 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -859,9 +859,10 @@
 
     // Copy initializer elements.
     SmallVector<llvm::Constant*, 16> Elts;
-    Elts.reserve(NumInitableElts + NumElements);
+    Elts.reserve(std::max(NumInitableElts, NumElements));
 
     bool RewriteType = false;
+    bool AllNullValues = true;
     for (unsigned i = 0; i < NumInitableElts; ++i) {
       Expr *Init = ILE->getInit(i);
       llvm::Constant *C = Emitter.tryEmitPrivateForMemory(Init, EltType);
@@ -869,15 +870,22 @@
         return nullptr;
       RewriteType |= (C->getType() != ElemTy);
       Elts.push_back(C);
+      if (AllNullValues && !C->isNullValue())
+        AllNullValues = false;
     }
 
+    // If all initializer elements are "zero," then avoid storing NumElements
+    // instances of the zero representation.
+    if (AllNullValues)
+      return llvm::ConstantAggregateZero::get(AType);
+
     RewriteType |= (fillC->getType() != ElemTy);
     Elts.resize(NumElements, fillC);
 
     if (RewriteType) {
       // FIXME: Try to avoid packing the array
       std::vector<llvm::Type*> Types;
-      Types.reserve(NumInitableElts + NumElements);
+      Types.reserve(Elts.size());
       for (unsigned i = 0, e = Elts.size(); i < e; ++i)
         Types.push_back(Elts[i]->getType());
       llvm::StructType *SType = llvm::StructType::get(AType->getContext(),
@@ -1825,7 +1833,7 @@
     const llvm::APFloat &Init = Value.getFloat();
     if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf() &&
         !CGM.getContext().getLangOpts().NativeHalfType &&
-        !CGM.getContext().getLangOpts().HalfArgsAndReturns)
+        CGM.getContext().getTargetInfo().useFP16ConversionIntrinsics())
       return llvm::ConstantInt::get(CGM.getLLVMContext(),
                                     Init.bitcastToAPInt());
     else
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 7c11103..2a0dd4a 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -162,13 +162,13 @@
   // we can elide the overflow check.
   if (!Op.mayHaveIntegerOverflow())
     return true;
-
-  // If a unary op has a widened operand, the op cannot overflow.
-  if (const auto *UO = dyn_cast<UnaryOperator>(Op.E))
-    return IsWidenedIntegerOp(Ctx, UO->getSubExpr());
-
-  // We usually don't need overflow checks for binops with widened operands.
-  // Multiplication with promoted unsigned operands is a special case.
+
+  // If a unary op has a widened operand, the op cannot overflow.
+  if (const auto *UO = dyn_cast<UnaryOperator>(Op.E))
+    return !UO->canOverflow();
+
+  // We usually don't need overflow checks for binops with widened operands.
+  // Multiplication with promoted unsigned operands is a special case.
   const auto *BO = cast<BinaryOperator>(Op.E);
   auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS());
   if (!OptionalLHSTy)
@@ -951,7 +951,7 @@
   if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
     // Cast to FP using the intrinsic if the half type itself isn't supported.
     if (DstTy->isFloatingPointTy()) {
-      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns)
+      if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics())
         return Builder.CreateCall(
             CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, DstTy),
             Src);
@@ -959,7 +959,7 @@
       // Cast to other types through float, using either the intrinsic or FPExt,
       // depending on whether the half type itself is supported
       // (as opposed to operations on half, available with NativeHalfType).
-      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+      if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
         Src = Builder.CreateCall(
             CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
                                  CGF.CGM.FloatTy),
@@ -1068,7 +1068,7 @@
     if (SrcTy->isFloatingPointTy()) {
       // Use the intrinsic if the half type itself isn't supported
       // (as opposed to operations on half, available with NativeHalfType).
-      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns)
+      if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics())
         return Builder.CreateCall(
             CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, SrcTy), Src);
       // If the half type is supported, just use an fptrunc.
@@ -1104,7 +1104,7 @@
   }
 
   if (DstTy != ResTy) {
-    if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+    if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
       assert(ResTy->isIntegerTy(16) && "Only half FP requires extra conversion");
       Res = Builder.CreateCall(
         CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy),
@@ -1870,13 +1870,13 @@
     return Builder.CreateAdd(InVal, Amount, Name);
   case LangOptions::SOB_Undefined:
     if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
-      return Builder.CreateNSWAdd(InVal, Amount, Name);
-    // Fall through.
-  case LangOptions::SOB_Trapping:
-    if (IsWidenedIntegerOp(CGF.getContext(), E->getSubExpr()))
-      return Builder.CreateNSWAdd(InVal, Amount, Name);
-    return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc));
-  }
+      return Builder.CreateNSWAdd(InVal, Amount, Name);
+    // Fall through.
+  case LangOptions::SOB_Trapping:
+    if (!E->canOverflow())
+      return Builder.CreateNSWAdd(InVal, Amount, Name);
+    return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc));
+  }
   llvm_unreachable("Unknown SignedOverflowBehaviorTy");
 }
 
@@ -1952,17 +1952,15 @@
     value = Builder.getTrue();
 
   // Most common case by far: integer increment.
-  } else if (type->isIntegerType()) {
-    // Note that signed integer inc/dec with width less than int can't
-    // overflow because of promotion rules; we're just eliding a few steps here.
-    bool CanOverflow = value->getType()->getIntegerBitWidth() >=
-                       CGF.IntTy->getIntegerBitWidth();
-    if (CanOverflow && type->isSignedIntegerOrEnumerationType()) {
-      value = EmitIncDecConsiderOverflowBehavior(E, value, isInc);
-    } else if (CanOverflow && type->isUnsignedIntegerType() &&
-               CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) {
-      value =
-          EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, value, isInc));
+  } else if (type->isIntegerType()) {
+    // Note that signed integer inc/dec with width less than int can't
+    // overflow because of promotion rules; we're just eliding a few steps here.
+    if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) {
+      value = EmitIncDecConsiderOverflowBehavior(E, value, isInc);
+    } else if (E->canOverflow() && type->isUnsignedIntegerType() &&
+               CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) {
+      value =
+          EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, value, isInc));
     } else {
       llvm::Value *amt = llvm::ConstantInt::get(value->getType(), amount, true);
       value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec");
@@ -1975,7 +1973,7 @@
     // VLA types don't have constant size.
     if (const VariableArrayType *vla
           = CGF.getContext().getAsVariableArrayType(type)) {
-      llvm::Value *numElts = CGF.getVLASize(vla).first;
+      llvm::Value *numElts = CGF.getVLASize(vla).NumElts;
       if (!isInc) numElts = Builder.CreateNSWNeg(numElts, "vla.negsize");
       if (CGF.getLangOpts().isSignedOverflowDefined())
         value = Builder.CreateGEP(value, numElts, "vla.inc");
@@ -2028,7 +2026,7 @@
 
     if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
       // Another special case: half FP increment should be done via float
-      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+      if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
         value = Builder.CreateCall(
             CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
                                  CGF.CGM.FloatTy),
@@ -2063,7 +2061,7 @@
     value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec");
 
     if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
-      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+      if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
         value = Builder.CreateCall(
             CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16,
                                  CGF.CGM.FloatTy),
@@ -2273,16 +2271,13 @@
         CGF.EmitIgnoredExpr(E->getArgumentExpr());
       }
 
-      QualType eltType;
-      llvm::Value *numElts;
-      std::tie(numElts, eltType) = CGF.getVLASize(VAT);
-
-      llvm::Value *size = numElts;
+      auto VlaSize = CGF.getVLASize(VAT);
+      llvm::Value *size = VlaSize.NumElts;
 
       // Scale the number of non-VLA elements by the non-VLA element size.
-      CharUnits eltSize = CGF.getContext().getTypeSizeInChars(eltType);
+      CharUnits eltSize = CGF.getContext().getTypeSizeInChars(VlaSize.Type);
       if (!eltSize.isOne())
-        size = CGF.Builder.CreateNUWMul(CGF.CGM.getSize(eltSize), numElts);
+        size = CGF.Builder.CreateNUWMul(CGF.CGM.getSize(eltSize), size);
 
       return size;
     }
@@ -2769,7 +2764,7 @@
   if (const VariableArrayType *vla
         = CGF.getContext().getAsVariableArrayType(elementType)) {
     // The element count here is the total number of non-VLA elements.
-    llvm::Value *numElements = CGF.getVLASize(vla).first;
+    llvm::Value *numElements = CGF.getVLASize(vla).NumElts;
 
     // Effectively, the multiply by the VLA size is part of the GEP.
     // GEP indexes are signed, and scaling an index isn't permitted to
@@ -2964,10 +2959,9 @@
   // For a variable-length array, this is going to be non-constant.
   if (const VariableArrayType *vla
         = CGF.getContext().getAsVariableArrayType(elementType)) {
-    llvm::Value *numElements;
-    std::tie(numElements, elementType) = CGF.getVLASize(vla);
-
-    divisor = numElements;
+    auto VlaSize = CGF.getVLASize(vla);
+    elementType = VlaSize.Type;
+    divisor = VlaSize.NumElts;
 
     // Scale the number of non-VLA elements by the non-VLA element size.
     CharUnits eltSize = CGF.getContext().getTypeSizeInChars(elementType);
@@ -3922,6 +3916,7 @@
   case BO_GE:
   case BO_EQ:
   case BO_NE:
+  case BO_Cmp:
   case BO_And:
   case BO_Xor:
   case BO_Or:
diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp
new file mode 100644
index 0000000..3b0b173
--- /dev/null
+++ b/lib/CodeGen/CGNonTrivialStruct.cpp
@@ -0,0 +1,855 @@
+//===--- CGNonTrivialStruct.cpp - Emit Special Functions for C Structs ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions to generate various special functions for C
+// structs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenFunction.h"
+#include "CodeGenModule.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include <array>
+
+using namespace clang;
+using namespace CodeGen;
+
+// Return the size of a field in number of bits.
+static uint64_t getFieldSize(const FieldDecl *FD, ASTContext &Ctx) {
+  if (FD->isBitField())
+    return FD->getBitWidthValue(Ctx);
+  return Ctx.getTypeSize(FD->getType());
+}
+
+namespace {
+enum { DstIdx = 0, SrcIdx = 1 };
+const char *ValNameStr[2] = {"dst", "src"};
+
+template <class Derived, class RetTy = void> struct DestructedTypeVisitor {
+  template <class... Ts> RetTy visit(QualType FT, Ts &&... Args) {
+    return asDerived().visit(FT.isDestructedType(), FT,
+                             std::forward<Ts>(Args)...);
+  }
+
+  template <class... Ts>
+  RetTy visit(QualType::DestructionKind DK, QualType FT, Ts &&... Args) {
+    if (asDerived().getContext().getAsArrayType(FT))
+      return asDerived().visitArray(DK, FT, std::forward<Ts>(Args)...);
+
+    switch (DK) {
+    case QualType::DK_objc_strong_lifetime:
+      return asDerived().visitARCStrong(FT, std::forward<Ts>(Args)...);
+    case QualType::DK_nontrivial_c_struct:
+      return asDerived().visitStruct(FT, std::forward<Ts>(Args)...);
+    case QualType::DK_none:
+      return asDerived().visitTrivial(FT, std::forward<Ts>(Args)...);
+    case QualType::DK_cxx_destructor:
+      return asDerived().visitCXXDestructor(FT, std::forward<Ts>(Args)...);
+    case QualType::DK_objc_weak_lifetime:
+      return asDerived().visitARCWeak(FT, std::forward<Ts>(Args)...);
+    }
+
+    llvm_unreachable("unknown destruction kind");
+  }
+
+  Derived &asDerived() { return static_cast<Derived &>(*this); }
+};
+
+template <class Derived, class RetTy = void>
+struct DefaultInitializedTypeVisitor {
+  template <class... Ts> RetTy visit(QualType FT, Ts &&... Args) {
+    return asDerived().visit(FT.isNonTrivialToPrimitiveDefaultInitialize(), FT,
+                             std::forward<Ts>(Args)...);
+  }
+
+  template <class... Ts>
+  RetTy visit(QualType::PrimitiveDefaultInitializeKind PDIK, QualType FT,
+              Ts &&... Args) {
+    if (asDerived().getContext().getAsArrayType(FT))
+      return asDerived().visitArray(PDIK, FT, std::forward<Ts>(Args)...);
+
+    switch (PDIK) {
+    case QualType::PDIK_ARCStrong:
+      return asDerived().visitARCStrong(FT, std::forward<Ts>(Args)...);
+    case QualType::PDIK_Struct:
+      return asDerived().visitStruct(FT, std::forward<Ts>(Args)...);
+    case QualType::PDIK_Trivial:
+      return asDerived().visitTrivial(FT, std::forward<Ts>(Args)...);
+    }
+
+    llvm_unreachable("unknown default-initialize kind");
+  }
+
+  Derived &asDerived() { return static_cast<Derived &>(*this); }
+};
+
+template <class Derived, bool IsMove, class RetTy = void>
+struct CopiedTypeVisitor {
+  template <class... Ts> RetTy visit(QualType FT, Ts &&... Args) {
+    QualType::PrimitiveCopyKind PCK =
+        IsMove ? FT.isNonTrivialToPrimitiveDestructiveMove()
+               : FT.isNonTrivialToPrimitiveCopy();
+    return asDerived().visit(PCK, FT, std::forward<Ts>(Args)...);
+  }
+
+  template <class... Ts>
+  RetTy visit(QualType::PrimitiveCopyKind PCK, QualType FT, Ts &&... Args) {
+    asDerived().preVisit(PCK, FT, std::forward<Ts>(Args)...);
+
+    if (asDerived().getContext().getAsArrayType(FT))
+      return asDerived().visitArray(PCK, FT, std::forward<Ts>(Args)...);
+
+    switch (PCK) {
+    case QualType::PCK_ARCStrong:
+      return asDerived().visitARCStrong(FT, std::forward<Ts>(Args)...);
+    case QualType::PCK_Struct:
+      return asDerived().visitStruct(FT, std::forward<Ts>(Args)...);
+    case QualType::PCK_Trivial:
+      return asDerived().visitTrivial(FT, std::forward<Ts>(Args)...);
+    case QualType::PCK_VolatileTrivial:
+      return asDerived().visitVolatileTrivial(FT, std::forward<Ts>(Args)...);
+    }
+
+    llvm_unreachable("unknown primitive copy kind");
+  }
+
+  Derived &asDerived() { return static_cast<Derived &>(*this); }
+};
+
+template <class Derived> struct StructVisitor {
+  StructVisitor(ASTContext &Ctx) : Ctx(Ctx) {}
+
+  template <class... Ts>
+  void visitStructFields(QualType QT, CharUnits CurStructOffset, Ts... Args) {
+    const RecordDecl *RD = QT->castAs<RecordType>()->getDecl();
+
+    // Iterate over the fields of the struct.
+    for (const FieldDecl *FD : RD->fields()) {
+      QualType FT = FD->getType();
+      FT = QT.isVolatileQualified() ? FT.withVolatile() : FT;
+      asDerived().visit(FT, FD, CurStructOffset, Args...);
+    }
+
+    asDerived().flushTrivialFields(Args...);
+  }
+
+  template <class... Ts> void visitTrivial(Ts... Args) {}
+
+  template <class... Ts> void visitARCWeak(Ts... Args) {
+    // FIXME: remove this when visitARCWeak is implemented in the subclasses.
+    llvm_unreachable("weak field is not expected");
+  }
+
+  template <class... Ts> void visitCXXDestructor(Ts... Args) {
+    llvm_unreachable("field of a C++ struct type is not expected");
+  }
+
+  template <class... Ts> void flushTrivialFields(Ts... Args) {}
+
+  uint64_t getFieldOffsetInBits(const FieldDecl *FD) {
+    return FD ? Ctx.getASTRecordLayout(FD->getParent())
+                    .getFieldOffset(FD->getFieldIndex())
+              : 0;
+  }
+
+  CharUnits getFieldOffset(const FieldDecl *FD) {
+    return Ctx.toCharUnitsFromBits(getFieldOffsetInBits(FD));
+  }
+
+  Derived &asDerived() { return static_cast<Derived &>(*this); }
+
+  ASTContext &getContext() { return Ctx; }
+  ASTContext &Ctx;
+};
+
+template <class Derived, bool IsMove>
+struct CopyStructVisitor : StructVisitor<Derived>,
+                           CopiedTypeVisitor<Derived, IsMove> {
+  using StructVisitor<Derived>::asDerived;
+
+  CopyStructVisitor(ASTContext &Ctx) : StructVisitor<Derived>(Ctx) {}
+
+  template <class... Ts>
+  void preVisit(QualType::PrimitiveCopyKind PCK, QualType FT,
+                const FieldDecl *FD, CharUnits CurStructOffsset,
+                Ts &&... Args) {
+    if (PCK)
+      asDerived().flushTrivialFields(std::forward<Ts>(Args)...);
+  }
+
+  template <class... Ts>
+  void visitTrivial(QualType FT, const FieldDecl *FD, CharUnits CurStructOffset,
+                    Ts... Args) {
+    assert(!FT.isVolatileQualified() && "volatile field not expected");
+    ASTContext &Ctx = asDerived().getContext();
+    uint64_t FieldSize = getFieldSize(FD, Ctx);
+
+    // Ignore zero-sized fields.
+    if (FieldSize == 0)
+      return;
+
+    uint64_t FStartInBits = asDerived().getFieldOffsetInBits(FD);
+    uint64_t FEndInBits = FStartInBits + FieldSize;
+    uint64_t RoundedFEnd = llvm::alignTo(FEndInBits, Ctx.getCharWidth());
+
+    // Set Start if this is the first field of a sequence of trivial fields.
+    if (Start == End)
+      Start = CurStructOffset + Ctx.toCharUnitsFromBits(FStartInBits);
+    End = CurStructOffset + Ctx.toCharUnitsFromBits(RoundedFEnd);
+  }
+
+  CharUnits Start = CharUnits::Zero(), End = CharUnits::Zero();
+};
+
+// This function creates the mangled name of a special function of a non-trivial
+// C struct. Since there is no ODR in C, the function is mangled based on the
+// struct contents and not the name. The mangled name has the following
+// structure:
+//
+// <function-name> ::= <prefix> <alignment-info> "_" <struct-field-info>
+// <prefix> ::= "__destructor_" | "__default_constructor_" |
+//              "__copy_constructor_" | "__move_constructor_" |
+//              "__copy_assignment_" | "__move_assignment_"
+// <alignment-info> ::= <dst-alignment> ["_" <src-alignment>]
+// <struct-field-info> ::= <field-info>+
+// <field-info> ::= <struct-or-scalar-field-info> | <array-field-info>
+// <struct-or-scalar-field-info> ::= <struct-field-info> | <strong-field-info> |
+//                                   <trivial-field-info>
+// <array-field-info> ::= "_AB" <array-offset> "s" <element-size> "n"
+//                        <num-elements> <innermost-element-info> "_AE"
+// <innermost-element-info> ::= <struct-or-scalar-field-info>
+// <strong-field-info> ::= "_s" ["b"] ["v"] <field-offset>
+// <trivial-field-info> ::= "_t" ["v"] <field-offset> "_" <field-size>
+
+template <class Derived> struct GenFuncNameBase {
+  std::string getVolatileOffsetStr(bool IsVolatile, CharUnits Offset) {
+    std::string S;
+    if (IsVolatile)
+      S = "v";
+    S += llvm::to_string(Offset.getQuantity());
+    return S;
+  }
+
+  void visitARCStrong(QualType FT, const FieldDecl *FD,
+                      CharUnits CurStructOffset) {
+    appendStr("_s");
+    if (FT->isBlockPointerType())
+      appendStr("b");
+    CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
+    appendStr(getVolatileOffsetStr(FT.isVolatileQualified(), FieldOffset));
+  }
+
+  void visitStruct(QualType QT, const FieldDecl *FD,
+                   CharUnits CurStructOffset) {
+    CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
+    asDerived().visitStructFields(QT, FieldOffset);
+  }
+
+  template <class FieldKind>
+  void visitArray(FieldKind FK, QualType QT, const FieldDecl *FD,
+                  CharUnits CurStructOffset) {
+    // String for non-volatile trivial fields is emitted when
+    // flushTrivialFields is called.
+    if (!FK)
+      return asDerived().visitTrivial(QT, FD, CurStructOffset);
+
+    CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
+    ASTContext &Ctx = asDerived().getContext();
+    const auto *AT = Ctx.getAsConstantArrayType(QT);
+    unsigned NumElts = Ctx.getConstantArrayElementCount(AT);
+    QualType EltTy = Ctx.getBaseElementType(AT);
+    CharUnits EltSize = Ctx.getTypeSizeInChars(EltTy);
+    appendStr("_AB" + llvm::to_string(FieldOffset.getQuantity()) + "s" +
+              llvm::to_string(EltSize.getQuantity()) + "n" +
+              llvm::to_string(NumElts));
+    EltTy = QT.isVolatileQualified() ? EltTy.withVolatile() : EltTy;
+    asDerived().visit(FK, EltTy, nullptr, FieldOffset);
+    appendStr("_AE");
+  }
+
+  void appendStr(StringRef Str) { Name += Str; }
+
+  std::string getName(QualType QT, bool IsVolatile) {
+    QT = IsVolatile ? QT.withVolatile() : QT;
+    asDerived().visitStructFields(QT, CharUnits::Zero());
+    return Name;
+  }
+
+  Derived &asDerived() { return static_cast<Derived &>(*this); }
+
+  std::string Name;
+};
+
+template <class Derived>
+struct GenUnaryFuncName : StructVisitor<Derived>, GenFuncNameBase<Derived> {
+  GenUnaryFuncName(StringRef Prefix, CharUnits DstAlignment, ASTContext &Ctx)
+      : StructVisitor<Derived>(Ctx) {
+    this->appendStr(Prefix);
+    this->appendStr(llvm::to_string(DstAlignment.getQuantity()));
+  }
+};
+
+// Helper function to create a null constant.
+static llvm::Constant *getNullForVariable(Address Addr) {
+  llvm::Type *Ty = Addr.getElementType();
+  return llvm::ConstantPointerNull::get(cast<llvm::PointerType>(Ty));
+}
+
+template <bool IsMove>
+struct GenBinaryFuncName : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>,
+                           GenFuncNameBase<GenBinaryFuncName<IsMove>> {
+
+  GenBinaryFuncName(StringRef Prefix, CharUnits DstAlignment,
+                    CharUnits SrcAlignment, ASTContext &Ctx)
+      : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>(Ctx) {
+    this->appendStr(Prefix);
+    this->appendStr(llvm::to_string(DstAlignment.getQuantity()));
+    this->appendStr("_" + llvm::to_string(SrcAlignment.getQuantity()));
+  }
+
+  void flushTrivialFields() {
+    if (this->Start == this->End)
+      return;
+
+    this->appendStr("_t" + llvm::to_string(this->Start.getQuantity()) + "w" +
+                    llvm::to_string((this->End - this->Start).getQuantity()));
+
+    this->Start = this->End = CharUnits::Zero();
+  }
+
+  void visitVolatileTrivial(QualType FT, const FieldDecl *FD,
+                            CharUnits CurStackOffset) {
+    // Because volatile fields can be bit-fields and are individually copied,
+    // their offset and width are in bits.
+    uint64_t OffsetInBits =
+        this->Ctx.toBits(CurStackOffset) + this->getFieldOffsetInBits(FD);
+    this->appendStr("_tv" + llvm::to_string(OffsetInBits) + "w" +
+                    llvm::to_string(getFieldSize(FD, this->Ctx)));
+  }
+};
+
+struct GenDefaultInitializeFuncName
+    : GenUnaryFuncName<GenDefaultInitializeFuncName>,
+      DefaultInitializedTypeVisitor<GenDefaultInitializeFuncName> {
+  GenDefaultInitializeFuncName(CharUnits DstAlignment, ASTContext &Ctx)
+      : GenUnaryFuncName<GenDefaultInitializeFuncName>("__default_constructor_",
+                                                       DstAlignment, Ctx) {}
+};
+
+struct GenDestructorFuncName : GenUnaryFuncName<GenDestructorFuncName>,
+                               DestructedTypeVisitor<GenDestructorFuncName> {
+  GenDestructorFuncName(CharUnits DstAlignment, ASTContext &Ctx)
+      : GenUnaryFuncName<GenDestructorFuncName>("__destructor_", DstAlignment,
+                                                Ctx) {}
+};
+
+// Helper function that creates CGFunctionInfo for an N-ary special function.
+template <size_t N>
+static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM,
+                                             FunctionArgList &Args) {
+  ASTContext &Ctx = CGM.getContext();
+  llvm::SmallVector<ImplicitParamDecl *, N> Params;
+  QualType ParamTy = Ctx.getPointerType(Ctx.VoidPtrTy);
+
+  for (unsigned I = 0; I < N; ++I)
+    Params.push_back(ImplicitParamDecl::Create(
+        Ctx, nullptr, SourceLocation(), &Ctx.Idents.get(ValNameStr[I]), ParamTy,
+        ImplicitParamDecl::Other));
+
+  for (auto &P : Params)
+    Args.push_back(P);
+
+  return CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
+}
+
+// Template classes that are used as bases for classes that emit special
+// functions.
+template <class Derived> struct GenFuncBase {
+  template <size_t N>
+  void visitStruct(QualType FT, const FieldDecl *FD, CharUnits CurStackOffset,
+                   std::array<Address, N> Addrs) {
+    this->asDerived().callSpecialFunction(
+        FT, CurStackOffset + asDerived().getFieldOffset(FD), Addrs);
+  }
+
+  template <class FieldKind, size_t N>
+  void visitArray(FieldKind FK, QualType QT, const FieldDecl *FD,
+                  CharUnits CurStackOffset, std::array<Address, N> Addrs) {
+    // Non-volatile trivial fields are copied when flushTrivialFields is called.
+    if (!FK)
+      return asDerived().visitTrivial(QT, FD, CurStackOffset, Addrs);
+
+    CodeGenFunction &CGF = *this->CGF;
+    ASTContext &Ctx = CGF.getContext();
+
+    // Compute the end address.
+    QualType BaseEltQT;
+    std::array<Address, N> StartAddrs = Addrs;
+    for (unsigned I = 0; I < N; ++I)
+      StartAddrs[I] = getAddrWithOffset(Addrs[I], CurStackOffset, FD);
+    Address DstAddr = StartAddrs[DstIdx];
+    llvm::Value *NumElts =
+        CGF.emitArrayLength(Ctx.getAsArrayType(QT), BaseEltQT, DstAddr);
+    unsigned BaseEltSize = Ctx.getTypeSizeInChars(BaseEltQT).getQuantity();
+    llvm::Value *BaseEltSizeVal =
+        llvm::ConstantInt::get(NumElts->getType(), BaseEltSize);
+    llvm::Value *SizeInBytes =
+        CGF.Builder.CreateNUWMul(BaseEltSizeVal, NumElts);
+    Address BC = CGF.Builder.CreateBitCast(DstAddr, CGF.CGM.Int8PtrTy);
+    llvm::Value *DstArrayEnd =
+        CGF.Builder.CreateInBoundsGEP(BC.getPointer(), SizeInBytes);
+    DstArrayEnd = CGF.Builder.CreateBitCast(DstArrayEnd, CGF.CGM.Int8PtrPtrTy,
+                                            "dstarray.end");
+    llvm::BasicBlock *PreheaderBB = CGF.Builder.GetInsertBlock();
+
+    // Create the header block and insert the phi instructions.
+    llvm::BasicBlock *HeaderBB = CGF.createBasicBlock("loop.header");
+    CGF.EmitBlock(HeaderBB);
+    llvm::PHINode *PHIs[N];
+
+    for (unsigned I = 0; I < N; ++I) {
+      PHIs[I] = CGF.Builder.CreatePHI(CGF.CGM.Int8PtrPtrTy, 2, "addr.cur");
+      PHIs[I]->addIncoming(StartAddrs[I].getPointer(), PreheaderBB);
+    }
+
+    // Create the exit and loop body blocks.
+    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("loop.exit");
+    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("loop.body");
+
+    // Emit the comparison and conditional branch instruction that jumps to
+    // either the exit or the loop body.
+    llvm::Value *Done =
+        CGF.Builder.CreateICmpEQ(PHIs[DstIdx], DstArrayEnd, "done");
+    CGF.Builder.CreateCondBr(Done, ExitBB, LoopBB);
+
+    // Visit the element of the array in the loop body.
+    CGF.EmitBlock(LoopBB);
+    QualType EltQT = Ctx.getAsArrayType(QT)->getElementType();
+    CharUnits EltSize = Ctx.getTypeSizeInChars(EltQT);
+    std::array<Address, N> NewAddrs = Addrs;
+
+    for (unsigned I = 0; I < N; ++I)
+      NewAddrs[I] = Address(
+          PHIs[I], StartAddrs[I].getAlignment().alignmentAtOffset(EltSize));
+
+    EltQT = QT.isVolatileQualified() ? EltQT.withVolatile() : EltQT;
+    this->asDerived().visit(EltQT, nullptr, CharUnits::Zero(), NewAddrs);
+
+    LoopBB = CGF.Builder.GetInsertBlock();
+
+    for (unsigned I = 0; I < N; ++I) {
+      // Instrs to update the destination and source addresses.
+      // Update phi instructions.
+      NewAddrs[I] = getAddrWithOffset(NewAddrs[I], EltSize);
+      PHIs[I]->addIncoming(NewAddrs[I].getPointer(), LoopBB);
+    }
+
+    // Insert an unconditional branch to the header block.
+    CGF.Builder.CreateBr(HeaderBB);
+    CGF.EmitBlock(ExitBB);
+  }
+
+  /// Return an address with the specified offset from the passed address.
+  Address getAddrWithOffset(Address Addr, CharUnits Offset) {
+    assert(Addr.isValid() && "invalid address");
+    if (Offset.getQuantity() == 0)
+      return Addr;
+    Addr = CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrTy);
+    Addr = CGF->Builder.CreateConstInBoundsGEP(Addr, Offset.getQuantity(),
+                                               CharUnits::One());
+    return CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrPtrTy);
+  }
+
+  Address getAddrWithOffset(Address Addr, CharUnits StructFieldOffset,
+                            const FieldDecl *FD) {
+    return getAddrWithOffset(Addr, StructFieldOffset +
+                                       asDerived().getFieldOffset(FD));
+  }
+
+  template <size_t N>
+  llvm::Function *
+  getFunction(StringRef FuncName, QualType QT, std::array<Address, N> Addrs,
+              std::array<CharUnits, N> Alignments, CodeGenModule &CGM) {
+    // If the special function already exists in the module, return it.
+    if (llvm::Function *F = CGM.getModule().getFunction(FuncName)) {
+      bool WrongType = false;
+      if (!F->getReturnType()->isVoidTy())
+        WrongType = true;
+      else {
+        for (const llvm::Argument &Arg : F->args())
+          if (Arg.getType() != CGM.Int8PtrPtrTy)
+            WrongType = true;
+      }
+
+      if (WrongType) {
+        std::string FuncName = F->getName();
+        SourceLocation Loc = QT->castAs<RecordType>()->getDecl()->getLocation();
+        CGM.Error(Loc, "special function " + FuncName +
+                           " for non-trivial C struct has incorrect type");
+        return nullptr;
+      }
+      return F;
+    }
+
+    ASTContext &Ctx = CGM.getContext();
+    FunctionArgList Args;
+    const CGFunctionInfo &FI = getFunctionInfo<N>(CGM, Args);
+    llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
+    llvm::Function *F =
+        llvm::Function::Create(FuncTy, llvm::GlobalValue::LinkOnceODRLinkage,
+                               FuncName, &CGM.getModule());
+    F->setVisibility(llvm::GlobalValue::HiddenVisibility);
+    CGM.SetLLVMFunctionAttributes(nullptr, FI, F);
+    CGM.SetLLVMFunctionAttributesForDefinition(nullptr, F);
+    IdentifierInfo *II = &Ctx.Idents.get(FuncName);
+    FunctionDecl *FD = FunctionDecl::Create(
+        Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
+        II, Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
+    CodeGenFunction NewCGF(CGM);
+    setCGF(&NewCGF);
+    CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args);
+
+    for (unsigned I = 0; I < N; ++I) {
+      llvm::Value *V = CGF->Builder.CreateLoad(CGF->GetAddrOfLocalVar(Args[I]));
+      Addrs[I] = Address(V, Alignments[I]);
+    }
+
+    asDerived().visitStructFields(QT, CharUnits::Zero(), Addrs);
+    CGF->FinishFunction();
+    return F;
+  }
+
+  template <size_t N>
+  void callFunc(StringRef FuncName, QualType QT, std::array<Address, N> Addrs,
+                CodeGenFunction &CallerCGF) {
+    std::array<CharUnits, N> Alignments;
+    llvm::Value *Ptrs[N];
+
+    for (unsigned I = 0; I < N; ++I) {
+      Alignments[I] = Addrs[I].getAlignment();
+      Ptrs[I] =
+          CallerCGF.Builder.CreateBitCast(Addrs[I], CallerCGF.CGM.Int8PtrPtrTy)
+              .getPointer();
+    }
+
+    if (llvm::Function *F =
+            getFunction(FuncName, QT, Addrs, Alignments, CallerCGF.CGM))
+      CallerCGF.EmitNounwindRuntimeCall(F, Ptrs);
+  }
+
+  Derived &asDerived() { return static_cast<Derived &>(*this); }
+
+  void setCGF(CodeGenFunction *F) { CGF = F; }
+
+  CodeGenFunction *CGF = nullptr;
+};
+
+template <class Derived, bool IsMove>
+struct GenBinaryFunc : CopyStructVisitor<Derived, IsMove>,
+                       GenFuncBase<Derived> {
+  GenBinaryFunc(ASTContext &Ctx) : CopyStructVisitor<Derived, IsMove>(Ctx) {}
+
+  void flushTrivialFields(std::array<Address, 2> Addrs) {
+    CharUnits Size = this->End - this->Start;
+
+    if (Size.getQuantity() == 0)
+      return;
+
+    Address DstAddr = this->getAddrWithOffset(Addrs[DstIdx], this->Start);
+    Address SrcAddr = this->getAddrWithOffset(Addrs[SrcIdx], this->Start);
+
+    // Emit memcpy.
+    if (Size.getQuantity() >= 16 || !llvm::isPowerOf2_32(Size.getQuantity())) {
+      llvm::Value *SizeVal =
+          llvm::ConstantInt::get(this->CGF->SizeTy, Size.getQuantity());
+      DstAddr =
+          this->CGF->Builder.CreateElementBitCast(DstAddr, this->CGF->Int8Ty);
+      SrcAddr =
+          this->CGF->Builder.CreateElementBitCast(SrcAddr, this->CGF->Int8Ty);
+      this->CGF->Builder.CreateMemCpy(DstAddr, SrcAddr, SizeVal, false);
+    } else {
+      llvm::Type *Ty = llvm::Type::getIntNTy(
+          this->CGF->getLLVMContext(),
+          Size.getQuantity() * this->CGF->getContext().getCharWidth());
+      DstAddr = this->CGF->Builder.CreateElementBitCast(DstAddr, Ty);
+      SrcAddr = this->CGF->Builder.CreateElementBitCast(SrcAddr, Ty);
+      llvm::Value *SrcVal = this->CGF->Builder.CreateLoad(SrcAddr, false);
+      this->CGF->Builder.CreateStore(SrcVal, DstAddr, false);
+    }
+
+    this->Start = this->End = CharUnits::Zero();
+  }
+
+  template <class... Ts>
+  void visitVolatileTrivial(QualType FT, const FieldDecl *FD, CharUnits Offset,
+                            std::array<Address, 2> Addrs) {
+    QualType RT = QualType(FD->getParent()->getTypeForDecl(), 0);
+    llvm::PointerType *PtrTy = this->CGF->ConvertType(RT)->getPointerTo();
+    Address DstAddr = this->getAddrWithOffset(Addrs[DstIdx], Offset);
+    LValue DstBase = this->CGF->MakeAddrLValue(
+        this->CGF->Builder.CreateBitCast(DstAddr, PtrTy), FT);
+    LValue DstLV = this->CGF->EmitLValueForField(DstBase, FD);
+    Address SrcAddr = this->getAddrWithOffset(Addrs[SrcIdx], Offset);
+    LValue SrcBase = this->CGF->MakeAddrLValue(
+        this->CGF->Builder.CreateBitCast(SrcAddr, PtrTy), FT);
+    LValue SrcLV = this->CGF->EmitLValueForField(SrcBase, FD);
+    RValue SrcVal = this->CGF->EmitLoadOfLValue(SrcLV, SourceLocation());
+    this->CGF->EmitStoreThroughLValue(SrcVal, DstLV);
+  }
+};
+
+// These classes that emit the special functions for a non-trivial struct.
+struct GenDestructor : StructVisitor<GenDestructor>,
+                       GenFuncBase<GenDestructor>,
+                       DestructedTypeVisitor<GenDestructor> {
+  GenDestructor(ASTContext &Ctx) : StructVisitor<GenDestructor>(Ctx) {}
+  void visitARCStrong(QualType QT, const FieldDecl *FD,
+                      CharUnits CurStackOffset, std::array<Address, 1> Addrs) {
+    CGF->destroyARCStrongImprecise(
+        *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT);
+  }
+
+  void callSpecialFunction(QualType FT, CharUnits Offset,
+                           std::array<Address, 1> Addrs) {
+    CGF->callCStructDestructor(
+        CGF->MakeAddrLValue(getAddrWithOffset(Addrs[DstIdx], Offset), FT));
+  }
+};
+
+struct GenDefaultInitialize
+    : StructVisitor<GenDefaultInitialize>,
+      GenFuncBase<GenDefaultInitialize>,
+      DefaultInitializedTypeVisitor<GenDefaultInitialize> {
+  typedef GenFuncBase<GenDefaultInitialize> GenFuncBaseTy;
+  GenDefaultInitialize(ASTContext &Ctx)
+      : StructVisitor<GenDefaultInitialize>(Ctx) {}
+
+  void visitARCStrong(QualType QT, const FieldDecl *FD,
+                      CharUnits CurStackOffset, std::array<Address, 1> Addrs) {
+    CGF->EmitNullInitialization(
+        getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT);
+  }
+
+  template <class FieldKind, size_t... Is>
+  void visitArray(FieldKind FK, QualType QT, const FieldDecl *FD,
+                  CharUnits CurStackOffset, std::array<Address, 1> Addrs) {
+    if (!FK)
+      return visitTrivial(QT, FD, CurStackOffset, Addrs);
+
+    ASTContext &Ctx = getContext();
+    CharUnits Size = Ctx.getTypeSizeInChars(QT);
+    QualType EltTy = Ctx.getBaseElementType(QT);
+
+    if (Size < CharUnits::fromQuantity(16) || EltTy->getAs<RecordType>()) {
+      GenFuncBaseTy::visitArray(FK, QT, FD, CurStackOffset, Addrs);
+      return;
+    }
+
+    llvm::Constant *SizeVal = CGF->Builder.getInt64(Size.getQuantity());
+    Address DstAddr = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+    Address Loc = CGF->Builder.CreateElementBitCast(DstAddr, CGF->Int8Ty);
+    CGF->Builder.CreateMemSet(Loc, CGF->Builder.getInt8(0), SizeVal,
+                              QT.isVolatileQualified());
+  }
+
+  void callSpecialFunction(QualType FT, CharUnits Offset,
+                           std::array<Address, 1> Addrs) {
+    CGF->callCStructDefaultConstructor(
+        CGF->MakeAddrLValue(getAddrWithOffset(Addrs[DstIdx], Offset), FT));
+  }
+};
+
+struct GenCopyConstructor : GenBinaryFunc<GenCopyConstructor, false> {
+  GenCopyConstructor(ASTContext &Ctx)
+      : GenBinaryFunc<GenCopyConstructor, false>(Ctx) {}
+
+  void visitARCStrong(QualType QT, const FieldDecl *FD,
+                      CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+    Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+    Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+    llvm::Value *SrcVal = CGF->EmitLoadOfScalar(
+        Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation());
+    llvm::Value *Val = CGF->EmitARCRetain(QT, SrcVal);
+    CGF->EmitStoreOfScalar(Val, CGF->MakeAddrLValue(Addrs[DstIdx], QT), true);
+  }
+  void callSpecialFunction(QualType FT, CharUnits Offset,
+                           std::array<Address, 2> Addrs) {
+    CGF->callCStructCopyConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+                                    CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+  }
+};
+
+struct GenMoveConstructor : GenBinaryFunc<GenMoveConstructor, true> {
+  GenMoveConstructor(ASTContext &Ctx)
+      : GenBinaryFunc<GenMoveConstructor, true>(Ctx) {}
+
+  void visitARCStrong(QualType QT, const FieldDecl *FD,
+                      CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+    Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+    Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+    LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT);
+    llvm::Value *SrcVal =
+        CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal();
+    CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV);
+    CGF->EmitStoreOfScalar(SrcVal, CGF->MakeAddrLValue(Addrs[DstIdx], QT),
+                           /* isInitialization */ true);
+  }
+  void callSpecialFunction(QualType FT, CharUnits Offset,
+                           std::array<Address, 2> Addrs) {
+    CGF->callCStructMoveConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+                                    CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+  }
+};
+
+struct GenCopyAssignment : GenBinaryFunc<GenCopyAssignment, false> {
+  GenCopyAssignment(ASTContext &Ctx)
+      : GenBinaryFunc<GenCopyAssignment, false>(Ctx) {}
+
+  void visitARCStrong(QualType QT, const FieldDecl *FD,
+                      CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+    Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+    Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+    llvm::Value *SrcVal = CGF->EmitLoadOfScalar(
+        Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation());
+    CGF->EmitARCStoreStrong(CGF->MakeAddrLValue(Addrs[DstIdx], QT), SrcVal,
+                            false);
+  }
+  void callSpecialFunction(QualType FT, CharUnits Offset,
+                           std::array<Address, 2> Addrs) {
+    CGF->callCStructCopyAssignmentOperator(
+        CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+        CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+  }
+};
+
+struct GenMoveAssignment : GenBinaryFunc<GenMoveAssignment, true> {
+  GenMoveAssignment(ASTContext &Ctx)
+      : GenBinaryFunc<GenMoveAssignment, true>(Ctx) {}
+
+  void visitARCStrong(QualType QT, const FieldDecl *FD,
+                      CharUnits CurStackOffset, std::array<Address, 2> Addrs) {
+    Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD);
+    Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD);
+    LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT);
+    llvm::Value *SrcVal =
+        CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal();
+    CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV);
+    LValue DstLV = CGF->MakeAddrLValue(Addrs[DstIdx], QT);
+    llvm::Value *DstVal =
+        CGF->EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
+    CGF->EmitStoreOfScalar(SrcVal, DstLV);
+    CGF->EmitARCRelease(DstVal, ARCImpreciseLifetime);
+  }
+
+  void callSpecialFunction(QualType FT, CharUnits Offset,
+                           std::array<Address, 2> Addrs) {
+    CGF->callCStructMoveAssignmentOperator(
+        CGF->MakeAddrLValue(Addrs[DstIdx], FT),
+        CGF->MakeAddrLValue(Addrs[SrcIdx], FT));
+  }
+};
+
+} // namespace
+
+void CodeGenFunction::destroyNonTrivialCStruct(CodeGenFunction &CGF,
+                                               Address Addr, QualType Type) {
+  CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Type));
+}
+
+// Default-initialize a variable that is a non-trivial struct or an array of
+// such structure.
+void CodeGenFunction::defaultInitNonTrivialCStructVar(LValue Dst) {
+  GenDefaultInitialize Gen(getContext());
+  Address DstPtr = Builder.CreateBitCast(Dst.getAddress(), CGM.Int8PtrPtrTy);
+  Gen.setCGF(this);
+  QualType QT = Dst.getType();
+  QT = Dst.isVolatile() ? QT.withVolatile() : QT;
+  Gen.visit(QT, nullptr, CharUnits::Zero(), std::array<Address, 1>({{DstPtr}}));
+}
+
+template <class G, size_t N>
+static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT,
+                                bool IsVolatile, CodeGenFunction &CGF,
+                                std::array<Address, N> Addrs) {
+  for (unsigned I = 0; I < N; ++I)
+    Addrs[I] = CGF.Builder.CreateBitCast(Addrs[I], CGF.CGM.Int8PtrPtrTy);
+  QT = IsVolatile ? QT.withVolatile() : QT;
+  Gen.callFunc(FuncName, QT, Addrs, CGF);
+}
+
+// Functions to emit calls to the special functions of a non-trivial C struct.
+void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) {
+  bool IsVolatile = Dst.isVolatile();
+  Address DstPtr = Dst.getAddress();
+  QualType QT = Dst.getType();
+  GenDefaultInitializeFuncName GenName(DstPtr.getAlignment(), getContext());
+  std::string FuncName = GenName.getName(QT, IsVolatile);
+  callSpecialFunction(GenDefaultInitialize(getContext()), FuncName, QT,
+                      IsVolatile, *this, std::array<Address, 1>({{DstPtr}}));
+}
+
+void CodeGenFunction::callCStructDestructor(LValue Dst) {
+  bool IsVolatile = Dst.isVolatile();
+  Address DstPtr = Dst.getAddress();
+  QualType QT = Dst.getType();
+  GenDestructorFuncName GenName(DstPtr.getAlignment(), getContext());
+  std::string FuncName = GenName.getName(QT, IsVolatile);
+  callSpecialFunction(GenDestructor(getContext()), FuncName, QT, IsVolatile,
+                      *this, std::array<Address, 1>({{DstPtr}}));
+}
+
+void CodeGenFunction::callCStructCopyConstructor(LValue Dst, LValue Src) {
+  bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+  Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+  QualType QT = Dst.getType();
+  GenBinaryFuncName<false> GenName("__copy_constructor_", DstPtr.getAlignment(),
+                                   SrcPtr.getAlignment(), getContext());
+  std::string FuncName = GenName.getName(QT, IsVolatile);
+  callSpecialFunction(GenCopyConstructor(getContext()), FuncName, QT,
+                      IsVolatile, *this,
+                      std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
+
+void CodeGenFunction::callCStructCopyAssignmentOperator(LValue Dst, LValue Src
+
+) {
+  bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+  Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+  QualType QT = Dst.getType();
+  GenBinaryFuncName<false> GenName("__copy_assignment_", DstPtr.getAlignment(),
+                                   SrcPtr.getAlignment(), getContext());
+  std::string FuncName = GenName.getName(QT, IsVolatile);
+  callSpecialFunction(GenCopyAssignment(getContext()), FuncName, QT, IsVolatile,
+                      *this, std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
+
+void CodeGenFunction::callCStructMoveConstructor(LValue Dst, LValue Src) {
+  bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+  Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+  QualType QT = Dst.getType();
+  GenBinaryFuncName<true> GenName("__move_constructor_", DstPtr.getAlignment(),
+                                  SrcPtr.getAlignment(), getContext());
+  std::string FuncName = GenName.getName(QT, IsVolatile);
+  callSpecialFunction(GenMoveConstructor(getContext()), FuncName, QT,
+                      IsVolatile, *this,
+                      std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
+
+void CodeGenFunction::callCStructMoveAssignmentOperator(LValue Dst, LValue Src
+
+) {
+  bool IsVolatile = Dst.isVolatile() || Src.isVolatile();
+  Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress();
+  QualType QT = Dst.getType();
+  GenBinaryFuncName<true> GenName("__move_assignment_", DstPtr.getAlignment(),
+                                  SrcPtr.getAlignment(), getContext());
+  std::string FuncName = GenName.getName(QT, IsVolatile);
+  callSpecialFunction(GenMoveAssignment(getContext()), FuncName, QT, IsVolatile,
+                      *this, std::array<Address, 2>({{DstPtr, SrcPtr}}));
+}
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index f26263d..e812ef3 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -1008,12 +1008,13 @@
                          /*init*/ true);
       return;
     }
-    case TEK_Aggregate:
+    case TEK_Aggregate: {
       // The return value slot is guaranteed to not be aliased, but
       // that's not necessarily the same as "on the stack", so
       // we still potentially need objc_memmove_collectable.
-      EmitAggregateCopy(ReturnValue, LV.getAddress(), ivarType);
-      return;
+      EmitAggregateCopy(/* Dest= */ MakeAddrLValue(ReturnValue, ivarType),
+                        /* Src= */ LV, ivarType);
+      return; }
     case TEK_Scalar: {
       llvm::Value *value;
       if (propType->isReferenceType()) {
@@ -1814,22 +1815,6 @@
 }
 
 
-static bool IsForwarding(StringRef Name) {
-  return llvm::StringSwitch<bool>(Name)
-      .Cases("objc_autoreleaseReturnValue",             // ARCInstKind::AutoreleaseRV
-             "objc_autorelease",                        // ARCInstKind::Autorelease
-             "objc_retainAutoreleaseReturnValue",       // ARCInstKind::FusedRetainAutoreleaseRV
-             "objc_retainAutoreleasedReturnValue",      // ARCInstKind::RetainRV
-             "objc_retainAutorelease",                  // ARCInstKind::FusedRetainAutorelease
-             "objc_retainedObject",                     // ARCInstKind::NoopCast
-             "objc_retain",                             // ARCInstKind::Retain
-             "objc_unretainedObject",                   // ARCInstKind::NoopCast
-             "objc_unretainedPointer",                  // ARCInstKind::NoopCast
-             "objc_unsafeClaimAutoreleasedReturnValue", // ARCInstKind::ClaimRV
-             true)
-      .Default(false);
-}
-
 static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
                                                 llvm::FunctionType *FTy,
                                                 StringRef Name) {
@@ -1847,9 +1832,6 @@
       // performance.
       F->addFnAttr(llvm::Attribute::NonLazyBind);
     }
-
-    if (IsForwarding(Name))
-      F->arg_begin()->addAttr(llvm::Attribute::Returned);
   }
 
   return RTF;
@@ -2070,7 +2052,7 @@
 
 /// Claim a possibly-autoreleased return value at +0.  This is only
 /// valid to do in contexts which do not rely on the retain to keep
-/// the object valid for for all of its uses; for example, when
+/// the object valid for all of its uses; for example, when
 /// the value is ignored, or when it is being assigned to an
 /// __unsafe_unretained variable.
 ///
@@ -3261,19 +3243,19 @@
                            "__assign_helper_atomic_property_",
                            &CGM.getModule());
 
-  CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
 
   StartFunction(FD, C.VoidTy, Fn, FI, args);
   
   DeclRefExpr DstExpr(&DstDecl, false, DestTy,
                       VK_RValue, SourceLocation());
   UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(),
-                    VK_LValue, OK_Ordinary, SourceLocation());
+                    VK_LValue, OK_Ordinary, SourceLocation(), false);
   
   DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
                       VK_RValue, SourceLocation());
   UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
-                    VK_LValue, OK_Ordinary, SourceLocation());
+                    VK_LValue, OK_Ordinary, SourceLocation(), false);
   
   Expr *Args[2] = { &DST, &SRC };
   CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment());
@@ -3342,8 +3324,8 @@
   llvm::Function *Fn =
   llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage,
                          "__copy_helper_atomic_property_", &CGM.getModule());
-  
-  CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
+
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
 
   StartFunction(FD, C.VoidTy, Fn, FI, args);
   
@@ -3351,7 +3333,7 @@
                       VK_RValue, SourceLocation());
   
   UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
-                    VK_LValue, OK_Ordinary, SourceLocation());
+                    VK_LValue, OK_Ordinary, SourceLocation(), false);
   
   CXXConstructExpr *CXXConstExpr = 
     cast<CXXConstructExpr>(PID->getGetterCXXConstructor());
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index c8b8be7..5a025ce 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -80,8 +80,7 @@
     if (!Function) {
       if (!FunctionName)
         return nullptr;
-      Function =
-          cast<llvm::Constant>(CGM->CreateRuntimeFunction(FTy, FunctionName));
+      Function = CGM->CreateRuntimeFunction(FTy, FunctionName);
     }
     return Function;
   }
@@ -1035,16 +1034,8 @@
                                  const ObjCInterfaceDecl *OID) {
   auto *Value =
       GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported());
-  if (CGM.getTriple().isOSBinFormatCOFF()) {
-    if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) {
-      auto DLLStorage = llvm::GlobalValue::DefaultStorageClass;
-      if (OID->hasAttr<DLLExportAttr>())
-        DLLStorage = llvm::GlobalValue::DLLExportStorageClass;
-      else if (OID->hasAttr<DLLImportAttr>())
-        DLLStorage = llvm::GlobalValue::DLLImportStorageClass;
-      ClassSymbol->setDLLStorageClass(DLLStorage);
-    }
-  }
+  if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value))
+    CGM.setGVProperties(ClassSymbol, OID);
   return Value;
 }
 
@@ -1061,13 +1052,7 @@
         if ((VD = dyn_cast<VarDecl>(Result)))
           break;
 
-      auto DLLStorage = llvm::GlobalValue::DefaultStorageClass;
-      if (!VD || VD->hasAttr<DLLImportAttr>())
-        DLLStorage = llvm::GlobalValue::DLLImportStorageClass;
-      else if (VD->hasAttr<DLLExportAttr>())
-        DLLStorage = llvm::GlobalValue::DLLExportStorageClass;
-
-      ClassSymbol->setDLLStorageClass(DLLStorage);
+      CGM.setGVProperties(ClassSymbol, VD);
     }
   }
   return Value;
@@ -2336,14 +2321,8 @@
       NULLPtr, NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0],
       GenerateIvarList(empty, empty, empty), ClassMethodList, NULLPtr, NULLPtr,
       NULLPtr, ZeroPtr, ZeroPtr, true);
-  if (CGM.getTriple().isOSBinFormatCOFF()) {
-    auto Storage = llvm::GlobalValue::DefaultStorageClass;
-    if (OID->getClassInterface()->hasAttr<DLLImportAttr>())
-      Storage = llvm::GlobalValue::DLLImportStorageClass;
-    else if (OID->getClassInterface()->hasAttr<DLLExportAttr>())
-      Storage = llvm::GlobalValue::DLLExportStorageClass;
-    cast<llvm::GlobalValue>(MetaClassStruct)->setDLLStorageClass(Storage);
-  }
+  CGM.setGVProperties(cast<llvm::GlobalValue>(MetaClassStruct),
+                      OID->getClassInterface());
 
   // Generate the class structure
   llvm::Constant *ClassStruct = GenerateClassStructure(
@@ -2351,14 +2330,8 @@
       llvm::ConstantInt::get(LongTy, instanceSize), IvarList, MethodList,
       GenerateProtocolList(Protocols), IvarOffsetArray, Properties,
       StrongIvarBitmap, WeakIvarBitmap);
-  if (CGM.getTriple().isOSBinFormatCOFF()) {
-    auto Storage = llvm::GlobalValue::DefaultStorageClass;
-    if (OID->getClassInterface()->hasAttr<DLLImportAttr>())
-      Storage = llvm::GlobalValue::DLLImportStorageClass;
-    else if (OID->getClassInterface()->hasAttr<DLLExportAttr>())
-      Storage = llvm::GlobalValue::DLLExportStorageClass;
-    cast<llvm::GlobalValue>(ClassStruct)->setDLLStorageClass(Storage);
-  }
+  CGM.setGVProperties(cast<llvm::GlobalValue>(ClassStruct),
+                      OID->getClassInterface());
 
   // Resolve the class aliases, if they exist.
   if (ClassPtrAlias) {
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index 98435fe..c3793b2 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -4179,10 +4179,6 @@
   }
 }
 
-static void addIfPresent(llvm::DenseSet<llvm::Value*> &S, llvm::Value *V) {
-  if (V) S.insert(V);
-}
-
 static void addIfPresent(llvm::DenseSet<llvm::Value*> &S, Address V) {
   if (V.isValid()) S.insert(V.getPointer());
 }
@@ -4885,10 +4881,7 @@
   }
 
   // Indicate whether we're compiling this to run on a simulator.
-  const llvm::Triple &Triple = CGM.getTarget().getTriple();
-  if ((Triple.isiOS() || Triple.isWatchOS()) &&
-      (Triple.getArch() == llvm::Triple::x86 ||
-       Triple.getArch() == llvm::Triple::x86_64))
+  if (CGM.getTarget().getTriple().isSimulatorEnvironment())
     Mod.addModuleFlag(llvm::Module::Error, "Objective-C Is Simulated",
                       eImageInfo_ImageIsSimulated);
 
@@ -5084,6 +5077,11 @@
 
   // Drill down into arrays.
   uint64_t numElts = 1;
+  if (auto arrayType = CGM.getContext().getAsIncompleteArrayType(fieldType)) {
+    numElts = 0;
+    fieldType = arrayType->getElementType();
+  }
+  // Unlike incomplete arrays, constant arrays can be nested.
   while (auto arrayType = CGM.getContext().getAsConstantArrayType(fieldType)) {
     numElts *= arrayType->getSize().getZExtValue();
     fieldType = arrayType->getElementType();
@@ -6307,9 +6305,7 @@
   llvm::GlobalVariable *MetaTClass =
     BuildClassObject(CI, /*metaclass*/ true,
                      IsAGV, SuperClassGV, CLASS_RO_GV, classIsHidden);
-  if (CGM.getTriple().isOSBinFormatCOFF())
-    if (CI->hasAttr<DLLExportAttr>())
-      MetaTClass->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+  CGM.setGVProperties(MetaTClass, CI);
   DefinedMetaClasses.push_back(MetaTClass);
 
   // Metadata for the class
@@ -6349,9 +6345,7 @@
   llvm::GlobalVariable *ClassMD =
     BuildClassObject(CI, /*metaclass*/ false,
                      MetaTClass, SuperClassGV, CLASS_RO_GV, classIsHidden);
-  if (CGM.getTriple().isOSBinFormatCOFF())
-    if (CI->hasAttr<DLLExportAttr>())
-      ClassMD->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+  CGM.setGVProperties(ClassMD, CI);
   DefinedClasses.push_back(ClassMD);
   ImplementedClasses.push_back(CI);
 
@@ -6401,7 +6395,7 @@
   PTGV->setAlignment(Align.getQuantity());
   if (!CGM.getTriple().isOSBinFormatMachO())
     PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolName));
-  CGM.addCompilerUsedGlobal(PTGV);
+  CGM.addUsedGlobal(PTGV);
   return CGF.Builder.CreateAlignedLoad(PTGV, Align);
 }
 
@@ -6615,10 +6609,14 @@
           Ivar->getAccessControl() == ObjCIvarDecl::Private ||
           Ivar->getAccessControl() == ObjCIvarDecl::Package;
 
-      if (ID->hasAttr<DLLExportAttr>() && !IsPrivateOrPackage)
-        IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
-      else if (ID->hasAttr<DLLImportAttr>())
-        IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+      const ObjCInterfaceDecl *ContainingID = Ivar->getContainingInterface();
+
+      if (ContainingID->hasAttr<DLLImportAttr>())
+        IvarOffsetGV
+            ->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+      else if (ContainingID->hasAttr<DLLExportAttr>() && !IsPrivateOrPackage)
+        IvarOffsetGV
+            ->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
     }
   }
   return IvarOffsetGV;
@@ -6841,7 +6839,7 @@
     Protocols[PD->getIdentifier()] = Entry;
   }
   Entry->setVisibility(llvm::GlobalValue::HiddenVisibility);
-  CGM.addCompilerUsedGlobal(Entry);
+  CGM.addUsedGlobal(Entry);
 
   // Use this protocol meta-data to build protocol list table in section
   // __DATA, __objc_protolist
@@ -6860,7 +6858,7 @@
   PTGV->setSection(GetSectionName("__objc_protolist",
                                   "coalesced,no_dead_strip"));
   PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
-  CGM.addCompilerUsedGlobal(PTGV);
+  CGM.addUsedGlobal(PTGV);
   return Entry;
 }
 
@@ -7522,12 +7520,7 @@
       Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy,
                                        false, llvm::GlobalValue::ExternalLinkage,
                                        nullptr, EHTypeName);
-      if (CGM.getTriple().isOSBinFormatCOFF()) {
-        if (ID->hasAttr<DLLExportAttr>())
-          Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
-        else if (ID->hasAttr<DLLImportAttr>())
-          Entry->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
-      }
+      CGM.setGVProperties(Entry, ID);
       return Entry;
     }
   }
@@ -7549,8 +7542,9 @@
   llvm::Value *VTableIdx = llvm::ConstantInt::get(CGM.Int32Ty, 2);
   ConstantInitBuilder builder(CGM);
   auto values = builder.beginStruct(ObjCTypes.EHTypeTy);
-  values.add(llvm::ConstantExpr::getGetElementPtr(VTableGV->getValueType(),
-                                                  VTableGV, VTableIdx));
+  values.add(
+    llvm::ConstantExpr::getInBoundsGetElementPtr(VTableGV->getValueType(),
+                                                 VTableGV, VTableIdx));
   values.add(GetClassName(ClassName));
   values.add(GetClassGlobal(ID, /*metaclass*/ false, NotForDefinition));
 
@@ -7565,10 +7559,8 @@
                                          CGM.getPointerAlign(),
                                          /*constant*/ false,
                                          L);
-    if (CGM.getTriple().isOSBinFormatCOFF())
-      if (hasObjCExceptionAttribute(CGM.getContext(), ID))
-        if (ID->hasAttr<DLLExportAttr>())
-          Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+    if (hasObjCExceptionAttribute(CGM.getContext(), ID))
+      CGM.setGVProperties(Entry, ID);
   }
   assert(Entry->getLinkage() == L);
 
diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp
index 74b8698..2f886fd 100644
--- a/lib/CodeGen/CGObjCRuntime.cpp
+++ b/lib/CodeGen/CGObjCRuntime.cpp
@@ -110,7 +110,7 @@
                                    llvm::Type::getIntNTy(CGF.getLLVMContext(),
                                                          Info->StorageSize));
   return LValue::MakeBitfield(Addr, *Info, IvarTy,
-                              LValueBaseInfo(AlignmentSource::Decl, false),
+                              LValueBaseInfo(AlignmentSource::Decl),
                               TBAAAccessInfo());
 }
 
diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
index 4d63761..6d0089d 100644
--- a/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -37,7 +37,7 @@
 
   llvm::LLVMContext& Ctx = CGM.getLLVMContext();
   uint32_t AddrSpc = CGM.getContext().getTargetAddressSpace(
-      CGM.getTarget().getOpenCLTypeAddrSpace(T));
+      CGM.getContext().getOpenCLTypeAddrSpace(T));
   switch (cast<BuiltinType>(T)->getKind()) {
   default:
     llvm_unreachable("Unexpected opencl builtin type!");
@@ -68,7 +68,7 @@
 llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T) {
   if (!PipeTy){
     uint32_t PipeAddrSpc = CGM.getContext().getTargetAddressSpace(
-        CGM.getTarget().getOpenCLTypeAddrSpace(T));
+        CGM.getContext().getOpenCLTypeAddrSpace(T));
     PipeTy = llvm::PointerType::get(llvm::StructType::create(
       CGM.getLLVMContext(), "opencl.pipe_t"), PipeAddrSpc);
   }
@@ -81,7 +81,7 @@
     SamplerTy = llvm::PointerType::get(llvm::StructType::create(
       CGM.getLLVMContext(), "opencl.sampler_t"),
       CGM.getContext().getTargetAddressSpace(
-          CGM.getTarget().getOpenCLTypeAddrSpace(T)));
+          CGM.getContext().getOpenCLTypeAddrSpace(T)));
   return SamplerTy;
 }
 
@@ -112,37 +112,51 @@
       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
 }
 
+/// Record emitted llvm invoke function and llvm block literal for the
+/// corresponding block expression.
+void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
+                                      llvm::Function *InvokeF,
+                                      llvm::Value *Block) {
+  assert(EnqueuedBlockMap.find(E) == EnqueuedBlockMap.end() &&
+         "Block expression emitted twice");
+  assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
+  assert(Block->getType()->isPointerTy() && "Invalid block literal type");
+  EnqueuedBlockMap[E].InvokeFunc = InvokeF;
+  EnqueuedBlockMap[E].BlockArg = Block;
+  EnqueuedBlockMap[E].Kernel = nullptr;
+}
+
 CGOpenCLRuntime::EnqueuedBlockInfo
 CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
+  CGF.EmitScalarExpr(E);
+
   // The block literal may be assigned to a const variable. Chasing down
   // to get the block literal.
   if (auto DR = dyn_cast<DeclRefExpr>(E)) {
     E = cast<VarDecl>(DR->getDecl())->getInit();
   }
+  E = E->IgnoreImplicit();
   if (auto Cast = dyn_cast<CastExpr>(E)) {
     E = Cast->getSubExpr();
   }
   auto *Block = cast<BlockExpr>(E);
 
-  // The same block literal may be enqueued multiple times. Cache it if
-  // possible.
-  auto Loc = EnqueuedBlockMap.find(Block);
-  if (Loc != EnqueuedBlockMap.end()) {
-    return Loc->second;
+  assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
+         "Block expression not emitted");
+
+  // Do not emit the block wrapper again if it has been emitted.
+  if (EnqueuedBlockMap[Block].Kernel) {
+    return EnqueuedBlockMap[Block];
   }
 
-  // Emit block literal as a common block expression and get the block invoke
-  // function.
-  llvm::Function *Invoke;
-  auto *V = CGF.EmitBlockLiteral(cast<BlockExpr>(Block), &Invoke);
   auto *F = CGF.getTargetHooks().createEnqueuedBlockKernel(
-      CGF, Invoke, V->stripPointerCasts());
+      CGF, EnqueuedBlockMap[Block].InvokeFunc,
+      EnqueuedBlockMap[Block].BlockArg->stripPointerCasts());
 
   // The common part of the post-processing of the kernel goes here.
   F->addFnAttr(llvm::Attribute::NoUnwind);
   F->setCallingConv(
       CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel));
-  EnqueuedBlockInfo Info{F, V};
-  EnqueuedBlockMap[Block] = Info;
-  return Info;
+  EnqueuedBlockMap[Block].Kernel = F;
+  return EnqueuedBlockMap[Block];
 }
diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
index ead303d..c0088b5 100644
--- a/lib/CodeGen/CGOpenCLRuntime.h
+++ b/lib/CodeGen/CGOpenCLRuntime.h
@@ -23,6 +23,7 @@
 
 namespace clang {
 
+class BlockExpr;
 class Expr;
 class VarDecl;
 
@@ -39,8 +40,9 @@
 
   /// Structure for enqueued block information.
   struct EnqueuedBlockInfo {
-    llvm::Function *Kernel; /// Enqueued block kernel.
-    llvm::Value *BlockArg;  /// The first argument to enqueued block kernel.
+    llvm::Function *InvokeFunc; /// Block invoke function.
+    llvm::Function *Kernel;     /// Enqueued block kernel.
+    llvm::Value *BlockArg;      /// The first argument to enqueued block kernel.
   };
   /// Maps block expression to block information.
   llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap;
@@ -76,6 +78,15 @@
   /// \return enqueued block information for enqueued block.
   EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF,
                                             const Expr *E);
+
+  /// \brief Record invoke function and block literal emitted during normal
+  /// codegen for a block expression. The information is used by
+  /// emitOpenCLEnqueuedBlock to emit wrapper kernel.
+  ///
+  /// \param InvokeF invoke function emitted for the block expression.
+  /// \param Block block literal emitted for the block expression.
+  void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
+                       llvm::Value *Block);
 };
 
 }
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index f98ff85..f9dab90 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -147,7 +147,7 @@
         auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
             CGF.GetAddrOfLocalVar(PartIDVar),
             PartIDVar->getType()->castAs<PointerType>());
-        auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
+        auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
         auto *DoneBB = CGF.createBasicBlock(".untied.done.");
         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
         CGF.EmitBlock(DoneBB);
@@ -354,7 +354,7 @@
       DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                       /*RefersToEnclosingVariableOrCapture=*/false,
                       VD->getType().getNonReferenceType(), VK_LValue,
-                      SourceLocation());
+                      C.getLocation());
       PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
         return CGF.EmitLValue(&DRE).getAddress();
       });
@@ -397,6 +397,7 @@
   CodeGenFunction &CGF;
   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
   FieldDecl *LambdaThisCaptureField = nullptr;
+  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
 
 public:
   /// \brief Constructs region for combined constructs.
@@ -412,6 +413,8 @@
     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
     CGF.LambdaThisCaptureField = nullptr;
+    BlockInfo = CGF.BlockInfo;
+    CGF.BlockInfo = nullptr;
   }
 
   ~InlinedOpenMPRegionRAII() {
@@ -422,6 +425,7 @@
     CGF.CapturedStmtInfo = OldCSI;
     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
+    CGF.BlockInfo = BlockInfo;
   }
 };
 
@@ -668,27 +672,47 @@
   //
   // Offloading related calls
   //
-  // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
-  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
+  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
+  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
   // *arg_types);
   OMPRTL__tgt_target,
-  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
-  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
-  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
+  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+  // *arg_types);
+  OMPRTL__tgt_target_nowait,
+  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
+  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+  // *arg_types, int32_t num_teams, int32_t thread_limit);
   OMPRTL__tgt_target_teams,
+  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
+  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
+  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
+  OMPRTL__tgt_target_teams_nowait,
   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
   OMPRTL__tgt_register_lib,
   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
   OMPRTL__tgt_unregister_lib,
-  // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
-  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
+  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
   OMPRTL__tgt_target_data_begin,
-  // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
-  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
+  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+  // *arg_types);
+  OMPRTL__tgt_target_data_begin_nowait,
+  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
+  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
   OMPRTL__tgt_target_data_end,
-  // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
-  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
+  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+  // *arg_types);
+  OMPRTL__tgt_target_data_end_nowait,
+  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
+  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
   OMPRTL__tgt_target_data_update,
+  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
+  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+  // *arg_types);
+  OMPRTL__tgt_target_data_update_nowait,
 };
 
 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
@@ -763,17 +787,17 @@
     RValue InitRVal;
     switch (CGF.getEvaluationKind(Ty)) {
     case TEK_Scalar:
-      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
+      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
       break;
     case TEK_Complex:
       InitRVal =
-          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
+          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
       break;
     case TEK_Aggregate:
       InitRVal = RValue::getAggregate(LV.getAddress());
       break;
     }
-    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
+    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                          /*IsInitializer=*/false);
@@ -916,8 +940,9 @@
 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
   assert(SharedAddresses.size() == N &&
          "Number of generated lvalues must be exactly N.");
-  SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref),
-                               emitSharedLValueUB(CGF, ClausesData[N].Ref));
+  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
+  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
+  SharedAddresses.emplace_back(First, Second);
 }
 
 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
@@ -925,7 +950,7 @@
       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
   QualType PrivateType = PrivateVD->getType();
   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
-  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
+  if (!PrivateType->isVariablyModifiedType()) {
     Sizes.emplace_back(
         CGF.getTypeSize(
             SharedAddresses[N].first.getType().getNonReferenceType()),
@@ -963,10 +988,9 @@
   auto *PrivateVD =
       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
   QualType PrivateType = PrivateVD->getType();
-  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
-  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
+  if (!PrivateType->isVariablyModifiedType()) {
     assert(!Size && !Sizes[N].second &&
-           "Size should be nullptr for non-variably modified redution "
+           "Size should be nullptr for non-variably modified reduction "
            "items.");
     return;
   }
@@ -993,9 +1017,8 @@
       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                        CGF.ConvertTypeForMem(SharedType)),
       SharedType, SharedAddresses[N].first.getBaseInfo(),
-      CGF.CGM.getTBAAAccessInfo(SharedType));
-  if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
-      CGF.getContext().getAsArrayType(PrivateVD->getType())) {
+      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
+  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
@@ -1038,8 +1061,8 @@
     if (auto *PtrTy = BaseTy->getAs<PointerType>())
       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
     else {
-      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
-                                             BaseTy->castAs<ReferenceType>());
+      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
+      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
     }
     BaseTy = BaseTy->getPointeeType();
   }
@@ -1047,7 +1070,7 @@
       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                        CGF.ConvertTypeForMem(ElTy)),
       BaseLV.getType(), BaseLV.getBaseInfo(),
-      CGF.CGM.getTBAAAccessInfo(BaseLV.getType()));
+      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
 }
 
 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
@@ -1078,11 +1101,9 @@
   return Address(Addr, BaseLVAlignment);
 }
 
-Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
-                                               Address PrivateAddr) {
-  const DeclRefExpr *DE;
+static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
   const VarDecl *OrigVD = nullptr;
-  if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
+  if (auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
     auto *Base = OASE->getBase()->IgnoreParenImpCasts();
     while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
       Base = TempOASE->getBase()->IgnoreParenImpCasts();
@@ -1090,14 +1111,20 @@
       Base = TempASE->getBase()->IgnoreParenImpCasts();
     DE = cast<DeclRefExpr>(Base);
     OrigVD = cast<VarDecl>(DE->getDecl());
-  } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
+  } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
     auto *Base = ASE->getBase()->IgnoreParenImpCasts();
     while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
       Base = TempASE->getBase()->IgnoreParenImpCasts();
     DE = cast<DeclRefExpr>(Base);
     OrigVD = cast<VarDecl>(DE->getDecl());
   }
-  if (OrigVD) {
+  return OrigVD;
+}
+
+Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
+                                               Address PrivateAddr) {
+  const DeclRefExpr *DE;
+  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
     BaseDecls.emplace_back(OrigVD);
     auto OriginalBaseLValue = CGF.EmitLValue(DE);
     LValue BaseLValue =
@@ -1105,11 +1132,14 @@
                     OriginalBaseLValue);
     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
         BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
-    llvm::Value *Ptr =
-        CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment);
+    llvm::Value *PrivatePointer =
+        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            PrivateAddr.getPointer(),
+            SharedAddresses[N].first.getAddress().getType());
+    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
     return castToBase(CGF, OrigVD->getType(),
                       SharedAddresses[N].first.getType(),
-                      OriginalBaseLValue.getPointer()->getType(),
+                      OriginalBaseLValue.getAddress().getType(),
                       OriginalBaseLValue.getAlignment(), Ptr);
   }
   BaseDecls.emplace_back(
@@ -1183,14 +1213,15 @@
   auto *Fn = llvm::Function::Create(
       FnTy, llvm::GlobalValue::InternalLinkage,
       IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
   Fn->removeFnAttr(llvm::Attribute::NoInline);
   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
   Fn->addFnAttr(llvm::Attribute::AlwaysInline);
   CodeGenFunction CGF(CGM);
   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
+                    Out->getLocation());
   CodeGenFunction::OMPPrivateScope Scope(CGF);
   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
@@ -1292,6 +1323,15 @@
     HasCancel = OPSD->hasCancel();
   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
     HasCancel = OPFD->hasCancel();
+  else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
+    HasCancel = OPFD->hasCancel();
+  else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
+    HasCancel = OPFD->hasCancel();
+  else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
+    HasCancel = OPFD->hasCancel();
+  else if (auto *OPFD =
+               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
+    HasCancel = OPFD->hasCancel();
   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                     HasCancel, OutlinedHelperName);
   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
@@ -1335,7 +1375,10 @@
   CodeGen.setAction(Action);
   assert(!ThreadIDVar->getType()->isPointerType() &&
          "thread id variable must be of type kmp_int32 for tasks");
-  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
+  const OpenMPDirectiveKind Region =
+      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
+                                                      : OMPD_task;
+  auto *CS = D.getCapturedStmt(Region);
   auto *TD = dyn_cast<OMPTaskDirective>(&D);
   CodeGenFunction CGF(CGM, true);
   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
@@ -1452,13 +1495,15 @@
       return ThreadID;
   }
   // If exceptions are enabled, do not use parameter to avoid possible crash.
-  if (!CGF.getInvokeDest()) {
+  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
+      !CGF.getLangOpts().CXXExceptions ||
+      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
     if (auto *OMPRegionInfo =
             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
       if (OMPRegionInfo->getThreadIDVariable()) {
         // Check if this an outlined function with thread id passed as argument.
         auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
-        ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
+        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
         // If value loaded in entry block, cache it and use it everywhere in
         // function.
         if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
@@ -1476,12 +1521,13 @@
   // function.
   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
-  ThreadID =
-      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
-                          emitUpdateLocation(CGF, Loc));
+  auto *Call = CGF.Builder.CreateCall(
+      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
+      emitUpdateLocation(CGF, Loc));
+  Call->setCallingConv(CGF.getRuntimeCC());
   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
-  Elem.second.ThreadID = ThreadID;
-  return ThreadID;
+  Elem.second.ThreadID = Call;
+  return Call;
 }
 
 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
@@ -2013,32 +2059,48 @@
     break;
   }
   case OMPRTL__tgt_target: {
-    // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
-    // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
+    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
+    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
     // *arg_types);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty,
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                 CGM.VoidPtrTy,
                                 CGM.Int32Ty,
                                 CGM.VoidPtrPtrTy,
                                 CGM.VoidPtrPtrTy,
                                 CGM.SizeTy->getPointerTo(),
-                                CGM.Int32Ty->getPointerTo()};
+                                CGM.Int64Ty->getPointerTo()};
     llvm::FunctionType *FnTy =
         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
     break;
   }
-  case OMPRTL__tgt_target_teams: {
-    // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+  case OMPRTL__tgt_target_nowait: {
+    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
-    // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty,
+    // int64_t *arg_types);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                 CGM.VoidPtrTy,
                                 CGM.Int32Ty,
                                 CGM.VoidPtrPtrTy,
                                 CGM.VoidPtrPtrTy,
                                 CGM.SizeTy->getPointerTo(),
-                                CGM.Int32Ty->getPointerTo(),
+                                CGM.Int64Ty->getPointerTo()};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
+    break;
+  }
+  case OMPRTL__tgt_target_teams: {
+    // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
+    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+    // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
+                                CGM.VoidPtrTy,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrPtrTy,
+                                CGM.VoidPtrPtrTy,
+                                CGM.SizeTy->getPointerTo(),
+                                CGM.Int64Ty->getPointerTo(),
                                 CGM.Int32Ty,
                                 CGM.Int32Ty};
     llvm::FunctionType *FnTy =
@@ -2046,6 +2108,24 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
     break;
   }
+  case OMPRTL__tgt_target_teams_nowait: {
+    // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
+    // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
+    // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
+                                CGM.VoidPtrTy,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrPtrTy,
+                                CGM.VoidPtrPtrTy,
+                                CGM.SizeTy->getPointerTo(),
+                                CGM.Int64Ty->getPointerTo(),
+                                CGM.Int32Ty,
+                                CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
+    break;
+  }
   case OMPRTL__tgt_register_lib: {
     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
     QualType ParamTy =
@@ -2067,47 +2147,92 @@
     break;
   }
   case OMPRTL__tgt_target_data_begin: {
-    // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
-    // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty,
+    // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
+    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                 CGM.Int32Ty,
                                 CGM.VoidPtrPtrTy,
                                 CGM.VoidPtrPtrTy,
                                 CGM.SizeTy->getPointerTo(),
-                                CGM.Int32Ty->getPointerTo()};
+                                CGM.Int64Ty->getPointerTo()};
     llvm::FunctionType *FnTy =
         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
     break;
   }
-  case OMPRTL__tgt_target_data_end: {
-    // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
-    // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty,
+  case OMPRTL__tgt_target_data_begin_nowait: {
+    // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
+    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+    // *arg_types);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                 CGM.Int32Ty,
                                 CGM.VoidPtrPtrTy,
                                 CGM.VoidPtrPtrTy,
                                 CGM.SizeTy->getPointerTo(),
-                                CGM.Int32Ty->getPointerTo()};
+                                CGM.Int64Ty->getPointerTo()};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
+    break;
+  }
+  case OMPRTL__tgt_target_data_end: {
+    // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
+    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrPtrTy,
+                                CGM.VoidPtrPtrTy,
+                                CGM.SizeTy->getPointerTo(),
+                                CGM.Int64Ty->getPointerTo()};
     llvm::FunctionType *FnTy =
         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
     break;
   }
-  case OMPRTL__tgt_target_data_update: {
-    // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
-    // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty,
+  case OMPRTL__tgt_target_data_end_nowait: {
+    // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
+    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+    // *arg_types);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                 CGM.Int32Ty,
                                 CGM.VoidPtrPtrTy,
                                 CGM.VoidPtrPtrTy,
                                 CGM.SizeTy->getPointerTo(),
-                                CGM.Int32Ty->getPointerTo()};
+                                CGM.Int64Ty->getPointerTo()};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
+    break;
+  }
+  case OMPRTL__tgt_target_data_update: {
+    // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
+    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrPtrTy,
+                                CGM.VoidPtrPtrTy,
+                                CGM.SizeTy->getPointerTo(),
+                                CGM.Int64Ty->getPointerTo()};
     llvm::FunctionType *FnTy =
         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
     break;
   }
+  case OMPRTL__tgt_target_data_update_nowait: {
+    // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
+    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+    // *arg_types);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrPtrTy,
+                                CGM.VoidPtrPtrTy,
+                                CGM.SizeTy->getPointerTo(),
+                                CGM.Int64Ty->getPointerTo()};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
+    break;
+  }
   }
   assert(RTLFn && "Unable to find OpenMP runtime function");
   return RTLFn;
@@ -2266,7 +2391,8 @@
       // threadprivate copy of the variable VD
       CodeGenFunction CtorCGF(CGM);
       FunctionArgList Args;
-      ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
+                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                             ImplicitParamDecl::Other);
       Args.push_back(&Dst);
 
@@ -2276,13 +2402,13 @@
       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
           FTy, ".__kmpc_global_ctor_.", FI, Loc);
       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
-                            Args, SourceLocation());
+                            Args, Loc, Loc);
       auto ArgVal = CtorCGF.EmitLoadOfScalar(
           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
           CGM.getContext().VoidPtrTy, Dst.getLocation());
       Address Arg = Address(ArgVal, VDAddr.getAlignment());
-      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
-                                             CtorCGF.ConvertTypeForMem(ASTTy));
+      Arg = CtorCGF.Builder.CreateElementBitCast(
+          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                                /*IsInitializer=*/true);
       ArgVal = CtorCGF.EmitLoadOfScalar(
@@ -2297,7 +2423,8 @@
       // of the variable VD
       CodeGenFunction DtorCGF(CGM);
       FunctionArgList Args;
-      ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
+                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                             ImplicitParamDecl::Other);
       Args.push_back(&Dst);
 
@@ -2308,7 +2435,7 @@
           FTy, ".__kmpc_global_dtor_.", FI, Loc);
       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
-                            SourceLocation());
+                            Loc, Loc);
       // Create a scope with an artificial location for the body of this function.
       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
       auto ArgVal = DtorCGF.EmitLoadOfScalar(
@@ -2352,7 +2479,7 @@
       FunctionArgList ArgList;
       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
-                            Loc);
+                            Loc, Loc);
       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
       InitCGF.FinishFunction();
       return InitFunction;
@@ -2666,21 +2793,24 @@
 static llvm::Value *emitCopyprivateCopyFunction(
     CodeGenModule &CGM, llvm::Type *ArgsType,
     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
-    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
+    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
+    SourceLocation Loc) {
   auto &C = CGM.getContext();
   // void copy_func(void *LHSArg, void *RHSArg);
   FunctionArgList Args;
-  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
-  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+                           ImplicitParamDecl::Other);
+  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+                           ImplicitParamDecl::Other);
   Args.push_back(&LHSArg);
   Args.push_back(&RHSArg);
   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
   auto *Fn = llvm::Function::Create(
       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
       ".omp.copyprivate.copy_func", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
   CodeGenFunction CGF(CGM);
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
   // Dest = (void*[n])(LHSArg);
   // Src = (void*[n])(RHSArg);
   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
@@ -2771,7 +2901,7 @@
     // threads in the corresponding parallel region.
     auto *CpyFn = emitCopyprivateCopyFunction(
         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
-        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
+        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
     auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
     Address CL =
       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
@@ -3282,11 +3412,13 @@
   Args.push_back(&DummyPtr);
 
   CodeGenFunction CGF(CGM);
+  // Disable debug info for global (de-)initializer because they are not part of
+  // some particular construct.
+  CGF.disableDebugInfo();
   auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
   auto FTy = CGM.getTypes().GetFunctionType(FI);
-  auto *Fn =
-      CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
+  auto *Fn = CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args);
   Codegen(CGF);
   CGF.FinishFunction();
   return Fn;
@@ -3294,7 +3426,6 @@
 
 llvm::Function *
 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
-
   // If we don't have entries or if we are emitting code for the device, we
   // don't need to do anything.
   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
@@ -3446,11 +3577,9 @@
   EntryInit.addInt(CGM.SizeTy, Size);
   EntryInit.addInt(CGM.Int32Ty, Flags);
   EntryInit.addInt(CGM.Int32Ty, 0);
-  llvm::GlobalVariable *Entry =
-    EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
-                                    Align,
-                                    /*constant*/ true,
-                                    llvm::GlobalValue::ExternalLinkage);
+  llvm::GlobalVariable *Entry = EntryInit.finishAndCreateGlobal(
+      Twine(".omp_offloading.entry.") + Name, Align,
+      /*constant*/ true, llvm::GlobalValue::ExternalLinkage);
 
   // The entry has to be created in the section the linker expects it to be.
   Entry->setSection(".omp_offloading.entries");
@@ -3557,9 +3686,7 @@
   if (!MD)
     return;
 
-  for (auto I : MD->operands()) {
-    llvm::MDNode *MN = cast<llvm::MDNode>(I);
-
+  for (llvm::MDNode *MN : MD->operands()) {
     auto getMDInt = [&](unsigned Idx) {
       llvm::ConstantAsMetadata *V =
           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
@@ -3633,6 +3760,7 @@
     addFieldToRecordDecl(
         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
     RD->completeDefinition();
+    RD->addAttr(PackedAttr::CreateImplicit(C));
     TgtOffloadEntryQTy = C.getRecordType(RD);
   }
   return TgtOffloadEntryQTy;
@@ -3826,9 +3954,10 @@
   auto *TaskEntry =
       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                              ".omp_task_entry.", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
   CodeGenFunction CGF(CGM);
-  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
+  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
+                    Loc, Loc);
 
   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
   // tt,
@@ -3852,7 +3981,7 @@
   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
+      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
       CGF.ConvertTypeForMem(SharedsPtrTy));
 
   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
@@ -3875,19 +4004,19 @@
   if (isOpenMPTaskLoopDirective(Kind)) {
     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
     auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
-    auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
+    auto *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
     auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
-    auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
+    auto *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
     auto StLVal = CGF.EmitLValueForField(Base, *StFI);
-    auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
+    auto *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
     auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
-    auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
+    auto *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
     auto RLVal = CGF.EmitLValueForField(Base, *RFI);
-    auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
+    auto *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
     CallArgs.push_back(LBParam);
     CallArgs.push_back(UBParam);
     CallArgs.push_back(StParam);
@@ -3919,19 +4048,17 @@
                                 ImplicitParamDecl::Other);
   Args.push_back(&GtidArg);
   Args.push_back(&TaskTypeArg);
-  FunctionType::ExtInfo Info;
   auto &DestructorFnInfo =
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
   auto *DestructorFn =
       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                              ".omp_task_destructor.", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                     DestructorFnInfo);
   CodeGenFunction CGF(CGM);
-  CGF.disableDebugInfo();
   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
-                    Args);
+                    Args, Loc, Loc);
 
   LValue Base = CGF.EmitLoadOfPointerLValue(
       CGF.GetAddrOfLocalVar(&TaskTypeArg),
@@ -4017,15 +4144,14 @@
   auto *TaskPrivatesMap = llvm::Function::Create(
       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
       ".omp_task_privates_map.", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                     TaskPrivatesMapFnInfo);
   TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
   TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
   CodeGenFunction CGF(CGM);
-  CGF.disableDebugInfo();
   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
-                    TaskPrivatesMapFnInfo, Args);
+                    TaskPrivatesMapFnInfo, Args, Loc, Loc);
 
   // *privi = &.privates.privi;
   LValue Base = CGF.EmitLoadOfPointerLValue(
@@ -4046,9 +4172,9 @@
   return TaskPrivatesMap;
 }
 
-static int array_pod_sort_comparator(const PrivateDataTy *P1,
-                                     const PrivateDataTy *P2) {
-  return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
+static bool stable_sort_comparator(const PrivateDataTy P1,
+                                   const PrivateDataTy P2) {
+  return P1.first > P2.first;
 }
 
 /// Emit initialization for private variables in task-based directives.
@@ -4062,15 +4188,25 @@
   auto &C = CGF.getContext();
   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
+  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
+                                 ? OMPD_taskloop
+                                 : OMPD_task;
+  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
+  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
   LValue SrcBase;
-  if (!Data.FirstprivateVars.empty()) {
+  bool IsTargetTask =
+      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
+      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
+  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
+  // PointersArray and SizesArray. The original variables for these arrays are
+  // not captured and we get their addresses explicitly.
+  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
+      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
     SrcBase = CGF.MakeAddrLValue(
         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
         SharedsTy);
   }
-  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
-      cast<CapturedStmt>(*D.getAssociatedStmt()));
   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
   for (auto &&Pair : Privates) {
     auto *VD = Pair.second.PrivateCopy;
@@ -4080,21 +4216,34 @@
       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
       if (auto *Elem = Pair.second.PrivateElemInit) {
         auto *OriginalVD = Pair.second.Original;
-        auto *SharedField = CapturesInfo.lookup(OriginalVD);
-        auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
-        SharedRefLValue = CGF.MakeAddrLValue(
-            Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
-            SharedRefLValue.getType(),
-            LValueBaseInfo(AlignmentSource::Decl,
-                           SharedRefLValue.getBaseInfo().getMayAlias()),
-            CGF.CGM.getTBAAAccessInfo(SharedRefLValue.getType()));
+        // Check if the variable is the target-based BasePointersArray,
+        // PointersArray or SizesArray.
+        LValue SharedRefLValue;
         QualType Type = OriginalVD->getType();
+        auto *SharedField = CapturesInfo.lookup(OriginalVD);
+        if (IsTargetTask && !SharedField) {
+          assert(isa<ImplicitParamDecl>(OriginalVD) &&
+                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
+                 cast<CapturedDecl>(OriginalVD->getDeclContext())
+                         ->getNumParams() == 0 &&
+                 isa<TranslationUnitDecl>(
+                     cast<CapturedDecl>(OriginalVD->getDeclContext())
+                         ->getDeclContext()) &&
+                 "Expected artificial target data variable.");
+          SharedRefLValue =
+              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
+        } else {
+          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
+          SharedRefLValue = CGF.MakeAddrLValue(
+              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
+              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
+              SharedRefLValue.getTBAAInfo());
+        }
         if (Type->isArrayType()) {
           // Initialize firstprivate array.
           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
             // Perform simple memcpy.
-            CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
-                                    SharedRefLValue.getAddress(), Type);
+            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
           } else {
             // Initialize firstprivate array using element-by-element
             // initialization.
@@ -4183,10 +4332,10 @@
   auto *TaskDup =
       llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
                              ".omp_task_dup.", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
   CodeGenFunction CGF(CGM);
-  CGF.disableDebugInfo();
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
+                    Loc);
 
   LValue TDBase = CGF.EmitLoadOfPointerLValue(
       CGF.GetAddrOfLocalVar(&DstArg),
@@ -4277,8 +4426,7 @@
                          /*PrivateElemInit=*/nullptr)));
     ++I;
   }
-  llvm::array_pod_sort(Privates.begin(), Privates.end(),
-                       array_pod_sort_comparator);
+  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
   // Build type kmp_routine_entry_t (if not built yet).
   emitKmpRoutineEntryT(KmpInt32Ty);
@@ -4290,8 +4438,10 @@
     }
     KmpTaskTQTy = SavedKmpTaskloopTQTy;
   } else {
-    assert(D.getDirectiveKind() == OMPD_task &&
-           "Expected taskloop or task directive");
+    assert((D.getDirectiveKind() == OMPD_task ||
+            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
+            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
+           "Expected taskloop, task or target directive");
     if (SavedKmpTaskTQTy.isNull()) {
       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
@@ -4385,7 +4535,9 @@
                                            KmpTaskTShareds)),
                     Loc),
                 CGF.getNaturalTypeAlignment(SharedsTy));
-    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
+    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
+    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
+    CGF.EmitAggregateCopy(Dest, Src, SharedsTy);
   }
   // Emit initial values for private copies (if any).
   TaskResultTy Result;
@@ -4687,7 +4839,7 @@
       IfVal,
       LBLVal.getPointer(),
       UBLVal.getPointer(),
-      CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
+      CGF.EmitLoadOfScalar(StLVal, Loc),
       llvm::ConstantInt::getNullValue(
           CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
       llvm::ConstantInt::getSigned(
@@ -4805,24 +4957,26 @@
 }
 
 llvm::Value *CGOpenMPRuntime::emitReductionFunction(
-    CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
-    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
-    ArrayRef<const Expr *> ReductionOps) {
+    CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType,
+    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
+    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
   auto &C = CGM.getContext();
 
   // void reduction_func(void *LHSArg, void *RHSArg);
   FunctionArgList Args;
-  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
-  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+                           ImplicitParamDecl::Other);
+  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+                           ImplicitParamDecl::Other);
   Args.push_back(&LHSArg);
   Args.push_back(&RHSArg);
   auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
   auto *Fn = llvm::Function::Create(
       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
       ".omp.reduction.reduction_func", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
   CodeGenFunction CGF(CGM);
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
 
   // Dst = (void*[n])(LHSArg);
   // Src = (void*[n])(RHSArg);
@@ -5002,7 +5156,7 @@
       llvm::Value *Size = CGF.Builder.CreateIntCast(
           CGF.getVLASize(
                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
-              .first,
+              .NumElts,
           CGF.SizeTy, /*isSigned=*/false);
       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                               Elem);
@@ -5011,8 +5165,8 @@
 
   // 2. Emit reduce_func().
   auto *ReductionFn = emitReductionFunction(
-      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
-      LHSExprs, RHSExprs, ReductionOps);
+      CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
+      Privates, LHSExprs, RHSExprs, ReductionOps);
 
   // 3. Create static kmp_critical_name lock = { 0 };
   auto *Lock = getCriticalRegionLock(".reduction");
@@ -5205,12 +5359,19 @@
 }
 
 /// Generates unique name for artificial threadprivate variables.
-/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N>
-static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc,
-                                      unsigned N) {
+/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
+static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
+                                      const Expr *Ref) {
   SmallString<256> Buffer;
   llvm::raw_svector_ostream Out(Buffer);
-  Out << Prefix << "." << Loc.getRawEncoding() << "_" << N;
+  const clang::DeclRefExpr *DE;
+  const VarDecl *D = ::getBaseDecl(Ref, DE);
+  if (!D)
+    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
+  D = D->getCanonicalDecl();
+  Out << Prefix << "."
+      << (D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D))
+      << "_" << D->getCanonicalDecl()->getLocStart().getRawEncoding();
   return Out.str();
 }
 
@@ -5227,17 +5388,17 @@
                                            ReductionCodeGen &RCG, unsigned N) {
   auto &C = CGM.getContext();
   FunctionArgList Args;
-  ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+                          ImplicitParamDecl::Other);
   Args.emplace_back(&Param);
   auto &FnInfo =
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                     ".red_init.", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
   CodeGenFunction CGF(CGM);
-  CGF.disableDebugInfo();
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
   Address PrivateAddr = CGF.EmitLoadOfPointer(
       CGF.GetAddrOfLocalVar(&Param),
       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
@@ -5247,10 +5408,9 @@
   if (RCG.getSizes(N).second) {
     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
         CGF, CGM.getContext().getSizeType(),
-        generateUniqueName("reduction_size", Loc, N));
-    Size =
-        CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
-                             CGM.getContext().getSizeType(), SourceLocation());
+        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
+    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+                                CGM.getContext().getSizeType(), Loc);
   }
   RCG.emitAggregateType(CGF, N, Size);
   LValue SharedLVal;
@@ -5261,7 +5421,7 @@
     Address SharedAddr =
         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
             CGF, CGM.getContext().VoidPtrTy,
-            generateUniqueName("reduction", Loc, N));
+            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
   } else {
     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
@@ -5297,8 +5457,10 @@
   auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
   auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
   FunctionArgList Args;
-  ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other);
-  ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                               C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+                            ImplicitParamDecl::Other);
   Args.emplace_back(&ParamInOut);
   Args.emplace_back(&ParamIn);
   auto &FnInfo =
@@ -5306,20 +5468,18 @@
   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                     ".red_comb.", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
   CodeGenFunction CGF(CGM);
-  CGF.disableDebugInfo();
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
   llvm::Value *Size = nullptr;
   // If the size of the reduction item is non-constant, load it from global
   // threadprivate variable.
   if (RCG.getSizes(N).second) {
     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
         CGF, CGM.getContext().getSizeType(),
-        generateUniqueName("reduction_size", Loc, N));
-    Size =
-        CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
-                             CGM.getContext().getSizeType(), SourceLocation());
+        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
+    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+                                CGM.getContext().getSizeType(), Loc);
   }
   RCG.emitAggregateType(CGF, N, Size);
   // Remap lhs and rhs variables to the addresses of the function arguments.
@@ -5368,17 +5528,17 @@
     return nullptr;
   auto &C = CGM.getContext();
   FunctionArgList Args;
-  ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+                          ImplicitParamDecl::Other);
   Args.emplace_back(&Param);
   auto &FnInfo =
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
   auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                     ".red_fini.", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
   CodeGenFunction CGF(CGM);
-  CGF.disableDebugInfo();
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
   Address PrivateAddr = CGF.EmitLoadOfPointer(
       CGF.GetAddrOfLocalVar(&Param),
       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
@@ -5388,10 +5548,9 @@
   if (RCG.getSizes(N).second) {
     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
         CGF, CGM.getContext().getSizeType(),
-        generateUniqueName("reduction_size", Loc, N));
-    Size =
-        CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
-                             CGM.getContext().getSizeType(), SourceLocation());
+        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
+    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+                                CGM.getContext().getSizeType(), Loc);
   }
   RCG.emitAggregateType(CGF, N, Size);
   // Emit the finalizer body:
@@ -5518,14 +5677,14 @@
                                                      /*isSigned=*/false);
     Address SizeAddr = getAddrOfArtificialThreadPrivate(
         CGF, CGM.getContext().getSizeType(),
-        generateUniqueName("reduction_size", Loc, N));
+        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
   }
   // Store address of the original reduction item if custom initializer is used.
   if (RCG.usesReductionInitializer(N)) {
     Address SharedAddr = getAddrOfArtificialThreadPrivate(
         CGF, CGM.getContext().VoidPtrTy,
-        generateUniqueName("reduction", Loc, N));
+        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
     CGF.Builder.CreateStore(
         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
@@ -5738,7 +5897,7 @@
        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
   }
 
-  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
 
   CodeGenFunction CGF(CGM, true);
   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
@@ -5765,6 +5924,7 @@
   if (CGM.getLangOpts().OpenMPIsDevice) {
     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
+    OutlinedFn->setDSOLocal(false);
   } else
     OutlinedFnID = new llvm::GlobalVariable(
         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
@@ -5831,23 +5991,23 @@
   // the expression is captured in the enclosing target environment when the
   // teams directive is not combined with target.
 
-  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
 
-  // FIXME: Accommodate other combined directives with teams when they become
-  // available.
-  if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
+  if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
           ignoreCompoundStmts(CS.getCapturedStmt()))) {
-    if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
-      CGOpenMPInnerExprInfo CGInfo(CGF, CS);
-      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
-      llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
-      return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
-                               /*IsSigned=*/true);
-    }
+    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
+      if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
+        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
+        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+        llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
+        return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
+                                 /*IsSigned=*/true);
+      }
 
-    // If we have an enclosed teams directive but no num_teams clause we use
-    // the default value 0.
-    return Bld.getInt32(0);
+      // If we have an enclosed teams directive but no num_teams clause we use
+      // the default value 0.
+      return Bld.getInt32(0);
+    }
   }
 
   // No teams associated with the directive.
@@ -5934,23 +6094,23 @@
   // the expression is captured in the enclosing target environment when the
   // teams directive is not combined with target.
 
-  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
 
-  // FIXME: Accommodate other combined directives with teams when they become
-  // available.
-  if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
+  if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
           ignoreCompoundStmts(CS.getCapturedStmt()))) {
-    if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
-      CGOpenMPInnerExprInfo CGInfo(CGF, CS);
-      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
-      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
-      return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
-                                       /*IsSigned=*/true);
-    }
+    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
+      if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
+        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
+        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
+        return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
+                                         /*IsSigned=*/true);
+      }
 
-    // If we have an enclosed teams directive but no thread_limit clause we use
-    // the default value 0.
-    return CGF.Builder.getInt32(0);
+      // If we have an enclosed teams directive but no thread_limit clause we
+      // use the default value 0.
+      return CGF.Builder.getInt32(0);
+    }
   }
 
   // No teams associated with the directive.
@@ -5977,22 +6137,21 @@
     /// \brief Delete the element from the device environment, ignoring the
     /// current reference count associated with the element.
     OMP_MAP_DELETE = 0x08,
-    /// \brief The element being mapped is a pointer, therefore the pointee
-    /// should be mapped as well.
-    OMP_MAP_IS_PTR = 0x10,
-    /// \brief This flags signals that an argument is the first one relating to
-    /// a map/private clause expression. For some cases a single
-    /// map/privatization results in multiple arguments passed to the runtime
-    /// library.
-    OMP_MAP_FIRST_REF = 0x20,
+    /// \brief The element being mapped is a pointer-pointee pair; both the
+    /// pointer and the pointee should be mapped.
+    OMP_MAP_PTR_AND_OBJ = 0x10,
+    /// \brief This flags signals that the base address of an entry should be
+    /// passed to the target kernel as an argument.
+    OMP_MAP_TARGET_PARAM = 0x20,
     /// \brief Signal that the runtime library has to return the device pointer
-    /// in the current position for the data being mapped.
-    OMP_MAP_RETURN_PTR = 0x40,
+    /// in the current position for the data being mapped. Used when we have the
+    /// use_device_ptr clause.
+    OMP_MAP_RETURN_PARAM = 0x40,
     /// \brief This flag signals that the reference being passed is a pointer to
     /// private data.
-    OMP_MAP_PRIVATE_PTR = 0x80,
+    OMP_MAP_PRIVATE = 0x80,
     /// \brief Pass the element to the device by value.
-    OMP_MAP_PRIVATE_VAL = 0x100,
+    OMP_MAP_LITERAL = 0x100,
     /// Implicit map
     OMP_MAP_IMPLICIT = 0x200,
   };
@@ -6016,7 +6175,7 @@
 
   typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
-  typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
+  typedef SmallVector<uint64_t, 16> MapFlagsArrayTy;
 
 private:
   /// \brief Directive from where the map clauses were extracted.
@@ -6027,6 +6186,8 @@
 
   /// \brief Set of all first private variables in the current directive.
   llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
+  /// Set of all reduction variables in the current directive.
+  llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;
 
   /// Map between device pointer declarations and their expression components.
   /// The key value for declarations in 'this' is null.
@@ -6081,10 +6242,10 @@
   /// a flag marking the map as a pointer if requested. Add a flag marking the
   /// map as the first one of a series of maps that relate to the same map
   /// expression.
-  unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
+  uint64_t getMapTypeBits(OpenMPMapClauseKind MapType,
                           OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
-                          bool AddIsFirstFlag) const {
-    unsigned Bits = 0u;
+                          bool AddIsTargetParamFlag) const {
+    uint64_t Bits = 0u;
     switch (MapType) {
     case OMPC_MAP_alloc:
     case OMPC_MAP_release:
@@ -6110,9 +6271,9 @@
       break;
     }
     if (AddPtrFlag)
-      Bits |= OMP_MAP_IS_PTR;
-    if (AddIsFirstFlag)
-      Bits |= OMP_MAP_FIRST_REF;
+      Bits |= OMP_MAP_PTR_AND_OBJ;
+    if (AddIsTargetParamFlag)
+      Bits |= OMP_MAP_TARGET_PARAM;
     if (MapTypeModifier == OMPC_MAP_always)
       Bits |= OMP_MAP_ALWAYS;
     return Bits;
@@ -6219,28 +6380,28 @@
     //
     // map(s.p[:22], s.a s.b)
     // &s, &(s.p), sizeof(double*), noflags
-    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
+    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag
     //
     // map(s.ps)
     // &s, &(s.ps), sizeof(S2*), noflags
     //
     // map(s.ps->s.i)
     // &s, &(s.ps), sizeof(S2*), noflags
-    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
+    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag
     //
     // map(s.ps->ps)
     // &s, &(s.ps), sizeof(S2*), noflags
-    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
+    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
     //
     // map(s.ps->ps->ps)
     // &s, &(s.ps), sizeof(S2*), noflags
-    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
-    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
+    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
+    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag
     //
     // map(s.ps->ps->s.f[:22])
     // &s, &(s.ps), sizeof(S2*), noflags
-    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
-    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
+    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
+    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
     //
     // map(ps)
     // &ps, &ps, sizeof(S2*), noflags
@@ -6256,29 +6417,28 @@
     //
     // map(ps->p[:22])
     // ps, &(ps->p), sizeof(double*), noflags
-    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
+    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag
     //
     // map(ps->ps)
     // ps, &(ps->ps), sizeof(S2*), noflags
     //
     // map(ps->ps->s.i)
     // ps, &(ps->ps), sizeof(S2*), noflags
-    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
+    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag
     //
     // map(ps->ps->ps)
     // ps, &(ps->ps), sizeof(S2*), noflags
-    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
+    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
     //
     // map(ps->ps->ps->ps)
     // ps, &(ps->ps), sizeof(S2*), noflags
-    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
-    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
+    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
+    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag
     //
     // map(ps->ps->ps->s.f[:22])
     // ps, &(ps->ps), sizeof(S2*), noflags
-    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
-    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
-    // extra_flag
+    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
+    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
 
     // Track if the map information being generated is the first for a capture.
     bool IsCaptureFirstInfo = IsFirstComponentList;
@@ -6319,7 +6479,7 @@
       }
     }
 
-    unsigned DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;
+    uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;
     for (; I != CE; ++I) {
       auto Next = std::next(I);
 
@@ -6418,8 +6578,14 @@
     // 'private ptr' and 'map to' flag. Return the right flags if the captured
     // declaration is known as first-private in this handler.
     if (FirstPrivateDecls.count(Cap.getCapturedVar()))
-      return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
+      return MappableExprsHandler::OMP_MAP_PRIVATE |
              MappableExprsHandler::OMP_MAP_TO;
+    // Reduction variable  will use only the 'private ptr' and 'map to_from'
+    // flag.
+    if (ReductionDecls.count(Cap.getCapturedVar())) {
+      return MappableExprsHandler::OMP_MAP_TO |
+             MappableExprsHandler::OMP_MAP_FROM;
+    }
 
     // We didn't modify anything.
     return CurrentModifiers;
@@ -6433,6 +6599,12 @@
       for (const auto *D : C->varlists())
         FirstPrivateDecls.insert(
             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
+    for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
+      for (const auto *D : C->varlists()) {
+        ReductionDecls.insert(
+            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
+      }
+    }
     // Extract device pointer clause information.
     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
       for (auto L : C->component_lists())
@@ -6552,14 +6724,12 @@
         // We didn't find any match in our map information - generate a zero
         // size array section.
         // FIXME: MSVC 2013 seems to require this-> to find member CGF.
-        llvm::Value *Ptr =
-            this->CGF
-                .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
-                .getScalarVal();
+        llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(IE),
+                                                      IE->getExprLoc());
         BasePointers.push_back({Ptr, VD});
         Pointers.push_back(Ptr);
         Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
-        Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
+        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
       }
 
     for (auto &M : Info) {
@@ -6597,7 +6767,7 @@
                  "No relevant declaration related with device pointer??");
 
           BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
-          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
+          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
         }
         IsFirstComponentList = false;
       }
@@ -6627,7 +6797,7 @@
     const ValueDecl *VD =
         Cap->capturesThis()
             ? nullptr
-            : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
+            : Cap->getCapturedVar()->getCanonicalDecl();
 
     // If this declaration appears in a is_device_ptr clause we just have to
     // pass the pointer by value. If it is a reference to a declaration, we just
@@ -6649,7 +6819,7 @@
       BasePointers.push_back({Arg, VD});
       Pointers.push_back(Arg);
       Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
-      Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF);
+      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
       return;
     }
 
@@ -6692,7 +6862,7 @@
       if (!RI.getType()->isAnyPointerType()) {
         // We have to signal to the runtime captures passed by value that are
         // not pointers.
-        CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL);
+        CurMapTypes.push_back(OMP_MAP_LITERAL);
         CurSizes.push_back(CGF.getTypeSize(RI.getType()));
       } else {
         // Pointers are implicitly mapped with a zero size and no flags
@@ -6712,19 +6882,12 @@
       // The default map type for a scalar/complex type is 'to' because by
       // default the value doesn't have to be retrieved. For an aggregate
       // type, the default is 'tofrom'.
-      CurMapTypes.push_back(ElementType->isAggregateType()
-                                ? (OMP_MAP_TO | OMP_MAP_FROM)
-                                : OMP_MAP_TO);
-
-      // If we have a capture by reference we may need to add the private
-      // pointer flag if the base declaration shows in some first-private
-      // clause.
-      CurMapTypes.back() =
-          adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
+      CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses(
+          CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
+                                             : OMP_MAP_TO));
     }
-    // Every default map produces a single argument, so, it is always the
-    // first one.
-    CurMapTypes.back() |= OMP_MAP_FIRST_REF;
+    // Every default map produces a single argument which is a target parameter.
+    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
   }
 };
 
@@ -6867,7 +7030,7 @@
         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
         /*Idx0=*/0, /*Idx1=*/0);
     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
-        llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
+        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
         Info.MapTypesArray,
         /*Idx0=*/0,
         /*Idx1=*/0);
@@ -6876,7 +7039,7 @@
     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
     MapTypesArrayArg =
-        llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
+        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
   }
 }
 
@@ -6884,86 +7047,27 @@
                                      const OMPExecutableDirective &D,
                                      llvm::Value *OutlinedFn,
                                      llvm::Value *OutlinedFnID,
-                                     const Expr *IfCond, const Expr *Device,
-                                     ArrayRef<llvm::Value *> CapturedVars) {
+                                     const Expr *IfCond, const Expr *Device) {
   if (!CGF.HaveInsertPoint())
     return;
 
   assert(OutlinedFn && "Invalid outlined function!");
 
-  // Fill up the arrays with all the captured variables.
-  MappableExprsHandler::MapValuesArrayTy KernelArgs;
-  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
-  MappableExprsHandler::MapValuesArrayTy Pointers;
-  MappableExprsHandler::MapValuesArrayTy Sizes;
-  MappableExprsHandler::MapFlagsArrayTy MapTypes;
+  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
+  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
+  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
+                                            PrePostActionTy &) {
+    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
+  };
+  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
 
-  MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
-  MappableExprsHandler::MapValuesArrayTy CurPointers;
-  MappableExprsHandler::MapValuesArrayTy CurSizes;
-  MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
-
-  // Get mappable expression information.
-  MappableExprsHandler MEHandler(D, CGF);
-
-  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
-  auto RI = CS.getCapturedRecordDecl()->field_begin();
-  auto CV = CapturedVars.begin();
-  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
-                                            CE = CS.capture_end();
-       CI != CE; ++CI, ++RI, ++CV) {
-    CurBasePointers.clear();
-    CurPointers.clear();
-    CurSizes.clear();
-    CurMapTypes.clear();
-
-    // VLA sizes are passed to the outlined region by copy and do not have map
-    // information associated.
-    if (CI->capturesVariableArrayType()) {
-      CurBasePointers.push_back(*CV);
-      CurPointers.push_back(*CV);
-      CurSizes.push_back(CGF.getTypeSize(RI->getType()));
-      // Copy to the device as an argument. No need to retrieve it.
-      CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
-                            MappableExprsHandler::OMP_MAP_FIRST_REF);
-    } else {
-      // If we have any information in the map clause, we use it, otherwise we
-      // just do a default mapping.
-      MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
-                                       CurSizes, CurMapTypes);
-      if (CurBasePointers.empty())
-        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
-                                         CurPointers, CurSizes, CurMapTypes);
-    }
-    // We expect to have at least an element of information for this capture.
-    assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
-    assert(CurBasePointers.size() == CurPointers.size() &&
-           CurBasePointers.size() == CurSizes.size() &&
-           CurBasePointers.size() == CurMapTypes.size() &&
-           "Inconsistent map information sizes!");
-
-    // The kernel args are always the first elements of the base pointers
-    // associated with a capture.
-    KernelArgs.push_back(*CurBasePointers.front());
-    // We need to append the results of this capture to what we already have.
-    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
-    Pointers.append(CurPointers.begin(), CurPointers.end());
-    Sizes.append(CurSizes.begin(), CurSizes.end());
-    MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
-  }
-
+  CodeGenFunction::OMPTargetDataInfo InputInfo;
+  llvm::Value *MapTypesArray = nullptr;
   // Fill up the pointer arrays and transfer execution to the device.
-  auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
-                    OutlinedFn, OutlinedFnID, &D,
-                    &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) {
-    auto &RT = CGF.CGM.getOpenMPRuntime();
-    // Emit the offloading arrays.
-    TargetDataInfo Info;
-    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
-    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
-                                 Info.PointersArray, Info.SizesArray,
-                                 Info.MapTypesArray, Info);
-
+  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
+                    &MapTypesArray, &CS, RequiresOuterTask,
+                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
     // On top of the arrays that were filled up, the target offloading call
     // takes as arguments the device id as well as the host pointer. The host
     // pointer is used by the runtime library to identify the current target
@@ -6978,21 +7082,24 @@
 
     // Emit device ID if any.
     llvm::Value *DeviceID;
-    if (Device)
+    if (Device) {
       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
-                                           CGF.Int32Ty, /*isSigned=*/true);
-    else
-      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+                                           CGF.Int64Ty, /*isSigned=*/true);
+    } else {
+      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+    }
 
     // Emit the number of elements in the offloading arrays.
-    llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
+    llvm::Value *PointerNum =
+        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
 
     // Return value of the runtime offloading call.
     llvm::Value *Return;
 
-    auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
-    auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
+    auto *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
+    auto *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);
 
+    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
     // The target region is an outlined function launched by the runtime
     // via calls __tgt_target() or __tgt_target_teams().
     //
@@ -7028,22 +7135,31 @@
       // passed to the runtime library - a 32-bit integer with the value zero.
       assert(NumThreads && "Thread limit expression should be available along "
                            "with number of teams.");
-      llvm::Value *OffloadingArgs[] = {
-          DeviceID,           OutlinedFnID,
-          PointerNum,         Info.BasePointersArray,
-          Info.PointersArray, Info.SizesArray,
-          Info.MapTypesArray, NumTeams,
-          NumThreads};
+      llvm::Value *OffloadingArgs[] = {DeviceID,
+                                       OutlinedFnID,
+                                       PointerNum,
+                                       InputInfo.BasePointersArray.getPointer(),
+                                       InputInfo.PointersArray.getPointer(),
+                                       InputInfo.SizesArray.getPointer(),
+                                       MapTypesArray,
+                                       NumTeams,
+                                       NumThreads};
       Return = CGF.EmitRuntimeCall(
-          RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
+          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
+                                          : OMPRTL__tgt_target_teams),
+          OffloadingArgs);
     } else {
-      llvm::Value *OffloadingArgs[] = {
-          DeviceID,           OutlinedFnID,
-          PointerNum,         Info.BasePointersArray,
-          Info.PointersArray, Info.SizesArray,
-          Info.MapTypesArray};
-      Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
-                                   OffloadingArgs);
+      llvm::Value *OffloadingArgs[] = {DeviceID,
+                                       OutlinedFnID,
+                                       PointerNum,
+                                       InputInfo.BasePointersArray.getPointer(),
+                                       InputInfo.PointersArray.getPointer(),
+                                       InputInfo.SizesArray.getPointer(),
+                                       MapTypesArray};
+      Return = CGF.EmitRuntimeCall(
+          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
+                                          : OMPRTL__tgt_target),
+          OffloadingArgs);
     }
 
     // Check the error code and execute the host version if required.
@@ -7055,17 +7171,114 @@
     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
 
     CGF.EmitBlock(OffloadFailedBlock);
-    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs);
+    if (RequiresOuterTask) {
+      CapturedVars.clear();
+      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
+    }
+    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
     CGF.EmitBranch(OffloadContBlock);
 
     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
   };
 
   // Notify that the host version must be executed.
-  auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF,
-                                                      PrePostActionTy &) {
-    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn,
-                             KernelArgs);
+  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
+                    RequiresOuterTask](CodeGenFunction &CGF,
+                                       PrePostActionTy &) {
+    if (RequiresOuterTask) {
+      CapturedVars.clear();
+      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
+    }
+    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
+  };
+
+  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
+                          &CapturedVars, RequiresOuterTask,
+                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
+    // Fill up the arrays with all the captured variables.
+    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
+    MappableExprsHandler::MapValuesArrayTy Pointers;
+    MappableExprsHandler::MapValuesArrayTy Sizes;
+    MappableExprsHandler::MapFlagsArrayTy MapTypes;
+
+    MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
+    MappableExprsHandler::MapValuesArrayTy CurPointers;
+    MappableExprsHandler::MapValuesArrayTy CurSizes;
+    MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
+
+    // Get mappable expression information.
+    MappableExprsHandler MEHandler(D, CGF);
+
+    auto RI = CS.getCapturedRecordDecl()->field_begin();
+    auto CV = CapturedVars.begin();
+    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
+                                              CE = CS.capture_end();
+         CI != CE; ++CI, ++RI, ++CV) {
+      CurBasePointers.clear();
+      CurPointers.clear();
+      CurSizes.clear();
+      CurMapTypes.clear();
+
+      // VLA sizes are passed to the outlined region by copy and do not have map
+      // information associated.
+      if (CI->capturesVariableArrayType()) {
+        CurBasePointers.push_back(*CV);
+        CurPointers.push_back(*CV);
+        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
+        // Copy to the device as an argument. No need to retrieve it.
+        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
+                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
+      } else {
+        // If we have any information in the map clause, we use it, otherwise we
+        // just do a default mapping.
+        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
+                                         CurSizes, CurMapTypes);
+        if (CurBasePointers.empty())
+          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
+                                           CurPointers, CurSizes, CurMapTypes);
+      }
+      // We expect to have at least an element of information for this capture.
+      assert(!CurBasePointers.empty() &&
+             "Non-existing map pointer for capture!");
+      assert(CurBasePointers.size() == CurPointers.size() &&
+             CurBasePointers.size() == CurSizes.size() &&
+             CurBasePointers.size() == CurMapTypes.size() &&
+             "Inconsistent map information sizes!");
+
+      // We need to append the results of this capture to what we already have.
+      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
+      Pointers.append(CurPointers.begin(), CurPointers.end());
+      Sizes.append(CurSizes.begin(), CurSizes.end());
+      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
+    }
+
+    TargetDataInfo Info;
+    // Fill up the arrays and create the arguments.
+    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
+    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
+                                 Info.PointersArray, Info.SizesArray,
+                                 Info.MapTypesArray, Info);
+    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
+    InputInfo.BasePointersArray =
+        Address(Info.BasePointersArray, CGM.getPointerAlign());
+    InputInfo.PointersArray =
+        Address(Info.PointersArray, CGM.getPointerAlign());
+    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
+    MapTypesArray = Info.MapTypesArray;
+    if (RequiresOuterTask)
+      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+    else
+      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
+  };
+
+  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
+                             CodeGenFunction &CGF, PrePostActionTy &) {
+    if (RequiresOuterTask) {
+      CodeGenFunction::OMPTargetDataInfo InputInfo;
+      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
+    } else {
+      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
+    }
   };
 
   // If we have a target function ID it means that we need to support
@@ -7073,14 +7286,14 @@
   // regardless of the conditional in the if clause if, e.g., the user do not
   // specify target triples.
   if (OutlinedFnID) {
-    if (IfCond)
-      emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
-    else {
-      RegionCodeGenTy ThenRCG(ThenGen);
+    if (IfCond) {
+      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
+    } else {
+      RegionCodeGenTy ThenRCG(TargetThenGen);
       ThenRCG(CGF);
     }
   } else {
-    RegionCodeGenTy ElseRCG(ElseGen);
+    RegionCodeGenTy ElseRCG(TargetElseGen);
     ElseRCG(CGF);
   }
 }
@@ -7123,6 +7336,37 @@
       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
           CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
       break;
+    case Stmt::OMPTargetTeamsDistributeDirectiveClass:
+      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
+          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S));
+      break;
+    case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
+      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
+          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S));
+      break;
+    case Stmt::OMPTargetParallelForDirectiveClass:
+      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
+          CGM, ParentName, cast<OMPTargetParallelForDirective>(*S));
+      break;
+    case Stmt::OMPTargetParallelForSimdDirectiveClass:
+      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
+          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S));
+      break;
+    case Stmt::OMPTargetSimdDirectiveClass:
+      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
+          CGM, ParentName, cast<OMPTargetSimdDirective>(*S));
+      break;
+    case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass:
+      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
+          CGM, ParentName,
+          cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
+      break;
+    case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
+      CodeGenFunction::
+          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
+              CGM, ParentName,
+              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
+      break;
     default:
       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
     }
@@ -7130,12 +7374,11 @@
   }
 
   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
-    if (!E->hasAssociatedStmt())
+    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
       return;
 
     scanForTargetRegionsFunctions(
-        cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
-        ParentName);
+        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
     return;
   }
 
@@ -7276,8 +7519,8 @@
   // Generate the code for the opening of the data environment. Capture all the
   // arguments of the runtime call by reference because they are used in the
   // closing of the region.
-  auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF,
-                                                      PrePostActionTy &) {
+  auto &&BeginThenGen = [this, &D, Device, &Info,
+                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
     // Fill up the arrays with all the mapped variables.
     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
     MappableExprsHandler::MapValuesArrayTy Pointers;
@@ -7300,11 +7543,12 @@
 
     // Emit device ID if any.
     llvm::Value *DeviceID = nullptr;
-    if (Device)
+    if (Device) {
       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
-                                           CGF.Int32Ty, /*isSigned=*/true);
-    else
-      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+                                           CGF.Int64Ty, /*isSigned=*/true);
+    } else {
+      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+    }
 
     // Emit the number of elements in the offloading arrays.
     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
@@ -7312,8 +7556,7 @@
     llvm::Value *OffloadingArgs[] = {
         DeviceID,         PointerNum,    BasePointersArrayArg,
         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
-    auto &RT = CGF.CGM.getOpenMPRuntime();
-    CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                         OffloadingArgs);
 
     // If device pointer privatization is required, emit the body of the region
@@ -7323,7 +7566,8 @@
   };
 
   // Generate code for the closing of the data region.
-  auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) {
+  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
+                                            PrePostActionTy &) {
     assert(Info.isValid() && "Invalid data environment closing arguments.");
 
     llvm::Value *BasePointersArrayArg = nullptr;
@@ -7335,11 +7579,12 @@
 
     // Emit device ID if any.
     llvm::Value *DeviceID = nullptr;
-    if (Device)
+    if (Device) {
       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
-                                           CGF.Int32Ty, /*isSigned=*/true);
-    else
-      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+                                           CGF.Int64Ty, /*isSigned=*/true);
+    } else {
+      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+    }
 
     // Emit the number of elements in the offloading arrays.
     auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
@@ -7347,8 +7592,7 @@
     llvm::Value *OffloadingArgs[] = {
         DeviceID,         PointerNum,    BasePointersArrayArg,
         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
-    auto &RT = CGF.CGM.getOpenMPRuntime();
-    CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                         OffloadingArgs);
   };
 
@@ -7400,8 +7644,57 @@
           isa<OMPTargetUpdateDirective>(D)) &&
          "Expecting either target enter, exit data, or update directives.");
 
+  CodeGenFunction::OMPTargetDataInfo InputInfo;
+  llvm::Value *MapTypesArray = nullptr;
   // Generate the code for the opening of the data environment.
-  auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) {
+  auto &&ThenGen = [this, &D, Device, &InputInfo,
+                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
+    // Emit device ID if any.
+    llvm::Value *DeviceID = nullptr;
+    if (Device) {
+      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+                                           CGF.Int64Ty, /*isSigned=*/true);
+    } else {
+      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+    }
+
+    // Emit the number of elements in the offloading arrays.
+    llvm::Constant *PointerNum =
+        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
+
+    llvm::Value *OffloadingArgs[] = {DeviceID,
+                                     PointerNum,
+                                     InputInfo.BasePointersArray.getPointer(),
+                                     InputInfo.PointersArray.getPointer(),
+                                     InputInfo.SizesArray.getPointer(),
+                                     MapTypesArray};
+
+    // Select the right runtime function call for each expected standalone
+    // directive.
+    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
+    OpenMPRTLFunction RTLFn;
+    switch (D.getDirectiveKind()) {
+    default:
+      llvm_unreachable("Unexpected standalone target data directive.");
+      break;
+    case OMPD_target_enter_data:
+      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
+                        : OMPRTL__tgt_target_data_begin;
+      break;
+    case OMPD_target_exit_data:
+      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
+                        : OMPRTL__tgt_target_data_end;
+      break;
+    case OMPD_target_update:
+      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
+                        : OMPRTL__tgt_target_data_update;
+      break;
+    }
+    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
+  };
+
+  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
+                             CodeGenFunction &CGF, PrePostActionTy &) {
     // Fill up the arrays with all the mapped variables.
     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
     MappableExprsHandler::MapValuesArrayTy Pointers;
@@ -7412,58 +7705,32 @@
     MappableExprsHandler MEHandler(D, CGF);
     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
 
-    // Fill up the arrays and create the arguments.
     TargetDataInfo Info;
+    // Fill up the arrays and create the arguments.
     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                  Info.PointersArray, Info.SizesArray,
                                  Info.MapTypesArray, Info);
-
-    // Emit device ID if any.
-    llvm::Value *DeviceID = nullptr;
-    if (Device)
-      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
-                                           CGF.Int32Ty, /*isSigned=*/true);
+    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
+    InputInfo.BasePointersArray =
+        Address(Info.BasePointersArray, CGM.getPointerAlign());
+    InputInfo.PointersArray =
+        Address(Info.PointersArray, CGM.getPointerAlign());
+    InputInfo.SizesArray =
+        Address(Info.SizesArray, CGM.getPointerAlign());
+    MapTypesArray = Info.MapTypesArray;
+    if (D.hasClausesOfKind<OMPDependClause>())
+      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
     else
-      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
-
-    // Emit the number of elements in the offloading arrays.
-    auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
-
-    llvm::Value *OffloadingArgs[] = {
-        DeviceID,           PointerNum,      Info.BasePointersArray,
-        Info.PointersArray, Info.SizesArray, Info.MapTypesArray};
-
-    auto &RT = CGF.CGM.getOpenMPRuntime();
-    // Select the right runtime function call for each expected standalone
-    // directive.
-    OpenMPRTLFunction RTLFn;
-    switch (D.getDirectiveKind()) {
-    default:
-      llvm_unreachable("Unexpected standalone target data directive.");
-      break;
-    case OMPD_target_enter_data:
-      RTLFn = OMPRTL__tgt_target_data_begin;
-      break;
-    case OMPD_target_exit_data:
-      RTLFn = OMPRTL__tgt_target_data_end;
-      break;
-    case OMPD_target_update:
-      RTLFn = OMPRTL__tgt_target_data_update;
-      break;
-    }
-    CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
+      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
   };
 
-  // In the event we get an if clause, we don't have to take any action on the
-  // else side.
-  auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
-
-  if (IfCond) {
-    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
-  } else {
-    RegionCodeGenTy ThenGenRCG(ThenGen);
-    ThenGenRCG(CGF);
+  if (IfCond)
+    emitOMPIfClause(CGF, IfCond, TargetThenGen,
+                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
+  else {
+    RegionCodeGenTy ThenRCG(TargetThenGen);
+    ThenRCG(CGF);
   }
 }
 
@@ -7605,7 +7872,7 @@
 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                               llvm::Function *Fn) {
   ASTContext &C = CGM.getContext();
-  FD = FD->getCanonicalDecl();
+  FD = FD->getMostRecentDecl();
   // Map params to their positions in function decl.
   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
   if (isa<CXXMethodDecl>(FD))
@@ -7615,80 +7882,84 @@
     ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
     ++ParamPos;
   }
-  for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
-    llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
-    // Mark uniform parameters.
-    for (auto *E : Attr->uniforms()) {
-      E = E->IgnoreParenImpCasts();
-      unsigned Pos;
-      if (isa<CXXThisExpr>(E))
-        Pos = ParamPositions[FD];
-      else {
-        auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
-                        ->getCanonicalDecl();
-        Pos = ParamPositions[PVD];
+  while (FD) {
+    for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
+      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
+      // Mark uniform parameters.
+      for (auto *E : Attr->uniforms()) {
+        E = E->IgnoreParenImpCasts();
+        unsigned Pos;
+        if (isa<CXXThisExpr>(E))
+          Pos = ParamPositions[FD];
+        else {
+          auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
+                          ->getCanonicalDecl();
+          Pos = ParamPositions[PVD];
+        }
+        ParamAttrs[Pos].Kind = Uniform;
       }
-      ParamAttrs[Pos].Kind = Uniform;
-    }
-    // Get alignment info.
-    auto NI = Attr->alignments_begin();
-    for (auto *E : Attr->aligneds()) {
-      E = E->IgnoreParenImpCasts();
-      unsigned Pos;
-      QualType ParmTy;
-      if (isa<CXXThisExpr>(E)) {
-        Pos = ParamPositions[FD];
-        ParmTy = E->getType();
-      } else {
-        auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
-                        ->getCanonicalDecl();
-        Pos = ParamPositions[PVD];
-        ParmTy = PVD->getType();
-      }
-      ParamAttrs[Pos].Alignment =
-          (*NI) ? (*NI)->EvaluateKnownConstInt(C)
+      // Get alignment info.
+      auto NI = Attr->alignments_begin();
+      for (auto *E : Attr->aligneds()) {
+        E = E->IgnoreParenImpCasts();
+        unsigned Pos;
+        QualType ParmTy;
+        if (isa<CXXThisExpr>(E)) {
+          Pos = ParamPositions[FD];
+          ParmTy = E->getType();
+        } else {
+          auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
+                          ->getCanonicalDecl();
+          Pos = ParamPositions[PVD];
+          ParmTy = PVD->getType();
+        }
+        ParamAttrs[Pos].Alignment =
+            (*NI)
+                ? (*NI)->EvaluateKnownConstInt(C)
                 : llvm::APSInt::getUnsigned(
                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                           .getQuantity());
-      ++NI;
-    }
-    // Mark linear parameters.
-    auto SI = Attr->steps_begin();
-    auto MI = Attr->modifiers_begin();
-    for (auto *E : Attr->linears()) {
-      E = E->IgnoreParenImpCasts();
-      unsigned Pos;
-      if (isa<CXXThisExpr>(E))
-        Pos = ParamPositions[FD];
-      else {
-        auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
-                        ->getCanonicalDecl();
-        Pos = ParamPositions[PVD];
+        ++NI;
       }
-      auto &ParamAttr = ParamAttrs[Pos];
-      ParamAttr.Kind = Linear;
-      if (*SI) {
-        if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
-                                  Expr::SE_AllowSideEffects)) {
-          if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
-            if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
-              ParamAttr.Kind = LinearWithVarStride;
-              ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
-                  ParamPositions[StridePVD->getCanonicalDecl()]);
+      // Mark linear parameters.
+      auto SI = Attr->steps_begin();
+      auto MI = Attr->modifiers_begin();
+      for (auto *E : Attr->linears()) {
+        E = E->IgnoreParenImpCasts();
+        unsigned Pos;
+        if (isa<CXXThisExpr>(E))
+          Pos = ParamPositions[FD];
+        else {
+          auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
+                          ->getCanonicalDecl();
+          Pos = ParamPositions[PVD];
+        }
+        auto &ParamAttr = ParamAttrs[Pos];
+        ParamAttr.Kind = Linear;
+        if (*SI) {
+          if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
+                                    Expr::SE_AllowSideEffects)) {
+            if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
+              if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
+                ParamAttr.Kind = LinearWithVarStride;
+                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
+                    ParamPositions[StridePVD->getCanonicalDecl()]);
+              }
             }
           }
         }
+        ++SI;
+        ++MI;
       }
-      ++SI;
-      ++MI;
+      llvm::APSInt VLENVal;
+      if (const Expr *VLEN = Attr->getSimdlen())
+        VLENVal = VLEN->EvaluateKnownConstInt(C);
+      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
+      if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
+          CGM.getTriple().getArch() == llvm::Triple::x86_64)
+        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
     }
-    llvm::APSInt VLENVal;
-    if (const Expr *VLEN = Attr->getSimdlen())
-      VLENVal = VLEN->EvaluateKnownConstInt(C);
-    OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
-    if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
-        CGM.getTriple().getArch() == llvm::Triple::x86_64)
-      emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
+    FD = FD->getPreviousDecl();
   }
 }
 
@@ -7799,9 +8070,10 @@
   CGF.EmitRuntimeCall(RTLFn, Args);
 }
 
-void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee,
-                               ArrayRef<llvm::Value *> Args,
-                               SourceLocation Loc) const {
+void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
+                               llvm::Value *Callee,
+                               ArrayRef<llvm::Value *> Args) const {
+  assert(Loc.isValid() && "Outlined function call location must be valid.");
   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
 
   if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
@@ -7816,8 +8088,7 @@
 void CGOpenMPRuntime::emitOutlinedFunctionCall(
     CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
     ArrayRef<llvm::Value *> Args) const {
-  assert(Loc.isValid() && "Outlined function call location must be valid.");
-  emitCall(CGF, OutlinedFn, Args, Loc);
+  emitCall(CGF, Loc, OutlinedFn, Args);
 }
 
 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
@@ -7825,3 +8096,298 @@
                                              const VarDecl *TargetParam) const {
   return CGF.GetAddrOfLocalVar(NativeParam);
 }
+
+llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
+    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
+    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
+    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
+    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+    bool Tied, unsigned &NumberOfParts) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
+                                           SourceLocation Loc,
+                                           llvm::Value *OutlinedFn,
+                                           ArrayRef<llvm::Value *> CapturedVars,
+                                           const Expr *IfCond) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitCriticalRegion(
+    CodeGenFunction &CGF, StringRef CriticalName,
+    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
+    const Expr *Hint) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
+                                           const RegionCodeGenTy &MasterOpGen,
+                                           SourceLocation Loc) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
+                                            SourceLocation Loc) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
+    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
+    SourceLocation Loc) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitSingleRegion(
+    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
+    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
+    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
+    ArrayRef<const Expr *> AssignmentOps) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
+                                            const RegionCodeGenTy &OrderedOpGen,
+                                            SourceLocation Loc,
+                                            bool IsThreads) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
+                                          SourceLocation Loc,
+                                          OpenMPDirectiveKind Kind,
+                                          bool EmitChecks,
+                                          bool ForceSimpleCall) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitForDispatchInit(
+    CodeGenFunction &CGF, SourceLocation Loc,
+    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
+    bool Ordered, const DispatchRTInput &DispatchValues) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitForStaticInit(
+    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
+    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
+    CodeGenFunction &CGF, SourceLocation Loc,
+    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
+                                                     SourceLocation Loc,
+                                                     unsigned IVSize,
+                                                     bool IVSigned) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
+                                              SourceLocation Loc,
+                                              OpenMPDirectiveKind DKind) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
+                                              SourceLocation Loc,
+                                              unsigned IVSize, bool IVSigned,
+                                              Address IL, Address LB,
+                                              Address UB, Address ST) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
+                                               llvm::Value *NumThreads,
+                                               SourceLocation Loc) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
+                                             OpenMPProcBindClauseKind ProcBind,
+                                             SourceLocation Loc) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
+                                                    const VarDecl *VD,
+                                                    Address VDAddr,
+                                                    SourceLocation Loc) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
+    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
+    CodeGenFunction *CGF) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
+    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
+                                    ArrayRef<const Expr *> Vars,
+                                    SourceLocation Loc) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
+                                       const OMPExecutableDirective &D,
+                                       llvm::Value *TaskFunction,
+                                       QualType SharedsTy, Address Shareds,
+                                       const Expr *IfCond,
+                                       const OMPTaskDataTy &Data) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskLoopCall(
+    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
+    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
+    const Expr *IfCond, const OMPTaskDataTy &Data) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitReduction(
+    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
+    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
+    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
+  assert(Options.SimpleReduction && "Only simple reduction is expected.");
+  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
+                                 ReductionOps, Options);
+}
+
+llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
+    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
+    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
+                                                  SourceLocation Loc,
+                                                  ReductionCodeGen &RCG,
+                                                  unsigned N) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
+                                                  SourceLocation Loc,
+                                                  llvm::Value *ReductionsPtr,
+                                                  LValue SharedLVal) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
+                                           SourceLocation Loc) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitCancellationPointCall(
+    CodeGenFunction &CGF, SourceLocation Loc,
+    OpenMPDirectiveKind CancelRegion) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
+                                         SourceLocation Loc, const Expr *IfCond,
+                                         OpenMPDirectiveKind CancelRegion) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
+    const OMPExecutableDirective &D, StringRef ParentName,
+    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
+                                         const OMPExecutableDirective &D,
+                                         llvm::Value *OutlinedFn,
+                                         llvm::Value *OutlinedFnID,
+                                         const Expr *IfCond, const Expr *Device) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
+  return false;
+}
+
+llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
+  return nullptr;
+}
+
+void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
+                                        const OMPExecutableDirective &D,
+                                        SourceLocation Loc,
+                                        llvm::Value *OutlinedFn,
+                                        ArrayRef<llvm::Value *> CapturedVars) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
+                                             const Expr *NumTeams,
+                                             const Expr *ThreadLimit,
+                                             SourceLocation Loc) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTargetDataCalls(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
+    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
+    const Expr *Device) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
+                                           const OMPLoopDirective &D) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
+                                              const OMPDependClause *C) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+const VarDecl *
+CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
+                                        const VarDecl *NativeParam) const {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
+Address
+CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
+                                         const VarDecl *NativeParam,
+                                         const VarDecl *TargetParam) const {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 94a1438..179d9b4 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -191,6 +191,8 @@
   }
   /// Returns the base declaration of the reduction item.
   const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; }
+  /// Returns the base declaration of the reduction item.
+  const Expr *getRefExpr(unsigned N) const { return ClausesData[N].Ref; }
   /// Returns true if the initialization of the reduction item uses initializer
   /// from declare reduction construct.
   bool usesReductionInitializer(unsigned N) const;
@@ -251,9 +253,8 @@
   virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; }
 
   /// Emits \p Callee function call with arguments \p Args with location \p Loc.
-  void emitCall(CodeGenFunction &CGF, llvm::Value *Callee,
-                ArrayRef<llvm::Value *> Args = llvm::None,
-                SourceLocation Loc = SourceLocation()) const;
+  void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee,
+                ArrayRef<llvm::Value *> Args = llvm::None) const;
 
 private:
   /// \brief Default const ident_t object used for initialization of all other
@@ -1048,7 +1049,8 @@
   /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
   /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
   /// or 'operator binop(LHS, RHS)'.
-  llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType,
+  llvm::Value *emitReductionFunction(CodeGenModule &CGM, SourceLocation Loc,
+                                     llvm::Type *ArgsType,
                                      ArrayRef<const Expr *> Privates,
                                      ArrayRef<const Expr *> LHSExprs,
                                      ArrayRef<const Expr *> RHSExprs,
@@ -1206,13 +1208,11 @@
   /// directive, or null if no if clause is used.
   /// \param Device Expression evaluated in device clause associated with the
   /// target directive, or null if no device clause is used.
-  /// \param CapturedVars Values captured in the current region.
   virtual void emitTargetCall(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D,
                               llvm::Value *OutlinedFn,
                               llvm::Value *OutlinedFnID, const Expr *IfCond,
-                              const Expr *Device,
-                              ArrayRef<llvm::Value *> CapturedVars);
+                              const Expr *Device);
 
   /// \brief Emit the target regions enclosed in \a GD function definition or
   /// the function itself in case it is a valid device function. Returns true if
@@ -1364,6 +1364,569 @@
                            ArrayRef<llvm::Value *> Args = llvm::None) const;
 };
 
+/// Class supports emissionof SIMD-only code.
+class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
+public:
+  explicit CGOpenMPSIMDRuntime(CodeGenModule &CGM) : CGOpenMPRuntime(CGM) {}
+  ~CGOpenMPSIMDRuntime() override {}
+
+  /// \brief Emits outlined function for the specified OpenMP parallel directive
+  /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+  /// kmp_int32 BoundID, struct context_vars*).
+  /// \param D OpenMP directive.
+  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+  /// \param InnermostKind Kind of innermost directive (for simple directives it
+  /// is a directive itself, for combined - its innermost directive).
+  /// \param CodeGen Code generation sequence for the \a D directive.
+  llvm::Value *
+  emitParallelOutlinedFunction(const OMPExecutableDirective &D,
+                               const VarDecl *ThreadIDVar,
+                               OpenMPDirectiveKind InnermostKind,
+                               const RegionCodeGenTy &CodeGen) override;
+
+  /// \brief Emits outlined function for the specified OpenMP teams directive
+  /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+  /// kmp_int32 BoundID, struct context_vars*).
+  /// \param D OpenMP directive.
+  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+  /// \param InnermostKind Kind of innermost directive (for simple directives it
+  /// is a directive itself, for combined - its innermost directive).
+  /// \param CodeGen Code generation sequence for the \a D directive.
+  llvm::Value *
+  emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
+                            const VarDecl *ThreadIDVar,
+                            OpenMPDirectiveKind InnermostKind,
+                            const RegionCodeGenTy &CodeGen) override;
+
+  /// \brief Emits outlined function for the OpenMP task directive \a D. This
+  /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t*
+  /// TaskT).
+  /// \param D OpenMP directive.
+  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+  /// \param PartIDVar Variable for partition id in the current OpenMP untied
+  /// task region.
+  /// \param TaskTVar Variable for task_t argument.
+  /// \param InnermostKind Kind of innermost directive (for simple directives it
+  /// is a directive itself, for combined - its innermost directive).
+  /// \param CodeGen Code generation sequence for the \a D directive.
+  /// \param Tied true if task is generated for tied task, false otherwise.
+  /// \param NumberOfParts Number of parts in untied task. Ignored for tied
+  /// tasks.
+  ///
+  llvm::Value *emitTaskOutlinedFunction(
+      const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+      const VarDecl *PartIDVar, const VarDecl *TaskTVar,
+      OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+      bool Tied, unsigned &NumberOfParts) override;
+
+  /// \brief Emits code for parallel or serial call of the \a OutlinedFn with
+  /// variables captured in a record which address is stored in \a
+  /// CapturedStruct.
+  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
+  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+  /// \param CapturedVars A pointer to the record with the references to
+  /// variables used in \a OutlinedFn function.
+  /// \param IfCond Condition in the associated 'if' clause, if it was
+  /// specified, nullptr otherwise.
+  ///
+  void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+                        llvm::Value *OutlinedFn,
+                        ArrayRef<llvm::Value *> CapturedVars,
+                        const Expr *IfCond) override;
+
+  /// \brief Emits a critical region.
+  /// \param CriticalName Name of the critical region.
+  /// \param CriticalOpGen Generator for the statement associated with the given
+  /// critical region.
+  /// \param Hint Value of the 'hint' clause (optional).
+  void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
+                          const RegionCodeGenTy &CriticalOpGen,
+                          SourceLocation Loc,
+                          const Expr *Hint = nullptr) override;
+
+  /// \brief Emits a master region.
+  /// \param MasterOpGen Generator for the statement associated with the given
+  /// master region.
+  void emitMasterRegion(CodeGenFunction &CGF,
+                        const RegionCodeGenTy &MasterOpGen,
+                        SourceLocation Loc) override;
+
+  /// \brief Emits code for a taskyield directive.
+  void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override;
+
+  /// \brief Emit a taskgroup region.
+  /// \param TaskgroupOpGen Generator for the statement associated with the
+  /// given taskgroup region.
+  void emitTaskgroupRegion(CodeGenFunction &CGF,
+                           const RegionCodeGenTy &TaskgroupOpGen,
+                           SourceLocation Loc) override;
+
+  /// \brief Emits a single region.
+  /// \param SingleOpGen Generator for the statement associated with the given
+  /// single region.
+  void emitSingleRegion(CodeGenFunction &CGF,
+                        const RegionCodeGenTy &SingleOpGen, SourceLocation Loc,
+                        ArrayRef<const Expr *> CopyprivateVars,
+                        ArrayRef<const Expr *> DestExprs,
+                        ArrayRef<const Expr *> SrcExprs,
+                        ArrayRef<const Expr *> AssignmentOps) override;
+
+  /// \brief Emit an ordered region.
+  /// \param OrderedOpGen Generator for the statement associated with the given
+  /// ordered region.
+  void emitOrderedRegion(CodeGenFunction &CGF,
+                         const RegionCodeGenTy &OrderedOpGen,
+                         SourceLocation Loc, bool IsThreads) override;
+
+  /// \brief Emit an implicit/explicit barrier for OpenMP threads.
+  /// \param Kind Directive for which this implicit barrier call must be
+  /// generated. Must be OMPD_barrier for explicit barrier generation.
+  /// \param EmitChecks true if need to emit checks for cancellation barriers.
+  /// \param ForceSimpleCall true simple barrier call must be emitted, false if
+  /// runtime class decides which one to emit (simple or with cancellation
+  /// checks).
+  ///
+  void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+                       OpenMPDirectiveKind Kind, bool EmitChecks = true,
+                       bool ForceSimpleCall = false) override;
+
+  /// This is used for non static scheduled types and when the ordered
+  /// clause is present on the loop construct.
+  /// Depending on the loop schedule, it is necessary to call some runtime
+  /// routine before start of the OpenMP loop to get the loop upper / lower
+  /// bounds \a LB and \a UB and stride \a ST.
+  ///
+  /// \param CGF Reference to current CodeGenFunction.
+  /// \param Loc Clang source location.
+  /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
+  /// \param IVSize Size of the iteration variable in bits.
+  /// \param IVSigned Sign of the iteration variable.
+  /// \param Ordered true if loop is ordered, false otherwise.
+  /// \param DispatchValues struct containing llvm values for lower bound, upper
+  /// bound, and chunk expression.
+  /// For the default (nullptr) value, the chunk 1 will be used.
+  ///
+  void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc,
+                           const OpenMPScheduleTy &ScheduleKind,
+                           unsigned IVSize, bool IVSigned, bool Ordered,
+                           const DispatchRTInput &DispatchValues) override;
+
+  /// \brief Call the appropriate runtime routine to initialize it before start
+  /// of loop.
+  ///
+  /// This is used only in case of static schedule, when the user did not
+  /// specify a ordered clause on the loop construct.
+  /// Depending on the loop schedule, it is necessary to call some runtime
+  /// routine before start of the OpenMP loop to get the loop upper / lower
+  /// bounds LB and UB and stride ST.
+  ///
+  /// \param CGF Reference to current CodeGenFunction.
+  /// \param Loc Clang source location.
+  /// \param DKind Kind of the directive.
+  /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
+  /// \param Values Input arguments for the construct.
+  ///
+  void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
+                         OpenMPDirectiveKind DKind,
+                         const OpenMPScheduleTy &ScheduleKind,
+                         const StaticRTInput &Values) override;
+
+  ///
+  /// \param CGF Reference to current CodeGenFunction.
+  /// \param Loc Clang source location.
+  /// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause.
+  /// \param Values Input arguments for the construct.
+  ///
+  void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
+                                OpenMPDistScheduleClauseKind SchedKind,
+                                const StaticRTInput &Values) override;
+
+  /// \brief Call the appropriate runtime routine to notify that we finished
+  /// iteration of the ordered loop with the dynamic scheduling.
+  ///
+  /// \param CGF Reference to current CodeGenFunction.
+  /// \param Loc Clang source location.
+  /// \param IVSize Size of the iteration variable in bits.
+  /// \param IVSigned Sign of the iteration variable.
+  ///
+  void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc,
+                                  unsigned IVSize, bool IVSigned) override;
+
+  /// \brief Call the appropriate runtime routine to notify that we finished
+  /// all the work with current loop.
+  ///
+  /// \param CGF Reference to current CodeGenFunction.
+  /// \param Loc Clang source location.
+  /// \param DKind Kind of the directive for which the static finish is emitted.
+  ///
+  void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc,
+                           OpenMPDirectiveKind DKind) override;
+
+  /// Call __kmpc_dispatch_next(
+  ///          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
+  ///          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
+  ///          kmp_int[32|64] *p_stride);
+  /// \param IVSize Size of the iteration variable in bits.
+  /// \param IVSigned Sign of the iteration variable.
+  /// \param IL Address of the output variable in which the flag of the
+  /// last iteration is returned.
+  /// \param LB Address of the output variable in which the lower iteration
+  /// number is returned.
+  /// \param UB Address of the output variable in which the upper iteration
+  /// number is returned.
+  /// \param ST Address of the output variable in which the stride value is
+  /// returned.
+  llvm::Value *emitForNext(CodeGenFunction &CGF, SourceLocation Loc,
+                           unsigned IVSize, bool IVSigned, Address IL,
+                           Address LB, Address UB, Address ST) override;
+
+  /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
+  /// clause.
+  /// \param NumThreads An integer value of threads.
+  void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads,
+                            SourceLocation Loc) override;
+
+  /// \brief Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+  void emitProcBindClause(CodeGenFunction &CGF,
+                          OpenMPProcBindClauseKind ProcBind,
+                          SourceLocation Loc) override;
+
+  /// \brief Returns address of the threadprivate variable for the current
+  /// thread.
+  /// \param VD Threadprivate variable.
+  /// \param VDAddr Address of the global variable \a VD.
+  /// \param Loc Location of the reference to threadprivate var.
+  /// \return Address of the threadprivate variable for the current thread.
+  Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD,
+                                 Address VDAddr, SourceLocation Loc) override;
+
+  /// \brief Emit a code for initialization of threadprivate variable. It emits
+  /// a call to runtime library which adds initial value to the newly created
+  /// threadprivate variable (if it is not constant) and registers destructor
+  /// for the variable (if any).
+  /// \param VD Threadprivate variable.
+  /// \param VDAddr Address of the global variable \a VD.
+  /// \param Loc Location of threadprivate declaration.
+  /// \param PerformInit true if initialization expression is not constant.
+  llvm::Function *
+  emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr,
+                                 SourceLocation Loc, bool PerformInit,
+                                 CodeGenFunction *CGF = nullptr) override;
+
+  /// Creates artificial threadprivate variable with name \p Name and type \p
+  /// VarType.
+  /// \param VarType Type of the artificial threadprivate variable.
+  /// \param Name Name of the artificial threadprivate variable.
+  Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
+                                           QualType VarType,
+                                           StringRef Name) override;
+
+  /// \brief Emit flush of the variables specified in 'omp flush' directive.
+  /// \param Vars List of variables to flush.
+  void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
+                 SourceLocation Loc) override;
+
+  /// \brief Emit task region for the task directive. The task region is
+  /// emitted in several steps:
+  /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+  /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+  /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+  /// function:
+  /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+  ///   TaskFunction(gtid, tt->part_id, tt->shareds);
+  ///   return 0;
+  /// }
+  /// 2. Copy a list of shared variables to field shareds of the resulting
+  /// structure kmp_task_t returned by the previous call (if any).
+  /// 3. Copy a pointer to destructions function to field destructions of the
+  /// resulting structure kmp_task_t.
+  /// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
+  /// kmp_task_t *new_task), where new_task is a resulting structure from
+  /// previous items.
+  /// \param D Current task directive.
+  /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+  /// /*part_id*/, captured_struct */*__context*/);
+  /// \param SharedsTy A type which contains references the shared variables.
+  /// \param Shareds Context with the list of shared variables from the \p
+  /// TaskFunction.
+  /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+  /// otherwise.
+  /// \param Data Additional data for task generation like tiednsee, final
+  /// state, list of privates etc.
+  void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
+                    const OMPExecutableDirective &D, llvm::Value *TaskFunction,
+                    QualType SharedsTy, Address Shareds, const Expr *IfCond,
+                    const OMPTaskDataTy &Data) override;
+
+  /// Emit task region for the taskloop directive. The taskloop region is
+  /// emitted in several steps:
+  /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+  /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+  /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+  /// function:
+  /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+  ///   TaskFunction(gtid, tt->part_id, tt->shareds);
+  ///   return 0;
+  /// }
+  /// 2. Copy a list of shared variables to field shareds of the resulting
+  /// structure kmp_task_t returned by the previous call (if any).
+  /// 3. Copy a pointer to destructions function to field destructions of the
+  /// resulting structure kmp_task_t.
+  /// 4. Emit a call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t
+  /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int
+  /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup ), where new_task
+  /// is a resulting structure from
+  /// previous items.
+  /// \param D Current task directive.
+  /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+  /// /*part_id*/, captured_struct */*__context*/);
+  /// \param SharedsTy A type which contains references the shared variables.
+  /// \param Shareds Context with the list of shared variables from the \p
+  /// TaskFunction.
+  /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+  /// otherwise.
+  /// \param Data Additional data for task generation like tiednsee, final
+  /// state, list of privates etc.
+  void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
+                        const OMPLoopDirective &D, llvm::Value *TaskFunction,
+                        QualType SharedsTy, Address Shareds, const Expr *IfCond,
+                        const OMPTaskDataTy &Data) override;
+
+  /// \brief Emit a code for reduction clause. Next code should be emitted for
+  /// reduction:
+  /// \code
+  ///
+  /// static kmp_critical_name lock = { 0 };
+  ///
+  /// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
+  ///  ...
+  ///  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
+  ///  ...
+  /// }
+  ///
+  /// ...
+  /// void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
+  /// switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
+  /// RedList, reduce_func, &<lock>)) {
+  /// case 1:
+  ///  ...
+  ///  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
+  ///  ...
+  /// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
+  /// break;
+  /// case 2:
+  ///  ...
+  ///  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
+  ///  ...
+  /// break;
+  /// default:;
+  /// }
+  /// \endcode
+  ///
+  /// \param Privates List of private copies for original reduction arguments.
+  /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
+  /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
+  /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
+  /// or 'operator binop(LHS, RHS)'.
+  /// \param Options List of options for reduction codegen:
+  ///     WithNowait true if parent directive has also nowait clause, false
+  ///     otherwise.
+  ///     SimpleReduction Emit reduction operation only. Used for omp simd
+  ///     directive on the host.
+  ///     ReductionKind The kind of reduction to perform.
+  void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
+                     ArrayRef<const Expr *> Privates,
+                     ArrayRef<const Expr *> LHSExprs,
+                     ArrayRef<const Expr *> RHSExprs,
+                     ArrayRef<const Expr *> ReductionOps,
+                     ReductionOptionsTy Options) override;
+
+  /// Emit a code for initialization of task reduction clause. Next code
+  /// should be emitted for reduction:
+  /// \code
+  ///
+  /// _task_red_item_t red_data[n];
+  /// ...
+  /// red_data[i].shar = &origs[i];
+  /// red_data[i].size = sizeof(origs[i]);
+  /// red_data[i].f_init = (void*)RedInit<i>;
+  /// red_data[i].f_fini = (void*)RedDest<i>;
+  /// red_data[i].f_comb = (void*)RedOp<i>;
+  /// red_data[i].flags = <Flag_i>;
+  /// ...
+  /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+  /// \endcode
+  ///
+  /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
+  /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
+  /// \param Data Additional data for task generation like tiedness, final
+  /// state, list of privates, reductions etc.
+  llvm::Value *emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc,
+                                     ArrayRef<const Expr *> LHSExprs,
+                                     ArrayRef<const Expr *> RHSExprs,
+                                     const OMPTaskDataTy &Data) override;
+
+  /// Required to resolve existing problems in the runtime. Emits threadprivate
+  /// variables to store the size of the VLAs/array sections for
+  /// initializer/combiner/finalizer functions + emits threadprivate variable to
+  /// store the pointer to the original reduction item for the custom
+  /// initializer defined by declare reduction construct.
+  /// \param RCG Allows to reuse an existing data for the reductions.
+  /// \param N Reduction item for which fixups must be emitted.
+  void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc,
+                               ReductionCodeGen &RCG, unsigned N) override;
+
+  /// Get the address of `void *` type of the privatue copy of the reduction
+  /// item specified by the \p SharedLVal.
+  /// \param ReductionsPtr Pointer to the reduction data returned by the
+  /// emitTaskReductionInit function.
+  /// \param SharedLVal Address of the original reduction item.
+  Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc,
+                               llvm::Value *ReductionsPtr,
+                               LValue SharedLVal) override;
+
+  /// \brief Emit code for 'taskwait' directive.
+  void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) override;
+
+  /// \brief Emit code for 'cancellation point' construct.
+  /// \param CancelRegion Region kind for which the cancellation point must be
+  /// emitted.
+  ///
+  void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc,
+                                 OpenMPDirectiveKind CancelRegion) override;
+
+  /// \brief Emit code for 'cancel' construct.
+  /// \param IfCond Condition in the associated 'if' clause, if it was
+  /// specified, nullptr otherwise.
+  /// \param CancelRegion Region kind for which the cancel must be emitted.
+  ///
+  void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
+                      const Expr *IfCond,
+                      OpenMPDirectiveKind CancelRegion) override;
+
+  /// \brief Emit outilined function for 'target' directive.
+  /// \param D Directive to emit.
+  /// \param ParentName Name of the function that encloses the target region.
+  /// \param OutlinedFn Outlined function value to be defined by this call.
+  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+  /// \param IsOffloadEntry True if the outlined function is an offload entry.
+  /// \param CodeGen Code generation sequence for the \a D directive.
+  /// An outlined function may not be an entry if, e.g. the if clause always
+  /// evaluates to false.
+  void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
+                                  StringRef ParentName,
+                                  llvm::Function *&OutlinedFn,
+                                  llvm::Constant *&OutlinedFnID,
+                                  bool IsOffloadEntry,
+                                  const RegionCodeGenTy &CodeGen) override;
+
+  /// \brief Emit the target offloading code associated with \a D. The emitted
+  /// code attempts offloading the execution to the device, an the event of
+  /// a failure it executes the host version outlined in \a OutlinedFn.
+  /// \param D Directive to emit.
+  /// \param OutlinedFn Host version of the code to be offloaded.
+  /// \param OutlinedFnID ID of host version of the code to be offloaded.
+  /// \param IfCond Expression evaluated in if clause associated with the target
+  /// directive, or null if no if clause is used.
+  /// \param Device Expression evaluated in device clause associated with the
+  /// target directive, or null if no device clause is used.
+  void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+                      llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID,
+                      const Expr *IfCond, const Expr *Device) override;
+
+  /// \brief Emit the target regions enclosed in \a GD function definition or
+  /// the function itself in case it is a valid device function. Returns true if
+  /// \a GD was dealt with successfully.
+  /// \param GD Function to scan.
+  bool emitTargetFunctions(GlobalDecl GD) override;
+
+  /// \brief Emit the global variable if it is a valid device global variable.
+  /// Returns true if \a GD was dealt with successfully.
+  /// \param GD Variable declaration to emit.
+  bool emitTargetGlobalVariable(GlobalDecl GD) override;
+
+  /// \brief Emit the global \a GD if it is meaningful for the target. Returns
+  /// if it was emitted successfully.
+  /// \param GD Global to scan.
+  bool emitTargetGlobal(GlobalDecl GD) override;
+
+  /// \brief Creates the offloading descriptor in the event any target region
+  /// was emitted in the current module and return the function that registers
+  /// it.
+  llvm::Function *emitRegistrationFunction() override;
+
+  /// \brief Emits code for teams call of the \a OutlinedFn with
+  /// variables captured in a record which address is stored in \a
+  /// CapturedStruct.
+  /// \param OutlinedFn Outlined function to be run by team masters. Type of
+  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+  /// \param CapturedVars A pointer to the record with the references to
+  /// variables used in \a OutlinedFn function.
+  ///
+  void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+                     SourceLocation Loc, llvm::Value *OutlinedFn,
+                     ArrayRef<llvm::Value *> CapturedVars) override;
+
+  /// \brief Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code
+  /// for num_teams clause.
+  /// \param NumTeams An integer expression of teams.
+  /// \param ThreadLimit An integer expression of threads.
+  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+                          const Expr *ThreadLimit, SourceLocation Loc) override;
+
+  /// \brief Emit the target data mapping code associated with \a D.
+  /// \param D Directive to emit.
+  /// \param IfCond Expression evaluated in if clause associated with the
+  /// target directive, or null if no device clause is used.
+  /// \param Device Expression evaluated in device clause associated with the
+  /// target directive, or null if no device clause is used.
+  /// \param Info A record used to store information that needs to be preserved
+  /// until the region is closed.
+  void emitTargetDataCalls(CodeGenFunction &CGF,
+                           const OMPExecutableDirective &D, const Expr *IfCond,
+                           const Expr *Device, const RegionCodeGenTy &CodeGen,
+                           TargetDataInfo &Info) override;
+
+  /// \brief Emit the data mapping/movement code associated with the directive
+  /// \a D that should be of the form 'target [{enter|exit} data | update]'.
+  /// \param D Directive to emit.
+  /// \param IfCond Expression evaluated in if clause associated with the target
+  /// directive, or null if no if clause is used.
+  /// \param Device Expression evaluated in device clause associated with the
+  /// target directive, or null if no device clause is used.
+  void emitTargetDataStandAloneCall(CodeGenFunction &CGF,
+                                    const OMPExecutableDirective &D,
+                                    const Expr *IfCond,
+                                    const Expr *Device) override;
+
+  /// Emit initialization for doacross loop nesting support.
+  /// \param D Loop-based construct used in doacross nesting construct.
+  void emitDoacrossInit(CodeGenFunction &CGF,
+                        const OMPLoopDirective &D) override;
+
+  /// Emit code for doacross ordered directive with 'depend' clause.
+  /// \param C 'depend' clause with 'sink|source' dependency kind.
+  void emitDoacrossOrdered(CodeGenFunction &CGF,
+                           const OMPDependClause *C) override;
+
+  /// Translates the native parameter of outlined function if this is required
+  /// for target.
+  /// \param FD Field decl from captured record for the paramater.
+  /// \param NativeParam Parameter itself.
+  const VarDecl *translateParameter(const FieldDecl *FD,
+                                    const VarDecl *NativeParam) const override;
+
+  /// Gets the address of the native argument basing on the address of the
+  /// target-specific parameter.
+  /// \param NativeParam Parameter itself.
+  /// \param TargetParam Corresponding target-specific parameter.
+  Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
+                              const VarDecl *TargetParam) const override;
+};
+
 } // namespace CodeGen
 } // namespace clang
 
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 884bcc1..8ebfa3a 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -22,19 +22,22 @@
 
 namespace {
 enum OpenMPRTLFunctionNVPTX {
-  /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit);
+  /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit,
+  /// int16_t RequiresOMPRuntime);
   OMPRTL_NVPTX__kmpc_kernel_init,
-  /// \brief Call to void __kmpc_kernel_deinit();
+  /// \brief Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
   OMPRTL_NVPTX__kmpc_kernel_deinit,
   /// \brief Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
-  /// short RequiresOMPRuntime, short RequiresDataSharing);
+  /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
   OMPRTL_NVPTX__kmpc_spmd_kernel_init,
   /// \brief Call to void __kmpc_spmd_kernel_deinit();
   OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
   /// \brief Call to void __kmpc_kernel_prepare_parallel(void
-  /// *outlined_function);
+  /// *outlined_function, void ***args, kmp_int32 nArgs, int16_t
+  /// IsOMPRuntimeInitialized);
   OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
-  /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function);
+  /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function, void
+  /// ***args, int16_t IsOMPRuntimeInitialized);
   OMPRTL_NVPTX__kmpc_kernel_parallel,
   /// \brief Call to void __kmpc_kernel_end_parallel();
   OMPRTL_NVPTX__kmpc_kernel_end_parallel,
@@ -247,8 +250,8 @@
 }
 
 CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
-    CodeGenModule &CGM)
-    : WorkerFn(nullptr), CGFI(nullptr) {
+    CodeGenModule &CGM, SourceLocation Loc)
+    : WorkerFn(nullptr), CGFI(nullptr), Loc(Loc) {
   createWorkerFunction(CGM);
 }
 
@@ -259,8 +262,8 @@
 
   WorkerFn = llvm::Function::Create(
       CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage,
-      /* placeholder */ "_worker", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI);
+      /*placeholder=*/"_worker", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), WorkerFn, *CGFI);
 }
 
 bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const {
@@ -268,19 +271,10 @@
 }
 
 static CGOpenMPRuntimeNVPTX::ExecutionMode
-getExecutionModeForDirective(CodeGenModule &CGM,
-                             const OMPExecutableDirective &D) {
-  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
-  switch (DirectiveKind) {
-  case OMPD_target:
-  case OMPD_target_teams:
-    return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
-  case OMPD_target_parallel:
-    return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
-  default:
-    llvm_unreachable("Unsupported directive on NVPTX device.");
-  }
-  llvm_unreachable("Unsupported directive on NVPTX device.");
+getExecutionMode(CodeGenModule &CGM) {
+  return CGM.getLangOpts().OpenMPCUDAMode
+             ? CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd
+             : CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
 }
 
 void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D,
@@ -292,37 +286,38 @@
   ExecutionModeRAII ModeRAII(CurrentExecutionMode,
                              CGOpenMPRuntimeNVPTX::ExecutionMode::Generic);
   EntryFunctionState EST;
-  WorkerFunctionState WST(CGM);
+  WorkerFunctionState WST(CGM, D.getLocStart());
   Work.clear();
+  WrapperFunctionsMap.clear();
 
   // Emit target region as a standalone region.
   class NVPTXPrePostActionTy : public PrePostActionTy {
-    CGOpenMPRuntimeNVPTX &RT;
     CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
     CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
 
   public:
-    NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
-                         CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
+    NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
                          CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
-        : RT(RT), EST(EST), WST(WST) {}
+        : EST(EST), WST(WST) {}
     void Enter(CodeGenFunction &CGF) override {
-      RT.emitGenericEntryHeader(CGF, EST, WST);
+      static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+          .emitGenericEntryHeader(CGF, EST, WST);
     }
     void Exit(CodeGenFunction &CGF) override {
-      RT.emitGenericEntryFooter(CGF, EST);
+      static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+          .emitGenericEntryFooter(CGF, EST);
     }
-  } Action(*this, EST, WST);
+  } Action(EST, WST);
   CodeGen.setAction(Action);
   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                    IsOffloadEntry, CodeGen);
 
-  // Create the worker function
-  emitWorkerFunction(WST);
-
   // Now change the name of the worker function to correspond to this target
   // region's entry function.
   WST.WorkerFn->setName(OutlinedFn->getName() + "_worker");
+
+  // Create the worker function
+  emitWorkerFunction(WST);
 }
 
 // Setup NVPTX threads for master-worker OpenMP scheme.
@@ -341,7 +336,7 @@
   Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
 
   CGF.EmitBlock(WorkerBB);
-  emitCall(CGF, WST.WorkerFn);
+  emitOutlinedFunctionCall(CGF, WST.Loc, WST.WorkerFn);
   CGF.EmitBranch(EST.ExitBB);
 
   CGF.EmitBlock(MasterCheckBB);
@@ -352,7 +347,9 @@
   CGF.EmitBlock(MasterBB);
   // First action in sequential region:
   // Initialize the state of the OpenMP runtime library on the GPU.
-  llvm::Value *Args[] = {getThreadLimit(CGF)};
+  // TODO: Optimize runtime initialization and pass in correct value.
+  llvm::Value *Args[] = {getThreadLimit(CGF),
+                         Bld.getInt16(/*RequiresOMPRuntime=*/1)};
   CGF.EmitRuntimeCall(
       createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
 }
@@ -367,8 +364,10 @@
 
   CGF.EmitBlock(TerminateBB);
   // Signal termination condition.
+  // TODO: Optimize runtime initialization and pass in correct value.
+  llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)};
   CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), None);
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
   // Barrier to terminate worker threads.
   syncCTAThreads(CGF);
   // Master thread jumps to exit point.
@@ -409,7 +408,6 @@
   CodeGen.setAction(Action);
   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                    IsOffloadEntry, CodeGen);
-  return;
 }
 
 void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
@@ -467,11 +465,11 @@
 }
 
 void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
-  auto &Ctx = CGM.getContext();
+  ASTContext &Ctx = CGM.getContext();
 
   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
-  CGF.disableDebugInfo();
-  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {});
+  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {},
+                    WST.Loc, WST.Loc);
   emitWorkerLoop(CGF, WST);
   CGF.FinishFunction();
 }
@@ -510,7 +508,12 @@
   CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0));
   CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));
 
-  llvm::Value *Args[] = {WorkFn.getPointer()};
+  // Set up shared arguments
+  Address SharedArgs =
+      CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrPtrTy, "shared_args");
+  // TODO: Optimize runtime initialization and pass in correct value.
+  llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer(),
+                         /*RequiresOMPRuntime=*/Bld.getInt16(1)};
   llvm::Value *Ret = CGF.EmitRuntimeCall(
       createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
   Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
@@ -529,6 +532,9 @@
   // Signal start of parallel region.
   CGF.EmitBlock(ExecuteBB);
 
+  // Current context
+  ASTContext &Ctx = CGF.getContext();
+
   // Process work items: outlined parallel functions.
   for (auto *W : Work) {
     // Try to match this outlined function.
@@ -544,14 +550,19 @@
     // Execute this outlined function.
     CGF.EmitBlock(ExecuteFNBB);
 
-    // Insert call to work function.
-    // FIXME: Pass arguments to outlined function from master thread.
-    auto *Fn = cast<llvm::Function>(W);
-    Address ZeroAddr =
-        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr");
-    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C=*/0));
-    llvm::Value *FnArgs[] = {ZeroAddr.getPointer(), ZeroAddr.getPointer()};
-    emitCall(CGF, Fn, FnArgs);
+    // Insert call to work function via shared wrapper. The shared
+    // wrapper takes exactly three arguments:
+    //   - the parallelism level;
+    //   - the master thread ID;
+    //   - the list of references to shared arguments.
+    //
+    // TODO: Assert that the function is a wrapper function.s
+    Address Capture = CGF.EmitLoadOfPointer(SharedArgs,
+       Ctx.getPointerType(
+          Ctx.getPointerType(Ctx.VoidPtrTy)).castAs<PointerType>());
+    emitOutlinedFunctionCall(CGF, WST.Loc, W,
+                             {Bld.getInt16(/*ParallelLevel=*/0),
+                              getMasterThreadID(CGF), Capture.getPointer()});
 
     // Go to end of parallel region.
     CGF.EmitBranch(TerminateBB);
@@ -585,23 +596,25 @@
   llvm::Constant *RTLFn = nullptr;
   switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
   case OMPRTL_NVPTX__kmpc_kernel_init: {
-    // Build void __kmpc_kernel_init(kmp_int32 thread_limit);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty};
+    // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
+    // RequiresOMPRuntime);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty};
     llvm::FunctionType *FnTy =
         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
     break;
   }
   case OMPRTL_NVPTX__kmpc_kernel_deinit: {
-    // Build void __kmpc_kernel_deinit();
+    // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
+    llvm::Type *TypeParams[] = {CGM.Int16Ty};
     llvm::FunctionType *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
     break;
   }
   case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
     // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
-    // short RequiresOMPRuntime, short RequiresDataSharing);
+    // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
     llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
     llvm::FunctionType *FnTy =
         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
@@ -617,16 +630,21 @@
   }
   case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
     /// Build void __kmpc_kernel_prepare_parallel(
-    /// void *outlined_function);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrTy};
+    /// void *outlined_function, void ***args, kmp_int32 nArgs, int16_t
+    /// IsOMPRuntimeInitialized);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrTy,
+                                CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty,
+                                CGM.Int16Ty};
     llvm::FunctionType *FnTy =
         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
     break;
   }
   case OMPRTL_NVPTX__kmpc_kernel_parallel: {
-    /// Build bool __kmpc_kernel_parallel(void **outlined_function);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy};
+    /// Build bool __kmpc_kernel_parallel(void **outlined_function, void
+    /// ***args, int16_t IsOMPRuntimeInitialized);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy,
+                                CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int16Ty};
     llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
     llvm::FunctionType *FnTy =
         llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
@@ -790,8 +808,7 @@
 
   assert(!ParentName.empty() && "Invalid target region parent name!");
 
-  CGOpenMPRuntimeNVPTX::ExecutionMode Mode =
-      getExecutionModeForDirective(CGM, D);
+  CGOpenMPRuntimeNVPTX::ExecutionMode Mode = getExecutionMode(CGM);
   switch (Mode) {
   case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic:
     emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
@@ -845,8 +862,17 @@
 llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
-  return CGOpenMPRuntime::emitParallelOutlinedFunction(D, ThreadIDVar,
-                                                       InnermostKind, CodeGen);
+
+  auto *OutlinedFun = cast<llvm::Function>(
+    CGOpenMPRuntime::emitParallelOutlinedFunction(
+          D, ThreadIDVar, InnermostKind, CodeGen));
+  if (!isInSpmdExecutionMode()) {
+    llvm::Function *WrapperFun =
+        createDataSharingWrapper(OutlinedFun, D);
+    WrapperFunctionsMap[OutlinedFun] = WrapperFun;
+  }
+
+  return OutlinedFun;
 }
 
 llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
@@ -898,15 +924,57 @@
     CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
     ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
   llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);
+  llvm::Function *WFn = WrapperFunctionsMap[Fn];
+  assert(WFn && "Wrapper function does not exist!");
 
-  auto &&L0ParallelGen = [this, Fn](CodeGenFunction &CGF, PrePostActionTy &) {
+  // Force inline this outlined function at its call site.
+  Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+
+  auto &&L0ParallelGen = [this, WFn, &CapturedVars](CodeGenFunction &CGF,
+                                                    PrePostActionTy &) {
     CGBuilderTy &Bld = CGF.Builder;
 
-    // Prepare for parallel region. Indicate the outlined function.
-    llvm::Value *Args[] = {Bld.CreateBitOrPointerCast(Fn, CGM.Int8PtrTy)};
-    CGF.EmitRuntimeCall(
-        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
-        Args);
+    llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
+
+    if (!CapturedVars.empty()) {
+      // There's somehting to share, add the attribute
+      CGF.CurFn->addFnAttr("has-nvptx-shared-depot");
+      // Prepare for parallel region. Indicate the outlined function.
+      Address SharedArgs =
+          CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,
+              "shared_args");
+      llvm::Value *SharedArgsPtr = SharedArgs.getPointer();
+      // TODO: Optimize runtime initialization and pass in correct value.
+      llvm::Value *Args[] = {ID, SharedArgsPtr,
+                             Bld.getInt32(CapturedVars.size()),
+                             /*RequiresOMPRuntime=*/Bld.getInt16(1)};
+
+      CGF.EmitRuntimeCall(
+          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
+          Args);
+
+      unsigned Idx = 0;
+      ASTContext &Ctx = CGF.getContext();
+      for (llvm::Value *V : CapturedVars) {
+        Address Dst = Bld.CreateConstInBoundsGEP(
+            CGF.EmitLoadOfPointer(SharedArgs,
+            Ctx.getPointerType(
+                Ctx.getPointerType(Ctx.VoidPtrTy)).castAs<PointerType>()),
+            Idx, CGF.getPointerSize());
+        llvm::Value *PtrV = Bld.CreateBitCast(V, CGF.VoidPtrTy);
+        CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
+            Ctx.getPointerType(Ctx.VoidPtrTy));
+        Idx++;
+      }
+    } else {
+      // TODO: Optimize runtime initialization and pass in correct value.
+      llvm::Value *Args[] = {
+          ID, llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)),
+          /*nArgs=*/Bld.getInt32(0), /*RequiresOMPRuntime=*/Bld.getInt16(1)};
+      CGF.EmitRuntimeCall(
+          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
+          Args);
+    }
 
     // Activate workers. This barrier is used by the master to signal
     // work for the workers.
@@ -921,7 +989,7 @@
     syncCTAThreads(CGF);
 
     // Remember for post-processing in worker loop.
-    Work.push_back(Fn);
+    Work.emplace_back(WFn);
   };
 
   auto *RTLoc = emitUpdateLocation(CGF, Loc);
@@ -971,27 +1039,52 @@
   // TODO: Do something with IfCond when support for the 'if' clause
   // is added on Spmd target directives.
   llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
-  OutlinedFnArgs.push_back(
-      llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
-  OutlinedFnArgs.push_back(
-      llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+
+  Address ZeroAddr = CGF.CreateMemTemp(
+      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+      ".zero.addr");
+  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
   OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
   emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
 }
 
+/// Cast value to the specified type.
+static llvm::Value *
+castValueToType(CodeGenFunction &CGF, llvm::Value *Val, llvm::Type *CastTy,
+                llvm::Optional<bool> IsSigned = llvm::None) {
+  if (Val->getType() == CastTy)
+    return Val;
+  if (Val->getType()->getPrimitiveSizeInBits() > 0 &&
+      CastTy->getPrimitiveSizeInBits() > 0 &&
+      Val->getType()->getPrimitiveSizeInBits() ==
+          CastTy->getPrimitiveSizeInBits())
+    return CGF.Builder.CreateBitCast(Val, CastTy);
+  if (IsSigned.hasValue() && CastTy->isIntegerTy() &&
+      Val->getType()->isIntegerTy())
+    return CGF.Builder.CreateIntCast(Val, CastTy, *IsSigned);
+  Address CastItem = CGF.CreateTempAlloca(
+      CastTy,
+      CharUnits::fromQuantity(
+          CGF.CGM.getDataLayout().getPrefTypeAlignment(Val->getType())));
+  Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace()));
+  CGF.Builder.CreateStore(Val, ValCastItem);
+  return CGF.Builder.CreateLoad(CastItem);
+}
+
 /// This function creates calls to one of two shuffle functions to copy
 /// variables between lanes in a warp.
 static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
-                                                 QualType ElemTy,
                                                  llvm::Value *Elem,
                                                  llvm::Value *Offset) {
   auto &CGM = CGF.CGM;
-  auto &C = CGM.getContext();
   auto &Bld = CGF.Builder;
   CGOpenMPRuntimeNVPTX &RT =
       *(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime()));
 
-  unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity();
+  unsigned Size = CGM.getDataLayout().getTypeStoreSize(Elem->getType());
   assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction.");
 
   OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4
@@ -999,17 +1092,16 @@
                                          : OMPRTL_NVPTX__kmpc_shuffle_int64;
 
   // Cast all types to 32- or 64-bit values before calling shuffle routines.
-  auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
-  auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy);
-  auto *WarpSize = CGF.EmitScalarConversion(
-      getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true),
-      C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation());
+  llvm::Type *CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
+  llvm::Value *ElemCast = castValueToType(CGF, Elem, CastTy, /*isSigned=*/true);
+  auto *WarpSize =
+      Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
 
   auto *ShuffledVal =
       CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn),
                           {ElemCast, Offset, WarpSize});
 
-  return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy));
+  return castValueToType(CGF, ShuffledVal, Elem->getType(), /*isSigned=*/true);
 }
 
 namespace {
@@ -1071,10 +1163,9 @@
       // Step 1.1: Get the address for the src element in the Reduce list.
       Address SrcElementPtrAddr =
           Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
-      llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
-          SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
-      SrcElementAddr =
-          Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+      SrcElementAddr = CGF.EmitLoadOfPointer(
+          SrcElementPtrAddr,
+          C.getPointerType(Private->getType())->castAs<PointerType>());
 
       // Step 1.2: Create a temporary to store the element in the destination
       // Reduce list.
@@ -1090,32 +1181,26 @@
       // Step 1.1: Get the address for the src element in the Reduce list.
       Address SrcElementPtrAddr =
           Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
-      llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
-          SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
-      SrcElementAddr =
-          Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+      SrcElementAddr = CGF.EmitLoadOfPointer(
+          SrcElementPtrAddr,
+          C.getPointerType(Private->getType())->castAs<PointerType>());
 
       // Step 1.2: Get the address for dest element.  The destination
       // element has already been created on the thread's stack.
       DestElementPtrAddr =
           Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
-      llvm::Value *DestElementPtr =
-          CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false,
-                               C.VoidPtrTy, SourceLocation());
-      Address DestElemAddr =
-          Address(DestElementPtr, C.getTypeAlignInChars(Private->getType()));
-      DestElementAddr = Bld.CreateElementBitCast(
-          DestElemAddr, CGF.ConvertTypeForMem(Private->getType()));
+      DestElementAddr = CGF.EmitLoadOfPointer(
+          DestElementPtrAddr,
+          C.getPointerType(Private->getType())->castAs<PointerType>());
       break;
     }
     case ThreadToScratchpad: {
       // Step 1.1: Get the address for the src element in the Reduce list.
       Address SrcElementPtrAddr =
           Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
-      llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
-          SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
-      SrcElementAddr =
-          Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+      SrcElementAddr = CGF.EmitLoadOfPointer(
+          SrcElementPtrAddr,
+          C.getPointerType(Private->getType())->castAs<PointerType>());
 
       // Step 1.2: Get the address for dest element:
       // address = base + index * ElementSizeInChars.
@@ -1128,11 +1213,8 @@
           Bld.CreateAdd(DestBase.getPointer(), CurrentOffset);
       ScratchPadElemAbsolutePtrVal =
           Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
-      Address ScratchpadPtr =
-          Address(ScratchPadElemAbsolutePtrVal,
-                  C.getTypeAlignInChars(Private->getType()));
-      DestElementAddr = Bld.CreateElementBitCast(
-          ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType()));
+      DestElementAddr = Address(ScratchPadElemAbsolutePtrVal,
+                                C.getTypeAlignInChars(Private->getType()));
       IncrScratchpadDest = true;
       break;
     }
@@ -1169,14 +1251,15 @@
         SrcElementAddr, CGF.ConvertTypeForMem(Private->getType()));
     llvm::Value *Elem =
         CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false,
-                             Private->getType(), SourceLocation());
+                             Private->getType(), Private->getExprLoc());
 
     // Now that all active lanes have read the element in the
     // Reduce list, shuffle over the value from the remote lane.
-    if (ShuffleInElement) {
-      Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem,
-                                          RemoteLaneOffset);
-    }
+    if (ShuffleInElement)
+      Elem = createRuntimeShuffleFunction(CGF, Elem, RemoteLaneOffset);
+
+    DestElementAddr = Bld.CreateElementBitCast(DestElementAddr,
+                                               SrcElementAddr.getElementType());
 
     // Store the source element value to the dest element address.
     CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
@@ -1240,27 +1323,31 @@
 ///    local = local @ remote
 ///  else
 ///    local = remote
-static llvm::Value *
-emitReduceScratchpadFunction(CodeGenModule &CGM,
-                             ArrayRef<const Expr *> Privates,
-                             QualType ReductionArrayTy, llvm::Value *ReduceFn) {
+static llvm::Value *emitReduceScratchpadFunction(
+    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+    QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) {
   auto &C = CGM.getContext();
   auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
 
   // Destination of the copy.
-  ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
   // Base address of the scratchpad array, with each element storing a
   // Reduce list per team.
-  ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
   // A source index into the scratchpad array.
-  ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other);
+  ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+                             ImplicitParamDecl::Other);
   // Row width of an element in the scratchpad array, typically
   // the number of teams.
-  ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other);
+  ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+                             ImplicitParamDecl::Other);
   // If should_reduce == 1, then it's load AND reduce,
   // If should_reduce == 0 (or otherwise), then it only loads (+ copy).
   // The latter case is used for initialization.
-  ImplicitParamDecl ShouldReduceArg(C, Int32Ty, ImplicitParamDecl::Other);
+  ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                    Int32Ty, ImplicitParamDecl::Other);
 
   FunctionArgList Args;
   Args.push_back(&ReduceListArg);
@@ -1273,12 +1360,9 @@
   auto *Fn = llvm::Function::Create(
       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
       "_omp_reduction_load_and_reduce", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
   CodeGenFunction CGF(CGM);
-  // We don't need debug information in this function as nothing here refers to
-  // user code.
-  CGF.disableDebugInfo();
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
 
   auto &Bld = CGF.Builder;
 
@@ -1287,29 +1371,27 @@
   Address ReduceListAddr(
       Bld.CreatePointerBitCastOrAddrSpaceCast(
           CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
-                               C.VoidPtrTy, SourceLocation()),
+                               C.VoidPtrTy, Loc),
           CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
       CGF.getPointerAlign());
 
   Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
   llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
-      AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+      AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
 
   Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
-  llvm::Value *IndexVal =
-      Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false,
-                                             Int32Ty, SourceLocation()),
-                        CGM.SizeTy, /*isSigned=*/true);
+  llvm::Value *IndexVal = Bld.CreateIntCast(
+      CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc),
+      CGM.SizeTy, /*isSigned=*/true);
 
   Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
-  llvm::Value *WidthVal =
-      Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false,
-                                             Int32Ty, SourceLocation()),
-                        CGM.SizeTy, /*isSigned=*/true);
+  llvm::Value *WidthVal = Bld.CreateIntCast(
+      CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, Int32Ty, Loc),
+      CGM.SizeTy, /*isSigned=*/true);
 
   Address AddrShouldReduceArg = CGF.GetAddrOfLocalVar(&ShouldReduceArg);
   llvm::Value *ShouldReduceVal = CGF.EmitLoadOfScalar(
-      AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, SourceLocation());
+      AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, Loc);
 
   // The absolute ptr address to the base addr of the next element to copy.
   llvm::Value *CumulativeElemBasePtr =
@@ -1342,7 +1424,8 @@
       ReduceListAddr.getPointer(), CGF.VoidPtrTy);
   llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
       RemoteReduceList.getPointer(), CGF.VoidPtrTy);
-  CGF.EmitCallOrInvoke(ReduceFn, {LocalDataPtr, RemoteDataPtr});
+  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
+      CGF, Loc, ReduceFn, {LocalDataPtr, RemoteDataPtr});
   Bld.CreateBr(MergeBB);
 
   CGF.EmitBlock(ElseBB);
@@ -1366,22 +1449,27 @@
 ///
 static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM,
                                          ArrayRef<const Expr *> Privates,
-                                         QualType ReductionArrayTy) {
+                                         QualType ReductionArrayTy,
+                                         SourceLocation Loc) {
 
   auto &C = CGM.getContext();
   auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
 
   // Source of the copy.
-  ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
   // Base address of the scratchpad array, with each element storing a
   // Reduce list per team.
-  ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
   // A destination index into the scratchpad array, typically the team
   // identifier.
-  ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other);
+  ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+                             ImplicitParamDecl::Other);
   // Row width of an element in the scratchpad array, typically
   // the number of teams.
-  ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other);
+  ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
+                             ImplicitParamDecl::Other);
 
   FunctionArgList Args;
   Args.push_back(&ReduceListArg);
@@ -1393,12 +1481,9 @@
   auto *Fn = llvm::Function::Create(
       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
       "_omp_reduction_copy_to_scratchpad", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
   CodeGenFunction CGF(CGM);
-  // We don't need debug information in this function as nothing here refers to
-  // user code.
-  CGF.disableDebugInfo();
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
 
   auto &Bld = CGF.Builder;
 
@@ -1406,19 +1491,18 @@
   Address SrcDataAddr(
       Bld.CreatePointerBitCastOrAddrSpaceCast(
           CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
-                               C.VoidPtrTy, SourceLocation()),
+                               C.VoidPtrTy, Loc),
           CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
       CGF.getPointerAlign());
 
   Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
   llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
-      AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+      AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
 
   Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
-  llvm::Value *IndexVal =
-      Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false,
-                                             Int32Ty, SourceLocation()),
-                        CGF.SizeTy, /*isSigned=*/true);
+  llvm::Value *IndexVal = Bld.CreateIntCast(
+      CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc),
+      CGF.SizeTy, /*isSigned=*/true);
 
   Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
   llvm::Value *WidthVal =
@@ -1455,17 +1539,19 @@
 ///     sync
 static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
                                               ArrayRef<const Expr *> Privates,
-                                              QualType ReductionArrayTy) {
+                                              QualType ReductionArrayTy,
+                                              SourceLocation Loc) {
   auto &C = CGM.getContext();
   auto &M = CGM.getModule();
 
   // ReduceList: thread local Reduce list.
   // At the stage of the computation when this function is called, partially
   // aggregated values reside in the first lane of every active warp.
-  ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
   // NumWarps: number of warps active in the parallel region.  This could
   // be smaller than 32 (max warps in a CTA) for partial block reduction.
-  ImplicitParamDecl NumWarpsArg(C,
+  ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                 C.getIntTypeForBitwidth(32, /* Signed */ true),
                                 ImplicitParamDecl::Other);
   FunctionArgList Args;
@@ -1476,12 +1562,9 @@
   auto *Fn = llvm::Function::Create(
       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
       "_omp_reduction_inter_warp_copy_func", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
   CodeGenFunction CGF(CGM);
-  // We don't need debug information in this function as nothing here refers to
-  // user code.
-  CGF.disableDebugInfo();
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
 
   auto &Bld = CGF.Builder;
 
@@ -1702,21 +1785,23 @@
 ///   (2k+1)th thread is ignored in the value aggregation.  Therefore
 ///   we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so
 ///   that the contiguity assumption still holds.
-static llvm::Value *
-emitShuffleAndReduceFunction(CodeGenModule &CGM,
-                             ArrayRef<const Expr *> Privates,
-                             QualType ReductionArrayTy, llvm::Value *ReduceFn) {
+static llvm::Value *emitShuffleAndReduceFunction(
+    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+    QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) {
   auto &C = CGM.getContext();
 
   // Thread local Reduce list used to host the values of data to be reduced.
-  ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
   // Current lane id; could be logical.
-  ImplicitParamDecl LaneIDArg(C, C.ShortTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.ShortTy,
+                              ImplicitParamDecl::Other);
   // Offset of the remote source lane relative to the current lane.
-  ImplicitParamDecl RemoteLaneOffsetArg(C, C.ShortTy,
-                                        ImplicitParamDecl::Other);
+  ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                        C.ShortTy, ImplicitParamDecl::Other);
   // Algorithm version.  This is expected to be known at compile time.
-  ImplicitParamDecl AlgoVerArg(C, C.ShortTy, ImplicitParamDecl::Other);
+  ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                               C.ShortTy, ImplicitParamDecl::Other);
   FunctionArgList Args;
   Args.push_back(&ReduceListArg);
   Args.push_back(&LaneIDArg);
@@ -1727,12 +1812,9 @@
   auto *Fn = llvm::Function::Create(
       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
       "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
   CodeGenFunction CGF(CGM);
-  // We don't need debug information in this function as nothing here refers to
-  // user code.
-  CGF.disableDebugInfo();
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
 
   auto &Bld = CGF.Builder;
 
@@ -1819,7 +1901,8 @@
       LocalReduceList.getPointer(), CGF.VoidPtrTy);
   llvm::Value *RemoteReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
       RemoteReduceList.getPointer(), CGF.VoidPtrTy);
-  CGF.EmitCallOrInvoke(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
+  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
+      CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
   Bld.CreateBr(MergeBB);
 
   CGF.EmitBlock(ElseBB);
@@ -2140,7 +2223,7 @@
       llvm::Value *Size = CGF.Builder.CreateIntCast(
           CGF.getVLASize(
                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
-              .first,
+              .NumElts,
           CGF.SizeTy, /*isSigned=*/false);
       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                               Elem);
@@ -2149,8 +2232,8 @@
 
   // 2. Emit reduce_func().
   auto *ReductionFn = emitReductionFunction(
-      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
-      LHSExprs, RHSExprs, ReductionOps);
+      CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
+      Privates, LHSExprs, RHSExprs, ReductionOps);
 
   // 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList),
   // RedList, shuffle_reduce_func, interwarp_copy_func);
@@ -2160,9 +2243,9 @@
       ReductionList.getPointer(), CGF.VoidPtrTy);
 
   auto *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
-      CGM, Privates, ReductionArrayTy, ReductionFn);
+      CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
   auto *InterWarpCopyFn =
-      emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy);
+      emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
 
   llvm::Value *Res = nullptr;
   if (ParallelReduction) {
@@ -2180,9 +2263,9 @@
 
   if (TeamsReduction) {
     auto *ScratchPadCopyFn =
-        emitCopyToScratchpad(CGM, Privates, ReductionArrayTy);
+        emitCopyToScratchpad(CGM, Privates, ReductionArrayTy, Loc);
     auto *LoadAndReduceFn = emitReduceScratchpadFunction(
-        CGM, Privates, ReductionArrayTy, ReductionFn);
+        CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
 
     llvm::Value *Args[] = {ThreadId,
                            CGF.Builder.getInt32(RHSExprs.size()),
@@ -2255,9 +2338,17 @@
   enum { NVPTX_local_addr = 5 };
   QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr));
   ArgType = QC.apply(CGM.getContext(), ArgType);
-  return ImplicitParamDecl::Create(
-      CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(),
-      NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other);
+  if (isa<ImplicitParamDecl>(NativeParam)) {
+    return ImplicitParamDecl::Create(
+        CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(),
+        NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other);
+  }
+  return ParmVarDecl::Create(
+      CGM.getContext(),
+      const_cast<DeclContext *>(NativeParam->getDeclContext()),
+      NativeParam->getLocStart(), NativeParam->getLocation(),
+      NativeParam->getIdentifier(), ArgType,
+      /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
 }
 
 Address
@@ -2317,3 +2408,98 @@
   }
   CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs);
 }
+
+/// Emit function which wraps the outline parallel region
+/// and controls the arguments which are passed to this function.
+/// The wrapper ensures that the outlined function is called
+/// with the correct arguments when data is shared.
+llvm::Function *CGOpenMPRuntimeNVPTX::createDataSharingWrapper(
+    llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D) {
+  ASTContext &Ctx = CGM.getContext();
+  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_parallel);
+
+  // Create a function that takes as argument the source thread.
+  FunctionArgList WrapperArgs;
+  QualType Int16QTy =
+      Ctx.getIntTypeForBitwidth(/*DestWidth=*/16, /*Signed=*/false);
+  QualType Int32QTy =
+      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false);
+  QualType Int32PtrQTy = Ctx.getPointerType(Int32QTy);
+  QualType VoidPtrPtrQTy = Ctx.getPointerType(Ctx.VoidPtrTy);
+  ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getLocStart(),
+                                     /*Id=*/nullptr, Int16QTy,
+                                     ImplicitParamDecl::Other);
+  ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getLocStart(),
+                               /*Id=*/nullptr, Int32QTy,
+                               ImplicitParamDecl::Other);
+  ImplicitParamDecl SharedArgsList(Ctx, /*DC=*/nullptr, D.getLocStart(),
+                                   /*Id=*/nullptr, VoidPtrPtrQTy,
+                                   ImplicitParamDecl::Other);
+  WrapperArgs.emplace_back(&ParallelLevelArg);
+  WrapperArgs.emplace_back(&WrapperArg);
+  WrapperArgs.emplace_back(&SharedArgsList);
+
+  auto &CGFI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, WrapperArgs);
+
+  auto *Fn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      OutlinedParallelFn->getName() + "_wrapper", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+  Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+
+  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
+  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs,
+                    D.getLocStart(), D.getLocStart());
+
+  const auto *RD = CS.getCapturedRecordDecl();
+  auto CurField = RD->field_begin();
+
+  // Get the array of arguments.
+  SmallVector<llvm::Value *, 8> Args;
+
+  // TODO: suppport SIMD and pass actual values
+  Args.emplace_back(llvm::ConstantPointerNull::get(
+      CGM.Int32Ty->getPointerTo()));
+  Args.emplace_back(llvm::ConstantPointerNull::get(
+      CGM.Int32Ty->getPointerTo()));
+
+  CGBuilderTy &Bld = CGF.Builder;
+  auto CI = CS.capture_begin();
+
+  // Load the start of the array
+  auto SharedArgs =
+      CGF.EmitLoadOfPointer(CGF.GetAddrOfLocalVar(&SharedArgsList),
+          VoidPtrPtrQTy->castAs<PointerType>());
+
+  // For each captured variable
+  for (unsigned I = 0; I < CS.capture_size(); ++I, ++CI, ++CurField) {
+    // Name of captured variable
+    StringRef Name;
+    if (CI->capturesThis())
+      Name = "this";
+    else
+      Name = CI->getCapturedVar()->getName();
+
+    // We retrieve the CLANG type of the argument. We use it to create
+    // an alloca which will give us the LLVM type.
+    QualType ElemTy = CurField->getType();
+    // If this is a capture by copy the element type has to be the pointer to
+    // the data.
+    if (CI->capturesVariableByCopy())
+      ElemTy = Ctx.getPointerType(ElemTy);
+
+    // Get shared address of the captured variable.
+    Address ArgAddress = Bld.CreateConstInBoundsGEP(
+        SharedArgs, I, CGF.getPointerSize());
+    Address TypedArgAddress = Bld.CreateBitCast(
+        ArgAddress, CGF.ConvertTypeForMem(Ctx.getPointerType(ElemTy)));
+    llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedArgAddress,
+        /*Volatile=*/false, Int32PtrQTy, SourceLocation());
+    Args.emplace_back(Arg);
+  }
+
+  emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedParallelFn, Args);
+  CGF.FinishFunction();
+  return Fn;
+}
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index a9d6386..6ee2b7a 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -36,8 +36,9 @@
   public:
     llvm::Function *WorkerFn;
     const CGFunctionInfo *CGFI;
+    SourceLocation Loc;
 
-    WorkerFunctionState(CodeGenModule &CGM);
+    WorkerFunctionState(CodeGenModule &CGM, SourceLocation Loc);
 
   private:
     void createWorkerFunction(CodeGenModule &CGM);
@@ -305,6 +306,17 @@
   // target region and used by containing directives such as 'parallel'
   // to emit optimized code.
   ExecutionMode CurrentExecutionMode;
+
+  /// Map between an outlined function and its wrapper.
+  llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
+
+  /// Emit function which wraps the outline parallel region
+  /// and controls the parameters which are passed to this function.
+  /// The wrapper ensures that the outlined function is called
+  /// with the correct arguments when data is shared.
+  llvm::Function *
+  createDataSharingWrapper(llvm::Function *OutlinedParallelFn,
+      const OMPExecutableDirective &D);
 };
 
 } // CodeGen namespace.
diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 1644ab4..7296da7 100644
--- a/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -443,14 +443,18 @@
 
     // If the start field of a new run is better as a single run, or
     // if current field is better as a single run, or
-    // if current field has zero width bitfield, or
+    // if current field has zero width bitfield and either
+    // UseZeroLengthBitfieldAlignment or UseBitFieldTypeAlignment is set to
+    // true, or
     // if the offset of current field is inconsistent with the offset of
     // previous field plus its offset,
     // skip the block below and go ahead to emit the storage.
     // Otherwise, try to add bitfields to the run.
     if (!StartFieldAsSingleRun && Field != FieldEnd &&
         !IsBetterAsSingleFieldRun(Field) &&
-        Field->getBitWidthValue(Context) != 0 &&
+        (Field->getBitWidthValue(Context) != 0 ||
+         (!Context.getTargetInfo().useZeroLengthBitfieldAlignment() &&
+          !Context.getTargetInfo().useBitFieldTypeAlignment())) &&
         Tail == getFieldBitOffset(*Field)) {
       Tail += Field->getBitWidthValue(Context);
       ++Field;
@@ -785,8 +789,7 @@
   }
                                      
   // Verify that the LLVM and AST field offsets agree.
-  llvm::StructType *ST =
-    dyn_cast<llvm::StructType>(RL->getLLVMType());
+  llvm::StructType *ST = RL->getLLVMType();
   const llvm::StructLayout *SL = getDataLayout().getStructLayout(ST);
 
   const ASTRecordLayout &AST_RL = getContext().getASTRecordLayout(D);
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 6a78865..86c0964 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -74,6 +74,15 @@
   // Generate a stoppoint if we are emitting debug info.
   EmitStopPoint(S);
 
+  // Ignore all OpenMP directives except for simd if OpenMP with Simd is
+  // enabled.
+  if (getLangOpts().OpenMP && getLangOpts().OpenMPSimd) {
+    if (const auto *D = dyn_cast<OMPExecutableDirective>(S)) {
+      EmitSimpleOMPExecutableDirective(*D);
+      return;
+    }
+  }
+
   switch (S->getStmtClass()) {
   case Stmt::NoStmtClass:
   case Stmt::CXXCatchStmtClass:
@@ -996,7 +1005,9 @@
   if (RV.isScalar()) {
     Builder.CreateStore(RV.getScalarVal(), ReturnValue);
   } else if (RV.isAggregate()) {
-    EmitAggregateCopy(ReturnValue, RV.getAggregateAddress(), Ty);
+    LValue Dest = MakeAddrLValue(ReturnValue, Ty);
+    LValue Src = MakeAddrLValue(RV.getAggregateAddress(), Ty);
+    EmitAggregateCopy(Dest, Src, Ty);
   } else {
     EmitStoreOfComplex(RV.getComplexVal(), MakeAddrLValue(ReturnValue, Ty),
                        /*init*/ true);
@@ -2122,7 +2133,8 @@
   llvm::InlineAsm *IA =
     llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect,
                          /* IsAlignStack */ false, AsmDialect);
-  llvm::CallInst *Result = Builder.CreateCall(IA, Args);
+  llvm::CallInst *Result =
+      Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
   Result->addAttribute(llvm::AttributeList::FunctionIndex,
                        llvm::Attribute::NoUnwind);
 
@@ -2149,10 +2161,11 @@
                                           llvm::ConstantAsMetadata::get(Loc)));
   }
 
-  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
-    // Conservatively, mark all inline asm blocks in CUDA as convergent
-    // (meaning, they may call an intrinsically convergent op, such as bar.sync,
-    // and so can't have certain optimizations applied around them).
+  if (getLangOpts().assumeFunctionsAreConvergent()) {
+    // Conservatively, mark all inline asm blocks in CUDA or OpenCL as
+    // convergent (meaning, they may call an intrinsically convergent op, such
+    // as bar.sync, and so can't have certain optimizations applied around
+    // them).
     Result->addAttribute(llvm::AttributeList::FunctionIndex,
                          llvm::Attribute::Convergent);
   }
@@ -2267,7 +2280,6 @@
   Args.append(CD->param_begin(), CD->param_end());
 
   // Create the function declaration.
-  FunctionType::ExtInfo ExtInfo;
   const CGFunctionInfo &FuncInfo =
     CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index cdec3e3..d9b4fa8 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -53,34 +53,35 @@
   }
 
 public:
-  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
-                  bool AsInlined = false, bool EmitPreInitStmt = true)
+  OMPLexicalScope(
+      CodeGenFunction &CGF, const OMPExecutableDirective &S,
+      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
+      const bool EmitPreInitStmt = true)
       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
         InlinedShareds(CGF) {
     if (EmitPreInitStmt)
       emitPreInitStmt(CGF, S);
-    if (AsInlined) {
-      if (S.hasAssociatedStmt()) {
-        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
-        for (auto &C : CS->captures()) {
-          if (C.capturesVariable() || C.capturesVariableByCopy()) {
-            auto *VD = C.getCapturedVar();
-            assert(VD == VD->getCanonicalDecl() &&
-                        "Canonical decl must be captured.");
-            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
-                            isCapturedVar(CGF, VD) ||
-                                (CGF.CapturedStmtInfo &&
-                                 InlinedShareds.isGlobalVarCaptured(VD)),
-                            VD->getType().getNonReferenceType(), VK_LValue,
-                            SourceLocation());
-            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
-              return CGF.EmitLValue(&DRE).getAddress();
-            });
-          }
-        }
-        (void)InlinedShareds.Privatize();
+    if (!CapturedRegion.hasValue())
+      return;
+    assert(S.hasAssociatedStmt() &&
+           "Expected associated statement for inlined directive.");
+    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
+    for (auto &C : CS->captures()) {
+      if (C.capturesVariable() || C.capturesVariableByCopy()) {
+        auto *VD = C.getCapturedVar();
+        assert(VD == VD->getCanonicalDecl() &&
+               "Canonical decl must be captured.");
+        DeclRefExpr DRE(
+            const_cast<VarDecl *>(VD),
+            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
+                                       InlinedShareds.isGlobalVarCaptured(VD)),
+            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
+        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
+          return CGF.EmitLValue(&DRE).getAddress();
+        });
       }
     }
+    (void)InlinedShareds.Privatize();
   }
 };
 
@@ -96,9 +97,8 @@
 
 public:
   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
-      : OMPLexicalScope(CGF, S,
-                        /*AsInlined=*/false,
-                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
+                        EmitPreInitStmt(S)) {}
 };
 
 /// Lexical scope for OpenMP teams construct, that handles correct codegen
@@ -112,20 +112,25 @@
 
 public:
   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
-      : OMPLexicalScope(CGF, S,
-                        /*AsInlined=*/false,
-                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
+                        EmitPreInitStmt(S)) {}
 };
 
 /// Private scope for OpenMP loop-based directives, that supports capturing
 /// of used expression from loop statement.
 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
-    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
-      if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
-        for (const auto *I : PreInits->decls())
-          CGF.EmitVarDecl(cast<VarDecl>(*I));
-      }
+    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
+    for (auto *E : S.counters()) {
+      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+      (void)PreCondScope.addPrivate(VD, [&CGF, VD]() {
+        return CGF.CreateMemTemp(VD->getType().getNonReferenceType());
+      });
+    }
+    (void)PreCondScope.Privatize();
+    if (auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
+      for (const auto *I : PreInits->decls())
+        CGF.EmitVarDecl(cast<VarDecl>(*I));
     }
   }
 
@@ -136,8 +141,77 @@
   }
 };
 
+class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
+  CodeGenFunction::OMPPrivateScope InlinedShareds;
+
+  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
+    return CGF.LambdaCaptureFields.lookup(VD) ||
+           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
+           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
+            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
+  }
+
+public:
+  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
+      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
+        InlinedShareds(CGF) {
+    for (const auto *C : S.clauses()) {
+      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
+        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
+          for (const auto *I : PreInit->decls()) {
+            if (!I->hasAttr<OMPCaptureNoInitAttr>())
+              CGF.EmitVarDecl(cast<VarDecl>(*I));
+            else {
+              CodeGenFunction::AutoVarEmission Emission =
+                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
+              CGF.EmitAutoVarCleanups(Emission);
+            }
+          }
+        }
+      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
+        for (const Expr *E : UDP->varlists()) {
+          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
+          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
+            CGF.EmitVarDecl(*OED);
+        }
+      }
+    }
+    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
+      CGF.EmitOMPPrivateClause(S, InlinedShareds);
+    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
+      if (const Expr *E = TG->getReductionRef())
+        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
+    }
+    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
+    while (CS) {
+      for (auto &C : CS->captures()) {
+        if (C.capturesVariable() || C.capturesVariableByCopy()) {
+          auto *VD = C.getCapturedVar();
+          assert(VD == VD->getCanonicalDecl() &&
+                 "Canonical decl must be captured.");
+          DeclRefExpr DRE(const_cast<VarDecl *>(VD),
+                          isCapturedVar(CGF, VD) ||
+                              (CGF.CapturedStmtInfo &&
+                               InlinedShareds.isGlobalVarCaptured(VD)),
+                          VD->getType().getNonReferenceType(), VK_LValue,
+                          C.getLocation());
+          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
+            return CGF.EmitLValue(&DRE).getAddress();
+          });
+        }
+      }
+      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
+    }
+    (void)InlinedShareds.Privatize();
+  }
+};
+
 } // namespace
 
+static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
+                                         const OMPExecutableDirective &S,
+                                         const RegionCodeGenTy &CodeGen);
+
 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
@@ -161,9 +235,10 @@
   if (SizeInChars.isZero()) {
     // getTypeSizeInChars() returns 0 for a VLA.
     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
-      llvm::Value *ArraySize;
-      std::tie(ArraySize, Ty) = getVLASize(VAT);
-      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
+      auto VlaSize = getVLASize(VAT);
+      Ty = VlaSize.Type;
+      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
+                  : VlaSize.NumElts;
     }
     SizeInChars = C.getTypeSizeInChars(Ty);
     if (SizeInChars.isZero())
@@ -189,8 +264,7 @@
     } else if (CurCap->capturesThis())
       CapturedVars.push_back(CXXThisValue);
     else if (CurCap->capturesVariableByCopy()) {
-      llvm::Value *CV =
-          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
+      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
 
       // If the field is not a pointer, we need to save the actual value
       // and load it as a void pointer.
@@ -203,7 +277,7 @@
 
         auto *SrcAddrVal = EmitScalarConversion(
             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
-            Ctx.getPointerType(CurField->getType()), SourceLocation());
+            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
         LValue SrcLV =
             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
 
@@ -211,7 +285,7 @@
         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
 
         // Load the value using the destination type pointer.
-        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
+        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
       }
       CapturedVars.push_back(CV);
     } else {
@@ -221,14 +295,15 @@
   }
 }
 
-static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
-                                    StringRef Name, LValue AddrLV,
+static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
+                                    QualType DstType, StringRef Name,
+                                    LValue AddrLV,
                                     bool isReferenceType = false) {
   ASTContext &Ctx = CGF.getContext();
 
-  auto *CastedPtr = CGF.EmitScalarConversion(
-      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
-      Ctx.getPointerType(DstType), SourceLocation());
+  auto *CastedPtr = CGF.EmitScalarConversion(AddrLV.getAddress().getPointer(),
+                                             Ctx.getUIntPtrType(),
+                                             Ctx.getPointerType(DstType), Loc);
   auto TmpAddr =
       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
           .getAddress();
@@ -254,6 +329,12 @@
   }
   if (T->isPointerType())
     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
+  if (auto *A = T->getAsArrayTypeUnsafe()) {
+    if (auto *VLA = dyn_cast<VariableArrayType>(A))
+      return getCanonicalParamType(C, VLA->getElementType());
+    else if (!A->isVariablyModifiedType())
+      return C.getCanonicalType(T);
+  }
   return C.getCanonicalParamType(T);
 }
 
@@ -301,6 +382,16 @@
       CD->param_begin(),
       std::next(CD->param_begin(), CD->getContextParamPosition()));
   auto I = FO.S->captures().begin();
+  FunctionDecl *DebugFunctionDecl = nullptr;
+  if (!FO.UIntPtrCastRequired) {
+    FunctionProtoType::ExtProtoInfo EPI;
+    DebugFunctionDecl = FunctionDecl::Create(
+        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
+        SourceLocation(), DeclarationName(), Ctx.VoidTy,
+        Ctx.getTrivialTypeSourceInfo(
+            Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
+        SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
+  }
   for (auto *FD : RD->fields()) {
     QualType ArgType = FD->getType();
     IdentifierInfo *II = nullptr;
@@ -327,10 +418,18 @@
       II = &Ctx.Idents.get("vla");
     }
     if (ArgType->isVariablyModifiedType())
-      ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
-    auto *Arg =
-        ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
-                                  ArgType, ImplicitParamDecl::Other);
+      ArgType = getCanonicalParamType(Ctx, ArgType);
+    VarDecl *Arg;
+    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
+      Arg = ParmVarDecl::Create(
+          Ctx, DebugFunctionDecl,
+          CapVar ? CapVar->getLocStart() : FD->getLocStart(),
+          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
+          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
+    } else {
+      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
+                                      II, ArgType, ImplicitParamDecl::Other);
+    }
     Args.emplace_back(Arg);
     // Do not cast arguments if we emit function with non-original types.
     TargetArgs.emplace_back(
@@ -347,7 +446,6 @@
       CD->param_end());
 
   // Create the function declaration.
-  FunctionType::ExtInfo ExtInfo;
   const CGFunctionInfo &FuncInfo =
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
@@ -396,13 +494,12 @@
                                         AlignmentSource::Decl);
     if (FD->hasCapturedVLAType()) {
       if (FO.UIntPtrCastRequired) {
-        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
-                                                          Args[Cnt]->getName(),
-                                                          ArgLVal),
-                                     FD->getType(), AlignmentSource::Decl);
+        ArgLVal = CGF.MakeAddrLValue(
+            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
+                                 Args[Cnt]->getName(), ArgLVal),
+            FD->getType(), AlignmentSource::Decl);
       }
-      auto *ExprArg =
-          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
+      auto *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
       auto VAT = FD->getCapturedVLAType();
       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
     } else if (I->capturesVariable()) {
@@ -411,8 +508,7 @@
       Address ArgAddr = ArgLVal.getAddress();
       if (!VarTy->isReferenceType()) {
         if (ArgLVal.getType()->isLValueReferenceType()) {
-          ArgAddr = CGF.EmitLoadOfReference(
-              ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
+          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
           assert(ArgLVal.getType()->isPointerType());
           ArgAddr = CGF.EmitLoadOfPointer(
@@ -431,16 +527,15 @@
       QualType VarTy = Var->getType();
       LocalAddrs.insert(
           {Args[Cnt],
-           {Var,
-            FO.UIntPtrCastRequired
-                ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
-                                       ArgLVal, VarTy->isReferenceType())
-                : ArgLVal.getAddress()}});
+           {Var, FO.UIntPtrCastRequired
+                     ? castValueFromUintptr(CGF, I->getLocation(),
+                                            FD->getType(), Args[Cnt]->getName(),
+                                            ArgLVal, VarTy->isReferenceType())
+                     : ArgLVal.getAddress()}});
     } else {
       // If 'this' is captured, load it into CXXThisValue.
       assert(I->capturesThis());
-      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
-                         .getScalarVal();
+      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
     }
     ++Cnt;
@@ -505,7 +600,7 @@
           I->second.second,
           I->second.first ? I->second.first->getType() : Arg->getType(),
           AlignmentSource::Decl);
-      CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
+      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
     } else {
       auto EI = VLASizes.find(Arg);
       if (EI != VLASizes.end())
@@ -514,7 +609,7 @@
         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                               Arg->getType(),
                                               AlignmentSource::Decl);
-        CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
+        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
       }
     }
     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
@@ -596,7 +691,9 @@
     auto *BO = dyn_cast<BinaryOperator>(Copy);
     if (BO && BO->getOpcode() == BO_Assign) {
       // Perform simple memcpy for simple copying.
-      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
+      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
+      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
+      EmitAggregateAssign(Dest, Src, OriginalType);
     } else {
       // For arrays with complex element types perform element by element
       // copying.
@@ -639,7 +736,12 @@
           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
   }
   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
-  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
+  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
+  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
+  // Force emission of the firstprivate copy if the directive does not emit
+  // outlined function, like omp for, omp simd, omp distribute etc.
+  bool MustEmitFirstprivateCopy =
+      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
     auto IRef = C->varlist_begin();
     auto InitsRef = C->inits().begin();
@@ -647,9 +749,8 @@
       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
       bool ThisFirstprivateIsLastprivate =
           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
-      auto *CapFD = CapturesInfo.lookup(OrigVD);
       auto *FD = CapturedStmtInfo->lookup(OrigVD);
-      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
+      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
           !FD->getType()->isReferenceType()) {
         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
         ++IRef;
@@ -665,7 +766,8 @@
         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
-        Address OriginalAddr = EmitLValue(&DRE).getAddress();
+        LValue OriginalLVal = EmitLValue(&DRE);
+        Address OriginalAddr = OriginalLVal.getAddress();
         QualType Type = VD->getType();
         if (Type->isArrayType()) {
           // Emit VarDecl with copy init for arrays.
@@ -676,8 +778,9 @@
             auto *Init = VD->getInit();
             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
               // Perform simple memcpy.
-              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
-                                  Type);
+              LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(),
+                                           Type);
+              EmitAggregateAssign(Dest, OriginalLVal, Type);
             } else {
               EmitOMPAggregateAssign(
                   Emission.getAllocatedAddress(), OriginalAddr, Type,
@@ -829,7 +932,8 @@
   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
     HasAtLeastOneLastprivate = true;
-    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
+    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
+        !getLangOpts().OpenMPSimd)
       break;
     auto IRef = C->varlist_begin();
     auto IDestRef = C->destination_exprs().begin();
@@ -996,7 +1100,9 @@
 
     auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
     auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
-    if (isa<OMPArraySectionExpr>(IRef)) {
+    QualType Type = PrivateVD->getType();
+    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
+    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
       // Store the address of the original variable associated with the LHS
       // implicit variable.
       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
@@ -1005,7 +1111,8 @@
       PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
         return GetAddrOfLocalVar(PrivateVD);
       });
-    } else if (isa<ArraySubscriptExpr>(IRef)) {
+    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
+               isa<ArraySubscriptExpr>(IRef)) {
       // Store the address of the original variable associated with the LHS
       // implicit variable.
       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
@@ -1063,8 +1170,8 @@
   if (HasAtLeastOneReduction) {
     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
-                      D.getDirectiveKind() == OMPD_simd;
-    bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
+                      ReductionKind == OMPD_simd;
+    bool SimpleReduction = ReductionKind == OMPD_simd;
     // Emit nowait reduction if nowait clause is present or directive is a
     // parallel directive (it always has implicit barrier).
     CGM.getOpenMPRuntime().emitReduction(
@@ -1169,7 +1276,7 @@
     CGF.EmitOMPPrivateClause(S, PrivateScope);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
-    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
   };
   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
@@ -1186,9 +1293,13 @@
     EmitIgnoredExpr(I);
   }
   // Update the linear variables.
-  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
-    for (auto *U : C->updates())
-      EmitIgnoredExpr(U);
+  // In distribute directives only loop counters may be marked as linear, no
+  // need to generate the code for them.
+  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
+    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
+      for (auto *U : C->updates())
+        EmitIgnoredExpr(U);
+    }
   }
 
   // On a continue in the body, jump to the end.
@@ -1528,85 +1639,106 @@
   CGF.EmitStopPoint(&S);
 }
 
-void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
-  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-    OMPLoopScope PreInitScope(CGF, S);
-    // if (PreCond) {
-    //   for (IV in 0..LastIteration) BODY;
-    //   <Final counter/linear vars updates>;
-    // }
-    //
+/// Emit a helper variable and return corresponding lvalue.
+static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
+                               const DeclRefExpr *Helper) {
+  auto VDecl = cast<VarDecl>(Helper->getDecl());
+  CGF.EmitVarDecl(*VDecl);
+  return CGF.EmitLValue(Helper);
+}
 
-    // Emit: if (PreCond) - begin.
-    // If the condition constant folds and can be elided, avoid emitting the
-    // whole loop.
-    bool CondConstant;
-    llvm::BasicBlock *ContBlock = nullptr;
-    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
-      if (!CondConstant)
-        return;
-    } else {
-      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
-      ContBlock = CGF.createBasicBlock("simd.if.end");
-      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
-                  CGF.getProfileCount(&S));
-      CGF.EmitBlock(ThenBlock);
-      CGF.incrementProfileCounter(&S);
-    }
+static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
+                              PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
+         "Expected simd directive");
+  OMPLoopScope PreInitScope(CGF, S);
+  // if (PreCond) {
+  //   for (IV in 0..LastIteration) BODY;
+  //   <Final counter/linear vars updates>;
+  // }
+  //
+  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
+      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
+      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
+    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
+    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+  }
 
-    // Emit the loop iteration variable.
-    const Expr *IVExpr = S.getIterationVariable();
-    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
-    CGF.EmitVarDecl(*IVDecl);
-    CGF.EmitIgnoredExpr(S.getInit());
+  // Emit: if (PreCond) - begin.
+  // If the condition constant folds and can be elided, avoid emitting the
+  // whole loop.
+  bool CondConstant;
+  llvm::BasicBlock *ContBlock = nullptr;
+  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+    if (!CondConstant)
+      return;
+  } else {
+    auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
+    ContBlock = CGF.createBasicBlock("simd.if.end");
+    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
+                CGF.getProfileCount(&S));
+    CGF.EmitBlock(ThenBlock);
+    CGF.incrementProfileCounter(&S);
+  }
 
-    // Emit the iterations count variable.
-    // If it is not a variable, Sema decided to calculate iterations count on
-    // each iteration (e.g., it is foldable into a constant).
-    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
-      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
-      // Emit calculation of the iterations count.
-      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
-    }
+  // Emit the loop iteration variable.
+  const Expr *IVExpr = S.getIterationVariable();
+  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
+  CGF.EmitVarDecl(*IVDecl);
+  CGF.EmitIgnoredExpr(S.getInit());
 
-    CGF.EmitOMPSimdInit(S);
+  // Emit the iterations count variable.
+  // If it is not a variable, Sema decided to calculate iterations count on
+  // each iteration (e.g., it is foldable into a constant).
+  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
+    // Emit calculation of the iterations count.
+    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
+  }
 
-    emitAlignedClause(CGF, S);
-    (void)CGF.EmitOMPLinearClauseInit(S);
-    {
-      OMPPrivateScope LoopScope(CGF);
-      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
-      CGF.EmitOMPLinearClause(S, LoopScope);
-      CGF.EmitOMPPrivateClause(S, LoopScope);
-      CGF.EmitOMPReductionClauseInit(S, LoopScope);
-      bool HasLastprivateClause =
-          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
-      (void)LoopScope.Privatize();
-      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
-                           S.getInc(),
-                           [&S](CodeGenFunction &CGF) {
-                             CGF.EmitOMPLoopBody(S, JumpDest());
-                             CGF.EmitStopPoint(&S);
-                           },
-                           [](CodeGenFunction &) {});
-      CGF.EmitOMPSimdFinal(
-          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
-      // Emit final copy of the lastprivate variables at the end of loops.
-      if (HasLastprivateClause)
-        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
-      CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
-      emitPostUpdateForReductionClause(
-          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
-    }
-    CGF.EmitOMPLinearClauseFinal(
+  CGF.EmitOMPSimdInit(S);
+
+  emitAlignedClause(CGF, S);
+  (void)CGF.EmitOMPLinearClauseInit(S);
+  {
+    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
+    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
+    CGF.EmitOMPLinearClause(S, LoopScope);
+    CGF.EmitOMPPrivateClause(S, LoopScope);
+    CGF.EmitOMPReductionClauseInit(S, LoopScope);
+    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
+    (void)LoopScope.Privatize();
+    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
+                         S.getInc(),
+                         [&S](CodeGenFunction &CGF) {
+                           CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
+                           CGF.EmitStopPoint(&S);
+                         },
+                         [](CodeGenFunction &) {});
+    CGF.EmitOMPSimdFinal(
         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
-    // Emit: if (PreCond) - end.
-    if (ContBlock) {
-      CGF.EmitBranch(ContBlock);
-      CGF.EmitBlock(ContBlock, true);
-    }
+    // Emit final copy of the lastprivate variables at the end of loops.
+    if (HasLastprivateClause)
+      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
+    emitPostUpdateForReductionClause(
+        CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+  }
+  CGF.EmitOMPLinearClauseFinal(
+      S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+  // Emit: if (PreCond) - end.
+  if (ContBlock) {
+    CGF.EmitBranch(ContBlock);
+    CGF.EmitBlock(ContBlock, true);
+  }
+}
+
+void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitOMPSimdRegion(CGF, S, Action);
   };
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+  OMPLexicalScope Scope(*this, S, OMPD_unknown);
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
 }
 
@@ -1884,14 +2016,6 @@
                    emitEmptyOrdered);
 }
 
-/// Emit a helper variable and return corresponding lvalue.
-static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
-                               const DeclRefExpr *Helper) {
-  auto VDecl = cast<VarDecl>(Helper->getDecl());
-  CGF.EmitVarDecl(*VDecl);
-  return CGF.EmitLValue(Helper);
-}
-
 static std::pair<LValue, LValue>
 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                      const OMPExecutableDirective &S) {
@@ -1909,14 +2033,18 @@
   // the current ones.
   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
-  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
+  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
+      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
   PrevLBVal = CGF.EmitScalarConversion(
       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
-      LS.getIterationVariable()->getType(), SourceLocation());
-  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
+      LS.getIterationVariable()->getType(),
+      LS.getPrevLowerBoundVariable()->getExprLoc());
+  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
+      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
   PrevUBVal = CGF.EmitScalarConversion(
       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
-      LS.getIterationVariable()->getType(), SourceLocation());
+      LS.getIterationVariable()->getType(),
+      LS.getPrevUpperBoundVariable()->getExprLoc());
 
   CGF.EmitStoreOfScalar(PrevLBVal, LB);
   CGF.EmitStoreOfScalar(PrevUBVal, UB);
@@ -1942,10 +2070,10 @@
   // is not normalized as each team only executes its own assigned
   // distribute chunk
   QualType IteratorTy = IVExpr->getType();
-  llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
-                                            SourceLocation());
-  llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
-                                            SourceLocation());
+  llvm::Value *LBVal =
+      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart());
+  llvm::Value *UBVal =
+      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart());
   return {LBVal, UBVal};
 }
 
@@ -1972,13 +2100,27 @@
                                  CodeGenFunction::JumpDest LoopExit) {
   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
+    bool HasCancel = false;
+    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
+      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
+        HasCancel = D->hasCancel();
+      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
+        HasCancel = D->hasCancel();
+      else if (const auto *D =
+                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
+        HasCancel = D->hasCancel();
+    }
+    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
+                                                     HasCancel);
     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                                emitDistributeParallelForInnerBounds,
                                emitDistributeParallelForDispatchBounds);
   };
 
   emitCommonOMPParallelDirective(
-      CGF, S, OMPD_for, CGInlinedWorksharingLoop,
+      CGF, S,
+      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
+      CGInlinedWorksharingLoop,
       emitDistributeParallelForDistributeInnerBoundParams);
 }
 
@@ -1988,138 +2130,49 @@
     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                               S.getDistInc());
   };
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
-                                  /*HasCancel=*/false);
-  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
-                                              /*HasCancel=*/false);
+  OMPLexicalScope Scope(*this, S, OMPD_parallel);
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
     const OMPDistributeParallelForSimdDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_distribute_parallel_for_simd,
-      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        OMPLoopScope PreInitScope(CGF, S);
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+                              S.getDistInc());
+  };
+  OMPLexicalScope Scope(*this, S, OMPD_parallel);
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPDistributeSimdDirective(
     const OMPDistributeSimdDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_distribute_simd,
-      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        OMPLoopScope PreInitScope(CGF, S);
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
+  };
+  OMPLexicalScope Scope(*this, S, OMPD_unknown);
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
 }
 
-void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
-    const OMPTargetParallelForSimdDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_target_parallel_for_simd,
-      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        OMPLoopScope PreInitScope(CGF, S);
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
+void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
+    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
+  // Emit SPMD target parallel for region as a standalone region.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitOMPSimdRegion(CGF, S, Action);
+  };
+  llvm::Function *Fn;
+  llvm::Constant *Addr;
+  // Emit target region as a standalone region.
+  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+  assert(Fn && Addr && "Target device function emission failed.");
 }
 
 void CodeGenFunction::EmitOMPTargetSimdDirective(
     const OMPTargetSimdDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        OMPLoopScope PreInitScope(CGF, S);
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
-}
-
-void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
-    const OMPTeamsDistributeSimdDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_teams_distribute_simd,
-      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        OMPLoopScope PreInitScope(CGF, S);
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
-}
-
-void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
-    const OMPTeamsDistributeParallelForSimdDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_teams_distribute_parallel_for_simd,
-      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        OMPLoopScope PreInitScope(CGF, S);
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
-}
-
-void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
-    const OMPTeamsDistributeParallelForDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_teams_distribute_parallel_for,
-      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        OMPLoopScope PreInitScope(CGF, S);
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
-}
-
-void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
-    const OMPTargetTeamsDistributeDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_target_teams_distribute,
-      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
-}
-
-void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
-    const OMPTargetTeamsDistributeParallelForDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_target_teams_distribute_parallel_for,
-      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
-}
-
-void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
-    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_target_teams_distribute_parallel_for_simd,
-      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
-}
-
-void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
-    const OMPTargetTeamsDistributeSimdDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_target_teams_distribute_simd,
-      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitOMPSimdRegion(CGF, S, Action);
+  };
+  emitCommonOMPTargetDirective(*this, S, CodeGen);
 }
 
 namespace {
@@ -2175,6 +2228,7 @@
       incrementProfileCounter(&S);
     }
 
+    RunCleanupsScope DoacrossCleanupScope(*this);
     bool Ordered = false;
     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
       if (OrderedClause->getNumForLoops())
@@ -2311,6 +2365,7 @@
       return CGF.Builder.CreateIsNotNull(
           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
     });
+    DoacrossCleanupScope.ForceCleanup();
     // We're now done with the loop, so jump to the continuation block.
     if (ContBlock) {
       EmitBranch(ContBlock);
@@ -2358,7 +2413,7 @@
                                                  emitDispatchForLoopBounds);
   };
   {
-    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+    OMPLexicalScope Scope(*this, S, OMPD_unknown);
     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                 S.hasCancel());
   }
@@ -2378,7 +2433,7 @@
                                                  emitDispatchForLoopBounds);
   };
   {
-    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+    OMPLexicalScope Scope(*this, S, OMPD_unknown);
     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
   }
 
@@ -2398,8 +2453,8 @@
 }
 
 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
-  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
-  auto *CS = dyn_cast<CompoundStmt>(Stmt);
+  const Stmt *Stmt = S.getInnermostCapturedStmt()->getCapturedStmt();
+  const auto *CS = dyn_cast<CompoundStmt>(Stmt);
   bool HasLastprivates = false;
   auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
@@ -2427,7 +2482,7 @@
                         OK_Ordinary, S.getLocStart(), FPOptions());
     // Increment for loop counter.
     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
-                      S.getLocStart());
+                      S.getLocStart(), true);
     auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
       // Iterate through all sections and emit a switch construct:
       // switch (IV) {
@@ -2441,9 +2496,9 @@
       // }
       // .omp.sections.exit:
       auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
-      auto *SwitchStmt = CGF.Builder.CreateSwitch(
-          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
-          CS == nullptr ? 1 : CS->size());
+      auto *SwitchStmt =
+          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()),
+                                   ExitBB, CS == nullptr ? 1 : CS->size());
       if (CS) {
         unsigned CaseNumber = 0;
         for (auto *SubStmt : CS->children()) {
@@ -2539,7 +2594,7 @@
 
 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
   {
-    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+    OMPLexicalScope Scope(*this, S, OMPD_unknown);
     EmitSections(S);
   }
   // Emit an implicit barrier at the end.
@@ -2551,9 +2606,9 @@
 
 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
   };
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+  OMPLexicalScope Scope(*this, S, OMPD_unknown);
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                               S.hasCancel());
 }
@@ -2582,10 +2637,10 @@
     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
     CGF.EmitOMPPrivateClause(S, SingleScope);
     (void)SingleScope.Privatize();
-    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
   };
   {
-    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+    OMPLexicalScope Scope(*this, S, OMPD_unknown);
     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                             CopyprivateVars, DestExprs,
                                             SrcExprs, AssignmentOps);
@@ -2602,21 +2657,21 @@
 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
     Action.Enter(CGF);
-    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
   };
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+  OMPLexicalScope Scope(*this, S, OMPD_unknown);
   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
 }
 
 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
     Action.Enter(CGF);
-    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
   };
   Expr *Hint = nullptr;
   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
     Hint = HintClause->getHint();
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+  OMPLexicalScope Scope(*this, S, OMPD_unknown);
   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                             S.getDirectiveName().getAsString(),
                                             CodeGen, S.getLocStart(), Hint);
@@ -2658,12 +2713,12 @@
                                  emitEmptyBoundParameters);
 }
 
-void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
-                                                const RegionCodeGenTy &BodyGen,
-                                                const TaskGenTy &TaskGen,
-                                                OMPTaskDataTy &Data) {
+void CodeGenFunction::EmitOMPTaskBasedDirective(
+    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
+    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
+    OMPTaskDataTy &Data) {
   // Emit outlined function for task construct.
-  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
   auto *I = CS->getCapturedDecl()->param_begin();
   auto *PartId = std::next(I);
   auto *TaskT = std::next(I, 4);
@@ -2764,8 +2819,9 @@
   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
     for (auto *IRef : C->varlists())
       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
-  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
-      CodeGenFunction &CGF, PrePostActionTy &Action) {
+  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
+                    CapturedRegion](CodeGenFunction &CGF,
+                                    PrePostActionTy &Action) {
     // Set proper addresses for generated private copies.
     OMPPrivateScope Scope(CGF);
     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
@@ -2822,7 +2878,7 @@
       }
     }
     if (Data.Reductions) {
-      OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
+      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                              Data.ReductionOps);
       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
@@ -2837,7 +2893,7 @@
                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                         CGF.getContext().getPointerType(
                             Data.ReductionCopies[Cnt]->getType()),
-                        SourceLocation()),
+                        Data.ReductionCopies[Cnt]->getExprLoc()),
                     Replacement.getAlignment());
         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
@@ -2879,15 +2935,16 @@
         RedCG.emitAggregateType(CGF, Cnt);
         // The taskgroup descriptor variable is always implicit firstprivate and
         // privatized already during procoessing of the firstprivates.
-        llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
-            CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
+        llvm::Value *ReductionsPtr =
+            CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
+                                 TaskgroupDescriptors[Cnt]->getExprLoc());
         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
         Replacement = Address(
             CGF.EmitScalarConversion(
                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
-                SourceLocation()),
+                InRedPrivs[Cnt]->getExprLoc()),
             Replacement.getAlignment());
         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
@@ -2911,9 +2968,153 @@
   TaskGen(*this, OutlinedFn, Data);
 }
 
+static ImplicitParamDecl *
+createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
+                                  QualType Ty, CapturedDecl *CD,
+                                  SourceLocation Loc) {
+  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
+                                           ImplicitParamDecl::Other);
+  auto *OrigRef = DeclRefExpr::Create(
+      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
+      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
+  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
+                                              ImplicitParamDecl::Other);
+  auto *PrivateRef = DeclRefExpr::Create(
+      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
+      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
+  QualType ElemType = C.getBaseElementType(Ty);
+  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
+                                           ImplicitParamDecl::Other);
+  auto *InitRef = DeclRefExpr::Create(
+      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
+      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
+  PrivateVD->setInitStyle(VarDecl::CInit);
+  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
+                                              InitRef, /*BasePath=*/nullptr,
+                                              VK_RValue));
+  Data.FirstprivateVars.emplace_back(OrigRef);
+  Data.FirstprivateCopies.emplace_back(PrivateRef);
+  Data.FirstprivateInits.emplace_back(InitRef);
+  return OrigVD;
+}
+
+void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
+    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
+    OMPTargetDataInfo &InputInfo) {
+  // Emit outlined function for task construct.
+  auto CS = S.getCapturedStmt(OMPD_task);
+  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
+  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+  auto *I = CS->getCapturedDecl()->param_begin();
+  auto *PartId = std::next(I);
+  auto *TaskT = std::next(I, 4);
+  OMPTaskDataTy Data;
+  // The task is not final.
+  Data.Final.setInt(/*IntVal=*/false);
+  // Get list of firstprivate variables.
+  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
+    auto IRef = C->varlist_begin();
+    auto IElemInitRef = C->inits().begin();
+    for (auto *IInit : C->private_copies()) {
+      Data.FirstprivateVars.push_back(*IRef);
+      Data.FirstprivateCopies.push_back(IInit);
+      Data.FirstprivateInits.push_back(*IElemInitRef);
+      ++IRef;
+      ++IElemInitRef;
+    }
+  }
+  OMPPrivateScope TargetScope(*this);
+  VarDecl *BPVD = nullptr;
+  VarDecl *PVD = nullptr;
+  VarDecl *SVD = nullptr;
+  if (InputInfo.NumberOfTargetItems > 0) {
+    auto *CD = CapturedDecl::Create(
+        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
+    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
+    QualType BaseAndPointersType = getContext().getConstantArrayType(
+        getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
+        /*IndexTypeQuals=*/0);
+    BPVD = createImplicitFirstprivateForType(
+        getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
+    PVD = createImplicitFirstprivateForType(
+        getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
+    QualType SizesType = getContext().getConstantArrayType(
+        getContext().getSizeType(), ArrSize, ArrayType::Normal,
+        /*IndexTypeQuals=*/0);
+    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
+                                            S.getLocStart());
+    TargetScope.addPrivate(
+        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
+    TargetScope.addPrivate(PVD,
+                           [&InputInfo]() { return InputInfo.PointersArray; });
+    TargetScope.addPrivate(SVD,
+                           [&InputInfo]() { return InputInfo.SizesArray; });
+  }
+  (void)TargetScope.Privatize();
+  // Build list of dependences.
+  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
+    for (auto *IRef : C->varlists())
+      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
+  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
+                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    // Set proper addresses for generated private copies.
+    OMPPrivateScope Scope(CGF);
+    if (!Data.FirstprivateVars.empty()) {
+      enum { PrivatesParam = 2, CopyFnParam = 3 };
+      auto *CopyFn = CGF.Builder.CreateLoad(
+          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
+      auto *PrivatesPtr = CGF.Builder.CreateLoad(
+          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
+      // Map privates.
+      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
+      llvm::SmallVector<llvm::Value *, 16> CallArgs;
+      CallArgs.push_back(PrivatesPtr);
+      for (auto *E : Data.FirstprivateVars) {
+        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+        Address PrivatePtr =
+            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
+                              ".firstpriv.ptr.addr");
+        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+        CallArgs.push_back(PrivatePtr.getPointer());
+      }
+      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
+                                                          CopyFn, CallArgs);
+      for (auto &&Pair : PrivatePtrs) {
+        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
+                            CGF.getContext().getDeclAlign(Pair.first));
+        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
+      }
+    }
+    // Privatize all private variables except for in_reduction items.
+    (void)Scope.Privatize();
+    if (InputInfo.NumberOfTargetItems > 0) {
+      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
+          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
+      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
+          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
+      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
+          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
+    }
+
+    Action.Enter(CGF);
+    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
+    BodyGen(CGF);
+  };
+  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
+      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
+      Data.NumberOfParts);
+  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
+  IntegerLiteral IfCond(getContext(), TrueOrFalse,
+                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+                        SourceLocation());
+
+  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn,
+                                      SharedsTy, CapturedStruct, &IfCond, Data);
+}
+
 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
   // Emit outlined function for task construct.
-  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
   const Expr *IfCond = nullptr;
@@ -2938,7 +3139,7 @@
                                             SharedsTy, CapturedStruct, IfCond,
                                             Data);
   };
-  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
 }
 
 void CodeGenFunction::EmitOMPTaskyieldDirective(
@@ -2987,9 +3188,9 @@
       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                             /*Volatile=*/false, E->getType());
     }
-    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
   };
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+  OMPLexicalScope Scope(*this, S, OMPD_unknown);
   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
 }
 
@@ -3043,6 +3244,7 @@
       incrementProfileCounter(&S);
     }
 
+    emitAlignedClause(*this, S);
     // Emit 'then' code.
     {
       // Emit helper vars inits.
@@ -3064,14 +3266,18 @@
 
       OMPPrivateScope LoopScope(*this);
       if (EmitOMPFirstprivateClause(S, LoopScope)) {
-        // Emit implicit barrier to synchronize threads and avoid data races on
-        // initialization of firstprivate variables and post-update of
+        // Emit implicit barrier to synchronize threads and avoid data races
+        // on initialization of firstprivate variables and post-update of
         // lastprivate variables.
         CGM.getOpenMPRuntime().emitBarrierCall(
-          *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
-          /*ForceSimpleCall=*/true);
+            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
+            /*ForceSimpleCall=*/true);
       }
       EmitOMPPrivateClause(S, LoopScope);
+      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
+          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
+          !isOpenMPTeamsDirective(S.getDirectiveKind()))
+        EmitOMPReductionClauseInit(S, LoopScope);
       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
       EmitOMPPrivateLoopCounters(S, LoopScope);
       (void)LoopScope.Privatize();
@@ -3084,8 +3290,8 @@
         if (const auto *Ch = C->getChunkSize()) {
           Chunk = EmitScalarExpr(Ch);
           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
-          S.getIterationVariable()->getType(),
-          S.getLocStart());
+                                       S.getIterationVariable()->getType(),
+                                       S.getLocStart());
         }
       }
       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
@@ -3101,6 +3307,8 @@
       // league. The size of the chunks is unspecified in this case.
       if (RT.isStaticNonchunked(ScheduleKind,
                                 /* Chunked */ Chunk != nullptr)) {
+        if (isOpenMPSimdDirective(S.getDirectiveKind()))
+          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
         CGOpenMPRuntime::StaticRTInput StaticInit(
             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
             LB.getAddress(), UB.getAddress(), ST.getAddress());
@@ -3142,13 +3350,42 @@
         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                    CodeGenLoop);
       }
-
+      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+        EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
+          return CGF.Builder.CreateIsNotNull(
+              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+        });
+      }
+      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
+          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
+          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
+        OpenMPDirectiveKind ReductionKind = OMPD_unknown;
+        if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
+            isOpenMPSimdDirective(S.getDirectiveKind())) {
+          ReductionKind = OMPD_parallel_for_simd;
+        } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
+          ReductionKind = OMPD_parallel_for;
+        } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+          ReductionKind = OMPD_simd;
+        } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
+                   S.hasClausesOfKind<OMPReductionClause>()) {
+          llvm_unreachable(
+              "No reduction clauses is allowed in distribute directive.");
+        }
+        EmitOMPReductionClauseFinal(S, ReductionKind);
+        // Emit post-update of the reduction variables if IsLastIter != 0.
+        emitPostUpdateForReductionClause(
+            *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
+              return CGF.Builder.CreateIsNotNull(
+                  CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+            });
+      }
       // Emit final copy of the lastprivate variables if IsLastIter != 0.
-      if (HasLastprivateClause)
+      if (HasLastprivateClause) {
         EmitOMPLastprivateClauseFinal(
             S, /*NoFinals=*/false,
-            Builder.CreateIsNotNull(
-                EmitLoadOfScalar(IL, S.getLocStart())));
+            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
+      }
     }
 
     // We're now done with the loop, so jump to the continuation block.
@@ -3165,9 +3402,8 @@
 
     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
   };
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
-                                              false);
+  OMPLexicalScope Scope(*this, S, OMPD_unknown);
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
 }
 
 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
@@ -3181,7 +3417,9 @@
 }
 
 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
-  if (!S.getAssociatedStmt()) {
+  if (S.hasClausesOfKind<OMPDependClause>()) {
+    assert(!S.getAssociatedStmt() &&
+           "No associated statement must be in ordered depend construct.");
     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
     return;
@@ -3189,8 +3427,8 @@
   auto *C = S.getSingleClause<OMPSIMDClause>();
   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
+    const CapturedStmt *CS = S.getInnermostCapturedStmt();
     if (C) {
-      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
@@ -3198,11 +3436,10 @@
                                                       OutlinedFn, CapturedVars);
     } else {
       Action.Enter(CGF);
-      CGF.EmitStmt(
-          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+      CGF.EmitStmt(CS->getCapturedStmt());
     }
   };
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+  OMPLexicalScope Scope(*this, S, OMPD_unknown);
   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
 }
 
@@ -3381,6 +3618,7 @@
   case BO_GE:
   case BO_EQ:
   case BO_NE:
+  case BO_Cmp:
   case BO_AddAssign:
   case BO_SubAssign:
   case BO_AndAssign:
@@ -3645,8 +3883,7 @@
     }
   }
 
-  const auto *CS =
-      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+  const auto *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
     enterFullExpression(EWC);
   }
@@ -3666,7 +3903,7 @@
                       S.getV(), S.getExpr(), S.getUpdateExpr(),
                       S.isXLHSInRHSPart(), S.getLocStart());
   };
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+  OMPLexicalScope Scope(*this, S, OMPD_unknown);
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
 }
 
@@ -3675,7 +3912,6 @@
                                          const RegionCodeGenTy &CodeGen) {
   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
   CodeGenModule &CGM = CGF.CGM;
-  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
 
   llvm::Function *Fn = nullptr;
   llvm::Constant *FnID = nullptr;
@@ -3723,11 +3959,8 @@
   // Emit target region as a standalone region.
   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                     IsOffloadEntry, CodeGen);
-  OMPLexicalScope Scope(CGF, S);
-  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
-  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
-  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
-                                        CapturedVars);
+  OMPLexicalScope Scope(CGF, S, OMPD_task);
+  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
 }
 
 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
@@ -3738,7 +3971,7 @@
   (void)PrivateScope.Privatize();
 
   Action.Enter(CGF);
-  CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
 }
 
 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
@@ -3795,10 +4028,10 @@
     CGF.EmitOMPPrivateClause(S, PrivateScope);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
-    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
   };
-  emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
+  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
   emitPostUpdateForReductionClause(
       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
 }
@@ -3807,11 +4040,20 @@
                                   const OMPTargetTeamsDirective &S) {
   auto *CS = S.getCapturedStmt(OMPD_teams);
   Action.Enter(CGF);
-  auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
-    // TODO: Add support for clauses.
+  // Emit teams region as a standalone region.
+  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+    CGF.EmitOMPPrivateClause(S, PrivateScope);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     CGF.EmitStmt(CS->getCapturedStmt());
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
   };
   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
+  emitPostUpdateForReductionClause(
+      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
 }
 
 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
@@ -3836,6 +4078,96 @@
   emitCommonOMPTargetDirective(*this, S, CodeGen);
 }
 
+static void
+emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
+                                const OMPTargetTeamsDistributeDirective &S) {
+  Action.Enter(CGF);
+  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
+  };
+
+  // Emit teams region as a standalone region.
+  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+                                            PrePostActionTy &) {
+    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
+                                                    CodeGenDistribute);
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+  };
+  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
+  emitPostUpdateForReductionClause(CGF, S,
+                                   [](CodeGenFunction &) { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
+    CodeGenModule &CGM, StringRef ParentName,
+    const OMPTargetTeamsDistributeDirective &S) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetTeamsDistributeRegion(CGF, Action, S);
+  };
+  llvm::Function *Fn;
+  llvm::Constant *Addr;
+  // Emit target region as a standalone region.
+  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+  assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
+    const OMPTargetTeamsDistributeDirective &S) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetTeamsDistributeRegion(CGF, Action, S);
+  };
+  emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
+
+static void emitTargetTeamsDistributeSimdRegion(
+    CodeGenFunction &CGF, PrePostActionTy &Action,
+    const OMPTargetTeamsDistributeSimdDirective &S) {
+  Action.Enter(CGF);
+  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
+  };
+
+  // Emit teams region as a standalone region.
+  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+                                            PrePostActionTy &) {
+    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
+                                                    CodeGenDistribute);
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+  };
+  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
+  emitPostUpdateForReductionClause(CGF, S,
+                                   [](CodeGenFunction &) { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
+    CodeGenModule &CGM, StringRef ParentName,
+    const OMPTargetTeamsDistributeSimdDirective &S) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
+  };
+  llvm::Function *Fn;
+  llvm::Constant *Addr;
+  // Emit target region as a standalone region.
+  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+  assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
+    const OMPTargetTeamsDistributeSimdDirective &S) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
+  };
+  emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
+
 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
     const OMPTeamsDistributeDirective &S) {
 
@@ -3853,11 +4185,177 @@
                                                     CodeGenDistribute);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
   };
-  emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
+  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
   emitPostUpdateForReductionClause(*this, S,
                                    [](CodeGenFunction &) { return nullptr; });
 }
 
+void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
+    const OMPTeamsDistributeSimdDirective &S) {
+  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
+  };
+
+  // Emit teams region as a standalone region.
+  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+                                            PrePostActionTy &) {
+    OMPPrivateScope PrivateScope(CGF);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
+                                                    CodeGenDistribute);
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+  };
+  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
+  emitPostUpdateForReductionClause(*this, S,
+                                   [](CodeGenFunction &) { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
+    const OMPTeamsDistributeParallelForDirective &S) {
+  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+                              S.getDistInc());
+  };
+
+  // Emit teams region as a standalone region.
+  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+                                            PrePostActionTy &) {
+    OMPPrivateScope PrivateScope(CGF);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
+                                                    CodeGenDistribute);
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+  };
+  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
+  emitPostUpdateForReductionClause(*this, S,
+                                   [](CodeGenFunction &) { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
+    const OMPTeamsDistributeParallelForSimdDirective &S) {
+  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+                              S.getDistInc());
+  };
+
+  // Emit teams region as a standalone region.
+  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+                                            PrePostActionTy &) {
+    OMPPrivateScope PrivateScope(CGF);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
+        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+  };
+  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
+  emitPostUpdateForReductionClause(*this, S,
+                                   [](CodeGenFunction &) { return nullptr; });
+}
+
+static void emitTargetTeamsDistributeParallelForRegion(
+    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
+    PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+                              S.getDistInc());
+  };
+
+  // Emit teams region as a standalone region.
+  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+                                                 PrePostActionTy &) {
+    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
+        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+  };
+
+  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
+                              CodeGenTeams);
+  emitPostUpdateForReductionClause(CGF, S,
+                                   [](CodeGenFunction &) { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
+    CodeGenModule &CGM, StringRef ParentName,
+    const OMPTargetTeamsDistributeParallelForDirective &S) {
+  // Emit SPMD target teams distribute parallel for region as a standalone
+  // region.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
+  };
+  llvm::Function *Fn;
+  llvm::Constant *Addr;
+  // Emit target region as a standalone region.
+  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+  assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
+    const OMPTargetTeamsDistributeParallelForDirective &S) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
+  };
+  emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
+
+static void emitTargetTeamsDistributeParallelForSimdRegion(
+    CodeGenFunction &CGF,
+    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
+    PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+                              S.getDistInc());
+  };
+
+  // Emit teams region as a standalone region.
+  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+                                                 PrePostActionTy &) {
+    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
+        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+  };
+
+  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
+                              CodeGenTeams);
+  emitPostUpdateForReductionClause(CGF, S,
+                                   [](CodeGenFunction &) { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
+    CodeGenModule &CGM, StringRef ParentName,
+    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
+  // Emit SPMD target teams distribute parallel for simd region as a standalone
+  // region.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
+  };
+  llvm::Function *Fn;
+  llvm::Constant *Addr;
+  // Emit target region as a standalone region.
+  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+  assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
+  };
+  emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
+
 void CodeGenFunction::EmitOMPCancellationPointDirective(
     const OMPCancellationPointDirective &S) {
   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
@@ -3885,7 +4383,9 @@
   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
          Kind == OMPD_distribute_parallel_for ||
-         Kind == OMPD_target_parallel_for);
+         Kind == OMPD_target_parallel_for ||
+         Kind == OMPD_teams_distribute_parallel_for ||
+         Kind == OMPD_target_teams_distribute_parallel_for);
   return OMPCancelStack.getExitBlock();
 }
 
@@ -3973,10 +4473,9 @@
   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
 
   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
-      CodeGenFunction &CGF, PrePostActionTy &Action) {
+                       CodeGenFunction &CGF, PrePostActionTy &Action) {
     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-      CGF.EmitStmt(
-          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
     };
 
     // Codegen that selects wheather to generate the privatization code or not.
@@ -4058,6 +4557,7 @@
   if (auto *C = S.getSingleClause<OMPDeviceClause>())
     Device = C->getDevice();
 
+  OMPLexicalScope Scope(*this, S, OMPD_task);
   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
 }
 
@@ -4078,6 +4578,7 @@
   if (auto *C = S.getSingleClause<OMPDeviceClause>())
     Device = C->getDevice();
 
+  OMPLexicalScope Scope(*this, S, OMPD_task);
   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
 }
 
@@ -4125,16 +4626,81 @@
   emitCommonOMPTargetDirective(*this, S, CodeGen);
 }
 
+static void emitTargetParallelForRegion(CodeGenFunction &CGF,
+                                        const OMPTargetParallelForDirective &S,
+                                        PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  // Emit directive as a combined directive that consists of two implicit
+  // directives: 'parallel' with 'for' directive.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CodeGenFunction::OMPCancelStackRAII CancelRegion(
+        CGF, OMPD_target_parallel_for, S.hasCancel());
+    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
+                               emitDispatchForLoopBounds);
+  };
+  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
+                                 emitEmptyBoundParameters);
+}
+
+void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
+    CodeGenModule &CGM, StringRef ParentName,
+    const OMPTargetParallelForDirective &S) {
+  // Emit SPMD target parallel for region as a standalone region.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetParallelForRegion(CGF, S, Action);
+  };
+  llvm::Function *Fn;
+  llvm::Constant *Addr;
+  // Emit target region as a standalone region.
+  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+  assert(Fn && Addr && "Target device function emission failed.");
+}
+
 void CodeGenFunction::EmitOMPTargetParallelForDirective(
     const OMPTargetParallelForDirective &S) {
-  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_target_parallel_for,
-      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        OMPLoopScope PreInitScope(CGF, S);
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetParallelForRegion(CGF, S, Action);
+  };
+  emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
+
+static void
+emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
+                                const OMPTargetParallelForSimdDirective &S,
+                                PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  // Emit directive as a combined directive that consists of two implicit
+  // directives: 'parallel' with 'for' directive.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
+                               emitDispatchForLoopBounds);
+  };
+  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
+                                 emitEmptyBoundParameters);
+}
+
+void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
+    CodeGenModule &CGM, StringRef ParentName,
+    const OMPTargetParallelForSimdDirective &S) {
+  // Emit SPMD target parallel for region as a standalone region.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetParallelForSimdRegion(CGF, S, Action);
+  };
+  llvm::Function *Fn;
+  llvm::Constant *Addr;
+  // Emit target region as a standalone region.
+  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+  assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
+    const OMPTargetParallelForSimdDirective &S) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    emitTargetParallelForSimdRegion(CGF, S, Action);
+  };
+  emitCommonOMPTargetDirective(*this, S, CodeGen);
 }
 
 /// Emit a helper variable and return corresponding lvalue.
@@ -4149,7 +4715,7 @@
 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
   // Emit outlined function for task construct.
-  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
   const Expr *IfCond = nullptr;
@@ -4271,15 +4837,16 @@
     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                     CodeGen);
   };
-  if (Data.Nogroup)
-    EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
-  else {
+  if (Data.Nogroup) {
+    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
+  } else {
     CGM.getOpenMPRuntime().emitTaskgroupRegion(
         *this,
         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
           Action.Enter(CGF);
-          CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
+                                        Data);
         },
         S.getLocStart());
   }
@@ -4312,5 +4879,35 @@
   if (auto *C = S.getSingleClause<OMPDeviceClause>())
     Device = C->getDevice();
 
+  OMPLexicalScope Scope(*this, S, OMPD_task);
   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
 }
+
+void CodeGenFunction::EmitSimpleOMPExecutableDirective(
+    const OMPExecutableDirective &D) {
+  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
+    return;
+  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
+      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
+    } else {
+      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
+        for (const auto *E : LD->counters()) {
+          if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
+                  cast<DeclRefExpr>(E)->getDecl())) {
+            // Emit only those that were not explicitly referenced in clauses.
+            if (!CGF.LocalDeclMap.count(VD))
+              CGF.EmitVarDecl(*VD);
+          }
+        }
+      }
+      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
+    }
+  };
+  OMPSimdLexicalScope Scope(*this, D);
+  CGM.getOpenMPRuntime().emitInlinedDirective(
+      *this,
+      isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
+                                                  : D.getDirectiveKind(),
+      CodeGen);
+}
diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp
index 92fd93b..41c8c94 100644
--- a/lib/CodeGen/CGVTT.cpp
+++ b/lib/CodeGen/CGVTT.cpp
@@ -100,7 +100,7 @@
     VTT->setComdat(CGM.getModule().getOrInsertComdat(VTT->getName()));
 
   // Set the right visibility.
-  CGM.setGlobalVisibility(VTT, RD);
+  CGM.setGVProperties(VTT, RD);
 }
 
 llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) {
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index 64b6d0d..4fb749f 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -14,11 +14,12 @@
 #include "CGCXXABI.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
-#include "clang/CodeGen/ConstantInitBuilder.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
 #include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include <algorithm>
@@ -48,11 +49,6 @@
                                  /*DontDefer=*/true, /*IsThunk=*/true);
 }
 
-static void setThunkVisibility(CodeGenModule &CGM, const CXXMethodDecl *MD,
-                               const ThunkInfo &Thunk, llvm::Function *Fn) {
-  CGM.setGlobalVisibility(Fn, MD);
-}
-
 static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk,
                                llvm::Function *ThunkFn, bool ForVTable,
                                GlobalDecl GD) {
@@ -61,8 +57,12 @@
                                   !Thunk.Return.isEmpty());
 
   // Set the right visibility.
-  const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
-  setThunkVisibility(CGM, MD, Thunk, ThunkFn);
+  CGM.setGVProperties(ThunkFn, GD);
+
+  if (!CGM.getCXXABI().exportThunk()) {
+    ThunkFn->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
+    ThunkFn->setDSOLocal(true);
+  }
 
   if (CGM.supportsCOMDAT() && ThunkFn->isWeakForLinker())
     ThunkFn->setComdat(CGM.getModule().getOrInsertComdat(ThunkFn->getName()));
@@ -122,6 +122,33 @@
   return RValue::get(ReturnValue);
 }
 
+/// This function clones a function's DISubprogram node and enters it into 
+/// a value map with the intent that the map can be utilized by the cloner
+/// to short-circuit Metadata node mapping.
+/// Furthermore, the function resolves any DILocalVariable nodes referenced
+/// by dbg.value intrinsics so they can be properly mapped during cloning.
+static void resolveTopLevelMetadata(llvm::Function *Fn,
+                                    llvm::ValueToValueMapTy &VMap) {
+  // Clone the DISubprogram node and put it into the Value map.
+  auto *DIS = Fn->getSubprogram();
+  if (!DIS)
+    return;
+  auto *NewDIS = DIS->replaceWithDistinct(DIS->clone());
+  VMap.MD()[DIS].reset(NewDIS);
+
+  // Find all llvm.dbg.declare intrinsics and resolve the DILocalVariable nodes
+  // they are referencing.
+  for (auto &BB : Fn->getBasicBlockList()) {
+    for (auto &I : BB) {
+      if (auto *DII = dyn_cast<llvm::DbgInfoIntrinsic>(&I)) {
+        auto *DILocal = DII->getVariable();
+        if (!DILocal->isResolved())
+          DILocal->resolve();
+      }
+    }
+  }
+}
+
 // This function does roughly the same thing as GenerateThunk, but in a
 // very different way, so that va_start and va_end work correctly.
 // FIXME: This function assumes "this" is the first non-sret LLVM argument of
@@ -154,6 +181,10 @@
 
   // Clone to thunk.
   llvm::ValueToValueMapTy VMap;
+
+  // We are cloning a function while some Metadata nodes are still unresolved.
+  // Ensure that the value mapper does not encounter any of them.
+  resolveTopLevelMetadata(BaseFn, VMap);
   llvm::Function *NewFn = llvm::CloneFunction(BaseFn, VMap);
   Fn->replaceAllUsesWith(NewFn);
   NewFn->takeName(Fn);
@@ -698,7 +729,7 @@
   // Create the variable that will hold the construction vtable.
   llvm::GlobalVariable *VTable =
     CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage);
-  CGM.setGlobalVisibility(VTable, RD);
+  CGM.setGVProperties(VTable, RD);
 
   // V-tables are always unnamed_addr.
   VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h
index a823ef6..7d07ea4 100644
--- a/lib/CodeGen/CGValue.h
+++ b/lib/CodeGen/CGValue.h
@@ -149,20 +149,15 @@
 
 class LValueBaseInfo {
   AlignmentSource AlignSource;
-  bool MayAlias;
 
 public:
-  explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type,
-                 bool Alias = false)
-    : AlignSource(Source), MayAlias(Alias) {}
+  explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type)
+    : AlignSource(Source) {}
   AlignmentSource getAlignmentSource() const { return AlignSource; }
   void setAlignmentSource(AlignmentSource Source) { AlignSource = Source; }
-  bool getMayAlias() const { return MayAlias; }
-  void setMayAlias(bool Alias) { MayAlias = Alias; }
 
   void mergeForCast(const LValueBaseInfo &Info) {
     setAlignmentSource(Info.getAlignmentSource());
-    setMayAlias(getMayAlias() || Info.getMayAlias());
   }
 };
 
@@ -426,8 +421,7 @@
     R.LVType = GlobalReg;
     R.V = Reg.getPointer();
     R.Initialize(type, type.getQualifiers(), Reg.getAlignment(),
-                 LValueBaseInfo(AlignmentSource::Decl, false),
-                 TBAAAccessInfo());
+                 LValueBaseInfo(AlignmentSource::Decl), TBAAAccessInfo());
     return R;
   }
 
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 84248cc..c4c27ca 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -56,6 +56,7 @@
   CGExprScalar.cpp
   CGGPUBuiltin.cpp
   CGLoopInfo.cpp
+  CGNonTrivialStruct.cpp
   CGObjC.cpp
   CGObjCGNU.cpp
   CGObjCMac.cpp
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index 6ca69d6..ac2548e 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -846,7 +846,10 @@
 std::unique_ptr<ASTConsumer>
 CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
   BackendAction BA = static_cast<BackendAction>(Act);
-  std::unique_ptr<raw_pwrite_stream> OS = GetOutputStream(CI, InFile, BA);
+  std::unique_ptr<raw_pwrite_stream> OS = CI.takeOutputStream();
+  if (!OS)
+    OS = GetOutputStream(CI, InFile, BA);
+
   if (BA != Backend_EmitNothing && !OS)
     return nullptr;
 
@@ -947,12 +950,21 @@
       return {};
     };
 
-    Expected<llvm::BitcodeModule> BMOrErr = FindThinLTOModule(MBRef);
-    if (!BMOrErr)
-      return DiagErrors(BMOrErr.takeError());
-
+    Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef);
+    if (!BMsOrErr)
+      return DiagErrors(BMsOrErr.takeError());
+    BitcodeModule *Bm = FindThinLTOModule(*BMsOrErr);
+    // We have nothing to do if the file contains no ThinLTO module. This is
+    // possible if ThinLTO compilation was not able to split module. Content of
+    // the file was already processed by indexing and will be passed to the
+    // linker using merged object file.
+    if (!Bm) {
+      auto M = llvm::make_unique<llvm::Module>("empty", *VMContext);
+      M->setTargetTriple(CI.getTargetOpts().Triple);
+      return M;
+    }
     Expected<std::unique_ptr<llvm::Module>> MOrErr =
-        BMOrErr->parseModule(*VMContext);
+        Bm->parseModule(*VMContext);
     if (!MOrErr)
       return DiagErrors(MOrErr.takeError());
     return std::move(*MOrErr);
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index b6f8770..2f814a7 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -33,9 +33,11 @@
 #include "clang/Frontend/CodeGenOptions.h"
 #include "clang/Sema/SemaDiagnostic.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 using namespace clang;
 using namespace CodeGen;
 
@@ -68,7 +70,7 @@
       IsSanitizerScope(false), CurFuncIsThunk(false), AutoreleaseResult(false),
       SawAsmBlock(false), IsOutlinedSEHHelper(false), BlockInfo(nullptr),
       BlockPointer(nullptr), LambdaThisCaptureField(nullptr),
-      NormalCleanupDest(nullptr), NextCleanupDestIndex(1),
+      NormalCleanupDest(Address::invalid()), NextCleanupDestIndex(1),
       FirstBlockInfo(nullptr), EHResumeBlock(nullptr), ExceptionSlot(nullptr),
       EHSelectorSlot(nullptr), DebugInfo(CGM.getModuleDebugInfo()),
       DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(nullptr),
@@ -87,7 +89,7 @@
 
   llvm::FastMathFlags FMF;
   if (CGM.getLangOpts().FastMath)
-    FMF.setUnsafeAlgebra();
+    FMF.setFast();
   if (CGM.getLangOpts().FiniteMathOnly) {
     FMF.setNoNaNs();
     FMF.setNoInfs();
@@ -101,6 +103,9 @@
   if (CGM.getCodeGenOpts().ReciprocalMath) {
     FMF.setAllowReciprocal();
   }
+  if (CGM.getCodeGenOpts().Reassociate) {
+    FMF.setAllowReassoc();
+  }
   Builder.setFastMathFlags(FMF);
 }
 
@@ -137,13 +142,13 @@
   if (auto TT = T->getAs<TypedefType>()) {
     if (auto Align = TT->getDecl()->getMaxAlignment()) {
       if (BaseInfo)
-        *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType, false);
+        *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType);
       return getContext().toCharUnitsFromBits(Align);
     }
   }
 
   if (BaseInfo)
-    *BaseInfo = LValueBaseInfo(AlignmentSource::Type, false);
+    *BaseInfo = LValueBaseInfo(AlignmentSource::Type);
 
   CharUnits Alignment;
   if (T->isIncompleteType()) {
@@ -352,8 +357,13 @@
   // Emit function epilog (to return).
   llvm::DebugLoc Loc = EmitReturnBlock();
 
-  if (ShouldInstrumentFunction())
-    EmitFunctionInstrumentation("__cyg_profile_func_exit");
+  if (ShouldInstrumentFunction()) {
+    if (CGM.getCodeGenOpts().InstrumentFunctions)
+      CurFn->addFnAttr("instrument-function-exit", "__cyg_profile_func_exit");
+    if (CGM.getCodeGenOpts().InstrumentFunctionsAfterInlining)
+      CurFn->addFnAttr("instrument-function-exit-inlined",
+                       "__cyg_profile_func_exit");
+  }
 
   // Emit debug descriptor for function end.
   if (CGDebugInfo *DI = getDebugInfo())
@@ -409,6 +419,9 @@
   EmitIfUsed(*this, TerminateHandler);
   EmitIfUsed(*this, UnreachableBlock);
 
+  for (const auto &FuncletAndParent : TerminateFunclets)
+    EmitIfUsed(*this, FuncletAndParent.second);
+
   if (CGM.getCodeGenOpts().EmitDeclMetadata)
     EmitDeclMetadata();
 
@@ -419,12 +432,27 @@
     I->first->replaceAllUsesWith(I->second);
     I->first->eraseFromParent();
   }
+
+  // Eliminate CleanupDestSlot alloca by replacing it with SSA values and
+  // PHIs if the current function is a coroutine. We don't do it for all
+  // functions as it may result in slight increase in numbers of instructions
+  // if compiled with no optimizations. We do it for coroutine as the lifetime
+  // of CleanupDestSlot alloca make correct coroutine frame building very
+  // difficult.
+  if (NormalCleanupDest.isValid() && isCoroutine()) {
+    llvm::DominatorTree DT(*CurFn);
+    llvm::PromoteMemToReg(
+        cast<llvm::AllocaInst>(NormalCleanupDest.getPointer()), DT);
+    NormalCleanupDest = Address::invalid();
+  }
 }
 
 /// ShouldInstrumentFunction - Return true if the current function should be
 /// instrumented with __cyg_profile_func_* calls
 bool CodeGenFunction::ShouldInstrumentFunction() {
-  if (!CGM.getCodeGenOpts().InstrumentFunctions)
+  if (!CGM.getCodeGenOpts().InstrumentFunctions &&
+      !CGM.getCodeGenOpts().InstrumentFunctionsAfterInlining &&
+      !CGM.getCodeGenOpts().InstrumentFunctionEntryBare)
     return false;
   if (!CurFuncDecl || CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>())
     return false;
@@ -437,6 +465,12 @@
   return CGM.getCodeGenOpts().XRayInstrumentFunctions;
 }
 
+/// AlwaysEmitXRayCustomEvents - Return true if we should emit IR for calls to
+/// the __xray_customevent(...) builin calls, when doing XRay instrumentation.
+bool CodeGenFunction::AlwaysEmitXRayCustomEvents() const {
+  return CGM.getCodeGenOpts().XRayAlwaysEmitCustomEvents;
+}
+
 llvm::Constant *
 CodeGenFunction::EncodeAddrForUseInPrologue(llvm::Function *F,
                                             llvm::Constant *Addr) {
@@ -474,31 +508,6 @@
                             "decoded_addr");
 }
 
-/// EmitFunctionInstrumentation - Emit LLVM code to call the specified
-/// instrumentation function with the current function and the call site, if
-/// function instrumentation is enabled.
-void CodeGenFunction::EmitFunctionInstrumentation(const char *Fn) {
-  auto NL = ApplyDebugLocation::CreateArtificial(*this);
-  // void __cyg_profile_func_{enter,exit} (void *this_fn, void *call_site);
-  llvm::PointerType *PointerTy = Int8PtrTy;
-  llvm::Type *ProfileFuncArgs[] = { PointerTy, PointerTy };
-  llvm::FunctionType *FunctionTy =
-    llvm::FunctionType::get(VoidTy, ProfileFuncArgs, false);
-
-  llvm::Constant *F = CGM.CreateRuntimeFunction(FunctionTy, Fn);
-  llvm::CallInst *CallSite = Builder.CreateCall(
-    CGM.getIntrinsic(llvm::Intrinsic::returnaddress),
-    llvm::ConstantInt::get(Int32Ty, 0),
-    "callsite");
-
-  llvm::Value *args[] = {
-    llvm::ConstantExpr::getBitCast(CurFn, PointerTy),
-    CallSite
-  };
-
-  EmitNounwindRuntimeCall(F, args);
-}
-
 static void removeImageAccessQualifier(std::string& TyName) {
   std::string ReadOnlyQual("__read_only");
   std::string::size_type ReadOnlyPos = TyName.find(ReadOnlyQual);
@@ -844,6 +853,8 @@
   // Apply sanitizer attributes to the function.
   if (SanOpts.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress))
     Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
+  if (SanOpts.hasOneOf(SanitizerKind::HWAddress))
+    Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
   if (SanOpts.has(SanitizerKind::Thread))
     Fn->addFnAttr(llvm::Attribute::SanitizeThread);
   if (SanOpts.has(SanitizerKind::Memory))
@@ -877,7 +888,8 @@
   }
 
   // Apply xray attributes to the function (as a string, for now)
-  if (D && ShouldXRayInstrumentFunction()) {
+  bool InstrumentXray = ShouldXRayInstrumentFunction();
+  if (D && InstrumentXray) {
     if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) {
       if (XRayAttr->alwaysXRayInstrument())
         Fn->addFnAttr("function-instrument", "xray-always");
@@ -895,10 +907,6 @@
     }
   }
 
-  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
-    if (CGM.getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
-      CGM.getOpenMPRuntime().emitDeclareSimdFunction(FD, Fn);
-
   // Add no-jump-tables value.
   Fn->addFnAttr("no-jump-tables",
                 llvm::toStringRef(CGM.getCodeGenOpts().NoUseJumpTables));
@@ -918,8 +926,13 @@
   if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function)) {
     if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
       if (llvm::Constant *PrologueSig = getPrologueSignature(CGM, FD)) {
+        // Remove any (C++17) exception specifications, to allow calling e.g. a
+        // noexcept function through a non-noexcept pointer.
+        auto ProtoTy =
+          getContext().getFunctionTypeWithExceptionSpec(FD->getType(),
+                                                        EST_None);
         llvm::Constant *FTRTTIConst =
-            CGM.GetAddrOfRTTIDescriptor(FD->getType(), /*ForEH=*/true);
+            CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true);
         llvm::Constant *FTRTTIConstEncoded =
             EncodeAddrForUseInPrologue(Fn, FTRTTIConst);
         llvm::Constant *PrologueStructElems[] = {PrologueSig,
@@ -987,19 +1000,31 @@
     DI->EmitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, Builder);
   }
 
-  if (ShouldInstrumentFunction())
-    EmitFunctionInstrumentation("__cyg_profile_func_enter");
+  if (ShouldInstrumentFunction()) {
+    if (CGM.getCodeGenOpts().InstrumentFunctions)
+      CurFn->addFnAttr("instrument-function-entry", "__cyg_profile_func_enter");
+    if (CGM.getCodeGenOpts().InstrumentFunctionsAfterInlining)
+      CurFn->addFnAttr("instrument-function-entry-inlined",
+                       "__cyg_profile_func_enter");
+    if (CGM.getCodeGenOpts().InstrumentFunctionEntryBare)
+      CurFn->addFnAttr("instrument-function-entry-inlined",
+                       "__cyg_profile_func_enter_bare");
+  }
 
   // Since emitting the mcount call here impacts optimizations such as function
   // inlining, we just add an attribute to insert a mcount call in backend.
   // The attribute "counting-function" is set to mcount function name which is
   // architecture dependent.
   if (CGM.getCodeGenOpts().InstrumentForProfiling) {
-    if (CGM.getCodeGenOpts().CallFEntry)
-      Fn->addFnAttr("fentry-call", "true");
-    else {
-      if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>())
-        Fn->addFnAttr("counting-function", getTarget().getMCountName());
+    // Calls to fentry/mcount should not be generated if function has
+    // the no_instrument_function attribute.
+    if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>()) {
+      if (CGM.getCodeGenOpts().CallFEntry)
+        Fn->addFnAttr("fentry-call", "true");
+      else {
+        Fn->addFnAttr("instrument-function-entry-inlined",
+                      getTarget().getMCountName());
+      }
     }
   }
 
@@ -1095,8 +1120,7 @@
       // may have a static invoker function, which may call this operator with
       // a null 'this' pointer.
       if (isLambdaCallOperator(MD) &&
-          cast<CXXRecordDecl>(MD->getParent())->getLambdaCaptureDefault() ==
-              LCD_None)
+          MD->getParent()->getLambdaCaptureDefault() == LCD_None)
         SkippedChecks.set(SanitizerKind::Null, true);
 
       EmitTypeCheck(isa<CXXConstructorDecl>(MD) ? TCK_ConstructorCall
@@ -1735,12 +1759,9 @@
     if (const VariableArrayType *vlaType =
           dyn_cast_or_null<VariableArrayType>(
                                           getContext().getAsArrayType(Ty))) {
-      QualType eltType;
-      llvm::Value *numElts;
-      std::tie(numElts, eltType) = getVLASize(vlaType);
-
-      SizeVal = numElts;
-      CharUnits eltSize = getContext().getTypeSizeInChars(eltType);
+      auto VlaSize = getVLASize(vlaType);
+      SizeVal = VlaSize.NumElts;
+      CharUnits eltSize = getContext().getTypeSizeInChars(VlaSize.Type);
       if (!eltSize.isOne())
         SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(eltSize));
       vla = vlaType;
@@ -1823,7 +1844,7 @@
   // this is the size of the VLA in bytes, not its size in elements.
   llvm::Value *numVLAElements = nullptr;
   if (isa<VariableArrayType>(arrayType)) {
-    numVLAElements = getVLASize(cast<VariableArrayType>(arrayType)).first;
+    numVLAElements = getVLASize(cast<VariableArrayType>(arrayType)).NumElts;
 
     // Walk into all VLAs.  This doesn't require changes to addr,
     // which has type T* where T is the first non-VLA element type.
@@ -1904,14 +1925,13 @@
   return numElements;
 }
 
-std::pair<llvm::Value*, QualType>
-CodeGenFunction::getVLASize(QualType type) {
+CodeGenFunction::VlaSizePair CodeGenFunction::getVLASize(QualType type) {
   const VariableArrayType *vla = getContext().getAsVariableArrayType(type);
   assert(vla && "type was not a variable array type!");
   return getVLASize(vla);
 }
 
-std::pair<llvm::Value*, QualType>
+CodeGenFunction::VlaSizePair
 CodeGenFunction::getVLASize(const VariableArrayType *type) {
   // The number of elements so far; always size_t.
   llvm::Value *numElements = nullptr;
@@ -1932,7 +1952,22 @@
     }
   } while ((type = getContext().getAsVariableArrayType(elementType)));
 
-  return std::pair<llvm::Value*,QualType>(numElements, elementType);
+  return { numElements, elementType };
+}
+
+CodeGenFunction::VlaSizePair
+CodeGenFunction::getVLAElements1D(QualType type) {
+  const VariableArrayType *vla = getContext().getAsVariableArrayType(type);
+  assert(vla && "type was not a variable array type!");
+  return getVLAElements1D(vla);
+}
+
+CodeGenFunction::VlaSizePair
+CodeGenFunction::getVLAElements1D(const VariableArrayType *Vla) {
+  llvm::Value *VlaSize = VLASizeMap[Vla->getSizeExpr()];
+  assert(VlaSize && "no size for VLA!");
+  assert(VlaSize->getType() == SizeTy);
+  return { VlaSize, Vla->getElementType() };
 }
 
 void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
@@ -2284,6 +2319,60 @@
   CGM.getSanStats().create(IRB, SSK);
 }
 
+llvm::Value *
+CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) {
+  llvm::Value *TrueCondition = nullptr;
+  if (!RO.ParsedAttribute.Architecture.empty())
+    TrueCondition = EmitX86CpuIs(RO.ParsedAttribute.Architecture);
+
+  if (!RO.ParsedAttribute.Features.empty()) {
+    SmallVector<StringRef, 8> FeatureList;
+    llvm::for_each(RO.ParsedAttribute.Features,
+                   [&FeatureList](const std::string &Feature) {
+                     FeatureList.push_back(StringRef{Feature}.substr(1));
+                   });
+    llvm::Value *FeatureCmp = EmitX86CpuSupports(FeatureList);
+    TrueCondition = TrueCondition ? Builder.CreateAnd(TrueCondition, FeatureCmp)
+                                  : FeatureCmp;
+  }
+  return TrueCondition;
+}
+
+void CodeGenFunction::EmitMultiVersionResolver(
+    llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) {
+  assert((getContext().getTargetInfo().getTriple().getArch() ==
+              llvm::Triple::x86 ||
+          getContext().getTargetInfo().getTriple().getArch() ==
+              llvm::Triple::x86_64) &&
+         "Only implemented for x86 targets");
+
+  // Main function's basic block.
+  llvm::BasicBlock *CurBlock = createBasicBlock("entry", Resolver);
+  Builder.SetInsertPoint(CurBlock);
+  EmitX86CpuInit();
+
+  llvm::Function *DefaultFunc = nullptr;
+  for (const MultiVersionResolverOption &RO : Options) {
+    Builder.SetInsertPoint(CurBlock);
+    llvm::Value *TrueCondition = FormResolverCondition(RO);
+
+    if (!TrueCondition) {
+      DefaultFunc = RO.Function;
+    } else {
+      llvm::BasicBlock *RetBlock = createBasicBlock("ro_ret", Resolver);
+      llvm::IRBuilder<> RetBuilder(RetBlock);
+      RetBuilder.CreateRet(RO.Function);
+      CurBlock = createBasicBlock("ro_else", Resolver);
+      Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock);
+    }
+  }
+
+  assert(DefaultFunc && "No default version?");
+  // Emit return from the 'else-ist' block.
+  Builder.SetInsertPoint(CurBlock);
+  Builder.CreateRet(DefaultFunc);
+}
+
 llvm::DebugLoc CodeGenFunction::SourceLocToDebugLoc(SourceLocation Location) {
   if (CGDebugInfo *DI = getDebugInfo())
     return DI->SourceLocToDebugLoc(Location);
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 210dbaa..ca2a8d9 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -34,6 +34,7 @@
 #include "clang/Frontend/CodeGenOptions.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Debug.h"
@@ -225,6 +226,10 @@
   };
   CGCoroInfo CurCoro;
 
+  bool isCoroutine() const {
+    return CurCoro.Data != nullptr;
+  }
+
   /// CurGD - The GlobalDecl for the current function being compiled.
   GlobalDecl CurGD;
 
@@ -429,7 +434,7 @@
   };
 
   /// i32s containing the indexes of the cleanup destinations.
-  llvm::AllocaInst *NormalCleanupDest;
+  Address NormalCleanupDest;
 
   unsigned NextCleanupDestIndex;
 
@@ -764,7 +769,7 @@
         ForceCleanup();
     }
 
-    /// Checks if the global variable is captured in current function. 
+    /// Checks if the global variable is captured in current function.
     bool isGlobalVarCaptured(const VarDecl *VD) const {
       VD = VD->getCanonicalDecl();
       return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0;
@@ -826,7 +831,7 @@
   /// block through the normal cleanup handling code (if any) and then
   /// on to \arg Dest.
   void EmitBranchThroughCleanup(JumpDest Dest);
-  
+
   /// isObviouslyBranchWithoutCleanups - Return true if a branch to the
   /// specified destination obviously has no cleanups to run.  'false' is always
   /// a conservatively correct answer for this method.
@@ -1045,7 +1050,7 @@
       if (Data.isValid()) Data.unbind(CGF);
     }
   };
-  
+
 private:
   CGDebugInfo *DebugInfo;
   bool DisableDebugInfo;
@@ -1163,19 +1168,6 @@
   };
   OpenMPCancelExitStack OMPCancelStack;
 
-  /// Controls insertion of cancellation exit blocks in worksharing constructs.
-  class OMPCancelStackRAII {
-    CodeGenFunction &CGF;
-
-  public:
-    OMPCancelStackRAII(CodeGenFunction &CGF, OpenMPDirectiveKind Kind,
-                       bool HasCancel)
-        : CGF(CGF) {
-      CGF.OMPCancelStack.enter(CGF, Kind, HasCancel);
-    }
-    ~OMPCancelStackRAII() { CGF.OMPCancelStack.exit(CGF); }
-  };
-
   CodeGenPGO PGO;
 
   /// Calculate branch weights appropriate for PGO data
@@ -1429,12 +1421,15 @@
   llvm::BasicBlock *TerminateHandler;
   llvm::BasicBlock *TrapBB;
 
+  /// Terminate funclets keyed by parent funclet pad.
+  llvm::MapVector<llvm::Value *, llvm::BasicBlock *> TerminateFunclets;
+
   /// True if we need emit the life-time markers.
   const bool ShouldEmitLifetimeMarkers;
 
   /// Add OpenCL kernel arg metadata and the kernel attribute meatadata to
   /// the function metadata.
-  void EmitOpenCLKernelMetadata(const FunctionDecl *FD, 
+  void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
                                 llvm::Function *Fn);
 
 public:
@@ -1443,10 +1438,10 @@
 
   CodeGenTypes &getTypes() const { return CGM.getTypes(); }
   ASTContext &getContext() const { return CGM.getContext(); }
-  CGDebugInfo *getDebugInfo() { 
-    if (DisableDebugInfo) 
+  CGDebugInfo *getDebugInfo() {
+    if (DisableDebugInfo)
       return nullptr;
-    return DebugInfo; 
+    return DebugInfo;
   }
   void disableDebugInfo() { DisableDebugInfo = true; }
   void enableDebugInfo() { DisableDebugInfo = false; }
@@ -1541,6 +1536,7 @@
       return false;
     case QualType::DK_cxx_destructor:
     case QualType::DK_objc_weak_lifetime:
+    case QualType::DK_nontrivial_c_struct:
       return getLangOpts().Exceptions;
     case QualType::DK_objc_strong_lifetime:
       return getLangOpts().Exceptions &&
@@ -1588,10 +1584,7 @@
   /// \return an LLVM value which is a pointer to a struct which contains
   /// information about the block, including the block invoke function, the
   /// captured variables, etc.
-  /// \param InvokeF will contain the block invoke function if it is not
-  /// nullptr.
-  llvm::Value *EmitBlockLiteral(const BlockExpr *,
-                                llvm::Function **InvokeF = nullptr);
+  llvm::Value *EmitBlockLiteral(const BlockExpr *);
   static void destroyBlockInfos(CGBlockInfo *info);
 
   llvm::Function *GenerateBlockFunction(GlobalDecl GD,
@@ -1780,13 +1773,9 @@
   /// instrumented with XRay nop sleds.
   bool ShouldXRayInstrumentFunction() const;
 
-  /// EmitFunctionInstrumentation - Emit LLVM code to call the specified
-  /// instrumentation function with the current function and the call site, if
-  /// function instrumentation is enabled.
-  void EmitFunctionInstrumentation(const char *Fn);
-
-  /// EmitMCountInstrumentation - Emit call to .mcount.
-  void EmitMCountInstrumentation();
+  /// AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit
+  /// XRay custom event handling calls.
+  bool AlwaysEmitXRayCustomEvents() const;
 
   /// Encode an address into a form suitable for use in a function prologue.
   llvm::Constant *EncodeAddrForUseInPrologue(llvm::Function *F,
@@ -1821,6 +1810,10 @@
   /// getTerminateLandingPad - Return a landing pad that just calls terminate.
   llvm::BasicBlock *getTerminateLandingPad();
 
+  /// getTerminateLandingPad - Return a cleanup funclet that just calls
+  /// terminate.
+  llvm::BasicBlock *getTerminateFunclet();
+
   /// getTerminateHandler - Return a handler (not a landing pad, just
   /// a catch handler) that just calls terminate.  This is used when
   /// a terminate scope encloses a try.
@@ -1854,11 +1847,7 @@
   llvm::BasicBlock *createBasicBlock(const Twine &name = "",
                                      llvm::Function *parent = nullptr,
                                      llvm::BasicBlock *before = nullptr) {
-#ifdef NDEBUG
-    return llvm::BasicBlock::Create(getLLVMContext(), "", parent, before);
-#else
     return llvm::BasicBlock::Create(getLLVMContext(), name, parent, before);
-#endif
   }
 
   /// getBasicBlockForLabel - Return the LLVM basicblock that the specified
@@ -1919,8 +1908,7 @@
 
   LValue MakeAddrLValue(Address Addr, QualType T,
                         AlignmentSource Source = AlignmentSource::Type) {
-    return LValue::MakeAddr(Addr, T, getContext(),
-                            LValueBaseInfo(Source, false),
+    return LValue::MakeAddr(Addr, T, getContext(), LValueBaseInfo(Source),
                             CGM.getTBAAAccessInfo(T));
   }
 
@@ -1932,8 +1920,7 @@
   LValue MakeAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment,
                         AlignmentSource Source = AlignmentSource::Type) {
     return LValue::MakeAddr(Address(V, Alignment), T, getContext(),
-                            LValueBaseInfo(Source, false),
-                            CGM.getTBAAAccessInfo(T));
+                            LValueBaseInfo(Source), CGM.getTBAAAccessInfo(T));
   }
 
   LValue MakeAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment,
@@ -1952,10 +1939,17 @@
                                            LValueBaseInfo *BaseInfo = nullptr,
                                            TBAAAccessInfo *TBAAInfo = nullptr);
 
-  Address EmitLoadOfReference(Address Ref, const ReferenceType *RefTy,
-                              LValueBaseInfo *BaseInfo = nullptr,
-                              TBAAAccessInfo *TBAAInfo = nullptr);
-  LValue EmitLoadOfReferenceLValue(Address Ref, const ReferenceType *RefTy);
+  Address EmitLoadOfReference(LValue RefLVal,
+                              LValueBaseInfo *PointeeBaseInfo = nullptr,
+                              TBAAAccessInfo *PointeeTBAAInfo = nullptr);
+  LValue EmitLoadOfReferenceLValue(LValue RefLVal);
+  LValue EmitLoadOfReferenceLValue(Address RefAddr, QualType RefTy,
+                                   AlignmentSource Source =
+                                       AlignmentSource::Type) {
+    LValue RefLVal = MakeAddrLValue(RefAddr, RefTy, LValueBaseInfo(Source),
+                                    CGM.getTBAAAccessInfo(RefTy));
+    return EmitLoadOfReferenceLValue(RefLVal);
+  }
 
   Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy,
                             LValueBaseInfo *BaseInfo = nullptr,
@@ -2101,16 +2095,14 @@
   ///
   /// The difference to EmitAggregateCopy is that tail padding is not copied.
   /// This is required for correctness when assigning non-POD structures in C++.
-  void EmitAggregateAssign(Address DestPtr, Address SrcPtr,
-                           QualType EltTy) {
+  void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy) {
     bool IsVolatile = hasVolatileMember(EltTy);
-    EmitAggregateCopy(DestPtr, SrcPtr, EltTy, IsVolatile, true);
+    EmitAggregateCopy(Dest, Src, EltTy, IsVolatile, /* isAssignment= */ true);
   }
 
-  void EmitAggregateCopyCtor(Address DestPtr, Address SrcPtr,
-                             QualType DestTy, QualType SrcTy) {
-    EmitAggregateCopy(DestPtr, SrcPtr, SrcTy, /*IsVolatile=*/false,
-                      /*IsAssignment=*/false);
+  void EmitAggregateCopyCtor(LValue Dest, LValue Src) {
+    EmitAggregateCopy(Dest, Src, Src.getType(),
+                      /* IsVolatile= */ false, /* IsAssignment= */ false);
   }
 
   /// EmitAggregateCopy - Emit an aggregate copy.
@@ -2119,9 +2111,8 @@
   /// volatile.
   /// \param isAssignment - If false, allow padding to be copied.  This often
   /// yields more efficient.
-  void EmitAggregateCopy(Address DestPtr, Address SrcPtr,
-                         QualType EltTy, bool isVolatile=false,
-                         bool isAssignment = false);
+  void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy,
+                         bool isVolatile = false, bool isAssignment = false);
 
   /// GetAddrOfLocalVar - Return the address of a local variable.
   Address GetAddrOfLocalVar(const VarDecl *VD) {
@@ -2201,12 +2192,24 @@
   /// This function can be called with a null (unreachable) insert point.
   void EmitVariablyModifiedType(QualType Ty);
 
-  /// getVLASize - Returns an LLVM value that corresponds to the size,
+  struct VlaSizePair {
+    llvm::Value *NumElts;
+    QualType Type;
+
+    VlaSizePair(llvm::Value *NE, QualType T) : NumElts(NE), Type(T) {}
+  };
+
+  /// Return the number of elements for a single dimension
+  /// for the given array type.
+  VlaSizePair getVLAElements1D(const VariableArrayType *vla);
+  VlaSizePair getVLAElements1D(QualType vla);
+
+  /// Returns an LLVM value that corresponds to the size,
   /// in non-variably-sized elements, of a variable length array type,
   /// plus that largest non-variably-sized element type.  Assumes that
   /// the type has already been emitted with EmitVariablyModifiedType.
-  std::pair<llvm::Value*,QualType> getVLASize(const VariableArrayType *vla);
-  std::pair<llvm::Value*,QualType> getVLASize(QualType vla);
+  VlaSizePair getVLASize(const VariableArrayType *vla);
+  VlaSizePair getVLASize(QualType vla);
 
   /// LoadCXXThis - Load the value of 'this'. This function is only valid while
   /// generating code for an C++ member function.
@@ -2379,7 +2382,10 @@
     /// object within its lifetime.
     TCK_UpcastToVirtualBase,
     /// Checking the value assigned to a _Nonnull pointer. Must not be null.
-    TCK_NonnullAssign
+    TCK_NonnullAssign,
+    /// Checking the operand of a dynamic_cast or a typeid expression.  Must be
+    /// null or an object within its lifetime.
+    TCK_DynamicOperation
   };
 
   /// Determine whether the pointer type check \p TCK permits null pointers.
@@ -2507,10 +2513,19 @@
   };
   AutoVarEmission EmitAutoVarAlloca(const VarDecl &var);
   void EmitAutoVarInit(const AutoVarEmission &emission);
-  void EmitAutoVarCleanups(const AutoVarEmission &emission);  
+  void EmitAutoVarCleanups(const AutoVarEmission &emission);
   void emitAutoVarTypeCleanup(const AutoVarEmission &emission,
                               QualType::DestructionKind dtorKind);
 
+  /// Emits the alloca and debug information for the size expressions for each
+  /// dimension of an array. It registers the association of its (1-dimensional)
+  /// QualTypes and size expression's debug node, so that CGDebugInfo can
+  /// reference this node when creating the DISubrange object to describe the
+  /// array types.
+  void EmitAndRegisterVariableArrayDimensions(CGDebugInfo *DI,
+                                              const VarDecl &D,
+                                              bool EmitDebugInfo);
+
   void EmitStaticVarDecl(const VarDecl &D,
                          llvm::GlobalValue::LinkageTypes Linkage);
 
@@ -2529,7 +2544,7 @@
 
     bool isIndirect() const { return Alignment != 0; }
     llvm::Value *getAnyValue() const { return Value; }
-    
+
     llvm::Value *getDirectValue() const {
       assert(!isIndirect());
       return Value;
@@ -2660,6 +2675,9 @@
   llvm::Value *EmitSEHExceptionInfo();
   llvm::Value *EmitSEHAbnormalTermination();
 
+  /// Emit simple code for OpenMP directives in Simd-only mode.
+  void EmitSimpleOMPExecutableDirective(const OMPExecutableDirective &D);
+
   /// Scan the outlined statement for captures from the parent function. For
   /// each capture, mark the capture as escaped and emit a call to
   /// llvm.localrecover. Insert the localrecover result into the LocalDeclMap.
@@ -2678,6 +2696,19 @@
   void EmitCXXForRangeStmt(const CXXForRangeStmt &S,
                            ArrayRef<const Attr *> Attrs = None);
 
+  /// Controls insertion of cancellation exit blocks in worksharing constructs.
+  class OMPCancelStackRAII {
+    CodeGenFunction &CGF;
+
+  public:
+    OMPCancelStackRAII(CodeGenFunction &CGF, OpenMPDirectiveKind Kind,
+                       bool HasCancel)
+        : CGF(CGF) {
+      CGF.OMPCancelStack.enter(CGF, Kind, HasCancel);
+    }
+    ~OMPCancelStackRAII() { CGF.OMPCancelStack.exit(CGF); }
+  };
+
   /// Returns calculated size of the specified type.
   llvm::Value *getTypeSize(QualType Ty);
   LValue InitCapturedStruct(const CapturedStmt &S);
@@ -2813,8 +2844,23 @@
                                         const OMPTaskDataTy & /*Data*/)>
       TaskGenTy;
   void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
+                                 const OpenMPDirectiveKind CapturedRegion,
                                  const RegionCodeGenTy &BodyGen,
                                  const TaskGenTy &TaskGen, OMPTaskDataTy &Data);
+  struct OMPTargetDataInfo {
+    Address BasePointersArray = Address::invalid();
+    Address PointersArray = Address::invalid();
+    Address SizesArray = Address::invalid();
+    unsigned NumberOfTargetItems = 0;
+    explicit OMPTargetDataInfo() = default;
+    OMPTargetDataInfo(Address BasePointersArray, Address PointersArray,
+                      Address SizesArray, unsigned NumberOfTargetItems)
+        : BasePointersArray(BasePointersArray), PointersArray(PointersArray),
+          SizesArray(SizesArray), NumberOfTargetItems(NumberOfTargetItems) {}
+  };
+  void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S,
+                                       const RegionCodeGenTy &BodyGen,
+                                       OMPTargetDataInfo &InputInfo);
 
   void EmitOMPParallelDirective(const OMPParallelDirective &S);
   void EmitOMPSimdDirective(const OMPSimdDirective &S);
@@ -2884,9 +2930,39 @@
   static void
   EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
                                       const OMPTargetParallelDirective &S);
+  /// Emit device code for the target parallel for directive.
+  static void EmitOMPTargetParallelForDeviceFunction(
+      CodeGenModule &CGM, StringRef ParentName,
+      const OMPTargetParallelForDirective &S);
+  /// Emit device code for the target parallel for simd directive.
+  static void EmitOMPTargetParallelForSimdDeviceFunction(
+      CodeGenModule &CGM, StringRef ParentName,
+      const OMPTargetParallelForSimdDirective &S);
+  /// Emit device code for the target teams directive.
   static void
   EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
                                    const OMPTargetTeamsDirective &S);
+  /// Emit device code for the target teams distribute directive.
+  static void EmitOMPTargetTeamsDistributeDeviceFunction(
+      CodeGenModule &CGM, StringRef ParentName,
+      const OMPTargetTeamsDistributeDirective &S);
+  /// Emit device code for the target teams distribute simd directive.
+  static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(
+      CodeGenModule &CGM, StringRef ParentName,
+      const OMPTargetTeamsDistributeSimdDirective &S);
+  /// Emit device code for the target simd directive.
+  static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM,
+                                              StringRef ParentName,
+                                              const OMPTargetSimdDirective &S);
+  /// Emit device code for the target teams distribute parallel for simd
+  /// directive.
+  static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
+      CodeGenModule &CGM, StringRef ParentName,
+      const OMPTargetTeamsDistributeParallelForSimdDirective &S);
+
+  static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
+      CodeGenModule &CGM, StringRef ParentName,
+      const OMPTargetTeamsDistributeParallelForDirective &S);
   /// \brief Emit inner loop of the worksharing/simd construct.
   ///
   /// \param S Directive, for which the inner loop must be emitted.
@@ -2918,15 +2994,9 @@
                               const CodeGenLoopBoundsTy &CodeGenLoopBounds,
                               const CodeGenDispatchBoundsTy &CGDispatchBounds);
 
-  /// Emits the lvalue for the expression with possibly captured variable.
-  LValue EmitOMPSharedLValue(const Expr *E);
-
-private:
-  /// Helpers for blocks. Returns invoke function by \p InvokeF if it is not
-  /// nullptr. It should be called without \p InvokeF if the caller does not
-  /// need invoke function to be returned.
-  llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info,
-                                llvm::Function **InvokeF = nullptr);
+  /// Emit code for the distribute loop-based directive.
+  void EmitOMPDistributeLoop(const OMPLoopDirective &S,
+                             const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr);
 
   /// Helpers for the OpenMP loop directives.
   void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false);
@@ -2934,8 +3004,12 @@
       const OMPLoopDirective &D,
       const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen);
 
-  void EmitOMPDistributeLoop(const OMPLoopDirective &S,
-                             const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr);
+  /// Emits the lvalue for the expression with possibly captured variable.
+  LValue EmitOMPSharedLValue(const Expr *E);
+
+private:
+  /// Helpers for blocks.
+  llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
 
   /// struct with the values to be passed to the OpenMP loop-related functions
   struct OMPLoopArguments {
@@ -3085,8 +3159,7 @@
                                 SourceLocation Loc,
                                 AlignmentSource Source = AlignmentSource::Type,
                                 bool isNontemporal = false) {
-    return EmitLoadOfScalar(Addr, Volatile, Ty, Loc,
-                            LValueBaseInfo(Source, false),
+    return EmitLoadOfScalar(Addr, Volatile, Ty, Loc, LValueBaseInfo(Source),
                             CGM.getTBAAAccessInfo(Ty), isNontemporal);
   }
 
@@ -3108,7 +3181,7 @@
                          bool Volatile, QualType Ty,
                          AlignmentSource Source = AlignmentSource::Type,
                          bool isInit = false, bool isNontemporal = false) {
-    EmitStoreOfScalar(Value, Addr, Volatile, Ty, LValueBaseInfo(Source, false),
+    EmitStoreOfScalar(Value, Addr, Volatile, Ty, LValueBaseInfo(Source),
                       CGM.getTBAAAccessInfo(Ty), isInit, isNontemporal);
   }
 
@@ -3179,7 +3252,7 @@
   LValue EmitCastLValue(const CastExpr *E);
   LValue EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E);
   LValue EmitOpaqueValueLValue(const OpaqueValueExpr *e);
-  
+
   Address EmitExtVectorElementLValue(LValue V);
 
   RValue EmitRValueForField(LValue LV, const FieldDecl *FD, SourceLocation Loc);
@@ -3262,11 +3335,15 @@
   /// LLVM arguments and the types they were derived from.
   RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee,
                   ReturnValueSlot ReturnValue, const CallArgList &Args,
-                  llvm::Instruction **callOrInvoke = nullptr);
-
+                  llvm::Instruction **callOrInvoke, SourceLocation Loc);
+  RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee,
+                  ReturnValueSlot ReturnValue, const CallArgList &Args,
+                  llvm::Instruction **callOrInvoke = nullptr) {
+    return EmitCall(CallInfo, Callee, ReturnValue, Args, callOrInvoke,
+                    SourceLocation());
+  }
   RValue EmitCall(QualType FnType, const CGCallee &Callee, const CallExpr *E,
-                  ReturnValueSlot ReturnValue,
-                  llvm::Value *Chain = nullptr);
+                  ReturnValueSlot ReturnValue, llvm::Value *Chain = nullptr);
   RValue EmitCallExpr(const CallExpr *E,
                       ReturnValueSlot ReturnValue = ReturnValueSlot());
   RValue EmitSimpleCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue);
@@ -3285,6 +3362,9 @@
                                           ArrayRef<llvm::Value*> args,
                                           const Twine &name = "");
 
+  SmallVector<llvm::OperandBundleDef, 1>
+  getBundlesForFunclet(llvm::Value *Callee);
+
   llvm::CallSite EmitCallOrInvoke(llvm::Value *Callee,
                                   ArrayRef<llvm::Value *> Args,
                                   const Twine &Name = "");
@@ -3296,14 +3376,24 @@
   void EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee,
                                        ArrayRef<llvm::Value*> args);
 
-  CGCallee BuildAppleKextVirtualCall(const CXXMethodDecl *MD, 
+  CGCallee BuildAppleKextVirtualCall(const CXXMethodDecl *MD,
                                      NestedNameSpecifier *Qual,
                                      llvm::Type *Ty);
-  
+
   CGCallee BuildAppleKextVirtualDestructorCall(const CXXDestructorDecl *DD,
-                                               CXXDtorType Type, 
+                                               CXXDtorType Type,
                                                const CXXRecordDecl *RD);
 
+  // These functions emit calls to the special functions of non-trivial C
+  // structs.
+  void defaultInitNonTrivialCStructVar(LValue Dst);
+  void callCStructDefaultConstructor(LValue Dst);
+  void callCStructDestructor(LValue Dst);
+  void callCStructCopyConstructor(LValue Dst, LValue Src);
+  void callCStructMoveConstructor(LValue Dst, LValue Src);
+  void callCStructCopyAssignmentOperator(LValue Dst, LValue Src);
+  void callCStructMoveAssignmentOperator(LValue Dst, LValue Src);
+
   RValue
   EmitCXXMemberOrOperatorCall(const CXXMethodDecl *Method,
                               const CGCallee &Callee,
@@ -3365,7 +3455,8 @@
                                              const llvm::CmpInst::Predicate Fp,
                                              const llvm::CmpInst::Predicate Ip,
                                              const llvm::Twine &Name = "");
-  llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+                                  llvm::Triple::ArchType Arch);
 
   llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
                                          unsigned LLVMIntrinsic,
@@ -3374,7 +3465,8 @@
                                          unsigned Modifier,
                                          const CallExpr *E,
                                          SmallVectorImpl<llvm::Value *> &Ops,
-                                         Address PtrOp0, Address PtrOp1);
+                                         Address PtrOp0, Address PtrOp1,
+                                         llvm::Triple::ArchType Arch);
   llvm::Function *LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                           unsigned Modifier, llvm::Type *ArgTy,
                                           const CallExpr *E);
@@ -3388,7 +3480,8 @@
   llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt,
                                  llvm::Type *Ty, bool usgn, const char *name);
   llvm::Value *vectorWrapScalar16(llvm::Value *Op);
-  llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+                                      llvm::Triple::ArchType Arch);
 
   llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops);
   llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
@@ -3398,6 +3491,7 @@
   llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E);
+  llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
 private:
   enum class MSVCIntrin;
@@ -3474,12 +3568,13 @@
   static Destroyer destroyARCStrongPrecise;
   static Destroyer destroyARCWeak;
   static Destroyer emitARCIntrinsicUse;
+  static Destroyer destroyNonTrivialCStruct;
 
-  void EmitObjCAutoreleasePoolPop(llvm::Value *Ptr); 
+  void EmitObjCAutoreleasePoolPop(llvm::Value *Ptr);
   llvm::Value *EmitObjCAutoreleasePoolPush();
   llvm::Value *EmitObjCMRRAutoreleasePoolPush();
   void EmitObjCAutoreleasePoolCleanup(llvm::Value *Ptr);
-  void EmitObjCMRRAutoreleasePoolPop(llvm::Value *Ptr); 
+  void EmitObjCMRRAutoreleasePoolPop(llvm::Value *Ptr);
 
   /// \brief Emits a reference binding to the passed in expression.
   RValue EmitReferenceBindingToExpr(const Expr *E);
@@ -3594,7 +3689,7 @@
                                         bool PerformInit);
 
   void EmitCXXConstructExpr(const CXXConstructExpr *E, AggValueSlot Dest);
-  
+
   void EmitSynthesizedCXXCopyCtor(Address Dest, Address Src, const Expr *Exp);
 
   void enterFullExpression(const ExprWithCleanups *E) {
@@ -3643,7 +3738,7 @@
   /// Determine if the given statement might introduce a declaration into the
   /// current scope, by being a (possibly-labelled) DeclStmt.
   static bool mightAddDeclToScope(const Stmt *S);
-  
+
   /// ConstantFoldsToSimpleInteger - If the specified expression does not fold
   /// to a constant, or if it does but contains a label, return false.  If it
   /// constant folds return true and set the boolean result in Result.
@@ -3720,6 +3815,10 @@
                             llvm::ConstantInt *TypeId, llvm::Value *Ptr,
                             ArrayRef<llvm::Constant *> StaticArgs);
 
+  /// Emit a reached-unreachable diagnostic if \p Loc is valid and runtime
+  /// checking is enabled. Otherwise, just emit an unreachable instruction.
+  void EmitUnreachable(SourceLocation Loc);
+
   /// \brief Create a basic block that will call the trap intrinsic, and emit a
   /// conditional branch to it, for the -ftrapv checks.
   void EmitTrapCheck(llvm::Value *Checked);
@@ -3911,8 +4010,36 @@
                                    LValueBaseInfo *BaseInfo = nullptr,
                                    TBAAAccessInfo *TBAAInfo = nullptr);
 
+  /// If \p E references a parameter with pass_object_size info or a constant
+  /// array size modifier, emit the object size divided by the size of \p EltTy.
+  /// Otherwise return null.
+  llvm::Value *LoadPassedObjectSize(const Expr *E, QualType EltTy);
+
   void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK);
 
+  struct MultiVersionResolverOption {
+    llvm::Function *Function;
+    TargetAttr::ParsedTargetAttr ParsedAttribute;
+    unsigned Priority;
+    MultiVersionResolverOption(const TargetInfo &TargInfo, llvm::Function *F,
+                               const clang::TargetAttr::ParsedTargetAttr &PT)
+        : Function(F), ParsedAttribute(PT), Priority(0u) {
+      for (StringRef Feat : PT.Features)
+        Priority = std::max(Priority,
+                            TargInfo.multiVersionSortPriority(Feat.substr(1)));
+
+      if (!PT.Architecture.empty())
+        Priority = std::max(Priority,
+                            TargInfo.multiVersionSortPriority(PT.Architecture));
+    }
+
+    bool operator>(const MultiVersionResolverOption &Other) const {
+      return Priority > Other.Priority;
+    }
+  };
+  void EmitMultiVersionResolver(llvm::Function *Resolver,
+                                ArrayRef<MultiVersionResolverOption> Options);
+
 private:
   QualType getVarArgType(const Expr *Arg);
 
@@ -3929,6 +4056,7 @@
   llvm::Value *EmitX86CpuSupports(const CallExpr *E);
   llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs);
   llvm::Value *EmitX86CpuInit();
+  llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO);
 };
 
 /// Helper class with most of the code for saving a value for a
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 4be2a94..de38105 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -61,7 +61,7 @@
 using namespace CodeGen;
 
 static llvm::cl::opt<bool> LimitedCoverage(
-    "limited-coverage-experimental", llvm::cl::ZeroOrMore,
+    "limited-coverage-experimental", llvm::cl::ZeroOrMore, llvm::cl::Hidden,
     llvm::cl::desc("Emit limited coverage mapping information (experimental)"),
     llvm::cl::init(false));
 
@@ -103,6 +103,7 @@
   Int16Ty = llvm::Type::getInt16Ty(LLVMContext);
   Int32Ty = llvm::Type::getInt32Ty(LLVMContext);
   Int64Ty = llvm::Type::getInt64Ty(LLVMContext);
+  HalfTy = llvm::Type::getHalfTy(LLVMContext);
   FloatTy = llvm::Type::getFloatTy(LLVMContext);
   DoubleTy = llvm::Type::getDoubleTy(LLVMContext);
   PointerWidthInBits = C.getTargetInfo().getPointerWidth(0);
@@ -136,7 +137,7 @@
   // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0.
   if (LangOpts.Sanitize.has(SanitizerKind::Thread) ||
       (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0))
-    TBAA.reset(new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(),
+    TBAA.reset(new CodeGenTBAA(Context, TheModule, CodeGenOpts, getLangOpts(),
                                getCXXABI().getMangleContext()));
 
   // If debug info or coverage generation is enabled, create the CGDebugInfo
@@ -207,7 +208,10 @@
     OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
     break;
   default:
-    OpenMPRuntime.reset(new CGOpenMPRuntime(*this));
+    if (LangOpts.OpenMPSimd)
+      OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this));
+    else
+      OpenMPRuntime.reset(new CGOpenMPRuntime(*this));
     break;
   }
 }
@@ -391,6 +395,7 @@
   applyGlobalValReplacements();
   applyReplacements();
   checkAliases();
+  emitMultiVersionFunctions();
   EmitCXXGlobalInitFunc();
   EmitCXXGlobalDtorFunc();
   EmitCXXThreadLocalInitFunc();
@@ -441,7 +446,7 @@
   if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
     getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
                               CodeGenOpts.NumRegisterParameters);
-  
+
   if (CodeGenOpts.DwarfVersion) {
     // We actually want the latest version when there are conflicts.
     // We can change from Warning to Latest if such mode is supported.
@@ -452,6 +457,10 @@
     // Indicate that we want CodeView in the metadata.
     getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1);
   }
+  if (CodeGenOpts.ControlFlowGuard) {
+    // We want function ID tables for Control Flow Guard.
+    getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 1);
+  }
   if (CodeGenOpts.OptimizationLevel > 0 && CodeGenOpts.StrictVTablePointers) {
     // We don't support LTO with 2 with different StrictVTablePointers
     // FIXME: we could support it by stripping all the information introduced
@@ -497,6 +506,20 @@
     getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1);
   }
 
+  if (CodeGenOpts.CFProtectionReturn &&
+      Target.checkCFProtectionReturnSupported(getDiags())) {
+    // Indicate that we want to instrument return control flow protection.
+    getModule().addModuleFlag(llvm::Module::Override, "cf-protection-return",
+                              1);
+  }
+
+  if (CodeGenOpts.CFProtectionBranch &&
+      Target.checkCFProtectionBranchSupported(getDiags())) {
+    // Indicate that we want to instrument branch control flow protection.
+    getModule().addModuleFlag(llvm::Module::Override, "cf-protection-branch",
+                              1);
+  }
+
   if (LangOpts.CUDAIsDevice && getTriple().isNVPTX()) {
     // Indicate whether __nvvm_reflect should be configured to flush denormal
     // floating point values to 0.  (This corresponds to its "__CUDA_FTZ"
@@ -532,6 +555,9 @@
       getModule().setPIELevel(static_cast<llvm::PIELevel::Level>(PLevel));
   }
 
+  if (CodeGenOpts.NoPLT)
+    getModule().setRtLibUseGOT();
+
   SimplifyPersonality();
 
   if (getCodeGenOpts().EmitDeclMetadata)
@@ -579,13 +605,16 @@
 }
 
 TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) {
-  return TBAAAccessInfo(getTBAATypeInfo(AccessType));
-}
-
-TBAAAccessInfo CodeGenModule::getTBAAVTablePtrAccessInfo() {
   if (!TBAA)
     return TBAAAccessInfo();
-  return TBAA->getVTablePtrAccessInfo();
+  return TBAA->getAccessInfo(AccessType);
+}
+
+TBAAAccessInfo
+CodeGenModule::getTBAAVTablePtrAccessInfo(llvm::Type *VTablePtrType) {
+  if (!TBAA)
+    return TBAAAccessInfo();
+  return TBAA->getVTablePtrAccessInfo(VTablePtrType);
 }
 
 llvm::MDNode *CodeGenModule::getTBAAStructInfo(QualType QTy) {
@@ -606,12 +635,6 @@
   return TBAA->getAccessTagInfo(Info);
 }
 
-TBAAAccessInfo CodeGenModule::getTBAAMayAliasAccessInfo() {
-  if (!TBAA)
-    return TBAAAccessInfo();
-  return TBAA->getMayAliasAccessInfo();
-}
-
 TBAAAccessInfo CodeGenModule::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
                                                    TBAAAccessInfo TargetInfo) {
   if (!TBAA)
@@ -619,6 +642,22 @@
   return TBAA->mergeTBAAInfoForCast(SourceInfo, TargetInfo);
 }
 
+TBAAAccessInfo
+CodeGenModule::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
+                                                   TBAAAccessInfo InfoB) {
+  if (!TBAA)
+    return TBAAAccessInfo();
+  return TBAA->mergeTBAAInfoForConditionalOperator(InfoA, InfoB);
+}
+
+TBAAAccessInfo
+CodeGenModule::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
+                                              TBAAAccessInfo SrcInfo) {
+  if (!TBAA)
+    return TBAAAccessInfo();
+  return TBAA->mergeTBAAInfoForConditionalOperator(DestInfo, SrcInfo);
+}
+
 void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst,
                                                 TBAAAccessInfo TBAAInfo) {
   if (llvm::MDNode *Tag = getTBAAAccessTagInfo(TBAAInfo))
@@ -661,6 +700,8 @@
 
 void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV,
                                         const NamedDecl *D) const {
+  if (GV->hasDLLImportStorageClass())
+    return;
   // Internal definitions always have default visibility.
   if (GV->hasLocalLinkage()) {
     GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
@@ -669,10 +710,119 @@
 
   // Set visibility for definitions.
   LinkageInfo LV = D->getLinkageAndVisibility();
-  if (LV.isVisibilityExplicit() || !GV->hasAvailableExternallyLinkage())
+  if (LV.isVisibilityExplicit() || !GV->isDeclarationForLinker())
     GV->setVisibility(GetLLVMVisibility(LV.getVisibility()));
 }
 
+static bool shouldAssumeDSOLocal(const CodeGenModule &CGM,
+                                 llvm::GlobalValue *GV) {
+  if (GV->hasLocalLinkage())
+    return true;
+
+  if (!GV->hasDefaultVisibility() && !GV->hasExternalWeakLinkage())
+    return true;
+
+  // DLLImport explicitly marks the GV as external.
+  if (GV->hasDLLImportStorageClass())
+    return false;
+
+  const llvm::Triple &TT = CGM.getTriple();
+  // Every other GV is local on COFF.
+  // Make an exception for windows OS in the triple: Some firmware builds use
+  // *-win32-macho triples. This (accidentally?) produced windows relocations
+  // without GOT tables in older clang versions; Keep this behaviour.
+  // FIXME: even thread local variables?
+  if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO()))
+    return true;
+
+  // Only handle COFF and ELF for now.
+  if (!TT.isOSBinFormatELF())
+    return false;
+
+  // If this is not an executable, don't assume anything is local.
+  const auto &CGOpts = CGM.getCodeGenOpts();
+  llvm::Reloc::Model RM = CGOpts.RelocationModel;
+  const auto &LOpts = CGM.getLangOpts();
+  if (RM != llvm::Reloc::Static && !LOpts.PIE)
+    return false;
+
+  // A definition cannot be preempted from an executable.
+  if (!GV->isDeclarationForLinker())
+    return true;
+
+  // Most PIC code sequences that assume that a symbol is local cannot produce a
+  // 0 if it turns out the symbol is undefined. While this is ABI and relocation
+  // depended, it seems worth it to handle it here.
+  if (RM == llvm::Reloc::PIC_ && GV->hasExternalWeakLinkage())
+    return false;
+
+  // PPC has no copy relocations and cannot use a plt entry as a symbol address.
+  llvm::Triple::ArchType Arch = TT.getArch();
+  if (Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 ||
+      Arch == llvm::Triple::ppc64le)
+    return false;
+
+  // If we can use copy relocations we can assume it is local.
+  if (auto *Var = dyn_cast<llvm::GlobalVariable>(GV))
+    if (!Var->isThreadLocal() &&
+        (RM == llvm::Reloc::Static || CGOpts.PIECopyRelocations))
+      return true;
+
+  // If we can use a plt entry as the symbol address we can assume it
+  // is local.
+  // FIXME: This should work for PIE, but the gold linker doesn't support it.
+  if (isa<llvm::Function>(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static)
+    return true;
+
+  // Otherwise don't assue it is local.
+  return false;
+}
+
+void CodeGenModule::setDSOLocal(llvm::GlobalValue *GV) const {
+  GV->setDSOLocal(shouldAssumeDSOLocal(*this, GV));
+}
+
+void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV,
+                                          GlobalDecl GD) const {
+  const auto *D = dyn_cast<NamedDecl>(GD.getDecl());
+  if (const auto *Dtor = dyn_cast_or_null<CXXDestructorDecl>(D)) {
+    if (getCXXABI().useThunkForDtorVariant(Dtor, GD.getDtorType())) {
+      // Don't dllexport/import destructor thunks.
+      GV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
+      return;
+    }
+  }
+  setDLLImportDLLExport(GV, D);
+}
+
+void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV,
+                                          const NamedDecl *D) const {
+  if (D->isExternallyVisible()) {
+    if (D->hasAttr<DLLImportAttr>())
+      GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
+    else if (D->hasAttr<DLLExportAttr>() && !GV->isDeclarationForLinker())
+      GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
+  }
+}
+
+void CodeGenModule::setGVProperties(llvm::GlobalValue *GV,
+                                    GlobalDecl GD) const {
+  setDLLImportDLLExport(GV, GD);
+  setGlobalVisibilityAndLocal(GV, dyn_cast<NamedDecl>(GD.getDecl()));
+}
+
+void CodeGenModule::setGVProperties(llvm::GlobalValue *GV,
+                                    const NamedDecl *D) const {
+  setDLLImportDLLExport(GV, D);
+  setGlobalVisibilityAndLocal(GV, D);
+}
+
+void CodeGenModule::setGlobalVisibilityAndLocal(llvm::GlobalValue *GV,
+                                                const NamedDecl *D) const {
+  setGlobalVisibility(GV, D);
+  setDSOLocal(GV);
+}
+
 static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(StringRef S) {
   return llvm::StringSwitch<llvm::GlobalVariable::ThreadLocalMode>(S)
       .Case("global-dynamic", llvm::GlobalVariable::GeneralDynamicTLSModel)
@@ -710,6 +860,110 @@
   GV->setThreadLocalMode(TLM);
 }
 
+static void AppendTargetMangling(const CodeGenModule &CGM,
+                                 const TargetAttr *Attr, raw_ostream &Out) {
+  if (Attr->isDefaultVersion())
+    return;
+
+  Out << '.';
+  const auto &Target = CGM.getTarget();
+  TargetAttr::ParsedTargetAttr Info =
+      Attr->parse([&Target](StringRef LHS, StringRef RHS) {
+                    // Multiversioning doesn't allow "no-${feature}", so we can
+                    // only have "+" prefixes here.
+                    assert(LHS.startswith("+") && RHS.startswith("+") &&
+                           "Features should always have a prefix.");
+                    return Target.multiVersionSortPriority(LHS.substr(1)) >
+                           Target.multiVersionSortPriority(RHS.substr(1));
+                  });
+
+  bool IsFirst = true;
+
+  if (!Info.Architecture.empty()) {
+    IsFirst = false;
+    Out << "arch_" << Info.Architecture;
+  }
+
+  for (StringRef Feat : Info.Features) {
+    if (!IsFirst)
+      Out << '_';
+    IsFirst = false;
+    Out << Feat.substr(1);
+  }
+}
+
+static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD,
+                                      const NamedDecl *ND,
+                                      bool OmitTargetMangling = false) {
+  SmallString<256> Buffer;
+  llvm::raw_svector_ostream Out(Buffer);
+  MangleContext &MC = CGM.getCXXABI().getMangleContext();
+  if (MC.shouldMangleDeclName(ND)) {
+    llvm::raw_svector_ostream Out(Buffer);
+    if (const auto *D = dyn_cast<CXXConstructorDecl>(ND))
+      MC.mangleCXXCtor(D, GD.getCtorType(), Out);
+    else if (const auto *D = dyn_cast<CXXDestructorDecl>(ND))
+      MC.mangleCXXDtor(D, GD.getDtorType(), Out);
+    else
+      MC.mangleName(ND, Out);
+  } else {
+    IdentifierInfo *II = ND->getIdentifier();
+    assert(II && "Attempt to mangle unnamed decl.");
+    const auto *FD = dyn_cast<FunctionDecl>(ND);
+
+    if (FD &&
+        FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) {
+      llvm::raw_svector_ostream Out(Buffer);
+      Out << "__regcall3__" << II->getName();
+    } else {
+      Out << II->getName();
+    }
+  }
+
+  if (const auto *FD = dyn_cast<FunctionDecl>(ND))
+    if (FD->isMultiVersion() && !OmitTargetMangling)
+      AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
+  return Out.str();
+}
+
+void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD,
+                                            const FunctionDecl *FD) {
+  if (!FD->isMultiVersion())
+    return;
+
+  // Get the name of what this would be without the 'target' attribute.  This
+  // allows us to lookup the version that was emitted when this wasn't a
+  // multiversion function.
+  std::string NonTargetName =
+      getMangledNameImpl(*this, GD, FD, /*OmitTargetMangling=*/true);
+  GlobalDecl OtherGD;
+  if (lookupRepresentativeDecl(NonTargetName, OtherGD)) {
+    assert(OtherGD.getCanonicalDecl()
+               .getDecl()
+               ->getAsFunction()
+               ->isMultiVersion() &&
+           "Other GD should now be a multiversioned function");
+    // OtherFD is the version of this function that was mangled BEFORE
+    // becoming a MultiVersion function.  It potentially needs to be updated.
+    const FunctionDecl *OtherFD =
+        OtherGD.getCanonicalDecl().getDecl()->getAsFunction();
+    std::string OtherName = getMangledNameImpl(*this, OtherGD, OtherFD);
+    // This is so that if the initial version was already the 'default'
+    // version, we don't try to update it.
+    if (OtherName != NonTargetName) {
+      // Remove instead of erase, since others may have stored the StringRef
+      // to this.
+      const auto ExistingRecord = Manglings.find(NonTargetName);
+      if (ExistingRecord != std::end(Manglings))
+        Manglings.remove(&(*ExistingRecord));
+      auto Result = Manglings.insert(std::make_pair(OtherName, OtherGD));
+      MangledDeclNames[OtherGD.getCanonicalDecl()] = Result.first->first();
+      if (llvm::GlobalValue *Entry = GetGlobalValue(NonTargetName))
+        Entry->setName(OtherName);
+    }
+  }
+}
+
 StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
   GlobalDecl CanonicalGD = GD.getCanonicalDecl();
 
@@ -728,35 +982,11 @@
   if (FoundName != MangledDeclNames.end())
     return FoundName->second;
 
-  const auto *ND = cast<NamedDecl>(GD.getDecl());
-  SmallString<256> Buffer;
-  StringRef Str;
-  if (getCXXABI().getMangleContext().shouldMangleDeclName(ND)) {
-    llvm::raw_svector_ostream Out(Buffer);
-    if (const auto *D = dyn_cast<CXXConstructorDecl>(ND))
-      getCXXABI().getMangleContext().mangleCXXCtor(D, GD.getCtorType(), Out);
-    else if (const auto *D = dyn_cast<CXXDestructorDecl>(ND))
-      getCXXABI().getMangleContext().mangleCXXDtor(D, GD.getDtorType(), Out);
-    else
-      getCXXABI().getMangleContext().mangleName(ND, Out);
-    Str = Out.str();
-  } else {
-    IdentifierInfo *II = ND->getIdentifier();
-    assert(II && "Attempt to mangle unnamed decl.");
-    const auto *FD = dyn_cast<FunctionDecl>(ND);
-
-    if (FD &&
-        FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) {
-      llvm::raw_svector_ostream Out(Buffer);
-      Out << "__regcall3__" << II->getName();
-      Str = Out.str();
-    } else {
-      Str = II->getName();
-    }
-  }
 
   // Keep the first result in the case of a mangling collision.
-  auto Result = Manglings.insert(std::make_pair(Str, GD));
+  const auto *ND = cast<NamedDecl>(GD.getDecl());
+  auto Result =
+      Manglings.insert(std::make_pair(getMangledNameImpl(*this, GD, ND), GD));
   return MangledDeclNames[CanonicalGD] = Result.first->first();
 }
 
@@ -768,7 +998,7 @@
   SmallString<256> Buffer;
   llvm::raw_svector_ostream Out(Buffer);
   if (!D)
-    MangleCtx.mangleGlobalBlock(BD, 
+    MangleCtx.mangleGlobalBlock(BD,
       dyn_cast_or_null<VarDecl>(initializedGlobalDecl.getDecl()), Out);
   else if (const auto *CD = dyn_cast<CXXConstructorDecl>(D))
     MangleCtx.mangleCtorBlock(CD, GD.getCtorType(), BD, Out);
@@ -864,25 +1094,6 @@
   return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false);
 }
 
-void CodeGenModule::setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F) {
-  const auto *FD = cast<FunctionDecl>(GD.getDecl());
-
-  if (const auto *Dtor = dyn_cast_or_null<CXXDestructorDecl>(FD)) {
-    if (getCXXABI().useThunkForDtorVariant(Dtor, GD.getDtorType())) {
-      // Don't dllexport/import destructor thunks.
-      F->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
-      return;
-    }
-  }
-
-  if (FD->hasAttr<DLLImportAttr>())
-    F->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
-  else if (FD->hasAttr<DLLExportAttr>())
-    F->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
-  else
-    F->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass);
-}
-
 llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
   llvm::MDString *MDS = dyn_cast<llvm::MDString>(MD);
   if (!MDS) return nullptr;
@@ -890,11 +1101,6 @@
   return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString()));
 }
 
-void CodeGenModule::setFunctionDefinitionAttributes(const FunctionDecl *D,
-                                                    llvm::Function *F) {
-  setNonAliasAttributes(D, F);
-}
-
 void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D,
                                               const CGFunctionInfo &Info,
                                               llvm::Function *F) {
@@ -1047,10 +1253,10 @@
       CreateFunctionTypeMetadata(FD, F);
 }
 
-void CodeGenModule::SetCommonAttributes(const Decl *D,
-                                        llvm::GlobalValue *GV) {
-  if (const auto *ND = dyn_cast_or_null<NamedDecl>(D))
-    setGlobalVisibility(GV, ND);
+void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) {
+  const Decl *D = GD.getDecl();
+  if (dyn_cast_or_null<NamedDecl>(D))
+    setGVProperties(GV, GD);
   else
     GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
 
@@ -1058,19 +1264,56 @@
     addUsedGlobal(GV);
 }
 
-void CodeGenModule::setAliasAttributes(const Decl *D,
-                                       llvm::GlobalValue *GV) {
-  SetCommonAttributes(D, GV);
+bool CodeGenModule::GetCPUAndFeaturesAttributes(const Decl *D,
+                                                llvm::AttrBuilder &Attrs) {
+  // Add target-cpu and target-features attributes to functions. If
+  // we have a decl for the function and it has a target attribute then
+  // parse that and add it to the feature set.
+  StringRef TargetCPU = getTarget().getTargetOpts().CPU;
+  std::vector<std::string> Features;
+  const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
+  FD = FD ? FD->getMostRecentDecl() : FD;
+  const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr;
+  bool AddedAttr = false;
+  if (TD) {
+    llvm::StringMap<bool> FeatureMap;
+    getFunctionFeatureMap(FeatureMap, FD);
 
-  // Process the dllexport attribute based on whether the original definition
-  // (not necessarily the aliasee) was exported.
-  if (D->hasAttr<DLLExportAttr>())
-    GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+    // Produce the canonical string for this set of features.
+    for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap)
+      Features.push_back((Entry.getValue() ? "+" : "-") + Entry.getKey().str());
+
+    // Now add the target-cpu and target-features to the function.
+    // While we populated the feature map above, we still need to
+    // get and parse the target attribute so we can get the cpu for
+    // the function.
+    TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
+    if (ParsedAttr.Architecture != "" &&
+        getTarget().isValidCPUName(ParsedAttr.Architecture))
+      TargetCPU = ParsedAttr.Architecture;
+  } else {
+    // Otherwise just add the existing target cpu and target features to the
+    // function.
+    Features = getTarget().getTargetOpts().Features;
+  }
+
+  if (TargetCPU != "") {
+    Attrs.addAttribute("target-cpu", TargetCPU);
+    AddedAttr = true;
+  }
+  if (!Features.empty()) {
+    std::sort(Features.begin(), Features.end());
+    Attrs.addAttribute("target-features", llvm::join(Features, ","));
+    AddedAttr = true;
+  }
+
+  return AddedAttr;
 }
 
-void CodeGenModule::setNonAliasAttributes(const Decl *D,
+void CodeGenModule::setNonAliasAttributes(GlobalDecl GD,
                                           llvm::GlobalObject *GO) {
-  SetCommonAttributes(D, GO);
+  const Decl *D = GD.getDecl();
+  SetCommonAttributes(GD, GO);
 
   if (D) {
     if (auto *GV = dyn_cast<llvm::GlobalVariable>(GO)) {
@@ -1084,50 +1327,48 @@
 
     if (auto *F = dyn_cast<llvm::Function>(GO)) {
       if (auto *SA = D->getAttr<PragmaClangTextSectionAttr>())
-       if (!D->getAttr<SectionAttr>())
-         F->addFnAttr("implicit-section-name", SA->getName());
+        if (!D->getAttr<SectionAttr>())
+          F->addFnAttr("implicit-section-name", SA->getName());
+
+      llvm::AttrBuilder Attrs;
+      if (GetCPUAndFeaturesAttributes(D, Attrs)) {
+        // We know that GetCPUAndFeaturesAttributes will always have the
+        // newest set, since it has the newest possible FunctionDecl, so the
+        // new ones should replace the old.
+        F->removeFnAttr("target-cpu");
+        F->removeFnAttr("target-features");
+        F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs);
+      }
     }
 
     if (const SectionAttr *SA = D->getAttr<SectionAttr>())
       GO->setSection(SA->getName());
   }
 
-  getTargetCodeGenInfo().setTargetAttributes(D, GO, *this, ForDefinition);
+  getTargetCodeGenInfo().setTargetAttributes(D, GO, *this);
 }
 
-void CodeGenModule::SetInternalFunctionAttributes(const Decl *D,
+void CodeGenModule::SetInternalFunctionAttributes(GlobalDecl GD,
                                                   llvm::Function *F,
                                                   const CGFunctionInfo &FI) {
+  const Decl *D = GD.getDecl();
   SetLLVMFunctionAttributes(D, FI, F);
   SetLLVMFunctionAttributesForDefinition(D, F);
 
   F->setLinkage(llvm::Function::InternalLinkage);
 
-  setNonAliasAttributes(D, F);
+  setNonAliasAttributes(GD, F);
 }
 
-static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV,
-                                         const NamedDecl *ND) {
+static void setLinkageForGV(llvm::GlobalValue *GV, const NamedDecl *ND) {
   // Set linkage and visibility in case we never see a definition.
   LinkageInfo LV = ND->getLinkageAndVisibility();
-  if (!isExternallyVisible(LV.getLinkage())) {
-    // Don't set internal linkage on declarations.
-  } else {
-    if (ND->hasAttr<DLLImportAttr>()) {
-      GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
-      GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
-    } else if (ND->hasAttr<DLLExportAttr>()) {
-      GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
-    } else if (ND->hasAttr<WeakAttr>() || ND->isWeakImported()) {
-      // "extern_weak" is overloaded in LLVM; we probably should have
-      // separate linkage types for this.
-      GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
-    }
-
-    // Set visibility on a declaration only if it's explicit.
-    if (LV.isVisibilityExplicit())
-      GV->setVisibility(CodeGenModule::GetLLVMVisibility(LV.getVisibility()));
-  }
+  // Don't set internal linkage on declarations.
+  // "extern_weak" is overloaded in LLVM; we probably should have
+  // separate linkage types for this.
+  if (isExternallyVisible(LV.getLinkage()) &&
+      (ND->hasAttr<WeakAttr>() || ND->isWeakImported()))
+    GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
 }
 
 void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD,
@@ -1150,6 +1391,7 @@
 
   llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType());
   F->addTypeMetadata(0, MD);
+  F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(FD->getType()));
 
   // Emit a hash-based bit set entry for cross-DSO calls.
   if (CodeGenOpts.SanitizeCfiCrossDso)
@@ -1159,8 +1401,7 @@
 
 void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
                                           bool IsIncompleteFunction,
-                                          bool IsThunk,
-                                          ForDefinition_t IsForDefinition) {
+                                          bool IsThunk) {
 
   if (llvm::Intrinsic::ID IID = F->getIntrinsicID()) {
     // If this is an intrinsic function, set the function's attributes
@@ -1174,9 +1415,8 @@
   if (!IsIncompleteFunction) {
     SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F);
     // Setup target-specific attributes.
-    if (!IsForDefinition)
-      getTargetCodeGenInfo().setTargetAttributes(FD, F, *this,
-                                                 NotForDefinition);
+    if (F->isDeclaration())
+      getTargetCodeGenInfo().setTargetAttributes(FD, F, *this);
   }
 
   // Add the Returned attribute for "this", except for iOS 5 and earlier
@@ -1194,7 +1434,8 @@
   // Only a few attributes are set on declarations; these may later be
   // overridden by a definition.
 
-  setLinkageAndVisibilityForGV(F, FD);
+  setLinkageForGV(F, FD);
+  setGVProperties(F, FD);
 
   if (FD->getAttr<PragmaClangTextSectionAttr>()) {
     F->addFnAttr("implicit-section-name");
@@ -1229,6 +1470,9 @@
   // is handled with better precision by the receiving DSO.
   if (!CodeGenOpts.SanitizeCfiCrossDso)
     CreateFunctionTypeMetadata(FD, F);
+
+  if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
+    getOpenMPRuntime().emitDeclareSimdFunction(FD, F);
 }
 
 void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) {
@@ -1286,6 +1530,12 @@
   LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts));
 }
 
+void CodeGenModule::AddELFLibDirective(StringRef Lib) {
+  auto &C = getLLVMContext();
+  LinkerOptionsMetadata.push_back(llvm::MDNode::get(
+      C, {llvm::MDString::get(C, "lib"), llvm::MDString::get(C, Lib)}));
+}
+
 void CodeGenModule::AddDependentLib(StringRef Lib) {
   llvm::SmallString<24> Opt;
   getTargetCodeGenInfo().getDependentLibraryOption(Lib, Opt);
@@ -1573,7 +1823,7 @@
                                            StringRef Category) const {
   // For now globals can be blacklisted only in ASan and KASan.
   const SanitizerMask EnabledAsanMask = LangOpts.Sanitize.Mask &
-      (SanitizerKind::Address | SanitizerKind::KernelAddress);
+      (SanitizerKind::Address | SanitizerKind::KernelAddress | SanitizerKind::HWAddress);
   if (!EnabledAsanMask)
     return false;
   const auto &SanitizerBL = getContext().getSanitizerBlacklist();
@@ -2007,12 +2257,12 @@
 void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
   const auto *D = cast<ValueDecl>(GD.getDecl());
 
-  PrettyStackTraceDecl CrashInfo(const_cast<ValueDecl *>(D), D->getLocation(), 
+  PrettyStackTraceDecl CrashInfo(const_cast<ValueDecl *>(D), D->getLocation(),
                                  Context.getSourceManager(),
                                  "Generating code for declaration");
-  
+
   if (isa<FunctionDecl>(D)) {
-    // At -O0, don't generate IR for functions with available_externally 
+    // At -O0, don't generate IR for functions with available_externally
     // linkage.
     if (!shouldEmitFunction(GD))
       return;
@@ -2038,13 +2288,84 @@
 
   if (const auto *VD = dyn_cast<VarDecl>(D))
     return EmitGlobalVarDefinition(VD, !VD->hasDefinition());
-  
+
   llvm_unreachable("Invalid argument to EmitGlobalDefinition()");
 }
 
 static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
                                                       llvm::Function *NewFn);
 
+void CodeGenModule::emitMultiVersionFunctions() {
+  for (GlobalDecl GD : MultiVersionFuncs) {
+    SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
+    const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
+    getContext().forEachMultiversionedFunctionVersion(
+        FD, [this, &GD, &Options](const FunctionDecl *CurFD) {
+          GlobalDecl CurGD{
+              (CurFD->isDefined() ? CurFD->getDefinition() : CurFD)};
+          StringRef MangledName = getMangledName(CurGD);
+          llvm::Constant *Func = GetGlobalValue(MangledName);
+          if (!Func) {
+            if (CurFD->isDefined()) {
+              EmitGlobalFunctionDefinition(CurGD, nullptr);
+              Func = GetGlobalValue(MangledName);
+            } else {
+              const CGFunctionInfo &FI =
+                  getTypes().arrangeGlobalDeclaration(GD);
+              llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
+              Func = GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false,
+                                       /*DontDefer=*/false, ForDefinition);
+            }
+            assert(Func && "This should have just been created");
+          }
+          Options.emplace_back(getTarget(), cast<llvm::Function>(Func),
+                               CurFD->getAttr<TargetAttr>()->parse());
+        });
+
+    llvm::Function *ResolverFunc = cast<llvm::Function>(
+        GetGlobalValue((getMangledName(GD) + ".resolver").str()));
+    if (supportsCOMDAT())
+      ResolverFunc->setComdat(
+          getModule().getOrInsertComdat(ResolverFunc->getName()));
+    std::stable_sort(
+        Options.begin(), Options.end(),
+        std::greater<CodeGenFunction::MultiVersionResolverOption>());
+    CodeGenFunction CGF(*this);
+    CGF.EmitMultiVersionResolver(ResolverFunc, Options);
+  }
+}
+
+/// If an ifunc for the specified mangled name is not in the module, create and
+/// return an llvm IFunc Function with the specified type.
+llvm::Constant *
+CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy,
+                                            StringRef MangledName,
+                                            const FunctionDecl *FD) {
+  std::string IFuncName = (MangledName + ".ifunc").str();
+  if (llvm::GlobalValue *IFuncGV = GetGlobalValue(IFuncName))
+    return IFuncGV;
+
+  // Since this is the first time we've created this IFunc, make sure
+  // that we put this multiversioned function into the list to be
+  // replaced later.
+  MultiVersionFuncs.push_back(GD);
+
+  std::string ResolverName = (MangledName + ".resolver").str();
+  llvm::Type *ResolverType = llvm::FunctionType::get(
+      llvm::PointerType::get(DeclTy,
+                             Context.getTargetAddressSpace(FD->getType())),
+      false);
+  llvm::Constant *Resolver =
+      GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{},
+                              /*ForVTable=*/false);
+  llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(
+      DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule());
+  GIF->setName(IFuncName);
+  SetCommonAttributes(FD, GIF);
+
+  return GIF;
+}
+
 /// GetOrCreateLLVMFunction - If the specified mangled name is not in the
 /// module, create and return an llvm Function with the specified type. If there
 /// is something in the module with the specified name, return it potentially
@@ -2058,6 +2379,16 @@
     ForDefinition_t IsForDefinition) {
   const Decl *D = GD.getDecl();
 
+  // Any attempts to use a MultiVersion function should result in retrieving
+  // the iFunc instead. Name Mangling will handle the rest of the changes.
+  if (const FunctionDecl *FD = cast_or_null<FunctionDecl>(D)) {
+    if (FD->isMultiVersion() && FD->getAttr<TargetAttr>()->isDefaultVersion()) {
+      UpdateMultiVersionNames(GD, FD);
+      if (!IsForDefinition)
+        return GetOrCreateMultiVersionIFunc(GD, Ty, MangledName, FD);
+    }
+  }
+
   // Lookup the entry, lazily creating it if necessary.
   llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
   if (Entry) {
@@ -2144,8 +2475,7 @@
 
   assert(F->getName() == MangledName && "name was uniqued!");
   if (D)
-    SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk,
-                          IsForDefinition);
+    SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk);
   if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) {
     llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex);
     F->addAttributes(llvm::AttributeList::FunctionIndex, B);
@@ -2281,7 +2611,8 @@
       F->setCallingConv(getRuntimeCC());
 
       if (!Local && getTriple().isOSBinFormatCOFF() &&
-          !getCodeGenOpts().LTOVisibilityPublicStd) {
+          !getCodeGenOpts().LTOVisibilityPublicStd &&
+          !getTriple().isWindowsGNUEnvironment()) {
         const FunctionDecl *FD = GetRuntimeFunctionDecl(Context, Name);
         if (!FD || FD->hasAttr<DLLImportAttr>()) {
           F->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
@@ -2430,7 +2761,7 @@
 
     GV->setAlignment(getContext().getDeclAlign(D).getQuantity());
 
-    setLinkageAndVisibilityForGV(GV, D);
+    setLinkageForGV(GV, D);
 
     if (D->getTLSKind()) {
       if (D->getTLSKind() == VarDecl::TLS_Dynamic)
@@ -2438,6 +2769,8 @@
       setTLSMode(GV, *D);
     }
 
+    setGVProperties(GV, D);
+
     // If required by the ABI, treat declarations of static data members with
     // inline initializers as definitions.
     if (getContext().isMSStaticDataMemberInlineDefinition(D)) {
@@ -2486,7 +2819,7 @@
                   GetAddrOfGlobalVar(D, InitType, IsForDefinition));
 
               // Erase the old global, since it is no longer used.
-              cast<llvm::GlobalValue>(GV)->eraseFromParent();
+              GV->eraseFromParent();
               GV = NewGV;
             } else {
               GV->setInitializer(Init);
@@ -2543,7 +2876,7 @@
 }
 
 llvm::GlobalVariable *
-CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name, 
+CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name,
                                       llvm::Type *Ty,
                                       llvm::GlobalValue::LinkageTypes Linkage) {
   llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name);
@@ -2559,7 +2892,7 @@
     assert(GV->isDeclaration() && "Declaration has wrong type!");
     OldGV = GV;
   }
-  
+
   // Create a new variable.
   GV = new llvm::GlobalVariable(getModule(), Ty, /*isConstant=*/true,
                                 Linkage, nullptr, Name);
@@ -2567,13 +2900,13 @@
   if (OldGV) {
     // Replace occurrences of the old variable if needed.
     GV->takeName(OldGV);
-    
+
     if (!OldGV->use_empty()) {
       llvm::Constant *NewPtrForOldDecl =
       llvm::ConstantExpr::getBitCast(GV, OldGV->getType());
       OldGV->replaceAllUsesWith(NewPtrForOldDecl);
     }
-    
+
     OldGV->eraseFromParent();
   }
 
@@ -3246,10 +3579,9 @@
   // declarations).
   auto *Fn = cast<llvm::Function>(GV);
   setFunctionLinkage(GD, Fn);
-  setFunctionDLLStorageClass(GD, Fn);
 
   // FIXME: this is redundant with part of setFunctionDefinitionAttributes
-  setGlobalVisibility(Fn, D);
+  setGVProperties(Fn, GD);
 
   MaybeHandleStaticInExternC(D, Fn);
 
@@ -3257,7 +3589,7 @@
 
   CodeGenFunction(*this).GenerateCode(D, Fn, FI);
 
-  setFunctionDefinitionAttributes(D, Fn);
+  setNonAliasAttributes(GD, Fn);
   SetLLVMFunctionAttributesForDefinition(D, Fn);
 
   if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>())
@@ -3341,7 +3673,7 @@
     if (VD->getTLSKind())
       setTLSMode(GA, *VD);
 
-  setAliasAttributes(D, GA);
+  SetCommonAttributes(GD, GA);
 }
 
 void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) {
@@ -3400,7 +3732,7 @@
   } else
     GIF->setName(MangledName);
 
-  SetCommonAttributes(D, GIF);
+  SetCommonAttributes(GD, GIF);
 }
 
 llvm::Function *CodeGenModule::getIntrinsic(unsigned IID,
@@ -3578,7 +3910,7 @@
   if (ObjCFastEnumerationStateType.isNull()) {
     RecordDecl *D = Context.buildImplicitRecord("__objcFastEnumerationState");
     D->startDefinition();
-    
+
     QualType FieldTypes[] = {
       Context.UnsignedLongTy,
       Context.getPointerType(Context.getObjCIdType()),
@@ -3586,7 +3918,7 @@
       Context.getConstantArrayType(Context.UnsignedLongTy,
                            llvm::APInt(32, 5), ArrayType::Normal, 0)
     };
-    
+
     for (size_t i = 0; i < 4; ++i) {
       FieldDecl *Field = FieldDecl::Create(Context,
                                            D,
@@ -3599,18 +3931,18 @@
       Field->setAccess(AS_public);
       D->addDecl(Field);
     }
-    
+
     D->completeDefinition();
     ObjCFastEnumerationStateType = Context.getTagDeclType(D);
   }
-  
+
   return ObjCFastEnumerationStateType;
 }
 
 llvm::Constant *
 CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) {
   assert(!E->getType()->isPointerType() && "Strings are always arrays");
-  
+
   // Don't emit it as the address of the string, emit the string data itself
   // as an inline array.
   if (E->getCharByteWidth() == 1) {
@@ -3636,11 +3968,11 @@
     Elements.resize(NumElements);
     return llvm::ConstantDataArray::get(VMContext, Elements);
   }
-  
+
   assert(ElemTy->getPrimitiveSizeInBits() == 32);
   SmallVector<uint32_t, 32> Elements;
   Elements.reserve(NumElements);
-  
+
   for(unsigned i = 0, e = E->getLength(); i != e; ++i)
     Elements.push_back(E->getCodeUnit(i));
   Elements.resize(NumElements);
@@ -3845,7 +4177,7 @@
       getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(),
       /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
   if (emitter) emitter->finalize(GV);
-  setGlobalVisibility(GV, VD);
+  setGVProperties(GV, VD);
   GV->setAlignment(Align.getQuantity());
   if (supportsCOMDAT() && GV->isWeakForLinker())
     GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
@@ -3932,11 +4264,11 @@
   if (D->getNumIvarInitializers() == 0 ||
       AllTrivialInitializers(*this, D))
     return;
-  
+
   IdentifierInfo *II = &getContext().Idents.get(".cxx_construct");
   Selector cxxSelector = getContext().Selectors.getSelector(0, &II);
   // The constructor returns 'self'.
-  ObjCMethodDecl *CTORMethod = ObjCMethodDecl::Create(getContext(), 
+  ObjCMethodDecl *CTORMethod = ObjCMethodDecl::Create(getContext(),
                                                 D->getLocation(),
                                                 D->getLocation(),
                                                 cxxSelector,
@@ -3982,18 +4314,13 @@
 /// EmitTopLevelDecl - Emit code for a single top level declaration.
 void CodeGenModule::EmitTopLevelDecl(Decl *D) {
   // Ignore dependent declarations.
-  if (D->getDeclContext() && D->getDeclContext()->isDependentContext())
+  if (D->isTemplated())
     return;
 
   switch (D->getKind()) {
   case Decl::CXXConversion:
   case Decl::CXXMethod:
   case Decl::Function:
-    // Skip function templates
-    if (cast<FunctionDecl>(D)->getDescribedFunctionTemplate() ||
-        cast<FunctionDecl>(D)->isLateTemplateParsed())
-      return;
-
     EmitGlobal(cast<FunctionDecl>(D));
     // Always provide some coverage mapping
     // even for the functions that aren't emitted.
@@ -4006,10 +4333,6 @@
 
   case Decl::Var:
   case Decl::Decomposition:
-    // Skip variable templates
-    if (cast<VarDecl>(D)->getDescribedVarTemplate())
-      return;
-    LLVM_FALLTHROUGH;
   case Decl::VarTemplateSpecialization:
     EmitGlobal(cast<VarDecl>(D));
     if (auto *DD = dyn_cast<DecompositionDecl>(D))
@@ -4027,6 +4350,13 @@
   case Decl::Namespace:
     EmitDeclContext(cast<NamespaceDecl>(D));
     break;
+  case Decl::ClassTemplateSpecialization: {
+    const auto *Spec = cast<ClassTemplateSpecializationDecl>(D);
+    if (DebugInfo &&
+        Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition &&
+        Spec->hasDefinition())
+      DebugInfo->completeTemplateDefinition(*Spec);
+  } LLVM_FALLTHROUGH;
   case Decl::CXXRecord:
     if (DebugInfo) {
       if (auto *ES = D->getASTContext().getExternalSource())
@@ -4061,16 +4391,9 @@
       DI->EmitUsingDirective(cast<UsingDirectiveDecl>(*D));
     return;
   case Decl::CXXConstructor:
-    // Skip function templates
-    if (cast<FunctionDecl>(D)->getDescribedFunctionTemplate() ||
-        cast<FunctionDecl>(D)->isLateTemplateParsed())
-      return;
-      
     getCXXABI().EmitCXXConstructors(cast<CXXConstructorDecl>(D));
     break;
   case Decl::CXXDestructor:
-    if (cast<FunctionDecl>(D)->isLateTemplateParsed())
-      return;
     getCXXABI().EmitCXXDestructors(cast<CXXDestructorDecl>(D));
     break;
 
@@ -4091,7 +4414,7 @@
       ObjCRuntime->GenerateProtocol(Proto);
     break;
   }
-      
+
   case Decl::ObjCCategoryImpl:
     // Categories have properties but don't support synthesize so we
     // can ignore them here.
@@ -4130,7 +4453,11 @@
       AppendLinkerOptions(PCD->getArg());
       break;
     case PCK_Lib:
-      AddDependentLib(PCD->getArg());
+      if (getTarget().getTriple().isOSBinFormatELF() &&
+          !getTarget().getTriple().isPS4())
+        AddELFLibDirective(PCD->getArg());
+      else
+        AddDependentLib(PCD->getArg());
       break;
     case PCK_Compiler:
     case PCK_ExeStr:
@@ -4213,15 +4540,6 @@
     EmitOMPThreadPrivateDecl(cast<OMPThreadPrivateDecl>(D));
     break;
 
-  case Decl::ClassTemplateSpecialization: {
-    const auto *Spec = cast<ClassTemplateSpecializationDecl>(D);
-    if (DebugInfo &&
-        Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition &&
-        Spec->hasDefinition())
-      DebugInfo->completeTemplateDefinition(*Spec);
-    break;
-  }
-
   case Decl::OMPDeclareReduction:
     EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(D));
     break;
@@ -4277,20 +4595,14 @@
 }
 
 void CodeGenModule::EmitDeferredUnusedCoverageMappings() {
-  std::vector<const Decl *> DeferredDecls;
-  for (const auto &I : DeferredEmptyCoverageMappingDecls) {
-    if (!I.second)
+  // We call takeVector() here to avoid use-after-free.
+  // FIXME: DeferredEmptyCoverageMappingDecls is getting mutated because
+  // we deserialize function bodies to emit coverage info for them, and that
+  // deserializes more declarations. How should we handle that case?
+  for (const auto &Entry : DeferredEmptyCoverageMappingDecls.takeVector()) {
+    if (!Entry.second)
       continue;
-    DeferredDecls.push_back(I.first);
-  }
-  // Sort the declarations by their location to make sure that the tests get a
-  // predictable order for the coverage mapping for the unused declarations.
-  if (CodeGenOpts.DumpCoverageMapping)
-    std::sort(DeferredDecls.begin(), DeferredDecls.end(),
-              [] (const Decl *LHS, const Decl *RHS) {
-      return LHS->getLocStart() < RHS->getLocStart();
-    });
-  for (const auto *D : DeferredDecls) {
+    const Decl *D = Entry.first;
     switch (D->getKind()) {
     case Decl::CXXConversion:
     case Decl::CXXMethod:
@@ -4499,7 +4811,7 @@
   // and it's not for EH?
   if (!ForEH && !getLangOpts().RTTI)
     return llvm::Constant::getNullValue(Int8PtrTy);
-  
+
   if (ForEH && Ty->isObjCObjectPointerType() &&
       LangOpts.ObjCRuntime.isGNUFamily())
     return ObjCRuntime->GetEHType(Ty);
@@ -4508,6 +4820,9 @@
 }
 
 void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) {
+  // Do not emit threadprivates in simd-only mode.
+  if (LangOpts.OpenMP && LangOpts.OpenMPSimd)
+    return;
   for (auto RefExpr : D->varlists()) {
     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(RefExpr)->getDecl());
     bool PerformInit =
@@ -4541,6 +4856,60 @@
   return InternalId;
 }
 
+// Generalize pointer types to a void pointer with the qualifiers of the
+// originally pointed-to type, e.g. 'const char *' and 'char * const *'
+// generalize to 'const void *' while 'char *' and 'const char **' generalize to
+// 'void *'.
+static QualType GeneralizeType(ASTContext &Ctx, QualType Ty) {
+  if (!Ty->isPointerType())
+    return Ty;
+
+  return Ctx.getPointerType(
+      QualType(Ctx.VoidTy).withCVRQualifiers(
+          Ty->getPointeeType().getCVRQualifiers()));
+}
+
+// Apply type generalization to a FunctionType's return and argument types
+static QualType GeneralizeFunctionType(ASTContext &Ctx, QualType Ty) {
+  if (auto *FnType = Ty->getAs<FunctionProtoType>()) {
+    SmallVector<QualType, 8> GeneralizedParams;
+    for (auto &Param : FnType->param_types())
+      GeneralizedParams.push_back(GeneralizeType(Ctx, Param));
+
+    return Ctx.getFunctionType(
+        GeneralizeType(Ctx, FnType->getReturnType()),
+        GeneralizedParams, FnType->getExtProtoInfo());
+  }
+
+  if (auto *FnType = Ty->getAs<FunctionNoProtoType>())
+    return Ctx.getFunctionNoProtoType(
+        GeneralizeType(Ctx, FnType->getReturnType()));
+
+  llvm_unreachable("Encountered unknown FunctionType");
+}
+
+llvm::Metadata *CodeGenModule::CreateMetadataIdentifierGeneralized(QualType T) {
+  T = GeneralizeFunctionType(getContext(), T);
+
+  llvm::Metadata *&InternalId = GeneralizedMetadataIdMap[T.getCanonicalType()];
+  if (InternalId)
+    return InternalId;
+
+  if (isExternallyVisible(T->getLinkage())) {
+    std::string OutName;
+    llvm::raw_string_ostream Out(OutName);
+    getCXXABI().getMangleContext().mangleTypeName(T, Out);
+    Out << ".generalized";
+
+    InternalId = llvm::MDString::get(getLLVMContext(), Out.str());
+  } else {
+    InternalId = llvm::MDNode::getDistinct(getLLVMContext(),
+                                           llvm::ArrayRef<llvm::Metadata *>());
+  }
+
+  return InternalId;
+}
+
 /// Returns whether this module needs the "all-vtables" type identifier.
 bool CodeGenModule::NeedAllVtablesTypeId() const {
   // Returns true if at least one of vtable-based CFI checkers is enabled and
@@ -4582,14 +4951,23 @@
     // If we have a TargetAttr build up the feature map based on that.
     TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
 
+    ParsedAttr.Features.erase(
+        llvm::remove_if(ParsedAttr.Features,
+                        [&](const std::string &Feat) {
+                          return !Target.isValidFeatureName(
+                              StringRef{Feat}.substr(1));
+                        }),
+        ParsedAttr.Features.end());
+
     // Make a copy of the features as passed on the command line into the
     // beginning of the additional features from the function to override.
     ParsedAttr.Features.insert(ParsedAttr.Features.begin(),
                             Target.getTargetOpts().FeaturesAsWritten.begin(),
                             Target.getTargetOpts().FeaturesAsWritten.end());
 
-    if (ParsedAttr.Architecture != "")
-      TargetCPU = ParsedAttr.Architecture ;
+    if (ParsedAttr.Architecture != "" &&
+        Target.isValidCPUName(ParsedAttr.Architecture))
+      TargetCPU = ParsedAttr.Architecture;
 
     // Now populate the feature map, first with the TargetCPU which is either
     // the default or a new one from the target attribute string. Then we'll use
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index ebc1ec6..fbdc4c1 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -324,6 +324,10 @@
   /// is defined once we get to the end of the of the translation unit.
   std::vector<GlobalDecl> Aliases;
 
+  /// List of multiversion functions that have to be emitted.  Used to make sure
+  /// we properly emit the iFunc.
+  std::vector<GlobalDecl> MultiVersionFuncs;
+
   typedef llvm::StringMap<llvm::TrackingVH<llvm::Constant> > ReplacementsTy;
   ReplacementsTy Replacements;
 
@@ -490,14 +494,16 @@
 
   /// @}
 
-  llvm::DenseMap<const Decl *, bool> DeferredEmptyCoverageMappingDecls;
+  llvm::MapVector<const Decl *, bool> DeferredEmptyCoverageMappingDecls;
 
   std::unique_ptr<CoverageMappingModuleGen> CoverageMapping;
 
   /// Mapping from canonical types to their metadata identifiers. We need to
   /// maintain this mapping because identifiers may be formed from distinct
   /// MDNodes.
-  llvm::DenseMap<QualType, llvm::Metadata *> MetadataIdMap;
+  typedef llvm::DenseMap<QualType, llvm::Metadata *> MetadataTypeMap;
+  MetadataTypeMap MetadataIdMap;
+  MetadataTypeMap GeneralizedMetadataIdMap;
 
 public:
   CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts,
@@ -662,7 +668,7 @@
 
   /// getTBAAVTablePtrAccessInfo - Get the TBAA information that describes an
   /// access to a virtual table pointer.
-  TBAAAccessInfo getTBAAVTablePtrAccessInfo();
+  TBAAAccessInfo getTBAAVTablePtrAccessInfo(llvm::Type *VTablePtrType);
 
   llvm::MDNode *getTBAAStructInfo(QualType QTy);
 
@@ -673,15 +679,29 @@
   /// getTBAAAccessTagInfo - Get TBAA tag for a given memory access.
   llvm::MDNode *getTBAAAccessTagInfo(TBAAAccessInfo Info);
 
-  /// getTBAAMayAliasAccessInfo - Get TBAA information that represents
-  /// may-alias accesses.
-  TBAAAccessInfo getTBAAMayAliasAccessInfo();
-
   /// mergeTBAAInfoForCast - Get merged TBAA information for the purposes of
   /// type casts.
   TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
                                       TBAAAccessInfo TargetInfo);
 
+  /// mergeTBAAInfoForConditionalOperator - Get merged TBAA information for the
+  /// purposes of conditional operator.
+  TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
+                                                     TBAAAccessInfo InfoB);
+
+  /// mergeTBAAInfoForMemoryTransfer - Get merged TBAA information for the
+  /// purposes of memory transfer calls.
+  TBAAAccessInfo mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
+                                                TBAAAccessInfo SrcInfo);
+
+  /// getTBAAInfoForSubobject - Get TBAA information for an access with a given
+  /// base lvalue.
+  TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType) {
+    if (Base.getTBAAInfo().isMayAlias())
+      return TBAAAccessInfo::getMayAliasInfo();
+    return getTBAAAccessInfo(AccessType);
+  }
+
   bool isTypeConstant(QualType QTy, bool ExcludeCtorDtor);
 
   bool isPaddedAtomicType(QualType type);
@@ -701,6 +721,18 @@
   /// Set the visibility for the given LLVM GlobalValue.
   void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D) const;
 
+  void setGlobalVisibilityAndLocal(llvm::GlobalValue *GV,
+                                   const NamedDecl *D) const;
+
+  void setDSOLocal(llvm::GlobalValue *GV) const;
+
+  void setDLLImportDLLExport(llvm::GlobalValue *GV, GlobalDecl D) const;
+  void setDLLImportDLLExport(llvm::GlobalValue *GV, const NamedDecl *D) const;
+  /// Set visibility, dllimport/dllexport and dso_local.
+  /// This must be called after dllimport/dllexport is set.
+  void setGVProperties(llvm::GlobalValue *GV, GlobalDecl GD) const;
+  void setGVProperties(llvm::GlobalValue *GV, const NamedDecl *D) const;
+
   /// Set the TLS mode for the given LLVM GlobalValue for the thread-local
   /// variable declaration D.
   void setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const;
@@ -990,7 +1022,7 @@
   /// Set the attributes on the LLVM function for the given decl and function
   /// info. This applies attributes necessary for handling the ABI as well as
   /// user specified attributes like section.
-  void SetInternalFunctionAttributes(const Decl *D, llvm::Function *F,
+  void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F,
                                      const CGFunctionInfo &FI);
 
   /// Set the LLVM function attributes (sext, zext, etc).
@@ -1073,15 +1105,14 @@
   /// value.
   void AddDependentLib(StringRef Lib);
 
+  void AddELFLibDirective(StringRef Lib);
+
   llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD);
 
   void setFunctionLinkage(GlobalDecl GD, llvm::Function *F) {
     F->setLinkage(getFunctionLinkage(GD));
   }
 
-  /// Set the DLL storage class on F.
-  void setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F);
-
   /// Return the appropriate linkage for the vtable, VTT, and type information
   /// of the given class.
   llvm::GlobalVariable::LinkageTypes getVTableLinkage(const CXXRecordDecl *RD);
@@ -1153,23 +1184,13 @@
   bool TryEmitDefinitionAsAlias(GlobalDecl Alias, GlobalDecl Target);
   bool TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D);
 
-  /// Set attributes for a global definition.
-  void setFunctionDefinitionAttributes(const FunctionDecl *D,
-                                       llvm::Function *F);
-
   llvm::GlobalValue *GetGlobalValue(StringRef Ref);
 
   /// Set attributes which are common to any form of a global definition (alias,
   /// Objective-C method, function, global variable).
   ///
   /// NOTE: This should only be called for definitions.
-  void SetCommonAttributes(const Decl *D, llvm::GlobalValue *GV);
-
-  /// Set attributes which must be preserved by an alias. This includes common
-  /// attributes (i.e. it includes a call to SetCommonAttributes).
-  ///
-  /// NOTE: This should only be called for definitions.
-  void setAliasAttributes(const Decl *D, llvm::GlobalValue *GV);
+  void SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV);
 
   void addReplacement(StringRef Name, llvm::Constant *C);
 
@@ -1200,6 +1221,11 @@
   /// internal identifiers).
   llvm::Metadata *CreateMetadataIdentifierForType(QualType T);
 
+  /// Create a metadata identifier for the generalization of the given type.
+  /// This may either be an MDString (for external identifiers) or a distinct
+  /// unnamed MDNode (for internal identifiers).
+  llvm::Metadata *CreateMetadataIdentifierGeneralized(QualType T);
+
   /// Create and attach type metadata to the given function.
   void CreateFunctionTypeMetadata(const FunctionDecl *FD, llvm::Function *F);
 
@@ -1230,18 +1256,25 @@
       llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
       ForDefinition_t IsForDefinition = NotForDefinition);
 
+  llvm::Constant *GetOrCreateMultiVersionIFunc(GlobalDecl GD,
+                                               llvm::Type *DeclTy,
+                                               StringRef MangledName,
+                                               const FunctionDecl *FD);
+  void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD);
+
   llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName,
                                         llvm::PointerType *PTy,
                                         const VarDecl *D,
                                         ForDefinition_t IsForDefinition
                                           = NotForDefinition);
 
-  void setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO);
+  bool GetCPUAndFeaturesAttributes(const Decl *D,
+                                   llvm::AttrBuilder &AttrBuilder);
+  void setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO);
 
   /// Set function attributes for a function declaration.
   void SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
-                             bool IsIncompleteFunction, bool IsThunk,
-                             ForDefinition_t IsForDefinition);
+                             bool IsIncompleteFunction, bool IsThunk);
 
   void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr);
 
@@ -1302,6 +1335,8 @@
 
   void checkAliases();
 
+  void emitMultiVersionFunctions();
+
   /// Emit any vtables which we deferred and still have a use for.
   void EmitDeferredVTables();
 
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index c3d66c1..295893c 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -22,9 +22,10 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MD5.h"
 
-static llvm::cl::opt<bool> EnableValueProfiling(
-  "enable-value-profiling", llvm::cl::ZeroOrMore,
-  llvm::cl::desc("Enable value profiling"), llvm::cl::init(false));
+static llvm::cl::opt<bool>
+    EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore,
+                         llvm::cl::desc("Enable value profiling"),
+                         llvm::cl::Hidden, llvm::cl::init(false));
 
 using namespace clang;
 using namespace CodeGen;
@@ -47,6 +48,15 @@
   llvm::createPGOFuncNameMetadata(*Fn, FuncName);
 }
 
+/// The version of the PGO hash algorithm.
+enum PGOHashVersion : unsigned {
+  PGO_HASH_V1,
+  PGO_HASH_V2,
+
+  // Keep this set to the latest hash version.
+  PGO_HASH_LATEST = PGO_HASH_V2
+};
+
 namespace {
 /// \brief Stable hasher for PGO region counters.
 ///
@@ -61,6 +71,7 @@
 class PGOHash {
   uint64_t Working;
   unsigned Count;
+  PGOHashVersion HashVersion;
   llvm::MD5 MD5;
 
   static const int NumBitsPerType = 6;
@@ -93,24 +104,53 @@
     BinaryOperatorLAnd,
     BinaryOperatorLOr,
     BinaryConditionalOperator,
+    // The preceding values are available with PGO_HASH_V1.
+
+    EndOfScope,
+    IfThenBranch,
+    IfElseBranch,
+    GotoStmt,
+    IndirectGotoStmt,
+    BreakStmt,
+    ContinueStmt,
+    ReturnStmt,
+    ThrowExpr,
+    UnaryOperatorLNot,
+    BinaryOperatorLT,
+    BinaryOperatorGT,
+    BinaryOperatorLE,
+    BinaryOperatorGE,
+    BinaryOperatorEQ,
+    BinaryOperatorNE,
+    // The preceding values are available with PGO_HASH_V2.
 
     // Keep this last.  It's for the static assert that follows.
     LastHashType
   };
   static_assert(LastHashType <= TooBig, "Too many types in HashType");
 
-  // TODO: When this format changes, take in a version number here, and use the
-  // old hash calculation for file formats that used the old hash.
-  PGOHash() : Working(0), Count(0) {}
+  PGOHash(PGOHashVersion HashVersion)
+      : Working(0), Count(0), HashVersion(HashVersion), MD5() {}
   void combine(HashType Type);
   uint64_t finalize();
+  PGOHashVersion getHashVersion() const { return HashVersion; }
 };
 const int PGOHash::NumBitsPerType;
 const unsigned PGOHash::NumTypesPerWord;
 const unsigned PGOHash::TooBig;
 
+/// Get the PGO hash version used in the given indexed profile.
+static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
+                                        CodeGenModule &CGM) {
+  if (PGOReader->getVersion() <= 4)
+    return PGO_HASH_V1;
+  return PGO_HASH_V2;
+}
+
 /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
 struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
+  using Base = RecursiveASTVisitor<MapRegionCounters>;
+
   /// The next counter value to assign.
   unsigned NextCounter;
   /// The function hash.
@@ -118,8 +158,9 @@
   /// The map of statements to counters.
   llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
 
-  MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
-      : NextCounter(0), CounterMap(CounterMap) {}
+  MapRegionCounters(PGOHashVersion HashVersion,
+                    llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
+      : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap) {}
 
   // Blocks and lambdas are handled as separate functions, so we need not
   // traverse them in the parent context.
@@ -145,16 +186,66 @@
     return true;
   }
 
-  bool VisitStmt(const Stmt *S) {
-    auto Type = getHashType(S);
-    if (Type == PGOHash::None)
-      return true;
+  /// If \p S gets a fresh counter, update the counter mappings. Return the
+  /// V1 hash of \p S.
+  PGOHash::HashType updateCounterMappings(Stmt *S) {
+    auto Type = getHashType(PGO_HASH_V1, S);
+    if (Type != PGOHash::None)
+      CounterMap[S] = NextCounter++;
+    return Type;
+  }
 
-    CounterMap[S] = NextCounter++;
-    Hash.combine(Type);
+  /// Include \p S in the function hash.
+  bool VisitStmt(Stmt *S) {
+    auto Type = updateCounterMappings(S);
+    if (Hash.getHashVersion() != PGO_HASH_V1)
+      Type = getHashType(Hash.getHashVersion(), S);
+    if (Type != PGOHash::None)
+      Hash.combine(Type);
     return true;
   }
-  PGOHash::HashType getHashType(const Stmt *S) {
+
+  bool TraverseIfStmt(IfStmt *If) {
+    // If we used the V1 hash, use the default traversal.
+    if (Hash.getHashVersion() == PGO_HASH_V1)
+      return Base::TraverseIfStmt(If);
+
+    // Otherwise, keep track of which branch we're in while traversing.
+    VisitStmt(If);
+    for (Stmt *CS : If->children()) {
+      if (!CS)
+        continue;
+      if (CS == If->getThen())
+        Hash.combine(PGOHash::IfThenBranch);
+      else if (CS == If->getElse())
+        Hash.combine(PGOHash::IfElseBranch);
+      TraverseStmt(CS);
+    }
+    Hash.combine(PGOHash::EndOfScope);
+    return true;
+  }
+
+// If the statement type \p N is nestable, and its nesting impacts profile
+// stability, define a custom traversal which tracks the end of the statement
+// in the hash (provided we're not using the V1 hash).
+#define DEFINE_NESTABLE_TRAVERSAL(N)                                           \
+  bool Traverse##N(N *S) {                                                     \
+    Base::Traverse##N(S);                                                      \
+    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
+      Hash.combine(PGOHash::EndOfScope);                                       \
+    return true;                                                               \
+  }
+
+  DEFINE_NESTABLE_TRAVERSAL(WhileStmt)
+  DEFINE_NESTABLE_TRAVERSAL(DoStmt)
+  DEFINE_NESTABLE_TRAVERSAL(ForStmt)
+  DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt)
+  DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
+  DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
+  DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)
+
+  /// Get version \p HashVersion of the PGO hash for \p S.
+  PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
     switch (S->getStmtClass()) {
     default:
       break;
@@ -192,9 +283,53 @@
         return PGOHash::BinaryOperatorLAnd;
       if (BO->getOpcode() == BO_LOr)
         return PGOHash::BinaryOperatorLOr;
+      if (HashVersion == PGO_HASH_V2) {
+        switch (BO->getOpcode()) {
+        default:
+          break;
+        case BO_LT:
+          return PGOHash::BinaryOperatorLT;
+        case BO_GT:
+          return PGOHash::BinaryOperatorGT;
+        case BO_LE:
+          return PGOHash::BinaryOperatorLE;
+        case BO_GE:
+          return PGOHash::BinaryOperatorGE;
+        case BO_EQ:
+          return PGOHash::BinaryOperatorEQ;
+        case BO_NE:
+          return PGOHash::BinaryOperatorNE;
+        }
+      }
       break;
     }
     }
+
+    if (HashVersion == PGO_HASH_V2) {
+      switch (S->getStmtClass()) {
+      default:
+        break;
+      case Stmt::GotoStmtClass:
+        return PGOHash::GotoStmt;
+      case Stmt::IndirectGotoStmtClass:
+        return PGOHash::IndirectGotoStmt;
+      case Stmt::BreakStmtClass:
+        return PGOHash::BreakStmt;
+      case Stmt::ContinueStmtClass:
+        return PGOHash::ContinueStmt;
+      case Stmt::ReturnStmtClass:
+        return PGOHash::ReturnStmt;
+      case Stmt::CXXThrowExprClass:
+        return PGOHash::ThrowExpr;
+      case Stmt::UnaryOperatorClass: {
+        const UnaryOperator *UO = cast<UnaryOperator>(S);
+        if (UO->getOpcode() == UO_LNot)
+          return PGOHash::UnaryOperatorLNot;
+        break;
+      }
+      }
+    }
+
     return PGOHash::None;
   }
 };
@@ -653,8 +788,14 @@
 }
 
 void CodeGenPGO::mapRegionCounters(const Decl *D) {
+  // Use the latest hash version when inserting instrumentation, but use the
+  // version in the indexed profile if we're reading PGO data.
+  PGOHashVersion HashVersion = PGO_HASH_LATEST;
+  if (auto *PGOReader = CGM.getPGOReader())
+    HashVersion = getPGOHashVersion(PGOReader, CGM);
+
   RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
-  MapRegionCounters Walker(*RegionCounterMap);
+  MapRegionCounters Walker(HashVersion, *RegionCounterMap);
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
     Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
   else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp
index 6f9747e..ec48231 100644
--- a/lib/CodeGen/CodeGenTBAA.cpp
+++ b/lib/CodeGen/CodeGenTBAA.cpp
@@ -25,16 +25,18 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 using namespace clang;
 using namespace CodeGen;
 
-CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, llvm::LLVMContext& VMContext,
+CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, llvm::Module &M,
                          const CodeGenOptions &CGO,
                          const LangOptions &Features, MangleContext &MContext)
-  : Context(Ctx), CodeGenOpts(CGO), Features(Features), MContext(MContext),
-    MDHelper(VMContext), Root(nullptr), Char(nullptr) {
-}
+  : Context(Ctx), Module(M), CodeGenOpts(CGO),
+    Features(Features), MContext(MContext), MDHelper(M.getContext()),
+    Root(nullptr), Char(nullptr)
+{}
 
 CodeGenTBAA::~CodeGenTBAA() {
 }
@@ -54,10 +56,13 @@
   return Root;
 }
 
-// For both scalar TBAA and struct-path aware TBAA, the scalar type has the
-// same format: name, parent node, and offset.
-llvm::MDNode *CodeGenTBAA::createTBAAScalarType(StringRef Name,
-                                                llvm::MDNode *Parent) {
+llvm::MDNode *CodeGenTBAA::createScalarTypeNode(StringRef Name,
+                                                llvm::MDNode *Parent,
+                                                uint64_t Size) {
+  if (CodeGenOpts.NewStructPathTBAA) {
+    llvm::Metadata *Id = MDHelper.createString(Name);
+    return MDHelper.createTBAATypeNode(Parent, Size, Id);
+  }
   return MDHelper.createTBAAScalarTypeNode(Name, Parent);
 }
 
@@ -67,7 +72,7 @@
   // these special powers only cover user-accessible memory, and doesn't
   // include things like vtables.
   if (!Char)
-    Char = createTBAAScalarType("omnipotent char", getRoot());
+    Char = createScalarTypeNode("omnipotent char", getRoot(), /* Size= */ 1);
 
   return Char;
 }
@@ -88,20 +93,27 @@
   return false;
 }
 
-llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) {
-  // At -O0 or relaxed aliasing, TBAA is not emitted for regular types.
-  if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)
-    return nullptr;
+/// Check if the given type is a valid base type to be used in access tags.
+static bool isValidBaseType(QualType QTy) {
+  if (QTy->isReferenceType())
+    return false;
+  if (const RecordType *TTy = QTy->getAs<RecordType>()) {
+    const RecordDecl *RD = TTy->getDecl()->getDefinition();
+    // Incomplete types are not valid base access types.
+    if (!RD)
+      return false;
+    if (RD->hasFlexibleArrayMember())
+      return false;
+    // RD can be struct, union, class, interface or enum.
+    // For now, we only handle struct and class.
+    if (RD->isStruct() || RD->isClass())
+      return true;
+  }
+  return false;
+}
 
-  // If the type has the may_alias attribute (even on a typedef), it is
-  // effectively in the general char alias class.
-  if (TypeHasMayAlias(QTy))
-    return getChar();
-
-  const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
-
-  if (llvm::MDNode *N = MetadataCache[Ty])
-    return N;
+llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
+  uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity();
 
   // Handle builtin types.
   if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Ty)) {
@@ -133,8 +145,7 @@
     // treating wchar_t, char16_t, and char32_t as distinct from their
     // "underlying types".
     default:
-      return MetadataCache[Ty] =
-        createTBAAScalarType(BTy->getName(Features), getChar());
+      return createScalarTypeNode(BTy->getName(Features), getChar(), Size);
     }
   }
 
@@ -142,14 +153,17 @@
   // an object through a glvalue of other than one of the following types the
   // behavior is undefined: [...] a char, unsigned char, or std::byte type."
   if (Ty->isStdByteType())
-    return MetadataCache[Ty] = getChar();
+    return getChar();
 
   // Handle pointers and references.
   // TODO: Implement C++'s type "similarity" and consider dis-"similar"
   // pointers distinct.
   if (Ty->isPointerType() || Ty->isReferenceType())
-    return MetadataCache[Ty] = createTBAAScalarType("any pointer",
-                                                    getChar());
+    return createScalarTypeNode("any pointer", getChar(), Size);
+
+  // Accesses to arrays are accesses to objects of their element types.
+  if (CodeGenOpts.NewStructPathTBAA && Ty->isArrayType())
+    return getTypeInfo(cast<ArrayType>(Ty)->getElementType());
 
   // Enum types are distinct types. In C++ they have "underlying types",
   // however they aren't related for TBAA.
@@ -159,20 +173,66 @@
     // TODO: Is there a way to get a program-wide unique name for a
     // decl with local linkage or no linkage?
     if (!Features.CPlusPlus || !ETy->getDecl()->isExternallyVisible())
-      return MetadataCache[Ty] = getChar();
+      return getChar();
 
     SmallString<256> OutName;
     llvm::raw_svector_ostream Out(OutName);
     MContext.mangleTypeName(QualType(ETy, 0), Out);
-    return MetadataCache[Ty] = createTBAAScalarType(OutName, getChar());
+    return createScalarTypeNode(OutName, getChar(), Size);
   }
 
   // For now, handle any other kind of type conservatively.
-  return MetadataCache[Ty] = getChar();
+  return getChar();
 }
 
-TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo() {
-  return TBAAAccessInfo(createTBAAScalarType("vtable pointer", getRoot()));
+llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) {
+  // At -O0 or relaxed aliasing, TBAA is not emitted for regular types.
+  if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)
+    return nullptr;
+
+  // If the type has the may_alias attribute (even on a typedef), it is
+  // effectively in the general char alias class.
+  if (TypeHasMayAlias(QTy))
+    return getChar();
+
+  // We need this function to not fall back to returning the "omnipotent char"
+  // type node for aggregate and union types. Otherwise, any dereference of an
+  // aggregate will result into the may-alias access descriptor, meaning all
+  // subsequent accesses to direct and indirect members of that aggregate will
+  // be considered may-alias too.
+  // TODO: Combine getTypeInfo() and getBaseTypeInfo() into a single function.
+  if (isValidBaseType(QTy))
+    return getBaseTypeInfo(QTy);
+
+  const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
+  if (llvm::MDNode *N = MetadataCache[Ty])
+    return N;
+
+  // Note that the following helper call is allowed to add new nodes to the
+  // cache, which invalidates all its previously obtained iterators. So we
+  // first generate the node for the type and then add that node to the cache.
+  llvm::MDNode *TypeNode = getTypeInfoHelper(Ty);
+  return MetadataCache[Ty] = TypeNode;
+}
+
+TBAAAccessInfo CodeGenTBAA::getAccessInfo(QualType AccessType) {
+  // Pointee values may have incomplete types, but they shall never be
+  // dereferenced.
+  if (AccessType->isIncompleteType())
+    return TBAAAccessInfo::getIncompleteInfo();
+
+  if (TypeHasMayAlias(AccessType))
+    return TBAAAccessInfo::getMayAliasInfo();
+
+  uint64_t Size = Context.getTypeSizeInChars(AccessType).getQuantity();
+  return TBAAAccessInfo(getTypeInfo(AccessType), Size);
+}
+
+TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) {
+  llvm::DataLayout DL(&Module);
+  unsigned Size = DL.getPointerTypeSize(VTablePtrType);
+  return TBAAAccessInfo(createScalarTypeNode("vtable pointer", getRoot(), Size),
+                        Size);
 }
 
 bool
@@ -212,7 +272,7 @@
   uint64_t Offset = BaseOffset;
   uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity();
   llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy);
-  llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType));
+  llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType, Size));
   Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag));
   return true;
 }
@@ -232,43 +292,23 @@
   return StructMetadataCache[Ty] = nullptr;
 }
 
-/// Check if the given type is a valid base type to be used in access tags.
-static bool isValidBaseType(QualType QTy) {
-  if (const RecordType *TTy = QTy->getAs<RecordType>()) {
+llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
+  if (auto *TTy = dyn_cast<RecordType>(Ty)) {
     const RecordDecl *RD = TTy->getDecl()->getDefinition();
-    if (RD->hasFlexibleArrayMember())
-      return false;
-    // RD can be struct, union, class, interface or enum.
-    // For now, we only handle struct and class.
-    if (RD->isStruct() || RD->isClass())
-      return true;
-  }
-  return false;
-}
-
-llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) {
-  if (!isValidBaseType(QTy))
-    return nullptr;
-
-  const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
-  if (llvm::MDNode *N = BaseTypeMetadataCache[Ty])
-    return N;
-
-  if (const RecordType *TTy = QTy->getAs<RecordType>()) {
-    const RecordDecl *RD = TTy->getDecl()->getDefinition();
-
     const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
-    SmallVector <std::pair<llvm::MDNode*, uint64_t>, 4> Fields;
-    unsigned idx = 0;
-    for (RecordDecl::field_iterator i = RD->field_begin(),
-         e = RD->field_end(); i != e; ++i, ++idx) {
-      QualType FieldQTy = i->getType();
-      llvm::MDNode *FieldNode = isValidBaseType(FieldQTy) ?
+    SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields;
+    for (FieldDecl *Field : RD->fields()) {
+      QualType FieldQTy = Field->getType();
+      llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) ?
           getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy);
-      if (!FieldNode)
+      if (!TypeNode)
         return BaseTypeMetadataCache[Ty] = nullptr;
-      Fields.push_back(std::make_pair(
-          FieldNode, Layout.getFieldOffset(idx) / Context.getCharWidth()));
+
+      uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex());
+      uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity();
+      uint64_t Size = Context.getTypeSizeInChars(FieldQTy).getQuantity();
+      Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size,
+                                                        TypeNode));
     }
 
     SmallString<256> OutName;
@@ -279,20 +319,50 @@
     } else {
       OutName = RD->getName();
     }
+
+    if (CodeGenOpts.NewStructPathTBAA) {
+      llvm::MDNode *Parent = getChar();
+      uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity();
+      llvm::Metadata *Id = MDHelper.createString(OutName);
+      return MDHelper.createTBAATypeNode(Parent, Size, Id, Fields);
+    }
+
     // Create the struct type node with a vector of pairs (offset, type).
-    return BaseTypeMetadataCache[Ty] =
-      MDHelper.createTBAAStructTypeNode(OutName, Fields);
+    SmallVector<std::pair<llvm::MDNode*, uint64_t>, 4> OffsetsAndTypes;
+    for (const auto &Field : Fields)
+        OffsetsAndTypes.push_back(std::make_pair(Field.Type, Field.Offset));
+    return MDHelper.createTBAAStructTypeNode(OutName, OffsetsAndTypes);
   }
 
-  return BaseTypeMetadataCache[Ty] = nullptr;
+  return nullptr;
+}
+
+llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) {
+  if (!isValidBaseType(QTy))
+    return nullptr;
+
+  const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
+  if (llvm::MDNode *N = BaseTypeMetadataCache[Ty])
+    return N;
+
+  // Note that the following helper call is allowed to add new nodes to the
+  // cache, which invalidates all its previously obtained iterators. So we
+  // first generate the node for the type and then add that node to the cache.
+  llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty);
+  return BaseTypeMetadataCache[Ty] = TypeNode;
 }
 
 llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) {
+  assert(!Info.isIncomplete() && "Access to an object of an incomplete type!");
+
+  if (Info.isMayAlias())
+    Info = TBAAAccessInfo(getChar(), Info.Size);
+
   if (!Info.AccessType)
     return nullptr;
 
   if (!CodeGenOpts.StructPathTBAA)
-    Info = TBAAAccessInfo(Info.AccessType);
+    Info = TBAAAccessInfo(Info.AccessType, Info.Size);
 
   llvm::MDNode *&N = AccessTagMetadataCache[Info];
   if (N)
@@ -302,18 +372,53 @@
     Info.BaseType = Info.AccessType;
     assert(!Info.Offset && "Nonzero offset for an access with no base type!");
   }
+  if (CodeGenOpts.NewStructPathTBAA) {
+    return N = MDHelper.createTBAAAccessTag(Info.BaseType, Info.AccessType,
+                                            Info.Offset, Info.Size);
+  }
   return N = MDHelper.createTBAAStructTagNode(Info.BaseType, Info.AccessType,
                                               Info.Offset);
 }
 
-TBAAAccessInfo CodeGenTBAA::getMayAliasAccessInfo() {
-  return TBAAAccessInfo(getChar());
-}
-
 TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
                                                  TBAAAccessInfo TargetInfo) {
-  TBAAAccessInfo MayAliasInfo = getMayAliasAccessInfo();
-  if (SourceInfo == MayAliasInfo || TargetInfo == MayAliasInfo)
-    return MayAliasInfo;
+  if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias())
+    return TBAAAccessInfo::getMayAliasInfo();
   return TargetInfo;
 }
+
+TBAAAccessInfo
+CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
+                                                 TBAAAccessInfo InfoB) {
+  if (InfoA == InfoB)
+    return InfoA;
+
+  if (!InfoA || !InfoB)
+    return TBAAAccessInfo();
+
+  if (InfoA.isMayAlias() || InfoB.isMayAlias())
+    return TBAAAccessInfo::getMayAliasInfo();
+
+  // TODO: Implement the rest of the logic here. For example, two accesses
+  // with same final access types result in an access to an object of that final
+  // access type regardless of their base types.
+  return TBAAAccessInfo::getMayAliasInfo();
+}
+
+TBAAAccessInfo
+CodeGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
+                                            TBAAAccessInfo SrcInfo) {
+  if (DestInfo == SrcInfo)
+    return DestInfo;
+
+  if (!DestInfo || !SrcInfo)
+    return TBAAAccessInfo();
+
+  if (DestInfo.isMayAlias() || SrcInfo.isMayAlias())
+    return TBAAAccessInfo::getMayAliasInfo();
+
+  // TODO: Implement the rest of the logic here. For example, two accesses
+  // with same final access types result in an access to an object of that final
+  // access type regardless of their base types.
+  return TBAAAccessInfo::getMayAliasInfo();
+}
diff --git a/lib/CodeGen/CodeGenTBAA.h b/lib/CodeGen/CodeGenTBAA.h
index 7f499da..86ba407 100644
--- a/lib/CodeGen/CodeGenTBAA.h
+++ b/lib/CodeGen/CodeGenTBAA.h
@@ -32,27 +32,70 @@
 namespace CodeGen {
 class CGRecordLayout;
 
+// TBAAAccessKind - A kind of TBAA memory access descriptor.
+enum class TBAAAccessKind : unsigned {
+  Ordinary,
+  MayAlias,
+  Incomplete,
+};
+
 // TBAAAccessInfo - Describes a memory access in terms of TBAA.
 struct TBAAAccessInfo {
-  TBAAAccessInfo(llvm::MDNode *BaseType, llvm::MDNode *AccessType,
-                 uint64_t Offset)
-    : BaseType(BaseType), AccessType(AccessType), Offset(Offset)
+  TBAAAccessInfo(TBAAAccessKind Kind, llvm::MDNode *BaseType,
+                 llvm::MDNode *AccessType, uint64_t Offset, uint64_t Size)
+    : Kind(Kind), BaseType(BaseType), AccessType(AccessType),
+      Offset(Offset), Size(Size)
   {}
 
-  explicit TBAAAccessInfo(llvm::MDNode *AccessType)
-    : TBAAAccessInfo(/* BaseType= */ nullptr, AccessType, /* Offset= */ 0)
+  TBAAAccessInfo(llvm::MDNode *BaseType, llvm::MDNode *AccessType,
+                 uint64_t Offset, uint64_t Size)
+    : TBAAAccessInfo(TBAAAccessKind::Ordinary, BaseType, AccessType,
+                     Offset, Size)
+  {}
+
+  explicit TBAAAccessInfo(llvm::MDNode *AccessType, uint64_t Size)
+    : TBAAAccessInfo(/* BaseType= */ nullptr, AccessType, /* Offset= */ 0, Size)
   {}
 
   TBAAAccessInfo()
-    : TBAAAccessInfo(/* AccessType= */ nullptr)
+    : TBAAAccessInfo(/* AccessType= */ nullptr, /* Size= */ 0)
   {}
 
-  bool operator==(const TBAAAccessInfo &Other) const {
-    return BaseType == Other.BaseType &&
-           AccessType == Other.AccessType &&
-           Offset == Other.Offset;
+  static TBAAAccessInfo getMayAliasInfo() {
+    return TBAAAccessInfo(TBAAAccessKind::MayAlias,
+                          /* BaseType= */ nullptr, /* AccessType= */ nullptr,
+                          /* Offset= */ 0, /* Size= */ 0);
   }
 
+  bool isMayAlias() const { return Kind == TBAAAccessKind::MayAlias; }
+
+  static TBAAAccessInfo getIncompleteInfo() {
+    return TBAAAccessInfo(TBAAAccessKind::Incomplete,
+                          /* BaseType= */ nullptr, /* AccessType= */ nullptr,
+                          /* Offset= */ 0, /* Size= */ 0);
+  }
+
+  bool isIncomplete() const { return Kind == TBAAAccessKind::Incomplete; }
+
+  bool operator==(const TBAAAccessInfo &Other) const {
+    return Kind == Other.Kind &&
+           BaseType == Other.BaseType &&
+           AccessType == Other.AccessType &&
+           Offset == Other.Offset &&
+           Size == Other.Size;
+  }
+
+  bool operator!=(const TBAAAccessInfo &Other) const {
+    return !(*this == Other);
+  }
+
+  explicit operator bool() const {
+    return *this != TBAAAccessInfo();
+  }
+
+  /// Kind - The kind of the access descriptor.
+  TBAAAccessKind Kind;
+
   /// BaseType - The base/leading access type. May be null if this access
   /// descriptor represents an access that is not considered to be an access
   /// to an aggregate or union member.
@@ -65,12 +108,16 @@
   /// Offset - The byte offset of the final access within the base one. Must be
   /// zero if the base access type is not specified.
   uint64_t Offset;
+
+  /// Size - The size of access, in bytes.
+  uint64_t Size;
 };
 
 /// CodeGenTBAA - This class organizes the cross-module state that is used
 /// while lowering AST types to LLVM types.
 class CodeGenTBAA {
   ASTContext &Context;
+  llvm::Module &Module;
   const CodeGenOptions &CodeGenOpts;
   const LangOptions &Features;
   MangleContext &MContext;
@@ -108,25 +155,35 @@
                      SmallVectorImpl<llvm::MDBuilder::TBAAStructField> &Fields,
                      bool MayAlias);
 
-  /// A wrapper function to create a scalar type. For struct-path aware TBAA,
-  /// the scalar type has the same format as the struct type: name, offset,
-  /// pointer to another node in the type DAG.
-  llvm::MDNode *createTBAAScalarType(StringRef Name, llvm::MDNode *Parent);
+  /// createScalarTypeNode - A wrapper function to create a metadata node
+  /// describing a scalar type.
+  llvm::MDNode *createScalarTypeNode(StringRef Name, llvm::MDNode *Parent,
+                                     uint64_t Size);
+
+  /// getTypeInfoHelper - An internal helper function to generate metadata used
+  /// to describe accesses to objects of the given type.
+  llvm::MDNode *getTypeInfoHelper(const Type *Ty);
+
+  /// getBaseTypeInfoHelper - An internal helper function to generate metadata
+  /// used to describe accesses to objects of the given base type.
+  llvm::MDNode *getBaseTypeInfoHelper(const Type *Ty);
 
 public:
-  CodeGenTBAA(ASTContext &Ctx, llvm::LLVMContext &VMContext,
-              const CodeGenOptions &CGO,
-              const LangOptions &Features,
-              MangleContext &MContext);
+  CodeGenTBAA(ASTContext &Ctx, llvm::Module &M, const CodeGenOptions &CGO,
+              const LangOptions &Features, MangleContext &MContext);
   ~CodeGenTBAA();
 
   /// getTypeInfo - Get metadata used to describe accesses to objects of the
   /// given type.
   llvm::MDNode *getTypeInfo(QualType QTy);
 
+  /// getAccessInfo - Get TBAA information that describes an access to
+  /// an object of the given type.
+  TBAAAccessInfo getAccessInfo(QualType AccessType);
+
   /// getVTablePtrAccessInfo - Get the TBAA information that describes an
   /// access to a virtual table pointer.
-  TBAAAccessInfo getVTablePtrAccessInfo();
+  TBAAAccessInfo getVTablePtrAccessInfo(llvm::Type *VTablePtrType);
 
   /// getTBAAStructInfo - Get the TBAAStruct MDNode to be used for a memcpy of
   /// the given type.
@@ -139,14 +196,20 @@
   /// getAccessTagInfo - Get TBAA tag for a given memory access.
   llvm::MDNode *getAccessTagInfo(TBAAAccessInfo Info);
 
-  /// getMayAliasAccessInfo - Get TBAA information that represents may-alias
-  /// accesses.
-  TBAAAccessInfo getMayAliasAccessInfo();
-
   /// mergeTBAAInfoForCast - Get merged TBAA information for the purpose of
   /// type casts.
   TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
                                       TBAAAccessInfo TargetInfo);
+
+  /// mergeTBAAInfoForConditionalOperator - Get merged TBAA information for the
+  /// purpose of conditional operator.
+  TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
+                                                     TBAAAccessInfo InfoB);
+
+  /// mergeTBAAInfoForMemoryTransfer - Get merged TBAA information for the
+  /// purpose of memory transfer calls.
+  TBAAAccessInfo mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
+                                                TBAAAccessInfo SrcInfo);
 };
 
 }  // end namespace CodeGen
@@ -156,30 +219,37 @@
 
 template<> struct DenseMapInfo<clang::CodeGen::TBAAAccessInfo> {
   static clang::CodeGen::TBAAAccessInfo getEmptyKey() {
+    unsigned UnsignedKey = DenseMapInfo<unsigned>::getEmptyKey();
     return clang::CodeGen::TBAAAccessInfo(
+      static_cast<clang::CodeGen::TBAAAccessKind>(UnsignedKey),
       DenseMapInfo<MDNode *>::getEmptyKey(),
       DenseMapInfo<MDNode *>::getEmptyKey(),
+      DenseMapInfo<uint64_t>::getEmptyKey(),
       DenseMapInfo<uint64_t>::getEmptyKey());
   }
 
   static clang::CodeGen::TBAAAccessInfo getTombstoneKey() {
+    unsigned UnsignedKey = DenseMapInfo<unsigned>::getTombstoneKey();
     return clang::CodeGen::TBAAAccessInfo(
+      static_cast<clang::CodeGen::TBAAAccessKind>(UnsignedKey),
       DenseMapInfo<MDNode *>::getTombstoneKey(),
       DenseMapInfo<MDNode *>::getTombstoneKey(),
+      DenseMapInfo<uint64_t>::getTombstoneKey(),
       DenseMapInfo<uint64_t>::getTombstoneKey());
   }
 
   static unsigned getHashValue(const clang::CodeGen::TBAAAccessInfo &Val) {
-    return DenseMapInfo<MDNode *>::getHashValue(Val.BaseType) ^
+    auto KindValue = static_cast<unsigned>(Val.Kind);
+    return DenseMapInfo<unsigned>::getHashValue(KindValue) ^
+           DenseMapInfo<MDNode *>::getHashValue(Val.BaseType) ^
            DenseMapInfo<MDNode *>::getHashValue(Val.AccessType) ^
-           DenseMapInfo<uint64_t>::getHashValue(Val.Offset);
+           DenseMapInfo<uint64_t>::getHashValue(Val.Offset) ^
+           DenseMapInfo<uint64_t>::getHashValue(Val.Size);
   }
 
   static bool isEqual(const clang::CodeGen::TBAAAccessInfo &LHS,
                       const clang::CodeGen::TBAAAccessInfo &RHS) {
-    return LHS.BaseType == RHS.BaseType &&
-           LHS.AccessType == RHS.AccessType &&
-           LHS.Offset == RHS.Offset;
+    return LHS == RHS;
   }
 };
 
diff --git a/lib/CodeGen/CodeGenTypeCache.h b/lib/CodeGen/CodeGenTypeCache.h
index 2af7b30..fb096ac 100644
--- a/lib/CodeGen/CodeGenTypeCache.h
+++ b/lib/CodeGen/CodeGenTypeCache.h
@@ -37,7 +37,7 @@
   /// i8, i16, i32, and i64
   llvm::IntegerType *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty;
   /// float, double
-  llvm::Type *FloatTy, *DoubleTy;
+  llvm::Type *HalfTy, *FloatTy, *DoubleTy;
 
   /// int
   llvm::IntegerType *IntTy;
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 80f2a31..ccb6df9 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -451,10 +451,10 @@
 
     case BuiltinType::Half:
       // Half FP can either be storage-only (lowered to i16) or native.
-      ResultType =
-          getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T),
-                           Context.getLangOpts().NativeHalfType ||
-                               Context.getLangOpts().HalfArgsAndReturns);
+      ResultType = getTypeForFormat(
+          getLLVMContext(), Context.getFloatTypeSemantics(T),
+          Context.getLangOpts().NativeHalfType ||
+              !Context.getTargetInfo().useFP16ConversionIntrinsics());
       break;
     case BuiltinType::Float:
     case BuiltinType::Double:
@@ -767,7 +767,7 @@
   // Records are non-zero-initializable if they contain any
   // non-zero-initializable subobjects.
   if (const RecordType *RT = T->getAs<RecordType>()) {
-    auto RD = cast<RecordDecl>(RT->getDecl());
+    const RecordDecl *RD = RT->getDecl();
     return isZeroInitializable(RD);
   }
 
diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp
index 08b8c2e..d4ff7da 100644
--- a/lib/CodeGen/CoverageMappingGen.cpp
+++ b/lib/CodeGen/CoverageMappingGen.cpp
@@ -74,7 +74,10 @@
 
   bool hasEndLoc() const { return LocEnd.hasValue(); }
 
-  void setEndLoc(SourceLocation Loc) { LocEnd = Loc; }
+  void setEndLoc(SourceLocation Loc) {
+    assert(Loc.isValid() && "Setting an invalid end location");
+    LocEnd = Loc;
+  }
 
   SourceLocation getEndLoc() const {
     assert(LocEnd && "Region has no end location");
@@ -112,6 +115,9 @@
     ColumnEnd = SM.getSpellingColumnNumber(LocEnd);
   }
 
+  SpellingRegion(SourceManager &SM, SourceMappingRegion &R)
+      : SpellingRegion(SM, R.getStartLoc(), R.getEndLoc()) {}
+
   /// Check if the start and end locations appear in source order, i.e
   /// top->bottom, left->right.
   bool isInSourceOrder() const {
@@ -445,6 +451,9 @@
   /// expressions cross file or macro boundaries.
   SourceLocation MostRecentLocation;
 
+  /// Location of the last terminated region.
+  Optional<std::pair<SourceLocation, size_t>> LastTerminatedRegion;
+
   /// \brief Return a counter for the subtraction of \c RHS from \c LHS
   Counter subtractCounters(Counter LHS, Counter RHS) {
     return Builder.subtract(LHS, RHS);
@@ -493,12 +502,14 @@
     DeferredRegion = None;
 
     // If the region ends in an expansion, find the expansion site.
-    if (SM.getFileID(DeferredEndLoc) != SM.getMainFileID()) {
-      FileID StartFile = SM.getFileID(DR.getStartLoc());
+    FileID StartFile = SM.getFileID(DR.getStartLoc());
+    if (SM.getFileID(DeferredEndLoc) != StartFile) {
       if (isNestedIn(DeferredEndLoc, StartFile)) {
         do {
           DeferredEndLoc = getIncludeOrExpansionLoc(DeferredEndLoc);
         } while (StartFile != SM.getFileID(DeferredEndLoc));
+      } else {
+        return Index;
       }
     }
 
@@ -520,6 +531,27 @@
     return Index;
   }
 
+  /// Complete a deferred region created after a terminated region at the
+  /// top-level.
+  void completeTopLevelDeferredRegion(Counter Count,
+                                      SourceLocation DeferredEndLoc) {
+    if (DeferredRegion || !LastTerminatedRegion)
+      return;
+
+    if (LastTerminatedRegion->second != RegionStack.size())
+      return;
+
+    SourceLocation Start = LastTerminatedRegion->first;
+    if (SM.getFileID(Start) != SM.getMainFileID())
+      return;
+
+    SourceMappingRegion DR = RegionStack.back();
+    DR.setStartLoc(Start);
+    DR.setDeferred(false);
+    DeferredRegion = DR;
+    completeDeferred(Count, DeferredEndLoc);
+  }
+
   /// \brief Pop regions from the stack into the function's list of regions.
   ///
   /// Adds all regions from \c ParentIndex to the top of the stack to the
@@ -557,6 +589,7 @@
           MostRecentLocation = getIncludeOrExpansionLoc(EndLoc);
 
         assert(SM.isWrittenInSameFile(Region.getStartLoc(), EndLoc));
+        assert(SpellingRegion(SM, Region).isInSourceOrder());
         SourceRegions.push_back(Region);
 
         if (ParentOfDeferredRegion) {
@@ -567,7 +600,7 @@
           if (!DeferredRegion.hasValue() &&
               // File IDs aren't gathered within macro expansions, so it isn't
               // useful to try and create a deferred region inside of one.
-              (SM.getFileID(EndLoc) == SM.getMainFileID()))
+              !EndLoc.isMacroID())
             DeferredRegion =
                 SourceMappingRegion(Counter::getZero(), EndLoc, None);
         }
@@ -576,6 +609,12 @@
         ParentOfDeferredRegion = true;
       }
       RegionStack.pop_back();
+
+      // If the zero region pushed after the last terminated region no longer
+      // exists, clear its cached information.
+      if (LastTerminatedRegion &&
+          RegionStack.size() < LastTerminatedRegion->second)
+        LastTerminatedRegion = None;
     }
     assert(!ParentOfDeferredRegion && "Deferred region with no parent");
   }
@@ -686,9 +725,11 @@
       SourceLocation Loc = MostRecentLocation;
       while (isNestedIn(Loc, ParentFile)) {
         SourceLocation FileStart = getStartOfFileOrMacro(Loc);
-        if (StartLocs.insert(FileStart).second)
+        if (StartLocs.insert(FileStart).second) {
           SourceRegions.emplace_back(*ParentCounter, FileStart,
                                      getEndOfFileOrMacro(Loc));
+          assert(SpellingRegion(SM, SourceRegions.back()).isInSourceOrder());
+        }
         Loc = getIncludeOrExpansionLoc(Loc);
       }
     }
@@ -712,10 +753,45 @@
   void terminateRegion(const Stmt *S) {
     extendRegion(S);
     SourceMappingRegion &Region = getRegion();
+    SourceLocation EndLoc = getEnd(S);
     if (!Region.hasEndLoc())
-      Region.setEndLoc(getEnd(S));
+      Region.setEndLoc(EndLoc);
     pushRegion(Counter::getZero());
-    getRegion().setDeferred(true);
+    auto &ZeroRegion = getRegion();
+    ZeroRegion.setDeferred(true);
+    LastTerminatedRegion = {EndLoc, RegionStack.size()};
+  }
+
+  /// Find a valid gap range between \p AfterLoc and \p BeforeLoc.
+  Optional<SourceRange> findGapAreaBetween(SourceLocation AfterLoc,
+                                           SourceLocation BeforeLoc) {
+    // If the start and end locations of the gap are both within the same macro
+    // file, the range may not be in source order.
+    if (AfterLoc.isMacroID() || BeforeLoc.isMacroID())
+      return None;
+    if (!SM.isWrittenInSameFile(AfterLoc, BeforeLoc))
+      return None;
+    return {{AfterLoc, BeforeLoc}};
+  }
+
+  /// Find the source range after \p AfterStmt and before \p BeforeStmt.
+  Optional<SourceRange> findGapAreaBetween(const Stmt *AfterStmt,
+                                           const Stmt *BeforeStmt) {
+    return findGapAreaBetween(getPreciseTokenLocEnd(getEnd(AfterStmt)),
+                              getStart(BeforeStmt));
+  }
+
+  /// Emit a gap region between \p StartLoc and \p EndLoc with the given count.
+  void fillGapAreaWithCount(SourceLocation StartLoc, SourceLocation EndLoc,
+                            Counter Count) {
+    if (StartLoc == EndLoc)
+      return;
+    assert(SpellingRegion(SM, StartLoc, EndLoc).isInSourceOrder());
+    handleFileExit(StartLoc);
+    size_t Index = pushRegion(Count, StartLoc, EndLoc);
+    getRegion().setGap(true);
+    handleFileExit(EndLoc);
+    popRegions(Index);
   }
 
   /// \brief Keep counts of breaks and continues inside loops.
@@ -813,10 +889,12 @@
   void VisitGotoStmt(const GotoStmt *S) { terminateRegion(S); }
 
   void VisitLabelStmt(const LabelStmt *S) {
+    Counter LabelCount = getRegionCounter(S);
     SourceLocation Start = getStart(S);
+    completeTopLevelDeferredRegion(LabelCount, Start);
     // We can't extendRegion here or we risk overlapping with our new region.
     handleFileExit(Start);
-    pushRegion(getRegionCounter(S), Start);
+    pushRegion(LabelCount, Start);
     Visit(S->getSubStmt());
   }
 
@@ -864,6 +942,11 @@
     propagateCounts(CondCount, S->getCond());
     adjustForOutOfOrderTraversal(getEnd(S));
 
+    // The body count applies to the area immediately after the increment.
+    auto Gap = findGapAreaBetween(S->getCond(), S->getBody());
+    if (Gap)
+      fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount);
+
     Counter OutCount =
         addCounters(BC.BreakCount, subtractCounters(CondCount, BodyCount));
     if (OutCount != ParentCount)
@@ -899,27 +982,41 @@
     Counter ParentCount = getRegion().getCounter();
     Counter BodyCount = getRegionCounter(S);
 
+    // The loop increment may contain a break or continue.
+    if (S->getInc())
+      BreakContinueStack.emplace_back();
+
     // Handle the body first so that we can get the backedge count.
-    BreakContinueStack.push_back(BreakContinue());
+    BreakContinueStack.emplace_back();
     extendRegion(S->getBody());
     Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
-    BreakContinue BC = BreakContinueStack.pop_back_val();
+    BreakContinue BodyBC = BreakContinueStack.pop_back_val();
 
     // The increment is essentially part of the body but it needs to include
     // the count for all the continue statements.
-    if (const Stmt *Inc = S->getInc())
-      propagateCounts(addCounters(BackedgeCount, BC.ContinueCount), Inc);
+    BreakContinue IncrementBC;
+    if (const Stmt *Inc = S->getInc()) {
+      propagateCounts(addCounters(BackedgeCount, BodyBC.ContinueCount), Inc);
+      IncrementBC = BreakContinueStack.pop_back_val();
+    }
 
     // Go back to handle the condition.
-    Counter CondCount =
-        addCounters(ParentCount, BackedgeCount, BC.ContinueCount);
+    Counter CondCount = addCounters(
+        addCounters(ParentCount, BackedgeCount, BodyBC.ContinueCount),
+        IncrementBC.ContinueCount);
     if (const Expr *Cond = S->getCond()) {
       propagateCounts(CondCount, Cond);
       adjustForOutOfOrderTraversal(getEnd(S));
     }
 
-    Counter OutCount =
-        addCounters(BC.BreakCount, subtractCounters(CondCount, BodyCount));
+    // The body count applies to the area immediately after the increment.
+    auto Gap = findGapAreaBetween(getPreciseTokenLocEnd(S->getRParenLoc()),
+                                  getStart(S->getBody()));
+    if (Gap)
+      fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount);
+
+    Counter OutCount = addCounters(BodyBC.BreakCount, IncrementBC.BreakCount,
+                                   subtractCounters(CondCount, BodyCount));
     if (OutCount != ParentCount)
       pushRegion(OutCount);
   }
@@ -937,6 +1034,12 @@
     Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
     BreakContinue BC = BreakContinueStack.pop_back_val();
 
+    // The body count applies to the area immediately after the range.
+    auto Gap = findGapAreaBetween(getPreciseTokenLocEnd(S->getRParenLoc()),
+                                  getStart(S->getBody()));
+    if (Gap)
+      fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount);
+
     Counter LoopCount =
         addCounters(ParentCount, BackedgeCount, BC.ContinueCount);
     Counter OutCount =
@@ -957,6 +1060,12 @@
     Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
     BreakContinue BC = BreakContinueStack.pop_back_val();
 
+    // The body count applies to the area immediately after the collection.
+    auto Gap = findGapAreaBetween(getPreciseTokenLocEnd(S->getRParenLoc()),
+                                  getStart(S->getBody()));
+    if (Gap)
+      fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount);
+
     Counter LoopCount =
         addCounters(ParentCount, BackedgeCount, BC.ContinueCount);
     Counter OutCount =
@@ -1048,12 +1157,21 @@
     // counter for the body when looking at the coverage.
     propagateCounts(ParentCount, S->getCond());
 
+    // The 'then' count applies to the area immediately after the condition.
+    auto Gap = findGapAreaBetween(S->getCond(), S->getThen());
+    if (Gap)
+      fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ThenCount);
+
     extendRegion(S->getThen());
     Counter OutCount = propagateCounts(ThenCount, S->getThen());
 
     Counter ElseCount = subtractCounters(ParentCount, ThenCount);
     if (const Stmt *Else = S->getElse()) {
-      extendRegion(S->getElse());
+      // The 'else' count applies to the area immediately after the 'then'.
+      Gap = findGapAreaBetween(S->getThen(), Else);
+      if (Gap)
+        fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ElseCount);
+      extendRegion(Else);
       OutCount = addCounters(OutCount, propagateCounts(ElseCount, Else));
     } else
       OutCount = addCounters(OutCount, ElseCount);
@@ -1090,9 +1208,16 @@
     Visit(E->getCond());
 
     if (!isa<BinaryConditionalOperator>(E)) {
+      // The 'then' count applies to the area immediately after the condition.
+      auto Gap =
+          findGapAreaBetween(E->getQuestionLoc(), getStart(E->getTrueExpr()));
+      if (Gap)
+        fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), TrueCount);
+
       extendRegion(E->getTrueExpr());
       propagateCounts(TrueCount, E->getTrueExpr());
     }
+
     extendRegion(E->getFalseExpr());
     propagateCounts(subtractCounters(ParentCount, TrueCount),
                     E->getFalseExpr());
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index 173bc4d..3b45e57 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -68,8 +68,8 @@
     if (CGM.getCodeGenOpts().getClangABICompat() <=
             CodeGenOptions::ClangABI::Ver4 ||
         CGM.getTriple().getOS() == llvm::Triple::PS4)
-      return RD->hasNonTrivialDestructor() ||
-             RD->hasNonTrivialCopyConstructor();
+      return RD->hasNonTrivialDestructorForCall() ||
+             RD->hasNonTrivialCopyConstructorForCall();
     return !canCopyArgument(RD);
   }
 
@@ -300,16 +300,11 @@
     // linkage together with vtables when needed.
     if (ForVTable && !Thunk->hasLocalLinkage())
       Thunk->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage);
-
-    // Propagate dllexport storage, to enable the linker to generate import
-    // thunks as necessary (e.g. when a parent class has a key function and a
-    // child class doesn't, and the construction vtable for the parent in the
-    // child needs to reference the parent's thunks).
-    const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
-    if (MD->hasAttr<DLLExportAttr>())
-      Thunk->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+    CGM.setGVProperties(Thunk, GD);
   }
 
+  bool exportThunk() override { return true; }
+
   llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This,
                                      const ThisAdjustment &TA) override;
 
@@ -389,6 +384,10 @@
 
   void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
 
+  std::pair<llvm::Value *, const CXXRecordDecl *>
+  LoadVTablePtr(CodeGenFunction &CGF, Address This,
+                const CXXRecordDecl *RD) override;
+
  private:
    bool hasAnyUnusedVirtualInlineFunction(const CXXRecordDecl *RD) const {
      const auto &VtableLayout =
@@ -562,9 +561,9 @@
     llvm::Value *MemFnPtr, const MemberPointerType *MPT) {
   CGBuilderTy &Builder = CGF.Builder;
 
-  const FunctionProtoType *FPT = 
+  const FunctionProtoType *FPT =
     MPT->getPointeeType()->getAs<FunctionProtoType>();
-  const CXXRecordDecl *RD = 
+  const CXXRecordDecl *RD =
     cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl());
 
   llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(
@@ -591,10 +590,10 @@
   Ptr = Builder.CreateInBoundsGEP(Ptr, Adj);
   This = Builder.CreateBitCast(Ptr, This->getType(), "this.adjusted");
   ThisPtrForCall = This;
-  
+
   // Load the function pointer.
   llvm::Value *FnAsInt = Builder.CreateExtractValue(MemFnPtr, 0, "memptr.ptr");
-  
+
   // If the LSB in the function pointer is 1, the function pointer points to
   // a virtual function.
   llvm::Value *IsVirtual;
@@ -642,7 +641,7 @@
   CGF.EmitBlock(FnNonVirtual);
   llvm::Value *NonVirtualFn =
     Builder.CreateIntToPtr(FnAsInt, FTy->getPointerTo(), "memptr.nonvirtualfn");
-  
+
   // We're done.
   CGF.EmitBlock(FnEnd);
   llvm::PHINode *CalleePtr = Builder.CreatePHI(FTy->getPointerTo(), 2);
@@ -807,7 +806,7 @@
 ItaniumCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) {
   // Itanium C++ ABI 2.3:
   //   A NULL pointer is represented as -1.
-  if (MPT->isMemberDataPointer()) 
+  if (MPT->isMemberDataPointer())
     return llvm::ConstantInt::get(CGM.PtrDiffTy, -1ULL, /*isSigned=*/true);
 
   llvm::Constant *Zero = llvm::ConstantInt::get(CGM.PtrDiffTy, 0);
@@ -884,7 +883,7 @@
                                        (UseARMMethodPtrABI ? 2 : 1) *
                                        ThisAdjustment.getQuantity());
   }
-  
+
   return llvm::ConstantStruct::getAnon(MemPtr);
 }
 
@@ -943,7 +942,7 @@
   //                   (L.ptr == 0 && ((L.adj|R.adj) & 1) == 0)))
   // The inequality tautologies have exactly the same structure, except
   // applying De Morgan's laws.
-  
+
   llvm::Value *LPtr = Builder.CreateExtractValue(L, 0, "lhs.memptr.ptr");
   llvm::Value *RPtr = Builder.CreateExtractValue(R, 0, "rhs.memptr.ptr");
 
@@ -996,7 +995,7 @@
       llvm::Constant::getAllOnesValue(MemPtr->getType());
     return Builder.CreateICmpNE(MemPtr, NegativeOne, "memptr.tobool");
   }
-  
+
   // In Itanium, a member function pointer is not null if 'ptr' is not null.
   llvm::Value *Ptr = Builder.CreateExtractValue(MemPtr, 0, "memptr.ptr");
 
@@ -1154,9 +1153,9 @@
   //                      const abi::__class_type_info *src,
   //                      const abi::__class_type_info *dst,
   //                      std::ptrdiff_t src2dst_offset);
-  
+
   llvm::Type *Int8PtrTy = CGF.Int8PtrTy;
-  llvm::Type *PtrDiffTy = 
+  llvm::Type *PtrDiffTy =
     CGF.ConvertType(CGF.getContext().getPointerDiffType());
 
   llvm::Type *Args[4] = { Int8PtrTy, Int8PtrTy, Int8PtrTy, PtrDiffTy };
@@ -1443,8 +1442,9 @@
   if (CGF.CurFuncDecl && CGF.CurFuncDecl->hasAttr<NakedAttr>())
     return;
 
-  /// Initialize the 'this' slot.
-  EmitThisParam(CGF);
+  /// Initialize the 'this' slot. In the Itanium C++ ABI, no prologue
+  /// adjustments are required, becuase they are all handled by thunks.
+  setCXXABIThisValue(CGF, loadIncomingCXXThis(CGF));
 
   /// Initialize the 'vtt' slot if needed.
   if (getStructorImplicitParamDecl(CGF)) {
@@ -1526,7 +1526,7 @@
     VTable->setComdat(CGM.getModule().getOrInsertComdat(VTable->getName()));
 
   // Set the right visibility.
-  CGM.setGlobalVisibility(VTable, RD);
+  CGM.setGVProperties(VTable, RD);
 
   // Use pointer alignment for the vtable. Otherwise we would align them based
   // on the size of the initializer which doesn't make sense as only single
@@ -1637,10 +1637,7 @@
       Name, VTableType, llvm::GlobalValue::ExternalLinkage);
   VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
 
-  if (RD->hasAttr<DLLImportAttr>())
-    VTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
-  else if (RD->hasAttr<DLLExportAttr>())
-    VTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+  CGM.setGVProperties(VTable, RD);
 
   return VTable;
 }
@@ -1696,10 +1693,9 @@
 
   const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
       Dtor, getFromDtorType(DtorType));
-  llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
+  llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
   CGCallee Callee =
-      getVirtualFunctionPointer(CGF, GlobalDecl(Dtor, DtorType), This, Ty,
-                                CE ? CE->getLocStart() : SourceLocation());
+      CGCallee::forVirtual(CE, GlobalDecl(Dtor, DtorType), This, Ty);
 
   CGF.EmitCXXMemberOrOperatorCall(Dtor, Callee, ReturnValueSlot(),
                                   This.getPointer(), /*ImplicitParam=*/nullptr,
@@ -1842,7 +1838,8 @@
 
   // Handle the array cookie specially in ASan.
   if (CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) && AS == 0 &&
-      expr->getOperatorNew()->isReplaceableGlobalAllocationFunction()) {
+      (expr->getOperatorNew()->isReplaceableGlobalAllocationFunction() ||
+       CGM.getCodeGenOpts().SanitizeAddressPoisonClassMemberArrayNewCookie)) {
     // The store to the CookiePtr does not need to be instrumented.
     CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI);
     llvm::FunctionType *FTy =
@@ -2046,6 +2043,7 @@
                                      false, var->getLinkage(),
                                      llvm::ConstantInt::get(guardTy, 0),
                                      guardName.str());
+    guard->setDSOLocal(var->isDSOLocal());
     guard->setVisibility(var->getVisibility());
     // If the variable is thread-local, so is its guard variable.
     guard->setThreadLocalMode(var->getThreadLocalMode());
@@ -2139,19 +2137,19 @@
   CGF.EmitBlock(InitCheckBlock);
 
   // Variables used when coping with thread-safe statics and exceptions.
-  if (threadsafe) {    
+  if (threadsafe) {
     // Call __cxa_guard_acquire.
     llvm::Value *V
       = CGF.EmitNounwindRuntimeCall(getGuardAcquireFn(CGM, guardPtrTy), guard);
-               
+
     llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
-  
+
     Builder.CreateCondBr(Builder.CreateIsNotNull(V, "tobool"),
                          InitBlock, EndBlock);
-  
+
     // Call __cxa_guard_abort along the exceptional edge.
     CGF.EHStack.pushCleanup<CallGuardAbort>(EHCleanup, guard);
-    
+
     CGF.EmitBlock(InitBlock);
   }
 
@@ -2464,11 +2462,11 @@
 /// if it's a base constructor or destructor with virtual bases.
 bool ItaniumCXXABI::NeedsVTTParameter(GlobalDecl GD) {
   const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
-  
+
   // We don't have any virtual bases, just return early.
   if (!MD->getParent()->getNumVBases())
     return false;
-  
+
   // Check if we have a base constructor.
   if (isa<CXXConstructorDecl>(MD) && GD.getCtorType() == Ctor_Base)
     return true;
@@ -2476,7 +2474,7 @@
   // Check if we have a base destructor.
   if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base)
     return true;
-  
+
   return false;
 }
 
@@ -2618,8 +2616,7 @@
                                   Name);
     if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
       const CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl());
-      if (RD->hasAttr<DLLImportAttr>())
-        GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
+      CGM.setGVProperties(GV, RD);
     }
   }
 
@@ -2755,6 +2752,11 @@
     // N.B. We must always emit the RTTI data ourselves if there exists a key
     // function.
     bool IsDLLImport = RD->hasAttr<DLLImportAttr>();
+
+    // Don't import the RTTI but emit it locally.
+    if (CGM.getTriple().isWindowsGNUEnvironment() && IsDLLImport)
+      return false;
+
     if (CGM.getVTables().isVTableExternal(RD))
       return IsDLLImport && !CGM.getTriple().isWindowsItaniumEnvironment()
                  ? false
@@ -3201,7 +3203,10 @@
     llvmVisibility = CodeGenModule::GetLLVMVisibility(Ty->getVisibility());
 
   TypeName->setVisibility(llvmVisibility);
+  CGM.setDSOLocal(TypeName);
+
   GV->setVisibility(llvmVisibility);
+  CGM.setDSOLocal(GV);
 
   if (CGM.getTriple().isWindowsItaniumEnvironment()) {
     auto RD = Ty->getAsCXXRecordDecl();
@@ -3429,9 +3434,7 @@
   if (auto *Proto = Type->getAs<FunctionProtoType>()) {
     if (Proto->isNothrow(Ctx)) {
       Flags |= ItaniumRTTIBuilder::PTI_Noexcept;
-      Type = Ctx.getFunctionType(
-          Proto->getReturnType(), Proto->getParamTypes(),
-          Proto->getExtProtoInfo().withExceptionSpec(EST_None));
+      Type = Ctx.getFunctionTypeWithExceptionSpec(Type, EST_None);
     }
   }
 
@@ -3622,7 +3625,7 @@
   }
 
   // Finally, set up the alias with its proper name and attributes.
-  CGM.setAliasAttributes(cast<NamedDecl>(AliasDecl.getDecl()), Alias);
+  CGM.SetCommonAttributes(AliasDecl, Alias);
 }
 
 void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
@@ -3898,7 +3901,9 @@
     llvm::Value *rawAdjustedExn = CallBeginCatch(CGF, Exn, true);
     Address adjustedExn(CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy),
                         caughtExnAlignment);
-    CGF.EmitAggregateCopy(ParamAddr, adjustedExn, CatchType);
+    LValue Dest = CGF.MakeAddrLValue(ParamAddr, CatchType);
+    LValue Src = CGF.MakeAddrLValue(adjustedExn, CatchType);
+    CGF.EmitAggregateCopy(Dest, Src, CatchType);
     return;
   }
 
@@ -4039,3 +4044,9 @@
   }
   return CGF.EmitNounwindRuntimeCall(CGF.CGM.getTerminateFn());
 }
+
+std::pair<llvm::Value *, const CXXRecordDecl *>
+ItaniumCXXABI::LoadVTablePtr(CodeGenFunction &CGF, Address This,
+                             const CXXRecordDecl *RD) {
+  return {CGF.GetVTablePtr(This, CGM.Int8PtrTy, RD), RD};
+}
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index e81dedf..43c061e 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -244,9 +244,6 @@
   void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy,
                                  FunctionArgList &Params) override;
 
-  llvm::Value *adjustThisParameterInVirtualFunctionPrologue(
-      CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) override;
-
   void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
 
   AddedStructorArgs
@@ -360,9 +357,6 @@
 
   void setThunkLinkage(llvm::Function *Thunk, bool ForVTable,
                        GlobalDecl GD, bool ReturnAdjustment) override {
-    // Never dllimport/dllexport thunks.
-    Thunk->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
-
     GVALinkage Linkage =
         getContext().GetGVALinkageForFunction(cast<FunctionDecl>(GD.getDecl()));
 
@@ -374,6 +368,8 @@
       Thunk->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
   }
 
+  bool exportThunk() override { return false; }
+
   llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This,
                                      const ThisAdjustment &TA) override;
 
@@ -581,7 +577,7 @@
     return GetVBaseOffsetFromVBPtr(CGF, Base, VBPOffset, VBTOffset, VBPtr);
   }
 
-  std::pair<Address, llvm::Value *>
+  std::tuple<Address, llvm::Value *, const CXXRecordDecl *>
   performBaseAdjustment(CodeGenFunction &CGF, Address Value,
                         QualType SrcRecordTy);
 
@@ -748,6 +744,10 @@
 
   llvm::GlobalVariable *getThrowInfo(QualType T) override;
 
+  std::pair<llvm::Value *, const CXXRecordDecl *>
+  LoadVTablePtr(CodeGenFunction &CGF, Address This,
+                const CXXRecordDecl *RD) override;
+
 private:
   typedef std::pair<const CXXRecordDecl *, CharUnits> VFTableIdTy;
   typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalVariable *> VTablesMapTy;
@@ -819,19 +819,8 @@
     return RAA_Default;
 
   case llvm::Triple::x86_64:
-    // If a class has a destructor, we'd really like to pass it indirectly
-    // because it allows us to elide copies.  Unfortunately, MSVC makes that
-    // impossible for small types, which it will pass in a single register or
-    // stack slot. Most objects with dtors are large-ish, so handle that early.
-    // We can't call out all large objects as being indirect because there are
-    // multiple x64 calling conventions and the C++ ABI code shouldn't dictate
-    // how we pass large POD types.
-    //
-    // Note: This permits small classes with nontrivial destructors to be
-    // passed in registers, which is non-conforming.
-    if (RD->hasNonTrivialDestructor() &&
-        getContext().getTypeSize(RD->getTypeForDecl()) > 64)
-      return RAA_Indirect;
+    bool CopyCtorIsTrivial = false, CopyCtorIsTrivialForCall = false;
+    bool DtorIsTrivialForCall = false;
 
     // If a class has at least one non-deleted, trivial copy constructor, it
     // is passed according to the C ABI. Otherwise, it is passed indirectly.
@@ -840,23 +829,49 @@
     // passed in registers, so long as they *also* have a trivial copy ctor,
     // which is non-conforming.
     if (RD->needsImplicitCopyConstructor()) {
-      // If the copy ctor has not yet been declared, we can read its triviality
-      // off the AST.
-      if (!RD->defaultedCopyConstructorIsDeleted() &&
-          RD->hasTrivialCopyConstructor())
-        return RAA_Default;
+      if (!RD->defaultedCopyConstructorIsDeleted()) {
+        if (RD->hasTrivialCopyConstructor())
+          CopyCtorIsTrivial = true;
+        if (RD->hasTrivialCopyConstructorForCall())
+          CopyCtorIsTrivialForCall = true;
+      }
     } else {
-      // Otherwise, we need to find the copy constructor(s) and ask.
       for (const CXXConstructorDecl *CD : RD->ctors()) {
-        if (CD->isCopyConstructor()) {
-          // We had at least one nondeleted trivial copy ctor.  Return directly.
-          if (!CD->isDeleted() && CD->isTrivial())
-            return RAA_Default;
+        if (CD->isCopyConstructor() && !CD->isDeleted()) {
+          if (CD->isTrivial())
+            CopyCtorIsTrivial = true;
+          if (CD->isTrivialForCall())
+            CopyCtorIsTrivialForCall = true;
         }
       }
     }
 
-    // We have no trivial, non-deleted copy constructor.
+    if (RD->needsImplicitDestructor()) {
+      if (!RD->defaultedDestructorIsDeleted() &&
+          RD->hasTrivialDestructorForCall())
+        DtorIsTrivialForCall = true;
+    } else if (const auto *D = RD->getDestructor()) {
+      if (!D->isDeleted() && D->isTrivialForCall())
+        DtorIsTrivialForCall = true;
+    }
+
+    // If the copy ctor and dtor are both trivial-for-calls, pass direct.
+    if (CopyCtorIsTrivialForCall && DtorIsTrivialForCall)
+      return RAA_Default;
+
+    // If a class has a destructor, we'd really like to pass it indirectly
+    // because it allows us to elide copies.  Unfortunately, MSVC makes that
+    // impossible for small types, which it will pass in a single register or
+    // stack slot. Most objects with dtors are large-ish, so handle that early.
+    // We can't call out all large objects as being indirect because there are
+    // multiple x64 calling conventions and the C++ ABI code shouldn't dictate
+    // how we pass large POD types.
+
+    // Note: This permits small classes with nontrivial destructors to be
+    // passed in registers, which is non-conforming.
+    if (CopyCtorIsTrivial &&
+        getContext().getTypeSize(RD->getTypeForDecl()) <= 64)
+      return RAA_Default;
     return RAA_Indirect;
   }
 
@@ -929,7 +944,7 @@
 /// We need to perform a generic polymorphic operation (like a typeid
 /// or a cast), which requires an object with a vfptr.  Adjust the
 /// address to point to an object with a vfptr.
-std::pair<Address, llvm::Value *>
+std::tuple<Address, llvm::Value *, const CXXRecordDecl *>
 MicrosoftCXXABI::performBaseAdjustment(CodeGenFunction &CGF, Address Value,
                                        QualType SrcRecordTy) {
   Value = CGF.Builder.CreateBitCast(Value, CGF.Int8PtrTy);
@@ -940,7 +955,8 @@
   // covers non-virtual base subobjects: a class with its own virtual
   // functions would be a candidate to be a primary base.
   if (Context.getASTRecordLayout(SrcDecl).hasExtendableVFPtr())
-    return std::make_pair(Value, llvm::ConstantInt::get(CGF.Int32Ty, 0));
+    return std::make_tuple(Value, llvm::ConstantInt::get(CGF.Int32Ty, 0),
+                           SrcDecl);
 
   // Okay, one of the vbases must have a vfptr, or else this isn't
   // actually a polymorphic class.
@@ -959,7 +975,7 @@
   llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(Value.getPointer(), Offset);
   CharUnits VBaseAlign =
     CGF.CGM.getVBaseAlignment(Value.getAlignment(), SrcDecl, PolymorphicBase);
-  return std::make_pair(Address(Ptr, VBaseAlign), Offset);
+  return std::make_tuple(Address(Ptr, VBaseAlign), Offset, PolymorphicBase);
 }
 
 bool MicrosoftCXXABI::shouldTypeidBeNullChecked(bool IsDeref,
@@ -990,7 +1006,7 @@
                                          QualType SrcRecordTy,
                                          Address ThisPtr,
                                          llvm::Type *StdTypeInfoPtrTy) {
-  std::tie(ThisPtr, std::ignore) =
+  std::tie(ThisPtr, std::ignore, std::ignore) =
       performBaseAdjustment(CGF, ThisPtr, SrcRecordTy);
   auto Typeid = emitRTtypeidCall(CGF, ThisPtr.getPointer()).getInstruction();
   return CGF.Builder.CreateBitCast(Typeid, StdTypeInfoPtrTy);
@@ -1014,7 +1030,8 @@
       CGF.CGM.GetAddrOfRTTIDescriptor(DestRecordTy.getUnqualifiedType());
 
   llvm::Value *Offset;
-  std::tie(This, Offset) = performBaseAdjustment(CGF, This, SrcRecordTy);
+  std::tie(This, Offset, std::ignore) =
+      performBaseAdjustment(CGF, This, SrcRecordTy);
   llvm::Value *ThisPtr = This.getPointer();
   Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty);
 
@@ -1040,7 +1057,8 @@
 MicrosoftCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value,
                                        QualType SrcRecordTy,
                                        QualType DestTy) {
-  std::tie(Value, std::ignore) = performBaseAdjustment(CGF, Value, SrcRecordTy);
+  std::tie(Value, std::ignore, std::ignore) =
+      performBaseAdjustment(CGF, Value, SrcRecordTy);
 
   // PVOID __RTCastToVoid(
   //   PVOID inptr)
@@ -1229,7 +1247,7 @@
     if (!hasDefaultCXXMethodCC(getContext(), D) || D->getNumParams() != 0) {
       llvm::Function *Fn = getAddrOfCXXCtorClosure(D, Ctor_DefaultClosure);
       Fn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
-      Fn->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+      CGM.setGVProperties(Fn, D);
     }
 }
 
@@ -1433,50 +1451,54 @@
   }
 }
 
-llvm::Value *MicrosoftCXXABI::adjustThisParameterInVirtualFunctionPrologue(
-    CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) {
-  // In this ABI, every virtual function takes a pointer to one of the
-  // subobjects that first defines it as the 'this' parameter, rather than a
-  // pointer to the final overrider subobject. Thus, we need to adjust it back
-  // to the final overrider subobject before use.
-  // See comments in the MicrosoftVFTableContext implementation for the details.
-  CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(GD);
-  if (Adjustment.isZero())
-    return This;
-
-  unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace();
-  llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS),
-             *thisTy = This->getType();
-
-  This = CGF.Builder.CreateBitCast(This, charPtrTy);
-  assert(Adjustment.isPositive());
-  This = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, This,
-                                                -Adjustment.getQuantity());
-  return CGF.Builder.CreateBitCast(This, thisTy);
-}
-
 void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
   // Naked functions have no prolog.
   if (CGF.CurFuncDecl && CGF.CurFuncDecl->hasAttr<NakedAttr>())
     return;
 
-  EmitThisParam(CGF);
+  // Overridden virtual methods of non-primary bases need to adjust the incoming
+  // 'this' pointer in the prologue. In this hierarchy, C::b will subtract
+  // sizeof(void*) to adjust from B* to C*:
+  //   struct A { virtual void a(); };
+  //   struct B { virtual void b(); };
+  //   struct C : A, B { virtual void b(); };
+  //
+  // Leave the value stored in the 'this' alloca unadjusted, so that the
+  // debugger sees the unadjusted value. Microsoft debuggers require this, and
+  // will apply the ThisAdjustment in the method type information.
+  // FIXME: Do something better for DWARF debuggers, which won't expect this,
+  // without making our codegen depend on debug info settings.
+  llvm::Value *This = loadIncomingCXXThis(CGF);
+  const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
+  if (!CGF.CurFuncIsThunk && MD->isVirtual()) {
+    CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(CGF.CurGD);
+    if (!Adjustment.isZero()) {
+      unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace();
+      llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS),
+                 *thisTy = This->getType();
+      This = CGF.Builder.CreateBitCast(This, charPtrTy);
+      assert(Adjustment.isPositive());
+      This = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, This,
+                                                    -Adjustment.getQuantity());
+      This = CGF.Builder.CreateBitCast(This, thisTy, "this.adjusted");
+    }
+  }
+  setCXXABIThisValue(CGF, This);
 
-  /// If this is a function that the ABI specifies returns 'this', initialize
-  /// the return slot to 'this' at the start of the function.
-  ///
-  /// Unlike the setting of return types, this is done within the ABI
-  /// implementation instead of by clients of CGCXXABI because:
-  /// 1) getThisValue is currently protected
-  /// 2) in theory, an ABI could implement 'this' returns some other way;
-  ///    HasThisReturn only specifies a contract, not the implementation    
+  // If this is a function that the ABI specifies returns 'this', initialize
+  // the return slot to 'this' at the start of the function.
+  //
+  // Unlike the setting of return types, this is done within the ABI
+  // implementation instead of by clients of CGCXXABI because:
+  // 1) getThisValue is currently protected
+  // 2) in theory, an ABI could implement 'this' returns some other way;
+  //    HasThisReturn only specifies a contract, not the implementation
   if (HasThisReturn(CGF.CurGD))
     CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue);
   else if (hasMostDerivedReturn(CGF.CurGD))
     CGF.Builder.CreateStore(CGF.EmitCastToVoidPtr(getThisValue(CGF)),
                             CGF.ReturnValue);
 
-  const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
   if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) {
     assert(getStructorImplicitParamDecl(CGF) &&
            "no implicit parameter for a constructor with virtual bases?");
@@ -1864,9 +1886,8 @@
   GlobalDecl GD(Dtor, Dtor_Deleting);
   const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
       Dtor, StructorType::Deleting);
-  llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
-  CGCallee Callee = getVirtualFunctionPointer(
-      CGF, GD, This, Ty, CE ? CE->getLocStart() : SourceLocation());
+  llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
+  CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty);
 
   ASTContext &Context = getContext();
   llvm::Value *ImplicitParam = llvm::ConstantInt::get(
@@ -1961,7 +1982,7 @@
   // Start defining the function.
   CGF.StartFunction(GlobalDecl(), FnInfo.getReturnType(), ThunkFn, FnInfo,
                     FunctionArgs, MD->getLocation(), SourceLocation());
-  EmitThisParam(CGF);
+  setCXXABIThisValue(CGF, loadIncomingCXXThis(CGF));
 
   // Load the vfptr and then callee from the vftable.  The callee should have
   // adjusted 'this' so that the vfptr is at offset zero.
@@ -3900,7 +3921,7 @@
                     FunctionArgs, CD->getLocation(), SourceLocation());
   // Create a scope with an artificial location for the body of this function.
   auto AL = ApplyDebugLocation::CreateArtificial(CGF);
-  EmitThisParam(CGF);
+  setCXXABIThisValue(CGF, loadIncomingCXXThis(CGF));
   llvm::Value *This = getThisValue(CGF);
 
   llvm::Value *SrcVal =
@@ -4243,3 +4264,11 @@
   };
   CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(), Args);
 }
+
+std::pair<llvm::Value *, const CXXRecordDecl *>
+MicrosoftCXXABI::LoadVTablePtr(CodeGenFunction &CGF, Address This,
+                               const CXXRecordDecl *RD) {
+  std::tie(This, std::ignore, RD) =
+      performBaseAdjustment(CGF, This, QualType(RD->getTypeForDecl(), 0));
+  return {CGF.GetVTablePtr(This, CGM.Int8PtrTy, RD), RD};
+}
diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index d0760b9..150b64f 100644
--- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -71,9 +71,8 @@
     }
 
     bool VisitImportDecl(ImportDecl *D) {
-      auto *Import = cast<ImportDecl>(D);
-      if (!Import->getImportedOwningModule())
-        DI.EmitImportDecl(*Import);
+      if (!D->getImportedOwningModule())
+        DI.EmitImportDecl(*D);
       return true;
     }
 
@@ -229,6 +228,11 @@
       Builder->getModuleDebugInfo()->completeRequiredType(RD);
   }
 
+  void HandleImplicitImportDecl(ImportDecl *D) override {
+    if (!D->getImportedOwningModule())
+      Builder->getModuleDebugInfo()->EmitImportDecl(*D);
+  }
+
   /// Emit a container holding the serialized AST.
   void HandleTranslationUnit(ASTContext &Ctx) override {
     assert(M && VMContext && Builder);
diff --git a/lib/CodeGen/SanitizerMetadata.cpp b/lib/CodeGen/SanitizerMetadata.cpp
index 9848e3e..f891cfb 100644
--- a/lib/CodeGen/SanitizerMetadata.cpp
+++ b/lib/CodeGen/SanitizerMetadata.cpp
@@ -26,7 +26,8 @@
                                            QualType Ty, bool IsDynInit,
                                            bool IsBlacklisted) {
   if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
-                                           SanitizerKind::KernelAddress))
+                                           SanitizerKind::KernelAddress |
+                                           SanitizerKind::HWAddress))
     return;
   IsDynInit &= !CGM.isInSanitizerBlacklist(GV, Loc, Ty, "init");
   IsBlacklisted |= CGM.isInSanitizerBlacklist(GV, Loc, Ty);
@@ -58,7 +59,8 @@
 void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
                                            const VarDecl &D, bool IsDynInit) {
   if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
-                                           SanitizerKind::KernelAddress))
+                                           SanitizerKind::KernelAddress |
+                                           SanitizerKind::HWAddress))
     return;
   std::string QualName;
   llvm::raw_string_ostream OS(QualName);
@@ -76,7 +78,8 @@
   // For now, just make sure the global is not modified by the ASan
   // instrumentation.
   if (CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
-                                          SanitizerKind::KernelAddress))
+                                          SanitizerKind::KernelAddress |
+                                          SanitizerKind::HWAddress))
     reportGlobalToASan(GV, SourceLocation(), "", QualType(), false, true);
 }
 
diff --git a/lib/CodeGen/SwiftCallingConv.cpp b/lib/CodeGen/SwiftCallingConv.cpp
index fc8e36d..2a12078 100644
--- a/lib/CodeGen/SwiftCallingConv.cpp
+++ b/lib/CodeGen/SwiftCallingConv.cpp
@@ -579,11 +579,9 @@
   // Empty types don't need to be passed indirectly.
   if (Entries.empty()) return false;
 
-  CharUnits totalSize = Entries.back().End;
-
   // Avoid copying the array of types when there's just a single element.
   if (Entries.size() == 1) {
-    return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize,
+    return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(
                                                            Entries.back().Type,
                                                              asReturnValue);    
   }
@@ -593,8 +591,14 @@
   for (auto &entry : Entries) {
     componentTys.push_back(entry.Type);
   }
-  return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize,
-                                                           componentTys,
+  return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(componentTys,
+                                                           asReturnValue);
+}
+
+bool swiftcall::shouldPassIndirectly(CodeGenModule &CGM,
+                                     ArrayRef<llvm::Type*> componentTys,
+                                     bool asReturnValue) {
+  return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(componentTys,
                                                            asReturnValue);
 }
 
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 2651d87..aadbd30 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -201,10 +201,6 @@
   return false;
 }
 
-bool ABIInfo::shouldSignExtUnsignedType(QualType Ty) const {
-  return false;
-}
-
 LLVM_DUMP_METHOD void ABIArgInfo::dump() const {
   raw_ostream &OS = llvm::errs();
   OS << "(ABIArgInfo Kind=";
@@ -682,8 +678,8 @@
   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
     Ty = EnumTy->getDecl()->getIntegerType();
 
-  return (Ty->isPromotableIntegerType() ?
-          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+  return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+                                        : ABIArgInfo::getDirect());
 }
 
 ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
@@ -697,8 +693,8 @@
   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
     RetTy = EnumTy->getDecl()->getIntegerType();
 
-  return (RetTy->isPromotableIntegerType() ?
-          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+  return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+                                           : ABIArgInfo::getDirect());
 }
 
 //===----------------------------------------------------------------------===//
@@ -845,8 +841,8 @@
     return ABIArgInfo::getDirect();
   }
 
-  return (Ty->isPromotableIntegerType() ?
-          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+  return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+                                        : ABIArgInfo::getDirect());
 }
 
 ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const {
@@ -861,8 +857,8 @@
   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
     RetTy = EnumTy->getDecl()->getIntegerType();
 
-  return (RetTy->isPromotableIntegerType() ?
-          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+  return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+                                           : ABIArgInfo::getDirect());
 }
 
 /// IsX86_MMXType - Return true if this is an MMX type.
@@ -1028,8 +1024,7 @@
       IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
       DefaultNumRegisterParameters(NumRegisterParameters) {}
 
-  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
-                                    ArrayRef<llvm::Type*> scalars,
+  bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
                                     bool asReturnValue) const override {
     // LLVM's x86-32 lowering currently only assigns up to three
     // integer registers and three fp registers.  Oddly, it'll use up to
@@ -1057,8 +1052,7 @@
       const llvm::Triple &Triple, const CodeGenOptions &Opts);
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &CGM,
-                           ForDefinition_t IsForDefinition) const override;
+                           CodeGen::CodeGenModule &CGM) const override;
 
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
     // Darwin uses different dwarf register numbers for EH.
@@ -1404,8 +1398,8 @@
   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
     RetTy = EnumTy->getDecl()->getIntegerType();
 
-  return (RetTy->isPromotableIntegerType() ?
-          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+  return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+                                           : ABIArgInfo::getDirect());
 }
 
 static bool isSSEVectorType(ASTContext &Context, QualType Ty) {
@@ -1677,8 +1671,8 @@
 
   if (Ty->isPromotableIntegerType()) {
     if (InReg)
-      return ABIArgInfo::getExtendInReg();
-    return ABIArgInfo::getExtend();
+      return ABIArgInfo::getExtendInReg(Ty);
+    return ABIArgInfo::getExtend(Ty);
   }
 
   if (InReg)
@@ -1925,9 +1919,8 @@
 }
 
 void X86_32TargetCodeGenInfo::setTargetAttributes(
-    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
-    ForDefinition_t IsForDefinition) const {
-  if (!IsForDefinition)
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+  if (GV->isDeclaration())
     return;
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
     if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
@@ -2168,8 +2161,7 @@
     return Has64BitPointers;
   }
 
-  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
-                                    ArrayRef<llvm::Type*> scalars,
+  bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
                                     bool asReturnValue) const override {
     return occupiesMoreThan(CGT, scalars, /*total*/ 4);
   }  
@@ -2201,8 +2193,7 @@
     return isX86VectorCallAggregateSmallEnough(NumMembers);
   }
 
-  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
-                                    ArrayRef<llvm::Type *> scalars,
+  bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type *> scalars,
                                     bool asReturnValue) const override {
     return occupiesMoreThan(CGT, scalars, /*total*/ 4);
   }
@@ -2286,9 +2277,8 @@
   }
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &CGM,
-                           ForDefinition_t IsForDefinition) const override {
-    if (!IsForDefinition)
+                           CodeGen::CodeGenModule &CGM) const override {
+    if (GV->isDeclaration())
       return;
     if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
       if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
@@ -2346,8 +2336,7 @@
         Win32StructABI, NumRegisterParameters, false) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &CGM,
-                           ForDefinition_t IsForDefinition) const override;
+                           CodeGen::CodeGenModule &CGM) const override;
 
   void getDependentLibraryOption(llvm::StringRef Lib,
                                  llvm::SmallString<24> &Opt) const override {
@@ -2362,26 +2351,24 @@
   }
 };
 
-static void addStackProbeSizeTargetAttribute(const Decl *D,
-                                             llvm::GlobalValue *GV,
-                                             CodeGen::CodeGenModule &CGM) {
-  if (D && isa<FunctionDecl>(D)) {
-    if (CGM.getCodeGenOpts().StackProbeSize != 4096) {
-      llvm::Function *Fn = cast<llvm::Function>(GV);
+static void addStackProbeTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                                          CodeGen::CodeGenModule &CGM) {
+  if (llvm::Function *Fn = dyn_cast_or_null<llvm::Function>(GV)) {
 
+    if (CGM.getCodeGenOpts().StackProbeSize != 4096)
       Fn->addFnAttr("stack-probe-size",
                     llvm::utostr(CGM.getCodeGenOpts().StackProbeSize));
-    }
+    if (CGM.getCodeGenOpts().NoStackArgProbe)
+      Fn->addFnAttr("no-stack-arg-probe");
   }
 }
 
 void WinX86_32TargetCodeGenInfo::setTargetAttributes(
-    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
-    ForDefinition_t IsForDefinition) const {
-  X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
-  if (!IsForDefinition)
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+  X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+  if (GV->isDeclaration())
     return;
-  addStackProbeSizeTargetAttribute(D, GV, CGM);
+  addStackProbeTargetAttributes(D, GV, CGM);
 }
 
 class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -2391,8 +2378,7 @@
       : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &CGM,
-                           ForDefinition_t IsForDefinition) const override;
+                           CodeGen::CodeGenModule &CGM) const override;
 
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
     return 7;
@@ -2422,10 +2408,9 @@
 };
 
 void WinX86_64TargetCodeGenInfo::setTargetAttributes(
-    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
-    ForDefinition_t IsForDefinition) const {
-  TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
-  if (!IsForDefinition)
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+  TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+  if (GV->isDeclaration())
     return;
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
     if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
@@ -2443,7 +2428,7 @@
     }
   }
 
-  addStackProbeSizeTargetAttribute(D, GV, CGM);
+  addStackProbeTargetAttributes(D, GV, CGM);
 }
 }
 
@@ -2868,8 +2853,8 @@
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
       Ty = EnumTy->getDecl()->getIntegerType();
 
-    return (Ty->isPromotableIntegerType() ?
-            ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+    return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+                                          : ABIArgInfo::getDirect());
   }
 
   return getNaturalAlignIndirect(Ty);
@@ -2901,8 +2886,8 @@
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
       Ty = EnumTy->getDecl()->getIntegerType();
 
-    return (Ty->isPromotableIntegerType() ?
-            ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+    return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+                                          : ABIArgInfo::getDirect());
   }
 
   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
@@ -3271,7 +3256,7 @@
 
       if (RetTy->isIntegralOrEnumerationType() &&
           RetTy->isPromotableIntegerType())
-        return ABIArgInfo::getExtend();
+        return ABIArgInfo::getExtend(RetTy);
     }
     break;
 
@@ -3416,7 +3401,7 @@
 
       if (Ty->isIntegralOrEnumerationType() &&
           Ty->isPromotableIntegerType())
-        return ABIArgInfo::getExtend();
+        return ABIArgInfo::getExtend(Ty);
     }
 
     break;
@@ -3543,7 +3528,17 @@
 
 void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
 
-  bool IsRegCall = FI.getCallingConvention() == llvm::CallingConv::X86_RegCall;
+  const unsigned CallingConv = FI.getCallingConvention();
+  // It is possible to force Win64 calling convention on any x86_64 target by
+  // using __attribute__((ms_abi)). In such case to correctly emit Win64
+  // compatible code delegate this call to WinX86_64ABIInfo::computeInfo.
+  if (CallingConv == llvm::CallingConv::Win64) {
+    WinX86_64ABIInfo Win64ABIInfo(CGT);
+    Win64ABIInfo.computeInfo(FI);
+    return;
+  }
+
+  bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall;
 
   // Keep track of the number of assigned registers.
   unsigned FreeIntRegs = IsRegCall ? 11 : 6;
@@ -3797,17 +3792,18 @@
     Address RegAddrHi =
       CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
                                              CharUnits::fromQuantity(16));
-    llvm::Type *DoubleTy = CGF.DoubleTy;
-    llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy);
+    llvm::Type *ST = AI.canHaveCoerceToType()
+                         ? AI.getCoerceToType()
+                         : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
     llvm::Value *V;
     Address Tmp = CGF.CreateMemTemp(Ty);
     Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
-    V = CGF.Builder.CreateLoad(
-                   CGF.Builder.CreateElementBitCast(RegAddrLo, DoubleTy));
+    V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
+        RegAddrLo, ST->getStructElementType(0)));
     CGF.Builder.CreateStore(V,
                    CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero()));
-    V = CGF.Builder.CreateLoad(
-                   CGF.Builder.CreateElementBitCast(RegAddrHi, DoubleTy));
+    V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
+        RegAddrHi, ST->getStructElementType(1)));
     CGF.Builder.CreateStore(V,
           CGF.Builder.CreateStructGEP(Tmp, 1, CharUnits::fromQuantity(8)));
 
@@ -3941,7 +3937,7 @@
   // extended.
   const BuiltinType *BT = Ty->getAs<BuiltinType>();
   if (BT && BT->getKind() == BuiltinType::Bool)
-    return ABIArgInfo::getExtend();
+    return ABIArgInfo::getExtend(Ty);
 
   // Mingw64 GCC uses the old 80 bit extended precision floating point unit. It
   // passes them indirectly through memory.
@@ -4036,7 +4032,10 @@
 namespace {
 /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information.
 class PPC32_SVR4_ABIInfo : public DefaultABIInfo {
-bool IsSoftFloatABI;
+  bool IsSoftFloatABI;
+
+  CharUnits getParamTypeAlignment(QualType Ty) const;
+
 public:
   PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI)
       : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {}
@@ -4058,13 +4057,46 @@
   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                                llvm::Value *Address) const override;
 };
+}
 
+CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
+  // Complex types are passed just like their elements
+  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
+    Ty = CTy->getElementType();
+
+  if (Ty->isVectorType())
+    return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16
+                                                                       : 4);
+
+  // For single-element float/vector structs, we consider the whole type
+  // to have the same alignment requirements as its single element.
+  const Type *AlignTy = nullptr;
+  if (const Type *EltType = isSingleElementStruct(Ty, getContext())) {
+    const BuiltinType *BT = EltType->getAs<BuiltinType>();
+    if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) ||
+        (BT && BT->isFloatingPoint()))
+      AlignTy = EltType;
+  }
+
+  if (AlignTy)
+    return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4);
+  return CharUnits::fromQuantity(4);
 }
 
 // TODO: this implementation is now likely redundant with
 // DefaultABIInfo::EmitVAArg.
 Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
                                       QualType Ty) const {
+  if (getTarget().getTriple().isOSDarwin()) {
+    auto TI = getContext().getTypeInfoInChars(Ty);
+    TI.second = getParamTypeAlignment(Ty);
+
+    CharUnits SlotSize = CharUnits::fromQuantity(4);
+    return emitVoidPtrVAArg(CGF, VAList, Ty,
+                            classifyArgumentType(Ty).isIndirect(), TI, SlotSize,
+                            /*AllowHigherAlign=*/true);
+  }
+
   const unsigned OverflowLimit = 8;
   if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
     // TODO: Implement this. For now ignore.
@@ -4658,8 +4690,8 @@
                                    /*Realign=*/TyAlign > ABIAlign);
   }
 
-  return (isPromotableTypeForABI(Ty) ?
-          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+  return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+                                     : ABIArgInfo::getDirect());
 }
 
 ABIArgInfo
@@ -4713,8 +4745,8 @@
     return getNaturalAlignIndirect(RetTy);
   }
 
-  return (isPromotableTypeForABI(RetTy) ?
-          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+  return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+                                        : ABIArgInfo::getDirect());
 }
 
 // Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine.
@@ -4886,8 +4918,7 @@
   Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                       QualType Ty) const override;
 
-  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
-                                    ArrayRef<llvm::Type*> scalars,
+  bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
                                     bool asReturnValue) const override {
     return occupiesMoreThan(CGT, scalars, /*total*/ 4);
   }
@@ -4966,7 +4997,7 @@
       Ty = EnumTy->getDecl()->getIntegerType();
 
     return (Ty->isPromotableIntegerType() && isDarwinPCS()
-                ? ABIArgInfo::getExtend()
+                ? ABIArgInfo::getExtend(Ty)
                 : ABIArgInfo::getDirect());
   }
 
@@ -5036,7 +5067,7 @@
       RetTy = EnumTy->getDecl()->getIntegerType();
 
     return (RetTy->isPromotableIntegerType() && isDarwinPCS()
-                ? ABIArgInfo::getExtend()
+                ? ABIArgInfo::getExtend(RetTy)
                 : ABIArgInfo::getDirect());
   }
 
@@ -5485,8 +5516,7 @@
   llvm::CallingConv::ID getABIDefaultCC() const;
   void setCCs();
 
-  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
-                                    ArrayRef<llvm::Type*> scalars,
+  bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
                                     bool asReturnValue) const override {
     return occupiesMoreThan(CGT, scalars, /*total*/ 4);
   }
@@ -5529,9 +5559,8 @@
   }
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &CGM,
-                           ForDefinition_t IsForDefinition) const override {
-    if (!IsForDefinition)
+                           CodeGen::CodeGenModule &CGM) const override {
+    if (GV->isDeclaration())
       return;
     const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
     if (!FD)
@@ -5574,8 +5603,7 @@
       : ARMTargetCodeGenInfo(CGT, K) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &CGM,
-                           ForDefinition_t IsForDefinition) const override;
+                           CodeGen::CodeGenModule &CGM) const override;
 
   void getDependentLibraryOption(llvm::StringRef Lib,
                                  llvm::SmallString<24> &Opt) const override {
@@ -5589,12 +5617,11 @@
 };
 
 void WindowsARMTargetCodeGenInfo::setTargetAttributes(
-    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
-    ForDefinition_t IsForDefinition) const {
-  ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
-  if (!IsForDefinition)
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+  ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+  if (GV->isDeclaration())
     return;
-  addStackProbeSizeTargetAttribute(D, GV, CGM);
+  addStackProbeTargetAttributes(D, GV, CGM);
 }
 }
 
@@ -5694,10 +5721,11 @@
     return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
   }
 
-  // __fp16 gets passed as if it were an int or float, but with the top 16 bits
-  // unspecified. This is not done for OpenCL as it handles the half type
-  // natively, and does not need to interwork with AAPCS code.
-  if (Ty->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) {
+  // _Float16 and __fp16 get passed as if it were an int or float, but with
+  // the top 16 bits unspecified. This is not done for OpenCL as it handles the
+  // half type natively, and does not need to interwork with AAPCS code.
+  if ((Ty->isFloat16Type() || Ty->isHalfType()) &&
+      !getContext().getLangOpts().NativeHalfArgsAndReturns) {
     llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
       llvm::Type::getFloatTy(getVMContext()) :
       llvm::Type::getInt32Ty(getVMContext());
@@ -5710,7 +5738,7 @@
       Ty = EnumTy->getDecl()->getIntegerType();
     }
 
-    return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend()
+    return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
                                           : ABIArgInfo::getDirect());
   }
 
@@ -5892,10 +5920,11 @@
     return getNaturalAlignIndirect(RetTy);
   }
 
-  // __fp16 gets returned as if it were an int or float, but with the top 16
-  // bits unspecified. This is not done for OpenCL as it handles the half type
-  // natively, and does not need to interwork with AAPCS code.
-  if (RetTy->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) {
+  // _Float16 and __fp16 get returned as if it were an int or float, but with
+  // the top 16 bits unspecified. This is not done for OpenCL as it handles the
+  // half type natively, and does not need to interwork with AAPCS code.
+  if ((RetTy->isFloat16Type() || RetTy->isHalfType()) &&
+      !getContext().getLangOpts().NativeHalfArgsAndReturns) {
     llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
       llvm::Type::getFloatTy(getVMContext()) :
       llvm::Type::getInt32Ty(getVMContext());
@@ -5907,7 +5936,7 @@
     if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
       RetTy = EnumTy->getDecl()->getIntegerType();
 
-    return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend()
+    return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
                                             : ABIArgInfo::getDirect();
   }
 
@@ -6119,8 +6148,7 @@
     : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &M,
-                           ForDefinition_t IsForDefinition) const override;
+                           CodeGen::CodeGenModule &M) const override;
 
 private:
   // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the
@@ -6140,8 +6168,8 @@
   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
     RetTy = EnumTy->getDecl()->getIntegerType();
 
-  return (RetTy->isPromotableIntegerType() ?
-          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+  return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+                                           : ABIArgInfo::getDirect());
 }
 
 ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
@@ -6153,8 +6181,8 @@
   if (isAggregateTypeForABI(Ty))
     return getNaturalAlignIndirect(Ty, /* byval */ true);
 
-  return (Ty->isPromotableIntegerType() ?
-          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+  return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+                                        : ABIArgInfo::getDirect());
 }
 
 void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
@@ -6176,9 +6204,8 @@
 }
 
 void NVPTXTargetCodeGenInfo::setTargetAttributes(
-    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
-    ForDefinition_t IsForDefinition) const {
-  if (!IsForDefinition)
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+  if (GV->isDeclaration())
     return;
   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
   if (!FD) return;
@@ -6277,13 +6304,12 @@
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                     QualType Ty) const override;
 
-  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
-                                    ArrayRef<llvm::Type*> scalars,
+  bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
                                     bool asReturnValue) const override {
     return occupiesMoreThan(CGT, scalars, /*total*/ 4);
   }
   bool isSwiftErrorInRegister() const override {
-    return true;
+    return false;
   }
 };
 
@@ -6550,8 +6576,8 @@
     return ABIArgInfo::getDirect();
   if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64)
     return getNaturalAlignIndirect(RetTy);
-  return (isPromotableIntegerType(RetTy) ?
-          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+  return (isPromotableIntegerType(RetTy) ? ABIArgInfo::getExtend(RetTy)
+                                         : ABIArgInfo::getDirect());
 }
 
 ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
@@ -6561,7 +6587,7 @@
 
   // Integers and enums are extended to full register width.
   if (isPromotableIntegerType(Ty))
-    return ABIArgInfo::getExtend();
+    return ABIArgInfo::getExtend(Ty);
 
   // Handle vector types and vector-like structure types.  Note that
   // as opposed to float-like structure types, we do not allow any
@@ -6615,16 +6641,14 @@
   MSP430TargetCodeGenInfo(CodeGenTypes &CGT)
     : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &M,
-                           ForDefinition_t IsForDefinition) const override;
+                           CodeGen::CodeGenModule &M) const override;
 };
 
 }
 
 void MSP430TargetCodeGenInfo::setTargetAttributes(
-    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
-    ForDefinition_t IsForDefinition) const {
-  if (!IsForDefinition)
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+  if (GV->isDeclaration())
     return;
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
     if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) {
@@ -6669,7 +6693,7 @@
   void computeInfo(CGFunctionInfo &FI) const override;
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                     QualType Ty) const override;
-  bool shouldSignExtUnsignedType(QualType Ty) const override;
+  ABIArgInfo extendType(QualType Ty) const;
 };
 
 class MIPSTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -6684,8 +6708,7 @@
   }
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &CGM,
-                           ForDefinition_t IsForDefinition) const override {
+                           CodeGen::CodeGenModule &CGM) const override {
     const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
     if (!FD) return;
     llvm::Function *Fn = cast<llvm::Function>(GV);
@@ -6696,7 +6719,7 @@
       Fn->addFnAttr("short-call");
 
     // Other attributes do not have a meaning for declarations.
-    if (!IsForDefinition)
+    if (GV->isDeclaration())
       return;
 
     if (FD->hasAttr<Mips16Attr>()) {
@@ -6862,7 +6885,7 @@
 
   // All integral types are promoted to the GPR width.
   if (Ty->isIntegralOrEnumerationType())
-    return ABIArgInfo::getExtend();
+    return extendType(Ty);
 
   return ABIArgInfo::getDirect(
       nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset));
@@ -6944,8 +6967,8 @@
   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
     RetTy = EnumTy->getDecl()->getIntegerType();
 
-  return (RetTy->isPromotableIntegerType() ?
-          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+  return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+                                           : ABIArgInfo::getDirect());
 }
 
 void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const {
@@ -7011,14 +7034,14 @@
   return Addr;
 }
 
-bool MipsABIInfo::shouldSignExtUnsignedType(QualType Ty) const {
+ABIArgInfo MipsABIInfo::extendType(QualType Ty) const {
   int TySize = getContext().getTypeSize(Ty);
 
   // MIPS64 ABI requires unsigned 32 bit integers to be sign extended.
   if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
-    return true;
+    return ABIArgInfo::getSignExtend(Ty);
 
-  return false;
+  return ABIArgInfo::getExtend(Ty);
 }
 
 bool
@@ -7060,9 +7083,8 @@
     : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { }
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &CGM,
-                           ForDefinition_t IsForDefinition) const override {
-    if (!IsForDefinition)
+                           CodeGen::CodeGenModule &CGM) const override {
+    if (GV->isDeclaration())
       return;
     const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
     if (!FD) return;
@@ -7091,14 +7113,12 @@
     : DefaultTargetCodeGenInfo(CGT) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &M,
-                           ForDefinition_t IsForDefinition) const override;
+                           CodeGen::CodeGenModule &M) const override;
 };
 
 void TCETargetCodeGenInfo::setTargetAttributes(
-    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
-    ForDefinition_t IsForDefinition) const {
-  if (!IsForDefinition)
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+  if (GV->isDeclaration())
     return;
   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
   if (!FD) return;
@@ -7191,8 +7211,8 @@
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
       Ty = EnumTy->getDecl()->getIntegerType();
 
-    return (Ty->isPromotableIntegerType() ?
-            ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+    return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
+                                          : ABIArgInfo::getDirect());
   }
 
   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
@@ -7229,8 +7249,8 @@
     if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
       RetTy = EnumTy->getDecl()->getIntegerType();
 
-    return (RetTy->isPromotableIntegerType() ?
-            ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+    return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy)
+                                             : ABIArgInfo::getDirect());
   }
 
   if (isEmptyRecord(getContext(), RetTy, true))
@@ -7373,7 +7393,7 @@
   if (Ty->isPromotableIntegerType()) {
     if (InReg)
       return ABIArgInfo::getDirectInReg();
-    return ABIArgInfo::getExtend();
+    return ABIArgInfo::getExtend(Ty);
   }
   if (InReg)
     return ABIArgInfo::getDirectInReg();
@@ -7603,8 +7623,7 @@
   AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
     : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {}
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                           CodeGen::CodeGenModule &M,
-                           ForDefinition_t IsForDefinition) const override;
+                           CodeGen::CodeGenModule &M) const override;
   unsigned getOpenCLKernelCallingConv() const override;
 
   llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
@@ -7626,9 +7645,8 @@
 }
 
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
-    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
-    ForDefinition_t IsForDefinition) const {
-  if (!IsForDefinition)
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+  if (GV->isDeclaration())
     return;
   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
   if (!FD)
@@ -7955,7 +7973,7 @@
 
   // Integer types smaller than a register are extended.
   if (Size < 64 && Ty->isIntegerType())
-    return ABIArgInfo::getExtend();
+    return ABIArgInfo::getExtend(Ty);
 
   // Other non-aggregates go in registers.
   if (!isAggregateTypeForABI(Ty))
@@ -8744,6 +8762,182 @@
   return false;
 }
 
+//===----------------------------------------------------------------------===//
+// RISCV ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+class RISCVABIInfo : public DefaultABIInfo {
+private:
+  unsigned XLen; // Size of the integer ('x') registers in bits.
+  static const int NumArgGPRs = 8;
+
+public:
+  RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen)
+      : DefaultABIInfo(CGT), XLen(XLen) {}
+
+  // DefaultABIInfo's classifyReturnType and classifyArgumentType are
+  // non-virtual, but computeInfo is virtual, so we overload it.
+  void computeInfo(CGFunctionInfo &FI) const override;
+
+  ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed,
+                                  int &ArgGPRsLeft) const;
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
+
+  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                    QualType Ty) const override;
+
+  ABIArgInfo extendType(QualType Ty) const;
+};
+} // end anonymous namespace
+
+void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  QualType RetTy = FI.getReturnType();
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classifyReturnType(RetTy);
+
+  // IsRetIndirect is true if classifyArgumentType indicated the value should
+  // be passed indirect or if the type size is greater than 2*xlen. e.g. fp128
+  // is passed direct in LLVM IR, relying on the backend lowering code to
+  // rewrite the argument list and pass indirectly on RV32.
+  bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect ||
+                       getContext().getTypeSize(RetTy) > (2 * XLen);
+
+  // We must track the number of GPRs used in order to conform to the RISC-V
+  // ABI, as integer scalars passed in registers should have signext/zeroext
+  // when promoted, but are anyext if passed on the stack. As GPR usage is
+  // different for variadic arguments, we must also track whether we are
+  // examining a vararg or not.
+  int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
+  int NumFixedArgs = FI.getNumRequiredArgs();
+
+  int ArgNum = 0;
+  for (auto &ArgInfo : FI.arguments()) {
+    bool IsFixed = ArgNum < NumFixedArgs;
+    ArgInfo.info = classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft);
+    ArgNum++;
+  }
+}
+
+ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
+                                              int &ArgGPRsLeft) const {
+  assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
+  Ty = useFirstFieldIfTransparentUnion(Ty);
+
+  // Structures with either a non-trivial destructor or a non-trivial
+  // copy constructor are always passed indirectly.
+  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
+    if (ArgGPRsLeft)
+      ArgGPRsLeft -= 1;
+    return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
+                                           CGCXXABI::RAA_DirectInMemory);
+  }
+
+  // Ignore empty structs/unions.
+  if (isEmptyRecord(getContext(), Ty, true))
+    return ABIArgInfo::getIgnore();
+
+  uint64_t Size = getContext().getTypeSize(Ty);
+  uint64_t NeededAlign = getContext().getTypeAlign(Ty);
+  bool MustUseStack = false;
+  // Determine the number of GPRs needed to pass the current argument
+  // according to the ABI. 2*XLen-aligned varargs are passed in "aligned"
+  // register pairs, so may consume 3 registers.
+  int NeededArgGPRs = 1;
+  if (!IsFixed && NeededAlign == 2 * XLen)
+    NeededArgGPRs = 2 + (ArgGPRsLeft % 2);
+  else if (Size > XLen && Size <= 2 * XLen)
+    NeededArgGPRs = 2;
+
+  if (NeededArgGPRs > ArgGPRsLeft) {
+    MustUseStack = true;
+    NeededArgGPRs = ArgGPRsLeft;
+  }
+
+  ArgGPRsLeft -= NeededArgGPRs;
+
+  if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) {
+    // Treat an enum type as its underlying type.
+    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+      Ty = EnumTy->getDecl()->getIntegerType();
+
+    // All integral types are promoted to XLen width, unless passed on the
+    // stack.
+    if (Size < XLen && Ty->isIntegralOrEnumerationType() && !MustUseStack) {
+      return extendType(Ty);
+    }
+
+    return ABIArgInfo::getDirect();
+  }
+
+  // Aggregates which are <= 2*XLen will be passed in registers if possible,
+  // so coerce to integers.
+  if (Size <= 2 * XLen) {
+    unsigned Alignment = getContext().getTypeAlign(Ty);
+
+    // Use a single XLen int if possible, 2*XLen if 2*XLen alignment is
+    // required, and a 2-element XLen array if only XLen alignment is required.
+    if (Size <= XLen) {
+      return ABIArgInfo::getDirect(
+          llvm::IntegerType::get(getVMContext(), XLen));
+    } else if (Alignment == 2 * XLen) {
+      return ABIArgInfo::getDirect(
+          llvm::IntegerType::get(getVMContext(), 2 * XLen));
+    } else {
+      return ABIArgInfo::getDirect(llvm::ArrayType::get(
+          llvm::IntegerType::get(getVMContext(), XLen), 2));
+    }
+  }
+  return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
+}
+
+ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const {
+  if (RetTy->isVoidType())
+    return ABIArgInfo::getIgnore();
+
+  int ArgGPRsLeft = 2;
+
+  // The rules for return and argument types are the same, so defer to
+  // classifyArgumentType.
+  return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft);
+}
+
+Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                                QualType Ty) const {
+  CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8);
+
+  // Empty records are ignored for parameter passing purposes.
+  if (isEmptyRecord(getContext(), Ty, true)) {
+    Address Addr(CGF.Builder.CreateLoad(VAListAddr), SlotSize);
+    Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
+    return Addr;
+  }
+
+  std::pair<CharUnits, CharUnits> SizeAndAlign =
+      getContext().getTypeInfoInChars(Ty);
+
+  // Arguments bigger than 2*Xlen bytes are passed indirectly.
+  bool IsIndirect = SizeAndAlign.first > 2 * SlotSize;
+
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, SizeAndAlign,
+                          SlotSize, /*AllowHigherAlign=*/true);
+}
+
+ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const {
+  int TySize = getContext().getTypeSize(Ty);
+  // RV64 ABI requires unsigned 32 bit integers to be sign extended.
+  if (XLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
+    return ABIArgInfo::getSignExtend(Ty);
+  return ABIArgInfo::getExtend(Ty);
+}
+
+namespace {
+class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen)
+      : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen)) {}
+};
+} // namespace
 
 //===----------------------------------------------------------------------===//
 // Driver code
@@ -8858,6 +9052,11 @@
   case llvm::Triple::msp430:
     return SetCGInfo(new MSP430TargetCodeGenInfo(Types));
 
+  case llvm::Triple::riscv32:
+    return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 32));
+  case llvm::Triple::riscv64:
+    return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 64));
+
   case llvm::Triple::systemz: {
     bool HasVector = getTarget().getABI() == "vector";
     return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector));
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index d745e42..f7a813e 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -57,8 +57,7 @@
   /// setTargetAttributes - Provides a convenient hook to handle extra
   /// target-specific attributes for the given global.
   virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                                   CodeGen::CodeGenModule &M,
-                                   ForDefinition_t IsForDefinition) const {}
+                                   CodeGen::CodeGenModule &M) const {}
 
   /// emitTargetMD - Provides a convenient hook to handle extra
   /// target-specific metadata for the given global.
diff --git a/lib/CrossTU/CrossTranslationUnit.cpp b/lib/CrossTU/CrossTranslationUnit.cpp
index a503578..e20ea77 100644
--- a/lib/CrossTU/CrossTranslationUnit.cpp
+++ b/lib/CrossTU/CrossTranslationUnit.cpp
@@ -93,10 +93,7 @@
             index_error_code::multiple_definitions, IndexPath.str(), LineNo);
       StringRef FileName = LineRef.substr(Pos + 1);
       SmallString<256> FilePath = CrossTUDir;
-      if (llvm::sys::path::is_absolute(FileName))
-        FilePath = FileName;
-      else
-        llvm::sys::path::append(FilePath, FileName);
+      llvm::sys::path::append(FilePath, FileName);
       Result[FunctionLookupName] = FilePath.str().str();
     } else
       return llvm::make_error<IndexError>(
diff --git a/lib/Driver/CMakeLists.txt b/lib/Driver/CMakeLists.txt
index 5bf91f2..afb1cdf 100644
--- a/lib/Driver/CMakeLists.txt
+++ b/lib/Driver/CMakeLists.txt
@@ -25,6 +25,7 @@
   ToolChains/Arch/ARM.cpp
   ToolChains/Arch/Mips.cpp
   ToolChains/Arch/PPC.cpp
+  ToolChains/Arch/RISCV.cpp
   ToolChains/Arch/Sparc.cpp
   ToolChains/Arch/SystemZ.cpp
   ToolChains/Arch/X86.cpp
diff --git a/lib/Driver/Compilation.cpp b/lib/Driver/Compilation.cpp
index c27ea51..645da50 100644
--- a/lib/Driver/Compilation.cpp
+++ b/lib/Driver/Compilation.cpp
@@ -182,16 +182,51 @@
   return ExecutionFailed ? 1 : Res;
 }
 
-void Compilation::ExecuteJobs(
-    const JobList &Jobs,
-    SmallVectorImpl<std::pair<int, const Command *>> &FailingCommands) const {
+using FailingCommandList = SmallVectorImpl<std::pair<int, const Command *>>;
+
+static bool ActionFailed(const Action *A,
+                         const FailingCommandList &FailingCommands) {
+
+  if (FailingCommands.empty())
+    return false;
+
+  // CUDA can have the same input source code compiled multiple times so do not
+  // compiled again if there are already failures. It is OK to abort the CUDA
+  // pipeline on errors.
+  if (A->isOffloading(Action::OFK_Cuda))
+    return true;
+
+  for (const auto &CI : FailingCommands)
+    if (A == &(CI.second->getSource()))
+      return true;
+
+  for (const Action *AI : A->inputs())
+    if (ActionFailed(AI, FailingCommands))
+      return true;
+
+  return false;
+}
+
+static bool InputsOk(const Command &C,
+                     const FailingCommandList &FailingCommands) {
+  return !ActionFailed(&C.getSource(), FailingCommands);
+}
+
+void Compilation::ExecuteJobs(const JobList &Jobs,
+                              FailingCommandList &FailingCommands) const {
+  // According to UNIX standard, driver need to continue compiling all the
+  // inputs on the command line even one of them failed.
+  // In all but CLMode, execute all the jobs unless the necessary inputs for the
+  // job is missing due to previous failures.
   for (const auto &Job : Jobs) {
+    if (!InputsOk(Job, FailingCommands))
+      continue;
     const Command *FailingCommand = nullptr;
     if (int Res = ExecuteCommand(Job, FailingCommand)) {
       FailingCommands.push_back(std::make_pair(Res, FailingCommand));
-      // Bail as soon as one command fails, so we don't output duplicate error
-      // messages if we die on e.g. the same file.
-      return;
+      // Bail as soon as one command fails in cl driver mode.
+      if (TheDriver.IsCLMode())
+        return;
     }
   }
 }
diff --git a/lib/Driver/Distro.cpp b/lib/Driver/Distro.cpp
index 2df297f..f15c919 100644
--- a/lib/Driver/Distro.cpp
+++ b/lib/Driver/Distro.cpp
@@ -48,6 +48,7 @@
                       .Case("yakkety", Distro::UbuntuYakkety)
                       .Case("zesty", Distro::UbuntuZesty)
                       .Case("artful", Distro::UbuntuArtful)
+                      .Case("bionic", Distro::UbuntuBionic)
                       .Default(Distro::UnknownDistro);
     if (Version != Distro::UnknownDistro)
       return Version;
@@ -88,6 +89,8 @@
         return Distro::DebianJessie;
       case 9:
         return Distro::DebianStretch;
+      case 10:
+        return Distro::DebianBuster;
       default:
         return Distro::UnknownDistro;
       }
@@ -126,6 +129,9 @@
   if (VFS.exists("/etc/exherbo-release"))
     return Distro::Exherbo;
 
+  if (VFS.exists("/etc/alpine-release"))
+    return Distro::AlpineLinux;
+
   if (VFS.exists("/etc/arch-release"))
     return Distro::ArchLinux;
 
diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp
index 765b006..4712daf 100644
--- a/lib/Driver/Driver.cpp
+++ b/lib/Driver/Driver.cpp
@@ -62,6 +62,7 @@
 #include "llvm/Option/OptSpecifier.h"
 #include "llvm/Option/OptTable.h"
 #include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
@@ -70,6 +71,7 @@
 #include "llvm/Support/Program.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/StringSaver.h"
 #include <map>
 #include <memory>
 #include <utility>
@@ -92,7 +94,8 @@
       CCPrintHeadersFilename(nullptr), CCLogDiagnosticsFilename(nullptr),
       CCCPrintBindings(false), CCPrintHeaders(false), CCLogDiagnostics(false),
       CCGenDiagnostics(false), DefaultTargetTriple(DefaultTargetTriple),
-      CCCGenericGCCName(""), CheckInputsExist(true), CCCUsePCH(true),
+      CCCGenericGCCName(""), Saver(Alloc),
+      CheckInputsExist(true), CCCUsePCH(true),
       GenReproducer(false), SuppressMissingInputWarning(false) {
 
   // Provide a sane fallback if no VFS is specified.
@@ -103,6 +106,13 @@
   Dir = llvm::sys::path::parent_path(ClangExecutable);
   InstalledDir = Dir; // Provide a sensible default installed dir.
 
+#if defined(CLANG_CONFIG_FILE_SYSTEM_DIR)
+  SystemConfigDir = CLANG_CONFIG_FILE_SYSTEM_DIR;
+#endif
+#if defined(CLANG_CONFIG_FILE_USER_DIR)
+  UserConfigDir = CLANG_CONFIG_FILE_USER_DIR;
+#endif
+
   // Compute the path to the resource directory.
   StringRef ClangResourceDir(CLANG_RESOURCE_DIR);
   SmallString<128> P(Dir);
@@ -123,7 +133,7 @@
   setDriverModeFromOption(ClangNameParts.DriverMode);
 
   for (const char *ArgPtr : Args) {
-    // Ingore nullptrs, they are response file's EOL markers
+    // Ignore nullptrs, they are the response file's EOL markers.
     if (ArgPtr == nullptr)
       continue;
     const StringRef Arg = ArgPtr;
@@ -138,15 +148,13 @@
     return;
   StringRef Value = Opt.drop_front(OptName.size());
 
-  const unsigned M = llvm::StringSwitch<unsigned>(Value)
-                         .Case("gcc", GCCMode)
-                         .Case("g++", GXXMode)
-                         .Case("cpp", CPPMode)
-                         .Case("cl", CLMode)
-                         .Default(~0U);
-
-  if (M != ~0U)
-    Mode = static_cast<DriverMode>(M);
+  if (auto M = llvm::StringSwitch<llvm::Optional<DriverMode>>(Value)
+                   .Case("gcc", GCCMode)
+                   .Case("g++", GXXMode)
+                   .Case("cpp", CPPMode)
+                   .Case("cl", CLMode)
+                   .Default(None))
+    Mode = *M;
   else
     Diag(diag::err_drv_unsupported_option_argument) << OptName << Value;
 }
@@ -178,9 +186,19 @@
   // Check for unsupported options.
   for (const Arg *A : Args) {
     if (A->getOption().hasFlag(options::Unsupported)) {
-      Diag(diag::err_drv_unsupported_opt) << A->getAsString(Args);
-      ContainsError |= Diags.getDiagnosticLevel(diag::err_drv_unsupported_opt,
-                                                SourceLocation()) >
+      unsigned DiagID;
+      auto ArgString = A->getAsString(Args);
+      std::string Nearest;
+      if (getOpts().findNearest(
+            ArgString, Nearest, IncludedFlagsBitmask,
+            ExcludedFlagsBitmask | options::Unsupported) > 1) {
+        DiagID = diag::err_drv_unsupported_opt;
+        Diag(DiagID) << ArgString;
+      } else {
+        DiagID = diag::err_drv_unsupported_opt_with_suggestion;
+        Diag(DiagID) << ArgString << Nearest;
+      }
+      ContainsError |= Diags.getDiagnosticLevel(DiagID, SourceLocation()) >
                        DiagnosticsEngine::Warning;
       continue;
     }
@@ -195,11 +213,20 @@
   }
 
   for (const Arg *A : Args.filtered(options::OPT_UNKNOWN)) {
-    auto ID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl
-                         : diag::err_drv_unknown_argument;
-
-    Diags.Report(ID) << A->getAsString(Args);
-    ContainsError |= Diags.getDiagnosticLevel(ID, SourceLocation()) >
+    unsigned DiagID;
+    auto ArgString = A->getAsString(Args);
+    std::string Nearest;
+    if (getOpts().findNearest(
+          ArgString, Nearest, IncludedFlagsBitmask, ExcludedFlagsBitmask) > 1) {
+      DiagID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl
+                          : diag::err_drv_unknown_argument;
+      Diags.Report(DiagID) << ArgString;
+    } else {
+      DiagID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl_with_suggestion
+                          : diag::err_drv_unknown_argument_with_suggestion;
+      Diags.Report(DiagID) << ArgString << Nearest;
+    }
+    ContainsError |= Diags.getDiagnosticLevel(DiagID, SourceLocation()) >
                      DiagnosticsEngine::Warning;
   }
 
@@ -598,8 +625,216 @@
   //
   // TODO: Add support for other offloading programming models here.
   //
+}
 
-  return;
+/// Looks the given directories for the specified file.
+///
+/// \param[out] FilePath File path, if the file was found.
+/// \param[in]  Dirs Directories used for the search.
+/// \param[in]  FileName Name of the file to search for.
+/// \return True if file was found.
+///
+/// Looks for file specified by FileName sequentially in directories specified
+/// by Dirs.
+///
+static bool searchForFile(SmallVectorImpl<char> &FilePath,
+                          ArrayRef<std::string> Dirs,
+                          StringRef FileName) {
+  SmallString<128> WPath;
+  for (const StringRef &Dir : Dirs) {
+    if (Dir.empty())
+      continue;
+    WPath.clear();
+    llvm::sys::path::append(WPath, Dir, FileName);
+    llvm::sys::path::native(WPath);
+    if (llvm::sys::fs::is_regular_file(WPath)) {
+      FilePath = std::move(WPath);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Driver::readConfigFile(StringRef FileName) {
+  // Try reading the given file.
+  SmallVector<const char *, 32> NewCfgArgs;
+  if (!llvm::cl::readConfigFile(FileName, Saver, NewCfgArgs)) {
+    Diag(diag::err_drv_cannot_read_config_file) << FileName;
+    return true;
+  }
+
+  // Read options from config file.
+  llvm::SmallString<128> CfgFileName(FileName);
+  llvm::sys::path::native(CfgFileName);
+  ConfigFile = CfgFileName.str();
+  bool ContainErrors;
+  CfgOptions = llvm::make_unique<InputArgList>(
+      ParseArgStrings(NewCfgArgs, ContainErrors));
+  if (ContainErrors) {
+    CfgOptions.reset();
+    return true;
+  }
+
+  if (CfgOptions->hasArg(options::OPT_config)) {
+    CfgOptions.reset();
+    Diag(diag::err_drv_nested_config_file);
+    return true;
+  }
+
+  // Claim all arguments that come from a configuration file so that the driver
+  // does not warn on any that is unused.
+  for (Arg *A : *CfgOptions)
+    A->claim();
+  return false;
+}
+
+bool Driver::loadConfigFile() {
+  std::string CfgFileName;
+  bool FileSpecifiedExplicitly = false;
+
+  // Process options that change search path for config files.
+  if (CLOptions) {
+    if (CLOptions->hasArg(options::OPT_config_system_dir_EQ)) {
+      SmallString<128> CfgDir;
+      CfgDir.append(
+          CLOptions->getLastArgValue(options::OPT_config_system_dir_EQ));
+      if (!CfgDir.empty()) {
+        if (llvm::sys::fs::make_absolute(CfgDir).value() != 0)
+          SystemConfigDir.clear();
+        else
+          SystemConfigDir = std::string(CfgDir.begin(), CfgDir.end());
+      }
+    }
+    if (CLOptions->hasArg(options::OPT_config_user_dir_EQ)) {
+      SmallString<128> CfgDir;
+      CfgDir.append(
+          CLOptions->getLastArgValue(options::OPT_config_user_dir_EQ));
+      if (!CfgDir.empty()) {
+        if (llvm::sys::fs::make_absolute(CfgDir).value() != 0)
+          UserConfigDir.clear();
+        else
+          UserConfigDir = std::string(CfgDir.begin(), CfgDir.end());
+      }
+    }
+  }
+
+  // First try to find config file specified in command line.
+  if (CLOptions) {
+    std::vector<std::string> ConfigFiles =
+        CLOptions->getAllArgValues(options::OPT_config);
+    if (ConfigFiles.size() > 1) {
+      Diag(diag::err_drv_duplicate_config);
+      return true;
+    }
+
+    if (!ConfigFiles.empty()) {
+      CfgFileName = ConfigFiles.front();
+      assert(!CfgFileName.empty());
+
+      // If argument contains directory separator, treat it as a path to
+      // configuration file.
+      if (llvm::sys::path::has_parent_path(CfgFileName)) {
+        SmallString<128> CfgFilePath;
+        if (llvm::sys::path::is_relative(CfgFileName))
+          llvm::sys::fs::current_path(CfgFilePath);
+        llvm::sys::path::append(CfgFilePath, CfgFileName);
+        if (!llvm::sys::fs::is_regular_file(CfgFilePath)) {
+          Diag(diag::err_drv_config_file_not_exist) << CfgFilePath;
+          return true;
+        }
+        return readConfigFile(CfgFilePath);
+      }
+
+      FileSpecifiedExplicitly = true;
+    }
+  }
+
+  // If config file is not specified explicitly, try to deduce configuration
+  // from executable name. For instance, an executable 'armv7l-clang' will
+  // search for config file 'armv7l-clang.cfg'.
+  if (CfgFileName.empty() && !ClangNameParts.TargetPrefix.empty())
+    CfgFileName = ClangNameParts.TargetPrefix + '-' + ClangNameParts.ModeSuffix;
+
+  if (CfgFileName.empty())
+    return false;
+
+  // Determine architecture part of the file name, if it is present.
+  StringRef CfgFileArch = CfgFileName;
+  size_t ArchPrefixLen = CfgFileArch.find('-');
+  if (ArchPrefixLen == StringRef::npos)
+    ArchPrefixLen = CfgFileArch.size();
+  llvm::Triple CfgTriple;
+  CfgFileArch = CfgFileArch.take_front(ArchPrefixLen);
+  CfgTriple = llvm::Triple(llvm::Triple::normalize(CfgFileArch));
+  if (CfgTriple.getArch() == llvm::Triple::ArchType::UnknownArch)
+    ArchPrefixLen = 0;
+
+  if (!StringRef(CfgFileName).endswith(".cfg"))
+    CfgFileName += ".cfg";
+
+  // If config file starts with architecture name and command line options
+  // redefine architecture (with options like -m32 -LE etc), try finding new
+  // config file with that architecture.
+  SmallString<128> FixedConfigFile;
+  size_t FixedArchPrefixLen = 0;
+  if (ArchPrefixLen) {
+    // Get architecture name from config file name like 'i386.cfg' or
+    // 'armv7l-clang.cfg'.
+    // Check if command line options changes effective triple.
+    llvm::Triple EffectiveTriple = computeTargetTriple(*this,
+                                             CfgTriple.getTriple(), *CLOptions);
+    if (CfgTriple.getArch() != EffectiveTriple.getArch()) {
+      FixedConfigFile = EffectiveTriple.getArchName();
+      FixedArchPrefixLen = FixedConfigFile.size();
+      // Append the rest of original file name so that file name transforms
+      // like: i386-clang.cfg -> x86_64-clang.cfg.
+      if (ArchPrefixLen < CfgFileName.size())
+        FixedConfigFile += CfgFileName.substr(ArchPrefixLen);
+    }
+  }
+
+  // Prepare list of directories where config file is searched for.
+  SmallVector<std::string, 3> CfgFileSearchDirs;
+  CfgFileSearchDirs.push_back(UserConfigDir);
+  CfgFileSearchDirs.push_back(SystemConfigDir);
+  CfgFileSearchDirs.push_back(Dir);
+
+  // Try to find config file. First try file with corrected architecture.
+  llvm::SmallString<128> CfgFilePath;
+  if (!FixedConfigFile.empty()) {
+    if (searchForFile(CfgFilePath, CfgFileSearchDirs, FixedConfigFile))
+      return readConfigFile(CfgFilePath);
+    // If 'x86_64-clang.cfg' was not found, try 'x86_64.cfg'.
+    FixedConfigFile.resize(FixedArchPrefixLen);
+    FixedConfigFile.append(".cfg");
+    if (searchForFile(CfgFilePath, CfgFileSearchDirs, FixedConfigFile))
+      return readConfigFile(CfgFilePath);
+  }
+
+  // Then try original file name.
+  if (searchForFile(CfgFilePath, CfgFileSearchDirs, CfgFileName))
+    return readConfigFile(CfgFilePath);
+
+  // Finally try removing driver mode part: 'x86_64-clang.cfg' -> 'x86_64.cfg'.
+  if (!ClangNameParts.ModeSuffix.empty() &&
+      !ClangNameParts.TargetPrefix.empty()) {
+    CfgFileName.assign(ClangNameParts.TargetPrefix);
+    CfgFileName.append(".cfg");
+    if (searchForFile(CfgFilePath, CfgFileSearchDirs, CfgFileName))
+      return readConfigFile(CfgFilePath);
+  }
+
+  // Report error but only if config file was specified explicitly, by option
+  // --config. If it was deduced from executable name, it is not an error.
+  if (FileSpecifiedExplicitly) {
+    Diag(diag::err_drv_config_file_not_found) << CfgFileName;
+    for (const std::string &SearchDir : CfgFileSearchDirs)
+      if (!SearchDir.empty())
+        Diag(diag::note_drv_config_file_searched_in) << SearchDir;
+    return true;
+  }
+
+  return false;
 }
 
 Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
@@ -625,12 +860,35 @@
 
   // FIXME: What are we going to do with -V and -b?
 
+  // Arguments specified in command line.
+  bool ContainsError;
+  CLOptions = llvm::make_unique<InputArgList>(
+      ParseArgStrings(ArgList.slice(1), ContainsError));
+
+  // Try parsing configuration file.
+  if (!ContainsError)
+    ContainsError = loadConfigFile();
+  bool HasConfigFile = !ContainsError && (CfgOptions.get() != nullptr);
+
+  // All arguments, from both config file and command line.
+  InputArgList Args = std::move(HasConfigFile ? std::move(*CfgOptions)
+                                              : std::move(*CLOptions));
+  if (HasConfigFile)
+    for (auto *Opt : *CLOptions) {
+      const Arg *BaseArg = &Opt->getBaseArg();
+      if (BaseArg == Opt)
+        BaseArg = nullptr;
+      Arg *Copy = new llvm::opt::Arg(Opt->getOption(), Opt->getSpelling(),
+                                     Args.size(), BaseArg);
+      Copy->getValues() = Opt->getValues();
+      if (Opt->isClaimed())
+        Copy->claim();
+      Args.append(Copy);
+    }
+
   // FIXME: This stuff needs to go into the Compilation, not the driver.
   bool CCCPrintPhases;
 
-  bool ContainsError;
-  InputArgList Args = ParseArgStrings(ArgList.slice(1), ContainsError);
-
   // Silence driver warnings if requested
   Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w));
 
@@ -1146,6 +1404,10 @@
 
   // Print out the install directory.
   OS << "InstalledDir: " << InstalledDir << '\n';
+
+  // If configuration file was used, print its path.
+  if (!ConfigFile.empty())
+    OS << "Configuration file: " << ConfigFile << '\n';
 }
 
 /// PrintDiagnosticCategories - Implement the --print-diagnostic-categories
@@ -1157,42 +1419,55 @@
     OS << i << ',' << DiagnosticIDs::getCategoryNameFromID(i) << '\n';
 }
 
-void Driver::handleAutocompletions(StringRef PassedFlags) const {
+void Driver::HandleAutocompletions(StringRef PassedFlags) const {
+  if (PassedFlags == "")
+    return;
   // Print out all options that start with a given argument. This is used for
   // shell autocompletion.
   std::vector<std::string> SuggestedCompletions;
+  std::vector<std::string> Flags;
 
   unsigned short DisableFlags =
       options::NoDriverOption | options::Unsupported | options::Ignored;
-  // We want to show cc1-only options only when clang is invoked as "clang
-  // -cc1". When clang is invoked as "clang -cc1", we add "#" to the beginning
-  // of an --autocomplete  option so that the clang driver can distinguish
-  // whether it is requested to show cc1-only options or not.
-  if (PassedFlags.size() > 0 && PassedFlags[0] == '#') {
-    DisableFlags &= ~options::NoDriverOption;
-    PassedFlags = PassedFlags.substr(1);
+
+  // Parse PassedFlags by "," as all the command-line flags are passed to this
+  // function separated by ","
+  StringRef TargetFlags = PassedFlags;
+  while (TargetFlags != "") {
+    StringRef CurFlag;
+    std::tie(CurFlag, TargetFlags) = TargetFlags.split(",");
+    Flags.push_back(std::string(CurFlag));
   }
 
-  if (PassedFlags.find(',') == StringRef::npos) {
+  // We want to show cc1-only options only when clang is invoked with -cc1 or
+  // -Xclang.
+  if (std::find(Flags.begin(), Flags.end(), "-Xclang") != Flags.end() ||
+      std::find(Flags.begin(), Flags.end(), "-cc1") != Flags.end())
+    DisableFlags &= ~options::NoDriverOption;
+
+  StringRef Cur;
+  Cur = Flags.at(Flags.size() - 1);
+  StringRef Prev;
+  if (Flags.size() >= 2) {
+    Prev = Flags.at(Flags.size() - 2);
+    SuggestedCompletions = Opts->suggestValueCompletions(Prev, Cur);
+  }
+
+  if (SuggestedCompletions.empty())
+    SuggestedCompletions = Opts->suggestValueCompletions(Cur, "");
+
+  if (SuggestedCompletions.empty()) {
     // If the flag is in the form of "--autocomplete=-foo",
     // we were requested to print out all option names that start with "-foo".
     // For example, "--autocomplete=-fsyn" is expanded to "-fsyntax-only".
-    SuggestedCompletions = Opts->findByPrefix(PassedFlags, DisableFlags);
+    SuggestedCompletions = Opts->findByPrefix(Cur, DisableFlags);
 
     // We have to query the -W flags manually as they're not in the OptTable.
     // TODO: Find a good way to add them to OptTable instead and them remove
     // this code.
     for (StringRef S : DiagnosticIDs::getDiagnosticFlags())
-      if (S.startswith(PassedFlags))
+      if (S.startswith(Cur))
         SuggestedCompletions.push_back(S);
-  } else {
-    // If the flag is in the form of "--autocomplete=foo,bar", we were
-    // requested to print out all option values for "-foo" that start with
-    // "bar". For example,
-    // "--autocomplete=-stdlib=,l" is expanded to "libc++" and "libstdc++".
-    StringRef Option, Arg;
-    std::tie(Option, Arg) = PassedFlags.split(',');
-    SuggestedCompletions = Opts->suggestValueCompletions(Option, Arg);
   }
 
   // Sort the autocomplete candidates so that shells print them out in a
@@ -1200,7 +1475,11 @@
   // case-insensitive sorting for consistency with the -help option
   // which prints out options in the case-insensitive alphabetical order.
   std::sort(SuggestedCompletions.begin(), SuggestedCompletions.end(),
-            [](StringRef A, StringRef B) { return A.compare_lower(B) < 0; });
+            [](StringRef A, StringRef B) {
+              if (int X = A.compare_lower(B))
+                return X < 0;
+              return A.compare(B) > 0;
+            });
 
   llvm::outs() << llvm::join(SuggestedCompletions, "\n") << '\n';
 }
@@ -1248,6 +1527,15 @@
     SuppressMissingInputWarning = true;
   }
 
+  if (C.getArgs().hasArg(options::OPT_v)) {
+    if (!SystemConfigDir.empty())
+      llvm::errs() << "System configuration file directory: "
+                   << SystemConfigDir << "\n";
+    if (!UserConfigDir.empty())
+      llvm::errs() << "User configuration file directory: "
+                   << UserConfigDir << "\n";
+  }
+
   const ToolChain &TC = C.getDefaultToolChain();
 
   if (C.getArgs().hasArg(options::OPT_v))
@@ -1299,7 +1587,7 @@
 
   if (Arg *A = C.getArgs().getLastArg(options::OPT_autocomplete)) {
     StringRef PassedFlags = A->getValue();
-    handleAutocompletions(PassedFlags);
+    HandleAutocompletions(PassedFlags);
     return false;
   }
 
@@ -3880,7 +4168,13 @@
         break;
       case llvm::Triple::MSVC:
       case llvm::Triple::UnknownEnvironment:
-        TC = llvm::make_unique<toolchains::MSVCToolChain>(*this, Target, Args);
+        if (Args.getLastArgValue(options::OPT_fuse_ld_EQ)
+                .startswith_lower("bfd"))
+          TC = llvm::make_unique<toolchains::CrossWindowsToolChain>(
+              *this, Target, Args);
+        else
+          TC =
+              llvm::make_unique<toolchains::MSVCToolChain>(*this, Target, Args);
         break;
       }
       break;
diff --git a/lib/Driver/SanitizerArgs.cpp b/lib/Driver/SanitizerArgs.cpp
index 5eeaeb3..06e04b1 100644
--- a/lib/Driver/SanitizerArgs.cpp
+++ b/lib/Driver/SanitizerArgs.cpp
@@ -30,10 +30,11 @@
   NeedsUbsanCxxRt = Vptr | CFI,
   NotAllowedWithTrap = Vptr,
   NotAllowedWithMinimalRuntime = Vptr,
-  RequiresPIE = DataFlow,
-  NeedsUnwindTables = Address | Thread | Memory | DataFlow,
-  SupportsCoverage = Address | KernelAddress | Memory | Leak | Undefined |
-                     Integer | Nullability | DataFlow | Fuzzer | FuzzerNoLink,
+  RequiresPIE = DataFlow | Scudo,
+  NeedsUnwindTables = Address | HWAddress | Thread | Memory | DataFlow,
+  SupportsCoverage = Address | HWAddress | KernelAddress | Memory | Leak |
+                     Undefined | Integer | Nullability | DataFlow | Fuzzer |
+                     FuzzerNoLink,
   RecoverableByDefault = Undefined | Integer | Nullability,
   Unrecoverable = Unreachable | Return,
   LegacyFsanitizeRecoverMask = Undefined | Integer,
@@ -91,29 +92,28 @@
 /// Sanitizers set.
 static std::string toString(const clang::SanitizerSet &Sanitizers);
 
-static bool getDefaultBlacklist(const Driver &D, SanitizerMask Kinds,
-                                std::string &BLPath) {
-  const char *BlacklistFile = nullptr;
-  if (Kinds & Address)
-    BlacklistFile = "asan_blacklist.txt";
-  else if (Kinds & Memory)
-    BlacklistFile = "msan_blacklist.txt";
-  else if (Kinds & Thread)
-    BlacklistFile = "tsan_blacklist.txt";
-  else if (Kinds & DataFlow)
-    BlacklistFile = "dfsan_abilist.txt";
-  else if (Kinds & CFI)
-    BlacklistFile = "cfi_blacklist.txt";
-  else if (Kinds & (Undefined | Integer | Nullability))
-    BlacklistFile = "ubsan_blacklist.txt";
+static void addDefaultBlacklists(const Driver &D, SanitizerMask Kinds,
+                                 std::vector<std::string> &BlacklistFiles) {
+  struct Blacklist {
+    const char *File;
+    SanitizerMask Mask;
+  } Blacklists[] = {{"asan_blacklist.txt", Address},
+                    {"hwasan_blacklist.txt", HWAddress},
+                    {"msan_blacklist.txt", Memory},
+                    {"tsan_blacklist.txt", Thread},
+                    {"dfsan_abilist.txt", DataFlow},
+                    {"cfi_blacklist.txt", CFI},
+                    {"ubsan_blacklist.txt", Undefined | Integer | Nullability}};
 
-  if (BlacklistFile) {
+  for (auto BL : Blacklists) {
+    if (!(Kinds & BL.Mask))
+      continue;
+
     clang::SmallString<64> Path(D.ResourceDir);
-    llvm::sys::path::append(Path, BlacklistFile);
-    BLPath = Path.str();
-    return true;
+    llvm::sys::path::append(Path, "share", BL.File);
+    if (llvm::sys::fs::exists(Path))
+      BlacklistFiles.push_back(Path.str());
   }
-  return false;
 }
 
 /// Sets group bits for every group that has at least one representative already
@@ -172,8 +172,8 @@
 
 bool SanitizerArgs::needsUbsanRt() const {
   // All of these include ubsan.
-  if (needsAsanRt() || needsMsanRt() || needsTsanRt() || needsDfsanRt() ||
-      needsLsanRt() || needsCfiDiagRt())
+  if (needsAsanRt() || needsMsanRt() || needsHwasanRt() || needsTsanRt() ||
+      needsDfsanRt() || needsLsanRt() || needsCfiDiagRt() || needsScudoRt())
     return false;
 
   return (Sanitizers.Mask & NeedsUbsanRt & ~TrapSanitizers.Mask) ||
@@ -370,17 +370,17 @@
 
   // Warn about incompatible groups of sanitizers.
   std::pair<SanitizerMask, SanitizerMask> IncompatibleGroups[] = {
-      std::make_pair(Address, Thread), std::make_pair(Address, Memory),
-      std::make_pair(Thread, Memory), std::make_pair(Leak, Thread),
-      std::make_pair(Leak, Memory), std::make_pair(KernelAddress, Address),
-      std::make_pair(KernelAddress, Leak),
-      std::make_pair(KernelAddress, Thread),
-      std::make_pair(KernelAddress, Memory),
-      std::make_pair(Efficiency, Address),
-      std::make_pair(Efficiency, Leak),
-      std::make_pair(Efficiency, Thread),
-      std::make_pair(Efficiency, Memory),
-      std::make_pair(Efficiency, KernelAddress)};
+      std::make_pair(Address, Thread | Memory),
+      std::make_pair(Thread, Memory),
+      std::make_pair(Leak, Thread | Memory),
+      std::make_pair(KernelAddress, Address | Leak | Thread | Memory),
+      std::make_pair(HWAddress, Address | Thread | Memory | KernelAddress),
+      std::make_pair(Efficiency, Address | HWAddress | Leak | Thread | Memory |
+                                     KernelAddress),
+      std::make_pair(Scudo, Address | HWAddress | Leak | Thread | Memory |
+                                KernelAddress | Efficiency),
+      std::make_pair(SafeStack, Address | HWAddress | Leak | Thread | Memory |
+                                    KernelAddress | Efficiency)};
   for (auto G : IncompatibleGroups) {
     SanitizerMask Group = G.first;
     if (Kinds & Group) {
@@ -439,14 +439,11 @@
   RecoverableKinds &= ~Unrecoverable;
 
   TrappingKinds &= Kinds;
+  RecoverableKinds &= ~TrappingKinds;
 
   // Setup blacklist files.
   // Add default blacklist from resource directory.
-  {
-    std::string BLPath;
-    if (getDefaultBlacklist(D, Kinds, BLPath) && llvm::sys::fs::exists(BLPath))
-      BlacklistFiles.push_back(BLPath);
-  }
+  addDefaultBlacklists(D, Kinds, BlacklistFiles);
   // Parse -f(no-)sanitize-blacklist options.
   for (const auto *Arg : Args) {
     if (Arg->getOption().matches(options::OPT_fsanitize_blacklist)) {
@@ -455,9 +452,9 @@
       if (llvm::sys::fs::exists(BLPath)) {
         BlacklistFiles.push_back(BLPath);
         ExtraDeps.push_back(BLPath);
-      } else
+      } else {
         D.Diag(clang::diag::err_drv_no_such_file) << BLPath;
-
+      }
     } else if (Arg->getOption().matches(options::OPT_fno_sanitize_blacklist)) {
       Arg->claim();
       BlacklistFiles.clear();
@@ -520,6 +517,13 @@
     // Without PIE, external function address may resolve to a PLT record, which
     // can not be verified by the target module.
     NeedPIE |= CfiCrossDso;
+    CfiICallGeneralizePointers =
+        Args.hasArg(options::OPT_fsanitize_cfi_icall_generalize_pointers);
+
+    if (CfiCrossDso && CfiICallGeneralizePointers)
+      D.Diag(diag::err_drv_argument_not_allowed_with)
+          << "-fsanitize-cfi-cross-dso"
+          << "-fsanitize-cfi-icall-generalize-pointers";
   }
 
   Stats = Args.hasFlag(options::OPT_fsanitize_stats,
@@ -622,7 +626,7 @@
   ImplicitCfiRuntime = TC.getTriple().isAndroid();
 
   if (AllAddedKinds & Address) {
-    NeedPIE |= TC.getTriple().isAndroid() || TC.getTriple().isOSFuchsia();
+    NeedPIE |= TC.getTriple().isOSFuchsia();
     if (Arg *A =
             Args.getLastArg(options::OPT_fsanitize_address_field_padding)) {
         StringRef S = A->getValue();
@@ -675,6 +679,8 @@
   Sanitizers.Mask |= Kinds;
   RecoverableSanitizers.Mask |= RecoverableKinds;
   TrapSanitizers.Mask |= TrappingKinds;
+  assert(!(RecoverableKinds & TrappingKinds) &&
+         "Overlap between recoverable and trapping sanitizers");
 }
 
 static std::string toString(const clang::SanitizerSet &Sanitizers) {
@@ -783,7 +789,7 @@
 
   if (MsanTrackOrigins)
     CmdArgs.push_back(Args.MakeArgString("-fsanitize-memory-track-origins=" +
-                                         llvm::utostr(MsanTrackOrigins)));
+                                         Twine(MsanTrackOrigins)));
 
   if (MsanUseAfterDtor)
     CmdArgs.push_back("-fsanitize-memory-use-after-dtor");
@@ -807,6 +813,9 @@
   if (CfiCrossDso)
     CmdArgs.push_back("-fsanitize-cfi-cross-dso");
 
+  if (CfiICallGeneralizePointers)
+    CmdArgs.push_back("-fsanitize-cfi-icall-generalize-pointers");
+
   if (Stats)
     CmdArgs.push_back("-fsanitize-stats");
 
@@ -815,7 +824,7 @@
 
   if (AsanFieldPadding)
     CmdArgs.push_back(Args.MakeArgString("-fsanitize-address-field-padding=" +
-                                         llvm::utostr(AsanFieldPadding)));
+                                         Twine(AsanFieldPadding)));
 
   if (AsanUseAfterScope)
     CmdArgs.push_back("-fsanitize-address-use-after-scope");
@@ -825,7 +834,7 @@
 
   // MSan: Workaround for PR16386.
   // ASan: This is mainly to help LSan with cases such as
-  // https://code.google.com/p/address-sanitizer/issues/detail?id=373
+  // https://github.com/google/sanitizers/issues/373
   // We can't make this conditional on -fsanitize=leak, as that flag shouldn't
   // affect compilation.
   if (Sanitizers.has(Memory) || Sanitizers.has(Address))
diff --git a/lib/Driver/ToolChain.cpp b/lib/Driver/ToolChain.cpp
index 593cd82..ea0554b 100644
--- a/lib/Driver/ToolChain.cpp
+++ b/lib/Driver/ToolChain.cpp
@@ -27,6 +27,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/TargetParser.h"
 #include "llvm/Support/TargetRegistry.h"
 
@@ -79,6 +81,12 @@
     getFilePaths().push_back(CandidateLibPath);
 }
 
+void ToolChain::setTripleEnvironment(llvm::Triple::EnvironmentType Env) {
+  Triple.setEnvironment(Env);
+  if (EffectiveTriple != llvm::Triple())
+    EffectiveTriple.setEnvironment(Env);
+}
+
 ToolChain::~ToolChain() {
 }
 
@@ -90,6 +98,10 @@
                       IsIntegratedAssemblerDefault());
 }
 
+bool ToolChain::useRelaxRelocations() const {
+  return ENABLE_X86_RELAX_RELOCATIONS;
+}
+
 const SanitizerArgs& ToolChain::getSanitizerArgs() const {
   if (!SanitizerArguments.get())
     SanitizerArguments.reset(new SanitizerArgs(*this, Args));
@@ -215,6 +227,10 @@
   }
 }
 
+std::string ToolChain::getInputFilename(const InputInfo &Input) const {
+  return Input.getFilename();
+}
+
 bool ToolChain::IsUnwindTablesDefault(const ArgList &Args) const {
   return false;
 }
@@ -307,10 +323,28 @@
   return llvm::Triple::getArchTypeName(TC.getArch());
 }
 
+StringRef ToolChain::getOSLibName() const {
+  switch (Triple.getOS()) {
+  case llvm::Triple::FreeBSD:
+    return "freebsd";
+  case llvm::Triple::NetBSD:
+    return "netbsd";
+  case llvm::Triple::OpenBSD:
+    return "openbsd";
+  case llvm::Triple::Solaris:
+    return "sunos";
+  default:
+    return getOS();
+  }
+}
+
 std::string ToolChain::getCompilerRTPath() const {
   SmallString<128> Path(getDriver().ResourceDir);
-  StringRef OSLibName = Triple.isOSFreeBSD() ? "freebsd" : getOS();
-  llvm::sys::path::append(Path, "lib", OSLibName);
+  if (Triple.isOSUnknown()) {
+    llvm::sys::path::append(Path, "lib");
+  } else {
+    llvm::sys::path::append(Path, "lib", getOSLibName());
+  }
   return Path.str();
 }
 
@@ -340,8 +374,7 @@
 
 std::string ToolChain::getArchSpecificLibPath() const {
   SmallString<128> Path(getDriver().ResourceDir);
-  StringRef OSLibName = getTriple().isOSFreeBSD() ? "freebsd" : getOS();
-  llvm::sys::path::append(Path, "lib", OSLibName,
+  llvm::sys::path::append(Path, "lib", getOSLibName(),
                           llvm::Triple::getArchTypeName(getArch()));
   return Path.str();
 }
@@ -383,7 +416,7 @@
   if (llvm::sys::path::is_absolute(UseLinker)) {
     // If we're passed what looks like an absolute path, don't attempt to
     // second-guess that.
-    if (llvm::sys::fs::exists(UseLinker))
+    if (llvm::sys::fs::can_execute(UseLinker))
       return UseLinker;
   } else if (UseLinker.empty() || UseLinker == "ld") {
     // If we're passed -fuse-ld= with no argument, or with the argument ld,
@@ -398,7 +431,7 @@
     LinkerName.append(UseLinker);
 
     std::string LinkerPath(GetProgramPath(LinkerName.c_str()));
-    if (llvm::sys::fs::exists(LinkerPath))
+    if (llvm::sys::fs::can_execute(LinkerPath))
       return LinkerPath;
   }
 
@@ -437,6 +470,13 @@
                      VersionTuple());
 }
 
+llvm::ExceptionHandling
+ToolChain::GetExceptionModel(const llvm::opt::ArgList &Args) const {
+  if (Triple.isOSWindows() && Triple.getArch() != llvm::Triple::x86)
+    return llvm::ExceptionHandling::WinEH;
+  return llvm::ExceptionHandling::None;
+}
+
 bool ToolChain::isThreadModelSupported(const StringRef Model) const {
   if (Model == "single") {
     // FIXME: 'single' is only supported on ARM and WebAssembly so far.
@@ -537,11 +577,30 @@
           << tools::arm::getARMArch(MArch, getTriple()) << "ARM";
     }
 
-    // Assembly files should start in ARM mode, unless arch is M-profile.
-    // Windows is always thumb.
-    if ((InputType != types::TY_PP_Asm && Args.hasFlag(options::OPT_mthumb,
-         options::OPT_mno_thumb, ThumbDefault)) || IsMProfile ||
-         getTriple().isOSWindows()) {
+    // Check to see if an explicit choice to use thumb has been made via
+    // -mthumb. For assembler files we must check for -mthumb in the options
+    // passed to the assember via -Wa or -Xassembler.
+    bool IsThumb = false;
+    if (InputType != types::TY_PP_Asm)
+      IsThumb = Args.hasFlag(options::OPT_mthumb, options::OPT_mno_thumb,
+                              ThumbDefault);
+    else {
+      // Ideally we would check for these flags in
+      // CollectArgsForIntegratedAssembler but we can't change the ArchName at
+      // that point. There is no assembler equivalent of -mno-thumb, -marm, or
+      // -mno-arm.
+      for (const Arg *A :
+           Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) {
+        for (StringRef Value : A->getValues()) {
+          if (Value == "-mthumb")
+            IsThumb = true;
+        }
+      }
+    }
+    // Assembly files should start in ARM mode, unless arch is M-profile, or
+    // -mthumb has been passed explicitly to the assembler. Windows is always
+    // thumb.
+    if (IsThumb || IsMProfile || getTriple().isOSWindows()) {
       if (IsBigEndian)
         ArchName = "thumbeb";
       else
diff --git a/lib/Driver/ToolChains/AMDGPU.cpp b/lib/Driver/ToolChains/AMDGPU.cpp
index ce7f4e8..a313bc5 100644
--- a/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/lib/Driver/ToolChains/AMDGPU.cpp
@@ -11,6 +11,7 @@
 #include "CommonArgs.h"
 #include "InputInfo.h"
 #include "clang/Driver/Compilation.h"
+#include "clang/Driver/DriverDiagnostic.h"
 #include "llvm/Option/ArgList.h"
 
 using namespace clang::driver;
@@ -35,6 +36,24 @@
                                           CmdArgs, Inputs));
 }
 
+void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
+                                     const llvm::opt::ArgList &Args,
+                                     std::vector<StringRef> &Features) {
+  if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) {
+    StringRef value = dAbi->getValue();
+    if (value == "1.0") {
+      Features.push_back("+amdgpu-debugger-insert-nops");
+      Features.push_back("+amdgpu-debugger-reserve-regs");
+      Features.push_back("+amdgpu-debugger-emit-prologue");
+    } else {
+      D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
+    }
+  }
+
+  handleTargetFeaturesGroup(
+    Args, Features, options::OPT_m_amdgpu_Features_Group);
+}
+
 /// AMDGPU Toolchain
 AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
                                  const ArgList &Args)
diff --git a/lib/Driver/ToolChains/AMDGPU.h b/lib/Driver/ToolChains/AMDGPU.h
index e2a12d4..588c240 100644
--- a/lib/Driver/ToolChains/AMDGPU.h
+++ b/lib/Driver/ToolChains/AMDGPU.h
@@ -19,7 +19,6 @@
 namespace clang {
 namespace driver {
 namespace tools {
-
 namespace amdgpu {
 
 class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
@@ -33,6 +32,9 @@
                     const char *LinkingOutput) const override;
 };
 
+void getAMDGPUTargetFeatures(const Driver &D, const llvm::opt::ArgList &Args,
+                             std::vector<StringRef> &Features);
+
 } // end namespace amdgpu
 } // end namespace tools
 
@@ -54,7 +56,7 @@
 public:
   AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
                   const llvm::opt::ArgList &Args);
-  unsigned GetDefaultDwarfVersion() const override { return 2; }
+  unsigned GetDefaultDwarfVersion() const override { return 5; }
   bool IsIntegratedAssemblerDefault() const override { return true; }
   llvm::opt::DerivedArgList *
   TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
diff --git a/lib/Driver/ToolChains/Ananas.cpp b/lib/Driver/ToolChains/Ananas.cpp
index ee072cc..8b70aea 100644
--- a/lib/Driver/ToolChains/Ananas.cpp
+++ b/lib/Driver/ToolChains/Ananas.cpp
@@ -64,8 +64,19 @@
   if (!D.SysRoot.empty())
     CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
 
-  // Ananas only supports static linkage for now.
-  CmdArgs.push_back("-Bstatic");
+  if (Args.hasArg(options::OPT_static)) {
+    CmdArgs.push_back("-Bstatic");
+  } else {
+    if (Args.hasArg(options::OPT_rdynamic))
+      CmdArgs.push_back("-export-dynamic");
+    if (Args.hasArg(options::OPT_shared)) {
+      CmdArgs.push_back("-Bshareable");
+    } else {
+      Args.AddAllArgs(CmdArgs, options::OPT_pie);
+      CmdArgs.push_back("-dynamic-linker");
+      CmdArgs.push_back("/lib/ld-ananas.so");
+    }
+  }
 
   if (Output.isFilename()) {
     CmdArgs.push_back("-o");
@@ -75,9 +86,15 @@
   }
 
   if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
-    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
+    if (!Args.hasArg(options::OPT_shared)) {
+      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
+    }
     CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
-    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtbegin.o")));
+    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) {
+      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtbeginS.o")));
+    } else {
+      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtbegin.o")));
+    }
   }
 
   Args.AddAllArgs(CmdArgs, options::OPT_L);
@@ -97,7 +114,10 @@
     CmdArgs.push_back("-lc");
 
   if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
-    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));
+    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
+      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtendS.o")));
+    else
+      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));
     CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
   }
 
diff --git a/lib/Driver/ToolChains/Arch/AArch64.cpp b/lib/Driver/ToolChains/Arch/AArch64.cpp
index 36186f0..ad04aed 100644
--- a/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -20,14 +20,12 @@
 using namespace llvm::opt;
 
 /// getAArch64TargetCPU - Get the (LLVM) name of the AArch64 cpu we are
-/// targeting. Set \p A to the Arg corresponding to the -mcpu or -mtune
-/// arguments if they are provided, or to nullptr otherwise.
+/// targeting. Set \p A to the Arg corresponding to the -mcpu argument if it is
+/// provided, or to nullptr otherwise.
 std::string aarch64::getAArch64TargetCPU(const ArgList &Args, Arg *&A) {
   std::string CPU;
-  // If we have -mtune or -mcpu, use that.
-  if ((A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ))) {
-    CPU = StringRef(A->getValue()).lower();
-  } else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) {
+  // If we have -mcpu, use that.
+  if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) {
     StringRef Mcpu = A->getValue();
     CPU = Mcpu.split("+").first.lower();
   }
@@ -122,6 +120,12 @@
                                      const ArgList &Args,
                                      std::vector<StringRef> &Features) {
   std::string MtuneLowerCase = Mtune.lower();
+  // Check CPU name is valid
+  std::vector<StringRef> MtuneFeatures;
+  StringRef Tune;
+  if (!DecodeAArch64Mcpu(D, MtuneLowerCase, Tune, MtuneFeatures))
+    return false;
+
   // Handle CPU name is 'native'.
   if (MtuneLowerCase == "native")
     MtuneLowerCase = llvm::sys::getHostCPUName();
diff --git a/lib/Driver/ToolChains/Arch/ARM.cpp b/lib/Driver/ToolChains/Arch/ARM.cpp
index 44c8871..f67d1db 100644
--- a/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -391,12 +391,22 @@
   } else if (HDivArg)
     getARMHWDivFeatures(D, HDivArg, Args, HDivArg->getValue(), Features);
 
-  // Setting -msoft-float effectively disables NEON because of the GCC
-  // implementation, although the same isn't true of VFP or VFP3.
+  // Setting -msoft-float/-mfloat-abi=soft effectively disables the FPU (GCC
+  // ignores the -mfpu options in this case).
+  // Note that the ABI can also be set implicitly by the target selected.
   if (ABI == arm::FloatABI::Soft) {
-    Features.push_back("-neon");
-    // Also need to explicitly disable features which imply NEON.
-    Features.push_back("-crypto");
+    llvm::ARM::getFPUFeatures(llvm::ARM::FK_NONE, Features);
+
+    // Disable hardware FP features which have been enabled.
+    // FIXME: Disabling vfp2 and neon should be enough as all the other
+    //        features are dependant on these 2 features in LLVM. However
+    //        there is currently no easy way to test this in clang, so for
+    //        now just be explicit and disable all known dependent features
+    //        as well.
+    for (std::string Feature : {"vfp2", "vfp3", "vfp4", "fp-armv8", "fullfp16",
+                                "neon", "crypto", "dotprod"})
+      if (std::find(std::begin(Features), std::end(Features), "+" + Feature) != std::end(Features))
+        Features.push_back(Args.MakeArgString("-" + Feature));
   }
 
   // En/disable crc code generation.
diff --git a/lib/Driver/ToolChains/Arch/Mips.cpp b/lib/Driver/ToolChains/Arch/Mips.cpp
index 61481a9..e72754d 100644
--- a/lib/Driver/ToolChains/Arch/Mips.cpp
+++ b/lib/Driver/ToolChains/Arch/Mips.cpp
@@ -343,6 +343,28 @@
   AddTargetFeature(Args, Features, options::OPT_mno_madd4, options::OPT_mmadd4,
                    "nomadd4");
   AddTargetFeature(Args, Features, options::OPT_mmt, options::OPT_mno_mt, "mt");
+
+  if (Arg *A = Args.getLastArg(options::OPT_mindirect_jump_EQ)) {
+    StringRef Val = StringRef(A->getValue());
+    if (Val == "hazard") {
+      Arg *B =
+          Args.getLastArg(options::OPT_mmicromips, options::OPT_mno_micromips);
+      Arg *C = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16);
+
+      if (B && B->getOption().matches(options::OPT_mmicromips))
+        D.Diag(diag::err_drv_unsupported_indirect_jump_opt)
+            << "hazard" << "micromips";
+      else if (C && C->getOption().matches(options::OPT_mips16))
+        D.Diag(diag::err_drv_unsupported_indirect_jump_opt)
+            << "hazard" << "mips16";
+      else if (mips::supportsIndirectJumpHazardBarrier(CPUName))
+        Features.push_back("+use-indirect-jump-hazard");
+      else
+        D.Diag(diag::err_drv_unsupported_indirect_jump_opt)
+            << "hazard" << CPUName;
+    } else
+      D.Diag(diag::err_drv_unknown_indirect_jump_opt) << Val;
+  }
 }
 
 mips::IEEE754Standard mips::getIEEE754Standard(StringRef &CPU) {
@@ -447,3 +469,20 @@
 
   return UseFPXX;
 }
+
+bool mips::supportsIndirectJumpHazardBarrier(StringRef &CPU) {
+  // Supporting the hazard barrier method of dealing with indirect
+  // jumps requires MIPSR2 support.
+  return llvm::StringSwitch<bool>(CPU)
+      .Case("mips32r2", true)
+      .Case("mips32r3", true)
+      .Case("mips32r5", true)
+      .Case("mips32r6", true)
+      .Case("mips64r2", true)
+      .Case("mips64r3", true)
+      .Case("mips64r5", true)
+      .Case("mips64r6", true)
+      .Case("octeon", true)
+      .Case("p5600", true)
+      .Default(false);
+}
diff --git a/lib/Driver/ToolChains/Arch/Mips.h b/lib/Driver/ToolChains/Arch/Mips.h
index 89eea9a..7e90488 100644
--- a/lib/Driver/ToolChains/Arch/Mips.h
+++ b/lib/Driver/ToolChains/Arch/Mips.h
@@ -53,6 +53,7 @@
 bool shouldUseFPXX(const llvm::opt::ArgList &Args, const llvm::Triple &Triple,
                    StringRef CPUName, StringRef ABIName,
                    mips::FloatABI FloatABI);
+bool supportsIndirectJumpHazardBarrier(StringRef &CPU);
 
 } // end namespace mips
 } // end namespace target
diff --git a/lib/Driver/ToolChains/Arch/RISCV.cpp b/lib/Driver/ToolChains/Arch/RISCV.cpp
new file mode 100644
index 0000000..2b4ae88
--- /dev/null
+++ b/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -0,0 +1,61 @@
+//===--- RISCV.cpp - RISCV Helpers for Tools --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Driver/DriverDiagnostic.h"
+#include "clang/Driver/Options.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang::driver;
+using namespace clang::driver::tools;
+using namespace clang;
+using namespace llvm::opt;
+
+void riscv::getRISCVTargetFeatures(const Driver &D, const ArgList &Args,
+                                   std::vector<StringRef> &Features) {
+  if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
+    StringRef MArch = A->getValue();
+    // TODO: handle rv64
+    std::pair<StringRef, StringRef> MArchSplit = StringRef(MArch).split("rv32");
+    if (!MArchSplit.second.size())
+      return;
+
+    for (char c : MArchSplit.second) {
+      switch (c) {
+      case 'i':
+        break;
+      case 'm':
+        Features.push_back("+m");
+        break;
+      case 'a':
+        Features.push_back("+a");
+        break;
+      case 'f':
+        Features.push_back("+f");
+        break;
+      case 'd':
+        Features.push_back("+d");
+        break;
+      case 'c':
+        Features.push_back("+c");
+        break;
+      }
+    }
+  }
+}
+
+StringRef riscv::getRISCVABI(const ArgList &Args, const llvm::Triple &Triple) {
+  if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ))
+    return A->getValue();
+
+  return Triple.getArch() == llvm::Triple::riscv32 ? "ilp32" : "lp64";
+}
diff --git a/lib/Driver/ToolChains/Arch/RISCV.h b/lib/Driver/ToolChains/Arch/RISCV.h
new file mode 100644
index 0000000..beda149
--- /dev/null
+++ b/lib/Driver/ToolChains/Arch/RISCV.h
@@ -0,0 +1,32 @@
+//===--- RISCV.h - RISCV-specific Tool Helpers ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_RISCV_H
+#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_RISCV_H
+
+#include "clang/Driver/Driver.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Option/Option.h"
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace driver {
+namespace tools {
+namespace riscv {
+void getRISCVTargetFeatures(const Driver &D, const llvm::opt::ArgList &Args,
+                            std::vector<llvm::StringRef> &Features);
+StringRef getRISCVABI(const llvm::opt::ArgList &Args,
+                      const llvm::Triple &Triple);
+} // end namespace riscv
+} // namespace tools
+} // end namespace driver
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_RISCV_H
diff --git a/lib/Driver/ToolChains/Arch/X86.cpp b/lib/Driver/ToolChains/Arch/X86.cpp
index a18b2aa..7a4f836 100644
--- a/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/lib/Driver/ToolChains/Arch/X86.cpp
@@ -40,26 +40,29 @@
       return Args.MakeArgString(CPU);
   }
 
-  if (const Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) {
-    // Mapping built by referring to X86TargetInfo::getDefaultFeatures().
+  if (const Arg *A = Args.getLastArgNoClaim(options::OPT__SLASH_arch)) {
+    // Mapping built by looking at lib/Basic's X86TargetInfo::initFeatureMap().
     StringRef Arch = A->getValue();
-    const char *CPU;
-    if (Triple.getArch() == llvm::Triple::x86) {
+    const char *CPU = nullptr;
+    if (Triple.getArch() == llvm::Triple::x86) {  // 32-bit-only /arch: flags.
       CPU = llvm::StringSwitch<const char *>(Arch)
                 .Case("IA32", "i386")
                 .Case("SSE", "pentium3")
                 .Case("SSE2", "pentium4")
-                .Case("AVX", "sandybridge")
-                .Case("AVX2", "haswell")
                 .Default(nullptr);
-    } else {
+    }
+    if (CPU == nullptr) {  // 32-bit and 64-bit /arch: flags.
       CPU = llvm::StringSwitch<const char *>(Arch)
                 .Case("AVX", "sandybridge")
                 .Case("AVX2", "haswell")
+                .Case("AVX512F", "knl")
+                .Case("AVX512", "skylake-avx512")
                 .Default(nullptr);
     }
-    if (CPU)
+    if (CPU) {
+      A->claim();
       return CPU;
+    }
   }
 
   // Select the default CPU if none was given (or detection failed).
@@ -141,30 +144,6 @@
       Features.push_back("+ssse3");
   }
 
-  // Set features according to the -arch flag on MSVC.
-  if (Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) {
-    StringRef Arch = A->getValue();
-    bool ArchUsed = false;
-    // First, look for flags that are shared in x86 and x86-64.
-    if (ArchType == llvm::Triple::x86_64 || ArchType == llvm::Triple::x86) {
-      if (Arch == "AVX" || Arch == "AVX2") {
-        ArchUsed = true;
-        Features.push_back(Args.MakeArgString("+" + Arch.lower()));
-      }
-    }
-    // Then, look for x86-specific flags.
-    if (ArchType == llvm::Triple::x86) {
-      if (Arch == "IA32") {
-        ArchUsed = true;
-      } else if (Arch == "SSE" || Arch == "SSE2") {
-        ArchUsed = true;
-        Features.push_back(Args.MakeArgString("+" + Arch.lower()));
-      }
-    }
-    if (!ArchUsed)
-      D.Diag(clang::diag::warn_drv_unused_argument) << A->getAsString(Args);
-  }
-
   // Now add any that the user explicitly requested on the command line,
   // which may override the defaults.
   handleTargetFeaturesGroup(Args, Features, options::OPT_m_x86_Features_Group);
diff --git a/lib/Driver/ToolChains/BareMetal.h b/lib/Driver/ToolChains/BareMetal.h
index 5e9fd9b..0bed633 100644
--- a/lib/Driver/ToolChains/BareMetal.h
+++ b/lib/Driver/ToolChains/BareMetal.h
@@ -37,7 +37,6 @@
   bool isPIEDefault() const override { return false; }
   bool isPICDefaultForced() const override { return false; }
   bool SupportsProfiling() const override { return false; }
-  bool SupportsObjCGC() const override { return false; }
 
   RuntimeLibType GetDefaultRuntimeLibType() const override {
     return ToolChain::RLT_CompilerRT;
diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp
index 12713f8..3c08e88 100644
--- a/lib/Driver/ToolChains/Clang.cpp
+++ b/lib/Driver/ToolChains/Clang.cpp
@@ -12,9 +12,11 @@
 #include "Arch/ARM.h"
 #include "Arch/Mips.h"
 #include "Arch/PPC.h"
+#include "Arch/RISCV.h"
 #include "Arch/Sparc.h"
 #include "Arch/SystemZ.h"
 #include "Arch/X86.h"
+#include "AMDGPU.h"
 #include "CommonArgs.h"
 #include "Hexagon.h"
 #include "InputInfo.h"
@@ -273,28 +275,32 @@
   OutStrings.push_back(Args.MakeArgString(Out));
 }
 
+/// The -mprefer-vector-width option accepts either a positive integer
+/// or the string "none".
+static void ParseMPreferVectorWidth(const Driver &D, const ArgList &Args,
+                                    ArgStringList &CmdArgs) {
+  Arg *A = Args.getLastArg(options::OPT_mprefer_vector_width_EQ);
+  if (!A)
+    return;
+
+  StringRef Value = A->getValue();
+  if (Value == "none") {
+    CmdArgs.push_back("-mprefer-vector-width=none");
+  } else {
+    unsigned Width;
+    if (Value.getAsInteger(10, Width)) {
+      D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Value;
+      return;
+    }
+    CmdArgs.push_back(Args.MakeArgString("-mprefer-vector-width=" + Value));
+  }
+}
+
 static void getWebAssemblyTargetFeatures(const ArgList &Args,
                                          std::vector<StringRef> &Features) {
   handleTargetFeaturesGroup(Args, Features, options::OPT_m_wasm_Features_Group);
 }
 
-static void getAMDGPUTargetFeatures(const Driver &D, const ArgList &Args,
-                                    std::vector<StringRef> &Features) {
-  if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) {
-    StringRef value = dAbi->getValue();
-    if (value == "1.0") {
-      Features.push_back("+amdgpu-debugger-insert-nops");
-      Features.push_back("+amdgpu-debugger-reserve-regs");
-      Features.push_back("+amdgpu-debugger-emit-prologue");
-    } else {
-      D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
-    }
-  }
-
-  handleTargetFeaturesGroup(
-    Args, Features, options::OPT_m_amdgpu_Features_Group);
-}
-
 static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple,
                               const ArgList &Args, ArgStringList &CmdArgs,
                               bool ForAS) {
@@ -322,6 +328,10 @@
   case llvm::Triple::ppc64le:
     ppc::getPPCTargetFeatures(D, Triple, Args, Features);
     break;
+  case llvm::Triple::riscv32:
+  case llvm::Triple::riscv64:
+    riscv::getRISCVTargetFeatures(D, Args, Features);
+    break;
   case llvm::Triple::systemz:
     systemz::getSystemZTargetFeatures(Args, Features);
     break;
@@ -347,7 +357,7 @@
     break;
   case llvm::Triple::r600:
   case llvm::Triple::amdgcn:
-    getAMDGPUTargetFeatures(D, Args, Features);
+    amdgpu::getAMDGPUTargetFeatures(D, Args, Features);
     break;
   }
 
@@ -542,6 +552,14 @@
     }
   }
 
+  switch (Triple.getArch()) {
+    case llvm::Triple::riscv32:
+    case llvm::Triple::riscv64:
+      return !areOptimizationsEnabled(Args);
+    default:
+      break;
+  }
+
   if (Triple.isOSWindows()) {
     switch (Triple.getArch()) {
     case llvm::Triple::x86:
@@ -1277,6 +1295,8 @@
 
   case llvm::Triple::hexagon:
   case llvm::Triple::ppc64le:
+  case llvm::Triple::riscv32:
+  case llvm::Triple::riscv64:
   case llvm::Triple::systemz:
   case llvm::Triple::xcore:
     return false;
@@ -1286,6 +1306,8 @@
 static bool isNoCommonDefault(const llvm::Triple &Triple) {
   switch (Triple.getArch()) {
   default:
+    if (Triple.isOSFuchsia())
+      return true;
     return false;
 
   case llvm::Triple::xcore:
@@ -1386,6 +1408,11 @@
     AddPPCTargetArgs(Args, CmdArgs);
     break;
 
+  case llvm::Triple::riscv32:
+  case llvm::Triple::riscv64:
+    AddRISCVTargetArgs(Args, CmdArgs);
+    break;
+
   case llvm::Triple::sparc:
   case llvm::Triple::sparcel:
   case llvm::Triple::sparcv9:
@@ -1663,6 +1690,25 @@
   }
 }
 
+void Clang::AddRISCVTargetArgs(const ArgList &Args,
+                               ArgStringList &CmdArgs) const {
+  // FIXME: currently defaults to the soft-float ABIs. Will need to be
+  // expanded to select ilp32f, ilp32d, lp64f, lp64d when appropiate.
+  const char *ABIName = nullptr;
+  const llvm::Triple &Triple = getToolChain().getTriple();
+  if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ))
+    ABIName = A->getValue();
+  else if (Triple.getArch() == llvm::Triple::riscv32)
+    ABIName = "ilp32";
+  else if (Triple.getArch() == llvm::Triple::riscv64)
+    ABIName = "lp64";
+  else
+    llvm_unreachable("Unexpected triple!");
+
+  CmdArgs.push_back("-target-abi");
+  CmdArgs.push_back(ABIName);
+}
+
 void Clang::AddSparcTargetArgs(const ArgList &Args,
                                ArgStringList &CmdArgs) const {
   sparc::FloatABI FloatABI =
@@ -1717,6 +1763,9 @@
       getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument)
           << A->getOption().getName() << Value;
     }
+  } else if (getToolChain().getDriver().IsCLMode()) {
+    CmdArgs.push_back("-mllvm");
+    CmdArgs.push_back("-x86-asm-syntax=intel");
   }
 
   // Set flags to support MCU ABI.
@@ -1733,10 +1782,9 @@
   CmdArgs.push_back("-Wreturn-type");
 
   if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) {
-    std::string N = llvm::utostr(G.getValue());
-    std::string Opt = std::string("-hexagon-small-data-threshold=") + N;
     CmdArgs.push_back("-mllvm");
-    CmdArgs.push_back(Args.MakeArgString(Opt));
+    CmdArgs.push_back(Args.MakeArgString("-hexagon-small-data-threshold=" +
+                                         Twine(G.getValue())));
   }
 
   if (!Args.hasArg(options::OPT_fno_short_enums))
@@ -1885,7 +1933,7 @@
   // arg after parsing the '-I' arg.
   bool TakeNextArg = false;
 
-  bool UseRelaxRelocations = ENABLE_X86_RELAX_RELOCATIONS;
+  bool UseRelaxRelocations = C.getDefaultToolChain().useRelaxRelocations();
   const char *MipsTargetFeature = nullptr;
   for (const Arg *A :
        Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) {
@@ -1905,6 +1953,15 @@
       switch (C.getDefaultToolChain().getArch()) {
       default:
         break;
+      case llvm::Triple::thumb:
+      case llvm::Triple::thumbeb:
+      case llvm::Triple::arm:
+      case llvm::Triple::armeb:
+        if (Value == "-mthumb")
+          // -mthumb has already been processed in ComputeLLVMTriple()
+          // recognize but skip over here.
+          continue;
+        break;
       case llvm::Triple::mips:
       case llvm::Triple::mipsel:
       case llvm::Triple::mips64:
@@ -2156,6 +2213,9 @@
   if (!SignedZeros)
     CmdArgs.push_back("-fno-signed-zeros");
 
+  if (AssociativeMath && !SignedZeros && !TrappingMath)
+    CmdArgs.push_back("-mreassociate");
+
   if (ReciprocalMath)
     CmdArgs.push_back("-freciprocal-math");
 
@@ -2321,6 +2381,7 @@
       options::OPT_cl_no_signed_zeros,
       options::OPT_cl_denorms_are_zero,
       options::OPT_cl_fp32_correctly_rounded_divide_sqrt,
+      options::OPT_cl_uniform_work_group_size
   };
 
   if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) {
@@ -2442,6 +2503,13 @@
     CmdArgs.push_back("-fno-math-builtin");
 }
 
+void Driver::getDefaultModuleCachePath(SmallVectorImpl<char> &Result) {
+  llvm::sys::path::system_temp_directory(/*erasedOnReboot=*/false, Result);
+  llvm::sys::path::append(Result, "org.llvm.clang.");
+  appendUserToPath(Result);
+  llvm::sys::path::append(Result, "ModuleCache");
+}
+
 static void RenderModulesOptions(Compilation &C, const Driver &D,
                                  const ArgList &Args, const InputInfo &Input,
                                  const InputInfo &Output,
@@ -2502,10 +2570,7 @@
       llvm::sys::path::append(Path, "modules");
     } else if (Path.empty()) {
       // No module path was provided: use the default.
-      llvm::sys::path::system_temp_directory(/*erasedOnReboot=*/false, Path);
-      llvm::sys::path::append(Path, "org.llvm.clang.");
-      appendUserToPath(Path);
-      llvm::sys::path::append(Path, "ModuleCache");
+      Driver::getDefaultModuleCachePath(Path);
     }
 
     const char Arg[] = "-fmodules-cache-path=";
@@ -2622,8 +2687,13 @@
       CmdArgs.push_back("-fwchar-type=short");
       CmdArgs.push_back("-fno-signed-wchar");
     } else {
+      bool IsARM = T.isARM() || T.isThumb() || T.isAArch64();
       CmdArgs.push_back("-fwchar-type=int");
-      CmdArgs.push_back("-fsigned-wchar");
+      if (IsARM && !(T.isOSWindows() || T.getOS() == llvm::Triple::NetBSD ||
+                     T.getOS() == llvm::Triple::OpenBSD))
+        CmdArgs.push_back("-fno-signed-wchar");
+      else
+        CmdArgs.push_back("-fsigned-wchar");
     }
   }
 }
@@ -2898,7 +2968,7 @@
 
   // Forward -gcodeview. EmitCodeView might have been set by CL-compatibility
   // argument parsing.
-  if (Args.hasArg(options::OPT_gcodeview) || EmitCodeView) {
+  if (EmitCodeView) {
     // DWARFVersion remains at 0 if no explicit choice was made.
     CmdArgs.push_back("-gcodeview");
   } else if (DWARFVersion == 0 &&
@@ -2954,6 +3024,18 @@
   if (DebugInfoKind == codegenoptions::LimitedDebugInfo && NeedFullDebug)
     DebugInfoKind = codegenoptions::FullDebugInfo;
 
+  if (Args.hasFlag(options::OPT_gembed_source, options::OPT_gno_embed_source, false)) {
+    // Source embedding is a vendor extension to DWARF v5. By now we have
+    // checked if a DWARF version was stated explicitly, and have otherwise
+    // fallen back to the target default, so if this is still not at least 5 we
+    // emit an error.
+    if (DWARFVersion < 5)
+      D.Diag(diag::err_drv_argument_only_allowed_with)
+          << Args.getLastArg(options::OPT_gembed_source)->getAsString(Args)
+          << "-gdwarf-5";
+    CmdArgs.push_back("-gembed-source");
+  }
+
   RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DWARFVersion,
                           DebuggerTuning);
 
@@ -3203,13 +3285,21 @@
   if (!C.isForDiagnostics())
     CmdArgs.push_back("-disable-free");
 
-// Disable the verification pass in -asserts builds.
 #ifdef NDEBUG
-  CmdArgs.push_back("-disable-llvm-verifier");
-  // Discard LLVM value names in -asserts builds.
-  CmdArgs.push_back("-discard-value-names");
+  const bool IsAssertBuild = false;
+#else
+  const bool IsAssertBuild = true;
 #endif
 
+  // Disable the verification pass in -asserts builds.
+  if (!IsAssertBuild)
+    CmdArgs.push_back("-disable-llvm-verifier");
+
+  // Discard value names in assert builds unless otherwise specified.
+  if (Args.hasFlag(options::OPT_fdiscard_value_names,
+                   options::OPT_fno_discard_value_names, !IsAssertBuild))
+    CmdArgs.push_back("-discard-value-names");
+
   // Set the main file name, so that debug info works even with
   // -save-temps.
   CmdArgs.push_back("-main-file-name");
@@ -3364,6 +3454,9 @@
   if (!Args.hasFlag(options::OPT_foptimize_sibling_calls,
                     options::OPT_fno_optimize_sibling_calls))
     CmdArgs.push_back("-mdisable-tail-calls");
+  if (Args.hasFlag(options::OPT_fno_escaping_block_tail_calls,
+                   options::OPT_fescaping_block_tail_calls))
+    CmdArgs.push_back("-fno-escaping-block-tail-calls");
 
   Args.AddLastArg(CmdArgs, options::OPT_ffine_grained_bitfield_accesses,
                   options::OPT_fno_fine_grained_bitfield_accesses);
@@ -3418,6 +3511,10 @@
     CmdArgs.push_back("-mpie-copy-relocations");
   }
 
+  if (Args.hasFlag(options::OPT_fno_plt, options::OPT_fplt, false)) {
+    CmdArgs.push_back("-fno-plt");
+  }
+
   // -fhosted is default.
   // TODO: Audit uses of KernelOrKext and see where it'd be more appropriate to
   // use Freestanding.
@@ -3473,6 +3570,8 @@
   types::ID InputType = Input.getType();
   if (D.IsCLMode())
     AddClangCLArgs(Args, InputType, CmdArgs, &DebugInfoKind, &EmitCodeView);
+  else
+    EmitCodeView = Args.hasArg(options::OPT_gcodeview);
 
   const Arg *SplitDWARFArg = nullptr;
   RenderDebugOptions(getToolChain(), D, RawTriple, Args, EmitCodeView,
@@ -3552,9 +3651,17 @@
                     options::OPT_fno_unique_section_names, true))
     CmdArgs.push_back("-fno-unique-section-names");
 
-  Args.AddAllArgs(CmdArgs, options::OPT_finstrument_functions);
+  if (auto *A = Args.getLastArg(
+      options::OPT_finstrument_functions,
+      options::OPT_finstrument_functions_after_inlining,
+      options::OPT_finstrument_function_entry_bare))
+    A->render(Args, CmdArgs);
 
-  addPGOAndCoverageFlags(C, D, Output, Args, CmdArgs);
+  // NVPTX doesn't support PGO or coverage. There's no runtime support for
+  // sampling, overhead of call arc collection is way too high and there's no
+  // way to collect the output.
+  if (!Triple.isNVPTX())
+    addPGOAndCoverageFlags(C, D, Output, Args, CmdArgs);
 
   if (auto *ABICompatArg = Args.getLastArg(options::OPT_fclang_abi_compat_EQ))
     ABICompatArg->render(Args, CmdArgs);
@@ -3769,6 +3876,10 @@
     CmdArgs.push_back(A->getValue());
   }
 
+  if (Args.hasFlag(options::OPT_fstack_size_section,
+                   options::OPT_fno_stack_size_section, RawTriple.isPS4()))
+    CmdArgs.push_back("-fstack-size-section");
+
   CmdArgs.push_back("-ferror-limit");
   if (Arg *A = Args.getLastArg(options::OPT_ferror_limit_EQ))
     CmdArgs.push_back(A->getValue());
@@ -3829,13 +3940,9 @@
   Args.AddLastArg(CmdArgs, options::OPT_femit_all_decls);
   Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions);
   Args.AddLastArg(CmdArgs, options::OPT_fno_operator_names);
-  // Emulated TLS is enabled by default on Android and OpenBSD, and can be enabled
-  // manually with -femulated-tls.
-  bool EmulatedTLSDefault = Triple.isAndroid() || Triple.isOSOpenBSD() ||
-                            Triple.isWindowsCygwinEnvironment();
-  if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls,
-                   EmulatedTLSDefault))
-    CmdArgs.push_back("-femulated-tls");
+  Args.AddLastArg(CmdArgs, options::OPT_femulated_tls,
+                  options::OPT_fno_emulated_tls);
+
   // AltiVec-like language extensions aren't relevant for assembling.
   if (!isa<PreprocessJobAction>(JA) || Output.getType() != types::TY_PP_Asm)
     Args.AddLastArg(CmdArgs, options::OPT_fzvector);
@@ -3862,6 +3969,11 @@
                         options::OPT_fnoopenmp_use_tls, /*Default=*/true))
         CmdArgs.push_back("-fnoopenmp-use-tls");
       Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
+
+      // When in OpenMP offloading mode with NVPTX target, forward
+      // cuda-mode flag
+      Args.AddLastArg(CmdArgs, options::OPT_fopenmp_cuda_mode,
+                      options::OPT_fno_openmp_cuda_mode);
       break;
     default:
       // By default, if Clang doesn't know how to generate useful OpenMP code
@@ -3872,6 +3984,10 @@
       // semantic analysis, etc.
       break;
     }
+  } else {
+    Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd,
+                    options::OPT_fno_openmp_simd);
+    Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
   }
 
   const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs();
@@ -3951,6 +4067,10 @@
       CmdArgs.push_back("-mstack-probe-size=0");
   }
 
+  if (!Args.hasFlag(options::OPT_mstack_arg_probe,
+                    options::OPT_mno_stack_arg_probe, true))
+    CmdArgs.push_back(Args.MakeArgString("-mno-stack-arg-probe"));
+
   if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it,
                                options::OPT_mno_restrict_it)) {
     if (A->getOption().matches(options::OPT_mrestrict_it)) {
@@ -3971,6 +4091,11 @@
   // Forward -cl options to -cc1
   RenderOpenCLOptions(Args, CmdArgs);
 
+  if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) {
+    CmdArgs.push_back(
+        Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
+  }
+
   // Forward -f options with positive and negative forms; we translate
   // these by hand.
   if (Arg *A = getLastProfileSampleUseArg(Args)) {
@@ -4010,6 +4135,9 @@
     CmdArgs.push_back("-fcoroutines-ts");
   }
 
+  Args.AddLastArg(CmdArgs, options::OPT_fdouble_square_bracket_attributes,
+                  options::OPT_fno_double_square_bracket_attributes);
+
   bool HaveModules = false;
   RenderModulesOptions(C, D, Args, Input, Output, CmdArgs, HaveModules);
 
@@ -4157,9 +4285,33 @@
     addExceptionArgs(Args, InputType, getToolChain(), KernelOrKext, Runtime,
                      CmdArgs);
 
-  if (Args.hasArg(options::OPT_fsjlj_exceptions) ||
-      getToolChain().UseSjLjExceptions(Args))
-    CmdArgs.push_back("-fsjlj-exceptions");
+  // Handle exception personalities
+  Arg *A = Args.getLastArg(options::OPT_fsjlj_exceptions,
+                           options::OPT_fseh_exceptions,
+                           options::OPT_fdwarf_exceptions);
+  if (A) {
+    const Option &Opt = A->getOption();
+    if (Opt.matches(options::OPT_fsjlj_exceptions))
+      CmdArgs.push_back("-fsjlj-exceptions");
+    if (Opt.matches(options::OPT_fseh_exceptions))
+      CmdArgs.push_back("-fseh-exceptions");
+    if (Opt.matches(options::OPT_fdwarf_exceptions))
+      CmdArgs.push_back("-fdwarf-exceptions");
+  } else {
+    switch (getToolChain().GetExceptionModel(Args)) {
+    default:
+      break;
+    case llvm::ExceptionHandling::DwarfCFI:
+      CmdArgs.push_back("-fdwarf-exceptions");
+      break;
+    case llvm::ExceptionHandling::SjLj:
+      CmdArgs.push_back("-fsjlj-exceptions");
+      break;
+    case llvm::ExceptionHandling::WinEH:
+      CmdArgs.push_back("-fseh-exceptions");
+      break;
+    }
+  }
 
   // C++ "sane" operator new.
   if (!Args.hasFlag(options::OPT_fassume_sane_operator_new,
@@ -4300,6 +4452,8 @@
                    options::OPT_fno_slp_vectorize, EnableSLPVec))
     CmdArgs.push_back("-vectorize-slp");
 
+  ParseMPreferVectorWidth(D, Args, CmdArgs);
+
   if (Arg *A = Args.getLastArg(options::OPT_fshow_overloads_EQ))
     A->render(Args, CmdArgs);
 
@@ -4330,6 +4484,7 @@
     CmdArgs.push_back("-fapple-pragma-pack");
 
   if (Args.hasFlag(options::OPT_fsave_optimization_record,
+                   options::OPT_foptimization_record_file_EQ,
                    options::OPT_fno_save_optimization_record, false)) {
     CmdArgs.push_back("-opt-record-file");
 
@@ -4525,14 +4680,19 @@
     CmdArgs.push_back(Args.MakeArgString(Flags));
   }
 
-  // Host-side cuda compilation receives device-side outputs as Inputs[1...].
-  // Include them with -fcuda-include-gpubinary.
-  if (IsCuda && Inputs.size() > 1)
-    for (auto I = std::next(Inputs.begin()), E = Inputs.end(); I != E; ++I) {
+  if (IsCuda) {
+    // Host-side cuda compilation receives all device-side outputs in a single
+    // fatbin as Inputs[1]. Include the binary with -fcuda-include-gpubinary.
+    if (Inputs.size() > 1) {
+      assert(Inputs.size() == 2 && "More than one GPU binary!");
       CmdArgs.push_back("-fcuda-include-gpubinary");
-      CmdArgs.push_back(I->getFilename());
+      CmdArgs.push_back(Inputs[1].getFilename());
     }
 
+    if (Args.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc, false))
+      CmdArgs.push_back("-fcuda-rdc");
+  }
+
   // OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path
   // to specify the result of the compile phase on the host, so the meaningful
   // device declarations can be identified. Also, -fopenmp-is-device is passed
@@ -4576,6 +4736,43 @@
     CmdArgs.push_back("-fwhole-program-vtables");
   }
 
+  if (Arg *A = Args.getLastArg(options::OPT_fexperimental_isel,
+                               options::OPT_fno_experimental_isel)) {
+    CmdArgs.push_back("-mllvm");
+    if (A->getOption().matches(options::OPT_fexperimental_isel)) {
+      CmdArgs.push_back("-global-isel=1");
+
+      // GISel is on by default on AArch64 -O0, so don't bother adding
+      // the fallback remarks for it. Other combinations will add a warning of
+      // some kind.
+      bool IsArchSupported = Triple.getArch() == llvm::Triple::aarch64;
+      bool IsOptLevelSupported = false;
+
+      Arg *A = Args.getLastArg(options::OPT_O_Group);
+      if (Triple.getArch() == llvm::Triple::aarch64) {
+        if (!A || A->getOption().matches(options::OPT_O0))
+          IsOptLevelSupported = true;
+      }
+      if (!IsArchSupported || !IsOptLevelSupported) {
+        CmdArgs.push_back("-mllvm");
+        CmdArgs.push_back("-global-isel-abort=2");
+
+        if (!IsArchSupported)
+          D.Diag(diag::warn_drv_experimental_isel_incomplete) << Triple.getArchName();
+        else
+          D.Diag(diag::warn_drv_experimental_isel_incomplete_opt);
+      }
+    } else {
+      CmdArgs.push_back("-global-isel=0");
+    }
+  }
+
+  if (Arg *A = Args.getLastArg(options::OPT_fforce_enable_int128,
+                               options::OPT_fno_force_enable_int128)) {
+    if (A->getOption().matches(options::OPT_fforce_enable_int128))
+      CmdArgs.push_back("-fforce-enable-int128");
+  }
+
   // Finally add the compile command to the compilation.
   if (Args.hasArg(options::OPT__SLASH_fallback) &&
       Output.getType() == types::TY_Object &&
@@ -4873,7 +5070,9 @@
 
   // Both /showIncludes and /E (and /EP) write to stdout. Allowing both
   // would produce interleaved output, so ignore /showIncludes in such cases.
-  if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_EP))
+  if ((!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_EP)) ||
+      (Args.hasArg(options::OPT__SLASH_P) &&
+       Args.hasArg(options::OPT__SLASH_EP) && !Args.hasArg(options::OPT_E)))
     if (Arg *A = Args.getLastArg(options::OPT_show_includes))
       A->render(Args, CmdArgs);
 
@@ -4962,7 +5161,8 @@
   // Parse the default calling convention options.
   if (Arg *CCArg =
           Args.getLastArg(options::OPT__SLASH_Gd, options::OPT__SLASH_Gr,
-                          options::OPT__SLASH_Gz, options::OPT__SLASH_Gv)) {
+                          options::OPT__SLASH_Gz, options::OPT__SLASH_Gv,
+                          options::OPT__SLASH_Gregcall)) {
     unsigned DCCOptId = CCArg->getOption().getID();
     const char *DCCFlag = nullptr;
     bool ArchSupported = true;
@@ -4983,6 +5183,10 @@
       ArchSupported = Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64;
       DCCFlag = "-fdefault-calling-conv=vectorcall";
       break;
+    case options::OPT__SLASH_Gregcall:
+      ArchSupported = Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64;
+      DCCFlag = "-fdefault-calling-conv=regcall";
+      break;
     }
 
     // MSVC doesn't warn if /Gr or /Gz is used on x64, so we don't either.
@@ -5000,6 +5204,10 @@
     else
       CmdArgs.push_back("msvc");
   }
+
+  if (Args.hasArg(options::OPT__SLASH_Guard) &&
+      Args.getLastArgValue(options::OPT__SLASH_Guard).equals_lower("cf"))
+    CmdArgs.push_back("-cfguard");
 }
 
 visualstudio::Compiler *Clang::getCLFallback() const {
@@ -5297,12 +5505,15 @@
     if (I)
       Triples += ',';
 
+    // Find ToolChain for this input.
     Action::OffloadKind CurKind = Action::OFK_Host;
     const ToolChain *CurTC = &getToolChain();
     const Action *CurDep = JA.getInputs()[I];
 
     if (const auto *OA = dyn_cast<OffloadAction>(CurDep)) {
+      CurTC = nullptr;
       OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) {
+        assert(CurTC == nullptr && "Expected one dependence!");
         CurKind = A->getOffloadingDeviceKind();
         CurTC = TC;
       });
@@ -5323,7 +5534,17 @@
   for (unsigned I = 0; I < Inputs.size(); ++I) {
     if (I)
       UB += ',';
-    UB += Inputs[I].getFilename();
+
+    // Find ToolChain for this input.
+    const ToolChain *CurTC = &getToolChain();
+    if (const auto *OA = dyn_cast<OffloadAction>(JA.getInputs()[I])) {
+      CurTC = nullptr;
+      OA->doOnEachDependence([&](Action *, const ToolChain *TC, const char *) {
+        assert(CurTC == nullptr && "Expected one dependence!");
+        CurTC = TC;
+      });
+    }
+    UB += CurTC->getInputFilename(Inputs[I]);
   }
   CmdArgs.push_back(TCArgs.MakeArgString(UB));
 
@@ -5383,13 +5604,7 @@
   for (unsigned I = 0; I < Outputs.size(); ++I) {
     if (I)
       UB += ',';
-    SmallString<256> OutputFileName(Outputs[I].getFilename());
-    // Change extension of target files for OpenMP offloading
-    // to NVIDIA GPUs.
-    if (DepInfo[I].DependentToolChain->getTriple().isNVPTX() &&
-        JA.isOffloading(Action::OFK_OpenMP))
-      llvm::sys::path::replace_extension(OutputFileName, "cubin");
-    UB += OutputFileName;
+    UB += DepInfo[I].DependentToolChain->getInputFilename(Outputs[I]);
   }
   CmdArgs.push_back(TCArgs.MakeArgString(UB));
   CmdArgs.push_back("-unbundle");
diff --git a/lib/Driver/ToolChains/Clang.h b/lib/Driver/ToolChains/Clang.h
index e23822b..8893d88 100644
--- a/lib/Driver/ToolChains/Clang.h
+++ b/lib/Driver/ToolChains/Clang.h
@@ -60,6 +60,8 @@
                         llvm::opt::ArgStringList &CmdArgs) const;
   void AddR600TargetArgs(const llvm::opt::ArgList &Args,
                          llvm::opt::ArgStringList &CmdArgs) const;
+  void AddRISCVTargetArgs(const llvm::opt::ArgList &Args,
+                          llvm::opt::ArgStringList &CmdArgs) const;
   void AddSparcTargetArgs(const llvm::opt::ArgList &Args,
                           llvm::opt::ArgStringList &CmdArgs) const;
   void AddSystemZTargetArgs(const llvm::opt::ArgList &Args,
diff --git a/lib/Driver/ToolChains/CommonArgs.cpp b/lib/Driver/ToolChains/CommonArgs.cpp
index dc50c4f..17ab82e 100644
--- a/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/lib/Driver/ToolChains/CommonArgs.cpp
@@ -363,12 +363,9 @@
   return Parallelism;
 }
 
-// CloudABI and WebAssembly use -ffunction-sections and -fdata-sections by
-// default.
+// CloudABI uses -ffunction-sections and -fdata-sections by default.
 bool tools::isUseSeparateSections(const llvm::Triple &Triple) {
-  return Triple.getOS() == llvm::Triple::CloudABI ||
-         Triple.getArch() == llvm::Triple::wasm32 ||
-         Triple.getArch() == llvm::Triple::wasm64;
+  return Triple.getOS() == llvm::Triple::CloudABI;
 }
 
 void tools::AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,
@@ -419,8 +416,8 @@
     CmdArgs.push_back("-plugin-opt=thinlto");
 
   if (unsigned Parallelism = getLTOParallelism(Args, D))
-    CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=jobs=") +
-                                         llvm::to_string(Parallelism)));
+    CmdArgs.push_back(
+        Args.MakeArgString("-plugin-opt=jobs=" + Twine(Parallelism)));
 
   // If an explicit debugger tuning argument appeared, pass it along.
   if (Arg *A = Args.getLastArg(options::OPT_gTune_Group,
@@ -511,9 +508,9 @@
                                 bool IsShared, bool IsWhole) {
   // Wrap any static runtimes that must be forced into executable in
   // whole-archive.
-  if (IsWhole) CmdArgs.push_back("-whole-archive");
+  if (IsWhole) CmdArgs.push_back("--whole-archive");
   CmdArgs.push_back(TC.getCompilerRTArgString(Args, Sanitizer, IsShared));
-  if (IsWhole) CmdArgs.push_back("-no-whole-archive");
+  if (IsWhole) CmdArgs.push_back("--no-whole-archive");
 
   if (IsShared) {
     addArchSpecificRPath(TC, Args, CmdArgs);
@@ -525,6 +522,10 @@
 static bool addSanitizerDynamicList(const ToolChain &TC, const ArgList &Args,
                                     ArgStringList &CmdArgs,
                                     StringRef Sanitizer) {
+  // Solaris ld defaults to --export-dynamic behaviour but doesn't support
+  // the option, so don't try to pass it.
+  if (TC.getTriple().getOS() == llvm::Triple::Solaris)
+    return true;
   SmallString<128> SanRT(TC.getCompilerRT(Args, Sanitizer));
   if (llvm::sys::fs::exists(SanRT + ".syms")) {
     CmdArgs.push_back(Args.MakeArgString("--dynamic-list=" + SanRT + ".syms"));
@@ -541,14 +542,20 @@
   // There's no libpthread or librt on RTEMS.
   if (TC.getTriple().getOS() != llvm::Triple::RTEMS) {
     CmdArgs.push_back("-lpthread");
-    CmdArgs.push_back("-lrt");
+    if (TC.getTriple().getOS() != llvm::Triple::OpenBSD)
+      CmdArgs.push_back("-lrt");
   }
   CmdArgs.push_back("-lm");
-  // There's no libdl on FreeBSD or RTEMS.
+  // There's no libdl on all OSes.
   if (TC.getTriple().getOS() != llvm::Triple::FreeBSD &&
       TC.getTriple().getOS() != llvm::Triple::NetBSD &&
+      TC.getTriple().getOS() != llvm::Triple::OpenBSD &&
       TC.getTriple().getOS() != llvm::Triple::RTEMS)
     CmdArgs.push_back("-ldl");
+  // Required for backtrace on some OSes
+  if (TC.getTriple().getOS() == llvm::Triple::NetBSD ||
+      TC.getTriple().getOS() == llvm::Triple::FreeBSD)
+    CmdArgs.push_back("-lexecinfo");
 }
 
 static void
@@ -566,7 +573,6 @@
       if (!Args.hasArg(options::OPT_shared) && !TC.getTriple().isAndroid())
         HelperStaticRuntimes.push_back("asan-preinit");
     }
-
     if (SanArgs.needsUbsanRt()) {
       if (SanArgs.requiresMinimalRuntime()) {
         SharedRuntimes.push_back("ubsan_minimal");
@@ -574,6 +580,10 @@
         SharedRuntimes.push_back("ubsan_standalone");
       }
     }
+    if (SanArgs.needsScudoRt())
+      SharedRuntimes.push_back("scudo");
+    if (SanArgs.needsHwasanRt())
+      SharedRuntimes.push_back("hwasan");
   }
 
   // The stats_client library is also statically linked into DSOs.
@@ -590,6 +600,12 @@
     if (SanArgs.linkCXXRuntimes())
       StaticRuntimes.push_back("asan_cxx");
   }
+
+  if (SanArgs.needsHwasanRt()) {
+    StaticRuntimes.push_back("hwasan");
+    if (SanArgs.linkCXXRuntimes())
+      StaticRuntimes.push_back("hwasan_cxx");
+  }
   if (SanArgs.needsDfsanRt())
     StaticRuntimes.push_back("dfsan");
   if (SanArgs.needsLsanRt())
@@ -630,6 +646,11 @@
   }
   if (SanArgs.needsEsanRt())
     StaticRuntimes.push_back("esan");
+  if (SanArgs.needsScudoRt()) {
+    StaticRuntimes.push_back("scudo");
+    if (SanArgs.linkCXXRuntimes())
+      StaticRuntimes.push_back("scudo_cxx");
+  }
 }
 
 // Should be called before we add system libraries (C++ ABI, libstdc++/libc++,
@@ -671,7 +692,7 @@
   // If there is a static runtime with no dynamic list, force all the symbols
   // to be dynamic to be sure we export sanitizer interface functions.
   if (AddExportDynamic)
-    CmdArgs.push_back("-export-dynamic");
+    CmdArgs.push_back("--export-dynamic");
 
   const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
   if (SanArgs.hasCrossDsoCfi() && !AddExportDynamic)
@@ -719,7 +740,8 @@
   ExtractArgs.push_back(Output.getFilename());
   ExtractArgs.push_back(OutFile);
 
-  const char *Exec = Args.MakeArgString(TC.GetProgramPath("objcopy"));
+  const char *Exec =
+      Args.MakeArgString(TC.GetProgramPath(CLANG_DEFAULT_OBJCOPY));
   InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename());
 
   // First extract the dwo sections.
@@ -838,6 +860,10 @@
     }
   }
 
+  // AMDGPU-specific defaults for PIC.
+  if (Triple.getArch() == llvm::Triple::amdgcn)
+    PIC = true;
+
   // The last argument relating to either PIC or PIE wins, and no
   // other argument is used. If the last argument is any flavor of the
   // '-fno-...' arguments, both PIC and PIE are disabled. Any PIE
@@ -1027,15 +1053,7 @@
 
   switch (RLT) {
   case ToolChain::RLT_CompilerRT:
-    switch (TC.getTriple().getOS()) {
-    default:
-      llvm_unreachable("unsupported OS");
-    case llvm::Triple::Win32:
-    case llvm::Triple::Linux:
-    case llvm::Triple::Fuchsia:
-      CmdArgs.push_back(TC.getCompilerRTArgString(Args, "builtins"));
-      break;
-    }
+    CmdArgs.push_back(TC.getCompilerRTArgString(Args, "builtins"));
     break;
   case ToolChain::RLT_Libgcc:
     // Make sure libgcc is not used under MSVC environment by default
diff --git a/lib/Driver/ToolChains/CrossWindows.cpp b/lib/Driver/ToolChains/CrossWindows.cpp
index 5049033..6ca04a8 100644
--- a/lib/Driver/ToolChains/CrossWindows.cpp
+++ b/lib/Driver/ToolChains/CrossWindows.cpp
@@ -127,7 +127,8 @@
     }
 
     CmdArgs.push_back("-shared");
-    CmdArgs.push_back("-Bdynamic");
+    CmdArgs.push_back(Args.hasArg(options::OPT_static) ? "-Bstatic"
+                                                       : "-Bdynamic");
 
     CmdArgs.push_back("--enable-auto-image-base");
 
diff --git a/lib/Driver/ToolChains/Cuda.cpp b/lib/Driver/ToolChains/Cuda.cpp
index 4f740fc..86a1184 100644
--- a/lib/Driver/ToolChains/Cuda.cpp
+++ b/lib/Driver/ToolChains/Cuda.cpp
@@ -8,17 +8,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "Cuda.h"
-#include "InputInfo.h"
 #include "CommonArgs.h"
+#include "InputInfo.h"
 #include "clang/Basic/Cuda.h"
-#include "clang/Config/config.h"
 #include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Config/config.h"
 #include "clang/Driver/Compilation.h"
+#include "clang/Driver/Distro.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/Options.h"
 #include "llvm/Option/ArgList.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
 #include <system_error>
 
 using namespace clang::driver;
@@ -51,6 +54,8 @@
     return CudaVersion::CUDA_80;
   if (Major == 9 && Minor == 0)
     return CudaVersion::CUDA_90;
+  if (Major == 9 && Minor == 1)
+    return CudaVersion::CUDA_91;
   return CudaVersion::UNKNOWN;
 }
 
@@ -58,37 +63,75 @@
     const Driver &D, const llvm::Triple &HostTriple,
     const llvm::opt::ArgList &Args)
     : D(D) {
-  SmallVector<std::string, 4> CudaPathCandidates;
+  struct Candidate {
+    std::string Path;
+    bool StrictChecking;
+
+    Candidate(std::string Path, bool StrictChecking = false)
+        : Path(Path), StrictChecking(StrictChecking) {}
+  };
+  SmallVector<Candidate, 4> Candidates;
 
   // In decreasing order so we prefer newer versions to older versions.
   std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
 
   if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
-    CudaPathCandidates.push_back(
-        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
+    Candidates.emplace_back(
+        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
   } else if (HostTriple.isOSWindows()) {
     for (const char *Ver : Versions)
-      CudaPathCandidates.push_back(
+      Candidates.emplace_back(
           D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
           Ver);
   } else {
-    CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
+    if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
+      // Try to find ptxas binary. If the executable is located in a directory
+      // called 'bin/', its parent directory might be a good guess for a valid
+      // CUDA installation.
+      // However, some distributions might installs 'ptxas' to /usr/bin. In that
+      // case the candidate would be '/usr' which passes the following checks
+      // because '/usr/include' exists as well. To avoid this case, we always
+      // check for the directory potentially containing files for libdevice,
+      // even if the user passes -nocudalib.
+      if (llvm::ErrorOr<std::string> ptxas =
+              llvm::sys::findProgramByName("ptxas")) {
+        SmallString<256> ptxasAbsolutePath;
+        llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
+
+        StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
+        if (llvm::sys::path::filename(ptxasDir) == "bin")
+          Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
+                                  /*StrictChecking=*/true);
+      }
+    }
+
+    Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
     for (const char *Ver : Versions)
-      CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
+      Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
+
+    if (Distro(D.getVFS()).IsDebian())
+      // Special case for Debian to have nvidia-cuda-toolkit work
+      // out of the box. More info on http://bugs.debian.org/882505
+      Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
   }
 
-  for (const auto &CudaPath : CudaPathCandidates) {
-    if (CudaPath.empty() || !D.getVFS().exists(CudaPath))
+  bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
+
+  for (const auto &Candidate : Candidates) {
+    InstallPath = Candidate.Path;
+    if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
       continue;
 
-    InstallPath = CudaPath;
-    BinPath = CudaPath + "/bin";
+    BinPath = InstallPath + "/bin";
     IncludePath = InstallPath + "/include";
     LibDevicePath = InstallPath + "/nvvm/libdevice";
 
     auto &FS = D.getVFS();
     if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
       continue;
+    bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
+    if (CheckLibDevice && !FS.exists(LibDevicePath))
+      continue;
 
     // On Linux, we have both lib and lib64 directories, and we need to choose
     // based on our triple.  On MacOS, we have only a lib directory.
@@ -113,14 +156,18 @@
       Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
     }
 
-    if (Version == CudaVersion::CUDA_90) {
-      // CUDA-9 uses single libdevice file for all GPU variants.
+    if (Version >= CudaVersion::CUDA_90) {
+      // CUDA-9+ uses single libdevice file for all GPU variants.
       std::string FilePath = LibDevicePath + "/libdevice.10.bc";
       if (FS.exists(FilePath)) {
-        for (const char *GpuArch :
+        for (const char *GpuArchName :
              {"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
-              "sm_60", "sm_61", "sm_62", "sm_70"})
-          LibDeviceMap[GpuArch] = FilePath;
+                   "sm_60", "sm_61", "sm_62", "sm_70", "sm_72"}) {
+          const CudaArch GpuArch = StringToCudaArch(GpuArchName);
+          if (Version >= MinVersionForCudaArch(GpuArch) &&
+              Version <= MaxVersionForCudaArch(GpuArch))
+            LibDeviceMap[GpuArchName] = FilePath;
+        }
       }
     } else {
       std::error_code EC;
@@ -168,7 +215,7 @@
 
     // Check that we have found at least one libdevice that we can link in if
     // -nocudalib hasn't been specified.
-    if (LibDeviceMap.empty() && !Args.hasArg(options::OPT_nocudalib))
+    if (LibDeviceMap.empty() && !NoCudaLib)
       continue;
 
     IsValid = true;
@@ -205,15 +252,17 @@
 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
     CudaArch Arch) const {
   if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
-      ArchsWithVersionTooLowErrors.count(Arch) > 0)
+      ArchsWithBadVersion.count(Arch) > 0)
     return;
 
-  auto RequiredVersion = MinVersionForCudaArch(Arch);
-  if (Version < RequiredVersion) {
-    ArchsWithVersionTooLowErrors.insert(Arch);
-    D.Diag(diag::err_drv_cuda_version_too_low)
-        << InstallPath << CudaArchToString(Arch) << CudaVersionToString(Version)
-        << CudaVersionToString(RequiredVersion);
+  auto MinVersion = MinVersionForCudaArch(Arch);
+  auto MaxVersion = MaxVersionForCudaArch(Arch);
+  if (Version < MinVersion || Version > MaxVersion) {
+    ArchsWithBadVersion.insert(Arch);
+    D.Diag(diag::err_drv_cuda_version_unsupported)
+        << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
+        << CudaVersionToString(MaxVersion) << InstallPath
+        << CudaVersionToString(Version);
   }
 }
 
@@ -299,21 +348,24 @@
   CmdArgs.push_back("--gpu-name");
   CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
   CmdArgs.push_back("--output-file");
-  SmallString<256> OutputFileName(Output.getFilename());
-  if (JA.isOffloading(Action::OFK_OpenMP))
-    llvm::sys::path::replace_extension(OutputFileName, "cubin");
-  CmdArgs.push_back(Args.MakeArgString(OutputFileName));
+  CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
   for (const auto& II : Inputs)
     CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
 
   for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
     CmdArgs.push_back(Args.MakeArgString(A));
 
-  // In OpenMP we need to generate relocatable code.
-  if (JA.isOffloading(Action::OFK_OpenMP) &&
-      Args.hasFlag(options::OPT_fopenmp_relocatable_target,
-                   options::OPT_fnoopenmp_relocatable_target,
-                   /*Default=*/ true))
+  bool Relocatable = false;
+  if (JA.isOffloading(Action::OFK_OpenMP))
+    // In OpenMP we need to generate relocatable code.
+    Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
+                               options::OPT_fnoopenmp_relocatable_target,
+                               /*Default=*/true);
+  else if (JA.isOffloading(Action::OFK_Cuda))
+    Relocatable = Args.hasFlag(options::OPT_fcuda_rdc,
+                               options::OPT_fno_cuda_rdc, /*Default=*/false);
+
+  if (Relocatable)
     CmdArgs.push_back("-c");
 
   const char *Exec;
@@ -429,11 +481,8 @@
     if (!II.isFilename())
       continue;
 
-    SmallString<256> Name(II.getFilename());
-    llvm::sys::path::replace_extension(Name, "cubin");
-
-    const char *CubinF =
-        C.addTempFile(C.getArgs().MakeArgString(Name));
+    const char *CubinF = C.addTempFile(
+        C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
 
     CmdArgs.push_back(CubinF);
   }
@@ -461,6 +510,20 @@
   getProgramPaths().push_back(getDriver().Dir);
 }
 
+std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
+  // Only object files are changed, for example assembly files keep their .s
+  // extensions. CUDA also continues to use .o as they don't use nvlink but
+  // fatbinary.
+  if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
+    return ToolChain::getInputFilename(Input);
+
+  // Replace extension for object files with cubin because nvlink relies on
+  // these particular file names.
+  SmallString<256> Filename(ToolChain::getInputFilename(Input));
+  llvm::sys::path::replace_extension(Filename, "cubin");
+  return Filename.str();
+}
+
 void CudaToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs,
     llvm::opt::ArgStringList &CC1Args,
@@ -483,6 +546,10 @@
     if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
                            options::OPT_fno_cuda_approx_transcendentals, false))
       CC1Args.push_back("-fcuda-approx-transcendentals");
+
+    if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
+                           false))
+      CC1Args.push_back("-fcuda-rdc");
   }
 
   if (DriverArgs.hasArg(options::OPT_nocudalib))
diff --git a/lib/Driver/ToolChains/Cuda.h b/lib/Driver/ToolChains/Cuda.h
index 1e30aa7..3d08cec 100644
--- a/lib/Driver/ToolChains/Cuda.h
+++ b/lib/Driver/ToolChains/Cuda.h
@@ -40,7 +40,7 @@
 
   // CUDA architectures for which we have raised an error in
   // CheckCudaVersionSupportsArch.
-  mutable llvm::SmallSet<CudaArch, 4> ArchsWithVersionTooLowErrors;
+  mutable llvm::SmallSet<CudaArch, 4> ArchsWithBadVersion;
 
 public:
   CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
@@ -137,10 +137,12 @@
                 const ToolChain &HostTC, const llvm::opt::ArgList &Args,
                 const Action::OffloadKind OK);
 
-  virtual const llvm::Triple *getAuxTriple() const override {
+  const llvm::Triple *getAuxTriple() const override {
     return &HostTC.getTriple();
   }
 
+  std::string getInputFilename(const InputInfo &Input) const override;
+
   llvm::opt::DerivedArgList *
   TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
                 Action::OffloadKind DeviceOffloadKind) const override;
@@ -156,7 +158,7 @@
   bool isPIEDefault() const override { return false; }
   bool isPICDefaultForced() const override { return false; }
   bool SupportsProfiling() const override { return false; }
-  bool SupportsObjCGC() const override { return false; }
+  bool IsMathErrnoDefault() const override { return false; }
 
   void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                           llvm::opt::ArgStringList &CC1Args) const override;
diff --git a/lib/Driver/ToolChains/Darwin.cpp b/lib/Driver/ToolChains/Darwin.cpp
index 39c4525..b6ee4de 100644
--- a/lib/Driver/ToolChains/Darwin.cpp
+++ b/lib/Driver/ToolChains/Darwin.cpp
@@ -452,7 +452,8 @@
   // we follow suite for ease of comparison.
   AddLinkArgs(C, Args, CmdArgs, Inputs);
 
-  // For LTO, pass the name of the optimization record file.
+  // For LTO, pass the name of the optimization record file and other
+  // opt-remarks flags.
   if (Args.hasFlag(options::OPT_fsave_optimization_record,
                    options::OPT_fno_save_optimization_record, false)) {
     CmdArgs.push_back("-mllvm");
@@ -467,6 +468,14 @@
     if (getLastProfileUseArg(Args)) {
       CmdArgs.push_back("-mllvm");
       CmdArgs.push_back("-lto-pass-remarks-with-hotness");
+
+      if (const Arg *A =
+              Args.getLastArg(options::OPT_fdiagnostics_hotness_threshold_EQ)) {
+        CmdArgs.push_back("-mllvm");
+        std::string Opt =
+            std::string("-lto-pass-remarks-hotness-threshold=") + A->getValue();
+        CmdArgs.push_back(Args.MakeArgString(Opt));
+      }
     }
   }
 
@@ -545,8 +554,7 @@
   if (unsigned Parallelism =
           getLTOParallelism(Args, getToolChain().getDriver())) {
     CmdArgs.push_back("-mllvm");
-    CmdArgs.push_back(
-        Args.MakeArgString(Twine("-threads=") + llvm::to_string(Parallelism)));
+    CmdArgs.push_back(Args.MakeArgString("-threads=" + Twine(Parallelism)));
   }
 
   if (getToolChain().ShouldLinkCXXStdlib(Args))
@@ -941,13 +949,10 @@
     case DarwinPlatformKind::MacOS:
       return "MacOSX";
     case DarwinPlatformKind::IPhoneOS:
-    case DarwinPlatformKind::IPhoneOSSimulator:
       return "iPhone";
     case DarwinPlatformKind::TvOS:
-    case DarwinPlatformKind::TvOSSimulator:
       return "AppleTV";
     case DarwinPlatformKind::WatchOS:
-    case DarwinPlatformKind::WatchOSSimulator:
       return "Watch";
   }
   llvm_unreachable("Unsupported platform");
@@ -971,17 +976,11 @@
   case DarwinPlatformKind::MacOS:
     return "osx";
   case DarwinPlatformKind::IPhoneOS:
-    return "ios";
-  case DarwinPlatformKind::IPhoneOSSimulator:
-    return "iossim";
+    return TargetEnvironment == NativeEnvironment ? "ios" : "iossim";
   case DarwinPlatformKind::TvOS:
-    return "tvos";
-  case DarwinPlatformKind::TvOSSimulator:
-    return "tvossim";
+    return TargetEnvironment == NativeEnvironment ? "tvos" : "tvossim";
   case DarwinPlatformKind::WatchOS:
-    return "watchos";
-  case DarwinPlatformKind::WatchOSSimulator:
-    return "watchossim";
+    return TargetEnvironment == NativeEnvironment ? "watchos" : "watchossim";
   }
   llvm_unreachable("Unsupported platform");
 }
@@ -1172,6 +1171,351 @@
   return MacOSSDKVersion;
 }
 
+namespace {
+
+/// The Darwin OS that was selected or inferred from arguments / environment.
+struct DarwinPlatform {
+  enum SourceKind {
+    /// The OS was specified using the -target argument.
+    TargetArg,
+    /// The OS was specified using the -m<os>-version-min argument.
+    OSVersionArg,
+    /// The OS was specified using the OS_DEPLOYMENT_TARGET environment.
+    DeploymentTargetEnv,
+    /// The OS was inferred from the SDK.
+    InferredFromSDK,
+    /// The OS was inferred from the -arch.
+    InferredFromArch
+  };
+
+  using DarwinPlatformKind = Darwin::DarwinPlatformKind;
+  using DarwinEnvironmentKind = Darwin::DarwinEnvironmentKind;
+
+  DarwinPlatformKind getPlatform() const { return Platform; }
+
+  DarwinEnvironmentKind getEnvironment() const { return Environment; }
+
+  StringRef getOSVersion() const {
+    if (Kind == OSVersionArg)
+      return Argument->getValue();
+    return OSVersion;
+  }
+
+  void setOSVersion(StringRef S) {
+    assert(Kind == TargetArg && "Unexpected kind!");
+    OSVersion = S;
+  }
+
+  bool hasOSVersion() const { return HasOSVersion; }
+
+  /// Returns true if the target OS was explicitly specified.
+  bool isExplicitlySpecified() const { return Kind <= DeploymentTargetEnv; }
+
+  /// Adds the -m<os>-version-min argument to the compiler invocation.
+  void addOSVersionMinArgument(DerivedArgList &Args, const OptTable &Opts) {
+    if (Argument)
+      return;
+    assert(Kind != TargetArg && Kind != OSVersionArg && "Invalid kind");
+    options::ID Opt;
+    switch (Platform) {
+    case DarwinPlatformKind::MacOS:
+      Opt = options::OPT_mmacosx_version_min_EQ;
+      break;
+    case DarwinPlatformKind::IPhoneOS:
+      Opt = options::OPT_miphoneos_version_min_EQ;
+      break;
+    case DarwinPlatformKind::TvOS:
+      Opt = options::OPT_mtvos_version_min_EQ;
+      break;
+    case DarwinPlatformKind::WatchOS:
+      Opt = options::OPT_mwatchos_version_min_EQ;
+      break;
+    }
+    Argument = Args.MakeJoinedArg(nullptr, Opts.getOption(Opt), OSVersion);
+    Args.append(Argument);
+  }
+
+  /// Returns the OS version with the argument / environment variable that
+  /// specified it.
+  std::string getAsString(DerivedArgList &Args, const OptTable &Opts) {
+    switch (Kind) {
+    case TargetArg:
+    case OSVersionArg:
+    case InferredFromSDK:
+    case InferredFromArch:
+      assert(Argument && "OS version argument not yet inferred");
+      return Argument->getAsString(Args);
+    case DeploymentTargetEnv:
+      return (llvm::Twine(EnvVarName) + "=" + OSVersion).str();
+    }
+    llvm_unreachable("Unsupported Darwin Source Kind");
+  }
+
+  static DarwinPlatform createFromTarget(const llvm::Triple &TT,
+                                         StringRef OSVersion, Arg *A) {
+    DarwinPlatform Result(TargetArg, getPlatformFromOS(TT.getOS()), OSVersion,
+                          A);
+    switch (TT.getEnvironment()) {
+    case llvm::Triple::Simulator:
+      Result.Environment = DarwinEnvironmentKind::Simulator;
+      break;
+    default:
+      break;
+    }
+    unsigned Major, Minor, Micro;
+    TT.getOSVersion(Major, Minor, Micro);
+    if (Major == 0)
+      Result.HasOSVersion = false;
+    return Result;
+  }
+  static DarwinPlatform createOSVersionArg(DarwinPlatformKind Platform,
+                                           Arg *A) {
+    return DarwinPlatform(OSVersionArg, Platform, A);
+  }
+  static DarwinPlatform createDeploymentTargetEnv(DarwinPlatformKind Platform,
+                                                  StringRef EnvVarName,
+                                                  StringRef Value) {
+    DarwinPlatform Result(DeploymentTargetEnv, Platform, Value);
+    Result.EnvVarName = EnvVarName;
+    return Result;
+  }
+  static DarwinPlatform createFromSDK(DarwinPlatformKind Platform,
+                                      StringRef Value) {
+    return DarwinPlatform(InferredFromSDK, Platform, Value);
+  }
+  static DarwinPlatform createFromArch(llvm::Triple::OSType OS,
+                                       StringRef Value) {
+    return DarwinPlatform(InferredFromArch, getPlatformFromOS(OS), Value);
+  }
+
+private:
+  DarwinPlatform(SourceKind Kind, DarwinPlatformKind Platform, Arg *Argument)
+      : Kind(Kind), Platform(Platform), Argument(Argument) {}
+  DarwinPlatform(SourceKind Kind, DarwinPlatformKind Platform, StringRef Value,
+                 Arg *Argument = nullptr)
+      : Kind(Kind), Platform(Platform), OSVersion(Value), Argument(Argument) {}
+
+  static DarwinPlatformKind getPlatformFromOS(llvm::Triple::OSType OS) {
+    switch (OS) {
+    case llvm::Triple::Darwin:
+    case llvm::Triple::MacOSX:
+      return DarwinPlatformKind::MacOS;
+    case llvm::Triple::IOS:
+      return DarwinPlatformKind::IPhoneOS;
+    case llvm::Triple::TvOS:
+      return DarwinPlatformKind::TvOS;
+    case llvm::Triple::WatchOS:
+      return DarwinPlatformKind::WatchOS;
+    default:
+      llvm_unreachable("Unable to infer Darwin variant");
+    }
+  }
+
+  SourceKind Kind;
+  DarwinPlatformKind Platform;
+  DarwinEnvironmentKind Environment = DarwinEnvironmentKind::NativeEnvironment;
+  std::string OSVersion;
+  bool HasOSVersion = true;
+  Arg *Argument;
+  StringRef EnvVarName;
+};
+
+/// Returns the deployment target that's specified using the -m<os>-version-min
+/// argument.
+Optional<DarwinPlatform>
+getDeploymentTargetFromOSVersionArg(DerivedArgList &Args,
+                                    const Driver &TheDriver) {
+  Arg *OSXVersion = Args.getLastArg(options::OPT_mmacosx_version_min_EQ);
+  Arg *iOSVersion = Args.getLastArg(options::OPT_miphoneos_version_min_EQ,
+                                    options::OPT_mios_simulator_version_min_EQ);
+  Arg *TvOSVersion =
+      Args.getLastArg(options::OPT_mtvos_version_min_EQ,
+                      options::OPT_mtvos_simulator_version_min_EQ);
+  Arg *WatchOSVersion =
+      Args.getLastArg(options::OPT_mwatchos_version_min_EQ,
+                      options::OPT_mwatchos_simulator_version_min_EQ);
+  if (OSXVersion) {
+    if (iOSVersion || TvOSVersion || WatchOSVersion) {
+      TheDriver.Diag(diag::err_drv_argument_not_allowed_with)
+          << OSXVersion->getAsString(Args)
+          << (iOSVersion ? iOSVersion
+                         : TvOSVersion ? TvOSVersion : WatchOSVersion)
+                 ->getAsString(Args);
+    }
+    return DarwinPlatform::createOSVersionArg(Darwin::MacOS, OSXVersion);
+  } else if (iOSVersion) {
+    if (TvOSVersion || WatchOSVersion) {
+      TheDriver.Diag(diag::err_drv_argument_not_allowed_with)
+          << iOSVersion->getAsString(Args)
+          << (TvOSVersion ? TvOSVersion : WatchOSVersion)->getAsString(Args);
+    }
+    return DarwinPlatform::createOSVersionArg(Darwin::IPhoneOS, iOSVersion);
+  } else if (TvOSVersion) {
+    if (WatchOSVersion) {
+      TheDriver.Diag(diag::err_drv_argument_not_allowed_with)
+          << TvOSVersion->getAsString(Args)
+          << WatchOSVersion->getAsString(Args);
+    }
+    return DarwinPlatform::createOSVersionArg(Darwin::TvOS, TvOSVersion);
+  } else if (WatchOSVersion)
+    return DarwinPlatform::createOSVersionArg(Darwin::WatchOS, WatchOSVersion);
+  return None;
+}
+
+/// Returns the deployment target that's specified using the
+/// OS_DEPLOYMENT_TARGET environment variable.
+Optional<DarwinPlatform>
+getDeploymentTargetFromEnvironmentVariables(const Driver &TheDriver,
+                                            const llvm::Triple &Triple) {
+  std::string Targets[Darwin::LastDarwinPlatform + 1];
+  const char *EnvVars[] = {
+      "MACOSX_DEPLOYMENT_TARGET",
+      "IPHONEOS_DEPLOYMENT_TARGET",
+      "TVOS_DEPLOYMENT_TARGET",
+      "WATCHOS_DEPLOYMENT_TARGET",
+  };
+  static_assert(llvm::array_lengthof(EnvVars) == Darwin::LastDarwinPlatform + 1,
+                "Missing platform");
+  for (const auto &I : llvm::enumerate(llvm::makeArrayRef(EnvVars))) {
+    if (char *Env = ::getenv(I.value()))
+      Targets[I.index()] = Env;
+  }
+
+  // Do not allow conflicts with the watchOS target.
+  if (!Targets[Darwin::WatchOS].empty() &&
+      (!Targets[Darwin::IPhoneOS].empty() || !Targets[Darwin::TvOS].empty())) {
+    TheDriver.Diag(diag::err_drv_conflicting_deployment_targets)
+        << "WATCHOS_DEPLOYMENT_TARGET"
+        << (!Targets[Darwin::IPhoneOS].empty() ? "IPHONEOS_DEPLOYMENT_TARGET"
+                                               : "TVOS_DEPLOYMENT_TARGET");
+  }
+
+  // Do not allow conflicts with the tvOS target.
+  if (!Targets[Darwin::TvOS].empty() && !Targets[Darwin::IPhoneOS].empty()) {
+    TheDriver.Diag(diag::err_drv_conflicting_deployment_targets)
+        << "TVOS_DEPLOYMENT_TARGET"
+        << "IPHONEOS_DEPLOYMENT_TARGET";
+  }
+
+  // Allow conflicts among OSX and iOS for historical reasons, but choose the
+  // default platform.
+  if (!Targets[Darwin::MacOS].empty() &&
+      (!Targets[Darwin::IPhoneOS].empty() ||
+       !Targets[Darwin::WatchOS].empty() || !Targets[Darwin::TvOS].empty())) {
+    if (Triple.getArch() == llvm::Triple::arm ||
+        Triple.getArch() == llvm::Triple::aarch64 ||
+        Triple.getArch() == llvm::Triple::thumb)
+      Targets[Darwin::MacOS] = "";
+    else
+      Targets[Darwin::IPhoneOS] = Targets[Darwin::WatchOS] =
+          Targets[Darwin::TvOS] = "";
+  }
+
+  for (const auto &Target : llvm::enumerate(llvm::makeArrayRef(Targets))) {
+    if (!Target.value().empty())
+      return DarwinPlatform::createDeploymentTargetEnv(
+          (Darwin::DarwinPlatformKind)Target.index(), EnvVars[Target.index()],
+          Target.value());
+  }
+  return None;
+}
+
+/// Tries to infer the deployment target from the SDK specified by -isysroot
+/// (or SDKROOT).
+Optional<DarwinPlatform> inferDeploymentTargetFromSDK(DerivedArgList &Args) {
+  const Arg *A = Args.getLastArg(options::OPT_isysroot);
+  if (!A)
+    return None;
+  StringRef isysroot = A->getValue();
+  StringRef SDK = Darwin::getSDKName(isysroot);
+  if (!SDK.size())
+    return None;
+  // Slice the version number out.
+  // Version number is between the first and the last number.
+  size_t StartVer = SDK.find_first_of("0123456789");
+  size_t EndVer = SDK.find_last_of("0123456789");
+  if (StartVer != StringRef::npos && EndVer > StartVer) {
+    StringRef Version = SDK.slice(StartVer, EndVer + 1);
+    if (SDK.startswith("iPhoneOS") || SDK.startswith("iPhoneSimulator"))
+      return DarwinPlatform::createFromSDK(Darwin::IPhoneOS, Version);
+    else if (SDK.startswith("MacOSX"))
+      return DarwinPlatform::createFromSDK(Darwin::MacOS,
+                                           getSystemOrSDKMacOSVersion(Version));
+    else if (SDK.startswith("WatchOS") || SDK.startswith("WatchSimulator"))
+      return DarwinPlatform::createFromSDK(Darwin::WatchOS, Version);
+    else if (SDK.startswith("AppleTVOS") || SDK.startswith("AppleTVSimulator"))
+      return DarwinPlatform::createFromSDK(Darwin::TvOS, Version);
+  }
+  return None;
+}
+
+std::string getOSVersion(llvm::Triple::OSType OS, const llvm::Triple &Triple,
+                         const Driver &TheDriver) {
+  unsigned Major, Minor, Micro;
+  switch (OS) {
+  case llvm::Triple::Darwin:
+  case llvm::Triple::MacOSX:
+    if (!Triple.getMacOSXVersion(Major, Minor, Micro))
+      TheDriver.Diag(diag::err_drv_invalid_darwin_version)
+          << Triple.getOSName();
+    break;
+  case llvm::Triple::IOS:
+    Triple.getiOSVersion(Major, Minor, Micro);
+    break;
+  case llvm::Triple::TvOS:
+    Triple.getOSVersion(Major, Minor, Micro);
+    break;
+  case llvm::Triple::WatchOS:
+    Triple.getWatchOSVersion(Major, Minor, Micro);
+    break;
+  default:
+    llvm_unreachable("Unexpected OS type");
+    break;
+  }
+
+  std::string OSVersion;
+  llvm::raw_string_ostream(OSVersion) << Major << '.' << Minor << '.' << Micro;
+  return OSVersion;
+}
+
+/// Tries to infer the target OS from the -arch.
+Optional<DarwinPlatform>
+inferDeploymentTargetFromArch(DerivedArgList &Args, const Darwin &Toolchain,
+                              const llvm::Triple &Triple,
+                              const Driver &TheDriver) {
+  llvm::Triple::OSType OSTy = llvm::Triple::UnknownOS;
+
+  StringRef MachOArchName = Toolchain.getMachOArchName(Args);
+  if (MachOArchName == "armv7" || MachOArchName == "armv7s" ||
+      MachOArchName == "arm64")
+    OSTy = llvm::Triple::IOS;
+  else if (MachOArchName == "armv7k")
+    OSTy = llvm::Triple::WatchOS;
+  else if (MachOArchName != "armv6m" && MachOArchName != "armv7m" &&
+           MachOArchName != "armv7em")
+    OSTy = llvm::Triple::MacOSX;
+
+  if (OSTy == llvm::Triple::UnknownOS)
+    return None;
+  return DarwinPlatform::createFromArch(OSTy,
+                                        getOSVersion(OSTy, Triple, TheDriver));
+}
+
+/// Returns the deployment target that's specified using the -target option.
+Optional<DarwinPlatform> getDeploymentTargetFromTargetArg(
+    DerivedArgList &Args, const llvm::Triple &Triple, const Driver &TheDriver) {
+  if (!Args.hasArg(options::OPT_target))
+    return None;
+  if (Triple.getOS() == llvm::Triple::Darwin ||
+      Triple.getOS() == llvm::Triple::UnknownOS)
+    return None;
+  std::string OSVersion = getOSVersion(Triple.getOS(), Triple, TheDriver);
+  return DarwinPlatform::createFromTarget(Triple, OSVersion,
+                                          Args.getLastArg(options::OPT_target));
+}
+
+} // namespace
+
 void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
   const OptTable &Opts = getDriver().getOpts();
 
@@ -1194,241 +1538,89 @@
     }
   }
 
-  Arg *OSXVersion = Args.getLastArg(options::OPT_mmacosx_version_min_EQ);
-  Arg *iOSVersion = Args.getLastArg(options::OPT_miphoneos_version_min_EQ,
-                                    options::OPT_mios_simulator_version_min_EQ);
-  Arg *TvOSVersion =
-      Args.getLastArg(options::OPT_mtvos_version_min_EQ,
-                      options::OPT_mtvos_simulator_version_min_EQ);
-  Arg *WatchOSVersion =
-      Args.getLastArg(options::OPT_mwatchos_version_min_EQ,
-                      options::OPT_mwatchos_simulator_version_min_EQ);
-
-  unsigned Major, Minor, Micro;
-  bool HadExtra;
-
-  // The iOS deployment target that is explicitly specified via a command line
-  // option or an environment variable.
-  std::string ExplicitIOSDeploymentTargetStr;
-
-  if (iOSVersion)
-    ExplicitIOSDeploymentTargetStr = iOSVersion->getAsString(Args);
-
-  // Add a macro to differentiate between m(iphone|tv|watch)os-version-min=X.Y and
-  // -m(iphone|tv|watch)simulator-version-min=X.Y.
-  if (Args.hasArg(options::OPT_mios_simulator_version_min_EQ) ||
-      Args.hasArg(options::OPT_mtvos_simulator_version_min_EQ) ||
-      Args.hasArg(options::OPT_mwatchos_simulator_version_min_EQ))
-    Args.append(Args.MakeSeparateArg(nullptr, Opts.getOption(options::OPT_D),
-                                     " __APPLE_EMBEDDED_SIMULATOR__=1"));
-
-  if (OSXVersion && (iOSVersion || TvOSVersion || WatchOSVersion)) {
-    getDriver().Diag(diag::err_drv_argument_not_allowed_with)
-        << OSXVersion->getAsString(Args)
-        << (iOSVersion ? iOSVersion :
-            TvOSVersion ? TvOSVersion : WatchOSVersion)->getAsString(Args);
-    iOSVersion = TvOSVersion = WatchOSVersion = nullptr;
-  } else if (iOSVersion && (TvOSVersion || WatchOSVersion)) {
-    getDriver().Diag(diag::err_drv_argument_not_allowed_with)
-        << iOSVersion->getAsString(Args)
-        << (TvOSVersion ? TvOSVersion : WatchOSVersion)->getAsString(Args);
-    TvOSVersion = WatchOSVersion = nullptr;
-  } else if (TvOSVersion && WatchOSVersion) {
-     getDriver().Diag(diag::err_drv_argument_not_allowed_with)
-        << TvOSVersion->getAsString(Args)
-        << WatchOSVersion->getAsString(Args);
-    WatchOSVersion = nullptr;
-  } else if (!OSXVersion && !iOSVersion && !TvOSVersion && !WatchOSVersion) {
+  // The OS and the version can be specified using the -target argument.
+  Optional<DarwinPlatform> OSTarget =
+      getDeploymentTargetFromTargetArg(Args, getTriple(), getDriver());
+  if (OSTarget) {
+    Optional<DarwinPlatform> OSVersionArgTarget =
+        getDeploymentTargetFromOSVersionArg(Args, getDriver());
+    if (OSVersionArgTarget) {
+      unsigned TargetMajor, TargetMinor, TargetMicro;
+      bool TargetExtra;
+      unsigned ArgMajor, ArgMinor, ArgMicro;
+      bool ArgExtra;
+      if (OSTarget->getPlatform() != OSVersionArgTarget->getPlatform() ||
+          (Driver::GetReleaseVersion(OSTarget->getOSVersion(), TargetMajor,
+                                     TargetMinor, TargetMicro, TargetExtra) &&
+           Driver::GetReleaseVersion(OSVersionArgTarget->getOSVersion(),
+                                     ArgMajor, ArgMinor, ArgMicro, ArgExtra) &&
+           (VersionTuple(TargetMajor, TargetMinor, TargetMicro) !=
+                VersionTuple(ArgMajor, ArgMinor, ArgMicro) ||
+            TargetExtra != ArgExtra))) {
+        // Select the OS version from the -m<os>-version-min argument when
+        // the -target does not include an OS version.
+        if (OSTarget->getPlatform() == OSVersionArgTarget->getPlatform() &&
+            !OSTarget->hasOSVersion()) {
+          OSTarget->setOSVersion(OSVersionArgTarget->getOSVersion());
+        } else {
+          // Warn about -m<os>-version-min that doesn't match the OS version
+          // that's specified in the target.
+          std::string OSVersionArg =
+              OSVersionArgTarget->getAsString(Args, Opts);
+          std::string TargetArg = OSTarget->getAsString(Args, Opts);
+          getDriver().Diag(clang::diag::warn_drv_overriding_flag_option)
+              << OSVersionArg << TargetArg;
+        }
+      }
+    }
+  } else {
+    // The OS target can be specified using the -m<os>version-min argument.
+    OSTarget = getDeploymentTargetFromOSVersionArg(Args, getDriver());
     // If no deployment target was specified on the command line, check for
     // environment defines.
-    std::string OSXTarget;
-    std::string iOSTarget;
-    std::string TvOSTarget;
-    std::string WatchOSTarget;
-
-    if (char *env = ::getenv("MACOSX_DEPLOYMENT_TARGET"))
-      OSXTarget = env;
-    if (char *env = ::getenv("IPHONEOS_DEPLOYMENT_TARGET"))
-      iOSTarget = env;
-    if (char *env = ::getenv("TVOS_DEPLOYMENT_TARGET"))
-      TvOSTarget = env;
-    if (char *env = ::getenv("WATCHOS_DEPLOYMENT_TARGET"))
-      WatchOSTarget = env;
-
-    if (!iOSTarget.empty())
-      ExplicitIOSDeploymentTargetStr =
-          std::string("IPHONEOS_DEPLOYMENT_TARGET=") + iOSTarget;
-
+    if (!OSTarget)
+      OSTarget =
+          getDeploymentTargetFromEnvironmentVariables(getDriver(), getTriple());
     // If there is no command-line argument to specify the Target version and
     // no environment variable defined, see if we can set the default based
     // on -isysroot.
-    if (OSXTarget.empty() && iOSTarget.empty() && WatchOSTarget.empty() &&
-        TvOSTarget.empty() && Args.hasArg(options::OPT_isysroot)) {
-      if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
-        StringRef isysroot = A->getValue();
-        StringRef SDK = getSDKName(isysroot);
-        if (SDK.size() > 0) {
-          // Slice the version number out.
-          // Version number is between the first and the last number.
-          size_t StartVer = SDK.find_first_of("0123456789");
-          size_t EndVer = SDK.find_last_of("0123456789");
-          if (StartVer != StringRef::npos && EndVer > StartVer) {
-            StringRef Version = SDK.slice(StartVer, EndVer + 1);
-            if (SDK.startswith("iPhoneOS") ||
-                SDK.startswith("iPhoneSimulator"))
-              iOSTarget = Version;
-            else if (SDK.startswith("MacOSX"))
-              OSXTarget = getSystemOrSDKMacOSVersion(Version);
-            else if (SDK.startswith("WatchOS") ||
-                     SDK.startswith("WatchSimulator"))
-              WatchOSTarget = Version;
-            else if (SDK.startswith("AppleTVOS") ||
-                     SDK.startswith("AppleTVSimulator"))
-              TvOSTarget = Version;
-          }
-        }
-      }
-    }
-
+    if (!OSTarget)
+      OSTarget = inferDeploymentTargetFromSDK(Args);
     // If no OS targets have been specified, try to guess platform from -target
     // or arch name and compute the version from the triple.
-    if (OSXTarget.empty() && iOSTarget.empty() && TvOSTarget.empty() &&
-        WatchOSTarget.empty()) {
-      llvm::Triple::OSType OSTy = llvm::Triple::UnknownOS;
-
-      // Set the OSTy based on -target if -arch isn't present.
-      if (Args.hasArg(options::OPT_target) && !Args.hasArg(options::OPT_arch)) {
-        OSTy = getTriple().getOS();
-      } else {
-        StringRef MachOArchName = getMachOArchName(Args);
-        if (MachOArchName == "armv7" || MachOArchName == "armv7s" ||
-            MachOArchName == "arm64")
-          OSTy = llvm::Triple::IOS;
-        else if (MachOArchName == "armv7k")
-          OSTy = llvm::Triple::WatchOS;
-        else if (MachOArchName != "armv6m" && MachOArchName != "armv7m" &&
-                 MachOArchName != "armv7em")
-          OSTy = llvm::Triple::MacOSX;
-      }
-
-
-      if (OSTy != llvm::Triple::UnknownOS) {
-        unsigned Major, Minor, Micro;
-        std::string *OSTarget;
-
-        switch (OSTy) {
-        case llvm::Triple::Darwin:
-        case llvm::Triple::MacOSX:
-          if (!getTriple().getMacOSXVersion(Major, Minor, Micro))
-            getDriver().Diag(diag::err_drv_invalid_darwin_version)
-                << getTriple().getOSName();
-          OSTarget = &OSXTarget;
-          break;
-        case llvm::Triple::IOS:
-          getTriple().getiOSVersion(Major, Minor, Micro);
-          OSTarget = &iOSTarget;
-          break;
-        case llvm::Triple::TvOS:
-          getTriple().getOSVersion(Major, Minor, Micro);
-          OSTarget = &TvOSTarget;
-          break;
-        case llvm::Triple::WatchOS:
-          getTriple().getWatchOSVersion(Major, Minor, Micro);
-          OSTarget = &WatchOSTarget;
-          break;
-        default:
-          llvm_unreachable("Unexpected OS type");
-          break;
-        }
-
-        llvm::raw_string_ostream(*OSTarget) << Major << '.' << Minor << '.'
-                                            << Micro;
-      }
-    }
-
-    // Do not allow conflicts with the watchOS target.
-    if (!WatchOSTarget.empty() && (!iOSTarget.empty() || !TvOSTarget.empty())) {
-      getDriver().Diag(diag::err_drv_conflicting_deployment_targets)
-        << "WATCHOS_DEPLOYMENT_TARGET"
-        << (!iOSTarget.empty() ? "IPHONEOS_DEPLOYMENT_TARGET" :
-            "TVOS_DEPLOYMENT_TARGET");
-    }
-
-    // Do not allow conflicts with the tvOS target.
-    if (!TvOSTarget.empty() && !iOSTarget.empty()) {
-      getDriver().Diag(diag::err_drv_conflicting_deployment_targets)
-        << "TVOS_DEPLOYMENT_TARGET"
-        << "IPHONEOS_DEPLOYMENT_TARGET";
-    }
-
-    // Allow conflicts among OSX and iOS for historical reasons, but choose the
-    // default platform.
-    if (!OSXTarget.empty() && (!iOSTarget.empty() ||
-                               !WatchOSTarget.empty() ||
-                               !TvOSTarget.empty())) {
-      if (getTriple().getArch() == llvm::Triple::arm ||
-          getTriple().getArch() == llvm::Triple::aarch64 ||
-          getTriple().getArch() == llvm::Triple::thumb)
-        OSXTarget = "";
-      else
-        iOSTarget = WatchOSTarget = TvOSTarget = "";
-    }
-
-    if (!OSXTarget.empty()) {
-      const Option O = Opts.getOption(options::OPT_mmacosx_version_min_EQ);
-      OSXVersion = Args.MakeJoinedArg(nullptr, O, OSXTarget);
-      Args.append(OSXVersion);
-    } else if (!iOSTarget.empty()) {
-      const Option O = Opts.getOption(options::OPT_miphoneos_version_min_EQ);
-      iOSVersion = Args.MakeJoinedArg(nullptr, O, iOSTarget);
-      Args.append(iOSVersion);
-    } else if (!TvOSTarget.empty()) {
-      const Option O = Opts.getOption(options::OPT_mtvos_version_min_EQ);
-      TvOSVersion = Args.MakeJoinedArg(nullptr, O, TvOSTarget);
-      Args.append(TvOSVersion);
-    } else if (!WatchOSTarget.empty()) {
-      const Option O = Opts.getOption(options::OPT_mwatchos_version_min_EQ);
-      WatchOSVersion = Args.MakeJoinedArg(nullptr, O, WatchOSTarget);
-      Args.append(WatchOSVersion);
-    }
+    if (!OSTarget)
+      OSTarget =
+          inferDeploymentTargetFromArch(Args, *this, getTriple(), getDriver());
   }
 
-  DarwinPlatformKind Platform;
-  if (OSXVersion)
-    Platform = MacOS;
-  else if (iOSVersion)
-    Platform = IPhoneOS;
-  else if (TvOSVersion)
-    Platform = TvOS;
-  else if (WatchOSVersion)
-    Platform = WatchOS;
-  else
-    llvm_unreachable("Unable to infer Darwin variant");
+  assert(OSTarget && "Unable to infer Darwin variant");
+  OSTarget->addOSVersionMinArgument(Args, Opts);
+  DarwinPlatformKind Platform = OSTarget->getPlatform();
 
+  unsigned Major, Minor, Micro;
+  bool HadExtra;
   // Set the tool chain target information.
   if (Platform == MacOS) {
-    assert((!iOSVersion && !TvOSVersion && !WatchOSVersion) &&
-           "Unknown target platform!");
-    if (!Driver::GetReleaseVersion(OSXVersion->getValue(), Major, Minor, Micro,
-                                   HadExtra) ||
+    if (!Driver::GetReleaseVersion(OSTarget->getOSVersion(), Major, Minor,
+                                   Micro, HadExtra) ||
         HadExtra || Major != 10 || Minor >= 100 || Micro >= 100)
       getDriver().Diag(diag::err_drv_invalid_version_number)
-          << OSXVersion->getAsString(Args);
+          << OSTarget->getAsString(Args, Opts);
   } else if (Platform == IPhoneOS) {
-    assert(iOSVersion && "Unknown target platform!");
-    if (!Driver::GetReleaseVersion(iOSVersion->getValue(), Major, Minor, Micro,
-                                   HadExtra) ||
+    if (!Driver::GetReleaseVersion(OSTarget->getOSVersion(), Major, Minor,
+                                   Micro, HadExtra) ||
         HadExtra || Major >= 100 || Minor >= 100 || Micro >= 100)
       getDriver().Diag(diag::err_drv_invalid_version_number)
-          << iOSVersion->getAsString(Args);
+          << OSTarget->getAsString(Args, Opts);
+    ;
     // For 32-bit targets, the deployment target for iOS has to be earlier than
     // iOS 11.
     if (getTriple().isArch32Bit() && Major >= 11) {
       // If the deployment target is explicitly specified, print a diagnostic.
-      if (!ExplicitIOSDeploymentTargetStr.empty()) {
+      if (OSTarget->isExplicitlySpecified()) {
         getDriver().Diag(diag::warn_invalid_ios_deployment_target)
-            << ExplicitIOSDeploymentTargetStr;
-      // Otherwise, set it to 10.99.99.
+            << OSTarget->getAsString(Args, Opts);
+        // Otherwise, set it to 10.99.99.
       } else {
         Major = 10;
         Minor = 99;
@@ -1436,32 +1628,28 @@
       }
     }
   } else if (Platform == TvOS) {
-    if (!Driver::GetReleaseVersion(TvOSVersion->getValue(), Major, Minor,
-                                   Micro, HadExtra) || HadExtra ||
-        Major >= 100 || Minor >= 100 || Micro >= 100)
+    if (!Driver::GetReleaseVersion(OSTarget->getOSVersion(), Major, Minor,
+                                   Micro, HadExtra) ||
+        HadExtra || Major >= 100 || Minor >= 100 || Micro >= 100)
       getDriver().Diag(diag::err_drv_invalid_version_number)
-          << TvOSVersion->getAsString(Args);
+          << OSTarget->getAsString(Args, Opts);
   } else if (Platform == WatchOS) {
-    if (!Driver::GetReleaseVersion(WatchOSVersion->getValue(), Major, Minor,
-                                   Micro, HadExtra) || HadExtra ||
-        Major >= 10 || Minor >= 100 || Micro >= 100)
+    if (!Driver::GetReleaseVersion(OSTarget->getOSVersion(), Major, Minor,
+                                   Micro, HadExtra) ||
+        HadExtra || Major >= 10 || Minor >= 100 || Micro >= 100)
       getDriver().Diag(diag::err_drv_invalid_version_number)
-          << WatchOSVersion->getAsString(Args);
+          << OSTarget->getAsString(Args, Opts);
   } else
     llvm_unreachable("unknown kind of Darwin platform");
 
+  DarwinEnvironmentKind Environment = OSTarget->getEnvironment();
   // Recognize iOS targets with an x86 architecture as the iOS simulator.
-  if (iOSVersion && (getTriple().getArch() == llvm::Triple::x86 ||
-                     getTriple().getArch() == llvm::Triple::x86_64))
-    Platform = IPhoneOSSimulator;
-  if (TvOSVersion && (getTriple().getArch() == llvm::Triple::x86 ||
-                      getTriple().getArch() == llvm::Triple::x86_64))
-    Platform = TvOSSimulator;
-  if (WatchOSVersion && (getTriple().getArch() == llvm::Triple::x86 ||
-                         getTriple().getArch() == llvm::Triple::x86_64))
-    Platform = WatchOSSimulator;
+  if (Environment == NativeEnvironment && Platform != MacOS &&
+      (getTriple().getArch() == llvm::Triple::x86 ||
+       getTriple().getArch() == llvm::Triple::x86_64))
+    Environment = Simulator;
 
-  setTarget(Platform, Major, Minor, Micro);
+  setTarget(Platform, Environment, Major, Minor, Micro);
 
   if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
     StringRef SDK = getSDKName(A->getValue());
@@ -1785,15 +1973,12 @@
     OS = llvm::Triple::MacOSX;
     break;
   case IPhoneOS:
-  case IPhoneOSSimulator:
     OS = llvm::Triple::IOS;
     break;
-  case TvOS:
-  case TvOSSimulator: // Earlier than 11.0.
+  case TvOS: // Earlier than 11.0.
     OS = llvm::Triple::TvOS;
     break;
-  case WatchOS:
-  case WatchOSSimulator: // Earlier than 4.0.
+  case WatchOS: // Earlier than 4.0.
     OS = llvm::Triple::WatchOS;
     break;
   }
@@ -1881,7 +2066,7 @@
   // Unwind tables are not emitted if -fno-exceptions is supplied (except when
   // targeting x86_64).
   return getArch() == llvm::Triple::x86_64 ||
-         (!UseSjLjExceptions(Args) &&
+         (GetExceptionModel(Args) != llvm::ExceptionHandling::SjLj &&
           Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions,
                        true));
 }
@@ -1892,15 +2077,18 @@
   return false;
 }
 
-bool Darwin::UseSjLjExceptions(const ArgList &Args) const {
+llvm::ExceptionHandling Darwin::GetExceptionModel(const ArgList &Args) const {
   // Darwin uses SjLj exceptions on ARM.
   if (getTriple().getArch() != llvm::Triple::arm &&
       getTriple().getArch() != llvm::Triple::thumb)
-    return false;
+    return llvm::ExceptionHandling::None;
 
   // Only watchOS uses the new DWARF/Compact unwinding method.
   llvm::Triple Triple(ComputeLLVMTriple(Args));
-  return !Triple.isWatchABI();
+  if(Triple.isWatchABI())
+    return llvm::ExceptionHandling::DwarfCFI;
+
+  return llvm::ExceptionHandling::SjLj;
 }
 
 bool Darwin::SupportsEmbeddedBitcode() const {
@@ -2041,8 +2229,6 @@
   }
 }
 
-bool Darwin::SupportsObjCGC() const { return isTargetMacOS(); }
-
 void Darwin::CheckObjCARC() const {
   if (isTargetIOSBased() || isTargetWatchOSBased() ||
       (isTargetMacOS() && !isMacosxVersionLT(10, 6)))
diff --git a/lib/Driver/ToolChains/Darwin.h b/lib/Driver/ToolChains/Darwin.h
index bcc7611..87d553b 100644
--- a/lib/Driver/ToolChains/Darwin.h
+++ b/lib/Driver/ToolChains/Darwin.h
@@ -245,12 +245,11 @@
 
   bool SupportsProfiling() const override;
 
-  bool SupportsObjCGC() const override { return false; }
-
   bool UseDwarfDebugFlags() const override;
 
-  bool UseSjLjExceptions(const llvm::opt::ArgList &Args) const override {
-    return false;
+  llvm::ExceptionHandling
+  GetExceptionModel(const llvm::opt::ArgList &Args) const override {
+    return llvm::ExceptionHandling::None;
   }
 
   /// }
@@ -269,14 +268,17 @@
   enum DarwinPlatformKind {
     MacOS,
     IPhoneOS,
-    IPhoneOSSimulator,
     TvOS,
-    TvOSSimulator,
     WatchOS,
-    WatchOSSimulator
+    LastDarwinPlatform = WatchOS
+  };
+  enum DarwinEnvironmentKind {
+    NativeEnvironment,
+    Simulator,
   };
 
   mutable DarwinPlatformKind TargetPlatform;
+  mutable DarwinEnvironmentKind TargetEnvironment;
 
   /// The OS version we are targeting.
   mutable VersionTuple TargetVersion;
@@ -318,29 +320,34 @@
 
   // FIXME: Eliminate these ...Target functions and derive separate tool chains
   // for these targets and put version in constructor.
-  void setTarget(DarwinPlatformKind Platform, unsigned Major, unsigned Minor,
-                 unsigned Micro) const {
+  void setTarget(DarwinPlatformKind Platform, DarwinEnvironmentKind Environment,
+                 unsigned Major, unsigned Minor, unsigned Micro) const {
     // FIXME: For now, allow reinitialization as long as values don't
     // change. This will go away when we move away from argument translation.
     if (TargetInitialized && TargetPlatform == Platform &&
+        TargetEnvironment == Environment &&
         TargetVersion == VersionTuple(Major, Minor, Micro))
       return;
 
     assert(!TargetInitialized && "Target already initialized!");
     TargetInitialized = true;
     TargetPlatform = Platform;
+    TargetEnvironment = Environment;
     TargetVersion = VersionTuple(Major, Minor, Micro);
+    if (Environment == Simulator)
+      const_cast<Darwin *>(this)->setTripleEnvironment(llvm::Triple::Simulator);
   }
 
   bool isTargetIPhoneOS() const {
     assert(TargetInitialized && "Target not initialized!");
-    return TargetPlatform == IPhoneOS || TargetPlatform == TvOS;
+    return (TargetPlatform == IPhoneOS || TargetPlatform == TvOS) &&
+           TargetEnvironment == NativeEnvironment;
   }
 
   bool isTargetIOSSimulator() const {
     assert(TargetInitialized && "Target not initialized!");
-    return TargetPlatform == IPhoneOSSimulator ||
-           TargetPlatform == TvOSSimulator;
+    return (TargetPlatform == IPhoneOS || TargetPlatform == TvOS) &&
+           TargetEnvironment == Simulator;
   }
 
   bool isTargetIOSBased() const {
@@ -350,32 +357,32 @@
 
   bool isTargetTvOS() const {
     assert(TargetInitialized && "Target not initialized!");
-    return TargetPlatform == TvOS;
+    return TargetPlatform == TvOS && TargetEnvironment == NativeEnvironment;
   }
 
   bool isTargetTvOSSimulator() const {
     assert(TargetInitialized && "Target not initialized!");
-    return TargetPlatform == TvOSSimulator;
+    return TargetPlatform == TvOS && TargetEnvironment == Simulator;
   }
 
   bool isTargetTvOSBased() const {
     assert(TargetInitialized && "Target not initialized!");
-    return TargetPlatform == TvOS || TargetPlatform == TvOSSimulator;
+    return TargetPlatform == TvOS;
   }
 
   bool isTargetWatchOS() const {
     assert(TargetInitialized && "Target not initialized!");
-    return TargetPlatform == WatchOS;
+    return TargetPlatform == WatchOS && TargetEnvironment == NativeEnvironment;
   }
 
   bool isTargetWatchOSSimulator() const {
     assert(TargetInitialized && "Target not initialized!");
-    return TargetPlatform == WatchOSSimulator;
+    return TargetPlatform == WatchOS && TargetEnvironment == Simulator;
   }
 
   bool isTargetWatchOSBased() const {
     assert(TargetInitialized && "Target not initialized!");
-    return TargetPlatform == WatchOS || TargetPlatform == WatchOSSimulator;
+    return TargetPlatform == WatchOS;
   }
 
   bool isTargetMacOS() const {
@@ -411,10 +418,11 @@
                              Action::OffloadKind DeviceOffloadKind) const override;
 
   StringRef getPlatformFamily() const;
-  static StringRef getSDKName(StringRef isysroot);
   StringRef getOSLibraryNameSuffix() const;
 
 public:
+  static StringRef getSDKName(StringRef isysroot);
+
   /// }
   /// @name ToolChain Implementation
   /// {
@@ -455,11 +463,10 @@
     return 0;
   }
 
-  bool SupportsObjCGC() const override;
-
   void CheckObjCARC() const override;
 
-  bool UseSjLjExceptions(const llvm::opt::ArgList &Args) const override;
+  llvm::ExceptionHandling GetExceptionModel(
+      const llvm::opt::ArgList &Args) const override;
 
   bool SupportsEmbeddedBitcode() const override;
 
diff --git a/lib/Driver/ToolChains/FreeBSD.cpp b/lib/Driver/ToolChains/FreeBSD.cpp
index 2f066cf..da2b661 100644
--- a/lib/Driver/ToolChains/FreeBSD.cpp
+++ b/lib/Driver/ToolChains/FreeBSD.cpp
@@ -117,6 +117,30 @@
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
 }
 
+static bool addXRayRuntime(const ToolChain &TC, const ArgList &Args,
+                           ArgStringList &CmdArgs) {
+  if (Args.hasArg(options::OPT_shared))
+    return false;
+
+  if (Args.hasFlag(options::OPT_fxray_instrument,
+                   options::OPT_fnoxray_instrument, false)) {
+    CmdArgs.push_back("-whole-archive");
+    CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray", false));
+    CmdArgs.push_back("-no-whole-archive");
+    return true;
+  }
+  
+  return false;
+}
+
+static void linkXRayRuntimeDeps(const ToolChain &TC, const ArgList &Args,
+                                ArgStringList &CmdArgs) {
+  CmdArgs.push_back("--no-as-needed");
+  CmdArgs.push_back("-lrt");
+  CmdArgs.push_back("-lm");
+  CmdArgs.push_back("-lpthread");
+} 
+
 void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
@@ -235,6 +259,7 @@
     AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin, D);
 
   bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
+  bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs);
   AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
 
   if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
@@ -249,6 +274,8 @@
     }
     if (NeedsSanitizerDeps)
       linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
+    if (NeedsXRayDeps)
+      linkXRayRuntimeDeps(ToolChain, Args, CmdArgs);
     // FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding
     // the default system libraries. Just mimic this for now.
     if (Args.hasArg(options::OPT_pg))
@@ -317,6 +344,8 @@
   // When targeting 32-bit platforms, look for '/usr/lib32/crt1.o' and fall
   // back to '/usr/lib' if it doesn't exist.
   if ((Triple.getArch() == llvm::Triple::x86 ||
+       Triple.getArch() == llvm::Triple::mips ||
+       Triple.getArch() == llvm::Triple::mipsel ||
        Triple.getArch() == llvm::Triple::ppc) &&
       D.getVFS().exists(getDriver().SysRoot + "/usr/lib32/crt1.o"))
     getFilePaths().push_back(getDriver().SysRoot + "/usr/lib32");
@@ -359,17 +388,18 @@
 
 Tool *FreeBSD::buildLinker() const { return new tools::freebsd::Linker(*this); }
 
-bool FreeBSD::UseSjLjExceptions(const ArgList &Args) const {
+llvm::ExceptionHandling FreeBSD::GetExceptionModel(const ArgList &Args) const {
   // FreeBSD uses SjLj exceptions on ARM oabi.
   switch (getTriple().getEnvironment()) {
   case llvm::Triple::GNUEABIHF:
   case llvm::Triple::GNUEABI:
   case llvm::Triple::EABI:
-    return false;
-
+    return llvm::ExceptionHandling::None;
   default:
-    return (getTriple().getArch() == llvm::Triple::arm ||
-            getTriple().getArch() == llvm::Triple::thumb);
+    if (getTriple().getArch() == llvm::Triple::arm ||
+        getTriple().getArch() == llvm::Triple::thumb)
+      return llvm::ExceptionHandling::SjLj;
+    return llvm::ExceptionHandling::None;
   }
 }
 
@@ -391,6 +421,10 @@
   }
   if (IsX86 || IsX86_64) {
     Res |= SanitizerKind::SafeStack;
+    Res |= SanitizerKind::Fuzzer;
+    Res |= SanitizerKind::FuzzerNoLink;
   }
+  if (IsX86_64)
+    Res |= SanitizerKind::Memory;
   return Res;
 }
diff --git a/lib/Driver/ToolChains/FreeBSD.h b/lib/Driver/ToolChains/FreeBSD.h
index 25e9df7..2943e1c 100644
--- a/lib/Driver/ToolChains/FreeBSD.h
+++ b/lib/Driver/ToolChains/FreeBSD.h
@@ -66,7 +66,8 @@
   void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
                            llvm::opt::ArgStringList &CmdArgs) const override;
 
-  bool UseSjLjExceptions(const llvm::opt::ArgList &Args) const override;
+  llvm::ExceptionHandling GetExceptionModel(
+      const llvm::opt::ArgList &Args) const override;
   bool isPIEDefault() const override;
   SanitizerMask getSupportedSanitizers() const override;
   unsigned GetDefaultDwarfVersion() const override { return 2; }
diff --git a/lib/Driver/ToolChains/Fuchsia.cpp b/lib/Driver/ToolChains/Fuchsia.cpp
index 5cea62b..5f4995c 100644
--- a/lib/Driver/ToolChains/Fuchsia.cpp
+++ b/lib/Driver/ToolChains/Fuchsia.cpp
@@ -44,10 +44,8 @@
   Args.ClaimAllArgs(options::OPT_w);
 
   const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
-  if (llvm::sys::path::stem(Exec).equals_lower("lld")) {
-    CmdArgs.push_back("-flavor");
-    CmdArgs.push_back("gnu");
-
+  if (llvm::sys::path::filename(Exec).equals_lower("ld.lld") ||
+      llvm::sys::path::stem(Exec).equals_lower("ld.lld")) {
     CmdArgs.push_back("-z");
     CmdArgs.push_back("rodynamic");
   }
@@ -280,7 +278,9 @@
 
 SanitizerMask Fuchsia::getSupportedSanitizers() const {
   SanitizerMask Res = ToolChain::getSupportedSanitizers();
-  Res |= SanitizerKind::SafeStack;
   Res |= SanitizerKind::Address;
+  Res |= SanitizerKind::Fuzzer;
+  Res |= SanitizerKind::SafeStack;
+  Res |= SanitizerKind::Scudo;
   return Res;
 }
diff --git a/lib/Driver/ToolChains/Fuchsia.h b/lib/Driver/ToolChains/Fuchsia.h
index dab44a2..6f438de 100644
--- a/lib/Driver/ToolChains/Fuchsia.h
+++ b/lib/Driver/ToolChains/Fuchsia.h
@@ -43,6 +43,7 @@
   bool HasNativeLLVMSupport() const override { return true; }
   bool IsIntegratedAssemblerDefault() const override { return true; }
   bool IsMathErrnoDefault() const override { return false; }
+  bool useRelaxRelocations() const override { return true; };
   RuntimeLibType GetDefaultRuntimeLibType() const override {
     return ToolChain::RLT_CompilerRT;
   }
@@ -82,7 +83,7 @@
                            llvm::opt::ArgStringList &CmdArgs) const override;
 
   const char *getDefaultLinker() const override {
-    return "lld";
+    return "ld.lld";
   }
 
 protected:
diff --git a/lib/Driver/ToolChains/Gnu.cpp b/lib/Driver/ToolChains/Gnu.cpp
index 757ebda..041bf78 100644
--- a/lib/Driver/ToolChains/Gnu.cpp
+++ b/lib/Driver/ToolChains/Gnu.cpp
@@ -8,13 +8,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "Gnu.h"
-#include "Linux.h"
 #include "Arch/ARM.h"
 #include "Arch/Mips.h"
 #include "Arch/PPC.h"
+#include "Arch/RISCV.h"
 #include "Arch/Sparc.h"
 #include "Arch/SystemZ.h"
 #include "CommonArgs.h"
+#include "Linux.h"
 #include "clang/Basic/VirtualFileSystem.h"
 #include "clang/Config/config.h" // for GCC_INSTALL_PREFIX
 #include "clang/Driver/Compilation.h"
@@ -42,6 +43,22 @@
          !O.hasFlag(options::DriverOption) && !O.hasFlag(options::LinkerInput);
 }
 
+// Switch CPU names not recognized by GNU assembler to a close CPU that it does
+// recognize, instead of a lower march from being picked in the absence of a cpu
+// flag.
+static void normalizeCPUNamesForAssembler(const ArgList &Args,
+                                          ArgStringList &CmdArgs) {
+  if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
+    StringRef CPUArg(A->getValue());
+    if (CPUArg.equals_lower("krait"))
+      CmdArgs.push_back("-mcpu=cortex-a15");
+    else if(CPUArg.equals_lower("kryo"))
+      CmdArgs.push_back("-mcpu=cortex-a57");
+    else
+      Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ);
+  }
+}
+
 void tools::gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
                                       const InputInfo &Output,
                                       const InputInfoList &Inputs,
@@ -225,10 +242,13 @@
                                 ArgStringList &CmdArgs) {
   CmdArgs.push_back("--no-as-needed");
   CmdArgs.push_back("-lpthread");
-  CmdArgs.push_back("-lrt");
+  if (TC.getTriple().getOS() != llvm::Triple::OpenBSD)
+    CmdArgs.push_back("-lrt");
   CmdArgs.push_back("-lm");
 
-  if (TC.getTriple().getOS() != llvm::Triple::FreeBSD)
+  if (TC.getTriple().getOS() != llvm::Triple::FreeBSD &&
+      TC.getTriple().getOS() != llvm::Triple::NetBSD &&
+      TC.getTriple().getOS() != llvm::Triple::OpenBSD)
     CmdArgs.push_back("-ldl");
 }
 
@@ -254,6 +274,10 @@
     return "elf64ppc";
   case llvm::Triple::ppc64le:
     return "elf64lppc";
+  case llvm::Triple::riscv32:
+    return "elf32lriscv";
+  case llvm::Triple::riscv64:
+    return "elf64lriscv";
   case llvm::Triple::sparc:
   case llvm::Triple::sparcel:
     return "elf32_sparc";
@@ -282,6 +306,17 @@
   }
 }
 
+static bool getPIE(const ArgList &Args, const toolchains::Linux &ToolChain) {
+  if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_static))
+    return false;
+
+  Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie,
+                           options::OPT_nopie);
+  if (!A)
+    return ToolChain.isPIEDefault();
+  return A->getOption().matches(options::OPT_pie);
+}
+
 void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                            const InputInfo &Output,
                                            const InputInfoList &Inputs,
@@ -296,9 +331,7 @@
   const llvm::Triple::ArchType Arch = ToolChain.getArch();
   const bool isAndroid = ToolChain.getTriple().isAndroid();
   const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU();
-  const bool IsPIE =
-      !Args.hasArg(options::OPT_shared) && !Args.hasArg(options::OPT_static) &&
-      (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
+  const bool IsPIE = getPIE(Args, ToolChain);
   const bool HasCRTBeginEndFiles =
       ToolChain.getTriple().hasEnvironment() ||
       (ToolChain.getTriple().getVendor() != llvm::Triple::MipsTechnologies);
@@ -347,9 +380,7 @@
   for (const auto &Opt : ToolChain.ExtraOpts)
     CmdArgs.push_back(Opt.c_str());
 
-  if (!Args.hasArg(options::OPT_static)) {
-    CmdArgs.push_back("--eh-frame-hdr");
-  }
+  CmdArgs.push_back("--eh-frame-hdr");
 
   if (const char *LDMOption = getLDMOption(ToolChain.getTriple(), Args)) {
     CmdArgs.push_back("-m");
@@ -598,6 +629,18 @@
       ppc::getPPCAsmModeForCPU(getCPUName(Args, getToolChain().getTriple())));
     break;
   }
+  case llvm::Triple::riscv32:
+  case llvm::Triple::riscv64: {
+    StringRef ABIName = riscv::getRISCVABI(Args, getToolChain().getTriple());
+    CmdArgs.push_back("-mabi");
+    CmdArgs.push_back(ABIName.data());
+    if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
+      StringRef MArch = A->getValue();
+      CmdArgs.push_back("-march");
+      CmdArgs.push_back(MArch.data());
+    }
+    break;
+  }
   case llvm::Triple::sparc:
   case llvm::Triple::sparcel: {
     CmdArgs.push_back("-32");
@@ -643,23 +686,16 @@
     }
 
     Args.AddLastArg(CmdArgs, options::OPT_march_EQ);
+    normalizeCPUNamesForAssembler(Args, CmdArgs);
 
-    // FIXME: remove krait check when GNU tools support krait cpu
-    // for now replace it with -mcpu=cortex-a15 to avoid a lower
-    // march from being picked in the absence of a cpu flag.
-    Arg *A;
-    if ((A = Args.getLastArg(options::OPT_mcpu_EQ)) &&
-        StringRef(A->getValue()).equals_lower("krait"))
-      CmdArgs.push_back("-mcpu=cortex-a15");
-    else
-      Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ);
     Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ);
     break;
   }
   case llvm::Triple::aarch64:
   case llvm::Triple::aarch64_be: {
     Args.AddLastArg(CmdArgs, options::OPT_march_EQ);
-    Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ);
+    normalizeCPUNamesForAssembler(Args, CmdArgs);
+
     break;
   }
   case llvm::Triple::mips:
@@ -837,6 +873,10 @@
   return A && A->getOption().matches(options::OPT_mmicromips);
 }
 
+static bool isRISCV(llvm::Triple::ArchType Arch) {
+  return Arch == llvm::Triple::riscv32 || Arch == llvm::Triple::riscv64;
+}
+
 static Multilib makeMultilib(StringRef commonSuffix) {
   return Multilib(commonSuffix, commonSuffix, commonSuffix);
 }
@@ -1382,11 +1422,48 @@
     Result.Multilibs = AndroidArmMultilibs;
 }
 
+static void findRISCVMultilibs(const Driver &D,
+                               const llvm::Triple &TargetTriple, StringRef Path,
+                               const ArgList &Args, DetectedMultilibs &Result) {
+
+  FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
+  Multilib Ilp32 = makeMultilib("lib32/ilp32").flag("+m32").flag("+mabi=ilp32");
+  Multilib Ilp32f =
+      makeMultilib("lib32/ilp32f").flag("+m32").flag("+mabi=ilp32f");
+  Multilib Ilp32d =
+      makeMultilib("lib32/ilp32d").flag("+m32").flag("+mabi=ilp32d");
+  Multilib Lp64 = makeMultilib("lib64/lp64").flag("+m64").flag("+mabi=lp64");
+  Multilib Lp64f = makeMultilib("lib64/lp64f").flag("+m64").flag("+mabi=lp64f");
+  Multilib Lp64d = makeMultilib("lib64/lp64d").flag("+m64").flag("+mabi=lp64d");
+  MultilibSet RISCVMultilibs =
+      MultilibSet()
+          .Either({Ilp32, Ilp32f, Ilp32d, Lp64, Lp64f, Lp64d})
+          .FilterOut(NonExistent);
+
+  Multilib::flags_list Flags;
+  bool IsRV64 = TargetTriple.getArch() == llvm::Triple::riscv64;
+  StringRef ABIName = tools::riscv::getRISCVABI(Args, TargetTriple);
+
+  addMultilibFlag(!IsRV64, "m32", Flags);
+  addMultilibFlag(IsRV64, "m64", Flags);
+  addMultilibFlag(ABIName == "ilp32", "mabi=ilp32", Flags);
+  addMultilibFlag(ABIName == "ilp32f", "mabi=ilp32f", Flags);
+  addMultilibFlag(ABIName == "ilp32d", "mabi=ilp32d", Flags);
+  addMultilibFlag(ABIName == "lp64", "mabi=lp64", Flags);
+  addMultilibFlag(ABIName == "lp64f", "mabi=lp64f", Flags);
+  addMultilibFlag(ABIName == "lp64d", "mabi=lp64d", Flags);
+
+  if (RISCVMultilibs.select(Flags, Result.SelectedMultilib))
+    Result.Multilibs = RISCVMultilibs;
+}
+
 static bool findBiarchMultilibs(const Driver &D,
                                 const llvm::Triple &TargetTriple,
                                 StringRef Path, const ArgList &Args,
                                 bool NeedsBiarchSuffix,
                                 DetectedMultilibs &Result) {
+  Multilib Default;
+
   // Some versions of SUSE and Fedora on ppc64 put 32-bit libs
   // in what would normally be GCCInstallPath and put the 64-bit
   // libs in a subdirectory named 64. The simple logic we follow is that
@@ -1394,10 +1471,26 @@
   // we use that. If not, and if not a biarch triple alias, we look for
   // crtbegin.o without the subdirectory.
 
-  Multilib Default;
+  StringRef Suff64 = "/64";
+  // Solaris uses platform-specific suffixes instead of /64.
+  if (TargetTriple.getOS() == llvm::Triple::Solaris) {
+    switch (TargetTriple.getArch()) {
+    case llvm::Triple::x86:
+    case llvm::Triple::x86_64:
+      Suff64 = "/amd64";
+      break;
+    case llvm::Triple::sparc:
+    case llvm::Triple::sparcv9:
+      Suff64 = "/sparcv9";
+      break;
+    default:
+      break;
+    }
+  }
+
   Multilib Alt64 = Multilib()
-                       .gccSuffix("/64")
-                       .includeSuffix("/64")
+                       .gccSuffix(Suff64)
+                       .includeSuffix(Suff64)
                        .flag("-m32")
                        .flag("+m64")
                        .flag("-mx32");
@@ -1594,21 +1687,17 @@
     // If we have a SysRoot, try that first.
     if (!D.SysRoot.empty()) {
       Prefixes.push_back(D.SysRoot);
-      Prefixes.push_back(D.SysRoot + "/usr");
+      AddDefaultGCCPrefixes(TargetTriple, Prefixes, D.SysRoot);
     }
 
     // Then look for gcc installed alongside clang.
     Prefixes.push_back(D.InstalledDir + "/..");
 
-    // Then look for distribution supplied gcc installations.
+    // Next, look for prefix(es) that correspond to distribution-supplied gcc
+    // installations.
     if (D.SysRoot.empty()) {
-      // Look for RHEL devtoolsets.
-      Prefixes.push_back("/opt/rh/devtoolset-6/root/usr");
-      Prefixes.push_back("/opt/rh/devtoolset-4/root/usr");
-      Prefixes.push_back("/opt/rh/devtoolset-3/root/usr");
-      Prefixes.push_back("/opt/rh/devtoolset-2/root/usr");
-      // And finally in /usr.
-      Prefixes.push_back("/usr");
+      // Typically /usr.
+      AddDefaultGCCPrefixes(TargetTriple, Prefixes, D.SysRoot);
     }
   }
 
@@ -1679,6 +1768,48 @@
   return false;
 }
 
+void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
+    const llvm::Triple &TargetTriple, SmallVectorImpl<std::string> &Prefixes,
+    StringRef SysRoot) {
+  if (TargetTriple.getOS() == llvm::Triple::Solaris) {
+    // Solaris is a special case.
+    // The GCC installation is under
+    //   /usr/gcc/<major>.<minor>/lib/gcc/<triple>/<major>.<minor>.<patch>/
+    // so we need to find those /usr/gcc/*/lib/gcc libdirs and go with
+    // /usr/gcc/<version> as a prefix.
+
+    std::string PrefixDir = SysRoot.str() + "/usr/gcc";
+    std::error_code EC;
+    for (vfs::directory_iterator LI = D.getVFS().dir_begin(PrefixDir, EC), LE;
+         !EC && LI != LE; LI = LI.increment(EC)) {
+      StringRef VersionText = llvm::sys::path::filename(LI->getName());
+      GCCVersion CandidateVersion = GCCVersion::Parse(VersionText);
+
+      // Filter out obviously bad entries.
+      if (CandidateVersion.Major == -1 || CandidateVersion.isOlderThan(4, 1, 1))
+        continue;
+
+      std::string CandidatePrefix = PrefixDir + "/" + VersionText.str();
+      std::string CandidateLibPath = CandidatePrefix + "/lib/gcc";
+      if (!D.getVFS().exists(CandidateLibPath))
+        continue;
+
+      Prefixes.push_back(CandidatePrefix);
+    }
+    return;
+  }
+
+  // Non-Solaris is much simpler - most systems just go with "/usr".
+  if (SysRoot.empty() && TargetTriple.getOS() == llvm::Triple::Linux) {
+    // Yet, still look for RHEL devtoolsets.
+    Prefixes.push_back("/opt/rh/devtoolset-6/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-4/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-3/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-2/root/usr");
+  }
+  Prefixes.push_back(SysRoot.str() + "/usr");
+}
+
 /*static*/ void Generic_GCC::GCCInstallationDetector::CollectLibDirsAndTriples(
     const llvm::Triple &TargetTriple, const llvm::Triple &BiarchTriple,
     SmallVectorImpl<StringRef> &LibDirs,
@@ -1764,6 +1895,10 @@
       "powerpc64le-linux-gnu", "powerpc64le-unknown-linux-gnu",
       "powerpc64le-suse-linux", "ppc64le-redhat-linux"};
 
+  static const char *const RISCV32LibDirs[] = {"/lib", "/lib32"};
+  static const char *const RISCVTriples[] = {"riscv32-unknown-linux-gnu",
+                                             "riscv64-unknown-linux-gnu"};
+
   static const char *const SPARCv8LibDirs[] = {"/lib32", "/lib"};
   static const char *const SPARCv8Triples[] = {"sparc-linux-gnu",
                                                "sparcv8-linux-gnu"};
@@ -1776,17 +1911,49 @@
       "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu",
       "s390x-suse-linux", "s390x-redhat-linux"};
 
-  // Solaris.
-  static const char *const SolarisSPARCLibDirs[] = {"/gcc"};
-  static const char *const SolarisSPARCTriples[] = {"sparc-sun-solaris2.11",
-                                                    "i386-pc-solaris2.11"};
 
   using std::begin;
   using std::end;
 
   if (TargetTriple.getOS() == llvm::Triple::Solaris) {
-    LibDirs.append(begin(SolarisSPARCLibDirs), end(SolarisSPARCLibDirs));
-    TripleAliases.append(begin(SolarisSPARCTriples), end(SolarisSPARCTriples));
+    static const char *const SolarisLibDirs[] = {"/lib"};
+    static const char *const SolarisSparcV8Triples[] = {
+        "sparc-sun-solaris2.11", "sparc-sun-solaris2.12"};
+    static const char *const SolarisSparcV9Triples[] = {
+        "sparcv9-sun-solaris2.11", "sparcv9-sun-solaris2.12"};
+    static const char *const SolarisX86Triples[] = {"i386-pc-solaris2.11",
+                                                    "i386-pc-solaris2.12"};
+    static const char *const SolarisX86_64Triples[] = {"x86_64-pc-solaris2.11",
+                                                       "x86_64-pc-solaris2.12"};
+    LibDirs.append(begin(SolarisLibDirs), end(SolarisLibDirs));
+    BiarchLibDirs.append(begin(SolarisLibDirs), end(SolarisLibDirs));
+    switch (TargetTriple.getArch()) {
+    case llvm::Triple::x86:
+      TripleAliases.append(begin(SolarisX86Triples), end(SolarisX86Triples));
+      BiarchTripleAliases.append(begin(SolarisX86_64Triples),
+                                 end(SolarisX86_64Triples));
+      break;
+    case llvm::Triple::x86_64:
+      TripleAliases.append(begin(SolarisX86_64Triples),
+                           end(SolarisX86_64Triples));
+      BiarchTripleAliases.append(begin(SolarisX86Triples),
+                                 end(SolarisX86Triples));
+      break;
+    case llvm::Triple::sparc:
+      TripleAliases.append(begin(SolarisSparcV8Triples),
+                           end(SolarisSparcV8Triples));
+      BiarchTripleAliases.append(begin(SolarisSparcV9Triples),
+                                 end(SolarisSparcV9Triples));
+      break;
+    case llvm::Triple::sparcv9:
+      TripleAliases.append(begin(SolarisSparcV9Triples),
+                           end(SolarisSparcV9Triples));
+      BiarchTripleAliases.append(begin(SolarisSparcV8Triples),
+                                 end(SolarisSparcV8Triples));
+      break;
+    default:
+      break;
+    }
     return;
   }
 
@@ -1909,6 +2076,12 @@
     LibDirs.append(begin(PPC64LELibDirs), end(PPC64LELibDirs));
     TripleAliases.append(begin(PPC64LETriples), end(PPC64LETriples));
     break;
+  case llvm::Triple::riscv32:
+    LibDirs.append(begin(RISCV32LibDirs), end(RISCV32LibDirs));
+    BiarchLibDirs.append(begin(RISCV32LibDirs), end(RISCV32LibDirs));
+    TripleAliases.append(begin(RISCVTriples), end(RISCVTriples));
+    BiarchTripleAliases.append(begin(RISCVTriples), end(RISCVTriples));
+    break;
   case llvm::Triple::sparc:
   case llvm::Triple::sparcel:
     LibDirs.append(begin(SPARCv8LibDirs), end(SPARCv8LibDirs));
@@ -1941,56 +2114,6 @@
     BiarchTripleAliases.push_back(BiarchTriple.str());
 }
 
-void Generic_GCC::GCCInstallationDetector::scanLibDirForGCCTripleSolaris(
-    const llvm::Triple &TargetArch, const llvm::opt::ArgList &Args,
-    const std::string &LibDir, StringRef CandidateTriple,
-    bool NeedsBiarchSuffix) {
-  // Solaris is a special case. The GCC installation is under
-  // /usr/gcc/<major>.<minor>/lib/gcc/<triple>/<major>.<minor>.<patch>/, so we
-  // need to iterate twice.
-  std::error_code EC;
-  for (vfs::directory_iterator LI = D.getVFS().dir_begin(LibDir, EC), LE;
-       !EC && LI != LE; LI = LI.increment(EC)) {
-    StringRef VersionText = llvm::sys::path::filename(LI->getName());
-    GCCVersion CandidateVersion = GCCVersion::Parse(VersionText);
-
-    if (CandidateVersion.Major != -1) // Filter obviously bad entries.
-      if (!CandidateGCCInstallPaths.insert(LI->getName()).second)
-        continue; // Saw this path before; no need to look at it again.
-    if (CandidateVersion.isOlderThan(4, 1, 1))
-      continue;
-    if (CandidateVersion <= Version)
-      continue;
-
-    GCCInstallPath =
-        LibDir + "/" + VersionText.str() + "/lib/gcc/" + CandidateTriple.str();
-    if (!D.getVFS().exists(GCCInstallPath))
-      continue;
-
-    // If we make it here there has to be at least one GCC version, let's just
-    // use the latest one.
-    std::error_code EEC;
-    for (vfs::directory_iterator
-             LLI = D.getVFS().dir_begin(GCCInstallPath, EEC),
-             LLE;
-         !EEC && LLI != LLE; LLI = LLI.increment(EEC)) {
-
-      StringRef SubVersionText = llvm::sys::path::filename(LLI->getName());
-      GCCVersion CandidateSubVersion = GCCVersion::Parse(SubVersionText);
-
-      if (CandidateSubVersion > Version)
-        Version = CandidateSubVersion;
-    }
-
-    GCCTriple.setTriple(CandidateTriple);
-
-    GCCInstallPath += "/" + Version.Text;
-    GCCParentLibPath = GCCInstallPath + "/../../../../";
-
-    IsValid = true;
-  }
-}
-
 bool Generic_GCC::GCCInstallationDetector::ScanGCCForMultilibs(
     const llvm::Triple &TargetTriple, const ArgList &Args,
     StringRef Path, bool NeedsBiarchSuffix) {
@@ -2006,6 +2129,8 @@
   } else if (tools::isMipsArch(TargetArch)) {
     if (!findMIPSMultilibs(D, TargetTriple, Path, Args, Detected))
       return false;
+  } else if (isRISCV(TargetArch)) {
+    findRISCVMultilibs(D, TargetTriple, Path, Args, Detected);
   } else if (!findBiarchMultilibs(D, TargetTriple, Path, Args,
                                   NeedsBiarchSuffix, Detected)) {
     return false;
@@ -2022,12 +2147,6 @@
     const llvm::Triple &TargetTriple, const ArgList &Args,
     const std::string &LibDir, StringRef CandidateTriple,
     bool NeedsBiarchSuffix) {
-  if (TargetTriple.getOS() == llvm::Triple::Solaris) {
-    scanLibDirForGCCTripleSolaris(TargetTriple, Args, LibDir, CandidateTriple,
-                                  NeedsBiarchSuffix);
-    return;
-  }
-
   llvm::Triple::ArchType TargetArch = TargetTriple.getArch();
   // Locations relative to the system lib directory where GCC's triple-specific
   // directories might reside.
@@ -2040,31 +2159,33 @@
     // Whether this library suffix is relevant for the triple.
     bool Active;
   } Suffixes[] = {
-    // This is the normal place.
-    {"gcc/" + CandidateTriple.str(), "../..", true},
+      // This is the normal place.
+      {"gcc/" + CandidateTriple.str(), "../..", true},
 
-    // Debian puts cross-compilers in gcc-cross.
-    {"gcc-cross/" + CandidateTriple.str(), "../..", true},
+      // Debian puts cross-compilers in gcc-cross.
+      {"gcc-cross/" + CandidateTriple.str(), "../..",
+       TargetTriple.getOS() != llvm::Triple::Solaris},
 
-    // The Freescale PPC SDK has the gcc libraries in
-    // <sysroot>/usr/lib/<triple>/x.y.z so have a look there as well. Only do
-    // this on Freescale triples, though, since some systems put a *lot* of
-    // files in that location, not just GCC installation data.
-    {CandidateTriple.str(), "..",
-      TargetTriple.getVendor() == llvm::Triple::Freescale},
+      // The Freescale PPC SDK has the gcc libraries in
+      // <sysroot>/usr/lib/<triple>/x.y.z so have a look there as well. Only do
+      // this on Freescale triples, though, since some systems put a *lot* of
+      // files in that location, not just GCC installation data.
+      {CandidateTriple.str(), "..",
+       TargetTriple.getVendor() == llvm::Triple::Freescale},
 
-    // Natively multiarch systems sometimes put the GCC triple-specific
-    // directory within their multiarch lib directory, resulting in the
-    // triple appearing twice.
-    {CandidateTriple.str() + "/gcc/" + CandidateTriple.str(), "../../..", true},
+      // Natively multiarch systems sometimes put the GCC triple-specific
+      // directory within their multiarch lib directory, resulting in the
+      // triple appearing twice.
+      {CandidateTriple.str() + "/gcc/" + CandidateTriple.str(), "../../..",
+       TargetTriple.getOS() != llvm::Triple::Solaris},
 
-    // Deal with cases (on Ubuntu) where the system architecture could be i386
-    // but the GCC target architecture could be (say) i686.
-    // FIXME: It may be worthwhile to generalize this and look for a second
-    // triple.
-    {"i386-linux-gnu/gcc/" + CandidateTriple.str(), "../../..",
-      TargetArch == llvm::Triple::x86}
-  };
+      // Deal with cases (on Ubuntu) where the system architecture could be i386
+      // but the GCC target architecture could be (say) i686.
+      // FIXME: It may be worthwhile to generalize this and look for a second
+      // triple.
+      {"i386-linux-gnu/gcc/" + CandidateTriple.str(), "../../..",
+       (TargetArch == llvm::Triple::x86 &&
+        TargetTriple.getOS() != llvm::Triple::Solaris)}};
 
   for (auto &Suffix : Suffixes) {
     if (!Suffix.Active)
@@ -2221,6 +2342,8 @@
   case llvm::Triple::ppc:
   case llvm::Triple::ppc64:
   case llvm::Triple::ppc64le:
+  case llvm::Triple::riscv32:
+  case llvm::Triple::riscv64:
   case llvm::Triple::systemz:
   case llvm::Triple::mips:
   case llvm::Triple::mipsel:
@@ -2357,7 +2480,8 @@
       getTriple().getArch() == llvm::Triple::aarch64 ||
       getTriple().getArch() == llvm::Triple::aarch64_be ||
       (getTriple().getOS() == llvm::Triple::Linux &&
-       (!V.isOlderThan(4, 7, 0) || getTriple().isAndroid())) ||
+       ((!GCCInstallation.isValid() || !V.isOlderThan(4, 7, 0)) ||
+        getTriple().isAndroid())) ||
       getTriple().getOS() == llvm::Triple::NaCl ||
       (getTriple().getVendor() == llvm::Triple::MipsTechnologies &&
        !getTriple().hasEnvironment()) ||
diff --git a/lib/Driver/ToolChains/Gnu.h b/lib/Driver/ToolChains/Gnu.h
index f29342b..9bad560 100644
--- a/lib/Driver/ToolChains/Gnu.h
+++ b/lib/Driver/ToolChains/Gnu.h
@@ -250,6 +250,10 @@
                              SmallVectorImpl<StringRef> &BiarchLibDirs,
                              SmallVectorImpl<StringRef> &BiarchTripleAliases);
 
+    void AddDefaultGCCPrefixes(const llvm::Triple &TargetTriple,
+                               SmallVectorImpl<std::string> &Prefixes,
+                               StringRef SysRoot);
+
     bool ScanGCCForMultilibs(const llvm::Triple &TargetTriple,
                              const llvm::opt::ArgList &Args,
                              StringRef Path,
@@ -261,12 +265,6 @@
                                 StringRef CandidateTriple,
                                 bool NeedsBiarchSuffix = false);
 
-    void scanLibDirForGCCTripleSolaris(const llvm::Triple &TargetArch,
-                                       const llvm::opt::ArgList &Args,
-                                       const std::string &LibDir,
-                                       StringRef CandidateTriple,
-                                       bool NeedsBiarchSuffix = false);
-
     bool ScanGentooGccConfig(const llvm::Triple &TargetTriple,
                              const llvm::opt::ArgList &Args,
                              StringRef CandidateTriple,
@@ -307,8 +305,8 @@
   /// \brief Check whether the target triple's architecture is 32-bits.
   bool isTarget32Bit() const { return getTriple().isArch32Bit(); }
 
-  // FIXME: This should be final, but the Solaris tool chain does weird
-  // things we can't easily represent.
+  // FIXME: This should be final, but the CrossWindows toolchain does weird
+  // things that can't be easily generalized.
   void AddClangCXXStdlibIncludeArgs(
       const llvm::opt::ArgList &DriverArgs,
       llvm::opt::ArgStringList &CC1Args) const override;
diff --git a/lib/Driver/ToolChains/Hexagon.cpp b/lib/Driver/ToolChains/Hexagon.cpp
index d4ab5dc..77193cd0 100644
--- a/lib/Driver/ToolChains/Hexagon.cpp
+++ b/lib/Driver/ToolChains/Hexagon.cpp
@@ -32,6 +32,7 @@
   return llvm::StringSwitch<StringRef>(Cpu)
       .Case("v60", "64b")
       .Case("v62", "64b")
+      .Case("v65", "64b")
       .Default("128b");
 }
 
@@ -45,7 +46,7 @@
   // Handle the unsupported values passed to mhvx-length.
   if (Arg *A = Args.getLastArg(options::OPT_mhexagon_hvx_length_EQ)) {
     StringRef Val = A->getValue();
-    if (Val != "64B" && Val != "128B")
+    if (!Val.equals_lower("64b") && !Val.equals_lower("128b"))
       D.Diag(diag::err_drv_unsupported_option_argument)
           << A->getOption().getName() << Val;
   }
@@ -137,16 +138,15 @@
   const Driver &D = HTC.getDriver();
   ArgStringList CmdArgs;
 
-  std::string MArchString = "-march=hexagon";
-  CmdArgs.push_back(Args.MakeArgString(MArchString));
+  CmdArgs.push_back("-march=hexagon");
 
   RenderExtraToolArgs(JA, CmdArgs);
 
-  std::string AsName = "hexagon-llvm-mc";
-  std::string MCpuString = "-mcpu=hexagon" +
-        toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str();
+  const char *AsName = "hexagon-llvm-mc";
   CmdArgs.push_back("-filetype=obj");
-  CmdArgs.push_back(Args.MakeArgString(MCpuString));
+  CmdArgs.push_back(Args.MakeArgString(
+      "-mcpu=hexagon" +
+      toolchains::HexagonToolChain::GetTargetCPUVersion(Args)));
 
   if (Output.isFilename()) {
     CmdArgs.push_back("-o");
@@ -157,8 +157,7 @@
   }
 
   if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) {
-    std::string N = llvm::utostr(G.getValue());
-    CmdArgs.push_back(Args.MakeArgString(std::string("-gpsize=") + N));
+    CmdArgs.push_back(Args.MakeArgString("-gpsize=" + Twine(G.getValue())));
   }
 
   Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
@@ -191,7 +190,7 @@
       II.getInputArg().render(Args, CmdArgs);
   }
 
-  auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName.c_str()));
+  auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName));
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
 }
 
@@ -242,10 +241,8 @@
     CmdArgs.push_back(Opt.c_str());
 
   CmdArgs.push_back("-march=hexagon");
-  std::string CpuVer =
-        toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str();
-  std::string MCpuString = "-mcpu=hexagon" + CpuVer;
-  CmdArgs.push_back(Args.MakeArgString(MCpuString));
+  StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args);
+  CmdArgs.push_back(Args.MakeArgString("-mcpu=hexagon" + CpuVer));
 
   if (IsShared) {
     CmdArgs.push_back("-shared");
@@ -260,8 +257,7 @@
     CmdArgs.push_back("-pie");
 
   if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) {
-    std::string N = llvm::utostr(G.getValue());
-    CmdArgs.push_back(Args.MakeArgString(std::string("-G") + N));
+    CmdArgs.push_back(Args.MakeArgString("-G" + Twine(G.getValue())));
     UseG0 = G.getValue() == 0;
   }
 
@@ -290,7 +286,7 @@
   //----------------------------------------------------------------------------
   // Start Files
   //----------------------------------------------------------------------------
-  const std::string MCpuSuffix = "/" + CpuVer;
+  const std::string MCpuSuffix = "/" + CpuVer.str();
   const std::string MCpuG0Suffix = MCpuSuffix + "/G0";
   const std::string RootDir =
       HTC.getHexagonTargetDir(D.InstalledDir, D.PrefixDirs) + "/";
@@ -350,7 +346,7 @@
     CmdArgs.push_back("--start-group");
 
     if (!IsShared) {
-      for (const std::string &Lib : OsLibs)
+      for (StringRef Lib : OsLibs)
         CmdArgs.push_back(Args.MakeArgString("-l" + Lib));
       CmdArgs.push_back("-lc");
     }
@@ -525,11 +521,15 @@
 void HexagonToolChain::addClangTargetOptions(const ArgList &DriverArgs,
                                              ArgStringList &CC1Args,
                                              Action::OffloadKind) const {
-  if (DriverArgs.hasArg(options::OPT_ffp_contract))
-    return;
-  unsigned OptLevel = getOptimizationLevel(DriverArgs);
-  if (OptLevel >= 3)
-    CC1Args.push_back("-ffp-contract=fast");
+  if (!DriverArgs.hasArg(options::OPT_ffp_contract)) {
+    unsigned OptLevel = getOptimizationLevel(DriverArgs);
+    if (OptLevel >= 3)
+      CC1Args.push_back("-ffp-contract=fast");
+  }
+  if (DriverArgs.hasArg(options::OPT_ffixed_r19)) {
+    CC1Args.push_back("-target-feature");
+    CC1Args.push_back("+reserved-r19");
+  }
 }
 
 void HexagonToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp
index 1adf9f7..4e9d0f1 100644
--- a/lib/Driver/ToolChains/Linux.cpp
+++ b/lib/Driver/ToolChains/Linux.cpp
@@ -11,6 +11,7 @@
 #include "Arch/ARM.h"
 #include "Arch/Mips.h"
 #include "Arch/PPC.h"
+#include "Arch/RISCV.h"
 #include "CommonArgs.h"
 #include "clang/Basic/VirtualFileSystem.h"
 #include "clang/Config/config.h"
@@ -176,6 +177,9 @@
       Triple.getEnvironment() == llvm::Triple::GNUX32)
     return "libx32";
 
+  if (Triple.getArch() == llvm::Triple::riscv32)
+    return "lib32";
+
   return Triple.isArch32Bit() ? "lib" : "lib64";
 }
 
@@ -210,7 +214,12 @@
 
   Distro Distro(D.getVFS());
 
-  if (Distro.IsOpenSUSE() || Distro.IsUbuntu()) {
+  if (Distro.IsAlpineLinux()) {
+    ExtraOpts.push_back("-z");
+    ExtraOpts.push_back("now");
+  }
+
+  if (Distro.IsOpenSUSE() || Distro.IsUbuntu() || Distro.IsAlpineLinux()) {
     ExtraOpts.push_back("-z");
     ExtraOpts.push_back("relro");
   }
@@ -221,6 +230,8 @@
   const bool IsAndroid = Triple.isAndroid();
   const bool IsMips = tools::isMipsArch(Arch);
   const bool IsHexagon = Arch == llvm::Triple::hexagon;
+  const bool IsRISCV =
+      Arch == llvm::Triple::riscv32 || Arch == llvm::Triple::riscv64;
 
   if (IsMips && !SysRoot.empty())
     ExtraOpts.push_back("--sysroot=" + SysRoot);
@@ -232,7 +243,7 @@
   // Android loader does not support .gnu.hash.
   // Hexagon linker/loader does not support .gnu.hash
   if (!IsMips && !IsAndroid && !IsHexagon) {
-    if (Distro.IsRedhat() || Distro.IsOpenSUSE() ||
+    if (Distro.IsRedhat() || Distro.IsOpenSUSE() || Distro.IsAlpineLinux() ||
         (Distro.IsUbuntu() && Distro >= Distro::UbuntuMaverick))
       ExtraOpts.push_back("--hash-style=gnu");
 
@@ -248,7 +259,7 @@
   ExtraOpts.push_back("--build-id");
 #endif
 
-  if (Distro.IsOpenSUSE())
+  if (IsAndroid || Distro.IsOpenSUSE())
     ExtraOpts.push_back("--enable-new-dtags");
 
   // The selection of paths to try here is designed to match the patterns which
@@ -328,6 +339,11 @@
   addPathIfExists(D, SysRoot + "/lib/../" + OSLibDir, Paths);
   addPathIfExists(D, SysRoot + "/usr/lib/" + MultiarchTriple, Paths);
   addPathIfExists(D, SysRoot + "/usr/lib/../" + OSLibDir, Paths);
+  if (IsRISCV) {
+    StringRef ABIName = tools::riscv::getRISCVABI(Args, Triple);
+    addPathIfExists(D, SysRoot + "/" + OSLibDir + "/" + ABIName, Paths);
+    addPathIfExists(D, SysRoot + "/usr/" + OSLibDir + "/" + ABIName, Paths);
+  }
 
   // Try walking via the GCC triple path in case of biarch or multiarch GCC
   // installations with strange symlinks.
@@ -506,6 +522,18 @@
     Loader =
         (tools::ppc::hasPPCAbiArg(Args, "elfv1")) ? "ld64.so.1" : "ld64.so.2";
     break;
+  case llvm::Triple::riscv32: {
+    StringRef ABIName = tools::riscv::getRISCVABI(Args, Triple);
+    LibDir = "lib";
+    Loader = ("ld-linux-riscv32-" + ABIName + ".so.1").str();
+    break;
+  }
+  case llvm::Triple::riscv64: {
+    StringRef ABIName = tools::riscv::getRISCVABI(Args, Triple);
+    LibDir = "lib";
+    Loader = ("ld-linux-riscv64-" + ABIName + ".so.1").str();
+    break;
+  }
   case llvm::Triple::sparc:
   case llvm::Triple::sparcel:
     LibDir = "lib";
@@ -810,11 +838,16 @@
   }
 }
 
-bool Linux::isPIEDefault() const { return getSanitizerArgs().requiresPIE(); }
+bool Linux::isPIEDefault() const {
+  return (getTriple().isAndroid() && !getTriple().isAndroidVersionLT(16)) ||
+          getTriple().isMusl() || getSanitizerArgs().requiresPIE();
+}
 
 SanitizerMask Linux::getSupportedSanitizers() const {
   const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
   const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
+  const bool IsMIPS = getTriple().getArch() == llvm::Triple::mips ||
+                      getTriple().getArch() == llvm::Triple::mipsel;
   const bool IsMIPS64 = getTriple().getArch() == llvm::Triple::mips64 ||
                         getTriple().getArch() == llvm::Triple::mips64el;
   const bool IsPowerPC64 = getTriple().getArch() == llvm::Triple::ppc64 ||
@@ -834,7 +867,7 @@
   Res |= SanitizerKind::SafeStack;
   if (IsX86_64 || IsMIPS64 || IsAArch64)
     Res |= SanitizerKind::DataFlow;
-  if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch)
+  if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64)
     Res |= SanitizerKind::Leak;
   if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64)
     Res |= SanitizerKind::Thread;
@@ -842,9 +875,12 @@
     Res |= SanitizerKind::Memory;
   if (IsX86_64 || IsMIPS64)
     Res |= SanitizerKind::Efficiency;
-  if (IsX86 || IsX86_64) {
+  if (IsX86 || IsX86_64)
     Res |= SanitizerKind::Function;
-  }
+  if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch)
+    Res |= SanitizerKind::Scudo;
+  if (IsAArch64)
+    Res |= SanitizerKind::HWAddress;
   return Res;
 }
 
diff --git a/lib/Driver/ToolChains/MSVC.cpp b/lib/Driver/ToolChains/MSVC.cpp
index ae41ee9..8458c03 100644
--- a/lib/Driver/ToolChains/MSVC.cpp
+++ b/lib/Driver/ToolChains/MSVC.cpp
@@ -752,6 +752,8 @@
     return "x64";
   case ArchType::arm:
     return "arm";
+  case ArchType::aarch64:
+    return "arm64";
   default:
     return "";
   }
@@ -769,6 +771,8 @@
     return "amd64";
   case ArchType::arm:
     return "arm";
+  case ArchType::aarch64:
+    return "arm64";
   default:
     return "";
   }
@@ -784,6 +788,8 @@
     return "amd64";
   case ArchType::arm:
     return "arm";
+  case ArchType::aarch64:
+    return "arm64";
   default:
     return "";
   }
diff --git a/lib/Driver/ToolChains/MinGW.cpp b/lib/Driver/ToolChains/MinGW.cpp
index 660b0c7..db83ac6 100644
--- a/lib/Driver/ToolChains/MinGW.cpp
+++ b/lib/Driver/ToolChains/MinGW.cpp
@@ -107,14 +107,6 @@
   // handled somewhere else.
   Args.ClaimAllArgs(options::OPT_w);
 
-  StringRef LinkerName = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "ld");
-  if (LinkerName.equals_lower("lld")) {
-    CmdArgs.push_back("-flavor");
-    CmdArgs.push_back("gnu");
-  } else if (!LinkerName.equals_lower("ld")) {
-    D.Diag(diag::err_drv_unsupported_linker) << LinkerName;
-  }
-
   if (!D.SysRoot.empty())
     CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
 
@@ -149,22 +141,21 @@
     CmdArgs.push_back("console");
   }
 
+  if (Args.hasArg(options::OPT_mdll))
+    CmdArgs.push_back("--dll");
+  else if (Args.hasArg(options::OPT_shared))
+    CmdArgs.push_back("--shared");
   if (Args.hasArg(options::OPT_static))
     CmdArgs.push_back("-Bstatic");
-  else {
-    if (Args.hasArg(options::OPT_mdll))
-      CmdArgs.push_back("--dll");
-    else if (Args.hasArg(options::OPT_shared))
-      CmdArgs.push_back("--shared");
+  else
     CmdArgs.push_back("-Bdynamic");
-    if (Args.hasArg(options::OPT_mdll) || Args.hasArg(options::OPT_shared)) {
-      CmdArgs.push_back("-e");
-      if (TC.getArch() == llvm::Triple::x86)
-        CmdArgs.push_back("_DllMainCRTStartup@12");
-      else
-        CmdArgs.push_back("DllMainCRTStartup");
-      CmdArgs.push_back("--enable-auto-image-base");
-    }
+  if (Args.hasArg(options::OPT_mdll) || Args.hasArg(options::OPT_shared)) {
+    CmdArgs.push_back("-e");
+    if (TC.getArch() == llvm::Triple::x86)
+      CmdArgs.push_back("_DllMainCRTStartup@12");
+    else
+      CmdArgs.push_back("DllMainCRTStartup");
+    CmdArgs.push_back("--enable-auto-image-base");
   }
 
   CmdArgs.push_back("-o");
@@ -244,7 +235,7 @@
 
       if (Args.hasArg(options::OPT_static))
         CmdArgs.push_back("--end-group");
-      else if (!LinkerName.equals_lower("lld"))
+      else
         AddLibGCC(Args, CmdArgs);
     }
 
@@ -255,7 +246,7 @@
       CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtend.o")));
     }
   }
-  const char *Exec = Args.MakeArgString(TC.GetProgramPath(LinkerName.data()));
+  const char *Exec = Args.MakeArgString(TC.GetLinkerPath());
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
 }
 
@@ -375,8 +366,11 @@
   return getArch() == llvm::Triple::x86_64;
 }
 
-bool toolchains::MinGW::UseSEHExceptions() const {
-  return getArch() == llvm::Triple::x86_64;
+llvm::ExceptionHandling
+toolchains::MinGW::GetExceptionModel(const ArgList &Args) const {
+  if (getArch() == llvm::Triple::x86_64)
+    return llvm::ExceptionHandling::WinEH;
+  return llvm::ExceptionHandling::DwarfCFI;
 }
 
 void toolchains::MinGW::AddCudaIncludeArgs(const ArgList &DriverArgs,
diff --git a/lib/Driver/ToolChains/MinGW.h b/lib/Driver/ToolChains/MinGW.h
index 9b3d7c5..f8dbcae 100644
--- a/lib/Driver/ToolChains/MinGW.h
+++ b/lib/Driver/ToolChains/MinGW.h
@@ -64,7 +64,9 @@
   bool isPICDefault() const override;
   bool isPIEDefault() const override;
   bool isPICDefaultForced() const override;
-  bool UseSEHExceptions() const;
+
+  llvm::ExceptionHandling GetExceptionModel(
+      const llvm::opt::ArgList &Args) const override;
 
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
diff --git a/lib/Driver/ToolChains/Myriad.cpp b/lib/Driver/ToolChains/Myriad.cpp
index 6fdb5a2..2c0b316 100644
--- a/lib/Driver/ToolChains/Myriad.cpp
+++ b/lib/Driver/ToolChains/Myriad.cpp
@@ -107,7 +107,6 @@
     CmdArgs.push_back(
         Args.MakeArgString(std::string("-i:") + A->getValue(0)));
   }
-  CmdArgs.push_back("-elf"); // Output format.
   CmdArgs.push_back(II.getFilename());
   CmdArgs.push_back(
       Args.MakeArgString(std::string("-o:") + Output.getFilename()));
@@ -199,7 +198,7 @@
   }
 
   std::string Exec =
-      Args.MakeArgString(TC.GetProgramPath("sparc-myriad-elf-ld"));
+      Args.MakeArgString(TC.GetProgramPath("sparc-myriad-rtems-ld"));
   C.addCommand(llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec),
                                           CmdArgs, Inputs));
 }
@@ -218,10 +217,11 @@
     D.Diag(clang::diag::err_target_unsupported_arch)
         << Triple.getArchName() << "myriad";
     LLVM_FALLTHROUGH;
+  case llvm::Triple::shave:
+    return;
   case llvm::Triple::sparc:
   case llvm::Triple::sparcel:
-  case llvm::Triple::shave:
-    GCCInstallation.init(Triple, Args, {"sparc-myriad-elf"});
+    GCCInstallation.init(Triple, Args, {"sparc-myriad-rtems"});
   }
 
   if (GCCInstallation.isValid()) {
@@ -231,7 +231,7 @@
     addPathIfExists(D, CompilerSupportDir, getFilePaths());
   }
   // libstd++ and libc++ must both be found in this one place.
-  addPathIfExists(D, D.Dir + "/../sparc-myriad-elf/lib", getFilePaths());
+  addPathIfExists(D, D.Dir + "/../sparc-myriad-rtems/lib", getFilePaths());
 }
 
 MyriadToolChain::~MyriadToolChain() {}
diff --git a/lib/Driver/ToolChains/NetBSD.cpp b/lib/Driver/ToolChains/NetBSD.cpp
index c2c9007..0db6578 100644
--- a/lib/Driver/ToolChains/NetBSD.cpp
+++ b/lib/Driver/ToolChains/NetBSD.cpp
@@ -416,6 +416,15 @@
                            "", DriverArgs, CC1Args);
 }
 
+llvm::ExceptionHandling NetBSD::GetExceptionModel(const ArgList &Args) const {
+  // NetBSD uses Dwarf exceptions on ARM.
+  llvm::Triple::ArchType TArch = getTriple().getArch();
+  if (TArch == llvm::Triple::arm || TArch == llvm::Triple::armeb ||
+      TArch == llvm::Triple::thumb || TArch == llvm::Triple::thumbeb)
+    return llvm::ExceptionHandling::DwarfCFI;
+  return llvm::ExceptionHandling::None;
+}
+
 SanitizerMask NetBSD::getSupportedSanitizers() const {
   const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
   const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
@@ -425,11 +434,15 @@
     Res |= SanitizerKind::Function;
     Res |= SanitizerKind::Leak;
     Res |= SanitizerKind::SafeStack;
+    Res |= SanitizerKind::Scudo;
     Res |= SanitizerKind::Vptr;
   }
   if (IsX86_64) {
+    Res |= SanitizerKind::Efficiency;
     Res |= SanitizerKind::Fuzzer;
     Res |= SanitizerKind::FuzzerNoLink;
+    Res |= SanitizerKind::KernelAddress;
+    Res |= SanitizerKind::Memory;
     Res |= SanitizerKind::Thread;
   }
   return Res;
diff --git a/lib/Driver/ToolChains/NetBSD.h b/lib/Driver/ToolChains/NetBSD.h
index 5163ff7..e98df72 100644
--- a/lib/Driver/ToolChains/NetBSD.h
+++ b/lib/Driver/ToolChains/NetBSD.h
@@ -69,6 +69,9 @@
     return true;
   }
 
+  llvm::ExceptionHandling GetExceptionModel(
+      const llvm::opt::ArgList &Args) const override;
+
   SanitizerMask getSupportedSanitizers() const override;
 
 protected:
diff --git a/lib/Driver/ToolChains/OpenBSD.cpp b/lib/Driver/ToolChains/OpenBSD.cpp
index fbb84a6..c0f8eff 100644
--- a/lib/Driver/ToolChains/OpenBSD.cpp
+++ b/lib/Driver/ToolChains/OpenBSD.cpp
@@ -13,6 +13,7 @@
 #include "CommonArgs.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Options.h"
+#include "clang/Driver/SanitizerArgs.h"
 #include "llvm/Option/ArgList.h"
 
 using namespace clang::driver;
@@ -97,6 +98,8 @@
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
+  const toolchains::OpenBSD &ToolChain =
+      static_cast<const toolchains::OpenBSD &>(getToolChain());
   const Driver &D = getToolChain().getDriver();
   ArgStringList CmdArgs;
 
@@ -170,11 +173,13 @@
     Triple.replace(0, 6, "amd64");
   CmdArgs.push_back(
       Args.MakeArgString("-L/usr/lib/gcc-lib/" + Triple + "/4.2.1"));
+  CmdArgs.push_back(Args.MakeArgString("-L/usr/lib"));
 
   Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
                             options::OPT_e, options::OPT_s, options::OPT_t,
                             options::OPT_Z_Flag, options::OPT_r});
 
+  bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
 
   if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
@@ -186,7 +191,10 @@
       else
         CmdArgs.push_back("-lm");
     }
-
+    if (NeedsSanitizerDeps) {
+      CmdArgs.push_back(ToolChain.getCompilerRTArgString(Args, "builtins", false));
+      linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
+    }
     // FIXME: For some reason GCC passes -lgcc before adding
     // the default system libraries. Just mimic this for now.
     CmdArgs.push_back("-lgcc");
@@ -221,6 +229,19 @@
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
 }
 
+SanitizerMask OpenBSD::getSupportedSanitizers() const {
+  const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
+  const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
+
+  // For future use, only UBsan at the moment
+  SanitizerMask Res = ToolChain::getSupportedSanitizers();
+
+  if (IsX86 || IsX86_64)
+    Res |= SanitizerKind::Vptr;
+
+  return Res;
+}
+
 /// OpenBSD - OpenBSD tool chain which can call as(1) and ld(1) directly.
 
 OpenBSD::OpenBSD(const Driver &D, const llvm::Triple &Triple,
diff --git a/lib/Driver/ToolChains/OpenBSD.h b/lib/Driver/ToolChains/OpenBSD.h
index 1cc0ca7..5277609 100644
--- a/lib/Driver/ToolChains/OpenBSD.h
+++ b/lib/Driver/ToolChains/OpenBSD.h
@@ -64,6 +64,8 @@
   }
   unsigned GetDefaultDwarfVersion() const override { return 2; }
 
+  SanitizerMask getSupportedSanitizers() const override;
+
 protected:
   Tool *buildAssembler() const override;
   Tool *buildLinker() const override;
diff --git a/lib/Driver/ToolChains/Solaris.cpp b/lib/Driver/ToolChains/Solaris.cpp
index 9fe6e9d..b48edbb 100644
--- a/lib/Driver/ToolChains/Solaris.cpp
+++ b/lib/Driver/ToolChains/Solaris.cpp
@@ -71,6 +71,11 @@
       CmdArgs.push_back(
           Args.MakeArgString(getToolChain().GetFilePath("ld.so.1")));
     }
+
+    // libpthread has been folded into libc since Solaris 10, no need to do
+    // anything for pthreads. Claim argument to avoid warning.
+    Args.ClaimAllArgs(options::OPT_pthread);
+    Args.ClaimAllArgs(options::OPT_pthreads);
   }
 
   if (Output.isFilename()) {
@@ -92,24 +97,48 @@
         Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
   }
 
+  // Provide __start___sancov_guards.  Solaris ld doesn't automatically create
+  // __start_SECNAME labels.
+  CmdArgs.push_back("--whole-archive");
+  CmdArgs.push_back(
+      getToolChain().getCompilerRTArgString(Args, "sancov_begin", false));
+  CmdArgs.push_back("--no-whole-archive");
+
   getToolChain().AddFilePathLibArgs(Args, CmdArgs);
 
   Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
                             options::OPT_e, options::OPT_r});
 
+  bool NeedsSanitizerDeps = addSanitizerRuntimes(getToolChain(), Args, CmdArgs);
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
 
   if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
     if (getToolChain().ShouldLinkCXXStdlib(Args))
       getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
+    if (Args.hasArg(options::OPT_fstack_protector) ||
+        Args.hasArg(options::OPT_fstack_protector_strong) ||
+        Args.hasArg(options::OPT_fstack_protector_all)) {
+      // Explicitly link ssp libraries, not folded into Solaris libc.
+      CmdArgs.push_back("-lssp_nonshared");
+      CmdArgs.push_back("-lssp");
+    }
     CmdArgs.push_back("-lgcc_s");
     CmdArgs.push_back("-lc");
     if (!Args.hasArg(options::OPT_shared)) {
       CmdArgs.push_back("-lgcc");
       CmdArgs.push_back("-lm");
     }
+    if (NeedsSanitizerDeps)
+      linkSanitizerRuntimeDeps(getToolChain(), CmdArgs);
   }
 
+  // Provide __stop___sancov_guards.  Solaris ld doesn't automatically create
+  // __stop_SECNAME labels.
+  CmdArgs.push_back("--whole-archive");
+  CmdArgs.push_back(
+      getToolChain().getCompilerRTArgString(Args, "sancov_end", false));
+  CmdArgs.push_back("--no-whole-archive");
+
   if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
     CmdArgs.push_back(
         Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
@@ -122,6 +151,21 @@
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
 }
 
+static StringRef getSolarisLibSuffix(const llvm::Triple &Triple) {
+  switch (Triple.getArch()) {
+  case llvm::Triple::x86:
+  case llvm::Triple::sparc:
+    break;
+  case llvm::Triple::x86_64:
+    return "/amd64";
+  case llvm::Triple::sparcv9:
+    return "/sparcv9";
+  default:
+    llvm_unreachable("Unsupported architecture");
+  }
+  return "";
+}
+
 /// Solaris - Solaris tool chain which can call as(1) and ld(1) directly.
 
 Solaris::Solaris(const Driver &D, const llvm::Triple &Triple,
@@ -130,32 +174,35 @@
 
   GCCInstallation.init(Triple, Args);
 
+  StringRef LibSuffix = getSolarisLibSuffix(Triple);
   path_list &Paths = getFilePaths();
-  if (GCCInstallation.isValid())
-    addPathIfExists(D, GCCInstallation.getInstallPath(), Paths);
-
-  addPathIfExists(D, getDriver().getInstalledDir(), Paths);
-  if (getDriver().getInstalledDir() != getDriver().Dir)
-    addPathIfExists(D, getDriver().Dir, Paths);
-
-  addPathIfExists(D, getDriver().SysRoot + getDriver().Dir + "/../lib", Paths);
-
-  std::string LibPath = "/usr/lib/";
-  switch (Triple.getArch()) {
-  case llvm::Triple::x86:
-  case llvm::Triple::sparc:
-    break;
-  case llvm::Triple::x86_64:
-    LibPath += "amd64/";
-    break;
-  case llvm::Triple::sparcv9:
-    LibPath += "sparcv9/";
-    break;
-  default:
-    llvm_unreachable("Unsupported architecture");
+  if (GCCInstallation.isValid()) {
+    // On Solaris gcc uses both an architecture-specific path with triple in it
+    // as well as a more generic lib path (+arch suffix).
+    addPathIfExists(D,
+                    GCCInstallation.getInstallPath() +
+                        GCCInstallation.getMultilib().gccSuffix(),
+                    Paths);
+    addPathIfExists(D, GCCInstallation.getParentLibPath() + LibSuffix, Paths);
   }
 
-  addPathIfExists(D, getDriver().SysRoot + LibPath, Paths);
+  // If we are currently running Clang inside of the requested system root,
+  // add its parent library path to those searched.
+  if (StringRef(D.Dir).startswith(D.SysRoot))
+    addPathIfExists(D, D.Dir + "/../lib", Paths);
+
+  addPathIfExists(D, D.SysRoot + "/usr/lib" + LibSuffix, Paths);
+}
+
+SanitizerMask Solaris::getSupportedSanitizers() const {
+  const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
+  SanitizerMask Res = ToolChain::getSupportedSanitizers();
+  // FIXME: Omit X86_64 until 64-bit support is figured out.
+  if (IsX86) {
+    Res |= SanitizerKind::Address;
+  }
+  Res |= SanitizerKind::Vptr;
+  return Res;
 }
 
 Tool *Solaris::buildAssembler() const {
@@ -164,30 +211,72 @@
 
 Tool *Solaris::buildLinker() const { return new tools::solaris::Linker(*this); }
 
-void Solaris::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
-                                           ArgStringList &CC1Args) const {
-  if (DriverArgs.hasArg(options::OPT_nostdlibinc) ||
-      DriverArgs.hasArg(options::OPT_nostdincxx))
+void Solaris::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
+                                        ArgStringList &CC1Args) const {
+  const Driver &D = getDriver();
+
+  if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc))
     return;
 
-  // Include the support directory for things like xlocale and fudged system
-  // headers.
-  // FIXME: This is a weird mix of libc++ and libstdc++. We should also be
-  // checking the value of -stdlib= here and adding the includes for libc++
-  // rather than libstdc++ if it's requested.
-  addSystemInclude(DriverArgs, CC1Args, "/usr/include/c++/v1/support/solaris");
+  if (!DriverArgs.hasArg(options::OPT_nostdlibinc))
+    addSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/local/include");
 
-  if (GCCInstallation.isValid()) {
-    GCCVersion Version = GCCInstallation.getVersion();
-    addSystemInclude(DriverArgs, CC1Args,
-                     getDriver().SysRoot + "/usr/gcc/" +
-                     Version.MajorStr + "." +
-                     Version.MinorStr +
-                     "/include/c++/" + Version.Text);
-    addSystemInclude(DriverArgs, CC1Args,
-                     getDriver().SysRoot + "/usr/gcc/" + Version.MajorStr +
-                     "." + Version.MinorStr + "/include/c++/" +
-                     Version.Text + "/" +
-                     GCCInstallation.getTriple().str());
+  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
+    SmallString<128> P(D.ResourceDir);
+    llvm::sys::path::append(P, "include");
+    addSystemInclude(DriverArgs, CC1Args, P);
   }
+
+  if (DriverArgs.hasArg(options::OPT_nostdlibinc))
+    return;
+
+  // Check for configure-time C include directories.
+  StringRef CIncludeDirs(C_INCLUDE_DIRS);
+  if (CIncludeDirs != "") {
+    SmallVector<StringRef, 5> dirs;
+    CIncludeDirs.split(dirs, ":");
+    for (StringRef dir : dirs) {
+      StringRef Prefix =
+          llvm::sys::path::is_absolute(dir) ? StringRef(D.SysRoot) : "";
+      addExternCSystemInclude(DriverArgs, CC1Args, Prefix + dir);
+    }
+    return;
+  }
+
+  // Add include directories specific to the selected multilib set and multilib.
+  if (GCCInstallation.isValid()) {
+    const MultilibSet::IncludeDirsFunc &Callback =
+        Multilibs.includeDirsCallback();
+    if (Callback) {
+      for (const auto &Path : Callback(GCCInstallation.getMultilib()))
+        addExternCSystemIncludeIfExists(
+            DriverArgs, CC1Args, GCCInstallation.getInstallPath() + Path);
+    }
+  }
+
+  addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/include");
+}
+
+void Solaris::addLibStdCxxIncludePaths(
+    const llvm::opt::ArgList &DriverArgs,
+    llvm::opt::ArgStringList &CC1Args) const {
+  // We need a detected GCC installation on Solaris (similar to Linux)
+  // to provide libstdc++'s headers.
+  if (!GCCInstallation.isValid())
+    return;
+
+  // By default, look for the C++ headers in an include directory adjacent to
+  // the lib directory of the GCC installation.
+  // On Solaris this usually looks like /usr/gcc/X.Y/include/c++/X.Y.Z
+  StringRef LibDir = GCCInstallation.getParentLibPath();
+  StringRef TripleStr = GCCInstallation.getTriple().str();
+  const Multilib &Multilib = GCCInstallation.getMultilib();
+  const GCCVersion &Version = GCCInstallation.getVersion();
+
+  // The primary search for libstdc++ supports multiarch variants.
+  addLibStdCXXIncludePaths(LibDir.str() + "/../include", "/c++/" + Version.Text,
+                           TripleStr,
+                           /*GCCMultiarchTriple*/ "",
+                           /*TargetMultiarchTriple*/ "",
+                           Multilib.includeSuffix(), DriverArgs, CC1Args);
 }
diff --git a/lib/Driver/ToolChains/Solaris.h b/lib/Driver/ToolChains/Solaris.h
index 787917a..9e14269 100644
--- a/lib/Driver/ToolChains/Solaris.h
+++ b/lib/Driver/ToolChains/Solaris.h
@@ -57,10 +57,15 @@
 
   bool IsIntegratedAssemblerDefault() const override { return true; }
 
-  void AddClangCXXStdlibIncludeArgs(
-      const llvm::opt::ArgList &DriverArgs,
-      llvm::opt::ArgStringList &CC1Args) const override;
+  void
+  AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                            llvm::opt::ArgStringList &CC1Args) const override;
 
+  void
+  addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
+                           llvm::opt::ArgStringList &CC1Args) const override;
+
+  SanitizerMask getSupportedSanitizers() const override;
   unsigned GetDefaultDwarfVersion() const override { return 2; }
 
 protected:
diff --git a/lib/Driver/ToolChains/WebAssembly.cpp b/lib/Driver/ToolChains/WebAssembly.cpp
index 354f77f..94f7279 100644
--- a/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/lib/Driver/ToolChains/WebAssembly.cpp
@@ -11,6 +11,7 @@
 #include "CommonArgs.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
+#include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/Options.h"
 #include "llvm/Option/ArgList.h"
 
@@ -47,8 +48,12 @@
     CmdArgs.push_back("--strip-all");
 
   Args.AddAllArgs(CmdArgs, options::OPT_L);
+  Args.AddAllArgs(CmdArgs, options::OPT_u);
   ToolChain.AddFilePathLibArgs(Args, CmdArgs);
 
+  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles))
+    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt1.o")));
+
   AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
 
   if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
@@ -58,10 +63,8 @@
     if (Args.hasArg(options::OPT_pthread))
       CmdArgs.push_back("-lpthread");
 
-    CmdArgs.push_back("-allow-undefined-file");
-    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("wasm.syms")));
     CmdArgs.push_back("-lc");
-    CmdArgs.push_back("-lcompiler_rt");
+    AddRunTimeLibs(ToolChain, ToolChain.getDriver(), CmdArgs, Args);
   }
 
   CmdArgs.push_back("-o");
@@ -95,9 +98,6 @@
 
 bool WebAssembly::IsIntegratedAssemblerDefault() const { return true; }
 
-// TODO: Support Objective C stuff.
-bool WebAssembly::SupportsObjCGC() const { return false; }
-
 bool WebAssembly::hasBlocksRuntime() const { return false; }
 
 // TODO: Support profiling.
@@ -118,6 +118,12 @@
 }
 
 ToolChain::CXXStdlibType WebAssembly::GetCXXStdlibType(const ArgList &Args) const {
+  if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
+    StringRef Value = A->getValue();
+    if (Value != "libc++")
+      getDriver().Diag(diag::err_drv_invalid_stdlib_name)
+          << A->getAsString(Args);
+  }
   return ToolChain::CST_Libcxx;
 }
 
@@ -135,6 +141,27 @@
                      getDriver().SysRoot + "/include/c++/v1");
 }
 
+void WebAssembly::AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
+                                      llvm::opt::ArgStringList &CmdArgs) const {
+
+  switch (GetCXXStdlibType(Args)) {
+  case ToolChain::CST_Libcxx:
+    CmdArgs.push_back("-lc++");
+    CmdArgs.push_back("-lc++abi");
+    break;
+  case ToolChain::CST_Libstdcxx:
+    llvm_unreachable("invalid stdlib name");
+  }
+}
+
+std::string WebAssembly::getThreadModel() const {
+  // The WebAssembly MVP does not yet support threads; for now, use the
+  // "single" threading model, which lowers atomics to non-atomic operations.
+  // When threading support is standardized and implemented in popular engines,
+  // this override should be removed.
+  return "single";
+}
+
 Tool *WebAssembly::buildLinker() const {
   return new tools::wasm::Linker(*this);
 }
diff --git a/lib/Driver/ToolChains/WebAssembly.h b/lib/Driver/ToolChains/WebAssembly.h
index 2999db4..cdbb34f 100644
--- a/lib/Driver/ToolChains/WebAssembly.h
+++ b/lib/Driver/ToolChains/WebAssembly.h
@@ -49,7 +49,6 @@
   bool isPICDefaultForced() const override;
   bool IsIntegratedAssemblerDefault() const override;
   bool hasBlocksRuntime() const override;
-  bool SupportsObjCGC() const override;
   bool SupportsProfiling() const override;
   bool HasNativeLLVMSupport() const override;
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
@@ -63,6 +62,9 @@
   void AddClangCXXStdlibIncludeArgs(
       const llvm::opt::ArgList &DriverArgs,
       llvm::opt::ArgStringList &CC1Args) const override;
+  void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
+                           llvm::opt::ArgStringList &CmdArgs) const override;
+  std::string getThreadModel() const override;
 
   const char *getDefaultLinker() const override {
     return "lld";
diff --git a/lib/Driver/XRayArgs.cpp b/lib/Driver/XRayArgs.cpp
index 8d68a84..c85d89c 100644
--- a/lib/Driver/XRayArgs.cpp
+++ b/lib/Driver/XRayArgs.cpp
@@ -27,8 +27,6 @@
 constexpr char XRayInstrumentOption[] = "-fxray-instrument";
 constexpr char XRayInstructionThresholdOption[] =
     "-fxray-instruction-threshold=";
-constexpr char XRayAlwaysInstrumentOption[] = "-fxray-always-instrument=";
-constexpr char XRayNeverInstrumentOption[] = "-fxray-never-instrument=";
 } // namespace
 
 XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
@@ -36,7 +34,7 @@
   const llvm::Triple &Triple = TC.getTriple();
   if (Args.hasFlag(options::OPT_fxray_instrument,
                    options::OPT_fnoxray_instrument, false)) {
-    if (Triple.getOS() == llvm::Triple::Linux)
+    if (Triple.getOS() == llvm::Triple::Linux) {
       switch (Triple.getArch()) {
       case llvm::Triple::x86_64:
       case llvm::Triple::arm:
@@ -51,9 +49,15 @@
         D.Diag(diag::err_drv_clang_unsupported)
             << (std::string(XRayInstrumentOption) + " on " + Triple.str());
       }
-    else
+    } else if (Triple.getOS() == llvm::Triple::FreeBSD) {
+        if (Triple.getArch() != llvm::Triple::x86_64) {
+          D.Diag(diag::err_drv_clang_unsupported)
+              << (std::string(XRayInstrumentOption) + " on " + Triple.str());
+        }
+    } else {
       D.Diag(diag::err_drv_clang_unsupported)
-          << (std::string(XRayInstrumentOption) + " on non-Linux target OS");
+          << (std::string(XRayInstrumentOption) + " on non-supported target OS");
+    }
     XRayInstrument = true;
     if (const Arg *A =
             Args.getLastArg(options::OPT_fxray_instruction_threshold_,
@@ -63,6 +67,14 @@
         D.Diag(clang::diag::err_drv_invalid_value) << A->getAsString(Args) << S;
     }
 
+    // By default, the back-end will not emit the lowering for XRay customevent
+    // calls if the function is not instrumented. In the future we will change
+    // this default to be the reverse, but in the meantime we're going to
+    // introduce the new functionality behind a flag.
+    if (Args.hasFlag(options::OPT_fxray_always_emit_customevents,
+                     options::OPT_fnoxray_always_emit_customevents, false))
+      XRayAlwaysEmitCustomEvents = true;
+
     // Validate the always/never attribute files. We also make sure that they
     // are treated as actual dependencies.
     for (const auto &Filename :
@@ -91,17 +103,21 @@
     return;
 
   CmdArgs.push_back(XRayInstrumentOption);
+
+  if (XRayAlwaysEmitCustomEvents)
+    CmdArgs.push_back("-fxray-always-emit-customevents");
+
   CmdArgs.push_back(Args.MakeArgString(Twine(XRayInstructionThresholdOption) +
                                        Twine(InstructionThreshold)));
 
   for (const auto &Always : AlwaysInstrumentFiles) {
-    SmallString<64> AlwaysInstrumentOpt(XRayAlwaysInstrumentOption);
+    SmallString<64> AlwaysInstrumentOpt("-fxray-always-instrument=");
     AlwaysInstrumentOpt += Always;
     CmdArgs.push_back(Args.MakeArgString(AlwaysInstrumentOpt));
   }
 
   for (const auto &Never : NeverInstrumentFiles) {
-    SmallString<64> NeverInstrumentOpt(XRayNeverInstrumentOption);
+    SmallString<64> NeverInstrumentOpt("-fxray-never-instrument=");
     NeverInstrumentOpt += Never;
     CmdArgs.push_back(Args.MakeArgString(NeverInstrumentOpt));
   }
diff --git a/lib/Edit/Commit.cpp b/lib/Edit/Commit.cpp
index cb7a784..0cc152b 100644
--- a/lib/Edit/Commit.cpp
+++ b/lib/Edit/Commit.cpp
@@ -225,8 +225,7 @@
     isAtStartOfMacroExpansion(loc, &loc);
 
   const SourceManager &SM = SourceMgr;
-  while (SM.isMacroArgExpansion(loc))
-    loc = SM.getImmediateSpellingLoc(loc);
+  loc = SM.getTopMacroCallerLoc(loc);
 
   if (loc.isMacroID())
     if (!isAtStartOfMacroExpansion(loc, &loc))
@@ -256,8 +255,7 @@
     isAtEndOfMacroExpansion(loc, &loc);
 
   const SourceManager &SM = SourceMgr;
-  while (SM.isMacroArgExpansion(loc))
-    loc = SM.getImmediateSpellingLoc(loc);
+  loc = SM.getTopMacroCallerLoc(loc);
 
   if (loc.isMacroID())
     if (!isAtEndOfMacroExpansion(loc, &loc))
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp
index bbf3d88..72d2078 100644
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -40,9 +40,15 @@
   }
 }
 
-static StringRef getLineCommentIndentPrefix(StringRef Comment) {
-  static const char *const KnownPrefixes[] = {"///<", "//!<", "///", "//",
-                                              "//!"};
+static StringRef getLineCommentIndentPrefix(StringRef Comment,
+                                            const FormatStyle &Style) {
+  static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//",
+                                                    "//!"};
+  static const char *const KnownTextProtoPrefixes[] = {"//", "#"};
+  ArrayRef<const char *> KnownPrefixes(KnownCStylePrefixes);
+  if (Style.Language == FormatStyle::LK_TextProto)
+    KnownPrefixes = KnownTextProtoPrefixes;
+
   StringRef LongestPrefix;
   for (StringRef KnownPrefix : KnownPrefixes) {
     if (Comment.startswith(KnownPrefix)) {
@@ -61,6 +67,8 @@
                                              unsigned ColumnLimit,
                                              unsigned TabWidth,
                                              encoding::Encoding Encoding) {
+  DEBUG(llvm::dbgs() << "Comment split: \"" << Text << ", " << ColumnLimit
+                     << "\", Content start: " << ContentStartColumn << "\n");
   if (ColumnLimit <= ContentStartColumn + 1)
     return BreakableToken::Split(StringRef::npos, 0);
 
@@ -165,7 +173,7 @@
 }
 
 unsigned
-BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
                                               Split Split) const {
   // Example: consider the content
   // lala  lala
@@ -175,50 +183,57 @@
   // We compute the number of columns when the split is compressed into a single
   // space, like:
   // lala lala
+  //
+  // FIXME: Correctly measure the length of whitespace in Split.second so it
+  // works with tabs.
   return RemainingTokenColumns + 1 - Split.second;
 }
 
-unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
+unsigned BreakableStringLiteral::getLineCount() const { return 1; }
 
-unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
-    unsigned LineIndex, unsigned TailOffset,
-    StringRef::size_type Length) const {
-  return StartColumn + Prefix.size() + Postfix.size() +
-         encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),
-                                       StartColumn + Prefix.size(),
-                                       Style.TabWidth, Encoding);
+unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
+                                                unsigned Offset,
+                                                StringRef::size_type Length,
+                                                unsigned StartColumn) const {
+  llvm_unreachable("Getting the length of a part of the string literal "
+                   "indicates that the code tries to reflow it.");
 }
 
-BreakableSingleLineToken::BreakableSingleLineToken(
+unsigned
+BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
+                                           unsigned StartColumn) const {
+  return UnbreakableTailLength + Postfix.size() +
+         encoding::columnWidthWithTabs(Line.substr(Offset, StringRef::npos),
+                                       StartColumn, Style.TabWidth, Encoding);
+}
+
+unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
+                                                       bool Break) const {
+  return StartColumn + Prefix.size();
+}
+
+BreakableStringLiteral::BreakableStringLiteral(
     const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
-    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
-    const FormatStyle &Style)
+    StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
+    encoding::Encoding Encoding, const FormatStyle &Style)
     : BreakableToken(Tok, InPPDirective, Encoding, Style),
-      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
+      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
+      UnbreakableTailLength(UnbreakableTailLength) {
   assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
   Line = Tok.TokenText.substr(
       Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
 }
 
-BreakableStringLiteral::BreakableStringLiteral(
-    const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
-    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
-    const FormatStyle &Style)
-    : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective,
-                               Encoding, Style) {}
-
-BreakableToken::Split
-BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
-                                 unsigned ColumnLimit,
-                                 llvm::Regex &CommentPragmasRegex) const {
-  return getStringSplit(Line.substr(TailOffset),
-                        StartColumn + Prefix.size() + Postfix.size(),
-                        ColumnLimit, Style.TabWidth, Encoding);
+BreakableToken::Split BreakableStringLiteral::getSplit(
+    unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+    unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const {
+  return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
+                        ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
 }
 
 void BreakableStringLiteral::insertBreak(unsigned LineIndex,
                                          unsigned TailOffset, Split Split,
-                                         WhitespaceManager &Whitespaces) {
+                                         WhitespaceManager &Whitespaces) const {
   Whitespaces.replaceWhitespaceInToken(
       Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
       Prefix, InPPDirective, 1, StartColumn);
@@ -235,19 +250,19 @@
 
 BreakableToken::Split
 BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
-                           unsigned ColumnLimit,
+                           unsigned ColumnLimit, unsigned ContentStartColumn,
                            llvm::Regex &CommentPragmasRegex) const {
   // Don't break lines matching the comment pragmas regex.
   if (CommentPragmasRegex.match(Content[LineIndex]))
     return Split(StringRef::npos, 0);
   return getCommentSplit(Content[LineIndex].substr(TailOffset),
-                         getContentStartColumn(LineIndex, TailOffset),
-                         ColumnLimit, Style.TabWidth, Encoding);
+                         ContentStartColumn, ColumnLimit, Style.TabWidth,
+                         Encoding);
 }
 
-void BreakableComment::compressWhitespace(unsigned LineIndex,
-                                          unsigned TailOffset, Split Split,
-                                          WhitespaceManager &Whitespaces) {
+void BreakableComment::compressWhitespace(
+    unsigned LineIndex, unsigned TailOffset, Split Split,
+    WhitespaceManager &Whitespaces) const {
   StringRef Text = Content[LineIndex].substr(TailOffset);
   // Text is relative to the content line, but Whitespaces operates relative to
   // the start of the corresponding token, so compute the start of the Split
@@ -261,44 +276,6 @@
       /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
 }
 
-BreakableToken::Split
-BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix,
-                                 unsigned PreviousEndColumn,
-                                 unsigned ColumnLimit) const {
-  unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size();
-  StringRef TrimmedText = Text.rtrim(Blanks);
-  // This is the width of the resulting line in case the full line of Text gets
-  // reflown up starting at ReflowStartColumn.
-  unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs(
-                                               TrimmedText, ReflowStartColumn,
-                                               Style.TabWidth, Encoding);
-  // If the full line fits up, we return a reflow split after it,
-  // otherwise we compute the largest piece of text that fits after
-  // ReflowStartColumn.
-  Split ReflowSplit =
-      FullWidth <= ColumnLimit
-          ? Split(TrimmedText.size(), Text.size() - TrimmedText.size())
-          : getCommentSplit(Text, ReflowStartColumn, ColumnLimit,
-                            Style.TabWidth, Encoding);
-
-  // We need to be extra careful here, because while it's OK to keep a long line
-  // if it can't be broken into smaller pieces (like when the first word of a
-  // long line is longer than the column limit), it's not OK to reflow that long
-  // word up. So we recompute the size of the previous line after reflowing and
-  // only return the reflow split if that's under the line limit.
-  if (ReflowSplit.first != StringRef::npos &&
-      // Check if the width of the newly reflown line is under the limit.
-      PreviousEndColumn + ReflowPrefix.size() +
-              encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first),
-                                            PreviousEndColumn +
-                                                ReflowPrefix.size(),
-                                            Style.TabWidth, Encoding) <=
-          ColumnLimit) {
-    return ReflowSplit;
-  }
-  return Split(StringRef::npos, 0);
-}
-
 const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
   return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
 }
@@ -339,7 +316,8 @@
     unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
     encoding::Encoding Encoding, const FormatStyle &Style)
     : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
-      DelimitersOnNewline(false) {
+      DelimitersOnNewline(false),
+      UnbreakableTailLength(Token.UnbreakableTailLength) {
   assert(Tok.is(TT_BlockComment) &&
          "block comment section must start with a block comment");
 
@@ -495,30 +473,45 @@
       IndentDelta;
 }
 
-unsigned BreakableBlockComment::getLineLengthAfterSplit(
-    unsigned LineIndex, unsigned TailOffset,
-    StringRef::size_type Length) const {
-  unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
+                                               unsigned Offset,
+                                               StringRef::size_type Length,
+                                               unsigned StartColumn) const {
   unsigned LineLength =
-      ContentStartColumn + encoding::columnWidthWithTabs(
-                               Content[LineIndex].substr(TailOffset, Length),
-                               ContentStartColumn, Style.TabWidth, Encoding);
+      encoding::columnWidthWithTabs(Content[LineIndex].substr(Offset, Length),
+                                    StartColumn, Style.TabWidth, Encoding);
+  // FIXME: This should go into getRemainingLength instead, but we currently
+  // break tests when putting it there. Investigate how to fix those tests.
   // The last line gets a "*/" postfix.
   if (LineIndex + 1 == Lines.size()) {
     LineLength += 2;
     // We never need a decoration when breaking just the trailing "*/" postfix.
     // Note that checking that Length == 0 is not enough, since Length could
     // also be StringRef::npos.
-    if (Content[LineIndex].substr(TailOffset, Length).empty()) {
+    if (Content[LineIndex].substr(Offset, StringRef::npos).empty()) {
       LineLength -= Decoration.size();
     }
   }
   return LineLength;
 }
 
+unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
+                                                   unsigned Offset,
+                                                   unsigned StartColumn) const {
+  return UnbreakableTailLength +
+         getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
+}
+
+unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
+                                                      bool Break) const {
+  if (Break)
+    return IndentAtLineBreak;
+  return std::max(0, ContentColumn[LineIndex]);
+}
+
 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
                                         Split Split,
-                                        WhitespaceManager &Whitespaces) {
+                                        WhitespaceManager &Whitespaces) const {
   StringRef Text = Content[LineIndex].substr(TailOffset);
   StringRef Prefix = Decoration;
   // We need this to account for the case when we have a decoration "* " for all
@@ -544,110 +537,55 @@
       /*Spaces=*/LocalIndentAtLineBreak - Prefix.size());
 }
 
-BreakableToken::Split BreakableBlockComment::getSplitBefore(
-    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
-    llvm::Regex &CommentPragmasRegex) const {
+BreakableToken::Split
+BreakableBlockComment::getReflowSplit(unsigned LineIndex,
+                                      llvm::Regex &CommentPragmasRegex) const {
   if (!mayReflow(LineIndex, CommentPragmasRegex))
     return Split(StringRef::npos, 0);
-  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
-  Split Result = getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,
-                                ColumnLimit);
-  // Result is relative to TrimmedContent. Adapt it relative to
-  // Content[LineIndex].
-  if (Result.first != StringRef::npos)
-    Result.first += Content[LineIndex].size() - TrimmedContent.size();
-  return Result;
+
+  size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
+  return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
 }
 
-unsigned
-BreakableBlockComment::getReflownColumn(StringRef Content, unsigned LineIndex,
-                                        unsigned PreviousEndColumn) const {
-  unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
-  // If this is the last line, it will carry around its '*/' postfix.
-  unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0);
-  // The line is composed of previous text, reflow prefix, reflown text and
-  // postfix.
-  unsigned ReflownColumn = StartColumn +
-                           encoding::columnWidthWithTabs(
-                               Content, StartColumn, Style.TabWidth, Encoding) +
-                           PostfixLength;
-  return ReflownColumn;
-}
-
-unsigned BreakableBlockComment::getLineLengthAfterSplitBefore(
-    unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn,
-    unsigned ColumnLimit, Split SplitBefore) const {
-  if (SplitBefore.first == StringRef::npos ||
-      // Block comment line contents contain the trailing whitespace after the
-      // decoration, so the need of left trim. Note that this behavior is
-      // consistent with the breaking of block comments where the indentation of
-      // a broken line is uniform across all the lines of the block comment.
-      SplitBefore.first + SplitBefore.second <
-          Content[LineIndex].ltrim().size()) {
-    // A piece of line, not the whole, gets reflown.
-    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
-  } else {
-    // The whole line gets reflown, need to check if we need to insert a break
-    // for the postfix or not.
-    StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
-    unsigned ReflownColumn =
-        getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
-    if (ReflownColumn <= ColumnLimit) {
-      return ReflownColumn;
-    }
-    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
-  }
-}
-
-bool BreakableBlockComment::introducesBreakBefore(unsigned LineIndex) const {
+bool BreakableBlockComment::introducesBreakBeforeToken() const {
   // A break is introduced when we want delimiters on newline.
-  return LineIndex == 0 && DelimitersOnNewline &&
+  return DelimitersOnNewline &&
          Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
 }
 
-void BreakableBlockComment::replaceWhitespaceBefore(
-    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
-    Split SplitBefore, WhitespaceManager &Whitespaces) {
+void BreakableBlockComment::reflow(unsigned LineIndex,
+                                   WhitespaceManager &Whitespaces) const {
+  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+  // Here we need to reflow.
+  assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
+         "Reflowing whitespace within a token");
+  // This is the offset of the end of the last line relative to the start of
+  // the token text in the token.
+  unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+                                     Content[LineIndex - 1].size() -
+                                     tokenAt(LineIndex).TokenText.data();
+  unsigned WhitespaceLength = TrimmedContent.data() -
+                              tokenAt(LineIndex).TokenText.data() -
+                              WhitespaceOffsetInToken;
+  Whitespaces.replaceWhitespaceInToken(
+      tokenAt(LineIndex), WhitespaceOffsetInToken,
+      /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
+      /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
+      /*Spaces=*/0);
+}
+
+void BreakableBlockComment::adaptStartOfLine(
+    unsigned LineIndex, WhitespaceManager &Whitespaces) const {
   if (LineIndex == 0) {
     if (DelimitersOnNewline) {
-      // Since we're breaking af index 1 below, the break position and the
+      // Since we're breaking at index 1 below, the break position and the
       // break length are the same.
       size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
-      if (BreakLength != StringRef::npos) {
+      if (BreakLength != StringRef::npos)
         insertBreak(LineIndex, 0, Split(1, BreakLength), Whitespaces);
-        DelimitersOnNewline = true;
-      }
     }
     return;
   }
-  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
-  if (SplitBefore.first != StringRef::npos) {
-    // Here we need to reflow.
-    assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
-           "Reflowing whitespace within a token");
-    // This is the offset of the end of the last line relative to the start of
-    // the token text in the token.
-    unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
-                                       Content[LineIndex - 1].size() -
-                                       tokenAt(LineIndex).TokenText.data();
-    unsigned WhitespaceLength = TrimmedContent.data() -
-                                tokenAt(LineIndex).TokenText.data() -
-                                WhitespaceOffsetInToken;
-    Whitespaces.replaceWhitespaceInToken(
-        tokenAt(LineIndex), WhitespaceOffsetInToken,
-        /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
-        /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
-        /*Spaces=*/0);
-    // Check if we need to also insert a break at the whitespace range.
-    // Note that we don't need a penalty for this break, since it doesn't change
-    // the total number of lines.
-    unsigned ReflownColumn =
-        getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
-    if (ReflownColumn > ColumnLimit)
-      insertBreak(LineIndex, 0, SplitBefore, Whitespaces);
-    return;
-  }
-
   // Here no reflow with the previous line will happen.
   // Fix the decoration of the line at LineIndex.
   StringRef Prefix = Decoration;
@@ -683,8 +621,7 @@
 }
 
 BreakableToken::Split
-BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset,
-                                             unsigned ColumnLimit) const {
+BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
   if (DelimitersOnNewline) {
     // Replace the trailing whitespace of the last line with a newline.
     // In case the last line is empty, the ending '*/' is already on its own
@@ -710,15 +647,6 @@
          !switchesFormatting(tokenAt(LineIndex));
 }
 
-unsigned
-BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
-                                             unsigned TailOffset) const {
-  // If we break, we always break at the predefined indent.
-  if (TailOffset != 0)
-    return IndentAtLineBreak;
-  return std::max(0, ContentColumn[LineIndex]);
-}
-
 BreakableLineCommentSection::BreakableLineCommentSection(
     const FormatToken &Token, unsigned StartColumn,
     unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
@@ -732,7 +660,8 @@
        CurrentTok = CurrentTok->Next) {
     LastLineTok = LineTok;
     StringRef TokenText(CurrentTok->TokenText);
-    assert(TokenText.startswith("//"));
+    assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
+           "unsupported line comment prefix, '//' and '#' are supported");
     size_t FirstLineIndex = Lines.size();
     TokenText.split(Lines, "\n");
     Content.resize(Lines.size());
@@ -742,11 +671,13 @@
     Prefix.resize(Lines.size());
     OriginalPrefix.resize(Lines.size());
     for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
+      Lines[i] = Lines[i].ltrim(Blanks);
       // We need to trim the blanks in case this is not the first line in a
       // multiline comment. Then the indent is included in Lines[i].
       StringRef IndentPrefix =
-          getLineCommentIndentPrefix(Lines[i].ltrim(Blanks));
-      assert(IndentPrefix.startswith("//"));
+          getLineCommentIndentPrefix(Lines[i].ltrim(Blanks), Style);
+      assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
+             "unsupported line comment prefix, '//' and '#' are supported");
       OriginalPrefix[i] = Prefix[i] = IndentPrefix;
       if (Lines[i].size() > Prefix[i].size() &&
           isAlphanumeric(Lines[i][Prefix[i].size()])) {
@@ -760,6 +691,9 @@
           Prefix[i] = "///< ";
         else if (Prefix[i] == "//!<")
           Prefix[i] = "//!< ";
+        else if (Prefix[i] == "#" &&
+                 Style.Language == FormatStyle::LK_TextProto)
+          Prefix[i] = "# ";
       }
 
       Tokens[i] = LineTok;
@@ -801,20 +735,25 @@
   }
 }
 
-unsigned BreakableLineCommentSection::getLineLengthAfterSplit(
-    unsigned LineIndex, unsigned TailOffset,
-    StringRef::size_type Length) const {
-  unsigned ContentStartColumn =
-      (TailOffset == 0 ? ContentColumn[LineIndex]
-                       : OriginalContentColumn[LineIndex]);
-  return ContentStartColumn + encoding::columnWidthWithTabs(
-                                  Content[LineIndex].substr(TailOffset, Length),
-                                  ContentStartColumn, Style.TabWidth, Encoding);
+unsigned
+BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
+                                            StringRef::size_type Length,
+                                            unsigned StartColumn) const {
+  return encoding::columnWidthWithTabs(
+      Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
+      Encoding);
 }
 
-void BreakableLineCommentSection::insertBreak(unsigned LineIndex,
-                                              unsigned TailOffset, Split Split,
-                                              WhitespaceManager &Whitespaces) {
+unsigned BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
+                                                            bool Break) const {
+  if (Break)
+    return OriginalContentColumn[LineIndex];
+  return ContentColumn[LineIndex];
+}
+
+void BreakableLineCommentSection::insertBreak(
+    unsigned LineIndex, unsigned TailOffset, Split Split,
+    WhitespaceManager &Whitespaces) const {
   StringRef Text = Content[LineIndex].substr(TailOffset);
   // Compute the offset of the split relative to the beginning of the token
   // text.
@@ -833,34 +772,42 @@
       /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());
 }
 
-BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
-    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
-    llvm::Regex &CommentPragmasRegex) const {
+BreakableComment::Split BreakableLineCommentSection::getReflowSplit(
+    unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {
   if (!mayReflow(LineIndex, CommentPragmasRegex))
     return Split(StringRef::npos, 0);
-  return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
-                        ColumnLimit);
+
+  size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
+
+  // In a line comment section each line is a separate token; thus, after a
+  // split we replace all whitespace before the current line comment token
+  // (which does not need to be included in the split), plus the start of the
+  // line up to where the content starts.
+  return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
 }
 
-unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore(
-    unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn,
-    unsigned ColumnLimit, Split SplitBefore) const {
-  if (SplitBefore.first == StringRef::npos ||
-      SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
-    // A piece of line, not the whole line, gets reflown.
-    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
-  } else {
-    // The whole line gets reflown.
-    unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
-    return StartColumn +
-           encoding::columnWidthWithTabs(Content[LineIndex], StartColumn,
-                                         Style.TabWidth, Encoding);
-  }
+void BreakableLineCommentSection::reflow(unsigned LineIndex,
+                                         WhitespaceManager &Whitespaces) const {
+  // Reflow happens between tokens. Replace the whitespace between the
+  // tokens by the empty string.
+  Whitespaces.replaceWhitespace(
+      *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
+      /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);
+  // Replace the indent and prefix of the token with the reflow prefix.
+  unsigned WhitespaceLength =
+      Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
+  Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
+                                       /*Offset=*/0,
+                                       /*ReplaceChars=*/WhitespaceLength,
+                                       /*PreviousPostfix=*/"",
+                                       /*CurrentPrefix=*/ReflowPrefix,
+                                       /*InPPDirective=*/false,
+                                       /*Newlines=*/0,
+                                       /*Spaces=*/0);
 }
 
-void BreakableLineCommentSection::replaceWhitespaceBefore(
-    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
-    Split SplitBefore, WhitespaceManager &Whitespaces) {
+void BreakableLineCommentSection::adaptStartOfLine(
+    unsigned LineIndex, WhitespaceManager &Whitespaces) const {
   // If this is the first line of a token, we need to inform Whitespace Manager
   // about it: either adapt the whitespace range preceding it, or mark it as an
   // untouchable token.
@@ -868,44 +815,25 @@
   // // line 1 \
   // // line 2
   if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
-    if (SplitBefore.first != StringRef::npos) {
-      // Reflow happens between tokens. Replace the whitespace between the
-      // tokens by the empty string.
-      Whitespaces.replaceWhitespace(
-          *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
-          /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);
-      // Replace the indent and prefix of the token with the reflow prefix.
-      unsigned WhitespaceLength =
-          Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
-      Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
-                                           /*Offset=*/0,
-                                           /*ReplaceChars=*/WhitespaceLength,
-                                           /*PreviousPostfix=*/"",
-                                           /*CurrentPrefix=*/ReflowPrefix,
-                                           /*InPPDirective=*/false,
-                                           /*Newlines=*/0,
-                                           /*Spaces=*/0);
-    } else {
-      // This is the first line for the current token, but no reflow with the
-      // previous token is necessary. However, we still may need to adjust the
-      // start column. Note that ContentColumn[LineIndex] is the expected
-      // content column after a possible update to the prefix, hence the prefix
-      // length change is included.
-      unsigned LineColumn =
-          ContentColumn[LineIndex] -
-          (Content[LineIndex].data() - Lines[LineIndex].data()) +
-          (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
+    // This is the first line for the current token, but no reflow with the
+    // previous token is necessary. However, we still may need to adjust the
+    // start column. Note that ContentColumn[LineIndex] is the expected
+    // content column after a possible update to the prefix, hence the prefix
+    // length change is included.
+    unsigned LineColumn =
+        ContentColumn[LineIndex] -
+        (Content[LineIndex].data() - Lines[LineIndex].data()) +
+        (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
 
-      // We always want to create a replacement instead of adding an untouchable
-      // token, even if LineColumn is the same as the original column of the
-      // token. This is because WhitespaceManager doesn't align trailing
-      // comments if they are untouchable.
-      Whitespaces.replaceWhitespace(*Tokens[LineIndex],
-                                    /*Newlines=*/1,
-                                    /*Spaces=*/LineColumn,
-                                    /*StartOfTokenColumn=*/LineColumn,
-                                    /*InPPDirective=*/false);
-    }
+    // We always want to create a replacement instead of adding an untouchable
+    // token, even if LineColumn is the same as the original column of the
+    // token. This is because WhitespaceManager doesn't align trailing
+    // comments if they are untouchable.
+    Whitespaces.replaceWhitespace(*Tokens[LineIndex],
+                                  /*Newlines=*/1,
+                                  /*Spaces=*/LineColumn,
+                                  /*StartOfTokenColumn=*/LineColumn,
+                                  /*InPPDirective=*/false);
   }
   if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
     // Adjust the prefix if necessary.
@@ -918,13 +846,6 @@
         tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",
         /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
   }
-  // Add a break after a reflow split has been introduced, if necessary.
-  // Note that this break doesn't need to be penalized, since it doesn't change
-  // the number of lines.
-  if (SplitBefore.first != StringRef::npos &&
-      SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
-    insertBreak(LineIndex, 0, SplitBefore, Whitespaces);
-  }
 }
 
 void BreakableLineCommentSection::updateNextToken(LineState &State) const {
@@ -941,20 +862,17 @@
   if (Lines[LineIndex].startswith("//")) {
     IndentContent = Lines[LineIndex].substr(2);
   }
+  // FIXME: Decide whether we want to reflow non-regular indents:
+  // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
+  // OriginalPrefix[LineIndex-1]. That means we don't reflow
+  // // text that protrudes
+  // //    into text with different indent
+  // We do reflow in that case in block comments.
   return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
          mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
          !switchesFormatting(tokenAt(LineIndex)) &&
          OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
 }
 
-unsigned
-BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
-                                                   unsigned TailOffset) const {
-  if (TailOffset != 0) {
-    return OriginalContentColumn[LineIndex];
-  }
-  return ContentColumn[LineIndex];
-}
-
 } // namespace format
 } // namespace clang
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h
index 8c2dc74..eba48f7 100644
--- a/lib/Format/BreakableToken.h
+++ b/lib/Format/BreakableToken.h
@@ -33,19 +33,32 @@
 
 struct FormatStyle;
 
-/// \brief Base class for strategies on how to break tokens.
+/// \brief Base class for tokens / ranges of tokens that can allow breaking
+/// within the tokens - for example, to avoid whitespace beyond the column
+/// limit, or to reflow text.
 ///
-/// This is organised around the concept of a \c Split, which is a whitespace
-/// range that signifies a position of the content of a token where a
-/// reformatting might be done. Operating with splits is divided into 3
-/// operations:
+/// Generally, a breakable token consists of logical lines, addressed by a line
+/// index. For example, in a sequence of line comments, each line comment is its
+/// own logical line; similarly, for a block comment, each line in the block
+/// comment is on its own logical line.
+///
+/// There are two methods to compute the layout of the token:
+/// - getRangeLength measures the number of columns needed for a range of text
+///   within a logical line, and
+/// - getContentStartColumn returns the start column at which we want the
+///   content of a logical line to start (potentially after introducing a line
+///   break).
+///
+/// The mechanism to adapt the layout of the breakable token is organised
+/// around the concept of a \c Split, which is a whitespace range that signifies
+/// a position of the content of a token where a reformatting might be done.
+///
+/// Operating with splits is divided into two operations:
 /// - getSplit, for finding a split starting at a position,
-/// - getLineLengthAfterSplit, for calculating the size in columns of the rest
-///   of the content after a split has been used for breaking, and
 /// - insertBreak, for executing the split using a whitespace manager.
 ///
 /// There is a pair of operations that are used to compress a long whitespace
-/// range with a single space if that will bring the line lenght under the
+/// range with a single space if that will bring the line length under the
 /// column limit:
 /// - getLineLengthAfterCompression, for calculating the size in columns of the
 ///   line after a whitespace range has been compressed, and
@@ -56,14 +69,12 @@
 /// For tokens where the whitespace before each line needs to be also
 /// reformatted, for example for tokens supporting reflow, there are analogous
 /// operations that might be executed before the main line breaking occurs:
-/// - getSplitBefore, for finding a split such that the content preceding it
+/// - getReflowSplit, for finding a split such that the content preceding it
 ///   needs to be specially reflown,
+/// - reflow, for executing the split using a whitespace manager,
 /// - introducesBreakBefore, for checking if reformatting the beginning
 ///   of the content introduces a line break before it,
-/// - getLineLengthAfterSplitBefore, for calculating the line length in columns
-///   of the remainder of the content after the beginning of the content has
-///   been reformatted, and
-/// - replaceWhitespaceBefore, for executing the reflow using a whitespace
+/// - adaptStartOfLine, for executing the reflow using a whitespace
 ///   manager.
 ///
 /// For tokens that require the whitespace after the last line to be
@@ -72,13 +83,9 @@
 /// that might be executed after the last line has been reformatted:
 /// - getSplitAfterLastLine, for finding a split after the last line that needs
 ///   to be reflown,
-/// - getLineLengthAfterSplitAfterLastLine, for calculating the line length in
-///   columns of the remainder of the token, and
 /// - replaceWhitespaceAfterLastLine, for executing the reflow using a
 ///   whitespace manager.
 ///
-/// FIXME: The interface seems set in stone, so we might want to just pull the
-/// strategy into the class, instead of controlling it from the outside.
 class BreakableToken {
 public:
   /// \brief Contains starting character index and length of split.
@@ -89,79 +96,103 @@
   /// \brief Returns the number of lines in this token in the original code.
   virtual unsigned getLineCount() const = 0;
 
-  /// \brief Returns the number of columns required to format the piece of line
-  /// at \p LineIndex, from byte offset \p TailOffset with length \p Length.
+  /// \brief Returns the number of columns required to format the text in the
+  /// byte range [\p Offset, \p Offset \c + \p Length).
   ///
-  /// Note that previous breaks are not taken into account. \p TailOffset is
-  /// always specified from the start of the (original) line.
-  /// \p Length can be set to StringRef::npos, which means "to the end of line".
-  virtual unsigned
-  getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
-                          StringRef::size_type Length) const = 0;
+  /// \p Offset is the byte offset from the start of the content of the line
+  ///    at \p LineIndex.
+  ///
+  /// \p StartColumn is the column at which the text starts in the formatted
+  ///    file, needed to compute tab stops correctly.
+  virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+                                  StringRef::size_type Length,
+                                  unsigned StartColumn) const = 0;
+
+  /// \brief Returns the number of columns required to format the text following
+  /// the byte \p Offset in the line \p LineIndex, including potentially
+  /// unbreakable sequences of tokens following after the end of the token.
+  ///
+  /// \p Offset is the byte offset from the start of the content of the line
+  ///    at \p LineIndex.
+  ///
+  /// \p StartColumn is the column at which the text starts in the formatted
+  ///    file, needed to compute tab stops correctly.
+  ///
+  /// For breakable tokens that never use extra space at the end of a line, this
+  /// is equivalent to getRangeLength with a Length of StringRef::npos.
+  virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+                                      unsigned StartColumn) const {
+    return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
+  }
+
+  /// \brief Returns the column at which content in line \p LineIndex starts,
+  /// assuming no reflow.
+  ///
+  /// If \p Break is true, returns the column at which the line should start
+  /// after the line break.
+  /// If \p Break is false, returns the column at which the line itself will
+  /// start.
+  virtual unsigned getContentStartColumn(unsigned LineIndex,
+                                         bool Break) const = 0;
 
   /// \brief Returns a range (offset, length) at which to break the line at
   /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
-  /// violate \p ColumnLimit.
+  /// violate \p ColumnLimit, assuming the text starting at \p TailOffset in
+  /// the token is formatted starting at ContentStartColumn in the reformatted
+  /// file.
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
-                         unsigned ColumnLimit,
+                         unsigned ColumnLimit, unsigned ContentStartColumn,
                          llvm::Regex &CommentPragmasRegex) const = 0;
 
   /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           WhitespaceManager &Whitespaces) = 0;
+                           WhitespaceManager &Whitespaces) const = 0;
 
-  /// \brief Returns the number of columns required to format the piece of line
-  /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range
-  /// \p Split has been compressed into a single space.
-  unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns,
-                                         Split Split) const;
+  /// \brief Returns the number of columns needed to format
+  /// \p RemainingTokenColumns, assuming that Split is within the range measured
+  /// by \p RemainingTokenColumns, and that the whitespace in Split is reduced
+  /// to a single space.
+  unsigned getLengthAfterCompression(unsigned RemainingTokenColumns,
+                                     Split Split) const;
 
   /// \brief Replaces the whitespace range described by \p Split with a single
   /// space.
   virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
                                   Split Split,
-                                  WhitespaceManager &Whitespaces) = 0;
+                                  WhitespaceManager &Whitespaces) const = 0;
 
-  /// \brief Returns a whitespace range (offset, length) of the content at
-  /// \p LineIndex such that the content preceding this range needs to be
-  /// reformatted before any breaks are made to this line.
+  /// \brief Returns whether the token supports reflowing text.
+  virtual bool supportsReflow() const { return false; }
+
+  /// \brief Returns a whitespace range (offset, length) of the content at \p
+  /// LineIndex such that the content of that line is reflown to the end of the
+  /// previous one.
   ///
-  /// \p PreviousEndColumn is the end column of the previous line after
-  /// formatting.
+  /// Returning (StringRef::npos, 0) indicates reflowing is not possible.
   ///
-  /// A result having offset == StringRef::npos means that no piece of the line
-  /// needs to be reformatted before any breaks are made.
-  virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                               unsigned ColumnLimit,
+  /// The range will include any whitespace preceding the specified line's
+  /// content.
+  ///
+  /// If the split is not contained within one token, for example when reflowing
+  /// line comments, returns (0, <length>).
+  virtual Split getReflowSplit(unsigned LineIndex,
                                llvm::Regex &CommentPragmasRegex) const {
     return Split(StringRef::npos, 0);
   }
 
-  /// \brief Returns if a break before the content at \p LineIndex will be
-  /// inserted after the whitespace preceding the content has been reformatted.
-  virtual bool introducesBreakBefore(unsigned LineIndex) const {
+  /// \brief Reflows the current line into the end of the previous one.
+  virtual void reflow(unsigned LineIndex,
+                      WhitespaceManager &Whitespaces) const {}
+
+  /// \brief Returns whether there will be a line break at the start of the
+  /// token.
+  virtual bool introducesBreakBeforeToken() const {
     return false;
   }
 
-  /// \brief Returns the number of columns required to format the piece of line
-  /// at \p LineIndex after the content preceding the whitespace range specified
-  /// \p SplitBefore has been reformatted, but before any breaks are made to
-  /// this line.
-  virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
-                                                 unsigned TailOffset,
-                                                 unsigned PreviousEndColumn,
-                                                 unsigned ColumnLimit,
-                                                 Split SplitBefore) const {
-    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
-  }
-
   /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
-  /// Performs a reformatting of the content at \p LineIndex preceding the
-  /// whitespace range \p SplitBefore.
-  virtual void replaceWhitespaceBefore(unsigned LineIndex,
-                                       unsigned PreviousEndColumn,
-                                       unsigned ColumnLimit, Split SplitBefore,
-                                       WhitespaceManager &Whitespaces) {}
+  virtual void adaptStartOfLine(unsigned LineIndex,
+                                WhitespaceManager &Whitespaces) const {}
 
   /// \brief Returns a whitespace range (offset, length) of the content at
   /// the last line that needs to be reformatted after the last line has been
@@ -169,28 +200,15 @@
   ///
   /// A result having offset == StringRef::npos means that no reformat is
   /// necessary.
-  virtual Split getSplitAfterLastLine(unsigned TailOffset,
-                                      unsigned ColumnLimit) const {
+  virtual Split getSplitAfterLastLine(unsigned TailOffset) const {
     return Split(StringRef::npos, 0);
   }
 
-  /// \brief Returns the number of columns required to format the piece token
-  /// after the last line after a reformat of the whitespace range \p
-  /// \p SplitAfterLastLine on the last line has been performed.
-  virtual unsigned
-  getLineLengthAfterSplitAfterLastLine(unsigned TailOffset,
-                                       Split SplitAfterLastLine) const {
-    return getLineLengthAfterSplit(getLineCount() - 1,
-                                   TailOffset + SplitAfterLastLine.first +
-                                       SplitAfterLastLine.second,
-                                   StringRef::npos);
-  }
-
   /// \brief Replaces the whitespace from \p SplitAfterLastLine on the last line
   /// after the last line has been formatted by performing a reformatting.
-  virtual void replaceWhitespaceAfterLastLine(unsigned TailOffset,
-                                              Split SplitAfterLastLine,
-                                              WhitespaceManager &Whitespaces) {
+  void replaceWhitespaceAfterLastLine(unsigned TailOffset,
+                                      Split SplitAfterLastLine,
+                                      WhitespaceManager &Whitespaces) const {
     insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine,
                 Whitespaces);
   }
@@ -212,21 +230,33 @@
   const FormatStyle &Style;
 };
 
-/// \brief Base class for single line tokens that can be broken.
-///
-/// \c getSplit() needs to be implemented by child classes.
-class BreakableSingleLineToken : public BreakableToken {
+class BreakableStringLiteral : public BreakableToken {
 public:
+  /// \brief Creates a breakable token for a single line string literal.
+  ///
+  /// \p StartColumn specifies the column in which the token will start
+  /// after formatting.
+  BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
+                         StringRef Prefix, StringRef Postfix,
+                         unsigned UnbreakableTailLength, bool InPPDirective,
+                         encoding::Encoding Encoding, const FormatStyle &Style);
+
+  Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+                 unsigned ReflowColumn,
+                 llvm::Regex &CommentPragmasRegex) const override;
+  void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                   WhitespaceManager &Whitespaces) const override;
+  void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
+                          WhitespaceManager &Whitespaces) const override {}
   unsigned getLineCount() const override;
-  unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
-                                   StringRef::size_type Length) const override;
+  unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+                          StringRef::size_type Length,
+                          unsigned StartColumn) const override;
+  unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+                              unsigned StartColumn) const override;
+  unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
 
 protected:
-  BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
-                           StringRef Prefix, StringRef Postfix,
-                           bool InPPDirective, encoding::Encoding Encoding,
-                           const FormatStyle &Style);
-
   // The column in which the token starts.
   unsigned StartColumn;
   // The prefix a line needs after a break in the token.
@@ -235,25 +265,9 @@
   StringRef Postfix;
   // The token text excluding the prefix and postfix.
   StringRef Line;
-};
-
-class BreakableStringLiteral : public BreakableSingleLineToken {
-public:
-  /// \brief Creates a breakable token for a single line string literal.
-  ///
-  /// \p StartColumn specifies the column in which the token will start
-  /// after formatting.
-  BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
-                         StringRef Prefix, StringRef Postfix,
-                         bool InPPDirective, encoding::Encoding Encoding,
-                         const FormatStyle &Style);
-
-  Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
-                 llvm::Regex &CommentPragmasRegex) const override;
-  void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                   WhitespaceManager &Whitespaces) override;
-  void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
-                          WhitespaceManager &Whitespaces) override {}
+  // Length of the sequence of tokens after this string literal that cannot
+  // contain line breaks.
+  unsigned UnbreakableTailLength;
 };
 
 class BreakableComment : public BreakableToken {
@@ -267,21 +281,15 @@
                    const FormatStyle &Style);
 
 public:
+  bool supportsReflow() const override { return true; }
   unsigned getLineCount() const override;
   Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+                 unsigned ReflowColumn,
                  llvm::Regex &CommentPragmasRegex) const override;
   void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
-                          WhitespaceManager &Whitespaces) override;
+                          WhitespaceManager &Whitespaces) const override;
 
 protected:
-  virtual unsigned getContentStartColumn(unsigned LineIndex,
-                                         unsigned TailOffset) const = 0;
-
-  // Returns a split that divides Text into a left and right parts, such that
-  // the left part is suitable for reflowing after PreviousEndColumn.
-  Split getReflowSplit(StringRef Text, StringRef ReflowPrefix,
-                       unsigned PreviousEndColumn, unsigned ColumnLimit) const;
-
   // Returns the token containing the line at LineIndex.
   const FormatToken &tokenAt(unsigned LineIndex) const;
 
@@ -340,24 +348,22 @@
                         bool InPPDirective, encoding::Encoding Encoding,
                         const FormatStyle &Style);
 
-  unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
-                                   StringRef::size_type Length) const override;
+  unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+                          StringRef::size_type Length,
+                          unsigned StartColumn) const override;
+  unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+                              unsigned StartColumn) const override;
+  unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                   WhitespaceManager &Whitespaces) override;
-  Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                       unsigned ColumnLimit,
+                   WhitespaceManager &Whitespaces) const override;
+  Split getReflowSplit(unsigned LineIndex,
                        llvm::Regex &CommentPragmasRegex) const override;
-  bool introducesBreakBefore(unsigned LineIndex) const override;
-  unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
-                                         unsigned TailOffset,
-                                         unsigned PreviousEndColumn,
-                                         unsigned ColumnLimit,
-                                         Split SplitBefore) const override;
-  void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                               unsigned ColumnLimit, Split SplitBefore,
-                               WhitespaceManager &Whitespaces) override;
-  Split getSplitAfterLastLine(unsigned TailOffset,
-                              unsigned ColumnLimit) const override;
+  void reflow(unsigned LineIndex,
+              WhitespaceManager &Whitespaces) const override;
+  bool introducesBreakBeforeToken() const override;
+  void adaptStartOfLine(unsigned LineIndex,
+                        WhitespaceManager &Whitespaces) const override;
+  Split getSplitAfterLastLine(unsigned TailOffset) const override;
 
   bool mayReflow(unsigned LineIndex,
                  llvm::Regex &CommentPragmasRegex) const override;
@@ -373,14 +379,6 @@
   // considered part of the text).
   void adjustWhitespace(unsigned LineIndex, int IndentDelta);
 
-  // Computes the end column if the full Content from LineIndex gets reflown
-  // after PreviousEndColumn.
-  unsigned getReflownColumn(StringRef Content, unsigned LineIndex,
-                            unsigned PreviousEndColumn) const;
-
-  unsigned getContentStartColumn(unsigned LineIndex,
-                                 unsigned TailOffset) const override;
-
   // The column at which the text of a broken line should start.
   // Note that an optional decoration would go before that column.
   // IndentAtLineBreak is a uniform position for all lines in a block comment,
@@ -407,6 +405,10 @@
   // If true, make sure that the opening '/**' and the closing '*/' ends on a
   // line of itself. Styles like jsdoc require this for multiline comments.
   bool DelimitersOnNewline;
+
+  // Length of the sequence of tokens after this string literal that cannot
+  // contain line breaks.
+  unsigned UnbreakableTailLength;
 };
 
 class BreakableLineCommentSection : public BreakableComment {
@@ -416,29 +418,23 @@
                               bool InPPDirective, encoding::Encoding Encoding,
                               const FormatStyle &Style);
 
-  unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
-                                   StringRef::size_type Length) const override;
+  unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+                          StringRef::size_type Length,
+                          unsigned StartColumn) const override;
+  unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                   WhitespaceManager &Whitespaces) override;
-  Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                       unsigned ColumnLimit,
+                   WhitespaceManager &Whitespaces) const override;
+  Split getReflowSplit(unsigned LineIndex,
                        llvm::Regex &CommentPragmasRegex) const override;
-  unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
-                                         unsigned TailOffset,
-                                         unsigned PreviousEndColumn,
-                                         unsigned ColumnLimit,
-                                         Split SplitBefore) const override;
-  void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                               unsigned ColumnLimit, Split SplitBefore,
-                               WhitespaceManager &Whitespaces) override;
+  void reflow(unsigned LineIndex,
+              WhitespaceManager &Whitespaces) const override;
+  void adaptStartOfLine(unsigned LineIndex,
+                        WhitespaceManager &Whitespaces) const override;
   void updateNextToken(LineState &State) const override;
   bool mayReflow(unsigned LineIndex,
                  llvm::Regex &CommentPragmasRegex) const override;
 
 private:
-  unsigned getContentStartColumn(unsigned LineIndex,
-                                 unsigned TailOffset) const override;
-
   // OriginalPrefix[i] contains the original prefix of line i, including
   // trailing whitespace before the start of the content. The indentation
   // preceding the prefix is not included.
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index b57b8de..d28254d 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -14,6 +14,7 @@
 
 #include "ContinuationIndenter.h"
 #include "BreakableToken.h"
+#include "FormatInternal.h"
 #include "WhitespaceManager.h"
 #include "clang/Basic/OperatorPrecedence.h"
 #include "clang/Basic/SourceManager.h"
@@ -76,6 +77,84 @@
            (LessTok.Previous && LessTok.Previous->is(tok::equal))));
 }
 
+// Returns the delimiter of a raw string literal, or None if TokenText is not
+// the text of a raw string literal. The delimiter could be the empty string.
+// For example, the delimiter of R"deli(cont)deli" is deli.
+static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) {
+  if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'.
+      || !TokenText.startswith("R\"") || !TokenText.endswith("\""))
+    return None;
+
+  // A raw string starts with 'R"<delimiter>(' and delimiter is ascii and has
+  // size at most 16 by the standard, so the first '(' must be among the first
+  // 19 bytes.
+  size_t LParenPos = TokenText.substr(0, 19).find_first_of('(');
+  if (LParenPos == StringRef::npos)
+    return None;
+  StringRef Delimiter = TokenText.substr(2, LParenPos - 2);
+
+  // Check that the string ends in ')Delimiter"'.
+  size_t RParenPos = TokenText.size() - Delimiter.size() - 2;
+  if (TokenText[RParenPos] != ')')
+    return None;
+  if (!TokenText.substr(RParenPos + 1).startswith(Delimiter))
+    return None;
+  return Delimiter;
+}
+
+// Returns the canonical delimiter for \p Language, or the empty string if no
+// canonical delimiter is specified.
+static StringRef
+getCanonicalRawStringDelimiter(const FormatStyle &Style,
+                               FormatStyle::LanguageKind Language) {
+  for (const auto &Format : Style.RawStringFormats) {
+    if (Format.Language == Language)
+      return StringRef(Format.CanonicalDelimiter);
+  }
+  return "";
+}
+
+RawStringFormatStyleManager::RawStringFormatStyleManager(
+    const FormatStyle &CodeStyle) {
+  for (const auto &RawStringFormat : CodeStyle.RawStringFormats) {
+    llvm::Optional<FormatStyle> LanguageStyle =
+        CodeStyle.GetLanguageStyle(RawStringFormat.Language);
+    if (!LanguageStyle) {
+      FormatStyle PredefinedStyle;
+      if (!getPredefinedStyle(RawStringFormat.BasedOnStyle,
+                              RawStringFormat.Language, &PredefinedStyle)) {
+        PredefinedStyle = getLLVMStyle();
+        PredefinedStyle.Language = RawStringFormat.Language;
+      }
+      LanguageStyle = PredefinedStyle;
+    }
+    LanguageStyle->ColumnLimit = CodeStyle.ColumnLimit;
+    for (StringRef Delimiter : RawStringFormat.Delimiters) {
+      DelimiterStyle.insert({Delimiter, *LanguageStyle});
+    }
+    for (StringRef EnclosingFunction : RawStringFormat.EnclosingFunctions) {
+      EnclosingFunctionStyle.insert({EnclosingFunction, *LanguageStyle});
+    }
+  }
+}
+
+llvm::Optional<FormatStyle>
+RawStringFormatStyleManager::getDelimiterStyle(StringRef Delimiter) const {
+  auto It = DelimiterStyle.find(Delimiter);
+  if (It == DelimiterStyle.end())
+    return None;
+  return It->second;
+}
+
+llvm::Optional<FormatStyle>
+RawStringFormatStyleManager::getEnclosingFunctionStyle(
+    StringRef EnclosingFunction) const {
+  auto It = EnclosingFunctionStyle.find(EnclosingFunction);
+  if (It == EnclosingFunctionStyle.end())
+    return None;
+  return It->second;
+}
+
 ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
                                            const AdditionalKeywords &Keywords,
                                            const SourceManager &SourceMgr,
@@ -85,14 +164,18 @@
     : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr),
       Whitespaces(Whitespaces), Encoding(Encoding),
       BinPackInconclusiveFunctions(BinPackInconclusiveFunctions),
-      CommentPragmasRegex(Style.CommentPragmas) {}
+      CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {}
 
 LineState ContinuationIndenter::getInitialState(unsigned FirstIndent,
+                                                unsigned FirstStartColumn,
                                                 const AnnotatedLine *Line,
                                                 bool DryRun) {
   LineState State;
   State.FirstIndent = FirstIndent;
-  State.Column = FirstIndent;
+  if (FirstStartColumn && Line->First->NewlinesBefore == 0)
+    State.Column = FirstStartColumn;
+  else
+    State.Column = FirstIndent;
   // With preprocessor directive indentation, the line starts on column 0
   // since it's indented after the hash, but FirstIndent is set to the
   // preprocessor indent.
@@ -117,6 +200,7 @@
     // global scope.
     State.Stack.back().AvoidBinPacking = true;
     State.Stack.back().BreakBeforeParameter = true;
+    State.Stack.back().AlignColons = false;
   }
 
   // The first token has already been indented and thus consumed.
@@ -183,6 +267,11 @@
     return true;
   if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection)
     return true;
+  if (Style.Language == FormatStyle::LK_ObjC &&
+      Current.ObjCSelectorNameParts > 1 &&
+      Current.startsSequence(TT_SelectorName, tok::colon, tok::caret)) {
+    return true;
+  }
   if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) ||
        (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) &&
         Style.isCpp() &&
@@ -271,7 +360,8 @@
     // We need special cases for ">>" which we have split into two ">" while
     // lexing in order to make template parsing easier.
     bool IsComparison = (Previous.getPrecedence() == prec::Relational ||
-                         Previous.getPrecedence() == prec::Equality) &&
+                         Previous.getPrecedence() == prec::Equality ||
+                         Previous.getPrecedence() == prec::Spaceship) &&
                         Previous.Previous &&
                         Previous.Previous->isNot(TT_BinaryOperator); // For >>.
     bool LHSIsBinaryExpr =
@@ -484,7 +574,8 @@
        (P->is(TT_ConditionalExpr) && P->is(tok::colon))) &&
       !P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) &&
       P->getPrecedence() != prec::Assignment &&
-      P->getPrecedence() != prec::Relational) {
+      P->getPrecedence() != prec::Relational &&
+      P->getPrecedence() != prec::Spaceship) {
     bool BreakBeforeOperator =
         P->MustBreakBefore || P->is(tok::lessless) ||
         (P->is(TT_BinaryOperator) &&
@@ -611,7 +702,8 @@
                  ? std::max(State.Stack.back().Indent,
                             State.FirstIndent + Style.ContinuationIndentWidth)
                  : State.Stack.back().Indent) +
-            NextNonComment->LongestObjCSelectorName;
+            std::max(NextNonComment->LongestObjCSelectorName,
+                     NextNonComment->ColumnWidth);
       }
     } else if (State.Stack.back().AlignColons &&
                State.Stack.back().ColonPos <= NextNonComment->ColumnWidth) {
@@ -649,8 +741,18 @@
     State.Stack.back().BreakBeforeParameter = false;
 
   if (!DryRun) {
+    unsigned MaxEmptyLinesToKeep = Style.MaxEmptyLinesToKeep + 1;
+    if (Current.is(tok::r_brace) && Current.MatchingParen &&
+        // Only strip trailing empty lines for l_braces that have children, i.e.
+        // for function expressions (lambdas, arrows, etc).
+        !Current.MatchingParen->Children.empty()) {
+      // lambdas and arrow functions are expressions, thus their r_brace is not
+      // on its own line, and thus not covered by UnwrappedLineFormatter's logic
+      // about removing empty lines on closing blocks. Special case them here.
+      MaxEmptyLinesToKeep = 1;
+    }
     unsigned Newlines = std::max(
-        1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1));
+        1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep));
     bool ContinuePPDirective =
         State.Line->InPPDirective && State.Line->Type != LT_ImportStatement;
     Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,
@@ -762,7 +864,7 @@
       (Current.Next->is(TT_DictLiteral) ||
        ((Style.Language == FormatStyle::LK_Proto ||
          Style.Language == FormatStyle::LK_TextProto) &&
-        Current.Next->isOneOf(TT_TemplateOpener, tok::l_brace))))
+        Current.Next->isOneOf(tok::less, tok::l_brace))))
     return State.Stack.back().Indent;
   if (NextNonComment->is(TT_ObjCStringLiteral) &&
       State.StartOfStringLiteral != 0)
@@ -800,7 +902,8 @@
                   ? std::max(State.Stack.back().Indent,
                              State.FirstIndent + Style.ContinuationIndentWidth)
                   : State.Stack.back().Indent) +
-             NextNonComment->LongestObjCSelectorName -
+             std::max(NextNonComment->LongestObjCSelectorName,
+                      NextNonComment->ColumnWidth) -
              NextNonComment->ColumnWidth;
     }
     if (!State.Stack.back().AlignColons)
@@ -840,6 +943,8 @@
   if (Previous.is(tok::r_paren) && !Current.isBinaryOperator() &&
       !Current.isOneOf(tok::colon, tok::comment))
     return ContinuationIndent;
+  if (Current.is(TT_ProtoExtensionLSquare))
+    return State.Stack.back().Indent;
   if (State.Stack.back().Indent == State.FirstIndent && PreviousNonComment &&
       PreviousNonComment->isNot(tok::r_brace))
     // Ensure that we fall back to the continuation indent width instead of
@@ -956,8 +1061,8 @@
 
   moveStatePastFakeLParens(State, Newline);
   moveStatePastScopeCloser(State);
-  bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak &&
-                                 !State.Stack.back().NoLineBreakInOperand;
+  bool AllowBreak = !State.Stack.back().NoLineBreak &&
+                    !State.Stack.back().NoLineBreakInOperand;
   moveStatePastScopeOpener(State, Newline);
   moveStatePastFakeRParens(State);
 
@@ -971,13 +1076,9 @@
 
   State.Column += Current.ColumnWidth;
   State.NextToken = State.NextToken->Next;
-  unsigned Penalty = 0;
-  if (CanBreakProtrudingToken)
-    Penalty = breakProtrudingToken(Current, State, DryRun);
-  if (State.Column > getColumnLimit(State)) {
-    unsigned ExcessCharacters = State.Column - getColumnLimit(State);
-    Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
-  }
+
+  unsigned Penalty =
+      handleEndOfLine(Current, State, DryRun, AllowBreak);
 
   if (Current.Role)
     Current.Role->formatFromToken(State, this, DryRun);
@@ -1120,7 +1221,6 @@
     // void SomeFunction(vector<  // break
     //                       int> v);
     // FIXME: We likely want to do this for more combinations of brackets.
-    // Verify that it is wanted for ObjC, too.
     if (Current.is(tok::less) && Current.ParentBracket == tok::l_paren) {
       NewIndent = std::max(NewIndent, State.Stack.back().Indent);
       LastSpace = std::max(LastSpace, State.Stack.back().Indent);
@@ -1131,9 +1231,20 @@
         Current.MatchingParen->getPreviousNonComment() &&
         Current.MatchingParen->getPreviousNonComment()->is(tok::comma);
 
+    // If ObjCBinPackProtocolList is unspecified, fall back to BinPackParameters
+    // for backwards compatibility.
+    bool ObjCBinPackProtocolList =
+        (Style.ObjCBinPackProtocolList == FormatStyle::BPS_Auto &&
+         Style.BinPackParameters) ||
+        Style.ObjCBinPackProtocolList == FormatStyle::BPS_Always;
+
+    bool BinPackDeclaration =
+        (State.Line->Type != LT_ObjCDecl && Style.BinPackParameters) ||
+        (State.Line->Type == LT_ObjCDecl && ObjCBinPackProtocolList);
+
     AvoidBinPacking =
         (Style.Language == FormatStyle::LK_JavaScript && EndsInComma) ||
-        (State.Line->MustBeDeclaration && !Style.BinPackParameters) ||
+        (State.Line->MustBeDeclaration && !BinPackDeclaration) ||
         (!State.Line->MustBeDeclaration && !Style.BinPackArguments) ||
         (Style.ExperimentalAutoDetectBinPacking &&
          (Current.PackingKind == PPK_OnePerLine ||
@@ -1179,6 +1290,9 @@
   State.Stack.back().NestedBlockIndent = NestedBlockIndent;
   State.Stack.back().BreakBeforeParameter = BreakBeforeParameter;
   State.Stack.back().HasMultipleNestedBlocks = Current.BlockParameterCount > 1;
+  State.Stack.back().IsInsideObjCArrayLiteral =
+      Current.is(TT_ArrayInitializerLSquare) && Current.Previous &&
+      Current.Previous->is(tok::at);
 }
 
 void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) {
@@ -1191,7 +1305,8 @@
   if (State.Stack.size() > 1 &&
       (Current.isOneOf(tok::r_paren, tok::r_square, TT_TemplateString) ||
        (Current.is(tok::r_brace) && State.NextToken != State.Line->First) ||
-       State.NextToken->is(TT_TemplateCloser)))
+       State.NextToken->is(TT_TemplateCloser) ||
+       (Current.is(tok::greater) && Current.is(TT_DictLiteral))))
     State.Stack.pop_back();
 
   if (Current.is(tok::r_square)) {
@@ -1216,11 +1331,133 @@
   State.Stack.back().BreakBeforeParameter = true;
 }
 
+static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn,
+                                     unsigned TabWidth,
+                                     encoding::Encoding Encoding) {
+  size_t LastNewlinePos = Text.find_last_of("\n");
+  if (LastNewlinePos == StringRef::npos) {
+    return StartColumn +
+           encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding);
+  } else {
+    return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos),
+                                         /*StartColumn=*/0, TabWidth, Encoding);
+  }
+}
+
+unsigned ContinuationIndenter::reformatRawStringLiteral(
+    const FormatToken &Current, LineState &State,
+    const FormatStyle &RawStringStyle, bool DryRun) {
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
+  StringRef OldDelimiter = *getRawStringDelimiter(Current.TokenText);
+  StringRef NewDelimiter =
+      getCanonicalRawStringDelimiter(Style, RawStringStyle.Language);
+  if (NewDelimiter.empty() || OldDelimiter.empty())
+    NewDelimiter = OldDelimiter;
+  // The text of a raw string is between the leading 'R"delimiter(' and the
+  // trailing 'delimiter)"'.
+  unsigned OldPrefixSize = 3 + OldDelimiter.size();
+  unsigned OldSuffixSize = 2 + OldDelimiter.size();
+  // We create a virtual text environment which expects a null-terminated
+  // string, so we cannot use StringRef.
+  std::string RawText =
+      Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize);
+  if (NewDelimiter != OldDelimiter) {
+    // Don't update to the canonical delimiter 'deli' if ')deli"' occurs in the
+    // raw string.
+    std::string CanonicalDelimiterSuffix = (")" + NewDelimiter + "\"").str();
+    if (StringRef(RawText).contains(CanonicalDelimiterSuffix))
+      NewDelimiter = OldDelimiter;
+  }
+
+  unsigned NewPrefixSize = 3 + NewDelimiter.size();
+  unsigned NewSuffixSize = 2 + NewDelimiter.size();
+
+  // The first start column is the column the raw text starts after formatting.
+  unsigned FirstStartColumn = StartColumn + NewPrefixSize;
+
+  // The next start column is the intended indentation a line break inside
+  // the raw string at level 0. It is determined by the following rules:
+  //   - if the content starts on newline, it is one level more than the current
+  //     indent, and
+  //   - if the content does not start on a newline, it is the first start
+  //     column.
+  // These rules have the advantage that the formatted content both does not
+  // violate the rectangle rule and visually flows within the surrounding
+  // source.
+  bool ContentStartsOnNewline = Current.TokenText[OldPrefixSize] == '\n';
+  unsigned NextStartColumn = ContentStartsOnNewline
+                                 ? State.Stack.back().Indent + Style.IndentWidth
+                                 : FirstStartColumn;
+
+  // The last start column is the column the raw string suffix starts if it is
+  // put on a newline.
+  // The last start column is the intended indentation of the raw string postfix
+  // if it is put on a newline. It is determined by the following rules:
+  //   - if the raw string prefix starts on a newline, it is the column where
+  //     that raw string prefix starts, and
+  //   - if the raw string prefix does not start on a newline, it is the current
+  //     indent.
+  unsigned LastStartColumn = Current.NewlinesBefore
+                                 ? FirstStartColumn - NewPrefixSize
+                                 : State.Stack.back().Indent;
+
+  std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(
+      RawStringStyle, RawText, {tooling::Range(0, RawText.size())},
+      FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>",
+      /*Status=*/nullptr);
+
+  auto NewCode = applyAllReplacements(RawText, Fixes.first);
+  tooling::Replacements NoFixes;
+  if (!NewCode) {
+    State.Column += Current.ColumnWidth;
+    return 0;
+  }
+  if (!DryRun) {
+    if (NewDelimiter != OldDelimiter) {
+      // In 'R"delimiter(...', the delimiter starts 2 characters after the start
+      // of the token.
+      SourceLocation PrefixDelimiterStart =
+          Current.Tok.getLocation().getLocWithOffset(2);
+      auto PrefixErr = Whitespaces.addReplacement(tooling::Replacement(
+          SourceMgr, PrefixDelimiterStart, OldDelimiter.size(), NewDelimiter));
+      if (PrefixErr) {
+        llvm::errs()
+            << "Failed to update the prefix delimiter of a raw string: "
+            << llvm::toString(std::move(PrefixErr)) << "\n";
+      }
+      // In 'R"delimiter(...)delimiter"', the suffix delimiter starts at
+      // position length - 1 - |delimiter|.
+      SourceLocation SuffixDelimiterStart =
+          Current.Tok.getLocation().getLocWithOffset(Current.TokenText.size() -
+                                                     1 - OldDelimiter.size());
+      auto SuffixErr = Whitespaces.addReplacement(tooling::Replacement(
+          SourceMgr, SuffixDelimiterStart, OldDelimiter.size(), NewDelimiter));
+      if (SuffixErr) {
+        llvm::errs()
+            << "Failed to update the suffix delimiter of a raw string: "
+            << llvm::toString(std::move(SuffixErr)) << "\n";
+      }
+    }
+    SourceLocation OriginLoc =
+        Current.Tok.getLocation().getLocWithOffset(OldPrefixSize);
+    for (const tooling::Replacement &Fix : Fixes.first) {
+      auto Err = Whitespaces.addReplacement(tooling::Replacement(
+          SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),
+          Fix.getLength(), Fix.getReplacementText()));
+      if (Err) {
+        llvm::errs() << "Failed to reformat raw string: "
+                     << llvm::toString(std::move(Err)) << "\n";
+      }
+    }
+  }
+  unsigned RawLastLineEndColumn = getLastLineEndColumn(
+      *NewCode, FirstStartColumn, Style.TabWidth, Encoding);
+  State.Column = RawLastLineEndColumn + NewSuffixSize;
+  return Fixes.second;
+}
+
 unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
                                                  LineState &State) {
-  if (!Current.IsMultiline)
-    return 0;
-
   // Break before further function parameters on all levels.
   for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
     State.Stack[i].BreakBeforeParameter = true;
@@ -1235,33 +1472,108 @@
   return 0;
 }
 
-unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
-                                                    LineState &State,
-                                                    bool DryRun) {
-  // Don't break multi-line tokens other than block comments. Instead, just
-  // update the state.
-  if (Current.isNot(TT_BlockComment) && Current.IsMultiline)
-    return addMultilineToken(Current, State);
+unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current,
+                                               LineState &State, bool DryRun,
+                                               bool AllowBreak) {
+  unsigned Penalty = 0;
+  // Compute the raw string style to use in case this is a raw string literal
+  // that can be reformatted.
+  auto RawStringStyle = getRawStringStyle(Current, State);
+  if (RawStringStyle) {
+    Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun);
+  } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) {
+    // Don't break multi-line tokens other than block comments and raw string
+    // literals. Instead, just update the state.
+    Penalty = addMultilineToken(Current, State);
+  } else if (State.Line->Type != LT_ImportStatement) {
+    // We generally don't break import statements.
+    LineState OriginalState = State;
 
-  // Don't break implicit string literals or import statements.
-  if (Current.is(TT_ImplicitStringLiteral) ||
-      State.Line->Type == LT_ImportStatement)
-    return 0;
+    // Whether we force the reflowing algorithm to stay strictly within the
+    // column limit.
+    bool Strict = false;
+    // Whether the first non-strict attempt at reflowing did intentionally
+    // exceed the column limit.
+    bool Exceeded = false;
+    std::tie(Penalty, Exceeded) = breakProtrudingToken(
+        Current, State, AllowBreak, /*DryRun=*/true, Strict);
+    if (Exceeded) {
+      // If non-strict reflowing exceeds the column limit, try whether strict
+      // reflowing leads to an overall lower penalty.
+      LineState StrictState = OriginalState;
+      unsigned StrictPenalty =
+          breakProtrudingToken(Current, StrictState, AllowBreak,
+                               /*DryRun=*/true, /*Strict=*/true)
+              .first;
+      Strict = StrictPenalty <= Penalty;
+      if (Strict) {
+        Penalty = StrictPenalty;
+        State = StrictState;
+      }
+    }
+    if (!DryRun) {
+      // If we're not in dry-run mode, apply the changes with the decision on
+      // strictness made above.
+      breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false,
+                           Strict);
+    }
+  }
+  if (State.Column > getColumnLimit(State)) {
+    unsigned ExcessCharacters = State.Column - getColumnLimit(State);
+    Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
+  }
+  return Penalty;
+}
 
-  if (!Current.isStringLiteral() && !Current.is(tok::comment))
-    return 0;
+// Returns the enclosing function name of a token, or the empty string if not
+// found.
+static StringRef getEnclosingFunctionName(const FormatToken &Current) {
+  // Look for: 'function(' or 'function<templates>(' before Current.
+  auto Tok = Current.getPreviousNonComment();
+  if (!Tok || !Tok->is(tok::l_paren))
+    return "";
+  Tok = Tok->getPreviousNonComment();
+  if (!Tok)
+    return "";
+  if (Tok->is(TT_TemplateCloser)) {
+    Tok = Tok->MatchingParen;
+    if (Tok)
+      Tok = Tok->getPreviousNonComment();
+  }
+  if (!Tok || !Tok->is(tok::identifier))
+    return "";
+  return Tok->TokenText;
+}
 
-  std::unique_ptr<BreakableToken> Token;
+llvm::Optional<FormatStyle>
+ContinuationIndenter::getRawStringStyle(const FormatToken &Current,
+                                        const LineState &State) {
+  if (!Current.isStringLiteral())
+    return None;
+  auto Delimiter = getRawStringDelimiter(Current.TokenText);
+  if (!Delimiter)
+    return None;
+  auto RawStringStyle = RawStringFormats.getDelimiterStyle(*Delimiter);
+  if (!RawStringStyle)
+    RawStringStyle = RawStringFormats.getEnclosingFunctionStyle(
+        getEnclosingFunctionName(Current));
+  if (!RawStringStyle)
+    return None;
+  RawStringStyle->ColumnLimit = getColumnLimit(State);
+  return RawStringStyle;
+}
+
+std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken(
+    const FormatToken &Current, LineState &State, bool AllowBreak) {
   unsigned StartColumn = State.Column - Current.ColumnWidth;
-  unsigned ColumnLimit = getColumnLimit(State);
-
   if (Current.isStringLiteral()) {
     // FIXME: String literal breaking is currently disabled for Java and JS, as
     // it requires strings to be merged using "+" which we don't support.
     if (Style.Language == FormatStyle::LK_Java ||
         Style.Language == FormatStyle::LK_JavaScript ||
-        !Style.BreakStringLiterals)
-      return 0;
+        !Style.BreakStringLiterals ||
+        !AllowBreak)
+      return nullptr;
 
     // Don't break string literals inside preprocessor directives (except for
     // #define directives, as their contents are stored in separate lines and
@@ -1269,11 +1581,16 @@
     // This way we avoid breaking code with line directives and unknown
     // preprocessor directives that contain long string literals.
     if (State.Line->Type == LT_PreprocessorDirective)
-      return 0;
+      return nullptr;
     // Exempts unterminated string literals from line breaking. The user will
     // likely want to terminate the string before any line breaking is done.
     if (Current.IsUnterminatedLiteral)
-      return 0;
+      return nullptr;
+    // Don't break string literals inside Objective-C array literals (doing so
+    // raises the warning -Wobjc-string-concatenation).
+    if (State.Stack.back().IsInsideObjCArrayLiteral) {
+      return nullptr;
+    }
 
     StringRef Text = Current.TokenText;
     StringRef Prefix;
@@ -1288,126 +1605,366 @@
           Text.startswith(Prefix = "u8\"") ||
           Text.startswith(Prefix = "L\""))) ||
         (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
-      Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
-                                             Postfix, State.Line->InPPDirective,
-                                             Encoding, Style));
-    } else {
-      return 0;
+      // We need this to address the case where there is an unbreakable tail
+      // only if certain other formatting decisions have been taken. The
+      // UnbreakableTailLength of Current is an overapproximation is that case
+      // and we need to be correct here.
+      unsigned UnbreakableTailLength = (State.NextToken && canBreak(State))
+                                           ? 0
+                                           : Current.UnbreakableTailLength;
+      return llvm::make_unique<BreakableStringLiteral>(
+          Current, StartColumn, Prefix, Postfix, UnbreakableTailLength,
+          State.Line->InPPDirective, Encoding, Style);
     }
   } else if (Current.is(TT_BlockComment)) {
     if (!Style.ReflowComments ||
         // If a comment token switches formatting, like
         // /* clang-format on */, we don't want to break it further,
         // but we may still want to adjust its indentation.
-        switchesFormatting(Current))
-      return addMultilineToken(Current, State);
-    Token.reset(new BreakableBlockComment(
+        switchesFormatting(Current)) {
+      return nullptr;
+    }
+    return llvm::make_unique<BreakableBlockComment>(
         Current, StartColumn, Current.OriginalColumn, !Current.Previous,
-        State.Line->InPPDirective, Encoding, Style));
+        State.Line->InPPDirective, Encoding, Style);
   } else if (Current.is(TT_LineComment) &&
              (Current.Previous == nullptr ||
               Current.Previous->isNot(TT_ImplicitStringLiteral))) {
     if (!Style.ReflowComments ||
         CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
         switchesFormatting(Current))
-      return 0;
-    Token.reset(new BreakableLineCommentSection(
+      return nullptr;
+    return llvm::make_unique<BreakableLineCommentSection>(
         Current, StartColumn, Current.OriginalColumn, !Current.Previous,
-        /*InPPDirective=*/false, Encoding, Style));
+        /*InPPDirective=*/false, Encoding, Style);
+  }
+  return nullptr;
+}
+
+std::pair<unsigned, bool>
+ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
+                                           LineState &State, bool AllowBreak,
+                                           bool DryRun, bool Strict) {
+  std::unique_ptr<const BreakableToken> Token =
+      createBreakableToken(Current, State, AllowBreak);
+  if (!Token)
+    return {0, false};
+  assert(Token->getLineCount() > 0);
+  unsigned ColumnLimit = getColumnLimit(State);
+  if (Current.is(TT_LineComment)) {
     // We don't insert backslashes when breaking line comments.
     ColumnLimit = Style.ColumnLimit;
-  } else {
-    return 0;
   }
   if (Current.UnbreakableTailLength >= ColumnLimit)
-    return 0;
-
-  unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
-  bool BreakInserted = false;
+    return {0, false};
+  // ColumnWidth was already accounted into State.Column before calling
+  // breakProtrudingToken.
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
+  unsigned NewBreakPenalty = Current.isStringLiteral()
+                                 ? Style.PenaltyBreakString
+                                 : Style.PenaltyBreakComment;
+  // Stores whether we intentionally decide to let a line exceed the column
+  // limit.
+  bool Exceeded = false;
+  // Stores whether we introduce a break anywhere in the token.
+  bool BreakInserted = Token->introducesBreakBeforeToken();
+  // Store whether we inserted a new line break at the end of the previous
+  // logical line.
+  bool NewBreakBefore = false;
   // We use a conservative reflowing strategy. Reflow starts after a line is
   // broken or the corresponding whitespace compressed. Reflow ends as soon as a
   // line that doesn't get reflown with the previous line is reached.
-  bool ReflowInProgress = false;
-  unsigned Penalty = 0;
-  unsigned RemainingTokenColumns = 0;
+  bool Reflow = false;
+  // Keep track of where we are in the token:
+  // Where we are in the content of the current logical line.
   unsigned TailOffset = 0;
+  // The column number we're currently at.
+  unsigned ContentStartColumn =
+      Token->getContentStartColumn(0, /*Break=*/false);
+  // The number of columns left in the current logical line after TailOffset.
+  unsigned RemainingTokenColumns =
+      Token->getRemainingLength(0, TailOffset, ContentStartColumn);
+  // Adapt the start of the token, for example indent.
+  if (!DryRun)
+    Token->adaptStartOfLine(0, Whitespaces);
+
+  unsigned Penalty = 0;
+  DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn
+                     << ".\n");
   for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
        LineIndex != EndIndex; ++LineIndex) {
-    BreakableToken::Split SplitBefore(StringRef::npos, 0);
-    if (ReflowInProgress) {
-      SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
-                                          RemainingSpace, CommentPragmasRegex);
-    }
-    ReflowInProgress = SplitBefore.first != StringRef::npos;
-    TailOffset =
-        ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;
-    BreakInserted = BreakInserted || Token->introducesBreakBefore(LineIndex);
-    if (!DryRun)
-      Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,
-                                     RemainingSpace, SplitBefore, Whitespaces);
-    RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(
-        LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);
-    while (RemainingTokenColumns > RemainingSpace) {
-      BreakableToken::Split Split = Token->getSplit(
-          LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex);
+    DEBUG(llvm::dbgs() << "  Line: " << LineIndex << " (Reflow: " << Reflow
+                       << ")\n");
+    NewBreakBefore = false;
+    // If we did reflow the previous line, we'll try reflowing again. Otherwise
+    // we'll start reflowing if the current line is broken or whitespace is
+    // compressed.
+    bool TryReflow = Reflow;
+    // Break the current token until we can fit the rest of the line.
+    while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
+      DEBUG(llvm::dbgs() << "    Over limit, need: "
+                         << (ContentStartColumn + RemainingTokenColumns)
+                         << ", space: " << ColumnLimit
+                         << ", reflown prefix: " << ContentStartColumn
+                         << ", offset in line: " << TailOffset << "\n");
+      // If the current token doesn't fit, find the latest possible split in the
+      // current line so that breaking at it will be under the column limit.
+      // FIXME: Use the earliest possible split while reflowing to correctly
+      // compress whitespace within a line.
+      BreakableToken::Split Split =
+          Token->getSplit(LineIndex, TailOffset, ColumnLimit,
+                          ContentStartColumn, CommentPragmasRegex);
       if (Split.first == StringRef::npos) {
-        // The last line's penalty is handled in addNextStateToQueue().
+        // No break opportunity - update the penalty and continue with the next
+        // logical line.
         if (LineIndex < EndIndex - 1)
+          // The last line's penalty is handled in addNextStateToQueue().
           Penalty += Style.PenaltyExcessCharacter *
-                     (RemainingTokenColumns - RemainingSpace);
+                     (ContentStartColumn + RemainingTokenColumns - ColumnLimit);
+        DEBUG(llvm::dbgs() << "    No break opportunity.\n");
         break;
       }
       assert(Split.first != 0);
 
-      // Check if compressing the whitespace range will bring the line length
-      // under the limit. If that is the case, we perform whitespace compression
-      // instead of inserting a line break.
-      unsigned RemainingTokenColumnsAfterCompression =
-          Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);
-      if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {
-        RemainingTokenColumns = RemainingTokenColumnsAfterCompression;
-        ReflowInProgress = true;
-        if (!DryRun)
-          Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
-        break;
-      }
+      if (Token->supportsReflow()) {
+        // Check whether the next natural split point after the current one can
+        // still fit the line, either because we can compress away whitespace,
+        // or because the penalty the excess characters introduce is lower than
+        // the break penalty.
+        // We only do this for tokens that support reflowing, and thus allow us
+        // to change the whitespace arbitrarily (e.g. comments).
+        // Other tokens, like string literals, can be broken on arbitrary
+        // positions.
 
-      unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
-          LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
+        // First, compute the columns from TailOffset to the next possible split
+        // position.
+        // For example:
+        // ColumnLimit:     |
+        // // Some text   that    breaks
+        //    ^ tail offset
+        //             ^-- split
+        //    ^-------- to split columns
+        //                    ^--- next split
+        //    ^--------------- to next split columns
+        unsigned ToSplitColumns = Token->getRangeLength(
+            LineIndex, TailOffset, Split.first, ContentStartColumn);
+        DEBUG(llvm::dbgs() << "    ToSplit: " << ToSplitColumns << "\n");
+
+        BreakableToken::Split NextSplit = Token->getSplit(
+            LineIndex, TailOffset + Split.first + Split.second, ColumnLimit,
+            ContentStartColumn + ToSplitColumns + 1, CommentPragmasRegex);
+        // Compute the columns necessary to fit the next non-breakable sequence
+        // into the current line.
+        unsigned ToNextSplitColumns = 0;
+        if (NextSplit.first == StringRef::npos) {
+          ToNextSplitColumns = Token->getRemainingLength(LineIndex, TailOffset,
+                                                         ContentStartColumn);
+        } else {
+          ToNextSplitColumns = Token->getRangeLength(
+              LineIndex, TailOffset,
+              Split.first + Split.second + NextSplit.first, ContentStartColumn);
+        }
+        // Compress the whitespace between the break and the start of the next
+        // unbreakable sequence.
+        ToNextSplitColumns =
+            Token->getLengthAfterCompression(ToNextSplitColumns, Split);
+        DEBUG(llvm::dbgs() << "    ContentStartColumn: " << ContentStartColumn
+                           << "\n");
+        DEBUG(llvm::dbgs() << "    ToNextSplit: " << ToNextSplitColumns << "\n");
+        // If the whitespace compression makes us fit, continue on the current
+        // line.
+        bool ContinueOnLine =
+            ContentStartColumn + ToNextSplitColumns <= ColumnLimit;
+        unsigned ExcessCharactersPenalty = 0;
+        if (!ContinueOnLine && !Strict) {
+          // Similarly, if the excess characters' penalty is lower than the
+          // penalty of introducing a new break, continue on the current line.
+          ExcessCharactersPenalty =
+              (ContentStartColumn + ToNextSplitColumns - ColumnLimit) *
+              Style.PenaltyExcessCharacter;
+          DEBUG(llvm::dbgs()
+                << "    Penalty excess: " << ExcessCharactersPenalty
+                << "\n            break : " << NewBreakPenalty << "\n");
+          if (ExcessCharactersPenalty < NewBreakPenalty) {
+            Exceeded = true;
+            ContinueOnLine = true;
+          }
+        }
+        if (ContinueOnLine) {
+          DEBUG(llvm::dbgs() << "    Continuing on line...\n");
+          // The current line fits after compressing the whitespace - reflow
+          // the next line into it if possible.
+          TryReflow = true;
+          if (!DryRun)
+            Token->compressWhitespace(LineIndex, TailOffset, Split,
+                                      Whitespaces);
+          // When we continue on the same line, leave one space between content.
+          ContentStartColumn += ToSplitColumns + 1;
+          Penalty += ExcessCharactersPenalty;
+          TailOffset += Split.first + Split.second;
+          RemainingTokenColumns = Token->getRemainingLength(
+              LineIndex, TailOffset, ContentStartColumn);
+          continue;
+        }
+      }
+      DEBUG(llvm::dbgs() << "    Breaking...\n");
+      ContentStartColumn =
+          Token->getContentStartColumn(LineIndex, /*Break=*/true);
+      unsigned NewRemainingTokenColumns = Token->getRemainingLength(
+          LineIndex, TailOffset + Split.first + Split.second,
+          ContentStartColumn);
 
       // When breaking before a tab character, it may be moved by a few columns,
       // but will still be expanded to the next tab stop, so we don't save any
       // columns.
-      if (NewRemainingTokenColumns == RemainingTokenColumns)
+      if (NewRemainingTokenColumns == RemainingTokenColumns) {
+        // FIXME: Do we need to adjust the penalty?
         break;
-
+      }
       assert(NewRemainingTokenColumns < RemainingTokenColumns);
+
+      DEBUG(llvm::dbgs() << "    Breaking at: " << TailOffset + Split.first
+                         << ", " << Split.second << "\n");
       if (!DryRun)
         Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
-      Penalty += Current.SplitPenalty;
-      unsigned ColumnsUsed =
-          Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);
-      if (ColumnsUsed > ColumnLimit) {
-        Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit);
-      }
+
+      Penalty += NewBreakPenalty;
       TailOffset += Split.first + Split.second;
       RemainingTokenColumns = NewRemainingTokenColumns;
-      ReflowInProgress = true;
       BreakInserted = true;
+      NewBreakBefore = true;
+    }
+    // In case there's another line, prepare the state for the start of the next
+    // line.
+    if (LineIndex + 1 != EndIndex) {
+      unsigned NextLineIndex = LineIndex + 1;
+      if (NewBreakBefore)
+        // After breaking a line, try to reflow the next line into the current
+        // one once RemainingTokenColumns fits.
+        TryReflow = true;
+      if (TryReflow) {
+        // We decided that we want to try reflowing the next line into the
+        // current one.
+        // We will now adjust the state as if the reflow is successful (in
+        // preparation for the next line), and see whether that works. If we
+        // decide that we cannot reflow, we will later reset the state to the
+        // start of the next line.
+        Reflow = false;
+        // As we did not continue breaking the line, RemainingTokenColumns is
+        // known to fit after ContentStartColumn. Adapt ContentStartColumn to
+        // the position at which we want to format the next line if we do
+        // actually reflow.
+        // When we reflow, we need to add a space between the end of the current
+        // line and the next line's start column.
+        ContentStartColumn += RemainingTokenColumns + 1;
+        // Get the split that we need to reflow next logical line into the end
+        // of the current one; the split will include any leading whitespace of
+        // the next logical line.
+        BreakableToken::Split SplitBeforeNext =
+            Token->getReflowSplit(NextLineIndex, CommentPragmasRegex);
+        DEBUG(llvm::dbgs() << "    Size of reflown text: " << ContentStartColumn
+                           << "\n    Potential reflow split: ");
+        if (SplitBeforeNext.first != StringRef::npos) {
+          DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", "
+                             << SplitBeforeNext.second << "\n");
+          TailOffset = SplitBeforeNext.first + SplitBeforeNext.second;
+          // If the rest of the next line fits into the current line below the
+          // column limit, we can safely reflow.
+          RemainingTokenColumns = Token->getRemainingLength(
+              NextLineIndex, TailOffset, ContentStartColumn);
+          Reflow = true;
+          if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
+            DEBUG(llvm::dbgs() << "    Over limit after reflow, need: "
+                               << (ContentStartColumn + RemainingTokenColumns)
+                               << ", space: " << ColumnLimit
+                               << ", reflown prefix: " << ContentStartColumn
+                               << ", offset in line: " << TailOffset << "\n");
+            // If the whole next line does not fit, try to find a point in
+            // the next line at which we can break so that attaching the part
+            // of the next line to that break point onto the current line is
+            // below the column limit.
+            BreakableToken::Split Split =
+                Token->getSplit(NextLineIndex, TailOffset, ColumnLimit,
+                                ContentStartColumn, CommentPragmasRegex);
+            if (Split.first == StringRef::npos) {
+              DEBUG(llvm::dbgs() << "    Did not find later break\n");
+              Reflow = false;
+            } else {
+              // Check whether the first split point gets us below the column
+              // limit. Note that we will execute this split below as part of
+              // the normal token breaking and reflow logic within the line.
+              unsigned ToSplitColumns = Token->getRangeLength(
+                  NextLineIndex, TailOffset, Split.first, ContentStartColumn);
+              if (ContentStartColumn + ToSplitColumns > ColumnLimit) {
+                DEBUG(llvm::dbgs() << "    Next split protrudes, need: "
+                                   << (ContentStartColumn + ToSplitColumns)
+                                   << ", space: " << ColumnLimit);
+                unsigned ExcessCharactersPenalty =
+                    (ContentStartColumn + ToSplitColumns - ColumnLimit) *
+                    Style.PenaltyExcessCharacter;
+                if (NewBreakPenalty < ExcessCharactersPenalty) {
+                  Reflow = false;
+                }
+              }
+            }
+          }
+        } else {
+          DEBUG(llvm::dbgs() << "not found.\n");
+        }
+      }
+      if (!Reflow) {
+        // If we didn't reflow into the next line, the only space to consider is
+        // the next logical line. Reset our state to match the start of the next
+        // line.
+        TailOffset = 0;
+        ContentStartColumn =
+            Token->getContentStartColumn(NextLineIndex, /*Break=*/false);
+        RemainingTokenColumns = Token->getRemainingLength(
+            NextLineIndex, TailOffset, ContentStartColumn);
+        // Adapt the start of the token, for example indent.
+        if (!DryRun)
+          Token->adaptStartOfLine(NextLineIndex, Whitespaces);
+      } else {
+        // If we found a reflow split and have added a new break before the next
+        // line, we are going to remove the line break at the start of the next
+        // logical line. For example, here we'll add a new line break after
+        // 'text', and subsequently delete the line break between 'that' and
+        // 'reflows'.
+        //   // some text that
+        //   // reflows
+        // ->
+        //   // some text
+        //   // that reflows
+        // When adding the line break, we also added the penalty for it, so we
+        // need to subtract that penalty again when we remove the line break due
+        // to reflowing.
+        if (NewBreakBefore) {
+          assert(Penalty >= NewBreakPenalty);
+          Penalty -= NewBreakPenalty;
+        }
+        if (!DryRun)
+          Token->reflow(NextLineIndex, Whitespaces);
+      }
     }
   }
 
   BreakableToken::Split SplitAfterLastLine =
-      Token->getSplitAfterLastLine(TailOffset, ColumnLimit);
+      Token->getSplitAfterLastLine(TailOffset);
   if (SplitAfterLastLine.first != StringRef::npos) {
+    DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n");
     if (!DryRun)
       Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine,
                                             Whitespaces);
-    RemainingTokenColumns = Token->getLineLengthAfterSplitAfterLastLine(
-        TailOffset, SplitAfterLastLine);
+    ContentStartColumn =
+        Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true);
+    RemainingTokenColumns = Token->getRemainingLength(
+        Token->getLineCount() - 1,
+        TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second,
+        ContentStartColumn);
   }
 
-  State.Column = RemainingTokenColumns;
+  State.Column = ContentStartColumn + RemainingTokenColumns -
+                 Current.UnbreakableTailLength;
 
   if (BreakInserted) {
     // If we break the token inside a parameter list, we need to break before
@@ -1421,15 +1978,12 @@
     if (Current.is(TT_BlockComment))
       State.NoContinuation = true;
 
-    Penalty += Current.isStringLiteral() ? Style.PenaltyBreakString
-                                         : Style.PenaltyBreakComment;
-
     State.Stack.back().LastSpace = StartColumn;
   }
 
   Token->updateNextToken(State);
 
-  return Penalty;
+  return {Penalty, Exceeded};
 }
 
 unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const {
@@ -1451,7 +2005,7 @@
   if (Current.getNextNonComment() &&
       Current.getNextNonComment()->isStringLiteral())
     return true; // Implicit concatenation.
-  if (Style.ColumnLimit != 0 &&
+  if (Style.ColumnLimit != 0 && Style.BreakStringLiterals &&
       State.Column + Current.ColumnWidth + Current.UnbreakableTailLength >
           Style.ColumnLimit)
     return true; // String will be split.
diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h
index 631129c..64b3a3e 100644
--- a/lib/Format/ContinuationIndenter.h
+++ b/lib/Format/ContinuationIndenter.h
@@ -20,6 +20,8 @@
 #include "FormatToken.h"
 #include "clang/Format/Format.h"
 #include "llvm/Support/Regex.h"
+#include <map>
+#include <tuple>
 
 namespace clang {
 class SourceManager;
@@ -27,11 +29,25 @@
 namespace format {
 
 class AnnotatedLine;
+class BreakableToken;
 struct FormatToken;
 struct LineState;
 struct ParenState;
+struct RawStringFormatStyleManager;
 class WhitespaceManager;
 
+struct RawStringFormatStyleManager {
+  llvm::StringMap<FormatStyle> DelimiterStyle;
+  llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
+
+  RawStringFormatStyleManager(const FormatStyle &CodeStyle);
+
+  llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
+
+  llvm::Optional<FormatStyle>
+  getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
+};
+
 class ContinuationIndenter {
 public:
   /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
@@ -44,9 +60,11 @@
                        bool BinPackInconclusiveFunctions);
 
   /// \brief Get the initial state, i.e. the state after placing \p Line's
-  /// first token at \p FirstIndent.
-  LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
-                            bool DryRun);
+  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
+  /// the case of formatting inside raw string literals, \p FirstStartColumn is
+  /// the column at which the state of the parent formatter is.
+  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
+                            const AnnotatedLine *Line, bool DryRun);
 
   // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
   // better home.
@@ -88,17 +106,52 @@
   /// \brief Update 'State' with the next token opening a nested block.
   void moveStateToNewBlock(LineState &State);
 
+  /// \brief Reformats a raw string literal.
+  /// 
+  /// \returns An extra penalty induced by reformatting the token.
+  unsigned reformatRawStringLiteral(const FormatToken &Current,
+                                    LineState &State,
+                                    const FormatStyle &RawStringStyle,
+                                    bool DryRun);
+
+  /// \brief If the current token is at the end of the current line, handle
+  /// the transition to the next line.
+  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
+                           bool DryRun, bool AllowBreak);
+
+  /// \brief If \p Current is a raw string that is configured to be reformatted,
+  /// return the style to be used.
+  llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
+                                                const LineState &State);
+
   /// \brief If the current token sticks out over the end of the line, break
   /// it if possible.
   ///
-  /// \returns An extra penalty if a token was broken, otherwise 0.
+  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
+  /// when tokens are broken or lines exceed the column limit, and exceeded
+  /// indicates whether the algorithm purposefully left lines exceeding the
+  /// column limit.
   ///
-  /// The returned penalty will cover the cost of the additional line breaks and
-  /// column limit violation in all lines except for the last one. The penalty
-  /// for the column limit violation in the last line (and in single line
-  /// tokens) is handled in \c addNextStateToQueue.
-  unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
-                                bool DryRun);
+  /// The returned penalty will cover the cost of the additional line breaks
+  /// and column limit violation in all lines except for the last one. The
+  /// penalty for the column limit violation in the last line (and in single
+  /// line tokens) is handled in \c addNextStateToQueue.
+  ///
+  /// \p Strict indicates whether reflowing is allowed to leave characters
+  /// protruding the column limit; if true, lines will be split strictly within
+  /// the column limit where possible; if false, words are allowed to protrude
+  /// over the column limit as long as the penalty is less than the penalty
+  /// of a break.
+  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
+                                                 LineState &State,
+                                                 bool AllowBreak, bool DryRun,
+                                                 bool Strict);
+
+  /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr
+  /// if the current token cannot be broken.
+  std::unique_ptr<BreakableToken>
+  createBreakableToken(const FormatToken &Current, LineState &State,
+                       bool AllowBreak);
 
   /// \brief Appends the next token to \p State and updates information
   /// necessary for indentation.
@@ -143,6 +196,7 @@
   encoding::Encoding Encoding;
   bool BinPackInconclusiveFunctions;
   llvm::Regex CommentPragmasRegex;
+  const RawStringFormatStyleManager RawStringFormats;
 };
 
 struct ParenState {
@@ -154,7 +208,8 @@
         NoLineBreakInOperand(false), LastOperatorWrapped(true),
         ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
         AlignColons(true), ObjCSelectorNameFound(false),
-        HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
+        HasMultipleNestedBlocks(false), NestedBlockInlined(false),
+        IsInsideObjCArrayLiteral(false) {}
 
   /// \brief The position to which a specific parenthesis level needs to be
   /// indented.
@@ -260,10 +315,14 @@
   /// the same token.
   bool HasMultipleNestedBlocks : 1;
 
-  // \brief The start of a nested block (e.g. lambda introducer in C++ or
-  // "function" in JavaScript) is not wrapped to a new line.
+  /// \brief The start of a nested block (e.g. lambda introducer in C++ or
+  /// "function" in JavaScript) is not wrapped to a new line.
   bool NestedBlockInlined : 1;
 
+  /// \brief \c true if the current \c ParenState represents an Objective-C
+  /// array literal.
+  bool IsInsideObjCArrayLiteral : 1;
+
   bool operator<(const ParenState &Other) const {
     if (Indent != Other.Indent)
       return Indent < Other.Indent;
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index cb0a010..5807db9 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -16,6 +16,7 @@
 #include "clang/Format/Format.h"
 #include "AffectedRangeManager.h"
 #include "ContinuationIndenter.h"
+#include "FormatInternal.h"
 #include "FormatTokenLexer.h"
 #include "NamespaceEndCommentsFixer.h"
 #include "SortJavaScriptImports.h"
@@ -31,6 +32,7 @@
 #include "clang/Basic/VirtualFileSystem.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Path.h"
@@ -45,6 +47,7 @@
 using clang::format::FormatStyle;
 
 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
+LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat)
 
 namespace llvm {
 namespace yaml {
@@ -102,6 +105,14 @@
   }
 };
 
+template <> struct ScalarEnumerationTraits<FormatStyle::BinPackStyle> {
+  static void enumeration(IO &IO, FormatStyle::BinPackStyle &Value) {
+    IO.enumCase(Value, "Auto", FormatStyle::BPS_Auto);
+    IO.enumCase(Value, "Always", FormatStyle::BPS_Always);
+    IO.enumCase(Value, "Never", FormatStyle::BPS_Never);
+  }
+};
+
 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
   static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
     IO.enumCase(Value, "All", FormatStyle::BOS_All);
@@ -359,6 +370,7 @@
                    Style.ExperimentalAutoDetectBinPacking);
     IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments);
     IO.mapOptional("ForEachMacros", Style.ForEachMacros);
+    IO.mapOptional("IncludeBlocks", Style.IncludeBlocks);
     IO.mapOptional("IncludeCategories", Style.IncludeCategories);
     IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex);
     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
@@ -374,6 +386,7 @@
     IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd);
     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
+    IO.mapOptional("ObjCBinPackProtocolList", Style.ObjCBinPackProtocolList);
     IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
     IO.mapOptional("ObjCSpaceBeforeProtocolList",
@@ -389,6 +402,7 @@
     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
                    Style.PenaltyReturnTypeOnItsOwnLine);
     IO.mapOptional("PointerAlignment", Style.PointerAlignment);
+    IO.mapOptional("RawStringFormats", Style.RawStringFormats);
     IO.mapOptional("ReflowComments", Style.ReflowComments);
     IO.mapOptional("SortIncludes", Style.SortIncludes);
     IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations);
@@ -397,7 +411,13 @@
                    Style.SpaceAfterTemplateKeyword);
     IO.mapOptional("SpaceBeforeAssignmentOperators",
                    Style.SpaceBeforeAssignmentOperators);
+    IO.mapOptional("SpaceBeforeCtorInitializerColon",
+                   Style.SpaceBeforeCtorInitializerColon);
+    IO.mapOptional("SpaceBeforeInheritanceColon",
+                   Style.SpaceBeforeInheritanceColon);
     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
+    IO.mapOptional("SpaceBeforeRangeBasedForLoopColon",
+                   Style.SpaceBeforeRangeBasedForLoopColon);
     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
     IO.mapOptional("SpacesBeforeTrailingComments",
                    Style.SpacesBeforeTrailingComments);
@@ -441,6 +461,24 @@
   }
 };
 
+template <> struct ScalarEnumerationTraits<FormatStyle::IncludeBlocksStyle> {
+  static void enumeration(IO &IO, FormatStyle::IncludeBlocksStyle &Value) {
+    IO.enumCase(Value, "Preserve", FormatStyle::IBS_Preserve);
+    IO.enumCase(Value, "Merge", FormatStyle::IBS_Merge);
+    IO.enumCase(Value, "Regroup", FormatStyle::IBS_Regroup);
+  }
+};
+
+template <> struct MappingTraits<FormatStyle::RawStringFormat> {
+  static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) {
+    IO.mapOptional("Language", Format.Language);
+    IO.mapOptional("Delimiters", Format.Delimiters);
+    IO.mapOptional("EnclosingFunctions", Format.EnclosingFunctions);
+    IO.mapOptional("CanonicalDelimiter", Format.CanonicalDelimiter);
+    IO.mapOptional("BasedOnStyle", Format.BasedOnStyle);
+  }
+};
+
 // Allows to read vector<FormatStyle> while keeping default values.
 // IO.getContext() should contain a pointer to the FormatStyle structure, that
 // will be used to get default values for missing keys.
@@ -455,7 +493,7 @@
     if (Index >= Seq.size()) {
       assert(Index == Seq.size());
       FormatStyle Template;
-      if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
+      if (!Seq.empty() && Seq[0].Language == FormatStyle::LK_None) {
         Template = Seq[0];
       } else {
         Template = *((const FormatStyle *)IO.getContext());
@@ -603,6 +641,7 @@
                                  {"^(<|\"(gtest|gmock|isl|json)/)", 3},
                                  {".*", 1}};
   LLVMStyle.IncludeIsMainRegex = "(Test)?$";
+  LLVMStyle.IncludeBlocks = FormatStyle::IBS_Preserve;
   LLVMStyle.IndentCaseLabels = false;
   LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None;
   LLVMStyle.IndentWrappedFunctionNames = false;
@@ -613,6 +652,7 @@
   LLVMStyle.MaxEmptyLinesToKeep = 1;
   LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
+  LLVMStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Auto;
   LLVMStyle.ObjCBlockIndentWidth = 2;
   LLVMStyle.ObjCSpaceAfterProperty = false;
   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
@@ -628,7 +668,10 @@
   LLVMStyle.SpacesInCStyleCastParentheses = false;
   LLVMStyle.SpaceAfterCStyleCast = false;
   LLVMStyle.SpaceAfterTemplateKeyword = true;
+  LLVMStyle.SpaceBeforeCtorInitializerColon = true;
+  LLVMStyle.SpaceBeforeInheritanceColon = true;
   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
+  LLVMStyle.SpaceBeforeRangeBasedForLoopColon = true;
   LLVMStyle.SpaceBeforeAssignmentOperators = true;
   LLVMStyle.SpacesInAngles = false;
 
@@ -651,6 +694,7 @@
   if (Language == FormatStyle::LK_TextProto) {
     FormatStyle GoogleStyle = getGoogleStyle(FormatStyle::LK_Proto);
     GoogleStyle.Language = FormatStyle::LK_TextProto;
+
     return GoogleStyle;
   }
 
@@ -670,9 +714,24 @@
   GoogleStyle.IncludeIsMainRegex = "([-_](test|unittest))?$";
   GoogleStyle.IndentCaseLabels = true;
   GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
+  GoogleStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Never;
   GoogleStyle.ObjCSpaceAfterProperty = false;
-  GoogleStyle.ObjCSpaceBeforeProtocolList = false;
+  GoogleStyle.ObjCSpaceBeforeProtocolList = true;
   GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
+  GoogleStyle.RawStringFormats = {{
+      FormatStyle::LK_TextProto,
+      /*Delimiters=*/
+      {
+          "pb",
+          "PB",
+          "proto",
+          "PROTO",
+      },
+      /*EnclosingFunctionNames=*/
+      {},
+      /*CanonicalDelimiter=*/"",
+      /*BasedOnStyle=*/"google",
+  }};
   GoogleStyle.SpacesBeforeTrailingComments = 2;
   GoogleStyle.Standard = FormatStyle::LS_Auto;
 
@@ -708,6 +767,13 @@
   } else if (Language == FormatStyle::LK_Proto) {
     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
     GoogleStyle.SpacesInContainerLiterals = false;
+    GoogleStyle.Cpp11BracedListStyle = false;
+    // This affects protocol buffer options specifications and text protos.
+    // Text protos are currently mostly formatted inside C++ raw string literals
+    // and often the current breaking behavior of string literals is not
+    // beneficial there. Investigate turning this on once proper string reflow
+    // has been implemented.
+    GoogleStyle.BreakStringLiterals = false;
   } else if (Language == FormatStyle::LK_ObjC) {
     GoogleStyle.ColumnLimit = 100;
   }
@@ -837,7 +903,7 @@
   assert(Language != FormatStyle::LK_None);
   if (Text.trim().empty())
     return make_error_code(ParseError::Error);
-
+  Style->StyleSet.Clear();
   std::vector<FormatStyle> Styles;
   llvm::yaml::Input Input(Text);
   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
@@ -866,15 +932,23 @@
   // Look for a suitable configuration starting from the end, so we can
   // find the configuration for the specific language first, and the default
   // configuration (which can only be at slot 0) after it.
+  FormatStyle::FormatStyleSet StyleSet;
+  bool LanguageFound = false;
   for (int i = Styles.size() - 1; i >= 0; --i) {
-    if (Styles[i].Language == Language ||
-        Styles[i].Language == FormatStyle::LK_None) {
-      *Style = Styles[i];
-      Style->Language = Language;
-      return make_error_code(ParseError::Success);
-    }
+    if (Styles[i].Language != FormatStyle::LK_None)
+      StyleSet.Add(Styles[i]);
+    if (Styles[i].Language == Language)
+      LanguageFound = true;
   }
-  return make_error_code(ParseError::Unsuitable);
+  if (!LanguageFound) {
+    if (Styles.empty() || Styles[0].Language != FormatStyle::LK_None)
+      return make_error_code(ParseError::Unsuitable);
+    FormatStyle DefaultStyle = Styles[0];
+    DefaultStyle.Language = Language;
+    StyleSet.Add(std::move(DefaultStyle));
+  }
+  *Style = *StyleSet.Get(Language);
+  return make_error_code(ParseError::Success);
 }
 
 std::string configurationAsText(const FormatStyle &Style) {
@@ -888,6 +962,38 @@
   return Stream.str();
 }
 
+llvm::Optional<FormatStyle>
+FormatStyle::FormatStyleSet::Get(FormatStyle::LanguageKind Language) const {
+  if (!Styles)
+    return None;
+  auto It = Styles->find(Language);
+  if (It == Styles->end())
+    return None;
+  FormatStyle Style = It->second;
+  Style.StyleSet = *this;
+  return Style;
+}
+
+void FormatStyle::FormatStyleSet::Add(FormatStyle Style) {
+  assert(Style.Language != LK_None &&
+         "Cannot add a style for LK_None to a StyleSet");
+  assert(
+      !Style.StyleSet.Styles &&
+      "Cannot add a style associated with an existing StyleSet to a StyleSet");
+  if (!Styles)
+    Styles = std::make_shared<MapType>();
+  (*Styles)[Style.Language] = std::move(Style);
+}
+
+void FormatStyle::FormatStyleSet::Clear() {
+  Styles.reset();
+}
+
+llvm::Optional<FormatStyle>
+FormatStyle::GetLanguageStyle(FormatStyle::LanguageKind Language) const {
+  return StyleSet.Get(Language);
+}
+
 namespace {
 
 class JavaScriptRequoter : public TokenAnalyzer {
@@ -895,7 +1001,7 @@
   JavaScriptRequoter(const Environment &Env, const FormatStyle &Style)
       : TokenAnalyzer(Env, Style) {}
 
-  tooling::Replacements
+  std::pair<tooling::Replacements, unsigned>
   analyze(TokenAnnotator &Annotator,
           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
           FormatTokenLexer &Tokens) override {
@@ -903,7 +1009,7 @@
                                           AnnotatedLines.end());
     tooling::Replacements Result;
     requoteJSStringLiteral(AnnotatedLines, Result);
-    return Result;
+    return {Result, 0};
   }
 
 private:
@@ -984,7 +1090,7 @@
             FormattingAttemptStatus *Status)
       : TokenAnalyzer(Env, Style), Status(Status) {}
 
-  tooling::Replacements
+  std::pair<tooling::Replacements, unsigned>
   analyze(TokenAnnotator &Annotator,
           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
           FormatTokenLexer &Tokens) override {
@@ -1003,13 +1109,20 @@
     ContinuationIndenter Indenter(Style, Tokens.getKeywords(),
                                   Env.getSourceManager(), Whitespaces, Encoding,
                                   BinPackInconclusiveFunctions);
-    UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
-                           Env.getSourceManager(), Status)
-        .format(AnnotatedLines);
+    unsigned Penalty =
+        UnwrappedLineFormatter(&Indenter, &Whitespaces, Style,
+                               Tokens.getKeywords(), Env.getSourceManager(),
+                               Status)
+            .format(AnnotatedLines, /*DryRun=*/false,
+                    /*AdditionalIndent=*/0,
+                    /*FixBadIndentation=*/false,
+                    /*FirstStartColumn=*/Env.getFirstStartColumn(),
+                    /*NextStartColumn=*/Env.getNextStartColumn(),
+                    /*LastStartColumn=*/Env.getLastStartColumn());
     for (const auto &R : Whitespaces.generateReplacements())
       if (Result.add(R))
-        return Result;
-    return Result;
+        return std::make_pair(Result, 0);
+    return std::make_pair(Result, Penalty);
   }
 
 private:
@@ -1097,7 +1210,7 @@
         DeletedTokens(FormatTokenLess(Env.getSourceManager())) {}
 
   // FIXME: eliminate unused parameters.
-  tooling::Replacements
+  std::pair<tooling::Replacements, unsigned>
   analyze(TokenAnnotator &Annotator,
           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
           FormatTokenLexer &Tokens) override {
@@ -1125,7 +1238,7 @@
       }
     }
 
-    return generateFixes();
+    return {generateFixes(), 0};
   }
 
 private:
@@ -1312,6 +1425,108 @@
   std::set<FormatToken *, FormatTokenLess> DeletedTokens;
 };
 
+class ObjCHeaderStyleGuesser : public TokenAnalyzer {
+public:
+  ObjCHeaderStyleGuesser(const Environment &Env, const FormatStyle &Style)
+      : TokenAnalyzer(Env, Style), IsObjC(false) {}
+
+  std::pair<tooling::Replacements, unsigned>
+  analyze(TokenAnnotator &Annotator,
+          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+          FormatTokenLexer &Tokens) override {
+    assert(Style.Language == FormatStyle::LK_Cpp);
+    IsObjC = guessIsObjC(AnnotatedLines, Tokens.getKeywords());
+    tooling::Replacements Result;
+    return {Result, 0};
+  }
+
+  bool isObjC() { return IsObjC; }
+
+private:
+  static bool guessIsObjC(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+                          const AdditionalKeywords &Keywords) {
+    // Keep this array sorted, since we are binary searching over it.
+    static constexpr llvm::StringLiteral FoundationIdentifiers[] = {
+        "CGFloat",
+        "NSAffineTransform",
+        "NSArray",
+        "NSAttributedString",
+        "NSBundle",
+        "NSCache",
+        "NSCalendar",
+        "NSCharacterSet",
+        "NSCountedSet",
+        "NSData",
+        "NSDataDetector",
+        "NSDecimal",
+        "NSDecimalNumber",
+        "NSDictionary",
+        "NSEdgeInsets",
+        "NSHashTable",
+        "NSIndexPath",
+        "NSIndexSet",
+        "NSInteger",
+        "NSLocale",
+        "NSMapTable",
+        "NSMutableArray",
+        "NSMutableAttributedString",
+        "NSMutableCharacterSet",
+        "NSMutableData",
+        "NSMutableDictionary",
+        "NSMutableIndexSet",
+        "NSMutableOrderedSet",
+        "NSMutableSet",
+        "NSMutableString",
+        "NSNumber",
+        "NSNumberFormatter",
+        "NSObject",
+        "NSOrderedSet",
+        "NSPoint",
+        "NSPointerArray",
+        "NSRange",
+        "NSRect",
+        "NSRegularExpression",
+        "NSSet",
+        "NSSize",
+        "NSString",
+        "NSTimeZone",
+        "NSUInteger",
+        "NSURL",
+        "NSURLComponents",
+        "NSURLQueryItem",
+        "NSUUID",
+        "NSValue",
+    };
+
+    for (auto &Line : AnnotatedLines) {
+      for (FormatToken *FormatTok = Line->First; FormatTok;
+           FormatTok = FormatTok->Next) {
+        if ((FormatTok->Previous && FormatTok->Previous->is(tok::at) &&
+             (FormatTok->isObjCAtKeyword(tok::objc_interface) ||
+              FormatTok->isObjCAtKeyword(tok::objc_implementation) ||
+              FormatTok->isObjCAtKeyword(tok::objc_protocol) ||
+              FormatTok->isObjCAtKeyword(tok::objc_end) ||
+              FormatTok->isOneOf(tok::numeric_constant, tok::l_square,
+                                 tok::l_brace))) ||
+            (FormatTok->Tok.isAnyIdentifier() &&
+             std::binary_search(std::begin(FoundationIdentifiers),
+                                std::end(FoundationIdentifiers),
+                                FormatTok->TokenText)) ||
+            FormatTok->is(TT_ObjCStringLiteral) ||
+            FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
+                               TT_ObjCBlockLBrace, TT_ObjCBlockLParen,
+                               TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr,
+                               TT_ObjCMethodSpecifier, TT_ObjCProperty)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  bool IsObjC;
+};
+
 struct IncludeDirective {
   StringRef Filename;
   StringRef Text;
@@ -1401,19 +1616,27 @@
                             }),
                 Indices.end());
 
+  int CurrentCategory = Includes.front().Category;
+
   // If the #includes are out of order, we generate a single replacement fixing
   // the entire block. Otherwise, no replacement is generated.
   if (Indices.size() == Includes.size() &&
-      std::is_sorted(Indices.begin(), Indices.end()))
+      std::is_sorted(Indices.begin(), Indices.end()) &&
+      Style.IncludeBlocks == FormatStyle::IBS_Preserve)
     return;
 
   std::string result;
   for (unsigned Index : Indices) {
-    if (!result.empty())
+    if (!result.empty()) {
       result += "\n";
+      if (Style.IncludeBlocks == FormatStyle::IBS_Regroup &&
+          CurrentCategory != Includes[Index].Category)
+        result += "\n";
+    }
     result += Includes[Index].Text;
     if (Cursor && CursorIndex == Index)
       *Cursor = IncludesBeginOffset + result.size() - CursorToEOLOffset;
+    CurrentCategory = Includes[Index].Category;
   }
 
   auto Err = Replaces.add(tooling::Replacement(
@@ -1521,6 +1744,10 @@
     else if (Trimmed == "// clang-format on")
       FormattingOff = false;
 
+    const bool EmptyLineSkipped =
+        Trimmed.empty() && (Style.IncludeBlocks == FormatStyle::IBS_Merge ||
+                            Style.IncludeBlocks == FormatStyle::IBS_Regroup);
+
     if (!FormattingOff && !Line.endswith("\\")) {
       if (IncludeRegex.match(Line, &Matches)) {
         StringRef IncludeName = Matches[2];
@@ -1530,7 +1757,7 @@
         if (Category == 0)
           MainIncludeFound = true;
         IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
-      } else if (!IncludesInBlock.empty()) {
+      } else if (!IncludesInBlock.empty() && !EmptyLineSkipped) {
         sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
                         Cursor);
         IncludesInBlock.clear();
@@ -1906,19 +2133,22 @@
   return processReplacements(Cleanup, Code, NewReplaces, Style);
 }
 
-tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
-                               ArrayRef<tooling::Range> Ranges,
-                               StringRef FileName,
-                               FormattingAttemptStatus *Status) {
+namespace internal {
+std::pair<tooling::Replacements, unsigned>
+reformat(const FormatStyle &Style, StringRef Code,
+         ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
+         unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName,
+         FormattingAttemptStatus *Status) {
   FormatStyle Expanded = expandPresets(Style);
   if (Expanded.DisableFormat)
-    return tooling::Replacements();
+    return {tooling::Replacements(), 0};
   if (isLikelyXml(Code))
-    return tooling::Replacements();
+    return {tooling::Replacements(), 0};
   if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code))
-    return tooling::Replacements();
+    return {tooling::Replacements(), 0};
 
-  typedef std::function<tooling::Replacements(const Environment &)>
+  typedef std::function<std::pair<tooling::Replacements, unsigned>(
+      const Environment &)>
       AnalyzerPass;
   SmallVector<AnalyzerPass, 4> Passes;
 
@@ -1944,26 +2174,42 @@
     return Formatter(Env, Expanded, Status).process();
   });
 
-  std::unique_ptr<Environment> Env =
-      Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+  std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(
+      Code, FileName, Ranges, FirstStartColumn, NextStartColumn,
+      LastStartColumn);
   llvm::Optional<std::string> CurrentCode = None;
   tooling::Replacements Fixes;
+  unsigned Penalty = 0;
   for (size_t I = 0, E = Passes.size(); I < E; ++I) {
-    tooling::Replacements PassFixes = Passes[I](*Env);
+    std::pair<tooling::Replacements, unsigned> PassFixes = Passes[I](*Env);
     auto NewCode = applyAllReplacements(
-        CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes);
+        CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes.first);
     if (NewCode) {
-      Fixes = Fixes.merge(PassFixes);
+      Fixes = Fixes.merge(PassFixes.first);
+      Penalty += PassFixes.second;
       if (I + 1 < E) {
         CurrentCode = std::move(*NewCode);
         Env = Environment::CreateVirtualEnvironment(
             *CurrentCode, FileName,
-            tooling::calculateRangesAfterReplacements(Fixes, Ranges));
+            tooling::calculateRangesAfterReplacements(Fixes, Ranges),
+            FirstStartColumn, NextStartColumn, LastStartColumn);
       }
     }
   }
 
-  return Fixes;
+  return {Fixes, Penalty};
+}
+} // namespace internal
+
+tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
+                               ArrayRef<tooling::Range> Ranges,
+                               StringRef FileName,
+                               FormattingAttemptStatus *Status) {
+  return internal::reformat(Style, Code, Ranges,
+                            /*FirstStartColumn=*/0,
+                            /*NextStartColumn=*/0,
+                            /*LastStartColumn=*/0, FileName, Status)
+      .first;
 }
 
 tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
@@ -1975,7 +2221,7 @@
   std::unique_ptr<Environment> Env =
       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
   Cleaner Clean(*Env, Style);
-  return Clean.process();
+  return Clean.process().first;
 }
 
 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
@@ -1995,7 +2241,7 @@
   std::unique_ptr<Environment> Env =
       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
   NamespaceEndCommentsFixer Fix(*Env, Style);
-  return Fix.process();
+  return Fix.process().first;
 }
 
 tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
@@ -2005,7 +2251,7 @@
   std::unique_ptr<Environment> Env =
       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
   UsingDeclarationsSorter Sorter(*Env, Style);
-  return Sorter.process();
+  return Sorter.process().first;
 }
 
 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
@@ -2013,7 +2259,8 @@
   LangOpts.CPlusPlus = 1;
   LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
   LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
-  LangOpts.CPlusPlus1z = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
+  LangOpts.CPlusPlus17 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
+  LangOpts.CPlusPlus2a = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
   LangOpts.LineComment = 1;
   bool AlternativeOperators = Style.isCpp();
   LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
@@ -2046,11 +2293,35 @@
   if (FileName.endswith_lower(".proto") ||
       FileName.endswith_lower(".protodevel"))
     return FormatStyle::LK_Proto;
+  if (FileName.endswith_lower(".textpb") ||
+      FileName.endswith_lower(".pb.txt") ||
+      FileName.endswith_lower(".textproto") ||
+      FileName.endswith_lower(".asciipb"))
+    return FormatStyle::LK_TextProto;
   if (FileName.endswith_lower(".td"))
     return FormatStyle::LK_TableGen;
   return FormatStyle::LK_Cpp;
 }
 
+FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) {
+  const auto GuessedLanguage = getLanguageByFileName(FileName);
+  if (GuessedLanguage == FormatStyle::LK_Cpp) {
+    auto Extension = llvm::sys::path::extension(FileName);
+    // If there's no file extension (or it's .h), we need to check the contents
+    // of the code to see if it contains Objective-C.
+    if (Extension.empty() || Extension == ".h") {
+      auto NonEmptyFileName = FileName.empty() ? "guess.h" : FileName;
+      std::unique_ptr<Environment> Env =
+          Environment::CreateVirtualEnvironment(Code, NonEmptyFileName, /*Ranges=*/{});
+      ObjCHeaderStyleGuesser Guesser(*Env, getLLVMStyle());
+      Guesser.process();
+      if (Guesser.isObjC())
+        return FormatStyle::LK_ObjC;
+    }
+  }
+  return GuessedLanguage;
+}
+
 llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName,
                                      StringRef FallbackStyleName,
                                      StringRef Code, vfs::FileSystem *FS) {
@@ -2058,14 +2329,7 @@
     FS = vfs::getRealFileSystem().get();
   }
   FormatStyle Style = getLLVMStyle();
-  Style.Language = getLanguageByFileName(FileName);
-
-  // This is a very crude detection of whether a header contains ObjC code that
-  // should be improved over time and probably be done on tokens, not one the
-  // bare content of the file.
-  if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") &&
-      (Code.contains("\n- (") || Code.contains("\n+ (")))
-    Style.Language = FormatStyle::LK_ObjC;
+  Style.Language = guessLanguage(FileName, Code);
 
   FormatStyle FallbackStyle = getNoStyle();
   if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle))
diff --git a/lib/Format/FormatInternal.h b/lib/Format/FormatInternal.h
new file mode 100644
index 0000000..3984158
--- /dev/null
+++ b/lib/Format/FormatInternal.h
@@ -0,0 +1,83 @@
+//===--- FormatInternal.h - Format C++ code ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares Format APIs to be used internally by the
+/// formatting library implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H
+#define LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H
+
+#include "BreakableToken.h"
+#include "clang/Tooling/Core/Lookup.h"
+#include <utility>
+
+namespace clang {
+namespace format {
+namespace internal {
+
+/// \brief Reformats the given \p Ranges in the code fragment \p Code.
+///
+/// A fragment of code could conceptually be surrounded by other code that might
+/// constrain how that fragment is laid out.
+/// For example, consider the fragment of code between 'R"(' and ')"',
+/// exclusive, in the following code:
+///
+/// void outer(int x) {
+///   string inner = R"(name: data
+///                     ^ FirstStartColumn
+///     value: {
+///       x: 1
+///     ^ NextStartColumn
+///     }
+///   )";
+///   ^ LastStartColumn
+/// }
+///
+/// The outer code can influence the inner fragment as follows:
+///   * \p FirstStartColumn specifies the column at which \p Code starts.
+///   * \p NextStartColumn specifies the additional indent dictated by the
+///     surrounding code. It is applied to the rest of the lines of \p Code.
+///   * \p LastStartColumn specifies the column at which the last line of
+///     \p Code should end, in case the last line is an empty line.
+///
+///     In the case where the last line of the fragment contains content,
+///     the fragment ends at the end of that content and \p LastStartColumn is
+///     not taken into account, for example in:
+///
+///     void block() {
+///       string inner = R"(name: value)";
+///     }
+///
+/// Each range is extended on either end to its next bigger logic unit, i.e.
+/// everything that might influence its formatting or might be influenced by its
+/// formatting.
+///
+/// Returns a pair P, where:
+///   * P.first are the ``Replacements`` necessary to make all \p Ranges comply
+///     with \p Style.
+///   * P.second is the penalty induced by formatting the fragment \p Code.
+///     If the formatting of the fragment doesn't have a notion of penalty,
+///     returns 0.
+///
+/// If ``Status`` is non-null, its value will be populated with the status of
+/// this formatting attempt. See \c FormattingAttemptStatus.
+std::pair<tooling::Replacements, unsigned>
+reformat(const FormatStyle &Style, StringRef Code,
+         ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
+         unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName,
+         FormattingAttemptStatus *Status);
+
+} // namespace internal
+} // namespace format
+} // namespace clang
+
+#endif
diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h
index 3dc0ab0..550bdcc 100644
--- a/lib/Format/FormatToken.h
+++ b/lib/Format/FormatToken.h
@@ -88,6 +88,7 @@
   TYPE(TemplateCloser)                                                         \
   TYPE(TemplateOpener)                                                         \
   TYPE(TemplateString)                                                         \
+  TYPE(ProtoExtensionLSquare)                                                  \
   TYPE(TrailingAnnotation)                                                     \
   TYPE(TrailingReturnArrow)                                                    \
   TYPE(TrailingUnaryOperator)                                                  \
@@ -240,6 +241,10 @@
   /// e.g. because several of them are block-type.
   unsigned LongestObjCSelectorName = 0;
 
+  /// \brief How many parts ObjC selector have (i.e. how many parameters method
+  /// has).
+  unsigned ObjCSelectorNameParts = 0;
+
   /// \brief Stores the number of required fake parentheses and the
   /// corresponding operator precedence.
   ///
@@ -348,17 +353,23 @@
             Next->isObjCAtKeyword(tok::objc_private));
   }
 
-  /// \brief Returns whether \p Tok is ([{ or a template opening <.
+  /// \brief Returns whether \p Tok is ([{ or an opening < of a template or in
+  /// protos.
   bool opensScope() const {
     if (is(TT_TemplateString) && TokenText.endswith("${"))
       return true;
+    if (is(TT_DictLiteral) && is(tok::less))
+      return true;
     return isOneOf(tok::l_paren, tok::l_brace, tok::l_square,
                    TT_TemplateOpener);
   }
-  /// \brief Returns whether \p Tok is )]} or a template closing >.
+  /// \brief Returns whether \p Tok is )]} or a closing > of a template or in
+  /// protos.
   bool closesScope() const {
     if (is(TT_TemplateString) && TokenText.startswith("}"))
       return true;
+    if (is(TT_DictLiteral) && is(tok::greater))
+      return true;
     return isOneOf(tok::r_paren, tok::r_brace, tok::r_square,
                    TT_TemplateCloser);
   }
@@ -441,7 +452,8 @@
   }
 
   prec::Level getPrecedence() const {
-    return getBinOpPrecedence(Tok.getKind(), true, true);
+    return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
+                              /*CPlusPlus11=*/true);
   }
 
   /// \brief Returns the previous token ignoring comments.
@@ -466,6 +478,7 @@
     if (is(TT_TemplateString) && opensScope())
       return true;
     return is(TT_ArrayInitializerLSquare) ||
+           is(TT_ProtoExtensionLSquare) ||
            (is(tok::l_brace) &&
             (BlockKind == BK_Block || is(TT_DictLiteral) ||
              (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp
index 8fb3d84..199d297 100644
--- a/lib/Format/FormatTokenLexer.cpp
+++ b/lib/Format/FormatTokenLexer.cpp
@@ -24,10 +24,10 @@
 namespace format {
 
 FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
-                                   const FormatStyle &Style,
+                                   unsigned Column, const FormatStyle &Style,
                                    encoding::Encoding Encoding)
     : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
-      Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
+      Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
       Style(Style), IdentTable(getFormattingLangOpts(Style)),
       Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
       FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
@@ -50,6 +50,8 @@
       tryParseJSRegexLiteral();
       handleTemplateStrings();
     }
+    if (Style.Language == FormatStyle::LK_TextProto)
+      tryParsePythonComment();
     tryMergePreviousTokens();
     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
       FirstInLineIndex = Tokens.size() - 1;
@@ -330,6 +332,27 @@
   resetLexer(SourceMgr.getFileOffset(loc));
 }
 
+void FormatTokenLexer::tryParsePythonComment() {
+  FormatToken *HashToken = Tokens.back();
+  if (HashToken->isNot(tok::hash))
+    return;
+  // Turn the remainder of this line into a comment.
+  const char *CommentBegin =
+      Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#"
+  size_t From = CommentBegin - Lex->getBuffer().begin();
+  size_t To = Lex->getBuffer().find_first_of('\n', From);
+  if (To == StringRef::npos)
+    To = Lex->getBuffer().size();
+  size_t Len = To - From;
+  HashToken->Type = TT_LineComment;
+  HashToken->Tok.setKind(tok::comment);
+  HashToken->TokenText = Lex->getBuffer().substr(From, Len);
+  SourceLocation Loc = To < Lex->getBuffer().size()
+                           ? Lex->getSourceLocation(CommentBegin + Len)
+                           : SourceMgr.getLocForEndOfFile(ID);
+  resetLexer(SourceMgr.getFileOffset(Loc));
+}
+
 bool FormatTokenLexer::tryMerge_TMacro() {
   if (Tokens.size() < 4)
     return false;
@@ -554,13 +577,21 @@
   // take them into account as whitespace - this pattern is quite frequent
   // in macro definitions.
   // FIXME: Add a more explicit test.
-  while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
-         FormatTok->TokenText[1] == '\n') {
+  while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') {
+    unsigned SkippedWhitespace = 0;
+    if (FormatTok->TokenText.size() > 2 &&
+        (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n'))
+      SkippedWhitespace = 3;
+    else if (FormatTok->TokenText[1] == '\n')
+      SkippedWhitespace = 2;
+    else
+      break;
+
     ++FormatTok->NewlinesBefore;
-    WhitespaceLength += 2;
-    FormatTok->LastNewlineOffset = 2;
+    WhitespaceLength += SkippedWhitespace;
+    FormatTok->LastNewlineOffset = SkippedWhitespace;
     Column = 0;
-    FormatTok->TokenText = FormatTok->TokenText.substr(2);
+    FormatTok->TokenText = FormatTok->TokenText.substr(SkippedWhitespace);
   }
 
   FormatTok->WhitespaceRange = SourceRange(
diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h
index bf10f09..59dc2a7 100644
--- a/lib/Format/FormatTokenLexer.h
+++ b/lib/Format/FormatTokenLexer.h
@@ -36,7 +36,7 @@
 
 class FormatTokenLexer {
 public:
-  FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
+  FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
                    const FormatStyle &Style, encoding::Encoding Encoding);
 
   ArrayRef<FormatToken *> lex();
@@ -73,6 +73,8 @@
   // nested template parts by balancing curly braces.
   void handleTemplateStrings();
 
+  void tryParsePythonComment();
+
   bool tryMerge_TMacro();
 
   bool tryMergeConflictMarkers();
diff --git a/lib/Format/NamespaceEndCommentsFixer.cpp b/lib/Format/NamespaceEndCommentsFixer.cpp
index c660843..df99bb2 100644
--- a/lib/Format/NamespaceEndCommentsFixer.cpp
+++ b/lib/Format/NamespaceEndCommentsFixer.cpp
@@ -137,7 +137,7 @@
                                                      const FormatStyle &Style)
     : TokenAnalyzer(Env, Style) {}
 
-tooling::Replacements NamespaceEndCommentsFixer::analyze(
+std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
     TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
     FormatTokenLexer &Tokens) {
   const SourceManager &SourceMgr = Env.getSourceManager();
@@ -206,7 +206,7 @@
     }
     StartLineIndex = SIZE_MAX;
   }
-  return Fixes;
+  return {Fixes, 0};
 }
 
 } // namespace format
diff --git a/lib/Format/NamespaceEndCommentsFixer.h b/lib/Format/NamespaceEndCommentsFixer.h
index 7790668..4779f0d 100644
--- a/lib/Format/NamespaceEndCommentsFixer.h
+++ b/lib/Format/NamespaceEndCommentsFixer.h
@@ -25,7 +25,7 @@
 public:
   NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style);
 
-  tooling::Replacements
+  std::pair<tooling::Replacements, unsigned>
   analyze(TokenAnnotator &Annotator,
           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
           FormatTokenLexer &Tokens) override;
diff --git a/lib/Format/SortJavaScriptImports.cpp b/lib/Format/SortJavaScriptImports.cpp
index c4db9a6..d0b979e 100644
--- a/lib/Format/SortJavaScriptImports.cpp
+++ b/lib/Format/SortJavaScriptImports.cpp
@@ -123,7 +123,7 @@
       : TokenAnalyzer(Env, Style),
         FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {}
 
-  tooling::Replacements
+  std::pair<tooling::Replacements, unsigned>
   analyze(TokenAnnotator &Annotator,
           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
           FormatTokenLexer &Tokens) override {
@@ -138,7 +138,7 @@
         parseModuleReferences(Keywords, AnnotatedLines);
 
     if (References.empty())
-      return Result;
+      return {Result, 0};
 
     SmallVector<unsigned, 16> Indices;
     for (unsigned i = 0, e = References.size(); i != e; ++i)
@@ -168,7 +168,7 @@
     }
 
     if (ReferencesInOrder && SymbolsInOrder)
-      return Result;
+      return {Result, 0};
 
     SourceRange InsertionPoint = References[0].Range;
     InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd());
@@ -202,7 +202,7 @@
       assert(false);
     }
 
-    return Result;
+    return {Result, 0};
   }
 
 private:
@@ -449,7 +449,7 @@
   std::unique_ptr<Environment> Env =
       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
   JavaScriptImportSorter Sorter(*Env, Style);
-  return Sorter.process();
+  return Sorter.process().first;
 }
 
 } // end namespace format
diff --git a/lib/Format/TokenAnalyzer.cpp b/lib/Format/TokenAnalyzer.cpp
index 7a8b70c..d1dfb1f 100644
--- a/lib/Format/TokenAnalyzer.cpp
+++ b/lib/Format/TokenAnalyzer.cpp
@@ -38,7 +38,10 @@
 // Code.
 std::unique_ptr<Environment>
 Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,
-                                      ArrayRef<tooling::Range> Ranges) {
+                                      ArrayRef<tooling::Range> Ranges,
+                                      unsigned FirstStartColumn,
+                                      unsigned NextStartColumn,
+                                      unsigned LastStartColumn) {
   // This is referenced by `FileMgr` and will be released by `FileMgr` when it
   // is deleted.
   IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
@@ -70,9 +73,9 @@
     SourceLocation End = Start.getLocWithOffset(Range.getLength());
     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
   }
-  return llvm::make_unique<Environment>(ID, std::move(FileMgr),
-                                        std::move(VirtualSM),
-                                        std::move(Diagnostics), CharRanges);
+  return llvm::make_unique<Environment>(
+      ID, std::move(FileMgr), std::move(VirtualSM), std::move(Diagnostics),
+      CharRanges, FirstStartColumn, NextStartColumn, LastStartColumn);
 }
 
 TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
@@ -89,14 +92,16 @@
                      << "\n");
 }
 
-tooling::Replacements TokenAnalyzer::process() {
+std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() {
   tooling::Replacements Result;
-  FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style,
-                          Encoding);
+  FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(),
+                          Env.getFirstStartColumn(), Style, Encoding);
 
-  UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this);
+  UnwrappedLineParser Parser(Style, Tokens.getKeywords(),
+                             Env.getFirstStartColumn(), Tokens.lex(), *this);
   Parser.parse();
   assert(UnwrappedLines.rbegin()->empty());
+  unsigned Penalty = 0;
   for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
     DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
     SmallVector<AnnotatedLine *, 16> AnnotatedLines;
@@ -107,13 +112,13 @@
       Annotator.annotate(*AnnotatedLines.back());
     }
 
-    tooling::Replacements RunResult =
+    std::pair<tooling::Replacements, unsigned> RunResult =
         analyze(Annotator, AnnotatedLines, Tokens);
 
     DEBUG({
       llvm::dbgs() << "Replacements for run " << Run << ":\n";
-      for (tooling::Replacements::const_iterator I = RunResult.begin(),
-                                                 E = RunResult.end();
+      for (tooling::Replacements::const_iterator I = RunResult.first.begin(),
+                                                 E = RunResult.first.end();
            I != E; ++I) {
         llvm::dbgs() << I->toString() << "\n";
       }
@@ -121,17 +126,19 @@
     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
       delete AnnotatedLines[i];
     }
-    for (const auto &R : RunResult) {
+
+    Penalty += RunResult.second;
+    for (const auto &R : RunResult.first) {
       auto Err = Result.add(R);
       // FIXME: better error handling here. For now, simply return an empty
       // Replacements to indicate failure.
       if (Err) {
         llvm::errs() << llvm::toString(std::move(Err)) << "\n";
-        return tooling::Replacements();
+        return {tooling::Replacements(), 0};
       }
     }
   }
-  return Result;
+  return {Result, Penalty};
 }
 
 void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) {
diff --git a/lib/Format/TokenAnalyzer.h b/lib/Format/TokenAnalyzer.h
index 78a3d1b..96ea00b 100644
--- a/lib/Format/TokenAnalyzer.h
+++ b/lib/Format/TokenAnalyzer.h
@@ -37,21 +37,37 @@
 class Environment {
 public:
   Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges)
-      : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {}
+      : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM),
+      FirstStartColumn(0),
+      NextStartColumn(0),
+      LastStartColumn(0) {}
 
   Environment(FileID ID, std::unique_ptr<FileManager> FileMgr,
               std::unique_ptr<SourceManager> VirtualSM,
               std::unique_ptr<DiagnosticsEngine> Diagnostics,
-              const std::vector<CharSourceRange> &CharRanges)
+              const std::vector<CharSourceRange> &CharRanges,
+              unsigned FirstStartColumn,
+              unsigned NextStartColumn,
+              unsigned LastStartColumn)
       : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()),
-        SM(*VirtualSM), FileMgr(std::move(FileMgr)),
+        SM(*VirtualSM), 
+        FirstStartColumn(FirstStartColumn),
+        NextStartColumn(NextStartColumn),
+        LastStartColumn(LastStartColumn),
+        FileMgr(std::move(FileMgr)),
         VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {}
 
-  // This sets up an virtual file system with file \p FileName containing \p
-  // Code.
+  // This sets up an virtual file system with file \p FileName containing the
+  // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn,
+  // that the next lines of \p Code should start at \p NextStartColumn, and
+  // that \p Code should end at \p LastStartColumn if it ends in newline.
+  // See also the documentation of clang::format::internal::reformat.
   static std::unique_ptr<Environment>
   CreateVirtualEnvironment(StringRef Code, StringRef FileName,
-                           ArrayRef<tooling::Range> Ranges);
+                           ArrayRef<tooling::Range> Ranges,
+                           unsigned FirstStartColumn = 0,
+                           unsigned NextStartColumn = 0,
+                           unsigned LastStartColumn = 0);
 
   FileID getFileID() const { return ID; }
 
@@ -59,10 +75,25 @@
 
   const SourceManager &getSourceManager() const { return SM; }
 
+  // Returns the column at which the fragment of code managed by this
+  // environment starts.
+  unsigned getFirstStartColumn() const { return FirstStartColumn; }
+
+  // Returns the column at which subsequent lines of the fragment of code
+  // managed by this environment should start.
+  unsigned getNextStartColumn() const { return NextStartColumn; }
+
+  // Returns the column at which the fragment of code managed by this
+  // environment should end if it ends in a newline.
+  unsigned getLastStartColumn() const { return LastStartColumn; }
+
 private:
   FileID ID;
   SmallVector<CharSourceRange, 8> CharRanges;
   SourceManager &SM;
+  unsigned FirstStartColumn;
+  unsigned NextStartColumn;
+  unsigned LastStartColumn;
 
   // The order of these fields are important - they should be in the same order
   // as they are created in `CreateVirtualEnvironment` so that they can be
@@ -76,10 +107,10 @@
 public:
   TokenAnalyzer(const Environment &Env, const FormatStyle &Style);
 
-  tooling::Replacements process();
+  std::pair<tooling::Replacements, unsigned> process();
 
 protected:
-  virtual tooling::Replacements
+  virtual std::pair<tooling::Replacements, unsigned>
   analyze(TokenAnnotator &Annotator,
           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
           FormatTokenLexer &Tokens) = 0;
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index a49cbaa..013a77b 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -79,7 +79,17 @@
       if (CurrentToken->is(tok::greater)) {
         Left->MatchingParen = CurrentToken;
         CurrentToken->MatchingParen = Left;
-        CurrentToken->Type = TT_TemplateCloser;
+        // In TT_Proto, we must distignuish between:
+        //   map<key, value>
+        //   msg < item: data >
+        //   msg: < item: data >
+        // In TT_TextProto, map<key, value> does not occur.
+        if (Style.Language == FormatStyle::LK_TextProto ||
+            (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
+             Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral)))
+          CurrentToken->Type = TT_DictLiteral;
+        else
+          CurrentToken->Type = TT_TemplateCloser;
         next();
         return true;
       }
@@ -206,6 +216,7 @@
     bool HasMultipleParametersOnALine = false;
     bool MightBeObjCForRangeLoop =
         Left->Previous && Left->Previous->is(tok::kw_for);
+    FormatToken *PossibleObjCForInToken = nullptr;
     while (CurrentToken) {
       // LookForDecls is set when "if (" has been seen. Check for
       // 'identifier' '*' 'identifier' followed by not '=' -- this
@@ -291,10 +302,17 @@
            CurrentToken->Previous->isSimpleTypeSpecifier()) &&
           !CurrentToken->is(tok::l_brace))
         Contexts.back().IsExpression = false;
-      if (CurrentToken->isOneOf(tok::semi, tok::colon))
+      if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
         MightBeObjCForRangeLoop = false;
-      if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in))
-        CurrentToken->Type = TT_ObjCForIn;
+        if (PossibleObjCForInToken) {
+          PossibleObjCForInToken->Type = TT_Unknown;
+          PossibleObjCForInToken = nullptr;
+        }
+      }
+      if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
+        PossibleObjCForInToken = CurrentToken;
+        PossibleObjCForInToken->Type = TT_ObjCForIn;
+      }
       // When we discover a 'new', we set CanBeExpression to 'false' in order to
       // parse the type correctly. Reset that after a comma.
       if (CurrentToken->is(tok::comma))
@@ -358,12 +376,48 @@
                  Parent->is(TT_TemplateCloser)) {
         Left->Type = TT_ArraySubscriptLSquare;
       } else if (Style.Language == FormatStyle::LK_Proto ||
-                 (!CppArrayTemplates && Parent &&
-                  Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
-                                  tok::comma, tok::l_paren, tok::l_square,
-                                  tok::question, tok::colon, tok::kw_return,
-                                  // Should only be relevant to JavaScript:
-                                  tok::kw_default))) {
+                 Style.Language == FormatStyle::LK_TextProto) {
+        // Square braces in LK_Proto can either be message field attributes:
+        //
+        // optional Aaa aaa = 1 [
+        //   (aaa) = aaa
+        // ];
+        //
+        // extensions 123 [
+        //   (aaa) = aaa
+        // ];
+        //
+        // or text proto extensions (in options):
+        //
+        // option (Aaa.options) = {
+        //   [type.type/type] {
+        //     key: value
+        //   }
+        // }
+        //
+        // or repeated fields (in options):
+        //
+        // option (Aaa.options) = {
+        //   keys: [ 1, 2, 3 ]
+        // }
+        //
+        // In the first and the third case we want to spread the contents inside
+        // the square braces; in the second we want to keep them inline.
+        Left->Type = TT_ArrayInitializerLSquare;
+        if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
+                                tok::equal) &&
+            !Left->endsSequence(tok::l_square, tok::numeric_constant,
+                                tok::identifier) &&
+            !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
+          Left->Type = TT_ProtoExtensionLSquare;
+          BindingIncrease = 10;
+        }
+      } else if (!CppArrayTemplates && Parent &&
+                 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
+                                 tok::comma, tok::l_paren, tok::l_square,
+                                 tok::question, tok::colon, tok::kw_return,
+                                 // Should only be relevant to JavaScript:
+                                 tok::kw_default)) {
         Left->Type = TT_ArrayInitializerLSquare;
       } else {
         BindingIncrease = 10;
@@ -401,6 +455,8 @@
         if (Contexts.back().FirstObjCSelectorName) {
           Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
               Contexts.back().LongestObjCSelectorName;
+          Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts =
+              Left->ParameterCount;
           if (Left->BlockParameterCount > 1)
             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
         }
@@ -414,6 +470,11 @@
                           TT_DesignatedInitializerLSquare)) {
           Left->Type = TT_ObjCMethodExpr;
           StartsObjCMethodExpr = true;
+          // ParameterCount might have been set to 1 before expression was
+          // recognized as ObjCMethodExpr (as '1 + number of commas' formula is
+          // used for other expression types). Parameter counter has to be,
+          // therefore, reset to 0.
+          Left->ParameterCount = 0;
           Contexts.back().ColonIsObjCMethodExpr = true;
           if (Parent && Parent->is(tok::r_paren))
             Parent->Type = TT_CastRParen;
@@ -462,13 +523,15 @@
           FormatToken *Previous = CurrentToken->getPreviousNonComment();
           if (Previous->is(TT_JsTypeOptionalQuestion))
             Previous = Previous->getPreviousNonComment();
-          if (((CurrentToken->is(tok::colon) &&
-                (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
-               Style.Language == FormatStyle::LK_Proto ||
-               Style.Language == FormatStyle::LK_TextProto) &&
-              (Previous->Tok.getIdentifierInfo() ||
-               Previous->is(tok::string_literal)))
-            Previous->Type = TT_SelectorName;
+          if ((CurrentToken->is(tok::colon) &&
+               (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
+              Style.Language == FormatStyle::LK_Proto ||
+              Style.Language == FormatStyle::LK_TextProto) {
+            Left->Type = TT_DictLiteral;
+            if (Previous->Tok.getIdentifierInfo() ||
+                Previous->is(tok::string_literal))
+              Previous->Type = TT_SelectorName;
+          }
           if (CurrentToken->is(tok::colon) ||
               Style.Language == FormatStyle::LK_JavaScript)
             Left->Type = TT_DictLiteral;
@@ -486,7 +549,10 @@
   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
     if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block)
       ++Left->BlockParameterCount;
-    if (Current->is(tok::comma)) {
+    if (Left->Type == TT_ObjCMethodExpr) {
+      if (Current->is(tok::colon))
+        ++Left->ParameterCount;
+    } else if (Current->is(tok::comma)) {
       ++Left->ParameterCount;
       if (!Left->Role)
         Left->Role.reset(new CommaSeparatedList(Style));
@@ -569,12 +635,12 @@
             BeforePrevious->is(tok::r_square) ||
             Contexts.back().LongestObjCSelectorName == 0) {
           Tok->Previous->Type = TT_SelectorName;
-          if (Tok->Previous->ColumnWidth >
-              Contexts.back().LongestObjCSelectorName)
-            Contexts.back().LongestObjCSelectorName =
-                Tok->Previous->ColumnWidth;
           if (!Contexts.back().FirstObjCSelectorName)
             Contexts.back().FirstObjCSelectorName = Tok->Previous;
+          else if (Tok->Previous->ColumnWidth >
+                   Contexts.back().LongestObjCSelectorName)
+            Contexts.back().LongestObjCSelectorName =
+                Tok->Previous->ColumnWidth;
         }
       } else if (Contexts.back().ColonIsForRangeExpr) {
         Tok->Type = TT_RangeBasedForLoopColon;
@@ -617,7 +683,9 @@
       break;
     case tok::kw_for:
       if (Style.Language == FormatStyle::LK_JavaScript) {
-        if (Tok->Previous && Tok->Previous->is(tok::period))
+        // x.for and {for: ...}
+        if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
+            (Tok->Next && Tok->Next->is(tok::colon)))
           break;
         // JS' for await ( ...
         if (CurrentToken && CurrentToken->is(Keywords.kw_await))
@@ -666,7 +734,15 @@
     case tok::less:
       if (parseAngle()) {
         Tok->Type = TT_TemplateOpener;
-        if (Style.Language == FormatStyle::LK_TextProto) {
+        // In TT_Proto, we must distignuish between:
+        //   map<key, value>
+        //   msg < item: data >
+        //   msg: < item: data >
+        // In TT_TextProto, map<key, value> does not occur.
+        if (Style.Language == FormatStyle::LK_TextProto ||
+            (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
+             Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
+          Tok->Type = TT_DictLiteral;
           FormatToken *Previous = Tok->getPreviousNonComment();
           if (Previous && Previous->Type != TT_DictLiteral)
             Previous->Type = TT_SelectorName;
@@ -687,16 +763,21 @@
         return false;
       break;
     case tok::greater:
-      Tok->Type = TT_BinaryOperator;
+      if (Style.Language != FormatStyle::LK_TextProto)
+        Tok->Type = TT_BinaryOperator;
       break;
     case tok::kw_operator:
+      if (Style.Language == FormatStyle::LK_TextProto ||
+          Style.Language == FormatStyle::LK_Proto)
+        break;
       while (CurrentToken &&
              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
         if (CurrentToken->isOneOf(tok::star, tok::amp))
           CurrentToken->Type = TT_PointerOrReference;
         consumeToken();
         if (CurrentToken &&
-            CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma))
+            CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator,
+                                            tok::comma))
           CurrentToken->Previous->Type = TT_OverloadedOperator;
       }
       if (CurrentToken) {
@@ -868,7 +949,7 @@
       return parsePreprocessorDirective();
 
     // Directly allow to 'import <string-literal>' to support protocol buffer
-    // definitions (code.google.com/p/protobuf) or missing "#" (either way we
+    // definitions (github.com/google/protobuf) or missing "#" (either way we
     // should not break the line).
     IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
     if ((Style.Language == FormatStyle::LK_Java &&
@@ -1152,7 +1233,9 @@
         Current.Type = TT_ConditionalExpr;
       }
     } else if (Current.isBinaryOperator() &&
-               (!Current.Previous || Current.Previous->isNot(tok::l_square))) {
+               (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
+               (!Current.is(tok::greater) &&
+                Style.Language != FormatStyle::LK_TextProto)) {
       Current.Type = TT_BinaryOperator;
     } else if (Current.is(tok::comment)) {
       if (Current.TokenText.startswith("/*")) {
@@ -1456,10 +1539,8 @@
     if (!PrevToken)
       return TT_UnaryOperator;
 
-    if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator) &&
-        !PrevToken->is(tok::exclaim))
-      // There aren't any trailing unary operators except for TypeScript's
-      // non-null operator (!). Thus, this must be squence of leading operators.
+    if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
+      // This must be a sequence of leading unary operators.
       return TT_UnaryOperator;
 
     // Use heuristics to recognize unary operators.
@@ -1513,7 +1594,7 @@
                    AnnotatedLine &Line)
       : Style(Style), Keywords(Keywords), Current(Line.First) {}
 
-  /// \brief Parse expressions with the given operatore precedence.
+  /// \brief Parse expressions with the given operator precedence.
   void parse(int Precedence = 0) {
     // Skip 'return' and ObjC selector colons as they are not part of a binary
     // expression.
@@ -1662,17 +1743,15 @@
   /// \brief Parse unary operator expressions and surround them with fake
   /// parentheses if appropriate.
   void parseUnaryOperator() {
-    if (!Current || Current->isNot(TT_UnaryOperator)) {
-      parse(PrecedenceArrowAndPeriod);
-      return;
+    llvm::SmallVector<FormatToken *, 2> Tokens;
+    while (Current && Current->is(TT_UnaryOperator)) {
+      Tokens.push_back(Current);
+      next();
     }
-
-    FormatToken *Start = Current;
-    next();
-    parseUnaryOperator();
-
-    // The actual precedence doesn't matter.
-    addFakeParenthesis(Start, prec::Unknown);
+    parse(PrecedenceArrowAndPeriod);
+    for (FormatToken *Token : llvm::reverse(Tokens))
+      // The actual precedence doesn't matter.
+      addFakeParenthesis(Token, prec::Unknown);
   }
 
   void parseConditionalExpr() {
@@ -1722,15 +1801,18 @@
       }
     }
 
-    if (NextNonCommentLine && CommentLine) {
-      // If the comment is currently aligned with the line immediately following
-      // it, that's probably intentional and we should keep it.
-      bool AlignedWithNextLine =
-          NextNonCommentLine->First->NewlinesBefore <= 1 &&
-          NextNonCommentLine->First->OriginalColumn ==
-              (*I)->First->OriginalColumn;
-      if (AlignedWithNextLine)
-        (*I)->Level = NextNonCommentLine->Level;
+    // If the comment is currently aligned with the line immediately following
+    // it, that's probably intentional and we should keep it.
+    if (NextNonCommentLine && CommentLine &&
+        NextNonCommentLine->First->NewlinesBefore <= 1 &&
+        NextNonCommentLine->First->OriginalColumn ==
+            (*I)->First->OriginalColumn) {
+      // Align comments for preprocessor lines with the # in column 0.
+      // Otherwise, align with the next line.
+      (*I)->Level = (NextNonCommentLine->Type == LT_PreprocessorDirective ||
+                     NextNonCommentLine->Type == LT_ImportStatement)
+                        ? 0
+                        : NextNonCommentLine->Level;
     } else {
       NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
     }
@@ -1891,7 +1973,8 @@
   }
 
   Line.First->TotalLength =
-      Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
+      Line.First->IsMultiline ? Style.ColumnLimit
+                              : Line.FirstStartColumn + Line.First->ColumnWidth;
   FormatToken *Current = Line.First->Next;
   bool InFunctionDecl = Line.MightBeFunctionDecl;
   while (Current) {
@@ -2151,7 +2234,7 @@
   if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
       Left.Previous->isLabelString() &&
       (Left.NextOperator || Left.OperatorIndex != 0))
-    return 45;
+    return 50;
   if (Right.is(tok::plus) && Left.isLabelString() &&
       (Right.NextOperator || Right.OperatorIndex != 0))
     return 25;
@@ -2219,8 +2302,17 @@
     return !Left.is(TT_ObjCMethodExpr);
   if (Left.is(tok::coloncolon))
     return false;
-  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
+  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
+    if (Style.Language == FormatStyle::LK_TextProto ||
+        (Style.Language == FormatStyle::LK_Proto &&
+         (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
+      // Format empty list as `<>`.
+      if (Left.is(tok::less) && Right.is(tok::greater))
+        return false;
+      return !Style.Cpp11BracedListStyle;
+    }
     return false;
+  }
   if (Right.is(tok::ellipsis))
     return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
                                     Left.Previous->is(tok::kw_case));
@@ -2261,16 +2353,26 @@
             !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
   if (Right.is(tok::star) && Left.is(tok::l_paren))
     return false;
+  const auto SpaceRequiredForArrayInitializerLSquare =
+      [](const FormatToken &LSquareTok, const FormatStyle &Style) {
+        return Style.SpacesInContainerLiterals ||
+               ((Style.Language == FormatStyle::LK_Proto ||
+                 Style.Language == FormatStyle::LK_TextProto) &&
+                !Style.Cpp11BracedListStyle &&
+                LSquareTok.endsSequence(tok::l_square, tok::colon,
+                                        TT_SelectorName));
+      };
   if (Left.is(tok::l_square))
-    return (Left.is(TT_ArrayInitializerLSquare) &&
-            Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||
+    return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
+            SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
            (Left.isOneOf(TT_ArraySubscriptLSquare,
                          TT_StructuredBindingLSquare) &&
             Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
   if (Right.is(tok::r_square))
     return Right.MatchingParen &&
-           ((Style.SpacesInContainerLiterals &&
-             Right.MatchingParen->is(TT_ArrayInitializerLSquare)) ||
+           ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
+             SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
+                                                     Style)) ||
             (Style.SpacesInSquareBrackets &&
              Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
                                           TT_StructuredBindingLSquare)));
@@ -2349,6 +2451,15 @@
       return true;
     if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
       return true;
+    // Slashes occur in text protocol extension syntax: [type/type] { ... }.
+    if (Left.is(tok::slash) || Right.is(tok::slash))
+      return false;
+    if (Left.MatchingParen && Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
+        Right.isOneOf(tok::l_brace, tok::less))
+      return !Style.Cpp11BracedListStyle;
+    // A percent is probably part of a formatting specification, such as %lld.
+    if (Left.is(tok::percent))
+      return false;
   } else if (Style.Language == FormatStyle::LK_JavaScript) {
     if (Left.is(TT_JsFatArrow))
       return true;
@@ -2397,9 +2508,11 @@
     if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
                       tok::kw_const) ||
          // "of" is only a keyword if it appears after another identifier
-         // (e.g. as "const x of y" in a for loop).
+         // (e.g. as "const x of y" in a for loop), or after a destructuring
+         // operation (const [x, y] of z, const {a, b} of c).
          (Left.is(Keywords.kw_of) && Left.Previous &&
-          Left.Previous->Tok.getIdentifierInfo())) &&
+          (Left.Previous->Tok.is(tok::identifier) ||
+           Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
         (!Left.Previous || !Left.Previous->is(tok::period)))
       return true;
     if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
@@ -2431,8 +2544,9 @@
       return false;
     if (Right.is(TT_JsNonNullAssertion))
       return false;
-    if (Left.is(TT_JsNonNullAssertion) && Right.is(Keywords.kw_as))
-      return true; // "x! as string"
+    if (Left.is(TT_JsNonNullAssertion) &&
+        Right.isOneOf(Keywords.kw_as, Keywords.kw_in))
+      return true; // "x! as string", "x! in y"
   } else if (Style.Language == FormatStyle::LK_Java) {
     if (Left.is(tok::r_square) && Right.is(tok::l_brace))
       return true;
@@ -2467,8 +2581,15 @@
     return true;
   if (Right.is(tok::comma))
     return false;
-  if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen))
+  if (Right.is(TT_ObjCBlockLParen))
     return true;
+  if (Right.is(TT_CtorInitializerColon))
+    return Style.SpaceBeforeCtorInitializerColon;
+  if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
+    return false;
+  if (Right.is(TT_RangeBasedForLoopColon) &&
+      !Style.SpaceBeforeRangeBasedForLoopColon)
+    return false;
   if (Right.is(tok::colon)) {
     if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
         !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
@@ -2492,9 +2613,13 @@
     return Style.SpaceAfterCStyleCast ||
            Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
 
-  if (Left.is(tok::greater) && Right.is(tok::greater))
+  if (Left.is(tok::greater) && Right.is(tok::greater)) {
+    if (Style.Language == FormatStyle::LK_TextProto ||
+        (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral)))
+      return !Style.Cpp11BracedListStyle;
     return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
            (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
+  }
   if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
       Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
       (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod)))
@@ -2678,6 +2803,9 @@
       (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
     return true;
 
+  if (Right.is(TT_ProtoExtensionLSquare))
+    return true;
+
   return false;
 }
 
@@ -2696,12 +2824,17 @@
   } else if (Style.Language == FormatStyle::LK_JavaScript) {
     const FormatToken *NonComment = Right.getPreviousNonComment();
     if (NonComment &&
-        NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,
-                            tok::kw_throw, Keywords.kw_interface,
-                            Keywords.kw_type, tok::kw_static, tok::kw_public,
-                            tok::kw_private, tok::kw_protected,
-                            Keywords.kw_readonly, Keywords.kw_abstract,
-                            Keywords.kw_get, Keywords.kw_set))
+        NonComment->isOneOf(
+            tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
+            tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
+            tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
+            Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get,
+            Keywords.kw_set, Keywords.kw_async, Keywords.kw_await))
+      return false; // Otherwise automatic semicolon insertion would trigger.
+    if (Right.NestingLevel == 0 &&
+        (Left.Tok.getIdentifierInfo() ||
+         Left.isOneOf(tok::r_square, tok::r_paren)) &&
+        Right.isOneOf(tok::l_square, tok::l_paren))
       return false; // Otherwise automatic semicolon insertion would trigger.
     if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
       return false;
@@ -2771,11 +2904,17 @@
   if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
       Left.isNot(TT_SelectorName))
     return true;
+
   if (Right.is(tok::colon) &&
       !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
     return false;
-  if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr))
+  if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
+    if ((Style.Language == FormatStyle::LK_Proto ||
+         Style.Language == FormatStyle::LK_TextProto) &&
+        Right.isStringLiteral())
+      return false;
     return true;
+  }
   if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
                                     Right.Next->is(TT_ObjCMethodExpr)))
     return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h
index 8055095..04a18d4 100644
--- a/lib/Format/TokenAnnotator.h
+++ b/lib/Format/TokenAnnotator.h
@@ -43,7 +43,8 @@
         InPPDirective(Line.InPPDirective),
         MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
         IsMultiVariableDeclStmt(false), Affected(false),
-        LeadingEmptyLinesAffected(false), ChildrenAffected(false) {
+        LeadingEmptyLinesAffected(false), ChildrenAffected(false),
+        FirstStartColumn(Line.FirstStartColumn) {
     assert(!Line.Tokens.empty());
 
     // Calculate Next and Previous for all tokens. Note that we must overwrite
@@ -127,6 +128,8 @@
   /// \c True if one of this line's children intersects with an input range.
   bool ChildrenAffected;
 
+  unsigned FirstStartColumn;
+
 private:
   // Disallow copying.
   AnnotatedLine(const AnnotatedLine &) = delete;
diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp
index d25f0a1..2ce39fb 100644
--- a/lib/Format/UnwrappedLineFormatter.cpp
+++ b/lib/Format/UnwrappedLineFormatter.cpp
@@ -304,9 +304,23 @@
     if (TheLine->First->is(tok::l_brace) && TheLine->First == TheLine->Last &&
         I != AnnotatedLines.begin() &&
         I[-1]->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) {
-      return Style.AllowShortBlocksOnASingleLine
-                 ? tryMergeSimpleBlock(I - 1, E, Limit)
-                 : 0;
+      unsigned MergedLines = 0;
+      if (Style.AllowShortBlocksOnASingleLine) {
+        MergedLines = tryMergeSimpleBlock(I - 1, E, Limit);
+        // If we managed to merge the block, discard the first merged line
+        // since we are merging starting from I.
+        if (MergedLines > 0)
+          --MergedLines;
+      }
+      return MergedLines;
+    }
+    // Don't merge block with left brace wrapped after ObjC special blocks
+    if (TheLine->First->is(tok::l_brace) && I != AnnotatedLines.begin() &&
+        I[-1]->First->is(tok::at) && I[-1]->First->Next) {
+      tok::ObjCKeywordKind kwId = I[-1]->First->Next->Tok.getObjCKeywordID();
+      if (kwId == clang::tok::objc_autoreleasepool ||
+          kwId == clang::tok::objc_synchronized)
+        return 0;
     }
     // Try to merge a block with left brace wrapped that wasn't yet covered
     if (TheLine->Last->is(tok::l_brace)) {
@@ -517,8 +531,13 @@
       } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&
                  !startsExternCBlock(Line)) {
         // We don't merge short records.
-        FormatToken *RecordTok =
-            Line.First->is(tok::kw_typedef) ? Line.First->Next : Line.First;
+        FormatToken *RecordTok = Line.First;
+        // Skip record modifiers.
+        while (RecordTok->Next &&
+               RecordTok->isOneOf(tok::kw_typedef, tok::kw_export,
+                                  Keywords.kw_declare, Keywords.kw_abstract,
+                                  tok::kw_default))
+          RecordTok = RecordTok->Next;
         if (RecordTok &&
             RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
                                Keywords.kw_interface))
@@ -659,7 +678,9 @@
   /// \brief Formats an \c AnnotatedLine and returns the penalty.
   ///
   /// If \p DryRun is \c false, directly applies the changes.
-  virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+  virtual unsigned formatLine(const AnnotatedLine &Line,
+                              unsigned FirstIndent,
+                              unsigned FirstStartColumn,
                               bool DryRun) = 0;
 
 protected:
@@ -730,7 +751,8 @@
           *Child->First, /*Newlines=*/0, /*Spaces=*/1,
           /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
     }
-    Penalty += formatLine(*Child, State.Column + 1, DryRun);
+    Penalty +=
+        formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun);
 
     State.Column += 1 + Child->Last->TotalLength;
     return true;
@@ -756,10 +778,10 @@
   /// \brief Formats the line, simply keeping all of the input's line breaking
   /// decisions.
   unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
-                      bool DryRun) override {
+                      unsigned FirstStartColumn, bool DryRun) override {
     assert(!DryRun);
-    LineState State =
-        Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false);
+    LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn,
+                                                &Line, /*DryRun=*/false);
     while (State.NextToken) {
       bool Newline =
           Indenter->mustBreak(State) ||
@@ -782,9 +804,10 @@
 
   /// \brief Puts all tokens into a single line.
   unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
-                      bool DryRun) override {
+                      unsigned FirstStartColumn, bool DryRun) override {
     unsigned Penalty = 0;
-    LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
+    LineState State =
+        Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);
     while (State.NextToken) {
       formatChildren(State, /*Newline=*/false, DryRun, Penalty);
       Indenter->addTokenToState(
@@ -806,8 +829,9 @@
   /// \brief Formats the line by finding the best line breaks with line lengths
   /// below the column limit.
   unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
-                      bool DryRun) override {
-    LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
+                      unsigned FirstStartColumn, bool DryRun) override {
+    LineState State =
+        Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);
 
     // If the ObjC method declaration does not fit on a line, we should format
     // it with one arg per line.
@@ -974,7 +998,10 @@
 unsigned
 UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
                                bool DryRun, int AdditionalIndent,
-                               bool FixBadIndentation) {
+                               bool FixBadIndentation,
+                               unsigned FirstStartColumn,
+                               unsigned NextStartColumn,
+                               unsigned LastStartColumn) {
   LineJoiner Joiner(Style, Keywords, Lines);
 
   // Try to look up already computed penalty in DryRun-mode.
@@ -994,9 +1021,10 @@
   // The minimum level of consecutive lines that have been formatted.
   unsigned RangeMinLevel = UINT_MAX;
 
+  bool FirstLine = true;
   for (const AnnotatedLine *Line =
            Joiner.getNextMergedLine(DryRun, IndentTracker);
-       Line; Line = NextLine) {
+       Line; Line = NextLine, FirstLine = false) {
     const AnnotatedLine &TheLine = *Line;
     unsigned Indent = IndentTracker.getIndent();
 
@@ -1020,8 +1048,12 @@
     }
 
     if (ShouldFormat && TheLine.Type != LT_Invalid) {
-      if (!DryRun)
-        formatFirstToken(TheLine, PreviousLine, Indent);
+      if (!DryRun) {
+        bool LastLine = Line->First->is(tok::eof);
+        formatFirstToken(TheLine, PreviousLine,
+                         Indent,
+                         LastLine ? LastStartColumn : NextStartColumn + Indent);
+      }
 
       NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
       unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
@@ -1030,16 +1062,18 @@
           (TheLine.Type == LT_ImportStatement &&
            (Style.Language != FormatStyle::LK_JavaScript ||
             !Style.JavaScriptWrapImports));
-
       if (Style.ColumnLimit == 0)
         NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
-            .formatLine(TheLine, Indent, DryRun);
+            .formatLine(TheLine, NextStartColumn + Indent,
+                        FirstLine ? FirstStartColumn : 0, DryRun);
       else if (FitsIntoOneLine)
         Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
-                       .formatLine(TheLine, Indent, DryRun);
+                       .formatLine(TheLine, NextStartColumn + Indent,
+                                   FirstLine ? FirstStartColumn : 0, DryRun);
       else
         Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
-                       .formatLine(TheLine, Indent, DryRun);
+                       .formatLine(TheLine, NextStartColumn + Indent,
+                                   FirstLine ? FirstStartColumn : 0, DryRun);
       RangeMinLevel = std::min(RangeMinLevel, TheLine.Level);
     } else {
       // If no token in the current line is affected, we still need to format
@@ -1062,6 +1096,7 @@
         // Format the first token.
         if (ReformatLeadingWhitespace)
           formatFirstToken(TheLine, PreviousLine,
+                           TheLine.First->OriginalColumn,
                            TheLine.First->OriginalColumn);
         else
           Whitespaces->addUntouchableToken(*TheLine.First,
@@ -1084,12 +1119,14 @@
 
 void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line,
                                               const AnnotatedLine *PreviousLine,
-                                              unsigned Indent) {
+                                              unsigned Indent,
+                                              unsigned NewlineIndent) {
   FormatToken &RootToken = *Line.First;
   if (RootToken.is(tok::eof)) {
     unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
-    Whitespaces->replaceWhitespace(RootToken, Newlines, /*Spaces=*/0,
-                                   /*StartOfTokenColumn=*/0);
+    unsigned TokenIndent = Newlines ? NewlineIndent : 0;
+    Whitespaces->replaceWhitespace(RootToken, Newlines, TokenIndent,
+                                   TokenIndent);
     return;
   }
   unsigned Newlines =
@@ -1099,15 +1136,14 @@
       (!RootToken.Next ||
        (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
     Newlines = std::min(Newlines, 1u);
+  // Remove empty lines at the start of nested blocks (lambdas/arrow functions)
+  if (PreviousLine == nullptr && Line.Level > 0)
+    Newlines = std::min(Newlines, 1u);
   if (Newlines == 0 && !RootToken.IsFirst)
     Newlines = 1;
   if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
     Newlines = 0;
 
-  // Preprocessor directives get indented after the hash, if indented.
-  if (Line.Type == LT_PreprocessorDirective || Line.Type == LT_ImportStatement)
-    Indent = 0;
-
   // Remove empty lines after "{".
   if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine &&
       PreviousLine->Last->is(tok::l_brace) &&
@@ -1125,6 +1161,13 @@
       (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline))
     Newlines = std::min(1u, Newlines);
 
+  if (Newlines)
+    Indent = NewlineIndent;
+
+  // Preprocessor directives get indented after the hash, if indented.
+  if (Line.Type == LT_PreprocessorDirective || Line.Type == LT_ImportStatement)
+    Indent = 0;
+
   Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent,
                                  Line.InPPDirective &&
                                      !RootToken.HasUnescapedNewline);
diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h
index 9d7a910..6432ca8 100644
--- a/lib/Format/UnwrappedLineFormatter.h
+++ b/lib/Format/UnwrappedLineFormatter.h
@@ -40,13 +40,17 @@
   /// \brief Format the current block and return the penalty.
   unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines,
                   bool DryRun = false, int AdditionalIndent = 0,
-                  bool FixBadIndentation = false);
+                  bool FixBadIndentation = false,
+                  unsigned FirstStartColumn = 0,
+                  unsigned NextStartColumn = 0,
+                  unsigned LastStartColumn = 0);
 
 private:
   /// \brief Add a new line and the required indent before the first Token
   /// of the \c UnwrappedLine if there was no structural parsing error.
   void formatFirstToken(const AnnotatedLine &Line,
-                        const AnnotatedLine *PreviousLine, unsigned Indent);
+                        const AnnotatedLine *PreviousLine, unsigned Indent,
+                        unsigned NewlineIndent);
 
   /// \brief Returns the column limit for a line, taking into account whether we
   /// need an escaped newline due to a continued preprocessor directive.
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index e210beb..5ab5cc4 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -18,6 +18,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include <algorithm>
+
 #define DEBUG_TYPE "format-parser"
 
 namespace clang {
@@ -56,7 +58,7 @@
 };
 
 static bool isLineComment(const FormatToken &FormatTok) {
-  return FormatTok.is(tok::comment) && FormatTok.TokenText.startswith("//");
+  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
 }
 
 // Checks if \p FormatTok is a line comment that continues the line comment
@@ -225,20 +227,24 @@
 
 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                          const AdditionalKeywords &Keywords,
+                                         unsigned FirstStartColumn,
                                          ArrayRef<FormatToken *> Tokens,
                                          UnwrappedLineConsumer &Callback)
     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
-      IfNdefCondition(nullptr), FoundIncludeGuardStart(false),
-      IncludeGuardRejected(false) {}
+      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
+                       ? IG_Rejected
+                       : IG_Inited),
+      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
 
 void UnwrappedLineParser::reset() {
   PPBranchLevel = -1;
-  IfNdefCondition = nullptr;
-  FoundIncludeGuardStart = false;
-  IncludeGuardRejected = false;
+  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
+                     ? IG_Rejected
+                     : IG_Inited;
+  IncludeGuardToken = nullptr;
   Line.reset(new UnwrappedLine);
   CommentsBeforeNextToken.clear();
   FormatTok = nullptr;
@@ -247,10 +253,12 @@
   CurrentLines = &Lines;
   DeclarationScopeStack.clear();
   PPStack.clear();
+  Line->FirstStartColumn = FirstStartColumn;
 }
 
 void UnwrappedLineParser::parse() {
   IndexedTokenSource TokenSource(AllTokens);
+  Line->FirstStartColumn = FirstStartColumn;
   do {
     DEBUG(llvm::dbgs() << "----\n");
     reset();
@@ -259,6 +267,14 @@
 
     readToken();
     parseFile();
+
+    // If we found an include guard then all preprocessor directives (other than
+    // the guard) are over-indented by one.
+    if (IncludeGuard == IG_Found)
+      for (auto &Line : Lines)
+        if (Line.InPPDirective && Line.Level > 0)
+          --Line.Level;
+
     // Create line with eof token.
     pushToken(FormatTok);
     addUnwrappedLine();
@@ -328,7 +344,19 @@
       nextToken();
       addUnwrappedLine();
       break;
-    case tok::kw_default:
+    case tok::kw_default: {
+      unsigned StoredPosition = Tokens->getPosition();
+      FormatToken *Next = Tokens->getNextToken();
+      FormatTok = Tokens->setPosition(StoredPosition);
+      if (Next && Next->isNot(tok::colon)) {
+        // default not followed by ':' is not a case label; treat it like
+        // an identifier.
+        parseStructuralElement();
+        break;
+      }
+      // Else, if it is 'default:', fall through to the case handling.
+      LLVM_FALLTHROUGH;
+    }
     case tok::kw_case:
       if (Style.Language == FormatStyle::LK_JavaScript &&
           Line->MustBeDeclaration) {
@@ -374,13 +402,16 @@
     switch (Tok->Tok.getKind()) {
     case tok::l_brace:
       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
-        if (PrevTok->is(tok::colon))
-          // A colon indicates this code is in a type, or a braced list
-          // following a label in an object literal ({a: {b: 1}}). The code
-          // below could be confused by semicolons between the individual
-          // members in a type member list, which would normally trigger
-          // BK_Block. In both cases, this must be parsed as an inline braced
-          // init.
+        if (PrevTok->isOneOf(tok::colon, tok::less))
+          // A ':' indicates this code is in a type, or a braced list
+          // following a label in an object literal ({a: {b: 1}}).
+          // A '<' could be an object used in a comparison, but that is nonsense
+          // code (can never return true), so more likely it is a generic type
+          // argument (`X<{a: string; b: number}>`).
+          // The code below could be confused by semicolons between the
+          // individual members in a type member list, which would normally
+          // trigger BK_Block. In both cases, this must be parsed as an inline
+          // braced init.
           Tok->BlockKind = BK_BracedInit;
         else if (PrevTok->is(tok::r_paren))
           // `) { }` can only occur in function or method declarations in JS.
@@ -704,26 +735,27 @@
   // If there's a #ifndef on the first line, and the only lines before it are
   // comments, it could be an include guard.
   bool MaybeIncludeGuard = IfNDef;
-  if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) {
+  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
     for (auto &Line : Lines) {
       if (!Line.Tokens.front().Tok->is(tok::comment)) {
         MaybeIncludeGuard = false;
-        IncludeGuardRejected = true;
+        IncludeGuard = IG_Rejected;
         break;
       }
     }
-  }
   --PPBranchLevel;
   parsePPUnknown();
   ++PPBranchLevel;
-  if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard)
-    IfNdefCondition = IfCondition;
+  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
+    IncludeGuard = IG_IfNdefed;
+    IncludeGuardToken = IfCondition;
+  }
 }
 
 void UnwrappedLineParser::parsePPElse() {
   // If a potential include guard has an #else, it's not an include guard.
-  if (FoundIncludeGuardStart && PPBranchLevel == 0)
-    FoundIncludeGuardStart = false;
+  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
+    IncludeGuard = IG_Rejected;
   conditionalCompilationAlternative();
   if (PPBranchLevel > -1)
     --PPBranchLevel;
@@ -737,34 +769,37 @@
   conditionalCompilationEnd();
   parsePPUnknown();
   // If the #endif of a potential include guard is the last thing in the file,
-  // then we count it as a real include guard and subtract one from every
-  // preprocessor indent.
+  // then we found an include guard.
   unsigned TokenPosition = Tokens->getPosition();
   FormatToken *PeekNext = AllTokens[TokenPosition];
-  if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) &&
+  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
+      PeekNext->is(tok::eof) &&
       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
-    for (auto &Line : Lines)
-      if (Line.InPPDirective && Line.Level > 0)
-        --Line.Level;
+    IncludeGuard = IG_Found;
 }
 
 void UnwrappedLineParser::parsePPDefine() {
   nextToken();
 
   if (FormatTok->Tok.getKind() != tok::identifier) {
+    IncludeGuard = IG_Rejected;
+    IncludeGuardToken = nullptr;
     parsePPUnknown();
     return;
   }
-  if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) {
-    FoundIncludeGuardStart = true;
+
+  if (IncludeGuard == IG_IfNdefed &&
+      IncludeGuardToken->TokenText == FormatTok->TokenText) {
+    IncludeGuard = IG_Defined;
+    IncludeGuardToken = nullptr;
     for (auto &Line : Lines) {
       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
-        FoundIncludeGuardStart = false;
+        IncludeGuard = IG_Rejected;
         break;
       }
     }
   }
-  IfNdefCondition = nullptr;
+
   nextToken();
   if (FormatTok->Tok.getKind() == tok::l_paren &&
       FormatTok->WhitespaceRange.getBegin() ==
@@ -791,7 +826,6 @@
   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
     Line->Level += PPBranchLevel + 1;
   addUnwrappedLine();
-  IfNdefCondition = nullptr;
 }
 
 // Here we blacklist certain tokens that are not usually the first token in an
@@ -888,11 +922,14 @@
   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
   bool PreviousStartsTemplateExpr =
       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
-  if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
-    // If the token before the previous one is an '@', the previous token is an
-    // annotation and can precede another identifier/value.
-    const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
-    if (PrePrevious->is(tok::at))
+  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
+    // If the line contains an '@' sign, the previous token might be an
+    // annotation, which can precede another identifier/value.
+    bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
+                              [](UnwrappedLineNode &LineNode) {
+                                return LineNode.Tok->is(tok::at);
+                              }) != Line->Tokens.end();
+    if (HasAt)
       return;
   }
   if (Next->is(tok::exclaim) && PreviousMustBeValue)
@@ -921,49 +958,6 @@
     return;
   }
   switch (FormatTok->Tok.getKind()) {
-  case tok::at:
-    nextToken();
-    if (FormatTok->Tok.is(tok::l_brace)) {
-      nextToken();
-      parseBracedList();
-      break;
-    }
-    switch (FormatTok->Tok.getObjCKeywordID()) {
-    case tok::objc_public:
-    case tok::objc_protected:
-    case tok::objc_package:
-    case tok::objc_private:
-      return parseAccessSpecifier();
-    case tok::objc_interface:
-    case tok::objc_implementation:
-      return parseObjCInterfaceOrImplementation();
-    case tok::objc_protocol:
-      return parseObjCProtocol();
-    case tok::objc_end:
-      return; // Handled by the caller.
-    case tok::objc_optional:
-    case tok::objc_required:
-      nextToken();
-      addUnwrappedLine();
-      return;
-    case tok::objc_autoreleasepool:
-      nextToken();
-      if (FormatTok->Tok.is(tok::l_brace)) {
-        if (Style.BraceWrapping.AfterObjCDeclaration)
-          addUnwrappedLine();
-        parseBlock(/*MustBeDeclaration=*/false);
-      }
-      addUnwrappedLine();
-      return;
-    case tok::objc_try:
-      // This branch isn't strictly necessary (the kw_try case below would
-      // do this too after the tok::at is parsed above).  But be explicit.
-      parseTryCatch();
-      return;
-    default:
-      break;
-    }
-    break;
   case tok::kw_asm:
     nextToken();
     if (FormatTok->is(tok::l_brace)) {
@@ -1021,8 +1015,12 @@
       // 'default: string' field declaration.
       break;
     nextToken();
-    parseLabel();
-    return;
+    if (FormatTok->is(tok::colon)) {
+      parseLabel();
+      return;
+    }
+    // e.g. "default void f() {}" in a Java interface.
+    break;
   case tok::kw_case:
     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
       // 'case: string' field declaration.
@@ -1106,6 +1104,56 @@
       if (FormatTok->Tok.is(tok::l_brace)) {
         nextToken();
         parseBracedList();
+        break;
+      }
+      switch (FormatTok->Tok.getObjCKeywordID()) {
+      case tok::objc_public:
+      case tok::objc_protected:
+      case tok::objc_package:
+      case tok::objc_private:
+        return parseAccessSpecifier();
+      case tok::objc_interface:
+      case tok::objc_implementation:
+        return parseObjCInterfaceOrImplementation();
+      case tok::objc_protocol:
+        if (parseObjCProtocol())
+          return;
+        break;
+      case tok::objc_end:
+        return; // Handled by the caller.
+      case tok::objc_optional:
+      case tok::objc_required:
+        nextToken();
+        addUnwrappedLine();
+        return;
+      case tok::objc_autoreleasepool:
+        nextToken();
+        if (FormatTok->Tok.is(tok::l_brace)) {
+          if (Style.BraceWrapping.AfterControlStatement)
+            addUnwrappedLine();
+          parseBlock(/*MustBeDeclaration=*/false);
+        }
+        addUnwrappedLine();
+        return;
+      case tok::objc_synchronized:
+        nextToken();
+        if (FormatTok->Tok.is(tok::l_paren))
+           // Skip synchronization object
+           parseParens();
+        if (FormatTok->Tok.is(tok::l_brace)) {
+          if (Style.BraceWrapping.AfterControlStatement)
+            addUnwrappedLine();
+          parseBlock(/*MustBeDeclaration=*/false);
+        }
+        addUnwrappedLine();
+        return;
+      case tok::objc_try:
+        // This branch isn't strictly necessary (the kw_try case below would
+        // do this too after the tok::at is parsed above).  But be explicit.
+        parseTryCatch();
+        return;
+      default:
+        break;
       }
       break;
     case tok::kw_enum:
@@ -2102,6 +2150,8 @@
 }
 
 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
+  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
+         FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
   nextToken();
   nextToken(); // interface name
 
@@ -2129,8 +2179,21 @@
   parseObjCUntilAtEnd();
 }
 
-void UnwrappedLineParser::parseObjCProtocol() {
+// Returns true for the declaration/definition form of @protocol,
+// false for the expression form.
+bool UnwrappedLineParser::parseObjCProtocol() {
+  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
   nextToken();
+
+  if (FormatTok->is(tok::l_paren))
+    // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
+    return false;
+
+  // The definition/declaration form,
+  // @protocol Foo
+  // - (int)someMethod;
+  // @end
+
   nextToken(); // protocol name
 
   if (FormatTok->Tok.is(tok::less))
@@ -2139,11 +2202,13 @@
   // Check for protocol declaration.
   if (FormatTok->Tok.is(tok::semi)) {
     nextToken();
-    return addUnwrappedLine();
+    addUnwrappedLine();
+    return true;
   }
 
   addUnwrappedLine();
   parseObjCUntilAtEnd();
+  return true;
 }
 
 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
@@ -2176,7 +2241,7 @@
   while (!eof()) {
     if (FormatTok->is(tok::semi))
       return;
-    if (Line->Tokens.size() == 0) {
+    if (Line->Tokens.empty()) {
       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
       // import statement should terminate.
       return;
@@ -2193,7 +2258,8 @@
 
 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
                                                  StringRef Prefix = "") {
-  llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
+  llvm::dbgs() << Prefix << "Line(" << Line.Level
+               << ", FSC=" << Line.FirstStartColumn << ")"
                << (Line.InPPDirective ? " MACRO" : "") << ": ";
   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                     E = Line.Tokens.end();
@@ -2226,6 +2292,7 @@
   CurrentLines->push_back(std::move(*Line));
   Line->Tokens.clear();
   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
+  Line->FirstStartColumn = 0;
   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
     CurrentLines->append(
         std::make_move_iterator(PreprocessorDirectives.begin()),
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index 4437ccf..589ea23 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -56,6 +56,8 @@
   size_t MatchingOpeningBlockLineIndex;
 
   static const size_t kInvalidIndex = -1;
+
+  unsigned FirstStartColumn = 0;
 };
 
 class UnwrappedLineConsumer {
@@ -71,6 +73,7 @@
 public:
   UnwrappedLineParser(const FormatStyle &Style,
                       const AdditionalKeywords &Keywords,
+                      unsigned FirstStartColumn,
                       ArrayRef<FormatToken *> Tokens,
                       UnwrappedLineConsumer &Callback);
 
@@ -116,7 +119,7 @@
   void parseObjCProtocolList();
   void parseObjCUntilAtEnd();
   void parseObjCInterfaceOrImplementation();
-  void parseObjCProtocol();
+  bool parseObjCProtocol();
   void parseJavaScriptEs6ImportExport();
   bool tryToParseLambda();
   bool tryToParseLambdaIntroducer();
@@ -245,10 +248,27 @@
   // sequence.
   std::stack<int> PPChainBranchIndex;
 
-  // Contains the #ifndef condition for a potential include guard.
-  FormatToken *IfNdefCondition;
-  bool FoundIncludeGuardStart;
-  bool IncludeGuardRejected;
+  // Include guard search state. Used to fixup preprocessor indent levels
+  // so that include guards do not participate in indentation.
+  enum IncludeGuardState {
+    IG_Inited,   // Search started, looking for #ifndef.
+    IG_IfNdefed, // #ifndef found, IncludeGuardToken points to condition.
+    IG_Defined,  // Matching #define found, checking other requirements.
+    IG_Found,    // All requirements met, need to fix indents.
+    IG_Rejected, // Search failed or never started.
+  };
+
+  // Current state of include guard search.
+  IncludeGuardState IncludeGuard;
+
+  // Points to the #ifndef condition for a potential include guard. Null unless
+  // IncludeGuardState == IG_IfNdefed.
+  FormatToken *IncludeGuardToken;
+
+  // Contains the first start column where the source begins. This is zero for
+  // normal source code and may be nonzero when formatting a code fragment that
+  // does not start at the beginning of the file.
+  unsigned FirstStartColumn;
 
   friend class ScopedLineState;
   friend class CompoundStatementIndenter;
diff --git a/lib/Format/UsingDeclarationsSorter.cpp b/lib/Format/UsingDeclarationsSorter.cpp
index 4d60d8f..ef0c7a7 100644
--- a/lib/Format/UsingDeclarationsSorter.cpp
+++ b/lib/Format/UsingDeclarationsSorter.cpp
@@ -26,6 +26,45 @@
 
 namespace {
 
+// The order of using declaration is defined as follows:
+// Split the strings by "::" and discard any initial empty strings. The last
+// element of each list is a non-namespace name; all others are namespace
+// names. Sort the lists of names lexicographically, where the sort order of
+// individual names is that all non-namespace names come before all namespace
+// names, and within those groups, names are in case-insensitive lexicographic
+// order.
+int compareLabels(StringRef A, StringRef B) {
+  SmallVector<StringRef, 2> NamesA;
+  A.split(NamesA, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+  SmallVector<StringRef, 2> NamesB;
+  B.split(NamesB, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+  size_t SizeA = NamesA.size();
+  size_t SizeB = NamesB.size();
+  for (size_t I = 0, E = std::min(SizeA, SizeB); I < E; ++I) {
+    if (I + 1 == SizeA) {
+      // I is the last index of NamesA and NamesA[I] is a non-namespace name.
+
+      // Non-namespace names come before all namespace names.
+      if (SizeB > SizeA)
+        return -1;
+
+      // Two names within a group compare case-insensitively.
+      return NamesA[I].compare_lower(NamesB[I]);
+    }
+
+    // I is the last index of NamesB and NamesB[I] is a non-namespace name.
+    // Non-namespace names come before all namespace names.
+    if (I + 1 == SizeB)
+      return 1;
+
+    // Two namespaces names within a group compare case-insensitively.
+    int C = NamesA[I].compare_lower(NamesB[I]);
+    if (C != 0)
+      return C;
+  }
+  return 0;
+}
+
 struct UsingDeclaration {
   const AnnotatedLine *Line;
   std::string Label;
@@ -34,7 +73,7 @@
       : Line(Line), Label(Label) {}
 
   bool operator<(const UsingDeclaration &Other) const {
-    return StringRef(Label).compare_lower(Other.Label) < 0;
+    return compareLabels(Label, Other.Label) < 0;
   }
 };
 
@@ -77,7 +116,7 @@
     SmallVectorImpl<UsingDeclaration> *UsingDeclarations,
     const SourceManager &SourceMgr, tooling::Replacements *Fixes) {
   bool BlockAffected = false;
-  for (const UsingDeclaration& Declaration : *UsingDeclarations) {
+  for (const UsingDeclaration &Declaration : *UsingDeclarations) {
     if (Declaration.Line->Affected) {
       BlockAffected = true;
       break;
@@ -91,7 +130,27 @@
       UsingDeclarations->begin(), UsingDeclarations->end());
   std::stable_sort(SortedUsingDeclarations.begin(),
                    SortedUsingDeclarations.end());
+  SortedUsingDeclarations.erase(
+      std::unique(SortedUsingDeclarations.begin(),
+                  SortedUsingDeclarations.end(),
+                  [](const UsingDeclaration &a, const UsingDeclaration &b) {
+                    return a.Label == b.Label;
+                  }),
+      SortedUsingDeclarations.end());
   for (size_t I = 0, E = UsingDeclarations->size(); I < E; ++I) {
+    if (I >= SortedUsingDeclarations.size()) {
+      // This using declaration has been deduplicated, delete it.
+      auto Begin =
+          (*UsingDeclarations)[I].Line->First->WhitespaceRange.getBegin();
+      auto End = (*UsingDeclarations)[I].Line->Last->Tok.getEndLoc();
+      auto Range = CharSourceRange::getCharRange(Begin, End);
+      auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, ""));
+      if (Err) {
+        llvm::errs() << "Error while sorting using declarations: "
+                     << llvm::toString(std::move(Err)) << "\n";
+      }
+      continue;
+    }
     if ((*UsingDeclarations)[I].Line == SortedUsingDeclarations[I].Line)
       continue;
     auto Begin = (*UsingDeclarations)[I].Line->First->Tok.getLocation();
@@ -124,7 +183,7 @@
                                                  const FormatStyle &Style)
     : TokenAnalyzer(Env, Style) {}
 
-tooling::Replacements UsingDeclarationsSorter::analyze(
+std::pair<tooling::Replacements, unsigned> UsingDeclarationsSorter::analyze(
     TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
     FormatTokenLexer &Tokens) {
   const SourceManager &SourceMgr = Env.getSourceManager();
@@ -133,15 +192,17 @@
   tooling::Replacements Fixes;
   SmallVector<UsingDeclaration, 4> UsingDeclarations;
   for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
+    const auto *FirstTok = AnnotatedLines[I]->First;
     if (AnnotatedLines[I]->InPPDirective ||
-        !AnnotatedLines[I]->startsWith(tok::kw_using) ||
-        AnnotatedLines[I]->First->Finalized) {
+        !AnnotatedLines[I]->startsWith(tok::kw_using) || FirstTok->Finalized) {
       endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
       continue;
     }
-    if (AnnotatedLines[I]->First->NewlinesBefore > 1)
+    if (FirstTok->NewlinesBefore > 1)
       endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
-    std::string Label = computeUsingDeclarationLabel(AnnotatedLines[I]->First);
+    const auto *UsingTok =
+        FirstTok->is(tok::comment) ? FirstTok->getNextNonComment() : FirstTok;
+    std::string Label = computeUsingDeclarationLabel(UsingTok);
     if (Label.empty()) {
       endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
       continue;
@@ -149,7 +210,7 @@
     UsingDeclarations.push_back(UsingDeclaration(AnnotatedLines[I], Label));
   }
   endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
-  return Fixes;
+  return {Fixes, 0};
 }
 
 } // namespace format
diff --git a/lib/Format/UsingDeclarationsSorter.h b/lib/Format/UsingDeclarationsSorter.h
index f7d5f97..6f13771 100644
--- a/lib/Format/UsingDeclarationsSorter.h
+++ b/lib/Format/UsingDeclarationsSorter.h
@@ -25,7 +25,7 @@
 public:
   UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style);
 
-  tooling::Replacements
+  std::pair<tooling::Replacements, unsigned>
   analyze(TokenAnnotator &Annotator,
           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
           FormatTokenLexer &Tokens) override;
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp
index 0f2732c..a5477a9 100644
--- a/lib/Format/WhitespaceManager.cpp
+++ b/lib/Format/WhitespaceManager.cpp
@@ -67,6 +67,11 @@
                            /*IsInsideToken=*/false));
 }
 
+llvm::Error
+WhitespaceManager::addReplacement(const tooling::Replacement &Replacement) {
+  return Replaces.add(Replacement);
+}
+
 void WhitespaceManager::replaceWhitespaceInToken(
     const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
     StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h
index 98a2071..af20dc5 100644
--- a/lib/Format/WhitespaceManager.h
+++ b/lib/Format/WhitespaceManager.h
@@ -57,6 +57,8 @@
   /// was not called.
   void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
 
+  llvm::Error addReplacement(const tooling::Replacement &Replacement);
+
   /// \brief Inserts or replaces whitespace in the middle of a token.
   ///
   /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
diff --git a/lib/Frontend/ASTUnit.cpp b/lib/Frontend/ASTUnit.cpp
index 9eeec37..473da22 100644
--- a/lib/Frontend/ASTUnit.cpp
+++ b/lib/Frontend/ASTUnit.cpp
@@ -35,9 +35,9 @@
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/ASTWriter.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/Support/DJB.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Mutex.h"
@@ -185,7 +185,7 @@
 ASTUnit::ASTUnit(bool _MainFileIsAST)
   : Reader(nullptr), HadModuleLoaderFatalFailure(false),
     OnlyLocalDecls(false), CaptureDiagnostics(false),
-    MainFileIsAST(_MainFileIsAST), 
+    MainFileIsAST(_MainFileIsAST),
     TUKind(TU_Complete), WantTiming(getenv("LIBCLANG_TIMING")),
     OwnsRemappedFileBuffers(true),
     NumStoredDiagnosticsFromDriver(0),
@@ -196,7 +196,7 @@
     CompletionCacheTopLevelHashValue(0),
     PreambleTopLevelHashValue(0),
     CurrentTopLevelHashValue(0),
-    UnsafeToFree(false) { 
+    UnsafeToFree(false) {
   if (getenv("LIBCLANG_OBJTRACKING"))
     fprintf(stderr, "+++ %u translation units\n", ++ActiveASTUnitObjects);
 }
@@ -219,8 +219,8 @@
       delete RB.second;
   }
 
-  ClearCachedCompletionResults();  
-  
+  ClearCachedCompletionResults();
+
   if (getenv("LIBCLANG_OBJTRACKING"))
     fprintf(stderr, "--- %u translation units\n", --ActiveASTUnitObjects);
 }
@@ -229,20 +229,20 @@
   this->PP = std::move(PP);
 }
 
-/// \brief Determine the set of code-completion contexts in which this 
+/// \brief Determine the set of code-completion contexts in which this
 /// declaration should be shown.
 static unsigned getDeclShowContexts(const NamedDecl *ND,
                                     const LangOptions &LangOpts,
                                     bool &IsNestedNameSpecifier) {
   IsNestedNameSpecifier = false;
-  
+
   if (isa<UsingShadowDecl>(ND))
-    ND = dyn_cast<NamedDecl>(ND->getUnderlyingDecl());
+    ND = ND->getUnderlyingDecl();
   if (!ND)
     return 0;
-  
+
   uint64_t Contexts = 0;
-  if (isa<TypeDecl>(ND) || isa<ObjCInterfaceDecl>(ND) || 
+  if (isa<TypeDecl>(ND) || isa<ObjCInterfaceDecl>(ND) ||
       isa<ClassTemplateDecl>(ND) || isa<TemplateTemplateParmDecl>(ND) ||
       isa<TypeAliasTemplateDecl>(ND)) {
     // Types can appear in these contexts.
@@ -257,20 +257,24 @@
     // In C++, types can appear in expressions contexts (for functional casts).
     if (LangOpts.CPlusPlus)
       Contexts |= (1LL << CodeCompletionContext::CCC_Expression);
-    
+
     // In Objective-C, message sends can send interfaces. In Objective-C++,
     // all types are available due to functional casts.
     if (LangOpts.CPlusPlus || isa<ObjCInterfaceDecl>(ND))
       Contexts |= (1LL << CodeCompletionContext::CCC_ObjCMessageReceiver);
-    
+
     // In Objective-C, you can only be a subclass of another Objective-C class
-    if (isa<ObjCInterfaceDecl>(ND))
+    if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(ND)) {
+      // Objective-C interfaces can be used in a class property expression.
+      if (ID->getDefinition())
+        Contexts |= (1LL << CodeCompletionContext::CCC_Expression);
       Contexts |= (1LL << CodeCompletionContext::CCC_ObjCInterfaceName);
+    }
 
     // Deal with tag names.
     if (isa<EnumDecl>(ND)) {
       Contexts |= (1LL << CodeCompletionContext::CCC_EnumTag);
-      
+
       // Part of the nested-name-specifier in C++0x.
       if (LangOpts.CPlusPlus11)
         IsNestedNameSpecifier = true;
@@ -279,7 +283,7 @@
         Contexts |= (1LL << CodeCompletionContext::CCC_UnionTag);
       else
         Contexts |= (1LL << CodeCompletionContext::CCC_ClassOrStructTag);
-      
+
       if (LangOpts.CPlusPlus)
         IsNestedNameSpecifier = true;
     } else if (isa<ClassTemplateDecl>(ND))
@@ -296,24 +300,24 @@
     Contexts = (1LL << CodeCompletionContext::CCC_ObjCCategoryName);
   } else if (isa<NamespaceDecl>(ND) || isa<NamespaceAliasDecl>(ND)) {
     Contexts = (1LL << CodeCompletionContext::CCC_Namespace);
-   
+
     // Part of the nested-name-specifier.
     IsNestedNameSpecifier = true;
   }
-  
+
   return Contexts;
 }
 
 void ASTUnit::CacheCodeCompletionResults() {
   if (!TheSema)
     return;
-  
+
   SimpleTimer Timer(WantTiming);
   Timer.setOutput("Cache global code completions for " + getMainFileName());
 
   // Clear out the previous results.
   ClearCachedCompletionResults();
-  
+
   // Gather the set of global code completions.
   typedef CodeCompletionResult Result;
   SmallVector<Result, 8> Results;
@@ -321,7 +325,7 @@
   CodeCompletionTUInfo CCTUInfo(CachedCompletionAllocator);
   TheSema->GatherGlobalCodeCompletions(*CachedCompletionAllocator,
                                        CCTUInfo, Results);
-  
+
   // Translate global code completions into cached completions.
   llvm::DenseMap<CanQualType, unsigned> CompletionTypes;
   CodeCompletionContext CCContext(CodeCompletionContext::CCC_TopLevel);
@@ -340,7 +344,7 @@
       CachedResult.Kind = R.CursorKind;
       CachedResult.Availability = R.Availability;
 
-      // Keep track of the type of this completion in an ASTContext-agnostic 
+      // Keep track of the type of this completion in an ASTContext-agnostic
       // way.
       QualType UsageType = getDeclUsageType(*Ctx, R.Declaration);
       if (UsageType.isNull()) {
@@ -352,7 +356,7 @@
         CachedResult.TypeClass = getSimplifiedTypeClass(CanUsageType);
 
         // Determine whether we have already seen this type. If so, we save
-        // ourselves the work of formatting the type string by using the 
+        // ourselves the work of formatting the type string by using the
         // temporary, CanQualType-based hash table to find the associated value.
         unsigned &TypeValue = CompletionTypes[CanUsageType];
         if (TypeValue == 0) {
@@ -360,12 +364,12 @@
           CachedCompletionTypes[QualType(CanUsageType).getAsString()]
             = TypeValue;
         }
-        
+
         CachedResult.Type = TypeValue;
       }
-      
+
       CachedCompletionResults.push_back(CachedResult);
-      
+
       /// Handle nested-name-specifiers in C++.
       if (TheSema->Context.getLangOpts().CPlusPlus && IsNestedNameSpecifier &&
           !R.StartsNestedNameSpecifier) {
@@ -388,10 +392,10 @@
             isa<NamespaceAliasDecl>(R.Declaration))
           NNSContexts |= (1LL << CodeCompletionContext::CCC_Namespace);
 
-        if (unsigned RemainingContexts 
+        if (unsigned RemainingContexts
                                 = NNSContexts & ~CachedResult.ShowInContexts) {
-          // If there any contexts where this completion can be a 
-          // nested-name-specifier but isn't already an option, create a 
+          // If there any contexts where this completion can be a
+          // nested-name-specifier but isn't already an option, create a
           // nested-name-specifier completion.
           R.StartsNestedNameSpecifier = true;
           CachedResult.Completion = R.CreateCodeCompletionString(
@@ -406,13 +410,13 @@
       }
       break;
     }
-        
+
     case Result::RK_Keyword:
     case Result::RK_Pattern:
       // Ignore keywords and patterns; we don't care, since they are so
       // easily regenerated.
       break;
-      
+
     case Result::RK_Macro: {
       CachedCodeCompletionResult CachedResult;
       CachedResult.Completion = R.CreateCodeCompletionString(
@@ -442,7 +446,7 @@
     }
     }
   }
-  
+
   // Save the current top-level hash value.
   CompletionCacheTopLevelHashValue = CurrentTopLevelHashValue;
 }
@@ -482,10 +486,10 @@
                            bool AllowCompatibleDifferences) override {
     if (InitializedLanguage)
       return false;
-    
+
     LangOpt = LangOpts;
     InitializedLanguage = true;
-    
+
     updated();
     return false;
   }
@@ -794,14 +798,14 @@
 
 /// \brief Add the given macro to the hash of all top-level entities.
 void AddDefinedMacroToHash(const Token &MacroNameTok, unsigned &Hash) {
-  Hash = llvm::HashString(MacroNameTok.getIdentifierInfo()->getName(), Hash);
+  Hash = llvm::djbHash(MacroNameTok.getIdentifierInfo()->getName(), Hash);
 }
 
-/// \brief Preprocessor callback class that updates a hash value with the names 
+/// \brief Preprocessor callback class that updates a hash value with the names
 /// of all macros that have been defined by the translation unit.
 class MacroDefinitionTrackerPPCallbacks : public PPCallbacks {
   unsigned &Hash;
-  
+
 public:
   explicit MacroDefinitionTrackerPPCallbacks(unsigned &Hash) : Hash(Hash) { }
 
@@ -815,11 +819,11 @@
 void AddTopLevelDeclarationToHash(Decl *D, unsigned &Hash) {
   if (!D)
     return;
-  
+
   DeclContext *DC = D->getDeclContext();
   if (!DC)
     return;
-  
+
   if (!(DC->isTranslationUnit() || DC->getLookupParent()->isTranslationUnit()))
     return;
 
@@ -830,16 +834,16 @@
       if (!EnumD->isScoped()) {
         for (const auto *EI : EnumD->enumerators()) {
           if (EI->getIdentifier())
-            Hash = llvm::HashString(EI->getIdentifier()->getName(), Hash);
+            Hash = llvm::djbHash(EI->getIdentifier()->getName(), Hash);
         }
       }
     }
 
     if (ND->getIdentifier())
-      Hash = llvm::HashString(ND->getIdentifier()->getName(), Hash);
+      Hash = llvm::djbHash(ND->getIdentifier()->getName(), Hash);
     else if (DeclarationName Name = ND->getDeclName()) {
       std::string NameStr = Name.getAsString();
-      Hash = llvm::HashString(NameStr, Hash);
+      Hash = llvm::djbHash(NameStr, Hash);
     }
     return;
   }
@@ -847,7 +851,7 @@
   if (ImportDecl *ImportD = dyn_cast<ImportDecl>(D)) {
     if (Module *Mod = ImportD->getImportedModule()) {
       std::string ModName = Mod->getFullModuleName();
-      Hash = llvm::HashString(ModName, Hash);
+      Hash = llvm::djbHash(ModName, Hash);
     }
     return;
   }
@@ -856,7 +860,7 @@
 class TopLevelDeclTrackerConsumer : public ASTConsumer {
   ASTUnit &Unit;
   unsigned &Hash;
-  
+
 public:
   TopLevelDeclTrackerConsumer(ASTUnit &_Unit, unsigned &Hash)
     : Unit(_Unit), Hash(Hash) {
@@ -929,7 +933,7 @@
 
   bool hasCodeCompletionSupport() const override { return false; }
   TranslationUnitKind getTranslationUnitKind() override {
-    return Unit.getTranslationUnitKind(); 
+    return Unit.getTranslationUnitKind();
   }
 };
 
@@ -966,9 +970,8 @@
     }
   }
 
-  void HandleMacroDefined(const Token &MacroNameTok,
-                          const MacroDirective *MD) override {
-    AddDefinedMacroToHash(MacroNameTok, Hash);
+  std::unique_ptr<PPCallbacks> createPPCallbacks() override {
+    return llvm::make_unique<MacroDefinitionTrackerPPCallbacks>(Hash);
   }
 
 private:
@@ -1009,24 +1012,6 @@
   }
 }
 
-static IntrusiveRefCntPtr<vfs::FileSystem> createVFSOverlayForPreamblePCH(
-    StringRef PCHFilename,
-    IntrusiveRefCntPtr<vfs::FileSystem> RealFS,
-    IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
-  // We want only the PCH file from the real filesystem to be available,
-  // so we create an in-memory VFS with just that and overlay it on top.
-  auto Buf = RealFS->getBufferForFile(PCHFilename);
-  if (!Buf)
-    return VFS;
-  IntrusiveRefCntPtr<vfs::InMemoryFileSystem>
-      PCHFS(new vfs::InMemoryFileSystem());
-  PCHFS->addFile(PCHFilename, 0, std::move(*Buf));
-  IntrusiveRefCntPtr<vfs::OverlayFileSystem>
-      Overlay(new vfs::OverlayFileSystem(VFS));
-  Overlay->pushOverlay(PCHFS);
-  return Overlay;
-}
-
 /// Parse the source file into a translation unit using the given compiler
 /// invocation, replacing the current translation unit.
 ///
@@ -1038,6 +1023,19 @@
   if (!Invocation)
     return true;
 
+  auto CCInvocation = std::make_shared<CompilerInvocation>(*Invocation);
+  if (OverrideMainBuffer) {
+    assert(Preamble &&
+           "No preamble was built, but OverrideMainBuffer is not null");
+    IntrusiveRefCntPtr<vfs::FileSystem> OldVFS = VFS;
+    Preamble->AddImplicitPreamble(*CCInvocation, VFS, OverrideMainBuffer.get());
+    if (OldVFS != VFS && FileMgr) {
+      assert(OldVFS == FileMgr->getVirtualFileSystem() &&
+             "VFS passed to Parse and VFS in FileMgr are different");
+      FileMgr = new FileManager(FileMgr->getFileSystemOpts(), VFS);
+    }
+  }
+
   // Create the compiler instance to use for building the AST.
   std::unique_ptr<CompilerInstance> Clang(
       new CompilerInstance(std::move(PCHContainerOps)));
@@ -1048,35 +1046,17 @@
     Clang->setVirtualFileSystem(VFS);
   }
 
-  // Make sure we can access the PCH file even if we're using a VFS
-  if (!VFS && FileMgr)
-    VFS = FileMgr->getVirtualFileSystem();
-  IntrusiveRefCntPtr<vfs::FileSystem> RealFS = vfs::getRealFileSystem();
-  if (OverrideMainBuffer && VFS && RealFS && VFS != RealFS &&
-      !VFS->exists(Preamble->GetPCHPath())) {
-    // We have a slight inconsistency here -- we're using the VFS to
-    // read files, but the PCH was generated in the real file system.
-    VFS = createVFSOverlayForPreamblePCH(Preamble->GetPCHPath(), RealFS, VFS);
-    if (FileMgr) {
-      FileMgr = new FileManager(FileMgr->getFileSystemOpts(), VFS);
-      Clang->setFileManager(FileMgr.get());
-    }
-    else {
-      Clang->setVirtualFileSystem(VFS);
-    }
-  }
-
   // Recover resources if we crash before exiting this method.
   llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance>
     CICleanup(Clang.get());
 
-  Clang->setInvocation(std::make_shared<CompilerInvocation>(*Invocation));
+  Clang->setInvocation(CCInvocation);
   OriginalSourceFile = Clang->getFrontendOpts().Inputs[0].getFile();
-    
+
   // Set up diagnostics, capturing any diagnostics that would
   // otherwise be dropped.
   Clang->setDiagnostics(&getDiagnostics());
-  
+
   // Create the target instance.
   Clang->setTarget(TargetInfo::CreateTargetInfo(
       Clang->getDiagnostics(), Clang->getInvocation().TargetOpts));
@@ -1088,7 +1068,7 @@
   // FIXME: We shouldn't need to do this, the target should be immutable once
   // created. This complexity should be lifted elsewhere.
   Clang->getTarget().adjust(Clang->getLangOpts());
-  
+
   assert(Clang->getFrontendOpts().Inputs.size() == 1 &&
          "Invocation must have exactly one source file!");
   assert(Clang->getFrontendOpts().Inputs[0].getKind().getFormat() ==
@@ -1117,16 +1097,13 @@
 
   // Create a file manager object to provide access to and cache the filesystem.
   Clang->setFileManager(&getFileManager());
-  
+
   // Create the source manager.
   Clang->setSourceManager(&getSourceManager());
-  
+
   // If the main file has been overridden due to the use of a preamble,
   // make that override happen and introduce the preamble.
   if (OverrideMainBuffer) {
-    assert(Preamble && "No preamble was built, but OverrideMainBuffer is not null");
-    Preamble->AddImplicitPreamble(Clang->getInvocation(), OverrideMainBuffer.get());
-    
     // The stored diagnostic has the old source manager in it; update
     // the locations to refer into the new source manager. Since we've
     // been careful to make sure that the source manager's state
@@ -1158,7 +1135,7 @@
     goto error;
 
   transferASTDataFromCompilerInstance(*Clang);
-  
+
   Act->EndSourceFile();
 
   FailedParseDiagnostics.clear();
@@ -1227,10 +1204,10 @@
 /// the source file.
 ///
 /// This routine will compute the preamble of the main source file. If a
-/// non-trivial preamble is found, it will precompile that preamble into a 
+/// non-trivial preamble is found, it will precompile that preamble into a
 /// precompiled header so that the precompiled preamble can be used to reduce
 /// reparsing time. If a precompiled preamble has already been constructed,
-/// this routine will determine if it is still valid and, if so, avoid 
+/// this routine will determine if it is still valid and, if so, avoid
 /// rebuilding the precompiled preamble.
 ///
 /// \param AllowRebuild When true (the default), this routine is
@@ -1315,7 +1292,7 @@
 
     llvm::ErrorOr<PrecompiledPreamble> NewPreamble = PrecompiledPreamble::Build(
         PreambleInvocationIn, MainFileBuffer.get(), Bounds, *Diagnostics, VFS,
-        PCHContainerOps, Callbacks);
+        PCHContainerOps, /*StoreInMemory=*/false, Callbacks);
     if (NewPreamble) {
       Preamble = std::move(*NewPreamble);
       PreambleRebuildCounter = 1;
@@ -1465,7 +1442,7 @@
     if (!AST)
       return nullptr;
   }
-  
+
   if (!ResourceFilesPath.empty()) {
     // Override the resources path.
     CI->getHeaderSearchOpts().ResourceDir = ResourceFilesPath;
@@ -1501,11 +1478,11 @@
 
   Clang->setInvocation(std::move(CI));
   AST->OriginalSourceFile = Clang->getFrontendOpts().Inputs[0].getFile();
-    
+
   // Set up diagnostics, capturing any diagnostics that would
   // otherwise be dropped.
   Clang->setDiagnostics(&AST->getDiagnostics());
-  
+
   // Create the target instance.
   Clang->setTarget(TargetInfo::CreateTargetInfo(
       Clang->getDiagnostics(), Clang->getInvocation().TargetOpts));
@@ -1517,7 +1494,7 @@
   // FIXME: We shouldn't need to do this, the target should be immutable once
   // created. This complexity should be lifted elsewhere.
   Clang->getTarget().adjust(Clang->getLangOpts());
-  
+
   assert(Clang->getFrontendOpts().Inputs.size() == 1 &&
          "Invocation must have exactly one source file!");
   assert(Clang->getFrontendOpts().Inputs[0].getKind().getFormat() ==
@@ -1535,7 +1512,7 @@
 
   // Create a file manager object to provide access to and cache the filesystem.
   Clang->setFileManager(&AST->getFileManager());
-  
+
   // Create the source manager.
   Clang->setSourceManager(&AST->getSourceManager());
 
@@ -1581,7 +1558,7 @@
 
   // Steal the created target, context, and preprocessor.
   AST->transferASTDataFromCompilerInstance(*Clang);
-  
+
   Act->EndSourceFile();
 
   if (OwnAST)
@@ -1646,7 +1623,7 @@
   AST->FileSystemOpts = FileMgr->getFileSystemOpts();
   AST->FileMgr = FileMgr;
   AST->UserFilesAreVolatile = UserFilesAreVolatile;
-  
+
   // Recover resources if we crash before exiting this method.
   llvm::CrashRecoveryContextCleanupRegistrar<ASTUnit>
     ASTUnitCleanup(AST.get());
@@ -1699,7 +1676,7 @@
   PPOpts.RemappedFilesKeepOriginalName = RemappedFilesKeepOriginalName;
   PPOpts.AllowPCHWithCompilerErrors = AllowPCHWithCompilerErrors;
   PPOpts.SingleFileParseMode = SingleFileParse;
-  
+
   // Override the resources path.
   CI->getHeaderSearchOpts().ResourceDir = ResourceFilesPath;
 
@@ -1768,7 +1745,7 @@
   }
 
   clearFileLevelDecls();
-  
+
   SimpleTimer ParsingTimer(WantTiming);
   ParsingTimer.setOutput("Reparsing " + getMainFileName());
 
@@ -1802,7 +1779,7 @@
   bool Result =
       Parse(std::move(PCHContainerOps), std::move(OverrideMainBuffer), VFS);
 
-  // If we're caching global code-completion results, and the top-level 
+  // If we're caching global code-completion results, and the top-level
   // declarations have changed, clear out the code-completion cache.
   if (!Result && ShouldCacheCodeCompletionResults &&
       CurrentTopLevelHashValue != CompletionCacheTopLevelHashValue)
@@ -1811,7 +1788,7 @@
   // We now need to clear out the completion info related to this translation
   // unit; it'll be recreated if necessary.
   CCTUInfo.reset();
-  
+
   return Result;
 }
 
@@ -1835,21 +1812,21 @@
 namespace {
   /// \brief Code completion consumer that combines the cached code-completion
   /// results from an ASTUnit with the code-completion results provided to it,
-  /// then passes the result on to 
+  /// then passes the result on to
   class AugmentedCodeCompleteConsumer : public CodeCompleteConsumer {
     uint64_t NormalContexts;
     ASTUnit &AST;
     CodeCompleteConsumer &Next;
-    
+
   public:
     AugmentedCodeCompleteConsumer(ASTUnit &AST, CodeCompleteConsumer &Next,
                                   const CodeCompleteOptions &CodeCompleteOpts)
       : CodeCompleteConsumer(CodeCompleteOpts, Next.isOutputBinary()),
         AST(AST), Next(Next)
-    { 
+    {
       // Compute the set of contexts in which we will look when we don't have
       // any information about the specific context.
-      NormalContexts 
+      NormalContexts
         = (1LL << CodeCompletionContext::CCC_TopLevel)
         | (1LL << CodeCompletionContext::CCC_ObjCInterface)
         | (1LL << CodeCompletionContext::CCC_ObjCImplementation)
@@ -1918,13 +1895,13 @@
   case CodeCompletionContext::CCC_ParenthesizedExpression:
   case CodeCompletionContext::CCC_ObjCInterfaceName:
     break;
-    
+
   case CodeCompletionContext::CCC_EnumTag:
   case CodeCompletionContext::CCC_UnionTag:
   case CodeCompletionContext::CCC_ClassOrStructTag:
     OnlyTagNames = true;
     break;
-    
+
   case CodeCompletionContext::CCC_ObjCProtocolName:
   case CodeCompletionContext::CCC_MacroName:
   case CodeCompletionContext::CCC_MacroNameUse:
@@ -1942,12 +1919,12 @@
     // be hidden.
     return;
   }
-  
+
   typedef CodeCompletionResult Result;
   for (unsigned I = 0; I != NumResults; ++I) {
     if (Results[I].Kind != Result::RK_Declaration)
       continue;
-    
+
     unsigned IDNS
       = Results[I].Declaration->getUnderlyingDecl()->getIdentifierNamespace();
 
@@ -1955,17 +1932,17 @@
     if (OnlyTagNames)
       Hiding = (IDNS & Decl::IDNS_Tag);
     else {
-      unsigned HiddenIDNS = (Decl::IDNS_Type | Decl::IDNS_Member | 
+      unsigned HiddenIDNS = (Decl::IDNS_Type | Decl::IDNS_Member |
                              Decl::IDNS_Namespace | Decl::IDNS_Ordinary |
                              Decl::IDNS_NonMemberOperator);
       if (Ctx.getLangOpts().CPlusPlus)
         HiddenIDNS |= Decl::IDNS_Tag;
       Hiding = (IDNS & HiddenIDNS);
     }
-  
+
     if (!Hiding)
       continue;
-    
+
     DeclarationName Name = Results[I].Declaration->getDeclName();
     if (IdentifierInfo *Identifier = Name.getAsIdentifierInfo())
       HiddenNames.insert(Identifier->getName());
@@ -1977,7 +1954,7 @@
 void AugmentedCodeCompleteConsumer::ProcessCodeCompleteResults(Sema &S,
                                             CodeCompletionContext Context,
                                             CodeCompletionResult *Results,
-                                            unsigned NumResults) { 
+                                            unsigned NumResults) {
   // Merge the results we were given with the results we cached.
   bool AddedResult = false;
   uint64_t InContexts =
@@ -1987,29 +1964,29 @@
   llvm::StringSet<llvm::BumpPtrAllocator> HiddenNames;
   typedef CodeCompletionResult Result;
   SmallVector<Result, 8> AllResults;
-  for (ASTUnit::cached_completion_iterator 
+  for (ASTUnit::cached_completion_iterator
             C = AST.cached_completion_begin(),
          CEnd = AST.cached_completion_end();
        C != CEnd; ++C) {
-    // If the context we are in matches any of the contexts we are 
+    // If the context we are in matches any of the contexts we are
     // interested in, we'll add this result.
     if ((C->ShowInContexts & InContexts) == 0)
       continue;
-    
+
     // If we haven't added any results previously, do so now.
     if (!AddedResult) {
-      CalculateHiddenNames(Context, Results, NumResults, S.Context, 
+      CalculateHiddenNames(Context, Results, NumResults, S.Context,
                            HiddenNames);
       AllResults.insert(AllResults.end(), Results, Results + NumResults);
       AddedResult = true;
     }
-    
+
     // Determine whether this global completion result is hidden by a local
     // completion result. If so, skip it.
     if (C->Kind != CXCursor_MacroDefinition &&
         HiddenNames.count(C->Completion->getTypedText()))
       continue;
-    
+
     // Adjust priority based on similar type classes.
     unsigned Priority = C->Priority;
     CodeCompletionString *Completion = C->Completion;
@@ -2017,7 +1994,7 @@
       if (C->Kind == CXCursor_MacroDefinition) {
         Priority = getMacroUsagePriority(C->Completion->getTypedText(),
                                          S.getLangOpts(),
-                               Context.getPreferredType()->isAnyPointerType());        
+                               Context.getPreferredType()->isAnyPointerType());
       } else if (C->Type) {
         CanQualType Expected
           = S.Context.getCanonicalType(
@@ -2036,7 +2013,7 @@
         }
       }
     }
-    
+
     // Adjust the completion string, if required.
     if (C->Kind == CXCursor_MacroDefinition &&
         Context.getKind() == CodeCompletionContext::CCC_MacroNameUse) {
@@ -2048,18 +2025,18 @@
       Priority = CCP_CodePattern;
       Completion = Builder.TakeString();
     }
-    
+
     AllResults.push_back(Result(Completion, Priority, C->Kind,
                                 C->Availability));
   }
-  
+
   // If we did not add any cached completion results, just forward the
   // results we were given to the next consumer.
   if (!AddedResult) {
     Next.ProcessCodeCompleteResults(S, Context, Results, NumResults);
     return;
   }
-  
+
   Next.ProcessCodeCompleteResults(S, Context, AllResults.data(),
                                   AllResults.size());
 }
@@ -2091,6 +2068,7 @@
   CodeCompleteOpts.IncludeCodePatterns = IncludeCodePatterns;
   CodeCompleteOpts.IncludeGlobals = CachedCompletionResults.empty();
   CodeCompleteOpts.IncludeBriefComments = IncludeBriefComments;
+  CodeCompleteOpts.LoadExternal = Consumer.loadExternal();
 
   assert(IncludeBriefComments == this->IncludeBriefCommentsInCodeCompletion);
 
@@ -2115,11 +2093,11 @@
   auto &Inv = *CCInvocation;
   Clang->setInvocation(std::move(CCInvocation));
   OriginalSourceFile = Clang->getFrontendOpts().Inputs[0].getFile();
-    
+
   // Set up diagnostics, capturing any diagnostics produced.
   Clang->setDiagnostics(&Diag);
-  CaptureDroppedDiagnostics Capture(true, 
-                                    Clang->getDiagnostics(), 
+  CaptureDroppedDiagnostics Capture(true,
+                                    Clang->getDiagnostics(),
                                     &StoredDiagnostics, nullptr);
   ProcessWarningOptions(Diag, Inv.getDiagnosticOpts());
 
@@ -2130,13 +2108,13 @@
     Clang->setInvocation(nullptr);
     return;
   }
-  
+
   // Inform the target of the language options.
   //
   // FIXME: We shouldn't need to do this, the target should be immutable once
   // created. This complexity should be lifted elsewhere.
   Clang->getTarget().adjust(Clang->getLangOpts());
-  
+
   assert(Clang->getFrontendOpts().Inputs.size() == 1 &&
          "Invocation must have exactly one source file!");
   assert(Clang->getFrontendOpts().Inputs[0].getKind().getFormat() ==
@@ -2145,7 +2123,7 @@
   assert(Clang->getFrontendOpts().Inputs[0].getKind().getLanguage() !=
              InputKind::LLVM_IR &&
          "IR inputs not support here!");
-  
+
   // Use the source and file managers that we were given.
   Clang->setFileManager(&FileMgr);
   Clang->setSourceManager(&SourceMgr);
@@ -2191,8 +2169,16 @@
   // If the main file has been overridden due to the use of a preamble,
   // make that override happen and introduce the preamble.
   if (OverrideMainBuffer) {
-    assert(Preamble && "No preamble was built, but OverrideMainBuffer is not null");
-    Preamble->AddImplicitPreamble(Clang->getInvocation(), OverrideMainBuffer.get());
+    assert(Preamble &&
+           "No preamble was built, but OverrideMainBuffer is not null");
+
+    auto VFS = FileMgr.getVirtualFileSystem();
+    Preamble->AddImplicitPreamble(Clang->getInvocation(), VFS,
+                                  OverrideMainBuffer.get());
+    // FIXME: there is no way to update VFS if it was changed by
+    // AddImplicitPreamble as FileMgr is accepted as a parameter by this method.
+    // We use on-disk preambles instead and rely on FileMgr's VFS to ensure the
+    // PCH files are always readable.
     OwnedBuffers.push_back(OverrideMainBuffer.release());
   } else {
     PreprocessorOpts.PrecompiledPreambleBytes.first = 0;
@@ -2224,7 +2210,7 @@
   if (llvm::sys::fs::createUniqueFile(TempPath, fd, TempPath))
     return true;
 
-  // FIXME: Can we somehow regenerate the stat cache here, or do we need to 
+  // FIXME: Can we somehow regenerate the stat cache here, or do we need to
   // unconditionally create a stat cache when we parse the file?
   llvm::raw_fd_ostream Out(fd, /*shouldClose=*/true);
 
@@ -2327,7 +2313,7 @@
       FH.RemoveRange = CharSourceRange::getCharRange(BL, EL);
     }
 
-    Result.push_back(StoredDiagnostic(SD.Level, SD.ID, 
+    Result.push_back(StoredDiagnostic(SD.Level, SD.ID,
                                       SD.Message, Loc, Ranges, FixIts));
   }
   Result.swap(Out);
@@ -2335,7 +2321,7 @@
 
 void ASTUnit::addFileLevelDecl(Decl *D) {
   assert(D);
-  
+
   // We only care about local declarations.
   if (D->isFromASTFile())
     return;
@@ -2412,7 +2398,7 @@
       std::make_pair(Offset + Length, (Decl *)nullptr), llvm::less_first());
   if (EndIt != LocDecls.end())
     ++EndIt;
-  
+
   for (LocDeclsTy::iterator DIt = BeginIt; DIt != EndIt; ++DIt)
     Decls.push_back(DIt->second);
 }
@@ -2477,10 +2463,10 @@
   FileID FID;
   if (SourceMgr)
     FID = SourceMgr->getPreambleFileID();
-  
+
   if (Loc.isInvalid() || FID.isInvalid())
     return false;
-  
+
   return SourceMgr->isInFileID(Loc, FID);
 }
 
@@ -2488,10 +2474,10 @@
   FileID FID;
   if (SourceMgr)
     FID = SourceMgr->getMainFileID();
-  
+
   if (Loc.isInvalid() || FID.isInvalid())
     return false;
-  
+
   return SourceMgr->isInFileID(Loc, FID);
 }
 
@@ -2499,7 +2485,7 @@
   FileID FID;
   if (SourceMgr)
     FID = SourceMgr->getPreambleFileID();
-  
+
   if (FID.isInvalid())
     return SourceLocation();
 
@@ -2510,10 +2496,10 @@
   FileID FID;
   if (SourceMgr)
     FID = SourceMgr->getMainFileID();
-  
+
   if (FID.isInvalid())
     return SourceLocation();
-  
+
   return SourceMgr->getLocForStartOfFile(FID);
 }
 
diff --git a/lib/Frontend/CacheTokens.cpp b/lib/Frontend/CacheTokens.cpp
index 72e8f68..f3569ed 100644
--- a/lib/Frontend/CacheTokens.cpp
+++ b/lib/Frontend/CacheTokens.cpp
@@ -21,8 +21,8 @@
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/PTHManager.h"
 #include "clang/Lex/Preprocessor.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Support/DJB.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -128,7 +128,7 @@
   typedef unsigned offset_type;
 
   static hash_value_type ComputeHash(PTHEntryKeyVariant V) {
-    return llvm::HashString(V.getString());
+    return llvm::djbHash(V.getString());
   }
 
   static std::pair<unsigned,unsigned>
@@ -625,7 +625,7 @@
   typedef unsigned offset_type;
 
   static hash_value_type ComputeHash(PTHIdKey* key) {
-    return llvm::HashString(key->II->getName());
+    return llvm::djbHash(key->II->getName());
   }
 
   static std::pair<unsigned,unsigned>
@@ -662,7 +662,8 @@
   //  (2) a map from (IdentifierInfo*, Offset)* -> persistent IDs
 
   // Note that we use 'calloc', so all the bytes are 0.
-  PTHIdKey *IIDMap = (PTHIdKey*)calloc(idcount, sizeof(PTHIdKey));
+  PTHIdKey *IIDMap = static_cast<PTHIdKey*>(
+      llvm::safe_calloc(idcount, sizeof(PTHIdKey)));
 
   // Create the hashtable.
   llvm::OnDiskChainedHashTableGenerator<PTHIdentifierTableTrait> IIOffMap;
diff --git a/lib/Frontend/CodeGenOptions.cpp b/lib/Frontend/CodeGenOptions.cpp
index 50bb9f9..84a39f2 100644
--- a/lib/Frontend/CodeGenOptions.cpp
+++ b/lib/Frontend/CodeGenOptions.cpp
@@ -17,7 +17,7 @@
 #define ENUM_CODEGENOPT(Name, Type, Bits, Default) set##Name(Default);
 #include "clang/Frontend/CodeGenOptions.def"
 
-  RelocationModel = "pic";
+  RelocationModel = llvm::Reloc::PIC_;
   memcpy(CoverageVersion, "402*", 4);
 }
 
diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp
index 32f1232..306d7c0 100644
--- a/lib/Frontend/CompilerInstance.cpp
+++ b/lib/Frontend/CompilerInstance.cpp
@@ -1854,10 +1854,44 @@
 
   // Verify that the rest of the module path actually corresponds to
   // a submodule.
+  bool MapPrivateSubModToTopLevel = false;
   if (!getLangOpts().ModulesTS && Path.size() > 1) {
     for (unsigned I = 1, N = Path.size(); I != N; ++I) {
       StringRef Name = Path[I].first->getName();
       clang::Module *Sub = Module->findSubmodule(Name);
+
+      // If the user is requesting Foo.Private and it doesn't exist, try to
+      // match Foo_Private and emit a warning asking for the user to write
+      // @import Foo_Private instead. FIXME: remove this when existing clients
+      // migrate off of Foo.Private syntax.
+      if (!Sub && PP->getLangOpts().ImplicitModules && Name == "Private" &&
+          Module == Module->getTopLevelModule()) {
+        SmallString<128> PrivateModule(Module->Name);
+        PrivateModule.append("_Private");
+
+        SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> PrivPath;
+        auto &II = PP->getIdentifierTable().get(
+            PrivateModule, PP->getIdentifierInfo(Module->Name)->getTokenID());
+        PrivPath.push_back(std::make_pair(&II, Path[0].second));
+
+        if (PP->getHeaderSearchInfo().lookupModule(PrivateModule))
+          Sub =
+              loadModule(ImportLoc, PrivPath, Visibility, IsInclusionDirective);
+        if (Sub) {
+          MapPrivateSubModToTopLevel = true;
+          if (!getDiagnostics().isIgnored(
+                  diag::warn_no_priv_submodule_use_toplevel, ImportLoc)) {
+            getDiagnostics().Report(Path[I].second,
+                                    diag::warn_no_priv_submodule_use_toplevel)
+                << Path[I].first << Module->getFullModuleName() << PrivateModule
+                << SourceRange(Path[0].second, Path[I].second)
+                << FixItHint::CreateReplacement(SourceRange(Path[0].second),
+                                                PrivateModule);
+            getDiagnostics().Report(Sub->DefinitionLoc,
+                                    diag::note_private_top_level_defined);
+          }
+        }
+      }
       
       if (!Sub) {
         // Attempt to perform typo correction to find a module name that works.
@@ -1892,7 +1926,7 @@
           Sub = Module->findSubmodule(Best[0]);
         }
       }
-      
+
       if (!Sub) {
         // No submodule by this name. Complain, and don't look for further
         // submodules.
@@ -1909,7 +1943,7 @@
   // Make the named module visible, if it's not already part of the module
   // we are parsing.
   if (ModuleName != getLangOpts().CurrentModule) {
-    if (!Module->IsFromModuleFile) {
+    if (!Module->IsFromModuleFile && !MapPrivateSubModToTopLevel) {
       // We have an umbrella header or directory that doesn't actually include
       // all of the headers within the directory it covers. Complain about
       // this missing submodule and recover by forgetting that we ever saw
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 19e26c1..23f3c8f 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -305,6 +305,14 @@
     }
   }
 
+  llvm::raw_string_ostream os(Opts.FullCompilerInvocation);
+  for (unsigned i=0; i<Args.getNumInputArgStrings(); i++) {
+    if (i != 0)
+      os << " ";
+    os << Args.getArgString(i);
+  }
+  os.flush();
+
   return Success;
 }
 
@@ -330,15 +338,23 @@
   return "default";
 }
 
-static StringRef getRelocModel(ArgList &Args, DiagnosticsEngine &Diags) {
+static llvm::Reloc::Model getRelocModel(ArgList &Args,
+                                        DiagnosticsEngine &Diags) {
   if (Arg *A = Args.getLastArg(OPT_mrelocation_model)) {
     StringRef Value = A->getValue();
-    if (Value == "static" || Value == "pic" || Value == "ropi" ||
-        Value == "rwpi" || Value == "ropi-rwpi" || Value == "dynamic-no-pic")
-      return Value;
+    auto RM = llvm::StringSwitch<llvm::Optional<llvm::Reloc::Model>>(Value)
+                  .Case("static", llvm::Reloc::Static)
+                  .Case("pic", llvm::Reloc::PIC_)
+                  .Case("ropi", llvm::Reloc::ROPI)
+                  .Case("rwpi", llvm::Reloc::RWPI)
+                  .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI)
+                  .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC)
+                  .Default(None);
+    if (RM.hasValue())
+      return *RM;
     Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Value;
   }
-  return "pic";
+  return llvm::Reloc::PIC_;
 }
 
 /// \brief Create a new Regex instance out of the string value in \p RpassArg.
@@ -529,6 +545,7 @@
   Opts.DebugTypeExtRefs = Args.hasArg(OPT_dwarf_ext_refs);
   Opts.DebugExplicitImport = Args.hasArg(OPT_dwarf_explicit_import);
   Opts.DebugFwdTemplateParams = Args.hasArg(OPT_debug_forward_template_params);
+  Opts.EmbedSource = Args.hasArg(OPT_gembed_source);
 
   for (const auto &Arg : Args.getAllArgValues(OPT_fdebug_prefix_map_EQ))
     Opts.DebugPrefixMap.insert(StringRef(Arg).split('='));
@@ -546,6 +563,8 @@
     OPT_fuse_register_sized_bitfield_access);
   Opts.RelaxedAliasing = Args.hasArg(OPT_relaxed_aliasing);
   Opts.StructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa);
+  Opts.NewStructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa) &&
+                           Args.hasArg(OPT_new_struct_path_tbaa);
   Opts.FineGrainedBitfieldAccesses =
       Args.hasFlag(OPT_ffine_grained_bitfield_accesses,
                    OPT_fno_fine_grained_bitfield_accesses, false);
@@ -621,6 +640,8 @@
   Opts.DisableFree = Args.hasArg(OPT_disable_free);
   Opts.DiscardValueNames = Args.hasArg(OPT_discard_value_names);
   Opts.DisableTailCalls = Args.hasArg(OPT_mdisable_tail_calls);
+  Opts.NoEscapingBlockTailCalls =
+      Args.hasArg(OPT_fno_escaping_block_tail_calls);
   Opts.FloatABI = Args.getLastArgValue(OPT_mfloat_abi);
   Opts.LessPreciseFPMAD = Args.hasArg(OPT_cl_mad_enable) ||
                           Args.hasArg(OPT_cl_unsafe_math_optimizations) ||
@@ -637,9 +658,13 @@
                         Args.hasArg(OPT_cl_no_signed_zeros) ||
                         Args.hasArg(OPT_cl_unsafe_math_optimizations) ||
                         Args.hasArg(OPT_cl_fast_relaxed_math));
+  Opts.Reassociate = Args.hasArg(OPT_mreassociate);
   Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero);
   Opts.CorrectlyRoundedDivSqrt =
       Args.hasArg(OPT_cl_fp32_correctly_rounded_divide_sqrt);
+  Opts.UniformWGSize =
+      Args.hasArg(OPT_cl_uniform_work_group_size);
+  Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ);
   Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math);
   Opts.NoTrappingMath = Args.hasArg(OPT_fno_trapping_math);
   Opts.NoZeroInitializedInBSS = Args.hasArg(OPT_mno_zero_initialized_in_bss);
@@ -653,6 +678,7 @@
       Args.hasArg(OPT_mincremental_linker_compatible);
   Opts.PIECopyRelocations =
       Args.hasArg(OPT_mpie_copy_relocations);
+  Opts.NoPLT = Args.hasArg(OPT_fno_plt);
   Opts.OmitLeafFramePointer = Args.hasArg(OPT_momit_leaf_frame_pointer);
   Opts.SaveTempLabels = Args.hasArg(OPT_msave_temp_labels);
   Opts.NoDwarfDirectoryAsm = Args.hasArg(OPT_fno_dwarf_directory_asm);
@@ -677,6 +703,8 @@
                                        OPT_fno_function_sections, false);
   Opts.DataSections = Args.hasFlag(OPT_fdata_sections,
                                    OPT_fno_data_sections, false);
+  Opts.StackSizeSection =
+      Args.hasFlag(OPT_fstack_size_section, OPT_fno_stack_size_section, false);
   Opts.UniqueSectionNames = Args.hasFlag(OPT_funique_section_names,
                                          OPT_fno_unique_section_names, true);
 
@@ -709,9 +737,13 @@
   Opts.VectorizeLoop = Args.hasArg(OPT_vectorize_loops);
   Opts.VectorizeSLP = Args.hasArg(OPT_vectorize_slp);
 
+  Opts.PreferVectorWidth = Args.getLastArgValue(OPT_mprefer_vector_width_EQ);
+
   Opts.MainFileName = Args.getLastArgValue(OPT_main_file_name);
   Opts.VerifyModule = !Args.hasArg(OPT_disable_llvm_verifier);
 
+  Opts.ControlFlowGuard = Args.hasArg(OPT_cfguard);
+
   Opts.DisableGCov = Args.hasArg(OPT_test_coverage);
   Opts.EmitGcovArcs = Args.hasArg(OPT_femit_coverage_data);
   Opts.EmitGcovNotes = Args.hasArg(OPT_femit_coverage_notes);
@@ -776,13 +808,34 @@
 
   Opts.PreserveVec3Type = Args.hasArg(OPT_fpreserve_vec3_type);
   Opts.InstrumentFunctions = Args.hasArg(OPT_finstrument_functions);
+  Opts.InstrumentFunctionsAfterInlining =
+      Args.hasArg(OPT_finstrument_functions_after_inlining);
+  Opts.InstrumentFunctionEntryBare =
+      Args.hasArg(OPT_finstrument_function_entry_bare);
   Opts.XRayInstrumentFunctions = Args.hasArg(OPT_fxray_instrument);
+  Opts.XRayAlwaysEmitCustomEvents =
+      Args.hasArg(OPT_fxray_always_emit_customevents);
   Opts.XRayInstructionThreshold =
       getLastArgIntValue(Args, OPT_fxray_instruction_threshold_EQ, 200, Diags);
   Opts.InstrumentForProfiling = Args.hasArg(OPT_pg);
   Opts.CallFEntry = Args.hasArg(OPT_mfentry);
   Opts.EmitOpenCLArgMetadata = Args.hasArg(OPT_cl_kernel_arg_info);
 
+  if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) {
+    StringRef Name = A->getValue();
+    if (Name == "full") {
+      Opts.CFProtectionReturn = 1;
+      Opts.CFProtectionBranch = 1;
+    } else if (Name == "return")
+      Opts.CFProtectionReturn = 1;
+    else if (Name == "branch")
+      Opts.CFProtectionBranch = 1;
+    else if (Name != "none") {
+      Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name;
+      Success = false;
+    }
+  }
+
   if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections,
                                      OPT_compress_debug_sections_EQ)) {
     if (A->getOption().getID() == OPT_compress_debug_sections) {
@@ -839,7 +892,16 @@
                    false);
   Opts.SanitizeMinimalRuntime = Args.hasArg(OPT_fsanitize_minimal_runtime);
   Opts.SanitizeCfiCrossDso = Args.hasArg(OPT_fsanitize_cfi_cross_dso);
+  Opts.SanitizeCfiICallGeneralizePointers =
+      Args.hasArg(OPT_fsanitize_cfi_icall_generalize_pointers);
   Opts.SanitizeStats = Args.hasArg(OPT_fsanitize_stats);
+  if (Arg *A = Args.getLastArg(
+          OPT_fsanitize_address_poison_class_member_array_new_cookie,
+          OPT_fno_sanitize_address_poison_class_member_array_new_cookie)) {
+    Opts.SanitizeAddressPoisonClassMemberArrayNewCookie =
+        A->getOption().getID() ==
+        OPT_fsanitize_address_poison_class_member_array_new_cookie;
+  }
   if (Arg *A = Args.getLastArg(OPT_fsanitize_address_use_after_scope,
                                OPT_fno_sanitize_address_use_after_scope)) {
     Opts.SanitizeAddressUseAfterScope =
@@ -864,6 +926,8 @@
     Opts.StackProbeSize = StackProbeSize;
   }
 
+  Opts.NoStackArgProbe = Args.hasArg(OPT_mno_stack_arg_probe);
+
   if (Arg *A = Args.getLastArg(OPT_fobjc_dispatch_method_EQ)) {
     StringRef Name = A->getValue();
     unsigned Method = llvm::StringSwitch<unsigned>(Name)
@@ -880,8 +944,12 @@
     }
   }
 
-  Opts.EmulatedTLS =
-      Args.hasFlag(OPT_femulated_tls, OPT_fno_emulated_tls, false);
+  if (Args.getLastArg(OPT_femulated_tls) ||
+      Args.getLastArg(OPT_fno_emulated_tls)) {
+    Opts.ExplicitEmulatedTLS = true;
+    Opts.EmulatedTLS =
+        Args.hasFlag(OPT_femulated_tls, OPT_fno_emulated_tls, false);
+  }
 
   if (Arg *A = Args.getLastArg(OPT_ftlsmodel_EQ)) {
     StringRef Name = A->getValue();
@@ -984,8 +1052,8 @@
                       Args.getAllArgValues(OPT_fsanitize_trap_EQ), Diags,
                       Opts.SanitizeTrap);
 
-  Opts.CudaGpuBinaryFileNames =
-      Args.getAllArgValues(OPT_fcuda_include_gpubinary);
+  Opts.CudaGpuBinaryFileName =
+      Args.getLastArgValue(OPT_fcuda_include_gpubinary);
 
   Opts.Backchain = Args.hasArg(OPT_mbackchain);
 
@@ -1057,6 +1125,26 @@
           llvm::sys::Process::StandardErrHasColors());
 }
 
+static bool checkVerifyPrefixes(const std::vector<std::string> &VerifyPrefixes,
+                                DiagnosticsEngine *Diags) {
+  bool Success = true;
+  for (const auto &Prefix : VerifyPrefixes) {
+    // Every prefix must start with a letter and contain only alphanumeric
+    // characters, hyphens, and underscores.
+    auto BadChar = std::find_if(Prefix.begin(), Prefix.end(),
+                                [](char C){return !isAlphanumeric(C)
+                                                  && C != '-' && C != '_';});
+    if (BadChar != Prefix.end() || !isLetter(Prefix[0])) {
+      Success = false;
+      if (Diags) {
+        Diags->Report(diag::err_drv_invalid_value) << "-verify=" << Prefix;
+        Diags->Report(diag::note_drv_verify_prefix_spelling);
+      }
+    }
+  }
+  return Success;
+}
+
 bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args,
                                 DiagnosticsEngine *Diags,
                                 bool DefaultDiagColor, bool DefaultShowOpt) {
@@ -1144,7 +1232,18 @@
   Opts.ShowSourceRanges = Args.hasArg(OPT_fdiagnostics_print_source_range_info);
   Opts.ShowParseableFixits = Args.hasArg(OPT_fdiagnostics_parseable_fixits);
   Opts.ShowPresumedLoc = !Args.hasArg(OPT_fno_diagnostics_use_presumed_location);
-  Opts.VerifyDiagnostics = Args.hasArg(OPT_verify);
+  Opts.VerifyDiagnostics = Args.hasArg(OPT_verify) || Args.hasArg(OPT_verify_EQ);
+  Opts.VerifyPrefixes = Args.getAllArgValues(OPT_verify_EQ);
+  if (Args.hasArg(OPT_verify))
+    Opts.VerifyPrefixes.push_back("expected");
+  // Keep VerifyPrefixes in its original order for the sake of diagnostics, and
+  // then sort it to prepare for fast lookup using std::binary_search.
+  if (!checkVerifyPrefixes(Opts.VerifyPrefixes, Diags)) {
+    Opts.VerifyDiagnostics = false;
+    Success = false;
+  }
+  else
+    std::sort(Opts.VerifyPrefixes.begin(), Opts.VerifyPrefixes.end());
   DiagnosticLevelMask DiagMask = DiagnosticLevelMask::None;
   Success &= parseDiagnosticLevelMask("-verify-ignore-unexpected=",
     Args.getAllArgValues(OPT_verify_ignore_unexpected_EQ),
@@ -1277,6 +1376,8 @@
       Opts.ProgramAction = frontend::PrintPreamble; break;
     case OPT_E:
       Opts.ProgramAction = frontend::PrintPreprocessedInput; break;
+    case OPT_templight_dump:
+      Opts.ProgramAction = frontend::TemplightDump; break;
     case OPT_rewrite_macros:
       Opts.ProgramAction = frontend::RewriteMacros; break;
     case OPT_rewrite_objc:
@@ -1366,6 +1467,8 @@
     = Args.hasArg(OPT_code_completion_patterns);
   Opts.CodeCompleteOpts.IncludeGlobals
     = !Args.hasArg(OPT_no_code_completion_globals);
+  Opts.CodeCompleteOpts.IncludeNamespaceLevelDecls
+    = !Args.hasArg(OPT_no_code_completion_ns_level_decls);
   Opts.CodeCompleteOpts.IncludeBriefComments
     = Args.hasArg(OPT_code_completion_brief_comments);
 
@@ -1717,11 +1820,7 @@
       break;
     case InputKind::CXX:
     case InputKind::ObjCXX:
-      // The PS4 uses C++11 as the default C++ standard.
-      if (T.isPS4())
-        LangStd = LangStandard::lang_gnucxx11;
-      else
-        LangStd = LangStandard::lang_gnucxx98;
+      LangStd = LangStandard::lang_gnucxx14;
       break;
     case InputKind::RenderScript:
       LangStd = LangStandard::lang_c99;
@@ -1733,10 +1832,11 @@
   Opts.LineComment = Std.hasLineComments();
   Opts.C99 = Std.isC99();
   Opts.C11 = Std.isC11();
+  Opts.C17 = Std.isC17();
   Opts.CPlusPlus = Std.isCPlusPlus();
   Opts.CPlusPlus11 = Std.isCPlusPlus11();
   Opts.CPlusPlus14 = Std.isCPlusPlus14();
-  Opts.CPlusPlus1z = Std.isCPlusPlus1z();
+  Opts.CPlusPlus17 = Std.isCPlusPlus17();
   Opts.CPlusPlus2a = Std.isCPlusPlus2a();
   Opts.Digraphs = Std.hasDigraphs();
   Opts.GNUMode = Std.isGNUMode();
@@ -1792,7 +1892,7 @@
   Opts.GNUKeywords = Opts.GNUMode;
   Opts.CXXOperatorNames = Opts.CPlusPlus;
 
-  Opts.AlignedAllocation = Opts.CPlusPlus1z;
+  Opts.AlignedAllocation = Opts.CPlusPlus17;
 
   Opts.DollarIdents = !Opts.AsmPreprocessor;
 }
@@ -1992,6 +2092,8 @@
   if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_approx_transcendentals))
     Opts.CUDADeviceApproxTranscendentals = 1;
 
+  Opts.CUDARelocatableDeviceCode = Args.hasArg(OPT_fcuda_rdc);
+
   if (Opts.ObjC1) {
     if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
       StringRef value = arg->getValue();
@@ -2111,7 +2213,7 @@
   // Mimicing gcc's behavior, trigraphs are only enabled if -trigraphs
   // is specified, or -std is set to a conforming mode.
   // Trigraphs are disabled by default in c++1z onwards.
-  Opts.Trigraphs = !Opts.GNUMode && !Opts.MSVCCompat && !Opts.CPlusPlus1z;
+  Opts.Trigraphs = !Opts.GNUMode && !Opts.MSVCCompat && !Opts.CPlusPlus17;
   Opts.Trigraphs =
       Args.hasFlag(OPT_ftrigraphs, OPT_fno_trigraphs, Opts.Trigraphs);
 
@@ -2131,7 +2233,18 @@
   Opts.Exceptions = Args.hasArg(OPT_fexceptions);
   Opts.ObjCExceptions = Args.hasArg(OPT_fobjc_exceptions);
   Opts.CXXExceptions = Args.hasArg(OPT_fcxx_exceptions);
-  Opts.SjLjExceptions = Args.hasArg(OPT_fsjlj_exceptions);
+
+  // Handle exception personalities
+  Arg *A = Args.getLastArg(options::OPT_fsjlj_exceptions,
+                           options::OPT_fseh_exceptions,
+                           options::OPT_fdwarf_exceptions);
+  if (A) {
+    const Option &Opt = A->getOption();
+    Opts.SjLjExceptions = Opt.matches(options::OPT_fsjlj_exceptions);
+    Opts.SEHExceptions = Opt.matches(options::OPT_fseh_exceptions);
+    Opts.DWARFExceptions = Opt.matches(options::OPT_fdwarf_exceptions);
+  }
+
   Opts.ExternCNoUnwind = Args.hasArg(OPT_fexternc_nounwind);
   Opts.TraditionalCPP = Args.hasArg(OPT_traditional_cpp);
 
@@ -2141,7 +2254,7 @@
     && Opts.OpenCLVersion >= 200);
   Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional);
   Opts.CoroutinesTS = Args.hasArg(OPT_fcoroutines_ts);
-  
+
   // Enable [[]] attributes in C++11 by default.
   Opts.DoubleSquareBracketAttributes =
       Args.hasFlag(OPT_fdouble_square_bracket_attributes,
@@ -2308,12 +2421,12 @@
   // Check for MS default calling conventions being specified.
   if (Arg *A = Args.getLastArg(OPT_fdefault_calling_conv_EQ)) {
     LangOptions::DefaultCallingConvention DefaultCC =
-        llvm::StringSwitch<LangOptions::DefaultCallingConvention>(
-            A->getValue())
+        llvm::StringSwitch<LangOptions::DefaultCallingConvention>(A->getValue())
             .Case("cdecl", LangOptions::DCC_CDecl)
             .Case("fastcall", LangOptions::DCC_FastCall)
             .Case("stdcall", LangOptions::DCC_StdCall)
             .Case("vectorcall", LangOptions::DCC_VectorCall)
+            .Case("regcall", LangOptions::DCC_RegCall)
             .Default(LangOptions::DCC_None);
     if (DefaultCC == LangOptions::DCC_None)
       Diags.Report(diag::err_drv_invalid_value)
@@ -2324,7 +2437,8 @@
     bool emitError = (DefaultCC == LangOptions::DCC_FastCall ||
                       DefaultCC == LangOptions::DCC_StdCall) &&
                      Arch != llvm::Triple::x86;
-    emitError |= DefaultCC == LangOptions::DCC_VectorCall &&
+    emitError |= (DefaultCC == LangOptions::DCC_VectorCall ||
+                  DefaultCC == LangOptions::DCC_RegCall) &&
                  !(Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64);
     if (emitError)
       Diags.Report(diag::err_drv_argument_not_allowed_with)
@@ -2350,16 +2464,22 @@
 
   // Check if -fopenmp is specified.
   Opts.OpenMP = Args.hasArg(options::OPT_fopenmp) ? 1 : 0;
+  // Check if -fopenmp-simd is specified.
+  Opts.OpenMPSimd = !Opts.OpenMP && Args.hasFlag(options::OPT_fopenmp_simd,
+                                                 options::OPT_fno_openmp_simd,
+                                                 /*Default=*/false);
   Opts.OpenMPUseTLS =
       Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls);
   Opts.OpenMPIsDevice =
       Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device);
 
-  if (Opts.OpenMP) {
-    int Version =
-        getLastArgIntValue(Args, OPT_fopenmp_version_EQ, Opts.OpenMP, Diags);
-    if (Version != 0)
+  if (Opts.OpenMP || Opts.OpenMPSimd) {
+    if (int Version =
+            getLastArgIntValue(Args, OPT_fopenmp_version_EQ,
+                               Opts.OpenMPSimd ? 45 : Opts.OpenMP, Diags))
       Opts.OpenMP = Version;
+    else if (Opts.OpenMPSimd)
+      Opts.OpenMP = 45;
     // Provide diagnostic when a given target is not expected to be an OpenMP
     // device or host.
     if (!Opts.OpenMPIsDevice) {
@@ -2412,6 +2532,10 @@
           << Opts.OMPHostIRFile;
   }
 
+  // set CUDA mode for OpenMP target NVPTX if specified in options
+  Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() &&
+                        Args.hasArg(options::OPT_fopenmp_cuda_mode);
+
   // Record whether the __DEPRECATED define was requested.
   Opts.Deprecated = Args.hasFlag(OPT_fdeprecated_macro,
                                  OPT_fno_deprecated_macro,
@@ -2481,6 +2605,11 @@
   Opts.XRayInstrument =
       Args.hasFlag(OPT_fxray_instrument, OPT_fnoxray_instrument, false);
 
+  // -fxray-always-emit-customevents
+  Opts.XRayAlwaysEmitCustomEvents =
+      Args.hasFlag(OPT_fxray_always_emit_customevents,
+                   OPT_fnoxray_always_emit_customevents, false);
+
   // -fxray-{always,never}-instrument= filenames.
   Opts.XRayAlwaysInstrumentFiles =
       Args.getAllArgValues(OPT_fxray_always_instrument);
@@ -2517,6 +2646,7 @@
   case frontend::RewriteObjC:
   case frontend::RewriteTest:
   case frontend::RunAnalysis:
+  case frontend::TemplightDump:
   case frontend::MigrateSource:
     return false;
 
@@ -2533,7 +2663,6 @@
 }
 
 static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
-                                  FileManager &FileMgr,
                                   DiagnosticsEngine &Diags,
                                   frontend::ActionKind Action) {
   using namespace options;
@@ -2659,11 +2788,11 @@
   Opts.FeaturesAsWritten = Args.getAllArgValues(OPT_target_feature);
   Opts.LinkerVersion = Args.getLastArgValue(OPT_target_linker_version);
   Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple));
-  Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ);
   // Use the default target triple if unspecified.
   if (Opts.Triple.empty())
     Opts.Triple = llvm::sys::getDefaultTargetTriple();
   Opts.OpenCLExtensionsAsWritten = Args.getAllArgValues(OPT_cl_ext_EQ);
+  Opts.ForceEnableInt128 = Args.hasArg(OPT_fforce_enable_int128);
 }
 
 bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
@@ -2690,7 +2819,13 @@
 
   // Issue errors on unknown arguments.
   for (const Arg *A : Args.filtered(OPT_UNKNOWN)) {
-    Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args);
+    auto ArgString = A->getAsString(Args);
+    std::string Nearest;
+    if (Opts->findNearest(ArgString, Nearest, IncludedFlagsBitmask) > 1)
+      Diags.Report(diag::err_drv_unknown_argument) << ArgString;
+    else
+      Diags.Report(diag::err_drv_unknown_argument_with_suggestion)
+          << ArgString << Nearest;
     Success = false;
   }
 
@@ -2750,12 +2885,7 @@
       !LangOpts.Sanitize.has(SanitizerKind::Address) &&
       !LangOpts.Sanitize.has(SanitizerKind::Memory);
 
-  // FIXME: ParsePreprocessorArgs uses the FileManager to read the contents of
-  // PCH file and find the original header name. Remove the need to do that in
-  // ParsePreprocessorArgs and remove the FileManager
-  // parameters from the function and the "FileManager.h" #include.
-  FileManager FileMgr(Res.getFileSystemOpts());
-  ParsePreprocessorArgs(Res.getPreprocessorOpts(), Args, FileMgr, Diags,
+  ParsePreprocessorArgs(Res.getPreprocessorOpts(), Args, Diags,
                         Res.getFrontendOpts().ProgramAction);
   ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), Args,
                               Res.getFrontendOpts().ProgramAction);
diff --git a/lib/Frontend/FrontendAction.cpp b/lib/Frontend/FrontendAction.cpp
index 12226b2..1b4d789 100644
--- a/lib/Frontend/FrontendAction.cpp
+++ b/lib/Frontend/FrontendAction.cpp
@@ -858,6 +858,13 @@
       CI.getDiagnostics().Report(diag::err_module_map_not_found) << Filename;
   }
 
+  // Add a module declaration scope so that modules from -fmodule-map-file
+  // arguments may shadow modules found implicitly in search paths.
+  CI.getPreprocessor()
+      .getHeaderSearchInfo()
+      .getModuleMap()
+      .finishModuleDeclarationScope();
+
   // If we were asked to load any module files, do so now.
   for (const auto &ModuleFile : CI.getFrontendOpts().ModuleFiles)
     if (!CI.loadModuleFile(ModuleFile))
diff --git a/lib/Frontend/FrontendActions.cpp b/lib/Frontend/FrontendActions.cpp
index 86460f1..1eff566 100644
--- a/lib/Frontend/FrontendActions.cpp
+++ b/lib/Frontend/FrontendActions.cpp
@@ -18,17 +18,36 @@
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Sema/TemplateInstCallback.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/ASTWriter.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
 #include <memory>
 #include <system_error>
 
 using namespace clang;
 
+namespace {
+CodeCompleteConsumer *GetCodeCompletionConsumer(CompilerInstance &CI) {
+  return CI.hasCodeCompletionConsumer() ? &CI.getCodeCompletionConsumer()
+                                        : nullptr;
+}
+
+void EnsureSemaIsCreated(CompilerInstance &CI, FrontendAction &Action) {
+  if (Action.hasCodeCompletionSupport() &&
+      !CI.getFrontendOpts().CodeCompletionAt.FileName.empty())
+    CI.createCodeCompletionConsumer();
+
+  if (!CI.hasSema())
+    CI.createSema(Action.getTranslationUnitKind(),
+                  GetCodeCompletionConsumer(CI));
+}
+} // namespace
+
 //===----------------------------------------------------------------------===//
 // Custom Actions
 //===----------------------------------------------------------------------===//
@@ -80,9 +99,12 @@
 std::unique_ptr<ASTConsumer>
 GeneratePCHAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
   std::string Sysroot;
+  if (!ComputeASTConsumerArguments(CI, /*ref*/ Sysroot))
+    return nullptr;
+
   std::string OutputFile;
   std::unique_ptr<raw_pwrite_stream> OS =
-      ComputeASTConsumerArguments(CI, InFile, Sysroot, OutputFile);
+      CreateOutputFile(CI, InFile, /*ref*/ OutputFile);
   if (!OS)
     return nullptr;
 
@@ -103,17 +125,20 @@
   return llvm::make_unique<MultiplexConsumer>(std::move(Consumers));
 }
 
-std::unique_ptr<raw_pwrite_stream>
-GeneratePCHAction::ComputeASTConsumerArguments(CompilerInstance &CI,
-                                               StringRef InFile,
-                                               std::string &Sysroot,
-                                               std::string &OutputFile) {
+bool GeneratePCHAction::ComputeASTConsumerArguments(CompilerInstance &CI,
+                                                    std::string &Sysroot) {
   Sysroot = CI.getHeaderSearchOpts().Sysroot;
   if (CI.getFrontendOpts().RelocatablePCH && Sysroot.empty()) {
     CI.getDiagnostics().Report(diag::err_relocatable_without_isysroot);
-    return nullptr;
+    return false;
   }
 
+  return true;
+}
+
+std::unique_ptr<llvm::raw_pwrite_stream>
+GeneratePCHAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile,
+                                    std::string &OutputFile) {
   // We use createOutputFile here because this is exposed via libclang, and we
   // must disable the RemoveFileOnSignal behavior.
   // We use a temporary to avoid race conditions.
@@ -256,6 +281,140 @@
 }
 
 namespace {
+struct TemplightEntry {
+  std::string Name;
+  std::string Kind;
+  std::string Event;
+  std::string DefinitionLocation;
+  std::string PointOfInstantiation;
+};
+} // namespace
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<TemplightEntry> {
+  static void mapping(IO &io, TemplightEntry &fields) {
+    io.mapRequired("name", fields.Name);
+    io.mapRequired("kind", fields.Kind);
+    io.mapRequired("event", fields.Event);
+    io.mapRequired("orig", fields.DefinitionLocation);
+    io.mapRequired("poi", fields.PointOfInstantiation);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+class DefaultTemplateInstCallback : public TemplateInstantiationCallback {
+  using CodeSynthesisContext = Sema::CodeSynthesisContext;
+
+public:
+  void initialize(const Sema &) override {}
+
+  void finalize(const Sema &) override {}
+
+  void atTemplateBegin(const Sema &TheSema,
+                       const CodeSynthesisContext &Inst) override {
+    displayTemplightEntry<true>(llvm::outs(), TheSema, Inst);
+  }
+
+  void atTemplateEnd(const Sema &TheSema,
+                     const CodeSynthesisContext &Inst) override {
+    displayTemplightEntry<false>(llvm::outs(), TheSema, Inst);
+  }
+
+private:
+  static std::string toString(CodeSynthesisContext::SynthesisKind Kind) {
+    switch (Kind) {
+    case CodeSynthesisContext::TemplateInstantiation:
+      return "TemplateInstantiation";
+    case CodeSynthesisContext::DefaultTemplateArgumentInstantiation:
+      return "DefaultTemplateArgumentInstantiation";
+    case CodeSynthesisContext::DefaultFunctionArgumentInstantiation:
+      return "DefaultFunctionArgumentInstantiation";
+    case CodeSynthesisContext::ExplicitTemplateArgumentSubstitution:
+      return "ExplicitTemplateArgumentSubstitution";
+    case CodeSynthesisContext::DeducedTemplateArgumentSubstitution:
+      return "DeducedTemplateArgumentSubstitution";
+    case CodeSynthesisContext::PriorTemplateArgumentSubstitution:
+      return "PriorTemplateArgumentSubstitution";
+    case CodeSynthesisContext::DefaultTemplateArgumentChecking:
+      return "DefaultTemplateArgumentChecking";
+    case CodeSynthesisContext::ExceptionSpecInstantiation:
+      return "ExceptionSpecInstantiation";
+    case CodeSynthesisContext::DeclaringSpecialMember:
+      return "DeclaringSpecialMember";
+    case CodeSynthesisContext::DefiningSynthesizedFunction:
+      return "DefiningSynthesizedFunction";
+    case CodeSynthesisContext::Memoization:
+      return "Memoization";
+    }
+    return "";
+  }
+
+  template <bool BeginInstantiation>
+  static void displayTemplightEntry(llvm::raw_ostream &Out, const Sema &TheSema,
+                                    const CodeSynthesisContext &Inst) {
+    std::string YAML;
+    {
+      llvm::raw_string_ostream OS(YAML);
+      llvm::yaml::Output YO(OS);
+      TemplightEntry Entry =
+          getTemplightEntry<BeginInstantiation>(TheSema, Inst);
+      llvm::yaml::EmptyContext Context;
+      llvm::yaml::yamlize(YO, Entry, true, Context);
+    }
+    Out << "---" << YAML << "\n";
+  }
+
+  template <bool BeginInstantiation>
+  static TemplightEntry getTemplightEntry(const Sema &TheSema,
+                                          const CodeSynthesisContext &Inst) {
+    TemplightEntry Entry;
+    Entry.Kind = toString(Inst.Kind);
+    Entry.Event = BeginInstantiation ? "Begin" : "End";
+    if (auto *NamedTemplate = dyn_cast_or_null<NamedDecl>(Inst.Entity)) {
+      llvm::raw_string_ostream OS(Entry.Name);
+      NamedTemplate->getNameForDiagnostic(OS, TheSema.getLangOpts(), true);
+      const PresumedLoc DefLoc = 
+        TheSema.getSourceManager().getPresumedLoc(Inst.Entity->getLocation());
+      if(!DefLoc.isInvalid())
+        Entry.DefinitionLocation = std::string(DefLoc.getFilename()) + ":" +
+                                   std::to_string(DefLoc.getLine()) + ":" +
+                                   std::to_string(DefLoc.getColumn());
+    }
+    const PresumedLoc PoiLoc =
+        TheSema.getSourceManager().getPresumedLoc(Inst.PointOfInstantiation);
+    if (!PoiLoc.isInvalid()) {
+      Entry.PointOfInstantiation = std::string(PoiLoc.getFilename()) + ":" +
+                                   std::to_string(PoiLoc.getLine()) + ":" +
+                                   std::to_string(PoiLoc.getColumn());
+    }
+    return Entry;
+  }
+};
+} // namespace
+
+std::unique_ptr<ASTConsumer>
+TemplightDumpAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
+  return llvm::make_unique<ASTConsumer>();
+}
+
+void TemplightDumpAction::ExecuteAction() {
+  CompilerInstance &CI = getCompilerInstance();
+
+  // This part is normally done by ASTFrontEndAction, but needs to happen
+  // before Templight observers can be created
+  // FIXME: Move the truncation aspect of this into Sema, we delayed this till
+  // here so the source manager would be initialized.
+  EnsureSemaIsCreated(CI, *this);
+
+  CI.getSema().TemplateInstCallbacks.push_back(
+      llvm::make_unique<DefaultTemplateInstCallback>());
+  ASTFrontendAction::ExecuteAction();
+}
+
+namespace {
   /// \brief AST reader listener that dumps module information for a module
   /// file.
   class DumpModuleInfoListener : public ASTReaderListener {
diff --git a/lib/Frontend/InitHeaderSearch.cpp b/lib/Frontend/InitHeaderSearch.cpp
index 8c6face..0506975 100644
--- a/lib/Frontend/InitHeaderSearch.cpp
+++ b/lib/Frontend/InitHeaderSearch.cpp
@@ -216,6 +216,7 @@
     case llvm::Triple::NaCl:
     case llvm::Triple::PS4:
     case llvm::Triple::ELFIAMCU:
+    case llvm::Triple::Fuchsia:
       break;
     case llvm::Triple::Win32:
       if (triple.getEnvironment() != llvm::Triple::Cygnus)
@@ -255,6 +256,7 @@
 
   switch (os) {
   case llvm::Triple::Linux:
+  case llvm::Triple::Solaris:
     llvm_unreachable("Include management is handled in the driver.");
 
   case llvm::Triple::CloudABI: {
@@ -321,6 +323,7 @@
   case llvm::Triple::RTEMS:
   case llvm::Triple::NaCl:
   case llvm::Triple::ELFIAMCU:
+  case llvm::Triple::Fuchsia:
     break;
   case llvm::Triple::PS4: {
     // <isysroot> gets prepended later in AddPath().
@@ -396,6 +399,7 @@
 
   switch (os) {
   case llvm::Triple::Linux:
+  case llvm::Triple::Solaris:
     llvm_unreachable("Include management is handled in the driver.");
     break;
   case llvm::Triple::Win32:
@@ -443,6 +447,7 @@
     break; // Everything else continues to use this routine's logic.
 
   case llvm::Triple::Linux:
+  case llvm::Triple::Solaris:
     return;
 
   case llvm::Triple::Win32:
diff --git a/lib/Frontend/InitPreprocessor.cpp b/lib/Frontend/InitPreprocessor.cpp
index 1dfbf18..321d963 100644
--- a/lib/Frontend/InitPreprocessor.cpp
+++ b/lib/Frontend/InitPreprocessor.cpp
@@ -193,7 +193,7 @@
 /// the width, suffix, and signedness of the given type
 static void DefineTypeSize(const Twine &MacroName, TargetInfo::IntType Ty,
                            const TargetInfo &TI, MacroBuilder &Builder) {
-  DefineTypeSize(MacroName, TI.getTypeWidth(Ty), TI.getTypeConstantSuffix(Ty), 
+  DefineTypeSize(MacroName, TI.getTypeWidth(Ty), TI.getTypeConstantSuffix(Ty),
                  TI.isTypeSigned(Ty), Builder);
 }
 
@@ -303,25 +303,25 @@
 
 /// \brief Add definitions required for a smooth interaction between
 /// Objective-C++ automated reference counting and libstdc++ (4.2).
-static void AddObjCXXARCLibstdcxxDefines(const LangOptions &LangOpts, 
+static void AddObjCXXARCLibstdcxxDefines(const LangOptions &LangOpts,
                                          MacroBuilder &Builder) {
   Builder.defineMacro("_GLIBCXX_PREDEFINED_OBJC_ARC_IS_SCALAR");
-  
+
   std::string Result;
   {
-    // Provide specializations for the __is_scalar type trait so that 
+    // Provide specializations for the __is_scalar type trait so that
     // lifetime-qualified objects are not considered "scalar" types, which
     // libstdc++ uses as an indicator of the presence of trivial copy, assign,
     // default-construct, and destruct semantics (none of which hold for
     // lifetime-qualified objects in ARC).
     llvm::raw_string_ostream Out(Result);
-    
+
     Out << "namespace std {\n"
         << "\n"
         << "struct __true_type;\n"
         << "struct __false_type;\n"
         << "\n";
-    
+
     Out << "template<typename _Tp> struct __is_scalar;\n"
         << "\n";
 
@@ -333,7 +333,7 @@
           << "};\n"
           << "\n";
     }
-      
+
     if (LangOpts.ObjCWeak) {
       Out << "template<typename _Tp>\n"
           << "struct __is_scalar<__attribute__((objc_ownership(weak))) _Tp> {\n"
@@ -342,7 +342,7 @@
           << "};\n"
           << "\n";
     }
-    
+
     if (LangOpts.ObjCAutoRefCount) {
       Out << "template<typename _Tp>\n"
           << "struct __is_scalar<__attribute__((objc_ownership(autoreleasing)))"
@@ -352,7 +352,7 @@
           << "};\n"
           << "\n";
     }
-      
+
     Out << "}\n";
   }
   Builder.append(Result);
@@ -370,7 +370,9 @@
     Builder.defineMacro("__STDC_HOSTED__");
 
   if (!LangOpts.CPlusPlus) {
-    if (LangOpts.C11)
+    if (LangOpts.C17)
+      Builder.defineMacro("__STDC_VERSION__", "201710L");
+    else if (LangOpts.C11)
       Builder.defineMacro("__STDC_VERSION__", "201112L");
     else if (LangOpts.C99)
       Builder.defineMacro("__STDC_VERSION__", "199901L");
@@ -383,7 +385,7 @@
     // C++17 [cpp.predefined]p1:
     //   The name __cplusplus is defined to the value 201703L when compiling a
     //   C++ translation unit.
-    else if (LangOpts.CPlusPlus1z)
+    else if (LangOpts.CPlusPlus17)
       Builder.defineMacro("__cplusplus", "201703L");
     // C++1y [cpp.predefined]p1:
     //   The name __cplusplus is defined to the value 201402L when compiling a
@@ -483,12 +485,12 @@
     Builder.defineMacro("__cpp_user_defined_literals", "200809");
     Builder.defineMacro("__cpp_lambdas", "200907");
     Builder.defineMacro("__cpp_constexpr",
-                        LangOpts.CPlusPlus1z ? "201603" : 
+                        LangOpts.CPlusPlus17 ? "201603" :
                         LangOpts.CPlusPlus14 ? "201304" : "200704");
     Builder.defineMacro("__cpp_range_based_for",
-                        LangOpts.CPlusPlus1z ? "201603" : "200907");
+                        LangOpts.CPlusPlus17 ? "201603" : "200907");
     Builder.defineMacro("__cpp_static_assert",
-                        LangOpts.CPlusPlus1z ? "201411" : "200410");
+                        LangOpts.CPlusPlus17 ? "201411" : "200410");
     Builder.defineMacro("__cpp_decltype", "200707");
     Builder.defineMacro("__cpp_attributes", "200809");
     Builder.defineMacro("__cpp_rvalue_references", "200610");
@@ -518,7 +520,7 @@
     Builder.defineMacro("__cpp_sized_deallocation", "201309");
 
   // C++17 features.
-  if (LangOpts.CPlusPlus1z) {
+  if (LangOpts.CPlusPlus17) {
     Builder.defineMacro("__cpp_hex_float", "201603");
     Builder.defineMacro("__cpp_inline_variables", "201606");
     Builder.defineMacro("__cpp_noexcept_function_type", "201510");
@@ -559,7 +561,7 @@
   Builder.defineMacro("__clang_patchlevel__", TOSTR(CLANG_VERSION_PATCHLEVEL));
 #undef TOSTR
 #undef TOSTR2
-  Builder.defineMacro("__clang_version__", 
+  Builder.defineMacro("__clang_version__",
                       "\"" CLANG_VERSION_STRING " "
                       + getClangFullRepositoryVersion() + "\"");
   if (!LangOpts.MSVCCompat) {
@@ -599,7 +601,7 @@
   // As sad as it is, enough software depends on the __VERSION__ for version
   // checks that it is necessary to report 4.2.1 (the base GCC version we claim
   // compatibility with) first.
-  Builder.defineMacro("__VERSION__", "\"4.2.1 Compatible " + 
+  Builder.defineMacro("__VERSION__", "\"4.2.1 Compatible " +
                       Twine(getClangFullCPPVersion()) + "\"");
 
   // Initialize language-specific preprocessor defines.
@@ -614,7 +616,7 @@
   if (LangOpts.ObjC1) {
     if (LangOpts.ObjCRuntime.isNonFragile()) {
       Builder.defineMacro("__OBJC2__");
-      
+
       if (LangOpts.ObjCExceptions)
         Builder.defineMacro("OBJC_ZEROCOST_EXCEPTIONS");
     }
@@ -677,8 +679,14 @@
     Builder.defineMacro("__EXCEPTIONS");
   if (!LangOpts.MSVCCompat && LangOpts.RTTI)
     Builder.defineMacro("__GXX_RTTI");
+
   if (LangOpts.SjLjExceptions)
     Builder.defineMacro("__USING_SJLJ_EXCEPTIONS__");
+  else if (LangOpts.SEHExceptions)
+    Builder.defineMacro("__SEH__");
+  else if (LangOpts.DWARFExceptions &&
+          (TI.getTriple().isThumb() || TI.getTriple().isARM()))
+    Builder.defineMacro("__ARM_DWARF_EH__");
 
   if (LangOpts.Deprecated)
     Builder.defineMacro("__DEPRECATED");
@@ -745,6 +753,7 @@
   DefineTypeSize("__LONG_MAX__", TargetInfo::SignedLong, TI, Builder);
   DefineTypeSize("__LONG_LONG_MAX__", TargetInfo::SignedLongLong, TI, Builder);
   DefineTypeSize("__WCHAR_MAX__", TI.getWCharType(), TI, Builder);
+  DefineTypeSize("__WINT_MAX__", TI.getWIntType(), TI, Builder);
   DefineTypeSize("__INTMAX_MAX__", TI.getIntMaxType(), TI, Builder);
   DefineTypeSize("__SIZE_MAX__", TI.getSizeType(), TI, Builder);
 
@@ -987,6 +996,11 @@
     Builder.defineMacro("__nullable", "_Nullable");
   }
 
+  // Add a macro to differentiate between regular iOS/tvOS/watchOS targets and
+  // the corresponding simulator targets.
+  if (TI.getTriple().isOSDarwin() && TI.getTriple().isSimulatorEnvironment())
+    Builder.defineMacro("__APPLE_EMBEDDED_SIMULATOR__", "1");
+
   // OpenMP definition
   // OpenMP 2.2:
   //   In implementations that support a preprocessor, the _OPENMP
@@ -1003,8 +1017,8 @@
     Builder.defineMacro("_OPENMP", "201511");
     break;
   default:
-    // Default version is OpenMP 3.1
-    Builder.defineMacro("_OPENMP", "201107");
+    // Default version is OpenMP 3.1, in Simd only mode - 4.5
+    Builder.defineMacro("_OPENMP", LangOpts.OpenMPSimd ? "201511" : "201107");
     break;
   }
 
@@ -1028,6 +1042,10 @@
         LangOpts.OpenCLVersion)) \
       Builder.defineMacro(#Ext);
 #include "clang/Basic/OpenCLExtensions.def"
+
+    auto Arch = TI.getTriple().getArch();
+    if (Arch == llvm::Triple::spir || Arch == llvm::Triple::spir64)
+      Builder.defineMacro("__IMAGE_SUPPORT__");
   }
 
   if (TI.hasInt128Type() && LangOpts.CPlusPlus && LangOpts.GNUMode) {
@@ -1086,7 +1104,7 @@
       }
     }
   }
-  
+
   // Even with predefines off, some macros are still predefined.
   // These should all be defined in the preprocessor according to the
   // current language configuration.
@@ -1132,7 +1150,7 @@
   // Instruct the preprocessor to skip the preamble.
   PP.setSkipMainFilePreamble(InitOpts.PrecompiledPreambleBytes.first,
                              InitOpts.PrecompiledPreambleBytes.second);
-                          
+
   // Copy PredefinedBuffer into the Preprocessor.
   PP.setPredefines(Predefines.str());
 }
diff --git a/lib/Frontend/MultiplexConsumer.cpp b/lib/Frontend/MultiplexConsumer.cpp
index 16000bc..04a8f6c 100644
--- a/lib/Frontend/MultiplexConsumer.cpp
+++ b/lib/Frontend/MultiplexConsumer.cpp
@@ -119,12 +119,13 @@
                               const FunctionDecl *Delete,
                               Expr *ThisArg) override;
   void CompletedImplicitDefinition(const FunctionDecl *D) override;
-  void StaticDataMemberInstantiated(const VarDecl *D) override;
+  void InstantiationRequested(const ValueDecl *D) override;
+  void VariableDefinitionInstantiated(const VarDecl *D) override;
+  void FunctionDefinitionInstantiated(const FunctionDecl *D) override;
   void DefaultArgumentInstantiated(const ParmVarDecl *D) override;
   void DefaultMemberInitializerInstantiated(const FieldDecl *D) override;
   void AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD,
                                     const ObjCInterfaceDecl *IFD) override;
-  void FunctionDefinitionInstantiated(const FunctionDecl *D) override;
   void DeclarationMarkedUsed(const Decl *D) override;
   void DeclarationMarkedOpenMPThreadPrivate(const Decl *D) override;
   void DeclarationMarkedOpenMPDeclareTarget(const Decl *D,
@@ -193,10 +194,19 @@
   for (size_t i = 0, e = Listeners.size(); i != e; ++i)
     Listeners[i]->CompletedImplicitDefinition(D);
 }
-void MultiplexASTMutationListener::StaticDataMemberInstantiated(
-                                                             const VarDecl *D) {
+void MultiplexASTMutationListener::InstantiationRequested(const ValueDecl *D) {
   for (size_t i = 0, e = Listeners.size(); i != e; ++i)
-    Listeners[i]->StaticDataMemberInstantiated(D);
+    Listeners[i]->InstantiationRequested(D);
+}
+void MultiplexASTMutationListener::VariableDefinitionInstantiated(
+    const VarDecl *D) {
+  for (size_t i = 0, e = Listeners.size(); i != e; ++i)
+    Listeners[i]->VariableDefinitionInstantiated(D);
+}
+void MultiplexASTMutationListener::FunctionDefinitionInstantiated(
+    const FunctionDecl *D) {
+  for (auto &Listener : Listeners)
+    Listener->FunctionDefinitionInstantiated(D);
 }
 void MultiplexASTMutationListener::DefaultArgumentInstantiated(
                                                          const ParmVarDecl *D) {
@@ -214,11 +224,6 @@
   for (size_t i = 0, e = Listeners.size(); i != e; ++i)
     Listeners[i]->AddedObjCCategoryToInterface(CatD, IFD);
 }
-void MultiplexASTMutationListener::FunctionDefinitionInstantiated(
-    const FunctionDecl *D) {
-  for (auto &Listener : Listeners)
-    Listener->FunctionDefinitionInstantiated(D);
-}
 void MultiplexASTMutationListener::DeclarationMarkedUsed(const Decl *D) {
   for (size_t i = 0, e = Listeners.size(); i != e; ++i)
     Listeners[i]->DeclarationMarkedUsed(D);
diff --git a/lib/Frontend/PrecompiledPreamble.cpp b/lib/Frontend/PrecompiledPreamble.cpp
index 81466d0..d761f6e 100644
--- a/lib/Frontend/PrecompiledPreamble.cpp
+++ b/lib/Frontend/PrecompiledPreamble.cpp
@@ -24,16 +24,45 @@
 #include "clang/Serialization/ASTWriter.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/MutexGuard.h"
 #include "llvm/Support/Process.h"
+#include <limits>
+#include <utility>
 
 using namespace clang;
 
 namespace {
 
+StringRef getInMemoryPreamblePath() {
+#if defined(LLVM_ON_UNIX)
+  return "/__clang_tmp/___clang_inmemory_preamble___";
+#elif defined(LLVM_ON_WIN32)
+  return "C:\\__clang_tmp\\___clang_inmemory_preamble___";
+#else
+#warning "Unknown platform. Defaulting to UNIX-style paths for in-memory PCHs"
+  return "/__clang_tmp/___clang_inmemory_preamble___";
+#endif
+}
+
+IntrusiveRefCntPtr<vfs::FileSystem>
+createVFSOverlayForPreamblePCH(StringRef PCHFilename,
+                               std::unique_ptr<llvm::MemoryBuffer> PCHBuffer,
+                               IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
+  // We want only the PCH file from the real filesystem to be available,
+  // so we create an in-memory VFS with just that and overlay it on top.
+  IntrusiveRefCntPtr<vfs::InMemoryFileSystem> PCHFS(
+      new vfs::InMemoryFileSystem());
+  PCHFS->addFile(PCHFilename, 0, std::move(PCHBuffer));
+  IntrusiveRefCntPtr<vfs::OverlayFileSystem> Overlay(
+      new vfs::OverlayFileSystem(VFS));
+  Overlay->pushOverlay(PCHFS);
+  return Overlay;
+}
+
 /// Keeps a track of files to be deleted in destructor.
 class TemporaryFiles {
 public:
@@ -86,23 +115,11 @@
   llvm::sys::fs::remove(File);
 }
 
-class PreambleMacroCallbacks : public PPCallbacks {
-public:
-  PreambleMacroCallbacks(PreambleCallbacks &Callbacks) : Callbacks(Callbacks) {}
-
-  void MacroDefined(const Token &MacroNameTok,
-                    const MacroDirective *MD) override {
-    Callbacks.HandleMacroDefined(MacroNameTok, MD);
-  }
-
-private:
-  PreambleCallbacks &Callbacks;
-};
-
 class PrecompilePreambleAction : public ASTFrontendAction {
 public:
-  PrecompilePreambleAction(PreambleCallbacks &Callbacks)
-      : Callbacks(Callbacks) {}
+  PrecompilePreambleAction(std::string *InMemStorage,
+                           PreambleCallbacks &Callbacks)
+      : InMemStorage(InMemStorage), Callbacks(Callbacks) {}
 
   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) override;
@@ -123,6 +140,7 @@
   friend class PrecompilePreambleConsumer;
 
   bool HasEmittedPreamblePCH = false;
+  std::string *InMemStorage;
   PreambleCallbacks &Callbacks;
 };
 
@@ -164,21 +182,24 @@
 
 std::unique_ptr<ASTConsumer>
 PrecompilePreambleAction::CreateASTConsumer(CompilerInstance &CI,
-
                                             StringRef InFile) {
   std::string Sysroot;
-  std::string OutputFile;
-  std::unique_ptr<raw_ostream> OS =
-      GeneratePCHAction::ComputeASTConsumerArguments(CI, InFile, Sysroot,
-                                                     OutputFile);
+  if (!GeneratePCHAction::ComputeASTConsumerArguments(CI, Sysroot))
+    return nullptr;
+
+  std::unique_ptr<llvm::raw_ostream> OS;
+  if (InMemStorage) {
+    OS = llvm::make_unique<llvm::raw_string_ostream>(*InMemStorage);
+  } else {
+    std::string OutputFile;
+    OS = GeneratePCHAction::CreateOutputFile(CI, InFile, OutputFile);
+  }
   if (!OS)
     return nullptr;
 
   if (!CI.getFrontendOpts().RelocatablePCH)
     Sysroot.clear();
 
-  CI.getPreprocessor().addPPCallbacks(
-      llvm::make_unique<PreambleMacroCallbacks>(Callbacks));
   return llvm::make_unique<PrecompilePreambleConsumer>(
       *this, CI.getPreprocessor(), Sysroot, std::move(OS));
 }
@@ -202,7 +223,7 @@
     const CompilerInvocation &Invocation,
     const llvm::MemoryBuffer *MainFileBuffer, PreambleBounds Bounds,
     DiagnosticsEngine &Diagnostics, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
-    std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+    std::shared_ptr<PCHContainerOperations> PCHContainerOps, bool StoreInMemory,
     PreambleCallbacks &Callbacks) {
   assert(VFS && "VFS is null");
 
@@ -214,12 +235,19 @@
   PreprocessorOptions &PreprocessorOpts =
       PreambleInvocation->getPreprocessorOpts();
 
-  // Create a temporary file for the precompiled preamble. In rare
-  // circumstances, this can fail.
-  llvm::ErrorOr<PrecompiledPreamble::TempPCHFile> PreamblePCHFile =
-      PrecompiledPreamble::TempPCHFile::CreateNewPreamblePCHFile();
-  if (!PreamblePCHFile)
-    return BuildPreambleError::CouldntCreateTempFile;
+  llvm::Optional<TempPCHFile> TempFile;
+  if (!StoreInMemory) {
+    // Create a temporary file for the precompiled preamble. In rare
+    // circumstances, this can fail.
+    llvm::ErrorOr<PrecompiledPreamble::TempPCHFile> PreamblePCHFile =
+        PrecompiledPreamble::TempPCHFile::CreateNewPreamblePCHFile();
+    if (!PreamblePCHFile)
+      return BuildPreambleError::CouldntCreateTempFile;
+    TempFile = std::move(*PreamblePCHFile);
+  }
+
+  PCHStorage Storage = StoreInMemory ? PCHStorage(InMemoryPreamble())
+                                     : PCHStorage(std::move(*TempFile));
 
   // Save the preamble text for later; we'll need to compare against it for
   // subsequent reparses.
@@ -230,8 +258,8 @@
 
   // Tell the compiler invocation to generate a temporary precompiled header.
   FrontendOpts.ProgramAction = frontend::GeneratePCH;
-  // FIXME: Generate the precompiled header into memory?
-  FrontendOpts.OutputFile = PreamblePCHFile->getFilePath();
+  FrontendOpts.OutputFile = StoreInMemory ? getInMemoryPreamblePath()
+                                          : Storage.asFile().getFilePath();
   PreprocessorOpts.PrecompiledPreambleBytes.first = 0;
   PreprocessorOpts.PrecompiledPreambleBytes.second = false;
   // Inform preprocessor to record conditional stack when building the preamble.
@@ -303,10 +331,17 @@
   }
 
   std::unique_ptr<PrecompilePreambleAction> Act;
-  Act.reset(new PrecompilePreambleAction(Callbacks));
+  Act.reset(new PrecompilePreambleAction(
+      StoreInMemory ? &Storage.asMemory().Data : nullptr, Callbacks));
+  Callbacks.BeforeExecute(*Clang);
   if (!Act->BeginSourceFile(*Clang.get(), Clang->getFrontendOpts().Inputs[0]))
     return BuildPreambleError::BeginSourceFileFailed;
 
+  std::unique_ptr<PPCallbacks> DelegatedPPCallbacks =
+      Callbacks.createPPCallbacks();
+  if (DelegatedPPCallbacks)
+    Clang->getPreprocessor().addPPCallbacks(std::move(DelegatedPPCallbacks));
+
   Act->Execute();
 
   // Run the callbacks.
@@ -337,15 +372,36 @@
     }
   }
 
-  return PrecompiledPreamble(
-      std::move(*PreamblePCHFile), std::move(PreambleBytes),
-      PreambleEndsAtStartOfLine, std::move(FilesInPreamble));
+  return PrecompiledPreamble(std::move(Storage), std::move(PreambleBytes),
+                             PreambleEndsAtStartOfLine,
+                             std::move(FilesInPreamble));
 }
 
 PreambleBounds PrecompiledPreamble::getBounds() const {
   return PreambleBounds(PreambleBytes.size(), PreambleEndsAtStartOfLine);
 }
 
+std::size_t PrecompiledPreamble::getSize() const {
+  switch (Storage.getKind()) {
+  case PCHStorage::Kind::Empty:
+    assert(false && "Calling getSize() on invalid PrecompiledPreamble. "
+                    "Was it std::moved?");
+    return 0;
+  case PCHStorage::Kind::InMemory:
+    return Storage.asMemory().Data.size();
+  case PCHStorage::Kind::TempFile: {
+    uint64_t Result;
+    if (llvm::sys::fs::file_size(Storage.asFile().getFilePath(), Result))
+      return 0;
+
+    assert(Result <= std::numeric_limits<std::size_t>::max() &&
+           "file size did not fit into size_t");
+    return Result;
+  }
+  }
+  llvm_unreachable("Unhandled storage kind");
+}
+
 bool PrecompiledPreamble::CanReuse(const CompilerInvocation &Invocation,
                                    const llvm::MemoryBuffer *MainFileBuffer,
                                    PreambleBounds Bounds,
@@ -427,27 +483,28 @@
 }
 
 void PrecompiledPreamble::AddImplicitPreamble(
-    CompilerInvocation &CI, llvm::MemoryBuffer *MainFileBuffer) const {
-  auto &PreprocessorOpts = CI.getPreprocessorOpts();
+    CompilerInvocation &CI, IntrusiveRefCntPtr<vfs::FileSystem> &VFS,
+    llvm::MemoryBuffer *MainFileBuffer) const {
+  PreambleBounds Bounds(PreambleBytes.size(), PreambleEndsAtStartOfLine);
+  configurePreamble(Bounds, CI, VFS, MainFileBuffer);
+}
 
-  // Configure ImpicitPCHInclude.
-  PreprocessorOpts.PrecompiledPreambleBytes.first = PreambleBytes.size();
-  PreprocessorOpts.PrecompiledPreambleBytes.second = PreambleEndsAtStartOfLine;
-  PreprocessorOpts.ImplicitPCHInclude = PCHFile.getFilePath();
-  PreprocessorOpts.DisablePCHValidation = true;
-
-  // Remap main file to point to MainFileBuffer.
-  auto MainFilePath = CI.getFrontendOpts().Inputs[0].getFile();
-  PreprocessorOpts.addRemappedFile(MainFilePath, MainFileBuffer);
+void PrecompiledPreamble::OverridePreamble(
+    CompilerInvocation &CI, IntrusiveRefCntPtr<vfs::FileSystem> &VFS,
+    llvm::MemoryBuffer *MainFileBuffer) const {
+  auto Bounds = ComputePreambleBounds(*CI.getLangOpts(), MainFileBuffer, 0);
+  configurePreamble(Bounds, CI, VFS, MainFileBuffer);
 }
 
 PrecompiledPreamble::PrecompiledPreamble(
-    TempPCHFile PCHFile, std::vector<char> PreambleBytes,
+    PCHStorage Storage, std::vector<char> PreambleBytes,
     bool PreambleEndsAtStartOfLine,
     llvm::StringMap<PreambleFileHash> FilesInPreamble)
-    : PCHFile(std::move(PCHFile)), FilesInPreamble(FilesInPreamble),
+    : Storage(std::move(Storage)), FilesInPreamble(std::move(FilesInPreamble)),
       PreambleBytes(std::move(PreambleBytes)),
-      PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
+      PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {
+  assert(this->Storage.getKind() != PCHStorage::Kind::Empty);
+}
 
 llvm::ErrorOr<PrecompiledPreamble::TempPCHFile>
 PrecompiledPreamble::TempPCHFile::CreateNewPreamblePCHFile() {
@@ -465,11 +522,10 @@
                                                         StringRef Suffix) {
   llvm::SmallString<64> File;
   // Using a version of createTemporaryFile with a file descriptor guarantees
-  // that we would never get a race condition in a multi-threaded setting (i.e.,
-  // multiple threads getting the same temporary path).
+  // that we would never get a race condition in a multi-threaded setting
+  // (i.e., multiple threads getting the same temporary path).
   int FD;
-  auto EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, /*ref*/ FD,
-                                               /*ref*/ File);
+  auto EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, FD, File);
   if (EC)
     return EC;
   // We only needed to make sure the file exists, close the file right away.
@@ -515,6 +571,87 @@
   return *FilePath;
 }
 
+PrecompiledPreamble::PCHStorage::PCHStorage(TempPCHFile File)
+    : StorageKind(Kind::TempFile) {
+  new (&asFile()) TempPCHFile(std::move(File));
+}
+
+PrecompiledPreamble::PCHStorage::PCHStorage(InMemoryPreamble Memory)
+    : StorageKind(Kind::InMemory) {
+  new (&asMemory()) InMemoryPreamble(std::move(Memory));
+}
+
+PrecompiledPreamble::PCHStorage::PCHStorage(PCHStorage &&Other) : PCHStorage() {
+  *this = std::move(Other);
+}
+
+PrecompiledPreamble::PCHStorage &PrecompiledPreamble::PCHStorage::
+operator=(PCHStorage &&Other) {
+  destroy();
+
+  StorageKind = Other.StorageKind;
+  switch (StorageKind) {
+  case Kind::Empty:
+    // do nothing;
+    break;
+  case Kind::TempFile:
+    new (&asFile()) TempPCHFile(std::move(Other.asFile()));
+    break;
+  case Kind::InMemory:
+    new (&asMemory()) InMemoryPreamble(std::move(Other.asMemory()));
+    break;
+  }
+
+  Other.setEmpty();
+  return *this;
+}
+
+PrecompiledPreamble::PCHStorage::~PCHStorage() { destroy(); }
+
+PrecompiledPreamble::PCHStorage::Kind
+PrecompiledPreamble::PCHStorage::getKind() const {
+  return StorageKind;
+}
+
+PrecompiledPreamble::TempPCHFile &PrecompiledPreamble::PCHStorage::asFile() {
+  assert(getKind() == Kind::TempFile);
+  return *reinterpret_cast<TempPCHFile *>(Storage.buffer);
+}
+
+const PrecompiledPreamble::TempPCHFile &
+PrecompiledPreamble::PCHStorage::asFile() const {
+  return const_cast<PCHStorage *>(this)->asFile();
+}
+
+PrecompiledPreamble::InMemoryPreamble &
+PrecompiledPreamble::PCHStorage::asMemory() {
+  assert(getKind() == Kind::InMemory);
+  return *reinterpret_cast<InMemoryPreamble *>(Storage.buffer);
+}
+
+const PrecompiledPreamble::InMemoryPreamble &
+PrecompiledPreamble::PCHStorage::asMemory() const {
+  return const_cast<PCHStorage *>(this)->asMemory();
+}
+
+void PrecompiledPreamble::PCHStorage::destroy() {
+  switch (StorageKind) {
+  case Kind::Empty:
+    return;
+  case Kind::TempFile:
+    asFile().~TempPCHFile();
+    return;
+  case Kind::InMemory:
+    asMemory().~InMemoryPreamble();
+    return;
+  }
+}
+
+void PrecompiledPreamble::PCHStorage::setEmpty() {
+  destroy();
+  StorageKind = Kind::Empty;
+}
+
 PrecompiledPreamble::PreambleFileHash
 PrecompiledPreamble::PreambleFileHash::createForFile(off_t Size,
                                                      time_t ModTime) {
@@ -539,11 +676,69 @@
   return Result;
 }
 
+void PrecompiledPreamble::configurePreamble(
+    PreambleBounds Bounds, CompilerInvocation &CI,
+    IntrusiveRefCntPtr<vfs::FileSystem> &VFS,
+    llvm::MemoryBuffer *MainFileBuffer) const {
+  assert(VFS);
+
+  auto &PreprocessorOpts = CI.getPreprocessorOpts();
+
+  // Remap main file to point to MainFileBuffer.
+  auto MainFilePath = CI.getFrontendOpts().Inputs[0].getFile();
+  PreprocessorOpts.addRemappedFile(MainFilePath, MainFileBuffer);
+
+  // Configure ImpicitPCHInclude.
+  PreprocessorOpts.PrecompiledPreambleBytes.first = Bounds.Size;
+  PreprocessorOpts.PrecompiledPreambleBytes.second =
+      Bounds.PreambleEndsAtStartOfLine;
+  PreprocessorOpts.DisablePCHValidation = true;
+
+  setupPreambleStorage(Storage, PreprocessorOpts, VFS);
+}
+
+void PrecompiledPreamble::setupPreambleStorage(
+    const PCHStorage &Storage, PreprocessorOptions &PreprocessorOpts,
+    IntrusiveRefCntPtr<vfs::FileSystem> &VFS) {
+  if (Storage.getKind() == PCHStorage::Kind::TempFile) {
+    const TempPCHFile &PCHFile = Storage.asFile();
+    PreprocessorOpts.ImplicitPCHInclude = PCHFile.getFilePath();
+
+    // Make sure we can access the PCH file even if we're using a VFS
+    IntrusiveRefCntPtr<vfs::FileSystem> RealFS = vfs::getRealFileSystem();
+    auto PCHPath = PCHFile.getFilePath();
+    if (VFS == RealFS || VFS->exists(PCHPath))
+      return;
+    auto Buf = RealFS->getBufferForFile(PCHPath);
+    if (!Buf) {
+      // We can't read the file even from RealFS, this is clearly an error,
+      // but we'll just leave the current VFS as is and let clang's code
+      // figure out what to do with missing PCH.
+      return;
+    }
+
+    // We have a slight inconsistency here -- we're using the VFS to
+    // read files, but the PCH was generated in the real file system.
+    VFS = createVFSOverlayForPreamblePCH(PCHPath, std::move(*Buf), VFS);
+  } else {
+    assert(Storage.getKind() == PCHStorage::Kind::InMemory);
+    // For in-memory preamble, we have to provide a VFS overlay that makes it
+    // accessible.
+    StringRef PCHPath = getInMemoryPreamblePath();
+    PreprocessorOpts.ImplicitPCHInclude = PCHPath;
+
+    auto Buf = llvm::MemoryBuffer::getMemBuffer(Storage.asMemory().Data);
+    VFS = createVFSOverlayForPreamblePCH(PCHPath, std::move(Buf), VFS);
+  }
+}
+
+void PreambleCallbacks::BeforeExecute(CompilerInstance &CI) {}
 void PreambleCallbacks::AfterExecute(CompilerInstance &CI) {}
 void PreambleCallbacks::AfterPCHEmitted(ASTWriter &Writer) {}
 void PreambleCallbacks::HandleTopLevelDecl(DeclGroupRef DG) {}
-void PreambleCallbacks::HandleMacroDefined(const Token &MacroNameTok,
-                                           const MacroDirective *MD) {}
+std::unique_ptr<PPCallbacks> PreambleCallbacks::createPPCallbacks() {
+  return nullptr;
+}
 
 std::error_code clang::make_error_code(BuildPreambleError Error) {
   return std::error_code(static_cast<int>(Error), BuildPreambleErrorCategory());
diff --git a/lib/Frontend/PrintPreprocessedOutput.cpp b/lib/Frontend/PrintPreprocessedOutput.cpp
index 2e02329..36ecdad 100644
--- a/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -752,7 +752,7 @@
     } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
                Tok.getLiteralData()) {
       OS.write(Tok.getLiteralData(), Tok.getLength());
-    } else if (Tok.getLength() < 256) {
+    } else if (Tok.getLength() < llvm::array_lengthof(Buffer)) {
       const char *TokPtr = Buffer;
       unsigned Len = PP.getSpelling(Tok, TokPtr);
       OS.write(TokPtr, Len);
diff --git a/lib/Frontend/Rewrite/HTMLPrint.cpp b/lib/Frontend/Rewrite/HTMLPrint.cpp
index 11e431d..34ee967 100644
--- a/lib/Frontend/Rewrite/HTMLPrint.cpp
+++ b/lib/Frontend/Rewrite/HTMLPrint.cpp
@@ -86,8 +86,7 @@
 
   // Emit the HTML.
   const RewriteBuffer &RewriteBuf = R.getEditBuffer(FID);
-  char *Buffer = (char*)malloc(RewriteBuf.size());
-  std::copy(RewriteBuf.begin(), RewriteBuf.end(), Buffer);
-  Out->write(Buffer, RewriteBuf.size());
-  free(Buffer);
+  std::unique_ptr<char[]> Buffer(new char[RewriteBuf.size()]);
+  std::copy(RewriteBuf.begin(), RewriteBuf.end(), Buffer.get());
+  Out->write(Buffer.get(), RewriteBuf.size());
 }
diff --git a/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index 1954b24..681e876 100644
--- a/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ b/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -1714,7 +1714,7 @@
   else {
     DeclRefExpr *DR = cast<DeclRefExpr>(S->getElement());
     elementName = DR->getDecl()->getName();
-    ValueDecl *VD = cast<ValueDecl>(DR->getDecl());
+    ValueDecl *VD = DR->getDecl();
     if (VD->getType()->isObjCQualifiedIdType() ||
         VD->getType()->isObjCQualifiedInterfaceType())
       // Simply use 'id' for all qualified types.
@@ -2590,7 +2590,7 @@
   Expr *Unop = new (Context) UnaryOperator(DRE, UO_AddrOf,
                                  Context->getPointerType(DRE->getType()),
                                            VK_RValue, OK_Ordinary,
-                                           SourceLocation());
+                                           SourceLocation(), false);
   // cast to NSConstantString *
   CastExpr *cast = NoTypeInfoCStyleCastExpr(Context, Exp->getType(),
                                             CK_CPointerToObjCPointerCast, Unop);
@@ -3295,7 +3295,7 @@
       SuperRep = new (Context) UnaryOperator(SuperRep, UO_AddrOf,
                                Context->getPointerType(SuperRep->getType()),
                                              VK_RValue, OK_Ordinary,
-                                             SourceLocation());
+                                             SourceLocation(), false);
       SuperRep = NoTypeInfoCStyleCastExpr(Context,
                                           Context->getPointerType(superType),
                                           CK_BitCast, SuperRep);
@@ -3313,7 +3313,7 @@
       SuperRep = new (Context) UnaryOperator(SuperRep, UO_AddrOf,
                                Context->getPointerType(SuperRep->getType()),
                                              VK_RValue, OK_Ordinary,
-                                             SourceLocation());
+                                             SourceLocation(), false);
     }
     MsgExprs.push_back(SuperRep);
     break;
@@ -3389,7 +3389,7 @@
       SuperRep = new (Context) UnaryOperator(SuperRep, UO_AddrOf,
                                Context->getPointerType(SuperRep->getType()),
                                VK_RValue, OK_Ordinary,
-                               SourceLocation());
+                               SourceLocation(), false);
       SuperRep = NoTypeInfoCStyleCastExpr(Context,
                                Context->getPointerType(superType),
                                CK_BitCast, SuperRep);
@@ -4720,7 +4720,7 @@
       return DRE;
   Expr *Exp = new (Context) UnaryOperator(DRE, UO_Deref, DRE->getType(),
                                           VK_LValue, OK_Ordinary,
-                                          DRE->getLocation());
+                                          DRE->getLocation(), false);
   // Need parens to enforce precedence.
   ParenExpr *PE = new (Context) ParenExpr(SourceLocation(), SourceLocation(), 
                                           Exp);
@@ -5314,7 +5314,7 @@
                                 UO_AddrOf,
                                 Context->getPointerType(Context->VoidPtrTy), 
                                 VK_RValue, OK_Ordinary,
-                                SourceLocation());
+                                SourceLocation(), false);
   InitExprs.push_back(DescRefExpr); 
   
   // Add initializers for any closure decl refs.
@@ -5332,7 +5332,8 @@
           QualType QT = (*I)->getType();
           QT = Context->getPointerType(QT);
           Exp = new (Context) UnaryOperator(Exp, UO_AddrOf, QT, VK_RValue,
-                                            OK_Ordinary, SourceLocation());
+                                            OK_Ordinary, SourceLocation(),
+                                            false);
         }
       } else if (isTopLevelBlockPointerType((*I)->getType())) {
         FD = SynthBlockInitFunctionDecl((*I)->getName());
@@ -5348,7 +5349,8 @@
           QualType QT = (*I)->getType();
           QT = Context->getPointerType(QT);
           Exp = new (Context) UnaryOperator(Exp, UO_AddrOf, QT, VK_RValue,
-                                            OK_Ordinary, SourceLocation());
+                                            OK_Ordinary, SourceLocation(),
+                                            false);
         }
         
       }
@@ -5388,7 +5390,8 @@
       if (!isNestedCapturedVar)
           Exp = new (Context) UnaryOperator(Exp, UO_AddrOf,
                                      Context->getPointerType(Exp->getType()),
-                                     VK_RValue, OK_Ordinary, SourceLocation());
+                                     VK_RValue, OK_Ordinary, SourceLocation(),
+                                     false);
       Exp = NoTypeInfoCStyleCastExpr(Context, castT, CK_BitCast, Exp);
       InitExprs.push_back(Exp);
     }
@@ -5414,7 +5417,7 @@
   
   NewRep = new (Context) UnaryOperator(NewRep, UO_AddrOf,
                              Context->getPointerType(NewRep->getType()),
-                             VK_RValue, OK_Ordinary, SourceLocation());
+                             VK_RValue, OK_Ordinary, SourceLocation(), false);
   NewRep = NoTypeInfoCStyleCastExpr(Context, FType, CK_BitCast,
                                     NewRep);
   // Put Paren around the call.
@@ -7558,7 +7561,7 @@
       
       Expr *Exp = new (Context) UnaryOperator(castExpr, UO_Deref, IvarT,
                                               VK_LValue, OK_Ordinary,
-                                              SourceLocation());
+                                              SourceLocation(), false);
       PE = new (Context) ParenExpr(OldRange.getBegin(),
                                    OldRange.getEnd(),
                                    Exp);
diff --git a/lib/Frontend/Rewrite/RewriteObjC.cpp b/lib/Frontend/Rewrite/RewriteObjC.cpp
index 096b81b..9938f89 100644
--- a/lib/Frontend/Rewrite/RewriteObjC.cpp
+++ b/lib/Frontend/Rewrite/RewriteObjC.cpp
@@ -1502,7 +1502,7 @@
   else {
     DeclRefExpr *DR = cast<DeclRefExpr>(S->getElement());
     elementName = DR->getDecl()->getName();
-    ValueDecl *VD = cast<ValueDecl>(DR->getDecl());
+    ValueDecl *VD = DR->getDecl();
     if (VD->getType()->isObjCQualifiedIdType() ||
         VD->getType()->isObjCQualifiedInterfaceType())
       // Simply use 'id' for all qualified types.
@@ -2511,7 +2511,7 @@
   Expr *Unop = new (Context) UnaryOperator(DRE, UO_AddrOf,
                                  Context->getPointerType(DRE->getType()),
                                            VK_RValue, OK_Ordinary,
-                                           SourceLocation());
+                                           SourceLocation(), false);
   // cast to NSConstantString *
   CastExpr *cast = NoTypeInfoCStyleCastExpr(Context, Exp->getType(),
                                             CK_CPointerToObjCPointerCast, Unop);
@@ -2712,7 +2712,7 @@
       SuperRep = new (Context) UnaryOperator(SuperRep, UO_AddrOf,
                                Context->getPointerType(SuperRep->getType()),
                                              VK_RValue, OK_Ordinary,
-                                             SourceLocation());
+                                             SourceLocation(), false);
       SuperRep = NoTypeInfoCStyleCastExpr(Context,
                                           Context->getPointerType(superType),
                                           CK_BitCast, SuperRep);
@@ -2730,7 +2730,7 @@
       SuperRep = new (Context) UnaryOperator(SuperRep, UO_AddrOf,
                                Context->getPointerType(SuperRep->getType()),
                                              VK_RValue, OK_Ordinary,
-                                             SourceLocation());
+                                             SourceLocation(), false);
     }
     MsgExprs.push_back(SuperRep);
     break;
@@ -2806,7 +2806,7 @@
       SuperRep = new (Context) UnaryOperator(SuperRep, UO_AddrOf,
                                Context->getPointerType(SuperRep->getType()),
                                VK_RValue, OK_Ordinary,
-                               SourceLocation());
+                               SourceLocation(), false);
       SuperRep = NoTypeInfoCStyleCastExpr(Context,
                                Context->getPointerType(superType),
                                CK_BitCast, SuperRep);
@@ -3045,7 +3045,7 @@
                                                VK_LValue, SourceLocation());
   Expr *DerefExpr = new (Context) UnaryOperator(DRE, UO_AddrOf,
                              Context->getPointerType(DRE->getType()),
-                             VK_RValue, OK_Ordinary, SourceLocation());
+                             VK_RValue, OK_Ordinary, SourceLocation(), false);
   CastExpr *castExpr = NoTypeInfoCStyleCastExpr(Context, DerefExpr->getType(),
                                                 CK_BitCast,
                                                 DerefExpr);
@@ -3875,7 +3875,7 @@
       return DRE;
   Expr *Exp = new (Context) UnaryOperator(DRE, UO_Deref, DRE->getType(),
                                           VK_LValue, OK_Ordinary,
-                                          DRE->getLocation());
+                                          DRE->getLocation(), false);
   // Need parens to enforce precedence.
   ParenExpr *PE = new (Context) ParenExpr(SourceLocation(), SourceLocation(), 
                                           Exp);
@@ -4438,7 +4438,7 @@
                                 UO_AddrOf,
                                 Context->getPointerType(Context->VoidPtrTy), 
                                 VK_RValue, OK_Ordinary,
-                                SourceLocation());
+                                SourceLocation(), false);
   InitExprs.push_back(DescRefExpr); 
   
   // Add initializers for any closure decl refs.
@@ -4456,7 +4456,8 @@
           QualType QT = (*I)->getType();
           QT = Context->getPointerType(QT);
           Exp = new (Context) UnaryOperator(Exp, UO_AddrOf, QT, VK_RValue,
-                                            OK_Ordinary, SourceLocation());
+                                            OK_Ordinary, SourceLocation(),
+                                            false);
         }
       } else if (isTopLevelBlockPointerType((*I)->getType())) {
         FD = SynthBlockInitFunctionDecl((*I)->getName());
@@ -4472,7 +4473,8 @@
           QualType QT = (*I)->getType();
           QT = Context->getPointerType(QT);
           Exp = new (Context) UnaryOperator(Exp, UO_AddrOf, QT, VK_RValue,
-                                            OK_Ordinary, SourceLocation());
+                                            OK_Ordinary, SourceLocation(),
+                                            false);
         }
       }
       InitExprs.push_back(Exp);
@@ -4509,9 +4511,9 @@
       // captured nested byref variable has its address passed. Do not take
       // its address again.
       if (!isNestedCapturedVar)
-          Exp = new (Context) UnaryOperator(Exp, UO_AddrOf,
-                                     Context->getPointerType(Exp->getType()),
-                                     VK_RValue, OK_Ordinary, SourceLocation());
+        Exp = new (Context) UnaryOperator(
+            Exp, UO_AddrOf, Context->getPointerType(Exp->getType()), VK_RValue,
+            OK_Ordinary, SourceLocation(), false);
       Exp = NoTypeInfoCStyleCastExpr(Context, castT, CK_BitCast, Exp);
       InitExprs.push_back(Exp);
     }
@@ -4529,7 +4531,7 @@
                                   FType, VK_LValue, SourceLocation());
   NewRep = new (Context) UnaryOperator(NewRep, UO_AddrOf,
                              Context->getPointerType(NewRep->getType()),
-                             VK_RValue, OK_Ordinary, SourceLocation());
+                             VK_RValue, OK_Ordinary, SourceLocation(), false);
   NewRep = NoTypeInfoCStyleCastExpr(Context, FType, CK_BitCast,
                                     NewRep);
   BlockDeclRefs.clear();
diff --git a/lib/Frontend/TextDiagnosticBuffer.cpp b/lib/Frontend/TextDiagnosticBuffer.cpp
index d49e983..2885073 100644
--- a/lib/Frontend/TextDiagnosticBuffer.cpp
+++ b/lib/Frontend/TextDiagnosticBuffer.cpp
@@ -30,34 +30,45 @@
   default: llvm_unreachable(
                          "Diagnostic not handled during diagnostic buffering!");
   case DiagnosticsEngine::Note:
+    All.emplace_back(Level, Notes.size());
     Notes.emplace_back(Info.getLocation(), Buf.str());
     break;
   case DiagnosticsEngine::Warning:
+    All.emplace_back(Level, Warnings.size());
     Warnings.emplace_back(Info.getLocation(), Buf.str());
     break;
   case DiagnosticsEngine::Remark:
+    All.emplace_back(Level, Remarks.size());
     Remarks.emplace_back(Info.getLocation(), Buf.str());
     break;
   case DiagnosticsEngine::Error:
   case DiagnosticsEngine::Fatal:
+    All.emplace_back(Level, Errors.size());
     Errors.emplace_back(Info.getLocation(), Buf.str());
     break;
   }
 }
 
 void TextDiagnosticBuffer::FlushDiagnostics(DiagnosticsEngine &Diags) const {
-  // FIXME: Flush the diagnostics in order.
-  for (const_iterator it = err_begin(), ie = err_end(); it != ie; ++it)
-    Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0"))
-        << it->second;
-  for (const_iterator it = warn_begin(), ie = warn_end(); it != ie; ++it)
-    Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Warning, "%0"))
-        << it->second;
-  for (const_iterator it = remark_begin(), ie = remark_end(); it != ie; ++it)
-    Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Remark, "%0"))
-        << it->second;
-  for (const_iterator it = note_begin(), ie = note_end(); it != ie; ++it)
-    Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Note, "%0"))
-        << it->second;
+  for (auto it = All.begin(), ie = All.end(); it != ie; ++it) {
+    auto Diag = Diags.Report(Diags.getCustomDiagID(it->first, "%0"));
+    switch (it->first) {
+    default: llvm_unreachable(
+                           "Diagnostic not handled during diagnostic flushing!");
+    case DiagnosticsEngine::Note:
+      Diag << Notes[it->second].second;
+      break;
+    case DiagnosticsEngine::Warning:
+      Diag << Warnings[it->second].second;
+      break;
+    case DiagnosticsEngine::Remark:
+      Diag << Remarks[it->second].second;
+      break;
+    case DiagnosticsEngine::Error:
+    case DiagnosticsEngine::Fatal:
+      Diag << Errors[it->second].second;
+      break;
+    }
+  }
 }
 
diff --git a/lib/Frontend/VerifyDiagnosticConsumer.cpp b/lib/Frontend/VerifyDiagnosticConsumer.cpp
index a49c424..0df5393 100644
--- a/lib/Frontend/VerifyDiagnosticConsumer.cpp
+++ b/lib/Frontend/VerifyDiagnosticConsumer.cpp
@@ -229,22 +229,52 @@
     return true;
   }
 
-  // Return true if string literal is found.
-  // When true, P marks begin-position of S in content.
-  bool Search(StringRef S, bool EnsureStartOfWord = false) {
+  // Return true if string literal S is matched in content.
+  // When true, P marks begin-position of the match, and calling Advance sets C
+  // to end-position of the match.
+  // If S is the empty string, then search for any letter instead (makes sense
+  // with FinishDirectiveToken=true).
+  // If EnsureStartOfWord, then skip matches that don't start a new word.
+  // If FinishDirectiveToken, then assume the match is the start of a comment
+  // directive for -verify, and extend the match to include the entire first
+  // token of that directive.
+  bool Search(StringRef S, bool EnsureStartOfWord = false,
+              bool FinishDirectiveToken = false) {
     do {
-      P = std::search(C, End, S.begin(), S.end());
-      PEnd = P + S.size();
+      if (!S.empty()) {
+        P = std::search(C, End, S.begin(), S.end());
+        PEnd = P + S.size();
+      }
+      else {
+        P = C;
+        while (P != End && !isLetter(*P))
+          ++P;
+        PEnd = P + 1;
+      }
       if (P == End)
         break;
-      if (!EnsureStartOfWord
-            // Check if string literal starts a new word.
-            || P == Begin || isWhitespace(P[-1])
-            // Or it could be preceded by the start of a comment.
-            || (P > (Begin + 1) && (P[-1] == '/' || P[-1] == '*')
-                                &&  P[-2] == '/'))
-        return true;
-      // Otherwise, skip and search again.
+      // If not start of word but required, skip and search again.
+      if (EnsureStartOfWord
+               // Check if string literal starts a new word.
+          && !(P == Begin || isWhitespace(P[-1])
+               // Or it could be preceded by the start of a comment.
+               || (P > (Begin + 1) && (P[-1] == '/' || P[-1] == '*')
+                                   &&  P[-2] == '/')))
+        continue;
+      if (FinishDirectiveToken) {
+        while (PEnd != End && (isAlphanumeric(*PEnd)
+                               || *PEnd == '-' || *PEnd == '_'))
+          ++PEnd;
+        // Put back trailing digits and hyphens to be parsed later as a count
+        // or count range.  Because -verify prefixes must start with letters,
+        // we know the actual directive we found starts with a letter, so
+        // we won't put back the entire directive word and thus record an empty
+        // string.
+        assert(isLetter(*P) && "-verify prefix must start with a letter");
+        while (isDigit(PEnd[-1]) || PEnd[-1] == '-')
+          --PEnd;
+      }
+      return true;
     } while (Advance());
     return false;
   }
@@ -314,37 +344,68 @@
   // A single comment may contain multiple directives.
   bool FoundDirective = false;
   for (ParseHelper PH(S); !PH.Done();) {
-    // Search for token: expected
-    if (!PH.Search("expected", true))
+    // Search for the initial directive token.
+    // If one prefix, save time by searching only for its directives.
+    // Otherwise, search for any potential directive token and check it later.
+    const auto &Prefixes = Diags.getDiagnosticOptions().VerifyPrefixes;
+    if (!(Prefixes.size() == 1 ? PH.Search(*Prefixes.begin(), true, true)
+                               : PH.Search("", true, true)))
       break;
     PH.Advance();
 
-    // Next token: -
-    if (!PH.Next("-"))
-      continue;
-    PH.Advance();
+    // Default directive kind.
+    bool RegexKind = false;
+    const char* KindStr = "string";
 
-    // Next token: { error | warning | note }
+    // Parse the initial directive token in reverse so we can easily determine
+    // its exact actual prefix.  If we were to parse it from the front instead,
+    // it would be harder to determine where the prefix ends because there
+    // might be multiple matching -verify prefixes because some might prefix
+    // others.
+    StringRef DToken(PH.P, PH.C - PH.P);
+
+    // Regex in initial directive token: -re
+    if (DToken.endswith("-re")) {
+      RegexKind = true;
+      KindStr = "regex";
+      DToken = DToken.substr(0, DToken.size()-3);
+    }
+
+    // Type in initial directive token: -{error|warning|note|no-diagnostics}
     DirectiveList *DL = nullptr;
-    if (PH.Next("error"))
+    bool NoDiag = false;
+    StringRef DType;
+    if (DToken.endswith(DType="-error"))
       DL = ED ? &ED->Errors : nullptr;
-    else if (PH.Next("warning"))
+    else if (DToken.endswith(DType="-warning"))
       DL = ED ? &ED->Warnings : nullptr;
-    else if (PH.Next("remark"))
+    else if (DToken.endswith(DType="-remark"))
       DL = ED ? &ED->Remarks : nullptr;
-    else if (PH.Next("note"))
+    else if (DToken.endswith(DType="-note"))
       DL = ED ? &ED->Notes : nullptr;
-    else if (PH.Next("no-diagnostics")) {
+    else if (DToken.endswith(DType="-no-diagnostics")) {
+      NoDiag = true;
+      if (RegexKind)
+        continue;
+    }
+    else
+      continue;
+    DToken = DToken.substr(0, DToken.size()-DType.size());
+
+    // What's left in DToken is the actual prefix.  That might not be a -verify
+    // prefix even if there is only one -verify prefix (for example, the full
+    // DToken is foo-bar-warning, but foo is the only -verify prefix).
+    if (!std::binary_search(Prefixes.begin(), Prefixes.end(), DToken))
+      continue;
+
+    if (NoDiag) {
       if (Status == VerifyDiagnosticConsumer::HasOtherExpectedDirectives)
         Diags.Report(Pos, diag::err_verify_invalid_no_diags)
           << /*IsExpectedNoDiagnostics=*/true;
       else
         Status = VerifyDiagnosticConsumer::HasExpectedNoDiagnostics;
       continue;
-    } else
-      continue;
-    PH.Advance();
-
+    }
     if (Status == VerifyDiagnosticConsumer::HasExpectedNoDiagnostics) {
       Diags.Report(Pos, diag::err_verify_invalid_no_diags)
         << /*IsExpectedNoDiagnostics=*/false;
@@ -357,17 +418,6 @@
     if (!DL)
       return true;
 
-    // Default directive kind.
-    bool RegexKind = false;
-    const char* KindStr = "string";
-
-    // Next optional token: -
-    if (PH.Next("-re")) {
-      PH.Advance();
-      RegexKind = true;
-      KindStr = "regex";
-    }
-
     // Next optional token: @
     SourceLocation ExpectedLoc;
     bool MatchAnyLine = false;
diff --git a/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/lib/FrontendTool/ExecuteCompilerInvocation.cpp
index 4167e1f..1bb939a 100644
--- a/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ b/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -32,6 +32,8 @@
 using namespace clang;
 using namespace llvm::opt;
 
+namespace clang {
+
 static std::unique_ptr<FrontendAction>
 CreateFrontendBaseAction(CompilerInstance &CI) {
   using namespace clang::frontend;
@@ -63,6 +65,7 @@
   case ParseSyntaxOnly:        return llvm::make_unique<SyntaxOnlyAction>();
   case ModuleFileInfo:         return llvm::make_unique<DumpModuleInfoAction>();
   case VerifyPCH:              return llvm::make_unique<VerifyPCHAction>();
+  case TemplightDump:          return llvm::make_unique<TemplightDumpAction>();
 
   case PluginAction: {
     for (FrontendPluginRegistry::iterator it =
@@ -122,7 +125,7 @@
 #endif
 }
 
-static std::unique_ptr<FrontendAction>
+std::unique_ptr<FrontendAction>
 CreateFrontendAction(CompilerInstance &CI) {
   // Create the underlying action.
   std::unique_ptr<FrontendAction> Act = CreateFrontendBaseAction(CI);
@@ -173,7 +176,7 @@
   return Act;
 }
 
-bool clang::ExecuteCompilerInvocation(CompilerInstance *Clang) {
+bool ExecuteCompilerInvocation(CompilerInstance *Clang) {
   // Honor -help.
   if (Clang->getFrontendOpts().ShowHelp) {
     std::unique_ptr<OptTable> Opts = driver::createDriverOptTable();
@@ -254,3 +257,5 @@
     BuryPointer(std::move(Act));
   return Success;
 }
+
+} // namespace clang 
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
index 7d5e933..a6ea5b6 100644
--- a/lib/Headers/CMakeLists.txt
+++ b/lib/Headers/CMakeLists.txt
@@ -7,6 +7,8 @@
   arm64intr.h
   avx2intrin.h
   avx512bwintrin.h
+  avx512bitalgintrin.h
+  avx512vlbitalgintrin.h
   avx512cdintrin.h
   avx512vpopcntdqintrin.h
   avx512dqintrin.h
@@ -17,19 +19,27 @@
   avx512pfintrin.h
   avx512vbmiintrin.h
   avx512vbmivlintrin.h
+  avx512vbmi2intrin.h
+  avx512vlvbmi2intrin.h
   avx512vlbwintrin.h
   avx512vlcdintrin.h
   avx512vldqintrin.h
   avx512vlintrin.h
+  avx512vpopcntdqvlintrin.h
+  avx512vnniintrin.h
+  avx512vlvnniintrin.h
   avxintrin.h
   bmi2intrin.h
   bmiintrin.h
   __clang_cuda_builtin_vars.h
   __clang_cuda_cmath.h
   __clang_cuda_complex_builtins.h
+  __clang_cuda_device_functions.h
   __clang_cuda_intrinsics.h
+  __clang_cuda_libdevice_declares.h
   __clang_cuda_math_forward_declares.h
   __clang_cuda_runtime_wrapper.h
+  cetintrin.h
   clzerointrin.h
   cpuid.h
   clflushoptintrin.h
@@ -40,6 +50,7 @@
   fma4intrin.h
   fmaintrin.h
   fxsrintrin.h
+  gfniintrin.h
   htmintrin.h
   htmxlintrin.h
   ia32intrin.h
@@ -80,8 +91,10 @@
   tmmintrin.h
   unwind.h
   vadefs.h
+  vaesintrin.h
   varargs.h
   vecintrin.h
+  vpclmulqdqintrin.h
   wmmintrin.h
   __wmmintrin_aes.h
   __wmmintrin_pclmul.h
@@ -105,7 +118,12 @@
 
 # Generate arm_neon.h
 clang_tablegen(arm_neon.h -gen-arm-neon
+  -I ${CLANG_SOURCE_DIR}/include/clang/Basic/
   SOURCE ${CLANG_SOURCE_DIR}/include/clang/Basic/arm_neon.td)
+# Generate arm_fp16.h
+clang_tablegen(arm_fp16.h -gen-arm-fp16
+  -I ${CLANG_SOURCE_DIR}/include/clang/Basic/
+  SOURCE ${CLANG_SOURCE_DIR}/include/clang/Basic/arm_fp16.td)
 
 set(out_files)
 foreach( f ${files} ${cuda_wrapper_files} )
@@ -123,6 +141,11 @@
   COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_BINARY_DIR}/arm_neon.h ${output_dir}/arm_neon.h
   COMMENT "Copying clang's arm_neon.h...")
 list(APPEND out_files ${output_dir}/arm_neon.h)
+add_custom_command(OUTPUT ${output_dir}/arm_fp16.h
+  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h ${output_dir}/arm_fp16.h
+  COMMENT "Copying clang's arm_fp16.h...")
+list(APPEND out_files ${output_dir}/arm_fp16.h)
 
 add_custom_target(clang-headers ALL DEPENDS ${out_files})
 set_target_properties(clang-headers PROPERTIES FOLDER "Misc")
@@ -134,15 +157,19 @@
   DESTINATION lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION}/include)
 
 install(
+  FILES ${files} ${CMAKE_CURRENT_BINARY_DIR}/arm_fp16.h
+  COMPONENT clang-headers
+  PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
+  DESTINATION lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION}/include)
+
+install(
   FILES ${cuda_wrapper_files}
   COMPONENT clang-headers
   PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
   DESTINATION lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION}/include/cuda_wrappers)
 
 if (NOT CMAKE_CONFIGURATION_TYPES) # don't add this for IDE's.
-  add_custom_target(install-clang-headers
-    DEPENDS clang-headers
-    COMMAND "${CMAKE_COMMAND}"
-            -DCMAKE_INSTALL_COMPONENT=clang-headers
-            -P "${CMAKE_BINARY_DIR}/cmake_install.cmake")
+  add_llvm_install_targets(install-clang-headers
+                           DEPENDS clang-headers
+                           COMPONENT clang-headers)
 endif()
diff --git a/lib/Headers/__clang_cuda_cmath.h b/lib/Headers/__clang_cuda_cmath.h
index 9bef826..5331ba4 100644
--- a/lib/Headers/__clang_cuda_cmath.h
+++ b/lib/Headers/__clang_cuda_cmath.h
@@ -131,15 +131,6 @@
 __DEVICE__ float log(float __x) { return ::logf(__x); }
 __DEVICE__ float log10(float __x) { return ::log10f(__x); }
 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
-__DEVICE__ float nexttoward(float __from, double __to) {
-  return __builtin_nexttowardf(__from, __to);
-}
-__DEVICE__ double nexttoward(double __from, double __to) {
-  return __builtin_nexttoward(__from, __to);
-}
-__DEVICE__ float nexttowardf(float __from, double __to) {
-  return __builtin_nexttowardf(__from, __to);
-}
 __DEVICE__ float pow(float __base, float __exp) {
   return ::powf(__base, __exp);
 }
@@ -157,6 +148,10 @@
 __DEVICE__ float tan(float __x) { return ::tanf(__x); }
 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
 
+// Notably missing above is nexttoward.  We omit it because
+// libdevice doesn't provide an implementation, and we don't want to be in the
+// business of implementing tricky libm functions in this header.
+
 // Now we've defined everything we promised we'd define in
 // __clang_cuda_math_forward_declares.h.  We need to do two additional things to
 // fix up our math functions.
@@ -295,13 +290,6 @@
   return std::ldexp((double)__x, __exp);
 }
 
-template <typename __T>
-__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
-                                           double>::type
-nexttoward(__T __from, double __to) {
-  return std::nexttoward((double)__from, __to);
-}
-
 template <typename __T1, typename __T2>
 __DEVICE__ typename __clang_cuda_enable_if<
     std::numeric_limits<__T1>::is_specialized &&
@@ -388,7 +376,6 @@
 using ::lround;
 using ::nearbyint;
 using ::nextafter;
-using ::nexttoward;
 using ::pow;
 using ::remainder;
 using ::remquo;
@@ -456,8 +443,6 @@
 using ::modff;
 using ::nearbyintf;
 using ::nextafterf;
-using ::nexttowardf;
-using ::nexttowardf;
 using ::powf;
 using ::remainderf;
 using ::remquof;
diff --git a/lib/Headers/__clang_cuda_device_functions.h b/lib/Headers/__clang_cuda_device_functions.h
new file mode 100644
index 0000000..c4c2176
--- /dev/null
+++ b/lib/Headers/__clang_cuda_device_functions.h
@@ -0,0 +1,1341 @@
+/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__
+#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__
+
+#if CUDA_VERSION < 9000
+#error This file is intended to be used with CUDA-9+ only.
+#endif
+
+// __DEVICE__ is a helper macro with common set of attributes for the wrappers
+// we implement in this file. We need static in order to avoid emitting unused
+// functions and __forceinline__ helps inlining these wrappers at -O1.
+#pragma push_macro("__DEVICE__")
+#define __DEVICE__ static __device__ __forceinline__
+
+// libdevice provides fast low precision and slow full-recision implementations
+// for some functions. Which one gets selected depends on
+// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if
+// -ffast-math or -fcuda-approx-transcendentals are in effect.
+#pragma push_macro("__FAST_OR_SLOW")
+#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
+#define __FAST_OR_SLOW(fast, slow) fast
+#else
+#define __FAST_OR_SLOW(fast, slow) slow
+#endif
+
+__DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }
+__DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }
+__DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }
+__DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }
+__DEVICE__ unsigned long long __brevll(unsigned long long __a) {
+  return __nv_brevll(__a);
+}
+__DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
+__DEVICE__ void __brkpt(int __a) { __brkpt(); }
+__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
+                                    unsigned int __c) {
+  return __nv_byte_perm(__a, __b, __c);
+}
+__DEVICE__ int __clz(int __a) { return __nv_clz(__a); }
+__DEVICE__ int __clzll(long long __a) { return __nv_clzll(__a); }
+__DEVICE__ float __cosf(float __a) { return __nv_fast_cosf(__a); }
+__DEVICE__ double __dAtomicAdd(double *__p, double __v) {
+  return __nvvm_atom_add_gen_d(__p, __v);
+}
+__DEVICE__ double __dAtomicAdd_block(double *__p, double __v) {
+  return __nvvm_atom_cta_add_gen_d(__p, __v);
+}
+__DEVICE__ double __dAtomicAdd_system(double *__p, double __v) {
+  return __nvvm_atom_sys_add_gen_d(__p, __v);
+}
+__DEVICE__ double __dadd_rd(double __a, double __b) {
+  return __nv_dadd_rd(__a, __b);
+}
+__DEVICE__ double __dadd_rn(double __a, double __b) {
+  return __nv_dadd_rn(__a, __b);
+}
+__DEVICE__ double __dadd_ru(double __a, double __b) {
+  return __nv_dadd_ru(__a, __b);
+}
+__DEVICE__ double __dadd_rz(double __a, double __b) {
+  return __nv_dadd_rz(__a, __b);
+}
+__DEVICE__ double __ddiv_rd(double __a, double __b) {
+  return __nv_ddiv_rd(__a, __b);
+}
+__DEVICE__ double __ddiv_rn(double __a, double __b) {
+  return __nv_ddiv_rn(__a, __b);
+}
+__DEVICE__ double __ddiv_ru(double __a, double __b) {
+  return __nv_ddiv_ru(__a, __b);
+}
+__DEVICE__ double __ddiv_rz(double __a, double __b) {
+  return __nv_ddiv_rz(__a, __b);
+}
+__DEVICE__ double __dmul_rd(double __a, double __b) {
+  return __nv_dmul_rd(__a, __b);
+}
+__DEVICE__ double __dmul_rn(double __a, double __b) {
+  return __nv_dmul_rn(__a, __b);
+}
+__DEVICE__ double __dmul_ru(double __a, double __b) {
+  return __nv_dmul_ru(__a, __b);
+}
+__DEVICE__ double __dmul_rz(double __a, double __b) {
+  return __nv_dmul_rz(__a, __b);
+}
+__DEVICE__ float __double2float_rd(double __a) {
+  return __nv_double2float_rd(__a);
+}
+__DEVICE__ float __double2float_rn(double __a) {
+  return __nv_double2float_rn(__a);
+}
+__DEVICE__ float __double2float_ru(double __a) {
+  return __nv_double2float_ru(__a);
+}
+__DEVICE__ float __double2float_rz(double __a) {
+  return __nv_double2float_rz(__a);
+}
+__DEVICE__ int __double2hiint(double __a) { return __nv_double2hiint(__a); }
+__DEVICE__ int __double2int_rd(double __a) { return __nv_double2int_rd(__a); }
+__DEVICE__ int __double2int_rn(double __a) { return __nv_double2int_rn(__a); }
+__DEVICE__ int __double2int_ru(double __a) { return __nv_double2int_ru(__a); }
+__DEVICE__ int __double2int_rz(double __a) { return __nv_double2int_rz(__a); }
+__DEVICE__ long long __double2ll_rd(double __a) {
+  return __nv_double2ll_rd(__a);
+}
+__DEVICE__ long long __double2ll_rn(double __a) {
+  return __nv_double2ll_rn(__a);
+}
+__DEVICE__ long long __double2ll_ru(double __a) {
+  return __nv_double2ll_ru(__a);
+}
+__DEVICE__ long long __double2ll_rz(double __a) {
+  return __nv_double2ll_rz(__a);
+}
+__DEVICE__ int __double2loint(double __a) { return __nv_double2loint(__a); }
+__DEVICE__ unsigned int __double2uint_rd(double __a) {
+  return __nv_double2uint_rd(__a);
+}
+__DEVICE__ unsigned int __double2uint_rn(double __a) {
+  return __nv_double2uint_rn(__a);
+}
+__DEVICE__ unsigned int __double2uint_ru(double __a) {
+  return __nv_double2uint_ru(__a);
+}
+__DEVICE__ unsigned int __double2uint_rz(double __a) {
+  return __nv_double2uint_rz(__a);
+}
+__DEVICE__ unsigned long long __double2ull_rd(double __a) {
+  return __nv_double2ull_rd(__a);
+}
+__DEVICE__ unsigned long long __double2ull_rn(double __a) {
+  return __nv_double2ull_rn(__a);
+}
+__DEVICE__ unsigned long long __double2ull_ru(double __a) {
+  return __nv_double2ull_ru(__a);
+}
+__DEVICE__ unsigned long long __double2ull_rz(double __a) {
+  return __nv_double2ull_rz(__a);
+}
+__DEVICE__ long long __double_as_longlong(double __a) {
+  return __nv_double_as_longlong(__a);
+}
+__DEVICE__ double __drcp_rd(double __a) { return __nv_drcp_rd(__a); }
+__DEVICE__ double __drcp_rn(double __a) { return __nv_drcp_rn(__a); }
+__DEVICE__ double __drcp_ru(double __a) { return __nv_drcp_ru(__a); }
+__DEVICE__ double __drcp_rz(double __a) { return __nv_drcp_rz(__a); }
+__DEVICE__ double __dsqrt_rd(double __a) { return __nv_dsqrt_rd(__a); }
+__DEVICE__ double __dsqrt_rn(double __a) { return __nv_dsqrt_rn(__a); }
+__DEVICE__ double __dsqrt_ru(double __a) { return __nv_dsqrt_ru(__a); }
+__DEVICE__ double __dsqrt_rz(double __a) { return __nv_dsqrt_rz(__a); }
+__DEVICE__ double __dsub_rd(double __a, double __b) {
+  return __nv_dsub_rd(__a, __b);
+}
+__DEVICE__ double __dsub_rn(double __a, double __b) {
+  return __nv_dsub_rn(__a, __b);
+}
+__DEVICE__ double __dsub_ru(double __a, double __b) {
+  return __nv_dsub_ru(__a, __b);
+}
+__DEVICE__ double __dsub_rz(double __a, double __b) {
+  return __nv_dsub_rz(__a, __b);
+}
+__DEVICE__ float __exp10f(float __a) { return __nv_fast_exp10f(__a); }
+__DEVICE__ float __expf(float __a) { return __nv_fast_expf(__a); }
+__DEVICE__ float __fAtomicAdd(float *__p, float __v) {
+  return __nvvm_atom_add_gen_f(__p, __v);
+}
+__DEVICE__ float __fAtomicAdd_block(float *__p, float __v) {
+  return __nvvm_atom_cta_add_gen_f(__p, __v);
+}
+__DEVICE__ float __fAtomicAdd_system(float *__p, float __v) {
+  return __nvvm_atom_sys_add_gen_f(__p, __v);
+}
+__DEVICE__ float __fAtomicExch(float *__p, float __v) {
+  return __nv_int_as_float(
+      __nvvm_atom_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));
+}
+__DEVICE__ float __fAtomicExch_block(float *__p, float __v) {
+  return __nv_int_as_float(
+      __nvvm_atom_cta_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));
+}
+__DEVICE__ float __fAtomicExch_system(float *__p, float __v) {
+  return __nv_int_as_float(
+      __nvvm_atom_sys_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));
+}
+__DEVICE__ float __fadd_rd(float __a, float __b) {
+  return __nv_fadd_rd(__a, __b);
+}
+__DEVICE__ float __fadd_rn(float __a, float __b) {
+  return __nv_fadd_rn(__a, __b);
+}
+__DEVICE__ float __fadd_ru(float __a, float __b) {
+  return __nv_fadd_ru(__a, __b);
+}
+__DEVICE__ float __fadd_rz(float __a, float __b) {
+  return __nv_fadd_rz(__a, __b);
+}
+__DEVICE__ float __fdiv_rd(float __a, float __b) {
+  return __nv_fdiv_rd(__a, __b);
+}
+__DEVICE__ float __fdiv_rn(float __a, float __b) {
+  return __nv_fdiv_rn(__a, __b);
+}
+__DEVICE__ float __fdiv_ru(float __a, float __b) {
+  return __nv_fdiv_ru(__a, __b);
+}
+__DEVICE__ float __fdiv_rz(float __a, float __b) {
+  return __nv_fdiv_rz(__a, __b);
+}
+__DEVICE__ float __fdividef(float __a, float __b) {
+  return __nv_fast_fdividef(__a, __b);
+}
+__DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); }
+__DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }
+__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }
+__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }
+__DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }
+__DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }
+__DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }
+__DEVICE__ int __float2int_rz(float __a) { return __nv_float2int_rz(__a); }
+__DEVICE__ long long __float2ll_rd(float __a) { return __nv_float2ll_rd(__a); }
+__DEVICE__ long long __float2ll_rn(float __a) { return __nv_float2ll_rn(__a); }
+__DEVICE__ long long __float2ll_ru(float __a) { return __nv_float2ll_ru(__a); }
+__DEVICE__ long long __float2ll_rz(float __a) { return __nv_float2ll_rz(__a); }
+__DEVICE__ unsigned int __float2uint_rd(float __a) {
+  return __nv_float2uint_rd(__a);
+}
+__DEVICE__ unsigned int __float2uint_rn(float __a) {
+  return __nv_float2uint_rn(__a);
+}
+__DEVICE__ unsigned int __float2uint_ru(float __a) {
+  return __nv_float2uint_ru(__a);
+}
+__DEVICE__ unsigned int __float2uint_rz(float __a) {
+  return __nv_float2uint_rz(__a);
+}
+__DEVICE__ unsigned long long __float2ull_rd(float __a) {
+  return __nv_float2ull_rd(__a);
+}
+__DEVICE__ unsigned long long __float2ull_rn(float __a) {
+  return __nv_float2ull_rn(__a);
+}
+__DEVICE__ unsigned long long __float2ull_ru(float __a) {
+  return __nv_float2ull_ru(__a);
+}
+__DEVICE__ unsigned long long __float2ull_rz(float __a) {
+  return __nv_float2ull_rz(__a);
+}
+__DEVICE__ int __float_as_int(float __a) { return __nv_float_as_int(__a); }
+__DEVICE__ unsigned int __float_as_uint(float __a) {
+  return __nv_float_as_uint(__a);
+}
+__DEVICE__ double __fma_rd(double __a, double __b, double __c) {
+  return __nv_fma_rd(__a, __b, __c);
+}
+__DEVICE__ double __fma_rn(double __a, double __b, double __c) {
+  return __nv_fma_rn(__a, __b, __c);
+}
+__DEVICE__ double __fma_ru(double __a, double __b, double __c) {
+  return __nv_fma_ru(__a, __b, __c);
+}
+__DEVICE__ double __fma_rz(double __a, double __b, double __c) {
+  return __nv_fma_rz(__a, __b, __c);
+}
+__DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) {
+  return __nv_fmaf_ieee_rd(__a, __b, __c);
+}
+__DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) {
+  return __nv_fmaf_ieee_rn(__a, __b, __c);
+}
+__DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) {
+  return __nv_fmaf_ieee_ru(__a, __b, __c);
+}
+__DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) {
+  return __nv_fmaf_ieee_rz(__a, __b, __c);
+}
+__DEVICE__ float __fmaf_rd(float __a, float __b, float __c) {
+  return __nv_fmaf_rd(__a, __b, __c);
+}
+__DEVICE__ float __fmaf_rn(float __a, float __b, float __c) {
+  return __nv_fmaf_rn(__a, __b, __c);
+}
+__DEVICE__ float __fmaf_ru(float __a, float __b, float __c) {
+  return __nv_fmaf_ru(__a, __b, __c);
+}
+__DEVICE__ float __fmaf_rz(float __a, float __b, float __c) {
+  return __nv_fmaf_rz(__a, __b, __c);
+}
+__DEVICE__ float __fmul_rd(float __a, float __b) {
+  return __nv_fmul_rd(__a, __b);
+}
+__DEVICE__ float __fmul_rn(float __a, float __b) {
+  return __nv_fmul_rn(__a, __b);
+}
+__DEVICE__ float __fmul_ru(float __a, float __b) {
+  return __nv_fmul_ru(__a, __b);
+}
+__DEVICE__ float __fmul_rz(float __a, float __b) {
+  return __nv_fmul_rz(__a, __b);
+}
+__DEVICE__ float __frcp_rd(float __a) { return __nv_frcp_rd(__a); }
+__DEVICE__ float __frcp_rn(float __a) { return __nv_frcp_rn(__a); }
+__DEVICE__ float __frcp_ru(float __a) { return __nv_frcp_ru(__a); }
+__DEVICE__ float __frcp_rz(float __a) { return __nv_frcp_rz(__a); }
+__DEVICE__ float __frsqrt_rn(float __a) { return __nv_frsqrt_rn(__a); }
+__DEVICE__ float __fsqrt_rd(float __a) { return __nv_fsqrt_rd(__a); }
+__DEVICE__ float __fsqrt_rn(float __a) { return __nv_fsqrt_rn(__a); }
+__DEVICE__ float __fsqrt_ru(float __a) { return __nv_fsqrt_ru(__a); }
+__DEVICE__ float __fsqrt_rz(float __a) { return __nv_fsqrt_rz(__a); }
+__DEVICE__ float __fsub_rd(float __a, float __b) {
+  return __nv_fsub_rd(__a, __b);
+}
+__DEVICE__ float __fsub_rn(float __a, float __b) {
+  return __nv_fsub_rn(__a, __b);
+}
+__DEVICE__ float __fsub_ru(float __a, float __b) {
+  return __nv_fsub_ru(__a, __b);
+}
+__DEVICE__ float __fsub_rz(float __a, float __b) {
+  return __nv_fsub_rz(__a, __b);
+}
+__DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); }
+__DEVICE__ double __hiloint2double(int __a, int __b) {
+  return __nv_hiloint2double(__a, __b);
+}
+__DEVICE__ int __iAtomicAdd(int *__p, int __v) {
+  return __nvvm_atom_add_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicAdd_block(int *__p, int __v) {
+  __nvvm_atom_cta_add_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicAdd_system(int *__p, int __v) {
+  __nvvm_atom_sys_add_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicAnd(int *__p, int __v) {
+  return __nvvm_atom_and_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicAnd_block(int *__p, int __v) {
+  return __nvvm_atom_cta_and_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicAnd_system(int *__p, int __v) {
+  return __nvvm_atom_sys_and_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v) {
+  return __nvvm_atom_cas_gen_i(__p, __cmp, __v);
+}
+__DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v) {
+  return __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v);
+}
+__DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v) {
+  return __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v);
+}
+__DEVICE__ int __iAtomicExch(int *__p, int __v) {
+  return __nvvm_atom_xchg_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicExch_block(int *__p, int __v) {
+  return __nvvm_atom_cta_xchg_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicExch_system(int *__p, int __v) {
+  return __nvvm_atom_sys_xchg_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicMax(int *__p, int __v) {
+  return __nvvm_atom_max_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicMax_block(int *__p, int __v) {
+  return __nvvm_atom_cta_max_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicMax_system(int *__p, int __v) {
+  return __nvvm_atom_sys_max_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicMin(int *__p, int __v) {
+  return __nvvm_atom_min_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicMin_block(int *__p, int __v) {
+  return __nvvm_atom_cta_min_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicMin_system(int *__p, int __v) {
+  return __nvvm_atom_sys_min_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicOr(int *__p, int __v) {
+  return __nvvm_atom_or_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicOr_block(int *__p, int __v) {
+  return __nvvm_atom_cta_or_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicOr_system(int *__p, int __v) {
+  return __nvvm_atom_sys_or_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicXor(int *__p, int __v) {
+  return __nvvm_atom_xor_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicXor_block(int *__p, int __v) {
+  return __nvvm_atom_cta_xor_gen_i(__p, __v);
+}
+__DEVICE__ int __iAtomicXor_system(int *__p, int __v) {
+  return __nvvm_atom_sys_xor_gen_i(__p, __v);
+}
+__DEVICE__ long long __illAtomicMax(long long *__p, long long __v) {
+  return __nvvm_atom_max_gen_ll(__p, __v);
+}
+__DEVICE__ long long __illAtomicMax_block(long long *__p, long long __v) {
+  return __nvvm_atom_cta_max_gen_ll(__p, __v);
+}
+__DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v) {
+  return __nvvm_atom_sys_max_gen_ll(__p, __v);
+}
+__DEVICE__ long long __illAtomicMin(long long *__p, long long __v) {
+  return __nvvm_atom_min_gen_ll(__p, __v);
+}
+__DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v) {
+  return __nvvm_atom_cta_min_gen_ll(__p, __v);
+}
+__DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v) {
+  return __nvvm_atom_sys_min_gen_ll(__p, __v);
+}
+__DEVICE__ double __int2double_rn(int __a) { return __nv_int2double_rn(__a); }
+__DEVICE__ float __int2float_rd(int __a) { return __nv_int2float_rd(__a); }
+__DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); }
+__DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); }
+__DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); }
+__DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); }
+__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }
+__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }
+__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }
+__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }
+__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }
+__DEVICE__ double __ll2double_rd(long long __a) {
+  return __nv_ll2double_rd(__a);
+}
+__DEVICE__ double __ll2double_rn(long long __a) {
+  return __nv_ll2double_rn(__a);
+}
+__DEVICE__ double __ll2double_ru(long long __a) {
+  return __nv_ll2double_ru(__a);
+}
+__DEVICE__ double __ll2double_rz(long long __a) {
+  return __nv_ll2double_rz(__a);
+}
+__DEVICE__ float __ll2float_rd(long long __a) { return __nv_ll2float_rd(__a); }
+__DEVICE__ float __ll2float_rn(long long __a) { return __nv_ll2float_rn(__a); }
+__DEVICE__ float __ll2float_ru(long long __a) { return __nv_ll2float_ru(__a); }
+__DEVICE__ float __ll2float_rz(long long __a) { return __nv_ll2float_rz(__a); }
+__DEVICE__ long long __llAtomicAnd(long long *__p, long long __v) {
+  return __nvvm_atom_and_gen_ll(__p, __v);
+}
+__DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v) {
+  return __nvvm_atom_cta_and_gen_ll(__p, __v);
+}
+__DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v) {
+  return __nvvm_atom_sys_and_gen_ll(__p, __v);
+}
+__DEVICE__ long long __llAtomicOr(long long *__p, long long __v) {
+  return __nvvm_atom_or_gen_ll(__p, __v);
+}
+__DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v) {
+  return __nvvm_atom_cta_or_gen_ll(__p, __v);
+}
+__DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v) {
+  return __nvvm_atom_sys_or_gen_ll(__p, __v);
+}
+__DEVICE__ long long __llAtomicXor(long long *__p, long long __v) {
+  return __nvvm_atom_xor_gen_ll(__p, __v);
+}
+__DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v) {
+  return __nvvm_atom_cta_xor_gen_ll(__p, __v);
+}
+__DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v) {
+  return __nvvm_atom_sys_xor_gen_ll(__p, __v);
+}
+__DEVICE__ float __log10f(float __a) { return __nv_fast_log10f(__a); }
+__DEVICE__ float __log2f(float __a) { return __nv_fast_log2f(__a); }
+__DEVICE__ float __logf(float __a) { return __nv_fast_logf(__a); }
+__DEVICE__ double __longlong_as_double(long long __a) {
+  return __nv_longlong_as_double(__a);
+}
+__DEVICE__ int __mul24(int __a, int __b) { return __nv_mul24(__a, __b); }
+__DEVICE__ long long __mul64hi(long long __a, long long __b) {
+  return __nv_mul64hi(__a, __b);
+}
+__DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); }
+__DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }
+__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }
+__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }
+__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }
+__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }
+__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }
+__DEVICE__ float __powf(float __a, float __b) {
+  return __nv_fast_powf(__a, __b);
+}
+
+// Parameter must have a known integer value.
+#define __prof_trigger(__a) asm __volatile__("pmevent \t%0;" ::"i"(__a))
+__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }
+__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {
+  return __nv_sad(__a, __b, __c);
+}
+__DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }
+__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }
+__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }
+__DEVICE__ void __sincosf(float __a, float *__sptr, float *__cptr) {
+  return __nv_fast_sincosf(__a, __sptr, __cptr);
+}
+__DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); }
+__DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); }
+__DEVICE__ int __syncthreads_count(int __a) { return __nvvm_bar0_popc(__a); }
+__DEVICE__ int __syncthreads_or(int __a) { return __nvvm_bar0_or(__a); }
+__DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); }
+__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }
+__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };
+__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };
+__DEVICE__ void __trap(void) { asm volatile("trap;"); }
+__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {
+  return __nvvm_atom_add_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p,
+                                           unsigned int __v) {
+  return __nvvm_atom_cta_add_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p,
+                                            unsigned int __v) {
+  return __nvvm_atom_sys_add_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v) {
+  return __nvvm_atom_and_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p,
+                                           unsigned int __v) {
+  return __nvvm_atom_cta_and_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p,
+                                            unsigned int __v) {
+  return __nvvm_atom_sys_and_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp,
+                                     unsigned int __v) {
+  return __nvvm_atom_cas_gen_i((int *)__p, __cmp, __v);
+}
+__DEVICE__ unsigned int
+__uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) {
+  return __nvvm_atom_cta_cas_gen_i((int *)__p, __cmp, __v);
+}
+__DEVICE__ unsigned int
+__uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) {
+  return __nvvm_atom_sys_cas_gen_i((int *)__p, __cmp, __v);
+}
+__DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v) {
+  return __nvvm_atom_dec_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p,
+                                           unsigned int __v) {
+  return __nvvm_atom_cta_dec_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p,
+                                            unsigned int __v) {
+  return __nvvm_atom_sys_dec_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v) {
+  return __nvvm_atom_xchg_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p,
+                                            unsigned int __v) {
+  return __nvvm_atom_cta_xchg_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p,
+                                             unsigned int __v) {
+  return __nvvm_atom_sys_xchg_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v) {
+  return __nvvm_atom_inc_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p,
+                                           unsigned int __v) {
+  return __nvvm_atom_cta_inc_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p,
+                                            unsigned int __v) {
+  return __nvvm_atom_sys_inc_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v) {
+  return __nvvm_atom_max_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p,
+                                           unsigned int __v) {
+  return __nvvm_atom_cta_max_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p,
+                                            unsigned int __v) {
+  return __nvvm_atom_sys_max_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v) {
+  return __nvvm_atom_min_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p,
+                                           unsigned int __v) {
+  return __nvvm_atom_cta_min_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p,
+                                            unsigned int __v) {
+  return __nvvm_atom_sys_min_gen_ui(__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v) {
+  return __nvvm_atom_or_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v) {
+  return __nvvm_atom_cta_or_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p,
+                                           unsigned int __v) {
+  return __nvvm_atom_sys_or_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v) {
+  return __nvvm_atom_xor_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p,
+                                           unsigned int __v) {
+  return __nvvm_atom_cta_xor_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p,
+                                            unsigned int __v) {
+  return __nvvm_atom_sys_xor_gen_i((int *)__p, __v);
+}
+__DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) {
+  return __nv_uhadd(__a, __b);
+}
+__DEVICE__ double __uint2double_rn(unsigned int __a) {
+  return __nv_uint2double_rn(__a);
+}
+__DEVICE__ float __uint2float_rd(unsigned int __a) {
+  return __nv_uint2float_rd(__a);
+}
+__DEVICE__ float __uint2float_rn(unsigned int __a) {
+  return __nv_uint2float_rn(__a);
+}
+__DEVICE__ float __uint2float_ru(unsigned int __a) {
+  return __nv_uint2float_ru(__a);
+}
+__DEVICE__ float __uint2float_rz(unsigned int __a) {
+  return __nv_uint2float_rz(__a);
+}
+__DEVICE__ float __uint_as_float(unsigned int __a) {
+  return __nv_uint_as_float(__a);
+} //
+__DEVICE__ double __ull2double_rd(unsigned long long __a) {
+  return __nv_ull2double_rd(__a);
+}
+__DEVICE__ double __ull2double_rn(unsigned long long __a) {
+  return __nv_ull2double_rn(__a);
+}
+__DEVICE__ double __ull2double_ru(unsigned long long __a) {
+  return __nv_ull2double_ru(__a);
+}
+__DEVICE__ double __ull2double_rz(unsigned long long __a) {
+  return __nv_ull2double_rz(__a);
+}
+__DEVICE__ float __ull2float_rd(unsigned long long __a) {
+  return __nv_ull2float_rd(__a);
+}
+__DEVICE__ float __ull2float_rn(unsigned long long __a) {
+  return __nv_ull2float_rn(__a);
+}
+__DEVICE__ float __ull2float_ru(unsigned long long __a) {
+  return __nv_ull2float_ru(__a);
+}
+__DEVICE__ float __ull2float_rz(unsigned long long __a) {
+  return __nv_ull2float_rz(__a);
+}
+__DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p,
+                                             unsigned long long __v) {
+  return __nvvm_atom_add_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p,
+                                                   unsigned long long __v) {
+  return __nvvm_atom_cta_add_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p,
+                                                    unsigned long long __v) {
+  return __nvvm_atom_sys_add_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p,
+                                             unsigned long long __v) {
+  return __nvvm_atom_and_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p,
+                                                   unsigned long long __v) {
+  return __nvvm_atom_cta_and_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p,
+                                                    unsigned long long __v) {
+  return __nvvm_atom_sys_and_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p,
+                                             unsigned long long __cmp,
+                                             unsigned long long __v) {
+  return __nvvm_atom_cas_gen_ll((long long *)__p, __cmp, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p,
+                                                   unsigned long long __cmp,
+                                                   unsigned long long __v) {
+  return __nvvm_atom_cta_cas_gen_ll((long long *)__p, __cmp, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p,
+                                                    unsigned long long __cmp,
+                                                    unsigned long long __v) {
+  return __nvvm_atom_sys_cas_gen_ll((long long *)__p, __cmp, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p,
+                                              unsigned long long __v) {
+  return __nvvm_atom_xchg_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p,
+                                                    unsigned long long __v) {
+  return __nvvm_atom_cta_xchg_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p,
+                                                     unsigned long long __v) {
+  return __nvvm_atom_sys_xchg_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p,
+                                             unsigned long long __v) {
+  return __nvvm_atom_max_gen_ull(__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p,
+                                                   unsigned long long __v) {
+  return __nvvm_atom_cta_max_gen_ull(__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p,
+                                                    unsigned long long __v) {
+  return __nvvm_atom_sys_max_gen_ull(__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p,
+                                             unsigned long long __v) {
+  return __nvvm_atom_min_gen_ull(__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p,
+                                                   unsigned long long __v) {
+  return __nvvm_atom_cta_min_gen_ull(__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p,
+                                                    unsigned long long __v) {
+  return __nvvm_atom_sys_min_gen_ull(__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p,
+                                            unsigned long long __v) {
+  return __nvvm_atom_or_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p,
+                                                  unsigned long long __v) {
+  return __nvvm_atom_cta_or_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p,
+                                                   unsigned long long __v) {
+  return __nvvm_atom_sys_or_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p,
+                                             unsigned long long __v) {
+  return __nvvm_atom_xor_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p,
+                                                   unsigned long long __v) {
+  return __nvvm_atom_cta_xor_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p,
+                                                    unsigned long long __v) {
+  return __nvvm_atom_sys_xor_gen_ll((long long *)__p, __v);
+}
+__DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) {
+  return __nv_umul24(__a, __b);
+}
+__DEVICE__ unsigned long long __umul64hi(unsigned long long __a,
+                                         unsigned long long __b) {
+  return __nv_umul64hi(__a, __b);
+}
+__DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) {
+  return __nv_umulhi(__a, __b);
+}
+__DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) {
+  return __nv_urhadd(__a, __b);
+}
+__DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b,
+                               unsigned int __c) {
+  return __nv_usad(__a, __b, __c);
+}
+__DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); }
+__DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); }
+__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
+  return __nv_vabsdiffs2(__a, __b);
+}
+__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {
+  return __nv_vabsdiffs4(__a, __b);
+}
+__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {
+  return __nv_vabsdiffu2(__a, __b);
+}
+__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {
+  return __nv_vabsdiffu4(__a, __b);
+}
+__DEVICE__ unsigned int __vabsss2(unsigned int __a) {
+  return __nv_vabsss2(__a);
+}
+__DEVICE__ unsigned int __vabsss4(unsigned int __a) {
+  return __nv_vabsss4(__a);
+}
+__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {
+  return __nv_vadd2(__a, __b);
+}
+__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {
+  return __nv_vadd4(__a, __b);
+}
+__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {
+  return __nv_vaddss2(__a, __b);
+}
+__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {
+  return __nv_vaddss4(__a, __b);
+}
+__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {
+  return __nv_vaddus2(__a, __b);
+}
+__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {
+  return __nv_vaddus4(__a, __b);
+}
+__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {
+  return __nv_vavgs2(__a, __b);
+}
+__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {
+  return __nv_vavgs4(__a, __b);
+}
+__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {
+  return __nv_vavgu2(__a, __b);
+}
+__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {
+  return __nv_vavgu4(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpeq2(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpeq4(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpges2(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpges4(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpgeu2(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpgeu4(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpgts2(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpgts4(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpgtu2(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpgtu4(__a, __b);
+}
+__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
+  return __nv_vcmples2(__a, __b);
+}
+__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
+  return __nv_vcmples4(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpleu2(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpleu4(__a, __b);
+}
+__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
+  return __nv_vcmplts2(__a, __b);
+}
+__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
+  return __nv_vcmplts4(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpltu2(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpltu4(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpne2(__a, __b);
+}
+__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {
+  return __nv_vcmpne4(__a, __b);
+}
+__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {
+  return __nv_vhaddu2(__a, __b);
+}
+__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {
+  return __nv_vhaddu4(__a, __b);
+}
+__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {
+  return __nv_vmaxs2(__a, __b);
+}
+__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {
+  return __nv_vmaxs4(__a, __b);
+}
+__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {
+  return __nv_vmaxu2(__a, __b);
+}
+__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {
+  return __nv_vmaxu4(__a, __b);
+}
+__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {
+  return __nv_vmins2(__a, __b);
+}
+__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {
+  return __nv_vmins4(__a, __b);
+}
+__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {
+  return __nv_vminu2(__a, __b);
+}
+__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {
+  return __nv_vminu4(__a, __b);
+}
+__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __nv_vneg2(__a); }
+__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __nv_vneg4(__a); }
+__DEVICE__ unsigned int __vnegss2(unsigned int __a) {
+  return __nv_vnegss2(__a);
+}
+__DEVICE__ unsigned int __vnegss4(unsigned int __a) {
+  return __nv_vnegss4(__a);
+}
+__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {
+  return __nv_vsads2(__a, __b);
+}
+__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {
+  return __nv_vsads4(__a, __b);
+}
+__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {
+  return __nv_vsadu2(__a, __b);
+}
+__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
+  return __nv_vsadu4(__a, __b);
+}
+__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {
+  return __nv_vseteq2(__a, __b);
+}
+__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {
+  return __nv_vseteq4(__a, __b);
+}
+__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {
+  return __nv_vsetges2(__a, __b);
+}
+__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {
+  return __nv_vsetges4(__a, __b);
+}
+__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {
+  return __nv_vsetgeu2(__a, __b);
+}
+__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {
+  return __nv_vsetgeu4(__a, __b);
+}
+__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {
+  return __nv_vsetgts2(__a, __b);
+}
+__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {
+  return __nv_vsetgts4(__a, __b);
+}
+__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {
+  return __nv_vsetgtu2(__a, __b);
+}
+__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {
+  return __nv_vsetgtu4(__a, __b);
+}
+__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {
+  return __nv_vsetles2(__a, __b);
+}
+__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {
+  return __nv_vsetles4(__a, __b);
+}
+__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {
+  return __nv_vsetleu2(__a, __b);
+}
+__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {
+  return __nv_vsetleu4(__a, __b);
+}
+__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {
+  return __nv_vsetlts2(__a, __b);
+}
+__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {
+  return __nv_vsetlts4(__a, __b);
+}
+__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {
+  return __nv_vsetltu2(__a, __b);
+}
+__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {
+  return __nv_vsetltu4(__a, __b);
+}
+__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {
+  return __nv_vsetne2(__a, __b);
+}
+__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {
+  return __nv_vsetne4(__a, __b);
+}
+__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {
+  return __nv_vsub2(__a, __b);
+}
+__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {
+  return __nv_vsub4(__a, __b);
+}
+__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {
+  return __nv_vsubss2(__a, __b);
+}
+__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {
+  return __nv_vsubss4(__a, __b);
+}
+__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {
+  return __nv_vsubus2(__a, __b);
+}
+__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
+  return __nv_vsubus4(__a, __b);
+}
+__DEVICE__ int abs(int __a) { return __nv_abs(__a); }
+__DEVICE__ double acos(double __a) { return __nv_acos(__a); }
+__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }
+__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }
+__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); }
+__DEVICE__ double asin(double __a) { return __nv_asin(__a); }
+__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); }
+__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); }
+__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); }
+__DEVICE__ double atan(double __a) { return __nv_atan(__a); }
+__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }
+__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }
+__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); }
+__DEVICE__ double atanh(double __a) { return __nv_atanh(__a); }
+__DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); }
+__DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); }
+__DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }
+__DEVICE__ double ceil(double __a) { return __nv_ceil(__a); }
+__DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); }
+__DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); }
+__DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }
+__DEVICE__ double copysign(double __a, double __b) {
+  return __nv_copysign(__a, __b);
+}
+__DEVICE__ float copysignf(float __a, float __b) {
+  return __nv_copysignf(__a, __b);
+}
+__DEVICE__ double cos(double __a) { return __nv_cos(__a); }
+__DEVICE__ float cosf(float __a) {
+  return __FAST_OR_SLOW(__nv_fast_cosf, __nv_cosf)(__a);
+}
+__DEVICE__ double cosh(double __a) { return __nv_cosh(__a); }
+__DEVICE__ float coshf(float __a) { return __nv_coshf(__a); }
+__DEVICE__ double cospi(double __a) { return __nv_cospi(__a); }
+__DEVICE__ float cospif(float __a) { return __nv_cospif(__a); }
+__DEVICE__ double cyl_bessel_i0(double __a) { return __nv_cyl_bessel_i0(__a); }
+__DEVICE__ float cyl_bessel_i0f(float __a) { return __nv_cyl_bessel_i0f(__a); }
+__DEVICE__ double cyl_bessel_i1(double __a) { return __nv_cyl_bessel_i1(__a); }
+__DEVICE__ float cyl_bessel_i1f(float __a) { return __nv_cyl_bessel_i1f(__a); }
+__DEVICE__ double erf(double __a) { return __nv_erf(__a); }
+__DEVICE__ double erfc(double __a) { return __nv_erfc(__a); }
+__DEVICE__ float erfcf(float __a) { return __nv_erfcf(__a); }
+__DEVICE__ double erfcinv(double __a) { return __nv_erfcinv(__a); }
+__DEVICE__ float erfcinvf(float __a) { return __nv_erfcinvf(__a); }
+__DEVICE__ double erfcx(double __a) { return __nv_erfcx(__a); }
+__DEVICE__ float erfcxf(float __a) { return __nv_erfcxf(__a); }
+__DEVICE__ float erff(float __a) { return __nv_erff(__a); }
+__DEVICE__ double erfinv(double __a) { return __nv_erfinv(__a); }
+__DEVICE__ float erfinvf(float __a) { return __nv_erfinvf(__a); }
+__DEVICE__ double exp(double __a) { return __nv_exp(__a); }
+__DEVICE__ double exp10(double __a) { return __nv_exp10(__a); }
+__DEVICE__ float exp10f(float __a) { return __nv_exp10f(__a); }
+__DEVICE__ double exp2(double __a) { return __nv_exp2(__a); }
+__DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); }
+__DEVICE__ float expf(float __a) { return __nv_expf(__a); }
+__DEVICE__ double expm1(double __a) { return __nv_expm1(__a); }
+__DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); }
+__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }
+__DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); }
+__DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); }
+__DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); }
+__DEVICE__ double fdivide(double __a, double __b) { return __a / __b; }
+__DEVICE__ float fdividef(float __a, float __b) {
+#if __FAST_MATH__ && !__CUDA_PREC_DIV
+  return __nv_fast_fdividef(__a, __b);
+#else
+  return __a / __b;
+#endif
+}
+__DEVICE__ double floor(double __f) { return __nv_floor(__f); }
+__DEVICE__ float floorf(float __f) { return __nv_floorf(__f); }
+__DEVICE__ double fma(double __a, double __b, double __c) {
+  return __nv_fma(__a, __b, __c);
+}
+__DEVICE__ float fmaf(float __a, float __b, float __c) {
+  return __nv_fmaf(__a, __b, __c);
+}
+__DEVICE__ double fmax(double __a, double __b) { return __nv_fmax(__a, __b); }
+__DEVICE__ float fmaxf(float __a, float __b) { return __nv_fmaxf(__a, __b); }
+__DEVICE__ double fmin(double __a, double __b) { return __nv_fmin(__a, __b); }
+__DEVICE__ float fminf(float __a, float __b) { return __nv_fminf(__a, __b); }
+__DEVICE__ double fmod(double __a, double __b) { return __nv_fmod(__a, __b); }
+__DEVICE__ float fmodf(float __a, float __b) { return __nv_fmodf(__a, __b); }
+__DEVICE__ double frexp(double __a, int *__b) { return __nv_frexp(__a, __b); }
+__DEVICE__ float frexpf(float __a, int *__b) { return __nv_frexpf(__a, __b); }
+__DEVICE__ double hypot(double __a, double __b) { return __nv_hypot(__a, __b); }
+__DEVICE__ float hypotf(float __a, float __b) { return __nv_hypotf(__a, __b); }
+__DEVICE__ int ilogb(double __a) { return __nv_ilogb(__a); }
+__DEVICE__ int ilogbf(float __a) { return __nv_ilogbf(__a); }
+__DEVICE__ double j0(double __a) { return __nv_j0(__a); }
+__DEVICE__ float j0f(float __a) { return __nv_j0f(__a); }
+__DEVICE__ double j1(double __a) { return __nv_j1(__a); }
+__DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }
+__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
+__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
+#if defined(__LP64__)
+__DEVICE__ long labs(long __a) { return llabs(__a); };
+#else
+__DEVICE__ long labs(long __a) { return __nv_abs(__a); };
+#endif
+__DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }
+__DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }
+__DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }
+__DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }
+__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }
+__DEVICE__ long long llmax(long long __a, long long __b) {
+  return __nv_llmax(__a, __b);
+}
+__DEVICE__ long long llmin(long long __a, long long __b) {
+  return __nv_llmin(__a, __b);
+}
+__DEVICE__ long long llrint(double __a) { return __nv_llrint(__a); }
+__DEVICE__ long long llrintf(float __a) { return __nv_llrintf(__a); }
+__DEVICE__ long long llround(double __a) { return __nv_llround(__a); }
+__DEVICE__ long long llroundf(float __a) { return __nv_llroundf(__a); }
+__DEVICE__ double log(double __a) { return __nv_log(__a); }
+__DEVICE__ double log10(double __a) { return __nv_log10(__a); }
+__DEVICE__ float log10f(float __a) { return __nv_log10f(__a); }
+__DEVICE__ double log1p(double __a) { return __nv_log1p(__a); }
+__DEVICE__ float log1pf(float __a) { return __nv_log1pf(__a); }
+__DEVICE__ double log2(double __a) { return __nv_log2(__a); }
+__DEVICE__ float log2f(float __a) {
+  return __FAST_OR_SLOW(__nv_fast_log2f, __nv_log2f)(__a);
+}
+__DEVICE__ double logb(double __a) { return __nv_logb(__a); }
+__DEVICE__ float logbf(float __a) { return __nv_logbf(__a); }
+__DEVICE__ float logf(float __a) {
+  return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a);
+}
+#if defined(__LP64__)
+__DEVICE__ long lrint(double __a) { return llrint(__a); }
+__DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); }
+__DEVICE__ long lround(double __a) { return llround(__a); }
+__DEVICE__ long lroundf(float __a) { return llroundf(__a); }
+#else
+__DEVICE__ long lrint(double __a) { return (long)rint(__a); }
+__DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); }
+__DEVICE__ long lround(double __a) { return round(__a); }
+__DEVICE__ long lroundf(float __a) { return roundf(__a); }
+#endif
+__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }
+__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {
+  return __builtin_memcpy(__a, __b, __c);
+}
+__DEVICE__ void *memset(void *__a, int __b, size_t __c) {
+  return __builtin_memset(__a, __b, __c);
+}
+__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }
+__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }
+__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }
+__DEVICE__ double nearbyint(double __a) { return __nv_nearbyint(__a); }
+__DEVICE__ float nearbyintf(float __a) { return __nv_nearbyintf(__a); }
+__DEVICE__ double nextafter(double __a, double __b) {
+  return __nv_nextafter(__a, __b);
+}
+__DEVICE__ float nextafterf(float __a, float __b) {
+  return __nv_nextafterf(__a, __b);
+}
+__DEVICE__ double norm(int __dim, const double *__t) {
+  return __nv_norm(__dim, __t);
+}
+__DEVICE__ double norm3d(double __a, double __b, double __c) {
+  return __nv_norm3d(__a, __b, __c);
+}
+__DEVICE__ float norm3df(float __a, float __b, float __c) {
+  return __nv_norm3df(__a, __b, __c);
+}
+__DEVICE__ double norm4d(double __a, double __b, double __c, double __d) {
+  return __nv_norm4d(__a, __b, __c, __d);
+}
+__DEVICE__ float norm4df(float __a, float __b, float __c, float __d) {
+  return __nv_norm4df(__a, __b, __c, __d);
+}
+__DEVICE__ double normcdf(double __a) { return __nv_normcdf(__a); }
+__DEVICE__ float normcdff(float __a) { return __nv_normcdff(__a); }
+__DEVICE__ double normcdfinv(double __a) { return __nv_normcdfinv(__a); }
+__DEVICE__ float normcdfinvf(float __a) { return __nv_normcdfinvf(__a); }
+__DEVICE__ float normf(int __dim, const float *__t) {
+  return __nv_normf(__dim, __t);
+}
+__DEVICE__ double pow(double __a, double __b) { return __nv_pow(__a, __b); }
+__DEVICE__ float powf(float __a, float __b) { return __nv_powf(__a, __b); }
+__DEVICE__ double powi(double __a, int __b) { return __nv_powi(__a, __b); }
+__DEVICE__ float powif(float __a, int __b) { return __nv_powif(__a, __b); }
+__DEVICE__ double rcbrt(double __a) { return __nv_rcbrt(__a); }
+__DEVICE__ float rcbrtf(float __a) { return __nv_rcbrtf(__a); }
+__DEVICE__ double remainder(double __a, double __b) {
+  return __nv_remainder(__a, __b);
+}
+__DEVICE__ float remainderf(float __a, float __b) {
+  return __nv_remainderf(__a, __b);
+}
+__DEVICE__ double remquo(double __a, double __b, int *__c) {
+  return __nv_remquo(__a, __b, __c);
+}
+__DEVICE__ float remquof(float __a, float __b, int *__c) {
+  return __nv_remquof(__a, __b, __c);
+}
+__DEVICE__ double rhypot(double __a, double __b) {
+  return __nv_rhypot(__a, __b);
+}
+__DEVICE__ float rhypotf(float __a, float __b) {
+  return __nv_rhypotf(__a, __b);
+}
+__DEVICE__ double rint(double __a) { return __nv_rint(__a); }
+__DEVICE__ float rintf(float __a) { return __nv_rintf(__a); }
+__DEVICE__ double rnorm(int __a, const double *__b) {
+  return __nv_rnorm(__a, __b);
+}
+__DEVICE__ double rnorm3d(double __a, double __b, double __c) {
+  return __nv_rnorm3d(__a, __b, __c);
+}
+__DEVICE__ float rnorm3df(float __a, float __b, float __c) {
+  return __nv_rnorm3df(__a, __b, __c);
+}
+__DEVICE__ double rnorm4d(double __a, double __b, double __c, double __d) {
+  return __nv_rnorm4d(__a, __b, __c, __d);
+}
+__DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) {
+  return __nv_rnorm4df(__a, __b, __c, __d);
+}
+__DEVICE__ float rnormf(int __dim, const float *__t) {
+  return __nv_rnormf(__dim, __t);
+}
+__DEVICE__ double round(double __a) { return __nv_round(__a); }
+__DEVICE__ float roundf(float __a) { return __nv_roundf(__a); }
+__DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }
+__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }
+__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }
+__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }
+__DEVICE__ double scalbln(double __a, long __b) {
+  if (__b > INT_MAX)
+    return __a > 0 ? HUGE_VAL : -HUGE_VAL;
+  if (__b < INT_MIN)
+    return __a > 0 ? 0.0 : -0.0;
+  return scalbn(__a, (int)__b);
+}
+__DEVICE__ float scalblnf(float __a, long __b) {
+  if (__b > INT_MAX)
+    return __a > 0 ? HUGE_VALF : -HUGE_VALF;
+  if (__b < INT_MIN)
+    return __a > 0 ? 0.f : -0.f;
+  return scalbnf(__a, (int)__b);
+}
+__DEVICE__ double sin(double __a) { return __nv_sin(__a); }
+__DEVICE__ void sincos(double __a, double *__sptr, double *__cptr) {
+  return __nv_sincos(__a, __sptr, __cptr);
+}
+__DEVICE__ void sincosf(float __a, float *__sptr, float *__cptr) {
+  return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __sptr, __cptr);
+}
+__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr) {
+  return __nv_sincospi(__a, __sptr, __cptr);
+}
+__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr) {
+  return __nv_sincospif(__a, __sptr, __cptr);
+}
+__DEVICE__ float sinf(float __a) {
+  return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a);
+}
+__DEVICE__ double sinh(double __a) { return __nv_sinh(__a); }
+__DEVICE__ float sinhf(float __a) { return __nv_sinhf(__a); }
+__DEVICE__ double sinpi(double __a) { return __nv_sinpi(__a); }
+__DEVICE__ float sinpif(float __a) { return __nv_sinpif(__a); }
+__DEVICE__ double sqrt(double __a) { return __nv_sqrt(__a); }
+__DEVICE__ float sqrtf(float __a) { return __nv_sqrtf(__a); }
+__DEVICE__ double tan(double __a) { return __nv_tan(__a); }
+__DEVICE__ float tanf(float __a) { return __nv_tanf(__a); }
+__DEVICE__ double tanh(double __a) { return __nv_tanh(__a); }
+__DEVICE__ float tanhf(float __a) { return __nv_tanhf(__a); }
+__DEVICE__ double tgamma(double __a) { return __nv_tgamma(__a); }
+__DEVICE__ float tgammaf(float __a) { return __nv_tgammaf(__a); }
+__DEVICE__ double trunc(double __a) { return __nv_trunc(__a); }
+__DEVICE__ float truncf(float __a) { return __nv_truncf(__a); }
+__DEVICE__ unsigned long long ullmax(unsigned long long __a,
+                                     unsigned long long __b) {
+  return __nv_ullmax(__a, __b);
+}
+__DEVICE__ unsigned long long ullmin(unsigned long long __a,
+                                     unsigned long long __b) {
+  return __nv_ullmin(__a, __b);
+}
+__DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b) {
+  return __nv_umax(__a, __b);
+}
+__DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b) {
+  return __nv_umin(__a, __b);
+}
+__DEVICE__ double y0(double __a) { return __nv_y0(__a); }
+__DEVICE__ float y0f(float __a) { return __nv_y0f(__a); }
+__DEVICE__ double y1(double __a) { return __nv_y1(__a); }
+__DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }
+__DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
+__DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
+
+#pragma pop_macro("__DEVICE__")
+#pragma pop_macro("__FAST_OR_SLOW")
+#endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__
diff --git a/lib/Headers/__clang_cuda_intrinsics.h b/lib/Headers/__clang_cuda_intrinsics.h
index bc5b876..1794eb3 100644
--- a/lib/Headers/__clang_cuda_intrinsics.h
+++ b/lib/Headers/__clang_cuda_intrinsics.h
@@ -34,23 +34,24 @@
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
 
 #pragma push_macro("__MAKE_SHUFFLES")
-#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask)    \
-  inline __device__ int __FnName(int __val, int __offset,                      \
+#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask,    \
+                        __Type)                                                \
+  inline __device__ int __FnName(int __val, __Type __offset,                   \
                                  int __width = warpSize) {                     \
     return __IntIntrinsic(__val, __offset,                                     \
                           ((warpSize - __width) << 8) | (__Mask));             \
   }                                                                            \
-  inline __device__ float __FnName(float __val, int __offset,                  \
+  inline __device__ float __FnName(float __val, __Type __offset,               \
                                    int __width = warpSize) {                   \
     return __FloatIntrinsic(__val, __offset,                                   \
                             ((warpSize - __width) << 8) | (__Mask));           \
   }                                                                            \
-  inline __device__ unsigned int __FnName(unsigned int __val, int __offset,    \
+  inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \
                                           int __width = warpSize) {            \
     return static_cast<unsigned int>(                                          \
         ::__FnName(static_cast<int>(__val), __offset, __width));               \
   }                                                                            \
-  inline __device__ long long __FnName(long long __val, int __offset,          \
+  inline __device__ long long __FnName(long long __val, __Type __offset,       \
                                        int __width = warpSize) {               \
     struct __Bits {                                                            \
       int __a, __b;                                                            \
@@ -65,12 +66,29 @@
     memcpy(&__ret, &__tmp, sizeof(__tmp));                                     \
     return __ret;                                                              \
   }                                                                            \
+  inline __device__ long __FnName(long __val, __Type __offset,                 \
+                                  int __width = warpSize) {                    \
+    _Static_assert(sizeof(long) == sizeof(long long) ||                        \
+                   sizeof(long) == sizeof(int));                               \
+    if (sizeof(long) == sizeof(long long)) {                                   \
+      return static_cast<long>(                                                \
+          ::__FnName(static_cast<long long>(__val), __offset, __width));       \
+    } else if (sizeof(long) == sizeof(int)) {                                  \
+      return static_cast<long>(                                                \
+          ::__FnName(static_cast<int>(__val), __offset, __width));             \
+    }                                                                          \
+  }                                                                            \
+  inline __device__ unsigned long __FnName(                                    \
+      unsigned long __val, __Type __offset, int __width = warpSize) {          \
+    return static_cast<unsigned long>(                                         \
+        ::__FnName(static_cast<long>(__val), __offset, __width));              \
+  }                                                                            \
   inline __device__ unsigned long long __FnName(                               \
-      unsigned long long __val, int __offset, int __width = warpSize) {        \
+      unsigned long long __val, __Type __offset, int __width = warpSize) {     \
     return static_cast<unsigned long long>(::__FnName(                         \
         static_cast<unsigned long long>(__val), __offset, __width));           \
   }                                                                            \
-  inline __device__ double __FnName(double __val, int __offset,                \
+  inline __device__ double __FnName(double __val, __Type __offset,             \
                                     int __width = warpSize) {                  \
     long long __tmp;                                                           \
     _Static_assert(sizeof(__tmp) == sizeof(__val));                            \
@@ -81,13 +99,15 @@
     return __ret;                                                              \
   }
 
-__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f);
+__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int);
 // We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
 // maxLane.
-__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0);
-__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f);
-__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f);
-
+__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0,
+                unsigned int);
+__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f,
+                unsigned int);
+__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,
+                int);
 #pragma pop_macro("__MAKE_SHUFFLES")
 
 #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
@@ -97,25 +117,26 @@
 // __shfl_sync_* variants available in CUDA-9
 #pragma push_macro("__MAKE_SYNC_SHUFFLES")
 #define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic,       \
-                             __Mask)                                           \
-  inline __device__ int __FnName(unsigned int __mask, int __val, int __offset, \
-                                 int __width = warpSize) {                     \
+                             __Mask, __Type)                                   \
+  inline __device__ int __FnName(unsigned int __mask, int __val,               \
+                                 __Type __offset, int __width = warpSize) {    \
     return __IntIntrinsic(__mask, __val, __offset,                             \
                           ((warpSize - __width) << 8) | (__Mask));             \
   }                                                                            \
   inline __device__ float __FnName(unsigned int __mask, float __val,           \
-                                   int __offset, int __width = warpSize) {     \
+                                   __Type __offset, int __width = warpSize) {  \
     return __FloatIntrinsic(__mask, __val, __offset,                           \
                             ((warpSize - __width) << 8) | (__Mask));           \
   }                                                                            \
   inline __device__ unsigned int __FnName(unsigned int __mask,                 \
-                                          unsigned int __val, int __offset,    \
+                                          unsigned int __val, __Type __offset, \
                                           int __width = warpSize) {            \
     return static_cast<unsigned int>(                                          \
         ::__FnName(__mask, static_cast<int>(__val), __offset, __width));       \
   }                                                                            \
   inline __device__ long long __FnName(unsigned int __mask, long long __val,   \
-                                       int __offset, int __width = warpSize) { \
+                                       __Type __offset,                        \
+                                       int __width = warpSize) {               \
     struct __Bits {                                                            \
       int __a, __b;                                                            \
     };                                                                         \
@@ -130,13 +151,31 @@
     return __ret;                                                              \
   }                                                                            \
   inline __device__ unsigned long long __FnName(                               \
-      unsigned int __mask, unsigned long long __val, int __offset,             \
+      unsigned int __mask, unsigned long long __val, __Type __offset,          \
       int __width = warpSize) {                                                \
     return static_cast<unsigned long long>(::__FnName(                         \
         __mask, static_cast<unsigned long long>(__val), __offset, __width));   \
   }                                                                            \
+  inline __device__ long __FnName(unsigned int __mask, long __val,             \
+                                  __Type __offset, int __width = warpSize) {   \
+    _Static_assert(sizeof(long) == sizeof(long long) ||                        \
+                   sizeof(long) == sizeof(int));                               \
+    if (sizeof(long) == sizeof(long long)) {                                   \
+      return static_cast<long>(::__FnName(                                     \
+          __mask, static_cast<long long>(__val), __offset, __width));          \
+    } else if (sizeof(long) == sizeof(int)) {                                  \
+      return static_cast<long>(                                                \
+          ::__FnName(__mask, static_cast<int>(__val), __offset, __width));     \
+    }                                                                          \
+  }                                                                            \
+  inline __device__ unsigned long __FnName(                                    \
+      unsigned int __mask, unsigned long __val, __Type __offset,               \
+      int __width = warpSize) {                                                \
+    return static_cast<unsigned long>(                                         \
+        ::__FnName(__mask, static_cast<long>(__val), __offset, __width));      \
+  }                                                                            \
   inline __device__ double __FnName(unsigned int __mask, double __val,         \
-                                    int __offset, int __width = warpSize) {    \
+                                    __Type __offset, int __width = warpSize) { \
     long long __tmp;                                                           \
     _Static_assert(sizeof(__tmp) == sizeof(__val));                            \
     memcpy(&__tmp, &__val, sizeof(__val));                                     \
@@ -146,15 +185,15 @@
     return __ret;                                                              \
   }
 __MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,
-                     __nvvm_shfl_sync_idx_f32, 0x1f);
+                     __nvvm_shfl_sync_idx_f32, 0x1f, int);
 // We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
 // maxLane.
 __MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,
-                     __nvvm_shfl_sync_up_f32, 0);
+                     __nvvm_shfl_sync_up_f32, 0, unsigned int);
 __MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,
-                     __nvvm_shfl_sync_down_f32, 0x1f);
+                     __nvvm_shfl_sync_down_f32, 0x1f, unsigned int);
 __MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,
-                     __nvvm_shfl_sync_bfly_f32, 0x1f);
+                     __nvvm_shfl_sync_bfly_f32, 0x1f, int);
 #pragma pop_macro("__MAKE_SYNC_SHUFFLES")
 
 inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {
@@ -188,6 +227,10 @@
 
 inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }
 
+inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {
+  return __nvvm_fns(mask, base, offset);
+}
+
 #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
 
 // Define __match* builtins CUDA-9 headers expect to see.
diff --git a/lib/Headers/__clang_cuda_libdevice_declares.h b/lib/Headers/__clang_cuda_libdevice_declares.h
new file mode 100644
index 0000000..73ea8f8
--- /dev/null
+++ b/lib/Headers/__clang_cuda_libdevice_declares.h
@@ -0,0 +1,465 @@
+/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__
+#define __CLANG_CUDA_LIBDEVICE_DECLARES_H__
+
+extern "C" {
+
+__device__ int __nv_abs(int __a);
+__device__ double __nv_acos(double __a);
+__device__ float __nv_acosf(float __a);
+__device__ double __nv_acosh(double __a);
+__device__ float __nv_acoshf(float __a);
+__device__ double __nv_asin(double __a);
+__device__ float __nv_asinf(float __a);
+__device__ double __nv_asinh(double __a);
+__device__ float __nv_asinhf(float __a);
+__device__ double __nv_atan2(double __a, double __b);
+__device__ float __nv_atan2f(float __a, float __b);
+__device__ double __nv_atan(double __a);
+__device__ float __nv_atanf(float __a);
+__device__ double __nv_atanh(double __a);
+__device__ float __nv_atanhf(float __a);
+__device__ int __nv_brev(int __a);
+__device__ long long __nv_brevll(long long __a);
+__device__ int __nv_byte_perm(int __a, int __b, int __c);
+__device__ double __nv_cbrt(double __a);
+__device__ float __nv_cbrtf(float __a);
+__device__ double __nv_ceil(double __a);
+__device__ float __nv_ceilf(float __a);
+__device__ int __nv_clz(int __a);
+__device__ int __nv_clzll(long long __a);
+__device__ double __nv_copysign(double __a, double __b);
+__device__ float __nv_copysignf(float __a, float __b);
+__device__ double __nv_cos(double __a);
+__device__ float __nv_cosf(float __a);
+__device__ double __nv_cosh(double __a);
+__device__ float __nv_coshf(float __a);
+__device__ double __nv_cospi(double __a);
+__device__ float __nv_cospif(float __a);
+__device__ double __nv_cyl_bessel_i0(double __a);
+__device__ float __nv_cyl_bessel_i0f(float __a);
+__device__ double __nv_cyl_bessel_i1(double __a);
+__device__ float __nv_cyl_bessel_i1f(float __a);
+__device__ double __nv_dadd_rd(double __a, double __b);
+__device__ double __nv_dadd_rn(double __a, double __b);
+__device__ double __nv_dadd_ru(double __a, double __b);
+__device__ double __nv_dadd_rz(double __a, double __b);
+__device__ double __nv_ddiv_rd(double __a, double __b);
+__device__ double __nv_ddiv_rn(double __a, double __b);
+__device__ double __nv_ddiv_ru(double __a, double __b);
+__device__ double __nv_ddiv_rz(double __a, double __b);
+__device__ double __nv_dmul_rd(double __a, double __b);
+__device__ double __nv_dmul_rn(double __a, double __b);
+__device__ double __nv_dmul_ru(double __a, double __b);
+__device__ double __nv_dmul_rz(double __a, double __b);
+__device__ float __nv_double2float_rd(double __a);
+__device__ float __nv_double2float_rn(double __a);
+__device__ float __nv_double2float_ru(double __a);
+__device__ float __nv_double2float_rz(double __a);
+__device__ int __nv_double2hiint(double __a);
+__device__ int __nv_double2int_rd(double __a);
+__device__ int __nv_double2int_rn(double __a);
+__device__ int __nv_double2int_ru(double __a);
+__device__ int __nv_double2int_rz(double __a);
+__device__ long long __nv_double2ll_rd(double __a);
+__device__ long long __nv_double2ll_rn(double __a);
+__device__ long long __nv_double2ll_ru(double __a);
+__device__ long long __nv_double2ll_rz(double __a);
+__device__ int __nv_double2loint(double __a);
+__device__ unsigned int __nv_double2uint_rd(double __a);
+__device__ unsigned int __nv_double2uint_rn(double __a);
+__device__ unsigned int __nv_double2uint_ru(double __a);
+__device__ unsigned int __nv_double2uint_rz(double __a);
+__device__ unsigned long long __nv_double2ull_rd(double __a);
+__device__ unsigned long long __nv_double2ull_rn(double __a);
+__device__ unsigned long long __nv_double2ull_ru(double __a);
+__device__ unsigned long long __nv_double2ull_rz(double __a);
+__device__ unsigned long long __nv_double_as_longlong(double __a);
+__device__ double __nv_drcp_rd(double __a);
+__device__ double __nv_drcp_rn(double __a);
+__device__ double __nv_drcp_ru(double __a);
+__device__ double __nv_drcp_rz(double __a);
+__device__ double __nv_dsqrt_rd(double __a);
+__device__ double __nv_dsqrt_rn(double __a);
+__device__ double __nv_dsqrt_ru(double __a);
+__device__ double __nv_dsqrt_rz(double __a);
+__device__ double __nv_dsub_rd(double __a, double __b);
+__device__ double __nv_dsub_rn(double __a, double __b);
+__device__ double __nv_dsub_ru(double __a, double __b);
+__device__ double __nv_dsub_rz(double __a, double __b);
+__device__ double __nv_erfc(double __a);
+__device__ float __nv_erfcf(float __a);
+__device__ double __nv_erfcinv(double __a);
+__device__ float __nv_erfcinvf(float __a);
+__device__ double __nv_erfcx(double __a);
+__device__ float __nv_erfcxf(float __a);
+__device__ double __nv_erf(double __a);
+__device__ float __nv_erff(float __a);
+__device__ double __nv_erfinv(double __a);
+__device__ float __nv_erfinvf(float __a);
+__device__ double __nv_exp10(double __a);
+__device__ float __nv_exp10f(float __a);
+__device__ double __nv_exp2(double __a);
+__device__ float __nv_exp2f(float __a);
+__device__ double __nv_exp(double __a);
+__device__ float __nv_expf(float __a);
+__device__ double __nv_expm1(double __a);
+__device__ float __nv_expm1f(float __a);
+__device__ double __nv_fabs(double __a);
+__device__ float __nv_fabsf(float __a);
+__device__ float __nv_fadd_rd(float __a, float __b);
+__device__ float __nv_fadd_rn(float __a, float __b);
+__device__ float __nv_fadd_ru(float __a, float __b);
+__device__ float __nv_fadd_rz(float __a, float __b);
+__device__ float __nv_fast_cosf(float __a);
+__device__ float __nv_fast_exp10f(float __a);
+__device__ float __nv_fast_expf(float __a);
+__device__ float __nv_fast_fdividef(float __a, float __b);
+__device__ float __nv_fast_log10f(float __a);
+__device__ float __nv_fast_log2f(float __a);
+__device__ float __nv_fast_logf(float __a);
+__device__ float __nv_fast_powf(float __a, float __b);
+__device__ void __nv_fast_sincosf(float __a, float *__sptr, float *__cptr);
+__device__ float __nv_fast_sinf(float __a);
+__device__ float __nv_fast_tanf(float __a);
+__device__ double __nv_fdim(double __a, double __b);
+__device__ float __nv_fdimf(float __a, float __b);
+__device__ float __nv_fdiv_rd(float __a, float __b);
+__device__ float __nv_fdiv_rn(float __a, float __b);
+__device__ float __nv_fdiv_ru(float __a, float __b);
+__device__ float __nv_fdiv_rz(float __a, float __b);
+__device__ int __nv_ffs(int __a);
+__device__ int __nv_ffsll(long long __a);
+__device__ int __nv_finitef(float __a);
+__device__ unsigned short __nv_float2half_rn(float __a);
+__device__ int __nv_float2int_rd(float __a);
+__device__ int __nv_float2int_rn(float __a);
+__device__ int __nv_float2int_ru(float __a);
+__device__ int __nv_float2int_rz(float __a);
+__device__ long long __nv_float2ll_rd(float __a);
+__device__ long long __nv_float2ll_rn(float __a);
+__device__ long long __nv_float2ll_ru(float __a);
+__device__ long long __nv_float2ll_rz(float __a);
+__device__ unsigned int __nv_float2uint_rd(float __a);
+__device__ unsigned int __nv_float2uint_rn(float __a);
+__device__ unsigned int __nv_float2uint_ru(float __a);
+__device__ unsigned int __nv_float2uint_rz(float __a);
+__device__ unsigned long long __nv_float2ull_rd(float __a);
+__device__ unsigned long long __nv_float2ull_rn(float __a);
+__device__ unsigned long long __nv_float2ull_ru(float __a);
+__device__ unsigned long long __nv_float2ull_rz(float __a);
+__device__ int __nv_float_as_int(float __a);
+__device__ unsigned int __nv_float_as_uint(float __a);
+__device__ double __nv_floor(double __a);
+__device__ float __nv_floorf(float __a);
+__device__ double __nv_fma(double __a, double __b, double __c);
+__device__ float __nv_fmaf(float __a, float __b, float __c);
+__device__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c);
+__device__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c);
+__device__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c);
+__device__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c);
+__device__ float __nv_fmaf_rd(float __a, float __b, float __c);
+__device__ float __nv_fmaf_rn(float __a, float __b, float __c);
+__device__ float __nv_fmaf_ru(float __a, float __b, float __c);
+__device__ float __nv_fmaf_rz(float __a, float __b, float __c);
+__device__ double __nv_fma_rd(double __a, double __b, double __c);
+__device__ double __nv_fma_rn(double __a, double __b, double __c);
+__device__ double __nv_fma_ru(double __a, double __b, double __c);
+__device__ double __nv_fma_rz(double __a, double __b, double __c);
+__device__ double __nv_fmax(double __a, double __b);
+__device__ float __nv_fmaxf(float __a, float __b);
+__device__ double __nv_fmin(double __a, double __b);
+__device__ float __nv_fminf(float __a, float __b);
+__device__ double __nv_fmod(double __a, double __b);
+__device__ float __nv_fmodf(float __a, float __b);
+__device__ float __nv_fmul_rd(float __a, float __b);
+__device__ float __nv_fmul_rn(float __a, float __b);
+__device__ float __nv_fmul_ru(float __a, float __b);
+__device__ float __nv_fmul_rz(float __a, float __b);
+__device__ float __nv_frcp_rd(float __a);
+__device__ float __nv_frcp_rn(float __a);
+__device__ float __nv_frcp_ru(float __a);
+__device__ float __nv_frcp_rz(float __a);
+__device__ double __nv_frexp(double __a, int *__b);
+__device__ float __nv_frexpf(float __a, int *__b);
+__device__ float __nv_frsqrt_rn(float __a);
+__device__ float __nv_fsqrt_rd(float __a);
+__device__ float __nv_fsqrt_rn(float __a);
+__device__ float __nv_fsqrt_ru(float __a);
+__device__ float __nv_fsqrt_rz(float __a);
+__device__ float __nv_fsub_rd(float __a, float __b);
+__device__ float __nv_fsub_rn(float __a, float __b);
+__device__ float __nv_fsub_ru(float __a, float __b);
+__device__ float __nv_fsub_rz(float __a, float __b);
+__device__ int __nv_hadd(int __a, int __b);
+__device__ float __nv_half2float(unsigned short __h);
+__device__ double __nv_hiloint2double(int __a, int __b);
+__device__ double __nv_hypot(double __a, double __b);
+__device__ float __nv_hypotf(float __a, float __b);
+__device__ int __nv_ilogb(double __a);
+__device__ int __nv_ilogbf(float __a);
+__device__ double __nv_int2double_rn(int __a);
+__device__ float __nv_int2float_rd(int __a);
+__device__ float __nv_int2float_rn(int __a);
+__device__ float __nv_int2float_ru(int __a);
+__device__ float __nv_int2float_rz(int __a);
+__device__ float __nv_int_as_float(int __a);
+__device__ int __nv_isfinited(double __a);
+__device__ int __nv_isinfd(double __a);
+__device__ int __nv_isinff(float __a);
+__device__ int __nv_isnand(double __a);
+__device__ int __nv_isnanf(float __a);
+__device__ double __nv_j0(double __a);
+__device__ float __nv_j0f(float __a);
+__device__ double __nv_j1(double __a);
+__device__ float __nv_j1f(float __a);
+__device__ float __nv_jnf(int __a, float __b);
+__device__ double __nv_jn(int __a, double __b);
+__device__ double __nv_ldexp(double __a, int __b);
+__device__ float __nv_ldexpf(float __a, int __b);
+__device__ double __nv_lgamma(double __a);
+__device__ float __nv_lgammaf(float __a);
+__device__ double __nv_ll2double_rd(long long __a);
+__device__ double __nv_ll2double_rn(long long __a);
+__device__ double __nv_ll2double_ru(long long __a);
+__device__ double __nv_ll2double_rz(long long __a);
+__device__ float __nv_ll2float_rd(long long __a);
+__device__ float __nv_ll2float_rn(long long __a);
+__device__ float __nv_ll2float_ru(long long __a);
+__device__ float __nv_ll2float_rz(long long __a);
+__device__ long long __nv_llabs(long long __a);
+__device__ long long __nv_llmax(long long __a, long long __b);
+__device__ long long __nv_llmin(long long __a, long long __b);
+__device__ long long __nv_llrint(double __a);
+__device__ long long __nv_llrintf(float __a);
+__device__ long long __nv_llround(double __a);
+__device__ long long __nv_llroundf(float __a);
+__device__ double __nv_log10(double __a);
+__device__ float __nv_log10f(float __a);
+__device__ double __nv_log1p(double __a);
+__device__ float __nv_log1pf(float __a);
+__device__ double __nv_log2(double __a);
+__device__ float __nv_log2f(float __a);
+__device__ double __nv_logb(double __a);
+__device__ float __nv_logbf(float __a);
+__device__ double __nv_log(double __a);
+__device__ float __nv_logf(float __a);
+__device__ double __nv_longlong_as_double(long long __a);
+__device__ int __nv_max(int __a, int __b);
+__device__ int __nv_min(int __a, int __b);
+__device__ double __nv_modf(double __a, double *__b);
+__device__ float __nv_modff(float __a, float *__b);
+__device__ int __nv_mul24(int __a, int __b);
+__device__ long long __nv_mul64hi(long long __a, long long __b);
+__device__ int __nv_mulhi(int __a, int __b);
+__device__ double __nv_nan(const signed char *__a);
+__device__ float __nv_nanf(const signed char *__a);
+__device__ double __nv_nearbyint(double __a);
+__device__ float __nv_nearbyintf(float __a);
+__device__ double __nv_nextafter(double __a, double __b);
+__device__ float __nv_nextafterf(float __a, float __b);
+__device__ double __nv_norm3d(double __a, double __b, double __c);
+__device__ float __nv_norm3df(float __a, float __b, float __c);
+__device__ double __nv_norm4d(double __a, double __b, double __c, double __d);
+__device__ float __nv_norm4df(float __a, float __b, float __c, float __d);
+__device__ double __nv_normcdf(double __a);
+__device__ float __nv_normcdff(float __a);
+__device__ double __nv_normcdfinv(double __a);
+__device__ float __nv_normcdfinvf(float __a);
+__device__ float __nv_normf(int __a, const float *__b);
+__device__ double __nv_norm(int __a, const double *__b);
+__device__ int __nv_popc(int __a);
+__device__ int __nv_popcll(long long __a);
+__device__ double __nv_pow(double __a, double __b);
+__device__ float __nv_powf(float __a, float __b);
+__device__ double __nv_powi(double __a, int __b);
+__device__ float __nv_powif(float __a, int __b);
+__device__ double __nv_rcbrt(double __a);
+__device__ float __nv_rcbrtf(float __a);
+__device__ double __nv_rcp64h(double __a);
+__device__ double __nv_remainder(double __a, double __b);
+__device__ float __nv_remainderf(float __a, float __b);
+__device__ double __nv_remquo(double __a, double __b, int *__c);
+__device__ float __nv_remquof(float __a, float __b, int *__c);
+__device__ int __nv_rhadd(int __a, int __b);
+__device__ double __nv_rhypot(double __a, double __b);
+__device__ float __nv_rhypotf(float __a, float __b);
+__device__ double __nv_rint(double __a);
+__device__ float __nv_rintf(float __a);
+__device__ double __nv_rnorm3d(double __a, double __b, double __c);
+__device__ float __nv_rnorm3df(float __a, float __b, float __c);
+__device__ double __nv_rnorm4d(double __a, double __b, double __c, double __d);
+__device__ float __nv_rnorm4df(float __a, float __b, float __c, float __d);
+__device__ float __nv_rnormf(int __a, const float *__b);
+__device__ double __nv_rnorm(int __a, const double *__b);
+__device__ double __nv_round(double __a);
+__device__ float __nv_roundf(float __a);
+__device__ double __nv_rsqrt(double __a);
+__device__ float __nv_rsqrtf(float __a);
+__device__ int __nv_sad(int __a, int __b, int __c);
+__device__ float __nv_saturatef(float __a);
+__device__ double __nv_scalbn(double __a, int __b);
+__device__ float __nv_scalbnf(float __a, int __b);
+__device__ int __nv_signbitd(double __a);
+__device__ int __nv_signbitf(float __a);
+__device__ void __nv_sincos(double __a, double *__b, double *__c);
+__device__ void __nv_sincosf(float __a, float *__b, float *__c);
+__device__ void __nv_sincospi(double __a, double *__b, double *__c);
+__device__ void __nv_sincospif(float __a, float *__b, float *__c);
+__device__ double __nv_sin(double __a);
+__device__ float __nv_sinf(float __a);
+__device__ double __nv_sinh(double __a);
+__device__ float __nv_sinhf(float __a);
+__device__ double __nv_sinpi(double __a);
+__device__ float __nv_sinpif(float __a);
+__device__ double __nv_sqrt(double __a);
+__device__ float __nv_sqrtf(float __a);
+__device__ double __nv_tan(double __a);
+__device__ float __nv_tanf(float __a);
+__device__ double __nv_tanh(double __a);
+__device__ float __nv_tanhf(float __a);
+__device__ double __nv_tgamma(double __a);
+__device__ float __nv_tgammaf(float __a);
+__device__ double __nv_trunc(double __a);
+__device__ float __nv_truncf(float __a);
+__device__ int __nv_uhadd(unsigned int __a, unsigned int __b);
+__device__ double __nv_uint2double_rn(unsigned int __i);
+__device__ float __nv_uint2float_rd(unsigned int __a);
+__device__ float __nv_uint2float_rn(unsigned int __a);
+__device__ float __nv_uint2float_ru(unsigned int __a);
+__device__ float __nv_uint2float_rz(unsigned int __a);
+__device__ float __nv_uint_as_float(unsigned int __a);
+__device__ double __nv_ull2double_rd(unsigned long long __a);
+__device__ double __nv_ull2double_rn(unsigned long long __a);
+__device__ double __nv_ull2double_ru(unsigned long long __a);
+__device__ double __nv_ull2double_rz(unsigned long long __a);
+__device__ float __nv_ull2float_rd(unsigned long long __a);
+__device__ float __nv_ull2float_rn(unsigned long long __a);
+__device__ float __nv_ull2float_ru(unsigned long long __a);
+__device__ float __nv_ull2float_rz(unsigned long long __a);
+__device__ unsigned long long __nv_ullmax(unsigned long long __a,
+                                          unsigned long long __b);
+__device__ unsigned long long __nv_ullmin(unsigned long long __a,
+                                          unsigned long long __b);
+__device__ unsigned int __nv_umax(unsigned int __a, unsigned int __b);
+__device__ unsigned int __nv_umin(unsigned int __a, unsigned int __b);
+__device__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b);
+__device__ unsigned long long __nv_umul64hi(unsigned long long __a,
+                                            unsigned long long __b);
+__device__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);
+__device__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);
+__device__ unsigned int __nv_usad(unsigned int __a, unsigned int __b,
+                                  unsigned int __c);
+__device__ int __nv_vabs2(int __a);
+__device__ int __nv_vabs4(int __a);
+__device__ int __nv_vabsdiffs2(int __a, int __b);
+__device__ int __nv_vabsdiffs4(int __a, int __b);
+__device__ int __nv_vabsdiffu2(int __a, int __b);
+__device__ int __nv_vabsdiffu4(int __a, int __b);
+__device__ int __nv_vabsss2(int __a);
+__device__ int __nv_vabsss4(int __a);
+__device__ int __nv_vadd2(int __a, int __b);
+__device__ int __nv_vadd4(int __a, int __b);
+__device__ int __nv_vaddss2(int __a, int __b);
+__device__ int __nv_vaddss4(int __a, int __b);
+__device__ int __nv_vaddus2(int __a, int __b);
+__device__ int __nv_vaddus4(int __a, int __b);
+__device__ int __nv_vavgs2(int __a, int __b);
+__device__ int __nv_vavgs4(int __a, int __b);
+__device__ int __nv_vavgu2(int __a, int __b);
+__device__ int __nv_vavgu4(int __a, int __b);
+__device__ int __nv_vcmpeq2(int __a, int __b);
+__device__ int __nv_vcmpeq4(int __a, int __b);
+__device__ int __nv_vcmpges2(int __a, int __b);
+__device__ int __nv_vcmpges4(int __a, int __b);
+__device__ int __nv_vcmpgeu2(int __a, int __b);
+__device__ int __nv_vcmpgeu4(int __a, int __b);
+__device__ int __nv_vcmpgts2(int __a, int __b);
+__device__ int __nv_vcmpgts4(int __a, int __b);
+__device__ int __nv_vcmpgtu2(int __a, int __b);
+__device__ int __nv_vcmpgtu4(int __a, int __b);
+__device__ int __nv_vcmples2(int __a, int __b);
+__device__ int __nv_vcmples4(int __a, int __b);
+__device__ int __nv_vcmpleu2(int __a, int __b);
+__device__ int __nv_vcmpleu4(int __a, int __b);
+__device__ int __nv_vcmplts2(int __a, int __b);
+__device__ int __nv_vcmplts4(int __a, int __b);
+__device__ int __nv_vcmpltu2(int __a, int __b);
+__device__ int __nv_vcmpltu4(int __a, int __b);
+__device__ int __nv_vcmpne2(int __a, int __b);
+__device__ int __nv_vcmpne4(int __a, int __b);
+__device__ int __nv_vhaddu2(int __a, int __b);
+__device__ int __nv_vhaddu4(int __a, int __b);
+__device__ int __nv_vmaxs2(int __a, int __b);
+__device__ int __nv_vmaxs4(int __a, int __b);
+__device__ int __nv_vmaxu2(int __a, int __b);
+__device__ int __nv_vmaxu4(int __a, int __b);
+__device__ int __nv_vmins2(int __a, int __b);
+__device__ int __nv_vmins4(int __a, int __b);
+__device__ int __nv_vminu2(int __a, int __b);
+__device__ int __nv_vminu4(int __a, int __b);
+__device__ int __nv_vneg2(int __a);
+__device__ int __nv_vneg4(int __a);
+__device__ int __nv_vnegss2(int __a);
+__device__ int __nv_vnegss4(int __a);
+__device__ int __nv_vsads2(int __a, int __b);
+__device__ int __nv_vsads4(int __a, int __b);
+__device__ int __nv_vsadu2(int __a, int __b);
+__device__ int __nv_vsadu4(int __a, int __b);
+__device__ int __nv_vseteq2(int __a, int __b);
+__device__ int __nv_vseteq4(int __a, int __b);
+__device__ int __nv_vsetges2(int __a, int __b);
+__device__ int __nv_vsetges4(int __a, int __b);
+__device__ int __nv_vsetgeu2(int __a, int __b);
+__device__ int __nv_vsetgeu4(int __a, int __b);
+__device__ int __nv_vsetgts2(int __a, int __b);
+__device__ int __nv_vsetgts4(int __a, int __b);
+__device__ int __nv_vsetgtu2(int __a, int __b);
+__device__ int __nv_vsetgtu4(int __a, int __b);
+__device__ int __nv_vsetles2(int __a, int __b);
+__device__ int __nv_vsetles4(int __a, int __b);
+__device__ int __nv_vsetleu2(int __a, int __b);
+__device__ int __nv_vsetleu4(int __a, int __b);
+__device__ int __nv_vsetlts2(int __a, int __b);
+__device__ int __nv_vsetlts4(int __a, int __b);
+__device__ int __nv_vsetltu2(int __a, int __b);
+__device__ int __nv_vsetltu4(int __a, int __b);
+__device__ int __nv_vsetne2(int __a, int __b);
+__device__ int __nv_vsetne4(int __a, int __b);
+__device__ int __nv_vsub2(int __a, int __b);
+__device__ int __nv_vsub4(int __a, int __b);
+__device__ int __nv_vsubss2(int __a, int __b);
+__device__ int __nv_vsubss4(int __a, int __b);
+__device__ int __nv_vsubus2(int __a, int __b);
+__device__ int __nv_vsubus4(int __a, int __b);
+__device__ double __nv_y0(double __a);
+__device__ float __nv_y0f(float __a);
+__device__ double __nv_y1(double __a);
+__device__ float __nv_y1f(float __a);
+__device__ float __nv_ynf(int __a, float __b);
+__device__ double __nv_yn(int __a, double __b);
+
+} // extern "C"
+#endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__
diff --git a/lib/Headers/__clang_cuda_math_forward_declares.h b/lib/Headers/__clang_cuda_math_forward_declares.h
index 49c8051..c31b1f4 100644
--- a/lib/Headers/__clang_cuda_math_forward_declares.h
+++ b/lib/Headers/__clang_cuda_math_forward_declares.h
@@ -149,9 +149,6 @@
 __DEVICE__ float nearbyint(float);
 __DEVICE__ double nextafter(double, double);
 __DEVICE__ float nextafter(float, float);
-__DEVICE__ double nexttoward(double, double);
-__DEVICE__ float nexttoward(float, double);
-__DEVICE__ float nexttowardf(float, double);
 __DEVICE__ double pow(double, double);
 __DEVICE__ double pow(double, int);
 __DEVICE__ float pow(float, float);
@@ -185,6 +182,10 @@
 __DEVICE__ double trunc(double);
 __DEVICE__ float trunc(float);
 
+// Notably missing above is nexttoward, which we don't define on
+// the device side because libdevice doesn't give us an implementation, and we
+// don't want to be in the business of writing one ourselves.
+
 // We need to define these overloads in exactly the namespace our standard
 // library uses (including the right inline namespace), otherwise they won't be
 // picked up by other functions in the standard library (e.g. functions in
@@ -255,7 +256,6 @@
 using ::nanf;
 using ::nearbyint;
 using ::nextafter;
-using ::nexttoward;
 using ::pow;
 using ::remainder;
 using ::remquo;
diff --git a/lib/Headers/__clang_cuda_runtime_wrapper.h b/lib/Headers/__clang_cuda_runtime_wrapper.h
index b8ffc2c..a5eb4bd 100644
--- a/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -62,7 +62,7 @@
 #include "cuda.h"
 #if !defined(CUDA_VERSION)
 #error "cuda.h did not define CUDA_VERSION"
-#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9000
+#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9010
 #error "Unsupported CUDA version!"
 #endif
 
@@ -84,6 +84,9 @@
 #define __DEVICE_FUNCTIONS_H__
 #define __MATH_FUNCTIONS_H__
 #define __COMMON_FUNCTIONS_H__
+// device_functions_decls is replaced by __clang_cuda_device_functions.h
+// included below.
+#define __DEVICE_FUNCTIONS_DECLS_H__
 
 #undef __CUDACC__
 #if CUDA_VERSION < 9000
@@ -137,20 +140,22 @@
 }
 #endif
 
-// We need decls for functions in CUDA's libdevice with __device__
-// attribute only. Alas they come either as __host__ __device__ or
-// with no attributes at all. To work around that, define __CUDA_RTC__
-// which produces HD variant and undef __host__ which gives us desided
-// decls with __device__ attribute.
-#pragma push_macro("__host__")
-#define __host__
-#define __CUDACC_RTC__
-#include "device_functions_decls.h"
-#undef __CUDACC_RTC__
+// CUDA 9.1 no longer provides declarations for libdevice functions, so we need
+// to provide our own.
+#include <__clang_cuda_libdevice_declares.h>
 
-// Temporarily poison __host__ macro to ensure it's not used by any of
-// the headers we're about to include.
-#define __host__ UNEXPECTED_HOST_ATTRIBUTE
+// Wrappers for many device-side standard library functions became compiler
+// builtins in CUDA-9 and have been removed from the CUDA headers. Clang now
+// provides its own implementation of the wrappers.
+#if CUDA_VERSION >= 9000
+#include <__clang_cuda_device_functions.h>
+#endif
+
+// __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's
+// counterpart does not do it, so we need to make it empty here to keep
+// following CUDA includes happy.
+#undef __THROW
+#define __THROW
 
 // CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values.
 // Previous versions used to check whether they are defined or not.
@@ -167,24 +172,20 @@
 #endif
 #endif
 
+// Temporarily poison __host__ macro to ensure it's not used by any of
+// the headers we're about to include.
+#pragma push_macro("__host__")
+#define __host__ UNEXPECTED_HOST_ATTRIBUTE
+
 // device_functions.hpp and math_functions*.hpp use 'static
 // __forceinline__' (with no __device__) for definitions of device
 // functions. Temporarily redefine __forceinline__ to include
 // __device__.
 #pragma push_macro("__forceinline__")
 #define __forceinline__ __device__ __inline__ __attribute__((always_inline))
-
-#pragma push_macro("__float2half_rn")
-#if CUDA_VERSION >= 9000
-// CUDA-9 has conflicting prototypes for __float2half_rn(float f) in
-// cuda_fp16.h[pp] and device_functions.hpp. We need to get the one in
-// device_functions.hpp out of the way.
-#define __float2half_rn  __float2half_rn_disabled
-#endif
-
+#if CUDA_VERSION < 9000
 #include "device_functions.hpp"
-#pragma pop_macro("__float2half_rn")
-
+#endif
 
 // math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we
 // get the slow-but-accurate or fast-but-inaccurate versions of functions like
@@ -196,17 +197,27 @@
 #if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
 #define __USE_FAST_MATH__ 1
 #endif
+
+#if CUDA_VERSION >= 9000
+#include "crt/math_functions.hpp"
+#else
 #include "math_functions.hpp"
+#endif
+
 #pragma pop_macro("__USE_FAST_MATH__")
 
+#if CUDA_VERSION < 9000
 #include "math_functions_dbl_ptx3.hpp"
+#endif
 #pragma pop_macro("__forceinline__")
 
 // Pull in host-only functions that are only available when neither
 // __CUDACC__ nor __CUDABE__ are defined.
 #undef __MATH_FUNCTIONS_HPP__
 #undef __CUDABE__
+#if CUDA_VERSION < 9000
 #include "math_functions.hpp"
+#endif
 // Alas, additional overloads for these functions are hard to get to.
 // Considering that we only need these overloads for a few functions,
 // we can provide them here.
@@ -222,22 +233,36 @@
 static inline float normcdf(float __a) { return normcdff(__a); }
 static inline float erfcx(float __a) { return erfcxf(__a); }
 
+#if CUDA_VERSION < 9000
 // For some reason single-argument variant is not always declared by
 // CUDA headers. Alas, device_functions.hpp included below needs it.
 static inline __device__ void __brkpt(int __c) { __brkpt(); }
+#endif
 
 // Now include *.hpp with definitions of various GPU functions.  Alas,
 // a lot of thins get declared/defined with __host__ attribute which
 // we don't want and we have to define it out. We also have to include
 // {device,math}_functions.hpp again in order to extract the other
 // branch of #if/else inside.
-
 #define __host__
 #undef __CUDABE__
 #define __CUDACC__
+#if CUDA_VERSION >= 9000
+// Some atomic functions became compiler builtins in CUDA-9 , so we need their
+// declarations.
+#include "device_atomic_functions.h"
+#endif
 #undef __DEVICE_FUNCTIONS_HPP__
 #include "device_atomic_functions.hpp"
+#if CUDA_VERSION >= 9000
+#include "crt/device_functions.hpp"
+#include "crt/device_double_functions.hpp"
+#else
 #include "device_functions.hpp"
+#define __CUDABE__
+#include "device_double_functions.h"
+#undef __CUDABE__
+#endif
 #include "sm_20_atomic_functions.hpp"
 #include "sm_20_intrinsics.hpp"
 #include "sm_32_atomic_functions.hpp"
@@ -251,8 +276,11 @@
 // reason about our code.
 
 #if CUDA_VERSION >= 8000
+#pragma push_macro("__CUDA_ARCH__")
+#undef __CUDA_ARCH__
 #include "sm_60_atomic_functions.hpp"
 #include "sm_61_intrinsics.hpp"
+#pragma pop_macro("__CUDA_ARCH__")
 #endif
 
 #undef __MATH_FUNCTIONS_HPP__
@@ -270,12 +298,22 @@
 // include guard from math.h wrapper from libstdc++. We have to undo the header
 // guard temporarily to get the definitions we need.
 #pragma push_macro("_GLIBCXX_MATH_H")
+#pragma push_macro("_LIBCPP_VERSION")
 #if CUDA_VERSION >= 9000
 #undef _GLIBCXX_MATH_H
+// We also need to undo another guard that checks for libc++ 3.8+
+#ifdef _LIBCPP_VERSION
+#define _LIBCPP_VERSION 3700
+#endif
 #endif
 
+#if CUDA_VERSION >= 9000
+#include "crt/math_functions.hpp"
+#else
 #include "math_functions.hpp"
+#endif
 #pragma pop_macro("_GLIBCXX_MATH_H")
+#pragma pop_macro("_LIBCPP_VERSION")
 #pragma pop_macro("__GNUC__")
 #pragma pop_macro("signbit")
 
diff --git a/lib/Headers/avx512bitalgintrin.h b/lib/Headers/avx512bitalgintrin.h
new file mode 100644
index 0000000..2dd1471d2
--- /dev/null
+++ b/lib/Headers/avx512bitalgintrin.h
@@ -0,0 +1,97 @@
+/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512BITALGINTRIN_H
+#define __AVX512BITALGINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg")))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_popcnt_epi16(__m512i __A)
+{
+  return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
+              (__v32hi) _mm512_popcnt_epi16(__B),
+              (__v32hi) __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
+{
+  return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(),
+              __U,
+              __B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_popcnt_epi8(__m512i __A)
+{
+  return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
+              (__v64qi) _mm512_popcnt_epi8(__B),
+              (__v64qi) __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
+{
+  return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(),
+              __U,
+              __B);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B)
+{
+  return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A,
+              (__v64qi) __B,
+              __U);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
+{
+  return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1,
+              __A,
+              __B);
+}
+
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h
index 53da586..2aa7214 100644
--- a/lib/Headers/avx512bwintrin.h
+++ b/lib/Headers/avx512bwintrin.h
@@ -56,293 +56,145 @@
 
 /* Integer compare */
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
-                                                   (__mmask64)-1);
-}
+#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
+  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
+                                         (__v64qi)(__m512i)(b), (int)(p), \
+                                         (__mmask64)-1); })
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
-                                                   __u);
-}
+#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
+  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
+                                         (__v64qi)(__m512i)(b), (int)(p), \
+                                         (__mmask64)(m)); })
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
-                                                 (__mmask64)-1);
-}
+#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
+  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
+                                          (__v64qi)(__m512i)(b), (int)(p), \
+                                          (__mmask64)-1); })
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
-                                                 __u);
-}
+#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
+  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
+                                          (__v64qi)(__m512i)(b), (int)(p), \
+                                          (__mmask64)(m)); })
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
-                                                   (__mmask32)-1);
-}
+#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
+                                         (__v32hi)(__m512i)(b), (int)(p), \
+                                         (__mmask32)-1); })
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
-                                                   __u);
-}
+#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
+                                         (__v32hi)(__m512i)(b), (int)(p), \
+                                         (__mmask32)(m)); })
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
-                                                 (__mmask32)-1);
-}
+#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
+                                          (__v32hi)(__m512i)(b), (int)(p), \
+                                          (__mmask32)-1); })
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
-                                                 __u);
-}
+#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
+                                          (__v32hi)(__m512i)(b), (int)(p), \
+                                          (__mmask32)(m)); })
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
-                                                (__mmask64)-1);
-}
+#define _mm512_cmpeq_epi8_mask(A, B) \
+    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm512_mask_cmpeq_epi8_mask(k, A, B) \
+    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm512_cmpge_epi8_mask(A, B) \
+    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
+#define _mm512_mask_cmpge_epi8_mask(k, A, B) \
+    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm512_cmpgt_epi8_mask(A, B) \
+    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
+#define _mm512_mask_cmpgt_epi8_mask(k, A, B) \
+    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm512_cmple_epi8_mask(A, B) \
+    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
+#define _mm512_mask_cmple_epi8_mask(k, A, B) \
+    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm512_cmplt_epi8_mask(A, B) \
+    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
+#define _mm512_mask_cmplt_epi8_mask(k, A, B) \
+    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm512_cmpneq_epi8_mask(A, B) \
+    _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
+#define _mm512_mask_cmpneq_epi8_mask(k, A, B) \
+    _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
-                                                __u);
-}
+#define _mm512_cmpeq_epu8_mask(A, B) \
+    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm512_mask_cmpeq_epu8_mask(k, A, B) \
+    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm512_cmpge_epu8_mask(A, B) \
+    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
+#define _mm512_mask_cmpge_epu8_mask(k, A, B) \
+    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm512_cmpgt_epu8_mask(A, B) \
+    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
+#define _mm512_mask_cmpgt_epu8_mask(k, A, B) \
+    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm512_cmple_epu8_mask(A, B) \
+    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
+#define _mm512_mask_cmple_epu8_mask(k, A, B) \
+    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm512_cmplt_epu8_mask(A, B) \
+    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
+#define _mm512_mask_cmplt_epu8_mask(k, A, B) \
+    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm512_cmpneq_epu8_mask(A, B) \
+    _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
+#define _mm512_mask_cmpneq_epu8_mask(k, A, B) \
+    _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
-                                                 (__mmask64)-1);
-}
+#define _mm512_cmpeq_epi16_mask(A, B) \
+    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm512_mask_cmpeq_epi16_mask(k, A, B) \
+    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm512_cmpge_epi16_mask(A, B) \
+    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
+#define _mm512_mask_cmpge_epi16_mask(k, A, B) \
+    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm512_cmpgt_epi16_mask(A, B) \
+    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
+#define _mm512_mask_cmpgt_epi16_mask(k, A, B) \
+    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm512_cmple_epi16_mask(A, B) \
+    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
+#define _mm512_mask_cmple_epi16_mask(k, A, B) \
+    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm512_cmplt_epi16_mask(A, B) \
+    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
+#define _mm512_mask_cmplt_epi16_mask(k, A, B) \
+    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm512_cmpneq_epi16_mask(A, B) \
+    _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
+#define _mm512_mask_cmpneq_epi16_mask(k, A, B) \
+    _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
-                                                 __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
-                                                (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
-                                                __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
-                                                 (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
-                                                 __u);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
-                                                   (__mmask64)-1);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
-                                                   __u);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
-                                                 (__mmask64)-1);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
-                                                 __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
-                                                   (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
-                                                   __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
-                                                 (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
-                                                 __u);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
-                                                (__mmask64)-1);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
-                                                 (__mmask64)-1);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
-                                                 __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
-                                                (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
-                                                 (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
-                                                 __u);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
-                                                (__mmask64)-1);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
-                                                 (__mmask64)-1);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
-                                                 __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
-                                                (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
-                                                 (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
-                                                 __u);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
-                                                (__mmask64)-1);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
-                                                __u);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
-                                                 (__mmask64)-1);
-}
-
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
-  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
-                                                 __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
-                                                (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
-                                                __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
-                                                 (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
-  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
-                                                 __u);
-}
+#define _mm512_cmpeq_epu16_mask(A, B) \
+    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm512_mask_cmpeq_epu16_mask(k, A, B) \
+    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm512_cmpge_epu16_mask(A, B) \
+    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
+#define _mm512_mask_cmpge_epu16_mask(k, A, B) \
+    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm512_cmpgt_epu16_mask(A, B) \
+    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
+#define _mm512_mask_cmpgt_epu16_mask(k, A, B) \
+    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm512_cmple_epu16_mask(A, B) \
+    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
+#define _mm512_mask_cmple_epu16_mask(k, A, B) \
+    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm512_cmplt_epu16_mask(A, B) \
+    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
+#define _mm512_mask_cmplt_epu16_mask(k, A, B) \
+    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm512_cmpneq_epu16_mask(A, B) \
+    _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
+#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
+    _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_add_epi8 (__m512i __A, __m512i __B) {
@@ -1156,87 +1008,70 @@
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mulhrs_epi16 (__m512i __A, __m512i __B)
+_mm512_mulhrs_epi16(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
-                (__v32hi) __B,
-                (__v32hi) _mm512_setzero_hi(),
-                (__mmask32) -1);
+  return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_mulhrs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-        __m512i __B)
+_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
-                (__v32hi) __B,
-                (__v32hi) __W,
-                (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                         (__v32hi)_mm512_mulhrs_epi16(__A, __B),
+                                         (__v32hi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_mulhrs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
-                (__v32hi) __B,
-                (__v32hi) _mm512_setzero_hi(),
-                (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                         (__v32hi)_mm512_mulhrs_epi16(__A, __B),
+                                         (__v32hi)_mm512_setzero_hi());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mulhi_epi16 (__m512i __A, __m512i __B)
+_mm512_mulhi_epi16(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
-              (__v32hi) __B,
-              (__v32hi) _mm512_setzero_hi(),
-              (__mmask32) -1);
+  return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_mulhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
        __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
-              (__v32hi) __B,
-              (__v32hi) __W,
-              (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                          (__v32hi)_mm512_mulhi_epi16(__A, __B),
+                                          (__v32hi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_mulhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
-              (__v32hi) __B,
-              (__v32hi) _mm512_setzero_hi(),
-              (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                          (__v32hi)_mm512_mulhi_epi16(__A, __B),
+                                          (__v32hi)_mm512_setzero_hi());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mulhi_epu16 (__m512i __A, __m512i __B)
+_mm512_mulhi_epu16(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
-               (__v32hi) __B,
-               (__v32hi) _mm512_setzero_hi(),
-               (__mmask32) -1);
+  return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_mulhi_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
-       __m512i __B)
+_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
-               (__v32hi) __B,
-               (__v32hi) __W,
-               (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                          (__v32hi)_mm512_mulhi_epu16(__A, __B),
+                                          (__v32hi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
-               (__v32hi) __B,
-               (__v32hi) _mm512_setzero_hi(),
-               (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                          (__v32hi)_mm512_mulhi_epu16(__A, __B),
+                                          (__v32hi)_mm512_setzero_hi());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -1541,46 +1376,6 @@
 }
 
 
-#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
-  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
-                                         (__v64qi)(__m512i)(b), (int)(p), \
-                                         (__mmask64)-1); })
-
-#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
-  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
-                                         (__v64qi)(__m512i)(b), (int)(p), \
-                                         (__mmask64)(m)); })
-
-#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
-  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
-                                          (__v64qi)(__m512i)(b), (int)(p), \
-                                          (__mmask64)-1); })
-
-#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
-  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
-                                          (__v64qi)(__m512i)(b), (int)(p), \
-                                          (__mmask64)(m)); })
-
-#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
-  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
-                                         (__v32hi)(__m512i)(b), (int)(p), \
-                                         (__mmask32)-1); })
-
-#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
-  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
-                                         (__v32hi)(__m512i)(b), (int)(p), \
-                                         (__mmask32)(m)); })
-
-#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
-  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
-                                          (__v32hi)(__m512i)(b), (int)(p), \
-                                          (__mmask32)-1); })
-
-#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
-  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
-                                          (__v32hi)(__m512i)(b), (int)(p), \
-                                          (__mmask32)(m)); })
-
 #define _mm512_shufflehi_epi16(A, imm) __extension__ ({ \
   (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \
                                    (__v32hi)_mm512_undefined_epi32(), \
@@ -2105,61 +1900,56 @@
 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_test_epi8_mask (__m512i __A, __m512i __B)
 {
-  return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
-            (__v64qi) __B,
-            (__mmask64) -1);
+  return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
+                                  _mm512_setzero_qi());
 }
 
 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
 {
-  return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
-            (__v64qi) __B, __U);
+  return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
+                                       _mm512_setzero_qi());
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_test_epi16_mask (__m512i __A, __m512i __B)
 {
-  return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
-            (__v32hi) __B,
-            (__mmask32) -1);
+  return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
+                                   _mm512_setzero_qi());
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
 {
-  return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
-            (__v32hi) __B, __U);
+  return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
+                                        _mm512_setzero_qi());
 }
 
 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_testn_epi8_mask (__m512i __A, __m512i __B)
 {
-  return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
-             (__v64qi) __B,
-             (__mmask64) -1);
+  return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_qi());
 }
 
 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
 {
-  return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
-             (__v64qi) __B, __U);
+  return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
+                                      _mm512_setzero_qi());
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_testn_epi16_mask (__m512i __A, __m512i __B)
 {
-  return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
-             (__v32hi) __B,
-             (__mmask32) -1);
+  return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
+                                  _mm512_setzero_qi());
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
 {
-  return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
-             (__v32hi) __B, __U);
+  return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
+                                       _mm512_setzero_qi());
 }
 
 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
diff --git a/lib/Headers/avx512cdintrin.h b/lib/Headers/avx512cdintrin.h
index 23c4235..ec7e0cd 100644
--- a/lib/Headers/avx512cdintrin.h
+++ b/lib/Headers/avx512cdintrin.h
@@ -130,13 +130,14 @@
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_broadcastmb_epi64 (__mmask8 __A)
 {
-  return (__m512i) __builtin_ia32_broadcastmb512 (__A);
+  return (__m512i) _mm512_set1_epi64((long long) __A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_broadcastmw_epi32 (__mmask16 __A)
 {
-  return (__m512i) __builtin_ia32_broadcastmw512 (__A);
+  return (__m512i) _mm512_set1_epi32((int) __A);
+
 }
 
 #undef __DEFAULT_FN_ATTRS
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index 247ac87..f513742 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -4543,37 +4543,6 @@
                                         (__v8di)_mm512_setzero_si512());
 }
 
-/* Bit Test */
-
-static __inline __mmask16 __DEFAULT_FN_ATTRS
-_mm512_test_epi32_mask(__m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
-            (__v16si) __B,
-            (__mmask16) -1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
-                 (__v16si) __B, __U);
-}
-
-static __inline __mmask8 __DEFAULT_FN_ATTRS
-_mm512_test_epi64_mask(__m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
-                 (__v8di) __B,
-                 (__mmask8) -1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
-}
-
 
 /* SIMD load ops */
 
@@ -4844,293 +4813,105 @@
 
 /* Integer compare */
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
-                                                   (__mmask16)-1);
-}
+#define _mm512_cmpeq_epi32_mask(A, B) \
+    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
+    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm512_cmpge_epi32_mask(A, B) \
+    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
+#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
+    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm512_cmpgt_epi32_mask(A, B) \
+    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
+#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
+    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm512_cmple_epi32_mask(A, B) \
+    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
+#define _mm512_mask_cmple_epi32_mask(k, A, B) \
+    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm512_cmplt_epi32_mask(A, B) \
+    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
+#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
+    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm512_cmpneq_epi32_mask(A, B) \
+    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
+#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
+    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
-                                                   __u);
-}
+#define _mm512_cmpeq_epu32_mask(A, B) \
+    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
+    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm512_cmpge_epu32_mask(A, B) \
+    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
+#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
+    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm512_cmpgt_epu32_mask(A, B) \
+    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
+#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
+    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm512_cmple_epu32_mask(A, B) \
+    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
+#define _mm512_mask_cmple_epu32_mask(k, A, B) \
+    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm512_cmplt_epu32_mask(A, B) \
+    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
+#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
+    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm512_cmpneq_epu32_mask(A, B) \
+    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
+#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
+    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
-                                                 (__mmask16)-1);
-}
+#define _mm512_cmpeq_epi64_mask(A, B) \
+    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
+    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm512_cmpge_epi64_mask(A, B) \
+    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
+#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
+    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm512_cmpgt_epi64_mask(A, B) \
+    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
+#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
+    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm512_cmple_epi64_mask(A, B) \
+    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
+#define _mm512_mask_cmple_epi64_mask(k, A, B) \
+    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm512_cmplt_epi64_mask(A, B) \
+    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
+#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
+    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm512_cmpneq_epi64_mask(A, B) \
+    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
+#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
+    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
-                                                 __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
-                                                  __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
-                                                  (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
-                                                (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
-                                                 __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
-                                                   (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
-                                                   __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
-                                                 __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
-                                                  __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
-                                                  (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
-                                                (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
-                                                 __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
-                                                (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
-                                                 __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
-                                                (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
-  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
-                                                 __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
-                                                __u);
-}
+#define _mm512_cmpeq_epu64_mask(A, B) \
+    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
+    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm512_cmpge_epu64_mask(A, B) \
+    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
+#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
+    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm512_cmpgt_epu64_mask(A, B) \
+    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
+#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
+    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm512_cmple_epu64_mask(A, B) \
+    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
+#define _mm512_mask_cmple_epu64_mask(k, A, B) \
+    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm512_cmplt_epu64_mask(A, B) \
+    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
+#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
+    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm512_cmpneq_epu64_mask(A, B) \
+    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
+#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
+    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_cvtepi8_epi32(__m128i __A)
@@ -6797,35 +6578,6 @@
                                                         (__mmask16) __U);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
-             (__v16si) __B,
-             (__mmask16) -1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
-             (__v16si) __B, __U);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
-            (__v8di) __B,
-            (__mmask8) -1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
-            (__v8di) __B, __U);
-}
 
 #define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
   (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
@@ -7194,76 +6946,100 @@
 }
 
 #define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
-  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
-                                         (__v16sf)(__m512)(B), (int)(imm), \
-                                         (__v16sf)_mm512_undefined_ps(), \
-                                         (__mmask16)-1); })
+  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
+                                  (__v16sf)(__m512)(B), \
+                                  0 + ((((imm) >> 0) & 0x3) * 4), \
+                                  1 + ((((imm) >> 0) & 0x3) * 4), \
+                                  2 + ((((imm) >> 0) & 0x3) * 4), \
+                                  3 + ((((imm) >> 0) & 0x3) * 4), \
+                                  0 + ((((imm) >> 2) & 0x3) * 4), \
+                                  1 + ((((imm) >> 2) & 0x3) * 4), \
+                                  2 + ((((imm) >> 2) & 0x3) * 4), \
+                                  3 + ((((imm) >> 2) & 0x3) * 4), \
+                                  16 + ((((imm) >> 4) & 0x3) * 4), \
+                                  17 + ((((imm) >> 4) & 0x3) * 4), \
+                                  18 + ((((imm) >> 4) & 0x3) * 4), \
+                                  19 + ((((imm) >> 4) & 0x3) * 4), \
+                                  16 + ((((imm) >> 6) & 0x3) * 4), \
+                                  17 + ((((imm) >> 6) & 0x3) * 4), \
+                                  18 + ((((imm) >> 6) & 0x3) * 4), \
+                                  19 + ((((imm) >> 6) & 0x3) * 4)); })
 
 #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
-  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
-                                         (__v16sf)(__m512)(B), (int)(imm), \
-                                         (__v16sf)(__m512)(W), \
-                                         (__mmask16)(U)); })
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
+                                      (__v16sf)(__m512)(W)); })
 
 #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
-  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
-                                         (__v16sf)(__m512)(B), (int)(imm), \
-                                         (__v16sf)_mm512_setzero_ps(), \
-                                         (__mmask16)(U)); })
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
+                                      (__v16sf)_mm512_setzero_ps()); })
 
 #define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
-  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
-                                          (__v8df)(__m512d)(B), (int)(imm), \
-                                          (__v8df)_mm512_undefined_pd(), \
-                                          (__mmask8)-1); })
+  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
+                                   (__v8df)(__m512d)(B), \
+                                   0 + ((((imm) >> 0) & 0x3) * 2), \
+                                   1 + ((((imm) >> 0) & 0x3) * 2), \
+                                   0 + ((((imm) >> 2) & 0x3) * 2), \
+                                   1 + ((((imm) >> 2) & 0x3) * 2), \
+                                   8 + ((((imm) >> 4) & 0x3) * 2), \
+                                   9 + ((((imm) >> 4) & 0x3) * 2), \
+                                   8 + ((((imm) >> 6) & 0x3) * 2), \
+                                   9 + ((((imm) >> 6) & 0x3) * 2)); })
 
 #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
-  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
-                                          (__v8df)(__m512d)(B), (int)(imm), \
-                                          (__v8df)(__m512d)(W), \
-                                          (__mmask8)(U)); })
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
+                                       (__v8df)(__m512d)(W)); })
 
 #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
-  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
-                                          (__v8df)(__m512d)(B), (int)(imm), \
-                                          (__v8df)_mm512_setzero_pd(), \
-                                          (__mmask8)(U)); })
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
+                                       (__v8df)_mm512_setzero_pd()); })
 
 #define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
-  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
-                                          (__v16si)(__m512i)(B), (int)(imm), \
-                                          (__v16si)_mm512_setzero_si512(), \
-                                          (__mmask16)-1); })
+  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
+                                   (__v8di)(__m512i)(B), \
+                                   0 + ((((imm) >> 0) & 0x3) * 2), \
+                                   1 + ((((imm) >> 0) & 0x3) * 2), \
+                                   0 + ((((imm) >> 2) & 0x3) * 2), \
+                                   1 + ((((imm) >> 2) & 0x3) * 2), \
+                                   8 + ((((imm) >> 4) & 0x3) * 2), \
+                                   9 + ((((imm) >> 4) & 0x3) * 2), \
+                                   8 + ((((imm) >> 6) & 0x3) * 2), \
+                                   9 + ((((imm) >> 6) & 0x3) * 2)); })
 
 #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
-  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
-                                          (__v16si)(__m512i)(B), (int)(imm), \
-                                          (__v16si)(__m512i)(W), \
-                                          (__mmask16)(U)); })
+  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
+                                      (__v16si)(__m512i)(W)); })
 
 #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
-  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
-                                          (__v16si)(__m512i)(B), (int)(imm), \
-                                          (__v16si)_mm512_setzero_si512(), \
-                                          (__mmask16)(U)); })
+  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
+                                      (__v16si)_mm512_setzero_si512()); })
 
 #define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
-  (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
-                                          (__v8di)(__m512i)(B), (int)(imm), \
-                                          (__v8di)_mm512_setzero_si512(), \
-                                          (__mmask8)-1); })
+  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
+                                   (__v8di)(__m512i)(B), \
+                                   0 + ((((imm) >> 0) & 0x3) * 2), \
+                                   1 + ((((imm) >> 0) & 0x3) * 2), \
+                                   0 + ((((imm) >> 2) & 0x3) * 2), \
+                                   1 + ((((imm) >> 2) & 0x3) * 2), \
+                                   8 + ((((imm) >> 4) & 0x3) * 2), \
+                                   9 + ((((imm) >> 4) & 0x3) * 2), \
+                                   8 + ((((imm) >> 6) & 0x3) * 2), \
+                                   9 + ((((imm) >> 6) & 0x3) * 2)); })
 
 #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
-  (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
-                                          (__v8di)(__m512i)(B), (int)(imm), \
-                                          (__v8di)(__m512i)(W), \
-                                          (__mmask8)(U)); })
+  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
+                                      (__v8di)(__m512i)(W)); })
 
 #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
-  (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
-                                          (__v8di)(__m512i)(B), (int)(imm), \
-                                          (__v8di)_mm512_setzero_si512(), \
-                                          (__mmask8)(U)); })
+  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
+                                      (__v8di)_mm512_setzero_si512()); })
 
 #define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
@@ -9166,6 +8942,64 @@
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION); })
 
+/* Bit Test */
+
+static __inline __mmask16 __DEFAULT_FN_ATTRS
+_mm512_test_epi32_mask (__m512i __A, __m512i __B)
+{
+  return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
+                                   _mm512_setzero_epi32());
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
+                                        _mm512_setzero_epi32());
+}
+
+static __inline __mmask8 __DEFAULT_FN_ATTRS
+_mm512_test_epi64_mask (__m512i __A, __m512i __B)
+{
+  return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
+                                   _mm512_setzero_epi32());
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
+                                        _mm512_setzero_epi32());
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
+{
+  return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
+                                  _mm512_setzero_epi32());
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
+                                       _mm512_setzero_epi32());
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
+{
+  return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
+                                  _mm512_setzero_epi32());
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
+                                       _mm512_setzero_epi32());
+}
+
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_movehdup_ps (__m512 __A)
 {
diff --git a/lib/Headers/avx512vbmi2intrin.h b/lib/Headers/avx512vbmi2intrin.h
new file mode 100644
index 0000000..43e97b4
--- /dev/null
+++ b/lib/Headers/avx512vbmi2intrin.h
@@ -0,0 +1,391 @@
+/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VBMI2INTRIN_H
+#define __AVX512VBMI2INTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2")))
+
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
+{
+  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
+              (__v32hi) __S,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
+{
+  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
+              (__v32hi) _mm512_setzero_hi(),
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
+{
+  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
+              (__v64qi) __S,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
+{
+  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
+              (__v64qi) _mm512_setzero_qi(),
+              __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
+{
+  __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D,
+              __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
+{
+  __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
+{
+  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
+              (__v32hi) __S,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
+{
+  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
+              (__v32hi) _mm512_setzero_hi(),
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
+{
+  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
+              (__v64qi) __S,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
+{
+  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
+              (__v64qi) _mm512_setzero_qi(),
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
+{
+  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
+              (__v32hi) __S,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
+{
+  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
+              (__v32hi) _mm512_setzero_hi(),
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
+{
+  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
+              (__v64qi) __S,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
+{
+  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
+              (__v64qi) _mm512_setzero_qi(),
+              __U);
+}
+
+#define _mm512_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
+  (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \
+                                          (__v8di)(B), \
+                                          (int)(I), \
+                                          (__v8di)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm512_maskz_shldi_epi64(U, A, B, I) \
+  _mm512_mask_shldi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm512_shldi_epi64(A, B, I) \
+  _mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm512_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
+  (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \
+                                          (__v16si)(B), \
+                                          (int)(I), \
+                                          (__v16si)(S), \
+                                          (__mmask16)(U)); })
+
+#define _mm512_maskz_shldi_epi32(U, A, B, I) \
+  _mm512_mask_shldi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm512_shldi_epi32(A, B, I) \
+  _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
+
+#define _mm512_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
+  (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \
+                                          (__v32hi)(B), \
+                                          (int)(I), \
+                                          (__v32hi)(S), \
+                                          (__mmask32)(U)); })
+
+#define _mm512_maskz_shldi_epi16(U, A, B, I) \
+  _mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm512_shldi_epi16(A, B, I) \
+  _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))
+
+#define _mm512_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
+  (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \
+                                          (__v8di)(B), \
+                                          (int)(I), \
+                                          (__v8di)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
+  _mm512_mask_shrdi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm512_shrdi_epi64(A, B, I) \
+  _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm512_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
+  (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \
+                                          (__v16si)(B), \
+                                          (int)(I), \
+                                          (__v16si)(S), \
+                                          (__mmask16)(U)); })
+
+#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
+  _mm512_mask_shrdi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm512_shrdi_epi32(A, B, I) \
+  _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
+
+#define _mm512_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
+  (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \
+                                          (__v32hi)(B), \
+                                          (int)(I), \
+                                          (__v32hi)(S), \
+                                          (__mmask32)(U)); })
+
+#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
+  _mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm512_shrdi_epi16(A, B, I) \
+  _mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
+              (__v8di) __A,
+              (__v8di) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
+              (__v8di) __A,
+              (__v8di) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
+              (__v8di) __A,
+              (__v8di) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) -1);
+}
+
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
+              (__v32hi) __A,
+              (__v32hi) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
+              (__v32hi) __A,
+              (__v32hi) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
+              (__v32hi) __A,
+              (__v32hi) __B,
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
+              (__v8di) __A,
+              (__v8di) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
+              (__v8di) __A,
+              (__v8di) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
+              (__v8di) __A,
+              (__v8di) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) -1);
+}
+
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
+              (__v32hi) __A,
+              (__v32hi) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
+              (__v32hi) __A,
+              (__v32hi) __B,
+              __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
+              (__v32hi) __A,
+              (__v32hi) __B,
+              (__mmask32) -1);
+}
+
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
+
diff --git a/lib/Headers/avx512vlbitalgintrin.h b/lib/Headers/avx512vlbitalgintrin.h
new file mode 100644
index 0000000..76eb877
--- /dev/null
+++ b/lib/Headers/avx512vlbitalgintrin.h
@@ -0,0 +1,157 @@
+/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VLBITALGINTRIN_H
+#define __AVX512VLBITALGINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg")))
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_popcnt_epi16(__m256i __A)
+{
+  return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U,
+              (__v16hi) _mm256_popcnt_epi16(__B),
+              (__v16hi) __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
+{
+  return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(),
+              __U,
+              __B);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_popcnt_epi16(__m128i __A)
+{
+  return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
+              (__v8hi) _mm128_popcnt_epi16(__B),
+              (__v8hi) __A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
+{
+  return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
+              __U,
+              __B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_popcnt_epi8(__m256i __A)
+{
+  return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U,
+              (__v32qi) _mm256_popcnt_epi8(__B),
+              (__v32qi) __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
+{
+  return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(),
+              __U,
+              __B);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_popcnt_epi8(__m128i __A)
+{
+  return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
+              (__v16qi) _mm128_popcnt_epi8(__B),
+              (__v16qi) __A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
+{
+  return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
+              __U,
+              __B);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B)
+{
+  return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A,
+              (__v32qi) __B,
+              __U);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B)
+{
+  return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1,
+              __A,
+              __B);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B)
+{
+  return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A,
+              (__v16qi) __B,
+              __U);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B)
+{
+  return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1,
+              __A,
+              __B);
+}
+
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/Headers/avx512vlbwintrin.h b/lib/Headers/avx512vlbwintrin.h
index 4ab785b..e940e2b 100644
--- a/lib/Headers/avx512vlbwintrin.h
+++ b/lib/Headers/avx512vlbwintrin.h
@@ -38,581 +38,285 @@
 
 /* Integer compare */
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmpeq_epi8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b,
-                                                   (__mmask16)-1);
-}
+#define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
+                                         (__v16qi)(__m128i)(b), (int)(p), \
+                                         (__mmask16)-1); })
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmpeq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b,
-                                                   __u);
-}
+#define _mm_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
+                                         (__v16qi)(__m128i)(b), (int)(p), \
+                                         (__mmask16)(m)); })
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmpeq_epu8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0,
-                                                 (__mmask16)-1);
-}
+#define _mm_cmp_epu8_mask(a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
+                                          (__v16qi)(__m128i)(b), (int)(p), \
+                                          (__mmask16)-1); })
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmpeq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0,
-                                                 __u);
-}
+#define _mm_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
+                                          (__v16qi)(__m128i)(b), (int)(p), \
+                                          (__mmask16)(m)); })
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b,
-                                                   (__mmask32)-1);
-}
+#define _mm256_cmp_epi8_mask(a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
+                                         (__v32qi)(__m256i)(b), (int)(p), \
+                                         (__mmask32)-1); })
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b,
-                                                   __u);
-}
+#define _mm256_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
+                                         (__v32qi)(__m256i)(b), (int)(p), \
+                                         (__mmask32)(m)); })
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0,
-                                                 (__mmask32)-1);
-}
+#define _mm256_cmp_epu8_mask(a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
+                                          (__v32qi)(__m256i)(b), (int)(p), \
+                                          (__mmask32)-1); })
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0,
-                                                 __u);
-}
+#define _mm256_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
+                                          (__v32qi)(__m256i)(b), (int)(p), \
+                                          (__mmask32)(m)); })
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpeq_epi16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b,
-                                                  (__mmask8)-1);
-}
+#define _mm_cmp_epi16_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
+                                        (__v8hi)(__m128i)(b), (int)(p), \
+                                        (__mmask8)-1); })
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpeq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b,
-                                                  __u);
-}
+#define _mm_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
+                                        (__v8hi)(__m128i)(b), (int)(p), \
+                                        (__mmask8)(m)); })
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpeq_epu16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0,
-                                                (__mmask8)-1);
-}
+#define _mm_cmp_epu16_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
+                                         (__v8hi)(__m128i)(b), (int)(p), \
+                                         (__mmask8)-1); })
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpeq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0,
-                                                __u);
-}
+#define _mm_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
+                                         (__v8hi)(__m128i)(b), (int)(p), \
+                                         (__mmask8)(m)); })
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b,
-                                                   (__mmask16)-1);
-}
+#define _mm256_cmp_epi16_mask(a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
+                                         (__v16hi)(__m256i)(b), (int)(p), \
+                                         (__mmask16)-1); })
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b,
-                                                   __u);
-}
+#define _mm256_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
+                                         (__v16hi)(__m256i)(b), (int)(p), \
+                                         (__mmask16)(m)); })
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmpeq_epu16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0,
-                                                 (__mmask16)-1);
-}
+#define _mm256_cmp_epu16_mask(a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
+                                          (__v16hi)(__m256i)(b), (int)(p), \
+                                          (__mmask16)-1); })
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpeq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0,
-                                                 __u);
-}
+#define _mm256_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
+                                          (__v16hi)(__m256i)(b), (int)(p), \
+                                          (__mmask16)(m)); })
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmpge_epi8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
-                                                (__mmask16)-1);
-}
+#define _mm_cmpeq_epi8_mask(A, B) \
+    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm_mask_cmpeq_epi8_mask(k, A, B) \
+    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm_cmpge_epi8_mask(A, B) \
+    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
+#define _mm_mask_cmpge_epi8_mask(k, A, B) \
+    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm_cmpgt_epi8_mask(A, B) \
+    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
+#define _mm_mask_cmpgt_epi8_mask(k, A, B) \
+    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm_cmple_epi8_mask(A, B) \
+    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
+#define _mm_mask_cmple_epi8_mask(k, A, B) \
+    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm_cmplt_epi8_mask(A, B) \
+    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
+#define _mm_mask_cmplt_epi8_mask(k, A, B) \
+    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm_cmpneq_epi8_mask(A, B) \
+    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
+#define _mm_mask_cmpneq_epi8_mask(k, A, B) \
+    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmpge_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
-                                                __u);
-}
+#define _mm256_cmpeq_epi8_mask(A, B) \
+    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm256_mask_cmpeq_epi8_mask(k, A, B) \
+    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm256_cmpge_epi8_mask(A, B) \
+    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
+#define _mm256_mask_cmpge_epi8_mask(k, A, B) \
+    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm256_cmpgt_epi8_mask(A, B) \
+    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
+#define _mm256_mask_cmpgt_epi8_mask(k, A, B) \
+    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm256_cmple_epi8_mask(A, B) \
+    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
+#define _mm256_mask_cmple_epi8_mask(k, A, B) \
+    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm256_cmplt_epi8_mask(A, B) \
+    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
+#define _mm256_mask_cmplt_epi8_mask(k, A, B) \
+    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm256_cmpneq_epi8_mask(A, B) \
+    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
+#define _mm256_mask_cmpneq_epi8_mask(k, A, B) \
+    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmpge_epu8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
-                                                 (__mmask16)-1);
-}
+#define _mm_cmpeq_epu8_mask(A, B) \
+    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm_mask_cmpeq_epu8_mask(k, A, B) \
+    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm_cmpge_epu8_mask(A, B) \
+    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
+#define _mm_mask_cmpge_epu8_mask(k, A, B) \
+    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm_cmpgt_epu8_mask(A, B) \
+    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
+#define _mm_mask_cmpgt_epu8_mask(k, A, B) \
+    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm_cmple_epu8_mask(A, B) \
+    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
+#define _mm_mask_cmple_epu8_mask(k, A, B) \
+    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm_cmplt_epu8_mask(A, B) \
+    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
+#define _mm_mask_cmplt_epu8_mask(k, A, B) \
+    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm_cmpneq_epu8_mask(A, B) \
+    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
+#define _mm_mask_cmpneq_epu8_mask(k, A, B) \
+    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmpge_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
-                                                 __u);
-}
+#define _mm256_cmpeq_epu8_mask(A, B) \
+    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm256_mask_cmpeq_epu8_mask(k, A, B) \
+    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm256_cmpge_epu8_mask(A, B) \
+    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
+#define _mm256_mask_cmpge_epu8_mask(k, A, B) \
+    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm256_cmpgt_epu8_mask(A, B) \
+    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
+#define _mm256_mask_cmpgt_epu8_mask(k, A, B) \
+    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm256_cmple_epu8_mask(A, B) \
+    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
+#define _mm256_mask_cmple_epu8_mask(k, A, B) \
+    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm256_cmplt_epu8_mask(A, B) \
+    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
+#define _mm256_mask_cmplt_epu8_mask(k, A, B) \
+    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm256_cmpneq_epu8_mask(A, B) \
+    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
+#define _mm256_mask_cmpneq_epu8_mask(k, A, B) \
+    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmpge_epi8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
-                                                (__mmask32)-1);
-}
+#define _mm_cmpeq_epi16_mask(A, B) \
+    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm_mask_cmpeq_epi16_mask(k, A, B) \
+    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm_cmpge_epi16_mask(A, B) \
+    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
+#define _mm_mask_cmpge_epi16_mask(k, A, B) \
+    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm_cmpgt_epi16_mask(A, B) \
+    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
+#define _mm_mask_cmpgt_epi16_mask(k, A, B) \
+    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm_cmple_epi16_mask(A, B) \
+    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
+#define _mm_mask_cmple_epi16_mask(k, A, B) \
+    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm_cmplt_epi16_mask(A, B) \
+    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
+#define _mm_mask_cmplt_epi16_mask(k, A, B) \
+    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm_cmpneq_epi16_mask(A, B) \
+    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
+#define _mm_mask_cmpneq_epi16_mask(k, A, B) \
+    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpge_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
-                                                __u);
-}
+#define _mm256_cmpeq_epi16_mask(A, B) \
+    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm256_mask_cmpeq_epi16_mask(k, A, B) \
+    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm256_cmpge_epi16_mask(A, B) \
+    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
+#define _mm256_mask_cmpge_epi16_mask(k, A, B) \
+    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm256_cmpgt_epi16_mask(A, B) \
+    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
+#define _mm256_mask_cmpgt_epi16_mask(k, A, B) \
+    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm256_cmple_epi16_mask(A, B) \
+    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
+#define _mm256_mask_cmple_epi16_mask(k, A, B) \
+    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm256_cmplt_epi16_mask(A, B) \
+    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
+#define _mm256_mask_cmplt_epi16_mask(k, A, B) \
+    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm256_cmpneq_epi16_mask(A, B) \
+    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
+#define _mm256_mask_cmpneq_epi16_mask(k, A, B) \
+    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmpge_epu8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
-                                                 (__mmask32)-1);
-}
+#define _mm_cmpeq_epu16_mask(A, B) \
+    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm_mask_cmpeq_epu16_mask(k, A, B) \
+    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm_cmpge_epu16_mask(A, B) \
+    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
+#define _mm_mask_cmpge_epu16_mask(k, A, B) \
+    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm_cmpgt_epu16_mask(A, B) \
+    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
+#define _mm_mask_cmpgt_epu16_mask(k, A, B) \
+    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm_cmple_epu16_mask(A, B) \
+    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
+#define _mm_mask_cmple_epu16_mask(k, A, B) \
+    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm_cmplt_epu16_mask(A, B) \
+    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
+#define _mm_mask_cmplt_epu16_mask(k, A, B) \
+    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm_cmpneq_epu16_mask(A, B) \
+    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
+#define _mm_mask_cmpneq_epu16_mask(k, A, B) \
+    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpge_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
-                                                 __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpge_epi16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpge_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpge_epu16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpge_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmpge_epi16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
-                                                (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpge_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmpge_epu16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpge_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
-                                                 __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmpgt_epi8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b,
-                                                   (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmpgt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b,
-                                                   __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmpgt_epu8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmpgt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6,
-                                                 __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmpgt_epi8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b,
-                                                   (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpgt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b,
-                                                   __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmpgt_epu8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6,
-                                                 (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpgt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6,
-                                                 __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpgt_epi16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b,
-                                                  (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpgt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b,
-                                                  __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpgt_epu16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpgt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmpgt_epi16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b,
-                                                   (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpgt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b,
-                                                   __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmpgt_epu16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpgt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6,
-                                                 __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmple_epi8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
-                                                (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmple_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmple_epu8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmple_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
-                                                 __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmple_epi8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
-                                                (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmple_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmple_epu8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
-                                                 (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmple_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
-                                                 __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmple_epi16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmple_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmple_epu16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmple_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmple_epi16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
-                                                (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmple_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmple_epu16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmple_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
-                                                 __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmplt_epi8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
-                                                (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmplt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmplt_epu8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmplt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
-                                                 __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmplt_epi8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
-                                                (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmplt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmplt_epu8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
-                                                 (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmplt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
-                                                 __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmplt_epi16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmplt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmplt_epu16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmplt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmplt_epi16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
-                                                (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmplt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmplt_epu16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmplt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
-                                                 __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmpneq_epi8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
-                                                (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmpneq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_cmpneq_epu8_mask(__m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm_mask_cmpneq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
-  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
-                                                 __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmpneq_epi8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
-                                                (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpneq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
-                                                __u);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_cmpneq_epu8_mask(__m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
-                                                 (__mmask32)-1);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpneq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
-  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
-                                                 __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpneq_epi16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpneq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpneq_epu16_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpneq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmpneq_epi16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
-                                                (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpneq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
-                                                __u);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_cmpneq_epu16_mask(__m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
-                                                 (__mmask16)-1);
-}
-
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpneq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
-  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
-                                                 __u);
-}
+#define _mm256_cmpeq_epu16_mask(A, B) \
+    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm256_mask_cmpeq_epu16_mask(k, A, B) \
+    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm256_cmpge_epu16_mask(A, B) \
+    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
+#define _mm256_mask_cmpge_epu16_mask(k, A, B) \
+    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm256_cmpgt_epu16_mask(A, B) \
+    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
+#define _mm256_mask_cmpgt_epu16_mask(k, A, B) \
+    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm256_cmple_epu16_mask(A, B) \
+    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
+#define _mm256_mask_cmple_epu16_mask(k, A, B) \
+    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm256_cmplt_epu16_mask(A, B) \
+    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
+#define _mm256_mask_cmplt_epu16_mask(k, A, B) \
+    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm256_cmpneq_epu16_mask(A, B) \
+    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
+#define _mm256_mask_cmpneq_epu16_mask(k, A, B) \
+    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
@@ -2146,86 +1850,6 @@
 }
 
 
-#define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \
-  (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
-                                         (__v16qi)(__m128i)(b), (int)(p), \
-                                         (__mmask16)-1); })
-
-#define _mm_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
-  (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
-                                         (__v16qi)(__m128i)(b), (int)(p), \
-                                         (__mmask16)(m)); })
-
-#define _mm_cmp_epu8_mask(a, b, p) __extension__ ({ \
-  (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
-                                          (__v16qi)(__m128i)(b), (int)(p), \
-                                          (__mmask16)-1); })
-
-#define _mm_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
-  (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
-                                          (__v16qi)(__m128i)(b), (int)(p), \
-                                          (__mmask16)(m)); })
-
-#define _mm256_cmp_epi8_mask(a, b, p) __extension__ ({ \
-  (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
-                                         (__v32qi)(__m256i)(b), (int)(p), \
-                                         (__mmask32)-1); })
-
-#define _mm256_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
-  (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
-                                         (__v32qi)(__m256i)(b), (int)(p), \
-                                         (__mmask32)(m)); })
-
-#define _mm256_cmp_epu8_mask(a, b, p) __extension__ ({ \
-  (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
-                                          (__v32qi)(__m256i)(b), (int)(p), \
-                                          (__mmask32)-1); })
-
-#define _mm256_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
-  (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
-                                          (__v32qi)(__m256i)(b), (int)(p), \
-                                          (__mmask32)(m)); })
-
-#define _mm_cmp_epi16_mask(a, b, p) __extension__ ({ \
-  (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
-                                        (__v8hi)(__m128i)(b), (int)(p), \
-                                        (__mmask8)-1); })
-
-#define _mm_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
-  (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
-                                        (__v8hi)(__m128i)(b), (int)(p), \
-                                        (__mmask8)(m)); })
-
-#define _mm_cmp_epu16_mask(a, b, p) __extension__ ({ \
-  (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
-                                         (__v8hi)(__m128i)(b), (int)(p), \
-                                         (__mmask8)-1); })
-
-#define _mm_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
-  (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
-                                         (__v8hi)(__m128i)(b), (int)(p), \
-                                         (__mmask8)(m)); })
-
-#define _mm256_cmp_epi16_mask(a, b, p) __extension__ ({ \
-  (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
-                                         (__v16hi)(__m256i)(b), (int)(p), \
-                                         (__mmask16)-1); })
-
-#define _mm256_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
-  (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
-                                         (__v16hi)(__m256i)(b), (int)(p), \
-                                         (__mmask16)(m)); })
-
-#define _mm256_cmp_epu16_mask(a, b, p) __extension__ ({ \
-  (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
-                                          (__v16hi)(__m256i)(b), (int)(p), \
-                                          (__mmask16)-1); })
-
-#define _mm256_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
-  (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
-                                          (__v16hi)(__m256i)(b), (int)(p), \
-                                          (__mmask16)(m)); })
-
 #define _mm_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \
   (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
@@ -2791,121 +2415,108 @@
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_test_epi8_mask (__m128i __A, __m128i __B)
 {
-  return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
-            (__v16qi) __B,
-            (__mmask16) -1);
+  return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_hi());
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
 {
-  return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
-            (__v16qi) __B, __U);
+  return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
+                                    _mm_setzero_hi());
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_test_epi8_mask (__m256i __A, __m256i __B)
 {
-  return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
-            (__v32qi) __B,
-            (__mmask32) -1);
+  return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
+                                  _mm256_setzero_si256());
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
 {
-  return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
-            (__v32qi) __B, __U);
+  return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B),
+                                       _mm256_setzero_si256());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_test_epi16_mask (__m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
-                 (__v8hi) __B,
-                 (__mmask8) -1);
+  return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_hi());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
-                 (__v8hi) __B, __U);
+  return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B),
+                                     _mm_setzero_hi());
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_test_epi16_mask (__m256i __A, __m256i __B)
 {
-  return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
-            (__v16hi) __B,
-            (__mmask16) -1);
+  return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B),
+                                   _mm256_setzero_si256 ());
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
 {
-  return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
-            (__v16hi) __B, __U);
+  return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B),
+                                        _mm256_setzero_si256());
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_testn_epi8_mask (__m128i __A, __m128i __B)
 {
-  return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
-             (__v16qi) __B,
-             (__mmask16) -1);
+  return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_hi());
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
 {
-  return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
-             (__v16qi) __B, __U);
+  return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B),
+                                  _mm_setzero_hi());
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_testn_epi8_mask (__m256i __A, __m256i __B)
 {
-  return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
-             (__v32qi) __B,
-             (__mmask32) -1);
+  return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B),
+                                 _mm256_setzero_si256());
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
 {
-  return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
-             (__v32qi) __B, __U);
+  return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B),
+                                      _mm256_setzero_si256());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_testn_epi16_mask (__m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
-            (__v8hi) __B,
-            (__mmask8) -1);
+  return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_hi());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
-            (__v8hi) __B, __U);
+  return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_hi());
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_testn_epi16_mask (__m256i __A, __m256i __B)
 {
-  return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
-             (__v16hi) __B,
-             (__mmask16) -1);
+  return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B),
+                                  _mm256_setzero_si256());
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
 {
-  return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
-             (__v16hi) __B, __U);
+  return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B),
+                                       _mm256_setzero_si256());
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
diff --git a/lib/Headers/avx512vlcdintrin.h b/lib/Headers/avx512vlcdintrin.h
index 7b02e2e..8f1cd25 100644
--- a/lib/Headers/avx512vlcdintrin.h
+++ b/lib/Headers/avx512vlcdintrin.h
@@ -33,26 +33,26 @@
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_broadcastmb_epi64 (__mmask8 __A)
-{
-  return (__m128i) __builtin_ia32_broadcastmb128 (__A);
+{ 
+  return (__m128i) _mm_set1_epi64x((long long) __A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcastmb_epi64 (__mmask8 __A)
 {
-  return (__m256i) __builtin_ia32_broadcastmb256 (__A);
+  return (__m256i) _mm256_set1_epi64x((long long)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_broadcastmw_epi32 (__mmask16 __A)
 {
-  return (__m128i) __builtin_ia32_broadcastmw128 (__A);
+  return (__m128i) _mm_set1_epi32((int)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcastmw_epi32 (__mmask16 __A)
 {
-  return (__m256i) __builtin_ia32_broadcastmw256 (__A);
+  return (__m256i) _mm256_set1_epi32((int)__A);
 }
 
 
diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h
index 7e17cff..16e44c3 100644
--- a/lib/Headers/avx512vlintrin.h
+++ b/lib/Headers/avx512vlintrin.h
@@ -38,582 +38,205 @@
 
 /* Integer compare */
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
-                                                  (__mmask8)-1);
-}
+#define _mm_cmpeq_epi32_mask(A, B) \
+    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
+    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm_cmpge_epi32_mask(A, B) \
+    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
+#define _mm_mask_cmpge_epi32_mask(k, A, B) \
+    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm_cmpgt_epi32_mask(A, B) \
+    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
+#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
+    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm_cmple_epi32_mask(A, B) \
+    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
+#define _mm_mask_cmple_epi32_mask(k, A, B) \
+    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm_cmplt_epi32_mask(A, B) \
+    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
+#define _mm_mask_cmplt_epi32_mask(k, A, B) \
+    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm_cmpneq_epi32_mask(A, B) \
+    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
+#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
+    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
-                                                  __u);
-}
+#define _mm256_cmpeq_epi32_mask(A, B) \
+    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
+    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm256_cmpge_epi32_mask(A, B) \
+    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
+#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
+    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm256_cmpgt_epi32_mask(A, B) \
+    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
+#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
+    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm256_cmple_epi32_mask(A, B) \
+    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
+#define _mm256_mask_cmple_epi32_mask(k, A, B) \
+    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm256_cmplt_epi32_mask(A, B) \
+    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
+#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
+    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm256_cmpneq_epi32_mask(A, B) \
+    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
+#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
+    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
-                                                (__mmask8)-1);
-}
+#define _mm_cmpeq_epu32_mask(A, B) \
+    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
+    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm_cmpge_epu32_mask(A, B) \
+    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
+#define _mm_mask_cmpge_epu32_mask(k, A, B) \
+    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm_cmpgt_epu32_mask(A, B) \
+    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
+#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
+    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm_cmple_epu32_mask(A, B) \
+    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
+#define _mm_mask_cmple_epu32_mask(k, A, B) \
+    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm_cmplt_epu32_mask(A, B) \
+    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
+#define _mm_mask_cmplt_epu32_mask(k, A, B) \
+    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm_cmpneq_epu32_mask(A, B) \
+    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
+#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
+    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
-                                                __u);
-}
+#define _mm256_cmpeq_epu32_mask(A, B) \
+    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
+    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm256_cmpge_epu32_mask(A, B) \
+    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
+#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
+    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm256_cmpgt_epu32_mask(A, B) \
+    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
+#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
+    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm256_cmple_epu32_mask(A, B) \
+    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
+#define _mm256_mask_cmple_epu32_mask(k, A, B) \
+    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm256_cmplt_epu32_mask(A, B) \
+    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
+#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
+    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm256_cmpneq_epu32_mask(A, B) \
+    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
+#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
+    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
-                                                  (__mmask8)-1);
-}
+#define _mm_cmpeq_epi64_mask(A, B) \
+    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
+    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm_cmpge_epi64_mask(A, B) \
+    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
+#define _mm_mask_cmpge_epi64_mask(k, A, B) \
+    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm_cmpgt_epi64_mask(A, B) \
+    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
+#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
+    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm_cmple_epi64_mask(A, B) \
+    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
+#define _mm_mask_cmple_epi64_mask(k, A, B) \
+    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm_cmplt_epi64_mask(A, B) \
+    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
+#define _mm_mask_cmplt_epi64_mask(k, A, B) \
+    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm_cmpneq_epi64_mask(A, B) \
+    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
+#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
+    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
-                                                  __u);
-}
+#define _mm256_cmpeq_epi64_mask(A, B) \
+    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
+    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm256_cmpge_epi64_mask(A, B) \
+    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
+#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
+    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm256_cmpgt_epi64_mask(A, B) \
+    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
+#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
+    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm256_cmple_epi64_mask(A, B) \
+    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
+#define _mm256_mask_cmple_epi64_mask(k, A, B) \
+    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm256_cmplt_epi64_mask(A, B) \
+    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
+#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
+    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm256_cmpneq_epi64_mask(A, B) \
+    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
+#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
+    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
-                                                (__mmask8)-1);
-}
+#define _mm_cmpeq_epu64_mask(A, B) \
+    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
+    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm_cmpge_epu64_mask(A, B) \
+    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
+#define _mm_mask_cmpge_epu64_mask(k, A, B) \
+    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm_cmpgt_epu64_mask(A, B) \
+    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
+#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
+    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm_cmple_epu64_mask(A, B) \
+    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
+#define _mm_mask_cmple_epu64_mask(k, A, B) \
+    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm_cmplt_epu64_mask(A, B) \
+    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
+#define _mm_mask_cmplt_epu64_mask(k, A, B) \
+    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm_cmpneq_epu64_mask(A, B) \
+    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
+#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
+    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
-                                                  (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
-                                                  __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
-                                                  (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
-                                                  __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
-                                                __u);
-}
-
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpge_epi32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpge_epu32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpge_epi64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpge_epu64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
-                                                  (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
-                                                  __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
-                                                  (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
-                                                  __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
-                                                  (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
-                                                  __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
-                                                  (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
-                                                  __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmple_epi32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmple_epu32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmple_epi32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmple_epu32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmple_epi64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmple_epu64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmple_epi64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmplt_epi32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmplt_epu32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmplt_epi64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmplt_epu64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
-                                                __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
-                                               (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
-                                               __u);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
-                                                (__mmask8)-1);
-}
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
-                                                __u);
-}
+#define _mm256_cmpeq_epu64_mask(A, B) \
+    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
+#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
+    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
+#define _mm256_cmpge_epu64_mask(A, B) \
+    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
+#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
+    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
+#define _mm256_cmpgt_epu64_mask(A, B) \
+    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
+#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
+    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
+#define _mm256_cmple_epu64_mask(A, B) \
+    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
+#define _mm256_mask_cmple_epu64_mask(k, A, B) \
+    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
+#define _mm256_cmplt_epu64_mask(A, B) \
+    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
+#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
+    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
+#define _mm256_cmpneq_epu64_mask(A, B) \
+    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
+#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
+    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
@@ -2162,32 +1785,30 @@
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
-  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
-               (__v4sf) __W,
-               (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_cvtepi32_ps(__A),
+                                             (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) {
-  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
-               (__v4sf)
-               _mm_setzero_ps (),
-               (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_cvtepi32_ps(__A),
+                                             (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
-  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
-               (__v8sf) __W,
-               (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_cvtepi32_ps(__A),
+                                             (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) {
-  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
-               (__v8sf)
-               _mm256_setzero_ps (),
-               (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_cvtepi32_ps(__A),
+                                             (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -2207,17 +1828,16 @@
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
-  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
-                (__v4si) __W,
-                (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm256_cvtpd_epi32(__A),
+                                             (__v4si)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
-  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
-                (__v4si)
-                _mm_setzero_si128 (),
-                (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm256_cvtpd_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -2237,17 +1857,16 @@
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
-  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
-               (__v4sf) __W,
-               (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm256_cvtpd_ps(__A),
+                                             (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
-  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
-               (__v4sf)
-               _mm_setzero_ps (),
-               (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm256_cvtpd_ps(__A),
+                                             (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -2298,62 +1917,58 @@
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
-  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
-                (__v4si) __W,
-                (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvtps_epi32(__A),
+                                             (__v4si)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
-  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
-                (__v4si)
-                _mm_setzero_si128 (),
-                (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvtps_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
-  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
-                (__v8si) __W,
-                (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvtps_epi32(__A),
+                                             (__v8si)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
-  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
-                (__v8si)
-                _mm256_setzero_si256 (),
-                (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvtps_epi32(__A),
+                                             (__v8si)_mm256_setzero_si256());
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
-  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
-                (__v2df) __W,
-                (__mmask8) __U);
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_cvtps_pd(__A),
+                                              (__v2df)__W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
-  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
-                (__v2df)
-                _mm_setzero_pd (),
-                (__mmask8) __U);
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_cvtps_pd(__A),
+                                              (__v2df)_mm_setzero_pd());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
-  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
-                (__v4df) __W,
-                (__mmask8) __U);
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_cvtps_pd(__A),
+                                              (__v4df)__W);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
-  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
-                (__v4df)
-                _mm256_setzero_pd (),
-                (__mmask8) __U);
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_cvtps_pd(__A),
+                                              (__v4df)_mm256_setzero_pd());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -2419,17 +2034,16 @@
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
-  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
-                 (__v4si) __W,
-                 (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm256_cvttpd_epi32(__A),
+                                             (__v4si)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
-  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
-                 (__v4si)
-                 _mm_setzero_si128 (),
-                 (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm256_cvttpd_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -2480,32 +2094,30 @@
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
-  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
-                 (__v4si) __W,
-                 (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvttps_epi32(__A),
+                                             (__v4si)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
-  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
-                 (__v4si)
-                 _mm_setzero_si128 (),
-                 (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_cvttps_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
-  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
-                 (__v8si) __W,
-                 (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvttps_epi32(__A),
+                                             (__v8si)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
-  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
-                 (__v8si)
-                 _mm256_setzero_si256 (),
-                 (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_cvttps_epi32(__A),
+                                             (__v8si)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -6503,125 +6115,111 @@
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_test_epi32_mask (__m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
-                 (__v4si) __B,
-                 (__mmask8) -1);
+  return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
-                 (__v4si) __B, __U);
+  return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
+                                     _mm_setzero_di());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
-                 (__v8si) __B,
-                 (__mmask8) -1);
+  return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
+                                   _mm256_setzero_si256());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
-                 (__v8si) __B, __U);
+  return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
+                                        _mm256_setzero_si256());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_test_epi64_mask (__m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
-                 (__v2di) __B,
-                 (__mmask8) -1);
+  return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
-                 (__v2di) __B, __U);
+  return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
+                                     _mm_setzero_di());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
-                 (__v4di) __B,
-                 (__mmask8) -1);
+  return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
+                                   _mm256_setzero_si256());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
-                 (__v4di) __B, __U);
+  return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
+                                        _mm256_setzero_si256());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
-            (__v4si) __B,
-            (__mmask8) -1);
+  return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
-            (__v4si) __B, __U);
+  return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
+                                    _mm_setzero_di());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
-            (__v8si) __B,
-            (__mmask8) -1);
+  return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
+                                  _mm256_setzero_si256());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
-            (__v8si) __B, __U);
+  return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
+                                       _mm256_setzero_si256());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
-            (__v2di) __B,
-            (__mmask8) -1);
+  return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
-            (__v2di) __B, __U);
+  return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
+                                    _mm_setzero_di());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
-            (__v4di) __B,
-            (__mmask8) -1);
+  return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
+                                  _mm256_setzero_si256());
 }
 
 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
-            (__v4di) __B, __U);
+  return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
+                                       _mm256_setzero_si256());
 }
 
-
-
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
@@ -6977,85 +6575,81 @@
 
 
 #define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
-  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
-                                             (__v8sf)(__m256)(B), (int)(imm), \
-                                             (__v8sf)_mm256_setzero_ps(), \
-                                             (__mmask8)-1); })
+  (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
+                                  (__v8sf)(__m256)(B), \
+                                   0 + ((((imm) >> 0) & 0x1) * 4), \
+                                   1 + ((((imm) >> 0) & 0x1) * 4), \
+                                   2 + ((((imm) >> 0) & 0x1) * 4), \
+                                   3 + ((((imm) >> 0) & 0x1) * 4), \
+                                   8 + ((((imm) >> 1) & 0x1) * 4), \
+                                   9 + ((((imm) >> 1) & 0x1) * 4), \
+                                   10 + ((((imm) >> 1) & 0x1) * 4), \
+                                   11 + ((((imm) >> 1) & 0x1) * 4)); })                                  
 
 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
-  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
-                                             (__v8sf)(__m256)(B), (int)(imm), \
-                                             (__v8sf)(__m256)(W), \
-                                             (__mmask8)(U)); })
+  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+                                      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
+                                      (__v8sf)(__m256)(W)); })
 
 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
-  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
-                                             (__v8sf)(__m256)(B), (int)(imm), \
-                                             (__v8sf)_mm256_setzero_ps(), \
-                                             (__mmask8)(U)); })
+  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+                                      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
+                                      (__v8sf)_mm256_setzero_ps()); })
 
 #define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
-  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
-                                              (__v4df)(__m256d)(B), \
-                                              (int)(imm), \
-                                              (__v4df)_mm256_setzero_pd(), \
-                                              (__mmask8)-1); })
+  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
+                                   (__v4df)(__m256d)(B), \
+                                   0 + ((((imm) >> 0) & 0x1) * 2), \
+                                   1 + ((((imm) >> 0) & 0x1) * 2), \
+                                   4 + ((((imm) >> 1) & 0x1) * 2), \
+                                   5 + ((((imm) >> 1) & 0x1) * 2)); })
 
 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
-  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
-                                              (__v4df)(__m256d)(B), \
-                                              (int)(imm), \
-                                              (__v4df)(__m256d)(W), \
-                                              (__mmask8)(U)); })
+  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+                                      (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
+                                      (__v4df)(__m256)(W)); })
 
 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
-  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
-                                              (__v4df)(__m256d)(B), \
-                                              (int)(imm), \
-                                              (__v4df)_mm256_setzero_pd(), \
-                                              (__mmask8)(U)); })
+  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+                                      (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
+                                      (__v4df)_mm256_setzero_pd()); })
 
 #define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
-                                              (__v8si)(__m256i)(B), \
-                                              (int)(imm), \
-                                              (__v8si)_mm256_setzero_si256(), \
-                                              (__mmask8)-1); })
+  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
+                                   (__v4di)(__m256i)(B), \
+                                   0 + ((((imm) >> 0) & 0x1) * 2), \
+                                   1 + ((((imm) >> 0) & 0x1) * 2), \
+                                   4 + ((((imm) >> 1) & 0x1) * 2), \
+                                   5 + ((((imm) >> 1) & 0x1) * 2)); })
 
 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
-                                              (__v8si)(__m256i)(B), \
-                                              (int)(imm), \
-                                              (__v8si)(__m256i)(W), \
-                                              (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+                                      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
+                                      (__v8si)(__m256)(W)); })
 
 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
-                                              (__v8si)(__m256i)(B), \
-                                              (int)(imm), \
-                                              (__v8si)_mm256_setzero_si256(), \
-                                              (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+                                      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
+                                      (__v8si)_mm256_setzero_si256()); })
 
 #define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
-                                              (__v4di)(__m256i)(B), \
-                                              (int)(imm), \
-                                              (__v4di)_mm256_setzero_si256(), \
-                                              (__mmask8)-1); })
+  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
+                                   (__v4di)(__m256i)(B), \
+                                   0 + ((((imm) >> 0) & 0x1) * 2), \
+                                   1 + ((((imm) >> 0) & 0x1) * 2), \
+                                   4 + ((((imm) >> 1) & 0x1) * 2), \
+                                   5 + ((((imm) >> 1) & 0x1) * 2)); })
 
 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
-                                              (__v4di)(__m256i)(B), \
-                                              (int)(imm), \
-                                              (__v4di)(__m256i)(W), \
-                                              (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+                                      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
+                                      (__v4di)(__m256)(W)); })
+
 
 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
-                                              (__v4di)(__m256i)(B), \
-                                              (int)(imm), \
-                                              (__v4di)_mm256_setzero_si256(), \
-                                              (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+                                      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
+                                      (__v4di)_mm256_setzero_si256()); })
 
 #define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
@@ -8573,60 +8167,41 @@
                  __M);
 }
 
+#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
+
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
-          __m256 __Y)
+_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
 {
-  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
-                (__v8si) __X,
-                (__v8sf) __W,
-                (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                        (__v8sf)_mm256_permutexvar_ps(__X, __Y),
+                                        (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
+_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
 {
-  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
-                (__v8si) __X,
-                (__v8sf) _mm256_setzero_ps (),
-                (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                        (__v8sf)_mm256_permutexvar_ps(__X, __Y),
+                                        (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_permutexvar_ps (__m256i __X, __m256 __Y)
+#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
+                              __m256i __Y)
 {
-  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
-                (__v8si) __X,
-                (__v8sf) _mm256_undefined_si256 (),
-                (__mmask8) -1);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+                                     (__v8si)_mm256_permutexvar_epi32(__X, __Y),
+                                     (__v8si)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
 {
-  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
-                 (__v8si) __X,
-                 (__v8si) _mm256_setzero_si256 (),
-                 __M);
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
-             __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
-                 (__v8si) __X,
-                 (__v8si) __W,
-                 (__mmask8) __M);
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
-                 (__v8si) __X,
-                 (__v8si) _mm256_undefined_si256(),
-                 (__mmask8) -1);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+                                     (__v8si)_mm256_permutexvar_epi32(__X, __Y),
+                                     (__v8si)_mm256_setzero_si256());
 }
 
 #define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
diff --git a/lib/Headers/avx512vlvbmi2intrin.h b/lib/Headers/avx512vlvbmi2intrin.h
new file mode 100644
index 0000000..d1ec497
--- /dev/null
+++ b/lib/Headers/avx512vlvbmi2intrin.h
@@ -0,0 +1,748 @@
+/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VLVBMI2INTRIN_H
+#define __AVX512VLVBMI2INTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2")))
+
+static  __inline __m128i __DEFAULT_FN_ATTRS
+_mm128_setzero_hi(void) {
+  return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
+{
+  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
+              (__v8hi) __S,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D)
+{
+  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
+              (__v8hi) _mm128_setzero_hi(),
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
+{
+  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
+              (__v16qi) __S,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D)
+{
+  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
+              (__v16qi) _mm128_setzero_hi(),
+              __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
+{
+  __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D,
+              __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
+{
+  __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
+{
+  return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
+              (__v8hi) __S,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D)
+{
+  return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
+              (__v8hi) _mm128_setzero_hi(),
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
+{
+  return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
+              (__v16qi) __S,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D)
+{
+  return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
+              (__v16qi) _mm128_setzero_hi(),
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
+{
+  return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
+              (__v8hi) __S,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
+{
+  return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
+              (__v8hi) _mm128_setzero_hi(),
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
+{
+  return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
+              (__v16qi) __S,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
+{
+  return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
+              (__v16qi) _mm128_setzero_hi(),
+              __U);
+}
+
+static  __inline __m256i __DEFAULT_FN_ATTRS
+_mm256_setzero_hi(void) {
+  return (__m256i)(__v16hi){ 0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D)
+{
+  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
+              (__v16hi) __S,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D)
+{
+  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
+              (__v16hi) _mm256_setzero_hi(),
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D)
+{
+  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
+              (__v32qi) __S,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D)
+{
+  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
+              (__v32qi) _mm256_setzero_hi(),
+              __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D)
+{
+  __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D,
+              __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D)
+{
+  __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D)
+{
+  return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
+              (__v16hi) __S,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D)
+{
+  return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
+              (__v16hi) _mm256_setzero_hi(),
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D)
+{
+  return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
+              (__v32qi) __S,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D)
+{
+  return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
+              (__v32qi) _mm256_setzero_hi(),
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P)
+{
+  return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
+              (__v16hi) __S,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P)
+{
+  return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
+              (__v16hi) _mm256_setzero_hi(),
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P)
+{
+  return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
+              (__v32qi) __S,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
+{
+  return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
+              (__v32qi) _mm256_setzero_hi(),
+              __U);
+}
+
+#define _mm256_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
+  (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \
+                                          (__v4di)(B), \
+                                          (int)(I), \
+                                          (__v4di)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm256_maskz_shldi_epi64(U, A, B, I) \
+  _mm256_mask_shldi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm256_shldi_epi64(A, B, I) \
+  _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm128_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
+  (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \
+                                          (__v2di)(B), \
+                                          (int)(I), \
+                                          (__v2di)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm128_maskz_shldi_epi64(U, A, B, I) \
+  _mm128_mask_shldi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm128_shldi_epi64(A, B, I) \
+  _mm128_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
+  (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \
+                                          (__v8si)(B), \
+                                          (int)(I), \
+                                          (__v8si)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm256_maskz_shldi_epi32(U, A, B, I) \
+  _mm256_mask_shldi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm256_shldi_epi32(A, B, I) \
+  _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm128_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
+  (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \
+                                          (__v4si)(B), \
+                                          (int)(I), \
+                                          (__v4si)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm128_maskz_shldi_epi32(U, A, B, I) \
+  _mm128_mask_shldi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm128_shldi_epi32(A, B, I) \
+  _mm128_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
+  (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \
+                                          (__v16hi)(B), \
+                                          (int)(I), \
+                                          (__v16hi)(S), \
+                                          (__mmask16)(U)); })
+
+#define _mm256_maskz_shldi_epi16(U, A, B, I) \
+  _mm256_mask_shldi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm256_shldi_epi16(A, B, I) \
+  _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm128_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
+  (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \
+                                          (__v8hi)(B), \
+                                          (int)(I), \
+                                          (__v8hi)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm128_maskz_shldi_epi16(U, A, B, I) \
+  _mm128_mask_shldi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm128_shldi_epi16(A, B, I) \
+  _mm128_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
+  (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \
+                                          (__v4di)(B), \
+                                          (int)(I), \
+                                          (__v4di)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
+  _mm256_mask_shrdi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm256_shrdi_epi64(A, B, I) \
+  _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm128_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
+  (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \
+                                          (__v2di)(B), \
+                                          (int)(I), \
+                                          (__v2di)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm128_maskz_shrdi_epi64(U, A, B, I) \
+  _mm128_mask_shrdi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm128_shrdi_epi64(A, B, I) \
+  _mm128_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
+  (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \
+                                          (__v8si)(B), \
+                                          (int)(I), \
+                                          (__v8si)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
+  _mm256_mask_shrdi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm256_shrdi_epi32(A, B, I) \
+  _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm128_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
+  (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \
+                                          (__v4si)(B), \
+                                          (int)(I), \
+                                          (__v4si)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm128_maskz_shrdi_epi32(U, A, B, I) \
+  _mm128_mask_shrdi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm128_shrdi_epi32(A, B, I) \
+  _mm128_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
+  (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \
+                                          (__v16hi)(B), \
+                                          (int)(I), \
+                                          (__v16hi)(S), \
+                                          (__mmask16)(U)); })
+
+#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
+  _mm256_mask_shrdi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm256_shrdi_epi16(A, B, I) \
+  _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
+
+#define _mm128_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
+  (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \
+                                          (__v8hi)(B), \
+                                          (int)(I), \
+                                          (__v8hi)(S), \
+                                          (__mmask8)(U)); })
+
+#define _mm128_maskz_shrdi_epi16(U, A, B, I) \
+  _mm128_mask_shrdi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I))
+
+#define _mm128_shrdi_epi16(A, B, I) \
+  _mm128_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
+              (__v4di) __A,
+              (__v4di) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S,
+              (__v4di) __A,
+              (__v4di) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
+              (__v4di) __A,
+              (__v4di) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
+              (__v2di) __A,
+              (__v2di) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S,
+              (__v2di) __A,
+              (__v2di) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
+              (__v2di) __A,
+              (__v2di) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
+              (__v16hi) __A,
+              (__v16hi) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S,
+              (__v16hi) __A,
+              (__v16hi) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
+              (__v16hi) __A,
+              (__v16hi) __B,
+              (__mmask16) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
+              (__v8hi) __A,
+              (__v8hi) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S,
+              (__v8hi) __A,
+              (__v8hi) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
+              (__v8hi) __A,
+              (__v8hi) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
+              (__v4di) __A,
+              (__v4di) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S,
+              (__v4di) __A,
+              (__v4di) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
+              (__v4di) __A,
+              (__v4di) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
+              (__v2di) __A,
+              (__v2di) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S,
+              (__v2di) __A,
+              (__v2di) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
+              (__v2di) __A,
+              (__v2di) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
+              (__v16hi) __A,
+              (__v16hi) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S,
+              (__v16hi) __A,
+              (__v16hi) __B,
+              __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
+              (__v16hi) __A,
+              (__v16hi) __B,
+              (__mmask16) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
+              (__v8hi) __A,
+              (__v8hi) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S,
+              (__v8hi) __A,
+              (__v8hi) __B,
+              __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
+              (__v8hi) __A,
+              (__v8hi) __B,
+              (__mmask8) -1);
+}
+
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/Headers/avx512vlvnniintrin.h b/lib/Headers/avx512vlvnniintrin.h
new file mode 100644
index 0000000..745ae8b
--- /dev/null
+++ b/lib/Headers/avx512vlvnniintrin.h
@@ -0,0 +1,254 @@
+/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VLVNNIINTRIN_H
+#define __AVX512VLVNNIINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni")))
+
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
+              (__v8si) __A,
+              (__v8si) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
+              (__v4si) __A,
+              (__v4si) __B,
+              (__mmask8) -1);
+}
+
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/Headers/avx512vnniintrin.h b/lib/Headers/avx512vnniintrin.h
new file mode 100644
index 0000000..0c6badd
--- /dev/null
+++ b/lib/Headers/avx512vnniintrin.h
@@ -0,0 +1,146 @@
+/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VNNIINTRIN_H
+#define __AVX512VNNIINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni")))
+
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
+              (__v16si) __A,
+              (__v16si) __B,
+              (__mmask16) -1);
+}
+
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/Headers/avx512vpopcntdqvlintrin.h b/lib/Headers/avx512vpopcntdqvlintrin.h
new file mode 100644
index 0000000..c2058a8
--- /dev/null
+++ b/lib/Headers/avx512vpopcntdqvlintrin.h
@@ -0,0 +1,99 @@
+/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics
+ *------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VPOPCNTDQVLINTRIN_H
+#define __AVX512VPOPCNTDQVLINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl")))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi64(__m128i __A) {
+  return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      (__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
+  return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi32(__m128i __A) {
+  return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
+  return (__m128i)__builtin_ia32_selectd_128(
+      (__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
+  return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi64(__m256i __A) {
+  return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      (__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
+  return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi32(__m256i __A) {
+  return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
+  return (__m256i)__builtin_ia32_selectd_256(
+      (__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) {
+  return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h
index dff5897..27dc644 100644
--- a/lib/Headers/avxintrin.h
+++ b/lib/Headers/avxintrin.h
@@ -1120,7 +1120,7 @@
 /// \param A
 ///    A 256-bit vector of [8 x float].
 /// \param C
-///    An immediate integer operand specifying how the values are to be \n
+///    An immediate integer operand specifying how the values are to be
 ///    copied. \n
 ///    Bits [1:0]: \n
 ///      00: Bits [31:0] of the source are copied to bits [31:0] of the
@@ -1150,7 +1150,7 @@
 ///      11: Bits [127:96] of the source are copied to bits [95:64] of the
 ///          returned vector. \n
 ///    Bits [7:6]: \n
-///      00: Bits [31:qq0] of the source are copied to bits [127:96] of the
+///      00: Bits [31:0] of the source are copied to bits [127:96] of the
 ///          returned vector. \n
 ///      01: Bits [63:32] of the source are copied to bits [127:96] of the
 ///          returned vector. \n
@@ -1665,38 +1665,38 @@
 /// \param c
 ///    An immediate integer operand, with bits [4:0] specifying which comparison
 ///    operation to use: \n
-///    0x00 : Equal (ordered, non-signaling)
-///    0x01 : Less-than (ordered, signaling)
-///    0x02 : Less-than-or-equal (ordered, signaling)
-///    0x03 : Unordered (non-signaling)
-///    0x04 : Not-equal (unordered, non-signaling)
-///    0x05 : Not-less-than (unordered, signaling)
-///    0x06 : Not-less-than-or-equal (unordered, signaling)
-///    0x07 : Ordered (non-signaling)
-///    0x08 : Equal (unordered, non-signaling)
-///    0x09 : Not-greater-than-or-equal (unordered, signaling)
-///    0x0a : Not-greater-than (unordered, signaling)
-///    0x0b : False (ordered, non-signaling)
-///    0x0c : Not-equal (ordered, non-signaling)
-///    0x0d : Greater-than-or-equal (ordered, signaling)
-///    0x0e : Greater-than (ordered, signaling)
-///    0x0f : True (unordered, non-signaling)
-///    0x10 : Equal (ordered, signaling)
-///    0x11 : Less-than (ordered, non-signaling)
-///    0x12 : Less-than-or-equal (ordered, non-signaling)
-///    0x13 : Unordered (signaling)
-///    0x14 : Not-equal (unordered, signaling)
-///    0x15 : Not-less-than (unordered, non-signaling)
-///    0x16 : Not-less-than-or-equal (unordered, non-signaling)
-///    0x17 : Ordered (signaling)
-///    0x18 : Equal (unordered, signaling)
-///    0x19 : Not-greater-than-or-equal (unordered, non-signaling)
-///    0x1a : Not-greater-than (unordered, non-signaling)
-///    0x1b : False (ordered, signaling)
-///    0x1c : Not-equal (ordered, signaling)
-///    0x1d : Greater-than-or-equal (ordered, non-signaling)
-///    0x1e : Greater-than (ordered, non-signaling)
-///    0x1f : True (unordered, signaling)
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, non-signaling) \n
+///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
+///    0x0A: Not-greater-than (unordered, signaling) \n
+///    0x0B: False (ordered, non-signaling) \n
+///    0x0C: Not-equal (ordered, non-signaling) \n
+///    0x0D: Greater-than-or-equal (ordered, signaling) \n
+///    0x0E: Greater-than (ordered, signaling) \n
+///    0x0F: True (unordered, non-signaling) \n
+///    0x10: Equal (ordered, signaling) \n
+///    0x11: Less-than (ordered, non-signaling) \n
+///    0x12: Less-than-or-equal (ordered, non-signaling) \n
+///    0x13: Unordered (signaling) \n
+///    0x14: Not-equal (unordered, signaling) \n
+///    0x15: Not-less-than (unordered, non-signaling) \n
+///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
+///    0x17: Ordered (signaling) \n
+///    0x18: Equal (unordered, signaling) \n
+///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
+///    0x1A: Not-greater-than (unordered, non-signaling) \n
+///    0x1B: False (ordered, signaling) \n
+///    0x1C: Not-equal (ordered, signaling) \n
+///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
+///    0x1E: Greater-than (ordered, non-signaling) \n
+///    0x1F: True (unordered, signaling)
 /// \returns A 128-bit vector of [2 x double] containing the comparison results.
 #define _mm_cmp_pd(a, b, c) __extension__ ({ \
   (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
@@ -1725,38 +1725,38 @@
 /// \param c
 ///    An immediate integer operand, with bits [4:0] specifying which comparison
 ///    operation to use: \n
-///    0x00 : Equal (ordered, non-signaling)
-///    0x01 : Less-than (ordered, signaling)
-///    0x02 : Less-than-or-equal (ordered, signaling)
-///    0x03 : Unordered (non-signaling)
-///    0x04 : Not-equal (unordered, non-signaling)
-///    0x05 : Not-less-than (unordered, signaling)
-///    0x06 : Not-less-than-or-equal (unordered, signaling)
-///    0x07 : Ordered (non-signaling)
-///    0x08 : Equal (unordered, non-signaling)
-///    0x09 : Not-greater-than-or-equal (unordered, signaling)
-///    0x0a : Not-greater-than (unordered, signaling)
-///    0x0b : False (ordered, non-signaling)
-///    0x0c : Not-equal (ordered, non-signaling)
-///    0x0d : Greater-than-or-equal (ordered, signaling)
-///    0x0e : Greater-than (ordered, signaling)
-///    0x0f : True (unordered, non-signaling)
-///    0x10 : Equal (ordered, signaling)
-///    0x11 : Less-than (ordered, non-signaling)
-///    0x12 : Less-than-or-equal (ordered, non-signaling)
-///    0x13 : Unordered (signaling)
-///    0x14 : Not-equal (unordered, signaling)
-///    0x15 : Not-less-than (unordered, non-signaling)
-///    0x16 : Not-less-than-or-equal (unordered, non-signaling)
-///    0x17 : Ordered (signaling)
-///    0x18 : Equal (unordered, signaling)
-///    0x19 : Not-greater-than-or-equal (unordered, non-signaling)
-///    0x1a : Not-greater-than (unordered, non-signaling)
-///    0x1b : False (ordered, signaling)
-///    0x1c : Not-equal (ordered, signaling)
-///    0x1d : Greater-than-or-equal (ordered, non-signaling)
-///    0x1e : Greater-than (ordered, non-signaling)
-///    0x1f : True (unordered, signaling)
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, non-signaling) \n
+///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
+///    0x0A: Not-greater-than (unordered, signaling) \n
+///    0x0B: False (ordered, non-signaling) \n
+///    0x0C: Not-equal (ordered, non-signaling) \n
+///    0x0D: Greater-than-or-equal (ordered, signaling) \n
+///    0x0E: Greater-than (ordered, signaling) \n
+///    0x0F: True (unordered, non-signaling) \n
+///    0x10: Equal (ordered, signaling) \n
+///    0x11: Less-than (ordered, non-signaling) \n
+///    0x12: Less-than-or-equal (ordered, non-signaling) \n
+///    0x13: Unordered (signaling) \n
+///    0x14: Not-equal (unordered, signaling) \n
+///    0x15: Not-less-than (unordered, non-signaling) \n
+///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
+///    0x17: Ordered (signaling) \n
+///    0x18: Equal (unordered, signaling) \n
+///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
+///    0x1A: Not-greater-than (unordered, non-signaling) \n
+///    0x1B: False (ordered, signaling) \n
+///    0x1C: Not-equal (ordered, signaling) \n
+///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
+///    0x1E: Greater-than (ordered, non-signaling) \n
+///    0x1F: True (unordered, signaling)
 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
 #define _mm_cmp_ps(a, b, c) __extension__ ({ \
   (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
@@ -1785,38 +1785,38 @@
 /// \param c
 ///    An immediate integer operand, with bits [4:0] specifying which comparison
 ///    operation to use: \n
-///    0x00 : Equal (ordered, non-signaling)
-///    0x01 : Less-than (ordered, signaling)
-///    0x02 : Less-than-or-equal (ordered, signaling)
-///    0x03 : Unordered (non-signaling)
-///    0x04 : Not-equal (unordered, non-signaling)
-///    0x05 : Not-less-than (unordered, signaling)
-///    0x06 : Not-less-than-or-equal (unordered, signaling)
-///    0x07 : Ordered (non-signaling)
-///    0x08 : Equal (unordered, non-signaling)
-///    0x09 : Not-greater-than-or-equal (unordered, signaling)
-///    0x0a : Not-greater-than (unordered, signaling)
-///    0x0b : False (ordered, non-signaling)
-///    0x0c : Not-equal (ordered, non-signaling)
-///    0x0d : Greater-than-or-equal (ordered, signaling)
-///    0x0e : Greater-than (ordered, signaling)
-///    0x0f : True (unordered, non-signaling)
-///    0x10 : Equal (ordered, signaling)
-///    0x11 : Less-than (ordered, non-signaling)
-///    0x12 : Less-than-or-equal (ordered, non-signaling)
-///    0x13 : Unordered (signaling)
-///    0x14 : Not-equal (unordered, signaling)
-///    0x15 : Not-less-than (unordered, non-signaling)
-///    0x16 : Not-less-than-or-equal (unordered, non-signaling)
-///    0x17 : Ordered (signaling)
-///    0x18 : Equal (unordered, signaling)
-///    0x19 : Not-greater-than-or-equal (unordered, non-signaling)
-///    0x1a : Not-greater-than (unordered, non-signaling)
-///    0x1b : False (ordered, signaling)
-///    0x1c : Not-equal (ordered, signaling)
-///    0x1d : Greater-than-or-equal (ordered, non-signaling)
-///    0x1e : Greater-than (ordered, non-signaling)
-///    0x1f : True (unordered, signaling)
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, non-signaling) \n
+///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
+///    0x0A: Not-greater-than (unordered, signaling) \n
+///    0x0B: False (ordered, non-signaling) \n
+///    0x0C: Not-equal (ordered, non-signaling) \n
+///    0x0D: Greater-than-or-equal (ordered, signaling) \n
+///    0x0E: Greater-than (ordered, signaling) \n
+///    0x0F: True (unordered, non-signaling) \n
+///    0x10: Equal (ordered, signaling) \n
+///    0x11: Less-than (ordered, non-signaling) \n
+///    0x12: Less-than-or-equal (ordered, non-signaling) \n
+///    0x13: Unordered (signaling) \n
+///    0x14: Not-equal (unordered, signaling) \n
+///    0x15: Not-less-than (unordered, non-signaling) \n
+///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
+///    0x17: Ordered (signaling) \n
+///    0x18: Equal (unordered, signaling) \n
+///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
+///    0x1A: Not-greater-than (unordered, non-signaling) \n
+///    0x1B: False (ordered, signaling) \n
+///    0x1C: Not-equal (ordered, signaling) \n
+///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
+///    0x1E: Greater-than (ordered, non-signaling) \n
+///    0x1F: True (unordered, signaling)
 /// \returns A 256-bit vector of [4 x double] containing the comparison results.
 #define _mm256_cmp_pd(a, b, c) __extension__ ({ \
   (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
@@ -1845,38 +1845,38 @@
 /// \param c
 ///    An immediate integer operand, with bits [4:0] specifying which comparison
 ///    operation to use: \n
-///    0x00 : Equal (ordered, non-signaling)
-///    0x01 : Less-than (ordered, signaling)
-///    0x02 : Less-than-or-equal (ordered, signaling)
-///    0x03 : Unordered (non-signaling)
-///    0x04 : Not-equal (unordered, non-signaling)
-///    0x05 : Not-less-than (unordered, signaling)
-///    0x06 : Not-less-than-or-equal (unordered, signaling)
-///    0x07 : Ordered (non-signaling)
-///    0x08 : Equal (unordered, non-signaling)
-///    0x09 : Not-greater-than-or-equal (unordered, signaling)
-///    0x0a : Not-greater-than (unordered, signaling)
-///    0x0b : False (ordered, non-signaling)
-///    0x0c : Not-equal (ordered, non-signaling)
-///    0x0d : Greater-than-or-equal (ordered, signaling)
-///    0x0e : Greater-than (ordered, signaling)
-///    0x0f : True (unordered, non-signaling)
-///    0x10 : Equal (ordered, signaling)
-///    0x11 : Less-than (ordered, non-signaling)
-///    0x12 : Less-than-or-equal (ordered, non-signaling)
-///    0x13 : Unordered (signaling)
-///    0x14 : Not-equal (unordered, signaling)
-///    0x15 : Not-less-than (unordered, non-signaling)
-///    0x16 : Not-less-than-or-equal (unordered, non-signaling)
-///    0x17 : Ordered (signaling)
-///    0x18 : Equal (unordered, signaling)
-///    0x19 : Not-greater-than-or-equal (unordered, non-signaling)
-///    0x1a : Not-greater-than (unordered, non-signaling)
-///    0x1b : False (ordered, signaling)
-///    0x1c : Not-equal (ordered, signaling)
-///    0x1d : Greater-than-or-equal (ordered, non-signaling)
-///    0x1e : Greater-than (ordered, non-signaling)
-///    0x1f : True (unordered, signaling)
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, non-signaling) \n
+///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
+///    0x0A: Not-greater-than (unordered, signaling) \n
+///    0x0B: False (ordered, non-signaling) \n
+///    0x0C: Not-equal (ordered, non-signaling) \n
+///    0x0D: Greater-than-or-equal (ordered, signaling) \n
+///    0x0E: Greater-than (ordered, signaling) \n
+///    0x0F: True (unordered, non-signaling) \n
+///    0x10: Equal (ordered, signaling) \n
+///    0x11: Less-than (ordered, non-signaling) \n
+///    0x12: Less-than-or-equal (ordered, non-signaling) \n
+///    0x13: Unordered (signaling) \n
+///    0x14: Not-equal (unordered, signaling) \n
+///    0x15: Not-less-than (unordered, non-signaling) \n
+///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
+///    0x17: Ordered (signaling) \n
+///    0x18: Equal (unordered, signaling) \n
+///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
+///    0x1A: Not-greater-than (unordered, non-signaling) \n
+///    0x1B: False (ordered, signaling) \n
+///    0x1C: Not-equal (ordered, signaling) \n
+///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
+///    0x1E: Greater-than (ordered, non-signaling) \n
+///    0x1F: True (unordered, signaling)
 /// \returns A 256-bit vector of [8 x float] containing the comparison results.
 #define _mm256_cmp_ps(a, b, c) __extension__ ({ \
   (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
@@ -1904,38 +1904,38 @@
 /// \param c
 ///    An immediate integer operand, with bits [4:0] specifying which comparison
 ///    operation to use: \n
-///    0x00 : Equal (ordered, non-signaling)
-///    0x01 : Less-than (ordered, signaling)
-///    0x02 : Less-than-or-equal (ordered, signaling)
-///    0x03 : Unordered (non-signaling)
-///    0x04 : Not-equal (unordered, non-signaling)
-///    0x05 : Not-less-than (unordered, signaling)
-///    0x06 : Not-less-than-or-equal (unordered, signaling)
-///    0x07 : Ordered (non-signaling)
-///    0x08 : Equal (unordered, non-signaling)
-///    0x09 : Not-greater-than-or-equal (unordered, signaling)
-///    0x0a : Not-greater-than (unordered, signaling)
-///    0x0b : False (ordered, non-signaling)
-///    0x0c : Not-equal (ordered, non-signaling)
-///    0x0d : Greater-than-or-equal (ordered, signaling)
-///    0x0e : Greater-than (ordered, signaling)
-///    0x0f : True (unordered, non-signaling)
-///    0x10 : Equal (ordered, signaling)
-///    0x11 : Less-than (ordered, non-signaling)
-///    0x12 : Less-than-or-equal (ordered, non-signaling)
-///    0x13 : Unordered (signaling)
-///    0x14 : Not-equal (unordered, signaling)
-///    0x15 : Not-less-than (unordered, non-signaling)
-///    0x16 : Not-less-than-or-equal (unordered, non-signaling)
-///    0x17 : Ordered (signaling)
-///    0x18 : Equal (unordered, signaling)
-///    0x19 : Not-greater-than-or-equal (unordered, non-signaling)
-///    0x1a : Not-greater-than (unordered, non-signaling)
-///    0x1b : False (ordered, signaling)
-///    0x1c : Not-equal (ordered, signaling)
-///    0x1d : Greater-than-or-equal (ordered, non-signaling)
-///    0x1e : Greater-than (ordered, non-signaling)
-///    0x1f : True (unordered, signaling)
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, non-signaling) \n
+///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
+///    0x0A: Not-greater-than (unordered, signaling) \n
+///    0x0B: False (ordered, non-signaling) \n
+///    0x0C: Not-equal (ordered, non-signaling) \n
+///    0x0D: Greater-than-or-equal (ordered, signaling) \n
+///    0x0E: Greater-than (ordered, signaling) \n
+///    0x0F: True (unordered, non-signaling) \n
+///    0x10: Equal (ordered, signaling) \n
+///    0x11: Less-than (ordered, non-signaling) \n
+///    0x12: Less-than-or-equal (ordered, non-signaling) \n
+///    0x13: Unordered (signaling) \n
+///    0x14: Not-equal (unordered, signaling) \n
+///    0x15: Not-less-than (unordered, non-signaling) \n
+///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
+///    0x17: Ordered (signaling) \n
+///    0x18: Equal (unordered, signaling) \n
+///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
+///    0x1A: Not-greater-than (unordered, non-signaling) \n
+///    0x1B: False (ordered, signaling) \n
+///    0x1C: Not-equal (ordered, signaling) \n
+///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
+///    0x1E: Greater-than (ordered, non-signaling) \n
+///    0x1F: True (unordered, signaling)
 /// \returns A 128-bit vector of [2 x double] containing the comparison results.
 #define _mm_cmp_sd(a, b, c) __extension__ ({ \
   (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
@@ -1963,38 +1963,38 @@
 /// \param c
 ///    An immediate integer operand, with bits [4:0] specifying which comparison
 ///    operation to use: \n
-///    0x00 : Equal (ordered, non-signaling)
-///    0x01 : Less-than (ordered, signaling)
-///    0x02 : Less-than-or-equal (ordered, signaling)
-///    0x03 : Unordered (non-signaling)
-///    0x04 : Not-equal (unordered, non-signaling)
-///    0x05 : Not-less-than (unordered, signaling)
-///    0x06 : Not-less-than-or-equal (unordered, signaling)
-///    0x07 : Ordered (non-signaling)
-///    0x08 : Equal (unordered, non-signaling)
-///    0x09 : Not-greater-than-or-equal (unordered, signaling)
-///    0x0a : Not-greater-than (unordered, signaling)
-///    0x0b : False (ordered, non-signaling)
-///    0x0c : Not-equal (ordered, non-signaling)
-///    0x0d : Greater-than-or-equal (ordered, signaling)
-///    0x0e : Greater-than (ordered, signaling)
-///    0x0f : True (unordered, non-signaling)
-///    0x10 : Equal (ordered, signaling)
-///    0x11 : Less-than (ordered, non-signaling)
-///    0x12 : Less-than-or-equal (ordered, non-signaling)
-///    0x13 : Unordered (signaling)
-///    0x14 : Not-equal (unordered, signaling)
-///    0x15 : Not-less-than (unordered, non-signaling)
-///    0x16 : Not-less-than-or-equal (unordered, non-signaling)
-///    0x17 : Ordered (signaling)
-///    0x18 : Equal (unordered, signaling)
-///    0x19 : Not-greater-than-or-equal (unordered, non-signaling)
-///    0x1a : Not-greater-than (unordered, non-signaling)
-///    0x1b : False (ordered, signaling)
-///    0x1c : Not-equal (ordered, signaling)
-///    0x1d : Greater-than-or-equal (ordered, non-signaling)
-///    0x1e : Greater-than (ordered, non-signaling)
-///    0x1f : True (unordered, signaling)
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, non-signaling) \n
+///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
+///    0x0A: Not-greater-than (unordered, signaling) \n
+///    0x0B: False (ordered, non-signaling) \n
+///    0x0C: Not-equal (ordered, non-signaling) \n
+///    0x0D: Greater-than-or-equal (ordered, signaling) \n
+///    0x0E: Greater-than (ordered, signaling) \n
+///    0x0F: True (unordered, non-signaling) \n
+///    0x10: Equal (ordered, signaling) \n
+///    0x11: Less-than (ordered, non-signaling) \n
+///    0x12: Less-than-or-equal (ordered, non-signaling) \n
+///    0x13: Unordered (signaling) \n
+///    0x14: Not-equal (unordered, signaling) \n
+///    0x15: Not-less-than (unordered, non-signaling) \n
+///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
+///    0x17: Ordered (signaling) \n
+///    0x18: Equal (unordered, signaling) \n
+///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
+///    0x1A: Not-greater-than (unordered, non-signaling) \n
+///    0x1B: False (ordered, signaling) \n
+///    0x1C: Not-equal (ordered, signaling) \n
+///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
+///    0x1E: Greater-than (ordered, non-signaling) \n
+///    0x1F: True (unordered, signaling)
 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
 #define _mm_cmp_ss(a, b, c) __extension__ ({ \
   (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
@@ -2375,9 +2375,8 @@
 }
 
 /* Vector replicate */
-/// \brief Moves and duplicates high-order (odd-indexed) values from a 256-bit
-///    vector of [8 x float] to float values in a 256-bit vector of
-///    [8 x float].
+/// \brief Moves and duplicates odd-indexed values from a 256-bit vector of
+///    [8 x float] to float values in a 256-bit vector of [8 x float].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2401,8 +2400,8 @@
   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
 }
 
-/// \brief Moves and duplicates low-order (even-indexed) values from a 256-bit
-///    vector of [8 x float] to float values in a 256-bit vector of [8 x float].
+/// \brief Moves and duplicates even-indexed values from a 256-bit vector of
+///    [8 x float] to float values in a 256-bit vector of [8 x float].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2998,7 +2997,7 @@
   return __builtin_ia32_movmskpd256((__v4df)__a);
 }
 
-/// \brief Extracts the sign bits of double-precision floating point elements
+/// \brief Extracts the sign bits of single-precision floating point elements
 ///    in a 256-bit vector of [8 x float] and writes them to the lower order
 ///    bits of the return value.
 ///
@@ -3007,7 +3006,7 @@
 /// This intrinsic corresponds to the <c> VMOVMSKPS </c> instruction.
 ///
 /// \param __a
-///    A 256-bit vector of [8 x float] containing the double-precision floating
+///    A 256-bit vector of [8 x float] containing the single-precision floating
 ///    point values with sign bits to be extracted.
 /// \returns The sign bits from the operand, written to bits [7:0].
 static __inline int __DEFAULT_FN_ATTRS
diff --git a/lib/Headers/cetintrin.h b/lib/Headers/cetintrin.h
new file mode 100644
index 0000000..b9e9801
--- /dev/null
+++ b/lib/Headers/cetintrin.h
@@ -0,0 +1,113 @@
+/*===---- cetintrin.h - CET intrinsic ------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <cetintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __CETINTRIN_H
+#define __CETINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("shstk")))
+
+static __inline__ void __DEFAULT_FN_ATTRS _incsspd(int __a) {
+  __builtin_ia32_incsspd(__a);
+}
+
+#ifdef __x86_64__
+static __inline__ void __DEFAULT_FN_ATTRS _incsspq(unsigned long long __a) {
+  __builtin_ia32_incsspq(__a);
+}
+#endif /* __x86_64__ */
+
+#ifdef __x86_64__
+static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {
+  __builtin_ia32_incsspq(__a);
+}
+#else /* __x86_64__ */
+static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {
+  __builtin_ia32_incsspd((int)__a);
+}
+#endif /* __x86_64__ */
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd(unsigned int __a) {
+  return __builtin_ia32_rdsspd(__a);
+}
+
+#ifdef __x86_64__
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq(unsigned long long __a) {
+  return __builtin_ia32_rdsspq(__a);
+}
+#endif /* __x86_64__ */
+
+#ifdef __x86_64__
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS _get_ssp(void) {
+  return __builtin_ia32_rdsspq(0);
+}
+#else /* __x86_64__ */
+static __inline__ unsigned int __DEFAULT_FN_ATTRS _get_ssp(void) {
+  return __builtin_ia32_rdsspd(0);
+}
+#endif /* __x86_64__ */
+
+static __inline__ void __DEFAULT_FN_ATTRS _saveprevssp() {
+  __builtin_ia32_saveprevssp();
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS _rstorssp(void * __p) {
+  __builtin_ia32_rstorssp(__p);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS _wrssd(unsigned int __a, void * __p) {
+  __builtin_ia32_wrssd(__a, __p);
+}
+
+#ifdef __x86_64__
+static __inline__ void __DEFAULT_FN_ATTRS _wrssq(unsigned long long __a, void * __p) {
+  __builtin_ia32_wrssq(__a, __p);
+}
+#endif /* __x86_64__ */
+
+static __inline__ void __DEFAULT_FN_ATTRS _wrussd(unsigned int __a, void * __p) {
+  __builtin_ia32_wrussd(__a, __p);
+}
+
+#ifdef __x86_64__
+static __inline__ void __DEFAULT_FN_ATTRS _wrussq(unsigned long long __a, void * __p) {
+  __builtin_ia32_wrussq(__a, __p);
+}
+#endif /* __x86_64__ */
+
+static __inline__ void __DEFAULT_FN_ATTRS _setssbsy() {
+  __builtin_ia32_setssbsy();
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS _clrssbsy(void * __p) {
+  __builtin_ia32_clrssbsy(__p);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __CETINTRIN_H */
diff --git a/lib/Headers/cpuid.h b/lib/Headers/cpuid.h
index 2dd0add..a41c311 100644
--- a/lib/Headers/cpuid.h
+++ b/lib/Headers/cpuid.h
@@ -166,23 +166,31 @@
 #define bit_CLFLUSHOPT  0x00800000
 #define bit_CLWB        0x01000000
 #define bit_AVX512PF    0x04000000
-#define bit_AVX51SER    0x08000000
+#define bit_AVX512ER    0x08000000
 #define bit_AVX512CD    0x10000000
 #define bit_SHA         0x20000000
 #define bit_AVX512BW    0x40000000
 #define bit_AVX512VL    0x80000000
 
 /* Features in %ecx for leaf 7 sub-leaf 0 */
-#define bit_PREFTCHWT1  0x00000001
-#define bit_AVX512VBMI  0x00000002
-#define bit_PKU         0x00000004
-#define bit_OSPKE       0x00000010
+#define bit_PREFTCHWT1       0x00000001
+#define bit_AVX512VBMI       0x00000002
+#define bit_PKU              0x00000004
+#define bit_OSPKE            0x00000010
+#define bit_AVX512VBMI2      0x00000040
+#define bit_SHSTK            0x00000080
+#define bit_GFNI             0x00000100
+#define bit_VAES             0x00000200
+#define bit_VPCLMULQDQ       0x00000400
+#define bit_AVX512VNNI       0x00000800
+#define bit_AVX512BITALG     0x00001000
 #define bit_AVX512VPOPCNTDQ  0x00004000
-#define bit_RDPID       0x00400000
+#define bit_RDPID            0x00400000
 
 /* Features in %edx for leaf 7 sub-leaf 0 */
 #define bit_AVX5124VNNIW  0x00000004
 #define bit_AVX5124FMAPS  0x00000008
+#define bit_IBT           0x00100000
 
 /* Features in %eax for leaf 13 sub-leaf 1 */
 #define bit_XSAVEOPT    0x00000001
@@ -192,6 +200,7 @@
 /* Features in %ecx for leaf 0x80000001 */
 #define bit_LAHF_LM     0x00000001
 #define bit_ABM         0x00000020
+#define bit_LZCNT       bit_ABM        /* for gcc compat */
 #define bit_SSE4a       0x00000040
 #define bit_PRFCHW      0x00000100
 #define bit_XOP         0x00000800
diff --git a/lib/Headers/cuda_wrappers/algorithm b/lib/Headers/cuda_wrappers/algorithm
index 95d9beb..cedd707 100644
--- a/lib/Headers/cuda_wrappers/algorithm
+++ b/lib/Headers/cuda_wrappers/algorithm
@@ -80,7 +80,7 @@
 template <class __T>
 inline __device__ const __T &
 min(const __T &__a, const __T &__b) {
-  return __a < __b ? __b : __a;
+  return __a < __b ? __a : __b;
 }
 
 #ifdef _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h
index 3372508..9302321 100644
--- a/lib/Headers/emmintrin.h
+++ b/lib/Headers/emmintrin.h
@@ -217,8 +217,8 @@
 
 /// \brief Calculates the square root of the lower double-precision value of
 ///    the second operand and returns it in the lower 64 bits of the result.
-///    The upper 64 bits of the result are copied from the upper double-
-///    precision value of the first operand.
+///    The upper 64 bits of the result are copied from the upper
+///    double-precision value of the first operand.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -260,8 +260,8 @@
 
 /// \brief Compares lower 64-bit double-precision values of both operands, and
 ///    returns the lesser of the pair of values in the lower 64-bits of the
-///    result. The upper 64 bits of the result are copied from the upper double-
-///    precision value of the first operand.
+///    result. The upper 64 bits of the result are copied from the upper
+///    double-precision value of the first operand.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -304,8 +304,8 @@
 
 /// \brief Compares lower 64-bit double-precision values of both operands, and
 ///    returns the greater of the pair of values in the lower 64-bits of the
-///    result. The upper 64 bits of the result are copied from the upper double-
-///    precision value of the first operand.
+///    result. The upper 64 bits of the result are copied from the upper
+///    double-precision value of the first operand.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -422,8 +422,8 @@
 }
 
 /// \brief Compares each of the corresponding double-precision values of the
-///    128-bit vectors of [2 x double] for equality. Each comparison yields 0h
-///    for false, FFFFFFFFFFFFFFFFh for true.
+///    128-bit vectors of [2 x double] for equality. Each comparison yields 0x0
+///    for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -443,7 +443,7 @@
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
 ///    operand are less than those in the second operand. Each comparison
-///    yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -464,7 +464,7 @@
 ///    128-bit vectors of [2 x double] to determine if the values in the first
 ///    operand are less than or equal to those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -485,7 +485,7 @@
 ///    128-bit vectors of [2 x double] to determine if the values in the first
 ///    operand are greater than those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -506,7 +506,7 @@
 ///    128-bit vectors of [2 x double] to determine if the values in the first
 ///    operand are greater than or equal to those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -528,8 +528,8 @@
 ///    operand are ordered with respect to those in the second operand.
 ///
 ///    A pair of double-precision values are "ordered" with respect to each
-///    other if neither value is a NaN. Each comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    other if neither value is a NaN. Each comparison yields 0x0 for false,
+///    0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -551,8 +551,8 @@
 ///    operand are unordered with respect to those in the second operand.
 ///
 ///    A pair of double-precision values are "unordered" with respect to each
-///    other if one or both values are NaN. Each comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    other if one or both values are NaN. Each comparison yields 0x0 for
+///    false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -574,7 +574,7 @@
 ///    128-bit vectors of [2 x double] to determine if the values in the first
 ///    operand are unequal to those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -595,7 +595,7 @@
 ///    128-bit vectors of [2 x double] to determine if the values in the first
 ///    operand are not less than those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -616,7 +616,7 @@
 ///    128-bit vectors of [2 x double] to determine if the values in the first
 ///    operand are not less than or equal to those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -637,7 +637,7 @@
 ///    128-bit vectors of [2 x double] to determine if the values in the first
 ///    operand are not greater than those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -658,7 +658,7 @@
 ///    128-bit vectors of [2 x double] to determine if the values in the first
 ///    operand are not greater than or equal to those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -678,7 +678,7 @@
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] for equality.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -703,7 +703,7 @@
 ///    the value in the first parameter is less than the corresponding value in
 ///    the second parameter.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -728,7 +728,7 @@
 ///    the value in the first parameter is less than or equal to the
 ///    corresponding value in the second parameter.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -753,7 +753,7 @@
 ///    the value in the first parameter is greater than the corresponding value
 ///    in the second parameter.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -779,7 +779,7 @@
 ///    the value in the first parameter is greater than or equal to the
 ///    corresponding value in the second parameter.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -805,8 +805,8 @@
 ///    the value in the first parameter is "ordered" with respect to the
 ///    corresponding value in the second parameter.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. A pair of
-///    double-precision values are "ordered" with respect to each other if
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair
+///    of double-precision values are "ordered" with respect to each other if
 ///    neither value is a NaN.
 ///
 /// \headerfile <x86intrin.h>
@@ -832,9 +832,9 @@
 ///    the value in the first parameter is "unordered" with respect to the
 ///    corresponding value in the second parameter.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. A pair of
-///    double-precision values are "unordered" with respect to each other if one
-///    or both values are NaN.
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair
+///    of double-precision values are "unordered" with respect to each other if
+///    one or both values are NaN.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -860,7 +860,7 @@
 ///    the value in the first parameter is unequal to the corresponding value in
 ///    the second parameter.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -885,7 +885,7 @@
 ///    the value in the first parameter is not less than the corresponding
 ///    value in the second parameter.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -910,7 +910,7 @@
 ///    the value in the first parameter is not less than or equal to the
 ///    corresponding value in the second parameter.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -935,7 +935,7 @@
 ///    the value in the first parameter is not greater than the corresponding
 ///    value in the second parameter.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -961,7 +961,7 @@
 ///    the value in the first parameter is not greater than or equal to the
 ///    corresponding value in the second parameter.
 ///
-///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -983,8 +983,10 @@
 }
 
 /// \brief Compares the lower double-precision floating-point values in each of
-///    the two 128-bit floating-point vectors of [2 x double] for equality. The
-///    comparison yields 0 for false, 1 for true.
+///    the two 128-bit floating-point vectors of [2 x double] for equality.
+///
+///    The comparison yields 0 for false, 1 for true. If either of the two
+///    lower double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -996,7 +998,8 @@
 /// \param __b
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///    lower double-precision values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comieq_sd(__m128d __a, __m128d __b)
 {
@@ -1008,7 +1011,8 @@
 ///    the value in the first parameter is less than the corresponding value in
 ///    the second parameter.
 ///
-///    The comparison yields 0 for false, 1 for true.
+///    The comparison yields 0 for false, 1 for true. If either of the two
+///    lower double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1020,7 +1024,8 @@
 /// \param __b
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///     lower double-precision values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comilt_sd(__m128d __a, __m128d __b)
 {
@@ -1032,7 +1037,8 @@
 ///    the value in the first parameter is less than or equal to the
 ///    corresponding value in the second parameter.
 ///
-///    The comparison yields 0 for false, 1 for true.
+///    The comparison yields 0 for false, 1 for true. If either of the two
+///    lower double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1044,7 +1050,8 @@
 /// \param __b
 ///     A 128-bit vector of [2 x double]. The lower double-precision value is
 ///     compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///     lower double-precision values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comile_sd(__m128d __a, __m128d __b)
 {
@@ -1056,7 +1063,8 @@
 ///    the value in the first parameter is greater than the corresponding value
 ///    in the second parameter.
 ///
-///    The comparison yields 0 for false, 1 for true.
+///    The comparison yields 0 for false, 1 for true. If either of the two
+///    lower double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1068,7 +1076,8 @@
 /// \param __b
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///     lower double-precision values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comigt_sd(__m128d __a, __m128d __b)
 {
@@ -1080,7 +1089,8 @@
 ///    the value in the first parameter is greater than or equal to the
 ///    corresponding value in the second parameter.
 ///
-///    The comparison yields 0 for false, 1 for true.
+///    The comparison yields 0 for false, 1 for true. If either of the two
+///    lower double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1092,7 +1102,8 @@
 /// \param __b
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///    lower double-precision values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comige_sd(__m128d __a, __m128d __b)
 {
@@ -1104,7 +1115,8 @@
 ///    the value in the first parameter is unequal to the corresponding value in
 ///    the second parameter.
 ///
-///    The comparison yields 0 for false, 1 for true.
+///    The comparison yields 0 for false, 1 for true. If either of the two
+///    lower double-precision values is NaN, 1 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1116,7 +1128,8 @@
 /// \param __b
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///     lower double-precision values is NaN, 1 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comineq_sd(__m128d __a, __m128d __b)
 {
@@ -1127,7 +1140,7 @@
 ///    the two 128-bit floating-point vectors of [2 x double] for equality. The
 ///    comparison yields 0 for false, 1 for true.
 ///
-///    If either of the two lower double-precision values is NaN, 1 is returned.
+///    If either of the two lower double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1140,7 +1153,7 @@
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results. If either of the two
-///    lower double-precision values is NaN, 1 is returned.
+///    lower double-precision values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomieq_sd(__m128d __a, __m128d __b)
 {
@@ -1153,7 +1166,7 @@
 ///    the second parameter.
 ///
 ///    The comparison yields 0 for false, 1 for true. If either of the two lower
-///    double-precision values is NaN, 1 is returned.
+///    double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1166,7 +1179,7 @@
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results. If either of the two
-///    lower double-precision values is NaN, 1 is returned.
+///    lower double-precision values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomilt_sd(__m128d __a, __m128d __b)
 {
@@ -1179,7 +1192,7 @@
 ///    corresponding value in the second parameter.
 ///
 ///    The comparison yields 0 for false, 1 for true. If either of the two lower
-///    double-precision values is NaN, 1 is returned.
+///    double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1192,7 +1205,7 @@
 ///     A 128-bit vector of [2 x double]. The lower double-precision value is
 ///     compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results. If either of the two
-///     lower double-precision values is NaN, 1 is returned.
+///     lower double-precision values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomile_sd(__m128d __a, __m128d __b)
 {
@@ -1257,7 +1270,7 @@
 ///    the second parameter.
 ///
 ///    The comparison yields 0 for false, 1 for true. If either of the two lower
-///    double-precision values is NaN, 0 is returned.
+///    double-precision values is NaN, 1 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1270,7 +1283,7 @@
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison result. If either of the two
-///    lower double-precision values is NaN, 0 is returned.
+///    lower double-precision values is NaN, 1 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomineq_sd(__m128d __a, __m128d __b)
 {
@@ -1935,14 +1948,15 @@
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c>VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
+/// This intrinsic corresponds to the
+///   <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
 ///
 /// \param __dp
 ///    A pointer to a memory location that can store two double-precision
 ///    values.
 /// \param __a
 ///    A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
-///    of the values in \a dp.
+///    of the values in \a __dp.
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store1_pd(double *__dp, __m128d __a)
 {
@@ -1950,18 +1964,20 @@
   _mm_store_pd(__dp, __a);
 }
 
-/// \brief Stores a 128-bit vector of [2 x double] into an aligned memory
-///    location.
+/// \brief Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
+///    the upper and lower 64 bits of a memory location.
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
+/// This intrinsic corresponds to the
+///   <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
 ///
 /// \param __dp
-///    A pointer to a 128-bit memory location. The address of the memory
-///    location has to be 16-byte aligned.
+///    A pointer to a memory location that can store two double-precision
+///    values.
 /// \param __a
-///    A 128-bit vector of [2 x double] containing the values to be stored.
+///    A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
+///    of the values in \a __dp.
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store_pd1(double *__dp, __m128d __a)
 {
@@ -2158,8 +2174,8 @@
 
 /// \brief Adds, with saturation, the corresponding elements of two 128-bit
 ///    signed [16 x i8] vectors, saving each sum in the corresponding element of
-///    a 128-bit result vector of [16 x i8]. Positive sums greater than 7Fh are
-///    saturated to 7Fh. Negative sums less than 80h are saturated to 80h.
+///    a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are
+///    saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2179,9 +2195,9 @@
 
 /// \brief Adds, with saturation, the corresponding elements of two 128-bit
 ///    signed [8 x i16] vectors, saving each sum in the corresponding element of
-///    a 128-bit result vector of [8 x i16]. Positive sums greater than 7FFFh
-///    are saturated to 7FFFh. Negative sums less than 8000h are saturated to
-///    8000h.
+///    a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF
+///    are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
+///    0x8000.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2201,8 +2217,8 @@
 
 /// \brief Adds, with saturation, the corresponding elements of two 128-bit
 ///    unsigned [16 x i8] vectors, saving each sum in the corresponding element
-///    of a 128-bit result vector of [16 x i8]. Positive sums greater than FFh
-///    are saturated to FFh. Negative sums are saturated to 00h.
+///    of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF
+///    are saturated to 0xFF. Negative sums are saturated to 0x00.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2222,8 +2238,8 @@
 
 /// \brief Adds, with saturation, the corresponding elements of two 128-bit
 ///    unsigned [8 x i16] vectors, saving each sum in the corresponding element
-///    of a 128-bit result vector of [8 x i16]. Positive sums greater than FFFFh
-///    are saturated to FFFFh. Negative sums are saturated to 0000h.
+///    of a 128-bit result vector of [8 x i16]. Positive sums greater than
+///    0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2608,8 +2624,8 @@
 
 /// \brief Subtracts corresponding 8-bit signed integer values in the input and
 ///    returns the differences in the corresponding bytes in the destination.
-///    Differences greater than 7Fh are saturated to 7Fh, and differences less
-///    than 80h are saturated to 80h.
+///    Differences greater than 0x7F are saturated to 0x7F, and differences less
+///    than 0x80 are saturated to 0x80.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2629,8 +2645,8 @@
 
 /// \brief Subtracts corresponding 16-bit signed integer values in the input and
 ///    returns the differences in the corresponding bytes in the destination.
-///    Differences greater than 7FFFh are saturated to 7FFFh, and values less
-///    than 8000h are saturated to 8000h.
+///    Differences greater than 0x7FFF are saturated to 0x7FFF, and values less
+///    than 0x8000 are saturated to 0x8000.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2650,7 +2666,7 @@
 
 /// \brief Subtracts corresponding 8-bit unsigned integer values in the input
 ///    and returns the differences in the corresponding bytes in the
-///    destination. Differences less than 00h are saturated to 00h.
+///    destination. Differences less than 0x00 are saturated to 0x00.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2670,7 +2686,7 @@
 
 /// \brief Subtracts corresponding 16-bit unsigned integer values in the input
 ///    and returns the differences in the corresponding bytes in the
-///    destination. Differences less than 0000h are saturated to 0000h.
+///    destination. Differences less than 0x0000 are saturated to 0x0000.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3152,7 +3168,7 @@
 }
 
 /// \brief Compares each of the corresponding 8-bit values of the 128-bit
-///    integer vectors for equality. Each comparison yields 0h for false, FFh
+///    integer vectors for equality. Each comparison yields 0x0 for false, 0xFF
 ///    for true.
 ///
 /// \headerfile <x86intrin.h>
@@ -3171,8 +3187,8 @@
 }
 
 /// \brief Compares each of the corresponding 16-bit values of the 128-bit
-///    integer vectors for equality. Each comparison yields 0h for false, FFFFh
-///    for true.
+///    integer vectors for equality. Each comparison yields 0x0 for false,
+///    0xFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3190,8 +3206,8 @@
 }
 
 /// \brief Compares each of the corresponding 32-bit values of the 128-bit
-///    integer vectors for equality. Each comparison yields 0h for false,
-///    FFFFFFFFh for true.
+///    integer vectors for equality. Each comparison yields 0x0 for false,
+///    0xFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3210,8 +3226,8 @@
 
 /// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
 ///    integer vectors to determine if the values in the first operand are
-///    greater than those in the second operand. Each comparison yields 0h for
-///    false, FFh for true.
+///    greater than those in the second operand. Each comparison yields 0x0 for
+///    false, 0xFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3234,7 +3250,7 @@
 ///    128-bit integer vectors to determine if the values in the first operand
 ///    are greater than those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3255,7 +3271,7 @@
 ///    128-bit integer vectors to determine if the values in the first operand
 ///    are greater than those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFFFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3276,7 +3292,7 @@
 ///    integer vectors to determine if the values in the first operand are less
 ///    than those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFh for true.
+///    Each comparison yields 0x0 for false, 0xFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3297,7 +3313,7 @@
 ///    128-bit integer vectors to determine if the values in the first operand
 ///    are less than those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3318,7 +3334,7 @@
 ///    128-bit integer vectors to determine if the values in the first operand
 ///    are less than those in the second operand.
 ///
-///    Each comparison yields 0h for false, FFFFFFFFh for true.
+///    Each comparison yields 0x0 for false, 0xFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3846,8 +3862,7 @@
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
-///   instruction.
+/// This intrinsic does not correspond to a specific instruction.
 ///
 /// \param __q0
 ///    A 64-bit integral value used to initialize the lower 64 bits of the
@@ -4018,7 +4033,7 @@
 ///    specified unaligned memory location. When a mask bit is 1, the
 ///    corresponding byte is written, otherwise it is not written.
 ///
-///    To minimize caching, the date is flagged as non-temporal (unlikely to be
+///    To minimize caching, the data is flagged as non-temporal (unlikely to be
 ///    used again soon). Exception and trap behavior for elements not selected
 ///    for storage to memory are implementation dependent.
 ///
@@ -4532,8 +4547,8 @@
   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
 }
 
-/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
-///    of [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
+/// \brief Unpacks the high-order 64-bit elements from two 128-bit vectors of
+///    [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -4665,7 +4680,7 @@
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic has no corresponding instruction.
+/// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction.
 ///
 /// \param __a
 ///    A 128-bit integer vector operand. The lower 64 bits are moved to the
@@ -4682,7 +4697,7 @@
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VMOVQ / MOVQ / MOVD </c> instruction.
+/// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction.
 ///
 /// \param __a
 ///    A 64-bit value.
@@ -4712,8 +4727,8 @@
   return __builtin_shufflevector((__v2di)__a, (__m128i){ 0 }, 0, 2);
 }
 
-/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
-///    of [2 x double] and interleaves them into a 128-bit vector of [2 x
+/// \brief Unpacks the high-order 64-bit elements from two 128-bit vectors of
+///    [2 x double] and interleaves them into a 128-bit vector of [2 x
 ///    double].
 ///
 /// \headerfile <x86intrin.h>
@@ -4733,7 +4748,7 @@
   return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
 }
 
-/// \brief Unpacks the low-order (even-indexed) values from two 128-bit vectors
+/// \brief Unpacks the low-order 64-bit elements from two 128-bit vectors
 ///    of [2 x double] and interleaves them into a 128-bit vector of [2 x
 ///    double].
 ///
@@ -4792,9 +4807,9 @@
 ///    A 128-bit vector of [2 x double].
 /// \param i
 ///    An 8-bit immediate value. The least significant two bits specify which
-///    elements to copy from a and b: \n
-///    Bit[0] = 0: lower element of a copied to lower element of result. \n
-///    Bit[0] = 1: upper element of a copied to lower element of result. \n
+///    elements to copy from \a a and \a b: \n
+///    Bit[0] = 0: lower element of \a a copied to lower element of result. \n
+///    Bit[0] = 1: upper element of \a a copied to lower element of result. \n
 ///    Bit[1] = 0: lower element of \a b copied to upper element of result. \n
 ///    Bit[1] = 1: upper element of \a b copied to upper element of result. \n
 /// \returns A 128-bit vector of [2 x double] containing the shuffled values.
diff --git a/lib/Headers/fma4intrin.h b/lib/Headers/fma4intrin.h
index 11aa8ce..962b1a6 100644
--- a/lib/Headers/fma4intrin.h
+++ b/lib/Headers/fma4intrin.h
@@ -60,73 +60,73 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -144,13 +144,13 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
@@ -168,37 +168,37 @@
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
@@ -216,13 +216,13 @@
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 #undef __DEFAULT_FN_ATTRS
diff --git a/lib/Headers/fmaintrin.h b/lib/Headers/fmaintrin.h
index 0e2ef0b..478a0ac 100644
--- a/lib/Headers/fmaintrin.h
+++ b/lib/Headers/fmaintrin.h
@@ -46,85 +46,85 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -142,13 +142,13 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
@@ -166,37 +166,37 @@
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
@@ -214,13 +214,13 @@
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 #undef __DEFAULT_FN_ATTRS
diff --git a/lib/Headers/gfniintrin.h b/lib/Headers/gfniintrin.h
new file mode 100644
index 0000000..20fadcc
--- /dev/null
+++ b/lib/Headers/gfniintrin.h
@@ -0,0 +1,202 @@
+/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __GFNIINTRIN_H
+#define __GFNIINTRIN_H
+
+
+#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({                   \
+  (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A),          \
+                                                  (__v16qi)(__m128i)(B),          \
+                                                  (char)(I)); })
+
+#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({        \
+  (__m128i)__builtin_ia32_selectb_128((__mmask16)(U),                             \
+        (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I),                          \
+        (__v16qi)(__m128i)(S)); })
+
+
+#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({          \
+  (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(),       \
+        U, A, B, I); })
+
+
+#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({                \
+  (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A),          \
+                                                  (__v32qi)(__m256i)(B),          \
+                                                  (char)(I)); })
+
+#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({     \
+   (__m256i)__builtin_ia32_selectb_256((__mmask32)(U),                            \
+        (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I),                       \
+        (__v32qi)(__m256i)(S)); })
+
+#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({       \
+  (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
+        U, A, B, I); })
+
+
+#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({                \
+  (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A),          \
+                                                  (__v64qi)(__m512i)(B),          \
+                                                  (char)(I)); })
+
+#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({     \
+   (__m512i)__builtin_ia32_selectb_512((__mmask64)(U),                            \
+        (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I),                       \
+        (__v64qi)(__m512i)(S)); })
+
+#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({       \
+  (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_qi(),    \
+        U, A, B, I); })
+
+#define _mm_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({                      \
+  (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A),             \
+                                                  (__v16qi)(__m128i)(B),          \
+                                                  (char)(I)); })
+
+#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({           \
+  (__m128i)__builtin_ia32_selectb_128((__mmask16)(U),                             \
+        (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I),                             \
+        (__v16qi)(__m128i)(S)); })
+
+
+#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({             \
+  (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(),          \
+        U, A, B, I); })
+
+
+#define _mm256_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({                   \
+  (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A),             \
+                                                  (__v32qi)(__m256i)(B),          \
+                                                  (char)(I)); })
+
+#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({        \
+   (__m256i)__builtin_ia32_selectb_256((__mmask32)(U),                            \
+        (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I),                          \
+        (__v32qi)(__m256i)(S)); })
+
+#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({          \
+  (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(),    \
+        U, A, B, I); })
+
+
+#define _mm512_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({                   \
+  (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A),             \
+                                                  (__v64qi)(__m512i)(B),          \
+                                                  (char)(I)); })
+
+#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({        \
+   (__m512i)__builtin_ia32_selectb_512((__mmask64)(U),                            \
+        (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I),                          \
+        (__v64qi)(__m512i)(S)); })
+
+#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({          \
+  (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_qi(),       \
+        U, A, B, I); })
+
+/* Default attributes for simple form (no masking). */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni")))
+
+/* Default attributes for ZMM forms. */
+#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni")))
+
+/* Default attributes for VLX forms. */
+#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni")))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
+              (__v16qi) __B);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS_VL
+_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_selectb_128(__U,
+              (__v16qi) _mm_gf2p8mul_epi8(__A, __B),
+              (__v16qi) __S);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS_VL
+_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)
+{
+  return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),
+              __U, __A, __B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,
+              (__v32qi) __B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS_VL
+_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_selectb_256(__U,
+              (__v32qi) _mm256_gf2p8mul_epi8(__A, __B),
+              (__v32qi) __S);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS_VL
+_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)
+{
+  return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),
+              __U, __A, __B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_F
+_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,
+              (__v64qi) __B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_F
+_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_selectb_512(__U,
+              (__v64qi) _mm512_gf2p8mul_epi8(__A, __B),
+              (__v64qi) __S);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_F
+_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)
+{
+  return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_qi(),
+              __U, __A, __B);
+}
+
+#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_F
+#undef __DEFAULT_FN_ATTRS_VL
+
+#endif // __GFNIINTRIN_H
+
diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h
index d86e0ef..a332879 100644
--- a/lib/Headers/immintrin.h
+++ b/lib/Headers/immintrin.h
@@ -118,6 +118,10 @@
 }
 #endif /* __AVX2__ */
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
+#include <vpclmulqdqintrin.h>
+#endif
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
 #include <bmiintrin.h>
 #endif
@@ -146,6 +150,10 @@
 #include <avx512bwintrin.h>
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
+#include <avx512bitalgintrin.h>
+#endif
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
 #include <avx512cdintrin.h>
 #endif
@@ -154,11 +162,30 @@
 #include <avx512vpopcntdqintrin.h>
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
+#include <avx512vpopcntdqvlintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
+#include <avx512vnniintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
+#include <avx512vlvnniintrin.h>
+#endif
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
 #include <avx512dqintrin.h>
 #endif
 
 #if !defined(_MSC_VER) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
+#include <avx512vlbitalgintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
     (defined(__AVX512VL__) && defined(__AVX512BW__))
 #include <avx512vlbwintrin.h>
 #endif
@@ -195,6 +222,15 @@
 #include <avx512vbmivlintrin.h>
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
+#include <avx512vbmi2intrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
+#include <avx512vlvbmi2intrin.h>
+#endif
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
 #include <avx512pfintrin.h>
 #endif
@@ -203,6 +239,26 @@
 #include <pkuintrin.h>
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
+#include <vaesintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
+#include <gfniintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
+/// \brief Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> RDPID </c> instruction.
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
+_rdpid_u32(void) {
+  return __builtin_ia32_rdpid();
+}
+#endif // __RDPID__
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
 _rdrand16_step(unsigned short *__p)
@@ -319,6 +375,10 @@
 #include <xsavesintrin.h>
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
+#include <cetintrin.h>
+#endif
+
 /* Some intrinsics inside adxintrin.h are available only on processors with ADX,
  * whereas others are also available at all times. */
 #include <adxintrin.h>
diff --git a/lib/Headers/module.modulemap b/lib/Headers/module.modulemap
index 95d26ce..961d8dd 100644
--- a/lib/Headers/module.modulemap
+++ b/lib/Headers/module.modulemap
@@ -38,6 +38,7 @@
     explicit module neon {
       requires neon
       header "arm_neon.h"
+      header "arm_fp16.h"
       export *
     }
   }
diff --git a/lib/Headers/opencl-c.h b/lib/Headers/opencl-c.h
index 35fb0a8..e648b0f 100644
--- a/lib/Headers/opencl-c.h
+++ b/lib/Headers/opencl-c.h
@@ -12862,7 +12862,7 @@
  * Read memory barrier that orders only
  * loads.
  * The flags argument specifies the memory
- * address space and can be set to to a
+ * address space and can be set to a
  * combination of the following literal
  * values:
  * CLK_LOCAL_MEM_FENCE
@@ -12874,7 +12874,7 @@
  * Write memory barrier that orders only
  * stores.
  * The flags argument specifies the memory
- * address space and can be set to to a
+ * address space and can be set to a
  * combination of the following literal
  * values:
  * CLK_LOCAL_MEM_FENCE
@@ -15421,8 +15421,8 @@
 #define CLK_DEPTH_STENCIL     0x10BE
 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 #define CLK_sRGB              0x10BF
-#define CLK_sRGBA             0x10C1
 #define CLK_sRGBx             0x10C0
+#define CLK_sRGBA             0x10C1
 #define CLK_sBGRA             0x10C2
 #define CLK_ABGR              0x10C3
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@@ -15886,6 +15886,313 @@
 
 #endif //cl_khr_subgroups cl_intel_subgroups
 
+#if defined(cl_intel_subgroups)
+// Intel-Specific Sub Group Functions
+float   __ovld __conv intel_sub_group_shuffle( float  x, uint c );
+float2  __ovld __conv intel_sub_group_shuffle( float2 x, uint c );
+float3  __ovld __conv intel_sub_group_shuffle( float3 x, uint c );
+float4  __ovld __conv intel_sub_group_shuffle( float4 x, uint c );
+float8  __ovld __conv intel_sub_group_shuffle( float8 x, uint c );
+float16 __ovld __conv intel_sub_group_shuffle( float16 x, uint c );
+
+int     __ovld __conv intel_sub_group_shuffle( int  x, uint c );
+int2    __ovld __conv intel_sub_group_shuffle( int2 x, uint c );
+int3    __ovld __conv intel_sub_group_shuffle( int3 x, uint c );
+int4    __ovld __conv intel_sub_group_shuffle( int4 x, uint c );
+int8    __ovld __conv intel_sub_group_shuffle( int8 x, uint c );
+int16   __ovld __conv intel_sub_group_shuffle( int16 x, uint c );
+
+uint    __ovld __conv intel_sub_group_shuffle( uint  x, uint c );
+uint2   __ovld __conv intel_sub_group_shuffle( uint2 x, uint c );
+uint3   __ovld __conv intel_sub_group_shuffle( uint3 x, uint c );
+uint4   __ovld __conv intel_sub_group_shuffle( uint4 x, uint c );
+uint8   __ovld __conv intel_sub_group_shuffle( uint8 x, uint c );
+uint16  __ovld __conv intel_sub_group_shuffle( uint16 x, uint c );
+
+long    __ovld __conv intel_sub_group_shuffle( long x, uint c );
+ulong   __ovld __conv intel_sub_group_shuffle( ulong x, uint c );
+
+float   __ovld __conv intel_sub_group_shuffle_down( float  cur, float  next, uint c );
+float2  __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint c );
+float3  __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint c );
+float4  __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint c );
+float8  __ovld __conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint c );
+float16 __ovld __conv intel_sub_group_shuffle_down( float16 cur, float16 next, uint c );
+
+int     __ovld __conv intel_sub_group_shuffle_down( int  cur, int  next, uint c );
+int2    __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint c );
+int3    __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint c );
+int4    __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint c );
+int8    __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint c );
+int16   __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint c );
+
+uint    __ovld __conv intel_sub_group_shuffle_down( uint  cur, uint  next, uint c );
+uint2   __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint c );
+uint3   __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint c );
+uint4   __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint c );
+uint8   __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint c );
+uint16  __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint c );
+
+long    __ovld __conv intel_sub_group_shuffle_down( long prev, long cur, uint c );
+ulong   __ovld __conv intel_sub_group_shuffle_down( ulong prev, ulong cur, uint c );
+
+float   __ovld __conv intel_sub_group_shuffle_up( float  prev, float  cur, uint c );
+float2  __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint c );
+float3  __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint c );
+float4  __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint c );
+float8  __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint c );
+float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint c );
+
+int     __ovld __conv intel_sub_group_shuffle_up( int  prev, int  cur, uint c );
+int2    __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint c );
+int3    __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint c );
+int4    __ovld __conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint c );
+int8    __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint c );
+int16   __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint c );
+
+uint    __ovld __conv intel_sub_group_shuffle_up( uint  prev, uint  cur, uint c );
+uint2   __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint c );
+uint3   __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint c );
+uint4   __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint c );
+uint8   __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint c );
+uint16  __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint c );
+
+long    __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint c );
+ulong   __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint c );
+
+float   __ovld __conv intel_sub_group_shuffle_xor( float  x, uint c );
+float2  __ovld __conv intel_sub_group_shuffle_xor( float2 x, uint c );
+float3  __ovld __conv intel_sub_group_shuffle_xor( float3 x, uint c );
+float4  __ovld __conv intel_sub_group_shuffle_xor( float4 x, uint c );
+float8  __ovld __conv intel_sub_group_shuffle_xor( float8 x, uint c );
+float16 __ovld __conv intel_sub_group_shuffle_xor( float16 x, uint c );
+
+int     __ovld __conv intel_sub_group_shuffle_xor( int  x, uint c );
+int2    __ovld __conv intel_sub_group_shuffle_xor( int2 x, uint c );
+int3    __ovld __conv intel_sub_group_shuffle_xor( int3 x, uint c );
+int4    __ovld __conv intel_sub_group_shuffle_xor( int4 x, uint c );
+int8    __ovld __conv intel_sub_group_shuffle_xor( int8 x, uint c );
+int16   __ovld __conv intel_sub_group_shuffle_xor( int16 x, uint c );
+
+uint    __ovld __conv intel_sub_group_shuffle_xor( uint  x, uint c );
+uint2   __ovld __conv intel_sub_group_shuffle_xor( uint2 x, uint c );
+uint3   __ovld __conv intel_sub_group_shuffle_xor( uint3 x, uint c );
+uint4   __ovld __conv intel_sub_group_shuffle_xor( uint4 x, uint c );
+uint8   __ovld __conv intel_sub_group_shuffle_xor( uint8 x, uint c );
+uint16  __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );
+
+long    __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );
+ulong   __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );
+
+uint    __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );
+uint2   __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );
+uint4   __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );
+uint8   __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );
+
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+uint    __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);
+uint2   __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);
+uint4   __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);
+uint8   __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+
+uint    __ovld __conv intel_sub_group_block_read( const __global uint* p );
+uint2   __ovld __conv intel_sub_group_block_read2( const __global uint* p );
+uint4   __ovld __conv intel_sub_group_block_read4( const __global uint* p );
+uint8   __ovld __conv intel_sub_group_block_read8( const __global uint* p );
+
+void    __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);
+void    __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);
+void    __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);
+void    __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);
+
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+void    __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);
+void    __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);
+void    __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);
+void    __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+
+void    __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );
+void    __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );
+void    __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data );
+void    __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data );
+
+#ifdef cl_khr_fp16
+half    __ovld __conv intel_sub_group_shuffle( half x, uint c );
+half    __ovld __conv intel_sub_group_shuffle_down( half prev, half cur, uint c );
+half    __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c );
+half    __ovld __conv intel_sub_group_shuffle_xor( half x, uint c );
+#endif
+
+#if defined(cl_khr_fp64)
+double  __ovld __conv intel_sub_group_shuffle( double x, uint c );
+double  __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c );
+double  __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c );
+double  __ovld __conv intel_sub_group_shuffle_xor( double x, uint c );
+#endif
+
+#endif //cl_intel_subgroups
+
+#if defined(cl_intel_subgroups_short)
+short       __ovld __conv intel_sub_group_broadcast( short  x, uint sub_group_local_id );
+short2      __ovld __conv intel_sub_group_broadcast( short2 x, uint sub_group_local_id );
+short3      __ovld __conv intel_sub_group_broadcast( short3 x, uint sub_group_local_id );
+short4      __ovld __conv intel_sub_group_broadcast( short4 x, uint sub_group_local_id );
+short8      __ovld __conv intel_sub_group_broadcast( short8 x, uint sub_group_local_id );
+
+ushort      __ovld __conv intel_sub_group_broadcast( ushort  x, uint sub_group_local_id );
+ushort2     __ovld __conv intel_sub_group_broadcast( ushort2 x, uint sub_group_local_id );
+ushort3     __ovld __conv intel_sub_group_broadcast( ushort3 x, uint sub_group_local_id );
+ushort4     __ovld __conv intel_sub_group_broadcast( ushort4 x, uint sub_group_local_id );
+ushort8     __ovld __conv intel_sub_group_broadcast( ushort8 x, uint sub_group_local_id );
+
+short       __ovld __conv intel_sub_group_shuffle( short   x, uint c );
+short2      __ovld __conv intel_sub_group_shuffle( short2  x, uint c );
+short3      __ovld __conv intel_sub_group_shuffle( short3  x, uint c );
+short4      __ovld __conv intel_sub_group_shuffle( short4  x, uint c );
+short8      __ovld __conv intel_sub_group_shuffle( short8  x, uint c );
+short16     __ovld __conv intel_sub_group_shuffle( short16 x, uint c);
+
+ushort      __ovld __conv intel_sub_group_shuffle( ushort   x, uint c );
+ushort2     __ovld __conv intel_sub_group_shuffle( ushort2  x, uint c );
+ushort3     __ovld __conv intel_sub_group_shuffle( ushort3  x, uint c );
+ushort4     __ovld __conv intel_sub_group_shuffle( ushort4  x, uint c );
+ushort8     __ovld __conv intel_sub_group_shuffle( ushort8  x, uint c );
+ushort16    __ovld __conv intel_sub_group_shuffle( ushort16 x, uint c );
+
+short       __ovld __conv intel_sub_group_shuffle_down( short   cur, short   next, uint c );
+short2      __ovld __conv intel_sub_group_shuffle_down( short2  cur, short2  next, uint c );
+short3      __ovld __conv intel_sub_group_shuffle_down( short3  cur, short3  next, uint c );
+short4      __ovld __conv intel_sub_group_shuffle_down( short4  cur, short4  next, uint c );
+short8      __ovld __conv intel_sub_group_shuffle_down( short8  cur, short8  next, uint c );
+short16     __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint c );
+
+ushort      __ovld __conv intel_sub_group_shuffle_down( ushort   cur, ushort   next, uint c );
+ushort2     __ovld __conv intel_sub_group_shuffle_down( ushort2  cur, ushort2  next, uint c );
+ushort3     __ovld __conv intel_sub_group_shuffle_down( ushort3  cur, ushort3  next, uint c );
+ushort4     __ovld __conv intel_sub_group_shuffle_down( ushort4  cur, ushort4  next, uint c );
+ushort8     __ovld __conv intel_sub_group_shuffle_down( ushort8  cur, ushort8  next, uint c );
+ushort16    __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint c );
+
+short       __ovld __conv intel_sub_group_shuffle_up( short   cur, short   next, uint c );
+short2      __ovld __conv intel_sub_group_shuffle_up( short2  cur, short2  next, uint c );
+short3      __ovld __conv intel_sub_group_shuffle_up( short3  cur, short3  next, uint c );
+short4      __ovld __conv intel_sub_group_shuffle_up( short4  cur, short4  next, uint c );
+short8      __ovld __conv intel_sub_group_shuffle_up( short8  cur, short8  next, uint c );
+short16     __ovld __conv intel_sub_group_shuffle_up( short16 cur, short16 next, uint c );
+
+ushort      __ovld __conv intel_sub_group_shuffle_up( ushort   cur, ushort   next, uint c );
+ushort2     __ovld __conv intel_sub_group_shuffle_up( ushort2  cur, ushort2  next, uint c );
+ushort3     __ovld __conv intel_sub_group_shuffle_up( ushort3  cur, ushort3  next, uint c );
+ushort4     __ovld __conv intel_sub_group_shuffle_up( ushort4  cur, ushort4  next, uint c );
+ushort8     __ovld __conv intel_sub_group_shuffle_up( ushort8  cur, ushort8  next, uint c );
+ushort16    __ovld __conv intel_sub_group_shuffle_up( ushort16 cur, ushort16 next, uint c );
+
+short       __ovld __conv intel_sub_group_shuffle_xor( short   x, uint c );
+short2      __ovld __conv intel_sub_group_shuffle_xor( short2  x, uint c );
+short3      __ovld __conv intel_sub_group_shuffle_xor( short3  x, uint c );
+short4      __ovld __conv intel_sub_group_shuffle_xor( short4  x, uint c );
+short8      __ovld __conv intel_sub_group_shuffle_xor( short8  x, uint c );
+short16     __ovld __conv intel_sub_group_shuffle_xor( short16 x, uint c );
+
+ushort      __ovld __conv intel_sub_group_shuffle_xor( ushort   x, uint c );
+ushort2     __ovld __conv intel_sub_group_shuffle_xor( ushort2  x, uint c );
+ushort3     __ovld __conv intel_sub_group_shuffle_xor( ushort3  x, uint c );
+ushort4     __ovld __conv intel_sub_group_shuffle_xor( ushort4  x, uint c );
+ushort8     __ovld __conv intel_sub_group_shuffle_xor( ushort8  x, uint c );
+ushort16    __ovld __conv intel_sub_group_shuffle_xor( ushort16 x, uint c );
+
+short       __ovld __conv intel_sub_group_reduce_add( short   x );
+ushort      __ovld __conv intel_sub_group_reduce_add( ushort  x );
+short       __ovld __conv intel_sub_group_reduce_min( short   x );
+ushort      __ovld __conv intel_sub_group_reduce_min( ushort  x );
+short       __ovld __conv intel_sub_group_reduce_max( short   x );
+ushort      __ovld __conv intel_sub_group_reduce_max( ushort  x );
+
+short       __ovld __conv intel_sub_group_scan_exclusive_add( short   x );
+ushort      __ovld __conv intel_sub_group_scan_exclusive_add( ushort  x );
+short       __ovld __conv intel_sub_group_scan_exclusive_min( short   x );
+ushort      __ovld __conv intel_sub_group_scan_exclusive_min( ushort  x );
+short       __ovld __conv intel_sub_group_scan_exclusive_max( short   x );
+ushort      __ovld __conv intel_sub_group_scan_exclusive_max( ushort  x );
+
+short       __ovld __conv intel_sub_group_scan_inclusive_add( short   x );
+ushort      __ovld __conv intel_sub_group_scan_inclusive_add( ushort  x );
+short       __ovld __conv intel_sub_group_scan_inclusive_min( short   x );
+ushort      __ovld __conv intel_sub_group_scan_inclusive_min( ushort  x );
+short       __ovld __conv intel_sub_group_scan_inclusive_max( short   x );
+ushort      __ovld __conv intel_sub_group_scan_inclusive_max( ushort  x );
+
+uint       __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );
+uint2      __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );
+uint4      __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );
+uint8      __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );
+
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+uint       __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );
+uint2      __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );
+uint4      __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );
+uint8      __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+
+uint       __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
+uint2      __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );
+uint4      __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );
+uint8      __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );
+
+void       __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );
+void       __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );
+void       __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );
+void       __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );
+
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+void       __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );
+void       __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );
+void       __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );
+void       __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+
+void       __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
+void       __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
+void       __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
+void       __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );
+
+ushort      __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );
+ushort2     __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );
+ushort4     __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );
+ushort8     __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );
+
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+ushort      __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);
+ushort2     __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);
+ushort4     __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);
+ushort8     __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+
+ushort      __ovld __conv intel_sub_group_block_read_us(  const __global ushort* p );
+ushort2     __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );
+ushort4     __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );
+ushort8     __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );
+
+void        __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort  data);
+void        __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);
+void        __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);
+void        __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);
+
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+void        __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort  data);
+void        __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);
+void        __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);
+void        __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+
+void        __ovld __conv intel_sub_group_block_write_us(  __global ushort* p, ushort  data );
+void        __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );
+void        __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );
+void        __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
+#endif // cl_intel_subgroups_short
+
 #ifdef cl_amd_media_ops
 uint __ovld amd_bitalign(uint a, uint b, uint c);
 uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);
diff --git a/lib/Headers/pmmintrin.h b/lib/Headers/pmmintrin.h
index 559ece2..7ec08a1 100644
--- a/lib/Headers/pmmintrin.h
+++ b/lib/Headers/pmmintrin.h
@@ -115,8 +115,8 @@
   return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
 }
 
-/// \brief Moves and duplicates high-order (odd-indexed) values from a 128-bit
-///    vector of [4 x float] to float values stored in a 128-bit vector of
+/// \brief Moves and duplicates odd-indexed values from a 128-bit vector
+///    of [4 x float] to float values stored in a 128-bit vector of
 ///    [4 x float].
 ///
 /// \headerfile <x86intrin.h>
@@ -137,7 +137,7 @@
   return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
 }
 
-/// \brief Duplicates low-order (even-indexed) values from a 128-bit vector of
+/// \brief Duplicates even-indexed values from a 128-bit vector of
 ///    [4 x float] to float values stored in a 128-bit vector of [4 x float].
 ///
 /// \headerfile <x86intrin.h>
diff --git a/lib/Headers/smmintrin.h b/lib/Headers/smmintrin.h
index c2fa5a4..e02775c 100644
--- a/lib/Headers/smmintrin.h
+++ b/lib/Headers/smmintrin.h
@@ -648,7 +648,7 @@
 ///    input vectors are used as an input for dot product; otherwise that input
 ///    is treated as zero. Bits [1:0] determine which elements of the result
 ///    will receive a copy of the final dot product, with bit [0] corresponding
-///    to the lowest element and bit [3] corresponding to the highest element of
+///    to the lowest element and bit [1] corresponding to the highest element of
 ///    each [2 x double] vector. If a bit is set, the dot product is returned in
 ///    the corresponding element; otherwise that element is set to zero.
 #define _mm_dp_pd(X, Y, M) __extension__ ({\
@@ -866,8 +866,8 @@
 ///      11: Copies the selected bits from \a Y to result bits [127:96]. \n
 ///    Bits[3:0]: If any of these bits are set, the corresponding result
 ///    element is cleared.
-/// \returns A 128-bit vector of [4 x float] containing the copied single-
-///    precision floating point elements from the operands.
+/// \returns A 128-bit vector of [4 x float] containing the copied
+///    single-precision floating point elements from the operands.
 #define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
 
 /// \brief Extracts a 32-bit integer from a 128-bit vector of [4 x float] and
diff --git a/lib/Headers/stdbool.h b/lib/Headers/stdbool.h
index 0467893..5cb66b5 100644
--- a/lib/Headers/stdbool.h
+++ b/lib/Headers/stdbool.h
@@ -32,12 +32,15 @@
 #define true 1
 #define false 0
 #elif defined(__GNUC__) && !defined(__STRICT_ANSI__)
-/* Define _Bool, bool, false, true as a GNU extension. */
+/* Define _Bool as a GNU extension. */
 #define _Bool bool
+#if __cplusplus < 201103L
+/* For C++98, define bool, false, true as a GNU extension. */
 #define bool  bool
 #define false false
 #define true  true
 #endif
+#endif
 
 #define __bool_true_false_are_defined 1
 
diff --git a/lib/Headers/tmmintrin.h b/lib/Headers/tmmintrin.h
index 042bfc7..97d6148 100644
--- a/lib/Headers/tmmintrin.h
+++ b/lib/Headers/tmmintrin.h
@@ -276,8 +276,9 @@
 }
 
 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
-///    128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
-///    saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
+///    128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
+///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
+///    0x8000.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -300,8 +301,9 @@
 }
 
 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
-///    64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
-///    saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
+///    64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
+///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
+///    0x8000.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -417,8 +419,8 @@
 
 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
 ///    packed 128-bit vectors of [8 x i16]. Positive differences greater than
-///    7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
-///    saturated to 8000h.
+///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
+///    saturated to 0x8000.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -442,8 +444,8 @@
 
 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
 ///    packed 64-bit vectors of [4 x i16]. Positive differences greater than
-///    7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
-///    saturated to 8000h.
+///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
+///    saturated to 0x8000.
 ///
 /// \headerfile <x86intrin.h>
 ///
diff --git a/lib/Headers/vaesintrin.h b/lib/Headers/vaesintrin.h
new file mode 100644
index 0000000..efbb8a5
--- /dev/null
+++ b/lib/Headers/vaesintrin.h
@@ -0,0 +1,98 @@
+/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <vaesintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __VAESINTRIN_H
+#define __VAESINTRIN_H
+
+/* Default attributes for YMM forms. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes")))
+
+/* Default attributes for ZMM forms. */
+#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes")))
+
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+ _mm256_aesenc_epi128(__m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_aesenc256((__v4di) __A,
+              (__v4di) __B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_F
+ _mm512_aesenc_epi128(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_aesenc512((__v8di) __A,
+              (__v8di) __B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+ _mm256_aesdec_epi128(__m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_aesdec256((__v4di) __A,
+              (__v4di) __B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_F
+ _mm512_aesdec_epi128(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_aesdec512((__v8di) __A,
+              (__v8di) __B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+ _mm256_aesenclast_epi128(__m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A,
+              (__v4di) __B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_F
+ _mm512_aesenclast_epi128(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A,
+              (__v8di) __B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+ _mm256_aesdeclast_epi128(__m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A,
+              (__v4di) __B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_F
+ _mm512_aesdeclast_epi128(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A,
+              (__v8di) __B);
+}
+
+
+#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_F
+
+#endif
diff --git a/lib/Headers/vpclmulqdqintrin.h b/lib/Headers/vpclmulqdqintrin.h
new file mode 100644
index 0000000..21cda22
--- /dev/null
+++ b/lib/Headers/vpclmulqdqintrin.h
@@ -0,0 +1,42 @@
+/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __VPCLMULQDQINTRIN_H
+#define __VPCLMULQDQINTRIN_H
+
+#define _mm256_clmulepi64_epi128(A, B, I) __extension__ ({    \
+  (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A),  \
+                                       (__v4di)(__m256i)(B),  \
+                                       (char)(I)); })
+
+#define _mm512_clmulepi64_epi128(A, B, I) __extension__ ({    \
+  (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A),  \
+                                       (__v8di)(__m512i)(B),  \
+                                       (char)(I)); })
+
+#endif // __VPCLMULQDQINTRIN_H
+
diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h
index bbc2117..5e8901c 100644
--- a/lib/Headers/xmmintrin.h
+++ b/lib/Headers/xmmintrin.h
@@ -1011,6 +1011,8 @@
 /// \brief Compares two 32-bit float values in the low-order bits of both
 ///    operands for equality and returns the result of the comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 0 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>
@@ -1022,7 +1024,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the
+///    two lower 32-bit values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comieq_ss(__m128 __a, __m128 __b)
 {
@@ -1033,6 +1036,8 @@
 ///    operands to determine if the first operand is less than the second
 ///    operand and returns the result of the comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 0 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>
@@ -1044,7 +1049,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///     lower 32-bit values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comilt_ss(__m128 __a, __m128 __b)
 {
@@ -1055,6 +1061,8 @@
 ///    operands to determine if the first operand is less than or equal to the
 ///    second operand and returns the result of the comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 0 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.
@@ -1065,7 +1073,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///     lower 32-bit values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comile_ss(__m128 __a, __m128 __b)
 {
@@ -1076,6 +1085,8 @@
 ///    operands to determine if the first operand is greater than the second
 ///    operand and returns the result of the comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 0 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.
@@ -1086,7 +1097,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the
+///     two lower 32-bit values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comigt_ss(__m128 __a, __m128 __b)
 {
@@ -1097,6 +1109,8 @@
 ///    operands to determine if the first operand is greater than or equal to
 ///    the second operand and returns the result of the comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 0 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.
@@ -1107,7 +1121,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///    lower 32-bit values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comige_ss(__m128 __a, __m128 __b)
 {
@@ -1118,6 +1133,8 @@
 ///    operands to determine if the first operand is not equal to the second
 ///    operand and returns the result of the comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 1 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.
@@ -1128,7 +1145,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the
+///     two lower 32-bit values is NaN, 1 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comineq_ss(__m128 __a, __m128 __b)
 {
@@ -1139,6 +1157,8 @@
 ///    the low-order bits of both operands to determine equality and returns
 ///    the result of the comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 0 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.
@@ -1149,7 +1169,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///     lower 32-bit values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomieq_ss(__m128 __a, __m128 __b)
 {
@@ -1160,6 +1181,8 @@
 ///    the low-order bits of both operands to determine if the first operand is
 ///    less than the second operand and returns the result of the comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 0 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.
@@ -1170,7 +1193,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///    lower 32-bit values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomilt_ss(__m128 __a, __m128 __b)
 {
@@ -1182,6 +1206,8 @@
 ///    less than or equal to the second operand and returns the result of the
 ///    comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 0 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.
@@ -1192,7 +1218,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///     lower 32-bit values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomile_ss(__m128 __a, __m128 __b)
 {
@@ -1204,6 +1231,8 @@
 ///    greater than the second operand and returns the result of the
 ///    comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 0 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.
@@ -1214,7 +1243,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///     lower 32-bit values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomigt_ss(__m128 __a, __m128 __b)
 {
@@ -1226,6 +1256,8 @@
 ///    greater than or equal to the second operand and returns the result of
 ///    the comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 0 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.
@@ -1236,7 +1268,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///     lower 32-bit values is NaN, 0 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomige_ss(__m128 __a, __m128 __b)
 {
@@ -1247,6 +1280,8 @@
 ///    the low-order bits of both operands to determine inequality and returns
 ///    the result of the comparison.
 ///
+///    If either of the two lower 32-bit values is NaN, 1 is returned.
+///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.
@@ -1257,7 +1292,8 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+///    lower 32-bit values is NaN, 1 is returned.
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomineq_ss(__m128 __a, __m128 __b)
 {
@@ -1571,7 +1607,7 @@
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.
+/// This intrinsic has no corresponding instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
@@ -1667,7 +1703,7 @@
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VMOVSS / MOVSS + shuffling </c>
+/// This intrinsic corresponds to the <c> VBROADCASTSS / MOVSS + shuffling </c>
 ///    instruction.
 ///
 /// \param __p
@@ -1696,7 +1732,7 @@
 /// \param __p
 ///    A pointer to a 128-bit memory location. The address of the memory
 ///    location has to be 128-bit aligned.
-/// \returns A 128-bit vector of [4 x float] containing the loaded valus.
+/// \returns A 128-bit vector of [4 x float] containing the loaded values.
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_load_ps(const float *__p)
 {
@@ -1888,7 +1924,7 @@
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPEXTRQ / MOVQ </c> instruction.
+/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.
 ///
 /// \param __p
 ///    A pointer to a 64-bit memory location.
@@ -2035,9 +2071,11 @@
   _mm_store_ps(__p, __a);
 }
 
-#define _MM_HINT_T0 3
-#define _MM_HINT_T1 2
-#define _MM_HINT_T2 1
+#define _MM_HINT_ET0 7
+#define _MM_HINT_ET1 6
+#define _MM_HINT_T0  3
+#define _MM_HINT_T1  2
+#define _MM_HINT_T2  1
 #define _MM_HINT_NTA 0
 
 #ifndef _MSC_VER
@@ -2068,7 +2106,8 @@
 ///    be generated. \n
 ///    _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will
 ///    be generated.
-#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
+#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \
+                                                 ((sel) >> 2) & 1, (sel) & 0x3))
 #endif
 
 /// \brief Stores a 64-bit integer in the specified aligned memory location. To
@@ -2160,7 +2199,7 @@
 /// __m64 _mm_insert_pi16(__m64 a, int d, int n);
 /// \endcode
 ///
-/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
+/// This intrinsic corresponds to the <c> PINSRW </c> instruction.
 ///
 /// \param a
 ///    A 64-bit vector of [4 x i16].
@@ -2258,7 +2297,7 @@
 }
 
 /// \brief Takes the most significant bit from each 8-bit element in a 64-bit
-///    integer vector to create a 16-bit mask value. Zero-extends the value to
+///    integer vector to create an 8-bit mask value. Zero-extends the value to
 ///    32-bit integer and writes it to the destination.
 ///
 /// \headerfile <x86intrin.h>
@@ -2267,8 +2306,8 @@
 ///
 /// \param __a
 ///    A 64-bit integer vector containing the values with bits to be extracted.
-/// \returns The most significant bit from each 8-bit element in the operand,
-///    written to bits [15:0].
+/// \returns The most significant bit from each 8-bit element in \a __a,
+///    written to bits [7:0].
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_movemask_pi8(__m64 __a)
 {
@@ -2441,7 +2480,7 @@
 ///    <li>
 ///      For checking rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,
 ///      _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper
-///      _MM_GET_ROUNDING_MODE(x) where x is one of these macros.
+///      _MM_GET_ROUNDING_MODE().
 ///    </li>
 ///    <li>
 ///      For checking flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.
@@ -2454,11 +2493,11 @@
 ///    </li>
 ///    </ul>
 ///
-///    For example, the expression below checks if an overflow exception has
+///    For example, the following expression checks if an overflow exception has
 ///    occurred:
 ///      ( _mm_getcsr() & _MM_EXCEPT_OVERFLOW )
 ///
-///    The following example gets the current rounding mode:
+///    The following expression gets the current rounding mode:
 ///      _MM_GET_ROUNDING_MODE()
 ///
 /// \headerfile <x86intrin.h>
@@ -2508,10 +2547,12 @@
 ///      _mm_setcsr(_mm_getcsr() | _MM_ROUND_UP)
 ///
 ///    The following example sets the DAZ and FTZ flags:
-///      void setFlags() {
-///        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON)
-///        _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON)
-///      }
+///    \code
+///    void setFlags() {
+///      _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+///      _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+///    }
+///    \endcode
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2618,7 +2659,8 @@
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.
+/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS / MOVSS </c>
+///    instruction.
 ///
 /// \param __a
 ///    A 128-bit floating-point vector of [4 x float]. The upper 96 bits are
diff --git a/lib/Index/IndexBody.cpp b/lib/Index/IndexBody.cpp
index 6bbd381..ac34956 100644
--- a/lib/Index/IndexBody.cpp
+++ b/lib/Index/IndexBody.cpp
@@ -427,6 +427,17 @@
 
     return true;
   }
+
+  bool VisitOffsetOfExpr(OffsetOfExpr *S) {
+    for (unsigned I = 0, E = S->getNumComponents(); I != E; ++I) {
+      const OffsetOfNode &Component = S->getComponent(I);
+      if (Component.getKind() == OffsetOfNode::Field)
+        IndexCtx.handleReference(Component.getField(), Component.getLocEnd(),
+                                 Parent, ParentDC, SymbolRoleSet(), {});
+      // FIXME: Try to resolve dependent field references.
+    }
+    return true;
+  }
 };
 
 } // anonymous namespace
diff --git a/lib/Index/IndexDecl.cpp b/lib/Index/IndexDecl.cpp
index 2704e6d..f1ef6c0 100644
--- a/lib/Index/IndexDecl.cpp
+++ b/lib/Index/IndexDecl.cpp
@@ -233,9 +233,8 @@
     if (auto *CXXMD = dyn_cast<CXXMethodDecl>(D)) {
       if (CXXMD->isVirtual())
         Roles |= (unsigned)SymbolRole::Dynamic;
-      for (auto I = CXXMD->begin_overridden_methods(),
-           E = CXXMD->end_overridden_methods(); I != E; ++I) {
-        Relations.emplace_back((unsigned)SymbolRole::RelationOverrideOf, *I);
+      for (const CXXMethodDecl *O : CXXMD->overridden_methods()) {
+        Relations.emplace_back((unsigned)SymbolRole::RelationOverrideOf, O);
       }
     }
     gatherTemplatePseudoOverrides(D, Relations);
@@ -354,12 +353,10 @@
         gatherTemplatePseudoOverrides(D, Relations);
         IndexCtx.indexTagDecl(D, Relations);
       } else {
-        auto *Parent = dyn_cast<NamedDecl>(D->getDeclContext());
         SmallVector<SymbolRelation, 1> Relations;
         gatherTemplatePseudoOverrides(D, Relations);
-        return IndexCtx.handleReference(D, D->getLocation(), Parent,
-                                        D->getLexicalDeclContext(),
-                                        SymbolRoleSet(), Relations);
+        return IndexCtx.handleDecl(D, D->getLocation(), SymbolRoleSet(),
+                                   Relations, D->getLexicalDeclContext());
       }
     }
     return true;
@@ -667,8 +664,11 @@
 
   bool VisitTemplateDecl(const TemplateDecl *D) {
 
-    // Index the default values for the template parameters.
     const NamedDecl *Parent = D->getTemplatedDecl();
+    if (!Parent)
+      return true;
+
+    // Index the default values for the template parameters.
     if (D->getTemplateParameters() &&
         shouldIndexTemplateParameterDefaultValue(Parent)) {
       const TemplateParameterList *Params = D->getTemplateParameters();
@@ -687,7 +687,7 @@
       }
     }
 
-    return Visit(D->getTemplatedDecl());
+    return Visit(Parent);
   }
 
   bool VisitFriendDecl(const FriendDecl *D) {
diff --git a/lib/Index/IndexSymbol.cpp b/lib/Index/IndexSymbol.cpp
index 03db0cd..733d4db 100644
--- a/lib/Index/IndexSymbol.cpp
+++ b/lib/Index/IndexSymbol.cpp
@@ -42,10 +42,10 @@
 
 static void checkForIBOutlets(const Decl *D, SymbolPropertySet &PropSet) {
   if (D->hasAttr<IBOutletAttr>()) {
-    PropSet |= (unsigned)SymbolProperty::IBAnnotated;
+    PropSet |= (SymbolPropertySet)SymbolProperty::IBAnnotated;
   } else if (D->hasAttr<IBOutletCollectionAttr>()) {
-    PropSet |= (unsigned)SymbolProperty::IBAnnotated;
-    PropSet |= (unsigned)SymbolProperty::IBOutletCollection;
+    PropSet |= (SymbolPropertySet)SymbolProperty::IBAnnotated;
+    PropSet |= (SymbolPropertySet)SymbolProperty::IBOutletCollection;
   }
 }
 
@@ -93,7 +93,7 @@
   Info.Lang = SymbolLanguage::C;
 
   if (isFunctionLocalSymbol(D)) {
-    Info.Properties |= (unsigned)SymbolProperty::Local;
+    Info.Properties |= (SymbolPropertySet)SymbolProperty::Local;
   }
 
   if (const TagDecl *TD = dyn_cast<TagDecl>(D)) {
@@ -118,17 +118,19 @@
       if (!CXXRec->isCLike()) {
         Info.Lang = SymbolLanguage::CXX;
         if (CXXRec->getDescribedClassTemplate()) {
-          Info.Properties |= (unsigned)SymbolProperty::Generic;
+          Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
         }
       }
     }
 
     if (isa<ClassTemplatePartialSpecializationDecl>(D)) {
-      Info.Properties |= (unsigned)SymbolProperty::Generic;
-      Info.Properties |= (unsigned)SymbolProperty::TemplatePartialSpecialization;
+      Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
+      Info.Properties |=
+          (SymbolPropertySet)SymbolProperty::TemplatePartialSpecialization;
     } else if (isa<ClassTemplateSpecializationDecl>(D)) {
-      Info.Properties |= (unsigned)SymbolProperty::Generic;
-      Info.Properties |= (unsigned)SymbolProperty::TemplateSpecialization;
+      Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
+      Info.Properties |=
+          (SymbolPropertySet)SymbolProperty::TemplateSpecialization;
     }
 
   } else if (auto *VD = dyn_cast<VarDecl>(D)) {
@@ -142,15 +144,17 @@
 
     if (isa<VarTemplatePartialSpecializationDecl>(D)) {
       Info.Lang = SymbolLanguage::CXX;
-      Info.Properties |= (unsigned)SymbolProperty::Generic;
-      Info.Properties |= (unsigned)SymbolProperty::TemplatePartialSpecialization;
+      Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
+      Info.Properties |=
+          (SymbolPropertySet)SymbolProperty::TemplatePartialSpecialization;
     } else if (isa<VarTemplateSpecializationDecl>(D)) {
       Info.Lang = SymbolLanguage::CXX;
-      Info.Properties |= (unsigned)SymbolProperty::Generic;
-      Info.Properties |= (unsigned)SymbolProperty::TemplateSpecialization;
+      Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
+      Info.Properties |=
+          (SymbolPropertySet)SymbolProperty::TemplateSpecialization;
     } else if (VD->getDescribedVarTemplate()) {
       Info.Lang = SymbolLanguage::CXX;
-      Info.Properties |= (unsigned)SymbolProperty::Generic;
+      Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
     }
 
   } else {
@@ -181,7 +185,7 @@
       if (!ClsD)
         ClsD = cast<ObjCImplementationDecl>(D)->getClassInterface();
       if (isUnitTestCase(ClsD))
-        Info.Properties |= (unsigned)SymbolProperty::UnitTest;
+        Info.Properties |= (SymbolPropertySet)SymbolProperty::UnitTest;
       break;
     }
     case Decl::ObjCProtocol:
@@ -198,7 +202,7 @@
       else
         ClsD = cast<ObjCCategoryImplDecl>(D)->getClassInterface();
       if (isUnitTestCase(ClsD))
-        Info.Properties |= (unsigned)SymbolProperty::UnitTest;
+        Info.Properties |= (SymbolPropertySet)SymbolProperty::UnitTest;
       break;
     }
     case Decl::ObjCMethod: {
@@ -212,9 +216,9 @@
       }
       Info.Lang = SymbolLanguage::ObjC;
       if (isUnitTest(MD))
-        Info.Properties |= (unsigned)SymbolProperty::UnitTest;
+        Info.Properties |= (SymbolPropertySet)SymbolProperty::UnitTest;
       if (D->hasAttr<IBActionAttr>())
-        Info.Properties |= (unsigned)SymbolProperty::IBAnnotated;
+        Info.Properties |= (SymbolPropertySet)SymbolProperty::IBAnnotated;
       break;
     }
     case Decl::ObjCProperty:
@@ -223,7 +227,7 @@
       checkForIBOutlets(D, Info.Properties);
       if (auto *Annot = D->getAttr<AnnotateAttr>()) {
         if (Annot->getAnnotation() == "gk_inspectable")
-          Info.Properties |= (unsigned)SymbolProperty::GKInspectable;
+          Info.Properties |= (SymbolPropertySet)SymbolProperty::GKInspectable;
       }
       break;
     case Decl::ObjCIvar:
@@ -268,12 +272,12 @@
     }
     case Decl::ClassTemplate:
       Info.Kind = SymbolKind::Class;
-      Info.Properties |= (unsigned)SymbolProperty::Generic;
+      Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
       Info.Lang = SymbolLanguage::CXX;
       break;
     case Decl::FunctionTemplate:
       Info.Kind = SymbolKind::Function;
-      Info.Properties |= (unsigned)SymbolProperty::Generic;
+      Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
       Info.Lang = SymbolLanguage::CXX;
       if (const CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(
                            cast<FunctionTemplateDecl>(D)->getTemplatedDecl())) {
@@ -294,7 +298,7 @@
     case Decl::TypeAliasTemplate:
       Info.Kind = SymbolKind::TypeAlias;
       Info.Lang = SymbolLanguage::CXX;
-      Info.Properties |= (unsigned)SymbolProperty::Generic;
+      Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
       break;
     case Decl::TypeAlias:
       Info.Kind = SymbolKind::TypeAlias;
@@ -304,13 +308,13 @@
       Info.Kind = SymbolKind::Using;
       Info.SubKind = SymbolSubKind::UsingTypename;
       Info.Lang = SymbolLanguage::CXX;
-      Info.Properties |= (unsigned)SymbolProperty::Generic;
+      Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
       break;
     case Decl::UnresolvedUsingValue:
       Info.Kind = SymbolKind::Using;
       Info.SubKind = SymbolSubKind::UsingValue;
       Info.Lang = SymbolLanguage::CXX;
-      Info.Properties |= (unsigned)SymbolProperty::Generic;
+      Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
       break;
     case Decl::Binding:
       Info.Kind = SymbolKind::Variable;
@@ -327,12 +331,13 @@
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
     if (FD->getTemplatedKind() ==
           FunctionDecl::TK_FunctionTemplateSpecialization) {
-      Info.Properties |= (unsigned)SymbolProperty::Generic;
-      Info.Properties |= (unsigned)SymbolProperty::TemplateSpecialization;
+      Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
+      Info.Properties |=
+          (SymbolPropertySet)SymbolProperty::TemplateSpecialization;
     }
   }
 
-  if (Info.Properties & (unsigned)SymbolProperty::Generic)
+  if (Info.Properties & (SymbolPropertySet)SymbolProperty::Generic)
     Info.Lang = SymbolLanguage::CXX;
 
   if (auto *attr = D->getExternalSourceSymbolAttr()) {
@@ -490,9 +495,9 @@
 
 void index::applyForEachSymbolProperty(SymbolPropertySet Props,
                                   llvm::function_ref<void(SymbolProperty)> Fn) {
-#define APPLY_FOR_PROPERTY(K) \
-  if (Props & (unsigned)SymbolProperty::K) \
-    Fn(SymbolProperty::K)
+#define APPLY_FOR_PROPERTY(K)                                                  \
+  if (Props & (SymbolPropertySet)SymbolProperty::K)                            \
+  Fn(SymbolProperty::K)
 
   APPLY_FOR_PROPERTY(Generic);
   APPLY_FOR_PROPERTY(TemplatePartialSpecialization);
diff --git a/lib/Index/IndexingAction.cpp b/lib/Index/IndexingAction.cpp
index 84d3120..411657b 100644
--- a/lib/Index/IndexingAction.cpp
+++ b/lib/Index/IndexingAction.cpp
@@ -8,10 +8,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Index/IndexingAction.h"
-#include "clang/Index/IndexDataConsumer.h"
 #include "IndexingContext.h"
+#include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendAction.h"
 #include "clang/Frontend/MultiplexConsumer.h"
+#include "clang/Index/IndexDataConsumer.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Serialization/ASTReader.h"
 
@@ -42,16 +43,18 @@
 namespace {
 
 class IndexASTConsumer : public ASTConsumer {
+  std::shared_ptr<Preprocessor> PP;
   IndexingContext &IndexCtx;
 
 public:
-  IndexASTConsumer(IndexingContext &IndexCtx)
-    : IndexCtx(IndexCtx) {}
+  IndexASTConsumer(std::shared_ptr<Preprocessor> PP, IndexingContext &IndexCtx)
+      : PP(std::move(PP)), IndexCtx(IndexCtx) {}
 
 protected:
   void Initialize(ASTContext &Context) override {
     IndexCtx.setASTContext(Context);
     IndexCtx.getDataConsumer().initialize(Context);
+    IndexCtx.getDataConsumer().setPreprocessor(PP);
   }
 
   bool HandleTopLevelDecl(DeclGroupRef DG) override {
@@ -80,8 +83,10 @@
     : DataConsumer(std::move(dataConsumer)),
       IndexCtx(Opts, *DataConsumer) {}
 
-  std::unique_ptr<IndexASTConsumer> createIndexASTConsumer() {
-    return llvm::make_unique<IndexASTConsumer>(IndexCtx);
+  std::unique_ptr<IndexASTConsumer>
+  createIndexASTConsumer(CompilerInstance &CI) {
+    return llvm::make_unique<IndexASTConsumer>(CI.getPreprocessorPtr(),
+                                               IndexCtx);
   }
 
   void finish() {
@@ -98,7 +103,7 @@
 protected:
   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) override {
-    return createIndexASTConsumer();
+    return createIndexASTConsumer(CI);
   }
 
   void EndSourceFileAction() override {
@@ -142,7 +147,7 @@
 
   std::vector<std::unique_ptr<ASTConsumer>> Consumers;
   Consumers.push_back(std::move(OtherConsumer));
-  Consumers.push_back(createIndexASTConsumer());
+  Consumers.push_back(createIndexASTConsumer(CI));
   return llvm::make_unique<MultiplexConsumer>(std::move(Consumers));
 }
 
@@ -173,6 +178,7 @@
   IndexingContext IndexCtx(Opts, *DataConsumer);
   IndexCtx.setASTContext(Unit.getASTContext());
   DataConsumer->initialize(Unit.getASTContext());
+  DataConsumer->setPreprocessor(Unit.getPreprocessorPtr());
   indexTranslationUnit(Unit, IndexCtx);
   DataConsumer->finish();
 }
@@ -198,7 +204,7 @@
   IndexCtx.setASTContext(Ctx);
   DataConsumer->initialize(Ctx);
 
-  for (const Decl *D :Reader.getModuleFileLevelDecls(Mod)) {
+  for (const Decl *D : Reader.getModuleFileLevelDecls(Mod)) {
     IndexCtx.indexTopLevelDecl(D);
   }
   DataConsumer->finish();
diff --git a/lib/Index/USRGeneration.cpp b/lib/Index/USRGeneration.cpp
index 3a06554..ee1c950 100644
--- a/lib/Index/USRGeneration.cpp
+++ b/lib/Index/USRGeneration.cpp
@@ -103,7 +103,7 @@
   void VisitUnresolvedUsingTypenameDecl(const UnresolvedUsingTypenameDecl *D);
 
   void VisitLinkageSpecDecl(const LinkageSpecDecl *D) {
-    IgnoreResults = true;
+    IgnoreResults = true; // No USRs for linkage specs themselves.
   }
 
   void VisitUsingDirectiveDecl(const UsingDirectiveDecl *D) {
@@ -192,6 +192,8 @@
 void USRGenerator::VisitDeclContext(const DeclContext *DC) {
   if (const NamedDecl *D = dyn_cast<NamedDecl>(DC))
     Visit(D);
+  else if (isa<LinkageSpecDecl>(DC)) // Linkage specs are transparent in USRs.
+    VisitDeclContext(DC->getParent());
 }
 
 void USRGenerator::VisitFieldDecl(const FieldDecl *D) {
diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp
index b18d273..3528f88 100644
--- a/lib/Lex/HeaderSearch.cpp
+++ b/lib/Lex/HeaderSearch.cpp
@@ -1,4 +1,4 @@
-//===--- HeaderSearch.cpp - Resolve Header File Locations ---===//
+//===- HeaderSearch.cpp - Resolve Header File Locations -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,25 +12,38 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/HeaderSearch.h"
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Lex/DirectoryLookup.h"
 #include "clang/Lex/ExternalPreprocessorSource.h"
 #include "clang/Lex/HeaderMap.h"
 #include "clang/Lex/HeaderSearchOptions.h"
 #include "clang/Lex/LexDiagnostic.h"
-#include "clang/Lex/Lexer.h"
+#include "clang/Lex/ModuleMap.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Capacity.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
 #include <cstdio>
+#include <cstring>
+#include <string>
+#include <system_error>
 #include <utility>
-#if defined(LLVM_ON_UNIX)
-#include <limits.h>
-#endif
+
 using namespace clang;
 
 const IdentifierInfo *
@@ -52,7 +65,7 @@
   return ControllingMacro;
 }
 
-ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() {}
+ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() = default;
 
 HeaderSearch::HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts,
                            SourceManager &SourceMgr, DiagnosticsEngine &Diags,
@@ -60,17 +73,7 @@
                            const TargetInfo *Target)
     : HSOpts(std::move(HSOpts)), Diags(Diags),
       FileMgr(SourceMgr.getFileManager()), FrameworkMap(64),
-      ModMap(SourceMgr, Diags, LangOpts, Target, *this) {
-  AngledDirIdx = 0;
-  SystemDirIdx = 0;
-  NoCurDirSearch = false;
-
-  ExternalLookup = nullptr;
-  ExternalSource = nullptr;
-  NumIncluded = 0;
-  NumMultiIncludeFileOptzn = 0;
-  NumFrameworkLookups = NumSubFrameworkLookups = 0;
-}
+      ModMap(SourceMgr, Diags, LangOpts, Target, *this) {}
 
 HeaderSearch::~HeaderSearch() {
   // Delete headermaps.
@@ -142,27 +145,26 @@
     return i->second;
 
   if (FileMapOnly || HSOpts->PrebuiltModulePaths.empty())
-      return std::string();
+    return {};
 
   // Then go through each prebuilt module directory and try to find the pcm
   // file.
-    for (const std::string &Dir : HSOpts->PrebuiltModulePaths) {
-      SmallString<256> Result(Dir);
-      llvm::sys::fs::make_absolute(Result);
-
-      llvm::sys::path::append(Result, ModuleName + ".pcm");
-      if (getFileMgr().getFile(Result.str()))
-        return Result.str().str();
-    }
-    return std::string();
+  for (const std::string &Dir : HSOpts->PrebuiltModulePaths) {
+    SmallString<256> Result(Dir);
+    llvm::sys::fs::make_absolute(Result);
+    llvm::sys::path::append(Result, ModuleName + ".pcm");
+    if (getFileMgr().getFile(Result.str()))
+      return Result.str().str();
   }
+  return {};
+}
 
 std::string HeaderSearch::getCachedModuleFileName(StringRef ModuleName,
                                                   StringRef ModuleMapPath) {
   // If we don't have a module cache path or aren't supposed to use one, we
   // can't do anything.
   if (getModuleCachePath().empty())
-    return std::string();
+    return {};
 
   SmallString<256> Result(getModuleCachePath());
   llvm::sys::fs::make_absolute(Result);
@@ -182,7 +184,7 @@
       Parent = ".";
     auto *Dir = FileMgr.getDirectory(Parent);
     if (!Dir)
-      return std::string();
+      return {};
     auto DirName = FileMgr.getCanonicalName(Dir);
     auto FileName = llvm::sys::path::filename(ModuleMapPath);
 
@@ -207,11 +209,14 @@
 
   // The facility for "private modules" -- adjacent, optional module maps named
   // module.private.modulemap that are supposed to define private submodules --
-  // is sometimes misused by frameworks that name their associated private
-  // module FooPrivate, rather than as a submodule named Foo.Private as
-  // intended. Here we compensate for such cases by looking in directories named
-  // Foo.framework, when we previously looked and failed to find a
-  // FooPrivate.framework.
+  // may have different flavors of names: FooPrivate, Foo_Private and Foo.Private.
+  //
+  // Foo.Private is now depracated in favor of Foo_Private. Users of FooPrivate
+  // should also rename to Foo_Private. Representing private as submodules
+  // could force building unwanted dependencies into the parent module and cause
+  // dependency cycles.
+  if (!Module && SearchName.consume_back("_Private"))
+    Module = lookupModule(ModuleName, SearchName);
   if (!Module && SearchName.consume_back("Private"))
     Module = lookupModule(ModuleName, SearchName);
   return Module;
@@ -381,7 +386,6 @@
     Filename = StringRef(MappedName.begin(), MappedName.size());
     HasBeenMapped = true;
     Result = HM->LookupFile(Filename, HS.getFileMgr());
-
   } else {
     Result = HS.getFileMgr().getFile(Dest);
   }
@@ -592,7 +596,6 @@
   ModMap.setTarget(Target);
 }
 
-
 //===----------------------------------------------------------------------===//
 // Header File Location.
 //===----------------------------------------------------------------------===//
@@ -959,7 +962,6 @@
 
   HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end());
   if (!(FE = FileMgr.getFile(HeadersFilename, /*openFile=*/true))) {
-
     // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h"
     HeadersFilename = FrameworkName;
     HeadersFilename += "PrivateHeaders/";
@@ -1116,7 +1118,7 @@
 
   // FIXME: this is a workaround for the lack of proper modules-aware support
   // for #import / #pragma once
-  auto TryEnterImported = [&](void) -> bool {
+  auto TryEnterImported = [&]() -> bool {
     if (!ModulesEnabled)
       return false;
     // Ensure FileInfo bits are up to date.
@@ -1449,7 +1451,6 @@
   return ModMap.findModule(Name);
 }
 
-
 HeaderSearch::LoadModuleMapResult 
 HeaderSearch::loadModuleMapFile(StringRef DirName, bool IsSystem,
                                 bool IsFramework) {
@@ -1579,9 +1580,15 @@
 std::string HeaderSearch::suggestPathToFileForDiagnostics(const FileEntry *File,
                                                           bool *IsSystem) {
   // FIXME: We assume that the path name currently cached in the FileEntry is
-  // the most appropriate one for this analysis (and that it's spelled the same
-  // way as the corresponding header search path).
-  StringRef Name = File->getName();
+  // the most appropriate one for this analysis (and that it's spelled the
+  // same way as the corresponding header search path).
+  return suggestPathToFileForDiagnostics(File->getName(), /*BuildDir=*/"",
+                                         IsSystem);
+}
+
+std::string HeaderSearch::suggestPathToFileForDiagnostics(
+    llvm::StringRef File, llvm::StringRef WorkingDir, bool *IsSystem) {
+  using namespace llvm::sys;
 
   unsigned BestPrefixLength = 0;
   unsigned BestSearchDir;
@@ -1592,12 +1599,17 @@
       continue;
 
     StringRef Dir = SearchDirs[I].getDir()->getName();
-    for (auto NI = llvm::sys::path::begin(Name),
-              NE = llvm::sys::path::end(Name),
-              DI = llvm::sys::path::begin(Dir),
-              DE = llvm::sys::path::end(Dir);
+    llvm::SmallString<32> DirPath(Dir.begin(), Dir.end());
+    if (!WorkingDir.empty() && !path::is_absolute(Dir)) {
+      auto err = fs::make_absolute(WorkingDir, DirPath);
+      if (!err)
+        path::remove_dots(DirPath, /*remove_dot_dot=*/true);
+      Dir = DirPath;
+    }
+    for (auto NI = path::begin(File), NE = path::end(File),
+              DI = path::begin(Dir), DE = path::end(Dir);
          /*termination condition in loop*/; ++NI, ++DI) {
-      // '.' components in Name are ignored.
+      // '.' components in File are ignored.
       while (NI != NE && *NI == ".")
         ++NI;
       if (NI == NE)
@@ -1607,9 +1619,9 @@
       while (DI != DE && *DI == ".")
         ++DI;
       if (DI == DE) {
-        // Dir is a prefix of Name, up to '.' components and choice of path
+        // Dir is a prefix of File, up to '.' components and choice of path
         // separators.
-        unsigned PrefixLength = NI - llvm::sys::path::begin(Name);
+        unsigned PrefixLength = NI - path::begin(File);
         if (PrefixLength > BestPrefixLength) {
           BestPrefixLength = PrefixLength;
           BestSearchDir = I;
@@ -1624,5 +1636,5 @@
 
   if (IsSystem)
     *IsSystem = BestPrefixLength ? BestSearchDir >= SystemDirIdx : false;
-  return Name.drop_front(BestPrefixLength);
+  return File.drop_front(BestPrefixLength);
 }
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 5132d0e..8bd4ab0 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -1,4 +1,4 @@
-//===--- Lexer.cpp - C Language Family Lexer ------------------------------===//
+//===- Lexer.cpp - C Language Family Lexer --------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,17 +15,29 @@
 #include "UnicodeCharSets.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/MultipleIncludeOpt.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Lex/Token.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/TokenKinds.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/NativeFormatting.h"
 #include "llvm/Support/UnicodeCharRanges.h"
 #include <algorithm>
 #include <cassert>
@@ -63,7 +75,7 @@
 // Lexer Class Implementation
 //===----------------------------------------------------------------------===//
 
-void Lexer::anchor() { }
+void Lexer::anchor() {}
 
 void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
                       const char *BufEnd) {
@@ -120,31 +132,21 @@
 /// assumes that the associated file buffer and Preprocessor objects will
 /// outlive it, so it doesn't take ownership of either of them.
 Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
-  : PreprocessorLexer(&PP, FID),
-    FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
-    LangOpts(PP.getLangOpts()) {
-
+    : PreprocessorLexer(&PP, FID),
+      FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
+      LangOpts(PP.getLangOpts()) {
   InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
             InputFile->getBufferEnd());
 
   resetExtendedTokenMode();
 }
 
-void Lexer::resetExtendedTokenMode() {
-  assert(PP && "Cannot reset token mode without a preprocessor");
-  if (LangOpts.TraditionalCPP)
-    SetKeepWhitespaceMode(true);
-  else
-    SetCommentRetentionState(PP->getCommentRetentionState());
-}
-
 /// Lexer constructor - Create a new raw lexer object.  This object is only
 /// suitable for calls to 'LexFromRawLexer'.  This lexer assumes that the text
 /// range will outlive it, so it doesn't take ownership of it.
 Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts,
              const char *BufStart, const char *BufPtr, const char *BufEnd)
-  : FileLoc(fileloc), LangOpts(langOpts) {
-
+    : FileLoc(fileloc), LangOpts(langOpts) {
   InitLexer(BufStart, BufPtr, BufEnd);
 
   // We *are* in raw mode.
@@ -159,6 +161,14 @@
     : Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(),
             FromFile->getBufferStart(), FromFile->getBufferEnd()) {}
 
+void Lexer::resetExtendedTokenMode() {
+  assert(PP && "Cannot reset token mode without a preprocessor");
+  if (LangOpts.TraditionalCPP)
+    SetKeepWhitespaceMode(true);
+  else
+    SetCommentRetentionState(PP->getCommentRetentionState());
+}
+
 /// Create_PragmaLexer: Lexer constructor - Create a new lexer object for
 /// _Pragma expansion.  This has a variety of magic semantics that this method
 /// sets up.  It returns a new'd Lexer that must be delete'd when done.
@@ -209,30 +219,39 @@
   return L;
 }
 
-/// Stringify - Convert the specified string into a C string, with surrounding
-/// ""'s, and with escaped \ and " characters.
+template <typename T> static void StringifyImpl(T &Str, char Quote) {
+  typename T::size_type i = 0, e = Str.size();
+  while (i < e) {
+    if (Str[i] == '\\' || Str[i] == Quote) {
+      Str.insert(Str.begin() + i, '\\');
+      i += 2;
+      ++e;
+    } else if (Str[i] == '\n' || Str[i] == '\r') {
+      // Replace '\r\n' and '\n\r' to '\\' followed by 'n'.
+      if ((i < e - 1) && (Str[i + 1] == '\n' || Str[i + 1] == '\r') &&
+          Str[i] != Str[i + 1]) {
+        Str[i] = '\\';
+        Str[i + 1] = 'n';
+      } else {
+        // Replace '\n' and '\r' to '\\' followed by 'n'.
+        Str[i] = '\\';
+        Str.insert(Str.begin() + i + 1, 'n');
+        ++e;
+      }
+      i += 2;
+    } else
+      ++i;
+  }
+}
+
 std::string Lexer::Stringify(StringRef Str, bool Charify) {
   std::string Result = Str;
   char Quote = Charify ? '\'' : '"';
-  for (unsigned i = 0, e = Result.size(); i != e; ++i) {
-    if (Result[i] == '\\' || Result[i] == Quote) {
-      Result.insert(Result.begin()+i, '\\');
-      ++i; ++e;
-    }
-  }
+  StringifyImpl(Result, Quote);
   return Result;
 }
 
-/// Stringify - Convert the specified string into a C string by escaping '\'
-/// and " characters.  This does not add surrounding ""'s to the string.
-void Lexer::Stringify(SmallVectorImpl<char> &Str) {
-  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
-    if (Str[i] == '\\' || Str[i] == '"') {
-      Str.insert(Str.begin()+i, '\\');
-      ++i; ++e;
-    }
-  }
-}
+void Lexer::Stringify(SmallVectorImpl<char> &Str) { StringifyImpl(Str, '"'); }
 
 //===----------------------------------------------------------------------===//
 // Token Spelling
@@ -307,7 +326,7 @@
   StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
   if (invalidTemp) {
     if (invalid) *invalid = true;
-    return StringRef();
+    return {};
   }
 
   const char *tokenBegin = file.data() + locInfo.second;
@@ -345,7 +364,7 @@
   if (Invalid)
     *Invalid = CharDataInvalid;
   if (CharDataInvalid)
-    return std::string();
+    return {};
 
   // If this token contains nothing interesting, return it directly.
   if (!Tok.needsCleaning())
@@ -367,7 +386,7 @@
 /// to point to a constant buffer with the data already in it (avoiding a
 /// copy).  The caller is not allowed to modify the returned buffer pointer
 /// if an internal buffer is returned.
-unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, 
+unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
                             const SourceManager &SourceMgr,
                             const LangOptions &LangOpts, bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
@@ -545,12 +564,12 @@
 
 namespace {
 
-  enum PreambleDirectiveKind {
-    PDK_Skipped,
-    PDK_Unknown
-  };
+enum PreambleDirectiveKind {
+  PDK_Skipped,
+  PDK_Unknown
+};
 
-} // end anonymous namespace
+} // namespace
 
 PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
                                       const LangOptions &LangOpts,
@@ -592,17 +611,17 @@
       if (TheTok.getKind() == tok::eof) {
         break;
       }
-      
+
       // If we haven't hit the end of the preprocessor directive, skip this
       // token.
       if (!TheTok.isAtStartOfLine())
         continue;
-        
+
       // We've passed the end of the preprocessor directive, and will look
       // at this token again below.
       InPreprocessorDirective = false;
     }
-    
+
     // Keep track of the # of lines in the preamble.
     if (TheTok.isAtStartOfLine()) {
       unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset;
@@ -619,13 +638,13 @@
         ActiveCommentLoc = TheTok.getLocation();
       continue;
     }
-    
+
     if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {
-      // This is the start of a preprocessor directive. 
+      // This is the start of a preprocessor directive.
       Token HashTok = TheTok;
       InPreprocessorDirective = true;
       ActiveCommentLoc = SourceLocation();
-      
+
       // Figure out which directive this is. Since we're lexing raw tokens,
       // we don't have an identifier table available. Instead, just look at
       // the raw identifier to recognize and categorize preprocessor directives.
@@ -665,7 +684,7 @@
           break;
         }
       }
-      
+
       // We only end up here if we didn't recognize the preprocessor
       // directive or it was one that can't occur in the preamble at this
       // point. Roll back the current token to the location of the '#'.
@@ -678,7 +697,7 @@
     // the preamble.
     break;
   } while (true);
-  
+
   SourceLocation End;
   if (ActiveCommentLoc.isValid())
     End = ActiveCommentLoc; // don't truncate a decl comment.
@@ -700,13 +719,13 @@
   // trigraphs.
   bool Invalid = false;
   const char *TokPtr = SM.getCharacterData(TokStart, &Invalid);
-  
+
   // If they request the first char of the token, we're trivially done.
   if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
     return TokStart;
-  
+
   unsigned PhysOffset = 0;
-  
+
   // The usual case is that tokens don't contain anything interesting.  Skip
   // over the uninteresting characters.  If a token only consists of simple
   // chars, this method is extremely fast.
@@ -717,7 +736,7 @@
     --CharNo;
     ++PhysOffset;
   }
-  
+
   // If we have a character that may be a trigraph or escaped newline, use a
   // lexer to parse it correctly.
   for (; CharNo; --CharNo) {
@@ -726,14 +745,14 @@
     TokPtr += Size;
     PhysOffset += Size;
   }
-  
+
   // Final detail: if we end up on an escaped newline, we want to return the
   // location of the actual byte of the token.  For example foo\<newline>bar
   // advanced by 3 should return the location of b, not of \\.  One compounding
   // detail of this is that the escape may be made by a trigraph.
   if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
     PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
-  
+
   return TokStart.getLocWithOffset(PhysOffset);
 }
 
@@ -756,11 +775,11 @@
                                           const SourceManager &SM,
                                           const LangOptions &LangOpts) {
   if (Loc.isInvalid())
-    return SourceLocation();
+    return {};
 
   if (Loc.isMacroID()) {
     if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
-      return SourceLocation(); // Points inside the macro expansion.
+      return {}; // Points inside the macro expansion.
   }
 
   unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
@@ -768,7 +787,7 @@
     Len = Len - Offset;
   else
     return Loc;
-  
+
   return Loc.getLocWithOffset(Len);
 }
 
@@ -831,7 +850,7 @@
   if (Range.isTokenRange()) {
     End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts);
     if (End.isInvalid())
-      return CharSourceRange();
+      return {};
   }
 
   // Break down the source locations.
@@ -839,12 +858,12 @@
   unsigned BeginOffs;
   std::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin);
   if (FID.isInvalid())
-    return CharSourceRange();
+    return {};
 
   unsigned EndOffs;
   if (!SM.isInFileID(End, FID, &EndOffs) ||
       BeginOffs > EndOffs)
-    return CharSourceRange();
+    return {};
 
   return CharSourceRange::getCharRange(Begin, End);
 }
@@ -855,14 +874,14 @@
   SourceLocation Begin = Range.getBegin();
   SourceLocation End = Range.getEnd();
   if (Begin.isInvalid() || End.isInvalid())
-    return CharSourceRange();
+    return {};
 
   if (Begin.isFileID() && End.isFileID())
     return makeRangeFromFileLocs(Range, SM, LangOpts);
 
   if (Begin.isMacroID() && End.isFileID()) {
     if (!isAtStartOfMacroExpansion(Begin, SM, LangOpts, &Begin))
-      return CharSourceRange();
+      return {};
     Range.setBegin(Begin);
     return makeRangeFromFileLocs(Range, SM, LangOpts);
   }
@@ -872,7 +891,7 @@
                                                           &End)) ||
         (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts,
                                                            &End)))
-      return CharSourceRange();
+      return {};
     Range.setEnd(End);
     return makeRangeFromFileLocs(Range, SM, LangOpts);
   }
@@ -893,13 +912,13 @@
   const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin),
                                                         &Invalid);
   if (Invalid)
-    return CharSourceRange();
+    return {};
 
   if (BeginEntry.getExpansion().isMacroArgExpansion()) {
     const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End),
                                                         &Invalid);
     if (Invalid)
-      return CharSourceRange();
+      return {};
 
     if (EndEntry.getExpansion().isMacroArgExpansion() &&
         BeginEntry.getExpansion().getExpansionLocStart() ==
@@ -910,7 +929,7 @@
     }
   }
 
-  return CharSourceRange();
+  return {};
 }
 
 StringRef Lexer::getSourceText(CharSourceRange Range,
@@ -920,21 +939,21 @@
   Range = makeFileCharRange(Range, SM, LangOpts);
   if (Range.isInvalid()) {
     if (Invalid) *Invalid = true;
-    return StringRef();
+    return {};
   }
 
   // Break down the source location.
   std::pair<FileID, unsigned> beginInfo = SM.getDecomposedLoc(Range.getBegin());
   if (beginInfo.first.isInvalid()) {
     if (Invalid) *Invalid = true;
-    return StringRef();
+    return {};
   }
 
   unsigned EndOffs;
   if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
       beginInfo.second > EndOffs) {
     if (Invalid) *Invalid = true;
-    return StringRef();
+    return {};
   }
 
   // Try to the load the file buffer.
@@ -942,7 +961,7 @@
   StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp);
   if (invalidTemp) {
     if (Invalid) *Invalid = true;
-    return StringRef();
+    return {};
   }
 
   if (Invalid) *Invalid = false;
@@ -965,7 +984,7 @@
 
     // For macro arguments we need to check that the argument did not come
     // from an inner macro, e.g: "MAC1( MAC2(foo) )"
-    
+
     // Loc points to the argument id of the macro definition, move to the
     // macro expansion.
     Loc = SM.getImmediateExpansionRange(Loc).first;
@@ -1006,7 +1025,7 @@
   // If the macro's spelling has no FileID, then it's actually a token paste
   // or stringization (or similar) and not a macro at all.
   if (!SM.getFileEntryForID(SM.getFileID(SM.getSpellingLoc(Loc))))
-    return StringRef();
+    return {};
 
   // Find the spelling location of the start of the non-argument expansion
   // range. This is where the macro name was spelled in order to begin
@@ -1048,17 +1067,17 @@
 StringRef Lexer::getIndentationForLine(SourceLocation Loc,
                                        const SourceManager &SM) {
   if (Loc.isInvalid() || Loc.isMacroID())
-    return "";
+    return {};
   std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
   if (LocInfo.first.isInvalid())
-    return "";
+    return {};
   bool Invalid = false;
   StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
   if (Invalid)
-    return "";
+    return {};
   const char *Line = findBeginningOfLine(Buffer, LocInfo.second);
   if (!Line)
-    return "";
+    return {};
   StringRef Rest = Buffer.substr(Line - Buffer.data());
   size_t NumWhitespaceChars = Rest.find_first_not_of(" \t");
   return NumWhitespaceChars == StringRef::npos
@@ -1212,18 +1231,12 @@
   }
 }
 
-/// \brief Checks that the given token is the first token that occurs after the
-/// given location (this excludes comments and whitespace). Returns the location
-/// immediately after the specified token. If the token is not found or the
-/// location is inside a macro, the returned source location will be invalid.
-SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
-                                        tok::TokenKind TKind,
-                                        const SourceManager &SM,
-                                        const LangOptions &LangOpts,
-                                        bool SkipTrailingWhitespaceAndNewLine) {
+Optional<Token> Lexer::findNextToken(SourceLocation Loc,
+                                     const SourceManager &SM,
+                                     const LangOptions &LangOpts) {
   if (Loc.isMacroID()) {
     if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
-      return SourceLocation();
+      return None;
   }
   Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);
 
@@ -1234,7 +1247,7 @@
   bool InvalidTemp = false;
   StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
   if (InvalidTemp)
-    return SourceLocation();
+    return None;
 
   const char *TokenBegin = File.data() + LocInfo.second;
 
@@ -1244,15 +1257,25 @@
   // Find the token.
   Token Tok;
   lexer.LexFromRawLexer(Tok);
-  if (Tok.isNot(TKind))
-    return SourceLocation();
-  SourceLocation TokenLoc = Tok.getLocation();
+  return Tok;
+}
+
+/// \brief Checks that the given token is the first token that occurs after the
+/// given location (this excludes comments and whitespace). Returns the location
+/// immediately after the specified token. If the token is not found or the
+/// location is inside a macro, the returned source location will be invalid.
+SourceLocation Lexer::findLocationAfterToken(
+    SourceLocation Loc, tok::TokenKind TKind, const SourceManager &SM,
+    const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine) {
+  Optional<Token> Tok = findNextToken(Loc, SM, LangOpts);
+  if (!Tok || Tok->isNot(TKind))
+    return {};
+  SourceLocation TokenLoc = Tok->getLocation();
 
   // Calculate how much whitespace needs to be skipped if any.
   unsigned NumWhitespaceChars = 0;
   if (SkipTrailingWhitespaceAndNewLine) {
-    const char *TokenEnd = SM.getCharacterData(TokenLoc) +
-                           Tok.getLength();
+    const char *TokenEnd = SM.getCharacterData(TokenLoc) + Tok->getLength();
     unsigned char C = *TokenEnd;
     while (isHorizontalWhitespace(C)) {
       C = *(++TokenEnd);
@@ -1269,7 +1292,7 @@
     }
   }
 
-  return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars);
+  return TokenLoc.getLocWithOffset(Tok->getLength() + NumWhitespaceChars);
 }
 
 /// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
@@ -1287,7 +1310,6 @@
 ///
 /// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
 /// be updated to match.
-///
 char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
                                Token *Tok) {
   // If we have a slash, look for an escaped newline.
@@ -1479,6 +1501,75 @@
   }
 }
 
+/// After encountering UTF-8 character C and interpreting it as an identifier
+/// character, check whether it's a homoglyph for a common non-identifier
+/// source character that is unlikely to be an intentional identifier
+/// character and warn if so.
+static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
+                                       CharSourceRange Range) {
+  // FIXME: Handle Unicode quotation marks (smart quotes, fullwidth quotes).
+  struct HomoglyphPair {
+    uint32_t Character;
+    char LooksLike;
+    bool operator<(HomoglyphPair R) const { return Character < R.Character; }
+  };
+  static constexpr HomoglyphPair SortedHomoglyphs[] = {
+    {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK
+    {U'\u037e', ';'}, // GREEK QUESTION MARK
+    {U'\u2212', '-'}, // MINUS SIGN
+    {U'\u2215', '/'}, // DIVISION SLASH
+    {U'\u2216', '\\'}, // SET MINUS
+    {U'\u2217', '*'}, // ASTERISK OPERATOR
+    {U'\u2223', '|'}, // DIVIDES
+    {U'\u2227', '^'}, // LOGICAL AND
+    {U'\u2236', ':'}, // RATIO
+    {U'\u223c', '~'}, // TILDE OPERATOR
+    {U'\ua789', ':'}, // MODIFIER LETTER COLON
+    {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK
+    {U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN
+    {U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN
+    {U'\uff05', '%'}, // FULLWIDTH PERCENT SIGN
+    {U'\uff06', '&'}, // FULLWIDTH AMPERSAND
+    {U'\uff08', '('}, // FULLWIDTH LEFT PARENTHESIS
+    {U'\uff09', ')'}, // FULLWIDTH RIGHT PARENTHESIS
+    {U'\uff0a', '*'}, // FULLWIDTH ASTERISK
+    {U'\uff0b', '+'}, // FULLWIDTH ASTERISK
+    {U'\uff0c', ','}, // FULLWIDTH COMMA
+    {U'\uff0d', '-'}, // FULLWIDTH HYPHEN-MINUS
+    {U'\uff0e', '.'}, // FULLWIDTH FULL STOP
+    {U'\uff0f', '/'}, // FULLWIDTH SOLIDUS
+    {U'\uff1a', ':'}, // FULLWIDTH COLON
+    {U'\uff1b', ';'}, // FULLWIDTH SEMICOLON
+    {U'\uff1c', '<'}, // FULLWIDTH LESS-THAN SIGN
+    {U'\uff1d', '='}, // FULLWIDTH EQUALS SIGN
+    {U'\uff1e', '>'}, // FULLWIDTH GREATER-THAN SIGN
+    {U'\uff1f', '?'}, // FULLWIDTH QUESTION MARK
+    {U'\uff20', '@'}, // FULLWIDTH COMMERCIAL AT
+    {U'\uff3b', '['}, // FULLWIDTH LEFT SQUARE BRACKET
+    {U'\uff3c', '\\'}, // FULLWIDTH REVERSE SOLIDUS
+    {U'\uff3d', ']'}, // FULLWIDTH RIGHT SQUARE BRACKET
+    {U'\uff3e', '^'}, // FULLWIDTH CIRCUMFLEX ACCENT
+    {U'\uff5b', '{'}, // FULLWIDTH LEFT CURLY BRACKET
+    {U'\uff5c', '|'}, // FULLWIDTH VERTICAL LINE
+    {U'\uff5d', '}'}, // FULLWIDTH RIGHT CURLY BRACKET
+    {U'\uff5e', '~'}, // FULLWIDTH TILDE
+    {0, 0}
+  };
+  auto Homoglyph =
+      std::lower_bound(std::begin(SortedHomoglyphs),
+                       std::end(SortedHomoglyphs) - 1, HomoglyphPair{C, '\0'});
+  if (Homoglyph->Character == C) {
+    llvm::SmallString<5> CharBuf;
+    {
+      llvm::raw_svector_ostream CharOS(CharBuf);
+      llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);
+    }
+    const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
+    Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
+        << Range << CharBuf << LooksLikeStr;
+  }
+}
+
 bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
                                     Token &Result) {
   const char *UCNPtr = CurPtr + Size;
@@ -1513,10 +1604,13 @@
       !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
     return false;
 
-  if (!isLexingRawMode())
+  if (!isLexingRawMode()) {
     maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
                               makeCharRange(*this, CurPtr, UnicodePtr),
                               /*IsFirst=*/false);
+    maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), CodePoint,
+                               makeCharRange(*this, CurPtr, UnicodePtr));
+  }
 
   CurPtr = UnicodePtr;
   return true;
@@ -1582,7 +1676,6 @@
       CurPtr = ConsumeChar(CurPtr, Size, Result);
       C = getCharAndSize(CurPtr, Size);
       continue;
-
     } else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
       C = getCharAndSize(CurPtr, Size);
       continue;
@@ -1645,7 +1738,7 @@
     if (!LangOpts.C99) {
       if (!isHexaLiteral(BufferPtr, LangOpts))
         IsHexFloat = false;
-      else if (!getLangOpts().CPlusPlus1z &&
+      else if (!getLangOpts().CPlusPlus17 &&
                std::find(BufferPtr, CurPtr, '_') != CurPtr)
         IsHexFloat = false;
     }
@@ -1791,7 +1884,7 @@
     // getAndAdvanceChar.
     if (C == '\\')
       C = getAndAdvanceChar(CurPtr, Result);
-    
+
     if (C == '\n' || C == '\r' ||             // Newline.
         (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
       if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
@@ -1799,7 +1892,7 @@
       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
       return true;
     }
-    
+
     if (C == 0) {
       if (isCodeCompletionPoint(CurPtr-1)) {
         PP->CodeCompleteNaturalLanguage();
@@ -1916,18 +2009,21 @@
   const char *AfterLessPos = CurPtr;
   char C = getAndAdvanceChar(CurPtr, Result);
   while (C != '>') {
-    // Skip escaped characters.
-    if (C == '\\' && CurPtr < BufferEnd) {
-      // Skip the escaped character.
-      getAndAdvanceChar(CurPtr, Result);
-    } else if (C == '\n' || C == '\r' ||             // Newline.
-               (C == 0 && (CurPtr-1 == BufferEnd ||  // End of file.
-                           isCodeCompletionPoint(CurPtr-1)))) {
+    // Skip escaped characters.  Escaped newlines will already be processed by
+    // getAndAdvanceChar.
+    if (C == '\\')
+      C = getAndAdvanceChar(CurPtr, Result);
+
+    if (C == '\n' || C == '\r' ||             // Newline.
+        (C == 0 && (CurPtr-1 == BufferEnd ||  // End of file.
+                    isCodeCompletionPoint(CurPtr-1)))) {
       // If the filename is unterminated, then it must just be a lone <
       // character.  Return this as such.
       FormTokenWithChars(Result, AfterLessPos, tok::less);
       return true;
-    } else if (C == 0) {
+    }
+
+    if (C == 0) {
       NulCharacter = CurPtr-1;
     }
     C = getAndAdvanceChar(CurPtr, Result);
@@ -2013,7 +2109,6 @@
 /// Update BufferPtr to point to the next non-whitespace character and return.
 ///
 /// This method forms a token and returns true if KeepWhitespaceMode is enabled.
-///
 bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
                            bool &TokAtPhysicalStartOfLine) {
   // Whitespace - Skip it, then return the token after the whitespace.
@@ -2228,7 +2323,7 @@
   std::string Spelling = PP->getSpelling(Result, &Invalid);
   if (Invalid)
     return true;
-  
+
   assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");
   Spelling[1] = '*';   // Change prefix to "/*".
   Spelling += "*/";    // add suffix.
@@ -2554,7 +2649,7 @@
       resetExtendedTokenMode();
     return true;  // Have a token.
   }
- 
+
   // If we are in raw mode, return this event as an EOF token.  Let the caller
   // that put us in raw mode handle the event.
   if (isLexingRawMode()) {
@@ -2563,7 +2658,7 @@
     FormTokenWithChars(Result, BufferEnd, tok::eof);
     return true;
   }
-  
+
   if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) {
     PP->setRecordedPreambleConditionalStack(ConditionalStack);
     ConditionalStack.clear();
@@ -2675,7 +2770,7 @@
   if (CurPtr != BufferStart &&
       CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
     return false;
-  
+
   // Check to see if we have <<<<<<< or >>>>.
   if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith("<<<<<<<") &&
       !StringRef(CurPtr, BufferEnd - CurPtr).startswith(">>>> "))
@@ -2685,7 +2780,7 @@
   // it.
   if (CurrentConflictMarkerState || isLexingRawMode())
     return false;
-  
+
   ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce;
 
   // Check to see if there is an ending marker somewhere in the buffer at the
@@ -2695,7 +2790,7 @@
     // Diagnose this, and ignore to the end of line.
     Diag(CurPtr, diag::err_conflict_marker);
     CurrentConflictMarkerState = Kind;
-    
+
     // Skip ahead to the end of line.  We know this exists because the
     // end-of-conflict marker starts with \r or \n.
     while (*CurPtr != '\r' && *CurPtr != '\n') {
@@ -2705,7 +2800,7 @@
     BufferPtr = CurPtr;
     return true;
   }
-  
+
   // No end of conflict marker found.
   return false;
 }
@@ -2719,35 +2814,35 @@
   if (CurPtr != BufferStart &&
       CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
     return false;
-  
+
   // If we have a situation where we don't care about conflict markers, ignore
   // it.
   if (!CurrentConflictMarkerState || isLexingRawMode())
     return false;
-  
+
   // Check to see if we have the marker (4 characters in a row).
   for (unsigned i = 1; i != 4; ++i)
     if (CurPtr[i] != CurPtr[0])
       return false;
-  
+
   // If we do have it, search for the end of the conflict marker.  This could
   // fail if it got skipped with a '#if 0' or something.  Note that CurPtr might
   // be the end of conflict marker.
   if (const char *End = FindConflictEnd(CurPtr, BufferEnd,
                                         CurrentConflictMarkerState)) {
     CurPtr = End;
-    
+
     // Skip ahead to the end of line.
     while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n')
       ++CurPtr;
-    
+
     BufferPtr = CurPtr;
-    
+
     // No longer in the conflict marker.
     CurrentConflictMarkerState = CMK_None;
     return true;
   }
-  
+
   return false;
 }
 
@@ -2886,7 +2981,6 @@
     }
 
     return 0;
-
   } else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
     // C++03 allows UCNs representing surrogate characters. C99 and C++11 don't.
     // We don't use isLexingRawMode() here because we need to diagnose bad
@@ -3056,7 +3150,7 @@
     // We know the lexer hasn't changed, so just try again with this lexer.
     // (We manually eliminate the tail call to avoid recursion.)
     goto LexNextToken;
-      
+
   case 26:  // DOS & CP/M EOF: "^Z".
     // If we're in Microsoft extensions mode, treat this as end of file.
     if (LangOpts.MicrosoftExt) {
@@ -3068,7 +3162,7 @@
     // If Microsoft extensions are disabled, this is just random garbage.
     Kind = tok::unknown;
     break;
-      
+
   case '\r':
     if (CurPtr[0] == '\n')
       Char = getAndAdvanceChar(CurPtr, Result);
@@ -3131,7 +3225,7 @@
     // We only saw whitespace, so just try again with this lexer.
     // (We manually eliminate the tail call to avoid recursion.)
     goto LexNextToken;
-      
+
   // C99 6.4.4.1: Integer Constants.
   // C99 6.4.4.2: Floating Constants.
   case '0': case '1': case '2': case '3': case '4':
@@ -3174,7 +3268,7 @@
                                ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                                            SizeTmp2, Result),
                                tok::utf8_string_literal);
-        if (Char2 == '\'' && LangOpts.CPlusPlus1z)
+        if (Char2 == '\'' && LangOpts.CPlusPlus17)
           return LexCharConstant(
               Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                                   SizeTmp2, Result),
@@ -3518,6 +3612,24 @@
         Kind = tok::lessless;
       }
     } else if (Char == '=') {
+      char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+      if (After == '>') {
+        if (getLangOpts().CPlusPlus2a) {
+          if (!isLexingRawMode())
+            Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
+          CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                               SizeTmp2, Result);
+          Kind = tok::spaceship;
+          break;
+        }
+        // Suggest adding a space between the '<=' and the '>' to avoid a
+        // change in semantics if this turns up in C++ <=17 mode.
+        if (getLangOpts().CPlusPlus && !isLexingRawMode()) {
+          Diag(BufferPtr, diag::warn_cxx2a_compat_spaceship)
+            << FixItHint::CreateInsertion(
+                   getSourceLocation(CurPtr + SizeTmp, SizeTmp2), " ");
+        }
+      }
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::lessequal;
     } else if (LangOpts.Digraphs && Char == ':') {     // '<:' -> '['
@@ -3630,7 +3742,7 @@
       // If this is '====' and we're in a conflict marker, ignore it.
       if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
         goto LexNextToken;
-      
+
       Kind = tok::equalequal;
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
     } else {
@@ -3701,6 +3813,7 @@
     // We can't just reset CurPtr to BufferPtr because BufferPtr may point to
     // an escaped newline.
     --CurPtr;
+    const char *UTF8StartPtr = CurPtr;
     llvm::ConversionResult Status =
         llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr,
                                   (const llvm::UTF8 *)BufferEnd,
@@ -3715,9 +3828,12 @@
         // (We manually eliminate the tail call to avoid recursion.)
         goto LexNextToken;
       }
+      if (!isLexingRawMode())
+        maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), CodePoint,
+                                   makeCharRange(*this, UTF8StartPtr, CurPtr));
       return LexUnicode(Result, CodePoint, CurPtr);
     }
-    
+
     if (isLexingRawMode() || ParsingPreprocessorDirective ||
         PP->isPreprocessedOutput()) {
       ++CurPtr;
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 5f6e5ef..ec342ee 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -738,15 +738,17 @@
     s++;
     radix = 10;
     saw_exponent = true;
-    if (*s == '+' || *s == '-')  s++; // sign
+    if (s != ThisTokEnd && (*s == '+' || *s == '-'))  s++; // sign
     const char *first_non_digit = SkipDigits(s);
     if (containsDigits(s, first_non_digit)) {
       checkSeparator(TokLoc, s, CSK_BeforeDigits);
       s = first_non_digit;
     } else {
-      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
-              diag::err_exponent_has_no_digits);
-      hadError = true;
+      if (!hadError) {
+        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
+                diag::err_exponent_has_no_digits);
+        hadError = true;
+      }
       return;
     }
   }
@@ -787,10 +789,12 @@
   } else if (Pos == ThisTokEnd)
     return;
 
-  if (isDigitSeparator(*Pos))
+  if (isDigitSeparator(*Pos)) {
     PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin),
             diag::err_digit_separator_not_between_digits)
       << IsAfterDigits;
+    hadError = true;
+  }
 }
 
 /// ParseNumberStartingWithZero - This method is called when the first character
@@ -840,12 +844,14 @@
       const char *Exponent = s;
       s++;
       saw_exponent = true;
-      if (*s == '+' || *s == '-')  s++; // sign
+      if (s != ThisTokEnd && (*s == '+' || *s == '-'))  s++; // sign
       const char *first_non_digit = SkipDigits(s);
       if (!containsDigits(s, first_non_digit)) {
-        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
-                diag::err_exponent_has_no_digits);
-        hadError = true;
+        if (!hadError) {
+          PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
+                  diag::err_exponent_has_no_digits);
+          hadError = true;
+        }
         return;
       }
       checkSeparator(TokLoc, s, CSK_BeforeDigits);
@@ -855,7 +861,7 @@
         PP.Diag(TokLoc, PP.getLangOpts().CPlusPlus
                             ? diag::ext_hex_literal_invalid
                             : diag::ext_hex_constant_invalid);
-      else if (PP.getLangOpts().CPlusPlus1z)
+      else if (PP.getLangOpts().CPlusPlus17)
         PP.Diag(TokLoc, diag::warn_cxx17_hex_literal);
     } else if (saw_period) {
       PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index 5c0f062..df3f8a4 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -49,7 +49,8 @@
   if (!ResultEnt) {
     // Allocate memory for a MacroArgs object with the lexer tokens at the end,
     // and construct the MacroArgs object.
-    Result = new (std::malloc(totalSizeToAlloc<Token>(UnexpArgTokens.size())))
+    Result = new (
+        llvm::safe_malloc(totalSizeToAlloc<Token>(UnexpArgTokens.size())))
         MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumParams());
   } else {
     Result = *ResultEnt;
diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp
index 6dc7841..b13767a 100644
--- a/lib/Lex/MacroInfo.cpp
+++ b/lib/Lex/MacroInfo.cpp
@@ -1,4 +1,4 @@
-//===--- MacroInfo.cpp - Information about #defined identifiers -----------===//
+//===- MacroInfo.cpp - Information about #defined identifiers -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,25 +12,29 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/MacroInfo.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <utility>
+
 using namespace clang;
 
 MacroInfo::MacroInfo(SourceLocation DefLoc)
-  : Location(DefLoc),
-    ParameterList(nullptr),
-    NumParameters(0),
-    IsDefinitionLengthCached(false),
-    IsFunctionLike(false),
-    IsC99Varargs(false),
-    IsGNUVarargs(false),
-    IsBuiltinMacro(false),
-    HasCommaPasting(false),
-    IsDisabled(false),
-    IsUsed(false),
-    IsAllowRedefinitionsWithoutWarning(false),
-    IsWarnIfUnused(false),
-    UsedForHeaderGuard(false) {
-}
+    : Location(DefLoc), IsDefinitionLengthCached(false), IsFunctionLike(false),
+      IsC99Varargs(false), IsGNUVarargs(false), IsBuiltinMacro(false),
+      HasCommaPasting(false), IsDisabled(false), IsUsed(false),
+      IsAllowRedefinitionsWithoutWarning(false), IsWarnIfUnused(false),
+      UsedForHeaderGuard(false) {}
 
 unsigned MacroInfo::getDefinitionLengthSlow(const SourceManager &SM) const {
   assert(!IsDefinitionLengthCached);
diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp
index fc7fe13..2ff0a68 100644
--- a/lib/Lex/ModuleMap.cpp
+++ b/lib/Lex/ModuleMap.cpp
@@ -1,4 +1,4 @@
-//===--- ModuleMap.cpp - Describe the layout of modules ---------*- C++ -*-===//
+//===- ModuleMap.cpp - Describe the layout of modules ---------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,29 +11,47 @@
 // of a module as it relates to headers.
 //
 //===----------------------------------------------------------------------===//
+
 #include "clang/Lex/ModuleMap.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Diagnostic.h"
-#include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/FileManager.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
-#include "clang/Basic/TargetOptions.h"
+#include "clang/Basic/VirtualFileSystem.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/HeaderSearchOptions.h"
 #include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Host.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
-#include <stdlib.h>
-#if defined(LLVM_ON_UNIX)
-#include <limits.h>
-#endif
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <system_error>
+#include <utility>
+
 using namespace clang;
 
 Module::HeaderKind ModuleMap::headerRoleToKind(ModuleHeaderRole Role) {
@@ -80,7 +98,7 @@
   // Resolve the module-id.
   Module *Context = resolveModuleId(Unresolved.Id, Mod, Complain);
   if (!Context)
-    return Module::ExportDecl();
+    return {};
 
   return Module::ExportDecl(Context, Unresolved.Wildcard);
 }
@@ -263,6 +281,8 @@
 ModuleMap::~ModuleMap() {
   for (auto &M : Modules)
     delete M.getValue();
+  for (auto *M : ShadowModules)
+    delete M;
 }
 
 void ModuleMap::setTarget(const TargetInfo &Target) {
@@ -344,7 +364,7 @@
 ModuleMap::findHeaderInUmbrellaDirs(const FileEntry *File,
                     SmallVectorImpl<const DirectoryEntry *> &IntermediateDirs) {
   if (UmbrellaDirs.empty())
-    return KnownHeader();
+    return {};
 
   const DirectoryEntry *Dir = File->getDir();
   assert(Dir && "file in no directory");
@@ -372,7 +392,7 @@
     // Resolve the parent path to a directory entry.
     Dir = SourceMgr.getFileManager().getDirectory(DirName);
   } while (Dir);
-  return KnownHeader();
+  return {};
 }
 
 static bool violatesPrivateInclude(Module *RequestingModule,
@@ -455,7 +475,7 @@
   // We have found a module, but we don't use it.
   if (NotUsed) {
     Diags.Report(FilenameLoc, diag::err_undeclared_use_of_module)
-        << RequestingModule->getFullModuleName() << Filename;
+        << RequestingModule->getTopLevelModule()->Name << Filename;
     return;
   }
 
@@ -466,7 +486,7 @@
 
   if (LangOpts.ModulesStrictDeclUse) {
     Diags.Report(FilenameLoc, diag::err_undeclared_use_of_module)
-        << RequestingModule->getFullModuleName() << Filename;
+        << RequestingModule->getTopLevelModule()->Name << Filename;
   } else if (RequestingModule && RequestingModuleIsModuleInterface &&
              LangOpts.isCompilingModule()) {
     // Do not diagnose when we are not compiling a module. 
@@ -502,7 +522,7 @@
                                                       bool AllowTextual) {
   auto MakeResult = [&](ModuleMap::KnownHeader R) -> ModuleMap::KnownHeader {
     if (!AllowTextual && R.getRole() & ModuleMap::TextualHeader)
-      return ModuleMap::KnownHeader();
+      return {};
     return R;
   };
 
@@ -592,7 +612,7 @@
     return Header;
   }
 
-  return KnownHeader();
+  return {};
 }
 
 ArrayRef<ModuleMap::KnownHeader>
@@ -733,7 +753,7 @@
   // Try to find an existing module with this name.
   if (Module *Sub = lookupModuleQualified(Name, Parent))
     return std::make_pair(Sub, false);
-  
+
   // Create a new module with this name.
   Module *Result = new Module(Name, SourceLocation(), Parent, IsFramework,
                               IsExplicit, NumCreatedModules++);
@@ -741,6 +761,7 @@
     if (LangOpts.CurrentModule == Name)
       SourceModule = Result;
     Modules[Name] = Result;
+    ModuleScopeIDs[Result] = CurrentModuleScopeID;
   }
   return std::make_pair(Result, true);
 }
@@ -909,6 +930,7 @@
     if (LangOpts.CurrentModule == ModuleName)
       SourceModule = Result;
     Modules[ModuleName] = Result;
+    ModuleScopeIDs[Result] = CurrentModuleScopeID;
   }
 
   Result->IsSystem |= Attrs.IsSystem;
@@ -981,6 +1003,21 @@
   return Result;
 }
 
+Module *ModuleMap::createShadowedModule(StringRef Name, bool IsFramework,
+                                        Module *ShadowingModule) {
+
+  // Create a new module with this name.
+  Module *Result =
+      new Module(Name, SourceLocation(), /*Parent=*/nullptr, IsFramework,
+                 /*IsExplicit=*/false, NumCreatedModules++);
+  Result->ShadowingModule = ShadowingModule;
+  Result->IsAvailable = false;
+  ModuleScopeIDs[Result] = CurrentModuleScopeID;
+  ShadowModules.push_back(Result);
+
+  return Result;
+}
+
 void ModuleMap::setUmbrellaHeader(Module *Mod, const FileEntry *UmbrellaHeader,
                                   Twine NameAsWritten) {
   Headers[UmbrellaHeader].push_back(KnownHeader(Mod, NormalHeader));
@@ -1188,6 +1225,7 @@
 //----------------------------------------------------------------------------//
 
 namespace clang {
+
   /// \brief A token in a module map file.
   struct MMToken {
     enum TokenKind {
@@ -1226,6 +1264,7 @@
     union {
       // If Kind != IntegerLiteral.
       const char *StringData;
+
       // If Kind == IntegerLiteral.
       uint64_t IntegerValue;
     };
@@ -1275,7 +1314,7 @@
     bool IsSystem;
     
     /// \brief Whether an error occurred.
-    bool HadError;
+    bool HadError = false;
         
     /// \brief Stores string data for the various string literals referenced
     /// during parsing.
@@ -1285,7 +1324,7 @@
     MMToken Tok;
     
     /// \brief The active module.
-    Module *ActiveModule;
+    Module *ActiveModule = nullptr;
 
     /// \brief Whether a module uses the 'requires excluded' hack to mark its
     /// contents as 'textual'.
@@ -1299,18 +1338,18 @@
 
     /// \brief Consume the current token and return its location.
     SourceLocation consumeToken();
-    
+
     /// \brief Skip tokens until we reach the a token with the given kind
     /// (or the end of the file).
     void skipUntil(MMToken::TokenKind K);
 
-    typedef SmallVector<std::pair<std::string, SourceLocation>, 2> ModuleId;
+    using ModuleId = SmallVector<std::pair<std::string, SourceLocation>, 2>;
+
     bool parseModuleId(ModuleId &Id);
     void parseModuleDecl();
     void parseExternModuleDecl();
     void parseRequiresDecl();
-    void parseHeaderDecl(clang::MMToken::TokenKind,
-                         SourceLocation LeadingLoc);
+    void parseHeaderDecl(MMToken::TokenKind, SourceLocation LeadingLoc);
     void parseUmbrellaDirDecl(SourceLocation UmbrellaLoc);
     void parseExportDecl();
     void parseExportAsDecl();
@@ -1320,31 +1359,29 @@
     void parseConflict();
     void parseInferredModuleDecl(bool Framework, bool Explicit);
 
-    typedef ModuleMap::Attributes Attributes;
+    using Attributes = ModuleMap::Attributes;
+
     bool parseOptionalAttributes(Attributes &Attrs);
     
   public:
-    explicit ModuleMapParser(Lexer &L, SourceManager &SourceMgr, 
-                             const TargetInfo *Target,
-                             DiagnosticsEngine &Diags,
-                             ModuleMap &Map,
-                             const FileEntry *ModuleMapFile,
-                             const DirectoryEntry *Directory,
-                             bool IsSystem)
-      : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map), 
-        ModuleMapFile(ModuleMapFile), Directory(Directory),
-        IsSystem(IsSystem), HadError(false), ActiveModule(nullptr)
-    {
+    explicit ModuleMapParser(Lexer &L, SourceManager &SourceMgr,
+                             const TargetInfo *Target, DiagnosticsEngine &Diags,
+                             ModuleMap &Map, const FileEntry *ModuleMapFile,
+                             const DirectoryEntry *Directory, bool IsSystem)
+        : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map),
+          ModuleMapFile(ModuleMapFile), Directory(Directory),
+          IsSystem(IsSystem) {
       Tok.clear();
       consumeToken();
     }
-    
+
     bool parseModuleMapFile();
 
     bool terminatedByDirective() { return false; }
     SourceLocation getLocation() { return Tok.getLocation(); }
   };
-}
+
+} // namespace clang
 
 SourceLocation ModuleMapParser::consumeToken() {
   SourceLocation Result = Tok.getLocation();
@@ -1566,19 +1603,73 @@
 }
 
 namespace {
+
   /// \brief Enumerates the known attributes.
   enum AttributeKind {
     /// \brief An unknown attribute.
     AT_unknown,
+
     /// \brief The 'system' attribute.
     AT_system,
+
     /// \brief The 'extern_c' attribute.
     AT_extern_c,
+
     /// \brief The 'exhaustive' attribute.
     AT_exhaustive,
+
     /// \brief The 'no_undeclared_includes' attribute.
     AT_no_undeclared_includes
   };
+
+} // namespace
+
+/// Private modules are canonicalized as Foo_Private. Clang provides extra
+/// module map search logic to find the appropriate private module when PCH
+/// is used with implicit module maps. Warn when private modules are written
+/// in other ways (FooPrivate and Foo.Private), providing notes and fixits.
+static void diagnosePrivateModules(const ModuleMap &Map,
+                                   DiagnosticsEngine &Diags,
+                                   const Module *ActiveModule) {
+
+  auto GenNoteAndFixIt = [&](StringRef BadName, StringRef Canonical,
+                             const Module *M) {
+    auto D = Diags.Report(ActiveModule->DefinitionLoc,
+                          diag::note_mmap_rename_top_level_private_module);
+    D << BadName << M->Name;
+    D << FixItHint::CreateReplacement(ActiveModule->DefinitionLoc, Canonical);
+  };
+
+  for (auto E = Map.module_begin(); E != Map.module_end(); ++E) {
+    auto const *M = E->getValue();
+    if (M->Directory != ActiveModule->Directory)
+      continue;
+
+    SmallString<128> FullName(ActiveModule->getFullModuleName());
+    if (!FullName.startswith(M->Name) && !FullName.endswith("Private"))
+      continue;
+    SmallString<128> Canonical(M->Name);
+    Canonical.append("_Private");
+
+    // Foo.Private -> Foo_Private
+    if (ActiveModule->Parent && ActiveModule->Name == "Private" && !M->Parent &&
+        M->Name == ActiveModule->Parent->Name) {
+      Diags.Report(ActiveModule->DefinitionLoc,
+                   diag::warn_mmap_mismatched_private_submodule)
+          << FullName;
+      GenNoteAndFixIt(FullName, Canonical, M);
+      continue;
+    }
+
+    // FooPrivate and whatnots -> Foo_Private
+    if (!ActiveModule->Parent && !M->Parent && M->Name != ActiveModule->Name &&
+        ActiveModule->Name != Canonical) {
+      Diags.Report(ActiveModule->DefinitionLoc,
+                   diag::warn_mmap_mismatched_private_module_name)
+          << ActiveModule->Name;
+      GenNoteAndFixIt(ActiveModule->Name, Canonical, M);
+    }
+  }
 }
 
 /// \brief Parse a module declaration.
@@ -1702,7 +1793,6 @@
   if (parseOptionalAttributes(Attrs))
     return;
 
-  
   // Parse the opening brace.
   if (!Tok.is(MMToken::LBrace)) {
     Diags.Report(Tok.getLocation(), diag::err_mmap_expected_lbrace)
@@ -1713,6 +1803,7 @@
   SourceLocation LBraceLoc = consumeToken();
   
   // Determine whether this (sub)module has already been defined.
+  Module *ShadowingModule = nullptr;
   if (Module *Existing = Map.lookupModuleQualified(ModuleName, ActiveModule)) {
     // We might see a (re)definition of a module that we already have a
     // definition for in two cases:
@@ -1738,23 +1829,35 @@
       }
       return;
     }
-    
-    Diags.Report(ModuleNameLoc, diag::err_mmap_module_redefinition)
-      << ModuleName;
-    Diags.Report(Existing->DefinitionLoc, diag::note_mmap_prev_definition);
-    
-    // Skip the module definition.
-    skipUntil(MMToken::RBrace);
-    if (Tok.is(MMToken::RBrace))
-      consumeToken();
-    
-    HadError = true;
-    return;
+
+    if (!Existing->Parent && Map.mayShadowNewModule(Existing)) {
+      ShadowingModule = Existing;
+    } else {
+      // This is not a shawdowed module decl, it is an illegal redefinition.
+      Diags.Report(ModuleNameLoc, diag::err_mmap_module_redefinition)
+          << ModuleName;
+      Diags.Report(Existing->DefinitionLoc, diag::note_mmap_prev_definition);
+
+      // Skip the module definition.
+      skipUntil(MMToken::RBrace);
+      if (Tok.is(MMToken::RBrace))
+        consumeToken();
+
+      HadError = true;
+      return;
+    }
   }
 
   // Start defining this module.
-  ActiveModule = Map.findOrCreateModule(ModuleName, ActiveModule, Framework,
-                                        Explicit).first;
+  if (ShadowingModule) {
+    ActiveModule =
+        Map.createShadowedModule(ModuleName, Framework, ShadowingModule);
+  } else {
+    ActiveModule =
+        Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit)
+            .first;
+  }
+
   ActiveModule->DefinitionLoc = ModuleNameLoc;
   if (Attrs.IsSystem || IsSystem)
     ActiveModule->IsSystem = true;
@@ -1765,41 +1868,21 @@
     ActiveModule->NoUndeclaredIncludes = true;
   ActiveModule->Directory = Directory;
 
-  if (!ActiveModule->Parent) {
-    StringRef MapFileName(ModuleMapFile->getName());
-    if (MapFileName.endswith("module.private.modulemap") ||
-        MapFileName.endswith("module_private.map")) {
-      // Adding a top-level module from a private modulemap is likely a
-      // user error; we check to see if there's another top-level module
-      // defined in the non-private map in the same dir, and if so emit a
-      // warning.
-      for (auto E = Map.module_begin(); E != Map.module_end(); ++E) {
-        auto const *M = E->getValue();
-        if (!M->Parent &&
-            M->Directory == ActiveModule->Directory &&
-            M->Name != ActiveModule->Name) {
-          Diags.Report(ActiveModule->DefinitionLoc,
-                       diag::warn_mmap_mismatched_top_level_private)
-            << ActiveModule->Name << M->Name;
-          // The pattern we're defending against here is typically due to
-          // a module named FooPrivate which is supposed to be a submodule
-          // called Foo.Private. Emit a fixit in that case.
-          auto D =
-            Diags.Report(ActiveModule->DefinitionLoc,
-                         diag::note_mmap_rename_top_level_private_as_submodule);
-          D << ActiveModule->Name << M->Name;
-          StringRef Bad(ActiveModule->Name);
-          if (Bad.consume_back("Private")) {
-            SmallString<128> Fixed = Bad;
-            Fixed.append(".Private");
-            D << FixItHint::CreateReplacement(ActiveModule->DefinitionLoc,
-                                              Fixed);
-          }
-          break;
-        }
-      }
-    }
-  }
+
+  // Private modules named as FooPrivate, Foo.Private or similar are likely a
+  // user error; provide warnings, notes and fixits to direct users to use
+  // Foo_Private instead.
+  SourceLocation StartLoc =
+      SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
+  StringRef MapFileName(ModuleMapFile->getName());
+  if (Map.HeaderInfo.getHeaderSearchOpts().ImplicitModuleMaps &&
+      !Diags.isIgnored(diag::warn_mmap_mismatched_private_submodule,
+                       StartLoc) &&
+      !Diags.isIgnored(diag::warn_mmap_mismatched_private_module_name,
+                       StartLoc) &&
+      (MapFileName.endswith("module.private.modulemap") ||
+       MapFileName.endswith("module_private.map")))
+    diagnosePrivateModules(Map, Diags, ActiveModule);
 
   bool Done = false;
   do {
@@ -2574,7 +2657,7 @@
       Done = true;
       break;
 
-    case MMToken::ExcludeKeyword: {
+    case MMToken::ExcludeKeyword:
       if (ActiveModule) {
         Diags.Report(Tok.getLocation(), diag::err_mmap_expected_inferred_member)
           << (ActiveModule != nullptr);
@@ -2593,7 +2676,6 @@
         .push_back(Tok.getString());
       consumeToken();
       break;
-    }
 
     case MMToken::ExportKeyword:
       if (!ActiveModule) {
@@ -2801,5 +2883,6 @@
   // Notify callbacks that we parsed it.
   for (const auto &Cb : Callbacks)
     Cb->moduleMapFileRead(Start, *File, IsSystem);
+
   return Result;
 }
diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp
index f5e8cdc..9758557 100644
--- a/lib/Lex/PPCaching.cpp
+++ b/lib/Lex/PPCaching.cpp
@@ -105,8 +105,10 @@
 }
 
 void Preprocessor::EnterCachingLexMode() {
-  if (InCachingLexMode())
+  if (InCachingLexMode()) {
+    assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind");
     return;
+  }
 
   PushIncludeMacroStack();
   CurLexerKind = CLK_CachingLexer;
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index f9a9750..a901a20 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -115,6 +115,24 @@
   return false;
 }
 
+// The -fmodule-name option (represented here by \p CurrentModule) tells the
+// compiler to textually include headers in the specified module, meaning clang
+// won't build the specified module. This is useful in a number of situations,
+// for instance, when building a library that vends a module map, one might want
+// to avoid hitting intermediate build products containig the the module map or
+// avoid finding the system installed modulemap for that library.
+static bool isForModuleBuilding(Module *M, StringRef CurrentModule) {
+  StringRef TopLevelName = M->getTopLevelModuleName();
+
+  // When building framework Foo, we wanna make sure that Foo *and* Foo_Private
+  // are textually included and no modules are built for both.
+  if (M->getTopLevelModule()->IsFramework &&
+      !CurrentModule.endswith("_Private") && TopLevelName.endswith("_Private"))
+    TopLevelName = TopLevelName.drop_back(8);
+
+  return TopLevelName == CurrentModule;
+}
+
 static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
   const LangOptions &Lang = PP.getLangOpts();
   StringRef Text = II->getName();
@@ -350,7 +368,7 @@
 /// If ElseOk is true, then \#else directives are ok, if not, then we have
 /// already seen one so a \#else directive is a duplicate.  When this returns,
 /// the caller can lex the first valid token.
-void Preprocessor::SkipExcludedConditionalBlock(const Token &HashToken,
+void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
                                                 SourceLocation IfTokenLoc,
                                                 bool FoundNonSkipPortion,
                                                 bool FoundElse,
@@ -358,8 +376,11 @@
   ++NumSkipped;
   assert(!CurTokenLexer && CurPPLexer && "Lexing a macro, not a file?");
 
-  CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false,
-                                 FoundNonSkipPortion, FoundElse);
+  if (PreambleConditionalStack.reachedEOFWhileSkipping())
+    PreambleConditionalStack.clearSkipInfo();
+  else
+    CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
+                                     FoundNonSkipPortion, FoundElse);
 
   if (CurPTHLexer) {
     PTHSkipExcludedConditionalBlock();
@@ -385,6 +406,9 @@
       // We don't emit errors for unterminated conditionals here,
       // Lexer::LexEndOfFile can do that propertly.
       // Just return and let the caller lex after this #include.
+      if (PreambleConditionalStack.isRecording())
+        PreambleConditionalStack.SkipInfo.emplace(
+            HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc);
       break;
     }
 
@@ -552,9 +576,11 @@
   // the #if block.
   CurPPLexer->LexingRawMode = false;
 
-  if (Callbacks)
+  // The last skipped range isn't actually skipped yet if it's truncated
+  // by the end of the preamble; we'll resume parsing after the preamble.
+  if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
     Callbacks->SourceRangeSkipped(
-        SourceRange(HashToken.getLocation(), CurPPLexer->getSourceLocation()),
+        SourceRange(HashTokenLoc, CurPPLexer->getSourceLocation()),
         Tok.getLocation());
 }
 
@@ -1649,12 +1675,18 @@
                                           DiagnosticsEngine &Diags, Module *M) {
   Module::Requirement Requirement;
   Module::UnresolvedHeaderDirective MissingHeader;
-  if (M->isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader))
+  Module *ShadowingModule = nullptr;
+  if (M->isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
+                     ShadowingModule))
     return false;
 
   if (MissingHeader.FileNameLoc.isValid()) {
     Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
         << MissingHeader.IsUmbrella << MissingHeader.FileName;
+  } else if (ShadowingModule) {
+    Diags.Report(M->DefinitionLoc, diag::err_module_shadowed) << M->Name;
+    Diags.Report(ShadowingModule->DefinitionLoc,
+                 diag::note_previous_definition);
   } else {
     // FIXME: Track the location at which the requirement was specified, and
     // use it here.
@@ -1842,8 +1874,8 @@
   // there is one. Don't do so if precompiled module support is disabled or we
   // are processing this module textually (because we're building the module).
   if (ShouldEnter && File && SuggestedModule && getLangOpts().Modules &&
-      SuggestedModule.getModule()->getTopLevelModuleName() !=
-          getLangOpts().CurrentModule) {
+      !isForModuleBuilding(SuggestedModule.getModule(),
+                           getLangOpts().CurrentModule)) {
     // If this include corresponds to a module but that module is
     // unavailable, diagnose the situation and bail out.
     // FIXME: Remove this; loadModule does the same check (but produces
@@ -1990,7 +2022,7 @@
       // ShouldEnter is false because we are skipping the header. In that
       // case, We are not importing the specified module.
       if (SkipHeader && getLangOpts().CompilingPCH &&
-          M->getTopLevelModuleName() == getLangOpts().CurrentModule)
+          isForModuleBuilding(M, getLangOpts().CurrentModule))
         return;
 
       makeModuleVisible(M, HashLoc);
@@ -2018,11 +2050,20 @@
 
   // Determine if we're switching to building a new submodule, and which one.
   if (auto *M = SuggestedModule.getModule()) {
+    if (M->getTopLevelModule()->ShadowingModule) {
+      // We are building a submodule that belongs to a shadowed module. This
+      // means we find header files in the shadowed module.
+      Diag(M->DefinitionLoc, diag::err_module_build_shadowed_submodule)
+        << M->getFullModuleName();
+      Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc,
+           diag::note_previous_definition);
+      return;
+    }
     // When building a pch, -fmodule-name tells the compiler to textually
     // include headers in the specified module. We are not building the
     // specified module.
     if (getLangOpts().CompilingPCH &&
-        M->getTopLevelModuleName() == getLangOpts().CurrentModule)
+        isForModuleBuilding(M, getLangOpts().CurrentModule))
       return;
 
     assert(!CurLexerSubmodule && "should not have marked this as a module yet");
@@ -2676,7 +2717,8 @@
   if (MacroNameTok.is(tok::eod)) {
     // Skip code until we get to #endif.  This helps with recovery by not
     // emitting an error when the #endif is reached.
-    SkipExcludedConditionalBlock(HashToken, DirectiveTok.getLocation(),
+    SkipExcludedConditionalBlock(HashToken.getLocation(),
+                                 DirectiveTok.getLocation(),
                                  /*Foundnonskip*/ false, /*FoundElse*/ false);
     return;
   }
@@ -2725,7 +2767,8 @@
                                      /*foundelse*/false);
   } else {
     // No, skip the contents of this block.
-    SkipExcludedConditionalBlock(HashToken, DirectiveTok.getLocation(),
+    SkipExcludedConditionalBlock(HashToken.getLocation(),
+                                 DirectiveTok.getLocation(),
                                  /*Foundnonskip*/ false,
                                  /*FoundElse*/ false);
   }
@@ -2772,7 +2815,7 @@
                                    /*foundnonskip*/true, /*foundelse*/false);
   } else {
     // No, skip the contents of this block.
-    SkipExcludedConditionalBlock(HashToken, IfToken.getLocation(),
+    SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
                                  /*Foundnonskip*/ false,
                                  /*FoundElse*/ false);
   }
@@ -2837,7 +2880,8 @@
   }
 
   // Finally, skip the rest of the contents of this block.
-  SkipExcludedConditionalBlock(HashToken, CI.IfLoc, /*Foundnonskip*/ true,
+  SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
+                               /*Foundnonskip*/ true,
                                /*FoundElse*/ true, Result.getLocation());
 }
 
@@ -2881,7 +2925,7 @@
   }
 
   // Finally, skip the rest of the contents of this block.
-  SkipExcludedConditionalBlock(HashToken, CI.IfLoc, /*Foundnonskip*/ true,
-                               /*FoundElse*/ CI.FoundElse,
-                               ElifToken.getLocation());
+  SkipExcludedConditionalBlock(
+      HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
+      /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
 }
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index e484e9c..f217873 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -444,6 +444,7 @@
       }
 
       CurPPLexer = nullptr;
+      recomputeCurLexerKind();
       return true;
     }
 
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index b0330f2..7f524c5 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -369,11 +369,17 @@
   Ident__has_extension    = RegisterBuiltinMacro(*this, "__has_extension");
   Ident__has_builtin      = RegisterBuiltinMacro(*this, "__has_builtin");
   Ident__has_attribute    = RegisterBuiltinMacro(*this, "__has_attribute");
+  Ident__has_c_attribute  = RegisterBuiltinMacro(*this, "__has_c_attribute");
   Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute");
   Ident__has_include      = RegisterBuiltinMacro(*this, "__has_include");
   Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
   Ident__has_warning      = RegisterBuiltinMacro(*this, "__has_warning");
   Ident__is_identifier    = RegisterBuiltinMacro(*this, "__is_identifier");
+  Ident__is_target_arch   = RegisterBuiltinMacro(*this, "__is_target_arch");
+  Ident__is_target_vendor = RegisterBuiltinMacro(*this, "__is_target_vendor");
+  Ident__is_target_os     = RegisterBuiltinMacro(*this, "__is_target_os");
+  Ident__is_target_environment =
+      RegisterBuiltinMacro(*this, "__is_target_environment");
 
   // Modules.
   Ident__building_module  = RegisterBuiltinMacro(*this, "__building_module");
@@ -1098,6 +1104,8 @@
       .Case("address_sanitizer",
             LangOpts.Sanitize.hasOneOf(SanitizerKind::Address |
                                        SanitizerKind::KernelAddress))
+      .Case("hwaddress_sanitizer",
+            LangOpts.Sanitize.hasOneOf(SanitizerKind::HWAddress))
       .Case("assume_nonnull", true)
       .Case("attribute_analyzer_noreturn", true)
       .Case("attribute_availability", true)
@@ -1135,12 +1143,15 @@
       .Case("nullability_on_arrays", true)
       .Case("memory_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Memory))
       .Case("thread_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Thread))
-      .Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow))
+      .Case("dataflow_sanitizer",
+            LangOpts.Sanitize.has(SanitizerKind::DataFlow))
       .Case("efficiency_sanitizer",
             LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency))
+      .Case("scudo", LangOpts.Sanitize.hasOneOf(SanitizerKind::Scudo))
       // Objective-C features
       .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE?
       .Case("objc_arc", LangOpts.ObjCAutoRefCount)
+      .Case("objc_arc_fields", true)
       .Case("objc_arc_weak", LangOpts.ObjCWeak)
       .Case("objc_default_synthesize_properties", LangOpts.ObjC2)
       .Case("objc_fixed_enum", LangOpts.ObjC2)
@@ -1588,6 +1599,56 @@
   return nullptr;
 }
 
+/// Implements the __is_target_arch builtin macro.
+static bool isTargetArch(const TargetInfo &TI, const IdentifierInfo *II) {
+  std::string ArchName = II->getName().lower() + "--";
+  llvm::Triple Arch(ArchName);
+  const llvm::Triple &TT = TI.getTriple();
+  if (TT.isThumb()) {
+    // arm matches thumb or thumbv7. armv7 matches thumbv7.
+    if ((Arch.getSubArch() == llvm::Triple::NoSubArch ||
+         Arch.getSubArch() == TT.getSubArch()) &&
+        ((TT.getArch() == llvm::Triple::thumb &&
+          Arch.getArch() == llvm::Triple::arm) ||
+         (TT.getArch() == llvm::Triple::thumbeb &&
+          Arch.getArch() == llvm::Triple::armeb)))
+      return true;
+  }
+  // Check the parsed arch when it has no sub arch to allow Clang to
+  // match thumb to thumbv7 but to prohibit matching thumbv6 to thumbv7.
+  return (Arch.getSubArch() == llvm::Triple::NoSubArch ||
+          Arch.getSubArch() == TT.getSubArch()) &&
+         Arch.getArch() == TT.getArch();
+}
+
+/// Implements the __is_target_vendor builtin macro.
+static bool isTargetVendor(const TargetInfo &TI, const IdentifierInfo *II) {
+  StringRef VendorName = TI.getTriple().getVendorName();
+  if (VendorName.empty())
+    VendorName = "unknown";
+  return VendorName.equals_lower(II->getName());
+}
+
+/// Implements the __is_target_os builtin macro.
+static bool isTargetOS(const TargetInfo &TI, const IdentifierInfo *II) {
+  std::string OSName =
+      (llvm::Twine("unknown-unknown-") + II->getName().lower()).str();
+  llvm::Triple OS(OSName);
+  if (OS.getOS() == llvm::Triple::Darwin) {
+    // Darwin matches macos, ios, etc.
+    return TI.getTriple().isOSDarwin();
+  }
+  return TI.getTriple().getOS() == OS.getOS();
+}
+
+/// Implements the __is_target_environment builtin macro.
+static bool isTargetEnvironment(const TargetInfo &TI,
+                                const IdentifierInfo *II) {
+  std::string EnvName = (llvm::Twine("---") + II->getName().lower()).str();
+  llvm::Triple Env(EnvName);
+  return TI.getTriple().getEnvironment() == Env.getEnvironment();
+}
+
 /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
 /// as a builtin macro, handle it and return the next token as 'Tok'.
 void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
@@ -1750,6 +1811,10 @@
                       .Case("__make_integer_seq", LangOpts.CPlusPlus)
                       .Case("__type_pack_element", LangOpts.CPlusPlus)
                       .Case("__builtin_available", true)
+                      .Case("__is_target_arch", true)
+                      .Case("__is_target_vendor", true)
+                      .Case("__is_target_os", true)
+                      .Case("__is_target_environment", true)
                       .Default(false);
         }
       });
@@ -1774,30 +1839,34 @@
         return II ? hasAttribute(AttrSyntax::Declspec, nullptr, II,
                                  getTargetInfo(), getLangOpts()) : 0;
       });
-  } else if (II == Ident__has_cpp_attribute) {
-    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
-      [this](Token &Tok, bool &HasLexedNextToken) -> int {
-        IdentifierInfo *ScopeII = nullptr;
-        IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
-                                           diag::err_feature_check_malformed);
-        if (!II)
-          return false;
+  } else if (II == Ident__has_cpp_attribute ||
+             II == Ident__has_c_attribute) {
+    bool IsCXX = II == Ident__has_cpp_attribute;
+    EvaluateFeatureLikeBuiltinMacro(
+        OS, Tok, II, *this, [&](Token &Tok, bool &HasLexedNextToken) -> int {
+          IdentifierInfo *ScopeII = nullptr;
+          IdentifierInfo *II = ExpectFeatureIdentifierInfo(
+              Tok, *this, diag::err_feature_check_malformed);
+          if (!II)
+            return false;
 
-        // It is possible to receive a scope token.  Read the "::", if it is
-        // available, and the subsequent identifier.
-        LexUnexpandedToken(Tok);
-        if (Tok.isNot(tok::coloncolon))
-          HasLexedNextToken = true;
-        else {
-          ScopeII = II;
+          // It is possible to receive a scope token.  Read the "::", if it is
+          // available, and the subsequent identifier.
           LexUnexpandedToken(Tok);
-          II = ExpectFeatureIdentifierInfo(Tok, *this,
-                                           diag::err_feature_check_malformed);
-        }
+          if (Tok.isNot(tok::coloncolon))
+            HasLexedNextToken = true;
+          else {
+            ScopeII = II;
+            LexUnexpandedToken(Tok);
+            II = ExpectFeatureIdentifierInfo(Tok, *this,
+                                             diag::err_feature_check_malformed);
+          }
 
-        return II ? hasAttribute(AttrSyntax::CXX, ScopeII, II,
-                                 getTargetInfo(), getLangOpts()) : 0;
-      });
+          AttrSyntax Syntax = IsCXX ? AttrSyntax::CXX : AttrSyntax::C;
+          return II ? hasAttribute(Syntax, ScopeII, II, getTargetInfo(),
+                                   getLangOpts())
+                    : 0;
+        });
   } else if (II == Ident__has_include ||
              II == Ident__has_include_next) {
     // The argument to these two builtins should be a parenthesized
@@ -1897,6 +1966,34 @@
       Diag(LParenLoc, diag::note_matching) << tok::l_paren;
     }
     return;
+  } else if (II == Ident__is_target_arch) {
+    EvaluateFeatureLikeBuiltinMacro(
+        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+          IdentifierInfo *II = ExpectFeatureIdentifierInfo(
+              Tok, *this, diag::err_feature_check_malformed);
+          return II && isTargetArch(getTargetInfo(), II);
+        });
+  } else if (II == Ident__is_target_vendor) {
+    EvaluateFeatureLikeBuiltinMacro(
+        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+          IdentifierInfo *II = ExpectFeatureIdentifierInfo(
+              Tok, *this, diag::err_feature_check_malformed);
+          return II && isTargetVendor(getTargetInfo(), II);
+        });
+  } else if (II == Ident__is_target_os) {
+    EvaluateFeatureLikeBuiltinMacro(
+        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+          IdentifierInfo *II = ExpectFeatureIdentifierInfo(
+              Tok, *this, diag::err_feature_check_malformed);
+          return II && isTargetOS(getTargetInfo(), II);
+        });
+  } else if (II == Ident__is_target_environment) {
+    EvaluateFeatureLikeBuiltinMacro(
+        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+          IdentifierInfo *II = ExpectFeatureIdentifierInfo(
+              Tok, *this, diag::err_feature_check_malformed);
+          return II && isTargetEnvironment(getTargetInfo(), II);
+        });
   } else {
     llvm_unreachable("Unknown identifier!");
   }
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index ec806e8..be6f684 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -1,4 +1,4 @@
-//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
+//===- PTHLexer.cpp - Lex from a token stream -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,19 +12,32 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/PTHLexer.h"
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/FileSystemStatCache.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/PTHManager.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/Token.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/EndianStream.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DJB.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/OnDiskHashTable.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
 #include <memory>
-#include <system_error>
+#include <utility>
+
 using namespace clang;
 
 static const unsigned StoredTokenSize = 1 + 1 + 2 + 4 + 4;
@@ -35,9 +48,8 @@
 
 PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
                    const unsigned char *ppcond, PTHManager &PM)
-  : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr),
-    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
-
+    : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), PPCond(ppcond),
+      CurPPCondPtr(ppcond), PTHMgr(PM) {
   FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
 }
 
@@ -133,7 +145,7 @@
     ParsingPreprocessorDirective = false; // Done parsing the "line".
     return true;  // Have a token.
   }
-  
+
   assert(!LexingRawMode);
 
   // If we are in a #if directive, emit an error.
@@ -167,7 +179,7 @@
   // We don't need to actually reconstruct full tokens from the token buffer.
   // This saves some copies and it also reduces IdentifierInfo* lookup.
   const unsigned char* p = CurPtr;
-  while (1) {
+  while (true) {
     // Read the token kind.  Are we at the end of the file?
     tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
     if (x == tok::eof) break;
@@ -186,6 +198,7 @@
 /// SkipBlock - Used by Preprocessor to skip the current conditional block.
 bool PTHLexer::SkipBlock() {
   using namespace llvm::support;
+
   assert(CurPPCondPtr && "No cached PP conditional information.");
   assert(LastHashTokPtr && "No known '#' token.");
 
@@ -303,31 +316,33 @@
 ///  to map from FileEntry objects managed by FileManager to offsets within
 ///  the PTH file.
 namespace {
+
 class PTHFileData {
   const uint32_t TokenOff;
   const uint32_t PPCondOff;
+
 public:
   PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
-    : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
+      : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
 
   uint32_t getTokenOffset() const { return TokenOff; }
   uint32_t getPPCondOffset() const { return PPCondOff; }
 };
 
-
 class PTHFileLookupCommonTrait {
 public:
-  typedef std::pair<unsigned char, StringRef> internal_key_type;
-  typedef unsigned hash_value_type;
-  typedef unsigned offset_type;
+  using internal_key_type = std::pair<unsigned char, StringRef>;
+  using hash_value_type = unsigned;
+  using offset_type = unsigned;
 
   static hash_value_type ComputeHash(internal_key_type x) {
-    return llvm::HashString(x.second);
+    return llvm::djbHash(x.second);
   }
 
   static std::pair<unsigned, unsigned>
   ReadKeyDataLength(const unsigned char*& d) {
     using namespace llvm::support;
+
     unsigned keyLen =
         (unsigned)endian::readNext<uint16_t, little, unaligned>(d);
     unsigned dataLen = (unsigned) *(d++);
@@ -340,12 +355,12 @@
   }
 };
 
-} // end anonymous namespace
+} // namespace
 
 class PTHManager::PTHFileLookupTrait : public PTHFileLookupCommonTrait {
 public:
-  typedef const FileEntry* external_key_type;
-  typedef PTHFileData      data_type;
+  using external_key_type = const FileEntry *;
+  using data_type = PTHFileData;
 
   static internal_key_type GetInternalKey(const FileEntry* FE) {
     return std::make_pair((unsigned char) 0x1, FE->getName());
@@ -357,8 +372,9 @@
 
   static PTHFileData ReadData(const internal_key_type& k,
                               const unsigned char* d, unsigned) {
-    assert(k.first == 0x1 && "Only file lookups can match!");
     using namespace llvm::support;
+
+    assert(k.first == 0x1 && "Only file lookups can match!");
     uint32_t x = endian::readNext<uint32_t, little, unaligned>(d);
     uint32_t y = endian::readNext<uint32_t, little, unaligned>(d);
     return PTHFileData(x, y);
@@ -367,11 +383,11 @@
 
 class PTHManager::PTHStringLookupTrait {
 public:
-  typedef uint32_t data_type;
-  typedef const std::pair<const char*, unsigned> external_key_type;
-  typedef external_key_type internal_key_type;
-  typedef uint32_t hash_value_type;
-  typedef unsigned offset_type;
+  using data_type = uint32_t;
+  using external_key_type = const std::pair<const char *, unsigned>;
+  using internal_key_type = external_key_type;
+  using hash_value_type = uint32_t;
+  using offset_type = unsigned;
 
   static bool EqualKey(const internal_key_type& a,
                        const internal_key_type& b) {
@@ -380,7 +396,7 @@
   }
 
   static hash_value_type ComputeHash(const internal_key_type& a) {
-    return llvm::HashString(StringRef(a.first, a.second));
+    return llvm::djbHash(StringRef(a.first, a.second));
   }
 
   // This hopefully will just get inlined and removed by the optimizer.
@@ -390,6 +406,7 @@
   static std::pair<unsigned, unsigned>
   ReadKeyDataLength(const unsigned char*& d) {
     using namespace llvm::support;
+
     return std::make_pair(
         (unsigned)endian::readNext<uint16_t, little, unaligned>(d),
         sizeof(uint32_t));
@@ -404,6 +421,7 @@
   static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
                            unsigned) {
     using namespace llvm::support;
+
     return endian::readNext<uint32_t, little, unaligned>(d);
   }
 };
@@ -420,11 +438,10 @@
     const unsigned char *spellingBase, const char *originalSourceFile)
     : Buf(std::move(buf)), PerIDCache(std::move(perIDCache)),
       FileLookup(std::move(fileLookup)), IdDataTable(idDataTable),
-      StringIdLookup(std::move(stringIdLookup)), NumIds(numIds), PP(nullptr),
+      StringIdLookup(std::move(stringIdLookup)), NumIds(numIds),
       SpellingBase(spellingBase), OriginalSourceFile(originalSourceFile) {}
 
-PTHManager::~PTHManager() {
-}
+PTHManager::~PTHManager() = default;
 
 static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
   Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
@@ -557,6 +574,7 @@
 
 IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
   using namespace llvm::support;
+
   // Look in the PTH file for the string data for the IdentifierInfo object.
   const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
   const unsigned char *IDData =
@@ -566,7 +584,7 @@
 
   // Allocate the object.
   std::pair<IdentifierInfo,const unsigned char*> *Mem =
-    Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
+      Alloc.Allocate<std::pair<IdentifierInfo, const unsigned char *>>();
 
   Mem->second = IDData;
   assert(IDData[0] != '\0');
@@ -626,26 +644,26 @@
 //===----------------------------------------------------------------------===//
 
 namespace {
+
 class PTHStatData {
 public:
   uint64_t Size;
   time_t ModTime;
   llvm::sys::fs::UniqueID UniqueID;
-  const bool HasData;
+  const bool HasData = false;
   bool IsDirectory;
 
+  PTHStatData() = default;
   PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
               bool IsDirectory)
       : Size(Size), ModTime(ModTime), UniqueID(UniqueID), HasData(true),
         IsDirectory(IsDirectory) {}
-
-  PTHStatData() : HasData(false) {}
 };
 
 class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
 public:
-  typedef StringRef external_key_type; // const char*
-  typedef PTHStatData data_type;
+  using external_key_type = StringRef; // const char*
+  using data_type = PTHStatData;
 
   static internal_key_type GetInternalKey(StringRef path) {
     // The key 'kind' doesn't matter here because it is ignored in EqualKey.
@@ -660,7 +678,6 @@
 
   static data_type ReadData(const internal_key_type& k, const unsigned char* d,
                             unsigned) {
-
     if (k.first /* File or Directory */) {
       bool IsDirectory = true;
       if (k.first == 0x1 /* File */) {
@@ -682,11 +699,14 @@
     return data_type();
   }
 };
-} // end anonymous namespace
+
+} // namespace
 
 namespace clang {
+
 class PTHStatCache : public FileSystemStatCache {
-  typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
+  using CacheTy = llvm::OnDiskChainedHashTable<PTHStatLookupTrait>;
+
   CacheTy Cache;
 
 public:
@@ -720,7 +740,8 @@
     return CacheExists;
   }
 };
-}
+
+} // namespace clang
 
 std::unique_ptr<FileSystemStatCache> PTHManager::createStatCache() {
   return llvm::make_unique<PTHStatCache>(*FileLookup);
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index b5d0f93..db32e45 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -1,4 +1,4 @@
-//===--- Pragma.cpp - Pragma registration and handling --------------------===//
+//===- Pragma.cpp - Pragma registration and handling ----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,15 +13,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/Pragma.h"
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Module.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
 #include "clang/Lex/LiteralSupport.h"
 #include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/ModuleLoader.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorLexer.h"
@@ -30,25 +36,27 @@
 #include "clang/Lex/TokenLexer.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
 #include <cassert>
+#include <cstddef>
 #include <cstdint>
 #include <limits>
 #include <string>
+#include <utility>
 #include <vector>
 
 using namespace clang;
 
 // Out-of-line destructor to provide a home for the class.
-PragmaHandler::~PragmaHandler() {
-}
+PragmaHandler::~PragmaHandler() = default;
 
 //===----------------------------------------------------------------------===//
 // EmptyPragmaHandler Implementation.
@@ -144,15 +152,14 @@
 class LexingFor_PragmaRAII {
   Preprocessor &PP;
   bool InMacroArgPreExpansion;
-  bool Failed;
+  bool Failed = false;
   Token &OutTok;
   Token PragmaTok;
 
 public:
   LexingFor_PragmaRAII(Preprocessor &PP, bool InMacroArgPreExpansion,
                        Token &Tok)
-    : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion),
-      Failed(false), OutTok(Tok) {
+      : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion), OutTok(Tok) {
     if (InMacroArgPreExpansion) {
       PragmaTok = OutTok;
       PP.EnableBacktrackAtThisPos();
@@ -186,13 +193,12 @@
   }
 };
 
-} // end anonymous namespace
+} // namespace
 
 /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
 /// return the first token after the directive.  The _Pragma token has just
 /// been read into 'Tok'.
 void Preprocessor::Handle_Pragma(Token &Tok) {
-
   // This works differently if we are pre-expanding a macro argument.
   // In that case we don't actually "activate" the pragma now, we only lex it
   // until we are sure it is lexically correct and then we backtrack so that
@@ -381,7 +387,6 @@
 }
 
 /// HandlePragmaOnce - Handle \#pragma once.  OnceTok is the 'once'.
-///
 void Preprocessor::HandlePragmaOnce(Token &OnceTok) {
   // Don't honor the 'once' when handling the primary source file, unless
   // this is a prefix to a TU, which indicates we're generating a PCH file, or
@@ -406,7 +411,6 @@
 }
 
 /// HandlePragmaPoison - Handle \#pragma GCC poison.  PoisonTok is the 'poison'.
-///
 void Preprocessor::HandlePragmaPoison() {
   Token Tok;
 
@@ -461,7 +465,6 @@
   // Mark the file as a system header.
   HeaderInfo.MarkFileSystemHeader(TheLexer->getFileEntry());
 
-
   PresumedLoc PLoc = SourceMgr.getPresumedLoc(SysHeaderTok.getLocation());
   if (PLoc.isInvalid())
     return;
@@ -482,7 +485,6 @@
 }
 
 /// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah.
-///
 void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
   Token FilenameTok;
   CurPPLexer->LexIncludeFilename(FilenameTok);
@@ -623,7 +625,7 @@
   if (!IdentInfo) return;
 
   // Find the vector<MacroInfo*> associated with the macro.
-  llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> >::iterator iter =
+  llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>::iterator iter =
     PragmaPushMacroInfo.find(IdentInfo);
   if (iter != PragmaPushMacroInfo.end()) {
     // Forget the MacroInfo currently associated with IdentInfo.
@@ -962,6 +964,7 @@
 /// PragmaOnceHandler - "\#pragma once" marks the file as atomically included.
 struct PragmaOnceHandler : public PragmaHandler {
   PragmaOnceHandler() : PragmaHandler("once") {}
+
   void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
                     Token &OnceTok) override {
     PP.CheckEndOfDirective("pragma once");
@@ -1050,6 +1053,20 @@
         PP.Diag(Identifier, diag::warn_pragma_debug_missing_argument)
             << II->getName();
       }
+    } else if (II->isStr("diag_mapping")) {
+      Token DiagName;
+      PP.LexUnexpandedToken(DiagName);
+      if (DiagName.is(tok::eod))
+        PP.getDiagnostics().dump();
+      else if (DiagName.is(tok::string_literal) && !DiagName.hasUDSuffix()) {
+        StringLiteralParser Literal(DiagName, PP);
+        if (Literal.hadError)
+          return;
+        PP.getDiagnostics().dump(Literal.GetString());
+      } else {
+        PP.Diag(DiagName, diag::warn_pragma_debug_missing_argument)
+            << II->getName();
+      }
     } else if (II->isStr("llvm_fatal_error")) {
       llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error");
     } else if (II->isStr("llvm_unreachable")) {
@@ -1116,7 +1133,6 @@
 #ifdef _MSC_VER
     #pragma warning(default : 4717)
 #endif
-
 };
 
 /// PragmaDiagnosticHandler - e.g. '\#pragma GCC diagnostic ignored "-Wformat"'
@@ -1125,8 +1141,8 @@
   const char *Namespace;
 
 public:
-  explicit PragmaDiagnosticHandler(const char *NS) :
-    PragmaHandler("diagnostic"), Namespace(NS) {}
+  explicit PragmaDiagnosticHandler(const char *NS)
+      : PragmaHandler("diagnostic"), Namespace(NS) {}
 
   void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
                     Token &DiagToken) override {
@@ -1330,6 +1346,7 @@
 /// PragmaIncludeAliasHandler - "\#pragma include_alias("...")".
 struct PragmaIncludeAliasHandler : public PragmaHandler {
   PragmaIncludeAliasHandler() : PragmaHandler("include_alias") {}
+
   void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
                     Token &IncludeAliasTok) override {
     PP.HandlePragmaIncludeAlias(IncludeAliasTok);
@@ -1370,7 +1387,8 @@
 public:
   PragmaMessageHandler(PPCallbacks::PragmaMessageKind Kind,
                        StringRef Namespace = StringRef())
-    : PragmaHandler(PragmaKind(Kind, true)), Kind(Kind), Namespace(Namespace) {}
+      : PragmaHandler(PragmaKind(Kind, true)), Kind(Kind),
+        Namespace(Namespace) {}
 
   void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
                     Token &Tok) override {
@@ -1597,45 +1615,6 @@
   }
 };
 
-// Pragma STDC implementations.
-
-/// PragmaSTDC_FENV_ACCESSHandler - "\#pragma STDC FENV_ACCESS ...".
-struct PragmaSTDC_FENV_ACCESSHandler : public PragmaHandler {
-  PragmaSTDC_FENV_ACCESSHandler() : PragmaHandler("FENV_ACCESS") {}
-
-  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
-                    Token &Tok) override {
-    tok::OnOffSwitch OOS;
-    if (PP.LexOnOffSwitch(OOS))
-     return;
-    if (OOS == tok::OOS_ON)
-      PP.Diag(Tok, diag::warn_stdc_fenv_access_not_supported);
-  }
-};
-
-/// PragmaSTDC_CX_LIMITED_RANGEHandler - "\#pragma STDC CX_LIMITED_RANGE ...".
-struct PragmaSTDC_CX_LIMITED_RANGEHandler : public PragmaHandler {
-  PragmaSTDC_CX_LIMITED_RANGEHandler()
-    : PragmaHandler("CX_LIMITED_RANGE") {}
-
-  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
-                    Token &Tok) override {
-    tok::OnOffSwitch OOS;
-    PP.LexOnOffSwitch(OOS);
-  }
-};
-
-/// PragmaSTDC_UnknownHandler - "\#pragma STDC ...".
-struct PragmaSTDC_UnknownHandler : public PragmaHandler {
-  PragmaSTDC_UnknownHandler() {}
-
-  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
-                    Token &UnknownTok) override {
-    // C99 6.10.6p2, unknown forms are not allowed.
-    PP.Diag(UnknownTok, diag::ext_stdc_pragma_ignored);
-  }
-};
-
 /// PragmaARCCFCodeAuditedHandler - 
 ///   \#pragma clang arc_cf_code_audited begin/end
 struct PragmaARCCFCodeAuditedHandler : public PragmaHandler {
@@ -1763,7 +1742,7 @@
 /// <a href="http://msdn.microsoft.com/en-us/library/b6xkz944(v=vs.80).aspx">editor-only</a>
 /// pragma, just skipped by compiler.
 struct PragmaRegionHandler : public PragmaHandler {
-  PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) { }
+  PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) {}
 
   void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
                     Token &NameTok) override {
@@ -1774,7 +1753,7 @@
   }
 };
 
-} // end anonymous namespace
+} // namespace
 
 /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
 /// \#pragma GCC poison/system_header/dependency and \#pragma once.
@@ -1811,17 +1790,15 @@
   ModuleHandler->AddPragma(new PragmaModuleEndHandler());
   ModuleHandler->AddPragma(new PragmaModuleBuildHandler());
   ModuleHandler->AddPragma(new PragmaModuleLoadHandler());
-
-  AddPragmaHandler("STDC", new PragmaSTDC_FENV_ACCESSHandler());
-  AddPragmaHandler("STDC", new PragmaSTDC_CX_LIMITED_RANGEHandler());
-  AddPragmaHandler("STDC", new PragmaSTDC_UnknownHandler());
+    
+  // Add region pragmas.
+  AddPragmaHandler(new PragmaRegionHandler("region"));
+  AddPragmaHandler(new PragmaRegionHandler("endregion"));
 
   // MS extensions.
   if (LangOpts.MicrosoftExt) {
     AddPragmaHandler(new PragmaWarningHandler());
     AddPragmaHandler(new PragmaIncludeAliasHandler());
-    AddPragmaHandler(new PragmaRegionHandler("region"));
-    AddPragmaHandler(new PragmaRegionHandler("endregion"));
   }
 
   // Pragmas added by plugins
@@ -1840,17 +1817,4 @@
   // in Preprocessor::RegisterBuiltinPragmas().
   AddPragmaHandler("GCC", new EmptyPragmaHandler());
   AddPragmaHandler("clang", new EmptyPragmaHandler());
-  if (PragmaHandler *NS = PragmaHandlers->FindHandler("STDC")) {
-    // Preprocessor::RegisterBuiltinPragmas() already registers
-    // PragmaSTDC_UnknownHandler as the empty handler, so remove it first,
-    // otherwise there will be an assert about a duplicate handler.
-    PragmaNamespace *STDCNamespace = NS->getIfNamespace();
-    assert(STDCNamespace &&
-           "Invalid namespace, registered as a regular pragma handler!");
-    if (PragmaHandler *Existing = STDCNamespace->FindHandler("", false)) {
-      RemovePragmaHandler("STDC", Existing);
-      delete Existing;
-    }
-  }
-  AddPragmaHandler("STDC", new EmptyPragmaHandler());
 }
diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp
index 954b569..cd4a63f 100644
--- a/lib/Lex/PreprocessingRecord.cpp
+++ b/lib/Lex/PreprocessingRecord.cpp
@@ -1,4 +1,4 @@
-//===--- PreprocessingRecord.cpp - Record of Preprocessing ------*- C++ -*-===//
+//===- PreprocessingRecord.cpp - Record of Preprocessing ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,15 +11,34 @@
 //  of what occurred during preprocessing, and its helpers.
 //
 //===----------------------------------------------------------------------===//
+
 #include "clang/Lex/PreprocessingRecord.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Token.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Capacity.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <iterator>
+#include <utility>
+#include <vector>
 
 using namespace clang;
 
-ExternalPreprocessingRecordSource::~ExternalPreprocessingRecordSource() { }
+ExternalPreprocessingRecordSource::~ExternalPreprocessingRecordSource() =
+    default;
 
 InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec,
                                        InclusionKind Kind, StringRef FileName,
@@ -33,10 +52,7 @@
   this->FileName = StringRef(Memory, FileName.size());
 }
 
-PreprocessingRecord::PreprocessingRecord(SourceManager &SM)
-  : SourceMgr(SM),
-    ExternalSource(nullptr) {
-}
+PreprocessingRecord::PreprocessingRecord(SourceManager &SM) : SourceMgr(SM) {}
 
 /// \brief Returns a pair of [Begin, End) iterators of preprocessed entities
 /// that source range \p Range encompasses.
@@ -166,7 +182,7 @@
 struct PPEntityComp {
   const SourceManager &SM;
 
-  explicit PPEntityComp(const SourceManager &SM) : SM(SM) { }
+  explicit PPEntityComp(const SourceManager &SM) : SM(SM) {}
 
   bool operator()(PreprocessedEntity *L, PreprocessedEntity *R) const {
     SourceLocation LHS = getLoc(L);
@@ -190,7 +206,7 @@
   }
 };
 
-}
+} // namespace
 
 unsigned PreprocessingRecord::findBeginLocalPreprocessedEntity(
                                                      SourceLocation Loc) const {
@@ -271,7 +287,7 @@
   //  FM(M1, M2)
   // \endcode
 
-  typedef std::vector<PreprocessedEntity *>::iterator pp_iter;
+  using pp_iter = std::vector<PreprocessedEntity *>::iterator;
 
   // Usually there are few macro expansions when defining the filename, do a
   // linear search for a few entities.
@@ -313,6 +329,23 @@
   return Result;
 }
 
+unsigned PreprocessingRecord::allocateSkippedRanges(unsigned NumRanges) {
+  unsigned Result = SkippedRanges.size();
+  SkippedRanges.resize(SkippedRanges.size() + NumRanges);
+  SkippedRangesAllLoaded = false;
+  return Result;
+}
+
+void PreprocessingRecord::ensureSkippedRangesLoaded() {
+  if (SkippedRangesAllLoaded || !ExternalSource)
+    return;
+  for (unsigned Index = 0; Index != SkippedRanges.size(); ++Index) {
+    if (SkippedRanges[Index].isInvalid())
+      SkippedRanges[Index] = ExternalSource->ReadSkippedRange(Index);
+  }
+  SkippedRangesAllLoaded = true;
+}
+
 void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro,
                                                   MacroDefinitionRecord *Def) {
   MacroDefinitions[Macro] = Def;
@@ -402,6 +435,7 @@
 
 void PreprocessingRecord::SourceRangeSkipped(SourceRange Range,
                                              SourceLocation EndifLoc) {
+  assert(Range.isValid());
   SkippedRanges.emplace_back(Range.getBegin(), EndifLoc);
 }
 
@@ -430,7 +464,7 @@
 
 void PreprocessingRecord::InclusionDirective(
     SourceLocation HashLoc,
-    const clang::Token &IncludeTok,
+    const Token &IncludeTok,
     StringRef FileName,
     bool IsAngled,
     CharSourceRange FilenameRange,
@@ -470,10 +504,10 @@
       EndLoc = EndLoc.getLocWithOffset(-1); // the InclusionDirective expects
                                             // a token range.
   }
-  clang::InclusionDirective *ID
-    = new (*this) clang::InclusionDirective(*this, Kind, FileName, !IsAngled,
-                                            (bool)Imported,
-                                            File, SourceRange(HashLoc, EndLoc));
+  clang::InclusionDirective *ID =
+      new (*this) clang::InclusionDirective(*this, Kind, FileName, !IsAngled,
+                                            (bool)Imported, File,
+                                            SourceRange(HashLoc, EndLoc));
   addPreprocessedEntity(ID);
 }
 
@@ -481,5 +515,6 @@
   return BumpAlloc.getTotalMemory()
     + llvm::capacity_in_bytes(MacroDefinitions)
     + llvm::capacity_in_bytes(PreprocessedEntities)
-    + llvm::capacity_in_bytes(LoadedPreprocessedEntities);
+    + llvm::capacity_in_bytes(LoadedPreprocessedEntities)
+    + llvm::capacity_in_bytes(SkippedRanges);
 }
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 65df6a5..08469bc 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -1,4 +1,4 @@
-//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
+//===- Preprocess.cpp - C Language Family Preprocessor Implementation -----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -28,22 +28,33 @@
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/FileSystemStatCache.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/ExternalPreprocessorSource.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
 #include "clang/Lex/LiteralSupport.h"
 #include "clang/Lex/MacroArgs.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/PTHLexer.h"
 #include "clang/Lex/PTHManager.h"
 #include "clang/Lex/Pragma.h"
 #include "clang/Lex/PreprocessingRecord.h"
+#include "clang/Lex/PreprocessorLexer.h"
 #include "clang/Lex/PreprocessorOptions.h"
 #include "clang/Lex/ScratchBuffer.h"
+#include "clang/Lex/Token.h"
+#include "clang/Lex/TokenLexer.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -65,8 +76,7 @@
 
 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
 
-//===----------------------------------------------------------------------===//
-ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
+ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
 
 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
                            DiagnosticsEngine &diags, LangOptions &opts,
@@ -74,34 +84,16 @@
                            HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
                            TranslationUnitKind TUKind)
-    : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
-      AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
+    : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
+      FileMgr(Headers.getFileMgr()), SourceMgr(SM),
       PCMCache(PCMCache), ScratchBuf(new ScratchBuffer(SourceMgr)),
       HeaderInfo(Headers), TheModuleLoader(TheModuleLoader),
       ExternalSource(nullptr), Identifiers(opts, IILookup),
-      PragmaHandlers(new PragmaNamespace(StringRef())),
-      IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
-      CodeCompletionFile(nullptr), CodeCompletionOffset(0),
-      LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
-      CodeCompletionReached(false), CodeCompletionII(nullptr),
-      MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
-      CurDirLookup(nullptr), CurLexerKind(CLK_Lexer),
-      CurLexerSubmodule(nullptr), Callbacks(nullptr),
-      CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
-      Record(nullptr), MIChainHead(nullptr) {
+      PragmaHandlers(new PragmaNamespace(StringRef())), TUKind(TUKind),
+      SkipMainFilePreamble(0, true),
+      CurSubmoduleState(&NullSubmoduleState) {
   OwnsHeaderSearch = OwnsHeaders;
   
-  CounterValue = 0; // __COUNTER__ starts at 0.
-  
-  // Clear stats.
-  NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
-  NumIf = NumElse = NumEndif = 0;
-  NumEnteredSourceFiles = 0;
-  NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
-  NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
-  MaxIncludeStackDepth = 0;
-  NumSkipped = 0;
-  
   // Default to discarding comments.
   KeepComments = false;
   KeepMacroComments = false;
@@ -117,8 +109,6 @@
   ParsingIfOrElifDirective = false;
   PreprocessedOutput = false;
 
-  CachedLexPos = 0;
-
   // We haven't read anything from the external source.
   ReadMacrosFromExternalSource = false;
 
@@ -430,10 +420,9 @@
   CodeCompletionFile = File;
   CodeCompletionOffset = Position - Buffer->getBufferStart();
 
-  std::unique_ptr<MemoryBuffer> NewBuffer =
-      MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
-                                          Buffer->getBufferIdentifier());
-  char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
+  auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
+      Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
+  char *NewBuf = NewBuffer->getBufferStart();
   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
   *NewPos = '\0';
   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
@@ -550,6 +539,13 @@
            "CurPPLexer is null when calling replayPreambleConditionalStack.");
     CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
     PreambleConditionalStack.doneReplaying();
+    if (PreambleConditionalStack.reachedEOFWhileSkipping())
+      SkipExcludedConditionalBlock(
+          PreambleConditionalStack.SkipInfo->HashTokenLoc,
+          PreambleConditionalStack.SkipInfo->IfTokenLoc,
+          PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
+          PreambleConditionalStack.SkipInfo->FoundElse,
+          PreambleConditionalStack.SkipInfo->ElseLoc);
   }
 }
 
@@ -592,7 +588,7 @@
   Identifier.setIdentifierInfo(II);
   if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
       getSourceManager().isInSystemHeader(Identifier.getLocation()))
-    Identifier.setKind(clang::tok::identifier);
+    Identifier.setKind(tok::identifier);
   else
     Identifier.setKind(II->getTokenID());
 
@@ -777,8 +773,13 @@
     }
   } while (!ReturnedToken);
 
-  if (Result.is(tok::code_completion))
+  if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
+    // Remember the identifier before code completion token.
     setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
+    // Set IdenfitierInfo to null to avoid confusing code that handles both
+    // identifiers and completion tokens.
+    Result.setIdentifierInfo(nullptr);
+  }
 
   LastTokenWasAt = Result.is(tok::at);
 }
@@ -934,8 +935,8 @@
 }
 
 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
-  std::vector<CommentHandler *>::iterator Pos
-  = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
+  std::vector<CommentHandler *>::iterator Pos =
+      std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
   CommentHandlers.erase(Pos);
 }
@@ -954,11 +955,11 @@
   return true;
 }
 
-ModuleLoader::~ModuleLoader() { }
+ModuleLoader::~ModuleLoader() = default;
 
-CommentHandler::~CommentHandler() { }
+CommentHandler::~CommentHandler() = default;
 
-CodeCompletionHandler::~CodeCompletionHandler() { }
+CodeCompletionHandler::~CodeCompletionHandler() = default;
 
 void Preprocessor::createPreprocessingRecord() {
   if (Record)
diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp
index 33ccbc0..2e85f46 100644
--- a/lib/Lex/PreprocessorLexer.cpp
+++ b/lib/Lex/PreprocessorLexer.cpp
@@ -1,4 +1,4 @@
-//===--- PreprocessorLexer.cpp - C Language Family Lexer ------------------===//
+//===- PreprocessorLexer.cpp - C Language Family Lexer --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,14 +15,15 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
+#include <cassert>
+
 using namespace clang;
 
-void PreprocessorLexer::anchor() { }
+void PreprocessorLexer::anchor() {}
 
 PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid)
-  : PP(pp), FID(fid), InitialNumSLocEntries(0),
-    ParsingPreprocessorDirective(false),
-    ParsingFilename(false), LexingRawMode(false) {
+    : PP(pp), FID(fid) {
   if (pp)
     InitialNumSLocEntries = pp->getSourceManager().local_sloc_entry_size();
 }
diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp
index e0f3966..dc03e16 100644
--- a/lib/Lex/ScratchBuffer.cpp
+++ b/lib/Lex/ScratchBuffer.cpp
@@ -74,11 +74,11 @@
 
   // Get scratch buffer. Zero-initialize it so it can be dumped into a PCH file
   // deterministically.
-  std::unique_ptr<llvm::MemoryBuffer> OwnBuf =
-      llvm::MemoryBuffer::getNewMemBuffer(RequestLen, "<scratch space>");
-  llvm::MemoryBuffer &Buf = *OwnBuf;
+  std::unique_ptr<llvm::WritableMemoryBuffer> OwnBuf =
+      llvm::WritableMemoryBuffer::getNewMemBuffer(RequestLen,
+                                                  "<scratch space>");
+  CurBuffer = OwnBuf->getBufferStart();
   FileID FID = SourceMgr.createFileID(std::move(OwnBuf));
   BufferStartLoc = SourceMgr.getLocForStartOfFile(FID);
-  CurBuffer = const_cast<char*>(Buf.getBufferStart());
   BytesUsed = 0;
 }
diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index d1facd9..ec73479 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp
@@ -99,10 +99,14 @@
     TokenInfo[tok::utf32_char_constant ] |= aci_custom;
   }
 
-  // These tokens have custom code in C++1z mode.
-  if (PP.getLangOpts().CPlusPlus1z)
+  // These tokens have custom code in C++17 mode.
+  if (PP.getLangOpts().CPlusPlus17)
     TokenInfo[tok::utf8_char_constant] |= aci_custom;
 
+  // These tokens have custom code in C++2a mode.
+  if (PP.getLangOpts().CPlusPlus2a)
+    TokenInfo[tok::lessequal ] |= aci_custom_firstchar;
+
   // These tokens change behavior if followed by an '='.
   TokenInfo[tok::amp         ] |= aci_avoid_equal;           // &=
   TokenInfo[tok::plus        ] |= aci_avoid_equal;           // +=
@@ -283,5 +287,7 @@
     return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
   case tok::arrow:           // ->*
     return PP.getLangOpts().CPlusPlus && FirstChar == '*';
+  case tok::lessequal:       // <=> (C++2a)
+    return PP.getLangOpts().CPlusPlus2a && FirstChar == '>';
   }
 }
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index 194ceec..d7f1c7a9 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -1,4 +1,4 @@
-//===--- TokenLexer.cpp - Lex from a token stream -------------------------===//
+//===- TokenLexer.cpp - Lex from a token stream ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,13 +12,25 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/TokenLexer.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
 #include "clang/Lex/MacroArgs.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
 #include "clang/Lex/VariadicMacroSupport.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include <cassert>
+#include <cstring>
 
 using namespace clang;
 
@@ -238,7 +250,6 @@
   VAOptExpansionContext VCtx(PP);
   
   for (unsigned I = 0, E = NumTokens; I != E; ++I) {
-    
     const Token &CurTok = Tokens[I];
     // We don't want a space for the next token after a paste
     // operator.  In valid code, the token will get smooshed onto the
@@ -287,7 +298,6 @@
         }
         // ... else the macro was called with variadic arguments, and we do not
         // have a closing rparen - so process this token normally.
-
       } else {
         // Current token is the closing r_paren which marks the end of the
         // __VA_OPT__ invocation, so handle any place-marker pasting (if
@@ -573,7 +583,6 @@
 }
 
 /// Lex - Lex and return a token from this macro stream.
-///
 bool TokenLexer::Lex(Token &Tok) {
   // Lexing off the end of the macro, pop this macro off the expansion stack.
   if (isAtEnd()) {
@@ -677,6 +686,7 @@
 bool TokenLexer::pasteTokens(Token &Tok) {
   return pasteTokens(Tok, llvm::makeArrayRef(Tokens, NumTokens), CurTokenIdx);
 }
+
 /// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ##
 /// operator.  Read the ## and RHS, and paste the LHS/RHS together.  If there
 /// are more ## after it, chomp them iteratively.  Return the result as LHSTok.
diff --git a/lib/Parse/ParseAST.cpp b/lib/Parse/ParseAST.cpp
index d018d4c..354b380 100644
--- a/lib/Parse/ParseAST.cpp
+++ b/lib/Parse/ParseAST.cpp
@@ -21,6 +21,7 @@
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/SemaConsumer.h"
+#include "clang/Sema/TemplateInstCallback.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include <cstdio>
 #include <memory>
@@ -121,6 +122,10 @@
   bool OldCollectStats = PrintStats;
   std::swap(OldCollectStats, S.CollectStats);
 
+  // Initialize the template instantiation observer chain.
+  // FIXME: See note on "finalize" below.
+  initialize(S.TemplateInstCallbacks, S);
+
   ASTConsumer *Consumer = &S.getASTConsumer();
 
   std::unique_ptr<Parser> ParseOP(
@@ -158,6 +163,13 @@
   
   Consumer->HandleTranslationUnit(S.getASTContext());
 
+  // Finalize the template instantiation observer chain.
+  // FIXME: This (and init.) should be done in the Sema class, but because
+  // Sema does not have a reliable "Finalize" function (it has a
+  // destructor, but it is not guaranteed to be called ("-disable-free")).
+  // So, do the initialization above and do the finalization here:
+  finalize(S.TemplateInstCallbacks, S);
+
   std::swap(OldCollectStats, S.CollectStats);
   if (PrintStats) {
     llvm::errs() << "\nSTATISTICS:\n";
diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp
index cc032c8..deefcaf 100644
--- a/lib/Parse/ParseDecl.cpp
+++ b/lib/Parse/ParseDecl.cpp
@@ -43,13 +43,13 @@
 ///
 /// Called type-id in C++.
 TypeResult Parser::ParseTypeName(SourceRange *Range,
-                                 Declarator::TheContext Context,
+                                 DeclaratorContext Context,
                                  AccessSpecifier AS,
                                  Decl **OwnedType,
                                  ParsedAttributes *Attrs) {
   DeclSpecContext DSC = getDeclSpecContextFromDeclaratorContext(Context);
-  if (DSC == DSC_normal)
-    DSC = DSC_type_specifier;
+  if (DSC == DeclSpecContext::DSC_normal)
+    DSC = DeclSpecContext::DSC_type_specifier;
 
   // Parse the common declaration-specifiers piece.
   DeclSpec DS(AttrFactory);
@@ -404,14 +404,28 @@
   AttributeList::Kind AttrKind =
       AttributeList::getKind(AttrName, ScopeName, Syntax);
 
-  if (AttrKind == AttributeList::AT_ExternalSourceSymbol) {
+  switch (AttrKind) {
+  default:
+    return ParseAttributeArgsCommon(AttrName, AttrNameLoc, Attrs, EndLoc,
+                                    ScopeName, ScopeLoc, Syntax);
+  case AttributeList::AT_ExternalSourceSymbol:
     ParseExternalSourceSymbolAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc,
                                        ScopeName, ScopeLoc, Syntax);
-    return Attrs.getList() ? Attrs.getList()->getNumArgs() : 0;
+    break;
+  case AttributeList::AT_Availability:
+    ParseAvailabilityAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc, ScopeName,
+                               ScopeLoc, Syntax);
+    break;
+  case AttributeList::AT_ObjCBridgeRelated:
+    ParseObjCBridgeRelatedAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc,
+                                    ScopeName, ScopeLoc, Syntax);
+    break;
+  case AttributeList::AT_TypeTagForDatatype:
+    ParseTypeTagForDatatypeAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc,
+                                     ScopeName, ScopeLoc, Syntax);
+    break;
   }
-
-  return ParseAttributeArgsCommon(AttrName, AttrNameLoc, Attrs, EndLoc,
-                                  ScopeName, ScopeLoc, Syntax);
+  return Attrs.getList() ? Attrs.getList()->getNumArgs() : 0;
 }
 
 bool Parser::ParseMicrosoftDeclSpecArgs(IdentifierInfo *AttrName,
@@ -1244,7 +1258,9 @@
     return;
   }
 
-  // Parse optional class method name.
+  // Parse class method name.  It's non-optional in the sense that a trailing
+  // comma is required, but it can be the empty string, and then we record a
+  // nullptr.
   IdentifierLoc *ClassMethod = nullptr;
   if (Tok.is(tok::identifier)) {
     ClassMethod = ParseIdentifierLoc();
@@ -1263,7 +1279,8 @@
     return;
   }
   
-  // Parse optional instance method name.
+  // Parse instance method name.  Also non-optional but empty string is
+  // permitted.
   IdentifierLoc *InstanceMethod = nullptr;
   if (Tok.is(tok::identifier))
     InstanceMethod = ParseIdentifierLoc();
@@ -1548,15 +1565,21 @@
   SourceLocation Loc = Tok.getLocation();
   ParseCXX11Attributes(Attrs);
   CharSourceRange AttrRange(SourceRange(Loc, Attrs.Range.getEnd()), true);
-
+  // FIXME: use err_attributes_misplaced
   Diag(Loc, diag::err_attributes_not_allowed)
     << FixItHint::CreateInsertionFromRange(CorrectLocation, AttrRange)
     << FixItHint::CreateRemoval(AttrRange);
 }
 
-void Parser::DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs) {
-  Diag(attrs.Range.getBegin(), diag::err_attributes_not_allowed)
-    << attrs.Range;
+void Parser::DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs,
+                                          const SourceLocation CorrectLocation) {
+  if (CorrectLocation.isValid()) {
+    CharSourceRange AttrRange(attrs.Range, true);
+    Diag(CorrectLocation, diag::err_attributes_misplaced)
+        << FixItHint::CreateInsertionFromRange(CorrectLocation, AttrRange)
+        << FixItHint::CreateRemoval(AttrRange);
+  } else
+    Diag(attrs.Range.getBegin(), diag::err_attributes_not_allowed) << attrs.Range;
 }
 
 void Parser::ProhibitCXX11Attributes(ParsedAttributesWithRange &Attrs,
@@ -1633,7 +1656,7 @@
 
 /// ParseDeclaration - Parse a full 'declaration', which consists of
 /// declaration-specifiers, some number of declarators, and a semicolon.
-/// 'Context' should be a Declarator::TheContext value.  This returns the
+/// 'Context' should be a DeclaratorContext value.  This returns the
 /// location of the semicolon in DeclEnd.
 ///
 ///       declaration: [C99 6.7]
@@ -1647,7 +1670,7 @@
 /// [C++11/C11] static_assert-declaration
 ///         others... [FIXME]
 ///
-Parser::DeclGroupPtrTy Parser::ParseDeclaration(unsigned Context,
+Parser::DeclGroupPtrTy Parser::ParseDeclaration(DeclaratorContext Context,
                                                 SourceLocation &DeclEnd,
                                           ParsedAttributesWithRange &attrs) {
   ParenBraceBracketBalancer BalancerRAIIObj(*this);
@@ -1708,7 +1731,7 @@
 /// of a simple-declaration. If we find that we are, we also parse the
 /// for-range-initializer, and place it here.
 Parser::DeclGroupPtrTy
-Parser::ParseSimpleDeclaration(unsigned Context,
+Parser::ParseSimpleDeclaration(DeclaratorContext Context,
                                SourceLocation &DeclEnd,
                                ParsedAttributesWithRange &Attrs,
                                bool RequireSemi, ForRangeInit *FRI) {
@@ -1747,7 +1770,7 @@
 
 /// Returns true if this might be the start of a declarator, or a common typo
 /// for a declarator.
-bool Parser::MightBeDeclarator(unsigned Context) {
+bool Parser::MightBeDeclarator(DeclaratorContext Context) {
   switch (Tok.getKind()) {
   case tok::annot_cxxscope:
   case tok::annot_template_id:
@@ -1766,11 +1789,12 @@
     return getLangOpts().CPlusPlus;
 
   case tok::l_square: // Might be an attribute on an unnamed bit-field.
-    return Context == Declarator::MemberContext && getLangOpts().CPlusPlus11 &&
-           NextToken().is(tok::l_square);
+    return Context == DeclaratorContext::MemberContext &&
+           getLangOpts().CPlusPlus11 && NextToken().is(tok::l_square);
 
   case tok::colon: // Might be a typo for '::' or an unnamed bit-field.
-    return Context == Declarator::MemberContext || getLangOpts().CPlusPlus;
+    return Context == DeclaratorContext::MemberContext ||
+           getLangOpts().CPlusPlus;
 
   case tok::identifier:
     switch (NextToken().getKind()) {
@@ -1796,8 +1820,9 @@
       // At namespace scope, 'identifier:' is probably a typo for 'identifier::'
       // and in block scope it's probably a label. Inside a class definition,
       // this is a bit-field.
-      return Context == Declarator::MemberContext ||
-             (getLangOpts().CPlusPlus && Context == Declarator::FileContext);
+      return Context == DeclaratorContext::MemberContext ||
+             (getLangOpts().CPlusPlus &&
+              Context == DeclaratorContext::FileContext);
 
     case tok::identifier: // Possible virt-specifier.
       return getLangOpts().CPlusPlus11 && isCXX11VirtSpecifier(NextToken());
@@ -1896,11 +1921,11 @@
 /// definition or a group of object declarations, actually parse the
 /// result.
 Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
-                                              unsigned Context,
+                                              DeclaratorContext Context,
                                               SourceLocation *DeclEnd,
                                               ForRangeInit *FRI) {
   // Parse the first declarator.
-  ParsingDeclarator D(*this, DS, static_cast<Declarator::TheContext>(Context));
+  ParsingDeclarator D(*this, DS, Context);
   ParseDeclarator(D);
 
   // Bail out if the first declarator didn't seem well-formed.
@@ -1947,7 +1972,7 @@
     // Function definitions are only allowed at file scope and in C++ classes.
     // The C++ inline method definition case is handled elsewhere, so we only
     // need to handle the file scope definition case.
-    if (Context == Declarator::FileContext) {
+    if (Context == DeclaratorContext::FileContext) {
       if (isStartOfFunctionDefinition(D)) {
         if (DS.getStorageClassSpec() == DeclSpec::SCS_typedef) {
           Diag(Tok, diag::err_function_declared_typedef);
@@ -2018,7 +2043,7 @@
   if (FirstDecl)
     DeclsInGroup.push_back(FirstDecl);
 
-  bool ExpectSemi = Context != Declarator::ForContext;
+  bool ExpectSemi = Context != DeclaratorContext::ForContext;
   
   // If we don't have a comma, it is either the end of the list (a ';') or an
   // error, bail out.
@@ -2064,7 +2089,7 @@
     *DeclEnd = Tok.getLocation();
 
   if (ExpectSemi &&
-      ExpectAndConsumeSemi(Context == Declarator::FileContext
+      ExpectAndConsumeSemi(Context == DeclaratorContext::FileContext
                            ? diag::err_invalid_token_after_toplevel_declarator
                            : diag::err_expected_semi_declaration)) {
     // Okay, there was no semicolon and one was expected.  If we see a
@@ -2193,7 +2218,7 @@
       // FIXME: This check should be for a variable template instantiation only.
 
       // Check that this is a valid instantiation
-      if (D.getName().getKind() != UnqualifiedId::IK_TemplateId) {
+      if (D.getName().getKind() != UnqualifiedIdKind::IK_TemplateId) {
         // If the declarator-id is not a template-id, issue a diagnostic and
         // recover by ignoring the 'template' keyword.
         Diag(Tok, diag::err_template_defn_explicit_instantiation)
@@ -2267,8 +2292,8 @@
       if (Init.isInvalid()) {
         SmallVector<tok::TokenKind, 2> StopTokens;
         StopTokens.push_back(tok::comma);
-        if (D.getContext() == Declarator::ForContext ||
-            D.getContext() == Declarator::InitStmtContext)
+        if (D.getContext() == DeclaratorContext::ForContext ||
+            D.getContext() == DeclaratorContext::InitStmtContext)
           StopTokens.push_back(tok::r_paren);
         SkipUntil(StopTokens, StopAtSemi | StopBeforeMatch);
         Actions.ActOnInitializerError(ThisDecl);
@@ -2391,7 +2416,7 @@
   }
 
   // Issue diagnostic and remove constexpr specfier if present.
-  if (DS.isConstexprSpecified() && DSC != DSC_condition) {
+  if (DS.isConstexprSpecified() && DSC != DeclSpecContext::DSC_condition) {
     Diag(DS.getConstexprSpecLoc(), diag::err_typename_invalid_constexpr);
     DS.ClearConstexprSpec();
   }
@@ -2480,7 +2505,7 @@
     // classes.
     if (ParsedType T = Actions.ActOnMSVCUnknownTypeName(
             *Tok.getIdentifierInfo(), Tok.getLocation(),
-            DSC == DSC_template_type_arg)) {
+            DSC == DeclSpecContext::DSC_template_type_arg)) {
       const char *PrevSpec;
       unsigned DiagID;
       DS.SetTypeSpecType(DeclSpec::TST_typename, Loc, PrevSpec, DiagID, T,
@@ -2534,18 +2559,20 @@
 
       // Parse this as a tag as if the missing tag were present.
       if (TagKind == tok::kw_enum)
-        ParseEnumSpecifier(Loc, DS, TemplateInfo, AS, DSC_normal);
+        ParseEnumSpecifier(Loc, DS, TemplateInfo, AS,
+                           DeclSpecContext::DSC_normal);
       else
         ParseClassSpecifier(TagKind, Loc, DS, TemplateInfo, AS,
-                            /*EnteringContext*/ false, DSC_normal, Attrs);
+                            /*EnteringContext*/ false,
+                            DeclSpecContext::DSC_normal, Attrs);
       return true;
     }
   }
 
   // Determine whether this identifier could plausibly be the name of something
   // being declared (with a missing type).
-  if (!isTypeSpecifier(DSC) &&
-      (!SS || DSC == DSC_top_level || DSC == DSC_class)) {
+  if (!isTypeSpecifier(DSC) && (!SS || DSC == DeclSpecContext::DSC_top_level ||
+                                DSC == DeclSpecContext::DSC_class)) {
     // Look ahead to the next token to try to figure out what this declaration
     // was supposed to be.
     switch (NextToken().getKind()) {
@@ -2569,7 +2596,8 @@
 
       // If we're in a context where we could be declaring a constructor,
       // check whether this is a constructor declaration with a bogus name.
-      if (DSC == DSC_class || (DSC == DSC_top_level && SS)) {
+      if (DSC == DeclSpecContext::DSC_class ||
+          (DSC == DeclSpecContext::DSC_top_level && SS)) {
         IdentifierInfo *II = Tok.getIdentifierInfo();
         if (Actions.isCurrentClassNameTypo(II, SS)) {
           Diag(Loc, diag::err_constructor_bad_name)
@@ -2649,23 +2677,25 @@
 /// context.
 ///
 /// \param Context the declarator context, which is one of the
-/// Declarator::TheContext enumerator values.
+/// DeclaratorContext enumerator values.
 Parser::DeclSpecContext
-Parser::getDeclSpecContextFromDeclaratorContext(unsigned Context) {
-  if (Context == Declarator::MemberContext)
-    return DSC_class;
-  if (Context == Declarator::FileContext)
-    return DSC_top_level;
-  if (Context == Declarator::TemplateParamContext)
-    return DSC_template_param;
-  if (Context == Declarator::TemplateTypeArgContext)
-    return DSC_template_type_arg;
-  if (Context == Declarator::TrailingReturnContext)
-    return DSC_trailing;
-  if (Context == Declarator::AliasDeclContext ||
-      Context == Declarator::AliasTemplateContext)
-    return DSC_alias_declaration;
-  return DSC_normal;
+Parser::getDeclSpecContextFromDeclaratorContext(DeclaratorContext Context) {
+  if (Context == DeclaratorContext::MemberContext)
+    return DeclSpecContext::DSC_class;
+  if (Context == DeclaratorContext::FileContext)
+    return DeclSpecContext::DSC_top_level;
+  if (Context == DeclaratorContext::TemplateParamContext)
+    return DeclSpecContext::DSC_template_param;
+  if (Context == DeclaratorContext::TemplateArgContext ||
+      Context == DeclaratorContext::TemplateTypeArgContext)
+    return DeclSpecContext::DSC_template_type_arg;
+  if (Context == DeclaratorContext::TrailingReturnContext ||
+      Context == DeclaratorContext::TrailingReturnVarContext)
+    return DeclSpecContext::DSC_trailing;
+  if (Context == DeclaratorContext::AliasDeclContext ||
+      Context == DeclaratorContext::AliasTemplateContext)
+    return DeclSpecContext::DSC_alias_declaration;
+  return DeclSpecContext::DSC_normal;
 }
 
 /// ParseAlignArgument - Parse the argument to an alignment-specifier.
@@ -2745,7 +2775,8 @@
                                               LateParsedAttrList *LateAttrs) {
   assert(DS.hasTagDefinition() && "shouldn't call this");
 
-  bool EnteringContext = (DSContext == DSC_class || DSContext == DSC_top_level);
+  bool EnteringContext = (DSContext == DeclSpecContext::DSC_class ||
+                          DSContext == DeclSpecContext::DSC_top_level);
 
   if (getLangOpts().CPlusPlus &&
       Tok.isOneOf(tok::identifier, tok::coloncolon, tok::kw_decltype,
@@ -2879,7 +2910,8 @@
     DS.SetRangeEnd(SourceLocation());
   }
 
-  bool EnteringContext = (DSContext == DSC_class || DSContext == DSC_top_level);
+  bool EnteringContext = (DSContext == DeclSpecContext::DSC_class ||
+                          DSContext == DeclSpecContext::DSC_top_level);
   bool AttrsLastTime = false;
   ParsedAttributesWithRange attrs(AttrFactory);
   // We use Sema's policy to get bool macros right.
@@ -2949,8 +2981,8 @@
                                           Scope::FunctionPrototypeScope |
                                           Scope::AtCatchScope)) == 0;
         bool AllowNestedNameSpecifiers
-          = DSContext == DSC_top_level ||
-            (DSContext == DSC_class && DS.isFriendSpecified());
+          = DSContext == DeclSpecContext::DSC_top_level ||
+            (DSContext == DeclSpecContext::DSC_class && DS.isFriendSpecified());
 
         Actions.CodeCompleteDeclSpec(getCurScope(), DS,
                                      AllowNonIdentifiers,
@@ -2961,9 +2993,9 @@
       if (getCurScope()->getFnParent() || getCurScope()->getBlockParent())
         CCC = Sema::PCC_LocalDeclarationSpecifiers;
       else if (TemplateInfo.Kind != ParsedTemplateInfo::NonTemplate)
-        CCC = DSContext == DSC_class? Sema::PCC_MemberTemplate
-                                    : Sema::PCC_Template;
-      else if (DSContext == DSC_class)
+        CCC = DSContext == DeclSpecContext::DSC_class ? Sema::PCC_MemberTemplate
+                                                      : Sema::PCC_Template;
+      else if (DSContext == DeclSpecContext::DSC_class)
         CCC = Sema::PCC_Class;
       else if (CurParsedObjCImpl)
         CCC = Sema::PCC_ObjCImplementation;
@@ -3007,10 +3039,11 @@
         // To improve diagnostics for this case, parse the declaration as a
         // constructor (and reject the extra template arguments later).
         TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Next);
-        if ((DSContext == DSC_top_level || DSContext == DSC_class) &&
+        if ((DSContext == DeclSpecContext::DSC_top_level ||
+             DSContext == DeclSpecContext::DSC_class) &&
             TemplateId->Name &&
             Actions.isCurrentClassName(*TemplateId->Name, getCurScope(), &SS) &&
-            isConstructorDeclarator(/*Unqualified*/false)) {
+            isConstructorDeclarator(/*Unqualified*/ false)) {
           // The user meant this to be an out-of-line constructor
           // definition, but template arguments are not allowed
           // there.  Just allow this as a constructor; we'll
@@ -3049,7 +3082,8 @@
       // Check whether this is a constructor declaration. If we're in a
       // context where the identifier could be a class name, and it has the
       // shape of a constructor declaration, process it as one.
-      if ((DSContext == DSC_top_level || DSContext == DSC_class) &&
+      if ((DSContext == DeclSpecContext::DSC_top_level ||
+           DSContext == DeclSpecContext::DSC_class) &&
           Actions.isCurrentClassName(*Next.getIdentifierInfo(), getCurScope(),
                                      &SS) &&
           isConstructorDeclarator(/*Unqualified*/ false))
@@ -3187,7 +3221,8 @@
       if (DS.isTypeAltiVecVector())
         goto DoneWithDeclSpec;
 
-      if (DSContext == DSC_objc_method_result && isObjCInstancetype()) {
+      if (DSContext == DeclSpecContext::DSC_objc_method_result &&
+          isObjCInstancetype()) {
         ParsedType TypeRep = Actions.ActOnObjCInstanceType(Loc);
         assert(TypeRep);
         isInvalid = DS.SetTypeSpecType(DeclSpec::TST_typename, Loc, PrevSpec,
@@ -3221,15 +3256,16 @@
 
       // If we're in a context where the identifier could be a class name,
       // check whether this is a constructor declaration.
-      if (getLangOpts().CPlusPlus && DSContext == DSC_class &&
+      if (getLangOpts().CPlusPlus && DSContext == DeclSpecContext::DSC_class &&
           Actions.isCurrentClassName(*Tok.getIdentifierInfo(), getCurScope()) &&
           isConstructorDeclarator(/*Unqualified*/true))
         goto DoneWithDeclSpec;
 
       // Likewise, if this is a context where the identifier could be a template
       // name, check whether this is a deduction guide declaration.
-      if (getLangOpts().CPlusPlus1z &&
-          (DSContext == DSC_class || DSContext == DSC_top_level) &&
+      if (getLangOpts().CPlusPlus17 &&
+          (DSContext == DeclSpecContext::DSC_class ||
+           DSContext == DeclSpecContext::DSC_top_level) &&
           Actions.isDeductionGuideName(getCurScope(), *Tok.getIdentifierInfo(),
                                        Tok.getLocation()) &&
           isConstructorDeclarator(/*Unqualified*/ true,
@@ -3275,7 +3311,7 @@
       // If we're in a context where the template-id could be a
       // constructor name or specialization, check whether this is a
       // constructor declaration.
-      if (getLangOpts().CPlusPlus && DSContext == DSC_class &&
+      if (getLangOpts().CPlusPlus && DSContext == DeclSpecContext::DSC_class &&
           Actions.isCurrentClassName(*TemplateId->Name, getCurScope()) &&
           isConstructorDeclarator(TemplateId->SS.isEmpty()))
         goto DoneWithDeclSpec;
@@ -3445,7 +3481,7 @@
 
     // friend
     case tok::kw_friend:
-      if (DSContext == DSC_class)
+      if (DSContext == DeclSpecContext::DSC_class)
         isInvalid = DS.SetFriendSpec(Loc, PrevSpec, DiagID);
       else {
         PrevSpec = ""; // not actually used by the diagnostic
@@ -3464,11 +3500,6 @@
       isInvalid = DS.SetConstexprSpec(Loc, PrevSpec, DiagID);
       break;
 
-    // concept
-    case tok::kw_concept:
-      isInvalid = DS.SetConceptSpec(Loc, PrevSpec, DiagID);
-      break;
-
     // type-specifier
     case tok::kw_short:
       isInvalid = DS.SetTypeSpecWidth(DeclSpec::TSW_short, Loc, PrevSpec,
@@ -4069,7 +4100,7 @@
   SuppressAccessChecks diagsFromTag(*this, shouldDelayDiagsInTag);
 
   // Enum definitions should not be parsed in a trailing-return-type.
-  bool AllowDeclaration = DSC != DSC_trailing;
+  bool AllowDeclaration = DSC != DeclSpecContext::DSC_trailing;
 
   bool AllowFixedUnderlyingType = AllowDeclaration &&
     (getLangOpts().CPlusPlus11 || getLangOpts().MicrosoftExt ||
@@ -4295,14 +4326,14 @@
   bool IsDependent = false;
   const char *PrevSpec = nullptr;
   unsigned DiagID;
-  Decl *TagDecl = Actions.ActOnTag(getCurScope(), DeclSpec::TST_enum, TUK,
-                                   StartLoc, SS, Name, NameLoc, attrs.getList(),
-                                   AS, DS.getModulePrivateSpecLoc(), TParams,
-                                   Owned, IsDependent, ScopedEnumKWLoc,
-                                   IsScopedUsingClassTag, BaseType,
-                                   DSC == DSC_type_specifier,
-                                   DSC == DSC_template_param ||
-                                   DSC == DSC_template_type_arg, &SkipBody);
+  Decl *TagDecl = Actions.ActOnTag(
+      getCurScope(), DeclSpec::TST_enum, TUK, StartLoc, SS, Name, NameLoc,
+      attrs.getList(), AS, DS.getModulePrivateSpecLoc(), TParams, Owned,
+      IsDependent, ScopedEnumKWLoc, IsScopedUsingClassTag, BaseType,
+      DSC == DeclSpecContext::DSC_type_specifier,
+      DSC == DeclSpecContext::DSC_template_param ||
+          DSC == DeclSpecContext::DSC_template_type_arg,
+      &SkipBody);
 
   if (SkipBody.ShouldSkip) {
     assert(TUK == Sema::TUK_Definition && "can only skip a definition");
@@ -4420,7 +4451,7 @@
     ProhibitAttributes(attrs); // GNU-style attributes are prohibited.
     if (standardAttributesAllowed() && isCXX11AttributeSpecifier()) {
       if (getLangOpts().CPlusPlus)
-        Diag(Tok.getLocation(), getLangOpts().CPlusPlus1z
+        Diag(Tok.getLocation(), getLangOpts().CPlusPlus17
                                     ? diag::warn_cxx14_compat_ns_enum_attribute
                                     : diag::ext_ns_enum_attribute)
             << 1 /*enumerator*/;
@@ -4825,9 +4856,6 @@
   case tok::annot_decltype:
   case tok::kw_constexpr:
 
-    // C++ Concepts TS - concept
-  case tok::kw_concept:
-
     // C11 _Atomic
   case tok::kw__Atomic:
     return true;
@@ -5177,7 +5205,7 @@
 }
 
 static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang,
-                               unsigned TheContext) {
+                               DeclaratorContext TheContext) {
   if (Kind == tok::star || Kind == tok::caret)
     return true;
 
@@ -5196,8 +5224,9 @@
   // (The same thing can in theory happen after a trailing-return-type, but
   // since those are a C++11 feature, there is no rejects-valid issue there.)
   if (Kind == tok::ampamp)
-    return Lang.CPlusPlus11 || (TheContext != Declarator::ConversionIdContext &&
-                                TheContext != Declarator::CXXNewContext);
+    return Lang.CPlusPlus11 ||
+           (TheContext != DeclaratorContext::ConversionIdContext &&
+            TheContext != DeclaratorContext::CXXNewContext);
 
   return false;
 }
@@ -5251,8 +5280,9 @@
        (Tok.is(tok::identifier) &&
         (NextToken().is(tok::coloncolon) || NextToken().is(tok::less))) ||
        Tok.is(tok::annot_cxxscope))) {
-    bool EnteringContext = D.getContext() == Declarator::FileContext ||
-                           D.getContext() == Declarator::MemberContext;
+    bool EnteringContext =
+        D.getContext() == DeclaratorContext::FileContext ||
+        D.getContext() == DeclaratorContext::MemberContext;
     CXXScopeSpec SS;
     ParseOptionalCXXScopeSpecifier(SS, nullptr, EnteringContext);
 
@@ -5318,9 +5348,9 @@
     // GNU attributes are not allowed here in a new-type-id, but Declspec and
     // C++11 attributes are allowed.
     unsigned Reqs = AR_CXX11AttributesParsed | AR_DeclspecAttributesParsed |
-                            ((D.getContext() != Declarator::CXXNewContext)
-                                 ? AR_GNUAttributesParsed
-                                 : AR_GNUAttributesParsedAndRejected);
+                    ((D.getContext() != DeclaratorContext::CXXNewContext)
+                         ? AR_GNUAttributesParsed
+                         : AR_GNUAttributesParsedAndRejected);
     ParseTypeQualifierListOpt(DS, Reqs, true, !D.mayOmitIdentifier());
     D.ExtendWithDeclSpec(DS);
 
@@ -5472,15 +5502,16 @@
     // Don't parse FOO:BAR as if it were a typo for FOO::BAR inside a class, in
     // this context it is a bitfield. Also in range-based for statement colon
     // may delimit for-range-declaration.
-    ColonProtectionRAIIObject X(*this,
-                                D.getContext() == Declarator::MemberContext ||
-                                    (D.getContext() == Declarator::ForContext &&
-                                     getLangOpts().CPlusPlus11));
+    ColonProtectionRAIIObject X(
+        *this, D.getContext() == DeclaratorContext::MemberContext ||
+                   (D.getContext() == DeclaratorContext::ForContext &&
+                    getLangOpts().CPlusPlus11));
 
     // ParseDeclaratorInternal might already have parsed the scope.
     if (D.getCXXScopeSpec().isEmpty()) {
-      bool EnteringContext = D.getContext() == Declarator::FileContext ||
-                             D.getContext() == Declarator::MemberContext;
+      bool EnteringContext =
+          D.getContext() == DeclaratorContext::FileContext ||
+          D.getContext() == DeclaratorContext::MemberContext;
       ParseOptionalCXXScopeSpecifier(D.getCXXScopeSpec(), nullptr,
                                      EnteringContext);
     }
@@ -5509,9 +5540,9 @@
     //   been expanded or contains auto; otherwise, it is parsed as part of the
     //   parameter-declaration-clause.
     if (Tok.is(tok::ellipsis) && D.getCXXScopeSpec().isEmpty() &&
-        !((D.getContext() == Declarator::PrototypeContext ||
-           D.getContext() == Declarator::LambdaExprParameterContext ||
-           D.getContext() == Declarator::BlockLiteralContext) &&
+        !((D.getContext() == DeclaratorContext::PrototypeContext ||
+           D.getContext() == DeclaratorContext::LambdaExprParameterContext ||
+           D.getContext() == DeclaratorContext::BlockLiteralContext) &&
           NextToken().is(tok::r_paren) &&
           !D.hasGroupingParens() &&
           !Actions.containsUnexpandedParameterPacks(D) &&
@@ -5543,14 +5574,15 @@
         AllowDeductionGuide = false;
       } else if (D.getCXXScopeSpec().isSet()) {
         AllowConstructorName =
-          (D.getContext() == Declarator::FileContext ||
-           D.getContext() == Declarator::MemberContext);
+          (D.getContext() == DeclaratorContext::FileContext ||
+           D.getContext() == DeclaratorContext::MemberContext);
         AllowDeductionGuide = false;
       } else {
-        AllowConstructorName = (D.getContext() == Declarator::MemberContext);
+        AllowConstructorName =
+            (D.getContext() == DeclaratorContext::MemberContext);
         AllowDeductionGuide = 
-          (D.getContext() == Declarator::FileContext ||
-           D.getContext() == Declarator::MemberContext);
+          (D.getContext() == DeclaratorContext::FileContext ||
+           D.getContext() == DeclaratorContext::MemberContext);
       }
 
       SourceLocation TemplateKWLoc;
@@ -5606,15 +5638,16 @@
       // An identifier within parens is unlikely to be intended to be anything
       // other than a name being "declared".
       DiagnoseIdentifier = true;
-    else if (D.getContext() == Declarator::TemplateTypeArgContext)
+    else if (D.getContext() == DeclaratorContext::TemplateArgContext)
       // T<int N> is an accidental identifier; T<int N indicates a missing '>'.
       DiagnoseIdentifier =
           NextToken().isOneOf(tok::comma, tok::greater, tok::greatergreater);
-    else if (D.getContext() == Declarator::AliasDeclContext ||
-             D.getContext() == Declarator::AliasTemplateContext)
+    else if (D.getContext() == DeclaratorContext::AliasDeclContext ||
+             D.getContext() == DeclaratorContext::AliasTemplateContext)
       // The most likely error is that the ';' was forgotten.
       DiagnoseIdentifier = NextToken().isOneOf(tok::comma, tok::semi);
-    else if (D.getContext() == Declarator::TrailingReturnContext &&
+    else if ((D.getContext() == DeclaratorContext::TrailingReturnContext ||
+              D.getContext() == DeclaratorContext::TrailingReturnVarContext) &&
              !isCXX11VirtSpecifier(Tok))
       DiagnoseIdentifier = NextToken().isOneOf(
           tok::comma, tok::semi, tok::equal, tok::l_brace, tok::kw_try);
@@ -5628,6 +5661,18 @@
   }
 
   if (Tok.is(tok::l_paren)) {
+    // If this might be an abstract-declarator followed by a direct-initializer,
+    // check whether this is a valid declarator chunk. If it can't be, assume
+    // that it's an initializer instead.
+    if (D.mayOmitIdentifier() && D.mayBeFollowedByCXXDirectInit()) {
+      RevertingTentativeParsingAction PA(*this);
+      if (TryParseDeclarator(true, D.mayHaveIdentifier(), true) ==
+              TPResult::False) {
+        D.SetIdentifier(nullptr, Tok.getLocation());
+        goto PastIdentifier;
+      }
+    }
+
     // direct-declarator: '(' declarator ')'
     // direct-declarator: '(' attributes declarator ')'
     // Example: 'char (*X)'   or 'int (*XX)(void)'
@@ -5661,7 +5706,7 @@
       LLVM_BUILTIN_TRAP;
     if (Tok.is(tok::l_square))
       return ParseMisplacedBracketDeclarator(D);
-    if (D.getContext() == Declarator::MemberContext) {
+    if (D.getContext() == DeclaratorContext::MemberContext) {
       // Objective-C++: Detect C++ keywords and try to prevent further errors by
       // treating these keyword as valid member names.
       if (getLangOpts().ObjC1 && getLangOpts().CPlusPlus &&
@@ -6044,9 +6089,9 @@
       bool IsCXX11MemberFunction =
         getLangOpts().CPlusPlus11 &&
         D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_typedef &&
-        (D.getContext() == Declarator::MemberContext
+        (D.getContext() == DeclaratorContext::MemberContext
          ? !D.getDeclSpec().isFriendSpecified()
-         : D.getContext() == Declarator::FileContext &&
+         : D.getContext() == DeclaratorContext::FileContext &&
            D.getCXXScopeSpec().isValid() &&
            Actions.CurContext->isRecord());
       Sema::CXXThisScopeRAII ThisScope(Actions,
@@ -6098,7 +6143,8 @@
           StartLoc = D.getDeclSpec().getTypeSpecTypeLoc();
         LocalEndLoc = Tok.getLocation();
         SourceRange Range;
-        TrailingReturnType = ParseTrailingReturnType(Range);
+        TrailingReturnType =
+            ParseTrailingReturnType(Range, D.mayBeFollowedByCXXDirectInit());
         EndLoc = Range.getEnd();
       }
     } else if (standardAttributesAllowed()) {
@@ -6311,10 +6357,10 @@
     // Parse the declarator.  This is "PrototypeContext" or 
     // "LambdaExprParameterContext", because we must accept either 
     // 'declarator' or 'abstract-declarator' here.
-    Declarator ParmDeclarator(DS, 
-              D.getContext() == Declarator::LambdaExprContext ?
-                                  Declarator::LambdaExprParameterContext : 
-                                                Declarator::PrototypeContext);
+    Declarator ParmDeclarator(
+        DS, D.getContext() == DeclaratorContext::LambdaExprContext
+                ? DeclaratorContext::LambdaExprParameterContext
+                : DeclaratorContext::PrototypeContext);
     ParseDeclarator(ParmDeclarator);
 
     // Parse GNU attributes, if present.
@@ -6357,7 +6403,7 @@
         SourceLocation EqualLoc = Tok.getLocation();
 
         // Parse the default argument
-        if (D.getContext() == Declarator::MemberContext) {
+        if (D.getContext() == DeclaratorContext::MemberContext) {
           // If we're inside a class definition, cache the tokens
           // corresponding to the default argument. We'll actually parse
           // them when we see the end of the class definition.
diff --git a/lib/Parse/ParseDeclCXX.cpp b/lib/Parse/ParseDeclCXX.cpp
index ada67e8..172950f 100644
--- a/lib/Parse/ParseDeclCXX.cpp
+++ b/lib/Parse/ParseDeclCXX.cpp
@@ -55,7 +55,7 @@
 ///       namespace-alias-definition:  [C++ 7.3.2: namespace.alias]
 ///         'namespace' identifier '=' qualified-namespace-specifier ';'
 ///
-Parser::DeclGroupPtrTy Parser::ParseNamespace(unsigned Context,
+Parser::DeclGroupPtrTy Parser::ParseNamespace(DeclaratorContext Context,
                                               SourceLocation &DeclEnd,
                                               SourceLocation InlineLoc) {
   assert(Tok.is(tok::kw_namespace) && "Not a namespace!");
@@ -77,7 +77,7 @@
   ParsedAttributesWithRange attrs(AttrFactory);
   SourceLocation attrLoc;
   if (getLangOpts().CPlusPlus11 && isCXX11AttributeSpecifier()) {
-    Diag(Tok.getLocation(), getLangOpts().CPlusPlus1z
+    Diag(Tok.getLocation(), getLangOpts().CPlusPlus17
                                 ? diag::warn_cxx14_compat_ns_enum_attribute
                                 : diag::ext_ns_enum_attribute)
       << 0 /*namespace*/;
@@ -142,7 +142,7 @@
     // Normal namespace definition, not a nested-namespace-definition.
   } else if (InlineLoc.isValid()) {
     Diag(InlineLoc, diag::err_inline_nested_namespace_definition);
-  } else if (getLangOpts().CPlusPlus1z) {
+  } else if (getLangOpts().CPlusPlus17) {
     Diag(ExtraNamespaceLoc[0],
          diag::warn_cxx14_compat_nested_namespace_definition);
   } else {
@@ -307,7 +307,7 @@
 ///         'extern' string-literal '{' declaration-seq[opt] '}'
 ///         'extern' string-literal declaration
 ///
-Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, unsigned Context) {
+Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) {
   assert(isTokenStringLiteral() && "Not a string literal!");
   ExprResult Lang = ParseStringLiteralExpression(false);
 
@@ -434,7 +434,7 @@
 /// ParseUsingDirectiveOrDeclaration - Parse C++ using using-declaration or
 /// using-directive. Assumes that current token is 'using'.
 Parser::DeclGroupPtrTy
-Parser::ParseUsingDirectiveOrDeclaration(unsigned Context,
+Parser::ParseUsingDirectiveOrDeclaration(DeclaratorContext Context,
                                          const ParsedTemplateInfo &TemplateInfo,
                                          SourceLocation &DeclEnd,
                                          ParsedAttributesWithRange &attrs) {
@@ -482,7 +482,7 @@
 ///        'using' 'namespace' ::[opt] nested-name-specifier[opt]
 ///                 namespace-name attributes[opt] ;
 ///
-Decl *Parser::ParseUsingDirective(unsigned Context,
+Decl *Parser::ParseUsingDirective(DeclaratorContext Context,
                                   SourceLocation UsingLoc,
                                   SourceLocation &DeclEnd,
                                   ParsedAttributes &attrs) {
@@ -551,7 +551,8 @@
 ///     using-declarator:
 ///       'typename'[opt] nested-name-specifier unqualified-id
 ///
-bool Parser::ParseUsingDeclarator(unsigned Context, UsingDeclarator &D) {
+bool Parser::ParseUsingDeclarator(DeclaratorContext Context,
+                                  UsingDeclarator &D) {
   D.clear();
 
   // Ignore optional 'typename'.
@@ -582,7 +583,8 @@
   //   or the simple-template-id's template-name in the last component of the
   //   nested-name-specifier, the name is [...] considered to name the
   //   constructor.
-  if (getLangOpts().CPlusPlus11 && Context == Declarator::MemberContext &&
+  if (getLangOpts().CPlusPlus11 &&
+      Context == DeclaratorContext::MemberContext &&
       Tok.is(tok::identifier) &&
       (NextToken().is(tok::semi) || NextToken().is(tok::comma) ||
        NextToken().is(tok::ellipsis)) &&
@@ -605,7 +607,7 @@
   }
 
   if (TryConsumeToken(tok::ellipsis, D.EllipsisLoc))
-    Diag(Tok.getLocation(), getLangOpts().CPlusPlus1z ?
+    Diag(Tok.getLocation(), getLangOpts().CPlusPlus17 ?
          diag::warn_cxx17_compat_using_declaration_pack :
          diag::ext_using_declaration_pack);
 
@@ -629,7 +631,7 @@
 ///       'using' identifier attribute-specifier-seq[opt] = type-id ;
 ///
 Parser::DeclGroupPtrTy
-Parser::ParseUsingDeclaration(unsigned Context,
+Parser::ParseUsingDeclaration(DeclaratorContext Context,
                               const ParsedTemplateInfo &TemplateInfo,
                               SourceLocation UsingLoc, SourceLocation &DeclEnd,
                               AccessSpecifier AS) {
@@ -699,7 +701,7 @@
       // "typename" keyword is allowed for identifiers only,
       // because it may be a type definition.
       if (D.TypenameLoc.isValid() &&
-          D.Name.getKind() != UnqualifiedId::IK_Identifier) {
+          D.Name.getKind() != UnqualifiedIdKind::IK_Identifier) {
         Diag(D.Name.getSourceRange().getBegin(),
              diag::err_typename_identifiers_only)
             << FixItHint::CreateRemoval(SourceRange(D.TypenameLoc));
@@ -723,7 +725,7 @@
   }
 
   if (DeclsInGroup.size() > 1)
-    Diag(Tok.getLocation(), getLangOpts().CPlusPlus1z ?
+    Diag(Tok.getLocation(), getLangOpts().CPlusPlus17 ?
          diag::warn_cxx17_compat_multi_using_declaration :
          diag::ext_multi_using_declaration);
 
@@ -753,7 +755,7 @@
   // Type alias templates cannot be specialized.
   int SpecKind = -1;
   if (TemplateInfo.Kind == ParsedTemplateInfo::Template &&
-      D.Name.getKind() == UnqualifiedId::IK_TemplateId)
+      D.Name.getKind() == UnqualifiedIdKind::IK_TemplateId)
     SpecKind = 0;
   if (TemplateInfo.Kind == ParsedTemplateInfo::ExplicitSpecialization)
     SpecKind = 1;
@@ -773,7 +775,7 @@
   }
 
   // Name must be an identifier.
-  if (D.Name.getKind() != UnqualifiedId::IK_Identifier) {
+  if (D.Name.getKind() != UnqualifiedIdKind::IK_Identifier) {
     Diag(D.Name.StartLocation, diag::err_alias_declaration_not_identifier);
     // No removal fixit: can't recover from this.
     SkipUntil(tok::semi);
@@ -791,11 +793,11 @@
       << FixItHint::CreateRemoval(SourceRange(D.EllipsisLoc));
 
   Decl *DeclFromDeclSpec = nullptr;
-  TypeResult TypeAlias =
-      ParseTypeName(nullptr,
-                    TemplateInfo.Kind ? Declarator::AliasTemplateContext
-                                      : Declarator::AliasDeclContext,
-                    AS, &DeclFromDeclSpec, &Attrs);
+  TypeResult TypeAlias = ParseTypeName(
+      nullptr,
+      TemplateInfo.Kind ? DeclaratorContext::AliasTemplateContext
+                        : DeclaratorContext::AliasDeclContext,
+      AS, &DeclFromDeclSpec, &Attrs);
   if (OwnedType)
     *OwnedType = DeclFromDeclSpec;
 
@@ -851,10 +853,10 @@
 
   ExprResult AssertMessage;
   if (Tok.is(tok::r_paren)) {
-    Diag(Tok, getLangOpts().CPlusPlus1z
+    Diag(Tok, getLangOpts().CPlusPlus17
                   ? diag::warn_cxx14_compat_static_assert_no_message
                   : diag::ext_static_assert_no_message)
-      << (getLangOpts().CPlusPlus1z
+      << (getLangOpts().CPlusPlus17
               ? FixItHint()
               : FixItHint::CreateInsertion(Tok.getLocation(), ", \"\""));
   } else {
@@ -1094,7 +1096,7 @@
 
     EndLocation = ParseDecltypeSpecifier(DS);
 
-    Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+    Declarator DeclaratorInfo(DS, DeclaratorContext::TypeNameContext);
     return Actions.ActOnTypeName(getCurScope(), DeclaratorInfo);
   }
 
@@ -1195,7 +1197,7 @@
   DS.SetTypeSpecType(TST_typename, IdLoc, PrevSpec, DiagID, Type,
                      Actions.getASTContext().getPrintingPolicy());
 
-  Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+  Declarator DeclaratorInfo(DS, DeclaratorContext::TypeNameContext);
   return Actions.ActOnTypeName(getCurScope(), DeclaratorInfo);
 }
 
@@ -1612,14 +1614,15 @@
   //   new struct s;
   // or
   //   &T::operator struct s;
-  // For these, DSC is DSC_type_specifier or DSC_alias_declaration.
+  // For these, DSC is DeclSpecContext::DSC_type_specifier or
+  // DeclSpecContext::DSC_alias_declaration.
 
   // If there are attributes after class name, parse them.
   MaybeParseCXX11Attributes(Attributes);
 
   const PrintingPolicy &Policy = Actions.getASTContext().getPrintingPolicy();
   Sema::TagUseKind TUK;
-  if (DSC == DSC_trailing)
+  if (DSC == DeclSpecContext::DSC_trailing)
     TUK = Sema::TUK_Reference;
   else if (Tok.is(tok::l_brace) ||
            (getLangOpts().CPlusPlus && Tok.is(tok::colon)) ||
@@ -1881,15 +1884,14 @@
     stripTypeAttributesOffDeclSpec(attrs, DS, TUK);
 
     // Declaration or definition of a class type
-    TagOrTempResult = Actions.ActOnTag(getCurScope(), TagType, TUK, StartLoc,
-                                       SS, Name, NameLoc, attrs.getList(), AS,
-                                       DS.getModulePrivateSpecLoc(),
-                                       TParams, Owned, IsDependent,
-                                       SourceLocation(), false,
-                                       clang::TypeResult(),
-                                       DSC == DSC_type_specifier,
-                                       DSC == DSC_template_param ||
-                                       DSC == DSC_template_type_arg, &SkipBody);
+    TagOrTempResult = Actions.ActOnTag(
+        getCurScope(), TagType, TUK, StartLoc, SS, Name, NameLoc,
+        attrs.getList(), AS, DS.getModulePrivateSpecLoc(), TParams, Owned,
+        IsDependent, SourceLocation(), false, clang::TypeResult(),
+        DSC == DeclSpecContext::DSC_type_specifier,
+        DSC == DeclSpecContext::DSC_template_param ||
+            DSC == DeclSpecContext::DSC_template_type_arg,
+        &SkipBody);
 
     // If ActOnTag said the type was dependent, try again with the
     // less common call.
@@ -2509,7 +2511,7 @@
     SourceLocation DeclEnd;
     return DeclGroupPtrTy::make(
         DeclGroupRef(ParseTemplateDeclarationOrSpecialization(
-            Declarator::MemberContext, DeclEnd, AS, AccessAttrs)));
+            DeclaratorContext::MemberContext, DeclEnd, AS, AccessAttrs)));
   }
 
   // Handle:  member-declaration ::= '__extension__' member-declaration
@@ -2545,7 +2547,7 @@
     }
     SourceLocation DeclEnd;
     // Otherwise, it must be a using-declaration or an alias-declaration.
-    return ParseUsingDeclaration(Declarator::MemberContext, TemplateInfo,
+    return ParseUsingDeclaration(DeclaratorContext::MemberContext, TemplateInfo,
                                  UsingLoc, DeclEnd, AS);
   }
 
@@ -2559,7 +2561,7 @@
   if (MalformedTypeSpec)
     DS.SetTypeSpecError();
 
-  ParseDeclarationSpecifiers(DS, TemplateInfo, AS, DSC_class,
+  ParseDeclarationSpecifiers(DS, TemplateInfo, AS, DeclSpecContext::DSC_class,
                              &CommonLateParsedAttrs);
 
   // Turn off colon protection that was set for declspec.
@@ -2569,7 +2571,7 @@
   // may get this far before the problem becomes obvious.
   if (DS.hasTagDefinition() &&
       TemplateInfo.Kind == ParsedTemplateInfo::NonTemplate &&
-      DiagnoseMissingSemiAfterTagDefinition(DS, AS, DSC_class,
+      DiagnoseMissingSemiAfterTagDefinition(DS, AS, DeclSpecContext::DSC_class,
                                             &CommonLateParsedAttrs))
     return nullptr;
 
@@ -2593,7 +2595,7 @@
     return Actions.ConvertDeclToDeclGroup(TheDecl);
   }
 
-  ParsingDeclarator DeclaratorInfo(*this, DS, Declarator::MemberContext);
+  ParsingDeclarator DeclaratorInfo(*this, DS, DeclaratorContext::MemberContext);
   VirtSpecifiers VS;
 
   // Hold late-parsed attributes so we can attach a Decl to them later.
@@ -2852,7 +2854,7 @@
       break;
 
     if (Tok.isAtStartOfLine() &&
-        !MightBeDeclarator(Declarator::MemberContext)) {
+        !MightBeDeclarator(DeclaratorContext::MemberContext)) {
       // This comma was followed by a line-break and something which can't be
       // the start of a declarator. The comma was probably a typo for a
       // semicolon.
@@ -3195,6 +3197,9 @@
   }
 
   if (Tok.is(tok::colon)) {
+    ParseScope InheritanceScope(this, getCurScope()->getFlags() |
+                                          Scope::ClassInheritanceScope);
+
     ParseBaseClause(TagDecl);
     if (!Tok.is(tok::l_brace)) {
       bool SuggestFixIt = false;
@@ -3622,7 +3627,7 @@
   if (P.getLangOpts().CPlusPlus11) {
     const char *Replacement = IsNoexcept ? "noexcept" : "noexcept(false)";
     P.Diag(Range.getBegin(),
-           P.getLangOpts().CPlusPlus1z && !IsNoexcept
+           P.getLangOpts().CPlusPlus17 && !IsNoexcept
                ? diag::ext_dynamic_exception_spec
                : diag::warn_exception_spec_deprecated)
         << Range;
@@ -3701,12 +3706,15 @@
 
 /// ParseTrailingReturnType - Parse a trailing return type on a new-style
 /// function declaration.
-TypeResult Parser::ParseTrailingReturnType(SourceRange &Range) {
+TypeResult Parser::ParseTrailingReturnType(SourceRange &Range,
+                                           bool MayBeFollowedByDirectInit) {
   assert(Tok.is(tok::arrow) && "expected arrow");
 
   ConsumeToken();
 
-  return ParseTypeName(&Range, Declarator::TrailingReturnContext);
+  return ParseTypeName(&Range, MayBeFollowedByDirectInit
+                                   ? DeclaratorContext::TrailingReturnVarContext
+                                   : DeclaratorContext::TrailingReturnContext);
 }
 
 /// \brief We have just started parsing the definition of a new class,
@@ -3953,7 +3961,7 @@
   SourceLocation CommonScopeLoc;
   IdentifierInfo *CommonScopeName = nullptr;
   if (Tok.is(tok::kw_using)) {
-    Diag(Tok.getLocation(), getLangOpts().CPlusPlus1z
+    Diag(Tok.getLocation(), getLangOpts().CPlusPlus17
                                 ? diag::warn_cxx14_compat_using_attribute_ns
                                 : diag::ext_using_attribute_ns);
     ConsumeToken();
diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp
index 5e0688c..1b8865e 100644
--- a/lib/Parse/ParseExpr.cpp
+++ b/lib/Parse/ParseExpr.cpp
@@ -66,12 +66,16 @@
 ///         shift-expression '<<' additive-expression
 ///         shift-expression '>>' additive-expression
 ///
-///       relational-expression: [C99 6.5.8]
+///       compare-expression: [C++20 expr.spaceship]
 ///         shift-expression
-///         relational-expression '<' shift-expression
-///         relational-expression '>' shift-expression
-///         relational-expression '<=' shift-expression
-///         relational-expression '>=' shift-expression
+///         compare-expression '<=>' shift-expression
+///
+///       relational-expression: [C99 6.5.8]
+///         compare-expression
+///         relational-expression '<' compare-expression
+///         relational-expression '>' compare-expression
+///         relational-expression '<=' compare-expression
+///         relational-expression '>=' compare-expression
 ///
 ///       equality-expression: [C99 6.5.9]
 ///         relational-expression
@@ -266,11 +270,13 @@
   return false;
 }
 
-static bool isFoldOperator(prec::Level Level) {
-  return Level > prec::Unknown && Level != prec::Conditional;
+bool Parser::isFoldOperator(prec::Level Level) const {
+  return Level > prec::Unknown && Level != prec::Conditional &&
+         Level != prec::Spaceship;
 }
-static bool isFoldOperator(tok::TokenKind Kind) {
-  return isFoldOperator(getBinOpPrecedence(Kind, false, true));
+
+bool Parser::isFoldOperator(tok::TokenKind Kind) const {
+  return isFoldOperator(getBinOpPrecedence(Kind, GreaterThanIsOperator, true));
 }
 
 /// \brief Parse a binary expression that starts with \p LHS and has a
@@ -716,6 +722,7 @@
 ///                   '__is_sealed'                           [MS]
 ///                   '__is_trivial'
 ///                   '__is_union'
+///                   '__has_unique_object_representations'
 ///
 /// [Clang] unary-type-trait:
 ///                   '__is_aggregate'
@@ -993,7 +1000,7 @@
             DS.SetTypeSpecType(TST_typename, ILoc, PrevSpec, DiagID, Typ,
                                Actions.getASTContext().getPrintingPolicy());
             
-            Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+            Declarator DeclaratorInfo(DS, DeclaratorContext::TypeNameContext);
             TypeResult Ty = Actions.ActOnTypeName(getCurScope(), 
                                                   DeclaratorInfo);
             if (Ty.isInvalid())
@@ -1202,7 +1209,7 @@
                          PrevSpec, DiagID, Type,
                          Actions.getASTContext().getPrintingPolicy());
 
-      Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+      Declarator DeclaratorInfo(DS, DeclaratorContext::TypeNameContext);
       TypeResult Ty = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo);
       if (Ty.isInvalid())
         break;
@@ -1817,7 +1824,7 @@
       if (isTypeIdUnambiguously()) {
         DeclSpec DS(AttrFactory);
         ParseSpecifierQualifierList(DS);
-        Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+        Declarator DeclaratorInfo(DS, DeclaratorContext::TypeNameContext);
         ParseDeclarator(DeclaratorInfo);
 
         SourceLocation LParenLoc = PP.getLocForEndOfToken(OpTok.getLocation());
@@ -2374,7 +2381,7 @@
     // Parse the type declarator.
     DeclSpec DS(AttrFactory);
     ParseSpecifierQualifierList(DS);
-    Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+    Declarator DeclaratorInfo(DS, DeclaratorContext::TypeNameContext);
     ParseDeclarator(DeclaratorInfo);
     
     // If our type is followed by an identifier and either ':' or ']', then 
@@ -2731,7 +2738,7 @@
     }
   }
 
-  Diag(EllipsisLoc, getLangOpts().CPlusPlus1z
+  Diag(EllipsisLoc, getLangOpts().CPlusPlus17
                         ? diag::warn_cxx14_compat_fold_expression
                         : diag::ext_fold_expression);
 
@@ -2851,7 +2858,7 @@
   ParseSpecifierQualifierList(DS);
 
   // Parse the block-declarator.
-  Declarator DeclaratorInfo(DS, Declarator::BlockLiteralContext);
+  Declarator DeclaratorInfo(DS, DeclaratorContext::BlockLiteralContext);
   DeclaratorInfo.setFunctionDefinitionKind(FDK_Definition);
   ParseDeclarator(DeclaratorInfo);
 
@@ -2890,7 +2897,7 @@
 
   // Parse the return type if present.
   DeclSpec DS(AttrFactory);
-  Declarator ParamInfo(DS, Declarator::BlockLiteralContext);
+  Declarator ParamInfo(DS, DeclaratorContext::BlockLiteralContext);
   ParamInfo.setFunctionDefinitionKind(FDK_Definition);
   // FIXME: Since the return type isn't actually parsed, it can't be used to
   // fill ParamInfo with an initial valid range, so do it manually.
diff --git a/lib/Parse/ParseExprCXX.cpp b/lib/Parse/ParseExprCXX.cpp
index a640439..ecb36fe 100644
--- a/lib/Parse/ParseExprCXX.cpp
+++ b/lib/Parse/ParseExprCXX.cpp
@@ -249,7 +249,7 @@
 
       if (Tok.is(tok::code_completion)) {
         // Code completion for a nested-name-specifier, where the code
-        // code completion token follows the '::'.
+        // completion token follows the '::'.
         Actions.CodeCompleteQualifiedId(getCurScope(), SS, EnteringContext);
         // Include code completion token into the range of the scope otherwise
         // when we try to annotate the scope tokens the dangling code completion
@@ -292,8 +292,8 @@
           break;
         }
 
-        if (TemplateName.getKind() != UnqualifiedId::IK_OperatorFunctionId &&
-            TemplateName.getKind() != UnqualifiedId::IK_LiteralOperatorId) {
+        if (TemplateName.getKind() != UnqualifiedIdKind::IK_OperatorFunctionId &&
+            TemplateName.getKind() != UnqualifiedIdKind::IK_LiteralOperatorId) {
           Diag(TemplateName.getSourceRange().getBegin(),
                diag::err_id_after_template_in_nested_name_spec)
             << TemplateName.getSourceRange();
@@ -991,27 +991,34 @@
 ///
 /// Returns true if it hit something unexpected.
 bool Parser::TryParseLambdaIntroducer(LambdaIntroducer &Intro) {
-  TentativeParsingAction PA(*this);
+  {
+    bool SkippedInits = false;
+    TentativeParsingAction PA1(*this);
 
-  bool SkippedInits = false;
-  Optional<unsigned> DiagID(ParseLambdaIntroducer(Intro, &SkippedInits));
+    if (ParseLambdaIntroducer(Intro, &SkippedInits)) {
+      PA1.Revert();
+      return true;
+    }
 
-  if (DiagID) {
-    PA.Revert();
-    return true;
+    if (!SkippedInits) {
+      PA1.Commit();
+      return false;
+    }
+
+    PA1.Revert();
   }
 
-  if (SkippedInits) {
-    // Parse it again, but this time parse the init-captures too.
-    PA.Revert();
-    Intro = LambdaIntroducer();
-    DiagID = ParseLambdaIntroducer(Intro);
-    assert(!DiagID && "parsing lambda-introducer failed on reparse");
+  // Try to parse it again, but this time parse the init-captures too.
+  Intro = LambdaIntroducer();
+  TentativeParsingAction PA2(*this);
+
+  if (!ParseLambdaIntroducer(Intro)) {
+    PA2.Commit();
     return false;
   }
 
-  PA.Commit();
-  return false;
+  PA2.Revert();
+  return true;
 }
 
 static void
@@ -1055,7 +1062,7 @@
 addConstexprToLambdaDeclSpecifier(Parser &P, SourceLocation ConstexprLoc,
                                   DeclSpec &DS) {
   if (ConstexprLoc.isValid()) {
-    P.Diag(ConstexprLoc, !P.getLangOpts().CPlusPlus1z
+    P.Diag(ConstexprLoc, !P.getLangOpts().CPlusPlus17
                              ? diag::ext_constexpr_on_lambda_cxx17
                              : diag::warn_cxx14_compat_constexpr_on_lambda);
     const char *PrevSpec = nullptr;
@@ -1083,7 +1090,7 @@
 
   // Parse lambda-declarator[opt].
   DeclSpec DS(AttrFactory);
-  Declarator D(DS, Declarator::LambdaExprContext);
+  Declarator D(DS, DeclaratorContext::LambdaExprContext);
   TemplateParameterDepthRAII CurTemplateDepthTracker(TemplateParameterDepth);
   Actions.PushLambdaScope();
 
@@ -1176,7 +1183,8 @@
     if (Tok.is(tok::arrow)) {
       FunLocalRangeEnd = Tok.getLocation();
       SourceRange Range;
-      TrailingReturnType = ParseTrailingReturnType(Range);
+      TrailingReturnType =
+          ParseTrailingReturnType(Range, /*MayBeFollowedByDirectInit*/ false);
       if (Range.getEnd().isValid())
         DeclEndLoc = Range.getEnd();
     }
@@ -1246,7 +1254,8 @@
     // Parse the return type, if there is one.
     if (Tok.is(tok::arrow)) {
       SourceRange Range;
-      TrailingReturnType = ParseTrailingReturnType(Range);
+      TrailingReturnType =
+          ParseTrailingReturnType(Range, /*MayBeFollowedByDirectInit*/ false);
       if (Range.getEnd().isValid())
         DeclEndLoc = Range.getEnd();
     }
@@ -1346,7 +1355,7 @@
   ParseSpecifierQualifierList(DS);
 
   // Parse the abstract-declarator, if present.
-  Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+  Declarator DeclaratorInfo(DS, DeclaratorContext::TypeNameContext);
   ParseDeclarator(DeclaratorInfo);
 
   SourceLocation RAngleBracketLoc = Tok.getLocation();
@@ -1653,7 +1662,7 @@
 /// In C++1z onwards, the type specifier can also be a template-name.
 ExprResult
 Parser::ParseCXXTypeConstructExpression(const DeclSpec &DS) {
-  Declarator DeclaratorInfo(DS, Declarator::FunctionalCastContext);
+  Declarator DeclaratorInfo(DS, DeclaratorContext::FunctionalCastContext);
   ParsedType TypeRep = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo).get();
 
   assert((Tok.is(tok::l_paren) ||
@@ -1665,9 +1674,9 @@
     if (Init.isInvalid())
       return Init;
     Expr *InitList = Init.get();
-    return Actions.ActOnCXXTypeConstructExpr(TypeRep, SourceLocation(),
-                                             MultiExprArg(&InitList, 1),
-                                             SourceLocation());
+    return Actions.ActOnCXXTypeConstructExpr(
+        TypeRep, InitList->getLocStart(), MultiExprArg(&InitList, 1),
+        InitList->getLocEnd(), /*ListInitialization=*/true);
   } else {
     BalancedDelimiterTracker T(*this, tok::l_paren);
     T.consumeOpen();
@@ -1695,9 +1704,9 @@
 
     assert((Exprs.size() == 0 || Exprs.size()-1 == CommaLocs.size())&&
            "Unexpected number of commas!");
-    return Actions.ActOnCXXTypeConstructExpr(TypeRep, T.getOpenLocation(), 
-                                             Exprs,
-                                             T.getCloseLocation());
+    return Actions.ActOnCXXTypeConstructExpr(TypeRep, T.getOpenLocation(),
+                                             Exprs, T.getCloseLocation(),
+                                             /*ListInitialization=*/false);
   }
 }
 
@@ -1708,6 +1717,8 @@
 ///         type-specifier-seq declarator '=' assignment-expression
 /// [C++11] type-specifier-seq declarator '=' initializer-clause
 /// [C++11] type-specifier-seq declarator braced-init-list
+/// [Clang] type-specifier-seq ref-qualifier[opt] '[' identifier-list ']'
+///             brace-or-equal-initializer
 /// [GNU]   type-specifier-seq declarator simple-asm-expr[opt] attributes[opt]
 ///             '=' assignment-expression
 ///
@@ -1753,13 +1764,14 @@
   }
 
   case ConditionOrInitStatement::InitStmtDecl: {
-    Diag(Tok.getLocation(), getLangOpts().CPlusPlus1z
+    Diag(Tok.getLocation(), getLangOpts().CPlusPlus17
                                 ? diag::warn_cxx14_compat_init_statement
                                 : diag::ext_init_statement)
         << (CK == Sema::ConditionKind::Switch);
     SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
-    DeclGroupPtrTy DG = ParseSimpleDeclaration(
-        Declarator::InitStmtContext, DeclEnd, attrs, /*RequireSemi=*/true);
+    DeclGroupPtrTy DG =
+        ParseSimpleDeclaration(DeclaratorContext::InitStmtContext, DeclEnd,
+                               attrs, /*RequireSemi=*/true);
     *InitStmt = Actions.ActOnDeclStmt(DG, DeclStart, DeclEnd);
     return ParseCXXCondition(nullptr, Loc, CK);
   }
@@ -1772,10 +1784,10 @@
   // type-specifier-seq
   DeclSpec DS(AttrFactory);
   DS.takeAttributesFrom(attrs);
-  ParseSpecifierQualifierList(DS, AS_none, DSC_condition);
+  ParseSpecifierQualifierList(DS, AS_none, DeclSpecContext::DSC_condition);
 
   // declarator
-  Declarator DeclaratorInfo(DS, Declarator::ConditionContext);
+  Declarator DeclaratorInfo(DS, DeclaratorContext::ConditionContext);
   ParseDeclarator(DeclaratorInfo);
 
   // simple-asm-expr[opt]
@@ -1973,7 +1985,7 @@
 ///     type-specifier type-specifier-seq[opt]
 ///
 bool Parser::ParseCXXTypeSpecifierSeq(DeclSpec &DS) {
-  ParseSpecifierQualifierList(DS, AS_none, DSC_type_specifier);
+  ParseSpecifierQualifierList(DS, AS_none, DeclSpecContext::DSC_type_specifier);
   DS.Finish(Actions, Actions.getASTContext().getPrintingPolicy());
   return false;
 }
@@ -2024,9 +2036,9 @@
   TemplateTy Template;
   TemplateNameKind TNK = TNK_Non_template;
   switch (Id.getKind()) {
-  case UnqualifiedId::IK_Identifier:
-  case UnqualifiedId::IK_OperatorFunctionId:
-  case UnqualifiedId::IK_LiteralOperatorId:
+  case UnqualifiedIdKind::IK_Identifier:
+  case UnqualifiedIdKind::IK_OperatorFunctionId:
+  case UnqualifiedIdKind::IK_LiteralOperatorId:
     if (AssumeTemplateId) {
       // We defer the injected-class-name checks until we've found whether
       // this template-id is used to form a nested-name-specifier or not.
@@ -2049,11 +2061,11 @@
         // parse correctly as a template, so suggest the keyword 'template'
         // before 'getAs' and treat this as a dependent template name.
         std::string Name;
-        if (Id.getKind() == UnqualifiedId::IK_Identifier)
+        if (Id.getKind() == UnqualifiedIdKind::IK_Identifier)
           Name = Id.Identifier->getName();
         else {
           Name = "operator ";
-          if (Id.getKind() == UnqualifiedId::IK_OperatorFunctionId)
+          if (Id.getKind() == UnqualifiedIdKind::IK_OperatorFunctionId)
             Name += getOperatorSpelling(Id.OperatorFunctionId.Operator);
           else
             Name += Id.Identifier->getName();
@@ -2070,7 +2082,7 @@
     }
     break;
       
-  case UnqualifiedId::IK_ConstructorName: {
+  case UnqualifiedIdKind::IK_ConstructorName: {
     UnqualifiedId TemplateName;
     bool MemberOfUnknownSpecialization;
     TemplateName.setIdentifier(Name, NameLoc);
@@ -2081,7 +2093,7 @@
     break;
   }
       
-  case UnqualifiedId::IK_DestructorName: {
+  case UnqualifiedIdKind::IK_DestructorName: {
     UnqualifiedId TemplateName;
     bool MemberOfUnknownSpecialization;
     TemplateName.setIdentifier(Name, NameLoc);
@@ -2120,18 +2132,20 @@
                                true, LAngleLoc, TemplateArgs, RAngleLoc))
     return true;
   
-  if (Id.getKind() == UnqualifiedId::IK_Identifier ||
-      Id.getKind() == UnqualifiedId::IK_OperatorFunctionId ||
-      Id.getKind() == UnqualifiedId::IK_LiteralOperatorId) {
+  if (Id.getKind() == UnqualifiedIdKind::IK_Identifier ||
+      Id.getKind() == UnqualifiedIdKind::IK_OperatorFunctionId ||
+      Id.getKind() == UnqualifiedIdKind::IK_LiteralOperatorId) {
     // Form a parsed representation of the template-id to be stored in the
     // UnqualifiedId.
 
     // FIXME: Store name for literal operator too.
     IdentifierInfo *TemplateII =
-        Id.getKind() == UnqualifiedId::IK_Identifier ? Id.Identifier : nullptr;
-    OverloadedOperatorKind OpKind = Id.getKind() == UnqualifiedId::IK_Identifier
-                                        ? OO_None
-                                        : Id.OperatorFunctionId.Operator;
+        Id.getKind() == UnqualifiedIdKind::IK_Identifier ? Id.Identifier
+                                                         : nullptr;
+    OverloadedOperatorKind OpKind =
+        Id.getKind() == UnqualifiedIdKind::IK_Identifier
+            ? OO_None
+            : Id.OperatorFunctionId.Operator;
 
     TemplateIdAnnotation *TemplateId = TemplateIdAnnotation::Create(
         SS, TemplateKWLoc, Id.StartLocation, TemplateII, OpKind, Template, TNK,
@@ -2153,7 +2167,7 @@
   if (Type.isInvalid())
     return true;
   
-  if (Id.getKind() == UnqualifiedId::IK_ConstructorName)
+  if (Id.getKind() == UnqualifiedIdKind::IK_ConstructorName)
     Id.setConstructorName(Type.get(), NameLoc, RAngleLoc);
   else
     Id.setDestructorName(Id.StartLocation, Type.get(), RAngleLoc);
@@ -2177,7 +2191,7 @@
 ///            !     =    <  >    += -=   *=  /=  %=
 ///            ^=    &=   |= <<   >> >>= <<=  ==  !=
 ///            <=    >=   && ||   ++ --   ,   ->* ->
-///            ()    []
+///            ()    []   <=>
 ///
 ///       conversion-function-id: [C++ 12.3.2]
 ///         operator conversion-type-id
@@ -2386,7 +2400,7 @@
   
   // Parse the conversion-declarator, which is merely a sequence of
   // ptr-operators.
-  Declarator D(DS, Declarator::ConversionIdContext);
+  Declarator D(DS, DeclaratorContext::ConversionIdContext);
   ParseDeclaratorInternal(D, /*DirectDeclParser=*/nullptr);
 
   // Finish up the type.
@@ -2473,7 +2487,7 @@
                                           /*IsCtorOrDtorName=*/true,
                                           /*NonTrivialTypeSourceInfo=*/true);
       Result.setConstructorName(Ty, IdLoc, IdLoc);
-    } else if (getLangOpts().CPlusPlus1z &&
+    } else if (getLangOpts().CPlusPlus17 &&
                AllowDeductionGuide && SS.isEmpty() &&
                Actions.isDeductionGuideName(getCurScope(), *Id, IdLoc,
                                             &TemplateName)) {
@@ -2547,8 +2561,8 @@
     // 
     //   template-id:
     //     operator-function-id < template-argument-list[opt] >
-    if ((Result.getKind() == UnqualifiedId::IK_OperatorFunctionId ||
-         Result.getKind() == UnqualifiedId::IK_LiteralOperatorId) &&
+    if ((Result.getKind() == UnqualifiedIdKind::IK_OperatorFunctionId ||
+         Result.getKind() == UnqualifiedIdKind::IK_LiteralOperatorId) &&
         (TemplateSpecified || Tok.is(tok::less)))
       return ParseUnqualifiedIdTemplateId(SS, TemplateKWLoc,
                                           nullptr, SourceLocation(),
@@ -2687,7 +2701,7 @@
 
   SourceRange TypeIdParens;
   DeclSpec DS(AttrFactory);
-  Declarator DeclaratorInfo(DS, Declarator::CXXNewContext);
+  Declarator DeclaratorInfo(DS, DeclaratorContext::CXXNewContext);
   if (Tok.is(tok::l_paren)) {
     // If it turns out to be a placement, we change the type location.
     BalancedDelimiterTracker T(*this, tok::l_paren);
@@ -3163,7 +3177,7 @@
   if (ParseAs >= CompoundLiteral) {
     // Parse the type declarator.
     DeclSpec DS(AttrFactory);
-    Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+    Declarator DeclaratorInfo(DS, DeclaratorContext::TypeNameContext);
     {
       ColonProtectionRAIIObject InnerColonProtection(*this);
       ParseSpecifierQualifierList(DS);
diff --git a/lib/Parse/ParseObjc.cpp b/lib/Parse/ParseObjc.cpp
index edf1da1..13f968f 100644
--- a/lib/Parse/ParseObjc.cpp
+++ b/lib/Parse/ParseObjc.cpp
@@ -45,7 +45,8 @@
 /// [OBJC]  objc-protocol-definition
 /// [OBJC]  objc-method-definition
 /// [OBJC]  '@' 'end'
-Parser::DeclGroupPtrTy Parser::ParseObjCAtDirectives() {
+Parser::DeclGroupPtrTy
+Parser::ParseObjCAtDirectives(ParsedAttributesWithRange &Attrs) {
   SourceLocation AtLoc = ConsumeToken(); // the "@"
 
   if (Tok.is(tok::code_completion)) {
@@ -58,15 +59,11 @@
   switch (Tok.getObjCKeywordID()) {
   case tok::objc_class:
     return ParseObjCAtClassDeclaration(AtLoc);
-  case tok::objc_interface: {
-    ParsedAttributes attrs(AttrFactory);
-    SingleDecl = ParseObjCAtInterfaceDeclaration(AtLoc, attrs);
+  case tok::objc_interface:
+    SingleDecl = ParseObjCAtInterfaceDeclaration(AtLoc, Attrs);
     break;
-  }
-  case tok::objc_protocol: {
-    ParsedAttributes attrs(AttrFactory);
-    return ParseObjCAtProtocolDeclaration(AtLoc, attrs);
-  }
+  case tok::objc_protocol:
+    return ParseObjCAtProtocolDeclaration(AtLoc, Attrs);
   case tok::objc_implementation:
     return ParseObjCAtImplementationDeclaration(AtLoc);
   case tok::objc_end:
@@ -81,8 +78,10 @@
     SingleDecl = ParseObjCPropertyDynamic(AtLoc);
     break;
   case tok::objc_import:
-    if (getLangOpts().Modules || getLangOpts().DebuggerSupport)
-      return ParseModuleImport(AtLoc);
+    if (getLangOpts().Modules || getLangOpts().DebuggerSupport) {
+      SingleDecl = ParseModuleImport(AtLoc);
+      break;
+    }
     Diag(AtLoc, diag::err_atimport);
     SkipUntil(tok::semi);
     return Actions.ConvertDeclToDeclGroup(nullptr);
@@ -1138,14 +1137,14 @@
 ///     'null_unspecified'
 ///
 void Parser::ParseObjCTypeQualifierList(ObjCDeclSpec &DS,
-                                        Declarator::TheContext Context) {
-  assert(Context == Declarator::ObjCParameterContext ||
-         Context == Declarator::ObjCResultContext);
+                                        DeclaratorContext Context) {
+  assert(Context == DeclaratorContext::ObjCParameterContext ||
+         Context == DeclaratorContext::ObjCResultContext);
 
   while (1) {
     if (Tok.is(tok::code_completion)) {
       Actions.CodeCompleteObjCPassingType(getCurScope(), DS, 
-                          Context == Declarator::ObjCParameterContext);
+                          Context == DeclaratorContext::ObjCParameterContext);
       return cutOffParsing();
     }
     
@@ -1240,12 +1239,12 @@
 ///     '(' objc-type-qualifiers[opt] ')'
 ///
 ParsedType Parser::ParseObjCTypeName(ObjCDeclSpec &DS, 
-                                     Declarator::TheContext context,
+                                     DeclaratorContext context,
                                      ParsedAttributes *paramAttrs) {
-  assert(context == Declarator::ObjCParameterContext ||
-         context == Declarator::ObjCResultContext);
+  assert(context == DeclaratorContext::ObjCParameterContext ||
+         context == DeclaratorContext::ObjCResultContext);
   assert((paramAttrs != nullptr) ==
-         (context == Declarator::ObjCParameterContext));
+         (context == DeclaratorContext::ObjCParameterContext));
 
   assert(Tok.is(tok::l_paren) && "expected (");
 
@@ -1263,9 +1262,9 @@
     // Parse an abstract declarator.
     DeclSpec declSpec(AttrFactory);
     declSpec.setObjCQualifiers(&DS);
-    DeclSpecContext dsContext = DSC_normal;
-    if (context == Declarator::ObjCResultContext)
-      dsContext = DSC_objc_method_result;
+    DeclSpecContext dsContext = DeclSpecContext::DSC_normal;
+    if (context == DeclaratorContext::ObjCResultContext)
+      dsContext = DeclSpecContext::DSC_objc_method_result;
     ParseSpecifierQualifierList(declSpec, AS_none, dsContext);
     Declarator declarator(declSpec, context);
     ParseDeclarator(declarator);
@@ -1286,7 +1285,7 @@
 
       // If we're parsing a parameter, steal all the decl attributes
       // and add them to the decl spec.
-      if (context == Declarator::ObjCParameterContext)
+      if (context == DeclaratorContext::ObjCParameterContext)
         takeDeclAttributes(*paramAttrs, declarator);
     }
   }
@@ -1350,13 +1349,14 @@
   ParsedType ReturnType;
   ObjCDeclSpec DSRet;
   if (Tok.is(tok::l_paren))
-    ReturnType = ParseObjCTypeName(DSRet, Declarator::ObjCResultContext,
+    ReturnType = ParseObjCTypeName(DSRet, DeclaratorContext::ObjCResultContext,
                                    nullptr);
 
   // If attributes exist before the method, parse them.
   ParsedAttributes methodAttrs(AttrFactory);
   if (getLangOpts().ObjC2)
     MaybeParseGNUAttributes(methodAttrs);
+  MaybeParseCXX11Attributes(methodAttrs);
 
   if (Tok.is(tok::code_completion)) {
     Actions.CodeCompleteObjCMethodDecl(getCurScope(), mType == tok::minus, 
@@ -1383,6 +1383,7 @@
     // If attributes exist after the method, parse them.
     if (getLangOpts().ObjC2)
       MaybeParseGNUAttributes(methodAttrs);
+    MaybeParseCXX11Attributes(methodAttrs);
 
     Selector Sel = PP.getSelectorTable().getNullarySelector(SelIdent);
     Decl *Result
@@ -1414,16 +1415,15 @@
     ArgInfo.Type = nullptr;
     if (Tok.is(tok::l_paren)) // Parse the argument type if present.
       ArgInfo.Type = ParseObjCTypeName(ArgInfo.DeclSpec,
-                                       Declarator::ObjCParameterContext,
+                                       DeclaratorContext::ObjCParameterContext,
                                        &paramAttrs);
 
     // If attributes exist before the argument name, parse them.
     // Regardless, collect all the attributes we've parsed so far.
-    ArgInfo.ArgAttrs = nullptr;
-    if (getLangOpts().ObjC2) {
+    if (getLangOpts().ObjC2)
       MaybeParseGNUAttributes(paramAttrs);
-      ArgInfo.ArgAttrs = paramAttrs.getList();
-    }
+    MaybeParseCXX11Attributes(paramAttrs);
+    ArgInfo.ArgAttrs = paramAttrs.getList();
 
     // Code completion for the next piece of the selector.
     if (Tok.is(tok::code_completion)) {
@@ -1492,7 +1492,7 @@
     DeclSpec DS(AttrFactory);
     ParseDeclarationSpecifiers(DS);
     // Parse the declarator.
-    Declarator ParmDecl(DS, Declarator::PrototypeContext);
+    Declarator ParmDecl(DS, DeclaratorContext::PrototypeContext);
     ParseDeclarator(ParmDecl);
     IdentifierInfo *ParmII = ParmDecl.getIdentifier();
     Decl *Param = Actions.ActOnParamDeclarator(getCurScope(), ParmDecl);
@@ -1506,7 +1506,8 @@
   // If attributes exist after the method, parse them.
   if (getLangOpts().ObjC2)
     MaybeParseGNUAttributes(methodAttrs);
-  
+  MaybeParseCXX11Attributes(methodAttrs);
+
   if (KeyIdents.size() == 0)
     return nullptr;
 
@@ -1701,7 +1702,7 @@
                          typeArg, Actions.getASTContext().getPrintingPolicy());
 
       // Form a declarator to turn this into a type.
-      Declarator D(DS, Declarator::TypeNameContext);
+      Declarator D(DS, DeclaratorContext::TypeNameContext);
       TypeResult fullTypeArg = Actions.ActOnTypeName(getCurScope(), D);
       if (fullTypeArg.isUsable()) {
         typeArgs.push_back(fullTypeArg.get());
@@ -2541,7 +2542,7 @@
         if (Tok.isNot(tok::ellipsis)) {
           DeclSpec DS(AttrFactory);
           ParseDeclarationSpecifiers(DS);
-          Declarator ParmDecl(DS, Declarator::ObjCCatchContext);
+          Declarator ParmDecl(DS, DeclaratorContext::ObjCCatchContext);
           ParseDeclarator(ParmDecl);
 
           // Inform the actions module about the declarator, so it
@@ -2943,7 +2944,7 @@
   // We have a class message. Turn the simple-type-specifier or
   // typename-specifier we parsed into a type and parse the
   // remainder of the class message.
-  Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+  Declarator DeclaratorInfo(DS, DeclaratorContext::TypeNameContext);
   TypeResult Type = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo);
   if (Type.isInvalid())
     return true;
diff --git a/lib/Parse/ParseOpenMP.cpp b/lib/Parse/ParseOpenMP.cpp
index e1685f6..d34fad8 100644
--- a/lib/Parse/ParseOpenMP.cpp
+++ b/lib/Parse/ParseOpenMP.cpp
@@ -250,7 +250,8 @@
   do {
     ColonProtectionRAIIObject ColonRAII(*this);
     SourceRange Range;
-    TypeResult TR = ParseTypeName(&Range, Declarator::PrototypeContext, AS);
+    TypeResult TR =
+        ParseTypeName(&Range, DeclaratorContext::PrototypeContext, AS);
     if (TR.isUsable()) {
       auto ReductionType =
           Actions.ActOnOpenMPDeclareReductionType(Range.getBegin(), TR);
@@ -850,7 +851,7 @@
   case OMPD_target_teams_distribute_parallel_for_simd:
   case OMPD_target_teams_distribute_simd:
     Diag(Tok, diag::err_omp_unexpected_directive)
-        << getOpenMPDirectiveName(DKind);
+        << 1 << getOpenMPDirectiveName(DKind);
     break;
   }
   while (Tok.isNot(tok::annot_pragma_openmp_end))
@@ -1079,12 +1080,18 @@
     StmtResult AssociatedStmt;
     if (HasAssociatedStatement) {
       // The body is a block scope like in Lambdas and Blocks.
-      Sema::CompoundScopeRAII CompoundScope(Actions);
       Actions.ActOnOpenMPRegionStart(DKind, getCurScope());
-      Actions.ActOnStartOfCompoundStmt();
-      // Parse statement
-      AssociatedStmt = ParseStatement();
-      Actions.ActOnFinishOfCompoundStmt();
+      // FIXME: We create a bogus CompoundStmt scope to hold the contents of
+      // the captured region. Code elsewhere assumes that any FunctionScopeInfo
+      // should have at least one compound statement scope within it.
+      AssociatedStmt = (Sema::CompoundScopeRAII(Actions), ParseStatement());
+      AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses);
+    } else if (DKind == OMPD_target_update || DKind == OMPD_target_enter_data ||
+               DKind == OMPD_target_exit_data) {
+      Actions.ActOnOpenMPRegionStart(DKind, getCurScope());
+      AssociatedStmt = (Sema::CompoundScopeRAII(Actions),
+                        Actions.ActOnCompoundStmt(Loc, Loc, llvm::None,
+                                                  /*isStmtExpr=*/false));
       AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses);
     }
     Directive = Actions.ActOnOpenMPExecutableDirective(
@@ -1100,7 +1107,7 @@
   case OMPD_declare_target:
   case OMPD_end_declare_target:
     Diag(Tok, diag::err_omp_unexpected_directive)
-        << getOpenMPDirectiveName(DKind);
+        << 1 << getOpenMPDirectiveName(DKind);
     SkipUntil(tok::annot_pragma_openmp_end);
     break;
   case OMPD_unknown:
@@ -1195,11 +1202,13 @@
                                      OpenMPClauseKind CKind, bool FirstClause) {
   OMPClause *Clause = nullptr;
   bool ErrorFound = false;
+  bool WrongDirective = false;
   // Check if clause is allowed for the given directive.
   if (CKind != OMPC_unknown && !isAllowedClauseForDirective(DKind, CKind)) {
     Diag(Tok, diag::err_omp_unexpected_clause) << getOpenMPClauseName(CKind)
                                                << getOpenMPDirectiveName(DKind);
     ErrorFound = true;
+    WrongDirective = true;
   }
 
   switch (CKind) {
@@ -1243,9 +1252,9 @@
     }
 
     if (CKind == OMPC_ordered && PP.LookAhead(/*N=*/0).isNot(tok::l_paren))
-      Clause = ParseOpenMPClause(CKind);
+      Clause = ParseOpenMPClause(CKind, WrongDirective);
     else
-      Clause = ParseOpenMPSingleExprClause(CKind);
+      Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective);
     break;
   case OMPC_default:
   case OMPC_proc_bind:
@@ -1260,7 +1269,7 @@
       ErrorFound = true;
     }
 
-    Clause = ParseOpenMPSimpleClause(CKind);
+    Clause = ParseOpenMPSimpleClause(CKind, WrongDirective);
     break;
   case OMPC_schedule:
   case OMPC_dist_schedule:
@@ -1277,7 +1286,7 @@
     LLVM_FALLTHROUGH;
 
   case OMPC_if:
-    Clause = ParseOpenMPSingleExprWithArgClause(CKind);
+    Clause = ParseOpenMPSingleExprWithArgClause(CKind, WrongDirective);
     break;
   case OMPC_nowait:
   case OMPC_untied:
@@ -1300,7 +1309,7 @@
       ErrorFound = true;
     }
 
-    Clause = ParseOpenMPClause(CKind);
+    Clause = ParseOpenMPClause(CKind, WrongDirective);
     break;
   case OMPC_private:
   case OMPC_firstprivate:
@@ -1320,7 +1329,7 @@
   case OMPC_from:
   case OMPC_use_device_ptr:
   case OMPC_is_device_ptr:
-    Clause = ParseOpenMPVarListClause(DKind, CKind);
+    Clause = ParseOpenMPVarListClause(DKind, CKind, WrongDirective);
     break;
   case OMPC_unknown:
     Diag(Tok, diag::warn_omp_extra_tokens_at_eol)
@@ -1329,8 +1338,9 @@
     break;
   case OMPC_threadprivate:
   case OMPC_uniform:
-    Diag(Tok, diag::err_omp_unexpected_clause) << getOpenMPClauseName(CKind)
-                                               << getOpenMPDirectiveName(DKind);
+    if (!WrongDirective)
+      Diag(Tok, diag::err_omp_unexpected_clause)
+          << getOpenMPClauseName(CKind) << getOpenMPDirectiveName(DKind);
     SkipUntil(tok::comma, tok::annot_pragma_openmp_end, StopBeforeMatch);
     break;
   }
@@ -1390,7 +1400,8 @@
 ///    hint-clause:
 ///      'hint' '(' expression ')'
 ///
-OMPClause *Parser::ParseOpenMPSingleExprClause(OpenMPClauseKind Kind) {
+OMPClause *Parser::ParseOpenMPSingleExprClause(OpenMPClauseKind Kind,
+                                               bool ParseOnly) {
   SourceLocation Loc = ConsumeToken();
   SourceLocation LLoc = Tok.getLocation();
   SourceLocation RLoc;
@@ -1400,6 +1411,8 @@
   if (Val.isInvalid())
     return nullptr;
 
+  if (ParseOnly)
+    return nullptr;
   return Actions.ActOnOpenMPSingleExprClause(Kind, Val.get(), Loc, LLoc, RLoc);
 }
 
@@ -1411,7 +1424,8 @@
 ///    proc_bind-clause:
 ///         'proc_bind' '(' 'master' | 'close' | 'spread' ')
 ///
-OMPClause *Parser::ParseOpenMPSimpleClause(OpenMPClauseKind Kind) {
+OMPClause *Parser::ParseOpenMPSimpleClause(OpenMPClauseKind Kind,
+                                           bool ParseOnly) {
   SourceLocation Loc = Tok.getLocation();
   SourceLocation LOpen = ConsumeToken();
   // Parse '('.
@@ -1430,6 +1444,8 @@
   // Parse ')'.
   T.consumeClose();
 
+  if (ParseOnly)
+    return nullptr;
   return Actions.ActOnOpenMPSimpleClause(Kind, Type, TypeLoc, LOpen, Loc,
                                          Tok.getLocation());
 }
@@ -1460,10 +1476,12 @@
 ///    nogroup-clause:
 ///         'nogroup'
 ///
-OMPClause *Parser::ParseOpenMPClause(OpenMPClauseKind Kind) {
+OMPClause *Parser::ParseOpenMPClause(OpenMPClauseKind Kind, bool ParseOnly) {
   SourceLocation Loc = Tok.getLocation();
   ConsumeAnyToken();
 
+  if (ParseOnly)
+    return nullptr;
   return Actions.ActOnOpenMPClause(Kind, Loc, Tok.getLocation());
 }
 
@@ -1481,7 +1499,8 @@
 ///    defaultmap:
 ///      'defaultmap' '(' modifier ':' kind ')'
 ///
-OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPClauseKind Kind) {
+OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPClauseKind Kind,
+                                                      bool ParseOnly) {
   SourceLocation Loc = ConsumeToken();
   SourceLocation DelimLoc;
   // Parse '('.
@@ -1603,6 +1622,8 @@
   if (NeedAnExpression && Val.isInvalid())
     return nullptr;
 
+  if (ParseOnly)
+    return nullptr;
   return Actions.ActOnOpenMPSingleExprWithArgClause(
       Kind, Arg, Val.get(), Loc, T.getOpenLocation(), KLoc, DelimLoc,
       T.getCloseLocation());
@@ -1930,7 +1951,8 @@
 ///  modifier(list)
 /// where modifier is 'val' (C) or 'ref', 'val' or 'uval'(C++).
 OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind,
-                                            OpenMPClauseKind Kind) {
+                                            OpenMPClauseKind Kind,
+                                            bool ParseOnly) {
   SourceLocation Loc = Tok.getLocation();
   SourceLocation LOpen = ConsumeToken();
   SmallVector<Expr *, 4> Vars;
@@ -1939,6 +1961,8 @@
   if (ParseOpenMPVarList(DKind, Kind, Vars, Data))
     return nullptr;
 
+  if (ParseOnly)
+    return nullptr;
   return Actions.ActOnOpenMPVarListClause(
       Kind, Vars, Data.TailExpr, Loc, LOpen, Data.ColonLoc, Tok.getLocation(),
       Data.ReductionIdScopeSpec, Data.ReductionId, Data.DepKind, Data.LinKind,
diff --git a/lib/Parse/ParsePragma.cpp b/lib/Parse/ParsePragma.cpp
index 198d5c6..e815ea1 100644
--- a/lib/Parse/ParsePragma.cpp
+++ b/lib/Parse/ParsePragma.cpp
@@ -95,6 +95,44 @@
                     Token &FirstToken) override;
 };
 
+// Pragma STDC implementations.
+
+/// PragmaSTDC_FENV_ACCESSHandler - "\#pragma STDC FENV_ACCESS ...".
+struct PragmaSTDC_FENV_ACCESSHandler : public PragmaHandler {
+  PragmaSTDC_FENV_ACCESSHandler() : PragmaHandler("FENV_ACCESS") {}
+
+  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+                    Token &Tok) override {
+    tok::OnOffSwitch OOS;
+    if (PP.LexOnOffSwitch(OOS))
+     return;
+    if (OOS == tok::OOS_ON)
+      PP.Diag(Tok, diag::warn_stdc_fenv_access_not_supported);
+  }
+};
+
+/// PragmaSTDC_CX_LIMITED_RANGEHandler - "\#pragma STDC CX_LIMITED_RANGE ...".
+struct PragmaSTDC_CX_LIMITED_RANGEHandler : public PragmaHandler {
+  PragmaSTDC_CX_LIMITED_RANGEHandler() : PragmaHandler("CX_LIMITED_RANGE") {}
+
+  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+                    Token &Tok) override {
+    tok::OnOffSwitch OOS;
+    PP.LexOnOffSwitch(OOS);
+  }
+};
+
+/// PragmaSTDC_UnknownHandler - "\#pragma STDC ...".
+struct PragmaSTDC_UnknownHandler : public PragmaHandler {
+  PragmaSTDC_UnknownHandler() = default;
+
+  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+                    Token &UnknownTok) override {
+    // C99 6.10.6p2, unknown forms are not allowed.
+    PP.Diag(UnknownTok, diag::ext_stdc_pragma_ignored);
+  }
+};
+
 struct PragmaFPHandler : public PragmaHandler {
   PragmaFPHandler() : PragmaHandler("fp") {}
   void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
@@ -233,6 +271,15 @@
   FPContractHandler.reset(new PragmaFPContractHandler());
   PP.AddPragmaHandler("STDC", FPContractHandler.get());
 
+  STDCFENVHandler.reset(new PragmaSTDC_FENV_ACCESSHandler());
+  PP.AddPragmaHandler("STDC", STDCFENVHandler.get());
+
+  STDCCXLIMITHandler.reset(new PragmaSTDC_CX_LIMITED_RANGEHandler());
+  PP.AddPragmaHandler("STDC", STDCCXLIMITHandler.get());
+
+  STDCUnknownHandler.reset(new PragmaSTDC_UnknownHandler());
+  PP.AddPragmaHandler("STDC", STDCUnknownHandler.get());
+
   PCSectionHandler.reset(new PragmaClangSectionHandler(Actions));
   PP.AddPragmaHandler("clang", PCSectionHandler.get());
 
@@ -248,7 +295,8 @@
     OpenMPHandler.reset(new PragmaNoOpenMPHandler());
   PP.AddPragmaHandler(OpenMPHandler.get());
 
-  if (getLangOpts().MicrosoftExt || getTargetInfo().getTriple().isPS4()) {
+  if (getLangOpts().MicrosoftExt ||
+      getTargetInfo().getTriple().isOSBinFormatELF()) {
     MSCommentHandler.reset(new PragmaCommentHandler(Actions));
     PP.AddPragmaHandler(MSCommentHandler.get());
   }
@@ -330,7 +378,8 @@
   PP.RemovePragmaHandler(OpenMPHandler.get());
   OpenMPHandler.reset();
 
-  if (getLangOpts().MicrosoftExt || getTargetInfo().getTriple().isPS4()) {
+  if (getLangOpts().MicrosoftExt ||
+      getTargetInfo().getTriple().isOSBinFormatELF()) {
     PP.RemovePragmaHandler(MSCommentHandler.get());
     MSCommentHandler.reset();
   }
@@ -371,6 +420,15 @@
   PP.RemovePragmaHandler("STDC", FPContractHandler.get());
   FPContractHandler.reset();
 
+  PP.RemovePragmaHandler("STDC", STDCFENVHandler.get());
+  STDCFENVHandler.reset();
+
+  PP.RemovePragmaHandler("STDC", STDCCXLIMITHandler.get());
+  STDCCXLIMITHandler.reset();
+
+  PP.RemovePragmaHandler("STDC", STDCUnknownHandler.get());
+  STDCUnknownHandler.reset();
+
   PP.RemovePragmaHandler("clang", OptimizeHandler.get());
   OptimizeHandler.reset();
 
@@ -451,8 +509,10 @@
   Sema::PragmaOptionsAlignKind Kind =
     static_cast<Sema::PragmaOptionsAlignKind>(
     reinterpret_cast<uintptr_t>(Tok.getAnnotationValue()));
-  SourceLocation PragmaLoc = ConsumeAnnotationToken();
-  Actions.ActOnPragmaOptionsAlign(Kind, PragmaLoc);
+  Actions.ActOnPragmaOptionsAlign(Kind, Tok.getLocation());
+  // Consume the token after processing the pragma to enable pragma-specific
+  // #include warnings.
+  ConsumeAnnotationToken();
 }
 
 void Parser::HandlePragmaDump() {
@@ -2057,9 +2117,21 @@
   Tok.setKind(tok::annot_pragma_openmp);
   Tok.setLocation(FirstTok.getLocation());
 
-  while (Tok.isNot(tok::eod)) {
+  while (Tok.isNot(tok::eod) && Tok.isNot(tok::eof)) {
     Pragma.push_back(Tok);
     PP.Lex(Tok);
+    if (Tok.is(tok::annot_pragma_openmp)) {
+      PP.Diag(Tok, diag::err_omp_unexpected_directive) << 0;
+      unsigned InnerPragmaCnt = 1;
+      while (InnerPragmaCnt != 0) {
+        PP.Lex(Tok);
+        if (Tok.is(tok::annot_pragma_openmp))
+          ++InnerPragmaCnt;
+        else if (Tok.is(tok::annot_pragma_openmp_end))
+          --InnerPragmaCnt;
+      }
+      PP.Lex(Tok);
+    }
   }
   SourceLocation EodLoc = Tok.getLocation();
   Tok.startToken();
@@ -2393,6 +2465,12 @@
     return;
   }
 
+  if (PP.getTargetInfo().getTriple().isOSBinFormatELF() && Kind != PCK_Lib) {
+    PP.Diag(Tok.getLocation(), diag::warn_pragma_comment_ignored)
+        << II->getName();
+    return;
+  }
+
   // On PS4, issue a warning about any pragma comments other than
   // #pragma comment lib.
   if (PP.getTargetInfo().getTriple().isPS4() && Kind != PCK_Lib) {
diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp
index 65c3f21..b63c69d 100644
--- a/lib/Parse/ParseStmt.cpp
+++ b/lib/Parse/ParseStmt.cpp
@@ -211,7 +211,7 @@
          Allowed == ACK_Any) &&
         isDeclarationStatement()) {
       SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
-      DeclGroupPtrTy Decl = ParseDeclaration(Declarator::BlockContext,
+      DeclGroupPtrTy Decl = ParseDeclaration(DeclaratorContext::BlockContext,
                                              DeclEnd, Attrs);
       return Actions.ActOnDeclStmt(Decl, DeclStart, DeclEnd);
     }
@@ -954,7 +954,7 @@
   if (T.consumeOpen())
     return StmtError();
 
-  Sema::CompoundScopeRAII CompoundScope(Actions);
+  Sema::CompoundScopeRAII CompoundScope(Actions, isStmtExpr);
 
   // Parse any pragmas at the beginning of the compound statement.
   ParseCompoundStatementLeadingPragmas();
@@ -1021,8 +1021,8 @@
         ExtensionRAIIObject O(Diags);
 
         SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
-        DeclGroupPtrTy Res = ParseDeclaration(Declarator::BlockContext, DeclEnd,
-                                              attrs);
+        DeclGroupPtrTy Res =
+            ParseDeclaration(DeclaratorContext::BlockContext, DeclEnd, attrs);
         R = Actions.ActOnDeclStmt(Res, DeclStart, DeclEnd);
       } else {
         // Otherwise this was a unary __extension__ marker.
@@ -1128,7 +1128,7 @@
 
   bool IsConstexpr = false;
   if (Tok.is(tok::kw_constexpr)) {
-    Diag(Tok, getLangOpts().CPlusPlus1z ? diag::warn_cxx14_compat_constexpr_if
+    Diag(Tok, getLangOpts().CPlusPlus17 ? diag::warn_cxx14_compat_constexpr_if
                                         : diag::ext_constexpr_if);
     IsConstexpr = true;
     ConsumeToken();
@@ -1479,6 +1479,9 @@
   DiagnoseAndSkipCXX11Attributes();
 
   ExprResult Cond = ParseExpression();
+  // Correct the typos in condition before closing the scope.
+  if (Cond.isUsable())
+    Cond = Actions.CorrectDelayedTyposInExpr(Cond);
   T.consumeClose();
   DoScope.Exit();
 
@@ -1610,7 +1613,7 @@
       ForRangeInit.RangeExpr = ParseExpression();
 
     Diag(Loc, diag::err_for_range_identifier)
-      << ((getLangOpts().CPlusPlus11 && !getLangOpts().CPlusPlus1z)
+      << ((getLangOpts().CPlusPlus11 && !getLangOpts().CPlusPlus17)
               ? FixItHint::CreateInsertion(Loc, "auto &&")
               : FixItHint());
 
@@ -1628,7 +1631,7 @@
 
     SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
     DeclGroupPtrTy DG = ParseSimpleDeclaration(
-        Declarator::ForContext, DeclEnd, attrs, false,
+        DeclaratorContext::ForContext, DeclEnd, attrs, false,
         MightBeForRangeStmt ? &ForRangeInit : nullptr);
     FirstPart = Actions.ActOnDeclStmt(DG, DeclStart, Tok.getLocation());
     if (ForRangeInit.ParsedForRangeDecl()) {
@@ -2178,7 +2181,7 @@
     if (ParseCXXTypeSpecifierSeq(DS))
       return StmtError();
 
-    Declarator ExDecl(DS, Declarator::CXXCatchContext);
+    Declarator ExDecl(DS, DeclaratorContext::CXXCatchContext);
     ParseDeclarator(ExDecl);
     ExceptionDecl = Actions.ActOnExceptionDeclarator(getCurScope(), ExDecl);
   } else
diff --git a/lib/Parse/ParseStmtAsm.cpp b/lib/Parse/ParseStmtAsm.cpp
index f38ad27..d81029e 100644
--- a/lib/Parse/ParseStmtAsm.cpp
+++ b/lib/Parse/ParseStmtAsm.cpp
@@ -56,53 +56,11 @@
 
   void LookupInlineAsmIdentifier(StringRef &LineBuf,
                                  llvm::InlineAsmIdentifierInfo &Info,
-                                 bool IsUnevaluatedContext) override {
-    // Collect the desired tokens.
-    SmallVector<Token, 16> LineToks;
-    const Token *FirstOrigToken = nullptr;
-    findTokensForString(LineBuf, LineToks, FirstOrigToken);
-
-    unsigned NumConsumedToks;
-    ExprResult Result = TheParser.ParseMSAsmIdentifier(
-        LineToks, NumConsumedToks, IsUnevaluatedContext);
-
-    // If we consumed the entire line, tell MC that.
-    // Also do this if we consumed nothing as a way of reporting failure.
-    if (NumConsumedToks == 0 || NumConsumedToks == LineToks.size()) {
-      // By not modifying LineBuf, we're implicitly consuming it all.
-
-      // Otherwise, consume up to the original tokens.
-    } else {
-      assert(FirstOrigToken && "not using original tokens?");
-
-      // Since we're using original tokens, apply that offset.
-      assert(FirstOrigToken[NumConsumedToks].getLocation() ==
-             LineToks[NumConsumedToks].getLocation());
-      unsigned FirstIndex = FirstOrigToken - AsmToks.begin();
-      unsigned LastIndex = FirstIndex + NumConsumedToks - 1;
-
-      // The total length we've consumed is the relative offset
-      // of the last token we consumed plus its length.
-      unsigned TotalOffset =
-          (AsmTokOffsets[LastIndex] + AsmToks[LastIndex].getLength() -
-           AsmTokOffsets[FirstIndex]);
-      LineBuf = LineBuf.substr(0, TotalOffset);
-    }
-
-    // Initialize Info with the lookup result.
-    if (!Result.isUsable())
-      return;
-    TheParser.getActions().FillInlineAsmIdentifierInfo(Result.get(), Info);
-  }
+                                 bool IsUnevaluatedContext) override;
 
   StringRef LookupInlineAsmLabel(StringRef Identifier, llvm::SourceMgr &LSM,
                                  llvm::SMLoc Location,
-                                 bool Create) override {
-    SourceLocation Loc = translateLocation(LSM, Location);
-    LabelDecl *Label =
-      TheParser.getActions().GetOrCreateMSAsmLabel(Identifier, Loc, Create);
-    return Label->getMSAsmLabel();
-  }
+                                 bool Create) override;
 
   bool LookupInlineAsmField(StringRef Base, StringRef Member,
                             unsigned &Offset) override {
@@ -117,67 +75,129 @@
 private:
   /// Collect the appropriate tokens for the given string.
   void findTokensForString(StringRef Str, SmallVectorImpl<Token> &TempToks,
-                           const Token *&FirstOrigToken) const {
-    // For now, assert that the string we're working with is a substring
-    // of what we gave to MC.  This lets us use the original tokens.
-    assert(!std::less<const char *>()(Str.begin(), AsmString.begin()) &&
-           !std::less<const char *>()(AsmString.end(), Str.end()));
+                           const Token *&FirstOrigToken) const;
 
-    // Try to find a token whose offset matches the first token.
-    unsigned FirstCharOffset = Str.begin() - AsmString.begin();
-    const unsigned *FirstTokOffset = std::lower_bound(
-        AsmTokOffsets.begin(), AsmTokOffsets.end(), FirstCharOffset);
+  SourceLocation translateLocation(const llvm::SourceMgr &LSM,
+                                   llvm::SMLoc SMLoc);
 
-    // For now, assert that the start of the string exactly
-    // corresponds to the start of a token.
-    assert(*FirstTokOffset == FirstCharOffset);
-
-    // Use all the original tokens for this line.  (We assume the
-    // end of the line corresponds cleanly to a token break.)
-    unsigned FirstTokIndex = FirstTokOffset - AsmTokOffsets.begin();
-    FirstOrigToken = &AsmToks[FirstTokIndex];
-    unsigned LastCharOffset = Str.end() - AsmString.begin();
-    for (unsigned i = FirstTokIndex, e = AsmTokOffsets.size(); i != e; ++i) {
-      if (AsmTokOffsets[i] >= LastCharOffset)
-        break;
-      TempToks.push_back(AsmToks[i]);
-    }
-  }
-
-  SourceLocation translateLocation(const llvm::SourceMgr &LSM, llvm::SMLoc SMLoc) {
-    // Compute an offset into the inline asm buffer.
-    // FIXME: This isn't right if .macro is involved (but hopefully, no
-    // real-world code does that).
-    const llvm::MemoryBuffer *LBuf =
-        LSM.getMemoryBuffer(LSM.FindBufferContainingLoc(SMLoc));
-    unsigned Offset = SMLoc.getPointer() - LBuf->getBufferStart();
-
-    // Figure out which token that offset points into.
-    const unsigned *TokOffsetPtr =
-        std::lower_bound(AsmTokOffsets.begin(), AsmTokOffsets.end(), Offset);
-    unsigned TokIndex = TokOffsetPtr - AsmTokOffsets.begin();
-    unsigned TokOffset = *TokOffsetPtr;
-
-    // If we come up with an answer which seems sane, use it; otherwise,
-    // just point at the __asm keyword.
-    // FIXME: Assert the answer is sane once we handle .macro correctly.
-    SourceLocation Loc = AsmLoc;
-    if (TokIndex < AsmToks.size()) {
-      const Token &Tok = AsmToks[TokIndex];
-      Loc = Tok.getLocation();
-      Loc = Loc.getLocWithOffset(Offset - TokOffset);
-    }
-    return Loc;
-  }
-
-  void handleDiagnostic(const llvm::SMDiagnostic &D) {
-    const llvm::SourceMgr &LSM = *D.getSourceMgr();
-    SourceLocation Loc = translateLocation(LSM, D.getLoc());
-    TheParser.Diag(Loc, diag::err_inline_ms_asm_parsing) << D.getMessage();
-  }
+  void handleDiagnostic(const llvm::SMDiagnostic &D);
 };
 }
 
+void ClangAsmParserCallback::LookupInlineAsmIdentifier(
+    StringRef &LineBuf, llvm::InlineAsmIdentifierInfo &Info,
+    bool IsUnevaluatedContext) {
+  // Collect the desired tokens.
+  SmallVector<Token, 16> LineToks;
+  const Token *FirstOrigToken = nullptr;
+  findTokensForString(LineBuf, LineToks, FirstOrigToken);
+
+  unsigned NumConsumedToks;
+  ExprResult Result = TheParser.ParseMSAsmIdentifier(LineToks, NumConsumedToks,
+                                                     IsUnevaluatedContext);
+
+  // If we consumed the entire line, tell MC that.
+  // Also do this if we consumed nothing as a way of reporting failure.
+  if (NumConsumedToks == 0 || NumConsumedToks == LineToks.size()) {
+    // By not modifying LineBuf, we're implicitly consuming it all.
+
+    // Otherwise, consume up to the original tokens.
+  } else {
+    assert(FirstOrigToken && "not using original tokens?");
+
+    // Since we're using original tokens, apply that offset.
+    assert(FirstOrigToken[NumConsumedToks].getLocation() ==
+           LineToks[NumConsumedToks].getLocation());
+    unsigned FirstIndex = FirstOrigToken - AsmToks.begin();
+    unsigned LastIndex = FirstIndex + NumConsumedToks - 1;
+
+    // The total length we've consumed is the relative offset
+    // of the last token we consumed plus its length.
+    unsigned TotalOffset =
+        (AsmTokOffsets[LastIndex] + AsmToks[LastIndex].getLength() -
+         AsmTokOffsets[FirstIndex]);
+    LineBuf = LineBuf.substr(0, TotalOffset);
+  }
+
+  // Initialize Info with the lookup result.
+  if (!Result.isUsable())
+    return;
+  TheParser.getActions().FillInlineAsmIdentifierInfo(Result.get(), Info);
+}
+
+StringRef ClangAsmParserCallback::LookupInlineAsmLabel(StringRef Identifier,
+                                                       llvm::SourceMgr &LSM,
+                                                       llvm::SMLoc Location,
+                                                       bool Create) {
+  SourceLocation Loc = translateLocation(LSM, Location);
+  LabelDecl *Label =
+      TheParser.getActions().GetOrCreateMSAsmLabel(Identifier, Loc, Create);
+  return Label->getMSAsmLabel();
+}
+
+void ClangAsmParserCallback::findTokensForString(
+    StringRef Str, SmallVectorImpl<Token> &TempToks,
+    const Token *&FirstOrigToken) const {
+  // For now, assert that the string we're working with is a substring
+  // of what we gave to MC.  This lets us use the original tokens.
+  assert(!std::less<const char *>()(Str.begin(), AsmString.begin()) &&
+         !std::less<const char *>()(AsmString.end(), Str.end()));
+
+  // Try to find a token whose offset matches the first token.
+  unsigned FirstCharOffset = Str.begin() - AsmString.begin();
+  const unsigned *FirstTokOffset = std::lower_bound(
+      AsmTokOffsets.begin(), AsmTokOffsets.end(), FirstCharOffset);
+
+  // For now, assert that the start of the string exactly
+  // corresponds to the start of a token.
+  assert(*FirstTokOffset == FirstCharOffset);
+
+  // Use all the original tokens for this line.  (We assume the
+  // end of the line corresponds cleanly to a token break.)
+  unsigned FirstTokIndex = FirstTokOffset - AsmTokOffsets.begin();
+  FirstOrigToken = &AsmToks[FirstTokIndex];
+  unsigned LastCharOffset = Str.end() - AsmString.begin();
+  for (unsigned i = FirstTokIndex, e = AsmTokOffsets.size(); i != e; ++i) {
+    if (AsmTokOffsets[i] >= LastCharOffset)
+      break;
+    TempToks.push_back(AsmToks[i]);
+  }
+}
+
+SourceLocation
+ClangAsmParserCallback::translateLocation(const llvm::SourceMgr &LSM,
+                                          llvm::SMLoc SMLoc) {
+  // Compute an offset into the inline asm buffer.
+  // FIXME: This isn't right if .macro is involved (but hopefully, no
+  // real-world code does that).
+  const llvm::MemoryBuffer *LBuf =
+      LSM.getMemoryBuffer(LSM.FindBufferContainingLoc(SMLoc));
+  unsigned Offset = SMLoc.getPointer() - LBuf->getBufferStart();
+
+  // Figure out which token that offset points into.
+  const unsigned *TokOffsetPtr =
+      std::lower_bound(AsmTokOffsets.begin(), AsmTokOffsets.end(), Offset);
+  unsigned TokIndex = TokOffsetPtr - AsmTokOffsets.begin();
+  unsigned TokOffset = *TokOffsetPtr;
+
+  // If we come up with an answer which seems sane, use it; otherwise,
+  // just point at the __asm keyword.
+  // FIXME: Assert the answer is sane once we handle .macro correctly.
+  SourceLocation Loc = AsmLoc;
+  if (TokIndex < AsmToks.size()) {
+    const Token &Tok = AsmToks[TokIndex];
+    Loc = Tok.getLocation();
+    Loc = Loc.getLocWithOffset(Offset - TokOffset);
+  }
+  return Loc;
+}
+
+void ClangAsmParserCallback::handleDiagnostic(const llvm::SMDiagnostic &D) {
+  const llvm::SourceMgr &LSM = *D.getSourceMgr();
+  SourceLocation Loc = translateLocation(LSM, D.getLoc());
+  TheParser.Diag(Loc, diag::err_inline_ms_asm_parsing) << D.getMessage();
+}
+
 /// Parse an identifier in an MS-style inline assembly block.
 ExprResult Parser::ParseMSAsmIdentifier(llvm::SmallVectorImpl<Token> &LineToks,
                                         unsigned &NumLineToksConsumed,
@@ -558,7 +578,7 @@
   if (buildMSAsmString(PP, AsmLoc, AsmToks, TokOffsets, AsmString))
     return StmtError();
 
-  TargetOptions TO = Actions.Context.getTargetInfo().getTargetOpts();
+  const TargetOptions &TO = Actions.Context.getTargetInfo().getTargetOpts();
   std::string FeaturesStr =
       llvm::join(TO.Features.begin(), TO.Features.end(), ",");
 
diff --git a/lib/Parse/ParseTemplate.cpp b/lib/Parse/ParseTemplate.cpp
index fcb1142..88a5745 100644
--- a/lib/Parse/ParseTemplate.cpp
+++ b/lib/Parse/ParseTemplate.cpp
@@ -24,7 +24,7 @@
 /// \brief Parse a template declaration, explicit instantiation, or
 /// explicit specialization.
 Decl *
-Parser::ParseDeclarationStartingWithTemplate(unsigned Context,
+Parser::ParseDeclarationStartingWithTemplate(DeclaratorContext Context,
                                              SourceLocation &DeclEnd,
                                              AccessSpecifier AS,
                                              AttributeList *AccessAttrs) {
@@ -57,7 +57,7 @@
 ///       explicit-specialization: [ C++ temp.expl.spec]
 ///         'template' '<' '>' declaration
 Decl *
-Parser::ParseTemplateDeclarationOrSpecialization(unsigned Context,
+Parser::ParseTemplateDeclarationOrSpecialization(DeclaratorContext Context,
                                                  SourceLocation &DeclEnd,
                                                  AccessSpecifier AS,
                                                  AttributeList *AccessAttrs) {
@@ -169,7 +169,7 @@
 /// \returns the new declaration.
 Decl *
 Parser::ParseSingleDeclarationAfterTemplate(
-                                       unsigned Context,
+                                       DeclaratorContext Context,
                                        const ParsedTemplateInfo &TemplateInfo,
                                        ParsingDeclRAIIObject &DiagsFromTParams,
                                        SourceLocation &DeclEnd,
@@ -186,7 +186,7 @@
     return ParseStaticAssertDeclaration(DeclEnd);
   }
 
-  if (Context == Declarator::MemberContext) {
+  if (Context == DeclaratorContext::MemberContext) {
     // We are parsing a member template.
     ParseCXXClassMemberDeclaration(AS, AccessAttrs, TemplateInfo,
                                    &DiagsFromTParams);
@@ -234,7 +234,7 @@
     DS.takeAttributesFrom(prefixAttrs);
 
   // Parse the declarator.
-  ParsingDeclarator DeclaratorInfo(*this, DS, (Declarator::TheContext)Context);
+  ParsingDeclarator DeclaratorInfo(*this, DS, (DeclaratorContext)Context);
   ParseDeclarator(DeclaratorInfo);
   // Error parsing the declarator?
   if (!DeclaratorInfo.hasName()) {
@@ -255,7 +255,7 @@
     // Function definitions are only allowed at file scope and in C++ classes.
     // The C++ inline method definition case is handled elsewhere, so we only
     // need to handle the file scope definition case.
-    if (Context != Declarator::FileContext) {
+    if (Context != DeclaratorContext::FileContext) {
       Diag(Tok, diag::err_function_definition_not_allowed);
       SkipMalformedDecl();
       return nullptr;
@@ -271,7 +271,8 @@
     }
 
     if (TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation) {
-      if (DeclaratorInfo.getName().getKind() != UnqualifiedId::IK_TemplateId) {
+      if (DeclaratorInfo.getName().getKind() !=
+          UnqualifiedIdKind::IK_TemplateId) {
         // If the declarator-id is not a template-id, issue a diagnostic and
         // recover by ignoring the 'template' keyword.
         Diag(Tok, diag::err_template_defn_explicit_instantiation) << 0;
@@ -369,13 +370,13 @@
 ///         template-parameter
 ///         template-parameter-list ',' template-parameter
 bool
-Parser::ParseTemplateParameterList(unsigned Depth,
+Parser::ParseTemplateParameterList(const unsigned Depth,
                              SmallVectorImpl<NamedDecl*> &TemplateParams) {
   while (1) {
-    // FIXME: ParseTemplateParameter should probably just return a NamedDecl.
-    if (Decl *TmpParam
+    
+    if (NamedDecl *TmpParam
           = ParseTemplateParameter(Depth, TemplateParams.size())) {
-      TemplateParams.push_back(dyn_cast<NamedDecl>(TmpParam));
+      TemplateParams.push_back(TmpParam);
     } else {
       // If we failed to parse a template parameter, skip until we find
       // a comma or closing brace.
@@ -480,13 +481,27 @@
 ///               'class' ...[opt] identifier[opt]
 ///         'template' '<' template-parameter-list '>' 'class' identifier[opt]
 ///               = id-expression
-Decl *Parser::ParseTemplateParameter(unsigned Depth, unsigned Position) {
+NamedDecl *Parser::ParseTemplateParameter(unsigned Depth, unsigned Position) {
   if (isStartOfTemplateTypeParameter())
     return ParseTypeParameter(Depth, Position);
 
   if (Tok.is(tok::kw_template))
     return ParseTemplateTemplateParameter(Depth, Position);
 
+  // Is there just a typo in the input code? ('typedef' instead of 'typename')
+  if (Tok.is(tok::kw_typedef)) {
+    Diag(Tok.getLocation(), diag::err_expected_template_parameter);
+
+    Diag(Tok.getLocation(), diag::note_meant_to_use_typename)
+        << FixItHint::CreateReplacement(CharSourceRange::getCharRange(
+                                            Tok.getLocation(), Tok.getEndLoc()),
+                                        "typename");
+
+    Tok.setKind(tok::kw_typename);
+
+    return ParseTypeParameter(Depth, Position);
+  }
+
   // If it's none of the above, then it must be a parameter declaration.
   // NOTE: This will pick up errors in the closure of the template parameter
   // list (e.g., template < ; Check here to implement >> style closures.
@@ -502,7 +517,7 @@
 ///         'class' identifier[opt] '=' type-id
 ///         'typename' ...[opt][C++0x] identifier[opt]
 ///         'typename' identifier[opt] '=' type-id
-Decl *Parser::ParseTypeParameter(unsigned Depth, unsigned Position) {
+NamedDecl *Parser::ParseTypeParameter(unsigned Depth, unsigned Position) {
   assert(Tok.isOneOf(tok::kw_class, tok::kw_typename) &&
          "A type-parameter starts with 'class' or 'typename'");
 
@@ -546,7 +561,7 @@
   ParsedType DefaultArg;
   if (TryConsumeToken(tok::equal, EqualLoc))
     DefaultArg = ParseTypeName(/*Range=*/nullptr,
-                               Declarator::TemplateTypeArgContext).get();
+                               DeclaratorContext::TemplateTypeArgContext).get();
 
   return Actions.ActOnTypeParameter(getCurScope(), TypenameKeyword, EllipsisLoc,
                                     KeyLoc, ParamName, NameLoc, Depth, Position,
@@ -564,7 +579,7 @@
 ///       type-parameter-key:
 ///         'class'
 ///         'typename'       [C++1z]
-Decl *
+NamedDecl *
 Parser::ParseTemplateTemplateParameter(unsigned Depth, unsigned Position) {
   assert(Tok.is(tok::kw_template) && "Expected 'template' keyword");
 
@@ -590,10 +605,10 @@
     const Token &Next = Tok.is(tok::kw_struct) ? NextToken() : Tok;
     if (Tok.is(tok::kw_typename)) {
       Diag(Tok.getLocation(),
-           getLangOpts().CPlusPlus1z
+           getLangOpts().CPlusPlus17
                ? diag::warn_cxx14_compat_template_template_param_typename
                : diag::ext_template_template_param_typename)
-        << (!getLangOpts().CPlusPlus1z
+        << (!getLangOpts().CPlusPlus17
                 ? FixItHint::CreateReplacement(Tok.getLocation(), "class")
                 : FixItHint());
     } else if (Next.isOneOf(tok::identifier, tok::comma, tok::greater,
@@ -669,17 +684,17 @@
 ///       template-parameter:
 ///         ...
 ///         parameter-declaration
-Decl *
+NamedDecl *
 Parser::ParseNonTypeTemplateParameter(unsigned Depth, unsigned Position) {
   // Parse the declaration-specifiers (i.e., the type).
   // FIXME: The type should probably be restricted in some way... Not all
   // declarators (parts of declarators?) are accepted for parameters.
   DeclSpec DS(AttrFactory);
   ParseDeclarationSpecifiers(DS, ParsedTemplateInfo(), AS_none,
-                             DSC_template_param);
+                             DeclSpecContext::DSC_template_param);
 
   // Parse this as a typename.
-  Declarator ParamDecl(DS, Declarator::TemplateParamContext);
+  Declarator ParamDecl(DS, DeclaratorContext::TemplateParamContext);
   ParseDeclarator(ParamDecl);
   if (DS.getTypeSpecType() == DeclSpec::TST_unspecified) {
     Diag(Tok.getLocation(), diag::err_expected_template_parameter);
@@ -1015,17 +1030,17 @@
     Tok.setKind(tok::annot_template_id);
     
     IdentifierInfo *TemplateII =
-        TemplateName.getKind() == UnqualifiedId::IK_Identifier
+        TemplateName.getKind() == UnqualifiedIdKind::IK_Identifier
             ? TemplateName.Identifier
             : nullptr;
 
     OverloadedOperatorKind OpKind =
-        TemplateName.getKind() == UnqualifiedId::IK_Identifier
+        TemplateName.getKind() == UnqualifiedIdKind::IK_Identifier
             ? OO_None
             : TemplateName.OperatorFunctionId.Operator;
 
-    TemplateIdAnnotation *TemplateId = TemplateIdAnnotation::Create(
-      SS, TemplateKWLoc, TemplateNameLoc, TemplateII, OpKind, Template, TNK,
+    TemplateIdAnnotation *TemplateId = TemplateIdAnnotation::Create(
+      SS, TemplateKWLoc, TemplateNameLoc, TemplateII, OpKind, Template, TNK,
       LAngleLoc, RAngleLoc, TemplateArgs, TemplateIds);
     
     Tok.setAnnotationValue(TemplateId);
@@ -1192,15 +1207,9 @@
   EnterExpressionEvaluationContext EnterConstantEvaluated(
       Actions, Sema::ExpressionEvaluationContext::ConstantEvaluated);
   if (isCXXTypeId(TypeIdAsTemplateArgument)) {
-    SourceLocation Loc = Tok.getLocation();
-    TypeResult TypeArg = ParseTypeName(/*Range=*/nullptr,
-                                       Declarator::TemplateTypeArgContext);
-    if (TypeArg.isInvalid())
-      return ParsedTemplateArgument();
-    
-    return ParsedTemplateArgument(ParsedTemplateArgument::Type,
-                                  TypeArg.get().getAsOpaquePtr(), 
-                                  Loc);
+    TypeResult TypeArg = ParseTypeName(
+        /*Range=*/nullptr, DeclaratorContext::TemplateArgContext);
+    return Actions.ActOnTemplateTypeArgument(TypeArg);
   }
   
   // Try to parse a template template argument.
@@ -1297,7 +1306,7 @@
 ///         'extern' [opt] 'template' declaration
 ///
 /// Note that the 'extern' is a GNU extension and C++11 feature.
-Decl *Parser::ParseExplicitInstantiation(unsigned Context,
+Decl *Parser::ParseExplicitInstantiation(DeclaratorContext Context,
                                          SourceLocation ExternLoc,
                                          SourceLocation TemplateLoc,
                                          SourceLocation &DeclEnd,
diff --git a/lib/Parse/ParseTentative.cpp b/lib/Parse/ParseTentative.cpp
index 48393d9..88c0e17 100644
--- a/lib/Parse/ParseTentative.cpp
+++ b/lib/Parse/ParseTentative.cpp
@@ -873,7 +873,8 @@
 ///           template-id                                                 [TODO]
 ///
 Parser::TPResult Parser::TryParseDeclarator(bool mayBeAbstract,
-                                            bool mayHaveIdentifier) {
+                                            bool mayHaveIdentifier,
+                                            bool mayHaveDirectInit) {
   // declarator:
   //   direct-declarator
   //   ptr-operator declarator
@@ -930,6 +931,9 @@
     return TPResult::False;
   }
 
+  if (mayHaveDirectInit)
+    return TPResult::Ambiguous;
+
   while (1) {
     TPResult TPR(TPResult::Ambiguous);
 
@@ -1242,6 +1246,17 @@
       case ANK_TentativeDecl:
         return TPResult::False;
       case ANK_TemplateName:
+        // In C++17, this could be a type template for class template argument
+        // deduction. Try to form a type annotation for it. If we're in a
+        // template template argument, we'll undo this when checking the
+        // validity of the argument.
+        if (getLangOpts().CPlusPlus17) {
+          if (TryAnnotateTypeOrScopeToken())
+            return TPResult::Error;
+          if (Tok.isNot(tok::identifier))
+            break;
+        }
+
         // A bare type template-name which can't be a template template
         // argument is an error, and was probably intended to be a type.
         return GreaterThanIsOperator ? TPResult::True : TPResult::False;
@@ -1299,11 +1314,9 @@
     //   'friend'
     //   'typedef'
     //   'constexpr'
-    //   'concept'
   case tok::kw_friend:
   case tok::kw_typedef:
   case tok::kw_constexpr:
-  case tok::kw_concept:
     // storage-class-specifier
   case tok::kw_register:
   case tok::kw_static:
@@ -1422,8 +1435,6 @@
             *HasMissingTypename = true;
             return TPResult::Ambiguous;
           }
-
-          // FIXME: Fails to either revert or commit the tentative parse!
         } else {
           // Try to resolve the name. If it doesn't exist, assume it was
           // intended to name a type and keep disambiguating.
@@ -1433,19 +1444,33 @@
           case ANK_TentativeDecl:
             return TPResult::False;
           case ANK_TemplateName:
+            // In C++17, this could be a type template for class template
+            // argument deduction.
+            if (getLangOpts().CPlusPlus17) {
+              if (TryAnnotateTypeOrScopeToken())
+                return TPResult::Error;
+              if (Tok.isNot(tok::identifier))
+                break;
+            }
+
             // A bare type template-name which can't be a template template
             // argument is an error, and was probably intended to be a type.
-            return GreaterThanIsOperator ? TPResult::True : TPResult::False;
+            // In C++17, this could be class template argument deduction.
+            return (getLangOpts().CPlusPlus17 || GreaterThanIsOperator)
+                       ? TPResult::True
+                       : TPResult::False;
           case ANK_Unresolved:
             return HasMissingTypename ? TPResult::Ambiguous
                                       : TPResult::False;
           case ANK_Success:
-            // Annotated it, check again.
-            assert(Tok.isNot(tok::annot_cxxscope) ||
-                   NextToken().isNot(tok::identifier));
-            return isCXXDeclarationSpecifier(BracedCastResult,
-                                             HasMissingTypename);
+            break;
           }
+
+          // Annotated it, check again.
+          assert(Tok.isNot(tok::annot_cxxscope) ||
+                 NextToken().isNot(tok::identifier));
+          return isCXXDeclarationSpecifier(BracedCastResult,
+                                           HasMissingTypename);
         }
       }
       return TPResult::False;
diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp
index e24735d..f617d24 100644
--- a/lib/Parse/Parser.cpp
+++ b/lib/Parse/Parser.cpp
@@ -556,10 +556,6 @@
     HandlePragmaUnused();
     return false;
 
-  case tok::kw_import:
-    Result = ParseModuleImport(SourceLocation());
-    return false;
-
   case tok::kw_export:
     if (NextToken().isNot(tok::kw_module))
       break;
@@ -637,6 +633,9 @@
 ///           ';'
 ///
 /// [C++0x/GNU] 'extern' 'template' declaration
+///
+/// [Modules-TS] module-import-declaration
+///
 Parser::DeclGroupPtrTy
 Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
                                  ParsingDeclSpec *DS) {
@@ -742,7 +741,7 @@
     break;
   }
   case tok::at:
-    return ParseObjCAtDirectives();
+    return ParseObjCAtDirectives(attrs);
   case tok::minus:
   case tok::plus:
     if (!getLangOpts().ObjC1) {
@@ -753,11 +752,20 @@
     SingleDecl = ParseObjCMethodDefinition();
     break;
   case tok::code_completion:
-      Actions.CodeCompleteOrdinaryName(getCurScope(), 
-                             CurParsedObjCImpl? Sema::PCC_ObjCImplementation
-                                              : Sema::PCC_Namespace);
+    if (CurParsedObjCImpl) {
+      // Code-complete Objective-C methods even without leading '-'/'+' prefix.
+      Actions.CodeCompleteObjCMethodDecl(getCurScope(),
+                                         /*IsInstanceMethod=*/None,
+                                         /*ReturnType=*/nullptr);
+    }
+    Actions.CodeCompleteOrdinaryName(
+        getCurScope(),
+        CurParsedObjCImpl ? Sema::PCC_ObjCImplementation : Sema::PCC_Namespace);
     cutOffParsing();
     return nullptr;
+  case tok::kw_import:
+    SingleDecl = ParseModuleImport(SourceLocation());
+    break;
   case tok::kw_export:
     if (getLangOpts().ModulesTS) {
       SingleDecl = ParseExportDeclaration();
@@ -775,7 +783,7 @@
     // A function definition cannot start with any of these keywords.
     {
       SourceLocation DeclEnd;
-      return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
+      return ParseDeclaration(DeclaratorContext::FileContext, DeclEnd, attrs);
     }
 
   case tok::kw_static:
@@ -785,7 +793,7 @@
       Diag(ConsumeToken(), diag::warn_static_inline_explicit_inst_ignored)
         << 0;
       SourceLocation DeclEnd;
-      return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
+      return ParseDeclaration(DeclaratorContext::FileContext, DeclEnd, attrs);
     }
     goto dont_know;
       
@@ -796,7 +804,7 @@
       // Inline namespaces. Allowed as an extension even in C++03.
       if (NextKind == tok::kw_namespace) {
         SourceLocation DeclEnd;
-        return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
+        return ParseDeclaration(DeclaratorContext::FileContext, DeclEnd, attrs);
       }
       
       // Parse (then ignore) 'inline' prior to a template instantiation. This is
@@ -805,7 +813,7 @@
         Diag(ConsumeToken(), diag::warn_static_inline_explicit_inst_ignored)
           << 1;
         SourceLocation DeclEnd;
-        return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
+        return ParseDeclaration(DeclaratorContext::FileContext, DeclEnd, attrs);
       }
     }
     goto dont_know;
@@ -820,7 +828,7 @@
              diag::ext_extern_template) << SourceRange(ExternLoc, TemplateLoc);
       SourceLocation DeclEnd;
       return Actions.ConvertDeclToDeclGroup(
-                  ParseExplicitInstantiation(Declarator::FileContext,
+                  ParseExplicitInstantiation(DeclaratorContext::FileContext,
                                              ExternLoc, TemplateLoc, DeclEnd));
     }
     goto dont_know;
@@ -911,18 +919,43 @@
                                        AccessSpecifier AS) {
   MaybeParseMicrosoftAttributes(DS.getAttributes());
   // Parse the common declaration-specifiers piece.
-  ParseDeclarationSpecifiers(DS, ParsedTemplateInfo(), AS, DSC_top_level);
+  ParseDeclarationSpecifiers(DS, ParsedTemplateInfo(), AS,
+                             DeclSpecContext::DSC_top_level);
 
   // If we had a free-standing type definition with a missing semicolon, we
   // may get this far before the problem becomes obvious.
-  if (DS.hasTagDefinition() &&
-      DiagnoseMissingSemiAfterTagDefinition(DS, AS, DSC_top_level))
+  if (DS.hasTagDefinition() && DiagnoseMissingSemiAfterTagDefinition(
+                                   DS, AS, DeclSpecContext::DSC_top_level))
     return nullptr;
 
   // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"
   // declaration-specifiers init-declarator-list[opt] ';'
   if (Tok.is(tok::semi)) {
-    ProhibitAttributes(attrs);
+    auto LengthOfTSTToken = [](DeclSpec::TST TKind) {
+      assert(DeclSpec::isDeclRep(TKind));
+      switch(TKind) {
+      case DeclSpec::TST_class:
+        return 5;
+      case DeclSpec::TST_struct:
+        return 6;
+      case DeclSpec::TST_union:
+        return 5;
+      case DeclSpec::TST_enum:
+        return 4;
+      case DeclSpec::TST_interface:
+        return 9;
+      default:
+        llvm_unreachable("we only expect to get the length of the class/struct/union/enum");
+      }
+      
+    };
+    // Suggest correct location to fix '[[attrib]] struct' to 'struct [[attrib]]'
+    SourceLocation CorrectLocationForAttributes =
+        DeclSpec::isDeclRep(DS.getTypeSpecType())
+            ? DS.getTypeSpecTypeLoc().getLocWithOffset(
+                  LengthOfTSTToken(DS.getTypeSpecType()))
+            : SourceLocation();
+    ProhibitAttributes(attrs, CorrectLocationForAttributes);
     ConsumeToken();
     RecordDecl *AnonRecord = nullptr;
     Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none,
@@ -972,11 +1005,11 @@
   if (getLangOpts().CPlusPlus && isTokenStringLiteral() &&
       DS.getStorageClassSpec() == DeclSpec::SCS_extern &&
       DS.getParsedSpecifiers() == DeclSpec::PQ_StorageClassSpecifier) {
-    Decl *TheDecl = ParseLinkage(DS, Declarator::FileContext);
+    Decl *TheDecl = ParseLinkage(DS, DeclaratorContext::FileContext);
     return Actions.ConvertDeclToDeclGroup(TheDecl);
   }
 
-  return ParseDeclGroup(DS, Declarator::FileContext);
+  return ParseDeclGroup(DS, DeclaratorContext::FileContext);
 }
 
 Parser::DeclGroupPtrTy
@@ -1281,7 +1314,7 @@
     }
 
     // Parse the first declarator attached to this declspec.
-    Declarator ParmDeclarator(DS, Declarator::KNRTypeListContext);
+    Declarator ParmDeclarator(DS, DeclaratorContext::KNRTypeListContext);
     ParseDeclarator(ParmDeclarator);
 
     // Handle the full declarator list.
@@ -1742,8 +1775,8 @@
             *Tok.getIdentifierInfo(), Tok.getLocation(), getCurScope(), &SS,
             false, NextToken().is(tok::period), nullptr,
             /*IsCtorOrDtorName=*/false,
-            /*NonTrivialTypeSourceInfo*/ true,
-            /*IsClassTemplateDeductionContext*/GreaterThanIsOperator)) {
+            /*NonTrivialTypeSourceInfo*/true,
+            /*IsClassTemplateDeductionContext*/true)) {
       SourceLocation BeginLoc = Tok.getLocation();
       if (SS.isNotEmpty()) // it was a C++ qualified type name.
         BeginLoc = SS.getBeginLoc();
@@ -2086,7 +2119,7 @@
 ///           '@' 'import' module-name ';'
 /// [ModTS] module-import-declaration:
 ///           'import' module-name attribute-specifier-seq[opt] ';'
-Parser::DeclGroupPtrTy Parser::ParseModuleImport(SourceLocation AtLoc) {
+Decl *Parser::ParseModuleImport(SourceLocation AtLoc) {
   assert((AtLoc.isInvalid() ? Tok.is(tok::kw_import)
                             : Tok.isObjCAtKeyword(tok::objc_import)) &&
          "Improper start to module import");
@@ -2113,7 +2146,7 @@
   if (Import.isInvalid())
     return nullptr;
 
-  return Actions.ConvertDeclToDeclGroup(Import.get());
+  return Import.get();
 }
 
 /// Parse a C++ Modules TS / Objective-C module name (both forms use the same
diff --git a/lib/Rewrite/HTMLRewrite.cpp b/lib/Rewrite/HTMLRewrite.cpp
index 23d1895..3a08653 100644
--- a/lib/Rewrite/HTMLRewrite.cpp
+++ b/lib/Rewrite/HTMLRewrite.cpp
@@ -210,9 +210,9 @@
   SmallString<256> Str;
   llvm::raw_svector_ostream OS(Str);
 
-  OS << "<tr><td class=\"num\" id=\"LN"
-     << LineNo << "\">"
-     << LineNo << "</td><td class=\"line\">";
+  OS << "<tr class=\"codeline\" data-linenumber=\"" << LineNo << "\">"
+     << "<td class=\"num\" id=\"LN" << LineNo << "\">" << LineNo
+     << "</td><td class=\"line\">";
 
   if (B == E) { // Handle empty lines.
     OS << " </td></tr>";
@@ -263,7 +263,10 @@
   }
 
   // Add one big table tag that surrounds all of the code.
-  RB.InsertTextBefore(0, "<table class=\"code\">\n");
+  std::string s;
+  llvm::raw_string_ostream os(s);
+  os << "<table class=\"code\" data-fileid=\"" << FID.getHashValue() << "\">\n";
+  RB.InsertTextBefore(0, os.str());
   RB.InsertTextAfter(FileEnd - FileBeg, "</table>");
 }
 
@@ -285,77 +288,128 @@
   if (!title.empty())
     os << "<title>" << html::EscapeText(title) << "</title>\n";
 
-  os << "<style type=\"text/css\">\n"
-      " body { color:#000000; background-color:#ffffff }\n"
-      " body { font-family:Helvetica, sans-serif; font-size:10pt }\n"
-      " h1 { font-size:14pt }\n"
-      " .FileName { margin-top: 5px; margin-bottom: 5px; display: inline; }\n"
-      " .FileNav { margin-left: 5px; margin-right: 5px; display: inline; }\n"
-      " .FileNav a { text-decoration:none; font-size: larger; }\n"
-      " .divider { margin-top: 30px; margin-bottom: 30px; height: 15px; }\n"
-      " .divider { background-color: gray; }\n"
-      " .code { border-collapse:collapse; width:100%; }\n"
-      " .code { font-family: \"Monospace\", monospace; font-size:10pt }\n"
-      " .code { line-height: 1.2em }\n"
-      " .comment { color: green; font-style: oblique }\n"
-      " .keyword { color: blue }\n"
-      " .string_literal { color: red }\n"
-      " .directive { color: darkmagenta }\n"
-      // Macro expansions.
-      " .expansion { display: none; }\n"
-      " .macro:hover .expansion { display: block; border: 2px solid #FF0000; "
-          "padding: 2px; background-color:#FFF0F0; font-weight: normal; "
-          "  -webkit-border-radius:5px;  -webkit-box-shadow:1px 1px 7px #000; "
-          "  border-radius:5px;  box-shadow:1px 1px 7px #000; "
-          "position: absolute; top: -1em; left:10em; z-index: 1 } \n"
-      " .macro { color: darkmagenta; background-color:LemonChiffon;"
-             // Macros are position: relative to provide base for expansions.
-             " position: relative }\n"
-      " .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }\n"
-      " .num { text-align:right; font-size:8pt }\n"
-      " .num { color:#444444 }\n"
-      " .line { padding-left: 1ex; border-left: 3px solid #ccc }\n"
-      " .line { white-space: pre }\n"
-      " .msg { -webkit-box-shadow:1px 1px 7px #000 }\n"
-      " .msg { box-shadow:1px 1px 7px #000 }\n"
-      " .msg { -webkit-border-radius:5px }\n"
-      " .msg { border-radius:5px }\n"
-      " .msg { font-family:Helvetica, sans-serif; font-size:8pt }\n"
-      " .msg { float:left }\n"
-      " .msg { padding:0.25em 1ex 0.25em 1ex }\n"
-      " .msg { margin-top:10px; margin-bottom:10px }\n"
-      " .msg { font-weight:bold }\n"
-      " .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap }\n"
-      " .msgT { padding:0x; spacing:0x }\n"
-      " .msgEvent { background-color:#fff8b4; color:#000000 }\n"
-      " .msgControl { background-color:#bbbbbb; color:#000000 }\n"
-      " .msgNote { background-color:#ddeeff; color:#000000 }\n"
-      " .mrange { background-color:#dfddf3 }\n"
-      " .mrange { border-bottom:1px solid #6F9DBE }\n"
-      " .PathIndex { font-weight: bold; padding:0px 5px; "
-        "margin-right:5px; }\n"
-      " .PathIndex { -webkit-border-radius:8px }\n"
-      " .PathIndex { border-radius:8px }\n"
-      " .PathIndexEvent { background-color:#bfba87 }\n"
-      " .PathIndexControl { background-color:#8c8c8c }\n"
-      " .PathNav a { text-decoration:none; font-size: larger }\n"
-      " .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }\n"
-      " .CodeRemovalHint { background-color:#de1010 }\n"
-      " .CodeRemovalHint { border-bottom:1px solid #6F9DBE }\n"
-      " table.simpletable {\n"
-      "   padding: 5px;\n"
-      "   font-size:12pt;\n"
-      "   margin:20px;\n"
-      "   border-collapse: collapse; border-spacing: 0px;\n"
-      " }\n"
-      " td.rowname {\n"
-      "   text-align: right;\n"
-      "   vertical-align: top;\n"
-      "   font-weight: bold;\n"
-      "   color:#444444;\n"
-      "   padding-right:2ex;\n"
-      " }\n"
-      "</style>\n</head>\n<body>";
+  os << R"<<<(
+<style type="text/css">
+body { color:#000000; background-color:#ffffff }
+body { font-family:Helvetica, sans-serif; font-size:10pt }
+h1 { font-size:14pt }
+.FileName { margin-top: 5px; margin-bottom: 5px; display: inline; }
+.FileNav { margin-left: 5px; margin-right: 5px; display: inline; }
+.FileNav a { text-decoration:none; font-size: larger; }
+.divider { margin-top: 30px; margin-bottom: 30px; height: 15px; }
+.divider { background-color: gray; }
+.code { border-collapse:collapse; width:100%; }
+.code { font-family: "Monospace", monospace; font-size:10pt }
+.code { line-height: 1.2em }
+.comment { color: green; font-style: oblique }
+.keyword { color: blue }
+.string_literal { color: red }
+.directive { color: darkmagenta }
+/* Macro expansions. */
+.expansion { display: none; }
+.macro:hover .expansion {
+  display: block;
+  border: 2px solid #FF0000;
+  padding: 2px;
+  background-color:#FFF0F0;
+  font-weight: normal;
+  -webkit-border-radius:5px;
+  -webkit-box-shadow:1px 1px 7px #000;
+  border-radius:5px;
+  box-shadow:1px 1px 7px #000;
+  position: absolute;
+  top: -1em;
+  left:10em;
+  z-index: 1
+}
+
+#tooltiphint {
+  position: fixed;
+  width: 50em;
+  margin-left: -25em;
+  left: 50%;
+  padding: 10px;
+  border: 1px solid #b0b0b0;
+  border-radius: 2px;
+  box-shadow: 1px 1px 7px black;
+  background-color: #c0c0c0;
+  z-index: 2;
+}
+.macro {
+  color: darkmagenta;
+  background-color:LemonChiffon;
+  /* Macros are position: relative to provide base for expansions. */
+  position: relative;
+}
+
+.num { width:2.5em; padding-right:2ex; background-color:#eeeeee }
+.num { text-align:right; font-size:8pt }
+.num { color:#444444 }
+.line { padding-left: 1ex; border-left: 3px solid #ccc }
+.line { white-space: pre }
+.msg { -webkit-box-shadow:1px 1px 7px #000 }
+.msg { box-shadow:1px 1px 7px #000 }
+.msg { -webkit-border-radius:5px }
+.msg { border-radius:5px }
+.msg { font-family:Helvetica, sans-serif; font-size:8pt }
+.msg { float:left }
+.msg { padding:0.25em 1ex 0.25em 1ex }
+.msg { margin-top:10px; margin-bottom:10px }
+.msg { font-weight:bold }
+.msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap }
+.msgT { padding:0x; spacing:0x }
+.msgEvent { background-color:#fff8b4; color:#000000 }
+.msgControl { background-color:#bbbbbb; color:#000000 }
+.msgNote { background-color:#ddeeff; color:#000000 }
+.mrange { background-color:#dfddf3 }
+.mrange { border-bottom:1px solid #6F9DBE }
+.PathIndex { font-weight: bold; padding:0px 5px; margin-right:5px; }
+.PathIndex { -webkit-border-radius:8px }
+.PathIndex { border-radius:8px }
+.PathIndexEvent { background-color:#bfba87 }
+.PathIndexControl { background-color:#8c8c8c }
+.PathNav a { text-decoration:none; font-size: larger }
+.CodeInsertionHint { font-weight: bold; background-color: #10dd10 }
+.CodeRemovalHint { background-color:#de1010 }
+.CodeRemovalHint { border-bottom:1px solid #6F9DBE }
+.selected{ background-color:orange !important; }
+
+table.simpletable {
+  padding: 5px;
+  font-size:12pt;
+  margin:20px;
+  border-collapse: collapse; border-spacing: 0px;
+}
+td.rowname {
+  text-align: right;
+  vertical-align: top;
+  font-weight: bold;
+  color:#444444;
+  padding-right:2ex;
+}
+
+/* Hidden text. */
+input.spoilerhider + label {
+  cursor: pointer;
+  text-decoration: underline;
+  display: block;
+}
+input.spoilerhider {
+ display: none;
+}
+input.spoilerhider ~ .spoiler {
+  overflow: hidden;
+  margin: 10px auto 0;
+  height: 0;
+  opacity: 0;
+}
+input.spoilerhider:checked + label + .spoiler{
+  height: auto;
+  opacity: 1;
+}
+</style>
+</head>
+<body>)<<<";
 
   // Generate header
   R.InsertTextBefore(StartLoc, os.str());
diff --git a/lib/Sema/AnalysisBasedWarnings.cpp b/lib/Sema/AnalysisBasedWarnings.cpp
index f004a99..9c9e1c3 100644
--- a/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/lib/Sema/AnalysisBasedWarnings.cpp
@@ -281,114 +281,62 @@
 //===----------------------------------------------------------------------===//
 // Check for throw in a non-throwing function.
 //===----------------------------------------------------------------------===//
-enum ThrowState {
-  FoundNoPathForThrow,
-  FoundPathForThrow,
-  FoundPathWithNoThrowOutFunction,
-};
 
-static bool isThrowCaught(const CXXThrowExpr *Throw,
-                          const CXXCatchStmt *Catch) {
-  const Type *CaughtType = Catch->getCaughtType().getTypePtrOrNull();
-  if (!CaughtType)
-    return true;
-  const Type *ThrowType = nullptr;
-  if (Throw->getSubExpr())
-    ThrowType = Throw->getSubExpr()->getType().getTypePtrOrNull();
-  if (!ThrowType)
-    return false;
-  if (ThrowType->isReferenceType())
-    ThrowType = ThrowType->castAs<ReferenceType>()
-                    ->getPointeeType()
-                    ->getUnqualifiedDesugaredType();
-  if (CaughtType->isReferenceType())
-    CaughtType = CaughtType->castAs<ReferenceType>()
-                     ->getPointeeType()
-                     ->getUnqualifiedDesugaredType();
-  if (ThrowType->isPointerType() && CaughtType->isPointerType()) {
-    ThrowType = ThrowType->getPointeeType()->getUnqualifiedDesugaredType();
-    CaughtType = CaughtType->getPointeeType()->getUnqualifiedDesugaredType();
-  }
-  if (CaughtType == ThrowType)
-    return true;
-  const CXXRecordDecl *CaughtAsRecordType =
-      CaughtType->getAsCXXRecordDecl();
-  const CXXRecordDecl *ThrowTypeAsRecordType = ThrowType->getAsCXXRecordDecl();
-  if (CaughtAsRecordType && ThrowTypeAsRecordType)
-    return ThrowTypeAsRecordType->isDerivedFrom(CaughtAsRecordType);
-  return false;
-}
-
-static bool isThrowCaughtByHandlers(const CXXThrowExpr *CE,
-                                    const CXXTryStmt *TryStmt) {
-  for (unsigned H = 0, E = TryStmt->getNumHandlers(); H < E; ++H) {
-    if (isThrowCaught(CE, TryStmt->getHandler(H)))
-      return true;
-  }
-  return false;
-}
-
-static bool doesThrowEscapePath(CFGBlock Block, SourceLocation &OpLoc) {
-  for (const auto &B : Block) {
-    if (B.getKind() != CFGElement::Statement)
-      continue;
-    const auto *CE = dyn_cast<CXXThrowExpr>(B.getAs<CFGStmt>()->getStmt());
-    if (!CE)
-      continue;
-
-    OpLoc = CE->getThrowLoc();
-    for (const auto &I : Block.succs()) {
-      if (!I.isReachable())
-        continue;
-      if (const auto *Terminator =
-              dyn_cast_or_null<CXXTryStmt>(I->getTerminator()))
-        if (isThrowCaughtByHandlers(CE, Terminator))
-          return false;
-    }
-    return true;
-  }
-  return false;
-}
-
-static bool hasThrowOutNonThrowingFunc(SourceLocation &OpLoc, CFG *BodyCFG) {
-
-  unsigned ExitID = BodyCFG->getExit().getBlockID();
-
-  SmallVector<ThrowState, 16> States(BodyCFG->getNumBlockIDs(),
-                                     FoundNoPathForThrow);
-  States[BodyCFG->getEntry().getBlockID()] = FoundPathWithNoThrowOutFunction;
-
+/// Determine whether an exception thrown by E, unwinding from ThrowBlock,
+/// can reach ExitBlock.
+static bool throwEscapes(Sema &S, const CXXThrowExpr *E, CFGBlock &ThrowBlock,
+                         CFG *Body) {
   SmallVector<CFGBlock *, 16> Stack;
-  Stack.push_back(&BodyCFG->getEntry());
-  while (!Stack.empty()) {
-    CFGBlock *CurBlock = Stack.pop_back_val();
+  llvm::BitVector Queued(Body->getNumBlockIDs());
 
-    unsigned ID = CurBlock->getBlockID();
-    ThrowState CurState = States[ID];
-    if (CurState == FoundPathWithNoThrowOutFunction) {
-      if (ExitID == ID)
+  Stack.push_back(&ThrowBlock);
+  Queued[ThrowBlock.getBlockID()] = true;
+
+  while (!Stack.empty()) {
+    CFGBlock &UnwindBlock = *Stack.back();
+    Stack.pop_back();
+
+    for (auto &Succ : UnwindBlock.succs()) {
+      if (!Succ.isReachable() || Queued[Succ->getBlockID()])
         continue;
 
-      if (doesThrowEscapePath(*CurBlock, OpLoc))
-        CurState = FoundPathForThrow;
-    }
+      if (Succ->getBlockID() == Body->getExit().getBlockID())
+        return true;
 
-    // Loop over successor blocks and add them to the Stack if their state
-    // changes.
-    for (const auto &I : CurBlock->succs())
-      if (I.isReachable()) {
-        unsigned NextID = I->getBlockID();
-        if (NextID == ExitID && CurState == FoundPathForThrow) {
-          States[NextID] = CurState;
-        } else if (States[NextID] < CurState) {
-          States[NextID] = CurState;
-          Stack.push_back(I);
-        }
+      if (auto *Catch =
+              dyn_cast_or_null<CXXCatchStmt>(Succ->getLabel())) {
+        QualType Caught = Catch->getCaughtType();
+        if (Caught.isNull() || // catch (...) catches everything
+            !E->getSubExpr() || // throw; is considered cuaght by any handler
+            S.handlerCanCatch(Caught, E->getSubExpr()->getType()))
+          // Exception doesn't escape via this path.
+          break;
+      } else {
+        Stack.push_back(Succ);
+        Queued[Succ->getBlockID()] = true;
       }
+    }
   }
-  // Return true if the exit node is reachable, and only reachable through
-  // a throw expression.
-  return States[ExitID] == FoundPathForThrow;
+
+  return false;
+}
+
+static void visitReachableThrows(
+    CFG *BodyCFG,
+    llvm::function_ref<void(const CXXThrowExpr *, CFGBlock &)> Visit) {
+  llvm::BitVector Reachable(BodyCFG->getNumBlockIDs());
+  clang::reachable_code::ScanReachableFromBlock(&BodyCFG->getEntry(), Reachable);
+  for (CFGBlock *B : *BodyCFG) {
+    if (!Reachable[B->getBlockID()])
+      continue;
+    for (CFGElement &E : *B) {
+      Optional<CFGStmt> S = E.getAs<CFGStmt>();
+      if (!S)
+        continue;
+      if (auto *Throw = dyn_cast<CXXThrowExpr>(S->getStmt()))
+        Visit(Throw, *B);
+    }
+  }
 }
 
 static void EmitDiagForCXXThrowInNonThrowingFunc(Sema &S, SourceLocation OpLoc,
@@ -418,9 +366,10 @@
     return;
   if (BodyCFG->getExit().pred_empty())
     return;
-  SourceLocation OpLoc;
-  if (hasThrowOutNonThrowingFunc(OpLoc, BodyCFG))
-    EmitDiagForCXXThrowInNonThrowingFunc(S, OpLoc, FD);
+  visitReachableThrows(BodyCFG, [&](const CXXThrowExpr *Throw, CFGBlock &Block) {
+    if (throwEscapes(S, Throw, Block, BodyCFG))
+      EmitDiagForCXXThrowInNonThrowingFunc(S, Throw->getThrowLoc(), FD);
+  });
 }
 
 static bool isNoexcept(const FunctionDecl *FD) {
@@ -1275,7 +1224,7 @@
     tok::r_square, tok::r_square
   };
 
-  bool PreferClangAttr = !PP.getLangOpts().CPlusPlus1z;
+  bool PreferClangAttr = !PP.getLangOpts().CPlusPlus17;
 
   StringRef MacroName;
   if (PreferClangAttr)
@@ -2080,10 +2029,10 @@
   //     time.
   DiagnosticsEngine &Diags = S.getDiagnostics();
 
-  // Do not do any analysis for declarations in system headers if we are
-  // going to just ignore them.
-  if (Diags.getSuppressSystemWarnings() &&
-      S.SourceMgr.isInSystemHeader(D->getLocation()))
+  // Do not do any analysis if we are going to just ignore them.
+  if (Diags.getIgnoreAllWarnings() ||
+      (Diags.getSuppressSystemWarnings() &&
+       S.SourceMgr.isInSystemHeader(D->getLocation())))
     return;
 
   // For code in dependent contexts, we'll do this at instantiation time.
diff --git a/lib/Sema/AttributeList.cpp b/lib/Sema/AttributeList.cpp
index 14d3347..bdb91c7 100644
--- a/lib/Sema/AttributeList.cpp
+++ b/lib/Sema/AttributeList.cpp
@@ -1,4 +1,4 @@
-//===--- AttributeList.cpp --------------------------------------*- C++ -*-===//
+//===- AttributeList.cpp --------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,14 +13,17 @@
 
 #include "clang/Sema/AttributeList.h"
 #include "clang/AST/ASTContext.h"
-#include "clang/AST/DeclCXX.h"
-#include "clang/AST/DeclTemplate.h"
-#include "clang/AST/Expr.h"
 #include "clang/Basic/AttrSubjectMatchRules.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/SemaInternal.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include <cassert>
+#include <cstddef>
+#include <utility>
+
 using namespace clang;
 
 IdentifierLoc *IdentifierLoc::create(ASTContext &Ctx, SourceLocation Loc,
@@ -44,7 +47,7 @@
   // Go ahead and configure all the inline capacity.  This is just a memset.
   FreeLists.resize(InlineFreeListsCapacity);
 }
-AttributeFactory::~AttributeFactory() {}
+AttributeFactory::~AttributeFactory() = default;
 
 static size_t getFreeListIndexForSize(size_t size) {
   assert(size >= sizeof(AttributeList));
@@ -175,8 +178,10 @@
 };
 
 namespace {
-  #include "clang/Sema/AttrParsedAttrImpl.inc"
-}
+
+#include "clang/Sema/AttrParsedAttrImpl.inc"
+
+} // namespace
 
 static const ParsedAttrInfo &getInfo(const AttributeList &A) {
   return AttrInfoMap[A.getKind()];
diff --git a/lib/Sema/CodeCompleteConsumer.cpp b/lib/Sema/CodeCompleteConsumer.cpp
index f5b0104..d012e55 100644
--- a/lib/Sema/CodeCompleteConsumer.cpp
+++ b/lib/Sema/CodeCompleteConsumer.cpp
@@ -1,4 +1,4 @@
-//===--- CodeCompleteConsumer.cpp - Code Completion Interface ---*- C++ -*-===//
+//===- CodeCompleteConsumer.cpp - Code Completion Interface ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,21 +10,30 @@
 //  This file implements the CodeCompleteConsumer class.
 //
 //===----------------------------------------------------------------------===//
+
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang-c/Index.h"
-#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclTemplate.h"
-#include "clang/Sema/Scope.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/IdentifierTable.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Lex/Preprocessor.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
-#include <cstring>
-#include <functional>
+#include <cassert>
+#include <cstdint>
+#include <string>
 
 using namespace clang;
 
@@ -33,7 +42,7 @@
 //===----------------------------------------------------------------------===//
 
 bool CodeCompletionContext::wantConstructorResults() const {
-  switch (Kind) {
+  switch (CCKind) {
   case CCC_Recovery:
   case CCC_Statement:
   case CCC_Expression:
@@ -76,12 +85,87 @@
   llvm_unreachable("Invalid CodeCompletionContext::Kind!");
 }
 
+StringRef clang::getCompletionKindString(CodeCompletionContext::Kind Kind) {
+  using CCKind = CodeCompletionContext::Kind;
+  switch (Kind) {
+  case CCKind::CCC_Other:
+    return "Other";
+  case CCKind::CCC_OtherWithMacros:
+    return "OtherWithMacros";
+  case CCKind::CCC_TopLevel:
+    return "TopLevel";
+  case CCKind::CCC_ObjCInterface:
+    return "ObjCInterface";
+  case CCKind::CCC_ObjCImplementation:
+    return "ObjCImplementation";
+  case CCKind::CCC_ObjCIvarList:
+    return "ObjCIvarList";
+  case CCKind::CCC_ClassStructUnion:
+    return "ClassStructUnion";
+  case CCKind::CCC_Statement:
+    return "Statement";
+  case CCKind::CCC_Expression:
+    return "Expression";
+  case CCKind::CCC_ObjCMessageReceiver:
+    return "ObjCMessageReceiver";
+  case CCKind::CCC_DotMemberAccess:
+    return "DotMemberAccess";
+  case CCKind::CCC_ArrowMemberAccess:
+    return "ArrowMemberAccess";
+  case CCKind::CCC_ObjCPropertyAccess:
+    return "ObjCPropertyAccess";
+  case CCKind::CCC_EnumTag:
+    return "EnumTag";
+  case CCKind::CCC_UnionTag:
+    return "UnionTag";
+  case CCKind::CCC_ClassOrStructTag:
+    return "ClassOrStructTag";
+  case CCKind::CCC_ObjCProtocolName:
+    return "ObjCProtocolName";
+  case CCKind::CCC_Namespace:
+    return "Namespace";
+  case CCKind::CCC_Type:
+    return "Type";
+  case CCKind::CCC_Name:
+    return "Name";
+  case CCKind::CCC_PotentiallyQualifiedName:
+    return "PotentiallyQualifiedName";
+  case CCKind::CCC_MacroName:
+    return "MacroName";
+  case CCKind::CCC_MacroNameUse:
+    return "MacroNameUse";
+  case CCKind::CCC_PreprocessorExpression:
+    return "PreprocessorExpression";
+  case CCKind::CCC_PreprocessorDirective:
+    return "PreprocessorDirective";
+  case CCKind::CCC_NaturalLanguage:
+    return "NaturalLanguage";
+  case CCKind::CCC_SelectorName:
+    return "SelectorName";
+  case CCKind::CCC_TypeQualifiers:
+    return "TypeQualifiers";
+  case CCKind::CCC_ParenthesizedExpression:
+    return "ParenthesizedExpression";
+  case CCKind::CCC_ObjCInstanceMessage:
+    return "ObjCInstanceMessage";
+  case CCKind::CCC_ObjCClassMessage:
+    return "ObjCClassMessage";
+  case CCKind::CCC_ObjCInterfaceName:
+    return "ObjCInterfaceName";
+  case CCKind::CCC_ObjCCategoryName:
+    return "ObjCCategoryName";
+  case CCKind::CCC_Recovery:
+    return "Recovery";
+  }
+  llvm_unreachable("Invalid CodeCompletionContext::Kind!");
+}
+
 //===----------------------------------------------------------------------===//
 // Code completion string implementation
 //===----------------------------------------------------------------------===//
+
 CodeCompletionString::Chunk::Chunk(ChunkKind Kind, const char *Text) 
-  : Kind(Kind), Text("")
-{
+    : Kind(Kind), Text("") {
   switch (Kind) {
   case CK_TypedText:
   case CK_Text:
@@ -195,10 +279,9 @@
                                            unsigned NumAnnotations,
                                            StringRef ParentName,
                                            const char *BriefComment)
-  : NumChunks(NumChunks), NumAnnotations(NumAnnotations),
-    Priority(Priority), Availability(Availability),
-    ParentName(ParentName), BriefComment(BriefComment)
-{ 
+    : NumChunks(NumChunks), NumAnnotations(NumAnnotations),
+      Priority(Priority), Availability(Availability),
+      ParentName(ParentName), BriefComment(BriefComment) { 
   assert(NumChunks <= 0xffff);
   assert(NumAnnotations <= 0xffff);
 
@@ -222,7 +305,6 @@
     return nullptr;
 }
 
-
 std::string CodeCompletionString::getAsString() const {
   std::string Result;
   llvm::raw_string_ostream OS(Result);
@@ -267,7 +349,7 @@
 StringRef CodeCompletionTUInfo::getParentName(const DeclContext *DC) {
   const NamedDecl *ND = dyn_cast<NamedDecl>(DC);
   if (!ND)
-    return StringRef();
+    return {};
   
   // Check whether we've already cached the parent name.
   StringRef &CachedParentName = ParentNames[DC];
@@ -277,7 +359,7 @@
   // If we already processed this DeclContext and assigned empty to it, the
   // data pointer will be non-null.
   if (CachedParentName.data() != nullptr)
-    return StringRef();
+    return {};
 
   // Find the interesting names.
   SmallVector<const DeclContext *, 2> Contexts;
@@ -311,7 +393,7 @@
           // Assign an empty StringRef but with non-null data to distinguish
           // between empty because we didn't process the DeclContext yet.
           CachedParentName = StringRef((const char *)(uintptr_t)~0U, 0);
-          return StringRef();
+          return {};
         }
         
         OS << Interface->getName() << '(' << Cat->getName() << ')';
@@ -375,9 +457,8 @@
 }
 
 void CodeCompletionBuilder::addParentContext(const DeclContext *DC) {
-  if (DC->isTranslationUnit()) {
+  if (DC->isTranslationUnit())
     return;
-  }
   
   if (DC->isFunctionOrMethod())
     return;
@@ -427,25 +508,21 @@
 // Code completion consumer implementation
 //===----------------------------------------------------------------------===//
 
-CodeCompleteConsumer::~CodeCompleteConsumer() { }
+CodeCompleteConsumer::~CodeCompleteConsumer() = default;
 
 bool PrintingCodeCompleteConsumer::isResultFilteredOut(StringRef Filter,
                                                 CodeCompletionResult Result) {
   switch (Result.Kind) {
-  case CodeCompletionResult::RK_Declaration: {
+  case CodeCompletionResult::RK_Declaration:
     return !(Result.Declaration->getIdentifier() &&
             Result.Declaration->getIdentifier()->getName().startswith(Filter));
-  }
-  case CodeCompletionResult::RK_Keyword: {
+  case CodeCompletionResult::RK_Keyword:
     return !StringRef(Result.Keyword).startswith(Filter);
-  }
-  case CodeCompletionResult::RK_Macro: {
+  case CodeCompletionResult::RK_Macro:
     return !Result.Macro->getName().startswith(Filter);
-  }
-  case CodeCompletionResult::RK_Pattern: {
+  case CodeCompletionResult::RK_Pattern:
     return !StringRef(Result.Pattern->getAsString()).startswith(Filter);
   }
-  }
   llvm_unreachable("Unknown code completion result Kind.");
 }
 
@@ -477,7 +554,6 @@
         if (const char *BriefComment = CCS->getBriefComment())
           OS << " : " << BriefComment;
       }
-        
       OS << '\n';
       break;
       
@@ -485,7 +561,7 @@
       OS << Results[I].Keyword << '\n';
       break;
         
-    case CodeCompletionResult::RK_Macro: {
+    case CodeCompletionResult::RK_Macro:
       OS << Results[I].Macro->getName();
       if (CodeCompletionString *CCS 
             = Results[I].CreateCodeCompletionString(SemaRef, Context,
@@ -496,14 +572,12 @@
       }
       OS << '\n';
       break;
-    }
         
-    case CodeCompletionResult::RK_Pattern: {
+    case CodeCompletionResult::RK_Pattern:
       OS << "Pattern : " 
          << Results[I].Pattern->getAsString() << '\n';
       break;
     }
-    }
   }
 }
 
@@ -562,7 +636,7 @@
       // Do nothing: Patterns can come with cursor kinds!
       break;
     }
-    // Fall through
+    LLVM_FALLTHROUGH;
       
   case RK_Declaration: {
     // Set the availability based on attributes.
@@ -613,24 +687,20 @@
 ///
 /// If the name needs to be constructed as a string, that string will be
 /// saved into Saved and the returned StringRef will refer to it.
-static StringRef getOrderedName(const CodeCompletionResult &R,
-                                    std::string &Saved) {
-  switch (R.Kind) {
-    case CodeCompletionResult::RK_Keyword:
-      return R.Keyword;
-      
-    case CodeCompletionResult::RK_Pattern:
-      return R.Pattern->getTypedText();
-      
-    case CodeCompletionResult::RK_Macro:
-      return R.Macro->getName();
-      
-    case CodeCompletionResult::RK_Declaration:
+StringRef CodeCompletionResult::getOrderedName(std::string &Saved) const {
+  switch (Kind) {
+    case RK_Keyword:
+      return Keyword;
+    case RK_Pattern:
+      return Pattern->getTypedText();
+    case RK_Macro:
+      return Macro->getName();
+    case RK_Declaration:
       // Handle declarations below.
       break;
   }
   
-  DeclarationName Name = R.Declaration->getDeclName();
+  DeclarationName Name = Declaration->getDeclName();
   
   // If the name is a simple identifier (by far the common case), or a
   // zero-argument selector, just return a reference to that identifier.
@@ -648,8 +718,8 @@
 bool clang::operator<(const CodeCompletionResult &X, 
                       const CodeCompletionResult &Y) {
   std::string XSaved, YSaved;
-  StringRef XStr = getOrderedName(X, XSaved);
-  StringRef YStr = getOrderedName(Y, YSaved);
+  StringRef XStr = X.getOrderedName(XSaved);
+  StringRef YStr = Y.getOrderedName(YSaved);
   int cmp = XStr.compare_lower(YStr);
   if (cmp)
     return cmp < 0;
diff --git a/lib/Sema/CoroutineStmtBuilder.h b/lib/Sema/CoroutineStmtBuilder.h
index 33a368d..3b7ee7a 100644
--- a/lib/Sema/CoroutineStmtBuilder.h
+++ b/lib/Sema/CoroutineStmtBuilder.h
@@ -51,9 +51,6 @@
   /// name lookup.
   bool buildDependentStatements();
 
-  /// \brief Build just parameter moves. To use for rebuilding in TreeTransform.
-  bool buildParameterMoves();
-
   bool isInvalid() const { return !this->IsValid; }
 
 private:
@@ -65,7 +62,6 @@
   bool makeReturnObject();
   bool makeGroDeclAndReturnStmt();
   bool makeReturnOnAllocFailure();
-  bool makeParamMoves();
 };
 
 } // end namespace clang
diff --git a/lib/Sema/DeclSpec.cpp b/lib/Sema/DeclSpec.cpp
index bce4939..2fad5a1 100644
--- a/lib/Sema/DeclSpec.cpp
+++ b/lib/Sema/DeclSpec.cpp
@@ -30,7 +30,7 @@
 
 void UnqualifiedId::setTemplateId(TemplateIdAnnotation *TemplateId) {
   assert(TemplateId && "NULL template-id annotation?");
-  Kind = IK_TemplateId;
+  Kind = UnqualifiedIdKind::IK_TemplateId;
   this->TemplateId = TemplateId;
   StartLocation = TemplateId->TemplateNameLoc;
   EndLocation = TemplateId->RAngleLoc;
@@ -38,7 +38,7 @@
 
 void UnqualifiedId::setConstructorTemplateId(TemplateIdAnnotation *TemplateId) {
   assert(TemplateId && "NULL template-id annotation?");
-  Kind = IK_ConstructorTemplateId;
+  Kind = UnqualifiedIdKind::IK_ConstructorTemplateId;
   this->TemplateId = TemplateId;
   StartLocation = TemplateId->TemplateNameLoc;
   EndLocation = TemplateId->RAngleLoc;
@@ -387,16 +387,16 @@
 }
 
 bool Declarator::isStaticMember() {
-  assert(getContext() == MemberContext);
+  assert(getContext() == DeclaratorContext::MemberContext);
   return getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static ||
-         (getName().Kind == UnqualifiedId::IK_OperatorFunctionId &&
+         (getName().Kind == UnqualifiedIdKind::IK_OperatorFunctionId &&
           CXXMethodDecl::isStaticOverloadedOperator(
               getName().OperatorFunctionId.Operator));
 }
 
 bool Declarator::isCtorOrDtor() {
-  return (getName().getKind() == UnqualifiedId::IK_ConstructorName) ||
-         (getName().getKind() == UnqualifiedId::IK_DestructorName);
+  return (getName().getKind() == UnqualifiedIdKind::IK_ConstructorName) ||
+         (getName().getKind() == UnqualifiedIdKind::IK_DestructorName);
 }
 
 bool DeclSpec::hasTagDefinition() const {
@@ -969,18 +969,6 @@
   return false;
 }
 
-bool DeclSpec::SetConceptSpec(SourceLocation Loc, const char *&PrevSpec,
-                              unsigned &DiagID) {
-  if (Concept_specified) {
-    DiagID = diag::ext_duplicate_declspec;
-    PrevSpec = "concept";
-    return true;
-  }
-  Concept_specified = true;
-  ConceptLoc = Loc;
-  return false;
-}
-
 void DeclSpec::SaveWrittenBuiltinSpecs() {
   writtenBS.Sign = getTypeSpecSign();
   writtenBS.Width = getTypeSpecWidth();
@@ -1293,7 +1281,7 @@
 void UnqualifiedId::setOperatorFunctionId(SourceLocation OperatorLoc, 
                                           OverloadedOperatorKind Op,
                                           SourceLocation SymbolLocations[3]) {
-  Kind = IK_OperatorFunctionId;
+  Kind = UnqualifiedIdKind::IK_OperatorFunctionId;
   StartLocation = OperatorLoc;
   EndLocation = OperatorLoc;
   OperatorFunctionId.Operator = Op;
diff --git a/lib/Sema/DelayedDiagnostic.cpp b/lib/Sema/DelayedDiagnostic.cpp
index 3d321d5..2555206 100644
--- a/lib/Sema/DelayedDiagnostic.cpp
+++ b/lib/Sema/DelayedDiagnostic.cpp
@@ -1,4 +1,4 @@
-//===--- DelayedDiagnostic.cpp - Delayed declarator diagnostics -*- C++ -*-===//
+//===- DelayedDiagnostic.cpp - Delayed declarator diagnostics -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,8 +14,10 @@
 // This file also defines AccessedEntity.
 //
 //===----------------------------------------------------------------------===//
+
 #include "clang/Sema/DelayedDiagnostic.h"
-#include <string.h>
+#include <cstring>
+
 using namespace clang;
 using namespace sema;
 
@@ -37,7 +39,7 @@
   DD.AvailabilityData.UnknownObjCClass = UnknownObjCClass;
   DD.AvailabilityData.ObjCProperty = ObjCProperty;
   char *MessageData = nullptr;
-  if (Msg.size()) {
+  if (!Msg.empty()) {
     MessageData = new char [Msg.size()];
     memcpy(MessageData, Msg.data(), Msg.size());
   }
diff --git a/lib/Sema/IdentifierResolver.cpp b/lib/Sema/IdentifierResolver.cpp
index 0bdb194..f31c517 100644
--- a/lib/Sema/IdentifierResolver.cpp
+++ b/lib/Sema/IdentifierResolver.cpp
@@ -1,4 +1,4 @@
-//===- IdentifierResolver.cpp - Lexical Scope Name lookup -------*- C++ -*-===//
+//===- IdentifierResolver.cpp - Lexical Scope Name lookup -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,10 +14,16 @@
 
 #include "clang/Sema/IdentifierResolver.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Lex/ExternalPreprocessorSource.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/Scope.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
 
 using namespace clang;
 
@@ -35,17 +41,17 @@
   /// impossible to add something to a pre-C++0x STL container without
   /// a completely unnecessary copy.
   struct IdDeclInfoPool {
-    IdDeclInfoPool(IdDeclInfoPool *Next) : Next(Next) {}
-    
     IdDeclInfoPool *Next;
     IdDeclInfo Pool[POOL_SIZE];
+
+    IdDeclInfoPool(IdDeclInfoPool *Next) : Next(Next) {}
   };
   
-  IdDeclInfoPool *CurPool;
-  unsigned int CurIndex;
+  IdDeclInfoPool *CurPool = nullptr;
+  unsigned int CurIndex = POOL_SIZE;
 
 public:
-  IdDeclInfoMap() : CurPool(nullptr), CurIndex(POOL_SIZE) {}
+  IdDeclInfoMap() = default;
 
   ~IdDeclInfoMap() {
     IdDeclInfoPool *Cur = CurPool;
@@ -60,7 +66,6 @@
   IdDeclInfo &operator[](DeclarationName Name);
 };
 
-
 //===----------------------------------------------------------------------===//
 // IdDeclInfo Implementation
 //===----------------------------------------------------------------------===//
@@ -83,9 +88,7 @@
 //===----------------------------------------------------------------------===//
 
 IdentifierResolver::IdentifierResolver(Preprocessor &PP)
-  : LangOpt(PP.getLangOpts()), PP(PP),
-    IdDeclInfos(new IdDeclInfoMap) {
-}
+    : LangOpt(PP.getLangOpts()), PP(PP), IdDeclInfos(new IdDeclInfoMap) {}
 
 IdentifierResolver::~IdentifierResolver() {
   delete IdDeclInfos;
@@ -245,12 +248,14 @@
 }
 
 namespace {
-  enum DeclMatchKind {
-    DMK_Different,
-    DMK_Replace,
-    DMK_Ignore
-  };
-}
+
+enum DeclMatchKind {
+  DMK_Different,
+  DMK_Replace,
+  DMK_Ignore
+};
+
+} // namespace
 
 /// \brief Compare two declarations to see whether they are different or,
 /// if they are the same, whether the new declaration should replace the 
diff --git a/lib/Sema/JumpDiagnostics.cpp b/lib/Sema/JumpDiagnostics.cpp
index 865aea9..55582f8 100644
--- a/lib/Sema/JumpDiagnostics.cpp
+++ b/lib/Sema/JumpDiagnostics.cpp
@@ -154,6 +154,10 @@
         return ScopePair(diag::note_protected_by_objc_weak_init,
                          diag::note_exits_objc_weak);
 
+      case QualType::DK_nontrivial_c_struct:
+        return ScopePair(diag::note_protected_by_non_trivial_c_struct_init,
+                         diag::note_exits_dtor);
+
       case QualType::DK_cxx_destructor:
         OutDiag = diag::note_exits_dtor;
         break;
@@ -254,6 +258,10 @@
         Diags = ScopePair(diag::note_enters_block_captures_weak,
                           diag::note_exits_block_captures_weak);
         break;
+      case QualType::DK_nontrivial_c_struct:
+        Diags = ScopePair(diag::note_enters_block_captures_non_trivial_c_struct,
+                          diag::note_exits_block_captures_non_trivial_c_struct);
+        break;
       case QualType::DK_none:
         llvm_unreachable("non-lifetime captured variable");
     }
@@ -323,7 +331,7 @@
       BuildScopeInformation(Var, ParentScope);
       ++StmtsToSkip;
     }
-    // Fall through
+    LLVM_FALLTHROUGH;
 
   case Stmt::GotoStmtClass:
     // Remember both what scope a goto is in as well as the fact that we have
diff --git a/lib/Sema/MultiplexExternalSemaSource.cpp b/lib/Sema/MultiplexExternalSemaSource.cpp
index 77ace0c..46238fb 100644
--- a/lib/Sema/MultiplexExternalSemaSource.cpp
+++ b/lib/Sema/MultiplexExternalSemaSource.cpp
@@ -164,6 +164,13 @@
     Sources[i]->PrintStats();
 }
 
+Module *MultiplexExternalSemaSource::getModule(unsigned ID) {
+  for (size_t i = 0; i < Sources.size(); ++i)
+    if (auto M = Sources[i]->getModule(ID))
+      return M;
+  return nullptr;
+}
+
 bool MultiplexExternalSemaSource::layoutRecordType(const RecordDecl *Record,
                                                    uint64_t &Size, 
                                                    uint64_t &Alignment,
diff --git a/lib/Sema/Scope.cpp b/lib/Sema/Scope.cpp
index ae5b181..eae5a32 100644
--- a/lib/Sema/Scope.cpp
+++ b/lib/Sema/Scope.cpp
@@ -143,72 +143,43 @@
   if (HasFlags)
     OS << "Flags: ";
 
-  while (Flags) {
-    if (Flags & FnScope) {
-      OS << "FnScope";
-      Flags &= ~FnScope;
-    } else if (Flags & BreakScope) {
-      OS << "BreakScope";
-      Flags &= ~BreakScope;
-    } else if (Flags & ContinueScope) {
-      OS << "ContinueScope";
-      Flags &= ~ContinueScope;
-    } else if (Flags & DeclScope) {
-      OS << "DeclScope";
-      Flags &= ~DeclScope;
-    } else if (Flags & ControlScope) {
-      OS << "ControlScope";
-      Flags &= ~ControlScope;
-    } else if (Flags & ClassScope) {
-      OS << "ClassScope";
-      Flags &= ~ClassScope;
-    } else if (Flags & BlockScope) {
-      OS << "BlockScope";
-      Flags &= ~BlockScope;
-    } else if (Flags & TemplateParamScope) {
-      OS << "TemplateParamScope";
-      Flags &= ~TemplateParamScope;
-    } else if (Flags & FunctionPrototypeScope) {
-      OS << "FunctionPrototypeScope";
-      Flags &= ~FunctionPrototypeScope;
-    } else if (Flags & FunctionDeclarationScope) {
-      OS << "FunctionDeclarationScope";
-      Flags &= ~FunctionDeclarationScope;
-    } else if (Flags & AtCatchScope) {
-      OS << "AtCatchScope";
-      Flags &= ~AtCatchScope;
-    } else if (Flags & ObjCMethodScope) {
-      OS << "ObjCMethodScope";
-      Flags &= ~ObjCMethodScope;
-    } else if (Flags & SwitchScope) {
-      OS << "SwitchScope";
-      Flags &= ~SwitchScope;
-    } else if (Flags & TryScope) {
-      OS << "TryScope";
-      Flags &= ~TryScope;
-    } else if (Flags & FnTryCatchScope) {
-      OS << "FnTryCatchScope";
-      Flags &= ~FnTryCatchScope;
-    } else if (Flags & SEHTryScope) {
-      OS << "SEHTryScope";
-      Flags &= ~SEHTryScope;
-    } else if (Flags & SEHExceptScope) {
-      OS << "SEHExceptScope";
-      Flags &= ~SEHExceptScope;
-    } else if (Flags & OpenMPDirectiveScope) {
-      OS << "OpenMPDirectiveScope";
-      Flags &= ~OpenMPDirectiveScope;
-    } else if (Flags & OpenMPLoopDirectiveScope) {
-      OS << "OpenMPLoopDirectiveScope";
-      Flags &= ~OpenMPLoopDirectiveScope;
-    } else if (Flags & OpenMPSimdDirectiveScope) {
-      OS << "OpenMPSimdDirectiveScope";
-      Flags &= ~OpenMPSimdDirectiveScope;
-    }
+  std::pair<unsigned, const char *> FlagInfo[] = {
+      {FnScope, "FnScope"},
+      {BreakScope, "BreakScope"},
+      {ContinueScope, "ContinueScope"},
+      {DeclScope, "DeclScope"},
+      {ControlScope, "ControlScope"},
+      {ClassScope, "ClassScope"},
+      {BlockScope, "BlockScope"},
+      {TemplateParamScope, "TemplateParamScope"},
+      {FunctionPrototypeScope, "FunctionPrototypeScope"},
+      {FunctionDeclarationScope, "FunctionDeclarationScope"},
+      {AtCatchScope, "AtCatchScope"},
+      {ObjCMethodScope, "ObjCMethodScope"},
+      {SwitchScope, "SwitchScope"},
+      {TryScope, "TryScope"},
+      {FnTryCatchScope, "FnTryCatchScope"},
+      {OpenMPDirectiveScope, "OpenMPDirectiveScope"},
+      {OpenMPLoopDirectiveScope, "OpenMPLoopDirectiveScope"},
+      {OpenMPSimdDirectiveScope, "OpenMPSimdDirectiveScope"},
+      {EnumScope, "EnumScope"},
+      {SEHTryScope, "SEHTryScope"},
+      {SEHExceptScope, "SEHExceptScope"},
+      {SEHFilterScope, "SEHFilterScope"},
+      {CompoundStmtScope, "CompoundStmtScope"},
+      {ClassInheritanceScope, "ClassInheritanceScope"}};
 
-    if (Flags)
-      OS << " | ";
+  for (auto Info : FlagInfo) {
+    if (Flags & Info.first) {
+      OS << Info.second;
+      Flags &= ~Info.first;
+      if (Flags)
+        OS << " | ";
+    }
   }
+
+  assert(Flags == 0 && "Unknown scope flags");
+
   if (HasFlags)
     OS << '\n';
 
diff --git a/lib/Sema/ScopeInfo.cpp b/lib/Sema/ScopeInfo.cpp
index b309a36..62a83cc 100644
--- a/lib/Sema/ScopeInfo.cpp
+++ b/lib/Sema/ScopeInfo.cpp
@@ -43,6 +43,7 @@
   // Coroutine state
   FirstCoroutineStmtLoc = SourceLocation();
   CoroutinePromise = nullptr;
+  CoroutineParameterMoves.clear();
   NeedsCoroutineSuspends = true;
   CoroutineSuspends.first = nullptr;
   CoroutineSuspends.second = nullptr;
diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp
index 548f336..1e9e53b 100644
--- a/lib/Sema/Sema.cpp
+++ b/lib/Sema/Sema.cpp
@@ -37,6 +37,7 @@
 #include "clang/Sema/SemaConsumer.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/TemplateDeduction.h"
+#include "clang/Sema/TemplateInstCallback.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 using namespace clang;
@@ -436,14 +437,26 @@
 }
 
 void Sema::diagnoseZeroToNullptrConversion(CastKind Kind, const Expr* E) {
-  if (Kind != CK_NullToPointer && Kind != CK_NullToMemberPointer)
-    return;
-  if (E->getType()->isNullPtrType())
+  if (Diags.isIgnored(diag::warn_zero_as_null_pointer_constant,
+                      E->getLocStart()))
     return;
   // nullptr only exists from C++11 on, so don't warn on its absence earlier.
   if (!getLangOpts().CPlusPlus11)
     return;
 
+  if (Kind != CK_NullToPointer && Kind != CK_NullToMemberPointer)
+    return;
+  if (E->IgnoreParenImpCasts()->getType()->isNullPtrType())
+    return;
+
+  // If it is a macro from system header, and if the macro name is not "NULL",
+  // do not warn.
+  SourceLocation MaybeMacroLoc = E->getLocStart();
+  if (Diags.getSuppressSystemWarnings() &&
+      SourceMgr.isInSystemMacro(MaybeMacroLoc) &&
+      !findMacroSpelling(MaybeMacroLoc, "NULL"))
+    return;
+
   Diag(E->getLocStart(), diag::warn_zero_as_null_pointer_constant)
       << FixItHint::CreateReplacement(E->getSourceRange(), "nullptr");
 }
@@ -517,7 +530,7 @@
   case Type::STK_IntegralComplex: return CK_IntegralComplexToBoolean;
   case Type::STK_FloatingComplex: return CK_FloatingComplexToBoolean;
   }
-  return CK_Invalid;
+  llvm_unreachable("unknown scalar type kind");
 }
 
 /// \brief Used to prune the decls of Sema's UnusedFileScopedDecls vector.
@@ -725,7 +738,8 @@
                                   E = RD->decls_end();
        I != E && Complete; ++I) {
     if (const CXXMethodDecl *M = dyn_cast<CXXMethodDecl>(*I))
-      Complete = M->isDefined() || (M->isPure() && !isa<CXXDestructorDecl>(M));
+      Complete = M->isDefined() || M->isDefaulted() ||
+                 (M->isPure() && !isa<CXXDestructorDecl>(M));
     else if (const FunctionTemplateDecl *F = dyn_cast<FunctionTemplateDecl>(*I))
       // If the template function is marked as late template parsed at this
       // point, it has not been instantiated and therefore we have not
@@ -1380,8 +1394,8 @@
     delete Scope;
 }
 
-void Sema::PushCompoundScope() {
-  getCurFunction()->CompoundScopes.push_back(CompoundScopeInfo());
+void Sema::PushCompoundScope(bool IsStmtExpr) {
+  getCurFunction()->CompoundScopes.push_back(CompoundScopeInfo(IsStmtExpr));
 }
 
 void Sema::PopCompoundScope() {
@@ -1449,8 +1463,7 @@
   if (!LangOpts.RetainCommentsFromSystemHeaders &&
       SourceMgr.isInSystemHeader(Comment.getBegin()))
     return;
-  RawComment RC(SourceMgr, Comment, false,
-                LangOpts.CommentOpts.ParseAllComments);
+  RawComment RC(SourceMgr, Comment, LangOpts.CommentOpts, false);
   if (RC.isAlmostTrailingComment()) {
     SourceRange MagicMarkerRange(Comment.getBegin(),
                                  Comment.getBegin().getLocWithOffset(3));
@@ -1634,6 +1647,12 @@
     }
 
     NamedDecl *Fn = (*It)->getUnderlyingDecl();
+    // Don't print overloads for non-default multiversioned functions.
+    if (const auto *FD = Fn->getAsFunction()) {
+      if (FD->isMultiVersion() &&
+          !FD->getAttr<TargetAttr>()->isDefaultVersion())
+        continue;
+    }
     S.Diag(Fn->getLocation(), diag::note_possible_target_of_call);
     ++ShownOverloads;
   }
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 79263f1..9caabde 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -1,4 +1,4 @@
-//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
+//===- SemaChecking.cpp - Extra Semantic Checking -------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,35 +12,88 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/AST/APValue.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/AttrIterator.h"
 #include "clang/AST/CharUnits.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclObjC.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/EvaluatedExprVisitor.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
 #include "clang/AST/ExprOpenMP.h"
-#include "clang/AST/StmtCXX.h"
-#include "clang/AST/StmtObjC.h"
+#include "clang/AST/NSAPI.h"
+#include "clang/AST/OperationKinds.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/TemplateBase.h"
+#include "clang/AST/Type.h"
+#include "clang/AST/TypeLoc.h"
+#include "clang/AST/UnresolvedSet.h"
 #include "clang/Analysis/Analyses/FormatString.h"
+#include "clang/Basic/AddressSpaces.h"
 #include "clang/Basic/CharInfo.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/OpenCLOptions.h"
+#include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Specifiers.h"
 #include "clang/Basic/SyncScope.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TypeTraits.h"
 #include "clang/Lex/Lexer.h" // TODO: Extract static functions to fix layering.
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/Lookup.h"
+#include "clang/Sema/Ownership.h"
+#include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/SemaInternal.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/Locale.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <string>
+#include <tuple>
+#include <utility>
 
 using namespace clang;
 using namespace sema;
@@ -638,7 +691,7 @@
   // OpenCL v2.0 s6.13.16.2 - The built-in read/write
   // functions have two forms.
   switch (Call->getNumArgs()) {
-  case 2: {
+  case 2:
     if (checkOpenCLPipeArg(S, Call))
       return true;
     // The call with 2 arguments should be
@@ -646,7 +699,7 @@
     // Check packet type T.
     if (checkOpenCLPipePacketType(S, Call, 1))
       return true;
-  } break;
+    break;
 
   case 4: {
     if (checkOpenCLPipeArg(S, Call))
@@ -753,6 +806,7 @@
 
   return false;
 }
+
 // \brief OpenCL v2.0 s6.13.9 - Address space qualifier functions.
 // \brief Performs semantic analysis for the to_global/local/private call.
 // \param S Reference to the semantic analyzer.
@@ -905,7 +959,6 @@
     if (checkArgCount(*this, TheCall, 1))
       return true;
     break;
-
   case Builtin::BI__builtin_classify_type:
     if (checkArgCount(*this, TheCall, 1)) return true;
     TheCall->setType(Context.IntTy);
@@ -1170,9 +1223,8 @@
     break;
   case Builtin::BI__builtin_os_log_format:
   case Builtin::BI__builtin_os_log_format_buffer_size:
-    if (SemaBuiltinOSLogFormat(TheCall)) {
+    if (SemaBuiltinOSLogFormat(TheCall))
       return ExprError();
-    }
     break;
   }
 
@@ -1301,6 +1353,7 @@
   switch (BuiltinID) {
 #define GET_NEON_OVERLOAD_CHECK
 #include "clang/Basic/arm_neon.inc"
+#include "clang/Basic/arm_fp16.inc"
 #undef GET_NEON_OVERLOAD_CHECK
   }
 
@@ -1352,6 +1405,7 @@
     return false;
 #define GET_NEON_IMMEDIATE_CHECK
 #include "clang/Basic/arm_neon.inc"
+#include "clang/Basic/arm_fp16.inc"
 #undef GET_NEON_IMMEDIATE_CHECK
   }
 
@@ -1502,21 +1556,26 @@
 
   // For intrinsics which take an immediate value as part of the instruction,
   // range check them here.
-  unsigned i = 0, l = 0, u = 0;
+  // FIXME: VFP Intrinsics should error if VFP not present.
   switch (BuiltinID) {
   default: return false;
-  case ARM::BI__builtin_arm_ssat: i = 1; l = 1; u = 31; break;
-  case ARM::BI__builtin_arm_usat: i = 1; u = 31; break;
+  case ARM::BI__builtin_arm_ssat:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 1, 32);
+  case ARM::BI__builtin_arm_usat:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31);
+  case ARM::BI__builtin_arm_ssat16:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 1, 16);
+  case ARM::BI__builtin_arm_usat16:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15);
   case ARM::BI__builtin_arm_vcvtr_f:
-  case ARM::BI__builtin_arm_vcvtr_d: i = 1; u = 1; break;
+  case ARM::BI__builtin_arm_vcvtr_d:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
   case ARM::BI__builtin_arm_dmb:
   case ARM::BI__builtin_arm_dsb:
   case ARM::BI__builtin_arm_isb:
-  case ARM::BI__builtin_arm_dbg: l = 0; u = 15; break;
+  case ARM::BI__builtin_arm_dbg:
+    return SemaBuiltinConstantArgRange(TheCall, 0, 0, 15);
   }
-
-  // FIXME: VFP Intrinsics should error if VFP not present.
-  return SemaBuiltinConstantArgRange(TheCall, i, l, u + l);
 }
 
 bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID,
@@ -2221,7 +2280,7 @@
   default:
     return false;
   case X86::BI_mm_prefetch:
-    i = 1; l = 0; u = 3;
+    i = 1; l = 0; u = 7;
     break;
   case X86::BI__builtin_ia32_sha1rnds4:
   case X86::BI__builtin_ia32_shuf_f32x4_256_mask:
@@ -2304,15 +2363,11 @@
   case X86::BI__builtin_ia32_cmpss_mask:
     i = 2; l = 0; u = 31;
     break;
-  case X86::BI__builtin_ia32_xabort:
-    i = 0; l = -128; u = 255;
-    break;
-  case X86::BI__builtin_ia32_pshufw:
-  case X86::BI__builtin_ia32_aeskeygenassist128:
-    i = 1; l = -128; u = 255;
-    break;
   case X86::BI__builtin_ia32_vcvtps2ph:
+  case X86::BI__builtin_ia32_vcvtps2ph_mask:
   case X86::BI__builtin_ia32_vcvtps2ph256:
+  case X86::BI__builtin_ia32_vcvtps2ph256_mask:
+  case X86::BI__builtin_ia32_vcvtps2ph512_mask:
   case X86::BI__builtin_ia32_rndscaleps_128_mask:
   case X86::BI__builtin_ia32_rndscalepd_128_mask:
   case X86::BI__builtin_ia32_rndscaleps_256_mask:
@@ -2345,27 +2400,6 @@
   case X86::BI__builtin_ia32_fpclassss_mask:
     i = 1; l = 0; u = 255;
     break;
-  case X86::BI__builtin_ia32_palignr:
-  case X86::BI__builtin_ia32_insertps128:
-  case X86::BI__builtin_ia32_dpps:
-  case X86::BI__builtin_ia32_dppd:
-  case X86::BI__builtin_ia32_dpps256:
-  case X86::BI__builtin_ia32_mpsadbw128:
-  case X86::BI__builtin_ia32_mpsadbw256:
-  case X86::BI__builtin_ia32_pcmpistrm128:
-  case X86::BI__builtin_ia32_pcmpistri128:
-  case X86::BI__builtin_ia32_pcmpistria128:
-  case X86::BI__builtin_ia32_pcmpistric128:
-  case X86::BI__builtin_ia32_pcmpistrio128:
-  case X86::BI__builtin_ia32_pcmpistris128:
-  case X86::BI__builtin_ia32_pcmpistriz128:
-  case X86::BI__builtin_ia32_pclmulqdq128:
-  case X86::BI__builtin_ia32_vperm2f128_pd256:
-  case X86::BI__builtin_ia32_vperm2f128_ps256:
-  case X86::BI__builtin_ia32_vperm2f128_si256:
-  case X86::BI__builtin_ia32_permti256:
-    i = 2; l = -128; u = 255;
-    break;
   case X86::BI__builtin_ia32_palignr128:
   case X86::BI__builtin_ia32_palignr256:
   case X86::BI__builtin_ia32_palignr512_mask:
@@ -2378,6 +2412,24 @@
   case X86::BI__builtin_ia32_dbpsadbw128_mask:
   case X86::BI__builtin_ia32_dbpsadbw256_mask:
   case X86::BI__builtin_ia32_dbpsadbw512_mask:
+  case X86::BI__builtin_ia32_vpshldd128_mask:
+  case X86::BI__builtin_ia32_vpshldd256_mask:
+  case X86::BI__builtin_ia32_vpshldd512_mask:
+  case X86::BI__builtin_ia32_vpshldq128_mask:
+  case X86::BI__builtin_ia32_vpshldq256_mask:
+  case X86::BI__builtin_ia32_vpshldq512_mask:
+  case X86::BI__builtin_ia32_vpshldw128_mask:
+  case X86::BI__builtin_ia32_vpshldw256_mask:
+  case X86::BI__builtin_ia32_vpshldw512_mask:
+  case X86::BI__builtin_ia32_vpshrdd128_mask:
+  case X86::BI__builtin_ia32_vpshrdd256_mask:
+  case X86::BI__builtin_ia32_vpshrdd512_mask:
+  case X86::BI__builtin_ia32_vpshrdq128_mask:
+  case X86::BI__builtin_ia32_vpshrdq256_mask:
+  case X86::BI__builtin_ia32_vpshrdq512_mask:
+  case X86::BI__builtin_ia32_vpshrdw128_mask:
+  case X86::BI__builtin_ia32_vpshrdw256_mask:
+  case X86::BI__builtin_ia32_vpshrdw512_mask:
     i = 2; l = 0; u = 255;
     break;
   case X86::BI__builtin_ia32_fixupimmpd512_mask:
@@ -2420,15 +2472,6 @@
   case X86::BI__builtin_ia32_scatterpfqps:
     i = 4; l = 2; u = 3;
     break;
-  case X86::BI__builtin_ia32_pcmpestrm128:
-  case X86::BI__builtin_ia32_pcmpestri128:
-  case X86::BI__builtin_ia32_pcmpestria128:
-  case X86::BI__builtin_ia32_pcmpestric128:
-  case X86::BI__builtin_ia32_pcmpestrio128:
-  case X86::BI__builtin_ia32_pcmpestris128:
-  case X86::BI__builtin_ia32_pcmpestriz128:
-    i = 4; l = -128; u = 255;
-    break;
   case X86::BI__builtin_ia32_rndscalesd_round_mask:
   case X86::BI__builtin_ia32_rndscaless_round_mask:
     i = 4; l = 0; u = 255;
@@ -2505,6 +2548,7 @@
   }
   return false;
 }
+
 /// \brief Diagnose use of %s directive in an NSString which is being passed
 /// as formatting string to formatting method.
 static void
@@ -2575,12 +2619,13 @@
         return;
       }
 
-      for (unsigned Val : NonNull->args()) {
-        if (Val >= Args.size())
+      for (const ParamIdx &Idx : NonNull->args()) {
+        unsigned IdxAST = Idx.getASTIndex();
+        if (IdxAST >= Args.size())
           continue;
         if (NonNullArgs.empty())
           NonNullArgs.resize(Args.size());
-        NonNullArgs.set(Val);
+        NonNullArgs.set(IdxAST);
       }
     }
   }
@@ -2698,7 +2743,7 @@
     // Type safety checking.
     if (FDecl) {
       for (const auto *I : FDecl->specific_attrs<ArgumentWithTypeTagAttr>())
-        CheckArgumentWithTypeTag(I, Args.data());
+        CheckArgumentWithTypeTag(I, Args, Loc);
     }
   }
 
@@ -2871,23 +2916,32 @@
   enum {
     // C    __c11_atomic_init(A *, C)
     Init,
+
     // C    __c11_atomic_load(A *, int)
     Load,
+
     // void __atomic_load(A *, CP, int)
     LoadCopy,
+
     // void __atomic_store(A *, CP, int)
     Copy,
+
     // C    __c11_atomic_add(A *, M, int)
     Arithmetic,
+
     // C    __atomic_exchange_n(A *, CP, int)
     Xchg,
+
     // void __atomic_exchange(A *, C *, CP, int)
     GNUXchg,
+
     // bool __c11_atomic_compare_exchange_strong(A *, C *, CP, int, int)
     C11CmpXchg,
+
     // bool __atomic_compare_exchange(A *, C *, CP, bool, int, int)
     GNUCmpXchg
   } Form = Init;
+
   const unsigned NumForm = GNUCmpXchg + 1;
   const unsigned NumArgs[] = { 2, 2, 3, 3, 3, 3, 4, 5, 6 };
   const unsigned NumVals[] = { 1, 0, 1, 1, 1, 1, 2, 2, 3 };
@@ -2950,7 +3004,7 @@
   case AtomicExpr::AO__atomic_add_fetch:
   case AtomicExpr::AO__atomic_sub_fetch:
     IsAddSub = true;
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case AtomicExpr::AO__c11_atomic_fetch_and:
   case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__c11_atomic_fetch_xor:
@@ -3780,7 +3834,7 @@
   bool IsWindows = TT.isOSWindows();
   bool IsMSVAStart = BuiltinID == Builtin::BI__builtin_ms_va_start;
   if (IsX64 || IsAArch64) {
-    clang::CallingConv CC = CC_C;
+    CallingConv CC = CC_C;
     if (const FunctionDecl *FD = S.getCurFunctionDecl())
       CC = FD->getType()->getAs<FunctionType>()->getCallConv();
     if (IsMSVAStart) {
@@ -4302,9 +4356,9 @@
            << (unsigned)Context.getCharWidth()
            << Arg->getSourceRange();
 
-    if (Result > INT32_MAX)
+    if (Result > std::numeric_limits<int32_t>::max())
       return Diag(TheCall->getLocStart(), diag::err_alignment_too_big)
-           << INT32_MAX
+           << std::numeric_limits<int32_t>::max()
            << Arg->getSourceRange();
   }
 
@@ -4569,7 +4623,6 @@
     if (!ValidString)
       return Diag(TheCall->getLocStart(), diag::err_arm_invalid_specialreg)
              << Arg->getSourceRange();
-
   } else if (IsAArch64Builtin && Fields.size() == 1) {
     // If the register name is one of those that appear in the condition below
     // and the special register builtin being used is one of the write builtins,
@@ -4623,13 +4676,15 @@
 }
 
 namespace {
+
 class UncoveredArgHandler {
   enum { Unknown = -1, AllCovered = -2 };
-  signed FirstUncoveredArg;
+
+  signed FirstUncoveredArg = Unknown;
   SmallVector<const Expr *, 4> DiagnosticExprs;
 
 public:
-  UncoveredArgHandler() : FirstUncoveredArg(Unknown) { }
+  UncoveredArgHandler() = default;
 
   bool hasUncoveredArg() const {
     return (FirstUncoveredArg >= 0);
@@ -4673,7 +4728,8 @@
   SLCT_UncheckedLiteral,
   SLCT_CheckedLiteral
 };
-} // end anonymous namespace
+
+} // namespace
 
 static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend,
                                      BinaryOperatorKind BinOpKind,
@@ -4706,7 +4762,8 @@
   // We add an offset to a pointer here so we should support an offset as big as
   // possible.
   if (Ov) {
-    assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+    assert(BitWidth <= std::numeric_limits<unsigned>::max() / 2 &&
+           "index (intermediate) result too big");
     Offset = Offset.sext(2 * BitWidth);
     sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
     return;
@@ -4716,6 +4773,7 @@
 }
 
 namespace {
+
 // This is a wrapper class around StringLiteral to support offsetted string
 // literals as format strings. It takes the offset into account when returning
 // the string and its length or the source locations to display notes correctly.
@@ -4734,6 +4792,7 @@
   unsigned getByteLength() const {
     return FExpr->getByteLength() - getCharByteWidth() * Offset;
   }
+
   unsigned getLength() const { return FExpr->getLength() - Offset; }
   unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
 
@@ -4759,9 +4818,11 @@
   SourceLocation getLocStart() const LLVM_READONLY {
     return FExpr->getLocStart().getLocWithOffset(Offset);
   }
+
   SourceLocation getLocEnd() const LLVM_READONLY { return FExpr->getLocEnd(); }
 };
-}  // end anonymous namespace
+
+}  // namespace
 
 static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr,
                               const Expr *OrigFormatExpr,
@@ -4848,10 +4909,9 @@
     return (CheckLeft && Left < Right) ? Left : Right;
   }
 
-  case Stmt::ImplicitCastExprClass: {
+  case Stmt::ImplicitCastExprClass:
     E = cast<ImplicitCastExpr>(E)->getSubExpr();
     goto tryAgain;
-  }
 
   case Stmt::OpaqueValueExprClass:
     if (const Expr *src = cast<OpaqueValueExpr>(E)->getSourceExpr()) {
@@ -4943,12 +5003,7 @@
     const CallExpr *CE = cast<CallExpr>(E);
     if (const NamedDecl *ND = dyn_cast_or_null<NamedDecl>(CE->getCalleeDecl())) {
       if (const FormatArgAttr *FA = ND->getAttr<FormatArgAttr>()) {
-        unsigned ArgIndex = FA->getFormatIdx();
-        if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(ND))
-          if (MD->isInstance())
-            --ArgIndex;
-        const Expr *Arg = CE->getArg(ArgIndex - 1);
-
+        const Expr *Arg = CE->getArg(FA->formatIdx().getASTIndex());
         return checkFormatStringExpr(S, Arg, Args,
                                      HasVAListArg, format_idx, firstDataArg,
                                      Type, CallType, InFunctionCall,
@@ -4973,8 +5028,7 @@
     const auto *ME = cast<ObjCMessageExpr>(E);
     if (const auto *ND = ME->getMethodDecl()) {
       if (const auto *FA = ND->getAttr<FormatArgAttr>()) {
-        unsigned ArgIndex = FA->getFormatIdx();
-        const Expr *Arg = ME->getArg(ArgIndex - 1);
+        const Expr *Arg = ME->getArg(FA->formatIdx().getASTIndex());
         return checkFormatStringExpr(
             S, Arg, Args, HasVAListArg, format_idx, firstDataArg, Type,
             CallType, InFunctionCall, CheckedVarArgs, UncoveredArg, Offset);
@@ -5040,7 +5094,7 @@
   case Stmt::UnaryOperatorClass: {
     const UnaryOperator *UnaOp = cast<UnaryOperator>(E);
     auto ASE = dyn_cast<ArraySubscriptExpr>(UnaOp->getSubExpr());
-    if (UnaOp->getOpcode() == clang::UO_AddrOf && ASE) {
+    if (UnaOp->getOpcode() == UO_AddrOf && ASE) {
       llvm::APSInt IndexResult;
       if (ASE->getRHS()->EvaluateAsInt(IndexResult, S.Context)) {
         sumOffsets(Offset, IndexResult, BO_Add, /*RHS is int*/ true);
@@ -5173,6 +5227,7 @@
 }
 
 namespace {
+
 class CheckFormatHandler : public analyze_format_string::FormatStringHandler {
 protected:
   Sema &S;
@@ -5186,8 +5241,8 @@
   ArrayRef<const Expr *> Args;
   unsigned FormatIdx;
   llvm::SmallBitVector CoveredArgs;
-  bool usesPositionalArgs;
-  bool atFirstArg;
+  bool usesPositionalArgs = false;
+  bool atFirstArg = true;
   bool inFunctionCall;
   Sema::VariadicCallType CallType;
   llvm::SmallBitVector &CheckedVarArgs;
@@ -5205,7 +5260,6 @@
       : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr), FSType(type),
         FirstDataArg(firstDataArg), NumDataArgs(numDataArgs), Beg(beg),
         HasVAListArg(hasVAListArg), Args(Args), FormatIdx(formatIdx),
-        usesPositionalArgs(false), atFirstArg(true),
         inFunctionCall(inFunctionCall), CallType(callType),
         CheckedVarArgs(CheckedVarArgs), UncoveredArg(UncoveredArg) {
     CoveredArgs.resize(numDataArgs);
@@ -5275,7 +5329,8 @@
                             bool IsStringLocation, Range StringRange,
                             ArrayRef<FixItHint> Fixit = None);
 };
-} // end anonymous namespace
+
+} // namespace
 
 SourceRange CheckFormatHandler::getFormatStringRange() {
   return OrigFormatExpr->getSourceRange();
@@ -5629,6 +5684,7 @@
 //===--- CHECK: Printf format string checking ------------------------------===//
 
 namespace {
+
 class CheckPrintfHandler : public CheckFormatHandler {
 public:
   CheckPrintfHandler(Sema &s, const FormatStringLiteral *fexpr,
@@ -5693,7 +5749,8 @@
                                            const char *conversionPosition) 
                                              override;
 };
-} // end anonymous namespace
+
+} // namespace
 
 bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier(
                                       const analyze_printf::PrintfSpecifier &FS,
@@ -5811,10 +5868,6 @@
                          getSpecifierRange(ignoredFlag.getPosition(), 1)));
 }
 
-//  void EmitFormatDiagnostic(PartialDiagnostic PDiag, SourceLocation StringLoc,
-//                            bool IsStringLocation, Range StringRange,
-//                            ArrayRef<FixItHint> Fixit = None);
-                            
 void CheckPrintfHandler::HandleEmptyObjCModifierFlag(const char *startFlag,
                                                      unsigned flagLen) {
   // Warn about an empty flag.
@@ -5881,7 +5934,8 @@
 /// FIXME: This returns the wrong results in some cases (if cv-qualifiers don't
 /// allow the call, or if it would be ambiguous).
 bool Sema::hasCStrMethod(const Expr *E) {
-  typedef llvm::SmallPtrSet<CXXMethodDecl*, 1> MethodSet;
+  using MethodSet = llvm::SmallPtrSet<CXXMethodDecl *, 1>;
+
   MethodSet Results =
       CXXRecordMembersNamed<CXXMethodDecl>("c_str", *this, E->getType());
   for (MethodSet::iterator MI = Results.begin(), ME = Results.end();
@@ -5896,7 +5950,7 @@
 // Returns true when a c_str() conversion method is found.
 bool CheckPrintfHandler::checkForCStrMembers(
     const analyze_printf::ArgType &AT, const Expr *E) {
-  typedef llvm::SmallPtrSet<CXXMethodDecl*, 1> MethodSet;
+  using MethodSet = llvm::SmallPtrSet<CXXMethodDecl *, 1>;
 
   MethodSet Results =
       CXXRecordMembersNamed<CXXMethodDecl>("c_str", S, E->getType());
@@ -5924,7 +5978,8 @@
                                           const char *startSpecifier,
                                           unsigned specifierLen) {
   using namespace analyze_format_string;
-  using namespace analyze_printf;  
+  using namespace analyze_printf;
+
   const PrintfConversionSpecifier &CS = FS.getConversionSpecifier();
 
   if (FS.consumesDataArgument()) {
@@ -6222,6 +6277,7 @@
                                     const Expr *E) {
   using namespace analyze_format_string;
   using namespace analyze_printf;
+
   // Now type check the data expression that matches the
   // format specifier.
   const analyze_printf::ArgType &AT = FS.getArgType(S.Context, isObjCContext());
@@ -6474,6 +6530,7 @@
 //===--- CHECK: Scanf format string checking ------------------------------===//
 
 namespace {  
+
 class CheckScanfHandler : public CheckFormatHandler {
 public:
   CheckScanfHandler(Sema &s, const FormatStringLiteral *fexpr,
@@ -6500,7 +6557,8 @@
 
   void HandleIncompleteScanList(const char *start, const char *end) override;
 };
-} // end anonymous namespace
+
+} // namespace
 
 void CheckScanfHandler::HandleIncompleteScanList(const char *start,
                                                  const char *end) {
@@ -6513,7 +6571,6 @@
                                         const analyze_scanf::ScanfSpecifier &FS,
                                         const char *startSpecifier,
                                         unsigned specifierLen) {
-
   const analyze_scanf::ScanfConversionSpecifier &CS =
     FS.getConversionSpecifier();
 
@@ -7200,8 +7257,8 @@
   if (!Size)
     return false;
 
-  // if E is binop and op is >, <, >=, <=, ==, &&, ||:
-  if (!Size->isComparisonOp() && !Size->isEqualityOp() && !Size->isLogicalOp())
+  // if E is binop and op is <=>, >, <, >=, <=, ==, &&, ||:
+  if (!Size->isComparisonOp() && !Size->isLogicalOp())
     return false;
 
   SourceRange SizeRange = Size->getSourceRange();
@@ -7255,7 +7312,7 @@
 static const Expr *getSizeOfExprArg(const Expr *E) {
   if (const UnaryExprOrTypeTraitExpr *SizeOf =
       dyn_cast<UnaryExprOrTypeTraitExpr>(E))
-    if (SizeOf->getKind() == clang::UETT_SizeOf && !SizeOf->isArgumentType())
+    if (SizeOf->getKind() == UETT_SizeOf && !SizeOf->isArgumentType())
       return SizeOf->getArgumentExpr()->IgnoreParenImpCasts();
 
   return nullptr;
@@ -7265,7 +7322,7 @@
 static QualType getSizeOfArgType(const Expr *E) {
   if (const UnaryExprOrTypeTraitExpr *SizeOf =
       dyn_cast<UnaryExprOrTypeTraitExpr>(E))
-    if (SizeOf->getKind() == clang::UETT_SizeOf)
+    if (SizeOf->getKind() == UETT_SizeOf)
       return SizeOf->getTypeOfArgument();
 
   return QualType();
@@ -7453,7 +7510,7 @@
 static const Expr *ignoreLiteralAdditions(const Expr *Ex, ASTContext &Ctx) {
   Ex = Ex->IgnoreParenCasts();
 
-  for (;;) {
+  while (true) {
     const BinaryOperator * BO = dyn_cast<BinaryOperator>(Ex);
     if (!BO || !BO->isAdditiveOp())
       break;
@@ -7671,7 +7728,6 @@
 static void
 CheckReturnStackAddr(Sema &S, Expr *RetValExp, QualType lhsType,
                      SourceLocation ReturnLoc) {
-
   const Expr *stackE = nullptr;
   SmallVector<const DeclRefExpr *, 8> refVars;
 
@@ -8156,8 +8212,7 @@
   bool NonNegative;
 
   IntRange(unsigned Width, bool NonNegative)
-    : Width(Width), NonNegative(NonNegative)
-  {}
+      : Width(Width), NonNegative(NonNegative) {}
 
   /// Returns the range of the bool type.
   static IntRange forBoolType() {
@@ -8181,14 +8236,19 @@
     if (const AtomicType *AT = dyn_cast<AtomicType>(T))
       T = AT->getValueType().getTypePtr();
 
-    // For enum types, use the known bit width of the enumerators.
-    if (const EnumType *ET = dyn_cast<EnumType>(T)) {
+    if (!C.getLangOpts().CPlusPlus) {
+      // For enum types in C code, use the underlying datatype.
+      if (const EnumType *ET = dyn_cast<EnumType>(T))
+        T = ET->getDecl()->getIntegerType().getDesugaredType(C).getTypePtr();
+    } else if (const EnumType *ET = dyn_cast<EnumType>(T)) {
+      // For enum types in C++, use the known bit width of the enumerators.
       EnumDecl *Enum = ET->getDecl();
-      // In C++11, enums without definitions can have an explicitly specified
-      // underlying type.  Use this type to compute the range.
-      if (!Enum->isCompleteDefinition())
+      // In C++11, enums can have a fixed underlying type. Use this type to
+      // compute the range.
+      if (Enum->isFixed()) {
         return IntRange(C.getIntWidth(QualType(T, 0)),
                         !ET->isSignedIntegerOrEnumerationType());
+      }
 
       unsigned NumPositive = Enum->getNumPositiveBits();
       unsigned NumNegative = Enum->getNumNegativeBits();
@@ -8242,7 +8302,10 @@
   }
 };
 
-IntRange GetValueRange(ASTContext &C, llvm::APSInt &value, unsigned MaxWidth) {
+} // namespace
+
+static IntRange GetValueRange(ASTContext &C, llvm::APSInt &value,
+                              unsigned MaxWidth) {
   if (value.isSigned() && value.isNegative())
     return IntRange(value.getMinSignedBits(), false);
 
@@ -8254,8 +8317,8 @@
   return IntRange(value.getActiveBits(), true);
 }
 
-IntRange GetValueRange(ASTContext &C, APValue &result, QualType Ty,
-                       unsigned MaxWidth) {
+static IntRange GetValueRange(ASTContext &C, APValue &result, QualType Ty,
+                              unsigned MaxWidth) {
   if (result.isInt())
     return GetValueRange(C, result.getInt(), MaxWidth);
 
@@ -8283,7 +8346,7 @@
   return IntRange(MaxWidth, Ty->isUnsignedIntegerOrEnumerationType());
 }
 
-QualType GetExprType(const Expr *E) {
+static QualType GetExprType(const Expr *E) {
   QualType Ty = E->getType();
   if (const AtomicType *AtomicRHS = Ty->getAs<AtomicType>())
     Ty = AtomicRHS->getValueType();
@@ -8294,7 +8357,7 @@
 /// range of values it might take.
 ///
 /// \param MaxWidth - the width to which the value will be truncated
-IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth) {
+static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth) {
   E = E->IgnoreParens();
 
   // Try a full evaluation first.
@@ -8348,6 +8411,8 @@
 
   if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
     switch (BO->getOpcode()) {
+    case BO_Cmp:
+      llvm_unreachable("builtin <=> should have class type");
 
     // Boolean-valued operations are single-bit and positive.
     case BO_LAnd:
@@ -8400,7 +8465,7 @@
           return IntRange(R.Width, /*NonNegative*/ true);
         }
       }
-      // fallthrough
+      LLVM_FALLTHROUGH;
 
     case BO_ShlAssign:
       return IntRange::forValueOfType(C, GetExprType(E));
@@ -8512,16 +8577,16 @@
   return IntRange::forValueOfType(C, GetExprType(E));
 }
 
-IntRange GetExprRange(ASTContext &C, const Expr *E) {
+static IntRange GetExprRange(ASTContext &C, const Expr *E) {
   return GetExprRange(C, E, C.getIntWidth(GetExprType(E)));
 }
 
 /// Checks whether the given value, which currently has the given
 /// source semantics, has the same value when coerced through the
 /// target semantics.
-bool IsSameFloatAfterCast(const llvm::APFloat &value,
-                          const llvm::fltSemantics &Src,
-                          const llvm::fltSemantics &Tgt) {
+static bool IsSameFloatAfterCast(const llvm::APFloat &value,
+                                 const llvm::fltSemantics &Src,
+                                 const llvm::fltSemantics &Tgt) {
   llvm::APFloat truncated = value;
 
   bool ignored;
@@ -8536,9 +8601,9 @@
 /// target semantics.
 ///
 /// The value might be a vector of floats (or a complex number).
-bool IsSameFloatAfterCast(const APValue &value,
-                          const llvm::fltSemantics &Src,
-                          const llvm::fltSemantics &Tgt) {
+static bool IsSameFloatAfterCast(const APValue &value,
+                                 const llvm::fltSemantics &Src,
+                                 const llvm::fltSemantics &Tgt) {
   if (value.isFloat())
     return IsSameFloatAfterCast(value.getFloat(), Src, Tgt);
 
@@ -8554,9 +8619,9 @@
           IsSameFloatAfterCast(value.getComplexFloatImag(), Src, Tgt));
 }
 
-void AnalyzeImplicitConversions(Sema &S, Expr *E, SourceLocation CC);
+static void AnalyzeImplicitConversions(Sema &S, Expr *E, SourceLocation CC);
 
-bool IsEnumConstOrFromMacro(Sema &S, Expr *E) {
+static bool IsEnumConstOrFromMacro(Sema &S, Expr *E) {
   // Suppress cases where we are comparing against an enum constant.
   if (const DeclRefExpr *DR =
       dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts()))
@@ -8570,60 +8635,138 @@
   return false;
 }
 
-bool isNonBooleanIntegerValue(Expr *E) {
-  return !E->isKnownToHaveBooleanValue() && E->getType()->isIntegerType();
-}
-
-bool isNonBooleanUnsignedValue(Expr *E) {
-  // We are checking that the expression is not known to have boolean value,
-  // is an integer type; and is either unsigned after implicit casts,
-  // or was unsigned before implicit casts.
-  return isNonBooleanIntegerValue(E) &&
+static bool isKnownToHaveUnsignedValue(Expr *E) {
+  return E->getType()->isIntegerType() &&
          (!E->getType()->isSignedIntegerType() ||
           !E->IgnoreParenImpCasts()->getType()->isSignedIntegerType());
 }
 
-enum class LimitType {
-  Max, // e.g. 32767 for short
-  Min  // e.g. -32768 for short
+namespace {
+/// The promoted range of values of a type. In general this has the
+/// following structure:
+///
+///     |-----------| . . . |-----------|
+///     ^           ^       ^           ^
+///    Min       HoleMin  HoleMax      Max
+///
+/// ... where there is only a hole if a signed type is promoted to unsigned
+/// (in which case Min and Max are the smallest and largest representable
+/// values).
+struct PromotedRange {
+  // Min, or HoleMax if there is a hole.
+  llvm::APSInt PromotedMin;
+  // Max, or HoleMin if there is a hole.
+  llvm::APSInt PromotedMax;
+
+  PromotedRange(IntRange R, unsigned BitWidth, bool Unsigned) {
+    if (R.Width == 0)
+      PromotedMin = PromotedMax = llvm::APSInt(BitWidth, Unsigned);
+    else if (R.Width >= BitWidth && !Unsigned) {
+      // Promotion made the type *narrower*. This happens when promoting
+      // a < 32-bit unsigned / <= 32-bit signed bit-field to 'signed int'.
+      // Treat all values of 'signed int' as being in range for now.
+      PromotedMin = llvm::APSInt::getMinValue(BitWidth, Unsigned);
+      PromotedMax = llvm::APSInt::getMaxValue(BitWidth, Unsigned);
+    } else {
+      PromotedMin = llvm::APSInt::getMinValue(R.Width, R.NonNegative)
+                        .extOrTrunc(BitWidth);
+      PromotedMin.setIsUnsigned(Unsigned);
+
+      PromotedMax = llvm::APSInt::getMaxValue(R.Width, R.NonNegative)
+                        .extOrTrunc(BitWidth);
+      PromotedMax.setIsUnsigned(Unsigned);
+    }
+  }
+
+  // Determine whether this range is contiguous (has no hole).
+  bool isContiguous() const { return PromotedMin <= PromotedMax; }
+
+  // Where a constant value is within the range.
+  enum ComparisonResult {
+    LT = 0x1,
+    LE = 0x2,
+    GT = 0x4,
+    GE = 0x8,
+    EQ = 0x10,
+    NE = 0x20,
+    InRangeFlag = 0x40,
+
+    Less = LE | LT | NE,
+    Min = LE | InRangeFlag,
+    InRange = InRangeFlag,
+    Max = GE | InRangeFlag,
+    Greater = GE | GT | NE,
+
+    OnlyValue = LE | GE | EQ | InRangeFlag,
+    InHole = NE
+  };
+
+  ComparisonResult compare(const llvm::APSInt &Value) const {
+    assert(Value.getBitWidth() == PromotedMin.getBitWidth() &&
+           Value.isUnsigned() == PromotedMin.isUnsigned());
+    if (!isContiguous()) {
+      assert(Value.isUnsigned() && "discontiguous range for signed compare");
+      if (Value.isMinValue()) return Min;
+      if (Value.isMaxValue()) return Max;
+      if (Value >= PromotedMin) return InRange;
+      if (Value <= PromotedMax) return InRange;
+      return InHole;
+    }
+
+    switch (llvm::APSInt::compareValues(Value, PromotedMin)) {
+    case -1: return Less;
+    case 0: return PromotedMin == PromotedMax ? OnlyValue : Min;
+    case 1:
+      switch (llvm::APSInt::compareValues(Value, PromotedMax)) {
+      case -1: return InRange;
+      case 0: return Max;
+      case 1: return Greater;
+      }
+    }
+
+    llvm_unreachable("impossible compare result");
+  }
+
+  static llvm::Optional<StringRef>
+  constantValue(BinaryOperatorKind Op, ComparisonResult R, bool ConstantOnRHS) {
+    if (Op == BO_Cmp) {
+      ComparisonResult LTFlag = LT, GTFlag = GT;
+      if (ConstantOnRHS) std::swap(LTFlag, GTFlag);
+
+      if (R & EQ) return StringRef("'std::strong_ordering::equal'");
+      if (R & LTFlag) return StringRef("'std::strong_ordering::less'");
+      if (R & GTFlag) return StringRef("'std::strong_ordering::greater'");
+      return llvm::None;
+    }
+
+    ComparisonResult TrueFlag, FalseFlag;
+    if (Op == BO_EQ) {
+      TrueFlag = EQ;
+      FalseFlag = NE;
+    } else if (Op == BO_NE) {
+      TrueFlag = NE;
+      FalseFlag = EQ;
+    } else {
+      if ((Op == BO_LT || Op == BO_GE) ^ ConstantOnRHS) {
+        TrueFlag = LT;
+        FalseFlag = GE;
+      } else {
+        TrueFlag = GT;
+        FalseFlag = LE;
+      }
+      if (Op == BO_GE || Op == BO_LE)
+        std::swap(TrueFlag, FalseFlag);
+    }
+    if (R & TrueFlag)
+      return StringRef("true");
+    if (R & FalseFlag)
+      return StringRef("false");
+    return llvm::None;
+  }
 };
-
-/// Checks whether Expr 'Constant' may be the
-/// std::numeric_limits<>::max() or std::numeric_limits<>::min()
-/// of the Expr 'Other'. If true, then returns the limit type (min or max).
-/// The Value is the evaluation of Constant
-llvm::Optional<LimitType> IsTypeLimit(Sema &S, Expr *Constant, Expr *Other,
-                                      const llvm::APSInt &Value) {
-  if (IsEnumConstOrFromMacro(S, Constant))
-    return llvm::Optional<LimitType>();
-
-  if (isNonBooleanUnsignedValue(Other) && Value == 0)
-    return LimitType::Min;
-
-  // TODO: Investigate using GetExprRange() to get tighter bounds
-  // on the bit ranges.
-  QualType OtherT = Other->IgnoreParenImpCasts()->getType();
-  if (const auto *AT = OtherT->getAs<AtomicType>())
-    OtherT = AT->getValueType();
-
-  IntRange OtherRange = IntRange::forValueOfType(S.Context, OtherT);
-
-  if (llvm::APSInt::isSameValue(
-          llvm::APSInt::getMaxValue(OtherRange.Width,
-                                    OtherT->isUnsignedIntegerType()),
-          Value))
-    return LimitType::Max;
-
-  if (llvm::APSInt::isSameValue(
-          llvm::APSInt::getMinValue(OtherRange.Width,
-                                    OtherT->isUnsignedIntegerType()),
-          Value))
-    return LimitType::Min;
-
-  return llvm::Optional<LimitType>();
 }
 
-bool HasEnumType(Expr *E) {
+static bool HasEnumType(Expr *E) {
   // Strip off implicit integral promotions.
   while (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E)) {
     if (ICE->getCastKind() != CK_IntegralCast &&
@@ -8635,236 +8778,75 @@
   return E->getType()->isEnumeralType();
 }
 
-bool CheckTautologicalComparison(Sema &S, BinaryOperator *E, Expr *Constant,
-                                 Expr *Other, const llvm::APSInt &Value,
-                                 bool RhsConstant) {
-  // Disable warning in template instantiations
-  // and only analyze <, >, <= and >= operations.
-  if (S.inTemplateInstantiation() || !E->isRelationalOp())
-    return false;
-
-  BinaryOperatorKind Op = E->getOpcode();
-
-  QualType OType = Other->IgnoreParenImpCasts()->getType();
-
-  llvm::Optional<LimitType> ValueType; // Which limit (min/max) is the constant?
-
-  if (!(isNonBooleanIntegerValue(Other) &&
-        (ValueType = IsTypeLimit(S, Constant, Other, Value))))
-    return false;
-
-  bool ConstIsLowerBound = (Op == BO_LT || Op == BO_LE) ^ RhsConstant;
-  bool ResultWhenConstEqualsOther = (Op == BO_LE || Op == BO_GE);
-  bool ResultWhenConstNeOther =
-      ConstIsLowerBound ^ (ValueType == LimitType::Max);
-  if (ResultWhenConstEqualsOther != ResultWhenConstNeOther)
-    return false; // The comparison is not tautological.
-
-  const bool Result = ResultWhenConstEqualsOther;
-
-  unsigned Diag = (isNonBooleanUnsignedValue(Other) && Value == 0)
-                      ? (HasEnumType(Other)
-                             ? diag::warn_unsigned_enum_always_true_comparison
-                             : diag::warn_unsigned_always_true_comparison)
-                      : diag::warn_tautological_constant_compare;
-
-  // Should be enough for uint128 (39 decimal digits)
-  SmallString<64> PrettySourceValue;
-  llvm::raw_svector_ostream OS(PrettySourceValue);
-  OS << Value;
-
-  S.Diag(E->getOperatorLoc(), Diag)
-      << RhsConstant << OType << E->getOpcodeStr() << OS.str() << Result
-      << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange();
-
-  return true;
+static int classifyConstantValue(Expr *Constant) {
+  // The values of this enumeration are used in the diagnostics
+  // diag::warn_out_of_range_compare and diag::warn_tautological_bool_compare.
+  enum ConstantValueKind {
+    Miscellaneous = 0,
+    LiteralTrue,
+    LiteralFalse
+  };
+  if (auto *BL = dyn_cast<CXXBoolLiteralExpr>(Constant))
+    return BL->getValue() ? ConstantValueKind::LiteralTrue
+                          : ConstantValueKind::LiteralFalse;
+  return ConstantValueKind::Miscellaneous;
 }
 
-bool DiagnoseOutOfRangeComparison(Sema &S, BinaryOperator *E, Expr *Constant,
-                                  Expr *Other, const llvm::APSInt &Value,
-                                  bool RhsConstant) {
-  // Disable warning in template instantiations.
+static bool CheckTautologicalComparison(Sema &S, BinaryOperator *E,
+                                        Expr *Constant, Expr *Other,
+                                        const llvm::APSInt &Value,
+                                        bool RhsConstant) {
   if (S.inTemplateInstantiation())
     return false;
 
+  Expr *OriginalOther = Other;
+
   Constant = Constant->IgnoreParenImpCasts();
   Other = Other->IgnoreParenImpCasts();
 
+  // Suppress warnings on tautological comparisons between values of the same
+  // enumeration type. There are only two ways we could warn on this:
+  //  - If the constant is outside the range of representable values of
+  //    the enumeration. In such a case, we should warn about the cast
+  //    to enumeration type, not about the comparison.
+  //  - If the constant is the maximum / minimum in-range value. For an
+  //    enumeratin type, such comparisons can be meaningful and useful.
+  if (Constant->getType()->isEnumeralType() &&
+      S.Context.hasSameUnqualifiedType(Constant->getType(), Other->getType()))
+    return false;
+
   // TODO: Investigate using GetExprRange() to get tighter bounds
   // on the bit ranges.
   QualType OtherT = Other->getType();
   if (const auto *AT = OtherT->getAs<AtomicType>())
     OtherT = AT->getValueType();
   IntRange OtherRange = IntRange::forValueOfType(S.Context, OtherT);
-  unsigned OtherWidth = OtherRange.Width;
 
-  bool OtherIsBooleanType = Other->isKnownToHaveBooleanValue();
+  // Whether we're treating Other as being a bool because of the form of
+  // expression despite it having another type (typically 'int' in C).
+  bool OtherIsBooleanDespiteType =
+      !OtherT->isBooleanType() && Other->isKnownToHaveBooleanValue();
+  if (OtherIsBooleanDespiteType)
+    OtherRange = IntRange::forBoolType();
 
-  BinaryOperatorKind op = E->getOpcode();
-  bool IsTrue = true;
+  // Determine the promoted range of the other type and see if a comparison of
+  // the constant against that range is tautological.
+  PromotedRange OtherPromotedRange(OtherRange, Value.getBitWidth(),
+                                   Value.isUnsigned());
+  auto Cmp = OtherPromotedRange.compare(Value);
+  auto Result = PromotedRange::constantValue(E->getOpcode(), Cmp, RhsConstant);
+  if (!Result)
+    return false;
 
-  // Used for diagnostic printout.
-  enum {
-    LiteralConstant = 0,
-    CXXBoolLiteralTrue,
-    CXXBoolLiteralFalse
-  } LiteralOrBoolConstant = LiteralConstant;
-
-  if (!OtherIsBooleanType) {
-    QualType ConstantT = Constant->getType();
-    QualType CommonT = E->getLHS()->getType();
-
-    if (S.Context.hasSameUnqualifiedType(OtherT, ConstantT))
-      return false;
-    assert((OtherT->isIntegerType() && ConstantT->isIntegerType()) &&
-           "comparison with non-integer type");
-
-    bool ConstantSigned = ConstantT->isSignedIntegerType();
-    bool CommonSigned = CommonT->isSignedIntegerType();
-
-    bool EqualityOnly = false;
-
-    if (CommonSigned) {
-      // The common type is signed, therefore no signed to unsigned conversion.
-      if (!OtherRange.NonNegative) {
-        // Check that the constant is representable in type OtherT.
-        if (ConstantSigned) {
-          if (OtherWidth >= Value.getMinSignedBits())
-            return false;
-        } else { // !ConstantSigned
-          if (OtherWidth >= Value.getActiveBits() + 1)
-            return false;
-        }
-      } else { // !OtherSigned
-               // Check that the constant is representable in type OtherT.
-        // Negative values are out of range.
-        if (ConstantSigned) {
-          if (Value.isNonNegative() && OtherWidth >= Value.getActiveBits())
-            return false;
-        } else { // !ConstantSigned
-          if (OtherWidth >= Value.getActiveBits())
-            return false;
-        }
-      }
-    } else { // !CommonSigned
-      if (OtherRange.NonNegative) {
-        if (OtherWidth >= Value.getActiveBits())
-          return false;
-      } else { // OtherSigned
-        assert(!ConstantSigned &&
-               "Two signed types converted to unsigned types.");
-        // Check to see if the constant is representable in OtherT.
-        if (OtherWidth > Value.getActiveBits())
-          return false;
-        // Check to see if the constant is equivalent to a negative value
-        // cast to CommonT.
-        if (S.Context.getIntWidth(ConstantT) ==
-                S.Context.getIntWidth(CommonT) &&
-            Value.isNegative() && Value.getMinSignedBits() <= OtherWidth)
-          return false;
-        // The constant value rests between values that OtherT can represent
-        // after conversion.  Relational comparison still works, but equality
-        // comparisons will be tautological.
-        EqualityOnly = true;
-      }
-    }
-
-    bool PositiveConstant = !ConstantSigned || Value.isNonNegative();
-
-    if (op == BO_EQ || op == BO_NE) {
-      IsTrue = op == BO_NE;
-    } else if (EqualityOnly) {
-      return false;
-    } else if (RhsConstant) {
-      if (op == BO_GT || op == BO_GE)
-        IsTrue = !PositiveConstant;
-      else // op == BO_LT || op == BO_LE
-        IsTrue = PositiveConstant;
-    } else {
-      if (op == BO_LT || op == BO_LE)
-        IsTrue = !PositiveConstant;
-      else // op == BO_GT || op == BO_GE
-        IsTrue = PositiveConstant;
-    }
-  } else {
-    // Other isKnownToHaveBooleanValue
-    enum CompareBoolWithConstantResult { AFals, ATrue, Unkwn };
-    enum ConstantValue { LT_Zero, Zero, One, GT_One, SizeOfConstVal };
-    enum ConstantSide { Lhs, Rhs, SizeOfConstSides };
-
-    static const struct LinkedConditions {
-      CompareBoolWithConstantResult BO_LT_OP[SizeOfConstSides][SizeOfConstVal];
-      CompareBoolWithConstantResult BO_GT_OP[SizeOfConstSides][SizeOfConstVal];
-      CompareBoolWithConstantResult BO_LE_OP[SizeOfConstSides][SizeOfConstVal];
-      CompareBoolWithConstantResult BO_GE_OP[SizeOfConstSides][SizeOfConstVal];
-      CompareBoolWithConstantResult BO_EQ_OP[SizeOfConstSides][SizeOfConstVal];
-      CompareBoolWithConstantResult BO_NE_OP[SizeOfConstSides][SizeOfConstVal];
-
-    } TruthTable = {
-        // Constant on LHS.              | Constant on RHS.              |
-        // LT_Zero| Zero  | One   |GT_One| LT_Zero| Zero  | One   |GT_One|
-        { { ATrue, Unkwn, AFals, AFals }, { AFals, AFals, Unkwn, ATrue } },
-        { { AFals, AFals, Unkwn, ATrue }, { ATrue, Unkwn, AFals, AFals } },
-        { { ATrue, ATrue, Unkwn, AFals }, { AFals, Unkwn, ATrue, ATrue } },
-        { { AFals, Unkwn, ATrue, ATrue }, { ATrue, ATrue, Unkwn, AFals } },
-        { { AFals, Unkwn, Unkwn, AFals }, { AFals, Unkwn, Unkwn, AFals } },
-        { { ATrue, Unkwn, Unkwn, ATrue }, { ATrue, Unkwn, Unkwn, ATrue } }
-      };
-
-    bool ConstantIsBoolLiteral = isa<CXXBoolLiteralExpr>(Constant);
-
-    enum ConstantValue ConstVal = Zero;
-    if (Value.isUnsigned() || Value.isNonNegative()) {
-      if (Value == 0) {
-        LiteralOrBoolConstant =
-            ConstantIsBoolLiteral ? CXXBoolLiteralFalse : LiteralConstant;
-        ConstVal = Zero;
-      } else if (Value == 1) {
-        LiteralOrBoolConstant =
-            ConstantIsBoolLiteral ? CXXBoolLiteralTrue : LiteralConstant;
-        ConstVal = One;
-      } else {
-        LiteralOrBoolConstant = LiteralConstant;
-        ConstVal = GT_One;
-      }
-    } else {
-      ConstVal = LT_Zero;
-    }
-
-    CompareBoolWithConstantResult CmpRes;
-
-    switch (op) {
-    case BO_LT:
-      CmpRes = TruthTable.BO_LT_OP[RhsConstant][ConstVal];
-      break;
-    case BO_GT:
-      CmpRes = TruthTable.BO_GT_OP[RhsConstant][ConstVal];
-      break;
-    case BO_LE:
-      CmpRes = TruthTable.BO_LE_OP[RhsConstant][ConstVal];
-      break;
-    case BO_GE:
-      CmpRes = TruthTable.BO_GE_OP[RhsConstant][ConstVal];
-      break;
-    case BO_EQ:
-      CmpRes = TruthTable.BO_EQ_OP[RhsConstant][ConstVal];
-      break;
-    case BO_NE:
-      CmpRes = TruthTable.BO_NE_OP[RhsConstant][ConstVal];
-      break;
-    default:
-      CmpRes = Unkwn;
-      break;
-    }
-
-    if (CmpRes == AFals) {
-      IsTrue = false;
-    } else if (CmpRes == ATrue) {
-      IsTrue = true;
-    } else {
-      return false;
-    }
-  }
+  // Suppress the diagnostic for an in-range comparison if the constant comes
+  // from a macro or enumerator. We don't want to diagnose
+  //
+  //   some_long_value <= INT_MAX
+  //
+  // when sizeof(int) == sizeof(long).
+  bool InRange = Cmp & PromotedRange::InRangeFlag;
+  if (InRange && IsEnumConstOrFromMacro(S, Constant))
+    return false;
 
   // If this is a comparison to an enum constant, include that
   // constant in the diagnostic.
@@ -8872,6 +8854,7 @@
   if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Constant))
     ED = dyn_cast<EnumConstantDecl>(DR->getDecl());
 
+  // Should be enough for uint128 (39 decimal digits)
   SmallString<64> PrettySourceValue;
   llvm::raw_svector_ostream OS(PrettySourceValue);
   if (ED)
@@ -8879,19 +8862,35 @@
   else
     OS << Value;
 
-  S.DiagRuntimeBehavior(
-    E->getOperatorLoc(), E,
-    S.PDiag(diag::warn_out_of_range_compare)
-        << OS.str() << LiteralOrBoolConstant
-        << OtherT << (OtherIsBooleanType && !OtherT->isBooleanType()) << IsTrue
-        << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange());
+  // FIXME: We use a somewhat different formatting for the in-range cases and
+  // cases involving boolean values for historical reasons. We should pick a
+  // consistent way of presenting these diagnostics.
+  if (!InRange || Other->isKnownToHaveBooleanValue()) {
+    S.DiagRuntimeBehavior(
+      E->getOperatorLoc(), E,
+      S.PDiag(!InRange ? diag::warn_out_of_range_compare
+                       : diag::warn_tautological_bool_compare)
+          << OS.str() << classifyConstantValue(Constant)
+          << OtherT << OtherIsBooleanDespiteType << *Result
+          << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange());
+  } else {
+    unsigned Diag = (isKnownToHaveUnsignedValue(OriginalOther) && Value == 0)
+                        ? (HasEnumType(OriginalOther)
+                               ? diag::warn_unsigned_enum_always_true_comparison
+                               : diag::warn_unsigned_always_true_comparison)
+                        : diag::warn_tautological_constant_compare;
 
-   return true;
+    S.Diag(E->getOperatorLoc(), Diag)
+        << RhsConstant << OtherT << E->getOpcodeStr() << OS.str() << *Result
+        << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange();
+  }
+
+  return true;
 }
 
 /// Analyze the operands of the given comparison.  Implements the
 /// fallback case from AnalyzeComparison.
-void AnalyzeImpConvsInComparison(Sema &S, BinaryOperator *E) {
+static void AnalyzeImpConvsInComparison(Sema &S, BinaryOperator *E) {
   AnalyzeImplicitConversions(S, E->getLHS(), E->getOperatorLoc());
   AnalyzeImplicitConversions(S, E->getRHS(), E->getOperatorLoc());
 }
@@ -8899,7 +8898,7 @@
 /// \brief Implements -Wsign-compare.
 ///
 /// \param E the binary operator to check for warnings
-void AnalyzeComparison(Sema &S, BinaryOperator *E) {
+static void AnalyzeComparison(Sema &S, BinaryOperator *E) {
   // The type the comparison is being performed in.
   QualType T = E->getLHS()->getType();
 
@@ -8936,12 +8935,8 @@
 
       // Check whether an integer constant comparison results in a value
       // of 'true' or 'false'.
-
       if (CheckTautologicalComparison(S, E, Const, Other, Value, RhsConstant))
         return AnalyzeImpConvsInComparison(S, E);
-
-      if (DiagnoseOutOfRangeComparison(S, E, Const, Other, Value, RhsConstant))
-        return AnalyzeImpConvsInComparison(S, E);
     }
   }
 
@@ -8955,6 +8950,16 @@
   LHS = LHS->IgnoreParenImpCasts();
   RHS = RHS->IgnoreParenImpCasts();
 
+  if (!S.getLangOpts().CPlusPlus) {
+    // Avoid warning about comparison of integers with different signs when
+    // RHS/LHS has a `typeof(E)` type whose sign is different from the sign of
+    // the type of `E`.
+    if (const auto *TET = dyn_cast<TypeOfExprType>(LHS->getType()))
+      LHS = TET->getUnderlyingExpr()->IgnoreParenImpCasts();
+    if (const auto *TET = dyn_cast<TypeOfExprType>(RHS->getType()))
+      RHS = TET->getUnderlyingExpr()->IgnoreParenImpCasts();
+  }
+
   // Check to see if one of the (unmodified) operands is of different
   // signedness.
   Expr *signedOperand, *unsignedOperand;
@@ -9007,8 +9012,8 @@
 /// Analyzes an attempt to assign the given value to a bitfield.
 ///
 /// Returns true if there was something fishy about the attempt.
-bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init,
-                               SourceLocation InitLoc) {
+static bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init,
+                                      SourceLocation InitLoc) {
   assert(Bitfield->isBitField());
   if (Bitfield->isInvalidDecl())
     return false;
@@ -9138,7 +9143,7 @@
 
 /// Analyze the given simple or compound assignment for warning-worthy
 /// operations.
-void AnalyzeAssignment(Sema &S, BinaryOperator *E) {
+static void AnalyzeAssignment(Sema &S, BinaryOperator *E) {
   // Just recurse on the LHS.
   AnalyzeImplicitConversions(S, E->getLHS(), E->getOperatorLoc());
 
@@ -9157,9 +9162,9 @@
 }
 
 /// Diagnose an implicit cast;  purely a helper for CheckImplicitConversion.
-void DiagnoseImpCast(Sema &S, Expr *E, QualType SourceType, QualType T, 
-                     SourceLocation CContext, unsigned diag,
-                     bool pruneControlFlow = false) {
+static void DiagnoseImpCast(Sema &S, Expr *E, QualType SourceType, QualType T, 
+                            SourceLocation CContext, unsigned diag,
+                            bool pruneControlFlow = false) {
   if (pruneControlFlow) {
     S.DiagRuntimeBehavior(E->getExprLoc(), E,
                           S.PDiag(diag)
@@ -9172,16 +9177,16 @@
 }
 
 /// Diagnose an implicit cast;  purely a helper for CheckImplicitConversion.
-void DiagnoseImpCast(Sema &S, Expr *E, QualType T, SourceLocation CContext,
-                     unsigned diag, bool pruneControlFlow = false) {
+static void DiagnoseImpCast(Sema &S, Expr *E, QualType T,
+                            SourceLocation CContext,
+                            unsigned diag, bool pruneControlFlow = false) {
   DiagnoseImpCast(S, E, E->getType(), T, CContext, diag, pruneControlFlow);
 }
 
 
 /// Diagnose an implicit cast from a floating point value to an integer value.
-void DiagnoseFloatingImpCast(Sema &S, Expr *E, QualType T,
-
-                             SourceLocation CContext) {
+static void DiagnoseFloatingImpCast(Sema &S, Expr *E, QualType T,
+                                    SourceLocation CContext) {
   const bool IsBool = T->isSpecificBuiltinType(BuiltinType::Bool);
   const bool PruneWarnings = S.inTemplateInstantiation();
 
@@ -9271,7 +9276,8 @@
   }
 }
 
-std::string PrettyPrintInRange(const llvm::APSInt &Value, IntRange Range) {
+static std::string PrettyPrintInRange(const llvm::APSInt &Value,
+                                      IntRange Range) {
   if (!Range.Width) return "0";
 
   llvm::APSInt ValueInRange = Value;
@@ -9280,7 +9286,7 @@
   return ValueInRange.toString(10);
 }
 
-bool IsImplicitBoolFloatConversion(Sema &S, Expr *Ex, bool ToBool) {
+static bool IsImplicitBoolFloatConversion(Sema &S, Expr *Ex, bool ToBool) {
   if (!isa<ImplicitCastExpr>(Ex))
     return false;
 
@@ -9299,8 +9305,8 @@
           FloatCandidateBT && (FloatCandidateBT->isFloatingPoint()));
 }
 
-void CheckImplicitArgumentConversions(Sema &S, CallExpr *TheCall,
-                                      SourceLocation CC) {
+static void CheckImplicitArgumentConversions(Sema &S, CallExpr *TheCall,
+                                             SourceLocation CC) {
   unsigned NumArgs = TheCall->getNumArgs();
   for (unsigned i = 0; i < NumArgs; ++i) {
     Expr *CurrA = TheCall->getArg(i);
@@ -9320,7 +9326,8 @@
   }
 }
 
-void DiagnoseNullConversion(Sema &S, Expr *E, QualType T, SourceLocation CC) {
+static void DiagnoseNullConversion(Sema &S, Expr *E, QualType T,
+                                   SourceLocation CC) {
   if (S.Diags.isIgnored(diag::warn_impcast_null_pointer_to_integer,
                         E->getExprLoc()))
     return;
@@ -9345,11 +9352,8 @@
   // Venture through the macro stacks to get to the source of macro arguments.
   // The new location is a better location than the complete location that was
   // passed in.
-  while (S.SourceMgr.isMacroArgExpansion(Loc))
-    Loc = S.SourceMgr.getImmediateMacroCallerLoc(Loc);
-
-  while (S.SourceMgr.isMacroArgExpansion(CC))
-    CC = S.SourceMgr.getImmediateMacroCallerLoc(CC);
+  Loc = S.SourceMgr.getTopMacroCallerLoc(Loc);
+  CC = S.SourceMgr.getTopMacroCallerLoc(CC);
 
   // __null is usually wrapped in a macro.  Go up a macro if that is the case.
   if (NullKind == Expr::NPCK_GNUNull && Loc.isMacroID()) {
@@ -9364,20 +9368,24 @@
     return;
 
   S.Diag(Loc, diag::warn_impcast_null_pointer_to_integer)
-      << (NullKind == Expr::NPCK_CXX11_nullptr) << T << clang::SourceRange(CC)
+      << (NullKind == Expr::NPCK_CXX11_nullptr) << T << SourceRange(CC)
       << FixItHint::CreateReplacement(Loc,
                                       S.getFixItZeroLiteralForType(T, Loc));
 }
 
-void checkObjCArrayLiteral(Sema &S, QualType TargetType,
-                           ObjCArrayLiteral *ArrayLiteral);
-void checkObjCDictionaryLiteral(Sema &S, QualType TargetType,
-                                ObjCDictionaryLiteral *DictionaryLiteral);
+static void checkObjCArrayLiteral(Sema &S, QualType TargetType,
+                                  ObjCArrayLiteral *ArrayLiteral);
+
+static void
+checkObjCDictionaryLiteral(Sema &S, QualType TargetType,
+                           ObjCDictionaryLiteral *DictionaryLiteral);
 
 /// Check a single element within a collection literal against the
 /// target element type.
-void checkObjCCollectionLiteralElement(Sema &S, QualType TargetElementType,
-                                       Expr *Element, unsigned ElementKind) {
+static void checkObjCCollectionLiteralElement(Sema &S,
+                                              QualType TargetElementType,
+                                              Expr *Element,
+                                              unsigned ElementKind) {
   // Skip a bitcast to 'id' or qualified 'id'.
   if (auto ICE = dyn_cast<ImplicitCastExpr>(Element)) {
     if (ICE->getCastKind() == CK_BitCast &&
@@ -9406,8 +9414,8 @@
 
 /// Check an Objective-C array literal being converted to the given
 /// target type.
-void checkObjCArrayLiteral(Sema &S, QualType TargetType,
-                           ObjCArrayLiteral *ArrayLiteral) {
+static void checkObjCArrayLiteral(Sema &S, QualType TargetType,
+                                  ObjCArrayLiteral *ArrayLiteral) {
   if (!S.NSArrayDecl)
     return;
 
@@ -9434,8 +9442,9 @@
 
 /// Check an Objective-C dictionary literal being converted to the given
 /// target type.
-void checkObjCDictionaryLiteral(Sema &S, QualType TargetType,
-                                ObjCDictionaryLiteral *DictionaryLiteral) {
+static void
+checkObjCDictionaryLiteral(Sema &S, QualType TargetType,
+                           ObjCDictionaryLiteral *DictionaryLiteral) {
   if (!S.NSDictionaryDecl)
     return;
 
@@ -9463,8 +9472,8 @@
 
 // Helper function to filter out cases for constant width constant conversion.
 // Don't warn on char array initialization or for non-decimal values.
-bool isSameWidthConstantConversion(Sema &S, Expr *E, QualType T,
-                                   SourceLocation CC) {
+static bool isSameWidthConstantConversion(Sema &S, Expr *E, QualType T,
+                                          SourceLocation CC) {
   // If initializing from a constant, and the constant starts with '0',
   // then it is a binary, octal, or hexadecimal.  Allow these constants
   // to fill all the bits, even if there is a sign change.
@@ -9487,8 +9496,9 @@
   return true;
 }
 
-void CheckImplicitConversion(Sema &S, Expr *E, QualType T,
-                             SourceLocation CC, bool *ICContext = nullptr) {
+static void
+CheckImplicitConversion(Sema &S, Expr *E, QualType T, SourceLocation CC,
+                        bool *ICContext = nullptr) {
   if (E->isTypeDependent() || E->isValueDependent()) return;
 
   const Type *Source = S.Context.getCanonicalType(E->getType()).getTypePtr();
@@ -9756,11 +9766,11 @@
       }
 }
 
-void CheckConditionalOperator(Sema &S, ConditionalOperator *E,
-                              SourceLocation CC, QualType T);
+static void CheckConditionalOperator(Sema &S, ConditionalOperator *E,
+                                     SourceLocation CC, QualType T);
 
-void CheckConditionalOperand(Sema &S, Expr *E, QualType T,
-                             SourceLocation CC, bool &ICContext) {
+static void CheckConditionalOperand(Sema &S, Expr *E, QualType T,
+                                    SourceLocation CC, bool &ICContext) {
   E = E->IgnoreParenImpCasts();
 
   if (isa<ConditionalOperator>(E))
@@ -9771,8 +9781,8 @@
     return CheckImplicitConversion(S, E, T, CC, &ICContext);
 }
 
-void CheckConditionalOperator(Sema &S, ConditionalOperator *E,
-                              SourceLocation CC, QualType T) {
+static void CheckConditionalOperator(Sema &S, ConditionalOperator *E,
+                                     SourceLocation CC, QualType T) {
   AnalyzeImplicitConversions(S, E->getCond(), E->getQuestionLoc());
 
   bool Suspicious = false;
@@ -9801,7 +9811,7 @@
 
 /// CheckBoolLikeConversion - Check conversion of given expression to boolean.
 /// Input argument E is a logical expression.
-void CheckBoolLikeConversion(Sema &S, Expr *E, SourceLocation CC) {
+static void CheckBoolLikeConversion(Sema &S, Expr *E, SourceLocation CC) {
   if (S.getLangOpts().Bool)
     return;
   CheckImplicitConversion(S, E->IgnoreParenImpCasts(), S.Context.BoolTy, CC);
@@ -9810,7 +9820,8 @@
 /// AnalyzeImplicitConversions - Find and report any interesting
 /// implicit conversions in the given expression.  There are a couple
 /// of competing diagnostics here, -Wconversion and -Wsign-compare.
-void AnalyzeImplicitConversions(Sema &S, Expr *OrigE, SourceLocation CC) {
+static void AnalyzeImplicitConversions(Sema &S, Expr *OrigE,
+                                       SourceLocation CC) {
   QualType T = OrigE->getType();
   Expr *E = OrigE->IgnoreParenImpCasts();
 
@@ -9903,8 +9914,6 @@
       ::CheckBoolLikeConversion(S, U->getSubExpr(), CC);
 }
 
-} // end anonymous namespace
-
 /// Diagnose integer type and any valid implicit convertion to it.
 static bool checkOpenCLEnqueueIntType(Sema &S, Expr *E, const QualType &IntT) {
   // Taking into account implicit conversions,
@@ -10072,8 +10081,8 @@
               return;
           }
 
-          for (unsigned ArgNo : NonNull->args()) {
-            if (ArgNo == ParamNo) {
+          for (const ParamIdx &ArgNo : NonNull->args()) {
+            if (ArgNo.getASTIndex() == ParamNo) {
               ComplainAboutNonnullParamOrCall(NonNull);
               return;
             }
@@ -10210,10 +10219,11 @@
 }
 
 namespace {
+
 /// \brief Visitor for expressions which looks for unsequenced operations on the
 /// same object.
 class SequenceChecker : public EvaluatedExprVisitor<SequenceChecker> {
-  typedef EvaluatedExprVisitor<SequenceChecker> Base;
+  using Base = EvaluatedExprVisitor<SequenceChecker>;
 
   /// \brief A tree of sequenced regions within an expression. Two regions are
   /// unsequenced if one is an ancestor or a descendent of the other. When we
@@ -10232,11 +10242,14 @@
     /// \brief A region within an expression which may be sequenced with respect
     /// to some other region.
     class Seq {
-      explicit Seq(unsigned N) : Index(N) {}
-      unsigned Index;
       friend class SequenceTree;
+
+      unsigned Index = 0;
+
+      explicit Seq(unsigned N) : Index(N) {}
+
     public:
-      Seq() : Index(0) {}
+      Seq() = default;
     };
 
     SequenceTree() { Values.push_back(Value(0)); }
@@ -10280,16 +10293,18 @@
   };
 
   /// An object for which we can track unsequenced uses.
-  typedef NamedDecl *Object;
+  using Object = NamedDecl *;
 
   /// Different flavors of object usage which we track. We only track the
   /// least-sequenced usage of each kind.
   enum UsageKind {
     /// A read of an object. Multiple unsequenced reads are OK.
     UK_Use,
+
     /// A modification of an object which is sequenced before the value
     /// computation of the expression, such as ++n in C++.
     UK_ModAsValue,
+
     /// A modification of an object which is not sequenced before the value
     /// computation of the expression, such as n++.
     UK_ModAsSideEffect,
@@ -10298,29 +10313,37 @@
   };
 
   struct Usage {
-    Usage() : Use(nullptr), Seq() {}
-    Expr *Use;
+    Expr *Use = nullptr;
     SequenceTree::Seq Seq;
+
+    Usage() = default;
   };
 
   struct UsageInfo {
-    UsageInfo() : Diagnosed(false) {}
     Usage Uses[UK_Count];
+
     /// Have we issued a diagnostic for this variable already?
-    bool Diagnosed;
+    bool Diagnosed = false;
+
+    UsageInfo() = default;
   };
-  typedef llvm::SmallDenseMap<Object, UsageInfo, 16> UsageInfoMap;
+  using UsageInfoMap = llvm::SmallDenseMap<Object, UsageInfo, 16>;
 
   Sema &SemaRef;
+
   /// Sequenced regions within the expression.
   SequenceTree Tree;
+
   /// Declaration modifications and references which we have seen.
   UsageInfoMap UsageMap;
+
   /// The region we are currently within.
   SequenceTree::Seq Region;
+
   /// Filled in with declarations which were modified as a side-effect
   /// (that is, post-increment operations).
-  SmallVectorImpl<std::pair<Object, Usage> > *ModAsSideEffect;
+  SmallVectorImpl<std::pair<Object, Usage>> *ModAsSideEffect = nullptr;
+
   /// Expressions to check later. We defer checking these to reduce
   /// stack usage.
   SmallVectorImpl<Expr *> &WorkList;
@@ -10335,6 +10358,7 @@
       : Self(Self), OldModAsSideEffect(Self.ModAsSideEffect) {
       Self.ModAsSideEffect = &ModAsSideEffect;
     }
+
     ~SequencedSubexpression() {
       for (auto &M : llvm::reverse(ModAsSideEffect)) {
         UsageInfo &U = Self.UsageMap[M.first];
@@ -10347,7 +10371,7 @@
 
     SequenceChecker &Self;
     SmallVector<std::pair<Object, Usage>, 4> ModAsSideEffect;
-    SmallVectorImpl<std::pair<Object, Usage> > *OldModAsSideEffect;
+    SmallVectorImpl<std::pair<Object, Usage>> *OldModAsSideEffect;
   };
 
   /// RAII object wrapping the visitation of a subexpression which we might
@@ -10357,9 +10381,10 @@
   class EvaluationTracker {
   public:
     EvaluationTracker(SequenceChecker &Self)
-        : Self(Self), Prev(Self.EvalTracker), EvalOK(true) {
+        : Self(Self), Prev(Self.EvalTracker) {
       Self.EvalTracker = this;
     }
+
     ~EvaluationTracker() {
       Self.EvalTracker = Prev;
       if (Prev)
@@ -10376,8 +10401,8 @@
   private:
     SequenceChecker &Self;
     EvaluationTracker *Prev;
-    bool EvalOK;
-  } *EvalTracker;
+    bool EvalOK = true;
+  } *EvalTracker = nullptr;
 
   /// \brief Find the object which is produced by the specified expression,
   /// if any.
@@ -10411,6 +10436,7 @@
       U.Seq = Region;
     }
   }
+
   /// \brief Check whether a modification or use conflicts with a prior usage.
   void checkUsage(Object O, UsageInfo &UI, Expr *Ref, UsageKind OtherKind,
                   bool IsModMod) {
@@ -10438,6 +10464,7 @@
     // Uses conflict with other modifications.
     checkUsage(O, U, Use, UK_ModAsValue, false);
   }
+
   void notePostUse(Object O, Expr *Use) {
     UsageInfo &U = UsageMap[O];
     checkUsage(O, U, Use, UK_ModAsSideEffect, false);
@@ -10450,6 +10477,7 @@
     checkUsage(O, U, Mod, UK_ModAsValue, true);
     checkUsage(O, U, Mod, UK_Use, false);
   }
+
   void notePostMod(Object O, Expr *Use, UsageKind UK) {
     UsageInfo &U = UsageMap[O];
     checkUsage(O, U, Use, UK_ModAsSideEffect, true);
@@ -10458,8 +10486,7 @@
 
 public:
   SequenceChecker(Sema &S, Expr *E, SmallVectorImpl<Expr *> &WorkList)
-      : Base(S.Context), SemaRef(S), Region(Tree.root()),
-        ModAsSideEffect(nullptr), WorkList(WorkList), EvalTracker(nullptr) {
+      : Base(S.Context), SemaRef(S), Region(Tree.root()), WorkList(WorkList) {
     Visit(E);
   }
 
@@ -10693,7 +10720,8 @@
       Tree.merge(Elts[I]);
   }
 };
-} // end anonymous namespace
+
+} // namespace
 
 void Sema::CheckUnsequencedOperations(Expr *E) {
   SmallVector<Expr *, 8> WorkList;
@@ -10788,23 +10816,18 @@
     // information is added for it.
     diagnoseArrayStarInParamType(*this, PType, Param->getLocation());
 
-    // MSVC destroys objects passed by value in the callee.  Therefore a
-    // function definition which takes such a parameter must be able to call the
-    // object's destructor.  However, we don't perform any direct access check
-    // on the dtor.
-    if (getLangOpts().CPlusPlus && Context.getTargetInfo()
-                                       .getCXXABI()
-                                       .areArgsDestroyedLeftToRightInCallee()) {
-      if (!Param->isInvalidDecl()) {
-        if (const RecordType *RT = Param->getType()->getAs<RecordType>()) {
-          CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(RT->getDecl());
-          if (!ClassDecl->isInvalidDecl() &&
-              !ClassDecl->hasIrrelevantDestructor() &&
-              !ClassDecl->isDependentContext()) {
-            CXXDestructorDecl *Destructor = LookupDestructor(ClassDecl);
-            MarkFunctionReferenced(Param->getLocation(), Destructor);
-            DiagnoseUseOfDecl(Destructor, Param->getLocation());
-          }
+    // If the parameter is a c++ class type and it has to be destructed in the
+    // callee function, declare the destructor so that it can be called by the
+    // callee function. Do not perfom any direct access check on the dtor here.
+    if (!Param->isInvalidDecl()) {
+      if (CXXRecordDecl *ClassDecl = Param->getType()->getAsCXXRecordDecl()) {
+        if (!ClassDecl->isInvalidDecl() &&
+            !ClassDecl->hasIrrelevantDestructor() &&
+            !ClassDecl->isDependentContext() &&
+            Context.isParamDestroyedInCallee(Param->getType())) {
+          CXXDestructorDecl *Destructor = LookupDestructor(ClassDecl);
+          MarkFunctionReferenced(Param->getLocation(), Destructor);
+          DiagnoseUseOfDecl(Destructor, Param->getLocation());
         }
       }
     }
@@ -10957,9 +10980,9 @@
 
   const NamedDecl *ND = nullptr;
   if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
-    ND = dyn_cast<NamedDecl>(DRE->getDecl());
+    ND = DRE->getDecl();
   if (const MemberExpr *ME = dyn_cast<MemberExpr>(BaseExpr))
-    ND = dyn_cast<NamedDecl>(ME->getMemberDecl());
+    ND = ME->getMemberDecl();
 
   if (index.isUnsigned() || !index.isNegative()) {
     llvm::APInt size = ArrayTy->getSize();
@@ -11042,9 +11065,9 @@
            dyn_cast<ArraySubscriptExpr>(BaseExpr))
       BaseExpr = ASE->getBase()->IgnoreParenCasts();
     if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
-      ND = dyn_cast<NamedDecl>(DRE->getDecl());
+      ND = DRE->getDecl();
     if (const MemberExpr *ME = dyn_cast<MemberExpr>(BaseExpr))
-      ND = dyn_cast<NamedDecl>(ME->getMemberDecl());
+      ND = ME->getMemberDecl();
   }
 
   if (ND)
@@ -11110,19 +11133,22 @@
 //===--- CHECK: Objective-C retain cycles ----------------------------------//
 
 namespace {
-  struct RetainCycleOwner {
-    RetainCycleOwner() : Variable(nullptr), Indirect(false) {}
-    VarDecl *Variable;
-    SourceRange Range;
-    SourceLocation Loc;
-    bool Indirect;
 
-    void setLocsFrom(Expr *e) {
-      Loc = e->getExprLoc();
-      Range = e->getSourceRange();
-    }
-  };
-} // end anonymous namespace
+struct RetainCycleOwner {
+  VarDecl *Variable = nullptr;
+  SourceRange Range;
+  SourceLocation Loc;
+  bool Indirect = false;
+
+  RetainCycleOwner() = default;
+
+  void setLocsFrom(Expr *e) {
+    Loc = e->getExprLoc();
+    Range = e->getSourceRange();
+  }
+};
+
+} // namespace
 
 /// Consider whether capturing the given variable can possibly lead to
 /// a retain cycle.
@@ -11219,15 +11245,16 @@
 }
 
 namespace {
+
   struct FindCaptureVisitor : EvaluatedExprVisitor<FindCaptureVisitor> {
-    FindCaptureVisitor(ASTContext &Context, VarDecl *variable)
-      : EvaluatedExprVisitor<FindCaptureVisitor>(Context),
-        Context(Context), Variable(variable), Capturer(nullptr),
-        VarWillBeReased(false) {}
     ASTContext &Context;
     VarDecl *Variable;
-    Expr *Capturer;
-    bool VarWillBeReased;
+    Expr *Capturer = nullptr;
+    bool VarWillBeReased = false;
+
+    FindCaptureVisitor(ASTContext &Context, VarDecl *variable)
+        : EvaluatedExprVisitor<FindCaptureVisitor>(Context),
+          Context(Context), Variable(variable) {}
 
     void VisitDeclRefExpr(DeclRefExpr *ref) {
       if (ref->getDecl() == Variable && !Capturer)
@@ -11252,6 +11279,7 @@
       if (OVE->getSourceExpr())
         Visit(OVE->getSourceExpr());
     }
+
     void VisitBinaryOperator(BinaryOperator *BinOp) {
       if (!Variable || VarWillBeReased || BinOp->getOpcode() != BO_Assign)
         return;
@@ -11268,7 +11296,8 @@
       }
     }
   };
-} // end anonymous namespace
+
+} // namespace
 
 /// Check whether the given argument is a block which captures a
 /// variable.
@@ -11525,9 +11554,15 @@
   }
 
   // Check whether the receiver is captured by any of the arguments.
-  for (unsigned i = 0, e = msg->getNumArgs(); i != e; ++i)
-    if (Expr *capturer = findCapturingExpr(*this, msg->getArg(i), owner))
+  const ObjCMethodDecl *MD = msg->getMethodDecl();
+  for (unsigned i = 0, e = msg->getNumArgs(); i != e; ++i) {
+    if (Expr *capturer = findCapturingExpr(*this, msg->getArg(i), owner)) {
+      // noescape blocks should not be retained by the method.
+      if (MD && MD->parameters()[i]->hasAttr<NoEscapeAttr>())
+        continue;
       return diagnoseRetainCycle(*this, capturer, owner);
+    }
+  }
 }
 
 /// Check a property assign to see if it's likely to cause a retain cycle.
@@ -11675,16 +11710,14 @@
 
 //===--- CHECK: Empty statement body (-Wempty-body) ---------------------===//
 
-namespace {
-bool ShouldDiagnoseEmptyStmtBody(const SourceManager &SourceMgr,
-                                 SourceLocation StmtLoc,
-                                 const NullStmt *Body) {
+static bool ShouldDiagnoseEmptyStmtBody(const SourceManager &SourceMgr,
+                                        SourceLocation StmtLoc,
+                                        const NullStmt *Body) {
   // Do not warn if the body is a macro that expands to nothing, e.g:
   //
   // #define CALL(x)
   // if (condition)
   //   CALL(0);
-  //
   if (Body->hasLeadingEmptyMacro())
     return false;
 
@@ -11707,7 +11740,6 @@
 
   return true;
 }
-} // end anonymous namespace
 
 void Sema::DiagnoseEmptyStmtBody(SourceLocation StmtLoc,
                                  const Stmt *Body,
@@ -11887,12 +11919,10 @@
 
 //===--- Layout compatibility ----------------------------------------------//
 
-namespace {
-
-bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2);
+static bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2);
 
 /// \brief Check if two enumeration types are layout-compatible.
-bool isLayoutCompatible(ASTContext &C, EnumDecl *ED1, EnumDecl *ED2) {
+static bool isLayoutCompatible(ASTContext &C, EnumDecl *ED1, EnumDecl *ED2) {
   // C++11 [dcl.enum] p8:
   // Two enumeration types are layout-compatible if they have the same
   // underlying type.
@@ -11901,7 +11931,8 @@
 }
 
 /// \brief Check if two fields are layout-compatible.
-bool isLayoutCompatible(ASTContext &C, FieldDecl *Field1, FieldDecl *Field2) {
+static bool isLayoutCompatible(ASTContext &C, FieldDecl *Field1,
+                               FieldDecl *Field2) {
   if (!isLayoutCompatible(C, Field1->getType(), Field2->getType()))
     return false;
 
@@ -11922,9 +11953,8 @@
 
 /// \brief Check if two standard-layout structs are layout-compatible.
 /// (C++11 [class.mem] p17)
-bool isLayoutCompatibleStruct(ASTContext &C,
-                              RecordDecl *RD1,
-                              RecordDecl *RD2) {
+static bool isLayoutCompatibleStruct(ASTContext &C, RecordDecl *RD1,
+                                     RecordDecl *RD2) {
   // If both records are C++ classes, check that base classes match.
   if (const CXXRecordDecl *D1CXX = dyn_cast<CXXRecordDecl>(RD1)) {
     // If one of records is a CXXRecordDecl we are in C++ mode,
@@ -11967,9 +11997,8 @@
 
 /// \brief Check if two standard-layout unions are layout-compatible.
 /// (C++11 [class.mem] p18)
-bool isLayoutCompatibleUnion(ASTContext &C,
-                             RecordDecl *RD1,
-                             RecordDecl *RD2) {
+static bool isLayoutCompatibleUnion(ASTContext &C, RecordDecl *RD1,
+                                    RecordDecl *RD2) {
   llvm::SmallPtrSet<FieldDecl *, 8> UnmatchedFields;
   for (auto *Field2 : RD2->fields())
     UnmatchedFields.insert(Field2);
@@ -11994,7 +12023,8 @@
   return UnmatchedFields.empty();
 }
 
-bool isLayoutCompatible(ASTContext &C, RecordDecl *RD1, RecordDecl *RD2) {
+static bool isLayoutCompatible(ASTContext &C, RecordDecl *RD1,
+                               RecordDecl *RD2) {
   if (RD1->isUnion() != RD2->isUnion())
     return false;
 
@@ -12005,7 +12035,7 @@
 }
 
 /// \brief Check if two types are layout-compatible in C++11 sense.
-bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2) {
+static bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2) {
   if (T1.isNull() || T2.isNull())
     return false;
 
@@ -12039,11 +12069,9 @@
 
   return false;
 }
-} // end anonymous namespace
 
 //===--- CHECK: pointer_with_type_tag attribute: datatypes should match ----//
 
-namespace {
 /// \brief Given a type tag expression find the type tag itself.
 ///
 /// \param TypeExpr Type tag expression, as it appears in user's code.
@@ -12051,8 +12079,8 @@
 /// \param VD Declaration of an identifier that appears in a type tag.
 ///
 /// \param MagicValue Type tag magic value.
-bool FindTypeTagExpr(const Expr *TypeExpr, const ASTContext &Ctx,
-                     const ValueDecl **VD, uint64_t *MagicValue) {
+static bool FindTypeTagExpr(const Expr *TypeExpr, const ASTContext &Ctx,
+                            const ValueDecl **VD, uint64_t *MagicValue) {
   while(true) {
     if (!TypeExpr)
       return false;
@@ -12127,7 +12155,7 @@
 /// \param TypeInfo Information about the corresponding C type.
 ///
 /// \returns true if the corresponding C type was found.
-bool GetMatchingCType(
+static bool GetMatchingCType(
         const IdentifierInfo *ArgumentKind,
         const Expr *TypeExpr, const ASTContext &Ctx,
         const llvm::DenseMap<Sema::TypeTagMagicValue,
@@ -12170,7 +12198,6 @@
   TypeInfo = I->second;
   return true;
 }
-} // end anonymous namespace
 
 void Sema::RegisterTypeTagForDatatype(const IdentifierInfo *ArgumentKind,
                                       uint64_t MagicValue, QualType Type,
@@ -12185,8 +12212,7 @@
       TypeTagData(Type, LayoutCompatible, MustBeNull);
 }
 
-namespace {
-bool IsSameCharType(QualType T1, QualType T2) {
+static bool IsSameCharType(QualType T1, QualType T2) {
   const BuiltinType *BT1 = T1->getAs<BuiltinType>();
   if (!BT1)
     return false;
@@ -12203,14 +12229,21 @@
          (T1Kind == BuiltinType::Char_U && T2Kind == BuiltinType::UChar) ||
          (T1Kind == BuiltinType::Char_S && T2Kind == BuiltinType::SChar);
 }
-} // end anonymous namespace
 
 void Sema::CheckArgumentWithTypeTag(const ArgumentWithTypeTagAttr *Attr,
-                                    const Expr * const *ExprArgs) {
+                                    const ArrayRef<const Expr *> ExprArgs,
+                                    SourceLocation CallSiteLoc) {
   const IdentifierInfo *ArgumentKind = Attr->getArgumentKind();
   bool IsPointerAttr = Attr->getIsPointer();
 
-  const Expr *TypeTagExpr = ExprArgs[Attr->getTypeTagIdx()];
+  // Retrieve the argument representing the 'type_tag'.
+  unsigned TypeTagIdxAST = Attr->typeTagIdx().getASTIndex();
+  if (TypeTagIdxAST >= ExprArgs.size()) {
+    Diag(CallSiteLoc, diag::err_tag_index_out_of_range)
+        << 0 << Attr->typeTagIdx().getSourceIndex();
+    return;
+  }
+  const Expr *TypeTagExpr = ExprArgs[TypeTagIdxAST];
   bool FoundWrongKind;
   TypeTagData TypeInfo;
   if (!GetMatchingCType(ArgumentKind, TypeTagExpr, Context,
@@ -12223,7 +12256,14 @@
     return;
   }
 
-  const Expr *ArgumentExpr = ExprArgs[Attr->getArgumentIdx()];
+  // Retrieve the argument representing the 'arg_idx'.
+  unsigned ArgumentIdxAST = Attr->argumentIdx().getASTIndex();
+  if (ArgumentIdxAST >= ExprArgs.size()) {
+    Diag(CallSiteLoc, diag::err_tag_index_out_of_range)
+        << 1 << Attr->argumentIdx().getSourceIndex();
+    return;
+  }
+  const Expr *ArgumentExpr = ExprArgs[ArgumentIdxAST];
   if (IsPointerAttr) {
     // Skip implicit cast of pointer to `void *' (as a function argument).
     if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(ArgumentExpr))
@@ -12314,8 +12354,9 @@
                           MisalignedMember(Op));
       if (MA != MisalignedMembers.end() &&
           (T->isIntegerType() ||
-           (T->isPointerType() &&
-            Context.getTypeAlignInChars(T->getPointeeType()) <= MA->Alignment)))
+           (T->isPointerType() && (T->getPointeeType()->isIncompleteType() ||
+                                   Context.getTypeAlignInChars(
+                                       T->getPointeeType()) <= MA->Alignment))))
         MisalignedMembers.erase(MA);
     }
   }
@@ -12432,8 +12473,8 @@
 
 void Sema::CheckAddressOfPackedMember(Expr *rhs) {
   using namespace std::placeholders;
+
   RefersToMemberWithReducedAlignment(
       rhs, std::bind(&Sema::AddPotentialMisalignedMembers, std::ref(*this), _1,
                      _2, _3, _4));
 }
-
diff --git a/lib/Sema/SemaCodeComplete.cpp b/lib/Sema/SemaCodeComplete.cpp
index 90a15e1..6d4e583 100644
--- a/lib/Sema/SemaCodeComplete.cpp
+++ b/lib/Sema/SemaCodeComplete.cpp
@@ -14,6 +14,7 @@
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
+#include "clang/AST/QualTypeNames.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/MacroInfo.h"
@@ -47,7 +48,7 @@
     /// the result set (when it returns true) and which declarations should be
     /// filtered out (returns false).
     typedef bool (ResultBuilder::*LookupFilter)(const NamedDecl *) const;
-    
+
     typedef CodeCompletionResult Result;
     
   private:
@@ -317,6 +318,11 @@
     /// \brief Ignore this declaration, if it is seen again.
     void Ignore(const Decl *D) { AllDeclsFound.insert(D->getCanonicalDecl()); }
 
+    /// \brief Add a visited context.
+    void addVisitedContext(DeclContext *Ctx) {
+      CompletionContext.addVisitedContext(Ctx);
+    }
+
     /// \name Name lookup predicates
     ///
     /// These predicates can be passed to the name lookup functions to filter the
@@ -669,7 +675,7 @@
 /// \brief Get the type that a given expression will have if this declaration
 /// is used as an expression in its "typical" code-completion form.
 QualType clang::getDeclUsageType(ASTContext &C, const NamedDecl *ND) {
-  ND = cast<NamedDecl>(ND->getUnderlyingDecl());
+  ND = ND->getUnderlyingDecl();
   
   if (const TypeDecl *Type = dyn_cast<TypeDecl>(ND))
     return C.getTypeDeclType(Type);
@@ -837,6 +843,12 @@
   }
 }
 
+static bool isConstructor(const Decl *ND) {
+  if (const auto *Tmpl = dyn_cast<FunctionTemplateDecl>(ND))
+    ND = Tmpl->getTemplatedDecl();
+  return isa<CXXConstructorDecl>(ND);
+}
+
 void ResultBuilder::MaybeAddResult(Result R, DeclContext *CurContext) {
   assert(!ShadowMaps.empty() && "Must enter into a results scope");
   
@@ -864,7 +876,7 @@
     return;
       
   // C++ constructors are never found by name lookup.
-  if (isa<CXXConstructorDecl>(R.Declaration))
+  if (isConstructor(R.Declaration))
     return;
 
   ShadowMap &SMap = ShadowMaps.back();
@@ -977,7 +989,7 @@
     return;
   
   // C++ constructors are never found by name lookup.
-  if (isa<CXXConstructorDecl>(R.Declaration))
+  if (isConstructor(R.Declaration))
     return;
 
   if (Hiding && CheckHiddenResult(R, CurContext, Hiding))
@@ -1062,7 +1074,7 @@
 /// \brief Determines whether this given declaration will be found by
 /// ordinary name lookup.
 bool ResultBuilder::IsOrdinaryName(const NamedDecl *ND) const {
-  ND = cast<NamedDecl>(ND->getUnderlyingDecl());
+  ND = ND->getUnderlyingDecl();
 
   // If name lookup finds a local extern declaration, then we are in a
   // context where it behaves like an ordinary name.
@@ -1080,10 +1092,17 @@
 /// \brief Determines whether this given declaration will be found by
 /// ordinary name lookup but is not a type name.
 bool ResultBuilder::IsOrdinaryNonTypeName(const NamedDecl *ND) const {
-  ND = cast<NamedDecl>(ND->getUnderlyingDecl());
-  if (isa<TypeDecl>(ND) || isa<ObjCInterfaceDecl>(ND))
+  ND = ND->getUnderlyingDecl();
+  if (isa<TypeDecl>(ND))
     return false;
-  
+  // Objective-C interfaces names are not filtered by this method because they
+  // can be used in a class property expression. We can still filter out
+  // @class declarations though.
+  if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(ND)) {
+    if (!ID->getDefinition())
+      return false;
+  }
+
   unsigned IDNS = Decl::IDNS_Ordinary | Decl::IDNS_LocalExtern;
   if (SemaRef.getLangOpts().CPlusPlus)
     IDNS |= Decl::IDNS_Tag | Decl::IDNS_Namespace | Decl::IDNS_Member;
@@ -1109,7 +1128,7 @@
 /// \brief Determines whether this given declaration will be found by
 /// ordinary name lookup.
 bool ResultBuilder::IsOrdinaryNonValueName(const NamedDecl *ND) const {
-  ND = cast<NamedDecl>(ND->getUnderlyingDecl());
+  ND = ND->getUnderlyingDecl();
 
   unsigned IDNS = Decl::IDNS_Ordinary | Decl::IDNS_LocalExtern;
   if (SemaRef.getLangOpts().CPlusPlus)
@@ -1272,7 +1291,7 @@
   class CodeCompletionDeclConsumer : public VisibleDeclConsumer {
     ResultBuilder &Results;
     DeclContext *CurContext;
-    
+
   public:
     CodeCompletionDeclConsumer(ResultBuilder &Results, DeclContext *CurContext)
       : Results(Results), CurContext(CurContext) { }
@@ -1287,6 +1306,10 @@
                                    false, Accessible);
       Results.AddResult(Result, CurContext, Hiding, InBaseClass);
     }
+
+    void EnteredContext(DeclContext* Ctx) override {
+      Results.addVisitedContext(Ctx);
+    }
   };
 }
 
@@ -1412,7 +1435,7 @@
       Results.AddResult(Result("mutable"));
       Results.AddResult(Result("virtual"));
     }    
-    // Fall through
+    LLVM_FALLTHROUGH;
 
   case Sema::PCC_ObjCInterface:
   case Sema::PCC_ObjCImplementation:
@@ -1495,6 +1518,7 @@
   Policy.AnonymousTagLocations = false;
   Policy.SuppressStrongLifetime = true;
   Policy.SuppressUnwrittenScope = true;
+  Policy.SuppressScope = true;
   return Policy;
 }
 
@@ -1629,7 +1653,7 @@
       AddObjCTopLevelResults(Results, true);
       
     AddTypedefResult(Results);
-    // Fall through
+    LLVM_FALLTHROUGH;
 
   case Sema::PCC_Class:
     if (SemaRef.getLangOpts().CPlusPlus) {
@@ -1658,26 +1682,28 @@
       if (CCC == Sema::PCC_Class) {
         AddTypedefResult(Results);
 
+        bool IsNotInheritanceScope =
+            !(S->getFlags() & Scope::ClassInheritanceScope);
         // public:
         Builder.AddTypedTextChunk("public");
-        if (Results.includeCodePatterns())
+        if (IsNotInheritanceScope && Results.includeCodePatterns())
           Builder.AddChunk(CodeCompletionString::CK_Colon);
         Results.AddResult(Result(Builder.TakeString()));
 
         // protected:
         Builder.AddTypedTextChunk("protected");
-        if (Results.includeCodePatterns())
+        if (IsNotInheritanceScope && Results.includeCodePatterns())
           Builder.AddChunk(CodeCompletionString::CK_Colon);
         Results.AddResult(Result(Builder.TakeString()));
 
         // private:
         Builder.AddTypedTextChunk("private");
-        if (Results.includeCodePatterns())
+        if (IsNotInheritanceScope && Results.includeCodePatterns())
           Builder.AddChunk(CodeCompletionString::CK_Colon);
         Results.AddResult(Result(Builder.TakeString()));
       }
     }
-    // Fall through
+    LLVM_FALLTHROUGH;
 
   case Sema::PCC_Template:
   case Sema::PCC_MemberTemplate:
@@ -2125,7 +2151,7 @@
 
   // Skip constructors and conversion functions, which have their return types
   // built into their names.
-  if (isa<CXXConstructorDecl>(ND) || isa<CXXConversionDecl>(ND))
+  if (isConstructor(ND) || isa<CXXConversionDecl>(ND))
     return;
 
   // Determine the type of the declaration (if it has a type).
@@ -2137,9 +2163,10 @@
       T = Method->getSendResultType(BaseType);
     else
       T = Method->getReturnType();
-  } else if (const EnumConstantDecl *Enumerator = dyn_cast<EnumConstantDecl>(ND))
+  } else if (const EnumConstantDecl *Enumerator = dyn_cast<EnumConstantDecl>(ND)) {
     T = Context.getTypeDeclType(cast<TypeDecl>(Enumerator->getDeclContext()));
-  else if (isa<UnresolvedUsingValueDecl>(ND)) {
+    T = clang::TypeName::getFullyQualifiedType(T, Context);
+  } else if (isa<UnresolvedUsingValueDecl>(ND)) {
     /* Do nothing: ignore unresolved using declarations*/
   } else if (const ObjCIvarDecl *Ivar = dyn_cast<ObjCIvarDecl>(ND)) {
     if (!BaseType.isNull())
@@ -3366,12 +3393,9 @@
       return;
 
   PrintingPolicy Policy = getCompletionPrintingPolicy(S);
-  for (CXXMethodDecl::method_iterator M = Method->begin_overridden_methods(),
-                                   MEnd = Method->end_overridden_methods();
-       M != MEnd; ++M) {
+  for (const CXXMethodDecl *Overridden : Method->overridden_methods()) {
     CodeCompletionBuilder Builder(Results.getAllocator(),
                                   Results.getCodeCompletionTUInfo());
-    const CXXMethodDecl *Overridden = *M;
     if (Overridden->getCanonicalDecl() == Method->getCanonicalDecl())
       continue;
         
@@ -3515,7 +3539,8 @@
   
   CodeCompletionDeclConsumer Consumer(Results, CurContext);
   LookupVisibleDecls(S, LookupOrdinaryName, Consumer,
-                     CodeCompleter->includeGlobals());
+                     CodeCompleter->includeGlobals(),
+                     CodeCompleter->loadExternal());
 
   AddOrdinaryNameResults(CompletionContext, S, *this, Results);
   Results.ExitScope();
@@ -3590,7 +3615,8 @@
       Results.setFilter(&ResultBuilder::IsImpossibleToSatisfy);
       CodeCompletionDeclConsumer Consumer(Results, CurContext);
       LookupVisibleDecls(S, LookupNestedNameSpecifierName, Consumer,
-                         CodeCompleter->includeGlobals());
+                         CodeCompleter->includeGlobals(),
+                         CodeCompleter->loadExternal());
       Results.setFilter(nullptr);
     }
   }
@@ -3660,8 +3686,9 @@
   
   CodeCompletionDeclConsumer Consumer(Results, CurContext);
   LookupVisibleDecls(S, LookupOrdinaryName, Consumer,
-                     CodeCompleter->includeGlobals());
-  
+                     CodeCompleter->includeGlobals(),
+                     CodeCompleter->loadExternal());
+
   Results.EnterNewScope();
   AddOrdinaryNameResults(PCC_Expression, S, *this, Results);
   Results.ExitScope();
@@ -3930,7 +3957,8 @@
   CodeCompletionDeclConsumer Consumer(Results, SemaRef.CurContext);
   SemaRef.LookupVisibleDecls(RD, Sema::LookupMemberName, Consumer,
                              SemaRef.CodeCompleter->includeGlobals(),
-                             /*IncludeDependentBases=*/true);
+                             /*IncludeDependentBases=*/true,
+                             SemaRef.CodeCompleter->loadExternal());
 
   if (SemaRef.getLangOpts().CPlusPlus) {
     if (!Results.empty()) {
@@ -4040,8 +4068,9 @@
     if (Class) {
       CodeCompletionDeclConsumer Consumer(Results, CurContext);
       Results.setFilter(&ResultBuilder::IsObjCIvar);
-      LookupVisibleDecls(Class, LookupMemberName, Consumer,
-                         CodeCompleter->includeGlobals());
+      LookupVisibleDecls(
+          Class, LookupMemberName, Consumer, CodeCompleter->includeGlobals(),
+          /*IncludeDependentBases=*/false, CodeCompleter->loadExternal());
     }
   }
   
@@ -4115,12 +4144,15 @@
   // First pass: look for tags.
   Results.setFilter(Filter);
   LookupVisibleDecls(S, LookupTagName, Consumer,
-                     CodeCompleter->includeGlobals());
+                     CodeCompleter->includeGlobals(),
+                     CodeCompleter->loadExternal());
 
   if (CodeCompleter->includeGlobals()) {
     // Second pass: look for nested name specifiers.
     Results.setFilter(&ResultBuilder::IsNestedNameSpecifier);
-    LookupVisibleDecls(S, LookupNestedNameSpecifierName, Consumer);
+    LookupVisibleDecls(S, LookupNestedNameSpecifierName, Consumer,
+                       CodeCompleter->includeGlobals(),
+                       CodeCompleter->loadExternal());
   }
   
   HandleCodeCompleteResults(this, CodeCompleter, Results.getCompletionContext(),
@@ -4162,8 +4194,8 @@
   AddTypeQualifierResults(DS, Results, LangOpts);
   if (LangOpts.CPlusPlus11) {
     Results.AddResult("noexcept");
-    if (D.getContext() == Declarator::MemberContext && !D.isCtorOrDtor() &&
-        !D.isStaticMember()) {
+    if (D.getContext() == DeclaratorContext::MemberContext &&
+        !D.isCtorOrDtor() && !D.isStaticMember()) {
       if (!VS || !VS->isFinalSpecified())
         Results.AddResult("final");
       if (!VS || !VS->isOverrideSpecified())
@@ -4394,9 +4426,11 @@
     ArgExprs.append(Args.begin(), Args.end());
     UnresolvedSet<8> Decls;
     Decls.append(UME->decls_begin(), UME->decls_end());
+    const bool FirstArgumentIsBase = !UME->isImplicitAccess() && UME->getBase();
     AddFunctionCandidates(Decls, ArgExprs, CandidateSet, TemplateArgs,
                           /*SuppressUsedConversions=*/false,
-                          /*PartialOverloading=*/true);
+                          /*PartialOverloading=*/true,
+                          FirstArgumentIsBase);
   } else {
     FunctionDecl *FD = nullptr;
     if (auto MCE = dyn_cast<MemberExpr>(NakedFn))
@@ -4529,8 +4563,9 @@
   
   CodeCompletionDeclConsumer Consumer(Results, CurContext);
   LookupVisibleDecls(S, LookupOrdinaryName, Consumer,
-                     CodeCompleter->includeGlobals());
-  
+                     CodeCompleter->includeGlobals(),
+                     CodeCompleter->loadExternal());
+
   AddOrdinaryNameResults(PCC_Statement, S, *this, Results);
   
   // "else" block
@@ -4589,9 +4624,19 @@
 
 void Sema::CodeCompleteQualifiedId(Scope *S, CXXScopeSpec &SS,
                                    bool EnteringContext) {
-  if (!SS.getScopeRep() || !CodeCompleter)
+  if (SS.isEmpty() || !CodeCompleter)
     return;
 
+  // We want to keep the scope specifier even if it's invalid (e.g. the scope
+  // "a::b::" is not corresponding to any context/namespace in the AST), since
+  // it can be useful for global code completion which have information about
+  // contexts/symbols that are not in the AST.
+  if (SS.isInvalid()) {
+    CodeCompletionContext CC(CodeCompletionContext::CCC_Name);
+    CC.setCXXScopeSpecifier(SS);
+    HandleCodeCompleteResults(this, CodeCompleter, CC, nullptr, 0);
+    return;
+  }
   // Always pretend to enter a context to ensure that a dependent type
   // resolves to a dependent record.
   DeclContext *Ctx = computeDeclContext(SS, /*EnteringContext=*/true);
@@ -4607,7 +4652,7 @@
                         CodeCompleter->getCodeCompletionTUInfo(),
                         CodeCompletionContext::CCC_Name);
   Results.EnterNewScope();
-  
+
   // The "template" keyword can follow "::" in the grammar, but only
   // put it into the grammar if the nested-name-specifier is dependent.
   NestedNameSpecifier *NNS = SS.getScopeRep();
@@ -4621,16 +4666,22 @@
   // qualified-id completions.
   if (!EnteringContext)
     MaybeAddOverrideCalls(*this, Ctx, Results);
-  Results.ExitScope();  
-  
-  CodeCompletionDeclConsumer Consumer(Results, CurContext);
-  LookupVisibleDecls(Ctx, LookupOrdinaryName, Consumer,
-                     /*IncludeGlobalScope=*/true,
-                     /*IncludeDependentBases=*/true);
+  Results.ExitScope();
 
-  HandleCodeCompleteResults(this, CodeCompleter, 
-                            Results.getCompletionContext(),
-                            Results.data(),Results.size());
+  if (CodeCompleter->includeNamespaceLevelDecls() ||
+      (!Ctx->isNamespace() && !Ctx->isTranslationUnit())) {
+    CodeCompletionDeclConsumer Consumer(Results, CurContext);
+    LookupVisibleDecls(Ctx, LookupOrdinaryName, Consumer,
+                       /*IncludeGlobalScope=*/true,
+                       /*IncludeDependentBases=*/true,
+                       CodeCompleter->loadExternal());
+  }
+
+  auto CC = Results.getCompletionContext();
+  CC.setCXXScopeSpecifier(SS);
+
+  HandleCodeCompleteResults(this, CodeCompleter, CC, Results.data(),
+                            Results.size());
 }
 
 void Sema::CodeCompleteUsing(Scope *S) {
@@ -4651,7 +4702,8 @@
   // nested-name-specifier.
   CodeCompletionDeclConsumer Consumer(Results, CurContext);
   LookupVisibleDecls(S, LookupOrdinaryName, Consumer,
-                     CodeCompleter->includeGlobals());
+                     CodeCompleter->includeGlobals(),
+                     CodeCompleter->loadExternal());
   Results.ExitScope();
   
   HandleCodeCompleteResults(this, CodeCompleter, 
@@ -4672,7 +4724,8 @@
   Results.EnterNewScope();
   CodeCompletionDeclConsumer Consumer(Results, CurContext);
   LookupVisibleDecls(S, LookupOrdinaryName, Consumer,
-                     CodeCompleter->includeGlobals());
+                     CodeCompleter->includeGlobals(),
+                     CodeCompleter->loadExternal());
   Results.ExitScope();
   HandleCodeCompleteResults(this, CodeCompleter, 
                             CodeCompletionContext::CCC_Namespace,
@@ -4738,7 +4791,8 @@
                         &ResultBuilder::IsNamespaceOrAlias);
   CodeCompletionDeclConsumer Consumer(Results, CurContext);
   LookupVisibleDecls(S, LookupOrdinaryName, Consumer,
-                     CodeCompleter->includeGlobals());
+                     CodeCompleter->includeGlobals(),
+                     CodeCompleter->loadExternal());
   HandleCodeCompleteResults(this, CodeCompleter, 
                             Results.getCompletionContext(),
                             Results.data(),Results.size());
@@ -4765,8 +4819,9 @@
   Results.allowNestedNameSpecifiers();
   CodeCompletionDeclConsumer Consumer(Results, CurContext);
   LookupVisibleDecls(S, LookupOrdinaryName, Consumer,
-                     CodeCompleter->includeGlobals());
-  
+                     CodeCompleter->includeGlobals(),
+                     CodeCompleter->loadExternal());
+
   // Add any type specifiers
   AddTypeSpecifierResults(getLangOpts(), Results);
   Results.ExitScope();
@@ -5594,8 +5649,9 @@
   Results.setFilter(&ResultBuilder::IsOrdinaryNonValueName);
   CodeCompletionDeclConsumer Consumer(Results, CurContext);
   LookupVisibleDecls(S, LookupOrdinaryName, Consumer,
-                     CodeCompleter->includeGlobals());
-  
+                     CodeCompleter->includeGlobals(),
+                     CodeCompleter->loadExternal());
+
   if (CodeCompleter->includeMacros())
     AddMacroResults(PP, Results, false);
 
@@ -5808,8 +5864,9 @@
   CodeCompletionDeclConsumer Consumer(Results, CurContext);
   Results.EnterNewScope();
   LookupVisibleDecls(S, LookupOrdinaryName, Consumer,
-                     CodeCompleter->includeGlobals());
-  
+                     CodeCompleter->includeGlobals(),
+                     CodeCompleter->loadExternal());
+
   // If we are in an Objective-C method inside a class that has a superclass,
   // add "super" as an option.
   if (ObjCMethodDecl *Method = getCurMethodDecl())
@@ -6645,7 +6702,7 @@
 /// indexed by selector so they can be easily found.
 static void FindImplementableMethods(ASTContext &Context,
                                      ObjCContainerDecl *Container,
-                                     bool WantInstanceMethods,
+                                     Optional<bool> WantInstanceMethods,
                                      QualType ReturnType,
                                      KnownMethodsMap &KnownMethods,
                                      bool InOriginalClass = true) {
@@ -6716,7 +6773,7 @@
   // we want the methods from this container to override any methods
   // we've previously seen with the same selector.
   for (auto *M : Container->methods()) {
-    if (M->isInstanceMethod() == WantInstanceMethods) {
+    if (!WantInstanceMethods || M->isInstanceMethod() == *WantInstanceMethods) {
       if (!ReturnType.isNull() &&
           !Context.hasSameUnqualifiedType(ReturnType, M->getReturnType()))
         continue;
@@ -7388,8 +7445,7 @@
   }
 }
 
-void Sema::CodeCompleteObjCMethodDecl(Scope *S, 
-                                      bool IsInstanceMethod,
+void Sema::CodeCompleteObjCMethodDecl(Scope *S, Optional<bool> IsInstanceMethod,
                                       ParsedType ReturnTy) {
   // Determine the return type of the method we're declaring, if
   // provided.
@@ -7397,7 +7453,7 @@
   Decl *IDecl = nullptr;
   if (CurContext->isObjCContainer()) {
       ObjCContainerDecl *OCD = dyn_cast<ObjCContainerDecl>(CurContext);
-      IDecl = cast<Decl>(OCD);
+      IDecl = OCD;
   }
   // Determine where we should start searching for methods.
   ObjCContainerDecl *SearchDecl = nullptr;
@@ -7444,7 +7500,13 @@
     ObjCMethodDecl *Method = M->second.getPointer();
     CodeCompletionBuilder Builder(Results.getAllocator(),
                                   Results.getCodeCompletionTUInfo());
-    
+
+    // Add the '-'/'+' prefix if it wasn't provided yet.
+    if (!IsInstanceMethod) {
+      Builder.AddTextChunk(Method->isInstanceMethod() ? "-" : "+");
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
+    }
+
     // If the result type was not already provided, add it to the
     // pattern as (type).
     if (ReturnType.isNull()) {
@@ -7546,11 +7608,13 @@
     if (IFace)
       for (auto *Cat : IFace->visible_categories())
         Containers.push_back(Cat);
-    
-    for (unsigned I = 0, N = Containers.size(); I != N; ++I)
-      for (auto *P : Containers[I]->instance_properties())
-        AddObjCKeyValueCompletions(P, IsInstanceMethod, ReturnType, Context, 
-                                   KnownSelectors, Results);
+
+    if (IsInstanceMethod) {
+      for (unsigned I = 0, N = Containers.size(); I != N; ++I)
+        for (auto *P : Containers[I]->instance_properties())
+          AddObjCKeyValueCompletions(P, *IsInstanceMethod, ReturnType, Context,
+                                     KnownSelectors, Results);
+    }
   }
   
   Results.ExitScope();
@@ -7909,8 +7973,9 @@
   if (!CodeCompleter || CodeCompleter->includeGlobals()) {
     CodeCompletionDeclConsumer Consumer(Builder, 
                                         Context.getTranslationUnitDecl());
-    LookupVisibleDecls(Context.getTranslationUnitDecl(), LookupAnyName, 
-                       Consumer);
+    LookupVisibleDecls(Context.getTranslationUnitDecl(), LookupAnyName,
+                       Consumer,
+                       !CodeCompleter || CodeCompleter->loadExternal());
   }
   
   if (!CodeCompleter || CodeCompleter->includeMacros())
diff --git a/lib/Sema/SemaCoroutine.cpp b/lib/Sema/SemaCoroutine.cpp
index e6b640f..54dcc97 100644
--- a/lib/Sema/SemaCoroutine.cpp
+++ b/lib/Sema/SemaCoroutine.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CoroutineStmtBuilder.h"
+#include "clang/AST/ASTLambda.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/StmtCXX.h"
@@ -494,9 +495,58 @@
   CheckVariableDeclarationType(VD);
   if (VD->isInvalidDecl())
     return nullptr;
-  ActOnUninitializedDecl(VD);
+
+  auto *ScopeInfo = getCurFunction();
+  // Build a list of arguments, based on the coroutine functions arguments,
+  // that will be passed to the promise type's constructor.
+  llvm::SmallVector<Expr *, 4> CtorArgExprs;
+  auto &Moves = ScopeInfo->CoroutineParameterMoves;
+  for (auto *PD : FD->parameters()) {
+    if (PD->getType()->isDependentType())
+      continue;
+
+    auto RefExpr = ExprEmpty();
+    auto Move = Moves.find(PD);
+    assert(Move != Moves.end() &&
+           "Coroutine function parameter not inserted into move map");
+    // If a reference to the function parameter exists in the coroutine
+    // frame, use that reference.
+    auto *MoveDecl =
+        cast<VarDecl>(cast<DeclStmt>(Move->second)->getSingleDecl());
+    RefExpr =
+        BuildDeclRefExpr(MoveDecl, MoveDecl->getType().getNonReferenceType(),
+                         ExprValueKind::VK_LValue, FD->getLocation());
+    if (RefExpr.isInvalid())
+      return nullptr;
+    CtorArgExprs.push_back(RefExpr.get());
+  }
+
+  // Create an initialization sequence for the promise type using the
+  // constructor arguments, wrapped in a parenthesized list expression.
+  Expr *PLE = new (Context) ParenListExpr(Context, FD->getLocation(),
+                                          CtorArgExprs, FD->getLocation());
+  InitializedEntity Entity = InitializedEntity::InitializeVariable(VD);
+  InitializationKind Kind = InitializationKind::CreateForInit(
+      VD->getLocation(), /*DirectInit=*/true, PLE);
+  InitializationSequence InitSeq(*this, Entity, Kind, CtorArgExprs,
+                                 /*TopLevelOfInitList=*/false,
+                                 /*TreatUnavailableAsInvalid=*/false);
+
+  // Attempt to initialize the promise type with the arguments.
+  // If that fails, fall back to the promise type's default constructor.
+  if (InitSeq) {
+    ExprResult Result = InitSeq.Perform(*this, Entity, Kind, CtorArgExprs);
+    if (Result.isInvalid()) {
+      VD->setInvalidDecl();
+    } else if (Result.get()) {
+      VD->setInit(MaybeCreateExprWithCleanups(Result.get()));
+      VD->setInitStyle(VarDecl::CallInit);
+      CheckCompleteVariableDeclaration(VD);
+    }
+  } else
+    ActOnUninitializedDecl(VD);
+
   FD->addDecl(VD);
-  assert(!VD->isInvalidDecl());
   return VD;
 }
 
@@ -518,6 +568,9 @@
   if (ScopeInfo->CoroutinePromise)
     return ScopeInfo;
 
+  if (!S.buildCoroutineParameterMoves(Loc))
+    return nullptr;
+
   ScopeInfo->CoroutinePromise = S.buildCoroutinePromise(Loc);
   if (!ScopeInfo->CoroutinePromise)
     return nullptr;
@@ -861,6 +914,11 @@
           !Fn.CoroutinePromise ||
           Fn.CoroutinePromise->getType()->isDependentType()) {
   this->Body = Body;
+
+  for (auto KV : Fn.CoroutineParameterMoves)
+    this->ParamMovesVector.push_back(KV.second);
+  this->ParamMoves = this->ParamMovesVector;
+
   if (!IsPromiseDependentType) {
     PromiseRecordDecl = Fn.CoroutinePromise->getType()->getAsCXXRecordDecl();
     assert(PromiseRecordDecl && "Type should have already been checked");
@@ -870,7 +928,7 @@
 
 bool CoroutineStmtBuilder::buildStatements() {
   assert(this->IsValid && "coroutine already invalid");
-  this->IsValid = makeReturnObject() && makeParamMoves();
+  this->IsValid = makeReturnObject();
   if (this->IsValid && !IsPromiseDependentType)
     buildDependentStatements();
   return this->IsValid;
@@ -886,12 +944,6 @@
   return this->IsValid;
 }
 
-bool CoroutineStmtBuilder::buildParameterMoves() {
-  assert(this->IsValid && "coroutine already invalid");
-  assert(this->ParamMoves.empty() && "param moves already built");
-  return this->IsValid = makeParamMoves();
-}
-
 bool CoroutineStmtBuilder::makePromiseStmt() {
   // Form a declaration statement for the promise declaration, so that AST
   // visitors can more easily find it.
@@ -990,7 +1042,12 @@
 
   const bool RequiresNoThrowAlloc = ReturnStmtOnAllocFailure != nullptr;
 
-  // FIXME: Add support for stateful allocators.
+  // [dcl.fct.def.coroutine]/7
+  // Lookup allocation functions using a parameter list composed of the
+  // requested size of the coroutine state being allocated, followed by
+  // the coroutine function's arguments. If a matching allocation function
+  // exists, use it. Otherwise, use an allocation function that just takes
+  // the requested size.
 
   FunctionDecl *OperatorNew = nullptr;
   FunctionDecl *OperatorDelete = nullptr;
@@ -998,10 +1055,62 @@
   bool PassAlignment = false;
   SmallVector<Expr *, 1> PlacementArgs;
 
+  // [dcl.fct.def.coroutine]/7
+  // "The allocation function’s name is looked up in the scope of P.
+  // [...] If the lookup finds an allocation function in the scope of P,
+  // overload resolution is performed on a function call created by assembling
+  // an argument list. The first argument is the amount of space requested,
+  // and has type std::size_t. The lvalues p1 ... pn are the succeeding
+  // arguments."
+  //
+  // ...where "p1 ... pn" are defined earlier as:
+  //
+  // [dcl.fct.def.coroutine]/3
+  // "For a coroutine f that is a non-static member function, let P1 denote the
+  // type of the implicit object parameter (13.3.1) and P2 ... Pn be the types
+  // of the function parameters; otherwise let P1 ... Pn be the types of the
+  // function parameters. Let p1 ... pn be lvalues denoting those objects."
+  if (auto *MD = dyn_cast<CXXMethodDecl>(&FD)) {
+    if (MD->isInstance() && !isLambdaCallOperator(MD)) {
+      ExprResult ThisExpr = S.ActOnCXXThis(Loc);
+      if (ThisExpr.isInvalid())
+        return false;
+      ThisExpr = S.CreateBuiltinUnaryOp(Loc, UO_Deref, ThisExpr.get());
+      if (ThisExpr.isInvalid())
+        return false;
+      PlacementArgs.push_back(ThisExpr.get());
+    }
+  }
+  for (auto *PD : FD.parameters()) {
+    if (PD->getType()->isDependentType())
+      continue;
+
+    // Build a reference to the parameter.
+    auto PDLoc = PD->getLocation();
+    ExprResult PDRefExpr =
+        S.BuildDeclRefExpr(PD, PD->getOriginalType().getNonReferenceType(),
+                           ExprValueKind::VK_LValue, PDLoc);
+    if (PDRefExpr.isInvalid())
+      return false;
+
+    PlacementArgs.push_back(PDRefExpr.get());
+  }
   S.FindAllocationFunctions(Loc, SourceRange(),
                             /*UseGlobal*/ false, PromiseType,
                             /*isArray*/ false, PassAlignment, PlacementArgs,
-                            OperatorNew, UnusedResult);
+                            OperatorNew, UnusedResult, /*Diagnose*/ false);
+
+  // [dcl.fct.def.coroutine]/7
+  // "If no matching function is found, overload resolution is performed again
+  // on a function call created by passing just the amount of space required as
+  // an argument of type std::size_t."
+  if (!OperatorNew && !PlacementArgs.empty()) {
+    PlacementArgs.clear();
+    S.FindAllocationFunctions(Loc, SourceRange(),
+                              /*UseGlobal*/ false, PromiseType,
+                              /*isArray*/ false, PassAlignment,
+                              PlacementArgs, OperatorNew, UnusedResult);
+  }
 
   bool IsGlobalOverload =
       OperatorNew && !isa<CXXRecordDecl>(OperatorNew->getDeclContext());
@@ -1020,7 +1129,8 @@
                               OperatorNew, UnusedResult);
   }
 
-  assert(OperatorNew && "expected definition of operator new to be found");
+  if (!OperatorNew)
+    return false;
 
   if (RequiresNoThrowAlloc) {
     const auto *FT = OperatorNew->getType()->getAs<FunctionProtoType>();
@@ -1256,10 +1366,6 @@
   if (Res.isInvalid())
     return false;
 
-  if (GroType == FnRetType) {
-    GroDecl->setNRVOVariable(true);
-  }
-
   S.AddInitializerToDecl(GroDecl, Res.get(),
                          /*DirectInit=*/false);
 
@@ -1283,6 +1389,8 @@
     noteMemberDeclaredHere(S, ReturnValue, Fn);
     return false;
   }
+  if (cast<clang::ReturnStmt>(ReturnStmt.get())->getNRVOCandidate() == GroDecl)
+    GroDecl->setNRVOVariable(true);
 
   this->ReturnStmt = ReturnStmt.get();
   return true;
@@ -1304,47 +1412,53 @@
       .get();
 }
 
-
 /// \brief Build a variable declaration for move parameter.
 static VarDecl *buildVarDecl(Sema &S, SourceLocation Loc, QualType Type,
                              IdentifierInfo *II) {
   TypeSourceInfo *TInfo = S.Context.getTrivialTypeSourceInfo(Type, Loc);
-  VarDecl *Decl =
-      VarDecl::Create(S.Context, S.CurContext, Loc, Loc, II, Type, TInfo, SC_None);
+  VarDecl *Decl = VarDecl::Create(S.Context, S.CurContext, Loc, Loc, II, Type,
+                                  TInfo, SC_None);
   Decl->setImplicit();
   return Decl;
 }
 
-bool CoroutineStmtBuilder::makeParamMoves() {
-  for (auto *paramDecl : FD.parameters()) {
-    auto Ty = paramDecl->getType();
-    if (Ty->isDependentType())
+// Build statements that move coroutine function parameters to the coroutine
+// frame, and store them on the function scope info.
+bool Sema::buildCoroutineParameterMoves(SourceLocation Loc) {
+  assert(isa<FunctionDecl>(CurContext) && "not in a function scope");
+  auto *FD = cast<FunctionDecl>(CurContext);
+
+  auto *ScopeInfo = getCurFunction();
+  assert(ScopeInfo->CoroutineParameterMoves.empty() &&
+         "Should not build parameter moves twice");
+
+  for (auto *PD : FD->parameters()) {
+    if (PD->getType()->isDependentType())
       continue;
 
-    // No need to copy scalars, llvm will take care of them.
-    if (Ty->getAsCXXRecordDecl()) {
-      ExprResult ParamRef =
-          S.BuildDeclRefExpr(paramDecl, paramDecl->getType(),
-                             ExprValueKind::VK_LValue, Loc); // FIXME: scope?
-      if (ParamRef.isInvalid())
-        return false;
+    ExprResult PDRefExpr =
+        BuildDeclRefExpr(PD, PD->getType().getNonReferenceType(),
+                         ExprValueKind::VK_LValue, Loc); // FIXME: scope?
+    if (PDRefExpr.isInvalid())
+      return false;
 
-      Expr *RCast = castForMoving(S, ParamRef.get());
+    Expr *CExpr = nullptr;
+    if (PD->getType()->getAsCXXRecordDecl() ||
+        PD->getType()->isRValueReferenceType())
+      CExpr = castForMoving(*this, PDRefExpr.get());
+    else
+      CExpr = PDRefExpr.get();
 
-      auto D = buildVarDecl(S, Loc, Ty, paramDecl->getIdentifier());
-      S.AddInitializerToDecl(D, RCast, /*DirectInit=*/true);
+    auto D = buildVarDecl(*this, Loc, PD->getType(), PD->getIdentifier());
+    AddInitializerToDecl(D, CExpr, /*DirectInit=*/true);
 
-      // Convert decl to a statement.
-      StmtResult Stmt = S.ActOnDeclStmt(S.ConvertDeclToDeclGroup(D), Loc, Loc);
-      if (Stmt.isInvalid())
-        return false;
+    // Convert decl to a statement.
+    StmtResult Stmt = ActOnDeclStmt(ConvertDeclToDeclGroup(D), Loc, Loc);
+    if (Stmt.isInvalid())
+      return false;
 
-      ParamMovesVector.push_back(Stmt.get());
-    }
+    ScopeInfo->CoroutineParameterMoves.insert(std::make_pair(PD, Stmt.get()));
   }
-
-  // Convert to ArrayRef in CtorArgs structure that builder inherits from.
-  ParamMoves = ParamMovesVector;
   return true;
 }
 
diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp
index d75a469..65b6d06 100644
--- a/lib/Sema/SemaDecl.cpp
+++ b/lib/Sema/SemaDecl.cpp
@@ -281,7 +281,7 @@
                              IdentifierInfo **CorrectedII) {
   // FIXME: Consider allowing this outside C++1z mode as an extension.
   bool AllowDeducedTemplate = IsClassTemplateDeductionContext &&
-                              getLangOpts().CPlusPlus1z && !IsCtorOrDtorName &&
+                              getLangOpts().CPlusPlus17 && !IsCtorOrDtorName &&
                               !isClassName && !HasTrailingDot;
 
   // Determine where we will perform name lookup.
@@ -1519,9 +1519,7 @@
 
   if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(D))
     return CD->isCopyConstructor();
-  if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D))
-    return Method->isCopyAssignmentOperator();
-  return false;
+  return D->isCopyAssignmentOperator();
 }
 
 // We need this to handle
@@ -2495,7 +2493,7 @@
   else if (const auto *UA = dyn_cast<UuidAttr>(Attr))
     NewAttr = S.mergeUuidAttr(D, UA->getRange(), AttrSpellingListIndex,
                               UA->getGuid());
-  else if (Attr->duplicatesAllowed() || !DeclHasAttr(D, Attr))
+  else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr))
     NewAttr = cast<InheritableAttr>(Attr->clone(S.Context));
 
   if (NewAttr) {
@@ -2929,6 +2927,48 @@
   return std::equal(A->param_begin(), A->param_end(), B->param_begin(), AttrEq);
 }
 
+/// If necessary, adjust the semantic declaration context for a qualified
+/// declaration to name the correct inline namespace within the qualifier.
+static void adjustDeclContextForDeclaratorDecl(DeclaratorDecl *NewD,
+                                               DeclaratorDecl *OldD) {
+  // The only case where we need to update the DeclContext is when
+  // redeclaration lookup for a qualified name finds a declaration
+  // in an inline namespace within the context named by the qualifier:
+  //
+  //   inline namespace N { int f(); }
+  //   int ::f(); // Sema DC needs adjusting from :: to N::.
+  //
+  // For unqualified declarations, the semantic context *can* change
+  // along the redeclaration chain (for local extern declarations,
+  // extern "C" declarations, and friend declarations in particular).
+  if (!NewD->getQualifier())
+    return;
+
+  // NewD is probably already in the right context.
+  auto *NamedDC = NewD->getDeclContext()->getRedeclContext();
+  auto *SemaDC = OldD->getDeclContext()->getRedeclContext();
+  if (NamedDC->Equals(SemaDC))
+    return;
+
+  assert((NamedDC->InEnclosingNamespaceSetOf(SemaDC) ||
+          NewD->isInvalidDecl() || OldD->isInvalidDecl()) &&
+         "unexpected context for redeclaration");
+
+  auto *LexDC = NewD->getLexicalDeclContext();
+  auto FixSemaDC = [=](NamedDecl *D) {
+    if (!D)
+      return;
+    D->setDeclContext(SemaDC);
+    D->setLexicalDeclContext(LexDC);
+  };
+
+  FixSemaDC(NewD);
+  if (auto *FD = dyn_cast<FunctionDecl>(NewD))
+    FixSemaDC(FD->getDescribedFunctionTemplate());
+  else if (auto *VD = dyn_cast<VarDecl>(NewD))
+    FixSemaDC(VD->getDescribedVarTemplate());
+}
+
 /// MergeFunctionDecl - We just parsed a function 'New' from
 /// declarator D which has the same name and scope as a previous
 /// declaration 'Old'.  Figure out how to resolve this situation,
@@ -3581,6 +3621,8 @@
          ni = newMethod->param_begin(), ne = newMethod->param_end();
        ni != ne && oi != oe; ++ni, ++oi)
     mergeParamDeclAttributes(*ni, *oi, *this);
+
+  CheckObjCMethodOverride(newMethod, oldMethod);
 }
 
 static void diagnoseVarDeclTypeMismatch(Sema &S, VarDecl *New, VarDecl* Old) {
@@ -3953,6 +3995,7 @@
   New->setPreviousDecl(Old);
   if (NewTemplate)
     NewTemplate->setPreviousDecl(OldTemplate);
+  adjustDeclContextForDeclaratorDecl(New, Old);
 
   // Inherit access appropriately.
   New->setAccess(Old->getAccess());
@@ -4199,7 +4242,7 @@
 
   if (DS.isInlineSpecified())
     Diag(DS.getInlineSpecLoc(), diag::err_inline_non_function)
-        << getLangOpts().CPlusPlus1z;
+        << getLangOpts().CPlusPlus17;
 
   if (DS.isConstexprSpecified()) {
     // C++0x [dcl.constexpr]p1: constexpr can only be applied to declarations
@@ -4213,14 +4256,6 @@
     return TagD;
   }
 
-  if (DS.isConceptSpecified()) {
-    // C++ Concepts TS [dcl.spec.concept]p1: A concept definition refers to
-    // either a function concept and its definition or a variable concept and
-    // its initializer.
-    Diag(DS.getConceptSpecLoc(), diag::err_concept_wrong_decl_kind);
-    return TagD;
-  }
-
   DiagnoseFunctionSpecifiers(DS);
 
   if (DS.isFriendSpecified()) {
@@ -4754,7 +4789,7 @@
   }
 
   // Mock up a declarator.
-  Declarator Dc(DS, Declarator::MemberContext);
+  Declarator Dc(DS, DeclaratorContext::MemberContext);
   TypeSourceInfo *TInfo = GetTypeForDeclarator(Dc, S);
   assert(TInfo && "couldn't build declarator info for anonymous struct/union");
 
@@ -4851,7 +4886,7 @@
   assert(Record && "expected a record!");
 
   // Mock up a declarator.
-  Declarator Dc(DS, Declarator::TypeNameContext);
+  Declarator Dc(DS, DeclaratorContext::TypeNameContext);
   TypeSourceInfo *TInfo = GetTypeForDeclarator(Dc, S);
   assert(TInfo && "couldn't build declarator info for anonymous struct");
 
@@ -4905,13 +4940,13 @@
 
   switch (Name.getKind()) {
 
-  case UnqualifiedId::IK_ImplicitSelfParam:
-  case UnqualifiedId::IK_Identifier:
+  case UnqualifiedIdKind::IK_ImplicitSelfParam:
+  case UnqualifiedIdKind::IK_Identifier:
     NameInfo.setName(Name.Identifier);
     NameInfo.setLoc(Name.StartLocation);
     return NameInfo;
 
-  case UnqualifiedId::IK_DeductionGuideName: {
+  case UnqualifiedIdKind::IK_DeductionGuideName: {
     // C++ [temp.deduct.guide]p3:
     //   The simple-template-id shall name a class template specialization.
     //   The template-name shall be the same identifier as the template-name
@@ -4939,7 +4974,7 @@
     return NameInfo;
   }
 
-  case UnqualifiedId::IK_OperatorFunctionId:
+  case UnqualifiedIdKind::IK_OperatorFunctionId:
     NameInfo.setName(Context.DeclarationNames.getCXXOperatorName(
                                            Name.OperatorFunctionId.Operator));
     NameInfo.setLoc(Name.StartLocation);
@@ -4949,14 +4984,14 @@
       = Name.EndLocation.getRawEncoding();
     return NameInfo;
 
-  case UnqualifiedId::IK_LiteralOperatorId:
+  case UnqualifiedIdKind::IK_LiteralOperatorId:
     NameInfo.setName(Context.DeclarationNames.getCXXLiteralOperatorName(
                                                            Name.Identifier));
     NameInfo.setLoc(Name.StartLocation);
     NameInfo.setCXXLiteralOperatorNameLoc(Name.EndLocation);
     return NameInfo;
 
-  case UnqualifiedId::IK_ConversionFunctionId: {
+  case UnqualifiedIdKind::IK_ConversionFunctionId: {
     TypeSourceInfo *TInfo;
     QualType Ty = GetTypeFromParser(Name.ConversionFunctionId, &TInfo);
     if (Ty.isNull())
@@ -4968,7 +5003,7 @@
     return NameInfo;
   }
 
-  case UnqualifiedId::IK_ConstructorName: {
+  case UnqualifiedIdKind::IK_ConstructorName: {
     TypeSourceInfo *TInfo;
     QualType Ty = GetTypeFromParser(Name.ConstructorName, &TInfo);
     if (Ty.isNull())
@@ -4980,7 +5015,7 @@
     return NameInfo;
   }
 
-  case UnqualifiedId::IK_ConstructorTemplateId: {
+  case UnqualifiedIdKind::IK_ConstructorTemplateId: {
     // In well-formed code, we can only have a constructor
     // template-id that refers to the current context, so go there
     // to find the actual type being constructed.
@@ -5003,7 +5038,7 @@
     return NameInfo;
   }
 
-  case UnqualifiedId::IK_DestructorName: {
+  case UnqualifiedIdKind::IK_DestructorName: {
     TypeSourceInfo *TInfo;
     QualType Ty = GetTypeFromParser(Name.DestructorName, &TInfo);
     if (Ty.isNull())
@@ -5015,7 +5050,7 @@
     return NameInfo;
   }
 
-  case UnqualifiedId::IK_TemplateId: {
+  case UnqualifiedIdKind::IK_TemplateId: {
     TemplateName TName = Name.TemplateId->Template.get();
     SourceLocation TNameLoc = Name.TemplateId->TemplateNameLoc;
     return Context.getNameForTemplate(TName, TNameLoc);
@@ -5459,23 +5494,6 @@
   if (getLangOpts().CPlusPlus)
     CheckExtraCXXDefaultArguments(D);
 
-  if (D.getDeclSpec().isConceptSpecified()) {
-    // C++ Concepts TS [dcl.spec.concept]p1: The concept specifier shall be
-    // applied only to the definition of a function template or variable
-    // template, declared in namespace scope
-    if (!TemplateParamLists.size()) {
-      Diag(D.getDeclSpec().getConceptSpecLoc(),
-           diag:: err_concept_wrong_decl_kind);
-      return nullptr;
-    }
-
-    if (!DC->getRedeclContext()->isFileContext()) {
-      Diag(D.getIdentifierLoc(),
-           diag::err_concept_decls_may_only_appear_in_namespace_scope);
-      return nullptr;
-    }
-  }
-
   NamedDecl *New;
 
   bool AddToScope = true;
@@ -5690,16 +5708,13 @@
 
   if (D.getDeclSpec().isInlineSpecified())
     Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
-        << getLangOpts().CPlusPlus1z;
+        << getLangOpts().CPlusPlus17;
   if (D.getDeclSpec().isConstexprSpecified())
     Diag(D.getDeclSpec().getConstexprSpecLoc(), diag::err_invalid_constexpr)
       << 1;
-  if (D.getDeclSpec().isConceptSpecified())
-    Diag(D.getDeclSpec().getConceptSpecLoc(),
-         diag::err_concept_wrong_decl_kind);
 
-  if (D.getName().Kind != UnqualifiedId::IK_Identifier) {
-    if (D.getName().Kind == UnqualifiedId::IK_DeductionGuideName)
+  if (D.getName().Kind != UnqualifiedIdKind::IK_Identifier) {
+    if (D.getName().Kind == UnqualifiedIdKind::IK_DeductionGuideName)
       Diag(D.getName().StartLocation,
            diag::err_deduction_guide_invalid_specifier)
           << "typedef";
@@ -6362,7 +6377,7 @@
     // Suppress the warning in system macros, it's used in macros in some
     // popular C system headers, such as in glibc's htonl() macro.
     Diag(D.getDeclSpec().getStorageClassSpecLoc(),
-         getLangOpts().CPlusPlus1z ? diag::ext_register_storage_class
+         getLangOpts().CPlusPlus17 ? diag::ext_register_storage_class
                                    : diag::warn_deprecated_register)
       << FixItHint::CreateRemoval(D.getDeclSpec().getStorageClassSpecLoc());
   }
@@ -6453,7 +6468,7 @@
     TemplateParams = MatchTemplateParametersToScopeSpecifier(
         D.getDeclSpec().getLocStart(), D.getIdentifierLoc(),
         D.getCXXScopeSpec(),
-        D.getName().getKind() == UnqualifiedId::IK_TemplateId
+        D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId
             ? D.getName().TemplateId
             : nullptr,
         TemplateParamLists,
@@ -6461,7 +6476,7 @@
 
     if (TemplateParams) {
       if (!TemplateParams->size() &&
-          D.getName().getKind() != UnqualifiedId::IK_TemplateId) {
+          D.getName().getKind() != UnqualifiedIdKind::IK_TemplateId) {
         // There is an extraneous 'template<>' for this variable. Complain
         // about it, but allow the declaration of the variable.
         Diag(TemplateParams->getTemplateLoc(),
@@ -6471,7 +6486,7 @@
                          TemplateParams->getRAngleLoc());
         TemplateParams = nullptr;
       } else {
-        if (D.getName().getKind() == UnqualifiedId::IK_TemplateId) {
+        if (D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId) {
           // This is an explicit specialization or a partial specialization.
           // FIXME: Check that we can declare a specialization here.
           IsVariableTemplateSpecialization = true;
@@ -6492,9 +6507,9 @@
         }
       }
     } else {
-      assert(
-          (Invalid || D.getName().getKind() != UnqualifiedId::IK_TemplateId) &&
-          "should have a 'template<>' for this decl");
+      assert((Invalid ||
+              D.getName().getKind() != UnqualifiedIdKind::IK_TemplateId) &&
+             "should have a 'template<>' for this decl");
     }
 
     if (IsVariableTemplateSpecialization) {
@@ -6550,49 +6565,9 @@
       // C++1z [dcl.spec.constexpr]p1:
       //   A static data member declared with the constexpr specifier is
       //   implicitly an inline variable.
-      if (NewVD->isStaticDataMember() && getLangOpts().CPlusPlus1z)
+      if (NewVD->isStaticDataMember() && getLangOpts().CPlusPlus17)
         NewVD->setImplicitlyInline();
     }
-
-    if (D.getDeclSpec().isConceptSpecified()) {
-      if (VarTemplateDecl *VTD = NewVD->getDescribedVarTemplate())
-        VTD->setConcept();
-
-      // C++ Concepts TS [dcl.spec.concept]p2: A concept definition shall not
-      // be declared with the thread_local, inline, friend, or constexpr
-      // specifiers, [...]
-      if (D.getDeclSpec().getThreadStorageClassSpec() == TSCS_thread_local) {
-        Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
-             diag::err_concept_decl_invalid_specifiers)
-            << 0 << 0;
-        NewVD->setInvalidDecl(true);
-      }
-
-      if (D.getDeclSpec().isConstexprSpecified()) {
-        Diag(D.getDeclSpec().getConstexprSpecLoc(),
-             diag::err_concept_decl_invalid_specifiers)
-            << 0 << 3;
-        NewVD->setInvalidDecl(true);
-      }
-
-      // C++ Concepts TS [dcl.spec.concept]p1: The concept specifier shall be
-      // applied only to the definition of a function template or variable
-      // template, declared in namespace scope.
-      if (IsVariableTemplateSpecialization) {
-        Diag(D.getDeclSpec().getConceptSpecLoc(),
-             diag::err_concept_specified_specialization)
-            << (IsPartialSpecialization ? 2 : 1);
-      }
-
-      // C++ Concepts TS [dcl.spec.concept]p6: A variable concept has the
-      // following restrictions:
-      // - The declared type shall have the type bool.
-      if (!Context.hasSameType(NewVD->getType(), Context.BoolTy) &&
-          !NewVD->isInvalidDecl()) {
-        Diag(D.getIdentifierLoc(), diag::err_variable_concept_bool_decl);
-        NewVD->setInvalidDecl(true);
-      }
-    }
   }
 
   if (D.getDeclSpec().isInlineSpecified()) {
@@ -6606,7 +6581,7 @@
         << FixItHint::CreateRemoval(D.getDeclSpec().getInlineSpecLoc());
     } else {
       Diag(D.getDeclSpec().getInlineSpecLoc(),
-           getLangOpts().CPlusPlus1z ? diag::warn_cxx14_compat_inline_variable
+           getLangOpts().CPlusPlus17 ? diag::warn_cxx14_compat_inline_variable
                                      : diag::ext_inline_variable);
       NewVD->setInlineSpecified();
     }
@@ -6843,25 +6818,6 @@
     if (!IsVariableTemplateSpecialization)
       D.setRedeclaration(CheckVariableDeclaration(NewVD, Previous));
 
-    // C++ Concepts TS [dcl.spec.concept]p7: A program shall not declare [...]
-    // an explicit specialization (14.8.3) or a partial specialization of a
-    // concept definition.
-    if (IsVariableTemplateSpecialization &&
-        !D.getDeclSpec().isConceptSpecified() && !Previous.empty() &&
-        Previous.isSingleResult()) {
-      NamedDecl *PreviousDecl = Previous.getFoundDecl();
-      if (VarTemplateDecl *VarTmpl = dyn_cast<VarTemplateDecl>(PreviousDecl)) {
-        if (VarTmpl->isConcept()) {
-          Diag(NewVD->getLocation(), diag::err_concept_specialized)
-              << 1                            /*variable*/
-              << (IsPartialSpecialization ? 2 /*partially specialized*/
-                                          : 1 /*explicitly specialized*/);
-          Diag(VarTmpl->getLocation(), diag::note_previous_declaration);
-          NewVD->setInvalidDecl();
-        }
-      }
-    }
-
     if (NewTemplate) {
       VarTemplateDecl *PrevVarTemplate =
           NewVD->getPreviousDecl()
@@ -6929,9 +6885,9 @@
   }
 
   if (D.isRedeclaration() && !Previous.empty()) {
-    checkDLLAttributeRedeclaration(
-        *this, dyn_cast<NamedDecl>(Previous.getRepresentativeDecl()), NewVD,
-        IsMemberSpecialization, D.isFunctionDefinition());
+    NamedDecl *Prev = Previous.getRepresentativeDecl();
+    checkDLLAttributeRedeclaration(*this, Prev, NewVD, IsMemberSpecialization,
+                                   D.isFunctionDefinition());
   }
 
   if (NewTemplate) {
@@ -7604,16 +7560,14 @@
 static void ReportOverrides(Sema& S, unsigned DiagID, const CXXMethodDecl *MD,
                             OverrideErrorKind OEK = OEK_All) {
   S.Diag(MD->getLocation(), DiagID) << MD->getDeclName();
-  for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
-                                      E = MD->end_overridden_methods();
-       I != E; ++I) {
+  for (const CXXMethodDecl *O : MD->overridden_methods()) {
     // This check (& the OEK parameter) could be replaced by a predicate, but
     // without lambdas that would be overkill. This is still nicer than writing
     // out the diag loop 3 times.
     if ((OEK == OEK_All) ||
-        (OEK == OEK_NonDeleted && !(*I)->isDeleted()) ||
-        (OEK == OEK_Deleted && (*I)->isDeleted()))
-      S.Diag((*I)->getLocation(), diag::note_overridden_virtual_function);
+        (OEK == OEK_NonDeleted && !O->isDeleted()) ||
+        (OEK == OEK_Deleted && O->isDeleted()))
+      S.Diag(O->getLocation(), diag::note_overridden_virtual_function);
   }
 }
 
@@ -8315,7 +8269,6 @@
     bool isVirtual = D.getDeclSpec().isVirtualSpecified();
     bool isExplicit = D.getDeclSpec().isExplicitSpecified();
     bool isConstexpr = D.getDeclSpec().isConstexprSpecified();
-    bool isConcept = D.getDeclSpec().isConceptSpecified();
     isFriend = D.getDeclSpec().isFriendSpecified();
     if (isFriend && !isInline && D.isFunctionDefinition()) {
       // C++ [class.friend]p5
@@ -8351,7 +8304,7 @@
             MatchTemplateParametersToScopeSpecifier(
                 D.getDeclSpec().getLocStart(), D.getIdentifierLoc(),
                 D.getCXXScopeSpec(),
-                D.getName().getKind() == UnqualifiedId::IK_TemplateId
+                D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId
                     ? D.getName().TemplateId
                     : nullptr,
                 TemplateParamLists, isFriend, isMemberSpecialization,
@@ -8408,7 +8361,7 @@
           // and clearly the user wants a template specialization.  So
           // we need to insert '<>' after the name.
           SourceLocation InsertLoc;
-          if (D.getName().getKind() != UnqualifiedId::IK_TemplateId) {
+          if (D.getName().getKind() != UnqualifiedIdKind::IK_TemplateId) {
             InsertLoc = D.getName().getSourceRange().getEnd();
             InsertLoc = getLocForEndOfToken(InsertLoc);
           }
@@ -8527,89 +8480,6 @@
         Diag(D.getDeclSpec().getConstexprSpecLoc(), diag::err_constexpr_dtor);
     }
 
-    if (isConcept) {
-      // This is a function concept.
-      if (FunctionTemplateDecl *FTD = NewFD->getDescribedFunctionTemplate())
-        FTD->setConcept();
-
-      // C++ Concepts TS [dcl.spec.concept]p1: The concept specifier shall be
-      // applied only to the definition of a function template [...]
-      if (!D.isFunctionDefinition()) {
-        Diag(D.getDeclSpec().getConceptSpecLoc(),
-             diag::err_function_concept_not_defined);
-        NewFD->setInvalidDecl();
-      }
-
-      // C++ Concepts TS [dcl.spec.concept]p1: [...] A function concept shall
-      // have no exception-specification and is treated as if it were specified
-      // with noexcept(true) (15.4). [...]
-      if (const FunctionProtoType *FPT = R->getAs<FunctionProtoType>()) {
-        if (FPT->hasExceptionSpec()) {
-          SourceRange Range;
-          if (D.isFunctionDeclarator())
-            Range = D.getFunctionTypeInfo().getExceptionSpecRange();
-          Diag(NewFD->getLocation(), diag::err_function_concept_exception_spec)
-              << FixItHint::CreateRemoval(Range);
-          NewFD->setInvalidDecl();
-        } else {
-          Context.adjustExceptionSpec(NewFD, EST_BasicNoexcept);
-        }
-
-        // C++ Concepts TS [dcl.spec.concept]p5: A function concept has the
-        // following restrictions:
-        // - The declared return type shall have the type bool.
-        if (!Context.hasSameType(FPT->getReturnType(), Context.BoolTy)) {
-          Diag(D.getIdentifierLoc(), diag::err_function_concept_bool_ret);
-          NewFD->setInvalidDecl();
-        }
-
-        // C++ Concepts TS [dcl.spec.concept]p5: A function concept has the
-        // following restrictions:
-        // - The declaration's parameter list shall be equivalent to an empty
-        //   parameter list.
-        if (FPT->getNumParams() > 0 || FPT->isVariadic())
-          Diag(NewFD->getLocation(), diag::err_function_concept_with_params);
-      }
-
-      // C++ Concepts TS [dcl.spec.concept]p2: Every concept definition is
-      // implicity defined to be a constexpr declaration (implicitly inline)
-      NewFD->setImplicitlyInline();
-
-      // C++ Concepts TS [dcl.spec.concept]p2: A concept definition shall not
-      // be declared with the thread_local, inline, friend, or constexpr
-      // specifiers, [...]
-      if (isInline) {
-        Diag(D.getDeclSpec().getInlineSpecLoc(),
-             diag::err_concept_decl_invalid_specifiers)
-            << 1 << 1;
-        NewFD->setInvalidDecl(true);
-      }
-
-      if (isFriend) {
-        Diag(D.getDeclSpec().getFriendSpecLoc(),
-             diag::err_concept_decl_invalid_specifiers)
-            << 1 << 2;
-        NewFD->setInvalidDecl(true);
-      }
-
-      if (isConstexpr) {
-        Diag(D.getDeclSpec().getConstexprSpecLoc(),
-             diag::err_concept_decl_invalid_specifiers)
-            << 1 << 3;
-        NewFD->setInvalidDecl(true);
-      }
-
-      // C++ Concepts TS [dcl.spec.concept]p1: The concept specifier shall be
-      // applied only to the definition of a function template or variable
-      // template, declared in namespace scope.
-      if (isFunctionTemplateSpecialization) {
-        Diag(D.getDeclSpec().getConceptSpecLoc(),
-             diag::err_concept_specified_specialization) << 1;
-        NewFD->setInvalidDecl(true);
-        return NewFD;
-      }
-    }
-
     // If __module_private__ was specified, mark the function accordingly.
     if (D.getDeclSpec().isModulePrivateSpecified()) {
       if (isFunctionTemplateSpecialization) {
@@ -8892,7 +8762,7 @@
 
     // If the declarator is a template-id, translate the parser's template
     // argument list into our AST format.
-    if (D.getName().getKind() == UnqualifiedId::IK_TemplateId) {
+    if (D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId) {
       TemplateIdAnnotation *TemplateId = D.getName().TemplateId;
       TemplateArgs.setLAngleLoc(TemplateId->LAngleLoc);
       TemplateArgs.setRAngleLoc(TemplateId->RAngleLoc);
@@ -8962,10 +8832,10 @@
           diag::ext_function_specialization_in_class :
           diag::err_function_specialization_in_class)
           << NewFD->getDeclName();
-      } else if (CheckFunctionTemplateSpecialization(NewFD,
-                                  (HasExplicitTemplateArgs ? &TemplateArgs
-                                                           : nullptr),
-                                                     Previous))
+      } else if (!NewFD->isInvalidDecl() &&
+                 CheckFunctionTemplateSpecialization(
+                     NewFD, (HasExplicitTemplateArgs ? &TemplateArgs : nullptr),
+                     Previous))
         NewFD->setInvalidDecl();
 
       // C++ [dcl.stc]p1:
@@ -9167,10 +9037,11 @@
   NewFD->setRangeEnd(D.getSourceRange().getEnd());
 
   if (D.isRedeclaration() && !Previous.empty()) {
-    checkDLLAttributeRedeclaration(
-        *this, dyn_cast<NamedDecl>(Previous.getRepresentativeDecl()), NewFD,
-        isMemberSpecialization || isFunctionTemplateSpecialization,
-        D.isFunctionDefinition());
+    NamedDecl *Prev = Previous.getRepresentativeDecl();
+    checkDLLAttributeRedeclaration(*this, Prev, NewFD,
+                                   isMemberSpecialization ||
+                                       isFunctionTemplateSpecialization,
+                                   D.isFunctionDefinition());
   }
 
   if (getLangOpts().CUDA) {
@@ -9287,6 +9158,341 @@
            D->getFriendObjectKind() != Decl::FOK_None);
 }
 
+/// \brief Check the target attribute of the function for MultiVersion
+/// validity.
+///
+/// Returns true if there was an error, false otherwise.
+static bool CheckMultiVersionValue(Sema &S, const FunctionDecl *FD) {
+  const auto *TA = FD->getAttr<TargetAttr>();
+  assert(TA && "MultiVersion Candidate requires a target attribute");
+  TargetAttr::ParsedTargetAttr ParseInfo = TA->parse();
+  const TargetInfo &TargetInfo = S.Context.getTargetInfo();
+  enum ErrType { Feature = 0, Architecture = 1 };
+
+  if (!ParseInfo.Architecture.empty() &&
+      !TargetInfo.validateCpuIs(ParseInfo.Architecture)) {
+    S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
+        << Architecture << ParseInfo.Architecture;
+    return true;
+  }
+
+  for (const auto &Feat : ParseInfo.Features) {
+    auto BareFeat = StringRef{Feat}.substr(1);
+    if (Feat[0] == '-') {
+      S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
+          << Feature << ("no-" + BareFeat).str();
+      return true;
+    }
+
+    if (!TargetInfo.validateCpuSupports(BareFeat) ||
+        !TargetInfo.isValidFeatureName(BareFeat)) {
+      S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
+          << Feature << BareFeat;
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool CheckMultiVersionAdditionalRules(Sema &S, const FunctionDecl *OldFD,
+                                             const FunctionDecl *NewFD,
+                                             bool CausesMV) {
+  enum DoesntSupport {
+    FuncTemplates = 0,
+    VirtFuncs = 1,
+    DeducedReturn = 2,
+    Constructors = 3,
+    Destructors = 4,
+    DeletedFuncs = 5,
+    DefaultedFuncs = 6
+  };
+  enum Different {
+    CallingConv = 0,
+    ReturnType = 1,
+    ConstexprSpec = 2,
+    InlineSpec = 3,
+    StorageClass = 4,
+    Linkage = 5
+  };
+
+  // For now, disallow all other attributes.  These should be opt-in, but
+  // an analysis of all of them is a future FIXME.
+  if (CausesMV && OldFD &&
+      std::distance(OldFD->attr_begin(), OldFD->attr_end()) != 1) {
+    S.Diag(OldFD->getLocation(), diag::err_multiversion_no_other_attrs);
+    S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here);
+    return true;
+  }
+
+  if (std::distance(NewFD->attr_begin(), NewFD->attr_end()) != 1)
+    return S.Diag(NewFD->getLocation(), diag::err_multiversion_no_other_attrs);
+
+  if (NewFD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate)
+    return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support)
+           << FuncTemplates;
+
+  if (const auto *NewCXXFD = dyn_cast<CXXMethodDecl>(NewFD)) {
+    if (NewCXXFD->isVirtual())
+      return S.Diag(NewCXXFD->getLocation(),
+                    diag::err_multiversion_doesnt_support)
+             << VirtFuncs;
+
+    if (const auto *NewCXXCtor = dyn_cast<CXXConstructorDecl>(NewFD))
+      return S.Diag(NewCXXCtor->getLocation(),
+                    diag::err_multiversion_doesnt_support)
+             << Constructors;
+
+    if (const auto *NewCXXDtor = dyn_cast<CXXDestructorDecl>(NewFD))
+      return S.Diag(NewCXXDtor->getLocation(),
+                    diag::err_multiversion_doesnt_support)
+             << Destructors;
+  }
+
+  if (NewFD->isDeleted())
+    return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support)
+           << DeletedFuncs;
+
+  if (NewFD->isDefaulted())
+    return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support)
+           << DefaultedFuncs;
+
+  QualType NewQType = S.getASTContext().getCanonicalType(NewFD->getType());
+  const auto *NewType = cast<FunctionType>(NewQType);
+  QualType NewReturnType = NewType->getReturnType();
+
+  if (NewReturnType->isUndeducedType())
+    return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support)
+           << DeducedReturn;
+
+  // Only allow transition to MultiVersion if it hasn't been used.
+  if (OldFD && CausesMV && OldFD->isUsed(false))
+    return S.Diag(NewFD->getLocation(), diag::err_multiversion_after_used);
+
+  // Ensure the return type is identical.
+  if (OldFD) {
+    QualType OldQType = S.getASTContext().getCanonicalType(OldFD->getType());
+    const auto *OldType = cast<FunctionType>(OldQType);
+    FunctionType::ExtInfo OldTypeInfo = OldType->getExtInfo();
+    FunctionType::ExtInfo NewTypeInfo = NewType->getExtInfo();
+
+    if (OldTypeInfo.getCC() != NewTypeInfo.getCC())
+      return S.Diag(NewFD->getLocation(), diag::err_multiversion_diff)
+             << CallingConv;
+
+    QualType OldReturnType = OldType->getReturnType();
+
+    if (OldReturnType != NewReturnType)
+      return S.Diag(NewFD->getLocation(), diag::err_multiversion_diff)
+             << ReturnType;
+
+    if (OldFD->isConstexpr() != NewFD->isConstexpr())
+      return S.Diag(NewFD->getLocation(), diag::err_multiversion_diff)
+             << ConstexprSpec;
+
+    if (OldFD->isInlineSpecified() != NewFD->isInlineSpecified())
+      return S.Diag(NewFD->getLocation(), diag::err_multiversion_diff)
+             << InlineSpec;
+
+    if (OldFD->getStorageClass() != NewFD->getStorageClass())
+      return S.Diag(NewFD->getLocation(), diag::err_multiversion_diff)
+             << StorageClass;
+
+    if (OldFD->isExternC() != NewFD->isExternC())
+      return S.Diag(NewFD->getLocation(), diag::err_multiversion_diff)
+             << Linkage;
+
+    if (S.CheckEquivalentExceptionSpec(
+            OldFD->getType()->getAs<FunctionProtoType>(), OldFD->getLocation(),
+            NewFD->getType()->getAs<FunctionProtoType>(), NewFD->getLocation()))
+      return true;
+  }
+  return false;
+}
+
+/// \brief Check the validity of a mulitversion function declaration.
+/// Also sets the multiversion'ness' of the function itself.
+///
+/// This sets NewFD->isInvalidDecl() to true if there was an error.
+///
+/// Returns true if there was an error, false otherwise.
+static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
+                                      bool &Redeclaration, NamedDecl *&OldDecl,
+                                      bool &MergeTypeWithPrevious,
+                                      LookupResult &Previous) {
+  const auto *NewTA = NewFD->getAttr<TargetAttr>();
+  if (NewFD->isMain()) {
+    if (NewTA && NewTA->isDefaultVersion()) {
+      S.Diag(NewFD->getLocation(), diag::err_multiversion_not_allowed_on_main);
+      NewFD->setInvalidDecl();
+      return true;
+    }
+    return false;
+  }
+
+  // If there is no matching previous decl, only 'default' can
+  // cause MultiVersioning.
+  if (!OldDecl) {
+    if (NewTA && NewTA->isDefaultVersion()) {
+      if (!NewFD->getType()->getAs<FunctionProtoType>()) {
+        S.Diag(NewFD->getLocation(), diag::err_multiversion_noproto);
+        NewFD->setInvalidDecl();
+        return true;
+      }
+      if (CheckMultiVersionAdditionalRules(S, nullptr, NewFD, true)) {
+        NewFD->setInvalidDecl();
+        return true;
+      }
+      if (!S.getASTContext().getTargetInfo().supportsMultiVersioning()) {
+        S.Diag(NewFD->getLocation(), diag::err_multiversion_not_supported);
+        NewFD->setInvalidDecl();
+        return true;
+      }
+
+      NewFD->setIsMultiVersion();
+    }
+    return false;
+  }
+
+  if (OldDecl->getDeclContext()->getRedeclContext() !=
+      NewFD->getDeclContext()->getRedeclContext())
+    return false;
+
+  FunctionDecl *OldFD = OldDecl->getAsFunction();
+  // Unresolved 'using' statements (the other way OldDecl can be not a function)
+  // likely cannot cause a problem here.
+  if (!OldFD)
+    return false;
+
+  if (!OldFD->isMultiVersion() && !NewTA)
+    return false;
+
+  if (OldFD->isMultiVersion() && !NewTA) {
+    S.Diag(NewFD->getLocation(), diag::err_target_required_in_redecl);
+    NewFD->setInvalidDecl();
+    return true;
+  }
+
+  TargetAttr::ParsedTargetAttr NewParsed = NewTA->parse();
+  // Sort order doesn't matter, it just needs to be consistent.
+  std::sort(NewParsed.Features.begin(), NewParsed.Features.end());
+
+  const auto *OldTA = OldFD->getAttr<TargetAttr>();
+  if (!OldFD->isMultiVersion()) {
+    // If the old decl is NOT MultiVersioned yet, and we don't cause that
+    // to change, this is a simple redeclaration.
+    if (!OldTA || OldTA->getFeaturesStr() == NewTA->getFeaturesStr())
+      return false;
+
+    // Otherwise, this decl causes MultiVersioning.
+    if (!S.getASTContext().getTargetInfo().supportsMultiVersioning()) {
+      S.Diag(NewFD->getLocation(), diag::err_multiversion_not_supported);
+      S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
+      NewFD->setInvalidDecl();
+      return true;
+    }
+
+    if (!OldFD->getType()->getAs<FunctionProtoType>()) {
+      S.Diag(OldFD->getLocation(), diag::err_multiversion_noproto);
+      S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here);
+      NewFD->setInvalidDecl();
+      return true;
+    }
+
+    if (CheckMultiVersionValue(S, NewFD)) {
+      NewFD->setInvalidDecl();
+      return true;
+    }
+
+    if (CheckMultiVersionValue(S, OldFD)) {
+      S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here);
+      NewFD->setInvalidDecl();
+      return true;
+    }
+
+    TargetAttr::ParsedTargetAttr OldParsed =
+        OldTA->parse(std::less<std::string>());
+
+    if (OldParsed == NewParsed) {
+      S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate);
+      S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
+      NewFD->setInvalidDecl();
+      return true;
+    }
+
+    for (const auto *FD : OldFD->redecls()) {
+      const auto *CurTA = FD->getAttr<TargetAttr>();
+      if (!CurTA || CurTA->isInherited()) {
+        S.Diag(FD->getLocation(), diag::err_target_required_in_redecl);
+        S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here);
+        NewFD->setInvalidDecl();
+        return true;
+      }
+    }
+
+    if (CheckMultiVersionAdditionalRules(S, OldFD, NewFD, true)) {
+      NewFD->setInvalidDecl();
+      return true;
+    }
+
+    OldFD->setIsMultiVersion();
+    NewFD->setIsMultiVersion();
+    Redeclaration = false;
+    MergeTypeWithPrevious = false;
+    OldDecl = nullptr;
+    Previous.clear();
+    return false;
+  }
+
+  bool UseMemberUsingDeclRules =
+      S.CurContext->isRecord() && !NewFD->getFriendObjectKind();
+
+  // Next, check ALL non-overloads to see if this is a redeclaration of a
+  // previous member of the MultiVersion set.
+  for (NamedDecl *ND : Previous) {
+    FunctionDecl *CurFD = ND->getAsFunction();
+    if (!CurFD)
+      continue;
+    if (S.IsOverload(NewFD, CurFD, UseMemberUsingDeclRules))
+      continue;
+
+    const auto *CurTA = CurFD->getAttr<TargetAttr>();
+    if (CurTA->getFeaturesStr() == NewTA->getFeaturesStr()) {
+      NewFD->setIsMultiVersion();
+      Redeclaration = true;
+      OldDecl = ND;
+      return false;
+    }
+
+    TargetAttr::ParsedTargetAttr CurParsed =
+        CurTA->parse(std::less<std::string>());
+
+    if (CurParsed == NewParsed) {
+      S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate);
+      S.Diag(CurFD->getLocation(), diag::note_previous_declaration);
+      NewFD->setInvalidDecl();
+      return true;
+    }
+  }
+
+  // Else, this is simply a non-redecl case.
+  if (CheckMultiVersionValue(S, NewFD)) {
+    NewFD->setInvalidDecl();
+    return true;
+  }
+
+  if (CheckMultiVersionAdditionalRules(S, OldFD, NewFD, false)) {
+    NewFD->setInvalidDecl();
+    return true;
+  }
+
+  NewFD->setIsMultiVersion();
+  Redeclaration = false;
+  MergeTypeWithPrevious = false;
+  OldDecl = nullptr;
+  Previous.clear();
+  return false;
+}
+
 /// \brief Perform semantic checking of a new function declaration.
 ///
 /// Performs semantic analysis of the new function declaration
@@ -9374,6 +9580,10 @@
     }
   }
 
+  if (CheckMultiVersionFunction(*this, NewFD, Redeclaration, OldDecl,
+                                MergeTypeWithPrevious, Previous))
+    return Redeclaration;
+
   // C++11 [dcl.constexpr]p8:
   //   A constexpr specifier for a non-static member function that is not
   //   a constructor declares that member function to be const.
@@ -9425,12 +9635,13 @@
 
     if (FunctionTemplateDecl *OldTemplateDecl
                                   = dyn_cast<FunctionTemplateDecl>(OldDecl)) {
-      NewFD->setPreviousDeclaration(OldTemplateDecl->getTemplatedDecl());
+      auto *OldFD = OldTemplateDecl->getTemplatedDecl();
+      NewFD->setPreviousDeclaration(OldFD);
+      adjustDeclContextForDeclaratorDecl(NewFD, OldFD);
       FunctionTemplateDecl *NewTemplateDecl
         = NewFD->getDescribedFunctionTemplate();
       assert(NewTemplateDecl && "Template/non-template mismatch");
-      if (CXXMethodDecl *Method
-            = dyn_cast<CXXMethodDecl>(NewTemplateDecl->getTemplatedDecl())) {
+      if (auto *Method = dyn_cast<CXXMethodDecl>(NewFD)) {
         Method->setAccess(OldTemplateDecl->getAccess());
         NewTemplateDecl->setAccess(OldTemplateDecl->getAccess());
       }
@@ -9443,22 +9654,22 @@
         assert(OldTemplateDecl->isMemberSpecialization());
         // Explicit specializations of a member template do not inherit deleted
         // status from the parent member template that they are specializing.
-        if (OldTemplateDecl->getTemplatedDecl()->isDeleted()) {
-          FunctionDecl *const OldTemplatedDecl =
-              OldTemplateDecl->getTemplatedDecl();
+        if (OldFD->isDeleted()) {
           // FIXME: This assert will not hold in the presence of modules.
-          assert(OldTemplatedDecl->getCanonicalDecl() == OldTemplatedDecl);
+          assert(OldFD->getCanonicalDecl() == OldFD);
           // FIXME: We need an update record for this AST mutation.
-          OldTemplatedDecl->setDeletedAsWritten(false);
+          OldFD->setDeletedAsWritten(false);
         }
       }
 
     } else {
       if (shouldLinkDependentDeclWithPrevious(NewFD, OldDecl)) {
+        auto *OldFD = cast<FunctionDecl>(OldDecl);
         // This needs to happen first so that 'inline' propagates.
-        NewFD->setPreviousDeclaration(cast<FunctionDecl>(OldDecl));
+        NewFD->setPreviousDeclaration(OldFD);
+        adjustDeclContextForDeclaratorDecl(NewFD, OldFD);
         if (isa<CXXMethodDecl>(NewFD))
-          NewFD->setAccess(OldDecl->getAccess());
+          NewFD->setAccess(OldFD->getAccess());
       }
     }
   } else if (!getLangOpts().CPlusPlus && MayNeedOverloadableChecks &&
@@ -9601,7 +9812,7 @@
     // return type. (Exception specifications on the function itself are OK in
     // most cases, and exception specifications are not permitted in most other
     // contexts where they could make it into a mangling.)
-    if (!getLangOpts().CPlusPlus1z && !NewFD->getPrimaryTemplate()) {
+    if (!getLangOpts().CPlusPlus17 && !NewFD->getPrimaryTemplate()) {
       auto HasNoexcept = [&](QualType T) -> bool {
         // Strip off declarator chunks that could be between us and a function
         // type. We don't need to look far, exception specifications are very
@@ -9673,6 +9884,13 @@
   assert(T->isFunctionType() && "function decl is not of function type");
   const FunctionType* FT = T->castAs<FunctionType>();
 
+  // Set default calling convention for main()
+  if (FT->getCallConv() != CC_C) {
+    FT = Context.adjustFunctionType(FT, FT->getExtInfo().withCallingConv(CC_C));
+    FD->setType(QualType(FT, 0));
+    T = Context.getCanonicalType(FD->getType());
+  }
+
   if (getLangOpts().GNUMode && !getLangOpts().CPlusPlus) {
     // In C with GNU extensions we allow main() to have non-integer return
     // type, but we should warn about the extension, and we disable the
@@ -10804,7 +11022,7 @@
       if (Var->isStaticDataMember()) {
         // C++1z removes the relevant rule; the in-class declaration is always
         // a definition there.
-        if (!getLangOpts().CPlusPlus1z) {
+        if (!getLangOpts().CPlusPlus17) {
           Diag(Var->getLocation(),
                diag::err_constexpr_static_mem_var_requires_init)
             << Var->getDeclName();
@@ -10818,17 +11036,6 @@
       }
     }
 
-    // C++ Concepts TS [dcl.spec.concept]p1: [...]  A variable template
-    // definition having the concept specifier is called a variable concept. A
-    // concept definition refers to [...] a variable concept and its initializer.
-    if (VarTemplateDecl *VTD = Var->getDescribedVarTemplate()) {
-      if (VTD->isConcept()) {
-        Diag(Var->getLocation(), diag::err_var_concept_not_initialized);
-        Var->setInvalidDecl();
-        return;
-      }
-    }
-
     // OpenCL v1.1 s6.5.3: variables declared in the constant address space must
     // be initialized.
     if (!Var->isInvalidDecl() &&
@@ -10848,7 +11055,7 @@
       // that has an in-class initializer, so we type-check this like
       // a declaration.
       //
-      // Fall through
+      LLVM_FALLTHROUGH;
 
     case VarDecl::DeclarationOnly:
       // It's only a declaration.
@@ -11070,7 +11277,7 @@
   DS.SetTypeSpecType(DeclSpec::TST_auto, IdentLoc, PrevSpec, DiagID,
                      getPrintingPolicy());
 
-  Declarator D(DS, Declarator::ForContext);
+  Declarator D(DS, DeclaratorContext::ForContext);
   D.SetIdentifier(Ident, IdentLoc);
   D.takeAttributes(Attrs, AttrEnd);
 
@@ -11116,6 +11323,9 @@
     }
   }
 
+  if (var->getType().isDestructedType() == QualType::DK_nontrivial_c_struct)
+    getCurFunction()->setHasBranchProtectedScope();
+
   // Warn about externally-visible variables being defined without a
   // prior declaration.  We only want to do this for global
   // declarations, but we also specifically need to avoid doing it for
@@ -11124,6 +11334,8 @@
   if (var->isThisDeclarationADefinition() &&
       var->getDeclContext()->getRedeclContext()->isFileContext() &&
       var->isExternallyVisible() && var->hasLinkage() &&
+      !var->isInline() && !var->getDescribedVarTemplate() &&
+      !isTemplateInstantiation(var->getTemplateSpecializationKind()) &&
       !getDiagnostics().isIgnored(diag::warn_missing_variable_declarations,
                                   var->getLocation())) {
     // Find a previous declaration that's not a definition.
@@ -11718,6 +11930,14 @@
   StorageClass SC = SC_None;
   if (DS.getStorageClassSpec() == DeclSpec::SCS_register) {
     SC = SC_Register;
+    // In C++11, the 'register' storage class specifier is deprecated.
+    // In C++17, it is not allowed, but we tolerate it as an extension.
+    if (getLangOpts().CPlusPlus11) {
+      Diag(DS.getStorageClassSpecLoc(),
+           getLangOpts().CPlusPlus17 ? diag::ext_register_storage_class
+                                     : diag::warn_deprecated_register)
+        << FixItHint::CreateRemoval(DS.getStorageClassSpecLoc());
+    }
   } else if (getLangOpts().CPlusPlus &&
              DS.getStorageClassSpec() == DeclSpec::SCS_auto) {
     SC = SC_Auto;
@@ -11732,12 +11952,10 @@
       << DeclSpec::getSpecifierName(TSCS);
   if (DS.isInlineSpecified())
     Diag(DS.getInlineSpecLoc(), diag::err_inline_non_function)
-        << getLangOpts().CPlusPlus1z;
+        << getLangOpts().CPlusPlus17;
   if (DS.isConstexprSpecified())
     Diag(DS.getConstexprSpecLoc(), diag::err_invalid_constexpr)
       << 0;
-  if (DS.isConceptSpecified())
-    Diag(DS.getConceptSpecLoc(), diag::err_concept_wrong_decl_kind);
 
   DiagnoseFunctionSpecifiers(DS);
 
@@ -11980,7 +12198,7 @@
         // Use the identifier location for the type source range.
         DS.SetRangeStart(FTI.Params[i].IdentLoc);
         DS.SetRangeEnd(FTI.Params[i].IdentLoc);
-        Declarator ParamD(DS, Declarator::KNRTypeListContext);
+        Declarator ParamD(DS, DeclaratorContext::KNRTypeListContext);
         ParamD.SetIdentifier(FTI.Params[i].Ident, FTI.Params[i].IdentLoc);
         FTI.Params[i].Param = ActOnParamDeclarator(S, ParamD);
       }
@@ -12065,9 +12283,45 @@
                                    const FunctionDecl *EffectiveDefinition,
                                    SkipBodyInfo *SkipBody) {
   const FunctionDecl *Definition = EffectiveDefinition;
+  if (!Definition && !FD->isDefined(Definition) && !FD->isCXXClassMember()) {
+    // If this is a friend function defined in a class template, it does not
+    // have a body until it is used, nevertheless it is a definition, see
+    // [temp.inst]p2:
+    //
+    // ... for the purpose of determining whether an instantiated redeclaration
+    // is valid according to [basic.def.odr] and [class.mem], a declaration that
+    // corresponds to a definition in the template is considered to be a
+    // definition.
+    //
+    // The following code must produce redefinition error:
+    //
+    //     template<typename T> struct C20 { friend void func_20() {} };
+    //     C20<int> c20i;
+    //     void func_20() {}
+    //
+    for (auto I : FD->redecls()) {
+      if (I != FD && !I->isInvalidDecl() &&
+          I->getFriendObjectKind() != Decl::FOK_None) {
+        if (FunctionDecl *Original = I->getInstantiatedFromMemberFunction()) {
+          if (FunctionDecl *OrigFD = FD->getInstantiatedFromMemberFunction()) {
+            // A merged copy of the same function, instantiated as a member of
+            // the same class, is OK.
+            if (declaresSameEntity(OrigFD, Original) &&
+                declaresSameEntity(cast<Decl>(I->getLexicalDeclContext()),
+                                   cast<Decl>(FD->getLexicalDeclContext())))
+              continue;
+          }
+
+          if (Original->isThisDeclarationADefinition()) {
+            Definition = I;
+            break;
+          }
+        }
+      }
+    }
+  }
   if (!Definition)
-    if (!FD->isDefined(Definition))
-      return;
+    return;
 
   if (canRedefineFunction(Definition, getLangOpts()))
     return;
@@ -12350,9 +12604,11 @@
 }
 
 Decl *Sema::ActOnSkippedFunctionBody(Decl *Decl) {
-  if (FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Decl))
+  if (!Decl)
+    return nullptr;
+  if (FunctionDecl *FD = Decl->getAsFunction())
     FD->setHasSkippedBody();
-  else if (ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(Decl))
+  else if (ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(Decl))
     MD->setHasSkippedBody();
   return Decl;
 }
@@ -12676,10 +12932,20 @@
 /// call, forming a call to an implicitly defined function (per C99 6.5.1p2).
 NamedDecl *Sema::ImplicitlyDefineFunction(SourceLocation Loc,
                                           IdentifierInfo &II, Scope *S) {
+  // Find the scope in which the identifier is injected and the corresponding
+  // DeclContext.
+  // FIXME: C89 does not say what happens if there is no enclosing block scope.
+  // In that case, we inject the declaration into the translation unit scope
+  // instead.
   Scope *BlockScope = S;
   while (!BlockScope->isCompoundStmtScope() && BlockScope->getParent())
     BlockScope = BlockScope->getParent();
 
+  Scope *ContextScope = BlockScope;
+  while (!ContextScope->getEntity())
+    ContextScope = ContextScope->getParent();
+  ContextRAII SavedContext(*this, ContextScope->getEntity());
+
   // Before we produce a declaration for an implicitly defined
   // function, see whether there was a locally-scoped declaration of
   // this name as a function or variable. If so, use that
@@ -12743,7 +13009,7 @@
   (void)Error; // Silence warning.
   assert(!Error && "Error setting up implicit decl!");
   SourceLocation NoLoc;
-  Declarator D(DS, Declarator::BlockContext);
+  Declarator D(DS, DeclaratorContext::BlockContext);
   D.AddTypeInfo(DeclaratorChunk::getFunction(/*HasProto=*/false,
                                              /*IsAmbiguous=*/false,
                                              /*LParenLoc=*/NoLoc,
@@ -12823,15 +13089,33 @@
                                               FD->getLocation()));
     }
 
-    // Mark const if we don't care about errno and that is the only
-    // thing preventing the function from being const. This allows
-    // IRgen to use LLVM intrinsics for such functions.
-    if (!getLangOpts().MathErrno &&
-        Context.BuiltinInfo.isConstWithoutErrno(BuiltinID)) {
-      if (!FD->hasAttr<ConstAttr>())
-        FD->addAttr(ConstAttr::CreateImplicit(Context, FD->getLocation()));
-    }
+    // Mark const if we don't care about errno and that is the only thing
+    // preventing the function from being const. This allows IRgen to use LLVM
+    // intrinsics for such functions.
+    if (!getLangOpts().MathErrno && !FD->hasAttr<ConstAttr>() &&
+        Context.BuiltinInfo.isConstWithoutErrno(BuiltinID))
+      FD->addAttr(ConstAttr::CreateImplicit(Context, FD->getLocation()));
 
+    // We make "fma" on GNU or Windows const because we know it does not set
+    // errno in those environments even though it could set errno based on the
+    // C standard.
+    const llvm::Triple &Trip = Context.getTargetInfo().getTriple();
+    if ((Trip.isGNUEnvironment() || Trip.isOSMSVCRT()) &&
+        !FD->hasAttr<ConstAttr>()) {
+      switch (BuiltinID) {
+      case Builtin::BI__builtin_fma:
+      case Builtin::BI__builtin_fmaf:
+      case Builtin::BI__builtin_fmal:
+      case Builtin::BIfma:
+      case Builtin::BIfmaf:
+      case Builtin::BIfmal:
+        FD->addAttr(ConstAttr::CreateImplicit(Context, FD->getLocation()));
+        break;
+      default:
+        break;
+      }
+    }
+  
     if (Context.BuiltinInfo.isReturnsTwice(BuiltinID) &&
         !FD->hasAttr<ReturnsTwiceAttr>())
       FD->addAttr(ReturnsTwiceAttr::CreateImplicit(Context,
@@ -12892,7 +13176,7 @@
     // We already have a __builtin___CFStringMakeConstantString,
     // but builds that use -fno-constant-cfstrings don't go through that.
     if (!FD->hasAttr<FormatArgAttr>())
-      FD->addAttr(FormatArgAttr::CreateImplicit(Context, 1,
+      FD->addAttr(FormatArgAttr::CreateImplicit(Context, ParamIdx(1, FD),
                                                 FD->getLocation()));
   }
 }
@@ -12972,11 +13256,9 @@
 
 /// Check whether this is a valid redeclaration of a previous enumeration.
 /// \return true if the redeclaration was invalid.
-bool Sema::CheckEnumRedeclaration(
-    SourceLocation EnumLoc, bool IsScoped, QualType EnumUnderlyingTy,
-    bool EnumUnderlyingIsImplicit, const EnumDecl *Prev) {
-  bool IsFixed = !EnumUnderlyingTy.isNull();
-
+bool Sema::CheckEnumRedeclaration(SourceLocation EnumLoc, bool IsScoped,
+                                  QualType EnumUnderlyingTy, bool IsFixed,
+                                  const EnumDecl *Prev) {
   if (IsScoped != Prev->isScoped()) {
     Diag(EnumLoc, diag::err_enum_redeclare_scoped_mismatch)
       << Prev->isScoped();
@@ -12996,10 +13278,6 @@
           << Prev->getIntegerTypeRange();
       return true;
     }
-  } else if (IsFixed && !Prev->isFixed() && EnumUnderlyingIsImplicit) {
-    ;
-  } else if (!IsFixed && Prev->isFixed() && !Prev->getIntegerTypeSourceInfo()) {
-    ;
   } else if (IsFixed != Prev->isFixed()) {
     Diag(EnumLoc, diag::err_enum_redeclare_fixed_mismatch)
       << Prev->isFixed();
@@ -13195,7 +13473,7 @@
 }
 
 /// \brief Determine whether a tag originally declared in context \p OldDC can
-/// be redeclared with an unqualfied name in \p NewDC (assuming name lookup
+/// be redeclared with an unqualified name in \p NewDC (assuming name lookup
 /// found a declaration in \p OldDC as a previous decl, perhaps through a
 /// using-declaration).
 static bool isAcceptableTagRedeclContext(Sema &S, DeclContext *OldDC,
@@ -13295,14 +13573,14 @@
   // this early, because it's needed to detect if this is an incompatible
   // redeclaration.
   llvm::PointerUnion<const Type*, TypeSourceInfo*> EnumUnderlying;
-  bool EnumUnderlyingIsImplicit = false;
+  bool IsFixed = !UnderlyingType.isUnset() || ScopedEnum;
 
   if (Kind == TTK_Enum) {
-    if (UnderlyingType.isInvalid() || (!UnderlyingType.get() && ScopedEnum))
+    if (UnderlyingType.isInvalid() || (!UnderlyingType.get() && ScopedEnum)) {
       // No underlying type explicitly specified, or we failed to parse the
       // type, default to int.
       EnumUnderlying = Context.IntTy.getTypePtr();
-    else if (UnderlyingType.get()) {
+    } else if (UnderlyingType.get()) {
       // C++0x 7.2p2: The type-specifier-seq of an enum-base shall name an
       // integral type; any cv-qualification is ignored.
       TypeSourceInfo *TI = nullptr;
@@ -13318,11 +13596,12 @@
         EnumUnderlying = Context.IntTy.getTypePtr();
 
     } else if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
-      if (getLangOpts().MSVCCompat || TUK == TUK_Definition) {
-        // Microsoft enums are always of int type.
+      // For MSVC ABI compatibility, unfixed enums must use an underlying type
+      // of 'int'. However, if this is an unfixed forward declaration, don't set
+      // the underlying type unless the user enables -fms-compatibility. This
+      // makes unfixed forward declared enums incomplete and is more conforming.
+      if (TUK == TUK_Definition || getLangOpts().MSVCCompat)
         EnumUnderlying = Context.IntTy.getTypePtr();
-        EnumUnderlyingIsImplicit = true;
-      }
     }
   }
 
@@ -13348,8 +13627,7 @@
 
     if (Kind == TTK_Enum) {
       New = EnumDecl::Create(Context, SearchDC, KWLoc, Loc, Name, nullptr,
-                             ScopedEnum, ScopedEnumUsesClassTag,
-                             !EnumUnderlying.isNull());
+                             ScopedEnum, ScopedEnumUsesClassTag, IsFixed);
       // If this is an undefined enum, bail.
       if (TUK != TUK_Definition && !Invalid)
         return nullptr;
@@ -13728,7 +14006,7 @@
           // in which case we want the caller to bail out.
           if (CheckEnumRedeclaration(NameLoc.isValid() ? NameLoc : KWLoc,
                                      ScopedEnum, EnumUnderlyingTy,
-                                     EnumUnderlyingIsImplicit, PrevEnum))
+                                     IsFixed, PrevEnum))
             return TUK == TUK_Declaration ? PrevTagDecl : nullptr;
         }
 
@@ -13944,7 +14222,7 @@
     // enum X { A, B, C } D;    D should chain to X.
     New = EnumDecl::Create(Context, SearchDC, KWLoc, Loc, Name,
                            cast_or_null<EnumDecl>(PrevDecl), ScopedEnum,
-                           ScopedEnumUsesClassTag, !EnumUnderlying.isNull());
+                           ScopedEnumUsesClassTag, IsFixed);
 
     if (isStdAlignValT && (!StdAlignValT || getStdAlignValT()->isImplicit()))
       StdAlignValT = cast<EnumDecl>(New);
@@ -13952,8 +14230,7 @@
     // If this is an undefined enum, warn.
     if (TUK != TUK_Definition && !Invalid) {
       TagDecl *Def;
-      if (!EnumUnderlyingIsImplicit &&
-          (getLangOpts().CPlusPlus11 || getLangOpts().ObjC2) &&
+      if (IsFixed && (getLangOpts().CPlusPlus11 || getLangOpts().ObjC2) &&
           cast<EnumDecl>(New)->isFixed()) {
         // C++0x: 7.2p2: opaque-enum-declaration.
         // Conflicts are diagnosed above. Do nothing.
@@ -13985,6 +14262,7 @@
       else
         ED->setIntegerType(QualType(EnumUnderlying.get<const Type*>(), 0));
       ED->setPromotionType(ED->getIntegerType());
+      assert(ED->isComplete() && "enum with type should be complete");
     }
   } else {
     // struct/union/class
@@ -14458,7 +14736,7 @@
 
   if (D.getDeclSpec().isInlineSpecified())
     Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
-        << getLangOpts().CPlusPlus1z;
+        << getLangOpts().CPlusPlus17;
   if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec())
     Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
          diag::err_invalid_thread)
@@ -14974,6 +15252,7 @@
 
     // Get the type for the field.
     const Type *FDTy = FD->getType().getTypePtr();
+    Qualifiers QS = FD->getType().getQualifiers();
 
     if (!FD->isAnonymousStructOrUnion()) {
       // Remember all fields written by the user.
@@ -14997,6 +15276,7 @@
     //   possibly recursively, a member that is such a structure)
     //   shall not be a member of a structure or an element of an
     //   array.
+    bool IsLastField = (i + 1 == Fields.end());
     if (FDTy->isFunctionType()) {
       // Field declared as a function.
       Diag(FD->getLocation(), diag::err_field_declared_as_function)
@@ -15004,60 +15284,70 @@
       FD->setInvalidDecl();
       EnclosingDecl->setInvalidDecl();
       continue;
-    } else if (FDTy->isIncompleteArrayType() && Record &&
-               ((i + 1 == Fields.end() && !Record->isUnion()) ||
-                ((getLangOpts().MicrosoftExt ||
-                  getLangOpts().CPlusPlus) &&
-                 (i + 1 == Fields.end() || Record->isUnion())))) {
-      // Flexible array member.
-      // Microsoft and g++ is more permissive regarding flexible array.
-      // It will accept flexible array in union and also
-      // as the sole element of a struct/class.
-      unsigned DiagID = 0;
-      if (Record->isUnion())
-        DiagID = getLangOpts().MicrosoftExt
-                     ? diag::ext_flexible_array_union_ms
-                     : getLangOpts().CPlusPlus
-                           ? diag::ext_flexible_array_union_gnu
-                           : diag::err_flexible_array_union;
-      else if (NumNamedMembers < 1)
-        DiagID = getLangOpts().MicrosoftExt
-                     ? diag::ext_flexible_array_empty_aggregate_ms
-                     : getLangOpts().CPlusPlus
-                           ? diag::ext_flexible_array_empty_aggregate_gnu
-                           : diag::err_flexible_array_empty_aggregate;
+    } else if (FDTy->isIncompleteArrayType() &&
+               (Record || isa<ObjCContainerDecl>(EnclosingDecl))) {
+      if (Record) {
+        // Flexible array member.
+        // Microsoft and g++ is more permissive regarding flexible array.
+        // It will accept flexible array in union and also
+        // as the sole element of a struct/class.
+        unsigned DiagID = 0;
+        if (!Record->isUnion() && !IsLastField) {
+          Diag(FD->getLocation(), diag::err_flexible_array_not_at_end)
+            << FD->getDeclName() << FD->getType() << Record->getTagKind();
+          Diag((*(i + 1))->getLocation(), diag::note_next_field_declaration);
+          FD->setInvalidDecl();
+          EnclosingDecl->setInvalidDecl();
+          continue;
+        } else if (Record->isUnion())
+          DiagID = getLangOpts().MicrosoftExt
+                       ? diag::ext_flexible_array_union_ms
+                       : getLangOpts().CPlusPlus
+                             ? diag::ext_flexible_array_union_gnu
+                             : diag::err_flexible_array_union;
+        else if (NumNamedMembers < 1)
+          DiagID = getLangOpts().MicrosoftExt
+                       ? diag::ext_flexible_array_empty_aggregate_ms
+                       : getLangOpts().CPlusPlus
+                             ? diag::ext_flexible_array_empty_aggregate_gnu
+                             : diag::err_flexible_array_empty_aggregate;
 
-      if (DiagID)
-        Diag(FD->getLocation(), DiagID) << FD->getDeclName()
-                                        << Record->getTagKind();
-      // While the layout of types that contain virtual bases is not specified
-      // by the C++ standard, both the Itanium and Microsoft C++ ABIs place
-      // virtual bases after the derived members.  This would make a flexible
-      // array member declared at the end of an object not adjacent to the end
-      // of the type.
-      if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(Record))
-        if (RD->getNumVBases() != 0)
-          Diag(FD->getLocation(), diag::err_flexible_array_virtual_base)
+        if (DiagID)
+          Diag(FD->getLocation(), DiagID) << FD->getDeclName()
+                                          << Record->getTagKind();
+        // While the layout of types that contain virtual bases is not specified
+        // by the C++ standard, both the Itanium and Microsoft C++ ABIs place
+        // virtual bases after the derived members.  This would make a flexible
+        // array member declared at the end of an object not adjacent to the end
+        // of the type.
+        if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(Record))
+          if (RD->getNumVBases() != 0)
+            Diag(FD->getLocation(), diag::err_flexible_array_virtual_base)
+              << FD->getDeclName() << Record->getTagKind();
+        if (!getLangOpts().C99)
+          Diag(FD->getLocation(), diag::ext_c99_flexible_array_member)
             << FD->getDeclName() << Record->getTagKind();
-      if (!getLangOpts().C99)
-        Diag(FD->getLocation(), diag::ext_c99_flexible_array_member)
-          << FD->getDeclName() << Record->getTagKind();
 
-      // If the element type has a non-trivial destructor, we would not
-      // implicitly destroy the elements, so disallow it for now.
-      //
-      // FIXME: GCC allows this. We should probably either implicitly delete
-      // the destructor of the containing class, or just allow this.
-      QualType BaseElem = Context.getBaseElementType(FD->getType());
-      if (!BaseElem->isDependentType() && BaseElem.isDestructedType()) {
-        Diag(FD->getLocation(), diag::err_flexible_array_has_nontrivial_dtor)
-          << FD->getDeclName() << FD->getType();
-        FD->setInvalidDecl();
-        EnclosingDecl->setInvalidDecl();
-        continue;
+        // If the element type has a non-trivial destructor, we would not
+        // implicitly destroy the elements, so disallow it for now.
+        //
+        // FIXME: GCC allows this. We should probably either implicitly delete
+        // the destructor of the containing class, or just allow this.
+        QualType BaseElem = Context.getBaseElementType(FD->getType());
+        if (!BaseElem->isDependentType() && BaseElem.isDestructedType()) {
+          Diag(FD->getLocation(), diag::err_flexible_array_has_nontrivial_dtor)
+            << FD->getDeclName() << FD->getType();
+          FD->setInvalidDecl();
+          EnclosingDecl->setInvalidDecl();
+          continue;
+        }
+        // Okay, we have a legal flexible array member at the end of the struct.
+        Record->setHasFlexibleArrayMember(true);
+      } else {
+        // In ObjCContainerDecl ivars with incomplete array type are accepted,
+        // unless they are followed by another ivar. That check is done
+        // elsewhere, after synthesized ivars are known.
       }
-      // Okay, we have a legal flexible array member at the end of the struct.
-      Record->setHasFlexibleArrayMember(true);
     } else if (!FDTy->isDependentType() &&
                RequireCompleteType(FD->getLocation(), FD->getType(),
                                    diag::err_field_incomplete)) {
@@ -15074,7 +15364,7 @@
           // If this is a struct/class and this is not the last element, reject
           // it.  Note that GCC supports variable sized arrays in the middle of
           // structures.
-          if (i + 1 != Fields.end())
+          if (!IsLastField)
             Diag(FD->getLocation(), diag::ext_variable_sized_type_in_struct)
               << FD->getDeclName() << FD->getType();
           else {
@@ -15104,7 +15394,9 @@
       FD->setType(T);
     } else if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() &&
                Record && !ObjCFieldLifetimeErrReported &&
-               (!getLangOpts().CPlusPlus || Record->isUnion())) {
+               ((!getLangOpts().CPlusPlus &&
+                 QS.getObjCLifetime() == Qualifiers::OCL_Weak) ||
+                Record->isUnion())) {
       // It's an error in ARC or Weak if a field has lifetime.
       // We don't want to report this in a system header, though,
       // so we just make the field unavailable.
@@ -15140,6 +15432,18 @@
                Record->setHasObjectMember(true);
       }
     }
+
+    if (Record && !getLangOpts().CPlusPlus) {
+      QualType FT = FD->getType();
+      if (FT.isNonTrivialToPrimitiveDefaultInitialize())
+        Record->setNonTrivialToPrimitiveDefaultInitialize();
+      QualType::PrimitiveCopyKind PCK = FT.isNonTrivialToPrimitiveCopy();
+      if (PCK != QualType::PCK_Trivial && PCK != QualType::PCK_VolatileTrivial)
+        Record->setNonTrivialToPrimitiveCopy();
+      if (FT.isDestructedType())
+        Record->setNonTrivialToPrimitiveDestroy();
+    }
+
     if (Record && FD->getType().isVolatileQualified())
       Record->setHasVolatileMember(true);
     // Keep track of the number of named members.
@@ -15167,10 +15471,10 @@
                                           CXXRecord->getDestructor());
         }
 
-        if (!CXXRecord->isInvalidDecl()) {
-          // Add any implicitly-declared members to this class.
-          AddImplicitlyDeclaredMembersToClass(CXXRecord);
+        // Add any implicitly-declared members to this class.
+        AddImplicitlyDeclaredMembersToClass(CXXRecord);
 
+        if (!CXXRecord->isInvalidDecl()) {
           // If we have virtual base classes, we may end up finding multiple
           // final overriders for a given virtual function. Check for this
           // problem now.
@@ -15357,7 +15661,8 @@
 static bool isRepresentableIntegerValue(ASTContext &Context,
                                         llvm::APSInt &Value,
                                         QualType T) {
-  assert(T->isIntegralType(Context) && "Integral type required!");
+  assert((T->isIntegralType(Context) || T->isEnumeralType()) &&
+         "Integral type required!");
   unsigned BitWidth = Context.getIntWidth(T);
 
   if (Value.isUnsigned() || Value.isNonNegative()) {
@@ -15373,7 +15678,8 @@
 static QualType getNextLargerIntegralType(ASTContext &Context, QualType T) {
   // FIXME: Int128/UInt128 support, which also needs to be introduced into
   // enum checking below.
-  assert(T->isIntegralType(Context) && "Integral type required!");
+  assert((T->isIntegralType(Context) ||
+         T->isEnumeralType()) && "Integral type required!");
   const unsigned NumTypes = 4;
   QualType SignedIntegralTypes[NumTypes] = {
     Context.ShortTy, Context.IntTy, Context.LongTy, Context.LongLongTy
@@ -15430,7 +15736,7 @@
                                                          &EnumVal).get())) {
         // C99 6.7.2.2p2: Make sure we have an integer constant expression.
       } else {
-        if (Enum->isFixed()) {
+        if (Enum->isComplete()) {
           EltTy = Enum->getIntegerType();
 
           // In Obj-C and Microsoft mode, require the enumeration value to be
@@ -15941,7 +16247,9 @@
   if (LangOpts.ShortEnums)
     Packed = true;
 
-  if (Enum->isFixed()) {
+  // If the enum already has a type because it is fixed or dictated by the
+  // target, promote that type instead of analyzing the enumerators.
+  if (Enum->isComplete()) {
     BestType = Enum->getIntegerType();
     if (BestType->isPromotableIntegerType())
       BestPromotionType = Context.getPromotedIntegerType(BestType);
@@ -16127,7 +16435,7 @@
     DC = LSD->getParent();
   }
 
-  while (isa<LinkageSpecDecl>(DC))
+  while (isa<LinkageSpecDecl>(DC) || isa<ExportDecl>(DC))
     DC = DC->getParent();
 
   if (!isa<TranslationUnitDecl>(DC)) {
@@ -16305,12 +16613,17 @@
     IdentifierLocs.push_back(Path[I].second);
   }
 
-  TranslationUnitDecl *TU = getASTContext().getTranslationUnitDecl();
-  ImportDecl *Import = ImportDecl::Create(Context, TU, StartLoc,
+  ImportDecl *Import = ImportDecl::Create(Context, CurContext, StartLoc,
                                           Mod, IdentifierLocs);
   if (!ModuleScopes.empty())
     Context.addModuleInitializer(ModuleScopes.back().Module, Import);
-  TU->addDecl(Import);
+  CurContext->addDecl(Import);
+
+  // Re-export the module if needed.
+  if (Import->isExported() &&
+      !ModuleScopes.empty() && ModuleScopes.back().ModuleInterface)
+    getCurrentModule()->Exports.emplace_back(Mod, false);
+
   return Import;
 }
 
diff --git a/lib/Sema/SemaDeclAttr.cpp b/lib/Sema/SemaDeclAttr.cpp
index 655d6e9..b35c311 100644
--- a/lib/Sema/SemaDeclAttr.cpp
+++ b/lib/Sema/SemaDeclAttr.cpp
@@ -87,7 +87,7 @@
 static unsigned getFunctionOrMethodNumParams(const Decl *D) {
   if (const FunctionType *FnTy = D->getFunctionType())
     return cast<FunctionProtoType>(FnTy)->getNumParams();
-  if (const BlockDecl *BD = dyn_cast<BlockDecl>(D))
+  if (const auto *BD = dyn_cast<BlockDecl>(D))
     return BD->getNumParams();
   return cast<ObjCMethodDecl>(D)->param_size();
 }
@@ -95,7 +95,7 @@
 static QualType getFunctionOrMethodParamType(const Decl *D, unsigned Idx) {
   if (const FunctionType *FnTy = D->getFunctionType())
     return cast<FunctionProtoType>(FnTy)->getParamType(Idx);
-  if (const BlockDecl *BD = dyn_cast<BlockDecl>(D))
+  if (const auto *BD = dyn_cast<BlockDecl>(D))
     return BD->getParamDecl(Idx)->getType();
 
   return cast<ObjCMethodDecl>(D)->parameters()[Idx]->getType();
@@ -113,7 +113,7 @@
 
 static QualType getFunctionOrMethodResultType(const Decl *D) {
   if (const FunctionType *FnTy = D->getFunctionType())
-    return cast<FunctionType>(FnTy)->getReturnType();
+    return FnTy->getReturnType();
   return cast<ObjCMethodDecl>(D)->getReturnType();
 }
 
@@ -126,24 +126,21 @@
 }
 
 static bool isFunctionOrMethodVariadic(const Decl *D) {
-  if (const FunctionType *FnTy = D->getFunctionType()) {
-    const FunctionProtoType *proto = cast<FunctionProtoType>(FnTy);
-    return proto->isVariadic();
-  }
-  if (const BlockDecl *BD = dyn_cast<BlockDecl>(D))
+  if (const FunctionType *FnTy = D->getFunctionType())
+    return cast<FunctionProtoType>(FnTy)->isVariadic();
+  if (const auto *BD = dyn_cast<BlockDecl>(D))
     return BD->isVariadic();
-
   return cast<ObjCMethodDecl>(D)->isVariadic();
 }
 
 static bool isInstanceMethod(const Decl *D) {
-  if (const CXXMethodDecl *MethodDecl = dyn_cast<CXXMethodDecl>(D))
+  if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(D))
     return MethodDecl->isInstance();
   return false;
 }
 
 static inline bool isNSStringType(QualType T, ASTContext &Ctx) {
-  const ObjCObjectPointerType *PT = T->getAs<ObjCObjectPointerType>();
+  const auto *PT = T->getAs<ObjCObjectPointerType>();
   if (!PT)
     return false;
 
@@ -159,11 +156,11 @@
 }
 
 static inline bool isCFStringType(QualType T, ASTContext &Ctx) {
-  const PointerType *PT = T->getAs<PointerType>();
+  const auto *PT = T->getAs<PointerType>();
   if (!PT)
     return false;
 
-  const RecordType *RT = PT->getPointeeType()->getAs<RecordType>();
+  const auto *RT = PT->getPointeeType()->getAs<RecordType>();
   if (!RT)
     return false;
 
@@ -174,17 +171,17 @@
   return RD->getIdentifier() == &Ctx.Idents.get("__CFString");
 }
 
-static unsigned getNumAttributeArgs(const AttributeList &Attr) {
+static unsigned getNumAttributeArgs(const AttributeList &AL) {
   // FIXME: Include the type in the argument list.
-  return Attr.getNumArgs() + Attr.hasParsedType();
+  return AL.getNumArgs() + AL.hasParsedType();
 }
 
 template <typename Compare>
-static bool checkAttributeNumArgsImpl(Sema &S, const AttributeList &Attr,
+static bool checkAttributeNumArgsImpl(Sema &S, const AttributeList &AL,
                                       unsigned Num, unsigned Diag,
                                       Compare Comp) {
-  if (Comp(getNumAttributeArgs(Attr), Num)) {
-    S.Diag(Attr.getLoc(), Diag) << Attr.getName() << Num;
+  if (Comp(getNumAttributeArgs(AL), Num)) {
+    S.Diag(AL.getLoc(), Diag) << AL.getName() << Num;
     return false;
   }
 
@@ -193,27 +190,27 @@
 
 /// \brief Check if the attribute has exactly as many args as Num. May
 /// output an error.
-static bool checkAttributeNumArgs(Sema &S, const AttributeList &Attr,
+static bool checkAttributeNumArgs(Sema &S, const AttributeList &AL,
                                   unsigned Num) {
-  return checkAttributeNumArgsImpl(S, Attr, Num,
+  return checkAttributeNumArgsImpl(S, AL, Num,
                                    diag::err_attribute_wrong_number_arguments,
                                    std::not_equal_to<unsigned>());
 }
 
 /// \brief Check if the attribute has at least as many args as Num. May
 /// output an error.
-static bool checkAttributeAtLeastNumArgs(Sema &S, const AttributeList &Attr,
+static bool checkAttributeAtLeastNumArgs(Sema &S, const AttributeList &AL,
                                          unsigned Num) {
-  return checkAttributeNumArgsImpl(S, Attr, Num,
+  return checkAttributeNumArgsImpl(S, AL, Num,
                                    diag::err_attribute_too_few_arguments,
                                    std::less<unsigned>());
 }
 
 /// \brief Check if the attribute has at most as many args as Num. May
 /// output an error.
-static bool checkAttributeAtMostNumArgs(Sema &S, const AttributeList &Attr,
+static bool checkAttributeAtMostNumArgs(Sema &S, const AttributeList &AL,
                                          unsigned Num) {
-  return checkAttributeNumArgsImpl(S, Attr, Num,
+  return checkAttributeNumArgsImpl(S, AL, Num,
                                    diag::err_attribute_too_many_arguments,
                                    std::greater<unsigned>());
 }
@@ -221,42 +218,42 @@
 /// \brief A helper function to provide Attribute Location for the Attr types
 /// AND the AttributeList.
 template <typename AttrInfo>
-static typename std::enable_if<std::is_base_of<clang::Attr, AttrInfo>::value,
+static typename std::enable_if<std::is_base_of<Attr, AttrInfo>::value,
                                SourceLocation>::type
-getAttrLoc(const AttrInfo &Attr) {
-  return Attr.getLocation();
+getAttrLoc(const AttrInfo &AL) {
+  return AL.getLocation();
 }
-static SourceLocation getAttrLoc(const clang::AttributeList &Attr) {
-  return Attr.getLoc();
+static SourceLocation getAttrLoc(const AttributeList &AL) {
+  return AL.getLoc();
 }
 
 /// \brief A helper function to provide Attribute Name for the Attr types
 /// AND the AttributeList.
 template <typename AttrInfo>
-static typename std::enable_if<std::is_base_of<clang::Attr, AttrInfo>::value,
+static typename std::enable_if<std::is_base_of<Attr, AttrInfo>::value,
                                const AttrInfo *>::type
-getAttrName(const AttrInfo &Attr) {
-  return &Attr;
+getAttrName(const AttrInfo &AL) {
+  return &AL;
 }
-static const IdentifierInfo *getAttrName(const clang::AttributeList &Attr) {
-  return Attr.getName();
+static const IdentifierInfo *getAttrName(const AttributeList &AL) {
+  return AL.getName();
 }
 
 /// \brief If Expr is a valid integer constant, get the value of the integer
 /// expression and return success or failure. May output an error.
-template<typename AttrInfo>
-static bool checkUInt32Argument(Sema &S, const AttrInfo& Attr, const Expr *Expr,
+template <typename AttrInfo>
+static bool checkUInt32Argument(Sema &S, const AttrInfo &AI, const Expr *Expr,
                                 uint32_t &Val, unsigned Idx = UINT_MAX) {
   llvm::APSInt I(32);
   if (Expr->isTypeDependent() || Expr->isValueDependent() ||
       !Expr->isIntegerConstantExpr(I, S.Context)) {
     if (Idx != UINT_MAX)
-      S.Diag(getAttrLoc(Attr), diag::err_attribute_argument_n_type)
-        << getAttrName(Attr) << Idx << AANT_ArgumentIntegerConstant
+      S.Diag(getAttrLoc(AI), diag::err_attribute_argument_n_type)
+        << getAttrName(AI) << Idx << AANT_ArgumentIntegerConstant
         << Expr->getSourceRange();
     else
-      S.Diag(getAttrLoc(Attr), diag::err_attribute_argument_type)
-        << getAttrName(Attr) << AANT_ArgumentIntegerConstant
+      S.Diag(getAttrLoc(AI), diag::err_attribute_argument_type)
+        << getAttrName(AI) << AANT_ArgumentIntegerConstant
         << Expr->getSourceRange();
     return false;
   }
@@ -274,11 +271,11 @@
 /// \brief Wrapper around checkUInt32Argument, with an extra check to be sure
 /// that the result will fit into a regular (signed) int. All args have the same
 /// purpose as they do in checkUInt32Argument.
-template<typename AttrInfo>
-static bool checkPositiveIntArgument(Sema &S, const AttrInfo& Attr, const Expr *Expr,
+template <typename AttrInfo>
+static bool checkPositiveIntArgument(Sema &S, const AttrInfo &AI, const Expr *Expr,
                                      int &Val, unsigned Idx = UINT_MAX) {
   uint32_t UVal;
-  if (!checkUInt32Argument(S, Attr, Expr, UVal, Idx))
+  if (!checkUInt32Argument(S, AI, Expr, UVal, Idx))
     return false;
 
   if (UVal > (uint32_t)std::numeric_limits<int>::max()) {
@@ -298,7 +295,7 @@
 template <typename AttrTy>
 static bool checkAttrMutualExclusion(Sema &S, Decl *D, SourceRange Range,
                                      IdentifierInfo *Ident) {
-  if (AttrTy *A = D->getAttr<AttrTy>()) {
+  if (const auto *A = D->getAttr<AttrTy>()) {
     S.Diag(Range.getBegin(), diag::err_attributes_are_not_compatible) << Ident
                                                                       << A;
     S.Diag(A->getLocation(), diag::note_conflicting_attribute);
@@ -313,8 +310,8 @@
 /// \returns true if IdxExpr is a valid index.
 template <typename AttrInfo>
 static bool checkFunctionOrMethodParameterIndex(
-    Sema &S, const Decl *D, const AttrInfo &Attr, unsigned AttrArgNum,
-    const Expr *IdxExpr, uint64_t &Idx, bool AllowImplicitThis = false) {
+    Sema &S, const Decl *D, const AttrInfo &AI, unsigned AttrArgNum,
+    const Expr *IdxExpr, ParamIdx &Idx, bool CanIndexImplicitThis = false) {
   assert(isFunctionOrMethodOrBlock(D));
 
   // In C++ the implicit 'this' function parameter also counts.
@@ -328,27 +325,26 @@
   llvm::APSInt IdxInt;
   if (IdxExpr->isTypeDependent() || IdxExpr->isValueDependent() ||
       !IdxExpr->isIntegerConstantExpr(IdxInt, S.Context)) {
-    S.Diag(getAttrLoc(Attr), diag::err_attribute_argument_n_type)
-      << getAttrName(Attr) << AttrArgNum << AANT_ArgumentIntegerConstant
+    S.Diag(getAttrLoc(AI), diag::err_attribute_argument_n_type)
+      << getAttrName(AI) << AttrArgNum << AANT_ArgumentIntegerConstant
       << IdxExpr->getSourceRange();
     return false;
   }
 
-  Idx = IdxInt.getLimitedValue();
-  if (Idx < 1 || (!IV && Idx > NumParams)) {
-    S.Diag(getAttrLoc(Attr), diag::err_attribute_argument_out_of_bounds)
-      << getAttrName(Attr) << AttrArgNum << IdxExpr->getSourceRange();
+  Idx = ParamIdx(IdxInt.getLimitedValue(UINT_MAX), D);
+  unsigned IdxSource = Idx.getSourceIndex();
+  if (IdxSource < 1 || (!IV && IdxSource > NumParams)) {
+    S.Diag(getAttrLoc(AI), diag::err_attribute_argument_out_of_bounds)
+        << getAttrName(AI) << AttrArgNum << IdxExpr->getSourceRange();
     return false;
   }
-  Idx--; // Convert to zero-based.
-  if (HasImplicitThisParam && !AllowImplicitThis) {
-    if (Idx == 0) {
-      S.Diag(getAttrLoc(Attr),
+  if (HasImplicitThisParam && !CanIndexImplicitThis) {
+    if (IdxSource == 1) {
+      S.Diag(getAttrLoc(AI),
              diag::err_attribute_invalid_implicit_this_argument)
-        << getAttrName(Attr) << IdxExpr->getSourceRange();
+          << getAttrName(AI) << IdxExpr->getSourceRange();
       return false;
     }
-    --Idx;
   }
 
   return true;
@@ -358,14 +354,14 @@
 /// If not emit an error and return false. If the argument is an identifier it
 /// will emit an error with a fixit hint and treat it as if it was a string
 /// literal.
-bool Sema::checkStringLiteralArgumentAttr(const AttributeList &Attr,
+bool Sema::checkStringLiteralArgumentAttr(const AttributeList &AL,
                                           unsigned ArgNum, StringRef &Str,
                                           SourceLocation *ArgLocation) {
   // Look for identifiers. If we have one emit a hint to fix it to a literal.
-  if (Attr.isArgIdent(ArgNum)) {
-    IdentifierLoc *Loc = Attr.getArgAsIdent(ArgNum);
+  if (AL.isArgIdent(ArgNum)) {
+    IdentifierLoc *Loc = AL.getArgAsIdent(ArgNum);
     Diag(Loc->Loc, diag::err_attribute_argument_type)
-        << Attr.getName() << AANT_ArgumentString
+        << AL.getName() << AANT_ArgumentString
         << FixItHint::CreateInsertion(Loc->Loc, "\"")
         << FixItHint::CreateInsertion(getLocForEndOfToken(Loc->Loc), "\"");
     Str = Loc->Ident->getName();
@@ -375,14 +371,14 @@
   }
 
   // Now check for an actual string literal.
-  Expr *ArgExpr = Attr.getArgAsExpr(ArgNum);
-  StringLiteral *Literal = dyn_cast<StringLiteral>(ArgExpr->IgnoreParenCasts());
+  Expr *ArgExpr = AL.getArgAsExpr(ArgNum);
+  const auto *Literal = dyn_cast<StringLiteral>(ArgExpr->IgnoreParenCasts());
   if (ArgLocation)
     *ArgLocation = ArgExpr->getLocStart();
 
   if (!Literal || !Literal->isAscii()) {
     Diag(ArgExpr->getLocStart(), diag::err_attribute_argument_type)
-        << Attr.getName() << AANT_ArgumentString;
+        << AL.getName() << AANT_ArgumentString;
     return false;
   }
 
@@ -393,16 +389,15 @@
 /// \brief Applies the given attribute to the Decl without performing any
 /// additional semantic checking.
 template <typename AttrType>
-static void handleSimpleAttribute(Sema &S, Decl *D,
-                                  const AttributeList &Attr) {
-  D->addAttr(::new (S.Context) AttrType(Attr.getRange(), S.Context,
-                                        Attr.getAttributeSpellingListIndex()));
+static void handleSimpleAttribute(Sema &S, Decl *D, const AttributeList &AL) {
+  D->addAttr(::new (S.Context) AttrType(AL.getRange(), S.Context,
+                                        AL.getAttributeSpellingListIndex()));
 }
 
 template <typename AttrType>
 static void handleSimpleAttributeWithExclusions(Sema &S, Decl *D,
-                                                const AttributeList &Attr) {
-  handleSimpleAttribute<AttrType>(S, D, Attr);
+                                                const AttributeList &AL) {
+  handleSimpleAttribute<AttrType>(S, D, AL);
 }
 
 /// \brief Applies the given attribute to the Decl so long as the Decl doesn't
@@ -410,12 +405,12 @@
 template <typename AttrType, typename IncompatibleAttrType,
           typename... IncompatibleAttrTypes>
 static void handleSimpleAttributeWithExclusions(Sema &S, Decl *D,
-                                                const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<IncompatibleAttrType>(S, D, Attr.getRange(),
-                                                     Attr.getName()))
+                                                const AttributeList &AL) {
+  if (checkAttrMutualExclusion<IncompatibleAttrType>(S, D, AL.getRange(),
+                                                     AL.getName()))
     return;
   handleSimpleAttributeWithExclusions<AttrType, IncompatibleAttrTypes...>(S, D,
-                                                                          Attr);
+                                                                          AL);
 }
 
 /// \brief Check if the passed-in expression is of type int or bool.
@@ -445,13 +440,13 @@
 /// Note that this function may produce an error message.
 /// \return true if the Decl is a pointer type; false otherwise
 static bool threadSafetyCheckIsPointer(Sema &S, const Decl *D,
-                                       const AttributeList &Attr) {
-  const ValueDecl *vd = cast<ValueDecl>(D);
-  QualType QT = vd->getType();
+                                       const AttributeList &AL) {
+  const auto *VD = cast<ValueDecl>(D);
+  QualType QT = VD->getType();
   if (QT->isAnyPointerType())
     return true;
 
-  if (const RecordType *RT = QT->getAs<RecordType>()) {
+  if (const auto *RT = QT->getAs<RecordType>()) {
     // If it's an incomplete type, it could be a smart pointer; skip it.
     // (We don't want to force template instantiation if we can avoid it,
     // since that would alter the order in which templates are instantiated.)
@@ -462,19 +457,19 @@
       return true;
   }
 
-  S.Diag(Attr.getLoc(), diag::warn_thread_attribute_decl_not_pointer)
-    << Attr.getName() << QT;
+  S.Diag(AL.getLoc(), diag::warn_thread_attribute_decl_not_pointer)
+      << AL.getName() << QT;
   return false;
 }
 
 /// \brief Checks that the passed in QualType either is of RecordType or points
 /// to RecordType. Returns the relevant RecordType, null if it does not exit.
 static const RecordType *getRecordType(QualType QT) {
-  if (const RecordType *RT = QT->getAs<RecordType>())
+  if (const auto *RT = QT->getAs<RecordType>())
     return RT;
 
   // Now check if we point to record type.
-  if (const PointerType *PT = QT->getAs<PointerType>())
+  if (const auto *PT = QT->getAs<PointerType>())
     return PT->getPointeeType()->getAs<RecordType>();
 
   return nullptr;
@@ -501,7 +496,7 @@
     return true;
 
   // Else check if any base classes have a capability.
-  if (CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) {
+  if (const auto *CRD = dyn_cast<CXXRecordDecl>(RD)) {
     CXXBasePaths BPaths(false, false);
     if (CRD->lookupInBases([](const CXXBaseSpecifier *BS, CXXBasePath &) {
           const auto *Type = BS->getType()->getAs<RecordType>();
@@ -540,14 +535,13 @@
   // a DeclRefExpr is found, its type should be checked to determine whether it
   // is a capability or not.
 
-  if (const auto *E = dyn_cast<DeclRefExpr>(Ex))
-    return typeHasCapability(S, E->getType());
-  else if (const auto *E = dyn_cast<CastExpr>(Ex))
+  if (const auto *E = dyn_cast<CastExpr>(Ex))
     return isCapabilityExpr(S, E->getSubExpr());
   else if (const auto *E = dyn_cast<ParenExpr>(Ex))
     return isCapabilityExpr(S, E->getSubExpr());
   else if (const auto *E = dyn_cast<UnaryOperator>(Ex)) {
-    if (E->getOpcode() == UO_LNot)
+    if (E->getOpcode() == UO_LNot || E->getOpcode() == UO_AddrOf ||
+        E->getOpcode() == UO_Deref)
       return isCapabilityExpr(S, E->getSubExpr());
     return false;
   } else if (const auto *E = dyn_cast<BinaryOperator>(Ex)) {
@@ -557,7 +551,7 @@
     return false;
   }
 
-  return false;
+  return typeHasCapability(S, Ex->getType());
 }
 
 /// \brief Checks that all attribute arguments, starting from Sidx, resolve to
@@ -566,12 +560,12 @@
 /// \param ParamIdxOk Whether an argument can be indexing into a function
 /// parameter list.
 static void checkAttrArgsAreCapabilityObjs(Sema &S, Decl *D,
-                                           const AttributeList &Attr,
+                                           const AttributeList &AL,
                                            SmallVectorImpl<Expr *> &Args,
                                            int Sidx = 0,
                                            bool ParamIdxOk = false) {
-  for (unsigned Idx = Sidx; Idx < Attr.getNumArgs(); ++Idx) {
-    Expr *ArgExp = Attr.getArgAsExpr(Idx);
+  for (unsigned Idx = Sidx; Idx < AL.getNumArgs(); ++Idx) {
+    Expr *ArgExp = AL.getArgAsExpr(Idx);
 
     if (ArgExp->isTypeDependent()) {
       // FIXME -- need to check this again on template instantiation
@@ -579,7 +573,7 @@
       continue;
     }
 
-    if (StringLiteral *StrLit = dyn_cast<StringLiteral>(ArgExp)) {
+    if (const auto *StrLit = dyn_cast<StringLiteral>(ArgExp)) {
       if (StrLit->getLength() == 0 ||
           (StrLit->isAscii() && StrLit->getString() == StringRef("*"))) {
         // Pass empty strings to the analyzer without warnings.
@@ -590,8 +584,7 @@
 
       // We allow constant strings to be used as a placeholder for expressions
       // that are not valid C++ syntax, but warn that they are ignored.
-      S.Diag(Attr.getLoc(), diag::warn_thread_attribute_ignored) <<
-        Attr.getName();
+      S.Diag(AL.getLoc(), diag::warn_thread_attribute_ignored) << AL.getName();
       Args.push_back(ArgExp);
       continue;
     }
@@ -600,9 +593,9 @@
 
     // A pointer to member expression of the form  &MyClass::mu is treated
     // specially -- we need to look at the type of the member.
-    if (UnaryOperator *UOp = dyn_cast<UnaryOperator>(ArgExp))
+    if (const auto *UOp = dyn_cast<UnaryOperator>(ArgExp))
       if (UOp->getOpcode() == UO_AddrOf)
-        if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(UOp->getSubExpr()))
+        if (const auto *DRE = dyn_cast<DeclRefExpr>(UOp->getSubExpr()))
           if (DRE->getDecl()->isCXXInstanceMember())
             ArgTy = DRE->getDecl()->getType();
 
@@ -611,16 +604,16 @@
 
     // Now check if we index into a record type function param.
     if(!RT && ParamIdxOk) {
-      FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
-      IntegerLiteral *IL = dyn_cast<IntegerLiteral>(ArgExp);
+      const auto *FD = dyn_cast<FunctionDecl>(D);
+      const auto *IL = dyn_cast<IntegerLiteral>(ArgExp);
       if(FD && IL) {
         unsigned int NumParams = FD->getNumParams();
         llvm::APInt ArgValue = IL->getValue();
         uint64_t ParamIdxFromOne = ArgValue.getZExtValue();
         uint64_t ParamIdxFromZero = ParamIdxFromOne - 1;
-        if(!ArgValue.isStrictlyPositive() || ParamIdxFromOne > NumParams) {
-          S.Diag(Attr.getLoc(), diag::err_attribute_argument_out_of_range)
-            << Attr.getName() << Idx + 1 << NumParams;
+        if (!ArgValue.isStrictlyPositive() || ParamIdxFromOne > NumParams) {
+          S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_range)
+              << AL.getName() << Idx + 1 << NumParams;
           continue;
         }
         ArgTy = FD->getParamDecl(ParamIdxFromZero)->getType();
@@ -632,8 +625,8 @@
     // capability may be on the type, and the expression is a capability
     // boolean logic expression. Eg) requires_capability(A || B && !C)
     if (!typeHasCapability(S, ArgTy) && !isCapabilityExpr(S, ArgExp))
-      S.Diag(Attr.getLoc(), diag::warn_thread_attribute_argument_not_lockable)
-          << Attr.getName() << ArgTy;
+      S.Diag(AL.getLoc(), diag::warn_thread_attribute_argument_not_lockable)
+          << AL.getName() << ArgTy;
 
     Args.push_back(ArgExp);
   }
@@ -644,21 +637,20 @@
 //===----------------------------------------------------------------------===//
 
 static void handlePtGuardedVarAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
-  if (!threadSafetyCheckIsPointer(S, D, Attr))
+                                   const AttributeList &AL) {
+  if (!threadSafetyCheckIsPointer(S, D, AL))
     return;
 
   D->addAttr(::new (S.Context)
-             PtGuardedVarAttr(Attr.getRange(), S.Context,
-                              Attr.getAttributeSpellingListIndex()));
+             PtGuardedVarAttr(AL.getRange(), S.Context,
+                              AL.getAttributeSpellingListIndex()));
 }
 
-static bool checkGuardedByAttrCommon(Sema &S, Decl *D,
-                                     const AttributeList &Attr,
-                                     Expr* &Arg) {
-  SmallVector<Expr*, 1> Args;
+static bool checkGuardedByAttrCommon(Sema &S, Decl *D, const AttributeList &AL,
+                                     Expr *&Arg) {
+  SmallVector<Expr *, 1> Args;
   // check that all arguments are lockable objects
-  checkAttrArgsAreCapabilityObjs(S, D, Attr, Args);
+  checkAttrArgsAreCapabilityObjs(S, D, AL, Args);
   unsigned Size = Args.size();
   if (Size != 1)
     return false;
@@ -668,45 +660,44 @@
   return true;
 }
 
-static void handleGuardedByAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleGuardedByAttr(Sema &S, Decl *D, const AttributeList &AL) {
   Expr *Arg = nullptr;
-  if (!checkGuardedByAttrCommon(S, D, Attr, Arg))
+  if (!checkGuardedByAttrCommon(S, D, AL, Arg))
     return;
 
-  D->addAttr(::new (S.Context) GuardedByAttr(Attr.getRange(), S.Context, Arg,
-                                        Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) GuardedByAttr(
+      AL.getRange(), S.Context, Arg, AL.getAttributeSpellingListIndex()));
 }
 
 static void handlePtGuardedByAttr(Sema &S, Decl *D,
-                                  const AttributeList &Attr) {
+                                  const AttributeList &AL) {
   Expr *Arg = nullptr;
-  if (!checkGuardedByAttrCommon(S, D, Attr, Arg))
+  if (!checkGuardedByAttrCommon(S, D, AL, Arg))
     return;
 
-  if (!threadSafetyCheckIsPointer(S, D, Attr))
+  if (!threadSafetyCheckIsPointer(S, D, AL))
     return;
 
-  D->addAttr(::new (S.Context) PtGuardedByAttr(Attr.getRange(),
-                                               S.Context, Arg,
-                                        Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) PtGuardedByAttr(
+      AL.getRange(), S.Context, Arg, AL.getAttributeSpellingListIndex()));
 }
 
 static bool checkAcquireOrderAttrCommon(Sema &S, Decl *D,
-                                        const AttributeList &Attr,
+                                        const AttributeList &AL,
                                         SmallVectorImpl<Expr *> &Args) {
-  if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
     return false;
 
   // Check that this attribute only applies to lockable types.
   QualType QT = cast<ValueDecl>(D)->getType();
   if (!QT->isDependentType() && !typeHasCapability(S, QT)) {
-    S.Diag(Attr.getLoc(), diag::warn_thread_attribute_decl_not_lockable)
-      << Attr.getName();
+    S.Diag(AL.getLoc(), diag::warn_thread_attribute_decl_not_lockable)
+        << AL.getName();
     return false;
   }
 
   // Check that all arguments are lockable objects.
-  checkAttrArgsAreCapabilityObjs(S, D, Attr, Args);
+  checkAttrArgsAreCapabilityObjs(S, D, AL, Args);
   if (Args.empty())
     return false;
 
@@ -714,227 +705,206 @@
 }
 
 static void handleAcquiredAfterAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
-  SmallVector<Expr*, 1> Args;
-  if (!checkAcquireOrderAttrCommon(S, D, Attr, Args))
+                                    const AttributeList &AL) {
+  SmallVector<Expr *, 1> Args;
+  if (!checkAcquireOrderAttrCommon(S, D, AL, Args))
     return;
 
   Expr **StartArg = &Args[0];
-  D->addAttr(::new (S.Context)
-             AcquiredAfterAttr(Attr.getRange(), S.Context,
-                               StartArg, Args.size(),
-                               Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) AcquiredAfterAttr(
+      AL.getRange(), S.Context, StartArg, Args.size(),
+      AL.getAttributeSpellingListIndex()));
 }
 
 static void handleAcquiredBeforeAttr(Sema &S, Decl *D,
-                                     const AttributeList &Attr) {
-  SmallVector<Expr*, 1> Args;
-  if (!checkAcquireOrderAttrCommon(S, D, Attr, Args))
+                                     const AttributeList &AL) {
+  SmallVector<Expr *, 1> Args;
+  if (!checkAcquireOrderAttrCommon(S, D, AL, Args))
     return;
 
   Expr **StartArg = &Args[0];
-  D->addAttr(::new (S.Context)
-             AcquiredBeforeAttr(Attr.getRange(), S.Context,
-                                StartArg, Args.size(),
-                                Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) AcquiredBeforeAttr(
+      AL.getRange(), S.Context, StartArg, Args.size(),
+      AL.getAttributeSpellingListIndex()));
 }
 
 static bool checkLockFunAttrCommon(Sema &S, Decl *D,
-                                   const AttributeList &Attr,
+                                   const AttributeList &AL,
                                    SmallVectorImpl<Expr *> &Args) {
   // zero or more arguments ok
   // check that all arguments are lockable objects
-  checkAttrArgsAreCapabilityObjs(S, D, Attr, Args, 0, /*ParamIdxOk=*/true);
+  checkAttrArgsAreCapabilityObjs(S, D, AL, Args, 0, /*ParamIdxOk=*/true);
 
   return true;
 }
 
 static void handleAssertSharedLockAttr(Sema &S, Decl *D,
-                                       const AttributeList &Attr) {
-  SmallVector<Expr*, 1> Args;
-  if (!checkLockFunAttrCommon(S, D, Attr, Args))
+                                       const AttributeList &AL) {
+  SmallVector<Expr *, 1> Args;
+  if (!checkLockFunAttrCommon(S, D, AL, Args))
     return;
 
   unsigned Size = Args.size();
   Expr **StartArg = Size == 0 ? nullptr : &Args[0];
   D->addAttr(::new (S.Context)
-             AssertSharedLockAttr(Attr.getRange(), S.Context, StartArg, Size,
-                                  Attr.getAttributeSpellingListIndex()));
+                 AssertSharedLockAttr(AL.getRange(), S.Context, StartArg, Size,
+                                      AL.getAttributeSpellingListIndex()));
 }
 
 static void handleAssertExclusiveLockAttr(Sema &S, Decl *D,
-                                          const AttributeList &Attr) {
-  SmallVector<Expr*, 1> Args;
-  if (!checkLockFunAttrCommon(S, D, Attr, Args))
+                                          const AttributeList &AL) {
+  SmallVector<Expr *, 1> Args;
+  if (!checkLockFunAttrCommon(S, D, AL, Args))
     return;
 
   unsigned Size = Args.size();
   Expr **StartArg = Size == 0 ? nullptr : &Args[0];
-  D->addAttr(::new (S.Context)
-             AssertExclusiveLockAttr(Attr.getRange(), S.Context,
-                                     StartArg, Size,
-                                     Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) AssertExclusiveLockAttr(
+      AL.getRange(), S.Context, StartArg, Size,
+      AL.getAttributeSpellingListIndex()));
 }
 
-/// \brief Checks to be sure that the given parameter number is in bounds, and is
-/// an integral type. Will emit appropriate diagnostics if this returns
+/// \brief Checks to be sure that the given parameter number is in bounds, and
+/// is an integral type. Will emit appropriate diagnostics if this returns
 /// false.
 ///
-/// FuncParamNo is expected to be from the user, so is base-1. AttrArgNo is used
-/// to actually retrieve the argument, so it's base-0.
+/// AttrArgNo is used to actually retrieve the argument, so it's base-0.
 template <typename AttrInfo>
 static bool checkParamIsIntegerType(Sema &S, const FunctionDecl *FD,
-                                    const AttrInfo &Attr, Expr *AttrArg,
-                                    unsigned FuncParamNo, unsigned AttrArgNo,
-                                    bool AllowDependentType = false) {
-  uint64_t Idx;
-  if (!checkFunctionOrMethodParameterIndex(S, FD, Attr, FuncParamNo, AttrArg,
+                                    const AttrInfo &AI, unsigned AttrArgNo) {
+  assert(AI.isArgExpr(AttrArgNo) && "Expected expression argument");
+  Expr *AttrArg = AI.getArgAsExpr(AttrArgNo);
+  ParamIdx Idx;
+  if (!checkFunctionOrMethodParameterIndex(S, FD, AI, AttrArgNo + 1, AttrArg,
                                            Idx))
     return false;
 
-  const ParmVarDecl *Param = FD->getParamDecl(Idx);
-  if (AllowDependentType && Param->getType()->isDependentType())
-    return true;
+  const ParmVarDecl *Param = FD->getParamDecl(Idx.getASTIndex());
   if (!Param->getType()->isIntegerType() && !Param->getType()->isCharType()) {
     SourceLocation SrcLoc = AttrArg->getLocStart();
     S.Diag(SrcLoc, diag::err_attribute_integers_only)
-        << getAttrName(Attr) << Param->getSourceRange();
+        << getAttrName(AI) << Param->getSourceRange();
     return false;
   }
   return true;
 }
 
-/// \brief Checks to be sure that the given parameter number is in bounds, and is
-/// an integral type. Will emit appropriate diagnostics if this returns false.
-///
-/// FuncParamNo is expected to be from the user, so is base-1. AttrArgNo is used
-/// to actually retrieve the argument, so it's base-0.
-static bool checkParamIsIntegerType(Sema &S, const FunctionDecl *FD,
-                                    const AttributeList &Attr,
-                                    unsigned FuncParamNo, unsigned AttrArgNo,
-                                    bool AllowDependentType = false) {
-  assert(Attr.isArgExpr(AttrArgNo) && "Expected expression argument");
-  return checkParamIsIntegerType(S, FD, Attr, Attr.getArgAsExpr(AttrArgNo),
-                                 FuncParamNo, AttrArgNo, AllowDependentType);
-}
-
-static void handleAllocSizeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (!checkAttributeAtLeastNumArgs(S, Attr, 1) ||
-      !checkAttributeAtMostNumArgs(S, Attr, 2))
+static void handleAllocSizeAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1) ||
+      !checkAttributeAtMostNumArgs(S, AL, 2))
     return;
 
   const auto *FD = cast<FunctionDecl>(D);
   if (!FD->getReturnType()->isPointerType()) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_return_pointers_only)
-        << Attr.getName();
+    S.Diag(AL.getLoc(), diag::warn_attribute_return_pointers_only)
+        << AL.getName();
     return;
   }
 
-  const Expr *SizeExpr = Attr.getArgAsExpr(0);
-  int SizeArgNo;
+  const Expr *SizeExpr = AL.getArgAsExpr(0);
+  int SizeArgNoVal;
   // Parameter indices are 1-indexed, hence Index=1
-  if (!checkPositiveIntArgument(S, Attr, SizeExpr, SizeArgNo, /*Index=*/1))
+  if (!checkPositiveIntArgument(S, AL, SizeExpr, SizeArgNoVal, /*Index=*/1))
+    return;
+  ParamIdx SizeArgNo(SizeArgNoVal, D);
+
+  if (!checkParamIsIntegerType(S, FD, AL, /*AttrArgNo=*/0))
     return;
 
-  if (!checkParamIsIntegerType(S, FD, Attr, SizeArgNo, /*AttrArgNo=*/0))
-    return;
-
-  // Args are 1-indexed, so 0 implies that the arg was not present
-  int NumberArgNo = 0;
-  if (Attr.getNumArgs() == 2) {
-    const Expr *NumberExpr = Attr.getArgAsExpr(1);
+  ParamIdx NumberArgNo;
+  if (AL.getNumArgs() == 2) {
+    const Expr *NumberExpr = AL.getArgAsExpr(1);
+    int Val;
     // Parameter indices are 1-based, hence Index=2
-    if (!checkPositiveIntArgument(S, Attr, NumberExpr, NumberArgNo,
-                                  /*Index=*/2))
+    if (!checkPositiveIntArgument(S, AL, NumberExpr, Val, /*Index=*/2))
       return;
+    NumberArgNo = ParamIdx(Val, D);
 
-    if (!checkParamIsIntegerType(S, FD, Attr, NumberArgNo, /*AttrArgNo=*/1))
+    if (!checkParamIsIntegerType(S, FD, AL, /*AttrArgNo=*/1))
       return;
   }
 
-  D->addAttr(::new (S.Context) AllocSizeAttr(
-      Attr.getRange(), S.Context, SizeArgNo, NumberArgNo,
-      Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context)
+                 AllocSizeAttr(AL.getRange(), S.Context, SizeArgNo, NumberArgNo,
+                               AL.getAttributeSpellingListIndex()));
 }
 
 static bool checkTryLockFunAttrCommon(Sema &S, Decl *D,
-                                      const AttributeList &Attr,
+                                      const AttributeList &AL,
                                       SmallVectorImpl<Expr *> &Args) {
-  if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
     return false;
 
-  if (!isIntOrBool(Attr.getArgAsExpr(0))) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
-      << Attr.getName() << 1 << AANT_ArgumentIntOrBool;
+  if (!isIntOrBool(AL.getArgAsExpr(0))) {
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
+      << AL.getName() << 1 << AANT_ArgumentIntOrBool;
     return false;
   }
 
   // check that all arguments are lockable objects
-  checkAttrArgsAreCapabilityObjs(S, D, Attr, Args, 1);
+  checkAttrArgsAreCapabilityObjs(S, D, AL, Args, 1);
 
   return true;
 }
 
 static void handleSharedTrylockFunctionAttr(Sema &S, Decl *D,
-                                            const AttributeList &Attr) {
+                                            const AttributeList &AL) {
   SmallVector<Expr*, 2> Args;
-  if (!checkTryLockFunAttrCommon(S, D, Attr, Args))
+  if (!checkTryLockFunAttrCommon(S, D, AL, Args))
     return;
 
-  D->addAttr(::new (S.Context)
-             SharedTrylockFunctionAttr(Attr.getRange(), S.Context,
-                                       Attr.getArgAsExpr(0),
-                                       Args.data(), Args.size(),
-                                       Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) SharedTrylockFunctionAttr(
+      AL.getRange(), S.Context, AL.getArgAsExpr(0), Args.data(), Args.size(),
+      AL.getAttributeSpellingListIndex()));
 }
 
 static void handleExclusiveTrylockFunctionAttr(Sema &S, Decl *D,
-                                               const AttributeList &Attr) {
+                                               const AttributeList &AL) {
   SmallVector<Expr*, 2> Args;
-  if (!checkTryLockFunAttrCommon(S, D, Attr, Args))
+  if (!checkTryLockFunAttrCommon(S, D, AL, Args))
     return;
 
   D->addAttr(::new (S.Context) ExclusiveTrylockFunctionAttr(
-      Attr.getRange(), S.Context, Attr.getArgAsExpr(0), Args.data(),
-      Args.size(), Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, AL.getArgAsExpr(0), Args.data(),
+      Args.size(), AL.getAttributeSpellingListIndex()));
 }
 
 static void handleLockReturnedAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
+                                   const AttributeList &AL) {
   // check that the argument is lockable object
   SmallVector<Expr*, 1> Args;
-  checkAttrArgsAreCapabilityObjs(S, D, Attr, Args);
+  checkAttrArgsAreCapabilityObjs(S, D, AL, Args);
   unsigned Size = Args.size();
   if (Size == 0)
     return;
 
   D->addAttr(::new (S.Context)
-             LockReturnedAttr(Attr.getRange(), S.Context, Args[0],
-                              Attr.getAttributeSpellingListIndex()));
+             LockReturnedAttr(AL.getRange(), S.Context, Args[0],
+                              AL.getAttributeSpellingListIndex()));
 }
 
 static void handleLocksExcludedAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
-  if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
+                                    const AttributeList &AL) {
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
     return;
 
   // check that all arguments are lockable objects
   SmallVector<Expr*, 1> Args;
-  checkAttrArgsAreCapabilityObjs(S, D, Attr, Args);
+  checkAttrArgsAreCapabilityObjs(S, D, AL, Args);
   unsigned Size = Args.size();
   if (Size == 0)
     return;
   Expr **StartArg = &Args[0];
 
   D->addAttr(::new (S.Context)
-             LocksExcludedAttr(Attr.getRange(), S.Context, StartArg, Size,
-                               Attr.getAttributeSpellingListIndex()));
+             LocksExcludedAttr(AL.getRange(), S.Context, StartArg, Size,
+                               AL.getAttributeSpellingListIndex()));
 }
 
 static bool checkFunctionConditionAttr(Sema &S, Decl *D,
-                                       const AttributeList &Attr,
+                                       const AttributeList &AL,
                                        Expr *&Cond, StringRef &Msg) {
-  Cond = Attr.getArgAsExpr(0);
+  Cond = AL.getArgAsExpr(0);
   if (!Cond->isTypeDependent()) {
     ExprResult Converted = S.PerformContextuallyConvertToBool(Cond);
     if (Converted.isInvalid())
@@ -942,7 +912,7 @@
     Cond = Converted.get();
   }
 
-  if (!S.checkStringLiteralArgumentAttr(Attr, 1, Msg))
+  if (!S.checkStringLiteralArgumentAttr(AL, 1, Msg))
     return false;
 
   if (Msg.empty())
@@ -952,8 +922,8 @@
   if (isa<FunctionDecl>(D) && !Cond->isValueDependent() &&
       !Expr::isPotentialConstantExprUnevaluated(Cond, cast<FunctionDecl>(D),
                                                 Diags)) {
-    S.Diag(Attr.getLoc(), diag::err_attr_cond_never_constant_expr)
-        << Attr.getName();
+    S.Diag(AL.getLoc(), diag::err_attr_cond_never_constant_expr)
+        << AL.getName();
     for (const PartialDiagnosticAt &PDiag : Diags)
       S.Diag(PDiag.first, PDiag.second);
     return false;
@@ -961,15 +931,15 @@
   return true;
 }
 
-static void handleEnableIfAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  S.Diag(Attr.getLoc(), diag::ext_clang_enable_if);
+static void handleEnableIfAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  S.Diag(AL.getLoc(), diag::ext_clang_enable_if);
 
   Expr *Cond;
   StringRef Msg;
-  if (checkFunctionConditionAttr(S, D, Attr, Cond, Msg))
+  if (checkFunctionConditionAttr(S, D, AL, Cond, Msg))
     D->addAttr(::new (S.Context)
-                   EnableIfAttr(Attr.getRange(), S.Context, Cond, Msg,
-                                Attr.getAttributeSpellingListIndex()));
+                   EnableIfAttr(AL.getRange(), S.Context, Cond, Msg,
+                                AL.getAttributeSpellingListIndex()));
 }
 
 namespace {
@@ -1018,21 +988,21 @@
 };
 }
 
-static void handleDiagnoseIfAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  S.Diag(Attr.getLoc(), diag::ext_clang_diagnose_if);
+static void handleDiagnoseIfAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  S.Diag(AL.getLoc(), diag::ext_clang_diagnose_if);
 
   Expr *Cond;
   StringRef Msg;
-  if (!checkFunctionConditionAttr(S, D, Attr, Cond, Msg))
+  if (!checkFunctionConditionAttr(S, D, AL, Cond, Msg))
     return;
 
   StringRef DiagTypeStr;
-  if (!S.checkStringLiteralArgumentAttr(Attr, 2, DiagTypeStr))
+  if (!S.checkStringLiteralArgumentAttr(AL, 2, DiagTypeStr))
     return;
 
   DiagnoseIfAttr::DiagnosticType DiagType;
   if (!DiagnoseIfAttr::ConvertStrToDiagnosticType(DiagTypeStr, DiagType)) {
-    S.Diag(Attr.getArgAsExpr(2)->getLocStart(),
+    S.Diag(AL.getArgAsExpr(2)->getLocStart(),
            diag::err_diagnose_if_invalid_diagnostic_type);
     return;
   }
@@ -1041,21 +1011,21 @@
   if (const auto *FD = dyn_cast<FunctionDecl>(D))
     ArgDependent = ArgumentDependenceChecker(FD).referencesArgs(Cond);
   D->addAttr(::new (S.Context) DiagnoseIfAttr(
-      Attr.getRange(), S.Context, Cond, Msg, DiagType, ArgDependent, cast<NamedDecl>(D),
-      Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, Cond, Msg, DiagType, ArgDependent,
+      cast<NamedDecl>(D), AL.getAttributeSpellingListIndex()));
 }
 
 static void handlePassObjectSizeAttr(Sema &S, Decl *D,
-                                     const AttributeList &Attr) {
+                                     const AttributeList &AL) {
   if (D->hasAttr<PassObjectSizeAttr>()) {
     S.Diag(D->getLocStart(), diag::err_attribute_only_once_per_parameter)
-        << Attr.getName();
+        << AL.getName();
     return;
   }
 
-  Expr *E = Attr.getArgAsExpr(0);
+  Expr *E = AL.getArgAsExpr(0);
   uint32_t Type;
-  if (!checkUInt32Argument(S, Attr, E, Type, /*Idx=*/1))
+  if (!checkUInt32Argument(S, AL, E, Type, /*Idx=*/1))
     return;
 
   // pass_object_size's argument is passed in as the second argument of
@@ -1063,7 +1033,7 @@
   // argument; namely, it must be in the range [0, 3].
   if (Type > 3) {
     S.Diag(E->getLocStart(), diag::err_attribute_argument_outof_range)
-        << Attr.getName() << 0 << 3 << E->getSourceRange();
+        << AL.getName() << 0 << 3 << E->getSourceRange();
     return;
   }
 
@@ -1073,45 +1043,44 @@
   // definition, so we defer the constness check until later.
   if (!cast<ParmVarDecl>(D)->getType()->isPointerType()) {
     S.Diag(D->getLocStart(), diag::err_attribute_pointers_only)
-        << Attr.getName() << 1;
+        << AL.getName() << 1;
     return;
   }
 
-  D->addAttr(::new (S.Context)
-                 PassObjectSizeAttr(Attr.getRange(), S.Context, (int)Type,
-                                    Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) PassObjectSizeAttr(
+      AL.getRange(), S.Context, (int)Type, AL.getAttributeSpellingListIndex()));
 }
 
-static void handleConsumableAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleConsumableAttr(Sema &S, Decl *D, const AttributeList &AL) {
   ConsumableAttr::ConsumedState DefaultState;
 
-  if (Attr.isArgIdent(0)) {
-    IdentifierLoc *IL = Attr.getArgAsIdent(0);
+  if (AL.isArgIdent(0)) {
+    IdentifierLoc *IL = AL.getArgAsIdent(0);
     if (!ConsumableAttr::ConvertStrToConsumedState(IL->Ident->getName(),
                                                    DefaultState)) {
       S.Diag(IL->Loc, diag::warn_attribute_type_not_supported)
-        << Attr.getName() << IL->Ident;
+        << AL.getName() << IL->Ident;
       return;
     }
   } else {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
-        << Attr.getName() << AANT_ArgumentIdentifier;
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_type)
+        << AL.getName() << AANT_ArgumentIdentifier;
     return;
   }
   
   D->addAttr(::new (S.Context)
-             ConsumableAttr(Attr.getRange(), S.Context, DefaultState,
-                            Attr.getAttributeSpellingListIndex()));
+             ConsumableAttr(AL.getRange(), S.Context, DefaultState,
+                            AL.getAttributeSpellingListIndex()));
 }
 
 static bool checkForConsumableClass(Sema &S, const CXXMethodDecl *MD,
-                                        const AttributeList &Attr) {
+                                        const AttributeList &AL) {
   ASTContext &CurrContext = S.getASTContext();
   QualType ThisType = MD->getThisType(CurrContext)->getPointeeType();
   
   if (const CXXRecordDecl *RD = ThisType->getAsCXXRecordDecl()) {
     if (!RD->hasAttr<ConsumableAttr>()) {
-      S.Diag(Attr.getLoc(), diag::warn_attr_on_unconsumable_class) <<
+      S.Diag(AL.getLoc(), diag::warn_attr_on_unconsumable_class) <<
         RD->getNameAsString();
       
       return false;
@@ -1122,32 +1091,32 @@
 }
 
 static void handleCallableWhenAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
-  if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
+                                   const AttributeList &AL) {
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
     return;
   
-  if (!checkForConsumableClass(S, cast<CXXMethodDecl>(D), Attr))
+  if (!checkForConsumableClass(S, cast<CXXMethodDecl>(D), AL))
     return;
   
   SmallVector<CallableWhenAttr::ConsumedState, 3> States;
-  for (unsigned ArgIndex = 0; ArgIndex < Attr.getNumArgs(); ++ArgIndex) {
+  for (unsigned ArgIndex = 0; ArgIndex < AL.getNumArgs(); ++ArgIndex) {
     CallableWhenAttr::ConsumedState CallableState;
     
     StringRef StateString;
     SourceLocation Loc;
-    if (Attr.isArgIdent(ArgIndex)) {
-      IdentifierLoc *Ident = Attr.getArgAsIdent(ArgIndex);
+    if (AL.isArgIdent(ArgIndex)) {
+      IdentifierLoc *Ident = AL.getArgAsIdent(ArgIndex);
       StateString = Ident->Ident->getName();
       Loc = Ident->Loc;
     } else {
-      if (!S.checkStringLiteralArgumentAttr(Attr, ArgIndex, StateString, &Loc))
+      if (!S.checkStringLiteralArgumentAttr(AL, ArgIndex, StateString, &Loc))
         return;
     }
 
     if (!CallableWhenAttr::ConvertStrToConsumedState(StateString,
                                                      CallableState)) {
       S.Diag(Loc, diag::warn_attribute_type_not_supported)
-        << Attr.getName() << StateString;
+        << AL.getName() << StateString;
       return;
     }
       
@@ -1155,27 +1124,27 @@
   }
   
   D->addAttr(::new (S.Context)
-             CallableWhenAttr(Attr.getRange(), S.Context, States.data(),
-               States.size(), Attr.getAttributeSpellingListIndex()));
+             CallableWhenAttr(AL.getRange(), S.Context, States.data(),
+               States.size(), AL.getAttributeSpellingListIndex()));
 }
 
 static void handleParamTypestateAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
+                                    const AttributeList &AL) {
   ParamTypestateAttr::ConsumedState ParamState;
   
-  if (Attr.isArgIdent(0)) {
-    IdentifierLoc *Ident = Attr.getArgAsIdent(0);
+  if (AL.isArgIdent(0)) {
+    IdentifierLoc *Ident = AL.getArgAsIdent(0);
     StringRef StateString = Ident->Ident->getName();
 
     if (!ParamTypestateAttr::ConvertStrToConsumedState(StateString,
                                                        ParamState)) {
       S.Diag(Ident->Loc, diag::warn_attribute_type_not_supported)
-        << Attr.getName() << StateString;
+        << AL.getName() << StateString;
       return;
     }
   } else {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type) <<
-      Attr.getName() << AANT_ArgumentIdentifier;
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_type) <<
+      AL.getName() << AANT_ArgumentIdentifier;
     return;
   }
   
@@ -1186,31 +1155,31 @@
   //const CXXRecordDecl *RD = ReturnType->getAsCXXRecordDecl();
   //
   //if (!RD || !RD->hasAttr<ConsumableAttr>()) {
-  //    S.Diag(Attr.getLoc(), diag::warn_return_state_for_unconsumable_type) <<
+  //    S.Diag(AL.getLoc(), diag::warn_return_state_for_unconsumable_type) <<
   //      ReturnType.getAsString();
   //    return;
   //}
   
   D->addAttr(::new (S.Context)
-             ParamTypestateAttr(Attr.getRange(), S.Context, ParamState,
-                                Attr.getAttributeSpellingListIndex()));
+             ParamTypestateAttr(AL.getRange(), S.Context, ParamState,
+                                AL.getAttributeSpellingListIndex()));
 }
 
 static void handleReturnTypestateAttr(Sema &S, Decl *D,
-                                      const AttributeList &Attr) {
+                                      const AttributeList &AL) {
   ReturnTypestateAttr::ConsumedState ReturnState;
   
-  if (Attr.isArgIdent(0)) {
-    IdentifierLoc *IL = Attr.getArgAsIdent(0);
+  if (AL.isArgIdent(0)) {
+    IdentifierLoc *IL = AL.getArgAsIdent(0);
     if (!ReturnTypestateAttr::ConvertStrToConsumedState(IL->Ident->getName(),
                                                         ReturnState)) {
       S.Diag(IL->Loc, diag::warn_attribute_type_not_supported)
-        << Attr.getName() << IL->Ident;
+        << AL.getName() << IL->Ident;
       return;
     }
   } else {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type) <<
-      Attr.getName() << AANT_ArgumentIdentifier;
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_type) <<
+      AL.getName() << AANT_ArgumentIdentifier;
     return;
   }
   
@@ -1238,72 +1207,71 @@
   //      ReturnType.getAsString();
   //    return;
   //}
-  
+
   D->addAttr(::new (S.Context)
-             ReturnTypestateAttr(Attr.getRange(), S.Context, ReturnState,
-                                 Attr.getAttributeSpellingListIndex()));
+                 ReturnTypestateAttr(AL.getRange(), S.Context, ReturnState,
+                                     AL.getAttributeSpellingListIndex()));
 }
 
-static void handleSetTypestateAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (!checkForConsumableClass(S, cast<CXXMethodDecl>(D), Attr))
+static void handleSetTypestateAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (!checkForConsumableClass(S, cast<CXXMethodDecl>(D), AL))
     return;
   
   SetTypestateAttr::ConsumedState NewState;
-  if (Attr.isArgIdent(0)) {
-    IdentifierLoc *Ident = Attr.getArgAsIdent(0);
+  if (AL.isArgIdent(0)) {
+    IdentifierLoc *Ident = AL.getArgAsIdent(0);
     StringRef Param = Ident->Ident->getName();
     if (!SetTypestateAttr::ConvertStrToConsumedState(Param, NewState)) {
       S.Diag(Ident->Loc, diag::warn_attribute_type_not_supported)
-        << Attr.getName() << Param;
+        << AL.getName() << Param;
       return;
     }
   } else {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type) <<
-      Attr.getName() << AANT_ArgumentIdentifier;
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_type) <<
+      AL.getName() << AANT_ArgumentIdentifier;
     return;
   }
   
   D->addAttr(::new (S.Context)
-             SetTypestateAttr(Attr.getRange(), S.Context, NewState,
-                              Attr.getAttributeSpellingListIndex()));
+             SetTypestateAttr(AL.getRange(), S.Context, NewState,
+                              AL.getAttributeSpellingListIndex()));
 }
 
 static void handleTestTypestateAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
-  if (!checkForConsumableClass(S, cast<CXXMethodDecl>(D), Attr))
+                                    const AttributeList &AL) {
+  if (!checkForConsumableClass(S, cast<CXXMethodDecl>(D), AL))
     return;
   
   TestTypestateAttr::ConsumedState TestState;  
-  if (Attr.isArgIdent(0)) {
-    IdentifierLoc *Ident = Attr.getArgAsIdent(0);
+  if (AL.isArgIdent(0)) {
+    IdentifierLoc *Ident = AL.getArgAsIdent(0);
     StringRef Param = Ident->Ident->getName();
     if (!TestTypestateAttr::ConvertStrToConsumedState(Param, TestState)) {
       S.Diag(Ident->Loc, diag::warn_attribute_type_not_supported)
-        << Attr.getName() << Param;
+        << AL.getName() << Param;
       return;
     }
   } else {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type) <<
-      Attr.getName() << AANT_ArgumentIdentifier;
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_type) <<
+      AL.getName() << AANT_ArgumentIdentifier;
     return;
   }
   
   D->addAttr(::new (S.Context)
-             TestTypestateAttr(Attr.getRange(), S.Context, TestState,
-                                Attr.getAttributeSpellingListIndex()));
+             TestTypestateAttr(AL.getRange(), S.Context, TestState,
+                                AL.getAttributeSpellingListIndex()));
 }
 
-static void handleExtVectorTypeAttr(Sema &S, Scope *scope, Decl *D,
-                                    const AttributeList &Attr) {
+static void handleExtVectorTypeAttr(Sema &S, Decl *D, const AttributeList &AL) {
   // Remember this typedef decl, we will need it later for diagnostics.
   S.ExtVectorDecls.push_back(cast<TypedefNameDecl>(D));
 }
 
-static void handlePackedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (TagDecl *TD = dyn_cast<TagDecl>(D))
-    TD->addAttr(::new (S.Context) PackedAttr(Attr.getRange(), S.Context,
-                                        Attr.getAttributeSpellingListIndex()));
-  else if (FieldDecl *FD = dyn_cast<FieldDecl>(D)) {
+static void handlePackedAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (auto *TD = dyn_cast<TagDecl>(D))
+    TD->addAttr(::new (S.Context) PackedAttr(AL.getRange(), S.Context,
+                                        AL.getAttributeSpellingListIndex()));
+  else if (auto *FD = dyn_cast<FieldDecl>(D)) {
     bool BitfieldByteAligned = (!FD->getType()->isDependentType() &&
                                 !FD->getType()->isIncompleteType() &&
                                 FD->isBitField() &&
@@ -1312,81 +1280,81 @@
     if (S.getASTContext().getTargetInfo().getTriple().isPS4()) {
       if (BitfieldByteAligned)
         // The PS4 target needs to maintain ABI backwards compatibility.
-        S.Diag(Attr.getLoc(), diag::warn_attribute_ignored_for_field_of_type)
-          << Attr.getName() << FD->getType();
+        S.Diag(AL.getLoc(), diag::warn_attribute_ignored_for_field_of_type)
+          << AL.getName() << FD->getType();
       else
         FD->addAttr(::new (S.Context) PackedAttr(
-                    Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+                    AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
     } else {
       // Report warning about changed offset in the newer compiler versions.
       if (BitfieldByteAligned)
-        S.Diag(Attr.getLoc(), diag::warn_attribute_packed_for_bitfield);
+        S.Diag(AL.getLoc(), diag::warn_attribute_packed_for_bitfield);
 
       FD->addAttr(::new (S.Context) PackedAttr(
-                  Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+                  AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
     }
 
   } else
-    S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) << Attr.getName();
+    S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL.getName();
 }
 
-static bool checkIBOutletCommon(Sema &S, Decl *D, const AttributeList &Attr) {
+static bool checkIBOutletCommon(Sema &S, Decl *D, const AttributeList &AL) {
   // The IBOutlet/IBOutletCollection attributes only apply to instance
   // variables or properties of Objective-C classes.  The outlet must also
   // have an object reference type.
-  if (const ObjCIvarDecl *VD = dyn_cast<ObjCIvarDecl>(D)) {
+  if (const auto *VD = dyn_cast<ObjCIvarDecl>(D)) {
     if (!VD->getType()->getAs<ObjCObjectPointerType>()) {
-      S.Diag(Attr.getLoc(), diag::warn_iboutlet_object_type)
-        << Attr.getName() << VD->getType() << 0;
+      S.Diag(AL.getLoc(), diag::warn_iboutlet_object_type)
+        << AL.getName() << VD->getType() << 0;
       return false;
     }
   }
-  else if (const ObjCPropertyDecl *PD = dyn_cast<ObjCPropertyDecl>(D)) {
+  else if (const auto *PD = dyn_cast<ObjCPropertyDecl>(D)) {
     if (!PD->getType()->getAs<ObjCObjectPointerType>()) {
-      S.Diag(Attr.getLoc(), diag::warn_iboutlet_object_type)
-        << Attr.getName() << PD->getType() << 1;
+      S.Diag(AL.getLoc(), diag::warn_iboutlet_object_type)
+        << AL.getName() << PD->getType() << 1;
       return false;
     }
   }
   else {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_iboutlet) << Attr.getName();
+    S.Diag(AL.getLoc(), diag::warn_attribute_iboutlet) << AL.getName();
     return false;
   }
 
   return true;
 }
 
-static void handleIBOutlet(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (!checkIBOutletCommon(S, D, Attr))
+static void handleIBOutlet(Sema &S, Decl *D, const AttributeList &AL) {
+  if (!checkIBOutletCommon(S, D, AL))
     return;
 
   D->addAttr(::new (S.Context)
-             IBOutletAttr(Attr.getRange(), S.Context,
-                          Attr.getAttributeSpellingListIndex()));
+             IBOutletAttr(AL.getRange(), S.Context,
+                          AL.getAttributeSpellingListIndex()));
 }
 
 static void handleIBOutletCollection(Sema &S, Decl *D,
-                                     const AttributeList &Attr) {
+                                     const AttributeList &AL) {
 
   // The iboutletcollection attribute can have zero or one arguments.
-  if (Attr.getNumArgs() > 1) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
-      << Attr.getName() << 1;
+  if (AL.getNumArgs() > 1) {
+    S.Diag(AL.getLoc(), diag::err_attribute_wrong_number_arguments)
+      << AL.getName() << 1;
     return;
   }
 
-  if (!checkIBOutletCommon(S, D, Attr))
+  if (!checkIBOutletCommon(S, D, AL))
     return;
 
   ParsedType PT;
 
-  if (Attr.hasParsedType())
-    PT = Attr.getTypeArg();
+  if (AL.hasParsedType())
+    PT = AL.getTypeArg();
   else {
-    PT = S.getTypeName(S.Context.Idents.get("NSObject"), Attr.getLoc(),
+    PT = S.getTypeName(S.Context.Idents.get("NSObject"), AL.getLoc(),
                        S.getScopeForContext(D->getDeclContext()->getParent()));
     if (!PT) {
-      S.Diag(Attr.getLoc(), diag::err_iboutletcollection_type) << "NSObject";
+      S.Diag(AL.getLoc(), diag::err_iboutletcollection_type) << "NSObject";
       return;
     }
   }
@@ -1394,22 +1362,22 @@
   TypeSourceInfo *QTLoc = nullptr;
   QualType QT = S.GetTypeFromParser(PT, &QTLoc);
   if (!QTLoc)
-    QTLoc = S.Context.getTrivialTypeSourceInfo(QT, Attr.getLoc());
+    QTLoc = S.Context.getTrivialTypeSourceInfo(QT, AL.getLoc());
 
   // Diagnose use of non-object type in iboutletcollection attribute.
   // FIXME. Gnu attribute extension ignores use of builtin types in
   // attributes. So, __attribute__((iboutletcollection(char))) will be
   // treated as __attribute__((iboutletcollection())).
   if (!QT->isObjCIdType() && !QT->isObjCObjectType()) {
-    S.Diag(Attr.getLoc(),
+    S.Diag(AL.getLoc(),
            QT->isBuiltinType() ? diag::err_iboutletcollection_builtintype
                                : diag::err_iboutletcollection_type) << QT;
     return;
   }
 
   D->addAttr(::new (S.Context)
-             IBOutletCollectionAttr(Attr.getRange(), S.Context, QTLoc,
-                                    Attr.getAttributeSpellingListIndex()));
+             IBOutletCollectionAttr(AL.getRange(), S.Context, QTLoc,
+                                    AL.getAttributeSpellingListIndex()));
 }
 
 bool Sema::isValidPointerAttrType(QualType T, bool RefOkay) {
@@ -1436,35 +1404,36 @@
   return T->isAnyPointerType() || T->isBlockPointerType();
 }
 
-static bool attrNonNullArgCheck(Sema &S, QualType T, const AttributeList &Attr,
+static bool attrNonNullArgCheck(Sema &S, QualType T, const AttributeList &AL,
                                 SourceRange AttrParmRange,
                                 SourceRange TypeRange,
                                 bool isReturnValue = false) {
   if (!S.isValidPointerAttrType(T)) {
     if (isReturnValue)
-      S.Diag(Attr.getLoc(), diag::warn_attribute_return_pointers_only)
-          << Attr.getName() << AttrParmRange << TypeRange;
+      S.Diag(AL.getLoc(), diag::warn_attribute_return_pointers_only)
+          << AL.getName() << AttrParmRange << TypeRange;
     else
-      S.Diag(Attr.getLoc(), diag::warn_attribute_pointers_only)
-          << Attr.getName() << AttrParmRange << TypeRange << 0;
+      S.Diag(AL.getLoc(), diag::warn_attribute_pointers_only)
+          << AL.getName() << AttrParmRange << TypeRange << 0;
     return false;
   }
   return true;
 }
 
-static void handleNonNullAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  SmallVector<unsigned, 8> NonNullArgs;
-  for (unsigned I = 0; I < Attr.getNumArgs(); ++I) {
-    Expr *Ex = Attr.getArgAsExpr(I);
-    uint64_t Idx;
-    if (!checkFunctionOrMethodParameterIndex(S, D, Attr, I + 1, Ex, Idx))
+static void handleNonNullAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  SmallVector<ParamIdx, 8> NonNullArgs;
+  for (unsigned I = 0; I < AL.getNumArgs(); ++I) {
+    Expr *Ex = AL.getArgAsExpr(I);
+    ParamIdx Idx;
+    if (!checkFunctionOrMethodParameterIndex(S, D, AL, I + 1, Ex, Idx))
       return;
 
     // Is the function argument a pointer type?
-    if (Idx < getFunctionOrMethodNumParams(D) &&
-        !attrNonNullArgCheck(S, getFunctionOrMethodParamType(D, Idx), Attr,
-                             Ex->getSourceRange(),
-                             getFunctionOrMethodParamRange(D, Idx)))
+    if (Idx.getASTIndex() < getFunctionOrMethodNumParams(D) &&
+        !attrNonNullArgCheck(
+            S, getFunctionOrMethodParamType(D, Idx.getASTIndex()), AL,
+            Ex->getSourceRange(),
+            getFunctionOrMethodParamRange(D, Idx.getASTIndex())))
       continue;
 
     NonNullArgs.push_back(Idx);
@@ -1474,7 +1443,7 @@
   // arguments have a nonnull attribute; warn if there aren't any. Skip this
   // check if the attribute came from a macro expansion or a template
   // instantiation.
-  if (NonNullArgs.empty() && Attr.getLoc().isFileID() &&
+  if (NonNullArgs.empty() && AL.getLoc().isFileID() &&
       !S.inTemplateInstantiation()) {
     bool AnyPointers = isFunctionOrMethodVariadic(D);
     for (unsigned I = 0, E = getFunctionOrMethodNumParams(D);
@@ -1485,80 +1454,80 @@
     }
 
     if (!AnyPointers)
-      S.Diag(Attr.getLoc(), diag::warn_attribute_nonnull_no_pointers);
+      S.Diag(AL.getLoc(), diag::warn_attribute_nonnull_no_pointers);
   }
 
-  unsigned *Start = NonNullArgs.data();
+  ParamIdx *Start = NonNullArgs.data();
   unsigned Size = NonNullArgs.size();
   llvm::array_pod_sort(Start, Start + Size);
   D->addAttr(::new (S.Context)
-             NonNullAttr(Attr.getRange(), S.Context, Start, Size,
-                         Attr.getAttributeSpellingListIndex()));
+                 NonNullAttr(AL.getRange(), S.Context, Start, Size,
+                             AL.getAttributeSpellingListIndex()));
 }
 
 static void handleNonNullAttrParameter(Sema &S, ParmVarDecl *D,
-                                       const AttributeList &Attr) {
-  if (Attr.getNumArgs() > 0) {
+                                       const AttributeList &AL) {
+  if (AL.getNumArgs() > 0) {
     if (D->getFunctionType()) {
-      handleNonNullAttr(S, D, Attr);
+      handleNonNullAttr(S, D, AL);
     } else {
-      S.Diag(Attr.getLoc(), diag::warn_attribute_nonnull_parm_no_args)
+      S.Diag(AL.getLoc(), diag::warn_attribute_nonnull_parm_no_args)
         << D->getSourceRange();
     }
     return;
   }
 
   // Is the argument a pointer type?
-  if (!attrNonNullArgCheck(S, D->getType(), Attr, SourceRange(),
+  if (!attrNonNullArgCheck(S, D->getType(), AL, SourceRange(),
                            D->getSourceRange()))
     return;
 
   D->addAttr(::new (S.Context)
-             NonNullAttr(Attr.getRange(), S.Context, nullptr, 0,
-                         Attr.getAttributeSpellingListIndex()));
+                 NonNullAttr(AL.getRange(), S.Context, nullptr, 0,
+                             AL.getAttributeSpellingListIndex()));
 }
 
 static void handleReturnsNonNullAttr(Sema &S, Decl *D,
-                                     const AttributeList &Attr) {
+                                     const AttributeList &AL) {
   QualType ResultType = getFunctionOrMethodResultType(D);
   SourceRange SR = getFunctionOrMethodResultSourceRange(D);
-  if (!attrNonNullArgCheck(S, ResultType, Attr, SourceRange(), SR,
+  if (!attrNonNullArgCheck(S, ResultType, AL, SourceRange(), SR,
                            /* isReturnValue */ true))
     return;
 
   D->addAttr(::new (S.Context)
-            ReturnsNonNullAttr(Attr.getRange(), S.Context,
-                               Attr.getAttributeSpellingListIndex()));
+            ReturnsNonNullAttr(AL.getRange(), S.Context,
+                               AL.getAttributeSpellingListIndex()));
 }
 
-static void handleNoEscapeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleNoEscapeAttr(Sema &S, Decl *D, const AttributeList &AL) {
   if (D->isInvalidDecl())
     return;
 
   // noescape only applies to pointer types.
   QualType T = cast<ParmVarDecl>(D)->getType();
   if (!S.isValidPointerAttrType(T, /* RefOkay */ true)) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_pointers_only)
-        << Attr.getName() << Attr.getRange() << 0;
+    S.Diag(AL.getLoc(), diag::warn_attribute_pointers_only)
+        << AL.getName() << AL.getRange() << 0;
     return;
   }
 
   D->addAttr(::new (S.Context) NoEscapeAttr(
-      Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
 }
 
 static void handleAssumeAlignedAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
-  Expr *E = Attr.getArgAsExpr(0),
-       *OE = Attr.getNumArgs() > 1 ? Attr.getArgAsExpr(1) : nullptr;
-  S.AddAssumeAlignedAttr(Attr.getRange(), D, E, OE,
-                         Attr.getAttributeSpellingListIndex());
+                                    const AttributeList &AL) {
+  Expr *E = AL.getArgAsExpr(0),
+       *OE = AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr;
+  S.AddAssumeAlignedAttr(AL.getRange(), D, E, OE,
+                         AL.getAttributeSpellingListIndex());
 }
 
 static void handleAllocAlignAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
-  S.AddAllocAlignAttr(Attr.getRange(), D, Attr.getArgAsExpr(0),
-                      Attr.getAttributeSpellingListIndex());
+                                   const AttributeList &AL) {
+  S.AddAllocAlignAttr(AL.getRange(), D, AL.getArgAsExpr(0),
+                      AL.getAttributeSpellingListIndex());
 }
 
 void Sema::AddAssumeAlignedAttr(SourceRange AttrRange, Decl *D, Expr *E,
@@ -1616,7 +1585,7 @@
                              unsigned SpellingListIndex) {
   QualType ResultType = getFunctionOrMethodResultType(D);
 
-  AllocAlignAttr TmpAttr(AttrRange, Context, 0, SpellingListIndex);
+  AllocAlignAttr TmpAttr(AttrRange, Context, ParamIdx(), SpellingListIndex);
   SourceLocation AttrLoc = AttrRange.getBegin();
 
   if (!ResultType->isDependentType() &&
@@ -1626,28 +1595,22 @@
     return;
   }
 
-  uint64_t IndexVal;
+  ParamIdx Idx;
   const auto *FuncDecl = cast<FunctionDecl>(D);
   if (!checkFunctionOrMethodParameterIndex(*this, FuncDecl, TmpAttr,
-                                           /*AttrArgNo=*/1, ParamExpr,
-                                           IndexVal))
+                                           /*AttrArgNo=*/1, ParamExpr, Idx))
     return;
 
-  QualType Ty = getFunctionOrMethodParamType(D, IndexVal);
+  QualType Ty = getFunctionOrMethodParamType(D, Idx.getASTIndex());
   if (!Ty->isDependentType() && !Ty->isIntegralType(Context)) {
     Diag(ParamExpr->getLocStart(), diag::err_attribute_integers_only)
-        << &TmpAttr << FuncDecl->getParamDecl(IndexVal)->getSourceRange();
+        << &TmpAttr
+        << FuncDecl->getParamDecl(Idx.getASTIndex())->getSourceRange();
     return;
   }
 
-  // We cannot use the Idx returned from checkFunctionOrMethodParameterIndex
-  // because that has corrected for the implicit this parameter, and is zero-
-  // based.  The attribute expects what the user wrote explicitly.
-  llvm::APSInt Val;
-  ParamExpr->EvaluateAsInt(Val, Context);
-
-  D->addAttr(::new (Context) AllocAlignAttr(
-      AttrRange, Context, Val.getZExtValue(), SpellingListIndex));
+  D->addAttr(::new (Context)
+                 AllocAlignAttr(AttrRange, Context, Idx, SpellingListIndex));
 }
 
 /// Normalize the attribute, __foo__ becomes foo.
@@ -1707,15 +1670,15 @@
     Module = &S.PP.getIdentifierTable().get(ModuleName);
   }
 
-  SmallVector<unsigned, 8> OwnershipArgs;
+  SmallVector<ParamIdx, 8> OwnershipArgs;
   for (unsigned i = 1; i < AL.getNumArgs(); ++i) {
     Expr *Ex = AL.getArgAsExpr(i);
-    uint64_t Idx;
+    ParamIdx Idx;
     if (!checkFunctionOrMethodParameterIndex(S, D, AL, i, Ex, Idx))
       return;
 
     // Is the function argument a pointer type?
-    QualType T = getFunctionOrMethodParamType(D, Idx);
+    QualType T = getFunctionOrMethodParamType(D, Idx.getASTIndex());
     int Err = -1;  // No error
     switch (K) {
       case OwnershipAttr::Takes:
@@ -1746,14 +1709,13 @@
       } else if (K == OwnershipAttr::Returns &&
                  I->getOwnKind() == OwnershipAttr::Returns) {
         // A returns attribute conflicts with any other returns attribute using
-        // a different index. Note, diagnostic reporting is 1-based, but stored
-        // argument indexes are 0-based.
+        // a different index.
         if (std::find(I->args_begin(), I->args_end(), Idx) == I->args_end()) {
           S.Diag(I->getLocation(), diag::err_ownership_returns_index_mismatch)
-              << *(I->args_begin()) + 1;
+              << I->args_begin()->getSourceIndex();
           if (I->args_size())
             S.Diag(AL.getLoc(), diag::note_ownership_returns_index_mismatch)
-                << (unsigned)Idx + 1 << Ex->getSourceRange();
+                << Idx.getSourceIndex() << Ex->getSourceRange();
           return;
         }
       }
@@ -1761,25 +1723,22 @@
     OwnershipArgs.push_back(Idx);
   }
 
-  unsigned* start = OwnershipArgs.data();
-  unsigned size = OwnershipArgs.size();
-  llvm::array_pod_sort(start, start + size);
-
+  ParamIdx *Start = OwnershipArgs.data();
+  unsigned Size = OwnershipArgs.size();
+  llvm::array_pod_sort(Start, Start + Size);
   D->addAttr(::new (S.Context)
-             OwnershipAttr(AL.getLoc(), S.Context, Module, start, size,
-                           AL.getAttributeSpellingListIndex()));
+                 OwnershipAttr(AL.getLoc(), S.Context, Module, Start, Size,
+                               AL.getAttributeSpellingListIndex()));
 }
 
-static void handleWeakRefAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleWeakRefAttr(Sema &S, Decl *D, const AttributeList &AL) {
   // Check the attribute arguments.
-  if (Attr.getNumArgs() > 1) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
-      << Attr.getName() << 1;
+  if (AL.getNumArgs() > 1) {
+    S.Diag(AL.getLoc(), diag::err_attribute_wrong_number_arguments)
+      << AL.getName() << 1;
     return;
   }
 
-  NamedDecl *nd = cast<NamedDecl>(D);
-
   // gcc rejects
   // class c {
   //   static int a __attribute__((weakref ("v2")));
@@ -1792,8 +1751,8 @@
   // we reject them
   const DeclContext *Ctx = D->getDeclContext()->getRedeclContext();
   if (!Ctx->isFileContext()) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_weakref_not_global_context)
-      << nd;
+    S.Diag(AL.getLoc(), diag::err_attribute_weakref_not_global_context)
+        << cast<NamedDecl>(D);
     return;
   }
 
@@ -1823,94 +1782,72 @@
   // of transforming it into an AliasAttr.  The WeakRefAttr never uses the
   // StringRef parameter it was given anyway.
   StringRef Str;
-  if (Attr.getNumArgs() && S.checkStringLiteralArgumentAttr(Attr, 0, Str))
+  if (AL.getNumArgs() && S.checkStringLiteralArgumentAttr(AL, 0, Str))
     // GCC will accept anything as the argument of weakref. Should we
     // check for an existing decl?
-    D->addAttr(::new (S.Context) AliasAttr(Attr.getRange(), S.Context, Str,
-                                        Attr.getAttributeSpellingListIndex()));
+    D->addAttr(::new (S.Context) AliasAttr(AL.getRange(), S.Context, Str,
+                                        AL.getAttributeSpellingListIndex()));
 
   D->addAttr(::new (S.Context)
-             WeakRefAttr(Attr.getRange(), S.Context,
-                         Attr.getAttributeSpellingListIndex()));
+             WeakRefAttr(AL.getRange(), S.Context,
+                         AL.getAttributeSpellingListIndex()));
 }
 
-static void handleIFuncAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleIFuncAttr(Sema &S, Decl *D, const AttributeList &AL) {
   StringRef Str;
-  if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str))
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, Str))
     return;
 
   // Aliases should be on declarations, not definitions.
   const auto *FD = cast<FunctionDecl>(D);
   if (FD->isThisDeclarationADefinition()) {
-    S.Diag(Attr.getLoc(), diag::err_alias_is_definition) << FD << 1;
-    return;
-  }
-  // FIXME: it should be handled as a target specific attribute.
-  if (S.Context.getTargetInfo().getTriple().getObjectFormat() !=
-          llvm::Triple::ELF) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) << Attr.getName();
+    S.Diag(AL.getLoc(), diag::err_alias_is_definition) << FD << 1;
     return;
   }
 
-  D->addAttr(::new (S.Context) IFuncAttr(Attr.getRange(), S.Context, Str,
-                                         Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) IFuncAttr(AL.getRange(), S.Context, Str,
+                                         AL.getAttributeSpellingListIndex()));
 }
 
-static void handleAliasAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleAliasAttr(Sema &S, Decl *D, const AttributeList &AL) {
   StringRef Str;
-  if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str))
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, Str))
     return;
 
   if (S.Context.getTargetInfo().getTriple().isOSDarwin()) {
-    S.Diag(Attr.getLoc(), diag::err_alias_not_supported_on_darwin);
+    S.Diag(AL.getLoc(), diag::err_alias_not_supported_on_darwin);
     return;
   }
   if (S.Context.getTargetInfo().getTriple().isNVPTX()) {
-    S.Diag(Attr.getLoc(), diag::err_alias_not_supported_on_nvptx);
+    S.Diag(AL.getLoc(), diag::err_alias_not_supported_on_nvptx);
   }
 
   // Aliases should be on declarations, not definitions.
   if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
     if (FD->isThisDeclarationADefinition()) {
-      S.Diag(Attr.getLoc(), diag::err_alias_is_definition) << FD << 0;
+      S.Diag(AL.getLoc(), diag::err_alias_is_definition) << FD << 0;
       return;
     }
   } else {
     const auto *VD = cast<VarDecl>(D);
     if (VD->isThisDeclarationADefinition() && VD->isExternallyVisible()) {
-      S.Diag(Attr.getLoc(), diag::err_alias_is_definition) << VD << 0;
+      S.Diag(AL.getLoc(), diag::err_alias_is_definition) << VD << 0;
       return;
     }
   }
 
   // FIXME: check if target symbol exists in current file
 
-  D->addAttr(::new (S.Context) AliasAttr(Attr.getRange(), S.Context, Str,
-                                         Attr.getAttributeSpellingListIndex()));
-}
-
-static void handleColdAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<HotAttr>(S, D, Attr.getRange(), Attr.getName()))
-    return;
-
-  D->addAttr(::new (S.Context) ColdAttr(Attr.getRange(), S.Context,
-                                        Attr.getAttributeSpellingListIndex()));
-}
-
-static void handleHotAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<ColdAttr>(S, D, Attr.getRange(), Attr.getName()))
-    return;
-
-  D->addAttr(::new (S.Context) HotAttr(Attr.getRange(), S.Context,
-                                       Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) AliasAttr(AL.getRange(), S.Context, Str,
+                                         AL.getAttributeSpellingListIndex()));
 }
 
 static void handleTLSModelAttr(Sema &S, Decl *D,
-                               const AttributeList &Attr) {
+                               const AttributeList &AL) {
   StringRef Model;
   SourceLocation LiteralLoc;
   // Check that it is a string.
-  if (!S.checkStringLiteralArgumentAttr(Attr, 0, Model, &LiteralLoc))
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, Model, &LiteralLoc))
     return;
 
   // Check that the value.
@@ -1921,52 +1858,52 @@
   }
 
   D->addAttr(::new (S.Context)
-             TLSModelAttr(Attr.getRange(), S.Context, Model,
-                          Attr.getAttributeSpellingListIndex()));
+             TLSModelAttr(AL.getRange(), S.Context, Model,
+                          AL.getAttributeSpellingListIndex()));
 }
 
-static void handleRestrictAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleRestrictAttr(Sema &S, Decl *D, const AttributeList &AL) {
   QualType ResultType = getFunctionOrMethodResultType(D);
   if (ResultType->isAnyPointerType() || ResultType->isBlockPointerType()) {
     D->addAttr(::new (S.Context) RestrictAttr(
-        Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+        AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
     return;
   }
 
-  S.Diag(Attr.getLoc(), diag::warn_attribute_return_pointers_only)
-      << Attr.getName() << getFunctionOrMethodResultSourceRange(D);
+  S.Diag(AL.getLoc(), diag::warn_attribute_return_pointers_only)
+      << AL.getName() << getFunctionOrMethodResultSourceRange(D);
 }
 
-static void handleCommonAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleCommonAttr(Sema &S, Decl *D, const AttributeList &AL) {
   if (S.LangOpts.CPlusPlus) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_not_supported_in_lang)
-        << Attr.getName() << AttributeLangSupport::Cpp;
+    S.Diag(AL.getLoc(), diag::err_attribute_not_supported_in_lang)
+        << AL.getName() << AttributeLangSupport::Cpp;
     return;
   }
 
-  if (CommonAttr *CA = S.mergeCommonAttr(D, Attr.getRange(), Attr.getName(),
-                                         Attr.getAttributeSpellingListIndex()))
+  if (CommonAttr *CA = S.mergeCommonAttr(D, AL.getRange(), AL.getName(),
+                                         AL.getAttributeSpellingListIndex()))
     D->addAttr(CA);
 }
 
-static void handleNakedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<DisableTailCallsAttr>(S, D, Attr.getRange(),
-                                                     Attr.getName()))
+static void handleNakedAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (checkAttrMutualExclusion<DisableTailCallsAttr>(S, D, AL.getRange(),
+                                                     AL.getName()))
     return;
 
-  if (Attr.isDeclspecAttribute()) {
+  if (AL.isDeclspecAttribute()) {
     const auto &Triple = S.getASTContext().getTargetInfo().getTriple();
     const auto &Arch = Triple.getArch();
     if (Arch != llvm::Triple::x86 &&
         (Arch != llvm::Triple::arm && Arch != llvm::Triple::thumb)) {
-      S.Diag(Attr.getLoc(), diag::err_attribute_not_supported_on_arch)
-          << Attr.getName() << Triple.getArchName();
+      S.Diag(AL.getLoc(), diag::err_attribute_not_supported_on_arch)
+          << AL.getName() << Triple.getArchName();
       return;
     }
   }
 
-  D->addAttr(::new (S.Context) NakedAttr(Attr.getRange(), S.Context,
-                                         Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) NakedAttr(AL.getRange(), S.Context,
+                                         AL.getAttributeSpellingListIndex()));
 }
 
 static void handleNoReturnAttr(Sema &S, Decl *D, const AttributeList &Attrs) {
@@ -1986,12 +1923,12 @@
 }
 
 static void handleNoCallerSavedRegsAttr(Sema &S, Decl *D,
-                                        const AttributeList &Attr) {
-  if (S.CheckNoCallerSavedRegsAttr(Attr))
+                                        const AttributeList &AL) {
+  if (S.CheckNoCallerSavedRegsAttr(AL))
     return;
 
   D->addAttr(::new (S.Context) AnyX86NoCallerSavedRegistersAttr(
-      Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
 }
 
 bool Sema::CheckNoReturnAttr(const AttributeList &Attrs) {
@@ -2003,16 +1940,16 @@
   return false;
 }
 
-bool Sema::CheckNoCallerSavedRegsAttr(const AttributeList &Attr) {
+bool Sema::CheckNoCallerSavedRegsAttr(const AttributeList &AL) {
   // Check whether the attribute is valid on the current target.
-  if (!Attr.existsInTarget(Context.getTargetInfo())) {
-    Diag(Attr.getLoc(), diag::warn_unknown_attribute_ignored) << Attr.getName();
-    Attr.setInvalid();
+  if (!AL.existsInTarget(Context.getTargetInfo())) {
+    Diag(AL.getLoc(), diag::warn_unknown_attribute_ignored) << AL.getName();
+    AL.setInvalid();
     return true;
   }
 
-  if (!checkAttributeNumArgs(*this, Attr, 0)) {
-    Attr.setInvalid();
+  if (!checkAttributeNumArgs(*this, AL, 0)) {
+    AL.setInvalid();
     return true;
   }
 
@@ -2020,7 +1957,7 @@
 }
 
 static void handleAnalyzerNoReturnAttr(Sema &S, Decl *D,
-                                       const AttributeList &Attr) {
+                                       const AttributeList &AL) {
   
   // The checking path for 'noreturn' and 'analyzer_noreturn' are different
   // because 'analyzer_noreturn' does not impact the type.
@@ -2028,21 +1965,21 @@
     ValueDecl *VD = dyn_cast<ValueDecl>(D);
     if (!VD || (!VD->getType()->isBlockPointerType() &&
                 !VD->getType()->isFunctionPointerType())) {
-      S.Diag(Attr.getLoc(),
-             Attr.isCXX11Attribute() ? diag::err_attribute_wrong_decl_type
+      S.Diag(AL.getLoc(),
+             AL.isCXX11Attribute() ? diag::err_attribute_wrong_decl_type
                                      : diag::warn_attribute_wrong_decl_type)
-        << Attr.getName() << ExpectedFunctionMethodOrBlock;
+        << AL.getName() << ExpectedFunctionMethodOrBlock;
       return;
     }
   }
   
   D->addAttr(::new (S.Context)
-             AnalyzerNoReturnAttr(Attr.getRange(), S.Context,
-                                  Attr.getAttributeSpellingListIndex()));
+             AnalyzerNoReturnAttr(AL.getRange(), S.Context,
+                                  AL.getAttributeSpellingListIndex()));
 }
 
 // PS3 PPU-specific.
-static void handleVecReturnAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleVecReturnAttr(Sema &S, Decl *D, const AttributeList &AL) {
 /*
   Returning a Vector Class in Registers
   
@@ -2067,157 +2004,119 @@
   }
 */
   if (VecReturnAttr *A = D->getAttr<VecReturnAttr>()) {
-    S.Diag(Attr.getLoc(), diag::err_repeat_attribute) << A;
+    S.Diag(AL.getLoc(), diag::err_repeat_attribute) << A;
     return;
   }
 
-  RecordDecl *record = cast<RecordDecl>(D);
+  const auto *R = cast<RecordDecl>(D);
   int count = 0;
 
-  if (!isa<CXXRecordDecl>(record)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_vecreturn_only_vector_member);
+  if (!isa<CXXRecordDecl>(R)) {
+    S.Diag(AL.getLoc(), diag::err_attribute_vecreturn_only_vector_member);
     return;
   }
 
-  if (!cast<CXXRecordDecl>(record)->isPOD()) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_vecreturn_only_pod_record);
+  if (!cast<CXXRecordDecl>(R)->isPOD()) {
+    S.Diag(AL.getLoc(), diag::err_attribute_vecreturn_only_pod_record);
     return;
   }
 
-  for (const auto *I : record->fields()) {
+  for (const auto *I : R->fields()) {
     if ((count == 1) || !I->getType()->isVectorType()) {
-      S.Diag(Attr.getLoc(), diag::err_attribute_vecreturn_only_vector_member);
+      S.Diag(AL.getLoc(), diag::err_attribute_vecreturn_only_vector_member);
       return;
     }
     count++;
   }
 
-  D->addAttr(::new (S.Context)
-             VecReturnAttr(Attr.getRange(), S.Context,
-                           Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) VecReturnAttr(
+      AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
 }
 
 static void handleDependencyAttr(Sema &S, Scope *Scope, Decl *D,
-                                 const AttributeList &Attr) {
+                                 const AttributeList &AL) {
   if (isa<ParmVarDecl>(D)) {
     // [[carries_dependency]] can only be applied to a parameter if it is a
     // parameter of a function declaration or lambda.
     if (!(Scope->getFlags() & clang::Scope::FunctionDeclarationScope)) {
-      S.Diag(Attr.getLoc(),
+      S.Diag(AL.getLoc(),
              diag::err_carries_dependency_param_not_function_decl);
       return;
     }
   }
 
   D->addAttr(::new (S.Context) CarriesDependencyAttr(
-                                   Attr.getRange(), S.Context,
-                                   Attr.getAttributeSpellingListIndex()));
+                                   AL.getRange(), S.Context,
+                                   AL.getAttributeSpellingListIndex()));
 }
 
-static void handleNotTailCalledAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<AlwaysInlineAttr>(S, D, Attr.getRange(),
-                                                 Attr.getName()))
-    return;
+static void handleUnusedAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  bool IsCXX17Attr = AL.isCXX11Attribute() && !AL.getScopeName();
 
-  D->addAttr(::new (S.Context) NotTailCalledAttr(
-      Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
-}
-
-static void handleDisableTailCallsAttr(Sema &S, Decl *D,
-                                       const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<NakedAttr>(S, D, Attr.getRange(),
-                                          Attr.getName()))
-    return;
-
-  D->addAttr(::new (S.Context) DisableTailCallsAttr(
-      Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
-}
-
-static void handleUsedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
-    if (VD->hasLocalStorage()) {
-      S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) << Attr.getName();
-      return;
-    }
-  } else if (!isFunctionOrMethod(D)) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
-      << Attr.getName() << ExpectedVariableOrFunction;
-    return;
-  }
-
-  D->addAttr(::new (S.Context)
-             UsedAttr(Attr.getRange(), S.Context,
-                      Attr.getAttributeSpellingListIndex()));
-}
-
-static void handleUnusedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  bool IsCXX1zAttr = Attr.isCXX11Attribute() && !Attr.getScopeName();
-
-  if (IsCXX1zAttr && isa<VarDecl>(D)) {
-    // The C++1z spelling of this attribute cannot be applied to a static data
+  if (IsCXX17Attr && isa<VarDecl>(D)) {
+    // The C++17 spelling of this attribute cannot be applied to a static data
     // member per [dcl.attr.unused]p2.
     if (cast<VarDecl>(D)->isStaticDataMember()) {
-      S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
-          << Attr.getName() << ExpectedForMaybeUnused;
+      S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type)
+          << AL.getName() << ExpectedForMaybeUnused;
       return;
     }
   }
 
-  // If this is spelled as the standard C++1z attribute, but not in C++1z, warn
+  // If this is spelled as the standard C++17 attribute, but not in C++17, warn
   // about using it as an extension.
-  if (!S.getLangOpts().CPlusPlus1z && IsCXX1zAttr)
-    S.Diag(Attr.getLoc(), diag::ext_cxx17_attr) << Attr.getName();
+  if (!S.getLangOpts().CPlusPlus17 && IsCXX17Attr)
+    S.Diag(AL.getLoc(), diag::ext_cxx17_attr) << AL.getName();
 
   D->addAttr(::new (S.Context) UnusedAttr(
-      Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
 }
 
-static void handleConstructorAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleConstructorAttr(Sema &S, Decl *D, const AttributeList &AL) {
   uint32_t priority = ConstructorAttr::DefaultPriority;
-  if (Attr.getNumArgs() &&
-      !checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), priority))
+  if (AL.getNumArgs() &&
+      !checkUInt32Argument(S, AL, AL.getArgAsExpr(0), priority))
     return;
 
   D->addAttr(::new (S.Context)
-             ConstructorAttr(Attr.getRange(), S.Context, priority,
-                             Attr.getAttributeSpellingListIndex()));
+             ConstructorAttr(AL.getRange(), S.Context, priority,
+                             AL.getAttributeSpellingListIndex()));
 }
 
-static void handleDestructorAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleDestructorAttr(Sema &S, Decl *D, const AttributeList &AL) {
   uint32_t priority = DestructorAttr::DefaultPriority;
-  if (Attr.getNumArgs() &&
-      !checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), priority))
+  if (AL.getNumArgs() &&
+      !checkUInt32Argument(S, AL, AL.getArgAsExpr(0), priority))
     return;
 
   D->addAttr(::new (S.Context)
-             DestructorAttr(Attr.getRange(), S.Context, priority,
-                            Attr.getAttributeSpellingListIndex()));
+             DestructorAttr(AL.getRange(), S.Context, priority,
+                            AL.getAttributeSpellingListIndex()));
 }
 
 template <typename AttrTy>
 static void handleAttrWithMessage(Sema &S, Decl *D,
-                                  const AttributeList &Attr) {
+                                  const AttributeList &AL) {
   // Handle the case where the attribute has a text message.
   StringRef Str;
-  if (Attr.getNumArgs() == 1 && !S.checkStringLiteralArgumentAttr(Attr, 0, Str))
+  if (AL.getNumArgs() == 1 && !S.checkStringLiteralArgumentAttr(AL, 0, Str))
     return;
 
-  D->addAttr(::new (S.Context) AttrTy(Attr.getRange(), S.Context, Str,
-                                      Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) AttrTy(AL.getRange(), S.Context, Str,
+                                      AL.getAttributeSpellingListIndex()));
 }
 
 static void handleObjCSuppresProtocolAttr(Sema &S, Decl *D,
-                                          const AttributeList &Attr) {
+                                          const AttributeList &AL) {
   if (!cast<ObjCProtocolDecl>(D)->isThisDeclarationADefinition()) {
-    S.Diag(Attr.getLoc(), diag::err_objc_attr_protocol_requires_definition)
-      << Attr.getName() << Attr.getRange();
+    S.Diag(AL.getLoc(), diag::err_objc_attr_protocol_requires_definition)
+      << AL.getName() << AL.getRange();
     return;
   }
 
   D->addAttr(::new (S.Context)
-          ObjCExplicitProtocolImplAttr(Attr.getRange(), S.Context,
-                                       Attr.getAttributeSpellingListIndex()));
+          ObjCExplicitProtocolImplAttr(AL.getRange(), S.Context,
+                                       AL.getAttributeSpellingListIndex()));
 }
 
 static bool checkAvailabilityAttr(Sema &S, SourceRange Range,
@@ -2309,7 +2208,7 @@
   if (D->hasAttrs()) {
     AttrVec &Attrs = D->getAttrs();
     for (unsigned i = 0, e = Attrs.size(); i != e;) {
-      const AvailabilityAttr *OldAA = dyn_cast<AvailabilityAttr>(Attrs[i]);
+      const auto *OldAA = dyn_cast<AvailabilityAttr>(Attrs[i]);
       if (!OldAA) {
         ++i;
         continue;
@@ -2442,36 +2341,34 @@
 }
 
 static void handleAvailabilityAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
-  if (!checkAttributeNumArgs(S, Attr, 1))
+                                   const AttributeList &AL) {
+  if (!checkAttributeNumArgs(S, AL, 1))
     return;
-  IdentifierLoc *Platform = Attr.getArgAsIdent(0);
-  unsigned Index = Attr.getAttributeSpellingListIndex();
+  IdentifierLoc *Platform = AL.getArgAsIdent(0);
+  unsigned Index = AL.getAttributeSpellingListIndex();
   
   IdentifierInfo *II = Platform->Ident;
   if (AvailabilityAttr::getPrettyPlatformName(II->getName()).empty())
     S.Diag(Platform->Loc, diag::warn_availability_unknown_platform)
       << Platform->Ident;
 
-  NamedDecl *ND = dyn_cast<NamedDecl>(D);
+  auto *ND = dyn_cast<NamedDecl>(D);
   if (!ND) // We warned about this already, so just return.
     return;
 
-  AvailabilityChange Introduced = Attr.getAvailabilityIntroduced();
-  AvailabilityChange Deprecated = Attr.getAvailabilityDeprecated();
-  AvailabilityChange Obsoleted = Attr.getAvailabilityObsoleted();
-  bool IsUnavailable = Attr.getUnavailableLoc().isValid();
-  bool IsStrict = Attr.getStrictLoc().isValid();
+  AvailabilityChange Introduced = AL.getAvailabilityIntroduced();
+  AvailabilityChange Deprecated = AL.getAvailabilityDeprecated();
+  AvailabilityChange Obsoleted = AL.getAvailabilityObsoleted();
+  bool IsUnavailable = AL.getUnavailableLoc().isValid();
+  bool IsStrict = AL.getStrictLoc().isValid();
   StringRef Str;
-  if (const StringLiteral *SE =
-          dyn_cast_or_null<StringLiteral>(Attr.getMessageExpr()))
+  if (const auto *SE = dyn_cast_or_null<StringLiteral>(AL.getMessageExpr()))
     Str = SE->getString();
   StringRef Replacement;
-  if (const StringLiteral *SE =
-          dyn_cast_or_null<StringLiteral>(Attr.getReplacementExpr()))
+  if (const auto *SE = dyn_cast_or_null<StringLiteral>(AL.getReplacementExpr()))
     Replacement = SE->getString();
 
-  AvailabilityAttr *NewAttr = S.mergeAvailabilityAttr(ND, Attr.getRange(), II,
+  AvailabilityAttr *NewAttr = S.mergeAvailabilityAttr(ND, AL.getRange(), II,
                                                       false/*Implicit*/,
                                                       Introduced.Version,
                                                       Deprecated.Version,
@@ -2516,7 +2413,7 @@
         auto NewObsoleted = adjustWatchOSVersion(Obsoleted.Version);
 
         AvailabilityAttr *NewAttr = S.mergeAvailabilityAttr(ND,
-                                                            Attr.getRange(),
+                                                            AL.getRange(),
                                                             NewII,
                                                             true/*Implicit*/,
                                                             NewIntroduced,
@@ -2541,7 +2438,7 @@
 
     if (NewII) {
         AvailabilityAttr *NewAttr = S.mergeAvailabilityAttr(ND,
-                                                            Attr.getRange(),
+                                                            AL.getRange(),
                                                             NewII,
                                                             true/*Implicit*/,
                                                             Introduced.Version,
@@ -2559,23 +2456,23 @@
 }
 
 static void handleExternalSourceSymbolAttr(Sema &S, Decl *D,
-                                           const AttributeList &Attr) {
-  if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
+                                           const AttributeList &AL) {
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
     return;
-  assert(checkAttributeAtMostNumArgs(S, Attr, 3) &&
+  assert(checkAttributeAtMostNumArgs(S, AL, 3) &&
          "Invalid number of arguments in an external_source_symbol attribute");
 
   StringRef Language;
-  if (const auto *SE = dyn_cast_or_null<StringLiteral>(Attr.getArgAsExpr(0)))
+  if (const auto *SE = dyn_cast_or_null<StringLiteral>(AL.getArgAsExpr(0)))
     Language = SE->getString();
   StringRef DefinedIn;
-  if (const auto *SE = dyn_cast_or_null<StringLiteral>(Attr.getArgAsExpr(1)))
+  if (const auto *SE = dyn_cast_or_null<StringLiteral>(AL.getArgAsExpr(1)))
     DefinedIn = SE->getString();
-  bool IsGeneratedDeclaration = Attr.getArgAsIdent(2) != nullptr;
+  bool IsGeneratedDeclaration = AL.getArgAsIdent(2) != nullptr;
 
   D->addAttr(::new (S.Context) ExternalSourceSymbolAttr(
-      Attr.getRange(), S.Context, Language, DefinedIn, IsGeneratedDeclaration,
-      Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, Language, DefinedIn, IsGeneratedDeclaration,
+      AL.getAttributeSpellingListIndex()));
 }
 
 template <class T>
@@ -2608,12 +2505,12 @@
                                                    AttrSpellingListIndex);
 }
 
-static void handleVisibilityAttr(Sema &S, Decl *D, const AttributeList &Attr,
+static void handleVisibilityAttr(Sema &S, Decl *D, const AttributeList &AL,
                                  bool isTypeVisibility) {
   // Visibility attributes don't mean anything on a typedef.
   if (isa<TypedefNameDecl>(D)) {
-    S.Diag(Attr.getRange().getBegin(), diag::warn_attribute_ignored)
-      << Attr.getName();
+    S.Diag(AL.getRange().getBegin(), diag::warn_attribute_ignored)
+      << AL.getName();
     return;
   }
 
@@ -2622,21 +2519,21 @@
       !(isa<TagDecl>(D) ||
         isa<ObjCInterfaceDecl>(D) ||
         isa<NamespaceDecl>(D))) {
-    S.Diag(Attr.getRange().getBegin(), diag::err_attribute_wrong_decl_type)
-      << Attr.getName() << ExpectedTypeOrNamespace;
+    S.Diag(AL.getRange().getBegin(), diag::err_attribute_wrong_decl_type)
+      << AL.getName() << ExpectedTypeOrNamespace;
     return;
   }
 
   // Check that the argument is a string literal.
   StringRef TypeStr;
   SourceLocation LiteralLoc;
-  if (!S.checkStringLiteralArgumentAttr(Attr, 0, TypeStr, &LiteralLoc))
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, TypeStr, &LiteralLoc))
     return;
 
   VisibilityAttr::VisibilityType type;
   if (!VisibilityAttr::ConvertStrToVisibilityType(TypeStr, type)) {
     S.Diag(LiteralLoc, diag::warn_attribute_type_not_supported)
-      << Attr.getName() << TypeStr;
+      << AL.getName() << TypeStr;
     return;
   }
   
@@ -2644,62 +2541,61 @@
   // (like Darwin) that don't support it.
   if (type == VisibilityAttr::Protected &&
       !S.Context.getTargetInfo().hasProtectedVisibility()) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_protected_visibility);
+    S.Diag(AL.getLoc(), diag::warn_attribute_protected_visibility);
     type = VisibilityAttr::Default;
   }
 
-  unsigned Index = Attr.getAttributeSpellingListIndex();
-  clang::Attr *newAttr;
+  unsigned Index = AL.getAttributeSpellingListIndex();
+  Attr *newAttr;
   if (isTypeVisibility) {
-    newAttr = S.mergeTypeVisibilityAttr(D, Attr.getRange(),
+    newAttr = S.mergeTypeVisibilityAttr(D, AL.getRange(),
                                     (TypeVisibilityAttr::VisibilityType) type,
                                         Index);
   } else {
-    newAttr = S.mergeVisibilityAttr(D, Attr.getRange(), type, Index);
+    newAttr = S.mergeVisibilityAttr(D, AL.getRange(), type, Index);
   }
   if (newAttr)
     D->addAttr(newAttr);
 }
 
-static void handleObjCMethodFamilyAttr(Sema &S, Decl *decl,
-                                       const AttributeList &Attr) {
-  ObjCMethodDecl *method = cast<ObjCMethodDecl>(decl);
-  if (!Attr.isArgIdent(0)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
-      << Attr.getName() << 1 << AANT_ArgumentIdentifier;
+static void handleObjCMethodFamilyAttr(Sema &S, Decl *D,
+                                       const AttributeList &AL) {
+  const auto *M = cast<ObjCMethodDecl>(D);
+  if (!AL.isArgIdent(0)) {
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
+        << AL.getName() << 1 << AANT_ArgumentIdentifier;
     return;
   }
 
-  IdentifierLoc *IL = Attr.getArgAsIdent(0);
+  IdentifierLoc *IL = AL.getArgAsIdent(0);
   ObjCMethodFamilyAttr::FamilyKind F;
   if (!ObjCMethodFamilyAttr::ConvertStrToFamilyKind(IL->Ident->getName(), F)) {
-    S.Diag(IL->Loc, diag::warn_attribute_type_not_supported) << Attr.getName()
-      << IL->Ident;
+    S.Diag(IL->Loc, diag::warn_attribute_type_not_supported)
+        << AL.getName() << IL->Ident;
     return;
   }
 
   if (F == ObjCMethodFamilyAttr::OMF_init &&
-      !method->getReturnType()->isObjCObjectPointerType()) {
-    S.Diag(method->getLocation(), diag::err_init_method_bad_return_type)
-        << method->getReturnType();
+      !M->getReturnType()->isObjCObjectPointerType()) {
+    S.Diag(M->getLocation(), diag::err_init_method_bad_return_type)
+        << M->getReturnType();
     // Ignore the attribute.
     return;
   }
 
-  method->addAttr(new (S.Context) ObjCMethodFamilyAttr(Attr.getRange(),
-                                                       S.Context, F,
-                                        Attr.getAttributeSpellingListIndex()));
+  D->addAttr(new (S.Context) ObjCMethodFamilyAttr(
+      AL.getRange(), S.Context, F, AL.getAttributeSpellingListIndex()));
 }
 
-static void handleObjCNSObject(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D)) {
+static void handleObjCNSObject(Sema &S, Decl *D, const AttributeList &AL) {
+  if (const auto *TD = dyn_cast<TypedefNameDecl>(D)) {
     QualType T = TD->getUnderlyingType();
     if (!T->isCARCBridgableType()) {
       S.Diag(TD->getLocation(), diag::err_nsobject_attribute);
       return;
     }
   }
-  else if (ObjCPropertyDecl *PD = dyn_cast<ObjCPropertyDecl>(D)) {
+  else if (const auto *PD = dyn_cast<ObjCPropertyDecl>(D)) {
     QualType T = PD->getType();
     if (!T->isCARCBridgableType()) {
       S.Diag(PD->getLocation(), diag::err_nsobject_attribute);
@@ -2716,12 +2612,12 @@
     S.Diag(D->getLocation(), diag::warn_nsobject_attribute);
   }
   D->addAttr(::new (S.Context)
-             ObjCNSObjectAttr(Attr.getRange(), S.Context,
-                              Attr.getAttributeSpellingListIndex()));
+             ObjCNSObjectAttr(AL.getRange(), S.Context,
+                              AL.getAttributeSpellingListIndex()));
 }
 
-static void handleObjCIndependentClass(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D)) {
+static void handleObjCIndependentClass(Sema &S, Decl *D, const AttributeList &AL) {
+  if (const auto *TD = dyn_cast<TypedefNameDecl>(D)) {
     QualType T = TD->getUnderlyingType();
     if (!T->isObjCObjectPointerType()) {
       S.Diag(TD->getLocation(), diag::warn_ptr_independentclass_attribute);
@@ -2732,45 +2628,45 @@
     return;
   }
   D->addAttr(::new (S.Context)
-             ObjCIndependentClassAttr(Attr.getRange(), S.Context,
-                              Attr.getAttributeSpellingListIndex()));
+             ObjCIndependentClassAttr(AL.getRange(), S.Context,
+                              AL.getAttributeSpellingListIndex()));
 }
 
-static void handleBlocksAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (!Attr.isArgIdent(0)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
-      << Attr.getName() << 1 << AANT_ArgumentIdentifier;
+static void handleBlocksAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (!AL.isArgIdent(0)) {
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
+      << AL.getName() << 1 << AANT_ArgumentIdentifier;
     return;
   }
 
-  IdentifierInfo *II = Attr.getArgAsIdent(0)->Ident;
+  IdentifierInfo *II = AL.getArgAsIdent(0)->Ident;
   BlocksAttr::BlockType type;
   if (!BlocksAttr::ConvertStrToBlockType(II->getName(), type)) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_type_not_supported)
-      << Attr.getName() << II;
+    S.Diag(AL.getLoc(), diag::warn_attribute_type_not_supported)
+      << AL.getName() << II;
     return;
   }
 
   D->addAttr(::new (S.Context)
-             BlocksAttr(Attr.getRange(), S.Context, type,
-                        Attr.getAttributeSpellingListIndex()));
+             BlocksAttr(AL.getRange(), S.Context, type,
+                        AL.getAttributeSpellingListIndex()));
 }
 
-static void handleSentinelAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleSentinelAttr(Sema &S, Decl *D, const AttributeList &AL) {
   unsigned sentinel = (unsigned)SentinelAttr::DefaultSentinel;
-  if (Attr.getNumArgs() > 0) {
-    Expr *E = Attr.getArgAsExpr(0);
+  if (AL.getNumArgs() > 0) {
+    Expr *E = AL.getArgAsExpr(0);
     llvm::APSInt Idx(32);
     if (E->isTypeDependent() || E->isValueDependent() ||
         !E->isIntegerConstantExpr(Idx, S.Context)) {
-      S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
-        << Attr.getName() << 1 << AANT_ArgumentIntegerConstant
+      S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
+        << AL.getName() << 1 << AANT_ArgumentIntegerConstant
         << E->getSourceRange();
       return;
     }
 
     if (Idx.isSigned() && Idx.isNegative()) {
-      S.Diag(Attr.getLoc(), diag::err_attribute_sentinel_less_than_zero)
+      S.Diag(AL.getLoc(), diag::err_attribute_sentinel_less_than_zero)
         << E->getSourceRange();
       return;
     }
@@ -2779,13 +2675,13 @@
   }
 
   unsigned nullPos = (unsigned)SentinelAttr::DefaultNullPos;
-  if (Attr.getNumArgs() > 1) {
-    Expr *E = Attr.getArgAsExpr(1);
+  if (AL.getNumArgs() > 1) {
+    Expr *E = AL.getArgAsExpr(1);
     llvm::APSInt Idx(32);
     if (E->isTypeDependent() || E->isValueDependent() ||
         !E->isIntegerConstantExpr(Idx, S.Context)) {
-      S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
-        << Attr.getName() << 2 << AANT_ArgumentIntegerConstant
+      S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
+        << AL.getName() << 2 << AANT_ArgumentIntegerConstant
         << E->getSourceRange();
       return;
     }
@@ -2794,34 +2690,34 @@
     if ((Idx.isSigned() && Idx.isNegative()) || nullPos > 1) {
       // FIXME: This error message could be improved, it would be nice
       // to say what the bounds actually are.
-      S.Diag(Attr.getLoc(), diag::err_attribute_sentinel_not_zero_or_one)
+      S.Diag(AL.getLoc(), diag::err_attribute_sentinel_not_zero_or_one)
         << E->getSourceRange();
       return;
     }
   }
 
-  if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+  if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
     const FunctionType *FT = FD->getType()->castAs<FunctionType>();
     if (isa<FunctionNoProtoType>(FT)) {
-      S.Diag(Attr.getLoc(), diag::warn_attribute_sentinel_named_arguments);
+      S.Diag(AL.getLoc(), diag::warn_attribute_sentinel_named_arguments);
       return;
     }
 
     if (!cast<FunctionProtoType>(FT)->isVariadic()) {
-      S.Diag(Attr.getLoc(), diag::warn_attribute_sentinel_not_variadic) << 0;
+      S.Diag(AL.getLoc(), diag::warn_attribute_sentinel_not_variadic) << 0;
       return;
     }
-  } else if (ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D)) {
+  } else if (const auto *MD = dyn_cast<ObjCMethodDecl>(D)) {
     if (!MD->isVariadic()) {
-      S.Diag(Attr.getLoc(), diag::warn_attribute_sentinel_not_variadic) << 0;
+      S.Diag(AL.getLoc(), diag::warn_attribute_sentinel_not_variadic) << 0;
       return;
     }
-  } else if (BlockDecl *BD = dyn_cast<BlockDecl>(D)) {
+  } else if (const auto *BD = dyn_cast<BlockDecl>(D)) {
     if (!BD->isVariadic()) {
-      S.Diag(Attr.getLoc(), diag::warn_attribute_sentinel_not_variadic) << 1;
+      S.Diag(AL.getLoc(), diag::warn_attribute_sentinel_not_variadic) << 1;
       return;
     }
-  } else if (const VarDecl *V = dyn_cast<VarDecl>(D)) {
+  } else if (const auto *V = dyn_cast<VarDecl>(D)) {
     QualType Ty = V->getType();
     if (Ty->isBlockPointerType() || Ty->isFunctionPointerType()) {
       const FunctionType *FT = Ty->isFunctionPointerType()
@@ -2829,84 +2725,84 @@
        : Ty->getAs<BlockPointerType>()->getPointeeType()->getAs<FunctionType>();
       if (!cast<FunctionProtoType>(FT)->isVariadic()) {
         int m = Ty->isFunctionPointerType() ? 0 : 1;
-        S.Diag(Attr.getLoc(), diag::warn_attribute_sentinel_not_variadic) << m;
+        S.Diag(AL.getLoc(), diag::warn_attribute_sentinel_not_variadic) << m;
         return;
       }
     } else {
-      S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
-        << Attr.getName() << ExpectedFunctionMethodOrBlock;
+      S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type)
+        << AL.getName() << ExpectedFunctionMethodOrBlock;
       return;
     }
   } else {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
-      << Attr.getName() << ExpectedFunctionMethodOrBlock;
+    S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type)
+      << AL.getName() << ExpectedFunctionMethodOrBlock;
     return;
   }
   D->addAttr(::new (S.Context)
-             SentinelAttr(Attr.getRange(), S.Context, sentinel, nullPos,
-                          Attr.getAttributeSpellingListIndex()));
+             SentinelAttr(AL.getRange(), S.Context, sentinel, nullPos,
+                          AL.getAttributeSpellingListIndex()));
 }
 
-static void handleWarnUnusedResult(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleWarnUnusedResult(Sema &S, Decl *D, const AttributeList &AL) {
   if (D->getFunctionType() &&
       D->getFunctionType()->getReturnType()->isVoidType()) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_void_function_method)
-      << Attr.getName() << 0;
+    S.Diag(AL.getLoc(), diag::warn_attribute_void_function_method)
+      << AL.getName() << 0;
     return;
   }
-  if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D))
+  if (const auto *MD = dyn_cast<ObjCMethodDecl>(D))
     if (MD->getReturnType()->isVoidType()) {
-      S.Diag(Attr.getLoc(), diag::warn_attribute_void_function_method)
-      << Attr.getName() << 1;
+      S.Diag(AL.getLoc(), diag::warn_attribute_void_function_method)
+      << AL.getName() << 1;
       return;
     }
   
-  // If this is spelled as the standard C++1z attribute, but not in C++1z, warn
+  // If this is spelled as the standard C++17 attribute, but not in C++17, warn
   // about using it as an extension.
-  if (!S.getLangOpts().CPlusPlus1z && Attr.isCXX11Attribute() &&
-      !Attr.getScopeName())
-    S.Diag(Attr.getLoc(), diag::ext_cxx17_attr) << Attr.getName();
+  if (!S.getLangOpts().CPlusPlus17 && AL.isCXX11Attribute() &&
+      !AL.getScopeName())
+    S.Diag(AL.getLoc(), diag::ext_cxx17_attr) << AL.getName();
 
   D->addAttr(::new (S.Context) 
-             WarnUnusedResultAttr(Attr.getRange(), S.Context,
-                                  Attr.getAttributeSpellingListIndex()));
+             WarnUnusedResultAttr(AL.getRange(), S.Context,
+                                  AL.getAttributeSpellingListIndex()));
 }
 
-static void handleWeakImportAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleWeakImportAttr(Sema &S, Decl *D, const AttributeList &AL) {
   // weak_import only applies to variable & function declarations.
   bool isDef = false;
   if (!D->canBeWeakImported(isDef)) {
     if (isDef)
-      S.Diag(Attr.getLoc(), diag::warn_attribute_invalid_on_definition)
+      S.Diag(AL.getLoc(), diag::warn_attribute_invalid_on_definition)
         << "weak_import";
     else if (isa<ObjCPropertyDecl>(D) || isa<ObjCMethodDecl>(D) ||
              (S.Context.getTargetInfo().getTriple().isOSDarwin() &&
               (isa<ObjCInterfaceDecl>(D) || isa<EnumDecl>(D)))) {
       // Nothing to warn about here.
     } else
-      S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
-        << Attr.getName() << ExpectedVariableOrFunction;
+      S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type)
+        << AL.getName() << ExpectedVariableOrFunction;
 
     return;
   }
 
   D->addAttr(::new (S.Context)
-             WeakImportAttr(Attr.getRange(), S.Context,
-                            Attr.getAttributeSpellingListIndex()));
+             WeakImportAttr(AL.getRange(), S.Context,
+                            AL.getAttributeSpellingListIndex()));
 }
 
 // Handles reqd_work_group_size and work_group_size_hint.
 template <typename WorkGroupAttr>
 static void handleWorkGroupSize(Sema &S, Decl *D,
-                                const AttributeList &Attr) {
+                                const AttributeList &AL) {
   uint32_t WGSize[3];
   for (unsigned i = 0; i < 3; ++i) {
-    const Expr *E = Attr.getArgAsExpr(i);
-    if (!checkUInt32Argument(S, Attr, E, WGSize[i], i))
+    const Expr *E = AL.getArgAsExpr(i);
+    if (!checkUInt32Argument(S, AL, E, WGSize[i], i))
       return;
     if (WGSize[i] == 0) {
-      S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
-        << Attr.getName() << E->getSourceRange();
+      S.Diag(AL.getLoc(), diag::err_attribute_argument_is_zero)
+        << AL.getName() << E->getSourceRange();
       return;
     }
   }
@@ -2915,64 +2811,64 @@
   if (Existing && !(Existing->getXDim() == WGSize[0] &&
                     Existing->getYDim() == WGSize[1] &&
                     Existing->getZDim() == WGSize[2]))
-    S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << Attr.getName();
+    S.Diag(AL.getLoc(), diag::warn_duplicate_attribute) << AL.getName();
 
-  D->addAttr(::new (S.Context) WorkGroupAttr(Attr.getRange(), S.Context,
+  D->addAttr(::new (S.Context) WorkGroupAttr(AL.getRange(), S.Context,
                                              WGSize[0], WGSize[1], WGSize[2],
-                                       Attr.getAttributeSpellingListIndex()));
+                                       AL.getAttributeSpellingListIndex()));
 }
 
 // Handles intel_reqd_sub_group_size.
-static void handleSubGroupSize(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleSubGroupSize(Sema &S, Decl *D, const AttributeList &AL) {
   uint32_t SGSize;
-  const Expr *E = Attr.getArgAsExpr(0);
-  if (!checkUInt32Argument(S, Attr, E, SGSize))
+  const Expr *E = AL.getArgAsExpr(0);
+  if (!checkUInt32Argument(S, AL, E, SGSize))
     return;
   if (SGSize == 0) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
-        << Attr.getName() << E->getSourceRange();
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_is_zero)
+        << AL.getName() << E->getSourceRange();
     return;
   }
 
   OpenCLIntelReqdSubGroupSizeAttr *Existing =
       D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>();
   if (Existing && Existing->getSubGroupSize() != SGSize)
-    S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << Attr.getName();
+    S.Diag(AL.getLoc(), diag::warn_duplicate_attribute) << AL.getName();
 
   D->addAttr(::new (S.Context) OpenCLIntelReqdSubGroupSizeAttr(
-      Attr.getRange(), S.Context, SGSize,
-      Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, SGSize,
+      AL.getAttributeSpellingListIndex()));
 }
 
-static void handleVecTypeHint(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (!Attr.hasParsedType()) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
-      << Attr.getName() << 1;
+static void handleVecTypeHint(Sema &S, Decl *D, const AttributeList &AL) {
+  if (!AL.hasParsedType()) {
+    S.Diag(AL.getLoc(), diag::err_attribute_wrong_number_arguments)
+      << AL.getName() << 1;
     return;
   }
 
   TypeSourceInfo *ParmTSI = nullptr;
-  QualType ParmType = S.GetTypeFromParser(Attr.getTypeArg(), &ParmTSI);
+  QualType ParmType = S.GetTypeFromParser(AL.getTypeArg(), &ParmTSI);
   assert(ParmTSI && "no type source info for attribute argument");
 
   if (!ParmType->isExtVectorType() && !ParmType->isFloatingType() &&
       (ParmType->isBooleanType() ||
        !ParmType->isIntegralType(S.getASTContext()))) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_vec_type_hint)
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_vec_type_hint)
         << ParmType;
     return;
   }
 
   if (VecTypeHintAttr *A = D->getAttr<VecTypeHintAttr>()) {
     if (!S.Context.hasSameType(A->getTypeHint(), ParmType)) {
-      S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << Attr.getName();
+      S.Diag(AL.getLoc(), diag::warn_duplicate_attribute) << AL.getName();
       return;
     }
   }
 
-  D->addAttr(::new (S.Context) VecTypeHintAttr(Attr.getLoc(), S.Context,
+  D->addAttr(::new (S.Context) VecTypeHintAttr(AL.getLoc(), S.Context,
                                                ParmTSI,
-                                        Attr.getAttributeSpellingListIndex()));
+                                        AL.getAttributeSpellingListIndex()));
 }
 
 SectionAttr *Sema::mergeSectionAttr(Decl *D, SourceRange Range,
@@ -2998,12 +2894,12 @@
   return true;
 }
 
-static void handleSectionAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleSectionAttr(Sema &S, Decl *D, const AttributeList &AL) {
   // Make sure that there is a string literal as the sections's single
   // argument.
   StringRef Str;
   SourceLocation LiteralLoc;
-  if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str, &LiteralLoc))
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc))
     return;
 
   if (!S.checkSectionName(LiteralLoc, Str))
@@ -3017,8 +2913,8 @@
     return;
   }
 
-  unsigned Index = Attr.getAttributeSpellingListIndex();
-  SectionAttr *NewAttr = S.mergeSectionAttr(D, Attr.getRange(), Str, Index);
+  unsigned Index = AL.getAttributeSpellingListIndex();
+  SectionAttr *NewAttr = S.mergeSectionAttr(D, AL.getRange(), Str, Index);
   if (NewAttr)
     D->addAttr(NewAttr);
 }
@@ -3051,36 +2947,31 @@
              << Unsupported << None << CurFeature;
   }
 
-  return true;
+  return false;
 }
 
-static void handleTargetAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleTargetAttr(Sema &S, Decl *D, const AttributeList &AL) {
   StringRef Str;
   SourceLocation LiteralLoc;
-  if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str, &LiteralLoc) ||
-      !S.checkTargetAttr(LiteralLoc, Str))
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc) ||
+      S.checkTargetAttr(LiteralLoc, Str))
     return;
-  unsigned Index = Attr.getAttributeSpellingListIndex();
+
+  unsigned Index = AL.getAttributeSpellingListIndex();
   TargetAttr *NewAttr =
-      ::new (S.Context) TargetAttr(Attr.getRange(), S.Context, Str, Index);
+      ::new (S.Context) TargetAttr(AL.getRange(), S.Context, Str, Index);
   D->addAttr(NewAttr);
 }
 
-static void handleCleanupAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  VarDecl *VD = cast<VarDecl>(D);
-  if (!VD->hasLocalStorage()) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) << Attr.getName();
-    return;
-  }
-
-  Expr *E = Attr.getArgAsExpr(0);
+static void handleCleanupAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  Expr *E = AL.getArgAsExpr(0);
   SourceLocation Loc = E->getExprLoc();
   FunctionDecl *FD = nullptr;
   DeclarationNameInfo NI;
 
   // gcc only allows for simple identifiers. Since we support more than gcc, we
   // will warn the user.
-  if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
+  if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
     if (DRE->hasQualifier())
       S.Diag(Loc, diag::warn_cleanup_ext);
     FD = dyn_cast<FunctionDecl>(DRE->getDecl());
@@ -3090,7 +2981,7 @@
         << NI.getName();
       return;
     }
-  } else if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(E)) {
+  } else if (auto *ULE = dyn_cast<UnresolvedLookupExpr>(E)) {
     if (ULE->hasExplicitTemplateArgs())
       S.Diag(Loc, diag::warn_cleanup_ext);
     FD = S.ResolveSingleFunctionTemplateSpecialization(ULE, true);
@@ -3115,7 +3006,7 @@
 
   // We're currently more strict than GCC about what function types we accept.
   // If this ever proves to be a problem it should be easy to fix.
-  QualType Ty = S.Context.getPointerType(VD->getType());
+  QualType Ty = S.Context.getPointerType(cast<VarDecl>(D)->getType());
   QualType ParamTy = FD->getParamDecl(0)->getType();
   if (S.CheckAssignmentConstraints(FD->getParamDecl(0)->getLocation(),
                                    ParamTy, Ty) != Sema::Compatible) {
@@ -3125,49 +3016,49 @@
   }
 
   D->addAttr(::new (S.Context)
-             CleanupAttr(Attr.getRange(), S.Context, FD,
-                         Attr.getAttributeSpellingListIndex()));
+             CleanupAttr(AL.getRange(), S.Context, FD,
+                         AL.getAttributeSpellingListIndex()));
 }
 
 static void handleEnumExtensibilityAttr(Sema &S, Decl *D,
-                                        const AttributeList &Attr) {
-  if (!Attr.isArgIdent(0)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
-        << Attr.getName() << 0 << AANT_ArgumentIdentifier;
+                                        const AttributeList &AL) {
+  if (!AL.isArgIdent(0)) {
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
+        << AL.getName() << 0 << AANT_ArgumentIdentifier;
     return;
   }
 
   EnumExtensibilityAttr::Kind ExtensibilityKind;
-  IdentifierInfo *II = Attr.getArgAsIdent(0)->Ident;
+  IdentifierInfo *II = AL.getArgAsIdent(0)->Ident;
   if (!EnumExtensibilityAttr::ConvertStrToKind(II->getName(),
                                                ExtensibilityKind)) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_type_not_supported)
-        << Attr.getName() << II;
+    S.Diag(AL.getLoc(), diag::warn_attribute_type_not_supported)
+        << AL.getName() << II;
     return;
   }
 
   D->addAttr(::new (S.Context) EnumExtensibilityAttr(
-      Attr.getRange(), S.Context, ExtensibilityKind,
-      Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, ExtensibilityKind,
+      AL.getAttributeSpellingListIndex()));
 }
 
 /// Handle __attribute__((format_arg((idx)))) attribute based on
 /// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
-static void handleFormatArgAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  Expr *IdxExpr = Attr.getArgAsExpr(0);
-  uint64_t Idx;
-  if (!checkFunctionOrMethodParameterIndex(S, D, Attr, 1, IdxExpr, Idx))
+static void handleFormatArgAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  Expr *IdxExpr = AL.getArgAsExpr(0);
+  ParamIdx Idx;
+  if (!checkFunctionOrMethodParameterIndex(S, D, AL, 1, IdxExpr, Idx))
     return;
 
   // Make sure the format string is really a string.
-  QualType Ty = getFunctionOrMethodParamType(D, Idx);
+  QualType Ty = getFunctionOrMethodParamType(D, Idx.getASTIndex());
 
   bool NotNSStringTy = !isNSStringType(Ty, S.Context);
   if (NotNSStringTy &&
       !isCFStringType(Ty, S.Context) &&
       (!Ty->isPointerType() ||
        !Ty->getAs<PointerType>()->getPointeeType()->isCharType())) {
-    S.Diag(Attr.getLoc(), diag::err_format_attribute_not)
+    S.Diag(AL.getLoc(), diag::err_format_attribute_not)
         << "a string type" << IdxExpr->getSourceRange()
         << getFunctionOrMethodParamRange(D, 0);
     return;
@@ -3177,21 +3068,14 @@
       !isCFStringType(Ty, S.Context) &&
       (!Ty->isPointerType() ||
        !Ty->getAs<PointerType>()->getPointeeType()->isCharType())) {
-    S.Diag(Attr.getLoc(), diag::err_format_attribute_result_not)
+    S.Diag(AL.getLoc(), diag::err_format_attribute_result_not)
         << (NotNSStringTy ? "string type" : "NSString")
         << IdxExpr->getSourceRange() << getFunctionOrMethodParamRange(D, 0);
     return;
   }
 
-  // We cannot use the Idx returned from checkFunctionOrMethodParameterIndex
-  // because that has corrected for the implicit this parameter, and is zero-
-  // based.  The attribute expects what the user wrote explicitly.
-  llvm::APSInt Val;
-  IdxExpr->EvaluateAsInt(Val, S.Context);
-
-  D->addAttr(::new (S.Context)
-             FormatArgAttr(Attr.getRange(), S.Context, Val.getZExtValue(),
-                           Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) FormatArgAttr(
+      AL.getRange(), S.Context, Idx, AL.getAttributeSpellingListIndex()));
 }
 
 enum FormatAttrKind {
@@ -3227,42 +3111,42 @@
 /// Handle __attribute__((init_priority(priority))) attributes based on
 /// http://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Attributes.html
 static void handleInitPriorityAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
+                                   const AttributeList &AL) {
   if (!S.getLangOpts().CPlusPlus) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) << Attr.getName();
+    S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL.getName();
     return;
   }
   
   if (S.getCurFunctionOrMethodDecl()) {
-    S.Diag(Attr.getLoc(), diag::err_init_priority_object_attr);
-    Attr.setInvalid();
+    S.Diag(AL.getLoc(), diag::err_init_priority_object_attr);
+    AL.setInvalid();
     return;
   }
   QualType T = cast<VarDecl>(D)->getType();
   if (S.Context.getAsArrayType(T))
     T = S.Context.getBaseElementType(T);
   if (!T->getAs<RecordType>()) {
-    S.Diag(Attr.getLoc(), diag::err_init_priority_object_attr);
-    Attr.setInvalid();
+    S.Diag(AL.getLoc(), diag::err_init_priority_object_attr);
+    AL.setInvalid();
     return;
   }
 
-  Expr *E = Attr.getArgAsExpr(0);
+  Expr *E = AL.getArgAsExpr(0);
   uint32_t prioritynum;
-  if (!checkUInt32Argument(S, Attr, E, prioritynum)) {
-    Attr.setInvalid();
+  if (!checkUInt32Argument(S, AL, E, prioritynum)) {
+    AL.setInvalid();
     return;
   }
 
   if (prioritynum < 101 || prioritynum > 65535) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_outof_range)
-      << E->getSourceRange() << Attr.getName() << 101 << 65535;
-    Attr.setInvalid();
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_outof_range)
+      << E->getSourceRange() << AL.getName() << 101 << 65535;
+    AL.setInvalid();
     return;
   }
   D->addAttr(::new (S.Context)
-             InitPriorityAttr(Attr.getRange(), S.Context, prioritynum,
-                              Attr.getAttributeSpellingListIndex()));
+             InitPriorityAttr(AL.getRange(), S.Context, prioritynum,
+                              AL.getAttributeSpellingListIndex()));
 }
 
 FormatAttr *Sema::mergeFormatAttr(Decl *D, SourceRange Range,
@@ -3288,10 +3172,10 @@
 
 /// Handle __attribute__((format(type,idx,firstarg))) attributes based on
 /// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
-static void handleFormatAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (!Attr.isArgIdent(0)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
-      << Attr.getName() << 1 << AANT_ArgumentIdentifier;
+static void handleFormatAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (!AL.isArgIdent(0)) {
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
+      << AL.getName() << 1 << AANT_ArgumentIdentifier;
     return;
   }
 
@@ -3300,7 +3184,7 @@
   bool HasImplicitThisParam = isInstanceMethod(D);
   unsigned NumArgs = getFunctionOrMethodNumParams(D) + HasImplicitThisParam;
 
-  IdentifierInfo *II = Attr.getArgAsIdent(0)->Ident;
+  IdentifierInfo *II = AL.getArgAsIdent(0)->Ident;
   StringRef Format = II->getName();
 
   if (normalizeName(Format)) {
@@ -3315,20 +3199,20 @@
     return;
   
   if (Kind == InvalidFormat) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_type_not_supported)
-      << Attr.getName() << II->getName();
+    S.Diag(AL.getLoc(), diag::warn_attribute_type_not_supported)
+      << AL.getName() << II->getName();
     return;
   }
 
   // checks for the 2nd argument
-  Expr *IdxExpr = Attr.getArgAsExpr(1);
+  Expr *IdxExpr = AL.getArgAsExpr(1);
   uint32_t Idx;
-  if (!checkUInt32Argument(S, Attr, IdxExpr, Idx, 2))
+  if (!checkUInt32Argument(S, AL, IdxExpr, Idx, 2))
     return;
 
   if (Idx < 1 || Idx > NumArgs) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_out_of_bounds)
-      << Attr.getName() << 2 << IdxExpr->getSourceRange();
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds)
+      << AL.getName() << 2 << IdxExpr->getSourceRange();
     return;
   }
 
@@ -3337,7 +3221,7 @@
 
   if (HasImplicitThisParam) {
     if (ArgIdx == 0) {
-      S.Diag(Attr.getLoc(),
+      S.Diag(AL.getLoc(),
              diag::err_format_attribute_implicit_this_format_string)
         << IdxExpr->getSourceRange();
       return;
@@ -3350,7 +3234,7 @@
 
   if (Kind == CFStringFormat) {
     if (!isCFStringType(Ty, S.Context)) {
-      S.Diag(Attr.getLoc(), diag::err_format_attribute_not)
+      S.Diag(AL.getLoc(), diag::err_format_attribute_not)
         << "a CFString" << IdxExpr->getSourceRange()
         << getFunctionOrMethodParamRange(D, ArgIdx);
       return;
@@ -3359,23 +3243,23 @@
     // FIXME: do we need to check if the type is NSString*?  What are the
     // semantics?
     if (!isNSStringType(Ty, S.Context)) {
-      S.Diag(Attr.getLoc(), diag::err_format_attribute_not)
+      S.Diag(AL.getLoc(), diag::err_format_attribute_not)
         << "an NSString" << IdxExpr->getSourceRange()
         << getFunctionOrMethodParamRange(D, ArgIdx);
       return;
     }
   } else if (!Ty->isPointerType() ||
              !Ty->getAs<PointerType>()->getPointeeType()->isCharType()) {
-    S.Diag(Attr.getLoc(), diag::err_format_attribute_not)
+    S.Diag(AL.getLoc(), diag::err_format_attribute_not)
       << "a string type" << IdxExpr->getSourceRange()
       << getFunctionOrMethodParamRange(D, ArgIdx);
     return;
   }
 
   // check the 3rd argument
-  Expr *FirstArgExpr = Attr.getArgAsExpr(2);
+  Expr *FirstArgExpr = AL.getArgAsExpr(2);
   uint32_t FirstArg;
-  if (!checkUInt32Argument(S, Attr, FirstArgExpr, FirstArg, 3))
+  if (!checkUInt32Argument(S, AL, FirstArgExpr, FirstArg, 3))
     return;
 
   // check if the function is variadic if the 3rd argument non-zero
@@ -3392,43 +3276,43 @@
   // variable the input is just the current time + the format string.
   if (Kind == StrftimeFormat) {
     if (FirstArg != 0) {
-      S.Diag(Attr.getLoc(), diag::err_format_strftime_third_parameter)
+      S.Diag(AL.getLoc(), diag::err_format_strftime_third_parameter)
         << FirstArgExpr->getSourceRange();
       return;
     }
   // if 0 it disables parameter checking (to use with e.g. va_list)
   } else if (FirstArg != 0 && FirstArg != NumArgs) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_out_of_bounds)
-      << Attr.getName() << 3 << FirstArgExpr->getSourceRange();
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds)
+      << AL.getName() << 3 << FirstArgExpr->getSourceRange();
     return;
   }
 
-  FormatAttr *NewAttr = S.mergeFormatAttr(D, Attr.getRange(), II,
+  FormatAttr *NewAttr = S.mergeFormatAttr(D, AL.getRange(), II,
                                           Idx, FirstArg,
-                                          Attr.getAttributeSpellingListIndex());
+                                          AL.getAttributeSpellingListIndex());
   if (NewAttr)
     D->addAttr(NewAttr);
 }
 
 static void handleTransparentUnionAttr(Sema &S, Decl *D,
-                                       const AttributeList &Attr) {
+                                       const AttributeList &AL) {
   // Try to find the underlying union declaration.
   RecordDecl *RD = nullptr;
-  TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D);
+  const auto *TD = dyn_cast<TypedefNameDecl>(D);
   if (TD && TD->getUnderlyingType()->isUnionType())
     RD = TD->getUnderlyingType()->getAsUnionType()->getDecl();
   else
     RD = dyn_cast<RecordDecl>(D);
 
   if (!RD || !RD->isUnion()) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
-      << Attr.getName() << ExpectedUnion;
+    S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type)
+      << AL.getName() << ExpectedUnion;
     return;
   }
 
   if (!RD->isCompleteDefinition()) {
     if (!RD->isBeingDefined())
-      S.Diag(Attr.getLoc(),
+      S.Diag(AL.getLoc(),
              diag::warn_transparent_union_attribute_not_definition);
     return;
   }
@@ -3436,7 +3320,7 @@
   RecordDecl::field_iterator Field = RD->field_begin(),
                           FieldEnd = RD->field_end();
   if (Field == FieldEnd) {
-    S.Diag(Attr.getLoc(), diag::warn_transparent_union_attribute_zero_fields);
+    S.Diag(AL.getLoc(), diag::warn_transparent_union_attribute_zero_fields);
     return;
   }
 
@@ -3480,15 +3364,15 @@
   }
 
   RD->addAttr(::new (S.Context)
-              TransparentUnionAttr(Attr.getRange(), S.Context,
-                                   Attr.getAttributeSpellingListIndex()));
+              TransparentUnionAttr(AL.getRange(), S.Context,
+                                   AL.getAttributeSpellingListIndex()));
 }
 
-static void handleAnnotateAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleAnnotateAttr(Sema &S, Decl *D, const AttributeList &AL) {
   // Make sure that there is a string literal as the annotation's single
   // argument.
   StringRef Str;
-  if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str))
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, Str))
     return;
 
   // Don't duplicate annotations that are already set.
@@ -3498,14 +3382,14 @@
   }
   
   D->addAttr(::new (S.Context)
-             AnnotateAttr(Attr.getRange(), S.Context, Str,
-                          Attr.getAttributeSpellingListIndex()));
+             AnnotateAttr(AL.getRange(), S.Context, Str,
+                          AL.getAttributeSpellingListIndex()));
 }
 
 static void handleAlignValueAttr(Sema &S, Decl *D,
-                                 const AttributeList &Attr) {
-  S.AddAlignValueAttr(Attr.getRange(), D, Attr.getArgAsExpr(0),
-                      Attr.getAttributeSpellingListIndex());
+                                 const AttributeList &AL) {
+  S.AddAlignValueAttr(AL.getRange(), D, AL.getArgAsExpr(0),
+                      AL.getAttributeSpellingListIndex());
 }
 
 void Sema::AddAlignValueAttr(SourceRange AttrRange, Decl *D, Expr *E,
@@ -3514,9 +3398,9 @@
   SourceLocation AttrLoc = AttrRange.getBegin();
 
   QualType T;
-  if (TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D))
+  if (const auto *TD = dyn_cast<TypedefNameDecl>(D))
     T = TD->getUnderlyingType();
-  else if (ValueDecl *VD = dyn_cast<ValueDecl>(D))
+  else if (const auto *VD = dyn_cast<ValueDecl>(D))
     T = VD->getType();
   else
     llvm_unreachable("Unknown decl type for align_value");
@@ -3553,42 +3437,42 @@
   D->addAttr(::new (Context) AlignValueAttr(TmpAttr));
 }
 
-static void handleAlignedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleAlignedAttr(Sema &S, Decl *D, const AttributeList &AL) {
   // check the attribute arguments.
-  if (Attr.getNumArgs() > 1) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
-      << Attr.getName() << 1;
+  if (AL.getNumArgs() > 1) {
+    S.Diag(AL.getLoc(), diag::err_attribute_wrong_number_arguments)
+      << AL.getName() << 1;
     return;
   }
 
-  if (Attr.getNumArgs() == 0) {
-    D->addAttr(::new (S.Context) AlignedAttr(Attr.getRange(), S.Context,
-               true, nullptr, Attr.getAttributeSpellingListIndex()));
+  if (AL.getNumArgs() == 0) {
+    D->addAttr(::new (S.Context) AlignedAttr(AL.getRange(), S.Context,
+               true, nullptr, AL.getAttributeSpellingListIndex()));
     return;
   }
 
-  Expr *E = Attr.getArgAsExpr(0);
-  if (Attr.isPackExpansion() && !E->containsUnexpandedParameterPack()) {
-    S.Diag(Attr.getEllipsisLoc(),
+  Expr *E = AL.getArgAsExpr(0);
+  if (AL.isPackExpansion() && !E->containsUnexpandedParameterPack()) {
+    S.Diag(AL.getEllipsisLoc(),
            diag::err_pack_expansion_without_parameter_packs);
     return;
   }
 
-  if (!Attr.isPackExpansion() && S.DiagnoseUnexpandedParameterPack(E))
+  if (!AL.isPackExpansion() && S.DiagnoseUnexpandedParameterPack(E))
     return;
 
   if (E->isValueDependent()) {
     if (const auto *TND = dyn_cast<TypedefNameDecl>(D)) {
       if (!TND->getUnderlyingType()->isDependentType()) {
-        S.Diag(Attr.getLoc(), diag::err_alignment_dependent_typedef_name)
+        S.Diag(AL.getLoc(), diag::err_alignment_dependent_typedef_name)
             << E->getSourceRange();
         return;
       }
     }
   }
 
-  S.AddAlignedAttr(Attr.getRange(), D, E, Attr.getAttributeSpellingListIndex(),
-                   Attr.isPackExpansion());
+  S.AddAlignedAttr(AL.getRange(), D, E, AL.getAttributeSpellingListIndex(),
+                   AL.isPackExpansion());
 }
 
 void Sema::AddAlignedAttr(SourceRange AttrRange, Decl *D, Expr *E,
@@ -3612,12 +3496,12 @@
     int DiagKind = -1;
     if (isa<ParmVarDecl>(D)) {
       DiagKind = 0;
-    } else if (VarDecl *VD = dyn_cast<VarDecl>(D)) {
+    } else if (const auto *VD = dyn_cast<VarDecl>(D)) {
       if (VD->getStorageClass() == SC_Register)
         DiagKind = 1;
       if (VD->isExceptionVariable())
         DiagKind = 2;
-    } else if (FieldDecl *FD = dyn_cast<FieldDecl>(D)) {
+    } else if (const auto *FD = dyn_cast<FieldDecl>(D)) {
       if (FD->isBitField())
         DiagKind = 3;
     } else if (!isa<TagDecl>(D)) {
@@ -3641,7 +3525,7 @@
     return;
   }
 
-  // FIXME: Cache the number on the Attr object?
+  // FIXME: Cache the number on the AL object?
   llvm::APSInt Alignment;
   ExprResult ICE
     = VerifyIntegerConstantExpression(E, &Alignment,
@@ -3679,7 +3563,7 @@
     unsigned MaxTLSAlign =
         Context.toCharUnitsFromBits(Context.getTargetInfo().getMaxTLSAlign())
             .getQuantity();
-    auto *VD = dyn_cast<VarDecl>(D);
+    const auto *VD = dyn_cast<VarDecl>(D);
     if (MaxTLSAlign && AlignVal > MaxTLSAlign && VD &&
         VD->getTLSKind() != VarDecl::TLS_None) {
       Diag(VD->getLocation(), diag::err_tls_var_aligned_over_maximum)
@@ -3696,7 +3580,7 @@
 
 void Sema::AddAlignedAttr(SourceRange AttrRange, Decl *D, TypeSourceInfo *TS,
                           unsigned SpellingListIndex, bool IsPackExpansion) {
-  // FIXME: Cache the number on the Attr object if non-dependent?
+  // FIXME: Cache the number on the AL object if non-dependent?
   // FIXME: Perform checking of type validity
   AlignedAttr *AA = ::new (Context) AlignedAttr(AttrRange, Context, false, TS,
                                                 SpellingListIndex);
@@ -3708,11 +3592,11 @@
   assert(D->hasAttrs() && "no attributes on decl");
 
   QualType UnderlyingTy, DiagTy;
-  if (ValueDecl *VD = dyn_cast<ValueDecl>(D)) {
+  if (const auto *VD = dyn_cast<ValueDecl>(D)) {
     UnderlyingTy = DiagTy = VD->getType();
   } else {
     UnderlyingTy = DiagTy = Context.getTagDeclType(cast<TagDecl>(D));
-    if (EnumDecl *ED = dyn_cast<EnumDecl>(D))
+    if (const auto *ED = dyn_cast<EnumDecl>(D))
       UnderlyingTy = ED->getIntegerType();
   }
   if (DiagTy->isDependentType() || DiagTy->isIncompleteType())
@@ -3832,18 +3716,18 @@
 /// Despite what would be logical, the mode attribute is a decl attribute, not a
 /// type attribute: 'int ** __attribute((mode(HI))) *G;' tries to make 'G' be
 /// HImode, not an intermediate pointer.
-static void handleModeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleModeAttr(Sema &S, Decl *D, const AttributeList &AL) {
   // This attribute isn't documented, but glibc uses it.  It changes
   // the width of an int or unsigned int to the specified size.
-  if (!Attr.isArgIdent(0)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type) << Attr.getName()
+  if (!AL.isArgIdent(0)) {
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_type) << AL.getName()
       << AANT_ArgumentIdentifier;
     return;
   }
 
-  IdentifierInfo *Name = Attr.getArgAsIdent(0)->Ident;
+  IdentifierInfo *Name = AL.getArgAsIdent(0)->Ident;
 
-  S.AddModeAttr(Attr.getRange(), D, Name, Attr.getAttributeSpellingListIndex());
+  S.AddModeAttr(AL.getRange(), D, Name, AL.getAttributeSpellingListIndex());
 }
 
 void Sema::AddModeAttr(SourceRange AttrRange, Decl *D, IdentifierInfo *Name,
@@ -3889,9 +3773,9 @@
   }
 
   QualType OldTy;
-  if (TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D))
+  if (const auto *TD = dyn_cast<TypedefNameDecl>(D))
     OldTy = TD->getUnderlyingType();
-  else if (EnumDecl *ED = dyn_cast<EnumDecl>(D)) {
+  else if (const auto *ED = dyn_cast<EnumDecl>(D)) {
     // Something like 'typedef enum { X } __attribute__((mode(XX))) T;'.
     // Try to get type from enum declaration, default to int.
     OldTy = ED->getIntegerType();
@@ -3909,7 +3793,7 @@
   // Base type can also be a vector type (see PR17453).
   // Distinguish between base type and base element type.
   QualType OldElemTy = OldTy;
-  if (const VectorType *VT = OldTy->getAs<VectorType>())
+  if (const auto *VT = OldTy->getAs<VectorType>())
     OldElemTy = VT->getElementType();
 
   // GCC allows 'mode' attribute on enumeration types (even incomplete), except
@@ -3958,7 +3842,7 @@
   if (VectorSize.getBoolValue()) {
     NewTy = Context.getVectorType(NewTy, VectorSize.getZExtValue(),
                                   VectorType::GenericVector);
-  } else if (const VectorType *OldVT = OldTy->getAs<VectorType>()) {
+  } else if (const auto *OldVT = OldTy->getAs<VectorType>()) {
     // Complex machine mode does not support base vector types.
     if (ComplexMode) {
       Diag(AttrLoc, diag::err_complex_mode_vector_type);
@@ -3977,9 +3861,9 @@
   }
 
   // Install the new type.
-  if (TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D))
+  if (auto *TD = dyn_cast<TypedefNameDecl>(D))
     TD->setModedTypeSourceInfo(TD->getTypeSourceInfo(), NewTy);
-  else if (EnumDecl *ED = dyn_cast<EnumDecl>(D))
+  else if (auto *ED = dyn_cast<EnumDecl>(D))
     ED->setIntegerType(NewTy);
   else
     cast<ValueDecl>(D)->setType(NewTy);
@@ -3988,10 +3872,10 @@
              ModeAttr(AttrRange, Context, Name, SpellingListIndex));
 }
 
-static void handleNoDebugAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleNoDebugAttr(Sema &S, Decl *D, const AttributeList &AL) {
   D->addAttr(::new (S.Context)
-             NoDebugAttr(Attr.getRange(), S.Context,
-                         Attr.getAttributeSpellingListIndex()));
+             NoDebugAttr(AL.getRange(), S.Context,
+                         AL.getAttributeSpellingListIndex()));
 }
 
 AlwaysInlineAttr *Sema::mergeAlwaysInlineAttr(Decl *D, SourceRange Range,
@@ -4023,7 +3907,7 @@
 Sema::mergeInternalLinkageAttr(Decl *D, SourceRange Range,
                                IdentifierInfo *Ident,
                                unsigned AttrSpellingListIndex) {
-  if (auto VD = dyn_cast<VarDecl>(D)) {
+  if (const auto *VD = dyn_cast<VarDecl>(D)) {
     // Attribute applies to Var but not any subclass of it (like ParmVar,
     // ImplicitParm or VarTemplateSpecialization).
     if (VD->getKind() != Decl::Var) {
@@ -4081,70 +3965,71 @@
 }
 
 static void handleAlwaysInlineAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<NotTailCalledAttr>(S, D, Attr.getRange(),
-                                                  Attr.getName()))
+                                   const AttributeList &AL) {
+  if (checkAttrMutualExclusion<NotTailCalledAttr>(S, D, AL.getRange(),
+                                                  AL.getName()))
     return;
 
   if (AlwaysInlineAttr *Inline = S.mergeAlwaysInlineAttr(
-          D, Attr.getRange(), Attr.getName(),
-          Attr.getAttributeSpellingListIndex()))
+          D, AL.getRange(), AL.getName(),
+          AL.getAttributeSpellingListIndex()))
     D->addAttr(Inline);
 }
 
-static void handleMinSizeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleMinSizeAttr(Sema &S, Decl *D, const AttributeList &AL) {
   if (MinSizeAttr *MinSize = S.mergeMinSizeAttr(
-          D, Attr.getRange(), Attr.getAttributeSpellingListIndex()))
+          D, AL.getRange(), AL.getAttributeSpellingListIndex()))
     D->addAttr(MinSize);
 }
 
 static void handleOptimizeNoneAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
+                                   const AttributeList &AL) {
   if (OptimizeNoneAttr *Optnone = S.mergeOptimizeNoneAttr(
-          D, Attr.getRange(), Attr.getAttributeSpellingListIndex()))
+          D, AL.getRange(), AL.getAttributeSpellingListIndex()))
     D->addAttr(Optnone);
 }
 
-static void handleConstantAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<CUDASharedAttr>(S, D, Attr.getRange(),
-                                               Attr.getName()))
+static void handleConstantAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (checkAttrMutualExclusion<CUDASharedAttr>(S, D, AL.getRange(),
+                                               AL.getName()))
     return;
-  auto *VD = cast<VarDecl>(D);
+  const auto *VD = cast<VarDecl>(D);
   if (!VD->hasGlobalStorage()) {
-    S.Diag(Attr.getLoc(), diag::err_cuda_nonglobal_constant);
+    S.Diag(AL.getLoc(), diag::err_cuda_nonglobal_constant);
     return;
   }
   D->addAttr(::new (S.Context) CUDAConstantAttr(
-      Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
 }
 
-static void handleSharedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<CUDAConstantAttr>(S, D, Attr.getRange(),
-                                                 Attr.getName()))
+static void handleSharedAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (checkAttrMutualExclusion<CUDAConstantAttr>(S, D, AL.getRange(),
+                                                 AL.getName()))
     return;
-  auto *VD = cast<VarDecl>(D);
+  const auto *VD = cast<VarDecl>(D);
   // extern __shared__ is only allowed on arrays with no length (e.g.
   // "int x[]").
-  if (VD->hasExternalStorage() && !isa<IncompleteArrayType>(VD->getType())) {
-    S.Diag(Attr.getLoc(), diag::err_cuda_extern_shared) << VD;
+  if (!S.getLangOpts().CUDARelocatableDeviceCode && VD->hasExternalStorage() &&
+      !isa<IncompleteArrayType>(VD->getType())) {
+    S.Diag(AL.getLoc(), diag::err_cuda_extern_shared) << VD;
     return;
   }
   if (S.getLangOpts().CUDA && VD->hasLocalStorage() &&
-      S.CUDADiagIfHostCode(Attr.getLoc(), diag::err_cuda_host_shared)
+      S.CUDADiagIfHostCode(AL.getLoc(), diag::err_cuda_host_shared)
           << S.CurrentCUDATarget())
     return;
   D->addAttr(::new (S.Context) CUDASharedAttr(
-      Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
 }
 
-static void handleGlobalAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<CUDADeviceAttr>(S, D, Attr.getRange(),
-                                               Attr.getName()) ||
-      checkAttrMutualExclusion<CUDAHostAttr>(S, D, Attr.getRange(),
-                                             Attr.getName())) {
+static void handleGlobalAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (checkAttrMutualExclusion<CUDADeviceAttr>(S, D, AL.getRange(),
+                                               AL.getName()) ||
+      checkAttrMutualExclusion<CUDAHostAttr>(S, D, AL.getRange(),
+                                             AL.getName())) {
     return;
   }
-  FunctionDecl *FD = cast<FunctionDecl>(D);
+  const auto *FD = cast<FunctionDecl>(D);
   if (!FD->getReturnType()->isVoidType()) {
     SourceRange RTRange = FD->getReturnTypeSourceRange();
     S.Diag(FD->getTypeSpecStartLoc(), diag::err_kern_type_not_void_return)
@@ -4166,86 +4051,86 @@
     S.Diag(FD->getLocStart(), diag::warn_kern_is_inline) << FD;
 
   D->addAttr(::new (S.Context)
-              CUDAGlobalAttr(Attr.getRange(), S.Context,
-                             Attr.getAttributeSpellingListIndex()));
+              CUDAGlobalAttr(AL.getRange(), S.Context,
+                             AL.getAttributeSpellingListIndex()));
 }
 
-static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  FunctionDecl *Fn = cast<FunctionDecl>(D);
+static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  const auto *Fn = cast<FunctionDecl>(D);
   if (!Fn->isInlineSpecified()) {
-    S.Diag(Attr.getLoc(), diag::warn_gnu_inline_attribute_requires_inline);
+    S.Diag(AL.getLoc(), diag::warn_gnu_inline_attribute_requires_inline);
     return;
   }
 
   D->addAttr(::new (S.Context)
-             GNUInlineAttr(Attr.getRange(), S.Context,
-                           Attr.getAttributeSpellingListIndex()));
+             GNUInlineAttr(AL.getRange(), S.Context,
+                           AL.getAttributeSpellingListIndex()));
 }
 
-static void handleCallConvAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleCallConvAttr(Sema &S, Decl *D, const AttributeList &AL) {
   if (hasDeclarator(D)) return;
 
-  // Diagnostic is emitted elsewhere: here we store the (valid) Attr
+  // Diagnostic is emitted elsewhere: here we store the (valid) AL
   // in the Decl node for syntactic reasoning, e.g., pretty-printing.
   CallingConv CC;
-  if (S.CheckCallingConvAttr(Attr, CC, /*FD*/nullptr))
+  if (S.CheckCallingConvAttr(AL, CC, /*FD*/nullptr))
     return;
 
   if (!isa<ObjCMethodDecl>(D)) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
-      << Attr.getName() << ExpectedFunctionOrMethod;
+    S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type)
+      << AL.getName() << ExpectedFunctionOrMethod;
     return;
   }
 
-  switch (Attr.getKind()) {
+  switch (AL.getKind()) {
   case AttributeList::AT_FastCall:
     D->addAttr(::new (S.Context)
-               FastCallAttr(Attr.getRange(), S.Context,
-                            Attr.getAttributeSpellingListIndex()));
+               FastCallAttr(AL.getRange(), S.Context,
+                            AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_StdCall:
     D->addAttr(::new (S.Context)
-               StdCallAttr(Attr.getRange(), S.Context,
-                           Attr.getAttributeSpellingListIndex()));
+               StdCallAttr(AL.getRange(), S.Context,
+                           AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_ThisCall:
     D->addAttr(::new (S.Context)
-               ThisCallAttr(Attr.getRange(), S.Context,
-                            Attr.getAttributeSpellingListIndex()));
+               ThisCallAttr(AL.getRange(), S.Context,
+                            AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_CDecl:
     D->addAttr(::new (S.Context)
-               CDeclAttr(Attr.getRange(), S.Context,
-                         Attr.getAttributeSpellingListIndex()));
+               CDeclAttr(AL.getRange(), S.Context,
+                         AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_Pascal:
     D->addAttr(::new (S.Context)
-               PascalAttr(Attr.getRange(), S.Context,
-                          Attr.getAttributeSpellingListIndex()));
+               PascalAttr(AL.getRange(), S.Context,
+                          AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_SwiftCall:
     D->addAttr(::new (S.Context)
-               SwiftCallAttr(Attr.getRange(), S.Context,
-                             Attr.getAttributeSpellingListIndex()));
+               SwiftCallAttr(AL.getRange(), S.Context,
+                             AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_VectorCall:
     D->addAttr(::new (S.Context)
-               VectorCallAttr(Attr.getRange(), S.Context,
-                              Attr.getAttributeSpellingListIndex()));
+               VectorCallAttr(AL.getRange(), S.Context,
+                              AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_MSABI:
     D->addAttr(::new (S.Context)
-               MSABIAttr(Attr.getRange(), S.Context,
-                         Attr.getAttributeSpellingListIndex()));
+               MSABIAttr(AL.getRange(), S.Context,
+                         AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_SysVABI:
     D->addAttr(::new (S.Context)
-               SysVABIAttr(Attr.getRange(), S.Context,
-                           Attr.getAttributeSpellingListIndex()));
+               SysVABIAttr(AL.getRange(), S.Context,
+                           AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_RegCall:
     D->addAttr(::new (S.Context) RegCallAttr(
-        Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+        AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_Pcs: {
     PcsAttr::PCSType PCS;
@@ -4261,37 +4146,37 @@
     }
 
     D->addAttr(::new (S.Context)
-               PcsAttr(Attr.getRange(), S.Context, PCS,
-                       Attr.getAttributeSpellingListIndex()));
+               PcsAttr(AL.getRange(), S.Context, PCS,
+                       AL.getAttributeSpellingListIndex()));
     return;
   }
   case AttributeList::AT_IntelOclBicc:
     D->addAttr(::new (S.Context)
-               IntelOclBiccAttr(Attr.getRange(), S.Context,
-                                Attr.getAttributeSpellingListIndex()));
+               IntelOclBiccAttr(AL.getRange(), S.Context,
+                                AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_PreserveMost:
     D->addAttr(::new (S.Context) PreserveMostAttr(
-        Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+        AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
     return;
   case AttributeList::AT_PreserveAll:
     D->addAttr(::new (S.Context) PreserveAllAttr(
-        Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+        AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
     return;
   default:
     llvm_unreachable("unexpected attribute kind");
   }
 }
 
-static void handleSuppressAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
+static void handleSuppressAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
     return;
 
   std::vector<StringRef> DiagnosticIdentifiers;
-  for (unsigned I = 0, E = Attr.getNumArgs(); I != E; ++I) {
+  for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
     StringRef RuleName;
 
-    if (!S.checkStringLiteralArgumentAttr(Attr, I, RuleName, nullptr))
+    if (!S.checkStringLiteralArgumentAttr(AL, I, RuleName, nullptr))
       return;
 
     // FIXME: Warn if the rule name is unknown. This is tricky because only
@@ -4299,8 +4184,8 @@
     DiagnosticIdentifiers.push_back(RuleName);
   }
   D->addAttr(::new (S.Context) SuppressAttr(
-      Attr.getRange(), S.Context, DiagnosticIdentifiers.data(),
-      DiagnosticIdentifiers.size(), Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, DiagnosticIdentifiers.data(),
+      DiagnosticIdentifiers.size(), AL.getAttributeSpellingListIndex()));
 }
 
 bool Sema::CheckCallingConvAttr(const AttributeList &Attrs, CallingConv &CC, 
@@ -4382,36 +4267,36 @@
 }
 
 /// Pointer-like types in the default address space.
-static bool isValidSwiftContextType(QualType type) {
-  if (!type->hasPointerRepresentation())
-    return type->isDependentType();
-  return type->getPointeeType().getAddressSpace() == LangAS::Default;
+static bool isValidSwiftContextType(QualType Ty) {
+  if (!Ty->hasPointerRepresentation())
+    return Ty->isDependentType();
+  return Ty->getPointeeType().getAddressSpace() == LangAS::Default;
 }
 
 /// Pointers and references in the default address space.
-static bool isValidSwiftIndirectResultType(QualType type) {
-  if (auto ptrType = type->getAs<PointerType>()) {
-    type = ptrType->getPointeeType();
-  } else if (auto refType = type->getAs<ReferenceType>()) {
-    type = refType->getPointeeType();
+static bool isValidSwiftIndirectResultType(QualType Ty) {
+  if (const auto *PtrType = Ty->getAs<PointerType>()) {
+    Ty = PtrType->getPointeeType();
+  } else if (const auto *RefType = Ty->getAs<ReferenceType>()) {
+    Ty = RefType->getPointeeType();
   } else {
-    return type->isDependentType();
+    return Ty->isDependentType();
   }
-  return type.getAddressSpace() == LangAS::Default;
+  return Ty.getAddressSpace() == LangAS::Default;
 }
 
 /// Pointers and references to pointers in the default address space.
-static bool isValidSwiftErrorResultType(QualType type) {
-  if (auto ptrType = type->getAs<PointerType>()) {
-    type = ptrType->getPointeeType();
-  } else if (auto refType = type->getAs<ReferenceType>()) {
-    type = refType->getPointeeType();
+static bool isValidSwiftErrorResultType(QualType Ty) {
+  if (const auto *PtrType = Ty->getAs<PointerType>()) {
+    Ty = PtrType->getPointeeType();
+  } else if (const auto *RefType = Ty->getAs<ReferenceType>()) {
+    Ty = RefType->getPointeeType();
   } else {
-    return type->isDependentType();
+    return Ty->isDependentType();
   }
-  if (!type.getQualifiers().empty())
+  if (!Ty.getQualifiers().empty())
     return false;
-  return isValidSwiftContextType(type);
+  return isValidSwiftContextType(Ty);
 }
 
 static void handleParameterABIAttr(Sema &S, Decl *D, const AttributeList &Attrs,
@@ -4473,34 +4358,34 @@
 
 /// Checks a regparm attribute, returning true if it is ill-formed and
 /// otherwise setting numParams to the appropriate value.
-bool Sema::CheckRegparmAttr(const AttributeList &Attr, unsigned &numParams) {
-  if (Attr.isInvalid())
+bool Sema::CheckRegparmAttr(const AttributeList &AL, unsigned &numParams) {
+  if (AL.isInvalid())
     return true;
 
-  if (!checkAttributeNumArgs(*this, Attr, 1)) {
-    Attr.setInvalid();
+  if (!checkAttributeNumArgs(*this, AL, 1)) {
+    AL.setInvalid();
     return true;
   }
 
   uint32_t NP;
-  Expr *NumParamsExpr = Attr.getArgAsExpr(0);
-  if (!checkUInt32Argument(*this, Attr, NumParamsExpr, NP)) {
-    Attr.setInvalid();
+  Expr *NumParamsExpr = AL.getArgAsExpr(0);
+  if (!checkUInt32Argument(*this, AL, NumParamsExpr, NP)) {
+    AL.setInvalid();
     return true;
   }
 
   if (Context.getTargetInfo().getRegParmMax() == 0) {
-    Diag(Attr.getLoc(), diag::err_attribute_regparm_wrong_platform)
+    Diag(AL.getLoc(), diag::err_attribute_regparm_wrong_platform)
       << NumParamsExpr->getSourceRange();
-    Attr.setInvalid();
+    AL.setInvalid();
     return true;
   }
 
   numParams = NP;
   if (numParams > Context.getTargetInfo().getRegParmMax()) {
-    Diag(Attr.getLoc(), diag::err_attribute_regparm_invalid_number)
+    Diag(AL.getLoc(), diag::err_attribute_regparm_invalid_number)
       << Context.getTargetInfo().getRegParmMax() << NumParamsExpr->getSourceRange();
-    Attr.setInvalid();
+    AL.setInvalid();
     return true;
   }
 
@@ -4512,7 +4397,7 @@
 // non-nullptr Expr result on success. Otherwise, it returns nullptr
 // and may output an error.
 static Expr *makeLaunchBoundsArgExpr(Sema &S, Expr *E,
-                                     const CUDALaunchBoundsAttr &Attr,
+                                     const CUDALaunchBoundsAttr &AL,
                                      const unsigned Idx) {
   if (S.DiagnoseUnexpandedParameterPack(E))
     return nullptr;
@@ -4525,7 +4410,7 @@
   llvm::APSInt I(64);
   if (!E->isIntegerConstantExpr(I, S.Context)) {
     S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type)
-        << &Attr << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange();
+        << &AL << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange();
     return nullptr;
   }
   // Make sure we can fit it in 32 bits.
@@ -4536,7 +4421,7 @@
   }
   if (I < 0)
     S.Diag(E->getExprLoc(), diag::warn_attribute_argument_n_negative)
-        << &Attr << Idx << E->getSourceRange();
+        << &AL << Idx << E->getSourceRange();
 
   // We may need to perform implicit conversion of the argument.
   InitializedEntity Entity = InitializedEntity::InitializeParameter(
@@ -4567,205 +4452,185 @@
 }
 
 static void handleLaunchBoundsAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
-  if (!checkAttributeAtLeastNumArgs(S, Attr, 1) ||
-      !checkAttributeAtMostNumArgs(S, Attr, 2))
+                                   const AttributeList &AL) {
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1) ||
+      !checkAttributeAtMostNumArgs(S, AL, 2))
     return;
 
-  S.AddLaunchBoundsAttr(Attr.getRange(), D, Attr.getArgAsExpr(0),
-                        Attr.getNumArgs() > 1 ? Attr.getArgAsExpr(1) : nullptr,
-                        Attr.getAttributeSpellingListIndex());
+  S.AddLaunchBoundsAttr(AL.getRange(), D, AL.getArgAsExpr(0),
+                        AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr,
+                        AL.getAttributeSpellingListIndex());
 }
 
 static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
-                                          const AttributeList &Attr) {
-  if (!Attr.isArgIdent(0)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
-      << Attr.getName() << /* arg num = */ 1 << AANT_ArgumentIdentifier;
-    return;
-  }
-  
-  if (!checkAttributeNumArgs(S, Attr, 3))
-    return;
-
-  IdentifierInfo *ArgumentKind = Attr.getArgAsIdent(0)->Ident;
-
-  if (!isFunctionOrMethod(D) || !hasFunctionProto(D)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_decl_type)
-      << Attr.getName() << ExpectedFunctionOrMethod;
+                                          const AttributeList &AL) {
+  if (!AL.isArgIdent(0)) {
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
+      << AL.getName() << /* arg num = */ 1 << AANT_ArgumentIdentifier;
     return;
   }
 
-  uint64_t ArgumentIdx;
-  if (!checkFunctionOrMethodParameterIndex(S, D, Attr, 2, Attr.getArgAsExpr(1),
+  ParamIdx ArgumentIdx;
+  if (!checkFunctionOrMethodParameterIndex(S, D, AL, 2, AL.getArgAsExpr(1),
                                            ArgumentIdx))
     return;
 
-  uint64_t TypeTagIdx;
-  if (!checkFunctionOrMethodParameterIndex(S, D, Attr, 3, Attr.getArgAsExpr(2),
+  ParamIdx TypeTagIdx;
+  if (!checkFunctionOrMethodParameterIndex(S, D, AL, 3, AL.getArgAsExpr(2),
                                            TypeTagIdx))
     return;
 
-  bool IsPointer = (Attr.getName()->getName() == "pointer_with_type_tag");
+  bool IsPointer = AL.getName()->getName() == "pointer_with_type_tag";
   if (IsPointer) {
     // Ensure that buffer has a pointer type.
-    QualType BufferTy = getFunctionOrMethodParamType(D, ArgumentIdx);
-    if (!BufferTy->isPointerType()) {
-      S.Diag(Attr.getLoc(), diag::err_attribute_pointers_only)
-        << Attr.getName() << 0;
-    }
+    unsigned ArgumentIdxAST = ArgumentIdx.getASTIndex();
+    if (ArgumentIdxAST >= getFunctionOrMethodNumParams(D) ||
+        !getFunctionOrMethodParamType(D, ArgumentIdxAST)->isPointerType())
+      S.Diag(AL.getLoc(), diag::err_attribute_pointers_only)
+          << AL.getName() << 0;
   }
 
-  D->addAttr(::new (S.Context)
-             ArgumentWithTypeTagAttr(Attr.getRange(), S.Context, ArgumentKind,
-                                     ArgumentIdx, TypeTagIdx, IsPointer,
-                                     Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) ArgumentWithTypeTagAttr(
+      AL.getRange(), S.Context, AL.getArgAsIdent(0)->Ident, ArgumentIdx,
+      TypeTagIdx, IsPointer, AL.getAttributeSpellingListIndex()));
 }
 
 static void handleTypeTagForDatatypeAttr(Sema &S, Decl *D,
-                                         const AttributeList &Attr) {
-  if (!Attr.isArgIdent(0)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
-      << Attr.getName() << 1 << AANT_ArgumentIdentifier;
+                                         const AttributeList &AL) {
+  if (!AL.isArgIdent(0)) {
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
+      << AL.getName() << 1 << AANT_ArgumentIdentifier;
     return;
   }
   
-  if (!checkAttributeNumArgs(S, Attr, 1))
+  if (!checkAttributeNumArgs(S, AL, 1))
     return;
 
   if (!isa<VarDecl>(D)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_decl_type)
-      << Attr.getName() << ExpectedVariable;
+    S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type)
+      << AL.getName() << ExpectedVariable;
     return;
   }
 
-  IdentifierInfo *PointerKind = Attr.getArgAsIdent(0)->Ident;
+  IdentifierInfo *PointerKind = AL.getArgAsIdent(0)->Ident;
   TypeSourceInfo *MatchingCTypeLoc = nullptr;
-  S.GetTypeFromParser(Attr.getMatchingCType(), &MatchingCTypeLoc);
+  S.GetTypeFromParser(AL.getMatchingCType(), &MatchingCTypeLoc);
   assert(MatchingCTypeLoc && "no type source info for attribute argument");
 
   D->addAttr(::new (S.Context)
-             TypeTagForDatatypeAttr(Attr.getRange(), S.Context, PointerKind,
+             TypeTagForDatatypeAttr(AL.getRange(), S.Context, PointerKind,
                                     MatchingCTypeLoc,
-                                    Attr.getLayoutCompatible(),
-                                    Attr.getMustBeNull(),
-                                    Attr.getAttributeSpellingListIndex()));
+                                    AL.getLayoutCompatible(),
+                                    AL.getMustBeNull(),
+                                    AL.getAttributeSpellingListIndex()));
 }
 
-static void handleXRayLogArgsAttr(Sema &S, Decl *D,
-                                  const AttributeList &Attr) {
-  uint64_t ArgCount;
+static void handleXRayLogArgsAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  ParamIdx ArgCount;
 
-  if (!checkFunctionOrMethodParameterIndex(S, D, Attr, 1, Attr.getArgAsExpr(0),
+  if (!checkFunctionOrMethodParameterIndex(S, D, AL, 1, AL.getArgAsExpr(0),
                                            ArgCount,
-                                           true /* AllowImplicitThis*/))
+                                           true /* CanIndexImplicitThis */))
     return;
 
-  // ArgCount isn't a parameter index [0;n), it's a count [1;n] - hence + 1.
-  D->addAttr(::new (S.Context)
-                 XRayLogArgsAttr(Attr.getRange(), S.Context, ++ArgCount,
-                                 Attr.getAttributeSpellingListIndex()));
+  // ArgCount isn't a parameter index [0;n), it's a count [1;n]
+  D->addAttr(::new (S.Context) XRayLogArgsAttr(
+      AL.getRange(), S.Context, ArgCount.getSourceIndex(),
+      AL.getAttributeSpellingListIndex()));
 }
 
 //===----------------------------------------------------------------------===//
 // Checker-specific attribute handlers.
 //===----------------------------------------------------------------------===//
 
-static bool isValidSubjectOfNSReturnsRetainedAttribute(QualType type) {
-  return type->isDependentType() ||
-         type->isObjCRetainableType();
+static bool isValidSubjectOfNSReturnsRetainedAttribute(QualType QT) {
+  return QT->isDependentType() || QT->isObjCRetainableType();
 }
 
-static bool isValidSubjectOfNSAttribute(Sema &S, QualType type) {
-  return type->isDependentType() || 
-         type->isObjCObjectPointerType() || 
-         S.Context.isObjCNSObjectType(type);
+static bool isValidSubjectOfNSAttribute(Sema &S, QualType QT) {
+  return QT->isDependentType() || QT->isObjCObjectPointerType() ||
+         S.Context.isObjCNSObjectType(QT);
 }
 
-static bool isValidSubjectOfCFAttribute(Sema &S, QualType type) {
-  return type->isDependentType() || 
-         type->isPointerType() || 
-         isValidSubjectOfNSAttribute(S, type);
+static bool isValidSubjectOfCFAttribute(Sema &S, QualType QT) {
+  return QT->isDependentType() || QT->isPointerType() ||
+         isValidSubjectOfNSAttribute(S, QT);
 }
 
-static void handleNSConsumedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  S.AddNSConsumedAttr(Attr.getRange(), D, Attr.getAttributeSpellingListIndex(),
-                      Attr.getKind() == AttributeList::AT_NSConsumed,
+static void handleNSConsumedAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  S.AddNSConsumedAttr(AL.getRange(), D, AL.getAttributeSpellingListIndex(),
+                      AL.getKind() == AttributeList::AT_NSConsumed,
                       /*template instantiation*/ false);
 }
 
-void Sema::AddNSConsumedAttr(SourceRange attrRange, Decl *D,
-                             unsigned spellingIndex, bool isNSConsumed,
-                             bool isTemplateInstantiation) {
-  ParmVarDecl *param = cast<ParmVarDecl>(D);
-  bool typeOK;
+void Sema::AddNSConsumedAttr(SourceRange AttrRange, Decl *D,
+                             unsigned SpellingIndex, bool IsNSConsumed,
+                             bool IsTemplateInstantiation) {
+  const auto *Param = cast<ParmVarDecl>(D);
+  bool TypeOK;
 
-  if (isNSConsumed) {
-    typeOK = isValidSubjectOfNSAttribute(*this, param->getType());
-  } else {
-    typeOK = isValidSubjectOfCFAttribute(*this, param->getType());
-  }
+  if (IsNSConsumed)
+    TypeOK = isValidSubjectOfNSAttribute(*this, Param->getType());
+  else
+    TypeOK = isValidSubjectOfCFAttribute(*this, Param->getType());
 
-  if (!typeOK) {
+  if (!TypeOK) {
     // These attributes are normally just advisory, but in ARC, ns_consumed
     // is significant.  Allow non-dependent code to contain inappropriate
     // attributes even in ARC, but require template instantiations to be
     // set up correctly.
-    Diag(D->getLocStart(),
-         (isTemplateInstantiation && isNSConsumed &&
-            getLangOpts().ObjCAutoRefCount
-          ? diag::err_ns_attribute_wrong_parameter_type
-          : diag::warn_ns_attribute_wrong_parameter_type))
-      << attrRange
-      << (isNSConsumed ? "ns_consumed" : "cf_consumed")
-      << (isNSConsumed ? /*objc pointers*/ 0 : /*cf pointers*/ 1);
+    Diag(D->getLocStart(), (IsTemplateInstantiation && IsNSConsumed &&
+                                    getLangOpts().ObjCAutoRefCount
+                                ? diag::err_ns_attribute_wrong_parameter_type
+                                : diag::warn_ns_attribute_wrong_parameter_type))
+        << AttrRange << (IsNSConsumed ? "ns_consumed" : "cf_consumed")
+        << (IsNSConsumed ? /*objc pointers*/ 0 : /*cf pointers*/ 1);
     return;
   }
 
-  if (isNSConsumed)
-    param->addAttr(::new (Context)
-                   NSConsumedAttr(attrRange, Context, spellingIndex));
+  if (IsNSConsumed)
+    D->addAttr(::new (Context)
+                   NSConsumedAttr(AttrRange, Context, SpellingIndex));
   else
-    param->addAttr(::new (Context)
-                   CFConsumedAttr(attrRange, Context, spellingIndex));
+    D->addAttr(::new (Context)
+                   CFConsumedAttr(AttrRange, Context, SpellingIndex));
 }
 
-bool Sema::checkNSReturnsRetainedReturnType(SourceLocation loc,
-                                            QualType type) {
-  if (isValidSubjectOfNSReturnsRetainedAttribute(type))
+bool Sema::checkNSReturnsRetainedReturnType(SourceLocation Loc, QualType QT) {
+  if (isValidSubjectOfNSReturnsRetainedAttribute(QT))
     return false;
 
-  Diag(loc, diag::warn_ns_attribute_wrong_return_type)
-    << "'ns_returns_retained'" << 0 << 0;
+  Diag(Loc, diag::warn_ns_attribute_wrong_return_type)
+      << "'ns_returns_retained'" << 0 << 0;
   return true;
 }
 
 static void handleNSReturnsRetainedAttr(Sema &S, Decl *D,
-                                        const AttributeList &Attr) {
-  QualType returnType;
+                                        const AttributeList &AL) {
+  QualType ReturnType;
 
-  if (ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D))
-    returnType = MD->getReturnType();
+  if (const auto *MD = dyn_cast<ObjCMethodDecl>(D))
+    ReturnType = MD->getReturnType();
   else if (S.getLangOpts().ObjCAutoRefCount && hasDeclarator(D) &&
-           (Attr.getKind() == AttributeList::AT_NSReturnsRetained))
+           (AL.getKind() == AttributeList::AT_NSReturnsRetained))
     return; // ignore: was handled as a type attribute
-  else if (ObjCPropertyDecl *PD = dyn_cast<ObjCPropertyDecl>(D))
-    returnType = PD->getType();
-  else if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
-    returnType = FD->getReturnType();
-  else if (auto *Param = dyn_cast<ParmVarDecl>(D)) {
-    returnType = Param->getType()->getPointeeType();
-    if (returnType.isNull()) {
+  else if (const auto *PD = dyn_cast<ObjCPropertyDecl>(D))
+    ReturnType = PD->getType();
+  else if (const auto *FD = dyn_cast<FunctionDecl>(D))
+    ReturnType = FD->getReturnType();
+  else if (const auto *Param = dyn_cast<ParmVarDecl>(D)) {
+    ReturnType = Param->getType()->getPointeeType();
+    if (ReturnType.isNull()) {
       S.Diag(D->getLocStart(), diag::warn_ns_attribute_wrong_parameter_type)
-          << Attr.getName() << /*pointer-to-CF*/2
-          << Attr.getRange();
+          << AL.getName() << /*pointer-to-CF*/2
+          << AL.getRange();
       return;
     }
-  } else if (Attr.isUsedAsTypeAttr()) {
+  } else if (AL.isUsedAsTypeAttr()) {
     return;
   } else {
     AttributeDeclKind ExpectedDeclKind;
-    switch (Attr.getKind()) {
+    switch (AL.getKind()) {
     default: llvm_unreachable("invalid ownership attribute");
     case AttributeList::AT_NSReturnsRetained:
     case AttributeList::AT_NSReturnsAutoreleased:
@@ -4779,40 +4644,40 @@
       break;
     }
     S.Diag(D->getLocStart(), diag::warn_attribute_wrong_decl_type)
-        << Attr.getRange() << Attr.getName() << ExpectedDeclKind;
+        << AL.getRange() << AL.getName() << ExpectedDeclKind;
     return;
   }
 
-  bool typeOK;
-  bool cf;
-  switch (Attr.getKind()) {
+  bool TypeOK;
+  bool Cf;
+  switch (AL.getKind()) {
   default: llvm_unreachable("invalid ownership attribute");
   case AttributeList::AT_NSReturnsRetained:
-    typeOK = isValidSubjectOfNSReturnsRetainedAttribute(returnType);
-    cf = false;
+    TypeOK = isValidSubjectOfNSReturnsRetainedAttribute(ReturnType);
+    Cf = false;
     break;
       
   case AttributeList::AT_NSReturnsAutoreleased:
   case AttributeList::AT_NSReturnsNotRetained:
-    typeOK = isValidSubjectOfNSAttribute(S, returnType);
-    cf = false;
+    TypeOK = isValidSubjectOfNSAttribute(S, ReturnType);
+    Cf = false;
     break;
 
   case AttributeList::AT_CFReturnsRetained:
   case AttributeList::AT_CFReturnsNotRetained:
-    typeOK = isValidSubjectOfCFAttribute(S, returnType);
-    cf = true;
+    TypeOK = isValidSubjectOfCFAttribute(S, ReturnType);
+    Cf = true;
     break;
   }
 
-  if (!typeOK) {
-    if (Attr.isUsedAsTypeAttr())
+  if (!TypeOK) {
+    if (AL.isUsedAsTypeAttr())
       return;
 
     if (isa<ParmVarDecl>(D)) {
       S.Diag(D->getLocStart(), diag::warn_ns_attribute_wrong_parameter_type)
-          << Attr.getName() << /*pointer-to-CF*/2
-          << Attr.getRange();
+          << AL.getName() << /*pointer-to-CF*/2
+          << AL.getRange();
     } else {
       // Needs to be kept in sync with warn_ns_attribute_wrong_return_type.
       enum : unsigned {
@@ -4825,34 +4690,34 @@
       else if (isa<ObjCPropertyDecl>(D))
         SubjectKind = Property;
       S.Diag(D->getLocStart(), diag::warn_ns_attribute_wrong_return_type)
-          << Attr.getName() << SubjectKind << cf
-          << Attr.getRange();
+          << AL.getName() << SubjectKind << Cf
+          << AL.getRange();
     }
     return;
   }
 
-  switch (Attr.getKind()) {
+  switch (AL.getKind()) {
     default:
       llvm_unreachable("invalid ownership attribute");
     case AttributeList::AT_NSReturnsAutoreleased:
       D->addAttr(::new (S.Context) NSReturnsAutoreleasedAttr(
-          Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+          AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
       return;
     case AttributeList::AT_CFReturnsNotRetained:
       D->addAttr(::new (S.Context) CFReturnsNotRetainedAttr(
-          Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+          AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
       return;
     case AttributeList::AT_NSReturnsNotRetained:
       D->addAttr(::new (S.Context) NSReturnsNotRetainedAttr(
-          Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+          AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
       return;
     case AttributeList::AT_CFReturnsRetained:
       D->addAttr(::new (S.Context) CFReturnsRetainedAttr(
-          Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+          AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
       return;
     case AttributeList::AT_NSReturnsRetained:
       D->addAttr(::new (S.Context) NSReturnsRetainedAttr(
-          Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+          AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
       return;
   };
 }
@@ -4887,115 +4752,90 @@
 
 static void handleObjCRequiresSuperAttr(Sema &S, Decl *D,
                                         const AttributeList &Attrs) {
-  ObjCMethodDecl *method = cast<ObjCMethodDecl>(D);
-  
-  DeclContext *DC = method->getDeclContext();
-  if (const ObjCProtocolDecl *PDecl = dyn_cast_or_null<ObjCProtocolDecl>(DC)) {
+  const auto *Method = cast<ObjCMethodDecl>(D);
+
+  const DeclContext *DC = Method->getDeclContext();
+  if (const auto *PDecl = dyn_cast_or_null<ObjCProtocolDecl>(DC)) {
     S.Diag(D->getLocStart(), diag::warn_objc_requires_super_protocol)
-    << Attrs.getName() << 0;
+        << Attrs.getName() << 0;
     S.Diag(PDecl->getLocation(), diag::note_protocol_decl);
     return;
   }
-  if (method->getMethodFamily() == OMF_dealloc) {
+  if (Method->getMethodFamily() == OMF_dealloc) {
     S.Diag(D->getLocStart(), diag::warn_objc_requires_super_protocol)
-    << Attrs.getName() << 1;
+        << Attrs.getName() << 1;
     return;
   }
-  
-  method->addAttr(::new (S.Context)
-                  ObjCRequiresSuperAttr(Attrs.getRange(), S.Context,
-                                        Attrs.getAttributeSpellingListIndex()));
+
+  D->addAttr(::new (S.Context) ObjCRequiresSuperAttr(
+      Attrs.getRange(), S.Context, Attrs.getAttributeSpellingListIndex()));
 }
 
-static void handleCFAuditedTransferAttr(Sema &S, Decl *D,
-                                        const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<CFUnknownTransferAttr>(S, D, Attr.getRange(),
-                                                      Attr.getName()))
-    return;
-
-  D->addAttr(::new (S.Context)
-             CFAuditedTransferAttr(Attr.getRange(), S.Context,
-                                   Attr.getAttributeSpellingListIndex()));
-}
-
-static void handleCFUnknownTransferAttr(Sema &S, Decl *D,
-                                        const AttributeList &Attr) {
-  if (checkAttrMutualExclusion<CFAuditedTransferAttr>(S, D, Attr.getRange(),
-                                                      Attr.getName()))
-    return;
-
-  D->addAttr(::new (S.Context)
-             CFUnknownTransferAttr(Attr.getRange(), S.Context,
-             Attr.getAttributeSpellingListIndex()));
-}
-
-static void handleObjCBridgeAttr(Sema &S, Scope *Sc, Decl *D,
-                                const AttributeList &Attr) {
-  IdentifierLoc * Parm = Attr.isArgIdent(0) ? Attr.getArgAsIdent(0) : nullptr;
+static void handleObjCBridgeAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  IdentifierLoc *Parm = AL.isArgIdent(0) ? AL.getArgAsIdent(0) : nullptr;
 
   if (!Parm) {
-    S.Diag(D->getLocStart(), diag::err_objc_attr_not_id) << Attr.getName() << 0;
+    S.Diag(D->getLocStart(), diag::err_objc_attr_not_id) << AL.getName() << 0;
     return;
   }
 
   // Typedefs only allow objc_bridge(id) and have some additional checking.
-  if (auto TD = dyn_cast<TypedefNameDecl>(D)) {
+  if (const auto *TD = dyn_cast<TypedefNameDecl>(D)) {
     if (!Parm->Ident->isStr("id")) {
-      S.Diag(Attr.getLoc(), diag::err_objc_attr_typedef_not_id)
-        << Attr.getName();
+      S.Diag(AL.getLoc(), diag::err_objc_attr_typedef_not_id)
+        << AL.getName();
       return;
     }
 
     // Only allow 'cv void *'.
     QualType T = TD->getUnderlyingType();
     if (!T->isVoidPointerType()) {
-      S.Diag(Attr.getLoc(), diag::err_objc_attr_typedef_not_void_pointer);
+      S.Diag(AL.getLoc(), diag::err_objc_attr_typedef_not_void_pointer);
       return;
     }
   }
   
   D->addAttr(::new (S.Context)
-             ObjCBridgeAttr(Attr.getRange(), S.Context, Parm->Ident,
-                           Attr.getAttributeSpellingListIndex()));
+             ObjCBridgeAttr(AL.getRange(), S.Context, Parm->Ident,
+                           AL.getAttributeSpellingListIndex()));
 }
 
-static void handleObjCBridgeMutableAttr(Sema &S, Scope *Sc, Decl *D,
-                                        const AttributeList &Attr) {
-  IdentifierLoc * Parm = Attr.isArgIdent(0) ? Attr.getArgAsIdent(0) : nullptr;
+static void handleObjCBridgeMutableAttr(Sema &S, Decl *D,
+                                        const AttributeList &AL) {
+  IdentifierLoc *Parm = AL.isArgIdent(0) ? AL.getArgAsIdent(0) : nullptr;
 
   if (!Parm) {
-    S.Diag(D->getLocStart(), diag::err_objc_attr_not_id) << Attr.getName() << 0;
+    S.Diag(D->getLocStart(), diag::err_objc_attr_not_id) << AL.getName() << 0;
     return;
   }
   
   D->addAttr(::new (S.Context)
-             ObjCBridgeMutableAttr(Attr.getRange(), S.Context, Parm->Ident,
-                            Attr.getAttributeSpellingListIndex()));
+             ObjCBridgeMutableAttr(AL.getRange(), S.Context, Parm->Ident,
+                            AL.getAttributeSpellingListIndex()));
 }
 
-static void handleObjCBridgeRelatedAttr(Sema &S, Scope *Sc, Decl *D,
-                                 const AttributeList &Attr) {
+static void handleObjCBridgeRelatedAttr(Sema &S, Decl *D,
+                                        const AttributeList &AL) {
   IdentifierInfo *RelatedClass =
-    Attr.isArgIdent(0) ? Attr.getArgAsIdent(0)->Ident : nullptr;
+      AL.isArgIdent(0) ? AL.getArgAsIdent(0)->Ident : nullptr;
   if (!RelatedClass) {
-    S.Diag(D->getLocStart(), diag::err_objc_attr_not_id) << Attr.getName() << 0;
+    S.Diag(D->getLocStart(), diag::err_objc_attr_not_id) << AL.getName() << 0;
     return;
   }
   IdentifierInfo *ClassMethod =
-    Attr.getArgAsIdent(1) ? Attr.getArgAsIdent(1)->Ident : nullptr;
+    AL.getArgAsIdent(1) ? AL.getArgAsIdent(1)->Ident : nullptr;
   IdentifierInfo *InstanceMethod =
-    Attr.getArgAsIdent(2) ? Attr.getArgAsIdent(2)->Ident : nullptr;
+    AL.getArgAsIdent(2) ? AL.getArgAsIdent(2)->Ident : nullptr;
   D->addAttr(::new (S.Context)
-             ObjCBridgeRelatedAttr(Attr.getRange(), S.Context, RelatedClass,
+             ObjCBridgeRelatedAttr(AL.getRange(), S.Context, RelatedClass,
                                    ClassMethod, InstanceMethod,
-                                   Attr.getAttributeSpellingListIndex()));
+                                   AL.getAttributeSpellingListIndex()));
 }
 
 static void handleObjCDesignatedInitializer(Sema &S, Decl *D,
-                                            const AttributeList &Attr) {
+                                            const AttributeList &AL) {
   ObjCInterfaceDecl *IFace;
-  if (ObjCCategoryDecl *CatDecl =
-          dyn_cast<ObjCCategoryDecl>(D->getDeclContext()))
+  if (auto *CatDecl = dyn_cast<ObjCCategoryDecl>(D->getDeclContext()))
     IFace = CatDecl->getClassInterface();
   else
     IFace = cast<ObjCInterfaceDecl>(D->getDeclContext());
@@ -5005,29 +4845,29 @@
 
   IFace->setHasDesignatedInitializers();
   D->addAttr(::new (S.Context)
-                  ObjCDesignatedInitializerAttr(Attr.getRange(), S.Context,
-                                         Attr.getAttributeSpellingListIndex()));
+                  ObjCDesignatedInitializerAttr(AL.getRange(), S.Context,
+                                         AL.getAttributeSpellingListIndex()));
 }
 
 static void handleObjCRuntimeName(Sema &S, Decl *D,
-                                  const AttributeList &Attr) {
+                                  const AttributeList &AL) {
   StringRef MetaDataName;
-  if (!S.checkStringLiteralArgumentAttr(Attr, 0, MetaDataName))
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, MetaDataName))
     return;
   D->addAttr(::new (S.Context)
-             ObjCRuntimeNameAttr(Attr.getRange(), S.Context,
+             ObjCRuntimeNameAttr(AL.getRange(), S.Context,
                                  MetaDataName,
-                                 Attr.getAttributeSpellingListIndex()));
+                                 AL.getAttributeSpellingListIndex()));
 }
 
 // When a user wants to use objc_boxable with a union or struct
 // but they don't have access to the declaration (legacy/third-party code)
 // then they can 'enable' this feature with a typedef:
 // typedef struct __attribute((objc_boxable)) legacy_struct legacy_struct;
-static void handleObjCBoxable(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleObjCBoxable(Sema &S, Decl *D, const AttributeList &AL) {
   bool notify = false;
 
-  RecordDecl *RD = dyn_cast<RecordDecl>(D);
+  auto *RD = dyn_cast<RecordDecl>(D);
   if (RD && RD->getDefinition()) {
     RD = RD->getDefinition();
     notify = true;
@@ -5035,8 +4875,8 @@
 
   if (RD) {
     ObjCBoxableAttr *BoxableAttr = ::new (S.Context)
-                          ObjCBoxableAttr(Attr.getRange(), S.Context,
-                                          Attr.getAttributeSpellingListIndex());
+                          ObjCBoxableAttr(AL.getRange(), S.Context,
+                                          AL.getAttributeSpellingListIndex());
     RD->addAttr(BoxableAttr);
     if (notify) {
       // we need to notify ASTReader/ASTWriter about
@@ -5048,35 +4888,35 @@
 }
 
 static void handleObjCOwnershipAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
+                                    const AttributeList &AL) {
   if (hasDeclarator(D)) return;
 
   S.Diag(D->getLocStart(), diag::err_attribute_wrong_decl_type)
-    << Attr.getRange() << Attr.getName() << ExpectedVariable;
+    << AL.getRange() << AL.getName() << ExpectedVariable;
 }
 
 static void handleObjCPreciseLifetimeAttr(Sema &S, Decl *D,
-                                          const AttributeList &Attr) {
-  ValueDecl *vd = cast<ValueDecl>(D);
-  QualType type = vd->getType();
+                                          const AttributeList &AL) {
+  const auto *VD = cast<ValueDecl>(D);
+  QualType QT = VD->getType();
 
-  if (!type->isDependentType() &&
-      !type->isObjCLifetimeType()) {
-    S.Diag(Attr.getLoc(), diag::err_objc_precise_lifetime_bad_type)
-      << type;
+  if (!QT->isDependentType() &&
+      !QT->isObjCLifetimeType()) {
+    S.Diag(AL.getLoc(), diag::err_objc_precise_lifetime_bad_type)
+      << QT;
     return;
   }
 
-  Qualifiers::ObjCLifetime lifetime = type.getObjCLifetime();
+  Qualifiers::ObjCLifetime Lifetime = QT.getObjCLifetime();
 
   // If we have no lifetime yet, check the lifetime we're presumably
   // going to infer.
-  if (lifetime == Qualifiers::OCL_None && !type->isDependentType())
-    lifetime = type->getObjCARCImplicitLifetime();
+  if (Lifetime == Qualifiers::OCL_None && !QT->isDependentType())
+    Lifetime = QT->getObjCARCImplicitLifetime();
 
-  switch (lifetime) {
+  switch (Lifetime) {
   case Qualifiers::OCL_None:
-    assert(type->isDependentType() &&
+    assert(QT->isDependentType() &&
            "didn't infer lifetime for non-dependent type?");
     break;
 
@@ -5086,14 +4926,14 @@
 
   case Qualifiers::OCL_ExplicitNone:
   case Qualifiers::OCL_Autoreleasing:
-    S.Diag(Attr.getLoc(), diag::warn_objc_precise_lifetime_meaningless)
-      << (lifetime == Qualifiers::OCL_Autoreleasing);
+    S.Diag(AL.getLoc(), diag::warn_objc_precise_lifetime_meaningless)
+        << (Lifetime == Qualifiers::OCL_Autoreleasing);
     break;
   }
 
   D->addAttr(::new (S.Context)
-             ObjCPreciseLifetimeAttr(Attr.getRange(), S.Context,
-                                     Attr.getAttributeSpellingListIndex()));
+             ObjCPreciseLifetimeAttr(AL.getRange(), S.Context,
+                                     AL.getAttributeSpellingListIndex()));
 }
 
 //===----------------------------------------------------------------------===//
@@ -5113,16 +4953,16 @@
   return ::new (Context) UuidAttr(Range, Context, Uuid, AttrSpellingListIndex);
 }
 
-static void handleUuidAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleUuidAttr(Sema &S, Decl *D, const AttributeList &AL) {
   if (!S.LangOpts.CPlusPlus) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_not_supported_in_lang)
-      << Attr.getName() << AttributeLangSupport::C;
+    S.Diag(AL.getLoc(), diag::err_attribute_not_supported_in_lang)
+      << AL.getName() << AttributeLangSupport::C;
     return;
   }
 
   StringRef StrRef;
   SourceLocation LiteralLoc;
-  if (!S.checkStringLiteralArgumentAttr(Attr, 0, StrRef, &LiteralLoc))
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, StrRef, &LiteralLoc))
     return;
 
   // GUID format is "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" or
@@ -5154,25 +4994,25 @@
   // separating attributes nor of the [ and the ] are in the AST.
   // Cf "SourceLocations of attribute list delimiters - [[ ... , ... ]] etc"
   // on cfe-dev.
-  if (Attr.isMicrosoftAttribute()) // Check for [uuid(...)] spelling.
-    S.Diag(Attr.getLoc(), diag::warn_atl_uuid_deprecated);
+  if (AL.isMicrosoftAttribute()) // Check for [uuid(...)] spelling.
+    S.Diag(AL.getLoc(), diag::warn_atl_uuid_deprecated);
 
-  UuidAttr *UA = S.mergeUuidAttr(D, Attr.getRange(),
-                                 Attr.getAttributeSpellingListIndex(), StrRef);
+  UuidAttr *UA = S.mergeUuidAttr(D, AL.getRange(),
+                                 AL.getAttributeSpellingListIndex(), StrRef);
   if (UA)
     D->addAttr(UA);
 }
 
-static void handleMSInheritanceAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleMSInheritanceAttr(Sema &S, Decl *D, const AttributeList &AL) {
   if (!S.LangOpts.CPlusPlus) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_not_supported_in_lang)
-      << Attr.getName() << AttributeLangSupport::C;
+    S.Diag(AL.getLoc(), diag::err_attribute_not_supported_in_lang)
+      << AL.getName() << AttributeLangSupport::C;
     return;
   }
   MSInheritanceAttr *IA = S.mergeMSInheritanceAttr(
-      D, Attr.getRange(), /*BestCase=*/true,
-      Attr.getAttributeSpellingListIndex(),
-      (MSInheritanceAttr::Spelling)Attr.getSemanticSpelling());
+      D, AL.getRange(), /*BestCase=*/true,
+      AL.getAttributeSpellingListIndex(),
+      (MSInheritanceAttr::Spelling)AL.getSemanticSpelling());
   if (IA) {
     D->addAttr(IA);
     S.Consumer.AssignInheritanceModel(cast<CXXRecordDecl>(D));
@@ -5180,45 +5020,45 @@
 }
 
 static void handleDeclspecThreadAttr(Sema &S, Decl *D,
-                                     const AttributeList &Attr) {
-  VarDecl *VD = cast<VarDecl>(D);
+                                     const AttributeList &AL) {
+  const auto *VD = cast<VarDecl>(D);
   if (!S.Context.getTargetInfo().isTLSSupported()) {
-    S.Diag(Attr.getLoc(), diag::err_thread_unsupported);
+    S.Diag(AL.getLoc(), diag::err_thread_unsupported);
     return;
   }
   if (VD->getTSCSpec() != TSCS_unspecified) {
-    S.Diag(Attr.getLoc(), diag::err_declspec_thread_on_thread_variable);
+    S.Diag(AL.getLoc(), diag::err_declspec_thread_on_thread_variable);
     return;
   }
   if (VD->hasLocalStorage()) {
-    S.Diag(Attr.getLoc(), diag::err_thread_non_global) << "__declspec(thread)";
+    S.Diag(AL.getLoc(), diag::err_thread_non_global) << "__declspec(thread)";
     return;
   }
-  VD->addAttr(::new (S.Context) ThreadAttr(
-      Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) ThreadAttr(AL.getRange(), S.Context,
+                                          AL.getAttributeSpellingListIndex()));
 }
 
-static void handleAbiTagAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleAbiTagAttr(Sema &S, Decl *D, const AttributeList &AL) {
   SmallVector<StringRef, 4> Tags;
-  for (unsigned I = 0, E = Attr.getNumArgs(); I != E; ++I) {
+  for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
     StringRef Tag;
-    if (!S.checkStringLiteralArgumentAttr(Attr, I, Tag))
+    if (!S.checkStringLiteralArgumentAttr(AL, I, Tag))
       return;
     Tags.push_back(Tag);
   }
 
   if (const auto *NS = dyn_cast<NamespaceDecl>(D)) {
     if (!NS->isInline()) {
-      S.Diag(Attr.getLoc(), diag::warn_attr_abi_tag_namespace) << 0;
+      S.Diag(AL.getLoc(), diag::warn_attr_abi_tag_namespace) << 0;
       return;
     }
     if (NS->isAnonymousNamespace()) {
-      S.Diag(Attr.getLoc(), diag::warn_attr_abi_tag_namespace) << 1;
+      S.Diag(AL.getLoc(), diag::warn_attr_abi_tag_namespace) << 1;
       return;
     }
-    if (Attr.getNumArgs() == 0)
+    if (AL.getNumArgs() == 0)
       Tags.push_back(NS->getName());
-  } else if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
+  } else if (!checkAttributeAtLeastNumArgs(S, AL, 1))
     return;
 
   // Store tags sorted and without duplicates.
@@ -5226,90 +5066,90 @@
   Tags.erase(std::unique(Tags.begin(), Tags.end()), Tags.end());
 
   D->addAttr(::new (S.Context)
-             AbiTagAttr(Attr.getRange(), S.Context, Tags.data(), Tags.size(),
-                        Attr.getAttributeSpellingListIndex()));
+             AbiTagAttr(AL.getRange(), S.Context, Tags.data(), Tags.size(),
+                        AL.getAttributeSpellingListIndex()));
 }
 
 static void handleARMInterruptAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
+                                   const AttributeList &AL) {
   // Check the attribute arguments.
-  if (Attr.getNumArgs() > 1) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_too_many_arguments)
-      << Attr.getName() << 1;
+  if (AL.getNumArgs() > 1) {
+    S.Diag(AL.getLoc(), diag::err_attribute_too_many_arguments)
+      << AL.getName() << 1;
     return;
   }
 
   StringRef Str;
   SourceLocation ArgLoc;
 
-  if (Attr.getNumArgs() == 0)
+  if (AL.getNumArgs() == 0)
     Str = "";
-  else if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str, &ArgLoc))
+  else if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &ArgLoc))
     return;
 
   ARMInterruptAttr::InterruptType Kind;
   if (!ARMInterruptAttr::ConvertStrToInterruptType(Str, Kind)) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_type_not_supported)
-      << Attr.getName() << Str << ArgLoc;
+    S.Diag(AL.getLoc(), diag::warn_attribute_type_not_supported)
+      << AL.getName() << Str << ArgLoc;
     return;
   }
 
-  unsigned Index = Attr.getAttributeSpellingListIndex();
+  unsigned Index = AL.getAttributeSpellingListIndex();
   D->addAttr(::new (S.Context)
-             ARMInterruptAttr(Attr.getLoc(), S.Context, Kind, Index));
+             ARMInterruptAttr(AL.getLoc(), S.Context, Kind, Index));
 }
 
 static void handleMSP430InterruptAttr(Sema &S, Decl *D,
-                                      const AttributeList &Attr) {
-  if (!checkAttributeNumArgs(S, Attr, 1))
+                                      const AttributeList &AL) {
+  if (!checkAttributeNumArgs(S, AL, 1))
     return;
 
-  if (!Attr.isArgExpr(0)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type) << Attr.getName()
+  if (!AL.isArgExpr(0)) {
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_type) << AL.getName()
       << AANT_ArgumentIntegerConstant;
     return;    
   }
 
   // FIXME: Check for decl - it should be void ()(void).
 
-  Expr *NumParamsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
+  Expr *NumParamsExpr = static_cast<Expr *>(AL.getArgAsExpr(0));
   llvm::APSInt NumParams(32);
   if (!NumParamsExpr->isIntegerConstantExpr(NumParams, S.Context)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
-      << Attr.getName() << AANT_ArgumentIntegerConstant
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_type)
+      << AL.getName() << AANT_ArgumentIntegerConstant
       << NumParamsExpr->getSourceRange();
     return;
   }
 
   unsigned Num = NumParams.getLimitedValue(255);
   if ((Num & 1) || Num > 30) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_out_of_bounds)
-      << Attr.getName() << (int)NumParams.getSExtValue()
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds)
+      << AL.getName() << (int)NumParams.getSExtValue()
       << NumParamsExpr->getSourceRange();
     return;
   }
 
   D->addAttr(::new (S.Context)
-              MSP430InterruptAttr(Attr.getLoc(), S.Context, Num,
-                                  Attr.getAttributeSpellingListIndex()));
+              MSP430InterruptAttr(AL.getLoc(), S.Context, Num,
+                                  AL.getAttributeSpellingListIndex()));
   D->addAttr(UsedAttr::CreateImplicit(S.Context));
 }
 
 static void handleMipsInterruptAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
+                                    const AttributeList &AL) {
   // Only one optional argument permitted.
-  if (Attr.getNumArgs() > 1) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_too_many_arguments)
-        << Attr.getName() << 1;
+  if (AL.getNumArgs() > 1) {
+    S.Diag(AL.getLoc(), diag::err_attribute_too_many_arguments)
+        << AL.getName() << 1;
     return;
   }
 
   StringRef Str;
   SourceLocation ArgLoc;
 
-  if (Attr.getNumArgs() == 0)
+  if (AL.getNumArgs() == 0)
     Str = "";
-  else if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str, &ArgLoc))
+  else if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &ArgLoc))
     return;
 
   // Semantic checks for a function with the 'interrupt' attribute for MIPS:
@@ -5339,23 +5179,23 @@
     return;
   }
 
-  if (checkAttrMutualExclusion<Mips16Attr>(S, D, Attr.getRange(),
-                                           Attr.getName()))
+  if (checkAttrMutualExclusion<Mips16Attr>(S, D, AL.getRange(),
+                                           AL.getName()))
     return;
 
   MipsInterruptAttr::InterruptType Kind;
   if (!MipsInterruptAttr::ConvertStrToInterruptType(Str, Kind)) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_type_not_supported)
-        << Attr.getName() << "'" + std::string(Str) + "'";
+    S.Diag(AL.getLoc(), diag::warn_attribute_type_not_supported)
+        << AL.getName() << "'" + std::string(Str) + "'";
     return;
   }
 
   D->addAttr(::new (S.Context) MipsInterruptAttr(
-      Attr.getLoc(), S.Context, Kind, Attr.getAttributeSpellingListIndex()));
+      AL.getLoc(), S.Context, Kind, AL.getAttributeSpellingListIndex()));
 }
 
 static void handleAnyX86InterruptAttr(Sema &S, Decl *D,
-                                      const AttributeList &Attr) {
+                                      const AttributeList &AL) {
   // Semantic checks for a function with the 'interrupt' attribute.
   // a) Must be a function.
   // b) Must have the 'void' return type.
@@ -5365,8 +5205,8 @@
   if (!isFunctionOrMethod(D) || !hasFunctionProto(D) || isInstanceMethod(D) ||
       CXXMethodDecl::isStaticOverloadedOperator(
           cast<NamedDecl>(D)->getDeclName().getCXXOverloadedOperator())) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
-        << Attr.getName() << ExpectedFunctionWithProtoType;
+    S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type)
+        << AL.getName() << ExpectedFunctionWithProtoType;
     return;
   }
   // Interrupt handler must have void return type.
@@ -5416,182 +5256,182 @@
     return;
   }
   D->addAttr(::new (S.Context) AnyX86InterruptAttr(
-      Attr.getLoc(), S.Context, Attr.getAttributeSpellingListIndex()));
+      AL.getLoc(), S.Context, AL.getAttributeSpellingListIndex()));
   D->addAttr(UsedAttr::CreateImplicit(S.Context));
 }
 
-static void handleAVRInterruptAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleAVRInterruptAttr(Sema &S, Decl *D, const AttributeList &AL) {
   if (!isFunctionOrMethod(D)) {
     S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type)
         << "'interrupt'" << ExpectedFunction;
     return;
   }
 
-  if (!checkAttributeNumArgs(S, Attr, 0))
+  if (!checkAttributeNumArgs(S, AL, 0))
     return;
 
-  handleSimpleAttribute<AVRInterruptAttr>(S, D, Attr);
+  handleSimpleAttribute<AVRInterruptAttr>(S, D, AL);
 }
 
-static void handleAVRSignalAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleAVRSignalAttr(Sema &S, Decl *D, const AttributeList &AL) {
   if (!isFunctionOrMethod(D)) {
     S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type)
         << "'signal'" << ExpectedFunction;
     return;
   }
 
-  if (!checkAttributeNumArgs(S, Attr, 0))
+  if (!checkAttributeNumArgs(S, AL, 0))
     return;
 
-  handleSimpleAttribute<AVRSignalAttr>(S, D, Attr);
+  handleSimpleAttribute<AVRSignalAttr>(S, D, AL);
 }
 
-static void handleInterruptAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleInterruptAttr(Sema &S, Decl *D, const AttributeList &AL) {
   // Dispatch the interrupt attribute based on the current target.
   switch (S.Context.getTargetInfo().getTriple().getArch()) {
   case llvm::Triple::msp430:
-    handleMSP430InterruptAttr(S, D, Attr);
+    handleMSP430InterruptAttr(S, D, AL);
     break;
   case llvm::Triple::mipsel:
   case llvm::Triple::mips:
-    handleMipsInterruptAttr(S, D, Attr);
+    handleMipsInterruptAttr(S, D, AL);
     break;
   case llvm::Triple::x86:
   case llvm::Triple::x86_64:
-    handleAnyX86InterruptAttr(S, D, Attr);
+    handleAnyX86InterruptAttr(S, D, AL);
     break;
   case llvm::Triple::avr:
-    handleAVRInterruptAttr(S, D, Attr);
+    handleAVRInterruptAttr(S, D, AL);
     break;
   default:
-    handleARMInterruptAttr(S, D, Attr);
+    handleARMInterruptAttr(S, D, AL);
     break;
   }
 }
 
 static void handleAMDGPUFlatWorkGroupSizeAttr(Sema &S, Decl *D,
-                                              const AttributeList &Attr) {
+                                              const AttributeList &AL) {
   uint32_t Min = 0;
-  Expr *MinExpr = Attr.getArgAsExpr(0);
-  if (!checkUInt32Argument(S, Attr, MinExpr, Min))
+  Expr *MinExpr = AL.getArgAsExpr(0);
+  if (!checkUInt32Argument(S, AL, MinExpr, Min))
     return;
 
   uint32_t Max = 0;
-  Expr *MaxExpr = Attr.getArgAsExpr(1);
-  if (!checkUInt32Argument(S, Attr, MaxExpr, Max))
+  Expr *MaxExpr = AL.getArgAsExpr(1);
+  if (!checkUInt32Argument(S, AL, MaxExpr, Max))
     return;
 
   if (Min == 0 && Max != 0) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
-      << Attr.getName() << 0;
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid)
+      << AL.getName() << 0;
     return;
   }
   if (Min > Max) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
-      << Attr.getName() << 1;
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid)
+      << AL.getName() << 1;
     return;
   }
 
   D->addAttr(::new (S.Context)
-             AMDGPUFlatWorkGroupSizeAttr(Attr.getLoc(), S.Context, Min, Max,
-                                         Attr.getAttributeSpellingListIndex()));
+             AMDGPUFlatWorkGroupSizeAttr(AL.getLoc(), S.Context, Min, Max,
+                                         AL.getAttributeSpellingListIndex()));
 }
 
 static void handleAMDGPUWavesPerEUAttr(Sema &S, Decl *D,
-                                       const AttributeList &Attr) {
+                                       const AttributeList &AL) {
   uint32_t Min = 0;
-  Expr *MinExpr = Attr.getArgAsExpr(0);
-  if (!checkUInt32Argument(S, Attr, MinExpr, Min))
+  Expr *MinExpr = AL.getArgAsExpr(0);
+  if (!checkUInt32Argument(S, AL, MinExpr, Min))
     return;
 
   uint32_t Max = 0;
-  if (Attr.getNumArgs() == 2) {
-    Expr *MaxExpr = Attr.getArgAsExpr(1);
-    if (!checkUInt32Argument(S, Attr, MaxExpr, Max))
+  if (AL.getNumArgs() == 2) {
+    Expr *MaxExpr = AL.getArgAsExpr(1);
+    if (!checkUInt32Argument(S, AL, MaxExpr, Max))
       return;
   }
 
   if (Min == 0 && Max != 0) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
-      << Attr.getName() << 0;
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid)
+      << AL.getName() << 0;
     return;
   }
   if (Max != 0 && Min > Max) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
-      << Attr.getName() << 1;
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid)
+      << AL.getName() << 1;
     return;
   }
 
   D->addAttr(::new (S.Context)
-             AMDGPUWavesPerEUAttr(Attr.getLoc(), S.Context, Min, Max,
-                                  Attr.getAttributeSpellingListIndex()));
+             AMDGPUWavesPerEUAttr(AL.getLoc(), S.Context, Min, Max,
+                                  AL.getAttributeSpellingListIndex()));
 }
 
 static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
+                                    const AttributeList &AL) {
   uint32_t NumSGPR = 0;
-  Expr *NumSGPRExpr = Attr.getArgAsExpr(0);
-  if (!checkUInt32Argument(S, Attr, NumSGPRExpr, NumSGPR))
+  Expr *NumSGPRExpr = AL.getArgAsExpr(0);
+  if (!checkUInt32Argument(S, AL, NumSGPRExpr, NumSGPR))
     return;
 
   D->addAttr(::new (S.Context)
-             AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context, NumSGPR,
-                               Attr.getAttributeSpellingListIndex()));
+             AMDGPUNumSGPRAttr(AL.getLoc(), S.Context, NumSGPR,
+                               AL.getAttributeSpellingListIndex()));
 }
 
 static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
+                                    const AttributeList &AL) {
   uint32_t NumVGPR = 0;
-  Expr *NumVGPRExpr = Attr.getArgAsExpr(0);
-  if (!checkUInt32Argument(S, Attr, NumVGPRExpr, NumVGPR))
+  Expr *NumVGPRExpr = AL.getArgAsExpr(0);
+  if (!checkUInt32Argument(S, AL, NumVGPRExpr, NumVGPR))
     return;
 
   D->addAttr(::new (S.Context)
-             AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context, NumVGPR,
-                               Attr.getAttributeSpellingListIndex()));
+             AMDGPUNumVGPRAttr(AL.getLoc(), S.Context, NumVGPR,
+                               AL.getAttributeSpellingListIndex()));
 }
 
 static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
-                                              const AttributeList& Attr) {
+                                              const AttributeList& AL) {
   // If we try to apply it to a function pointer, don't warn, but don't
   // do anything, either. It doesn't matter anyway, because there's nothing
   // special about calling a force_align_arg_pointer function.
-  ValueDecl *VD = dyn_cast<ValueDecl>(D);
+  const auto *VD = dyn_cast<ValueDecl>(D);
   if (VD && VD->getType()->isFunctionPointerType())
     return;
   // Also don't warn on function pointer typedefs.
-  TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D);
+  const auto *TD = dyn_cast<TypedefNameDecl>(D);
   if (TD && (TD->getUnderlyingType()->isFunctionPointerType() ||
     TD->getUnderlyingType()->isFunctionType()))
     return;
   // Attribute can only be applied to function types.
   if (!isa<FunctionDecl>(D)) {
-    S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
-      << Attr.getName() << /* function */0;
+    S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type)
+      << AL.getName() << ExpectedFunction;
     return;
   }
 
   D->addAttr(::new (S.Context)
-              X86ForceAlignArgPointerAttr(Attr.getRange(), S.Context,
-                                        Attr.getAttributeSpellingListIndex()));
+              X86ForceAlignArgPointerAttr(AL.getRange(), S.Context,
+                                        AL.getAttributeSpellingListIndex()));
 }
 
-static void handleLayoutVersion(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleLayoutVersion(Sema &S, Decl *D, const AttributeList &AL) {
   uint32_t Version;
-  Expr *VersionExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
-  if (!checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), Version))
+  Expr *VersionExpr = static_cast<Expr *>(AL.getArgAsExpr(0));
+  if (!checkUInt32Argument(S, AL, AL.getArgAsExpr(0), Version))
     return;
 
   // TODO: Investigate what happens with the next major version of MSVC.
   if (Version != LangOptions::MSVC2015) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_out_of_bounds)
-        << Attr.getName() << Version << VersionExpr->getSourceRange();
+    S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds)
+        << AL.getName() << Version << VersionExpr->getSourceRange();
     return;
   }
 
   D->addAttr(::new (S.Context)
-                 LayoutVersionAttr(Attr.getRange(), S.Context, Version,
-                                   Attr.getAttributeSpellingListIndex()));
+                 LayoutVersionAttr(AL.getRange(), S.Context, Version,
+                                   AL.getAttributeSpellingListIndex()));
 }
 
 DLLImportAttr *Sema::mergeDLLImportAttr(Decl *D, SourceRange Range,
@@ -5628,7 +5468,7 @@
     return;
   }
 
-  if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+  if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
     if (FD->isInlined() && A.getKind() == AttributeList::AT_DLLImport &&
         !S.Context.getTargetInfo().getCXXABI().isMicrosoft()) {
       // MinGW doesn't allow dllimport on inline functions.
@@ -5638,7 +5478,7 @@
     }
   }
 
-  if (auto *MD = dyn_cast<CXXMethodDecl>(D)) {
+  if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
     if (S.Context.getTargetInfo().getCXXABI().isMicrosoft() &&
         MD->getParent()->isLambda()) {
       S.Diag(A.getRange().getBegin(), diag::err_attribute_dll_lambda) << A.getName();
@@ -5667,7 +5507,7 @@
     D->dropAttr<MSInheritanceAttr>();
   }
 
-  CXXRecordDecl *RD = cast<CXXRecordDecl>(D);
+  auto *RD = cast<CXXRecordDecl>(D);
   if (RD->hasDefinition()) {
     if (checkMSInheritanceAttrOnDefinition(RD, Range, BestCase,
                                            SemanticSpelling)) {
@@ -5690,7 +5530,7 @@
       MSInheritanceAttr(Range, Context, BestCase, AttrSpellingListIndex);
 }
 
-static void handleCapabilityAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleCapabilityAttr(Sema &S, Decl *D, const AttributeList &AL) {
   // The capability attributes take a single string parameter for the name of
   // the capability they represent. The lockable attribute does not take any
   // parameters. However, semantically, both attributes represent the same
@@ -5701,8 +5541,8 @@
   // literal will be considered a "mutex."
   StringRef N("mutex");
   SourceLocation LiteralLoc;
-  if (Attr.getKind() == AttributeList::AT_Capability &&
-      !S.checkStringLiteralArgumentAttr(Attr, 0, N, &LiteralLoc))
+  if (AL.getKind() == AttributeList::AT_Capability &&
+      !S.checkStringLiteralArgumentAttr(AL, 0, N, &LiteralLoc))
     return;
 
   // Currently, there are only two names allowed for a capability: role and
@@ -5710,80 +5550,80 @@
   if (!N.equals_lower("mutex") && !N.equals_lower("role"))
     S.Diag(LiteralLoc, diag::warn_invalid_capability_name) << N;
 
-  D->addAttr(::new (S.Context) CapabilityAttr(Attr.getRange(), S.Context, N,
-                                        Attr.getAttributeSpellingListIndex()));
+  D->addAttr(::new (S.Context) CapabilityAttr(AL.getRange(), S.Context, N,
+                                        AL.getAttributeSpellingListIndex()));
 }
 
 static void handleAssertCapabilityAttr(Sema &S, Decl *D,
-                                       const AttributeList &Attr) {
+                                       const AttributeList &AL) {
   SmallVector<Expr*, 1> Args;
-  if (!checkLockFunAttrCommon(S, D, Attr, Args))
+  if (!checkLockFunAttrCommon(S, D, AL, Args))
     return;
 
-  D->addAttr(::new (S.Context) AssertCapabilityAttr(Attr.getRange(), S.Context,
+  D->addAttr(::new (S.Context) AssertCapabilityAttr(AL.getRange(), S.Context,
                                                     Args.data(), Args.size(),
-                                        Attr.getAttributeSpellingListIndex()));
+                                        AL.getAttributeSpellingListIndex()));
 }
 
 static void handleAcquireCapabilityAttr(Sema &S, Decl *D,
-                                        const AttributeList &Attr) {
+                                        const AttributeList &AL) {
   SmallVector<Expr*, 1> Args;
-  if (!checkLockFunAttrCommon(S, D, Attr, Args))
+  if (!checkLockFunAttrCommon(S, D, AL, Args))
     return;
 
-  D->addAttr(::new (S.Context) AcquireCapabilityAttr(Attr.getRange(),
+  D->addAttr(::new (S.Context) AcquireCapabilityAttr(AL.getRange(),
                                                      S.Context,
                                                      Args.data(), Args.size(),
-                                        Attr.getAttributeSpellingListIndex()));
+                                        AL.getAttributeSpellingListIndex()));
 }
 
 static void handleTryAcquireCapabilityAttr(Sema &S, Decl *D,
-                                           const AttributeList &Attr) {
+                                           const AttributeList &AL) {
   SmallVector<Expr*, 2> Args;
-  if (!checkTryLockFunAttrCommon(S, D, Attr, Args))
+  if (!checkTryLockFunAttrCommon(S, D, AL, Args))
     return;
 
-  D->addAttr(::new (S.Context) TryAcquireCapabilityAttr(Attr.getRange(),
+  D->addAttr(::new (S.Context) TryAcquireCapabilityAttr(AL.getRange(),
                                                         S.Context,
-                                                        Attr.getArgAsExpr(0),
+                                                        AL.getArgAsExpr(0),
                                                         Args.data(),
                                                         Args.size(),
-                                        Attr.getAttributeSpellingListIndex()));
+                                        AL.getAttributeSpellingListIndex()));
 }
 
 static void handleReleaseCapabilityAttr(Sema &S, Decl *D,
-                                        const AttributeList &Attr) {
+                                        const AttributeList &AL) {
   // Check that all arguments are lockable objects.
   SmallVector<Expr *, 1> Args;
-  checkAttrArgsAreCapabilityObjs(S, D, Attr, Args, 0, true);
+  checkAttrArgsAreCapabilityObjs(S, D, AL, Args, 0, true);
 
   D->addAttr(::new (S.Context) ReleaseCapabilityAttr(
-      Attr.getRange(), S.Context, Args.data(), Args.size(),
-      Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, Args.data(), Args.size(),
+      AL.getAttributeSpellingListIndex()));
 }
 
 static void handleRequiresCapabilityAttr(Sema &S, Decl *D,
-                                         const AttributeList &Attr) {
-  if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
+                                         const AttributeList &AL) {
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
     return;
 
   // check that all arguments are lockable objects
   SmallVector<Expr*, 1> Args;
-  checkAttrArgsAreCapabilityObjs(S, D, Attr, Args);
+  checkAttrArgsAreCapabilityObjs(S, D, AL, Args);
   if (Args.empty())
     return;
 
   RequiresCapabilityAttr *RCA = ::new (S.Context)
-    RequiresCapabilityAttr(Attr.getRange(), S.Context, Args.data(),
-                           Args.size(), Attr.getAttributeSpellingListIndex());
+    RequiresCapabilityAttr(AL.getRange(), S.Context, Args.data(),
+                           Args.size(), AL.getAttributeSpellingListIndex());
 
   D->addAttr(RCA);
 }
 
-static void handleDeprecatedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (auto *NSD = dyn_cast<NamespaceDecl>(D)) {
+static void handleDeprecatedAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (const auto *NSD = dyn_cast<NamespaceDecl>(D)) {
     if (NSD->isAnonymousNamespace()) {
-      S.Diag(Attr.getLoc(), diag::warn_deprecated_anonymous_namespace);
+      S.Diag(AL.getLoc(), diag::warn_deprecated_anonymous_namespace);
       // Do not want to attach the attribute to the namespace because that will
       // cause confusing diagnostic reports for uses of declarations within the
       // namespace.
@@ -5793,25 +5633,25 @@
 
   // Handle the cases where the attribute has a text message.
   StringRef Str, Replacement;
-  if (Attr.isArgExpr(0) && Attr.getArgAsExpr(0) &&
-      !S.checkStringLiteralArgumentAttr(Attr, 0, Str))
+  if (AL.isArgExpr(0) && AL.getArgAsExpr(0) &&
+      !S.checkStringLiteralArgumentAttr(AL, 0, Str))
     return;
 
   // Only support a single optional message for Declspec and CXX11.
-  if (Attr.isDeclspecAttribute() || Attr.isCXX11Attribute())
-    checkAttributeAtMostNumArgs(S, Attr, 1);
-  else if (Attr.isArgExpr(1) && Attr.getArgAsExpr(1) &&
-           !S.checkStringLiteralArgumentAttr(Attr, 1, Replacement))
+  if (AL.isDeclspecAttribute() || AL.isCXX11Attribute())
+    checkAttributeAtMostNumArgs(S, AL, 1);
+  else if (AL.isArgExpr(1) && AL.getArgAsExpr(1) &&
+           !S.checkStringLiteralArgumentAttr(AL, 1, Replacement))
     return;
 
   if (!S.getLangOpts().CPlusPlus14)
-    if (Attr.isCXX11Attribute() &&
-        !(Attr.hasScope() && Attr.getScopeName()->isStr("gnu")))
-      S.Diag(Attr.getLoc(), diag::ext_cxx14_attr) << Attr.getName();
+    if (AL.isCXX11Attribute() &&
+        !(AL.hasScope() && AL.getScopeName()->isStr("gnu")))
+      S.Diag(AL.getLoc(), diag::ext_cxx14_attr) << AL.getName();
 
   D->addAttr(::new (S.Context)
-                 DeprecatedAttr(Attr.getRange(), S.Context, Str, Replacement,
-                                Attr.getAttributeSpellingListIndex()));
+                 DeprecatedAttr(AL.getRange(), S.Context, Str, Replacement,
+                                AL.getAttributeSpellingListIndex()));
 }
 
 static bool isGlobalVar(const Decl *D) {
@@ -5820,35 +5660,35 @@
   return false;
 }
 
-static void handleNoSanitizeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
-  if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
+static void handleNoSanitizeAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
     return;
 
   std::vector<StringRef> Sanitizers;
 
-  for (unsigned I = 0, E = Attr.getNumArgs(); I != E; ++I) {
+  for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
     StringRef SanitizerName;
     SourceLocation LiteralLoc;
 
-    if (!S.checkStringLiteralArgumentAttr(Attr, I, SanitizerName, &LiteralLoc))
+    if (!S.checkStringLiteralArgumentAttr(AL, I, SanitizerName, &LiteralLoc))
       return;
 
     if (parseSanitizerValue(SanitizerName, /*AllowGroups=*/true) == 0)
       S.Diag(LiteralLoc, diag::warn_unknown_sanitizer_ignored) << SanitizerName;
     else if (isGlobalVar(D) && SanitizerName != "address")
       S.Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-          << Attr.getName() << ExpectedFunctionOrMethod;
+          << AL.getName() << ExpectedFunctionOrMethod;
     Sanitizers.push_back(SanitizerName);
   }
 
   D->addAttr(::new (S.Context) NoSanitizeAttr(
-      Attr.getRange(), S.Context, Sanitizers.data(), Sanitizers.size(),
-      Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, Sanitizers.data(), Sanitizers.size(),
+      AL.getAttributeSpellingListIndex()));
 }
 
 static void handleNoSanitizeSpecificAttr(Sema &S, Decl *D,
-                                         const AttributeList &Attr) {
-  StringRef AttrName = Attr.getName()->getName();
+                                         const AttributeList &AL) {
+  StringRef AttrName = AL.getName()->getName();
   normalizeName(AttrName);
   StringRef SanitizerName = llvm::StringSwitch<StringRef>(AttrName)
                                 .Case("no_address_safety_analysis", "address")
@@ -5857,63 +5697,63 @@
                                 .Case("no_sanitize_memory", "memory");
   if (isGlobalVar(D) && SanitizerName != "address")
     S.Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-        << Attr.getName() << ExpectedFunction;
+        << AL.getName() << ExpectedFunction;
   D->addAttr(::new (S.Context)
-                 NoSanitizeAttr(Attr.getRange(), S.Context, &SanitizerName, 1,
-                                Attr.getAttributeSpellingListIndex()));
+                 NoSanitizeAttr(AL.getRange(), S.Context, &SanitizerName, 1,
+                                AL.getAttributeSpellingListIndex()));
 }
 
 static void handleInternalLinkageAttr(Sema &S, Decl *D,
-                                      const AttributeList &Attr) {
+                                      const AttributeList &AL) {
   if (InternalLinkageAttr *Internal =
-          S.mergeInternalLinkageAttr(D, Attr.getRange(), Attr.getName(),
-                                     Attr.getAttributeSpellingListIndex()))
+          S.mergeInternalLinkageAttr(D, AL.getRange(), AL.getName(),
+                                     AL.getAttributeSpellingListIndex()))
     D->addAttr(Internal);
 }
 
-static void handleOpenCLNoSVMAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+static void handleOpenCLNoSVMAttr(Sema &S, Decl *D, const AttributeList &AL) {
   if (S.LangOpts.OpenCLVersion != 200)
-    S.Diag(Attr.getLoc(), diag::err_attribute_requires_opencl_version)
-        << Attr.getName() << "2.0" << 0;
+    S.Diag(AL.getLoc(), diag::err_attribute_requires_opencl_version)
+        << AL.getName() << "2.0" << 0;
   else
-    S.Diag(Attr.getLoc(), diag::warn_opencl_attr_deprecated_ignored)
-        << Attr.getName() << "2.0";
+    S.Diag(AL.getLoc(), diag::warn_opencl_attr_deprecated_ignored)
+        << AL.getName() << "2.0";
 }
 
 /// Handles semantic checking for features that are common to all attributes,
 /// such as checking whether a parameter was properly specified, or the correct
 /// number of arguments were passed, etc.
-static bool handleCommonAttributeFeatures(Sema &S, Scope *scope, Decl *D,
-                                          const AttributeList &Attr) {
+static bool handleCommonAttributeFeatures(Sema &S, Decl *D,
+                                          const AttributeList &AL) {
   // Several attributes carry different semantics than the parsing requires, so
   // those are opted out of the common argument checks.
   //
   // We also bail on unknown and ignored attributes because those are handled
   // as part of the target-specific handling logic.
-  if (Attr.getKind() == AttributeList::UnknownAttribute)
+  if (AL.getKind() == AttributeList::UnknownAttribute)
     return false;
   // Check whether the attribute requires specific language extensions to be
   // enabled.
-  if (!Attr.diagnoseLangOpts(S))
+  if (!AL.diagnoseLangOpts(S))
     return true;
   // Check whether the attribute appertains to the given subject.
-  if (!Attr.diagnoseAppertainsTo(S, D))
+  if (!AL.diagnoseAppertainsTo(S, D))
     return true;
-  if (Attr.hasCustomParsing())
+  if (AL.hasCustomParsing())
     return false;
 
-  if (Attr.getMinArgs() == Attr.getMaxArgs()) {
+  if (AL.getMinArgs() == AL.getMaxArgs()) {
     // If there are no optional arguments, then checking for the argument count
     // is trivial.
-    if (!checkAttributeNumArgs(S, Attr, Attr.getMinArgs()))
+    if (!checkAttributeNumArgs(S, AL, AL.getMinArgs()))
       return true;
   } else {
     // There are optional arguments, so checking is slightly more involved.
-    if (Attr.getMinArgs() &&
-        !checkAttributeAtLeastNumArgs(S, Attr, Attr.getMinArgs()))
+    if (AL.getMinArgs() &&
+        !checkAttributeAtLeastNumArgs(S, AL, AL.getMinArgs()))
       return true;
-    else if (!Attr.hasVariadicArg() && Attr.getMaxArgs() &&
-             !checkAttributeAtMostNumArgs(S, Attr, Attr.getMaxArgs()))
+    else if (!AL.hasVariadicArg() && AL.getMaxArgs() &&
+             !checkAttributeAtMostNumArgs(S, AL, AL.getMaxArgs()))
       return true;
   }
 
@@ -5921,13 +5761,13 @@
 }
 
 static void handleOpenCLAccessAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
+                                   const AttributeList &AL) {
   if (D->isInvalidDecl())
     return;
 
   // Check if there is only one access qualifier.
   if (D->hasAttr<OpenCLAccessAttr>()) {
-    S.Diag(Attr.getLoc(), diag::err_opencl_multiple_access_qualifiers)
+    S.Diag(AL.getLoc(), diag::err_opencl_multiple_access_qualifiers)
         << D->getSourceRange();
     D->setInvalidDecl(true);
     return;
@@ -5938,12 +5778,12 @@
   // OpenCL v2.0 s6.13.6 - A kernel cannot read from and write to the same pipe
   // object. Using the read_write (or __read_write) qualifier with the pipe
   // qualifier is a compilation error.
-  if (const ParmVarDecl *PDecl = dyn_cast<ParmVarDecl>(D)) {
+  if (const auto *PDecl = dyn_cast<ParmVarDecl>(D)) {
     const Type *DeclTy = PDecl->getType().getCanonicalType().getTypePtr();
-    if (Attr.getName()->getName().find("read_write") != StringRef::npos) {
+    if (AL.getName()->getName().find("read_write") != StringRef::npos) {
       if (S.getLangOpts().OpenCLVersion < 200 || DeclTy->isPipeType()) {
-        S.Diag(Attr.getLoc(), diag::err_opencl_invalid_read_write)
-            << Attr.getName() << PDecl->getType() << DeclTy->isImageType();
+        S.Diag(AL.getLoc(), diag::err_opencl_invalid_read_write)
+            << AL.getName() << PDecl->getType() << DeclTy->isImageType();
         D->setInvalidDecl(true);
         return;
       }
@@ -5951,7 +5791,7 @@
   }
 
   D->addAttr(::new (S.Context) OpenCLAccessAttr(
-      Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+      AL.getRange(), S.Context, AL.getAttributeSpellingListIndex()));
 }
 
 //===----------------------------------------------------------------------===//
@@ -5962,441 +5802,448 @@
 /// the attribute applies to decls.  If the attribute is a type attribute, just
 /// silently ignore it if a GNU attribute.
 static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
-                                 const AttributeList &Attr,
+                                 const AttributeList &AL,
                                  bool IncludeCXX11Attributes) {
-  if (Attr.isInvalid() || Attr.getKind() == AttributeList::IgnoredAttribute)
+  if (AL.isInvalid() || AL.getKind() == AttributeList::IgnoredAttribute)
     return;
 
   // Ignore C++11 attributes on declarator chunks: they appertain to the type
   // instead.
-  if (Attr.isCXX11Attribute() && !IncludeCXX11Attributes)
+  if (AL.isCXX11Attribute() && !IncludeCXX11Attributes)
     return;
 
   // Unknown attributes are automatically warned on. Target-specific attributes
   // which do not apply to the current target architecture are treated as
   // though they were unknown attributes.
-  if (Attr.getKind() == AttributeList::UnknownAttribute ||
-      !Attr.existsInTarget(S.Context.getTargetInfo())) {
-    S.Diag(Attr.getLoc(), Attr.isDeclspecAttribute()
+  if (AL.getKind() == AttributeList::UnknownAttribute ||
+      !AL.existsInTarget(S.Context.getTargetInfo())) {
+    S.Diag(AL.getLoc(), AL.isDeclspecAttribute()
                               ? diag::warn_unhandled_ms_attribute_ignored
                               : diag::warn_unknown_attribute_ignored)
-        << Attr.getName();
+        << AL.getName();
     return;
   }
 
-  if (handleCommonAttributeFeatures(S, scope, D, Attr))
+  if (handleCommonAttributeFeatures(S, D, AL))
     return;
 
-  switch (Attr.getKind()) {
+  switch (AL.getKind()) {
   default:
-    if (!Attr.isStmtAttr()) {
+    if (!AL.isStmtAttr()) {
       // Type attributes are handled elsewhere; silently move on.
-      assert(Attr.isTypeAttr() && "Non-type attribute not handled");
+      assert(AL.isTypeAttr() && "Non-type attribute not handled");
       break;
     }
-    S.Diag(Attr.getLoc(), diag::err_stmt_attribute_invalid_on_decl)
-        << Attr.getName() << D->getLocation();
+    S.Diag(AL.getLoc(), diag::err_stmt_attribute_invalid_on_decl)
+        << AL.getName() << D->getLocation();
     break;
   case AttributeList::AT_Interrupt:
-    handleInterruptAttr(S, D, Attr);
+    handleInterruptAttr(S, D, AL);
     break;
   case AttributeList::AT_X86ForceAlignArgPointer:
-    handleX86ForceAlignArgPointerAttr(S, D, Attr);
+    handleX86ForceAlignArgPointerAttr(S, D, AL);
     break;
   case AttributeList::AT_DLLExport:
   case AttributeList::AT_DLLImport:
-    handleDLLAttr(S, D, Attr);
+    handleDLLAttr(S, D, AL);
     break;
   case AttributeList::AT_Mips16:
     handleSimpleAttributeWithExclusions<Mips16Attr, MicroMipsAttr,
-                                        MipsInterruptAttr>(S, D, Attr);
+                                        MipsInterruptAttr>(S, D, AL);
     break;
   case AttributeList::AT_NoMips16:
-    handleSimpleAttribute<NoMips16Attr>(S, D, Attr);
+    handleSimpleAttribute<NoMips16Attr>(S, D, AL);
     break;
   case AttributeList::AT_MicroMips:
-    handleSimpleAttributeWithExclusions<MicroMipsAttr, Mips16Attr>(S, D, Attr);
+    handleSimpleAttributeWithExclusions<MicroMipsAttr, Mips16Attr>(S, D, AL);
     break;
   case AttributeList::AT_NoMicroMips:
-    handleSimpleAttribute<NoMicroMipsAttr>(S, D, Attr);
+    handleSimpleAttribute<NoMicroMipsAttr>(S, D, AL);
     break;
   case AttributeList::AT_MipsLongCall:
     handleSimpleAttributeWithExclusions<MipsLongCallAttr, MipsShortCallAttr>(
-        S, D, Attr);
+        S, D, AL);
     break;
   case AttributeList::AT_MipsShortCall:
     handleSimpleAttributeWithExclusions<MipsShortCallAttr, MipsLongCallAttr>(
-        S, D, Attr);
+        S, D, AL);
     break;
   case AttributeList::AT_AMDGPUFlatWorkGroupSize:
-    handleAMDGPUFlatWorkGroupSizeAttr(S, D, Attr);
+    handleAMDGPUFlatWorkGroupSizeAttr(S, D, AL);
     break;
   case AttributeList::AT_AMDGPUWavesPerEU:
-    handleAMDGPUWavesPerEUAttr(S, D, Attr);
+    handleAMDGPUWavesPerEUAttr(S, D, AL);
     break;
   case AttributeList::AT_AMDGPUNumSGPR:
-    handleAMDGPUNumSGPRAttr(S, D, Attr);
+    handleAMDGPUNumSGPRAttr(S, D, AL);
     break;
   case AttributeList::AT_AMDGPUNumVGPR:
-    handleAMDGPUNumVGPRAttr(S, D, Attr);
+    handleAMDGPUNumVGPRAttr(S, D, AL);
     break;
   case AttributeList::AT_AVRSignal:
-    handleAVRSignalAttr(S, D, Attr);
+    handleAVRSignalAttr(S, D, AL);
     break;
   case AttributeList::AT_IBAction:
-    handleSimpleAttribute<IBActionAttr>(S, D, Attr);
+    handleSimpleAttribute<IBActionAttr>(S, D, AL);
     break;
   case AttributeList::AT_IBOutlet:
-    handleIBOutlet(S, D, Attr);
+    handleIBOutlet(S, D, AL);
     break;
   case AttributeList::AT_IBOutletCollection:
-    handleIBOutletCollection(S, D, Attr);
+    handleIBOutletCollection(S, D, AL);
     break;
   case AttributeList::AT_IFunc:
-    handleIFuncAttr(S, D, Attr);
+    handleIFuncAttr(S, D, AL);
     break;
   case AttributeList::AT_Alias:
-    handleAliasAttr(S, D, Attr);
+    handleAliasAttr(S, D, AL);
     break;
   case AttributeList::AT_Aligned:
-    handleAlignedAttr(S, D, Attr);
+    handleAlignedAttr(S, D, AL);
     break;
   case AttributeList::AT_AlignValue:
-    handleAlignValueAttr(S, D, Attr);
+    handleAlignValueAttr(S, D, AL);
     break;
   case AttributeList::AT_AllocSize:
-    handleAllocSizeAttr(S, D, Attr);
+    handleAllocSizeAttr(S, D, AL);
     break;
   case AttributeList::AT_AlwaysInline:
-    handleAlwaysInlineAttr(S, D, Attr);
+    handleAlwaysInlineAttr(S, D, AL);
+    break;
+  case AttributeList::AT_Artificial:
+    handleSimpleAttribute<ArtificialAttr>(S, D, AL);
     break;
   case AttributeList::AT_AnalyzerNoReturn:
-    handleAnalyzerNoReturnAttr(S, D, Attr);
+    handleAnalyzerNoReturnAttr(S, D, AL);
     break;
   case AttributeList::AT_TLSModel:
-    handleTLSModelAttr(S, D, Attr);
+    handleTLSModelAttr(S, D, AL);
     break;
   case AttributeList::AT_Annotate:
-    handleAnnotateAttr(S, D, Attr);
+    handleAnnotateAttr(S, D, AL);
     break;
   case AttributeList::AT_Availability:
-    handleAvailabilityAttr(S, D, Attr);
+    handleAvailabilityAttr(S, D, AL);
     break;
   case AttributeList::AT_CarriesDependency:
-    handleDependencyAttr(S, scope, D, Attr);
+    handleDependencyAttr(S, scope, D, AL);
     break;
   case AttributeList::AT_Common:
-    handleCommonAttr(S, D, Attr);
+    handleCommonAttr(S, D, AL);
     break;
   case AttributeList::AT_CUDAConstant:
-    handleConstantAttr(S, D, Attr);
+    handleConstantAttr(S, D, AL);
     break;
   case AttributeList::AT_PassObjectSize:
-    handlePassObjectSizeAttr(S, D, Attr);
+    handlePassObjectSizeAttr(S, D, AL);
     break;
   case AttributeList::AT_Constructor:
-    handleConstructorAttr(S, D, Attr);
+    handleConstructorAttr(S, D, AL);
     break;
   case AttributeList::AT_CXX11NoReturn:
-    handleSimpleAttribute<CXX11NoReturnAttr>(S, D, Attr);
+    handleSimpleAttribute<CXX11NoReturnAttr>(S, D, AL);
     break;
   case AttributeList::AT_Deprecated:
-    handleDeprecatedAttr(S, D, Attr);
+    handleDeprecatedAttr(S, D, AL);
     break;
   case AttributeList::AT_Destructor:
-    handleDestructorAttr(S, D, Attr);
+    handleDestructorAttr(S, D, AL);
     break;
   case AttributeList::AT_EnableIf:
-    handleEnableIfAttr(S, D, Attr);
+    handleEnableIfAttr(S, D, AL);
     break;
   case AttributeList::AT_DiagnoseIf:
-    handleDiagnoseIfAttr(S, D, Attr);
+    handleDiagnoseIfAttr(S, D, AL);
     break;
   case AttributeList::AT_ExtVectorType:
-    handleExtVectorTypeAttr(S, scope, D, Attr);
+    handleExtVectorTypeAttr(S, D, AL);
     break;
   case AttributeList::AT_ExternalSourceSymbol:
-    handleExternalSourceSymbolAttr(S, D, Attr);
+    handleExternalSourceSymbolAttr(S, D, AL);
     break;
   case AttributeList::AT_MinSize:
-    handleMinSizeAttr(S, D, Attr);
+    handleMinSizeAttr(S, D, AL);
     break;
   case AttributeList::AT_OptimizeNone:
-    handleOptimizeNoneAttr(S, D, Attr);
+    handleOptimizeNoneAttr(S, D, AL);
     break;
   case AttributeList::AT_FlagEnum:
-    handleSimpleAttribute<FlagEnumAttr>(S, D, Attr);
+    handleSimpleAttribute<FlagEnumAttr>(S, D, AL);
     break;
   case AttributeList::AT_EnumExtensibility:
-    handleEnumExtensibilityAttr(S, D, Attr);
+    handleEnumExtensibilityAttr(S, D, AL);
     break;
   case AttributeList::AT_Flatten:
-    handleSimpleAttribute<FlattenAttr>(S, D, Attr);
+    handleSimpleAttribute<FlattenAttr>(S, D, AL);
     break;
   case AttributeList::AT_Format:
-    handleFormatAttr(S, D, Attr);
+    handleFormatAttr(S, D, AL);
     break;
   case AttributeList::AT_FormatArg:
-    handleFormatArgAttr(S, D, Attr);
+    handleFormatArgAttr(S, D, AL);
     break;
   case AttributeList::AT_CUDAGlobal:
-    handleGlobalAttr(S, D, Attr);
+    handleGlobalAttr(S, D, AL);
     break;
   case AttributeList::AT_CUDADevice:
     handleSimpleAttributeWithExclusions<CUDADeviceAttr, CUDAGlobalAttr>(S, D,
-                                                                        Attr);
+                                                                        AL);
     break;
   case AttributeList::AT_CUDAHost:
     handleSimpleAttributeWithExclusions<CUDAHostAttr, CUDAGlobalAttr>(S, D,
-                                                                      Attr);
+                                                                      AL);
     break;
   case AttributeList::AT_GNUInline:
-    handleGNUInlineAttr(S, D, Attr);
+    handleGNUInlineAttr(S, D, AL);
     break;
   case AttributeList::AT_CUDALaunchBounds:
-    handleLaunchBoundsAttr(S, D, Attr);
+    handleLaunchBoundsAttr(S, D, AL);
     break;
   case AttributeList::AT_Restrict:
-    handleRestrictAttr(S, D, Attr);
+    handleRestrictAttr(S, D, AL);
     break;
   case AttributeList::AT_MayAlias:
-    handleSimpleAttribute<MayAliasAttr>(S, D, Attr);
+    handleSimpleAttribute<MayAliasAttr>(S, D, AL);
     break;
   case AttributeList::AT_Mode:
-    handleModeAttr(S, D, Attr);
+    handleModeAttr(S, D, AL);
     break;
   case AttributeList::AT_NoAlias:
-    handleSimpleAttribute<NoAliasAttr>(S, D, Attr);
+    handleSimpleAttribute<NoAliasAttr>(S, D, AL);
     break;
   case AttributeList::AT_NoCommon:
-    handleSimpleAttribute<NoCommonAttr>(S, D, Attr);
+    handleSimpleAttribute<NoCommonAttr>(S, D, AL);
     break;
   case AttributeList::AT_NoSplitStack:
-    handleSimpleAttribute<NoSplitStackAttr>(S, D, Attr);
+    handleSimpleAttribute<NoSplitStackAttr>(S, D, AL);
     break;
   case AttributeList::AT_NonNull:
-    if (ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(D))
-      handleNonNullAttrParameter(S, PVD, Attr);
+    if (auto *PVD = dyn_cast<ParmVarDecl>(D))
+      handleNonNullAttrParameter(S, PVD, AL);
     else
-      handleNonNullAttr(S, D, Attr);
+      handleNonNullAttr(S, D, AL);
     break;
   case AttributeList::AT_ReturnsNonNull:
-    handleReturnsNonNullAttr(S, D, Attr);
+    handleReturnsNonNullAttr(S, D, AL);
     break;
   case AttributeList::AT_NoEscape:
-    handleNoEscapeAttr(S, D, Attr);
+    handleNoEscapeAttr(S, D, AL);
     break;
   case AttributeList::AT_AssumeAligned:
-    handleAssumeAlignedAttr(S, D, Attr);
+    handleAssumeAlignedAttr(S, D, AL);
     break;
   case AttributeList::AT_AllocAlign:
-    handleAllocAlignAttr(S, D, Attr);
+    handleAllocAlignAttr(S, D, AL);
     break;
   case AttributeList::AT_Overloadable:
-    handleSimpleAttribute<OverloadableAttr>(S, D, Attr);
+    handleSimpleAttribute<OverloadableAttr>(S, D, AL);
     break;
   case AttributeList::AT_Ownership:
-    handleOwnershipAttr(S, D, Attr);
+    handleOwnershipAttr(S, D, AL);
     break;
   case AttributeList::AT_Cold:
-    handleColdAttr(S, D, Attr);
+    handleSimpleAttributeWithExclusions<ColdAttr, HotAttr>(S, D, AL);
     break;
   case AttributeList::AT_Hot:
-    handleHotAttr(S, D, Attr);
+    handleSimpleAttributeWithExclusions<HotAttr, ColdAttr>(S, D, AL);
     break;
   case AttributeList::AT_Naked:
-    handleNakedAttr(S, D, Attr);
+    handleNakedAttr(S, D, AL);
     break;
   case AttributeList::AT_NoReturn:
-    handleNoReturnAttr(S, D, Attr);
+    handleNoReturnAttr(S, D, AL);
     break;
   case AttributeList::AT_NoThrow:
-    handleSimpleAttribute<NoThrowAttr>(S, D, Attr);
+    handleSimpleAttribute<NoThrowAttr>(S, D, AL);
     break;
   case AttributeList::AT_CUDAShared:
-    handleSharedAttr(S, D, Attr);
+    handleSharedAttr(S, D, AL);
     break;
   case AttributeList::AT_VecReturn:
-    handleVecReturnAttr(S, D, Attr);
+    handleVecReturnAttr(S, D, AL);
     break;
   case AttributeList::AT_ObjCOwnership:
-    handleObjCOwnershipAttr(S, D, Attr);
+    handleObjCOwnershipAttr(S, D, AL);
     break;
   case AttributeList::AT_ObjCPreciseLifetime:
-    handleObjCPreciseLifetimeAttr(S, D, Attr);
+    handleObjCPreciseLifetimeAttr(S, D, AL);
     break;
   case AttributeList::AT_ObjCReturnsInnerPointer:
-    handleObjCReturnsInnerPointerAttr(S, D, Attr);
+    handleObjCReturnsInnerPointerAttr(S, D, AL);
     break;
   case AttributeList::AT_ObjCRequiresSuper:
-    handleObjCRequiresSuperAttr(S, D, Attr);
+    handleObjCRequiresSuperAttr(S, D, AL);
     break;
   case AttributeList::AT_ObjCBridge:
-    handleObjCBridgeAttr(S, scope, D, Attr);
+    handleObjCBridgeAttr(S, D, AL);
     break;
   case AttributeList::AT_ObjCBridgeMutable:
-    handleObjCBridgeMutableAttr(S, scope, D, Attr);
+    handleObjCBridgeMutableAttr(S, D, AL);
     break;
   case AttributeList::AT_ObjCBridgeRelated:
-    handleObjCBridgeRelatedAttr(S, scope, D, Attr);
+    handleObjCBridgeRelatedAttr(S, D, AL);
     break;
   case AttributeList::AT_ObjCDesignatedInitializer:
-    handleObjCDesignatedInitializer(S, D, Attr);
+    handleObjCDesignatedInitializer(S, D, AL);
     break;
   case AttributeList::AT_ObjCRuntimeName:
-    handleObjCRuntimeName(S, D, Attr);
+    handleObjCRuntimeName(S, D, AL);
     break;
    case AttributeList::AT_ObjCRuntimeVisible:
-    handleSimpleAttribute<ObjCRuntimeVisibleAttr>(S, D, Attr);
+    handleSimpleAttribute<ObjCRuntimeVisibleAttr>(S, D, AL);
     break;
   case AttributeList::AT_ObjCBoxable:
-    handleObjCBoxable(S, D, Attr);
+    handleObjCBoxable(S, D, AL);
     break;
   case AttributeList::AT_CFAuditedTransfer:
-    handleCFAuditedTransferAttr(S, D, Attr);
+    handleSimpleAttributeWithExclusions<CFAuditedTransferAttr,
+                                        CFUnknownTransferAttr>(S, D, AL);
     break;
   case AttributeList::AT_CFUnknownTransfer:
-    handleCFUnknownTransferAttr(S, D, Attr);
+    handleSimpleAttributeWithExclusions<CFUnknownTransferAttr,
+                                        CFAuditedTransferAttr>(S, D, AL);
     break;
   case AttributeList::AT_CFConsumed:
   case AttributeList::AT_NSConsumed:
-    handleNSConsumedAttr(S, D, Attr);
+    handleNSConsumedAttr(S, D, AL);
     break;
   case AttributeList::AT_NSConsumesSelf:
-    handleSimpleAttribute<NSConsumesSelfAttr>(S, D, Attr);
+    handleSimpleAttribute<NSConsumesSelfAttr>(S, D, AL);
     break;
   case AttributeList::AT_NSReturnsAutoreleased:
   case AttributeList::AT_NSReturnsNotRetained:
   case AttributeList::AT_CFReturnsNotRetained:
   case AttributeList::AT_NSReturnsRetained:
   case AttributeList::AT_CFReturnsRetained:
-    handleNSReturnsRetainedAttr(S, D, Attr);
+    handleNSReturnsRetainedAttr(S, D, AL);
     break;
   case AttributeList::AT_WorkGroupSizeHint:
-    handleWorkGroupSize<WorkGroupSizeHintAttr>(S, D, Attr);
+    handleWorkGroupSize<WorkGroupSizeHintAttr>(S, D, AL);
     break;
   case AttributeList::AT_ReqdWorkGroupSize:
-    handleWorkGroupSize<ReqdWorkGroupSizeAttr>(S, D, Attr);
+    handleWorkGroupSize<ReqdWorkGroupSizeAttr>(S, D, AL);
     break;
   case AttributeList::AT_OpenCLIntelReqdSubGroupSize:
-    handleSubGroupSize(S, D, Attr);
+    handleSubGroupSize(S, D, AL);
     break;
   case AttributeList::AT_VecTypeHint:
-    handleVecTypeHint(S, D, Attr);
+    handleVecTypeHint(S, D, AL);
     break;
   case AttributeList::AT_RequireConstantInit:
-    handleSimpleAttribute<RequireConstantInitAttr>(S, D, Attr);
+    handleSimpleAttribute<RequireConstantInitAttr>(S, D, AL);
     break;
   case AttributeList::AT_InitPriority:
-    handleInitPriorityAttr(S, D, Attr);
+    handleInitPriorityAttr(S, D, AL);
     break;
   case AttributeList::AT_Packed:
-    handlePackedAttr(S, D, Attr);
+    handlePackedAttr(S, D, AL);
     break;
   case AttributeList::AT_Section:
-    handleSectionAttr(S, D, Attr);
+    handleSectionAttr(S, D, AL);
     break;
   case AttributeList::AT_Target:
-    handleTargetAttr(S, D, Attr);
+    handleTargetAttr(S, D, AL);
     break;
   case AttributeList::AT_Unavailable:
-    handleAttrWithMessage<UnavailableAttr>(S, D, Attr);
+    handleAttrWithMessage<UnavailableAttr>(S, D, AL);
     break;
   case AttributeList::AT_ArcWeakrefUnavailable:
-    handleSimpleAttribute<ArcWeakrefUnavailableAttr>(S, D, Attr);
+    handleSimpleAttribute<ArcWeakrefUnavailableAttr>(S, D, AL);
     break;
   case AttributeList::AT_ObjCRootClass:
-    handleSimpleAttribute<ObjCRootClassAttr>(S, D, Attr);
+    handleSimpleAttribute<ObjCRootClassAttr>(S, D, AL);
     break;
   case AttributeList::AT_ObjCSubclassingRestricted:
-    handleSimpleAttribute<ObjCSubclassingRestrictedAttr>(S, D, Attr);
+    handleSimpleAttribute<ObjCSubclassingRestrictedAttr>(S, D, AL);
     break;
   case AttributeList::AT_ObjCExplicitProtocolImpl:
-    handleObjCSuppresProtocolAttr(S, D, Attr);
+    handleObjCSuppresProtocolAttr(S, D, AL);
     break;
   case AttributeList::AT_ObjCRequiresPropertyDefs:
-    handleSimpleAttribute<ObjCRequiresPropertyDefsAttr>(S, D, Attr);
+    handleSimpleAttribute<ObjCRequiresPropertyDefsAttr>(S, D, AL);
     break;
   case AttributeList::AT_Unused:
-    handleUnusedAttr(S, D, Attr);
+    handleUnusedAttr(S, D, AL);
     break;
   case AttributeList::AT_ReturnsTwice:
-    handleSimpleAttribute<ReturnsTwiceAttr>(S, D, Attr);
+    handleSimpleAttribute<ReturnsTwiceAttr>(S, D, AL);
     break;
   case AttributeList::AT_NotTailCalled:
-    handleNotTailCalledAttr(S, D, Attr);
+    handleSimpleAttributeWithExclusions<NotTailCalledAttr,
+                                        AlwaysInlineAttr>(S, D, AL);
     break;
   case AttributeList::AT_DisableTailCalls:
-    handleDisableTailCallsAttr(S, D, Attr);
+    handleSimpleAttributeWithExclusions<DisableTailCallsAttr,
+                                        NakedAttr>(S, D, AL);
     break;
   case AttributeList::AT_Used:
-    handleUsedAttr(S, D, Attr);
+    handleSimpleAttribute<UsedAttr>(S, D, AL);
     break;
   case AttributeList::AT_Visibility:
-    handleVisibilityAttr(S, D, Attr, false);
+    handleVisibilityAttr(S, D, AL, false);
     break;
   case AttributeList::AT_TypeVisibility:
-    handleVisibilityAttr(S, D, Attr, true);
+    handleVisibilityAttr(S, D, AL, true);
     break;
   case AttributeList::AT_WarnUnused:
-    handleSimpleAttribute<WarnUnusedAttr>(S, D, Attr);
+    handleSimpleAttribute<WarnUnusedAttr>(S, D, AL);
     break;
   case AttributeList::AT_WarnUnusedResult:
-    handleWarnUnusedResult(S, D, Attr);
+    handleWarnUnusedResult(S, D, AL);
     break;
   case AttributeList::AT_Weak:
-    handleSimpleAttribute<WeakAttr>(S, D, Attr);
+    handleSimpleAttribute<WeakAttr>(S, D, AL);
     break;
   case AttributeList::AT_WeakRef:
-    handleWeakRefAttr(S, D, Attr);
+    handleWeakRefAttr(S, D, AL);
     break;
   case AttributeList::AT_WeakImport:
-    handleWeakImportAttr(S, D, Attr);
+    handleWeakImportAttr(S, D, AL);
     break;
   case AttributeList::AT_TransparentUnion:
-    handleTransparentUnionAttr(S, D, Attr);
+    handleTransparentUnionAttr(S, D, AL);
     break;
   case AttributeList::AT_ObjCException:
-    handleSimpleAttribute<ObjCExceptionAttr>(S, D, Attr);
+    handleSimpleAttribute<ObjCExceptionAttr>(S, D, AL);
     break;
   case AttributeList::AT_ObjCMethodFamily:
-    handleObjCMethodFamilyAttr(S, D, Attr);
+    handleObjCMethodFamilyAttr(S, D, AL);
     break;
   case AttributeList::AT_ObjCNSObject:
-    handleObjCNSObject(S, D, Attr);
+    handleObjCNSObject(S, D, AL);
     break;
   case AttributeList::AT_ObjCIndependentClass:
-    handleObjCIndependentClass(S, D, Attr);
+    handleObjCIndependentClass(S, D, AL);
     break;
   case AttributeList::AT_Blocks:
-    handleBlocksAttr(S, D, Attr);
+    handleBlocksAttr(S, D, AL);
     break;
   case AttributeList::AT_Sentinel:
-    handleSentinelAttr(S, D, Attr);
+    handleSentinelAttr(S, D, AL);
     break;
   case AttributeList::AT_Const:
-    handleSimpleAttribute<ConstAttr>(S, D, Attr);
+    handleSimpleAttribute<ConstAttr>(S, D, AL);
     break;
   case AttributeList::AT_Pure:
-    handleSimpleAttribute<PureAttr>(S, D, Attr);
+    handleSimpleAttribute<PureAttr>(S, D, AL);
     break;
   case AttributeList::AT_Cleanup:
-    handleCleanupAttr(S, D, Attr);
+    handleCleanupAttr(S, D, AL);
     break;
   case AttributeList::AT_NoDebug:
-    handleNoDebugAttr(S, D, Attr);
+    handleNoDebugAttr(S, D, AL);
     break;
   case AttributeList::AT_NoDuplicate:
-    handleSimpleAttribute<NoDuplicateAttr>(S, D, Attr);
+    handleSimpleAttribute<NoDuplicateAttr>(S, D, AL);
     break;
   case AttributeList::AT_Convergent:
-    handleSimpleAttribute<ConvergentAttr>(S, D, Attr);
+    handleSimpleAttribute<ConvergentAttr>(S, D, AL);
     break;
   case AttributeList::AT_NoInline:
-    handleSimpleAttribute<NoInlineAttr>(S, D, Attr);
+    handleSimpleAttribute<NoInlineAttr>(S, D, AL);
     break;
   case AttributeList::AT_NoInstrumentFunction: // Interacts with -pg.
-    handleSimpleAttribute<NoInstrumentFunctionAttr>(S, D, Attr);
+    handleSimpleAttribute<NoInstrumentFunctionAttr>(S, D, AL);
     break;
   case AttributeList::AT_StdCall:
   case AttributeList::AT_CDecl:
@@ -6412,183 +6259,186 @@
   case AttributeList::AT_IntelOclBicc:
   case AttributeList::AT_PreserveMost:
   case AttributeList::AT_PreserveAll:
-    handleCallConvAttr(S, D, Attr);
+    handleCallConvAttr(S, D, AL);
     break;
   case AttributeList::AT_Suppress:
-    handleSuppressAttr(S, D, Attr);
+    handleSuppressAttr(S, D, AL);
     break;
   case AttributeList::AT_OpenCLKernel:
-    handleSimpleAttribute<OpenCLKernelAttr>(S, D, Attr);
+    handleSimpleAttribute<OpenCLKernelAttr>(S, D, AL);
     break;
   case AttributeList::AT_OpenCLAccess:
-    handleOpenCLAccessAttr(S, D, Attr);
+    handleOpenCLAccessAttr(S, D, AL);
     break;
   case AttributeList::AT_OpenCLNoSVM:
-    handleOpenCLNoSVMAttr(S, D, Attr);
+    handleOpenCLNoSVMAttr(S, D, AL);
     break;
   case AttributeList::AT_SwiftContext:
-    handleParameterABIAttr(S, D, Attr, ParameterABI::SwiftContext);
+    handleParameterABIAttr(S, D, AL, ParameterABI::SwiftContext);
     break;
   case AttributeList::AT_SwiftErrorResult:
-    handleParameterABIAttr(S, D, Attr, ParameterABI::SwiftErrorResult);
+    handleParameterABIAttr(S, D, AL, ParameterABI::SwiftErrorResult);
     break;
   case AttributeList::AT_SwiftIndirectResult:
-    handleParameterABIAttr(S, D, Attr, ParameterABI::SwiftIndirectResult);
+    handleParameterABIAttr(S, D, AL, ParameterABI::SwiftIndirectResult);
     break;
   case AttributeList::AT_InternalLinkage:
-    handleInternalLinkageAttr(S, D, Attr);
+    handleInternalLinkageAttr(S, D, AL);
     break;
   case AttributeList::AT_LTOVisibilityPublic:
-    handleSimpleAttribute<LTOVisibilityPublicAttr>(S, D, Attr);
+    handleSimpleAttribute<LTOVisibilityPublicAttr>(S, D, AL);
     break;
 
   // Microsoft attributes:
   case AttributeList::AT_EmptyBases:
-    handleSimpleAttribute<EmptyBasesAttr>(S, D, Attr);
+    handleSimpleAttribute<EmptyBasesAttr>(S, D, AL);
     break;
   case AttributeList::AT_LayoutVersion:
-    handleLayoutVersion(S, D, Attr);
+    handleLayoutVersion(S, D, AL);
+    break;
+  case AttributeList::AT_TrivialABI:
+    handleSimpleAttribute<TrivialABIAttr>(S, D, AL);
     break;
   case AttributeList::AT_MSNoVTable:
-    handleSimpleAttribute<MSNoVTableAttr>(S, D, Attr);
+    handleSimpleAttribute<MSNoVTableAttr>(S, D, AL);
     break;
   case AttributeList::AT_MSStruct:
-    handleSimpleAttribute<MSStructAttr>(S, D, Attr);
+    handleSimpleAttribute<MSStructAttr>(S, D, AL);
     break;
   case AttributeList::AT_Uuid:
-    handleUuidAttr(S, D, Attr);
+    handleUuidAttr(S, D, AL);
     break;
   case AttributeList::AT_MSInheritance:
-    handleMSInheritanceAttr(S, D, Attr);
+    handleMSInheritanceAttr(S, D, AL);
     break;
   case AttributeList::AT_SelectAny:
-    handleSimpleAttribute<SelectAnyAttr>(S, D, Attr);
+    handleSimpleAttribute<SelectAnyAttr>(S, D, AL);
     break;
   case AttributeList::AT_Thread:
-    handleDeclspecThreadAttr(S, D, Attr);
+    handleDeclspecThreadAttr(S, D, AL);
     break;
 
   case AttributeList::AT_AbiTag:
-    handleAbiTagAttr(S, D, Attr);
+    handleAbiTagAttr(S, D, AL);
     break;
 
   // Thread safety attributes:
   case AttributeList::AT_AssertExclusiveLock:
-    handleAssertExclusiveLockAttr(S, D, Attr);
+    handleAssertExclusiveLockAttr(S, D, AL);
     break;
   case AttributeList::AT_AssertSharedLock:
-    handleAssertSharedLockAttr(S, D, Attr);
+    handleAssertSharedLockAttr(S, D, AL);
     break;
   case AttributeList::AT_GuardedVar:
-    handleSimpleAttribute<GuardedVarAttr>(S, D, Attr);
+    handleSimpleAttribute<GuardedVarAttr>(S, D, AL);
     break;
   case AttributeList::AT_PtGuardedVar:
-    handlePtGuardedVarAttr(S, D, Attr);
+    handlePtGuardedVarAttr(S, D, AL);
     break;
   case AttributeList::AT_ScopedLockable:
-    handleSimpleAttribute<ScopedLockableAttr>(S, D, Attr);
+    handleSimpleAttribute<ScopedLockableAttr>(S, D, AL);
     break;
   case AttributeList::AT_NoSanitize:
-    handleNoSanitizeAttr(S, D, Attr);
+    handleNoSanitizeAttr(S, D, AL);
     break;
   case AttributeList::AT_NoSanitizeSpecific:
-    handleNoSanitizeSpecificAttr(S, D, Attr);
+    handleNoSanitizeSpecificAttr(S, D, AL);
     break;
   case AttributeList::AT_NoThreadSafetyAnalysis:
-    handleSimpleAttribute<NoThreadSafetyAnalysisAttr>(S, D, Attr);
+    handleSimpleAttribute<NoThreadSafetyAnalysisAttr>(S, D, AL);
     break;
   case AttributeList::AT_GuardedBy:
-    handleGuardedByAttr(S, D, Attr);
+    handleGuardedByAttr(S, D, AL);
     break;
   case AttributeList::AT_PtGuardedBy:
-    handlePtGuardedByAttr(S, D, Attr);
+    handlePtGuardedByAttr(S, D, AL);
     break;
   case AttributeList::AT_ExclusiveTrylockFunction:
-    handleExclusiveTrylockFunctionAttr(S, D, Attr);
+    handleExclusiveTrylockFunctionAttr(S, D, AL);
     break;
   case AttributeList::AT_LockReturned:
-    handleLockReturnedAttr(S, D, Attr);
+    handleLockReturnedAttr(S, D, AL);
     break;
   case AttributeList::AT_LocksExcluded:
-    handleLocksExcludedAttr(S, D, Attr);
+    handleLocksExcludedAttr(S, D, AL);
     break;
   case AttributeList::AT_SharedTrylockFunction:
-    handleSharedTrylockFunctionAttr(S, D, Attr);
+    handleSharedTrylockFunctionAttr(S, D, AL);
     break;
   case AttributeList::AT_AcquiredBefore:
-    handleAcquiredBeforeAttr(S, D, Attr);
+    handleAcquiredBeforeAttr(S, D, AL);
     break;
   case AttributeList::AT_AcquiredAfter:
-    handleAcquiredAfterAttr(S, D, Attr);
+    handleAcquiredAfterAttr(S, D, AL);
     break;
 
   // Capability analysis attributes.
   case AttributeList::AT_Capability:
   case AttributeList::AT_Lockable:
-    handleCapabilityAttr(S, D, Attr);
+    handleCapabilityAttr(S, D, AL);
     break;
   case AttributeList::AT_RequiresCapability:
-    handleRequiresCapabilityAttr(S, D, Attr);
+    handleRequiresCapabilityAttr(S, D, AL);
     break;
 
   case AttributeList::AT_AssertCapability:
-    handleAssertCapabilityAttr(S, D, Attr);
+    handleAssertCapabilityAttr(S, D, AL);
     break;
   case AttributeList::AT_AcquireCapability:
-    handleAcquireCapabilityAttr(S, D, Attr);
+    handleAcquireCapabilityAttr(S, D, AL);
     break;
   case AttributeList::AT_ReleaseCapability:
-    handleReleaseCapabilityAttr(S, D, Attr);
+    handleReleaseCapabilityAttr(S, D, AL);
     break;
   case AttributeList::AT_TryAcquireCapability:
-    handleTryAcquireCapabilityAttr(S, D, Attr);
+    handleTryAcquireCapabilityAttr(S, D, AL);
     break;
 
   // Consumed analysis attributes.
   case AttributeList::AT_Consumable:
-    handleConsumableAttr(S, D, Attr);
+    handleConsumableAttr(S, D, AL);
     break;
   case AttributeList::AT_ConsumableAutoCast:
-    handleSimpleAttribute<ConsumableAutoCastAttr>(S, D, Attr);
+    handleSimpleAttribute<ConsumableAutoCastAttr>(S, D, AL);
     break;
   case AttributeList::AT_ConsumableSetOnRead:
-    handleSimpleAttribute<ConsumableSetOnReadAttr>(S, D, Attr);
+    handleSimpleAttribute<ConsumableSetOnReadAttr>(S, D, AL);
     break;
   case AttributeList::AT_CallableWhen:
-    handleCallableWhenAttr(S, D, Attr);
+    handleCallableWhenAttr(S, D, AL);
     break;
   case AttributeList::AT_ParamTypestate:
-    handleParamTypestateAttr(S, D, Attr);
+    handleParamTypestateAttr(S, D, AL);
     break;
   case AttributeList::AT_ReturnTypestate:
-    handleReturnTypestateAttr(S, D, Attr);
+    handleReturnTypestateAttr(S, D, AL);
     break;
   case AttributeList::AT_SetTypestate:
-    handleSetTypestateAttr(S, D, Attr);
+    handleSetTypestateAttr(S, D, AL);
     break;
   case AttributeList::AT_TestTypestate:
-    handleTestTypestateAttr(S, D, Attr);
+    handleTestTypestateAttr(S, D, AL);
     break;
 
   // Type safety attributes.
   case AttributeList::AT_ArgumentWithTypeTag:
-    handleArgumentWithTypeTagAttr(S, D, Attr);
+    handleArgumentWithTypeTagAttr(S, D, AL);
     break;
   case AttributeList::AT_TypeTagForDatatype:
-    handleTypeTagForDatatypeAttr(S, D, Attr);
+    handleTypeTagForDatatypeAttr(S, D, AL);
     break;
   case AttributeList::AT_AnyX86NoCallerSavedRegisters:
-    handleNoCallerSavedRegsAttr(S, D, Attr);
+    handleNoCallerSavedRegsAttr(S, D, AL);
     break;
   case AttributeList::AT_RenderScriptKernel:
-    handleSimpleAttribute<RenderScriptKernelAttr>(S, D, Attr);
+    handleSimpleAttribute<RenderScriptKernelAttr>(S, D, AL);
     break;
   // XRay attributes.
   case AttributeList::AT_XRayInstrument:
-    handleSimpleAttribute<XRayInstrumentAttr>(S, D, Attr);
+    handleSimpleAttribute<XRayInstrumentAttr>(S, D, AL);
     break;
   case AttributeList::AT_XRayLogArgs:
-    handleXRayLogArgsAttr(S, D, Attr);
+    handleXRayLogArgsAttr(S, D, AL);
     break;
   }
 }
@@ -6618,34 +6468,34 @@
   // attribute must never appear as a group" for attributes like cold and hot.
   if (!D->hasAttr<OpenCLKernelAttr>()) {
     // These attributes cannot be applied to a non-kernel function.
-    if (Attr *A = D->getAttr<ReqdWorkGroupSizeAttr>()) {
+    if (const auto *A = D->getAttr<ReqdWorkGroupSizeAttr>()) {
       // FIXME: This emits a different error message than
       // diag::err_attribute_wrong_decl_type + ExpectedKernelFunction.
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();
-    } else if (Attr *A = D->getAttr<WorkGroupSizeHintAttr>()) {
+    } else if (const auto *A = D->getAttr<WorkGroupSizeHintAttr>()) {
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();
-    } else if (Attr *A = D->getAttr<VecTypeHintAttr>()) {
+    } else if (const auto *A = D->getAttr<VecTypeHintAttr>()) {
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();
-    } else if (Attr *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
+    } else if (const auto *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
       Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
         << A << ExpectedKernelFunction;
       D->setInvalidDecl();
-    } else if (Attr *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
+    } else if (const auto *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
       Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
         << A << ExpectedKernelFunction;
       D->setInvalidDecl();
-    } else if (Attr *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
+    } else if (const auto *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
       Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
         << A << ExpectedKernelFunction;
       D->setInvalidDecl();
-    } else if (Attr *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
+    } else if (const auto *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
       Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
         << A << ExpectedKernelFunction;
       D->setInvalidDecl();
-    } else if (Attr *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
+    } else if (const auto *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();
     }
@@ -6654,9 +6504,9 @@
 
 // Helper for delayed processing TransparentUnion attribute.
 void Sema::ProcessDeclAttributeDelayed(Decl *D, const AttributeList *AttrList) {
-  for (const AttributeList *Attr = AttrList; Attr; Attr = Attr->getNext())
-    if (Attr->getKind() == AttributeList::AT_TransparentUnion) {
-      handleTransparentUnionAttr(*this, D, *Attr);
+  for (const AttributeList *AL = AttrList; AL; AL = AL->getNext())
+    if (AL->getKind() == AttributeList::AT_TransparentUnion) {
+      handleTransparentUnionAttr(*this, D, *AL);
       break;
     }
 }
@@ -6711,7 +6561,7 @@
                                       SourceLocation Loc) {
   assert(isa<FunctionDecl>(ND) || isa<VarDecl>(ND));
   NamedDecl *NewD = nullptr;
-  if (FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
+  if (auto *FD = dyn_cast<FunctionDecl>(ND)) {
     FunctionDecl *NewFD;
     // FIXME: Missing call to CheckFunctionDeclaration().
     // FIXME: Mangling?
@@ -6731,7 +6581,7 @@
     // Fake up parameter variables; they are declared as if this were
     // a typedef.
     QualType FDTy = FD->getType();
-    if (const FunctionProtoType *FT = FDTy->getAs<FunctionProtoType>()) {
+    if (const auto *FT = FDTy->getAs<FunctionProtoType>()) {
       SmallVector<ParmVarDecl*, 16> Params;
       for (const auto &AI : FT->param_types()) {
         ParmVarDecl *Param = BuildParmVarDeclForTypedef(NewFD, Loc, AI);
@@ -6740,15 +6590,13 @@
       }
       NewFD->setParams(Params);
     }
-  } else if (VarDecl *VD = dyn_cast<VarDecl>(ND)) {
+  } else if (auto *VD = dyn_cast<VarDecl>(ND)) {
     NewD = VarDecl::Create(VD->getASTContext(), VD->getDeclContext(),
                            VD->getInnerLocStart(), VD->getLocation(), II,
                            VD->getType(), VD->getTypeSourceInfo(),
                            VD->getStorageClass());
-    if (VD->getQualifier()) {
-      VarDecl *NewVD = cast<VarDecl>(NewD);
-      NewVD->setQualifierInfo(VD->getQualifierLoc());
-    }
+    if (VD->getQualifier())
+	  cast<VarDecl>(NewD)->setQualifierInfo(VD->getQualifierLoc());
   }
   return NewD;
 }
@@ -6784,10 +6632,10 @@
   LoadExternalWeakUndeclaredIdentifiers();
   if (!WeakUndeclaredIdentifiers.empty()) {
     NamedDecl *ND = nullptr;
-    if (VarDecl *VD = dyn_cast<VarDecl>(D))
+    if (auto *VD = dyn_cast<VarDecl>(D))
       if (VD->isExternC())
         ND = VD;
-    if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
+    if (auto *FD = dyn_cast<FunctionDecl>(D))
       if (FD->isExternC())
         ND = FD;
     if (ND) {
@@ -6830,14 +6678,14 @@
 /// Is the given declaration allowed to use a forbidden type?
 /// If so, it'll still be annotated with an attribute that makes it
 /// illegal to actually use.
-static bool isForbiddenTypeAllowed(Sema &S, Decl *decl,
+static bool isForbiddenTypeAllowed(Sema &S, Decl *D,
                                    const DelayedDiagnostic &diag,
                                    UnavailableAttr::ImplicitReason &reason) {
   // Private ivars are always okay.  Unfortunately, people don't
   // always properly make their ivars private, even in system headers.
   // Plus we need to make fields okay, too.
-  if (!isa<FieldDecl>(decl) && !isa<ObjCPropertyDecl>(decl) &&
-      !isa<FunctionDecl>(decl))
+  if (!isa<FieldDecl>(D) && !isa<ObjCPropertyDecl>(D) &&
+      !isa<FunctionDecl>(D))
     return false;
 
   // Silently accept unsupported uses of __weak in both user and system
@@ -6845,7 +6693,7 @@
   // -fno-objc-arc files.  We do have to take some care against attempts
   // to define such things;  for now, we've only done that for ivars
   // and properties.
-  if ((isa<ObjCIvarDecl>(decl) || isa<ObjCPropertyDecl>(decl))) {
+  if ((isa<ObjCIvarDecl>(D) || isa<ObjCPropertyDecl>(D))) {
     if (diag.getForbiddenTypeDiagnostic() == diag::err_arc_weak_disabled ||
         diag.getForbiddenTypeDiagnostic() == diag::err_arc_weak_no_runtime) {
       reason = UnavailableAttr::IR_ForbiddenWeak;
@@ -6854,7 +6702,7 @@
   }
 
   // Allow all sorts of things in system headers.
-  if (S.Context.getSourceManager().isInSystemHeader(decl->getLocation())) {
+  if (S.Context.getSourceManager().isInSystemHeader(D->getLocation())) {
     // Currently, all the failures dealt with this way are due to ARC
     // restrictions.
     reason = UnavailableAttr::IR_ARCForbiddenType;
@@ -6865,30 +6713,29 @@
 }
 
 /// Handle a delayed forbidden-type diagnostic.
-static void handleDelayedForbiddenType(Sema &S, DelayedDiagnostic &diag,
-                                       Decl *decl) {
-  auto reason = UnavailableAttr::IR_None;
-  if (decl && isForbiddenTypeAllowed(S, decl, diag, reason)) {
-    assert(reason && "didn't set reason?");
-    decl->addAttr(UnavailableAttr::CreateImplicit(S.Context, "", reason,
-                                                  diag.Loc));
+static void handleDelayedForbiddenType(Sema &S, DelayedDiagnostic &DD,
+                                       Decl *D) {
+  auto Reason = UnavailableAttr::IR_None;
+  if (D && isForbiddenTypeAllowed(S, D, DD, Reason)) {
+    assert(Reason && "didn't set reason?");
+    D->addAttr(UnavailableAttr::CreateImplicit(S.Context, "", Reason, DD.Loc));
     return;
   }
   if (S.getLangOpts().ObjCAutoRefCount)
-    if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(decl)) {
+    if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
       // FIXME: we may want to suppress diagnostics for all
-      // kind of forbidden type messages on unavailable functions. 
+      // kind of forbidden type messages on unavailable functions.
       if (FD->hasAttr<UnavailableAttr>() &&
-          diag.getForbiddenTypeDiagnostic() == 
-          diag::err_arc_array_param_no_ownership) {
-        diag.Triggered = true;
+          DD.getForbiddenTypeDiagnostic() ==
+              diag::err_arc_array_param_no_ownership) {
+        DD.Triggered = true;
         return;
       }
     }
 
-  S.Diag(diag.Loc, diag.getForbiddenTypeDiagnostic())
-    << diag.getForbiddenTypeOperand() << diag.getForbiddenTypeArgument();
-  diag.Triggered = true;
+  S.Diag(DD.Loc, DD.getForbiddenTypeDiagnostic())
+      << DD.getForbiddenTypeOperand() << DD.getForbiddenTypeArgument();
+  DD.Triggered = true;
 }
 
 static const AvailabilityAttr *getAttrForPlatform(ASTContext &Context,
@@ -6931,9 +6778,9 @@
 
   // For typedefs, if the typedef declaration appears available look
   // to the underlying type to see if it is more restrictive.
-  while (const TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D)) {
+  while (const auto *TD = dyn_cast<TypedefNameDecl>(D)) {
     if (Result == AR_Available) {
-      if (const TagType *TT = TD->getUnderlyingType()->getAs<TagType>()) {
+      if (const auto *TT = TD->getUnderlyingType()->getAs<TagType>()) {
         D = TT->getDecl();
         Result = D->getAvailability(Message);
         continue;
@@ -6943,7 +6790,7 @@
   }
 
   // Forward class declarations get their attributes from their definition.
-  if (const ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(D)) {
+  if (const auto *IDecl = dyn_cast<ObjCInterfaceDecl>(D)) {
     if (IDecl->getDefinition()) {
       D = IDecl->getDefinition();
       Result = D->getAvailability(Message);
@@ -6991,18 +6838,18 @@
   // interface when they really shouldn't: they are members of the enclosing
   // context, and can referenced from there.
   if (S.OriginalLexicalContext && cast<Decl>(S.OriginalLexicalContext) != Ctx) {
-    auto *OrigCtx = cast<Decl>(S.OriginalLexicalContext);
+    const auto *OrigCtx = cast<Decl>(S.OriginalLexicalContext);
     if (CheckContext(OrigCtx))
       return false;
 
     // An implementation implicitly has the availability of the interface.
-    if (auto *CatOrImpl = dyn_cast<ObjCImplDecl>(OrigCtx)) {
+    if (const auto *CatOrImpl = dyn_cast<ObjCImplDecl>(OrigCtx)) {
       if (const ObjCInterfaceDecl *Interface = CatOrImpl->getClassInterface())
         if (CheckContext(Interface))
           return false;
     }
     // A category implicitly has the availability of the interface.
-    else if (auto *CatD = dyn_cast<ObjCCategoryDecl>(OrigCtx))
+    else if (const auto *CatD = dyn_cast<ObjCCategoryDecl>(OrigCtx))
       if (const ObjCInterfaceDecl *Interface = CatD->getClassInterface())
         if (CheckContext(Interface))
           return false;
@@ -7013,13 +6860,13 @@
       return false;
 
     // An implementation implicitly has the availability of the interface.
-    if (auto *CatOrImpl = dyn_cast<ObjCImplDecl>(Ctx)) {
+    if (const auto *CatOrImpl = dyn_cast<ObjCImplDecl>(Ctx)) {
       if (const ObjCInterfaceDecl *Interface = CatOrImpl->getClassInterface())
         if (CheckContext(Interface))
           return false;
     }
     // A category implicitly has the availability of the interface.
-    else if (auto *CatD = dyn_cast<ObjCCategoryDecl>(Ctx))
+    else if (const auto *CatD = dyn_cast<ObjCCategoryDecl>(Ctx))
       if (const ObjCInterfaceDecl *Interface = CatD->getClassInterface())
         if (CheckContext(Interface))
           return false;
@@ -7190,7 +7037,7 @@
         << OffendingDecl << /* partial */ 3;
 
     if (const auto *Enclosing = findEnclosingDeclToAnnotate(Ctx)) {
-      if (auto *TD = dyn_cast<TagDecl>(Enclosing))
+      if (const auto *TD = dyn_cast<TagDecl>(Enclosing))
         if (TD->getDeclName().isEmpty()) {
           S.Diag(TD->getLocation(),
                  diag::note_decl_unguarded_availability_silence)
@@ -7231,8 +7078,8 @@
     diag_fwdclass_message = diag::warn_deprecated_fwdclass_message;
     property_note_select = /* deprecated */ 0;
     available_here_select_kind = /* deprecated */ 2;
-    if (const auto *Attr = OffendingDecl->getAttr<DeprecatedAttr>())
-      NoteLocation = Attr->getLocation();
+    if (const auto *AL = OffendingDecl->getAttr<DeprecatedAttr>())
+      NoteLocation = AL->getLocation();
     break;
 
   case AR_Unavailable:
@@ -7243,8 +7090,8 @@
     property_note_select = /* unavailable */ 1;
     available_here_select_kind = /* unavailable */ 0;
 
-    if (auto Attr = OffendingDecl->getAttr<UnavailableAttr>()) {
-      if (Attr->isImplicit() && Attr->getImplicitReason()) {
+    if (auto AL = OffendingDecl->getAttr<UnavailableAttr>()) {
+      if (AL->isImplicit() && AL->getImplicitReason()) {
         // Most of these failures are due to extra restrictions in ARC;
         // reflect that in the primary diagnostic when applicable.
         auto flagARCError = [&] {
@@ -7254,7 +7101,7 @@
             diag = diag::err_unavailable_in_arc;
         };
 
-        switch (Attr->getImplicitReason()) {
+        switch (AL->getImplicitReason()) {
         case UnavailableAttr::IR_None: break;
 
         case UnavailableAttr::IR_ARCForbiddenType:
@@ -7295,10 +7142,10 @@
   CharSourceRange UseRange;
   StringRef Replacement;
   if (K == AR_Deprecated) {
-    if (auto Attr = OffendingDecl->getAttr<DeprecatedAttr>())
-      Replacement = Attr->getReplacement();
-    if (auto Attr = getAttrForPlatform(S.Context, OffendingDecl))
-      Replacement = Attr->getReplacement();
+    if (auto AL = OffendingDecl->getAttr<DeprecatedAttr>())
+      Replacement = AL->getReplacement();
+    if (auto AL = getAttrForPlatform(S.Context, OffendingDecl))
+      Replacement = AL->getReplacement();
 
     if (!Replacement.empty())
       UseRange =
@@ -7694,11 +7541,11 @@
   if (Range.isInvalid())
     return true;
 
-  if (const TagType *TT = dyn_cast<TagType>(TyPtr)) {
+  if (const auto *TT = dyn_cast<TagType>(TyPtr)) {
     TagDecl *TD = TT->getDecl();
     DiagnoseDeclAvailability(TD, Range);
 
-  } else if (const TypedefType *TD = dyn_cast<TypedefType>(TyPtr)) {
+  } else if (const auto *TD = dyn_cast<TypedefType>(TyPtr)) {
     TypedefNameDecl *D = TD->getDecl();
     DiagnoseDeclAvailability(D, Range);
 
@@ -7782,7 +7629,7 @@
   }
 
   const ObjCPropertyDecl *ObjCPDecl = nullptr;
-  if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D)) {
+  if (const auto *MD = dyn_cast<ObjCMethodDecl>(D)) {
     if (const ObjCPropertyDecl *PD = MD->findPropertyDecl()) {
       AvailabilityResult PDeclResult = PD->getAvailability(nullptr);
       if (PDeclResult == Result)
diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp
index 750e062..51ba96f 100644
--- a/lib/Sema/SemaDeclCXX.cpp
+++ b/lib/Sema/SemaDeclCXX.cpp
@@ -692,8 +692,9 @@
   assert(D.isDecompositionDeclarator());
   const DecompositionDeclarator &Decomp = D.getDecompositionDeclarator();
 
-  // The syntax only allows a decomposition declarator as a simple-declaration
-  // or a for-range-declaration, but we parse it in more cases than that.
+  // The syntax only allows a decomposition declarator as a simple-declaration,
+  // a for-range-declaration, or a condition in Clang, but we parse it in more
+  // cases than that.
   if (!D.mayHaveDecompositionDeclarator()) {
     Diag(Decomp.getLSquareLoc(), diag::err_decomp_decl_context)
       << Decomp.getSourceRange();
@@ -708,9 +709,12 @@
     return nullptr;
   }
 
-  Diag(Decomp.getLSquareLoc(), getLangOpts().CPlusPlus1z
-                                   ? diag::warn_cxx14_compat_decomp_decl
-                                   : diag::ext_decomp_decl)
+  Diag(Decomp.getLSquareLoc(),
+       !getLangOpts().CPlusPlus17
+           ? diag::ext_decomp_decl
+           : D.getContext() == DeclaratorContext::ConditionContext
+                 ? diag::ext_decomp_decl_cond
+                 : diag::warn_cxx14_compat_decomp_decl)
       << Decomp.getSourceRange();
 
   // The semantic context is always just the current context.
@@ -2413,9 +2417,16 @@
   // Attach the remaining base class specifiers to the derived class.
   Class->setBases(Bases.data(), NumGoodBases);
 
+  // Check that the only base classes that are duplicate are virtual.
   for (unsigned idx = 0; idx < NumGoodBases; ++idx) {
     // Check whether this direct base is inaccessible due to ambiguity.
     QualType BaseType = Bases[idx]->getType();
+
+    // Skip all dependent types in templates being used as base specifiers.
+    // Checks below assume that the base specifier is a CXXRecord.
+    if (BaseType->isDependentType())
+      continue;
+
     CanQualType CanonicalBase = Context.getCanonicalType(BaseType)
       .getUnqualifiedType();
 
@@ -2503,13 +2514,8 @@
   return DerivedRD->isDerivedFrom(BaseRD, Paths);
 }
 
-void Sema::BuildBasePathArray(const CXXBasePaths &Paths,
-                              CXXCastPath &BasePathArray) {
-  assert(BasePathArray.empty() && "Base path array must be empty!");
-  assert(Paths.isRecordingPaths() && "Must record paths!");
-
-  const CXXBasePath &Path = Paths.front();
-
+static void BuildBasePathArray(const CXXBasePath &Path,
+                               CXXCastPath &BasePathArray) {
   // We first go backward and check if we have a virtual base.
   // FIXME: It would be better if CXXBasePath had the base specifier for
   // the nearest virtual base.
@@ -2526,6 +2532,13 @@
     BasePathArray.push_back(const_cast<CXXBaseSpecifier*>(Path[I].Base));
 }
 
+
+void Sema::BuildBasePathArray(const CXXBasePaths &Paths,
+                              CXXCastPath &BasePathArray) {
+  assert(BasePathArray.empty() && "Base path array must be empty!");
+  assert(Paths.isRecordingPaths() && "Must record paths!");
+  return ::BuildBasePathArray(Paths.front(), BasePathArray);
+}
 /// CheckDerivedToBaseConversion - Check whether the Derived-to-Base
 /// conversion (where Derived and Base are class types) is
 /// well-formed, meaning that the conversion is unambiguous (and
@@ -2553,27 +2566,45 @@
   CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
                      /*DetectVirtual=*/false);
   bool DerivationOkay = IsDerivedFrom(Loc, Derived, Base, Paths);
-  assert(DerivationOkay &&
-         "Can only be used with a derived-to-base conversion");
-  (void)DerivationOkay;
+  if (!DerivationOkay)
+    return true;
 
-  if (!Paths.isAmbiguous(Context.getCanonicalType(Base).getUnqualifiedType())) {
+  const CXXBasePath *Path = nullptr;
+  if (!Paths.isAmbiguous(Context.getCanonicalType(Base).getUnqualifiedType()))
+    Path = &Paths.front();
+
+  // For MSVC compatibility, check if Derived directly inherits from Base. Clang
+  // warns about this hierarchy under -Winaccessible-base, but MSVC allows the
+  // user to access such bases.
+  if (!Path && getLangOpts().MSVCCompat) {
+    for (const CXXBasePath &PossiblePath : Paths) {
+      if (PossiblePath.size() == 1) {
+        Path = &PossiblePath;
+        if (AmbigiousBaseConvID)
+          Diag(Loc, diag::ext_ms_ambiguous_direct_base)
+              << Base << Derived << Range;
+        break;
+      }
+    }
+  }
+
+  if (Path) {
     if (!IgnoreAccess) {
       // Check that the base class can be accessed.
-      switch (CheckBaseClassAccess(Loc, Base, Derived, Paths.front(),
-                                   InaccessibleBaseID)) {
-        case AR_inaccessible:
-          return true;
-        case AR_accessible:
-        case AR_dependent:
-        case AR_delayed:
-          break;
+      switch (
+          CheckBaseClassAccess(Loc, Base, Derived, *Path, InaccessibleBaseID)) {
+      case AR_inaccessible:
+        return true;
+      case AR_accessible:
+      case AR_dependent:
+      case AR_delayed:
+        break;
       }
     }
 
     // Build a base path if necessary.
     if (BasePath)
-      BuildBasePathArray(Paths, *BasePath);
+      ::BuildBasePathArray(*Path, *BasePath);
     return false;
   }
 
@@ -2724,8 +2755,7 @@
   //   If a function is marked with the virt-specifier override and
   //   does not override a member function of a base class, the program is
   //   ill-formed.
-  bool HasOverriddenMethods =
-    MD->begin_overridden_methods() != MD->end_overridden_methods();
+  bool HasOverriddenMethods = MD->size_overridden_methods() != 0;
   if (MD->hasAttr<OverrideAttr>() && !HasOverriddenMethods)
     Diag(MD->getLocation(), diag::err_function_marked_override_not_overriding)
       << MD->getDeclName();
@@ -3559,9 +3589,12 @@
   ExprResult Init = InitExpr;
   if (!FD->getType()->isDependentType() && !InitExpr->isTypeDependent()) {
     InitializedEntity Entity = InitializedEntity::InitializeMember(FD);
-    InitializationKind Kind = FD->getInClassInitStyle() == ICIS_ListInit
-        ? InitializationKind::CreateDirectList(InitExpr->getLocStart())
-        : InitializationKind::CreateCopy(InitExpr->getLocStart(), InitLoc);
+    InitializationKind Kind =
+        FD->getInClassInitStyle() == ICIS_ListInit
+            ? InitializationKind::CreateDirectList(InitExpr->getLocStart(),
+                                                   InitExpr->getLocStart(),
+                                                   InitExpr->getLocEnd())
+            : InitializationKind::CreateCopy(InitExpr->getLocStart(), InitLoc);
     InitializationSequence Seq(*this, Entity, Kind, InitExpr);
     Init = Seq.Perform(*this, Entity, Kind, InitExpr);
     if (Init.isInvalid()) {
@@ -3956,9 +3989,10 @@
                    : InitializedEntity::InitializeMember(IndirectMember,
                                                          nullptr);
     InitializationKind Kind =
-      InitList ? InitializationKind::CreateDirectList(IdLoc)
-               : InitializationKind::CreateDirect(IdLoc, InitRange.getBegin(),
-                                                  InitRange.getEnd());
+        InitList ? InitializationKind::CreateDirectList(
+                       IdLoc, Init->getLocStart(), Init->getLocEnd())
+                 : InitializationKind::CreateDirect(IdLoc, InitRange.getBegin(),
+                                                    InitRange.getEnd());
 
     InitializationSequence InitSeq(*this, MemberEntity, Kind, Args);
     ExprResult MemberInit = InitSeq.Perform(*this, MemberEntity, Kind, Args,
@@ -4010,9 +4044,10 @@
   InitializedEntity DelegationEntity = InitializedEntity::InitializeDelegation(
                                      QualType(ClassDecl->getTypeForDecl(), 0));
   InitializationKind Kind =
-    InitList ? InitializationKind::CreateDirectList(NameLoc)
-             : InitializationKind::CreateDirect(NameLoc, InitRange.getBegin(),
-                                                InitRange.getEnd());
+      InitList ? InitializationKind::CreateDirectList(
+                     NameLoc, Init->getLocStart(), Init->getLocEnd())
+               : InitializationKind::CreateDirect(NameLoc, InitRange.getBegin(),
+                                                  InitRange.getEnd());
   InitializationSequence InitSeq(*this, DelegationEntity, Kind, Args);
   ExprResult DelegationInit = InitSeq.Perform(*this, DelegationEntity, Kind,
                                               Args, nullptr);
@@ -4144,9 +4179,9 @@
   InitializedEntity BaseEntity =
     InitializedEntity::InitializeBase(Context, BaseSpec, VirtualBaseSpec);
   InitializationKind Kind =
-    InitList ? InitializationKind::CreateDirectList(BaseLoc)
-             : InitializationKind::CreateDirect(BaseLoc, InitRange.getBegin(),
-                                                InitRange.getEnd());
+      InitList ? InitializationKind::CreateDirectList(BaseLoc)
+               : InitializationKind::CreateDirect(BaseLoc, InitRange.getBegin(),
+                                                  InitRange.getEnd());
   InitializationSequence InitSeq(*this, BaseEntity, Kind, Args);
   ExprResult BaseInit = InitSeq.Perform(*this, BaseEntity, Kind, Args, nullptr);
   if (BaseInit.isInvalid())
@@ -5441,7 +5476,7 @@
   }
 }
 
-static void ReferenceDllExportedMethods(Sema &S, CXXRecordDecl *Class) {
+static void ReferenceDllExportedMembers(Sema &S, CXXRecordDecl *Class) {
   Attr *ClassAttr = getDLLAttr(Class);
   if (!ClassAttr)
     return;
@@ -5456,6 +5491,14 @@
     return;
 
   for (Decl *Member : Class->decls()) {
+    // Defined static variables that are members of an exported base
+    // class must be marked export too.
+    auto *VD = dyn_cast<VarDecl>(Member);
+    if (VD && Member->getAttr<DLLExportAttr>() &&
+        VD->getStorageClass() == SC_Static &&
+        TSK == TSK_ImplicitInstantiation)
+      S.MarkVariableReferenced(VD->getLocation(), VD);
+
     auto *MD = dyn_cast<CXXMethodDecl>(Member);
     if (!MD)
       continue;
@@ -5745,20 +5788,20 @@
 
   if (D->needsImplicitCopyConstructor() &&
       !D->defaultedCopyConstructorIsDeleted()) {
-    if (!D->hasTrivialCopyConstructor())
+    if (!D->hasTrivialCopyConstructorForCall())
       return false;
     HasNonDeletedCopyOrMove = true;
   }
 
   if (S.getLangOpts().CPlusPlus11 && D->needsImplicitMoveConstructor() &&
       !D->defaultedMoveConstructorIsDeleted()) {
-    if (!D->hasTrivialMoveConstructor())
+    if (!D->hasTrivialMoveConstructorForCall())
       return false;
     HasNonDeletedCopyOrMove = true;
   }
 
   if (D->needsImplicitDestructor() && !D->defaultedDestructorIsDeleted() &&
-      !D->hasTrivialDestructor())
+      !D->hasTrivialDestructorForCall())
     return false;
 
   for (const CXXMethodDecl *MD : D->methods()) {
@@ -5771,7 +5814,7 @@
     else if (!isa<CXXDestructorDecl>(MD))
       continue;
 
-    if (!MD->isTrivial())
+    if (!MD->isTrivialForCall())
       return false;
   }
 
@@ -5855,6 +5898,13 @@
     }
   }
 
+  // Set HasTrivialSpecialMemberForCall if the record has attribute
+  // "trivial_abi".
+  bool HasTrivialABI = Record->hasAttr<TrivialABIAttr>();
+
+  if (HasTrivialABI)
+    Record->setHasTrivialSpecialMemberForCall();
+
   bool HasMethodWithOverrideControl = false,
        HasOverridingMethodWithoutOverrideControl = false;
   if (!Record->isDependentType()) {
@@ -5877,12 +5927,23 @@
       if (!M->isImplicit() && !M->isUserProvided()) {
         if (CSM != CXXInvalid) {
           M->setTrivial(SpecialMemberIsTrivial(M, CSM));
-
           // Inform the class that we've finished declaring this member.
           Record->finishedDefaultedOrDeletedMember(M);
+          M->setTrivialForCall(
+              HasTrivialABI ||
+              SpecialMemberIsTrivial(M, CSM, TAH_ConsiderTrivialABI));
+          Record->setTrivialForCallFlags(M);
         }
       }
 
+      // Set triviality for the purpose of calls if this is a user-provided
+      // copy/move constructor or destructor.
+      if ((CSM == CXXCopyConstructor || CSM == CXXMoveConstructor ||
+           CSM == CXXDestructor) && M->isUserProvided()) {
+        M->setTrivialForCall(HasTrivialABI);
+        Record->setTrivialForCallFlags(M);
+      }
+
       if (!M->isInvalidDecl() && M->isExplicitlyDefaulted() &&
           M->hasAttr<DLLExportAttr>()) {
         if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015) &&
@@ -6994,9 +7055,14 @@
 ///
 /// If \p Selected is not \c NULL, \c *Selected will be filled in with the
 /// member that was most likely to be intended to be trivial, if any.
+///
+/// If \p ForCall is true, look at CXXRecord::HasTrivialSpecialMembersForCall to
+/// determine whether the special member is trivial.
 static bool findTrivialSpecialMember(Sema &S, CXXRecordDecl *RD,
                                      Sema::CXXSpecialMember CSM, unsigned Quals,
-                                     bool ConstRHS, CXXMethodDecl **Selected) {
+                                     bool ConstRHS,
+                                     Sema::TrivialABIHandling TAH,
+                                     CXXMethodDecl **Selected) {
   if (Selected)
     *Selected = nullptr;
 
@@ -7037,7 +7103,9 @@
     // C++11 [class.dtor]p5:
     //   A destructor is trivial if:
     //    - all the direct [subobjects] have trivial destructors
-    if (RD->hasTrivialDestructor())
+    if (RD->hasTrivialDestructor() ||
+        (TAH == Sema::TAH_ConsiderTrivialABI &&
+         RD->hasTrivialDestructorForCall()))
       return true;
 
     if (Selected) {
@@ -7052,7 +7120,9 @@
     // C++11 [class.copy]p12:
     //   A copy constructor is trivial if:
     //    - the constructor selected to copy each direct [subobject] is trivial
-    if (RD->hasTrivialCopyConstructor()) {
+    if (RD->hasTrivialCopyConstructor() ||
+        (TAH == Sema::TAH_ConsiderTrivialABI &&
+         RD->hasTrivialCopyConstructorForCall())) {
       if (Quals == Qualifiers::Const)
         // We must either select the trivial copy constructor or reach an
         // ambiguity; no need to actually perform overload resolution.
@@ -7105,6 +7175,10 @@
     // not supposed to!
     if (Selected)
       *Selected = SMOR.getMethod();
+
+    if (TAH == Sema::TAH_ConsiderTrivialABI &&
+        (CSM == Sema::CXXCopyConstructor || CSM == Sema::CXXMoveConstructor))
+      return SMOR.getMethod()->isTrivialForCall();
     return SMOR.getMethod()->isTrivial();
   }
 
@@ -7143,14 +7217,14 @@
                                       QualType SubType, bool ConstRHS,
                                       Sema::CXXSpecialMember CSM,
                                       TrivialSubobjectKind Kind,
-                                      bool Diagnose) {
+                                      Sema::TrivialABIHandling TAH, bool Diagnose) {
   CXXRecordDecl *SubRD = SubType->getAsCXXRecordDecl();
   if (!SubRD)
     return true;
 
   CXXMethodDecl *Selected;
   if (findTrivialSpecialMember(S, SubRD, CSM, SubType.getCVRQualifiers(),
-                               ConstRHS, Diagnose ? &Selected : nullptr))
+                               ConstRHS, TAH, Diagnose ? &Selected : nullptr))
     return true;
 
   if (Diagnose) {
@@ -7180,7 +7254,8 @@
           << Kind << SubType.getUnqualifiedType() << CSM;
 
       // Explain why the defaulted or deleted special member isn't trivial.
-      S.SpecialMemberIsTrivial(Selected, CSM, Diagnose);
+      S.SpecialMemberIsTrivial(Selected, CSM, Sema::TAH_IgnoreTrivialABI,
+                               Diagnose);
     }
   }
 
@@ -7191,7 +7266,9 @@
 /// trivial.
 static bool checkTrivialClassMembers(Sema &S, CXXRecordDecl *RD,
                                      Sema::CXXSpecialMember CSM,
-                                     bool ConstArg, bool Diagnose) {
+                                     bool ConstArg,
+                                     Sema::TrivialABIHandling TAH,
+                                     bool Diagnose) {
   for (const auto *FI : RD->fields()) {
     if (FI->isInvalidDecl() || FI->isUnnamedBitfield())
       continue;
@@ -7201,7 +7278,7 @@
     // Pretend anonymous struct or union members are members of this class.
     if (FI->isAnonymousStructOrUnion()) {
       if (!checkTrivialClassMembers(S, FieldType->getAsCXXRecordDecl(),
-                                    CSM, ConstArg, Diagnose))
+                                    CSM, ConstArg, TAH, Diagnose))
         return false;
       continue;
     }
@@ -7229,7 +7306,7 @@
 
     bool ConstRHS = ConstArg && !FI->isMutable();
     if (!checkTrivialSubobjectCall(S, FI->getLocation(), FieldType, ConstRHS,
-                                   CSM, TSK_Field, Diagnose))
+                                   CSM, TSK_Field, TAH, Diagnose))
       return false;
   }
 
@@ -7243,14 +7320,15 @@
 
   bool ConstArg = (CSM == CXXCopyConstructor || CSM == CXXCopyAssignment);
   checkTrivialSubobjectCall(*this, RD->getLocation(), Ty, ConstArg, CSM,
-                            TSK_CompleteObject, /*Diagnose*/true);
+                            TSK_CompleteObject, TAH_IgnoreTrivialABI,
+                            /*Diagnose*/true);
 }
 
 /// Determine whether a defaulted or deleted special member function is trivial,
 /// as specified in C++11 [class.ctor]p5, C++11 [class.copy]p12,
 /// C++11 [class.copy]p25, and C++11 [class.dtor]p5.
 bool Sema::SpecialMemberIsTrivial(CXXMethodDecl *MD, CXXSpecialMember CSM,
-                                  bool Diagnose) {
+                                  TrivialABIHandling TAH, bool Diagnose) {
   assert(!MD->isUserProvided() && CSM != CXXInvalid && "not special enough");
 
   CXXRecordDecl *RD = MD->getParent();
@@ -7327,7 +7405,7 @@
   //       destructors]
   for (const auto &BI : RD->bases())
     if (!checkTrivialSubobjectCall(*this, BI.getLocStart(), BI.getType(),
-                                   ConstArg, CSM, TSK_BaseClass, Diagnose))
+                                   ConstArg, CSM, TSK_BaseClass, TAH, Diagnose))
       return false;
 
   // C++11 [class.ctor]p5, C++11 [class.dtor]p5:
@@ -7342,7 +7420,7 @@
   //    -- for all of the non-static data members of its class that are of class
   //       type (or array thereof), each such class has a trivial [default
   //       constructor or destructor]
-  if (!checkTrivialClassMembers(*this, RD, CSM, ConstArg, Diagnose))
+  if (!checkTrivialClassMembers(*this, RD, CSM, ConstArg, TAH, Diagnose))
     return false;
 
   // C++11 [class.dtor]p5:
@@ -7400,10 +7478,8 @@
       const llvm::SmallPtrSetImpl<const CXXMethodDecl *> &Methods) {
     if (MD->size_overridden_methods() == 0)
       return Methods.count(MD->getCanonicalDecl());
-    for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
-                                        E = MD->end_overridden_methods();
-         I != E; ++I)
-      if (CheckMostOverridenMethods(*I, Methods))
+    for (const CXXMethodDecl *O : MD->overridden_methods())
+      if (CheckMostOverridenMethods(O, Methods))
         return true;
     return false;
   }
@@ -7461,10 +7537,9 @@
                         llvm::SmallPtrSetImpl<const CXXMethodDecl *>& Methods) {
   if (MD->size_overridden_methods() == 0)
     Methods.insert(MD->getCanonicalDecl());
-  for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
-                                      E = MD->end_overridden_methods();
-       I != E; ++I)
-    AddMostOverridenMethods(*I, Methods);
+  else
+    for (const CXXMethodDecl *O : MD->overridden_methods())
+      AddMostOverridenMethods(O, Methods);
 }
 
 /// \brief Check if a method overloads virtual methods in a base class without
@@ -7527,6 +7602,50 @@
   }
 }
 
+void Sema::checkIllFormedTrivialABIStruct(CXXRecordDecl &RD) {
+  auto PrintDiagAndRemoveAttr = [&]() {
+    // No diagnostics if this is a template instantiation.
+    if (!isTemplateInstantiation(RD.getTemplateSpecializationKind()))
+      Diag(RD.getAttr<TrivialABIAttr>()->getLocation(),
+           diag::ext_cannot_use_trivial_abi) << &RD;
+    RD.dropAttr<TrivialABIAttr>();
+  };
+
+  // Ill-formed if the struct has virtual functions.
+  if (RD.isPolymorphic()) {
+    PrintDiagAndRemoveAttr();
+    return;
+  }
+
+  for (const auto &B : RD.bases()) {
+    // Ill-formed if the base class is non-trivial for the purpose of calls or a
+    // virtual base.
+    if ((!B.getType()->isDependentType() &&
+         !B.getType()->getAsCXXRecordDecl()->canPassInRegisters()) ||
+        B.isVirtual()) {
+      PrintDiagAndRemoveAttr();
+      return;
+    }
+  }
+
+  for (const auto *FD : RD.fields()) {
+    // Ill-formed if the field is an ObjectiveC pointer or of a type that is
+    // non-trivial for the purpose of calls.
+    QualType FT = FD->getType();
+    if (FT.getObjCLifetime() == Qualifiers::OCL_Weak) {
+      PrintDiagAndRemoveAttr();
+      return;
+    }
+
+    if (const auto *RT = FT->getBaseElementTypeUnsafe()->getAs<RecordType>())
+      if (!RT->isDependentType() &&
+          !cast<CXXRecordDecl>(RT->getDecl())->canPassInRegisters()) {
+        PrintDiagAndRemoveAttr();
+        return;
+      }
+  }
+}
+
 void Sema::ActOnFinishCXXMemberSpecification(Scope* S, SourceLocation RLoc,
                                              Decl *TagDecl,
                                              SourceLocation LBrac,
@@ -7545,12 +7664,17 @@
       l->getName();
   }
 
+  // See if trivial_abi has to be dropped.
+  auto *RD = dyn_cast<CXXRecordDecl>(TagDecl);
+  if (RD && RD->hasAttr<TrivialABIAttr>())
+    checkIllFormedTrivialABIStruct(*RD);
+
   ActOnFields(S, RLoc, TagDecl, llvm::makeArrayRef(
               // strict aliasing violation!
               reinterpret_cast<Decl**>(FieldCollector->getCurFields()),
               FieldCollector->getCurNumFields()), LBrac, RBrac, AttrList);
 
-  CheckCompletedCXXClass(dyn_cast_or_null<CXXRecordDecl>(TagDecl));
+  CheckCompletedCXXClass(RD);
 }
 
 /// AddImplicitlyDeclaredMembersToClass - Adds any implicitly-declared
@@ -8137,7 +8261,7 @@
           PastFunctionChunk = true;
           break;
         }
-        // Fall through.
+        LLVM_FALLTHROUGH;
       case DeclaratorChunk::Array:
         NeedsTypedef = true;
         extendRight(After, Chunk.getSourceRange());
@@ -8322,8 +8446,7 @@
   // We leave 'friend' and 'virtual' to be rejected in the normal way.
   if (DS.hasTypeSpecifier() || DS.getTypeQualifiers() ||
       DS.getStorageClassSpecLoc().isValid() || DS.isInlineSpecified() ||
-      DS.isNoreturnSpecified() || DS.isConstexprSpecified() ||
-      DS.isConceptSpecified()) {
+      DS.isNoreturnSpecified() || DS.isConstexprSpecified()) {
     BadSpecifierDiagnoser Diagnoser(
         *this, D.getIdentifierLoc(),
         diag::err_deduction_guide_invalid_specifier);
@@ -8336,9 +8459,7 @@
     Diagnoser.check(DS.getInlineSpecLoc(), "inline");
     Diagnoser.check(DS.getNoreturnSpecLoc(), "_Noreturn");
     Diagnoser.check(DS.getConstexprSpecLoc(), "constexpr");
-    Diagnoser.check(DS.getConceptSpecLoc(), "concept");
     DS.ClearConstexprSpec();
-    DS.ClearConceptSpec();
 
     Diagnoser.check(DS.getConstSpecLoc(), "const");
     Diagnoser.check(DS.getRestrictSpecLoc(), "__restrict");
@@ -8915,7 +9036,7 @@
 
     // Find enclosing context containing both using-directive and
     // nominated namespace.
-    DeclContext *CommonAncestor = cast<DeclContext>(NS);
+    DeclContext *CommonAncestor = NS;
     while (CommonAncestor && !CommonAncestor->Encloses(CurContext))
       CommonAncestor = CommonAncestor->getParent();
 
@@ -8969,15 +9090,15 @@
   }
 
   switch (Name.getKind()) {
-  case UnqualifiedId::IK_ImplicitSelfParam:
-  case UnqualifiedId::IK_Identifier:
-  case UnqualifiedId::IK_OperatorFunctionId:
-  case UnqualifiedId::IK_LiteralOperatorId:
-  case UnqualifiedId::IK_ConversionFunctionId:
+  case UnqualifiedIdKind::IK_ImplicitSelfParam:
+  case UnqualifiedIdKind::IK_Identifier:
+  case UnqualifiedIdKind::IK_OperatorFunctionId:
+  case UnqualifiedIdKind::IK_LiteralOperatorId:
+  case UnqualifiedIdKind::IK_ConversionFunctionId:
     break;
 
-  case UnqualifiedId::IK_ConstructorName:
-  case UnqualifiedId::IK_ConstructorTemplateId:
+  case UnqualifiedIdKind::IK_ConstructorName:
+  case UnqualifiedIdKind::IK_ConstructorTemplateId:
     // C++11 inheriting constructors.
     Diag(Name.getLocStart(),
          getLangOpts().CPlusPlus11 ?
@@ -8989,17 +9110,17 @@
 
     return nullptr;
 
-  case UnqualifiedId::IK_DestructorName:
+  case UnqualifiedIdKind::IK_DestructorName:
     Diag(Name.getLocStart(), diag::err_using_decl_destructor)
       << SS.getRange();
     return nullptr;
 
-  case UnqualifiedId::IK_TemplateId:
+  case UnqualifiedIdKind::IK_TemplateId:
     Diag(Name.getLocStart(), diag::err_using_decl_template_id)
       << SourceRange(Name.TemplateId->LAngleLoc, Name.TemplateId->RAngleLoc);
     return nullptr;
 
-  case UnqualifiedId::IK_DeductionGuideName:
+  case UnqualifiedIdKind::IK_DeductionGuideName:
     llvm_unreachable("cannot parse qualified deduction guide name");
   }
 
@@ -10023,7 +10144,7 @@
     Previous.clear();
   }
 
-  assert(Name.Kind == UnqualifiedId::IK_Identifier &&
+  assert(Name.Kind == UnqualifiedIdKind::IK_Identifier &&
          "name in alias declaration must be an identifier");
   TypeAliasDecl *NewTD = TypeAliasDecl::Create(Context, CurContext, UsingLoc,
                                                Name.StartLocation,
@@ -10703,6 +10824,8 @@
   // We don't need to use SpecialMemberIsTrivial here; triviality for
   // destructors is easy to compute.
   Destructor->setTrivial(ClassDecl->hasTrivialDestructor());
+  Destructor->setTrivialForCall(ClassDecl->hasAttr<TrivialABIAttr>() ||
+                                ClassDecl->hasTrivialDestructorForCall());
 
   // Note that we have declared this destructor.
   ++ASTContext::NumImplicitDestructorsDeclared;
@@ -10787,12 +10910,12 @@
 
 void Sema::referenceDLLExportedClassMethods() {
   if (!DelayedDllExportClasses.empty()) {
-    // Calling ReferenceDllExportedMethods might cause the current function to
+    // Calling ReferenceDllExportedMembers might cause the current function to
     // be called again, so use a local copy of DelayedDllExportClasses.
     SmallVector<CXXRecordDecl *, 4> WorkList;
     std::swap(DelayedDllExportClasses, WorkList);
     for (CXXRecordDecl *Class : WorkList)
-      ReferenceDllExportedMethods(*this, Class);
+      ReferenceDllExportedMembers(*this, Class);
   }
 }
 
@@ -10973,11 +11096,11 @@
   Expr *From = FromB.build(S, Loc);
   From = new (S.Context) UnaryOperator(From, UO_AddrOf,
                          S.Context.getPointerType(From->getType()),
-                         VK_RValue, OK_Ordinary, Loc);
+                         VK_RValue, OK_Ordinary, Loc, false);
   Expr *To = ToB.build(S, Loc);
   To = new (S.Context) UnaryOperator(To, UO_AddrOf,
                        S.Context.getPointerType(To->getType()),
-                       VK_RValue, OK_Ordinary, Loc);
+                       VK_RValue, OK_Ordinary, Loc, false);
 
   const Type *E = T->getBaseElementTypeUnsafe();
   bool NeedsCollectableMemCpy =
@@ -11213,16 +11336,18 @@
   Expr *Comparison
     = new (S.Context) BinaryOperator(IterationVarRefRVal.build(S, Loc),
                      IntegerLiteral::Create(S.Context, Upper, SizeType, Loc),
-                                     BO_NE, S.Context.BoolTy,
-                                     VK_RValue, OK_Ordinary, Loc, FPOptions());
-
-  // Create the pre-increment of the iteration variable.
-  Expr *Increment
-    = new (S.Context) UnaryOperator(IterationVarRef.build(S, Loc), UO_PreInc,
-                                    SizeType, VK_LValue, OK_Ordinary, Loc);
-
-  // Construct the loop that copies all elements of this array.
-  return S.ActOnForStmt(
+                                     BO_NE, S.Context.BoolTy,
+                                     VK_RValue, OK_Ordinary, Loc, FPOptions());
+
+  // Create the pre-increment of the iteration variable. We can determine
+  // whether the increment will overflow based on the value of the array
+  // bound.
+  Expr *Increment = new (S.Context)
+      UnaryOperator(IterationVarRef.build(S, Loc), UO_PreInc, SizeType,
+                    VK_LValue, OK_Ordinary, Loc, Upper.isMaxValue());
+
+  // Construct the loop that copies all elements of this array.
+  return S.ActOnForStmt(
       Loc, Loc, InitStmt,
       S.ActOnCondition(nullptr, Loc, Comparison, Sema::ConditionKind::Boolean),
       S.MakeFullDiscardedValueExpr(Increment), Loc, Copy.get());
@@ -12004,9 +12129,16 @@
   CopyConstructor->setParams(FromParam);
 
   CopyConstructor->setTrivial(
-    ClassDecl->needsOverloadResolutionForCopyConstructor()
-      ? SpecialMemberIsTrivial(CopyConstructor, CXXCopyConstructor)
-      : ClassDecl->hasTrivialCopyConstructor());
+      ClassDecl->needsOverloadResolutionForCopyConstructor()
+          ? SpecialMemberIsTrivial(CopyConstructor, CXXCopyConstructor)
+          : ClassDecl->hasTrivialCopyConstructor());
+
+  CopyConstructor->setTrivialForCall(
+      ClassDecl->hasAttr<TrivialABIAttr>() ||
+      (ClassDecl->needsOverloadResolutionForCopyConstructor()
+           ? SpecialMemberIsTrivial(CopyConstructor, CXXCopyConstructor,
+             TAH_ConsiderTrivialABI)
+           : ClassDecl->hasTrivialCopyConstructorForCall()));
 
   // Note that we have declared this constructor.
   ++ASTContext::NumImplicitCopyConstructorsDeclared;
@@ -12127,9 +12259,16 @@
   MoveConstructor->setParams(FromParam);
 
   MoveConstructor->setTrivial(
-    ClassDecl->needsOverloadResolutionForMoveConstructor()
-      ? SpecialMemberIsTrivial(MoveConstructor, CXXMoveConstructor)
-      : ClassDecl->hasTrivialMoveConstructor());
+      ClassDecl->needsOverloadResolutionForMoveConstructor()
+          ? SpecialMemberIsTrivial(MoveConstructor, CXXMoveConstructor)
+          : ClassDecl->hasTrivialMoveConstructor());
+
+  MoveConstructor->setTrivialForCall(
+      ClassDecl->hasAttr<TrivialABIAttr>() ||
+      (ClassDecl->needsOverloadResolutionForMoveConstructor()
+           ? SpecialMemberIsTrivial(MoveConstructor, CXXMoveConstructor,
+                                    TAH_ConsiderTrivialABI)
+           : ClassDecl->hasTrivialMoveConstructorForCall()));
 
   // Note that we have declared this constructor.
   ++ASTContext::NumImplicitMoveConstructorsDeclared;
@@ -12198,30 +12337,27 @@
                             SourceLocation CurrentLocation,
                             CXXConversionDecl *Conv) {
   SynthesizedFunctionScope Scope(*this, Conv);
+  assert(!Conv->getReturnType()->isUndeducedType());
 
   CXXRecordDecl *Lambda = Conv->getParent();
-  CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator();
-  // If we are defining a specialization of a conversion to function-ptr
-  // cache the deduced template arguments for this specialization
-  // so that we can use them to retrieve the corresponding call-operator
-  // and static-invoker.
-  const TemplateArgumentList *DeducedTemplateArgs = nullptr;
+  FunctionDecl *CallOp = Lambda->getLambdaCallOperator();
+  FunctionDecl *Invoker = Lambda->getLambdaStaticInvoker();
 
-  // Retrieve the corresponding call-operator specialization.
-  if (Lambda->isGenericLambda()) {
-    assert(Conv->isFunctionTemplateSpecialization());
-    FunctionTemplateDecl *CallOpTemplate =
-        CallOp->getDescribedFunctionTemplate();
-    DeducedTemplateArgs = Conv->getTemplateSpecializationArgs();
-    void *InsertPos = nullptr;
-    FunctionDecl *CallOpSpec = CallOpTemplate->findSpecialization(
-                                                DeducedTemplateArgs->asArray(),
-                                                InsertPos);
-    assert(CallOpSpec &&
-          "Conversion operator must have a corresponding call operator");
-    CallOp = cast<CXXMethodDecl>(CallOpSpec);
+  if (auto *TemplateArgs = Conv->getTemplateSpecializationArgs()) {
+    CallOp = InstantiateFunctionDeclaration(
+        CallOp->getDescribedFunctionTemplate(), TemplateArgs, CurrentLocation);
+    if (!CallOp)
+      return;
+
+    Invoker = InstantiateFunctionDeclaration(
+        Invoker->getDescribedFunctionTemplate(), TemplateArgs, CurrentLocation);
+    if (!Invoker)
+      return;
   }
 
+  if (CallOp->isInvalidDecl())
+    return;
+
   // Mark the call operator referenced (and add to pending instantiations
   // if necessary).
   // For both the conversion and static-invoker template specializations
@@ -12229,40 +12365,24 @@
   // to the PendingInstantiations.
   MarkFunctionReferenced(CurrentLocation, CallOp);
 
-  // Retrieve the static invoker...
-  CXXMethodDecl *Invoker = Lambda->getLambdaStaticInvoker();
-  // ... and get the corresponding specialization for a generic lambda.
-  if (Lambda->isGenericLambda()) {
-    assert(DeducedTemplateArgs &&
-      "Must have deduced template arguments from Conversion Operator");
-    FunctionTemplateDecl *InvokeTemplate =
-                          Invoker->getDescribedFunctionTemplate();
-    void *InsertPos = nullptr;
-    FunctionDecl *InvokeSpec = InvokeTemplate->findSpecialization(
-                                                DeducedTemplateArgs->asArray(),
-                                                InsertPos);
-    assert(InvokeSpec &&
-      "Must have a corresponding static invoker specialization");
-    Invoker = cast<CXXMethodDecl>(InvokeSpec);
-  }
-  // Construct the body of the conversion function { return __invoke; }.
-  Expr *FunctionRef = BuildDeclRefExpr(Invoker, Invoker->getType(),
-                                        VK_LValue, Conv->getLocation()).get();
-   assert(FunctionRef && "Can't refer to __invoke function?");
-   Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get();
-   Conv->setBody(new (Context) CompoundStmt(Context, Return,
-                                            Conv->getLocation(),
-                                            Conv->getLocation()));
-
-  Conv->markUsed(Context);
-  Conv->setReferenced();
-
   // Fill in the __invoke function with a dummy implementation. IR generation
-  // will fill in the actual details.
+  // will fill in the actual details. Update its type in case it contained
+  // an 'auto'.
   Invoker->markUsed(Context);
   Invoker->setReferenced();
+  Invoker->setType(Conv->getReturnType()->getPointeeType());
   Invoker->setBody(new (Context) CompoundStmt(Conv->getLocation()));
 
+  // Construct the body of the conversion function { return __invoke; }.
+  Expr *FunctionRef = BuildDeclRefExpr(Invoker, Invoker->getType(),
+                                       VK_LValue, Conv->getLocation()).get();
+  assert(FunctionRef && "Can't refer to __invoke function?");
+  Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get();
+  Conv->setBody(CompoundStmt::Create(Context, Return, Conv->getLocation(),
+                                     Conv->getLocation()));
+  Conv->markUsed(Context);
+  Conv->setReferenced();
+
   if (ASTMutationListener *L = getASTMutationListener()) {
     L->CompletedImplicitDefinition(Conv);
     L->CompletedImplicitDefinition(Invoker);
@@ -12313,9 +12433,8 @@
 
   // Set the body of the conversion function.
   Stmt *ReturnS = Return.get();
-  Conv->setBody(new (Context) CompoundStmt(Context, ReturnS,
-                                           Conv->getLocation(),
-                                           Conv->getLocation()));
+  Conv->setBody(CompoundStmt::Create(Context, ReturnS, Conv->getLocation(),
+                                     Conv->getLocation()));
   Conv->markUsed(Context);
 
   // We're done; notify the mutation listener, if any.
@@ -12335,7 +12454,7 @@
     if (!Args[1]->isDefaultArgument())
       return false;
 
-    // fall through
+    LLVM_FALLTHROUGH;
   case 1:
     return !Args[0]->isDefaultArgument();
   }
@@ -13055,7 +13174,8 @@
 
   StringRef LiteralName
     = FnDecl->getDeclName().getCXXLiteralIdentifier()->getName();
-  if (LiteralName[0] != '_') {
+  if (LiteralName[0] != '_' &&
+      !getSourceManager().isInSystemHeader(FnDecl->getLocation())) {
     // C++11 [usrlit.suffix]p1:
     //   Literal suffix identifiers that do not start with an underscore
     //   are reserved for future standardization.
@@ -13609,7 +13729,7 @@
   // Try to convert the decl specifier to a type.  This works for
   // friend templates because ActOnTag never produces a ClassTemplateDecl
   // for a TUK_Friend.
-  Declarator TheDeclarator(DS, Declarator::MemberContext);
+  Declarator TheDeclarator(DS, DeclaratorContext::MemberContext);
   TypeSourceInfo *TSI = GetTypeForDeclarator(TheDeclarator, S);
   QualType T = TSI->getType();
   if (TheDeclarator.isInvalidType())
@@ -13783,7 +13903,8 @@
     //   elaborated-type-specifier, the lookup to determine whether
     //   the entity has been previously declared shall not consider
     //   any scopes outside the innermost enclosing namespace.
-    bool isTemplateId = D.getName().getKind() == UnqualifiedId::IK_TemplateId;
+    bool isTemplateId =
+        D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId;
 
     // Find the appropriate context according to the above.
     DC = CurContext;
@@ -13894,24 +14015,24 @@
   if (!DC->isRecord()) {
     int DiagArg = -1;
     switch (D.getName().getKind()) {
-    case UnqualifiedId::IK_ConstructorTemplateId:
-    case UnqualifiedId::IK_ConstructorName:
+    case UnqualifiedIdKind::IK_ConstructorTemplateId:
+    case UnqualifiedIdKind::IK_ConstructorName:
       DiagArg = 0;
       break;
-    case UnqualifiedId::IK_DestructorName:
+    case UnqualifiedIdKind::IK_DestructorName:
       DiagArg = 1;
       break;
-    case UnqualifiedId::IK_ConversionFunctionId:
+    case UnqualifiedIdKind::IK_ConversionFunctionId:
       DiagArg = 2;
       break;
-    case UnqualifiedId::IK_DeductionGuideName:
+    case UnqualifiedIdKind::IK_DeductionGuideName:
       DiagArg = 3;
       break;
-    case UnqualifiedId::IK_Identifier:
-    case UnqualifiedId::IK_ImplicitSelfParam:
-    case UnqualifiedId::IK_LiteralOperatorId:
-    case UnqualifiedId::IK_OperatorFunctionId:
-    case UnqualifiedId::IK_TemplateId:
+    case UnqualifiedIdKind::IK_Identifier:
+    case UnqualifiedIdKind::IK_ImplicitSelfParam:
+    case UnqualifiedIdKind::IK_LiteralOperatorId:
+    case UnqualifiedIdKind::IK_OperatorFunctionId:
+    case UnqualifiedIdKind::IK_TemplateId:
       break;
     }
     // This implies that it has to be an operator or function.
@@ -14040,15 +14161,13 @@
   // non-deleted virtual function.
   if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Fn)) {
     bool IssuedDiagnostic = false;
-    for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
-                                        E = MD->end_overridden_methods();
-         I != E; ++I) {
+    for (const CXXMethodDecl *O : MD->overridden_methods()) {
       if (!(*MD->begin_overridden_methods())->isDeleted()) {
         if (!IssuedDiagnostic) {
           Diag(DelLoc, diag::err_deleted_override) << MD->getDeclName();
           IssuedDiagnostic = true;
         }
-        Diag((*I)->getLocation(), diag::note_overridden_virtual_function);
+        Diag(O->getLocation(), diag::note_overridden_virtual_function);
       }
     }
     // If this function was implicitly deleted because it was defaulted,
@@ -14417,7 +14536,7 @@
 
   // Try to insert this class into the map.
   LoadExternalVTableUses();
-  Class = cast<CXXRecordDecl>(Class->getCanonicalDecl());
+  Class = Class->getCanonicalDecl();
   std::pair<llvm::DenseMap<CXXRecordDecl *, bool>::iterator, bool>
     Pos = VTablesUsed.insert(std::make_pair(Class, DefinitionRequired));
   if (!Pos.second) {
@@ -14529,7 +14648,7 @@
     // vtable for this class is required.
     DefinedAnything = true;
     MarkVirtualMembersReferenced(Loc, Class);
-    CXXRecordDecl *Canonical = cast<CXXRecordDecl>(Class->getCanonicalDecl());
+    CXXRecordDecl *Canonical = Class->getCanonicalDecl();
     if (VTablesUsed[Canonical])
       Consumer.HandleVTable(Class);
 
@@ -14959,10 +15078,8 @@
 
   if (Method->isVirtual()) {
     // Check overrides, which we previously had to delay.
-    for (CXXMethodDecl::method_iterator O = Method->begin_overridden_methods(),
-                                     OEnd = Method->end_overridden_methods();
-         O != OEnd; ++O)
-      CheckOverridingFunctionExceptionSpec(Method, *O);
+    for (const CXXMethodDecl *O : Method->overridden_methods())
+      CheckOverridingFunctionExceptionSpec(Method, O);
   }
 }
 
@@ -14998,7 +15115,7 @@
 
   if (D.getDeclSpec().isInlineSpecified())
     Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
-        << getLangOpts().CPlusPlus1z;
+        << getLangOpts().CPlusPlus17;
   if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec())
     Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
          diag::err_invalid_thread)
diff --git a/lib/Sema/SemaDeclObjC.cpp b/lib/Sema/SemaDeclObjC.cpp
index 607ffe9..748285b 100644
--- a/lib/Sema/SemaDeclObjC.cpp
+++ b/lib/Sema/SemaDeclObjC.cpp
@@ -156,23 +156,23 @@
       Diag(Overridden->getLocation(), 
            diag::note_related_result_type_overridden);
   }
-  if (getLangOpts().ObjCAutoRefCount) {
-    Diags.setSeverity(diag::warn_nsreturns_retained_attribute_mismatch,
-                      diag::Severity::Error, SourceLocation());
-    Diags.setSeverity(diag::warn_nsconsumed_attribute_mismatch,
-                      diag::Severity::Error, SourceLocation());
-  }
 
   if ((NewMethod->hasAttr<NSReturnsRetainedAttr>() !=
        Overridden->hasAttr<NSReturnsRetainedAttr>())) {
     Diag(NewMethod->getLocation(),
-         diag::warn_nsreturns_retained_attribute_mismatch) << 1;
+         getLangOpts().ObjCAutoRefCount
+             ? diag::err_nsreturns_retained_attribute_mismatch
+             : diag::warn_nsreturns_retained_attribute_mismatch)
+        << 1;
     Diag(Overridden->getLocation(), diag::note_previous_decl) << "method";
   }
   if ((NewMethod->hasAttr<NSReturnsNotRetainedAttr>() !=
        Overridden->hasAttr<NSReturnsNotRetainedAttr>())) {
     Diag(NewMethod->getLocation(),
-         diag::warn_nsreturns_retained_attribute_mismatch) << 0;
+         getLangOpts().ObjCAutoRefCount
+             ? diag::err_nsreturns_retained_attribute_mismatch
+             : diag::warn_nsreturns_retained_attribute_mismatch)
+        << 0;
     Diag(Overridden->getLocation(), diag::note_previous_decl)  << "method";
   }
 
@@ -185,7 +185,10 @@
     ParmVarDecl *newDecl = (*ni);
     if (newDecl->hasAttr<NSConsumedAttr>() !=
         oldDecl->hasAttr<NSConsumedAttr>()) {
-      Diag(newDecl->getLocation(), diag::warn_nsconsumed_attribute_mismatch);
+      Diag(newDecl->getLocation(),
+           getLangOpts().ObjCAutoRefCount
+               ? diag::err_nsconsumed_attribute_mismatch
+               : diag::warn_nsconsumed_attribute_mismatch);
       Diag(oldDecl->getLocation(), diag::note_previous_decl) << "parameter";
     }
 
@@ -1538,7 +1541,7 @@
     DS.SetRangeEnd(loc);
 
     // Form the declarator.
-    Declarator D(DS, Declarator::TypeNameContext);
+    Declarator D(DS, DeclaratorContext::TypeNameContext);
 
     // If we have a typedef of an Objective-C class type that is missing a '*',
     // add the '*'.
@@ -1832,6 +1835,13 @@
   // FIXME: PushOnScopeChains?
   CurContext->addDecl(CDecl);
 
+  // Process the attributes before looking at protocols to ensure that the
+  // availability attribute is attached to the category to provide availability
+  // checking for protocol uses.
+  if (AttrList)
+    ProcessDeclAttributeList(TUScope, CDecl, AttrList);
+  AddPragmaAttributes(TUScope, CDecl);
+
   if (NumProtoRefs) {
     diagnoseUseOfProtocols(*this, CDecl, (ObjCProtocolDecl*const*)ProtoRefs,
                            NumProtoRefs, ProtoLocs);
@@ -1843,10 +1853,6 @@
                                             NumProtoRefs, Context); 
   }
 
-  if (AttrList)
-    ProcessDeclAttributeList(TUScope, CDecl, AttrList);
-  AddPragmaAttributes(TUScope, CDecl);
-
   CheckObjCDeclScope(CDecl);
   return ActOnObjCContainerStartDefinition(CDecl);
 }
@@ -3721,6 +3727,26 @@
   }
 }
 
+/// Diagnose attempts to use flexible array member with retainable object type.
+static void DiagnoseRetainableFlexibleArrayMember(Sema &S,
+                                                  ObjCInterfaceDecl *ID) {
+  if (!S.getLangOpts().ObjCAutoRefCount)
+    return;
+
+  for (auto ivar = ID->all_declared_ivar_begin(); ivar;
+       ivar = ivar->getNextIvar()) {
+    if (ivar->isInvalidDecl())
+      continue;
+    QualType IvarTy = ivar->getType();
+    if (IvarTy->isIncompleteArrayType() &&
+        (IvarTy.getObjCLifetime() != Qualifiers::OCL_ExplicitNone) &&
+        IvarTy->isObjCLifetimeType()) {
+      S.Diag(ivar->getLocation(), diag::err_flexible_array_arc_retainable);
+      ivar->setInvalidDecl();
+    }
+  }
+}
+
 Sema::ObjCContainerKind Sema::getObjCContainerKind() const {
   switch (CurContext->getDeclKind()) {
     case Decl::ObjCInterface:
@@ -3741,6 +3767,96 @@
   }
 }
 
+static bool IsVariableSizedType(QualType T) {
+  if (T->isIncompleteArrayType())
+    return true;
+  const auto *RecordTy = T->getAs<RecordType>();
+  return (RecordTy && RecordTy->getDecl()->hasFlexibleArrayMember());
+}
+
+static void DiagnoseVariableSizedIvars(Sema &S, ObjCContainerDecl *OCD) {
+  ObjCInterfaceDecl *IntfDecl = nullptr;
+  ObjCInterfaceDecl::ivar_range Ivars = llvm::make_range(
+      ObjCInterfaceDecl::ivar_iterator(), ObjCInterfaceDecl::ivar_iterator());
+  if ((IntfDecl = dyn_cast<ObjCInterfaceDecl>(OCD))) {
+    Ivars = IntfDecl->ivars();
+  } else if (auto *ImplDecl = dyn_cast<ObjCImplementationDecl>(OCD)) {
+    IntfDecl = ImplDecl->getClassInterface();
+    Ivars = ImplDecl->ivars();
+  } else if (auto *CategoryDecl = dyn_cast<ObjCCategoryDecl>(OCD)) {
+    if (CategoryDecl->IsClassExtension()) {
+      IntfDecl = CategoryDecl->getClassInterface();
+      Ivars = CategoryDecl->ivars();
+    }
+  }
+
+  // Check if variable sized ivar is in interface and visible to subclasses.
+  if (!isa<ObjCInterfaceDecl>(OCD)) {
+    for (auto ivar : Ivars) {
+      if (!ivar->isInvalidDecl() && IsVariableSizedType(ivar->getType())) {
+        S.Diag(ivar->getLocation(), diag::warn_variable_sized_ivar_visibility)
+            << ivar->getDeclName() << ivar->getType();
+      }
+    }
+  }
+
+  // Subsequent checks require interface decl.
+  if (!IntfDecl)
+    return;
+
+  // Check if variable sized ivar is followed by another ivar.
+  for (ObjCIvarDecl *ivar = IntfDecl->all_declared_ivar_begin(); ivar;
+       ivar = ivar->getNextIvar()) {
+    if (ivar->isInvalidDecl() || !ivar->getNextIvar())
+      continue;
+    QualType IvarTy = ivar->getType();
+    bool IsInvalidIvar = false;
+    if (IvarTy->isIncompleteArrayType()) {
+      S.Diag(ivar->getLocation(), diag::err_flexible_array_not_at_end)
+          << ivar->getDeclName() << IvarTy
+          << TTK_Class; // Use "class" for Obj-C.
+      IsInvalidIvar = true;
+    } else if (const RecordType *RecordTy = IvarTy->getAs<RecordType>()) {
+      if (RecordTy->getDecl()->hasFlexibleArrayMember()) {
+        S.Diag(ivar->getLocation(),
+               diag::err_objc_variable_sized_type_not_at_end)
+            << ivar->getDeclName() << IvarTy;
+        IsInvalidIvar = true;
+      }
+    }
+    if (IsInvalidIvar) {
+      S.Diag(ivar->getNextIvar()->getLocation(),
+             diag::note_next_ivar_declaration)
+          << ivar->getNextIvar()->getSynthesize();
+      ivar->setInvalidDecl();
+    }
+  }
+
+  // Check if ObjC container adds ivars after variable sized ivar in superclass.
+  // Perform the check only if OCD is the first container to declare ivars to
+  // avoid multiple warnings for the same ivar.
+  ObjCIvarDecl *FirstIvar =
+      (Ivars.begin() == Ivars.end()) ? nullptr : *Ivars.begin();
+  if (FirstIvar && (FirstIvar == IntfDecl->all_declared_ivar_begin())) {
+    const ObjCInterfaceDecl *SuperClass = IntfDecl->getSuperClass();
+    while (SuperClass && SuperClass->ivar_empty())
+      SuperClass = SuperClass->getSuperClass();
+    if (SuperClass) {
+      auto IvarIter = SuperClass->ivar_begin();
+      std::advance(IvarIter, SuperClass->ivar_size() - 1);
+      const ObjCIvarDecl *LastIvar = *IvarIter;
+      if (IsVariableSizedType(LastIvar->getType())) {
+        S.Diag(FirstIvar->getLocation(),
+               diag::warn_superclass_variable_sized_type_not_at_end)
+            << FirstIvar->getDeclName() << LastIvar->getDeclName()
+            << LastIvar->getType() << SuperClass->getDeclName();
+        S.Diag(LastIvar->getLocation(), diag::note_entity_declared_at)
+            << LastIvar->getDeclName();
+      }
+    }
+  }
+}
+
 // Note: For class/category implementations, allMethods is always null.
 Decl *Sema::ActOnAtEnd(Scope *S, SourceRange AtEnd, ArrayRef<Decl *> allMethods,
                        ArrayRef<DeclGroupPtrTy> allTUVars) {
@@ -3749,9 +3865,9 @@
 
   assert(AtEnd.isValid() && "Invalid location for '@end'");
 
-  ObjCContainerDecl *OCD = dyn_cast<ObjCContainerDecl>(CurContext);
-  Decl *ClassDecl = cast<Decl>(OCD);
-  
+  auto *OCD = cast<ObjCContainerDecl>(CurContext);
+  Decl *ClassDecl = OCD;
+
   bool isInterfaceDeclKind =
         isa<ObjCInterfaceDecl>(ClassDecl) || isa<ObjCCategoryDecl>(ClassDecl)
          || isa<ObjCProtocolDecl>(ClassDecl);
@@ -3872,6 +3988,7 @@
       if (IDecl->hasDesignatedInitializers())
         DiagnoseMissingDesignatedInitOverrides(IC, IDecl);
       DiagnoseWeakIvars(*this, IC);
+      DiagnoseRetainableFlexibleArrayMember(*this, IDecl);
 
       bool HasRootClassAttr = IDecl->hasAttr<ObjCRootClassAttr>();
       if (IDecl->getSuperClass() == nullptr) {
@@ -3941,6 +4058,7 @@
       }
     }
   }
+  DiagnoseVariableSizedIvars(*this, OCD);
   if (isInterfaceDeclKind) {
     // Reject invalid vardecls.
     for (unsigned i = 0, e = allTUVars.size(); i != e; i++) {
@@ -4018,7 +4136,7 @@
 public:
   Sema &S;
   ObjCMethodDecl *Method;
-  llvm::SmallPtrSet<ObjCMethodDecl*, 4> Overridden;
+  llvm::SmallSetVector<ObjCMethodDecl*, 4> Overridden;
   bool Recursive;
 
 public:
@@ -4055,7 +4173,7 @@
     }
   }
 
-  typedef llvm::SmallPtrSetImpl<ObjCMethodDecl*>::iterator iterator;
+  typedef decltype(Overridden)::iterator iterator;
   iterator begin() const { return Overridden.begin(); }
   iterator end() const { return Overridden.end(); }
 
@@ -4223,10 +4341,6 @@
 
     // Then merge the declarations.
     mergeObjCMethodDecls(ObjCMethod, overridden);
-  }
-
-  for (ObjCMethodDecl *overridden : overrides) {
-    CheckObjCMethodOverride(ObjCMethod, overridden);
 
     if (ObjCMethod->isImplicit() && overridden->isImplicit())
       continue; // Conflicting properties are detected elsewhere.
@@ -4407,8 +4521,7 @@
     Diag(MethodLoc, diag::err_missing_method_context);
     return nullptr;
   }
-  ObjCContainerDecl *OCD = dyn_cast<ObjCContainerDecl>(CurContext);
-  Decl *ClassDecl = cast<Decl>(OCD); 
+  Decl *ClassDecl = cast<ObjCContainerDecl>(CurContext);
   QualType resultDeclType;
 
   bool HasRelatedResultType = false;
@@ -4657,7 +4770,7 @@
   Context.DeepCollectObjCIvars(Class, true, Ivars);
   // For each ivar, create a fresh ObjCAtDefsFieldDecl.
   for (unsigned i = 0; i < Ivars.size(); i++) {
-    const FieldDecl* ID = cast<FieldDecl>(Ivars[i]);
+    const FieldDecl* ID = Ivars[i];
     RecordDecl *Record = dyn_cast<RecordDecl>(TagD);
     Decl *FD = ObjCAtDefsFieldDecl::Create(Context, Record,
                                            /*FIXME: StartL=*/ID->getLocation(),
@@ -4672,7 +4785,7 @@
        D != Decls.end(); ++D) {
     FieldDecl *FD = cast<FieldDecl>(*D);
     if (getLangOpts().CPlusPlus)
-      PushOnScopeChains(cast<FieldDecl>(FD), S);
+      PushOnScopeChains(FD, S);
     else if (RecordDecl *Record = dyn_cast<RecordDecl>(TagD))
       Record->addDecl(FD);
   }
@@ -4734,7 +4847,7 @@
   }
   if (DS.isInlineSpecified())
     Diag(DS.getInlineSpecLoc(), diag::err_inline_non_function)
-        << getLangOpts().CPlusPlus1z;
+        << getLangOpts().CPlusPlus17;
   if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec())
     Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
          diag::err_invalid_thread)
diff --git a/lib/Sema/SemaExceptionSpec.cpp b/lib/Sema/SemaExceptionSpec.cpp
index deb6cbb..21ca08f 100644
--- a/lib/Sema/SemaExceptionSpec.cpp
+++ b/lib/Sema/SemaExceptionSpec.cpp
@@ -145,7 +145,7 @@
 bool Sema::CheckDistantExceptionSpec(QualType T) {
   // C++17 removes this rule in favor of putting exception specifications into
   // the type system.
-  if (getLangOpts().CPlusPlus1z)
+  if (getLangOpts().CPlusPlus17)
     return false;
 
   if (const PointerType *PT = T->getAs<PointerType>())
@@ -203,8 +203,8 @@
     if (auto *Listener = getASTMutationListener())
       Listener->ResolvedExceptionSpec(FD);
 
-  for (auto *Redecl : FD->redecls())
-    Context.adjustExceptionSpec(cast<FunctionDecl>(Redecl), ESI);
+  for (FunctionDecl *Redecl : FD->redecls())
+    Context.adjustExceptionSpec(Redecl, ESI);
 }
 
 static bool CheckEquivalentExceptionSpecImpl(
@@ -237,10 +237,10 @@
 }
 
 bool Sema::CheckEquivalentExceptionSpec(FunctionDecl *Old, FunctionDecl *New) {
-  // Just completely ignore this under -fno-exceptions prior to C++1z.
-  // In C++1z onwards, the exception specification is part of the type and
+  // Just completely ignore this under -fno-exceptions prior to C++17.
+  // In C++17 onwards, the exception specification is part of the type and
   // we will diagnose mismatches anyway, so it's better to check for them here.
-  if (!getLangOpts().CXXExceptions && !getLangOpts().CPlusPlus1z)
+  if (!getLangOpts().CXXExceptions && !getLangOpts().CPlusPlus17)
     return false;
 
   OverloadedOperatorKind OO = New->getDeclName().getCXXOverloadedOperator();
@@ -626,6 +626,90 @@
                                           New, NewLoc);
 }
 
+bool Sema::handlerCanCatch(QualType HandlerType, QualType ExceptionType) {
+  // [except.handle]p3:
+  //   A handler is a match for an exception object of type E if:
+
+  // HandlerType must be ExceptionType or derived from it, or pointer or
+  // reference to such types.
+  const ReferenceType *RefTy = HandlerType->getAs<ReferenceType>();
+  if (RefTy)
+    HandlerType = RefTy->getPointeeType();
+
+  //   -- the handler is of type cv T or cv T& and E and T are the same type
+  if (Context.hasSameUnqualifiedType(ExceptionType, HandlerType))
+    return true;
+
+  // FIXME: ObjC pointer types?
+  if (HandlerType->isPointerType() || HandlerType->isMemberPointerType()) {
+    if (RefTy && (!HandlerType.isConstQualified() ||
+                  HandlerType.isVolatileQualified()))
+      return false;
+
+    // -- the handler is of type cv T or const T& where T is a pointer or
+    //    pointer to member type and E is std::nullptr_t
+    if (ExceptionType->isNullPtrType())
+      return true;
+
+    // -- the handler is of type cv T or const T& where T is a pointer or
+    //    pointer to member type and E is a pointer or pointer to member type
+    //    that can be converted to T by one or more of
+    //    -- a qualification conversion
+    //    -- a function pointer conversion
+    bool LifetimeConv;
+    QualType Result;
+    // FIXME: Should we treat the exception as catchable if a lifetime
+    // conversion is required?
+    if (IsQualificationConversion(ExceptionType, HandlerType, false,
+                                  LifetimeConv) ||
+        IsFunctionConversion(ExceptionType, HandlerType, Result))
+      return true;
+
+    //    -- a standard pointer conversion [...]
+    if (!ExceptionType->isPointerType() || !HandlerType->isPointerType())
+      return false;
+
+    // Handle the "qualification conversion" portion.
+    Qualifiers EQuals, HQuals;
+    ExceptionType = Context.getUnqualifiedArrayType(
+        ExceptionType->getPointeeType(), EQuals);
+    HandlerType = Context.getUnqualifiedArrayType(
+        HandlerType->getPointeeType(), HQuals);
+    if (!HQuals.compatiblyIncludes(EQuals))
+      return false;
+
+    if (HandlerType->isVoidType() && ExceptionType->isObjectType())
+      return true;
+
+    // The only remaining case is a derived-to-base conversion.
+  }
+
+  //   -- the handler is of type cg T or cv T& and T is an unambiguous public
+  //      base class of E
+  if (!ExceptionType->isRecordType() || !HandlerType->isRecordType())
+    return false;
+  CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
+                     /*DetectVirtual=*/false);
+  if (!IsDerivedFrom(SourceLocation(), ExceptionType, HandlerType, Paths) ||
+      Paths.isAmbiguous(Context.getCanonicalType(HandlerType)))
+    return false;
+
+  // Do this check from a context without privileges.
+  switch (CheckBaseClassAccess(SourceLocation(), HandlerType, ExceptionType,
+                               Paths.front(),
+                               /*Diagnostic*/ 0,
+                               /*ForceCheck*/ true,
+                               /*ForceUnprivileged*/ true)) {
+  case AR_accessible: return true;
+  case AR_inaccessible: return false;
+  case AR_dependent:
+    llvm_unreachable("access check dependent for unprivileged context");
+  case AR_delayed:
+    llvm_unreachable("access check delayed in non-declaration");
+  }
+  llvm_unreachable("unexpected access check result");
+}
+
 /// CheckExceptionSpecSubset - Check whether the second function type's
 /// exception specification is a subset (or equivalent) of the first function
 /// type. This is used by override and pointer assignment checks.
@@ -722,75 +806,23 @@
          "Exception spec subset: non-dynamic case slipped through.");
 
   // Neither contains everything or nothing. Do a proper comparison.
-  for (const auto &SubI : Subset->exceptions()) {
-    // Take one type from the subset.
-    QualType CanonicalSubT = Context.getCanonicalType(SubI);
-    // Unwrap pointers and references so that we can do checks within a class
-    // hierarchy. Don't unwrap member pointers; they don't have hierarchy
-    // conversions on the pointee.
-    bool SubIsPointer = false;
-    if (const ReferenceType *RefTy = CanonicalSubT->getAs<ReferenceType>())
-      CanonicalSubT = RefTy->getPointeeType();
-    if (const PointerType *PtrTy = CanonicalSubT->getAs<PointerType>()) {
-      CanonicalSubT = PtrTy->getPointeeType();
-      SubIsPointer = true;
-    }
-    bool SubIsClass = CanonicalSubT->isRecordType();
-    CanonicalSubT = CanonicalSubT.getLocalUnqualifiedType();
+  for (QualType SubI : Subset->exceptions()) {
+    if (const ReferenceType *RefTy = SubI->getAs<ReferenceType>())
+      SubI = RefTy->getPointeeType();
 
-    CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
-                       /*DetectVirtual=*/false);
-
-    bool Contained = false;
     // Make sure it's in the superset.
-    for (const auto &SuperI : Superset->exceptions()) {
-      QualType CanonicalSuperT = Context.getCanonicalType(SuperI);
-      // SubT must be SuperT or derived from it, or pointer or reference to
-      // such types.
-      if (const ReferenceType *RefTy = CanonicalSuperT->getAs<ReferenceType>())
-        CanonicalSuperT = RefTy->getPointeeType();
-      if (SubIsPointer) {
-        if (const PointerType *PtrTy = CanonicalSuperT->getAs<PointerType>())
-          CanonicalSuperT = PtrTy->getPointeeType();
-        else {
-          continue;
-        }
-      }
-      CanonicalSuperT = CanonicalSuperT.getLocalUnqualifiedType();
-      // If the types are the same, move on to the next type in the subset.
-      if (CanonicalSubT == CanonicalSuperT) {
+    bool Contained = false;
+    for (QualType SuperI : Superset->exceptions()) {
+      // [except.spec]p5:
+      //   the target entity shall allow at least the exceptions allowed by the
+      //   source
+      //
+      // We interpret this as meaning that a handler for some target type would
+      // catch an exception of each source type.
+      if (handlerCanCatch(SuperI, SubI)) {
         Contained = true;
         break;
       }
-
-      // Otherwise we need to check the inheritance.
-      if (!SubIsClass || !CanonicalSuperT->isRecordType())
-        continue;
-
-      Paths.clear();
-      if (!IsDerivedFrom(SubLoc, CanonicalSubT, CanonicalSuperT, Paths))
-        continue;
-
-      if (Paths.isAmbiguous(Context.getCanonicalType(CanonicalSuperT)))
-        continue;
-
-      // Do this check from a context without privileges.
-      switch (CheckBaseClassAccess(SourceLocation(),
-                                   CanonicalSuperT, CanonicalSubT,
-                                   Paths.front(),
-                                   /*Diagnostic*/ 0,
-                                   /*ForceCheck*/ true,
-                                   /*ForceUnprivileged*/ true)) {
-      case AR_accessible: break;
-      case AR_inaccessible: continue;
-      case AR_dependent:
-        llvm_unreachable("access check dependent for unprivileged context");
-      case AR_delayed:
-        llvm_unreachable("access check delayed in non-declaration");
-      }
-
-      Contained = true;
-      break;
     }
     if (!Contained) {
       Diag(SubLoc, DiagID);
@@ -872,7 +904,7 @@
   // This is not an error in C++17 onwards, unless the noexceptness doesn't
   // match, but in that case we have a full-on type mismatch, not just a
   // type sugar mismatch.
-  if (getLangOpts().CPlusPlus1z) {
+  if (getLangOpts().CPlusPlus17) {
     DiagID = diag::warn_incompatible_exception_specs;
     NestedDiagID = diag::warn_deep_exception_specs_differ;
   }
@@ -892,7 +924,7 @@
   return CheckExceptionSpecSubset(PDiag(DiagID), PDiag(NestedDiagID), PDiag(),
                                   ToFunc, From->getSourceRange().getBegin(),
                                   FromFunc, SourceLocation()) &&
-         !getLangOpts().CPlusPlus1z;
+         !getLangOpts().CPlusPlus17;
 }
 
 bool Sema::CheckOverridingFunctionExceptionSpec(const CXXMethodDecl *New,
@@ -953,7 +985,7 @@
   QualType T;
 
   // In C++1z, just look at the function type of the callee.
-  if (S.getLangOpts().CPlusPlus1z && isa<CallExpr>(E)) {
+  if (S.getLangOpts().CPlusPlus17 && isa<CallExpr>(E)) {
     E = cast<CallExpr>(E)->getCallee();
     T = E->getType();
     if (T->isSpecificPlaceholderType(BuiltinType::BoundMember)) {
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index 1d8b9bc..8c22c0e 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -776,6 +776,9 @@
     return VAK_Valid;
   }
 
+  if (Ty.isDestructedType() == QualType::DK_nontrivial_c_struct)
+    return VAK_Invalid;
+
   if (Ty.isCXX98PODType(Context))
     return VAK_Valid;
 
@@ -817,7 +820,7 @@
         E->getLocStart(), nullptr,
         PDiag(diag::warn_cxx98_compat_pass_non_pod_arg_to_vararg)
           << Ty << CT);
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case VAK_Valid:
     if (Ty->isRecordType()) {
       // This is unlikely to be what the user intended. If the class has a
@@ -837,7 +840,10 @@
     break;
 
   case VAK_Invalid:
-    if (Ty->isObjCObjectType())
+    if (Ty.isDestructedType() == QualType::DK_nontrivial_c_struct)
+      Diag(E->getLocStart(),
+           diag::err_cannot_pass_non_trivial_c_struct_to_vararg) << Ty << CT;
+    else if (Ty->isObjCObjectType())
       DiagRuntimeBehavior(
           E->getLocStart(), nullptr,
           PDiag(diag::err_cannot_pass_objc_interface_to_vararg)
@@ -1092,13 +1098,12 @@
   Float128AndLongDouble |= (LHSElemType == S.Context.LongDoubleTy &&
                             RHSElemType == S.Context.Float128Ty);
 
-  /* We've handled the situation where __float128 and long double have the same
-     representation. The only other allowable conversion is if long double is
-     really just double.
-  */
+  // We've handled the situation where __float128 and long double have the same
+  // representation. We allow all conversions for all possible long double types
+  // except PPC's double double.
   return Float128AndLongDouble &&
-    (&S.Context.getFloatTypeSemantics(S.Context.LongDoubleTy) !=
-     &llvm::APFloat::IEEEdouble());
+    (&S.Context.getFloatTypeSemantics(S.Context.LongDoubleTy) ==
+     &llvm::APFloat::PPCDoubleDouble());
 }
 
 typedef ExprResult PerformCastFn(Sema &S, Expr *operand, QualType toType);
@@ -1711,7 +1716,7 @@
                              TemplateArgumentListInfo &Buffer,
                              DeclarationNameInfo &NameInfo,
                              const TemplateArgumentListInfo *&TemplateArgs) {
-  if (Id.getKind() == UnqualifiedId::IK_TemplateId) {
+  if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId) {
     Buffer.setLAngleLoc(Id.TemplateId->LAngleLoc);
     Buffer.setRAngleLoc(Id.TemplateId->RAngleLoc);
 
@@ -2070,9 +2075,10 @@
                                       IsAddressOfOperand, TemplateArgs);
 
   // Perform the required lookup.
-  LookupResult R(*this, NameInfo, 
-                 (Id.getKind() == UnqualifiedId::IK_ImplicitSelfParam) 
-                  ? LookupObjCImplicitSelfParam : LookupOrdinaryName);
+  LookupResult R(*this, NameInfo,
+                 (Id.getKind() == UnqualifiedIdKind::IK_ImplicitSelfParam)
+                     ? LookupObjCImplicitSelfParam
+                     : LookupOrdinaryName);
   if (TemplateArgs) {
     // Lookup the template name again to correctly establish the context in
     // which it was found. This is really unfortunate as we already did the
@@ -2243,7 +2249,7 @@
     // In C++1y, if this is a variable template id, then check it
     // in BuildTemplateIdExpr().
     // The single lookup result must be a variable template declaration.
-    if (Id.getKind() == UnqualifiedId::IK_TemplateId && Id.TemplateId &&
+    if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId && Id.TemplateId &&
         Id.TemplateId->Kind == TNK_Var_template) {
       assert(R.getAsSingle<VarTemplateDecl>() &&
              "There should only be one declaration found.");
@@ -2401,7 +2407,7 @@
       IdentifierInfo &II = Context.Idents.get("self");
       UnqualifiedId SelfName;
       SelfName.setIdentifier(&II, SourceLocation());
-      SelfName.setKind(UnqualifiedId::IK_ImplicitSelfParam);
+      SelfName.setKind(UnqualifiedIdKind::IK_ImplicitSelfParam);
       CXXScopeSpec SelfScopeSpec;
       SourceLocation TemplateKWLoc;
       ExprResult SelfExpr = ActOnIdExpression(S, SelfScopeSpec, TemplateKWLoc,
@@ -2881,7 +2887,7 @@
         valueKind = VK_RValue;
         break;
       }
-      // fallthrough
+      LLVM_FALLTHROUGH;
 
     case Decl::ImplicitParam:
     case Decl::ParmVar: {
@@ -2978,7 +2984,7 @@
         valueKind = VK_LValue;
         break;
       }
-      // fallthrough
+      LLVM_FALLTHROUGH;
 
     case Decl::CXXConversion:
     case Decl::CXXDestructor:
@@ -4395,8 +4401,13 @@
     if (VK != VK_RValue)
       OK = OK_VectorComponent;
 
-    // FIXME: need to deal with const...
     ResultType = VTy->getElementType();
+    QualType BaseType = BaseExpr->getType();
+    Qualifiers BaseQuals = BaseType.getQualifiers();
+    Qualifiers MemberQuals = ResultType.getQualifiers();
+    Qualifiers Combined = BaseQuals + MemberQuals;
+    if (Combined != MemberQuals)
+      ResultType = Context.getQualifiedType(ResultType, Combined);
   } else if (LHSTy->isArrayType()) {
     // If we see an array that wasn't promoted by
     // DefaultFunctionArrayLvalueConversion, it must be an array that
@@ -4830,6 +4841,10 @@
                (!Param || !Param->hasAttr<CFConsumedAttr>()))
         CFAudited = true;
 
+      if (Proto->getExtParameterInfo(i).isNoEscape())
+        if (auto *BE = dyn_cast<BlockExpr>(Arg->IgnoreParenNoopCasts(Context)))
+          BE->getBlockDecl()->setDoesNotEscape();
+
       InitializedEntity Entity =
           Param ? InitializedEntity::InitializeParameter(Context, Param,
                                                          ProtoArgType)
@@ -7242,6 +7257,16 @@
       commonExpr = commonRes.get();
     }
 
+    // If the common expression is a class or array prvalue, materialize it
+    // so that we can safely refer to it multiple times.
+    if (commonExpr->isRValue() && (commonExpr->getType()->isRecordType() ||
+                                   commonExpr->getType()->isArrayType())) {
+      ExprResult MatExpr = TemporaryMaterializationConversion(commonExpr);
+      if (MatExpr.isInvalid())
+        return ExprError();
+      commonExpr = MatExpr.get();
+    }
+
     opaqueValue = new (Context) OpaqueValueExpr(commonExpr->getExprLoc(),
                                                 commonExpr->getType(),
                                                 commonExpr->getValueKind(),
@@ -7501,7 +7526,7 @@
   // usually happen on valid code.
   OpaqueValueExpr RHSExpr(Loc, RHSType, VK_RValue);
   ExprResult RHSPtr = &RHSExpr;
-  CastKind K = CK_Invalid;
+  CastKind K;
 
   return CheckAssignmentConstraints(LHSType, RHSPtr, K, /*ConvertRHS=*/false);
 }
@@ -7893,7 +7918,7 @@
       }
     }
 
-    CastKind Kind = CK_Invalid;
+    CastKind Kind;
     if (CheckAssignmentConstraints(it->getType(), RHS, Kind)
           == Compatible) {
       RHS = ImpCastExprToType(RHS.get(), it->getType(), Kind);
@@ -8009,7 +8034,7 @@
     }
   }
   
-  CastKind Kind = CK_Invalid;
+  CastKind Kind;
   Sema::AssignConvertType result =
     CheckAssignmentConstraints(LHSType, RHS, Kind, ConvertRHS);
 
@@ -8104,7 +8129,7 @@
                                      unsigned &DiagID) {
   // The conversion to apply to the scalar before splatting it,
   // if necessary.
-  CastKind scalarCast = CK_Invalid;
+  CastKind scalarCast = CK_NoOp;
   
   if (vectorEltTy->isIntegralType(S.Context)) {
     if (S.getLangOpts().OpenCL && (scalarTy->isRealFloatingType() ||
@@ -8135,7 +8160,7 @@
 
   // Adjust scalar if desired.
   if (scalar) {
-    if (scalarCast != CK_Invalid)
+    if (scalarCast != CK_NoOp)
       *scalar = S.ImpCastExprToType(scalar->get(), vectorEltTy, scalarCast);
     *scalar = S.ImpCastExprToType(scalar->get(), vectorTy, CK_VectorSplat);
   }
@@ -9289,16 +9314,6 @@
   return LHSType;
 }
 
-static bool IsWithinTemplateSpecialization(Decl *D) {
-  if (DeclContext *DC = D->getDeclContext()) {
-    if (isa<ClassTemplateSpecializationDecl>(DC))
-      return true;
-    if (FunctionDecl *FD = dyn_cast<FunctionDecl>(DC))
-      return FD->isFunctionTemplateSpecialization();
-  }
-  return false;
-}
-
 /// If two different enums are compared, raise a warning.
 static void checkEnumComparison(Sema &S, SourceLocation Loc, Expr *LHS,
                                 Expr *RHS) {
@@ -9576,137 +9591,181 @@
 // Get the decl for a simple expression: a reference to a variable,
 // an implicit C++ field reference, or an implicit ObjC ivar reference.
 static ValueDecl *getCompareDecl(Expr *E) {
-  if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(E))
+  if (DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E))
     return DR->getDecl();
-  if (ObjCIvarRefExpr* Ivar = dyn_cast<ObjCIvarRefExpr>(E)) {
+  if (ObjCIvarRefExpr *Ivar = dyn_cast<ObjCIvarRefExpr>(E)) {
     if (Ivar->isFreeIvar())
       return Ivar->getDecl();
   }
-  if (MemberExpr* Mem = dyn_cast<MemberExpr>(E)) {
+  if (MemberExpr *Mem = dyn_cast<MemberExpr>(E)) {
     if (Mem->isImplicitAccess())
       return Mem->getMemberDecl();
   }
   return nullptr;
 }
 
+/// Diagnose some forms of syntactically-obvious tautological comparison.
+static void diagnoseTautologicalComparison(Sema &S, SourceLocation Loc,
+                                           Expr *LHS, Expr *RHS,
+                                           BinaryOperatorKind Opc) {
+  Expr *LHSStripped = LHS->IgnoreParenImpCasts();
+  Expr *RHSStripped = RHS->IgnoreParenImpCasts();
+
+  QualType LHSType = LHS->getType();
+  if (LHSType->hasFloatingRepresentation() ||
+      (LHSType->isBlockPointerType() && !BinaryOperator::isEqualityOp(Opc)) ||
+      LHS->getLocStart().isMacroID() || RHS->getLocStart().isMacroID() ||
+      S.inTemplateInstantiation())
+    return;
+
+  // For non-floating point types, check for self-comparisons of the form
+  // x == x, x != x, x < x, etc.  These always evaluate to a constant, and
+  // often indicate logic errors in the program.
+  //
+  // NOTE: Don't warn about comparison expressions resulting from macro
+  // expansion. Also don't warn about comparisons which are only self
+  // comparisons within a template instantiation. The warnings should catch
+  // obvious cases in the definition of the template anyways. The idea is to
+  // warn when the typed comparison operator will always evaluate to the same
+  // result.
+  ValueDecl *DL = getCompareDecl(LHSStripped);
+  ValueDecl *DR = getCompareDecl(RHSStripped);
+  if (DL && DR && declaresSameEntity(DL, DR)) {
+    StringRef Result;
+    switch (Opc) {
+    case BO_EQ: case BO_LE: case BO_GE:
+      Result = "true";
+      break;
+    case BO_NE: case BO_LT: case BO_GT:
+      Result = "false";
+      break;
+    case BO_Cmp:
+      Result = "'std::strong_ordering::equal'";
+      break;
+    default:
+      break;
+    }
+    S.DiagRuntimeBehavior(Loc, nullptr,
+                          S.PDiag(diag::warn_comparison_always)
+                              << 0 /*self-comparison*/ << !Result.empty()
+                              << Result);
+  } else if (DL && DR &&
+             DL->getType()->isArrayType() && DR->getType()->isArrayType() &&
+             !DL->isWeak() && !DR->isWeak()) {
+    // What is it always going to evaluate to?
+    StringRef Result;
+    switch(Opc) {
+    case BO_EQ: // e.g. array1 == array2
+      Result = "false";
+      break;
+    case BO_NE: // e.g. array1 != array2
+      Result = "true";
+      break;
+    default: // e.g. array1 <= array2
+      // The best we can say is 'a constant'
+      break;
+    }
+    S.DiagRuntimeBehavior(Loc, nullptr,
+                          S.PDiag(diag::warn_comparison_always)
+                              << 1 /*array comparison*/
+                              << !Result.empty() << Result);
+  }
+
+  if (isa<CastExpr>(LHSStripped))
+    LHSStripped = LHSStripped->IgnoreParenCasts();
+  if (isa<CastExpr>(RHSStripped))
+    RHSStripped = RHSStripped->IgnoreParenCasts();
+
+  // Warn about comparisons against a string constant (unless the other
+  // operand is null); the user probably wants strcmp.
+  Expr *LiteralString = nullptr;
+  Expr *LiteralStringStripped = nullptr;
+  if ((isa<StringLiteral>(LHSStripped) || isa<ObjCEncodeExpr>(LHSStripped)) &&
+      !RHSStripped->isNullPointerConstant(S.Context,
+                                          Expr::NPC_ValueDependentIsNull)) {
+    LiteralString = LHS;
+    LiteralStringStripped = LHSStripped;
+  } else if ((isa<StringLiteral>(RHSStripped) ||
+              isa<ObjCEncodeExpr>(RHSStripped)) &&
+             !LHSStripped->isNullPointerConstant(S.Context,
+                                          Expr::NPC_ValueDependentIsNull)) {
+    LiteralString = RHS;
+    LiteralStringStripped = RHSStripped;
+  }
+
+  if (LiteralString) {
+    S.DiagRuntimeBehavior(Loc, nullptr,
+                          S.PDiag(diag::warn_stringcompare)
+                              << isa<ObjCEncodeExpr>(LiteralStringStripped)
+                              << LiteralString->getSourceRange());
+  }
+}
+
+static QualType checkArithmeticOrEnumeralCompare(Sema &S, ExprResult &LHS,
+                                                 ExprResult &RHS,
+                                                 SourceLocation Loc,
+                                                 BinaryOperatorKind Opc) {
+  // C99 6.5.8p3 / C99 6.5.9p4
+  QualType Type = S.UsualArithmeticConversions(LHS, RHS);
+  if (LHS.isInvalid() || RHS.isInvalid())
+    return QualType();
+  if (Type.isNull())
+    return S.InvalidOperands(Loc, LHS, RHS);
+  assert(Type->isArithmeticType() || Type->isEnumeralType());
+
+  checkEnumComparison(S, Loc, LHS.get(), RHS.get());
+
+  enum { StrongEquality, PartialOrdering, StrongOrdering } Ordering;
+  if (Type->isAnyComplexType())
+    Ordering = StrongEquality;
+  else if (Type->isFloatingType())
+    Ordering = PartialOrdering;
+  else
+    Ordering = StrongOrdering;
+
+  if (Ordering == StrongEquality && BinaryOperator::isRelationalOp(Opc))
+    return S.InvalidOperands(Loc, LHS, RHS);
+
+  // Check for comparisons of floating point operands using != and ==.
+  if (Type->hasFloatingRepresentation() && BinaryOperator::isEqualityOp(Opc))
+    S.CheckFloatComparison(Loc, LHS.get(), RHS.get());
+
+  // The result of comparisons is 'bool' in C++, 'int' in C.
+  // FIXME: For BO_Cmp, return the relevant comparison category type.
+  return S.Context.getLogicalOperationType();
+}
+
 // C99 6.5.8, C++ [expr.rel]
 QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS,
                                     SourceLocation Loc, BinaryOperatorKind Opc,
                                     bool IsRelational) {
+  // Comparisons expect an rvalue, so convert to rvalue before any
+  // type-related checks.
+  LHS = DefaultFunctionArrayLvalueConversion(LHS.get());
+  if (LHS.isInvalid())
+    return QualType();
+  RHS = DefaultFunctionArrayLvalueConversion(RHS.get());
+  if (RHS.isInvalid())
+    return QualType();
+
   checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/true);
 
   // Handle vector comparisons separately.
   if (LHS.get()->getType()->isVectorType() ||
       RHS.get()->getType()->isVectorType())
-    return CheckVectorCompareOperands(LHS, RHS, Loc, IsRelational);
+    return CheckVectorCompareOperands(LHS, RHS, Loc, Opc);
+
+  diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc);
+  diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc);
 
   QualType LHSType = LHS.get()->getType();
   QualType RHSType = RHS.get()->getType();
+  if ((LHSType->isArithmeticType() || LHSType->isEnumeralType()) &&
+      (RHSType->isArithmeticType() || RHSType->isEnumeralType()))
+    return checkArithmeticOrEnumeralCompare(*this, LHS, RHS, Loc, Opc);
 
-  Expr *LHSStripped = LHS.get()->IgnoreParenImpCasts();
-  Expr *RHSStripped = RHS.get()->IgnoreParenImpCasts();
-
-  checkEnumComparison(*this, Loc, LHS.get(), RHS.get());
-  diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc);
-
-  if (!LHSType->hasFloatingRepresentation() &&
-      !(LHSType->isBlockPointerType() && IsRelational) &&
-      !LHS.get()->getLocStart().isMacroID() &&
-      !RHS.get()->getLocStart().isMacroID() &&
-      !inTemplateInstantiation()) {
-    // For non-floating point types, check for self-comparisons of the form
-    // x == x, x != x, x < x, etc.  These always evaluate to a constant, and
-    // often indicate logic errors in the program.
-    //
-    // NOTE: Don't warn about comparison expressions resulting from macro
-    // expansion. Also don't warn about comparisons which are only self
-    // comparisons within a template specialization. The warnings should catch
-    // obvious cases in the definition of the template anyways. The idea is to
-    // warn when the typed comparison operator will always evaluate to the same
-    // result.
-    ValueDecl *DL = getCompareDecl(LHSStripped);
-    ValueDecl *DR = getCompareDecl(RHSStripped);
-    if (DL && DR && DL == DR && !IsWithinTemplateSpecialization(DL)) {
-      DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_comparison_always)
-                          << 0 // self-
-                          << (Opc == BO_EQ
-                              || Opc == BO_LE
-                              || Opc == BO_GE));
-    } else if (DL && DR && LHSType->isArrayType() && RHSType->isArrayType() &&
-               !DL->getType()->isReferenceType() &&
-               !DR->getType()->isReferenceType()) {
-        // what is it always going to eval to?
-        char always_evals_to;
-        switch(Opc) {
-        case BO_EQ: // e.g. array1 == array2
-          always_evals_to = 0; // false
-          break;
-        case BO_NE: // e.g. array1 != array2
-          always_evals_to = 1; // true
-          break;
-        default:
-          // best we can say is 'a constant'
-          always_evals_to = 2; // e.g. array1 <= array2
-          break;
-        }
-        DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_comparison_always)
-                            << 1 // array
-                            << always_evals_to);
-    }
-
-    if (isa<CastExpr>(LHSStripped))
-      LHSStripped = LHSStripped->IgnoreParenCasts();
-    if (isa<CastExpr>(RHSStripped))
-      RHSStripped = RHSStripped->IgnoreParenCasts();
-
-    // Warn about comparisons against a string constant (unless the other
-    // operand is null), the user probably wants strcmp.
-    Expr *literalString = nullptr;
-    Expr *literalStringStripped = nullptr;
-    if ((isa<StringLiteral>(LHSStripped) || isa<ObjCEncodeExpr>(LHSStripped)) &&
-        !RHSStripped->isNullPointerConstant(Context,
-                                            Expr::NPC_ValueDependentIsNull)) {
-      literalString = LHS.get();
-      literalStringStripped = LHSStripped;
-    } else if ((isa<StringLiteral>(RHSStripped) ||
-                isa<ObjCEncodeExpr>(RHSStripped)) &&
-               !LHSStripped->isNullPointerConstant(Context,
-                                            Expr::NPC_ValueDependentIsNull)) {
-      literalString = RHS.get();
-      literalStringStripped = RHSStripped;
-    }
-
-    if (literalString) {
-      DiagRuntimeBehavior(Loc, nullptr,
-        PDiag(diag::warn_stringcompare)
-          << isa<ObjCEncodeExpr>(literalStringStripped)
-          << literalString->getSourceRange());
-    }
-  }
-
-  // C99 6.5.8p3 / C99 6.5.9p4
-  UsualArithmeticConversions(LHS, RHS);
-  if (LHS.isInvalid() || RHS.isInvalid())
-    return QualType();
-
-  LHSType = LHS.get()->getType();
-  RHSType = RHS.get()->getType();
-
-  // The result of comparisons is 'bool' in C++, 'int' in C.
   QualType ResultTy = Context.getLogicalOperationType();
 
-  if (IsRelational) {
-    if (LHSType->isRealType() && RHSType->isRealType())
-      return ResultTy;
-  } else {
-    // Check for comparisons of floating point operands using != and ==.
-    if (LHSType->hasFloatingRepresentation())
-      CheckFloatComparison(Loc, LHS.get(), RHS.get());
-
-    if (LHSType->isArithmeticType() && RHSType->isArithmeticType())
-      return ResultTy;
-  }
-
   const Expr::NullPointerConstantKind LHSNullKind =
       LHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull);
   const Expr::NullPointerConstantKind RHSNullKind =
@@ -9880,13 +9939,6 @@
       else
         return ResultTy;
     }
-
-    // Handle scoped enumeration types specifically, since they don't promote
-    // to integers.
-    if (LHS.get()->getType()->isEnumeralType() &&
-        Context.hasSameUnqualifiedType(LHS.get()->getType(),
-                                       RHS.get()->getType()))
-      return ResultTy;
   }
 
   // Handle block pointer types.
@@ -10090,7 +10142,7 @@
 /// types.
 QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS,
                                           SourceLocation Loc,
-                                          bool IsRelational) {
+                                          BinaryOperatorKind Opc) {
   // Check to make sure we're operating on vectors of the same type and width,
   // Allowing one side to be a scalar of element type.
   QualType vType = CheckVectorOperands(LHS, RHS, Loc, /*isCompAssign*/false,
@@ -10110,22 +10162,12 @@
   // For non-floating point types, check for self-comparisons of the form
   // x == x, x != x, x < x, etc.  These always evaluate to a constant, and
   // often indicate logic errors in the program.
-  if (!LHSType->hasFloatingRepresentation() && !inTemplateInstantiation()) {
-    if (DeclRefExpr* DRL
-          = dyn_cast<DeclRefExpr>(LHS.get()->IgnoreParenImpCasts()))
-      if (DeclRefExpr* DRR
-            = dyn_cast<DeclRefExpr>(RHS.get()->IgnoreParenImpCasts()))
-        if (DRL->getDecl() == DRR->getDecl())
-          DiagRuntimeBehavior(Loc, nullptr,
-                              PDiag(diag::warn_comparison_always)
-                                << 0 // self-
-                                << 2 // "a constant"
-                              );
-  }
+  diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc);
 
   // Check for comparisons of floating point operands using != and ==.
-  if (!IsRelational && LHSType->hasFloatingRepresentation()) {
-    assert (RHS.get()->getType()->hasFloatingRepresentation());
+  if (BinaryOperator::isEqualityOp(Opc) &&
+      LHSType->hasFloatingRepresentation()) {
+    assert(RHS.get()->getType()->hasFloatingRepresentation());
     CheckFloatComparison(Loc, LHS.get(), RHS.get());
   }
 
@@ -10407,8 +10449,16 @@
         // Static fields do not inherit constness from parents.
         break;
       }
-      break;
-    } // End MemberExpr
+      break; // End MemberExpr
+    } else if (const ArraySubscriptExpr *ASE =
+                   dyn_cast<ArraySubscriptExpr>(E)) {
+      E = ASE->getBase()->IgnoreParenImpCasts();
+      continue;
+    } else if (const ExtVectorElementExpr *EVE =
+                   dyn_cast<ExtVectorElementExpr>(E)) {
+      E = EVE->getBase()->IgnoreParenImpCasts();
+      continue;
+    }
     break;
   }
 
@@ -10909,7 +10959,7 @@
       return QualType();
     }
     // Increment of bool sets it to true, but is deprecated.
-    S.Diag(OpLoc, S.getLangOpts().CPlusPlus1z ? diag::ext_increment_bool
+    S.Diag(OpLoc, S.getLangOpts().CPlusPlus17 ? diag::ext_increment_bool
                                               : diag::warn_increment_bool)
       << Op->getSourceRange();
   } else if (S.getLangOpts().CPlusPlus && ResType->isEnumeralType()) {
@@ -11365,6 +11415,7 @@
   case tok::greater:              Opc = BO_GT; break;
   case tok::exclaimequal:         Opc = BO_NE; break;
   case tok::equalequal:           Opc = BO_EQ; break;
+  case tok::spaceship:            Opc = BO_Cmp; break;
   case tok::amp:                  Opc = BO_And; break;
   case tok::caret:                Opc = BO_Xor; break;
   case tok::pipe:                 Opc = BO_Or; break;
@@ -11555,7 +11606,8 @@
 static bool needsConversionOfHalfVec(bool OpRequiresConversion, ASTContext &Ctx,
                                      QualType SrcType) {
   return OpRequiresConversion && !Ctx.getLangOpts().NativeHalfType &&
-         Ctx.getLangOpts().HalfArgsAndReturns && isVector(SrcType, Ctx.HalfTy);
+         !Ctx.getTargetInfo().useFP16ConversionIntrinsics() &&
+         isVector(SrcType, Ctx.HalfTy);
 }
 
 /// CreateBuiltinBinOp - Creates a new built-in binary operation with
@@ -11571,8 +11623,8 @@
     // C++11 5.17p9:
     //   The meaning of x = {v} [...] is that of x = T(v) [...]. The meaning
     //   of x = {} is x = T().
-    InitializationKind Kind =
-        InitializationKind::CreateDirectList(RHSExpr->getLocStart());
+    InitializationKind Kind = InitializationKind::CreateDirectList(
+        RHSExpr->getLocStart(), RHSExpr->getLocStart(), RHSExpr->getLocEnd());
     InitializedEntity Entity =
         InitializedEntity::InitializeTemporary(LHSExpr->getType());
     InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr);
@@ -11672,6 +11724,13 @@
     ConvertHalfVec = true;
     ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc, false);
     break;
+  case BO_Cmp:
+    // FIXME: Implement proper semantic checking of '<=>'.
+    ConvertHalfVec = true;
+    ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc, true);
+    if (!ResultTy.isNull())
+      ResultTy = Context.VoidTy;
+    break;
   case BO_And:
     checkObjCPointerIntrospection(*this, LHS, RHS, OpLoc);
     LLVM_FALLTHROUGH;
@@ -12141,6 +12200,16 @@
   return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr);
 }
 
+static bool isOverflowingIntegerType(ASTContext &Ctx, QualType T) {
+  if (T.isNull() || T->isDependentType())
+    return false;
+
+  if (!T->isPromotableIntegerType())
+    return true;
+
+  return Ctx.getIntWidth(T) >= Ctx.getIntWidth(Ctx.IntTy);
+}
+
 ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
                                       UnaryOperatorKind Opc,
                                       Expr *InputExpr) {
@@ -12148,6 +12217,8 @@
   ExprValueKind VK = VK_RValue;
   ExprObjectKind OK = OK_Ordinary;
   QualType resultType;
+  bool CanOverflow = false;
+
   bool ConvertHalfVec = false;
   if (getLangOpts().OpenCL) {
     QualType Ty = InputExpr->getType();
@@ -12173,6 +12244,7 @@
                                                 Opc == UO_PostInc,
                                                 Opc == UO_PreInc ||
                                                 Opc == UO_PreDec);
+    CanOverflow = isOverflowingIntegerType(Context, resultType);
     break;
   case UO_AddrOf:
     resultType = CheckAddressOfOperand(Input, OpLoc);
@@ -12186,6 +12258,8 @@
   }
   case UO_Plus:
   case UO_Minus:
+    CanOverflow = Opc == UO_Minus &&
+                  isOverflowingIntegerType(Context, Input.get()->getType());
     Input = UsualUnaryConversions(Input.get());
     if (Input.isInvalid()) return ExprError();
     // Unary plus and minus require promoting an operand of half vector to a
@@ -12222,6 +12296,7 @@
     if (Input.isInvalid())
       return ExprError();
     resultType = Input.get()->getType();
+
     if (resultType->isDependentType())
       break;
     // C99 6.5.3.3p1. We allow complex int and float as a GCC extension.
@@ -12336,7 +12411,7 @@
     CheckArrayAccess(Input.get());
 
   auto *UO = new (Context)
-      UnaryOperator(Input.get(), Opc, resultType, VK, OK, OpLoc);
+      UnaryOperator(Input.get(), Opc, resultType, VK, OK, OpLoc, CanOverflow);
   // Convert the result back to a half vector.
   if (ConvertHalfVec)
     return convertVector(UO, Context.HalfTy, *this);
@@ -12578,15 +12653,7 @@
       && RequireCompleteType(BuiltinLoc, ArgTy,
                              diag::err_offsetof_incomplete_type, TypeRange))
     return ExprError();
-  
-  // offsetof with non-identifier designators (e.g. "offsetof(x, a.b[c])") are a
-  // GCC extension, diagnose them.
-  // FIXME: This diagnostic isn't actually visible because the location is in
-  // a system header!
-  if (Components.size() != 1)
-    Diag(BuiltinLoc, diag::ext_offsetof_extended_field_designator)
-      << SourceRange(Components[1].LocStart, Components.back().LocEnd);
-  
+
   bool DidWarnAboutNonPOD = false;
   QualType CurrentType = ArgTy;
   SmallVector<OffsetOfNode, 4> Comps;
@@ -12823,7 +12890,7 @@
                                Scope *CurScope) {
   assert(ParamInfo.getIdentifier() == nullptr &&
          "block-id should have no identifier!");
-  assert(ParamInfo.getContext() == Declarator::BlockLiteralContext);
+  assert(ParamInfo.getContext() == DeclaratorContext::BlockLiteralContext);
   BlockScopeInfo *CurBlock = getCurBlock();
 
   TypeSourceInfo *Sig = GetTypeForDeclarator(ParamInfo, CurScope);
@@ -12849,8 +12916,8 @@
   // Look for an explicit signature in that function type.
   FunctionProtoTypeLoc ExplicitSignature;
 
-  TypeLoc tmp = Sig->getTypeLoc().IgnoreParens();
-  if ((ExplicitSignature = tmp.getAs<FunctionProtoTypeLoc>())) {
+  if ((ExplicitSignature =
+           Sig->getTypeLoc().getAsAdjusted<FunctionProtoTypeLoc>())) {
 
     // Check whether that explicit signature was synthesized by
     // GetTypeForDeclarator.  If so, don't save that as part of the
@@ -13733,7 +13800,7 @@
       // are part of function-signatures.  Be mindful that P0315 (Lambdas in
       // unevaluated contexts) might lift some of these restrictions in a 
       // future version.
-      if (!Rec.isConstantEvaluated() || !getLangOpts().CPlusPlus1z)
+      if (!Rec.isConstantEvaluated() || !getLangOpts().CPlusPlus17)
         for (const auto *L : Rec.Lambdas)
           Diag(L->getLocStart(), D);
     } else {
@@ -13958,29 +14025,21 @@
   // Implicit instantiation of function templates and member functions of
   // class templates.
   if (Func->isImplicitlyInstantiable()) {
-    bool AlreadyInstantiated = false;
-    SourceLocation PointOfInstantiation = Loc;
-    if (FunctionTemplateSpecializationInfo *SpecInfo
-                              = Func->getTemplateSpecializationInfo()) {
-      if (SpecInfo->getPointOfInstantiation().isInvalid())
-        SpecInfo->setPointOfInstantiation(Loc);
-      else if (SpecInfo->getTemplateSpecializationKind()
-                 == TSK_ImplicitInstantiation) {
-        AlreadyInstantiated = true;
-        PointOfInstantiation = SpecInfo->getPointOfInstantiation();
-      }
-    } else if (MemberSpecializationInfo *MSInfo
-                                = Func->getMemberSpecializationInfo()) {
-      if (MSInfo->getPointOfInstantiation().isInvalid())
-        MSInfo->setPointOfInstantiation(Loc);
-      else if (MSInfo->getTemplateSpecializationKind()
-                 == TSK_ImplicitInstantiation) {
-        AlreadyInstantiated = true;
-        PointOfInstantiation = MSInfo->getPointOfInstantiation();
-      }
+    TemplateSpecializationKind TSK = Func->getTemplateSpecializationKind();
+    SourceLocation PointOfInstantiation = Func->getPointOfInstantiation();
+    bool FirstInstantiation = PointOfInstantiation.isInvalid();
+    if (FirstInstantiation) {
+      PointOfInstantiation = Loc;
+      Func->setTemplateSpecializationKind(TSK, PointOfInstantiation);
+    } else if (TSK != TSK_ImplicitInstantiation) {
+      // Use the point of use as the point of instantiation, instead of the
+      // point of explicit instantiation (which we track as the actual point of
+      // instantiation). This gives better backtraces in diagnostics.
+      PointOfInstantiation = Loc;
     }
 
-    if (!AlreadyInstantiated || Func->isConstexpr()) {
+    if (FirstInstantiation || TSK != TSK_ImplicitInstantiation ||
+        Func->isConstexpr()) {
       if (isa<CXXRecordDecl>(Func->getDeclContext()) &&
           cast<CXXRecordDecl>(Func->getDeclContext())->isLocalClass() &&
           CodeSynthesisContexts.size())
@@ -14342,8 +14401,13 @@
   bool ByRef = true;
   // Using an LValue reference type is consistent with Lambdas (see below).
   if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) {
-    if (S.IsOpenMPCapturedDecl(Var))
+    if (S.IsOpenMPCapturedDecl(Var)) {
+      bool HasConst = DeclRefType.isConstQualified();
       DeclRefType = DeclRefType.getUnqualifiedType();
+      // Don't lose diagnostics about assignments to const.
+      if (HasConst)
+        DeclRefType.addConst();
+    }
     ByRef = S.IsOpenMPCapturedByRef(Var, RSI->OpenMPLevel);
   }
 
@@ -14620,14 +14684,16 @@
         // just break here. Similarly, global variables that are captured in a
         // target region should not be captured outside the scope of the region.
         if (RSI->CapRegionKind == CR_OpenMP) {
-          auto IsTargetCap = isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel);
+          bool IsOpenMPPrivateDecl = isOpenMPPrivateDecl(Var, RSI->OpenMPLevel);
+          auto IsTargetCap = !IsOpenMPPrivateDecl &&
+                             isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel);
           // When we detect target captures we are looking from inside the
           // target region, therefore we need to propagate the capture from the
           // enclosing region. Therefore, the capture is not initially nested.
           if (IsTargetCap)
-            FunctionScopesIndex--;
+            adjustOpenMPTargetScopeIndex(FunctionScopesIndex, RSI->OpenMPLevel);
 
-          if (IsTargetCap || isOpenMPPrivateDecl(Var, RSI->OpenMPLevel)) {
+          if (IsTargetCap || IsOpenMPPrivateDecl) {
             Nested = !IsTargetCap;
             DeclRefType = DeclRefType.getUnqualifiedType();
             CaptureType = Context.getLValueReferenceType(DeclRefType);
@@ -14852,22 +14918,14 @@
         TSK == TSK_ImplicitInstantiation ||
         (TSK == TSK_ExplicitInstantiationDeclaration && UsableInConstantExpr);
 
-    if (TryInstantiating && !isa<VarTemplateSpecializationDecl>(Var)) {
-      if (Var->getPointOfInstantiation().isInvalid()) {
-        // This is a modification of an existing AST node. Notify listeners.
-        if (ASTMutationListener *L = SemaRef.getASTMutationListener())
-          L->StaticDataMemberInstantiated(Var);
-      } else if (!UsableInConstantExpr)
-        // Don't bother trying to instantiate it again, unless we might need
-        // its initializer before we get to the end of the TU.
-        TryInstantiating = false;
-    }
-
-    if (Var->getPointOfInstantiation().isInvalid())
-      Var->setTemplateSpecializationKind(TSK, Loc);
-
     if (TryInstantiating) {
       SourceLocation PointOfInstantiation = Var->getPointOfInstantiation();
+      bool FirstInstantiation = PointOfInstantiation.isInvalid();
+      if (FirstInstantiation) {
+        PointOfInstantiation = Loc;
+        Var->setTemplateSpecializationKind(TSK, PointOfInstantiation);
+      }
+
       bool InstantiationDependent = false;
       bool IsNonDependent =
           VarSpec ? !TemplateSpecializationType::anyDependentTemplateArguments(
@@ -14877,10 +14935,16 @@
       // Do not instantiate specializations that are still type-dependent.
       if (IsNonDependent) {
         if (UsableInConstantExpr) {
-          // Do not defer instantiations of variables which could be used in a
+          // Do not defer instantiations of variables that could be used in a
           // constant expression.
           SemaRef.InstantiateVariableDefinition(PointOfInstantiation, Var);
-        } else {
+        } else if (FirstInstantiation ||
+                   isa<VarTemplateSpecializationDecl>(Var)) {
+          // FIXME: For a specialization of a variable template, we don't
+          // distinguish between "declaration and type implicitly instantiated"
+          // and "implicit instantiation of definition requested", so we have
+          // no direct way to avoid enqueueing the pending instantiation
+          // multiple times.
           SemaRef.PendingInstantiations
               .push_back(std::make_pair(Var, PointOfInstantiation));
         }
@@ -14917,7 +14981,8 @@
     if (RefersToEnclosingScope) {
       LambdaScopeInfo *const LSI =
           SemaRef.getCurLambda(/*IgnoreNonLambdaCapturingScope=*/true);
-      if (LSI && !LSI->CallOperator->Encloses(Var->getDeclContext())) {
+      if (LSI && (!LSI->CallOperator ||
+                  !LSI->CallOperator->Encloses(Var->getDeclContext()))) {
         // If a variable could potentially be odr-used, defer marking it so
         // until we finish analyzing the full expression for any
         // lvalue-to-rvalue
diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp
index 7f5b792..d3ab003 100644
--- a/lib/Sema/SemaExprCXX.cpp
+++ b/lib/Sema/SemaExprCXX.cpp
@@ -356,7 +356,7 @@
 
 bool Sema::checkLiteralOperatorId(const CXXScopeSpec &SS,
                                   const UnqualifiedId &Name) {
-  assert(Name.getKind() == UnqualifiedId::IK_LiteralOperatorId);
+  assert(Name.getKind() == UnqualifiedIdKind::IK_LiteralOperatorId);
 
   if (!SS.isValid())
     return false;
@@ -1244,11 +1244,16 @@
   return Class && Class->isBeingDefined();
 }
 
+/// Parse construction of a specified type.
+/// Can be interpreted either as function-style casting ("int(x)")
+/// or class type construction ("ClassType(x,y,z)")
+/// or creation of a value-initialized type ("int()").
 ExprResult
 Sema::ActOnCXXTypeConstructExpr(ParsedType TypeRep,
-                                SourceLocation LParenLoc,
+                                SourceLocation LParenOrBraceLoc,
                                 MultiExprArg exprs,
-                                SourceLocation RParenLoc) {
+                                SourceLocation RParenOrBraceLoc,
+                                bool ListInitialization) {
   if (!TypeRep)
     return ExprError();
 
@@ -1257,7 +1262,8 @@
   if (!TInfo)
     TInfo = Context.getTrivialTypeSourceInfo(Ty, SourceLocation());
 
-  auto Result = BuildCXXTypeConstructExpr(TInfo, LParenLoc, exprs, RParenLoc);
+  auto Result = BuildCXXTypeConstructExpr(TInfo, LParenOrBraceLoc, exprs,
+                                          RParenOrBraceLoc, ListInitialization);
   // Avoid creating a non-type-dependent expression that contains typos.
   // Non-type-dependent expressions are liable to be discarded without
   // checking for embedded typos.
@@ -1267,38 +1273,40 @@
   return Result;
 }
 
-/// ActOnCXXTypeConstructExpr - Parse construction of a specified type.
-/// Can be interpreted either as function-style casting ("int(x)")
-/// or class type construction ("ClassType(x,y,z)")
-/// or creation of a value-initialized type ("int()").
 ExprResult
 Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
-                                SourceLocation LParenLoc,
+                                SourceLocation LParenOrBraceLoc,
                                 MultiExprArg Exprs,
-                                SourceLocation RParenLoc) {
+                                SourceLocation RParenOrBraceLoc,
+                                bool ListInitialization) {
   QualType Ty = TInfo->getType();
   SourceLocation TyBeginLoc = TInfo->getTypeLoc().getBeginLoc();
 
   if (Ty->isDependentType() || CallExpr::hasAnyTypeDependentArguments(Exprs)) {
-    return CXXUnresolvedConstructExpr::Create(Context, TInfo, LParenLoc, Exprs,
-                                              RParenLoc);
+    // FIXME: CXXUnresolvedConstructExpr does not model list-initialization
+    // directly. We work around this by dropping the locations of the braces.
+    SourceRange Locs = ListInitialization
+                           ? SourceRange()
+                           : SourceRange(LParenOrBraceLoc, RParenOrBraceLoc);
+    return CXXUnresolvedConstructExpr::Create(Context, TInfo, Locs.getBegin(),
+                                              Exprs, Locs.getEnd());
   }
 
-  bool ListInitialization = LParenLoc.isInvalid();
   assert((!ListInitialization ||
           (Exprs.size() == 1 && isa<InitListExpr>(Exprs[0]))) &&
          "List initialization must have initializer list as expression.");
-  SourceRange FullRange = SourceRange(TyBeginLoc,
-      ListInitialization ? Exprs[0]->getSourceRange().getEnd() : RParenLoc);
+  SourceRange FullRange = SourceRange(TyBeginLoc, RParenOrBraceLoc);
 
   InitializedEntity Entity = InitializedEntity::InitializeTemporary(TInfo);
   InitializationKind Kind =
       Exprs.size()
           ? ListInitialization
-                ? InitializationKind::CreateDirectList(TyBeginLoc)
-                : InitializationKind::CreateDirect(TyBeginLoc, LParenLoc,
-                                                   RParenLoc)
-          : InitializationKind::CreateValue(TyBeginLoc, LParenLoc, RParenLoc);
+                ? InitializationKind::CreateDirectList(
+                      TyBeginLoc, LParenOrBraceLoc, RParenOrBraceLoc)
+                : InitializationKind::CreateDirect(TyBeginLoc, LParenOrBraceLoc,
+                                                   RParenOrBraceLoc)
+          : InitializationKind::CreateValue(TyBeginLoc, LParenOrBraceLoc,
+                                            RParenOrBraceLoc);
 
   // C++1z [expr.type.conv]p1:
   //   If the type is a placeholder for a deduced class type, [...perform class
@@ -1319,7 +1327,8 @@
   if (Exprs.size() == 1 && !ListInitialization &&
       !isa<InitListExpr>(Exprs[0])) {
     Expr *Arg = Exprs[0];
-    return BuildCXXFunctionalCastExpr(TInfo, Ty, LParenLoc, Arg, RParenLoc);
+    return BuildCXXFunctionalCastExpr(TInfo, Ty, LParenOrBraceLoc, Arg,
+                                      RParenOrBraceLoc);
   }
 
   //   For an expression of the form T(), T shall not be an array type.
@@ -1367,9 +1376,12 @@
     // CXXTemporaryObjectExpr. It's also weird that the functional cast
     // is sometimes handled by initialization and sometimes not.
     QualType ResultType = Result.get()->getType();
+    SourceRange Locs = ListInitialization
+                           ? SourceRange()
+                           : SourceRange(LParenOrBraceLoc, RParenOrBraceLoc);
     Result = CXXFunctionalCastExpr::Create(
-        Context, ResultType, Expr::getValueKindForType(Ty), TInfo,
-        CK_NoOp, Result.get(), /*Path=*/nullptr, LParenLoc, RParenLoc);
+        Context, ResultType, Expr::getValueKindForType(Ty), TInfo, CK_NoOp,
+        Result.get(), /*Path=*/nullptr, Locs.getBegin(), Locs.getEnd());
   }
 
   return Result;
@@ -1728,7 +1740,9 @@
   //     - Otherwise, the new-initializer is interpreted according to the
   //       initialization rules of 8.5 for direct-initialization.
         : initStyle == CXXNewExpr::ListInit
-            ? InitializationKind::CreateDirectList(TypeRange.getBegin())
+            ? InitializationKind::CreateDirectList(TypeRange.getBegin(),
+                                                   Initializer->getLocStart(),
+                                                   Initializer->getLocEnd())
             : InitializationKind::CreateDirect(TypeRange.getBegin(),
                                                DirectInitRange.getBegin(),
                                                DirectInitRange.getEnd());
@@ -1748,20 +1762,27 @@
     if (AllocType.isNull())
       return ExprError();
   } else if (Deduced) {
+    bool Braced = (initStyle == CXXNewExpr::ListInit);
+    if (NumInits == 1) {
+      if (auto p = dyn_cast_or_null<InitListExpr>(Inits[0])) {
+        Inits = p->getInits();
+        NumInits = p->getNumInits();
+        Braced = true;
+      }
+    }
+
     if (initStyle == CXXNewExpr::NoInit || NumInits == 0)
       return ExprError(Diag(StartLoc, diag::err_auto_new_requires_ctor_arg)
                        << AllocType << TypeRange);
-    if (initStyle == CXXNewExpr::ListInit ||
-        (NumInits == 1 && isa<InitListExpr>(Inits[0])))
-      return ExprError(Diag(Inits[0]->getLocStart(),
-                            diag::err_auto_new_list_init)
-                       << AllocType << TypeRange);
     if (NumInits > 1) {
       Expr *FirstBad = Inits[1];
       return ExprError(Diag(FirstBad->getLocStart(),
                             diag::err_auto_new_ctor_multiple_expressions)
                        << AllocType << TypeRange);
     }
+    if (Braced && !getLangOpts().CPlusPlus17)
+      Diag(Initializer->getLocStart(), diag::ext_auto_new_list_init)
+          << AllocType << TypeRange;
     Expr *Deduce = Inits[0];
     QualType DeducedType;
     if (DeduceAutoType(AllocTypeInfo, Deduce, DeducedType) == DAR_Failed)
@@ -2130,12 +2151,10 @@
   return false;
 }
 
-static bool
-resolveAllocationOverload(Sema &S, LookupResult &R, SourceRange Range,
-                          SmallVectorImpl<Expr *> &Args, bool &PassAlignment,
-                          FunctionDecl *&Operator,
-                          OverloadCandidateSet *AlignedCandidates = nullptr,
-                          Expr *AlignArg = nullptr) {
+static bool resolveAllocationOverload(
+    Sema &S, LookupResult &R, SourceRange Range, SmallVectorImpl<Expr *> &Args,
+    bool &PassAlignment, FunctionDecl *&Operator,
+    OverloadCandidateSet *AlignedCandidates, Expr *AlignArg, bool Diagnose) {
   OverloadCandidateSet Candidates(R.getNameLoc(),
                                   OverloadCandidateSet::CSK_Normal);
   for (LookupResult::iterator Alloc = R.begin(), AllocEnd = R.end();
@@ -2181,7 +2200,8 @@
       AlignArg = Args[1];
       Args.erase(Args.begin() + 1);
       return resolveAllocationOverload(S, R, Range, Args, PassAlignment,
-                                       Operator, &Candidates, AlignArg);
+                                       Operator, &Candidates, AlignArg,
+                                       Diagnose);
     }
 
     // MSVC will fall back on trying to find a matching global operator new
@@ -2197,48 +2217,53 @@
       S.LookupQualifiedName(R, S.Context.getTranslationUnitDecl());
       // FIXME: This will give bad diagnostics pointing at the wrong functions.
       return resolveAllocationOverload(S, R, Range, Args, PassAlignment,
-                                       Operator, nullptr);
+                                       Operator, /*Candidates=*/nullptr,
+                                       /*AlignArg=*/nullptr, Diagnose);
     }
 
-    S.Diag(R.getNameLoc(), diag::err_ovl_no_viable_function_in_call)
-      << R.getLookupName() << Range;
+    if (Diagnose) {
+      S.Diag(R.getNameLoc(), diag::err_ovl_no_viable_function_in_call)
+          << R.getLookupName() << Range;
 
-    // If we have aligned candidates, only note the align_val_t candidates
-    // from AlignedCandidates and the non-align_val_t candidates from
-    // Candidates.
-    if (AlignedCandidates) {
-      auto IsAligned = [](OverloadCandidate &C) {
-        return C.Function->getNumParams() > 1 &&
-               C.Function->getParamDecl(1)->getType()->isAlignValT();
-      };
-      auto IsUnaligned = [&](OverloadCandidate &C) { return !IsAligned(C); };
+      // If we have aligned candidates, only note the align_val_t candidates
+      // from AlignedCandidates and the non-align_val_t candidates from
+      // Candidates.
+      if (AlignedCandidates) {
+        auto IsAligned = [](OverloadCandidate &C) {
+          return C.Function->getNumParams() > 1 &&
+                 C.Function->getParamDecl(1)->getType()->isAlignValT();
+        };
+        auto IsUnaligned = [&](OverloadCandidate &C) { return !IsAligned(C); };
 
-      // This was an overaligned allocation, so list the aligned candidates
-      // first.
-      Args.insert(Args.begin() + 1, AlignArg);
-      AlignedCandidates->NoteCandidates(S, OCD_AllCandidates, Args, "",
-                                        R.getNameLoc(), IsAligned);
-      Args.erase(Args.begin() + 1);
-      Candidates.NoteCandidates(S, OCD_AllCandidates, Args, "", R.getNameLoc(),
-                                IsUnaligned);
-    } else {
-      Candidates.NoteCandidates(S, OCD_AllCandidates, Args);
+        // This was an overaligned allocation, so list the aligned candidates
+        // first.
+        Args.insert(Args.begin() + 1, AlignArg);
+        AlignedCandidates->NoteCandidates(S, OCD_AllCandidates, Args, "",
+                                          R.getNameLoc(), IsAligned);
+        Args.erase(Args.begin() + 1);
+        Candidates.NoteCandidates(S, OCD_AllCandidates, Args, "", R.getNameLoc(),
+                                  IsUnaligned);
+      } else {
+        Candidates.NoteCandidates(S, OCD_AllCandidates, Args);
+      }
     }
     return true;
 
   case OR_Ambiguous:
-    S.Diag(R.getNameLoc(), diag::err_ovl_ambiguous_call)
-      << R.getLookupName() << Range;
-    Candidates.NoteCandidates(S, OCD_ViableCandidates, Args);
+    if (Diagnose) {
+      S.Diag(R.getNameLoc(), diag::err_ovl_ambiguous_call)
+          << R.getLookupName() << Range;
+      Candidates.NoteCandidates(S, OCD_ViableCandidates, Args);
+    }
     return true;
 
   case OR_Deleted: {
-    S.Diag(R.getNameLoc(), diag::err_ovl_deleted_call)
-      << Best->Function->isDeleted()
-      << R.getLookupName()
-      << S.getDeletedOrUnavailableSuffix(Best->Function)
-      << Range;
-    Candidates.NoteCandidates(S, OCD_AllCandidates, Args);
+    if (Diagnose) {
+      S.Diag(R.getNameLoc(), diag::err_ovl_deleted_call)
+          << Best->Function->isDeleted() << R.getLookupName()
+          << S.getDeletedOrUnavailableSuffix(Best->Function) << Range;
+      Candidates.NoteCandidates(S, OCD_AllCandidates, Args);
+    }
     return true;
   }
   }
@@ -2253,7 +2278,8 @@
                                    bool IsArray, bool &PassAlignment,
                                    MultiExprArg PlaceArgs,
                                    FunctionDecl *&OperatorNew,
-                                   FunctionDecl *&OperatorDelete) {
+                                   FunctionDecl *&OperatorDelete,
+                                   bool Diagnose) {
   // --- Choosing an allocation function ---
   // C++ 5.3.4p8 - 14 & 18
   // 1) If UseGlobal is true, only look in the global scope. Else, also look
@@ -2328,7 +2354,8 @@
     R.suppressDiagnostics();
 
     if (resolveAllocationOverload(*this, R, Range, AllocArgs, PassAlignment,
-                                  OperatorNew))
+                                  OperatorNew, /*Candidates=*/nullptr,
+                                  /*AlignArg=*/nullptr, Diagnose))
       return true;
   }
 
@@ -3293,6 +3320,15 @@
     // is trivial and left to AST consumers to handle.
     QualType ParamType = OperatorDelete->getParamDecl(0)->getType();
     if (!IsVirtualDelete && !ParamType->getPointeeType()->isVoidType()) {
+      Qualifiers Qs = Pointee.getQualifiers();
+      if (Qs.hasCVRQualifiers()) {
+        // Qualifiers are irrelevant to this conversion; we're only looking
+        // for access and ambiguity.
+        Qs.removeCVRQualifiers();
+        QualType Unqual = Context.getPointerType(
+            Context.getQualifiedType(Pointee.getUnqualifiedType(), Qs));
+        Ex = ImpCastExprToType(Ex.get(), Unqual, CK_NoOp);
+      }
       Ex = PerformImplicitConversion(Ex.get(), ParamType, AA_Passing);
       if (Ex.isInvalid())
         return ExprError();
@@ -3831,7 +3867,7 @@
           << From->getSourceRange();
     }
 
-    CastKind Kind = CK_Invalid;
+    CastKind Kind;
     CXXCastPath BasePath;
     if (CheckPointerConversion(From, ToType, Kind, BasePath, CStyle))
       return ExprError();
@@ -3851,7 +3887,7 @@
   }
 
   case ICK_Pointer_Member: {
-    CastKind Kind = CK_Invalid;
+    CastKind Kind;
     CXXCastPath BasePath;
     if (CheckMemberPointerConversion(From, ToType, Kind, BasePath, CStyle))
       return ExprError();
@@ -4175,6 +4211,7 @@
   case UTT_IsDestructible:
   case UTT_IsNothrowDestructible:
   case UTT_IsTriviallyDestructible:
+  case UTT_HasUniqueObjectRepresentations:
     if (ArgTy->isIncompleteArrayType() || ArgTy->isVoidType())
       return true;
 
@@ -4614,6 +4651,8 @@
     //   Returns True if and only if T is a complete type at the point of the
     //   function call.
     return !T->isIncompleteType();
+  case UTT_HasUniqueObjectRepresentations:
+    return C.hasUniqueObjectRepresentations(T);
   }
 }
 
@@ -4626,11 +4665,14 @@
   if (Kind <= UTT_Last)
     return EvaluateUnaryTypeTrait(S, Kind, KWLoc, Args[0]->getType());
 
-  if (Kind <= BTT_Last)
+  // Evaluate BTT_ReferenceBindsToTemporary alongside the IsConstructible
+  // traits to avoid duplication.
+  if (Kind <= BTT_Last && Kind != BTT_ReferenceBindsToTemporary)
     return EvaluateBinaryTypeTrait(S, Kind, Args[0]->getType(),
                                    Args[1]->getType(), RParenLoc);
 
   switch (Kind) {
+  case clang::BTT_ReferenceBindsToTemporary:
   case clang::TT_IsConstructible:
   case clang::TT_IsNothrowConstructible:
   case clang::TT_IsTriviallyConstructible: {
@@ -4707,6 +4749,13 @@
     if (Kind == clang::TT_IsConstructible)
       return true;
 
+    if (Kind == clang::BTT_ReferenceBindsToTemporary) {
+      if (!T->isReferenceType())
+        return false;
+
+      return !Init.isDirectReferenceBinding();
+    }
+
     if (Kind == clang::TT_IsNothrowConstructible)
       return S.canThrow(Result.get()) == CT_Cannot;
 
@@ -5747,7 +5796,7 @@
   // happen in C++17, because it would mean we were computing the composite
   // pointer type of dependent types, which should never happen.
   if (EST1 == EST_ComputedNoexcept || EST2 == EST_ComputedNoexcept) {
-    assert(!S.getLangOpts().CPlusPlus1z &&
+    assert(!S.getLangOpts().CPlusPlus17 &&
            "computing composite pointer type of dependent types");
     return FunctionProtoType::ExceptionSpecInfo();
   }
@@ -6246,9 +6295,8 @@
   // a StmtExpr; currently this is only used for asm statements.
   // This is hacky, either create a new CXXStmtWithTemporaries statement or
   // a new AsmStmtWithTemporaries.
-  CompoundStmt *CompStmt = new (Context) CompoundStmt(Context, SubStmt,
-                                                      SourceLocation(),
-                                                      SourceLocation());
+  CompoundStmt *CompStmt = CompoundStmt::Create(
+      Context, SubStmt, SourceLocation(), SourceLocation());
   Expr *E = new (Context) StmtExpr(CompStmt, Context.VoidTy, SourceLocation(),
                                    SourceLocation());
   return MaybeCreateExprWithCleanups(E);
@@ -6698,11 +6746,11 @@
                                            SourceLocation CCLoc,
                                            SourceLocation TildeLoc,
                                            UnqualifiedId &SecondTypeName) {
-  assert((FirstTypeName.getKind() == UnqualifiedId::IK_TemplateId ||
-          FirstTypeName.getKind() == UnqualifiedId::IK_Identifier) &&
+  assert((FirstTypeName.getKind() == UnqualifiedIdKind::IK_TemplateId ||
+          FirstTypeName.getKind() == UnqualifiedIdKind::IK_Identifier) &&
          "Invalid first type name in pseudo-destructor");
-  assert((SecondTypeName.getKind() == UnqualifiedId::IK_TemplateId ||
-          SecondTypeName.getKind() == UnqualifiedId::IK_Identifier) &&
+  assert((SecondTypeName.getKind() == UnqualifiedIdKind::IK_TemplateId ||
+          SecondTypeName.getKind() == UnqualifiedIdKind::IK_Identifier) &&
          "Invalid second type name in pseudo-destructor");
 
   QualType ObjectType;
@@ -6724,7 +6772,7 @@
   QualType DestructedType;
   TypeSourceInfo *DestructedTypeInfo = nullptr;
   PseudoDestructorTypeStorage Destructed;
-  if (SecondTypeName.getKind() == UnqualifiedId::IK_Identifier) {
+  if (SecondTypeName.getKind() == UnqualifiedIdKind::IK_Identifier) {
     ParsedType T = getTypeName(*SecondTypeName.Identifier,
                                SecondTypeName.StartLocation,
                                S, &SS, true, false, ObjectTypePtrForLookup,
@@ -6782,9 +6830,9 @@
   // Convert the name of the scope type (the type prior to '::') into a type.
   TypeSourceInfo *ScopeTypeInfo = nullptr;
   QualType ScopeType;
-  if (FirstTypeName.getKind() == UnqualifiedId::IK_TemplateId ||
+  if (FirstTypeName.getKind() == UnqualifiedIdKind::IK_TemplateId ||
       FirstTypeName.Identifier) {
-    if (FirstTypeName.getKind() == UnqualifiedId::IK_Identifier) {
+    if (FirstTypeName.getKind() == UnqualifiedIdKind::IK_Identifier) {
       ParsedType T = getTypeName(*FirstTypeName.Identifier,
                                  FirstTypeName.StartLocation,
                                  S, &SS, true, false, ObjectTypePtrForLookup,
@@ -7293,8 +7341,7 @@
   /// suggest the corrections. Otherwise the diagnostics will not suggest
   /// anything (having been passed an empty TypoCorrection).
   void EmitAllDiagnostics() {
-    for (auto E : TypoExprs) {
-      TypoExpr *TE = cast<TypoExpr>(E);
+    for (TypoExpr *TE : TypoExprs) {
       auto &State = SemaRef.getTypoExprState(TE);
       if (State.DiagHandler) {
         TypoCorrection TC = State.Consumer->getCurrentCorrection();
diff --git a/lib/Sema/SemaExprMember.cpp b/lib/Sema/SemaExprMember.cpp
index bf4cb53..5bb9653 100644
--- a/lib/Sema/SemaExprMember.cpp
+++ b/lib/Sema/SemaExprMember.cpp
@@ -251,7 +251,7 @@
   case IMA_Field_Uneval_Context:
     Diag(R.getNameLoc(), diag::warn_cxx98_compat_non_static_member_use)
       << R.getLookupNameInfo().getName();
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case IMA_Static:
   case IMA_Abstract:
   case IMA_Mixed_StaticContext:
@@ -1623,10 +1623,14 @@
       else
         VK = BaseExpr.get()->getValueKind();
     }
+
     QualType ret = CheckExtVectorComponent(S, BaseType, VK, OpLoc,
                                            Member, MemberLoc);
     if (ret.isNull())
       return ExprError();
+    Qualifiers BaseQ =
+        S.Context.getCanonicalType(BaseExpr.get()->getType()).getQualifiers();
+    ret = S.Context.getQualifiedType(ret, BaseQ);
 
     return new (S.Context)
         ExtVectorElementExpr(ret, VK, BaseExpr.get(), *Member, MemberLoc);
@@ -1707,7 +1711,7 @@
 
   // Warn about the explicit constructor calls Microsoft extension.
   if (getLangOpts().MicrosoftExt &&
-      Id.getKind() == UnqualifiedId::IK_ConstructorName)
+      Id.getKind() == UnqualifiedIdKind::IK_ConstructorName)
     Diag(Id.getSourceRange().getBegin(),
          diag::ext_ms_explicit_constructor_call);
 
@@ -1791,7 +1795,9 @@
       MemberType = Context.getQualifiedType(MemberType, Combined);
   }
 
-  UnusedPrivateFields.remove(Field);
+  auto *CurMethod = dyn_cast<CXXMethodDecl>(CurContext);
+  if (!(CurMethod && CurMethod->isDefaulted()))
+    UnusedPrivateFields.remove(Field);
 
   ExprResult Base = PerformObjectMemberConversion(BaseExpr, SS.getScopeRep(),
                                                   FoundDecl, Field);
diff --git a/lib/Sema/SemaExprObjC.cpp b/lib/Sema/SemaExprObjC.cpp
index 369ba64..8d017af 100644
--- a/lib/Sema/SemaExprObjC.cpp
+++ b/lib/Sema/SemaExprObjC.cpp
@@ -564,6 +564,13 @@
       
       BoxingMethod = StringWithUTF8StringMethod;
       BoxedType = NSStringPointer;
+      // Transfer the nullability from method's return type.
+      Optional<NullabilityKind> Nullability =
+          BoxingMethod->getReturnType()->getNullability(Context);
+      if (Nullability)
+        BoxedType = Context.getAttributedType(
+            AttributedType::getNullabilityAttrKind(*Nullability), BoxedType,
+            BoxedType);
     }
   } else if (ValueType->isBuiltinType()) {
     // The other types we support are numeric, char and BOOL/bool. We could also
@@ -1606,6 +1613,11 @@
     ParmVarDecl *param = Method->parameters()[i];
     assert(argExpr && "CheckMessageArgumentTypes(): missing expression");
 
+    if (param->hasAttr<NoEscapeAttr>())
+      if (auto *BE = dyn_cast<BlockExpr>(
+              argExpr->IgnoreParenNoopCasts(Context)))
+        BE->getBlockDecl()->setDoesNotEscape();
+
     // Strip the unbridged-cast placeholder expression off unless it's
     // a consumed argument.
     if (argExpr->hasPlaceholderType(BuiltinType::ARCUnbridgedCast) &&
@@ -2974,6 +2986,7 @@
     case OMF_init:
       if (Method)
         checkInitMethod(Method, ReceiverType);
+      break;
 
     case OMF_None:
     case OMF_alloc:
@@ -4268,9 +4281,9 @@
   } else if (UnaryOperator *uo = dyn_cast<UnaryOperator>(e)) {
     assert(uo->getOpcode() == UO_Extension);
     Expr *sub = stripARCUnbridgedCast(uo->getSubExpr());
-    return new (Context) UnaryOperator(sub, UO_Extension, sub->getType(),
-                                   sub->getValueKind(), sub->getObjectKind(),
-                                       uo->getOperatorLoc());
+    return new (Context)
+        UnaryOperator(sub, UO_Extension, sub->getType(), sub->getValueKind(),
+                      sub->getObjectKind(), uo->getOperatorLoc(), false);
   } else if (GenericSelectionExpr *gse = dyn_cast<GenericSelectionExpr>(e)) {
     assert(!gse->isResultDependent());
 
diff --git a/lib/Sema/SemaInit.cpp b/lib/Sema/SemaInit.cpp
index dc7fe1d..df4090c 100644
--- a/lib/Sema/SemaInit.cpp
+++ b/lib/Sema/SemaInit.cpp
@@ -352,6 +352,7 @@
                                bool FillWithNoInit = false);
   void FillInEmptyInitializations(const InitializedEntity &Entity,
                                   InitListExpr *ILE, bool &RequiresSecondPass,
+                                  InitListExpr *OuterILE, unsigned OuterIndex,
                                   bool FillWithNoInit = false);
   bool CheckFlexibleArrayInit(const InitializedEntity &Entity,
                               Expr *InitExpr, FieldDecl *Field,
@@ -517,12 +518,13 @@
     ILE->setInit(Init, BaseInit.getAs<Expr>());
   } else if (InitListExpr *InnerILE =
                  dyn_cast<InitListExpr>(ILE->getInit(Init))) {
-    FillInEmptyInitializations(BaseEntity, InnerILE,
-                               RequiresSecondPass, FillWithNoInit);
+    FillInEmptyInitializations(BaseEntity, InnerILE, RequiresSecondPass,
+                               ILE, Init, FillWithNoInit);
   } else if (DesignatedInitUpdateExpr *InnerDIUE =
                dyn_cast<DesignatedInitUpdateExpr>(ILE->getInit(Init))) {
     FillInEmptyInitializations(BaseEntity, InnerDIUE->getUpdater(),
-                               RequiresSecondPass, /*FillWithNoInit =*/true);
+                               RequiresSecondPass, ILE, Init,
+                               /*FillWithNoInit =*/true);
   }
 }
 
@@ -605,24 +607,43 @@
   } else if (InitListExpr *InnerILE
                = dyn_cast<InitListExpr>(ILE->getInit(Init)))
     FillInEmptyInitializations(MemberEntity, InnerILE,
-                               RequiresSecondPass, FillWithNoInit);
+                               RequiresSecondPass, ILE, Init, FillWithNoInit);
   else if (DesignatedInitUpdateExpr *InnerDIUE
                = dyn_cast<DesignatedInitUpdateExpr>(ILE->getInit(Init)))
     FillInEmptyInitializations(MemberEntity, InnerDIUE->getUpdater(),
-                               RequiresSecondPass, /*FillWithNoInit =*/ true);
+                               RequiresSecondPass, ILE, Init,
+                               /*FillWithNoInit =*/true);
 }
 
 /// Recursively replaces NULL values within the given initializer list
 /// with expressions that perform value-initialization of the
-/// appropriate type.
+/// appropriate type, and finish off the InitListExpr formation.
 void
 InitListChecker::FillInEmptyInitializations(const InitializedEntity &Entity,
                                             InitListExpr *ILE,
                                             bool &RequiresSecondPass,
+                                            InitListExpr *OuterILE,
+                                            unsigned OuterIndex,
                                             bool FillWithNoInit) {
   assert((ILE->getType() != SemaRef.Context.VoidTy) &&
          "Should not have void type");
 
+  // If this is a nested initializer list, we might have changed its contents
+  // (and therefore some of its properties, such as instantiation-dependence)
+  // while filling it in. Inform the outer initializer list so that its state
+  // can be updated to match.
+  // FIXME: We should fully build the inner initializers before constructing
+  // the outer InitListExpr instead of mutating AST nodes after they have
+  // been used as subexpressions of other nodes.
+  struct UpdateOuterILEWithUpdatedInit {
+    InitListExpr *Outer;
+    unsigned OuterIndex;
+    ~UpdateOuterILEWithUpdatedInit() {
+      if (Outer)
+        Outer->setInit(OuterIndex, Outer->getInit(OuterIndex));
+    }
+  } UpdateOuterRAII = {OuterILE, OuterIndex};
+
   // A transparent ILE is not performing aggregate initialization and should
   // not be filled in.
   if (ILE->isTransparent())
@@ -769,11 +790,12 @@
     } else if (InitListExpr *InnerILE
                  = dyn_cast_or_null<InitListExpr>(InitExpr))
       FillInEmptyInitializations(ElementEntity, InnerILE, RequiresSecondPass,
-                                 FillWithNoInit);
+                                 ILE, Init, FillWithNoInit);
     else if (DesignatedInitUpdateExpr *InnerDIUE
                  = dyn_cast_or_null<DesignatedInitUpdateExpr>(InitExpr))
       FillInEmptyInitializations(ElementEntity, InnerDIUE->getUpdater(),
-                                 RequiresSecondPass, /*FillWithNoInit =*/ true);
+                                 RequiresSecondPass, ILE, Init,
+                                 /*FillWithNoInit =*/true);
   }
 }
 
@@ -795,10 +817,11 @@
 
   if (!hadError && !VerifyOnly) {
     bool RequiresSecondPass = false;
-    FillInEmptyInitializations(Entity, FullyStructuredList, RequiresSecondPass);
+    FillInEmptyInitializations(Entity, FullyStructuredList, RequiresSecondPass,
+                               /*OuterILE=*/nullptr, /*OuterIndex=*/0);
     if (RequiresSecondPass && !hadError)
       FillInEmptyInitializations(Entity, FullyStructuredList,
-                                 RequiresSecondPass);
+                                 RequiresSecondPass, nullptr, 0);
   }
 }
 
@@ -1162,10 +1185,12 @@
       if (!hadError && !VerifyOnly) {
         bool RequiresSecondPass = false;
         FillInEmptyInitializations(Entity, InnerStructuredList,
-                                   RequiresSecondPass);
+                                   RequiresSecondPass, StructuredList,
+                                   StructuredIndex);
         if (RequiresSecondPass && !hadError)
           FillInEmptyInitializations(Entity, InnerStructuredList,
-                                     RequiresSecondPass);
+                                     RequiresSecondPass, StructuredList,
+                                     StructuredIndex);
       }
       ++StructuredIndex;
       ++Index;
@@ -3533,8 +3558,8 @@
       clang::ArrayType::Normal, 0);
   InitializedEntity HiddenArray =
       InitializedEntity::InitializeTemporary(ArrayType);
-  InitializationKind Kind =
-      InitializationKind::CreateDirectList(List->getExprLoc());
+  InitializationKind Kind = InitializationKind::CreateDirectList(
+      List->getExprLoc(), List->getLocStart(), List->getLocEnd());
   TryListInitialization(S, HiddenArray, Kind, List, Sequence,
                         TreatUnavailableAsInvalid);
   if (Sequence)
@@ -3631,7 +3656,7 @@
   //
   // Note: SecondStepOfCopyInit is only ever true in this case when
   // evaluating whether to produce a C++98 compatibility warning.
-  if (S.getLangOpts().CPlusPlus1z && Args.size() == 1 &&
+  if (S.getLangOpts().CPlusPlus17 && Args.size() == 1 &&
       !SecondStepOfCopyInit) {
     Expr *Initializer = Args[0];
     auto *SourceRD = Initializer->getType()->getAsCXXRecordDecl();
@@ -3701,7 +3726,7 @@
     return;
   }
 
-  // C++1z [dcl.init]p17:
+  // C++17 [dcl.init]p17:
   //     - If the initializer expression is a prvalue and the cv-unqualified
   //       version of the source type is the same class as the class of the
   //       destination, the initializer expression is used to initialize the
@@ -3710,7 +3735,7 @@
   // class or delegating to another constructor from a mem-initializer.
   // ObjC++: Lambda captured by the block in the lambda to block conversion
   // should avoid copy elision.
-  if (S.getLangOpts().CPlusPlus1z &&
+  if (S.getLangOpts().CPlusPlus17 &&
       Entity.getKind() != InitializedEntity::EK_Base &&
       Entity.getKind() != InitializedEntity::EK_Delegating &&
       Entity.getKind() !=
@@ -4078,7 +4103,7 @@
     //     value T(v); if a narrowing conversion is required to convert v to
     //     the underlying type of T, the program is ill-formed.
     auto *ET = DestType->getAs<EnumType>();
-    if (S.getLangOpts().CPlusPlus1z &&
+    if (S.getLangOpts().CPlusPlus17 &&
         Kind.getKind() == InitializationKind::IK_DirectList &&
         ET && ET->getDecl()->isFixed() &&
         !S.Context.hasSameUnqualifiedType(E->getType(), DestType) &&
@@ -4511,7 +4536,7 @@
         RefRelationship == Sema::Ref_Related)) &&
       ((InitCategory.isXValue() && !isNonReferenceableGLValue(Initializer)) ||
        (InitCategory.isPRValue() &&
-        (S.getLangOpts().CPlusPlus1z || T2->isRecordType() ||
+        (S.getLangOpts().CPlusPlus17 || T2->isRecordType() ||
          T2->isArrayType())))) {
     ExprValueKind ValueKind = InitCategory.isXValue() ? VK_XValue : VK_RValue;
     if (InitCategory.isPRValue() && T2->isRecordType()) {
@@ -4887,7 +4912,7 @@
     //     copy-initialization.
     // Note that this just performs a simple object copy from the temporary.
     //
-    // C++1z:
+    // C++17:
     //   - if the function is a constructor, the call is a prvalue of the
     //     cv-unqualified version of the destination type whose return object
     //     is initialized by the constructor. The call is used to
@@ -4896,7 +4921,7 @@
     // Therefore we need to do nothing further.
     //
     // FIXME: Mark this copy as extraneous.
-    if (!S.getLangOpts().CPlusPlus1z)
+    if (!S.getLangOpts().CPlusPlus17)
       Sequence.AddFinalCopy(DestType);
     else if (DestType.hasQualifiers())
       Sequence.AddQualificationConversionStep(DestType, VK_RValue);
@@ -4912,13 +4937,13 @@
     //   The call is used to direct-initialize [...] the object that is the
     //   destination of the copy-initialization.
     //
-    // In C++1z, this does not call a constructor if we enter /17.6.1:
+    // In C++17, this does not call a constructor if we enter /17.6.1:
     //   - If the initializer expression is a prvalue and the cv-unqualified
     //     version of the source type is the same as the class of the
     //     destination [... do not make an extra copy]
     //
     // FIXME: Mark this copy as extraneous.
-    if (!S.getLangOpts().CPlusPlus1z ||
+    if (!S.getLangOpts().CPlusPlus17 ||
         Function->getReturnType()->isReferenceType() ||
         !S.Context.hasSameUnqualifiedType(ConvType, DestType))
       Sequence.AddFinalCopy(DestType);
@@ -6031,10 +6056,7 @@
     TypeSourceInfo *TSInfo = Entity.getTypeSourceInfo();
     if (!TSInfo)
       TSInfo = S.Context.getTrivialTypeSourceInfo(Entity.getType(), Loc);
-    SourceRange ParenOrBraceRange =
-      (Kind.getKind() == InitializationKind::IK_DirectList)
-      ? SourceRange(LBraceLoc, RBraceLoc)
-      : Kind.getParenRange();
+    SourceRange ParenOrBraceRange = Kind.getParenOrBraceRange();
 
     if (auto *Shadow = dyn_cast<ConstructorUsingShadowDecl>(
             Step.Function.FoundDecl.getDecl())) {
@@ -6068,7 +6090,7 @@
     if (IsListInitialization)
       ParenOrBraceRange = SourceRange(LBraceLoc, RBraceLoc);
     else if (Kind.getKind() == InitializationKind::IK_Direct)
-      ParenOrBraceRange = Kind.getParenRange();
+      ParenOrBraceRange = Kind.getParenOrBraceRange();
 
     // If the entity allows NRVO, mark the construction as elidable
     // unconditionally.
@@ -6212,7 +6234,7 @@
     if (Entity->getParent())
       return getEntityForTemporaryLifetimeExtension(Entity->getParent(),
                                                     Entity);
-    // Fall through.
+    LLVM_FALLTHROUGH;
   case InitializedEntity::EK_Delegating:
     // We can reach this case for aggregate initialization in a constructor:
     //   struct A { int &&r; };
@@ -6422,6 +6444,10 @@
     if (!VD || !VD->hasLocalStorage())
       return;
 
+    // __block variables are not moved implicitly.
+    if (VD->hasAttr<BlocksAttr>())
+      return;
+
     QualType SourceType = VD->getType();
     if (!SourceType->isRecordType())
       return;
@@ -6590,7 +6616,7 @@
     if (Kind.getKind() == InitializationKind::IK_Direct &&
         !Kind.isExplicitCast()) {
       // Rebuild the ParenListExpr.
-      SourceRange ParenRange = Kind.getParenRange();
+      SourceRange ParenRange = Kind.getParenOrBraceRange();
       return S.ActOnParenListExpr(ParenRange.getBegin(), ParenRange.getEnd(),
                                   Args);
     }
@@ -7110,14 +7136,17 @@
       bool IsStdInitListInit =
           Step->Kind == SK_StdInitializerListConstructorCall;
       Expr *Source = CurInit.get();
+      SourceRange Range = Kind.hasParenOrBraceRange()
+                              ? Kind.getParenOrBraceRange()
+                              : SourceRange();
       CurInit = PerformConstructorInitialization(
           S, UseTemporary ? TempEntity : Entity, Kind,
           Source ? MultiExprArg(Source) : Args, *Step,
           ConstructorInitRequiresZeroInit,
           /*IsListInitialization*/ IsStdInitListInit,
           /*IsStdInitListInitialization*/ IsStdInitListInit,
-          /*LBraceLoc*/ SourceLocation(),
-          /*RBraceLoc*/ SourceLocation());
+          /*LBraceLoc*/ Range.getBegin(),
+          /*RBraceLoc*/ Range.getEnd());
       break;
     }
 
@@ -7652,7 +7681,7 @@
       << Args[0]->getSourceRange();
       break;
     }
-    // Intentional fallthrough
+    LLVM_FALLTHROUGH;
 
   case FK_NonConstLValueReferenceBindingToUnrelated:
     S.Diag(Kind.getLocation(),
diff --git a/lib/Sema/SemaLambda.cpp b/lib/Sema/SemaLambda.cpp
index 996ed3e..19d2de7 100644
--- a/lib/Sema/SemaLambda.cpp
+++ b/lib/Sema/SemaLambda.cpp
@@ -947,7 +947,7 @@
        PrevCaptureLoc = C->Loc, ++C) {
     if (C->Kind == LCK_This || C->Kind == LCK_StarThis) {
       if (C->Kind == LCK_StarThis) 
-        Diag(C->Loc, !getLangOpts().CPlusPlus1z
+        Diag(C->Loc, !getLangOpts().CPlusPlus17
                              ? diag::ext_star_this_lambda_capture_cxx17
                              : diag::warn_cxx14_compat_star_this_lambda_capture);
 
@@ -1167,6 +1167,24 @@
   PopFunctionScopeInfo();
 }
 
+QualType Sema::getLambdaConversionFunctionResultType(
+    const FunctionProtoType *CallOpProto) {
+  // The function type inside the pointer type is the same as the call
+  // operator with some tweaks. The calling convention is the default free
+  // function convention, and the type qualifications are lost.
+  const FunctionProtoType::ExtProtoInfo CallOpExtInfo = 
+      CallOpProto->getExtProtoInfo();   
+  FunctionProtoType::ExtProtoInfo InvokerExtInfo = CallOpExtInfo;
+  CallingConv CC = Context.getDefaultCallingConvention(
+      CallOpProto->isVariadic(), /*IsCXXMethod=*/false);
+  InvokerExtInfo.ExtInfo = InvokerExtInfo.ExtInfo.withCallingConv(CC);
+  InvokerExtInfo.TypeQuals = 0;
+  assert(InvokerExtInfo.RefQualifier == RQ_None && 
+      "Lambda's call operator should not have a reference qualifier");
+  return Context.getFunctionType(CallOpProto->getReturnType(),
+                                 CallOpProto->getParamTypes(), InvokerExtInfo);
+}
+
 /// \brief Add a lambda's conversion to function pointer, as described in
 /// C++11 [expr.prim.lambda]p6.
 static void addFunctionPointerConversion(Sema &S,
@@ -1182,25 +1200,9 @@
     return;
 
   // Add the conversion to function pointer.
-  const FunctionProtoType *CallOpProto = 
-      CallOperator->getType()->getAs<FunctionProtoType>();
-  const FunctionProtoType::ExtProtoInfo CallOpExtInfo = 
-      CallOpProto->getExtProtoInfo();   
-  QualType PtrToFunctionTy;
-  QualType InvokerFunctionTy;
-  {
-    FunctionProtoType::ExtProtoInfo InvokerExtInfo = CallOpExtInfo;
-    CallingConv CC = S.Context.getDefaultCallingConvention(
-        CallOpProto->isVariadic(), /*IsCXXMethod=*/false);
-    InvokerExtInfo.ExtInfo = InvokerExtInfo.ExtInfo.withCallingConv(CC);
-    InvokerExtInfo.TypeQuals = 0;
-    assert(InvokerExtInfo.RefQualifier == RQ_None && 
-        "Lambda's call operator should not have a reference qualifier");
-    InvokerFunctionTy =
-        S.Context.getFunctionType(CallOpProto->getReturnType(),
-                                  CallOpProto->getParamTypes(), InvokerExtInfo);
-    PtrToFunctionTy = S.Context.getPointerType(InvokerFunctionTy);
-  }
+  QualType InvokerFunctionTy = S.getLambdaConversionFunctionResultType(
+      CallOperator->getType()->castAs<FunctionProtoType>());
+  QualType PtrToFunctionTy = S.Context.getPointerType(InvokerFunctionTy);
 
   // Create the type of the conversion function.
   FunctionProtoType::ExtProtoInfo ConvExtInfo(
@@ -1288,7 +1290,7 @@
                                 ConvTy, 
                                 ConvTSI,
                                 /*isInline=*/true, /*isExplicit=*/false,
-                                /*isConstexpr=*/S.getLangOpts().CPlusPlus1z, 
+                                /*isConstexpr=*/S.getLangOpts().CPlusPlus17, 
                                 CallOperator->getBody()->getLocEnd());
   Conversion->setAccess(AS_public);
   Conversion->setImplicit(true);
@@ -1357,19 +1359,8 @@
                                       SourceRange IntroducerRange,
                                       CXXRecordDecl *Class,
                                       CXXMethodDecl *CallOperator) {
-  const FunctionProtoType *Proto =
-      CallOperator->getType()->getAs<FunctionProtoType>();
-
-  // The function type inside the block pointer type is the same as the call
-  // operator with some tweaks. The calling convention is the default free
-  // function convention, and the type qualifications are lost.
-  FunctionProtoType::ExtProtoInfo BlockEPI = Proto->getExtProtoInfo();
-  BlockEPI.ExtInfo =
-      BlockEPI.ExtInfo.withCallingConv(S.Context.getDefaultCallingConvention(
-          Proto->isVariadic(), /*IsCXXMethod=*/false));
-  BlockEPI.TypeQuals = 0;
-  QualType FunctionTy = S.Context.getFunctionType(
-      Proto->getReturnType(), Proto->getParamTypes(), BlockEPI);
+  QualType FunctionTy = S.getLambdaConversionFunctionResultType(
+      CallOperator->getType()->castAs<FunctionProtoType>());
   QualType BlockPtrTy = S.Context.getBlockPointerType(FunctionTy);
 
   FunctionProtoType::ExtProtoInfo ConversionEPI(
@@ -1481,6 +1472,9 @@
   if (CaptureHasSideEffects(From))
     return;
 
+  if (From.isVLATypeCapture())
+    return;
+
   auto diag = Diag(From.getLocation(), diag::warn_unused_lambda_capture);
   if (From.isThisCapture())
     diag << "'this'";
@@ -1608,7 +1602,7 @@
   // If the lambda expression's call operator is not explicitly marked constexpr
   // and we are not in a dependent context, analyze the call operator to infer
   // its constexpr-ness, suppressing diagnostics while doing so.
-  if (getLangOpts().CPlusPlus1z && !CallOperator->isInvalidDecl() &&
+  if (getLangOpts().CPlusPlus17 && !CallOperator->isInvalidDecl() &&
       !CallOperator->isConstexpr() &&
       !isa<CoroutineBodyStmt>(CallOperator->getBody()) &&
       !Class->getDeclContext()->isDependentContext()) {
diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp
index 73aceaa..dd54fb4 100644
--- a/lib/Sema/SemaLookup.cpp
+++ b/lib/Sema/SemaLookup.cpp
@@ -155,7 +155,7 @@
       while (true) {
         for (auto UD : DC->using_directives()) {
           DeclContext *NS = UD->getNominatedNamespace();
-          if (visited.insert(NS).second && SemaRef.isVisible(UD)) {
+          if (SemaRef.isVisible(UD) && visited.insert(NS).second) {
             addUsingDirective(UD, EffectiveDC);
             queue.push_back(NS);
           }
@@ -1589,7 +1589,7 @@
     return false;
 
   // Find the extra places where we need to look.
-  llvm::DenseSet<Module*> &LookupModules = SemaRef.getLookupModules();
+  const auto &LookupModules = SemaRef.getLookupModules();
   if (LookupModules.empty())
     return false;
 
@@ -1604,7 +1604,8 @@
   // Check whether DeclModule is transitively exported to an import of
   // the lookup set.
   return std::any_of(LookupModules.begin(), LookupModules.end(),
-                     [&](Module *M) { return M->isModuleVisible(DeclModule); });
+                     [&](const Module *M) {
+                       return M->isModuleVisible(DeclModule); });
 }
 
 bool Sema::isVisibleSlow(const NamedDecl *D) {
@@ -1655,7 +1656,8 @@
 ///
 /// \returns D, or a visible previous declaration of D, whichever is more recent
 /// and visible. If no declaration of D is visible, returns null.
-static NamedDecl *findAcceptableDecl(Sema &SemaRef, NamedDecl *D) {
+static NamedDecl *findAcceptableDecl(Sema &SemaRef, NamedDecl *D,
+                                     unsigned IDNS) {
   assert(!LookupResult::isVisible(SemaRef, D) && "not in slow case");
 
   for (auto RD : D->redecls()) {
@@ -1667,7 +1669,8 @@
     // FIXME: This is wrong in the case where the previous declaration is not
     // visible in the same scope as D. This needs to be done much more
     // carefully.
-    if (LookupResult::isVisible(SemaRef, ND))
+    if (ND->isInIdentifierNamespace(IDNS) &&
+        LookupResult::isVisible(SemaRef, ND))
       return ND;
   }
 
@@ -1692,14 +1695,15 @@
     auto *Key = ND->getCanonicalDecl();
     if (auto *Acceptable = getSema().VisibleNamespaceCache.lookup(Key))
       return Acceptable;
-    auto *Acceptable =
-        isVisible(getSema(), Key) ? Key : findAcceptableDecl(getSema(), Key);
+    auto *Acceptable = isVisible(getSema(), Key)
+                           ? Key
+                           : findAcceptableDecl(getSema(), Key, IDNS);
     if (Acceptable)
       getSema().VisibleNamespaceCache.insert(std::make_pair(Key, Acceptable));
     return Acceptable;
   }
 
-  return findAcceptableDecl(getSema(), D);
+  return findAcceptableDecl(getSema(), D, IDNS);
 }
 
 /// @brief Perform unqualified name lookup starting from a given
@@ -1883,7 +1887,7 @@
   // with its using-children.
   for (auto *I : StartDC->using_directives()) {
     NamespaceDecl *ND = I->getNominatedNamespace()->getOriginalNamespace();
-    if (Visited.insert(ND).second && S.isVisible(I))
+    if (S.isVisible(I) && Visited.insert(ND).second)
       Queue.push_back(ND);
   }
 
@@ -1931,7 +1935,7 @@
 
     for (auto I : ND->using_directives()) {
       NamespaceDecl *Nom = I->getNominatedNamespace();
-      if (Visited.insert(Nom).second && S.isVisible(I))
+      if (S.isVisible(I) && Visited.insert(Nom).second)
         Queue.push_back(Nom);
     }
   }
@@ -3328,6 +3332,23 @@
     //        lookup (11.4).
     DeclContext::lookup_result R = NS->lookup(Name);
     for (auto *D : R) {
+      auto *Underlying = D;
+      if (auto *USD = dyn_cast<UsingShadowDecl>(D))
+        Underlying = USD->getTargetDecl();
+
+      if (!isa<FunctionDecl>(Underlying) &&
+          !isa<FunctionTemplateDecl>(Underlying))
+        continue;
+
+      if (!isVisible(D)) {
+        D = findAcceptableDecl(
+            *this, D, (Decl::IDNS_Ordinary | Decl::IDNS_OrdinaryFriend));
+        if (!D)
+          continue;
+        if (auto *USD = dyn_cast<UsingShadowDecl>(D))
+          Underlying = USD->getTargetDecl();
+      }
+
       // If the only declaration here is an ordinary friend, consider
       // it only if it was declared in an associated classes.
       if ((D->getIdentifierNamespace() & Decl::IDNS_Ordinary) == 0) {
@@ -3349,22 +3370,6 @@
           continue;
       }
 
-      auto *Underlying = D;
-      if (auto *USD = dyn_cast<UsingShadowDecl>(D))
-        Underlying = USD->getTargetDecl();
-
-      if (!isa<FunctionDecl>(Underlying) &&
-          !isa<FunctionTemplateDecl>(Underlying))
-        continue;
-
-      if (!isVisible(D)) {
-        D = findAcceptableDecl(*this, D);
-        if (!D)
-          continue;
-        if (auto *USD = dyn_cast<UsingShadowDecl>(D))
-          Underlying = USD->getTargetDecl();
-      }
-
       // FIXME: Preserve D as the FoundDecl.
       Result.insert(Underlying);
     }
@@ -3493,7 +3498,8 @@
                                bool InBaseClass,
                                VisibleDeclConsumer &Consumer,
                                VisibleDeclsRecord &Visited,
-                               bool IncludeDependentBases = false) {
+                               bool IncludeDependentBases,
+                               bool LoadExternal) {
   if (!Ctx)
     return;
 
@@ -3501,6 +3507,8 @@
   if (Visited.visitedContext(Ctx->getPrimaryContext()))
     return;
 
+  Consumer.EnteredContext(Ctx);
+
   // Outside C++, lookup results for the TU live on identifiers.
   if (isa<TranslationUnitDecl>(Ctx) &&
       !Result.getSema().getLangOpts().CPlusPlus) {
@@ -3508,11 +3516,12 @@
     auto &Idents = S.Context.Idents;
 
     // Ensure all external identifiers are in the identifier table.
-    if (IdentifierInfoLookup *External = Idents.getExternalIdentifierLookup()) {
-      std::unique_ptr<IdentifierIterator> Iter(External->getIdentifiers());
-      for (StringRef Name = Iter->Next(); !Name.empty(); Name = Iter->Next())
-        Idents.get(Name);
-    }
+    if (LoadExternal)
+      if (IdentifierInfoLookup *External = Idents.getExternalIdentifierLookup()) {
+        std::unique_ptr<IdentifierIterator> Iter(External->getIdentifiers());
+        for (StringRef Name = Iter->Next(); !Name.empty(); Name = Iter->Next())
+          Idents.get(Name);
+      }
 
     // Walk all lookup results in the TU for each identifier.
     for (const auto &Ident : Idents) {
@@ -3534,8 +3543,13 @@
   if (CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(Ctx))
     Result.getSema().ForceDeclarationOfImplicitMembers(Class);
 
+  // We sometimes skip loading namespace-level results (they tend to be huge).
+  bool Load = LoadExternal ||
+              !(isa<TranslationUnitDecl>(Ctx) || isa<NamespaceDecl>(Ctx));
   // Enumerate all of the results in this context.
-  for (DeclContextLookupResult R : Ctx->lookups()) {
+  for (DeclContextLookupResult R :
+       Load ? Ctx->lookups()
+            : Ctx->noload_lookups(/*PreserveInternalState=*/false)) {
     for (auto *D : R) {
       if (auto *ND = Result.getAcceptableDecl(D)) {
         Consumer.FoundDecl(ND, Visited.checkHidden(ND), Ctx, InBaseClass);
@@ -3552,7 +3566,7 @@
         continue;
       LookupVisibleDecls(I->getNominatedNamespace(), Result,
                          QualifiedNameLookup, InBaseClass, Consumer, Visited,
-                         IncludeDependentBases);
+                         IncludeDependentBases, LoadExternal);
     }
   }
 
@@ -3609,7 +3623,7 @@
       // Find results in this base class (and its bases).
       ShadowContextRAII Shadow(Visited);
       LookupVisibleDecls(RD, Result, QualifiedNameLookup, true, Consumer,
-                         Visited, IncludeDependentBases);
+                         Visited, IncludeDependentBases, LoadExternal);
     }
   }
 
@@ -3618,22 +3632,23 @@
     // Traverse categories.
     for (auto *Cat : IFace->visible_categories()) {
       ShadowContextRAII Shadow(Visited);
-      LookupVisibleDecls(Cat, Result, QualifiedNameLookup, false,
-                         Consumer, Visited);
+      LookupVisibleDecls(Cat, Result, QualifiedNameLookup, false, Consumer,
+                         Visited, IncludeDependentBases, LoadExternal);
     }
 
     // Traverse protocols.
     for (auto *I : IFace->all_referenced_protocols()) {
       ShadowContextRAII Shadow(Visited);
       LookupVisibleDecls(I, Result, QualifiedNameLookup, false, Consumer,
-                         Visited);
+                         Visited, IncludeDependentBases, LoadExternal);
     }
 
     // Traverse the superclass.
     if (IFace->getSuperClass()) {
       ShadowContextRAII Shadow(Visited);
       LookupVisibleDecls(IFace->getSuperClass(), Result, QualifiedNameLookup,
-                         true, Consumer, Visited);
+                         true, Consumer, Visited, IncludeDependentBases,
+                         LoadExternal);
     }
 
     // If there is an implementation, traverse it. We do this to find
@@ -3641,26 +3656,28 @@
     if (IFace->getImplementation()) {
       ShadowContextRAII Shadow(Visited);
       LookupVisibleDecls(IFace->getImplementation(), Result,
-                         QualifiedNameLookup, InBaseClass, Consumer, Visited);
+                         QualifiedNameLookup, InBaseClass, Consumer, Visited,
+                         IncludeDependentBases, LoadExternal);
     }
   } else if (ObjCProtocolDecl *Protocol = dyn_cast<ObjCProtocolDecl>(Ctx)) {
     for (auto *I : Protocol->protocols()) {
       ShadowContextRAII Shadow(Visited);
       LookupVisibleDecls(I, Result, QualifiedNameLookup, false, Consumer,
-                         Visited);
+                         Visited, IncludeDependentBases, LoadExternal);
     }
   } else if (ObjCCategoryDecl *Category = dyn_cast<ObjCCategoryDecl>(Ctx)) {
     for (auto *I : Category->protocols()) {
       ShadowContextRAII Shadow(Visited);
       LookupVisibleDecls(I, Result, QualifiedNameLookup, false, Consumer,
-                         Visited);
+                         Visited, IncludeDependentBases, LoadExternal);
     }
 
     // If there is an implementation, traverse it.
     if (Category->getImplementation()) {
       ShadowContextRAII Shadow(Visited);
       LookupVisibleDecls(Category->getImplementation(), Result,
-                         QualifiedNameLookup, true, Consumer, Visited);
+                         QualifiedNameLookup, true, Consumer, Visited,
+                         IncludeDependentBases, LoadExternal);
     }
   }
 }
@@ -3668,7 +3685,8 @@
 static void LookupVisibleDecls(Scope *S, LookupResult &Result,
                                UnqualUsingDirectiveSet &UDirs,
                                VisibleDeclConsumer &Consumer,
-                               VisibleDeclsRecord &Visited) {
+                               VisibleDeclsRecord &Visited,
+                               bool LoadExternal) {
   if (!S)
     return;
 
@@ -3707,7 +3725,8 @@
                                   Result.getNameLoc(), Sema::LookupMemberName);
           if (ObjCInterfaceDecl *IFace = Method->getClassInterface()) {
             LookupVisibleDecls(IFace, IvarResult, /*QualifiedNameLookup=*/false,
-                               /*InBaseClass=*/false, Consumer, Visited);
+                               /*InBaseClass=*/false, Consumer, Visited,
+                               /*IncludeDependentBases=*/false, LoadExternal);
           }
         }
 
@@ -3721,7 +3740,8 @@
         continue;
 
       LookupVisibleDecls(Ctx, Result, /*QualifiedNameLookup=*/false,
-                         /*InBaseClass=*/false, Consumer, Visited);
+                         /*InBaseClass=*/false, Consumer, Visited,
+                         /*IncludeDependentBases=*/false, LoadExternal);
     }
   } else if (!S->getParent()) {
     // Look into the translation unit scope. We walk through the translation
@@ -3735,7 +3755,8 @@
     // in DeclContexts unless we have to" optimization), we can eliminate this.
     Entity = Result.getSema().Context.getTranslationUnitDecl();
     LookupVisibleDecls(Entity, Result, /*QualifiedNameLookup=*/false,
-                       /*InBaseClass=*/false, Consumer, Visited);
+                       /*InBaseClass=*/false, Consumer, Visited,
+                       /*IncludeDependentBases=*/false, LoadExternal);
   }
 
   if (Entity) {
@@ -3744,17 +3765,19 @@
     for (const UnqualUsingEntry &UUE : UDirs.getNamespacesFor(Entity))
       LookupVisibleDecls(const_cast<DeclContext *>(UUE.getNominatedNamespace()),
                          Result, /*QualifiedNameLookup=*/false,
-                         /*InBaseClass=*/false, Consumer, Visited);
+                         /*InBaseClass=*/false, Consumer, Visited,
+                         /*IncludeDependentBases=*/false, LoadExternal);
   }
 
   // Lookup names in the parent scope.
   ShadowContextRAII Shadow(Visited);
-  LookupVisibleDecls(S->getParent(), Result, UDirs, Consumer, Visited);
+  LookupVisibleDecls(S->getParent(), Result, UDirs, Consumer, Visited,
+                     LoadExternal);
 }
 
 void Sema::LookupVisibleDecls(Scope *S, LookupNameKind Kind,
                               VisibleDeclConsumer &Consumer,
-                              bool IncludeGlobalScope) {
+                              bool IncludeGlobalScope, bool LoadExternal) {
   // Determine the set of using directives available during
   // unqualified name lookup.
   Scope *Initial = S;
@@ -3775,13 +3798,13 @@
   if (!IncludeGlobalScope)
     Visited.visitedContext(Context.getTranslationUnitDecl());
   ShadowContextRAII Shadow(Visited);
-  ::LookupVisibleDecls(Initial, Result, UDirs, Consumer, Visited);
+  ::LookupVisibleDecls(Initial, Result, UDirs, Consumer, Visited, LoadExternal);
 }
 
 void Sema::LookupVisibleDecls(DeclContext *Ctx, LookupNameKind Kind,
                               VisibleDeclConsumer &Consumer,
                               bool IncludeGlobalScope,
-                              bool IncludeDependentBases) {
+                              bool IncludeDependentBases, bool LoadExternal) {
   LookupResult Result(*this, DeclarationName(), SourceLocation(), Kind);
   Result.setAllowHidden(Consumer.includeHiddenDecls());
   VisibleDeclsRecord Visited;
@@ -3790,7 +3813,7 @@
   ShadowContextRAII Shadow(Visited);
   ::LookupVisibleDecls(Ctx, Result, /*QualifiedNameLookup=*/true,
                        /*InBaseClass=*/false, Consumer, Visited,
-                       IncludeDependentBases);
+                       IncludeDependentBases, LoadExternal);
 }
 
 /// LookupOrCreateLabel - Do a name lookup of a label with the specified name.
@@ -3864,17 +3887,13 @@
   bool AnyVisibleDecls = !NewDecls.empty();
 
   for (/**/; DI != DE; ++DI) {
-    NamedDecl *VisibleDecl = *DI;
-    if (!LookupResult::isVisible(SemaRef, *DI))
-      VisibleDecl = findAcceptableDecl(SemaRef, *DI);
-
-    if (VisibleDecl) {
+    if (LookupResult::isVisible(SemaRef, *DI)) {
       if (!AnyVisibleDecls) {
         // Found a visible decl, discard all hidden ones.
         AnyVisibleDecls = true;
         NewDecls.clear();
       }
-      NewDecls.push_back(VisibleDecl);
+      NewDecls.push_back(*DI);
     } else if (!AnyVisibleDecls && !(*DI)->isModulePrivate())
       NewDecls.push_back(*DI);
   }
@@ -3944,8 +3963,7 @@
 
   // Only consider visible declarations and declarations from modules with
   // names that exactly match.
-  if (!LookupResult::isVisible(SemaRef, ND) && Name != Typo &&
-      !findAcceptableDecl(SemaRef, ND))
+  if (!LookupResult::isVisible(SemaRef, ND) && Name != Typo)
     return;
 
   FoundName(Name->getName());
diff --git a/lib/Sema/SemaObjCProperty.cpp b/lib/Sema/SemaObjCProperty.cpp
index 9c61d45..3b8ad3d 100644
--- a/lib/Sema/SemaObjCProperty.cpp
+++ b/lib/Sema/SemaObjCProperty.cpp
@@ -618,7 +618,7 @@
     TInfo = Context.getTrivialTypeSourceInfo(T, TLoc);
   }
 
-  DeclContext *DC = cast<DeclContext>(CDecl);
+  DeclContext *DC = CDecl;
   ObjCPropertyDecl *PDecl = ObjCPropertyDecl::Create(Context, DC,
                                                      FD.D.getIdentifierLoc(),
                                                      PropertyId, AtLoc, 
@@ -1290,6 +1290,14 @@
         // An abstract type is as bad as an incomplete type.
         CompleteTypeErr = true;
       }
+      if (!CompleteTypeErr) {
+        const RecordType *RecordTy = PropertyIvarType->getAs<RecordType>();
+        if (RecordTy && RecordTy->getDecl()->hasFlexibleArrayMember()) {
+          Diag(PropertyIvarLoc, diag::err_synthesize_variable_sized_ivar)
+            << PropertyIvarType;
+          CompleteTypeErr = true; // suppress later diagnostics about the ivar
+        }
+      }
       if (CompleteTypeErr)
         Ivar->setInvalidDecl();
       ClassImpDecl->addDecl(Ivar);
diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp
index a024888..be51787 100644
--- a/lib/Sema/SemaOpenMP.cpp
+++ b/lib/Sema/SemaOpenMP.cpp
@@ -38,7 +38,7 @@
 static Expr *CheckMapClauseExpressionBase(
     Sema &SemaRef, Expr *E,
     OMPClauseMappableExprCommon::MappableExprComponentList &CurComponents,
-    OpenMPClauseKind CKind);
+    OpenMPClauseKind CKind, bool NoDiagnose);
 
 namespace {
 /// \brief Default data sharing attributes, which can be applied to directive.
@@ -178,6 +178,10 @@
   explicit DSAStackTy(Sema &S) : SemaRef(S) {}
 
   bool isClauseParsingMode() const { return ClauseKindMode != OMPC_unknown; }
+  OpenMPClauseKind getClauseParsingMode() const {
+    assert(isClauseParsingMode() && "Must be in clause parsing mode.");
+    return ClauseKindMode;
+  }
   void setClauseParsingMode(OpenMPClauseKind K) { ClauseKindMode = K; }
 
   bool isForceVarCapturing() const { return ForceCapturing; }
@@ -330,6 +334,11 @@
   OpenMPDirectiveKind getCurrentDirective() const {
     return isStackEmpty() ? OMPD_unknown : Stack.back().first.back().Directive;
   }
+  /// \brief Returns directive kind at specified level.
+  OpenMPDirectiveKind getDirective(unsigned Level) const {
+    assert(!isStackEmpty() && "No directive at specified level.");
+    return Stack.back().first[Level].Directive;
+  }
   /// \brief Returns parent directive.
   OpenMPDirectiveKind getParentDirective() const {
     if (isStackEmpty() || Stack.back().first.size() == 1)
@@ -835,10 +844,10 @@
   Expr *&TaskgroupReductionRef =
       Stack.back().first.back().TaskgroupReductionRef;
   if (!TaskgroupReductionRef) {
-    auto *VD = buildVarDecl(SemaRef, SourceLocation(),
+    auto *VD = buildVarDecl(SemaRef, SR.getBegin(),
                             SemaRef.Context.VoidPtrTy, ".task_red.");
-    TaskgroupReductionRef = buildDeclRefExpr(
-        SemaRef, VD, SemaRef.Context.VoidPtrTy, SourceLocation());
+    TaskgroupReductionRef =
+        buildDeclRefExpr(SemaRef, VD, SemaRef.Context.VoidPtrTy, SR.getBegin());
   }
 }
 
@@ -858,10 +867,10 @@
   Expr *&TaskgroupReductionRef =
       Stack.back().first.back().TaskgroupReductionRef;
   if (!TaskgroupReductionRef) {
-    auto *VD = buildVarDecl(SemaRef, SourceLocation(),
-                            SemaRef.Context.VoidPtrTy, ".task_red.");
-    TaskgroupReductionRef = buildDeclRefExpr(
-        SemaRef, VD, SemaRef.Context.VoidPtrTy, SourceLocation());
+    auto *VD = buildVarDecl(SemaRef, SR.getBegin(), SemaRef.Context.VoidPtrTy,
+                            ".task_red.");
+    TaskgroupReductionRef =
+        buildDeclRefExpr(SemaRef, VD, SemaRef.Context.VoidPtrTy, SR.getBegin());
   }
 }
 
@@ -927,10 +936,11 @@
 
 bool DSAStackTy::isOpenMPLocal(VarDecl *D, StackTy::reverse_iterator Iter) {
   D = D->getCanonicalDecl();
-  if (!isStackEmpty() && Stack.back().first.size() > 1) {
+  if (!isStackEmpty()) {
     reverse_iterator I = Iter, E = Stack.back().first.rend();
     Scope *TopScope = nullptr;
-    while (I != E && !isParallelOrTaskRegion(I->Directive))
+    while (I != E && !isParallelOrTaskRegion(I->Directive) &&
+           !isOpenMPTargetExecutionDirective(I->Directive))
       ++I;
     if (I == E)
       return false;
@@ -947,26 +957,75 @@
   D = getCanonicalDecl(D);
   DSAVarData DVar;
 
+  auto *VD = dyn_cast<VarDecl>(D);
+  auto TI = Threadprivates.find(D);
+  if (TI != Threadprivates.end()) {
+    DVar.RefExpr = TI->getSecond().RefExpr.getPointer();
+    DVar.CKind = OMPC_threadprivate;
+    return DVar;
+  } else if (VD && VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
+    DVar.RefExpr = buildDeclRefExpr(
+        SemaRef, VD, D->getType().getNonReferenceType(),
+        VD->getAttr<OMPThreadPrivateDeclAttr>()->getLocation());
+    DVar.CKind = OMPC_threadprivate;
+    addDSA(D, DVar.RefExpr, OMPC_threadprivate);
+    return DVar;
+  }
   // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
   // in a Construct, C/C++, predetermined, p.1]
   //  Variables appearing in threadprivate directives are threadprivate.
-  auto *VD = dyn_cast<VarDecl>(D);
   if ((VD && VD->getTLSKind() != VarDecl::TLS_None &&
        !(VD->hasAttr<OMPThreadPrivateDeclAttr>() &&
          SemaRef.getLangOpts().OpenMPUseTLS &&
          SemaRef.getASTContext().getTargetInfo().isTLSSupported())) ||
       (VD && VD->getStorageClass() == SC_Register &&
        VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl())) {
-    addDSA(D, buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
-                               D->getLocation()),
-           OMPC_threadprivate);
-  }
-  auto TI = Threadprivates.find(D);
-  if (TI != Threadprivates.end()) {
-    DVar.RefExpr = TI->getSecond().RefExpr.getPointer();
+    DVar.RefExpr = buildDeclRefExpr(
+        SemaRef, VD, D->getType().getNonReferenceType(), D->getLocation());
     DVar.CKind = OMPC_threadprivate;
+    addDSA(D, DVar.RefExpr, OMPC_threadprivate);
     return DVar;
   }
+  if (SemaRef.getLangOpts().OpenMPCUDAMode && VD &&
+      VD->isLocalVarDeclOrParm() && !isStackEmpty() &&
+      !isLoopControlVariable(D).first) {
+    auto IterTarget =
+        std::find_if(Stack.back().first.rbegin(), Stack.back().first.rend(),
+                     [](const SharingMapTy &Data) {
+                       return isOpenMPTargetExecutionDirective(Data.Directive);
+                     });
+    if (IterTarget != Stack.back().first.rend()) {
+      auto ParentIterTarget = std::next(IterTarget, 1);
+      auto Iter = Stack.back().first.rbegin();
+      while (Iter != ParentIterTarget) {
+        if (isOpenMPLocal(VD, Iter)) {
+          DVar.RefExpr =
+              buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
+                               D->getLocation());
+          DVar.CKind = OMPC_threadprivate;
+          return DVar;
+        }
+        std::advance(Iter, 1);
+      }
+      if (!isClauseParsingMode() || IterTarget != Stack.back().first.rbegin()) {
+        auto DSAIter = IterTarget->SharingMap.find(D);
+        if (DSAIter != IterTarget->SharingMap.end() &&
+            isOpenMPPrivate(DSAIter->getSecond().Attributes)) {
+          DVar.RefExpr = DSAIter->getSecond().RefExpr.getPointer();
+          DVar.CKind = OMPC_threadprivate;
+          return DVar;
+        } else if (!SemaRef.IsOpenMPCapturedByRef(
+                       D, std::distance(ParentIterTarget,
+                                        Stack.back().first.rend()))) {
+          DVar.RefExpr =
+              buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
+                               IterTarget->ConstructLoc);
+          DVar.CKind = OMPC_threadprivate;
+          return DVar;
+        }
+      }
+    }
+  }
 
   if (isStackEmpty())
     // Not in OpenMP execution region and top scope was already checked.
@@ -1010,7 +1069,7 @@
         D, [](OpenMPClauseKind C) -> bool { return C == OMPC_firstprivate; },
         MatchesAlways, FromParent);
     if (DVarTemp.CKind == OMPC_firstprivate && DVarTemp.RefExpr)
-      return DVar;
+      return DVarTemp;
 
     DVar.CKind = OMPC_shared;
     return DVar;
@@ -1091,8 +1150,6 @@
 bool DSAStackTy::hasExplicitDSA(
     ValueDecl *D, const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
     unsigned Level, bool NotLastprivate) {
-  if (CPred(ClauseKindMode))
-    return true;
   if (isStackEmpty())
     return false;
   D = getCanonicalDecl(D);
@@ -1266,16 +1323,26 @@
       // reference except if it is a pointer that is dereferenced somehow.
       IsByRef = !(Ty->isPointerType() && IsVariableAssociatedWithSection);
     } else {
-      // By default, all the data that has a scalar type is mapped by copy.
-      IsByRef = !Ty->isScalarType() ||
-                DSAStack->getDefaultDMAAtLevel(Level) == DMA_tofrom_scalar;
+      // By default, all the data that has a scalar type is mapped by copy
+      // (except for reduction variables).
+      IsByRef =
+          !Ty->isScalarType() ||
+          DSAStack->getDefaultDMAAtLevel(Level) == DMA_tofrom_scalar ||
+          DSAStack->hasExplicitDSA(
+              D, [](OpenMPClauseKind K) { return K == OMPC_reduction; }, Level);
     }
   }
 
   if (IsByRef && Ty.getNonReferenceType()->isScalarType()) {
-    IsByRef = !DSAStack->hasExplicitDSA(
-        D, [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; },
-        Level, /*NotLastprivate=*/true);
+    IsByRef =
+        !DSAStack->hasExplicitDSA(
+            D,
+            [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; },
+            Level, /*NotLastprivate=*/true) &&
+        // If the variable is artificial and must be captured by value - try to
+        // capture by value.
+        !(isa<OMPCapturedExprDecl>(D) && !D->hasAttr<OMPCaptureNoInitAttr>() &&
+          !cast<OMPCapturedExprDecl>(D)->getInit()->isGLValue());
   }
 
   // When passing data by copy, we need to make sure it fits the uintptr size
@@ -1297,6 +1364,17 @@
   return DSAStack->getNestingLevel();
 }
 
+bool Sema::isInOpenMPTargetExecutionDirective() const {
+  return (isOpenMPTargetExecutionDirective(DSAStack->getCurrentDirective()) &&
+          !DSAStack->isClauseParsingMode()) ||
+         DSAStack->hasDirective(
+             [](OpenMPDirectiveKind K, const DeclarationNameInfo &,
+                SourceLocation) -> bool {
+               return isOpenMPTargetExecutionDirective(K);
+             },
+             false);
+}
+
 VarDecl *Sema::IsOpenMPCapturedDecl(ValueDecl *D) {
   assert(LangOpts.OpenMP && "OpenMP is not allowed");
   D = getCanonicalDecl(D);
@@ -1309,18 +1387,8 @@
   // inserted here once support for 'declare target' is added.
   //
   auto *VD = dyn_cast<VarDecl>(D);
-  if (VD && !VD->hasLocalStorage()) {
-    if (isOpenMPTargetExecutionDirective(DSAStack->getCurrentDirective()) &&
-        !DSAStack->isClauseParsingMode())
-      return VD;
-    if (DSAStack->hasDirective(
-            [](OpenMPDirectiveKind K, const DeclarationNameInfo &,
-               SourceLocation) -> bool {
-              return isOpenMPTargetExecutionDirective(K);
-            },
-            false))
-      return VD;
-  }
+  if (VD && !VD->hasLocalStorage() && isInOpenMPTargetExecutionDirective())
+    return VD;
 
   if (DSAStack->getCurrentDirective() != OMPD_unknown &&
       (!DSAStack->isClauseParsingMode() ||
@@ -1343,11 +1411,20 @@
   return nullptr;
 }
 
+void Sema::adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex,
+                                        unsigned Level) const {
+  SmallVector<OpenMPDirectiveKind, 4> Regions;
+  getOpenMPCaptureRegions(Regions, DSAStack->getDirective(Level));
+  FunctionScopesIndex -= Regions.size();
+}
+
 bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) {
   assert(LangOpts.OpenMP && "OpenMP is not allowed");
   return DSAStack->hasExplicitDSA(
              D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; },
              Level) ||
+         (DSAStack->isClauseParsingMode() &&
+          DSAStack->getClauseParsingMode() == OMPC_private) ||
          // Consider taskgroup reduction descriptor variable a private to avoid
          // possible capture in the region.
          (DSAStack->hasExplicitDirective(
@@ -1499,7 +1576,7 @@
   explicit VarOrFuncDeclFilterCCC(Sema &S) : SemaRef(S) {}
   bool ValidateCandidate(const TypoCorrection &Candidate) override {
     NamedDecl *ND = Candidate.getCorrectionDecl();
-    if (isa<VarDecl>(ND) || isa<FunctionDecl>(ND)) {
+    if (ND && (isa<VarDecl>(ND) || isa<FunctionDecl>(ND))) {
       return SemaRef.isDeclInScope(ND, SemaRef.getCurLexicalContext(),
                                    SemaRef.getCurScope());
     }
@@ -1558,7 +1635,7 @@
   }
 
   VarDecl *CanonicalVD = VD->getCanonicalDecl();
-  NamedDecl *ND = cast<NamedDecl>(CanonicalVD);
+  NamedDecl *ND = CanonicalVD;
   // OpenMP [2.9.2, Restrictions, C/C++, p.2]
   //   A threadprivate directive for file-scope variables must appear outside
   //   any definition or declaration.
@@ -1925,10 +2002,10 @@
         E->containsUnexpandedParameterPack() || E->isInstantiationDependent())
       return;
     auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl());
-    if (!FD)
-      return;
     OpenMPDirectiveKind DKind = Stack->getCurrentDirective();
     if (isa<CXXThisExpr>(E->getBase()->IgnoreParens())) {
+      if (!FD)
+        return;
       auto DVar = Stack->getTopDSA(FD, false);
       // Check if the variable has explicit DSA set and stop analysis if it
       // so.
@@ -1951,12 +2028,8 @@
         // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C/C++, p.3]
         //  A bit-field cannot appear in a map clause.
         //
-        if (FD->isBitField()) {
-          SemaRef.Diag(E->getMemberLoc(),
-                       diag::err_omp_bit_fields_forbidden_in_clause)
-              << E->getSourceRange() << getOpenMPClauseName(OMPC_map);
+        if (FD->isBitField())
           return;
-        }
         ImplicitMap.emplace_back(E);
         return;
       }
@@ -1987,9 +2060,11 @@
         ImplicitFirstprivate.push_back(E);
       return;
     }
-    if (isOpenMPTargetExecutionDirective(DKind) && !FD->isBitField()) {
+    if (isOpenMPTargetExecutionDirective(DKind)) {
       OMPClauseMappableExprCommon::MappableExprComponentList CurComponents;
-      CheckMapClauseExpressionBase(SemaRef, E, CurComponents, OMPC_map);
+      if (!CheckMapClauseExpressionBase(SemaRef, E, CurComponents, OMPC_map,
+                                        /*NoDiagnose=*/true))
+        return;
       auto *VD = cast<ValueDecl>(
           CurComponents.back().getAssociatedDeclaration()->getCanonicalDecl());
       if (!Stack->checkMappableExprComponentListsForDecl(
@@ -2070,7 +2145,8 @@
   case OMPD_parallel_for_simd:
   case OMPD_parallel_sections:
   case OMPD_teams:
-  case OMPD_teams_distribute: {
+  case OMPD_teams_distribute:
+  case OMPD_teams_distribute_simd: {
     QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
     QualType KmpInt32PtrTy =
         Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
@@ -2084,14 +2160,38 @@
     break;
   }
   case OMPD_target_teams:
-  case OMPD_target_parallel: {
+  case OMPD_target_parallel:
+  case OMPD_target_parallel_for:
+  case OMPD_target_parallel_for_simd:
+  case OMPD_target_teams_distribute:
+  case OMPD_target_teams_distribute_simd: {
+    QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
+    QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
+    FunctionProtoType::ExtProtoInfo EPI;
+    EPI.Variadic = true;
+    QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
+    Sema::CapturedParamNameType Params[] = {
+        std::make_pair(".global_tid.", KmpInt32Ty),
+        std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
+        std::make_pair(".privates.", Context.VoidPtrTy.withConst()),
+        std::make_pair(".copy_fn.",
+                       Context.getPointerType(CopyFnType).withConst()),
+        std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
+        std::make_pair(StringRef(), QualType()) // __context with shared vars
+    };
+    ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
+                             Params);
+    // Mark this captured region as inlined, because we don't use outlined
+    // function directly.
+    getCurCapturedRegion()->TheCapturedDecl->addAttr(
+        AlwaysInlineAttr::CreateImplicit(
+            Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
     Sema::CapturedParamNameType ParamsTarget[] = {
         std::make_pair(StringRef(), QualType()) // __context with shared vars
     };
     // Start a captured region for 'target' with no implicit parameters.
     ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
                              ParamsTarget);
-    QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
     QualType KmpInt32PtrTy =
         Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
     Sema::CapturedParamNameType ParamsTeamsOrParallel[] = {
@@ -2105,6 +2205,33 @@
                              ParamsTeamsOrParallel);
     break;
   }
+  case OMPD_target:
+  case OMPD_target_simd: {
+    QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
+    QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
+    FunctionProtoType::ExtProtoInfo EPI;
+    EPI.Variadic = true;
+    QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
+    Sema::CapturedParamNameType Params[] = {
+        std::make_pair(".global_tid.", KmpInt32Ty),
+        std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
+        std::make_pair(".privates.", Context.VoidPtrTy.withConst()),
+        std::make_pair(".copy_fn.",
+                       Context.getPointerType(CopyFnType).withConst()),
+        std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
+        std::make_pair(StringRef(), QualType()) // __context with shared vars
+    };
+    ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
+                             Params);
+    // Mark this captured region as inlined, because we don't use outlined
+    // function directly.
+    getCurCapturedRegion()->TheCapturedDecl->addAttr(
+        AlwaysInlineAttr::CreateImplicit(
+            Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
+    ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
+                             std::make_pair(StringRef(), QualType()));
+    break;
+  }
   case OMPD_simd:
   case OMPD_for:
   case OMPD_for_simd:
@@ -2115,13 +2242,10 @@
   case OMPD_critical:
   case OMPD_taskgroup:
   case OMPD_distribute:
+  case OMPD_distribute_simd:
   case OMPD_ordered:
   case OMPD_atomic:
-  case OMPD_target_data:
-  case OMPD_target:
-  case OMPD_target_parallel_for:
-  case OMPD_target_parallel_for_simd:
-  case OMPD_target_simd: {
+  case OMPD_target_data: {
     Sema::CapturedParamNameType Params[] = {
         std::make_pair(StringRef(), QualType()) // __context with shared vars
     };
@@ -2191,15 +2315,7 @@
     break;
   }
   case OMPD_distribute_parallel_for_simd:
-  case OMPD_distribute_simd:
-  case OMPD_distribute_parallel_for:
-  case OMPD_teams_distribute_simd:
-  case OMPD_teams_distribute_parallel_for_simd:
-  case OMPD_teams_distribute_parallel_for:
-  case OMPD_target_teams_distribute:
-  case OMPD_target_teams_distribute_parallel_for:
-  case OMPD_target_teams_distribute_parallel_for_simd:
-  case OMPD_target_teams_distribute_simd: {
+  case OMPD_distribute_parallel_for: {
     QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
     QualType KmpInt32PtrTy =
         Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
@@ -2214,6 +2330,116 @@
                              Params);
     break;
   }
+  case OMPD_target_teams_distribute_parallel_for:
+  case OMPD_target_teams_distribute_parallel_for_simd: {
+    QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
+    QualType KmpInt32PtrTy =
+        Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
+
+    QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
+    FunctionProtoType::ExtProtoInfo EPI;
+    EPI.Variadic = true;
+    QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
+    Sema::CapturedParamNameType Params[] = {
+        std::make_pair(".global_tid.", KmpInt32Ty),
+        std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
+        std::make_pair(".privates.", Context.VoidPtrTy.withConst()),
+        std::make_pair(".copy_fn.",
+                       Context.getPointerType(CopyFnType).withConst()),
+        std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
+        std::make_pair(StringRef(), QualType()) // __context with shared vars
+    };
+    ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
+                             Params);
+    // Mark this captured region as inlined, because we don't use outlined
+    // function directly.
+    getCurCapturedRegion()->TheCapturedDecl->addAttr(
+        AlwaysInlineAttr::CreateImplicit(
+            Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
+    Sema::CapturedParamNameType ParamsTarget[] = {
+        std::make_pair(StringRef(), QualType()) // __context with shared vars
+    };
+    // Start a captured region for 'target' with no implicit parameters.
+    ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
+                             ParamsTarget);
+
+    Sema::CapturedParamNameType ParamsTeams[] = {
+        std::make_pair(".global_tid.", KmpInt32PtrTy),
+        std::make_pair(".bound_tid.", KmpInt32PtrTy),
+        std::make_pair(StringRef(), QualType()) // __context with shared vars
+    };
+    // Start a captured region for 'target' with no implicit parameters.
+    ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
+                             ParamsTeams);
+
+    Sema::CapturedParamNameType ParamsParallel[] = {
+        std::make_pair(".global_tid.", KmpInt32PtrTy),
+        std::make_pair(".bound_tid.", KmpInt32PtrTy),
+        std::make_pair(".previous.lb.", Context.getSizeType()),
+        std::make_pair(".previous.ub.", Context.getSizeType()),
+        std::make_pair(StringRef(), QualType()) // __context with shared vars
+    };
+    // Start a captured region for 'teams' or 'parallel'.  Both regions have
+    // the same implicit parameters.
+    ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
+                             ParamsParallel);
+    break;
+  }
+
+  case OMPD_teams_distribute_parallel_for:
+  case OMPD_teams_distribute_parallel_for_simd: {
+    QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
+    QualType KmpInt32PtrTy =
+        Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
+
+    Sema::CapturedParamNameType ParamsTeams[] = {
+        std::make_pair(".global_tid.", KmpInt32PtrTy),
+        std::make_pair(".bound_tid.", KmpInt32PtrTy),
+        std::make_pair(StringRef(), QualType()) // __context with shared vars
+    };
+    // Start a captured region for 'target' with no implicit parameters.
+    ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
+                             ParamsTeams);
+
+    Sema::CapturedParamNameType ParamsParallel[] = {
+        std::make_pair(".global_tid.", KmpInt32PtrTy),
+        std::make_pair(".bound_tid.", KmpInt32PtrTy),
+        std::make_pair(".previous.lb.", Context.getSizeType()),
+        std::make_pair(".previous.ub.", Context.getSizeType()),
+        std::make_pair(StringRef(), QualType()) // __context with shared vars
+    };
+    // Start a captured region for 'teams' or 'parallel'.  Both regions have
+    // the same implicit parameters.
+    ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
+                             ParamsParallel);
+    break;
+  }
+  case OMPD_target_update:
+  case OMPD_target_enter_data:
+  case OMPD_target_exit_data: {
+    QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
+    QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
+    FunctionProtoType::ExtProtoInfo EPI;
+    EPI.Variadic = true;
+    QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
+    Sema::CapturedParamNameType Params[] = {
+        std::make_pair(".global_tid.", KmpInt32Ty),
+        std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
+        std::make_pair(".privates.", Context.VoidPtrTy.withConst()),
+        std::make_pair(".copy_fn.",
+                       Context.getPointerType(CopyFnType).withConst()),
+        std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
+        std::make_pair(StringRef(), QualType()) // __context with shared vars
+    };
+    ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
+                             Params);
+    // Mark this captured region as inlined, because we don't use outlined
+    // function directly.
+    getCurCapturedRegion()->TheCapturedDecl->addAttr(
+        AlwaysInlineAttr::CreateImplicit(
+            Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
+    break;
+  }
   case OMPD_threadprivate:
   case OMPD_taskyield:
   case OMPD_barrier:
@@ -2221,13 +2447,10 @@
   case OMPD_cancellation_point:
   case OMPD_cancel:
   case OMPD_flush:
-  case OMPD_target_enter_data:
-  case OMPD_target_exit_data:
   case OMPD_declare_reduction:
   case OMPD_declare_simd:
   case OMPD_declare_target:
   case OMPD_end_declare_target:
-  case OMPD_target_update:
     llvm_unreachable("OpenMP Directive is not allowed");
   case OMPD_unknown:
     llvm_unreachable("Unknown OpenMP directive");
@@ -2248,9 +2471,9 @@
   Expr *Init = AsExpression ? CaptureExpr : CaptureExpr->IgnoreImpCasts();
   QualType Ty = Init->getType();
   if (CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue()) {
-    if (S.getLangOpts().CPlusPlus)
+    if (S.getLangOpts().CPlusPlus) {
       Ty = C.getLValueReferenceType(Ty);
-    else {
+    } else {
       Ty = C.getPointerType(Ty);
       ExprResult Res =
           S.CreateBuiltinUnaryOp(CaptureExpr->getExprLoc(), UO_AddrOf, Init);
@@ -2272,31 +2495,34 @@
 static DeclRefExpr *buildCapture(Sema &S, ValueDecl *D, Expr *CaptureExpr,
                                  bool WithInit) {
   OMPCapturedExprDecl *CD;
-  if (auto *VD = S.IsOpenMPCapturedDecl(D))
+  if (auto *VD = S.IsOpenMPCapturedDecl(D)) {
     CD = cast<OMPCapturedExprDecl>(VD);
-  else
+  } else {
     CD = buildCaptureDecl(S, D->getIdentifier(), CaptureExpr, WithInit,
                           /*AsExpression=*/false);
+  }
   return buildDeclRefExpr(S, CD, CD->getType().getNonReferenceType(),
                           CaptureExpr->getExprLoc());
 }
 
 static ExprResult buildCapture(Sema &S, Expr *CaptureExpr, DeclRefExpr *&Ref) {
+  CaptureExpr = S.DefaultLvalueConversion(CaptureExpr).get();
   if (!Ref) {
-    auto *CD =
-        buildCaptureDecl(S, &S.getASTContext().Idents.get(".capture_expr."),
-                         CaptureExpr, /*WithInit=*/true, /*AsExpression=*/true);
+    OMPCapturedExprDecl *CD = buildCaptureDecl(
+        S, &S.getASTContext().Idents.get(".capture_expr."), CaptureExpr,
+        /*WithInit=*/true, /*AsExpression=*/true);
     Ref = buildDeclRefExpr(S, CD, CD->getType().getNonReferenceType(),
                            CaptureExpr->getExprLoc());
   }
   ExprResult Res = Ref;
   if (!S.getLangOpts().CPlusPlus &&
       CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue() &&
-      Ref->getType()->isPointerType())
+      Ref->getType()->isPointerType()) {
     Res = S.CreateBuiltinUnaryOp(CaptureExpr->getExprLoc(), UO_Deref, Ref);
-  if (!Res.isUsable())
-    return ExprError();
-  return CaptureExpr->isGLValue() ? Res : S.DefaultLvalueConversion(Res.get());
+    if (!Res.isUsable())
+      return ExprError();
+  }
+  return S.DefaultLvalueConversion(Res.get());
 }
 
 namespace {
@@ -2338,6 +2564,8 @@
     return StmtError();
   }
 
+  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
+  getOpenMPCaptureRegions(CaptureRegions, DSAStack->getCurrentDirective());
   OMPOrderedClause *OC = nullptr;
   OMPScheduleClause *SC = nullptr;
   SmallVector<OMPLinearClause *, 4> LCs;
@@ -2366,7 +2594,8 @@
         }
       }
       DSAStack->setForceVarCapturing(/*V=*/false);
-    } else if (isParallelOrTaskRegion(DSAStack->getCurrentDirective())) {
+    } else if (CaptureRegions.size() > 1 ||
+               CaptureRegions.back() != OMPD_unknown) {
       if (auto *C = OMPClauseWithPreInit::get(Clause))
         PICs.push_back(C);
       if (auto *C = OMPClauseWithPostUpdate::get(Clause)) {
@@ -2414,13 +2643,11 @@
     return StmtError();
   }
   StmtResult SR = S;
-  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
-  getOpenMPCaptureRegions(CaptureRegions, DSAStack->getCurrentDirective());
-  for (auto ThisCaptureRegion : llvm::reverse(CaptureRegions)) {
+  for (OpenMPDirectiveKind ThisCaptureRegion : llvm::reverse(CaptureRegions)) {
     // Mark all variables in private list clauses as used in inner region.
     // Required for proper codegen of combined directives.
     // TODO: add processing for other clauses.
-    if (isParallelOrTaskRegion(DSAStack->getCurrentDirective())) {
+    if (ThisCaptureRegion != OMPD_unknown) {
       for (auto *C : PICs) {
         OpenMPDirectiveKind CaptureRegion = C->getCaptureRegion();
         // Find the particular capture region for the clause if the
@@ -2535,7 +2762,10 @@
               ParentRegion == OMPD_target_parallel)) ||
             (CancelRegion == OMPD_for &&
              (ParentRegion == OMPD_for || ParentRegion == OMPD_parallel_for ||
-              ParentRegion == OMPD_target_parallel_for)) ||
+              ParentRegion == OMPD_target_parallel_for ||
+              ParentRegion == OMPD_distribute_parallel_for ||
+              ParentRegion == OMPD_teams_distribute_parallel_for ||
+              ParentRegion == OMPD_target_teams_distribute_parallel_for)) ||
             (CancelRegion == OMPD_taskgroup && ParentRegion == OMPD_task) ||
             (CancelRegion == OMPD_sections &&
              (ParentRegion == OMPD_section || ParentRegion == OMPD_sections ||
@@ -2614,7 +2844,6 @@
       NestingProhibited = ParentRegion != OMPD_target;
       OrphanSeen = ParentRegion == OMPD_unknown;
       Recommend = ShouldBeInTargetRegion;
-      Stack->setParentTeamsRegionLoc(Stack->getConstructLoc());
     }
     if (!NestingProhibited &&
         !isOpenMPTargetExecutionDirective(CurrentRegion) &&
@@ -2960,12 +3189,12 @@
     break;
   case OMPD_target_enter_data:
     Res = ActOnOpenMPTargetEnterDataDirective(ClausesWithImplicit, StartLoc,
-                                              EndLoc);
+                                              EndLoc, AStmt);
     AllowedNameModifiers.push_back(OMPD_target_enter_data);
     break;
   case OMPD_target_exit_data:
     Res = ActOnOpenMPTargetExitDataDirective(ClausesWithImplicit, StartLoc,
-                                             EndLoc);
+                                             EndLoc, AStmt);
     AllowedNameModifiers.push_back(OMPD_target_exit_data);
     break;
   case OMPD_taskloop:
@@ -2983,9 +3212,8 @@
                                          EndLoc, VarsWithInheritedDSA);
     break;
   case OMPD_target_update:
-    assert(!AStmt && "Statement is not allowed for target update");
-    Res =
-        ActOnOpenMPTargetUpdateDirective(ClausesWithImplicit, StartLoc, EndLoc);
+    Res = ActOnOpenMPTargetUpdateDirective(ClausesWithImplicit, StartLoc,
+                                           EndLoc, AStmt);
     AllowedNameModifiers.push_back(OMPD_target_update);
     break;
   case OMPD_distribute_parallel_for:
@@ -4322,7 +4550,7 @@
 
 /// Build preinits statement for the given declarations.
 static Stmt *buildPreInits(ASTContext &Context,
-                           SmallVectorImpl<Decl *> &PreInits) {
+                           MutableArrayRef<Decl *> PreInits) {
   if (!PreInits.empty()) {
     return new (Context) DeclStmt(
         DeclGroupRef::Create(Context, PreInits.begin(), PreInits.size()),
@@ -4332,8 +4560,9 @@
 }
 
 /// Build preinits statement for the given declarations.
-static Stmt *buildPreInits(ASTContext &Context,
-                           llvm::MapVector<Expr *, DeclRefExpr *> &Captures) {
+static Stmt *
+buildPreInits(ASTContext &Context,
+              const llvm::MapVector<Expr *, DeclRefExpr *> &Captures) {
   if (!Captures.empty()) {
     SmallVector<Decl *, 16> PreInits;
     for (auto &Pair : Captures)
@@ -4678,7 +4907,7 @@
   }
 
   // Loop condition (IV < NumIterations) or (IV <= UB) for worksharing loops.
-  SourceLocation CondLoc;
+  SourceLocation CondLoc = AStmt->getLocStart();
   ExprResult Cond =
       (isOpenMPWorksharingDirective(DKind) ||
        isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind))
@@ -4691,7 +4920,7 @@
         SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), CombUB.get());
   }
   // Loop increment (IV = IV + 1)
-  SourceLocation IncLoc;
+  SourceLocation IncLoc = AStmt->getLocStart();
   ExprResult Inc =
       SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, IV.get(),
                          SemaRef.ActOnIntegerConstant(IncLoc, 1).get());
@@ -4758,7 +4987,7 @@
   // directive with for as IV = IV + ST; ensure upper bound expression based
   // on PrevUB instead of NumIterations - used to implement 'for' when found
   // in combination with 'distribute', like in 'distribute parallel for'
-  SourceLocation DistIncLoc;
+  SourceLocation DistIncLoc = AStmt->getLocStart();
   ExprResult DistCond, DistInc, PrevEUB;
   if (isOpenMPLoopBoundSharingDirective(DKind)) {
     DistCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get());
@@ -4774,7 +5003,7 @@
 
     // Build expression: UB = min(UB, prevUB) for #for in composite or combined
     // construct
-    SourceLocation DistEUBLoc;
+    SourceLocation DistEUBLoc = AStmt->getLocStart();
     ExprResult IsUBGreater =
         SemaRef.BuildBinOp(CurScope, DistEUBLoc, BO_GT, UB.get(), PrevUB.get());
     ExprResult CondOp = SemaRef.ActOnConditionalOp(
@@ -6234,13 +6463,23 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   // OpenMP [2.16, Nesting of Regions]
   // If specified, a teams construct must be contained within a target
   // construct. That target construct must contain no statements or directives
   // outside of the teams construct.
   if (DSAStack->hasInnerTeamsRegion()) {
-    auto S = AStmt->IgnoreContainers(/*IgnoreCaptured*/ true);
+    Stmt *S = CS->IgnoreContainers(/*IgnoreCaptured=*/true);
     bool OMPTeamsFound = true;
     if (auto *CS = dyn_cast<CompoundStmt>(S)) {
       auto I = CS->body_begin();
@@ -6287,6 +6526,16 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target_parallel);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   getCurFunction()->setHasBranchProtectedScope();
 
@@ -6308,13 +6557,23 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target_parallel_for);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount =
       CheckOpenMPLoop(OMPD_target_parallel_for, getCollapseNumberExpr(Clauses),
-                      getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
+                      getOrderedNumberExpr(Clauses), CS, *this, *DSAStack,
                       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
@@ -6379,7 +6638,28 @@
 StmtResult
 Sema::ActOnOpenMPTargetEnterDataDirective(ArrayRef<OMPClause *> Clauses,
                                           SourceLocation StartLoc,
-                                          SourceLocation EndLoc) {
+                                          SourceLocation EndLoc, Stmt *AStmt) {
+  if (!AStmt)
+    return StmtError();
+
+  CapturedStmt *CS = cast<CapturedStmt>(AStmt);
+  // 1.2.2 OpenMP Language Terminology
+  // Structured block - An executable statement with a single entry at the
+  // top and a single exit at the bottom.
+  // The point of exit cannot be a branch out of the structured block.
+  // longjmp() and throw() must not violate the entry/exit criteria.
+  CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target_enter_data);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
+
   // OpenMP [2.10.2, Restrictions, p. 99]
   // At least one map clause must appear on the directive.
   if (!hasClauses(Clauses, OMPC_map)) {
@@ -6388,14 +6668,35 @@
     return StmtError();
   }
 
-  return OMPTargetEnterDataDirective::Create(Context, StartLoc, EndLoc,
-                                             Clauses);
+  return OMPTargetEnterDataDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                             AStmt);
 }
 
 StmtResult
 Sema::ActOnOpenMPTargetExitDataDirective(ArrayRef<OMPClause *> Clauses,
                                          SourceLocation StartLoc,
-                                         SourceLocation EndLoc) {
+                                         SourceLocation EndLoc, Stmt *AStmt) {
+  if (!AStmt)
+    return StmtError();
+
+  CapturedStmt *CS = cast<CapturedStmt>(AStmt);
+  // 1.2.2 OpenMP Language Terminology
+  // Structured block - An executable statement with a single entry at the
+  // top and a single exit at the bottom.
+  // The point of exit cannot be a branch out of the structured block.
+  // longjmp() and throw() must not violate the entry/exit criteria.
+  CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target_exit_data);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
+
   // OpenMP [2.10.3, Restrictions, p. 102]
   // At least one map clause must appear on the directive.
   if (!hasClauses(Clauses, OMPC_map)) {
@@ -6404,17 +6705,41 @@
     return StmtError();
   }
 
-  return OMPTargetExitDataDirective::Create(Context, StartLoc, EndLoc, Clauses);
+  return OMPTargetExitDataDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                            AStmt);
 }
 
 StmtResult Sema::ActOnOpenMPTargetUpdateDirective(ArrayRef<OMPClause *> Clauses,
                                                   SourceLocation StartLoc,
-                                                  SourceLocation EndLoc) {
+                                                  SourceLocation EndLoc,
+                                                  Stmt *AStmt) {
+  if (!AStmt)
+    return StmtError();
+
+  CapturedStmt *CS = cast<CapturedStmt>(AStmt);
+  // 1.2.2 OpenMP Language Terminology
+  // Structured block - An executable statement with a single entry at the
+  // top and a single exit at the bottom.
+  // The point of exit cannot be a branch out of the structured block.
+  // longjmp() and throw() must not violate the entry/exit criteria.
+  CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target_update);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
+
   if (!hasClauses(Clauses, OMPC_to, OMPC_from)) {
     Diag(StartLoc, diag::err_omp_at_least_one_motion_clause_required);
     return StmtError();
   }
-  return OMPTargetUpdateDirective::Create(Context, StartLoc, EndLoc, Clauses);
+  return OMPTargetUpdateDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                          AStmt);
 }
 
 StmtResult Sema::ActOnOpenMPTeamsDirective(ArrayRef<OMPClause *> Clauses,
@@ -6433,6 +6758,8 @@
 
   getCurFunction()->setHasBranchProtectedScope();
 
+  DSAStack->setParentTeamsRegionLoc(StartLoc);
+
   return OMPTeamsDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt);
 }
 
@@ -6599,6 +6926,8 @@
   // clause must not be specified.
   if (checkReductionClauseWithNogroup(*this, Clauses))
     return StmtError();
+  if (checkSimdlenSafelenSpecified(*this, Clauses))
+    return StmtError();
 
   getCurFunction()->setHasBranchProtectedScope();
   return OMPTaskLoopSimdDirective::Create(Context, StartLoc, EndLoc,
@@ -6645,13 +6974,24 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel =
+           getOpenMPCaptureLevels(OMPD_distribute_parallel_for);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount = CheckOpenMPLoop(
       OMPD_distribute_parallel_for, getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+      nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack,
       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
@@ -6661,7 +7001,8 @@
 
   getCurFunction()->setHasBranchProtectedScope();
   return OMPDistributeParallelForDirective::Create(
-      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+      DSAStack->isCancelRegion());
 }
 
 StmtResult Sema::ActOnOpenMPDistributeParallelForSimdDirective(
@@ -6678,13 +7019,24 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel =
+           getOpenMPCaptureLevels(OMPD_distribute_parallel_for_simd);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount = CheckOpenMPLoop(
       OMPD_distribute_parallel_for_simd, getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+      nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack,
       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
@@ -6692,6 +7044,17 @@
   assert((CurContext->isDependentContext() || B.builtAll()) &&
          "omp for loop exprs were not built");
 
+  if (!CurContext->isDependentContext()) {
+    // Finalize the clauses that need pre-built expressions for CodeGen.
+    for (auto C : Clauses) {
+      if (auto *LC = dyn_cast<OMPLinearClause>(C))
+        if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
+                                     B.NumIterations, *this, CurScope,
+                                     DSAStack))
+          return StmtError();
+    }
+  }
+
   if (checkSimdlenSafelenSpecified(*this, Clauses))
     return StmtError();
 
@@ -6714,20 +7077,41 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_distribute_simd);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount =
       CheckOpenMPLoop(OMPD_distribute_simd, getCollapseNumberExpr(Clauses),
-                      nullptr /*ordered not a clause on distribute*/, AStmt,
-                      *this, *DSAStack, VarsWithImplicitDSA, B);
+                      nullptr /*ordered not a clause on distribute*/, CS, *this,
+                      *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
   assert((CurContext->isDependentContext() || B.builtAll()) &&
          "omp for loop exprs were not built");
 
+  if (!CurContext->isDependentContext()) {
+    // Finalize the clauses that need pre-built expressions for CodeGen.
+    for (auto C : Clauses) {
+      if (auto *LC = dyn_cast<OMPLinearClause>(C))
+        if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
+                                     B.NumIterations, *this, CurScope,
+                                     DSAStack))
+          return StmtError();
+    }
+  }
+
   if (checkSimdlenSafelenSpecified(*this, Clauses))
     return StmtError();
 
@@ -6750,13 +7134,23 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target_parallel_for);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount = CheckOpenMPLoop(
       OMPD_target_parallel_for_simd, getCollapseNumberExpr(Clauses),
-      getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
+      getOrderedNumberExpr(Clauses), CS, *this, *DSAStack,
       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
@@ -6796,13 +7190,23 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target_simd);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will define the
   // nested loops number.
   unsigned NestedLoopCount =
       CheckOpenMPLoop(OMPD_target_simd, getCollapseNumberExpr(Clauses),
-                      getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
+                      getOrderedNumberExpr(Clauses), CS, *this, *DSAStack,
                       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
@@ -6843,14 +7247,24 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_teams_distribute);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount =
       CheckOpenMPLoop(OMPD_teams_distribute, getCollapseNumberExpr(Clauses),
-                      nullptr /*ordered not a clause on distribute*/, AStmt,
-                      *this, *DSAStack, VarsWithImplicitDSA, B);
+                      nullptr /*ordered not a clause on distribute*/, CS, *this,
+                      *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -6858,6 +7272,9 @@
          "omp teams distribute loop exprs were not built");
 
   getCurFunction()->setHasBranchProtectedScope();
+
+  DSAStack->setParentTeamsRegionLoc(StartLoc);
+
   return OMPTeamsDistributeDirective::Create(
       Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
 }
@@ -6876,13 +7293,25 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel =
+           getOpenMPCaptureLevels(OMPD_teams_distribute_simd);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
+
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount = CheckOpenMPLoop(
       OMPD_teams_distribute_simd, getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+      nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack,
       VarsWithImplicitDSA, B);
 
   if (NestedLoopCount == 0)
@@ -6906,6 +7335,9 @@
     return StmtError();
 
   getCurFunction()->setHasBranchProtectedScope();
+
+  DSAStack->setParentTeamsRegionLoc(StartLoc);
+
   return OMPTeamsDistributeSimdDirective::Create(
       Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
 }
@@ -6925,12 +7357,24 @@
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
 
+  for (int ThisCaptureLevel =
+           getOpenMPCaptureLevels(OMPD_teams_distribute_parallel_for_simd);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
+
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   auto NestedLoopCount = CheckOpenMPLoop(
       OMPD_teams_distribute_parallel_for_simd, getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+      nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack,
       VarsWithImplicitDSA, B);
 
   if (NestedLoopCount == 0)
@@ -6954,6 +7398,9 @@
     return StmtError();
 
   getCurFunction()->setHasBranchProtectedScope();
+
+  DSAStack->setParentTeamsRegionLoc(StartLoc);
+
   return OMPTeamsDistributeParallelForSimdDirective::Create(
       Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
 }
@@ -6973,12 +7420,24 @@
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
 
+  for (int ThisCaptureLevel =
+           getOpenMPCaptureLevels(OMPD_teams_distribute_parallel_for);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
+
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount = CheckOpenMPLoop(
       OMPD_teams_distribute_parallel_for, getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+      nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack,
       VarsWithImplicitDSA, B);
 
   if (NestedLoopCount == 0)
@@ -6987,20 +7446,13 @@
   assert((CurContext->isDependentContext() || B.builtAll()) &&
          "omp for loop exprs were not built");
 
-  if (!CurContext->isDependentContext()) {
-    // Finalize the clauses that need pre-built expressions for CodeGen.
-    for (auto C : Clauses) {
-      if (auto *LC = dyn_cast<OMPLinearClause>(C))
-        if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
-                                     B.NumIterations, *this, CurScope,
-                                     DSAStack))
-          return StmtError();
-    }
-  }
-
   getCurFunction()->setHasBranchProtectedScope();
+
+  DSAStack->setParentTeamsRegionLoc(StartLoc);
+
   return OMPTeamsDistributeParallelForDirective::Create(
-      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+      DSAStack->isCancelRegion());
 }
 
 StmtResult Sema::ActOnOpenMPTargetTeamsDirective(ArrayRef<OMPClause *> Clauses,
@@ -7018,6 +7470,16 @@
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
 
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(OMPD_target_teams);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
   getCurFunction()->setHasBranchProtectedScope();
 
   return OMPTargetTeamsDirective::Create(Context, StartLoc, EndLoc, Clauses,
@@ -7038,14 +7500,24 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel =
+           getOpenMPCaptureLevels(OMPD_target_teams_distribute);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   auto NestedLoopCount = CheckOpenMPLoop(
-      OMPD_target_teams_distribute,
-      getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+      OMPD_target_teams_distribute, getCollapseNumberExpr(Clauses),
+      nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack,
       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
@@ -7072,14 +7544,24 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel =
+           getOpenMPCaptureLevels(OMPD_target_teams_distribute_parallel_for);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   auto NestedLoopCount = CheckOpenMPLoop(
-      OMPD_target_teams_distribute_parallel_for,
-      getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+      OMPD_target_teams_distribute_parallel_for, getCollapseNumberExpr(Clauses),
+      nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack,
       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
@@ -7100,7 +7582,8 @@
 
   getCurFunction()->setHasBranchProtectedScope();
   return OMPTargetTeamsDistributeParallelForDirective::Create(
-      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+      DSAStack->isCancelRegion());
 }
 
 StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective(
@@ -7117,15 +7600,26 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel = getOpenMPCaptureLevels(
+           OMPD_target_teams_distribute_parallel_for_simd);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
-  auto NestedLoopCount = CheckOpenMPLoop(
-      OMPD_target_teams_distribute_parallel_for_simd,
-      getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
-      VarsWithImplicitDSA, B);
+  auto NestedLoopCount =
+      CheckOpenMPLoop(OMPD_target_teams_distribute_parallel_for_simd,
+                      getCollapseNumberExpr(Clauses),
+                      nullptr /*ordered not a clause on distribute*/, CS, *this,
+                      *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -7144,6 +7638,9 @@
     }
   }
 
+  if (checkSimdlenSafelenSpecified(*this, Clauses))
+    return StmtError();
+
   getCurFunction()->setHasBranchProtectedScope();
   return OMPTargetTeamsDistributeParallelForSimdDirective::Create(
       Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
@@ -7163,13 +7660,24 @@
   // The point of exit cannot be a branch out of the structured block.
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel =
+           getOpenMPCaptureLevels(OMPD_target_teams_distribute_simd);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
 
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   auto NestedLoopCount = CheckOpenMPLoop(
       OMPD_target_teams_distribute_simd, getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+      nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack,
       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
@@ -7177,6 +7685,20 @@
   assert((CurContext->isDependentContext() || B.builtAll()) &&
          "omp target teams distribute simd loop exprs were not built");
 
+  if (!CurContext->isDependentContext()) {
+    // Finalize the clauses that need pre-built expressions for CodeGen.
+    for (auto C : Clauses) {
+      if (auto *LC = dyn_cast<OMPLinearClause>(C))
+        if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
+                                     B.NumIterations, *this, CurScope,
+                                     DSAStack))
+          return StmtError();
+    }
+  }
+
+  if (checkSimdlenSafelenSpecified(*this, Clauses))
+    return StmtError();
+
   getCurFunction()->setHasBranchProtectedScope();
   return OMPTargetTeamsDistributeSimdDirective::Create(
       Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
@@ -7279,16 +7801,33 @@
     OpenMPDirectiveKind DKind, OpenMPClauseKind CKind,
     OpenMPDirectiveKind NameModifier = OMPD_unknown) {
   OpenMPDirectiveKind CaptureRegion = OMPD_unknown;
-
   switch (CKind) {
   case OMPC_if:
     switch (DKind) {
     case OMPD_target_parallel:
+    case OMPD_target_parallel_for:
+    case OMPD_target_parallel_for_simd:
       // If this clause applies to the nested 'parallel' region, capture within
       // the 'target' region, otherwise do not capture.
       if (NameModifier == OMPD_unknown || NameModifier == OMPD_parallel)
         CaptureRegion = OMPD_target;
       break;
+    case OMPD_target_teams_distribute_parallel_for:
+    case OMPD_target_teams_distribute_parallel_for_simd:
+      // If this clause applies to the nested 'parallel' region, capture within
+      // the 'teams' region, otherwise do not capture.
+      if (NameModifier == OMPD_unknown || NameModifier == OMPD_parallel)
+        CaptureRegion = OMPD_teams;
+      break;
+    case OMPD_teams_distribute_parallel_for:
+    case OMPD_teams_distribute_parallel_for_simd:
+      CaptureRegion = OMPD_teams;
+      break;
+    case OMPD_target_update:
+    case OMPD_target_enter_data:
+    case OMPD_target_exit_data:
+      CaptureRegion = OMPD_task;
+      break;
     case OMPD_cancel:
     case OMPD_parallel:
     case OMPD_parallel_sections:
@@ -7296,24 +7835,15 @@
     case OMPD_parallel_for_simd:
     case OMPD_target:
     case OMPD_target_simd:
-    case OMPD_target_parallel_for:
-    case OMPD_target_parallel_for_simd:
     case OMPD_target_teams:
     case OMPD_target_teams_distribute:
     case OMPD_target_teams_distribute_simd:
-    case OMPD_target_teams_distribute_parallel_for:
-    case OMPD_target_teams_distribute_parallel_for_simd:
-    case OMPD_teams_distribute_parallel_for:
-    case OMPD_teams_distribute_parallel_for_simd:
     case OMPD_distribute_parallel_for:
     case OMPD_distribute_parallel_for_simd:
     case OMPD_task:
     case OMPD_taskloop:
     case OMPD_taskloop_simd:
     case OMPD_target_data:
-    case OMPD_target_enter_data:
-    case OMPD_target_exit_data:
-    case OMPD_target_update:
       // Do not capture if-clause expressions.
       break;
     case OMPD_threadprivate:
@@ -7350,35 +7880,37 @@
   case OMPC_num_threads:
     switch (DKind) {
     case OMPD_target_parallel:
+    case OMPD_target_parallel_for:
+    case OMPD_target_parallel_for_simd:
       CaptureRegion = OMPD_target;
       break;
-    case OMPD_cancel:
+    case OMPD_teams_distribute_parallel_for:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_target_teams_distribute_parallel_for:
+    case OMPD_target_teams_distribute_parallel_for_simd:
+      CaptureRegion = OMPD_teams;
+      break;
     case OMPD_parallel:
     case OMPD_parallel_sections:
     case OMPD_parallel_for:
     case OMPD_parallel_for_simd:
-    case OMPD_target:
-    case OMPD_target_simd:
-    case OMPD_target_parallel_for:
-    case OMPD_target_parallel_for_simd:
-    case OMPD_target_teams:
-    case OMPD_target_teams_distribute:
-    case OMPD_target_teams_distribute_simd:
-    case OMPD_target_teams_distribute_parallel_for:
-    case OMPD_target_teams_distribute_parallel_for_simd:
-    case OMPD_teams_distribute_parallel_for:
-    case OMPD_teams_distribute_parallel_for_simd:
     case OMPD_distribute_parallel_for:
     case OMPD_distribute_parallel_for_simd:
-    case OMPD_task:
-    case OMPD_taskloop:
-    case OMPD_taskloop_simd:
+      // Do not capture num_threads-clause expressions.
+      break;
     case OMPD_target_data:
     case OMPD_target_enter_data:
     case OMPD_target_exit_data:
     case OMPD_target_update:
-      // Do not capture num_threads-clause expressions.
-      break;
+    case OMPD_target:
+    case OMPD_target_simd:
+    case OMPD_target_teams:
+    case OMPD_target_teams_distribute:
+    case OMPD_target_teams_distribute_simd:
+    case OMPD_cancel:
+    case OMPD_task:
+    case OMPD_taskloop:
+    case OMPD_taskloop_simd:
     case OMPD_threadprivate:
     case OMPD_taskyield:
     case OMPD_barrier:
@@ -7413,8 +7945,28 @@
   case OMPC_num_teams:
     switch (DKind) {
     case OMPD_target_teams:
+    case OMPD_target_teams_distribute:
+    case OMPD_target_teams_distribute_simd:
+    case OMPD_target_teams_distribute_parallel_for:
+    case OMPD_target_teams_distribute_parallel_for_simd:
       CaptureRegion = OMPD_target;
       break;
+    case OMPD_teams_distribute_parallel_for:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_teams:
+    case OMPD_teams_distribute:
+    case OMPD_teams_distribute_simd:
+      // Do not capture num_teams-clause expressions.
+      break;
+    case OMPD_distribute_parallel_for:
+    case OMPD_distribute_parallel_for_simd:
+    case OMPD_task:
+    case OMPD_taskloop:
+    case OMPD_taskloop_simd:
+    case OMPD_target_data:
+    case OMPD_target_enter_data:
+    case OMPD_target_exit_data:
+    case OMPD_target_update:
     case OMPD_cancel:
     case OMPD_parallel:
     case OMPD_parallel_sections:
@@ -7425,26 +7977,6 @@
     case OMPD_target_parallel:
     case OMPD_target_parallel_for:
     case OMPD_target_parallel_for_simd:
-    case OMPD_target_teams_distribute:
-    case OMPD_target_teams_distribute_simd:
-    case OMPD_target_teams_distribute_parallel_for:
-    case OMPD_target_teams_distribute_parallel_for_simd:
-    case OMPD_teams_distribute_parallel_for:
-    case OMPD_teams_distribute_parallel_for_simd:
-    case OMPD_distribute_parallel_for:
-    case OMPD_distribute_parallel_for_simd:
-    case OMPD_task:
-    case OMPD_taskloop:
-    case OMPD_taskloop_simd:
-    case OMPD_target_data:
-    case OMPD_target_enter_data:
-    case OMPD_target_exit_data:
-    case OMPD_target_update:
-    case OMPD_teams:
-    case OMPD_teams_distribute:
-    case OMPD_teams_distribute_simd:
-      // Do not capture num_teams-clause expressions.
-      break;
     case OMPD_threadprivate:
     case OMPD_taskyield:
     case OMPD_barrier:
@@ -7476,8 +8008,28 @@
   case OMPC_thread_limit:
     switch (DKind) {
     case OMPD_target_teams:
+    case OMPD_target_teams_distribute:
+    case OMPD_target_teams_distribute_simd:
+    case OMPD_target_teams_distribute_parallel_for:
+    case OMPD_target_teams_distribute_parallel_for_simd:
       CaptureRegion = OMPD_target;
       break;
+    case OMPD_teams_distribute_parallel_for:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_teams:
+    case OMPD_teams_distribute:
+    case OMPD_teams_distribute_simd:
+      // Do not capture thread_limit-clause expressions.
+      break;
+    case OMPD_distribute_parallel_for:
+    case OMPD_distribute_parallel_for_simd:
+    case OMPD_task:
+    case OMPD_taskloop:
+    case OMPD_taskloop_simd:
+    case OMPD_target_data:
+    case OMPD_target_enter_data:
+    case OMPD_target_exit_data:
+    case OMPD_target_update:
     case OMPD_cancel:
     case OMPD_parallel:
     case OMPD_parallel_sections:
@@ -7488,26 +8040,6 @@
     case OMPD_target_parallel:
     case OMPD_target_parallel_for:
     case OMPD_target_parallel_for_simd:
-    case OMPD_target_teams_distribute:
-    case OMPD_target_teams_distribute_simd:
-    case OMPD_target_teams_distribute_parallel_for:
-    case OMPD_target_teams_distribute_parallel_for_simd:
-    case OMPD_teams_distribute_parallel_for:
-    case OMPD_teams_distribute_parallel_for_simd:
-    case OMPD_distribute_parallel_for:
-    case OMPD_distribute_parallel_for_simd:
-    case OMPD_task:
-    case OMPD_taskloop:
-    case OMPD_taskloop_simd:
-    case OMPD_target_data:
-    case OMPD_target_enter_data:
-    case OMPD_target_exit_data:
-    case OMPD_target_update:
-    case OMPD_teams:
-    case OMPD_teams_distribute:
-    case OMPD_teams_distribute_simd:
-      // Do not capture thread_limit-clause expressions.
-      break;
     case OMPD_threadprivate:
     case OMPD_taskyield:
     case OMPD_barrier:
@@ -7537,7 +8069,194 @@
     }
     break;
   case OMPC_schedule:
+    switch (DKind) {
+    case OMPD_parallel_for:
+    case OMPD_parallel_for_simd:
+    case OMPD_distribute_parallel_for:
+    case OMPD_distribute_parallel_for_simd:
+    case OMPD_teams_distribute_parallel_for:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_target_parallel_for:
+    case OMPD_target_parallel_for_simd:
+    case OMPD_target_teams_distribute_parallel_for:
+    case OMPD_target_teams_distribute_parallel_for_simd:
+      CaptureRegion = OMPD_parallel;
+      break;
+    case OMPD_for:
+    case OMPD_for_simd:
+      // Do not capture schedule-clause expressions.
+      break;
+    case OMPD_task:
+    case OMPD_taskloop:
+    case OMPD_taskloop_simd:
+    case OMPD_target_data:
+    case OMPD_target_enter_data:
+    case OMPD_target_exit_data:
+    case OMPD_target_update:
+    case OMPD_teams:
+    case OMPD_teams_distribute:
+    case OMPD_teams_distribute_simd:
+    case OMPD_target_teams_distribute:
+    case OMPD_target_teams_distribute_simd:
+    case OMPD_target:
+    case OMPD_target_simd:
+    case OMPD_target_parallel:
+    case OMPD_cancel:
+    case OMPD_parallel:
+    case OMPD_parallel_sections:
+    case OMPD_threadprivate:
+    case OMPD_taskyield:
+    case OMPD_barrier:
+    case OMPD_taskwait:
+    case OMPD_cancellation_point:
+    case OMPD_flush:
+    case OMPD_declare_reduction:
+    case OMPD_declare_simd:
+    case OMPD_declare_target:
+    case OMPD_end_declare_target:
+    case OMPD_simd:
+    case OMPD_sections:
+    case OMPD_section:
+    case OMPD_single:
+    case OMPD_master:
+    case OMPD_critical:
+    case OMPD_taskgroup:
+    case OMPD_distribute:
+    case OMPD_ordered:
+    case OMPD_atomic:
+    case OMPD_distribute_simd:
+    case OMPD_target_teams:
+      llvm_unreachable("Unexpected OpenMP directive with schedule clause");
+    case OMPD_unknown:
+      llvm_unreachable("Unknown OpenMP directive");
+    }
+    break;
   case OMPC_dist_schedule:
+    switch (DKind) {
+    case OMPD_teams_distribute_parallel_for:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_teams_distribute:
+    case OMPD_teams_distribute_simd:
+    case OMPD_target_teams_distribute_parallel_for:
+    case OMPD_target_teams_distribute_parallel_for_simd:
+    case OMPD_target_teams_distribute:
+    case OMPD_target_teams_distribute_simd:
+      CaptureRegion = OMPD_teams;
+      break;
+    case OMPD_distribute_parallel_for:
+    case OMPD_distribute_parallel_for_simd:
+    case OMPD_distribute:
+    case OMPD_distribute_simd:
+      // Do not capture thread_limit-clause expressions.
+      break;
+    case OMPD_parallel_for:
+    case OMPD_parallel_for_simd:
+    case OMPD_target_parallel_for_simd:
+    case OMPD_target_parallel_for:
+    case OMPD_task:
+    case OMPD_taskloop:
+    case OMPD_taskloop_simd:
+    case OMPD_target_data:
+    case OMPD_target_enter_data:
+    case OMPD_target_exit_data:
+    case OMPD_target_update:
+    case OMPD_teams:
+    case OMPD_target:
+    case OMPD_target_simd:
+    case OMPD_target_parallel:
+    case OMPD_cancel:
+    case OMPD_parallel:
+    case OMPD_parallel_sections:
+    case OMPD_threadprivate:
+    case OMPD_taskyield:
+    case OMPD_barrier:
+    case OMPD_taskwait:
+    case OMPD_cancellation_point:
+    case OMPD_flush:
+    case OMPD_declare_reduction:
+    case OMPD_declare_simd:
+    case OMPD_declare_target:
+    case OMPD_end_declare_target:
+    case OMPD_simd:
+    case OMPD_for:
+    case OMPD_for_simd:
+    case OMPD_sections:
+    case OMPD_section:
+    case OMPD_single:
+    case OMPD_master:
+    case OMPD_critical:
+    case OMPD_taskgroup:
+    case OMPD_ordered:
+    case OMPD_atomic:
+    case OMPD_target_teams:
+      llvm_unreachable("Unexpected OpenMP directive with schedule clause");
+    case OMPD_unknown:
+      llvm_unreachable("Unknown OpenMP directive");
+    }
+    break;
+  case OMPC_device:
+    switch (DKind) {
+    case OMPD_target_update:
+    case OMPD_target_enter_data:
+    case OMPD_target_exit_data:
+    case OMPD_target:
+    case OMPD_target_simd:
+    case OMPD_target_teams:
+    case OMPD_target_parallel:
+    case OMPD_target_teams_distribute:
+    case OMPD_target_teams_distribute_simd:
+    case OMPD_target_parallel_for:
+    case OMPD_target_parallel_for_simd:
+    case OMPD_target_teams_distribute_parallel_for:
+    case OMPD_target_teams_distribute_parallel_for_simd:
+      CaptureRegion = OMPD_task;
+      break;
+    case OMPD_target_data:
+      // Do not capture device-clause expressions.
+      break;
+    case OMPD_teams_distribute_parallel_for:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_teams:
+    case OMPD_teams_distribute:
+    case OMPD_teams_distribute_simd:
+    case OMPD_distribute_parallel_for:
+    case OMPD_distribute_parallel_for_simd:
+    case OMPD_task:
+    case OMPD_taskloop:
+    case OMPD_taskloop_simd:
+    case OMPD_cancel:
+    case OMPD_parallel:
+    case OMPD_parallel_sections:
+    case OMPD_parallel_for:
+    case OMPD_parallel_for_simd:
+    case OMPD_threadprivate:
+    case OMPD_taskyield:
+    case OMPD_barrier:
+    case OMPD_taskwait:
+    case OMPD_cancellation_point:
+    case OMPD_flush:
+    case OMPD_declare_reduction:
+    case OMPD_declare_simd:
+    case OMPD_declare_target:
+    case OMPD_end_declare_target:
+    case OMPD_simd:
+    case OMPD_for:
+    case OMPD_for_simd:
+    case OMPD_sections:
+    case OMPD_section:
+    case OMPD_single:
+    case OMPD_master:
+    case OMPD_critical:
+    case OMPD_taskgroup:
+    case OMPD_distribute:
+    case OMPD_ordered:
+    case OMPD_atomic:
+    case OMPD_distribute_simd:
+      llvm_unreachable("Unexpected OpenMP directive with num_teams-clause");
+    case OMPD_unknown:
+      llvm_unreachable("Unknown OpenMP directive");
+    }
+    break;
   case OMPC_firstprivate:
   case OMPC_lastprivate:
   case OMPC_reduction:
@@ -7567,7 +8286,6 @@
   case OMPC_capture:
   case OMPC_seq_cst:
   case OMPC_depend:
-  case OMPC_device:
   case OMPC_threads:
   case OMPC_simd:
   case OMPC_map:
@@ -7604,12 +8322,13 @@
     if (Val.isInvalid())
       return nullptr;
 
-    ValExpr = MakeFullExpr(Val.get()).get();
+    ValExpr = Val.get();
 
     OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
     CaptureRegion =
         getOpenMPCaptureRegionForClause(DKind, OMPC_if, NameModifier);
-    if (CaptureRegion != OMPD_unknown) {
+    if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) {
+      ValExpr = MakeFullExpr(ValExpr).get();
       llvm::MapVector<Expr *, DeclRefExpr *> Captures;
       ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
       HelperValStmt = buildPreInits(Context, Captures);
@@ -7715,7 +8434,6 @@
                                              SourceLocation EndLoc) {
   Expr *ValExpr = NumThreads;
   Stmt *HelperValStmt = nullptr;
-  OpenMPDirectiveKind CaptureRegion = OMPD_unknown;
 
   // OpenMP [2.5, Restrictions]
   //  The num_threads expression must evaluate to a positive integer value.
@@ -7724,8 +8442,10 @@
     return nullptr;
 
   OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
-  CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_num_threads);
-  if (CaptureRegion != OMPD_unknown) {
+  OpenMPDirectiveKind CaptureRegion =
+      getOpenMPCaptureRegionForClause(DKind, OMPC_num_threads);
+  if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) {
+    ValExpr = MakeFullExpr(ValExpr).get();
     llvm::MapVector<Expr *, DeclRefExpr *> Captures;
     ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
     HelperValStmt = buildPreInits(Context, Captures);
@@ -8149,8 +8869,11 @@
               << "schedule" << 1 << ChunkSize->getSourceRange();
           return nullptr;
         }
-      } else if (isParallelOrTaskRegion(DSAStack->getCurrentDirective()) &&
+      } else if (getOpenMPCaptureRegionForClause(
+                     DSAStack->getCurrentDirective(), OMPC_schedule) !=
+                     OMPD_unknown &&
                  !CurContext->isDependentContext()) {
+        ValExpr = MakeFullExpr(ValExpr).get();
         llvm::MapVector<Expr *, DeclRefExpr *> Captures;
         ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
         HelperValStmt = buildPreInits(Context, Captures);
@@ -8561,7 +9284,7 @@
     // A list item cannot appear in both a map clause and a data-sharing
     // attribute clause on the same construct
     if (CurrDir == OMPD_target || CurrDir == OMPD_target_parallel ||
-        CurrDir == OMPD_target_teams || 
+        CurrDir == OMPD_target_teams ||
         CurrDir == OMPD_target_teams_distribute ||
         CurrDir == OMPD_target_teams_distribute_parallel_for ||
         CurrDir == OMPD_target_teams_distribute_parallel_for_simd ||
@@ -8704,7 +9427,8 @@
       // A list item may appear in a firstprivate or lastprivate clause but not
       // both.
       if (DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_firstprivate &&
-          (CurrDir == OMPD_distribute || DVar.CKind != OMPC_lastprivate) &&
+          (isOpenMPDistributeDirective(CurrDir) ||
+           DVar.CKind != OMPC_lastprivate) &&
           DVar.RefExpr) {
         Diag(ELoc, diag::err_omp_wrong_dsa)
             << getOpenMPClauseName(DVar.CKind)
@@ -8798,14 +9522,7 @@
       // OpenMP 4.5 [2.15.5.1, Restrictions, p.3]
       // A list item cannot appear in both a map clause and a data-sharing
       // attribute clause on the same construct
-      if (CurrDir == OMPD_target || CurrDir == OMPD_target_parallel ||
-          CurrDir == OMPD_target_teams || 
-          CurrDir == OMPD_target_teams_distribute ||
-          CurrDir == OMPD_target_teams_distribute_parallel_for ||
-          CurrDir == OMPD_target_teams_distribute_parallel_for_simd ||
-          CurrDir == OMPD_target_teams_distribute_simd ||
-          CurrDir == OMPD_target_parallel_for_simd ||
-          CurrDir == OMPD_target_parallel_for) {
+      if (isOpenMPTargetExecutionDirective(CurrDir)) {
         OpenMPClauseKind ConflictKind;
         if (DSAStack->checkMappableExprComponentListsForDecl(
                 VD, /*CurrentRegionOnly=*/true,
@@ -8965,7 +9682,8 @@
     // both.
     DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(D, false);
     if (DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_lastprivate &&
-        (CurrDir == OMPD_distribute || DVar.CKind != OMPC_firstprivate) &&
+        (isOpenMPDistributeDirective(CurrDir) ||
+         DVar.CKind != OMPC_firstprivate) &&
         (DVar.CKind != OMPC_private || DVar.RefExpr != nullptr)) {
       Diag(ELoc, diag::err_omp_wrong_dsa)
           << getOpenMPClauseName(DVar.CKind)
@@ -9330,6 +10048,68 @@
 };
 } // namespace
 
+static bool CheckOMPArraySectionConstantForReduction(
+    ASTContext &Context, const OMPArraySectionExpr *OASE, bool &SingleElement,
+    SmallVectorImpl<llvm::APSInt> &ArraySizes) {
+  const Expr *Length = OASE->getLength();
+  if (Length == nullptr) {
+    // For array sections of the form [1:] or [:], we would need to analyze
+    // the lower bound...
+    if (OASE->getColonLoc().isValid())
+      return false;
+
+    // This is an array subscript which has implicit length 1!
+    SingleElement = true;
+    ArraySizes.push_back(llvm::APSInt::get(1));
+  } else {
+    llvm::APSInt ConstantLengthValue;
+    if (!Length->EvaluateAsInt(ConstantLengthValue, Context))
+      return false;
+
+    SingleElement = (ConstantLengthValue.getSExtValue() == 1);
+    ArraySizes.push_back(ConstantLengthValue);
+  }
+
+  // Get the base of this array section and walk up from there.
+  const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
+
+  // We require length = 1 for all array sections except the right-most to
+  // guarantee that the memory region is contiguous and has no holes in it.
+  while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) {
+    Length = TempOASE->getLength();
+    if (Length == nullptr) {
+      // For array sections of the form [1:] or [:], we would need to analyze
+      // the lower bound...
+      if (OASE->getColonLoc().isValid())
+        return false;
+
+      // This is an array subscript which has implicit length 1!
+      ArraySizes.push_back(llvm::APSInt::get(1));
+    } else {
+      llvm::APSInt ConstantLengthValue;
+      if (!Length->EvaluateAsInt(ConstantLengthValue, Context) ||
+          ConstantLengthValue.getSExtValue() != 1)
+        return false;
+
+      ArraySizes.push_back(ConstantLengthValue);
+    }
+    Base = TempOASE->getBase()->IgnoreParenImpCasts();
+  }
+
+  // If we have a single element, we don't need to add the implicit lengths.
+  if (!SingleElement) {
+    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) {
+      // Has implicit length 1!
+      ArraySizes.push_back(llvm::APSInt::get(1));
+      Base = TempASE->getBase()->IgnoreParenImpCasts();
+    }
+  }
+
+  // This array section can be privatized as a single value or as a constant
+  // sized array.
+  return true;
+}
+
 static bool ActOnOMPReductionKindClause(
     Sema &S, DSAStackTy *Stack, OpenMPClauseKind ClauseKind,
     ArrayRef<Expr *> VarList, SourceLocation StartLoc, SourceLocation LParenLoc,
@@ -9398,6 +10178,7 @@
   case OO_GreaterGreaterEqual:
   case OO_EqualEqual:
   case OO_ExclaimEqual:
+  case OO_Spaceship:
   case OO_PlusPlus:
   case OO_MinusMinus:
   case OO_Comma:
@@ -9628,9 +10409,34 @@
     auto *RHSVD = buildVarDecl(S, ELoc, Type, D->getName(),
                                D->hasAttrs() ? &D->getAttrs() : nullptr);
     auto PrivateTy = Type;
-    if (OASE ||
-        (!ASE &&
+
+    // Try if we can determine constant lengths for all array sections and avoid
+    // the VLA.
+    bool ConstantLengthOASE = false;
+    if (OASE) {
+      bool SingleElement;
+      llvm::SmallVector<llvm::APSInt, 4> ArraySizes;
+      ConstantLengthOASE = CheckOMPArraySectionConstantForReduction(
+          Context, OASE, SingleElement, ArraySizes);
+
+      // If we don't have a single element, we must emit a constant array type.
+      if (ConstantLengthOASE && !SingleElement) {
+        for (auto &Size : ArraySizes) {
+          PrivateTy = Context.getConstantArrayType(
+              PrivateTy, Size, ArrayType::Normal, /*IndexTypeQuals=*/0);
+        }
+      }
+    }
+
+    if ((OASE && !ConstantLengthOASE) ||
+        (!OASE && !ASE &&
          D->getType().getNonReferenceType()->isVariablyModifiedType())) {
+      if (!Context.getTargetInfo().isVLASupported() &&
+          S.shouldDiagnoseTargetSupportFromOpenMP()) {
+        S.Diag(ELoc, diag::err_omp_reduction_vla_unsupported) << !!OASE;
+        S.Diag(ELoc, diag::note_vla_unsupported);
+        continue;
+      }
       // For arrays/array sections only:
       // Create pseudo array type for private copy. The size for this array will
       // be generated during codegen.
@@ -9638,8 +10444,7 @@
       // (type of the variable or single array element).
       PrivateTy = Context.getVariableArrayType(
           Type,
-          new (Context) OpaqueValueExpr(SourceLocation(), Context.getSizeType(),
-                                        VK_RValue),
+          new (Context) OpaqueValueExpr(ELoc, Context.getSizeType(), VK_RValue),
           ArrayType::Normal, /*IndexTypeQuals=*/0, SourceRange());
     } else if (!ASE && !OASE &&
                Context.getAsArrayType(D->getType().getNonReferenceType()))
@@ -9721,8 +10526,7 @@
           if (Type->isPointerType()) {
             // Cast to pointer type.
             auto CastExpr = S.BuildCStyleCastExpr(
-                SourceLocation(), Context.getTrivialTypeSourceInfo(Type, ELoc),
-                SourceLocation(), Init);
+                ELoc, Context.getTrivialTypeSourceInfo(Type, ELoc), ELoc, Init);
             if (CastExpr.isInvalid())
               continue;
             Init = CastExpr.get();
@@ -9747,6 +10551,7 @@
       case BO_GE:
       case BO_EQ:
       case BO_NE:
+      case BO_Cmp:
       case BO_AndAssign:
       case BO_XorAssign:
       case BO_OrAssign:
@@ -9816,9 +10621,9 @@
               S.BuildBinOp(Stack->getCurScope(), ReductionId.getLocStart(),
                            BO_Assign, LHSDRE, ReductionOp.get());
         } else {
-          auto *ConditionalOp = new (Context) ConditionalOperator(
-              ReductionOp.get(), SourceLocation(), LHSDRE, SourceLocation(),
-              RHSDRE, Type, VK_LValue, OK_Ordinary);
+          auto *ConditionalOp = new (Context)
+              ConditionalOperator(ReductionOp.get(), ELoc, LHSDRE, ELoc, RHSDRE,
+                                  Type, VK_LValue, OK_Ordinary);
           ReductionOp =
               S.BuildBinOp(Stack->getCurScope(), ReductionId.getLocStart(),
                            BO_Assign, LHSDRE, ConditionalOp);
@@ -10220,6 +11025,18 @@
       continue;
     }
     auto &&Info = Stack->isLoopControlVariable(D);
+    // OpenMP [2.15.11, distribute simd Construct]
+    // A list item may not appear in a linear clause, unless it is the loop
+    // iteration variable.
+    if (isOpenMPDistributeDirective(Stack->getCurrentDirective()) &&
+        isOpenMPSimdDirective(Stack->getCurrentDirective()) && !Info.first) {
+      SemaRef.Diag(ELoc,
+                   diag::err_omp_linear_distribute_var_non_loop_iteration);
+      Updates.push_back(nullptr);
+      Finals.push_back(nullptr);
+      HasErrors = true;
+      continue;
+    }
     Expr *InitExpr = *CurInit;
 
     // Build privatized reference to the current linear var.
@@ -10594,138 +11411,135 @@
       TotalDepCount.setIsUnsigned(/*Val=*/true);
     }
   }
-  if ((DepKind != OMPC_DEPEND_sink && DepKind != OMPC_DEPEND_source) ||
-      DSAStack->getParentOrderedRegionParam()) {
-    for (auto &RefExpr : VarList) {
-      assert(RefExpr && "NULL expr in OpenMP shared clause.");
-      if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
+  for (auto &RefExpr : VarList) {
+    assert(RefExpr && "NULL expr in OpenMP shared clause.");
+    if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
+      // It will be analyzed later.
+      Vars.push_back(RefExpr);
+      continue;
+    }
+
+    SourceLocation ELoc = RefExpr->getExprLoc();
+    auto *SimpleExpr = RefExpr->IgnoreParenCasts();
+    if (DepKind == OMPC_DEPEND_sink) {
+      if (DSAStack->getParentOrderedRegionParam() &&
+          DepCounter >= TotalDepCount) {
+        Diag(ELoc, diag::err_omp_depend_sink_unexpected_expr);
+        continue;
+      }
+      ++DepCounter;
+      // OpenMP  [2.13.9, Summary]
+      // depend(dependence-type : vec), where dependence-type is:
+      // 'sink' and where vec is the iteration vector, which has the form:
+      //  x1 [+- d1], x2 [+- d2 ], . . . , xn [+- dn]
+      // where n is the value specified by the ordered clause in the loop
+      // directive, xi denotes the loop iteration variable of the i-th nested
+      // loop associated with the loop directive, and di is a constant
+      // non-negative integer.
+      if (CurContext->isDependentContext()) {
         // It will be analyzed later.
         Vars.push_back(RefExpr);
         continue;
       }
-
-      SourceLocation ELoc = RefExpr->getExprLoc();
-      auto *SimpleExpr = RefExpr->IgnoreParenCasts();
-      if (DepKind == OMPC_DEPEND_sink) {
-        if (DepCounter >= TotalDepCount) {
-          Diag(ELoc, diag::err_omp_depend_sink_unexpected_expr);
-          continue;
-        }
-        ++DepCounter;
-        // OpenMP  [2.13.9, Summary]
-        // depend(dependence-type : vec), where dependence-type is:
-        // 'sink' and where vec is the iteration vector, which has the form:
-        //  x1 [+- d1], x2 [+- d2 ], . . . , xn [+- dn]
-        // where n is the value specified by the ordered clause in the loop
-        // directive, xi denotes the loop iteration variable of the i-th nested
-        // loop associated with the loop directive, and di is a constant
-        // non-negative integer.
-        if (CurContext->isDependentContext()) {
-          // It will be analyzed later.
-          Vars.push_back(RefExpr);
-          continue;
-        }
-        SimpleExpr = SimpleExpr->IgnoreImplicit();
-        OverloadedOperatorKind OOK = OO_None;
-        SourceLocation OOLoc;
-        Expr *LHS = SimpleExpr;
-        Expr *RHS = nullptr;
-        if (auto *BO = dyn_cast<BinaryOperator>(SimpleExpr)) {
-          OOK = BinaryOperator::getOverloadedOperator(BO->getOpcode());
-          OOLoc = BO->getOperatorLoc();
-          LHS = BO->getLHS()->IgnoreParenImpCasts();
-          RHS = BO->getRHS()->IgnoreParenImpCasts();
-        } else if (auto *OCE = dyn_cast<CXXOperatorCallExpr>(SimpleExpr)) {
-          OOK = OCE->getOperator();
-          OOLoc = OCE->getOperatorLoc();
-          LHS = OCE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
-          RHS = OCE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
-        } else if (auto *MCE = dyn_cast<CXXMemberCallExpr>(SimpleExpr)) {
-          OOK = MCE->getMethodDecl()
-                    ->getNameInfo()
-                    .getName()
-                    .getCXXOverloadedOperator();
-          OOLoc = MCE->getCallee()->getExprLoc();
-          LHS = MCE->getImplicitObjectArgument()->IgnoreParenImpCasts();
-          RHS = MCE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
-        }
-        SourceLocation ELoc;
-        SourceRange ERange;
-        auto Res = getPrivateItem(*this, LHS, ELoc, ERange,
-                                  /*AllowArraySection=*/false);
-        if (Res.second) {
-          // It will be analyzed later.
-          Vars.push_back(RefExpr);
-        }
-        ValueDecl *D = Res.first;
-        if (!D)
-          continue;
-
-        if (OOK != OO_Plus && OOK != OO_Minus && (RHS || OOK != OO_None)) {
-          Diag(OOLoc, diag::err_omp_depend_sink_expected_plus_minus);
-          continue;
-        }
-        if (RHS) {
-          ExprResult RHSRes = VerifyPositiveIntegerConstantInClause(
-              RHS, OMPC_depend, /*StrictlyPositive=*/false);
-          if (RHSRes.isInvalid())
-            continue;
-        }
-        if (!CurContext->isDependentContext() &&
-            DSAStack->getParentOrderedRegionParam() &&
-            DepCounter != DSAStack->isParentLoopControlVariable(D).first) {
-          ValueDecl* VD = DSAStack->getParentLoopControlVariable(
-              DepCounter.getZExtValue());
-          if (VD) {
-            Diag(ELoc, diag::err_omp_depend_sink_expected_loop_iteration)
-                << 1 << VD;
-          } else {
-             Diag(ELoc, diag::err_omp_depend_sink_expected_loop_iteration) << 0;
-          }
-          continue;
-        }
-        OpsOffs.push_back({RHS, OOK});
-      } else {
-        auto *ASE = dyn_cast<ArraySubscriptExpr>(SimpleExpr);
-        if (!RefExpr->IgnoreParenImpCasts()->isLValue() ||
-            (ASE &&
-             !ASE->getBase()
-                  ->getType()
-                  .getNonReferenceType()
-                  ->isPointerType() &&
-             !ASE->getBase()->getType().getNonReferenceType()->isArrayType())) {
-          Diag(ELoc, diag::err_omp_expected_addressable_lvalue_or_array_item)
-              << RefExpr->getSourceRange();
-          continue;
-        }
-        bool Suppress = getDiagnostics().getSuppressAllDiagnostics();
-        getDiagnostics().setSuppressAllDiagnostics(/*Val=*/true);
-        ExprResult Res = CreateBuiltinUnaryOp(SourceLocation(), UO_AddrOf,
-                                              RefExpr->IgnoreParenImpCasts());
-        getDiagnostics().setSuppressAllDiagnostics(Suppress);
-        if (!Res.isUsable() && !isa<OMPArraySectionExpr>(SimpleExpr)) {
-          Diag(ELoc, diag::err_omp_expected_addressable_lvalue_or_array_item)
-              << RefExpr->getSourceRange();
-          continue;
-        }
+      SimpleExpr = SimpleExpr->IgnoreImplicit();
+      OverloadedOperatorKind OOK = OO_None;
+      SourceLocation OOLoc;
+      Expr *LHS = SimpleExpr;
+      Expr *RHS = nullptr;
+      if (auto *BO = dyn_cast<BinaryOperator>(SimpleExpr)) {
+        OOK = BinaryOperator::getOverloadedOperator(BO->getOpcode());
+        OOLoc = BO->getOperatorLoc();
+        LHS = BO->getLHS()->IgnoreParenImpCasts();
+        RHS = BO->getRHS()->IgnoreParenImpCasts();
+      } else if (auto *OCE = dyn_cast<CXXOperatorCallExpr>(SimpleExpr)) {
+        OOK = OCE->getOperator();
+        OOLoc = OCE->getOperatorLoc();
+        LHS = OCE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
+        RHS = OCE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
+      } else if (auto *MCE = dyn_cast<CXXMemberCallExpr>(SimpleExpr)) {
+        OOK = MCE->getMethodDecl()
+                  ->getNameInfo()
+                  .getName()
+                  .getCXXOverloadedOperator();
+        OOLoc = MCE->getCallee()->getExprLoc();
+        LHS = MCE->getImplicitObjectArgument()->IgnoreParenImpCasts();
+        RHS = MCE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
       }
-      Vars.push_back(RefExpr->IgnoreParenImpCasts());
-    }
+      SourceLocation ELoc;
+      SourceRange ERange;
+      auto Res = getPrivateItem(*this, LHS, ELoc, ERange,
+                                /*AllowArraySection=*/false);
+      if (Res.second) {
+        // It will be analyzed later.
+        Vars.push_back(RefExpr);
+      }
+      ValueDecl *D = Res.first;
+      if (!D)
+        continue;
 
-    if (!CurContext->isDependentContext() && DepKind == OMPC_DEPEND_sink &&
-        TotalDepCount > VarList.size() &&
-        DSAStack->getParentOrderedRegionParam() &&
-        DSAStack->getParentLoopControlVariable(VarList.size() + 1)) {
-      Diag(EndLoc, diag::err_omp_depend_sink_expected_loop_iteration) << 1
-          << DSAStack->getParentLoopControlVariable(VarList.size() + 1);
+      if (OOK != OO_Plus && OOK != OO_Minus && (RHS || OOK != OO_None)) {
+        Diag(OOLoc, diag::err_omp_depend_sink_expected_plus_minus);
+        continue;
+      }
+      if (RHS) {
+        ExprResult RHSRes = VerifyPositiveIntegerConstantInClause(
+            RHS, OMPC_depend, /*StrictlyPositive=*/false);
+        if (RHSRes.isInvalid())
+          continue;
+      }
+      if (!CurContext->isDependentContext() &&
+          DSAStack->getParentOrderedRegionParam() &&
+          DepCounter != DSAStack->isParentLoopControlVariable(D).first) {
+        ValueDecl *VD =
+            DSAStack->getParentLoopControlVariable(DepCounter.getZExtValue());
+        if (VD) {
+          Diag(ELoc, diag::err_omp_depend_sink_expected_loop_iteration)
+              << 1 << VD;
+        } else {
+          Diag(ELoc, diag::err_omp_depend_sink_expected_loop_iteration) << 0;
+        }
+        continue;
+      }
+      OpsOffs.push_back({RHS, OOK});
+    } else {
+      auto *ASE = dyn_cast<ArraySubscriptExpr>(SimpleExpr);
+      if (!RefExpr->IgnoreParenImpCasts()->isLValue() ||
+          (ASE &&
+           !ASE->getBase()->getType().getNonReferenceType()->isPointerType() &&
+           !ASE->getBase()->getType().getNonReferenceType()->isArrayType())) {
+        Diag(ELoc, diag::err_omp_expected_addressable_lvalue_or_array_item)
+            << RefExpr->getSourceRange();
+        continue;
+      }
+      bool Suppress = getDiagnostics().getSuppressAllDiagnostics();
+      getDiagnostics().setSuppressAllDiagnostics(/*Val=*/true);
+      ExprResult Res =
+          CreateBuiltinUnaryOp(ELoc, UO_AddrOf, RefExpr->IgnoreParenImpCasts());
+      getDiagnostics().setSuppressAllDiagnostics(Suppress);
+      if (!Res.isUsable() && !isa<OMPArraySectionExpr>(SimpleExpr)) {
+        Diag(ELoc, diag::err_omp_expected_addressable_lvalue_or_array_item)
+            << RefExpr->getSourceRange();
+        continue;
+      }
     }
-    if (DepKind != OMPC_DEPEND_source && DepKind != OMPC_DEPEND_sink &&
-        Vars.empty())
-      return nullptr;
+    Vars.push_back(RefExpr->IgnoreParenImpCasts());
   }
+
+  if (!CurContext->isDependentContext() && DepKind == OMPC_DEPEND_sink &&
+      TotalDepCount > VarList.size() &&
+      DSAStack->getParentOrderedRegionParam() &&
+      DSAStack->getParentLoopControlVariable(VarList.size() + 1)) {
+    Diag(EndLoc, diag::err_omp_depend_sink_expected_loop_iteration)
+        << 1 << DSAStack->getParentLoopControlVariable(VarList.size() + 1);
+  }
+  if (DepKind != OMPC_DEPEND_source && DepKind != OMPC_DEPEND_sink &&
+      Vars.empty())
+    return nullptr;
+
   auto *C = OMPDependClause::Create(Context, StartLoc, LParenLoc, EndLoc,
                                     DepKind, DepLoc, ColonLoc, Vars);
-  if (DepKind == OMPC_DEPEND_sink || DepKind == OMPC_DEPEND_source)
+  if ((DepKind == OMPC_DEPEND_sink || DepKind == OMPC_DEPEND_source) &&
+      DSAStack->isParentOrderedRegion())
     DSAStack->addDoacrossDependClause(C, OpsOffs);
   return C;
 }
@@ -10743,24 +11557,30 @@
     return nullptr;
 
   OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
-  if (isOpenMPTargetExecutionDirective(DKind) &&
-      !CurContext->isDependentContext()) {
+  OpenMPDirectiveKind CaptureRegion =
+      getOpenMPCaptureRegionForClause(DKind, OMPC_device);
+  if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) {
+    ValExpr = MakeFullExpr(ValExpr).get();
     llvm::MapVector<Expr *, DeclRefExpr *> Captures;
     ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
     HelperValStmt = buildPreInits(Context, Captures);
   }
 
-  return new (Context)
-      OMPDeviceClause(ValExpr, HelperValStmt, StartLoc, LParenLoc, EndLoc);
+  return new (Context) OMPDeviceClause(ValExpr, HelperValStmt, CaptureRegion,
+                                       StartLoc, LParenLoc, EndLoc);
 }
 
 static bool CheckTypeMappable(SourceLocation SL, SourceRange SR, Sema &SemaRef,
-                              DSAStackTy *Stack, QualType QTy) {
+                              DSAStackTy *Stack, QualType QTy,
+                              bool FullCheck = true) {
   NamedDecl *ND;
   if (QTy->isIncompleteType(&ND)) {
     SemaRef.Diag(SL, diag::err_incomplete_type) << QTy << SR;
     return false;
   }
+  if (FullCheck && !SemaRef.CurContext->isDependentContext() &&
+      !QTy.isTrivialType(SemaRef.Context))
+    SemaRef.Diag(SL, diag::warn_omp_non_trivial_type_mapped) << QTy << SR;
   return true;
 }
 
@@ -10859,7 +11679,7 @@
 static Expr *CheckMapClauseExpressionBase(
     Sema &SemaRef, Expr *E,
     OMPClauseMappableExprCommon::MappableExprComponentList &CurComponents,
-    OpenMPClauseKind CKind) {
+    OpenMPClauseKind CKind, bool NoDiagnose) {
   SourceLocation ELoc = E->getExprLoc();
   SourceRange ERange = E->getSourceRange();
 
@@ -10909,7 +11729,7 @@
 
     if (auto *CurE = dyn_cast<DeclRefExpr>(E)) {
       if (!isa<VarDecl>(CurE->getDecl()))
-        break;
+        return nullptr;
 
       RelevantExpr = CurE;
 
@@ -10919,12 +11739,8 @@
       AllowWholeSizeArraySection = false;
 
       // Record the component.
-      CurComponents.push_back(OMPClauseMappableExprCommon::MappableComponent(
-          CurE, CurE->getDecl()));
-      continue;
-    }
-
-    if (auto *CurE = dyn_cast<MemberExpr>(E)) {
+      CurComponents.emplace_back(CurE, CurE->getDecl());
+    } else if (auto *CurE = dyn_cast<MemberExpr>(E)) {
       auto *BaseE = CurE->getBase()->IgnoreParenImpCasts();
 
       if (isa<CXXThisExpr>(BaseE))
@@ -10934,9 +11750,14 @@
         E = BaseE;
 
       if (!isa<FieldDecl>(CurE->getMemberDecl())) {
-        SemaRef.Diag(ELoc, diag::err_omp_expected_access_to_data_field)
-            << CurE->getSourceRange();
-        break;
+        if (!NoDiagnose) {
+          SemaRef.Diag(ELoc, diag::err_omp_expected_access_to_data_field)
+              << CurE->getSourceRange();
+          return nullptr;
+        }
+        if (RelevantExpr)
+          return nullptr;
+        continue;
       }
 
       auto *FD = cast<FieldDecl>(CurE->getMemberDecl());
@@ -10945,9 +11766,14 @@
       //  A bit-field cannot appear in a map clause.
       //
       if (FD->isBitField()) {
-        SemaRef.Diag(ELoc, diag::err_omp_bit_fields_forbidden_in_clause)
-            << CurE->getSourceRange() << getOpenMPClauseName(CKind);
-        break;
+        if (!NoDiagnose) {
+          SemaRef.Diag(ELoc, diag::err_omp_bit_fields_forbidden_in_clause)
+              << CurE->getSourceRange() << getOpenMPClauseName(CKind);
+          return nullptr;
+        }
+        if (RelevantExpr)
+          return nullptr;
+        continue;
       }
 
       // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C++, p.1]
@@ -10959,12 +11785,16 @@
       //  A list item cannot be a variable that is a member of a structure with
       //  a union type.
       //
-      if (auto *RT = CurType->getAs<RecordType>())
+      if (auto *RT = CurType->getAs<RecordType>()) {
         if (RT->isUnionType()) {
-          SemaRef.Diag(ELoc, diag::err_omp_union_type_not_allowed)
-              << CurE->getSourceRange();
-          break;
+          if (!NoDiagnose) {
+            SemaRef.Diag(ELoc, diag::err_omp_union_type_not_allowed)
+                << CurE->getSourceRange();
+            return nullptr;
+          }
+          continue;
         }
+      }
 
       // If we got a member expression, we should not expect any array section
       // before that:
@@ -10977,18 +11807,17 @@
       AllowWholeSizeArraySection = false;
 
       // Record the component.
-      CurComponents.push_back(
-          OMPClauseMappableExprCommon::MappableComponent(CurE, FD));
-      continue;
-    }
-
-    if (auto *CurE = dyn_cast<ArraySubscriptExpr>(E)) {
+      CurComponents.emplace_back(CurE, FD);
+    } else if (auto *CurE = dyn_cast<ArraySubscriptExpr>(E)) {
       E = CurE->getBase()->IgnoreParenImpCasts();
 
       if (!E->getType()->isAnyPointerType() && !E->getType()->isArrayType()) {
-        SemaRef.Diag(ELoc, diag::err_omp_expected_base_var_name)
-            << 0 << CurE->getSourceRange();
-        break;
+        if (!NoDiagnose) {
+          SemaRef.Diag(ELoc, diag::err_omp_expected_base_var_name)
+              << 0 << CurE->getSourceRange();
+          return nullptr;
+        }
+        continue;
       }
 
       // If we got an array subscript that express the whole dimension we
@@ -10999,15 +11828,12 @@
         AllowWholeSizeArraySection = false;
 
       // Record the component - we don't have any declaration associated.
-      CurComponents.push_back(
-          OMPClauseMappableExprCommon::MappableComponent(CurE, nullptr));
-      continue;
-    }
-
-    if (auto *CurE = dyn_cast<OMPArraySectionExpr>(E)) {
+      CurComponents.emplace_back(CurE, nullptr);
+    } else if (auto *CurE = dyn_cast<OMPArraySectionExpr>(E)) {
+      assert(!NoDiagnose && "Array sections cannot be implicitly mapped.");
       E = CurE->getBase()->IgnoreParenImpCasts();
 
-      auto CurType =
+      QualType CurType =
           OMPArraySectionExpr::getBaseOriginalType(E).getCanonicalType();
 
       // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C++, p.1]
@@ -11021,7 +11847,7 @@
       if (!IsPointer && !CurType->isArrayType()) {
         SemaRef.Diag(ELoc, diag::err_omp_expected_base_var_name)
             << 0 << CurE->getSourceRange();
-        break;
+        return nullptr;
       }
 
       bool NotWhole =
@@ -11044,20 +11870,20 @@
         SemaRef.Diag(
             ELoc, diag::err_array_section_does_not_specify_contiguous_storage)
             << CurE->getSourceRange();
-        break;
+        return nullptr;
       }
 
       // Record the component - we don't have any declaration associated.
-      CurComponents.push_back(
-          OMPClauseMappableExprCommon::MappableComponent(CurE, nullptr));
-      continue;
+      CurComponents.emplace_back(CurE, nullptr);
+    } else {
+      if (!NoDiagnose) {
+        // If nothing else worked, this is not a valid map clause expression.
+        SemaRef.Diag(
+            ELoc, diag::err_omp_expected_named_var_member_or_array_expression)
+            << ERange;
+      }
+      return nullptr;
     }
-
-    // If nothing else worked, this is not a valid map clause expression.
-    SemaRef.Diag(ELoc,
-                 diag::err_omp_expected_named_var_member_or_array_expression)
-        << ERange;
-    break;
   }
 
   return RelevantExpr;
@@ -11209,14 +12035,20 @@
                 DerivedLoc,
                 diag::err_omp_pointer_mapped_along_with_derived_section)
                 << DerivedLoc;
-          } else {
+            SemaRef.Diag(RE->getExprLoc(), diag::note_used_here)
+                << RE->getSourceRange();
+            return true;
+          } else if (CI->getAssociatedExpression()->getStmtClass() !=
+                         SI->getAssociatedExpression()->getStmtClass() ||
+                     CI->getAssociatedDeclaration()->getCanonicalDecl() ==
+                         SI->getAssociatedDeclaration()->getCanonicalDecl()) {
             assert(CI != CE && SI != SE);
-            SemaRef.Diag(DerivedLoc, diag::err_omp_same_pointer_derreferenced)
+            SemaRef.Diag(DerivedLoc, diag::err_omp_same_pointer_dereferenced)
                 << DerivedLoc;
+            SemaRef.Diag(RE->getExprLoc(), diag::note_used_here)
+                << RE->getSourceRange();
+            return true;
           }
-          SemaRef.Diag(RE->getExprLoc(), diag::note_used_here)
-              << RE->getSourceRange();
-          return true;
         }
 
         // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.4]
@@ -11348,8 +12180,8 @@
 
     // Obtain the array or member expression bases if required. Also, fill the
     // components array with all the components identified in the process.
-    auto *BE =
-        CheckMapClauseExpressionBase(SemaRef, SimpleExpr, CurComponents, CKind);
+    auto *BE = CheckMapClauseExpressionBase(SemaRef, SimpleExpr, CurComponents,
+                                            CKind, /*NoDiagnose=*/false);
     if (!BE)
       continue;
 
@@ -11751,7 +12583,6 @@
                                            SourceLocation EndLoc) {
   Expr *ValExpr = NumTeams;
   Stmt *HelperValStmt = nullptr;
-  OpenMPDirectiveKind CaptureRegion = OMPD_unknown;
 
   // OpenMP [teams Constrcut, Restrictions]
   // The num_teams expression must evaluate to a positive integer value.
@@ -11760,8 +12591,10 @@
     return nullptr;
 
   OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
-  CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_num_teams);
-  if (CaptureRegion != OMPD_unknown) {
+  OpenMPDirectiveKind CaptureRegion =
+      getOpenMPCaptureRegionForClause(DKind, OMPC_num_teams);
+  if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) {
+    ValExpr = MakeFullExpr(ValExpr).get();
     llvm::MapVector<Expr *, DeclRefExpr *> Captures;
     ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
     HelperValStmt = buildPreInits(Context, Captures);
@@ -11777,7 +12610,6 @@
                                               SourceLocation EndLoc) {
   Expr *ValExpr = ThreadLimit;
   Stmt *HelperValStmt = nullptr;
-  OpenMPDirectiveKind CaptureRegion = OMPD_unknown;
 
   // OpenMP [teams Constrcut, Restrictions]
   // The thread_limit expression must evaluate to a positive integer value.
@@ -11786,8 +12618,10 @@
     return nullptr;
 
   OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
-  CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_thread_limit);
-  if (CaptureRegion != OMPD_unknown) {
+  OpenMPDirectiveKind CaptureRegion =
+      getOpenMPCaptureRegionForClause(DKind, OMPC_thread_limit);
+  if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) {
+    ValExpr = MakeFullExpr(ValExpr).get();
     llvm::MapVector<Expr *, DeclRefExpr *> Captures;
     ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
     HelperValStmt = buildPreInits(Context, Captures);
@@ -11894,8 +12728,11 @@
               << "dist_schedule" << ChunkSize->getSourceRange();
           return nullptr;
         }
-      } else if (isParallelOrTaskRegion(DSAStack->getCurrentDirective()) &&
+      } else if (getOpenMPCaptureRegionForClause(
+                     DSAStack->getCurrentDirective(), OMPC_dist_schedule) !=
+                     OMPD_unknown &&
                  !CurContext->isDependentContext()) {
+        ValExpr = MakeFullExpr(ValExpr).get();
         llvm::MapVector<Expr *, DeclRefExpr *> Captures;
         ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
         HelperValStmt = buildPreInits(Context, Captures);
@@ -12002,7 +12839,7 @@
       ND->addAttr(A);
       if (ASTMutationListener *ML = Context.getASTMutationListener())
         ML->DeclarationMarkedOpenMPDeclareTarget(ND, A);
-      checkDeclIsAllowedInOpenMPTarget(nullptr, ND);
+      checkDeclIsAllowedInOpenMPTarget(nullptr, ND, Id.getLoc());
     } else if (ND->getAttr<OMPDeclareTargetDeclAttr>()->getMapType() != MT) {
       Diag(Id.getLoc(), diag::err_omp_declare_target_to_and_link)
           << Id.getName();
@@ -12015,7 +12852,7 @@
                                      Sema &SemaRef, Decl *D) {
   if (!D)
     return;
-  Decl *LD = nullptr;
+  const Decl *LD = nullptr;
   if (isa<TagDecl>(D)) {
     LD = cast<TagDecl>(D)->getDefinition();
   } else if (isa<VarDecl>(D)) {
@@ -12031,22 +12868,29 @@
         ML->DeclarationMarkedOpenMPDeclareTarget(D, A);
       return;
     }
-
-  } else if (isa<FunctionDecl>(D)) {
+  } else if (auto *F = dyn_cast<FunctionDecl>(D)) {
     const FunctionDecl *FD = nullptr;
-    if (cast<FunctionDecl>(D)->hasBody(FD))
-      LD = const_cast<FunctionDecl *>(FD);
-
-    // If the definition is associated with the current declaration in the
-    // target region (it can be e.g. a lambda) that is legal and we do not need
-    // to do anything else.
-    if (LD == D) {
-      Attr *A = OMPDeclareTargetDeclAttr::CreateImplicit(
-          SemaRef.Context, OMPDeclareTargetDeclAttr::MT_To);
-      D->addAttr(A);
-      if (ASTMutationListener *ML = SemaRef.Context.getASTMutationListener())
-        ML->DeclarationMarkedOpenMPDeclareTarget(D, A);
-      return;
+    if (cast<FunctionDecl>(D)->hasBody(FD)) {
+      LD = FD;
+      // If the definition is associated with the current declaration in the
+      // target region (it can be e.g. a lambda) that is legal and we do not
+      // need to do anything else.
+      if (LD == D) {
+        Attr *A = OMPDeclareTargetDeclAttr::CreateImplicit(
+            SemaRef.Context, OMPDeclareTargetDeclAttr::MT_To);
+        D->addAttr(A);
+        if (ASTMutationListener *ML = SemaRef.Context.getASTMutationListener())
+          ML->DeclarationMarkedOpenMPDeclareTarget(D, A);
+        return;
+      }
+    } else if (F->isFunctionTemplateSpecialization() &&
+               F->getTemplateSpecializationKind() ==
+                   TSK_ImplicitInstantiation) {
+      // Check if the function is implicitly instantiated from the template
+      // defined in the declare target region.
+      const FunctionTemplateDecl *FTD = F->getPrimaryTemplate();
+      if (FTD && FTD->hasAttr<OMPDeclareTargetDeclAttr>())
+        return;
     }
   }
   if (!LD)
@@ -12058,7 +12902,7 @@
       SemaRef.Diag(LD->getLocation(), diag::warn_omp_not_in_target_context);
       SemaRef.Diag(SL, diag::note_used_here) << SR;
     } else {
-      DeclContext *DC = LD->getDeclContext();
+      const DeclContext *DC = LD->getDeclContext();
       while (DC) {
         if (isa<FunctionDecl>(DC) &&
             cast<FunctionDecl>(DC)->hasAttr<OMPDeclareTargetDeclAttr>())
@@ -12086,12 +12930,14 @@
                                    ValueDecl *VD) {
   if (VD->hasAttr<OMPDeclareTargetDeclAttr>())
     return true;
-  if (!CheckTypeMappable(SL, SR, SemaRef, Stack, VD->getType()))
+  if (!CheckTypeMappable(SL, SR, SemaRef, Stack, VD->getType(),
+                         /*FullCheck=*/false))
     return false;
   return true;
 }
 
-void Sema::checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D) {
+void Sema::checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D,
+                                            SourceLocation IdLoc) {
   if (!D || D->isInvalidDecl())
     return;
   SourceRange SR = E ? E->getSourceRange() : D->getSourceRange();
@@ -12110,7 +12956,8 @@
     if ((E || !VD->getType()->isIncompleteType()) &&
         !checkValueDeclInTarget(SL, SR, *this, DSAStack, VD)) {
       // Mark decl as declared target to prevent further diagnostic.
-      if (isa<VarDecl>(VD) || isa<FunctionDecl>(VD)) {
+      if (isa<VarDecl>(VD) || isa<FunctionDecl>(VD) ||
+          isa<FunctionTemplateDecl>(VD)) {
         Attr *A = OMPDeclareTargetDeclAttr::CreateImplicit(
             Context, OMPDeclareTargetDeclAttr::MT_To);
         VD->addAttr(A);
@@ -12120,10 +12967,31 @@
       return;
     }
   }
+  if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+    if (FD->hasAttr<OMPDeclareTargetDeclAttr>() &&
+        (FD->getAttr<OMPDeclareTargetDeclAttr>()->getMapType() ==
+         OMPDeclareTargetDeclAttr::MT_Link)) {
+      assert(IdLoc.isValid() && "Source location is expected");
+      Diag(IdLoc, diag::err_omp_function_in_link_clause);
+      Diag(FD->getLocation(), diag::note_defined_here) << FD;
+      return;
+    }
+  }
+  if (auto *FTD = dyn_cast<FunctionTemplateDecl>(D)) {
+    if (FTD->hasAttr<OMPDeclareTargetDeclAttr>() &&
+        (FTD->getAttr<OMPDeclareTargetDeclAttr>()->getMapType() ==
+         OMPDeclareTargetDeclAttr::MT_Link)) {
+      assert(IdLoc.isValid() && "Source location is expected");
+      Diag(IdLoc, diag::err_omp_function_in_link_clause);
+      Diag(FTD->getLocation(), diag::note_defined_here) << FTD;
+      return;
+    }
+  }
   if (!E) {
     // Checking declaration inside declare target region.
     if (!D->hasAttr<OMPDeclareTargetDeclAttr>() &&
-        (isa<VarDecl>(D) || isa<FunctionDecl>(D))) {
+        (isa<VarDecl>(D) || isa<FunctionDecl>(D) ||
+         isa<FunctionTemplateDecl>(D))) {
       Attr *A = OMPDeclareTargetDeclAttr::CreateImplicit(
           Context, OMPDeclareTargetDeclAttr::MT_To);
       D->addAttr(A);
diff --git a/lib/Sema/SemaOverload.cpp b/lib/Sema/SemaOverload.cpp
index 0e53ffa..2c03f69 100644
--- a/lib/Sema/SemaOverload.cpp
+++ b/lib/Sema/SemaOverload.cpp
@@ -327,7 +327,8 @@
   FloatingIntegralConversion:
     if (FromType->isRealFloatingType() && ToType->isIntegralType(Ctx)) {
       return NK_Type_Narrowing;
-    } else if (FromType->isIntegralType(Ctx) && ToType->isRealFloatingType()) {
+    } else if (FromType->isIntegralOrUnscopedEnumerationType() &&
+               ToType->isRealFloatingType()) {
       llvm::APSInt IntConstantValue;
       const Expr *Initializer = IgnoreNarrowingConversion(Converted);
       assert(Initializer && "Unknown conversion expression");
@@ -1475,10 +1476,8 @@
     const auto *ToFPT = cast<FunctionProtoType>(ToFn);
     if (FromFPT->isNothrow(Context) && !ToFPT->isNothrow(Context)) {
       FromFn = cast<FunctionType>(
-          Context.getFunctionType(FromFPT->getReturnType(),
-                                  FromFPT->getParamTypes(),
-                                  FromFPT->getExtProtoInfo().withExceptionSpec(
-                                      FunctionProtoType::ExceptionSpecInfo()))
+          Context.getFunctionTypeWithExceptionSpec(QualType(FromFPT, 0),
+                                                   EST_None)
                  .getTypePtr());
       Changed = true;
     }
@@ -1766,8 +1765,8 @@
                                    (FromType == S.Context.LongDoubleTy &&
                                     ToType == S.Context.Float128Ty));
       if (Float128AndLongDouble &&
-          (&S.Context.getFloatTypeSemantics(S.Context.LongDoubleTy) !=
-           &llvm::APFloat::IEEEdouble()))
+          (&S.Context.getFloatTypeSemantics(S.Context.LongDoubleTy) ==
+           &llvm::APFloat::PPCDoubleDouble()))
         return false;
     }
     // Floating point conversions (C++ 4.8).
@@ -5145,7 +5144,8 @@
       *this, From->getLocStart(), From->getType(), FromClassification, Method,
       Method->getParent());
   if (ICS.isBad()) {
-    if (ICS.Bad.Kind == BadConversionSequence::bad_qualifiers) {
+    switch (ICS.Bad.Kind) {
+    case BadConversionSequence::bad_qualifiers: {
       Qualifiers FromQs = FromRecordType.getQualifiers();
       Qualifiers ToQs = DestType.getQualifiers();
       unsigned CVR = FromQs.getCVRQualifiers() & ~ToQs.getCVRQualifiers();
@@ -5158,10 +5158,28 @@
           << Method->getDeclName();
         return ExprError();
       }
+      break;
+    }
+
+    case BadConversionSequence::lvalue_ref_to_rvalue:
+    case BadConversionSequence::rvalue_ref_to_lvalue: {
+      bool IsRValueQualified =
+        Method->getRefQualifier() == RefQualifierKind::RQ_RValue;
+      Diag(From->getLocStart(), diag::err_member_function_call_bad_ref)
+        << Method->getDeclName() << FromClassification.isRValue()
+        << IsRValueQualified;
+      Diag(Method->getLocation(), diag::note_previous_decl)
+        << Method->getDeclName();
+      return ExprError();
+    }
+
+    case BadConversionSequence::no_conversion:
+    case BadConversionSequence::unrelated_class:
+      break;
     }
 
     return Diag(From->getLocStart(),
-                diag::err_implicit_object_parameter_init)
+                diag::err_member_function_call_bad_type)
        << ImplicitParamRecordType << FromRecordType << From->getSourceRange();
   }
 
@@ -5790,7 +5808,7 @@
                                      HadMultipleCandidates,
                                      ExplicitConversions))
         return ExprError();
-    // fall through 'OR_Deleted' case.
+      LLVM_FALLTHROUGH;
     case OR_Deleted:
       // We'll complain below about a non-integral condition type.
       break;
@@ -5940,6 +5958,13 @@
   Candidate.IgnoreObjectArgument = false;
   Candidate.ExplicitCallArguments = Args.size();
 
+  if (Function->isMultiVersion() &&
+      !Function->getAttr<TargetAttr>()->isDefaultVersion()) {
+    Candidate.Viable = false;
+    Candidate.FailureKind = ovl_non_default_multiversion_function;
+    return;
+  }
+
   if (Constructor) {
     // C++ [class.copy]p3:
     //   A member function template is never instantiated to perform the copy
@@ -6343,24 +6368,36 @@
                                  OverloadCandidateSet& CandidateSet,
                                  TemplateArgumentListInfo *ExplicitTemplateArgs,
                                  bool SuppressUserConversions,
-                                 bool PartialOverloading) {
+                                 bool PartialOverloading,
+                                 bool FirstArgumentIsBase) {
   for (UnresolvedSetIterator F = Fns.begin(), E = Fns.end(); F != E; ++F) {
     NamedDecl *D = F.getDecl()->getUnderlyingDecl();
     if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+      ArrayRef<Expr *> FunctionArgs = Args;
       if (isa<CXXMethodDecl>(FD) && !cast<CXXMethodDecl>(FD)->isStatic()) {
         QualType ObjectType;
         Expr::Classification ObjectClassification;
-        if (Expr *E = Args[0]) {
-          // Use the explit base to restrict the lookup:
-          ObjectType = E->getType();
-          ObjectClassification = E->Classify(Context);
-        } // .. else there is an implit base.
+        if (Args.size() > 0) {
+          if (Expr *E = Args[0]) {
+            // Use the explit base to restrict the lookup:
+            ObjectType = E->getType();
+            ObjectClassification = E->Classify(Context);
+          } // .. else there is an implit base.
+          FunctionArgs = Args.slice(1);
+        }
         AddMethodCandidate(cast<CXXMethodDecl>(FD), F.getPair(),
                            cast<CXXMethodDecl>(FD)->getParent(), ObjectType,
-                           ObjectClassification, Args.slice(1), CandidateSet,
+                           ObjectClassification, FunctionArgs, CandidateSet,
                            SuppressUserConversions, PartialOverloading);
       } else {
-        AddOverloadCandidate(FD, F.getPair(), Args, CandidateSet,
+        // Slice the first argument (which is the base) when we access
+        // static method as non-static
+        if (Args.size() > 0 && (!Args[0] || (FirstArgumentIsBase && isa<CXXMethodDecl>(FD) &&
+                                             !isa<CXXConstructorDecl>(FD)))) {
+          assert(cast<CXXMethodDecl>(FD)->isStatic());
+          FunctionArgs = Args.slice(1);
+        }
+        AddOverloadCandidate(FD, F.getPair(), FunctionArgs, CandidateSet,
                              SuppressUserConversions, PartialOverloading);
       }
     } else {
@@ -6552,6 +6589,12 @@
     Candidate.DeductionFailure.Data = FailedAttr;
     return;
   }
+
+  if (Method->isMultiVersion() &&
+      !Method->getAttr<TargetAttr>()->isDefaultVersion()) {
+    Candidate.Viable = false;
+    Candidate.FailureKind = ovl_non_default_multiversion_function;
+  }
 }
 
 /// \brief Add a C++ member function template as a candidate to the candidate
@@ -6955,6 +6998,12 @@
     Candidate.DeductionFailure.Data = FailedAttr;
     return;
   }
+
+  if (Conversion->isMultiVersion() &&
+      !Conversion->getAttr<TargetAttr>()->isDefaultVersion()) {
+    Candidate.Viable = false;
+    Candidate.FailureKind = ovl_non_default_multiversion_function;
+  }
 }
 
 /// \brief Adds a conversion function template specialization
@@ -7603,53 +7652,62 @@
   SmallVectorImpl<BuiltinCandidateTypeSet> &CandidateTypes;
   OverloadCandidateSet &CandidateSet;
 
-  // Define some constants used to index and iterate over the arithemetic types
-  // provided via the getArithmeticType() method below.
-  // The "promoted arithmetic types" are the arithmetic
+  static constexpr int ArithmeticTypesCap = 24;
+  SmallVector<CanQualType, ArithmeticTypesCap> ArithmeticTypes;
+
+  // Define some indices used to iterate over the arithemetic types in
+  // ArithmeticTypes.  The "promoted arithmetic types" are the arithmetic
   // types are that preserved by promotion (C++ [over.built]p2).
-  static const unsigned FirstIntegralType = 4;
-  static const unsigned LastIntegralType = 21;
-  static const unsigned FirstPromotedIntegralType = 4,
-                        LastPromotedIntegralType = 12;
-  static const unsigned FirstPromotedArithmeticType = 0,
-                        LastPromotedArithmeticType = 12;
-  static const unsigned NumArithmeticTypes = 21;
+  unsigned FirstIntegralType,
+           LastIntegralType;
+  unsigned FirstPromotedIntegralType,
+           LastPromotedIntegralType;
+  unsigned FirstPromotedArithmeticType,
+           LastPromotedArithmeticType;
+  unsigned NumArithmeticTypes;
 
-  /// \brief Get the canonical type for a given arithmetic type index.
-  CanQualType getArithmeticType(unsigned index) {
-    assert(index < NumArithmeticTypes);
-    static CanQualType ASTContext::* const
-      ArithmeticTypes[NumArithmeticTypes] = {
-      // Start of promoted types.
-      &ASTContext::FloatTy,
-      &ASTContext::DoubleTy,
-      &ASTContext::LongDoubleTy,
-      &ASTContext::Float128Ty,
+  void InitArithmeticTypes() {
+    // Start of promoted types.
+    FirstPromotedArithmeticType = 0;
+    ArithmeticTypes.push_back(S.Context.FloatTy);
+    ArithmeticTypes.push_back(S.Context.DoubleTy);
+    ArithmeticTypes.push_back(S.Context.LongDoubleTy);
+    if (S.Context.getTargetInfo().hasFloat128Type())
+      ArithmeticTypes.push_back(S.Context.Float128Ty);
 
-      // Start of integral types.
-      &ASTContext::IntTy,
-      &ASTContext::LongTy,
-      &ASTContext::LongLongTy,
-      &ASTContext::Int128Ty,
-      &ASTContext::UnsignedIntTy,
-      &ASTContext::UnsignedLongTy,
-      &ASTContext::UnsignedLongLongTy,
-      &ASTContext::UnsignedInt128Ty,
-      // End of promoted types.
+    // Start of integral types.
+    FirstIntegralType = ArithmeticTypes.size();
+    FirstPromotedIntegralType = ArithmeticTypes.size();
+    ArithmeticTypes.push_back(S.Context.IntTy);
+    ArithmeticTypes.push_back(S.Context.LongTy);
+    ArithmeticTypes.push_back(S.Context.LongLongTy);
+    if (S.Context.getTargetInfo().hasInt128Type())
+      ArithmeticTypes.push_back(S.Context.Int128Ty);
+    ArithmeticTypes.push_back(S.Context.UnsignedIntTy);
+    ArithmeticTypes.push_back(S.Context.UnsignedLongTy);
+    ArithmeticTypes.push_back(S.Context.UnsignedLongLongTy);
+    if (S.Context.getTargetInfo().hasInt128Type())
+      ArithmeticTypes.push_back(S.Context.UnsignedInt128Ty);
+    LastPromotedIntegralType = ArithmeticTypes.size();
+    LastPromotedArithmeticType = ArithmeticTypes.size();
+    // End of promoted types.
 
-      &ASTContext::BoolTy,
-      &ASTContext::CharTy,
-      &ASTContext::WCharTy,
-      &ASTContext::Char16Ty,
-      &ASTContext::Char32Ty,
-      &ASTContext::SignedCharTy,
-      &ASTContext::ShortTy,
-      &ASTContext::UnsignedCharTy,
-      &ASTContext::UnsignedShortTy,
-      // End of integral types.
-      // FIXME: What about complex? What about half?
-    };
-    return S.Context.*ArithmeticTypes[index];
+    ArithmeticTypes.push_back(S.Context.BoolTy);
+    ArithmeticTypes.push_back(S.Context.CharTy);
+    ArithmeticTypes.push_back(S.Context.WCharTy);
+    ArithmeticTypes.push_back(S.Context.Char16Ty);
+    ArithmeticTypes.push_back(S.Context.Char32Ty);
+    ArithmeticTypes.push_back(S.Context.SignedCharTy);
+    ArithmeticTypes.push_back(S.Context.ShortTy);
+    ArithmeticTypes.push_back(S.Context.UnsignedCharTy);
+    ArithmeticTypes.push_back(S.Context.UnsignedShortTy);
+    LastIntegralType = ArithmeticTypes.size();
+    NumArithmeticTypes = ArithmeticTypes.size();
+    // End of integral types.
+    // FIXME: What about complex? What about half?
+
+    assert(ArithmeticTypes.size() <= ArithmeticTypesCap &&
+           "Enough inline storage for all arithmetic types.");
   }
 
   /// \brief Helper method to factor out the common pattern of adding overloads
@@ -7708,18 +7766,8 @@
         HasArithmeticOrEnumeralCandidateType),
       CandidateTypes(CandidateTypes),
       CandidateSet(CandidateSet) {
-    // Validate some of our static helper constants in debug builds.
-    assert(getArithmeticType(FirstPromotedIntegralType) == S.Context.IntTy &&
-           "Invalid first promoted integral type");
-    assert(getArithmeticType(LastPromotedIntegralType - 1)
-             == S.Context.UnsignedInt128Ty &&
-           "Invalid last promoted integral type");
-    assert(getArithmeticType(FirstPromotedArithmeticType)
-             == S.Context.FloatTy &&
-           "Invalid first promoted arithmetic type");
-    assert(getArithmeticType(LastPromotedArithmeticType - 1)
-             == S.Context.UnsignedInt128Ty &&
-           "Invalid last promoted arithmetic type");
+
+    InitArithmeticTypes();
   }
 
   // C++ [over.built]p3:
@@ -7746,7 +7794,7 @@
     for (unsigned Arith = (Op == OO_PlusPlus? 0 : 1);
          Arith < NumArithmeticTypes; ++Arith) {
       addPlusPlusMinusMinusStyleOverloads(
-        getArithmeticType(Arith),
+        ArithmeticTypes[Arith],
         VisibleTypeConversionsQuals.hasVolatile(),
         VisibleTypeConversionsQuals.hasRestrict());
     }
@@ -7819,7 +7867,7 @@
 
     for (unsigned Arith = FirstPromotedArithmeticType;
          Arith < LastPromotedArithmeticType; ++Arith) {
-      QualType ArithTy = getArithmeticType(Arith);
+      QualType ArithTy = ArithmeticTypes[Arith];
       S.AddBuiltinCandidate(&ArithTy, Args, CandidateSet);
     }
 
@@ -7859,7 +7907,7 @@
 
     for (unsigned Int = FirstPromotedIntegralType;
          Int < LastPromotedIntegralType; ++Int) {
-      QualType IntTy = getArithmeticType(Int);
+      QualType IntTy = ArithmeticTypes[Int];
       S.AddBuiltinCandidate(&IntTy, Args, CandidateSet);
     }
 
@@ -8087,8 +8135,8 @@
          Left < LastPromotedArithmeticType; ++Left) {
       for (unsigned Right = FirstPromotedArithmeticType;
            Right < LastPromotedArithmeticType; ++Right) {
-        QualType LandR[2] = { getArithmeticType(Left),
-                              getArithmeticType(Right) };
+        QualType LandR[2] = { ArithmeticTypes[Left],
+                              ArithmeticTypes[Right] };
         S.AddBuiltinCandidate(LandR, Args, CandidateSet);
       }
     }
@@ -8131,8 +8179,8 @@
          Left < LastPromotedIntegralType; ++Left) {
       for (unsigned Right = FirstPromotedIntegralType;
            Right < LastPromotedIntegralType; ++Right) {
-        QualType LandR[2] = { getArithmeticType(Left),
-                              getArithmeticType(Right) };
+        QualType LandR[2] = { ArithmeticTypes[Left],
+                              ArithmeticTypes[Right] };
         S.AddBuiltinCandidate(LandR, Args, CandidateSet);
       }
     }
@@ -8312,18 +8360,18 @@
       for (unsigned Right = FirstPromotedArithmeticType;
            Right < LastPromotedArithmeticType; ++Right) {
         QualType ParamTypes[2];
-        ParamTypes[1] = getArithmeticType(Right);
+        ParamTypes[1] = ArithmeticTypes[Right];
 
         // Add this built-in operator as a candidate (VQ is empty).
         ParamTypes[0] =
-          S.Context.getLValueReferenceType(getArithmeticType(Left));
+          S.Context.getLValueReferenceType(ArithmeticTypes[Left]);
         S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                               /*IsAssigmentOperator=*/isEqualOp);
 
         // Add this built-in operator as a candidate (VQ is 'volatile').
         if (VisibleTypeConversionsQuals.hasVolatile()) {
           ParamTypes[0] =
-            S.Context.getVolatileType(getArithmeticType(Left));
+            S.Context.getVolatileType(ArithmeticTypes[Left]);
           ParamTypes[0] = S.Context.getLValueReferenceType(ParamTypes[0]);
           S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
                                 /*IsAssigmentOperator=*/isEqualOp);
@@ -8378,15 +8426,15 @@
       for (unsigned Right = FirstPromotedIntegralType;
            Right < LastPromotedIntegralType; ++Right) {
         QualType ParamTypes[2];
-        ParamTypes[1] = getArithmeticType(Right);
+        ParamTypes[1] = ArithmeticTypes[Right];
 
         // Add this built-in operator as a candidate (VQ is empty).
         ParamTypes[0] =
-          S.Context.getLValueReferenceType(getArithmeticType(Left));
+          S.Context.getLValueReferenceType(ArithmeticTypes[Left]);
         S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
         if (VisibleTypeConversionsQuals.hasVolatile()) {
           // Add this built-in operator as a candidate (VQ is 'volatile').
-          ParamTypes[0] = getArithmeticType(Left);
+          ParamTypes[0] = ArithmeticTypes[Left];
           ParamTypes[0] = S.Context.getVolatileType(ParamTypes[0]);
           ParamTypes[0] = S.Context.getLValueReferenceType(ParamTypes[0]);
           S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet);
@@ -8640,7 +8688,7 @@
   case OO_Plus: // '+' is either unary or binary
     if (Args.size() == 1)
       OpBuilder.addUnaryPlusPointerOverloads();
-    // Fall through.
+    LLVM_FALLTHROUGH;
 
   case OO_Minus: // '-' is either unary or binary
     if (Args.size() == 1) {
@@ -8671,7 +8719,7 @@
   case OO_EqualEqual:
   case OO_ExclaimEqual:
     OpBuilder.addEqualEqualOrNotEqualMemberPointerOrNullptrOverloads();
-    // Fall through.
+    LLVM_FALLTHROUGH;
 
   case OO_Less:
   case OO_Greater:
@@ -8681,6 +8729,9 @@
     OpBuilder.addGenericBinaryArithmeticOverloads();
     break;
 
+  case OO_Spaceship:
+    llvm_unreachable("<=> expressions not supported yet");
+
   case OO_Percent:
   case OO_Caret:
   case OO_Pipe:
@@ -8705,12 +8756,12 @@
 
   case OO_Equal:
     OpBuilder.addAssignmentMemberPointerOrEnumeralOverloads();
-    // Fall through.
+    LLVM_FALLTHROUGH;
 
   case OO_PlusEqual:
   case OO_MinusEqual:
     OpBuilder.addAssignmentPointerOverloads(Op == OO_Equal);
-    // Fall through.
+    LLVM_FALLTHROUGH;
 
   case OO_StarEqual:
   case OO_SlashEqual:
@@ -8965,11 +9016,17 @@
     // C++14 [over.match.best]p1 section 2 bullet 3.
   }
 
-  //    -- F1 is generated from a deduction-guide and F2 is not
-  auto *Guide1 = dyn_cast_or_null<CXXDeductionGuideDecl>(Cand1.Function);
-  auto *Guide2 = dyn_cast_or_null<CXXDeductionGuideDecl>(Cand2.Function);
-  if (Guide1 && Guide2 && Guide1->isImplicit() != Guide2->isImplicit())
-    return Guide2->isImplicit();
+  // FIXME: Work around a defect in the C++17 guaranteed copy elision wording,
+  // as combined with the resolution to CWG issue 243.
+  //
+  // When the context is initialization by constructor ([over.match.ctor] or
+  // either phase of [over.match.list]), a constructor is preferred over
+  // a conversion function.
+  if (Kind == OverloadCandidateSet::CSK_InitByConstructor && NumArgs == 1 &&
+      Cand1.Function && Cand2.Function &&
+      isa<CXXConstructorDecl>(Cand1.Function) !=
+          isa<CXXConstructorDecl>(Cand2.Function))
+    return isa<CXXConstructorDecl>(Cand1.Function);
 
   //    -- F1 is a non-template function and F2 is a function template
   //       specialization, or, if not that,
@@ -9015,17 +9072,20 @@
     // Inherited from sibling base classes: still ambiguous.
   }
 
-  // FIXME: Work around a defect in the C++17 guaranteed copy elision wording,
-  // as combined with the resolution to CWG issue 243.
-  //
-  // When the context is initialization by constructor ([over.match.ctor] or
-  // either phase of [over.match.list]), a constructor is preferred over
-  // a conversion function.
-  if (Kind == OverloadCandidateSet::CSK_InitByConstructor && NumArgs == 1 &&
-      Cand1.Function && Cand2.Function &&
-      isa<CXXConstructorDecl>(Cand1.Function) !=
-          isa<CXXConstructorDecl>(Cand2.Function))
-    return isa<CXXConstructorDecl>(Cand1.Function);
+  // Check C++17 tie-breakers for deduction guides.
+  {
+    auto *Guide1 = dyn_cast_or_null<CXXDeductionGuideDecl>(Cand1.Function);
+    auto *Guide2 = dyn_cast_or_null<CXXDeductionGuideDecl>(Cand2.Function);
+    if (Guide1 && Guide2) {
+      //  -- F1 is generated from a deduction-guide and F2 is not
+      if (Guide1->isImplicit() != Guide2->isImplicit())
+        return Guide2->isImplicit();
+
+      //  -- F1 is the copy deduction candidate(16.3.1.8) and F2 is not
+      if (Guide1->isCopyDeductionCandidate())
+        return true;
+    }
+  }
 
   // Check for enable_if value-based overload resolution.
   if (Cand1.Function && Cand2.Function) {
@@ -9352,6 +9412,8 @@
                                  QualType DestType, bool TakingAddress) {
   if (TakingAddress && !checkAddressOfCandidateIsAvailable(*this, Fn))
     return;
+  if (Fn->isMultiVersion() && !Fn->getAttr<TargetAttr>()->isDefaultVersion())
+    return;
 
   std::string FnDesc;
   OverloadCandidateKind K = ClassifyOverloadCandidate(*this, Found, Fn, FnDesc);
@@ -10153,6 +10215,9 @@
     assert(!Available);
     break;
   }
+  case ovl_non_default_multiversion_function:
+    // Do nothing, these should simply be ignored.
+    break;
   }
 }
 
@@ -10470,9 +10535,8 @@
   }
 }
 
-/// PrintOverloadCandidates - When overload resolution fails, prints
-/// diagnostic messages containing the candidates in the candidate
-/// set.
+/// When overload resolution fails, prints diagnostic messages containing the
+/// candidates in the candidate set.
 void OverloadCandidateSet::NoteCandidates(
     Sema &S, OverloadCandidateDisplayKind OCD, ArrayRef<Expr *> Args,
     StringRef Opc, SourceLocation OpLoc,
@@ -10495,7 +10559,7 @@
     }
   }
 
-  std::sort(Cands.begin(), Cands.end(),
+  std::stable_sort(Cands.begin(), Cands.end(),
             CompareOverloadCandidatesForDisplay(S, OpLoc, Args.size(), Kind));
 
   bool ReportedAmbiguousConversions = false;
@@ -10676,7 +10740,7 @@
     return true;
 
   auto *FPT = FD->getType()->castAs<FunctionProtoType>();
-  if (S.getLangOpts().CPlusPlus1z &&
+  if (S.getLangOpts().CPlusPlus17 &&
       isUnresolvedExceptionSpec(FPT->getExceptionSpecType()) &&
       !S.ResolveExceptionSpec(Loc, FPT))
     return true;
@@ -10889,6 +10953,12 @@
         if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext))
           if (!Caller->isImplicit() && !S.IsAllowedCUDACall(Caller, FunDecl))
             return false;
+      if (FunDecl->isMultiVersion()) {
+        const auto *TA = FunDecl->getAttr<TargetAttr>();
+        assert(TA && "Multiversioned functions require a target attribute");
+        if (!TA->isDefaultVersion())
+          return false;
+      }
 
       // If any candidate has a placeholder return type, trigger its deduction
       // now.
@@ -11286,9 +11356,6 @@
   return Matched;
 }
 
-
-
-
 // Resolve and fix an overloaded expression that can be resolved
 // because it identifies a single function template specialization.
 //
@@ -11955,7 +12022,7 @@
   if (Input->isTypeDependent()) {
     if (Fns.empty())
       return new (Context) UnaryOperator(Input, Opc, Context.DependentTy,
-                                         VK_RValue, OK_Ordinary, OpLoc);
+                                         VK_RValue, OK_Ordinary, OpLoc, false);
 
     CXXRecordDecl *NamingClass = nullptr; // lookup ignores member operators
     UnresolvedLookupExpr *Fn
@@ -13485,7 +13552,7 @@
 
         return new (Context) UnaryOperator(SubExpr, UO_AddrOf, MemPtrType,
                                            VK_RValue, OK_Ordinary,
-                                           UnOp->getOperatorLoc());
+                                           UnOp->getOperatorLoc(), false);
       }
     }
     Expr *SubExpr = FixOverloadedFunctionReference(UnOp->getSubExpr(),
@@ -13496,7 +13563,7 @@
     return new (Context) UnaryOperator(SubExpr, UO_AddrOf,
                                      Context.getPointerType(SubExpr->getType()),
                                        VK_RValue, OK_Ordinary,
-                                       UnOp->getOperatorLoc());
+                                       UnOp->getOperatorLoc(), false);
   }
 
   // C++ [except.spec]p17:
diff --git a/lib/Sema/SemaPseudoObject.cpp b/lib/Sema/SemaPseudoObject.cpp
index d159172..f09a6f7 100644
--- a/lib/Sema/SemaPseudoObject.cpp
+++ b/lib/Sema/SemaPseudoObject.cpp
@@ -132,7 +132,8 @@
                                              uop->getType(),
                                              uop->getValueKind(),
                                              uop->getObjectKind(),
-                                             uop->getOperatorLoc());
+                                             uop->getOperatorLoc(),
+                                             uop->canOverflow());
       }
 
       if (GenericSelectionExpr *gse = dyn_cast<GenericSelectionExpr>(e)) {
@@ -524,15 +525,18 @@
   addSemanticExpr(result.get());
   if (UnaryOperator::isPrefix(opcode) && !captureSetValueAsResult() &&
       !result.get()->getType()->isVoidType() &&
-      (result.get()->isTypeDependent() || CanCaptureValue(result.get())))
-    setResultToLastSemantic();
-
-  UnaryOperator *syntactic =
-    new (S.Context) UnaryOperator(syntacticOp, opcode, resultType,
-                                  VK_LValue, OK_Ordinary, opcLoc);
-  return complete(syntactic);
-}
-
+      (result.get()->isTypeDependent() || CanCaptureValue(result.get())))
+    setResultToLastSemantic();
+
+  UnaryOperator *syntactic = new (S.Context) UnaryOperator(
+      syntacticOp, opcode, resultType, VK_LValue, OK_Ordinary, opcLoc,
+      !resultType->isDependentType()
+          ? S.Context.getTypeSize(resultType) >=
+                S.Context.getTypeSize(S.Context.IntTy)
+          : false);
+  return complete(syntactic);
+}
+
 
 //===----------------------------------------------------------------------===//
 //  Objective-C @property and implicit property references
@@ -1442,8 +1446,7 @@
 
 Expr *MSPropertyOpBuilder::rebuildAndCaptureObject(Expr *syntacticBase) {
   InstanceBase = capture(RefExpr->getBaseExpr());
-  std::for_each(CallArgs.begin(), CallArgs.end(),
-                [this](Expr *&Arg) { Arg = capture(Arg); });
+  llvm::for_each(CallArgs, [this](Expr *&Arg) { Arg = capture(Arg); });
   syntacticBase = Rebuilder(S, [=](Expr *, unsigned Idx) -> Expr * {
                     switch (Idx) {
                     case 0:
@@ -1551,7 +1554,7 @@
   // Do nothing if the operand is dependent.
   if (op->isTypeDependent())
     return new (Context) UnaryOperator(op, opcode, Context.DependentTy,
-                                       VK_RValue, OK_Ordinary, opcLoc);
+                                       VK_RValue, OK_Ordinary, opcLoc, false);
 
   assert(UnaryOperator::isIncrementDecrementOp(opcode));
   Expr *opaqueRef = op->IgnoreParens();
@@ -1631,15 +1634,15 @@
 /// capable of rebuilding a tree without stripping implicit
 /// operations.
 Expr *Sema::recreateSyntacticForm(PseudoObjectExpr *E) {
-  Expr *syntax = E->getSyntacticForm();
-  if (UnaryOperator *uop = dyn_cast<UnaryOperator>(syntax)) {
-    Expr *op = stripOpaqueValuesFromPseudoObjectRef(*this, uop->getSubExpr());
-    return new (Context) UnaryOperator(op, uop->getOpcode(), uop->getType(),
-                                       uop->getValueKind(), uop->getObjectKind(),
-                                       uop->getOperatorLoc());
-  } else if (CompoundAssignOperator *cop
-               = dyn_cast<CompoundAssignOperator>(syntax)) {
-    Expr *lhs = stripOpaqueValuesFromPseudoObjectRef(*this, cop->getLHS());
+  Expr *syntax = E->getSyntacticForm();
+  if (UnaryOperator *uop = dyn_cast<UnaryOperator>(syntax)) {
+    Expr *op = stripOpaqueValuesFromPseudoObjectRef(*this, uop->getSubExpr());
+    return new (Context) UnaryOperator(
+        op, uop->getOpcode(), uop->getType(), uop->getValueKind(),
+        uop->getObjectKind(), uop->getOperatorLoc(), uop->canOverflow());
+  } else if (CompoundAssignOperator *cop
+               = dyn_cast<CompoundAssignOperator>(syntax)) {
+    Expr *lhs = stripOpaqueValuesFromPseudoObjectRef(*this, cop->getLHS());
     Expr *rhs = cast<OpaqueValueExpr>(cop->getRHS())->getSourceExpr();
     return new (Context) CompoundAssignOperator(lhs, rhs, cop->getOpcode(),
                                                 cop->getType(),
diff --git a/lib/Sema/SemaStmt.cpp b/lib/Sema/SemaStmt.cpp
index 3a3eb5e..b770185 100644
--- a/lib/Sema/SemaStmt.cpp
+++ b/lib/Sema/SemaStmt.cpp
@@ -14,6 +14,7 @@
 #include "clang/Sema/SemaInternal.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTDiagnostic.h"
+#include "clang/AST/ASTLambda.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/DeclObjC.h"
@@ -127,34 +128,47 @@
 /// warning from firing.
 static bool DiagnoseUnusedComparison(Sema &S, const Expr *E) {
   SourceLocation Loc;
-  bool IsNotEqual, CanAssign, IsRelational;
+  bool CanAssign;
+  enum { Equality, Inequality, Relational, ThreeWay } Kind;
 
   if (const BinaryOperator *Op = dyn_cast<BinaryOperator>(E)) {
     if (!Op->isComparisonOp())
       return false;
 
-    IsRelational = Op->isRelationalOp();
+    if (Op->getOpcode() == BO_EQ)
+      Kind = Equality;
+    else if (Op->getOpcode() == BO_NE)
+      Kind = Inequality;
+    else if (Op->getOpcode() == BO_Cmp)
+      Kind = ThreeWay;
+    else {
+      assert(Op->isRelationalOp());
+      Kind = Relational;
+    }
     Loc = Op->getOperatorLoc();
-    IsNotEqual = Op->getOpcode() == BO_NE;
     CanAssign = Op->getLHS()->IgnoreParenImpCasts()->isLValue();
   } else if (const CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E)) {
     switch (Op->getOperator()) {
-    default:
-      return false;
     case OO_EqualEqual:
+      Kind = Equality;
+      break;
     case OO_ExclaimEqual:
-      IsRelational = false;
+      Kind = Inequality;
       break;
     case OO_Less:
     case OO_Greater:
     case OO_GreaterEqual:
     case OO_LessEqual:
-      IsRelational = true;
+      Kind = Relational;
       break;
+    case OO_Spaceship:
+      Kind = ThreeWay;
+      break;
+    default:
+      return false;
     }
 
     Loc = Op->getOperatorLoc();
-    IsNotEqual = Op->getOperator() == OO_ExclaimEqual;
     CanAssign = Op->getArg(0)->IgnoreParenImpCasts()->isLValue();
   } else {
     // Not a typo-prone comparison.
@@ -167,15 +181,15 @@
     return false;
 
   S.Diag(Loc, diag::warn_unused_comparison)
-    << (unsigned)IsRelational << (unsigned)IsNotEqual << E->getSourceRange();
+    << (unsigned)Kind << E->getSourceRange();
 
   // If the LHS is a plausible entity to assign to, provide a fixit hint to
   // correct common typos.
-  if (!IsRelational && CanAssign) {
-    if (IsNotEqual)
+  if (CanAssign) {
+    if (Kind == Inequality)
       S.Diag(Loc, diag::note_inequality_comparison_to_or_assign)
         << FixItHint::CreateReplacement(Loc, "|=");
-    else
+    else if (Kind == Equality)
       S.Diag(Loc, diag::note_equality_comparison_to_assign)
         << FixItHint::CreateReplacement(Loc, "=");
   }
@@ -323,8 +337,8 @@
   DiagRuntimeBehavior(Loc, nullptr, PDiag(DiagID) << R1 << R2);
 }
 
-void Sema::ActOnStartOfCompoundStmt() {
-  PushCompoundScope();
+void Sema::ActOnStartOfCompoundStmt(bool IsStmtExpr) {
+  PushCompoundScope(IsStmtExpr);
 }
 
 void Sema::ActOnFinishOfCompoundStmt() {
@@ -375,7 +389,7 @@
       DiagnoseEmptyLoopBody(Elts[i], Elts[i + 1]);
   }
 
-  return new (Context) CompoundStmt(Context, Elts, L, R);
+  return CompoundStmt::Create(Context, Elts, L, R);
 }
 
 StmtResult
@@ -2011,7 +2025,7 @@
 
 /// Build a variable declaration for a for-range statement.
 VarDecl *BuildForRangeVarDecl(Sema &SemaRef, SourceLocation Loc,
-                              QualType Type, const char *Name) {
+                              QualType Type, StringRef Name) {
   DeclContext *DC = SemaRef.CurContext;
   IdentifierInfo *II = &SemaRef.PP.getIdentifierTable().get(Name);
   TypeSourceInfo *TInfo = SemaRef.Context.getTrivialTypeSourceInfo(Type, Loc);
@@ -2080,10 +2094,12 @@
   }
 
   // Build  auto && __range = range-init
+  // Divide by 2, since the variables are in the inner scope (loop body).
+  const auto DepthStr = std::to_string(S->getDepth() / 2);
   SourceLocation RangeLoc = Range->getLocStart();
   VarDecl *RangeVar = BuildForRangeVarDecl(*this, RangeLoc,
                                            Context.getAutoRRefDeductType(),
-                                           "__range");
+                                           std::string("__range") + DepthStr);
   if (FinishForRangeVarDecl(*this, RangeVar, Range, RangeLoc,
                             diag::err_for_range_deduction_failure)) {
     LoopVar->setInvalidDecl();
@@ -2326,10 +2342,12 @@
       return StmtError();
 
     // Build auto __begin = begin-expr, __end = end-expr.
+    // Divide by 2, since the variables are in the inner scope (loop body).
+    const auto DepthStr = std::to_string(S->getDepth() / 2);
     VarDecl *BeginVar = BuildForRangeVarDecl(*this, ColonLoc, AutoType,
-                                             "__begin");
+                                             std::string("__begin") + DepthStr);
     VarDecl *EndVar = BuildForRangeVarDecl(*this, ColonLoc, AutoType,
-                                           "__end");
+                                           std::string("__end") + DepthStr);
 
     // Build begin-expr and end-expr and attach to __begin and __end variables.
     ExprResult BeginExpr, EndExpr;
@@ -2480,7 +2498,7 @@
     // C++1z removes this restriction.
     QualType BeginType = BeginVar->getType(), EndType = EndVar->getType();
     if (!Context.hasSameType(BeginType, EndType)) {
-      Diag(RangeLoc, getLangOpts().CPlusPlus1z
+      Diag(RangeLoc, getLangOpts().CPlusPlus17
                          ? diag::warn_for_range_begin_end_types_differ
                          : diag::ext_for_range_begin_end_types_differ)
           << BeginType << EndType;
@@ -3215,6 +3233,12 @@
                                             SourceLocation ReturnLoc,
                                             Expr *&RetExpr,
                                             AutoType *AT) {
+  // If this is the conversion function for a lambda, we choose to deduce it
+  // type from the corresponding call operator, not from the synthesized return
+  // statement within it. See Sema::DeduceReturnType.
+  if (isLambdaConversionOperator(FD))
+    return false;
+
   TypeLoc OrigResultType = getReturnTypeLoc(FD);
   QualType Deduced;
 
diff --git a/lib/Sema/SemaStmtAttr.cpp b/lib/Sema/SemaStmtAttr.cpp
index d43c8c3..e55e20c 100644
--- a/lib/Sema/SemaStmtAttr.cpp
+++ b/lib/Sema/SemaStmtAttr.cpp
@@ -43,9 +43,9 @@
     return nullptr;
   }
 
-  // If this is spelled as the standard C++1z attribute, but not in C++1z, warn
+  // If this is spelled as the standard C++17 attribute, but not in C++17, warn
   // about using it as an extension.
-  if (!S.getLangOpts().CPlusPlus1z && A.isCXX11Attribute() &&
+  if (!S.getLangOpts().CPlusPlus17 && A.isCXX11Attribute() &&
       !A.getScopeName())
     S.Diag(A.getLoc(), diag::ext_cxx17_attr) << A.getName();
 
diff --git a/lib/Sema/SemaTemplate.cpp b/lib/Sema/SemaTemplate.cpp
index 46cda5e..2e8a706 100644
--- a/lib/Sema/SemaTemplate.cpp
+++ b/lib/Sema/SemaTemplate.cpp
@@ -169,16 +169,16 @@
   MemberOfUnknownSpecialization = false;
 
   switch (Name.getKind()) {
-  case UnqualifiedId::IK_Identifier:
+  case UnqualifiedIdKind::IK_Identifier:
     TName = DeclarationName(Name.Identifier);
     break;
 
-  case UnqualifiedId::IK_OperatorFunctionId:
+  case UnqualifiedIdKind::IK_OperatorFunctionId:
     TName = Context.DeclarationNames.getCXXOperatorName(
                                               Name.OperatorFunctionId.Operator);
     break;
 
-  case UnqualifiedId::IK_LiteralOperatorId:
+  case UnqualifiedIdKind::IK_LiteralOperatorId:
     TName = Context.DeclarationNames.getCXXLiteralOperatorName(Name.Identifier);
     break;
 
@@ -783,6 +783,56 @@
     SemaRef.DiagnoseTemplateParameterShadow(Loc, PrevDecl);
 }
 
+/// Convert a parsed type into a parsed template argument. This is mostly
+/// trivial, except that we may have parsed a C++17 deduced class template
+/// specialization type, in which case we should form a template template
+/// argument instead of a type template argument.
+ParsedTemplateArgument Sema::ActOnTemplateTypeArgument(TypeResult ParsedType) {
+  TypeSourceInfo *TInfo;
+  QualType T = GetTypeFromParser(ParsedType.get(), &TInfo);
+  if (T.isNull())
+    return ParsedTemplateArgument();
+  assert(TInfo && "template argument with no location");
+
+  // If we might have formed a deduced template specialization type, convert
+  // it to a template template argument.
+  if (getLangOpts().CPlusPlus17) {
+    TypeLoc TL = TInfo->getTypeLoc();
+    SourceLocation EllipsisLoc;
+    if (auto PET = TL.getAs<PackExpansionTypeLoc>()) {
+      EllipsisLoc = PET.getEllipsisLoc();
+      TL = PET.getPatternLoc();
+    }
+
+    CXXScopeSpec SS;
+    if (auto ET = TL.getAs<ElaboratedTypeLoc>()) {
+      SS.Adopt(ET.getQualifierLoc());
+      TL = ET.getNamedTypeLoc();
+    }
+
+    if (auto DTST = TL.getAs<DeducedTemplateSpecializationTypeLoc>()) {
+      TemplateName Name = DTST.getTypePtr()->getTemplateName();
+      if (SS.isSet())
+        Name = Context.getQualifiedTemplateName(SS.getScopeRep(),
+                                                /*HasTemplateKeyword*/ false,
+                                                Name.getAsTemplateDecl());
+      ParsedTemplateArgument Result(SS, TemplateTy::make(Name),
+                                    DTST.getTemplateNameLoc());
+      if (EllipsisLoc.isValid())
+        Result = Result.getTemplatePackExpansion(EllipsisLoc);
+      return Result;
+    }
+  }
+
+  // This is a normal type template argument. Note, if the type template
+  // argument is an injected-class-name for a template, it has a dual nature
+  // and can be used as either a type or a template. We handle that in 
+  // convertTypeTemplateArgumentToTemplate.
+  return ParsedTemplateArgument(ParsedTemplateArgument::Type,
+                                ParsedType.get().getAsOpaquePtr(),
+                                TInfo->getTypeLoc().getLocStart());
+}
+
 /// ActOnTypeParameter - Called when a C++ template type parameter
 /// (e.g., "typename T") has been parsed. Typename specifies whether
 /// the keyword "typename" was used to declare the type parameter
@@ -792,7 +842,7 @@
 /// ParamNameLoc is the location of the parameter name (if any).
 /// If the type parameter has a default argument, it will be added
 /// later via ActOnTypeParameterDefault.
-Decl *Sema::ActOnTypeParameter(Scope *S, bool Typename,
+NamedDecl *Sema::ActOnTypeParameter(Scope *S, bool Typename,
                                SourceLocation EllipsisLoc,
                                SourceLocation KeyLoc,
                                IdentifierInfo *ParamName,
@@ -922,13 +972,67 @@
   return QualType();
 }
 
-Decl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D,
+NamedDecl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D,
                                           unsigned Depth,
                                           unsigned Position,
                                           SourceLocation EqualLoc,
                                           Expr *Default) {
   TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
 
+  // Check that we have valid decl-specifiers specified.
+  auto CheckValidDeclSpecifiers = [this, &D] {
+    // C++ [temp.param]
+    // p1 
+    //   template-parameter:
+    //     ...
+    //     parameter-declaration
+    // p2 
+    //   ... A storage class shall not be specified in a template-parameter
+    //   declaration.
+    // [dcl.typedef]p1: 
+    //   The typedef specifier [...] shall not be used in the decl-specifier-seq
+    //   of a parameter-declaration
+    const DeclSpec &DS = D.getDeclSpec();
+    auto EmitDiag = [this](SourceLocation Loc) {
+      Diag(Loc, diag::err_invalid_decl_specifier_in_nontype_parm)
+          << FixItHint::CreateRemoval(Loc);
+    };
+    if (DS.getStorageClassSpec() != DeclSpec::SCS_unspecified)
+      EmitDiag(DS.getStorageClassSpecLoc());
+    
+    if (DS.getThreadStorageClassSpec() != TSCS_unspecified)
+      EmitDiag(DS.getThreadStorageClassSpecLoc());
+    
+    // [dcl.inline]p1: 
+    //   The inline specifier can be applied only to the declaration or 
+    //   definition of a variable or function.
+    
+    if (DS.isInlineSpecified())
+      EmitDiag(DS.getInlineSpecLoc());
+    
+    // [dcl.constexpr]p1:
+    //   The constexpr specifier shall be applied only to the definition of a 
+    //   variable or variable template or the declaration of a function or 
+    //   function template.
+    
+    if (DS.isConstexprSpecified())
+      EmitDiag(DS.getConstexprSpecLoc());
+
+    // [dcl.fct.spec]p1:
+    //   Function-specifiers can be used only in function declarations.
+
+    if (DS.isVirtualSpecified())
+      EmitDiag(DS.getVirtualSpecLoc());
+
+    if (DS.isExplicitSpecified())
+      EmitDiag(DS.getExplicitSpecLoc());
+
+    if (DS.isNoreturnSpecified())
+      EmitDiag(DS.getNoreturnSpecLoc());
+  };
+
+  CheckValidDeclSpecifiers();
+  
   if (TInfo->getType()->isUndeducedType()) {
     Diag(D.getIdentifierLoc(),
          diag::warn_cxx14_compat_template_nontype_parm_auto_type)
@@ -999,7 +1103,7 @@
 /// ActOnTemplateTemplateParameter - Called when a C++ template template
 /// parameter (e.g. T in template <template \<typename> class T> class array)
 /// has been parsed. S is the current scope.
-Decl *Sema::ActOnTemplateTemplateParameter(Scope* S,
+NamedDecl *Sema::ActOnTemplateTemplateParameter(Scope* S,
                                            SourceLocation TmpLoc,
                                            TemplateParameterList *Params,
                                            SourceLocation EllipsisLoc,
@@ -1837,7 +1941,6 @@
   // for which some class template parameter without a default argument never
   // appears in a deduced context).
   bool AddedAny = false;
-  bool AddedCopyOrMove = false;
   for (NamedDecl *D : LookupConstructors(Transform.Primary)) {
     D = D->getUnderlyingDecl();
     if (D->isInvalidDecl() || D->isImplicit())
@@ -1854,20 +1957,22 @@
 
     Transform.transformConstructor(FTD, CD);
     AddedAny = true;
-
-    AddedCopyOrMove |= CD->isCopyOrMoveConstructor();
   }
 
-  // Synthesize an X() -> X<...> guide if there were no declared constructors.
-  // FIXME: The standard doesn't say (how) to do this.
+  // C++17 [over.match.class.deduct]
+  //    --  If C is not defined or does not declare any constructors, an
+  //    additional function template derived as above from a hypothetical
+  //    constructor C().
   if (!AddedAny)
     Transform.buildSimpleDeductionGuide(None);
 
-  // Synthesize an X(X<...>) -> X<...> guide if there was no declared constructor
-  // resembling a copy or move constructor.
-  // FIXME: The standard doesn't say (how) to do this.
-  if (!AddedCopyOrMove)
-    Transform.buildSimpleDeductionGuide(Transform.DeducedType);
+  //    -- An additional function template derived as above from a hypothetical
+  //    constructor C(C), called the copy deduction candidate.
+  cast<CXXDeductionGuideDecl>(
+      cast<FunctionTemplateDecl>(
+          Transform.buildSimpleDeductionGuide(Transform.DeducedType))
+          ->getTemplatedDecl())
+      ->setIsCopyDeductionCandidate();
 }
 
 /// \brief Diagnose the presence of a default template argument on a
@@ -3421,7 +3526,7 @@
                            const llvm::SmallBitVector &DeducibleParams) {
   for (unsigned I = 0, N = DeducibleParams.size(); I != N; ++I) {
     if (!DeducibleParams[I]) {
-      NamedDecl *Param = cast<NamedDecl>(TemplateParams->getParam(I));
+      NamedDecl *Param = TemplateParams->getParam(I);
       if (Param->getDeclName())
         S.Diag(Param->getLocation(), diag::note_non_deducible_parameter)
             << Param->getDeclName();
@@ -3502,7 +3607,7 @@
     TemplateParameterList *TemplateParams, StorageClass SC,
     bool IsPartialSpecialization) {
   // D must be variable template id.
-  assert(D.getName().getKind() == UnqualifiedId::IK_TemplateId &&
+  assert(D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId &&
          "Variable template specialization is declared with a template it.");
 
   TemplateIdAnnotation *TemplateId = D.getName().TemplateId;
@@ -4029,8 +4134,8 @@
       // We found something; return it.
       auto *LookupRD = dyn_cast<CXXRecordDecl>(LookupCtx);
       if (!AllowInjectedClassName && SS.isSet() && LookupRD &&
-          Name.getKind() == UnqualifiedId::IK_Identifier && Name.Identifier &&
-          LookupRD->getIdentifier() == Name.Identifier) {
+          Name.getKind() == UnqualifiedIdKind::IK_Identifier &&
+          Name.Identifier && LookupRD->getIdentifier() == Name.Identifier) {
         // C++14 [class.qual]p2:
         //   In a lookup in which function names are not ignored and the
         //   nested-name-specifier nominates a class C, if the name specified
@@ -4052,17 +4157,17 @@
   NestedNameSpecifier *Qualifier = SS.getScopeRep();
 
   switch (Name.getKind()) {
-  case UnqualifiedId::IK_Identifier:
+  case UnqualifiedIdKind::IK_Identifier:
     Result = TemplateTy::make(Context.getDependentTemplateName(Qualifier,
                                                               Name.Identifier));
     return TNK_Dependent_template_name;
 
-  case UnqualifiedId::IK_OperatorFunctionId:
+  case UnqualifiedIdKind::IK_OperatorFunctionId:
     Result = TemplateTy::make(Context.getDependentTemplateName(Qualifier,
                                              Name.OperatorFunctionId.Operator));
     return TNK_Function_template;
 
-  case UnqualifiedId::IK_LiteralOperatorId:
+  case UnqualifiedIdKind::IK_LiteralOperatorId:
     llvm_unreachable("literal operator id cannot have a dependent scope");
 
   default:
@@ -4093,11 +4198,12 @@
     ArgType = Arg.getAsType();
     TSI = AL.getTypeSourceInfo();
     break;
-  case TemplateArgument::Template: {
+  case TemplateArgument::Template:
+  case TemplateArgument::TemplateExpansion: {
     // We have a template type parameter but the template argument
     // is a template without any arguments.
     SourceRange SR = AL.getSourceRange();
-    TemplateName Name = Arg.getAsTemplate();
+    TemplateName Name = Arg.getAsTemplateOrTemplatePattern();
     Diag(SR.getBegin(), diag::err_template_missing_args)
       << (int)getTemplateNameKindForDiagnostics(Name) << Name << SR;
     if (TemplateDecl *Decl = Name.getAsTemplateDecl())
@@ -4804,7 +4910,12 @@
   // template.
   TemplateArgumentListInfo NewArgs = TemplateArgs;
 
-  TemplateParameterList *Params = Template->getTemplateParameters();
+  // Make sure we get the template parameter list from the most
+  // recentdeclaration, since that is the only one that has is guaranteed to
+  // have all the default template argument information.
+  TemplateParameterList *Params =
+      cast<TemplateDecl>(Template->getMostRecentDecl())
+          ->getTemplateParameters();
 
   SourceLocation RAngleLoc = NewArgs.getRAngleLoc();
 
@@ -5823,17 +5934,16 @@
   }
   // A constant of pointer-to-member type.
   else if ((DRE = dyn_cast<DeclRefExpr>(Arg))) {
-    if (ValueDecl *VD = dyn_cast<ValueDecl>(DRE->getDecl())) {
-      if (VD->getType()->isMemberPointerType()) {
-        if (isa<NonTypeTemplateParmDecl>(VD)) {
-          if (Arg->isTypeDependent() || Arg->isValueDependent()) {
-            Converted = TemplateArgument(Arg);
-          } else {
-            VD = cast<ValueDecl>(VD->getCanonicalDecl());
-            Converted = TemplateArgument(VD, ParamType);
-          }
-          return Invalid;
+    ValueDecl *VD = DRE->getDecl();
+    if (VD->getType()->isMemberPointerType()) {
+      if (isa<NonTypeTemplateParmDecl>(VD)) {
+        if (Arg->isTypeDependent() || Arg->isValueDependent()) {
+          Converted = TemplateArgument(Arg);
+        } else {
+          VD = cast<ValueDecl>(VD->getCanonicalDecl());
+          Converted = TemplateArgument(VD, ParamType);
         }
+        return Invalid;
       }
     }
 
@@ -5917,7 +6027,7 @@
   SourceLocation StartLoc = Arg->getLocStart();
 
   // If the parameter type somehow involves auto, deduce the type now.
-  if (getLangOpts().CPlusPlus1z && ParamType->isUndeducedType()) {
+  if (getLangOpts().CPlusPlus17 && ParamType->isUndeducedType()) {
     // During template argument deduction, we allow 'decltype(auto)' to
     // match an arbitrary dependent argument.
     // FIXME: The language rules don't say what happens in this case.
@@ -6001,8 +6111,8 @@
   EnterExpressionEvaluationContext ConstantEvaluated(
       *this, Sema::ExpressionEvaluationContext::ConstantEvaluated);
 
-  if (getLangOpts().CPlusPlus1z) {
-    // C++1z [temp.arg.nontype]p1:
+  if (getLangOpts().CPlusPlus17) {
+    // C++17 [temp.arg.nontype]p1:
     //   A template-argument for a non-type template parameter shall be
     //   a converted constant expression of the type of the template-parameter.
     APValue Value;
@@ -6524,7 +6634,7 @@
   assert(Arg.getKind() == TemplateArgument::Declaration &&
          "Only declaration template arguments permitted here");
 
-  ValueDecl *VD = cast<ValueDecl>(Arg.getAsDecl());
+  ValueDecl *VD = Arg.getAsDecl();
 
   if (VD->getDeclContext()->isRecord() &&
       (isa<CXXMethodDecl>(VD) || isa<FieldDecl>(VD) ||
@@ -8031,15 +8141,6 @@
   // Ignore access information;  it doesn't figure into redeclaration checking.
   FunctionDecl *Specialization = cast<FunctionDecl>(*Result);
 
-  // C++ Concepts TS [dcl.spec.concept]p7: A program shall not declare [...]
-  // an explicit specialization (14.8.3) [...] of a concept definition.
-  if (Specialization->getPrimaryTemplate()->isConcept()) {
-    Diag(FD->getLocation(), diag::err_concept_specialized)
-        << 0 /*function*/ << 1 /*explicitly specialized*/;
-    Diag(Specialization->getLocation(), diag::note_previous_declaration);
-    return true;
-  }
-
   FunctionTemplateSpecializationInfo *SpecInfo
     = Specialization->getTemplateSpecializationInfo();
   assert(SpecInfo && "Function template specialization info missing?");
@@ -8926,15 +9027,6 @@
     Diag(D.getDeclSpec().getConstexprSpecLoc(),
          diag::err_explicit_instantiation_constexpr);
 
-  // C++ Concepts TS [dcl.spec.concept]p1: The concept specifier shall be
-  // applied only to the definition of a function template or variable template,
-  // declared in namespace scope.
-  if (D.getDeclSpec().isConceptSpecified()) {
-    Diag(D.getDeclSpec().getConceptSpecLoc(),
-         diag::err_concept_specified_specialization) << 0;
-    return true;
-  }
-
   // A deduction guide is not on the list of entities that can be explicitly
   // instantiated.
   if (Name.getNameKind() == DeclarationName::CXXDeductionGuideName) {
@@ -9002,7 +9094,7 @@
         return true;
       }
 
-      if (D.getName().getKind() != UnqualifiedId::IK_TemplateId) {
+      if (D.getName().getKind() != UnqualifiedIdKind::IK_TemplateId) {
         // C++1y [temp.explicit]p3:
         //   If the explicit instantiation is for a variable, the unqualified-id
         //   in the declaration shall be a template-id.
@@ -9014,15 +9106,6 @@
         return true;
       }
 
-      // C++ Concepts TS [dcl.spec.concept]p7: A program shall not declare an
-      // explicit instantiation (14.8.2) [...] of a concept definition.
-      if (PrevTemplate->isConcept()) {
-        Diag(D.getIdentifierLoc(), diag::err_concept_specialized)
-            << 1 /*variable*/ << 0 /*explicitly instantiated*/;
-        Diag(PrevTemplate->getLocation(), diag::note_previous_declaration);
-        return true;
-      }
-
       // Translate the parser's template argument list into our AST format.
       TemplateArgumentListInfo TemplateArgs =
           makeTemplateArgumentListInfo(*this, *D.getName().TemplateId);
@@ -9065,7 +9148,6 @@
 
     if (!HasNoEffect) {
       // Instantiate static data member or variable template.
-
       Prev->setTemplateSpecializationKind(TSK, D.getIdentifierLoc());
       if (PrevTemplate) {
         // Merge attributes.
@@ -9094,7 +9176,7 @@
   // argument list into our AST format.
   bool HasExplicitTemplateArgs = false;
   TemplateArgumentListInfo TemplateArgs;
-  if (D.getName().getKind() == UnqualifiedId::IK_TemplateId) {
+  if (D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId) {
     TemplateArgs = makeTemplateArgumentListInfo(*this, *D.getName().TemplateId);
     HasExplicitTemplateArgs = true;
   }
@@ -9233,10 +9315,18 @@
       return (Decl*) nullptr;
   }
 
-  Specialization->setTemplateSpecializationKind(TSK, D.getIdentifierLoc());
   if (Attr)
     ProcessDeclAttributeList(S, Specialization, Attr);
 
+  // In MSVC mode, dllimported explicit instantiation definitions are treated as
+  // instantiation declarations.
+  if (TSK == TSK_ExplicitInstantiationDefinition &&
+      Specialization->hasAttr<DLLImportAttr>() &&
+      Context.getTargetInfo().getCXXABI().isMicrosoft())
+    TSK = TSK_ExplicitInstantiationDeclaration;
+
+  Specialization->setTemplateSpecializationKind(TSK, D.getIdentifierLoc());
+
   if (Specialization->isDefined()) {
     // Let the ASTConsumer know that this function has been explicitly
     // instantiated now, and its linkage might have changed.
@@ -9252,23 +9342,13 @@
   //
   // C++98 has the same restriction, just worded differently.
   FunctionTemplateDecl *FunTmpl = Specialization->getPrimaryTemplate();
-  if (D.getName().getKind() != UnqualifiedId::IK_TemplateId && !FunTmpl &&
+  if (D.getName().getKind() != UnqualifiedIdKind::IK_TemplateId && !FunTmpl &&
       D.getCXXScopeSpec().isSet() &&
       !ScopeSpecifierHasTemplateId(D.getCXXScopeSpec()))
     Diag(D.getIdentifierLoc(),
          diag::ext_explicit_instantiation_without_qualified_id)
     << Specialization << D.getCXXScopeSpec().getRange();
 
-  // C++ Concepts TS [dcl.spec.concept]p7: A program shall not declare an
-  // explicit instantiation (14.8.2) [...] of a concept definition.
-  if (FunTmpl && FunTmpl->isConcept() &&
-      !D.getDeclSpec().isConceptSpecified()) {
-    Diag(D.getIdentifierLoc(), diag::err_concept_specialized)
-        << 0 /*function*/ << 0 /*explicitly instantiated*/;
-    Diag(FunTmpl->getLocation(), diag::note_previous_declaration);
-    return true;
-  }
-
   CheckExplicitInstantiationScope(*this,
                    FunTmpl? (NamedDecl *)FunTmpl
                           : Specialization->getInstantiatedFromMemberFunction(),
@@ -9448,8 +9528,7 @@
       EnableIfTy.getAs<TemplateSpecializationTypeLoc>();
   if (!EnableIfTSTLoc || EnableIfTSTLoc.getNumArgs() == 0)
     return false;
-  const TemplateSpecializationType *EnableIfTST =
-    cast<TemplateSpecializationType>(EnableIfTSTLoc.getTypePtr());
+  const TemplateSpecializationType *EnableIfTST = EnableIfTSTLoc.getTypePtr();
 
   // ... which names a complete class template declaration...
   const TemplateDecl *EnableIfDecl =
@@ -9607,7 +9686,7 @@
     //   A type-specifier of the form
     //     typename[opt] nested-name-specifier[opt] template-name
     //   is a placeholder for a deduced class type [...].
-    if (getLangOpts().CPlusPlus1z) {
+    if (getLangOpts().CPlusPlus17) {
       if (auto *TD = getAsTypeTemplateDecl(Result.getFoundDecl())) {
         return Context.getElaboratedType(
             Keyword, QualifierLoc.getNestedNameSpecifier(),
diff --git a/lib/Sema/SemaTemplateDeduction.cpp b/lib/Sema/SemaTemplateDeduction.cpp
index 51f21be..1823259 100644
--- a/lib/Sema/SemaTemplateDeduction.cpp
+++ b/lib/Sema/SemaTemplateDeduction.cpp
@@ -1,33 +1,64 @@
-//===------- SemaTemplateDeduction.cpp - Template Argument Deduction ------===/
+//===- SemaTemplateDeduction.cpp - Template Argument Deduction ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-//===----------------------------------------------------------------------===/
 //
-//  This file implements C++ template argument deduction.
+//===----------------------------------------------------------------------===//
 //
-//===----------------------------------------------------------------------===/
+// This file implements C++ template argument deduction.
+//
+//===----------------------------------------------------------------------===//
 
 #include "clang/Sema/TemplateDeduction.h"
 #include "TreeTransform.h"
+#include "TypeLocBuilder.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTLambda.h"
-#include "clang/AST/DeclObjC.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclAccessPair.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
-#include "clang/AST/StmtVisitor.h"
-#include "clang/AST/TypeOrdering.h"
-#include "clang/Sema/DeclSpec.h"
+#include "clang/AST/NestedNameSpecifier.h"
+#include "clang/AST/TemplateBase.h"
+#include "clang/AST/TemplateName.h"
+#include "clang/AST/Type.h"
+#include "clang/AST/TypeLoc.h"
+#include "clang/AST/UnresolvedSet.h"
+#include "clang/Basic/AddressSpaces.h"
+#include "clang/Basic/ExceptionSpecificationType.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
+#include "clang/Sema/Ownership.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/Template.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
+#include <cassert>
+#include <tuple>
+#include <utility>
 
 namespace clang {
-  using namespace sema;
+
   /// \brief Various flags that control template argument deduction.
   ///
   /// These flags can be bitwise-OR'd together.
@@ -36,24 +67,30 @@
     /// strictest results for template argument deduction (as used for, e.g.,
     /// matching class template partial specializations).
     TDF_None = 0,
+
     /// \brief Within template argument deduction from a function call, we are
     /// matching with a parameter type for which the original parameter was
     /// a reference.
     TDF_ParamWithReferenceType = 0x1,
+
     /// \brief Within template argument deduction from a function call, we
     /// are matching in a case where we ignore cv-qualifiers.
     TDF_IgnoreQualifiers = 0x02,
+
     /// \brief Within template argument deduction from a function call,
     /// we are matching in a case where we can perform template argument
     /// deduction from a template-id of a derived class of the argument type.
     TDF_DerivedClass = 0x04,
+
     /// \brief Allow non-dependent types to differ, e.g., when performing
     /// template argument deduction from a function call where conversions
     /// may apply.
     TDF_SkipNonDependent = 0x08,
+
     /// \brief Whether we are performing template argument deduction for
     /// parameters and arguments in a top-level template argument
     TDF_TopLevelParameterTypeList = 0x10,
+
     /// \brief Within template argument deduction from overload resolution per
     /// C++ [over.over] allow matching function types that are compatible in
     /// terms of noreturn and default calling convention adjustments, or
@@ -66,6 +103,7 @@
 }
 
 using namespace clang;
+using namespace sema;
 
 /// \brief Compare two APSInts, extending and switching the sign as
 /// necessary to compare their values regardless of underlying type.
@@ -132,7 +170,7 @@
 getDeducedParameterFromExpr(TemplateDeductionInfo &Info, Expr *E) {
   // If we are within an alias template, the expression may have undergone
   // any number of parameter substitutions already.
-  while (1) {
+  while (true) {
     if (ImplicitCastExpr *IC = dyn_cast<ImplicitCastExpr>(E))
       E = IC->getSubExpr();
     else if (SubstNonTypeTemplateParmExpr *Subst =
@@ -297,7 +335,7 @@
     // All other combinations are incompatible.
     return DeducedTemplateArgument();
 
-  case TemplateArgument::Pack:
+  case TemplateArgument::Pack: {
     if (Y.getKind() != TemplateArgument::Pack ||
         X.pack_size() != Y.pack_size())
       return DeducedTemplateArgument();
@@ -319,6 +357,7 @@
         TemplateArgument::CreatePackCopy(Context, NewPack),
         X.wasDeducedFromArrayBound() && Y.wasDeducedFromArrayBound());
   }
+  }
 
   llvm_unreachable("Invalid TemplateArgument Kind!");
 }
@@ -344,7 +383,7 @@
   }
 
   Deduced[NTTP->getIndex()] = Result;
-  if (!S.getLangOpts().CPlusPlus1z)
+  if (!S.getLangOpts().CPlusPlus17)
     return Sema::TDK_Success;
 
   if (NTTP->isExpandedParameterPack())
@@ -354,8 +393,9 @@
     // expanded NTTP should be a pack expansion type?
     return Sema::TDK_Success;
 
-  // Get the type of the parameter for deduction.
-  QualType ParamType = NTTP->getType();
+  // Get the type of the parameter for deduction. If it's a (dependent) array
+  // or function type, we will not have decayed it yet, so do that now.
+  QualType ParamType = S.Context.getAdjustedParameterType(NTTP->getType());
   if (auto *Expansion = dyn_cast<PackExpansionType>(ParamType))
     ParamType = Expansion->getPattern();
 
@@ -501,6 +541,10 @@
                         SmallVectorImpl<DeducedTemplateArgument> &Deduced) {
   assert(Arg.isCanonical() && "Argument type must be canonical");
 
+  // Treat an injected-class-name as its underlying template-id.
+  if (auto *Injected = dyn_cast<InjectedClassNameType>(Arg))
+    Arg = Injected->getInjectedSpecializationType();
+
   // Check whether the template argument is a dependent template-id.
   if (const TemplateSpecializationType *SpecArg
         = dyn_cast<TemplateSpecializationType>(Arg)) {
@@ -615,8 +659,6 @@
 
 /// A pack that we're currently deducing.
 struct clang::DeducedPack {
-  DeducedPack(unsigned Index) : Index(Index), Outer(nullptr) {}
-
   // The index of the pack.
   unsigned Index;
 
@@ -631,10 +673,13 @@
   SmallVector<DeducedTemplateArgument, 4> New;
 
   // The outer deduction for this pack, if any.
-  DeducedPack *Outer;
+  DeducedPack *Outer = nullptr;
+
+  DeducedPack(unsigned Index) : Index(Index) {}
 };
 
 namespace {
+
 /// A scope in which we're performing pack deduction.
 class PackDeductionScope {
 public:
@@ -849,6 +894,7 @@
 
   SmallVector<DeducedPack, 2> Packs;
 };
+
 } // namespace
 
 /// \brief Deduce the template arguments by comparing the list of parameter
@@ -1352,7 +1398,7 @@
     case Type::Enum:
     case Type::ObjCObject:
     case Type::ObjCInterface:
-    case Type::ObjCObjectPointer: {
+    case Type::ObjCObjectPointer:
       if (TDF & TDF_SkipNonDependent)
         return Sema::TDK_Success;
 
@@ -1362,7 +1408,6 @@
       }
 
       return Param == Arg? Sema::TDK_Success : Sema::TDK_NonDeducedMismatch;
-    }
 
     //     _Complex T   [placeholder extension]
     case Type::Complex:
@@ -1581,7 +1626,7 @@
       return Sema::TDK_Success;
     }
 
-    case Type::InjectedClassName: {
+    case Type::InjectedClassName:
       // Treat a template's injected-class-name as if the template
       // specialization type had been used.
       Param = cast<InjectedClassNameType>(Param)
@@ -1589,7 +1634,6 @@
       assert(isa<TemplateSpecializationType>(Param) &&
              "injected class name is not a template specialization type");
       LLVM_FALLTHROUGH;
-    }
 
     //     template-name<T> (where template-name refers to a class template)
     //     template-name<i>
@@ -1991,7 +2035,7 @@
     Info.SecondArg = Arg;
     return Sema::TDK_NonDeducedMismatch;
 
-  case TemplateArgument::Expression: {
+  case TemplateArgument::Expression:
     if (NonTypeTemplateParmDecl *NTTP
           = getDeducedParameterFromExpr(Info, Param.getAsExpr())) {
       if (Arg.getKind() == TemplateArgument::Integral)
@@ -2020,7 +2064,7 @@
 
     // Can't deduce anything, but that's okay.
     return Sema::TDK_Success;
-  }
+
   case TemplateArgument::Pack:
     llvm_unreachable("Argument packs should be expanded by the caller!");
   }
@@ -2302,7 +2346,6 @@
   llvm_unreachable("Invalid TemplateArgument Kind!");
 }
 
-
 /// \brief Convert the given deduced template argument and add it to the set of
 /// fully-converted template arguments.
 static bool
@@ -2933,7 +2976,7 @@
     // so substitution into the type must also substitute into the exception
     // specification.
     SmallVector<QualType, 4> ExceptionStorage;
-    if (getLangOpts().CPlusPlus1z &&
+    if (getLangOpts().CPlusPlus17 &&
         SubstExceptionSpec(
             Function->getLocation(), EPI.ExceptionSpec, ExceptionStorage,
             MultiLevelTemplateArgumentList(*ExplicitArgumentList)))
@@ -3079,7 +3122,7 @@
     return Sema::TDK_Success;
 
   if (A->isRecordType() && isSimpleTemplateIdType(OriginalParamType) &&
-      S.IsDerivedFrom(SourceLocation(), A, DeducedA))
+      S.IsDerivedFrom(Info.getLocation(), A, DeducedA))
     return Sema::TDK_Success;
 
   return Failed();
@@ -3266,13 +3309,14 @@
   // We may need to deduce the return type of the function now.
   if (S.getLangOpts().CPlusPlus14 && Fn->getReturnType()->isUndeducedType() &&
       S.DeduceReturnType(Fn, R.Expression->getExprLoc(), /*Diagnose*/ false))
-    return QualType();
+    return {};
 
   if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(Fn))
     if (Method->isInstance()) {
       // An instance method that's referenced in a form that doesn't
       // look like a member pointer is just invalid.
-      if (!R.HasFormOfMemberPointer) return QualType();
+      if (!R.HasFormOfMemberPointer)
+        return {};
 
       return S.Context.getMemberPointerType(Fn->getType(),
                S.Context.getTypeDeclType(Method->getParent()).getTypePtr());
@@ -3321,7 +3365,7 @@
             S.resolveAddressOfOnlyViableOverloadCandidate(Arg, DAP))
       return GetTypeOfFunction(S, R, Viable);
 
-    return QualType();
+    return {};
   }
 
   // Gather the explicit template arguments, if any.
@@ -3338,7 +3382,7 @@
       //     function templates, the parameter is treated as a
       //     non-deduced context.
       if (!Ovl->hasExplicitTemplateArgs())
-        return QualType();
+        return {};
 
       // Otherwise, see if we can resolve a function type
       FunctionDecl *Specialization = nullptr;
@@ -3378,7 +3422,8 @@
       = DeduceTemplateArgumentsByTypeMatch(S, TemplateParams, ParamType,
                                            ArgType, Info, Deduced, TDF);
     if (Result) continue;
-    if (!Match.isNull()) return QualType();
+    if (!Match.isNull())
+      return {};
     Match = ArgType;
   }
 
@@ -3907,7 +3952,7 @@
   // so we can check that the exception specification matches.
   auto *SpecializationFPT =
       Specialization->getType()->castAs<FunctionProtoType>();
-  if (getLangOpts().CPlusPlus1z &&
+  if (getLangOpts().CPlusPlus17 &&
       isUnresolvedExceptionSpec(SpecializationFPT->getExceptionSpecType()) &&
       !ResolveExceptionSpec(Info.getLocation(), SpecializationFPT))
     return TDK_MiscellaneousDeductionFailure;
@@ -3939,117 +3984,6 @@
   return TDK_Success;
 }
 
-/// \brief Given a function declaration (e.g. a generic lambda conversion
-///  function) that contains an 'auto' in its result type, substitute it
-///  with TypeToReplaceAutoWith.  Be careful to pass in the type you want
-///  to replace 'auto' with and not the actual result type you want
-///  to set the function to.
-static inline void
-SubstAutoWithinFunctionReturnType(FunctionDecl *F,
-                                    QualType TypeToReplaceAutoWith, Sema &S) {
-  assert(!TypeToReplaceAutoWith->getContainedAutoType());
-  QualType AutoResultType = F->getReturnType();
-  assert(AutoResultType->getContainedAutoType());
-  QualType DeducedResultType = S.SubstAutoType(AutoResultType,
-                                               TypeToReplaceAutoWith);
-  S.Context.adjustDeducedFunctionResultType(F, DeducedResultType);
-}
-
-/// \brief Given a specialized conversion operator of a generic lambda
-/// create the corresponding specializations of the call operator and
-/// the static-invoker. If the return type of the call operator is auto,
-/// deduce its return type and check if that matches the
-/// return type of the destination function ptr.
-
-static inline Sema::TemplateDeductionResult
-SpecializeCorrespondingLambdaCallOperatorAndInvoker(
-    CXXConversionDecl *ConversionSpecialized,
-    SmallVectorImpl<DeducedTemplateArgument> &DeducedArguments,
-    QualType ReturnTypeOfDestFunctionPtr,
-    TemplateDeductionInfo &TDInfo,
-    Sema &S) {
-
-  CXXRecordDecl *LambdaClass = ConversionSpecialized->getParent();
-  assert(LambdaClass && LambdaClass->isGenericLambda());
-
-  CXXMethodDecl *CallOpGeneric = LambdaClass->getLambdaCallOperator();
-  QualType CallOpResultType = CallOpGeneric->getReturnType();
-  const bool GenericLambdaCallOperatorHasDeducedReturnType =
-      CallOpResultType->getContainedAutoType();
-
-  FunctionTemplateDecl *CallOpTemplate =
-      CallOpGeneric->getDescribedFunctionTemplate();
-
-  FunctionDecl *CallOpSpecialized = nullptr;
-  // Use the deduced arguments of the conversion function, to specialize our
-  // generic lambda's call operator.
-  if (Sema::TemplateDeductionResult Result
-      = S.FinishTemplateArgumentDeduction(CallOpTemplate,
-                                          DeducedArguments,
-                                          0, CallOpSpecialized, TDInfo))
-    return Result;
-
-  // If we need to deduce the return type, do so (instantiates the callop).
-  if (GenericLambdaCallOperatorHasDeducedReturnType &&
-      CallOpSpecialized->getReturnType()->isUndeducedType())
-    S.DeduceReturnType(CallOpSpecialized,
-                       CallOpSpecialized->getPointOfInstantiation(),
-                       /*Diagnose*/ true);
-
-  // Check to see if the return type of the destination ptr-to-function
-  // matches the return type of the call operator.
-  if (!S.Context.hasSameType(CallOpSpecialized->getReturnType(),
-                             ReturnTypeOfDestFunctionPtr))
-    return Sema::TDK_NonDeducedMismatch;
-  // Since we have succeeded in matching the source and destination
-  // ptr-to-functions (now including return type), and have successfully
-  // specialized our corresponding call operator, we are ready to
-  // specialize the static invoker with the deduced arguments of our
-  // ptr-to-function.
-  FunctionDecl *InvokerSpecialized = nullptr;
-  FunctionTemplateDecl *InvokerTemplate = LambdaClass->
-                  getLambdaStaticInvoker()->getDescribedFunctionTemplate();
-
-#ifndef NDEBUG
-  Sema::TemplateDeductionResult LLVM_ATTRIBUTE_UNUSED Result =
-#endif
-    S.FinishTemplateArgumentDeduction(InvokerTemplate, DeducedArguments, 0,
-          InvokerSpecialized, TDInfo);
-  assert(Result == Sema::TDK_Success &&
-    "If the call operator succeeded so should the invoker!");
-  // Set the result type to match the corresponding call operator
-  // specialization's result type.
-  if (GenericLambdaCallOperatorHasDeducedReturnType &&
-      InvokerSpecialized->getReturnType()->isUndeducedType()) {
-    // Be sure to get the type to replace 'auto' with and not
-    // the full result type of the call op specialization
-    // to substitute into the 'auto' of the invoker and conversion
-    // function.
-    // For e.g.
-    //  int* (*fp)(int*) = [](auto* a) -> auto* { return a; };
-    // We don't want to subst 'int*' into 'auto' to get int**.
-
-    QualType TypeToReplaceAutoWith = CallOpSpecialized->getReturnType()
-                                         ->getContainedAutoType()
-                                         ->getDeducedType();
-    SubstAutoWithinFunctionReturnType(InvokerSpecialized,
-        TypeToReplaceAutoWith, S);
-    SubstAutoWithinFunctionReturnType(ConversionSpecialized,
-        TypeToReplaceAutoWith, S);
-  }
-
-  // Ensure that static invoker doesn't have a const qualifier.
-  // FIXME: When creating the InvokerTemplate in SemaLambda.cpp
-  // do not use the CallOperator's TypeSourceInfo which allows
-  // the const qualifier to leak through.
-  const FunctionProtoType *InvokerFPT = InvokerSpecialized->
-                  getType().getTypePtr()->castAs<FunctionProtoType>();
-  FunctionProtoType::ExtProtoInfo EPI = InvokerFPT->getExtProtoInfo();
-  EPI.TypeQuals = 0;
-  InvokerSpecialized->setType(S.Context.getFunctionType(
-      InvokerFPT->getReturnType(), InvokerFPT->getParamTypes(), EPI));
-  return Sema::TDK_Success;
-}
 /// \brief Deduce template arguments for a templated conversion
 /// function (C++ [temp.deduct.conv]) and, if successful, produce a
 /// conversion function template specialization.
@@ -4157,35 +4091,6 @@
       = FinishTemplateArgumentDeduction(ConversionTemplate, Deduced, 0,
                                         ConversionSpecialized, Info);
   Specialization = cast_or_null<CXXConversionDecl>(ConversionSpecialized);
-
-  // If the conversion operator is being invoked on a lambda closure to convert
-  // to a ptr-to-function, use the deduced arguments from the conversion
-  // function to specialize the corresponding call operator.
-  //   e.g., int (*fp)(int) = [](auto a) { return a; };
-  if (Result == TDK_Success && isLambdaConversionOperator(ConversionGeneric)) {
-
-    // Get the return type of the destination ptr-to-function we are converting
-    // to.  This is necessary for matching the lambda call operator's return
-    // type to that of the destination ptr-to-function's return type.
-    assert(A->isPointerType() &&
-        "Can only convert from lambda to ptr-to-function");
-    const FunctionType *ToFunType =
-        A->getPointeeType().getTypePtr()->getAs<FunctionType>();
-    const QualType DestFunctionPtrReturnType = ToFunType->getReturnType();
-
-    // Create the corresponding specializations of the call operator and
-    // the static-invoker; and if the return type is auto,
-    // deduce the return type and check if it matches the
-    // DestFunctionPtrReturnType.
-    // For instance:
-    //   auto L = [](auto a) { return f(a); };
-    //   int (*fp)(int) = L;
-    //   char (*fp2)(int) = L; <-- Not OK.
-
-    Result = SpecializeCorrespondingLambdaCallOperatorAndInvoker(
-        Specialization, Deduced, DestFunctionPtrReturnType,
-        Info, *this);
-  }
   return Result;
 }
 
@@ -4224,12 +4129,14 @@
 }
 
 namespace {
+
   /// Substitute the 'auto' specifier or deduced template specialization type
   /// specifier within a type for a given replacement type.
   class SubstituteDeducedTypeTransform :
       public TreeTransform<SubstituteDeducedTypeTransform> {
     QualType Replacement;
     bool UseTypeSugar;
+
   public:
     SubstituteDeducedTypeTransform(Sema &SemaRef, QualType Replacement,
                             bool UseTypeSugar = true)
@@ -4291,7 +4198,8 @@
       return TransformType(TLB, TL);
     }
   };
-}
+
+} // namespace
 
 Sema::DeduceAutoResult
 Sema::DeduceAutoType(TypeSourceInfo *Type, Expr *&Init, QualType &Result,
@@ -4535,6 +4443,43 @@
                             bool Diagnose) {
   assert(FD->getReturnType()->isUndeducedType());
 
+  // For a lambda's conversion operator, deduce any 'auto' or 'decltype(auto)'
+  // within the return type from the call operator's type.
+  if (isLambdaConversionOperator(FD)) {
+    CXXRecordDecl *Lambda = cast<CXXMethodDecl>(FD)->getParent();
+    FunctionDecl *CallOp = Lambda->getLambdaCallOperator();
+
+    // For a generic lambda, instantiate the call operator if needed. 
+    if (auto *Args = FD->getTemplateSpecializationArgs()) {
+      CallOp = InstantiateFunctionDeclaration(
+          CallOp->getDescribedFunctionTemplate(), Args, Loc);
+      if (!CallOp || CallOp->isInvalidDecl())
+        return true;
+
+      // We might need to deduce the return type by instantiating the definition
+      // of the operator() function.
+      if (CallOp->getReturnType()->isUndeducedType())
+        InstantiateFunctionDefinition(Loc, CallOp);
+    }
+
+    if (CallOp->isInvalidDecl())
+      return true;
+    assert(!CallOp->getReturnType()->isUndeducedType() &&
+           "failed to deduce lambda return type");
+
+    // Build the new return type from scratch.
+    QualType RetType = getLambdaConversionFunctionResultType(
+        CallOp->getType()->castAs<FunctionProtoType>());
+    if (FD->getReturnType()->getAs<PointerType>())
+      RetType = Context.getPointerType(RetType);
+    else {
+      assert(FD->getReturnType()->getAs<BlockPointerType>());
+      RetType = Context.getBlockPointerType(RetType);
+    }
+    Context.adjustDeducedFunctionResultType(FD, RetType);
+    return false;
+  }
+
   if (FD->getTemplateInstantiationPattern())
     InstantiateFunctionDefinition(Loc, FD);
 
@@ -5127,7 +5072,7 @@
 
   // Skip through any implicit casts we added while type-checking, and any
   // substitutions performed by template alias expansion.
-  while (1) {
+  while (true) {
     if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E))
       E = ICE->getSubExpr();
     else if (const SubstNonTypeTemplateParmExpr *Subst =
@@ -5151,9 +5096,9 @@
   if (NTTP->getDepth() == Depth)
     Used[NTTP->getIndex()] = true;
 
-  // In C++1z mode, additional arguments may be deduced from the type of a
+  // In C++17 mode, additional arguments may be deduced from the type of a
   // non-type argument.
-  if (Ctx.getLangOpts().CPlusPlus1z)
+  if (Ctx.getLangOpts().CPlusPlus17)
     MarkUsedTemplateParameters(Ctx, NTTP->getType(), OnlyDeduced, Depth, Used);
 }
 
@@ -5323,7 +5268,7 @@
 
   case Type::InjectedClassName:
     T = cast<InjectedClassNameType>(T)->getInjectedSpecializationType();
-    // fall through
+    LLVM_FALLTHROUGH;
 
   case Type::TemplateSpecialization: {
     const TemplateSpecializationType *Spec
@@ -5430,6 +5375,7 @@
     MarkUsedTemplateParameters(Ctx,
                                cast<DeducedType>(T)->getDeducedType(),
                                OnlyDeduced, Depth, Used);
+    break;
 
   // None of these types have any template parameters in them.
   case Type::Builtin:
diff --git a/lib/Sema/SemaTemplateInstantiate.cpp b/lib/Sema/SemaTemplateInstantiate.cpp
index 16f7381..bcc1e66 100644
--- a/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/lib/Sema/SemaTemplateInstantiate.cpp
@@ -25,6 +25,7 @@
 #include "clang/Sema/PrettyDeclStackTrace.h"
 #include "clang/Sema/Template.h"
 #include "clang/Sema/TemplateDeduction.h"
+#include "clang/Sema/TemplateInstCallback.h"
 
 using namespace clang;
 using namespace sema;
@@ -199,6 +200,10 @@
   case DeclaringSpecialMember:
   case DefiningSynthesizedFunction:
     return false;
+       
+  // This function should never be called when Kind's value is Memoization.
+  case Memoization:
+    break;
   }
 
   llvm_unreachable("Invalid SynthesisKind!");
@@ -235,6 +240,7 @@
         !SemaRef.InstantiatingSpecializations
              .insert(std::make_pair(Inst.Entity->getCanonicalDecl(), Inst.Kind))
              .second;
+    atTemplateBegin(SemaRef.TemplateInstCallbacks, SemaRef, Inst);
   }
 }
 
@@ -394,8 +400,10 @@
           std::make_pair(Active.Entity, Active.Kind));
     }
 
-    SemaRef.popCodeSynthesisContext();
+    atTemplateEnd(SemaRef.TemplateInstCallbacks, SemaRef,
+                  SemaRef.CodeSynthesisContexts.back());
 
+    SemaRef.popCodeSynthesisContext();
     Invalid = true;
   }
 }
@@ -496,8 +504,8 @@
       SmallVector<char, 128> TemplateArgsStr;
       llvm::raw_svector_ostream OS(TemplateArgsStr);
       Template->printName(OS);
-      TemplateSpecializationType::PrintTemplateArgumentList(
-          OS, Active->template_arguments(), getPrintingPolicy());
+      printTemplateArgumentList(OS, Active->template_arguments(),
+                                getPrintingPolicy());
       Diags.Report(Active->PointOfInstantiation,
                    diag::note_default_arg_instantiation_here)
         << OS.str()
@@ -562,8 +570,8 @@
       SmallVector<char, 128> TemplateArgsStr;
       llvm::raw_svector_ostream OS(TemplateArgsStr);
       FD->printName(OS);
-      TemplateSpecializationType::PrintTemplateArgumentList(
-          OS, Active->template_arguments(), getPrintingPolicy());
+      printTemplateArgumentList(OS, Active->template_arguments(),
+                                getPrintingPolicy());
       Diags.Report(Active->PointOfInstantiation,
                    diag::note_default_function_arg_instantiation_here)
         << OS.str()
@@ -626,7 +634,7 @@
         << cast<CXXRecordDecl>(Active->Entity) << Active->SpecialMember;
       break;
 
-    case CodeSynthesisContext::DefiningSynthesizedFunction:
+    case CodeSynthesisContext::DefiningSynthesizedFunction: {
       // FIXME: For synthesized members other than special members, produce a note.
       auto *MD = dyn_cast<CXXMethodDecl>(Active->Entity);
       auto CSM = MD ? getSpecialMember(MD) : CXXInvalid;
@@ -637,6 +645,10 @@
       }
       break;
     }
+
+    case CodeSynthesisContext::Memoization:
+      break;
+    }
   }
 }
 
@@ -682,6 +694,9 @@
       // This happens in a context unrelated to template instantiation, so
       // there is no SFINAE.
       return None;
+
+    case CodeSynthesisContext::Memoization:
+      break;
     }
 
     // The inner context was transparent for SFINAE. If it occurred within a
@@ -1197,11 +1212,11 @@
                                               NTTP->getDeclName());
       if (TargetType.isNull())
         return ExprError();
-      
-      return new (SemaRef.Context) SubstNonTypeTemplateParmPackExpr(TargetType,
-                                                                    NTTP, 
-                                                              E->getLocation(),
-                                                                    Arg);
+
+      return new (SemaRef.Context) SubstNonTypeTemplateParmPackExpr(
+          TargetType.getNonLValueExprType(SemaRef.Context),
+          TargetType->isReferenceType() ? VK_LValue : VK_RValue, NTTP,
+          E->getLocation(), Arg);
     }
     
     Arg = getPackSubstitutedTemplateArgument(getSema(), Arg);
@@ -1246,7 +1261,7 @@
              arg.getKind() == TemplateArgument::NullPtr) {
     ValueDecl *VD;
     if (arg.getKind() == TemplateArgument::Declaration) {
-      VD = cast<ValueDecl>(arg.getAsDecl());
+      VD = arg.getAsDecl();
 
       // Find the instantiation of the template argument.  This is
       // required for nested templates.
@@ -2108,6 +2123,10 @@
     }
   }
 
+  // See if trivial_abi has to be dropped.
+  if (Instantiation && Instantiation->hasAttr<TrivialABIAttr>())
+    checkIllFormedTrivialABIStruct(*Instantiation);
+
   // Finish checking fields.
   ActOnFields(nullptr, Instantiation->getLocation(), Instantiation, Fields,
               SourceLocation(), SourceLocation(), nullptr);
@@ -2613,7 +2632,7 @@
             continue;
           
           Var->setTemplateSpecializationKind(TSK, PointOfInstantiation);
-          InstantiateStaticDataMemberDefinition(PointOfInstantiation, Var);
+          InstantiateVariableDefinition(PointOfInstantiation, Var);
         } else {
           Var->setTemplateSpecializationKind(TSK, PointOfInstantiation);
         }
diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 186a618..108724f 100644
--- a/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -23,6 +23,7 @@
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/PrettyDeclStackTrace.h"
 #include "clang/Sema/Template.h"
+#include "clang/Sema/TemplateInstCallback.h"
 
 using namespace clang;
 
@@ -175,7 +176,7 @@
     Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
     const AllocAlignAttr *Align, Decl *New) {
   Expr *Param = IntegerLiteral::Create(
-      S.getASTContext(), llvm::APInt(64, Align->getParamIndex()),
+      S.getASTContext(), llvm::APInt(64, Align->paramIndex().getSourceIndex()),
       S.getASTContext().UnsignedLongLongTy, Align->getLocation());
   S.AddAllocAlignAttr(Align->getLocation(), New, Param,
                       Align->getSpellingListIndex());
@@ -343,14 +344,6 @@
       Attr.getRange());
 }
 
-static bool DeclContainsAttr(const Decl *D, const Attr *NewAttr) {
-  if (!D->hasAttrs() || NewAttr->duplicatesAllowed())
-    return false;
-  return llvm::find_if(D->getAttrs(), [NewAttr](const Attr *Attr) {
-           return Attr->getKind() == NewAttr->getKind();
-         }) != D->getAttrs().end();
-}
-
 void Sema::InstantiateAttrsForDecl(
     const MultiLevelTemplateArgumentList &TemplateArgs, const Decl *Tmpl,
     Decl *New, LateInstantiatedAttrVec *LateAttrs,
@@ -365,7 +358,7 @@
 
       Attr *NewAttr = sema::instantiateTemplateAttributeForDecl(
           TmplAttr, Context, *this, TemplateArgs);
-      if (NewAttr && !DeclContainsAttr(New, NewAttr))
+      if (NewAttr)
         New->addAttr(NewAttr);
     }
   }
@@ -470,8 +463,7 @@
 
       Attr *NewAttr = sema::instantiateTemplateAttribute(TmplAttr, Context,
                                                          *this, TemplateArgs);
-
-      if (NewAttr && !DeclContainsAttr(New, NewAttr))
+      if (NewAttr)
         New->addAttr(NewAttr);
     }
   }
@@ -1049,8 +1041,7 @@
         SemaRef.SubstType(TI->getType(), TemplateArgs,
                           UnderlyingLoc, DeclarationName());
       SemaRef.CheckEnumRedeclaration(Def->getLocation(), Def->isScoped(),
-                                     DefnUnderlying,
-                                     /*EnumUnderlyingIsImplicit=*/false, Enum);
+                                     DefnUnderlying, /*IsFixed=*/true, Enum);
     }
   }
 
@@ -1587,9 +1578,10 @@
 }
 
 /// Normal class members are of more specific types and therefore
-/// don't make it here.  This function serves two purposes:
+/// don't make it here.  This function serves three purposes:
 ///   1) instantiating function templates
 ///   2) substituting friend declarations
+///   3) substituting deduction guide declarations for nested class templates
 Decl *TemplateDeclInstantiator::VisitFunctionDecl(FunctionDecl *D,
                                        TemplateParameterList *TemplateParams) {
   // Check whether there is already a function template specialization for
@@ -1650,14 +1642,19 @@
                                          TemplateArgs);
   }
 
+  DeclarationNameInfo NameInfo
+    = SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs);
+
   FunctionDecl *Function;
-  if (auto *DGuide = dyn_cast<CXXDeductionGuideDecl>(D))
+  if (auto *DGuide = dyn_cast<CXXDeductionGuideDecl>(D)) {
     Function = CXXDeductionGuideDecl::Create(
-        SemaRef.Context, DC, D->getInnerLocStart(), DGuide->isExplicit(),
-        D->getNameInfo(), T, TInfo, D->getSourceRange().getEnd());
-  else {
+      SemaRef.Context, DC, D->getInnerLocStart(), DGuide->isExplicit(),
+      NameInfo, T, TInfo, D->getSourceRange().getEnd());
+    if (DGuide->isCopyDeductionCandidate())
+      cast<CXXDeductionGuideDecl>(Function)->setIsCopyDeductionCandidate();
+  } else {
     Function = FunctionDecl::Create(
-        SemaRef.Context, DC, D->getInnerLocStart(), D->getNameInfo(), T, TInfo,
+        SemaRef.Context, DC, D->getInnerLocStart(), NameInfo, T, TInfo,
         D->getCanonicalDecl()->getStorageClass(), D->isInlineSpecified(),
         D->hasWrittenPrototype(), D->isConstexpr());
     Function->setRangeEnd(D->getSourceRange().getEnd());
@@ -1809,45 +1806,24 @@
     //   apply to non-template function declarations and definitions also apply
     //   to these implicit definitions.
     if (D->isThisDeclarationADefinition()) {
-      // Check for a function body.
-      const FunctionDecl *Definition = nullptr;
-      if (Function->isDefined(Definition) &&
-          Definition->getTemplateSpecializationKind() == TSK_Undeclared) {
-        SemaRef.Diag(Function->getLocation(), diag::err_redefinition)
-            << Function->getDeclName();
-        SemaRef.Diag(Definition->getLocation(), diag::note_previous_definition);
-      }
-      // Check for redefinitions due to other instantiations of this or
-      // a similar friend function.
-      else for (auto R : Function->redecls()) {
-        if (R == Function)
-          continue;
+      SemaRef.CheckForFunctionRedefinition(Function);
+      if (!Function->isInvalidDecl()) {
+        for (auto R : Function->redecls()) {
+          if (R == Function)
+            continue;
 
-        // If some prior declaration of this function has been used, we need
-        // to instantiate its definition.
-        if (!QueuedInstantiation && R->isUsed(false)) {
-          if (MemberSpecializationInfo *MSInfo =
-                  Function->getMemberSpecializationInfo()) {
-            if (MSInfo->getPointOfInstantiation().isInvalid()) {
-              SourceLocation Loc = R->getLocation(); // FIXME
-              MSInfo->setPointOfInstantiation(Loc);
-              SemaRef.PendingLocalImplicitInstantiations.push_back(
-                                               std::make_pair(Function, Loc));
-              QueuedInstantiation = true;
-            }
-          }
-        }
-
-        // If some prior declaration of this function was a friend with an
-        // uninstantiated definition, reject it.
-        if (R->getFriendObjectKind()) {
-          if (const FunctionDecl *RPattern =
-                  R->getTemplateInstantiationPattern()) {
-            if (RPattern->isDefined(RPattern)) {
-              SemaRef.Diag(Function->getLocation(), diag::err_redefinition)
-                << Function->getDeclName();
-              SemaRef.Diag(R->getLocation(), diag::note_previous_definition);
-              break;
+          // If some prior declaration of this function has been used, we need
+          // to instantiate its definition.
+          if (!QueuedInstantiation && R->isUsed(false)) {
+            if (MemberSpecializationInfo *MSInfo =
+                Function->getMemberSpecializationInfo()) {
+              if (MSInfo->getPointOfInstantiation().isInvalid()) {
+                SourceLocation Loc = R->getLocation(); // FIXME
+                MSInfo->setPointOfInstantiation(Loc);
+                SemaRef.PendingLocalImplicitInstantiations.push_back(
+                    std::make_pair(Function, Loc));
+                QueuedInstantiation = true;
+              }
             }
           }
         }
@@ -2691,9 +2667,9 @@
 Decl *TemplateDeclInstantiator::VisitUsingPackDecl(UsingPackDecl *D) {
   SmallVector<NamedDecl*, 8> Expansions;
   for (auto *UD : D->expansions()) {
-    if (auto *NewUD =
+    if (NamedDecl *NewUD =
             SemaRef.FindInstantiatedDecl(D->getLocation(), UD, TemplateArgs))
-      Expansions.push_back(cast<NamedDecl>(NewUD));
+      Expansions.push_back(NewUD);
     else
       return nullptr;
   }
@@ -3651,8 +3627,10 @@
       assert(FunTmpl->getTemplatedDecl() == Tmpl &&
              "Deduction from the wrong function template?");
       (void) FunTmpl;
+      atTemplateEnd(SemaRef.TemplateInstCallbacks, SemaRef, ActiveInst);
       ActiveInst.Kind = ActiveInstType::TemplateInstantiation;
       ActiveInst.Entity = New;
+      atTemplateBegin(SemaRef.TemplateInstCallbacks, SemaRef, ActiveInst);
     }
   }
 
@@ -3723,6 +3701,30 @@
   return false;
 }
 
+/// Instantiate (or find existing instantiation of) a function template with a
+/// given set of template arguments.
+///
+/// Usually this should not be used, and template argument deduction should be
+/// used in its place.
+FunctionDecl *
+Sema::InstantiateFunctionDeclaration(FunctionTemplateDecl *FTD,
+                                     const TemplateArgumentList *Args,
+                                     SourceLocation Loc) {
+  FunctionDecl *FD = FTD->getTemplatedDecl();
+
+  sema::TemplateDeductionInfo Info(Loc);
+  InstantiatingTemplate Inst(
+      *this, Loc, FTD, Args->asArray(),
+      CodeSynthesisContext::ExplicitTemplateArgumentSubstitution, Info);
+  if (Inst.isInvalid())
+    return nullptr;
+
+  ContextRAII SavedContext(*this, FD);
+  MultiLevelTemplateArgumentList MArgs(*Args);
+
+  return cast_or_null<FunctionDecl>(SubstDecl(FD, FD->getParent(), MArgs));
+}
+
 /// In the MS ABI, we need to instantiate default arguments of dllexported
 /// default constructors along with the constructor definition. This allows IR
 /// gen to emit a constructor closure which calls the default constructor with
@@ -3766,7 +3768,8 @@
                                          bool Recursive,
                                          bool DefinitionRequired,
                                          bool AtEndOfTU) {
-  if (Function->isInvalidDecl() || Function->isDefined())
+  if (Function->isInvalidDecl() || Function->isDefined() ||
+      isa<CXXDeductionGuideDecl>(Function))
     return;
 
   // Never instantiate an explicit specialization except if it is a class scope
@@ -3805,7 +3808,8 @@
       PendingInstantiations.push_back(
         std::make_pair(Function, PointOfInstantiation));
     } else if (TSK == TSK_ImplicitInstantiation) {
-      if (AtEndOfTU && !getDiagnostics().hasErrorOccurred()) {
+      if (AtEndOfTU && !getDiagnostics().hasErrorOccurred() &&
+          !getSourceManager().isInSystemHeader(PatternDecl->getLocStart())) {
         Diag(PointOfInstantiation, diag::warn_func_template_missing)
           << Function;
         Diag(PatternDecl->getLocation(), diag::note_forward_template_decl);
@@ -3852,7 +3856,8 @@
   }
 
   // Note, we should never try to instantiate a deleted function template.
-  assert((Pattern || PatternDecl->isDefaulted()) &&
+  assert((Pattern || PatternDecl->isDefaulted() ||
+          PatternDecl->hasSkippedBody()) &&
          "unexpected kind of function template definition");
 
   // C++1y [temp.explicit]p10:
@@ -3924,30 +3929,34 @@
                                          TemplateArgs))
       return;
 
-    if (CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Function)) {
-      // If this is a constructor, instantiate the member initializers.
-      InstantiateMemInitializers(Ctor, cast<CXXConstructorDecl>(PatternDecl),
-                                 TemplateArgs);
+    if (PatternDecl->hasSkippedBody()) {
+      ActOnSkippedFunctionBody(Function);
+    } else {
+      if (CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Function)) {
+        // If this is a constructor, instantiate the member initializers.
+        InstantiateMemInitializers(Ctor, cast<CXXConstructorDecl>(PatternDecl),
+                                   TemplateArgs);
 
-      // If this is an MS ABI dllexport default constructor, instantiate any
-      // default arguments.
-      if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
-          Ctor->isDefaultConstructor()) {
-        InstantiateDefaultCtorDefaultArgs(*this, Ctor);
+        // If this is an MS ABI dllexport default constructor, instantiate any
+        // default arguments.
+        if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
+            Ctor->isDefaultConstructor()) {
+          InstantiateDefaultCtorDefaultArgs(*this, Ctor);
+        }
       }
+
+      // Instantiate the function body.
+      StmtResult Body = SubstStmt(Pattern, TemplateArgs);
+
+      if (Body.isInvalid())
+        Function->setInvalidDecl();
+
+      // FIXME: finishing the function body while in an expression evaluation
+      // context seems wrong. Investigate more.
+      ActOnFinishFunctionBody(Function, Body.get(),
+                              /*IsInstantiation=*/true);
     }
 
-    // Instantiate the function body.
-    StmtResult Body = SubstStmt(Pattern, TemplateArgs);
-
-    if (Body.isInvalid())
-      Function->setInvalidDecl();
-
-    // FIXME: finishing the function body while in an expression evaluation
-    // context seems wrong. Investigate more.
-    ActOnFinishFunctionBody(Function, Body.get(),
-                            /*IsInstantiation=*/true);
-
     PerformDependentDiagnostics(PatternDecl, TemplateArgs);
 
     if (auto *Listener = getASTMutationListener())
@@ -4018,6 +4027,8 @@
 VarTemplateSpecializationDecl *Sema::CompleteVarTemplateSpecializationDecl(
     VarTemplateSpecializationDecl *VarSpec, VarDecl *PatternDecl,
     const MultiLevelTemplateArgumentList &TemplateArgs) {
+  assert(PatternDecl->isThisDeclarationADefinition() &&
+         "don't have a definition to instantiate from");
 
   // Do substitution on the type of the declaration
   TypeSourceInfo *DI =
@@ -4029,6 +4040,9 @@
   // Update the type of this variable template specialization.
   VarSpec->setType(DI->getType());
 
+  // Convert the declaration into a definition now.
+  VarSpec->setCompleteDefinition();
+
   // Instantiate the initializer.
   InstantiateVariableInitializer(VarSpec, PatternDecl, TemplateArgs);
 
@@ -4118,7 +4132,8 @@
   // it right away if the type contains 'auto'.
   if ((!isa<VarTemplateSpecializationDecl>(NewVar) &&
        !InstantiatingVarTemplate &&
-       !(OldVar->isInline() && OldVar->isThisDeclarationADefinition())) ||
+       !(OldVar->isInline() && OldVar->isThisDeclarationADefinition() &&
+         !NewVar->isThisDeclarationADefinition())) ||
       NewVar->getType()->isUndeducedType())
     InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs);
 
@@ -4134,6 +4149,9 @@
 void Sema::InstantiateVariableInitializer(
     VarDecl *Var, VarDecl *OldVar,
     const MultiLevelTemplateArgumentList &TemplateArgs) {
+  if (ASTMutationListener *L = getASTContext().getASTMutationListener())
+    L->VariableDefinitionInstantiated(Var);
+
   // We propagate the 'inline' flag with the initializer, because it
   // would otherwise imply that the variable is a definition for a
   // non-static data member.
@@ -4196,26 +4214,16 @@
 ///
 /// \param PointOfInstantiation the point at which the instantiation was
 /// required. Note that this is not precisely a "point of instantiation"
-/// for the function, but it's close.
+/// for the variable, but it's close.
 ///
-/// \param Var the already-instantiated declaration of a static member
-/// variable of a class template specialization.
+/// \param Var the already-instantiated declaration of a templated variable.
 ///
 /// \param Recursive if true, recursively instantiates any functions that
 /// are required by this instantiation.
 ///
 /// \param DefinitionRequired if true, then we are performing an explicit
-/// instantiation where an out-of-line definition of the member variable
-/// is required. Complain if there is no such definition.
-void Sema::InstantiateStaticDataMemberDefinition(
-                                          SourceLocation PointOfInstantiation,
-                                                 VarDecl *Var,
-                                                 bool Recursive,
-                                                 bool DefinitionRequired) {
-  InstantiateVariableDefinition(PointOfInstantiation, Var, Recursive,
-                                DefinitionRequired);
-}
-
+/// instantiation where a definition of the variable is required. Complain
+/// if there is no such definition.
 void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
                                          VarDecl *Var, bool Recursive,
                                       bool DefinitionRequired, bool AtEndOfTU) {
@@ -4272,6 +4280,11 @@
     // If this is a static data member template, there might be an
     // uninstantiated initializer on the declaration. If so, instantiate
     // it now.
+    //
+    // FIXME: This largely duplicates what we would do below. The difference
+    // is that along this path we may instantiate an initializer from an
+    // in-class declaration of the template and instantiate the definition
+    // from a separate out-of-class definition.
     if (PatternDecl->isStaticDataMember() &&
         (PatternDecl = PatternDecl->getFirstDecl())->hasInit() &&
         !Var->hasInit()) {
@@ -4332,7 +4345,8 @@
         std::make_pair(Var, PointOfInstantiation));
     } else if (TSK == TSK_ImplicitInstantiation) {
       // Warn about missing definition at the end of translation unit.
-      if (AtEndOfTU && !getDiagnostics().hasErrorOccurred()) {
+      if (AtEndOfTU && !getDiagnostics().hasErrorOccurred() &&
+          !getSourceManager().isInSystemHeader(PatternDecl->getLocStart())) {
         Diag(PointOfInstantiation, diag::warn_var_template_missing)
           << Var;
         Diag(PatternDecl->getLocation(), diag::note_forward_template_decl);
@@ -5189,7 +5203,9 @@
     case TSK_ExplicitInstantiationDefinition:
       // We only need an instantiation if the pending instantiation *is* the
       // explicit instantiation.
-      if (Var != Var->getMostRecentDecl()) continue;
+      if (Var != Var->getMostRecentDecl())
+        continue;
+      break;
     case TSK_ImplicitInstantiation:
       break;
     }
diff --git a/lib/Sema/SemaTemplateVariadic.cpp b/lib/Sema/SemaTemplateVariadic.cpp
index d81837d..7aa0e31 100644
--- a/lib/Sema/SemaTemplateVariadic.cpp
+++ b/lib/Sema/SemaTemplateVariadic.cpp
@@ -314,8 +314,18 @@
   // later.
   SmallVector<UnexpandedParameterPack, 4> LambdaParamPackReferences;
   for (unsigned N = FunctionScopes.size(); N; --N) {
-    if (sema::LambdaScopeInfo *LSI =
-          dyn_cast<sema::LambdaScopeInfo>(FunctionScopes[N-1])) {
+    sema::FunctionScopeInfo *Func = FunctionScopes[N-1];
+    // We do not permit pack expansion that would duplicate a statement
+    // expression, not even within a lambda.
+    // FIXME: We could probably support this for statement expressions that do
+    // not contain labels, and for pack expansions that expand both the stmt
+    // expr and the enclosing lambda.
+    if (std::any_of(
+            Func->CompoundScopes.begin(), Func->CompoundScopes.end(),
+            [](sema::CompoundScopeInfo &CSI) { return CSI.IsStmtExpr; }))
+      break;
+
+    if (auto *LSI = dyn_cast<sema::LambdaScopeInfo>(Func)) {
       if (N == FunctionScopes.size()) {
         for (auto &Param : Unexpanded) {
           auto *PD = dyn_cast_or_null<ParmVarDecl>(
diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp
index 80b6c1d..9791003 100644
--- a/lib/Sema/SemaType.cpp
+++ b/lib/Sema/SemaType.cpp
@@ -31,6 +31,7 @@
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
+#include "clang/Sema/TemplateInstCallback.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -47,7 +48,7 @@
 /// isOmittedBlockReturnType - Return true if this declarator is missing a
 /// return type because this is a omitted return type on a block literal.
 static bool isOmittedBlockReturnType(const Declarator &D) {
-  if (D.getContext() != Declarator::BlockLiteralContext ||
+  if (D.getContext() != DeclaratorContext::BlockLiteralContext ||
       D.getDeclSpec().hasTypeSpecifier())
     return false;
 
@@ -367,7 +368,7 @@
           if (onlyBlockPointers)
             continue;
 
-          // fallthrough
+          LLVM_FALLTHROUGH;
 
         case DeclaratorChunk::BlockPointer:
           result = &ptrChunk;
@@ -1291,11 +1292,12 @@
     // The declspec is always missing in a lambda expr context; it is either
     // specified with a trailing return type or inferred.
     if (S.getLangOpts().CPlusPlus14 &&
-        declarator.getContext() == Declarator::LambdaExprContext) {
+        declarator.getContext() == DeclaratorContext::LambdaExprContext) {
       // In C++1y, a lambda's implicit return type is 'auto'.
       Result = Context.getAutoDeductType();
       break;
-    } else if (declarator.getContext() == Declarator::LambdaExprContext ||
+    } else if (declarator.getContext() ==
+                   DeclaratorContext::LambdaExprContext ||
                checkOmittedBlockReturnType(S, declarator,
                                            Context.DependentTy)) {
       Result = Context.DependentTy;
@@ -1340,7 +1342,7 @@
       }
     }
 
-    // FALL THROUGH.
+    LLVM_FALLTHROUGH;
   case DeclSpec::TST_int: {
     if (DS.getTypeSpecSign() != DeclSpec::TSS_unsigned) {
       switch (DS.getTypeSpecWidth()) {
@@ -1572,7 +1574,7 @@
 
   // Before we process any type attributes, synthesize a block literal
   // function declarator if necessary.
-  if (declarator.getContext() == Declarator::BlockLiteralContext)
+  if (declarator.getContext() == DeclaratorContext::BlockLiteralContext)
     maybeSynthesizeBlockSignature(state, Result);
 
   // Apply any type attributes from the decl spec.  This may cause the
@@ -2180,9 +2182,18 @@
     Diag(Loc, diag::err_opencl_vla);
     return QualType();
   }
-  // CUDA device code doesn't support VLAs.
-  if (getLangOpts().CUDA && T->isVariableArrayType())
-    CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentCUDATarget();
+
+  if (T->isVariableArrayType() && !Context.getTargetInfo().isVLASupported()) {
+    if (getLangOpts().CUDA) {
+      // CUDA device code doesn't support VLAs.
+      CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentCUDATarget();
+    } else if (!getLangOpts().OpenMP ||
+               shouldDiagnoseTargetSupportFromOpenMP()) {
+      // Some targets don't support VLAs.
+      Diag(Loc, diag::err_vla_unsupported);
+      return QualType();
+    }
+  }
 
   // If this is not C99, extwarn about VLA's and C99 array size modifiers.
   if (!getLangOpts().C99) {
@@ -2693,7 +2704,7 @@
   // If the qualifiers come from a conversion function type, don't diagnose
   // them -- they're not necessarily redundant, since such a conversion
   // operator can be explicitly called as "x.operator const int()".
-  if (D.getName().getKind() == UnqualifiedId::IK_ConversionFunctionId)
+  if (D.getName().getKind() == UnqualifiedIdKind::IK_ConversionFunctionId)
     return;
 
   // Just parens all the way out to the decl specifiers. Diagnose any qualifiers
@@ -2719,11 +2730,11 @@
   TagDecl *OwnedTagDecl = nullptr;
 
   switch (D.getName().getKind()) {
-  case UnqualifiedId::IK_ImplicitSelfParam:
-  case UnqualifiedId::IK_OperatorFunctionId:
-  case UnqualifiedId::IK_Identifier:
-  case UnqualifiedId::IK_LiteralOperatorId:
-  case UnqualifiedId::IK_TemplateId:
+  case UnqualifiedIdKind::IK_ImplicitSelfParam:
+  case UnqualifiedIdKind::IK_OperatorFunctionId:
+  case UnqualifiedIdKind::IK_Identifier:
+  case UnqualifiedIdKind::IK_LiteralOperatorId:
+  case UnqualifiedIdKind::IK_TemplateId:
     T = ConvertDeclSpecToType(state);
 
     if (!D.isInvalidType() && D.getDeclSpec().isTypeSpecOwned()) {
@@ -2733,9 +2744,9 @@
     }
     break;
 
-  case UnqualifiedId::IK_ConstructorName:
-  case UnqualifiedId::IK_ConstructorTemplateId:
-  case UnqualifiedId::IK_DestructorName:
+  case UnqualifiedIdKind::IK_ConstructorName:
+  case UnqualifiedIdKind::IK_ConstructorTemplateId:
+  case UnqualifiedIdKind::IK_DestructorName:
     // Constructors and destructors don't have return types. Use
     // "void" instead.
     T = SemaRef.Context.VoidTy;
@@ -2743,13 +2754,13 @@
                      D.getDeclSpec().getAttributes().getList());
     break;
 
-  case UnqualifiedId::IK_DeductionGuideName:
+  case UnqualifiedIdKind::IK_DeductionGuideName:
     // Deduction guides have a trailing return type and no type in their
     // decl-specifier sequence. Use a placeholder return type for now.
     T = SemaRef.Context.DependentTy;
     break;
 
-  case UnqualifiedId::IK_ConversionFunctionId:
+  case UnqualifiedIdKind::IK_ConversionFunctionId:
     // The result type of a conversion function is the type that it
     // converts to.
     T = SemaRef.GetTypeFromParser(D.getName().ConversionFunctionId,
@@ -2771,16 +2782,16 @@
         (Auto && Auto->getKeyword() != AutoTypeKeyword::GNUAutoType);
 
     switch (D.getContext()) {
-    case Declarator::LambdaExprContext:
+    case DeclaratorContext::LambdaExprContext:
       // Declared return type of a lambda-declarator is implicit and is always
       // 'auto'.
       break;
-    case Declarator::ObjCParameterContext:
-    case Declarator::ObjCResultContext:
-    case Declarator::PrototypeContext:
+    case DeclaratorContext::ObjCParameterContext:
+    case DeclaratorContext::ObjCResultContext:
+    case DeclaratorContext::PrototypeContext:
       Error = 0;  
       break;
-    case Declarator::LambdaExprParameterContext:
+    case DeclaratorContext::LambdaExprParameterContext:
       // In C++14, generic lambdas allow 'auto' in their parameters.
       if (!SemaRef.getLangOpts().CPlusPlus14 ||
           !Auto || Auto->getKeyword() != AutoTypeKeyword::Auto)
@@ -2812,7 +2823,7 @@
             T, QualType(CorrespondingTemplateParam->getTypeForDecl(), 0));
       }
       break;
-    case Declarator::MemberContext: {
+    case DeclaratorContext::MemberContext: {
       if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static ||
           D.isFunctionDeclarator())
         break;
@@ -2828,57 +2839,66 @@
         Error = 20; // Friend type
       break;
     }
-    case Declarator::CXXCatchContext:
-    case Declarator::ObjCCatchContext:
+    case DeclaratorContext::CXXCatchContext:
+    case DeclaratorContext::ObjCCatchContext:
       Error = 7; // Exception declaration
       break;
-    case Declarator::TemplateParamContext:
+    case DeclaratorContext::TemplateParamContext:
       if (isa<DeducedTemplateSpecializationType>(Deduced))
         Error = 19; // Template parameter
-      else if (!SemaRef.getLangOpts().CPlusPlus1z)
-        Error = 8; // Template parameter (until C++1z)
+      else if (!SemaRef.getLangOpts().CPlusPlus17)
+        Error = 8; // Template parameter (until C++17)
       break;
-    case Declarator::BlockLiteralContext:
+    case DeclaratorContext::BlockLiteralContext:
       Error = 9; // Block literal
       break;
-    case Declarator::TemplateTypeArgContext:
+    case DeclaratorContext::TemplateArgContext:
+      // Within a template argument list, a deduced template specialization
+      // type will be reinterpreted as a template template argument.
+      if (isa<DeducedTemplateSpecializationType>(Deduced) &&
+          !D.getNumTypeObjects() &&
+          D.getDeclSpec().getParsedSpecifiers() == DeclSpec::PQ_TypeSpecifier)
+        break;
+      LLVM_FALLTHROUGH;
+    case DeclaratorContext::TemplateTypeArgContext:
       Error = 10; // Template type argument
       break;
-    case Declarator::AliasDeclContext:
-    case Declarator::AliasTemplateContext:
+    case DeclaratorContext::AliasDeclContext:
+    case DeclaratorContext::AliasTemplateContext:
       Error = 12; // Type alias
       break;
-    case Declarator::TrailingReturnContext:
+    case DeclaratorContext::TrailingReturnContext:
+    case DeclaratorContext::TrailingReturnVarContext:
       if (!SemaRef.getLangOpts().CPlusPlus14 || !IsCXXAutoType)
         Error = 13; // Function return type
       break;
-    case Declarator::ConversionIdContext:
+    case DeclaratorContext::ConversionIdContext:
       if (!SemaRef.getLangOpts().CPlusPlus14 || !IsCXXAutoType)
         Error = 14; // conversion-type-id
       break;
-    case Declarator::FunctionalCastContext:
+    case DeclaratorContext::FunctionalCastContext:
       if (isa<DeducedTemplateSpecializationType>(Deduced))
         break;
       LLVM_FALLTHROUGH;
-    case Declarator::TypeNameContext:
+    case DeclaratorContext::TypeNameContext:
       Error = 15; // Generic
       break;
-    case Declarator::FileContext:
-    case Declarator::BlockContext:
-    case Declarator::ForContext:
-    case Declarator::InitStmtContext:
-    case Declarator::ConditionContext:
+    case DeclaratorContext::FileContext:
+    case DeclaratorContext::BlockContext:
+    case DeclaratorContext::ForContext:
+    case DeclaratorContext::InitStmtContext:
+    case DeclaratorContext::ConditionContext:
       // FIXME: P0091R3 (erroneously) does not permit class template argument
       // deduction in conditions, for-init-statements, and other declarations
       // that are not simple-declarations.
       break;
-    case Declarator::CXXNewContext:
+    case DeclaratorContext::CXXNewContext:
       // FIXME: P0091R3 does not permit class template argument deduction here,
       // but we follow GCC and allow it anyway.
       if (!IsCXXAutoType && !isa<DeducedTemplateSpecializationType>(Deduced))
         Error = 17; // 'new' type
       break;
-    case Declarator::KNRTypeListContext:
+    case DeclaratorContext::KNRTypeListContext:
       Error = 18; // K&R function parameter
       break;
     }
@@ -2907,7 +2927,7 @@
     }
 
     SourceRange AutoRange = D.getDeclSpec().getTypeSpecTypeLoc();
-    if (D.getName().getKind() == UnqualifiedId::IK_ConversionFunctionId)
+    if (D.getName().getKind() == UnqualifiedIdKind::IK_ConversionFunctionId)
       AutoRange = D.getName().getSourceRange();
 
     if (Error != -1) {
@@ -2950,47 +2970,49 @@
     // or enumeration in a type-specifier-seq.
     unsigned DiagID = 0;
     switch (D.getContext()) {
-    case Declarator::TrailingReturnContext:
+    case DeclaratorContext::TrailingReturnContext:
+    case DeclaratorContext::TrailingReturnVarContext:
       // Class and enumeration definitions are syntactically not allowed in
       // trailing return types.
       llvm_unreachable("parser should not have allowed this");
       break;
-    case Declarator::FileContext:
-    case Declarator::MemberContext:
-    case Declarator::BlockContext:
-    case Declarator::ForContext:
-    case Declarator::InitStmtContext:
-    case Declarator::BlockLiteralContext:
-    case Declarator::LambdaExprContext:
+    case DeclaratorContext::FileContext:
+    case DeclaratorContext::MemberContext:
+    case DeclaratorContext::BlockContext:
+    case DeclaratorContext::ForContext:
+    case DeclaratorContext::InitStmtContext:
+    case DeclaratorContext::BlockLiteralContext:
+    case DeclaratorContext::LambdaExprContext:
       // C++11 [dcl.type]p3:
       //   A type-specifier-seq shall not define a class or enumeration unless
       //   it appears in the type-id of an alias-declaration (7.1.3) that is not
       //   the declaration of a template-declaration.
-    case Declarator::AliasDeclContext:
+    case DeclaratorContext::AliasDeclContext:
       break;
-    case Declarator::AliasTemplateContext:
+    case DeclaratorContext::AliasTemplateContext:
       DiagID = diag::err_type_defined_in_alias_template;
       break;
-    case Declarator::TypeNameContext:
-    case Declarator::FunctionalCastContext:
-    case Declarator::ConversionIdContext:
-    case Declarator::TemplateParamContext:
-    case Declarator::CXXNewContext:
-    case Declarator::CXXCatchContext:
-    case Declarator::ObjCCatchContext:
-    case Declarator::TemplateTypeArgContext:
+    case DeclaratorContext::TypeNameContext:
+    case DeclaratorContext::FunctionalCastContext:
+    case DeclaratorContext::ConversionIdContext:
+    case DeclaratorContext::TemplateParamContext:
+    case DeclaratorContext::CXXNewContext:
+    case DeclaratorContext::CXXCatchContext:
+    case DeclaratorContext::ObjCCatchContext:
+    case DeclaratorContext::TemplateArgContext:
+    case DeclaratorContext::TemplateTypeArgContext:
       DiagID = diag::err_type_defined_in_type_specifier;
       break;
-    case Declarator::PrototypeContext:
-    case Declarator::LambdaExprParameterContext:
-    case Declarator::ObjCParameterContext:
-    case Declarator::ObjCResultContext:
-    case Declarator::KNRTypeListContext:
+    case DeclaratorContext::PrototypeContext:
+    case DeclaratorContext::LambdaExprParameterContext:
+    case DeclaratorContext::ObjCParameterContext:
+    case DeclaratorContext::ObjCResultContext:
+    case DeclaratorContext::KNRTypeListContext:
       // C++ [dcl.fct]p6:
       //   Types shall not be defined in return or parameter types.
       DiagID = diag::err_type_defined_in_param_type;
       break;
-    case Declarator::ConditionContext:
+    case DeclaratorContext::ConditionContext:
       // C++ 6.4p2:
       // The type-specifier-seq shall not contain typedef and shall not declare
       // a new class or enumeration.
@@ -3039,7 +3061,7 @@
 
   // Inside a condition, a direct initializer is not permitted. We allow one to
   // be parsed in order to give better diagnostics in condition parsing.
-  if (D.getContext() == Declarator::ConditionContext)
+  if (D.getContext() == DeclaratorContext::ConditionContext)
     return;
 
   SourceRange ParenRange(DeclType.Loc, DeclType.EndLoc);
@@ -3128,10 +3150,15 @@
       (T->isRecordType() || T->isDependentType()) &&
       D.getDeclSpec().getTypeQualifiers() == 0 && D.isFirstDeclarator();
 
+  bool StartsWithDeclaratorId = true;
   for (auto &C : D.type_objects()) {
     switch (C.Kind) {
-    case DeclaratorChunk::Pointer:
     case DeclaratorChunk::Paren:
+      if (&C == &Paren)
+        continue;
+      LLVM_FALLTHROUGH;
+    case DeclaratorChunk::Pointer:
+      StartsWithDeclaratorId = false;
       continue;
 
     case DeclaratorChunk::Array:
@@ -3145,18 +3172,25 @@
       // We assume that something like 'T (&x) = y;' is highly likely to not
       // be intended to be a temporary object.
       CouldBeTemporaryObject = false;
+      StartsWithDeclaratorId = false;
       continue;
 
     case DeclaratorChunk::Function:
       // In a new-type-id, function chunks require parentheses.
-      if (D.getContext() == Declarator::CXXNewContext)
+      if (D.getContext() == DeclaratorContext::CXXNewContext)
         return;
-      LLVM_FALLTHROUGH;
+      // FIXME: "A(f())" deserves a vexing-parse warning, not just a
+      // redundant-parens warning, but we don't know whether the function
+      // chunk was syntactically valid as an expression here.
+      CouldBeTemporaryObject = false;
+      continue;
+
     case DeclaratorChunk::BlockPointer:
     case DeclaratorChunk::MemberPointer:
     case DeclaratorChunk::Pipe:
       // These cannot appear in expressions.
       CouldBeTemporaryObject = false;
+      StartsWithDeclaratorId = false;
       continue;
     }
   }
@@ -3177,6 +3211,18 @@
   SourceRange ParenRange(Paren.Loc, Paren.EndLoc);
 
   if (!CouldBeTemporaryObject) {
+    // If we have A (::B), the parentheses affect the meaning of the program.
+    // Suppress the warning in that case. Don't bother looking at the DeclSpec
+    // here: even (e.g.) "int ::x" is visually ambiguous even though it's
+    // formally unambiguous.
+    if (StartsWithDeclaratorId && D.getCXXScopeSpec().isValid()) {
+      for (NestedNameSpecifier *NNS = D.getCXXScopeSpec().getScopeRep(); NNS;
+           NNS = NNS->getPrefix()) {
+        if (NNS->getKind() == NestedNameSpecifier::Global)
+          return;
+      }
+    }
+
     S.Diag(Paren.Loc, diag::warn_redundant_parens_around_declarator)
         << ParenRange << FixItHint::CreateRemoval(Paren.Loc)
         << FixItHint::CreateRemoval(Paren.EndLoc);
@@ -3249,7 +3295,7 @@
       // in a member pointer.
       IsCXXInstanceMethod =
           D.getTypeObject(I).Kind == DeclaratorChunk::MemberPointer;
-    } else if (D.getContext() == Declarator::LambdaExprContext) {
+    } else if (D.getContext() == DeclaratorContext::LambdaExprContext) {
       // This can only be a call operator for a lambda, which is an instance
       // method.
       IsCXXInstanceMethod = true;
@@ -3482,13 +3528,20 @@
         isCFError = (S.CFError == recordDecl);
       } else {
         // Check whether this is CFError, which we identify based on its bridge
-        // to NSError.
+        // to NSError. CFErrorRef used to be declared with "objc_bridge" but is
+        // now declared with "objc_bridge_mutable", so look for either one of
+        // the two attributes.
         if (recordDecl->getTagKind() == TTK_Struct && numNormalPointers > 0) {
-          if (auto bridgeAttr = recordDecl->getAttr<ObjCBridgeAttr>()) {
-            if (bridgeAttr->getBridgedType() == S.getNSErrorIdent()) {
-              S.CFError = recordDecl;
-              isCFError = true;
-            }
+          IdentifierInfo *bridgedType = nullptr;
+          if (auto bridgeAttr = recordDecl->getAttr<ObjCBridgeAttr>())
+            bridgedType = bridgeAttr->getBridgedType();
+          else if (auto bridgeAttr =
+                       recordDecl->getAttr<ObjCBridgeMutableAttr>())
+            bridgedType = bridgeAttr->getBridgedType();
+
+          if (bridgedType == S.getNSErrorIdent()) {
+            S.CFError = recordDecl;
+            isCFError = true;
           }
         }
       }
@@ -3739,8 +3792,8 @@
   // Does this declaration declare a typedef-name?
   bool IsTypedefName =
     D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef ||
-    D.getContext() == Declarator::AliasDeclContext ||
-    D.getContext() == Declarator::AliasTemplateContext;
+    D.getContext() == DeclaratorContext::AliasDeclContext ||
+    D.getContext() == DeclaratorContext::AliasTemplateContext;
 
   // Does T refer to a function type with a cv-qualifier or a ref-qualifier?
   bool IsQualifiedFunction = T->isFunctionProtoType() &&
@@ -3869,14 +3922,15 @@
   } else {
     bool isFunctionOrMethod = false;
     switch (auto context = state.getDeclarator().getContext()) {
-    case Declarator::ObjCParameterContext:
-    case Declarator::ObjCResultContext:
-    case Declarator::PrototypeContext:
-    case Declarator::TrailingReturnContext:
+    case DeclaratorContext::ObjCParameterContext:
+    case DeclaratorContext::ObjCResultContext:
+    case DeclaratorContext::PrototypeContext:
+    case DeclaratorContext::TrailingReturnContext:
+    case DeclaratorContext::TrailingReturnVarContext:
       isFunctionOrMethod = true;
-      // fallthrough
+      LLVM_FALLTHROUGH;
 
-    case Declarator::MemberContext:
+    case DeclaratorContext::MemberContext:
       if (state.getDeclarator().isObjCIvar() && !isFunctionOrMethod) {
         complainAboutMissingNullability = CAMN_No;
         break;
@@ -3888,10 +3942,10 @@
         break;
       }
 
-      // fallthrough
+      LLVM_FALLTHROUGH;
 
-    case Declarator::FileContext:
-    case Declarator::KNRTypeListContext: {
+    case DeclaratorContext::FileContext:
+    case DeclaratorContext::KNRTypeListContext: {
       complainAboutMissingNullability = CAMN_Yes;
 
       // Nullability inference depends on the type and declarator.
@@ -3907,8 +3961,9 @@
         if (inAssumeNonNullRegion) {
           complainAboutInferringWithinChunk = wrappingKind;
           inferNullability = NullabilityKind::NonNull;
-          inferNullabilityCS = (context == Declarator::ObjCParameterContext ||
-                                context == Declarator::ObjCResultContext);
+          inferNullabilityCS =
+              (context == DeclaratorContext::ObjCParameterContext ||
+               context == DeclaratorContext::ObjCResultContext);
         }
         break;
 
@@ -3948,26 +4003,27 @@
       break;
     }
 
-    case Declarator::ConversionIdContext:
+    case DeclaratorContext::ConversionIdContext:
       complainAboutMissingNullability = CAMN_Yes;
       break;
 
-    case Declarator::AliasDeclContext:
-    case Declarator::AliasTemplateContext:
-    case Declarator::BlockContext:
-    case Declarator::BlockLiteralContext:
-    case Declarator::ConditionContext:
-    case Declarator::CXXCatchContext:
-    case Declarator::CXXNewContext:
-    case Declarator::ForContext:
-    case Declarator::InitStmtContext:
-    case Declarator::LambdaExprContext:
-    case Declarator::LambdaExprParameterContext:
-    case Declarator::ObjCCatchContext:
-    case Declarator::TemplateParamContext:
-    case Declarator::TemplateTypeArgContext:
-    case Declarator::TypeNameContext:
-    case Declarator::FunctionalCastContext:
+    case DeclaratorContext::AliasDeclContext:
+    case DeclaratorContext::AliasTemplateContext:
+    case DeclaratorContext::BlockContext:
+    case DeclaratorContext::BlockLiteralContext:
+    case DeclaratorContext::ConditionContext:
+    case DeclaratorContext::CXXCatchContext:
+    case DeclaratorContext::CXXNewContext:
+    case DeclaratorContext::ForContext:
+    case DeclaratorContext::InitStmtContext:
+    case DeclaratorContext::LambdaExprContext:
+    case DeclaratorContext::LambdaExprParameterContext:
+    case DeclaratorContext::ObjCCatchContext:
+    case DeclaratorContext::TemplateParamContext:
+    case DeclaratorContext::TemplateArgContext:
+    case DeclaratorContext::TemplateTypeArgContext:
+    case DeclaratorContext::TypeNameContext:
+    case DeclaratorContext::FunctionalCastContext:
       // Don't infer in these contexts.
       break;
     }
@@ -4047,7 +4103,7 @@
     case CAMN_InnerPointers:
       if (NumPointersRemaining == 0)
         break;
-      // Fallthrough.
+      LLVM_FALLTHROUGH;
 
     case CAMN_Yes:
       checkNullabilityConsistency(S, pointerKind, pointerLoc, pointerEndLoc);
@@ -4203,7 +4259,7 @@
       // array type, ...
       if (ASM == ArrayType::Static || ATI.TypeQuals) {
         if (!(D.isPrototypeContext() ||
-              D.getContext() == Declarator::KNRTypeListContext)) {
+              D.getContext() == DeclaratorContext::KNRTypeListContext)) {
           S.Diag(DeclType.Loc, diag::err_array_static_outside_prototype) <<
               (ASM == ArrayType::Static ? "'static'" : "type qualifier");
           // Remove the 'static' and the type qualifiers.
@@ -4227,7 +4283,8 @@
       const AutoType *AT = T->getContainedAutoType();
       // Allow arrays of auto if we are a generic lambda parameter.
       // i.e. [](auto (&array)[5]) { return array[0]; }; OK
-      if (AT && D.getContext() != Declarator::LambdaExprParameterContext) {
+      if (AT &&
+          D.getContext() != DeclaratorContext::LambdaExprParameterContext) {
         // We've already diagnosed this for decltype(auto).
         if (!AT->isDecltypeAuto())
           S.Diag(DeclType.Loc, diag::err_illegal_decl_array_of_auto)
@@ -4279,14 +4336,14 @@
               << T << D.getSourceRange();
             D.setInvalidType(true);
           } else if (D.getName().getKind() ==
-                         UnqualifiedId::IK_DeductionGuideName) {
+                     UnqualifiedIdKind::IK_DeductionGuideName) {
             if (T != Context.DependentTy) {
               S.Diag(D.getDeclSpec().getLocStart(),
                      diag::err_deduction_guide_with_complex_decl)
                   << D.getSourceRange();
               D.setInvalidType(true);
             }
-          } else if (D.getContext() != Declarator::LambdaExprContext &&
+          } else if (D.getContext() != DeclaratorContext::LambdaExprContext &&
                      (T.hasQualifiers() || !isa<AutoType>(T) ||
                       cast<AutoType>(T)->getKeyword() !=
                           AutoTypeKeyword::Auto)) {
@@ -4307,12 +4364,13 @@
       // C99 6.7.5.3p1: The return type may not be a function or array type.
       // For conversion functions, we'll diagnose this particular error later.
       if (!D.isInvalidType() && (T->isArrayType() || T->isFunctionType()) &&
-          (D.getName().getKind() != UnqualifiedId::IK_ConversionFunctionId)) {
+          (D.getName().getKind() !=
+           UnqualifiedIdKind::IK_ConversionFunctionId)) {
         unsigned diagID = diag::err_func_returning_array_function;
         // Last processing chunk in block context means this function chunk
         // represents the block.
         if (chunkIndex == 0 &&
-            D.getContext() == Declarator::BlockLiteralContext)
+            D.getContext() == DeclaratorContext::BlockLiteralContext)
           diagID = diag::err_block_returning_array_function;
         S.Diag(DeclType.Loc, diagID) << T->isFunctionType() << T;
         T = Context.IntTy;
@@ -4447,11 +4505,11 @@
 
       // Exception specs are not allowed in typedefs. Complain, but add it
       // anyway.
-      if (IsTypedefName && FTI.getExceptionSpecType() && !LangOpts.CPlusPlus1z)
+      if (IsTypedefName && FTI.getExceptionSpecType() && !LangOpts.CPlusPlus17)
         S.Diag(FTI.getExceptionSpecLocBeg(),
                diag::err_exception_spec_in_typedef)
-            << (D.getContext() == Declarator::AliasDeclContext ||
-                D.getContext() == Declarator::AliasTemplateContext);
+            << (D.getContext() == DeclaratorContext::AliasDeclContext ||
+                D.getContext() == DeclaratorContext::AliasTemplateContext);
 
       // If we see "T var();" or "T var(T());" at block scope, it is probably
       // an attempt to initialize a variable, not a function declaration.
@@ -4723,7 +4781,7 @@
         break;
       case DeclaratorChunk::Function: {
         const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
-        if (FTI.NumParams == 0)
+        if (FTI.NumParams == 0 && !FTI.isVariadic)
           S.Diag(DeclType.Loc, diag::warn_strict_prototypes)
               << IsBlock
               << FixItHint::CreateInsertion(FTI.getRParenLoc(), "void");
@@ -4751,11 +4809,11 @@
     // Core issue 547 also allows cv-qualifiers on function types that are
     // top-level template type arguments.
     enum { NonMember, Member, DeductionGuide } Kind = NonMember;
-    if (D.getName().getKind() == UnqualifiedId::IK_DeductionGuideName)
+    if (D.getName().getKind() == UnqualifiedIdKind::IK_DeductionGuideName)
       Kind = DeductionGuide;
     else if (!D.getCXXScopeSpec().isSet()) {
-      if ((D.getContext() == Declarator::MemberContext ||
-           D.getContext() == Declarator::LambdaExprContext) &&
+      if ((D.getContext() == DeclaratorContext::MemberContext ||
+           D.getContext() == DeclaratorContext::LambdaExprContext) &&
           !D.getDeclSpec().isFriendSpecified())
         Kind = Member;
     } else {
@@ -4784,7 +4842,8 @@
         !(Kind == Member &&
           D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_static) &&
         !IsTypedefName &&
-        D.getContext() != Declarator::TemplateTypeArgContext) {
+        D.getContext() != DeclaratorContext::TemplateArgContext &&
+        D.getContext() != DeclaratorContext::TemplateTypeArgContext) {
       SourceLocation Loc = D.getLocStart();
       SourceRange RemovalRange;
       unsigned I;
@@ -4850,8 +4909,8 @@
     //   only be used in a parameter-declaration. Such a parameter-declaration
     //   is a parameter pack (14.5.3). [...]
     switch (D.getContext()) {
-    case Declarator::PrototypeContext:
-    case Declarator::LambdaExprParameterContext:
+    case DeclaratorContext::PrototypeContext:
+    case DeclaratorContext::LambdaExprParameterContext:
       // C++0x [dcl.fct]p13:
       //   [...] When it is part of a parameter-declaration-clause, the
       //   parameter pack is a function parameter pack (14.5.3). The type T
@@ -4870,7 +4929,7 @@
         T = Context.getPackExpansionType(T, None);
       }
       break;
-    case Declarator::TemplateParamContext:
+    case DeclaratorContext::TemplateParamContext:
       // C++0x [temp.param]p15:
       //   If a template-parameter is a [...] is a parameter-declaration that
       //   declares a parameter pack (8.3.5), then the template-parameter is a
@@ -4888,27 +4947,31 @@
                  : diag::ext_variadic_templates);
       break;
 
-    case Declarator::FileContext:
-    case Declarator::KNRTypeListContext:
-    case Declarator::ObjCParameterContext:  // FIXME: special diagnostic here?
-    case Declarator::ObjCResultContext:     // FIXME: special diagnostic here?
-    case Declarator::TypeNameContext:
-    case Declarator::FunctionalCastContext:
-    case Declarator::CXXNewContext:
-    case Declarator::AliasDeclContext:
-    case Declarator::AliasTemplateContext:
-    case Declarator::MemberContext:
-    case Declarator::BlockContext:
-    case Declarator::ForContext:
-    case Declarator::InitStmtContext:
-    case Declarator::ConditionContext:
-    case Declarator::CXXCatchContext:
-    case Declarator::ObjCCatchContext:
-    case Declarator::BlockLiteralContext:
-    case Declarator::LambdaExprContext:
-    case Declarator::ConversionIdContext:
-    case Declarator::TrailingReturnContext:
-    case Declarator::TemplateTypeArgContext:
+    case DeclaratorContext::FileContext:
+    case DeclaratorContext::KNRTypeListContext:
+    case DeclaratorContext::ObjCParameterContext:  // FIXME: special diagnostic
+                                                   // here?
+    case DeclaratorContext::ObjCResultContext:     // FIXME: special diagnostic
+                                                   // here?
+    case DeclaratorContext::TypeNameContext:
+    case DeclaratorContext::FunctionalCastContext:
+    case DeclaratorContext::CXXNewContext:
+    case DeclaratorContext::AliasDeclContext:
+    case DeclaratorContext::AliasTemplateContext:
+    case DeclaratorContext::MemberContext:
+    case DeclaratorContext::BlockContext:
+    case DeclaratorContext::ForContext:
+    case DeclaratorContext::InitStmtContext:
+    case DeclaratorContext::ConditionContext:
+    case DeclaratorContext::CXXCatchContext:
+    case DeclaratorContext::ObjCCatchContext:
+    case DeclaratorContext::BlockLiteralContext:
+    case DeclaratorContext::LambdaExprContext:
+    case DeclaratorContext::ConversionIdContext:
+    case DeclaratorContext::TrailingReturnContext:
+    case DeclaratorContext::TrailingReturnVarContext:
+    case DeclaratorContext::TemplateArgContext:
+    case DeclaratorContext::TemplateTypeArgContext:
       // FIXME: We may want to allow parameter packs in block-literal contexts
       // in the future.
       S.Diag(D.getEllipsisLoc(),
@@ -5597,9 +5660,9 @@
   // to apply them to the actual parameter declaration.
   // Likewise, we don't want to do this for alias declarations, because
   // we are actually going to build a declaration from this eventually.
-  if (D.getContext() != Declarator::ObjCParameterContext &&
-      D.getContext() != Declarator::AliasDeclContext &&
-      D.getContext() != Declarator::AliasTemplateContext)
+  if (D.getContext() != DeclaratorContext::ObjCParameterContext &&
+      D.getContext() != DeclaratorContext::AliasDeclContext &&
+      D.getContext() != DeclaratorContext::AliasTemplateContext)
     checkUnusedDeclAttributes(D);
 
   if (getLangOpts().CPlusPlus) {
@@ -7026,7 +7089,7 @@
       IsFuncReturnType || IsFuncType ||
       // Do not deduce addr space for member types of struct, except the pointee
       // type of a pointer member type.
-      (D.getContext() == Declarator::MemberContext && !IsPointee) ||
+      (D.getContext() == DeclaratorContext::MemberContext && !IsPointee) ||
       // Do not deduce addr space for types used to define a typedef and the
       // typedef itself, except the pointee type of a pointer type which is used
       // to define the typedef.
@@ -7058,7 +7121,7 @@
     if (IsPointee) {
       ImpAddr = LangAS::opencl_generic;
     } else {
-      if (D.getContext() == Declarator::FileContext) {
+      if (D.getContext() == DeclaratorContext::FileContext) {
         ImpAddr = LangAS::opencl_global;
       } else {
         if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static ||
@@ -7253,31 +7316,29 @@
   if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParens())) {
     if (VarDecl *Var = dyn_cast<VarDecl>(DRE->getDecl())) {
       if (isTemplateInstantiation(Var->getTemplateSpecializationKind())) {
-        SourceLocation PointOfInstantiation = E->getExprLoc();
+        auto *Def = Var->getDefinition();
+        if (!Def) {
+          SourceLocation PointOfInstantiation = E->getExprLoc();
+          InstantiateVariableDefinition(PointOfInstantiation, Var);
+          Def = Var->getDefinition();
 
-        if (MemberSpecializationInfo *MSInfo =
-                Var->getMemberSpecializationInfo()) {
-          // If we don't already have a point of instantiation, this is it.
-          if (MSInfo->getPointOfInstantiation().isInvalid()) {
-            MSInfo->setPointOfInstantiation(PointOfInstantiation);
-
-            // This is a modification of an existing AST node. Notify
-            // listeners.
-            if (ASTMutationListener *L = getASTMutationListener())
-              L->StaticDataMemberInstantiated(Var);
+          // If we don't already have a point of instantiation, and we managed
+          // to instantiate a definition, this is the point of instantiation.
+          // Otherwise, we don't request an end-of-TU instantiation, so this is
+          // not a point of instantiation.
+          // FIXME: Is this really the right behavior?
+          if (Var->getPointOfInstantiation().isInvalid() && Def) {
+            assert(Var->getTemplateSpecializationKind() ==
+                       TSK_ImplicitInstantiation &&
+                   "explicit instantiation with no point of instantiation");
+            Var->setTemplateSpecializationKind(
+                Var->getTemplateSpecializationKind(), PointOfInstantiation);
           }
-        } else {
-          VarTemplateSpecializationDecl *VarSpec =
-              cast<VarTemplateSpecializationDecl>(Var);
-          if (VarSpec->getPointOfInstantiation().isInvalid())
-            VarSpec->setPointOfInstantiation(PointOfInstantiation);
         }
 
-        InstantiateVariableDefinition(PointOfInstantiation, Var);
-
-        // Update the type to the newly instantiated definition's type both
-        // here and within the expression.
-        if (VarDecl *Def = Var->getDefinition()) {
+        // Update the type to the definition's type both here and within the
+        // expression.
+        if (Def) {
           DRE->setDecl(Def);
           QualType T = Def->getType();
           DRE->setType(T);
@@ -7517,6 +7578,14 @@
         diagnoseMissingImport(Loc, SuggestedDef, MissingImportKind::Definition,
                               /*Recover*/TreatAsComplete);
       return !TreatAsComplete;
+    } else if (Def && !TemplateInstCallbacks.empty()) {
+      CodeSynthesisContext TempInst;
+      TempInst.Kind = CodeSynthesisContext::Memoization;
+      TempInst.Template = Def;
+      TempInst.Entity = Def;
+      TempInst.PointOfInstantiation = Loc;
+      atTemplateBegin(TemplateInstCallbacks, *this, TempInst);
+      atTemplateEnd(TemplateInstCallbacks, *this, TempInst);
     }
 
     return false;
@@ -7746,7 +7815,8 @@
          diag::note_non_literal_user_provided_dtor :
          diag::note_non_literal_nontrivial_dtor) << RD;
     if (!Dtor->isUserProvided())
-      SpecialMemberIsTrivial(Dtor, CXXDestructor, /*Diagnose*/true);
+      SpecialMemberIsTrivial(Dtor, CXXDestructor, TAH_IgnoreTrivialABI,
+                             /*Diagnose*/true);
   }
 
   return true;
@@ -7807,11 +7877,12 @@
   //
   // We apply the same rules for Objective-C ivar and property references.
   if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
-    if (const ValueDecl *VD = dyn_cast<ValueDecl>(DRE->getDecl()))
-      return VD->getType();
+    const ValueDecl *VD = DRE->getDecl();
+    return VD->getType();
   } else if (const MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
-    if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
-      return FD->getType();
+    if (const ValueDecl *VD = ME->getMemberDecl())
+      if (isa<FieldDecl>(VD) || isa<VarDecl>(VD))
+        return VD->getType();
   } else if (const ObjCIvarRefExpr *IR = dyn_cast<ObjCIvarRefExpr>(E)) {
     return IR->getDecl()->getType();
   } else if (const ObjCPropertyRefExpr *PR = dyn_cast<ObjCPropertyRefExpr>(E)) {
diff --git a/lib/Sema/TreeTransform.h b/lib/Sema/TreeTransform.h
index 96969ea..7d52394 100644
--- a/lib/Sema/TreeTransform.h
+++ b/lib/Sema/TreeTransform.h
@@ -2352,18 +2352,8 @@
   /// Subclasses may override this routine to provide different behavior.
   ExprResult RebuildInitList(SourceLocation LBraceLoc,
                              MultiExprArg Inits,
-                             SourceLocation RBraceLoc,
-                             QualType ResultTy) {
-    ExprResult Result
-      = SemaRef.ActOnInitList(LBraceLoc, Inits, RBraceLoc);
-    if (Result.isInvalid() || ResultTy->isDependentType())
-      return Result;
-
-    // Patch in the result type we were given, which may have been computed
-    // when the initial InitListExpr was built.
-    InitListExpr *ILE = cast<InitListExpr>((Expr *)Result.get());
-    ILE->setType(ResultTy);
-    return Result;
+                             SourceLocation RBraceLoc) {
+    return SemaRef.ActOnInitList(LBraceLoc, Inits, RBraceLoc);
   }
 
   /// \brief Build a new designated initializer expression.
@@ -2591,10 +2581,11 @@
   ExprResult RebuildCXXFunctionalCastExpr(TypeSourceInfo *TInfo,
                                           SourceLocation LParenLoc,
                                           Expr *Sub,
-                                          SourceLocation RParenLoc) {
+                                          SourceLocation RParenLoc,
+                                          bool ListInitialization) {
     return getSema().BuildCXXTypeConstructExpr(TInfo, LParenLoc,
-                                               MultiExprArg(&Sub, 1),
-                                               RParenLoc);
+                                               MultiExprArg(&Sub, 1), RParenLoc,
+                                               ListInitialization);
   }
 
   /// \brief Build a new C++ typeid(type) expression.
@@ -2694,8 +2685,8 @@
   ExprResult RebuildCXXScalarValueInitExpr(TypeSourceInfo *TSInfo,
                                            SourceLocation LParenLoc,
                                            SourceLocation RParenLoc) {
-    return getSema().BuildCXXTypeConstructExpr(TSInfo, LParenLoc,
-                                               None, RParenLoc);
+    return getSema().BuildCXXTypeConstructExpr(
+        TSInfo, LParenLoc, None, RParenLoc, /*ListInitialization=*/false);
   }
 
   /// \brief Build a new C++ "new" expression.
@@ -2852,13 +2843,12 @@
   /// By default, performs semantic analysis to build the new expression.
   /// Subclasses may override this routine to provide different behavior.
   ExprResult RebuildCXXTemporaryObjectExpr(TypeSourceInfo *TSInfo,
-                                           SourceLocation LParenLoc,
+                                           SourceLocation LParenOrBraceLoc,
                                            MultiExprArg Args,
-                                           SourceLocation RParenLoc) {
-    return getSema().BuildCXXTypeConstructExpr(TSInfo,
-                                               LParenLoc,
-                                               Args,
-                                               RParenLoc);
+                                           SourceLocation RParenOrBraceLoc,
+                                           bool ListInitialization) {
+    return getSema().BuildCXXTypeConstructExpr(
+        TSInfo, LParenOrBraceLoc, Args, RParenOrBraceLoc, ListInitialization);
   }
 
   /// \brief Build a new object-construction expression.
@@ -2868,11 +2858,10 @@
   ExprResult RebuildCXXUnresolvedConstructExpr(TypeSourceInfo *TSInfo,
                                                SourceLocation LParenLoc,
                                                MultiExprArg Args,
-                                               SourceLocation RParenLoc) {
-    return getSema().BuildCXXTypeConstructExpr(TSInfo,
-                                               LParenLoc,
-                                               Args,
-                                               RParenLoc);
+                                               SourceLocation RParenLoc,
+                                               bool ListInitialization) {
+    return getSema().BuildCXXTypeConstructExpr(TSInfo, LParenLoc, Args,
+                                               RParenLoc, ListInitialization);
   }
 
   /// \brief Build a new member reference expression.
@@ -3394,11 +3383,10 @@
                                   /*IsCall*/true, NewArgs, &ArgChanged))
     return ExprError();
 
-  // If this was list initialization, revert to list form.
+  // If this was list initialization, revert to syntactic list form.
   if (Construct->isListInitialization())
     return getDerived().RebuildInitList(Construct->getLocStart(), NewArgs,
-                                        Construct->getLocEnd(),
-                                        Construct->getType());
+                                        Construct->getLocEnd());
 
   // Build a ParenListExpr to represent anything else.
   SourceRange Parens = Construct->getParenOrBraceRange();
@@ -6956,6 +6944,8 @@
 
   // The new CoroutinePromise object needs to be built and put into the current
   // FunctionScopeInfo before any transformations or rebuilding occurs.
+  if (!SemaRef.buildCoroutineParameterMoves(FD->getLocation()))
+    return StmtError();
   auto *Promise = SemaRef.buildCoroutinePromise(FD->getLocation());
   if (!Promise)
     return StmtError();
@@ -7046,8 +7036,6 @@
       Builder.ReturnStmt = Res.get();
     }
   }
-  if (!Builder.buildParameterMoves())
-    return StmtError();
 
   return getDerived().RebuildCoroutineBodyStmt(Builder);
 }
@@ -7642,11 +7630,7 @@
     StmtResult Body;
     {
       Sema::CompoundScopeRAII CompoundScope(getSema());
-      int ThisCaptureLevel =
-          Sema::getOpenMPCaptureLevels(D->getDirectiveKind());
-      Stmt *CS = D->getAssociatedStmt();
-      while (--ThisCaptureLevel >= 0)
-        CS = cast<CapturedStmt>(CS)->getCapturedStmt();
+      Stmt *CS = D->getInnermostCapturedStmt()->getCapturedStmt();
       Body = getDerived().TransformStmt(CS);
     }
     AssociatedStmt =
@@ -9517,7 +9501,7 @@
   }
 
   return getDerived().RebuildInitList(E->getLBraceLoc(), Inits,
-                                      E->getRBraceLoc(), E->getType());
+                                      E->getRBraceLoc());
 }
 
 template<typename Derived>
@@ -9951,7 +9935,8 @@
   return getDerived().RebuildCXXFunctionalCastExpr(Type,
                                                    E->getLParenLoc(),
                                                    SubExpr.get(),
-                                                   E->getRParenLoc());
+                                                   E->getRParenLoc(),
+                                                   E->isListInitialization());
 }
 
 template<typename Derived>
@@ -10867,11 +10852,12 @@
     return SemaRef.MaybeBindToTemporary(E);
   }
 
-  // FIXME: Pass in E->isListInitialization().
-  return getDerived().RebuildCXXTemporaryObjectExpr(T,
-                                          /*FIXME:*/T->getTypeLoc().getEndLoc(),
-                                                    Args,
-                                                    E->getLocEnd());
+  // FIXME: We should just pass E->isListInitialization(), but we're not
+  // prepared to handle list-initialization without a child InitListExpr.
+  SourceLocation LParenLoc = T->getTypeLoc().getEndLoc();
+  return getDerived().RebuildCXXTemporaryObjectExpr(
+      T, LParenLoc, Args, E->getLocEnd(),
+      /*ListInitialization=*/LParenLoc.isInvalid());
 }
 
 template<typename Derived>
@@ -11157,10 +11143,8 @@
     return E;
 
   // FIXME: we're faking the locations of the commas
-  return getDerived().RebuildCXXUnresolvedConstructExpr(T,
-                                                        E->getLParenLoc(),
-                                                        Args,
-                                                        E->getRParenLoc());
+  return getDerived().RebuildCXXUnresolvedConstructExpr(
+      T, E->getLParenLoc(), Args, E->getRParenLoc(), E->isListInitialization());
 }
 
 template<typename Derived>
@@ -11408,8 +11392,10 @@
         ArgStorage = TemplateArgument(TemplateName(TTPD), None);
       } else {
         auto *VD = cast<ValueDecl>(Pack);
-        ExprResult DRE = getSema().BuildDeclRefExpr(VD, VD->getType(),
-                                                    VK_RValue, E->getPackLoc());
+        ExprResult DRE = getSema().BuildDeclRefExpr(
+            VD, VD->getType().getNonLValueExprType(getSema().Context),
+            VD->getType()->isReferenceType() ? VK_LValue : VK_RValue,
+            E->getPackLoc());
         if (DRE.isInvalid())
           return ExprError();
         ArgStorage = new (getSema().Context) PackExpansionExpr(
diff --git a/lib/Serialization/ASTCommon.cpp b/lib/Serialization/ASTCommon.cpp
index 9c6f03c..535aacb 100644
--- a/lib/Serialization/ASTCommon.cpp
+++ b/lib/Serialization/ASTCommon.cpp
@@ -16,7 +16,7 @@
 #include "clang/AST/DeclObjC.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Serialization/ASTDeserializationListener.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/DJB.h"
 
 using namespace clang;
 
@@ -171,7 +171,7 @@
   unsigned R = 5381;
   for (unsigned I = 0; I != N; ++I)
     if (IdentifierInfo *II = Sel.getIdentifierInfoForSlot(I))
-      R = llvm::HashString(II->getName(), R);
+      R = llvm::djbHash(II->getName(), R);
   return R;
 }
 
@@ -231,7 +231,7 @@
   default:
     llvm_unreachable("Unhandled DeclContext in AST reader");
   }
-  
+
   llvm_unreachable("Unhandled decl kind");
 }
 
diff --git a/lib/Serialization/ASTCommon.h b/lib/Serialization/ASTCommon.h
index cbc5f04..6aca453 100644
--- a/lib/Serialization/ASTCommon.h
+++ b/lib/Serialization/ASTCommon.h
@@ -27,7 +27,8 @@
   UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION,
   UPD_CXX_ADDED_ANONYMOUS_NAMESPACE,
   UPD_CXX_ADDED_FUNCTION_DEFINITION,
-  UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER,
+  UPD_CXX_ADDED_VAR_DEFINITION,
+  UPD_CXX_POINT_OF_INSTANTIATION,
   UPD_CXX_INSTANTIATED_CLASS_DEFINITION,
   UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT,
   UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER,
diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp
index 5a3423a..b6fc4b2 100644
--- a/lib/Serialization/ASTReader.cpp
+++ b/lib/Serialization/ASTReader.cpp
@@ -1,4 +1,4 @@
-//===-- ASTReader.cpp - AST File Reader -----------------------------------===//
+//===- ASTReader.cpp - AST File Reader ------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,28 +19,41 @@
 #include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/ASTUnresolvedSet.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclFriend.h"
 #include "clang/AST/DeclGroup.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/ExternalASTSource.h"
 #include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/ODRHash.h"
 #include "clang/AST/RawCommentList.h"
+#include "clang/AST/TemplateBase.h"
+#include "clang/AST/TemplateName.h"
 #include "clang/AST/Type.h"
+#include "clang/AST/TypeLoc.h"
 #include "clang/AST/TypeLocVisitor.h"
 #include "clang/AST/UnresolvedSet.h"
 #include "clang/Basic/CommentOptions.h"
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/ExceptionSpecificationType.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/MemoryBufferCache.h"
+#include "clang/Basic/Module.h"
 #include "clang/Basic/ObjCRuntime.h"
 #include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/PragmaKinds.h"
 #include "clang/Basic/Sanitizers.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/SourceManagerInternals.h"
 #include "clang/Basic/Specifiers.h"
@@ -57,41 +70,62 @@
 #include "clang/Lex/PreprocessingRecord.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Lex/Token.h"
+#include "clang/Sema/ObjCMethodList.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/Weak.h"
+#include "clang/Serialization/ASTBitCodes.h"
 #include "clang/Serialization/ASTDeserializationListener.h"
+#include "clang/Serialization/ContinuousRangeMap.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
+#include "clang/Serialization/Module.h"
+#include "clang/Serialization/ModuleFileExtension.h"
 #include "clang/Serialization/ModuleManager.h"
 #include "clang/Serialization/SerializationDiagnostic.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/DJB.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
+#include <cstddef>
 #include <cstdint>
 #include <cstdio>
-#include <cstring>
 #include <ctime>
 #include <iterator>
 #include <limits>
 #include <map>
 #include <memory>
-#include <new>
 #include <string>
 #include <system_error>
 #include <tuple>
@@ -171,19 +205,23 @@
                                         SuggestedPredefines) ||
          Second->ReadPreprocessorOptions(PPOpts, Complain, SuggestedPredefines);
 }
+
 void ChainedASTReaderListener::ReadCounter(const serialization::ModuleFile &M,
                                            unsigned Value) {
   First->ReadCounter(M, Value);
   Second->ReadCounter(M, Value);
 }
+
 bool ChainedASTReaderListener::needsInputFileVisitation() {
   return First->needsInputFileVisitation() ||
          Second->needsInputFileVisitation();
 }
+
 bool ChainedASTReaderListener::needsSystemInputFileVisitation() {
   return First->needsSystemInputFileVisitation() ||
   Second->needsSystemInputFileVisitation();
 }
+
 void ChainedASTReaderListener::visitModuleFile(StringRef Filename,
                                                ModuleKind Kind) {
   First->visitModuleFile(Filename, Kind);
@@ -216,7 +254,7 @@
 // PCH validator implementation
 //===----------------------------------------------------------------------===//
 
-ASTReaderListener::~ASTReaderListener() {}
+ASTReaderListener::~ASTReaderListener() = default;
 
 /// \brief Compare the given set of language options against an existing set of
 /// language options.
@@ -409,17 +447,16 @@
 
 namespace {
 
-  typedef llvm::StringMap<std::pair<StringRef, bool /*IsUndef*/> >
-    MacroDefinitionsMap;
-  typedef llvm::DenseMap<DeclarationName, SmallVector<NamedDecl *, 8> >
-    DeclsMap;
+using MacroDefinitionsMap =
+    llvm::StringMap<std::pair<StringRef, bool /*IsUndef*/>>;
+using DeclsMap = llvm::DenseMap<DeclarationName, SmallVector<NamedDecl *, 8>>;
 
-} // end anonymous namespace
+} // namespace
 
 static bool checkDiagnosticGroupMappings(DiagnosticsEngine &StoredDiags,
                                          DiagnosticsEngine &Diags,
                                          bool Complain) {
-  typedef DiagnosticsEngine::Level Level;
+  using Level = DiagnosticsEngine::Level;
 
   // Check current mappings for new -Werror mappings, and the stored mappings
   // for cases that were explicitly mapped to *not* be errors that are now
@@ -603,8 +640,8 @@
     std::pair<StringRef, bool> Existing = ExistingMacros[MacroName];
 
     // Check whether we know anything about this macro name or not.
-    llvm::StringMap<std::pair<StringRef, bool /*IsUndef*/> >::iterator Known
-      = ASTFileMacros.find(MacroName);
+    llvm::StringMap<std::pair<StringRef, bool /*IsUndef*/>>::iterator Known =
+        ASTFileMacros.find(MacroName);
     if (!Validate || Known == ASTFileMacros.end()) {
       // FIXME: Check whether this identifier was referenced anywhere in the
       // AST file. If so, we should reject the AST file. Unfortunately, this
@@ -770,6 +807,7 @@
 std::pair<unsigned, unsigned>
 ASTSelectorLookupTrait::ReadKeyDataLength(const unsigned char*& d) {
   using namespace llvm::support;
+
   unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d);
   unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d);
   return std::make_pair(KeyLen, DataLen);
@@ -778,6 +816,7 @@
 ASTSelectorLookupTrait::internal_key_type
 ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) {
   using namespace llvm::support;
+
   SelectorTable &SelTable = Reader.getContext().Selectors;
   unsigned N = endian::readNext<uint16_t, little, unaligned>(d);
   IdentifierInfo *FirstII = Reader.getLocalIdentifier(
@@ -832,12 +871,13 @@
 }
 
 unsigned ASTIdentifierLookupTraitBase::ComputeHash(const internal_key_type& a) {
-  return llvm::HashString(a);
+  return llvm::djbHash(a);
 }
 
 std::pair<unsigned, unsigned>
 ASTIdentifierLookupTraitBase::ReadKeyDataLength(const unsigned char*& d) {
   using namespace llvm::support;
+
   unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d);
   unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d);
   return std::make_pair(KeyLen, DataLen);
@@ -868,6 +908,7 @@
 
 IdentID ASTIdentifierLookupTrait::ReadIdentifierID(const unsigned char *d) {
   using namespace llvm::support;
+
   unsigned RawID = endian::readNext<uint32_t, little, unaligned>(d);
   return Reader.getGlobalIdentifierID(F, RawID >> 1);
 }
@@ -885,6 +926,7 @@
                                                    const unsigned char* d,
                                                    unsigned DataLen) {
   using namespace llvm::support;
+
   unsigned RawID = endian::readNext<uint32_t, little, unaligned>(d);
   bool IsInteresting = RawID & 0x01;
 
@@ -1027,6 +1069,7 @@
 ModuleFile *
 ASTDeclContextNameLookupTrait::ReadFileRef(const unsigned char *&d) {
   using namespace llvm::support;
+
   uint32_t ModuleFileID = endian::readNext<uint32_t, little, unaligned>(d);
   return Reader.getLocalModuleFile(F, ModuleFileID);
 }
@@ -1034,6 +1077,7 @@
 std::pair<unsigned, unsigned>
 ASTDeclContextNameLookupTrait::ReadKeyDataLength(const unsigned char *&d) {
   using namespace llvm::support;
+
   unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d);
   unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d);
   return std::make_pair(KeyLen, DataLen);
@@ -1079,6 +1123,7 @@
                                                  unsigned DataLen,
                                                  data_type_builder &Val) {
   using namespace llvm::support;
+
   for (unsigned NumDecls = DataLen / 4; NumDecls; --NumDecls) {
     uint32_t LocalID = endian::readNext<uint32_t, little, unaligned>(d);
     Val.insert(Reader.getGlobalDeclID(F, LocalID));
@@ -1176,6 +1221,7 @@
 
   // Parse the file names
   std::map<int, int> FileIDs;
+  FileIDs[-1] = -1; // For unspecified filenames.
   for (unsigned I = 0; Record[Idx]; ++I) {
     // Extract the file name
     auto Filename = ReadPath(F, Record, Idx);
@@ -1278,7 +1324,9 @@
                                  const std::string &CurrDir) {
   assert(OriginalDir != CurrDir &&
          "No point trying to resolve the file if the PCH dir didn't change");
+
   using namespace llvm::sys;
+
   SmallString<128> filePath(Filename);
   fs::make_absolute(filePath);
   assert(path::is_absolute(OriginalDir));
@@ -1672,6 +1720,7 @@
 std::pair<unsigned, unsigned>
 HeaderFileInfoTrait::ReadKeyDataLength(const unsigned char*& d) {
   using namespace llvm::support;
+
   unsigned KeyLen = (unsigned) endian::readNext<uint16_t, little, unaligned>(d);
   unsigned DataLen = (unsigned) *d++;
   return std::make_pair(KeyLen, DataLen);
@@ -1680,6 +1729,7 @@
 HeaderFileInfoTrait::internal_key_type
 HeaderFileInfoTrait::ReadKey(const unsigned char *d, unsigned) {
   using namespace llvm::support;
+
   internal_key_type ikey;
   ikey.Size = off_t(endian::readNext<uint64_t, little, unaligned>(d));
   ikey.ModTime = time_t(endian::readNext<uint64_t, little, unaligned>(d));
@@ -1691,8 +1741,9 @@
 HeaderFileInfoTrait::data_type
 HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
                               unsigned DataLen) {
-  const unsigned char *End = d + DataLen;
   using namespace llvm::support;
+
+  const unsigned char *End = d + DataLen;
   HeaderFileInfo HFI;
   unsigned Flags = *d++;
   // FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
@@ -1813,7 +1864,7 @@
     unsigned PriorGeneration;
     unsigned &NumIdentifierLookups;
     unsigned &NumIdentifierLookupHits;
-    IdentifierInfo *Found;
+    IdentifierInfo *Found = nullptr;
 
   public:
     IdentifierLookupVisitor(StringRef Name, unsigned PriorGeneration,
@@ -1822,10 +1873,7 @@
       : Name(Name), NameHash(ASTIdentifierLookupTrait::ComputeHash(Name)),
         PriorGeneration(PriorGeneration),
         NumIdentifierLookups(NumIdentifierLookups),
-        NumIdentifierLookupHits(NumIdentifierLookupHits),
-        Found()
-    {
-    }
+        NumIdentifierLookupHits(NumIdentifierLookupHits) {}
 
     bool operator()(ModuleFile &M) {
       // If we've already searched this module file, skip it now.
@@ -1858,7 +1906,7 @@
     IdentifierInfo *getIdentifierInfo() const { return Found; }
   };
 
-} // end anonymous namespace
+} // namespace
 
 void ASTReader::updateOutOfDateIdentifier(IdentifierInfo &II) {
   // Note that we are loading an identifier.
@@ -1985,10 +2033,9 @@
       MD = PP.AllocateDefMacroDirective(MI, Loc);
       break;
     }
-    case MacroDirective::MD_Undefine: {
+    case MacroDirective::MD_Undefine:
       MD = PP.AllocateUndefMacroDirective(Loc);
       break;
-    }
     case MacroDirective::MD_Visibility:
       bool isPublic = Record[Idx++];
       MD = PP.AllocateVisibilityMacroDirective(Loc, isPublic);
@@ -2124,7 +2171,7 @@
     if (Complain) {
       // Build a list of the PCH imports that got us here (in reverse).
       SmallVector<ModuleFile *, 4> ImportStack(1, &F);
-      while (ImportStack.back()->ImportedBy.size() > 0)
+      while (!ImportStack.back()->ImportedBy.empty())
         ImportStack.push_back(ImportStack.back()->ImportedBy[0]);
 
       // The top-level PCH is stale.
@@ -2622,7 +2669,7 @@
     case llvm::BitstreamEntry::Error:
       Error("error at end of module block in AST file");
       return Failure;
-    case llvm::BitstreamEntry::EndBlock: {
+    case llvm::BitstreamEntry::EndBlock:
       // Outside of C++, we do not store a lookup map for the translation unit.
       // Instead, mark it as needing a lookup map to be built if this module
       // contains any declarations lexically within it (which it always does!).
@@ -2635,7 +2682,6 @@
       }
 
       return Success;
-    }
     case llvm::BitstreamEntry::SubBlock:
       switch (Entry.ID) {
       case DECLTYPES_BLOCK_ID:
@@ -2995,8 +3041,20 @@
 
     case PP_CONDITIONAL_STACK:
       if (!Record.empty()) {
+        unsigned Idx = 0, End = Record.size() - 1;
+        bool ReachedEOFWhileSkipping = Record[Idx++];
+        llvm::Optional<Preprocessor::PreambleSkipInfo> SkipInfo;
+        if (ReachedEOFWhileSkipping) {
+          SourceLocation HashToken = ReadSourceLocation(F, Record, Idx);
+          SourceLocation IfTokenLoc = ReadSourceLocation(F, Record, Idx);
+          bool FoundNonSkipPortion = Record[Idx++];
+          bool FoundElse = Record[Idx++];
+          SourceLocation ElseLoc = ReadSourceLocation(F, Record, Idx);
+          SkipInfo.emplace(HashToken, IfTokenLoc, FoundNonSkipPortion,
+                           FoundElse, ElseLoc);
+        }
         SmallVector<PPConditionalInfo, 4> ConditionalStack;
-        for (unsigned Idx = 0, N = Record.size() - 1; Idx < N; /* in loop */) {
+        while (Idx < End) {
           auto Loc = ReadSourceLocation(F, Record, Idx);
           bool WasSkipping = Record[Idx++];
           bool FoundNonSkip = Record[Idx++];
@@ -3004,7 +3062,7 @@
           ConditionalStack.push_back(
               {Loc, WasSkipping, FoundNonSkip, FoundElse});
         }
-        PP.setReplayablePreambleConditionalStack(ConditionalStack);
+        PP.setReplayablePreambleConditionalStack(ConditionalStack, SkipInfo);
       }
       break;
 
@@ -3158,7 +3216,25 @@
       break;
     }
 
-    case DECL_UPDATE_OFFSETS: {
+    case PPD_SKIPPED_RANGES: {
+      F.PreprocessedSkippedRangeOffsets = (const PPSkippedRange*)Blob.data();
+      assert(Blob.size() % sizeof(PPSkippedRange) == 0);
+      F.NumPreprocessedSkippedRanges = Blob.size() / sizeof(PPSkippedRange);
+
+      if (!PP.getPreprocessingRecord())
+        PP.createPreprocessingRecord();
+      if (!PP.getPreprocessingRecord()->getExternalSource())
+        PP.getPreprocessingRecord()->SetExternalSource(*this);
+      F.BasePreprocessedSkippedRangeID = PP.getPreprocessingRecord()
+          ->allocateSkippedRanges(F.NumPreprocessedSkippedRanges);
+
+      if (F.NumPreprocessedSkippedRanges > 0)
+        GlobalSkippedRangeMap.insert(
+            std::make_pair(F.BasePreprocessedSkippedRangeID, &F));
+      break;
+    }
+
+    case DECL_UPDATE_OFFSETS:
       if (Record.size() % 2 != 0) {
         Error("invalid DECL_UPDATE_OFFSETS block in AST file");
         return Failure;
@@ -3174,9 +3250,8 @@
               PendingUpdateRecord(ID, D, /*JustLoaded=*/false));
       }
       break;
-    }
 
-    case OBJC_CATEGORIES_MAP: {
+    case OBJC_CATEGORIES_MAP:
       if (F.LocalNumObjCCategoriesInMap != 0) {
         Error("duplicate OBJC_CATEGORIES_MAP record in AST file");
         return Failure;
@@ -3185,7 +3260,6 @@
       F.LocalNumObjCCategoriesInMap = Record[0];
       F.ObjCCategoriesMap = (const ObjCCategoriesInfo *)Blob.data();
       break;
-    }
 
     case OBJC_CATEGORIES:
       F.ObjCCategories.swap(Record);
@@ -3199,7 +3273,7 @@
         CUDASpecialDeclRefs.push_back(getGlobalDeclID(F, Record[I]));
       break;
 
-    case HEADER_SEARCH_TABLE: {
+    case HEADER_SEARCH_TABLE:
       F.HeaderFileInfoTableData = Blob.data();
       F.LocalNumHeaderFileInfos = Record[1];
       if (Record[0]) {
@@ -3216,7 +3290,6 @@
           PP.getHeaderSearchInfo().SetExternalLookup(this);
       }
       break;
-    }
 
     case FP_PRAGMA_OPTIONS:
       // Later tables overwrite earlier ones.
@@ -3284,6 +3357,7 @@
             ReadSourceLocation(F, Record, I).getRawEncoding());
       }
       break;
+
     case DELETE_EXPRS_TO_ANALYZE:
       for (unsigned I = 0, N = Record.size(); I != N;) {
         DelayedDeleteExprs.push_back(getGlobalDeclID(F, Record[I++]));
@@ -3297,7 +3371,7 @@
       }
       break;
 
-    case IMPORTED_MODULES: {
+    case IMPORTED_MODULES:
       if (!F.isModule()) {
         // If we aren't loading a module (which has its own exports), make
         // all of the imported modules visible.
@@ -3313,7 +3387,6 @@
         }
       }
       break;
-    }
 
     case MACRO_OFFSET: {
       if (F.LocalNumMacros != 0) {
@@ -3339,10 +3412,9 @@
       break;
     }
 
-    case LATE_PARSED_TEMPLATE: {
+    case LATE_PARSED_TEMPLATE:
       LateParsedTemplates.append(Record.begin(), Record.end());
       break;
-    }
 
     case OPTIMIZE_PRAGMA_OPTIONS:
       if (Record.size() != 1) {
@@ -3424,8 +3496,7 @@
   }
 
   // Continuous range maps we may be updating in our module.
-  typedef ContinuousRangeMap<uint32_t, int, 2>::Builder
-      RemapBuilder;
+  using RemapBuilder = ContinuousRangeMap<uint32_t, int, 2>::Builder;
   RemapBuilder SLocRemap(F.SLocRemap);
   RemapBuilder IdentifierRemap(F.IdentifierRemap);
   RemapBuilder MacroRemap(F.MacroRemap);
@@ -3518,15 +3589,22 @@
     if (!ModMap) {
       assert(ImportedBy && "top-level import should be verified");
       if ((ClientLoadCapabilities & ARR_OutOfDate) == 0) {
-        if (auto *ASTFE = M ? M->getASTFile() : nullptr)
+        if (auto *ASTFE = M ? M->getASTFile() : nullptr) {
           // This module was defined by an imported (explicit) module.
           Diag(diag::err_module_file_conflict) << F.ModuleName << F.FileName
                                                << ASTFE->getName();
-        else
+        } else {
           // This module was built with a different module map.
           Diag(diag::err_imported_module_not_found)
               << F.ModuleName << F.FileName << ImportedBy->FileName
               << F.ModuleMapPath;
+          // In case it was imported by a PCH, there's a chance the user is
+          // just missing to include the search path to the directory containing
+          // the modulemap.
+          if (ImportedBy->Kind == MK_PCH)
+            Diag(diag::note_imported_by_pch_module_not_found)
+                << llvm::sys::path::parent_path(F.ModuleMapPath);
+        }
       }
       return OutOfDate;
     }
@@ -3589,7 +3667,6 @@
   return Success;
 }
 
-
 /// \brief Move the given method to the back of the global list of methods.
 static void moveMethodToBackOfGlobalList(Sema &S, ObjCMethodDecl *Method) {
   // Find the entry for this selector in the method pool.
@@ -4242,7 +4319,7 @@
   // Read all of the records in the options block.
   RecordData Record;
   ASTReadResult Result = Success;
-  while (1) {
+  while (true) {
     llvm::BitstreamEntry Entry = Stream.advance();
 
     switch (Entry.Kind) {
@@ -4262,11 +4339,10 @@
     Record.clear();
     switch (
         (UnhashedControlBlockRecordTypes)Stream.readRecord(Entry.ID, Record)) {
-    case SIGNATURE: {
+    case SIGNATURE:
       if (F)
         std::copy(Record.begin(), Record.end(), F->Signature.data());
       break;
-    }
     case DIAGNOSTIC_OPTIONS: {
       bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
       if (Listener && ValidateDiagnosticOptions &&
@@ -4595,9 +4671,7 @@
         ExistingTargetOpts(ExistingTargetOpts),
         ExistingPPOpts(ExistingPPOpts),
         ExistingModuleCachePath(ExistingModuleCachePath),
-        FileMgr(FileMgr)
-    {
-    }
+        FileMgr(FileMgr) {}
 
     bool ReadLanguageOptions(const LangOptions &LangOpts, bool Complain,
                              bool AllowCompatibleDifferences) override {
@@ -4627,7 +4701,7 @@
     }
   };
 
-} // end anonymous namespace
+} // namespace
 
 bool ASTReader::readASTFileControlBlock(
     StringRef Filename, FileManager &FileMgr,
@@ -4711,15 +4785,12 @@
     StringRef Blob;
     unsigned RecCode = Stream.readRecord(Entry.ID, Record, &Blob);
     switch ((ControlRecordTypes)RecCode) {
-    case METADATA: {
+    case METADATA:
       if (Record[0] != VERSION_MAJOR)
         return true;
-
       if (Listener.ReadFullVersionInformation(Blob))
         return true;
-
       break;
-    }
     case MODULE_NAME:
       Listener.ReadModuleName(Blob);
       break;
@@ -5027,10 +5098,9 @@
       // them here.
       break;
 
-    case SUBMODULE_TOPHEADER: {
+    case SUBMODULE_TOPHEADER:
       CurrentModule->addTopHeaderFilename(Blob);
       break;
-    }
 
     case SUBMODULE_UMBRELLA_DIR: {
       std::string Dirname = Blob;
@@ -5067,7 +5137,7 @@
       break;
     }
 
-    case SUBMODULE_IMPORTS: {
+    case SUBMODULE_IMPORTS:
       for (unsigned Idx = 0; Idx != Record.size(); ++Idx) {
         UnresolvedModuleRef Unresolved;
         Unresolved.File = &F;
@@ -5078,9 +5148,8 @@
         UnresolvedModuleRefs.push_back(Unresolved);
       }
       break;
-    }
 
-    case SUBMODULE_EXPORTS: {
+    case SUBMODULE_EXPORTS:
       for (unsigned Idx = 0; Idx + 1 < Record.size(); Idx += 2) {
         UnresolvedModuleRef Unresolved;
         Unresolved.File = &F;
@@ -5095,12 +5164,11 @@
       // the parsed, unresolved exports around.
       CurrentModule->UnresolvedExports.clear();
       break;
-    }
-    case SUBMODULE_REQUIRES: {
+
+    case SUBMODULE_REQUIRES:
       CurrentModule->addRequirement(Blob, Record[0], PP.getLangOpts(),
                                     PP.getTargetInfo());
       break;
-    }
 
     case SUBMODULE_LINK_LIBRARY:
       CurrentModule->LinkLibraries.push_back(
@@ -5338,6 +5406,20 @@
                          Mod.FileSortedDecls + Mod.NumFileSortedDecls));
 }
 
+SourceRange ASTReader::ReadSkippedRange(unsigned GlobalIndex) {
+  auto I = GlobalSkippedRangeMap.find(GlobalIndex);
+  assert(I != GlobalSkippedRangeMap.end() &&
+    "Corrupted global skipped range map");
+  ModuleFile *M = I->second;
+  unsigned LocalIndex = GlobalIndex - M->BasePreprocessedSkippedRangeID;
+  assert(LocalIndex < M->NumPreprocessedSkippedRanges);
+  PPSkippedRange RawRange = M->PreprocessedSkippedRangeOffsets[LocalIndex];
+  SourceRange Range(TranslateSourceLocation(*M, RawRange.getBegin()),
+                    TranslateSourceLocation(*M, RawRange.getEnd()));
+  assert(Range.isValid());
+  return Range;
+}
+
 PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) {
   PreprocessedEntityID PPID = Index+1;
   std::pair<ModuleFile *, unsigned> PPInfo = getModulePreprocessedEntity(Index);
@@ -5450,7 +5532,7 @@
   const ASTReader &Reader;
   ModuleFile &M;
 
-  PPEntityComp(const ASTReader &Reader, ModuleFile &M) : Reader(Reader), M(M) { }
+  PPEntityComp(const ASTReader &Reader, ModuleFile &M) : Reader(Reader), M(M) {}
 
   bool operator()(const PPEntityOffset &L, const PPEntityOffset &R) const {
     SourceLocation LHS = getLoc(L);
@@ -5473,7 +5555,7 @@
   }
 };
 
-} // end anonymous namespace
+} // namespace
 
 PreprocessedEntityID ASTReader::findPreprocessedEntity(SourceLocation Loc,
                                                        bool EndsAfter) const {
@@ -5489,7 +5571,9 @@
     return findNextPreprocessedEntity(SLocMapI);
 
   ModuleFile &M = *SLocMapI->second;
-  typedef const PPEntityOffset *pp_iterator;
+
+  using pp_iterator = const PPEntityOffset *;
+
   pp_iterator pp_begin = M.PreprocessedEntityOffsets;
   pp_iterator pp_end = pp_begin + M.NumPreprocessedEntities;
 
@@ -5567,12 +5651,10 @@
   /// \brief Visitor used to search for information about a header file.
   class HeaderFileInfoVisitor {
     const FileEntry *FE;
-
     Optional<HeaderFileInfo> HFI;
 
   public:
-    explicit HeaderFileInfoVisitor(const FileEntry *FE)
-      : FE(FE) { }
+    explicit HeaderFileInfoVisitor(const FileEntry *FE) : FE(FE) {}
 
     bool operator()(ModuleFile &M) {
       HeaderFileInfoLookupTable *Table
@@ -5592,7 +5674,7 @@
     Optional<HeaderFileInfo> getHeaderFileInfo() const { return HFI; }
   };
 
-} // end anonymous namespace
+} // namespace
 
 HeaderFileInfo ASTReader::GetHeaderFileInfo(const FileEntry *FE) {
   HeaderFileInfoVisitor Visitor(FE);
@@ -5680,6 +5762,8 @@
       Initial.ExtBehavior = (diag::Severity)Flags;
       FirstState = ReadDiagState(Initial, SourceLocation(), true);
 
+      assert(F.OriginalSourceFileID.isValid());
+
       // Set up the root buffer of the module to start with the initial
       // diagnostic state of the module itself, to cover files that contain no
       // explicit transitions (for which we did not serialize anything).
@@ -5700,6 +5784,7 @@
              "Invalid data, missing pragma diagnostic states");
       SourceLocation Loc = ReadSourceLocation(F, Record[Idx++]);
       auto IDAndOffset = SourceMgr.getDecomposedLoc(Loc);
+      assert(IDAndOffset.first.isValid() && "invalid FileID for transition");
       assert(IDAndOffset.second == 0 && "not a start location for a FileID");
       unsigned Transitions = Record[Idx++];
 
@@ -6113,6 +6198,7 @@
       Protos.push_back(ReadDeclAs<ObjCProtocolDecl>(*Loc.F, Record, Idx));
     return Context.getObjCTypeParamType(Decl, Protos);
   }
+
   case TYPE_OBJC_OBJECT: {
     unsigned Idx = 0;
     QualType Base = readType(*Loc.F, Record, Idx);
@@ -6309,7 +6395,9 @@
   }
 }
 
-class clang::TypeLocReader : public TypeLocVisitor<TypeLocReader> {
+namespace clang {
+
+class TypeLocReader : public TypeLocVisitor<TypeLocReader> {
   ModuleFile *F;
   ASTReader *Reader;
   const ASTReader::RecordData &Record;
@@ -6344,6 +6432,8 @@
   void VisitArrayTypeLoc(ArrayTypeLoc);
 };
 
+} // namespace clang
+
 void TypeLocReader::VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
   // nothing to do
 }
@@ -6460,23 +6550,28 @@
 void TypeLocReader::VisitFunctionNoProtoTypeLoc(FunctionNoProtoTypeLoc TL) {
   VisitFunctionTypeLoc(TL);
 }
+
 void TypeLocReader::VisitUnresolvedUsingTypeLoc(UnresolvedUsingTypeLoc TL) {
   TL.setNameLoc(ReadSourceLocation());
 }
+
 void TypeLocReader::VisitTypedefTypeLoc(TypedefTypeLoc TL) {
   TL.setNameLoc(ReadSourceLocation());
 }
+
 void TypeLocReader::VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) {
   TL.setTypeofLoc(ReadSourceLocation());
   TL.setLParenLoc(ReadSourceLocation());
   TL.setRParenLoc(ReadSourceLocation());
 }
+
 void TypeLocReader::VisitTypeOfTypeLoc(TypeOfTypeLoc TL) {
   TL.setTypeofLoc(ReadSourceLocation());
   TL.setLParenLoc(ReadSourceLocation());
   TL.setRParenLoc(ReadSourceLocation());
   TL.setUnderlyingTInfo(GetTypeSourceInfo());
 }
+
 void TypeLocReader::VisitDecltypeTypeLoc(DecltypeTypeLoc TL) {
   TL.setNameLoc(ReadSourceLocation());
 }
@@ -6530,10 +6625,12 @@
                                             SubstTemplateTypeParmTypeLoc TL) {
   TL.setNameLoc(ReadSourceLocation());
 }
+
 void TypeLocReader::VisitSubstTemplateTypeParmPackTypeLoc(
                                           SubstTemplateTypeParmPackTypeLoc TL) {
   TL.setNameLoc(ReadSourceLocation());
 }
+
 void TypeLocReader::VisitTemplateSpecializationTypeLoc(
                                            TemplateSpecializationTypeLoc TL) {
   TL.setTemplateKeywordLoc(ReadSourceLocation());
@@ -6546,6 +6643,7 @@
         Reader->GetTemplateArgumentLocInfo(
             *F, TL.getTypePtr()->getArg(i).getKind(), Record, Idx));
 }
+
 void TypeLocReader::VisitParenTypeLoc(ParenTypeLoc TL) {
   TL.setLParenLoc(ReadSourceLocation());
   TL.setRParenLoc(ReadSourceLocation());
@@ -6656,13 +6754,11 @@
     case PREDEF_TYPE_BOOL_ID:
       T = Context.BoolTy;
       break;
-
     case PREDEF_TYPE_CHAR_U_ID:
     case PREDEF_TYPE_CHAR_S_ID:
       // FIXME: Check that the signedness of CharTy is correct!
       T = Context.CharTy;
       break;
-
     case PREDEF_TYPE_UCHAR_ID:
       T = Context.UnsignedCharTy;
       break;
@@ -6776,19 +6872,15 @@
     case PREDEF_TYPE_AUTO_DEDUCT:
       T = Context.getAutoDeductType();
       break;
-
     case PREDEF_TYPE_AUTO_RREF_DEDUCT:
       T = Context.getAutoRRefDeductType();
       break;
-
     case PREDEF_TYPE_ARC_UNBRIDGED_CAST:
       T = Context.ARCUnbridgedCastTy;
       break;
-
     case PREDEF_TYPE_BUILTIN_FN:
       T = Context.BuiltinFnTy;
       break;
-
     case PREDEF_TYPE_OMP_ARRAY_SECTION:
       T = Context.OMPArraySectionTy;
       break;
@@ -7275,7 +7367,7 @@
   }
 };
 
-} // end anonymous namespace
+} // namespace
 
 void ASTReader::FindFileRegionDecls(FileID File,
                                     unsigned Offset, unsigned Length,
@@ -7471,30 +7563,25 @@
                  NumVisibleDeclContextsRead, TotalVisibleDeclContexts,
                  ((float)NumVisibleDeclContextsRead/TotalVisibleDeclContexts
                   * 100));
-  if (TotalNumMethodPoolEntries) {
+  if (TotalNumMethodPoolEntries)
     std::fprintf(stderr, "  %u/%u method pool entries read (%f%%)\n",
                  NumMethodPoolEntriesRead, TotalNumMethodPoolEntries,
                  ((float)NumMethodPoolEntriesRead/TotalNumMethodPoolEntries
                   * 100));
-  }
-  if (NumMethodPoolLookups) {
+  if (NumMethodPoolLookups)
     std::fprintf(stderr, "  %u/%u method pool lookups succeeded (%f%%)\n",
                  NumMethodPoolHits, NumMethodPoolLookups,
                  ((float)NumMethodPoolHits/NumMethodPoolLookups * 100.0));
-  }
-  if (NumMethodPoolTableLookups) {
+  if (NumMethodPoolTableLookups)
     std::fprintf(stderr, "  %u/%u method pool table lookups succeeded (%f%%)\n",
                  NumMethodPoolTableHits, NumMethodPoolTableLookups,
                  ((float)NumMethodPoolTableHits/NumMethodPoolTableLookups
                   * 100.0));
-  }
-
-  if (NumIdentifierLookupHits) {
+  if (NumIdentifierLookupHits)
     std::fprintf(stderr,
                  "  %u / %u identifier table lookups succeeded (%f%%)\n",
                  NumIdentifierLookupHits, NumIdentifierLookups,
                  (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
-  }
 
   if (GlobalIndex) {
     std::fprintf(stderr, "\n");
@@ -7514,7 +7601,8 @@
   if (Map.begin() == Map.end())
     return;
 
-  typedef ContinuousRangeMap<Key, ModuleFile *, InitialCapacity> MapType;
+  using MapType = ContinuousRangeMap<Key, ModuleFile *, InitialCapacity>;
+
   llvm::errs() << Name << ":\n";
   for (typename MapType::const_iterator I = Map.begin(), IEnd = Map.end();
        I != IEnd; ++I) {
@@ -7711,7 +7799,7 @@
     StringRef Next() override;
   };
 
-} // end namespace clang
+} // namespace clang
 
 ASTIdentifierIterator::ASTIdentifierIterator(const ASTReader &Reader,
                                              bool SkipModules)
@@ -7769,7 +7857,7 @@
   }
 };
 
-} // end anonymous namespace.
+} // namespace
 
 IdentifierIterator *ASTReader::getIdentifiers() {
   if (!loadGlobalIndex()) {
@@ -7791,19 +7879,17 @@
     ASTReader &Reader;
     Selector Sel;
     unsigned PriorGeneration;
-    unsigned InstanceBits;
-    unsigned FactoryBits;
-    bool InstanceHasMoreThanOneDecl;
-    bool FactoryHasMoreThanOneDecl;
+    unsigned InstanceBits = 0;
+    unsigned FactoryBits = 0;
+    bool InstanceHasMoreThanOneDecl = false;
+    bool FactoryHasMoreThanOneDecl = false;
     SmallVector<ObjCMethodDecl *, 4> InstanceMethods;
     SmallVector<ObjCMethodDecl *, 4> FactoryMethods;
 
   public:
     ReadMethodPoolVisitor(ASTReader &Reader, Selector Sel,
                           unsigned PriorGeneration)
-        : Reader(Reader), Sel(Sel), PriorGeneration(PriorGeneration),
-          InstanceBits(0), FactoryBits(0), InstanceHasMoreThanOneDecl(false),
-          FactoryHasMoreThanOneDecl(false) {}
+        : Reader(Reader), Sel(Sel), PriorGeneration(PriorGeneration) {}
 
     bool operator()(ModuleFile &M) {
       if (!M.SelectorLookupTable)
@@ -7851,14 +7937,16 @@
 
     unsigned getInstanceBits() const { return InstanceBits; }
     unsigned getFactoryBits() const { return FactoryBits; }
+
     bool instanceHasMoreThanOneDecl() const {
       return InstanceHasMoreThanOneDecl;
     }
+
     bool factoryHasMoreThanOneDecl() const { return FactoryHasMoreThanOneDecl; }
   };
 
-} // end namespace serialization
-} // end namespace clang
+} // namespace serialization
+} // namespace clang
 
 /// \brief Add the given set of methods to the method list.
 static void addMethodsToPool(Sema &S, ArrayRef<ObjCMethodDecl *> Methods,
@@ -8001,7 +8089,7 @@
 }
 
 void ASTReader::ReadReferencedSelectors(
-       SmallVectorImpl<std::pair<Selector, SourceLocation> > &Sels) {
+       SmallVectorImpl<std::pair<Selector, SourceLocation>> &Sels) {
   if (ReferencedSelectorsData.empty())
     return;
 
@@ -8019,7 +8107,7 @@
 }
 
 void ASTReader::ReadWeakUndeclaredIdentifiers(
-       SmallVectorImpl<std::pair<IdentifierInfo *, WeakInfo> > &WeakIDs) {
+       SmallVectorImpl<std::pair<IdentifierInfo *, WeakInfo>> &WeakIDs) {
   if (WeakUndeclaredIdentifiers.empty())
     return;
 
@@ -8051,7 +8139,7 @@
 }
 
 void ASTReader::ReadPendingInstantiations(
-       SmallVectorImpl<std::pair<ValueDecl *, SourceLocation> > &Pending) {
+       SmallVectorImpl<std::pair<ValueDecl *, SourceLocation>> &Pending) {
   for (unsigned Idx = 0, N = PendingInstantiations.size(); Idx < N;) {
     ValueDecl *D = cast<ValueDecl>(GetDecl(PendingInstantiations[Idx++]));
     SourceLocation Loc
@@ -8763,11 +8851,10 @@
       break;
     }
 
-    case NestedNameSpecifier::Global: {
+    case NestedNameSpecifier::Global:
       NNS = NestedNameSpecifier::GlobalSpecifier(Context);
       // No associated value, and there can't be a prefix.
       break;
-    }
 
     case NestedNameSpecifier::Super: {
       CXXRecordDecl *RD = ReadDeclAs<CXXRecordDecl>(F, Record, Idx);
@@ -8944,7 +9031,7 @@
   ASTContext &Context = getContext();
   std::vector<RawComment *> Comments;
   for (SmallVectorImpl<std::pair<BitstreamCursor,
-                                 serialization::ModuleFile *> >::iterator
+                                 serialization::ModuleFile *>>::iterator
        I = CommentsCursors.begin(),
        E = CommentsCursors.end();
        I != E; ++I) {
@@ -8981,8 +9068,7 @@
         bool IsTrailingComment = Record[Idx++];
         bool IsAlmostTrailingComment = Record[Idx++];
         Comments.push_back(new (Context) RawComment(
-            SR, Kind, IsTrailingComment, IsAlmostTrailingComment,
-            Context.getLangOpts().CommentOpts.ParseAllComments));
+            SR, Kind, IsTrailingComment, IsAlmostTrailingComment));
         break;
       }
       }
@@ -9034,7 +9120,7 @@
     return M->ModuleName;
 
   // Not from a module.
-  return "";
+  return {};
 }
 
 void ASTReader::finishPendingActions() {
@@ -9044,8 +9130,8 @@
          !PendingUpdateRecords.empty()) {
     // If any identifiers with corresponding top-level declarations have
     // been loaded, load those declarations now.
-    typedef llvm::DenseMap<IdentifierInfo *, SmallVector<Decl *, 2> >
-      TopLevelDeclsMap;
+    using TopLevelDeclsMap =
+        llvm::DenseMap<IdentifierInfo *, SmallVector<Decl *, 2>>;
     TopLevelDeclsMap TopLevelDecls;
 
     while (!PendingIdentifierInfos.empty()) {
@@ -9184,8 +9270,16 @@
       const FunctionDecl *Defn = nullptr;
       if (!getContext().getLangOpts().Modules || !FD->hasBody(Defn)) {
         FD->setLazyBody(PB->second);
-      } else
-        mergeDefinitionVisibility(const_cast<FunctionDecl*>(Defn), FD);
+      } else {
+        auto *NonConstDefn = const_cast<FunctionDecl*>(Defn);
+        mergeDefinitionVisibility(NonConstDefn, FD);
+
+        if (!FD->isLateTemplateParsed() &&
+            !NonConstDefn->isLateTemplateParsed() &&
+            FD->getODRHash() != NonConstDefn->getODRHash()) {
+          PendingFunctionOdrMergeFailures[FD].push_back(NonConstDefn);
+        }
+      }
       continue;
     }
 
@@ -9202,7 +9296,8 @@
 }
 
 void ASTReader::diagnoseOdrViolations() {
-  if (PendingOdrMergeFailures.empty() && PendingOdrMergeChecks.empty())
+  if (PendingOdrMergeFailures.empty() && PendingOdrMergeChecks.empty() &&
+      PendingFunctionOdrMergeFailures.empty())
     return;
 
   // Trigger the import of the full definition of each class that had any
@@ -9224,6 +9319,20 @@
     }
   }
 
+  // Trigger the import of functions.
+  auto FunctionOdrMergeFailures = std::move(PendingFunctionOdrMergeFailures);
+  PendingFunctionOdrMergeFailures.clear();
+  for (auto &Merge : FunctionOdrMergeFailures) {
+    Merge.first->buildLookup();
+    Merge.first->decls_begin();
+    Merge.first->getBody();
+    for (auto &FD : Merge.second) {
+      FD->buildLookup();
+      FD->decls_begin();
+      FD->getBody();
+    }
+  }
+
   // For each declaration from a merged context, check that the canonical
   // definition of that context also contains a declaration of the same
   // entity.
@@ -9306,13 +9415,35 @@
     }
   }
 
-  if (OdrMergeFailures.empty())
+  if (OdrMergeFailures.empty() && FunctionOdrMergeFailures.empty())
     return;
 
   // Ensure we don't accidentally recursively enter deserialization while
   // we're producing our diagnostics.
   Deserializing RecursionGuard(this);
 
+  // Common code for hashing helpers.
+  ODRHash Hash;
+  auto ComputeQualTypeODRHash = [&Hash](QualType Ty) {
+    Hash.clear();
+    Hash.AddQualType(Ty);
+    return Hash.CalculateHash();
+  };
+
+  auto ComputeODRHash = [&Hash](const Stmt *S) {
+    assert(S);
+    Hash.clear();
+    Hash.AddStmt(S);
+    return Hash.CalculateHash();
+  };
+
+  auto ComputeSubDeclODRHash = [&Hash](const Decl *D) {
+    assert(D);
+    Hash.clear();
+    Hash.AddSubDecl(D);
+    return Hash.CalculateHash();
+  };
+
   // Issue any pending ODR-failure diagnostics.
   for (auto &Merge : OdrMergeFailures) {
     // If we've already pointed out a specific problem with this class, don't
@@ -9360,13 +9491,6 @@
                  << SecondModule << Range << DiffType;
         };
 
-        ODRHash Hash;
-        auto ComputeQualTypeODRHash = [&Hash](QualType Ty) {
-          Hash.clear();
-          Hash.AddQualType(Ty);
-          return Hash.CalculateHash();
-        };
-
         unsigned FirstNumBases = FirstDD->NumBases;
         unsigned FirstNumVBases = FirstDD->NumVBases;
         unsigned SecondNumBases = SecondDD->NumBases;
@@ -9469,14 +9593,12 @@
       if (FirstTemplate && SecondTemplate) {
         DeclHashes FirstTemplateHashes;
         DeclHashes SecondTemplateHashes;
-        ODRHash Hash;
 
         auto PopulateTemplateParameterHashs =
-            [&Hash](DeclHashes &Hashes, const ClassTemplateDecl *TD) {
+            [&ComputeSubDeclODRHash](DeclHashes &Hashes,
+                                     const ClassTemplateDecl *TD) {
               for (auto *D : TD->getTemplateParameters()->asArray()) {
-                Hash.clear();
-                Hash.AddSubDecl(D);
-                Hashes.emplace_back(D, Hash.CalculateHash());
+                Hashes.emplace_back(D, ComputeSubDeclODRHash(D));
               }
             };
 
@@ -9645,18 +9767,15 @@
 
       DeclHashes FirstHashes;
       DeclHashes SecondHashes;
-      ODRHash Hash;
 
-      auto PopulateHashes = [&Hash, FirstRecord](DeclHashes &Hashes,
-                                                 CXXRecordDecl *Record) {
+      auto PopulateHashes = [&ComputeSubDeclODRHash, FirstRecord](
+                                DeclHashes &Hashes, CXXRecordDecl *Record) {
         for (auto *D : Record->decls()) {
           // Due to decl merging, the first CXXRecordDecl is the parent of
           // Decls in both records.
           if (!ODRHash::isWhitelistedDecl(D, FirstRecord))
             continue;
-          Hash.clear();
-          Hash.AddSubDecl(D);
-          Hashes.emplace_back(D, Hash.CalculateHash());
+          Hashes.emplace_back(D, ComputeSubDeclODRHash(D));
         }
       };
       PopulateHashes(FirstHashes, FirstRecord);
@@ -9850,19 +9969,6 @@
                << SecondModule << Range << DiffType;
       };
 
-      auto ComputeODRHash = [&Hash](const Stmt* S) {
-        assert(S);
-        Hash.clear();
-        Hash.AddStmt(S);
-        return Hash.CalculateHash();
-      };
-
-      auto ComputeQualTypeODRHash = [&Hash](QualType Ty) {
-        Hash.clear();
-        Hash.AddQualType(Ty);
-        return Hash.CalculateHash();
-      };
-
       switch (FirstDiffType) {
       case Other:
       case EndOfClass:
@@ -10412,7 +10518,7 @@
       }
       }
 
-      if (Diagnosed == true)
+      if (Diagnosed)
         continue;
 
       Diag(FirstDecl->getLocation(),
@@ -10437,6 +10543,161 @@
         << Merge.first;
     }
   }
+
+  // Issue ODR failures diagnostics for functions.
+  for (auto &Merge : FunctionOdrMergeFailures) {
+    enum ODRFunctionDifference {
+      ReturnType,
+      ParameterName,
+      ParameterType,
+      ParameterSingleDefaultArgument,
+      ParameterDifferentDefaultArgument,
+      FunctionBody,
+    };
+
+    FunctionDecl *FirstFunction = Merge.first;
+    std::string FirstModule = getOwningModuleNameForDiagnostic(FirstFunction);
+
+    bool Diagnosed = false;
+    for (auto &SecondFunction : Merge.second) {
+
+      if (FirstFunction == SecondFunction)
+        continue;
+
+      std::string SecondModule =
+          getOwningModuleNameForDiagnostic(SecondFunction);
+
+      auto ODRDiagError = [FirstFunction, &FirstModule,
+                           this](SourceLocation Loc, SourceRange Range,
+                                 ODRFunctionDifference DiffType) {
+        return Diag(Loc, diag::err_module_odr_violation_function)
+               << FirstFunction << FirstModule.empty() << FirstModule << Range
+               << DiffType;
+      };
+      auto ODRDiagNote = [&SecondModule, this](SourceLocation Loc,
+                                               SourceRange Range,
+                                               ODRFunctionDifference DiffType) {
+        return Diag(Loc, diag::note_module_odr_violation_function)
+               << SecondModule << Range << DiffType;
+      };
+
+      if (ComputeQualTypeODRHash(FirstFunction->getReturnType()) !=
+          ComputeQualTypeODRHash(SecondFunction->getReturnType())) {
+        ODRDiagError(FirstFunction->getReturnTypeSourceRange().getBegin(),
+                     FirstFunction->getReturnTypeSourceRange(), ReturnType)
+            << FirstFunction->getReturnType();
+        ODRDiagNote(SecondFunction->getReturnTypeSourceRange().getBegin(),
+                    SecondFunction->getReturnTypeSourceRange(), ReturnType)
+            << SecondFunction->getReturnType();
+        Diagnosed = true;
+        break;
+      }
+
+      assert(FirstFunction->param_size() == SecondFunction->param_size() &&
+             "Merged functions with different number of parameters");
+
+      auto ParamSize = FirstFunction->param_size();
+      bool ParameterMismatch = false;
+      for (unsigned I = 0; I < ParamSize; ++I) {
+        auto *FirstParam = FirstFunction->getParamDecl(I);
+        auto *SecondParam = SecondFunction->getParamDecl(I);
+
+        assert(getContext().hasSameType(FirstParam->getType(),
+                                      SecondParam->getType()) &&
+               "Merged function has different parameter types.");
+
+        if (FirstParam->getDeclName() != SecondParam->getDeclName()) {
+          ODRDiagError(FirstParam->getLocation(), FirstParam->getSourceRange(),
+                       ParameterName)
+              << I + 1 << FirstParam->getDeclName();
+          ODRDiagNote(SecondParam->getLocation(), SecondParam->getSourceRange(),
+                      ParameterName)
+              << I + 1 << SecondParam->getDeclName();
+          ParameterMismatch = true;
+          break;
+        };
+
+        QualType FirstParamType = FirstParam->getType();
+        QualType SecondParamType = SecondParam->getType();
+        if (FirstParamType != SecondParamType &&
+            ComputeQualTypeODRHash(FirstParamType) !=
+                ComputeQualTypeODRHash(SecondParamType)) {
+          if (const DecayedType *ParamDecayedType =
+                  FirstParamType->getAs<DecayedType>()) {
+            ODRDiagError(FirstParam->getLocation(),
+                         FirstParam->getSourceRange(), ParameterType)
+                << (I + 1) << FirstParamType << true
+                << ParamDecayedType->getOriginalType();
+          } else {
+            ODRDiagError(FirstParam->getLocation(),
+                         FirstParam->getSourceRange(), ParameterType)
+                << (I + 1) << FirstParamType << false;
+          }
+
+          if (const DecayedType *ParamDecayedType =
+                  SecondParamType->getAs<DecayedType>()) {
+            ODRDiagNote(SecondParam->getLocation(),
+                        SecondParam->getSourceRange(), ParameterType)
+                << (I + 1) << SecondParamType << true
+                << ParamDecayedType->getOriginalType();
+          } else {
+            ODRDiagNote(SecondParam->getLocation(),
+                        SecondParam->getSourceRange(), ParameterType)
+                << (I + 1) << SecondParamType << false;
+          }
+          ParameterMismatch = true;
+          break;
+        }
+
+        const Expr *FirstInit = FirstParam->getInit();
+        const Expr *SecondInit = SecondParam->getInit();
+        if ((FirstInit == nullptr) != (SecondInit == nullptr)) {
+          ODRDiagError(FirstParam->getLocation(), FirstParam->getSourceRange(),
+                       ParameterSingleDefaultArgument)
+              << (I + 1) << (FirstInit == nullptr)
+              << (FirstInit ? FirstInit->getSourceRange() : SourceRange());
+          ODRDiagNote(SecondParam->getLocation(), SecondParam->getSourceRange(),
+                      ParameterSingleDefaultArgument)
+              << (I + 1) << (SecondInit == nullptr)
+              << (SecondInit ? SecondInit->getSourceRange() : SourceRange());
+          ParameterMismatch = true;
+          break;
+        }
+
+        if (FirstInit && SecondInit &&
+            ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) {
+          ODRDiagError(FirstParam->getLocation(), FirstParam->getSourceRange(),
+                       ParameterDifferentDefaultArgument)
+              << (I + 1) << FirstInit->getSourceRange();
+          ODRDiagNote(SecondParam->getLocation(), SecondParam->getSourceRange(),
+                      ParameterDifferentDefaultArgument)
+              << (I + 1) << SecondInit->getSourceRange();
+          ParameterMismatch = true;
+          break;
+        }
+
+        assert(ComputeSubDeclODRHash(FirstParam) ==
+                   ComputeSubDeclODRHash(SecondParam) &&
+               "Undiagnosed parameter difference.");
+      }
+
+      if (ParameterMismatch) {
+        Diagnosed = true;
+        break;
+      }
+
+      // If no error has been generated before now, assume the problem is in
+      // the body and generate a message.
+      ODRDiagError(FirstFunction->getLocation(),
+                   FirstFunction->getSourceRange(), FunctionBody);
+      ODRDiagNote(SecondFunction->getLocation(),
+                  SecondFunction->getSourceRange(), FunctionBody);
+      Diagnosed = true;
+      break;
+    }
+    (void)Diagnosed;
+    assert(Diagnosed && "Unable to emit ODR diagnostic.");
+  }
 }
 
 void ASTReader::StartedDeserializing() {
diff --git a/lib/Serialization/ASTReaderDecl.cpp b/lib/Serialization/ASTReaderDecl.cpp
index c3c6c3e..10439d3 100644
--- a/lib/Serialization/ASTReaderDecl.cpp
+++ b/lib/Serialization/ASTReaderDecl.cpp
@@ -786,16 +786,21 @@
   FD->HasWrittenPrototype = Record.readInt();
   FD->IsDeleted = Record.readInt();
   FD->IsTrivial = Record.readInt();
+  FD->IsTrivialForCall = Record.readInt();
   FD->IsDefaulted = Record.readInt();
   FD->IsExplicitlyDefaulted = Record.readInt();
   FD->HasImplicitReturnZero = Record.readInt();
   FD->IsConstexpr = Record.readInt();
   FD->UsesSEHTry = Record.readInt();
   FD->HasSkippedBody = Record.readInt();
+  FD->IsMultiVersion = Record.readInt();
   FD->IsLateTemplateParsed = Record.readInt();
   FD->setCachedLinkage(Linkage(Record.readInt()));
   FD->EndRangeLoc = ReadSourceLocation();
 
+  FD->ODRHash = Record.readInt();
+  FD->HasODRHash = true;
+
   switch ((FunctionDecl::TemplatedKind)Record.readInt()) {
   case FunctionDecl::TK_NonTemplate:
     mergeRedeclarable(FD, Redecl);
@@ -1497,7 +1502,8 @@
 void ASTDeclReader::VisitUsingShadowDecl(UsingShadowDecl *D) {
   RedeclarableResult Redecl = VisitRedeclarable(D);
   VisitNamedDecl(D);
-  D->setTargetDecl(ReadDeclAs<NamedDecl>());
+  D->Underlying = ReadDeclAs<NamedDecl>();
+  D->IdentifierNamespace = Record.readInt();
   D->UsingOrNextShadow = ReadDeclAs<NamedDecl>();
   UsingShadowDecl *Pattern = ReadDeclAs<UsingShadowDecl>();
   if (Pattern)
@@ -1572,7 +1578,9 @@
   Data.DefaultedMoveAssignmentIsDeleted = Record.readInt();
   Data.DefaultedDestructorIsDeleted = Record.readInt();
   Data.HasTrivialSpecialMembers = Record.readInt();
+  Data.HasTrivialSpecialMembersForCall = Record.readInt();
   Data.DeclaredNonTrivialSpecialMembers = Record.readInt();
+  Data.DeclaredNonTrivialSpecialMembersForCall = Record.readInt();
   Data.HasIrrelevantDestructor = Record.readInt();
   Data.HasConstexprNonCopyMoveConstructor = Record.readInt();
   Data.HasDefaultedDefaultConstructor = Record.readInt();
@@ -1710,7 +1718,9 @@
   MATCH_FIELD(DefaultedMoveAssignmentIsDeleted)
   MATCH_FIELD(DefaultedDestructorIsDeleted)
   OR_FIELD(HasTrivialSpecialMembers)
+  OR_FIELD(HasTrivialSpecialMembersForCall)
   OR_FIELD(DeclaredNonTrivialSpecialMembers)
+  OR_FIELD(DeclaredNonTrivialSpecialMembersForCall)
   MATCH_FIELD(HasIrrelevantDestructor)
   OR_FIELD(HasConstexprNonCopyMoveConstructor)
   OR_FIELD(HasDefaultedDefaultConstructor)
@@ -1863,6 +1873,7 @@
 
 void ASTDeclReader::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) {
   VisitFunctionDecl(D);
+  D->IsCopyDeductionCandidate = Record.readInt();
 }
 
 void ASTDeclReader::VisitCXXMethodDecl(CXXMethodDecl *D) {
@@ -1900,7 +1911,7 @@
   VisitCXXMethodDecl(D);
 
   if (auto *OperatorDelete = ReadDeclAs<FunctionDecl>()) {
-    auto *Canon = cast<CXXDestructorDecl>(D->getCanonicalDecl());
+    CXXDestructorDecl *Canon = D->getCanonicalDecl();
     auto *ThisArg = Record.readExpr();
     // FIXME: Check consistency if we have an old and new operator delete.
     if (!Canon->OperatorDelete) {
@@ -2197,6 +2208,7 @@
   D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs);
   D->PointOfInstantiation = ReadSourceLocation();
   D->SpecializationKind = (TemplateSpecializationKind)Record.readInt();
+  D->IsCompleteDefinition = Record.readInt();
 
   bool writtenAsCanonicalDecl = Record.readInt();
   if (writtenAsCanonicalDecl) {
@@ -2812,6 +2824,21 @@
                         CtorY->getInheritedConstructor().getConstructor()))
         return false;
     }
+
+    if (FuncX->isMultiVersion() != FuncY->isMultiVersion())
+      return false;
+
+    // Multiversioned functions with different feature strings are represented
+    // as separate declarations.
+    if (FuncX->isMultiVersion()) {
+      const auto *TAX = FuncX->getAttr<TargetAttr>();
+      const auto *TAY = FuncY->getAttr<TargetAttr>();
+      assert(TAX && TAY && "Multiversion Function without target attribute");
+
+      if (TAX->getFeaturesStr() != TAY->getFeaturesStr())
+        return false;
+    }
+
     ASTContext &C = FuncX->getASTContext();
     if (!C.hasSameType(FuncX->getType(), FuncY->getType())) {
       // We can get functions with different types on the redecl chain in C++17
@@ -2820,7 +2847,7 @@
       // FIXME: Do we need to check for C++14 deduced return types here too?
       auto *XFPT = FuncX->getType()->getAs<FunctionProtoType>();
       auto *YFPT = FuncY->getType()->getAs<FunctionProtoType>();
-      if (C.getLangOpts().CPlusPlus1z && XFPT && YFPT &&
+      if (C.getLangOpts().CPlusPlus17 && XFPT && YFPT &&
           (isUnresolvedExceptionSpec(XFPT->getExceptionSpecType()) ||
            isUnresolvedExceptionSpec(YFPT->getExceptionSpecType())) &&
           C.hasSameFunctionTypeIgnoringExceptionSpec(FuncX->getType(),
@@ -3982,10 +4009,10 @@
       break;
     }
 
-    case UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER: {
+    case UPD_CXX_ADDED_VAR_DEFINITION: {
       VarDecl *VD = cast<VarDecl>(D);
-      VD->getMemberSpecializationInfo()->setPointOfInstantiation(
-          ReadSourceLocation());
+      VD->NonParmVarDeclBits.IsInline = Record.readInt();
+      VD->NonParmVarDeclBits.IsInlineSpecified = Record.readInt();
       uint64_t Val = Record.readInt();
       if (Val && !VD->getInit()) {
         VD->setInit(Record.readExpr());
@@ -3998,6 +4025,25 @@
       break;
     }
 
+    case UPD_CXX_POINT_OF_INSTANTIATION: {
+      SourceLocation POI = Record.readSourceLocation();
+      if (VarTemplateSpecializationDecl *VTSD =
+              dyn_cast<VarTemplateSpecializationDecl>(D)) {
+        VTSD->setPointOfInstantiation(POI);
+      } else if (auto *VD = dyn_cast<VarDecl>(D)) {
+        VD->getMemberSpecializationInfo()->setPointOfInstantiation(POI);
+      } else {
+        auto *FD = cast<FunctionDecl>(D);
+        if (auto *FTSInfo = FD->TemplateOrSpecialization
+                    .dyn_cast<FunctionTemplateSpecializationInfo *>())
+          FTSInfo->setPointOfInstantiation(POI);
+        else
+          FD->TemplateOrSpecialization.get<MemberSpecializationInfo *>()
+              ->setPointOfInstantiation(POI);
+      }
+      break;
+    }
+
     case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: {
       auto Param = cast<ParmVarDecl>(D);
 
diff --git a/lib/Serialization/ASTReaderInternals.h b/lib/Serialization/ASTReaderInternals.h
index 6cb4d66..2b92ae6 100644
--- a/lib/Serialization/ASTReaderInternals.h
+++ b/lib/Serialization/ASTReaderInternals.h
@@ -1,4 +1,4 @@
-//===--- ASTReaderInternals.h - AST Reader Internals ------------*- C++ -*-===//
+//===- ASTReaderInternals.h - AST Reader Internals --------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,23 +10,29 @@
 //  This file provides internal definitions used in the AST reader.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_LIB_SERIALIZATION_ASTREADERINTERNALS_H
 #define LLVM_CLANG_LIB_SERIALIZATION_ASTREADERINTERNALS_H
 
 #include "MultiOnDiskHashTable.h"
 #include "clang/AST/DeclarationName.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Serialization/ASTBitCodes.h"
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/OnDiskHashTable.h"
+#include <ctime>
 #include <utility>
 
 namespace clang {
 
 class ASTReader;
-class HeaderSearch;
-struct HeaderFileInfo;
 class FileEntry;
+struct HeaderFileInfo;
+class HeaderSearch;
+class IdentifierTable;
+class ObjCMethodDecl;
   
 namespace serialization {
 
@@ -45,12 +51,14 @@
   static const int MaxTables = 4;
 
   /// The lookup result is a list of global declaration IDs.
-  typedef llvm::SmallVector<DeclID, 4> data_type;
+  using data_type = SmallVector<DeclID, 4>;
+
   struct data_type_builder {
     data_type &Data;
     llvm::DenseSet<DeclID> Found;
 
     data_type_builder(data_type &D) : Data(D) {}
+
     void insert(DeclID ID) {
       // Just use a linear scan unless we have more than a few IDs.
       if (Found.empty() && !Data.empty()) {
@@ -70,15 +78,15 @@
         Data.push_back(ID);
     }
   };
-  typedef unsigned hash_value_type;
-  typedef unsigned offset_type;
-  typedef ModuleFile *file_type;
+  using hash_value_type = unsigned;
+  using offset_type = unsigned;
+  using file_type = ModuleFile *;
 
-  typedef DeclarationName external_key_type;
-  typedef DeclarationNameKey internal_key_type;
+  using external_key_type = DeclarationName;
+  using internal_key_type = DeclarationNameKey;
 
   explicit ASTDeclContextNameLookupTrait(ASTReader &Reader, ModuleFile &F)
-    : Reader(Reader), F(F) { }
+      : Reader(Reader), F(F) {}
 
   static bool EqualKey(const internal_key_type &a, const internal_key_type &b) {
     return a == b;
@@ -87,6 +95,7 @@
   static hash_value_type ComputeHash(const internal_key_type &Key) {
     return Key.getHash();
   }
+
   static internal_key_type GetInternalKey(const external_key_type &Name) {
     return Name;
   }
@@ -119,14 +128,13 @@
 /// functionality for accessing the on-disk hash table of identifiers
 /// in an AST file. Different subclasses customize that functionality
 /// based on what information they are interested in. Those subclasses
-/// must provide the \c data_type typedef and the ReadData operation,
-/// only.
+/// must provide the \c data_type type and the ReadData operation, only.
 class ASTIdentifierLookupTraitBase {
 public:
-  typedef StringRef external_key_type;
-  typedef StringRef internal_key_type;
-  typedef unsigned hash_value_type;
-  typedef unsigned offset_type;
+  using external_key_type = StringRef;
+  using internal_key_type = StringRef;
+  using hash_value_type = unsigned;
+  using offset_type = unsigned;
 
   static bool EqualKey(const internal_key_type& a, const internal_key_type& b) {
     return a == b;
@@ -159,11 +167,11 @@
   IdentifierInfo *KnownII;
   
 public:
-  typedef IdentifierInfo * data_type;
+  using data_type = IdentifierInfo *;
 
   ASTIdentifierLookupTrait(ASTReader &Reader, ModuleFile &F,
                            IdentifierInfo *II = nullptr)
-    : Reader(Reader), F(F), KnownII(II) { }
+      : Reader(Reader), F(F), KnownII(II) {}
 
   data_type ReadData(const internal_key_type& k,
                      const unsigned char* d,
@@ -176,8 +184,8 @@
   
 /// \brief The on-disk hash table used to contain information about
 /// all of the identifiers in the program.
-typedef llvm::OnDiskIterableChainedHashTable<ASTIdentifierLookupTrait>
-  ASTIdentifierLookupTable;
+using ASTIdentifierLookupTable =
+    llvm::OnDiskIterableChainedHashTable<ASTIdentifierLookupTrait>;
 
 /// \brief Class that performs lookup for a selector's entries in the global
 /// method pool stored in an AST file.
@@ -196,13 +204,13 @@
     SmallVector<ObjCMethodDecl *, 2> Factory;
   };
   
-  typedef Selector external_key_type;
-  typedef external_key_type internal_key_type;
-  typedef unsigned hash_value_type;
-  typedef unsigned offset_type;
+  using external_key_type = Selector;
+  using internal_key_type = external_key_type;
+  using hash_value_type = unsigned;
+  using offset_type = unsigned;
   
-  ASTSelectorLookupTrait(ASTReader &Reader, ModuleFile &F) 
-    : Reader(Reader), F(F) { }
+  ASTSelectorLookupTrait(ASTReader &Reader, ModuleFile &F)
+      : Reader(Reader), F(F) {}
   
   static bool EqualKey(const internal_key_type& a,
                        const internal_key_type& b) {
@@ -222,8 +230,8 @@
 };
   
 /// \brief The on-disk hash table used for the global method pool.
-typedef llvm::OnDiskChainedHashTable<ASTSelectorLookupTrait>
-  ASTSelectorLookupTable;
+using ASTSelectorLookupTable =
+    llvm::OnDiskChainedHashTable<ASTSelectorLookupTrait>;
   
 /// \brief Trait class used to search the on-disk hash table containing all of
 /// the header search information.
@@ -241,7 +249,7 @@
   const char *FrameworkStrings;
 
 public:
-  typedef const FileEntry *external_key_type;
+  using external_key_type = const FileEntry *;
 
   struct internal_key_type {
     off_t Size;
@@ -249,15 +257,16 @@
     StringRef Filename;
     bool Imported;
   };
-  typedef const internal_key_type &internal_key_ref;
+
+  using internal_key_ref = const internal_key_type &;
   
-  typedef HeaderFileInfo data_type;
-  typedef unsigned hash_value_type;
-  typedef unsigned offset_type;
+  using data_type = HeaderFileInfo;
+  using hash_value_type = unsigned;
+  using offset_type = unsigned;
   
   HeaderFileInfoTrait(ASTReader &Reader, ModuleFile &M, HeaderSearch *HS,
                       const char *FrameworkStrings)
-  : Reader(Reader), M(M), HS(HS), FrameworkStrings(FrameworkStrings) { }
+      : Reader(Reader), M(M), HS(HS), FrameworkStrings(FrameworkStrings) {}
   
   static hash_value_type ComputeHash(internal_key_ref ikey);
   internal_key_type GetInternalKey(const FileEntry *FE);
@@ -272,12 +281,13 @@
 };
 
 /// \brief The on-disk hash table used for known header files.
-typedef llvm::OnDiskChainedHashTable<HeaderFileInfoTrait>
-  HeaderFileInfoLookupTable;
+using HeaderFileInfoLookupTable =
+    llvm::OnDiskChainedHashTable<HeaderFileInfoTrait>;
   
-} // end namespace clang::serialization::reader
-} // end namespace clang::serialization
-} // end namespace clang
+} // namespace reader
 
+} // namespace serialization
 
-#endif
+} // namespace clang
+
+#endif // LLVM_CLANG_LIB_SERIALIZATION_ASTREADERINTERNALS_H
diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp
index 2b8a10f..4938b0e 100644
--- a/lib/Serialization/ASTReaderStmt.cpp
+++ b/lib/Serialization/ASTReaderStmt.cpp
@@ -119,7 +119,7 @@
   unsigned NumStmts = Record.readInt();
   while (NumStmts--)
     Stmts.push_back(Record.readSubStmt());
-  S->setStmts(Record.getContext(), Stmts);
+  S->setStmts(Stmts);
   S->LBraceLoc = ReadSourceLocation();
   S->RBraceLoc = ReadSourceLocation();
 }
@@ -550,12 +550,13 @@
 
 void ASTStmtReader::VisitUnaryOperator(UnaryOperator *E) {
   VisitExpr(E);
-  E->setSubExpr(Record.readSubExpr());
-  E->setOpcode((UnaryOperator::Opcode)Record.readInt());
-  E->setOperatorLoc(ReadSourceLocation());
-}
-
-void ASTStmtReader::VisitOffsetOfExpr(OffsetOfExpr *E) {
+  E->setSubExpr(Record.readSubExpr());
+  E->setOpcode((UnaryOperator::Opcode)Record.readInt());
+  E->setOperatorLoc(ReadSourceLocation());
+  E->setCanOverflow(Record.readInt());
+}
+
+void ASTStmtReader::VisitOffsetOfExpr(OffsetOfExpr *E) {
   VisitExpr(E);
   assert(E->getNumComponents() == Record.peekInt());
   Record.skipInts(1);
@@ -2920,6 +2921,7 @@
 void ASTStmtReader::VisitOMPDistributeParallelForDirective(
     OMPDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  D->setHasCancel(Record.readInt());
 }
 
 void ASTStmtReader::VisitOMPDistributeParallelForSimdDirective(
@@ -2959,6 +2961,7 @@
 void ASTStmtReader::VisitOMPTeamsDistributeParallelForDirective(
     OMPTeamsDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  D->setHasCancel(Record.readInt());
 }
 
 void ASTStmtReader::VisitOMPTargetTeamsDirective(OMPTargetTeamsDirective *D) {
@@ -2976,6 +2979,7 @@
 void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForDirective(
     OMPTargetTeamsDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  D->setHasCancel(Record.readInt());
 }
 
 void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForSimdDirective(
@@ -3078,7 +3082,8 @@
       break;
 
     case STMT_COMPOUND:
-      S = new (Context) CompoundStmt(Empty);
+      S = CompoundStmt::CreateEmpty(
+          Context, /*NumStmts=*/Record[ASTStmtReader::NumStmtFields]);
       break;
 
     case STMT_CASE:
diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp
index 7e88c59..53e09b3 100644
--- a/lib/Serialization/ASTWriter.cpp
+++ b/lib/Serialization/ASTWriter.cpp
@@ -1,4 +1,4 @@
-//===--- ASTWriter.cpp - AST File Writer ------------------------*- C++ -*-===//
+//===- ASTWriter.cpp - AST File Writer ------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,11 +17,15 @@
 #include "MultiOnDiskHashTable.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTUnresolvedSet.h"
+#include "clang/AST/Attr.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclContextInternals.h"
 #include "clang/AST/DeclFriend.h"
+#include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/LambdaCapture.h"
@@ -30,16 +34,22 @@
 #include "clang/AST/TemplateName.h"
 #include "clang/AST/Type.h"
 #include "clang/AST/TypeLocVisitor.h"
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/Basic/Lambda.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/MemoryBufferCache.h"
 #include "clang/Basic/Module.h"
 #include "clang/Basic/ObjCRuntime.h"
+#include "clang/Basic/OpenCLOptions.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/SourceManagerInternals.h"
+#include "clang/Basic/Specifiers.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
 #include "clang/Basic/Version.h"
@@ -62,24 +72,30 @@
 #include "clang/Serialization/SerializationDiagnostic.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Bitcode/BitCodes.h"
 #include "llvm/Bitcode/BitstreamWriter.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compression.h"
+#include "llvm/Support/DJB.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/OnDiskHashTable.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
 #include "llvm/Support/SHA1.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -87,11 +103,14 @@
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
+#include <ctime>
 #include <deque>
 #include <limits>
-#include <new>
+#include <memory>
+#include <queue>
 #include <tuple>
 #include <utility>
+#include <vector>
 
 using namespace clang;
 using namespace clang::serialization;
@@ -120,13 +139,14 @@
     ASTRecordWriter Record;
 
     /// \brief Type code that corresponds to the record generated.
-    TypeCode Code;
+    TypeCode Code = static_cast<TypeCode>(0);
+
     /// \brief Abbreviation to use for the record, if any.
-    unsigned AbbrevToUse;
+    unsigned AbbrevToUse = 0;
 
   public:
     ASTTypeWriter(ASTWriter &Writer, ASTWriter::RecordDataImpl &Record)
-      : Writer(Writer), Record(Writer, Record), Code((TypeCode)0), AbbrevToUse(0) { }
+      : Writer(Writer), Record(Writer, Record) {}
 
     uint64_t Emit() {
       return Record.Emit(Code, AbbrevToUse);
@@ -160,7 +180,7 @@
 #include "clang/AST/TypeNodes.def"
   };
 
-} // end namespace clang
+} // namespace clang
 
 void ASTTypeWriter::VisitBuiltinType(const BuiltinType *T) {
   llvm_unreachable("Built-in types are never serialized");
@@ -433,7 +453,7 @@
   Code = TYPE_DEPENDENT_SIZED_EXT_VECTOR;
 }
 
-void 
+void
 ASTTypeWriter::VisitDependentAddressSpaceType(
     const DependentAddressSpaceType *T) {
   Record.AddTypeRef(T->getPointeeType());
@@ -550,8 +570,7 @@
   ASTRecordWriter &Record;
 
 public:
-  TypeLocWriter(ASTRecordWriter &Record)
-    : Record(Record) { }
+  TypeLocWriter(ASTRecordWriter &Record) : Record(Record) {}
 
 #define ABSTRACT_TYPELOC(CLASS, PARENT)
 #define TYPELOC(CLASS, PARENT) \
@@ -562,7 +581,7 @@
   void VisitFunctionTypeLoc(FunctionTypeLoc TyLoc);
 };
 
-} // end anonymous namespace
+} // namespace
 
 void TypeLocWriter::VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
   // nothing to do
@@ -642,7 +661,7 @@
   SourceRange range = TL.getAttrOperandParensRange();
   Record.AddSourceLocation(range.getBegin());
   Record.AddSourceLocation(range.getEnd());
-  Record.AddStmt(TL.getAttrExprOperand());       
+  Record.AddStmt(TL.getAttrExprOperand());
 }
 
 void TypeLocWriter::VisitDependentSizedExtVectorTypeLoc(
@@ -667,18 +686,23 @@
   for (unsigned i = 0, e = TL.getNumParams(); i != e; ++i)
     Record.AddDeclRef(TL.getParam(i));
 }
+
 void TypeLocWriter::VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc TL) {
   VisitFunctionTypeLoc(TL);
 }
+
 void TypeLocWriter::VisitFunctionNoProtoTypeLoc(FunctionNoProtoTypeLoc TL) {
   VisitFunctionTypeLoc(TL);
 }
+
 void TypeLocWriter::VisitUnresolvedUsingTypeLoc(UnresolvedUsingTypeLoc TL) {
   Record.AddSourceLocation(TL.getNameLoc());
 }
+
 void TypeLocWriter::VisitTypedefTypeLoc(TypedefTypeLoc TL) {
   Record.AddSourceLocation(TL.getNameLoc());
 }
+
 void TypeLocWriter::VisitObjCTypeParamTypeLoc(ObjCTypeParamTypeLoc TL) {
   if (TL.getNumProtocols()) {
     Record.AddSourceLocation(TL.getProtocolLAngleLoc());
@@ -687,6 +711,7 @@
   for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i)
     Record.AddSourceLocation(TL.getProtocolLoc(i));
 }
+
 void TypeLocWriter::VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) {
   Record.AddSourceLocation(TL.getTypeofLoc());
   Record.AddSourceLocation(TL.getLParenLoc());
@@ -1079,6 +1104,7 @@
   RECORD(UNUSED_FILESCOPED_DECLS);
   RECORD(PPD_ENTITIES_OFFSETS);
   RECORD(VTABLE_USES);
+  RECORD(PPD_SKIPPED_RANGES);
   RECORD(REFERENCED_SELECTOR_POOL);
   RECORD(TU_UPDATE_LEXICAL);
   RECORD(SEMA_DECL_REFS);
@@ -1268,7 +1294,7 @@
   RECORD(DECL_PRAGMA_COMMENT);
   RECORD(DECL_PRAGMA_DETECT_MISMATCH);
   RECORD(DECL_OMP_DECLARE_REDUCTION);
-  
+
   // Statements and Exprs can occur in the Decls and Types block.
   AddStmtsExprs(Stream, Record);
 
@@ -1416,9 +1442,10 @@
                                   StringRef isysroot,
                                   const std::string &OutputFile) {
   using namespace llvm;
+
   Stream.EnterSubblock(CONTROL_BLOCK_ID, 5);
   RecordData Record;
-  
+
   // Metadata
   auto MetadataAbbrev = std::make_shared<BitCodeAbbrev>();
   MetadataAbbrev->Add(BitCodeAbbrevOp(METADATA));
@@ -1706,21 +1733,22 @@
 
 namespace  {
 
-  /// \brief An input file.
-  struct InputFileEntry {
-    const FileEntry *File;
-    bool IsSystemFile;
-    bool IsTransient;
-    bool BufferOverridden;
-    bool IsTopLevelModuleMap;
-  };
+/// \brief An input file.
+struct InputFileEntry {
+  const FileEntry *File;
+  bool IsSystemFile;
+  bool IsTransient;
+  bool BufferOverridden;
+  bool IsTopLevelModuleMap;
+};
 
-} // end anonymous namespace
+} // namespace
 
 void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
                                 HeaderSearchOptions &HSOpts,
                                 bool Modules) {
   using namespace llvm;
+
   Stream.EnterSubblock(INPUT_FILES_BLOCK_ID, 4);
 
   // Create input-file abbreviation.
@@ -1884,11 +1912,11 @@
   // Trait used for the on-disk hash table of header search information.
   class HeaderFileInfoTrait {
     ASTWriter &Writer;
-    
+
     // Keep track of the framework names we've used during serialization.
     SmallVector<char, 128> FrameworkStringData;
     llvm::StringMap<unsigned> FrameworkNameOffset;
-    
+
   public:
     HeaderFileInfoTrait(ASTWriter &Writer) : Writer(Writer) {}
 
@@ -1897,31 +1925,32 @@
       off_t Size;
       time_t ModTime;
     };
-    typedef const key_type &key_type_ref;
+    using key_type_ref = const key_type &;
 
     using UnresolvedModule =
         llvm::PointerIntPair<Module *, 2, ModuleMap::ModuleHeaderRole>;
-    
+
     struct data_type {
       const HeaderFileInfo &HFI;
       ArrayRef<ModuleMap::KnownHeader> KnownHeaders;
       UnresolvedModule Unresolved;
     };
-    typedef const data_type &data_type_ref;
+    using data_type_ref = const data_type &;
 
-    typedef unsigned hash_value_type;
-    typedef unsigned offset_type;
-    
+    using hash_value_type = unsigned;
+    using offset_type = unsigned;
+
     hash_value_type ComputeHash(key_type_ref key) {
       // The hash is based only on size/time of the file, so that the reader can
       // match even when symlinking or excess path elements ("foo/../", "../")
       // change the form of the name. However, complete path is still the key.
       return llvm::hash_combine(key.Size, key.ModTime);
     }
-    
-    std::pair<unsigned,unsigned>
+
+    std::pair<unsigned, unsigned>
     EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
       using namespace llvm::support;
+
       endian::Writer<little> LE(Out);
       unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
       LE.write<uint16_t>(KeyLen);
@@ -1937,6 +1966,7 @@
 
     void EmitKey(raw_ostream& Out, key_type_ref key, unsigned KeyLen) {
       using namespace llvm::support;
+
       endian::Writer<little> LE(Out);
       LE.write<uint64_t>(key.Size);
       KeyLen -= 8;
@@ -1948,16 +1978,17 @@
     void EmitData(raw_ostream &Out, key_type_ref key,
                   data_type_ref Data, unsigned DataLen) {
       using namespace llvm::support;
+
       endian::Writer<little> LE(Out);
       uint64_t Start = Out.tell(); (void)Start;
-      
+
       unsigned char Flags = (Data.HFI.isImport << 5)
                           | (Data.HFI.isPragmaOnce << 4)
                           | (Data.HFI.DirInfo << 1)
                           | Data.HFI.IndexHeaderMapHeader;
       LE.write<uint8_t>(Flags);
       LE.write<uint16_t>(Data.HFI.NumIncludes);
-      
+
       if (!Data.HFI.ControllingMacro)
         LE.write<uint32_t>(Data.HFI.ControllingMacroID);
       else
@@ -1970,10 +2001,10 @@
           = FrameworkNameOffset.find(Data.HFI.Framework);
         if (Pos == FrameworkNameOffset.end()) {
           Offset = FrameworkStringData.size() + 1;
-          FrameworkStringData.append(Data.HFI.Framework.begin(), 
+          FrameworkStringData.append(Data.HFI.Framework.begin(),
                                      Data.HFI.Framework.end());
           FrameworkStringData.push_back(0);
-          
+
           FrameworkNameOffset[Data.HFI.Framework] = Offset;
         } else
           Offset = Pos->second;
@@ -1997,14 +2028,14 @@
 
       assert(Out.tell() - Start == DataLen && "Wrong data length");
     }
-    
+
     const char *strings_begin() const { return FrameworkStringData.begin(); }
     const char *strings_end() const { return FrameworkStringData.end(); }
   };
 
-} // end anonymous namespace
+} // namespace
 
-/// \brief Write the header search block for the list of files that 
+/// \brief Write the header search block for the list of files that
 ///
 /// \param HS The header search structure to save.
 void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
@@ -2070,7 +2101,7 @@
 
   SmallVector<const FileEntry *, 16> FilesByUID;
   HS.getFileMgr().GetUniqueIDMapping(FilesByUID);
-  
+
   if (FilesByUID.size() > HS.header_file_size())
     FilesByUID.resize(HS.header_file_size());
 
@@ -2115,6 +2146,7 @@
   uint32_t BucketOffset;
   {
     using namespace llvm::support;
+
     llvm::raw_svector_ostream Out(TableData);
     // Make sure that no bucket is at offset 0
     endian::Writer<little>(Out).write<uint32_t>(0);
@@ -2131,13 +2163,13 @@
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
   unsigned TableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
-  
+
   // Write the header search table
   RecordData::value_type Record[] = {HEADER_SEARCH_TABLE, BucketOffset,
                                      NumHeaderSearchEntries, TableData.size()};
   TableData.append(GeneratorTrait.strings_begin(),GeneratorTrait.strings_end());
   Stream.EmitRecordWithBlob(TableAbbrev, Record, TableData);
-  
+
   // Free all of the strings we had to duplicate.
   for (unsigned I = 0, N = SavedStrings.size(); I != N; ++I)
     free(const_cast<char *>(SavedStrings[I]));
@@ -2146,7 +2178,7 @@
 static void emitBlob(llvm::BitstreamWriter &Stream, StringRef Blob,
                      unsigned SLocBufferBlobCompressedAbbrv,
                      unsigned SLocBufferBlobAbbrv) {
-  typedef ASTWriter::RecordData::value_type RecordDataType;
+  using RecordDataType = ASTWriter::RecordData::value_type;
 
   // Compress the buffer if possible. We expect that almost all PCM
   // consumers will not want its contents.
@@ -2237,7 +2269,7 @@
         Record.push_back(InputFileIDs[Content->OrigEntry]);
 
         Record.push_back(File.NumCreatedFIDs);
-        
+
         FileDeclIDsTy::iterator FDI = FileDeclIDs.find(FID);
         if (FDI != FileDeclIDs.end()) {
           Record.push_back(FDI->second->FirstDeclIndex);
@@ -2246,9 +2278,9 @@
           Record.push_back(0);
           Record.push_back(0);
         }
-        
+
         Stream.EmitRecordWithAbbrev(SLocFileAbbrv, Record);
-        
+
         if (Content->BufferOverridden || Content->IsTransient)
           EmitBlob = true;
       } else {
@@ -2332,12 +2364,13 @@
 
     // Emit the needed file names.
     llvm::DenseMap<int, int> FilenameMap;
+    FilenameMap[-1] = -1; // For unspecified filenames.
     for (const auto &L : LineTable) {
       if (L.first.ID < 0)
         continue;
       for (auto &LE : L.second) {
         if (FilenameMap.insert(std::make_pair(LE.FilenameID,
-                                              FilenameMap.size())).second)
+                                              FilenameMap.size() - 1)).second)
           AddPath(LineTable.getFilename(LE.FilenameID), Record);
       }
     }
@@ -2390,7 +2423,6 @@
 
 /// \brief Writes the block containing the serialized form of the
 /// preprocessor.
-///
 void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
   PreprocessingRecord *PPRec = PP.getPreprocessingRecord();
   if (PPRec)
@@ -2407,6 +2439,17 @@
 
   if (PP.isRecordingPreamble() && PP.hasRecordedPreamble()) {
     assert(!IsModule);
+    auto SkipInfo = PP.getPreambleSkipInfo();
+    if (SkipInfo.hasValue()) {
+      Record.push_back(true);
+      AddSourceLocation(SkipInfo->HashTokenLoc, Record);
+      AddSourceLocation(SkipInfo->IfTokenLoc, Record);
+      Record.push_back(SkipInfo->FoundNonSkipPortion);
+      Record.push_back(SkipInfo->FoundElse);
+      AddSourceLocation(SkipInfo->ElseLoc, Record);
+    } else {
+      Record.push_back(false);
+    }
     for (const auto &Cond : PP.getPreambleConditionalStack()) {
       AddSourceLocation(Cond.IfLoc, Record);
       Record.push_back(Cond.WasSkipping);
@@ -2598,8 +2641,8 @@
   // If the preprocessor has a preprocessing record, emit it.
   unsigned NumPreprocessingRecords = 0;
   using namespace llvm;
-  
-  // Set up the abbreviation for 
+
+  // Set up the abbreviation for
   unsigned InclusionAbbrev = 0;
   {
     auto Abbrev = std::make_shared<BitCodeAbbrev>();
@@ -2611,15 +2654,15 @@
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
     InclusionAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
   }
-  
-  unsigned FirstPreprocessorEntityID 
-    = (Chain ? PPRec.getNumLoadedPreprocessedEntities() : 0) 
+
+  unsigned FirstPreprocessorEntityID
+    = (Chain ? PPRec.getNumLoadedPreprocessedEntities() : 0)
     + NUM_PREDEF_PP_ENTITY_IDS;
   unsigned NextPreprocessorEntityID = FirstPreprocessorEntityID;
   RecordData Record;
   for (PreprocessingRecord::iterator E = PPRec.local_begin(),
                                   EEnd = PPRec.local_end();
-       E != EEnd; 
+       E != EEnd;
        (void)++E, ++NumPreprocessingRecords, ++NextPreprocessorEntityID) {
     Record.clear();
 
@@ -2660,7 +2703,7 @@
       Stream.EmitRecordWithBlob(InclusionAbbrev, Record, Buffer);
       continue;
     }
-    
+
     llvm_unreachable("Unhandled PreprocessedEntity in ASTWriter");
   }
   Stream.ExitBlock();
@@ -2684,6 +2727,26 @@
     Stream.EmitRecordWithBlob(PPEOffsetAbbrev, Record,
                               bytes(PreprocessedEntityOffsets));
   }
+
+  // Write the skipped region table for the preprocessing record.
+  ArrayRef<SourceRange> SkippedRanges = PPRec.getSkippedRanges();
+  if (SkippedRanges.size() > 0) {
+    std::vector<PPSkippedRange> SerializedSkippedRanges;
+    SerializedSkippedRanges.reserve(SkippedRanges.size());
+    for (auto const& Range : SkippedRanges)
+      SerializedSkippedRanges.emplace_back(Range);
+
+    using namespace llvm;
+    auto Abbrev = std::make_shared<BitCodeAbbrev>();
+    Abbrev->Add(BitCodeAbbrevOp(PPD_SKIPPED_RANGES));
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+    unsigned PPESkippedRangeAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
+
+    Record.clear();
+    Record.push_back(PPD_SKIPPED_RANGES);
+    Stream.EmitRecordWithBlob(PPESkippedRangeAbbrev, Record,
+                              bytes(SerializedSkippedRanges));
+  }
 }
 
 unsigned ASTWriter::getLocalOrImportedSubmoduleID(Module *Mod) {
@@ -2720,14 +2783,14 @@
   for (auto Sub = Mod->submodule_begin(), SubEnd = Mod->submodule_end();
        Sub != SubEnd; ++Sub)
     ChildModules += getNumberOfModules(*Sub);
-  
+
   return ChildModules + 1;
 }
 
 void ASTWriter::WriteSubmodules(Module *WritingModule) {
   // Enter the submodule description block.
   Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5);
-  
+
   // Write the abbreviations needed for the submodules block.
   using namespace llvm;
 
@@ -2820,7 +2883,7 @@
       getNumberOfModules(WritingModule),
       FirstSubmoduleID - NUM_PREDEF_SUBMODULE_IDS};
   Stream.EmitRecord(SUBMODULE_METADATA, Record);
-  
+
   // Write all of the submodules.
   std::queue<Module *> Q;
   Q.push(WritingModule);
@@ -2896,7 +2959,7 @@
         Stream.EmitRecordWithBlob(TopHeaderAbbrev, Record, H->getName());
     }
 
-    // Emit the imports. 
+    // Emit the imports.
     if (!Mod->Imports.empty()) {
       RecordData Record;
       for (auto *I : Mod->Imports)
@@ -2904,7 +2967,7 @@
       Stream.EmitRecord(SUBMODULE_IMPORTS, Record);
     }
 
-    // Emit the exports. 
+    // Emit the exports.
     if (!Mod->Exports.empty()) {
       RecordData Record;
       for (const auto &E : Mod->Exports) {
@@ -2954,12 +3017,12 @@
       RecordData::value_type Record[] = {SUBMODULE_EXPORT_AS};
       Stream.EmitRecordWithBlob(ExportAsAbbrev, Record, Mod->ExportAsModule);
     }
-    
+
     // Queue up the submodules of this module.
     for (auto *M : Mod->submodules())
       Q.push(M);
   }
-  
+
   Stream.ExitBlock();
 
   assert((NextSubmoduleID - FirstSubmoduleID ==
@@ -2998,7 +3061,7 @@
 
     unsigned &DiagStateID = DiagStateIDMap[State];
     Record.push_back(DiagStateID);
-  
+
     if (DiagStateID == 0) {
       DiagStateID = ++CurrID;
 
@@ -3029,8 +3092,11 @@
         !FileIDAndFile.second.HasLocalTransitions)
       continue;
     ++NumLocations;
-    AddSourceLocation(Diag.SourceMgr->getLocForStartOfFile(FileIDAndFile.first),
-                      Record);
+
+    SourceLocation Loc = Diag.SourceMgr->getComposedLoc(FileIDAndFile.first, 0);
+    assert(!Loc.isInvalid() && "start loc for valid FileID is invalid");
+    AddSourceLocation(Loc, Record);
+
     Record.push_back(FileIDAndFile.second.StateTransitions.size());
     for (auto &StatePoint : FileIDAndFile.second.StateTransitions) {
       Record.push_back(StatePoint.Offset);
@@ -3196,28 +3262,29 @@
   ASTWriter &Writer;
 
 public:
-  typedef Selector key_type;
-  typedef key_type key_type_ref;
+  using key_type = Selector;
+  using key_type_ref = key_type;
 
   struct data_type {
     SelectorID ID;
     ObjCMethodList Instance, Factory;
   };
-  typedef const data_type& data_type_ref;
+  using data_type_ref = const data_type &;
 
-  typedef unsigned hash_value_type;
-  typedef unsigned offset_type;
+  using hash_value_type = unsigned;
+  using offset_type = unsigned;
 
-  explicit ASTMethodPoolTrait(ASTWriter &Writer) : Writer(Writer) { }
+  explicit ASTMethodPoolTrait(ASTWriter &Writer) : Writer(Writer) {}
 
   static hash_value_type ComputeHash(Selector Sel) {
     return serialization::ComputeHash(Sel);
   }
 
-  std::pair<unsigned,unsigned>
+  std::pair<unsigned, unsigned>
     EmitKeyDataLength(raw_ostream& Out, Selector Sel,
                       data_type_ref Methods) {
     using namespace llvm::support;
+
     endian::Writer<little> LE(Out);
     unsigned KeyLen = 2 + (Sel.getNumArgs()? Sel.getNumArgs() * 4 : 4);
     LE.write<uint16_t>(KeyLen);
@@ -3236,6 +3303,7 @@
 
   void EmitKey(raw_ostream& Out, Selector Sel, unsigned) {
     using namespace llvm::support;
+
     endian::Writer<little> LE(Out);
     uint64_t Start = Out.tell();
     assert((Start >> 32) == 0 && "Selector key offset too large");
@@ -3252,6 +3320,7 @@
   void EmitData(raw_ostream& Out, key_type_ref,
                 data_type_ref Methods, unsigned DataLen) {
     using namespace llvm::support;
+
     endian::Writer<little> LE(Out);
     uint64_t Start = Out.tell(); (void)Start;
     LE.write<uint32_t>(Methods.ID);
@@ -3296,7 +3365,7 @@
   }
 };
 
-} // end anonymous namespace
+} // namespace
 
 /// \brief Write ObjC data: selectors and the method pool.
 ///
@@ -3360,6 +3429,7 @@
     uint32_t BucketOffset;
     {
       using namespace llvm::support;
+
       ASTMethodPoolTrait Trait(*this);
       llvm::raw_svector_ostream Out(MethodPool);
       // Make sure that no bucket is at offset 0
@@ -3404,6 +3474,7 @@
 /// \brief Write the selectors referenced in @selector expression into AST file.
 void ASTWriter::WriteReferencedSelectorsPool(Sema &SemaRef) {
   using namespace llvm;
+
   if (SemaRef.ReferencedSelectors.empty())
     return;
 
@@ -3475,7 +3546,7 @@
   bool IsModule;
   bool NeedDecls;
   ASTWriter::RecordData *InterestingIdentifierOffsets;
-  
+
   /// \brief Determines whether this is an "interesting" identifier that needs a
   /// full IdentifierInfo structure written into the hash table. Notably, this
   /// doesn't check whether the name has macros defined; use PublicMacroIterator
@@ -3492,14 +3563,14 @@
   }
 
 public:
-  typedef IdentifierInfo* key_type;
-  typedef key_type  key_type_ref;
+  using key_type = IdentifierInfo *;
+  using key_type_ref = key_type;
 
-  typedef IdentID data_type;
-  typedef data_type data_type_ref;
+  using data_type = IdentID;
+  using data_type_ref = data_type;
 
-  typedef unsigned hash_value_type;
-  typedef unsigned offset_type;
+  using hash_value_type = unsigned;
+  using offset_type = unsigned;
 
   ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
                           IdentifierResolver &IdResolver, bool IsModule,
@@ -3511,7 +3582,7 @@
   bool needDecls() const { return NeedDecls; }
 
   static hash_value_type ComputeHash(const IdentifierInfo* II) {
-    return llvm::HashString(II->getName());
+    return llvm::djbHash(II->getName());
   }
 
   bool isInterestingIdentifier(const IdentifierInfo *II) {
@@ -3523,7 +3594,7 @@
     return isInterestingIdentifier(II, 0);
   }
 
-  std::pair<unsigned,unsigned>
+  std::pair<unsigned, unsigned>
   EmitKeyDataLength(raw_ostream& Out, IdentifierInfo* II, IdentID ID) {
     unsigned KeyLen = II->getLength() + 1;
     unsigned DataLen = 4; // 4 bytes for the persistent ID << 1
@@ -3541,7 +3612,9 @@
           DataLen += 4;
       }
     }
+
     using namespace llvm::support;
+
     endian::Writer<little> LE(Out);
 
     assert((uint16_t)DataLen == DataLen && (uint16_t)KeyLen == KeyLen);
@@ -3570,6 +3643,7 @@
   void EmitData(raw_ostream& Out, IdentifierInfo* II,
                 IdentID ID, unsigned) {
     using namespace llvm::support;
+
     endian::Writer<little> LE(Out);
 
     auto MacroOffset = Writer.getMacroDirectivesOffset(II);
@@ -3613,14 +3687,14 @@
   }
 };
 
-} // end anonymous namespace
+} // namespace
 
 /// \brief Write the identifier table into the AST file.
 ///
 /// The identifier table consists of a blob containing string data
 /// (the actual identifiers themselves) and a separate "offsets" index
 /// that maps identifier IDs to locations within the blob.
-void ASTWriter::WriteIdentifierTable(Preprocessor &PP, 
+void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
                                      IdentifierResolver &IdResolver,
                                      bool IsModule) {
   using namespace llvm;
@@ -3671,6 +3745,7 @@
     uint32_t BucketOffset;
     {
       using namespace llvm::support;
+
       llvm::raw_svector_ostream Out(IdentifierTable);
       // Make sure that no bucket is at offset 0
       endian::Writer<little>(Out).write<uint32_t>(0);
@@ -3726,17 +3801,17 @@
   llvm::SmallVector<DeclID, 64> DeclIDs;
 
 public:
-  typedef DeclarationNameKey key_type;
-  typedef key_type key_type_ref;
+  using key_type = DeclarationNameKey;
+  using key_type_ref = key_type;
 
   /// A start and end index into DeclIDs, representing a sequence of decls.
-  typedef std::pair<unsigned, unsigned> data_type;
-  typedef const data_type& data_type_ref;
+  using data_type = std::pair<unsigned, unsigned>;
+  using data_type_ref = const data_type &;
 
-  typedef unsigned hash_value_type;
-  typedef unsigned offset_type;
+  using hash_value_type = unsigned;
+  using offset_type = unsigned;
 
-  explicit ASTDeclContextNameLookupTrait(ASTWriter &Writer) : Writer(Writer) { }
+  explicit ASTDeclContextNameLookupTrait(ASTWriter &Writer) : Writer(Writer) {}
 
   template<typename Coll>
   data_type getData(const Coll &Decls) {
@@ -3768,6 +3843,7 @@
            "have reference to loaded module file but no chain?");
 
     using namespace llvm::support;
+
     endian::Writer<little>(Out)
         .write<uint32_t>(Writer.getChain()->getModuleFileID(F));
   }
@@ -3776,6 +3852,7 @@
                                                   DeclarationNameKey Name,
                                                   data_type_ref Lookup) {
     using namespace llvm::support;
+
     endian::Writer<little> LE(Out);
     unsigned KeyLen = 1;
     switch (Name.getKind()) {
@@ -3809,6 +3886,7 @@
 
   void EmitKey(raw_ostream &Out, DeclarationNameKey Name, unsigned) {
     using namespace llvm::support;
+
     endian::Writer<little> LE(Out);
     LE.write<uint8_t>(Name.getKind());
     switch (Name.getKind()) {
@@ -3840,6 +3918,7 @@
   void EmitData(raw_ostream &Out, key_type_ref, data_type Lookup,
                 unsigned DataLen) {
     using namespace llvm::support;
+
     endian::Writer<little> LE(Out);
     uint64_t Start = Out.tell(); (void)Start;
     for (unsigned I = Lookup.first, N = Lookup.second; I != N; ++I)
@@ -3848,7 +3927,7 @@
   }
 };
 
-} // end anonymous namespace
+} // namespace
 
 bool ASTWriter::isLookupResultExternal(StoredDeclsList &Result,
                                        DeclContext *DC) {
@@ -4219,16 +4298,16 @@
 void ASTWriter::WriteObjCCategories() {
   SmallVector<ObjCCategoriesInfo, 2> CategoriesMap;
   RecordData Categories;
-  
+
   for (unsigned I = 0, N = ObjCClassesWithCategories.size(); I != N; ++I) {
     unsigned Size = 0;
     unsigned StartIndex = Categories.size();
-    
+
     ObjCInterfaceDecl *Class = ObjCClassesWithCategories[I];
-    
+
     // Allocate space for the size.
     Categories.push_back(0);
-    
+
     // Add the categories.
     for (ObjCInterfaceDecl::known_categories_iterator
            Cat = Class->known_categories_begin(),
@@ -4237,10 +4316,10 @@
       assert(getDeclID(*Cat) != 0 && "Bogus category");
       AddDeclRef(*Cat, Categories);
     }
-    
+
     // Update the size.
     Categories[StartIndex] = Size;
-    
+
     // Record this interface -> category map.
     ObjCCategoriesInfo CatInfo = { getDeclID(Class), StartIndex };
     CategoriesMap.push_back(CatInfo);
@@ -4381,7 +4460,6 @@
     Record.AddSourceRange(A->getRange());
 
 #include "clang/Serialization/AttrPCHWrite.inc"
-
   }
 }
 
@@ -4501,7 +4579,7 @@
   WritingAST = true;
 
   ASTHasCompilerErrors = hasErrors;
-  
+
   // Emit the file header.
   Stream.Emit((unsigned)'C', 8);
   Stream.Emit((unsigned)'P', 8);
@@ -4549,7 +4627,7 @@
   // Make sure that the AST reader knows to finalize itself.
   if (Chain)
     Chain->finalizeForWriting();
-  
+
   ASTContext &Context = SemaRef.Context;
   Preprocessor &PP = SemaRef.PP;
 
@@ -4590,7 +4668,7 @@
   // headers.
   RecordData TentativeDefinitions;
   AddLazyVectorDecls(*this, SemaRef.TentativeDefinitions, TentativeDefinitions);
-  
+
   // Build a record containing all of the file scoped decls in this file.
   RecordData UnusedFileScopedDecls;
   if (!isModule)
@@ -4711,7 +4789,7 @@
       NewGlobalKindDeclPairs.push_back(GetDeclRef(D));
     }
   }
-  
+
   auto Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(llvm::BitCodeAbbrevOp(TU_UPDATE_LEXICAL));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
@@ -4733,7 +4811,7 @@
   // If we have any extern "C" names, write out a visible update for them.
   if (Context.ExternCContext)
     WriteDeclContextVisibleUpdate(Context.ExternCContext);
-  
+
   // If the translation unit has an anonymous namespace, and we don't already
   // have an update block for it, write it as an update block.
   // FIXME: Why do we not do this if there's already an update block?
@@ -4822,11 +4900,10 @@
     //   declaration-id:i32
     //   c++-base-specifiers-id:i32
     //   type-id:i32)
-    // 
+    //
     // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule or
     // MK_ExplicitModule, then the module-name is the module name. Otherwise,
     // it is the module file name.
-    //
     auto Abbrev = std::make_shared<BitCodeAbbrev>();
     Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
@@ -4836,6 +4913,7 @@
       llvm::raw_svector_ostream Out(Buffer);
       for (ModuleFile &M : Chain->ModuleMgr) {
         using namespace llvm::support;
+
         endian::Writer<little> LE(Out);
         LE.write<uint8_t>(static_cast<uint8_t>(M.Kind));
         StringRef Name =
@@ -4916,7 +4994,7 @@
   WriteOpenCLExtensionDecls(SemaRef);
   WriteCUDAPragmas(SemaRef);
 
-  // If we're emitting a module, write out the submodule information.  
+  // If we're emitting a module, write out the submodule information.
   if (WritingModule)
     WriteSubmodules(WritingModule);
 
@@ -4966,7 +5044,7 @@
   // Write the record containing CUDA-specific declaration references.
   if (!CUDASpecialDeclRefs.empty())
     Stream.EmitRecord(CUDA_SPECIAL_DECL_REFS, CUDASpecialDeclRefs);
-  
+
   // Write the delegating constructors.
   if (!DelegatingCtorDecls.empty())
     Stream.EmitRecord(DELEGATING_CTORS, DelegatingCtorDecls);
@@ -5081,9 +5159,15 @@
       case UPD_CXX_ADDED_FUNCTION_DEFINITION:
         break;
 
-      case UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER: {
-        const VarDecl *VD = cast<VarDecl>(D);
+      case UPD_CXX_POINT_OF_INSTANTIATION:
+        // FIXME: Do we need to also save the template specialization kind here?
         Record.AddSourceLocation(Update.getLoc());
+        break;
+
+      case UPD_CXX_ADDED_VAR_DEFINITION: {
+        const VarDecl *VD = cast<VarDecl>(D);
+        Record.push_back(VD->isInline());
+        Record.push_back(VD->isInlineSpecified());
         if (VD->getInit()) {
           Record.push_back(!VD->isInitKnownICE() ? 1
                                                  : (VD->isInitICE() ? 3 : 2));
@@ -5263,7 +5347,7 @@
 MacroID ASTWriter::getMacroID(MacroInfo *MI) {
   if (!MI || MI->isBuiltinMacro())
     return 0;
-  
+
   assert(MacroIDs.find(MI) != MacroIDs.end() && "Macro not emitted!");
   return MacroIDs[MI];
 }
@@ -5407,12 +5491,12 @@
   if (!D) {
     return 0;
   }
-  
+
   // If D comes from an AST file, its declaration ID is already known and
   // fixed.
   if (D->isFromASTFile())
     return D->getGlobalID();
-  
+
   assert(!(reinterpret_cast<uintptr_t>(D) & 0x01) && "Invalid decl pointer");
   DeclID &ID = DeclIDs[D];
   if (ID == 0) {
@@ -5456,7 +5540,9 @@
     return;
   // FIXME: ParmVarDecls that are part of a function type of a parameter of
   // a function/objc method, should not have TU as lexical context.
-  if (isa<ParmVarDecl>(D))
+  // TemplateTemplateParmDecls that are part of an alias template, should not
+  // have TU as lexical context.
+  if (isa<ParmVarDecl>(D) || isa<TemplateTemplateParmDecl>(D))
     return;
 
   SourceManager &SM = Context->getSourceManager();
@@ -5732,7 +5818,7 @@
     AddTemplateName(subst->getReplacement());
     break;
   }
-      
+
   case TemplateName::SubstTemplateTemplateParmPack: {
     SubstTemplateTemplateParmPackStorage *SubstPack
       = Name.getAsSubstTemplateTemplateParmPack();
@@ -5832,7 +5918,7 @@
   Record->push_back(Base.getInheritConstructors());
   AddTypeSourceInfo(Base.getTypeSourceInfo());
   AddSourceRange(Base.getSourceRange());
-  AddSourceLocation(Base.isPackExpansion()? Base.getEllipsisLoc() 
+  AddSourceLocation(Base.isPackExpansion()? Base.getEllipsisLoc()
                                           : SourceLocation());
 }
 
@@ -5926,7 +6012,9 @@
   Record->push_back(Data.DefaultedMoveAssignmentIsDeleted);
   Record->push_back(Data.DefaultedDestructorIsDeleted);
   Record->push_back(Data.HasTrivialSpecialMembers);
+  Record->push_back(Data.HasTrivialSpecialMembersForCall);
   Record->push_back(Data.DeclaredNonTrivialSpecialMembers);
+  Record->push_back(Data.DeclaredNonTrivialSpecialMembersForCall);
   Record->push_back(Data.HasIrrelevantDestructor);
   Record->push_back(Data.HasConstexprNonCopyMoveConstructor);
   Record->push_back(Data.HasDefaultedDefaultConstructor);
@@ -5964,9 +6052,9 @@
 
   AddUnresolvedSet(Data.Conversions.get(*Writer->Context));
   AddUnresolvedSet(Data.VisibleConversions.get(*Writer->Context));
-  // Data.Definition is the owning decl, no need to write it. 
+  // Data.Definition is the owning decl, no need to write it.
   AddDeclRef(D->getFirstFriend());
-  
+
   // Add lambda-specific data.
   if (Data.IsLambda) {
     auto &Lambda = D->getLambdaData();
@@ -6200,6 +6288,15 @@
   DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_ADDED_FUNCTION_DEFINITION));
 }
 
+void ASTWriter::VariableDefinitionInstantiated(const VarDecl *D) {
+  if (Chain && Chain->isProcessingUpdateRecords()) return;
+  assert(!WritingAST && "Already writing the AST!");
+  if (!D->isFromASTFile())
+    return;
+
+  DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_ADDED_VAR_DEFINITION));
+}
+
 void ASTWriter::FunctionDefinitionInstantiated(const FunctionDecl *D) {
   if (Chain && Chain->isProcessingUpdateRecords()) return;
   assert(!WritingAST && "Already writing the AST!");
@@ -6209,7 +6306,7 @@
   DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_ADDED_FUNCTION_DEFINITION));
 }
 
-void ASTWriter::StaticDataMemberInstantiated(const VarDecl *D) {
+void ASTWriter::InstantiationRequested(const ValueDecl *D) {
   if (Chain && Chain->isProcessingUpdateRecords()) return;
   assert(!WritingAST && "Already writing the AST!");
   if (!D->isFromASTFile())
@@ -6217,9 +6314,12 @@
 
   // Since the actual instantiation is delayed, this really means that we need
   // to update the instantiation location.
-  DeclUpdates[D].push_back(
-      DeclUpdate(UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER,
-       D->getMemberSpecializationInfo()->getPointOfInstantiation()));
+  SourceLocation POI;
+  if (auto *VD = dyn_cast<VarDecl>(D))
+    POI = VD->getPointOfInstantiation();
+  else
+    POI = cast<FunctionDecl>(D)->getPointOfInstantiation();
+  DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_POINT_OF_INSTANTIATION, POI));
 }
 
 void ASTWriter::DefaultArgumentInstantiated(const ParmVarDecl *D) {
@@ -6247,7 +6347,7 @@
   assert(!WritingAST && "Already writing the AST!");
   if (!IFD->isFromASTFile())
     return; // Declaration not imported from PCH.
-  
+
   assert(IFD->getDefinition() && "Category on a class without a definition?");
   ObjCClassesWithCategories.insert(
     const_cast<ObjCInterfaceDecl *>(IFD->getDefinition()));
diff --git a/lib/Serialization/ASTWriterDecl.cpp b/lib/Serialization/ASTWriterDecl.cpp
index 7a3f2e3..dd57029 100644
--- a/lib/Serialization/ASTWriterDecl.cpp
+++ b/lib/Serialization/ASTWriterDecl.cpp
@@ -528,16 +528,20 @@
   Record.push_back(D->HasWrittenPrototype);
   Record.push_back(D->IsDeleted);
   Record.push_back(D->IsTrivial);
+  Record.push_back(D->IsTrivialForCall);
   Record.push_back(D->IsDefaulted);
   Record.push_back(D->IsExplicitlyDefaulted);
   Record.push_back(D->HasImplicitReturnZero);
   Record.push_back(D->IsConstexpr);
   Record.push_back(D->UsesSEHTry);
   Record.push_back(D->HasSkippedBody);
+  Record.push_back(D->IsMultiVersion);
   Record.push_back(D->IsLateTemplateParsed);
   Record.push_back(D->getLinkageInternal());
   Record.AddSourceLocation(D->getLocEnd());
 
+  Record.push_back(D->getODRHash());
+
   Record.push_back(D->getTemplatedKind());
   switch (D->getTemplatedKind()) {
   case FunctionDecl::TK_NonTemplate:
@@ -612,6 +616,7 @@
 
 void ASTDeclWriter::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) {
   VisitFunctionDecl(D);
+  Record.push_back(D->IsCopyDeductionCandidate);
   Code = serialization::DECL_CXX_DEDUCTION_GUIDE;
 }
 
@@ -1189,6 +1194,7 @@
   VisitRedeclarable(D);
   VisitNamedDecl(D);
   Record.AddDeclRef(D->getTargetDecl());
+  Record.push_back(D->getIdentifierNamespace());
   Record.AddDeclRef(D->UsingOrNextShadow);
   Record.AddDeclRef(Context.getInstantiatedFromUsingShadowDecl(D));
   Code = serialization::DECL_USING_SHADOW;
@@ -1266,10 +1272,8 @@
   VisitFunctionDecl(D);
   if (D->isCanonicalDecl()) {
     Record.push_back(D->size_overridden_methods());
-    for (CXXMethodDecl::method_iterator
-           I = D->begin_overridden_methods(), E = D->end_overridden_methods();
-           I != E; ++I)
-      Record.AddDeclRef(*I);
+    for (const CXXMethodDecl *MD : D->overridden_methods())
+      Record.AddDeclRef(MD);
   } else {
     // We only need to record overridden methods once for the canonical decl.
     Record.push_back(0);
@@ -1493,6 +1497,7 @@
   Record.AddTemplateArgumentList(&D->getTemplateArgs());
   Record.AddSourceLocation(D->getPointOfInstantiation());
   Record.push_back(D->getSpecializationKind());
+  Record.push_back(D->IsCompleteDefinition);
   Record.push_back(D->isCanonicalDecl());
 
   if (D->isCanonicalDecl()) {
@@ -2063,15 +2068,18 @@
   Abv->Add(BitCodeAbbrevOp(1));                         // HasWrittenProto
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Deleted
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Trivial
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // TrivialForCall
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Defaulted
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ExplicitlyDefaulted
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ImplicitReturnZero
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Constexpr
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // UsesSEHTry
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // SkippedBody
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // MultiVersion
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // LateParsed
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Linkage
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // LocEnd
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // ODRHash
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // TemplateKind
   // This Array slurps the rest of the record. Fortunately we want to encode
   // (nearly) all the remaining (variable number of) fields in the same way.
@@ -2257,15 +2265,24 @@
   assert(FD->doesThisDeclarationHaveABody());
   bool ModulesCodegen = false;
   if (Writer->WritingModule && !FD->isDependentContext()) {
-    // Under -fmodules-codegen, codegen is performed for all defined functions.
-    // When building a C++ Modules TS module interface unit, a strong definition
-    // in the module interface is provided by the compilation of that module
-    // interface unit, not by its users. (Inline functions are still emitted
-    // in module users.)
-    ModulesCodegen =
-        Writer->Context->getLangOpts().ModulesCodegen ||
-        (Writer->WritingModule->Kind == Module::ModuleInterfaceUnit &&
-         Writer->Context->GetGVALinkageForFunction(FD) == GVA_StrongExternal);
+    Optional<GVALinkage> Linkage;
+    if (Writer->WritingModule->Kind == Module::ModuleInterfaceUnit) {
+      // When building a C++ Modules TS module interface unit, a strong
+      // definition in the module interface is provided by the compilation of
+      // that module interface unit, not by its users. (Inline functions are
+      // still emitted in module users.)
+      Linkage = Writer->Context->GetGVALinkageForFunction(FD);
+      ModulesCodegen = *Linkage == GVA_StrongExternal;
+    }
+    if (Writer->Context->getLangOpts().ModulesCodegen) {
+      // Under -fmodules-codegen, codegen is performed for all non-internal,
+      // non-always_inline functions.
+      if (!FD->hasAttr<AlwaysInlineAttr>()) {
+        if (!Linkage)
+          Linkage = Writer->Context->GetGVALinkageForFunction(FD);
+        ModulesCodegen = *Linkage != GVA_Internal;
+      }
+    }
   }
   Record->push_back(ModulesCodegen);
   if (ModulesCodegen)
diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp
index 15d2425..6fd0372 100644
--- a/lib/Serialization/ASTWriterStmt.cpp
+++ b/lib/Serialization/ASTWriterStmt.cpp
@@ -506,12 +506,13 @@
 
 void ASTStmtWriter::VisitUnaryOperator(UnaryOperator *E) {
   VisitExpr(E);
-  Record.AddStmt(E->getSubExpr());
-  Record.push_back(E->getOpcode()); // FIXME: stable encoding
-  Record.AddSourceLocation(E->getOperatorLoc());
-  Code = serialization::EXPR_UNARY_OPERATOR;
-}
-
+  Record.AddStmt(E->getSubExpr());
+  Record.push_back(E->getOpcode()); // FIXME: stable encoding
+  Record.AddSourceLocation(E->getOperatorLoc());
+  Record.push_back(E->canOverflow());
+  Code = serialization::EXPR_UNARY_OPERATOR;
+}
+
 void ASTStmtWriter::VisitOffsetOfExpr(OffsetOfExpr *E) {
   VisitExpr(E);
   Record.push_back(E->getNumComponents());
@@ -2568,6 +2569,7 @@
 void ASTStmtWriter::VisitOMPDistributeParallelForDirective(
     OMPDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
 }
 
@@ -2615,6 +2617,7 @@
 void ASTStmtWriter::VisitOMPTeamsDistributeParallelForDirective(
     OMPTeamsDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
 }
 
@@ -2634,6 +2637,7 @@
 void ASTStmtWriter::VisitOMPTargetTeamsDistributeParallelForDirective(
     OMPTargetTeamsDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
 }
 
diff --git a/lib/Serialization/GlobalModuleIndex.cpp b/lib/Serialization/GlobalModuleIndex.cpp
index 20c1142..8083631 100644
--- a/lib/Serialization/GlobalModuleIndex.cpp
+++ b/lib/Serialization/GlobalModuleIndex.cpp
@@ -21,9 +21,9 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/BitstreamReader.h"
 #include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Support/DJB.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/LockFileManager.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -81,7 +81,7 @@
   }
 
   static hash_value_type ComputeHash(const internal_key_type& a) {
-    return llvm::HashString(a);
+    return llvm::djbHash(a);
   }
 
   static std::pair<unsigned, unsigned>
@@ -289,7 +289,7 @@
 
 bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) {
   Hits.clear();
-  
+
   // If there's no identifier index, there is nothing we can do.
   if (!IdentifierIndex)
     return false;
@@ -413,7 +413,7 @@
     /// \brief A mapping from all interesting identifiers to the set of module
     /// files in which those identifiers are considered interesting.
     InterestingIdentifierMap InterestingIdentifiers;
-    
+
     /// \brief Write the block-info block for the global module index file.
     void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
 
@@ -608,7 +608,7 @@
         // Skip the import location
         ++Idx;
 
-        // Load stored size/modification time. 
+        // Load stored size/modification time.
         off_t StoredSize = (off_t)Record[Idx++];
         time_t StoredModTime = (time_t)Record[Idx++];
 
@@ -697,7 +697,7 @@
   typedef unsigned offset_type;
 
   static hash_value_type ComputeHash(key_type_ref Key) {
-    return llvm::HashString(Key);
+    return llvm::djbHash(Key);
   }
 
   std::pair<unsigned,unsigned>
@@ -710,7 +710,7 @@
     LE.write<uint16_t>(DataLen);
     return std::make_pair(KeyLen, DataLen);
   }
-  
+
   void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
     Out.write(Key.data(), KeyLen);
   }
@@ -740,7 +740,7 @@
   }
 
   using namespace llvm;
-  
+
   // Emit the file header.
   Stream.Emit((unsigned)'B', 8);
   Stream.Emit((unsigned)'C', 8);
@@ -789,7 +789,7 @@
          I != IEnd; ++I) {
       Generator.insert(I->first(), I->second, Trait);
     }
-    
+
     // Create the on-disk hash table in a buffer.
     SmallString<4096> IdentifierTable;
     uint32_t BucketOffset;
@@ -902,7 +902,7 @@
 
   // Rename the newly-written index file to the proper name.
   if (llvm::sys::fs::rename(IndexTmpPath, IndexPath)) {
-    // Rename failed; just remove the 
+    // Rename failed; just remove the
     llvm::sys::fs::remove(IndexTmpPath);
     return EC_IOError;
   }
diff --git a/lib/Serialization/ModuleManager.cpp b/lib/Serialization/ModuleManager.cpp
index 7563440..b512fa4 100644
--- a/lib/Serialization/ModuleManager.cpp
+++ b/lib/Serialization/ModuleManager.cpp
@@ -1,4 +1,4 @@
-//===--- ModuleManager.cpp - Module Manager ---------------------*- C++ -*-===//
+//===- ModuleManager.cpp - Module Manager ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,19 +11,33 @@
 //  modules for the ASTReader.
 //
 //===----------------------------------------------------------------------===//
+
 #include "clang/Serialization/ModuleManager.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/MemoryBufferCache.h"
+#include "clang/Basic/VirtualFileSystem.h"
 #include "clang/Frontend/PCHContainerOperations.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/ModuleMap.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
-#include <system_error>
-
-#ifndef NDEBUG
+#include "clang/Serialization/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/Support/Chrono.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/GraphWriter.h"
-#endif
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <cassert>
+#include <memory>
+#include <string>
+#include <system_error>
 
 using namespace clang;
 using namespace serialization;
@@ -208,7 +222,6 @@
   if (First == Last)
     return;
 
-
   // Explicitly clear VisitOrder since we might not notice it is stale.
   VisitOrder.clear();
 
@@ -267,7 +280,6 @@
 void
 ModuleManager::addInMemoryBuffer(StringRef FileName,
                                  std::unique_ptr<llvm::MemoryBuffer> Buffer) {
-
   const FileEntry *Entry =
       FileMgr.getVirtualFile(FileName, Buffer->getBufferSize(), 0);
   InMemoryBuffers[Entry] = std::move(Buffer);
@@ -317,8 +329,7 @@
                              const PCHContainerReader &PCHContainerRdr,
                              const HeaderSearch& HeaderSearchInfo)
     : FileMgr(FileMgr), PCMCache(&PCMCache), PCHContainerRdr(PCHContainerRdr),
-      HeaderSearchInfo (HeaderSearchInfo), GlobalIndex(),
-      FirstVisitState(nullptr) {}
+      HeaderSearchInfo(HeaderSearchInfo) {}
 
 ModuleManager::~ModuleManager() { delete FirstVisitState; }
 
@@ -452,11 +463,12 @@
 
 #ifndef NDEBUG
 namespace llvm {
+
   template<>
   struct GraphTraits<ModuleManager> {
-    typedef ModuleFile *NodeRef;
-    typedef llvm::SetVector<ModuleFile *>::const_iterator ChildIteratorType;
-    typedef pointer_iterator<ModuleManager::ModuleConstIterator> nodes_iterator;
+    using NodeRef = ModuleFile *;
+    using ChildIteratorType = llvm::SetVector<ModuleFile *>::const_iterator;
+    using nodes_iterator = pointer_iterator<ModuleManager::ModuleConstIterator>;
 
     static ChildIteratorType child_begin(NodeRef Node) {
       return Node->Imports.begin();
@@ -478,17 +490,16 @@
   template<>
   struct DOTGraphTraits<ModuleManager> : public DefaultDOTGraphTraits {
     explicit DOTGraphTraits(bool IsSimple = false)
-      : DefaultDOTGraphTraits(IsSimple) { }
+        : DefaultDOTGraphTraits(IsSimple) {}
     
-    static bool renderGraphFromBottomUp() {
-      return true;
-    }
+    static bool renderGraphFromBottomUp() { return true; }
 
     std::string getNodeLabel(ModuleFile *M, const ModuleManager&) {
       return M->ModuleName;
     }
   };
-}
+
+} // namespace llvm
 
 void ModuleManager::viewGraph() {
   llvm::ViewGraph(*this, "Modules");
diff --git a/lib/Serialization/MultiOnDiskHashTable.h b/lib/Serialization/MultiOnDiskHashTable.h
index fdbbb60..44d1616 100644
--- a/lib/Serialization/MultiOnDiskHashTable.h
+++ b/lib/Serialization/MultiOnDiskHashTable.h
@@ -1,4 +1,4 @@
-//===--- MultiOnDiskHashTable.h - Merged set of hash tables -----*- C++ -*-===//
+//===- MultiOnDiskHashTable.h - Merged set of hash tables -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,6 +15,7 @@
 //  files.
 //
 //===----------------------------------------------------------------------===//
+
 #ifndef LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H
 #define LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H
 
@@ -22,33 +23,43 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/OnDiskHashTable.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdint>
+#include <vector>
 
 namespace clang {
 namespace serialization {
 
-class ModuleFile;
-
 /// \brief A collection of on-disk hash tables, merged when relevant for performance.
 template<typename Info> class MultiOnDiskHashTable {
 public:
   /// A handle to a file, used when overriding tables.
-  typedef typename Info::file_type file_type;
-  /// A pointer to an on-disk representation of the hash table.
-  typedef const unsigned char *storage_type;
+  using file_type = typename Info::file_type;
 
-  typedef typename Info::external_key_type external_key_type;
-  typedef typename Info::internal_key_type internal_key_type;
-  typedef typename Info::data_type data_type;
-  typedef typename Info::data_type_builder data_type_builder;
-  typedef unsigned hash_value_type;
+  /// A pointer to an on-disk representation of the hash table.
+  using storage_type = const unsigned char *;
+
+  using external_key_type = typename Info::external_key_type;
+  using internal_key_type = typename Info::internal_key_type;
+  using data_type = typename Info::data_type;
+  using data_type_builder = typename Info::data_type_builder;
+  using hash_value_type = unsigned;
 
 private:
+  /// The generator is permitted to read our merged table.
+  template<typename ReaderInfo, typename WriterInfo>
+  friend class MultiOnDiskHashTableGenerator;
+
   /// \brief A hash table stored on disk.
   struct OnDiskTable {
-    typedef llvm::OnDiskIterableChainedHashTable<Info> HashTable;
+    using HashTable = llvm::OnDiskIterableChainedHashTable<Info>;
 
     file_type File;
     HashTable Table;
@@ -65,8 +76,8 @@
     llvm::DenseMap<internal_key_type, data_type> Data;
   };
 
-  typedef llvm::PointerUnion<OnDiskTable*, MergedTable*> Table;
-  typedef llvm::TinyPtrVector<void*> TableVector;
+  using Table = llvm::PointerUnion<OnDiskTable *, MergedTable *>;
+  using TableVector = llvm::TinyPtrVector<void *>;
 
   /// \brief The current set of on-disk and merged tables.
   /// We manually store the opaque value of the Table because TinyPtrVector
@@ -80,14 +91,16 @@
   llvm::TinyPtrVector<file_type> PendingOverrides;
 
   struct AsOnDiskTable {
-    typedef OnDiskTable *result_type;
+    using result_type = OnDiskTable *;
+
     result_type operator()(void *P) const {
       return Table::getFromOpaqueValue(P).template get<OnDiskTable *>();
     }
   };
-  typedef llvm::mapped_iterator<TableVector::iterator, AsOnDiskTable>
-      table_iterator;
-  typedef llvm::iterator_range<table_iterator> table_range;
+
+  using table_iterator =
+      llvm::mapped_iterator<TableVector::iterator, AsOnDiskTable>;
+  using table_range = llvm::iterator_range<table_iterator>;
 
   /// \brief The current set of on-disk tables.
   table_range tables() {
@@ -160,17 +173,15 @@
     Tables.push_back(Table(Merged).getOpaqueValue());
   }
 
-  /// The generator is permitted to read our merged table.
-  template<typename ReaderInfo, typename WriterInfo>
-  friend class MultiOnDiskHashTableGenerator;
-
 public:
-  MultiOnDiskHashTable() {}
+  MultiOnDiskHashTable() = default;
+
   MultiOnDiskHashTable(MultiOnDiskHashTable &&O)
       : Tables(std::move(O.Tables)),
         PendingOverrides(std::move(O.PendingOverrides)) {
     O.Tables.clear();
   }
+
   MultiOnDiskHashTable &operator=(MultiOnDiskHashTable &&O) {
     if (&O == this)
       return *this;
@@ -180,11 +191,13 @@
     PendingOverrides = std::move(O.PendingOverrides);
     return *this;
   }
+
   ~MultiOnDiskHashTable() { clear(); }
 
   /// \brief Add the table \p Data loaded from file \p File.
   void add(file_type File, storage_type Data, Info InfoObj = Info()) {
     using namespace llvm::support;
+
     storage_type Ptr = Data;
 
     uint32_t BucketOffset = endian::readNext<uint32_t, little, unaligned>(Ptr);
@@ -278,8 +291,8 @@
 /// \brief Writer for the on-disk hash table.
 template<typename ReaderInfo, typename WriterInfo>
 class MultiOnDiskHashTableGenerator {
-  typedef MultiOnDiskHashTable<ReaderInfo> BaseTable;
-  typedef llvm::OnDiskChainedHashTableGenerator<WriterInfo> Generator;
+  using BaseTable = MultiOnDiskHashTable<ReaderInfo>;
+  using Generator = llvm::OnDiskChainedHashTableGenerator<WriterInfo>;
 
   Generator Gen;
 
@@ -294,6 +307,7 @@
   void emit(llvm::SmallVectorImpl<char> &Out, WriterInfo &Info,
             const BaseTable *Base) {
     using namespace llvm::support;
+
     llvm::raw_svector_ostream OutStream(Out);
 
     // Write our header information.
@@ -327,8 +341,7 @@
   }
 };
 
-} // end namespace clang::serialization
-} // end namespace clang
+} // namespace serialization
+} // namespace clang
 
-
-#endif
+#endif // LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H
diff --git a/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp b/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp
index 90d5c0e..e4cdc50 100644
--- a/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp
@@ -15,8 +15,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "ClangSACheckers.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 
 using namespace clang;
@@ -29,8 +31,17 @@
                      check::PostStmt<CastExpr>,
                      check::PreStmt<ArraySubscriptExpr>,
                      check::PostStmt<ArraySubscriptExpr>,
+                     check::PreStmt<CXXNewExpr>,
+                     check::PostStmt<CXXNewExpr>,
+                     check::PreStmt<OffsetOfExpr>,
+                     check::PostStmt<OffsetOfExpr>,
+                     check::PreCall,
+                     check::PostCall,
+                     check::NewAllocator,
                      check::Bind,
-                     check::RegionChanges> {
+                     check::RegionChanges,
+                     check::LiveSymbols> {
+
   bool isCallbackEnabled(AnalyzerOptions &Opts, StringRef CallbackName) const {
     return Opts.getBooleanOption("*", false, this) ||
         Opts.getBooleanOption(CallbackName, false, this);
@@ -72,11 +83,60 @@
       llvm::errs() << "PostStmt<ArraySubscriptExpr>\n";
   }
 
+  void checkPreStmt(const CXXNewExpr *NE, CheckerContext &C) const {
+    if (isCallbackEnabled(C, "PreStmtCXXNewExpr"))
+      llvm::errs() << "PreStmt<CXXNewExpr>\n";
+  }
+
+  void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const {
+    if (isCallbackEnabled(C, "PostStmtCXXNewExpr"))
+      llvm::errs() << "PostStmt<CXXNewExpr>\n";
+  }
+
+  void checkPreStmt(const OffsetOfExpr *OOE, CheckerContext &C) const {
+    if (isCallbackEnabled(C, "PreStmtOffsetOfExpr"))
+      llvm::errs() << "PreStmt<OffsetOfExpr>\n";
+  }
+
+  void checkPostStmt(const OffsetOfExpr *OOE, CheckerContext &C) const {
+    if (isCallbackEnabled(C, "PostStmtOffsetOfExpr"))
+      llvm::errs() << "PostStmt<OffsetOfExpr>\n";
+  }
+
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const {
+    if (isCallbackEnabled(C, "PreCall")) {
+      llvm::errs() << "PreCall";
+      if (const NamedDecl *ND = dyn_cast_or_null<NamedDecl>(Call.getDecl()))
+        llvm::errs() << " (" << ND->getQualifiedNameAsString() << ')';
+      llvm::errs() << '\n';
+    }
+  }
+
+  void checkPostCall(const CallEvent &Call, CheckerContext &C) const {
+    if (isCallbackEnabled(C, "PostCall")) {
+      llvm::errs() << "PostCall";
+      if (const NamedDecl *ND = dyn_cast_or_null<NamedDecl>(Call.getDecl()))
+        llvm::errs() << " (" << ND->getQualifiedNameAsString() << ')';
+      llvm::errs() << '\n';
+    }
+  }
+
+  void checkNewAllocator(const CXXNewExpr *CNE, SVal Target,
+                         CheckerContext &C) const {
+    if (isCallbackEnabled(C, "NewAllocator"))
+      llvm::errs() << "NewAllocator\n";
+  }
+
   void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const {
     if (isCallbackEnabled(C, "Bind"))
       llvm::errs() << "Bind\n";
   }
 
+  void checkLiveSymbols(ProgramStateRef State, SymbolReaper &SymReaper) const {
+    if (isCallbackEnabled(State, "LiveSymbols"))
+      llvm::errs() << "LiveSymbols\n";
+  }
+
   ProgramStateRef
   checkRegionChanges(ProgramStateRef State,
                      const InvalidatedSymbols *Invalidated,
diff --git a/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp b/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp
index 64c30e7..aadc6ba 100644
--- a/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp
@@ -122,6 +122,8 @@
       E = CE.blocks_exhausted_end(); I != E; ++I) {
     const BlockEdge &BE =  I->first;
     const CFGBlock *Exit = BE.getDst();
+    if (Exit->empty())
+      continue;
     const CFGElement &CE = Exit->front();
     if (Optional<CFGStmt> CS = CE.getAs<CFGStmt>()) {
       SmallString<128> bufI;
diff --git a/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 848c266..b944f90 100644
--- a/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -259,6 +259,7 @@
       llvm::make_unique<BugReport>(*BT, os.str(), errorNode));
 }
 
+#ifndef NDEBUG
 LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
   dumpToStream(llvm::errs());
 }
@@ -266,7 +267,7 @@
 void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
   os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
 }
-
+#endif
 
 // Lazily computes a value to be used by 'computeOffset'.  If 'val'
 // is unknown or undefined, we lazily substitute '0'.  Otherwise,
diff --git a/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
index 3711877..b16e32e 100644
--- a/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
+++ b/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
@@ -436,8 +436,7 @@
     return;
 
   // Get the value of the "theType" argument.
-  const LocationContext *LCtx = C.getLocationContext();
-  SVal TheTypeVal = state->getSVal(CE->getArg(1), LCtx);
+  SVal TheTypeVal = C.getSVal(CE->getArg(1));
 
   // FIXME: We really should allow ranges of valid theType values, and
   //   bifurcate the state appropriately.
@@ -457,7 +456,7 @@
   // Look at the value of the integer being passed by reference.  Essentially
   // we want to catch cases where the value passed in is not equal to the
   // size of the type being created.
-  SVal TheValueExpr = state->getSVal(CE->getArg(2), LCtx);
+  SVal TheValueExpr = C.getSVal(CE->getArg(2));
 
   // FIXME: Eventually we should handle arbitrary locations.  We can do this
   //  by having an enhanced memory model that does low-level typing.
@@ -571,7 +570,7 @@
 
   // Get the argument's value.
   const Expr *Arg = CE->getArg(0);
-  SVal ArgVal = state->getSVal(Arg, C.getLocationContext());
+  SVal ArgVal = C.getSVal(Arg);
   Optional<DefinedSVal> DefArgVal = ArgVal.getAs<DefinedSVal>();
   if (!DefArgVal)
     return;
@@ -977,8 +976,7 @@
   if (!State)
     return nullptr;
 
-  SymbolRef CollectionS =
-    State->getSVal(FCS->getCollection(), C.getLocationContext()).getAsSymbol();
+  SymbolRef CollectionS = C.getSVal(FCS->getCollection()).getAsSymbol();
   return assumeCollectionNonEmpty(C, State, CollectionS, Assumption);
 }
 
@@ -1206,7 +1204,7 @@
 ObjCNonNilReturnValueChecker::assumeExprIsNonNull(const Expr *NonNullExpr,
                                                   ProgramStateRef State,
                                                   CheckerContext &C) const {
-  SVal Val = State->getSVal(NonNullExpr, C.getLocationContext());
+  SVal Val = C.getSVal(NonNullExpr);
   if (Optional<DefinedOrUnknownSVal> DV = Val.getAs<DefinedOrUnknownSVal>())
     return State->assume(*DV, true);
   return State;
diff --git a/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp b/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
index d19630e..c31f279 100644
--- a/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
@@ -26,15 +26,22 @@
 
 namespace {
 
-class BlockInCriticalSectionChecker : public Checker<check::PostCall,
-                                                     check::PreCall> {
+class BlockInCriticalSectionChecker : public Checker<check::PostCall> {
+
+  mutable IdentifierInfo *IILockGuard, *IIUniqueLock;
 
   CallDescription LockFn, UnlockFn, SleepFn, GetcFn, FgetsFn, ReadFn, RecvFn,
                   PthreadLockFn, PthreadTryLockFn, PthreadUnlockFn,
                   MtxLock, MtxTimedLock, MtxTryLock, MtxUnlock;
 
+  StringRef ClassLockGuard, ClassUniqueLock;
+
+  mutable bool IdentifierInfoInitialized;
+
   std::unique_ptr<BugType> BlockInCritSectionBugType;
 
+  void initIdentifierInfo(ASTContext &Ctx) const;
+
   void reportBlockInCritSection(SymbolRef FileDescSym,
                                 const CallEvent &call,
                                 CheckerContext &C) const;
@@ -46,13 +53,10 @@
   bool isLockFunction(const CallEvent &Call) const;
   bool isUnlockFunction(const CallEvent &Call) const;
 
-  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
-
   /// Process unlock.
   /// Process lock.
   /// Process blocking functions (sleep, getc, fgets, read, recv)
   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
-
 };
 
 } // end anonymous namespace
@@ -60,7 +64,8 @@
 REGISTER_TRAIT_WITH_PROGRAMSTATE(MutexCounter, unsigned)
 
 BlockInCriticalSectionChecker::BlockInCriticalSectionChecker()
-    : LockFn("lock"), UnlockFn("unlock"), SleepFn("sleep"), GetcFn("getc"),
+    : IILockGuard(nullptr), IIUniqueLock(nullptr),
+      LockFn("lock"), UnlockFn("unlock"), SleepFn("sleep"), GetcFn("getc"),
       FgetsFn("fgets"), ReadFn("read"), RecvFn("recv"),
       PthreadLockFn("pthread_mutex_lock"),
       PthreadTryLockFn("pthread_mutex_trylock"),
@@ -68,13 +73,29 @@
       MtxLock("mtx_lock"),
       MtxTimedLock("mtx_timedlock"),
       MtxTryLock("mtx_trylock"),
-      MtxUnlock("mtx_unlock") {
+      MtxUnlock("mtx_unlock"),
+      ClassLockGuard("lock_guard"),
+      ClassUniqueLock("unique_lock"),
+      IdentifierInfoInitialized(false) {
   // Initialize the bug type.
   BlockInCritSectionBugType.reset(
       new BugType(this, "Call to blocking function in critical section",
                         "Blocking Error"));
 }
 
+void BlockInCriticalSectionChecker::initIdentifierInfo(ASTContext &Ctx) const {
+  if (!IdentifierInfoInitialized) {
+    /* In case of checking C code, or when the corresponding headers are not
+     * included, we might end up query the identifier table every time when this
+     * function is called instead of early returning it. To avoid this, a bool
+     * variable (IdentifierInfoInitialized) is used and the function will be run
+     * only once. */
+    IILockGuard  = &Ctx.Idents.get(ClassLockGuard);
+    IIUniqueLock = &Ctx.Idents.get(ClassUniqueLock);
+    IdentifierInfoInitialized = true;
+  }
+}
+
 bool BlockInCriticalSectionChecker::isBlockingFunction(const CallEvent &Call) const {
   if (Call.isCalled(SleepFn)
       || Call.isCalled(GetcFn)
@@ -87,6 +108,12 @@
 }
 
 bool BlockInCriticalSectionChecker::isLockFunction(const CallEvent &Call) const {
+  if (const auto *Ctor = dyn_cast<CXXConstructorCall>(&Call)) {
+    auto IdentifierInfo = Ctor->getDecl()->getParent()->getIdentifier();
+    if (IdentifierInfo == IILockGuard || IdentifierInfo == IIUniqueLock)
+      return true;
+  }
+
   if (Call.isCalled(LockFn)
       || Call.isCalled(PthreadLockFn)
       || Call.isCalled(PthreadTryLockFn)
@@ -99,6 +126,13 @@
 }
 
 bool BlockInCriticalSectionChecker::isUnlockFunction(const CallEvent &Call) const {
+  if (const auto *Dtor = dyn_cast<CXXDestructorCall>(&Call)) {
+    const auto *DRecordDecl = dyn_cast<CXXRecordDecl>(Dtor->getDecl()->getParent());
+    auto IdentifierInfo = DRecordDecl->getIdentifier();
+    if (IdentifierInfo == IILockGuard || IdentifierInfo == IIUniqueLock)
+      return true;
+  }
+
   if (Call.isCalled(UnlockFn)
        || Call.isCalled(PthreadUnlockFn)
        || Call.isCalled(MtxUnlock)) {
@@ -107,12 +141,10 @@
   return false;
 }
 
-void BlockInCriticalSectionChecker::checkPreCall(const CallEvent &Call,
-                                                 CheckerContext &C) const {
-}
-
 void BlockInCriticalSectionChecker::checkPostCall(const CallEvent &Call,
                                                   CheckerContext &C) const {
+  initIdentifierInfo(C.getASTContext());
+
   if (!isBlockingFunction(Call)
       && !isLockFunction(Call)
       && !isUnlockFunction(Call))
diff --git a/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
index 097d419..0e781d0 100644
--- a/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
@@ -43,7 +43,7 @@
 
   case Builtin::BI__builtin_assume: {
     assert (CE->arg_begin() != CE->arg_end());
-    SVal ArgSVal = state->getSVal(CE->getArg(0), LCtx);
+    SVal ArgSVal = C.getSVal(CE->getArg(0));
     if (ArgSVal.isUndef())
       return true; // Return true to model purity.
 
@@ -68,7 +68,7 @@
     // __builtin_addressof is going from a reference to a pointer, but those
     // are represented the same way in the analyzer.
     assert (CE->arg_begin() != CE->arg_end());
-    SVal X = state->getSVal(*(CE->arg_begin()), LCtx);
+    SVal X = C.getSVal(*(CE->arg_begin()));
     C.addTransition(state->BindExpr(CE, LCtx, X));
     return true;
   }
@@ -83,8 +83,7 @@
     // Set the extent of the region in bytes. This enables us to use the
     // SVal of the argument directly. If we save the extent in bits, we
     // cannot represent values like symbol*8.
-    DefinedOrUnknownSVal Size =
-        state->getSVal(*(CE->arg_begin()), LCtx).castAs<DefinedOrUnknownSVal>();
+    auto Size = C.getSVal(*(CE->arg_begin())).castAs<DefinedOrUnknownSVal>();
 
     SValBuilder& svalBuilder = C.getSValBuilder();
     DefinedOrUnknownSVal Extent = R->getExtent(svalBuilder);
@@ -97,7 +96,8 @@
     return true;
   }
 
-  case Builtin::BI__builtin_object_size: {
+  case Builtin::BI__builtin_object_size:
+  case Builtin::BI__builtin_constant_p: {
     // This must be resolvable at compile time, so we defer to the constant
     // evaluator for a value.
     SVal V = UnknownVal();
diff --git a/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/lib/StaticAnalyzer/Checkers/CMakeLists.txt
index 7ab9c61..9b42a55 100644
--- a/lib/StaticAnalyzer/Checkers/CMakeLists.txt
+++ b/lib/StaticAnalyzer/Checkers/CMakeLists.txt
@@ -37,6 +37,7 @@
   DynamicTypeChecker.cpp
   ExprInspectionChecker.cpp
   FixedAddressChecker.cpp
+  GCDAsyncSemaphoreChecker.cpp
   GenericTaintChecker.cpp
   GTestChecker.cpp
   IdenticalExprChecker.cpp
@@ -49,6 +50,7 @@
   MallocChecker.cpp
   MallocOverflowSecurityChecker.cpp
   MallocSizeofChecker.cpp
+  MmapWriteExecChecker.cpp
   MisusedMovedObjectChecker.cpp
   MPI-Checker/MPIBugReporter.cpp
   MPI-Checker/MPIChecker.cpp
diff --git a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 58218df..4eb189e 100644
--- a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -289,8 +289,8 @@
   if (!ER)
     return state;
 
-  assert(ER->getValueType() == C.getASTContext().CharTy &&
-    "CheckLocation should only be called with char* ElementRegions");
+  if (ER->getValueType() != C.getASTContext().CharTy)
+    return state;
 
   // Get the size of the array.
   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
@@ -309,9 +309,19 @@
     if (!N)
       return nullptr;
 
+    CheckName Name;
+    // These checks are either enabled by the CString out-of-bounds checker
+    // explicitly or the "basic" CStringNullArg checker support that Malloc
+    // checker enables.
+    assert(Filter.CheckCStringOutOfBounds || Filter.CheckCStringNullArg);
+    if (Filter.CheckCStringOutOfBounds)
+      Name = Filter.CheckNameCStringOutOfBounds;
+    else
+      Name = Filter.CheckNameCStringNullArg;
+
     if (!BT_Bounds) {
       BT_Bounds.reset(new BuiltinBug(
-          Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access",
+          Name, "Out-of-bound array access",
           "Byte string function accesses out-of-bound array element"));
     }
     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
@@ -366,7 +376,7 @@
   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
 
   // Check that the first buffer is non-null.
-  SVal BufVal = state->getSVal(FirstBuf, LCtx);
+  SVal BufVal = C.getSVal(FirstBuf);
   state = checkNonNull(C, state, FirstBuf, BufVal);
   if (!state)
     return nullptr;
@@ -378,7 +388,7 @@
   // Get the access length and make sure it is known.
   // FIXME: This assumes the caller has already checked that the access length
   // is positive. And that it's unsigned.
-  SVal LengthVal = state->getSVal(Size, LCtx);
+  SVal LengthVal = C.getSVal(Size);
   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
   if (!Length)
     return state;
@@ -874,6 +884,8 @@
   if (!ER)
     return true; // cf top comment.
 
+  // FIXME: Does this crash when a non-standard definition
+  // of a library function is encountered?
   assert(ER->getValueType() == C.getASTContext().CharTy &&
          "IsFirstBufInBound should only be called with char* ElementRegions");
 
@@ -1021,21 +1033,6 @@
   if (stateNonZeroSize) {
     state = stateNonZeroSize;
 
-    // Ensure the destination is not null. If it is NULL there will be a
-    // NULL pointer dereference.
-    state = checkNonNull(C, state, Dest, destVal);
-    if (!state)
-      return;
-
-    // Get the value of the Src.
-    SVal srcVal = state->getSVal(Source, LCtx);
-
-    // Ensure the source is not null. If it is NULL there will be a
-    // NULL pointer dereference.
-    state = checkNonNull(C, state, Source, srcVal);
-    if (!state)
-      return;
-
     // Ensure the accesses are valid and that the buffers do not overlap.
     const char * const writeWarning =
       "Memory copy function overflows destination buffer";
@@ -2021,12 +2018,6 @@
     return;
   }
 
-  // Ensure the memory area is not null.
-  // If it is NULL there will be a NULL pointer dereference.
-  State = checkNonNull(C, StateNonZeroSize, Mem, MemVal);
-  if (!State)
-    return;
-
   State = CheckBufferAccess(C, State, Size, Mem);
   if (!State)
     return;
@@ -2147,7 +2138,7 @@
     if (!MR)
       continue;
 
-    SVal StrVal = state->getSVal(Init, C.getLocationContext());
+    SVal StrVal = C.getSVal(Init);
     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
     DefinedOrUnknownSVal strLength =
         getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
diff --git a/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp b/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
index 07285d2..20a4684 100644
--- a/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
@@ -179,7 +179,7 @@
 
   if (const MemRegion *SValMemRegion = V.getAsRegion()) {
     const ProgramStateRef State = C.getState();
-    const SVal PSV = State->getSVal(SValMemRegion);
+    const SVal PSV = State->getSVal(SValMemRegion, C.getASTContext().CharTy);
     if (PSV.isUndef()) {
       if (ExplodedNode *N = C.generateErrorNode()) {
         LazyInit_BT(BD, BT);
diff --git a/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp b/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp
index 3e17815..059553b 100644
--- a/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp
@@ -101,7 +101,7 @@
     return;
 
   ProgramStateRef state = C.getState();
-  const MemRegion *R = state->getSVal(E, C.getLocationContext()).getAsRegion();
+  const MemRegion *R = C.getSVal(E).getAsRegion();
   if (!R)
     return;
 
diff --git a/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp b/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp
index 65e8131..00e9033 100644
--- a/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp
@@ -78,7 +78,7 @@
     // Don't warn for references
     const ValueDecl *VD = nullptr;
     if (const auto *SE = dyn_cast<DeclRefExpr>(U->getSubExpr()))
-      VD = dyn_cast<ValueDecl>(SE->getDecl());
+      VD = SE->getDecl();
     else if (const auto *SE = dyn_cast<MemberExpr>(U->getSubExpr()))
       VD = SE->getMemberDecl();
     if (!VD || VD->getType()->isReferenceType())
diff --git a/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp b/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp
index 2818c9d..bfd4019 100644
--- a/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp
+++ b/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp
@@ -535,7 +535,7 @@
       continue;
 
     // Prevents diagnosing multiple times for the same instance variable
-    // at, for example, both a return and at the end of of the function.
+    // at, for example, both a return and at the end of the function.
     NewUnreleased = F.remove(NewUnreleased, IvarSymbol);
 
     if (State->getStateManager()
@@ -645,7 +645,7 @@
 bool ObjCDeallocChecker::diagnoseExtraRelease(SymbolRef ReleasedValue,
                                               const ObjCMethodCall &M,
                                               CheckerContext &C) const {
-  // Try to get the region from which the the released value was loaded.
+  // Try to get the region from which the released value was loaded.
   // Note that, unlike diagnosing for missing releases, here we don't track
   // values that must not be released in the state. This is because even if
   // these values escape, it is still an error under the rules of MRR to
diff --git a/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp b/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
index 6dbacad..62831be 100644
--- a/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
+++ b/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
@@ -510,6 +510,17 @@
   if (!checkCall_strCommon(CE, FD))
     return;
 
+  const auto *Target = CE->getArg(0)->IgnoreImpCasts(),
+             *Source = CE->getArg(1)->IgnoreImpCasts();
+  if (const auto *DeclRef = dyn_cast<DeclRefExpr>(Target))
+    if (const auto *Array = dyn_cast<ConstantArrayType>(DeclRef->getType())) {
+      uint64_t ArraySize = BR.getContext().getTypeSize(Array) / 8;
+      if (const auto *String = dyn_cast<StringLiteral>(Source)) {
+        if (ArraySize >= String->getLength() + 1)
+          return;
+      }
+    }
+
   // Issue a warning.
   PathDiagnosticLocation CELoc =
     PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC);
diff --git a/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp b/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp
index 95b6c4d..ab1e2ec 100644
--- a/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp
+++ b/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp
@@ -42,6 +42,7 @@
                                        check::PreCall,
                                        check::PostCall,
                                        check::BranchCondition,
+                                       check::NewAllocator,
                                        check::Location,
                                        check::Bind,
                                        check::DeadSymbols,
@@ -126,6 +127,22 @@
   /// \brief Pre-visit of the condition statement of a branch (such as IfStmt).
   void checkBranchCondition(const Stmt *Condition, CheckerContext &Ctx) const {}
 
+  /// \brief Post-visit the C++ operator new's allocation call.
+  ///
+  /// Execution of C++ operator new consists of the following phases: (1) call
+  /// default or overridden operator new() to allocate memory (2) cast the
+  /// return value of operator new() from void pointer type to class pointer
+  /// type, (3) assuming that the value is non-null, call the object's
+  /// constructor over this pointer, (4) declare that the value of the
+  /// new-expression is this pointer. This callback is called between steps
+  /// (2) and (3). Post-call for the allocator is called after step (1).
+  /// Pre-statement for the new-expression is called on step (4) when the value
+  /// of the expression is evaluated.
+  /// \param NE     The C++ new-expression that triggered the allocation.
+  /// \param Target The allocated region, casted to the class type.
+  void checkNewAllocator(const CXXNewExpr *NE, SVal Target,
+                         CheckerContext &) const {}
+
   /// \brief Called on a load from and a store to a location.
   ///
   /// The method will be called each time a location (pointer) value is
diff --git a/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp b/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
index 9e9939a..d9739373 100644
--- a/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
@@ -105,7 +105,7 @@
 
   // After chdir("/"), enter the jail, set the enum value JAIL_ENTERED.
   const Expr *ArgExpr = CE->getArg(0);
-  SVal ArgVal = state->getSVal(ArgExpr, C.getLocationContext());
+  SVal ArgVal = C.getSVal(ArgExpr);
 
   if (const MemRegion *R = ArgVal.getAsRegion()) {
     R = R->StripCasts();
diff --git a/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp b/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp
index 2eef168..810a33e 100644
--- a/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp
+++ b/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp
@@ -16,7 +16,6 @@
 #include "clang/Analysis/Analyses/LiveVariables.h"
 #include "clang/Analysis/CallGraph.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
-#include "clang/StaticAnalyzer/Core/IssueHash.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
@@ -213,35 +212,3 @@
   mgr.registerChecker<ExplodedGraphViewer>();
 }
 
-//===----------------------------------------------------------------------===//
-// DumpBugHash 
-//===----------------------------------------------------------------------===//
-
-namespace {
-class BugHashDumper : public Checker<check::PostStmt<Stmt>> {
-public:
-  mutable std::unique_ptr<BugType> BT;
-
-  void checkPostStmt(const Stmt *S, CheckerContext &C) const {
-    if (!BT)
-      BT.reset(new BugType(this, "Dump hash components", "debug"));
-
-    ExplodedNode *N = C.generateNonFatalErrorNode();
-    if (!N)
-      return;
-
-    const LangOptions &Opts = C.getLangOpts();
-    const SourceManager &SM = C.getSourceManager();
-    FullSourceLoc FL(S->getLocStart(), SM);
-    std::string HashContent =
-        GetIssueString(SM, FL, getCheckName().getName(), BT->getCategory(),
-                       C.getLocationContext()->getDecl(), Opts);
-
-    C.emitReport(llvm::make_unique<BugReport>(*BT, HashContent, N));
-  }
-};
-}
-
-void ento::registerBugHashDumper(CheckerManager &mgr) {
-  mgr.registerChecker<BugHashDumper>();
-}
diff --git a/lib/StaticAnalyzer/Checkers/DeleteWithNonVirtualDtorChecker.cpp b/lib/StaticAnalyzer/Checkers/DeleteWithNonVirtualDtorChecker.cpp
index e04e2ab..53632b4 100644
--- a/lib/StaticAnalyzer/Checkers/DeleteWithNonVirtualDtorChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/DeleteWithNonVirtualDtorChecker.cpp
@@ -110,8 +110,6 @@
   if (Satisfied)
     return nullptr;
 
-  ProgramStateRef State = N->getState();
-  const LocationContext *LC = N->getLocationContext();
   const Stmt *S = PathDiagnosticLocation::getStmt(N);
   if (!S)
     return nullptr;
@@ -128,7 +126,7 @@
   }
 
   // Region associated with the current cast expression.
-  const MemRegion *M = State->getSVal(CastE, LC).getAsRegion();
+  const MemRegion *M = N->getSVal(CastE).getAsRegion();
   if (!M)
     return nullptr;
 
diff --git a/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp b/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
index 5985023..68e5c02 100644
--- a/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -57,7 +57,7 @@
   if (!B->getRHS()->getType()->isScalarType())
     return;
 
-  SVal Denom = C.getState()->getSVal(B->getRHS(), C.getLocationContext());
+  SVal Denom = C.getSVal(B->getRHS());
   Optional<DefinedSVal> DV = Denom.getAs<DefinedSVal>();
 
   // Divide-by-undefined handled in the generic checking for uses of
diff --git a/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
index 0891ea8..431c77c 100644
--- a/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
+++ b/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
@@ -22,6 +22,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "ClangSACheckers.h"
+#include "clang/AST/ParentMap.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
@@ -262,8 +263,19 @@
       if (const MemRegion *Target = Ctor->getCXXThisVal().getAsRegion()) {
         // We just finished a base constructor. Now we can use the subclass's
         // type when resolving virtual calls.
-        const Decl *D = C.getLocationContext()->getDecl();
-        recordFixedType(Target, cast<CXXConstructorDecl>(D), C);
+        const LocationContext *LCtx = C.getLocationContext();
+
+        // FIXME: In C++17 classes with non-virtual bases may be treated as
+        // aggregates, and in such case no top-frame constructor will be called.
+        // Figure out if we need to do anything in this case.
+        // FIXME: Instead of relying on the ParentMap, we should have the
+        // trigger-statement (InitListExpr in this case) available in this
+        // callback, ideally as part of CallEvent.
+        if (dyn_cast_or_null<InitListExpr>(
+                LCtx->getParentMap().getParent(Ctor->getOriginExpr())))
+          return;
+
+        recordFixedType(Target, cast<CXXConstructorDecl>(LCtx->getDecl()), C);
       }
       return;
     }
@@ -546,38 +558,38 @@
   OrigObjectPtrType = OrigObjectPtrType->stripObjCKindOfTypeAndQuals(ASTCtxt);
   DestObjectPtrType = DestObjectPtrType->stripObjCKindOfTypeAndQuals(ASTCtxt);
 
-  // TODO: erase tracked information when there is a cast to unrelated type
-  //       and everything is unspecialized statically.
   if (OrigObjectPtrType->isUnspecialized() &&
       DestObjectPtrType->isUnspecialized())
     return;
 
-  SymbolRef Sym = State->getSVal(CE, C.getLocationContext()).getAsSymbol();
+  SymbolRef Sym = C.getSVal(CE).getAsSymbol();
   if (!Sym)
     return;
 
+  const ObjCObjectPointerType *const *TrackedType =
+      State->get<MostSpecializedTypeArgsMap>(Sym);
+
+  if (isa<ExplicitCastExpr>(CE)) {
+    // Treat explicit casts as an indication from the programmer that the
+    // Objective-C type system is not rich enough to express the needed
+    // invariant. In such cases, forget any existing information inferred
+    // about the type arguments. We don't assume the casted-to specialized
+    // type here because the invariant the programmer specifies in the cast
+    // may only hold at this particular program point and not later ones.
+    // We don't want a suppressing cast to require a cascade of casts down the
+    // line.
+    if (TrackedType) {
+      State = State->remove<MostSpecializedTypeArgsMap>(Sym);
+      C.addTransition(State, AfterTypeProp);
+    }
+    return;
+  }
+
   // Check which assignments are legal.
   bool OrigToDest =
       ASTCtxt.canAssignObjCInterfaces(DestObjectPtrType, OrigObjectPtrType);
   bool DestToOrig =
       ASTCtxt.canAssignObjCInterfaces(OrigObjectPtrType, DestObjectPtrType);
-  const ObjCObjectPointerType *const *TrackedType =
-      State->get<MostSpecializedTypeArgsMap>(Sym);
-
-  // Downcasts and upcasts handled in an uniform way regardless of being
-  // explicit. Explicit casts however can happen between mismatched types.
-  if (isa<ExplicitCastExpr>(CE) && !OrigToDest && !DestToOrig) {
-    // Mismatched types. If the DestType specialized, store it. Forget the
-    // tracked type otherwise.
-    if (DestObjectPtrType->isSpecialized()) {
-      State = State->set<MostSpecializedTypeArgsMap>(Sym, DestObjectPtrType);
-      C.addTransition(State, AfterTypeProp);
-    } else if (TrackedType) {
-      State = State->remove<MostSpecializedTypeArgsMap>(Sym);
-      C.addTransition(State, AfterTypeProp);
-    }
-    return;
-  }
 
   // The tracked type should be the sub or super class of the static destination
   // type. When an (implicit) upcast or a downcast happens according to static
diff --git a/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp b/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
index 4a581bd..0005ec4 100644
--- a/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
@@ -8,10 +8,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "ClangSACheckers.h"
+#include "clang/StaticAnalyzer/Checkers/SValExplainer.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/IssueHash.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
-#include "clang/StaticAnalyzer/Checkers/SValExplainer.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/ScopedPrinter.h"
 
@@ -41,6 +42,7 @@
   void analyzerExplain(const CallExpr *CE, CheckerContext &C) const;
   void analyzerPrintState(const CallExpr *CE, CheckerContext &C) const;
   void analyzerGetExtent(const CallExpr *CE, CheckerContext &C) const;
+  void analyzerHashDump(const CallExpr *CE, CheckerContext &C) const;
 
   typedef void (ExprInspectionChecker::*FnCheck)(const CallExpr *,
                                                  CheckerContext &C) const;
@@ -79,6 +81,7 @@
           &ExprInspectionChecker::analyzerPrintState)
     .Case("clang_analyzer_numTimesReached",
           &ExprInspectionChecker::analyzerNumTimesReached)
+    .Case("clang_analyzer_hashDump", &ExprInspectionChecker::analyzerHashDump)
     .Default(nullptr);
 
   if (!Handler)
@@ -280,7 +283,18 @@
   LLVM_BUILTIN_TRAP;
 }
 
+void ExprInspectionChecker::analyzerHashDump(const CallExpr *CE,
+                                             CheckerContext &C) const {
+  const LangOptions &Opts = C.getLangOpts();
+  const SourceManager &SM = C.getSourceManager();
+  FullSourceLoc FL(CE->getArg(0)->getLocStart(), SM);
+  std::string HashContent =
+      GetIssueString(SM, FL, getCheckName().getName(), "Category",
+                     C.getLocationContext()->getDecl(), Opts);
+
+  reportBug(HashContent, C);
+}
+
 void ento::registerExprInspectionChecker(CheckerManager &Mgr) {
   Mgr.registerChecker<ExprInspectionChecker>();
 }
-
diff --git a/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp b/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp
index 3fe89f9..059203f 100644
--- a/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp
@@ -44,8 +44,7 @@
   if (!T->isPointerType())
     return;
 
-  ProgramStateRef state = C.getState();
-  SVal RV = state->getSVal(B->getRHS(), C.getLocationContext());
+  SVal RV = C.getSVal(B->getRHS());
 
   if (!RV.isConstant() || RV.isZeroConstant())
     return;
diff --git a/lib/StaticAnalyzer/Checkers/GCDAsyncSemaphoreChecker.cpp b/lib/StaticAnalyzer/Checkers/GCDAsyncSemaphoreChecker.cpp
new file mode 100644
index 0000000..23ef032
--- /dev/null
+++ b/lib/StaticAnalyzer/Checkers/GCDAsyncSemaphoreChecker.cpp
@@ -0,0 +1,156 @@
+//===- GCDAsyncSemaphoreChecker.cpp -----------------------------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines GCDAsyncSemaphoreChecker which checks against a common
+// antipattern when synchronous API is emulated from asynchronous callbacks
+// using a semaphor:
+//
+//   dispatch_semapshore_t sema = dispatch_semaphore_create(0);
+
+//   AnyCFunctionCall(^{
+//     // code…
+//     dispatch_semapshore_signal(sema);
+//   })
+//   dispatch_semapshore_wait(sema, *)
+//
+// Such code is a common performance problem, due to inability of GCD to
+// properly handle QoS when a combination of queues and semaphors is used.
+// Good code would either use asynchronous API (when available), or perform
+// the necessary action in asynchronous callback.
+//
+// Currently, the check is performed using a simple heuristical AST pattern
+// matching.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangSACheckers.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "gcdasyncsemaphorechecker"
+
+using namespace clang;
+using namespace ento;
+using namespace ast_matchers;
+
+namespace {
+
+const char *WarningBinding = "semaphore_wait";
+
+class GCDAsyncSemaphoreChecker : public Checker<check::ASTCodeBody> {
+public:
+  void checkASTCodeBody(const Decl *D,
+                        AnalysisManager &AM,
+                        BugReporter &BR) const;
+};
+
+class Callback : public MatchFinder::MatchCallback {
+  BugReporter &BR;
+  const GCDAsyncSemaphoreChecker *C;
+  AnalysisDeclContext *ADC;
+
+public:
+  Callback(BugReporter &BR,
+           AnalysisDeclContext *ADC,
+           const GCDAsyncSemaphoreChecker *C) : BR(BR), C(C), ADC(ADC) {}
+
+  virtual void run(const MatchFinder::MatchResult &Result) override;
+};
+
+auto callsName(const char *FunctionName)
+    -> decltype(callee(functionDecl())) {
+  return callee(functionDecl(hasName(FunctionName)));
+}
+
+auto equalsBoundArgDecl(int ArgIdx, const char *DeclName)
+    -> decltype(hasArgument(0, expr())) {
+  return hasArgument(ArgIdx, ignoringParenCasts(declRefExpr(
+                                 to(varDecl(equalsBoundNode(DeclName))))));
+}
+
+auto bindAssignmentToDecl(const char *DeclName) -> decltype(hasLHS(expr())) {
+  return hasLHS(ignoringParenImpCasts(
+                         declRefExpr(to(varDecl().bind(DeclName)))));
+}
+
+void GCDAsyncSemaphoreChecker::checkASTCodeBody(const Decl *D,
+                                               AnalysisManager &AM,
+                                               BugReporter &BR) const {
+
+  // The pattern is very common in tests, and it is OK to use it there.
+  if (const auto* ND = dyn_cast<NamedDecl>(D)) {
+    std::string DeclName = ND->getNameAsString();
+    if (StringRef(DeclName).startswith("test"))
+      return;
+  }
+
+  const char *SemaphoreBinding = "semaphore_name";
+  auto SemaphoreCreateM = callExpr(callsName("dispatch_semaphore_create"));
+
+  auto SemaphoreBindingM = anyOf(
+      forEachDescendant(
+          varDecl(hasDescendant(SemaphoreCreateM)).bind(SemaphoreBinding)),
+      forEachDescendant(binaryOperator(bindAssignmentToDecl(SemaphoreBinding),
+                     hasRHS(SemaphoreCreateM))));
+
+  auto SemaphoreWaitM = forEachDescendant(
+    callExpr(
+      allOf(
+        callsName("dispatch_semaphore_wait"),
+        equalsBoundArgDecl(0, SemaphoreBinding)
+      )
+    ).bind(WarningBinding));
+
+  auto AcceptsBlockM =
+    forEachDescendant(callExpr(hasAnyArgument(hasType(
+            hasCanonicalType(blockPointerType())
+            ))));
+
+  auto BlockSignallingM =
+    forEachDescendant(callExpr(hasAnyArgument(hasDescendant(callExpr(
+          allOf(
+              callsName("dispatch_semaphore_signal"),
+              equalsBoundArgDecl(0, SemaphoreBinding)
+              ))))));
+
+  auto FinalM = compoundStmt(
+      SemaphoreBindingM,
+      SemaphoreWaitM,
+      AcceptsBlockM,
+      BlockSignallingM);
+
+  MatchFinder F;
+  Callback CB(BR, AM.getAnalysisDeclContext(D), this);
+
+  F.addMatcher(FinalM, &CB);
+  F.match(*D->getBody(), AM.getASTContext());
+}
+
+void Callback::run(const MatchFinder::MatchResult &Result) {
+  const auto *SW = Result.Nodes.getNodeAs<CallExpr>(WarningBinding);
+  assert(SW);
+  BR.EmitBasicReport(
+      ADC->getDecl(), C,
+      /*Name=*/"Semaphore performance anti-pattern",
+      /*Category=*/"Performance",
+      "Possible semaphore performance anti-pattern: wait on a semaphore "
+      "signalled to in a callback",
+      PathDiagnosticLocation::createBegin(SW, BR.getSourceManager(), ADC),
+      SW->getSourceRange());
+}
+
+}
+
+void ento::registerGCDAsyncSemaphoreChecker(CheckerManager &Mgr) {
+  Mgr.registerChecker<GCDAsyncSemaphoreChecker>();
+}
diff --git a/lib/StaticAnalyzer/Checkers/GTestChecker.cpp b/lib/StaticAnalyzer/Checkers/GTestChecker.cpp
index f0be41b..3ef95e6 100644
--- a/lib/StaticAnalyzer/Checkers/GTestChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/GTestChecker.cpp
@@ -161,7 +161,7 @@
     const CXXConstructorCall *Call, CheckerContext &C) const {
   assert(Call->getNumArgs() == 1);
 
-  // The first parameter of the the copy constructor must be the other
+  // The first parameter of the copy constructor must be the other
   // instance to initialize this instances fields from.
   SVal OtherVal = Call->getArgSVal(0);
   SVal ThisVal = Call->getCXXThisVal();
diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 883c6a6..3186abe 100644
--- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -466,9 +466,9 @@
 }
 
 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
-                                            const Expr* Arg) {
+                                                     const Expr *Arg) {
   ProgramStateRef State = C.getState();
-  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
+  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
   if (AddrVal.isUnknownOrUndef())
     return None;
 
@@ -476,9 +476,18 @@
   if (!AddrLoc)
     return None;
 
-  const PointerType *ArgTy =
-    dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
-  return State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType());
+  QualType ArgTy = Arg->getType().getCanonicalType();
+  if (!ArgTy->isPointerType())
+    return None;
+
+  QualType ValTy = ArgTy->getPointeeType();
+
+  // Do not dereference void pointers. Treat them as byte pointers instead.
+  // FIXME: we might want to consider more than just the first byte.
+  if (ValTy->isVoidType())
+    ValTy = C.getASTContext().CharTy;
+
+  return State->getSVal(*AddrLoc, ValTy);
 }
 
 ProgramStateRef
@@ -612,7 +621,7 @@
 
 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
   ProgramStateRef State = C.getState();
-  SVal Val = State->getSVal(E, C.getLocationContext());
+  SVal Val = C.getSVal(E);
 
   // stdin is a pointer, so it would be a region.
   const MemRegion *MemReg = Val.getAsRegion();
@@ -637,7 +646,8 @@
     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
         if (const PointerType * PtrTy =
               dyn_cast<PointerType>(D->getType().getTypePtr()))
-          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
+          if (PtrTy->getPointeeType().getCanonicalType() ==
+              C.getASTContext().getFILEType().getCanonicalType())
             return true;
   }
   return false;
diff --git a/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp b/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp
index 0c3bff5..cf57b8d 100644
--- a/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp
@@ -255,7 +255,10 @@
     PathDiagnosticLocation ELoc =
         PathDiagnosticLocation::createOperatorLoc(B, BR.getSourceManager());
     StringRef Message;
-    if (((Op == BO_EQ) || (Op == BO_LE) || (Op == BO_GE)))
+    if (Op == BO_Cmp)
+      Message = "comparison of identical expressions always evaluates to "
+                "'equal'";
+    else if (((Op == BO_EQ) || (Op == BO_LE) || (Op == BO_GE)))
       Message = "comparison of identical expressions always evaluates to true";
     else
       Message = "comparison of identical expressions always evaluates to false";
diff --git a/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp b/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp
index 0f9b749..11f4222 100644
--- a/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp
@@ -355,12 +355,11 @@
                                     CheckerContext &C) const {
   /* Transfer iterator state to temporary objects */
   auto State = C.getState();
-  const auto *LCtx = C.getLocationContext();
   const auto *Pos =
-      getIteratorPosition(State, State->getSVal(MTE->GetTemporaryExpr(), LCtx));
+      getIteratorPosition(State, C.getSVal(MTE->GetTemporaryExpr()));
   if (!Pos)
     return;
-  State = setIteratorPosition(State, State->getSVal(MTE, LCtx), *Pos);
+  State = setIteratorPosition(State, C.getSVal(MTE), *Pos);
   C.addTransition(State);
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp b/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp
index 8076ca0..fd310f8 100644
--- a/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp
@@ -402,13 +402,13 @@
     // Find the setter and the getter.
     const ObjCMethodDecl *SetterD = PD->getSetterMethodDecl();
     if (SetterD) {
-      SetterD = cast<ObjCMethodDecl>(SetterD->getCanonicalDecl());
+      SetterD = SetterD->getCanonicalDecl();
       PropSetterToIvarMap[SetterD] = ID;
     }
 
     const ObjCMethodDecl *GetterD = PD->getGetterMethodDecl();
     if (GetterD) {
-      GetterD = cast<ObjCMethodDecl>(GetterD->getCanonicalDecl());
+      GetterD = GetterD->getCanonicalDecl();
       PropGetterToIvarMap[GetterD] = ID;
     }
   }
@@ -606,7 +606,7 @@
     const ObjCMessageExpr *ME) {
   const ObjCMethodDecl *MD = ME->getMethodDecl();
   if (MD) {
-    MD = cast<ObjCMethodDecl>(MD->getCanonicalDecl());
+    MD = MD->getCanonicalDecl();
     MethToIvarMapTy::const_iterator IvI = PropertyGetterToIvarMap.find(MD);
     if (IvI != PropertyGetterToIvarMap.end())
       markInvalidated(IvI->second);
@@ -630,7 +630,7 @@
   if (PA->isImplicitProperty()) {
     const ObjCMethodDecl *MD = PA->getImplicitPropertySetter();
     if (MD) {
-      MD = cast<ObjCMethodDecl>(MD->getCanonicalDecl());
+      MD = MD->getCanonicalDecl();
       MethToIvarMapTy::const_iterator IvI =PropertyGetterToIvarMap.find(MD);
       if (IvI != PropertyGetterToIvarMap.end())
         markInvalidated(IvI->second);
@@ -702,7 +702,7 @@
 
   // Check if we call a setter and set the property to 'nil'.
   if (MD && (ME->getNumArgs() == 1) && isZero(ME->getArg(0))) {
-    MD = cast<ObjCMethodDecl>(MD->getCanonicalDecl());
+    MD = MD->getCanonicalDecl();
     MethToIvarMapTy::const_iterator IvI = PropertySetterToIvarMap.find(MD);
     if (IvI != PropertySetterToIvarMap.end()) {
       markInvalidated(IvI->second);
diff --git a/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp b/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
index 655ce33..ed0ee9f 100644
--- a/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
@@ -1017,8 +1017,7 @@
   if (!LiteralExpr)
     return nullptr;
 
-  ProgramStateRef State = Succ->getState();
-  SVal LiteralSVal = State->getSVal(LiteralExpr, Succ->getLocationContext());
+  SVal LiteralSVal = Succ->getSVal(LiteralExpr);
   if (LiteralSVal.getAsRegion() != NonLocalizedString)
     return nullptr;
 
diff --git a/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp b/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
index f8473db..8ba0bcb 100644
--- a/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
@@ -202,7 +202,7 @@
 static SymbolRef getAsPointeeSymbol(const Expr *Expr,
                                     CheckerContext &C) {
   ProgramStateRef State = C.getState();
-  SVal ArgV = State->getSVal(Expr, C.getLocationContext());
+  SVal ArgV = C.getSVal(Expr);
 
   if (Optional<loc::MemRegionVal> X = ArgV.getAs<loc::MemRegionVal>()) {
     StoreManager& SM = C.getStoreManager();
@@ -297,7 +297,7 @@
 
   // Check the argument to the deallocator.
   const Expr *ArgExpr = CE->getArg(paramIdx);
-  SVal ArgSVal = State->getSVal(ArgExpr, C.getLocationContext());
+  SVal ArgSVal = C.getSVal(ArgExpr);
 
   // Undef is reported by another checker.
   if (ArgSVal.isUndef())
@@ -426,8 +426,7 @@
     // allocated value symbol, since our diagnostics depend on the value
     // returned by the call. Ex: Data should only be freed if noErr was
     // returned during allocation.)
-    SymbolRef RetStatusSymbol =
-      State->getSVal(CE, C.getLocationContext()).getAsSymbol();
+    SymbolRef RetStatusSymbol = C.getSVal(CE).getAsSymbol();
     C.getSymbolManager().addSymbolDependency(V, RetStatusSymbol);
 
     // Track the allocated value in the checker state.
diff --git a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 8511140..a3da79e 100644
--- a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -162,6 +162,7 @@
                                      check::PreCall,
                                      check::PostStmt<CallExpr>,
                                      check::PostStmt<CXXNewExpr>,
+                                     check::NewAllocator,
                                      check::PreStmt<CXXDeleteExpr>,
                                      check::PostStmt<BlockExpr>,
                                      check::PostObjCMessage,
@@ -207,6 +208,8 @@
   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
   void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
+  void checkNewAllocator(const CXXNewExpr *NE, SVal Target,
+                         CheckerContext &C) const;
   void checkPreStmt(const CXXDeleteExpr *DE, CheckerContext &C) const;
   void checkPostObjCMessage(const ObjCMethodCall &Call, CheckerContext &C) const;
   void checkPostStmt(const BlockExpr *BE, CheckerContext &C) const;
@@ -281,10 +284,18 @@
   bool isStandardNewDelete(const FunctionDecl *FD, ASTContext &C) const;
   ///@}
 
+  /// \brief Process C++ operator new()'s allocation, which is the part of C++
+  /// new-expression that goes before the constructor.
+  void processNewAllocation(const CXXNewExpr *NE, CheckerContext &C,
+                            SVal Target) const;
+
   /// \brief Perform a zero-allocation check.
+  /// The optional \p RetVal parameter specifies the newly allocated pointer
+  /// value; if unspecified, the value of expression \p E is used.
   ProgramStateRef ProcessZeroAllocation(CheckerContext &C, const Expr *E,
                                         const unsigned AllocationSizeArg,
-                                        ProgramStateRef State) const;
+                                        ProgramStateRef State,
+                                        Optional<SVal> RetVal = None) const;
 
   ProgramStateRef MallocMemReturnsAttr(CheckerContext &C,
                                        const CallExpr *CE,
@@ -300,7 +311,7 @@
                                       AllocationFamily Family = AF_Malloc);
 
   static ProgramStateRef addExtentSize(CheckerContext &C, const CXXNewExpr *NE,
-                                       ProgramStateRef State);
+                                       ProgramStateRef State, SVal Target);
 
   // Check if this malloc() for special flags. At present that means M_ZERO or
   // __GFP_ZERO (in which case, treat it like calloc).
@@ -309,9 +320,12 @@
                       const ProgramStateRef &State) const;
 
   /// Update the RefState to reflect the new memory allocation.
+  /// The optional \p RetVal parameter specifies the newly allocated pointer
+  /// value; if unspecified, the value of expression \p E is used.
   static ProgramStateRef
   MallocUpdateRefState(CheckerContext &C, const Expr *E, ProgramStateRef State,
-                       AllocationFamily Family = AF_Malloc);
+                       AllocationFamily Family = AF_Malloc,
+                       Optional<SVal> RetVal = None);
 
   ProgramStateRef FreeMemAttr(CheckerContext &C, const CallExpr *CE,
                               const OwnershipAttr* Att,
@@ -432,15 +446,24 @@
     // A symbol from when the primary region should have been reallocated.
     SymbolRef FailedReallocSymbol;
 
+    // A C++ destructor stack frame in which memory was released. Used for
+    // miscellaneous false positive suppression.
+    const StackFrameContext *ReleaseDestructorLC;
+
     bool IsLeak;
 
   public:
     MallocBugVisitor(SymbolRef S, bool isLeak = false)
-       : Sym(S), Mode(Normal), FailedReallocSymbol(nullptr), IsLeak(isLeak) {}
+        : Sym(S), Mode(Normal), FailedReallocSymbol(nullptr),
+          ReleaseDestructorLC(nullptr), IsLeak(isLeak) {}
+
+    static void *getTag() {
+      static int Tag = 0;
+      return &Tag;
+    }
 
     void Profile(llvm::FoldingSetNodeID &ID) const override {
-      static int X = 0;
-      ID.AddPointer(&X);
+      ID.AddPointer(getTag());
       ID.AddPointer(Sym);
     }
 
@@ -758,7 +781,7 @@
     return None;
 
   const Expr *FlagsEx = CE->getArg(CE->getNumArgs() - 1);
-  const SVal V = State->getSVal(FlagsEx, C.getLocationContext());
+  const SVal V = C.getSVal(FlagsEx);
   if (!V.getAs<NonLoc>()) {
     // The case where 'V' can be a location can only be due to a bad header,
     // so in this case bail out.
@@ -949,13 +972,15 @@
 }
 
 // Performs a 0-sized allocations check.
-ProgramStateRef MallocChecker::ProcessZeroAllocation(CheckerContext &C,
-                                               const Expr *E,
-                                               const unsigned AllocationSizeArg,
-                                               ProgramStateRef State) const {
+ProgramStateRef MallocChecker::ProcessZeroAllocation(
+    CheckerContext &C, const Expr *E, const unsigned AllocationSizeArg,
+    ProgramStateRef State, Optional<SVal> RetVal) const {
   if (!State)
     return nullptr;
 
+  if (!RetVal)
+    RetVal = C.getSVal(E);
+
   const Expr *Arg = nullptr;
 
   if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
@@ -972,8 +997,7 @@
 
   assert(Arg);
 
-  Optional<DefinedSVal> DefArgVal =
-      State->getSVal(Arg, C.getLocationContext()).getAs<DefinedSVal>();
+  Optional<DefinedSVal> DefArgVal = C.getSVal(Arg).getAs<DefinedSVal>();
 
   if (!DefArgVal)
     return State;
@@ -988,8 +1012,7 @@
       State->assume(SvalBuilder.evalEQ(State, *DefArgVal, Zero));
 
   if (TrueState && !FalseState) {
-    SVal retVal = State->getSVal(E, C.getLocationContext());
-    SymbolRef Sym = retVal.getAsLocSymbol();
+    SymbolRef Sym = RetVal->getAsLocSymbol();
     if (!Sym)
       return State;
 
@@ -1050,9 +1073,9 @@
   return false;
 }
 
-void MallocChecker::checkPostStmt(const CXXNewExpr *NE,
-                                  CheckerContext &C) const {
-
+void MallocChecker::processNewAllocation(const CXXNewExpr *NE,
+                                         CheckerContext &C,
+                                         SVal Target) const {
   if (NE->getNumPlacementArgs())
     for (CXXNewExpr::const_arg_iterator I = NE->placement_arg_begin(),
          E = NE->placement_arg_end(); I != E; ++I)
@@ -1072,37 +1095,48 @@
   // MallocUpdateRefState() instead of MallocMemAux() which breakes the
   // existing binding.
   State = MallocUpdateRefState(C, NE, State, NE->isArray() ? AF_CXXNewArray
-                                                           : AF_CXXNew);
-  State = addExtentSize(C, NE, State);
-  State = ProcessZeroAllocation(C, NE, 0, State);
+                                                           : AF_CXXNew, Target);
+  State = addExtentSize(C, NE, State, Target);
+  State = ProcessZeroAllocation(C, NE, 0, State, Target);
   C.addTransition(State);
 }
 
+void MallocChecker::checkPostStmt(const CXXNewExpr *NE,
+                                  CheckerContext &C) const {
+  if (!C.getAnalysisManager().getAnalyzerOptions().mayInlineCXXAllocator())
+    processNewAllocation(NE, C, C.getSVal(NE));
+}
+
+void MallocChecker::checkNewAllocator(const CXXNewExpr *NE, SVal Target,
+                                      CheckerContext &C) const {
+  if (!C.wasInlined)
+    processNewAllocation(NE, C, Target);
+}
+
 // Sets the extent value of the MemRegion allocated by
 // new expression NE to its size in Bytes.
 //
 ProgramStateRef MallocChecker::addExtentSize(CheckerContext &C,
                                              const CXXNewExpr *NE,
-                                             ProgramStateRef State) {
+                                             ProgramStateRef State,
+                                             SVal Target) {
   if (!State)
     return nullptr;
   SValBuilder &svalBuilder = C.getSValBuilder();
   SVal ElementCount;
-  const LocationContext *LCtx = C.getLocationContext();
   const SubRegion *Region;
   if (NE->isArray()) {
     const Expr *SizeExpr = NE->getArraySize();
-    ElementCount = State->getSVal(SizeExpr, C.getLocationContext());
+    ElementCount = C.getSVal(SizeExpr);
     // Store the extent size for the (symbolic)region
     // containing the elements.
-    Region = (State->getSVal(NE, LCtx))
-                 .getAsRegion()
+    Region = Target.getAsRegion()
                  ->getAs<SubRegion>()
-                 ->getSuperRegion()
+                 ->StripCasts()
                  ->getAs<SubRegion>();
   } else {
     ElementCount = svalBuilder.makeIntVal(1, true);
-    Region = (State->getSVal(NE, LCtx)).getAsRegion()->getAs<SubRegion>();
+    Region = Target.getAsRegion()->getAs<SubRegion>();
   }
   assert(Region);
 
@@ -1197,9 +1231,10 @@
   if (Att->getModule() != II_malloc)
     return nullptr;
 
-  OwnershipAttr::args_iterator I = Att->args_begin(), E = Att->args_end();
+  ParamIdx *I = Att->args_begin(), *E = Att->args_end();
   if (I != E) {
-    return MallocMemAux(C, CE, CE->getArg(*I), UndefinedVal(), State);
+    return MallocMemAux(C, CE, CE->getArg(I->getASTIndex()), UndefinedVal(),
+                        State);
   }
   return MallocMemAux(C, CE, UnknownVal(), UndefinedVal(), State);
 }
@@ -1212,8 +1247,7 @@
   if (!State)
     return nullptr;
 
-  return MallocMemAux(C, CE, State->getSVal(SizeEx, C.getLocationContext()),
-                      Init, State, Family);
+  return MallocMemAux(C, CE, C.getSVal(SizeEx), Init, State, Family);
 }
 
 ProgramStateRef MallocChecker::MallocMemAux(CheckerContext &C,
@@ -1263,18 +1297,22 @@
 ProgramStateRef MallocChecker::MallocUpdateRefState(CheckerContext &C,
                                                     const Expr *E,
                                                     ProgramStateRef State,
-                                                    AllocationFamily Family) {
+                                                    AllocationFamily Family,
+                                                    Optional<SVal> RetVal) {
   if (!State)
     return nullptr;
 
   // Get the return value.
-  SVal retVal = State->getSVal(E, C.getLocationContext());
+  if (!RetVal)
+    RetVal = C.getSVal(E);
 
   // We expect the malloc functions to return a pointer.
-  if (!retVal.getAs<Loc>())
+  if (!RetVal->getAs<Loc>())
     return nullptr;
 
-  SymbolRef Sym = retVal.getAsLocSymbol();
+  SymbolRef Sym = RetVal->getAsLocSymbol();
+  // This is a return value of a function that was not inlined, such as malloc()
+  // or new(). We've checked that in the caller. Therefore, it must be a symbol.
   assert(Sym);
 
   // Set the symbol's state to Allocated.
@@ -1294,9 +1332,9 @@
   bool ReleasedAllocated = false;
 
   for (const auto &Arg : Att->args()) {
-    ProgramStateRef StateI = FreeMemAux(C, CE, State, Arg,
-                               Att->getOwnKind() == OwnershipAttr::Holds,
-                               ReleasedAllocated);
+    ProgramStateRef StateI = FreeMemAux(
+        C, CE, State, Arg.getASTIndex(),
+        Att->getOwnKind() == OwnershipAttr::Holds, ReleasedAllocated);
     if (StateI)
       State = StateI;
   }
@@ -1457,7 +1495,7 @@
   if (!State)
     return nullptr;
 
-  SVal ArgVal = State->getSVal(ArgExpr, C.getLocationContext());
+  SVal ArgVal = C.getSVal(ArgExpr);
   if (!ArgVal.getAs<DefinedOrUnknownSVal>())
     return nullptr;
   DefinedOrUnknownSVal location = ArgVal.castAs<DefinedOrUnknownSVal>();
@@ -2047,8 +2085,8 @@
 
   if (ExplodedNode *N = C.generateErrorNode()) {
     if (!BT_BadFree[*CheckKind])
-      BT_BadFree[*CheckKind].reset(
-          new BugType(CheckNames[*CheckKind], "Bad free", "Memory Error"));
+      BT_BadFree[*CheckKind].reset(new BugType(
+          CheckNames[*CheckKind], "Bad free", categories::MemoryError));
 
     SmallString<100> Buf;
     llvm::raw_svector_ostream Os(Buf);
@@ -2084,8 +2122,7 @@
     return nullptr;
 
   const Expr *arg0Expr = CE->getArg(0);
-  const LocationContext *LCtx = C.getLocationContext();
-  SVal Arg0Val = State->getSVal(arg0Expr, LCtx);
+  SVal Arg0Val = C.getSVal(arg0Expr);
   if (!Arg0Val.getAs<DefinedOrUnknownSVal>())
     return nullptr;
   DefinedOrUnknownSVal arg0Val = Arg0Val.castAs<DefinedOrUnknownSVal>();
@@ -2099,7 +2136,7 @@
   const Expr *Arg1 = CE->getArg(1);
 
   // Get the value of the size argument.
-  SVal TotalSize = State->getSVal(Arg1, LCtx);
+  SVal TotalSize = C.getSVal(Arg1);
   if (SuffixWithN)
     TotalSize = evalMulForBufferSize(C, Arg1, CE->getArg(2));
   if (!TotalSize.getAs<DefinedOrUnknownSVal>())
@@ -2133,7 +2170,7 @@
   // Get the from and to pointer symbols as in toPtr = realloc(fromPtr, size).
   assert(!PrtIsNull);
   SymbolRef FromPtr = arg0Val.getAsSymbol();
-  SVal RetVal = State->getSVal(CE, LCtx);
+  SVal RetVal = C.getSVal(CE);
   SymbolRef ToPtr = RetVal.getAsSymbol();
   if (!FromPtr || !ToPtr)
     return nullptr;
@@ -2406,7 +2443,7 @@
 
   // Check if we are returning a symbol.
   ProgramStateRef State = C.getState();
-  SVal RetVal = State->getSVal(E, C.getLocationContext());
+  SVal RetVal = C.getSVal(E);
   SymbolRef Sym = RetVal.getAsSymbol();
   if (!Sym)
     // If we are returning a field of the allocated struct or an array element,
@@ -2436,8 +2473,7 @@
 
   ProgramStateRef state = C.getState();
   const BlockDataRegion *R =
-    cast<BlockDataRegion>(state->getSVal(BE,
-                                         C.getLocationContext()).getAsRegion());
+    cast<BlockDataRegion>(C.getSVal(BE).getAsRegion());
 
   BlockDataRegion::referenced_vars_iterator I = R->referenced_vars_begin(),
                                             E = R->referenced_vars_end();
@@ -2796,6 +2832,32 @@
 std::shared_ptr<PathDiagnosticPiece> MallocChecker::MallocBugVisitor::VisitNode(
     const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC,
     BugReport &BR) {
+  const Stmt *S = PathDiagnosticLocation::getStmt(N);
+  if (!S)
+    return nullptr;
+
+  const LocationContext *CurrentLC = N->getLocationContext();
+
+  // If we find an atomic fetch_add or fetch_sub within the destructor in which
+  // the pointer was released (before the release), this is likely a destructor
+  // of a shared pointer.
+  // Because we don't model atomics, and also because we don't know that the
+  // original reference count is positive, we should not report use-after-frees
+  // on objects deleted in such destructors. This can probably be improved
+  // through better shared pointer modeling.
+  if (ReleaseDestructorLC) {
+    if (const auto *AE = dyn_cast<AtomicExpr>(S)) {
+      AtomicExpr::AtomicOp Op = AE->getOp();
+      if (Op == AtomicExpr::AO__c11_atomic_fetch_add ||
+          Op == AtomicExpr::AO__c11_atomic_fetch_sub) {
+        if (ReleaseDestructorLC == CurrentLC ||
+            ReleaseDestructorLC->isParentOf(CurrentLC)) {
+          BR.markInvalid(getTag(), S);
+        }
+      }
+    }
+  }
+
   ProgramStateRef state = N->getState();
   ProgramStateRef statePrev = PrevN->getState();
 
@@ -2804,10 +2866,6 @@
   if (!RS)
     return nullptr;
 
-  const Stmt *S = PathDiagnosticLocation::getStmt(N);
-  if (!S)
-    return nullptr;
-
   // FIXME: We will eventually need to handle non-statement-based events
   // (__attribute__((cleanup))).
 
@@ -2823,6 +2881,24 @@
       Msg = "Memory is released";
       StackHint = new StackHintGeneratorForSymbol(Sym,
                                              "Returning; memory was released");
+
+      // See if we're releasing memory while inlining a destructor (or one of
+      // its callees). If so, enable the atomic-related suppression within that
+      // destructor (and all of its callees), which would kick in while visiting
+      // other nodes (the visit order is from the bug to the graph root).
+      for (const LocationContext *LC = CurrentLC; LC; LC = LC->getParent()) {
+        if (isa<CXXDestructorDecl>(LC->getDecl())) {
+          assert(!ReleaseDestructorLC &&
+                 "There can be only one release point!");
+          ReleaseDestructorLC = LC->getCurrentStackFrame();
+          // It is unlikely that releasing memory is delegated to a destructor
+          // inside a destructor of a shared pointer, because it's fairly hard
+          // to pass the information that the pointer indeed needs to be
+          // released into it. So we're only interested in the innermost
+          // destructor.
+          break;
+        }
+      }
     } else if (isRelinquished(RS, RSPrev, S)) {
       Msg = "Memory ownership is transferred";
       StackHint = new StackHintGeneratorForSymbol(Sym, "");
@@ -2900,8 +2976,13 @@
       mgr.getCurrentCheckName();
   // We currently treat NewDeleteLeaks checker as a subchecker of NewDelete
   // checker.
-  if (!checker->ChecksEnabled[MallocChecker::CK_NewDeleteChecker])
+  if (!checker->ChecksEnabled[MallocChecker::CK_NewDeleteChecker]) {
     checker->ChecksEnabled[MallocChecker::CK_NewDeleteChecker] = true;
+    // FIXME: This does not set the correct name, but without this workaround
+    //        no name will be set at all.
+    checker->CheckNames[MallocChecker::CK_NewDeleteChecker] =
+        mgr.getCurrentCheckName();
+  }
 }
 
 #define REGISTER_CHECKER(name)                                                 \
diff --git a/lib/StaticAnalyzer/Checkers/MisusedMovedObjectChecker.cpp b/lib/StaticAnalyzer/Checkers/MisusedMovedObjectChecker.cpp
index 4b69a1b..c6493fe 100644
--- a/lib/StaticAnalyzer/Checkers/MisusedMovedObjectChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/MisusedMovedObjectChecker.cpp
@@ -60,6 +60,7 @@
                   const char *NL, const char *Sep) const override;
 
 private:
+  enum MisuseKind {MK_FunCall, MK_Copy, MK_Move};
   class MovedBugVisitor : public BugReporterVisitorImpl<MovedBugVisitor> {
   public:
     MovedBugVisitor(const MemRegion *R) : Region(R), Found(false) {}
@@ -83,7 +84,7 @@
 
   mutable std::unique_ptr<BugType> BT;
   ExplodedNode *reportBug(const MemRegion *Region, const CallEvent &Call,
-                          CheckerContext &C, bool isCopy) const;
+                          CheckerContext &C, MisuseKind MK) const;
   bool isInMoveSafeContext(const LocationContext *LC) const;
   bool isStateResetMethod(const CXXMethodDecl *MethodDec) const;
   bool isMoveSafeMethod(const CXXMethodDecl *MethodDec) const;
@@ -100,8 +101,6 @@
                                        const MemRegion *Region) {
   if (!Region)
     return State;
-  // Note: The isSubRegionOf function is not reflexive.
-  State = State->remove<TrackedRegionMap>(Region);
   for (auto &E : State->get<TrackedRegionMap>()) {
     if (E.first->isSubRegionOf(Region))
       State = State->remove<TrackedRegionMap>(E.first);
@@ -179,7 +178,7 @@
 ExplodedNode *MisusedMovedObjectChecker::reportBug(const MemRegion *Region,
                                                    const CallEvent &Call,
                                                    CheckerContext &C,
-                                                   bool isCopy = false) const {
+                                                   MisuseKind MK) const {
   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
     if (!BT)
       BT.reset(new BugType(this, "Usage of a 'moved-from' object",
@@ -195,10 +194,17 @@
 
     // Creating the error message.
     std::string ErrorMessage;
-    if (isCopy)
-      ErrorMessage = "Copying a 'moved-from' object";
-    else
-      ErrorMessage = "Method call on a 'moved-from' object";
+    switch(MK) {
+      case MK_FunCall:
+        ErrorMessage = "Method call on a 'moved-from' object";
+        break;
+      case MK_Copy:
+        ErrorMessage = "Copying a 'moved-from' object";
+        break;
+      case MK_Move:
+        ErrorMessage = "Moving a 'moved-from' object";
+        break;
+    }
     if (const auto DecReg = Region->getAs<DeclRegion>()) {
       const auto *RegionDecl = dyn_cast<NamedDecl>(DecReg->getDecl());
       ErrorMessage += " '" + RegionDecl->getNameAsString() + "'";
@@ -352,7 +358,7 @@
   const LocationContext *LC = C.getLocationContext();
   ExplodedNode *N = nullptr;
 
-  // Remove the MemRegions from the map on which a ctor/dtor call or assignement
+  // Remove the MemRegions from the map on which a ctor/dtor call or assignment
   // happened.
 
   // Checking constructor calls.
@@ -365,7 +371,10 @@
       const RegionState *ArgState = State->get<TrackedRegionMap>(ArgRegion);
       if (ArgState && ArgState->isMoved()) {
         if (!isInMoveSafeContext(LC)) {
-          N = reportBug(ArgRegion, Call, C, /*isCopy=*/true);
+          if(CtorDec->isMoveConstructor())
+            N = reportBug(ArgRegion, Call, C, MK_Move);
+          else
+            N = reportBug(ArgRegion, Call, C, MK_Copy);
           State = State->set<TrackedRegionMap>(ArgRegion,
                                                RegionState::getReported());
         }
@@ -380,8 +389,11 @@
     return;
   // In case of destructor call we do not track the object anymore.
   const MemRegion *ThisRegion = IC->getCXXThisVal().getAsRegion();
+  if (!ThisRegion)
+    return;
+
   if (dyn_cast_or_null<CXXDestructorDecl>(Call.getDecl())) {
-    State = removeFromState(State, IC->getCXXThisVal().getAsRegion());
+    State = removeFromState(State, ThisRegion);
     C.addTransition(State);
     return;
   }
@@ -402,7 +414,10 @@
           State->get<TrackedRegionMap>(IC->getArgSVal(0).getAsRegion());
       if (ArgState && ArgState->isMoved() && !isInMoveSafeContext(LC)) {
         const MemRegion *ArgRegion = IC->getArgSVal(0).getAsRegion();
-        N = reportBug(ArgRegion, Call, C, /*isCopy=*/true);
+        if(MethodDecl->isMoveAssignmentOperator())
+          N = reportBug(ArgRegion, Call, C, MK_Move);
+        else
+          N = reportBug(ArgRegion, Call, C, MK_Copy);
         State =
             State->set<TrackedRegionMap>(ArgRegion, RegionState::getReported());
       }
@@ -412,35 +427,35 @@
   }
 
   // The remaining part is check only for method call on a moved-from object.
+
+  // We want to investigate the whole object, not only sub-object of a parent
+  // class in which the encountered method defined.
+  while (const CXXBaseObjectRegion *BR =
+             dyn_cast<CXXBaseObjectRegion>(ThisRegion))
+    ThisRegion = BR->getSuperRegion();
+
   if (isMoveSafeMethod(MethodDecl))
     return;
 
   if (isStateResetMethod(MethodDecl)) {
-    // A state reset method resets the whole object, not only sub-object
-    // of a parent class in which it is defined.
-    const MemRegion *WholeObjectRegion = ThisRegion;
-    while (const CXXBaseObjectRegion *BR =
-               dyn_cast<CXXBaseObjectRegion>(WholeObjectRegion))
-      WholeObjectRegion = BR->getSuperRegion();
-
-    State = State->remove<TrackedRegionMap>(WholeObjectRegion);
+    State = removeFromState(State, ThisRegion);
     C.addTransition(State);
     return;
   }
 
-  // If it is already reported then we dont report the bug again.
+  // If it is already reported then we don't report the bug again.
   const RegionState *ThisState = State->get<TrackedRegionMap>(ThisRegion);
   if (!(ThisState && ThisState->isMoved()))
     return;
 
-  // Dont report it in case if any base region is already reported
+  // Don't report it in case if any base region is already reported
   if (isAnyBaseRegionReported(State, ThisRegion))
     return;
 
   if (isInMoveSafeContext(LC))
     return;
 
-  N = reportBug(ThisRegion, Call, C);
+  N = reportBug(ThisRegion, Call, C, MK_FunCall);
   State = State->set<TrackedRegionMap>(ThisRegion, RegionState::getReported());
   C.addTransition(State, N);
 }
diff --git a/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp b/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp
new file mode 100644
index 0000000..e62fa0f
--- /dev/null
+++ b/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp
@@ -0,0 +1,87 @@
+// MmapWriteExecChecker.cpp - Check for the prot argument -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This checker tests the 3rd argument of mmap's calls to check if
+// it is writable and executable in the same time. It's somehow
+// an optional checker since for example in JIT libraries it is pretty common.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangSACheckers.h"
+
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+using llvm::APSInt;
+
+namespace {
+class MmapWriteExecChecker : public Checker<check::PreCall> {
+  CallDescription MmapFn;
+  static int ProtWrite;
+  static int ProtExec;
+  static int ProtRead;
+  mutable std::unique_ptr<BugType> BT;
+public:
+  MmapWriteExecChecker() : MmapFn("mmap", 6) {}
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+  int ProtExecOv;
+  int ProtReadOv;
+};
+}
+
+int MmapWriteExecChecker::ProtWrite = 0x02;
+int MmapWriteExecChecker::ProtExec  = 0x04;
+int MmapWriteExecChecker::ProtRead  = 0x01;
+
+void MmapWriteExecChecker::checkPreCall(const CallEvent &Call,
+                                         CheckerContext &C) const {
+  if (Call.isCalled(MmapFn)) {
+    SVal ProtVal = Call.getArgSVal(2); 
+    Optional<nonloc::ConcreteInt> ProtLoc = ProtVal.getAs<nonloc::ConcreteInt>();
+    int64_t Prot = ProtLoc->getValue().getSExtValue();
+    if (ProtExecOv != ProtExec)
+      ProtExec = ProtExecOv;
+    if (ProtReadOv != ProtRead)
+      ProtRead = ProtReadOv;
+
+    // Wrong settings
+    if (ProtRead == ProtExec)
+      return;
+
+    if ((Prot & (ProtWrite | ProtExec)) == (ProtWrite | ProtExec)) {
+      if (!BT)
+        BT.reset(new BugType(this, "W^X check fails, Write Exec prot flags set", "Security"));
+
+      ExplodedNode *N = C.generateNonFatalErrorNode();
+      if (!N)
+        return;
+
+      auto Report = llvm::make_unique<BugReport>(
+          *BT, "Both PROT_WRITE and PROT_EXEC flags are set. This can "
+               "lead to exploitable memory regions, which could be overwritten "
+               "with malicious code", N);
+      Report->addRange(Call.getArgSourceRange(2));
+      C.emitReport(std::move(Report));
+    }
+  }
+}
+
+void ento::registerMmapWriteExecChecker(CheckerManager &mgr) {
+  MmapWriteExecChecker *Mwec =
+      mgr.registerChecker<MmapWriteExecChecker>();
+  Mwec->ProtExecOv =
+    mgr.getAnalyzerOptions().getOptionAsInteger("MmapProtExec", 0x04, Mwec);
+  Mwec->ProtReadOv =
+    mgr.getAnalyzerOptions().getOptionAsInteger("MmapProtRead", 0x01, Mwec);
+}
diff --git a/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp b/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
index 6d05159..6f27995 100644
--- a/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
@@ -58,10 +58,11 @@
       AttrNonNull.set(0, NumArgs);
       break;
     }
-    for (unsigned Val : NonNull->args()) {
-      if (Val >= NumArgs)
+    for (const ParamIdx &Idx : NonNull->args()) {
+      unsigned IdxAST = Idx.getASTIndex();
+      if (IdxAST >= NumArgs)
         continue;
-      AttrNonNull.set(Val);
+      AttrNonNull.set(IdxAST);
     }
   }
 
diff --git a/lib/StaticAnalyzer/Checkers/NonnullGlobalConstantsChecker.cpp b/lib/StaticAnalyzer/Checkers/NonnullGlobalConstantsChecker.cpp
index 0b4ecb4..f65e1d0 100644
--- a/lib/StaticAnalyzer/Checkers/NonnullGlobalConstantsChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/NonnullGlobalConstantsChecker.cpp
@@ -73,9 +73,9 @@
     return;
 
   ProgramStateRef State = C.getState();
-  SVal V = State->getSVal(location.castAs<Loc>());
 
   if (isGlobalConstString(location)) {
+    SVal V = State->getSVal(location.castAs<Loc>());
     Optional<DefinedOrUnknownSVal> Constr = V.getAs<DefinedOrUnknownSVal>();
 
     if (Constr) {
diff --git a/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
index fa9a317..b348453 100644
--- a/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
@@ -560,8 +560,7 @@
   if (State->get<InvariantViolated>())
     return;
 
-  auto RetSVal =
-      State->getSVal(S, C.getLocationContext()).getAs<DefinedOrUnknownSVal>();
+  auto RetSVal = C.getSVal(S).getAs<DefinedOrUnknownSVal>();
   if (!RetSVal)
     return;
 
@@ -977,8 +976,7 @@
   if (DestNullability == Nullability::Unspecified)
     return;
 
-  auto RegionSVal =
-      State->getSVal(CE, C.getLocationContext()).getAs<DefinedOrUnknownSVal>();
+  auto RegionSVal = C.getSVal(CE).getAs<DefinedOrUnknownSVal>();
   const MemRegion *Region = getTrackRegion(*RegionSVal);
   if (!Region)
     return;
diff --git a/lib/StaticAnalyzer/Checkers/NumberObjectConversionChecker.cpp b/lib/StaticAnalyzer/Checkers/NumberObjectConversionChecker.cpp
index 40e379c..d1749cf 100644
--- a/lib/StaticAnalyzer/Checkers/NumberObjectConversionChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/NumberObjectConversionChecker.cpp
@@ -270,8 +270,10 @@
                            hasRHS(SuspiciousNumberObjectExprM)));
 
   auto ConversionThroughBranchingM =
-      ifStmt(hasCondition(SuspiciousNumberObjectExprM))
-      .bind("pedantic");
+      ifStmt(allOf(
+          hasCondition(SuspiciousNumberObjectExprM),
+          unless(hasConditionVariableStatement(declStmt())
+      ))).bind("pedantic");
 
   auto ConversionThroughCallM =
       callExpr(hasAnyArgument(allOf(hasType(SuspiciousScalarTypeM),
diff --git a/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp b/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp
index cbaa5c2..b7339fe 100644
--- a/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp
@@ -39,7 +39,7 @@
 
   const Expr *Ex = S->getSynchExpr();
   ProgramStateRef state = C.getState();
-  SVal V = state->getSVal(Ex, C.getLocationContext());
+  SVal V = C.getSVal(Ex);
 
   // Uninitialized value used for the mutex?
   if (V.getAs<UndefinedVal>()) {
diff --git a/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp b/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
index 58ebf72..fb05ca6 100644
--- a/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
@@ -39,7 +39,7 @@
   }
 
   inline SymbolRef getArraySym(const Expr *E, CheckerContext &C) const {
-    SVal ArrayRef = C.getState()->getSVal(E, C.getLocationContext());
+    SVal ArrayRef = C.getSVal(E);
     SymbolRef ArraySym = ArrayRef.getAsSymbol();
     return ArraySym;
   }
@@ -66,13 +66,13 @@
 void ObjCContainersChecker::addSizeInfo(const Expr *Array, const Expr *Size,
                                         CheckerContext &C) const {
   ProgramStateRef State = C.getState();
-  SVal SizeV = State->getSVal(Size, C.getLocationContext());
+  SVal SizeV = C.getSVal(Size);
   // Undefined is reported by another checker.
   if (SizeV.isUnknownOrUndef())
     return;
 
   // Get the ArrayRef symbol.
-  SVal ArrayRef = State->getSVal(Array, C.getLocationContext());
+  SVal ArrayRef = C.getSVal(Array);
   SymbolRef ArraySym = ArrayRef.getAsSymbol();
   if (!ArraySym)
     return;
@@ -128,7 +128,7 @@
 
     // Get the index.
     const Expr *IdxExpr = CE->getArg(1);
-    SVal IdxVal = State->getSVal(IdxExpr, C.getLocationContext());
+    SVal IdxVal = C.getSVal(IdxExpr);
     if (IdxVal.isUnknownOrUndef())
       return;
     DefinedSVal Idx = IdxVal.castAs<DefinedSVal>();
diff --git a/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp b/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
index ffa3a27..6184de9 100644
--- a/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
@@ -132,7 +132,7 @@
 /// points to and is an object that did not come from the result of calling
 /// an initializer.
 static bool isInvalidSelf(const Expr *E, CheckerContext &C) {
-  SVal exprVal = C.getState()->getSVal(E, C.getLocationContext());
+  SVal exprVal = C.getSVal(E);
   if (!hasSelfFlag(exprVal, SelfFlag_Self, C))
     return false; // value did not come from 'self'.
   if (hasSelfFlag(exprVal, SelfFlag_InitRes, C))
@@ -183,7 +183,7 @@
     // value out when we return from this method.
     state = state->set<CalledInit>(true);
 
-    SVal V = state->getSVal(Msg.getOriginExpr(), C.getLocationContext());
+    SVal V = C.getSVal(Msg.getOriginExpr());
     addSelfFlag(state, V, SelfFlag_InitRes, C);
     return;
   }
diff --git a/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp b/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp
index 075ff09..69b19a7 100644
--- a/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp
@@ -107,8 +107,6 @@
   }
 
   reportUseAfterDealloc(ReceiverSymbol, Desc, M.getOriginExpr(), C);
-
-  return;
 }
 
 void ObjCSuperDeallocChecker::checkPreCall(const CallEvent &Call,
diff --git a/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp b/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
index a51dda6..b17cd8d 100644
--- a/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
@@ -1,328 +1,330 @@
-//=======- PaddingChecker.cpp ------------------------------------*- C++ -*-==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This file defines a checker that checks for padding that could be
-//  removed by re-ordering members.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ClangSACheckers.h"
-#include "clang/AST/CharUnits.h"
-#include "clang/AST/DeclTemplate.h"
-#include "clang/AST/RecordLayout.h"
-#include "clang/AST/RecursiveASTVisitor.h"
-#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
-#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
-#include "clang/StaticAnalyzer/Core/Checker.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <numeric>
-
-using namespace clang;
-using namespace ento;
-
-namespace {
-class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> {
-private:
-  mutable std::unique_ptr<BugType> PaddingBug;
-  mutable int64_t AllowedPad;
-  mutable BugReporter *BR;
-
-public:
-  void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
-                    BugReporter &BRArg) const {
-    BR = &BRArg;
-    AllowedPad =
-        MGR.getAnalyzerOptions().getOptionAsInteger("AllowedPad", 24, this);
-    assert(AllowedPad >= 0 && "AllowedPad option should be non-negative");
-
-    // The calls to checkAST* from AnalysisConsumer don't
-    // visit template instantiations or lambda classes. We
-    // want to visit those, so we make our own RecursiveASTVisitor.
-    struct LocalVisitor : public RecursiveASTVisitor<LocalVisitor> {
-      const PaddingChecker *Checker;
-      bool shouldVisitTemplateInstantiations() const { return true; }
-      bool shouldVisitImplicitCode() const { return true; }
-      explicit LocalVisitor(const PaddingChecker *Checker) : Checker(Checker) {}
-      bool VisitRecordDecl(const RecordDecl *RD) {
-        Checker->visitRecord(RD);
-        return true;
-      }
-      bool VisitVarDecl(const VarDecl *VD) {
-        Checker->visitVariable(VD);
-        return true;
-      }
-      // TODO: Visit array new and mallocs for arrays.
-    };
-
-    LocalVisitor visitor(this);
-    visitor.TraverseDecl(const_cast<TranslationUnitDecl *>(TUD));
-  }
-
-  /// \brief Look for records of overly padded types. If padding *
-  /// PadMultiplier exceeds AllowedPad, then generate a report.
-  /// PadMultiplier is used to share code with the array padding
-  /// checker.
-  void visitRecord(const RecordDecl *RD, uint64_t PadMultiplier = 1) const {
-    if (shouldSkipDecl(RD))
-      return;
-
-    auto &ASTContext = RD->getASTContext();
-    const ASTRecordLayout &RL = ASTContext.getASTRecordLayout(RD);
-    assert(llvm::isPowerOf2_64(RL.getAlignment().getQuantity()));
-
-    CharUnits BaselinePad = calculateBaselinePad(RD, ASTContext, RL);
-    if (BaselinePad.isZero())
-      return;
-
-    CharUnits OptimalPad;
-    SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
-    std::tie(OptimalPad, OptimalFieldsOrder) =
-        calculateOptimalPad(RD, ASTContext, RL);
-
-    CharUnits DiffPad = PadMultiplier * (BaselinePad - OptimalPad);
-    if (DiffPad.getQuantity() <= AllowedPad) {
-      assert(!DiffPad.isNegative() && "DiffPad should not be negative");
-      // There is not enough excess padding to trigger a warning.
-      return;
-    }
-    reportRecord(RD, BaselinePad, OptimalPad, OptimalFieldsOrder);
-  }
-
-  /// \brief Look for arrays of overly padded types. If the padding of the
-  /// array type exceeds AllowedPad, then generate a report.
-  void visitVariable(const VarDecl *VD) const {
-    const ArrayType *ArrTy = VD->getType()->getAsArrayTypeUnsafe();
-    if (ArrTy == nullptr)
-      return;
-    uint64_t Elts = 0;
-    if (const ConstantArrayType *CArrTy = dyn_cast<ConstantArrayType>(ArrTy))
-      Elts = CArrTy->getSize().getZExtValue();
-    if (Elts == 0)
-      return;
-    const RecordType *RT = ArrTy->getElementType()->getAs<RecordType>();
-    if (RT == nullptr)
-      return;
-
-    // TODO: Recurse into the fields and base classes to see if any
-    // of those have excess padding.
-    visitRecord(RT->getDecl(), Elts);
-  }
-
-  bool shouldSkipDecl(const RecordDecl *RD) const {
-    auto Location = RD->getLocation();
-    // If the construct doesn't have a source file, then it's not something
-    // we want to diagnose.
-    if (!Location.isValid())
-      return true;
-    SrcMgr::CharacteristicKind Kind =
-        BR->getSourceManager().getFileCharacteristic(Location);
-    // Throw out all records that come from system headers.
-    if (Kind != SrcMgr::C_User)
-      return true;
-
-    // Not going to attempt to optimize unions.
-    if (RD->isUnion())
-      return true;
-    // How do you reorder fields if you haven't got any?
-    if (RD->field_empty())
-      return true;
-    if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
-      // Tail padding with base classes ends up being very complicated.
-      // We will skip objects with base classes for now.
-      if (CXXRD->getNumBases() != 0)
-        return true;
-      // Virtual bases are complicated, skipping those for now.
-      if (CXXRD->getNumVBases() != 0)
-        return true;
-      // Can't layout a template, so skip it. We do still layout the
-      // instantiations though.
-      if (CXXRD->getTypeForDecl()->isDependentType())
-        return true;
-      if (CXXRD->getTypeForDecl()->isInstantiationDependentType())
-        return true;
-    }
-    auto IsTrickyField = [](const FieldDecl *FD) -> bool {
-      // Bitfield layout is hard.
-      if (FD->isBitField())
-        return true;
-
-      // Variable length arrays are tricky too.
-      QualType Ty = FD->getType();
-      if (Ty->isIncompleteArrayType())
-        return true;
-      return false;
-    };
-
-    if (std::any_of(RD->field_begin(), RD->field_end(), IsTrickyField))
-      return true;
-    return false;
-  }
-
-  static CharUnits calculateBaselinePad(const RecordDecl *RD,
-                                        const ASTContext &ASTContext,
-                                        const ASTRecordLayout &RL) {
-    CharUnits PaddingSum;
-    CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
-    for (const FieldDecl *FD : RD->fields()) {
-      // This checker only cares about the padded size of the
-      // field, and not the data size. If the field is a record
-      // with tail padding, then we won't put that number in our
-      // total because reordering fields won't fix that problem.
-      CharUnits FieldSize = ASTContext.getTypeSizeInChars(FD->getType());
-      auto FieldOffsetBits = RL.getFieldOffset(FD->getFieldIndex());
-      CharUnits FieldOffset = ASTContext.toCharUnitsFromBits(FieldOffsetBits);
-      PaddingSum += (FieldOffset - Offset);
-      Offset = FieldOffset + FieldSize;
-    }
-    PaddingSum += RL.getSize() - Offset;
-    return PaddingSum;
-  }
-
-  /// Optimal padding overview:
-  /// 1.  Find a close approximation to where we can place our first field.
-  ///     This will usually be at offset 0.
-  /// 2.  Try to find the best field that can legally be placed at the current
-  ///     offset.
-  ///   a.  "Best" is the largest alignment that is legal, but smallest size.
-  ///       This is to account for overly aligned types.
-  /// 3.  If no fields can fit, pad by rounding the current offset up to the
-  ///     smallest alignment requirement of our fields. Measure and track the
-  //      amount of padding added. Go back to 2.
-  /// 4.  Increment the current offset by the size of the chosen field.
-  /// 5.  Remove the chosen field from the set of future possibilities.
-  /// 6.  Go back to 2 if there are still unplaced fields.
-  /// 7.  Add tail padding by rounding the current offset up to the structure
-  ///     alignment. Track the amount of padding added.
-
-  static std::pair<CharUnits, SmallVector<const FieldDecl *, 20>>
-  calculateOptimalPad(const RecordDecl *RD, const ASTContext &ASTContext,
-                      const ASTRecordLayout &RL) {
-    struct FieldInfo {
-      CharUnits Align;
-      CharUnits Size;
-      const FieldDecl *Field;
-      bool operator<(const FieldInfo &RHS) const {
-        // Order from small alignments to large alignments,
-        // then large sizes to small sizes.
-        // then large field indices to small field indices
-        return std::make_tuple(Align, -Size,
-                               Field ? -static_cast<int>(Field->getFieldIndex())
-                                     : 0) <
-               std::make_tuple(
-                   RHS.Align, -RHS.Size,
-                   RHS.Field ? -static_cast<int>(RHS.Field->getFieldIndex())
-                             : 0);
-      }
-    };
-    SmallVector<FieldInfo, 20> Fields;
-    auto GatherSizesAndAlignments = [](const FieldDecl *FD) {
-      FieldInfo RetVal;
-      RetVal.Field = FD;
-      auto &Ctx = FD->getASTContext();
-      std::tie(RetVal.Size, RetVal.Align) =
-          Ctx.getTypeInfoInChars(FD->getType());
-      assert(llvm::isPowerOf2_64(RetVal.Align.getQuantity()));
-      if (auto Max = FD->getMaxAlignment())
-        RetVal.Align = std::max(Ctx.toCharUnitsFromBits(Max), RetVal.Align);
-      return RetVal;
-    };
-    std::transform(RD->field_begin(), RD->field_end(),
-                   std::back_inserter(Fields), GatherSizesAndAlignments);
-    std::sort(Fields.begin(), Fields.end());
-    // This lets us skip over vptrs and non-virtual bases,
-    // so that we can just worry about the fields in our object.
-    // Note that this does cause us to miss some cases where we
-    // could pack more bytes in to a base class's tail padding.
-    CharUnits NewOffset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
-    CharUnits NewPad;
-    SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
-    while (!Fields.empty()) {
-      unsigned TrailingZeros =
-          llvm::countTrailingZeros((unsigned long long)NewOffset.getQuantity());
-      // If NewOffset is zero, then countTrailingZeros will be 64. Shifting
-      // 64 will overflow our unsigned long long. Shifting 63 will turn
-      // our long long (and CharUnits internal type) negative. So shift 62.
-      long long CurAlignmentBits = 1ull << (std::min)(TrailingZeros, 62u);
-      CharUnits CurAlignment = CharUnits::fromQuantity(CurAlignmentBits);
-      FieldInfo InsertPoint = {CurAlignment, CharUnits::Zero(), nullptr};
-      auto CurBegin = Fields.begin();
-      auto CurEnd = Fields.end();
-
-      // In the typical case, this will find the last element
-      // of the vector. We won't find a middle element unless
-      // we started on a poorly aligned address or have an overly
-      // aligned field.
-      auto Iter = std::upper_bound(CurBegin, CurEnd, InsertPoint);
-      if (Iter != CurBegin) {
-        // We found a field that we can layout with the current alignment.
-        --Iter;
-        NewOffset += Iter->Size;
-        OptimalFieldsOrder.push_back(Iter->Field);
-        Fields.erase(Iter);
-      } else {
-        // We are poorly aligned, and we need to pad in order to layout another
-        // field. Round up to at least the smallest field alignment that we
-        // currently have.
-        CharUnits NextOffset = NewOffset.alignTo(Fields[0].Align);
-        NewPad += NextOffset - NewOffset;
-        NewOffset = NextOffset;
-      }
-    }
-    // Calculate tail padding.
-    CharUnits NewSize = NewOffset.alignTo(RL.getAlignment());
-    NewPad += NewSize - NewOffset;
-    return {NewPad, std::move(OptimalFieldsOrder)};
-  }
-
-  void reportRecord(
-      const RecordDecl *RD, CharUnits BaselinePad, CharUnits OptimalPad,
-      const SmallVector<const FieldDecl *, 20> &OptimalFieldsOrder) const {
-    if (!PaddingBug)
-      PaddingBug =
-          llvm::make_unique<BugType>(this, "Excessive Padding", "Performance");
-
-    SmallString<100> Buf;
-    llvm::raw_svector_ostream Os(Buf);
-    Os << "Excessive padding in '";
-    Os << QualType::getAsString(RD->getTypeForDecl(), Qualifiers()) << "'";
-
-    if (auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
-      // TODO: make this show up better in the console output and in
-      // the HTML. Maybe just make it show up in HTML like the path
-      // diagnostics show.
-      SourceLocation ILoc = TSD->getPointOfInstantiation();
-      if (ILoc.isValid())
-        Os << " instantiated here: "
-           << ILoc.printToString(BR->getSourceManager());
-    }
-
-    Os << " (" << BaselinePad.getQuantity() << " padding bytes, where "
-       << OptimalPad.getQuantity() << " is optimal). \n"
-       << "Optimal fields order: \n";
-    for (const auto *FD : OptimalFieldsOrder)
-      Os << FD->getName() << ", \n";
-    Os << "consider reordering the fields or adding explicit padding "
-          "members.";
-
-    PathDiagnosticLocation CELoc =
-        PathDiagnosticLocation::create(RD, BR->getSourceManager());
-    auto Report = llvm::make_unique<BugReport>(*PaddingBug, Os.str(), CELoc);
-    Report->setDeclWithIssue(RD);
-    Report->addRange(RD->getSourceRange());
-    BR->emitReport(std::move(Report));
-  }
-};
-}
-
-void ento::registerPaddingChecker(CheckerManager &Mgr) {
-  Mgr.registerChecker<PaddingChecker>();
-}
+//=======- PaddingChecker.cpp ------------------------------------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines a checker that checks for padding that could be
+//  removed by re-ordering members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangSACheckers.h"
+#include "clang/AST/CharUnits.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/RecordLayout.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <numeric>
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> {
+private:
+  mutable std::unique_ptr<BugType> PaddingBug;
+  mutable int64_t AllowedPad;
+  mutable BugReporter *BR;
+
+public:
+  void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
+                    BugReporter &BRArg) const {
+    BR = &BRArg;
+    AllowedPad =
+        MGR.getAnalyzerOptions().getOptionAsInteger("AllowedPad", 24, this);
+    assert(AllowedPad >= 0 && "AllowedPad option should be non-negative");
+
+    // The calls to checkAST* from AnalysisConsumer don't
+    // visit template instantiations or lambda classes. We
+    // want to visit those, so we make our own RecursiveASTVisitor.
+    struct LocalVisitor : public RecursiveASTVisitor<LocalVisitor> {
+      const PaddingChecker *Checker;
+      bool shouldVisitTemplateInstantiations() const { return true; }
+      bool shouldVisitImplicitCode() const { return true; }
+      explicit LocalVisitor(const PaddingChecker *Checker) : Checker(Checker) {}
+      bool VisitRecordDecl(const RecordDecl *RD) {
+        Checker->visitRecord(RD);
+        return true;
+      }
+      bool VisitVarDecl(const VarDecl *VD) {
+        Checker->visitVariable(VD);
+        return true;
+      }
+      // TODO: Visit array new and mallocs for arrays.
+    };
+
+    LocalVisitor visitor(this);
+    visitor.TraverseDecl(const_cast<TranslationUnitDecl *>(TUD));
+  }
+
+  /// \brief Look for records of overly padded types. If padding *
+  /// PadMultiplier exceeds AllowedPad, then generate a report.
+  /// PadMultiplier is used to share code with the array padding
+  /// checker.
+  void visitRecord(const RecordDecl *RD, uint64_t PadMultiplier = 1) const {
+    if (shouldSkipDecl(RD))
+      return;
+
+    auto &ASTContext = RD->getASTContext();
+    const ASTRecordLayout &RL = ASTContext.getASTRecordLayout(RD);
+    assert(llvm::isPowerOf2_64(RL.getAlignment().getQuantity()));
+
+    CharUnits BaselinePad = calculateBaselinePad(RD, ASTContext, RL);
+    if (BaselinePad.isZero())
+      return;
+
+    CharUnits OptimalPad;
+    SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
+    std::tie(OptimalPad, OptimalFieldsOrder) =
+        calculateOptimalPad(RD, ASTContext, RL);
+
+    CharUnits DiffPad = PadMultiplier * (BaselinePad - OptimalPad);
+    if (DiffPad.getQuantity() <= AllowedPad) {
+      assert(!DiffPad.isNegative() && "DiffPad should not be negative");
+      // There is not enough excess padding to trigger a warning.
+      return;
+    }
+    reportRecord(RD, BaselinePad, OptimalPad, OptimalFieldsOrder);
+  }
+
+  /// \brief Look for arrays of overly padded types. If the padding of the
+  /// array type exceeds AllowedPad, then generate a report.
+  void visitVariable(const VarDecl *VD) const {
+    const ArrayType *ArrTy = VD->getType()->getAsArrayTypeUnsafe();
+    if (ArrTy == nullptr)
+      return;
+    uint64_t Elts = 0;
+    if (const ConstantArrayType *CArrTy = dyn_cast<ConstantArrayType>(ArrTy))
+      Elts = CArrTy->getSize().getZExtValue();
+    if (Elts == 0)
+      return;
+    const RecordType *RT = ArrTy->getElementType()->getAs<RecordType>();
+    if (RT == nullptr)
+      return;
+
+    // TODO: Recurse into the fields and base classes to see if any
+    // of those have excess padding.
+    visitRecord(RT->getDecl(), Elts);
+  }
+
+  bool shouldSkipDecl(const RecordDecl *RD) const {
+    auto Location = RD->getLocation();
+    // If the construct doesn't have a source file, then it's not something
+    // we want to diagnose.
+    if (!Location.isValid())
+      return true;
+    SrcMgr::CharacteristicKind Kind =
+        BR->getSourceManager().getFileCharacteristic(Location);
+    // Throw out all records that come from system headers.
+    if (Kind != SrcMgr::C_User)
+      return true;
+
+    // Not going to attempt to optimize unions.
+    if (RD->isUnion())
+      return true;
+    // How do you reorder fields if you haven't got any?
+    if (RD->field_empty())
+      return true;
+    if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+      // Tail padding with base classes ends up being very complicated.
+      // We will skip objects with base classes for now.
+      if (CXXRD->getNumBases() != 0)
+        return true;
+      // Virtual bases are complicated, skipping those for now.
+      if (CXXRD->getNumVBases() != 0)
+        return true;
+      // Can't layout a template, so skip it. We do still layout the
+      // instantiations though.
+      if (CXXRD->getTypeForDecl()->isDependentType())
+        return true;
+      if (CXXRD->getTypeForDecl()->isInstantiationDependentType())
+        return true;
+    }
+    auto IsTrickyField = [](const FieldDecl *FD) -> bool {
+      // Bitfield layout is hard.
+      if (FD->isBitField())
+        return true;
+
+      // Variable length arrays are tricky too.
+      QualType Ty = FD->getType();
+      if (Ty->isIncompleteArrayType())
+        return true;
+      return false;
+    };
+
+    if (std::any_of(RD->field_begin(), RD->field_end(), IsTrickyField))
+      return true;
+    return false;
+  }
+
+  static CharUnits calculateBaselinePad(const RecordDecl *RD,
+                                        const ASTContext &ASTContext,
+                                        const ASTRecordLayout &RL) {
+    CharUnits PaddingSum;
+    CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
+    for (const FieldDecl *FD : RD->fields()) {
+      // This checker only cares about the padded size of the
+      // field, and not the data size. If the field is a record
+      // with tail padding, then we won't put that number in our
+      // total because reordering fields won't fix that problem.
+      CharUnits FieldSize = ASTContext.getTypeSizeInChars(FD->getType());
+      auto FieldOffsetBits = RL.getFieldOffset(FD->getFieldIndex());
+      CharUnits FieldOffset = ASTContext.toCharUnitsFromBits(FieldOffsetBits);
+      PaddingSum += (FieldOffset - Offset);
+      Offset = FieldOffset + FieldSize;
+    }
+    PaddingSum += RL.getSize() - Offset;
+    return PaddingSum;
+  }
+
+  /// Optimal padding overview:
+  /// 1.  Find a close approximation to where we can place our first field.
+  ///     This will usually be at offset 0.
+  /// 2.  Try to find the best field that can legally be placed at the current
+  ///     offset.
+  ///   a.  "Best" is the largest alignment that is legal, but smallest size.
+  ///       This is to account for overly aligned types.
+  /// 3.  If no fields can fit, pad by rounding the current offset up to the
+  ///     smallest alignment requirement of our fields. Measure and track the
+  //      amount of padding added. Go back to 2.
+  /// 4.  Increment the current offset by the size of the chosen field.
+  /// 5.  Remove the chosen field from the set of future possibilities.
+  /// 6.  Go back to 2 if there are still unplaced fields.
+  /// 7.  Add tail padding by rounding the current offset up to the structure
+  ///     alignment. Track the amount of padding added.
+
+  static std::pair<CharUnits, SmallVector<const FieldDecl *, 20>>
+  calculateOptimalPad(const RecordDecl *RD, const ASTContext &ASTContext,
+                      const ASTRecordLayout &RL) {
+    struct FieldInfo {
+      CharUnits Align;
+      CharUnits Size;
+      const FieldDecl *Field;
+      bool operator<(const FieldInfo &RHS) const {
+        // Order from small alignments to large alignments,
+        // then large sizes to small sizes.
+        // then large field indices to small field indices
+        return std::make_tuple(Align, -Size,
+                               Field ? -static_cast<int>(Field->getFieldIndex())
+                                     : 0) <
+               std::make_tuple(
+                   RHS.Align, -RHS.Size,
+                   RHS.Field ? -static_cast<int>(RHS.Field->getFieldIndex())
+                             : 0);
+      }
+    };
+    SmallVector<FieldInfo, 20> Fields;
+    auto GatherSizesAndAlignments = [](const FieldDecl *FD) {
+      FieldInfo RetVal;
+      RetVal.Field = FD;
+      auto &Ctx = FD->getASTContext();
+      std::tie(RetVal.Size, RetVal.Align) =
+          Ctx.getTypeInfoInChars(FD->getType());
+      assert(llvm::isPowerOf2_64(RetVal.Align.getQuantity()));
+      if (auto Max = FD->getMaxAlignment())
+        RetVal.Align = std::max(Ctx.toCharUnitsFromBits(Max), RetVal.Align);
+      return RetVal;
+    };
+    std::transform(RD->field_begin(), RD->field_end(),
+                   std::back_inserter(Fields), GatherSizesAndAlignments);
+    std::sort(Fields.begin(), Fields.end());
+    // This lets us skip over vptrs and non-virtual bases,
+    // so that we can just worry about the fields in our object.
+    // Note that this does cause us to miss some cases where we
+    // could pack more bytes in to a base class's tail padding.
+    CharUnits NewOffset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
+    CharUnits NewPad;
+    SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
+    while (!Fields.empty()) {
+      unsigned TrailingZeros =
+          llvm::countTrailingZeros((unsigned long long)NewOffset.getQuantity());
+      // If NewOffset is zero, then countTrailingZeros will be 64. Shifting
+      // 64 will overflow our unsigned long long. Shifting 63 will turn
+      // our long long (and CharUnits internal type) negative. So shift 62.
+      long long CurAlignmentBits = 1ull << (std::min)(TrailingZeros, 62u);
+      CharUnits CurAlignment = CharUnits::fromQuantity(CurAlignmentBits);
+      FieldInfo InsertPoint = {CurAlignment, CharUnits::Zero(), nullptr};
+      auto CurBegin = Fields.begin();
+      auto CurEnd = Fields.end();
+
+      // In the typical case, this will find the last element
+      // of the vector. We won't find a middle element unless
+      // we started on a poorly aligned address or have an overly
+      // aligned field.
+      auto Iter = std::upper_bound(CurBegin, CurEnd, InsertPoint);
+      if (Iter != CurBegin) {
+        // We found a field that we can layout with the current alignment.
+        --Iter;
+        NewOffset += Iter->Size;
+        OptimalFieldsOrder.push_back(Iter->Field);
+        Fields.erase(Iter);
+      } else {
+        // We are poorly aligned, and we need to pad in order to layout another
+        // field. Round up to at least the smallest field alignment that we
+        // currently have.
+        CharUnits NextOffset = NewOffset.alignTo(Fields[0].Align);
+        NewPad += NextOffset - NewOffset;
+        NewOffset = NextOffset;
+      }
+    }
+    // Calculate tail padding.
+    CharUnits NewSize = NewOffset.alignTo(RL.getAlignment());
+    NewPad += NewSize - NewOffset;
+    return {NewPad, std::move(OptimalFieldsOrder)};
+  }
+
+  void reportRecord(
+      const RecordDecl *RD, CharUnits BaselinePad, CharUnits OptimalPad,
+      const SmallVector<const FieldDecl *, 20> &OptimalFieldsOrder) const {
+    if (!PaddingBug)
+      PaddingBug =
+          llvm::make_unique<BugType>(this, "Excessive Padding", "Performance");
+
+    SmallString<100> Buf;
+    llvm::raw_svector_ostream Os(Buf);
+    Os << "Excessive padding in '";
+    Os << QualType::getAsString(RD->getTypeForDecl(), Qualifiers(),
+                                LangOptions())
+       << "'";
+
+    if (auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
+      // TODO: make this show up better in the console output and in
+      // the HTML. Maybe just make it show up in HTML like the path
+      // diagnostics show.
+      SourceLocation ILoc = TSD->getPointOfInstantiation();
+      if (ILoc.isValid())
+        Os << " instantiated here: "
+           << ILoc.printToString(BR->getSourceManager());
+    }
+
+    Os << " (" << BaselinePad.getQuantity() << " padding bytes, where "
+       << OptimalPad.getQuantity() << " is optimal). \n"
+       << "Optimal fields order: \n";
+    for (const auto *FD : OptimalFieldsOrder)
+      Os << FD->getName() << ", \n";
+    Os << "consider reordering the fields or adding explicit padding "
+          "members.";
+
+    PathDiagnosticLocation CELoc =
+        PathDiagnosticLocation::create(RD, BR->getSourceManager());
+    auto Report = llvm::make_unique<BugReport>(*PaddingBug, Os.str(), CELoc);
+    Report->setDeclWithIssue(RD);
+    Report->addRange(RD->getSourceRange());
+    BR->emitReport(std::move(Report));
+  }
+};
+}
+
+void ento::registerPaddingChecker(CheckerManager &Mgr) {
+  Mgr.registerChecker<PaddingChecker>();
+}
diff --git a/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp b/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp
index 8caf6df..4c4a8e3 100644
--- a/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp
@@ -154,8 +154,7 @@
     return;
 
   ProgramStateRef State = C.getState();
-  const MemRegion *Region =
-      State->getSVal(E, C.getLocationContext()).getAsRegion();
+  const MemRegion *Region = C.getSVal(E).getAsRegion();
   if (!Region)
     return;
   if (PointedNeeded)
@@ -227,7 +226,7 @@
   if (AllocFunctions.count(FunI) == 0)
     return;
 
-  SVal SV = State->getSVal(CE, C.getLocationContext());
+  SVal SV = C.getSVal(CE);
   const MemRegion *Region = SV.getAsRegion();
   if (!Region)
     return;
@@ -248,7 +247,7 @@
   AllocKind Kind = getKindOfNewOp(NE, FD);
 
   ProgramStateRef State = C.getState();
-  SVal AllocedVal = State->getSVal(NE, C.getLocationContext());
+  SVal AllocedVal = C.getSVal(NE);
   const MemRegion *Region = AllocedVal.getAsRegion();
   if (!Region)
     return;
@@ -263,7 +262,7 @@
 
   const Expr *CastedExpr = CE->getSubExpr();
   ProgramStateRef State = C.getState();
-  SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext());
+  SVal CastedVal = C.getSVal(CastedExpr);
 
   const MemRegion *Region = CastedVal.getAsRegion();
   if (!Region)
@@ -281,7 +280,7 @@
 
   const Expr *CastedExpr = CE->getSubExpr();
   ProgramStateRef State = C.getState();
-  SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext());
+  SVal CastedVal = C.getSVal(CastedExpr);
 
   const MemRegion *Region = CastedVal.getAsRegion();
   if (!Region)
@@ -304,8 +303,7 @@
 
 void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr,
                                        CheckerContext &C) const {
-  ProgramStateRef State = C.getState();
-  SVal Idx = State->getSVal(SubsExpr->getIdx(), C.getLocationContext());
+  SVal Idx = C.getSVal(SubsExpr->getIdx());
 
   // Indexing with 0 is OK.
   if (Idx.isZeroConstant())
@@ -324,14 +322,14 @@
   ProgramStateRef State = C.getState();
 
   if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) {
-    SVal RHSVal = State->getSVal(Rhs, C.getLocationContext());
+    SVal RHSVal = C.getSVal(Rhs);
     if (State->isNull(RHSVal).isConstrainedTrue())
       return;
     reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp());
   }
   // The int += ptr; case is not valid C++.
   if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) {
-    SVal LHSVal = State->getSVal(Lhs, C.getLocationContext());
+    SVal LHSVal = C.getSVal(Lhs);
     if (State->isNull(LHSVal).isConstrainedTrue())
       return;
     reportPointerArithMisuse(Rhs, C);
diff --git a/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp b/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp
index 2d33ebc..9aa5348 100644
--- a/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp
@@ -39,10 +39,8 @@
   if (B->getOpcode() != BO_Sub)
     return;
 
-  ProgramStateRef state = C.getState();
-  const LocationContext *LCtx = C.getLocationContext();
-  SVal LV = state->getSVal(B->getLHS(), LCtx);
-  SVal RV = state->getSVal(B->getRHS(), LCtx);
+  SVal LV = C.getSVal(B->getLHS());
+  SVal RV = C.getSVal(B->getRHS());
 
   const MemRegion *LR = LV.getAsRegion();
   const MemRegion *RR = RV.getAsRegion();
diff --git a/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp b/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
index dab29be..10ab952 100644
--- a/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
@@ -109,8 +109,6 @@
 
 void PthreadLockChecker::checkPostStmt(const CallExpr *CE,
                                        CheckerContext &C) const {
-  ProgramStateRef state = C.getState();
-  const LocationContext *LCtx = C.getLocationContext();
   StringRef FName = C.getCalleeName(CE);
   if (FName.empty())
     return;
@@ -121,34 +119,31 @@
   if (FName == "pthread_mutex_lock" ||
       FName == "pthread_rwlock_rdlock" ||
       FName == "pthread_rwlock_wrlock")
-    AcquireLock(C, CE, state->getSVal(CE->getArg(0), LCtx),
-                false, PthreadSemantics);
+    AcquireLock(C, CE, C.getSVal(CE->getArg(0)), false, PthreadSemantics);
   else if (FName == "lck_mtx_lock" ||
            FName == "lck_rw_lock_exclusive" ||
            FName == "lck_rw_lock_shared")
-    AcquireLock(C, CE, state->getSVal(CE->getArg(0), LCtx),
-                false, XNUSemantics);
+    AcquireLock(C, CE, C.getSVal(CE->getArg(0)), false, XNUSemantics);
   else if (FName == "pthread_mutex_trylock" ||
            FName == "pthread_rwlock_tryrdlock" ||
            FName == "pthread_rwlock_trywrlock")
-    AcquireLock(C, CE, state->getSVal(CE->getArg(0), LCtx),
+    AcquireLock(C, CE, C.getSVal(CE->getArg(0)),
                 true, PthreadSemantics);
   else if (FName == "lck_mtx_try_lock" ||
            FName == "lck_rw_try_lock_exclusive" ||
            FName == "lck_rw_try_lock_shared")
-    AcquireLock(C, CE, state->getSVal(CE->getArg(0), LCtx),
-                true, XNUSemantics);
+    AcquireLock(C, CE, C.getSVal(CE->getArg(0)), true, XNUSemantics);
   else if (FName == "pthread_mutex_unlock" ||
            FName == "pthread_rwlock_unlock" ||
            FName == "lck_mtx_unlock" ||
            FName == "lck_rw_done")
-    ReleaseLock(C, CE, state->getSVal(CE->getArg(0), LCtx));
+    ReleaseLock(C, CE, C.getSVal(CE->getArg(0)));
   else if (FName == "pthread_mutex_destroy")
-    DestroyLock(C, CE, state->getSVal(CE->getArg(0), LCtx), PthreadSemantics);
+    DestroyLock(C, CE, C.getSVal(CE->getArg(0)), PthreadSemantics);
   else if (FName == "lck_mtx_destroy")
-    DestroyLock(C, CE, state->getSVal(CE->getArg(0), LCtx), XNUSemantics);
+    DestroyLock(C, CE, C.getSVal(CE->getArg(0)), XNUSemantics);
   else if (FName == "pthread_mutex_init")
-    InitLock(C, CE, state->getSVal(CE->getArg(0), LCtx));
+    InitLock(C, CE, C.getSVal(CE->getArg(0)));
 }
 
 // When a lock is destroyed, in some semantics(like PthreadSemantics) we are not
@@ -232,7 +227,7 @@
   if (sym)
     state = resolvePossiblyDestroyedMutex(state, lockR, sym);
 
-  SVal X = state->getSVal(CE, C.getLocationContext());
+  SVal X = C.getSVal(CE);
   if (X.isUnknownOrUndef())
     return;
 
diff --git a/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp b/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
index 4db83af..bf5945c 100644
--- a/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
@@ -1201,10 +1201,10 @@
         break;
       }
 
-      // For the Disk Arbitration API (DiskArbitration/DADisk.h)
-      if (cocoa::isRefType(RetTy, "DADisk") ||
-          cocoa::isRefType(RetTy, "DADissenter") ||
-          cocoa::isRefType(RetTy, "DASessionRef")) {
+      // For all other CF-style types, use the Create/Get
+      // rule for summaries but don't support Retain functions
+      // with framework-specific prefixes.
+      if (coreFoundation::isCFObjectRef(RetTy)) {
         S = getCFCreateGetRuleSummary(FD);
         break;
       }
@@ -1929,6 +1929,12 @@
          isa<CXXBoolLiteralExpr>(E);
 }
 
+static std::string describeRegion(const MemRegion *MR) {
+  // Once we support more storage locations for bindings,
+  // this would need to be improved.
+  return cast<VarRegion>(MR)->getDecl()->getName();
+}
+
 /// Returns true if this stack frame is for an Objective-C method that is a
 /// property getter or setter whose body has been synthesized by the analyzer.
 static bool isSynthesizedAccessor(const StackFrameContext *SFC) {
@@ -2395,7 +2401,7 @@
 
   if (FirstBinding) {
     os << "object allocated and stored into '"
-       << FirstBinding->getString() << '\'';
+       << describeRegion(FirstBinding) << '\'';
   }
   else
     os << "allocated object";
@@ -2523,7 +2529,7 @@
   os << "of an object";
 
   if (AllocBinding) {
-    os << " stored into '" << AllocBinding->getString() << '\'';
+    os << " stored into '" << describeRegion(AllocBinding) << '\'';
     if (IncludeAllocationLine) {
       FullSourceLoc SL(AllocStmt->getLocStart(), Ctx.getSourceManager());
       os << " (allocated on line " << SL.getSpellingLineNumber() << ")";
@@ -2799,9 +2805,7 @@
     return;
 
   ProgramStateRef state = C.getState();
-  const BlockDataRegion *R =
-    cast<BlockDataRegion>(state->getSVal(BE,
-                                         C.getLocationContext()).getAsRegion());
+  auto *R = cast<BlockDataRegion>(C.getSVal(BE).getAsRegion());
 
   BlockDataRegion::referenced_vars_iterator I = R->referenced_vars_begin(),
                                             E = R->referenced_vars_end();
@@ -2851,7 +2855,7 @@
   }
 
   ProgramStateRef state = C.getState();
-  SymbolRef Sym = state->getSVal(CE, C.getLocationContext()).getAsLocSymbol();
+  SymbolRef Sym = C.getSVal(CE).getAsLocSymbol();
   if (!Sym)
     return;
   const RefVal* T = getRefBinding(state, Sym);
@@ -2874,7 +2878,7 @@
   ProgramStateRef state = C.getState();
   const ExplodedNode *pred = C.getPredecessor();
   for (const Stmt *Child : Ex->children()) {
-    SVal V = state->getSVal(Child, pred->getLocationContext());
+    SVal V = pred->getSVal(Child);
     if (SymbolRef sym = V.getAsSymbol())
       if (const RefVal* T = getRefBinding(state, sym)) {
         RefVal::Kind hasErr = (RefVal::Kind) 0;
@@ -2913,10 +2917,9 @@
 void RetainCountChecker::checkPostStmt(const ObjCBoxedExpr *Ex,
                                        CheckerContext &C) const {
   const ExplodedNode *Pred = C.getPredecessor();
-  const LocationContext *LCtx = Pred->getLocationContext();
   ProgramStateRef State = Pred->getState();
 
-  if (SymbolRef Sym = State->getSVal(Ex, LCtx).getAsSymbol()) {
+  if (SymbolRef Sym = Pred->getSVal(Ex).getAsSymbol()) {
     QualType ResultTy = Ex->getType();
     State = setRefBinding(State, Sym,
                           RefVal::makeNotOwned(RetEffect::ObjC, ResultTy));
diff --git a/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp b/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp
index 19fa0fb..1952715 100644
--- a/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp
@@ -40,7 +40,7 @@
   if (!RetE)
     return;
 
-  SVal V = state->getSVal(RetE, C.getLocationContext());
+  SVal V = C.getSVal(RetE);
   const MemRegion *R = V.getAsRegion();
 
   const ElementRegion *ER = dyn_cast_or_null<ElementRegion>(R);
diff --git a/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp b/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
index 556274d..fb639ad 100644
--- a/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
@@ -18,6 +18,7 @@
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "llvm/ADT/SmallString.h"
@@ -26,85 +27,139 @@
 using namespace ento;
 
 namespace {
-class StackAddrEscapeChecker : public Checker< check::PreStmt<ReturnStmt>,
-                                               check::EndFunction > {
+class StackAddrEscapeChecker
+    : public Checker<check::PreCall, check::PreStmt<ReturnStmt>,
+                     check::EndFunction> {
+  mutable IdentifierInfo *dispatch_semaphore_tII;
   mutable std::unique_ptr<BuiltinBug> BT_stackleak;
   mutable std::unique_ptr<BuiltinBug> BT_returnstack;
+  mutable std::unique_ptr<BuiltinBug> BT_capturedstackasync;
+  mutable std::unique_ptr<BuiltinBug> BT_capturedstackret;
 
 public:
+  enum CheckKind {
+    CK_StackAddrEscapeChecker,
+    CK_StackAddrAsyncEscapeChecker,
+    CK_NumCheckKinds
+  };
+
+  DefaultBool ChecksEnabled[CK_NumCheckKinds];
+
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
   void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const;
   void checkEndFunction(CheckerContext &Ctx) const;
+
 private:
+  void checkReturnedBlockCaptures(const BlockDataRegion &B,
+                                  CheckerContext &C) const;
+  void checkAsyncExecutedBlockCaptures(const BlockDataRegion &B,
+                                       CheckerContext &C) const;
   void EmitStackError(CheckerContext &C, const MemRegion *R,
                       const Expr *RetE) const;
+  bool isSemaphoreCaptured(const BlockDecl &B) const;
   static SourceRange genName(raw_ostream &os, const MemRegion *R,
                              ASTContext &Ctx);
+  static SmallVector<const MemRegion *, 4>
+  getCapturedStackRegions(const BlockDataRegion &B, CheckerContext &C);
+  static bool isArcManagedBlock(const MemRegion *R, CheckerContext &C);
+  static bool isNotInCurrentFrame(const MemRegion *R, CheckerContext &C);
 };
-}
+} // namespace
 
 SourceRange StackAddrEscapeChecker::genName(raw_ostream &os, const MemRegion *R,
                                             ASTContext &Ctx) {
-    // Get the base region, stripping away fields and elements.
+  // Get the base region, stripping away fields and elements.
   R = R->getBaseRegion();
   SourceManager &SM = Ctx.getSourceManager();
   SourceRange range;
   os << "Address of ";
 
   // Check if the region is a compound literal.
-  if (const CompoundLiteralRegion* CR = dyn_cast<CompoundLiteralRegion>(R)) {
+  if (const auto *CR = dyn_cast<CompoundLiteralRegion>(R)) {
     const CompoundLiteralExpr *CL = CR->getLiteralExpr();
     os << "stack memory associated with a compound literal "
           "declared on line "
-        << SM.getExpansionLineNumber(CL->getLocStart())
-        << " returned to caller";
+       << SM.getExpansionLineNumber(CL->getLocStart()) << " returned to caller";
     range = CL->getSourceRange();
-  }
-  else if (const AllocaRegion* AR = dyn_cast<AllocaRegion>(R)) {
+  } else if (const auto *AR = dyn_cast<AllocaRegion>(R)) {
     const Expr *ARE = AR->getExpr();
     SourceLocation L = ARE->getLocStart();
     range = ARE->getSourceRange();
     os << "stack memory allocated by call to alloca() on line "
        << SM.getExpansionLineNumber(L);
-  }
-  else if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(R)) {
+  } else if (const auto *BR = dyn_cast<BlockDataRegion>(R)) {
     const BlockDecl *BD = BR->getCodeRegion()->getDecl();
     SourceLocation L = BD->getLocStart();
     range = BD->getSourceRange();
     os << "stack-allocated block declared on line "
        << SM.getExpansionLineNumber(L);
-  }
-  else if (const VarRegion *VR = dyn_cast<VarRegion>(R)) {
-    os << "stack memory associated with local variable '"
-       << VR->getString() << '\'';
+  } else if (const auto *VR = dyn_cast<VarRegion>(R)) {
+    os << "stack memory associated with local variable '" << VR->getString()
+       << '\'';
     range = VR->getDecl()->getSourceRange();
-  }
-  else if (const CXXTempObjectRegion *TOR = dyn_cast<CXXTempObjectRegion>(R)) {
+  } else if (const auto *TOR = dyn_cast<CXXTempObjectRegion>(R)) {
     QualType Ty = TOR->getValueType().getLocalUnqualifiedType();
     os << "stack memory associated with temporary object of type '";
     Ty.print(os, Ctx.getPrintingPolicy());
     os << "'";
     range = TOR->getExpr()->getSourceRange();
-  }
-  else {
+  } else {
     llvm_unreachable("Invalid region in ReturnStackAddressChecker.");
   }
 
   return range;
 }
 
-void StackAddrEscapeChecker::EmitStackError(CheckerContext &C, const MemRegion *R,
-                                          const Expr *RetE) const {
-  ExplodedNode *N = C.generateErrorNode();
+bool StackAddrEscapeChecker::isArcManagedBlock(const MemRegion *R,
+                                               CheckerContext &C) {
+  assert(R && "MemRegion should not be null");
+  return C.getASTContext().getLangOpts().ObjCAutoRefCount &&
+         isa<BlockDataRegion>(R);
+}
 
+bool StackAddrEscapeChecker::isNotInCurrentFrame(const MemRegion *R,
+                                                 CheckerContext &C) {
+  const StackSpaceRegion *S = cast<StackSpaceRegion>(R->getMemorySpace());
+  return S->getStackFrame() != C.getLocationContext()->getCurrentStackFrame();
+}
+
+bool StackAddrEscapeChecker::isSemaphoreCaptured(const BlockDecl &B) const {
+  if (!dispatch_semaphore_tII)
+    dispatch_semaphore_tII = &B.getASTContext().Idents.get("dispatch_semaphore_t");
+  for (const auto &C : B.captures()) {
+    const auto *T = C.getVariable()->getType()->getAs<TypedefType>();
+    if (T && T->getDecl()->getIdentifier() == dispatch_semaphore_tII)
+      return true; 
+  }
+  return false;
+}
+
+SmallVector<const MemRegion *, 4>
+StackAddrEscapeChecker::getCapturedStackRegions(const BlockDataRegion &B,
+                                                CheckerContext &C) {
+  SmallVector<const MemRegion *, 4> Regions;
+  BlockDataRegion::referenced_vars_iterator I = B.referenced_vars_begin();
+  BlockDataRegion::referenced_vars_iterator E = B.referenced_vars_end();
+  for (; I != E; ++I) {
+    SVal Val = C.getState()->getSVal(I.getCapturedRegion());
+    const MemRegion *Region = Val.getAsRegion();
+    if (Region && isa<StackSpaceRegion>(Region->getMemorySpace()))
+      Regions.push_back(Region);
+  }
+  return Regions;
+}
+
+void StackAddrEscapeChecker::EmitStackError(CheckerContext &C,
+                                            const MemRegion *R,
+                                            const Expr *RetE) const {
+  ExplodedNode *N = C.generateNonFatalErrorNode();
   if (!N)
     return;
-
   if (!BT_returnstack)
-    BT_returnstack.reset(
-        new BuiltinBug(this, "Return of address to stack-allocated memory"));
-
+    BT_returnstack = llvm::make_unique<BuiltinBug>(
+        this, "Return of address to stack-allocated memory");
   // Generate a report for this bug.
-  SmallString<512> buf;
+  SmallString<128> buf;
   llvm::raw_svector_ostream os(buf);
   SourceRange range = genName(os, R, C.getASTContext());
   os << " returned to caller";
@@ -112,40 +167,104 @@
   report->addRange(RetE->getSourceRange());
   if (range.isValid())
     report->addRange(range);
-
   C.emitReport(std::move(report));
 }
 
+void StackAddrEscapeChecker::checkAsyncExecutedBlockCaptures(
+    const BlockDataRegion &B, CheckerContext &C) const {
+  // There is a not-too-uncommon idiom
+  // where a block passed to dispatch_async captures a semaphore
+  // and then the thread (which called dispatch_async) is blocked on waiting
+  // for the completion of the execution of the block 
+  // via dispatch_semaphore_wait. To avoid false-positives (for now) 
+  // we ignore all the blocks which have captured 
+  // a variable of the type "dispatch_semaphore_t".
+  if (isSemaphoreCaptured(*B.getDecl()))
+    return;
+  for (const MemRegion *Region : getCapturedStackRegions(B, C)) {
+    // The block passed to dispatch_async may capture another block
+    // created on the stack. However, there is no leak in this situaton,
+    // no matter if ARC or no ARC is enabled:
+    // dispatch_async copies the passed "outer" block (via Block_copy)
+    // and if the block has captured another "inner" block,
+    // the "inner" block will be copied as well.
+    if (isa<BlockDataRegion>(Region))
+      continue;
+    ExplodedNode *N = C.generateNonFatalErrorNode();
+    if (!N)
+      continue;
+    if (!BT_capturedstackasync)
+      BT_capturedstackasync = llvm::make_unique<BuiltinBug>(
+          this, "Address of stack-allocated memory is captured");
+    SmallString<128> Buf;
+    llvm::raw_svector_ostream Out(Buf);
+    SourceRange Range = genName(Out, Region, C.getASTContext());
+    Out << " is captured by an asynchronously-executed block";
+    auto Report =
+        llvm::make_unique<BugReport>(*BT_capturedstackasync, Out.str(), N);
+    if (Range.isValid())
+      Report->addRange(Range);
+    C.emitReport(std::move(Report));
+  }
+}
+
+void StackAddrEscapeChecker::checkReturnedBlockCaptures(
+    const BlockDataRegion &B, CheckerContext &C) const {
+  for (const MemRegion *Region : getCapturedStackRegions(B, C)) {
+    if (isArcManagedBlock(Region, C) || isNotInCurrentFrame(Region, C))
+      continue;
+    ExplodedNode *N = C.generateNonFatalErrorNode();
+    if (!N)
+      continue;
+    if (!BT_capturedstackret)
+      BT_capturedstackret = llvm::make_unique<BuiltinBug>(
+          this, "Address of stack-allocated memory is captured");
+    SmallString<128> Buf;
+    llvm::raw_svector_ostream Out(Buf);
+    SourceRange Range = genName(Out, Region, C.getASTContext());
+    Out << " is captured by a returned block";
+    auto Report =
+        llvm::make_unique<BugReport>(*BT_capturedstackret, Out.str(), N);
+    if (Range.isValid())
+      Report->addRange(Range);
+    C.emitReport(std::move(Report));
+  }
+}
+
+void StackAddrEscapeChecker::checkPreCall(const CallEvent &Call,
+                                          CheckerContext &C) const {
+  if (!ChecksEnabled[CK_StackAddrAsyncEscapeChecker])
+    return;
+  if (!Call.isGlobalCFunction("dispatch_after") &&
+      !Call.isGlobalCFunction("dispatch_async"))
+    return;
+  for (unsigned Idx = 0, NumArgs = Call.getNumArgs(); Idx < NumArgs; ++Idx) {
+    if (const BlockDataRegion *B = dyn_cast_or_null<BlockDataRegion>(
+            Call.getArgSVal(Idx).getAsRegion()))
+      checkAsyncExecutedBlockCaptures(*B, C);
+  }
+}
+
 void StackAddrEscapeChecker::checkPreStmt(const ReturnStmt *RS,
                                           CheckerContext &C) const {
+  if (!ChecksEnabled[CK_StackAddrEscapeChecker])
+    return;
 
   const Expr *RetE = RS->getRetValue();
   if (!RetE)
     return;
   RetE = RetE->IgnoreParens();
 
-  const LocationContext *LCtx = C.getLocationContext();
-  SVal V = C.getState()->getSVal(RetE, LCtx);
+  SVal V = C.getSVal(RetE);
   const MemRegion *R = V.getAsRegion();
-
   if (!R)
     return;
 
-  const StackSpaceRegion *SS =
-    dyn_cast_or_null<StackSpaceRegion>(R->getMemorySpace());
+  if (const BlockDataRegion *B = dyn_cast<BlockDataRegion>(R))
+    checkReturnedBlockCaptures(*B, C);
 
-  if (!SS)
-    return;
-
-  // Return stack memory in an ancestor stack frame is fine.
-  const StackFrameContext *CurFrame = LCtx->getCurrentStackFrame();
-  const StackFrameContext *MemFrame = SS->getStackFrame();
-  if (MemFrame != CurFrame)
-    return;
-
-  // Automatic reference counting automatically copies blocks.
-  if (C.getASTContext().getLangOpts().ObjCAutoRefCount &&
-      isa<BlockDataRegion>(R))
+  if (!isa<StackSpaceRegion>(R->getMemorySpace()) || 
+      isNotInCurrentFrame(R, C) || isArcManagedBlock(R, C))
     return;
 
   // Returning a record by value is fine. (In this case, the returned
@@ -169,7 +288,10 @@
 }
 
 void StackAddrEscapeChecker::checkEndFunction(CheckerContext &Ctx) const {
-  ProgramStateRef state = Ctx.getState();
+  if (!ChecksEnabled[CK_StackAddrEscapeChecker])
+    return;
+
+  ProgramStateRef State = Ctx.getState();
 
   // Iterate over all bindings to global variables and see if it contains
   // a memory region in the stack space.
@@ -177,82 +299,73 @@
   private:
     CheckerContext &Ctx;
     const StackFrameContext *CurSFC;
+
   public:
-    SmallVector<std::pair<const MemRegion*, const MemRegion*>, 10> V;
+    SmallVector<std::pair<const MemRegion *, const MemRegion *>, 10> V;
 
-    CallBack(CheckerContext &CC) :
-      Ctx(CC),
-      CurSFC(CC.getLocationContext()->getCurrentStackFrame())
-    {}
+    CallBack(CheckerContext &CC)
+        : Ctx(CC), CurSFC(CC.getLocationContext()->getCurrentStackFrame()) {}
 
-    bool HandleBinding(StoreManager &SMgr, Store store,
-                       const MemRegion *region, SVal val) override {
+    bool HandleBinding(StoreManager &SMgr, Store S, const MemRegion *Region,
+                       SVal Val) override {
 
-      if (!isa<GlobalsSpaceRegion>(region->getMemorySpace()))
+      if (!isa<GlobalsSpaceRegion>(Region->getMemorySpace()))
         return true;
-
-      const MemRegion *vR = val.getAsRegion();
-      if (!vR)
-        return true;
-
-      // Under automated retain release, it is okay to assign a block
-      // directly to a global variable.
-      if (Ctx.getASTContext().getLangOpts().ObjCAutoRefCount &&
-          isa<BlockDataRegion>(vR))
-        return true;
-
-      if (const StackSpaceRegion *SSR =
-          dyn_cast<StackSpaceRegion>(vR->getMemorySpace())) {
-        // If the global variable holds a location in the current stack frame,
-        // record the binding to emit a warning.
-        if (SSR->getStackFrame() == CurSFC)
-          V.push_back(std::make_pair(region, vR));
-      }
-
+      const MemRegion *VR = Val.getAsRegion();
+      if (VR && isa<StackSpaceRegion>(VR->getMemorySpace()) &&
+          !isArcManagedBlock(VR, Ctx) && !isNotInCurrentFrame(VR, Ctx))
+        V.emplace_back(Region, VR);
       return true;
     }
   };
 
-  CallBack cb(Ctx);
-  state->getStateManager().getStoreManager().iterBindings(state->getStore(),cb);
+  CallBack Cb(Ctx);
+  State->getStateManager().getStoreManager().iterBindings(State->getStore(),
+                                                          Cb);
 
-  if (cb.V.empty())
+  if (Cb.V.empty())
     return;
 
   // Generate an error node.
-  ExplodedNode *N = Ctx.generateNonFatalErrorNode(state);
+  ExplodedNode *N = Ctx.generateNonFatalErrorNode(State);
   if (!N)
     return;
 
   if (!BT_stackleak)
-    BT_stackleak.reset(
-        new BuiltinBug(this, "Stack address stored into global variable",
-                       "Stack address was saved into a global variable. "
-                       "This is dangerous because the address will become "
-                       "invalid after returning from the function"));
+    BT_stackleak = llvm::make_unique<BuiltinBug>(
+        this, "Stack address stored into global variable",
+        "Stack address was saved into a global variable. "
+        "This is dangerous because the address will become "
+        "invalid after returning from the function");
 
-  for (unsigned i = 0, e = cb.V.size(); i != e; ++i) {
+  for (const auto &P : Cb.V) {
     // Generate a report for this bug.
-    SmallString<512> buf;
-    llvm::raw_svector_ostream os(buf);
-    SourceRange range = genName(os, cb.V[i].second, Ctx.getASTContext());
-    os << " is still referred to by the ";
-    if (isa<StaticGlobalSpaceRegion>(cb.V[i].first->getMemorySpace()))
-      os << "static";
+    SmallString<128> Buf;
+    llvm::raw_svector_ostream Out(Buf);
+    SourceRange Range = genName(Out, P.second, Ctx.getASTContext());
+    Out << " is still referred to by the ";
+    if (isa<StaticGlobalSpaceRegion>(P.first->getMemorySpace()))
+      Out << "static";
     else
-      os << "global";
-    os << " variable '";
-    const VarRegion *VR = cast<VarRegion>(cb.V[i].first->getBaseRegion());
-    os << *VR->getDecl()
-       << "' upon returning to the caller.  This will be a dangling reference";
-    auto report = llvm::make_unique<BugReport>(*BT_stackleak, os.str(), N);
-    if (range.isValid())
-      report->addRange(range);
+      Out << "global";
+    Out << " variable '";
+    const VarRegion *VR = cast<VarRegion>(P.first->getBaseRegion());
+    Out << *VR->getDecl()
+        << "' upon returning to the caller.  This will be a dangling reference";
+    auto Report = llvm::make_unique<BugReport>(*BT_stackleak, Out.str(), N);
+    if (Range.isValid())
+      Report->addRange(Range);
 
-    Ctx.emitReport(std::move(report));
+    Ctx.emitReport(std::move(Report));
   }
 }
 
-void ento::registerStackAddrEscapeChecker(CheckerManager &mgr) {
-  mgr.registerChecker<StackAddrEscapeChecker>();
-}
+#define REGISTER_CHECKER(name) \
+  void ento::register##name(CheckerManager &Mgr) { \
+    StackAddrEscapeChecker *Chk = \
+        Mgr.registerChecker<StackAddrEscapeChecker>(); \
+    Chk->ChecksEnabled[StackAddrEscapeChecker::CK_##name] = true; \
+  }
+
+REGISTER_CHECKER(StackAddrEscapeChecker)
+REGISTER_CHECKER(StackAddrAsyncEscapeChecker)
diff --git a/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 915514b..d779755 100644
--- a/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -242,22 +242,19 @@
 
 void StreamChecker::Fread(CheckerContext &C, const CallExpr *CE) const {
   ProgramStateRef state = C.getState();
-  if (!CheckNullStream(state->getSVal(CE->getArg(3), C.getLocationContext()),
-                       state, C))
+  if (!CheckNullStream(C.getSVal(CE->getArg(3)), state, C))
     return;
 }
 
 void StreamChecker::Fwrite(CheckerContext &C, const CallExpr *CE) const {
   ProgramStateRef state = C.getState();
-  if (!CheckNullStream(state->getSVal(CE->getArg(3), C.getLocationContext()),
-                       state, C))
+  if (!CheckNullStream(C.getSVal(CE->getArg(3)), state, C))
     return;
 }
 
 void StreamChecker::Fseek(CheckerContext &C, const CallExpr *CE) const {
   ProgramStateRef state = C.getState();
-  if (!(state = CheckNullStream(state->getSVal(CE->getArg(0),
-                                               C.getLocationContext()), state, C)))
+  if (!(state = CheckNullStream(C.getSVal(CE->getArg(0)), state, C)))
     return;
   // Check the legality of the 'whence' argument of 'fseek'.
   SVal Whence = state->getSVal(CE->getArg(2), C.getLocationContext());
@@ -283,57 +280,49 @@
 
 void StreamChecker::Ftell(CheckerContext &C, const CallExpr *CE) const {
   ProgramStateRef state = C.getState();
-  if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()),
-                       state, C))
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
     return;
 }
 
 void StreamChecker::Rewind(CheckerContext &C, const CallExpr *CE) const {
   ProgramStateRef state = C.getState();
-  if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()),
-                       state, C))
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
     return;
 }
 
 void StreamChecker::Fgetpos(CheckerContext &C, const CallExpr *CE) const {
   ProgramStateRef state = C.getState();
-  if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()),
-                       state, C))
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
     return;
 }
 
 void StreamChecker::Fsetpos(CheckerContext &C, const CallExpr *CE) const {
   ProgramStateRef state = C.getState();
-  if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()),
-                       state, C))
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
     return;
 }
 
 void StreamChecker::Clearerr(CheckerContext &C, const CallExpr *CE) const {
   ProgramStateRef state = C.getState();
-  if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()),
-                       state, C))
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
     return;
 }
 
 void StreamChecker::Feof(CheckerContext &C, const CallExpr *CE) const {
   ProgramStateRef state = C.getState();
-  if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()),
-                       state, C))
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
     return;
 }
 
 void StreamChecker::Ferror(CheckerContext &C, const CallExpr *CE) const {
   ProgramStateRef state = C.getState();
-  if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()),
-                       state, C))
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
     return;
 }
 
 void StreamChecker::Fileno(CheckerContext &C, const CallExpr *CE) const {
   ProgramStateRef state = C.getState();
-  if (!CheckNullStream(state->getSVal(CE->getArg(0), C.getLocationContext()),
-                       state, C))
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
     return;
 }
 
@@ -363,8 +352,7 @@
 ProgramStateRef StreamChecker::CheckDoubleClose(const CallExpr *CE,
                                                ProgramStateRef state,
                                                CheckerContext &C) const {
-  SymbolRef Sym =
-    state->getSVal(CE->getArg(0), C.getLocationContext()).getAsSymbol();
+  SymbolRef Sym = C.getSVal(CE->getArg(0)).getAsSymbol();
   if (!Sym)
     return state;
 
diff --git a/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp b/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp
index 5268bbf..b83dea7 100644
--- a/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp
@@ -114,8 +114,7 @@
   if (!E)
     return nullptr;
 
-  ProgramStateRef State = Succ->getState();
-  SVal S = State->getSVal(E, Succ->getLocationContext());
+  SVal S = Succ->getSVal(E);
   if (ZeroSymbol == S.getAsSymbol() && SFC == Succ->getStackFrame()) {
     Satisfied = true;
 
diff --git a/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp b/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
index 0a27429..934ee63 100644
--- a/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
@@ -59,7 +59,7 @@
 
 void UndefBranchChecker::checkBranchCondition(const Stmt *Condition,
                                               CheckerContext &Ctx) const {
-  SVal X = Ctx.getState()->getSVal(Condition, Ctx.getLocationContext());
+  SVal X = Ctx.getSVal(Condition);
   if (X.isUndef()) {
     // Generate a sink node, which implicitly marks both outgoing branches as
     // infeasible.
diff --git a/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp b/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp
index 17fe861..6a93c10 100644
--- a/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp
@@ -55,9 +55,7 @@
     return;
 
   ProgramStateRef state = C.getState();
-  const BlockDataRegion *R =
-    cast<BlockDataRegion>(state->getSVal(BE,
-                                         C.getLocationContext()).getAsRegion());
+  auto *R = cast<BlockDataRegion>(C.getSVal(BE).getAsRegion());
 
   BlockDataRegion::referenced_vars_iterator I = R->referenced_vars_begin(),
                                             E = R->referenced_vars_end();
diff --git a/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp b/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp
index 21e8b10..b9a93be 100644
--- a/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp
@@ -37,12 +37,11 @@
 
 static bool isArrayIndexOutOfBounds(CheckerContext &C, const Expr *Ex) {
   ProgramStateRef state = C.getState();
-  const LocationContext *LCtx = C.getLocationContext();
 
   if (!isa<ArraySubscriptExpr>(Ex))
     return false;
 
-  SVal Loc = state->getSVal(Ex, LCtx);
+  SVal Loc = C.getSVal(Ex);
   if (!Loc.isValid())
     return false;
 
@@ -64,11 +63,18 @@
       B->getRHS(), C.getASTContext().getIntWidth(B->getLHS()->getType()));
 }
 
+static bool isLeftShiftResultUnrepresentable(const BinaryOperator *B,
+                                             CheckerContext &C) {
+  SValBuilder &SB = C.getSValBuilder();
+  ProgramStateRef State = C.getState();
+  const llvm::APSInt *LHS = SB.getKnownValue(State, C.getSVal(B->getLHS()));
+  const llvm::APSInt *RHS = SB.getKnownValue(State, C.getSVal(B->getRHS()));
+  return (unsigned)RHS->getZExtValue() > LHS->countLeadingZeros();
+}
+
 void UndefResultChecker::checkPostStmt(const BinaryOperator *B,
                                        CheckerContext &C) const {
-  ProgramStateRef state = C.getState();
-  const LocationContext *LCtx = C.getLocationContext();
-  if (state->getSVal(B, LCtx).isUndef()) {
+  if (C.getSVal(B).isUndef()) {
 
     // Do not report assignments of uninitialized values inside swap functions.
     // This should allow to swap partially uninitialized structs
@@ -92,11 +98,11 @@
     const Expr *Ex = nullptr;
     bool isLeft = true;
 
-    if (state->getSVal(B->getLHS(), LCtx).isUndef()) {
+    if (C.getSVal(B->getLHS()).isUndef()) {
       Ex = B->getLHS()->IgnoreParenCasts();
       isLeft = true;
     }
-    else if (state->getSVal(B->getRHS(), LCtx).isUndef()) {
+    else if (C.getSVal(B->getRHS()).isUndef()) {
       Ex = B->getRHS()->IgnoreParenCasts();
       isLeft = false;
     }
@@ -137,6 +143,23 @@
 
         OS << " greater or equal to the width of type '"
            << B->getLHS()->getType().getAsString() << "'.";
+      } else if (B->getOpcode() == BinaryOperatorKind::BO_Shl &&
+                 C.isNegative(B->getLHS())) {
+        OS << "The result of the left shift is undefined because the left "
+              "operand is negative";
+      } else if (B->getOpcode() == BinaryOperatorKind::BO_Shl &&
+                 isLeftShiftResultUnrepresentable(B, C)) {
+        ProgramStateRef State = C.getState();
+        SValBuilder &SB = C.getSValBuilder();
+        const llvm::APSInt *LHS =
+            SB.getKnownValue(State, C.getSVal(B->getLHS()));
+        const llvm::APSInt *RHS =
+            SB.getKnownValue(State, C.getSVal(B->getRHS()));
+        OS << "The result of the left shift is undefined due to shifting \'"
+           << LHS->getSExtValue() << "\' by \'" << RHS->getZExtValue()
+           << "\', which is unrepresentable in the unsigned version of "
+           << "the return type \'" << B->getLHS()->getType().getAsString()
+           << "\'";
       } else {
         OS << "The result of the '"
            << BinaryOperator::getOpcodeStr(B->getOpcode())
diff --git a/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp b/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp
index 7a31efc..2ef6855 100644
--- a/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp
@@ -51,20 +51,30 @@
   if (!N)
     return;
 
-  const char *str = "Assigned value is garbage or undefined";
-
+  static const char *const DefaultMsg =
+      "Assigned value is garbage or undefined";
   if (!BT)
-    BT.reset(new BuiltinBug(this, str));
+    BT.reset(new BuiltinBug(this, DefaultMsg));
 
   // Generate a report for this bug.
+  llvm::SmallString<128> Str;
+  llvm::raw_svector_ostream OS(Str);
+
   const Expr *ex = nullptr;
 
   while (StoreE) {
+    if (const UnaryOperator *U = dyn_cast<UnaryOperator>(StoreE)) {
+      OS << "The expression is an uninitialized value. "
+            "The computed value will also be garbage";
+
+      ex = U->getSubExpr();
+      break;
+    }
+
     if (const BinaryOperator *B = dyn_cast<BinaryOperator>(StoreE)) {
       if (B->isCompoundAssignmentOp()) {
-        ProgramStateRef state = C.getState();
-        if (state->getSVal(B->getLHS(), C.getLocationContext()).isUndef()) {
-          str = "The left expression of the compound assignment is an "
+        if (C.getSVal(B->getLHS()).isUndef()) {
+          OS << "The left expression of the compound assignment is an "
                 "uninitialized value. The computed value will also be garbage";
           ex = B->getLHS();
           break;
@@ -80,10 +90,26 @@
       ex = VD->getInit();
     }
 
+    if (const auto *CD =
+            dyn_cast<CXXConstructorDecl>(C.getStackFrame()->getDecl())) {
+      if (CD->isImplicit()) {
+        for (auto I : CD->inits()) {
+          if (I->getInit()->IgnoreImpCasts() == StoreE) {
+            OS << "Value assigned to field '" << I->getMember()->getName()
+               << "' in implicit constructor is garbage or undefined";
+            break;
+          }
+        }
+      }
+    }
+
     break;
   }
 
-  auto R = llvm::make_unique<BugReport>(*BT, str, N);
+  if (OS.str().empty())
+    OS << DefaultMsg;
+
+  auto R = llvm::make_unique<BugReport>(*BT, OS.str(), N);
   if (ex) {
     R->addRange(ex->getSourceRange());
     bugreporter::trackNullOrUndefValue(N, ex, *R);
diff --git a/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp b/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
index 7f9a00f..a6b50dc 100644
--- a/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
@@ -194,7 +194,7 @@
 
   // Now check if oflags has O_CREAT set.
   const Expr *oflagsEx = CE->getArg(FlagsArgIndex);
-  const SVal V = state->getSVal(oflagsEx, C.getLocationContext());
+  const SVal V = C.getSVal(oflagsEx);
   if (!V.getAs<NonLoc>()) {
     // The case where 'V' can be a location can only be due to a bad header,
     // so in this case bail out.
@@ -248,8 +248,7 @@
   // Check if the first argument is stack allocated.  If so, issue a warning
   // because that's likely to be bad news.
   ProgramStateRef state = C.getState();
-  const MemRegion *R =
-    state->getSVal(CE->getArg(0), C.getLocationContext()).getAsRegion();
+  const MemRegion *R = C.getSVal(CE->getArg(0)).getAsRegion();
   if (!R || !isa<StackSpaceRegion>(R->getMemorySpace()))
     return;
 
@@ -336,7 +335,7 @@
   ProgramStateRef state = C.getState();
   ProgramStateRef trueState = nullptr, falseState = nullptr;
   const Expr *arg = CE->getArg(sizeArg);
-  SVal argVal = state->getSVal(arg, C.getLocationContext());
+  SVal argVal = C.getSVal(arg);
 
   if (argVal.isUnknownOrUndef())
     return;
@@ -364,7 +363,7 @@
   unsigned int i;
   for (i = 0; i < nArgs; i++) {
     const Expr *arg = CE->getArg(i);
-    SVal argVal = state->getSVal(arg, C.getLocationContext());
+    SVal argVal = C.getSVal(arg);
     if (argVal.isUnknownOrUndef()) {
       if (i == 0)
         continue;
diff --git a/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp b/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp
index 6f21e86..dbd12cc 100644
--- a/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp
@@ -132,7 +132,8 @@
            ci != ce; ++ci) {
         if (Optional<CFGStmt> S = (*ci).getAs<CFGStmt>())
           if (const CallExpr *CE = dyn_cast<CallExpr>(S->getStmt())) {
-            if (CE->getBuiltinCallee() == Builtin::BI__builtin_unreachable) {
+            if (CE->getBuiltinCallee() == Builtin::BI__builtin_unreachable ||
+                CE->isBuiltinAssumeFalse(Eng.getContext())) {
               foundUnreachable = true;
               break;
             }
diff --git a/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp b/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
index 40217bd..0497ee7 100644
--- a/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -94,7 +94,7 @@
   // FIXME: Handle multi-dimensional VLAs.
   const Expr *SE = VLA->getSizeExpr();
   ProgramStateRef state = C.getState();
-  SVal sizeV = state->getSVal(SE, C.getLocationContext());
+  SVal sizeV = C.getSVal(SE);
 
   if (sizeV.isUndef()) {
     reportBug(VLA_Garbage, SE, state, C);
diff --git a/lib/StaticAnalyzer/Checkers/ValistChecker.cpp b/lib/StaticAnalyzer/Checkers/ValistChecker.cpp
index 06c4ef7..44147e5 100644
--- a/lib/StaticAnalyzer/Checkers/ValistChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ValistChecker.cpp
@@ -64,7 +64,7 @@
                                  CheckerContext &C) const;
   void reportLeakedVALists(const RegionVector &LeakedVALists, StringRef Msg1,
                            StringRef Msg2, CheckerContext &C, ExplodedNode *N,
-                           bool ForceReport = false) const;
+                           bool ReportUninit = false) const;
 
   void checkVAListStartCall(const CallEvent &Call, CheckerContext &C,
                             bool IsCopy) const;
@@ -189,7 +189,7 @@
                                  CheckerContext &C) const {
   ProgramStateRef State = C.getState();
   const Expr *VASubExpr = VAA->getSubExpr();
-  SVal VAListSVal = State->getSVal(VASubExpr, C.getLocationContext());
+  SVal VAListSVal = C.getSVal(VASubExpr);
   bool Symbolic;
   const MemRegion *VAList =
       getVAListAsRegion(VAListSVal, VASubExpr, Symbolic, C);
@@ -267,15 +267,19 @@
 void ValistChecker::reportLeakedVALists(const RegionVector &LeakedVALists,
                                         StringRef Msg1, StringRef Msg2,
                                         CheckerContext &C, ExplodedNode *N,
-                                        bool ForceReport) const {
+                                        bool ReportUninit) const {
   if (!(ChecksEnabled[CK_Unterminated] ||
-        (ChecksEnabled[CK_Uninitialized] && ForceReport)))
+        (ChecksEnabled[CK_Uninitialized] && ReportUninit)))
     return;
   for (auto Reg : LeakedVALists) {
     if (!BT_leakedvalist) {
-      BT_leakedvalist.reset(new BugType(CheckNames[CK_Unterminated],
-                                        "Leaked va_list",
-                                        categories::MemoryError));
+      // FIXME: maybe creating a new check name for this type of bug is a better
+      // solution.
+      BT_leakedvalist.reset(
+          new BugType(CheckNames[CK_Unterminated].getName().empty()
+                          ? CheckNames[CK_Uninitialized]
+                          : CheckNames[CK_Unterminated],
+                      "Leaked va_list", categories::MemoryError));
       BT_leakedvalist->setSuppressOnSink(true);
     }
 
@@ -375,7 +379,7 @@
 
 std::shared_ptr<PathDiagnosticPiece> ValistChecker::ValistBugVisitor::VisitNode(
     const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC,
-    BugReport &BR) {
+    BugReport &) {
   ProgramStateRef State = N->getState();
   ProgramStateRef StatePrev = PrevN->getState();
 
diff --git a/lib/StaticAnalyzer/Core/AnalysisManager.cpp b/lib/StaticAnalyzer/Core/AnalysisManager.cpp
index b7e2c27..53199f7 100644
--- a/lib/StaticAnalyzer/Core/AnalysisManager.cpp
+++ b/lib/StaticAnalyzer/Core/AnalysisManager.cpp
@@ -14,33 +14,26 @@
 
 void AnalysisManager::anchor() { }
 
-AnalysisManager::AnalysisManager(ASTContext &ctx, DiagnosticsEngine &diags,
-                                 const LangOptions &lang,
-                                 const PathDiagnosticConsumers &PDC,
-                                 StoreManagerCreator storemgr,
-                                 ConstraintManagerCreator constraintmgr,
-                                 CheckerManager *checkerMgr,
-                                 AnalyzerOptions &Options,
-                                 CodeInjector *injector)
-  : AnaCtxMgr(Options.UnoptimizedCFG,
-              Options.includeImplicitDtorsInCFG(),
-              /*AddInitializers=*/true,
-              Options.includeTemporaryDtorsInCFG(),
-	            Options.includeLifetimeInCFG(),
-              // Adding LoopExit elements to the CFG is a requirement for loop
-              // unrolling.
-              Options.includeLoopExitInCFG() || Options.shouldUnrollLoops(),
-              Options.shouldSynthesizeBodies(),
-              Options.shouldConditionalizeStaticInitializers(),
-              /*addCXXNewAllocator=*/true,
-              injector),
-    Ctx(ctx),
-    Diags(diags),
-    LangOpts(lang),
-    PathConsumers(PDC),
-    CreateStoreMgr(storemgr), CreateConstraintMgr(constraintmgr),
-    CheckerMgr(checkerMgr),
-    options(Options) {
+AnalysisManager::AnalysisManager(
+    ASTContext &ASTCtx, DiagnosticsEngine &diags, const LangOptions &lang,
+    const PathDiagnosticConsumers &PDC, StoreManagerCreator storemgr,
+    ConstraintManagerCreator constraintmgr, CheckerManager *checkerMgr,
+    AnalyzerOptions &Options, CodeInjector *injector)
+    : AnaCtxMgr(ASTCtx, Options.UnoptimizedCFG,
+                Options.includeImplicitDtorsInCFG(),
+                /*AddInitializers=*/true, Options.includeTemporaryDtorsInCFG(),
+                Options.includeLifetimeInCFG(),
+                // Adding LoopExit elements to the CFG is a requirement for loop
+                // unrolling.
+                Options.includeLoopExitInCFG() || Options.shouldUnrollLoops(),
+                Options.shouldSynthesizeBodies(),
+                Options.shouldConditionalizeStaticInitializers(),
+                /*addCXXNewAllocator=*/true,
+                Options.includeRichConstructorsInCFG(),
+                injector),
+      Ctx(ASTCtx), Diags(diags), LangOpts(lang), PathConsumers(PDC),
+      CreateStoreMgr(storemgr), CreateConstraintMgr(constraintmgr),
+      CheckerMgr(checkerMgr), options(Options) {
   AnaCtxMgr.getCFGBuildOptions().setAllAlwaysAdd();
 }
 
diff --git a/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp b/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp
index 48e3e22..b831c69 100644
--- a/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp
+++ b/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp
@@ -55,6 +55,30 @@
   return UserMode;
 }
 
+AnalyzerOptions::ExplorationStrategyKind
+AnalyzerOptions::getExplorationStrategy() {
+  if (ExplorationStrategy == ExplorationStrategyKind::NotSet) {
+    StringRef StratStr =
+        Config
+            .insert(std::make_pair("exploration_strategy", "unexplored_first_queue"))
+            .first->second;
+    ExplorationStrategy =
+        llvm::StringSwitch<ExplorationStrategyKind>(StratStr)
+            .Case("dfs", ExplorationStrategyKind::DFS)
+            .Case("bfs", ExplorationStrategyKind::BFS)
+            .Case("unexplored_first",
+                  ExplorationStrategyKind::UnexploredFirst)
+            .Case("unexplored_first_queue",
+                  ExplorationStrategyKind::UnexploredFirstQueue)
+            .Case("bfs_block_dfs_contents",
+                  ExplorationStrategyKind::BFSBlockDFSContents)
+            .Default(ExplorationStrategyKind::NotSet);
+    assert(ExplorationStrategy != ExplorationStrategyKind::NotSet &&
+           "User mode is invalid.");
+  }
+  return ExplorationStrategy;
+}
+
 IPAKind AnalyzerOptions::getIPAMode() {
   if (IPAMode == IPAK_NotSet) {
 
@@ -169,7 +193,7 @@
 bool AnalyzerOptions::includeTemporaryDtorsInCFG() {
   return getBooleanOption(IncludeTemporaryDtorsInCFG,
                           "cfg-temporary-dtors",
-                          /* Default = */ false);
+                          /* Default = */ true);
 }
 
 bool AnalyzerOptions::includeImplicitDtorsInCFG() {
@@ -185,7 +209,13 @@
 
 bool AnalyzerOptions::includeLoopExitInCFG() {
   return getBooleanOption(IncludeLoopExitInCFG, "cfg-loopexit",
-          /* Default = */ false);
+                          /* Default = */ false);
+}
+
+bool AnalyzerOptions::includeRichConstructorsInCFG() {
+  return getBooleanOption(IncludeRichConstructorsInCFG,
+                          "cfg-rich-constructors",
+                          /* Default = */ true);
 }
 
 bool AnalyzerOptions::mayInlineCXXStandardLibrary() {
@@ -203,7 +233,7 @@
 bool AnalyzerOptions::mayInlineCXXAllocator() {
   return getBooleanOption(InlineCXXAllocator,
                           "c++-allocator-inlining",
-                          /*Default=*/false);
+                          /*Default=*/true);
 }
 
 bool AnalyzerOptions::mayInlineCXXContainerMethods() {
@@ -218,6 +248,11 @@
                           /*Default=*/false);
 }
 
+bool AnalyzerOptions::mayInlineCXXTemporaryDtors() {
+  return getBooleanOption(InlineCXXTemporaryDtors,
+                          "c++-temp-dtor-inlining",
+                          /*Default=*/false);
+}
 
 bool AnalyzerOptions::mayInlineObjCMethod() {
   return getBooleanOption(ObjCInliningMode,
@@ -262,6 +297,12 @@
                           /* Default = */ false);
 }
 
+bool AnalyzerOptions::shouldSerializeStats() {
+  return getBooleanOption(SerializeStats,
+                          "serialize-stats",
+                          /* Default = */ false);
+}
+
 int AnalyzerOptions::getOptionAsInteger(StringRef Name, int DefaultVal,
                                         const CheckerBase *C,
                                         bool SearchInParents) {
@@ -392,3 +433,26 @@
         getBooleanOption("notes-as-events", /*Default=*/false);
   return DisplayNotesAsEvents.getValue();
 }
+
+StringRef AnalyzerOptions::getCTUDir() {
+  if (!CTUDir.hasValue()) {
+    CTUDir = getOptionAsString("ctu-dir", "");
+    if (!llvm::sys::fs::is_directory(*CTUDir))
+      CTUDir = "";
+  }
+  return CTUDir.getValue();
+}
+
+bool AnalyzerOptions::naiveCTUEnabled() {
+  if (!NaiveCTU.hasValue()) {
+    NaiveCTU = getBooleanOption("experimental-enable-naive-ctu-analysis",
+                                /*Default=*/false);
+  }
+  return NaiveCTU.getValue();
+}
+
+StringRef AnalyzerOptions::getCTUIndexName() {
+  if (!CTUIndexName.hasValue())
+    CTUIndexName = getOptionAsString("ctu-index-name", "externalFnMap.txt");
+  return CTUIndexName.getValue();
+}
diff --git a/lib/StaticAnalyzer/Core/BasicValueFactory.cpp b/lib/StaticAnalyzer/Core/BasicValueFactory.cpp
index ebbace4..db4c143 100644
--- a/lib/StaticAnalyzer/Core/BasicValueFactory.cpp
+++ b/lib/StaticAnalyzer/Core/BasicValueFactory.cpp
@@ -1,4 +1,4 @@
-//=== BasicValueFactory.cpp - Basic values for Path Sens analysis --*- C++ -*-//
+//===- BasicValueFactory.cpp - Basic values for Path Sens analysis --------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,9 +13,18 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/AST/ASTContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/ImmutableList.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
 
 using namespace clang;
 using namespace ento;
@@ -40,10 +49,11 @@
   ID.AddPointer(L.getInternalPointer());
 }
 
-typedef std::pair<SVal, uintptr_t> SValData;
-typedef std::pair<SVal, SVal> SValPair;
+using SValData = std::pair<SVal, uintptr_t>;
+using SValPair = std::pair<SVal, SVal>;
 
 namespace llvm {
+
 template<> struct FoldingSetTrait<SValData> {
   static inline void Profile(const SValData& X, llvm::FoldingSetNodeID& ID) {
     X.first.Profile(ID);
@@ -57,20 +67,21 @@
     X.second.Profile(ID);
   }
 };
-}
 
-typedef llvm::FoldingSet<llvm::FoldingSetNodeWrapper<SValData> >
-  PersistentSValsTy;
+} // namespace llvm
 
-typedef llvm::FoldingSet<llvm::FoldingSetNodeWrapper<SValPair> >
-  PersistentSValPairsTy;
+using PersistentSValsTy =
+    llvm::FoldingSet<llvm::FoldingSetNodeWrapper<SValData>>;
+
+using PersistentSValPairsTy =
+    llvm::FoldingSet<llvm::FoldingSetNodeWrapper<SValPair>>;
 
 BasicValueFactory::~BasicValueFactory() {
   // Note that the dstor for the contents of APSIntSet will never be called,
   // so we iterate over the set and invoke the dstor for each APSInt.  This
   // frees an aux. memory allocated to represent very large constants.
-  for (APSIntSetTy::iterator I=APSIntSet.begin(), E=APSIntSet.end(); I!=E; ++I)
-    I->getValue().~APSInt();
+  for (const auto &I : APSIntSet)
+    I.getValue().~APSInt();
 
   delete (PersistentSValsTy*) PersistentSVals;
   delete (PersistentSValPairsTy*) PersistentSValPairs;
@@ -79,7 +90,8 @@
 const llvm::APSInt& BasicValueFactory::getValue(const llvm::APSInt& X) {
   llvm::FoldingSetNodeID ID;
   void *InsertPos;
-  typedef llvm::FoldingSetNodeWrapper<llvm::APSInt> FoldNodeTy;
+
+  using FoldNodeTy = llvm::FoldingSetNodeWrapper<llvm::APSInt>;
 
   X.Profile(ID);
   FoldNodeTy* P = APSIntSet.FindNodeOrInsertPos(ID, InsertPos);
@@ -107,14 +119,12 @@
 }
 
 const llvm::APSInt& BasicValueFactory::getValue(uint64_t X, QualType T) {
-
   return getValue(getAPSIntType(T).getValue(X));
 }
 
 const CompoundValData*
 BasicValueFactory::getCompoundValData(QualType T,
                                       llvm::ImmutableList<SVal> Vals) {
-
   llvm::FoldingSetNodeID ID;
   CompoundValData::Profile(ID, T, Vals);
   void *InsertPos;
@@ -150,7 +160,7 @@
 }
 
 const PointerToMemberData *BasicValueFactory::getPointerToMemberData(
-    const DeclaratorDecl *DD, llvm::ImmutableList<const CXXBaseSpecifier*> L) {
+    const DeclaratorDecl *DD, llvm::ImmutableList<const CXXBaseSpecifier *> L) {
   llvm::FoldingSetNodeID ID;
   PointerToMemberData::Profile(ID, DD, L);
   void *InsertPos;
@@ -167,7 +177,7 @@
   return D;
 }
 
-const clang::ento::PointerToMemberData *BasicValueFactory::accumCXXBase(
+const PointerToMemberData *BasicValueFactory::accumCXXBase(
     llvm::iterator_range<CastExpr::path_const_iterator> PathRange,
     const nonloc::PointerToMember &PTM) {
   nonloc::PointerToMember::PTMDataType PTMDT = PTM.getPTMData();
@@ -195,10 +205,9 @@
 const llvm::APSInt*
 BasicValueFactory::evalAPSInt(BinaryOperator::Opcode Op,
                              const llvm::APSInt& V1, const llvm::APSInt& V2) {
-
   switch (Op) {
     default:
-      assert (false && "Invalid Opcode.");
+      assert(false && "Invalid Opcode.");
 
     case BO_Mul:
       return &getValue( V1 * V2 );
@@ -220,11 +229,11 @@
       return &getValue( V1 - V2 );
 
     case BO_Shl: {
-
       // FIXME: This logic should probably go higher up, where we can
       // test these conditions symbolically.
 
-      // FIXME: Expand these checks to include all undefined behavior.
+      if (V1.isSigned() && V1.isNegative())
+        return nullptr;
 
       if (V2.isSigned() && V2.isNegative())
         return nullptr;
@@ -234,16 +243,16 @@
       if (Amt >= V1.getBitWidth())
         return nullptr;
 
+      if (V1.isSigned() && Amt > V1.countLeadingZeros())
+          return nullptr;
+
       return &getValue( V1.operator<<( (unsigned) Amt ));
     }
 
     case BO_Shr: {
-
       // FIXME: This logic should probably go higher up, where we can
       // test these conditions symbolically.
 
-      // FIXME: Expand these checks to include all undefined behavior.
-
       if (V2.isSigned() && V2.isNegative())
         return nullptr;
 
@@ -286,10 +295,8 @@
   }
 }
 
-
 const std::pair<SVal, uintptr_t>&
 BasicValueFactory::getPersistentSValWithData(const SVal& V, uintptr_t Data) {
-
   // Lazily create the folding set.
   if (!PersistentSVals) PersistentSVals = new PersistentSValsTy();
 
@@ -300,7 +307,8 @@
 
   PersistentSValsTy& Map = *((PersistentSValsTy*) PersistentSVals);
 
-  typedef llvm::FoldingSetNodeWrapper<SValData> FoldNodeTy;
+  using FoldNodeTy = llvm::FoldingSetNodeWrapper<SValData>;
+
   FoldNodeTy* P = Map.FindNodeOrInsertPos(ID, InsertPos);
 
   if (!P) {
@@ -314,7 +322,6 @@
 
 const std::pair<SVal, SVal>&
 BasicValueFactory::getPersistentSValPair(const SVal& V1, const SVal& V2) {
-
   // Lazily create the folding set.
   if (!PersistentSValPairs) PersistentSValPairs = new PersistentSValPairsTy();
 
@@ -325,7 +332,8 @@
 
   PersistentSValPairsTy& Map = *((PersistentSValPairsTy*) PersistentSValPairs);
 
-  typedef llvm::FoldingSetNodeWrapper<SValPair> FoldNodeTy;
+  using FoldNodeTy = llvm::FoldingSetNodeWrapper<SValPair>;
+
   FoldNodeTy* P = Map.FindNodeOrInsertPos(ID, InsertPos);
 
   if (!P) {
diff --git a/lib/StaticAnalyzer/Core/BugReporter.cpp b/lib/StaticAnalyzer/Core/BugReporter.cpp
index 4a5d25f..5d32ef3 100644
--- a/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -229,7 +229,6 @@
     PathDiagnosticCallPiece *Call = dyn_cast<PathDiagnosticCallPiece>(I->get());
 
     if (!Call) {
-      assert((*I)->getLocation().asLocation().isValid());
       continue;
     }
 
@@ -557,16 +556,322 @@
 
 static void CompactPathDiagnostic(PathPieces &path, const SourceManager& SM);
 
+
+/// Add path diagnostic for statement associated with node \p N
+/// to diagnostics \p PD.
+/// Precondition: location associated with \p N is a \c BlockEdge.
+static void generateMinimalDiagnosticsForBlockEdge(const ExplodedNode *N,
+                                       PathDiagnosticBuilder &PDB,
+                                       PathDiagnostic &PD) {
+
+  const LocationContext *LC = N->getLocationContext();
+  SourceManager& SMgr = PDB.getSourceManager();
+  BlockEdge BE = N->getLocation().castAs<BlockEdge>();
+  const CFGBlock *Src = BE.getSrc();
+  const CFGBlock *Dst = BE.getDst();
+  const Stmt *T = Src->getTerminator();
+  if (!T)
+    return;
+
+  auto Start = PathDiagnosticLocation::createBegin(T, SMgr, LC);
+  switch (T->getStmtClass()) {
+  default:
+    break;
+
+  case Stmt::GotoStmtClass:
+  case Stmt::IndirectGotoStmtClass: {
+    const Stmt *S = PathDiagnosticLocation::getNextStmt(N);
+
+    if (!S)
+      break;
+
+    std::string sbuf;
+    llvm::raw_string_ostream os(sbuf);
+    const PathDiagnosticLocation &End = PDB.getEnclosingStmtLocation(S);
+
+    os << "Control jumps to line " << End.asLocation().getExpansionLineNumber();
+    PD.getActivePath().push_front(
+        std::make_shared<PathDiagnosticControlFlowPiece>(Start, End, os.str()));
+    break;
+  }
+
+  case Stmt::SwitchStmtClass: {
+    // Figure out what case arm we took.
+    std::string sbuf;
+    llvm::raw_string_ostream os(sbuf);
+
+    if (const Stmt *S = Dst->getLabel()) {
+      PathDiagnosticLocation End(S, SMgr, LC);
+
+      switch (S->getStmtClass()) {
+      default:
+        os << "No cases match in the switch statement. "
+              "Control jumps to line "
+           << End.asLocation().getExpansionLineNumber();
+        break;
+      case Stmt::DefaultStmtClass:
+        os << "Control jumps to the 'default' case at line "
+           << End.asLocation().getExpansionLineNumber();
+        break;
+
+      case Stmt::CaseStmtClass: {
+        os << "Control jumps to 'case ";
+        const CaseStmt *Case = cast<CaseStmt>(S);
+        const Expr *LHS = Case->getLHS()->IgnoreParenCasts();
+
+        // Determine if it is an enum.
+        bool GetRawInt = true;
+
+        if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(LHS)) {
+          // FIXME: Maybe this should be an assertion.  Are there cases
+          // were it is not an EnumConstantDecl?
+          const EnumConstantDecl *D = dyn_cast<EnumConstantDecl>(DR->getDecl());
+
+          if (D) {
+            GetRawInt = false;
+            os << *D;
+          }
+        }
+
+        if (GetRawInt)
+          os << LHS->EvaluateKnownConstInt(PDB.getASTContext());
+
+        os << ":'  at line " << End.asLocation().getExpansionLineNumber();
+        break;
+      }
+      }
+      PD.getActivePath().push_front(
+          std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
+                                                           os.str()));
+    } else {
+      os << "'Default' branch taken. ";
+      const PathDiagnosticLocation &End = PDB.ExecutionContinues(os, N);
+      PD.getActivePath().push_front(
+          std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
+                                                           os.str()));
+    }
+
+    break;
+  }
+
+  case Stmt::BreakStmtClass:
+  case Stmt::ContinueStmtClass: {
+    std::string sbuf;
+    llvm::raw_string_ostream os(sbuf);
+    PathDiagnosticLocation End = PDB.ExecutionContinues(os, N);
+    PD.getActivePath().push_front(
+        std::make_shared<PathDiagnosticControlFlowPiece>(Start, End, os.str()));
+    break;
+  }
+
+  // Determine control-flow for ternary '?'.
+  case Stmt::BinaryConditionalOperatorClass:
+  case Stmt::ConditionalOperatorClass: {
+    std::string sbuf;
+    llvm::raw_string_ostream os(sbuf);
+    os << "'?' condition is ";
+
+    if (*(Src->succ_begin() + 1) == Dst)
+      os << "false";
+    else
+      os << "true";
+
+    PathDiagnosticLocation End = PDB.ExecutionContinues(N);
+
+    if (const Stmt *S = End.asStmt())
+      End = PDB.getEnclosingStmtLocation(S);
+
+    PD.getActivePath().push_front(
+        std::make_shared<PathDiagnosticControlFlowPiece>(Start, End, os.str()));
+    break;
+  }
+
+  // Determine control-flow for short-circuited '&&' and '||'.
+  case Stmt::BinaryOperatorClass: {
+    if (!PDB.supportsLogicalOpControlFlow())
+      break;
+
+    const BinaryOperator *B = cast<BinaryOperator>(T);
+    std::string sbuf;
+    llvm::raw_string_ostream os(sbuf);
+    os << "Left side of '";
+
+    if (B->getOpcode() == BO_LAnd) {
+      os << "&&"
+         << "' is ";
+
+      if (*(Src->succ_begin() + 1) == Dst) {
+        os << "false";
+        PathDiagnosticLocation End(B->getLHS(), SMgr, LC);
+        PathDiagnosticLocation Start =
+            PathDiagnosticLocation::createOperatorLoc(B, SMgr);
+        PD.getActivePath().push_front(
+            std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
+                                                             os.str()));
+      } else {
+        os << "true";
+        PathDiagnosticLocation Start(B->getLHS(), SMgr, LC);
+        PathDiagnosticLocation End = PDB.ExecutionContinues(N);
+        PD.getActivePath().push_front(
+            std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
+                                                             os.str()));
+      }
+    } else {
+      assert(B->getOpcode() == BO_LOr);
+      os << "||"
+         << "' is ";
+
+      if (*(Src->succ_begin() + 1) == Dst) {
+        os << "false";
+        PathDiagnosticLocation Start(B->getLHS(), SMgr, LC);
+        PathDiagnosticLocation End = PDB.ExecutionContinues(N);
+        PD.getActivePath().push_front(
+            std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
+                                                             os.str()));
+      } else {
+        os << "true";
+        PathDiagnosticLocation End(B->getLHS(), SMgr, LC);
+        PathDiagnosticLocation Start =
+            PathDiagnosticLocation::createOperatorLoc(B, SMgr);
+        PD.getActivePath().push_front(
+            std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
+                                                             os.str()));
+      }
+    }
+
+    break;
+  }
+
+  case Stmt::DoStmtClass: {
+    if (*(Src->succ_begin()) == Dst) {
+      std::string sbuf;
+      llvm::raw_string_ostream os(sbuf);
+
+      os << "Loop condition is true. ";
+      PathDiagnosticLocation End = PDB.ExecutionContinues(os, N);
+
+      if (const Stmt *S = End.asStmt())
+        End = PDB.getEnclosingStmtLocation(S);
+
+      PD.getActivePath().push_front(
+          std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
+                                                           os.str()));
+    } else {
+      PathDiagnosticLocation End = PDB.ExecutionContinues(N);
+
+      if (const Stmt *S = End.asStmt())
+        End = PDB.getEnclosingStmtLocation(S);
+
+      PD.getActivePath().push_front(
+          std::make_shared<PathDiagnosticControlFlowPiece>(
+              Start, End, "Loop condition is false.  Exiting loop"));
+    }
+
+    break;
+  }
+
+  case Stmt::WhileStmtClass:
+  case Stmt::ForStmtClass: {
+    if (*(Src->succ_begin() + 1) == Dst) {
+      std::string sbuf;
+      llvm::raw_string_ostream os(sbuf);
+
+      os << "Loop condition is false. ";
+      PathDiagnosticLocation End = PDB.ExecutionContinues(os, N);
+      if (const Stmt *S = End.asStmt())
+        End = PDB.getEnclosingStmtLocation(S);
+
+      PD.getActivePath().push_front(
+          std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
+                                                           os.str()));
+    } else {
+      PathDiagnosticLocation End = PDB.ExecutionContinues(N);
+      if (const Stmt *S = End.asStmt())
+        End = PDB.getEnclosingStmtLocation(S);
+
+      PD.getActivePath().push_front(
+          std::make_shared<PathDiagnosticControlFlowPiece>(
+              Start, End, "Loop condition is true.  Entering loop body"));
+    }
+
+    break;
+  }
+
+  case Stmt::IfStmtClass: {
+    PathDiagnosticLocation End = PDB.ExecutionContinues(N);
+
+    if (const Stmt *S = End.asStmt())
+      End = PDB.getEnclosingStmtLocation(S);
+
+    if (*(Src->succ_begin() + 1) == Dst)
+      PD.getActivePath().push_front(
+          std::make_shared<PathDiagnosticControlFlowPiece>(
+              Start, End, "Taking false branch"));
+    else
+      PD.getActivePath().push_front(
+          std::make_shared<PathDiagnosticControlFlowPiece>(
+              Start, End, "Taking true branch"));
+
+    break;
+  }
+  }
+}
+
+/// Generate minimal diagnostics for node \p N, and write it into path
+/// diagnostics \p PD.
+void generateMinimalDiagnosticsForNode(const ExplodedNode *N,
+    PathDiagnosticBuilder &PDB,
+    PathDiagnostic &PD,
+    LocationContextMap &LCM,
+    StackDiagVector &CallStack) {
+
+  SourceManager &SMgr = PDB.getSourceManager();
+  ProgramPoint P = N->getLocation();
+
+  if (Optional<CallExitEnd> CE = P.getAs<CallExitEnd>()) {
+    auto C = PathDiagnosticCallPiece::construct(N, *CE, SMgr);
+    // Record the mapping from call piece to LocationContext.
+    LCM[&C->path] = CE->getCalleeContext();
+    auto *P = C.get();
+    PD.getActivePath().push_front(std::move(C));
+    PD.pushActivePath(&P->path);
+    CallStack.push_back(StackDiagPair(P, N));
+  } else if (Optional<CallEnter> CE = P.getAs<CallEnter>()) {
+    // Flush all locations, and pop the active path.
+    bool VisitedEntireCall = PD.isWithinCall();
+    PD.popActivePath();
+
+    // Either we just added a bunch of stuff to the top-level path, or
+    // we have a previous CallExitEnd.  If the former, it means that the
+    // path terminated within a function call.  We must then take the
+    // current contents of the active path and place it within
+    // a new PathDiagnosticCallPiece.
+    PathDiagnosticCallPiece *C;
+    if (VisitedEntireCall) {
+      C = cast<PathDiagnosticCallPiece>(PD.getActivePath().front().get());
+    } else {
+      const Decl *Caller = CE->getLocationContext()->getDecl();
+      C = PathDiagnosticCallPiece::construct(PD.getActivePath(), Caller);
+      // Record the mapping from call piece to LocationContext.
+      LCM[&C->path] = CE->getCalleeContext();
+    }
+
+    C->setCallee(*CE, SMgr);
+    if (!CallStack.empty()) {
+      assert(CallStack.back().first == C);
+      CallStack.pop_back();
+    }
+  } else if (P.getKind() == ProgramPoint::BlockEdgeKind) {
+    generateMinimalDiagnosticsForBlockEdge(N, PDB, PD);
+  }
+}
+
 static bool GenerateMinimalPathDiagnostic(
     PathDiagnostic &PD, PathDiagnosticBuilder &PDB, const ExplodedNode *N,
     LocationContextMap &LCM,
     ArrayRef<std::unique_ptr<BugReporterVisitor>> visitors) {
-
-  SourceManager& SMgr = PDB.getSourceManager();
-  const LocationContext *LC = PDB.LC;
   const ExplodedNode *NextNode = N->pred_empty()
                                         ? nullptr : *(N->pred_begin());
-
   StackDiagVector CallStack;
 
   while (NextNode) {
@@ -574,319 +879,17 @@
     PDB.LC = N->getLocationContext();
     NextNode = N->getFirstPred();
 
-    ProgramPoint P = N->getLocation();
-
-    do {
-      if (Optional<CallExitEnd> CE = P.getAs<CallExitEnd>()) {
-        auto C = PathDiagnosticCallPiece::construct(N, *CE, SMgr);
-        // Record the mapping from call piece to LocationContext.
-        LCM[&C->path] = CE->getCalleeContext();
-        auto *P = C.get();
-        PD.getActivePath().push_front(std::move(C));
-        PD.pushActivePath(&P->path);
-        CallStack.push_back(StackDiagPair(P, N));
-        break;
-      }
-
-      if (Optional<CallEnter> CE = P.getAs<CallEnter>()) {
-        // Flush all locations, and pop the active path.
-        bool VisitedEntireCall = PD.isWithinCall();
-        PD.popActivePath();
-
-        // Either we just added a bunch of stuff to the top-level path, or
-        // we have a previous CallExitEnd.  If the former, it means that the
-        // path terminated within a function call.  We must then take the
-        // current contents of the active path and place it within
-        // a new PathDiagnosticCallPiece.
-        PathDiagnosticCallPiece *C;
-        if (VisitedEntireCall) {
-          C = cast<PathDiagnosticCallPiece>(PD.getActivePath().front().get());
-        } else {
-          const Decl *Caller = CE->getLocationContext()->getDecl();
-          C = PathDiagnosticCallPiece::construct(PD.getActivePath(), Caller);
-          // Record the mapping from call piece to LocationContext.
-          LCM[&C->path] = CE->getCalleeContext();
-        }
-
-        C->setCallee(*CE, SMgr);
-        if (!CallStack.empty()) {
-          assert(CallStack.back().first == C);
-          CallStack.pop_back();
-        }
-        break;
-      }
-
-      if (Optional<BlockEdge> BE = P.getAs<BlockEdge>()) {
-        const CFGBlock *Src = BE->getSrc();
-        const CFGBlock *Dst = BE->getDst();
-        const Stmt *T = Src->getTerminator();
-
-        if (!T)
-          break;
-
-        PathDiagnosticLocation Start =
-            PathDiagnosticLocation::createBegin(T, SMgr,
-                N->getLocationContext());
-
-        switch (T->getStmtClass()) {
-        default:
-          break;
-
-        case Stmt::GotoStmtClass:
-        case Stmt::IndirectGotoStmtClass: {
-          const Stmt *S = PathDiagnosticLocation::getNextStmt(N);
-
-          if (!S)
-            break;
-
-          std::string sbuf;
-          llvm::raw_string_ostream os(sbuf);
-          const PathDiagnosticLocation &End = PDB.getEnclosingStmtLocation(S);
-
-          os << "Control jumps to line "
-              << End.asLocation().getExpansionLineNumber();
-          PD.getActivePath().push_front(
-              std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
-                                                               os.str()));
-          break;
-        }
-
-        case Stmt::SwitchStmtClass: {
-          // Figure out what case arm we took.
-          std::string sbuf;
-          llvm::raw_string_ostream os(sbuf);
-
-          if (const Stmt *S = Dst->getLabel()) {
-            PathDiagnosticLocation End(S, SMgr, LC);
-
-            switch (S->getStmtClass()) {
-            default:
-              os << "No cases match in the switch statement. "
-              "Control jumps to line "
-              << End.asLocation().getExpansionLineNumber();
-              break;
-            case Stmt::DefaultStmtClass:
-              os << "Control jumps to the 'default' case at line "
-              << End.asLocation().getExpansionLineNumber();
-              break;
-
-            case Stmt::CaseStmtClass: {
-              os << "Control jumps to 'case ";
-              const CaseStmt *Case = cast<CaseStmt>(S);
-              const Expr *LHS = Case->getLHS()->IgnoreParenCasts();
-
-              // Determine if it is an enum.
-              bool GetRawInt = true;
-
-              if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(LHS)) {
-                // FIXME: Maybe this should be an assertion.  Are there cases
-                // were it is not an EnumConstantDecl?
-                const EnumConstantDecl *D =
-                    dyn_cast<EnumConstantDecl>(DR->getDecl());
-
-                if (D) {
-                  GetRawInt = false;
-                  os << *D;
-                }
-              }
-
-              if (GetRawInt)
-                os << LHS->EvaluateKnownConstInt(PDB.getASTContext());
-
-              os << ":'  at line "
-                  << End.asLocation().getExpansionLineNumber();
-              break;
-            }
-            }
-            PD.getActivePath().push_front(
-                std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
-                                                                 os.str()));
-          }
-          else {
-            os << "'Default' branch taken. ";
-            const PathDiagnosticLocation &End = PDB.ExecutionContinues(os, N);
-            PD.getActivePath().push_front(
-                std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
-                                                                 os.str()));
-          }
-
-          break;
-        }
-
-        case Stmt::BreakStmtClass:
-        case Stmt::ContinueStmtClass: {
-          std::string sbuf;
-          llvm::raw_string_ostream os(sbuf);
-          PathDiagnosticLocation End = PDB.ExecutionContinues(os, N);
-          PD.getActivePath().push_front(
-              std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
-                                                               os.str()));
-          break;
-        }
-
-        // Determine control-flow for ternary '?'.
-        case Stmt::BinaryConditionalOperatorClass:
-        case Stmt::ConditionalOperatorClass: {
-          std::string sbuf;
-          llvm::raw_string_ostream os(sbuf);
-          os << "'?' condition is ";
-
-          if (*(Src->succ_begin()+1) == Dst)
-            os << "false";
-          else
-            os << "true";
-
-          PathDiagnosticLocation End = PDB.ExecutionContinues(N);
-
-          if (const Stmt *S = End.asStmt())
-            End = PDB.getEnclosingStmtLocation(S);
-
-          PD.getActivePath().push_front(
-              std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
-                                                               os.str()));
-          break;
-        }
-
-        // Determine control-flow for short-circuited '&&' and '||'.
-        case Stmt::BinaryOperatorClass: {
-          if (!PDB.supportsLogicalOpControlFlow())
-            break;
-
-          const BinaryOperator *B = cast<BinaryOperator>(T);
-          std::string sbuf;
-          llvm::raw_string_ostream os(sbuf);
-          os << "Left side of '";
-
-          if (B->getOpcode() == BO_LAnd) {
-            os << "&&" << "' is ";
-
-            if (*(Src->succ_begin()+1) == Dst) {
-              os << "false";
-              PathDiagnosticLocation End(B->getLHS(), SMgr, LC);
-              PathDiagnosticLocation Start =
-                  PathDiagnosticLocation::createOperatorLoc(B, SMgr);
-              PD.getActivePath().push_front(
-                  std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
-                                                                   os.str()));
-            }
-            else {
-              os << "true";
-              PathDiagnosticLocation Start(B->getLHS(), SMgr, LC);
-              PathDiagnosticLocation End = PDB.ExecutionContinues(N);
-              PD.getActivePath().push_front(
-                  std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
-                                                                   os.str()));
-            }
-          }
-          else {
-            assert(B->getOpcode() == BO_LOr);
-            os << "||" << "' is ";
-
-            if (*(Src->succ_begin()+1) == Dst) {
-              os << "false";
-              PathDiagnosticLocation Start(B->getLHS(), SMgr, LC);
-              PathDiagnosticLocation End = PDB.ExecutionContinues(N);
-              PD.getActivePath().push_front(
-                  std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
-                                                                   os.str()));
-            }
-            else {
-              os << "true";
-              PathDiagnosticLocation End(B->getLHS(), SMgr, LC);
-              PathDiagnosticLocation Start =
-                  PathDiagnosticLocation::createOperatorLoc(B, SMgr);
-              PD.getActivePath().push_front(
-                  std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
-                                                                   os.str()));
-            }
-          }
-
-          break;
-        }
-
-        case Stmt::DoStmtClass:  {
-          if (*(Src->succ_begin()) == Dst) {
-            std::string sbuf;
-            llvm::raw_string_ostream os(sbuf);
-
-            os << "Loop condition is true. ";
-            PathDiagnosticLocation End = PDB.ExecutionContinues(os, N);
-
-            if (const Stmt *S = End.asStmt())
-              End = PDB.getEnclosingStmtLocation(S);
-
-            PD.getActivePath().push_front(
-                std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
-                                                                 os.str()));
-          }
-          else {
-            PathDiagnosticLocation End = PDB.ExecutionContinues(N);
-
-            if (const Stmt *S = End.asStmt())
-              End = PDB.getEnclosingStmtLocation(S);
-
-            PD.getActivePath().push_front(
-                std::make_shared<PathDiagnosticControlFlowPiece>(
-                    Start, End, "Loop condition is false.  Exiting loop"));
-          }
-
-          break;
-        }
-
-        case Stmt::WhileStmtClass:
-        case Stmt::ForStmtClass: {
-          if (*(Src->succ_begin()+1) == Dst) {
-            std::string sbuf;
-            llvm::raw_string_ostream os(sbuf);
-
-            os << "Loop condition is false. ";
-            PathDiagnosticLocation End = PDB.ExecutionContinues(os, N);
-            if (const Stmt *S = End.asStmt())
-              End = PDB.getEnclosingStmtLocation(S);
-
-            PD.getActivePath().push_front(
-                std::make_shared<PathDiagnosticControlFlowPiece>(Start, End,
-                                                                 os.str()));
-          }
-          else {
-            PathDiagnosticLocation End = PDB.ExecutionContinues(N);
-            if (const Stmt *S = End.asStmt())
-              End = PDB.getEnclosingStmtLocation(S);
-
-            PD.getActivePath().push_front(
-                std::make_shared<PathDiagnosticControlFlowPiece>(
-                    Start, End, "Loop condition is true.  Entering loop body"));
-          }
-
-          break;
-        }
-
-        case Stmt::IfStmtClass: {
-          PathDiagnosticLocation End = PDB.ExecutionContinues(N);
-
-          if (const Stmt *S = End.asStmt())
-            End = PDB.getEnclosingStmtLocation(S);
-
-          if (*(Src->succ_begin()+1) == Dst)
-            PD.getActivePath().push_front(
-                std::make_shared<PathDiagnosticControlFlowPiece>(
-                    Start, End, "Taking false branch"));
-          else
-            PD.getActivePath().push_front(
-                std::make_shared<PathDiagnosticControlFlowPiece>(
-                    Start, End, "Taking true branch"));
-
-          break;
-        }
-        }
-      }
-    } while(0);
+    generateMinimalDiagnosticsForNode(N, PDB, PD, LCM, CallStack);
 
     if (NextNode) {
       // Add diagnostic pieces from custom visitors.
       BugReport *R = PDB.getBugReport();
+      llvm::FoldingSet<PathDiagnosticPiece> DeduplicationSet;
       for (auto &V : visitors) {
         if (auto p = V->VisitNode(N, NextNode, PDB, *R)) {
+          if (DeduplicationSet.GetOrInsertNode(p.get()) != p.get())
+            continue;
+
           updateStackPiecesWithMessage(*p, CallStack);
           PD.getActivePath().push_front(std::move(p));
         }
@@ -1312,6 +1315,8 @@
   }
 }
 
+
+
 //===----------------------------------------------------------------------===//
 // Functions for determining if a loop was executed 0 times.
 //===----------------------------------------------------------------------===//
@@ -1401,6 +1406,169 @@
   return isContainedByStmt(PM, LoopBody, S);
 }
 
+/// Generate extensive diagnostics for statement associated with node \p N,
+/// and write it into \p PD.
+static void generateExtensiveDiagnosticsForNode(
+        const ExplodedNode *N,
+        PathDiagnosticBuilder &PDB,
+        LocationContextMap &LCM,
+        EdgeBuilder &EB,
+        StackDiagVector &CallStack,
+        PathDiagnostic &PD,
+        InterestingExprs &IE) {
+
+  const ExplodedNode *NextNode = N->getFirstPred();
+  ProgramPoint P = N->getLocation();
+  const SourceManager& SM = PDB.getSourceManager();
+
+  if (Optional<PostStmt> PS = P.getAs<PostStmt>()) {
+    if (const Expr *Ex = PS->getStmtAs<Expr>())
+      reversePropagateIntererstingSymbols(*PDB.getBugReport(), IE,
+          N->getState().get(), Ex,
+          N->getLocationContext());
+    return;
+  } else if (Optional<CallExitEnd> CE = P.getAs<CallExitEnd>()) {
+    const Stmt *S = CE->getCalleeContext()->getCallSite();
+    if (const Expr *Ex = dyn_cast_or_null<Expr>(S)) {
+      reversePropagateIntererstingSymbols(*PDB.getBugReport(), IE,
+          N->getState().get(), Ex,
+          N->getLocationContext());
+    }
+
+    auto C = PathDiagnosticCallPiece::construct(N, *CE, SM);
+    LCM[&C->path] = CE->getCalleeContext();
+
+    EB.addEdge(C->callReturn, /*AlwaysAdd=*/true, /*IsPostJump=*/true);
+    EB.flushLocations();
+
+    auto *P = C.get();
+    PD.getActivePath().push_front(std::move(C));
+    PD.pushActivePath(&P->path);
+    CallStack.push_back(StackDiagPair(P, N));
+    return;
+  } else if (Optional<CallEnter> CE = P.getAs<CallEnter>()) {
+
+    // Pop the call hierarchy if we are done walking the contents
+    // of a function call.
+
+    // Add an edge to the start of the function.
+    const Decl *D = CE->getCalleeContext()->getDecl();
+    PathDiagnosticLocation pos =
+      PathDiagnosticLocation::createBegin(D, SM);
+    EB.addEdge(pos);
+
+    // Flush all locations, and pop the active path.
+    bool VisitedEntireCall = PD.isWithinCall();
+    EB.flushLocations();
+    PD.popActivePath();
+    PDB.LC = N->getLocationContext();
+
+    // Either we just added a bunch of stuff to the top-level path, or
+    // we have a previous CallExitEnd.  If the former, it means that the
+    // path terminated within a function call.  We must then take the
+    // current contents of the active path and place it within
+    // a new PathDiagnosticCallPiece.
+    PathDiagnosticCallPiece *C;
+    if (VisitedEntireCall) {
+      C = cast<PathDiagnosticCallPiece>(PD.getActivePath().front().get());
+    } else {
+      const Decl *Caller = CE->getLocationContext()->getDecl();
+      C = PathDiagnosticCallPiece::construct(PD.getActivePath(), Caller);
+      LCM[&C->path] = CE->getCalleeContext();
+    }
+
+    C->setCallee(*CE, SM);
+    EB.addContext(C->getLocation());
+
+    if (!CallStack.empty()) {
+      assert(CallStack.back().first == C);
+      CallStack.pop_back();
+    }
+    return;
+  }
+
+  // Note that is important that we update the LocationContext
+  // after looking at CallExits.  CallExit basically adds an
+  // edge in the *caller*, so we don't want to update the LocationContext
+  // too soon.
+  PDB.LC = N->getLocationContext();
+
+  if (Optional<BlockEdge> BE = P.getAs<BlockEdge>()) {
+    // Does this represent entering a call?  If so, look at propagating
+    // interesting symbols across call boundaries.
+    if (NextNode) {
+      const LocationContext *CallerCtx = NextNode->getLocationContext();
+      const LocationContext *CalleeCtx = PDB.LC;
+      if (CallerCtx != CalleeCtx) {
+        reversePropagateInterestingSymbols(*PDB.getBugReport(), IE,
+            N->getState().get(),
+            CalleeCtx, CallerCtx);
+      }
+    }
+
+    // Are we jumping to the head of a loop?  Add a special diagnostic.
+    if (const Stmt *Loop = BE->getSrc()->getLoopTarget()) {
+      PathDiagnosticLocation L(Loop, SM, PDB.LC);
+      const CompoundStmt *CS = nullptr;
+
+      if (const ForStmt *FS = dyn_cast<ForStmt>(Loop))
+        CS = dyn_cast<CompoundStmt>(FS->getBody());
+      else if (const WhileStmt *WS = dyn_cast<WhileStmt>(Loop))
+        CS = dyn_cast<CompoundStmt>(WS->getBody());
+
+      auto p = std::make_shared<PathDiagnosticEventPiece>(
+          L, "Looping back to the head of the loop");
+      p->setPrunable(true);
+
+      EB.addEdge(p->getLocation(), true);
+      PD.getActivePath().push_front(std::move(p));
+
+      if (CS) {
+        PathDiagnosticLocation BL =
+          PathDiagnosticLocation::createEndBrace(CS, SM);
+        EB.addEdge(BL);
+      }
+    }
+
+    const CFGBlock *BSrc = BE->getSrc();
+    ParentMap &PM = PDB.getParentMap();
+
+    if (const Stmt *Term = BSrc->getTerminator()) {
+      // Are we jumping past the loop body without ever executing the
+      // loop (because the condition was false)?
+      if (isLoopJumpPastBody(Term, &*BE) &&
+          !isInLoopBody(PM,
+            getStmtBeforeCond(PM,
+              BSrc->getTerminatorCondition(),
+              N),
+            Term)) {
+        PathDiagnosticLocation L(Term, SM, PDB.LC);
+        auto PE = std::make_shared<PathDiagnosticEventPiece>(
+            L, "Loop body executed 0 times");
+        PE->setPrunable(true);
+
+        EB.addEdge(PE->getLocation(), true);
+        PD.getActivePath().push_front(std::move(PE));
+      }
+
+      // In any case, add the terminator as the current statement
+      // context for control edges.
+      EB.addContext(Term);
+    }
+  } else if (Optional<BlockEntrance> BE = P.getAs<BlockEntrance>()) {
+    Optional<CFGElement> First = BE->getFirstElement();
+    if (Optional<CFGStmt> S = First ? First->getAs<CFGStmt>() : None) {
+      const Stmt *stmt = S->getStmt();
+      if (IsControlFlowExpr(stmt)) {
+        // Add the proper context for '&&', '||', and '?'.
+        EB.addContext(stmt);
+      }
+      else
+        EB.addExtendedContext(PDB.getEnclosingStmtLocation(stmt).asStmt());
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Top-level logic for generating extensive path diagnostics.
 //===----------------------------------------------------------------------===//
@@ -1410,182 +1578,27 @@
     LocationContextMap &LCM,
     ArrayRef<std::unique_ptr<BugReporterVisitor>> visitors) {
   EdgeBuilder EB(PD, PDB);
-  const SourceManager& SM = PDB.getSourceManager();
   StackDiagVector CallStack;
-  InterestingExprs IE;
 
+  InterestingExprs IE;
   const ExplodedNode *NextNode = N->pred_empty() ? nullptr : *(N->pred_begin());
   while (NextNode) {
     N = NextNode;
     NextNode = N->getFirstPred();
-    ProgramPoint P = N->getLocation();
 
-    do {
-      if (Optional<PostStmt> PS = P.getAs<PostStmt>()) {
-        if (const Expr *Ex = PS->getStmtAs<Expr>())
-          reversePropagateIntererstingSymbols(*PDB.getBugReport(), IE,
-                                              N->getState().get(), Ex,
-                                              N->getLocationContext());
-      }
-
-      if (Optional<CallExitEnd> CE = P.getAs<CallExitEnd>()) {
-        const Stmt *S = CE->getCalleeContext()->getCallSite();
-        if (const Expr *Ex = dyn_cast_or_null<Expr>(S)) {
-            reversePropagateIntererstingSymbols(*PDB.getBugReport(), IE,
-                                                N->getState().get(), Ex,
-                                                N->getLocationContext());
-        }
-
-        auto C = PathDiagnosticCallPiece::construct(N, *CE, SM);
-        LCM[&C->path] = CE->getCalleeContext();
-
-        EB.addEdge(C->callReturn, /*AlwaysAdd=*/true, /*IsPostJump=*/true);
-        EB.flushLocations();
-
-        auto *P = C.get();
-        PD.getActivePath().push_front(std::move(C));
-        PD.pushActivePath(&P->path);
-        CallStack.push_back(StackDiagPair(P, N));
-        break;
-      }
-
-      // Pop the call hierarchy if we are done walking the contents
-      // of a function call.
-      if (Optional<CallEnter> CE = P.getAs<CallEnter>()) {
-        // Add an edge to the start of the function.
-        const Decl *D = CE->getCalleeContext()->getDecl();
-        PathDiagnosticLocation pos =
-          PathDiagnosticLocation::createBegin(D, SM);
-        EB.addEdge(pos);
-
-        // Flush all locations, and pop the active path.
-        bool VisitedEntireCall = PD.isWithinCall();
-        EB.flushLocations();
-        PD.popActivePath();
-        PDB.LC = N->getLocationContext();
-
-        // Either we just added a bunch of stuff to the top-level path, or
-        // we have a previous CallExitEnd.  If the former, it means that the
-        // path terminated within a function call.  We must then take the
-        // current contents of the active path and place it within
-        // a new PathDiagnosticCallPiece.
-        PathDiagnosticCallPiece *C;
-        if (VisitedEntireCall) {
-          C = cast<PathDiagnosticCallPiece>(PD.getActivePath().front().get());
-        } else {
-          const Decl *Caller = CE->getLocationContext()->getDecl();
-          C = PathDiagnosticCallPiece::construct(PD.getActivePath(), Caller);
-          LCM[&C->path] = CE->getCalleeContext();
-        }
-
-        C->setCallee(*CE, SM);
-        EB.addContext(C->getLocation());
-
-        if (!CallStack.empty()) {
-          assert(CallStack.back().first == C);
-          CallStack.pop_back();
-        }
-        break;
-      }
-
-      // Note that is important that we update the LocationContext
-      // after looking at CallExits.  CallExit basically adds an
-      // edge in the *caller*, so we don't want to update the LocationContext
-      // too soon.
-      PDB.LC = N->getLocationContext();
-
-      // Block edges.
-      if (Optional<BlockEdge> BE = P.getAs<BlockEdge>()) {
-        // Does this represent entering a call?  If so, look at propagating
-        // interesting symbols across call boundaries.
-        if (NextNode) {
-          const LocationContext *CallerCtx = NextNode->getLocationContext();
-          const LocationContext *CalleeCtx = PDB.LC;
-          if (CallerCtx != CalleeCtx) {
-            reversePropagateInterestingSymbols(*PDB.getBugReport(), IE,
-                                               N->getState().get(),
-                                               CalleeCtx, CallerCtx);
-          }
-        }
-
-        // Are we jumping to the head of a loop?  Add a special diagnostic.
-        if (const Stmt *Loop = BE->getSrc()->getLoopTarget()) {
-          PathDiagnosticLocation L(Loop, SM, PDB.LC);
-          const CompoundStmt *CS = nullptr;
-
-          if (const ForStmt *FS = dyn_cast<ForStmt>(Loop))
-            CS = dyn_cast<CompoundStmt>(FS->getBody());
-          else if (const WhileStmt *WS = dyn_cast<WhileStmt>(Loop))
-            CS = dyn_cast<CompoundStmt>(WS->getBody());
-
-          auto p = std::make_shared<PathDiagnosticEventPiece>(
-              L, "Looping back to the head of the loop");
-          p->setPrunable(true);
-
-          EB.addEdge(p->getLocation(), true);
-          PD.getActivePath().push_front(std::move(p));
-
-          if (CS) {
-            PathDiagnosticLocation BL =
-              PathDiagnosticLocation::createEndBrace(CS, SM);
-            EB.addEdge(BL);
-          }
-        }
-
-        const CFGBlock *BSrc = BE->getSrc();
-        ParentMap &PM = PDB.getParentMap();
-
-        if (const Stmt *Term = BSrc->getTerminator()) {
-          // Are we jumping past the loop body without ever executing the
-          // loop (because the condition was false)?
-          if (isLoopJumpPastBody(Term, &*BE) &&
-              !isInLoopBody(PM,
-                            getStmtBeforeCond(PM,
-                                              BSrc->getTerminatorCondition(),
-                                              N),
-                            Term)) {
-            PathDiagnosticLocation L(Term, SM, PDB.LC);
-            auto PE = std::make_shared<PathDiagnosticEventPiece>(
-                L, "Loop body executed 0 times");
-            PE->setPrunable(true);
-
-            EB.addEdge(PE->getLocation(), true);
-            PD.getActivePath().push_front(std::move(PE));
-          }
-
-          // In any case, add the terminator as the current statement
-          // context for control edges.
-          EB.addContext(Term);
-        }
-
-        break;
-      }
-
-      if (Optional<BlockEntrance> BE = P.getAs<BlockEntrance>()) {
-        Optional<CFGElement> First = BE->getFirstElement();
-        if (Optional<CFGStmt> S = First ? First->getAs<CFGStmt>() : None) {
-          const Stmt *stmt = S->getStmt();
-          if (IsControlFlowExpr(stmt)) {
-            // Add the proper context for '&&', '||', and '?'.
-            EB.addContext(stmt);
-          }
-          else
-            EB.addExtendedContext(PDB.getEnclosingStmtLocation(stmt).asStmt());
-        }
-
-        break;
-      }
-
-
-    } while (0);
+    generateExtensiveDiagnosticsForNode(N, PDB, LCM, EB, CallStack, PD, IE);
 
     if (!NextNode)
       continue;
 
     // Add pieces from custom visitors.
     BugReport *R = PDB.getBugReport();
+    llvm::FoldingSet<PathDiagnosticPiece> DeduplicationSet;
     for (auto &V : visitors) {
       if (auto p = V->VisitNode(N, NextNode, PDB, *R)) {
+        if (DeduplicationSet.GetOrInsertNode(p.get()) != p.get())
+          continue;
+
         const PathDiagnosticLocation &Loc = p->getLocation();
         EB.addEdge(Loc, true);
         updateStackPiecesWithMessage(*p, CallStack);
@@ -1644,6 +1657,223 @@
 static const char StrLoopCollectionEmpty[] =
   "Loop body skipped when collection is empty";
 
+/// Generate alternate-extensive diagnostics for the node \p N,
+/// and write it into \p PD.
+static void generateAlternateExtensiveDiagnosticsForNode(const ExplodedNode *N,
+      PathDiagnostic &PD,
+      PathDiagnosticLocation &PrevLoc,
+      PathDiagnosticBuilder &PDB,
+      LocationContextMap &LCM,
+      StackDiagVector &CallStack,
+      InterestingExprs &IE) {
+  const ExplodedNode *NextNode = N->getFirstPred();
+  ProgramPoint P = N->getLocation();
+  const SourceManager& SM = PDB.getSourceManager();
+
+  // Have we encountered an entrance to a call?  It may be
+  // the case that we have not encountered a matching
+  // call exit before this point.  This means that the path
+  // terminated within the call itself.
+  if (Optional<CallEnter> CE = P.getAs<CallEnter>()) {
+    // Add an edge to the start of the function.
+    const StackFrameContext *CalleeLC = CE->getCalleeContext();
+    const Decl *D = CalleeLC->getDecl();
+    // Add the edge only when the callee has body. We jump to the beginning
+    // of the *declaration*, however we expect it to be followed by the
+    // body. This isn't the case for autosynthesized property accessors in
+    // Objective-C. No need for a similar extra check for CallExit points
+    // because the exit edge comes from a statement (i.e. return),
+    // not from declaration.
+    if (D->hasBody())
+      addEdgeToPath(PD.getActivePath(), PrevLoc,
+          PathDiagnosticLocation::createBegin(D, SM), CalleeLC);
+
+    // Did we visit an entire call?
+    bool VisitedEntireCall = PD.isWithinCall();
+    PD.popActivePath();
+
+    PathDiagnosticCallPiece *C;
+    if (VisitedEntireCall) {
+      PathDiagnosticPiece *P = PD.getActivePath().front().get();
+      C = cast<PathDiagnosticCallPiece>(P);
+    } else {
+      const Decl *Caller = CE->getLocationContext()->getDecl();
+      C = PathDiagnosticCallPiece::construct(PD.getActivePath(), Caller);
+
+      // Since we just transferred the path over to the call piece,
+      // reset the mapping from active to location context.
+      assert(PD.getActivePath().size() == 1 &&
+          PD.getActivePath().front().get() == C);
+      LCM[&PD.getActivePath()] = nullptr;
+
+      // Record the location context mapping for the path within
+      // the call.
+      assert(LCM[&C->path] == nullptr ||
+          LCM[&C->path] == CE->getCalleeContext());
+      LCM[&C->path] = CE->getCalleeContext();
+
+      // If this is the first item in the active path, record
+      // the new mapping from active path to location context.
+      const LocationContext *&NewLC = LCM[&PD.getActivePath()];
+      if (!NewLC)
+        NewLC = N->getLocationContext();
+
+      PDB.LC = NewLC;
+    }
+    C->setCallee(*CE, SM);
+
+    // Update the previous location in the active path.
+    PrevLoc = C->getLocation();
+
+    if (!CallStack.empty()) {
+      assert(CallStack.back().first == C);
+      CallStack.pop_back();
+    }
+    return;
+  }
+
+  // Query the location context here and the previous location
+  // as processing CallEnter may change the active path.
+  PDB.LC = N->getLocationContext();
+
+  // Record the mapping from the active path to the location
+  // context.
+  assert(!LCM[&PD.getActivePath()] ||
+      LCM[&PD.getActivePath()] == PDB.LC);
+  LCM[&PD.getActivePath()] = PDB.LC;
+
+  // Have we encountered an exit from a function call?
+  if (Optional<CallExitEnd> CE = P.getAs<CallExitEnd>()) {
+    const Stmt *S = CE->getCalleeContext()->getCallSite();
+    // Propagate the interesting symbols accordingly.
+    if (const Expr *Ex = dyn_cast_or_null<Expr>(S)) {
+      reversePropagateIntererstingSymbols(*PDB.getBugReport(), IE,
+          N->getState().get(), Ex,
+          N->getLocationContext());
+    }
+
+    // We are descending into a call (backwards).  Construct
+    // a new call piece to contain the path pieces for that call.
+    auto C = PathDiagnosticCallPiece::construct(N, *CE, SM);
+
+    // Record the location context for this call piece.
+    LCM[&C->path] = CE->getCalleeContext();
+
+    // Add the edge to the return site.
+    addEdgeToPath(PD.getActivePath(), PrevLoc, C->callReturn, PDB.LC);
+    auto *P = C.get();
+    PD.getActivePath().push_front(std::move(C));
+    PrevLoc.invalidate();
+
+    // Make the contents of the call the active path for now.
+    PD.pushActivePath(&P->path);
+    CallStack.push_back(StackDiagPair(P, N));
+  } else if (Optional<PostStmt> PS = P.getAs<PostStmt>()) {
+    // For expressions, make sure we propagate the
+    // interesting symbols correctly.
+    if (const Expr *Ex = PS->getStmtAs<Expr>())
+      reversePropagateIntererstingSymbols(*PDB.getBugReport(), IE,
+          N->getState().get(), Ex,
+          N->getLocationContext());
+
+    // Add an edge.  If this is an ObjCForCollectionStmt do
+    // not add an edge here as it appears in the CFG both
+    // as a terminator and as a terminator condition.
+    if (!isa<ObjCForCollectionStmt>(PS->getStmt())) {
+      PathDiagnosticLocation L =
+        PathDiagnosticLocation(PS->getStmt(), SM, PDB.LC);
+      addEdgeToPath(PD.getActivePath(), PrevLoc, L, PDB.LC);
+    }
+  } else if (Optional<BlockEdge> BE = P.getAs<BlockEdge>()) {
+    // Does this represent entering a call?  If so, look at propagating
+    // interesting symbols across call boundaries.
+    if (NextNode) {
+      const LocationContext *CallerCtx = NextNode->getLocationContext();
+      const LocationContext *CalleeCtx = PDB.LC;
+      if (CallerCtx != CalleeCtx) {
+        reversePropagateInterestingSymbols(*PDB.getBugReport(), IE,
+            N->getState().get(),
+            CalleeCtx, CallerCtx);
+      }
+    }
+
+    // Are we jumping to the head of a loop?  Add a special diagnostic.
+    if (const Stmt *Loop = BE->getSrc()->getLoopTarget()) {
+      PathDiagnosticLocation L(Loop, SM, PDB.LC);
+      const Stmt *Body = nullptr;
+
+      if (const ForStmt *FS = dyn_cast<ForStmt>(Loop))
+        Body = FS->getBody();
+      else if (const WhileStmt *WS = dyn_cast<WhileStmt>(Loop))
+        Body = WS->getBody();
+      else if (const ObjCForCollectionStmt *OFS =
+          dyn_cast<ObjCForCollectionStmt>(Loop)) {
+        Body = OFS->getBody();
+      } else if (const CXXForRangeStmt *FRS =
+          dyn_cast<CXXForRangeStmt>(Loop)) {
+        Body = FRS->getBody();
+      }
+      // do-while statements are explicitly excluded here
+
+      auto p = std::make_shared<PathDiagnosticEventPiece>(
+          L, "Looping back to the head "
+          "of the loop");
+      p->setPrunable(true);
+
+      addEdgeToPath(PD.getActivePath(), PrevLoc, p->getLocation(), PDB.LC);
+      PD.getActivePath().push_front(std::move(p));
+
+      if (const CompoundStmt *CS = dyn_cast_or_null<CompoundStmt>(Body)) {
+        addEdgeToPath(PD.getActivePath(), PrevLoc,
+            PathDiagnosticLocation::createEndBrace(CS, SM),
+            PDB.LC);
+      }
+    }
+
+    const CFGBlock *BSrc = BE->getSrc();
+    ParentMap &PM = PDB.getParentMap();
+
+    if (const Stmt *Term = BSrc->getTerminator()) {
+      // Are we jumping past the loop body without ever executing the
+      // loop (because the condition was false)?
+      if (isLoop(Term)) {
+        const Stmt *TermCond = getTerminatorCondition(BSrc);
+        bool IsInLoopBody =
+          isInLoopBody(PM, getStmtBeforeCond(PM, TermCond, N), Term);
+
+        const char *str = nullptr;
+
+        if (isJumpToFalseBranch(&*BE)) {
+          if (!IsInLoopBody) {
+            if (isa<ObjCForCollectionStmt>(Term)) {
+              str = StrLoopCollectionEmpty;
+            } else if (isa<CXXForRangeStmt>(Term)) {
+              str = StrLoopRangeEmpty;
+            } else {
+              str = StrLoopBodyZero;
+            }
+          }
+        } else {
+          str = StrEnteringLoop;
+        }
+
+        if (str) {
+          PathDiagnosticLocation L(TermCond ? TermCond : Term, SM, PDB.LC);
+          auto PE = std::make_shared<PathDiagnosticEventPiece>(L, str);
+          PE->setPrunable(true);
+          addEdgeToPath(PD.getActivePath(), PrevLoc,
+              PE->getLocation(), PDB.LC);
+          PD.getActivePath().push_front(std::move(PE));
+        }
+      } else if (isa<BreakStmt>(Term) || isa<ContinueStmt>(Term) ||
+          isa<GotoStmt>(Term)) {
+        PathDiagnosticLocation L(Term, SM, PDB.LC);
+        addEdgeToPath(PD.getActivePath(), PrevLoc, L, PDB.LC);
+      }
+    }
+  }
+}
+
 static bool GenerateAlternateExtensivePathDiagnostic(
     PathDiagnostic &PD, PathDiagnosticBuilder &PDB, const ExplodedNode *N,
     LocationContextMap &LCM,
@@ -1660,227 +1890,20 @@
   while (NextNode) {
     N = NextNode;
     NextNode = N->getFirstPred();
-    ProgramPoint P = N->getLocation();
 
-    do {
-      // Have we encountered an entrance to a call?  It may be
-      // the case that we have not encountered a matching
-      // call exit before this point.  This means that the path
-      // terminated within the call itself.
-      if (Optional<CallEnter> CE = P.getAs<CallEnter>()) {
-        // Add an edge to the start of the function.
-        const StackFrameContext *CalleeLC = CE->getCalleeContext();
-        const Decl *D = CalleeLC->getDecl();
-        // Add the edge only when the callee has body. We jump to the beginning
-        // of the *declaration*, however we expect it to be followed by the
-        // body. This isn't the case for autosynthesized property accessors in
-        // Objective-C. No need for a similar extra check for CallExit points
-        // because the exit edge comes from a statement (i.e. return),
-        // not from declaration.
-        if (D->hasBody())
-          addEdgeToPath(PD.getActivePath(), PrevLoc,
-                        PathDiagnosticLocation::createBegin(D, SM), CalleeLC);
-
-        // Did we visit an entire call?
-        bool VisitedEntireCall = PD.isWithinCall();
-        PD.popActivePath();
-
-        PathDiagnosticCallPiece *C;
-        if (VisitedEntireCall) {
-          PathDiagnosticPiece *P = PD.getActivePath().front().get();
-          C = cast<PathDiagnosticCallPiece>(P);
-        } else {
-          const Decl *Caller = CE->getLocationContext()->getDecl();
-          C = PathDiagnosticCallPiece::construct(PD.getActivePath(), Caller);
-
-          // Since we just transferred the path over to the call piece,
-          // reset the mapping from active to location context.
-          assert(PD.getActivePath().size() == 1 &&
-                 PD.getActivePath().front().get() == C);
-          LCM[&PD.getActivePath()] = nullptr;
-
-          // Record the location context mapping for the path within
-          // the call.
-          assert(LCM[&C->path] == nullptr ||
-                 LCM[&C->path] == CE->getCalleeContext());
-          LCM[&C->path] = CE->getCalleeContext();
-
-          // If this is the first item in the active path, record
-          // the new mapping from active path to location context.
-          const LocationContext *&NewLC = LCM[&PD.getActivePath()];
-          if (!NewLC)
-            NewLC = N->getLocationContext();
-
-          PDB.LC = NewLC;
-        }
-        C->setCallee(*CE, SM);
-
-        // Update the previous location in the active path.
-        PrevLoc = C->getLocation();
-
-        if (!CallStack.empty()) {
-          assert(CallStack.back().first == C);
-          CallStack.pop_back();
-        }
-        break;
-      }
-
-      // Query the location context here and the previous location
-      // as processing CallEnter may change the active path.
-      PDB.LC = N->getLocationContext();
-
-      // Record the mapping from the active path to the location
-      // context.
-      assert(!LCM[&PD.getActivePath()] ||
-             LCM[&PD.getActivePath()] == PDB.LC);
-      LCM[&PD.getActivePath()] = PDB.LC;
-
-      // Have we encountered an exit from a function call?
-      if (Optional<CallExitEnd> CE = P.getAs<CallExitEnd>()) {
-        const Stmt *S = CE->getCalleeContext()->getCallSite();
-        // Propagate the interesting symbols accordingly.
-        if (const Expr *Ex = dyn_cast_or_null<Expr>(S)) {
-          reversePropagateIntererstingSymbols(*PDB.getBugReport(), IE,
-                                              N->getState().get(), Ex,
-                                              N->getLocationContext());
-        }
-
-        // We are descending into a call (backwards).  Construct
-        // a new call piece to contain the path pieces for that call.
-        auto C = PathDiagnosticCallPiece::construct(N, *CE, SM);
-
-        // Record the location context for this call piece.
-        LCM[&C->path] = CE->getCalleeContext();
-
-        // Add the edge to the return site.
-        addEdgeToPath(PD.getActivePath(), PrevLoc, C->callReturn, PDB.LC);
-        auto *P = C.get();
-        PD.getActivePath().push_front(std::move(C));
-        PrevLoc.invalidate();
-
-        // Make the contents of the call the active path for now.
-        PD.pushActivePath(&P->path);
-        CallStack.push_back(StackDiagPair(P, N));
-        break;
-      }
-
-      if (Optional<PostStmt> PS = P.getAs<PostStmt>()) {
-        // For expressions, make sure we propagate the
-        // interesting symbols correctly.
-        if (const Expr *Ex = PS->getStmtAs<Expr>())
-          reversePropagateIntererstingSymbols(*PDB.getBugReport(), IE,
-                                              N->getState().get(), Ex,
-                                              N->getLocationContext());
-
-        // Add an edge.  If this is an ObjCForCollectionStmt do
-        // not add an edge here as it appears in the CFG both
-        // as a terminator and as a terminator condition.
-        if (!isa<ObjCForCollectionStmt>(PS->getStmt())) {
-          PathDiagnosticLocation L =
-            PathDiagnosticLocation(PS->getStmt(), SM, PDB.LC);
-          addEdgeToPath(PD.getActivePath(), PrevLoc, L, PDB.LC);
-        }
-        break;
-      }
-
-      // Block edges.
-      if (Optional<BlockEdge> BE = P.getAs<BlockEdge>()) {
-        // Does this represent entering a call?  If so, look at propagating
-        // interesting symbols across call boundaries.
-        if (NextNode) {
-          const LocationContext *CallerCtx = NextNode->getLocationContext();
-          const LocationContext *CalleeCtx = PDB.LC;
-          if (CallerCtx != CalleeCtx) {
-            reversePropagateInterestingSymbols(*PDB.getBugReport(), IE,
-                                               N->getState().get(),
-                                               CalleeCtx, CallerCtx);
-          }
-        }
-
-        // Are we jumping to the head of a loop?  Add a special diagnostic.
-        if (const Stmt *Loop = BE->getSrc()->getLoopTarget()) {
-          PathDiagnosticLocation L(Loop, SM, PDB.LC);
-          const Stmt *Body = nullptr;
-
-          if (const ForStmt *FS = dyn_cast<ForStmt>(Loop))
-            Body = FS->getBody();
-          else if (const WhileStmt *WS = dyn_cast<WhileStmt>(Loop))
-            Body = WS->getBody();
-          else if (const ObjCForCollectionStmt *OFS =
-                     dyn_cast<ObjCForCollectionStmt>(Loop)) {
-            Body = OFS->getBody();
-          } else if (const CXXForRangeStmt *FRS =
-                       dyn_cast<CXXForRangeStmt>(Loop)) {
-            Body = FRS->getBody();
-          }
-          // do-while statements are explicitly excluded here
-
-          auto p = std::make_shared<PathDiagnosticEventPiece>(
-              L, "Looping back to the head "
-                 "of the loop");
-          p->setPrunable(true);
-
-          addEdgeToPath(PD.getActivePath(), PrevLoc, p->getLocation(), PDB.LC);
-          PD.getActivePath().push_front(std::move(p));
-
-          if (const CompoundStmt *CS = dyn_cast_or_null<CompoundStmt>(Body)) {
-            addEdgeToPath(PD.getActivePath(), PrevLoc,
-                          PathDiagnosticLocation::createEndBrace(CS, SM),
-                          PDB.LC);
-          }
-        }
-
-        const CFGBlock *BSrc = BE->getSrc();
-        ParentMap &PM = PDB.getParentMap();
-
-        if (const Stmt *Term = BSrc->getTerminator()) {
-          // Are we jumping past the loop body without ever executing the
-          // loop (because the condition was false)?
-          if (isLoop(Term)) {
-            const Stmt *TermCond = getTerminatorCondition(BSrc);
-            bool IsInLoopBody =
-              isInLoopBody(PM, getStmtBeforeCond(PM, TermCond, N), Term);
-
-            const char *str = nullptr;
-
-            if (isJumpToFalseBranch(&*BE)) {
-              if (!IsInLoopBody) {
-                if (isa<ObjCForCollectionStmt>(Term)) {
-                  str = StrLoopCollectionEmpty;
-                } else if (isa<CXXForRangeStmt>(Term)) {
-                  str = StrLoopRangeEmpty;
-                } else {
-                  str = StrLoopBodyZero;
-                }
-              }
-            } else {
-              str = StrEnteringLoop;
-            }
-
-            if (str) {
-              PathDiagnosticLocation L(TermCond ? TermCond : Term, SM, PDB.LC);
-              auto PE = std::make_shared<PathDiagnosticEventPiece>(L, str);
-              PE->setPrunable(true);
-              addEdgeToPath(PD.getActivePath(), PrevLoc,
-                            PE->getLocation(), PDB.LC);
-              PD.getActivePath().push_front(std::move(PE));
-            }
-          } else if (isa<BreakStmt>(Term) || isa<ContinueStmt>(Term) ||
-                     isa<GotoStmt>(Term)) {
-            PathDiagnosticLocation L(Term, SM, PDB.LC);
-            addEdgeToPath(PD.getActivePath(), PrevLoc, L, PDB.LC);
-          }
-        }
-        break;
-      }
-    } while (0);
+    generateAlternateExtensiveDiagnosticsForNode(
+        N, PD, PrevLoc, PDB, LCM, CallStack, IE);
 
     if (!NextNode)
       continue;
 
     // Add pieces from custom visitors.
+    llvm::FoldingSet<PathDiagnosticPiece> DeduplicationSet;
     for (auto &V : visitors) {
       if (auto p = V->VisitNode(N, NextNode, PDB, *report)) {
+        if (DeduplicationSet.GetOrInsertNode(p.get()) != p.get())
+          continue;
+
         addEdgeToPath(PD.getActivePath(), PrevLoc, p->getLocation(), PDB.LC);
         updateStackPiecesWithMessage(*p, CallStack);
         PD.getActivePath().push_front(std::move(p));
@@ -3498,6 +3521,84 @@
   }
 }
 
+/// Insert all lines participating in the function signature \p Signature
+/// into \p ExecutedLines.
+static void populateExecutedLinesWithFunctionSignature(
+    const Decl *Signature, SourceManager &SM,
+    std::unique_ptr<FilesToLineNumsMap> &ExecutedLines) {
+
+  SourceRange SignatureSourceRange;
+  const Stmt* Body = Signature->getBody();
+  if (auto FD = dyn_cast<FunctionDecl>(Signature)) {
+    SignatureSourceRange = FD->getSourceRange();
+  } else if (auto OD = dyn_cast<ObjCMethodDecl>(Signature)) {
+    SignatureSourceRange = OD->getSourceRange();
+  } else {
+    return;
+  }
+  SourceLocation Start = SignatureSourceRange.getBegin();
+  SourceLocation End = Body ? Body->getSourceRange().getBegin()
+    : SignatureSourceRange.getEnd();
+  unsigned StartLine = SM.getExpansionLineNumber(Start);
+  unsigned EndLine = SM.getExpansionLineNumber(End);
+
+  FileID FID = SM.getFileID(SM.getExpansionLoc(Start));
+  for (unsigned Line = StartLine; Line <= EndLine; Line++)
+    ExecutedLines->operator[](FID.getHashValue()).insert(Line);
+}
+
+static void populateExecutedLinesWithStmt(
+    const Stmt *S, SourceManager &SM,
+    std::unique_ptr<FilesToLineNumsMap> &ExecutedLines) {
+  SourceLocation Loc = S->getSourceRange().getBegin();
+  SourceLocation ExpansionLoc = SM.getExpansionLoc(Loc);
+  FileID FID = SM.getFileID(ExpansionLoc);
+  unsigned LineNo = SM.getExpansionLineNumber(ExpansionLoc);
+  ExecutedLines->operator[](FID.getHashValue()).insert(LineNo);
+}
+
+/// \return all executed lines including function signatures on the path
+/// starting from \p N.
+static std::unique_ptr<FilesToLineNumsMap>
+findExecutedLines(SourceManager &SM, const ExplodedNode *N) {
+  auto ExecutedLines = llvm::make_unique<FilesToLineNumsMap>();
+
+  while (N) {
+    if (N->getFirstPred() == nullptr) {
+
+      // First node: show signature of the entrance point.
+      const Decl *D = N->getLocationContext()->getDecl();
+      populateExecutedLinesWithFunctionSignature(D, SM, ExecutedLines);
+
+    } else if (auto CE = N->getLocationAs<CallEnter>()) {
+
+      // Inlined function: show signature.
+      const Decl* D = CE->getCalleeContext()->getDecl();
+      populateExecutedLinesWithFunctionSignature(D, SM, ExecutedLines);
+
+    } else if (const Stmt *S = PathDiagnosticLocation::getStmt(N)) {
+      populateExecutedLinesWithStmt(S, SM, ExecutedLines);
+
+      // Show extra context for some parent kinds.
+      const Stmt *P = N->getParentMap().getParent(S);
+
+      // The path exploration can die before the node with the associated
+      // return statement is generated, but we do want to show the whole
+      // return.
+      if (auto *RS = dyn_cast_or_null<ReturnStmt>(P)) {
+        populateExecutedLinesWithStmt(RS, SM, ExecutedLines);
+        P = N->getParentMap().getParent(RS);
+      }
+
+      if (P && (isa<SwitchCase>(P) || isa<LabelStmt>(P)))
+        populateExecutedLinesWithStmt(P, SM, ExecutedLines);
+    }
+
+    N = N->getFirstPred();
+  }
+  return ExecutedLines;
+}
+
 void BugReporter::FlushReport(BugReport *exampleReport,
                               PathDiagnosticConsumer &PD,
                               ArrayRef<BugReport*> bugReports) {
@@ -3506,13 +3607,13 @@
   // Probably doesn't make a difference in practice.
   BugType& BT = exampleReport->getBugType();
 
-  std::unique_ptr<PathDiagnostic> D(new PathDiagnostic(
+  auto D = llvm::make_unique<PathDiagnostic>(
       exampleReport->getBugType().getCheckName(),
       exampleReport->getDeclWithIssue(), exampleReport->getBugType().getName(),
       exampleReport->getDescription(),
       exampleReport->getShortDescription(/*Fallback=*/false), BT.getCategory(),
-      exampleReport->getUniqueingLocation(),
-      exampleReport->getUniqueingDecl()));
+      exampleReport->getUniqueingLocation(), exampleReport->getUniqueingDecl(),
+      findExecutedLines(getSourceManager(), exampleReport->getErrorNode()));
 
   if (exampleReport->isPathSensitive()) {
     // Generate the full path diagnostic, using the generation scheme
diff --git a/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index 5f00b51..f5b4e7c 100644
--- a/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -11,7 +11,7 @@
 //  enhance the diagnostics reported for a bug.
 //
 //===----------------------------------------------------------------------===//
-#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitor.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprObjC.h"
 #include "clang/Analysis/CFGStmtMap.h"
@@ -159,8 +159,380 @@
   return std::move(P);
 }
 
+/// \return name of the macro inside the location \p Loc.
+static StringRef getMacroName(SourceLocation Loc,
+    BugReporterContext &BRC) {
+  return Lexer::getImmediateMacroName(
+      Loc,
+      BRC.getSourceManager(),
+      BRC.getASTContext().getLangOpts());
+}
+
+/// \return Whether given spelling location corresponds to an expansion
+/// of a function-like macro.
+static bool isFunctionMacroExpansion(SourceLocation Loc,
+                                const SourceManager &SM) {
+  if (!Loc.isMacroID())
+    return false;
+  while (SM.isMacroArgExpansion(Loc))
+    Loc = SM.getImmediateExpansionRange(Loc).first;
+  std::pair<FileID, unsigned> TLInfo = SM.getDecomposedLoc(Loc);
+  SrcMgr::SLocEntry SE = SM.getSLocEntry(TLInfo.first);
+  const SrcMgr::ExpansionInfo &EInfo = SE.getExpansion();
+  return EInfo.isFunctionMacroExpansion();
+}
 
 namespace {
+
+/// Put a diagnostic on return statement of all inlined functions
+/// for which  the region of interest \p RegionOfInterest was passed into,
+/// but not written inside, and it has caused an undefined read or a null
+/// pointer dereference outside.
+class NoStoreFuncVisitor final
+    : public BugReporterVisitorImpl<NoStoreFuncVisitor> {
+
+  const SubRegion *RegionOfInterest;
+  static constexpr const char *DiagnosticsMsg =
+      "Returning without writing to '";
+  bool Initialized = false;
+
+  /// Frames writing into \c RegionOfInterest.
+  /// This visitor generates a note only if a function does not write into
+  /// a region of interest. This information is not immediately available
+  /// by looking at the node associated with the exit from the function
+  /// (usually the return statement). To avoid recomputing the same information
+  /// many times (going up the path for each node and checking whether the
+  /// region was written into) we instead pre-compute and store all
+  /// stack frames along the path which write into the region of interest
+  /// on the first \c VisitNode invocation.
+  llvm::SmallPtrSet<const StackFrameContext *, 32> FramesModifyingRegion;
+
+public:
+  NoStoreFuncVisitor(const SubRegion *R) : RegionOfInterest(R) {}
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override {
+    static int Tag = 0;
+    ID.AddPointer(&Tag);
+  }
+
+  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
+                                                 const ExplodedNode *PrevN,
+                                                 BugReporterContext &BRC,
+                                                 BugReport &BR) override {
+    if (!Initialized) {
+      findModifyingFrames(N);
+      Initialized = true;
+    }
+
+    const LocationContext *Ctx = N->getLocationContext();
+    const StackFrameContext *SCtx = Ctx->getCurrentStackFrame();
+    ProgramStateRef State = N->getState();
+    auto CallExitLoc = N->getLocationAs<CallExitBegin>();
+
+    // No diagnostic if region was modified inside the frame.
+    if (!CallExitLoc || FramesModifyingRegion.count(SCtx))
+      return nullptr;
+
+    CallEventRef<> Call =
+        BRC.getStateManager().getCallEventManager().getCaller(SCtx, State);
+
+    const PrintingPolicy &PP = BRC.getASTContext().getPrintingPolicy();
+    const SourceManager &SM = BRC.getSourceManager();
+    if (auto *CCall = dyn_cast<CXXConstructorCall>(Call)) {
+      const MemRegion *ThisRegion = CCall->getCXXThisVal().getAsRegion();
+      if (RegionOfInterest->isSubRegionOf(ThisRegion) &&
+          !CCall->getDecl()->isImplicit())
+        return notModifiedInConstructorDiagnostics(Ctx, SM, PP, *CallExitLoc,
+                                                   CCall, ThisRegion);
+    }
+
+    ArrayRef<ParmVarDecl *> parameters = getCallParameters(Call);
+    for (unsigned I = 0, E = Call->getNumArgs(); I != E; ++I) {
+      const ParmVarDecl *PVD = parameters[I];
+      SVal S = Call->getArgSVal(I);
+      unsigned IndirectionLevel = 1;
+      QualType T = PVD->getType();
+      while (const MemRegion *R = S.getAsRegion()) {
+        if (RegionOfInterest->isSubRegionOf(R) &&
+            !isPointerToConst(PVD->getType()))
+          return notModifiedDiagnostics(
+              Ctx, SM, PP, *CallExitLoc, Call, PVD, R, IndirectionLevel);
+        QualType PT = T->getPointeeType();
+        if (PT.isNull() || PT->isVoidType()) break;
+        S = State->getSVal(R, PT);
+        T = PT;
+        IndirectionLevel++;
+      }
+    }
+
+    return nullptr;
+  }
+
+private:
+  /// Write to \c FramesModifyingRegion all stack frames along
+  /// the path which modify \c RegionOfInterest.
+  void findModifyingFrames(const ExplodedNode *N) {
+    ProgramStateRef LastReturnState;
+    do {
+      ProgramStateRef State = N->getState();
+      auto CallExitLoc = N->getLocationAs<CallExitBegin>();
+      if (CallExitLoc) {
+        LastReturnState = State;
+      }
+      if (LastReturnState &&
+          wasRegionOfInterestModifiedAt(N, LastReturnState)) {
+        const StackFrameContext *SCtx =
+            N->getLocationContext()->getCurrentStackFrame();
+        while (!SCtx->inTopFrame()) {
+          auto p = FramesModifyingRegion.insert(SCtx);
+          if (!p.second)
+            break; // Frame and all its parents already inserted.
+          SCtx = SCtx->getParent()->getCurrentStackFrame();
+        }
+      }
+
+      N = N->getFirstPred();
+    } while (N);
+  }
+
+  /// \return Whether \c RegionOfInterest was modified at \p N,
+  /// where \p ReturnState is a state associated with the return
+  /// from the current frame.
+  bool wasRegionOfInterestModifiedAt(const ExplodedNode *N,
+                                     ProgramStateRef ReturnState) {
+    SVal ValueAtReturn = ReturnState->getSVal(RegionOfInterest);
+
+    // Writing into region of interest.
+    if (auto PS = N->getLocationAs<PostStmt>())
+      if (auto *BO = PS->getStmtAs<BinaryOperator>())
+        if (BO->isAssignmentOp() && RegionOfInterest->isSubRegionOf(
+                                        N->getSVal(BO->getLHS()).getAsRegion()))
+          return true;
+
+    // SVal after the state is possibly different.
+    SVal ValueAtN = N->getState()->getSVal(RegionOfInterest);
+    if (!ReturnState->areEqual(ValueAtN, ValueAtReturn).isConstrainedTrue() &&
+        (!ValueAtN.isUndef() || !ValueAtReturn.isUndef()))
+      return true;
+
+    return false;
+  }
+
+  /// Get parameters associated with runtime definition in order
+  /// to get the correct parameter name.
+  ArrayRef<ParmVarDecl *> getCallParameters(CallEventRef<> Call) {
+    // Use runtime definition, if available.
+    RuntimeDefinition RD = Call->getRuntimeDefinition();
+    if (auto *FD = dyn_cast_or_null<FunctionDecl>(RD.getDecl()))
+      return FD->parameters();
+
+    return Call->parameters();
+  }
+
+  /// \return whether \p Ty points to a const type, or is a const reference.
+  bool isPointerToConst(QualType Ty) {
+    return !Ty->getPointeeType().isNull() &&
+           Ty->getPointeeType().getCanonicalType().isConstQualified();
+  }
+
+  std::shared_ptr<PathDiagnosticPiece> notModifiedInConstructorDiagnostics(
+      const LocationContext *Ctx,
+      const SourceManager &SM,
+      const PrintingPolicy &PP,
+      CallExitBegin &CallExitLoc,
+      const CXXConstructorCall *Call,
+      const MemRegion *ArgRegion) {
+
+    SmallString<256> sbuf;
+    llvm::raw_svector_ostream os(sbuf);
+    os << DiagnosticsMsg;
+    bool out = prettyPrintRegionName(
+        "this", "->", /*IsReference=*/true,
+        /*IndirectionLevel=*/1, ArgRegion, os, PP);
+
+    // Return nothing if we have failed to pretty-print.
+    if (!out)
+      return nullptr;
+
+    os << "'";
+    PathDiagnosticLocation L =
+        getPathDiagnosticLocation(nullptr, SM, Ctx, Call);
+    return std::make_shared<PathDiagnosticEventPiece>(L, os.str());
+  }
+
+  /// \p IndirectionLevel How many times \c ArgRegion has to be dereferenced
+  /// before we get to the super region of \c RegionOfInterest
+  std::shared_ptr<PathDiagnosticPiece>
+  notModifiedDiagnostics(const LocationContext *Ctx,
+                         const SourceManager &SM,
+                         const PrintingPolicy &PP,
+                         CallExitBegin &CallExitLoc,
+                         CallEventRef<> Call,
+                         const ParmVarDecl *PVD,
+                         const MemRegion *ArgRegion,
+                         unsigned IndirectionLevel) {
+
+    PathDiagnosticLocation L = getPathDiagnosticLocation(
+        CallExitLoc.getReturnStmt(), SM, Ctx, Call);
+    SmallString<256> sbuf;
+    llvm::raw_svector_ostream os(sbuf);
+    os << DiagnosticsMsg;
+    bool IsReference = PVD->getType()->isReferenceType();
+    const char *Sep = IsReference && IndirectionLevel == 1 ? "." : "->";
+    bool Success = prettyPrintRegionName(
+        PVD->getQualifiedNameAsString().c_str(),
+        Sep, IsReference, IndirectionLevel, ArgRegion, os, PP);
+
+    // Print the parameter name if the pretty-printing has failed.
+    if (!Success)
+      PVD->printQualifiedName(os);
+    os << "'";
+    return std::make_shared<PathDiagnosticEventPiece>(L, os.str());
+  }
+
+  /// \return a path diagnostic location for the optionally
+  /// present return statement \p RS.
+  PathDiagnosticLocation getPathDiagnosticLocation(const ReturnStmt *RS,
+                                                   const SourceManager &SM,
+                                                   const LocationContext *Ctx,
+                                                   CallEventRef<> Call) {
+    if (RS)
+      return PathDiagnosticLocation::createBegin(RS, SM, Ctx);
+    return PathDiagnosticLocation(
+        Call->getRuntimeDefinition().getDecl()->getSourceRange().getEnd(), SM);
+  }
+
+  /// Pretty-print region \p ArgRegion starting from parent to \p os.
+  /// \return whether printing has succeeded
+  bool prettyPrintRegionName(const char *TopRegionName,
+                             const char *Sep,
+                             bool IsReference,
+                             int IndirectionLevel,
+                             const MemRegion *ArgRegion,
+                             llvm::raw_svector_ostream &os,
+                             const PrintingPolicy &PP) {
+    SmallVector<const MemRegion *, 5> Subregions;
+    const MemRegion *R = RegionOfInterest;
+    while (R != ArgRegion) {
+      if (!(isa<FieldRegion>(R) || isa<CXXBaseObjectRegion>(R)))
+        return false; // Pattern-matching failed.
+      Subregions.push_back(R);
+      R = dyn_cast<SubRegion>(R)->getSuperRegion();
+    }
+    bool IndirectReference = !Subregions.empty();
+
+    if (IndirectReference)
+      IndirectionLevel--; // Due to "->" symbol.
+
+    if (IsReference)
+      IndirectionLevel--; // Due to reference semantics.
+
+    bool ShouldSurround = IndirectReference && IndirectionLevel > 0;
+
+    if (ShouldSurround)
+      os << "(";
+    for (int i=0; i<IndirectionLevel; i++)
+      os << "*";
+    os << TopRegionName;
+    if (ShouldSurround)
+      os << ")";
+
+    for (auto I = Subregions.rbegin(), E = Subregions.rend(); I != E; ++I) {
+      if (auto *FR = dyn_cast<FieldRegion>(*I)) {
+        os << Sep;
+        FR->getDecl()->getDeclName().print(os, PP);
+        Sep = ".";
+      } else if (isa<CXXBaseObjectRegion>(*I)) {
+        continue; // Just keep going up to the base region.
+      } else {
+        llvm_unreachable("Previous check has missed an unexpected region");
+      }
+    }
+    return true;
+  }
+};
+
+} // namespace
+
+namespace {
+
+class MacroNullReturnSuppressionVisitor final
+    : public BugReporterVisitorImpl<MacroNullReturnSuppressionVisitor> {
+
+  const SubRegion *RegionOfInterest;
+
+public:
+  MacroNullReturnSuppressionVisitor(const SubRegion *R) : RegionOfInterest(R) {}
+
+  static void *getTag() {
+    static int Tag = 0;
+    return static_cast<void *>(&Tag);
+  }
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override {
+    ID.AddPointer(getTag());
+  }
+
+  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
+                                                 const ExplodedNode *PrevN,
+                                                 BugReporterContext &BRC,
+                                                 BugReport &BR) override {
+    auto BugPoint = BR.getErrorNode()->getLocation().getAs<StmtPoint>();
+    if (!BugPoint)
+      return nullptr;
+
+    const SourceManager &SMgr = BRC.getSourceManager();
+    if (auto Loc = matchAssignment(N, BRC)) {
+      if (isFunctionMacroExpansion(*Loc, SMgr)) {
+        std::string MacroName = getMacroName(*Loc, BRC);
+        SourceLocation BugLoc = BugPoint->getStmt()->getLocStart();
+        if (!BugLoc.isMacroID() || getMacroName(BugLoc, BRC) != MacroName)
+          BR.markInvalid(getTag(), MacroName.c_str());
+      }
+    }
+    return nullptr;
+  }
+
+  static void addMacroVisitorIfNecessary(
+        const ExplodedNode *N, const MemRegion *R,
+        bool EnableNullFPSuppression, BugReport &BR,
+        const SVal V) {
+    AnalyzerOptions &Options = N->getState()->getStateManager()
+        .getOwningEngine()->getAnalysisManager().options;
+    if (EnableNullFPSuppression && Options.shouldSuppressNullReturnPaths()
+          && V.getAs<Loc>())
+      BR.addVisitor(llvm::make_unique<MacroNullReturnSuppressionVisitor>(
+              R->getAs<SubRegion>()));
+  }
+
+private:
+  /// \return Source location of right hand side of an assignment
+  /// into \c RegionOfInterest, empty optional if none found.
+  Optional<SourceLocation> matchAssignment(const ExplodedNode *N,
+                                           BugReporterContext &BRC) {
+    const Stmt *S = PathDiagnosticLocation::getStmt(N);
+    ProgramStateRef State = N->getState();
+    auto *LCtx = N->getLocationContext();
+    if (!S)
+      return None;
+
+    if (auto *DS = dyn_cast<DeclStmt>(S)) {
+      if (const VarDecl *VD = dyn_cast<VarDecl>(DS->getSingleDecl()))
+        if (const Expr *RHS = VD->getInit())
+          if (RegionOfInterest->isSubRegionOf(
+                  State->getLValue(VD, LCtx).getAsRegion()))
+            return RHS->getLocStart();
+    } else if (auto *BO = dyn_cast<BinaryOperator>(S)) {
+      const MemRegion *R = N->getSVal(BO->getLHS()).getAsRegion();
+      const Expr *RHS = BO->getRHS();
+      if (BO->isAssignmentOp() && RegionOfInterest->isSubRegionOf(R)) {
+        return RHS->getLocStart();
+      }
+    }
+    return None;
+  }
+};
+
 /// Emits an extra note at the return statement of an interesting stack frame.
 ///
 /// The returned value is marked as an interesting value, and if it's null,
@@ -235,7 +607,7 @@
 
     // Check the return value.
     ProgramStateRef State = Node->getState();
-    SVal RetVal = State->getSVal(S, Node->getLocationContext());
+    SVal RetVal = Node->getSVal(S);
 
     // Handle cases where a reference is returned and then immediately used.
     if (cast<Expr>(S)->isGLValue())
@@ -480,6 +852,127 @@
   return FrameSpace->getStackFrame() == LCtx->getCurrentStackFrame();
 }
 
+/// Show diagnostics for initializing or declaring a region \p R with a bad value.
+void showBRDiagnostics(const char *action,
+    llvm::raw_svector_ostream& os,
+    const MemRegion *R,
+    SVal V,
+    const DeclStmt *DS) {
+  if (R->canPrintPretty()) {
+    R->printPretty(os);
+    os << " ";
+  }
+
+  if (V.getAs<loc::ConcreteInt>()) {
+    bool b = false;
+    if (R->isBoundable()) {
+      if (const TypedValueRegion *TR = dyn_cast<TypedValueRegion>(R)) {
+        if (TR->getValueType()->isObjCObjectPointerType()) {
+          os << action << "nil";
+          b = true;
+        }
+      }
+    }
+    if (!b)
+      os << action << "a null pointer value";
+
+  } else if (auto CVal = V.getAs<nonloc::ConcreteInt>()) {
+    os << action << CVal->getValue();
+  } else if (DS) {
+    if (V.isUndef()) {
+      if (isa<VarRegion>(R)) {
+        const VarDecl *VD = cast<VarDecl>(DS->getSingleDecl());
+        if (VD->getInit()) {
+          os << (R->canPrintPretty() ? "initialized" : "Initializing")
+            << " to a garbage value";
+        } else {
+          os << (R->canPrintPretty() ? "declared" : "Declaring")
+            << " without an initial value";
+        }
+      }
+    } else {
+      os << (R->canPrintPretty() ? "initialized" : "Initialized")
+        << " here";
+    }
+  }
+}
+
+/// Display diagnostics for passing bad region as a parameter.
+static void showBRParamDiagnostics(llvm::raw_svector_ostream& os,
+    const VarRegion *VR,
+    SVal V) {
+  const auto *Param = cast<ParmVarDecl>(VR->getDecl());
+
+  os << "Passing ";
+
+  if (V.getAs<loc::ConcreteInt>()) {
+    if (Param->getType()->isObjCObjectPointerType())
+      os << "nil object reference";
+    else
+      os << "null pointer value";
+  } else if (V.isUndef()) {
+    os << "uninitialized value";
+  } else if (auto CI = V.getAs<nonloc::ConcreteInt>()) {
+    os << "the value " << CI->getValue();
+  } else {
+    os << "value";
+  }
+
+  // Printed parameter indexes are 1-based, not 0-based.
+  unsigned Idx = Param->getFunctionScopeIndex() + 1;
+  os << " via " << Idx << llvm::getOrdinalSuffix(Idx) << " parameter";
+  if (VR->canPrintPretty()) {
+    os << " ";
+    VR->printPretty(os);
+  }
+}
+
+/// Show default diagnostics for storing bad region.
+static void showBRDefaultDiagnostics(llvm::raw_svector_ostream& os,
+    const MemRegion *R,
+    SVal V) {
+  if (V.getAs<loc::ConcreteInt>()) {
+    bool b = false;
+    if (R->isBoundable()) {
+      if (const TypedValueRegion *TR = dyn_cast<TypedValueRegion>(R)) {
+        if (TR->getValueType()->isObjCObjectPointerType()) {
+          os << "nil object reference stored";
+          b = true;
+        }
+      }
+    }
+    if (!b) {
+      if (R->canPrintPretty())
+        os << "Null pointer value stored";
+      else
+        os << "Storing null pointer value";
+    }
+
+  } else if (V.isUndef()) {
+    if (R->canPrintPretty())
+      os << "Uninitialized value stored";
+    else
+      os << "Storing uninitialized value";
+
+  } else if (auto CV = V.getAs<nonloc::ConcreteInt>()) {
+    if (R->canPrintPretty())
+      os << "The value " << CV->getValue() << " is assigned";
+    else
+      os << "Assigning " << CV->getValue();
+
+  } else {
+    if (R->canPrintPretty())
+      os << "Value assigned";
+    else
+      os << "Assigning value";
+  }
+
+  if (R->canPrintPretty()) {
+    os << " to ";
+    R->printPretty(os);
+  }
+}
+
 std::shared_ptr<PathDiagnosticPiece>
 FindLastStoreBRVisitor::VisitNode(const ExplodedNode *Succ,
                                   const ExplodedNode *Pred,
@@ -596,7 +1089,7 @@
       if (VR) {
         // See if we can get the BlockVarRegion.
         ProgramStateRef State = StoreSite->getState();
-        SVal V = State->getSVal(S, PS->getLocationContext());
+        SVal V = StoreSite->getSVal(S);
         if (const BlockDataRegion *BDR =
               dyn_cast_or_null<BlockDataRegion>(V.getAsRegion())) {
           if (const VarRegion *OriginalR = BDR->getOriginalRegion(VR)) {
@@ -608,122 +1101,16 @@
         }
       }
     }
+    if (action)
+      showBRDiagnostics(action, os, R, V, DS);
 
-    if (action) {
-      if (R->canPrintPretty()) {
-        R->printPretty(os);
-        os << " ";
-      }
-
-      if (V.getAs<loc::ConcreteInt>()) {
-        bool b = false;
-        if (R->isBoundable()) {
-          if (const TypedValueRegion *TR = dyn_cast<TypedValueRegion>(R)) {
-            if (TR->getValueType()->isObjCObjectPointerType()) {
-              os << action << "nil";
-              b = true;
-            }
-          }
-        }
-
-        if (!b)
-          os << action << "a null pointer value";
-      } else if (Optional<nonloc::ConcreteInt> CVal =
-                     V.getAs<nonloc::ConcreteInt>()) {
-        os << action << CVal->getValue();
-      }
-      else if (DS) {
-        if (V.isUndef()) {
-          if (isa<VarRegion>(R)) {
-            const VarDecl *VD = cast<VarDecl>(DS->getSingleDecl());
-            if (VD->getInit()) {
-              os << (R->canPrintPretty() ? "initialized" : "Initializing")
-                 << " to a garbage value";
-            } else {
-              os << (R->canPrintPretty() ? "declared" : "Declaring")
-                 << " without an initial value";
-            }
-          }
-        }
-        else {
-          os << (R->canPrintPretty() ? "initialized" : "Initialized")
-             << " here";
-        }
-      }
-    }
   } else if (StoreSite->getLocation().getAs<CallEnter>()) {
-    if (const VarRegion *VR = dyn_cast<VarRegion>(R)) {
-      const ParmVarDecl *Param = cast<ParmVarDecl>(VR->getDecl());
-
-      os << "Passing ";
-
-      if (V.getAs<loc::ConcreteInt>()) {
-        if (Param->getType()->isObjCObjectPointerType())
-          os << "nil object reference";
-        else
-          os << "null pointer value";
-      } else if (V.isUndef()) {
-        os << "uninitialized value";
-      } else if (Optional<nonloc::ConcreteInt> CI =
-                     V.getAs<nonloc::ConcreteInt>()) {
-        os << "the value " << CI->getValue();
-      } else {
-        os << "value";
-      }
-
-      // Printed parameter indexes are 1-based, not 0-based.
-      unsigned Idx = Param->getFunctionScopeIndex() + 1;
-      os << " via " << Idx << llvm::getOrdinalSuffix(Idx) << " parameter";
-      if (R->canPrintPretty()) {
-        os << " ";
-        R->printPretty(os);
-      }
-    }
+    if (const VarRegion *VR = dyn_cast<VarRegion>(R))
+      showBRParamDiagnostics(os, VR, V);
   }
 
-  if (os.str().empty()) {
-    if (V.getAs<loc::ConcreteInt>()) {
-      bool b = false;
-      if (R->isBoundable()) {
-        if (const TypedValueRegion *TR = dyn_cast<TypedValueRegion>(R)) {
-          if (TR->getValueType()->isObjCObjectPointerType()) {
-            os << "nil object reference stored";
-            b = true;
-          }
-        }
-      }
-      if (!b) {
-        if (R->canPrintPretty())
-          os << "Null pointer value stored";
-        else
-          os << "Storing null pointer value";
-      }
-
-    } else if (V.isUndef()) {
-      if (R->canPrintPretty())
-        os << "Uninitialized value stored";
-      else
-        os << "Storing uninitialized value";
-
-    } else if (Optional<nonloc::ConcreteInt> CV =
-                   V.getAs<nonloc::ConcreteInt>()) {
-      if (R->canPrintPretty())
-        os << "The value " << CV->getValue() << " is assigned";
-      else
-        os << "Assigning " << CV->getValue();
-
-    } else {
-      if (R->canPrintPretty())
-        os << "Value assigned";
-      else
-        os << "Assigning value";
-    }
-
-    if (R->canPrintPretty()) {
-      os << " to ";
-      R->printPretty(os);
-    }
-  }
+  if (os.str().empty())
+    showBRDefaultDiagnostics(os, R, V);
 
   // Construct a new PathDiagnosticPiece.
   ProgramPoint P = StoreSite->getLocation();
@@ -878,9 +1265,6 @@
     if (!BugPoint)
       return nullptr;
 
-    SourceLocation BugLoc = BugPoint->getStmt()->getLocStart();
-    if (BugLoc.isMacroID())
-      return nullptr;
 
     ProgramPoint CurPoint = Succ->getLocation();
     const Stmt *CurTerminatorStmt = nullptr;
@@ -902,14 +1286,14 @@
 
     SourceLocation TerminatorLoc = CurTerminatorStmt->getLocStart();
     if (TerminatorLoc.isMacroID()) {
-      const SourceManager &SMgr = BRC.getSourceManager();
-      std::pair<FileID, unsigned> TLInfo = SMgr.getDecomposedLoc(TerminatorLoc);
-      SrcMgr::SLocEntry SE = SMgr.getSLocEntry(TLInfo.first);
-      const SrcMgr::ExpansionInfo &EInfo = SE.getExpansion();
-      if (EInfo.isFunctionMacroExpansion()) {
+      SourceLocation BugLoc = BugPoint->getStmt()->getLocStart();
+
+      // Suppress reports unless we are in that same macro.
+      if (!BugLoc.isMacroID() ||
+          getMacroName(BugLoc, BRC) != getMacroName(TerminatorLoc, BRC)) {
         BR.markInvalid("Suppress Macro IDC", CurLC);
-        return nullptr;
       }
+      return nullptr;
     }
   }
   return nullptr;
@@ -978,6 +1362,65 @@
   return Ex;
 }
 
+/// Walk through nodes until we get one that matches the statement exactly.
+/// Alternately, if we hit a known lvalue for the statement, we know we've
+/// gone too far (though we can likely track the lvalue better anyway).
+static const ExplodedNode* findNodeForStatement(const ExplodedNode *N,
+                                                const Stmt *S,
+                                                const Expr *Inner) {
+  do {
+    const ProgramPoint &pp = N->getLocation();
+    if (auto ps = pp.getAs<StmtPoint>()) {
+      if (ps->getStmt() == S || ps->getStmt() == Inner)
+        break;
+    } else if (auto CEE = pp.getAs<CallExitEnd>()) {
+      if (CEE->getCalleeContext()->getCallSite() == S ||
+          CEE->getCalleeContext()->getCallSite() == Inner)
+        break;
+    }
+    N = N->getFirstPred();
+  } while (N);
+  return N;
+}
+
+/// Find the ExplodedNode where the lvalue (the value of 'Ex')
+/// was computed.
+static const ExplodedNode* findNodeForExpression(const ExplodedNode *N,
+    const Expr *Inner) {
+  while (N) {
+    if (auto P = N->getLocation().getAs<PostStmt>()) {
+      if (P->getStmt() == Inner)
+        break;
+    }
+    N = N->getFirstPred();
+  }
+  assert(N && "Unable to find the lvalue node.");
+  return N;
+
+}
+
+/// Performing operator `&' on an lvalue expression is essentially a no-op.
+/// Then, if we are taking addresses of fields or elements, these are also
+/// unlikely to matter.
+static const Expr* peelOfOuterAddrOf(const Expr* Ex) {
+  Ex = Ex->IgnoreParenCasts();
+
+  // FIXME: There's a hack in our Store implementation that always computes
+  // field offsets around null pointers as if they are always equal to 0.
+  // The idea here is to report accesses to fields as null dereferences
+  // even though the pointer value that's being dereferenced is actually
+  // the offset of the field rather than exactly 0.
+  // See the FIXME in StoreManager's getLValueFieldOrIvar() method.
+  // This code interacts heavily with this hack; otherwise the value
+  // would not be null at all for most fields, so we'd be unable to track it.
+  if (const auto *Op = dyn_cast<UnaryOperator>(Ex))
+    if (Op->getOpcode() == UO_AddrOf && Op->getSubExpr()->isLValue())
+      if (const Expr *DerefEx = bugreporter::getDerefExpr(Op->getSubExpr()))
+        return DerefEx;
+  return Ex;
+
+}
+
 bool bugreporter::trackNullOrUndefValue(const ExplodedNode *N,
                                         const Stmt *S,
                                         BugReport &report, bool IsArg,
@@ -985,56 +1428,23 @@
   if (!S || !N)
     return false;
 
-  if (const Expr *Ex = dyn_cast<Expr>(S)) {
-    Ex = Ex->IgnoreParenCasts();
-    const Expr *PeeledEx = peelOffOuterExpr(Ex, N);
-    if (Ex != PeeledEx)
-      S = PeeledEx;
-  }
+  if (const auto *Ex = dyn_cast<Expr>(S))
+    S = peelOffOuterExpr(Ex, N);
 
   const Expr *Inner = nullptr;
-  if (const Expr *Ex = dyn_cast<Expr>(S)) {
+  if (const auto *Ex = dyn_cast<Expr>(S)) {
+    Ex = peelOfOuterAddrOf(Ex);
     Ex = Ex->IgnoreParenCasts();
 
-    // Performing operator `&' on an lvalue expression is essentially a no-op.
-    // Then, if we are taking addresses of fields or elements, these are also
-    // unlikely to matter.
-    // FIXME: There's a hack in our Store implementation that always computes
-    // field offsets around null pointers as if they are always equal to 0.
-    // The idea here is to report accesses to fields as null dereferences
-    // even though the pointer value that's being dereferenced is actually
-    // the offset of the field rather than exactly 0.
-    // See the FIXME in StoreManager's getLValueFieldOrIvar() method.
-    // This code interacts heavily with this hack; otherwise the value
-    // would not be null at all for most fields, so we'd be unable to track it.
-    if (const auto *Op = dyn_cast<UnaryOperator>(Ex))
-      if (Op->getOpcode() == UO_AddrOf && Op->getSubExpr()->isLValue())
-        if (const Expr *DerefEx = getDerefExpr(Op->getSubExpr()))
-          Ex = DerefEx;
-
-    if (Ex && (ExplodedGraph::isInterestingLValueExpr(Ex) || CallEvent::isCallStmt(Ex)))
+    if (Ex && (ExplodedGraph::isInterestingLValueExpr(Ex)
+          || CallEvent::isCallStmt(Ex)))
       Inner = Ex;
   }
 
   if (IsArg && !Inner) {
     assert(N->getLocation().getAs<CallEnter>() && "Tracking arg but not at call");
   } else {
-    // Walk through nodes until we get one that matches the statement exactly.
-    // Alternately, if we hit a known lvalue for the statement, we know we've
-    // gone too far (though we can likely track the lvalue better anyway).
-    do {
-      const ProgramPoint &pp = N->getLocation();
-      if (Optional<StmtPoint> ps = pp.getAs<StmtPoint>()) {
-        if (ps->getStmt() == S || ps->getStmt() == Inner)
-          break;
-      } else if (Optional<CallExitEnd> CEE = pp.getAs<CallExitEnd>()) {
-        if (CEE->getCalleeContext()->getCallSite() == S ||
-            CEE->getCalleeContext()->getCallSite() == Inner)
-          break;
-      }
-      N = N->getFirstPred();
-    } while (N);
-
+    N = findNodeForStatement(N, S, Inner);
     if (!N)
       return false;
   }
@@ -1045,51 +1455,43 @@
   // At this point in the path, the receiver should be live since we are at the
   // message send expr. If it is nil, start tracking it.
   if (const Expr *Receiver = NilReceiverBRVisitor::getNilReceiver(S, N))
-    trackNullOrUndefValue(N, Receiver, report, false, EnableNullFPSuppression);
-
+    trackNullOrUndefValue(N, Receiver, report, /* IsArg=*/ false,
+        EnableNullFPSuppression);
 
   // See if the expression we're interested refers to a variable.
   // If so, we can track both its contents and constraints on its value.
   if (Inner && ExplodedGraph::isInterestingLValueExpr(Inner)) {
-    const MemRegion *R = nullptr;
-
-    // Find the ExplodedNode where the lvalue (the value of 'Ex')
-    // was computed.  We need this for getting the location value.
-    const ExplodedNode *LVNode = N;
-    while (LVNode) {
-      if (Optional<PostStmt> P = LVNode->getLocation().getAs<PostStmt>()) {
-        if (P->getStmt() == Inner)
-          break;
-      }
-      LVNode = LVNode->getFirstPred();
-    }
-    assert(LVNode && "Unable to find the lvalue node.");
+    const ExplodedNode *LVNode = findNodeForExpression(N, Inner);
     ProgramStateRef LVState = LVNode->getState();
-    SVal LVal = LVState->getSVal(Inner, LVNode->getLocationContext());
+    SVal LVal = LVNode->getSVal(Inner);
 
-    if (LVState->isNull(LVal).isConstrainedTrue()) {
-      // In case of C++ references, we want to differentiate between a null
-      // reference and reference to null pointer.
-      // If the LVal is null, check if we are dealing with null reference.
-      // For those, we want to track the location of the reference.
-      if (const MemRegion *RR = getLocationRegionIfReference(Inner, N))
-        R = RR;
-    } else {
-      R = LVState->getSVal(Inner, LVNode->getLocationContext()).getAsRegion();
+    const MemRegion *RR = getLocationRegionIfReference(Inner, N);
+    bool LVIsNull = LVState->isNull(LVal).isConstrainedTrue();
 
-      // If this is a C++ reference to a null pointer, we are tracking the
-      // pointer. In addition, we should find the store at which the reference
-      // got initialized.
-      if (const MemRegion *RR = getLocationRegionIfReference(Inner, N)) {
-        if (Optional<KnownSVal> KV = LVal.getAs<KnownSVal>())
-          report.addVisitor(llvm::make_unique<FindLastStoreBRVisitor>(
+    // If this is a C++ reference to a null pointer, we are tracking the
+    // pointer. In addition, we should find the store at which the reference
+    // got initialized.
+    if (RR && !LVIsNull) {
+      if (auto KV = LVal.getAs<KnownSVal>())
+        report.addVisitor(llvm::make_unique<FindLastStoreBRVisitor>(
               *KV, RR, EnableNullFPSuppression));
-      }
     }
 
+    // In case of C++ references, we want to differentiate between a null
+    // reference and reference to null pointer.
+    // If the LVal is null, check if we are dealing with null reference.
+    // For those, we want to track the location of the reference.
+    const MemRegion *R = (RR && LVIsNull) ? RR :
+        LVNode->getSVal(Inner).getAsRegion();
+
     if (R) {
       // Mark both the variable region and its contents as interesting.
       SVal V = LVState->getRawSVal(loc::MemRegionVal(R));
+      report.addVisitor(
+          llvm::make_unique<NoStoreFuncVisitor>(cast<SubRegion>(R)));
+
+      MacroNullReturnSuppressionVisitor::addMacroVisitorIfNecessary(
+          N, R, EnableNullFPSuppression, report, V);
 
       report.markInteresting(R);
       report.markInteresting(V);
@@ -1098,21 +1500,21 @@
       // If the contents are symbolic, find out when they became null.
       if (V.getAsLocSymbol(/*IncludeBaseRegions*/ true))
         report.addVisitor(llvm::make_unique<TrackConstraintBRVisitor>(
-            V.castAs<DefinedSVal>(), false));
+              V.castAs<DefinedSVal>(), false));
 
       // Add visitor, which will suppress inline defensive checks.
-      if (Optional<DefinedSVal> DV = V.getAs<DefinedSVal>()) {
+      if (auto DV = V.getAs<DefinedSVal>()) {
         if (!DV->isZeroConstant() && LVState->isNull(*DV).isConstrainedTrue() &&
             EnableNullFPSuppression) {
           report.addVisitor(
               llvm::make_unique<SuppressInlineDefensiveChecksVisitor>(*DV,
-                                                                      LVNode));
+                LVNode));
         }
       }
 
-      if (Optional<KnownSVal> KV = V.getAs<KnownSVal>())
+      if (auto KV = V.getAs<KnownSVal>())
         report.addVisitor(llvm::make_unique<FindLastStoreBRVisitor>(
-            *KV, R, EnableNullFPSuppression));
+              *KV, R, EnableNullFPSuppression));
       return true;
     }
   }
@@ -1123,7 +1525,7 @@
 
   // If the value came from an inlined function call, we should at least make
   // sure that function isn't pruned in our output.
-  if (const Expr *E = dyn_cast<Expr>(S))
+  if (const auto *E = dyn_cast<Expr>(S))
     S = E->IgnoreParenCasts();
 
   ReturnVisitor::addVisitorIfNecessary(N, S, report, EnableNullFPSuppression);
@@ -1132,26 +1534,29 @@
   // base value that was dereferenced.
   // assert(!V.isUnknownOrUndef());
   // Is it a symbolic value?
-  if (Optional<loc::MemRegionVal> L = V.getAs<loc::MemRegionVal>()) {
+  if (auto L = V.getAs<loc::MemRegionVal>()) {
+    report.addVisitor(llvm::make_unique<UndefOrNullArgVisitor>(L->getRegion()));
+
     // At this point we are dealing with the region's LValue.
     // However, if the rvalue is a symbolic region, we should track it as well.
     // Try to use the correct type when looking up the value.
     SVal RVal;
-    if (const Expr *E = dyn_cast<Expr>(S))
+    if (const auto *E = dyn_cast<Expr>(S))
       RVal = state->getRawSVal(L.getValue(), E->getType());
     else
       RVal = state->getSVal(L->getRegion());
 
-    const MemRegion *RegionRVal = RVal.getAsRegion();
-    report.addVisitor(llvm::make_unique<UndefOrNullArgVisitor>(L->getRegion()));
+    if (auto KV = RVal.getAs<KnownSVal>())
+      report.addVisitor(llvm::make_unique<FindLastStoreBRVisitor>(
+            *KV, L->getRegion(), EnableNullFPSuppression));
 
+    const MemRegion *RegionRVal = RVal.getAsRegion();
     if (RegionRVal && isa<SymbolicRegion>(RegionRVal)) {
       report.markInteresting(RegionRVal);
       report.addVisitor(llvm::make_unique<TrackConstraintBRVisitor>(
-          loc::MemRegionVal(RegionRVal), false));
+            loc::MemRegionVal(RegionRVal), false));
     }
   }
-
   return true;
 }
 
@@ -1162,7 +1567,7 @@
     return nullptr;
   if (const Expr *Receiver = ME->getInstanceReceiver()) {
     ProgramStateRef state = N->getState();
-    SVal V = state->getSVal(Receiver, N->getLocationContext());
+    SVal V = N->getSVal(Receiver);
     if (state->isNull(V).isConstrainedTrue())
       return Receiver;
   }
@@ -1218,8 +1623,7 @@
     const Stmt *Head = WorkList.front();
     WorkList.pop_front();
 
-    ProgramStateRef state = N->getState();
-    ProgramStateManager &StateMgr = state->getStateManager();
+    ProgramStateManager &StateMgr = N->getState()->getStateManager();
 
     if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Head)) {
       if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) {
@@ -1227,7 +1631,7 @@
         StateMgr.getRegionManager().getVarRegion(VD, N->getLocationContext());
 
         // What did we load?
-        SVal V = state->getSVal(S, N->getLocationContext());
+        SVal V = N->getSVal(S);
 
         if (V.getAs<loc::ConcreteInt>() || V.getAs<nonloc::ConcreteInt>()) {
           // Register a new visitor with the BugReport.
@@ -1541,7 +1945,7 @@
   // For non-assignment operations, we require that we can understand
   // both the LHS and RHS.
   if (LhsString.empty() || RhsString.empty() ||
-      !BinaryOperator::isComparisonOp(Op))
+      !BinaryOperator::isComparisonOp(Op) || Op == BO_Cmp)
     return nullptr;
 
   // Should we invert the strings if the LHS is not a variable name?
@@ -1797,7 +2201,7 @@
     const MemRegion *ArgReg = Call->getArgSVal(Idx).getAsRegion();
 
     // Are we tracking the argument or its subregion?
-    if ( !ArgReg || (ArgReg != R && !R->isSubRegionOf(ArgReg->StripCasts())))
+    if ( !ArgReg || !R->isSubRegionOf(ArgReg->StripCasts()))
       continue;
 
     // Check the function parameter type.
diff --git a/lib/StaticAnalyzer/Core/CMakeLists.txt b/lib/StaticAnalyzer/Core/CMakeLists.txt
index 5ac4f94..436a8a2 100644
--- a/lib/StaticAnalyzer/Core/CMakeLists.txt
+++ b/lib/StaticAnalyzer/Core/CMakeLists.txt
@@ -58,6 +58,7 @@
   clangASTMatchers
   clangAnalysis
   clangBasic
+  clangCrossTU
   clangLex
   clangRewrite
   ${Z3_LINK_FILES}
diff --git a/lib/StaticAnalyzer/Core/CallEvent.cpp b/lib/StaticAnalyzer/Core/CallEvent.cpp
index f0a8176..3ff02f1 100644
--- a/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -1,4 +1,4 @@
-//===- Calls.cpp - Wrapper for all function and method calls ------*- C++ -*--//
+//===- CallEvent.cpp - Wrapper for all function and method calls ----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,14 +14,52 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/ExprObjC.h"
 #include "clang/AST/ParentMap.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
 #include "clang/Analysis/ProgramPoint.h"
+#include "clang/CrossTU/CrossTranslationUnit.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Specifiers.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <utility>
 
 #define DEBUG_TYPE "static-analyzer-call-event"
 
@@ -78,7 +116,7 @@
 }
 
 static bool isVoidPointerToNonConst(QualType T) {
-  if (const PointerType *PT = T->getAs<PointerType>()) {
+  if (const auto *PT = T->getAs<PointerType>()) {
     QualType PointeeTy = PT->getPointeeType();
     if (PointeeTy.isConstQualified())
       return false;
@@ -119,7 +157,7 @@
 }
 
 bool CallEvent::isGlobalCFunction(StringRef FunctionName) const {
-  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(getDecl());
+  const auto *FD = dyn_cast_or_null<FunctionDecl>(getDecl());
   if (!FD)
     return false;
 
@@ -211,7 +249,9 @@
 }
 
 bool CallEvent::isCalled(const CallDescription &CD) const {
-  assert(getKind() != CE_ObjCMessage && "Obj-C methods are not supported");
+  // FIXME: Add ObjC Message support.
+  if (getKind() == CE_ObjCMessage)
+    return false;
   if (!CD.IsLookupDone) {
     CD.IsLookupDone = true;
     CD.II = &getState()->getStateManager().getContext().Idents.get(CD.FuncName);
@@ -233,7 +273,7 @@
 SourceRange CallEvent::getArgSourceRange(unsigned Index) const {
   const Expr *ArgE = getArgExpr(Index);
   if (!ArgE)
-    return SourceRange();
+    return {};
   return ArgE->getSourceRange();
 }
 
@@ -264,7 +304,6 @@
   Out << "Unknown call (type " << getKind() << ")";
 }
 
-
 bool CallEvent::isCallStmt(const Stmt *S) {
   return isa<CallExpr>(S) || isa<ObjCMessageExpr>(S)
                           || isa<CXXConstructExpr>(S)
@@ -273,11 +312,11 @@
 
 QualType CallEvent::getDeclaredResultType(const Decl *D) {
   assert(D);
-  if (const FunctionDecl* FD = dyn_cast<FunctionDecl>(D))
+  if (const auto *FD = dyn_cast<FunctionDecl>(D))
     return FD->getReturnType();
-  if (const ObjCMethodDecl* MD = dyn_cast<ObjCMethodDecl>(D))
+  if (const auto *MD = dyn_cast<ObjCMethodDecl>(D))
     return MD->getReturnType();
-  if (const BlockDecl *BD = dyn_cast<BlockDecl>(D)) {
+  if (const auto *BD = dyn_cast<BlockDecl>(D)) {
     // Blocks are difficult because the return type may not be stored in the
     // BlockDecl itself. The AST should probably be enhanced, but for now we
     // just do what we can.
@@ -294,7 +333,7 @@
         return Ty;
     }
 
-    return QualType();
+    return {};
   }
 
   llvm_unreachable("unknown callable kind");
@@ -303,11 +342,11 @@
 bool CallEvent::isVariadic(const Decl *D) {
   assert(D);
 
-  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
+  if (const auto *FD = dyn_cast<FunctionDecl>(D))
     return FD->isVariadic();
-  if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D))
+  if (const auto *MD = dyn_cast<ObjCMethodDecl>(D))
     return MD->isVariadic();
-  if (const BlockDecl *BD = dyn_cast<BlockDecl>(D))
+  if (const auto *BD = dyn_cast<BlockDecl>(D))
     return BD->isVariadic();
 
   llvm_unreachable("unknown callable kind");
@@ -367,13 +406,33 @@
     }
   }
 
-  return RuntimeDefinition();
+  SubEngine *Engine = getState()->getStateManager().getOwningEngine();
+  AnalyzerOptions &Opts = Engine->getAnalysisManager().options;
+
+  // Try to get CTU definition only if CTUDir is provided.
+  if (!Opts.naiveCTUEnabled())
+    return RuntimeDefinition();
+
+  cross_tu::CrossTranslationUnitContext &CTUCtx =
+      *Engine->getCrossTranslationUnitContext();
+  llvm::Expected<const FunctionDecl *> CTUDeclOrError =
+      CTUCtx.getCrossTUDefinition(FD, Opts.getCTUDir(), Opts.getCTUIndexName());
+
+  if (!CTUDeclOrError) {
+    handleAllErrors(CTUDeclOrError.takeError(),
+                    [&](const cross_tu::IndexError &IE) {
+                      CTUCtx.emitCrossTUDiagnostics(IE);
+                    });
+    return {};
+  }
+
+  return RuntimeDefinition(*CTUDeclOrError);
 }
 
 void AnyFunctionCall::getInitialStackFrameContents(
                                         const StackFrameContext *CalleeCtx,
                                         BindingsTy &Bindings) const {
-  const FunctionDecl *D = cast<FunctionDecl>(CalleeCtx->getDecl());
+  const auto *D = cast<FunctionDecl>(CalleeCtx->getDecl());
   SValBuilder &SVB = getState()->getStateManager().getSValBuilder();
   addParameterValuesToBindings(CalleeCtx, Bindings, SVB, *this,
                                D->parameters());
@@ -440,7 +499,6 @@
   return false;
 }
 
-
 const FunctionDecl *SimpleFunctionCall::getDecl() const {
   const FunctionDecl *D = getOriginExpr()->getDirectCallee();
   if (D)
@@ -449,9 +507,8 @@
   return getSVal(getOriginExpr()->getCallee()).getAsFunctionDecl();
 }
 
-
 const FunctionDecl *CXXInstanceCall::getDecl() const {
-  const CallExpr *CE = cast_or_null<CallExpr>(getOriginExpr());
+  const auto *CE = cast_or_null<CallExpr>(getOriginExpr());
   if (!CE)
     return AnyFunctionCall::getDecl();
 
@@ -468,7 +525,7 @@
   Values.push_back(ThisVal);
 
   // Don't invalidate if the method is const and there are no mutable fields.
-  if (const CXXMethodDecl *D = cast_or_null<CXXMethodDecl>(getDecl())) {
+  if (const auto *D = cast_or_null<CXXMethodDecl>(getDecl())) {
     if (!D->isConst())
       return;
     // Get the record decl for the class of 'This'. D->getParent() may return a
@@ -499,27 +556,26 @@
   return ThisVal;
 }
 
-
 RuntimeDefinition CXXInstanceCall::getRuntimeDefinition() const {
   // Do we have a decl at all?
   const Decl *D = getDecl();
   if (!D)
-    return RuntimeDefinition();
+    return {};
 
   // If the method is non-virtual, we know we can inline it.
-  const CXXMethodDecl *MD = cast<CXXMethodDecl>(D);
+  const auto *MD = cast<CXXMethodDecl>(D);
   if (!MD->isVirtual())
     return AnyFunctionCall::getRuntimeDefinition();
 
   // Do we know the implicit 'this' object being called?
   const MemRegion *R = getCXXThisVal().getAsRegion();
   if (!R)
-    return RuntimeDefinition();
+    return {};
 
   // Do we know anything about the type of 'this'?
   DynamicTypeInfo DynType = getDynamicTypeInfo(getState(), R);
   if (!DynType.isValid())
-    return RuntimeDefinition();
+    return {};
 
   // Is the type a C++ class? (This is mostly a defensive check.)
   QualType RegionType = DynType.getType()->getPointeeType();
@@ -527,7 +583,7 @@
 
   const CXXRecordDecl *RD = RegionType->getAsCXXRecordDecl();
   if (!RD || !RD->hasDefinition())
-    return RuntimeDefinition();
+    return {};
 
   // Find the decl for this method in that class.
   const CXXMethodDecl *Result = MD->getCorrespondingMethodInClass(RD, true);
@@ -545,13 +601,13 @@
     // this is fixed. <rdar://problem/12287087>
     //assert(!MD->getParent()->isDerivedFrom(RD) && "Bad DynamicTypeInfo");
 
-    return RuntimeDefinition();
+    return {};
   }
 
   // Does the decl that we found have an implementation?
   const FunctionDecl *Definition;
   if (!Result->hasBody(Definition))
-    return RuntimeDefinition();
+    return {};
 
   // We found a definition. If we're not sure that this devirtualization is
   // actually what will happen at runtime, make sure to provide the region so
@@ -572,7 +628,7 @@
     ProgramStateManager &StateMgr = getState()->getStateManager();
     SValBuilder &SVB = StateMgr.getSValBuilder();
 
-    const CXXMethodDecl *MD = cast<CXXMethodDecl>(CalleeCtx->getDecl());
+    const auto *MD = cast<CXXMethodDecl>(CalleeCtx->getDecl());
     Loc ThisLoc = SVB.getCXXThis(MD, CalleeCtx);
 
     // If we devirtualized to a different member function, we need to make sure
@@ -585,7 +641,15 @@
       // FIXME: CallEvent maybe shouldn't be directly accessing StoreManager.
       bool Failed;
       ThisVal = StateMgr.getStoreManager().attemptDownCast(ThisVal, Ty, Failed);
-      assert(!Failed && "Calling an incorrectly devirtualized method");
+      if (Failed) {
+        // We might have suffered some sort of placement new earlier, so
+        // we're constructing in a completely unexpected storage.
+        // Fall back to a generic pointer cast for this-value.
+        const CXXMethodDecl *StaticMD = cast<CXXMethodDecl>(getDecl());
+        const CXXRecordDecl *StaticClass = StaticMD->getParent();
+        QualType StaticTy = Ctx.getPointerType(Ctx.getRecordType(StaticClass));
+        ThisVal = SVB.evalCast(ThisVal, Ty, StaticTy);
+      }
     }
 
     if (!ThisVal.isUnknown())
@@ -593,8 +657,6 @@
   }
 }
 
-
-
 const Expr *CXXMemberCall::getCXXThisExpr() const {
   return getOriginExpr()->getImplicitObjectArgument();
 }
@@ -604,19 +666,17 @@
   // id-expression in the class member access expression is a qualified-id,
   // that function is called. Otherwise, its final overrider in the dynamic type
   // of the object expression is called.
-  if (const MemberExpr *ME = dyn_cast<MemberExpr>(getOriginExpr()->getCallee()))
+  if (const auto *ME = dyn_cast<MemberExpr>(getOriginExpr()->getCallee()))
     if (ME->hasQualifier())
       return AnyFunctionCall::getRuntimeDefinition();
 
   return CXXInstanceCall::getRuntimeDefinition();
 }
 
-
 const Expr *CXXMemberOperatorCall::getCXXThisExpr() const {
   return getOriginExpr()->getArg(0);
 }
 
-
 const BlockDataRegion *BlockCall::getBlockRegion() const {
   const Expr *Callee = getOriginExpr()->getCallee();
   const MemRegion *DataReg = getSVal(Callee).getAsRegion();
@@ -661,7 +721,6 @@
                                Params);
 }
 
-
 SVal CXXConstructorCall::getCXXThisVal() const {
   if (Data)
     return loc::MemRegionVal(static_cast<const MemRegion *>(Data));
@@ -670,8 +729,13 @@
 
 void CXXConstructorCall::getExtraInvalidatedValues(ValueList &Values,
                            RegionAndSymbolInvalidationTraits *ETraits) const {
-  if (Data)
-    Values.push_back(loc::MemRegionVal(static_cast<const MemRegion *>(Data)));
+  if (Data) {
+    loc::MemRegionVal MV(static_cast<const MemRegion *>(Data));
+    if (SymbolRef Sym = MV.getAsSymbol(true))
+      ETraits->setTrait(Sym,
+                        RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
+    Values.push_back(MV);
+  }
 }
 
 void CXXConstructorCall::getInitialStackFrameContents(
@@ -682,7 +746,7 @@
   SVal ThisVal = getCXXThisVal();
   if (!ThisVal.isUnknown()) {
     SValBuilder &SVB = getState()->getStateManager().getSValBuilder();
-    const CXXMethodDecl *MD = cast<CXXMethodDecl>(CalleeCtx->getDecl());
+    const auto *MD = cast<CXXMethodDecl>(CalleeCtx->getDecl());
     Loc ThisLoc = SVB.getCXXThis(MD, CalleeCtx);
     Bindings.push_back(std::make_pair(ThisLoc, ThisVal));
   }
@@ -783,7 +847,7 @@
   llvm_unreachable("unknown message kind");
 }
 
-typedef llvm::PointerIntPair<const PseudoObjectExpr *, 2> ObjCMessageDataTy;
+using ObjCMessageDataTy = llvm::PointerIntPair<const PseudoObjectExpr *, 2>;
 
 const PseudoObjectExpr *ObjCMethodCall::getContainingPseudoObjectExpr() const {
   assert(Data && "Lazy lookup not yet performed.");
@@ -797,7 +861,7 @@
 
   // This handles the funny case of assigning to the result of a getter.
   // This can happen if the getter returns a non-const reference.
-  if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(Syntactic))
+  if (const auto *BO = dyn_cast<BinaryOperator>(Syntactic))
     Syntactic = BO->getLHS();
 
   return Syntactic;
@@ -805,13 +869,12 @@
 
 ObjCMessageKind ObjCMethodCall::getMessageKind() const {
   if (!Data) {
-
     // Find the parent, ignoring implicit casts.
     ParentMap &PM = getLocationContext()->getParentMap();
     const Stmt *S = PM.getParentIgnoreParenCasts(getOriginExpr());
 
     // Check if parent is a PseudoObjectExpr.
-    if (const PseudoObjectExpr *POE = dyn_cast_or_null<PseudoObjectExpr>(S)) {
+    if (const auto *POE = dyn_cast_or_null<PseudoObjectExpr>(S)) {
       const Expr *Syntactic = getSyntacticFromForPseudoObjectExpr(POE);
 
       ObjCMessageKind K;
@@ -963,7 +1026,6 @@
   Selector Sel = E->getSelector();
 
   if (E->isInstanceMessage()) {
-
     // Find the receiver type.
     const ObjCObjectPointerType *ReceiverT = nullptr;
     bool CanBeSubClassed = false;
@@ -978,13 +1040,13 @@
     } else {
       Receiver = getReceiverSVal().getAsRegion();
       if (!Receiver)
-        return RuntimeDefinition();
+        return {};
 
       DynamicTypeInfo DTI = getDynamicTypeInfo(getState(), Receiver);
       if (!DTI.isValid()) {
         assert(isa<AllocaRegion>(Receiver) &&
                "Unhandled untyped region class!");
-        return RuntimeDefinition();
+        return {};
       }
 
       QualType DynType = DTI.getType();
@@ -1039,11 +1101,9 @@
         // need to revisit this someday.  In terms of memory, this table
         // stays around until clang quits, which also may be bad if we
         // need to release memory.
-        typedef std::pair<const ObjCInterfaceDecl*, Selector>
-                PrivateMethodKey;
-        typedef llvm::DenseMap<PrivateMethodKey,
-                               Optional<const ObjCMethodDecl *> >
-                PrivateMethodCache;
+        using PrivateMethodKey = std::pair<const ObjCInterfaceDecl *, Selector>;
+        using PrivateMethodCache =
+            llvm::DenseMap<PrivateMethodKey, Optional<const ObjCMethodDecl *>>;
 
         static PrivateMethodCache PMC;
         Optional<const ObjCMethodDecl *> &Val = PMC[std::make_pair(IDecl, Sel)];
@@ -1088,7 +1148,6 @@
         else
           return RuntimeDefinition(MD, nullptr);
       }
-
   } else {
     // This is a class method.
     // If we have type info for the receiver class, we are calling via
@@ -1099,7 +1158,7 @@
     }
   }
 
-  return RuntimeDefinition();
+  return {};
 }
 
 bool ObjCMethodCall::argumentsMayEscape() const {
@@ -1116,7 +1175,7 @@
 void ObjCMethodCall::getInitialStackFrameContents(
                                              const StackFrameContext *CalleeCtx,
                                              BindingsTy &Bindings) const {
-  const ObjCMethodDecl *D = cast<ObjCMethodDecl>(CalleeCtx->getDecl());
+  const auto *D = cast<ObjCMethodDecl>(CalleeCtx->getDecl());
   SValBuilder &SVB = getState()->getStateManager().getSValBuilder();
   addParameterValuesToBindings(CalleeCtx, Bindings, SVB, *this,
                                D->parameters());
@@ -1133,12 +1192,12 @@
 CallEventRef<>
 CallEventManager::getSimpleCall(const CallExpr *CE, ProgramStateRef State,
                                 const LocationContext *LCtx) {
-  if (const CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(CE))
+  if (const auto *MCE = dyn_cast<CXXMemberCallExpr>(CE))
     return create<CXXMemberCall>(MCE, State, LCtx);
 
-  if (const CXXOperatorCallExpr *OpCE = dyn_cast<CXXOperatorCallExpr>(CE)) {
+  if (const auto *OpCE = dyn_cast<CXXOperatorCallExpr>(CE)) {
     const FunctionDecl *DirectCallee = OpCE->getDirectCallee();
-    if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(DirectCallee))
+    if (const auto *MD = dyn_cast<CXXMethodDecl>(DirectCallee))
       if (MD->isInstance())
         return create<CXXMemberOperatorCall>(OpCE, State, LCtx);
 
@@ -1151,7 +1210,6 @@
   return create<SimpleFunctionCall>(CE, State, LCtx);
 }
 
-
 CallEventRef<>
 CallEventManager::getCaller(const StackFrameContext *CalleeCtx,
                             ProgramStateRef State) {
@@ -1169,7 +1227,7 @@
     case Stmt::CXXConstructExprClass:
     case Stmt::CXXTemporaryObjectExprClass: {
       SValBuilder &SVB = State->getStateManager().getSValBuilder();
-      const CXXMethodDecl *Ctor = cast<CXXMethodDecl>(CalleeCtx->getDecl());
+      const auto *Ctor = cast<CXXMethodDecl>(CalleeCtx->getDecl());
       Loc ThisPtr = SVB.getCXXThis(Ctor, CalleeCtx);
       SVal ThisVal = State->getSVal(ThisPtr);
 
@@ -1190,12 +1248,11 @@
   // destructors, though this could change in the future.
   const CFGBlock *B = CalleeCtx->getCallSiteBlock();
   CFGElement E = (*B)[CalleeCtx->getIndex()];
-  assert(E.getAs<CFGImplicitDtor>() &&
+  assert((E.getAs<CFGImplicitDtor>() || E.getAs<CFGTemporaryDtor>()) &&
          "All other CFG elements should have exprs");
-  assert(!E.getAs<CFGTemporaryDtor>() && "We don't handle temporaries yet");
 
   SValBuilder &SVB = State->getStateManager().getSValBuilder();
-  const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CalleeCtx->getDecl());
+  const auto *Dtor = cast<CXXDestructorDecl>(CalleeCtx->getDecl());
   Loc ThisPtr = SVB.getCXXThis(Dtor, CalleeCtx);
   SVal ThisVal = State->getSVal(ThisPtr);
 
@@ -1203,7 +1260,7 @@
   if (Optional<CFGAutomaticObjDtor> AutoDtor = E.getAs<CFGAutomaticObjDtor>())
     Trigger = AutoDtor->getTriggerStmt();
   else if (Optional<CFGDeleteDtor> DeleteDtor = E.getAs<CFGDeleteDtor>())
-    Trigger = cast<Stmt>(DeleteDtor->getDeleteExpr());
+    Trigger = DeleteDtor->getDeleteExpr();
   else
     Trigger = Dtor->getBody();
 
diff --git a/lib/StaticAnalyzer/Core/CheckerContext.cpp b/lib/StaticAnalyzer/Core/CheckerContext.cpp
index 61cbf38..6cf931a 100644
--- a/lib/StaticAnalyzer/Core/CheckerContext.cpp
+++ b/lib/StaticAnalyzer/Core/CheckerContext.cpp
@@ -20,9 +20,8 @@
 using namespace ento;
 
 const FunctionDecl *CheckerContext::getCalleeDecl(const CallExpr *CE) const {
-  ProgramStateRef State = getState();
   const Expr *Callee = CE->getCallee();
-  SVal L = State->getSVal(Callee, Pred->getLocationContext());
+  SVal L = Pred->getSVal(Callee);
   return L.getAsFunctionDecl();
 }
 
diff --git a/lib/StaticAnalyzer/Core/CheckerManager.cpp b/lib/StaticAnalyzer/Core/CheckerManager.cpp
index 49f3ede..882e4f9 100644
--- a/lib/StaticAnalyzer/Core/CheckerManager.cpp
+++ b/lib/StaticAnalyzer/Core/CheckerManager.cpp
@@ -1,4 +1,4 @@
-//===--- CheckerManager.cpp - Static Analyzer Checker Manager -------------===//
+//===- CheckerManager.cpp - Static Analyzer Checker Manager ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,10 +13,20 @@
 
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/AST/DeclBase.h"
+#include "clang/AST/Stmt.h"
 #include "clang/Analysis/ProgramPoint.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <vector>
 
 using namespace clang;
 using namespace ento;
@@ -43,9 +53,9 @@
 #ifndef NDEBUG
   // Make sure that for every event that has listeners, there is at least
   // one dispatcher registered for it.
-  for (llvm::DenseMap<EventTag, EventInfo>::iterator
-         I = Events.begin(), E = Events.end(); I != E; ++I)
-    assert(I->second.HasDispatcher && "No dispatcher registered for an event");
+  for (const auto &Event : Events)
+    assert(Event.second.HasDispatcher &&
+           "No dispatcher registered for an event");
 #endif
 }
 
@@ -65,25 +75,22 @@
   } else {
     // Find the checkers that should run for this Decl and cache them.
     checkers = &CachedDeclCheckersMap[DeclKind];
-    for (unsigned i = 0, e = DeclCheckers.size(); i != e; ++i) {
-      DeclCheckerInfo &info = DeclCheckers[i];
+    for (const auto &info : DeclCheckers)
       if (info.IsForDeclFn(D))
         checkers->push_back(info.CheckFn);
-    }
   }
 
   assert(checkers);
-  for (CachedDeclCheckers::iterator
-         I = checkers->begin(), E = checkers->end(); I != E; ++I)
-    (*I)(D, mgr, BR);
+  for (const auto checker : *checkers)
+    checker(D, mgr, BR);
 }
 
 void CheckerManager::runCheckersOnASTBody(const Decl *D, AnalysisManager& mgr,
                                           BugReporter &BR) {
   assert(D && D->hasBody());
 
-  for (unsigned i = 0, e = BodyCheckers.size(); i != e; ++i)
-    BodyCheckers[i](D, mgr, BR);
+  for (const auto BodyChecker : BodyCheckers)
+    BodyChecker(D, mgr, BR);
 }
 
 //===----------------------------------------------------------------------===//
@@ -118,10 +125,8 @@
     }
 
     NodeBuilder B(*PrevSet, *CurrSet, BldrCtx);
-    for (ExplodedNodeSet::iterator NI = PrevSet->begin(), NE = PrevSet->end();
-         NI != NE; ++NI) {
-      checkCtx.runChecker(*I, B, *NI);
-    }
+    for (const auto &NI : *PrevSet)
+      checkCtx.runChecker(*I, B, NI);
 
     // If all the produced transitions are sinks, stop.
     if (CurrSet->empty())
@@ -133,21 +138,23 @@
 }
 
 namespace {
+
   struct CheckStmtContext {
-    typedef SmallVectorImpl<CheckerManager::CheckStmtFunc> CheckersTy;
+    using CheckersTy = SmallVectorImpl<CheckerManager::CheckStmtFunc>;
+
     bool IsPreVisit;
     const CheckersTy &Checkers;
     const Stmt *S;
     ExprEngine &Eng;
     bool WasInlined;
 
-    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
-    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
-
     CheckStmtContext(bool isPreVisit, const CheckersTy &checkers,
                      const Stmt *s, ExprEngine &eng, bool wasInlined = false)
-      : IsPreVisit(isPreVisit), Checkers(checkers), S(s), Eng(eng),
-        WasInlined(wasInlined) {}
+        : IsPreVisit(isPreVisit), Checkers(checkers), S(s), Eng(eng),
+          WasInlined(wasInlined) {}
+
+    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
+    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
 
     void runChecker(CheckerManager::CheckStmtFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
@@ -160,7 +167,8 @@
       checkFn(S, C);
     }
   };
-}
+
+} // namespace
 
 /// \brief Run checkers for visiting Stmts.
 void CheckerManager::runCheckersForStmt(bool isPreVisit,
@@ -175,8 +183,9 @@
 }
 
 namespace {
+
   struct CheckObjCMessageContext {
-    typedef std::vector<CheckerManager::CheckObjCMessageFunc> CheckersTy;
+    using CheckersTy = std::vector<CheckerManager::CheckObjCMessageFunc>;
 
     ObjCMessageVisitKind Kind;
     bool WasInlined;
@@ -184,19 +193,18 @@
     const ObjCMethodCall &Msg;
     ExprEngine &Eng;
 
-    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
-    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
-
     CheckObjCMessageContext(ObjCMessageVisitKind visitKind,
                             const CheckersTy &checkers,
                             const ObjCMethodCall &msg, ExprEngine &eng,
                             bool wasInlined)
-      : Kind(visitKind), WasInlined(wasInlined), Checkers(checkers),
-        Msg(msg), Eng(eng) { }
+        : Kind(visitKind), WasInlined(wasInlined), Checkers(checkers), Msg(msg),
+          Eng(eng) {}
+
+    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
+    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
 
     void runChecker(CheckerManager::CheckObjCMessageFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
-
       bool IsPreVisit;
 
       switch (Kind) {
@@ -215,7 +223,8 @@
       checkFn(*Msg.cloneWithState<ObjCMethodCall>(Pred->getState()), C);
     }
   };
-}
+
+} // namespace
 
 /// \brief Run checkers for visiting obj-c messages.
 void CheckerManager::runCheckersForObjCMessage(ObjCMessageVisitKind visitKind,
@@ -242,24 +251,27 @@
   }
   llvm_unreachable("Unknown Kind");
 }
+
 namespace {
+
   // FIXME: This has all the same signatures as CheckObjCMessageContext.
   // Is there a way we can merge the two?
   struct CheckCallContext {
-    typedef std::vector<CheckerManager::CheckCallFunc> CheckersTy;
+    using CheckersTy = std::vector<CheckerManager::CheckCallFunc>;
+
     bool IsPreVisit, WasInlined;
     const CheckersTy &Checkers;
     const CallEvent &Call;
     ExprEngine &Eng;
 
-    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
-    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
-
     CheckCallContext(bool isPreVisit, const CheckersTy &checkers,
                      const CallEvent &call, ExprEngine &eng,
                      bool wasInlined)
-    : IsPreVisit(isPreVisit), WasInlined(wasInlined), Checkers(checkers),
-      Call(call), Eng(eng) { }
+        : IsPreVisit(isPreVisit), WasInlined(wasInlined), Checkers(checkers),
+          Call(call), Eng(eng) {}
+
+    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
+    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
 
     void runChecker(CheckerManager::CheckCallFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
@@ -269,7 +281,8 @@
       checkFn(*Call.cloneWithState(Pred->getState()), C);
     }
   };
-}
+
+} // namespace
 
 /// \brief Run checkers for visiting an abstract call event.
 void CheckerManager::runCheckersForCallEvent(bool isPreVisit,
@@ -286,8 +299,10 @@
 }
 
 namespace {
+
   struct CheckLocationContext {
-    typedef std::vector<CheckerManager::CheckLocationFunc> CheckersTy;
+    using CheckersTy = std::vector<CheckerManager::CheckLocationFunc>;
+
     const CheckersTy &Checkers;
     SVal Loc;
     bool IsLoad;
@@ -295,15 +310,15 @@
     const Stmt *BoundEx;
     ExprEngine &Eng;
 
-    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
-    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
-
     CheckLocationContext(const CheckersTy &checkers,
                          SVal loc, bool isLoad, const Stmt *NodeEx,
                          const Stmt *BoundEx,
                          ExprEngine &eng)
-      : Checkers(checkers), Loc(loc), IsLoad(isLoad), NodeEx(NodeEx),
-        BoundEx(BoundEx), Eng(eng) {}
+        : Checkers(checkers), Loc(loc), IsLoad(isLoad), NodeEx(NodeEx),
+          BoundEx(BoundEx), Eng(eng) {}
+
+    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
+    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
 
     void runChecker(CheckerManager::CheckLocationFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
@@ -317,7 +332,8 @@
       checkFn(Loc, IsLoad, BoundEx, C);
     }
   };
-}
+
+} // namespace
 
 /// \brief Run checkers for load/store of a location.
 
@@ -333,8 +349,10 @@
 }
 
 namespace {
+
   struct CheckBindContext {
-    typedef std::vector<CheckerManager::CheckBindFunc> CheckersTy;
+    using CheckersTy = std::vector<CheckerManager::CheckBindFunc>;
+
     const CheckersTy &Checkers;
     SVal Loc;
     SVal Val;
@@ -342,13 +360,13 @@
     ExprEngine &Eng;
     const ProgramPoint &PP;
 
-    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
-    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
-
     CheckBindContext(const CheckersTy &checkers,
                      SVal loc, SVal val, const Stmt *s, ExprEngine &eng,
                      const ProgramPoint &pp)
-      : Checkers(checkers), Loc(loc), Val(val), S(s), Eng(eng), PP(pp) {}
+        : Checkers(checkers), Loc(loc), Val(val), S(s), Eng(eng), PP(pp) {}
+
+    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
+    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
 
     void runChecker(CheckerManager::CheckBindFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
@@ -358,7 +376,8 @@
       checkFn(Loc, Val, S, C);
     }
   };
-}
+
+} // namespace
 
 /// \brief Run checkers for binding of a value to a location.
 void CheckerManager::runCheckersForBind(ExplodedNodeSet &Dst,
@@ -373,24 +392,26 @@
 void CheckerManager::runCheckersForEndAnalysis(ExplodedGraph &G,
                                                BugReporter &BR,
                                                ExprEngine &Eng) {
-  for (unsigned i = 0, e = EndAnalysisCheckers.size(); i != e; ++i)
-    EndAnalysisCheckers[i](G, BR, Eng);
+  for (const auto EndAnalysisChecker : EndAnalysisCheckers)
+    EndAnalysisChecker(G, BR, Eng);
 }
 
 namespace {
+
 struct CheckBeginFunctionContext {
-  typedef std::vector<CheckerManager::CheckBeginFunctionFunc> CheckersTy;
+  using CheckersTy = std::vector<CheckerManager::CheckBeginFunctionFunc>;
+
   const CheckersTy &Checkers;
   ExprEngine &Eng;
   const ProgramPoint &PP;
 
-  CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
-  CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
-
   CheckBeginFunctionContext(const CheckersTy &Checkers, ExprEngine &Eng,
                             const ProgramPoint &PP)
       : Checkers(Checkers), Eng(Eng), PP(PP) {}
 
+  CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
+  CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
+
   void runChecker(CheckerManager::CheckBeginFunctionFunc checkFn,
                   NodeBuilder &Bldr, ExplodedNode *Pred) {
     const ProgramPoint &L = PP.withTag(checkFn.Checker);
@@ -399,7 +420,8 @@
     checkFn(C);
   }
 };
-}
+
+} // namespace
 
 void CheckerManager::runCheckersForBeginFunction(ExplodedNodeSet &Dst,
                                                  const BlockEdge &L,
@@ -418,14 +440,11 @@
                                                ExplodedNodeSet &Dst,
                                                ExplodedNode *Pred,
                                                ExprEngine &Eng) {
-
   // We define the builder outside of the loop bacause if at least one checkers
   // creates a sucsessor for Pred, we do not need to generate an
   // autotransition for it.
   NodeBuilder Bldr(Pred, Dst, BC);
-  for (unsigned i = 0, e = EndFunctionCheckers.size(); i != e; ++i) {
-    CheckEndFunctionFunc checkFn = EndFunctionCheckers[i];
-
+  for (const auto checkFn : EndFunctionCheckers) {
     const ProgramPoint &L = BlockEntrance(BC.Block,
                                           Pred->getLocationContext(),
                                           checkFn.Checker);
@@ -435,18 +454,20 @@
 }
 
 namespace {
+
   struct CheckBranchConditionContext {
-    typedef std::vector<CheckerManager::CheckBranchConditionFunc> CheckersTy;
+    using CheckersTy = std::vector<CheckerManager::CheckBranchConditionFunc>;
+
     const CheckersTy &Checkers;
     const Stmt *Condition;
     ExprEngine &Eng;
 
-    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
-    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
-
     CheckBranchConditionContext(const CheckersTy &checkers,
                                 const Stmt *Cond, ExprEngine &eng)
-      : Checkers(checkers), Condition(Cond), Eng(eng) {}
+        : Checkers(checkers), Condition(Cond), Eng(eng) {}
+
+    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
+    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
 
     void runChecker(CheckerManager::CheckBranchConditionFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
@@ -456,7 +477,8 @@
       checkFn(Condition, C);
     }
   };
-}
+
+} // namespace
 
 /// \brief Run checkers for branch condition.
 void CheckerManager::runCheckersForBranchCondition(const Stmt *Condition,
@@ -469,29 +491,69 @@
   expandGraphWithCheckers(C, Dst, Src);
 }
 
+namespace {
+
+  struct CheckNewAllocatorContext {
+    using CheckersTy = std::vector<CheckerManager::CheckNewAllocatorFunc>;
+
+    const CheckersTy &Checkers;
+    const CXXNewExpr *NE;
+    SVal Target;
+    bool WasInlined;
+    ExprEngine &Eng;
+
+    CheckNewAllocatorContext(const CheckersTy &Checkers, const CXXNewExpr *NE,
+                             SVal Target, bool WasInlined, ExprEngine &Eng)
+        : Checkers(Checkers), NE(NE), Target(Target), WasInlined(WasInlined),
+          Eng(Eng) {}
+
+    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
+    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
+
+    void runChecker(CheckerManager::CheckNewAllocatorFunc checkFn,
+                    NodeBuilder &Bldr, ExplodedNode *Pred) {
+      ProgramPoint L = PostAllocatorCall(NE, Pred->getLocationContext());
+      CheckerContext C(Bldr, Eng, Pred, L, WasInlined);
+      checkFn(NE, Target, C);
+    }
+  };
+
+} // namespace
+
+void CheckerManager::runCheckersForNewAllocator(
+    const CXXNewExpr *NE, SVal Target, ExplodedNodeSet &Dst, ExplodedNode *Pred,
+    ExprEngine &Eng, bool WasInlined) {
+  ExplodedNodeSet Src;
+  Src.insert(Pred);
+  CheckNewAllocatorContext C(NewAllocatorCheckers, NE, Target, WasInlined, Eng);
+  expandGraphWithCheckers(C, Dst, Src);
+}
+
 /// \brief Run checkers for live symbols.
 void CheckerManager::runCheckersForLiveSymbols(ProgramStateRef state,
                                                SymbolReaper &SymReaper) {
-  for (unsigned i = 0, e = LiveSymbolsCheckers.size(); i != e; ++i)
-    LiveSymbolsCheckers[i](state, SymReaper);
+  for (const auto LiveSymbolsChecker : LiveSymbolsCheckers)
+    LiveSymbolsChecker(state, SymReaper);
 }
 
 namespace {
+
   struct CheckDeadSymbolsContext {
-    typedef std::vector<CheckerManager::CheckDeadSymbolsFunc> CheckersTy;
+    using CheckersTy = std::vector<CheckerManager::CheckDeadSymbolsFunc>;
+
     const CheckersTy &Checkers;
     SymbolReaper &SR;
     const Stmt *S;
     ExprEngine &Eng;
     ProgramPoint::Kind ProgarmPointKind;
 
-    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
-    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
-
     CheckDeadSymbolsContext(const CheckersTy &checkers, SymbolReaper &sr,
                             const Stmt *s, ExprEngine &eng,
                             ProgramPoint::Kind K)
-      : Checkers(checkers), SR(sr), S(s), Eng(eng), ProgarmPointKind(K) { }
+        : Checkers(checkers), SR(sr), S(s), Eng(eng), ProgarmPointKind(K) {}
+
+    CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
+    CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
 
     void runChecker(CheckerManager::CheckDeadSymbolsFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
@@ -505,7 +567,8 @@
       checkFn(SR, C);
     }
   };
-}
+
+} // namespace
 
 /// \brief Run checkers for dead symbols.
 void CheckerManager::runCheckersForDeadSymbols(ExplodedNodeSet &Dst,
@@ -526,14 +589,13 @@
                                             ArrayRef<const MemRegion *> Regions,
                                             const LocationContext *LCtx,
                                             const CallEvent *Call) {
-  for (unsigned i = 0, e = RegionChangesCheckers.size(); i != e; ++i) {
+  for (const auto RegionChangesChecker : RegionChangesCheckers) {
     // If any checker declares the state infeasible (or if it starts that way),
     // bail out.
     if (!state)
       return nullptr;
-    state = RegionChangesCheckers[i](state, invalidated,
-                                     ExplicitRegions, Regions,
-                                     LCtx, Call);
+    state = RegionChangesChecker(state, invalidated, ExplicitRegions, Regions,
+                                 LCtx, Call);
   }
   return state;
 }
@@ -549,13 +611,13 @@
           (Kind != PSK_DirectEscapeOnCall &&
            Kind != PSK_IndirectEscapeOnCall)) &&
          "Call must not be NULL when escaping on call");
-    for (unsigned i = 0, e = PointerEscapeCheckers.size(); i != e; ++i) {
-      // If any checker declares the state infeasible (or if it starts that
-      //  way), bail out.
-      if (!State)
-        return nullptr;
-      State = PointerEscapeCheckers[i](State, Escaped, Call, Kind, ETraits);
-    }
+  for (const auto PointerEscapeChecker : PointerEscapeCheckers) {
+    // If any checker declares the state infeasible (or if it starts that
+    //  way), bail out.
+    if (!State)
+      return nullptr;
+    State = PointerEscapeChecker(State, Escaped, Call, Kind, ETraits);
+  }
   return State;
 }
 
@@ -563,12 +625,12 @@
 ProgramStateRef
 CheckerManager::runCheckersForEvalAssume(ProgramStateRef state,
                                          SVal Cond, bool Assumption) {
-  for (unsigned i = 0, e = EvalAssumeCheckers.size(); i != e; ++i) {
+  for (const auto EvalAssumeChecker : EvalAssumeCheckers) {
     // If any checker declares the state infeasible (or if it starts that way),
     // bail out.
     if (!state)
       return nullptr;
-    state = EvalAssumeCheckers[i](state, Cond, Assumption);
+    state = EvalAssumeChecker(state, Cond, Assumption);
   }
   return state;
 }
@@ -580,27 +642,24 @@
                                             const CallEvent &Call,
                                             ExprEngine &Eng) {
   const CallExpr *CE = cast<CallExpr>(Call.getOriginExpr());
-  for (ExplodedNodeSet::iterator
-         NI = Src.begin(), NE = Src.end(); NI != NE; ++NI) {
-    ExplodedNode *Pred = *NI;
+  for (const auto Pred : Src) {
     bool anyEvaluated = false;
 
     ExplodedNodeSet checkDst;
     NodeBuilder B(Pred, checkDst, Eng.getBuilderContext());
 
     // Check if any of the EvalCall callbacks can evaluate the call.
-    for (std::vector<EvalCallFunc>::iterator
-           EI = EvalCallCheckers.begin(), EE = EvalCallCheckers.end();
-         EI != EE; ++EI) {
+    for (const auto EvalCallChecker : EvalCallCheckers) {
       ProgramPoint::Kind K = ProgramPoint::PostStmtKind;
-      const ProgramPoint &L = ProgramPoint::getProgramPoint(CE, K,
-                                Pred->getLocationContext(), EI->Checker);
+      const ProgramPoint &L =
+          ProgramPoint::getProgramPoint(CE, K, Pred->getLocationContext(),
+                                        EvalCallChecker.Checker);
       bool evaluated = false;
       { // CheckerContext generates transitions(populates checkDest) on
         // destruction, so introduce the scope to make sure it gets properly
         // populated.
         CheckerContext C(B, Eng, Pred, L);
-        evaluated = (*EI)(CE, C);
+        evaluated = EvalCallChecker(CE, C);
       }
       assert(!(evaluated && anyEvaluated)
              && "There are more than one checkers evaluating the call");
@@ -626,16 +685,15 @@
                                                   const TranslationUnitDecl *TU,
                                                   AnalysisManager &mgr,
                                                   BugReporter &BR) {
-  for (unsigned i = 0, e = EndOfTranslationUnitCheckers.size(); i != e; ++i)
-    EndOfTranslationUnitCheckers[i](TU, mgr, BR);
+  for (const auto EndOfTranslationUnitChecker : EndOfTranslationUnitCheckers)
+    EndOfTranslationUnitChecker(TU, mgr, BR);
 }
 
 void CheckerManager::runCheckersForPrintState(raw_ostream &Out,
                                               ProgramStateRef State,
                                               const char *NL, const char *Sep) {
-  for (llvm::DenseMap<CheckerTag, CheckerRef>::iterator
-        I = CheckerTags.begin(), E = CheckerTags.end(); I != E; ++I)
-    I->second->printState(Out, State, NL, Sep);
+  for (const auto &CheckerTag : CheckerTags)
+    CheckerTag.second->printState(Out, State, NL, Sep);
 }
 
 //===----------------------------------------------------------------------===//
@@ -661,6 +719,7 @@
   StmtCheckerInfo info = { checkfn, isForStmtFn, /*IsPreVisit*/true };
   StmtCheckers.push_back(info);
 }
+
 void CheckerManager::_registerForPostStmt(CheckStmtFunc checkfn,
                                           HandlesStmtFunc isForStmtFn) {
   StmtCheckerInfo info = { checkfn, isForStmtFn, /*IsPreVisit*/false };
@@ -711,6 +770,10 @@
   BranchConditionCheckers.push_back(checkfn);
 }
 
+void CheckerManager::_registerForNewAllocator(CheckNewAllocatorFunc checkfn) {
+  NewAllocatorCheckers.push_back(checkfn);
+}
+
 void CheckerManager::_registerForLiveSymbols(CheckLiveSymbolsFunc checkfn) {
   LiveSymbolsCheckers.push_back(checkfn);
 }
@@ -760,15 +823,13 @@
 
   // Find the checkers that should run for this Stmt and cache them.
   CachedStmtCheckers &Checkers = CachedStmtCheckersMap[Key];
-  for (unsigned i = 0, e = StmtCheckers.size(); i != e; ++i) {
-    StmtCheckerInfo &Info = StmtCheckers[i];
+  for (const auto &Info : StmtCheckers)
     if (Info.IsPreVisit == isPreVisit && Info.IsForStmtFn(S))
       Checkers.push_back(Info.CheckFn);
-  }
   return Checkers;
 }
 
 CheckerManager::~CheckerManager() {
-  for (unsigned i = 0, e = CheckerDtors.size(); i != e; ++i)
-    CheckerDtors[i]();
+  for (const auto CheckerDtor : CheckerDtors)
+    CheckerDtor();
 }
diff --git a/lib/StaticAnalyzer/Core/ConstraintManager.cpp b/lib/StaticAnalyzer/Core/ConstraintManager.cpp
index 8de2b0e..ef9c44c 100644
--- a/lib/StaticAnalyzer/Core/ConstraintManager.cpp
+++ b/lib/StaticAnalyzer/Core/ConstraintManager.cpp
@@ -1,4 +1,4 @@
-//== ConstraintManager.cpp - Constraints on symbolic values -----*- C++ -*--==//
+//===- ConstraintManager.cpp - Constraints on symbolic values. ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,12 +11,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h"
+#include "clang/AST/Type.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
 
 using namespace clang;
 using namespace ento;
 
-ConstraintManager::~ConstraintManager() {}
+ConstraintManager::~ConstraintManager() = default;
 
 static DefinedSVal getLocFromSymbol(const ProgramStateRef &State,
                                     SymbolRef Sym) {
@@ -35,5 +40,5 @@
     return ConditionTruthVal(false);
   if (!P.first && P.second)
     return ConditionTruthVal(true);
-  return ConditionTruthVal();
+  return {};
 }
diff --git a/lib/StaticAnalyzer/Core/CoreEngine.cpp b/lib/StaticAnalyzer/Core/CoreEngine.cpp
index e2e9ddf..592f0ba 100644
--- a/lib/StaticAnalyzer/Core/CoreEngine.cpp
+++ b/lib/StaticAnalyzer/Core/CoreEngine.cpp
@@ -1,4 +1,4 @@
-//==- CoreEngine.cpp - Path-Sensitive Dataflow Engine ------------*- C++ -*-//
+//===- CoreEngine.cpp - Path-Sensitive Dataflow Engine --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,11 +15,33 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/Stmt.h"
 #include "clang/AST/StmtCXX.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/ProgramPoint.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/BlockCounter.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/WorkList.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <memory>
+#include <utility>
+#include <vector>
 
 using namespace clang;
 using namespace ento;
@@ -33,15 +55,18 @@
 STATISTIC(NumPathsExplored,
             "The # of paths explored by the analyzer.");
 
+STATISTIC(MaxQueueSize, "Maximum size of the worklist");
+STATISTIC(MaxReachableSize, "Maximum size of auxiliary worklist set");
+
 //===----------------------------------------------------------------------===//
 // Worklist classes for exploration of reachable states.
 //===----------------------------------------------------------------------===//
 
-WorkList::Visitor::~Visitor() {}
-
 namespace {
+
 class DFS : public WorkList {
-  SmallVector<WorkListUnit,20> Stack;
+  SmallVector<WorkListUnit, 20> Stack;
+
 public:
   bool hasWork() const override {
     return !Stack.empty();
@@ -52,24 +77,16 @@
   }
 
   WorkListUnit dequeue() override {
-    assert (!Stack.empty());
+    assert(!Stack.empty());
     const WorkListUnit& U = Stack.back();
     Stack.pop_back(); // This technically "invalidates" U, but we are fine.
     return U;
   }
-
-  bool visitItemsInWorkList(Visitor &V) override {
-    for (SmallVectorImpl<WorkListUnit>::iterator
-         I = Stack.begin(), E = Stack.end(); I != E; ++I) {
-      if (V.visit(*I))
-        return true;
-    }
-    return false;
-  }
 };
 
 class BFS : public WorkList {
   std::deque<WorkListUnit> Queue;
+
 public:
   bool hasWork() const override {
     return !Queue.empty();
@@ -84,30 +101,28 @@
     Queue.pop_front();
     return U;
   }
-
-  bool visitItemsInWorkList(Visitor &V) override {
-    for (std::deque<WorkListUnit>::iterator
-         I = Queue.begin(), E = Queue.end(); I != E; ++I) {
-      if (V.visit(*I))
-        return true;
-    }
-    return false;
-  }
 };
 
-} // end anonymous namespace
+} // namespace
 
 // Place the dstor for WorkList here because it contains virtual member
 // functions, and we the code for the dstor generated in one compilation unit.
-WorkList::~WorkList() {}
+WorkList::~WorkList() = default;
 
-WorkList *WorkList::makeDFS() { return new DFS(); }
-WorkList *WorkList::makeBFS() { return new BFS(); }
+std::unique_ptr<WorkList> WorkList::makeDFS() {
+  return llvm::make_unique<DFS>();
+}
+
+std::unique_ptr<WorkList> WorkList::makeBFS() {
+  return llvm::make_unique<BFS>();
+}
 
 namespace {
+
   class BFSBlockDFSContents : public WorkList {
     std::deque<WorkListUnit> Queue;
-    SmallVector<WorkListUnit,20> Stack;
+    SmallVector<WorkListUnit, 20> Stack;
+
   public:
     bool hasWork() const override {
       return !Queue.empty() || !Stack.empty();
@@ -135,45 +150,172 @@
       Queue.pop_front();
       return U;
     }
-    bool visitItemsInWorkList(Visitor &V) override {
-      for (SmallVectorImpl<WorkListUnit>::iterator
-           I = Stack.begin(), E = Stack.end(); I != E; ++I) {
-        if (V.visit(*I))
-          return true;
+  };
+
+} // namespace
+
+std::unique_ptr<WorkList> WorkList::makeBFSBlockDFSContents() {
+  return llvm::make_unique<BFSBlockDFSContents>();
+}
+
+namespace {
+
+class UnexploredFirstStack : public WorkList {
+  /// Stack of nodes known to have statements we have not traversed yet.
+  SmallVector<WorkListUnit, 20> StackUnexplored;
+
+  /// Stack of all other nodes.
+  SmallVector<WorkListUnit, 20> StackOthers;
+
+  using BlockID = unsigned;
+  using LocIdentifier = std::pair<BlockID, const StackFrameContext *>;
+
+  llvm::DenseSet<LocIdentifier> Reachable;
+
+public:
+  bool hasWork() const override {
+    return !(StackUnexplored.empty() && StackOthers.empty());
+  }
+
+  void enqueue(const WorkListUnit &U) override {
+    const ExplodedNode *N = U.getNode();
+    auto BE = N->getLocation().getAs<BlockEntrance>();
+
+    if (!BE) {
+      // Assume the choice of the order of the preceeding block entrance was
+      // correct.
+      StackUnexplored.push_back(U);
+    } else {
+      LocIdentifier LocId = std::make_pair(
+          BE->getBlock()->getBlockID(), N->getStackFrame());
+      auto InsertInfo = Reachable.insert(LocId);
+
+      if (InsertInfo.second) {
+        StackUnexplored.push_back(U);
+      } else {
+        StackOthers.push_back(U);
       }
-      for (std::deque<WorkListUnit>::iterator
-           I = Queue.begin(), E = Queue.end(); I != E; ++I) {
-        if (V.visit(*I))
-          return true;
-      }
-      return false;
+    }
+    MaxReachableSize.updateMax(Reachable.size());
+    MaxQueueSize.updateMax(StackUnexplored.size() + StackOthers.size());
+  }
+
+  WorkListUnit dequeue() override {
+    if (!StackUnexplored.empty()) {
+      WorkListUnit &U = StackUnexplored.back();
+      StackUnexplored.pop_back();
+      return U;
+    } else {
+      WorkListUnit &U = StackOthers.back();
+      StackOthers.pop_back();
+      return U;
+    }
+  }
+};
+
+} // namespace
+
+std::unique_ptr<WorkList> WorkList::makeUnexploredFirst() {
+  return llvm::make_unique<UnexploredFirstStack>();
+}
+
+class UnexploredFirstPriorityQueue : public WorkList {
+  using BlockID = unsigned;
+  using LocIdentifier = std::pair<BlockID, const StackFrameContext *>;
+
+  // How many times each location was visited.
+  // Is signed because we negate it later in order to have a reversed
+  // comparison.
+  using VisitedTimesMap = llvm::DenseMap<LocIdentifier, int>;
+
+  // Compare by number of times the location was visited first (negated
+  // to prefer less often visited locations), then by insertion time (prefer
+  // expanding nodes inserted sooner first).
+  using QueuePriority = std::pair<int, unsigned long>;
+  using QueueItem = std::pair<WorkListUnit, QueuePriority>;
+
+  struct ExplorationComparator {
+    bool operator() (const QueueItem &LHS, const QueueItem &RHS) {
+      return LHS.second < RHS.second;
+    }
+  };
+
+  // Number of inserted nodes, used to emulate DFS ordering in the priority
+  // queue when insertions are equal.
+  unsigned long Counter = 0;
+
+  // Number of times a current location was reached.
+  VisitedTimesMap NumReached;
+
+  // The top item is the largest one.
+  llvm::PriorityQueue<QueueItem, std::vector<QueueItem>, ExplorationComparator>
+      queue;
+
+public:
+  bool hasWork() const override {
+    return !queue.empty();
+  }
+
+  void enqueue(const WorkListUnit &U) override {
+    const ExplodedNode *N = U.getNode();
+    unsigned NumVisited = 0;
+    if (auto BE = N->getLocation().getAs<BlockEntrance>()) {
+      LocIdentifier LocId = std::make_pair(
+          BE->getBlock()->getBlockID(), N->getStackFrame());
+      NumVisited = NumReached[LocId]++;
     }
 
-  };
-} // end anonymous namespace
+    queue.push(std::make_pair(U, std::make_pair(-NumVisited, ++Counter)));
+  }
 
-WorkList* WorkList::makeBFSBlockDFSContents() {
-  return new BFSBlockDFSContents();
+  WorkListUnit dequeue() override {
+    QueueItem U = queue.top();
+    queue.pop();
+    return U.first;
+  }
+};
+
+std::unique_ptr<WorkList> WorkList::makeUnexploredFirstPriorityQueue() {
+  return llvm::make_unique<UnexploredFirstPriorityQueue>();
 }
 
 //===----------------------------------------------------------------------===//
 // Core analysis engine.
 //===----------------------------------------------------------------------===//
 
+static std::unique_ptr<WorkList> generateWorkList(AnalyzerOptions &Opts) {
+  switch (Opts.getExplorationStrategy()) {
+    case AnalyzerOptions::ExplorationStrategyKind::DFS:
+      return WorkList::makeDFS();
+    case AnalyzerOptions::ExplorationStrategyKind::BFS:
+      return WorkList::makeBFS();
+    case AnalyzerOptions::ExplorationStrategyKind::BFSBlockDFSContents:
+      return WorkList::makeBFSBlockDFSContents();
+    case AnalyzerOptions::ExplorationStrategyKind::UnexploredFirst:
+      return WorkList::makeUnexploredFirst();
+    case AnalyzerOptions::ExplorationStrategyKind::UnexploredFirstQueue:
+      return WorkList::makeUnexploredFirstPriorityQueue();
+    default:
+      llvm_unreachable("Unexpected case");
+  }
+}
+
+CoreEngine::CoreEngine(SubEngine &subengine, FunctionSummariesTy *FS,
+                       AnalyzerOptions &Opts)
+    : SubEng(subengine), WList(generateWorkList(Opts)),
+      BCounterFactory(G.getAllocator()), FunctionSummaries(FS) {}
+
 /// ExecuteWorkList - Run the worklist algorithm for a maximum number of steps.
 bool CoreEngine::ExecuteWorkList(const LocationContext *L, unsigned Steps,
                                    ProgramStateRef InitState) {
-
   if (G.num_roots() == 0) { // Initialize the analysis by constructing
     // the root if none exists.
 
     const CFGBlock *Entry = &(L->getCFG()->getEntry());
 
-    assert (Entry->empty() &&
-            "Entry block must be empty.");
+    assert(Entry->empty() && "Entry block must be empty.");
 
-    assert (Entry->succ_size() == 1 &&
-            "Entry block must have 1 successor.");
+    assert(Entry->succ_size() == 1 && "Entry block must have 1 successor.");
 
     // Mark the entry block as visited.
     FunctionSummaries->markVisitedBasicBlock(Entry->getBlockID(),
@@ -195,7 +337,7 @@
 
     bool IsNew;
     ExplodedNode *Node = G.getNode(StartLoc, InitState, false, &IsNew);
-    assert (IsNew);
+    assert(IsNew);
     G.addRoot(Node);
 
     NodeBuilderContext BuilderCtx(*this, StartLoc.getDst(), Node);
@@ -251,13 +393,12 @@
       break;
 
     case ProgramPoint::BlockExitKind:
-      assert (false && "BlockExit location never occur in forward analysis.");
+      assert(false && "BlockExit location never occur in forward analysis.");
       break;
 
-    case ProgramPoint::CallEnterKind: {
+    case ProgramPoint::CallEnterKind:
       HandleCallEnter(Loc.castAs<CallEnter>(), Pred);
       break;
-    }
 
     case ProgramPoint::CallExitBeginKind:
       SubEng.processCallExit(Pred);
@@ -275,7 +416,8 @@
              Loc.getAs<PostInitializer>() ||
              Loc.getAs<PostImplicitCall>() ||
              Loc.getAs<CallExitEnd>() ||
-             Loc.getAs<LoopExit>());
+             Loc.getAs<LoopExit>() ||
+             Loc.getAs<PostAllocatorCall>());
       HandlePostStmt(WU.getBlock(), WU.getIndex(), Pred);
       break;
   }
@@ -294,7 +436,6 @@
 }
 
 void CoreEngine::HandleBlockEdge(const BlockEdge &L, ExplodedNode *Pred) {
-
   const CFGBlock *Blk = L.getDst();
   NodeBuilderContext BuilderCtx(*this, Blk, Pred);
 
@@ -306,18 +447,14 @@
 
   // Check if we are entering the EXIT block.
   if (Blk == &(L.getLocationContext()->getCFG()->getExit())) {
-
-    assert (L.getLocationContext()->getCFG()->getExit().size() == 0
-            && "EXIT block cannot contain Stmts.");
+    assert(L.getLocationContext()->getCFG()->getExit().empty() &&
+           "EXIT block cannot contain Stmts.");
 
     // Get return statement..
     const ReturnStmt *RS = nullptr;
     if (!L.getSrc()->empty()) {
       if (Optional<CFGStmt> LastStmt = L.getSrc()->back().getAs<CFGStmt>()) {
-        if ((RS = dyn_cast<ReturnStmt>(LastStmt->getStmt()))) {
-          if (!RS->getRetValue())
-            RS = nullptr;
-        }
+        RS = dyn_cast<ReturnStmt>(LastStmt->getStmt());
       }
     }
 
@@ -345,7 +482,6 @@
 
 void CoreEngine::HandleBlockEntrance(const BlockEntrance &L,
                                        ExplodedNode *Pred) {
-
   // Increment the block counter.
   const LocationContext *LC = Pred->getLocationContext();
   unsigned BlockId = L.getBlock()->getBlockID();
@@ -364,7 +500,6 @@
 }
 
 void CoreEngine::HandleBlockExit(const CFGBlock * B, ExplodedNode *Pred) {
-
   if (const Stmt *Term = B->getTerminator()) {
     switch (Term->getStmtClass()) {
       default:
@@ -397,7 +532,7 @@
         HandleBranch(cast<ChooseExpr>(Term)->getCond(), Term, B, Pred);
         return;
 
-      case Stmt::CXXTryStmtClass: {
+      case Stmt::CXXTryStmtClass:
         // Generate a node for each of the successors.
         // Our logic for EH analysis can certainly be improved.
         for (CFGBlock::const_succ_iterator it = B->succ_begin(),
@@ -408,7 +543,6 @@
           }
         }
         return;
-      }
 
       case Stmt::DoStmtClass:
         HandleBranch(cast<DoStmt>(Term)->getCond(), Term, B, Pred);
@@ -433,7 +567,7 @@
 
       case Stmt::IndirectGotoStmtClass: {
         // Only 1 successor: the indirect goto dispatch block.
-        assert (B->succ_size() == 1);
+        assert(B->succ_size() == 1);
 
         IndirectGotoNodeBuilder
            builder(Pred, B, cast<IndirectGotoStmt>(Term)->getTarget(),
@@ -443,7 +577,7 @@
         return;
       }
 
-      case Stmt::ObjCForCollectionStmtClass: {
+      case Stmt::ObjCForCollectionStmtClass:
         // In the case of ObjCForCollectionStmt, it appears twice in a CFG:
         //
         //  (1) inside a basic block, which represents the binding of the
@@ -456,7 +590,6 @@
         // contain nil elements.
         HandleBranch(Term, Term, B, Pred);
         return;
-      }
 
       case Stmt::SwitchStmtClass: {
         SwitchNodeBuilder builder(Pred, B, cast<SwitchStmt>(Term)->getCond(),
@@ -472,8 +605,8 @@
     }
   }
 
-  assert (B->succ_size() == 1 &&
-          "Blocks with no terminator should have at most 1 successor.");
+  assert(B->succ_size() == 1 &&
+         "Blocks with no terminator should have at most 1 successor.");
 
   generateNode(BlockEdge(B, *(B->succ_begin()), Pred->getLocationContext()),
                Pred->State, Pred);
@@ -518,9 +651,8 @@
   enqueue(Dst);
 }
 
-
 void CoreEngine::HandlePostStmt(const CFGBlock *B, unsigned StmtIdx,
-                                  ExplodedNode *Pred) {
+                                ExplodedNode *Pred) {
   assert(B);
   assert(!B->empty());
 
@@ -537,14 +669,13 @@
 void CoreEngine::generateNode(const ProgramPoint &Loc,
                               ProgramStateRef State,
                               ExplodedNode *Pred) {
-
   bool IsNew;
   ExplodedNode *Node = G.getNode(Loc, State, false, &IsNew);
 
   if (Pred)
     Node->addPredecessor(Pred, G); // Link 'Node' with its predecessor.
   else {
-    assert (IsNew);
+    assert(IsNew);
     G.addRoot(Node); // 'Node' has no predecessor.  Make it a root.
   }
 
@@ -555,7 +686,7 @@
 void CoreEngine::enqueueStmtNode(ExplodedNode *N,
                                  const CFGBlock *Block, unsigned Idx) {
   assert(Block);
-  assert (!N->isSink());
+  assert(!N->isSink());
 
   // Check if this node entered a callee.
   if (N->getLocation().getAs<CallEnter>()) {
@@ -605,8 +736,7 @@
 ExplodedNode *CoreEngine::generateCallExitBeginNode(ExplodedNode *N,
                                                     const ReturnStmt *RS) {
   // Create a CallExitBegin node and enqueue it.
-  const StackFrameContext *LocCtx
-                         = cast<StackFrameContext>(N->getLocationContext());
+  const auto *LocCtx = cast<StackFrameContext>(N->getLocationContext());
 
   // Use the callee location context.
   CallExitBegin Loc(LocCtx, RS);
@@ -617,40 +747,33 @@
   return isNew ? Node : nullptr;
 }
 
-
 void CoreEngine::enqueue(ExplodedNodeSet &Set) {
-  for (ExplodedNodeSet::iterator I = Set.begin(),
-                                 E = Set.end(); I != E; ++I) {
-    WList->enqueue(*I);
-  }
+  for (const auto I : Set)
+    WList->enqueue(I);
 }
 
 void CoreEngine::enqueue(ExplodedNodeSet &Set,
                          const CFGBlock *Block, unsigned Idx) {
-  for (ExplodedNodeSet::iterator I = Set.begin(),
-                                 E = Set.end(); I != E; ++I) {
-    enqueueStmtNode(*I, Block, Idx);
-  }
+  for (const auto I : Set)
+    enqueueStmtNode(I, Block, Idx);
 }
 
 void CoreEngine::enqueueEndOfFunction(ExplodedNodeSet &Set, const ReturnStmt *RS) {
-  for (ExplodedNodeSet::iterator I = Set.begin(), E = Set.end(); I != E; ++I) {
-    ExplodedNode *N = *I;
+  for (auto I : Set) {
     // If we are in an inlined call, generate CallExitBegin node.
-    if (N->getLocationContext()->getParent()) {
-      N = generateCallExitBeginNode(N, RS);
-      if (N)
-        WList->enqueue(N);
+    if (I->getLocationContext()->getParent()) {
+      I = generateCallExitBeginNode(I, RS);
+      if (I)
+        WList->enqueue(I);
     } else {
       // TODO: We should run remove dead bindings here.
-      G.addEndOfPath(N);
+      G.addEndOfPath(I);
       NumPathsExplored++;
     }
   }
 }
 
-
-void NodeBuilder::anchor() { }
+void NodeBuilder::anchor() {}
 
 ExplodedNode* NodeBuilder::generateNodeImpl(const ProgramPoint &Loc,
                                             ProgramStateRef State,
@@ -671,16 +794,15 @@
   return N;
 }
 
-void NodeBuilderWithSinks::anchor() { }
+void NodeBuilderWithSinks::anchor() {}
 
 StmtNodeBuilder::~StmtNodeBuilder() {
   if (EnclosingBldr)
-    for (ExplodedNodeSet::iterator I = Frontier.begin(),
-                                   E = Frontier.end(); I != E; ++I )
-      EnclosingBldr->addNodes(*I);
+    for (const auto I : Frontier)
+      EnclosingBldr->addNodes(I);
 }
 
-void BranchNodeBuilder::anchor() { }
+void BranchNodeBuilder::anchor() {}
 
 ExplodedNode *BranchNodeBuilder::generateNode(ProgramStateRef State,
                                               bool branch,
@@ -714,11 +836,9 @@
   return Succ;
 }
 
-
 ExplodedNode*
 SwitchNodeBuilder::generateCaseStmtNode(const iterator &I,
                                         ProgramStateRef St) {
-
   bool IsNew;
   ExplodedNode *Succ =
       Eng.G.getNode(BlockEdge(Src, I.getBlock(), Pred->getLocationContext()),
@@ -731,7 +851,6 @@
   return Succ;
 }
 
-
 ExplodedNode*
 SwitchNodeBuilder::generateDefaultCaseNode(ProgramStateRef St,
                                            bool IsSink) {
diff --git a/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp b/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp
index a01ff36..5309339 100644
--- a/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp
+++ b/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp
@@ -1,4 +1,4 @@
-//==- DynamicTypeMap.cpp - Dynamic Type Info related APIs ----------*- C++ -*-//
+//===- DynamicTypeMap.cpp - Dynamic Type Info related APIs ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,6 +14,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
 
 namespace clang {
 namespace ento {
@@ -28,15 +35,15 @@
     return *GDMType;
 
   // Otherwise, fall back to what we know about the region.
-  if (const TypedRegion *TR = dyn_cast<TypedRegion>(Reg))
+  if (const auto *TR = dyn_cast<TypedRegion>(Reg))
     return DynamicTypeInfo(TR->getLocationType(), /*CanBeSubclass=*/false);
 
-  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) {
+  if (const auto *SR = dyn_cast<SymbolicRegion>(Reg)) {
     SymbolRef Sym = SR->getSymbol();
     return DynamicTypeInfo(Sym->getType());
   }
 
-  return DynamicTypeInfo();
+  return {};
 }
 
 ProgramStateRef setDynamicTypeInfo(ProgramStateRef State, const MemRegion *Reg,
@@ -47,5 +54,28 @@
   return NewState;
 }
 
+void printDynamicTypeInfo(ProgramStateRef State, raw_ostream &Out,
+                          const char *NL, const char *Sep) {
+  bool First = true;
+  for (const auto &I : State->get<DynamicTypeMap>()) {
+    if (First) {
+      Out << NL << "Dynamic types of regions:" << NL;
+      First = false;
+    }
+    const MemRegion *MR = I.first;
+    const DynamicTypeInfo &DTI = I.second;
+    Out << MR << " : ";
+    if (DTI.isValid()) {
+      Out << DTI.getType()->getPointeeType().getAsString();
+      if (DTI.canBeASubClass()) {
+        Out << " (or its subclass)";
+      }
+    } else {
+      Out << "Invalid type info";
+    }
+    Out << NL;
+  }
+}
+
 } // namespace ento
 } // namespace clang
diff --git a/lib/StaticAnalyzer/Core/Environment.cpp b/lib/StaticAnalyzer/Core/Environment.cpp
index c6acb9d..67b37a3 100644
--- a/lib/StaticAnalyzer/Core/Environment.cpp
+++ b/lib/StaticAnalyzer/Core/Environment.cpp
@@ -1,4 +1,4 @@
-//== Environment.cpp - Map from Stmt* to Locations/Values -------*- C++ -*--==//
+//===- Environment.cpp - Map from Stmt* to Locations/Values ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,12 +11,25 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/StaticAnalyzer/Core/PathSensitive/Environment.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
-#include "clang/AST/ExprObjC.h"
+#include "clang/AST/PrettyPrinter.h"
+#include "clang/AST/Stmt.h"
 #include "clang/Analysis/AnalysisDeclContext.h"
-#include "clang/Analysis/CFG.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+#include "llvm/ADT/ImmutableMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cassert>
 
 using namespace clang;
 using namespace ento;
@@ -46,16 +59,16 @@
 }
 
 static const Stmt *ignoreTransparentExprs(const Stmt *S) {
-  if (const Expr *E = dyn_cast<Expr>(S))
+  if (const auto *E = dyn_cast<Expr>(S))
     return ignoreTransparentExprs(E);
   return S;
 }
 
 EnvironmentEntry::EnvironmentEntry(const Stmt *S, const LocationContext *L)
-  : std::pair<const Stmt *,
-              const StackFrameContext *>(ignoreTransparentExprs(S),
-                                         L ? L->getCurrentStackFrame()
-                                           : nullptr) {}
+    : std::pair<const Stmt *,
+                const StackFrameContext *>(ignoreTransparentExprs(S),
+                                           L ? L->getCurrentStackFrame()
+                                             : nullptr) {}
 
 SVal Environment::lookupExpr(const EnvironmentEntry &E) const {
   const SVal* X = ExprBindings.lookup(E);
@@ -95,7 +108,7 @@
     return svalBuilder.getConstantVal(cast<Expr>(S)).getValue();
 
   case Stmt::ReturnStmtClass: {
-    const ReturnStmt *RS = cast<ReturnStmt>(S);
+    const auto *RS = cast<ReturnStmt>(S);
     if (const Expr *RE = RS->getRetValue())
       return getSVal(EnvironmentEntry(RE, LCtx), svalBuilder);
     return UndefinedVal();
@@ -121,20 +134,25 @@
 }
 
 namespace {
+
 class MarkLiveCallback final : public SymbolVisitor {
   SymbolReaper &SymReaper;
+
 public:
   MarkLiveCallback(SymbolReaper &symreaper) : SymReaper(symreaper) {}
+
   bool VisitSymbol(SymbolRef sym) override {
     SymReaper.markLive(sym);
     return true;
   }
+
   bool VisitMemRegion(const MemRegion *R) override {
     SymReaper.markLive(R);
     return true;
   }
 };
-} // end anonymous namespace
+
+} // namespace
 
 // removeDeadBindings:
 //  - Remove subexpression bindings.
@@ -147,7 +165,6 @@
 EnvironmentManager::removeDeadBindings(Environment Env,
                                        SymbolReaper &SymReaper,
                                        ProgramStateRef ST) {
-
   // We construct a new Environment object entirely, as this is cheaper than
   // individually removing all the subexpression bindings (which will greatly
   // outnumber block-level expression bindings).
@@ -156,14 +173,13 @@
   MarkLiveCallback CB(SymReaper);
   ScanReachableSymbols RSScaner(ST, CB);
 
-  llvm::ImmutableMapRef<EnvironmentEntry,SVal>
+  llvm::ImmutableMapRef<EnvironmentEntry, SVal>
     EBMapRef(NewEnv.ExprBindings.getRootWithoutRetain(),
              F.getTreeFactory());
 
   // Iterate over the block-expr bindings.
   for (Environment::iterator I = Env.begin(), E = Env.end();
        I != E; ++I) {
-
     const EnvironmentEntry &BlkExpr = I.getKey();
     const SVal &X = I.getData();
 
@@ -186,28 +202,41 @@
 }
 
 void Environment::print(raw_ostream &Out, const char *NL,
-                        const char *Sep) const {
-  bool isFirst = true;
+                        const char *Sep, const LocationContext *WithLC) const {
+  if (ExprBindings.isEmpty())
+    return;
 
-  for (Environment::iterator I = begin(), E = end(); I != E; ++I) {
-    const EnvironmentEntry &En = I.getKey();
-
-    if (isFirst) {
-      Out << NL << NL
-          << "Expressions:"
-          << NL;
-      isFirst = false;
-    } else {
-      Out << NL;
+  if (!WithLC) {
+    // Find the freshest location context.
+    llvm::SmallPtrSet<const LocationContext *, 16> FoundContexts;
+    for (auto I : *this) {
+      const LocationContext *LC = I.first.getLocationContext();
+      if (FoundContexts.count(LC) == 0) {
+        // This context is fresher than all other contexts so far.
+        WithLC = LC;
+        for (const LocationContext *LCI = LC; LCI; LCI = LCI->getParent())
+          FoundContexts.insert(LCI);
+      }
     }
-
-    const Stmt *S = En.getStmt();
-    assert(S != nullptr && "Expected non-null Stmt");
-
-    Out << " (" << (const void*) En.getLocationContext() << ','
-      << (const void*) S << ") ";
-    LangOptions LO; // FIXME.
-    S->printPretty(Out, nullptr, PrintingPolicy(LO));
-    Out << " : " << I.getData();
   }
+
+  assert(WithLC);
+
+  LangOptions LO; // FIXME.
+  PrintingPolicy PP(LO);
+
+  Out << NL << NL << "Expressions by stack frame:" << NL;
+  WithLC->dumpStack(Out, "", NL, Sep, [&](const LocationContext *LC) {
+    for (auto I : ExprBindings) {
+      if (I.first.getLocationContext() != LC)
+        continue;
+
+      const Stmt *S = I.first.getStmt();
+      assert(S != nullptr && "Expected non-null Stmt");
+
+      Out << "(" << (const void *)LC << ',' << (const void *)S << ") ";
+      S->printPretty(Out, nullptr, PP);
+      Out << " : " << I.second << NL;
+    }
+  });
 }
diff --git a/lib/StaticAnalyzer/Core/ExplodedGraph.cpp b/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
index 3bc8e09..ece103d 100644
--- a/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
+++ b/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
@@ -1,4 +1,4 @@
-//=-- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -*- C++ -*------=//
+//===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,13 +13,24 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprObjC.h"
 #include "clang/AST/ParentMap.h"
 #include "clang/AST/Stmt.h"
+#include "clang/Analysis/ProgramPoint.h"
+#include "clang/Analysis/Support/BumpVector.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <memory>
 
 using namespace clang;
 using namespace ento;
@@ -29,7 +40,7 @@
 //===----------------------------------------------------------------------===//
 
 // An out of line virtual method to provide a home for the class vtable.
-ExplodedNode::Auditor::~Auditor() {}
+ExplodedNode::Auditor::~Auditor() = default;
 
 #ifndef NDEBUG
 static ExplodedNode::Auditor* NodeAuditor = nullptr;
@@ -45,10 +56,9 @@
 // Cleanup.
 //===----------------------------------------------------------------------===//
 
-ExplodedGraph::ExplodedGraph()
-  : NumNodes(0), ReclaimNodeInterval(0) {}
+ExplodedGraph::ExplodedGraph() = default;
 
-ExplodedGraph::~ExplodedGraph() {}
+ExplodedGraph::~ExplodedGraph() = default;
 
 //===----------------------------------------------------------------------===//
 // Node reclamation.
@@ -187,12 +197,9 @@
     return;
   ReclaimCounter = ReclaimNodeInterval;
 
-  for (NodeVector::iterator it = ChangedNodes.begin(), et = ChangedNodes.end();
-       it != et; ++it) {
-    ExplodedNode *node = *it;
+  for (const auto node : ChangedNodes)
     if (shouldCollect(node))
       collectNode(node);
-  }
   ChangedNodes.clear();
 }
 
@@ -210,11 +217,11 @@
 // 2. The group is empty, in which case the storage value is null.
 // 3. The group contains a single node.
 // 4. The group contains more than one node.
-typedef BumpVector<ExplodedNode *> ExplodedNodeVector;
-typedef llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *> GroupStorage;
+using ExplodedNodeVector = BumpVector<ExplodedNode *>;
+using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>;
 
 void ExplodedNode::addPredecessor(ExplodedNode *V, ExplodedGraph &G) {
-  assert (!V->isSink());
+  assert(!V->isSink());
   Preds.addNode(V, G);
   V->Succs.addNode(this, G);
 #ifndef NDEBUG
@@ -346,25 +353,22 @@
 ExplodedGraph::trim(ArrayRef<const NodeTy *> Sinks,
                     InterExplodedGraphMap *ForwardMap,
                     InterExplodedGraphMap *InverseMap) const {
-
   if (Nodes.empty())
     return nullptr;
 
-  typedef llvm::DenseSet<const ExplodedNode*> Pass1Ty;
+  using Pass1Ty = llvm::DenseSet<const ExplodedNode *>;
   Pass1Ty Pass1;
 
-  typedef InterExplodedGraphMap Pass2Ty;
+  using Pass2Ty = InterExplodedGraphMap;
   InterExplodedGraphMap Pass2Scratch;
   Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch;
 
   SmallVector<const ExplodedNode*, 10> WL1, WL2;
 
   // ===- Pass 1 (reverse DFS) -===
-  for (ArrayRef<const NodeTy *>::iterator I = Sinks.begin(), E = Sinks.end();
-       I != E; ++I) {
-    if (*I)
-      WL1.push_back(*I);
-  }
+  for (const auto Sink : Sinks)
+    if (Sink)
+      WL1.push_back(Sink);
 
   // Process the first worklist until it is empty.
   while (!WL1.empty()) {
@@ -445,4 +449,3 @@
 
   return G;
 }
-
diff --git a/lib/StaticAnalyzer/Core/ExprEngine.cpp b/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 0126e03..1569e86 100644
--- a/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1,4 +1,4 @@
-//=-- ExprEngine.cpp - Path-Sensitive Expression-Level Dataflow ---*- C++ -*-=
+//===- ExprEngine.cpp - Path-Sensitive Expression-Level Dataflow ----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,31 +15,74 @@
 
 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
 #include "PrettyStackTraceLocationContext.h"
-#include "clang/AST/CharUnits.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/ExprObjC.h"
 #include "clang/AST/ParentMap.h"
-#include "clang/Analysis/CFGStmtMap.h"
+#include "clang/AST/PrettyPrinter.h"
+#include "clang/AST/Stmt.h"
 #include "clang/AST/StmtCXX.h"
 #include "clang/AST/StmtObjC.h"
-#include "clang/Basic/Builtins.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/ProgramPoint.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
 #include "clang/Basic/PrettyStackTrace.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Specifiers.h"
+#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/LoopWidening.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/LoopUnrolling.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/LoopWidening.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ImmutableMap.h"
+#include "llvm/ADT/ImmutableSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/raw_ostream.h"
-
-#ifndef NDEBUG
-#include "llvm/Support/GraphWriter.h"
-#endif
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
 
 using namespace clang;
 using namespace ento;
-using llvm::APSInt;
 
 #define DEBUG_TYPE "ExprEngine"
 
@@ -54,14 +97,42 @@
 STATISTIC(NumTimesRetriedWithoutInlining,
             "The # of times we re-evaluated a call without inlining");
 
-typedef std::pair<const CXXBindTemporaryExpr *, const StackFrameContext *>
-    CXXBindTemporaryContext;
+using InitializedTemporariesMap =
+    llvm::ImmutableMap<std::pair<const CXXBindTemporaryExpr *,
+                                 const StackFrameContext *>,
+                       const CXXTempObjectRegion *>;
 
 // Keeps track of whether CXXBindTemporaryExpr nodes have been evaluated.
 // The StackFrameContext assures that nested calls due to inlined recursive
 // functions do not interfere.
-REGISTER_TRAIT_WITH_PROGRAMSTATE(InitializedTemporariesSet,
-                                 llvm::ImmutableSet<CXXBindTemporaryContext>)
+REGISTER_TRAIT_WITH_PROGRAMSTATE(InitializedTemporaries,
+                                 InitializedTemporariesMap)
+
+using TemporaryMaterializationMap =
+    llvm::ImmutableMap<std::pair<const MaterializeTemporaryExpr *,
+                                 const StackFrameContext *>,
+                       const CXXTempObjectRegion *>;
+
+// Keeps track of temporaries that will need to be materialized later.
+// The StackFrameContext assures that nested calls due to inlined recursive
+// functions do not interfere.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(TemporaryMaterializations,
+                                 TemporaryMaterializationMap)
+
+using CXXNewAllocatorValuesMap =
+    llvm::ImmutableMap<std::pair<const CXXNewExpr *, const LocationContext *>,
+                       SVal>;
+
+// Keeps track of return values of various operator new() calls between
+// evaluation of the inlined operator new(), through the constructor call,
+// to the actual evaluation of the CXXNewExpr.
+// TODO: Refactor the key for this trait into a LocationContext sub-class,
+// which would be put on the stack of location contexts before operator new()
+// is evaluated, and removed from the stack when the whole CXXNewExpr
+// is fully evaluated.
+// Probably do something similar to the previous trait as well.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(CXXNewAllocatorValues,
+                                 CXXNewAllocatorValuesMap)
 
 //===----------------------------------------------------------------------===//
 // Engine construction and deletion.
@@ -69,25 +140,21 @@
 
 static const char* TagProviderName = "ExprEngine";
 
-ExprEngine::ExprEngine(AnalysisManager &mgr, bool gcEnabled,
+ExprEngine::ExprEngine(cross_tu::CrossTranslationUnitContext &CTU,
+                       AnalysisManager &mgr, bool gcEnabled,
                        SetOfConstDecls *VisitedCalleesIn,
                        FunctionSummariesTy *FS,
                        InliningModes HowToInlineIn)
-  : AMgr(mgr),
-    AnalysisDeclContexts(mgr.getAnalysisDeclContextManager()),
-    Engine(*this, FS),
-    G(Engine.getGraph()),
-    StateMgr(getContext(), mgr.getStoreManagerCreator(),
-             mgr.getConstraintManagerCreator(), G.getAllocator(),
-             this),
-    SymMgr(StateMgr.getSymbolManager()),
-    svalBuilder(StateMgr.getSValBuilder()),
-    currStmtIdx(0), currBldrCtx(nullptr),
-    ObjCNoRet(mgr.getASTContext()),
-    ObjCGCEnabled(gcEnabled), BR(mgr, *this),
-    VisitedCallees(VisitedCalleesIn),
-    HowToInline(HowToInlineIn)
-{
+    : CTU(CTU), AMgr(mgr),
+      AnalysisDeclContexts(mgr.getAnalysisDeclContextManager()),
+      Engine(*this, FS, mgr.getAnalyzerOptions()), G(Engine.getGraph()),
+      StateMgr(getContext(), mgr.getStoreManagerCreator(),
+               mgr.getConstraintManagerCreator(), G.getAllocator(),
+               this),
+      SymMgr(StateMgr.getSymbolManager()),
+      svalBuilder(StateMgr.getSValBuilder()), ObjCNoRet(mgr.getASTContext()),
+      ObjCGCEnabled(gcEnabled), BR(mgr, *this),
+      VisitedCallees(VisitedCalleesIn), HowToInline(HowToInlineIn) {
   unsigned TrimInterval = mgr.options.getGraphTrimInterval();
   if (TrimInterval != 0) {
     // Enable eager node reclaimation when constructing the ExplodedGraph.
@@ -111,8 +178,7 @@
   // FIXME: It would be nice if we had a more general mechanism to add
   // such preconditions.  Some day.
   do {
-
-    if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+    if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
       // Precondition: the first argument of 'main' is an integer guaranteed
       //  to be > 0.
       const IdentifierInfo *II = FD->getIdentifier();
@@ -121,7 +187,7 @@
 
       const ParmVarDecl *PD = FD->getParamDecl(0);
       QualType T = PD->getType();
-      const BuiltinType *BT = dyn_cast<BuiltinType>(T);
+      const auto *BT = dyn_cast<BuiltinType>(T);
       if (!BT || !BT->isInteger())
         break;
 
@@ -145,9 +211,9 @@
     }
     break;
   }
-  while (0);
+  while (false);
 
-  if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D)) {
+  if (const auto *MD = dyn_cast<ObjCMethodDecl>(D)) {
     // Precondition: 'self' is always non-null upon entry to an Objective-C
     // method.
     const ImplicitParamDecl *SelfD = MD->getSelfDecl();
@@ -161,7 +227,7 @@
     }
   }
 
-  if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D)) {
+  if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
     if (!MD->isStatic()) {
       // Precondition: 'this' is always non-null upon entry to the
       // top-level function.  This is our starting assumption for
@@ -237,17 +303,34 @@
   const Expr *Init = InitWithAdjustments->skipRValueSubobjectAdjustments(
       CommaLHSs, Adjustments);
 
+  // Take the region for Init, i.e. for the whole object. If we do not remember
+  // the region in which the object originally was constructed, come up with
+  // a new temporary region out of thin air and copy the contents of the object
+  // (which are currently present in the Environment, because Init is an rvalue)
+  // into that region. This is not correct, but it is better than nothing.
+  bool FoundOriginalMaterializationRegion = false;
   const TypedValueRegion *TR = nullptr;
-  if (const MaterializeTemporaryExpr *MT =
-          dyn_cast<MaterializeTemporaryExpr>(Result)) {
-    StorageDuration SD = MT->getStorageDuration();
-    // If this object is bound to a reference with static storage duration, we
-    // put it in a different region to prevent "address leakage" warnings.
-    if (SD == SD_Static || SD == SD_Thread)
-      TR = MRMgr.getCXXStaticTempObjectRegion(Init);
-  }
-  if (!TR)
+  if (const auto *MT = dyn_cast<MaterializeTemporaryExpr>(Result)) {
+    auto Key = std::make_pair(MT, LC->getCurrentStackFrame());
+    if (const CXXTempObjectRegion *const *TRPtr =
+            State->get<TemporaryMaterializations>(Key)) {
+      FoundOriginalMaterializationRegion = true;
+      TR = *TRPtr;
+      assert(TR);
+      State = State->remove<TemporaryMaterializations>(Key);
+    } else {
+      StorageDuration SD = MT->getStorageDuration();
+      // If this object is bound to a reference with static storage duration, we
+      // put it in a different region to prevent "address leakage" warnings.
+      if (SD == SD_Static || SD == SD_Thread) {
+        TR = MRMgr.getCXXStaticTempObjectRegion(Init);
+      } else {
+        TR = MRMgr.getCXXTempObjectRegion(Init, LC);
+      }
+    }
+  } else {
     TR = MRMgr.getCXXTempObjectRegion(Init, LC);
+  }
 
   SVal Reg = loc::MemRegionVal(TR);
   SVal BaseReg = Reg;
@@ -269,32 +352,35 @@
     }
   }
 
-  // What remains is to copy the value of the object to the new region.
-  // FIXME: In other words, what we should always do is copy value of the
-  // Init expression (which corresponds to the bigger object) to the whole
-  // temporary region TR. However, this value is often no longer present
-  // in the Environment. If it has disappeared, we instead invalidate TR.
-  // Still, what we can do is assign the value of expression Ex (which
-  // corresponds to the sub-object) to the TR's sub-region Reg. At least,
-  // values inside Reg would be correct.
-  SVal InitVal = State->getSVal(Init, LC);
-  if (InitVal.isUnknown()) {
-    InitVal = getSValBuilder().conjureSymbolVal(Result, LC, Init->getType(),
-                                                currBldrCtx->blockCount());
-    State = State->bindLoc(BaseReg.castAs<Loc>(), InitVal, LC, false);
+  if (!FoundOriginalMaterializationRegion) {
+    // What remains is to copy the value of the object to the new region.
+    // FIXME: In other words, what we should always do is copy value of the
+    // Init expression (which corresponds to the bigger object) to the whole
+    // temporary region TR. However, this value is often no longer present
+    // in the Environment. If it has disappeared, we instead invalidate TR.
+    // Still, what we can do is assign the value of expression Ex (which
+    // corresponds to the sub-object) to the TR's sub-region Reg. At least,
+    // values inside Reg would be correct.
+    SVal InitVal = State->getSVal(Init, LC);
+    if (InitVal.isUnknown()) {
+      InitVal = getSValBuilder().conjureSymbolVal(Result, LC, Init->getType(),
+                                                  currBldrCtx->blockCount());
+      State = State->bindLoc(BaseReg.castAs<Loc>(), InitVal, LC, false);
 
-    // Then we'd need to take the value that certainly exists and bind it over.
-    if (InitValWithAdjustments.isUnknown()) {
-      // Try to recover some path sensitivity in case we couldn't
-      // compute the value.
-      InitValWithAdjustments = getSValBuilder().conjureSymbolVal(
-          Result, LC, InitWithAdjustments->getType(),
-          currBldrCtx->blockCount());
+      // Then we'd need to take the value that certainly exists and bind it
+      // over.
+      if (InitValWithAdjustments.isUnknown()) {
+        // Try to recover some path sensitivity in case we couldn't
+        // compute the value.
+        InitValWithAdjustments = getSValBuilder().conjureSymbolVal(
+            Result, LC, InitWithAdjustments->getType(),
+            currBldrCtx->blockCount());
+      }
+      State =
+          State->bindLoc(Reg.castAs<Loc>(), InitValWithAdjustments, LC, false);
+    } else {
+      State = State->bindLoc(BaseReg.castAs<Loc>(), InitVal, LC, false);
     }
-    State =
-        State->bindLoc(Reg.castAs<Loc>(), InitValWithAdjustments, LC, false);
-  } else {
-    State = State->bindLoc(BaseReg.castAs<Loc>(), InitVal, LC, false);
   }
 
   // The result expression would now point to the correct sub-region of the
@@ -302,12 +388,104 @@
   // correctly in case (Result == Init).
   State = State->BindExpr(Result, LC, Reg);
 
-  // Notify checkers once for two bindLoc()s.
-  State = processRegionChange(State, TR, LC);
+  if (!FoundOriginalMaterializationRegion) {
+    // Notify checkers once for two bindLoc()s.
+    State = processRegionChange(State, TR, LC);
+  }
 
   return State;
 }
 
+ProgramStateRef ExprEngine::addInitializedTemporary(
+    ProgramStateRef State, const CXXBindTemporaryExpr *BTE,
+    const LocationContext *LC, const CXXTempObjectRegion *R) {
+  const auto &Key = std::make_pair(BTE, LC->getCurrentStackFrame());
+  if (!State->contains<InitializedTemporaries>(Key)) {
+    return State->set<InitializedTemporaries>(Key, R);
+  }
+
+  // FIXME: Currently the state might already contain the marker due to
+  // incorrect handling of temporaries bound to default parameters; for
+  // those, we currently skip the CXXBindTemporaryExpr but rely on adding
+  // temporary destructor nodes. Otherwise, this branch should be unreachable.
+  return State;
+}
+
+bool ExprEngine::areInitializedTemporariesClear(ProgramStateRef State,
+                                                const LocationContext *FromLC,
+                                                const LocationContext *ToLC) {
+  const LocationContext *LC = FromLC;
+  while (LC != ToLC) {
+    assert(LC && "ToLC must be a parent of FromLC!");
+    for (auto I : State->get<InitializedTemporaries>())
+      if (I.first.second == LC)
+        return false;
+
+    LC = LC->getParent();
+  }
+  return true;
+}
+
+ProgramStateRef ExprEngine::addTemporaryMaterialization(
+    ProgramStateRef State, const MaterializeTemporaryExpr *MTE,
+    const LocationContext *LC, const CXXTempObjectRegion *R) {
+  const auto &Key = std::make_pair(MTE, LC->getCurrentStackFrame());
+  assert(!State->contains<TemporaryMaterializations>(Key));
+  return State->set<TemporaryMaterializations>(Key, R);
+}
+
+bool ExprEngine::areTemporaryMaterializationsClear(
+    ProgramStateRef State, const LocationContext *FromLC,
+    const LocationContext *ToLC) {
+  const LocationContext *LC = FromLC;
+  while (LC != ToLC) {
+    assert(LC && "ToLC must be a parent of FromLC!");
+    for (auto I : State->get<TemporaryMaterializations>())
+      if (I.first.second == LC)
+        return false;
+
+    LC = LC->getParent();
+  }
+  return true;
+}
+
+ProgramStateRef
+ExprEngine::setCXXNewAllocatorValue(ProgramStateRef State,
+                                    const CXXNewExpr *CNE,
+                                    const LocationContext *CallerLC, SVal V) {
+  assert(!State->get<CXXNewAllocatorValues>(std::make_pair(CNE, CallerLC)) &&
+         "Allocator value already set!");
+  return State->set<CXXNewAllocatorValues>(std::make_pair(CNE, CallerLC), V);
+}
+
+SVal ExprEngine::getCXXNewAllocatorValue(ProgramStateRef State,
+                                         const CXXNewExpr *CNE,
+                                         const LocationContext *CallerLC) {
+  return *State->get<CXXNewAllocatorValues>(std::make_pair(CNE, CallerLC));
+}
+
+ProgramStateRef
+ExprEngine::clearCXXNewAllocatorValue(ProgramStateRef State,
+                                      const CXXNewExpr *CNE,
+                                      const LocationContext *CallerLC) {
+  return State->remove<CXXNewAllocatorValues>(std::make_pair(CNE, CallerLC));
+}
+
+bool ExprEngine::areCXXNewAllocatorValuesClear(ProgramStateRef State,
+                                               const LocationContext *FromLC,
+                                               const LocationContext *ToLC) {
+  const LocationContext *LC = FromLC;
+  while (LC != ToLC) {
+    assert(LC && "ToLC must be a parent of FromLC!");
+    for (auto I : State->get<CXXNewAllocatorValues>())
+      if (I.first.second == LC)
+        return false;
+
+    LC = LC->getParent();
+  }
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // Top-level transfer function logic (Dispatcher).
 //===----------------------------------------------------------------------===//
@@ -331,8 +509,93 @@
                                                          LCtx, Call);
 }
 
+static void printInitializedTemporariesForContext(raw_ostream &Out,
+                                                  ProgramStateRef State,
+                                                  const char *NL,
+                                                  const char *Sep,
+                                                  const LocationContext *LC) {
+  PrintingPolicy PP =
+      LC->getAnalysisDeclContext()->getASTContext().getPrintingPolicy();
+  for (auto I : State->get<InitializedTemporaries>()) {
+    std::pair<const CXXBindTemporaryExpr *, const LocationContext *> Key =
+        I.first;
+    const MemRegion *Value = I.second;
+    if (Key.second != LC)
+      continue;
+    Out << '(' << Key.second << ',' << Key.first << ") ";
+    Key.first->printPretty(Out, nullptr, PP);
+    if (Value)
+      Out << " : " << Value;
+    Out << NL;
+  }
+}
+
+static void printTemporaryMaterializationsForContext(
+    raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep,
+    const LocationContext *LC) {
+  PrintingPolicy PP =
+      LC->getAnalysisDeclContext()->getASTContext().getPrintingPolicy();
+  for (auto I : State->get<TemporaryMaterializations>()) {
+    std::pair<const MaterializeTemporaryExpr *, const LocationContext *> Key =
+        I.first;
+    const MemRegion *Value = I.second;
+    if (Key.second != LC)
+      continue;
+    Out << '(' << Key.second << ',' << Key.first << ") ";
+    Key.first->printPretty(Out, nullptr, PP);
+    assert(Value);
+    Out << " : " << Value << NL;
+  }
+}
+
+static void printCXXNewAllocatorValuesForContext(raw_ostream &Out,
+                                                 ProgramStateRef State,
+                                                 const char *NL,
+                                                 const char *Sep,
+                                                 const LocationContext *LC) {
+  PrintingPolicy PP =
+      LC->getAnalysisDeclContext()->getASTContext().getPrintingPolicy();
+
+  for (auto I : State->get<CXXNewAllocatorValues>()) {
+    std::pair<const CXXNewExpr *, const LocationContext *> Key = I.first;
+    SVal Value = I.second;
+    if (Key.second != LC)
+      continue;
+    Out << '(' << Key.second << ',' << Key.first << ") ";
+    Key.first->printPretty(Out, nullptr, PP);
+    Out << " : " << Value << NL;
+  }
+}
+
 void ExprEngine::printState(raw_ostream &Out, ProgramStateRef State,
-                            const char *NL, const char *Sep) {
+                            const char *NL, const char *Sep,
+                            const LocationContext *LCtx) {
+  if (LCtx) {
+    if (!State->get<InitializedTemporaries>().isEmpty()) {
+      Out << Sep << "Initialized temporaries:" << NL;
+
+      LCtx->dumpStack(Out, "", NL, Sep, [&](const LocationContext *LC) {
+        printInitializedTemporariesForContext(Out, State, NL, Sep, LC);
+      });
+    }
+
+    if (!State->get<TemporaryMaterializations>().isEmpty()) {
+      Out << Sep << "Temporaries to be materialized:" << NL;
+
+      LCtx->dumpStack(Out, "", NL, Sep, [&](const LocationContext *LC) {
+        printTemporaryMaterializationsForContext(Out, State, NL, Sep, LC);
+      });
+    }
+
+    if (!State->get<CXXNewAllocatorValues>().isEmpty()) {
+      Out << Sep << "operator new() allocator return values:" << NL;
+
+      LCtx->dumpStack(Out, "", NL, Sep, [&](const LocationContext *LC) {
+        printCXXNewAllocatorValuesForContext(Out, State, NL, Sep, LC);
+      });
+    }
+  }
+
   getCheckerManager().runCheckersForPrintState(Out, State, NL, Sep);
 }
 
@@ -348,10 +611,11 @@
 
   switch (E.getKind()) {
     case CFGElement::Statement:
-      ProcessStmt(const_cast<Stmt*>(E.castAs<CFGStmt>().getStmt()), Pred);
+    case CFGElement::Constructor:
+      ProcessStmt(E.castAs<CFGStmt>().getStmt(), Pred);
       return;
     case CFGElement::Initializer:
-      ProcessInitializer(E.castAs<CFGInitializer>().getInitializer(), Pred);
+      ProcessInitializer(E.castAs<CFGInitializer>(), Pred);
       return;
     case CFGElement::NewAllocator:
       ProcessNewAllocator(E.castAs<CFGNewAllocator>().getAllocatorExpr(),
@@ -373,10 +637,9 @@
 }
 
 static bool shouldRemoveDeadBindings(AnalysisManager &AMgr,
-                                     const CFGStmt S,
+                                     const Stmt *S,
                                      const ExplodedNode *Pred,
                                      const LocationContext *LC) {
-
   // Are we never purging state values?
   if (AMgr.options.AnalysisPurgeOpt == PurgeNone)
     return false;
@@ -386,17 +649,17 @@
     return true;
 
   // Is this on a non-expression?
-  if (!isa<Expr>(S.getStmt()))
+  if (!isa<Expr>(S))
     return true;
 
   // Run before processing a call.
-  if (CallEvent::isCallStmt(S.getStmt()))
+  if (CallEvent::isCallStmt(S))
     return true;
 
   // Is this an expression that is consumed by another expression?  If so,
   // postpone cleaning out the state.
   ParentMap &PM = LC->getAnalysisDeclContext()->getParentMap();
-  return !PM.isConsumedExpr(cast<Expr>(S.getStmt()));
+  return !PM.isConsumedExpr(cast<Expr>(S));
 }
 
 void ExprEngine::removeDead(ExplodedNode *Pred, ExplodedNodeSet &Out,
@@ -429,6 +692,21 @@
   const StackFrameContext *SFC = LC ? LC->getCurrentStackFrame() : nullptr;
   SymbolReaper SymReaper(SFC, ReferenceStmt, SymMgr, getStoreManager());
 
+  for (auto I : CleanedState->get<InitializedTemporaries>())
+    if (I.second)
+      SymReaper.markLive(I.second);
+
+  for (auto I : CleanedState->get<TemporaryMaterializations>())
+    if (I.second)
+      SymReaper.markLive(I.second);
+
+  for (auto I : CleanedState->get<CXXNewAllocatorValues>()) {
+    if (SymbolRef Sym = I.second.getAsSymbol())
+      SymReaper.markLive(Sym);
+    if (const MemRegion *MR = I.second.getAsRegion())
+      SymReaper.markElementIndicesLive(MR);
+  }
+
   getCheckerManager().runCheckersForLiveSymbols(CleanedState, SymReaper);
 
   // Create a state in which dead bindings are removed from the environment
@@ -457,9 +735,8 @@
     // environment, the store, and the constraints cleaned up but have the
     // user-supplied states as the predecessors.
     StmtNodeBuilder Bldr(CheckedSet, Out, *currBldrCtx);
-    for (ExplodedNodeSet::const_iterator
-          I = CheckedSet.begin(), E = CheckedSet.end(); I != E; ++I) {
-      ProgramStateRef CheckerState = (*I)->getState();
+    for (const auto I : CheckedSet) {
+      ProgramStateRef CheckerState = I->getState();
 
       // The constraint manager has not been cleaned up yet, so clean up now.
       CheckerState = getConstraintManager().removeDeadBindings(CheckerState,
@@ -476,35 +753,34 @@
       // generate a transition to that state.
       ProgramStateRef CleanedCheckerSt =
         StateMgr.getPersistentStateWithGDM(CleanedState, CheckerState);
-      Bldr.generateNode(DiagnosticStmt, *I, CleanedCheckerSt, &cleanupTag, K);
+      Bldr.generateNode(DiagnosticStmt, I, CleanedCheckerSt, &cleanupTag, K);
     }
   }
 }
 
-void ExprEngine::ProcessStmt(const CFGStmt S,
-                             ExplodedNode *Pred) {
+void ExprEngine::ProcessStmt(const Stmt *currStmt, ExplodedNode *Pred) {
   // Reclaim any unnecessary nodes in the ExplodedGraph.
   G.reclaimRecentlyAllocatedNodes();
 
-  const Stmt *currStmt = S.getStmt();
   PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
                                 currStmt->getLocStart(),
                                 "Error evaluating statement");
 
   // Remove dead bindings and symbols.
   ExplodedNodeSet CleanedStates;
-  if (shouldRemoveDeadBindings(AMgr, S, Pred, Pred->getLocationContext())){
-    removeDead(Pred, CleanedStates, currStmt, Pred->getLocationContext());
+  if (shouldRemoveDeadBindings(AMgr, currStmt, Pred,
+                               Pred->getLocationContext())) {
+    removeDead(Pred, CleanedStates, currStmt,
+                                    Pred->getLocationContext());
   } else
     CleanedStates.Add(Pred);
 
   // Visit the statement.
   ExplodedNodeSet Dst;
-  for (ExplodedNodeSet::iterator I = CleanedStates.begin(),
-                                 E = CleanedStates.end(); I != E; ++I) {
+  for (const auto I : CleanedStates) {
     ExplodedNodeSet DstI;
     // Visit the statement.
-    Visit(currStmt, *I, DstI);
+    Visit(currStmt, I, DstI);
     Dst.insert(DstI);
   }
 
@@ -539,10 +815,8 @@
                                 "Error evaluating initializer");
 
   // We don't clean up dead bindings here.
-  const StackFrameContext *stackFrame =
-                           cast<StackFrameContext>(Pred->getLocationContext());
-  const CXXConstructorDecl *decl =
-                           cast<CXXConstructorDecl>(stackFrame->getDecl());
+  const auto *stackFrame = cast<StackFrameContext>(Pred->getLocationContext());
+  const auto *decl = cast<CXXConstructorDecl>(stackFrame->getDecl());
 
   ProgramStateRef State = Pred->getState();
   SVal thisVal = State->getSVal(svalBuilder.getCXXThis(decl, stackFrame));
@@ -554,7 +828,7 @@
   if (BMI->isAnyMemberInitializer()) {
     // Constructors build the object directly in the field,
     // but non-objects must be copied in from the initializer.
-    if (auto *CtorExpr = findDirectConstructorForCurrentCFGElement()) {
+    if (const auto *CtorExpr = findDirectConstructorForCurrentCFGElement()) {
       assert(BMI->getInit()->IgnoreImplicit() == CtorExpr);
       (void)CtorExpr;
       // The field was directly constructed, so there is no need to bind.
@@ -610,10 +884,8 @@
   PostInitializer PP(BMI, FieldLoc.getAsRegion(), stackFrame);
   ExplodedNodeSet Dst;
   NodeBuilder Bldr(Tmp, Dst, *currBldrCtx);
-  for (ExplodedNodeSet::iterator I = Tmp.begin(), E = Tmp.end(); I != E; ++I) {
-    ExplodedNode *N = *I;
-    Bldr.generateNode(PP, N->getState(), N);
-  }
+  for (const auto I : Tmp)
+    Bldr.generateNode(PP, I->getState(), I);
 
   // Enqueue the new nodes onto the work list.
   Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
@@ -688,8 +960,15 @@
     varType = cast<TypedValueRegion>(Region)->getValueType();
   }
 
+  // FIXME: We need to run the same destructor on every element of the array.
+  // This workaround will just run the first destructor (which will still
+  // invalidate the entire array).
+  EvalCallOptions CallOpts;
+  Region = makeZeroElementRegion(state, loc::MemRegionVal(Region), varType,
+                                 CallOpts.IsArrayCtorOrDtor).getAsRegion();
+
   VisitCXXDestructor(varType, Region, Dtor.getTriggerStmt(), /*IsBase=*/ false,
-                     Pred, Dst);
+                     Pred, Dst, CallOpts);
 }
 
 void ExprEngine::ProcessDeleteDtor(const CFGDeleteDtor Dtor,
@@ -699,12 +978,12 @@
   const LocationContext *LCtx = Pred->getLocationContext();
   const CXXDeleteExpr *DE = Dtor.getDeleteExpr();
   const Stmt *Arg = DE->getArgument();
+  QualType DTy = DE->getDestroyedType();
   SVal ArgVal = State->getSVal(Arg, LCtx);
 
   // If the argument to delete is known to be a null value,
   // don't run destructor.
   if (State->isNull(ArgVal).isConstrainedTrue()) {
-    QualType DTy = DE->getDestroyedType();
     QualType BTy = getContext().getBaseElementType(DTy);
     const CXXRecordDecl *RD = BTy->getAsCXXRecordDecl();
     const CXXDestructorDecl *Dtor = RD->getDestructor();
@@ -715,17 +994,26 @@
     return;
   }
 
-  VisitCXXDestructor(DE->getDestroyedType(),
-                     ArgVal.getAsRegion(),
-                     DE, /*IsBase=*/ false,
-                     Pred, Dst);
+  EvalCallOptions CallOpts;
+  const MemRegion *ArgR = ArgVal.getAsRegion();
+  if (DE->isArrayForm()) {
+    // FIXME: We need to run the same destructor on every element of the array.
+    // This workaround will just run the first destructor (which will still
+    // invalidate the entire array).
+    CallOpts.IsArrayCtorOrDtor = true;
+    if (ArgR)
+      ArgR = getStoreManager().GetElementZeroRegion(cast<SubRegion>(ArgR), DTy);
+  }
+
+  VisitCXXDestructor(DE->getDestroyedType(), ArgR, DE, /*IsBase=*/false,
+                     Pred, Dst, CallOpts);
 }
 
 void ExprEngine::ProcessBaseDtor(const CFGBaseDtor D,
                                  ExplodedNode *Pred, ExplodedNodeSet &Dst) {
   const LocationContext *LCtx = Pred->getLocationContext();
 
-  const CXXDestructorDecl *CurDtor = cast<CXXDestructorDecl>(LCtx->getDecl());
+  const auto *CurDtor = cast<CXXDestructorDecl>(LCtx->getDecl());
   Loc ThisPtr = getSValBuilder().getCXXThis(CurDtor,
                                             LCtx->getCurrentStackFrame());
   SVal ThisVal = Pred->getState()->getSVal(ThisPtr);
@@ -737,24 +1025,31 @@
                                                      Base->isVirtual());
 
   VisitCXXDestructor(BaseTy, BaseVal.castAs<loc::MemRegionVal>().getRegion(),
-                     CurDtor->getBody(), /*IsBase=*/ true, Pred, Dst);
+                     CurDtor->getBody(), /*IsBase=*/ true, Pred, Dst, {});
 }
 
 void ExprEngine::ProcessMemberDtor(const CFGMemberDtor D,
                                    ExplodedNode *Pred, ExplodedNodeSet &Dst) {
   const FieldDecl *Member = D.getFieldDecl();
+  QualType T = Member->getType();
   ProgramStateRef State = Pred->getState();
   const LocationContext *LCtx = Pred->getLocationContext();
 
-  const CXXDestructorDecl *CurDtor = cast<CXXDestructorDecl>(LCtx->getDecl());
+  const auto *CurDtor = cast<CXXDestructorDecl>(LCtx->getDecl());
   Loc ThisVal = getSValBuilder().getCXXThis(CurDtor,
                                             LCtx->getCurrentStackFrame());
   SVal FieldVal =
       State->getLValue(Member, State->getSVal(ThisVal).castAs<Loc>());
 
-  VisitCXXDestructor(Member->getType(),
-                     FieldVal.castAs<loc::MemRegionVal>().getRegion(),
-                     CurDtor->getBody(), /*IsBase=*/false, Pred, Dst);
+  // FIXME: We need to run the same destructor on every element of the array.
+  // This workaround will just run the first destructor (which will still
+  // invalidate the entire array).
+  EvalCallOptions CallOpts;
+  FieldVal = makeZeroElementRegion(State, FieldVal, T,
+                                   CallOpts.IsArrayCtorOrDtor);
+
+  VisitCXXDestructor(T, FieldVal.castAs<loc::MemRegionVal>().getRegion(),
+                     CurDtor->getBody(), /*IsBase=*/false, Pred, Dst, CallOpts);
 }
 
 void ExprEngine::ProcessTemporaryDtor(const CFGTemporaryDtor D,
@@ -763,25 +1058,47 @@
   ExplodedNodeSet CleanDtorState;
   StmtNodeBuilder StmtBldr(Pred, CleanDtorState, *currBldrCtx);
   ProgramStateRef State = Pred->getState();
-  if (State->contains<InitializedTemporariesSet>(
-      std::make_pair(D.getBindTemporaryExpr(), Pred->getStackFrame()))) {
+  const MemRegion *MR = nullptr;
+  if (const CXXTempObjectRegion *const *MRPtr =
+          State->get<InitializedTemporaries>(std::make_pair(
+              D.getBindTemporaryExpr(), Pred->getStackFrame()))) {
     // FIXME: Currently we insert temporary destructors for default parameters,
-    // but we don't insert the constructors.
-    State = State->remove<InitializedTemporariesSet>(
+    // but we don't insert the constructors, so the entry in
+    // InitializedTemporaries may be missing.
+    State = State->remove<InitializedTemporaries>(
         std::make_pair(D.getBindTemporaryExpr(), Pred->getStackFrame()));
+    // *MRPtr may still be null when the construction context for the temporary
+    // was not implemented.
+    MR = *MRPtr;
   }
   StmtBldr.generateNode(D.getBindTemporaryExpr(), Pred, State);
 
-  QualType varType = D.getBindTemporaryExpr()->getSubExpr()->getType();
+  QualType T = D.getBindTemporaryExpr()->getSubExpr()->getType();
   // FIXME: Currently CleanDtorState can be empty here due to temporaries being
   // bound to default parameters.
   assert(CleanDtorState.size() <= 1);
   ExplodedNode *CleanPred =
       CleanDtorState.empty() ? Pred : *CleanDtorState.begin();
-  // FIXME: Inlining of temporary destructors is not supported yet anyway, so
-  // we just put a NULL region for now. This will need to be changed later.
-  VisitCXXDestructor(varType, nullptr, D.getBindTemporaryExpr(),
-                     /*IsBase=*/false, CleanPred, Dst);
+
+  EvalCallOptions CallOpts;
+  CallOpts.IsTemporaryCtorOrDtor = true;
+  if (!MR) {
+    CallOpts.IsCtorOrDtorWithImproperlyModeledTargetRegion = true;
+
+    // If we have no MR, we still need to unwrap the array to avoid destroying
+    // the whole array at once. Regardless, we'd eventually need to model array
+    // destructors properly, element-by-element.
+    while (const ArrayType *AT = getContext().getAsArrayType(T)) {
+      T = AT->getElementType();
+      CallOpts.IsArrayCtorOrDtor = true;
+    }
+  } else {
+    // We'd eventually need to makeZeroElementRegion() trick here,
+    // but for now we don't have the respective construction contexts,
+    // so MR would always be null in this case. Do nothing for now.
+  }
+  VisitCXXDestructor(T, MR, D.getBindTemporaryExpr(),
+                     /*IsBase=*/false, CleanPred, Dst, CallOpts);
 }
 
 void ExprEngine::processCleanupTemporaryBranch(const CXXBindTemporaryExpr *BTE,
@@ -791,7 +1108,7 @@
                                                const CFGBlock *DstT,
                                                const CFGBlock *DstF) {
   BranchNodeBuilder TempDtorBuilder(Pred, Dst, BldCtx, DstT, DstF);
-  if (Pred->getState()->contains<InitializedTemporariesSet>(
+  if (Pred->getState()->contains<InitializedTemporaries>(
           std::make_pair(BTE, Pred->getStackFrame()))) {
     TempDtorBuilder.markInfeasible(false);
     TempDtorBuilder.generateNode(Pred->getState(), true, Pred);
@@ -804,6 +1121,9 @@
 void ExprEngine::VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *BTE,
                                            ExplodedNodeSet &PreVisit,
                                            ExplodedNodeSet &Dst) {
+  // This is a fallback solution in case we didn't have a construction
+  // context when we were constructing the temporary. Otherwise the map should
+  // have been populated there.
   if (!getAnalysisManager().options.includeTemporaryDtorsInCFG()) {
     // In case we don't have temporary destructors in the CFG, do not mark
     // the initialization - we would otherwise never clean it up.
@@ -813,26 +1133,27 @@
   StmtNodeBuilder StmtBldr(PreVisit, Dst, *currBldrCtx);
   for (ExplodedNode *Node : PreVisit) {
     ProgramStateRef State = Node->getState();
+		const auto &Key = std::make_pair(BTE, Node->getStackFrame());
 
-    if (!State->contains<InitializedTemporariesSet>(
-            std::make_pair(BTE, Node->getStackFrame()))) {
-      // FIXME: Currently the state might already contain the marker due to
+    if (!State->contains<InitializedTemporaries>(Key)) {
+      // FIXME: Currently the state might also already contain the marker due to
       // incorrect handling of temporaries bound to default parameters; for
       // those, we currently skip the CXXBindTemporaryExpr but rely on adding
       // temporary destructor nodes.
-      State = State->add<InitializedTemporariesSet>(
-          std::make_pair(BTE, Node->getStackFrame()));
+      State = State->set<InitializedTemporaries>(Key, nullptr);
     }
     StmtBldr.generateNode(BTE, Node, State);
   }
 }
 
 namespace {
+
 class CollectReachableSymbolsCallback final : public SymbolVisitor {
   InvalidatedSymbols Symbols;
 
 public:
   explicit CollectReachableSymbolsCallback(ProgramStateRef State) {}
+
   const InvalidatedSymbols &getSymbols() const { return Symbols; }
 
   bool VisitSymbol(SymbolRef Sym) override {
@@ -840,7 +1161,8 @@
     return true;
   }
 };
-} // end anonymous namespace
+
+} // namespace
 
 void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
                        ExplodedNodeSet &DstTop) {
@@ -930,8 +1252,7 @@
     case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass:
     case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
     case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
-    case Stmt::CapturedStmtClass:
-    {
+    case Stmt::CapturedStmtClass: {
       const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState());
       Engine.addAbortedBlock(node, currBldrCtx->getBlock());
       break;
@@ -1060,16 +1381,15 @@
       StmtNodeBuilder Bldr2(PreVisit, Tmp, *currBldrCtx);
 
       const Expr *ArgE;
-      if (const CXXDefaultArgExpr *DefE = dyn_cast<CXXDefaultArgExpr>(S))
+      if (const auto *DefE = dyn_cast<CXXDefaultArgExpr>(S))
         ArgE = DefE->getExpr();
-      else if (const CXXDefaultInitExpr *DefE = dyn_cast<CXXDefaultInitExpr>(S))
+      else if (const auto *DefE = dyn_cast<CXXDefaultInitExpr>(S))
         ArgE = DefE->getExpr();
       else
         llvm_unreachable("unknown constant wrapper kind");
 
       bool IsTemporary = false;
-      if (const MaterializeTemporaryExpr *MTE =
-            dyn_cast<MaterializeTemporaryExpr>(ArgE)) {
+      if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(ArgE)) {
         ArgE = MTE->GetTemporaryExpr();
         IsTemporary = true;
       }
@@ -1079,15 +1399,14 @@
         ConstantVal = UnknownVal();
 
       const LocationContext *LCtx = Pred->getLocationContext();
-      for (ExplodedNodeSet::iterator I = PreVisit.begin(), E = PreVisit.end();
-           I != E; ++I) {
-        ProgramStateRef State = (*I)->getState();
+      for (const auto I : PreVisit) {
+        ProgramStateRef State = I->getState();
         State = State->BindExpr(S, LCtx, *ConstantVal);
         if (IsTemporary)
           State = createTemporaryRegionIfNeeded(State, LCtx,
                                                 cast<Expr>(S),
                                                 cast<Expr>(S));
-        Bldr2.generateNode(S, *I, State);
+        Bldr2.generateNode(S, I, State);
       }
 
       getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
@@ -1108,12 +1427,10 @@
       ExplodedNodeSet Tmp;
       StmtNodeBuilder Bldr2(preVisit, Tmp, *currBldrCtx);
 
-      const Expr *Ex = cast<Expr>(S);
+      const auto *Ex = cast<Expr>(S);
       QualType resultType = Ex->getType();
 
-      for (ExplodedNodeSet::iterator it = preVisit.begin(), et = preVisit.end();
-           it != et; ++it) {
-        ExplodedNode *N = *it;
+      for (const auto N : preVisit) {
         const LocationContext *LCtx = N->getLocationContext();
         SVal result = svalBuilder.conjureSymbolVal(nullptr, Ex, LCtx,
                                                    resultType,
@@ -1150,7 +1467,7 @@
 
     case Stmt::ArraySubscriptExprClass:
       Bldr.takeNodes(Pred);
-      VisitLvalArraySubscriptExpr(cast<ArraySubscriptExpr>(S), Pred, Dst);
+      VisitArraySubscriptExpr(cast<ArraySubscriptExpr>(S), Pred, Dst);
       Bldr.addNodes(Dst);
       break;
 
@@ -1184,7 +1501,7 @@
       break;
 
     case Stmt::BinaryOperatorClass: {
-      const BinaryOperator* B = cast<BinaryOperator>(S);
+      const auto *B = cast<BinaryOperator>(S);
       if (B->isLogicalOp()) {
         Bldr.takeNodes(Pred);
         VisitLogicalExpr(B, Pred, Dst);
@@ -1216,12 +1533,12 @@
     }
 
     case Stmt::CXXOperatorCallExprClass: {
-      const CXXOperatorCallExpr *OCE = cast<CXXOperatorCallExpr>(S);
+      const auto *OCE = cast<CXXOperatorCallExpr>(S);
 
       // For instance method operators, make sure the 'this' argument has a
       // valid region.
       const Decl *Callee = OCE->getCalleeDecl();
-      if (const CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(Callee)) {
+      if (const auto *MD = dyn_cast_or_null<CXXMethodDecl>(Callee)) {
         if (MD->isInstance()) {
           ProgramStateRef State = Pred->getState();
           const LocationContext *LCtx = Pred->getLocationContext();
@@ -1239,34 +1556,38 @@
       // FALLTHROUGH
       LLVM_FALLTHROUGH;
     }
+
     case Stmt::CallExprClass:
     case Stmt::CXXMemberCallExprClass:
-    case Stmt::UserDefinedLiteralClass: {
+    case Stmt::UserDefinedLiteralClass:
       Bldr.takeNodes(Pred);
       VisitCallExpr(cast<CallExpr>(S), Pred, Dst);
       Bldr.addNodes(Dst);
       break;
-    }
 
-    case Stmt::CXXCatchStmtClass: {
+    case Stmt::CXXCatchStmtClass:
       Bldr.takeNodes(Pred);
       VisitCXXCatchStmt(cast<CXXCatchStmt>(S), Pred, Dst);
       Bldr.addNodes(Dst);
       break;
-    }
 
     case Stmt::CXXTemporaryObjectExprClass:
-    case Stmt::CXXConstructExprClass: {
+    case Stmt::CXXConstructExprClass:
       Bldr.takeNodes(Pred);
       VisitCXXConstructExpr(cast<CXXConstructExpr>(S), Pred, Dst);
       Bldr.addNodes(Dst);
       break;
-    }
 
     case Stmt::CXXNewExprClass: {
       Bldr.takeNodes(Pred);
+
+      ExplodedNodeSet PreVisit;
+      getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
+
       ExplodedNodeSet PostVisit;
-      VisitCXXNewExpr(cast<CXXNewExpr>(S), Pred, PostVisit);
+      for (const auto i : PreVisit)
+        VisitCXXNewExpr(cast<CXXNewExpr>(S), i, PostVisit);
+
       getCheckerManager().runCheckersForPostStmt(Dst, PostVisit, S, *this);
       Bldr.addNodes(Dst);
       break;
@@ -1275,12 +1596,11 @@
     case Stmt::CXXDeleteExprClass: {
       Bldr.takeNodes(Pred);
       ExplodedNodeSet PreVisit;
-      const CXXDeleteExpr *CDE = cast<CXXDeleteExpr>(S);
+      const auto *CDE = cast<CXXDeleteExpr>(S);
       getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
 
-      for (ExplodedNodeSet::iterator i = PreVisit.begin(),
-                                     e = PreVisit.end(); i != e ; ++i)
-        VisitCXXDeleteExpr(CDE, *i, Dst);
+      for (const auto i : PreVisit)
+        VisitCXXDeleteExpr(CDE, i, Dst);
 
       Bldr.addNodes(Dst);
       break;
@@ -1290,7 +1610,7 @@
 
     case Stmt::ChooseExprClass: { // __builtin_choose_expr
       Bldr.takeNodes(Pred);
-      const ChooseExpr *C = cast<ChooseExpr>(S);
+      const auto *C = cast<ChooseExpr>(S);
       VisitGuardedExpr(C, C->getLHS(), C->getRHS(), Pred, Dst);
       Bldr.addNodes(Dst);
       break;
@@ -1311,8 +1631,7 @@
     case Stmt::BinaryConditionalOperatorClass:
     case Stmt::ConditionalOperatorClass: { // '?' operator
       Bldr.takeNodes(Pred);
-      const AbstractConditionalOperator *C
-        = cast<AbstractConditionalOperator>(S);
+      const auto *C = cast<AbstractConditionalOperator>(S);
       VisitGuardedExpr(C, C->getTrueExpr(), C->getFalseExpr(), Pred, Dst);
       Bldr.addNodes(Dst);
       break;
@@ -1326,7 +1645,7 @@
 
     case Stmt::DeclRefExprClass: {
       Bldr.takeNodes(Pred);
-      const DeclRefExpr *DE = cast<DeclRefExpr>(S);
+      const auto *DE = cast<DeclRefExpr>(S);
       VisitCommonDeclRefExpr(DE, DE->getDecl(), Pred, Dst);
       Bldr.addNodes(Dst);
       break;
@@ -1347,7 +1666,7 @@
     case Stmt::CXXFunctionalCastExprClass:
     case Stmt::ObjCBridgedCastExprClass: {
       Bldr.takeNodes(Pred);
-      const CastExpr *C = cast<CastExpr>(S);
+      const auto *C = cast<CastExpr>(S);
       ExplodedNodeSet dstExpr;
       VisitCast(C, C->getSubExpr(), Pred, dstExpr);
 
@@ -1359,14 +1678,12 @@
 
     case Expr::MaterializeTemporaryExprClass: {
       Bldr.takeNodes(Pred);
-      const MaterializeTemporaryExpr *MTE = cast<MaterializeTemporaryExpr>(S);
+      const auto *MTE = cast<MaterializeTemporaryExpr>(S);
       ExplodedNodeSet dstPrevisit;
       getCheckerManager().runCheckersForPreStmt(dstPrevisit, Pred, MTE, *this);
       ExplodedNodeSet dstExpr;
-      for (ExplodedNodeSet::iterator i = dstPrevisit.begin(),
-                                     e = dstPrevisit.end(); i != e ; ++i) {
-        CreateCXXTemporaryObject(MTE, *i, dstExpr);
-      }
+      for (const auto i : dstPrevisit)
+        CreateCXXTemporaryObject(MTE, i, dstExpr);
       getCheckerManager().runCheckersForPostStmt(Dst, dstExpr, MTE, *this);
       Bldr.addNodes(Dst);
       break;
@@ -1421,11 +1738,19 @@
       Bldr.addNodes(Dst);
       break;
 
-    case Stmt::OffsetOfExprClass:
+    case Stmt::OffsetOfExprClass: {
       Bldr.takeNodes(Pred);
-      VisitOffsetOfExpr(cast<OffsetOfExpr>(S), Pred, Dst);
+      ExplodedNodeSet PreVisit;
+      getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
+
+      ExplodedNodeSet PostVisit;
+      for (const auto Node : PreVisit)
+        VisitOffsetOfExpr(cast<OffsetOfExpr>(S), Node, PostVisit);
+
+      getCheckerManager().runCheckersForPostStmt(Dst, PostVisit, S, *this);
       Bldr.addNodes(Dst);
       break;
+    }
 
     case Stmt::UnaryExprOrTypeTraitExprClass:
       Bldr.takeNodes(Pred);
@@ -1435,7 +1760,7 @@
       break;
 
     case Stmt::StmtExprClass: {
-      const StmtExpr *SE = cast<StmtExpr>(S);
+      const auto *SE = cast<StmtExpr>(S);
 
       if (SE->getSubStmt()->body_empty()) {
         // Empty statement expression.
@@ -1444,7 +1769,8 @@
         break;
       }
 
-      if (Expr *LastExpr = dyn_cast<Expr>(*SE->getSubStmt()->body_rbegin())) {
+      if (const auto *LastExpr =
+              dyn_cast<Expr>(*SE->getSubStmt()->body_rbegin())) {
         ProgramStateRef state = Pred->getState();
         Bldr.generateNode(SE, Pred,
                           state->BindExpr(SE, Pred->getLocationContext(),
@@ -1456,7 +1782,7 @@
 
     case Stmt::UnaryOperatorClass: {
       Bldr.takeNodes(Pred);
-      const UnaryOperator *U = cast<UnaryOperator>(S);
+      const auto *U = cast<UnaryOperator>(S);
       if (AMgr.options.eagerlyAssumeBinOpBifurcation && (U->getOpcode() == UO_LNot)) {
         ExplodedNodeSet Tmp;
         VisitUnaryOperator(U, Pred, Tmp);
@@ -1471,7 +1797,7 @@
     case Stmt::PseudoObjectExprClass: {
       Bldr.takeNodes(Pred);
       ProgramStateRef state = Pred->getState();
-      const PseudoObjectExpr *PE = cast<PseudoObjectExpr>(S);
+      const auto *PE = cast<PseudoObjectExpr>(S);
       if (const Expr *Result = PE->getResultExpr()) {
         SVal V = state->getSVal(Result, Pred->getLocationContext());
         Bldr.generateNode(S, Pred,
@@ -1639,14 +1965,14 @@
                                 const LocationContext *LCtx,
                                 ASTContext &Ctx) {
 
-  const Expr *Ex = dyn_cast<Expr>(Condition);
+  const auto *Ex = dyn_cast<Expr>(Condition);
   if (!Ex)
     return UnknownVal();
 
   uint64_t bits = 0;
   bool bitsInit = false;
 
-  while (const CastExpr *CE = dyn_cast<CastExpr>(Ex)) {
+  while (const auto *CE = dyn_cast<CastExpr>(Ex)) {
     QualType T = CE->getType();
 
     if (!T->isIntegralOrEnumerationType())
@@ -1674,7 +2000,7 @@
 #ifndef NDEBUG
 static const Stmt *getRightmostLeaf(const Stmt *Condition) {
   while (Condition) {
-    const BinaryOperator *BO = dyn_cast<BinaryOperator>(Condition);
+    const auto *BO = dyn_cast<BinaryOperator>(Condition);
     if (!BO || !BO->isLogicalOp()) {
       return Condition;
     }
@@ -1700,10 +2026,10 @@
 // space.
 static const Stmt *ResolveCondition(const Stmt *Condition,
                                     const CFGBlock *B) {
-  if (const Expr *Ex = dyn_cast<Expr>(Condition))
+  if (const auto *Ex = dyn_cast<Expr>(Condition))
     Condition = Ex->IgnoreParens();
 
-  const BinaryOperator *BO = dyn_cast<BinaryOperator>(Condition);
+  const auto *BO = dyn_cast<BinaryOperator>(Condition);
   if (!BO || !BO->isLogicalOp())
     return Condition;
 
@@ -1751,7 +2077,7 @@
     return;
   }
 
-  if (const Expr *Ex = dyn_cast<Expr>(Condition))
+  if (const auto *Ex = dyn_cast<Expr>(Condition))
     Condition = Ex->IgnoreParens();
 
   Condition = ResolveCondition(Condition, BldCtx.getBlock());
@@ -1767,10 +2093,7 @@
     return;
 
   BranchNodeBuilder builder(CheckersOutSet, Dst, BldCtx, DstT, DstF);
-  for (NodeBuilder::iterator I = CheckersOutSet.begin(),
-                             E = CheckersOutSet.end(); E != I; ++I) {
-    ExplodedNode *PredI = *I;
-
+  for (const auto PredI : CheckersOutSet) {
     if (PredI->isSink())
       continue;
 
@@ -1779,7 +2102,7 @@
 
     if (X.isUnknownOrUndef()) {
       // Give it a chance to recover from unknown.
-      if (const Expr *Ex = dyn_cast<Expr>(Condition)) {
+      if (const auto *Ex = dyn_cast<Expr>(Condition)) {
         if (Ex->getType()->isIntegralOrEnumerationType()) {
           // Try to recover some path-sensitivity.  Right now casts of symbolic
           // integers that promote their values are currently not tracked well.
@@ -1836,13 +2159,13 @@
 void ExprEngine::processStaticInitializer(const DeclStmt *DS,
                                           NodeBuilderContext &BuilderCtx,
                                           ExplodedNode *Pred,
-                                          clang::ento::ExplodedNodeSet &Dst,
+                                          ExplodedNodeSet &Dst,
                                           const CFGBlock *DstT,
                                           const CFGBlock *DstF) {
   PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
   currBldrCtx = &BuilderCtx;
 
-  const VarDecl *VD = cast<VarDecl>(DS->getSingleDecl());
+  const auto *VD = cast<VarDecl>(DS->getSingleDecl());
   ProgramStateRef state = Pred->getState();
   bool initHasRun = state->contains<InitializedGlobalsSet>(VD);
   BranchNodeBuilder builder(Pred, Dst, BuilderCtx, DstT, DstF);
@@ -1860,7 +2183,6 @@
 /// processIndirectGoto - Called by CoreEngine.  Used to generate successor
 ///  nodes by processing the 'effects' of a computed goto jump.
 void ExprEngine::processIndirectGoto(IndirectGotoNodeBuilder &builder) {
-
   ProgramStateRef state = builder.getState();
   SVal V = state->getSVal(builder.getTarget(), builder.getLocationContext());
 
@@ -1871,7 +2193,7 @@
   //   (3) We have no clue about the label.  Dispatch to all targets.
   //
 
-  typedef IndirectGotoNodeBuilder::iterator iterator;
+  using iterator = IndirectGotoNodeBuilder::iterator;
 
   if (Optional<loc::GotoLabel> LV = V.getAs<loc::GotoLabel>()) {
     const LabelDecl *L = LV->getLabel();
@@ -1897,26 +2219,10 @@
   // This is really a catch-all.  We don't support symbolics yet.
   // FIXME: Implement dispatch for symbolic pointers.
 
-  for (iterator I=builder.begin(), E=builder.end(); I != E; ++I)
+  for (iterator I = builder.begin(), E = builder.end(); I != E; ++I)
     builder.generateNode(I, state);
 }
 
-#if 0
-static bool stackFrameDoesNotContainInitializedTemporaries(ExplodedNode &Pred) {
-  const StackFrameContext* Frame = Pred.getStackFrame();
-  const llvm::ImmutableSet<CXXBindTemporaryContext> &Set =
-      Pred.getState()->get<InitializedTemporariesSet>();
-  return std::find_if(Set.begin(), Set.end(),
-                      [&](const CXXBindTemporaryContext &Ctx) {
-                        if (Ctx.second == Frame) {
-                          Ctx.first->dump();
-                          llvm::errs() << "\n";
-                        }
-           return Ctx.second == Frame;
-         }) == Set.end();
-}
-#endif
-
 void ExprEngine::processBeginOfFunction(NodeBuilderContext &BC,
                                         ExplodedNode *Pred,
                                         ExplodedNodeSet &Dst,
@@ -1930,9 +2236,49 @@
 void ExprEngine::processEndOfFunction(NodeBuilderContext& BC,
                                       ExplodedNode *Pred,
                                       const ReturnStmt *RS) {
-  // FIXME: Assert that stackFrameDoesNotContainInitializedTemporaries(*Pred)).
-  // We currently cannot enable this assert, as lifetime extended temporaries
-  // are not modelled correctly.
+  // See if we have any stale C++ allocator values.
+  assert(areCXXNewAllocatorValuesClear(Pred->getState(),
+                                       Pred->getLocationContext(),
+                                       Pred->getStackFrame()->getParent()));
+
+  // FIXME: We currently cannot assert that temporaries are clear, because
+  // lifetime extended temporaries are not always modelled correctly. In some
+  // cases when we materialize the temporary, we do
+  // createTemporaryRegionIfNeeded(), and the region changes, and also the
+  // respective destructor becomes automatic from temporary. So for now clean up
+  // the state manually before asserting. Ideally, the code above the assertion
+  // should go away, but the assertion should remain.
+  {
+    ExplodedNodeSet CleanUpTemporaries;
+    NodeBuilder Bldr(Pred, CleanUpTemporaries, BC);
+    ProgramStateRef State = Pred->getState();
+    const LocationContext *FromLC = Pred->getLocationContext();
+    const LocationContext *ToLC = FromLC->getCurrentStackFrame()->getParent();
+    const LocationContext *LC = FromLC;
+    while (LC != ToLC) {
+      assert(LC && "ToLC must be a parent of FromLC!");
+      for (auto I : State->get<InitializedTemporaries>())
+        if (I.first.second == LC)
+          State = State->remove<InitializedTemporaries>(I.first);
+
+      LC = LC->getParent();
+    }
+    if (State != Pred->getState()) {
+      Pred = Bldr.generateNode(Pred->getLocation(), State, Pred);
+      if (!Pred) {
+        // The node with clean temporaries already exists. We might have reached
+        // it on a path on which we initialize different temporaries.
+        return;
+      }
+    }
+  }
+  assert(areInitializedTemporariesClear(Pred->getState(),
+                                        Pred->getLocationContext(),
+                                        Pred->getStackFrame()->getParent()));
+  assert(areTemporaryMaterializationsClear(Pred->getState(),
+                                           Pred->getLocationContext(),
+                                           Pred->getStackFrame()->getParent()));
+
   PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
   StateMgr.EndPath(Pred->getState());
 
@@ -1943,10 +2289,8 @@
     removeDeadOnEndOfFunction(BC, Pred, AfterRemovedDead);
 
     // Notify checkers.
-    for (ExplodedNodeSet::iterator I = AfterRemovedDead.begin(),
-        E = AfterRemovedDead.end(); I != E; ++I) {
-      getCheckerManager().runCheckersForEndFunction(BC, Dst, *I, *this);
-    }
+    for (const auto I : AfterRemovedDead)
+      getCheckerManager().runCheckersForEndFunction(BC, Dst, I, *this);
   } else {
     getCheckerManager().runCheckersForEndFunction(BC, Dst, Pred, *this);
   }
@@ -1957,7 +2301,8 @@
 /// ProcessSwitch - Called by CoreEngine.  Used to generate successor
 ///  nodes by processing the 'effects' of a switch statement.
 void ExprEngine::processSwitch(SwitchNodeBuilder& builder) {
-  typedef SwitchNodeBuilder::iterator iterator;
+  using iterator = SwitchNodeBuilder::iterator;
+
   ProgramStateRef state = builder.getState();
   const Expr *CondE = builder.getCondition();
   SVal  CondV_untested = state->getSVal(CondE, builder.getLocationContext());
@@ -2046,7 +2391,7 @@
   ProgramStateRef state = Pred->getState();
   const LocationContext *LCtx = Pred->getLocationContext();
 
-  if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
+  if (const auto *VD = dyn_cast<VarDecl>(D)) {
     // C permits "extern void v", and if you cast the address to a valid type,
     // you can even do things with it. We simply pretend
     assert(Ex->isGLValue() || VD->getType()->isVoidType());
@@ -2096,22 +2441,24 @@
                       ProgramPoint::PostLValueKind);
     return;
   }
-  if (const EnumConstantDecl *ED = dyn_cast<EnumConstantDecl>(D)) {
+  if (const auto *ED = dyn_cast<EnumConstantDecl>(D)) {
     assert(!Ex->isGLValue());
     SVal V = svalBuilder.makeIntVal(ED->getInitVal());
     Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V));
     return;
   }
-  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+  if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
     SVal V = svalBuilder.getFunctionPointer(FD);
     Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), nullptr,
                       ProgramPoint::PostLValueKind);
     return;
   }
-  if (isa<FieldDecl>(D)) {
+  if (isa<FieldDecl>(D) || isa<IndirectFieldDecl>(D)) {
     // FIXME: Compute lvalue of field pointers-to-member.
     // Right now we just use a non-null void pointer, so that it gives proper
     // results in boolean contexts.
+    // FIXME: Maybe delegate this to the surrounding operator&.
+    // Note how this expression is lvalue, however pointer-to-member is NonLoc.
     SVal V = svalBuilder.conjureSymbolVal(Ex, LCtx, getContext().VoidPtrTy,
                                           currBldrCtx->blockCount());
     state = state->assume(V.castAs<DefinedOrUnknownSVal>(), true);
@@ -2124,10 +2471,9 @@
 }
 
 /// VisitArraySubscriptExpr - Transfer function for array accesses
-void ExprEngine::VisitLvalArraySubscriptExpr(const ArraySubscriptExpr *A,
+void ExprEngine::VisitArraySubscriptExpr(const ArraySubscriptExpr *A,
                                              ExplodedNode *Pred,
                                              ExplodedNodeSet &Dst){
-
   const Expr *Base = A->getBase()->IgnoreParens();
   const Expr *Idx  = A->getIdx()->IgnoreParens();
 
@@ -2136,18 +2482,40 @@
 
   ExplodedNodeSet EvalSet;
   StmtNodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx);
-  assert(A->isGLValue() ||
-          (!AMgr.getLangOpts().CPlusPlus &&
-           A->getType().isCForbiddenLValueType()));
+
+  bool IsVectorType = A->getBase()->getType()->isVectorType();
+
+  // The "like" case is for situations where C standard prohibits the type to
+  // be an lvalue, e.g. taking the address of a subscript of an expression of
+  // type "void *".
+  bool IsGLValueLike = A->isGLValue() ||
+    (A->getType().isCForbiddenLValueType() && !AMgr.getLangOpts().CPlusPlus);
 
   for (auto *Node : CheckerPreStmt) {
     const LocationContext *LCtx = Node->getLocationContext();
     ProgramStateRef state = Node->getState();
-    SVal V = state->getLValue(A->getType(),
-                              state->getSVal(Idx, LCtx),
-                              state->getSVal(Base, LCtx));
-    Bldr.generateNode(A, Node, state->BindExpr(A, LCtx, V), nullptr,
-                      ProgramPoint::PostLValueKind);
+
+    if (IsGLValueLike) {
+      QualType T = A->getType();
+
+      // One of the forbidden LValue types! We still need to have sensible
+      // symbolic locations to represent this stuff. Note that arithmetic on
+      // void pointers is a GCC extension.
+      if (T->isVoidType())
+        T = getContext().CharTy;
+
+      SVal V = state->getLValue(T,
+                                state->getSVal(Idx, LCtx),
+                                state->getSVal(Base, LCtx));
+      Bldr.generateNode(A, Node, state->BindExpr(A, LCtx, V), nullptr,
+          ProgramPoint::PostLValueKind);
+    } else if (IsVectorType) {
+      // FIXME: non-glvalue vector reads are not modelled.
+      Bldr.generateNode(A, Node, state, nullptr);
+    } else {
+      llvm_unreachable("Array subscript should be an lValue when not \
+a vector and not a forbidden lvalue type");
+    }
   }
 
   getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, A, *this);
@@ -2156,41 +2524,36 @@
 /// VisitMemberExpr - Transfer function for member expressions.
 void ExprEngine::VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred,
                                  ExplodedNodeSet &Dst) {
-
   // FIXME: Prechecks eventually go in ::Visit().
   ExplodedNodeSet CheckedSet;
   getCheckerManager().runCheckersForPreStmt(CheckedSet, Pred, M, *this);
 
-  ExplodedNodeSet EvalSet;
-  ValueDecl *Member = M->getMemberDecl();
+  ExplodedNodeSet EvalSet;  
+  ValueDecl *Member = M->getMemberDecl();  
 
   // Handle static member variables and enum constants accessed via
   // member syntax.
-  if (isa<VarDecl>(Member) || isa<EnumConstantDecl>(Member)) {
-    ExplodedNodeSet Dst;
-    for (ExplodedNodeSet::iterator I = CheckedSet.begin(), E = CheckedSet.end();
-         I != E; ++I) {
-      VisitCommonDeclRefExpr(M, Member, Pred, EvalSet);
-    }
+  if (isa<VarDecl>(Member) || isa<EnumConstantDecl>(Member)) {    
+    for (const auto I : CheckedSet)
+      VisitCommonDeclRefExpr(M, Member, I, EvalSet);
   } else {
     StmtNodeBuilder Bldr(CheckedSet, EvalSet, *currBldrCtx);
     ExplodedNodeSet Tmp;
 
-    for (ExplodedNodeSet::iterator I = CheckedSet.begin(), E = CheckedSet.end();
-         I != E; ++I) {
-      ProgramStateRef state = (*I)->getState();
-      const LocationContext *LCtx = (*I)->getLocationContext();
+    for (const auto I : CheckedSet) {
+      ProgramStateRef state = I->getState();
+      const LocationContext *LCtx = I->getLocationContext();
       Expr *BaseExpr = M->getBase();
 
       // Handle C++ method calls.
-      if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Member)) {
+      if (const auto *MD = dyn_cast<CXXMethodDecl>(Member)) {
         if (MD->isInstance())
           state = createTemporaryRegionIfNeeded(state, LCtx, BaseExpr);
 
         SVal MDVal = svalBuilder.getFunctionPointer(MD);
         state = state->BindExpr(M, LCtx, MDVal);
 
-        Bldr.generateNode(M, *I, state);
+        Bldr.generateNode(M, I, state);
         continue;
       }
 
@@ -2198,7 +2561,7 @@
       state = createTemporaryRegionIfNeeded(state, LCtx, BaseExpr);
       SVal baseExprVal = state->getSVal(BaseExpr, LCtx);
 
-      FieldDecl *field = cast<FieldDecl>(Member);
+      const auto *field = cast<FieldDecl>(Member);
       SVal L = state->getLValue(field, baseExprVal);
 
       if (M->isGLValue() || M->getType()->isArrayType()) {
@@ -2208,8 +2571,8 @@
         // pointers as soon as they are used.
         if (!M->isGLValue()) {
           assert(M->getType()->isArrayType());
-          const ImplicitCastExpr *PE =
-            dyn_cast<ImplicitCastExpr>((*I)->getParentMap().getParentIgnoreParens(M));
+          const auto *PE =
+            dyn_cast<ImplicitCastExpr>(I->getParentMap().getParentIgnoreParens(M));
           if (!PE || PE->getCastKind() != CK_ArrayToPointerDecay) {
             llvm_unreachable("should always be wrapped in ArrayToPointerDecay");
           }
@@ -2222,11 +2585,11 @@
             L = UnknownVal();
         }
 
-        Bldr.generateNode(M, *I, state->BindExpr(M, LCtx, L), nullptr,
+        Bldr.generateNode(M, I, state->BindExpr(M, LCtx, L), nullptr,
                           ProgramPoint::PostLValueKind);
       } else {
-        Bldr.takeNodes(*I);
-        evalLoad(Tmp, M, M, *I, state, L);
+        Bldr.takeNodes(I);
+        evalLoad(Tmp, M, M, I, state, L);
         Bldr.addNodes(Tmp);
       }
     }
@@ -2246,10 +2609,9 @@
   ExplodedNodeSet AfterInvalidateSet;
   StmtNodeBuilder Bldr(AfterPreSet, AfterInvalidateSet, *currBldrCtx);
 
-  for (ExplodedNodeSet::iterator I = AfterPreSet.begin(), E = AfterPreSet.end();
-       I != E; ++I) {
-    ProgramStateRef State = (*I)->getState();
-    const LocationContext *LCtx = (*I)->getLocationContext();
+  for (const auto I : AfterPreSet) {
+    ProgramStateRef State = I->getState();
+    const LocationContext *LCtx = I->getLocationContext();
 
     SmallVector<SVal, 8> ValuesToInvalidate;
     for (unsigned SI = 0, Count = AE->getNumSubExprs(); SI != Count; SI++) {
@@ -2266,7 +2628,7 @@
 
     SVal ResultVal = UnknownVal();
     State = State->BindExpr(AE, LCtx, ResultVal);
-    Bldr.generateNode(AE, *I, State, nullptr,
+    Bldr.generateNode(AE, I, State, nullptr,
                       ProgramPoint::PostStmtKind);
   }
 
@@ -2328,7 +2690,6 @@
     ArrayRef<const MemRegion *> Regions,
     const CallEvent *Call,
     RegionAndSymbolInvalidationTraits &ITraits) {
-
   if (!Invalidated || Invalidated->empty())
     return State;
 
@@ -2342,16 +2703,13 @@
   // If the symbols were invalidated by a call, we want to find out which ones
   // were invalidated directly due to being arguments to the call.
   InvalidatedSymbols SymbolsDirectlyInvalidated;
-  for (ArrayRef<const MemRegion *>::iterator I = ExplicitRegions.begin(),
-      E = ExplicitRegions.end(); I != E; ++I) {
-    if (const SymbolicRegion *R = (*I)->StripCasts()->getAs<SymbolicRegion>())
+  for (const auto I : ExplicitRegions) {
+    if (const SymbolicRegion *R = I->StripCasts()->getAs<SymbolicRegion>())
       SymbolsDirectlyInvalidated.insert(R->getSymbol());
   }
 
   InvalidatedSymbols SymbolsIndirectlyInvalidated;
-  for (InvalidatedSymbols::const_iterator I=Invalidated->begin(),
-      E = Invalidated->end(); I!=E; ++I) {
-    SymbolRef sym = *I;
+  for (const auto &sym : *Invalidated) {
     if (SymbolsDirectlyInvalidated.count(sym))
       continue;
     SymbolsIndirectlyInvalidated.insert(sym);
@@ -2375,7 +2733,6 @@
                           ExplodedNode *Pred,
                           SVal location, SVal Val,
                           bool atDeclInit, const ProgramPoint *PP) {
-
   const LocationContext *LC = Pred->getLocationContext();
   PostStmt PS(StoreE, LC);
   if (!PP)
@@ -2399,9 +2756,7 @@
     return;
   }
 
-  for (ExplodedNodeSet::iterator I = CheckedSet.begin(), E = CheckedSet.end();
-       I!=E; ++I) {
-    ExplodedNode *PredI = *I;
+  for (const auto PredI : CheckedSet) {
     ProgramStateRef state = PredI->getState();
 
     state = processPointerEscapedOnBind(state, location, Val, LC);
@@ -2450,8 +2805,8 @@
   if (location.isUndef())
     return;
 
-  for (ExplodedNodeSet::iterator NI=Tmp.begin(), NE=Tmp.end(); NI!=NE; ++NI)
-    evalBind(Dst, StoreE, *NI, location, Val, false);
+  for (const auto I : Tmp)
+    evalBind(Dst, StoreE, I, location, Val, false);
 }
 
 void ExprEngine::evalLoad(ExplodedNodeSet &Dst,
@@ -2461,14 +2816,13 @@
                           ProgramStateRef state,
                           SVal location,
                           const ProgramPointTag *tag,
-                          QualType LoadTy)
-{
+                          QualType LoadTy) {
   assert(!location.getAs<NonLoc>() && "location cannot be a NonLoc.");
 
   // Are we loading from a region?  This actually results in two loads; one
   // to fetch the address of the referenced value and one to fetch the
   // referenced value.
-  if (const TypedValueRegion *TR =
+  if (const auto *TR =
         dyn_cast_or_null<TypedValueRegion>(location.getAsRegion())) {
 
     QualType ValTy = TR->getValueType();
@@ -2481,10 +2835,10 @@
                      getContext().getPointerType(RT->getPointeeType()));
 
       // Perform the load from the referenced value.
-      for (ExplodedNodeSet::iterator I=Tmp.begin(), E=Tmp.end() ; I!=E; ++I) {
-        state = (*I)->getState();
-        location = state->getSVal(BoundEx, (*I)->getLocationContext());
-        evalLoadCommon(Dst, NodeEx, BoundEx, *I, state, location, tag, LoadTy);
+      for (const auto I : Tmp) {
+        state = I->getState();
+        location = state->getSVal(BoundEx, I->getLocationContext());
+        evalLoadCommon(Dst, NodeEx, BoundEx, I, state, location, tag, LoadTy);
       }
       return;
     }
@@ -2514,9 +2868,9 @@
     return;
 
   // Proceed with the load.
-  for (ExplodedNodeSet::iterator NI=Tmp.begin(), NE=Tmp.end(); NI!=NE; ++NI) {
-    state = (*NI)->getState();
-    const LocationContext *LCtx = (*NI)->getLocationContext();
+  for (const auto I : Tmp) {
+    state = I->getState();
+    const LocationContext *LCtx = I->getLocationContext();
 
     SVal V = UnknownVal();
     if (location.isValid()) {
@@ -2525,7 +2879,7 @@
       V = state->getSVal(location.castAs<Loc>(), LoadTy);
     }
 
-    Bldr.generateNode(NodeEx, *NI, state->BindExpr(BoundEx, LCtx, V), tag,
+    Bldr.generateNode(NodeEx, I, state->BindExpr(BoundEx, LCtx, V), tag,
                       ProgramPoint::PostLoadKind);
   }
 }
@@ -2582,8 +2936,7 @@
                                                    const Expr *Ex) {
   StmtNodeBuilder Bldr(Src, Dst, *currBldrCtx);
 
-  for (ExplodedNodeSet::iterator I=Src.begin(), E=Src.end(); I!=E; ++I) {
-    ExplodedNode *Pred = *I;
+  for (const auto Pred : Src) {
     // Test if the previous node was as the same expression.  This can happen
     // when the expression fails to evaluate to anything meaningful and
     // (as an optimization) we don't generate a node.
@@ -2633,7 +2986,7 @@
 
   for (const Expr *O : A->outputs()) {
     SVal X = state->getSVal(O, Pred->getLocationContext());
-    assert (!X.getAs<NonLoc>());  // Should be an Lval, or unknown, undef.
+    assert(!X.getAs<NonLoc>());  // Should be an Lval, or unknown, undef.
 
     if (Optional<Loc> LV = X.getAs<Loc>())
       state = state->bindLoc(*LV, UnknownVal(), Pred->getLocationContext());
@@ -2657,16 +3010,15 @@
 static SourceManager* GraphPrintSourceManager;
 
 namespace llvm {
-template<>
-struct DOTGraphTraits<ExplodedNode*> :
-  public DefaultDOTGraphTraits {
 
-  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+template<>
+struct DOTGraphTraits<ExplodedNode*> : public DefaultDOTGraphTraits {
+  DOTGraphTraits (bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
 
   // FIXME: Since we do not cache error nodes in ExprEngine now, this does not
   // work.
   static std::string getNodeAttributes(const ExplodedNode *N, void*) {
-    return "";
+    return {};
   }
 
   // De-duplicate some source location pretty-printing.
@@ -2679,15 +3031,8 @@
         << "\\l";
     }
   }
-  static void printLocation2(raw_ostream &Out, SourceLocation SLoc) {
-    if (SLoc.isFileID() && GraphPrintSourceManager->isInMainFile(SLoc))
-      Out << "line " << GraphPrintSourceManager->getExpansionLineNumber(SLoc);
-    else
-      SLoc.print(Out, *GraphPrintSourceManager);
-  }
 
   static std::string getNodeLabel(const ExplodedNode *N, void*){
-
     std::string sbuf;
     llvm::raw_string_ostream Out(sbuf);
 
@@ -2695,14 +3040,13 @@
     ProgramPoint Loc = N->getLocation();
 
     switch (Loc.getKind()) {
-      case ProgramPoint::BlockEntranceKind: {
+      case ProgramPoint::BlockEntranceKind:
         Out << "Block Entrance: B"
             << Loc.castAs<BlockEntrance>().getBlock()->getBlockID();
         break;
-      }
 
       case ProgramPoint::BlockExitKind:
-        assert (false);
+        assert(false);
         break;
 
       case ProgramPoint::CallEnterKind:
@@ -2793,7 +3137,7 @@
             const Stmt *Label = E.getDst()->getLabel();
 
             if (Label) {
-              if (const CaseStmt *C = dyn_cast<CaseStmt>(Label)) {
+              if (const auto *C = dyn_cast<CaseStmt>(Label)) {
                 Out << "\\lcase ";
                 LangOptions LO; // FIXME.
                 if (C->getLHS())
@@ -2807,7 +3151,7 @@
                 Out << ":";
               }
               else {
-                assert (isa<DefaultStmt>(Label));
+                assert(isa<DefaultStmt>(Label));
                 Out << "\\ldefault:";
               }
             }
@@ -2848,6 +3192,8 @@
           Out << "\\lPostStore\\l";
         else if (Loc.getAs<PostLValue>())
           Out << "\\lPostLValue\\l";
+        else if (Loc.getAs<PostAllocatorCall>())
+          Out << "\\lPostAllocatorCall\\l";
 
         break;
       }
@@ -2857,40 +3203,7 @@
     Out << "\\|StateID: " << (const void*) state.get()
         << " NodeID: " << (const void*) N << "\\|";
 
-    // Analysis stack backtrace.
-    Out << "Location context stack (from current to outer):\\l";
-    const LocationContext *LC = Loc.getLocationContext();
-    unsigned Idx = 0;
-    for (; LC; LC = LC->getParent(), ++Idx) {
-      Out << Idx << ". (" << (const void *)LC << ") ";
-      switch (LC->getKind()) {
-      case LocationContext::StackFrame:
-        if (const NamedDecl *D = dyn_cast<NamedDecl>(LC->getDecl()))
-          Out << "Calling " << D->getQualifiedNameAsString();
-        else
-          Out << "Calling anonymous code";
-        if (const Stmt *S = cast<StackFrameContext>(LC)->getCallSite()) {
-          Out << " at ";
-          printLocation2(Out, S->getLocStart());
-        }
-        break;
-      case LocationContext::Block:
-        Out << "Invoking block";
-        if (const Decl *D = cast<BlockInvocationContext>(LC)->getBlockDecl()) {
-          Out << " defined at ";
-          printLocation2(Out, D->getLocStart());
-        }
-        break;
-      case LocationContext::Scope:
-        Out << "Entering scope";
-        // FIXME: Add more info once ScopeContext is activated.
-        break;
-      }
-      Out << "\\l";
-    }
-    Out << "\\l";
-
-    state->printDOT(Out);
+    state->printDOT(Out, N->getLocationContext());
 
     Out << "\\l";
 
@@ -2901,23 +3214,24 @@
     return Out.str();
   }
 };
-} // end llvm namespace
+
+} // namespace llvm
 #endif
 
 void ExprEngine::ViewGraph(bool trim) {
 #ifndef NDEBUG
   if (trim) {
-    std::vector<const ExplodedNode*> Src;
+    std::vector<const ExplodedNode *> Src;
 
     // Flush any outstanding reports to make sure we cover all the nodes.
     // This does not cause them to get displayed.
-    for (BugReporter::iterator I=BR.begin(), E=BR.end(); I!=E; ++I)
-      const_cast<BugType*>(*I)->FlushReports(BR);
+    for (const auto I : BR)
+      const_cast<BugType *>(I)->FlushReports(BR);
 
     // Iterate through the reports and get their nodes.
     for (BugReporter::EQClasses_iterator
            EI = BR.EQClasses_begin(), EE = BR.EQClasses_end(); EI != EE; ++EI) {
-      ExplodedNode *N = const_cast<ExplodedNode*>(EI->begin()->getErrorNode());
+      const auto *N = const_cast<ExplodedNode *>(EI->begin()->getErrorNode());
       if (N) Src.push_back(N);
     }
 
diff --git a/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/lib/StaticAnalyzer/Core/ExprEngineC.cpp
index 6f1e839..55ee2ce 100644
--- a/lib/StaticAnalyzer/Core/ExprEngineC.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngineC.cpp
@@ -20,6 +20,24 @@
 using namespace ento;
 using llvm::APSInt;
 
+/// \brief Optionally conjure and return a symbol for offset when processing
+/// an expression \p Expression.
+/// If \p Other is a location, conjure a symbol for \p Symbol
+/// (offset) if it is unknown so that memory arithmetic always
+/// results in an ElementRegion.
+/// \p Count The number of times the current basic block was visited.
+static SVal conjureOffsetSymbolOnLocation(
+    SVal Symbol, SVal Other, Expr* Expression, SValBuilder &svalBuilder,
+    unsigned Count, const LocationContext *LCtx) {
+  QualType Ty = Expression->getType();
+  if (Other.getAs<Loc>() &&
+      Ty->isIntegralOrEnumerationType() &&
+      Symbol.isUnknown()) {
+    return svalBuilder.conjureSymbolVal(Expression, LCtx, Ty, Count);
+  }
+  return Symbol;
+}
+
 void ExprEngine::VisitBinaryOperator(const BinaryOperator* B,
                                      ExplodedNode *Pred,
                                      ExplodedNodeSet &Dst) {
@@ -63,24 +81,13 @@
       StmtNodeBuilder Bldr(*it, Tmp2, *currBldrCtx);
 
       if (B->isAdditiveOp()) {
-        // If one of the operands is a location, conjure a symbol for the other
-        // one (offset) if it's unknown so that memory arithmetic always
-        // results in an ElementRegion.
         // TODO: This can be removed after we enable history tracking with
         // SymSymExpr.
         unsigned Count = currBldrCtx->blockCount();
-        if (LeftV.getAs<Loc>() &&
-            RHS->getType()->isIntegralOrEnumerationType() &&
-            RightV.isUnknown()) {
-          RightV = svalBuilder.conjureSymbolVal(RHS, LCtx, RHS->getType(),
-                                                Count);
-        }
-        if (RightV.getAs<Loc>() &&
-            LHS->getType()->isIntegralOrEnumerationType() &&
-            LeftV.isUnknown()) {
-          LeftV = svalBuilder.conjureSymbolVal(LHS, LCtx, LHS->getType(),
-                                               Count);
-        }
+        RightV = conjureOffsetSymbolOnLocation(
+            RightV, LeftV, RHS, svalBuilder, Count, LCtx);
+        LeftV = conjureOffsetSymbolOnLocation(
+            LeftV, RightV, LHS, svalBuilder, Count, LCtx);
       }
 
       // Although we don't yet model pointers-to-members, we do need to make
@@ -92,12 +99,10 @@
       // Process non-assignments except commas or short-circuited
       // logical expressions (LAnd and LOr).
       SVal Result = evalBinOp(state, Op, LeftV, RightV, B->getType());
-      if (Result.isUnknown()) {
-        Bldr.generateNode(B, *it, state);
-        continue;
+      if (!Result.isUnknown()) {
+        state = state->BindExpr(B, LCtx, Result);
       }
 
-      state = state->BindExpr(B, LCtx, Result);
       Bldr.generateNode(B, *it, state);
       continue;
     }
@@ -530,7 +535,7 @@
   const Expr *Init = CL->getInitializer();
   SVal V = State->getSVal(CL->getInitializer(), LCtx);
 
-  if (isa<CXXConstructExpr>(Init)) {
+  if (isa<CXXConstructExpr>(Init) || isa<CXXStdInitializerListExpr>(Init)) {
     // No work needed. Just pass the value up to this expression.
   } else {
     assert(isa<InitListExpr>(Init));
@@ -628,6 +633,16 @@
   StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
   ProgramStateRef state = Pred->getState();
 
+  if (B->getType()->isVectorType()) {
+    // FIXME: We do not model vector arithmetic yet. When adding support for
+    // that, note that the CFG-based reasoning below does not apply, because
+    // logical operators on vectors are not short-circuit. Currently they are
+    // modeled as short-circuit in Clang CFG but this is incorrect.
+    // Do not set the value for the expression. It'd be UnknownVal by default.
+    Bldr.generateNode(B, Pred, state);
+    return;
+  }
+
   ExplodedNode *N = Pred;
   while (!N->getLocation().getAs<BlockEntrance>()) {
     ProgramPoint P = N->getLocation();
@@ -745,7 +760,11 @@
   for (const ExplodedNode *N = Pred ; N ; N = *N->pred_begin()) {
     ProgramPoint PP = N->getLocation();
     if (PP.getAs<PreStmtPurgeDeadSymbols>() || PP.getAs<BlockEntrance>()) {
-      assert(N->pred_size() == 1);
+      // If the state N has multiple predecessors P, it means that successors
+      // of P are all equivalent.
+      // In turn, that means that all nodes at P are equivalent in terms
+      // of observable behavior at N, and we can follow any of them.
+      // FIXME: a more robust solution which does not walk up the tree.
       continue;
     }
     SrcBlock = PP.castAs<BlockEdge>().getSrc();
@@ -1028,7 +1047,14 @@
 
     // Propagate unknown and undefined values.
     if (V2_untested.isUnknownOrUndef()) {
-      Bldr.generateNode(U, *I, state->BindExpr(U, LCtx, V2_untested));
+      state = state->BindExpr(U, LCtx, V2_untested);
+
+      // Perform the store, so that the uninitialized value detection happens.
+      Bldr.takeNodes(*I);
+      ExplodedNodeSet Dst3;
+      evalStore(Dst3, U, U, *I, state, loc, V2_untested);
+      Bldr.addNodes(Dst3);
+
       continue;
     }
     DefinedSVal V2 = V2_untested.castAs<DefinedSVal>();
@@ -1040,6 +1066,7 @@
     // constant value. If the UnaryOperator has location type, create the
     // constant with int type and pointer width.
     SVal RHS;
+    SVal Result;
 
     if (U->getType()->isAnyPointerType())
       RHS = svalBuilder.makeArrayIndex(1);
@@ -1048,7 +1075,14 @@
     else
       RHS = UnknownVal();
 
-    SVal Result = evalBinOp(state, Op, V2, RHS, U->getType());
+    // The use of an operand of type bool with the ++ operators is deprecated
+    // but valid until C++17. And if the operand of the ++ operator is of type
+    // bool, it is set to true until C++17. Note that for '_Bool', it is also
+    // set to true when it encounters ++ operator.
+    if (U->getType()->isBooleanType() && U->isIncrementOp())
+      Result = svalBuilder.makeTruthVal(true, U->getType());
+    else
+      Result = evalBinOp(state, Op, V2, RHS, U->getType());
 
     // Conjure a new symbol if necessary to recover precision.
     if (Result.isUnknown()){
@@ -1070,7 +1104,6 @@
           Constraint = svalBuilder.evalEQ(state, SymVal,
                                        svalBuilder.makeZeroVal(U->getType()));
 
-
           state = state->assume(Constraint, false);
           assert(state);
         }
diff --git a/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index 03e0095..f4aa56f 100644
--- a/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -12,8 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
+#include "clang/Analysis/ConstructionContext.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/StmtCXX.h"
+#include "clang/AST/ParentMap.h"
 #include "clang/Basic/PrettyStackTrace.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
@@ -40,19 +42,30 @@
                                     const CallEvent &Call) {
   SVal ThisVal;
   bool AlwaysReturnsLValue;
+  const CXXRecordDecl *ThisRD = nullptr;
   if (const CXXConstructorCall *Ctor = dyn_cast<CXXConstructorCall>(&Call)) {
     assert(Ctor->getDecl()->isTrivial());
     assert(Ctor->getDecl()->isCopyOrMoveConstructor());
     ThisVal = Ctor->getCXXThisVal();
+    ThisRD = Ctor->getDecl()->getParent();
     AlwaysReturnsLValue = false;
   } else {
     assert(cast<CXXMethodDecl>(Call.getDecl())->isTrivial());
     assert(cast<CXXMethodDecl>(Call.getDecl())->getOverloadedOperator() ==
            OO_Equal);
     ThisVal = cast<CXXInstanceCall>(Call).getCXXThisVal();
+    ThisRD = cast<CXXMethodDecl>(Call.getDecl())->getParent();
     AlwaysReturnsLValue = true;
   }
 
+  assert(ThisRD);
+  if (ThisRD->isEmpty()) {
+    // Do nothing for empty classes. Otherwise it'd retrieve an UnknownVal
+    // and bind it and RegionStore would think that the actual value
+    // in this region at this offset is unknown.
+    return;
+  }
+
   const LocationContext *LCtx = Pred->getLocationContext();
 
   ExplodedNodeSet Dst;
@@ -83,20 +96,15 @@
 }
 
 
-/// Returns a region representing the first element of a (possibly
-/// multi-dimensional) array.
-///
-/// On return, \p Ty will be set to the base type of the array.
-///
-/// If the type is not an array type at all, the original value is returned.
-static SVal makeZeroElementRegion(ProgramStateRef State, SVal LValue,
-                                  QualType &Ty) {
+SVal ExprEngine::makeZeroElementRegion(ProgramStateRef State, SVal LValue,
+                                       QualType &Ty, bool &IsArray) {
   SValBuilder &SVB = State->getStateManager().getSValBuilder();
   ASTContext &Ctx = SVB.getContext();
 
   while (const ArrayType *AT = Ctx.getAsArrayType(Ty)) {
     Ty = AT->getElementType();
     LValue = State->getLValue(Ty, SVB.makeZeroArrayIndex(), LValue);
+    IsArray = true;
   }
 
   return LValue;
@@ -105,26 +113,30 @@
 
 const MemRegion *
 ExprEngine::getRegionForConstructedObject(const CXXConstructExpr *CE,
-                                          ExplodedNode *Pred) {
+                                          ExplodedNode *Pred,
+                                          const ConstructionContext *CC,
+                                          EvalCallOptions &CallOpts) {
   const LocationContext *LCtx = Pred->getLocationContext();
   ProgramStateRef State = Pred->getState();
+  MemRegionManager &MRMgr = getSValBuilder().getRegionManager();
 
-  // See if we're constructing an existing region by looking at the next
-  // element in the CFG.
-
-  if (auto Elem = findElementDirectlyInitializedByCurrentConstructor()) {
-    if (Optional<CFGStmt> StmtElem = Elem->getAs<CFGStmt>()) {
-      auto *DS = cast<DeclStmt>(StmtElem->getStmt());
-      if (const auto *Var = dyn_cast<VarDecl>(DS->getSingleDecl())) {
-        if (Var->getInit() && Var->getInit()->IgnoreImplicit() == CE) {
-          SVal LValue = State->getLValue(Var, LCtx);
-          QualType Ty = Var->getType();
-          LValue = makeZeroElementRegion(State, LValue, Ty);
-          return LValue.getAsRegion();
-        }
-      }
-    } else if (Optional<CFGInitializer> InitElem = Elem->getAs<CFGInitializer>()) {
-      const CXXCtorInitializer *Init = InitElem->getInitializer();
+  // See if we're constructing an existing region by looking at the
+  // current construction context.
+  if (CC) {
+    switch (CC->getKind()) {
+    case ConstructionContext::SimpleVariableKind: {
+      const auto *DSCC = cast<SimpleVariableConstructionContext>(CC);
+      const auto *DS = DSCC->getDeclStmt();
+      const auto *Var = cast<VarDecl>(DS->getSingleDecl());
+      SVal LValue = State->getLValue(Var, LCtx);
+      QualType Ty = Var->getType();
+      LValue =
+          makeZeroElementRegion(State, LValue, Ty, CallOpts.IsArrayCtorOrDtor);
+      return LValue.getAsRegion();
+    }
+    case ConstructionContext::ConstructorInitializerKind: {
+      const auto *ICC = cast<ConstructorInitializerConstructionContext>(CC);
+      const auto *Init = ICC->getCXXCtorInitializer();
       assert(Init->isAnyMemberInitializer());
       const CXXMethodDecl *CurCtor = cast<CXXMethodDecl>(LCtx->getDecl());
       Loc ThisPtr =
@@ -142,62 +154,66 @@
       }
 
       QualType Ty = Field->getType();
-      FieldVal = makeZeroElementRegion(State, FieldVal, Ty);
+      FieldVal = makeZeroElementRegion(State, FieldVal, Ty,
+                                       CallOpts.IsArrayCtorOrDtor);
       return FieldVal.getAsRegion();
     }
-
-    // FIXME: This will eventually need to handle new-expressions as well.
-    // Don't forget to update the pre-constructor initialization code in
-    // ExprEngine::VisitCXXConstructExpr.
-  }
-  // If we couldn't find an existing region to construct into, assume we're
-  // constructing a temporary.
-  MemRegionManager &MRMgr = getSValBuilder().getRegionManager();
-  return MRMgr.getCXXTempObjectRegion(CE, LCtx);
-}
-
-/// Returns true if the initializer for \Elem can be a direct
-/// constructor.
-static bool canHaveDirectConstructor(CFGElement Elem){
-  // DeclStmts and CXXCtorInitializers for fields can be directly constructed.
-
-  if (Optional<CFGStmt> StmtElem = Elem.getAs<CFGStmt>()) {
-    if (isa<DeclStmt>(StmtElem->getStmt())) {
-      return true;
+    case ConstructionContext::NewAllocatedObjectKind: {
+      if (AMgr.getAnalyzerOptions().mayInlineCXXAllocator()) {
+        const auto *NECC = cast<NewAllocatedObjectConstructionContext>(CC);
+        const auto *NE = NECC->getCXXNewExpr();
+        // TODO: Detect when the allocator returns a null pointer.
+        // Constructor shall not be called in this case.
+        if (const SubRegion *MR = dyn_cast_or_null<SubRegion>(
+                getCXXNewAllocatorValue(State, NE, LCtx).getAsRegion())) {
+          if (NE->isArray()) {
+            // TODO: In fact, we need to call the constructor for every
+            // allocated element, not just the first one!
+            CallOpts.IsArrayCtorOrDtor = true;
+            return getStoreManager().GetElementZeroRegion(
+                MR, NE->getType()->getPointeeType());
+          }
+          return MR;
+        }
+      }
+      break;
+    }
+    case ConstructionContext::TemporaryObjectKind: {
+      const auto *TOCC = cast<TemporaryObjectConstructionContext>(CC);
+      // See if we're lifetime-extended via our field. If so, take a note.
+      // Because automatic destructors aren't quite working in this case.
+      if (const auto *MTE = TOCC->getMaterializedTemporaryExpr()) {
+        if (const ValueDecl *VD = MTE->getExtendingDecl()) {
+          assert(VD->getType()->isReferenceType());
+          if (VD->getType()->getPointeeType().getCanonicalType() !=
+              MTE->GetTemporaryExpr()->getType().getCanonicalType()) {
+            CallOpts.IsTemporaryLifetimeExtendedViaSubobject = true;
+          }
+        }
+      }
+      // TODO: Support temporaries lifetime-extended via static references.
+      // They'd need a getCXXStaticTempObjectRegion().
+      CallOpts.IsTemporaryCtorOrDtor = true;
+      return MRMgr.getCXXTempObjectRegion(CE, LCtx);
+    }
+    case ConstructionContext::ReturnedValueKind: {
+      // TODO: We should construct into a CXXBindTemporaryExpr or a
+      // MaterializeTemporaryExpr around the call-expression on the previous
+      // stack frame. Currently we re-bind the temporary to the correct region
+      // later, but that's not semantically correct. This of course does not
+      // apply when we're in the top frame. But if we are in an inlined
+      // function, we should be able to take the call-site CFG element,
+      // and it should contain (but right now it wouldn't) some sort of
+      // construction context that'd give us the right temporary expression.
+      CallOpts.IsTemporaryCtorOrDtor = true;
+      return MRMgr.getCXXTempObjectRegion(CE, LCtx);
+    }
     }
   }
-
-  if (Elem.getKind() == CFGElement::Initializer) {
-    return true;
-  }
-
-  return false;
-}
-
-Optional<CFGElement>
-ExprEngine::findElementDirectlyInitializedByCurrentConstructor() {
-  const NodeBuilderContext &CurrBldrCtx = getBuilderContext();
-  // See if we're constructing an existing region by looking at the next
-  // element in the CFG.
-  const CFGBlock *B = CurrBldrCtx.getBlock();
-  assert(isa<CXXConstructExpr>(((*B)[currStmtIdx]).castAs<CFGStmt>().getStmt()));
-  unsigned int NextStmtIdx = currStmtIdx + 1;
-  if (NextStmtIdx >= B->size())
-    return None;
-
-  CFGElement Next = (*B)[NextStmtIdx];
-
-  // Is this a destructor? If so, we might be in the middle of an assignment
-  // to a local or member: look ahead one more element to see what we find.
-  while (Next.getAs<CFGImplicitDtor>() && NextStmtIdx + 1 < B->size()) {
-    ++NextStmtIdx;
-    Next = (*B)[NextStmtIdx];
-  }
-
-  if (canHaveDirectConstructor(Next))
-    return Next;
-
-  return None;
+  // If we couldn't find an existing region to construct into, assume we're
+  // constructing a temporary. Notify the caller of our failure.
+  CallOpts.IsCtorOrDtorWithImproperlyModeledTargetRegion = true;
+  return MRMgr.getCXXTempObjectRegion(CE, LCtx);
 }
 
 const CXXConstructExpr *
@@ -211,7 +227,6 @@
     return nullptr;
 
   const CFGBlock *B = getBuilderContext().getBlock();
-  assert(canHaveDirectConstructor((*B)[currStmtIdx]));
 
   unsigned int PreviousStmtIdx = currStmtIdx - 1;
   CFGElement Previous = (*B)[PreviousStmtIdx];
@@ -242,9 +257,40 @@
   // For now, we just run the first constructor (which should still invalidate
   // the entire array).
 
+  EvalCallOptions CallOpts;
+  auto C = getCurrentCFGElement().getAs<CFGConstructor>();
+  assert(C || getCurrentCFGElement().getAs<CFGStmt>());
+  const ConstructionContext *CC = C ? C->getConstructionContext() : nullptr;
+
+  const CXXBindTemporaryExpr *BTE = nullptr;
+  const MaterializeTemporaryExpr *MTE = nullptr;
+
   switch (CE->getConstructionKind()) {
   case CXXConstructExpr::CK_Complete: {
-    Target = getRegionForConstructedObject(CE, Pred);
+    Target = getRegionForConstructedObject(CE, Pred, CC, CallOpts);
+
+    // In case of temporary object construction, extract data necessary for
+    // destruction and lifetime extension.
+    if (const auto *TCC =
+            dyn_cast_or_null<TemporaryObjectConstructionContext>(CC)) {
+      assert(CallOpts.IsTemporaryCtorOrDtor);
+      assert(!CallOpts.IsCtorOrDtorWithImproperlyModeledTargetRegion);
+      if (AMgr.getAnalyzerOptions().includeTemporaryDtorsInCFG()) {
+        BTE = TCC->getCXXBindTemporaryExpr();
+        MTE = TCC->getMaterializedTemporaryExpr();
+        if (!BTE) {
+          // FIXME: lifetime extension for temporaries without destructors
+          // is not implemented yet.
+          MTE = nullptr;
+        }
+        if (MTE && MTE->getStorageDuration() != SD_FullExpression) {
+          // If the temporary is lifetime-extended, don't save the BTE,
+          // because we don't need a temporary destructor, but an automatic
+          // destructor.
+          BTE = nullptr;
+        }
+      }
+    }
     break;
   }
   case CXXConstructExpr::CK_VirtualBase:
@@ -267,6 +313,24 @@
     }
     // FALLTHROUGH
   case CXXConstructExpr::CK_NonVirtualBase:
+    // In C++17, classes with non-virtual bases may be aggregates, so they would
+    // be initialized as aggregates without a constructor call, so we may have
+    // a base class constructed directly into an initializer list without
+    // having the derived-class constructor call on the previous stack frame.
+    // Initializer lists may be nested into more initializer lists that
+    // correspond to surrounding aggregate initializations.
+    // FIXME: For now this code essentially bails out. We need to find the
+    // correct target region and set it.
+    // FIXME: Instead of relying on the ParentMap, we should have the
+    // trigger-statement (InitListExpr in this case) passed down from CFG or
+    // otherwise always available during construction.
+    if (dyn_cast_or_null<InitListExpr>(LCtx->getParentMap().getParent(CE))) {
+      MemRegionManager &MRMgr = getSValBuilder().getRegionManager();
+      Target = MRMgr.getCXXTempObjectRegion(CE, LCtx);
+      CallOpts.IsCtorOrDtorWithImproperlyModeledTargetRegion = true;
+      break;
+    }
+    // FALLTHROUGH
   case CXXConstructExpr::CK_Delegating: {
     const CXXMethodDecl *CurCtor = cast<CXXMethodDecl>(LCtx->getDecl());
     Loc ThisPtr = getSValBuilder().getCXXThis(CurCtor,
@@ -294,17 +358,18 @@
   ExplodedNodeSet DstPreVisit;
   getCheckerManager().runCheckersForPreStmt(DstPreVisit, Pred, CE, *this);
 
+  // FIXME: Is it possible and/or useful to do this before PreStmt?
   ExplodedNodeSet PreInitialized;
   {
     StmtNodeBuilder Bldr(DstPreVisit, PreInitialized, *currBldrCtx);
-    if (CE->requiresZeroInitialization()) {
-      // Type of the zero doesn't matter.
-      SVal ZeroVal = svalBuilder.makeZeroVal(getContext().CharTy);
+    for (ExplodedNodeSet::iterator I = DstPreVisit.begin(),
+                                   E = DstPreVisit.end();
+         I != E; ++I) {
+      ProgramStateRef State = (*I)->getState();
+      if (CE->requiresZeroInitialization()) {
+        // Type of the zero doesn't matter.
+        SVal ZeroVal = svalBuilder.makeZeroVal(getContext().CharTy);
 
-      for (ExplodedNodeSet::iterator I = DstPreVisit.begin(),
-                                     E = DstPreVisit.end();
-           I != E; ++I) {
-        ProgramStateRef State = (*I)->getState();
         // FIXME: Once we properly handle constructors in new-expressions, we'll
         // need to invalidate the region before setting a default value, to make
         // sure there aren't any lingering bindings around. This probably needs
@@ -318,9 +383,20 @@
         // since it's then possible to be initializing one part of a multi-
         // dimensional array.
         State = State->bindDefault(loc::MemRegionVal(Target), ZeroVal, LCtx);
-        Bldr.generateNode(CE, *I, State, /*tag=*/nullptr,
-                          ProgramPoint::PreStmtKind);
       }
+
+      if (BTE) {
+        State = addInitializedTemporary(State, BTE, LCtx,
+                                        cast<CXXTempObjectRegion>(Target));
+      }
+
+      if (MTE) {
+        State = addTemporaryMaterialization(State, MTE, LCtx,
+                                            cast<CXXTempObjectRegion>(Target));
+      }
+
+      Bldr.generateNode(CE, *I, State, /*tag=*/nullptr,
+                        ProgramPoint::PreStmtKind);
     }
   }
 
@@ -331,10 +407,9 @@
   ExplodedNodeSet DstEvaluated;
   StmtNodeBuilder Bldr(DstPreCall, DstEvaluated, *currBldrCtx);
 
-  bool IsArray = isa<ElementRegion>(Target);
   if (CE->getConstructor()->isTrivial() &&
       CE->getConstructor()->isCopyOrMoveConstructor() &&
-      !IsArray) {
+      !CallOpts.IsArrayCtorOrDtor) {
     // FIXME: Handle other kinds of trivial constructors as well.
     for (ExplodedNodeSet::iterator I = DstPreCall.begin(), E = DstPreCall.end();
          I != E; ++I)
@@ -343,7 +418,7 @@
   } else {
     for (ExplodedNodeSet::iterator I = DstPreCall.begin(), E = DstPreCall.end();
          I != E; ++I)
-      defaultEvalCall(Bldr, *I, *Call);
+      defaultEvalCall(Bldr, *I, *Call, CallOpts);
   }
 
   // If the CFG was contructed without elements for temporary destructors
@@ -355,20 +430,30 @@
   // paths when no-return temporary destructors are used for assertions.
   const AnalysisDeclContext *ADC = LCtx->getAnalysisDeclContext();
   if (!ADC->getCFGBuildOptions().AddTemporaryDtors) {
-      const MemRegion *Target = Call->getCXXThisVal().getAsRegion();
-      if (Target && isa<CXXTempObjectRegion>(Target) &&
-          Call->getDecl()->getParent()->isAnyDestructorNoReturn()) {
+    const MemRegion *Target = Call->getCXXThisVal().getAsRegion();
+    if (Target && isa<CXXTempObjectRegion>(Target) &&
+        Call->getDecl()->getParent()->isAnyDestructorNoReturn()) {
+
+      // If we've inlined the constructor, then DstEvaluated would be empty.
+      // In this case we still want a sink, which could be implemented
+      // in processCallExit. But we don't have that implemented at the moment,
+      // so if you hit this assertion, see if you can avoid inlining
+      // the respective constructor when analyzer-config cfg-temporary-dtors
+      // is set to false.
+      // Otherwise there's nothing wrong with inlining such constructor.
+      assert(!DstEvaluated.empty() &&
+             "We should not have inlined this constructor!");
 
       for (ExplodedNode *N : DstEvaluated) {
         Bldr.generateSink(CE, N, N->getState());
       }
 
-      // There is no need to run the PostCall and PostStmtchecker
+      // There is no need to run the PostCall and PostStmt checker
       // callbacks because we just generated sinks on all nodes in th
       // frontier.
       return;
     }
- }
+  }
 
   ExplodedNodeSet DstPostCall;
   getCheckerManager().runCheckersForPostCall(DstPostCall, DstEvaluated,
@@ -381,19 +466,11 @@
                                     const Stmt *S,
                                     bool IsBaseDtor,
                                     ExplodedNode *Pred,
-                                    ExplodedNodeSet &Dst) {
+                                    ExplodedNodeSet &Dst,
+                                    const EvalCallOptions &CallOpts) {
   const LocationContext *LCtx = Pred->getLocationContext();
   ProgramStateRef State = Pred->getState();
 
-  // FIXME: We need to run the same destructor on every element of the array.
-  // This workaround will just run the first destructor (which will still
-  // invalidate the entire array).
-  SVal DestVal = UnknownVal();
-  if (Dest)
-    DestVal = loc::MemRegionVal(Dest);
-  DestVal = makeZeroElementRegion(State, DestVal, ObjectType);
-  Dest = DestVal.getAsRegion();
-
   const CXXRecordDecl *RecordDecl = ObjectType->getAsCXXRecordDecl();
   assert(RecordDecl && "Only CXXRecordDecls should have destructors");
   const CXXDestructorDecl *DtorDecl = RecordDecl->getDestructor();
@@ -414,7 +491,7 @@
   StmtNodeBuilder Bldr(DstPreCall, DstInvalidated, *currBldrCtx);
   for (ExplodedNodeSet::iterator I = DstPreCall.begin(), E = DstPreCall.end();
        I != E; ++I)
-    defaultEvalCall(Bldr, *I, *Call);
+    defaultEvalCall(Bldr, *I, *Call, CallOpts);
 
   ExplodedNodeSet DstPostCall;
   getCheckerManager().runCheckersForPostCall(Dst, DstInvalidated,
@@ -437,15 +514,57 @@
   getCheckerManager().runCheckersForPreCall(DstPreCall, Pred,
                                             *Call, *this);
 
-  ExplodedNodeSet DstInvalidated;
-  StmtNodeBuilder Bldr(DstPreCall, DstInvalidated, *currBldrCtx);
-  for (ExplodedNodeSet::iterator I = DstPreCall.begin(), E = DstPreCall.end();
-       I != E; ++I)
-    defaultEvalCall(Bldr, *I, *Call);
-  getCheckerManager().runCheckersForPostCall(Dst, DstInvalidated,
-                                             *Call, *this);
-}
+  ExplodedNodeSet DstPostCall;
+  StmtNodeBuilder CallBldr(DstPreCall, DstPostCall, *currBldrCtx);
+  for (auto I : DstPreCall) {
+    // FIXME: Provide evalCall for checkers?
+    defaultEvalCall(CallBldr, I, *Call);
+  }
+  // If the call is inlined, DstPostCall will be empty and we bail out now.
 
+  // Store return value of operator new() for future use, until the actual
+  // CXXNewExpr gets processed.
+  ExplodedNodeSet DstPostValue;
+  StmtNodeBuilder ValueBldr(DstPostCall, DstPostValue, *currBldrCtx);
+  for (auto I : DstPostCall) {
+    // FIXME: Because CNE serves as the "call site" for the allocator (due to
+    // lack of a better expression in the AST), the conjured return value symbol
+    // is going to be of the same type (C++ object pointer type). Technically
+    // this is not correct because the operator new's prototype always says that
+    // it returns a 'void *'. So we should change the type of the symbol,
+    // and then evaluate the cast over the symbolic pointer from 'void *' to
+    // the object pointer type. But without changing the symbol's type it
+    // is breaking too much to evaluate the no-op symbolic cast over it, so we
+    // skip it for now.
+    ProgramStateRef State = I->getState();
+    SVal RetVal = State->getSVal(CNE, LCtx);
+
+    // If this allocation function is not declared as non-throwing, failures
+    // /must/ be signalled by exceptions, and thus the return value will never
+    // be NULL. -fno-exceptions does not influence this semantics.
+    // FIXME: GCC has a -fcheck-new option, which forces it to consider the case
+    // where new can return NULL. If we end up supporting that option, we can
+    // consider adding a check for it here.
+    // C++11 [basic.stc.dynamic.allocation]p3.
+    if (const FunctionDecl *FD = CNE->getOperatorNew()) {
+      QualType Ty = FD->getType();
+      if (const auto *ProtoType = Ty->getAs<FunctionProtoType>())
+        if (!ProtoType->isNothrow(getContext()))
+          State = State->assume(RetVal.castAs<DefinedOrUnknownSVal>(), true);
+    }
+
+    ValueBldr.generateNode(CNE, I,
+                           setCXXNewAllocatorValue(State, CNE, LCtx, RetVal));
+  }
+
+  ExplodedNodeSet DstPostPostCallCallback;
+  getCheckerManager().runCheckersForPostCall(DstPostPostCallCallback,
+                                             DstPostValue, *Call, *this);
+  for (auto I : DstPostPostCallCallback) {
+    getCheckerManager().runCheckersForNewAllocator(
+        CNE, getCXXNewAllocatorValue(I->getState(), CNE, LCtx), Dst, I, *this);
+  }
+}
 
 void ExprEngine::VisitCXXNewExpr(const CXXNewExpr *CNE, ExplodedNode *Pred,
                                    ExplodedNodeSet &Dst) {
@@ -456,69 +575,74 @@
 
   unsigned blockCount = currBldrCtx->blockCount();
   const LocationContext *LCtx = Pred->getLocationContext();
-  DefinedOrUnknownSVal symVal = UnknownVal();
+  SVal symVal = UnknownVal();
   FunctionDecl *FD = CNE->getOperatorNew();
 
-  bool IsStandardGlobalOpNewFunction = false;
-  if (FD && !isa<CXXMethodDecl>(FD) && !FD->isVariadic()) {
-    if (FD->getNumParams() == 2) {
-      QualType T = FD->getParamDecl(1)->getType();
-      if (const IdentifierInfo *II = T.getBaseTypeIdentifier())
-        // NoThrow placement new behaves as a standard new.
-        IsStandardGlobalOpNewFunction = II->getName().equals("nothrow_t");
-    }
-    else
-      // Placement forms are considered non-standard.
-      IsStandardGlobalOpNewFunction = (FD->getNumParams() == 1);
+  bool IsStandardGlobalOpNewFunction =
+      FD->isReplaceableGlobalAllocationFunction();
+
+  ProgramStateRef State = Pred->getState();
+
+  // Retrieve the stored operator new() return value.
+  if (AMgr.getAnalyzerOptions().mayInlineCXXAllocator()) {
+    symVal = getCXXNewAllocatorValue(State, CNE, LCtx);
+    State = clearCXXNewAllocatorValue(State, CNE, LCtx);
   }
 
   // We assume all standard global 'operator new' functions allocate memory in
   // heap. We realize this is an approximation that might not correctly model
   // a custom global allocator.
-  if (IsStandardGlobalOpNewFunction)
-    symVal = svalBuilder.getConjuredHeapSymbolVal(CNE, LCtx, blockCount);
-  else
-    symVal = svalBuilder.conjureSymbolVal(nullptr, CNE, LCtx, CNE->getType(),
-                                          blockCount);
+  if (symVal.isUnknown()) {
+    if (IsStandardGlobalOpNewFunction)
+      symVal = svalBuilder.getConjuredHeapSymbolVal(CNE, LCtx, blockCount);
+    else
+      symVal = svalBuilder.conjureSymbolVal(nullptr, CNE, LCtx, CNE->getType(),
+                                            blockCount);
+  }
 
-  ProgramStateRef State = Pred->getState();
   CallEventManager &CEMgr = getStateManager().getCallEventManager();
   CallEventRef<CXXAllocatorCall> Call =
     CEMgr.getCXXAllocatorCall(CNE, State, LCtx);
 
-  // Invalidate placement args.
-  // FIXME: Once we figure out how we want allocators to work,
-  // we should be using the usual pre-/(default-)eval-/post-call checks here.
-  State = Call->invalidateRegions(blockCount);
-  if (!State)
-    return;
+  if (!AMgr.getAnalyzerOptions().mayInlineCXXAllocator()) {
+    // Invalidate placement args.
+    // FIXME: Once we figure out how we want allocators to work,
+    // we should be using the usual pre-/(default-)eval-/post-call checks here.
+    State = Call->invalidateRegions(blockCount);
+    if (!State)
+      return;
 
-  // If this allocation function is not declared as non-throwing, failures
-  // /must/ be signalled by exceptions, and thus the return value will never be
-  // NULL. -fno-exceptions does not influence this semantics.
-  // FIXME: GCC has a -fcheck-new option, which forces it to consider the case
-  // where new can return NULL. If we end up supporting that option, we can
-  // consider adding a check for it here.
-  // C++11 [basic.stc.dynamic.allocation]p3.
-  if (FD) {
-    QualType Ty = FD->getType();
-    if (const FunctionProtoType *ProtoType = Ty->getAs<FunctionProtoType>())
-      if (!ProtoType->isNothrow(getContext()))
-        State = State->assume(symVal, true);
+    // If this allocation function is not declared as non-throwing, failures
+    // /must/ be signalled by exceptions, and thus the return value will never
+    // be NULL. -fno-exceptions does not influence this semantics.
+    // FIXME: GCC has a -fcheck-new option, which forces it to consider the case
+    // where new can return NULL. If we end up supporting that option, we can
+    // consider adding a check for it here.
+    // C++11 [basic.stc.dynamic.allocation]p3.
+    if (FD) {
+      QualType Ty = FD->getType();
+      if (const auto *ProtoType = Ty->getAs<FunctionProtoType>())
+        if (!ProtoType->isNothrow(getContext()))
+          if (auto dSymVal = symVal.getAs<DefinedOrUnknownSVal>())
+            State = State->assume(*dSymVal, true);
+    }
   }
 
   StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
 
+  SVal Result = symVal;
+
   if (CNE->isArray()) {
     // FIXME: allocating an array requires simulating the constructors.
     // For now, just return a symbolicated region.
-    const SubRegion *NewReg =
-        symVal.castAs<loc::MemRegionVal>().getRegionAs<SubRegion>();
-    QualType ObjTy = CNE->getType()->getAs<PointerType>()->getPointeeType();
-    const ElementRegion *EleReg =
-      getStoreManager().GetElementZeroRegion(NewReg, ObjTy);
-    State = State->BindExpr(CNE, Pred->getLocationContext(),
-                            loc::MemRegionVal(EleReg));
+    if (const SubRegion *NewReg =
+            dyn_cast_or_null<SubRegion>(symVal.getAsRegion())) {
+      QualType ObjTy = CNE->getType()->getAs<PointerType>()->getPointeeType();
+      const ElementRegion *EleReg =
+          getStoreManager().GetElementZeroRegion(NewReg, ObjTy);
+      Result = loc::MemRegionVal(EleReg);
+    }
+    State = State->BindExpr(CNE, Pred->getLocationContext(), Result);
     Bldr.generateNode(CNE, Pred, State);
     return;
   }
@@ -527,7 +651,6 @@
   // CXXNewExpr, we need to make sure that the constructed object is not
   // immediately invalidated here. (The placement call should happen before
   // the constructor call anyway.)
-  SVal Result = symVal;
   if (FD && FD->isReservedGlobalPlacementOperator()) {
     // Non-array placement new should always return the placement location.
     SVal PlacementLoc = State->getSVal(CNE->getPlacementArg(0), LCtx);
diff --git a/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
index caf86b2..ede4816 100644
--- a/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
@@ -15,8 +15,8 @@
 #include "PrettyStackTraceLocationContext.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/DeclCXX.h"
-#include "clang/AST/ParentMap.h"
 #include "clang/Analysis/Analyses/LiveVariables.h"
+#include "clang/Analysis/ConstructionContext.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "llvm/ADT/SmallSet.h"
@@ -276,6 +276,21 @@
 
       state = state->BindExpr(CCE, callerCtx, ThisV);
     }
+
+    if (const auto *CNE = dyn_cast<CXXNewExpr>(CE)) {
+      // We are currently evaluating a CXXNewAllocator CFGElement. It takes a
+      // while to reach the actual CXXNewExpr element from here, so keep the
+      // region for later use.
+      // Additionally cast the return value of the inlined operator new
+      // (which is of type 'void *') to the correct object type.
+      SVal AllocV = state->getSVal(CNE, callerCtx);
+      AllocV = svalBuilder.evalCast(
+          AllocV, CNE->getType(),
+          getContext().getPointerType(getContext().VoidTy));
+
+      state =
+          setCXXNewAllocatorValue(state, CNE, calleeCtx->getParent(), AllocV);
+    }
   }
 
   // Step 3: BindedRetNode -> CleanedNodes
@@ -315,6 +330,7 @@
     CallExitEnd Loc(calleeCtx, callerCtx);
     bool isNew;
     ProgramStateRef CEEState = (*I == CEBNode) ? state : (*I)->getState();
+
     ExplodedNode *CEENode = G.getNode(Loc, CEEState, false, &isNew);
     CEENode->addPredecessor(*I, G);
     if (!isNew)
@@ -331,16 +347,31 @@
     CallEventRef<> UpdatedCall = Call.cloneWithState(CEEState);
 
     ExplodedNodeSet DstPostCall;
-    getCheckerManager().runCheckersForPostCall(DstPostCall, CEENode,
-                                               *UpdatedCall, *this,
-                                               /*WasInlined=*/true);
-
+    if (const CXXNewExpr *CNE = dyn_cast_or_null<CXXNewExpr>(CE)) {
+      ExplodedNodeSet DstPostPostCallCallback;
+      getCheckerManager().runCheckersForPostCall(DstPostPostCallCallback,
+                                                 CEENode, *UpdatedCall, *this,
+                                                 /*WasInlined=*/true);
+      for (auto I : DstPostPostCallCallback) {
+        getCheckerManager().runCheckersForNewAllocator(
+            CNE, getCXXNewAllocatorValue(I->getState(), CNE,
+                                         calleeCtx->getParent()),
+            DstPostCall, I, *this,
+            /*WasInlined=*/true);
+      }
+    } else {
+      getCheckerManager().runCheckersForPostCall(DstPostCall, CEENode,
+                                                 *UpdatedCall, *this,
+                                                 /*WasInlined=*/true);
+    }
     ExplodedNodeSet Dst;
     if (const ObjCMethodCall *Msg = dyn_cast<ObjCMethodCall>(Call)) {
       getCheckerManager().runCheckersForPostObjCMessage(Dst, DstPostCall, *Msg,
                                                         *this,
                                                         /*WasInlined=*/true);
-    } else if (CE) {
+    } else if (CE &&
+               !(isa<CXXNewExpr>(CE) && // Called when visiting CXXNewExpr.
+                 AMgr.getAnalyzerOptions().mayInlineCXXAllocator())) {
       getCheckerManager().runCheckersForPostStmt(Dst, DstPostCall, CE,
                                                  *this, /*WasInlined=*/true);
     } else {
@@ -554,7 +585,19 @@
   QualType ResultTy = Call.getResultType();
   SValBuilder &SVB = getSValBuilder();
   unsigned Count = currBldrCtx->blockCount();
-  SVal R = SVB.conjureSymbolVal(nullptr, E, LCtx, ResultTy, Count);
+
+  // See if we need to conjure a heap pointer instead of
+  // a regular unknown pointer.
+  bool IsHeapPointer = false;
+  if (const auto *CNE = dyn_cast<CXXNewExpr>(E))
+    if (CNE->getOperatorNew()->isReplaceableGlobalAllocationFunction()) {
+      // FIXME: Delegate this to evalCall in MallocChecker?
+      IsHeapPointer = true;
+    }
+
+  SVal R = IsHeapPointer
+               ? SVB.getConjuredHeapSymbolVal(E, LCtx, Count)
+               : SVB.conjureSymbolVal(nullptr, E, LCtx, ResultTy, Count);
   return State->BindExpr(E, LCtx, R);
 }
 
@@ -570,15 +613,10 @@
   Bldr.generateNode(Call.getProgramPoint(), State, Pred);
 }
 
-enum CallInlinePolicy {
-  CIP_Allowed,
-  CIP_DisallowedOnce,
-  CIP_DisallowedAlways
-};
-
-static CallInlinePolicy mayInlineCallKind(const CallEvent &Call,
-                                          const ExplodedNode *Pred,
-                                          AnalyzerOptions &Opts) {
+ExprEngine::CallInlinePolicy
+ExprEngine::mayInlineCallKind(const CallEvent &Call, const ExplodedNode *Pred,
+                              AnalyzerOptions &Opts,
+                              const ExprEngine::EvalCallOptions &CallOpts) {
   const LocationContext *CurLC = Pred->getLocationContext();
   const StackFrameContext *CallerSFC = CurLC->getCurrentStackFrame();
   switch (Call.getKind()) {
@@ -596,22 +634,24 @@
 
     const CXXConstructorCall &Ctor = cast<CXXConstructorCall>(Call);
 
+    const CXXConstructExpr *CtorExpr = Ctor.getOriginExpr();
+
+    auto CCE = getCurrentCFGElement().getAs<CFGConstructor>();
+    const ConstructionContext *CC = CCE ? CCE->getConstructionContext()
+                                        : nullptr;
+
+    if (CC && isa<NewAllocatedObjectConstructionContext>(CC) &&
+        !Opts.mayInlineCXXAllocator())
+      return CIP_DisallowedOnce;
+
     // FIXME: We don't handle constructors or destructors for arrays properly.
     // Even once we do, we still need to be careful about implicitly-generated
     // initializers for array fields in default move/copy constructors.
-    const MemRegion *Target = Ctor.getCXXThisVal().getAsRegion();
-    if (Target && isa<ElementRegion>(Target))
+    // We still allow construction into ElementRegion targets when they don't
+    // represent array elements.
+    if (CallOpts.IsArrayCtorOrDtor)
       return CIP_DisallowedOnce;
 
-    // FIXME: This is a hack. We don't use the correct region for a new
-    // expression, so if we inline the constructor its result will just be
-    // thrown away. This short-term hack is tracked in <rdar://problem/12180598>
-    // and the longer-term possible fix is discussed in PR12014.
-    const CXXConstructExpr *CtorExpr = Ctor.getOriginExpr();
-    if (const Stmt *Parent = CurLC->getParentMap().getParent(CtorExpr))
-      if (isa<CXXNewExpr>(Parent))
-        return CIP_DisallowedOnce;
-
     // Inlining constructors requires including initializers in the CFG.
     const AnalysisDeclContext *ADC = CallerSFC->getAnalysisDeclContext();
     assert(ADC->getCFGBuildOptions().AddInitializers && "No CFG initializers");
@@ -626,12 +666,25 @@
     if (!Opts.mayInlineCXXMemberFunction(CIMK_Destructors))
       return CIP_DisallowedAlways;
 
-    // FIXME: This is a hack. We don't handle temporary destructors
-    // right now, so we shouldn't inline their constructors.
-    if (CtorExpr->getConstructionKind() == CXXConstructExpr::CK_Complete)
-      if (!Target || !isa<DeclRegion>(Target))
+    if (CtorExpr->getConstructionKind() == CXXConstructExpr::CK_Complete) {
+      // If we don't handle temporary destructors, we shouldn't inline
+      // their constructors.
+      if (CallOpts.IsTemporaryCtorOrDtor &&
+          !Opts.includeTemporaryDtorsInCFG())
         return CIP_DisallowedOnce;
 
+      // If we did not find the correct this-region, it would be pointless
+      // to inline the constructor. Instead we will simply invalidate
+      // the fake temporary target.
+      if (CallOpts.IsCtorOrDtorWithImproperlyModeledTargetRegion)
+        return CIP_DisallowedOnce;
+
+      // If the temporary is lifetime-extended by binding a smaller object
+      // within it to a reference, automatic destructors don't work properly.
+      if (CallOpts.IsTemporaryLifetimeExtendedViaSubobject)
+        return CIP_DisallowedOnce;
+    }
+
     break;
   }
   case CE_CXXDestructor: {
@@ -643,13 +696,19 @@
     assert(ADC->getCFGBuildOptions().AddImplicitDtors && "No CFG destructors");
     (void)ADC;
 
-    const CXXDestructorCall &Dtor = cast<CXXDestructorCall>(Call);
-
     // FIXME: We don't handle constructors or destructors for arrays properly.
-    const MemRegion *Target = Dtor.getCXXThisVal().getAsRegion();
-    if (Target && isa<ElementRegion>(Target))
+    if (CallOpts.IsArrayCtorOrDtor)
       return CIP_DisallowedOnce;
 
+    // Allow disabling temporary destructor inlining with a separate option.
+    if (CallOpts.IsTemporaryCtorOrDtor && !Opts.mayInlineCXXTemporaryDtors())
+      return CIP_DisallowedOnce;
+
+    // If we did not find the correct this-region, it would be pointless
+    // to inline the destructor. Instead we will simply invalidate
+    // the fake temporary target.
+    if (CallOpts.IsCtorOrDtorWithImproperlyModeledTargetRegion)
+      return CIP_DisallowedOnce;
     break;
   }
   case CE_CXXAllocator:
@@ -788,7 +847,8 @@
 }
 
 bool ExprEngine::shouldInlineCall(const CallEvent &Call, const Decl *D,
-                                  const ExplodedNode *Pred) {
+                                  const ExplodedNode *Pred,
+                                  const EvalCallOptions &CallOpts) {
   if (!D)
     return false;
 
@@ -797,14 +857,6 @@
   AnalysisDeclContextManager &ADCMgr = AMgr.getAnalysisDeclContextManager();
   AnalysisDeclContext *CalleeADC = ADCMgr.getContext(D);
 
-  // Temporary object destructor processing is currently broken, so we never
-  // inline them.
-  // FIXME: Remove this once temp destructors are working.
-  if (isa<CXXDestructorCall>(Call)) {
-    if ((*currBldrCtx->getBlock())[currStmtIdx].getAs<CFGTemporaryDtor>())
-      return false;
-  }
-
   // The auto-synthesized bodies are essential to inline as they are
   // usually small and commonly used. Note: we should do this check early on to
   // ensure we always inline these calls.
@@ -835,7 +887,7 @@
   // FIXME: this checks both static and dynamic properties of the call, which
   // means we're redoing a bit of work that could be cached in the function
   // summary.
-  CallInlinePolicy CIP = mayInlineCallKind(Call, Pred, Opts);
+  CallInlinePolicy CIP = mayInlineCallKind(Call, Pred, Opts, CallOpts);
   if (CIP != CIP_Allowed) {
     if (CIP == CIP_DisallowedAlways) {
       assert(!MayInline.hasValue() || MayInline.getValue());
@@ -887,7 +939,8 @@
 }
 
 void ExprEngine::defaultEvalCall(NodeBuilder &Bldr, ExplodedNode *Pred,
-                                 const CallEvent &CallTemplate) {
+                                 const CallEvent &CallTemplate,
+                                 const EvalCallOptions &CallOpts) {
   // Make sure we have the most recent state attached to the call.
   ProgramStateRef State = Pred->getState();
   CallEventRef<> Call = CallTemplate.cloneWithState(State);
@@ -910,7 +963,7 @@
   } else {
     RuntimeDefinition RD = Call->getRuntimeDefinition();
     const Decl *D = RD.getDecl();
-    if (shouldInlineCall(*Call, D, Pred)) {
+    if (shouldInlineCall(*Call, D, Pred, CallOpts)) {
       if (RD.mayHaveOtherDefinitions()) {
         AnalyzerOptions &Options = getAnalysisManager().options;
 
diff --git a/lib/StaticAnalyzer/Core/FunctionSummary.cpp b/lib/StaticAnalyzer/Core/FunctionSummary.cpp
index c21735b..94edd84 100644
--- a/lib/StaticAnalyzer/Core/FunctionSummary.cpp
+++ b/lib/StaticAnalyzer/Core/FunctionSummary.cpp
@@ -1,4 +1,4 @@
-//== FunctionSummary.cpp - Stores summaries of functions. ----------*- C++ -*-//
+//===- FunctionSummary.cpp - Stores summaries of functions. ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,21 +12,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h"
+
 using namespace clang;
 using namespace ento;
 
 unsigned FunctionSummariesTy::getTotalNumBasicBlocks() {
   unsigned Total = 0;
-  for (MapTy::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
-    Total += I->second.TotalBasicBlocks;
-  }
+  for (const auto &I : Map)
+    Total += I.second.TotalBasicBlocks;
   return Total;
 }
 
 unsigned FunctionSummariesTy::getTotalNumVisitedBasicBlocks() {
   unsigned Total = 0;
-  for (MapTy::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
-    Total += I->second.VisitedBasicBlocks.count();
-  }
+  for (const auto &I : Map)
+    Total += I.second.VisitedBasicBlocks.count();
   return Total;
 }
diff --git a/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
index 9b820e8..b4e73c8 100644
--- a/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
+++ b/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
@@ -1,4 +1,4 @@
-//===--- HTMLDiagnostics.cpp - HTML Diagnostics for Paths ----*- C++ -*-===//
+//===- HTMLDiagnostics.cpp - HTML Diagnostics for Paths -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,24 +11,43 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/Stmt.h"
 #include "clang/Basic/FileManager.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
 #include "clang/Rewrite/Core/HTMLRewrite.h"
 #include "clang/Rewrite/Core/Rewriter.h"
+#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
-#include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/IssueHash.h"
 #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <map>
+#include <memory>
+#include <set>
 #include <sstream>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
 
 using namespace clang;
 using namespace ento;
@@ -41,15 +60,19 @@
 
 class HTMLDiagnostics : public PathDiagnosticConsumer {
   std::string Directory;
-  bool createdDir, noDir;
+  bool createdDir = false;
+  bool noDir = false;
   const Preprocessor &PP;
   AnalyzerOptions &AnalyzerOpts;
   const bool SupportsCrossFileDiagnostics;
+
 public:
   HTMLDiagnostics(AnalyzerOptions &AnalyzerOpts,
                   const std::string& prefix,
                   const Preprocessor &pp,
-                  bool supportsMultipleFiles);
+                  bool supportsMultipleFiles)
+      : Directory(prefix), PP(pp), AnalyzerOpts(AnalyzerOpts),
+        SupportsCrossFileDiagnostics(supportsMultipleFiles) {}
 
   ~HTMLDiagnostics() override { FlushDiagnostics(nullptr); }
 
@@ -91,20 +114,16 @@
   // Rewrite the file specified by FID with HTML formatting.
   void RewriteFile(Rewriter &R, const SourceManager& SMgr,
                    const PathPieces& path, FileID FID);
+
+  /// \return Javascript for navigating the HTML report using j/k keys.
+  std::string generateKeyboardNavigationJavascript();
+
+private:
+  /// \return Javascript for displaying shortcuts help;
+  std::string showHelpJavascript();
 };
 
-} // end anonymous namespace
-
-HTMLDiagnostics::HTMLDiagnostics(AnalyzerOptions &AnalyzerOpts,
-                                 const std::string& prefix,
-                                 const Preprocessor &pp,
-                                 bool supportsMultipleFiles)
-    : Directory(prefix),
-      createdDir(false),
-      noDir(false),
-      PP(pp),
-      AnalyzerOpts(AnalyzerOpts),
-      SupportsCrossFileDiagnostics(supportsMultipleFiles) {}
+} // namespace
 
 void ento::createHTMLDiagnosticConsumer(AnalyzerOptions &AnalyzerOpts,
                                         PathDiagnosticConsumers &C,
@@ -127,24 +146,19 @@
 void HTMLDiagnostics::FlushDiagnosticsImpl(
   std::vector<const PathDiagnostic *> &Diags,
   FilesMade *filesMade) {
-  for (std::vector<const PathDiagnostic *>::iterator it = Diags.begin(),
-       et = Diags.end(); it != et; ++it) {
-    ReportDiag(**it, filesMade);
-  }
+  for (const auto Diag : Diags)
+    ReportDiag(*Diag, filesMade);
 }
 
 void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
                                  FilesMade *filesMade) {
-
   // Create the HTML directory if it is missing.
   if (!createdDir) {
     createdDir = true;
     if (std::error_code ec = llvm::sys::fs::create_directories(Directory)) {
       llvm::errs() << "warning: could not create directory '"
                    << Directory << "': " << ec.message() << '\n';
-
       noDir = true;
-
       return;
     }
   }
@@ -171,7 +185,7 @@
   SmallString<128> declName("unknown");
   int offsetDecl = 0;
   if (const Decl *DeclWithIssue = D.getDeclWithIssue()) {
-      if (const NamedDecl *ND = dyn_cast<NamedDecl>(DeclWithIssue))
+      if (const auto *ND = dyn_cast<NamedDecl>(DeclWithIssue))
           declName = ND->getDeclName().getAsString();
 
       if (const Stmt *Body = DeclWithIssue->getBody()) {
@@ -209,7 +223,6 @@
                        << "': " << EC.message() << '\n';
           return;
       }
-
   } else {
       int i = 1;
       std::error_code EC;
@@ -250,7 +263,6 @@
 
 std::string HTMLDiagnostics::GenerateHTML(const PathDiagnostic& D, Rewriter &R,
     const SourceManager& SMgr, const PathPieces& path, const char *declName) {
-
   // Rewrite source files as HTML for every new file the path crosses
   std::vector<FileID> FileIDs;
   for (auto I : path) {
@@ -306,7 +318,7 @@
 
   const RewriteBuffer *Buf = R.getRewriteBufferFor(FileIDs[0]);
   if (!Buf)
-    return "";
+    return {};
 
   // Add CSS, header, and footer.
   const FileEntry* Entry = SMgr.getFileEntryForID(FileIDs[0]);
@@ -320,6 +332,114 @@
   return os.str();
 }
 
+/// Write executed lines from \p D in JSON format into \p os.
+static void serializeExecutedLines(
+    const PathDiagnostic &D,
+    const PathPieces &path,
+    llvm::raw_string_ostream &os) {
+  // Copy executed lines from path diagnostics.
+  std::map<unsigned, std::set<unsigned>> ExecutedLines;
+  for (auto I = D.executedLines_begin(),
+            E = D.executedLines_end(); I != E; ++I) {
+    std::set<unsigned> &LinesInFile = ExecutedLines[I->first];
+    for (unsigned LineNo : I->second) {
+      LinesInFile.insert(LineNo);
+    }
+  }
+
+  // We need to include all lines for which any kind of diagnostics appears.
+  for (const auto &P : path) {
+    FullSourceLoc Loc = P->getLocation().asLocation().getExpansionLoc();
+    FileID FID = Loc.getFileID();
+    unsigned LineNo = Loc.getLineNumber();
+    ExecutedLines[FID.getHashValue()].insert(LineNo);
+  }
+
+  os << "var relevant_lines = {";
+  for (auto I = ExecutedLines.begin(),
+            E = ExecutedLines.end(); I != E; ++I) {
+    if (I != ExecutedLines.begin())
+      os << ", ";
+
+    os << "\"" << I->first << "\": {";
+    for (unsigned LineNo : I->second) {
+      if (LineNo != *(I->second.begin()))
+        os << ", ";
+
+      os << "\"" << LineNo << "\": 1";
+    }
+    os << "}";
+  }
+
+  os << "};";
+}
+
+/// \return JavaScript for an option to only show relevant lines.
+static std::string showRelevantLinesJavascript(
+      const PathDiagnostic &D, const PathPieces &path) {
+  std::string s;
+  llvm::raw_string_ostream os(s);
+  os << "<script type='text/javascript'>\n";
+  serializeExecutedLines(D, path, os);
+  os << R"<<<(
+
+var filterCounterexample = function (hide) {
+  var tables = document.getElementsByClassName("code");
+  for (var t=0; t<tables.length; t++) {
+    var table = tables[t];
+    var file_id = table.getAttribute("data-fileid");
+    var lines_in_fid = relevant_lines[file_id];
+    if (!lines_in_fid) {
+      lines_in_fid = {};
+    }
+    var lines = table.getElementsByClassName("codeline");
+    for (var i=0; i<lines.length; i++) {
+        var el = lines[i];
+        var lineNo = el.getAttribute("data-linenumber");
+        if (!lines_in_fid[lineNo]) {
+          if (hide) {
+            el.setAttribute("hidden", "");
+          } else {
+            el.removeAttribute("hidden");
+          }
+        }
+    }
+  }
+}
+
+window.addEventListener("keydown", function (event) {
+  if (event.defaultPrevented) {
+    return;
+  }
+  if (event.key == "S") {
+    var checked = document.getElementsByName("showCounterexample")[0].checked;
+    filterCounterexample(!checked);
+    document.getElementsByName("showCounterexample")[0].checked = !checked;
+  } else {
+    return;
+  }
+  event.preventDefault();
+}, true);
+
+document.addEventListener("DOMContentLoaded", function() {
+    document.querySelector('input[name="showCounterexample"]').onchange=
+        function (event) {
+      filterCounterexample(this.checked);
+    };
+});
+</script>
+
+<form>
+    <input type="checkbox" name="showCounterexample" id="showCounterexample" />
+    <label for="showCounterexample">
+       Show only relevant lines
+    </label>
+</form>
+)<<<";
+
+  return os.str();
+}
+
 void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R,
     const SourceManager& SMgr, const PathPieces& path, FileID FID,
     const FileEntry *Entry, const char *declName) {
@@ -337,6 +457,15 @@
   int LineNumber = path.back()->getLocation().asLocation().getExpansionLineNumber();
   int ColumnNumber = path.back()->getLocation().asLocation().getExpansionColumnNumber();
 
+  R.InsertTextBefore(SMgr.getLocForStartOfFile(FID), showHelpJavascript());
+
+  R.InsertTextBefore(SMgr.getLocForStartOfFile(FID),
+                     generateKeyboardNavigationJavascript());
+
+  // Checkbox and javascript for filtering the output to the counterexample.
+  R.InsertTextBefore(SMgr.getLocForStartOfFile(FID),
+                     showRelevantLinesJavascript(D, path));
+
   // Add the name of the file as an <h1> tag.
   {
     std::string s;
@@ -373,14 +502,33 @@
 
     // Output any other meta data.
 
-    for (PathDiagnostic::meta_iterator I=D.meta_begin(), E=D.meta_end();
-         I!=E; ++I) {
+    for (PathDiagnostic::meta_iterator I = D.meta_begin(), E = D.meta_end();
+         I != E; ++I) {
       os << "<tr><td></td><td>" << html::EscapeText(*I) << "</td></tr>\n";
     }
 
-    os << "</table>\n<!-- REPORTSUMMARYEXTRA -->\n"
-          "<h3>Annotated Source Code</h3>\n";
-
+    os << R"<<<(
+</table>
+<!-- REPORTSUMMARYEXTRA -->
+<h3>Annotated Source Code</h3>
+<p>Press <a href="#" onclick="toggleHelp(); return false;">'?'</a>
+   to see keyboard shortcuts</p>
+<input type="checkbox" class="spoilerhider" id="showinvocation" />
+<label for="showinvocation" >Show analyzer invocation</label>
+<div class="spoiler">clang -cc1 )<<<";
+    os << html::EscapeText(AnalyzerOpts.FullCompilerInvocation);
+    os << R"<<<(
+</div>
+<div id='tooltiphint' hidden="true">
+  <p>Keyboard shortcuts: </p>
+  <ul>
+    <li>Use 'j/k' keys for keyboard navigation</li>
+    <li>Use 'Shift+S' to show/hide relevant lines</li>
+    <li>Use '?' to toggle this window</li>
+  </ul>
+  <a href="#" onclick="toggleHelp(); return false;">Close</a>
+</div>
+)<<<";
     R.InsertTextBefore(SMgr.getLocForStartOfFile(FID), os.str());
   }
 
@@ -438,6 +586,34 @@
   html::AddHeaderFooterInternalBuiltinCSS(R, FID, Entry->getName());
 }
 
+std::string HTMLDiagnostics::showHelpJavascript() {
+  return R"<<<(
+<script type='text/javascript'>
+
+var toggleHelp = function() {
+    var hint = document.querySelector("#tooltiphint");
+    var attributeName = "hidden";
+    if (hint.hasAttribute(attributeName)) {
+      hint.removeAttribute(attributeName);
+    } else {
+      hint.setAttribute("hidden", "true");
+    }
+};
+window.addEventListener("keydown", function (event) {
+  if (event.defaultPrevented) {
+    return;
+  }
+  if (event.key == "?") {
+    toggleHelp();
+  } else {
+    return;
+  }
+  event.preventDefault();
+});
+</script>
+)<<<";
+}
+
 void HTMLDiagnostics::RewriteFile(Rewriter &R, const SourceManager& SMgr,
     const PathPieces& path, FileID FID) {
   // Process the path.
@@ -482,7 +658,6 @@
 void HTMLDiagnostics::HandlePiece(Rewriter& R, FileID BugFileID,
                                   const PathDiagnosticPiece& P,
                                   unsigned num, unsigned max) {
-
   // For now, just draw a box above the line in question, and emit the
   // warning.
   FullSourceLoc Pos = P.getLocation().asLocation();
@@ -622,9 +797,7 @@
     os << "</td><td>";
   }
 
-  if (const PathDiagnosticMacroPiece *MP =
-        dyn_cast<PathDiagnosticMacroPiece>(&P)) {
-
+  if (const auto *MP = dyn_cast<PathDiagnosticMacroPiece>(&P)) {
     os << "Within the expansion of the macro '";
 
     // Get the name of the macro by relexing it.
@@ -695,10 +868,8 @@
 
   // Now highlight the ranges.
   ArrayRef<SourceRange> Ranges = P.getRanges();
-  for (ArrayRef<SourceRange>::iterator I = Ranges.begin(),
-                                       E = Ranges.end(); I != E; ++I) {
-    HighlightRange(R, LPosInfo.first, *I);
-  }
+  for (const auto &Range : Ranges)
+    HighlightRange(R, LPosInfo.first, Range);
 }
 
 static void EmitAlphaCounter(raw_ostream &os, unsigned n) {
@@ -714,18 +885,13 @@
 unsigned HTMLDiagnostics::ProcessMacroPiece(raw_ostream &os,
                                             const PathDiagnosticMacroPiece& P,
                                             unsigned num) {
-
-  for (PathPieces::const_iterator I = P.subPieces.begin(), E=P.subPieces.end();
-        I!=E; ++I) {
-
-    if (const PathDiagnosticMacroPiece *MP =
-            dyn_cast<PathDiagnosticMacroPiece>(I->get())) {
+  for (const auto &subPiece : P.subPieces) {
+    if (const auto *MP = dyn_cast<PathDiagnosticMacroPiece>(subPiece.get())) {
       num = ProcessMacroPiece(os, *MP, num);
       continue;
     }
 
-    if (PathDiagnosticEventPiece *EP =
-            dyn_cast<PathDiagnosticEventPiece>(I->get())) {
+    if (const auto *EP = dyn_cast<PathDiagnosticEventPiece>(subPiece.get())) {
       os << "<div class=\"msg msgEvent\" style=\"width:94%; "
             "margin-left:5px\">"
             "<table class=\"msgT\"><tr>"
@@ -777,3 +943,82 @@
 
   html::HighlightRange(R, InstantiationStart, E, HighlightStart, HighlightEnd);
 }
+
+std::string HTMLDiagnostics::generateKeyboardNavigationJavascript() {
+  return R"<<<(
+<script type='text/javascript'>
+var digitMatcher = new RegExp("[0-9]+");
+
+document.addEventListener("DOMContentLoaded", function() {
+    document.querySelectorAll(".PathNav > a").forEach(
+        function(currentValue, currentIndex) {
+            var hrefValue = currentValue.getAttribute("href");
+            currentValue.onclick = function() {
+                scrollTo(document.querySelector(hrefValue));
+                return false;
+            };
+        });
+});
+
+var findNum = function() {
+    var s = document.querySelector(".selected");
+    if (!s || s.id == "EndPath") {
+        return 0;
+    }
+    var out = parseInt(digitMatcher.exec(s.id)[0]);
+    return out;
+};
+
+var scrollTo = function(el) {
+    document.querySelectorAll(".selected").forEach(function(s) {
+        s.classList.remove("selected");
+    });
+    el.classList.add("selected");
+    window.scrollBy(0, el.getBoundingClientRect().top -
+        (window.innerHeight / 2));
+}
+
+var move = function(num, up, numItems) {
+  if (num == 1 && up || num == numItems - 1 && !up) {
+    return 0;
+  } else if (num == 0 && up) {
+    return numItems - 1;
+  } else if (num == 0 && !up) {
+    return 1 % numItems;
+  }
+  return up ? num - 1 : num + 1;
+}
+
+var numToId = function(num) {
+  if (num == 0) {
+    return document.getElementById("EndPath")
+  }
+  return document.getElementById("Path" + num);
+};
+
+var navigateTo = function(up) {
+  var numItems = document.querySelectorAll(".line > .msg").length;
+  var currentSelected = findNum();
+  var newSelected = move(currentSelected, up, numItems);
+  var newEl = numToId(newSelected, numItems);
+
+  // Scroll element into center.
+  scrollTo(newEl);
+};
+
+window.addEventListener("keydown", function (event) {
+  if (event.defaultPrevented) {
+    return;
+  }
+  if (event.key == "j") {
+    navigateTo(/*up=*/false);
+  } else if (event.key == "k") {
+    navigateTo(/*up=*/true);
+  } else {
+    return;
+  }
+  event.preventDefault();
+}, true);
+</script>
+  )<<<";
+}
diff --git a/lib/StaticAnalyzer/Core/IssueHash.cpp b/lib/StaticAnalyzer/Core/IssueHash.cpp
index abdea88..274ebe7 100644
--- a/lib/StaticAnalyzer/Core/IssueHash.cpp
+++ b/lib/StaticAnalyzer/Core/IssueHash.cpp
@@ -33,6 +33,13 @@
     return "";
   std::string Signature;
 
+  // When a flow sensitive bug happens in templated code we should not generate
+  // distinct hash value for every instantiation. Use the signature from the
+  // primary template.
+  if (const FunctionDecl *InstantiatedFrom =
+          Target->getTemplateInstantiationPattern())
+    Target = InstantiatedFrom;
+
   if (!isa<CXXConstructorDecl>(Target) && !isa<CXXDestructorDecl>(Target) &&
       !isa<CXXConversionDecl>(Target))
     Signature.append(Target->getReturnType().getAsString()).append(" ");
diff --git a/lib/StaticAnalyzer/Core/LoopUnrolling.cpp b/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
index 98b6ebd..a8c4b05 100644
--- a/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
+++ b/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
@@ -208,9 +208,16 @@
     return false;
 
   auto CounterVar = Matches[0].getNodeAs<VarDecl>("initVarName");
-  auto BoundNum = Matches[0].getNodeAs<IntegerLiteral>("boundNum")->getValue();
-  auto InitNum = Matches[0].getNodeAs<IntegerLiteral>("initNum")->getValue();
+  llvm::APInt BoundNum =
+      Matches[0].getNodeAs<IntegerLiteral>("boundNum")->getValue();
+  llvm::APInt InitNum =
+      Matches[0].getNodeAs<IntegerLiteral>("initNum")->getValue();
   auto CondOp = Matches[0].getNodeAs<BinaryOperator>("conditionOperator");
+  if (InitNum.getBitWidth() != BoundNum.getBitWidth()) {
+    InitNum = InitNum.zextOrSelf(BoundNum.getBitWidth());
+    BoundNum = BoundNum.zextOrSelf(InitNum.getBitWidth());
+  }
+
   if (CondOp->getOpcode() == BO_GE || CondOp->getOpcode() == BO_LE)
     maxStep = (BoundNum - InitNum + 1).abs().getZExtValue();
   else
diff --git a/lib/StaticAnalyzer/Core/MemRegion.cpp b/lib/StaticAnalyzer/Core/MemRegion.cpp
index 640e63d..1562ff1 100644
--- a/lib/StaticAnalyzer/Core/MemRegion.cpp
+++ b/lib/StaticAnalyzer/Core/MemRegion.cpp
@@ -23,6 +23,11 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+
+#include<functional>
+
+#define DEBUG_TYPE "MemRegion"
 
 using namespace clang;
 using namespace ento;
@@ -103,15 +108,15 @@
 //===----------------------------------------------------------------------===//
 
 bool SubRegion::isSubRegionOf(const MemRegion* R) const {
-  const MemRegion* r = getSuperRegion();
-  while (r != nullptr) {
+  const MemRegion* r = this;
+  do {
     if (r == R)
       return true;
     if (const SubRegion* sr = dyn_cast<SubRegion>(r))
       r = sr->getSuperRegion();
     else
       break;
-  }
+  } while (r != nullptr);
   return false;
 }
 
@@ -472,6 +477,8 @@
 }
 
 void SymbolicRegion::dumpToStream(raw_ostream &os) const {
+  if (isa<HeapSpaceRegion>(getSuperRegion()))
+    os << "Heap";
   os << "SymRegion{" << sym << '}';
 }
 
@@ -767,9 +774,9 @@
       for (BlockDataRegion::referenced_vars_iterator
            I = BR->referenced_vars_begin(),
            E = BR->referenced_vars_end(); I != E; ++I) {
-        if (const VarRegion *VR = dyn_cast<VarRegion>(I.getOriginalRegion()))
-          if (VR->getDecl() == VD)
-            return cast<VarRegion>(I.getCapturedRegion());
+        const VarRegion *VR = I.getOriginalRegion();
+        if (VR->getDecl() == VD)
+          return cast<VarRegion>(I.getCapturedRegion());
       }
     }
 
@@ -1147,6 +1154,36 @@
   return nullptr;
 }
 
+/// Perform a given operation on two integers, return whether it overflows.
+/// Optionally write the resulting output into \p Res.
+static bool checkedOp(
+    int64_t LHS,
+    int64_t RHS,
+    std::function<llvm::APInt(llvm::APInt *, const llvm::APInt &, bool &)> Op,
+    int64_t *Res = nullptr) {
+  llvm::APInt ALHS(/*BitSize=*/64, LHS, /*Signed=*/true);
+  llvm::APInt ARHS(/*BitSize=*/64, RHS, /*Signed=*/true);
+  bool Overflow;
+  llvm::APInt Out = Op(&ALHS, ARHS, Overflow);
+  if (!Overflow && Res)
+    *Res = Out.getSExtValue();
+  return Overflow;
+}
+
+static bool checkedAdd(
+    int64_t LHS,
+    int64_t RHS,
+    int64_t *Res=nullptr) {
+  return checkedOp(LHS, RHS, &llvm::APInt::sadd_ov, Res);
+}
+
+static bool checkedMul(
+    int64_t LHS,
+    int64_t RHS,
+    int64_t *Res=nullptr) {
+  return checkedOp(LHS, RHS, &llvm::APInt::smul_ov, Res);
+}
+
 RegionRawOffset ElementRegion::getAsArrayOffset() const {
   CharUnits offset = CharUnits::Zero();
   const ElementRegion *ER = this;
@@ -1174,6 +1211,18 @@
         }
 
         CharUnits size = C.getTypeSizeInChars(elemType);
+
+        int64_t Mult;
+        bool Overflow = checkedAdd(i, size.getQuantity(), &Mult);
+        if (!Overflow)
+          Overflow = checkedMul(Mult, offset.getQuantity());
+        if (Overflow) {
+          DEBUG(llvm::dbgs() << "MemRegion::getAsArrayOffset: "
+                             << "offset overflowing, returning unknown\n");
+
+          return nullptr;
+        }
+
         offset += (i * size);
       }
 
diff --git a/lib/StaticAnalyzer/Core/PathDiagnostic.cpp b/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
index 9077556..8119e3a 100644
--- a/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
+++ b/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
@@ -98,20 +98,18 @@
 
 PathDiagnostic::~PathDiagnostic() {}
 
-PathDiagnostic::PathDiagnostic(StringRef CheckName, const Decl *declWithIssue,
-                               StringRef bugtype, StringRef verboseDesc,
-                               StringRef shortDesc, StringRef category,
-                               PathDiagnosticLocation LocationToUnique,
-                               const Decl *DeclToUnique)
-  : CheckName(CheckName),
-    DeclWithIssue(declWithIssue),
-    BugType(StripTrailingDots(bugtype)),
-    VerboseDesc(StripTrailingDots(verboseDesc)),
-    ShortDesc(StripTrailingDots(shortDesc)),
-    Category(StripTrailingDots(category)),
-    UniqueingLoc(LocationToUnique),
-    UniqueingDecl(DeclToUnique),
-    path(pathImpl) {}
+PathDiagnostic::PathDiagnostic(
+    StringRef CheckName, const Decl *declWithIssue, StringRef bugtype,
+    StringRef verboseDesc, StringRef shortDesc, StringRef category,
+    PathDiagnosticLocation LocationToUnique, const Decl *DeclToUnique,
+    std::unique_ptr<FilesToLineNumsMap> ExecutedLines)
+    : CheckName(CheckName), DeclWithIssue(declWithIssue),
+      BugType(StripTrailingDots(bugtype)),
+      VerboseDesc(StripTrailingDots(verboseDesc)),
+      ShortDesc(StripTrailingDots(shortDesc)),
+      Category(StripTrailingDots(category)), UniqueingLoc(LocationToUnique),
+      UniqueingDecl(DeclToUnique), ExecutedLines(std::move(ExecutedLines)),
+      path(pathImpl) {}
 
 static PathDiagnosticCallPiece *
 getFirstStackedCallToHeaderFile(PathDiagnosticCallPiece *CP,
@@ -381,11 +379,25 @@
   return None;
 }
 
+static bool compareCrossTUSourceLocs(FullSourceLoc XL, FullSourceLoc YL) {
+  std::pair<FileID, unsigned> XOffs = XL.getDecomposedLoc();
+  std::pair<FileID, unsigned> YOffs = YL.getDecomposedLoc();
+  const SourceManager &SM = XL.getManager();
+  std::pair<bool, bool> InSameTU = SM.isInTheSameTranslationUnit(XOffs, YOffs);
+  if (InSameTU.first)
+    return XL.isBeforeInTranslationUnitThan(YL);
+  const FileEntry *XFE = SM.getFileEntryForID(XL.getFileID());
+  const FileEntry *YFE = SM.getFileEntryForID(YL.getFileID());
+  if (!XFE || !YFE)
+    return XFE && !YFE;
+  return XFE->getName() < YFE->getName();
+}
+
 static bool compare(const PathDiagnostic &X, const PathDiagnostic &Y) {
   FullSourceLoc XL = X.getLocation().asLocation();
   FullSourceLoc YL = Y.getLocation().asLocation();
   if (XL != YL)
-    return XL.isBeforeInTranslationUnitThan(YL);
+    return compareCrossTUSourceLocs(XL, YL);
   if (X.getBugType() != Y.getBugType())
     return X.getBugType() < Y.getBugType();
   if (X.getCategory() != Y.getCategory())
@@ -405,7 +417,8 @@
     SourceLocation YDL = YD->getLocation();
     if (XDL != YDL) {
       const SourceManager &SM = XL.getManager();
-      return SM.isBeforeInTranslationUnit(XDL, YDL);
+      return compareCrossTUSourceLocs(FullSourceLoc(XDL, SM),
+                                      FullSourceLoc(YDL, SM));
     }
   }
   PathDiagnostic::meta_iterator XI = X.meta_begin(), XE = X.meta_end();
@@ -553,6 +566,7 @@
 
   switch (Source.getKind()) {
   case CFGElement::Statement:
+  case CFGElement::Constructor:
     return PathDiagnosticLocation(Source.castAs<CFGStmt>().getStmt(),
                                   SM, CallerCtx);
   case CFGElement::Initializer: {
@@ -576,9 +590,18 @@
       return PathDiagnosticLocation::createEnd(CallerBody, SM, CallerCtx);
     return PathDiagnosticLocation::create(CallerInfo->getDecl(), SM);
   }
-  case CFGElement::TemporaryDtor:
-  case CFGElement::NewAllocator:
-    llvm_unreachable("not yet implemented!");
+  case CFGElement::NewAllocator: {
+    const CFGNewAllocator &Alloc = Source.castAs<CFGNewAllocator>();
+    return PathDiagnosticLocation(Alloc.getAllocatorExpr(), SM, CallerCtx);
+  }
+  case CFGElement::TemporaryDtor: {
+    // Temporary destructors are for temporaries. They die immediately at around
+    // the location of CXXBindTemporaryExpr. If they are lifetime-extended,
+    // they'd be dealt with via an AutomaticObjectDtor instead.
+    const auto &Dtor = Source.castAs<CFGTemporaryDtor>();
+    return PathDiagnosticLocation::createEnd(Dtor.getBindTemporaryExpr(), SM,
+                                             CallerCtx);
+  }
   case CFGElement::LifetimeEnds:
   case CFGElement::LoopExit:
     llvm_unreachable("CFGElement kind should not be on callsite!");
@@ -690,6 +713,15 @@
     return getLocationForCaller(CEE->getCalleeContext(),
                                 CEE->getLocationContext(),
                                 SMng);
+  } else if (Optional<BlockEntrance> BE = P.getAs<BlockEntrance>()) {
+    CFGElement BlockFront = BE->getBlock()->front();
+    if (auto StmtElt = BlockFront.getAs<CFGStmt>()) {
+      return PathDiagnosticLocation(StmtElt->getStmt()->getLocStart(), SMng);
+    } else if (auto NewAllocElt = BlockFront.getAs<CFGNewAllocator>()) {
+      return PathDiagnosticLocation(
+          NewAllocElt->getAllocatorExpr()->getLocStart(), SMng);
+    }
+    llvm_unreachable("Unexpected CFG element at front of block");
   } else {
     llvm_unreachable("Unexpected ProgramPoint");
   }
@@ -732,6 +764,8 @@
     return CEE->getCalleeContext()->getCallSite();
   if (Optional<PostInitializer> PIPP = P.getAs<PostInitializer>())
     return PIPP->getInitializer()->getInit();
+  if (Optional<CallExitBegin> CEB = P.getAs<CallExitBegin>())
+    return CEB->getReturnStmt();
 
   return nullptr;
 }
@@ -1175,6 +1209,9 @@
 StackHintGenerator::~StackHintGenerator() {}
 
 std::string StackHintGeneratorForSymbol::getMessage(const ExplodedNode *N){
+  if (!N)
+    return getMessageForSymbolNotFound();
+
   ProgramPoint P = N->getLocation();
   CallExitEnd CExit = P.castAs<CallExitEnd>();
 
@@ -1184,16 +1221,11 @@
   if (!CE)
     return "";
 
-  if (!N)
-    return getMessageForSymbolNotFound();
-
   // Check if one of the parameters are set to the interesting symbol.
-  ProgramStateRef State = N->getState();
-  const LocationContext *LCtx = N->getLocationContext();
   unsigned ArgIndex = 0;
   for (CallExpr::const_arg_iterator I = CE->arg_begin(),
                                     E = CE->arg_end(); I != E; ++I, ++ArgIndex){
-    SVal SV = State->getSVal(*I, LCtx);
+    SVal SV = N->getSVal(*I);
 
     // Check if the variable corresponding to the symbol is passed by value.
     SymbolRef AS = SV.getAsLocSymbol();
@@ -1203,7 +1235,10 @@
 
     // Check if the parameter is a pointer to the symbol.
     if (Optional<loc::MemRegionVal> Reg = SV.getAs<loc::MemRegionVal>()) {
-      SVal PSV = State->getSVal(Reg->getRegion());
+      // Do not attempt to dereference void*.
+      if ((*I)->getType()->isVoidPointerType())
+        continue;
+      SVal PSV = N->getState()->getSVal(Reg->getRegion());
       SymbolRef AS = PSV.getAsLocSymbol();
       if (AS == Sym) {
         return getMessageForArg(*I, ArgIndex);
@@ -1212,7 +1247,7 @@
   }
 
   // Check if we are returning the interesting symbol.
-  SVal SV = State->getSVal(CE, LCtx);
+  SVal SV = N->getSVal(CE);
   SymbolRef RetSym = SV.getAsLocSymbol();
   if (RetSym == Sym) {
     return getMessageForReturn(CE);
diff --git a/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
index 66812ed..d3e7fe7 100644
--- a/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
+++ b/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
@@ -16,9 +16,12 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Version.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Rewrite/Core/HTMLRewrite.h"
+#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
 #include "clang/StaticAnalyzer/Core/IssueHash.h"
 #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Casting.h"
 using namespace clang;
@@ -30,6 +33,7 @@
     const std::string OutputFile;
     const LangOptions &LangOpts;
     const bool SupportsCrossFileDiagnostics;
+    const bool SerializeStatistics;
   public:
     PlistDiagnostics(AnalyzerOptions &AnalyzerOpts,
                      const std::string& prefix,
@@ -61,7 +65,8 @@
                                    bool supportsMultipleFiles)
   : OutputFile(output),
     LangOpts(LO),
-    SupportsCrossFileDiagnostics(supportsMultipleFiles) {}
+    SupportsCrossFileDiagnostics(supportsMultipleFiles),
+    SerializeStatistics(AnalyzerOpts.shouldSerializeStats()) {}
 
 void ento::createPlistDiagnosticConsumer(AnalyzerOptions &AnalyzerOpts,
                                          PathDiagnosticConsumers &C,
@@ -484,6 +489,15 @@
 
   o << " </array>\n";
 
+  if (llvm::AreStatisticsEnabled() && SerializeStatistics) {
+    o << " <key>statistics</key>\n";
+    std::string stats;
+    llvm::raw_string_ostream os(stats);
+    llvm::PrintStatisticsJSON(os);
+    os.flush();
+    EmitString(o, html::EscapeText(stats)) << '\n';
+  }
+
   // Finish.
   o << "</dict>\n</plist>";
 }
diff --git a/lib/StaticAnalyzer/Core/ProgramState.cpp b/lib/StaticAnalyzer/Core/ProgramState.cpp
index 3215c3c..deb2e4a 100644
--- a/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -17,6 +17,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace clang;
@@ -254,13 +255,15 @@
 }
 
 SVal ProgramState::getSVal(Loc location, QualType T) const {
-  SVal V = getRawSVal(cast<Loc>(location), T);
+  SVal V = getRawSVal(location, T);
 
   // If 'V' is a symbolic value that is *perfectly* constrained to
   // be a constant value, use that value instead to lessen the burden
   // on later analysis stages (so we have less symbolic values to reason
   // about).
-  if (!T.isNull()) {
+  // We only go into this branch if we can convert the APSInt value we have
+  // to the type of T, which is not always the case (e.g. for void).
+  if (!T.isNull() && (T->isIntegralOrEnumerationType() || Loc::isLocType(T))) {
     if (SymbolRef sym = V.getAsSymbol()) {
       if (const llvm::APSInt *Int = getStateManager()
                                     .getConstraintManager()
@@ -352,6 +355,17 @@
   return CM.assume(this, inBound.castAs<DefinedSVal>(), Assumption);
 }
 
+ConditionTruthVal ProgramState::isNonNull(SVal V) const {
+  ConditionTruthVal IsNull = isNull(V);
+  if (IsNull.isUnderconstrained())
+    return IsNull;
+  return ConditionTruthVal(!IsNull.getValue());
+}
+
+ConditionTruthVal ProgramState::areEqual(SVal Lhs, SVal Rhs) const {
+  return stateMgr->getSValBuilder().areEqual(this, Lhs, Rhs);
+}
+
 ConditionTruthVal ProgramState::isNull(SVal V) const {
   if (V.isZeroConstant())
     return true;
@@ -424,24 +438,30 @@
 //  State pretty-printing.
 //===----------------------------------------------------------------------===//
 
-void ProgramState::print(raw_ostream &Out,
-                         const char *NL, const char *Sep) const {
+void ProgramState::print(raw_ostream &Out, const char *NL, const char *Sep,
+                         const LocationContext *LC) const {
   // Print the store.
   ProgramStateManager &Mgr = getStateManager();
   Mgr.getStoreManager().print(getStore(), Out, NL, Sep);
 
   // Print out the environment.
-  Env.print(Out, NL, Sep);
+  Env.print(Out, NL, Sep, LC);
 
   // Print out the constraints.
   Mgr.getConstraintManager().print(this, Out, NL, Sep);
 
+  // Print out the tracked dynamic types.
+  printDynamicTypeInfo(this, Out, NL, Sep);
+
+  // Print out tainted symbols.
+  printTaint(Out, NL, Sep);
+
   // Print checker-specific data.
-  Mgr.getOwningEngine()->printState(Out, this, NL, Sep);
+  Mgr.getOwningEngine()->printState(Out, this, NL, Sep, LC);
 }
 
-void ProgramState::printDOT(raw_ostream &Out) const {
-  print(Out, "\\l", "\\|");
+void ProgramState::printDOT(raw_ostream &Out, const LocationContext *LC) const {
+  print(Out, "\\l", "\\|", LC);
 }
 
 LLVM_DUMP_METHOD void ProgramState::dump() const {
@@ -453,7 +473,7 @@
   TaintMapImpl TM = get<TaintMap>();
 
   if (!TM.isEmpty())
-    Out <<"Tainted Symbols:" << NL;
+    Out <<"Tainted symbols:" << NL;
 
   for (TaintMapImpl::iterator I = TM.begin(), E = TM.end(); I != E; ++I) {
     Out << I->first << " : " << I->second << NL;
@@ -779,8 +799,7 @@
           // complete. For example, this would not currently identify
           // overlapping fields in a union as tainted. To identify this we can
           // check for overlapping/nested byte offsets.
-          if (Kind == I.second &&
-              (R == I.first || R->isSubRegionOf(I.first)))
+          if (Kind == I.second && R->isSubRegionOf(I.first))
             return true;
         }
       }
diff --git a/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
index e0ad2d8..5a4031c 100644
--- a/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -354,7 +354,8 @@
   RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
                          const llvm::APSInt &Int,
                          const llvm::APSInt &Adjustment);
-  RangeSet getSymLERange(const RangeSet &RS, const llvm::APSInt &Int,
+  RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
+                         const llvm::APSInt &Int,
                          const llvm::APSInt &Adjustment);
   RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
                          const llvm::APSInt &Int,
@@ -395,7 +396,9 @@
     }
 
     if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(SE)) {
-      if (BinaryOperator::isComparisonOp(SSE->getOpcode())) {
+      // FIXME: Handle <=> here.
+      if (BinaryOperator::isEqualityOp(SSE->getOpcode()) ||
+          BinaryOperator::isRelationalOp(SSE->getOpcode())) {
         // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
         if (Loc::isLocType(SSE->getLHS()->getType())) {
           assert(Loc::isLocType(SSE->getRHS()->getType()));
@@ -460,6 +463,53 @@
   return Changed ? State->set<ConstraintRange>(CR) : State;
 }
 
+/// Return a range set subtracting zero from \p Domain.
+static RangeSet assumeNonZero(
+    BasicValueFactory &BV,
+    RangeSet::Factory &F,
+    SymbolRef Sym,
+    RangeSet Domain) {
+  APSIntType IntType = BV.getAPSIntType(Sym->getType());
+  return Domain.Intersect(BV, F, ++IntType.getZeroValue(),
+      --IntType.getZeroValue());
+}
+
+/// \brief Apply implicit constraints for bitwise OR- and AND-.
+/// For unsigned types, bitwise OR with a constant always returns
+/// a value greater-or-equal than the constant, and bitwise AND
+/// returns a value less-or-equal then the constant.
+///
+/// Pattern matches the expression \p Sym against those rule,
+/// and applies the required constraints.
+/// \p Input Previously established expression range set
+static RangeSet applyBitwiseConstraints(
+    BasicValueFactory &BV,
+    RangeSet::Factory &F,
+    RangeSet Input,
+    const SymIntExpr* SIE) {
+  QualType T = SIE->getType();
+  bool IsUnsigned = T->isUnsignedIntegerType();
+  const llvm::APSInt &RHS = SIE->getRHS();
+  const llvm::APSInt &Zero = BV.getAPSIntType(T).getZeroValue();
+  BinaryOperator::Opcode Operator = SIE->getOpcode();
+
+  // For unsigned types, the output of bitwise-or is bigger-or-equal than RHS.
+  if (Operator == BO_Or && IsUnsigned)
+    return Input.Intersect(BV, F, RHS, BV.getMaxValue(T));
+
+  // Bitwise-or with a non-zero constant is always non-zero.
+  if (Operator == BO_Or && RHS != Zero)
+    return assumeNonZero(BV, F, SIE, Input);
+
+  // For unsigned types, or positive RHS,
+  // bitwise-and output is always smaller-or-equal than RHS (assuming two's
+  // complement representation of signed types).
+  if (Operator == BO_And && (IsUnsigned || RHS >= Zero))
+    return Input.Intersect(BV, F, BV.getMinValue(T), RHS);
+
+  return Input;
+}
+
 RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
                                           SymbolRef Sym) {
   if (ConstraintRangeTy::data_type *V = State->get<ConstraintRange>(Sym))
@@ -472,12 +522,13 @@
 
   RangeSet Result(F, BV.getMinValue(T), BV.getMaxValue(T));
 
-  // Special case: references are known to be non-zero.
-  if (T->isReferenceType()) {
-    APSIntType IntType = BV.getAPSIntType(T);
-    Result = Result.Intersect(BV, F, ++IntType.getZeroValue(),
-                              --IntType.getZeroValue());
-  }
+  // References are known to be non-zero.
+  if (T->isReferenceType())
+    return assumeNonZero(BV, F, Sym, Result);
+
+  // Known constraints on ranges of bitwise expressions.
+  if (const SymIntExpr* SIE = dyn_cast<SymIntExpr>(Sym))
+    return applyBitwiseConstraints(BV, F, Result, SIE);
 
   return Result;
 }
@@ -637,9 +688,10 @@
   return New.isEmpty() ? nullptr : St->set<ConstraintRange>(Sym, New);
 }
 
-RangeSet RangeConstraintManager::getSymLERange(const RangeSet &RS,
-                                               const llvm::APSInt &Int,
-                                               const llvm::APSInt &Adjustment) {
+RangeSet RangeConstraintManager::getSymLERange(
+      llvm::function_ref<RangeSet()> RS,
+      const llvm::APSInt &Int,
+      const llvm::APSInt &Adjustment) {
   // Before we do any real work, see if the value can even show up.
   APSIntType AdjustmentType(Adjustment);
   switch (AdjustmentType.testInRange(Int, true)) {
@@ -648,48 +700,27 @@
   case APSIntType::RTR_Within:
     break;
   case APSIntType::RTR_Above:
-    return RS;
+    return RS();
   }
 
   // Special case for Int == Max. This is always feasible.
   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
   llvm::APSInt Max = AdjustmentType.getMaxValue();
   if (ComparisonVal == Max)
-    return RS;
+    return RS();
 
   llvm::APSInt Min = AdjustmentType.getMinValue();
   llvm::APSInt Lower = Min - Adjustment;
   llvm::APSInt Upper = ComparisonVal - Adjustment;
 
-  return RS.Intersect(getBasicVals(), F, Lower, Upper);
+  return RS().Intersect(getBasicVals(), F, Lower, Upper);
 }
 
 RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St,
                                                SymbolRef Sym,
                                                const llvm::APSInt &Int,
                                                const llvm::APSInt &Adjustment) {
-  // Before we do any real work, see if the value can even show up.
-  APSIntType AdjustmentType(Adjustment);
-  switch (AdjustmentType.testInRange(Int, true)) {
-  case APSIntType::RTR_Below:
-    return F.getEmptySet();
-  case APSIntType::RTR_Within:
-    break;
-  case APSIntType::RTR_Above:
-    return getRange(St, Sym);
-  }
-
-  // Special case for Int == Max. This is always feasible.
-  llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
-  llvm::APSInt Max = AdjustmentType.getMaxValue();
-  if (ComparisonVal == Max)
-    return getRange(St, Sym);
-
-  llvm::APSInt Min = AdjustmentType.getMinValue();
-  llvm::APSInt Lower = Min - Adjustment;
-  llvm::APSInt Upper = ComparisonVal - Adjustment;
-
-  return getRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
+  return getSymLERange([&] { return getRange(St, Sym); }, Int, Adjustment);
 }
 
 ProgramStateRef
@@ -706,8 +737,8 @@
   RangeSet New = getSymGERange(State, Sym, From, Adjustment);
   if (New.isEmpty())
     return nullptr;
-  New = getSymLERange(New, To, Adjustment);
-  return New.isEmpty() ? nullptr : State->set<ConstraintRange>(Sym, New);
+  RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment);
+  return Out.isEmpty() ? nullptr : State->set<ConstraintRange>(Sym, Out);
 }
 
 ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
diff --git a/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp b/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp
index 1304116..55ff158 100644
--- a/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp
+++ b/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp
@@ -33,7 +33,7 @@
     // We can only simplify expressions whose RHS is an integer.
 
     BinaryOperator::Opcode op = SIE->getOpcode();
-    if (BinaryOperator::isComparisonOp(op)) {
+    if (BinaryOperator::isComparisonOp(op) && op != BO_Cmp) {
       if (!Assumption)
         op = BinaryOperator::negateComparisonOp(op);
 
diff --git a/lib/StaticAnalyzer/Core/RegionStore.cpp b/lib/StaticAnalyzer/Core/RegionStore.cpp
index 693a617..b752da4 100644
--- a/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -134,7 +134,9 @@
   };
 } // end llvm namespace
 
+#ifndef NDEBUG
 LLVM_DUMP_METHOD void BindingKey::dump() const { llvm::errs() << *this; }
+#endif
 
 //===----------------------------------------------------------------------===//
 // Actual Store type.
@@ -823,7 +825,7 @@
   FieldVector FieldsInSymbolicSubregions;
   if (TopKey.hasSymbolicOffset()) {
     getSymbolicOffsetFields(TopKey, FieldsInSymbolicSubregions);
-    Top = cast<SubRegion>(TopKey.getConcreteOffsetRegion());
+    Top = TopKey.getConcreteOffsetRegion();
     TopKey = BindingKey::Make(Top, BindingKey::Default);
   }
 
@@ -869,7 +871,7 @@
 
     } else if (NextKey.hasSymbolicOffset()) {
       const MemRegion *Base = NextKey.getConcreteOffsetRegion();
-      if (Top->isSubRegionOf(Base)) {
+      if (Top->isSubRegionOf(Base) && Top != Base) {
         // Case 3: The next key is symbolic and we just changed something within
         // its concrete region. We don't know if the binding is still valid, so
         // we'll be conservative and include it.
@@ -879,7 +881,7 @@
       } else if (const SubRegion *BaseSR = dyn_cast<SubRegion>(Base)) {
         // Case 4: The next key is symbolic, but we changed a known
         // super-region. In this case the binding is certainly included.
-        if (Top == Base || BaseSR->isSubRegionOf(Top))
+        if (BaseSR->isSubRegionOf(Top))
           if (isCompatibleWithFields(NextKey, FieldsInSymbolicSubregions))
             Bindings.push_back(*I);
       }
@@ -1399,15 +1401,12 @@
         T = TR->getLocationType()->getPointeeType();
       else if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(MR))
         T = SR->getSymbol()->getType()->getPointeeType();
-      else if (isa<AllocaRegion>(MR))
-        T = Ctx.VoidTy;
     }
     assert(!T.isNull() && "Unable to auto-detect binding type!");
-    if (T->isVoidType()) {
-      // When trying to dereference a void pointer, read the first byte.
-      T = Ctx.CharTy;
-    }
+    assert(!T->isVoidType() && "Attempting to dereference a void pointer!");
     MR = GetElementZeroRegion(cast<SubRegion>(MR), T);
+  } else {
+    T = cast<TypedValueRegion>(MR)->getValueType();
   }
 
   // FIXME: Perhaps this method should just take a 'const MemRegion*' argument
@@ -1777,7 +1776,7 @@
   // quickly result in a warning.
   bool hasPartialLazyBinding = false;
 
-  const SubRegion *SR = dyn_cast<SubRegion>(R);
+  const SubRegion *SR = R;
   while (SR) {
     const MemRegion *Base = SR->getSuperRegion();
     if (Optional<SVal> D = getBindingForDerivedDefaultValue(B, Base, R, Ty)) {
@@ -1862,11 +1861,18 @@
     return svalBuilder.getRegionValueSymbolVal(R);
 
   // Is 'VD' declared constant?  If so, retrieve the constant value.
-  if (VD->getType().isConstQualified())
-    if (const Expr *Init = VD->getInit())
+  if (VD->getType().isConstQualified()) {
+    if (const Expr *Init = VD->getInit()) {
       if (Optional<SVal> V = svalBuilder.getConstantVal(Init))
         return *V;
 
+      // If the variable is const qualified and has an initializer but
+      // we couldn't evaluate initializer to a value, treat the value as
+      // unknown.
+      return UnknownVal();
+    }
+  }
+
   // This must come after the check for constants because closure-captured
   // constant variables may appear in UnknownSpaceRegion.
   if (isa<UnknownSpaceRegion>(MS))
@@ -2126,9 +2132,10 @@
       NewB = bind(NewB, loc::MemRegionVal(ER), *VI);
   }
 
-  // If the init list is shorter than the array length, set the
-  // array default value.
-  if (Size.hasValue() && i < Size.getValue())
+  // If the init list is shorter than the array length (or the array has
+  // variable length), set the array default value. Values that are already set
+  // are not overwritten.
+  if (!Size.hasValue() || i < Size.getValue())
     NewB = setImplicitDefaultValue(NewB, R, ElementTy);
 
   return NewB;
diff --git a/lib/StaticAnalyzer/Core/SValBuilder.cpp b/lib/StaticAnalyzer/Core/SValBuilder.cpp
index 04452e3..ab5ec9f 100644
--- a/lib/StaticAnalyzer/Core/SValBuilder.cpp
+++ b/lib/StaticAnalyzer/Core/SValBuilder.cpp
@@ -1,4 +1,4 @@
-// SValBuilder.cpp - Basic class for all SValBuilder implementations -*- C++ -*-
+//===- SValBuilder.cpp - Basic class for all SValBuilder implementations --===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,12 +13,31 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/ExprObjC.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <tuple>
 
 using namespace clang;
 using namespace ento;
@@ -27,7 +46,7 @@
 // Basic SVal creation.
 //===----------------------------------------------------------------------===//
 
-void SValBuilder::anchor() { }
+void SValBuilder::anchor() {}
 
 DefinedOrUnknownSVal SValBuilder::makeZeroVal(QualType type) {
   if (Loc::isLocType(type))
@@ -95,7 +114,7 @@
 }
 
 DefinedOrUnknownSVal
-SValBuilder::getRegionValueSymbolVal(const TypedValueRegion* region) {
+SValBuilder::getRegionValueSymbolVal(const TypedValueRegion *region) {
   QualType T = region->getValueType();
 
   if (T->isNullPtrType())
@@ -149,7 +168,6 @@
   return nonloc::SymbolVal(sym);
 }
 
-
 DefinedOrUnknownSVal SValBuilder::conjureSymbolVal(const Stmt *stmt,
                                                    const LocationContext *LCtx,
                                                    QualType type,
@@ -217,10 +235,10 @@
   return nonloc::SymbolVal(sym);
 }
 
-DefinedSVal SValBuilder::getMemberPointer(const DeclaratorDecl* DD) {
+DefinedSVal SValBuilder::getMemberPointer(const DeclaratorDecl *DD) {
   assert(!DD || isa<CXXMethodDecl>(DD) || isa<FieldDecl>(DD));
 
-  if (auto *MD = dyn_cast_or_null<CXXMethodDecl>(DD)) {
+  if (const auto *MD = dyn_cast_or_null<CXXMethodDecl>(DD)) {
     // Sema treats pointers to static member functions as have function pointer
     // type, so return a function pointer for the method.
     // We don't need to play a similar trick for static member fields
@@ -277,19 +295,19 @@
     return makeZeroVal(E->getType());
 
   case Stmt::ObjCStringLiteralClass: {
-    const ObjCStringLiteral *SL = cast<ObjCStringLiteral>(E);
+    const auto *SL = cast<ObjCStringLiteral>(E);
     return makeLoc(getRegionManager().getObjCStringRegion(SL));
   }
 
   case Stmt::StringLiteralClass: {
-    const StringLiteral *SL = cast<StringLiteral>(E);
+    const auto *SL = cast<StringLiteral>(E);
     return makeLoc(getRegionManager().getStringRegion(SL));
   }
 
   // Fast-path some expressions to avoid the overhead of going through the AST's
   // constant evaluator
   case Stmt::CharacterLiteralClass: {
-    const CharacterLiteral *C = cast<CharacterLiteral>(E);
+    const auto *C = cast<CharacterLiteral>(E);
     return makeIntVal(C->getValue(), C->getType());
   }
 
@@ -297,7 +315,7 @@
     return makeBoolVal(cast<CXXBoolLiteralExpr>(E));
 
   case Stmt::TypeTraitExprClass: {
-    const TypeTraitExpr *TE = cast<TypeTraitExpr>(E);
+    const auto *TE = cast<TypeTraitExpr>(E);
     return makeTruthVal(TE->getValue(), TE->getType());
   }
 
@@ -311,7 +329,7 @@
     return makeNull();
 
   case Stmt::ImplicitCastExprClass: {
-    const CastExpr *CE = cast<CastExpr>(E);
+    const auto *CE = cast<CastExpr>(E);
     switch (CE->getCastKind()) {
     default:
       break;
@@ -348,8 +366,6 @@
   }
 }
 
-//===----------------------------------------------------------------------===//
-
 SVal SValBuilder::makeSymExprValNN(ProgramStateRef State,
                                    BinaryOperator::Opcode Op,
                                    NonLoc LHS, NonLoc RHS,
@@ -378,10 +394,8 @@
   return UnknownVal();
 }
 
-
 SVal SValBuilder::evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op,
                             SVal lhs, SVal rhs, QualType type) {
-
   if (lhs.isUndef() || rhs.isUndef())
     return UndefinedVal();
 
@@ -413,10 +427,19 @@
                      type);
 }
 
+ConditionTruthVal SValBuilder::areEqual(ProgramStateRef state, SVal lhs,
+                                        SVal rhs) {
+  return state->isNonNull(evalEQ(state, lhs, rhs));
+}
+
+SVal SValBuilder::evalEQ(ProgramStateRef state, SVal lhs, SVal rhs) {
+  return evalBinOp(state, BO_EQ, lhs, rhs, getConditionType());
+}
+
 DefinedOrUnknownSVal SValBuilder::evalEQ(ProgramStateRef state,
                                          DefinedOrUnknownSVal lhs,
                                          DefinedOrUnknownSVal rhs) {
-  return evalBinOp(state, BO_EQ, lhs, rhs, getConditionType())
+  return evalEQ(state, static_cast<SVal>(lhs), static_cast<SVal>(rhs))
       .castAs<DefinedOrUnknownSVal>();
 }
 
@@ -454,7 +477,6 @@
 // of the original value is known to be greater than the max of the target type.
 SVal SValBuilder::evalIntegralCast(ProgramStateRef state, SVal val,
                                    QualType castTy, QualType originalTy) {
-
   // No truncations if target type is big enough.
   if (getContext().getTypeSize(castTy) >= getContext().getTypeSize(originalTy))
     return evalCast(val, castTy, originalTy);
@@ -548,8 +570,8 @@
   }
 
   // Check for casts from array type to another type.
-  if (const ArrayType *arrayT =
-                      dyn_cast<ArrayType>(originalTy.getCanonicalType())) {
+  if (const auto *arrayT =
+          dyn_cast<ArrayType>(originalTy.getCanonicalType())) {
     // We will always decay to a pointer.
     QualType elemTy = arrayT->getElementType();
     val = StateMgr.ArrayToPointer(val.castAs<Loc>(), elemTy);
diff --git a/lib/StaticAnalyzer/Core/SVals.cpp b/lib/StaticAnalyzer/Core/SVals.cpp
index a834214..a753765 100644
--- a/lib/StaticAnalyzer/Core/SVals.cpp
+++ b/lib/StaticAnalyzer/Core/SVals.cpp
@@ -1,4 +1,4 @@
-//= RValues.cpp - Abstract RValues for Path-Sens. Value Tracking -*- C++ -*-==//
+//===- RValues.cpp - Abstract RValues for Path-Sens. Value Tracking -------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,20 +12,31 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
-#include "clang/AST/ExprObjC.h"
-#include "clang/Basic/IdentifierTable.h"
-#include "llvm/Support/raw_ostream.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
 using namespace clang;
 using namespace ento;
-using llvm::APSInt;
 
 //===----------------------------------------------------------------------===//
 // Symbol iteration within an SVal.
 //===----------------------------------------------------------------------===//
 
-
 //===----------------------------------------------------------------------===//
 // Utility methods.
 //===----------------------------------------------------------------------===//
@@ -39,7 +50,7 @@
 
   if (Optional<loc::MemRegionVal> RV = getAs<loc::MemRegionVal>()) {
     const MemRegion *R = RV->getRegion();
-    if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R)) {
+    if (const auto *SR = dyn_cast<SymbolicRegion>(R)) {
       SymbolRef sym = SR->getSymbol();
       if (isa<SymbolConjured>(sym))
         return true;
@@ -53,12 +64,12 @@
   if (Optional<loc::MemRegionVal> X = getAs<loc::MemRegionVal>()) {
     const MemRegion* R = X->getRegion();
     if (const FunctionCodeRegion *CTR = R->getAs<FunctionCodeRegion>())
-      if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(CTR->getDecl()))
+      if (const auto *FD = dyn_cast<FunctionDecl>(CTR->getDecl()))
         return FD;
   }
 
   if (auto X = getAs<nonloc::PointerToMember>()) {
-    if (const CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(X->getDecl()))
+    if (const auto *MD = dyn_cast_or_null<CXXMethodDecl>(X->getDecl()))
       return MD;
   }
   return nullptr;
@@ -95,8 +106,8 @@
 
   const MemRegion *R = X->getRegion();
 
-  while (const SubRegion *SR = dyn_cast<SubRegion>(R)) {
-    if (const SymbolicRegion *SymR = dyn_cast<SymbolicRegion>(SR))
+  while (const auto *SR = dyn_cast<SubRegion>(R)) {
+    if (const auto *SymR = dyn_cast<SymbolicRegion>(SR))
       return SymR->getSymbol();
     else
       R = SR->getSuperRegion();
@@ -189,14 +200,14 @@
 nonloc::PointerToMember::iterator nonloc::PointerToMember::begin() const {
   const PTMDataType PTMD = getPTMData();
   if (PTMD.is<const DeclaratorDecl *>())
-    return nonloc::PointerToMember::iterator();
+    return {};
   return PTMD.get<const PointerToMemberData *>()->begin();
 }
 
 nonloc::PointerToMember::iterator nonloc::PointerToMember::end() const {
   const PTMDataType PTMD = getPTMData();
   if (PTMD.is<const DeclaratorDecl *>())
-    return nonloc::PointerToMember::iterator();
+    return {};
   return PTMD.get<const PointerToMemberData *>()->end();
 }
 
@@ -220,7 +231,6 @@
   return isConstant(0);
 }
 
-
 //===----------------------------------------------------------------------===//
 // Transfer function dispatch for Non-Locs.
 //===----------------------------------------------------------------------===//
@@ -254,7 +264,6 @@
 SVal loc::ConcreteInt::evalBinOp(BasicValueFactory& BasicVals,
                                  BinaryOperator::Opcode Op,
                                  const loc::ConcreteInt& R) const {
-
   assert(BinaryOperator::isComparisonOp(Op) || Op == BO_Sub);
 
   const llvm::APSInt *X = BasicVals.evalAPSInt(Op, getValue(), R.getValue());
@@ -300,10 +309,10 @@
          << C.getValue().getBitWidth() << 'b';
       break;
     }
-    case nonloc::SymbolValKind: {
+    case nonloc::SymbolValKind:
       os << castAs<nonloc::SymbolVal>().getSymbol();
       break;
-    }
+
     case nonloc::LocAsIntegerKind: {
       const nonloc::LocAsInteger& C = castAs<nonloc::LocAsInteger>();
       os << C.getLoc() << " [as " << C.getNumBits() << " bit integer]";
@@ -313,14 +322,14 @@
       const nonloc::CompoundVal& C = castAs<nonloc::CompoundVal>();
       os << "compoundVal{";
       bool first = true;
-      for (nonloc::CompoundVal::iterator I=C.begin(), E=C.end(); I!=E; ++I) {
+      for (const auto &I : C) {
         if (first) {
           os << ' '; first = false;
         }
         else
           os << ", ";
 
-        (*I).dumpToStream(os);
+        I.dumpToStream(os);
       }
       os << "}";
       break;
@@ -353,7 +362,7 @@
       break;
     }
     default:
-      assert (false && "Pretty-printed not implemented for this NonLoc.");
+      assert(false && "Pretty-printed not implemented for this NonLoc.");
       break;
   }
 }
diff --git a/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
index a5b5744..0ec4a81 100644
--- a/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ b/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -679,7 +679,7 @@
     if (SymbolRef rSym = rhs.getAsLocSymbol()) {
       // We can only build expressions with symbols on the left,
       // so we need a reversible operator.
-      if (!BinaryOperator::isComparisonOp(op))
+      if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp)
         return UnknownVal();
 
       const llvm::APSInt &lVal = lhs.castAs<loc::ConcreteInt>().getValue();
@@ -988,6 +988,12 @@
         elementType = resultTy->getPointeeType();
     }
 
+    // Represent arithmetic on void pointers as arithmetic on char pointers.
+    // It is fine when a TypedValueRegion of char value type represents
+    // a void pointer. Note that arithmetic on void pointers is a GCC extension.
+    if (elementType->isVoidType())
+      elementType = getContext().CharTy;
+
     if (Optional<NonLoc> indexV = index.getAs<NonLoc>()) {
       return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV,
                                                        superR, getContext()));
diff --git a/lib/StaticAnalyzer/Core/Store.cpp b/lib/StaticAnalyzer/Core/Store.cpp
index 1af49f6..212dc59 100644
--- a/lib/StaticAnalyzer/Core/Store.cpp
+++ b/lib/StaticAnalyzer/Core/Store.cpp
@@ -1,4 +1,4 @@
-//== Store.cpp - Interface for maps from Locations to Values ----*- C++ -*--==//
+//===- Store.cpp - Interface for maps from Locations to Values ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,18 +12,37 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
+#include "clang/AST/ASTContext.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/CharUnits.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclObjC.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
 
 using namespace clang;
 using namespace ento;
 
 StoreManager::StoreManager(ProgramStateManager &stateMgr)
-  : svalBuilder(stateMgr.getSValBuilder()), StateMgr(stateMgr),
-    MRMgr(svalBuilder.getRegionManager()), Ctx(stateMgr.getContext()) {}
+    : svalBuilder(stateMgr.getSValBuilder()), StateMgr(stateMgr),
+      MRMgr(svalBuilder.getRegionManager()), Ctx(stateMgr.getContext()) {}
 
 StoreRef StoreManager::enterStackFrame(Store OldStore,
                                        const CallEvent &Call,
@@ -33,11 +52,8 @@
   SmallVector<CallEvent::FrameBindingTy, 16> InitialBindings;
   Call.getInitialStackFrameContents(LCtx, InitialBindings);
 
-  for (CallEvent::BindingsTy::iterator I = InitialBindings.begin(),
-                                       E = InitialBindings.end();
-       I != E; ++I) {
-    Store = Bind(Store.getStore(), I->first, I->second);
-  }
+  for (const auto &I : InitialBindings)
+    Store = Bind(Store.getStore(), I.first, I.second);
 
   return Store;
 }
@@ -61,7 +77,6 @@
 }
 
 const MemRegion *StoreManager::castRegion(const MemRegion *R, QualType CastToTy) {
-
   ASTContext &Ctx = StateMgr.getContext();
 
   // Handle casts to Objective-C objects.
@@ -92,7 +107,7 @@
 
   // Handle casts from compatible types.
   if (R->isBoundable())
-    if (const TypedValueRegion *TR = dyn_cast<TypedValueRegion>(R)) {
+    if (const auto *TR = dyn_cast<TypedValueRegion>(R)) {
       QualType ObjTy = Ctx.getCanonicalType(TR->getValueType());
       if (CanonPointeeTy == ObjTy)
         return R;
@@ -164,7 +179,7 @@
         // Edge case: we are at 0 bytes off the beginning of baseR.  We
         // check to see if type we are casting to is the same as the base
         // region.  If so, just return the base region.
-        if (const TypedValueRegion *TR = dyn_cast<TypedValueRegion>(baseR)) {
+        if (const auto *TR = dyn_cast<TypedValueRegion>(baseR)) {
           QualType ObjTy = Ctx.getCanonicalType(TR->getValueType());
           QualType CanonPointeeTy = Ctx.getCanonicalType(PointeeTy);
           if (CanonPointeeTy == ObjTy)
@@ -219,7 +234,7 @@
   if (!MR)
     return true;
 
-  const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
+  const auto *TVR = dyn_cast<TypedValueRegion>(MR);
   if (!TVR)
     return true;
 
@@ -253,11 +268,9 @@
 SVal StoreManager::evalDerivedToBase(SVal Derived, const CXXBasePath &Path) {
   // Walk through the path to create nested CXXBaseRegions.
   SVal Result = Derived;
-  for (CXXBasePath::const_iterator I = Path.begin(), E = Path.end();
-       I != E; ++I) {
-    Result = evalDerivedToBase(Result, I->Base->getType(),
-                               I->Base->isVirtual());
-  }
+  for (const auto &I : Path)
+    Result = evalDerivedToBase(Result, I.Base->getType(),
+                               I.Base->isVirtual());
   return Result;
 }
 
@@ -286,9 +299,9 @@
 /// symbolic regions, where the dynamic type is merely bounded (and even then,
 /// only ostensibly!), but does not take advantage of any dynamic type info.
 static const CXXRecordDecl *getCXXRecordType(const MemRegion *MR) {
-  if (const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR))
+  if (const auto *TVR = dyn_cast<TypedValueRegion>(MR))
     return TVR->getValueType()->getAsCXXRecordDecl();
-  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(MR))
+  if (const auto *SR = dyn_cast<SymbolicRegion>(MR))
     return SR->getSymbol()->getType()->getPointeeCXXRecordDecl();
   return nullptr;
 }
@@ -327,7 +340,7 @@
         return evalDerivedToBase(loc::MemRegionVal(MR), Paths.front());
     }
 
-    if (const CXXBaseObjectRegion *BaseR = dyn_cast<CXXBaseObjectRegion>(MR)) {
+    if (const auto *BaseR = dyn_cast<CXXBaseObjectRegion>(MR)) {
       // Drill down the chain to get the derived classes.
       MR = BaseR->getSuperRegion();
       continue;
@@ -361,13 +374,11 @@
   return UnknownVal();
 }
 
-
 /// CastRetrievedVal - Used by subclasses of StoreManager to implement
 ///  implicit casts that arise from loads from regions that are reinterpreted
 ///  as another region.
 SVal StoreManager::CastRetrievedVal(SVal V, const TypedValueRegion *R,
                                     QualType castTy, bool performTestOnly) {
-
   if (castTy.isNull() || V.isUnknownOrUndef())
     return V;
 
@@ -421,7 +432,7 @@
 
   // NOTE: We must have this check first because ObjCIvarDecl is a subclass
   // of FieldDecl.
-  if (const ObjCIvarDecl *ID = dyn_cast<ObjCIvarDecl>(D))
+  if (const auto *ID = dyn_cast<ObjCIvarDecl>(D))
     return loc::MemRegionVal(MRMgr.getObjCIvarRegion(ID, BaseR));
 
   return loc::MemRegionVal(MRMgr.getFieldRegion(cast<FieldDecl>(D), BaseR));
@@ -433,25 +444,26 @@
 
 SVal StoreManager::getLValueElement(QualType elementType, NonLoc Offset,
                                     SVal Base) {
-
   // If the base is an unknown or undefined value, just return it back.
   // FIXME: For absolute pointer addresses, we just return that value back as
   //  well, although in reality we should return the offset added to that
   //  value. See also the similar FIXME in getLValueFieldOrIvar().
   if (Base.isUnknownOrUndef() || Base.getAs<loc::ConcreteInt>())
     return Base;
-
+  
+  if (Base.getAs<loc::GotoLabel>())
+    return UnknownVal();
+  
   const SubRegion *BaseRegion =
       Base.castAs<loc::MemRegionVal>().getRegionAs<SubRegion>();
 
   // Pointer of any type can be cast and used as array base.
-  const ElementRegion *ElemR = dyn_cast<ElementRegion>(BaseRegion);
+  const auto *ElemR = dyn_cast<ElementRegion>(BaseRegion);
 
   // Convert the offset to the appropriate size and signedness.
   Offset = svalBuilder.convertToArrayIndex(Offset).castAs<NonLoc>();
 
   if (!ElemR) {
-    //
     // If the base region is not an ElementRegion, create one.
     // This can happen in the following example:
     //
@@ -459,7 +471,6 @@
     //   p[1] = 8;
     //
     //  Observe that 'p' binds to an AllocaRegion.
-    //
     return loc::MemRegionVal(MRMgr.getElementRegion(elementType, Offset,
                                                     BaseRegion, Ctx));
   }
@@ -496,7 +507,7 @@
                                                   Ctx));
 }
 
-StoreManager::BindingsHandler::~BindingsHandler() {}
+StoreManager::BindingsHandler::~BindingsHandler() = default;
 
 bool StoreManager::FindUniqueBinding::HandleBinding(StoreManager& SMgr,
                                                     Store store,
diff --git a/lib/StaticAnalyzer/Core/SymbolManager.cpp b/lib/StaticAnalyzer/Core/SymbolManager.cpp
index f2d5ee8..d18f83c 100644
--- a/lib/StaticAnalyzer/Core/SymbolManager.cpp
+++ b/lib/StaticAnalyzer/Core/SymbolManager.cpp
@@ -1,4 +1,4 @@
-//== SymbolManager.h - Management of Symbolic Values ------------*- C++ -*--==//
+//===- SymbolManager.h - Management of Symbolic Values --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,15 +13,27 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Expr.h"
 #include "clang/Analysis/Analyses/LiveVariables.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cassert>
 
 using namespace clang;
 using namespace ento;
 
-void SymExpr::anchor() { }
+void SymExpr::anchor() {}
 
 LLVM_DUMP_METHOD void SymExpr::dump() const {
   dumpToStream(llvm::errs());
@@ -88,7 +100,7 @@
      << getRegion() << ',' << T.getAsString() << '}';
 }
 
-void SymbolData::anchor() { }
+void SymbolData::anchor() {}
 
 void SymbolRegionValue::dumpToStream(raw_ostream &os) const {
   os << "reg_$" << getSymbolID()
@@ -138,7 +150,7 @@
       itr.push_back(cast<IntSymExpr>(SE)->getRHS());
       return;
     case SymExpr::SymSymExprKind: {
-      const SymSymExpr *x = cast<SymSymExpr>(SE);
+      const auto *x = cast<SymSymExpr>(SE);
       itr.push_back(x->getLHS());
       itr.push_back(x->getRHS());
       return;
@@ -192,7 +204,6 @@
 const SymbolDerived*
 SymbolManager::getDerivedSymbol(SymbolRef parentSymbol,
                                 const TypedValueRegion *R) {
-
   llvm::FoldingSetNodeID profile;
   SymbolDerived::Profile(profile, parentSymbol, R);
   void *InsertPos;
@@ -227,7 +238,6 @@
 SymbolManager::getMetadataSymbol(const MemRegion* R, const Stmt *S, QualType T,
                                  const LocationContext *LCtx,
                                  unsigned Count, const void *SymbolTag) {
-
   llvm::FoldingSetNodeID profile;
   SymbolMetadata::Profile(profile, R, S, T, LCtx, Count, SymbolTag);
   void *InsertPos;
@@ -382,11 +392,10 @@
   LI->second = HaveMarkedDependents;
 
   if (const SymbolRefSmallVectorTy *Deps = SymMgr.getDependentSymbols(sym)) {
-    for (SymbolRefSmallVectorTy::const_iterator I = Deps->begin(),
-                                                E = Deps->end(); I != E; ++I) {
-      if (TheLiving.find(*I) != TheLiving.end())
+    for (const auto I : *Deps) {
+      if (TheLiving.find(I) != TheLiving.end())
         continue;
-      markLive(*I);
+      markLive(I);
     }
   }
 }
@@ -405,7 +414,7 @@
 void SymbolReaper::markElementIndicesLive(const MemRegion *region) {
   for (auto SR = dyn_cast<SubRegion>(region); SR;
        SR = dyn_cast<SubRegion>(SR->getSuperRegion())) {
-    if (auto ER = dyn_cast<ElementRegion>(SR)) {
+    if (const auto ER = dyn_cast<ElementRegion>(SR)) {
       SVal Idx = ER->getIndex();
       for (auto SI = Idx.symbol_begin(), SE = Idx.symbol_end(); SI != SE; ++SI)
         markLive(*SI);
@@ -432,10 +441,10 @@
 
   MR = MR->getBaseRegion();
 
-  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(MR))
+  if (const auto *SR = dyn_cast<SymbolicRegion>(MR))
     return isLive(SR->getSymbol());
 
-  if (const VarRegion *VR = dyn_cast<VarRegion>(MR))
+  if (const auto *VR = dyn_cast<VarRegion>(MR))
     return isLive(VR, true);
 
   // FIXME: This is a gross over-approximation. What we really need is a way to
@@ -547,7 +556,7 @@
       return false;
 
     unsigned &cachedQuery =
-      const_cast<SymbolReaper*>(this)->includedRegionCache[VR];
+      const_cast<SymbolReaper *>(this)->includedRegionCache[VR];
 
     if (cachedQuery) {
       return cachedQuery == 1;
diff --git a/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
index fccea9e..6c379ed 100644
--- a/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
+++ b/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
@@ -22,6 +22,7 @@
 #include "clang/Analysis/CallGraph.h"
 #include "clang/Analysis/CodeInjector.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/CrossTU/CrossTranslationUnit.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/StaticAnalyzer/Checkers/LocalCheckers.h"
@@ -57,6 +58,8 @@
                       "with inlining turned on).");
 STATISTIC(NumBlocksInAnalyzedFunctions,
                       "The # of basic blocks in the analyzed functions.");
+STATISTIC(NumVisitedBlocksInAnalyzedFunctions,
+          "The # of visited basic blocks in the analyzed functions.");
 STATISTIC(PercentReachableBlocks, "The % of reachable basic blocks.");
 STATISTIC(MaxCFGSize, "The maximum number of basic blocks in a function.");
 
@@ -168,6 +171,7 @@
   AnalyzerOptionsRef Opts;
   ArrayRef<std::string> Plugins;
   CodeInjector *Injector;
+  cross_tu::CrossTranslationUnitContext CTU;
 
   /// \brief Stores the declarations from the local translation unit.
   /// Note, we pre-compute the local declarations at parse time as an
@@ -186,28 +190,31 @@
   std::unique_ptr<AnalysisManager> Mgr;
 
   /// Time the analyzes time of each translation unit.
-  static llvm::Timer* TUTotalTimer;
+  std::unique_ptr<llvm::TimerGroup> AnalyzerTimers;
+  std::unique_ptr<llvm::Timer> TUTotalTimer;
 
   /// The information about analyzed functions shared throughout the
   /// translation unit.
   FunctionSummariesTy FunctionSummaries;
 
-  AnalysisConsumer(const Preprocessor &pp, const std::string &outdir,
+  AnalysisConsumer(CompilerInstance &CI, const std::string &outdir,
                    AnalyzerOptionsRef opts, ArrayRef<std::string> plugins,
                    CodeInjector *injector)
-      : RecVisitorMode(0), RecVisitorBR(nullptr), Ctx(nullptr), PP(pp),
-        OutDir(outdir), Opts(std::move(opts)), Plugins(plugins),
-        Injector(injector) {
+      : RecVisitorMode(0), RecVisitorBR(nullptr), Ctx(nullptr),
+        PP(CI.getPreprocessor()), OutDir(outdir), Opts(std::move(opts)),
+        Plugins(plugins), Injector(injector), CTU(CI) {
     DigestAnalyzerOptions();
-    if (Opts->PrintStats) {
-      llvm::EnableStatistics(false);
-      TUTotalTimer = new llvm::Timer("time", "Analyzer Total Time");
+    if (Opts->PrintStats || Opts->shouldSerializeStats()) {
+      AnalyzerTimers = llvm::make_unique<llvm::TimerGroup>(
+          "analyzer", "Analyzer timers");
+      TUTotalTimer = llvm::make_unique<llvm::Timer>(
+          "time", "Analyzer total time", *AnalyzerTimers);
+      llvm::EnableStatistics(/* PrintOnExit= */ false);
     }
   }
 
   ~AnalysisConsumer() override {
     if (Opts->PrintStats) {
-      delete TUTotalTimer;
       llvm::PrintStatistics();
     }
   }
@@ -384,7 +391,10 @@
 
   /// \brief Check if we should skip (not analyze) the given function.
   AnalysisMode getModeForDecl(Decl *D, AnalysisMode Mode);
+  void runAnalysisOnTranslationUnit(ASTContext &C);
 
+  /// Print \p S to stderr if \c Opts->AnalyzerDisplayProgress is set.
+  void reportAnalyzerProgress(StringRef S);
 };
 } // end anonymous namespace
 
@@ -392,8 +402,6 @@
 //===----------------------------------------------------------------------===//
 // AnalysisConsumer implementation.
 //===----------------------------------------------------------------------===//
-llvm::Timer* AnalysisConsumer::TUTotalTimer = nullptr;
-
 bool AnalysisConsumer::HandleTopLevelDecl(DeclGroupRef DG) {
   storeTopLevelDecls(DG);
   return true;
@@ -508,68 +516,90 @@
   }
 }
 
+static bool isBisonFile(ASTContext &C) {
+  const SourceManager &SM = C.getSourceManager();
+  FileID FID = SM.getMainFileID();
+  StringRef Buffer = SM.getBuffer(FID)->getBuffer();
+  if (Buffer.startswith("/* A Bison parser, made by"))
+    return true;
+  return false;
+}
+
+void AnalysisConsumer::runAnalysisOnTranslationUnit(ASTContext &C) {
+  BugReporter BR(*Mgr);
+  TranslationUnitDecl *TU = C.getTranslationUnitDecl();
+  checkerMgr->runCheckersOnASTDecl(TU, *Mgr, BR);
+
+  // Run the AST-only checks using the order in which functions are defined.
+  // If inlining is not turned on, use the simplest function order for path
+  // sensitive analyzes as well.
+  RecVisitorMode = AM_Syntax;
+  if (!Mgr->shouldInlineCall())
+    RecVisitorMode |= AM_Path;
+  RecVisitorBR = &BR;
+
+  // Process all the top level declarations.
+  //
+  // Note: TraverseDecl may modify LocalTUDecls, but only by appending more
+  // entries.  Thus we don't use an iterator, but rely on LocalTUDecls
+  // random access.  By doing so, we automatically compensate for iterators
+  // possibly being invalidated, although this is a bit slower.
+  const unsigned LocalTUDeclsSize = LocalTUDecls.size();
+  for (unsigned i = 0 ; i < LocalTUDeclsSize ; ++i) {
+    TraverseDecl(LocalTUDecls[i]);
+  }
+
+  if (Mgr->shouldInlineCall())
+    HandleDeclsCallGraph(LocalTUDeclsSize);
+
+  // After all decls handled, run checkers on the entire TranslationUnit.
+  checkerMgr->runCheckersOnEndOfTranslationUnit(TU, *Mgr, BR);
+
+  RecVisitorBR = nullptr;
+}
+
+void AnalysisConsumer::reportAnalyzerProgress(StringRef S) {
+  if (Opts->AnalyzerDisplayProgress)
+    llvm::errs() << S;
+}
+
 void AnalysisConsumer::HandleTranslationUnit(ASTContext &C) {
+
   // Don't run the actions if an error has occurred with parsing the file.
   DiagnosticsEngine &Diags = PP.getDiagnostics();
   if (Diags.hasErrorOccurred() || Diags.hasFatalErrorOccurred())
     return;
 
-  // Don't analyze if the user explicitly asked for no checks to be performed
-  // on this file.
-  if (Opts->DisableAllChecks)
-    return;
+  if (TUTotalTimer) TUTotalTimer->startTimer();
 
-  {
-    if (TUTotalTimer) TUTotalTimer->startTimer();
+  if (isBisonFile(C)) {
+    reportAnalyzerProgress("Skipping bison-generated file\n");
+  } else if (Opts->DisableAllChecks) {
 
-    // Introduce a scope to destroy BR before Mgr.
-    BugReporter BR(*Mgr);
-    TranslationUnitDecl *TU = C.getTranslationUnitDecl();
-    checkerMgr->runCheckersOnASTDecl(TU, *Mgr, BR);
-
-    // Run the AST-only checks using the order in which functions are defined.
-    // If inlining is not turned on, use the simplest function order for path
-    // sensitive analyzes as well.
-    RecVisitorMode = AM_Syntax;
-    if (!Mgr->shouldInlineCall())
-      RecVisitorMode |= AM_Path;
-    RecVisitorBR = &BR;
-
-    // Process all the top level declarations.
-    //
-    // Note: TraverseDecl may modify LocalTUDecls, but only by appending more
-    // entries.  Thus we don't use an iterator, but rely on LocalTUDecls
-    // random access.  By doing so, we automatically compensate for iterators
-    // possibly being invalidated, although this is a bit slower.
-    const unsigned LocalTUDeclsSize = LocalTUDecls.size();
-    for (unsigned i = 0 ; i < LocalTUDeclsSize ; ++i) {
-      TraverseDecl(LocalTUDecls[i]);
-    }
-
-    if (Mgr->shouldInlineCall())
-      HandleDeclsCallGraph(LocalTUDeclsSize);
-
-    // After all decls handled, run checkers on the entire TranslationUnit.
-    checkerMgr->runCheckersOnEndOfTranslationUnit(TU, *Mgr, BR);
-
-    RecVisitorBR = nullptr;
+    // Don't analyze if the user explicitly asked for no checks to be performed
+    // on this file.
+    reportAnalyzerProgress("All checks are disabled using a supplied option\n");
+  } else {
+    // Otherwise, just run the analysis.
+    runAnalysisOnTranslationUnit(C);
   }
 
+  if (TUTotalTimer) TUTotalTimer->stopTimer();
+
+  // Count how many basic blocks we have not covered.
+  NumBlocksInAnalyzedFunctions = FunctionSummaries.getTotalNumBasicBlocks();
+  NumVisitedBlocksInAnalyzedFunctions =
+      FunctionSummaries.getTotalNumVisitedBasicBlocks();
+  if (NumBlocksInAnalyzedFunctions > 0)
+    PercentReachableBlocks =
+      (FunctionSummaries.getTotalNumVisitedBasicBlocks() * 100) /
+        NumBlocksInAnalyzedFunctions;
+
   // Explicitly destroy the PathDiagnosticConsumer.  This will flush its output.
   // FIXME: This should be replaced with something that doesn't rely on
   // side-effects in PathDiagnosticConsumer's destructor. This is required when
   // used with option -disable-free.
   Mgr.reset();
-
-  if (TUTotalTimer) TUTotalTimer->stopTimer();
-
-  // Count how many basic blocks we have not covered.
-  NumBlocksInAnalyzedFunctions = FunctionSummaries.getTotalNumBasicBlocks();
-  if (NumBlocksInAnalyzedFunctions > 0)
-    PercentReachableBlocks =
-      (FunctionSummaries.getTotalNumVisitedBasicBlocks() * 100) /
-        NumBlocksInAnalyzedFunctions;
-
 }
 
 std::string AnalysisConsumer::getFunctionName(const Decl *D) {
@@ -704,7 +734,8 @@
   if (!Mgr->getAnalysisDeclContext(D)->getAnalysis<RelaxedLiveVariables>())
     return;
 
-  ExprEngine Eng(*Mgr, ObjCGCEnabled, VisitedCallees, &FunctionSummaries,IMode);
+  ExprEngine Eng(CTU, *Mgr, ObjCGCEnabled, VisitedCallees, &FunctionSummaries,
+                 IMode);
 
   // Set the graph auditor.
   std::unique_ptr<ExplodedNode::Auditor> Auditor;
@@ -762,7 +793,7 @@
   bool hasModelPath = analyzerOpts->Config.count("model-path") > 0;
 
   return llvm::make_unique<AnalysisConsumer>(
-      CI.getPreprocessor(), CI.getFrontendOpts().OutputFile, analyzerOpts,
+      CI, CI.getFrontendOpts().OutputFile, analyzerOpts,
       CI.getFrontendOpts().Plugins,
       hasModelPath ? new ModelInjector(CI) : nullptr);
 }
diff --git a/lib/StaticAnalyzer/Frontend/CMakeLists.txt b/lib/StaticAnalyzer/Frontend/CMakeLists.txt
index e3ca91a..a93719e 100644
--- a/lib/StaticAnalyzer/Frontend/CMakeLists.txt
+++ b/lib/StaticAnalyzer/Frontend/CMakeLists.txt
@@ -15,6 +15,7 @@
   clangAST
   clangAnalysis
   clangBasic
+  clangCrossTU
   clangFrontend
   clangLex
   clangStaticAnalyzerCheckers
diff --git a/lib/Tooling/AllTUsExecution.cpp b/lib/Tooling/AllTUsExecution.cpp
new file mode 100644
index 0000000..0c0854d
--- /dev/null
+++ b/lib/Tooling/AllTUsExecution.cpp
@@ -0,0 +1,160 @@
+//===- lib/Tooling/AllTUsExecution.cpp - Execute actions on all TUs. ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/AllTUsExecution.h"
+#include "clang/Tooling/ToolExecutorPluginRegistry.h"
+#include "llvm/Support/ThreadPool.h"
+
+namespace clang {
+namespace tooling {
+
+const char *AllTUsToolExecutor::ExecutorName = "AllTUsToolExecutor";
+
+namespace {
+llvm::Error make_string_error(const llvm::Twine &Message) {
+  return llvm::make_error<llvm::StringError>(Message,
+                                             llvm::inconvertibleErrorCode());
+}
+
+ArgumentsAdjuster getDefaultArgumentsAdjusters() {
+  return combineAdjusters(
+      getClangStripOutputAdjuster(),
+      combineAdjusters(getClangSyntaxOnlyAdjuster(),
+                       getClangStripDependencyFileAdjuster()));
+}
+
+class ThreadSafeToolResults : public ToolResults {
+public:
+  void addResult(StringRef Key, StringRef Value) override {
+    std::unique_lock<std::mutex> LockGuard(Mutex);
+    Results.addResult(Key, Value);
+  }
+
+  std::vector<std::pair<std::string, std::string>> AllKVResults() override {
+    return Results.AllKVResults();
+  }
+
+  void forEachResult(llvm::function_ref<void(StringRef Key, StringRef Value)>
+                         Callback) override {
+    Results.forEachResult(Callback);
+  }
+
+private:
+  InMemoryToolResults Results;
+  std::mutex Mutex;
+};
+
+} // namespace
+
+AllTUsToolExecutor::AllTUsToolExecutor(
+    const CompilationDatabase &Compilations, unsigned ThreadCount,
+    std::shared_ptr<PCHContainerOperations> PCHContainerOps)
+    : Compilations(Compilations), Results(new ThreadSafeToolResults),
+      Context(Results.get()), ThreadCount(ThreadCount) {}
+
+AllTUsToolExecutor::AllTUsToolExecutor(
+    CommonOptionsParser Options, unsigned ThreadCount,
+    std::shared_ptr<PCHContainerOperations> PCHContainerOps)
+    : OptionsParser(std::move(Options)),
+      Compilations(OptionsParser->getCompilations()),
+      Results(new ThreadSafeToolResults), Context(Results.get()),
+      ThreadCount(ThreadCount) {}
+
+llvm::Error AllTUsToolExecutor::execute(
+    llvm::ArrayRef<
+        std::pair<std::unique_ptr<FrontendActionFactory>, ArgumentsAdjuster>>
+        Actions) {
+  if (Actions.empty())
+    return make_string_error("No action to execute.");
+
+  if (Actions.size() != 1)
+    return make_string_error(
+        "Only support executing exactly 1 action at this point.");
+
+  std::string ErrorMsg;
+  std::mutex TUMutex;
+  auto AppendError = [&](llvm::Twine Err) {
+    std::unique_lock<std::mutex> LockGuard(TUMutex);
+    ErrorMsg += Err.str();
+  };
+
+  auto Log = [&](llvm::Twine Msg) {
+    std::unique_lock<std::mutex> LockGuard(TUMutex);
+    llvm::errs() << Msg.str() << "\n";
+  };
+
+  auto Files = Compilations.getAllFiles();
+  // Add a counter to track the progress.
+  const std::string TotalNumStr = std::to_string(Files.size());
+  unsigned Counter = 0;
+  auto Count = [&]() {
+    std::unique_lock<std::mutex> LockGuard(TUMutex);
+    return ++Counter;
+  };
+
+  auto &Action = Actions.front();
+
+  {
+    llvm::ThreadPool Pool(ThreadCount == 0 ? llvm::hardware_concurrency()
+                                           : ThreadCount);
+
+    for (std::string File : Files) {
+      Pool.async(
+          [&](std::string Path) {
+            Log("[" + std::to_string(Count()) + "/" + TotalNumStr +
+                "] Processing file " + Path);
+            ClangTool Tool(Compilations, {Path});
+            Tool.appendArgumentsAdjuster(Action.second);
+            Tool.appendArgumentsAdjuster(getDefaultArgumentsAdjusters());
+            for (const auto &FileAndContent : OverlayFiles)
+              Tool.mapVirtualFile(FileAndContent.first(),
+                                  FileAndContent.second);
+            if (Tool.run(Action.first.get()))
+              AppendError(llvm::Twine("Failed to run action on ") + Path +
+                          "\n");
+          },
+          File);
+    }
+  }
+
+  if (!ErrorMsg.empty())
+    return make_string_error(ErrorMsg);
+
+  return llvm::Error::success();
+}
+
+static llvm::cl::opt<unsigned> ExecutorConcurrency(
+    "execute-concurrency",
+    llvm::cl::desc("The number of threads used to process all files in "
+                   "parallel. Set to 0 for hardware concurrency."),
+    llvm::cl::init(0));
+
+class AllTUsToolExecutorPlugin : public ToolExecutorPlugin {
+public:
+  llvm::Expected<std::unique_ptr<ToolExecutor>>
+  create(CommonOptionsParser &OptionsParser) override {
+    if (OptionsParser.getSourcePathList().empty())
+      return make_string_error(
+          "[AllTUsToolExecutorPlugin] Please provide a directory/file path in "
+          "the compilation database.");
+    return llvm::make_unique<AllTUsToolExecutor>(std::move(OptionsParser),
+                                                 ExecutorConcurrency);
+  }
+};
+
+static ToolExecutorPluginRegistry::Add<AllTUsToolExecutorPlugin>
+    X("all-TUs", "Runs FrontendActions on all TUs in the compilation database. "
+                 "Tool results are stored in memory.");
+
+// This anchor is used to force the linker to link in the generated object file
+// and thus register the plugin.
+volatile int AllTUsToolExecutorAnchorSource = 0;
+
+} // end namespace tooling
+} // end namespace clang
diff --git a/lib/Tooling/ArgumentsAdjusters.cpp b/lib/Tooling/ArgumentsAdjusters.cpp
index ac9fd3c..7068ec2 100644
--- a/lib/Tooling/ArgumentsAdjusters.cpp
+++ b/lib/Tooling/ArgumentsAdjusters.cpp
@@ -58,14 +58,14 @@
       StringRef Arg = Args[i];
       // All dependency-file options begin with -M. These include -MM,
       // -MF, -MG, -MP, -MT, -MQ, -MD, and -MMD.
-      if (!Arg.startswith("-M"))
+      if (!Arg.startswith("-M")) {
         AdjustedArgs.push_back(Args[i]);
-
-      if ((Arg == "-MF") || (Arg == "-MT") || (Arg == "-MQ") ||
-          (Arg == "-MD") || (Arg == "-MMD")) {
-        // Output is specified as -MX foo. Skip the next argument also.
-        ++i;
+        continue;
       }
+
+      if (Arg == "-MF" || Arg == "-MT" || Arg == "-MQ")
+        // These flags take an argument: -MX foo. Skip the next argument also.
+        ++i;
     }
     return AdjustedArgs;
   };
@@ -96,6 +96,10 @@
 
 ArgumentsAdjuster combineAdjusters(ArgumentsAdjuster First,
                                    ArgumentsAdjuster Second) {
+  if (!First)
+    return Second;
+  if (!Second)
+    return First;
   return [First, Second](const CommandLineArguments &Args, StringRef File) {
     return Second(First(Args, File), File);
   };
diff --git a/lib/Tooling/CMakeLists.txt b/lib/Tooling/CMakeLists.txt
index 32a1935..1999430 100644
--- a/lib/Tooling/CMakeLists.txt
+++ b/lib/Tooling/CMakeLists.txt
@@ -8,14 +8,17 @@
 add_subdirectory(ASTDiff)
 
 add_clang_library(clangTooling
+  AllTUsExecution.cpp
   ArgumentsAdjusters.cpp
   CommonOptionsParser.cpp
   CompilationDatabase.cpp
+  Execution.cpp
   FileMatchTrie.cpp
   FixIt.cpp
   JSONCompilationDatabase.cpp
   Refactoring.cpp
   RefactoringCallbacks.cpp
+  StandaloneExecution.cpp
   Tooling.cpp
 
   DEPENDS
diff --git a/lib/Tooling/CommonOptionsParser.cpp b/lib/Tooling/CommonOptionsParser.cpp
index edcb7df..74ad4e8 100644
--- a/lib/Tooling/CommonOptionsParser.cpp
+++ b/lib/Tooling/CommonOptionsParser.cpp
@@ -24,9 +24,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/CommandLine.h"
 #include "clang/Tooling/CommonOptionsParser.h"
 #include "clang/Tooling/Tooling.h"
+#include "llvm/Support/CommandLine.h"
 
 using namespace clang::tooling;
 using namespace llvm;
@@ -81,7 +81,7 @@
   return Commands;
 }
 
-CommonOptionsParser::CommonOptionsParser(
+llvm::Error CommonOptionsParser::init(
     int &argc, const char **argv, cl::OptionCategory &Category,
     llvm::cl::NumOccurrencesFlag OccurrencesFlag, const char *Overview) {
   static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden,
@@ -105,20 +105,30 @@
       cl::desc("Additional argument to prepend to the compiler command line"),
       cl::cat(Category), cl::sub(*cl::AllSubCommands));
 
+  cl::ResetAllOptionOccurrences();
+
   cl::HideUnrelatedOptions(Category);
 
   std::string ErrorMessage;
   Compilations =
       FixedCompilationDatabase::loadFromCommandLine(argc, argv, ErrorMessage);
-  if (!Compilations && !ErrorMessage.empty())
-    llvm::errs() << ErrorMessage;
-  cl::ParseCommandLineOptions(argc, argv, Overview);
+  if (!ErrorMessage.empty())
+    ErrorMessage.append("\n");
+  llvm::raw_string_ostream OS(ErrorMessage);
+  // Stop initializing if command-line option parsing failed.
+  if (!cl::ParseCommandLineOptions(argc, argv, Overview, &OS)) {
+    OS.flush();
+    return llvm::make_error<llvm::StringError>("[CommonOptionsParser]: " +
+                                                   ErrorMessage,
+                                               llvm::inconvertibleErrorCode());
+  }
+
   cl::PrintOptionValues();
 
   SourcePathList = SourcePaths;
   if ((OccurrencesFlag == cl::ZeroOrMore || OccurrencesFlag == cl::Optional) &&
       SourcePathList.empty())
-    return;
+    return llvm::Error::success();
   if (!Compilations) {
     if (!BuildPath.empty()) {
       Compilations =
@@ -137,9 +147,34 @@
   auto AdjustingCompilations =
       llvm::make_unique<ArgumentsAdjustingCompilations>(
           std::move(Compilations));
-  AdjustingCompilations->appendArgumentsAdjuster(
-      getInsertArgumentAdjuster(ArgsBefore, ArgumentInsertPosition::BEGIN));
-  AdjustingCompilations->appendArgumentsAdjuster(
+  Adjuster =
+      getInsertArgumentAdjuster(ArgsBefore, ArgumentInsertPosition::BEGIN);
+  Adjuster = combineAdjusters(
+      std::move(Adjuster),
       getInsertArgumentAdjuster(ArgsAfter, ArgumentInsertPosition::END));
+  AdjustingCompilations->appendArgumentsAdjuster(Adjuster);
   Compilations = std::move(AdjustingCompilations);
+  return llvm::Error::success();
+}
+
+llvm::Expected<CommonOptionsParser> CommonOptionsParser::create(
+    int &argc, const char **argv, llvm::cl::OptionCategory &Category,
+    llvm::cl::NumOccurrencesFlag OccurrencesFlag, const char *Overview) {
+  CommonOptionsParser Parser;
+  llvm::Error Err =
+      Parser.init(argc, argv, Category, OccurrencesFlag, Overview);
+  if (Err)
+    return std::move(Err);
+  return std::move(Parser);
+}
+
+CommonOptionsParser::CommonOptionsParser(
+    int &argc, const char **argv, cl::OptionCategory &Category,
+    llvm::cl::NumOccurrencesFlag OccurrencesFlag, const char *Overview) {
+  llvm::Error Err = init(argc, argv, Category, OccurrencesFlag, Overview);
+  if (Err) {
+    llvm::report_fatal_error(
+        "CommonOptionsParser: failed to parse command-line arguments. " +
+        llvm::toString(std::move(Err)));
+  }
 }
diff --git a/lib/Tooling/CompilationDatabase.cpp b/lib/Tooling/CompilationDatabase.cpp
index 0e83557..92b76b1 100644
--- a/lib/Tooling/CompilationDatabase.cpp
+++ b/lib/Tooling/CompilationDatabase.cpp
@@ -10,6 +10,9 @@
 //  This file contains implementations of the CompilationDatabase base class
 //  and the FixedCompilationDatabase.
 //
+//  FIXME: Various functions that take a string &ErrorMessage should be upgraded
+//  to Expected.
+//
 //===----------------------------------------------------------------------===//
 
 #include "clang/Tooling/CompilationDatabase.h"
@@ -26,6 +29,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Support/Host.h"
+#include "llvm/Support/LineIterator.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 #include <sstream>
@@ -108,6 +112,15 @@
   return DB;
 }
 
+std::vector<CompileCommand> CompilationDatabase::getAllCompileCommands() const {
+  std::vector<CompileCommand> Result;
+  for (const auto &File : getAllFiles()) {
+    auto C = getCompileCommands(File);
+    std::move(C.begin(), C.end(), std::back_inserter(Result));
+  }
+  return Result;
+}
+
 CompilationDatabasePlugin::~CompilationDatabasePlugin() {}
 
 namespace {
@@ -302,8 +315,22 @@
   std::vector<std::string> StrippedArgs;
   if (!stripPositionalArgs(CommandLine, StrippedArgs, ErrorMsg))
     return nullptr;
-  return std::unique_ptr<FixedCompilationDatabase>(
-      new FixedCompilationDatabase(Directory, StrippedArgs));
+  return llvm::make_unique<FixedCompilationDatabase>(Directory, StrippedArgs);
+}
+
+std::unique_ptr<FixedCompilationDatabase>
+FixedCompilationDatabase::loadFromFile(StringRef Path, std::string &ErrorMsg) {
+  ErrorMsg.clear();
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> File =
+      llvm::MemoryBuffer::getFile(Path);
+  if (std::error_code Result = File.getError()) {
+    ErrorMsg = "Error while opening fixed database: " + Result.message();
+    return nullptr;
+  }
+  std::vector<std::string> Args{llvm::line_iterator(**File),
+                                llvm::line_iterator()};
+  return llvm::make_unique<FixedCompilationDatabase>(
+      llvm::sys::path::parent_path(Path), std::move(Args));
 }
 
 FixedCompilationDatabase::
@@ -324,15 +351,21 @@
   return Result;
 }
 
-std::vector<std::string>
-FixedCompilationDatabase::getAllFiles() const {
-  return std::vector<std::string>();
-}
+namespace {
 
-std::vector<CompileCommand>
-FixedCompilationDatabase::getAllCompileCommands() const {
-  return std::vector<CompileCommand>();
-}
+class FixedCompilationDatabasePlugin : public CompilationDatabasePlugin {
+  std::unique_ptr<CompilationDatabase>
+  loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override {
+    SmallString<1024> DatabasePath(Directory);
+    llvm::sys::path::append(DatabasePath, "compile_flags.txt");
+    return FixedCompilationDatabase::loadFromFile(DatabasePath, ErrorMessage);
+  }
+};
+
+static CompilationDatabasePluginRegistry::Add<FixedCompilationDatabasePlugin>
+X("fixed-compilation-database", "Reads plain-text flags file");
+
+} // namespace
 
 namespace clang {
 namespace tooling {
diff --git a/lib/Tooling/Core/CMakeLists.txt b/lib/Tooling/Core/CMakeLists.txt
index e2b0dd4..b302479 100644
--- a/lib/Tooling/Core/CMakeLists.txt
+++ b/lib/Tooling/Core/CMakeLists.txt
@@ -3,7 +3,6 @@
 add_clang_library(clangToolingCore
   Lookup.cpp
   Replacement.cpp
-  QualTypeNames.cpp
   Diagnostic.cpp
 
   LINK_LIBS
diff --git a/lib/Tooling/Core/QualTypeNames.cpp b/lib/Tooling/Core/QualTypeNames.cpp
deleted file mode 100644
index 721c2c9..0000000
--- a/lib/Tooling/Core/QualTypeNames.cpp
+++ /dev/null
@@ -1,477 +0,0 @@
-//===------- QualTypeNames.cpp - Generate Complete QualType Names ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "clang/Tooling/Core/QualTypeNames.h"
-#include "clang/AST/DeclTemplate.h"
-#include "clang/AST/DeclarationName.h"
-#include "clang/AST/GlobalDecl.h"
-#include "clang/AST/Mangle.h"
-
-#include <stdio.h>
-#include <memory>
-
-namespace clang {
-
-namespace TypeName {
-/// \brief Generates a QualType that can be used to name the same type
-/// if used at the end of the current translation unit. This ignores
-/// issues such as type shadowing.
-///
-/// \param[in] QT - the type for which the fully qualified type will be
-/// returned.
-/// \param[in] Ctx - the ASTContext to be used.
-/// \param[in] WithGlobalNsPrefix - Indicate whether the global namespace
-/// specifier "::" should be prepended or not.
-static QualType getFullyQualifiedType(QualType QT, const ASTContext &Ctx,
-                                      bool WithGlobalNsPrefix);
-
-/// \brief Create a NestedNameSpecifier for Namesp and its enclosing
-/// scopes.
-///
-/// \param[in] Ctx - the AST Context to be used.
-/// \param[in] Namesp - the NamespaceDecl for which a NestedNameSpecifier
-/// is requested.
-/// \param[in] WithGlobalNsPrefix - Indicate whether the global namespace
-/// specifier "::" should be prepended or not.
-static NestedNameSpecifier *createNestedNameSpecifier(
-    const ASTContext &Ctx,
-    const NamespaceDecl *Namesp,
-    bool WithGlobalNsPrefix);
-
-/// \brief Create a NestedNameSpecifier for TagDecl and its enclosing
-/// scopes.
-///
-/// \param[in] Ctx - the AST Context to be used.
-/// \param[in] TD - the TagDecl for which a NestedNameSpecifier is
-/// requested.
-/// \param[in] FullyQualify - Convert all template arguments into fully
-/// qualified names.
-/// \param[in] WithGlobalNsPrefix - Indicate whether the global namespace
-/// specifier "::" should be prepended or not.
-static NestedNameSpecifier *createNestedNameSpecifier(
-    const ASTContext &Ctx, const TypeDecl *TD,
-    bool FullyQualify, bool WithGlobalNsPrefix);
-
-static NestedNameSpecifier *createNestedNameSpecifierForScopeOf(
-    const ASTContext &Ctx, const Decl *decl,
-    bool FullyQualified, bool WithGlobalNsPrefix);
-
-static NestedNameSpecifier *getFullyQualifiedNestedNameSpecifier(
-    const ASTContext &Ctx, NestedNameSpecifier *scope, bool WithGlobalNsPrefix);
-
-static bool getFullyQualifiedTemplateName(const ASTContext &Ctx,
-                                          TemplateName &TName,
-                                          bool WithGlobalNsPrefix) {
-  bool Changed = false;
-  NestedNameSpecifier *NNS = nullptr;
-
-  TemplateDecl *ArgTDecl = TName.getAsTemplateDecl();
-  // ArgTDecl won't be NULL because we asserted that this isn't a
-  // dependent context very early in the call chain.
-  assert(ArgTDecl != nullptr);
-  QualifiedTemplateName *QTName = TName.getAsQualifiedTemplateName();
-
-  if (QTName && !QTName->hasTemplateKeyword()) {
-    NNS = QTName->getQualifier();
-    NestedNameSpecifier *QNNS = getFullyQualifiedNestedNameSpecifier(
-        Ctx, NNS, WithGlobalNsPrefix);
-    if (QNNS != NNS) {
-      Changed = true;
-      NNS = QNNS;
-    } else {
-      NNS = nullptr;
-    }
-  } else {
-    NNS = createNestedNameSpecifierForScopeOf(
-        Ctx, ArgTDecl, true, WithGlobalNsPrefix);
-  }
-  if (NNS) {
-    TName = Ctx.getQualifiedTemplateName(NNS,
-                                         /*TemplateKeyword=*/false, ArgTDecl);
-    Changed = true;
-  }
-  return Changed;
-}
-
-static bool getFullyQualifiedTemplateArgument(const ASTContext &Ctx,
-                                              TemplateArgument &Arg,
-                                              bool WithGlobalNsPrefix) {
-  bool Changed = false;
-
-  // Note: we do not handle TemplateArgument::Expression, to replace it
-  // we need the information for the template instance decl.
-
-  if (Arg.getKind() == TemplateArgument::Template) {
-    TemplateName TName = Arg.getAsTemplate();
-    Changed = getFullyQualifiedTemplateName(Ctx, TName, WithGlobalNsPrefix);
-    if (Changed) {
-      Arg = TemplateArgument(TName);
-    }
-  } else if (Arg.getKind() == TemplateArgument::Type) {
-    QualType SubTy = Arg.getAsType();
-    // Check if the type needs more desugaring and recurse.
-    QualType QTFQ = getFullyQualifiedType(SubTy, Ctx, WithGlobalNsPrefix);
-    if (QTFQ != SubTy) {
-      Arg = TemplateArgument(QTFQ);
-      Changed = true;
-    }
-  }
-  return Changed;
-}
-
-static const Type *getFullyQualifiedTemplateType(const ASTContext &Ctx,
-                                                 const Type *TypePtr,
-                                                 bool WithGlobalNsPrefix) {
-  // DependentTemplateTypes exist within template declarations and
-  // definitions. Therefore we shouldn't encounter them at the end of
-  // a translation unit. If we do, the caller has made an error.
-  assert(!isa<DependentTemplateSpecializationType>(TypePtr));
-  // In case of template specializations, iterate over the arguments
-  // and fully qualify them as well.
-  if (const auto *TST = dyn_cast<const TemplateSpecializationType>(TypePtr)) {
-    bool MightHaveChanged = false;
-    SmallVector<TemplateArgument, 4> FQArgs;
-    for (TemplateSpecializationType::iterator I = TST->begin(), E = TST->end();
-         I != E; ++I) {
-      // Cheap to copy and potentially modified by
-      // getFullyQualifedTemplateArgument.
-      TemplateArgument Arg(*I);
-      MightHaveChanged |= getFullyQualifiedTemplateArgument(
-          Ctx, Arg, WithGlobalNsPrefix);
-      FQArgs.push_back(Arg);
-    }
-
-    // If a fully qualified arg is different from the unqualified arg,
-    // allocate new type in the AST.
-    if (MightHaveChanged) {
-      QualType QT = Ctx.getTemplateSpecializationType(
-          TST->getTemplateName(), FQArgs,
-          TST->getCanonicalTypeInternal());
-      // getTemplateSpecializationType returns a fully qualified
-      // version of the specialization itself, so no need to qualify
-      // it.
-      return QT.getTypePtr();
-    }
-  } else if (const auto *TSTRecord = dyn_cast<const RecordType>(TypePtr)) {
-    // We are asked to fully qualify and we have a Record Type,
-    // which can point to a template instantiation with no sugar in any of
-    // its template argument, however we still need to fully qualify them.
-
-    if (const auto *TSTDecl =
-        dyn_cast<ClassTemplateSpecializationDecl>(TSTRecord->getDecl())) {
-      const TemplateArgumentList &TemplateArgs = TSTDecl->getTemplateArgs();
-
-      bool MightHaveChanged = false;
-      SmallVector<TemplateArgument, 4> FQArgs;
-      for (unsigned int I = 0, E = TemplateArgs.size(); I != E; ++I) {
-        // cheap to copy and potentially modified by
-        // getFullyQualifedTemplateArgument
-        TemplateArgument Arg(TemplateArgs[I]);
-        MightHaveChanged |= getFullyQualifiedTemplateArgument(
-            Ctx, Arg, WithGlobalNsPrefix);
-        FQArgs.push_back(Arg);
-      }
-
-      // If a fully qualified arg is different from the unqualified arg,
-      // allocate new type in the AST.
-      if (MightHaveChanged) {
-        TemplateName TN(TSTDecl->getSpecializedTemplate());
-        QualType QT = Ctx.getTemplateSpecializationType(
-            TN, FQArgs,
-            TSTRecord->getCanonicalTypeInternal());
-        // getTemplateSpecializationType returns a fully qualified
-        // version of the specialization itself, so no need to qualify
-        // it.
-        return QT.getTypePtr();
-      }
-    }
-  }
-  return TypePtr;
-}
-
-static NestedNameSpecifier *createOuterNNS(const ASTContext &Ctx, const Decl *D,
-                                           bool FullyQualify,
-                                           bool WithGlobalNsPrefix) {
-  const DeclContext *DC = D->getDeclContext();
-  if (const auto *NS = dyn_cast<NamespaceDecl>(DC)) {
-    while (NS && NS->isInline()) {
-      // Ignore inline namespace;
-      NS = dyn_cast<NamespaceDecl>(NS->getDeclContext());
-    }
-    if (NS->getDeclName()) {
-      return createNestedNameSpecifier(Ctx, NS, WithGlobalNsPrefix);
-    }
-    return nullptr;  // no starting '::', no anonymous
-  } else if (const auto *TD = dyn_cast<TagDecl>(DC)) {
-    return createNestedNameSpecifier(Ctx, TD, FullyQualify, WithGlobalNsPrefix);
-  } else if (const auto *TDD = dyn_cast<TypedefNameDecl>(DC)) {
-    return createNestedNameSpecifier(
-        Ctx, TDD, FullyQualify, WithGlobalNsPrefix);
-  } else if (WithGlobalNsPrefix && DC->isTranslationUnit()) {
-    return NestedNameSpecifier::GlobalSpecifier(Ctx);
-  }
-  return nullptr;  // no starting '::' if |WithGlobalNsPrefix| is false
-}
-
-/// \brief Return a fully qualified version of this name specifier.
-static NestedNameSpecifier *getFullyQualifiedNestedNameSpecifier(
-    const ASTContext &Ctx, NestedNameSpecifier *Scope,
-    bool WithGlobalNsPrefix) {
-  switch (Scope->getKind()) {
-    case NestedNameSpecifier::Global:
-      // Already fully qualified
-      return Scope;
-    case NestedNameSpecifier::Namespace:
-      return TypeName::createNestedNameSpecifier(
-          Ctx, Scope->getAsNamespace(), WithGlobalNsPrefix);
-    case NestedNameSpecifier::NamespaceAlias:
-      // Namespace aliases are only valid for the duration of the
-      // scope where they were introduced, and therefore are often
-      // invalid at the end of the TU.  So use the namespace name more
-      // likely to be valid at the end of the TU.
-      return TypeName::createNestedNameSpecifier(
-          Ctx,
-          Scope->getAsNamespaceAlias()->getNamespace()->getCanonicalDecl(),
-          WithGlobalNsPrefix);
-    case NestedNameSpecifier::Identifier:
-      // A function or some other construct that makes it un-namable
-      // at the end of the TU. Skip the current component of the name,
-      // but use the name of it's prefix.
-      return getFullyQualifiedNestedNameSpecifier(
-          Ctx, Scope->getPrefix(), WithGlobalNsPrefix);
-    case NestedNameSpecifier::Super:
-    case NestedNameSpecifier::TypeSpec:
-    case NestedNameSpecifier::TypeSpecWithTemplate: {
-      const Type *Type = Scope->getAsType();
-      // Find decl context.
-      const TagDecl *TD = nullptr;
-      if (const TagType *TagDeclType = Type->getAs<TagType>()) {
-        TD = TagDeclType->getDecl();
-      } else {
-        TD = Type->getAsCXXRecordDecl();
-      }
-      if (TD) {
-        return TypeName::createNestedNameSpecifier(Ctx, TD,
-                                                   true /*FullyQualified*/,
-                                                   WithGlobalNsPrefix);
-      } else if (const auto *TDD = dyn_cast<TypedefType>(Type)) {
-        return TypeName::createNestedNameSpecifier(Ctx, TDD->getDecl(),
-                                                   true /*FullyQualified*/,
-                                                   WithGlobalNsPrefix);
-      }
-      return Scope;
-    }
-  }
-  llvm_unreachable("bad NNS kind");
-}
-
-/// \brief Create a nested name specifier for the declaring context of
-/// the type.
-static NestedNameSpecifier *createNestedNameSpecifierForScopeOf(
-    const ASTContext &Ctx, const Decl *Decl,
-    bool FullyQualified, bool WithGlobalNsPrefix) {
-  assert(Decl);
-
-  const DeclContext *DC = Decl->getDeclContext()->getRedeclContext();
-  const auto *Outer = dyn_cast_or_null<NamedDecl>(DC);
-  const auto *OuterNS = dyn_cast_or_null<NamespaceDecl>(DC);
-  if (Outer && !(OuterNS && OuterNS->isAnonymousNamespace())) {
-    if (const auto *CxxDecl = dyn_cast<CXXRecordDecl>(DC)) {
-      if (ClassTemplateDecl *ClassTempl =
-              CxxDecl->getDescribedClassTemplate()) {
-        // We are in the case of a type(def) that was declared in a
-        // class template but is *not* type dependent.  In clang, it
-        // gets attached to the class template declaration rather than
-        // any specific class template instantiation.  This result in
-        // 'odd' fully qualified typename:
-        //
-        //    vector<_Tp,_Alloc>::size_type
-        //
-        // Make the situation is 'useable' but looking a bit odd by
-        // picking a random instance as the declaring context.
-        if (ClassTempl->spec_begin() != ClassTempl->spec_end()) {
-          Decl = *(ClassTempl->spec_begin());
-          Outer = dyn_cast<NamedDecl>(Decl);
-          OuterNS = dyn_cast<NamespaceDecl>(Decl);
-        }
-      }
-    }
-
-    if (OuterNS) {
-      return createNestedNameSpecifier(Ctx, OuterNS, WithGlobalNsPrefix);
-    } else if (const auto *TD = dyn_cast<TagDecl>(Outer)) {
-      return createNestedNameSpecifier(
-          Ctx, TD, FullyQualified, WithGlobalNsPrefix);
-    } else if (dyn_cast<TranslationUnitDecl>(Outer)) {
-      // Context is the TU. Nothing needs to be done.
-      return nullptr;
-    } else {
-      // Decl's context was neither the TU, a namespace, nor a
-      // TagDecl, which means it is a type local to a scope, and not
-      // accessible at the end of the TU.
-      return nullptr;
-    }
-  } else if (WithGlobalNsPrefix && DC->isTranslationUnit()) {
-    return NestedNameSpecifier::GlobalSpecifier(Ctx);
-  }
-  return nullptr;
-}
-
-/// \brief Create a nested name specifier for the declaring context of
-/// the type.
-static NestedNameSpecifier *createNestedNameSpecifierForScopeOf(
-    const ASTContext &Ctx, const Type *TypePtr,
-    bool FullyQualified, bool WithGlobalNsPrefix) {
-  if (!TypePtr) return nullptr;
-
-  Decl *Decl = nullptr;
-  // There are probably other cases ...
-  if (const auto *TDT = dyn_cast<TypedefType>(TypePtr)) {
-    Decl = TDT->getDecl();
-  } else if (const auto *TagDeclType = dyn_cast<TagType>(TypePtr)) {
-    Decl = TagDeclType->getDecl();
-  } else if (const auto *TST = dyn_cast<TemplateSpecializationType>(TypePtr)) {
-    Decl = TST->getTemplateName().getAsTemplateDecl();
-  } else {
-    Decl = TypePtr->getAsCXXRecordDecl();
-  }
-
-  if (!Decl) return nullptr;
-
-  return createNestedNameSpecifierForScopeOf(
-      Ctx, Decl, FullyQualified, WithGlobalNsPrefix);
-}
-
-NestedNameSpecifier *createNestedNameSpecifier(const ASTContext &Ctx,
-                                               const NamespaceDecl *Namespace,
-                                               bool WithGlobalNsPrefix) {
-  while (Namespace && Namespace->isInline()) {
-    // Ignore inline namespace;
-    Namespace = dyn_cast<NamespaceDecl>(Namespace->getDeclContext());
-  }
-  if (!Namespace) return nullptr;
-
-  bool FullyQualified = true;  // doesn't matter, DeclContexts are namespaces
-  return NestedNameSpecifier::Create(
-      Ctx,
-      createOuterNNS(Ctx, Namespace, FullyQualified, WithGlobalNsPrefix),
-      Namespace);
-}
-
-NestedNameSpecifier *createNestedNameSpecifier(const ASTContext &Ctx,
-                                               const TypeDecl *TD,
-                                               bool FullyQualify,
-                                               bool WithGlobalNsPrefix) {
-  return NestedNameSpecifier::Create(
-      Ctx,
-      createOuterNNS(Ctx, TD, FullyQualify, WithGlobalNsPrefix),
-      false /*No TemplateKeyword*/,
-      TD->getTypeForDecl());
-}
-
-/// \brief Return the fully qualified type, including fully-qualified
-/// versions of any template parameters.
-QualType getFullyQualifiedType(QualType QT, const ASTContext &Ctx,
-                               bool WithGlobalNsPrefix) {
-  // In case of myType* we need to strip the pointer first, fully
-  // qualify and attach the pointer once again.
-  if (isa<PointerType>(QT.getTypePtr())) {
-    // Get the qualifiers.
-    Qualifiers Quals = QT.getQualifiers();
-    QT = getFullyQualifiedType(QT->getPointeeType(), Ctx, WithGlobalNsPrefix);
-    QT = Ctx.getPointerType(QT);
-    // Add back the qualifiers.
-    QT = Ctx.getQualifiedType(QT, Quals);
-    return QT;
-  }
-
-  // In case of myType& we need to strip the reference first, fully
-  // qualify and attach the reference once again.
-  if (isa<ReferenceType>(QT.getTypePtr())) {
-    // Get the qualifiers.
-    bool IsLValueRefTy = isa<LValueReferenceType>(QT.getTypePtr());
-    Qualifiers Quals = QT.getQualifiers();
-    QT = getFullyQualifiedType(QT->getPointeeType(), Ctx, WithGlobalNsPrefix);
-    // Add the r- or l-value reference type back to the fully
-    // qualified one.
-    if (IsLValueRefTy)
-      QT = Ctx.getLValueReferenceType(QT);
-    else
-      QT = Ctx.getRValueReferenceType(QT);
-    // Add back the qualifiers.
-    QT = Ctx.getQualifiedType(QT, Quals);
-    return QT;
-  }
-
-  // Remove the part of the type related to the type being a template
-  // parameter (we won't report it as part of the 'type name' and it
-  // is actually make the code below to be more complex (to handle
-  // those)
-  while (isa<SubstTemplateTypeParmType>(QT.getTypePtr())) {
-    // Get the qualifiers.
-    Qualifiers Quals = QT.getQualifiers();
-
-    QT = dyn_cast<SubstTemplateTypeParmType>(QT.getTypePtr())->desugar();
-
-    // Add back the qualifiers.
-    QT = Ctx.getQualifiedType(QT, Quals);
-  }
-
-  NestedNameSpecifier *Prefix = nullptr;
-  // Local qualifiers are attached to the QualType outside of the
-  // elaborated type.  Retrieve them before descending into the
-  // elaborated type.
-  Qualifiers PrefixQualifiers = QT.getLocalQualifiers();
-  QT = QualType(QT.getTypePtr(), 0);
-  ElaboratedTypeKeyword Keyword = ETK_None;
-  if (const auto *ETypeInput = dyn_cast<ElaboratedType>(QT.getTypePtr())) {
-    QT = ETypeInput->getNamedType();
-    assert(!QT.hasLocalQualifiers());
-    Keyword = ETypeInput->getKeyword();
-  }
-  // Create a nested name specifier if needed.
-  Prefix = createNestedNameSpecifierForScopeOf(Ctx, QT.getTypePtr(),
-                                               true /*FullyQualified*/,
-                                               WithGlobalNsPrefix);
-
-  // In case of template specializations iterate over the arguments and
-  // fully qualify them as well.
-  if (isa<const TemplateSpecializationType>(QT.getTypePtr()) ||
-      isa<const RecordType>(QT.getTypePtr())) {
-    // We are asked to fully qualify and we have a Record Type (which
-    // may point to a template specialization) or Template
-    // Specialization Type. We need to fully qualify their arguments.
-
-    const Type *TypePtr = getFullyQualifiedTemplateType(
-        Ctx, QT.getTypePtr(), WithGlobalNsPrefix);
-    QT = QualType(TypePtr, 0);
-  }
-  if (Prefix || Keyword != ETK_None) {
-    QT = Ctx.getElaboratedType(Keyword, Prefix, QT);
-  }
-  QT = Ctx.getQualifiedType(QT, PrefixQualifiers);
-  return QT;
-}
-
-std::string getFullyQualifiedName(QualType QT,
-                                  const ASTContext &Ctx,
-                                  bool WithGlobalNsPrefix) {
-  PrintingPolicy Policy(Ctx.getPrintingPolicy());
-  Policy.SuppressScope = false;
-  Policy.AnonymousTagLocations = false;
-  Policy.PolishForDeclaration = true;
-  Policy.SuppressUnwrittenScope = true;
-  QualType FQQT = getFullyQualifiedType(QT, Ctx, WithGlobalNsPrefix);
-  return FQQT.getAsString(Policy);
-}
-
-}  // end namespace TypeName
-}  // end namespace clang
diff --git a/lib/Tooling/Execution.cpp b/lib/Tooling/Execution.cpp
new file mode 100644
index 0000000..ff68f85
--- /dev/null
+++ b/lib/Tooling/Execution.cpp
@@ -0,0 +1,108 @@
+//===- lib/Tooling/Execution.cpp - Implements tool execution framework. ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Execution.h"
+#include "clang/Tooling/ToolExecutorPluginRegistry.h"
+#include "clang/Tooling/Tooling.h"
+
+LLVM_INSTANTIATE_REGISTRY(clang::tooling::ToolExecutorPluginRegistry)
+
+namespace clang {
+namespace tooling {
+
+static llvm::cl::opt<std::string>
+    ExecutorName("executor", llvm::cl::desc("The name of the executor to use."),
+                 llvm::cl::init("standalone"));
+
+void InMemoryToolResults::addResult(StringRef Key, StringRef Value) {
+  KVResults.push_back({Key.str(), Value.str()});
+}
+
+std::vector<std::pair<std::string, std::string>>
+InMemoryToolResults::AllKVResults() {
+  return KVResults;
+}
+
+void InMemoryToolResults::forEachResult(
+    llvm::function_ref<void(StringRef Key, StringRef Value)> Callback) {
+  for (const auto &KV : KVResults) {
+    Callback(KV.first, KV.second);
+  }
+}
+
+void ExecutionContext::reportResult(StringRef Key, StringRef Value) {
+  Results->addResult(Key, Value);
+}
+
+llvm::Error
+ToolExecutor::execute(std::unique_ptr<FrontendActionFactory> Action) {
+  return execute(std::move(Action), ArgumentsAdjuster());
+}
+
+llvm::Error ToolExecutor::execute(std::unique_ptr<FrontendActionFactory> Action,
+                                  ArgumentsAdjuster Adjuster) {
+  std::vector<
+      std::pair<std::unique_ptr<FrontendActionFactory>, ArgumentsAdjuster>>
+      Actions;
+  Actions.emplace_back(std::move(Action), std::move(Adjuster));
+  return execute(Actions);
+}
+
+namespace internal {
+llvm::Expected<std::unique_ptr<ToolExecutor>>
+createExecutorFromCommandLineArgsImpl(int &argc, const char **argv,
+                                      llvm::cl::OptionCategory &Category,
+                                      const char *Overview) {
+  auto OptionsParser =
+      CommonOptionsParser::create(argc, argv, Category, llvm::cl::ZeroOrMore,
+                                  /*Overview=*/Overview);
+  if (!OptionsParser)
+    return OptionsParser.takeError();
+  for (auto I = ToolExecutorPluginRegistry::begin(),
+            E = ToolExecutorPluginRegistry::end();
+       I != E; ++I) {
+    if (I->getName() != ExecutorName) {
+      continue;
+    }
+    std::unique_ptr<ToolExecutorPlugin> Plugin(I->instantiate());
+    llvm::Expected<std::unique_ptr<ToolExecutor>> Executor =
+        Plugin->create(*OptionsParser);
+    if (!Executor) {
+      return llvm::make_error<llvm::StringError>(
+          llvm::Twine("Failed to create '") + I->getName() +
+              "': " + llvm::toString(Executor.takeError()) + "\n",
+          llvm::inconvertibleErrorCode());
+    }
+    return std::move(*Executor);
+  }
+  return llvm::make_error<llvm::StringError>(
+      llvm::Twine("Executor \"") + ExecutorName + "\" is not registered.",
+      llvm::inconvertibleErrorCode());
+}
+} // end namespace internal
+
+llvm::Expected<std::unique_ptr<ToolExecutor>>
+createExecutorFromCommandLineArgs(int &argc, const char **argv,
+                                  llvm::cl::OptionCategory &Category,
+                                  const char *Overview) {
+  return internal::createExecutorFromCommandLineArgsImpl(argc, argv, Category,
+                                                         Overview);
+}
+
+// This anchor is used to force the linker to link in the generated object file
+// and thus register the StandaloneToolExecutorPlugin etc.
+extern volatile int StandaloneToolExecutorAnchorSource;
+extern volatile int AllTUsToolExecutorAnchorSource;
+static int LLVM_ATTRIBUTE_UNUSED StandaloneToolExecutorAnchorDest =
+    StandaloneToolExecutorAnchorSource;
+static int LLVM_ATTRIBUTE_UNUSED AllTUsToolExecutorAnchorDest =
+    AllTUsToolExecutorAnchorSource;
+
+} // end namespace tooling
+} // end namespace clang
diff --git a/lib/Tooling/Refactoring/ASTSelection.cpp b/lib/Tooling/Refactoring/ASTSelection.cpp
index 2c9c42b..7123fc3 100644
--- a/lib/Tooling/Refactoring/ASTSelection.cpp
+++ b/lib/Tooling/Refactoring/ASTSelection.cpp
@@ -115,6 +115,11 @@
       return true;
     if (auto *Opaque = dyn_cast<OpaqueValueExpr>(S))
       return TraverseOpaqueValueExpr(Opaque);
+    // Avoid selecting implicit 'this' expressions.
+    if (auto *TE = dyn_cast<CXXThisExpr>(S)) {
+      if (TE->isImplicit())
+        return true;
+    }
     // FIXME (Alex Lorenz): Improve handling for macro locations.
     SourceSelectionKind SelectionKind =
         selectionKindFor(CharSourceRange::getTokenRange(S->getSourceRange()));
@@ -249,9 +254,71 @@
   SelectedNodeWithParents &operator=(SelectedNodeWithParents &&) = default;
   SelectedASTNode::ReferenceType Node;
   llvm::SmallVector<SelectedASTNode::ReferenceType, 8> Parents;
+
+  /// Canonicalizes the given selection by selecting different related AST nodes
+  /// when it makes sense to do so.
+  void canonicalize();
 };
+
+enum SelectionCanonicalizationAction { KeepSelection, SelectParent };
+
+/// Returns the canonicalization action which should be applied to the
+/// selected statement.
+SelectionCanonicalizationAction
+getSelectionCanonizalizationAction(const Stmt *S, const Stmt *Parent) {
+  // Select the parent expression when:
+  // - The string literal in ObjC string literal is selected, e.g.:
+  //     @"test"   becomes   @"test"
+  //      ~~~~~~             ~~~~~~~
+  if (isa<StringLiteral>(S) && isa<ObjCStringLiteral>(Parent))
+    return SelectParent;
+  // The entire call should be selected when just the member expression
+  // that refers to the method or the decl ref that refers to the function
+  // is selected.
+  //    f.call(args)  becomes  f.call(args)
+  //      ~~~~                 ~~~~~~~~~~~~
+  //    func(args)  becomes  func(args)
+  //    ~~~~                 ~~~~~~~~~~
+  else if (const auto *CE = dyn_cast<CallExpr>(Parent)) {
+    if ((isa<MemberExpr>(S) || isa<DeclRefExpr>(S)) &&
+        CE->getCallee()->IgnoreImpCasts() == S)
+      return SelectParent;
+  }
+  // FIXME: Syntactic form -> Entire pseudo-object expr.
+  return KeepSelection;
+}
+
 } // end anonymous namespace
 
+void SelectedNodeWithParents::canonicalize() {
+  const Stmt *S = Node.get().Node.get<Stmt>();
+  assert(S && "non statement selection!");
+  const Stmt *Parent = Parents[Parents.size() - 1].get().Node.get<Stmt>();
+  if (!Parent)
+    return;
+
+  // Look through the implicit casts in the parents.
+  unsigned ParentIndex = 1;
+  for (; (ParentIndex + 1) <= Parents.size() && isa<ImplicitCastExpr>(Parent);
+       ++ParentIndex) {
+    const Stmt *NewParent =
+        Parents[Parents.size() - ParentIndex - 1].get().Node.get<Stmt>();
+    if (!NewParent)
+      break;
+    Parent = NewParent;
+  }
+
+  switch (getSelectionCanonizalizationAction(S, Parent)) {
+  case SelectParent:
+    Node = Parents[Parents.size() - ParentIndex];
+    for (; ParentIndex != 0; --ParentIndex)
+      Parents.pop_back();
+    break;
+  case KeepSelection:
+    break;
+  }
+}
+
 /// Finds the set of bottom-most selected AST nodes that are in the selection
 /// tree with the specified selection kind.
 ///
@@ -279,11 +346,23 @@
     llvm::SmallVectorImpl<SelectedNodeWithParents> &MatchingNodes,
     SourceSelectionKind Kind,
     llvm::SmallVectorImpl<SelectedASTNode::ReferenceType> &ParentStack) {
-  if (!hasAnyDirectChildrenWithKind(ASTSelection, Kind)) {
-    // This node is the bottom-most.
-    MatchingNodes.push_back(SelectedNodeWithParents{
-        std::cref(ASTSelection), {ParentStack.begin(), ParentStack.end()}});
-    return;
+  if (ASTSelection.Node.get<DeclStmt>()) {
+    // Select the entire decl stmt when any of its child declarations is the
+    // bottom-most.
+    for (const auto &Child : ASTSelection.Children) {
+      if (!hasAnyDirectChildrenWithKind(Child, Kind)) {
+        MatchingNodes.push_back(SelectedNodeWithParents{
+            std::cref(ASTSelection), {ParentStack.begin(), ParentStack.end()}});
+        return;
+      }
+    }
+  } else {
+    if (!hasAnyDirectChildrenWithKind(ASTSelection, Kind)) {
+      // This node is the bottom-most.
+      MatchingNodes.push_back(SelectedNodeWithParents{
+          std::cref(ASTSelection), {ParentStack.begin(), ParentStack.end()}});
+      return;
+    }
   }
   // Search in the children.
   ParentStack.push_back(std::cref(ASTSelection));
@@ -318,10 +397,14 @@
     return None;
   const Stmt *CodeRangeStmt = Selected.Node.get().Node.get<Stmt>();
   if (!isa<CompoundStmt>(CodeRangeStmt)) {
-    // FIXME (Alex L): Canonicalize.
+    Selected.canonicalize();
     return CodeRangeASTSelection(Selected.Node, Selected.Parents,
                                  /*AreChildrenSelected=*/false);
   }
+  // FIXME (Alex L): First selected SwitchCase means that first case statement.
+  // is selected actually
+  // (See https://github.com/apple/swift-clang & CompoundStmtRange).
+
   // FIXME (Alex L): Tweak selection rules for compound statements, see:
   // https://github.com/apple/swift-clang/blob/swift-4.1-branch/lib/Tooling/
   // Refactor/ASTSlice.cpp#L513
@@ -330,3 +413,41 @@
   return CodeRangeASTSelection(Selected.Node, Selected.Parents,
                                /*AreChildrenSelected=*/true);
 }
+
+static bool isFunctionLikeDeclaration(const Decl *D) {
+  // FIXME (Alex L): Test for BlockDecl.
+  return isa<FunctionDecl>(D) || isa<ObjCMethodDecl>(D);
+}
+
+bool CodeRangeASTSelection::isInFunctionLikeBodyOfCode() const {
+  bool IsPrevCompound = false;
+  // Scan through the parents (bottom-to-top) and check if the selection is
+  // contained in a compound statement that's a body of a function/method
+  // declaration.
+  for (const auto &Parent : llvm::reverse(Parents)) {
+    const DynTypedNode &Node = Parent.get().Node;
+    if (const auto *D = Node.get<Decl>()) {
+      if (isFunctionLikeDeclaration(D))
+        return IsPrevCompound;
+      // Stop the search at any type declaration to avoid returning true for
+      // expressions in type declarations in functions, like:
+      // function foo() { struct X {
+      //   int m = /*selection:*/ 1 + 2 /*selection end*/; }; };
+      if (isa<TypeDecl>(D))
+        return false;
+    }
+    IsPrevCompound = Node.get<CompoundStmt>() != nullptr;
+  }
+  return false;
+}
+
+const Decl *CodeRangeASTSelection::getFunctionLikeNearestParent() const {
+  for (const auto &Parent : llvm::reverse(Parents)) {
+    const DynTypedNode &Node = Parent.get().Node;
+    if (const auto *D = Node.get<Decl>()) {
+      if (isFunctionLikeDeclaration(D))
+        return D;
+    }
+  }
+  return nullptr;
+}
diff --git a/lib/Tooling/Refactoring/ASTSelectionRequirements.cpp b/lib/Tooling/Refactoring/ASTSelectionRequirements.cpp
new file mode 100644
index 0000000..c0232c5
--- /dev/null
+++ b/lib/Tooling/Refactoring/ASTSelectionRequirements.cpp
@@ -0,0 +1,48 @@
+//===--- ASTSelectionRequirements.cpp - Clang refactoring library ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h"
+
+using namespace clang;
+using namespace tooling;
+
+Expected<SelectedASTNode>
+ASTSelectionRequirement::evaluate(RefactoringRuleContext &Context) const {
+  // FIXME: Memoize so that selection is evaluated only once.
+  Expected<SourceRange> Range =
+      SourceRangeSelectionRequirement::evaluate(Context);
+  if (!Range)
+    return Range.takeError();
+
+  Optional<SelectedASTNode> Selection =
+      findSelectedASTNodes(Context.getASTContext(), *Range);
+  if (!Selection)
+    return Context.createDiagnosticError(
+        Range->getBegin(), diag::err_refactor_selection_invalid_ast);
+  return std::move(*Selection);
+}
+
+Expected<CodeRangeASTSelection> CodeRangeASTSelectionRequirement::evaluate(
+    RefactoringRuleContext &Context) const {
+  // FIXME: Memoize so that selection is evaluated only once.
+  Expected<SelectedASTNode> ASTSelection =
+      ASTSelectionRequirement::evaluate(Context);
+  if (!ASTSelection)
+    return ASTSelection.takeError();
+  std::unique_ptr<SelectedASTNode> StoredSelection =
+      llvm::make_unique<SelectedASTNode>(std::move(*ASTSelection));
+  Optional<CodeRangeASTSelection> CodeRange = CodeRangeASTSelection::create(
+      Context.getSelectionRange(), *StoredSelection);
+  if (!CodeRange)
+    return Context.createDiagnosticError(
+        Context.getSelectionRange().getBegin(),
+        diag::err_refactor_selection_invalid_ast);
+  Context.setASTSelection(std::move(StoredSelection));
+  return std::move(*CodeRange);
+}
diff --git a/lib/Tooling/Refactoring/CMakeLists.txt b/lib/Tooling/Refactoring/CMakeLists.txt
index 43ea1d9..402b5d3 100644
--- a/lib/Tooling/Refactoring/CMakeLists.txt
+++ b/lib/Tooling/Refactoring/CMakeLists.txt
@@ -2,7 +2,10 @@
 
 add_clang_library(clangToolingRefactor
   ASTSelection.cpp
+  ASTSelectionRequirements.cpp
   AtomicChange.cpp
+  Extract/Extract.cpp
+  Extract/SourceExtraction.cpp
   RefactoringActions.cpp
   Rename/RenamingAction.cpp
   Rename/SymbolOccurrences.cpp
@@ -17,5 +20,6 @@
   clangFormat
   clangIndex
   clangLex
+  clangRewrite
   clangToolingCore
   )
diff --git a/lib/Tooling/Refactoring/Extract/Extract.cpp b/lib/Tooling/Refactoring/Extract/Extract.cpp
new file mode 100644
index 0000000..b0847a7
--- /dev/null
+++ b/lib/Tooling/Refactoring/Extract/Extract.cpp
@@ -0,0 +1,199 @@
+//===--- Extract.cpp - Clang refactoring library --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Implements the "extract" refactoring that can pull code into
+/// new functions, methods or declare new variables.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Refactoring/Extract/Extract.h"
+#include "SourceExtraction.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprObjC.h"
+#include "clang/Rewrite/Core/Rewriter.h"
+
+namespace clang {
+namespace tooling {
+
+namespace {
+
+/// Returns true if \c E is a simple literal or a reference expression that
+/// should not be extracted.
+bool isSimpleExpression(const Expr *E) {
+  if (!E)
+    return false;
+  switch (E->IgnoreParenCasts()->getStmtClass()) {
+  case Stmt::DeclRefExprClass:
+  case Stmt::PredefinedExprClass:
+  case Stmt::IntegerLiteralClass:
+  case Stmt::FloatingLiteralClass:
+  case Stmt::ImaginaryLiteralClass:
+  case Stmt::CharacterLiteralClass:
+  case Stmt::StringLiteralClass:
+    return true;
+  default:
+    return false;
+  }
+}
+
+SourceLocation computeFunctionExtractionLocation(const Decl *D) {
+  if (isa<CXXMethodDecl>(D)) {
+    // Code from method that is defined in class body should be extracted to a
+    // function defined just before the class.
+    while (const auto *RD = dyn_cast<CXXRecordDecl>(D->getLexicalDeclContext()))
+      D = RD;
+  }
+  return D->getLocStart();
+}
+
+} // end anonymous namespace
+
+const RefactoringDescriptor &ExtractFunction::describe() {
+  static const RefactoringDescriptor Descriptor = {
+      "extract-function",
+      "Extract Function",
+      "(WIP action; use with caution!) Extracts code into a new function",
+  };
+  return Descriptor;
+}
+
+Expected<ExtractFunction>
+ExtractFunction::initiate(RefactoringRuleContext &Context,
+                          CodeRangeASTSelection Code,
+                          Optional<std::string> DeclName) {
+  // We would like to extract code out of functions/methods/blocks.
+  // Prohibit extraction from things like global variable / field
+  // initializers and other top-level expressions.
+  if (!Code.isInFunctionLikeBodyOfCode())
+    return Context.createDiagnosticError(
+        diag::err_refactor_code_outside_of_function);
+
+  if (Code.size() == 1) {
+    // Avoid extraction of simple literals and references.
+    if (isSimpleExpression(dyn_cast<Expr>(Code[0])))
+      return Context.createDiagnosticError(
+          diag::err_refactor_extract_simple_expression);
+
+    // Property setters can't be extracted.
+    if (const auto *PRE = dyn_cast<ObjCPropertyRefExpr>(Code[0])) {
+      if (!PRE->isMessagingGetter())
+        return Context.createDiagnosticError(
+            diag::err_refactor_extract_prohibited_expression);
+    }
+  }
+
+  return ExtractFunction(std::move(Code), DeclName);
+}
+
+// FIXME: Support C++ method extraction.
+// FIXME: Support Objective-C method extraction.
+Expected<AtomicChanges>
+ExtractFunction::createSourceReplacements(RefactoringRuleContext &Context) {
+  const Decl *ParentDecl = Code.getFunctionLikeNearestParent();
+  assert(ParentDecl && "missing parent");
+
+  // Compute the source range of the code that should be extracted.
+  SourceRange ExtractedRange(Code[0]->getLocStart(),
+                             Code[Code.size() - 1]->getLocEnd());
+  // FIXME (Alex L): Add code that accounts for macro locations.
+
+  ASTContext &AST = Context.getASTContext();
+  SourceManager &SM = AST.getSourceManager();
+  const LangOptions &LangOpts = AST.getLangOpts();
+  Rewriter ExtractedCodeRewriter(SM, LangOpts);
+
+  // FIXME: Capture used variables.
+
+  // Compute the return type.
+  QualType ReturnType = AST.VoidTy;
+  // FIXME (Alex L): Account for the return statement in extracted code.
+  // FIXME (Alex L): Check for lexical expression instead.
+  bool IsExpr = Code.size() == 1 && isa<Expr>(Code[0]);
+  if (IsExpr) {
+    // FIXME (Alex L): Get a more user-friendly type if needed.
+    ReturnType = cast<Expr>(Code[0])->getType();
+  }
+
+  // FIXME: Rewrite the extracted code performing any required adjustments.
+
+  // FIXME: Capture any field if necessary (method -> function extraction).
+
+  // FIXME: Sort captured variables by name.
+
+  // FIXME: Capture 'this' / 'self' if necessary.
+
+  // FIXME: Compute the actual parameter types.
+
+  // Compute the location of the extracted declaration.
+  SourceLocation ExtractedDeclLocation =
+      computeFunctionExtractionLocation(ParentDecl);
+  // FIXME: Adjust the location to account for any preceding comments.
+
+  // FIXME: Adjust with PP awareness like in Sema to get correct 'bool'
+  // treatment.
+  PrintingPolicy PP = AST.getPrintingPolicy();
+  // FIXME: PP.UseStdFunctionForLambda = true;
+  PP.SuppressStrongLifetime = true;
+  PP.SuppressLifetimeQualifiers = true;
+  PP.SuppressUnwrittenScope = true;
+
+  ExtractionSemicolonPolicy Semicolons = ExtractionSemicolonPolicy::compute(
+      Code[Code.size() - 1], ExtractedRange, SM, LangOpts);
+  AtomicChange Change(SM, ExtractedDeclLocation);
+  // Create the replacement for the extracted declaration.
+  {
+    std::string ExtractedCode;
+    llvm::raw_string_ostream OS(ExtractedCode);
+    // FIXME: Use 'inline' in header.
+    OS << "static ";
+    ReturnType.print(OS, PP, DeclName);
+    OS << '(';
+    // FIXME: Arguments.
+    OS << ')';
+
+    // Function body.
+    OS << " {\n";
+    if (IsExpr && !ReturnType->isVoidType())
+      OS << "return ";
+    OS << ExtractedCodeRewriter.getRewrittenText(ExtractedRange);
+    if (Semicolons.isNeededInExtractedFunction())
+      OS << ';';
+    OS << "\n}\n\n";
+    auto Err = Change.insert(SM, ExtractedDeclLocation, OS.str());
+    if (Err)
+      return std::move(Err);
+  }
+
+  // Create the replacement for the call to the extracted declaration.
+  {
+    std::string ReplacedCode;
+    llvm::raw_string_ostream OS(ReplacedCode);
+
+    OS << DeclName << '(';
+    // FIXME: Forward arguments.
+    OS << ')';
+    if (Semicolons.isNeededInOriginalFunction())
+      OS << ';';
+
+    auto Err = Change.replace(
+        SM, CharSourceRange::getTokenRange(ExtractedRange), OS.str());
+    if (Err)
+      return std::move(Err);
+  }
+
+  // FIXME: Add support for assocciated symbol location to AtomicChange to mark
+  // the ranges of the name of the extracted declaration.
+  return AtomicChanges{std::move(Change)};
+}
+
+} // end namespace tooling
+} // end namespace clang
diff --git a/lib/Tooling/Refactoring/Extract/SourceExtraction.cpp b/lib/Tooling/Refactoring/Extract/SourceExtraction.cpp
new file mode 100644
index 0000000..7fd8cc2
--- /dev/null
+++ b/lib/Tooling/Refactoring/Extract/SourceExtraction.cpp
@@ -0,0 +1,112 @@
+//===--- SourceExtraction.cpp - Clang refactoring library -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SourceExtraction.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/StmtCXX.h"
+#include "clang/AST/StmtObjC.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+
+using namespace clang;
+
+namespace {
+
+/// Returns true if the token at the given location is a semicolon.
+bool isSemicolonAtLocation(SourceLocation TokenLoc, const SourceManager &SM,
+                           const LangOptions &LangOpts) {
+  return Lexer::getSourceText(
+             CharSourceRange::getTokenRange(TokenLoc, TokenLoc), SM,
+             LangOpts) == ";";
+}
+
+/// Returns true if there should be a semicolon after the given statement.
+bool isSemicolonRequiredAfter(const Stmt *S) {
+  if (isa<CompoundStmt>(S))
+    return false;
+  if (const auto *If = dyn_cast<IfStmt>(S))
+    return isSemicolonRequiredAfter(If->getElse() ? If->getElse()
+                                                  : If->getThen());
+  if (const auto *While = dyn_cast<WhileStmt>(S))
+    return isSemicolonRequiredAfter(While->getBody());
+  if (const auto *For = dyn_cast<ForStmt>(S))
+    return isSemicolonRequiredAfter(For->getBody());
+  if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(S))
+    return isSemicolonRequiredAfter(CXXFor->getBody());
+  if (const auto *ObjCFor = dyn_cast<ObjCForCollectionStmt>(S))
+    return isSemicolonRequiredAfter(ObjCFor->getBody());
+  switch (S->getStmtClass()) {
+  case Stmt::SwitchStmtClass:
+  case Stmt::CXXTryStmtClass:
+  case Stmt::ObjCAtSynchronizedStmtClass:
+  case Stmt::ObjCAutoreleasePoolStmtClass:
+  case Stmt::ObjCAtTryStmtClass:
+    return false;
+  default:
+    return true;
+  }
+}
+
+/// Returns true if the two source locations are on the same line.
+bool areOnSameLine(SourceLocation Loc1, SourceLocation Loc2,
+                   const SourceManager &SM) {
+  return !Loc1.isMacroID() && !Loc2.isMacroID() &&
+         SM.getSpellingLineNumber(Loc1) == SM.getSpellingLineNumber(Loc2);
+}
+
+} // end anonymous namespace
+
+namespace clang {
+namespace tooling {
+
+ExtractionSemicolonPolicy
+ExtractionSemicolonPolicy::compute(const Stmt *S, SourceRange &ExtractedRange,
+                                   const SourceManager &SM,
+                                   const LangOptions &LangOpts) {
+  auto neededInExtractedFunction = []() {
+    return ExtractionSemicolonPolicy(true, false);
+  };
+  auto neededInOriginalFunction = []() {
+    return ExtractionSemicolonPolicy(false, true);
+  };
+
+  /// The extracted expression should be terminated with a ';'. The call to
+  /// the extracted function will replace this expression, so it won't need
+  /// a terminating ';'.
+  if (isa<Expr>(S))
+    return neededInExtractedFunction();
+
+  /// Some statements don't need to be terminated with ';'. The call to the
+  /// extracted function will be a standalone statement, so it should be
+  /// terminated with a ';'.
+  bool NeedsSemi = isSemicolonRequiredAfter(S);
+  if (!NeedsSemi)
+    return neededInOriginalFunction();
+
+  /// Some statements might end at ';'. The extraction will move that ';', so
+  /// the call to the extracted function should be terminated with a ';'.
+  SourceLocation End = ExtractedRange.getEnd();
+  if (isSemicolonAtLocation(End, SM, LangOpts))
+    return neededInOriginalFunction();
+
+  /// Other statements should generally have a trailing ';'. We can try to find
+  /// it and move it together it with the extracted code.
+  Optional<Token> NextToken = Lexer::findNextToken(End, SM, LangOpts);
+  if (NextToken && NextToken->is(tok::semi) &&
+      areOnSameLine(NextToken->getLocation(), End, SM)) {
+    ExtractedRange.setEnd(NextToken->getLocation());
+    return neededInOriginalFunction();
+  }
+
+  /// Otherwise insert semicolons in both places.
+  return ExtractionSemicolonPolicy(true, true);
+}
+
+} // end namespace tooling
+} // end namespace clang
diff --git a/lib/Tooling/Refactoring/Extract/SourceExtraction.h b/lib/Tooling/Refactoring/Extract/SourceExtraction.h
new file mode 100644
index 0000000..4b4bd8b
--- /dev/null
+++ b/lib/Tooling/Refactoring/Extract/SourceExtraction.h
@@ -0,0 +1,52 @@
+//===--- SourceExtraction.cpp - Clang refactoring library -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H
+#define LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H
+
+#include "clang/Basic/LLVM.h"
+
+namespace clang {
+
+class LangOptions;
+class SourceManager;
+class SourceRange;
+class Stmt;
+
+namespace tooling {
+
+/// Determines which semicolons should be inserted during extraction.
+class ExtractionSemicolonPolicy {
+public:
+  bool isNeededInExtractedFunction() const {
+    return IsNeededInExtractedFunction;
+  }
+
+  bool isNeededInOriginalFunction() const { return IsNeededInOriginalFunction; }
+
+  /// Returns the semicolon insertion policy that is needed for extraction of
+  /// the given statement from the given source range.
+  static ExtractionSemicolonPolicy compute(const Stmt *S,
+                                           SourceRange &ExtractedRange,
+                                           const SourceManager &SM,
+                                           const LangOptions &LangOpts);
+
+private:
+  ExtractionSemicolonPolicy(bool IsNeededInExtractedFunction,
+                            bool IsNeededInOriginalFunction)
+      : IsNeededInExtractedFunction(IsNeededInExtractedFunction),
+        IsNeededInOriginalFunction(IsNeededInOriginalFunction) {}
+  bool IsNeededInExtractedFunction;
+  bool IsNeededInOriginalFunction;
+};
+
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H
diff --git a/lib/Tooling/Refactoring/RefactoringActions.cpp b/lib/Tooling/Refactoring/RefactoringActions.cpp
index 25f055b..37a1639 100644
--- a/lib/Tooling/Refactoring/RefactoringActions.cpp
+++ b/lib/Tooling/Refactoring/RefactoringActions.cpp
@@ -7,21 +7,100 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/Tooling/Refactoring/Extract/Extract.h"
 #include "clang/Tooling/Refactoring/RefactoringAction.h"
+#include "clang/Tooling/Refactoring/RefactoringOptions.h"
+#include "clang/Tooling/Refactoring/Rename/RenamingAction.h"
 
 namespace clang {
 namespace tooling {
 
-// Forward declare the individual create*Action functions.
-#define REFACTORING_ACTION(Name)                                               \
-  std::unique_ptr<RefactoringAction> create##Name##Action();
-#include "clang/Tooling/Refactoring/RefactoringActionRegistry.def"
+namespace {
+
+class DeclNameOption final : public OptionalRefactoringOption<std::string> {
+public:
+  StringRef getName() const { return "name"; }
+  StringRef getDescription() const {
+    return "Name of the extracted declaration";
+  }
+};
+
+// FIXME: Rewrite the Actions to avoid duplication of descriptions/names with
+// rules.
+class ExtractRefactoring final : public RefactoringAction {
+public:
+  StringRef getCommand() const override { return "extract"; }
+
+  StringRef getDescription() const override {
+    return "(WIP action; use with caution!) Extracts code into a new function";
+  }
+
+  /// Returns a set of refactoring actions rules that are defined by this
+  /// action.
+  RefactoringActionRules createActionRules() const override {
+    RefactoringActionRules Rules;
+    Rules.push_back(createRefactoringActionRule<ExtractFunction>(
+        CodeRangeASTSelectionRequirement(),
+        OptionRequirement<DeclNameOption>()));
+    return Rules;
+  }
+};
+
+class OldQualifiedNameOption : public RequiredRefactoringOption<std::string> {
+public:
+  StringRef getName() const override { return "old-qualified-name"; }
+  StringRef getDescription() const override {
+    return "The old qualified name to be renamed";
+  }
+};
+
+class NewQualifiedNameOption : public RequiredRefactoringOption<std::string> {
+public:
+  StringRef getName() const override { return "new-qualified-name"; }
+  StringRef getDescription() const override {
+    return "The new qualified name to change the symbol to";
+  }
+};
+
+class NewNameOption : public RequiredRefactoringOption<std::string> {
+public:
+  StringRef getName() const override { return "new-name"; }
+  StringRef getDescription() const override {
+    return "The new name to change the symbol to";
+  }
+};
+
+// FIXME: Rewrite the Actions to avoid duplication of descriptions/names with
+// rules.
+class LocalRename final : public RefactoringAction {
+public:
+  StringRef getCommand() const override { return "local-rename"; }
+
+  StringRef getDescription() const override {
+    return "Finds and renames symbols in code with no indexer support";
+  }
+
+  /// Returns a set of refactoring actions rules that are defined by this
+  /// action.
+  RefactoringActionRules createActionRules() const override {
+    RefactoringActionRules Rules;
+    Rules.push_back(createRefactoringActionRule<RenameOccurrences>(
+        SourceRangeSelectionRequirement(), OptionRequirement<NewNameOption>()));
+    // FIXME: Use NewNameOption.
+    Rules.push_back(createRefactoringActionRule<QualifiedRenameRule>(
+        OptionRequirement<OldQualifiedNameOption>(),
+        OptionRequirement<NewQualifiedNameOption>()));
+    return Rules;
+  }
+};
+
+} // end anonymous namespace
 
 std::vector<std::unique_ptr<RefactoringAction>> createRefactoringActions() {
   std::vector<std::unique_ptr<RefactoringAction>> Actions;
 
-#define REFACTORING_ACTION(Name) Actions.push_back(create##Name##Action());
-#include "clang/Tooling/Refactoring/RefactoringActionRegistry.def"
+  Actions.push_back(llvm::make_unique<LocalRename>());
+  Actions.push_back(llvm::make_unique<ExtractRefactoring>());
 
   return Actions;
 }
diff --git a/lib/Tooling/Refactoring/Rename/RenamingAction.cpp b/lib/Tooling/Refactoring/Rename/RenamingAction.cpp
index 28912c3..c8ed9dd 100644
--- a/lib/Tooling/Refactoring/Rename/RenamingAction.cpp
+++ b/lib/Tooling/Refactoring/Rename/RenamingAction.cpp
@@ -31,6 +31,8 @@
 #include "clang/Tooling/Refactoring/Rename/USRLocFinder.h"
 #include "clang/Tooling/Tooling.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
 #include <string>
 #include <vector>
 
@@ -41,83 +43,101 @@
 
 namespace {
 
-class SymbolSelectionRequirement : public SourceRangeSelectionRequirement {
-public:
-  Expected<const NamedDecl *> evaluate(RefactoringRuleContext &Context) const {
-    Expected<SourceRange> Selection =
-        SourceRangeSelectionRequirement::evaluate(Context);
-    if (!Selection)
-      return Selection.takeError();
-    const NamedDecl *ND =
-        getNamedDeclAt(Context.getASTContext(), Selection->getBegin());
-    if (!ND)
-      return Context.createDiagnosticError(
-          Selection->getBegin(), diag::err_refactor_selection_no_symbol);
-    return getCanonicalSymbolDeclaration(ND);
-  }
-};
-
-class OccurrenceFinder final : public FindSymbolOccurrencesRefactoringRule {
-public:
-  OccurrenceFinder(const NamedDecl *ND) : ND(ND) {}
-
-  Expected<SymbolOccurrences>
-  findSymbolOccurrences(RefactoringRuleContext &Context) override {
-    std::vector<std::string> USRs =
-        getUSRsForDeclaration(ND, Context.getASTContext());
-    std::string PrevName = ND->getNameAsString();
-    return getOccurrencesOfUSRs(
-        USRs, PrevName, Context.getASTContext().getTranslationUnitDecl());
-  }
-
-private:
-  const NamedDecl *ND;
-};
-
-class RenameOccurrences final : public SourceChangeRefactoringRule {
-public:
-  RenameOccurrences(const NamedDecl *ND, std::string NewName)
-      : Finder(ND), NewName(std::move(NewName)) {}
-
-  Expected<AtomicChanges>
-  createSourceReplacements(RefactoringRuleContext &Context) override {
-    Expected<SymbolOccurrences> Occurrences =
-        Finder.findSymbolOccurrences(Context);
-    if (!Occurrences)
-      return Occurrences.takeError();
-    // FIXME: Verify that the new name is valid.
-    SymbolName Name(NewName);
-    return createRenameReplacements(
-        *Occurrences, Context.getASTContext().getSourceManager(), Name);
-  }
-
-private:
-  OccurrenceFinder Finder;
-  std::string NewName;
-};
-
-class LocalRename final : public RefactoringAction {
-public:
-  StringRef getCommand() const override { return "local-rename"; }
-
-  StringRef getDescription() const override {
-    return "Finds and renames symbols in code with no indexer support";
-  }
-
-  /// Returns a set of refactoring actions rules that are defined by this
-  /// action.
-  RefactoringActionRules createActionRules() const override {
-    RefactoringActionRules Rules;
-    Rules.push_back(createRefactoringActionRule<RenameOccurrences>(
-        SymbolSelectionRequirement(), OptionRequirement<NewNameOption>()));
-    return Rules;
-  }
-};
+Expected<SymbolOccurrences>
+findSymbolOccurrences(const NamedDecl *ND, RefactoringRuleContext &Context) {
+  std::vector<std::string> USRs =
+      getUSRsForDeclaration(ND, Context.getASTContext());
+  std::string PrevName = ND->getNameAsString();
+  return getOccurrencesOfUSRs(USRs, PrevName,
+                              Context.getASTContext().getTranslationUnitDecl());
+}
 
 } // end anonymous namespace
 
-std::unique_ptr<RefactoringAction> createLocalRenameAction() {
-  return llvm::make_unique<LocalRename>();
+const RefactoringDescriptor &RenameOccurrences::describe() {
+  static const RefactoringDescriptor Descriptor = {
+      "local-rename",
+      "Rename",
+      "Finds and renames symbols in code with no indexer support",
+  };
+  return Descriptor;
+}
+
+Expected<RenameOccurrences>
+RenameOccurrences::initiate(RefactoringRuleContext &Context,
+                            SourceRange SelectionRange, std::string NewName) {
+  const NamedDecl *ND =
+      getNamedDeclAt(Context.getASTContext(), SelectionRange.getBegin());
+  if (!ND)
+    return Context.createDiagnosticError(
+        SelectionRange.getBegin(), diag::err_refactor_selection_no_symbol);
+  return RenameOccurrences(getCanonicalSymbolDeclaration(ND),
+                           std::move(NewName));
+}
+
+Expected<AtomicChanges>
+RenameOccurrences::createSourceReplacements(RefactoringRuleContext &Context) {
+  Expected<SymbolOccurrences> Occurrences = findSymbolOccurrences(ND, Context);
+  if (!Occurrences)
+    return Occurrences.takeError();
+  // FIXME: Verify that the new name is valid.
+  SymbolName Name(NewName);
+  return createRenameReplacements(
+      *Occurrences, Context.getASTContext().getSourceManager(), Name);
+}
+
+Expected<QualifiedRenameRule>
+QualifiedRenameRule::initiate(RefactoringRuleContext &Context,
+                              std::string OldQualifiedName,
+                              std::string NewQualifiedName) {
+  const NamedDecl *ND =
+      getNamedDeclFor(Context.getASTContext(), OldQualifiedName);
+  if (!ND)
+    return llvm::make_error<llvm::StringError>("Could not find symbol " +
+                                                   OldQualifiedName,
+                                               llvm::errc::invalid_argument);
+  return QualifiedRenameRule(ND, std::move(NewQualifiedName));
+}
+
+const RefactoringDescriptor &QualifiedRenameRule::describe() {
+  static const RefactoringDescriptor Descriptor = {
+      /*Name=*/"local-qualified-rename",
+      /*Title=*/"Qualified Rename",
+      /*Description=*/
+      R"(Finds and renames qualified symbols in code within a translation unit.
+It is used to move/rename a symbol to a new namespace/name:
+  * Supported symbols: classes, class members, functions, enums, and type alias.
+  * Renames all symbol occurrences from the old qualified name to the new
+    qualified name. All symbol references will be correctly qualified; For
+    symbol definitions, only name will be changed.
+For example, rename "A::Foo" to "B::Bar":
+  Old code:
+    namespace foo {
+    class A {};
+    }
+
+    namespace bar {
+    void f(foo::A a) {}
+    }
+
+  New code after rename:
+    namespace foo {
+    class B {};
+    }
+
+    namespace bar {
+    void f(B b) {}
+    })"
+  };
+  return Descriptor;
+}
+
+Expected<AtomicChanges>
+QualifiedRenameRule::createSourceReplacements(RefactoringRuleContext &Context) {
+  auto USRs = getUSRsForDeclaration(ND, Context.getASTContext());
+  assert(!USRs.empty());
+  return tooling::createRenameAtomicChanges(
+      USRs, NewQualifiedName, Context.getASTContext().getTranslationUnitDecl());
 }
 
 Expected<std::vector<AtomicChange>>
diff --git a/lib/Tooling/Refactoring/Rename/USRFindingAction.cpp b/lib/Tooling/Refactoring/Rename/USRFindingAction.cpp
index 0c746bb..40b70d8 100644
--- a/lib/Tooling/Refactoring/Rename/USRFindingAction.cpp
+++ b/lib/Tooling/Refactoring/Rename/USRFindingAction.cpp
@@ -73,6 +73,7 @@
         if (checkIfOverriddenFunctionAscends(OverriddenMethod))
           USRSet.insert(getUSRForDecl(OverriddenMethod));
       }
+      addUSRsOfInstantiatedMethods(MethodDecl);
     } else if (const auto *RecordDecl = dyn_cast<CXXRecordDecl>(FoundDecl)) {
       handleCXXRecordDecl(RecordDecl);
     } else if (const auto *TemplateDecl =
@@ -84,9 +85,13 @@
     return std::vector<std::string>(USRSet.begin(), USRSet.end());
   }
 
+  bool shouldVisitTemplateInstantiations() const { return true; }
+
   bool VisitCXXMethodDecl(const CXXMethodDecl *MethodDecl) {
     if (MethodDecl->isVirtual())
       OverriddenMethods.push_back(MethodDecl);
+    if (MethodDecl->getInstantiatedFromMemberFunction())
+      InstantiatedMethods.push_back(MethodDecl);
     return true;
   }
 
@@ -137,6 +142,20 @@
       addUSRsOfOverridenFunctions(OverriddenMethod);
   }
 
+  void addUSRsOfInstantiatedMethods(const CXXMethodDecl *MethodDecl) {
+    // For renaming a class template method, all references of the instantiated
+    // member methods should be renamed too, so add USRs of the instantiated
+    // methods to the USR set.
+    USRSet.insert(getUSRForDecl(MethodDecl));
+    if (const auto *FT = MethodDecl->getInstantiatedFromMemberFunction())
+      USRSet.insert(getUSRForDecl(FT));
+    for (const auto *Method : InstantiatedMethods) {
+      if (USRSet.find(getUSRForDecl(
+              Method->getInstantiatedFromMemberFunction())) != USRSet.end())
+        USRSet.insert(getUSRForDecl(Method));
+    }
+  }
+
   bool checkIfOverriddenFunctionAscends(const CXXMethodDecl *MethodDecl) {
     for (const auto &OverriddenMethod : MethodDecl->overridden_methods()) {
       if (USRSet.find(getUSRForDecl(OverriddenMethod)) != USRSet.end())
@@ -150,6 +169,7 @@
   ASTContext &Context;
   std::set<std::string> USRSet;
   std::vector<const CXXMethodDecl *> OverriddenMethods;
+  std::vector<const CXXMethodDecl *> InstantiatedMethods;
   std::vector<const ClassTemplatePartialSpecializationDecl *> PartialSpecs;
 };
 } // namespace
diff --git a/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp b/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp
index 265e3c2..c77304a 100644
--- a/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp
+++ b/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp
@@ -212,6 +212,41 @@
     return true;
   }
 
+  bool VisitMemberExpr(const MemberExpr *Expr) {
+    const NamedDecl *Decl = Expr->getFoundDecl();
+    auto StartLoc = Expr->getMemberLoc();
+    auto EndLoc = Expr->getMemberLoc();
+    if (isInUSRSet(Decl)) {
+      RenameInfos.push_back({StartLoc, EndLoc,
+                            /*FromDecl=*/nullptr,
+                            /*Context=*/nullptr,
+                            /*Specifier=*/nullptr,
+                            /*IgnorePrefixQualifiers=*/true});
+    }
+    return true;
+  }
+
+  bool VisitCXXConstructorDecl(const CXXConstructorDecl *CD) {
+    // Fix the constructor initializer when renaming class members.
+    for (const auto *Initializer : CD->inits()) {
+      // Ignore implicit initializers.
+      if (!Initializer->isWritten())
+        continue;
+
+      if (const FieldDecl *FD = Initializer->getMember()) {
+        if (isInUSRSet(FD)) {
+          auto Loc = Initializer->getSourceLocation();
+          RenameInfos.push_back({Loc, Loc,
+                                 /*FromDecl=*/nullptr,
+                                 /*Context=*/nullptr,
+                                 /*Specifier=*/nullptr,
+                                 /*IgnorePrefixQualifiers=*/true});
+        }
+      }
+    }
+    return true;
+  }
+
   bool VisitDeclRefExpr(const DeclRefExpr *Expr) {
     const NamedDecl *Decl = Expr->getFoundDecl();
     // Get the underlying declaration of the shadow declaration introduced by a
@@ -221,7 +256,26 @@
     }
 
     auto StartLoc = Expr->getLocStart();
-    auto EndLoc = Expr->getLocEnd();
+    // For template function call expressions like `foo<int>()`, we want to
+    // restrict the end of location to just before the `<` character.
+    SourceLocation EndLoc = Expr->hasExplicitTemplateArgs()
+                                ? Expr->getLAngleLoc().getLocWithOffset(-1)
+                                : Expr->getLocEnd();
+
+    if (const auto *MD = llvm::dyn_cast<CXXMethodDecl>(Decl)) {
+      if (isInUSRSet(MD)) {
+        // Handle renaming static template class methods, we only rename the
+        // name without prefix qualifiers and restrict the source range to the
+        // name.
+        RenameInfos.push_back({EndLoc, EndLoc,
+                               /*FromDecl=*/nullptr,
+                               /*Context=*/nullptr,
+                               /*Specifier=*/nullptr,
+                               /*IgnorePrefixQualifiers=*/true});
+        return true;
+      }
+    }
+
     // In case of renaming an enum declaration, we have to explicitly handle
     // unscoped enum constants referenced in expressions (e.g.
     // "auto r = ns1::ns2::Green" where Green is an enum constant of an unscoped
diff --git a/lib/Tooling/StandaloneExecution.cpp b/lib/Tooling/StandaloneExecution.cpp
new file mode 100644
index 0000000..eea8e39
--- /dev/null
+++ b/lib/Tooling/StandaloneExecution.cpp
@@ -0,0 +1,91 @@
+//===- lib/Tooling/Execution.cpp - Standalone clang action execution. -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/StandaloneExecution.h"
+#include "clang/Tooling/ToolExecutorPluginRegistry.h"
+
+namespace clang {
+namespace tooling {
+
+static llvm::Error make_string_error(const llvm::Twine &Message) {
+  return llvm::make_error<llvm::StringError>(Message,
+                                             llvm::inconvertibleErrorCode());
+}
+
+const char *StandaloneToolExecutor::ExecutorName = "StandaloneToolExecutor";
+
+static ArgumentsAdjuster getDefaultArgumentsAdjusters() {
+  return combineAdjusters(
+      getClangStripOutputAdjuster(),
+      combineAdjusters(getClangSyntaxOnlyAdjuster(),
+                       getClangStripDependencyFileAdjuster()));
+}
+
+StandaloneToolExecutor::StandaloneToolExecutor(
+    const CompilationDatabase &Compilations,
+    llvm::ArrayRef<std::string> SourcePaths,
+    std::shared_ptr<PCHContainerOperations> PCHContainerOps)
+    : Tool(Compilations, SourcePaths), Context(&Results),
+      ArgsAdjuster(getDefaultArgumentsAdjusters()) {
+  // Use self-defined default argument adjusters instead of the default
+  // adjusters that come with the old `ClangTool`.
+  Tool.clearArgumentsAdjusters();
+}
+
+StandaloneToolExecutor::StandaloneToolExecutor(
+    CommonOptionsParser Options,
+    std::shared_ptr<PCHContainerOperations> PCHContainerOps)
+    : OptionsParser(std::move(Options)),
+      Tool(OptionsParser->getCompilations(), OptionsParser->getSourcePathList(),
+           PCHContainerOps),
+      Context(&Results), ArgsAdjuster(getDefaultArgumentsAdjusters()) {
+  Tool.clearArgumentsAdjusters();
+}
+
+llvm::Error StandaloneToolExecutor::execute(
+    llvm::ArrayRef<
+        std::pair<std::unique_ptr<FrontendActionFactory>, ArgumentsAdjuster>>
+        Actions) {
+  if (Actions.empty())
+    return make_string_error("No action to execute.");
+
+  if (Actions.size() != 1)
+    return make_string_error(
+        "Only support executing exactly 1 action at this point.");
+
+  auto &Action = Actions.front();
+  Tool.appendArgumentsAdjuster(Action.second);
+  Tool.appendArgumentsAdjuster(ArgsAdjuster);
+  if (Tool.run(Action.first.get()))
+    return make_string_error("Failed to run action.");
+
+  return llvm::Error::success();
+}
+
+class StandaloneToolExecutorPlugin : public ToolExecutorPlugin {
+public:
+  llvm::Expected<std::unique_ptr<ToolExecutor>>
+  create(CommonOptionsParser &OptionsParser) override {
+    if (OptionsParser.getSourcePathList().empty())
+      return make_string_error(
+          "[StandaloneToolExecutorPlugin] No positional argument found.");
+    return llvm::make_unique<StandaloneToolExecutor>(std::move(OptionsParser));
+  }
+};
+
+static ToolExecutorPluginRegistry::Add<StandaloneToolExecutorPlugin>
+    X("standalone", "Runs FrontendActions on a set of files provided "
+                    "via positional arguments.");
+
+// This anchor is used to force the linker to link in the generated object file
+// and thus register the plugin.
+volatile int StandaloneToolExecutorAnchorSource = 0;
+
+} // end namespace tooling
+} // end namespace clang
diff --git a/lib/Tooling/Tooling.cpp b/lib/Tooling/Tooling.cpp
index df9d7df..7ae2950 100644
--- a/lib/Tooling/Tooling.cpp
+++ b/lib/Tooling/Tooling.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Host.h"
@@ -327,10 +328,11 @@
 
 ClangTool::ClangTool(const CompilationDatabase &Compilations,
                      ArrayRef<std::string> SourcePaths,
-                     std::shared_ptr<PCHContainerOperations> PCHContainerOps)
+                     std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+                     IntrusiveRefCntPtr<vfs::FileSystem> BaseFS)
     : Compilations(Compilations), SourcePaths(SourcePaths),
       PCHContainerOps(std::move(PCHContainerOps)),
-      OverlayFileSystem(new vfs::OverlayFileSystem(vfs::getRealFileSystem())),
+      OverlayFileSystem(new vfs::OverlayFileSystem(BaseFS)),
       InMemoryFileSystem(new vfs::InMemoryFileSystem),
       Files(new FileManager(FileSystemOptions(), OverlayFileSystem)),
       DiagConsumer(nullptr) {
@@ -347,11 +349,7 @@
 }
 
 void ClangTool::appendArgumentsAdjuster(ArgumentsAdjuster Adjuster) {
-  if (ArgsAdjuster)
-    ArgsAdjuster =
-        combineAdjusters(std::move(ArgsAdjuster), std::move(Adjuster));
-  else
-    ArgsAdjuster = std::move(Adjuster);
+  ArgsAdjuster = combineAdjusters(std::move(ArgsAdjuster), std::move(Adjuster));
 }
 
 void ClangTool::clearArgumentsAdjusters() {
@@ -390,6 +388,7 @@
             llvm::MemoryBuffer::getMemBuffer(MappedFile.second));
 
   bool ProcessingFailed = false;
+  bool FileSkipped = false;
   for (const auto &SourcePath : SourcePaths) {
     std::string File(getAbsolutePath(SourcePath));
 
@@ -403,12 +402,8 @@
     std::vector<CompileCommand> CompileCommandsForFile =
         Compilations.getCompileCommands(File);
     if (CompileCommandsForFile.empty()) {
-      // FIXME: There are two use cases here: doing a fuzzy
-      // "find . -name '*.cc' |xargs tool" match, where as a user I don't care
-      // about the .cc files that were not found, and the use case where I
-      // specify all files I want to run over explicitly, where this should
-      // be an error. We'll want to add an option for this.
       llvm::errs() << "Skipping " << File << ". Compile command not found.\n";
+      FileSkipped = true;
       continue;
     }
     for (CompileCommand &CompileCommand : CompileCommandsForFile) {
@@ -468,7 +463,7 @@
                                  Twine(InitialDirectory) + "\n!");
     }
   }
-  return ProcessingFailed ? 1 : 0;
+  return ProcessingFailed ? 1 : (FileSkipped ? 2 : 0);
 }
 
 namespace {
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index 2190f27..af8ab16 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -101,12 +101,9 @@
   install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -P ${BINARY_DIR}/cmake_install.cmake \)"
     COMPONENT compiler-rt)
 
-  add_custom_target(install-compiler-rt
-                    DEPENDS compiler-rt
-                    COMMAND "${CMAKE_COMMAND}"
-                             -DCMAKE_INSTALL_COMPONENT=compiler-rt
-                             -P "${CMAKE_BINARY_DIR}/cmake_install.cmake"
-                    USES_TERMINAL)
+  add_llvm_install_targets(install-compiler-rt
+                           DEPENDS compiler-rt
+                           COMPONENT compiler-rt)
 
   # Add top-level targets that build specific compiler-rt runtimes.
   set(COMPILER_RT_RUNTIMES fuzzer asan builtins dfsan lsan msan profile tsan ubsan ubsan-minimal)
diff --git a/test/ARCMT/checking.m b/test/ARCMT/checking.m
index 0ce894c..182260c 100644
--- a/test/ARCMT/checking.m
+++ b/test/ARCMT/checking.m
@@ -116,7 +116,7 @@
 }
 
 struct S {
-  A* a; // expected-error {{ARC forbids Objective-C objects in struct}}
+  A* a;
 };
 
 @interface B
diff --git a/test/ARCMT/releases-driver.m b/test/ARCMT/releases-driver.m
index 7b1d2fb..3dd546f 100644
--- a/test/ARCMT/releases-driver.m
+++ b/test/ARCMT/releases-driver.m
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -fblocks -fsyntax-only -fobjc-arc -x objective-c %s.result
-// RUN: cp %s %t
+// RUN: cat %s > %t
 // RUN: %clang_cc1 -arcmt-modify -triple x86_64-apple-macosx10.6 -x objective-c %t
 // RUN: diff %t %s.result
 // RUN: rm %t
diff --git a/test/ARCMT/releases-driver.m.result b/test/ARCMT/releases-driver.m.result
index 4c864bd..e9aa2d5 100644
--- a/test/ARCMT/releases-driver.m.result
+++ b/test/ARCMT/releases-driver.m.result
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -fblocks -fsyntax-only -fobjc-arc -x objective-c %s.result
-// RUN: cp %s %t
+// RUN: cat %s > %t
 // RUN: %clang_cc1 -arcmt-modify -triple x86_64-apple-macosx10.6 -x objective-c %t
 // RUN: diff %t %s.result
 // RUN: rm %t
diff --git a/test/ARCMT/with-arc-mode-modify.m b/test/ARCMT/with-arc-mode-modify.m
index fbbd630..bc4662d 100644
--- a/test/ARCMT/with-arc-mode-modify.m
+++ b/test/ARCMT/with-arc-mode-modify.m
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -fobjc-arc -x objective-c %s.result
-// RUN: cp %s %t
+// RUN: cat %s > %t
 // RUN: %clang_cc1 -arcmt-modify -fsyntax-only -fobjc-arc -x objective-c %t
 // RUN: diff %t %s.result
 // RUN: rm %t
diff --git a/test/ARCMT/with-arc-mode-modify.m.result b/test/ARCMT/with-arc-mode-modify.m.result
index 631f276..b847c13 100644
--- a/test/ARCMT/with-arc-mode-modify.m.result
+++ b/test/ARCMT/with-arc-mode-modify.m.result
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -fobjc-arc -x objective-c %s.result
-// RUN: cp %s %t
+// RUN: cat %s > %t
 // RUN: %clang_cc1 -arcmt-modify -fsyntax-only -fobjc-arc -x objective-c %t
 // RUN: diff %t %s.result
 // RUN: rm %t
diff --git a/test/ASTMerge/class-template/Inputs/class-template1.cpp b/test/ASTMerge/class-template/Inputs/class-template1.cpp
index 440b5ab..fb5b229 100644
--- a/test/ASTMerge/class-template/Inputs/class-template1.cpp
+++ b/test/ASTMerge/class-template/Inputs/class-template1.cpp
@@ -1,5 +1,7 @@
 template<typename T>
-struct X0;
+struct X0 {
+  T getValue(T arg) { return arg; }
+};
 
 template<int I>
 struct X1;
@@ -26,6 +28,7 @@
 template<>
 struct X0<char> {
   int member;
+  char getValue(char ch) { return static_cast<char>(member); }
 };
 
 template<>
diff --git a/test/ASTMerge/class-template/Inputs/class-template2.cpp b/test/ASTMerge/class-template/Inputs/class-template2.cpp
index 6300301..b5d0add 100644
--- a/test/ASTMerge/class-template/Inputs/class-template2.cpp
+++ b/test/ASTMerge/class-template/Inputs/class-template2.cpp
@@ -1,5 +1,7 @@
 template<class T>
-struct X0;
+struct X0 {
+  T getValue(T arg);
+};
 
 template<int I>
 struct X1;
diff --git a/test/ASTMerge/class-template/test.cpp b/test/ASTMerge/class-template/test.cpp
index 0ab5443..7e25c5d 100644
--- a/test/ASTMerge/class-template/test.cpp
+++ b/test/ASTMerge/class-template/test.cpp
@@ -1,24 +1,28 @@
-// RUN: %clang_cc1 -emit-pch -o %t.1.ast %S/Inputs/class-template1.cpp
-// RUN: %clang_cc1 -emit-pch -o %t.2.ast %S/Inputs/class-template2.cpp
-// RUN: not %clang_cc1 -ast-merge %t.1.ast -ast-merge %t.2.ast -fsyntax-only %s 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -std=c++1z -emit-pch -o %t.1.ast %S/Inputs/class-template1.cpp
+// RUN: %clang_cc1 -std=c++1z -emit-pch -o %t.2.ast %S/Inputs/class-template2.cpp
+// RUN: not %clang_cc1 -std=c++1z  -ast-merge %t.1.ast -ast-merge %t.2.ast -fsyntax-only %s 2>&1 | FileCheck %s
 
-// CHECK: class-template1.cpp:7:14: error: non-type template parameter declared with incompatible types in different translation units ('int' vs. 'long')
-// CHECK: class-template2.cpp:7:15: note: declared here with type 'long'
+static_assert(sizeof(X0<char>().getValue(1)) == sizeof(char));
+static_assert(sizeof(X0<int>().getValue(1)) == sizeof(int));
 
-// CHECK: class-template1.cpp:10:14: error: template parameter has different kinds in different translation units
-// CHECK: class-template2.cpp:10:10: note: template parameter declared here
+// CHECK: class-template1.cpp:9:14: error: non-type template parameter declared with incompatible types in different translation units ('int' vs. 'long')
+// CHECK: class-template2.cpp:9:15: note: declared here with type 'long'
 
-// CHECK: class-template1.cpp:16:23: error: non-type template parameter declared with incompatible types in different translation units ('long' vs. 'int')
-// CHECK: class-template2.cpp:16:23: note: declared here with type 'int'
+// CHECK: class-template1.cpp:12:14: error: template parameter has different kinds in different translation units
+// CHECK: class-template2.cpp:12:10: note: template parameter declared here
 
-// CHECK: class-template1.cpp:19:10: error: template parameter has different kinds in different translation units
-// CHECK: class-template2.cpp:19:10: note: template parameter declared here
+// CHECK: class-template1.cpp:18:23: error: non-type template parameter declared with incompatible types in different translation units ('long' vs. 'int')
+// CHECK: class-template2.cpp:18:23: note: declared here with type 'int'
 
-// CHECK: class-template2.cpp:25:20: error: external variable 'x0r' declared with incompatible types in different translation units ('X0<double> *' vs. 'X0<float> *')
-// CHECK: class-template1.cpp:24:19: note: declared here with type 'X0<float> *'
+// CHECK: class-template1.cpp:21:10: error: template parameter has different kinds in different translation units
+// CHECK: class-template2.cpp:21:10: note: template parameter declared here
 
-// CHECK: class-template1.cpp:32:8: warning: type 'X0<wchar_t>' has incompatible definitions in different translation units
-// CHECK: class-template1.cpp:33:7: note: field 'member' has type 'int' here
-// CHECK: class-template2.cpp:34:9: note: field 'member' has type 'float' here
+// CHECK: class-template2.cpp:27:20: error: external variable 'x0r' declared with incompatible types in different translation units ('X0<double> *' vs. 'X0<float> *')
+// CHECK: class-template1.cpp:26:19: note: declared here with type 'X0<float> *'
+
+// CHECK: class-template1.cpp:35:8: warning: type 'X0<wchar_t>' has incompatible definitions in different translation units
+// CHECK: class-template1.cpp:36:7: note: field 'member' has type 'int' here
+// CHECK: class-template2.cpp:36:9: note: field 'member' has type 'float' here
 
 // CHECK: 1 warning and 5 errors generated.
+// CHECK-NOT: static_assert
diff --git a/test/ASTMerge/exprs-cpp/Inputs/exprs3.cpp b/test/ASTMerge/exprs-cpp/Inputs/exprs3.cpp
index 2a33c35..6fdc33f 100644
--- a/test/ASTMerge/exprs-cpp/Inputs/exprs3.cpp
+++ b/test/ASTMerge/exprs-cpp/Inputs/exprs3.cpp
@@ -122,3 +122,20 @@
 const bool ExpressionTrait = __is_lvalue_expr(1);
 const unsigned ArrayRank = __array_rank(int[10][20]);
 const unsigned ArrayExtent = __array_extent(int[10][20], 1);
+
+constexpr int testLambdaAdd(int toAdd) {
+  const int Captured1 = 1, Captured2 = 2;
+  constexpr auto LambdaAdd = [Captured1, Captured2](int k) -> int {
+    return Captured1 + Captured2 + k;
+  };
+  return LambdaAdd(toAdd);
+}
+
+template<typename T>
+struct TestLambdaTemplate {
+  T i, j;
+  TestLambdaTemplate(T i, const T &j) : i(i), j(j) {}
+  T testLambda(T k) {
+    return [this](T k) -> decltype(auto) { return i + j + k; }(k);
+  }
+};
diff --git a/test/ASTMerge/exprs-cpp/test.cpp b/test/ASTMerge/exprs-cpp/test.cpp
index 0535aa8..c0b282e 100644
--- a/test/ASTMerge/exprs-cpp/test.cpp
+++ b/test/ASTMerge/exprs-cpp/test.cpp
@@ -30,6 +30,8 @@
 static_assert(ArrayRank == 2);
 static_assert(ArrayExtent == 20);
 
+static_assert(testLambdaAdd(3) == 6);
+
 void testImport(int *x, const S1 &cs1, S1 &s1) {
   testNewThrowDelete();
   testArrayElement(nullptr, 12);
@@ -44,4 +46,5 @@
   testDefaultArg();
   testDefaultArgExpr();
   useTemplateType();
+  TestLambdaTemplate<int>(1, 2).testLambda(3);
 }
diff --git a/test/ASTMerge/function-cpp/Inputs/function-1.cpp b/test/ASTMerge/function-cpp/Inputs/function-1.cpp
new file mode 100644
index 0000000..ee97a1a
--- /dev/null
+++ b/test/ASTMerge/function-cpp/Inputs/function-1.cpp
@@ -0,0 +1,8 @@
+
+template<typename T> constexpr T add(T arg1, T arg2) {
+  return arg1 + arg2;
+}
+
+template<> constexpr int add(int arg1, int arg2) {
+  return arg1 + arg2 + 2;
+}
diff --git a/test/ASTMerge/function-cpp/test.cpp b/test/ASTMerge/function-cpp/test.cpp
new file mode 100644
index 0000000..304ce3c
--- /dev/null
+++ b/test/ASTMerge/function-cpp/test.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -std=c++1z -emit-pch -o %t.1.ast %S/Inputs/function-1.cpp
+// RUN: %clang_cc1 -std=c++1z  -ast-merge %t.1.ast -fsyntax-only %s 2>&1 | FileCheck %s
+// XFAIL: *
+
+static_assert(add(1, 2) == 5);
+
+// FIXME: support of templated function overload is still not implemented.
+static_assert(add('\1', '\2') == 3);
+
+// CHECK-NOT: static_assert
diff --git a/test/ASTMerge/interface/Inputs/interface1.m b/test/ASTMerge/interface/Inputs/interface1.m
index 5865c0e..6192150 100644
--- a/test/ASTMerge/interface/Inputs/interface1.m
+++ b/test/ASTMerge/interface/Inputs/interface1.m
@@ -100,4 +100,6 @@
 @implementation I15 : I12
 @end
 
-
+@interface ImportSelectorSLoc { }
+-(int)addInt:(int)a toInt:(int)b moduloInt:(int)c; // don't crash here
+@end
diff --git a/test/ASTMerge/namespace/Inputs/namespace1.cpp b/test/ASTMerge/namespace/Inputs/namespace1.cpp
index 1ff84f3..4a53952 100644
--- a/test/ASTMerge/namespace/Inputs/namespace1.cpp
+++ b/test/ASTMerge/namespace/Inputs/namespace1.cpp
@@ -15,3 +15,13 @@
 namespace N3 {
   extern float z;
 }
+
+namespace AliasWithSameName = N3;
+
+namespace TestUnresolvedTypenameAndValueDecls {
+template <class T> class Base {
+public:
+  typedef T foo;
+  void bar();
+};
+}
diff --git a/test/ASTMerge/namespace/Inputs/namespace2.cpp b/test/ASTMerge/namespace/Inputs/namespace2.cpp
index 80429f7..f65057d 100644
--- a/test/ASTMerge/namespace/Inputs/namespace2.cpp
+++ b/test/ASTMerge/namespace/Inputs/namespace2.cpp
@@ -15,3 +15,46 @@
 namespace N3 {
   extern double z;
 }
+
+namespace Enclosing {
+namespace Nested {
+  const int z = 4;
+}
+}
+
+namespace ContainsInline {
+  inline namespace Inline {
+    const int z = 10;
+  }
+}
+
+namespace TestAliasName = Enclosing::Nested;
+// NOTE: There is no warning on this alias.
+namespace AliasWithSameName = Enclosing::Nested;
+
+namespace TestUsingDecls {
+
+namespace A {
+void foo();
+}
+namespace B {
+using A::foo; // <- a UsingDecl creating a UsingShadow
+}
+
+}// end namespace TestUsingDecls
+
+namespace TestUnresolvedTypenameAndValueDecls {
+
+template <class T> class Base;
+template <class T> class Derived : public Base<T> {
+public:
+  using typename Base<T>::foo;
+  using Base<T>::bar;
+  typedef typename Derived::foo NewUnresolvedUsingType;
+};
+
+} // end namespace TestUnresolvedTypenameAndValueDecls
+
+namespace TestUsingNamespace {
+  using namespace Enclosing;
+}
diff --git a/test/ASTMerge/namespace/test.cpp b/test/ASTMerge/namespace/test.cpp
index 8cc0fa2..f1796c0 100644
--- a/test/ASTMerge/namespace/test.cpp
+++ b/test/ASTMerge/namespace/test.cpp
@@ -1,6 +1,17 @@
-// RUN: %clang_cc1 -emit-pch -o %t.1.ast %S/Inputs/namespace1.cpp
-// RUN: %clang_cc1 -emit-pch -o %t.2.ast %S/Inputs/namespace2.cpp
-// RUN: not %clang_cc1 -ast-merge %t.1.ast -ast-merge %t.2.ast -fsyntax-only %s 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -emit-pch -std=c++1z -o %t.1.ast %S/Inputs/namespace1.cpp
+// RUN: %clang_cc1 -emit-pch -std=c++1z -o %t.2.ast %S/Inputs/namespace2.cpp
+// RUN: not %clang_cc1 -std=c++1z -ast-merge %t.1.ast -ast-merge %t.2.ast -fsyntax-only %s 2>&1 | FileCheck %s
+
+static_assert(TestAliasName::z == 4);
+static_assert(ContainsInline::z == 10);
+
+void testImport() {
+  typedef TestUnresolvedTypenameAndValueDecls::Derived<int> Imported;
+  Imported a; // Successfull instantiation
+  static_assert(sizeof(Imported::foo) == sizeof(int));
+  static_assert(sizeof(TestUnresolvedTypenameAndValueDecls::Derived<double>::NewUnresolvedUsingType) == sizeof(double));
+}
+
 
 // CHECK: namespace2.cpp:16:17: error: external variable 'z' declared with incompatible types in different translation units ('double' vs. 'float')
 // CHECK: namespace1.cpp:16:16: note: declared here with type 'float'
diff --git a/test/ASTMerge/var-cpp/Inputs/var1.cpp b/test/ASTMerge/var-cpp/Inputs/var1.cpp
new file mode 100644
index 0000000..e29db9d
--- /dev/null
+++ b/test/ASTMerge/var-cpp/Inputs/var1.cpp
@@ -0,0 +1,17 @@
+
+template <typename T>
+constexpr T my_pi = T(3.1415926535897932385L);  // variable template
+
+template <> constexpr char my_pi<char> = '3';   // variable template specialization
+
+template <typename T>
+struct Wrapper {
+  template <typename U> static constexpr U my_const = U(1);
+   // Variable template partial specialization with member variable.
+  template <typename U> static constexpr U *my_const<const U *> = (U *)(0);
+};
+
+constexpr char a[] = "hello";
+
+template <> template <>
+constexpr const char *Wrapper<float>::my_const<const char *> = a;
diff --git a/test/ASTMerge/var-cpp/test.cpp b/test/ASTMerge/var-cpp/test.cpp
new file mode 100644
index 0000000..28d38d5
--- /dev/null
+++ b/test/ASTMerge/var-cpp/test.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -emit-pch -std=c++17 -o %t.1.ast %S/Inputs/var1.cpp
+// RUN: %clang_cc1 -std=c++17 -ast-merge %t.1.ast -fsyntax-only %s 2>&1
+
+static_assert(my_pi<double> == (double)3.1415926535897932385L);
+static_assert(my_pi<char> == '3');
+
+static_assert(Wrapper<int>::my_const<float> == 1.f);
+static_assert(Wrapper<char>::my_const<const float *> == nullptr);
+static_assert(Wrapper<float>::my_const<const char *> == a);
diff --git a/test/Analysis/Inputs/ctu-chain.cpp b/test/Analysis/Inputs/ctu-chain.cpp
new file mode 100644
index 0000000..2314dde
--- /dev/null
+++ b/test/Analysis/Inputs/ctu-chain.cpp
@@ -0,0 +1,20 @@
+int h_chain(int x) {
+  return x * 2;
+}
+
+namespace chns {
+int chf3(int x);
+
+int chf2(int x) {
+  return chf3(x);
+}
+
+class chcls {
+public:
+  int chf4(int x);
+};
+
+int chcls::chf4(int x) {
+  return x * 3;
+}
+}
diff --git a/test/Analysis/Inputs/ctu-other.cpp b/test/Analysis/Inputs/ctu-other.cpp
new file mode 100644
index 0000000..d35b7b2
--- /dev/null
+++ b/test/Analysis/Inputs/ctu-other.cpp
@@ -0,0 +1,67 @@
+int callback_to_main(int x);
+int f(int x) {
+  return x - 1;
+}
+
+int g(int x) {
+  return callback_to_main(x) + 1;
+}
+
+int h_chain(int);
+
+int h(int x) {
+  return 2 * h_chain(x);
+}
+
+namespace myns {
+int fns(int x) {
+  return x + 7;
+}
+
+namespace embed_ns {
+int fens(int x) {
+  return x - 3;
+}
+}
+
+class embed_cls {
+public:
+  int fecl(int x) {
+    return x - 7;
+  }
+};
+}
+
+class mycls {
+public:
+  int fcl(int x) {
+    return x + 5;
+  }
+  static int fscl(int x) {
+    return x + 6;
+  }
+
+  class embed_cls2 {
+  public:
+    int fecl2(int x) {
+      return x - 11;
+    }
+  };
+};
+
+namespace chns {
+int chf2(int x);
+
+class chcls {
+public:
+  int chf4(int x);
+};
+
+int chf3(int x) {
+  return chcls().chf4(x);
+}
+
+int chf1(int x) {
+  return chf2(x);
+}
+}
diff --git a/test/Analysis/Inputs/externalFnMap.txt b/test/Analysis/Inputs/externalFnMap.txt
new file mode 100644
index 0000000..2c93d3e
--- /dev/null
+++ b/test/Analysis/Inputs/externalFnMap.txt
@@ -0,0 +1,13 @@
+c:@N@chns@F@chf1#I# ctu-other.cpp.ast
+c:@N@myns@N@embed_ns@F@fens#I# ctu-other.cpp.ast
+c:@F@g#I# ctu-other.cpp.ast
+c:@S@mycls@F@fscl#I#S ctu-other.cpp.ast
+c:@S@mycls@F@fcl#I# ctu-other.cpp.ast
+c:@N@myns@S@embed_cls@F@fecl#I# ctu-other.cpp.ast
+c:@S@mycls@S@embed_cls2@F@fecl2#I# ctu-other.cpp.ast
+c:@F@f#I# ctu-other.cpp.ast
+c:@N@myns@F@fns#I# ctu-other.cpp.ast
+c:@F@h#I# ctu-other.cpp.ast
+c:@F@h_chain#I# ctu-chain.cpp.ast
+c:@N@chns@S@chcls@F@chf4#I# ctu-chain.cpp.ast
+c:@N@chns@F@chf2#I# ctu-chain.cpp.ast
diff --git a/test/Analysis/Inputs/system-header-simulator-cxx.h b/test/Analysis/Inputs/system-header-simulator-cxx.h
index 809b768..7702e51 100644
--- a/test/Analysis/Inputs/system-header-simulator-cxx.h
+++ b/test/Analysis/Inputs/system-header-simulator-cxx.h
@@ -584,10 +584,21 @@
 
 }
 
+#ifdef TEST_INLINABLE_ALLOCATORS
+namespace std {
+  void *malloc(size_t);
+  void free(void *);
+}
+void* operator new(std::size_t size, const std::nothrow_t&) throw() { return std::malloc(size); }
+void* operator new[](std::size_t size, const std::nothrow_t&) throw() { return std::malloc(size); }
+void operator delete(void* ptr, const std::nothrow_t&) throw() { std::free(ptr); }
+void operator delete[](void* ptr, const std::nothrow_t&) throw() { std::free(ptr); }
+#else
 void* operator new(std::size_t, const std::nothrow_t&) throw();
 void* operator new[](std::size_t, const std::nothrow_t&) throw();
 void operator delete(void*, const std::nothrow_t&) throw();
 void operator delete[](void*, const std::nothrow_t&) throw();
+#endif
 
 void* operator new (std::size_t size, void* ptr) throw() { return ptr; };
 void* operator new[] (std::size_t size, void* ptr) throw() { return ptr; };
diff --git a/test/Analysis/Inputs/system-header-simulator.h b/test/Analysis/Inputs/system-header-simulator.h
index 2e6f1e7..8542387 100644
--- a/test/Analysis/Inputs/system-header-simulator.h
+++ b/test/Analysis/Inputs/system-header-simulator.h
@@ -32,6 +32,7 @@
 size_t strlen(const char *);
 
 char *strcpy(char *restrict, const char *restrict);
+char *strncpy(char *dst, const char *src, size_t n);
 void *memcpy(void *dst, const void *src, size_t n);
 
 typedef unsigned long __darwin_pthread_key_t;
@@ -109,4 +110,6 @@
 #ifndef NULL
 #define __DARWIN_NULL 0
 #define NULL __DARWIN_NULL
-#endif
\ No newline at end of file
+#endif
+
+#define offsetof(t, d) __builtin_offsetof(t, d)
\ No newline at end of file
diff --git a/test/Analysis/MismatchedDeallocator-checker-test.mm b/test/Analysis/MismatchedDeallocator-checker-test.mm
index b80f7df..013d677 100644
--- a/test/Analysis/MismatchedDeallocator-checker-test.mm
+++ b/test/Analysis/MismatchedDeallocator-checker-test.mm
@@ -1,4 +1,5 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.MismatchedDeallocator -fblocks -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.MismatchedDeallocator -fblocks -DTEST_INLINABLE_ALLOCATORS -verify %s
 
 #include "Inputs/system-header-simulator-objc.h"
 #include "Inputs/system-header-simulator-cxx.h"
diff --git a/test/Analysis/MisusedMovedObject.cpp b/test/Analysis/MisusedMovedObject.cpp
index 57c1ecc..07d52c8 100644
--- a/test/Analysis/MisusedMovedObject.cpp
+++ b/test/Analysis/MisusedMovedObject.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.cplusplus.MisusedMovedObject -std=c++11 -verify -analyzer-output=text %s
+// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.cplusplus.MisusedMovedObject -std=c++11 -verify -analyzer-output=text -analyzer-config exploration_strategy=unexplored_first_queue %s
+// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.cplusplus.MisusedMovedObject -std=c++11 -analyzer-config exploration_strategy=dfs -verify -analyzer-output=text -DDFS=1 %s
 
 namespace std {
 
@@ -38,6 +39,7 @@
   B() = default;
   B(const B &) = default;
   B(B &&) = default;
+  B& operator=(const B &q) = default;
   void operator=(B &&b) {
     return;
   }
@@ -70,6 +72,12 @@
   A(A &&other, char *k) {
     moveconstruct(std::move(other));
   }
+  void operator=(const A &other) {
+    i = other.i;
+    d = other.d;
+    b = other.b;
+    return;
+  }
   void operator=(A &&other) {
     moveconstruct(std::move(other));
     return;
@@ -105,17 +113,42 @@
 }
 
 void simpleMoveCtorTest() {
-  A a;
-  A b;
-  b = std::move(a); // expected-note {{'a' became 'moved-from' here}}
-  a.foo();          // expected-warning {{Method call on a 'moved-from' object 'a'}} expected-note {{Method call on a 'moved-from' object 'a'}}
+  {
+    A a;
+    A b = std::move(a); // expected-note {{'a' became 'moved-from' here}}
+    a.foo();            // expected-warning {{Method call on a 'moved-from' object 'a'}} expected-note {{Method call on a 'moved-from' object 'a'}}
+  }
+  {
+    A a;
+    A b = std::move(a); // expected-note {{'a' became 'moved-from' here}}
+    b = a;              // expected-warning {{Copying a 'moved-from' object 'a'}} expected-note {{Copying a 'moved-from' object 'a'}}
+  }
+  {
+    A a;
+    A b = std::move(a); // expected-note {{'a' became 'moved-from' here}}
+    b = std::move(a);   // expected-warning {{Moving a 'moved-from' object 'a'}} expected-note {{Moving a 'moved-from' object 'a'}}
+  }
 }
 
 void simpleMoveAssignementTest() {
-  A a;
-  A b;
-  b = std::move(a); // expected-note {{'a' became 'moved-from' here}}
-  a.foo();          // expected-warning {{Method call on a 'moved-from' object 'a'}} expected-note {{Method call on a 'moved-from' object 'a'}}
+  {
+    A a;
+    A b;
+    b = std::move(a); // expected-note {{'a' became 'moved-from' here}}
+    a.foo();          // expected-warning {{Method call on a 'moved-from' object 'a'}} expected-note {{Method call on a 'moved-from' object 'a'}}
+  }
+  {
+    A a;
+    A b;
+    b = std::move(a); // expected-note {{'a' became 'moved-from' here}}
+    A c(a);           // expected-warning {{Copying a 'moved-from' object 'a'}} expected-note {{Copying a 'moved-from' object 'a'}}
+  }
+  {
+    A a;
+    A b;
+    b = std::move(a);  // expected-note {{'a' became 'moved-from' here}}
+    A c(std::move(a)); // expected-warning {{Moving a 'moved-from' object 'a'}} expected-note {{Moving a 'moved-from' object 'a'}}
+  }
 }
 
 void moveInInitListTest() {
@@ -270,7 +303,7 @@
   {
     A a;
     for (int i = 0; i < bignum(); i++) { // expected-note {{Loop condition is true.  Entering loop body}} expected-note {{Loop condition is true.  Entering loop body}}
-      constCopyOrMoveCall(std::move(a)); // expected-warning {{Copying a 'moved-from' object 'a'}} expected-note {{Copying a 'moved-from' object 'a'}}
+      constCopyOrMoveCall(std::move(a)); // expected-warning {{Moving a 'moved-from' object 'a'}} expected-note {{Moving a 'moved-from' object 'a'}}
       // expected-note@-1 {{'a' became 'moved-from' here}}
     }
   }
@@ -332,6 +365,8 @@
     A b = std::move(a);
     a.reset(); // no-warning
     a.foo();   // no-warning
+    // Test if resets the state of subregions as well.
+    a.b.foo(); // no-warning
   }
   {
     A a;
@@ -344,6 +379,7 @@
     A b = std::move(a);
     a.clear(); // no-warning
     a.foo();   // no-warning
+    a.b.foo(); // no-warning
   }
 }
 
@@ -439,12 +475,16 @@
   // A variation on the theme above.
   {
     A a;
+#ifdef DFS
     a.foo() > 0 ? a.foo() : A(std::move(a)).foo(); // expected-note {{Assuming the condition is false}} expected-note {{'?' condition is false}}
+#else
+    a.foo() > 0 ? a.foo() : A(std::move(a)).foo(); // expected-note {{Assuming the condition is true}} expected-note {{'?' condition is true}}
+#endif
   }
   // Same thing, but with a switch statement.
   {
     A a, b;
-    switch (i) { // expected-note {{Control jumps to 'case 1:'  at line 448}}
+    switch (i) { // expected-note {{Control jumps to 'case 1:'}}
     case 1:
       b = std::move(a); // no-warning
       break;            // expected-note {{Execution jumps to the end of the function}}
@@ -456,7 +496,7 @@
   // However, if there's a fallthrough, we do warn.
   {
     A a, b;
-    switch (i) { // expected-note {{Control jumps to 'case 1:'  at line 460}}
+    switch (i) { // expected-note {{Control jumps to 'case 1:'}}
     case 1:
       b = std::move(a); // expected-note {{'a' became 'moved-from' here}}
     case 2:
@@ -598,6 +638,7 @@
   }
 }
 
+class C : public A {};
 void subRegionMoveTest() {
   {
     A a;
@@ -616,12 +657,24 @@
     a.foo();             // expected-warning {{Method call on a 'moved-from' object 'a'}} expected-note {{Method call on a 'moved-from' object 'a'}}
     a.b.foo();           // no-warning
   }
+  {
+    C c;
+    C c1 = std::move(c); // expected-note {{'c' became 'moved-from' here}}
+    c.foo();             // expected-warning {{Method call on a 'moved-from' object 'c'}} expected-note {{Method call on a 'moved-from' object 'c'}}
+    c.b.foo();           // no-warning
+  }
 }
 
-class C: public A {};
 void resetSuperClass() {
   C c;
   C c1 = std::move(c);
   c.clear();
   C c2 = c; // no-warning
 }
+
+void reportSuperClass() {
+  C c;
+  C c1 = std::move(c); // expected-note {{'c' became 'moved-from' here}}
+  c.foo();             // expected-warning {{Method call on a 'moved-from' object 'c'}} expected-note {{Method call on a 'moved-from' object 'c'}}
+  C c2 = c;            // no-warning
+}
diff --git a/test/Analysis/NewDelete-atomics.cpp b/test/Analysis/NewDelete-atomics.cpp
new file mode 100644
index 0000000..275cf14
--- /dev/null
+++ b/test/Analysis/NewDelete-atomics.cpp
@@ -0,0 +1,74 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDeleteLeaks -DLEAKS -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete -std=c++11 -DTEST_INLINABLE_ALLOCATORS -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDeleteLeaks -DLEAKS -std=c++11 -DTEST_INLINABLE_ALLOCATORS -verify %s
+
+// expected-no-diagnostics
+
+#include "Inputs/system-header-simulator-cxx.h"
+
+typedef enum memory_order {
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_consume = __ATOMIC_CONSUME,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+class Obj {
+  int RefCnt;
+
+public:
+  int incRef() {
+    return __c11_atomic_fetch_add((volatile _Atomic(int) *)&RefCnt, 1,
+                                  memory_order_relaxed);
+  }
+
+  int decRef() {
+    return __c11_atomic_fetch_sub((volatile _Atomic(int) *)&RefCnt, 1,
+                                  memory_order_relaxed);
+  }
+
+  void foo();
+};
+
+class IntrusivePtr {
+  Obj *Ptr;
+
+public:
+  IntrusivePtr(Obj *Ptr) : Ptr(Ptr) {
+    Ptr->incRef();
+  }
+
+  IntrusivePtr(const IntrusivePtr &Other) : Ptr(Other.Ptr) {
+    Ptr->incRef();
+  }
+
+  ~IntrusivePtr() {
+  // We should not take the path on which the object is deleted.
+    if (Ptr->decRef() == 1)
+      delete Ptr;
+  }
+
+  Obj *getPtr() const { return Ptr; }
+};
+
+void testDestroyLocalRefPtr() {
+  IntrusivePtr p1(new Obj());
+  {
+    IntrusivePtr p2(p1);
+  }
+
+  // p1 still maintains ownership. The object is not deleted.
+  p1.getPtr()->foo(); // no-warning
+}
+
+void testDestroySymbolicRefPtr(const IntrusivePtr &p1) {
+  {
+    IntrusivePtr p2(p1);
+  }
+
+  // p1 still maintains ownership. The object is not deleted.
+  p1.getPtr()->foo(); // no-warning
+}
diff --git a/test/Analysis/NewDelete-checker-test.cpp b/test/Analysis/NewDelete-checker-test.cpp
index 66e8375..620237c 100644
--- a/test/Analysis/NewDelete-checker-test.cpp
+++ b/test/Analysis/NewDelete-checker-test.cpp
@@ -1,5 +1,12 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete -std=c++11 -fblocks -verify %s
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDeleteLeaks -DLEAKS -std=c++11 -fblocks -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete -std=c++11 -fblocks -analyzer-config c++-allocator-inlining=true -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDeleteLeaks -DLEAKS -std=c++11 -fblocks -analyzer-config c++-allocator-inlining=true -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete -std=c++11 -fblocks -DTEST_INLINABLE_ALLOCATORS -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDeleteLeaks -DLEAKS -std=c++11 -fblocks -DTEST_INLINABLE_ALLOCATORS -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete -std=c++11 -fblocks -analyzer-config c++-allocator-inlining=true -DTEST_INLINABLE_ALLOCATORS -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDeleteLeaks -DLEAKS -std=c++11 -fblocks -analyzer-config c++-allocator-inlining=true -DTEST_INLINABLE_ALLOCATORS -verify %s
+
 #include "Inputs/system-header-simulator-cxx.h"
 
 typedef __typeof__(sizeof(int)) size_t;
@@ -45,14 +52,18 @@
   void *p = operator new(0, std::nothrow);
 }
 #ifdef LEAKS
-// expected-warning@-2{{Potential leak of memory pointed to by 'p'}}
+#ifndef TEST_INLINABLE_ALLOCATORS
+// expected-warning@-3{{Potential leak of memory pointed to by 'p'}}
+#endif
 #endif
 
 void testGlobalNoThrowPlacementExprNewBeforeOverload() {
   int *p = new(std::nothrow) int;
 }
 #ifdef LEAKS
-// expected-warning@-2{{Potential leak of memory pointed to by 'p'}}
+#ifndef TEST_INLINABLE_ALLOCATORS
+// expected-warning@-3{{Potential leak of memory pointed to by 'p'}}
+#endif
 #endif
 
 //----- Standard pointer placement operators
@@ -186,7 +197,10 @@
 
 void testAllocDeallocNames() {
   int *p = new(std::nothrow) int[1];
-  delete[] (++p); // expected-warning{{Argument to 'delete[]' is offset by 4 bytes from the start of memory allocated by 'new[]'}}
+  delete[] (++p);
+#ifndef TEST_INLINABLE_ALLOCATORS
+  // expected-warning@-2{{Argument to 'delete[]' is offset by 4 bytes from the start of memory allocated by 'new[]'}}
+#endif
 }
 
 //--------------------------------
diff --git a/test/Analysis/NewDelete-custom.cpp b/test/Analysis/NewDelete-custom.cpp
index f06ff4a..f5a2952 100644
--- a/test/Analysis/NewDelete-custom.cpp
+++ b/test/Analysis/NewDelete-custom.cpp
@@ -1,8 +1,10 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete,unix.Malloc -std=c++11 -fblocks -verify %s
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete,cplusplus.NewDeleteLeaks,unix.Malloc -std=c++11 -DLEAKS -fblocks -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete,unix.Malloc -std=c++11 -analyzer-config c++-allocator-inlining=false -fblocks -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete,cplusplus.NewDeleteLeaks,unix.Malloc -std=c++11 -analyzer-config c++-allocator-inlining=false -DLEAKS=1 -fblocks -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete,unix.Malloc -std=c++11 -DALLOCATOR_INLINING=1 -fblocks -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete,cplusplus.NewDeleteLeaks,unix.Malloc -std=c++11 -DLEAKS=1 -DALLOCATOR_INLINING=1 -fblocks -verify %s
 #include "Inputs/system-header-simulator-cxx.h"
 
-#ifndef LEAKS
+#if !(LEAKS && !ALLOCATOR_INLINING)
 // expected-no-diagnostics
 #endif
 
@@ -11,7 +13,7 @@
 
 void *operator new[](std::size_t size) throw() { return allocator(size); }
 void *operator new(std::size_t size) throw() { return allocator(size); }
-void *operator new(std::size_t size, std::nothrow_t& nothrow) throw() { return allocator(size); }
+void *operator new(std::size_t size, const std::nothrow_t &nothrow) throw() { return allocator(size); }
 void *operator new(std::size_t, double d);
 
 class C {
@@ -26,7 +28,7 @@
 
   C *c3 = ::new C;
 }
-#ifdef LEAKS
+#if LEAKS && !ALLOCATOR_INLINING
 // expected-warning@-2{{Potential leak of memory pointed to by 'c3'}}
 #endif
 
@@ -37,7 +39,7 @@
 void testNewExprArray() {
   int *p = new int[0];
 }
-#ifdef LEAKS
+#if LEAKS && !ALLOCATOR_INLINING
 // expected-warning@-2{{Potential leak of memory pointed to by 'p'}}
 #endif
 
@@ -50,30 +52,27 @@
 void testNewExpr() {
   int *p = new int;
 }
-#ifdef LEAKS
+#if LEAKS && !ALLOCATOR_INLINING
 // expected-warning@-2{{Potential leak of memory pointed to by 'p'}}
 #endif
 
 
 //----- Custom NoThrow placement operators
 void testOpNewNoThrow() {
-  void *p = operator new(0, std::nothrow);
+  void *p = operator new(0, std::nothrow); // call is inlined, no warn
 }
-#ifdef LEAKS
-// expected-warning@-2{{Potential leak of memory pointed to by 'p'}}
-#endif
 
 void testNewExprNoThrow() {
   int *p = new(std::nothrow) int;
 }
-#ifdef LEAKS
+#if LEAKS && !ALLOCATOR_INLINING
 // expected-warning@-2{{Potential leak of memory pointed to by 'p'}}
 #endif
 
 //----- Custom placement operators
 void testOpNewPlacement() {
   void *p = operator new(0, 0.1); // no warn
-} 
+}
 
 void testNewExprPlacement() {
   int *p = new(0.1) int; // no warn
diff --git a/test/Analysis/NewDelete-intersections.mm b/test/Analysis/NewDelete-intersections.mm
index aa52c79..b370785 100644
--- a/test/Analysis/NewDelete-intersections.mm
+++ b/test/Analysis/NewDelete-intersections.mm
@@ -1,5 +1,7 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete -std=c++11 -fblocks -verify %s
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete,cplusplus.NewDeleteLeaks -std=c++11 -DLEAKS -fblocks -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete -std=c++11 -fblocks -DTEST_INLINABLE_ALLOCATORS -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete,cplusplus.NewDeleteLeaks -std=c++11 -DLEAKS -fblocks -DTEST_INLINABLE_ALLOCATORS -verify %s
 #include "Inputs/system-header-simulator-cxx.h"
 #include "Inputs/system-header-simulator-objc.h"
 
diff --git a/test/Analysis/NewDelete-path-notes.cpp b/test/Analysis/NewDelete-path-notes.cpp
index ac760ca..d57f67e 100644
--- a/test/Analysis/NewDelete-path-notes.cpp
+++ b/test/Analysis/NewDelete-path-notes.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus.NewDelete,unix.Malloc -analyzer-output=text -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus.NewDelete,unix.Malloc -analyzer-output=text -analyzer-config c++-allocator-inlining=true -verify %s
 // RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus.NewDelete,unix.Malloc -analyzer-output=plist -analyzer-config path-diagnostics-alternate=false %s -o %t.plist
 // RUN: FileCheck --input-file=%t.plist %s
 
@@ -40,12 +41,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>6</integer>
+// CHECK-NEXT:            <key>line</key><integer>7</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>6</integer>
+// CHECK-NEXT:            <key>line</key><integer>7</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -53,12 +54,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>6</integer>
+// CHECK-NEXT:            <key>line</key><integer>7</integer>
 // CHECK-NEXT:            <key>col</key><integer>12</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>6</integer>
+// CHECK-NEXT:            <key>line</key><integer>7</integer>
 // CHECK-NEXT:            <key>col</key><integer>14</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -70,7 +71,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>6</integer>
+// CHECK-NEXT:       <key>line</key><integer>7</integer>
 // CHECK-NEXT:       <key>col</key><integer>12</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -78,12 +79,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>6</integer>
+// CHECK-NEXT:          <key>line</key><integer>7</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>6</integer>
+// CHECK-NEXT:          <key>line</key><integer>7</integer>
 // CHECK-NEXT:          <key>col</key><integer>18</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -103,12 +104,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>6</integer>
+// CHECK-NEXT:            <key>line</key><integer>7</integer>
 // CHECK-NEXT:            <key>col</key><integer>12</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>6</integer>
+// CHECK-NEXT:            <key>line</key><integer>7</integer>
 // CHECK-NEXT:            <key>col</key><integer>14</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -116,12 +117,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>8</integer>
+// CHECK-NEXT:            <key>line</key><integer>9</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>8</integer>
+// CHECK-NEXT:            <key>line</key><integer>9</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -137,12 +138,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>8</integer>
+// CHECK-NEXT:            <key>line</key><integer>9</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>8</integer>
+// CHECK-NEXT:            <key>line</key><integer>9</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -150,12 +151,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>10</integer>
+// CHECK-NEXT:            <key>line</key><integer>11</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>10</integer>
+// CHECK-NEXT:            <key>line</key><integer>11</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -167,7 +168,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>10</integer>
+// CHECK-NEXT:       <key>line</key><integer>11</integer>
 // CHECK-NEXT:       <key>col</key><integer>5</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -175,12 +176,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>10</integer>
+// CHECK-NEXT:          <key>line</key><integer>11</integer>
 // CHECK-NEXT:          <key>col</key><integer>5</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>10</integer>
+// CHECK-NEXT:          <key>line</key><integer>11</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -200,12 +201,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>10</integer>
+// CHECK-NEXT:            <key>line</key><integer>11</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>10</integer>
+// CHECK-NEXT:            <key>line</key><integer>11</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -213,12 +214,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>13</integer>
+// CHECK-NEXT:            <key>line</key><integer>14</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>13</integer>
+// CHECK-NEXT:            <key>line</key><integer>14</integer>
 // CHECK-NEXT:            <key>col</key><integer>8</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -230,7 +231,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>13</integer>
+// CHECK-NEXT:       <key>line</key><integer>14</integer>
 // CHECK-NEXT:       <key>col</key><integer>3</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -238,12 +239,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>13</integer>
+// CHECK-NEXT:          <key>line</key><integer>14</integer>
 // CHECK-NEXT:          <key>col</key><integer>3</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>13</integer>
+// CHECK-NEXT:          <key>line</key><integer>14</integer>
 // CHECK-NEXT:          <key>col</key><integer>10</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -267,7 +268,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>8</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>13</integer>
+// CHECK-NEXT:    <key>line</key><integer>14</integer>
 // CHECK-NEXT:    <key>col</key><integer>3</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -279,7 +280,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>24</integer>
+// CHECK-NEXT:       <key>line</key><integer>25</integer>
 // CHECK-NEXT:       <key>col</key><integer>2</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -287,12 +288,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>24</integer>
+// CHECK-NEXT:          <key>line</key><integer>25</integer>
 // CHECK-NEXT:          <key>col</key><integer>2</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>24</integer>
+// CHECK-NEXT:          <key>line</key><integer>25</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -308,7 +309,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>18</integer>
+// CHECK-NEXT:       <key>line</key><integer>19</integer>
 // CHECK-NEXT:       <key>col</key><integer>2</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -326,12 +327,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>18</integer>
+// CHECK-NEXT:            <key>line</key><integer>19</integer>
 // CHECK-NEXT:            <key>col</key><integer>2</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>18</integer>
+// CHECK-NEXT:            <key>line</key><integer>19</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -339,12 +340,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>19</integer>
+// CHECK-NEXT:            <key>line</key><integer>20</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>19</integer>
+// CHECK-NEXT:            <key>line</key><integer>20</integer>
 // CHECK-NEXT:            <key>col</key><integer>8</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -356,7 +357,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>19</integer>
+// CHECK-NEXT:       <key>line</key><integer>20</integer>
 // CHECK-NEXT:       <key>col</key><integer>3</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -364,12 +365,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>19</integer>
+// CHECK-NEXT:          <key>line</key><integer>20</integer>
 // CHECK-NEXT:          <key>col</key><integer>3</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>19</integer>
+// CHECK-NEXT:          <key>line</key><integer>20</integer>
 // CHECK-NEXT:          <key>col</key><integer>13</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -385,7 +386,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>24</integer>
+// CHECK-NEXT:       <key>line</key><integer>25</integer>
 // CHECK-NEXT:       <key>col</key><integer>2</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -393,12 +394,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>24</integer>
+// CHECK-NEXT:          <key>line</key><integer>25</integer>
 // CHECK-NEXT:          <key>col</key><integer>2</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>24</integer>
+// CHECK-NEXT:          <key>line</key><integer>25</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -418,12 +419,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>24</integer>
+// CHECK-NEXT:            <key>line</key><integer>25</integer>
 // CHECK-NEXT:            <key>col</key><integer>2</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>24</integer>
+// CHECK-NEXT:            <key>line</key><integer>25</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -431,12 +432,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>26</integer>
+// CHECK-NEXT:            <key>line</key><integer>27</integer>
 // CHECK-NEXT:            <key>col</key><integer>2</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>26</integer>
+// CHECK-NEXT:            <key>line</key><integer>27</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -448,7 +449,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>26</integer>
+// CHECK-NEXT:       <key>line</key><integer>27</integer>
 // CHECK-NEXT:       <key>col</key><integer>2</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -456,12 +457,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>26</integer>
+// CHECK-NEXT:          <key>line</key><integer>27</integer>
 // CHECK-NEXT:          <key>col</key><integer>2</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>26</integer>
+// CHECK-NEXT:          <key>line</key><integer>27</integer>
 // CHECK-NEXT:          <key>col</key><integer>11</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -485,7 +486,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>26</integer>
+// CHECK-NEXT:    <key>line</key><integer>27</integer>
 // CHECK-NEXT:    <key>col</key><integer>2</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
diff --git a/test/Analysis/NewDeleteLeaks-PR19102.cpp b/test/Analysis/NewDeleteLeaks-PR19102.cpp
index 502db61..625b2d4 100644
--- a/test/Analysis/NewDeleteLeaks-PR19102.cpp
+++ b/test/Analysis/NewDeleteLeaks-PR19102.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDeleteLeaks -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDeleteLeaks -analyzer-config c++-allocator-inlining=true -verify %s
 
 class A0 {};
 
diff --git a/test/Analysis/_Bool-increment-decrement.c b/test/Analysis/_Bool-increment-decrement.c
new file mode 100644
index 0000000..477b6ed
--- /dev/null
+++ b/test/Analysis/_Bool-increment-decrement.c
@@ -0,0 +1,140 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -verify -std=c99 -Dbool=_Bool -Dtrue=1 -Dfalse=0 %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -verify -std=c11 -Dbool=_Bool -Dtrue=1 -Dfalse=0 %s
+extern void clang_analyzer_eval(bool);
+
+void test__Bool_value() {
+  {
+    bool b = true;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = false;
+    clang_analyzer_eval(b == 0); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = -10;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 10;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 10;
+    b++;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 0;
+    b++;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+}
+
+void test__Bool_increment() {
+  {
+    bool b = true;
+    b++;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = false;
+    b++;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = true;
+    ++b;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = false;
+    ++b;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 0;
+    ++b;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 10;
+    ++b;
+    ++b;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = -10;
+    ++b;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = -1;
+    ++b;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+}
+
+void test__Bool_decrement() {
+  {
+    bool b = true;
+    b--;
+    clang_analyzer_eval(b == 0); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = false;
+    b--;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = true;
+    --b;
+    clang_analyzer_eval(b == 0); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = false;
+    --b;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 0;
+    --b;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 10;
+    --b;
+    clang_analyzer_eval(b == 0); // expected-warning{{TRUE}}
+    --b;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = -10;
+    --b;
+    clang_analyzer_eval(b == 0); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 1;
+    --b;
+    clang_analyzer_eval(b == 0); // expected-warning{{TRUE}}
+  }
+}
diff --git a/test/Analysis/analyzer-config.c b/test/Analysis/analyzer-config.c
index 4daea89..4acbad4 100644
--- a/test/Analysis/analyzer-config.c
+++ b/test/Analysis/analyzer-config.c
@@ -1,5 +1,5 @@
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 %s -o /dev/null -analyzer-checker=core,osx.cocoa,debug.ConfigDumper -analyzer-max-loop 34 > %t 2>&1
-// RUN: FileCheck --input-file=%t %s
+// RUN: FileCheck --input-file=%t %s --match-full-lines
 
 void bar() {}
 void foo() {
@@ -15,7 +15,9 @@
 // CHECK-NEXT: cfg-implicit-dtors = true
 // CHECK-NEXT: cfg-lifetime = false
 // CHECK-NEXT: cfg-loopexit = false
-// CHECK-NEXT: cfg-temporary-dtors = false
+// CHECK-NEXT: cfg-rich-constructors = true
+// CHECK-NEXT: cfg-temporary-dtors = true
+// CHECK-NEXT: exploration_strategy = unexplored_first_queue
 // CHECK-NEXT: faux-bodies = true
 // CHECK-NEXT: graph-trim-interval = 1000
 // CHECK-NEXT: inline-lambdas = true
@@ -28,7 +30,8 @@
 // CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14
 // CHECK-NEXT: mode = deep
 // CHECK-NEXT: region-store-small-struct-limit = 2
+// CHECK-NEXT: serialize-stats = false
 // CHECK-NEXT: unroll-loops = false
 // CHECK-NEXT: widen-loops = false
 // CHECK-NEXT: [stats]
-// CHECK-NEXT: num-entries = 19
+// CHECK-NEXT: num-entries = 22
diff --git a/test/Analysis/analyzer-config.cpp b/test/Analysis/analyzer-config.cpp
index a08e85e..0ab1077 100644
--- a/test/Analysis/analyzer-config.cpp
+++ b/test/Analysis/analyzer-config.cpp
@@ -1,5 +1,5 @@
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 %s -o /dev/null -analyzer-checker=core,osx.cocoa,debug.ConfigDumper -analyzer-max-loop 34 > %t 2>&1
-// RUN: FileCheck --input-file=%t %s
+// RUN: FileCheck --input-file=%t %s --match-full-lines
 
 void bar() {}
 void foo() {
@@ -12,7 +12,9 @@
 
 class Foo {
 public:
-	void bar() {}
+  ~Foo() {}
+  void baz() { Foo(); }
+	void bar() { const Foo &f = Foo(); }
 	void foo() { bar(); }
 };
 
@@ -21,12 +23,16 @@
 // CHECK-NEXT: c++-inlining = destructors
 // CHECK-NEXT: c++-shared_ptr-inlining = false
 // CHECK-NEXT: c++-stdlib-inlining = true
+// CHECK-NEXT: c++-temp-dtor-inlining = false
 // CHECK-NEXT: c++-template-inlining = true
 // CHECK-NEXT: cfg-conditional-static-initializers = true
 // CHECK-NEXT: cfg-implicit-dtors = true
 // CHECK-NEXT: cfg-lifetime = false
 // CHECK-NEXT: cfg-loopexit = false
-// CHECK-NEXT: cfg-temporary-dtors = false
+// CHECK-NEXT: cfg-rich-constructors = true
+// CHECK-NEXT: cfg-temporary-dtors = true
+// CHECK-NEXT: experimental-enable-naive-ctu-analysis = false
+// CHECK-NEXT: exploration_strategy = unexplored_first_queue
 // CHECK-NEXT: faux-bodies = true
 // CHECK-NEXT: graph-trim-interval = 1000
 // CHECK-NEXT: inline-lambdas = true
@@ -39,7 +45,8 @@
 // CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14
 // CHECK-NEXT: mode = deep
 // CHECK-NEXT: region-store-small-struct-limit = 2
+// CHECK-NEXT: serialize-stats = false
 // CHECK-NEXT: unroll-loops = false
 // CHECK-NEXT: widen-loops = false
 // CHECK-NEXT: [stats]
-// CHECK-NEXT: num-entries = 24
+// CHECK-NEXT: num-entries = 29
diff --git a/test/Analysis/analyzer-stats.c b/test/Analysis/analyzer-stats.c
index 5a40d19..b58e862 100644
--- a/test/Analysis/analyzer-stats.c
+++ b/test/Analysis/analyzer-stats.c
@@ -1,4 +1,4 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,deadcode.DeadStores,debug.Stats -verify -Wno-unreachable-code -analyzer-opt-analyze-nested-blocks %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,deadcode.DeadStores,debug.Stats -verify -Wno-unreachable-code -analyzer-opt-analyze-nested-blocks -analyzer-max-loop 4 %s
 
 int foo();
 
@@ -12,3 +12,19 @@
   a /= 4;
   return a;
 }
+
+
+int sink() // expected-warning-re{{sink -> Total CFGBlocks: {{[0-9]+}} | Unreachable CFGBlocks: 1 | Exhausted Block: yes | Empty WorkList: yes}}
+{
+  for (int i = 0; i < 10; ++i) // expected-warning {{(sink): The analyzer generated a sink at this point}}
+    ++i;
+
+  return 0;
+}
+
+int emptyConditionLoop() // expected-warning-re{{emptyConditionLoop -> Total CFGBlocks: {{[0-9]+}} | Unreachable CFGBlocks: 0 | Exhausted Block: yes | Empty WorkList: yes}}
+{
+  int num = 1;
+  for (;;)
+    num++;
+}
diff --git a/test/Analysis/arc-zero-init.m b/test/Analysis/arc-zero-init.m
new file mode 100644
index 0000000..de1e978
--- /dev/null
+++ b/test/Analysis/arc-zero-init.m
@@ -0,0 +1,46 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify -fobjc-arc %s
+
+#if __has_feature(objc_arc)
+// expected-no-diagnostics
+#endif
+
+@interface SomeClass
+@end
+
+void simpleStrongPointerValue() {
+  SomeClass *x;
+  if (x) {}
+#if !__has_feature(objc_arc)
+// expected-warning@-2{{Branch condition evaluates to a garbage value}}
+#endif
+}
+
+void simpleArray() {
+  SomeClass *vlaArray[5];
+
+  if (vlaArray[0]) {}
+#if !__has_feature(objc_arc)
+// expected-warning@-2{{Branch condition evaluates to a garbage value}}
+#endif
+}
+
+void variableLengthArray() {
+   int count = 1;
+   SomeClass * vlaArray[count];
+
+   if (vlaArray[0]) {}
+#if !__has_feature(objc_arc)
+  // expected-warning@-2{{Branch condition evaluates to a garbage value}}
+#endif
+}
+
+void variableLengthArrayWithExplicitStrongAttribute() {
+   int count = 1;
+   __attribute__((objc_ownership(strong))) SomeClass * vlaArray[count];
+
+   if (vlaArray[0]) {}
+#if !__has_feature(objc_arc)
+  // expected-warning@-2{{Branch condition evaluates to a garbage value}}
+#endif
+}
diff --git a/test/Analysis/auto-obj-dtors-cfg-output.cpp b/test/Analysis/auto-obj-dtors-cfg-output.cpp
index cc47c92..b5b0e5c 100644
--- a/test/Analysis/auto-obj-dtors-cfg-output.cpp
+++ b/test/Analysis/auto-obj-dtors-cfg-output.cpp
@@ -1,5 +1,16 @@
-// RUN: %clang_analyze_cc1 -fcxx-exceptions -fexceptions -analyzer-checker=debug.DumpCFG %s > %t 2>&1
-// RUN: FileCheck --input-file=%t %s
+// RUN: %clang_analyze_cc1 -fcxx-exceptions -fexceptions -analyzer-checker=debug.DumpCFG -analyzer-config cfg-rich-constructors=false %s > %t 2>&1
+// RUN: FileCheck --input-file=%t -check-prefixes=CHECK,WARNINGS %s
+// RUN: %clang_analyze_cc1 -fcxx-exceptions -fexceptions -analyzer-checker=debug.DumpCFG -analyzer-config cfg-rich-constructors=true %s > %t 2>&1
+// RUN: FileCheck --input-file=%t -check-prefixes=CHECK,ANALYZER %s
+
+// This file tests how we construct two different flavors of the Clang CFG -
+// the CFG used by the Sema analysis-based warnings and the CFG used by the
+// static analyzer. The difference in the behavior is checked via FileCheck
+// prefixes (WARNINGS and ANALYZER respectively). When introducing new analyzer
+// flags, no new run lines should be added - just these flags would go to the
+// respective line depending on where is it turned on and where is it turned
+// off. Feel free to add tests that test only one of the CFG flavors if you're
+// not sure how the other flavor is supposed to work in your case.
 
 class A {
 public:
@@ -32,12 +43,14 @@
 // CHECK:      [B2 (ENTRY)]
 // CHECK-NEXT:   Succs (1): B1
 // CHECK:      [B1]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B1.2], class A)
 // CHECK-NEXT:   2: A a;
 // CHECK-NEXT:   3: a
 // CHECK-NEXT:   4: [B1.3] (ImplicitCastExpr, NoOp, const class A)
 // CHECK-NEXT:   5: const A &b = a;
-// CHECK-NEXT:   6: A() (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   6: A() (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   6: A() (CXXConstructExpr, [B1.7], [B1.9], class A)
 // CHECK-NEXT:   7: [B1.6] (BindTemporary)
 // CHECK-NEXT:   8: [B1.7] (ImplicitCastExpr, NoOp, const class A)
 // CHECK-NEXT:   9: [B1.8]
@@ -57,9 +70,11 @@
 // CHECK:      [B2 (ENTRY)]
 // CHECK-NEXT:   Succs (1): B1
 // CHECK:      [B1]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A [2])
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A [2])
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B1.2], class A [2])
 // CHECK-NEXT:   2: A a[2];
-// CHECK-NEXT:   3:  (CXXConstructExpr, class A [0])
+// WARNINGS-NEXT:   3:  (CXXConstructExpr, class A [0])
+// ANALYZER-NEXT:   3:  (CXXConstructExpr, [B1.4], class A [0])
 // CHECK-NEXT:   4: A b[0];
 // CHECK-NEXT:   5: [B1.2].~A() (Implicit destructor)
 // CHECK-NEXT:   Preds (1): B2
@@ -74,15 +89,19 @@
 // CHECK:      [B2 (ENTRY)]
 // CHECK-NEXT:   Succs (1): B1
 // CHECK:      [B1]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B1.2], class A)
 // CHECK-NEXT:   2: A a;
-// CHECK-NEXT:   3:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   3:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   3:  (CXXConstructExpr, [B1.4], class A)
 // CHECK-NEXT:   4: A c;
-// CHECK-NEXT:   5:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   5:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   5:  (CXXConstructExpr, [B1.6], class A)
 // CHECK-NEXT:   6: A d;
 // CHECK-NEXT:   7: [B1.6].~A() (Implicit destructor)
 // CHECK-NEXT:   8: [B1.4].~A() (Implicit destructor)
-// CHECK-NEXT:   9:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   9:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   9:  (CXXConstructExpr, [B1.10], class A)
 // CHECK:       10: A b;
 // CHECK:       11: [B1.10].~A() (Implicit destructor)
 // CHECK:       12: [B1.2].~A() (Implicit destructor)
@@ -101,7 +120,8 @@
 // CHECK:      [B4 (ENTRY)]
 // CHECK-NEXT:   Succs (1): B3
 // CHECK:      [B1]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B1.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: [B1.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B3.4].~A() (Implicit destructor)
@@ -115,9 +135,11 @@
 // CHECK-NEXT:   Preds (1): B3
 // CHECK-NEXT:   Succs (1): B0
 // CHECK:      [B3]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B3.2], class A)
 // CHECK-NEXT:   2: A a;
-// CHECK-NEXT:   3:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   3:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   3:  (CXXConstructExpr, [B3.4], class A)
 // CHECK-NEXT:   4: A b;
 // CHECK-NEXT:   5: UV
 // CHECK-NEXT:   6: [B3.5] (ImplicitCastExpr, LValueToRValue, _Bool)
@@ -137,7 +159,8 @@
 // CHECK-NEXT:   Succs (1): B7
 // CHECK:      [B1]
 // CHECK:       l1:
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B1.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: [B1.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B6.2].~A() (Implicit destructor)
@@ -145,7 +168,8 @@
 // CHECK-NEXT:   Preds (2): B2 B3
 // CHECK-NEXT:   Succs (1): B0
 // CHECK:      [B2]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B2.2], class A)
 // CHECK-NEXT:   2: A b;
 // CHECK-NEXT:   3: [B2.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B6.4].~A() (Implicit destructor)
@@ -170,9 +194,11 @@
 // CHECK-NEXT:   Succs (1): B6
 // CHECK:      [B6]
 // CHECK:       l0:
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B6.2], class A)
 // CHECK-NEXT:   2: A b;
-// CHECK-NEXT:   3:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   3:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   3:  (CXXConstructExpr, [B6.4], class A)
 // CHECK-NEXT:   4: A a;
 // CHECK-NEXT:   5: UV
 // CHECK-NEXT:   6: [B6.5] (ImplicitCastExpr, LValueToRValue, _Bool)
@@ -180,7 +206,8 @@
 // CHECK-NEXT:   Preds (2): B7 B5
 // CHECK-NEXT:   Succs (2): B5 B4
 // CHECK:      [B7]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B7.2], class A)
 // CHECK-NEXT:   2: A a;
 // CHECK-NEXT:   Preds (1): B8
 // CHECK-NEXT:   Succs (1): B6
@@ -207,23 +234,27 @@
 // CHECK-NEXT:   Preds (2): B2 B3
 // CHECK-NEXT:   Succs (1): B0
 // CHECK:      [B2]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B2.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: [B2.2].~A() (Implicit destructor)
 // CHECK-NEXT:   Preds (1): B4
 // CHECK-NEXT:   Succs (1): B1
 // CHECK:      [B3]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B3.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: [B3.2].~A() (Implicit destructor)
 // CHECK-NEXT:   Preds (1): B4
 // CHECK-NEXT:   Succs (1): B1
 // CHECK:      [B4]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B4.2], class A)
 // CHECK-NEXT:   2: A a;
 // CHECK-NEXT:   3: a
 // CHECK-NEXT:   4: [B4.3] (ImplicitCastExpr, NoOp, const class A)
-// CHECK-NEXT:   5: [B4.4] (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   5: [B4.4] (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   5: [B4.4] (CXXConstructExpr, [B4.6], class A)
 // CHECK-NEXT:   6: A b = a;
 // CHECK-NEXT:   7: b
 // CHECK-NEXT:   8: [B4.7] (ImplicitCastExpr, NoOp, const class A)
@@ -247,14 +278,16 @@
 // CHECK-NEXT:   Succs (1): B8
 // CHECK:      [B1]
 // CHECK-NEXT:   1: [B8.6].~A() (Implicit destructor)
-// CHECK-NEXT:   2:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   2:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   2:  (CXXConstructExpr, [B1.3], class A)
 // CHECK-NEXT:   3: A e;
 // CHECK-NEXT:   4: [B1.3].~A() (Implicit destructor)
 // CHECK-NEXT:   5: [B8.2].~A() (Implicit destructor)
 // CHECK-NEXT:   Preds (2): B2 B5
 // CHECK-NEXT:   Succs (1): B0
 // CHECK:      [B2]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B2.2], class A)
 // CHECK-NEXT:   2: A d;
 // CHECK-NEXT:   3: [B2.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B4.2].~A() (Implicit destructor)
@@ -268,7 +301,8 @@
 // CHECK-NEXT:   Preds (1): B4
 // CHECK-NEXT:   Succs (1): B0
 // CHECK:      [B4]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B4.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: UV
 // CHECK-NEXT:   4: [B4.3] (ImplicitCastExpr, LValueToRValue, _Bool)
@@ -276,7 +310,8 @@
 // CHECK-NEXT:   Preds (1): B8
 // CHECK-NEXT:   Succs (2): B3 B2
 // CHECK:      [B5]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B5.2], class A)
 // CHECK-NEXT:   2: A d;
 // CHECK-NEXT:   3: [B5.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B7.2].~A() (Implicit destructor)
@@ -290,7 +325,8 @@
 // CHECK-NEXT:   Preds (1): B7
 // CHECK-NEXT:   Succs (1): B0
 // CHECK:      [B7]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B7.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: UV
 // CHECK-NEXT:   4: [B7.3] (ImplicitCastExpr, LValueToRValue, _Bool)
@@ -298,11 +334,13 @@
 // CHECK-NEXT:   Preds (1): B8
 // CHECK-NEXT:   Succs (2): B6 B5
 // CHECK:      [B8]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B8.2], class A)
 // CHECK-NEXT:   2: A a;
 // CHECK-NEXT:   3: a
 // CHECK-NEXT:   4: [B8.3] (ImplicitCastExpr, NoOp, const class A)
-// CHECK-NEXT:   5: [B8.4] (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   5: [B8.4] (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   5: [B8.4] (CXXConstructExpr, [B8.6], class A)
 // CHECK-NEXT:   6: A b = a;
 // CHECK-NEXT:   7: b
 // CHECK-NEXT:   8: [B8.7] (ImplicitCastExpr, NoOp, const class A)
@@ -340,7 +378,8 @@
 // CHECK-NEXT:   Preds (1): B3
 // CHECK-NEXT:   Succs (1): B4
 // CHECK:      [B3]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B3.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: [B3.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B4.4].~A() (Implicit destructor)
@@ -361,7 +400,8 @@
 // CHECK-NEXT:   Preds (2): B2 B5
 // CHECK-NEXT:   Succs (2): B3 B1
 // CHECK:      [B5]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B5.2], class A)
 // CHECK-NEXT:   2: A a;
 // CHECK-NEXT:   Preds (1): B6
 // CHECK-NEXT:   Succs (1): B4
@@ -377,7 +417,8 @@
 // CHECK-NEXT:   Succs (1): B11
 // CHECK:      [B1]
 // CHECK-NEXT:   1: [B10.4].~A() (Implicit destructor)
-// CHECK-NEXT:   2:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   2:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   2:  (CXXConstructExpr, [B1.3], class A)
 // CHECK-NEXT:   3: A e;
 // CHECK-NEXT:   4: [B1.3].~A() (Implicit destructor)
 // CHECK-NEXT:   5: [B11.2].~A() (Implicit destructor)
@@ -387,7 +428,8 @@
 // CHECK-NEXT:   Preds (2): B3 B6
 // CHECK-NEXT:   Succs (1): B10
 // CHECK:      [B3]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B3.2], class A)
 // CHECK-NEXT:   2: A d;
 // CHECK-NEXT:   3: [B3.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B9.2].~A() (Implicit destructor)
@@ -425,7 +467,8 @@
 // CHECK:        Preds (1): B9
 // CHECK-NEXT:   Succs (1): B1
 // CHECK:      [B9]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B9.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: UV
 // CHECK-NEXT:   4: [B9.3] (ImplicitCastExpr, LValueToRValue, _Bool)
@@ -447,7 +490,8 @@
 // CHECK-NEXT:   Preds (2): B2 B11
 // CHECK-NEXT:   Succs (2): B9 B1
 // CHECK:      [B11]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B11.2], class A)
 // CHECK-NEXT:   2: A a;
 // CHECK-NEXT:   Preds (1): B12
 // CHECK-NEXT:   Succs (1): B10
@@ -474,7 +518,8 @@
 // CHECK-NEXT:   Preds (1): B2
 // CHECK-NEXT:   Succs (2): B3 B0
 // CHECK:      [B2]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B2.2], class A)
 // CHECK-NEXT:   2: A a;
 // CHECK-NEXT:   3: [B2.2].~A() (Implicit destructor)
 // CHECK-NEXT:   Preds (2): B3 B4
@@ -492,7 +537,8 @@
 // CHECK:      [B12 (ENTRY)]
 // CHECK-NEXT:   Succs (1): B11
 // CHECK:      [B1]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B1.2], class A)
 // CHECK-NEXT:   2: A d;
 // CHECK-NEXT:   3: [B1.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B11.2].~A() (Implicit destructor)
@@ -505,7 +551,8 @@
 // CHECK-NEXT:   Preds (2): B3 B6
 // CHECK-NEXT:   Succs (2): B10 B1
 // CHECK:      [B3]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B3.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: [B3.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B9.2].~A() (Implicit destructor)
@@ -540,7 +587,8 @@
 // CHECK:        Preds (1): B9
 // CHECK-NEXT:   Succs (1): B1
 // CHECK:      [B9]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B9.2], class A)
 // CHECK-NEXT:   2: A b;
 // CHECK-NEXT:   3: UV
 // CHECK-NEXT:   4: [B9.3] (ImplicitCastExpr, LValueToRValue, _Bool)
@@ -551,7 +599,8 @@
 // CHECK-NEXT:   Preds (1): B2
 // CHECK-NEXT:   Succs (1): B9
 // CHECK:      [B11]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B11.2], class A)
 // CHECK-NEXT:   2: A a;
 // CHECK-NEXT:   Preds (1): B12
 // CHECK-NEXT:   Succs (1): B9
@@ -577,7 +626,8 @@
 // CHECK-NEXT:   Preds (2): B3 B2
 // CHECK-NEXT:   Succs (1): B0
 // CHECK:      [B2]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B2.2], class A)
 // CHECK-NEXT:   2: A a;
 // CHECK-NEXT:   3: a
 // CHECK-NEXT:   4: [B2.3] (ImplicitCastExpr, NoOp, const class A)
@@ -592,7 +642,8 @@
 // CHECK-NEXT:   Preds (1): B4
 // CHECK-NEXT:   Succs (1): B1
 // CHECK:      [B3]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B3.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: [B3.2].~A() (Implicit destructor)
 // CHECK-NEXT:   Succs (1): B1
@@ -608,14 +659,16 @@
 // CHECK-NEXT:   Succs (1): B2
 // CHECK:      [B1]
 // CHECK-NEXT:   1: [B2.6].~A() (Implicit destructor)
-// CHECK-NEXT:   2:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   2:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   2:  (CXXConstructExpr, [B1.3], class A)
 // CHECK-NEXT:   3: A g;
 // CHECK-NEXT:   4: [B1.3].~A() (Implicit destructor)
 // CHECK-NEXT:   5: [B2.2].~A() (Implicit destructor)
 // CHECK-NEXT:   Preds (3): B3 B7 B2
 // CHECK-NEXT:   Succs (1): B0
 // CHECK:      [B2]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B2.2], class A)
 // CHECK-NEXT:   2: A a;
 // CHECK-NEXT:   3: a
 // CHECK-NEXT:   4: [B2.3] (ImplicitCastExpr, NoOp, const class A)
@@ -635,7 +688,8 @@
 // CHECK:        Preds (2): B2 B4
 // CHECK-NEXT:   Succs (1): B1
 // CHECK:      [B4]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B4.2], class A)
 // CHECK-NEXT:   2: A f;
 // CHECK-NEXT:   3: [B4.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B8.2].~A() (Implicit destructor)
@@ -661,7 +715,8 @@
 // CHECK-NEXT:   Succs (1): B1
 // CHECK:      [B8]
 // CHECK:       case 0:
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B8.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: UV
 // CHECK-NEXT:   4: [B8.3] (ImplicitCastExpr, LValueToRValue, _Bool)
@@ -696,7 +751,8 @@
 // CHECK-NEXT:   Preds (1): B3
 // CHECK-NEXT:   Succs (1): B4
 // CHECK:      [B3]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B3.2], class A)
 // CHECK-NEXT:   2: A c;
 // CHECK-NEXT:   3: [B3.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B4.4].~A() (Implicit destructor)
@@ -717,7 +773,8 @@
 // CHECK-NEXT:   Preds (2): B2 B5
 // CHECK-NEXT:   Succs (2): B3 B1
 // CHECK:      [B5]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B5.2], class A)
 // CHECK-NEXT:   2: A a;
 // CHECK-NEXT:   Preds (1): B6
 // CHECK-NEXT:   Succs (1): B4
@@ -733,7 +790,8 @@
 // CHECK:      [B1]
 // CHECK-NEXT:   1: [B10.4].~A() (Implicit destructor)
 // CHECK-NEXT:   2: [B11.4].~A() (Implicit destructor)
-// CHECK-NEXT:   3:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   3:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   3:  (CXXConstructExpr, [B1.4], class A)
 // CHECK-NEXT:   4: A f;
 // CHECK-NEXT:   5: [B1.4].~A() (Implicit destructor)
 // CHECK-NEXT:   6: [B11.2].~A() (Implicit destructor)
@@ -743,7 +801,8 @@
 // CHECK-NEXT:   Preds (2): B3 B6
 // CHECK-NEXT:   Succs (1): B10
 // CHECK:      [B3]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B3.2], class A)
 // CHECK-NEXT:   2: A e;
 // CHECK-NEXT:   3: [B3.2].~A() (Implicit destructor)
 // CHECK-NEXT:   4: [B9.2].~A() (Implicit destructor)
@@ -781,7 +840,8 @@
 // CHECK:        Preds (1): B9
 // CHECK-NEXT:   Succs (1): B1
 // CHECK:      [B9]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B9.2], class A)
 // CHECK-NEXT:   2: A d;
 // CHECK-NEXT:   3: UV
 // CHECK-NEXT:   4: [B9.3] (ImplicitCastExpr, LValueToRValue, _Bool)
@@ -803,9 +863,11 @@
 // CHECK-NEXT:   Preds (2): B2 B11
 // CHECK-NEXT:   Succs (2): B9 B1
 // CHECK:      [B11]
-// CHECK-NEXT:   1:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   1:  (CXXConstructExpr, [B11.2], class A)
 // CHECK-NEXT:   2: A a;
-// CHECK-NEXT:   3:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   3:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   3:  (CXXConstructExpr, [B11.4], class A)
 // CHECK-NEXT:   4: A b;
 // CHECK-NEXT:   Preds (1): B12
 // CHECK-NEXT:   Succs (1): B10
diff --git a/test/Analysis/bitwise-ops.c b/test/Analysis/bitwise-ops.c
index acef668..5cdb668 100644
--- a/test/Analysis/bitwise-ops.c
+++ b/test/Analysis/bitwise-ops.c
@@ -44,3 +44,16 @@
   }
   return 0;
 }
+
+int testNegativeLeftShift(int a) {
+  if (a == -3) {
+    return a << 1; // expected-warning{{The result of the left shift is undefined because the left operand is negative}}
+  }
+  return 0;
+}
+
+int testUnrepresentableLeftShift(int a) {
+  if (a == 8)
+    return a << 30; // expected-warning{{The result of the left shift is undefined due to shifting '8' by '30', which is unrepresentable in the unsigned version of the return type 'int'}}
+  return 0;
+}
diff --git a/test/Analysis/block-in-critical-section.cpp b/test/Analysis/block-in-critical-section.cpp
index c65cc61..fcf6188 100644
--- a/test/Analysis/block-in-critical-section.cpp
+++ b/test/Analysis/block-in-critical-section.cpp
@@ -7,6 +7,20 @@
   void lock() {}
   void unlock() {}
 };
+template<typename T>
+struct lock_guard {
+  lock_guard<T>(std::mutex) {}
+  ~lock_guard<T>() {}
+};
+template<typename T>
+struct unique_lock {
+  unique_lock<T>(std::mutex) {}
+  ~unique_lock<T>() {}
+};
+template<typename T>
+struct not_real_lock {
+  not_real_lock<T>(std::mutex) {}
+};
 }
 
 void getc() {}
@@ -110,3 +124,31 @@
   m.lock();
   sleep(1); // expected-warning {{Call to blocking function 'sleep' inside of critical section}}
 }
+
+void testBlockInCriticalSectionLockGuard() {
+  std::mutex g_mutex;
+  std::not_real_lock<std::mutex> not_real_lock(g_mutex);
+  sleep(1); // no-warning
+
+  std::lock_guard<std::mutex> lock(g_mutex);
+  sleep(1); // expected-warning {{Call to blocking function 'sleep' inside of critical section}}
+}
+
+void testBlockInCriticalSectionLockGuardNested() {
+  testBlockInCriticalSectionLockGuard();
+  sleep(1); // no-warning
+}
+
+void testBlockInCriticalSectionUniqueLock() {
+  std::mutex g_mutex;
+  std::not_real_lock<std::mutex> not_real_lock(g_mutex);
+  sleep(1); // no-warning
+
+  std::unique_lock<std::mutex> lock(g_mutex);
+  sleep(1); // expected-warning {{Call to blocking function 'sleep' inside of critical section}}
+}
+
+void testBlockInCriticalSectionUniqueLockNested() {
+  testBlockInCriticalSectionUniqueLock();
+  sleep(1); // no-warning
+}
diff --git a/test/Analysis/block-in-critical-section.m b/test/Analysis/block-in-critical-section.m
new file mode 100644
index 0000000..73d5847
--- /dev/null
+++ b/test/Analysis/block-in-critical-section.m
@@ -0,0 +1,10 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.BlockInCriticalSection -verify -Wno-objc-root-class %s
+// expected-no-diagnostics
+
+@interface SomeClass
+-(void)someMethod;
+@end
+
+void shouldNotCrash(SomeClass *o) {
+  [o someMethod];
+}
diff --git a/test/Analysis/blocks.mm b/test/Analysis/blocks.mm
index 6cff9b4..8a3f170 100644
--- a/test/Analysis/blocks.mm
+++ b/test/Analysis/blocks.mm
@@ -1,6 +1,17 @@
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 -analyzer-checker=core -fblocks -analyzer-opt-analyze-nested-blocks -verify -x objective-c++ %s
-// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=core,debug.DumpCFG -fblocks -analyzer-opt-analyze-nested-blocks %s > %t 2>&1
-// RUN: FileCheck --input-file=%t %s
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=core,debug.DumpCFG -fblocks -analyzer-opt-analyze-nested-blocks -analyzer-config cfg-rich-constructors=false %s > %t 2>&1
+// RUN: FileCheck --input-file=%t -check-prefixes=CHECK,WARNINGS %s
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=core,debug.DumpCFG -fblocks -analyzer-opt-analyze-nested-blocks -analyzer-config cfg-rich-constructors=true %s > %t 2>&1
+// RUN: FileCheck --input-file=%t -check-prefixes=CHECK,ANALYZER %s
+
+// This file tests how we construct two different flavors of the Clang CFG -
+// the CFG used by the Sema analysis-based warnings and the CFG used by the
+// static analyzer. The difference in the behavior is checked via FileCheck
+// prefixes (WARNINGS and ANALYZER respectively). When introducing new analyzer
+// flags, no new run lines should be added - just these flags would go to the
+// respective line depending on where is it turned on and where is it turned
+// off. Feel free to add tests that test only one of the CFG flavors if you're
+// not sure how the other flavor is supposed to work in your case.
 
 // expected-no-diagnostics
 
@@ -64,7 +75,8 @@
 
 // CHECK: [B1]
 // CHECK-NEXT:   1: 5
-// CHECK-NEXT:   2: [B1.1] (CXXConstructExpr, struct StructWithCopyConstructor)
+// WARNINGS-NEXT:   2: [B1.1] (CXXConstructExpr, struct StructWithCopyConstructor)
+// ANALYZER-NEXT:   2: [B1.1] (CXXConstructExpr, [B1.3], struct StructWithCopyConstructor)
 // CHECK-NEXT:   3: StructWithCopyConstructor s(5) __attribute__((blocks("byref")));
 // CHECK-NEXT:   4: ^{ }
 // CHECK-NEXT:   5: (void)([B1.4]) (CStyleCastExpr, ToVoid, void)
diff --git a/test/Analysis/bool-increment.cpp b/test/Analysis/bool-increment.cpp
new file mode 100644
index 0000000..93002d3
--- /dev/null
+++ b/test/Analysis/bool-increment.cpp
@@ -0,0 +1,84 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -verify -std=c++98 -Wno-deprecated %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -verify -std=c++11 -Wno-deprecated %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -verify -std=c++14 -Wno-deprecated %s
+
+extern void clang_analyzer_eval(bool);
+
+void test_bool_value() {
+  {
+    bool b = true;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = false;
+    clang_analyzer_eval(b == 0); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = -10;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 10;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 10;
+    b++;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 0;
+    b++;
+    clang_analyzer_eval(b == 1); // expected-warning{{TRUE}}
+  }
+}
+
+void test_bool_increment() {
+  {
+    bool b = true;
+    b++;
+    clang_analyzer_eval(b); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = false;
+    b++;
+    clang_analyzer_eval(b); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = true;
+    ++b;
+    clang_analyzer_eval(b); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = false;
+    ++b;
+    clang_analyzer_eval(b); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 0;
+    ++b;
+    clang_analyzer_eval(b); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = 10;
+    ++b;
+    ++b;
+    clang_analyzer_eval(b); // expected-warning{{TRUE}}
+  }
+
+  {
+    bool b = -10;
+    ++b;
+    clang_analyzer_eval(b); // expected-warning{{TRUE}}
+  }
+}
diff --git a/test/Analysis/bug_hash_test.cpp b/test/Analysis/bug_hash_test.cpp
index 0efcb5f..f397d18 100644
--- a/test/Analysis/bug_hash_test.cpp
+++ b/test/Analysis/bug_hash_test.cpp
@@ -1,1345 +1,122 @@
-// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=core,debug.DumpBugHash -analyzer-output=plist %s -o %t.plist
-// RUN: FileCheck --input-file=%t.plist %s
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=core,debug.ExprInspection %s -verify
 
-int function(int p) {
-  return 5;
+constexpr int clang_analyzer_hashDump(int) { return 5; }
+
+void function(int) {
+  clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void function(int)$27$clang_analyzer_hashDump(5);$Category}}
 }
 
 namespace {
-int variadicParam(int p, ...) {
-  return 5;
+void variadicParam(int, ...) {
+  clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void (anonymous namespace)::variadicParam(int, ...)$27$clang_analyzer_hashDump(5);$Category}}
 }
-}
+} // namespace
 
-constexpr int f() { return 5; }
+constexpr int f() {
+  return clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$int f()$34$returnclang_analyzer_hashDump(5);$Category}}
+}
 
 namespace AA {
-  class X {
-    int priv;
-    X() : priv(5) { priv = 0; }
+class X {
+  X() {
+    clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$AA::X::X()$29$clang_analyzer_hashDump(5);$Category}}
+  }
 
-    static int static_method() {
-      return 5;
-    }
+  static void static_method() {
+    clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void AA::X::static_method()$29$clang_analyzer_hashDump(5);$Category}}
+    variadicParam(5);
+  }
 
-    int method() && {
-      class Y {
-        inline int method() const & {
-          return 5;
-        }
-      };
+  void method() && {
+    struct Y {
+      inline void method() const & {
+        clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void AA::X::method()::Y::method() const &$33$clang_analyzer_hashDump(5);$Category}}
+      }
+    };
 
-      return 5;
-    }
+    Y y;
+    y.method();
 
-    int OutOfLine();
+    clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void AA::X::method() &&$29$clang_analyzer_hashDump(5);$Category}}
+  }
 
-    X& operator=(int a) {
-      return *this;
-    }
+  void OutOfLine();
 
-    operator int() {
-      return 0;
-    }
+  X &operator=(int) {
+    clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$class AA::X & AA::X::operator=(int)$29$clang_analyzer_hashDump(5);$Category}}
+    return *this;
+  }
 
-    explicit operator float() {
-      return 0;
-    }
-  };
-}
+  operator int() {
+    clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$AA::X::operator int()$29$clang_analyzer_hashDump(5);$Category}}
+    return 0;
+  }
 
-int AA::X::OutOfLine() {
-  return 5;
+  explicit operator float() {
+    clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$AA::X::operator float()$29$clang_analyzer_hashDump(5);$Category}}
+    return 0;
+  }
+};
+} // namespace AA
+
+void AA::X::OutOfLine() {
+  clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void AA::X::OutOfLine()$27$clang_analyzer_hashDump(5);$Category}}
 }
 
 void testLambda() {
-  [] () {
-    return;
+  []() {
+    clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void testLambda()::(anonymous class)::operator()() const$29$clang_analyzer_hashDump(5);$Category}}
   }();
 }
 
-// CHECK: <key>diagnostics</key>
-// CHECK-NEXT: <array>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>5</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>5</integer>
-// CHECK-NEXT:           <key>col</key><integer>8</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>5</integer>
-// CHECK-NEXT:           <key>col</key><integer>10</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>5</integer>
-// CHECK-NEXT:           <key>col</key><integer>10</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>5</integer>
-// CHECK-NEXT:      <key>col</key><integer>10</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>5</integer>
-// CHECK-NEXT:         <key>col</key><integer>10</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>5</integer>
-// CHECK-NEXT:         <key>col</key><integer>10</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int function(int)$10$return5;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int function(int)$10$return5;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$int function(int)$10$return5;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>e7be204e83f8e5ad3c870ec011d5131d</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:  <key>issue_context</key><string>function</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>5</integer>
-// CHECK-NEXT:   <key>col</key><integer>10</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>10</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>10</integer>
-// CHECK-NEXT:           <key>col</key><integer>8</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>10</integer>
-// CHECK-NEXT:           <key>col</key><integer>10</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>10</integer>
-// CHECK-NEXT:           <key>col</key><integer>10</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>10</integer>
-// CHECK-NEXT:      <key>col</key><integer>10</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>10</integer>
-// CHECK-NEXT:         <key>col</key><integer>10</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>10</integer>
-// CHECK-NEXT:         <key>col</key><integer>10</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int (anonymous namespace)::variadicParam(int, ...)$10$return5;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int (anonymous namespace)::variadicParam(int, ...)$10$return5;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$int (anonymous namespace)::variadicParam(int, ...)$10$return5;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>bc5dc0507ee90f1d14259057d25fb2b9</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:  <key>issue_context</key><string>variadicParam</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>10</integer>
-// CHECK-NEXT:   <key>col</key><integer>10</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>14</integer>
-// CHECK-NEXT:           <key>col</key><integer>21</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>14</integer>
-// CHECK-NEXT:           <key>col</key><integer>26</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>14</integer>
-// CHECK-NEXT:           <key>col</key><integer>28</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>14</integer>
-// CHECK-NEXT:           <key>col</key><integer>28</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>14</integer>
-// CHECK-NEXT:      <key>col</key><integer>28</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>14</integer>
-// CHECK-NEXT:         <key>col</key><integer>28</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>14</integer>
-// CHECK-NEXT:         <key>col</key><integer>28</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int f()$28$constexprintf(){return5;}$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int f()$28$constexprintf(){return5;}$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$int f()$28$constexprintf(){return5;}$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>f5471f52854dc14167fe96db50c4ba5f</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:  <key>issue_context</key><string>f</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>0</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>14</integer>
-// CHECK-NEXT:   <key>col</key><integer>28</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>19</integer>
-// CHECK-NEXT:      <key>col</key><integer>16</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>16</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>16</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::X()$16$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::X()$16$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$AA::X::X()$16$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>d23266517ac17d5ec5e2fbbdb1922af1</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>0</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>19</integer>
-// CHECK-NEXT:   <key>col</key><integer>16</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>16</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>16</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>21</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>24</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>19</integer>
-// CHECK-NEXT:      <key>col</key><integer>21</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>21</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>24</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::X()$21$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::X()$21$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$AA::X::X()$21$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>7bfcc45512a6a3f61dda6e3ecebc7384</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>0</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>19</integer>
-// CHECK-NEXT:   <key>col</key><integer>21</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>16</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>16</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>21</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>24</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>21</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>24</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>26</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>26</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>19</integer>
-// CHECK-NEXT:      <key>col</key><integer>26</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>21</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>28</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::X()$21$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::X()$21$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$AA::X::X()$21$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>95dbfbcdd1dd6401d262994c45d088be</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>0</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>19</integer>
-// CHECK-NEXT:   <key>col</key><integer>26</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>16</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>16</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>21</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>24</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>21</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>24</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>28</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>28</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>19</integer>
-// CHECK-NEXT:      <key>col</key><integer>28</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>28</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>28</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::X()$28$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::X()$28$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$AA::X::X()$28$X():priv(5){priv=0;}$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>064a01551caa8eca3202f1fd55b9c692</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>0</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>19</integer>
-// CHECK-NEXT:   <key>col</key><integer>28</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>22</integer>
-// CHECK-NEXT:           <key>col</key><integer>7</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>22</integer>
-// CHECK-NEXT:           <key>col</key><integer>12</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>22</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>22</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>22</integer>
-// CHECK-NEXT:      <key>col</key><integer>14</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>22</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>22</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int AA::X::static_method()$14$return5;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int AA::X::static_method()$14$return5;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$int AA::X::static_method()$14$return5;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>651fcca72f8ad65771702903ecd5f68a</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>C++ method</string>
-// CHECK-NEXT:  <key>issue_context</key><string>static_method</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>22</integer>
-// CHECK-NEXT:   <key>col</key><integer>14</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>32</integer>
-// CHECK-NEXT:           <key>col</key><integer>7</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>32</integer>
-// CHECK-NEXT:           <key>col</key><integer>12</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>32</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>32</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>32</integer>
-// CHECK-NEXT:      <key>col</key><integer>14</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>32</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>32</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int AA::X::method() &amp;&amp;$14$return5;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int AA::X::method() &amp;&amp;$14$return5;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$int AA::X::method() &amp;&amp;$14$return5;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>c8ac8f24467234bea1f34adf5ad5007b</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>C++ method</string>
-// CHECK-NEXT:  <key>issue_context</key><string>method</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>7</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>32</integer>
-// CHECK-NEXT:   <key>col</key><integer>14</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>38</integer>
-// CHECK-NEXT:           <key>col</key><integer>7</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>38</integer>
-// CHECK-NEXT:           <key>col</key><integer>12</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>38</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>38</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>38</integer>
-// CHECK-NEXT:      <key>col</key><integer>14</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>38</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>38</integer>
-// CHECK-NEXT:         <key>col</key><integer>18</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$class AA::X &amp; AA::X::operator=(int)$14$return*this;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$class AA::X &amp; AA::X::operator=(int)$14$return*this;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$class AA::X &amp; AA::X::operator=(int)$14$return*this;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>b47cf7973c9b459d9c99c483e722db8e</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>C++ method</string>
-// CHECK-NEXT:  <key>issue_context</key><string>operator=</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>38</integer>
-// CHECK-NEXT:   <key>col</key><integer>14</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>42</integer>
-// CHECK-NEXT:           <key>col</key><integer>7</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>42</integer>
-// CHECK-NEXT:           <key>col</key><integer>12</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>42</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>42</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>42</integer>
-// CHECK-NEXT:      <key>col</key><integer>14</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>42</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>42</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::operator int()$14$return0;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::operator int()$14$return0;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$AA::X::operator int()$14$return0;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>0cbb0e1e5b03ba5b4f7f8f17504de671</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>42</integer>
-// CHECK-NEXT:   <key>col</key><integer>14</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>46</integer>
-// CHECK-NEXT:           <key>col</key><integer>7</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>46</integer>
-// CHECK-NEXT:           <key>col</key><integer>12</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>46</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>46</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>46</integer>
-// CHECK-NEXT:      <key>col</key><integer>14</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>46</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>46</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::operator float()$14$return0;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$AA::X::operator float()$14$return0;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$AA::X::operator float()$14$return0;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>df306826bf89e50c1b55e1d379a761b3</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>46</integer>
-// CHECK-NEXT:   <key>col</key><integer>14</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>52</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>52</integer>
-// CHECK-NEXT:           <key>col</key><integer>8</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>52</integer>
-// CHECK-NEXT:           <key>col</key><integer>10</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>52</integer>
-// CHECK-NEXT:           <key>col</key><integer>10</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>52</integer>
-// CHECK-NEXT:      <key>col</key><integer>10</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>52</integer>
-// CHECK-NEXT:         <key>col</key><integer>10</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>52</integer>
-// CHECK-NEXT:         <key>col</key><integer>10</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int AA::X::OutOfLine()$10$return5;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$int AA::X::OutOfLine()$10$return5;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$int AA::X::OutOfLine()$10$return5;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>9dd7b17a6f62ed8c95b37a38cf71f3a9</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>C++ method</string>
-// CHECK-NEXT:  <key>issue_context</key><string>OutOfLine</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>52</integer>
-// CHECK-NEXT:   <key>col</key><integer>10</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>56</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>56</integer>
-// CHECK-NEXT:         <key>col</key><integer>3</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>58</integer>
-// CHECK-NEXT:         <key>col</key><integer>3</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testLambda()$3$[](){$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testLambda()$3$[](){$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$void testLambda()$3$[](){$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>6ad4400e40885a78a0f57f585421a515</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:  <key>issue_context</key><string>testLambda</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>56</integer>
-// CHECK-NEXT:   <key>col</key><integer>3</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>56</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>56</integer>
-// CHECK-NEXT:         <key>col</key><integer>3</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>58</integer>
-// CHECK-NEXT:         <key>col</key><integer>5</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testLambda()$3$[](){$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testLambda()$3$[](){$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$void testLambda()$3$[](){$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>6ad4400e40885a78a0f57f585421a515</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:  <key>issue_context</key><string>testLambda</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>56</integer>
-// CHECK-NEXT:   <key>col</key><integer>3</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>58</integer>
-// CHECK-NEXT:           <key>col</key><integer>4</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>58</integer>
-// CHECK-NEXT:           <key>col</key><integer>4</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>58</integer>
-// CHECK-NEXT:      <key>col</key><integer>4</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>58</integer>
-// CHECK-NEXT:         <key>col</key><integer>4</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>58</integer>
-// CHECK-NEXT:         <key>col</key><integer>5</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testLambda()$4$}();$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testLambda()$4$}();$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$void testLambda()$4$}();$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>378e6de75fb41b05bcef3950ad5ffa5e</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:  <key>issue_context</key><string>testLambda</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>3</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>58</integer>
-// CHECK-NEXT:   <key>col</key><integer>4</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT: </array>
+template <typename T>
+void f(T) {
+  clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void f(T)$27$clang_analyzer_hashDump(5);$Category}}
+}
+
+template <typename T>
+struct TX {
+  void f(T) {
+    clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void TX::f(T)$29$clang_analyzer_hashDump(5);$Category}}
+  }
+};
+
+template <>
+void f<long>(long) {
+  clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void f(long)$27$clang_analyzer_hashDump(5);$Category}}
+}
+
+template <>
+struct TX<long> {
+  void f(long) {
+    clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void TX<long>::f(long)$29$clang_analyzer_hashDump(5);$Category}}
+  }
+};
+
+template <typename T>
+struct TTX {
+  template<typename S>
+  void f(T, S) {
+    clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$void TTX::f(T, S)$29$clang_analyzer_hashDump(5);$Category}}
+  }
+};
+
+void g() {
+  // TX<int> and TX<double> is instantiated from the same code with the same
+  // source locations. The same error happining in both of the instantiations
+  // should share the common hash. This means we should not include the
+  // template argument for these types in the function signature.
+  // Note that, we still want the hash to be different for explicit
+  // specializations.
+  TX<int> x;
+  TX<double> y;
+  TX<long> xl;
+  x.f(1);
+  xl.f(1);
+  f(5);
+  f(3.0);
+  y.f(2);
+  TTX<int> z;
+  z.f<int>(5, 5);
+  f(5l);
+}
diff --git a/test/Analysis/bug_hash_test.m b/test/Analysis/bug_hash_test.m
index 1e99d3c..fbb70e5 100644
--- a/test/Analysis/bug_hash_test.m
+++ b/test/Analysis/bug_hash_test.m
@@ -1,5 +1,6 @@
-// RUN: %clang_analyze_cc1 -fblocks -analyzer-checker=core,debug.DumpBugHash -analyzer-output=plist %s -o %t.plist
-// RUN: FileCheck --input-file=%t.plist %s
+// RUN: %clang_analyze_cc1 -fblocks -analyzer-checker=core,debug.ExprInspection %s -verify
+
+void clang_analyzer_hashDump(int);
 
 @protocol NSObject
 + (id)alloc;
@@ -15,1178 +16,21 @@
 @end
 
 @implementation NSObject
++ (id)alloc {
+  return 0;
+}
+- (id)init {
+  return self;
+}
 - (void)method:(int)arg param:(int)arg2 {
-  arg = 5;
-  return;
+  clang_analyzer_hashDump(5); // expected-warning {{debug.ExprInspection$NSObject::method:param:$27$clang_analyzer_hashDump(5);$Category}}
 }
 @end
 
 
 void testBlocks() {
   int x = 5;
-  ^{ int y = 1 + x; }();
+  ^{
+    clang_analyzer_hashDump(x); // expected-warning {{debug.ExprInspection$$29$clang_analyzer_hashDump(x);$Category}}
+  }();
 }
-
-// CHECK: <key>diagnostics</key>
-// CHECK-NEXT: <array>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>5</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>7</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>7</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>19</integer>
-// CHECK-NEXT:      <key>col</key><integer>7</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>3</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>9</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$NSObject::method:param:$3$arg=5;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$NSObject::method:param:$3$arg=5;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$NSObject::method:param:$3$arg=5;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>f9f569e94382c1f969aabd304581b294</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>Objective-C method</string>
-// CHECK-NEXT:  <key>issue_context</key><string>method:param:</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>19</integer>
-// CHECK-NEXT:   <key>col</key><integer>7</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>5</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>9</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>19</integer>
-// CHECK-NEXT:           <key>col</key><integer>9</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>19</integer>
-// CHECK-NEXT:      <key>col</key><integer>9</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>9</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>19</integer>
-// CHECK-NEXT:         <key>col</key><integer>9</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$NSObject::method:param:$9$arg=5;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$NSObject::method:param:$9$arg=5;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$NSObject::method:param:$9$arg=5;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>ca44d6aa882ee55f76e11a80d5a66372</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>Objective-C method</string>
-// CHECK-NEXT:  <key>issue_context</key><string>method:param:</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>19</integer>
-// CHECK-NEXT:   <key>col</key><integer>9</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>26</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>26</integer>
-// CHECK-NEXT:         <key>col</key><integer>3</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>26</integer>
-// CHECK-NEXT:         <key>col</key><integer>7</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testBlocks()$3$intx=5;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testBlocks()$3$intx=5;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$void testBlocks()$3$intx=5;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>84ec7c854c1c7849abfa03f7f20b4f06</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:  <key>issue_context</key><string>testBlocks</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>26</integer>
-// CHECK-NEXT:   <key>col</key><integer>3</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>5</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>11</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>11</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>26</integer>
-// CHECK-NEXT:      <key>col</key><integer>11</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>26</integer>
-// CHECK-NEXT:         <key>col</key><integer>11</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>26</integer>
-// CHECK-NEXT:         <key>col</key><integer>11</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testBlocks()$11$intx=5;$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testBlocks()$11$intx=5;$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$void testBlocks()$11$intx=5;$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>f91db2d7b129ed60e7c9caf6f8a84d5c</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:  <key>issue_context</key><string>testBlocks</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>26</integer>
-// CHECK-NEXT:   <key>col</key><integer>11</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>5</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>3</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>21</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testBlocks()$3$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testBlocks()$3$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$void testBlocks()$3$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>0f1e9483a8ff59e787eaac18b68068ad</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:  <key>issue_context</key><string>testBlocks</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>2</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>27</integer>
-// CHECK-NEXT:   <key>col</key><integer>3</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>5</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>3</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>23</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testBlocks()$3$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$void testBlocks()$3$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$void testBlocks()$3$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>0f1e9483a8ff59e787eaac18b68068ad</string>
-// CHECK-NEXT:  <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:  <key>issue_context</key><string>testBlocks</string>
-// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>2</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>27</integer>
-// CHECK-NEXT:   <key>col</key><integer>3</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>5</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>3</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>23</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>Calling anonymous block</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>Calling anonymous block</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>depth</key><integer>1</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>Entered call from &apos;testBlocks&apos;</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>Entered call from &apos;testBlocks&apos;</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>6</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>8</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>6</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>6</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>10</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>1</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$$6$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$$6$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$$6$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>8a8e42efc427e1334b77d510d3fb6361</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>27</integer>
-// CHECK-NEXT:   <key>col</key><integer>6</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>5</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>3</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>23</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>Calling anonymous block</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>Calling anonymous block</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>depth</key><integer>1</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>Entered call from &apos;testBlocks&apos;</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>Entered call from &apos;testBlocks&apos;</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>6</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>8</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>6</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>8</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>14</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>14</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>1</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$$14$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$$14$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$$14$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>6d6028808f1d47ec5b74a417e96c2a02</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>27</integer>
-// CHECK-NEXT:   <key>col</key><integer>14</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>5</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>3</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>23</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>Calling anonymous block</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>Calling anonymous block</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>depth</key><integer>1</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>Entered call from &apos;testBlocks&apos;</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>Entered call from &apos;testBlocks&apos;</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>16</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>16</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>16</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>14</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>18</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>1</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$$14$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$$14$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$$14$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>162138b23629276baad7dd3e8051fd6f</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>27</integer>
-// CHECK-NEXT:   <key>col</key><integer>16</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>path</key>
-// CHECK-NEXT:   <array>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>26</integer>
-// CHECK-NEXT:           <key>col</key><integer>5</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>3</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>23</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>0</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>Calling anonymous block</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>Calling anonymous block</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>3</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>depth</key><integer>1</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>Entered call from &apos;testBlocks&apos;</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>Entered call from &apos;testBlocks&apos;</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>3</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>6</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>8</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>control</string>
-// CHECK-NEXT:     <key>edges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>start</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>6</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>8</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:        <key>end</key>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>18</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
-// CHECK-NEXT:           <key>col</key><integer>18</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>kind</key><string>event</string>
-// CHECK-NEXT:     <key>location</key>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>27</integer>
-// CHECK-NEXT:      <key>col</key><integer>18</integer>
-// CHECK-NEXT:      <key>file</key><integer>0</integer>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <key>ranges</key>
-// CHECK-NEXT:     <array>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>18</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>27</integer>
-// CHECK-NEXT:         <key>col</key><integer>18</integer>
-// CHECK-NEXT:         <key>file</key><integer>0</integer>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </array>
-// CHECK-NEXT:     <key>depth</key><integer>1</integer>
-// CHECK-NEXT:     <key>extended_message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$$18$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:     <key>message</key>
-// CHECK-NEXT:     <string>debug.DumpBugHash$$18$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:    </dict>
-// CHECK-NEXT:   </array>
-// CHECK-NEXT:   <key>description</key><string>debug.DumpBugHash$$18$^{inty=1+x;}();$debug</string>
-// CHECK-NEXT:   <key>category</key><string>debug</string>
-// CHECK-NEXT:   <key>type</key><string>Dump hash components</string>
-// CHECK-NEXT:   <key>check_name</key><string>debug.DumpBugHash</string>
-// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>b3add78bcab0ebc3da3b640081057525</string>
-// CHECK-NEXT:  <key>location</key>
-// CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>27</integer>
-// CHECK-NEXT:   <key>col</key><integer>18</integer>
-// CHECK-NEXT:   <key>file</key><integer>0</integer>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT:  </dict>
-// CHECK-NEXT: </array>
diff --git a/test/Analysis/builtin-functions.cpp b/test/Analysis/builtin-functions.cpp
index 2c19502..1998496 100644
--- a/test/Analysis/builtin-functions.cpp
+++ b/test/Analysis/builtin-functions.cpp
@@ -64,3 +64,20 @@
                                     // We give up the analysis on this path.
   }
 }
+
+void test_constant_p() {
+  int i = 1;
+  const int j = 2;
+  constexpr int k = 3;
+  clang_analyzer_eval(__builtin_constant_p(42) == 1); // expected-warning {{TRUE}}
+  clang_analyzer_eval(__builtin_constant_p(i) == 0); // expected-warning {{TRUE}}
+  clang_analyzer_eval(__builtin_constant_p(j) == 1); // expected-warning {{TRUE}}
+  clang_analyzer_eval(__builtin_constant_p(k) == 1); // expected-warning {{TRUE}}
+  clang_analyzer_eval(__builtin_constant_p(i + 42) == 0); // expected-warning {{TRUE}}
+  clang_analyzer_eval(__builtin_constant_p(j + 42) == 1); // expected-warning {{TRUE}}
+  clang_analyzer_eval(__builtin_constant_p(k + 42) == 1); // expected-warning {{TRUE}}
+  clang_analyzer_eval(__builtin_constant_p(" ") == 1); // expected-warning {{TRUE}}
+  clang_analyzer_eval(__builtin_constant_p(test_constant_p) == 0); // expected-warning {{TRUE}}
+  clang_analyzer_eval(__builtin_constant_p(k - 3) == 0); // expected-warning {{FALSE}}
+  clang_analyzer_eval(__builtin_constant_p(k - 3) == 1); // expected-warning {{TRUE}}
+}
diff --git a/test/Analysis/call_once.cpp b/test/Analysis/call_once.cpp
index befddca..dd4b2d4 100644
--- a/test/Analysis/call_once.cpp
+++ b/test/Analysis/call_once.cpp
@@ -1,11 +1,34 @@
-// RUN: %clang_analyze_cc1 -std=c++11 -fblocks -analyzer-checker=core,debug.ExprInspection -verify %s
-// RUN: %clang_analyze_cc1 -std=c++11 -fblocks -analyzer-checker=core,debug.ExprInspection -DEMULATE_LIBSTDCPP -verify %s
+// RUN: %clang_analyze_cc1 -std=c++11 -fblocks -analyzer-checker=core,debug.ExprInspection -verify %s -o %t.report
+// RUN: %clang_analyze_cc1 -std=c++11 -fblocks -analyzer-checker=core,debug.ExprInspection -DEMULATE_LIBSTDCPP -verify %s -o %t.report
+
+// We do NOT model libcxx03 implementation, but the analyzer should still
+// not crash.
+// RUN: %clang_analyze_cc1 -std=c++11 -fblocks -analyzer-checker=core,debug.ExprInspection -DEMULATE_LIBCXX03 -verify %s -o %t.report
+// RUN: %clang_analyze_cc1 -std=c++11 -fblocks -analyzer-checker=core,debug.ExprInspection -DEMULATE_LIBCXX03 -DEMULATE_LIBSTDCPP -verify %s -o %t.report
+// RUN: rm -rf %t.report
 
 void clang_analyzer_eval(bool);
 
-// Faking std::std::call_once implementation.
+// Faking std::call_once implementation.
 namespace std {
 
+// Fake std::function implementation.
+template <typename>
+class function;
+class function_base {
+ public:
+  long field;
+};
+template <typename R, typename... P>
+class function<R(P...)> : function_base {
+ public:
+   R operator()(P...) const {
+
+     // Read from a super-class necessary to reproduce a crash.
+     bool a = field;
+   }
+};
+
 #ifndef EMULATE_LIBSTDCPP
 typedef struct once_flag_s {
   unsigned long __state_ = 0;
@@ -16,8 +39,13 @@
 } once_flag;
 #endif
 
+#ifndef EMULATE_LIBCXX03
 template <class Callable, class... Args>
 void call_once(once_flag &o, Callable&& func, Args&&... args) {};
+#else
+template <class Callable, class... Args> // libcxx03 call_once
+void call_once(once_flag &o, Callable func, Args&&... args) {};
+#endif
 
 } // namespace std
 
@@ -28,7 +56,9 @@
 
   std::call_once(g_initialize, [&] {
     int *x = nullptr;
+#ifndef EMULATE_LIBCXX03
     int y = *x; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}}
+#endif
     z = 200;
   });
 }
@@ -45,8 +75,10 @@
     x = &z;
   });
 
+#ifndef EMULATE_LIBCXX03
   *x = 100; // no-warning
   clang_analyzer_eval(z == 100); // expected-warning{{TRUE}}
+#endif
 }
 
 void test_called_on_path_no_warning() {
@@ -59,7 +91,11 @@
     x = &y;
   });
 
+#ifndef EMULATE_LIBCXX03
   *x = 100; // no-warning
+#else
+  *x = 100; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}}
+#endif
 }
 
 void test_called_on_path_warning() {
@@ -72,7 +108,9 @@
     x = nullptr;
   });
 
+#ifndef EMULATE_LIBCXX03
   *x = 100; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}}
+#endif
 }
 
 void test_called_once_warning() {
@@ -89,7 +127,9 @@
     x = &y;
   });
 
+#ifndef EMULATE_LIBCXX03
   *x = 100; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}}
+#endif
 }
 
 void test_called_once_no_warning() {
@@ -106,7 +146,9 @@
     x = nullptr;
   });
 
+#ifndef EMULATE_LIBCXX03
   *x = 100; // no-warning
+#endif
 }
 
 static int global = 0;
@@ -117,7 +159,9 @@
 void test_func_pointers() {
   static std::once_flag flag;
   std::call_once(flag, &funcPointer);
+#ifndef EMULATE_LIBCXX03
   clang_analyzer_eval(global == 1); // expected-warning{{TRUE}}
+#endif
 }
 
 template <class _Fp>
@@ -157,7 +201,9 @@
   },
                  x);
 
+#ifndef EMULATE_LIBCXX03
   clang_analyzer_eval(y == 120); // expected-warning{{TRUE}}
+#endif
 }
 
 void test_param_passing_lambda_false() {
@@ -169,7 +215,9 @@
   },
                  x);
 
+#ifndef EMULATE_LIBCXX03
   clang_analyzer_eval(x == 120); // expected-warning{{FALSE}}
+#endif
 }
 
 void test_param_passing_stored_lambda() {
@@ -182,7 +230,9 @@
   };
 
   std::call_once(flag, lambda, x);
+#ifndef EMULATE_LIBCXX03
   clang_analyzer_eval(y == 120); // expected-warning{{TRUE}}
+#endif
 }
 
 void test_multiparam_passing_lambda() {
@@ -194,8 +244,10 @@
   },
                  1, 2, 3);
 
+#ifndef EMULATE_LIBCXX03
   clang_analyzer_eval(x == 120); // expected-warning{{FALSE}}
   clang_analyzer_eval(x == 6); // expected-warning{{TRUE}}
+#endif
 }
 
 static int global2 = 0;
@@ -206,7 +258,9 @@
     global2 = a + b + c;
   },
                  1, 2, 3);
+#ifndef EMULATE_LIBCXX03
   clang_analyzer_eval(global2 == 6); // expected-warning{{TRUE}}
+#endif
 }
 
 static int global3 = 0;
@@ -220,7 +274,9 @@
 
   std::call_once(flag, &funcptr, 1, 2, 3);
 
+#ifndef EMULATE_LIBCXX03
   clang_analyzer_eval(global3 == 6); // expected-warning{{TRUE}}
+#endif
 }
 
 void test_blocks() {
@@ -229,7 +285,9 @@
   std::call_once(flag, ^{
     global3 = 120;
   });
+#ifndef EMULATE_LIBCXX03
   clang_analyzer_eval(global3 == 120); // expected-warning{{TRUE}}
+#endif
 }
 
 int call_once() {
@@ -238,7 +296,9 @@
 
 void test_non_std_call_once() {
   int x = call_once();
+#ifndef EMULATE_LIBCXX03
   clang_analyzer_eval(x == 5); // expected-warning{{TRUE}}
+#endif
 }
 
 namespace std {
@@ -247,5 +307,99 @@
 }
 void g();
 void test_no_segfault_on_different_impl() {
+#ifndef EMULATE_LIBCXX03
   std::call_once(g, false); // no-warning
+#endif
+}
+
+void test_lambda_refcapture() {
+  static std::once_flag flag;
+  int a = 6;
+  std::call_once(flag, [&](int &a) { a = 42; }, a);
+#ifndef EMULATE_LIBCXX03
+  clang_analyzer_eval(a == 42); // expected-warning{{TRUE}}
+#endif
+}
+
+void test_lambda_refcapture2() {
+  static std::once_flag flag;
+  int a = 6;
+  std::call_once(flag, [=](int &a) { a = 42; }, a);
+#ifndef EMULATE_LIBCXX03
+  clang_analyzer_eval(a == 42); // expected-warning{{TRUE}}
+#endif
+}
+
+void test_lambda_fail_refcapture() {
+  static std::once_flag flag;
+  int a = 6;
+  std::call_once(flag, [=](int a) { a = 42; }, a);
+#ifndef EMULATE_LIBCXX03
+  clang_analyzer_eval(a == 42); // expected-warning{{FALSE}}
+#endif
+}
+
+void mutator(int &param) {
+  param = 42;
+}
+void test_reftypes_funcptr() {
+  static std::once_flag flag;
+  int a = 6;
+  std::call_once(flag, &mutator, a);
+#ifndef EMULATE_LIBCXX03
+  clang_analyzer_eval(a == 42); // expected-warning{{TRUE}}
+#endif
+}
+
+void fail_mutator(int param) {
+  param = 42;
+}
+void test_mutator_noref() {
+  static std::once_flag flag;
+  int a = 6;
+  std::call_once(flag, &fail_mutator, a);
+#ifndef EMULATE_LIBCXX03
+  clang_analyzer_eval(a == 42); // expected-warning{{FALSE}}
+#endif
+}
+
+// Function is implicitly treated as a function pointer
+// even when an ampersand is not explicitly set.
+void callbackn(int &param) {
+  param = 42;
+}
+void test_implicit_funcptr() {
+  int x = 0;
+  static std::once_flag flagn;
+
+  std::call_once(flagn, callbackn, x);
+#ifndef EMULATE_LIBCXX03
+  clang_analyzer_eval(x == 42); // expected-warning{{TRUE}}
+#endif
+}
+
+int param_passed(int *x) {
+  return *x; // no-warning, as std::function is not working yet.
+}
+
+void callback_taking_func_ok(std::function<void(int*)> &innerCallback) {
+  innerCallback(nullptr);
+}
+
+// The provided callback expects an std::function, but instead a pointer
+// to a C++ function is provided.
+void callback_with_implicit_cast_ok() {
+  std::once_flag flag;
+  call_once(flag, callback_taking_func_ok, &param_passed);
+}
+
+void callback_taking_func(std::function<void()> &innerCallback) {
+  innerCallback();
+}
+
+// The provided callback expects an std::function, but instead a C function
+// name is provided, and C++ implicitly auto-constructs a pointer from it.
+void callback_with_implicit_cast() {
+  std::once_flag flag;
+  call_once(flag, callback_taking_func, callback_with_implicit_cast);
 }
diff --git a/test/Analysis/cfg-indirect-goto-determinism.cpp b/test/Analysis/cfg-indirect-goto-determinism.cpp
new file mode 100644
index 0000000..895535e
--- /dev/null
+++ b/test/Analysis/cfg-indirect-goto-determinism.cpp
@@ -0,0 +1,96 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 | FileCheck %s
+
+void *target;
+int indirectBlockSuccessorDeterminism() {
+    (void)&&L1;
+    (void)&&L2;
+    (void)&&L3;
+    (void)&&L4;
+    (void)&&L5;
+    (void)&&L6;
+    (void)&&L7;
+    (void)&&L8;
+    (void)&&L9;
+    (void)&&L10;
+    (void)&&L11;
+    (void)&&L12;
+    (void)&&L13;
+    (void)&&L14;
+    (void)&&L15;
+    (void)&&L16;
+    (void)&&L17;
+    (void)&&L18;
+    (void)&&L19;
+    (void)&&L20;
+    (void)&&L21;
+    (void)&&L22;
+    (void)&&L23;
+    (void)&&L24;
+    (void)&&L25;
+    (void)&&L26;
+    (void)&&L27;
+    (void)&&L28;
+    (void)&&L29;
+    (void)&&L30;
+    (void)&&L31;
+    (void)&&L32;
+    (void)&&L33;
+    (void)&&L34;
+    (void)&&L35;
+    (void)&&L36;
+    (void)&&L37;
+    (void)&&L38;
+    (void)&&L39;
+    (void)&&L40;
+
+    goto *target;
+  L1:
+  L2:
+  L3:
+  L4:
+  L5:
+  L6:
+  L7:
+  L8:
+  L9:
+  L10:
+  L11:
+  L12:
+  L13:
+  L14:
+  L15:
+  L16:
+  L17:
+  L18:
+  L19:
+  L20:
+  L21:
+  L22:
+  L23:
+  L24:
+  L25:
+  L26:
+  L27:
+  L28:
+  L29:
+  L30:
+  L31:
+  L32:
+  L33:
+  L34:
+  L35:
+  L36:
+  L37:
+  L38:
+  L39:
+  L40:
+    return 0;
+}
+
+// CHECK-LABEL:  [B41 (INDIRECT GOTO DISPATCH)]
+// CHECK-NEXT:   Preds (1): B42
+// CHECK-NEXT:  Succs (40): B1 B2 B3 B4 B5 B6 B7 B8
+// CHECK-NEXT:       B9 B10 B11 B12 B13 B14 B15 B16 B17 B18
+// CHECK-NEXT:       B19 B20 B21 B22 B23 B24 B25 B26 B27 B28
+// CHECK-NEXT:       B29 B30 B31 B32 B33 B34 B35 B36 B37 B38
+// CHECK-NEXT:       B39 B40
diff --git a/test/Analysis/cfg-rich-constructors.cpp b/test/Analysis/cfg-rich-constructors.cpp
new file mode 100644
index 0000000..2ae33d3
--- /dev/null
+++ b/test/Analysis/cfg-rich-constructors.cpp
@@ -0,0 +1,518 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG -triple x86_64-apple-darwin12 -analyzer-config cfg-temporary-dtors=true -std=c++11 -w %s > %t 2>&1
+// RUN: FileCheck --input-file=%t %s
+
+class C {
+public:
+  C();
+  C(C *);
+  C(int, int);
+
+  static C get();
+  operator bool() const;
+};
+
+typedef __typeof(sizeof(int)) size_t;
+void *operator new(size_t size, void *placement);
+
+namespace operator_new {
+
+// CHECK: void operatorNewWithConstructor()
+// CHECK:          1: CFGNewAllocator(C *)
+// CHECK-NEXT:     2:  (CXXConstructExpr, [B1.3], class C)
+// CHECK-NEXT:     3: new C([B1.2])
+void operatorNewWithConstructor() {
+  new C();
+}
+
+// CHECK: void operatorNewWithConstructorWithOperatorNewWithContstructor()
+// CHECK:          1: CFGNewAllocator(C *)
+// CHECK-NEXT:     2: CFGNewAllocator(C *)
+// CHECK-NEXT:     3:  (CXXConstructExpr, [B1.4], class C)
+// CHECK-NEXT:     4: new C([B1.3])
+// CHECK-NEXT:     5: [B1.4] (CXXConstructExpr, [B1.6], class C)
+// CHECK-NEXT:     6: new C([B1.5])
+void operatorNewWithConstructorWithOperatorNewWithContstructor() {
+	new C(new C());
+}
+
+// CHECK: void operatorPlacementNewWithConstructorWithinPlacementArgument()
+// CHECK:          1: CFGNewAllocator(C *)
+// CHECK-NEXT:     2:  (CXXConstructExpr, [B1.3], class C)
+// CHECK-NEXT:     3: new C([B1.2])
+// CHECK-NEXT:     4: [B1.3] (ImplicitCastExpr, BitCast, void *)
+// CHECK-NEXT:     5: CFGNewAllocator(C *)
+// CHECK-NEXT:     6:  (CXXConstructExpr, [B1.7], class C)
+// CHECK-NEXT:     7: new ([B1.4]) C([B1.6])
+void operatorPlacementNewWithConstructorWithinPlacementArgument() {
+	new (new C()) C();
+}
+
+} // namespace operator_new
+
+namespace decl_stmt {
+
+// CHECK: void simpleVariable()
+// CHECK:          1:  (CXXConstructExpr, [B1.2], class C)
+// CHECK-NEXT:     2: C c;
+void simpleVariable() {
+  C c;
+}
+
+// CHECK: void simpleVariableWithBraces()
+// CHECK:          1: {} (CXXConstructExpr, [B1.2], class C)
+// CHECK-NEXT:     2: C c{};
+void simpleVariableWithBraces() {
+  C c{};
+}
+
+// CHECK: void simpleVariableWithConstructorArgument()
+// CHECK:          1: 0
+// CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, NullToPointer, class C *)
+// CHECK-NEXT:     3: [B1.2] (CXXConstructExpr, [B1.4], class C)
+// CHECK-NEXT:     4: C c(0);
+void simpleVariableWithConstructorArgument() {
+  C c(0);
+}
+
+// CHECK: void simpleVariableWithOperatorNewInConstructorArgument()
+// CHECK:          1: CFGNewAllocator(C *)
+// CHECK-NEXT:     2:  (CXXConstructExpr, [B1.3], class C)
+// CHECK-NEXT:     3: new C([B1.2])
+// CHECK-NEXT:     4: [B1.3] (CXXConstructExpr, [B1.5], class C)
+// CHECK-NEXT:     5: C c(new C());
+void simpleVariableWithOperatorNewInConstructorArgument() {
+  C c(new C());
+}
+
+// CHECK: void simpleVariableWithOperatorNewInBraces()
+// CHECK:          1: CFGNewAllocator(C *)
+// CHECK-NEXT:     2:  (CXXConstructExpr, [B1.3], class C)
+// CHECK-NEXT:     3: new C([B1.2])
+// CHECK-NEXT:     4: {[B1.3]} (CXXConstructExpr, [B1.5], class C)
+// CHECK-NEXT:     5: C c{new C()};
+void simpleVariableWithOperatorNewInBraces() {
+  C c{new C()};
+}
+
+// CHECK: void simpleVariableInitializedByValue()
+// CHECK:          1: C::get
+// CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, FunctionToPointerDecay, class C (*)(void))
+// CHECK-NEXT:     3: [B1.2]()
+// CHECK-NEXT:     4: [B1.3]
+// CHECK-NEXT:     5: [B1.4] (CXXConstructExpr, [B1.6], class C)
+// CHECK-NEXT:     6: C c = C::get();
+void simpleVariableInitializedByValue() {
+  C c = C::get();
+}
+
+// CHECK: void simpleVariableWithTernaryOperator(bool coin)
+// CHECK:        [B1]
+// CHECK-NEXT:     1: [B4.2] ? [B2.5] : [B3.6]
+// CHECK-NEXT:     2: [B1.1]
+// CHECK-NEXT:     3: [B1.2] (CXXConstructExpr, [B1.4], class C)
+// CHECK-NEXT:     4: C c = coin ? C::get() : C(0);
+// CHECK:        [B2]
+// CHECK-NEXT:     1: C::get
+// CHECK-NEXT:     2: [B2.1] (ImplicitCastExpr, FunctionToPointerDecay, class C (*)(void))
+// CHECK-NEXT:     3: [B2.2]()
+// CHECK-NEXT:     4: [B2.3]
+// CHECK-NEXT:     5: [B2.4] (CXXConstructExpr, [B1.2], class C)
+// CHECK:        [B3]
+// CHECK-NEXT:     1: 0
+// CHECK-NEXT:     2: [B3.1] (ImplicitCastExpr, NullToPointer, class C *)
+// CHECK-NEXT:     3: [B3.2] (CXXConstructExpr, [B3.5], class C)
+// CHECK-NEXT:     4: C([B3.3]) (CXXFunctionalCastExpr, ConstructorConversion, class C)
+// CHECK-NEXT:     5: [B3.4]
+// CHECK-NEXT:     6: [B3.5] (CXXConstructExpr, [B1.2], class C)
+// CHECK:        [B4]
+// CHECK-NEXT:     1: coin
+// CHECK-NEXT:     2: [B4.1] (ImplicitCastExpr, LValueToRValue, _Bool)
+// CHECK-NEXT:     T: [B4.2] ? ... : ...
+void simpleVariableWithTernaryOperator(bool coin) {
+  C c = coin ? C::get() : C(0);
+}
+
+// CHECK: void simpleVariableWithElidableCopy()
+// CHECK:          1: 0
+// CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, NullToPointer, class C *)
+// CHECK-NEXT:     3: [B1.2] (CXXConstructExpr, [B1.5], class C)
+// CHECK-NEXT:     4: C([B1.3]) (CXXFunctionalCastExpr, ConstructorConversion, class C)
+// CHECK-NEXT:     5: [B1.4]
+// CHECK-NEXT:     6: [B1.5] (CXXConstructExpr, [B1.7], class C)
+// CHECK-NEXT:     7: C c = C(0);
+void simpleVariableWithElidableCopy() {
+  C c = C(0);
+}
+
+// CHECK: void referenceVariableWithConstructor()
+// CHECK:          1: 0
+// CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, NullToPointer, class C *)
+// CHECK-NEXT:     3: [B1.2] (CXXConstructExpr, [B1.4], const class C)
+// CHECK-NEXT:     4: [B1.3]
+// CHECK-NEXT:     5: const C &c(0);
+void referenceVariableWithConstructor() {
+  const C &c(0);
+}
+
+// CHECK: void referenceVariableWithInitializer()
+// CHECK:          1: C() (CXXConstructExpr, [B1.3], class C)
+// CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, NoOp, const class C)
+// CHECK-NEXT:     3: [B1.2]
+// CHECK-NEXT:     4: const C &c = C();
+void referenceVariableWithInitializer() {
+  const C &c = C();
+}
+
+// CHECK: void referenceVariableWithTernaryOperator(bool coin)
+// CHECK:        [B1]
+// CHECK-NEXT:     1: [B4.2] ? [B2.5] : [B3.6]
+// CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, NoOp, const class C)
+// CHECK-NEXT:     3: [B1.2]
+// CHECK-NEXT:     4: const C &c = coin ? C::get() : C(0);
+// CHECK:        [B2]
+// CHECK-NEXT:     1: C::get
+// CHECK-NEXT:     2: [B2.1] (ImplicitCastExpr, FunctionToPointerDecay, class C (*)(void))
+// CHECK-NEXT:     3: [B2.2]()
+// CHECK-NEXT:     4: [B2.3]
+// CHECK-NEXT:     5: [B2.4] (CXXConstructExpr, [B1.3], class C)
+// CHECK:        [B3]
+// CHECK-NEXT:     1: 0
+// CHECK-NEXT:     2: [B3.1] (ImplicitCastExpr, NullToPointer, class C *)
+// CHECK-NEXT:     3: [B3.2] (CXXConstructExpr, [B3.5], class C)
+// CHECK-NEXT:     4: C([B3.3]) (CXXFunctionalCastExpr, ConstructorConversion, class C)
+// CHECK-NEXT:     5: [B3.4]
+// CHECK-NEXT:     6: [B3.5] (CXXConstructExpr, [B1.3], class C)
+// CHECK:        [B4]
+// CHECK-NEXT:     1: coin
+// CHECK-NEXT:     2: [B4.1] (ImplicitCastExpr, LValueToRValue, _Bool)
+// CHECK-NEXT:     T: [B4.2] ? ... : ...
+void referenceVariableWithTernaryOperator(bool coin) {
+  const C &c = coin ? C::get() : C(0);
+}
+
+} // end namespace decl_stmt
+
+namespace ctor_initializers {
+
+class D: public C {
+  C c1;
+
+public:
+
+// CHECK: D()
+// CHECK:          1:  (CXXConstructExpr, C() (Base initializer), class C)
+// CHECK-NEXT:     2: C([B1.1]) (Base initializer)
+// CHECK-NEXT:     3: CFGNewAllocator(C *)
+// CHECK-NEXT:     4:  (CXXConstructExpr, [B1.5], class C)
+// CHECK-NEXT:     5: new C([B1.4])
+// CHECK-NEXT:     6: [B1.5] (CXXConstructExpr, c1([B1.5]) (Member initializer), class C)
+// CHECK-NEXT:     7: c1([B1.6]) (Member initializer)
+  D(): C(), c1(new C()) {}
+
+// CHECK: D(int)
+// CHECK:          1:  (CXXConstructExpr, D() (Delegating initializer), class ctor_initializers::D)
+// CHECK-NEXT:     2: D([B1.1]) (Delegating initializer)
+  D(int): D() {}
+
+// CHECK: D(double)
+// CHECK:          1: C::get
+// CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, FunctionToPointerDecay, class C (*)(void))
+// CHECK-NEXT:     3: [B1.2]()
+// CHECK-NEXT:     4: [B1.3]
+// CHECK-NEXT:     5: [B1.4] (CXXConstructExpr, C([B1.4]) (Base initializer), class C)
+// CHECK-NEXT:     6: C([B1.5]) (Base initializer)
+// CHECK-NEXT:     7: CFGNewAllocator(C *)
+// CHECK-NEXT:     8: C::get
+// CHECK-NEXT:     9: [B1.8] (ImplicitCastExpr, FunctionToPointerDecay, class C (*)(void))
+// CHECK-NEXT:    10: [B1.9]()
+// CHECK-NEXT:    11: [B1.10]
+// CHECK-NEXT:    12: [B1.11] (CXXConstructExpr, [B1.13], class C)
+// CHECK-NEXT:    13: new C([B1.12])
+// CHECK-NEXT:    14: [B1.13] (CXXConstructExpr, c1([B1.13]) (Member initializer), class C)
+// CHECK-NEXT:    15: c1([B1.14]) (Member initializer)
+  D(double): C(C::get()), c1(new C(C::get())) {}
+};
+
+} // end namespace ctor_initializers
+
+namespace return_stmt_without_dtor {
+
+// CHECK: C returnVariable()
+// CHECK:          1:  (CXXConstructExpr, [B1.2], class C)
+// CHECK-NEXT:     2: C c;
+// CHECK-NEXT:     3: c
+// CHECK-NEXT:     4: [B1.3] (ImplicitCastExpr, NoOp, class C)
+// CHECK-NEXT:     5: [B1.4] (CXXConstructExpr, [B1.6], class C)
+// CHECK-NEXT:     6: return [B1.5];
+C returnVariable() {
+  C c;
+  return c;
+}
+
+// CHECK: C returnEmptyBraces()
+// CHECK:          1: {} (CXXConstructExpr, [B1.2], class C)
+// CHECK-NEXT:     2: return [B1.1];
+C returnEmptyBraces() {
+  return {};
+}
+
+// CHECK: C returnBracesWithOperatorNew()
+// CHECK:          1: CFGNewAllocator(C *)
+// CHECK-NEXT:     2:  (CXXConstructExpr, [B1.3], class C)
+// CHECK-NEXT:     3: new C([B1.2])
+// CHECK-NEXT:     4: {[B1.3]} (CXXConstructExpr, [B1.5], class C)
+// CHECK-NEXT:     5: return [B1.4];
+C returnBracesWithOperatorNew() {
+  return {new C()};
+}
+
+// CHECK: C returnBracesWithMultipleItems()
+// CHECK:          1: 123
+// CHECK-NEXT:     2: 456
+// CHECK-NEXT:     3: {[B1.1], [B1.2]} (CXXConstructExpr, [B1.4], class C)
+// CHECK-NEXT:     4: return [B1.3];
+C returnBracesWithMultipleItems() {
+  return {123, 456};
+}
+
+// CHECK: C returnTemporary()
+// CHECK:          1: C() (CXXConstructExpr, [B1.2], class C)
+// CHECK-NEXT:     2: [B1.1]
+// CHECK-NEXT:     3: [B1.2] (CXXConstructExpr, [B1.4], class C)
+// CHECK-NEXT:     4: return [B1.3];
+C returnTemporary() {
+  return C();
+}
+
+// CHECK: C returnTemporaryWithArgument()
+// CHECK:          1: nullptr
+// CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, NullToPointer, class C *)
+// CHECK-NEXT:     3: [B1.2] (CXXConstructExpr, [B1.5], class C)
+// CHECK-NEXT:     4: C([B1.3]) (CXXFunctionalCastExpr, ConstructorConversion, class C)
+// CHECK-NEXT:     5: [B1.4]
+// CHECK-NEXT:     6: [B1.5] (CXXConstructExpr, [B1.7], class C)
+// CHECK-NEXT:     7: return [B1.6];
+C returnTemporaryWithArgument() {
+  return C(nullptr);
+}
+
+// CHECK: C returnTemporaryConstructedByFunction()
+// CHECK:          1: C::get
+// CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, FunctionToPointerDecay, class C (*)(void))
+// CHECK-NEXT:     3: [B1.2]()
+// CHECK-NEXT:     4: [B1.3]
+// CHECK-NEXT:     5: [B1.4] (CXXConstructExpr, [B1.6], class C)
+// CHECK-NEXT:     6: return [B1.5];
+C returnTemporaryConstructedByFunction() {
+  return C::get();
+}
+
+// CHECK: C returnChainOfCopies()
+// CHECK:          1: C::get
+// CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, FunctionToPointerDecay, class C (*)(void))
+// CHECK-NEXT:     3: [B1.2]()
+// CHECK-NEXT:     4: [B1.3]
+// CHECK-NEXT:     5: [B1.4] (CXXConstructExpr, [B1.7], class C)
+// CHECK-NEXT:     6: C([B1.5]) (CXXFunctionalCastExpr, ConstructorConversion, class C)
+// CHECK-NEXT:     7: [B1.6]
+// CHECK-NEXT:     8: [B1.7] (CXXConstructExpr, [B1.9], class C)
+// CHECK-NEXT:     9: return [B1.8];
+C returnChainOfCopies() {
+  return C(C::get());
+}
+
+} // end namespace return_stmt_without_dtor
+
+namespace return_stmt_with_dtor {
+
+class D {
+public:
+  D();
+  ~D();
+};
+
+// CHECK:  return_stmt_with_dtor::D returnTemporary()
+// CHECK:          1: return_stmt_with_dtor::D() (CXXConstructExpr, [B1.2], [B1.4], class return_stmt_with_dtor::D)
+// CHECK-NEXT:     2: [B1.1] (BindTemporary)
+// CHECK-NEXT:     3: [B1.2] (ImplicitCastExpr, NoOp, const class return_stmt_with_dtor::D)
+// CHECK-NEXT:     4: [B1.3]
+// CHECK-NEXT:     5: [B1.4] (CXXConstructExpr, [B1.7], class return_stmt_with_dtor::D)
+// CHECK-NEXT:     6: ~return_stmt_with_dtor::D() (Temporary object destructor)
+// CHECK-NEXT:     7: return [B1.5];
+D returnTemporary() {
+  return D();
+}
+
+} // end namespace return_stmt_with_dtor
+
+namespace temporary_object_expr_without_dtors {
+
+// TODO: Should provide construction context for the constructor,
+// even if there is no specific trigger statement here.
+// CHECK: void simpleTemporary()
+// CHECK           1: C() (CXXConstructExpr, class C)
+void simpleTemporary() {
+  C();
+}
+
+// TODO: Should provide construction context for the constructor,
+// CHECK: void temporaryInCondition()
+// CHECK:          1: C() (CXXConstructExpr, class C)
+// CHECK-NEXT:     2: [B2.1] (ImplicitCastExpr, NoOp, const class C)
+// CHECK-NEXT:     3: [B2.2].operator bool
+// CHECK-NEXT:     4: [B2.2]
+// CHECK-NEXT:     5: [B2.4] (ImplicitCastExpr, UserDefinedConversion, _Bool)
+// CHECK-NEXT:     T: if [B2.5]
+void temporaryInCondition() {
+  if (C());
+}
+
+} // end namespace temporary_object_expr_without_dtors
+
+namespace temporary_object_expr_with_dtors {
+
+class D {
+public:
+  D();
+  D(int);
+  ~D();
+
+  static D get();
+
+  operator bool() const;
+};
+
+// CHECK: void simpleTemporary()
+// CHECK:          1: temporary_object_expr_with_dtors::D() (CXXConstructExpr, [B1.2], class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     2: [B1.1] (BindTemporary)
+// CHECK-NEXT:     3: ~temporary_object_expr_with_dtors::D() (Temporary object destructor)
+void simpleTemporary() {
+  D();
+}
+
+// CHECK:  void temporaryInCondition()
+// CHECK:          1: temporary_object_expr_with_dtors::D() (CXXConstructExpr, [B2.2], class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     2: [B2.1] (BindTemporary)
+// CHECK-NEXT:     3: [B2.2] (ImplicitCastExpr, NoOp, const class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     4: [B2.3].operator bool
+// CHECK-NEXT:     5: [B2.3]
+// CHECK-NEXT:     6: [B2.5] (ImplicitCastExpr, UserDefinedConversion, _Bool)
+// CHECK-NEXT:     7: ~temporary_object_expr_with_dtors::D() (Temporary object destructor)
+// CHECK-NEXT:     T: if [B2.6]
+void temporaryInCondition() {
+  if (D());
+}
+
+// CHECK: void referenceVariableWithConstructor()
+// CHECK:          1: 0
+// CHECK-NEXT:     2: [B1.1] (CXXConstructExpr, [B1.3], [B1.4], const class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     3: [B1.2] (BindTemporary)
+// CHECK-NEXT:     4: [B1.3]
+// CHECK-NEXT:     5: const temporary_object_expr_with_dtors::D &d(0);
+// CHECK-NEXT:     6: [B1.5].~D() (Implicit destructor)
+void referenceVariableWithConstructor() {
+  const D &d(0);
+}
+
+// CHECK: void referenceVariableWithInitializer()
+// CHECK:          1: temporary_object_expr_with_dtors::D() (CXXConstructExpr, [B1.2], [B1.4], class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     2: [B1.1] (BindTemporary)
+// CHECK-NEXT:     3: [B1.2] (ImplicitCastExpr, NoOp, const class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     4: [B1.3]
+// CHECK-NEXT:     5: const temporary_object_expr_with_dtors::D &d = temporary_object_expr_with_dtors::D();
+// CHECK-NEXT:     6: [B1.5].~D() (Implicit destructor)
+void referenceVariableWithInitializer() {
+  const D &d = D();
+}
+
+// CHECK: void referenceVariableWithTernaryOperator(bool coin)
+// CHECK:        [B4]
+// CHECK-NEXT:     1: [B7.2] ? [B5.8] : [B6.8]
+// CHECK-NEXT:     2: [B4.1] (ImplicitCastExpr, NoOp, const class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     3: [B4.2]
+// CHECK-NEXT:     4: const temporary_object_expr_with_dtors::D &d = coin ? D::get() : temporary_object_expr_with_dtors::D(0);
+// CHECK-NEXT:     T: (Temp Dtor) [B6.3]
+// CHECK:        [B5]
+// CHECK-NEXT:     1: D::get
+// CHECK-NEXT:     2: [B5.1] (ImplicitCastExpr, FunctionToPointerDecay, class temporary_object_expr_with_dtors::D (*)(void))
+// CHECK-NEXT:     3: [B5.2]()
+// CHECK-NEXT:     4: [B5.3] (BindTemporary)
+// CHECK-NEXT:     5: [B5.4] (ImplicitCastExpr, NoOp, const class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     6: [B5.5]
+// CHECK-NEXT:     7: [B5.6] (CXXConstructExpr, [B5.8], [B4.3], class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     8: [B5.7] (BindTemporary)
+// CHECK:        [B6]
+// CHECK-NEXT:     1: 0
+// CHECK-NEXT:     2: [B6.1] (CXXConstructExpr, [B6.3], [B6.6], class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     3: [B6.2] (BindTemporary)
+// CHECK-NEXT:     4: temporary_object_expr_with_dtors::D([B6.3]) (CXXFunctionalCastExpr, ConstructorConversion, class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     5: [B6.4] (ImplicitCastExpr, NoOp, const class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     6: [B6.5]
+// CHECK-NEXT:     7: [B6.6] (CXXConstructExpr, [B6.8], [B4.3], class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     8: [B6.7] (BindTemporary)
+// CHECK:        [B7]
+// CHECK-NEXT:     1: coin
+// CHECK-NEXT:     2: [B7.1] (ImplicitCastExpr, LValueToRValue, _Bool)
+// CHECK-NEXT:     T: [B7.2] ? ... : ...
+void referenceVariableWithTernaryOperator(bool coin) {
+  const D &d = coin ? D::get() : D(0);
+}
+
+// CHECK: void referenceWithFunctionalCast()
+// CHECK:          1: 1
+// CHECK-NEXT:     2: [B1.1] (CXXConstructExpr, [B1.3], [B1.5], class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     3: [B1.2] (BindTemporary)
+// CHECK-NEXT:     4: temporary_object_expr_with_dtors::D([B1.3]) (CXXFunctionalCastExpr, ConstructorCon
+// CHECK-NEXT:     5: [B1.4]
+// CHECK-NEXT:     6: temporary_object_expr_with_dtors::D &&d = temporary_object_expr_with_dtors::D(1);
+// CHECK-NEXT:     7: [B1.6].~D() (Implicit destructor)
+void referenceWithFunctionalCast() {
+  D &&d = D(1);
+}
+
+// Test the condition constructor, we don't care about branch constructors here.
+// CHECK: void constructorInTernaryCondition()
+// CHECK:          1: 1
+// CHECK-NEXT:     2: [B7.1] (CXXConstructExpr, [B7.3], class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     3: [B7.2] (BindTemporary)
+// CHECK-NEXT:     4: temporary_object_expr_with_dtors::D([B7.3]) (CXXFunctionalCastExpr, ConstructorConv
+// CHECK-NEXT:     5: [B7.4] (ImplicitCastExpr, NoOp, const class temporary_object_expr_with_dtors::D)
+// CHECK-NEXT:     6: [B7.5].operator bool
+// CHECK-NEXT:     7: [B7.5]
+// CHECK-NEXT:     8: [B7.7] (ImplicitCastExpr, UserDefinedConversion, _Bool)
+// CHECK-NEXT:     T: [B7.8] ? ... : ...
+void constructorInTernaryCondition() {
+  const D &d = D(1) ? D(2) : D(3);
+}
+
+} // end namespace temporary_object_expr_with_dtors
+
+namespace implicit_constructor_conversion {
+
+class A {};
+A get();
+
+class B {
+public:
+  B(const A &);
+  ~B() {}
+};
+
+// FIXME: Find construction context for the implicit constructor conversion.
+// CHECK: void implicitConstructionConversionFromFunctionValue()
+// CHECK:          1: get
+// CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, FunctionToPointerDecay, class implicit_constructor_conver
+// CHECK-NEXT:     3: [B1.2]()
+// CHECK-NEXT:     4: [B1.3] (ImplicitCastExpr, NoOp, const class implicit_constructor_conversion::A)
+// CHECK-NEXT:     5: [B1.4]
+// CHECK-NEXT:     6: [B1.5] (CXXConstructExpr, class implicit_constructor_conversion::B)
+// CHECK-NEXT:     7: [B1.6] (ImplicitCastExpr, ConstructorConversion, class implicit_constructor_convers
+// CHECK-NEXT:     8: [B1.7] (ImplicitCastExpr, NoOp, const class implicit_constructor_conversion::B)
+// CHECK-NEXT:     9: [B1.8]
+// CHECK-NEXT:    10: const implicit_constructor_conversion::B &b = get();
+// CHECK-NEXT:    11: [B1.10].~B() (Implicit destructor)
+void implicitConstructionConversionFromFunctionValue() {
+  const B &b = get(); // no-crash
+}
+
+} // end namespace implicit_constructor_conversion
diff --git a/test/Analysis/cfg.cpp b/test/Analysis/cfg.cpp
index 2082773..8940257 100644
--- a/test/Analysis/cfg.cpp
+++ b/test/Analysis/cfg.cpp
@@ -1,5 +1,16 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG -triple x86_64-apple-darwin12 -analyzer-config cfg-temporary-dtors=true -std=c++11 %s > %t 2>&1
-// RUN: FileCheck --input-file=%t %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG -triple x86_64-apple-darwin12 -analyzer-config cfg-temporary-dtors=true -std=c++11 -analyzer-config cfg-rich-constructors=false %s > %t 2>&1
+// RUN: FileCheck --input-file=%t -check-prefixes=CHECK,WARNINGS %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG -triple x86_64-apple-darwin12 -analyzer-config cfg-temporary-dtors=true -std=c++11 -analyzer-config cfg-rich-constructors=true %s > %t 2>&1
+// RUN: FileCheck --input-file=%t -check-prefixes=CHECK,ANALYZER %s
+
+// This file tests how we construct two different flavors of the Clang CFG -
+// the CFG used by the Sema analysis-based warnings and the CFG used by the
+// static analyzer. The difference in the behavior is checked via FileCheck
+// prefixes (WARNINGS and ANALYZER respectively). When introducing new analyzer
+// flags, no new run lines should be added - just these flags would go to the
+// respective line depending on where is it turned on and where is it turned
+// off. Feel free to add tests that test only one of the CFG flavors if you're
+// not sure how the other flavor is supposed to work in your case.
 
 // CHECK-LABEL: void checkWrap(int i)
 // CHECK: ENTRY
@@ -48,11 +59,14 @@
 // CHECK-NEXT:   5: int l;
 // CHECK-NEXT:   6: 2
 // CHECK-NEXT:   7: int m = 2;
-// CHECK-NEXT: CXXConstructExpr
+// WARNINGS-NEXT: (CXXConstructExpr, struct standalone)
+// ANALYZER-NEXT: (CXXConstructExpr, [B1.9], struct standalone)
 // CHECK-NEXT:   9: struct standalone myStandalone;
-// CHECK-NEXT: CXXConstructExpr
+// WARNINGS-NEXT: (CXXConstructExpr, struct (anonymous struct at {{.*}}))
+// ANALYZER-NEXT: (CXXConstructExpr, [B1.11], struct (anonymous struct at {{.*}}))
 // CHECK-NEXT:  11: struct (anonymous struct at {{.*}}) myAnon;
-// CHECK-NEXT: CXXConstructExpr
+// WARNINGS-NEXT: (CXXConstructExpr, struct named)
+// ANALYZER-NEXT: (CXXConstructExpr, [B1.13], struct named)
 // CHECK-NEXT:  13: struct named myNamed;
 // CHECK-NEXT:   Preds (1): B2
 // CHECK-NEXT:   Succs (1): B0
@@ -116,7 +130,8 @@
 // CHECK-NEXT:   Succs (1): B1
 // CHECK: [B1]
 // CHECK-NEXT:   1:  CFGNewAllocator(A *)
-// CHECK-NEXT:   2:  (CXXConstructExpr, class A)
+// WARNINGS-NEXT:   2:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:   2:  (CXXConstructExpr, [B1.3], class A)
 // CHECK-NEXT:   3: new A([B1.2])
 // CHECK-NEXT:   4: A *a = new A();
 // CHECK-NEXT:   5: a
@@ -138,7 +153,8 @@
 // CHECK: [B1]
 // CHECK-NEXT:   1: 5
 // CHECK-NEXT:   2: CFGNewAllocator(A *)
-// CHECK-NEXT:   3:  (CXXConstructExpr, class A [5])
+// WARNINGS-NEXT:   3:  (CXXConstructExpr, class A [5])
+// ANALYZER-NEXT:   3:  (CXXConstructExpr, [B1.4], class A [5])
 // CHECK-NEXT:   4: new A {{\[\[}}B1.1]]
 // CHECK-NEXT:   5: A *a = new A [5];
 // CHECK-NEXT:   6: a
@@ -331,7 +347,8 @@
 // CHECK-NEXT:  3: [B1.2] (ImplicitCastExpr, ArrayToPointerDecay, int *)
 // CHECK-NEXT:  4: [B1.3] (ImplicitCastExpr, BitCast, void *)
 // CHECK-NEXT:  5: CFGNewAllocator(MyClass *)
-// CHECK-NEXT:  6:  (CXXConstructExpr, class MyClass)
+// WARNINGS-NEXT:  6:  (CXXConstructExpr, class MyClass)
+// ANALYZER-NEXT:  6:  (CXXConstructExpr, [B1.7], class MyClass)
 // CHECK-NEXT:  7: new ([B1.4]) MyClass([B1.6])
 // CHECK-NEXT:  8: MyClass *obj = new (buffer) MyClass();
 // CHECK-NEXT:  Preds (1): B2
@@ -363,7 +380,8 @@
 // CHECK-NEXT:  4: [B1.3] (ImplicitCastExpr, BitCast, void *)
 // CHECK-NEXT:  5: 5
 // CHECK-NEXT:  6: CFGNewAllocator(MyClass *)
-// CHECK-NEXT:  7:  (CXXConstructExpr, class MyClass [5])
+// WARNINGS-NEXT:  7:  (CXXConstructExpr, class MyClass [5])
+// ANALYZER-NEXT:  7:  (CXXConstructExpr, [B1.8], class MyClass [5])
 // CHECK-NEXT:  8: new ([B1.4]) MyClass {{\[\[}}B1.5]]
 // CHECK-NEXT:  9: MyClass *obj = new (buffer) MyClass [5];
 // CHECK-NEXT:  Preds (1): B2
diff --git a/test/Analysis/constant-folding.c b/test/Analysis/constant-folding.c
index a6d2b74..8189868 100644
--- a/test/Analysis/constant-folding.c
+++ b/test/Analysis/constant-folding.c
@@ -76,3 +76,42 @@
   clang_analyzer_eval(b >= a); // expected-warning{{TRUE}}
   clang_analyzer_eval(a != b); // expected-warning{{TRUE}}
 }
+
+void testBitwiseRules(unsigned int a, int b) {
+  clang_analyzer_eval((a | 1) >= 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval((a | -1) >= -1); // expected-warning{{TRUE}}
+  clang_analyzer_eval((a | 2) >= 2); // expected-warning{{TRUE}}
+  clang_analyzer_eval((a | 5) >= 5); // expected-warning{{TRUE}}
+  clang_analyzer_eval((a | 10) >= 10); // expected-warning{{TRUE}}
+
+  // Argument order should not influence this
+  clang_analyzer_eval((1 | a) >= 1); // expected-warning{{TRUE}}
+
+  clang_analyzer_eval((a & 1) <= 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval((a & 2) <= 2); // expected-warning{{TRUE}}
+  clang_analyzer_eval((a & 5) <= 5); // expected-warning{{TRUE}}
+  clang_analyzer_eval((a & 10) <= 10); // expected-warning{{TRUE}}
+  clang_analyzer_eval((a & -10) <= 10); // expected-warning{{UNKNOWN}}
+
+  // Again, check for different argument order.
+  clang_analyzer_eval((1 & a) <= 1); // expected-warning{{TRUE}}
+
+  unsigned int c = a;
+  c |= 1;
+  clang_analyzer_eval((c | 0) == 0); // expected-warning{{FALSE}}
+
+  // Rules don't apply to signed typed, as the values might be negative.
+  clang_analyzer_eval((b | 1) > 0); // expected-warning{{UNKNOWN}}
+
+  // Even for signed values, bitwise OR with a non-zero is always non-zero.
+  clang_analyzer_eval((b | 1) == 0); // expected-warning{{FALSE}}
+  clang_analyzer_eval((b | -2) == 0); // expected-warning{{FALSE}}
+  clang_analyzer_eval((b | 10) == 0); // expected-warning{{FALSE}}
+  clang_analyzer_eval((b | 0) == 0); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval((b | -2) >= 0); // expected-warning{{UNKNOWN}}
+
+  // Check that dynamically computed constants also work.
+  int constant = 1 << 3;
+  unsigned int d = a | constant;
+  clang_analyzer_eval(constant > 0); // expected-warning{{TRUE}}
+}
diff --git a/test/Analysis/copypaste/macro-complexity.cpp b/test/Analysis/copypaste/macro-complexity.cpp
index 70d3f0c..db90236 100644
--- a/test/Analysis/copypaste/macro-complexity.cpp
+++ b/test/Analysis/copypaste/macro-complexity.cpp
@@ -1,7 +1,7 @@
 // RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:MinimumCloneComplexity=10 -verify %s
 
 // Tests that the complexity value of a macro expansion is about the same as
-// the complexity value of a normal function call and the the macro body doesn't
+// the complexity value of a normal function call and the macro body doesn't
 // influence the complexity. See the CloneSignature class in CloneDetection.h
 // for more information about complexity values of clones.
 
diff --git a/test/Analysis/crash-trace.c b/test/Analysis/crash-trace.c
index b79dd02..ef1763e 100644
--- a/test/Analysis/crash-trace.c
+++ b/test/Analysis/crash-trace.c
@@ -18,6 +18,6 @@
 // CHECK: 0.	Program arguments: {{.*}}clang
 // CHECK-NEXT: 1.	<eof> parser at end of file
 // CHECK-NEXT: 2. While analyzing stack: 
-// CHECK-NEXT:  #0 void inlined()
-// CHECK-NEXT:  #1 void test()
+// CHECK-NEXT:  #0 Calling inlined at line 15
+// CHECK-NEXT:  #1 Calling test
 // CHECK-NEXT: 3.	{{.*}}crash-trace.c:{{[0-9]+}}:3: Error evaluating statement
diff --git a/test/Analysis/ctor.mm b/test/Analysis/ctor.mm
index e903263..cfae8ed 100644
--- a/test/Analysis/ctor.mm
+++ b/test/Analysis/ctor.mm
@@ -1,4 +1,5 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -fobjc-arc -analyzer-config c++-inlining=constructors -Wno-null-dereference -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -fobjc-arc -analyzer-config c++-inlining=constructors -Wno-null-dereference -std=c++11 -verify -DTEST_INLINABLE_ALLOCATORS %s
 
 #include "Inputs/system-header-simulator-cxx.h"
 
@@ -572,10 +573,9 @@
   }
 
   void testNew() {
-    // FIXME: Pending proper implementation of constructors for 'new'.
     raw_pair *pp = new raw_pair();
-    clang_analyzer_eval(pp->p1 == 0); // expected-warning{{UNKNOWN}}
-    clang_analyzer_eval(pp->p2 == 0); // expected-warning{{UNKNOWN}}
+    clang_analyzer_eval(pp->p1 == 0); // expected-warning{{TRUE}}
+    clang_analyzer_eval(pp->p2 == 0); // expected-warning{{TRUE}}
   }
 
   void testArrayNew() {
@@ -679,8 +679,7 @@
 
   void testDynamic() {
     List *list = new List{1, 2};
-    // FIXME: When we handle constructors with 'new', this will be TRUE.
-    clang_analyzer_eval(list->usedInitializerList); // expected-warning{{UNKNOWN}}
+    clang_analyzer_eval(list->usedInitializerList); // expected-warning{{TRUE}}
   }
 }
 
@@ -730,3 +729,23 @@
     S s;
   }
 }
+
+namespace EmptyBaseAssign {
+struct B1 {};
+struct B2 { int x; };
+struct D: public B1, public B2 {
+const D &operator=(const D &d) {
+  *((B2 *)this) = d;
+  *((B1 *)this) = d;
+  return *this;
+}
+};
+
+void test() {
+  D d1;
+  d1.x = 1;
+  D d2;
+  d2 = d1;
+  clang_analyzer_eval(d2.x == 1); // expected-warning{{TRUE}}
+}
+}
diff --git a/test/Analysis/ctu-main.cpp b/test/Analysis/ctu-main.cpp
new file mode 100644
index 0000000..cb82cd4
--- /dev/null
+++ b/test/Analysis/ctu-main.cpp
@@ -0,0 +1,58 @@
+// RUN: mkdir -p %T/ctudir
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-pch -o %T/ctudir/ctu-other.cpp.ast %S/Inputs/ctu-other.cpp
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-pch -o %T/ctudir/ctu-chain.cpp.ast %S/Inputs/ctu-chain.cpp
+// RUN: cp %S/Inputs/externalFnMap.txt %T/ctudir/
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -analyze -analyzer-checker=core,debug.ExprInspection -analyzer-config experimental-enable-naive-ctu-analysis=true -analyzer-config ctu-dir=%T/ctudir -verify %s
+
+void clang_analyzer_eval(int);
+
+int f(int);
+int g(int);
+int h(int);
+
+int callback_to_main(int x) { return x + 1; }
+
+namespace myns {
+int fns(int x);
+
+namespace embed_ns {
+int fens(int x);
+}
+
+class embed_cls {
+public:
+  int fecl(int x);
+};
+}
+
+class mycls {
+public:
+  int fcl(int x);
+  static int fscl(int x);
+
+  class embed_cls2 {
+  public:
+    int fecl2(int x);
+  };
+};
+
+namespace chns {
+int chf1(int x);
+}
+
+int main() {
+  clang_analyzer_eval(f(3) == 2); // expected-warning{{TRUE}}
+  clang_analyzer_eval(f(4) == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(f(5) == 3); // expected-warning{{FALSE}}
+  clang_analyzer_eval(g(4) == 6); // expected-warning{{TRUE}}
+  clang_analyzer_eval(h(2) == 8); // expected-warning{{TRUE}}
+
+  clang_analyzer_eval(myns::fns(2) == 9);                   // expected-warning{{TRUE}}
+  clang_analyzer_eval(myns::embed_ns::fens(2) == -1);       // expected-warning{{TRUE}}
+  clang_analyzer_eval(mycls().fcl(1) == 6);                 // expected-warning{{TRUE}}
+  clang_analyzer_eval(mycls::fscl(1) == 7);                 // expected-warning{{TRUE}}
+  clang_analyzer_eval(myns::embed_cls().fecl(1) == -6);     // expected-warning{{TRUE}}
+  clang_analyzer_eval(mycls::embed_cls2().fecl2(0) == -11); // expected-warning{{TRUE}}
+
+  clang_analyzer_eval(chns::chf1(4) == 12); // expected-warning{{TRUE}}
+}
diff --git a/test/Analysis/cxxnewexpr-callback-inline.cpp b/test/Analysis/cxxnewexpr-callback-inline.cpp
new file mode 100644
index 0000000..c823de8
--- /dev/null
+++ b/test/Analysis/cxxnewexpr-callback-inline.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.AnalysisOrder -analyzer-config c++-allocator-inlining=true,debug.AnalysisOrder:PreStmtCXXNewExpr=true,debug.AnalysisOrder:PostStmtCXXNewExpr=true,debug.AnalysisOrder:PreCall=true,debug.AnalysisOrder:PostCall=true,debug.AnalysisOrder:NewAllocator=true %s 2>&1 | FileCheck %s
+
+#include "Inputs/system-header-simulator-cxx.h"
+
+namespace std {
+  void *malloc(size_t);
+}
+
+void *operator new(size_t size) { return std::malloc(size); }
+
+struct S {
+  S() {}
+};
+
+void foo();
+
+void test() {
+  S *s = new S();
+  foo();
+}
+
+// CHECK:      PreCall (operator new)
+// CHECK-NEXT: PreCall (std::malloc)
+// CHECK-NEXT: PostCall (std::malloc)
+// CHECK-NEXT: PostCall (operator new)
+// CHECK-NEXT: NewAllocator
+// CHECK-NEXT: PreCall (S::S)
+// CHECK-NEXT: PostCall (S::S)
+// CHECK-NEXT: PreStmt<CXXNewExpr>
+// CHECK-NEXT: PostStmt<CXXNewExpr>
+// CHECK-NEXT: PreCall (foo)
+// CHECK-NEXT: PostCall (foo)
diff --git a/test/Analysis/cxxnewexpr-callback-noinline.cpp b/test/Analysis/cxxnewexpr-callback-noinline.cpp
new file mode 100644
index 0000000..87edeac
--- /dev/null
+++ b/test/Analysis/cxxnewexpr-callback-noinline.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.AnalysisOrder -analyzer-config c++-allocator-inlining=false,debug.AnalysisOrder:PreStmtCXXNewExpr=true,debug.AnalysisOrder:PostStmtCXXNewExpr=true,debug.AnalysisOrder:PreCall=true,debug.AnalysisOrder:PostCall=true,debug.AnalysisOrder:NewAllocator=true %s 2>&1 | FileCheck %s
+
+#include "Inputs/system-header-simulator-cxx.h"
+
+namespace std {
+  void *malloc(size_t);
+}
+
+void *operator new(size_t size) { return std::malloc(size); }
+
+struct S {
+  S() {}
+};
+
+void foo();
+
+void test() {
+  S *s = new S();
+  foo();
+}
+
+// CHECK:      PreCall (S::S)
+// CHECK-NEXT: PostCall (S::S)
+// CHECK-NEXT: PreStmt<CXXNewExpr>
+// CHECK-NEXT: PostStmt<CXXNewExpr>
+// CHECK-NEXT: PreCall (foo)
+// CHECK-NEXT: PostCall (foo)
+// CHECK-NEXT: PreCall (std::malloc)
+// CHECK-NEXT: PostCall (std::malloc)
diff --git a/test/Analysis/diagnostics/implicit-cxx-std-suppression.cpp b/test/Analysis/diagnostics/implicit-cxx-std-suppression.cpp
index 197fad5..35f8798 100644
--- a/test/Analysis/diagnostics/implicit-cxx-std-suppression.cpp
+++ b/test/Analysis/diagnostics/implicit-cxx-std-suppression.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDelete,debug.ExprInspection -analyzer-config c++-container-inlining=true -analyzer-config c++-stdlib-inlining=false -std=c++11 -verify %s
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDelete,debug.ExprInspection -analyzer-config c++-container-inlining=true -analyzer-config c++-stdlib-inlining=true -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDelete,debug.ExprInspection -analyzer-config c++-container-inlining=true -analyzer-config c++-stdlib-inlining=false -std=c++11 -DTEST_INLINABLE_ALLOCATORS -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDelete,debug.ExprInspection -analyzer-config c++-container-inlining=true -analyzer-config c++-stdlib-inlining=true -std=c++11 -DTEST_INLINABLE_ALLOCATORS -verify %s
 
 // expected-no-diagnostics
 
diff --git a/test/Analysis/diagnostics/macro-null-return-suppression.cpp b/test/Analysis/diagnostics/macro-null-return-suppression.cpp
new file mode 100644
index 0000000..9a14f03
--- /dev/null
+++ b/test/Analysis/diagnostics/macro-null-return-suppression.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_analyze_cc1 -x c -analyzer-checker=core -analyzer-output=text -verify %s
+
+#define NULL 0
+
+int test_noparammacro() {
+  int *x = NULL; // expected-note{{'x' initialized to a null pointer value}}
+  return *x; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}}
+             // expected-note@-1{{Dereference of null pointer (loaded from variable 'x')}}
+}
+
+#define DYN_CAST(X) (X ? (char*)X : 0)
+#define GENERATE_NUMBER(X) (0)
+
+char test_assignment(int *param) {
+  char *param2;
+  param2 = DYN_CAST(param);
+  return *param2;
+}
+
+char test_declaration(int *param) {
+  char *param2 = DYN_CAST(param);
+  return *param2;
+}
+
+int coin();
+
+int test_multi_decl(int *paramA, int *paramB) {
+  char *param1 = DYN_CAST(paramA), *param2 = DYN_CAST(paramB);
+  if (coin())
+    return *param1;
+  return *param2;
+}
+
+int testDivision(int a) {
+  int divider = GENERATE_NUMBER(2); // expected-note{{'divider' initialized to 0}}
+  return 1/divider; // expected-warning{{Division by zero}}
+                    // expected-note@-1{{Division by zero}}
+}
+
+// Warning should not be suppressed if it happens in the same macro.
+#define DEREF_IN_MACRO(X) int fn() {int *p = 0; return *p; }
+
+DEREF_IN_MACRO(0) // expected-warning{{Dereference of null pointer}}
+                  // expected-note@-1{{'p' initialized to a null}}
+                  // expected-note@-2{{Dereference of null pointer}}
diff --git a/test/Analysis/diagnostics/no-store-func-path-notes.c b/test/Analysis/diagnostics/no-store-func-path-notes.c
new file mode 100644
index 0000000..a444ab6
--- /dev/null
+++ b/test/Analysis/diagnostics/no-store-func-path-notes.c
@@ -0,0 +1,226 @@
+// RUN: %clang_analyze_cc1 -x c -analyzer-checker=core -analyzer-output=text -verify %s
+
+typedef __typeof(sizeof(int)) size_t;
+void *memset(void *__s, int __c, size_t __n);
+
+int initializer1(int *p, int x) {
+  if (x) { // expected-note{{Taking false branch}}
+    *p = 1;
+    return 0;
+  } else {
+    return 1; // expected-note {{Returning without writing to '*p'}}
+  }
+}
+
+int param_not_initialized_by_func() {
+  int p; // expected-note {{'p' declared without an initial value}}
+  int out = initializer1(&p, 0); // expected-note{{Calling 'initializer1'}}
+                                 // expected-note@-1{{Returning from 'initializer1'}}
+  return p; // expected-note{{Undefined or garbage value returned to caller}}
+            // expected-warning@-1{{Undefined or garbage value returned to caller}}
+}
+
+int param_initialized_properly() {
+  int p;
+  int out = initializer1(&p, 1);
+  return p; //no-warning
+}
+
+static int global;
+
+int initializer2(int **p, int x) {
+  if (x) { // expected-note{{Taking false branch}}
+    *p = &global;
+    return 0;
+  } else {
+    return 1; // expected-note {{Returning without writing to '*p'}}
+  }
+}
+
+int param_not_written_into_by_func() {
+  int *p = 0;                    // expected-note{{'p' initialized to a null pointer value}}
+  int out = initializer2(&p, 0); // expected-note{{Calling 'initializer2'}}
+                                 // expected-note@-1{{Returning from 'initializer2'}}
+  return *p;                     // expected-warning{{Dereference of null pointer (loaded from variable 'p')}}
+                                 // expected-note@-1{{Dereference of null pointer (loaded from variable 'p')}}
+}
+
+void initializer3(int *p, int param) {
+  if (param) // expected-note{{Taking false branch}}
+    *p = 0;
+} // expected-note{{Returning without writing to '*p'}}
+
+int param_written_into_by_void_func() {
+  int p;               // expected-note{{'p' declared without an initial value}}
+  initializer3(&p, 0); // expected-note{{Calling 'initializer3'}}
+                       // expected-note@-1{{Returning from 'initializer3'}}
+  return p;            // expected-warning{{Undefined or garbage value returned to caller}}
+                       // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
+
+void initializer4(int *p, int param) {
+  if (param) // expected-note{{Taking false branch}}
+    *p = 0;
+} // expected-note{{Returning without writing to '*p'}}
+
+void initializer5(int *p, int param) {
+  if (!param) // expected-note{{Taking false branch}}
+    *p = 0;
+} // expected-note{{Returning without writing to '*p'}}
+
+int multi_init_tries_func() {
+  int p;               // expected-note{{'p' declared without an initial value}}
+  initializer4(&p, 0); // expected-note{{Calling 'initializer4'}}
+                       // expected-note@-1{{Returning from 'initializer4'}}
+  initializer5(&p, 1); // expected-note{{Calling 'initializer5'}}
+                       // expected-note@-1{{Returning from 'initializer5'}}
+  return p;            // expected-warning{{Undefined or garbage value returned to caller}}
+                       // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
+
+int initializer6(const int *p) {
+  return 0;
+}
+
+int no_msg_on_const() {
+  int p; // expected-note{{'p' declared without an initial value}}
+  initializer6(&p);
+  return p; // expected-warning{{Undefined or garbage value returned to caller}}
+            // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
+
+typedef struct {
+  int x;
+} S;
+
+int initializer7(S *s, int param) {
+  if (param) { // expected-note{{Taking false branch}}
+    s->x = 0;
+    return 0;
+  }
+  return 1; // expected-note{{Returning without writing to 's->x'}}
+}
+
+int initialize_struct_field() {
+  S local;
+  initializer7(&local, 0); // expected-note{{Calling 'initializer7'}}
+                           // expected-note@-1{{Returning from 'initializer7'}}
+  return local.x;          // expected-warning{{Undefined or garbage value returned to caller}}
+                           // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
+
+void nullwriter(int **p) {
+  *p = 0; // expected-note{{Null pointer value stored to 'p'}}
+} // no extra note
+
+int usage() {
+  int x = 0;
+  int *p = &x;
+  nullwriter(&p); // expected-note{{Calling 'nullwriter'}}
+                  // expected-note@-1{{Returning from 'nullwriter'}}
+  return *p;      // expected-warning{{Dereference of null pointer (loaded from variable 'p')}}
+                  // expected-note@-1{{Dereference of null pointer (loaded from variable 'p')}}
+}
+
+typedef struct {
+  int x;
+  int y;
+} A;
+
+void partial_initializer(A *a) {
+  a->x = 0;
+} // expected-note{{Returning without writing to 'a->y'}}
+
+int use_partial_initializer() {
+  A a;
+  partial_initializer(&a); // expected-note{{Calling 'partial_initializer'}}
+                           // expected-note@-1{{Returning from 'partial_initializer'}}
+  return a.y;              // expected-warning{{Undefined or garbage value returned to caller}}
+                           // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
+
+typedef struct {
+  int x;
+  int y;
+} B;
+
+typedef struct {
+  B b;
+} C;
+
+void partial_nested_initializer(C *c) {
+  c->b.x = 0;
+} // expected-note{{Returning without writing to 'c->b.y'}}
+
+int use_partial_nested_initializer() {
+  B localB;
+  C localC;
+  localC.b = localB;
+  partial_nested_initializer(&localC); // expected-note{{Calling 'partial_nested_initializer'}}
+                                       // expected-note@-1{{Returning from 'partial_nested_initializer'}}
+  return localC.b.y;                   // expected-warning{{Undefined or garbage value returned to caller}}
+                                       // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
+
+void test_subregion_assignment(C* c) {
+  B b;
+  c->b = b;
+}
+
+int use_subregion_assignment() {
+  C c;
+  test_subregion_assignment(&c); // expected-note{{Calling 'test_subregion_assignment'}}
+                                 // expected-note@-1{{Returning from 'test_subregion_assignment'}}
+  return c.b.x; // expected-warning{{Undefined or garbage value returned to caller}}
+                // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
+
+int confusing_signature(int *);
+int confusing_signature(int *p) {
+  return 0; // expected-note{{Returning without writing to '*p'}}
+}
+
+int use_confusing_signature() {
+  int a; // expected-note {{'a' declared without an initial value}}
+  confusing_signature(&a); // expected-note{{Calling 'confusing_signature'}}
+                           // expected-note@-1{{Returning from 'confusing_signature'}}
+  return a; // expected-note{{Undefined or garbage value returned to caller}}
+            // expected-warning@-1{{Undefined or garbage value returned to caller}}
+}
+
+int coin();
+
+int multiindirection(int **p) {
+  if (coin()) // expected-note{{Assuming the condition is true}}
+              // expected-note@-1{{Taking true branch}}
+    return 1; // expected-note{{Returning without writing to '**p'}}
+  *(*p) = 0;
+  return 0;
+}
+
+int usemultiindirection() {
+  int a; // expected-note {{'a' declared without an initial value}}
+  int *b = &a;
+  multiindirection(&b); // expected-note{{Calling 'multiindirection'}}
+                        // expected-note@-1{{Returning from 'multiindirection'}}
+  return a; // expected-note{{Undefined or garbage value returned to caller}}
+            // expected-warning@-1{{Undefined or garbage value returned to caller}}
+}
+
+int indirectingstruct(S** s) {
+  if (coin()) // expected-note{{Assuming the condition is true}}
+              // expected-note@-1{{Taking true branch}}
+    return 1; // expected-note{{Returning without writing to '(*s)->x'}}
+
+  (*s)->x = 0;
+  return 0;
+}
+
+int useindirectingstruct() {
+  S s;
+  S* p = &s;
+  indirectingstruct(&p); //expected-note{{Calling 'indirectingstruct'}}
+                         //expected-note@-1{{Returning from 'indirectingstruct'}}
+  return s.x; // expected-warning{{Undefined or garbage value returned to caller}}
+              // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
diff --git a/test/Analysis/diagnostics/no-store-func-path-notes.cpp b/test/Analysis/diagnostics/no-store-func-path-notes.cpp
new file mode 100644
index 0000000..a704c14
--- /dev/null
+++ b/test/Analysis/diagnostics/no-store-func-path-notes.cpp
@@ -0,0 +1,147 @@
+// RUN: %clang_analyze_cc1 -x c++ -analyzer-checker=core -analyzer-output=text -verify %s
+
+int initializer1(int &p, int x) {
+  if (x) { // expected-note{{Taking false branch}}
+    p = 1;
+    return 0;
+  } else {
+    return 1; // expected-note {{Returning without writing to 'p'}}
+  }
+}
+
+int param_not_initialized_by_func() {
+  int p;                        // expected-note {{'p' declared without an initial value}}
+  int out = initializer1(p, 0); // expected-note{{Calling 'initializer1'}}
+                                // expected-note@-1{{Returning from 'initializer1'}}
+  return p;                     // expected-note{{Undefined or garbage value returned to caller}}
+                                // expected-warning@-1{{Undefined or garbage value returned to caller}}
+}
+
+struct S {
+  int initialize(int *p, int param) {
+    if (param) { //expected-note{{Taking false branch}}
+      *p = 1;
+      return 1;
+    }
+    return 0; // expected-note{{Returning without writing to '*p'}}
+  }
+};
+
+int use(S *s) {
+  int p;                //expected-note{{'p' declared without an initial value}}
+  s->initialize(&p, 0); //expected-note{{Calling 'S::initialize'}}
+                        //expected-note@-1{{Returning from 'S::initialize'}}
+  return p;             // expected-warning{{Undefined or garbage value returned to caller}}
+                        // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
+
+int initializer2(const int &p) {
+  return 0;
+}
+
+int no_msg_const_ref() {
+  int p; //expected-note{{'p' declared without an initial value}}
+  initializer2(p);
+  return p; // expected-warning{{Undefined or garbage value returned to caller}}
+            // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
+
+void nested() {}
+void init_in_nested_func(int **x) {
+  *x = 0; // expected-note{{Null pointer value stored to 'y'}}
+  nested();
+} // no-note
+
+int call_init_nested() {
+  int x = 0;
+  int *y = &x;
+  init_in_nested_func(&y); // expected-note{{Calling 'init_in_nested_func'}}
+                           // expected-note@-1{{Returning from 'init_in_nested_func'}}
+  return *y;               //expected-warning{{Dereference of null pointer (loaded from variable 'y')}}
+                           //expected-note@-1{{Dereference of null pointer (loaded from variable 'y')}}
+}
+
+struct A {
+  int x;
+  int y;
+};
+
+void partial_init_by_reference(A &a) {
+  a.x = 0;
+} // expected-note {{Returning without writing to 'a.y'}}
+
+int use_partial_init_by_reference() {
+  A a;
+  partial_init_by_reference(a); // expected-note{{Calling 'partial_init_by_reference'}}
+                                // expected-note@-1{{Returning from 'partial_init_by_reference'}}
+  return a.y;                   // expected-warning{{Undefined or garbage value returned to caller}}
+                                // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
+
+struct B : A {
+};
+
+void partially_init_inherited_struct(B *b) {
+  b->x = 0;
+} // expected-note{{Returning without writing to 'b->y'}}
+
+int use_partially_init_inherited_struct() {
+  B b;
+  partially_init_inherited_struct(&b); // expected-note{{Calling 'partially_init_inherited_struct'}}
+                                       // expected-note@-1{{Returning from 'partially_init_inherited_struct'}}
+  return b.y;                          // expected-warning{{Undefined or garbage value returned to caller}}
+                                       // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
+
+struct C {
+  int x;
+  int y;
+  C(int pX, int pY) : x(pX) {} // expected-note{{Returning without writing to 'this->y'}}
+};
+
+int use_constructor() {
+  C c(0, 0); // expected-note{{Calling constructor for 'C'}}
+             // expected-note@-1{{Returning from constructor for 'C'}}
+  return c.y; // expected-note{{Undefined or garbage value returned to caller}}
+              // expected-warning@-1{{Undefined or garbage value returned to caller}}
+}
+
+struct D {
+  void initialize(int *);
+};
+
+void D::initialize(int *p) {
+
+} // expected-note{{Returning without writing to '*p'}}
+
+int use_d_initializer(D* d) {
+  int p; // expected-note {{'p' declared without an initial value}}
+  d->initialize(&p); // expected-note{{Calling 'D::initialize'}}
+                     // expected-note@-1{{Returning from 'D::initialize'}}
+  return p;                     // expected-note{{Undefined or garbage value returned to caller}}
+                                // expected-warning@-1{{Undefined or garbage value returned to caller}}
+}
+
+int coin();
+
+struct S2 {
+  int x;
+};
+
+int pointerreference(S2* &s) {
+  if (coin()) // expected-note{{Assuming the condition is true}}
+              // expected-note@-1{{Taking true branch}}
+    return 1; // expected-note{{Returning without writing to 's->x'}}
+
+  s->x = 0;
+  return 0;
+}
+
+int usepointerreference() {
+  S2 s;
+  S2* p = &s;
+  pointerreference(p); //expected-note{{Calling 'pointerreference'}}
+                         //expected-note@-1{{Returning from 'pointerreference'}}
+  return s.x; // expected-warning{{Undefined or garbage value returned to caller}}
+              // expected-note@-1{{Undefined or garbage value returned to caller}}
+}
diff --git a/test/Analysis/diagnostics/no-store-func-path-notes.m b/test/Analysis/diagnostics/no-store-func-path-notes.m
new file mode 100644
index 0000000..3ae97ef
--- /dev/null
+++ b/test/Analysis/diagnostics/no-store-func-path-notes.m
@@ -0,0 +1,46 @@
+// RUN: %clang_analyze_cc1 -x objective-c -analyzer-checker=core -analyzer-output=text -Wno-objc-root-class -fblocks -verify %s
+
+@interface I
+- (int)initVar:(int *)var param:(int)param;
+@end
+
+@implementation I
+- (int)initVar:(int *)var param:(int)param {
+  if (param) { // expected-note{{Taking false branch}}
+    *var = 1;
+    return 0;
+  }
+  return 1; // expected-note{{Returning without writing to '*var'}}
+}
+@end
+
+int foo(I *i) {
+  int x;                            //expected-note{{'x' declared without an initial value}}
+  int out = [i initVar:&x param:0]; //expected-note{{Calling 'initVar:param:'}}
+                                    //expected-note@-1{{Returning from 'initVar:param:'}}
+  if (out)                          // expected-note{{Taking true branch}}
+    return x;                       //expected-warning{{Undefined or garbage value returned to caller}}
+                                    //expected-note@-1{{Undefined or garbage value returned to caller}}
+  return 0;
+}
+
+int initializer1(int *p, int x) {
+  if (x) { // expected-note{{Taking false branch}}
+    *p = 1;
+    return 0;
+  } else {
+    return 1; // expected-note {{Returning without writing to '*p'}}
+  }
+}
+
+int initFromBlock() {
+  __block int z;
+  ^{                     // expected-note {{Calling anonymous block}}
+    int p;               // expected-note{{'p' declared without an initial value}}
+    initializer1(&p, 0); // expected-note{{Calling 'initializer1'}}
+                         // expected-note@-1{{Returning from 'initializer1'}}
+    z = p;               // expected-warning{{Assigned value is garbage or undefined}}
+                         // expected-note@-1{{Assigned value is garbage or undefined}}
+  }();
+  return z;
+}
diff --git a/test/Analysis/diagnostics/undef-value-param.c b/test/Analysis/diagnostics/undef-value-param.c
index 837b041..266a335 100644
--- a/test/Analysis/diagnostics/undef-value-param.c
+++ b/test/Analysis/diagnostics/undef-value-param.c
@@ -12,7 +12,7 @@
     if (c)
            //expected-note@-1{{Assuming 'c' is not equal to 0}}
            //expected-note@-2{{Taking true branch}}
-        return;
+           return; // expected-note{{Returning without writing to '*x'}}
     *x = 5;
 }
 
@@ -51,7 +51,7 @@
   if (x <= 0) //expected-note {{Taking true branch}}
               //expected-note@-1 {{Assuming 'x' is <= 0}}
 
-    return;
+    return; //expected-note{{Returning without writing to 'X->f1'}}
   X->f1 = getValidPtr();
 }
 double testPassingParentRegionStruct(int x) {
@@ -293,12 +293,12 @@
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
 // CHECK-NEXT:            <key>line</key><integer>15</integer>
-// CHECK-NEXT:            <key>col</key><integer>9</integer>
+// CHECK-NEXT:            <key>col</key><integer>12</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
 // CHECK-NEXT:            <key>line</key><integer>15</integer>
-// CHECK-NEXT:            <key>col</key><integer>14</integer>
+// CHECK-NEXT:            <key>col</key><integer>17</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:          </array>
@@ -309,6 +309,20 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>line</key><integer>15</integer>
+// CHECK-NEXT:       <key>col</key><integer>12</integer>
+// CHECK-NEXT:       <key>file</key><integer>0</integer>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <key>depth</key><integer>1</integer>
+// CHECK-NEXT:      <key>extended_message</key>
+// CHECK-NEXT:      <string>Returning without writing to &apos;*x&apos;</string>
+// CHECK-NEXT:      <key>message</key>
+// CHECK-NEXT:      <string>Returning without writing to &apos;*x&apos;</string>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>event</string>
+// CHECK-NEXT:      <key>location</key>
+// CHECK-NEXT:      <dict>
 // CHECK-NEXT:       <key>line</key><integer>22</integer>
 // CHECK-NEXT:       <key>col</key><integer>5</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
@@ -1046,6 +1060,20 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>line</key><integer>54</integer>
+// CHECK-NEXT:       <key>col</key><integer>5</integer>
+// CHECK-NEXT:       <key>file</key><integer>0</integer>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <key>depth</key><integer>1</integer>
+// CHECK-NEXT:      <key>extended_message</key>
+// CHECK-NEXT:      <string>Returning without writing to &apos;X-&gt;f1&apos;</string>
+// CHECK-NEXT:      <key>message</key>
+// CHECK-NEXT:      <string>Returning without writing to &apos;X-&gt;f1&apos;</string>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>event</string>
+// CHECK-NEXT:      <key>location</key>
+// CHECK-NEXT:      <dict>
 // CHECK-NEXT:       <key>line</key><integer>60</integer>
 // CHECK-NEXT:       <key>col</key><integer>3</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
diff --git a/test/Analysis/diagnostics/undef-value-param.m b/test/Analysis/diagnostics/undef-value-param.m
index f8212e0..521fd7b 100644
--- a/test/Analysis/diagnostics/undef-value-param.m
+++ b/test/Analysis/diagnostics/undef-value-param.m
@@ -69,7 +69,7 @@
              //expected-note@-1{{Assuming 'err' is not equal to 0}}
              //expected-note@-2{{Taking true branch}}
     CFRelease(ref);
-    return;
+    return; // expected-note{{Returning without writing to '*storeRef'}}
   }
   *storeRef = ref;
 }
@@ -831,6 +831,54 @@
 // CHECK-NEXT:       </array>
 // CHECK-NEXT:     </dict>
 // CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>71</integer>
+// CHECK-NEXT:            <key>col</key><integer>5</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>71</integer>
+// CHECK-NEXT:            <key>col</key><integer>13</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>72</integer>
+// CHECK-NEXT:            <key>col</key><integer>5</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>72</integer>
+// CHECK-NEXT:            <key>col</key><integer>10</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>event</string>
+// CHECK-NEXT:      <key>location</key>
+// CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>line</key><integer>72</integer>
+// CHECK-NEXT:       <key>col</key><integer>5</integer>
+// CHECK-NEXT:       <key>file</key><integer>0</integer>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <key>depth</key><integer>1</integer>
+// CHECK-NEXT:      <key>extended_message</key>
+// CHECK-NEXT:      <string>Returning without writing to &apos;*storeRef&apos;</string>
+// CHECK-NEXT:      <key>message</key>
+// CHECK-NEXT:      <string>Returning without writing to &apos;*storeRef&apos;</string>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
diff --git a/test/Analysis/edges-new.mm b/test/Analysis/edges-new.mm
index 47a125a..f310f1b 100644
--- a/test/Analysis/edges-new.mm
+++ b/test/Analysis/edges-new.mm
@@ -20288,7 +20288,7 @@
 // CHECK-NEXT:    <key>type</key><string>Bad deallocator</string>
 // CHECK-NEXT:    <key>check_name</key><string>unix.MismatchedDeallocator</string>
 // CHECK-NEXT:    <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:    <key>issue_hash_content_of_line_in_context</key><string>d9dbbf68db41ab74e2158f4b131abe34</string>
+// CHECK-NEXT:    <key>issue_hash_content_of_line_in_context</key><string>046c88d1c91ff46d6506dff5ff880756</string>
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>0</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
diff --git a/test/Analysis/exploration_order/noexprcrash.c b/test/Analysis/exploration_order/noexprcrash.c
new file mode 100644
index 0000000..75c2f0e
--- /dev/null
+++ b/test/Analysis/exploration_order/noexprcrash.c
@@ -0,0 +1,17 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify -analyzer-config exploration_strategy=unexplored_first %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify -analyzer-config exploration_strategy=dfs %s
+
+extern void clang_analyzer_eval(int);
+
+typedef struct { char a; } b;
+int c(b* input) {
+    int x = (input->a ?: input) ? 1 : 0; // expected-warning{{pointer/integer type mismatch}}
+    if (input->a) {
+      // FIXME: The value should actually be "TRUE",
+      // but is incorrect due to a bug.
+      clang_analyzer_eval(x); // expected-warning{{FALSE}}
+    } else {
+      clang_analyzer_eval(x); // expected-warning{{TRUE}}
+    }
+    return x;
+}
diff --git a/test/Analysis/exploration_order/prefer_unexplored.cc b/test/Analysis/exploration_order/prefer_unexplored.cc
new file mode 100644
index 0000000..317b88a
--- /dev/null
+++ b/test/Analysis/exploration_order/prefer_unexplored.cc
@@ -0,0 +1,40 @@
+// RUN: %clang_analyze_cc1 -w -analyzer-checker=core -analyzer-config exploration_strategy=unexplored_first -analyzer-output=text -verify %s | FileCheck %s
+// RUN: %clang_analyze_cc1 -w -analyzer-checker=core -analyzer-config exploration_strategy=unexplored_first_queue -analyzer-output=text -verify %s | FileCheck %s
+
+extern int coin();
+
+int foo() {
+    int *x = 0; // expected-note {{'x' initialized to a null pointer value}}
+    while (coin()) { // expected-note{{Loop condition is true}}
+        if (coin())  // expected-note {{Taking true branch}}
+            return *x; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}}
+                       // expected-note@-1{{Dereference of null pointer (loaded from variable 'x')}}
+    }
+    return 0;
+}
+
+void bar() {
+    while(coin()) // expected-note{{Loop condition is true}}
+        if (coin()) // expected-note {{Assuming the condition is true}}
+            foo(); // expected-note{{Calling 'foo'}}
+}
+
+int foo2() {
+    int *x = 0; // expected-note {{'x' initialized to a null pointer value}}
+    while (coin()) { // expected-note{{Loop condition is true}}
+        if (coin())  // expected-note {{Taking false branch}}
+            return false;
+        else
+            return *x; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}}
+                       // expected-note@-1{{Dereference of null pointer (loaded from variable 'x')}}
+    }
+    return 0;
+}
+
+void bar2() {
+    while(coin()) // expected-note{{Loop condition is true}}
+        if (coin()) // expected-note {{Assuming the condition is false}}
+          return false;
+        else
+            foo(); // expected-note{{Calling 'foo'}}
+}
diff --git a/test/Analysis/expr-inspection.c b/test/Analysis/expr-inspection.c
index ec0e682..8419542 100644
--- a/test/Analysis/expr-inspection.c
+++ b/test/Analysis/expr-inspection.c
@@ -18,6 +18,8 @@
 // CHECK: Store (direct and default bindings)
 // CHECK-NEXT: (y,0,direct) : 1 S32b
 
-// CHECK: Expressions:
+// CHECK: Expressions by stack frame:
+// CHECK-NEXT: #0 Calling foo
 // CHECK-NEXT: clang_analyzer_printState : &code{clang_analyzer_printState}
-// CHECK-NEXT: {{(Ranges are empty.)|(Constraints:[[:space:]]*$)}}
+
+// CHECK: {{(Ranges are empty.)|(Constraints:[[:space:]]*$)}}
diff --git a/test/Analysis/gcdasyncsemaphorechecker_test.m b/test/Analysis/gcdasyncsemaphorechecker_test.m
new file mode 100644
index 0000000..a82545a
--- /dev/null
+++ b/test/Analysis/gcdasyncsemaphorechecker_test.m
@@ -0,0 +1,203 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.osx.GCDAsyncSemaphore %s -fblocks -verify
+typedef signed char BOOL;
+@protocol NSObject  - (BOOL)isEqual:(id)object; @end
+@interface NSObject <NSObject> {}
++(id)alloc;
+-(id)init;
+-(id)autorelease;
+-(id)copy;
+-(id)retain;
+@end
+
+typedef int dispatch_semaphore_t;
+typedef void (^block_t)();
+
+dispatch_semaphore_t dispatch_semaphore_create(int);
+void dispatch_semaphore_wait(dispatch_semaphore_t, int);
+void dispatch_semaphore_signal(dispatch_semaphore_t);
+
+void func(void (^)(void));
+void func_w_typedef(block_t);
+
+int coin();
+
+void use_semaphor_antipattern() {
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+
+  func(^{
+      dispatch_semaphore_signal(sema);
+  });
+  dispatch_semaphore_wait(sema, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+}
+
+// It's OK to use pattern in tests.
+// We simply match the containing function name against ^test.
+void test_no_warning() {
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+
+  func(^{
+      dispatch_semaphore_signal(sema);
+  });
+  dispatch_semaphore_wait(sema, 100);
+}
+
+void use_semaphor_antipattern_multiple_times() {
+  dispatch_semaphore_t sema1 = dispatch_semaphore_create(0);
+
+  func(^{
+      dispatch_semaphore_signal(sema1);
+  });
+  dispatch_semaphore_wait(sema1, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+
+  dispatch_semaphore_t sema2 = dispatch_semaphore_create(0);
+
+  func(^{
+      dispatch_semaphore_signal(sema2);
+  });
+  dispatch_semaphore_wait(sema2, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+}
+
+void use_semaphor_antipattern_multiple_wait() {
+  dispatch_semaphore_t sema1 = dispatch_semaphore_create(0);
+
+  func(^{
+      dispatch_semaphore_signal(sema1);
+  });
+  // FIXME: multiple waits on same semaphor should not raise a warning.
+  dispatch_semaphore_wait(sema1, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+  dispatch_semaphore_wait(sema1, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+}
+
+void warn_incorrect_order() {
+  // FIXME: ASTMatchers do not allow ordered matching, so would match even
+  // if out of order.
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+
+  dispatch_semaphore_wait(sema, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+  func(^{
+      dispatch_semaphore_signal(sema);
+  });
+}
+
+void warn_w_typedef() {
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+
+  func_w_typedef(^{
+      dispatch_semaphore_signal(sema);
+  });
+  dispatch_semaphore_wait(sema, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+}
+
+void warn_nested_ast() {
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+
+  if (coin()) {
+    func(^{
+         dispatch_semaphore_signal(sema);
+         });
+  } else {
+    func(^{
+         dispatch_semaphore_signal(sema);
+         });
+  }
+  dispatch_semaphore_wait(sema, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+}
+
+void use_semaphore_assignment() {
+  dispatch_semaphore_t sema;
+  sema = dispatch_semaphore_create(0);
+
+  func(^{
+      dispatch_semaphore_signal(sema);
+  });
+  dispatch_semaphore_wait(sema, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+}
+
+void use_semaphore_assignment_init() {
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+  sema = dispatch_semaphore_create(1);
+
+  func(^{
+      dispatch_semaphore_signal(sema);
+  });
+  dispatch_semaphore_wait(sema, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+}
+
+void differentsemaphoreok() {
+  dispatch_semaphore_t sema1 = dispatch_semaphore_create(0);
+  dispatch_semaphore_t sema2 = dispatch_semaphore_create(0);
+
+  func(^{
+      dispatch_semaphore_signal(sema1);
+  });
+  dispatch_semaphore_wait(sema2, 100); // no-warning
+}
+
+void nosignalok() {
+  dispatch_semaphore_t sema1 = dispatch_semaphore_create(0);
+  dispatch_semaphore_wait(sema1, 100);
+}
+
+void nowaitok() {
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+  func(^{
+      dispatch_semaphore_signal(sema);
+  });
+}
+
+void noblockok() {
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+  dispatch_semaphore_signal(sema);
+  dispatch_semaphore_wait(sema, 100);
+}
+
+void storedblockok() {
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+  block_t b = ^{
+      dispatch_semaphore_signal(sema);
+  };
+  dispatch_semaphore_wait(sema, 100);
+}
+
+void passed_semaphore_ok(dispatch_semaphore_t sema) {
+  func(^{
+      dispatch_semaphore_signal(sema);
+  });
+  dispatch_semaphore_wait(sema, 100);
+}
+
+void warn_with_cast() {
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+
+  func(^{
+      dispatch_semaphore_signal((int)sema);
+  });
+  dispatch_semaphore_wait((int)sema, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+}
+
+@interface Test1 : NSObject
+-(void)use_method_warn;
+-(void)testNoWarn;
+@end
+
+@implementation Test1
+
+-(void)use_method_warn {
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+
+  func(^{
+      dispatch_semaphore_signal(sema);
+  });
+  dispatch_semaphore_wait(sema, 100); // expected-warning{{Possible semaphore performance anti-pattern}}
+}
+
+-(void)testNoWarn {
+  dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+
+  func(^{
+      dispatch_semaphore_signal(sema);
+  });
+  dispatch_semaphore_wait(sema, 100);
+}
+
+@end
diff --git a/test/Analysis/generics.m b/test/Analysis/generics.m
index dac148d..e506e89 100644
--- a/test/Analysis/generics.m
+++ b/test/Analysis/generics.m
@@ -159,10 +159,26 @@
   doStuff(a); // expected-warning {{Conversion}}
 }
 
-void trustExplicitCasts(MutableArray *a,
+void dontInferFromExplicitCastsOnUnspecialized(MutableArray *a,
                         MutableArray<NSMutableString *> *b) {
   b = (MutableArray<NSMutableString *> *)a;
-  [a addObject: [[NSString alloc] init]]; // expected-warning {{Conversion}}
+  [a addObject: [[NSString alloc] init]]; // no-warning
+}
+
+void dontWarnOnExplicitCastsAfterInference(MutableArray *a) {
+  withMutArrString(a);
+  withMutArrMutableString((MutableArray<NSMutableString *> *)a); // no-warning
+}
+
+void dontDiagnoseOnExplicitCrossCasts(MutableArray<NSSet *> *a,
+                        MutableArray<NSMutableString *> *b) {
+  // Treat an explicit cast to a specialized type as an indication that
+  // Objective-C's type system is not expressive enough to represent a
+  // the invariant the programmer wanted. After an explicit cast, do not
+  // warn about potential generics shenanigans.
+  b = (MutableArray<NSMutableString *> *)a; // no-warning
+  [a addObject: [[NSSet alloc] init]]; // no-warning
+  [b addObject: [[NSMutableString alloc] init]]; //no-warning
 }
 
 void subtypeOfGeneric(id d, MyMutableStringArray *a,
@@ -2361,35 +2377,6 @@
 // CHECK-NEXT:    <key>path</key>
 // CHECK-NEXT:    <array>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>event</string>
-// CHECK-NEXT:      <key>location</key>
-// CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>164</integer>
-// CHECK-NEXT:       <key>col</key><integer>7</integer>
-// CHECK-NEXT:       <key>file</key><integer>0</integer>
-// CHECK-NEXT:      </dict>
-// CHECK-NEXT:      <key>ranges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:        <array>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>164</integer>
-// CHECK-NEXT:          <key>col</key><integer>7</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>164</integer>
-// CHECK-NEXT:          <key>col</key><integer>42</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:        </array>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:      <key>depth</key><integer>0</integer>
-// CHECK-NEXT:      <key>extended_message</key>
-// CHECK-NEXT:      <string>Type &apos;MutableArray&lt;NSMutableString *&gt; *&apos; is inferred from explicit cast (from &apos;MutableArray *&apos; to &apos;MutableArray&lt;NSMutableString *&gt; *&apos;)</string>
-// CHECK-NEXT:      <key>message</key>
-// CHECK-NEXT:      <string>Type &apos;MutableArray&lt;NSMutableString *&gt; *&apos; is inferred from explicit cast (from &apos;MutableArray *&apos; to &apos;MutableArray&lt;NSMutableString *&gt; *&apos;)</string>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <dict>
 // CHECK-NEXT:      <key>kind</key><string>control</string>
 // CHECK-NEXT:      <key>edges</key>
 // CHECK-NEXT:       <array>
@@ -2397,12 +2384,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>164</integer>
+// CHECK-NEXT:            <key>line</key><integer>187</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>164</integer>
+// CHECK-NEXT:            <key>line</key><integer>187</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -2410,12 +2397,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>165</integer>
+// CHECK-NEXT:            <key>line</key><integer>188</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>165</integer>
+// CHECK-NEXT:            <key>line</key><integer>188</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -2427,90 +2414,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>165</integer>
-// CHECK-NEXT:       <key>col</key><integer>3</integer>
-// CHECK-NEXT:       <key>file</key><integer>0</integer>
-// CHECK-NEXT:      </dict>
-// CHECK-NEXT:      <key>ranges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:        <array>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>165</integer>
-// CHECK-NEXT:          <key>col</key><integer>17</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>165</integer>
-// CHECK-NEXT:          <key>col</key><integer>39</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:        </array>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:      <key>depth</key><integer>0</integer>
-// CHECK-NEXT:      <key>extended_message</key>
-// CHECK-NEXT:      <string>Conversion from value of type &apos;NSString *&apos; to incompatible type &apos;NSMutableString *&apos;</string>
-// CHECK-NEXT:      <key>message</key>
-// CHECK-NEXT:      <string>Conversion from value of type &apos;NSString *&apos; to incompatible type &apos;NSMutableString *&apos;</string>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:    </array>
-// CHECK-NEXT:    <key>description</key><string>Conversion from value of type &apos;NSString *&apos; to incompatible type &apos;NSMutableString *&apos;</string>
-// CHECK-NEXT:    <key>category</key><string>Core Foundation/Objective-C</string>
-// CHECK-NEXT:    <key>type</key><string>Generics</string>
-// CHECK-NEXT:    <key>check_name</key><string>core.DynamicTypePropagation</string>
-// CHECK-NEXT:    <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:    <key>issue_hash_content_of_line_in_context</key><string>0549631e5a7fa668375061b6c898e438</string>
-// CHECK-NEXT:   <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:   <key>issue_context</key><string>trustExplicitCasts</string>
-// CHECK-NEXT:   <key>issue_hash_function_offset</key><string>2</string>
-// CHECK-NEXT:   <key>location</key>
-// CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>165</integer>
-// CHECK-NEXT:    <key>col</key><integer>3</integer>
-// CHECK-NEXT:    <key>file</key><integer>0</integer>
-// CHECK-NEXT:   </dict>
-// CHECK-NEXT:   </dict>
-// CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>path</key>
-// CHECK-NEXT:    <array>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>control</string>
-// CHECK-NEXT:      <key>edges</key>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>start</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>171</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>171</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:         <key>end</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>172</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>172</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>event</string>
-// CHECK-NEXT:      <key>location</key>
-// CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>172</integer>
+// CHECK-NEXT:       <key>line</key><integer>188</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -2518,12 +2422,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>172</integer>
+// CHECK-NEXT:          <key>line</key><integer>188</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>172</integer>
+// CHECK-NEXT:          <key>line</key><integer>188</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -2543,12 +2447,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>172</integer>
+// CHECK-NEXT:            <key>line</key><integer>188</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>172</integer>
+// CHECK-NEXT:            <key>line</key><integer>188</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -2556,12 +2460,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>173</integer>
+// CHECK-NEXT:            <key>line</key><integer>189</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>173</integer>
+// CHECK-NEXT:            <key>line</key><integer>189</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -2577,12 +2481,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>173</integer>
+// CHECK-NEXT:            <key>line</key><integer>189</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>173</integer>
+// CHECK-NEXT:            <key>line</key><integer>189</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -2590,12 +2494,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>173</integer>
+// CHECK-NEXT:            <key>line</key><integer>189</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>173</integer>
+// CHECK-NEXT:            <key>line</key><integer>189</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -2607,7 +2511,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>173</integer>
+// CHECK-NEXT:       <key>line</key><integer>189</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -2615,12 +2519,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>173</integer>
+// CHECK-NEXT:          <key>line</key><integer>189</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>173</integer>
+// CHECK-NEXT:          <key>line</key><integer>189</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -2644,7 +2548,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>173</integer>
+// CHECK-NEXT:    <key>line</key><integer>189</integer>
 // CHECK-NEXT:    <key>col</key><integer>7</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -2656,361 +2560,6 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>179</integer>
-// CHECK-NEXT:       <key>col</key><integer>7</integer>
-// CHECK-NEXT:       <key>file</key><integer>0</integer>
-// CHECK-NEXT:      </dict>
-// CHECK-NEXT:      <key>ranges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:        <array>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>179</integer>
-// CHECK-NEXT:          <key>col</key><integer>7</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>179</integer>
-// CHECK-NEXT:          <key>col</key><integer>7</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:        </array>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:      <key>depth</key><integer>0</integer>
-// CHECK-NEXT:      <key>extended_message</key>
-// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to &apos;id&apos;)</string>
-// CHECK-NEXT:      <key>message</key>
-// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to &apos;id&apos;)</string>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>control</string>
-// CHECK-NEXT:      <key>edges</key>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>start</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>179</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>179</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:         <key>end</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>183</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>183</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>control</string>
-// CHECK-NEXT:      <key>edges</key>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>start</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>183</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>183</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:         <key>end</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>183</integer>
-// CHECK-NEXT:            <key>col</key><integer>7</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>183</integer>
-// CHECK-NEXT:            <key>col</key><integer>7</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>event</string>
-// CHECK-NEXT:      <key>location</key>
-// CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>183</integer>
-// CHECK-NEXT:       <key>col</key><integer>7</integer>
-// CHECK-NEXT:       <key>file</key><integer>0</integer>
-// CHECK-NEXT:      </dict>
-// CHECK-NEXT:      <key>ranges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:        <array>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>183</integer>
-// CHECK-NEXT:          <key>col</key><integer>7</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>183</integer>
-// CHECK-NEXT:          <key>col</key><integer>7</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:        </array>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:      <key>depth</key><integer>0</integer>
-// CHECK-NEXT:      <key>extended_message</key>
-// CHECK-NEXT:      <string>Conversion from value of type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to incompatible type &apos;MutableArray&lt;NSNumber *&gt; *&apos;</string>
-// CHECK-NEXT:      <key>message</key>
-// CHECK-NEXT:      <string>Conversion from value of type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to incompatible type &apos;MutableArray&lt;NSNumber *&gt; *&apos;</string>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:    </array>
-// CHECK-NEXT:    <key>description</key><string>Conversion from value of type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to incompatible type &apos;MutableArray&lt;NSNumber *&gt; *&apos;</string>
-// CHECK-NEXT:    <key>category</key><string>Core Foundation/Objective-C</string>
-// CHECK-NEXT:    <key>type</key><string>Generics</string>
-// CHECK-NEXT:    <key>check_name</key><string>core.DynamicTypePropagation</string>
-// CHECK-NEXT:    <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:    <key>issue_hash_content_of_line_in_context</key><string>f1b900572a63726a729714a765595c38</string>
-// CHECK-NEXT:   <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:   <key>issue_context</key><string>genericSubtypeOfGeneric</string>
-// CHECK-NEXT:   <key>issue_hash_function_offset</key><string>5</string>
-// CHECK-NEXT:   <key>location</key>
-// CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>183</integer>
-// CHECK-NEXT:    <key>col</key><integer>7</integer>
-// CHECK-NEXT:    <key>file</key><integer>0</integer>
-// CHECK-NEXT:   </dict>
-// CHECK-NEXT:   </dict>
-// CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>path</key>
-// CHECK-NEXT:    <array>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>event</string>
-// CHECK-NEXT:      <key>location</key>
-// CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>179</integer>
-// CHECK-NEXT:       <key>col</key><integer>7</integer>
-// CHECK-NEXT:       <key>file</key><integer>0</integer>
-// CHECK-NEXT:      </dict>
-// CHECK-NEXT:      <key>ranges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:        <array>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>179</integer>
-// CHECK-NEXT:          <key>col</key><integer>7</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>179</integer>
-// CHECK-NEXT:          <key>col</key><integer>7</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:        </array>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:      <key>depth</key><integer>0</integer>
-// CHECK-NEXT:      <key>extended_message</key>
-// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to &apos;id&apos;)</string>
-// CHECK-NEXT:      <key>message</key>
-// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to &apos;id&apos;)</string>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>control</string>
-// CHECK-NEXT:      <key>edges</key>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>start</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>179</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>179</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:         <key>end</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>185</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>185</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>event</string>
-// CHECK-NEXT:      <key>location</key>
-// CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>185</integer>
-// CHECK-NEXT:       <key>col</key><integer>3</integer>
-// CHECK-NEXT:       <key>file</key><integer>0</integer>
-// CHECK-NEXT:      </dict>
-// CHECK-NEXT:      <key>ranges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:        <array>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>185</integer>
-// CHECK-NEXT:          <key>col</key><integer>17</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>185</integer>
-// CHECK-NEXT:          <key>col</key><integer>39</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:        </array>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:      <key>depth</key><integer>0</integer>
-// CHECK-NEXT:      <key>extended_message</key>
-// CHECK-NEXT:      <string>Conversion from value of type &apos;NSNumber *&apos; to incompatible type &apos;NSString *&apos;</string>
-// CHECK-NEXT:      <key>message</key>
-// CHECK-NEXT:      <string>Conversion from value of type &apos;NSNumber *&apos; to incompatible type &apos;NSString *&apos;</string>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:    </array>
-// CHECK-NEXT:    <key>description</key><string>Conversion from value of type &apos;NSNumber *&apos; to incompatible type &apos;NSString *&apos;</string>
-// CHECK-NEXT:    <key>category</key><string>Core Foundation/Objective-C</string>
-// CHECK-NEXT:    <key>type</key><string>Generics</string>
-// CHECK-NEXT:    <key>check_name</key><string>core.DynamicTypePropagation</string>
-// CHECK-NEXT:    <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:    <key>issue_hash_content_of_line_in_context</key><string>cbb926c9de226a3eac85fb961a93f39a</string>
-// CHECK-NEXT:   <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:   <key>issue_context</key><string>genericSubtypeOfGeneric</string>
-// CHECK-NEXT:   <key>issue_hash_function_offset</key><string>7</string>
-// CHECK-NEXT:   <key>location</key>
-// CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>185</integer>
-// CHECK-NEXT:    <key>col</key><integer>3</integer>
-// CHECK-NEXT:    <key>file</key><integer>0</integer>
-// CHECK-NEXT:   </dict>
-// CHECK-NEXT:   </dict>
-// CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>path</key>
-// CHECK-NEXT:    <array>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>event</string>
-// CHECK-NEXT:      <key>location</key>
-// CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>191</integer>
-// CHECK-NEXT:       <key>col</key><integer>7</integer>
-// CHECK-NEXT:       <key>file</key><integer>0</integer>
-// CHECK-NEXT:      </dict>
-// CHECK-NEXT:      <key>ranges</key>
-// CHECK-NEXT:      <array>
-// CHECK-NEXT:        <array>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>191</integer>
-// CHECK-NEXT:          <key>col</key><integer>7</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>191</integer>
-// CHECK-NEXT:          <key>col</key><integer>7</integer>
-// CHECK-NEXT:          <key>file</key><integer>0</integer>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:        </array>
-// CHECK-NEXT:      </array>
-// CHECK-NEXT:      <key>depth</key><integer>0</integer>
-// CHECK-NEXT:      <key>extended_message</key>
-// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;id&apos; to &apos;ExceptionalArray&lt;NSString *&gt; *&apos;)</string>
-// CHECK-NEXT:      <key>message</key>
-// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;id&apos; to &apos;ExceptionalArray&lt;NSString *&gt; *&apos;)</string>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>control</string>
-// CHECK-NEXT:      <key>edges</key>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>start</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>191</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>191</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:         <key>end</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>195</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>195</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>control</string>
-// CHECK-NEXT:      <key>edges</key>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>start</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>195</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>195</integer>
-// CHECK-NEXT:            <key>col</key><integer>3</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:         <key>end</key>
-// CHECK-NEXT:          <array>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>195</integer>
-// CHECK-NEXT:            <key>col</key><integer>7</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>195</integer>
-// CHECK-NEXT:            <key>col</key><integer>7</integer>
-// CHECK-NEXT:            <key>file</key><integer>0</integer>
-// CHECK-NEXT:           </dict>
-// CHECK-NEXT:          </array>
-// CHECK-NEXT:        </dict>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:     </dict>
-// CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>kind</key><string>event</string>
-// CHECK-NEXT:      <key>location</key>
-// CHECK-NEXT:      <dict>
 // CHECK-NEXT:       <key>line</key><integer>195</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
@@ -3032,6 +2581,103 @@
 // CHECK-NEXT:      </array>
 // CHECK-NEXT:      <key>depth</key><integer>0</integer>
 // CHECK-NEXT:      <key>extended_message</key>
+// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to &apos;id&apos;)</string>
+// CHECK-NEXT:      <key>message</key>
+// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to &apos;id&apos;)</string>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>195</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>195</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>199</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>199</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>199</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>199</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>199</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>199</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>event</string>
+// CHECK-NEXT:      <key>location</key>
+// CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>line</key><integer>199</integer>
+// CHECK-NEXT:       <key>col</key><integer>7</integer>
+// CHECK-NEXT:       <key>file</key><integer>0</integer>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <key>ranges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:        <array>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>199</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>199</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:        </array>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:      <key>depth</key><integer>0</integer>
+// CHECK-NEXT:      <key>extended_message</key>
 // CHECK-NEXT:      <string>Conversion from value of type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to incompatible type &apos;MutableArray&lt;NSNumber *&gt; *&apos;</string>
 // CHECK-NEXT:      <key>message</key>
 // CHECK-NEXT:      <string>Conversion from value of type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to incompatible type &apos;MutableArray&lt;NSNumber *&gt; *&apos;</string>
@@ -3042,13 +2688,13 @@
 // CHECK-NEXT:    <key>type</key><string>Generics</string>
 // CHECK-NEXT:    <key>check_name</key><string>core.DynamicTypePropagation</string>
 // CHECK-NEXT:    <!-- This hash is experimental and going to change! -->
-// CHECK-NEXT:    <key>issue_hash_content_of_line_in_context</key><string>b6ee3b22fbad45f213b4bf14bec7eeaf</string>
+// CHECK-NEXT:    <key>issue_hash_content_of_line_in_context</key><string>f1b900572a63726a729714a765595c38</string>
 // CHECK-NEXT:   <key>issue_context_kind</key><string>function</string>
-// CHECK-NEXT:   <key>issue_context</key><string>genericSubtypeOfGenericReverse</string>
+// CHECK-NEXT:   <key>issue_context</key><string>genericSubtypeOfGeneric</string>
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>5</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>195</integer>
+// CHECK-NEXT:    <key>line</key><integer>199</integer>
 // CHECK-NEXT:    <key>col</key><integer>7</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -3060,7 +2706,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>191</integer>
+// CHECK-NEXT:       <key>line</key><integer>195</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3068,12 +2714,124 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>191</integer>
+// CHECK-NEXT:          <key>line</key><integer>195</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>191</integer>
+// CHECK-NEXT:          <key>line</key><integer>195</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:        </array>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:      <key>depth</key><integer>0</integer>
+// CHECK-NEXT:      <key>extended_message</key>
+// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to &apos;id&apos;)</string>
+// CHECK-NEXT:      <key>message</key>
+// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to &apos;id&apos;)</string>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>195</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>195</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>201</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>201</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>event</string>
+// CHECK-NEXT:      <key>location</key>
+// CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>line</key><integer>201</integer>
+// CHECK-NEXT:       <key>col</key><integer>3</integer>
+// CHECK-NEXT:       <key>file</key><integer>0</integer>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <key>ranges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:        <array>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>201</integer>
+// CHECK-NEXT:          <key>col</key><integer>17</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>201</integer>
+// CHECK-NEXT:          <key>col</key><integer>39</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:        </array>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:      <key>depth</key><integer>0</integer>
+// CHECK-NEXT:      <key>extended_message</key>
+// CHECK-NEXT:      <string>Conversion from value of type &apos;NSNumber *&apos; to incompatible type &apos;NSString *&apos;</string>
+// CHECK-NEXT:      <key>message</key>
+// CHECK-NEXT:      <string>Conversion from value of type &apos;NSNumber *&apos; to incompatible type &apos;NSString *&apos;</string>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:    </array>
+// CHECK-NEXT:    <key>description</key><string>Conversion from value of type &apos;NSNumber *&apos; to incompatible type &apos;NSString *&apos;</string>
+// CHECK-NEXT:    <key>category</key><string>Core Foundation/Objective-C</string>
+// CHECK-NEXT:    <key>type</key><string>Generics</string>
+// CHECK-NEXT:    <key>check_name</key><string>core.DynamicTypePropagation</string>
+// CHECK-NEXT:    <!-- This hash is experimental and going to change! -->
+// CHECK-NEXT:    <key>issue_hash_content_of_line_in_context</key><string>cbb926c9de226a3eac85fb961a93f39a</string>
+// CHECK-NEXT:   <key>issue_context_kind</key><string>function</string>
+// CHECK-NEXT:   <key>issue_context</key><string>genericSubtypeOfGeneric</string>
+// CHECK-NEXT:   <key>issue_hash_function_offset</key><string>7</string>
+// CHECK-NEXT:   <key>location</key>
+// CHECK-NEXT:   <dict>
+// CHECK-NEXT:    <key>line</key><integer>201</integer>
+// CHECK-NEXT:    <key>col</key><integer>3</integer>
+// CHECK-NEXT:    <key>file</key><integer>0</integer>
+// CHECK-NEXT:   </dict>
+// CHECK-NEXT:   </dict>
+// CHECK-NEXT:   <dict>
+// CHECK-NEXT:    <key>path</key>
+// CHECK-NEXT:    <array>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>event</string>
+// CHECK-NEXT:      <key>location</key>
+// CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>line</key><integer>207</integer>
+// CHECK-NEXT:       <key>col</key><integer>7</integer>
+// CHECK-NEXT:       <key>file</key><integer>0</integer>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <key>ranges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:        <array>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>207</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>207</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3093,12 +2851,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>191</integer>
+// CHECK-NEXT:            <key>line</key><integer>207</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>191</integer>
+// CHECK-NEXT:            <key>line</key><integer>207</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3106,12 +2864,158 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>197</integer>
+// CHECK-NEXT:            <key>line</key><integer>211</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>211</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>211</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>211</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>211</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>211</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>event</string>
+// CHECK-NEXT:      <key>location</key>
+// CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>line</key><integer>211</integer>
+// CHECK-NEXT:       <key>col</key><integer>7</integer>
+// CHECK-NEXT:       <key>file</key><integer>0</integer>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <key>ranges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:        <array>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>211</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>211</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:        </array>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:      <key>depth</key><integer>0</integer>
+// CHECK-NEXT:      <key>extended_message</key>
+// CHECK-NEXT:      <string>Conversion from value of type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to incompatible type &apos;MutableArray&lt;NSNumber *&gt; *&apos;</string>
+// CHECK-NEXT:      <key>message</key>
+// CHECK-NEXT:      <string>Conversion from value of type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to incompatible type &apos;MutableArray&lt;NSNumber *&gt; *&apos;</string>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:    </array>
+// CHECK-NEXT:    <key>description</key><string>Conversion from value of type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; to incompatible type &apos;MutableArray&lt;NSNumber *&gt; *&apos;</string>
+// CHECK-NEXT:    <key>category</key><string>Core Foundation/Objective-C</string>
+// CHECK-NEXT:    <key>type</key><string>Generics</string>
+// CHECK-NEXT:    <key>check_name</key><string>core.DynamicTypePropagation</string>
+// CHECK-NEXT:    <!-- This hash is experimental and going to change! -->
+// CHECK-NEXT:    <key>issue_hash_content_of_line_in_context</key><string>b6ee3b22fbad45f213b4bf14bec7eeaf</string>
+// CHECK-NEXT:   <key>issue_context_kind</key><string>function</string>
+// CHECK-NEXT:   <key>issue_context</key><string>genericSubtypeOfGenericReverse</string>
+// CHECK-NEXT:   <key>issue_hash_function_offset</key><string>5</string>
+// CHECK-NEXT:   <key>location</key>
+// CHECK-NEXT:   <dict>
+// CHECK-NEXT:    <key>line</key><integer>211</integer>
+// CHECK-NEXT:    <key>col</key><integer>7</integer>
+// CHECK-NEXT:    <key>file</key><integer>0</integer>
+// CHECK-NEXT:   </dict>
+// CHECK-NEXT:   </dict>
+// CHECK-NEXT:   <dict>
+// CHECK-NEXT:    <key>path</key>
+// CHECK-NEXT:    <array>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>event</string>
+// CHECK-NEXT:      <key>location</key>
+// CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>line</key><integer>207</integer>
+// CHECK-NEXT:       <key>col</key><integer>7</integer>
+// CHECK-NEXT:       <key>file</key><integer>0</integer>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <key>ranges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:        <array>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>207</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>207</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:        </array>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:      <key>depth</key><integer>0</integer>
+// CHECK-NEXT:      <key>extended_message</key>
+// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;id&apos; to &apos;ExceptionalArray&lt;NSString *&gt; *&apos;)</string>
+// CHECK-NEXT:      <key>message</key>
+// CHECK-NEXT:      <string>Type &apos;ExceptionalArray&lt;NSString *&gt; *&apos; is inferred from implicit cast (from &apos;id&apos; to &apos;ExceptionalArray&lt;NSString *&gt; *&apos;)</string>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>207</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>207</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>213</integer>
 // CHECK-NEXT:            <key>col</key><integer>2</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>197</integer>
+// CHECK-NEXT:            <key>line</key><integer>213</integer>
 // CHECK-NEXT:            <key>col</key><integer>2</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3123,7 +3027,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>197</integer>
+// CHECK-NEXT:       <key>line</key><integer>213</integer>
 // CHECK-NEXT:       <key>col</key><integer>2</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3131,12 +3035,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>197</integer>
+// CHECK-NEXT:          <key>line</key><integer>213</integer>
 // CHECK-NEXT:          <key>col</key><integer>16</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>197</integer>
+// CHECK-NEXT:          <key>line</key><integer>213</integer>
 // CHECK-NEXT:          <key>col</key><integer>38</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3160,7 +3064,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>7</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>197</integer>
+// CHECK-NEXT:    <key>line</key><integer>213</integer>
 // CHECK-NEXT:    <key>col</key><integer>2</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -3172,7 +3076,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>203</integer>
+// CHECK-NEXT:       <key>line</key><integer>219</integer>
 // CHECK-NEXT:       <key>col</key><integer>20</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3180,12 +3084,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>203</integer>
+// CHECK-NEXT:          <key>line</key><integer>219</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>203</integer>
+// CHECK-NEXT:          <key>line</key><integer>219</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3205,12 +3109,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>203</integer>
+// CHECK-NEXT:            <key>line</key><integer>219</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>203</integer>
+// CHECK-NEXT:            <key>line</key><integer>219</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3218,12 +3122,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>204</integer>
+// CHECK-NEXT:            <key>line</key><integer>220</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>204</integer>
+// CHECK-NEXT:            <key>line</key><integer>220</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3239,12 +3143,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>204</integer>
+// CHECK-NEXT:            <key>line</key><integer>220</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>204</integer>
+// CHECK-NEXT:            <key>line</key><integer>220</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3252,12 +3156,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>204</integer>
+// CHECK-NEXT:            <key>line</key><integer>220</integer>
 // CHECK-NEXT:            <key>col</key><integer>27</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>204</integer>
+// CHECK-NEXT:            <key>line</key><integer>220</integer>
 // CHECK-NEXT:            <key>col</key><integer>27</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3269,7 +3173,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>204</integer>
+// CHECK-NEXT:       <key>line</key><integer>220</integer>
 // CHECK-NEXT:       <key>col</key><integer>27</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3277,12 +3181,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>204</integer>
+// CHECK-NEXT:          <key>line</key><integer>220</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>204</integer>
+// CHECK-NEXT:          <key>line</key><integer>220</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3306,7 +3210,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>4</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>204</integer>
+// CHECK-NEXT:    <key>line</key><integer>220</integer>
 // CHECK-NEXT:    <key>col</key><integer>27</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -3318,7 +3222,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>208</integer>
+// CHECK-NEXT:       <key>line</key><integer>224</integer>
 // CHECK-NEXT:       <key>col</key><integer>27</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3326,12 +3230,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>208</integer>
+// CHECK-NEXT:          <key>line</key><integer>224</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>208</integer>
+// CHECK-NEXT:          <key>line</key><integer>224</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3351,12 +3255,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>208</integer>
+// CHECK-NEXT:            <key>line</key><integer>224</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>208</integer>
+// CHECK-NEXT:            <key>line</key><integer>224</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3364,12 +3268,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>209</integer>
+// CHECK-NEXT:            <key>line</key><integer>225</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>209</integer>
+// CHECK-NEXT:            <key>line</key><integer>225</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3385,12 +3289,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>209</integer>
+// CHECK-NEXT:            <key>line</key><integer>225</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>209</integer>
+// CHECK-NEXT:            <key>line</key><integer>225</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3398,12 +3302,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>209</integer>
+// CHECK-NEXT:            <key>line</key><integer>225</integer>
 // CHECK-NEXT:            <key>col</key><integer>20</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>209</integer>
+// CHECK-NEXT:            <key>line</key><integer>225</integer>
 // CHECK-NEXT:            <key>col</key><integer>20</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3415,7 +3319,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>209</integer>
+// CHECK-NEXT:       <key>line</key><integer>225</integer>
 // CHECK-NEXT:       <key>col</key><integer>20</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3423,12 +3327,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>209</integer>
+// CHECK-NEXT:          <key>line</key><integer>225</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>209</integer>
+// CHECK-NEXT:          <key>line</key><integer>225</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3452,7 +3356,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>209</integer>
+// CHECK-NEXT:    <key>line</key><integer>225</integer>
 // CHECK-NEXT:    <key>col</key><integer>20</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -3464,7 +3368,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>213</integer>
+// CHECK-NEXT:       <key>line</key><integer>229</integer>
 // CHECK-NEXT:       <key>col</key><integer>27</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3472,12 +3376,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>213</integer>
+// CHECK-NEXT:          <key>line</key><integer>229</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>213</integer>
+// CHECK-NEXT:          <key>line</key><integer>229</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3497,12 +3401,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>213</integer>
+// CHECK-NEXT:            <key>line</key><integer>229</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>213</integer>
+// CHECK-NEXT:            <key>line</key><integer>229</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3510,12 +3414,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>214</integer>
+// CHECK-NEXT:            <key>line</key><integer>230</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>214</integer>
+// CHECK-NEXT:            <key>line</key><integer>230</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3531,12 +3435,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>214</integer>
+// CHECK-NEXT:            <key>line</key><integer>230</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>214</integer>
+// CHECK-NEXT:            <key>line</key><integer>230</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3544,12 +3448,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>214</integer>
+// CHECK-NEXT:            <key>line</key><integer>230</integer>
 // CHECK-NEXT:            <key>col</key><integer>20</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>214</integer>
+// CHECK-NEXT:            <key>line</key><integer>230</integer>
 // CHECK-NEXT:            <key>col</key><integer>20</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3561,7 +3465,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>214</integer>
+// CHECK-NEXT:       <key>line</key><integer>230</integer>
 // CHECK-NEXT:       <key>col</key><integer>20</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3569,12 +3473,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>214</integer>
+// CHECK-NEXT:          <key>line</key><integer>230</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>214</integer>
+// CHECK-NEXT:          <key>line</key><integer>230</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3598,7 +3502,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>214</integer>
+// CHECK-NEXT:    <key>line</key><integer>230</integer>
 // CHECK-NEXT:    <key>col</key><integer>20</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -3610,7 +3514,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>218</integer>
+// CHECK-NEXT:       <key>line</key><integer>234</integer>
 // CHECK-NEXT:       <key>col</key><integer>20</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3618,12 +3522,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>218</integer>
+// CHECK-NEXT:          <key>line</key><integer>234</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>218</integer>
+// CHECK-NEXT:          <key>line</key><integer>234</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3643,12 +3547,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>218</integer>
+// CHECK-NEXT:            <key>line</key><integer>234</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>218</integer>
+// CHECK-NEXT:            <key>line</key><integer>234</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3656,12 +3560,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>219</integer>
+// CHECK-NEXT:            <key>line</key><integer>235</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>219</integer>
+// CHECK-NEXT:            <key>line</key><integer>235</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3677,12 +3581,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>219</integer>
+// CHECK-NEXT:            <key>line</key><integer>235</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>219</integer>
+// CHECK-NEXT:            <key>line</key><integer>235</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3690,12 +3594,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>219</integer>
+// CHECK-NEXT:            <key>line</key><integer>235</integer>
 // CHECK-NEXT:            <key>col</key><integer>27</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>219</integer>
+// CHECK-NEXT:            <key>line</key><integer>235</integer>
 // CHECK-NEXT:            <key>col</key><integer>27</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3707,7 +3611,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>219</integer>
+// CHECK-NEXT:       <key>line</key><integer>235</integer>
 // CHECK-NEXT:       <key>col</key><integer>27</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3715,12 +3619,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>219</integer>
+// CHECK-NEXT:          <key>line</key><integer>235</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>219</integer>
+// CHECK-NEXT:          <key>line</key><integer>235</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3744,7 +3648,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>219</integer>
+// CHECK-NEXT:    <key>line</key><integer>235</integer>
 // CHECK-NEXT:    <key>col</key><integer>27</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -3756,7 +3660,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>223</integer>
+// CHECK-NEXT:       <key>line</key><integer>239</integer>
 // CHECK-NEXT:       <key>col</key><integer>27</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3764,12 +3668,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>223</integer>
+// CHECK-NEXT:          <key>line</key><integer>239</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>223</integer>
+// CHECK-NEXT:          <key>line</key><integer>239</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3789,12 +3693,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>223</integer>
+// CHECK-NEXT:            <key>line</key><integer>239</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>223</integer>
+// CHECK-NEXT:            <key>line</key><integer>239</integer>
 // CHECK-NEXT:            <key>col</key><integer>20</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3802,12 +3706,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>224</integer>
+// CHECK-NEXT:            <key>line</key><integer>240</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>224</integer>
+// CHECK-NEXT:            <key>line</key><integer>240</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3819,7 +3723,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>224</integer>
+// CHECK-NEXT:       <key>line</key><integer>240</integer>
 // CHECK-NEXT:       <key>col</key><integer>20</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3827,12 +3731,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>224</integer>
+// CHECK-NEXT:          <key>line</key><integer>240</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>224</integer>
+// CHECK-NEXT:          <key>line</key><integer>240</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3852,12 +3756,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>224</integer>
+// CHECK-NEXT:            <key>line</key><integer>240</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>224</integer>
+// CHECK-NEXT:            <key>line</key><integer>240</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3865,12 +3769,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>225</integer>
+// CHECK-NEXT:            <key>line</key><integer>241</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>225</integer>
+// CHECK-NEXT:            <key>line</key><integer>241</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3886,12 +3790,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>225</integer>
+// CHECK-NEXT:            <key>line</key><integer>241</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>225</integer>
+// CHECK-NEXT:            <key>line</key><integer>241</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3899,12 +3803,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>225</integer>
+// CHECK-NEXT:            <key>line</key><integer>241</integer>
 // CHECK-NEXT:            <key>col</key><integer>27</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>225</integer>
+// CHECK-NEXT:            <key>line</key><integer>241</integer>
 // CHECK-NEXT:            <key>col</key><integer>27</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -3916,7 +3820,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>225</integer>
+// CHECK-NEXT:       <key>line</key><integer>241</integer>
 // CHECK-NEXT:       <key>col</key><integer>27</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3924,12 +3828,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>225</integer>
+// CHECK-NEXT:          <key>line</key><integer>241</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>225</integer>
+// CHECK-NEXT:          <key>line</key><integer>241</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3953,7 +3857,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>225</integer>
+// CHECK-NEXT:    <key>line</key><integer>241</integer>
 // CHECK-NEXT:    <key>col</key><integer>27</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -3965,7 +3869,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>229</integer>
+// CHECK-NEXT:       <key>line</key><integer>245</integer>
 // CHECK-NEXT:       <key>col</key><integer>20</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -3973,12 +3877,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>229</integer>
+// CHECK-NEXT:          <key>line</key><integer>245</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>229</integer>
+// CHECK-NEXT:          <key>line</key><integer>245</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -3998,12 +3902,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>229</integer>
+// CHECK-NEXT:            <key>line</key><integer>245</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>229</integer>
+// CHECK-NEXT:            <key>line</key><integer>245</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4011,12 +3915,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>230</integer>
+// CHECK-NEXT:            <key>line</key><integer>246</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>230</integer>
+// CHECK-NEXT:            <key>line</key><integer>246</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4032,12 +3936,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>230</integer>
+// CHECK-NEXT:            <key>line</key><integer>246</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>230</integer>
+// CHECK-NEXT:            <key>line</key><integer>246</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4045,12 +3949,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>230</integer>
+// CHECK-NEXT:            <key>line</key><integer>246</integer>
 // CHECK-NEXT:            <key>col</key><integer>27</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>230</integer>
+// CHECK-NEXT:            <key>line</key><integer>246</integer>
 // CHECK-NEXT:            <key>col</key><integer>27</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4062,7 +3966,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>230</integer>
+// CHECK-NEXT:       <key>line</key><integer>246</integer>
 // CHECK-NEXT:       <key>col</key><integer>27</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4070,12 +3974,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>230</integer>
+// CHECK-NEXT:          <key>line</key><integer>246</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>230</integer>
+// CHECK-NEXT:          <key>line</key><integer>246</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4099,7 +4003,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>230</integer>
+// CHECK-NEXT:    <key>line</key><integer>246</integer>
 // CHECK-NEXT:    <key>col</key><integer>27</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -4111,7 +4015,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>234</integer>
+// CHECK-NEXT:       <key>line</key><integer>250</integer>
 // CHECK-NEXT:       <key>col</key><integer>27</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4119,12 +4023,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>234</integer>
+// CHECK-NEXT:          <key>line</key><integer>250</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>234</integer>
+// CHECK-NEXT:          <key>line</key><integer>250</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4144,12 +4048,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>234</integer>
+// CHECK-NEXT:            <key>line</key><integer>250</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>234</integer>
+// CHECK-NEXT:            <key>line</key><integer>250</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4157,12 +4061,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>235</integer>
+// CHECK-NEXT:            <key>line</key><integer>251</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>235</integer>
+// CHECK-NEXT:            <key>line</key><integer>251</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4178,12 +4082,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>235</integer>
+// CHECK-NEXT:            <key>line</key><integer>251</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>235</integer>
+// CHECK-NEXT:            <key>line</key><integer>251</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4191,12 +4095,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>235</integer>
+// CHECK-NEXT:            <key>line</key><integer>251</integer>
 // CHECK-NEXT:            <key>col</key><integer>20</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>235</integer>
+// CHECK-NEXT:            <key>line</key><integer>251</integer>
 // CHECK-NEXT:            <key>col</key><integer>20</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4208,7 +4112,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>235</integer>
+// CHECK-NEXT:       <key>line</key><integer>251</integer>
 // CHECK-NEXT:       <key>col</key><integer>20</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4216,12 +4120,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>235</integer>
+// CHECK-NEXT:          <key>line</key><integer>251</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>235</integer>
+// CHECK-NEXT:          <key>line</key><integer>251</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4245,7 +4149,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>235</integer>
+// CHECK-NEXT:    <key>line</key><integer>251</integer>
 // CHECK-NEXT:    <key>col</key><integer>20</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -4257,7 +4161,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>239</integer>
+// CHECK-NEXT:       <key>line</key><integer>255</integer>
 // CHECK-NEXT:       <key>col</key><integer>10</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4265,12 +4169,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>239</integer>
+// CHECK-NEXT:          <key>line</key><integer>255</integer>
 // CHECK-NEXT:          <key>col</key><integer>10</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>239</integer>
+// CHECK-NEXT:          <key>line</key><integer>255</integer>
 // CHECK-NEXT:          <key>col</key><integer>10</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4290,12 +4194,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>239</integer>
+// CHECK-NEXT:            <key>line</key><integer>255</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>239</integer>
+// CHECK-NEXT:            <key>line</key><integer>255</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4303,12 +4207,75 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>241</integer>
+// CHECK-NEXT:            <key>line</key><integer>256</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>241</integer>
+// CHECK-NEXT:            <key>line</key><integer>256</integer>
+// CHECK-NEXT:            <key>col</key><integer>25</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>event</string>
+// CHECK-NEXT:      <key>location</key>
+// CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>line</key><integer>256</integer>
+// CHECK-NEXT:       <key>col</key><integer>27</integer>
+// CHECK-NEXT:       <key>file</key><integer>0</integer>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <key>ranges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:        <array>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>256</integer>
+// CHECK-NEXT:          <key>col</key><integer>27</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>256</integer>
+// CHECK-NEXT:          <key>col</key><integer>67</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:        </array>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:      <key>depth</key><integer>0</integer>
+// CHECK-NEXT:      <key>extended_message</key>
+// CHECK-NEXT:      <string>Type &apos;MutableArray&lt;NSMutableString *&gt; *&apos; is inferred from implicit cast (from &apos;BuggyMutableArray&lt;NSMutableString *&gt; *&apos; to &apos;MutableArray&lt;NSMutableString *&gt; *&apos;)</string>
+// CHECK-NEXT:      <key>message</key>
+// CHECK-NEXT:      <string>Type &apos;MutableArray&lt;NSMutableString *&gt; *&apos; is inferred from implicit cast (from &apos;BuggyMutableArray&lt;NSMutableString *&gt; *&apos; to &apos;MutableArray&lt;NSMutableString *&gt; *&apos;)</string>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>256</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>256</integer>
+// CHECK-NEXT:            <key>col</key><integer>25</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>257</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>257</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4324,12 +4291,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>241</integer>
+// CHECK-NEXT:            <key>line</key><integer>257</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>241</integer>
+// CHECK-NEXT:            <key>line</key><integer>257</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4337,12 +4304,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>241</integer>
+// CHECK-NEXT:            <key>line</key><integer>257</integer>
 // CHECK-NEXT:            <key>col</key><integer>20</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>241</integer>
+// CHECK-NEXT:            <key>line</key><integer>257</integer>
 // CHECK-NEXT:            <key>col</key><integer>20</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4354,7 +4321,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>241</integer>
+// CHECK-NEXT:       <key>line</key><integer>257</integer>
 // CHECK-NEXT:       <key>col</key><integer>20</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4362,12 +4329,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>241</integer>
+// CHECK-NEXT:          <key>line</key><integer>257</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>241</integer>
+// CHECK-NEXT:          <key>line</key><integer>257</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4391,7 +4358,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>241</integer>
+// CHECK-NEXT:    <key>line</key><integer>257</integer>
 // CHECK-NEXT:    <key>col</key><integer>20</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -4403,7 +4370,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>245</integer>
+// CHECK-NEXT:       <key>line</key><integer>261</integer>
 // CHECK-NEXT:       <key>col</key><integer>45</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4411,12 +4378,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>245</integer>
+// CHECK-NEXT:          <key>line</key><integer>261</integer>
 // CHECK-NEXT:          <key>col</key><integer>45</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>245</integer>
+// CHECK-NEXT:          <key>line</key><integer>261</integer>
 // CHECK-NEXT:          <key>col</key><integer>45</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4436,12 +4403,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>245</integer>
+// CHECK-NEXT:            <key>line</key><integer>261</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>245</integer>
+// CHECK-NEXT:            <key>line</key><integer>261</integer>
 // CHECK-NEXT:            <key>col</key><integer>19</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4449,12 +4416,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>246</integer>
+// CHECK-NEXT:            <key>line</key><integer>262</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>246</integer>
+// CHECK-NEXT:            <key>line</key><integer>262</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4466,7 +4433,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>246</integer>
+// CHECK-NEXT:       <key>line</key><integer>262</integer>
 // CHECK-NEXT:       <key>col</key><integer>20</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4474,12 +4441,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>246</integer>
+// CHECK-NEXT:          <key>line</key><integer>262</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>246</integer>
+// CHECK-NEXT:          <key>line</key><integer>262</integer>
 // CHECK-NEXT:          <key>col</key><integer>20</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4499,12 +4466,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>246</integer>
+// CHECK-NEXT:            <key>line</key><integer>262</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>246</integer>
+// CHECK-NEXT:            <key>line</key><integer>262</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4512,12 +4479,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>247</integer>
+// CHECK-NEXT:            <key>line</key><integer>263</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>247</integer>
+// CHECK-NEXT:            <key>line</key><integer>263</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4533,12 +4500,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>247</integer>
+// CHECK-NEXT:            <key>line</key><integer>263</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>247</integer>
+// CHECK-NEXT:            <key>line</key><integer>263</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4546,12 +4513,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>247</integer>
+// CHECK-NEXT:            <key>line</key><integer>263</integer>
 // CHECK-NEXT:            <key>col</key><integer>27</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>247</integer>
+// CHECK-NEXT:            <key>line</key><integer>263</integer>
 // CHECK-NEXT:            <key>col</key><integer>27</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4563,7 +4530,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>247</integer>
+// CHECK-NEXT:       <key>line</key><integer>263</integer>
 // CHECK-NEXT:       <key>col</key><integer>27</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4571,12 +4538,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>247</integer>
+// CHECK-NEXT:          <key>line</key><integer>263</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>247</integer>
+// CHECK-NEXT:          <key>line</key><integer>263</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4600,7 +4567,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>247</integer>
+// CHECK-NEXT:    <key>line</key><integer>263</integer>
 // CHECK-NEXT:    <key>col</key><integer>27</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -4616,12 +4583,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>259</integer>
+// CHECK-NEXT:            <key>line</key><integer>275</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>259</integer>
+// CHECK-NEXT:            <key>line</key><integer>275</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4629,12 +4596,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>260</integer>
+// CHECK-NEXT:            <key>line</key><integer>276</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>260</integer>
+// CHECK-NEXT:            <key>line</key><integer>276</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4650,12 +4617,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>260</integer>
+// CHECK-NEXT:            <key>line</key><integer>276</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>260</integer>
+// CHECK-NEXT:            <key>line</key><integer>276</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4663,12 +4630,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>260</integer>
+// CHECK-NEXT:            <key>line</key><integer>276</integer>
 // CHECK-NEXT:            <key>col</key><integer>19</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>260</integer>
+// CHECK-NEXT:            <key>line</key><integer>276</integer>
 // CHECK-NEXT:            <key>col</key><integer>19</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4680,7 +4647,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>260</integer>
+// CHECK-NEXT:       <key>line</key><integer>276</integer>
 // CHECK-NEXT:       <key>col</key><integer>19</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4688,12 +4655,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>260</integer>
+// CHECK-NEXT:          <key>line</key><integer>276</integer>
 // CHECK-NEXT:          <key>col</key><integer>19</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>260</integer>
+// CHECK-NEXT:          <key>line</key><integer>276</integer>
 // CHECK-NEXT:          <key>col</key><integer>38</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4709,7 +4676,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>260</integer>
+// CHECK-NEXT:       <key>line</key><integer>276</integer>
 // CHECK-NEXT:       <key>col</key><integer>19</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4717,12 +4684,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>260</integer>
+// CHECK-NEXT:          <key>line</key><integer>276</integer>
 // CHECK-NEXT:          <key>col</key><integer>19</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>260</integer>
+// CHECK-NEXT:          <key>line</key><integer>276</integer>
 // CHECK-NEXT:          <key>col</key><integer>38</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4746,7 +4713,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>260</integer>
+// CHECK-NEXT:    <key>line</key><integer>276</integer>
 // CHECK-NEXT:    <key>col</key><integer>19</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -4762,12 +4729,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>259</integer>
+// CHECK-NEXT:            <key>line</key><integer>275</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>259</integer>
+// CHECK-NEXT:            <key>line</key><integer>275</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4775,12 +4742,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>262</integer>
+// CHECK-NEXT:            <key>line</key><integer>278</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>262</integer>
+// CHECK-NEXT:            <key>line</key><integer>278</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4796,12 +4763,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>262</integer>
+// CHECK-NEXT:            <key>line</key><integer>278</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>262</integer>
+// CHECK-NEXT:            <key>line</key><integer>278</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4809,12 +4776,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>262</integer>
+// CHECK-NEXT:            <key>line</key><integer>278</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>262</integer>
+// CHECK-NEXT:            <key>line</key><integer>278</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4826,7 +4793,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>262</integer>
+// CHECK-NEXT:       <key>line</key><integer>278</integer>
 // CHECK-NEXT:       <key>col</key><integer>9</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4834,12 +4801,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>262</integer>
+// CHECK-NEXT:          <key>line</key><integer>278</integer>
 // CHECK-NEXT:          <key>col</key><integer>9</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>262</integer>
+// CHECK-NEXT:          <key>line</key><integer>278</integer>
 // CHECK-NEXT:          <key>col</key><integer>23</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4855,7 +4822,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>262</integer>
+// CHECK-NEXT:       <key>line</key><integer>278</integer>
 // CHECK-NEXT:       <key>col</key><integer>9</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4863,12 +4830,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>262</integer>
+// CHECK-NEXT:          <key>line</key><integer>278</integer>
 // CHECK-NEXT:          <key>col</key><integer>9</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>262</integer>
+// CHECK-NEXT:          <key>line</key><integer>278</integer>
 // CHECK-NEXT:          <key>col</key><integer>23</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -4892,7 +4859,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>4</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>262</integer>
+// CHECK-NEXT:    <key>line</key><integer>278</integer>
 // CHECK-NEXT:    <key>col</key><integer>9</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -4908,12 +4875,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>259</integer>
+// CHECK-NEXT:            <key>line</key><integer>275</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>259</integer>
+// CHECK-NEXT:            <key>line</key><integer>275</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4921,12 +4888,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>264</integer>
+// CHECK-NEXT:            <key>line</key><integer>280</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>264</integer>
+// CHECK-NEXT:            <key>line</key><integer>280</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4942,12 +4909,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>264</integer>
+// CHECK-NEXT:            <key>line</key><integer>280</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>264</integer>
+// CHECK-NEXT:            <key>line</key><integer>280</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4955,12 +4922,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>264</integer>
+// CHECK-NEXT:            <key>line</key><integer>280</integer>
 // CHECK-NEXT:            <key>col</key><integer>11</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>264</integer>
+// CHECK-NEXT:            <key>line</key><integer>280</integer>
 // CHECK-NEXT:            <key>col</key><integer>21</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -4972,7 +4939,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>264</integer>
+// CHECK-NEXT:       <key>line</key><integer>280</integer>
 // CHECK-NEXT:       <key>col</key><integer>11</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -4980,12 +4947,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>264</integer>
+// CHECK-NEXT:          <key>line</key><integer>280</integer>
 // CHECK-NEXT:          <key>col</key><integer>11</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>264</integer>
+// CHECK-NEXT:          <key>line</key><integer>280</integer>
 // CHECK-NEXT:          <key>col</key><integer>21</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5001,7 +4968,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>264</integer>
+// CHECK-NEXT:       <key>line</key><integer>280</integer>
 // CHECK-NEXT:       <key>col</key><integer>9</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5009,12 +4976,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>264</integer>
+// CHECK-NEXT:          <key>line</key><integer>280</integer>
 // CHECK-NEXT:          <key>col</key><integer>9</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>264</integer>
+// CHECK-NEXT:          <key>line</key><integer>280</integer>
 // CHECK-NEXT:          <key>col</key><integer>21</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5038,7 +5005,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>6</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>264</integer>
+// CHECK-NEXT:    <key>line</key><integer>280</integer>
 // CHECK-NEXT:    <key>col</key><integer>9</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -5054,12 +5021,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>259</integer>
+// CHECK-NEXT:            <key>line</key><integer>275</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>259</integer>
+// CHECK-NEXT:            <key>line</key><integer>275</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5067,12 +5034,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>266</integer>
+// CHECK-NEXT:            <key>line</key><integer>282</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>266</integer>
+// CHECK-NEXT:            <key>line</key><integer>282</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5088,12 +5055,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>266</integer>
+// CHECK-NEXT:            <key>line</key><integer>282</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>266</integer>
+// CHECK-NEXT:            <key>line</key><integer>282</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5101,12 +5068,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>266</integer>
+// CHECK-NEXT:            <key>line</key><integer>282</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>266</integer>
+// CHECK-NEXT:            <key>line</key><integer>282</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5118,7 +5085,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>266</integer>
+// CHECK-NEXT:       <key>line</key><integer>282</integer>
 // CHECK-NEXT:       <key>col</key><integer>9</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5126,12 +5093,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>266</integer>
+// CHECK-NEXT:          <key>line</key><integer>282</integer>
 // CHECK-NEXT:          <key>col</key><integer>9</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>266</integer>
+// CHECK-NEXT:          <key>line</key><integer>282</integer>
 // CHECK-NEXT:          <key>col</key><integer>9</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5147,7 +5114,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>266</integer>
+// CHECK-NEXT:       <key>line</key><integer>282</integer>
 // CHECK-NEXT:       <key>col</key><integer>9</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5155,12 +5122,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>266</integer>
+// CHECK-NEXT:          <key>line</key><integer>282</integer>
 // CHECK-NEXT:          <key>col</key><integer>9</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>266</integer>
+// CHECK-NEXT:          <key>line</key><integer>282</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5184,7 +5151,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>8</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>266</integer>
+// CHECK-NEXT:    <key>line</key><integer>282</integer>
 // CHECK-NEXT:    <key>col</key><integer>9</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -5196,7 +5163,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>272</integer>
+// CHECK-NEXT:       <key>line</key><integer>288</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5204,12 +5171,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>272</integer>
+// CHECK-NEXT:          <key>line</key><integer>288</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>272</integer>
+// CHECK-NEXT:          <key>line</key><integer>288</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5229,12 +5196,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>272</integer>
+// CHECK-NEXT:            <key>line</key><integer>288</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>272</integer>
+// CHECK-NEXT:            <key>line</key><integer>288</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5242,12 +5209,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>273</integer>
+// CHECK-NEXT:            <key>line</key><integer>289</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>273</integer>
+// CHECK-NEXT:            <key>line</key><integer>289</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5263,12 +5230,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>273</integer>
+// CHECK-NEXT:            <key>line</key><integer>289</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>273</integer>
+// CHECK-NEXT:            <key>line</key><integer>289</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5276,12 +5243,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>273</integer>
+// CHECK-NEXT:            <key>line</key><integer>289</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>273</integer>
+// CHECK-NEXT:            <key>line</key><integer>289</integer>
 // CHECK-NEXT:            <key>col</key><integer>16</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5293,7 +5260,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>273</integer>
+// CHECK-NEXT:       <key>line</key><integer>289</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5301,12 +5268,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>273</integer>
+// CHECK-NEXT:          <key>line</key><integer>289</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>273</integer>
+// CHECK-NEXT:          <key>line</key><integer>289</integer>
 // CHECK-NEXT:          <key>col</key><integer>23</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5326,12 +5293,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>273</integer>
+// CHECK-NEXT:            <key>line</key><integer>289</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>273</integer>
+// CHECK-NEXT:            <key>line</key><integer>289</integer>
 // CHECK-NEXT:            <key>col</key><integer>16</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5339,12 +5306,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>274</integer>
+// CHECK-NEXT:            <key>line</key><integer>290</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>274</integer>
+// CHECK-NEXT:            <key>line</key><integer>290</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5360,12 +5327,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>274</integer>
+// CHECK-NEXT:            <key>line</key><integer>290</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>274</integer>
+// CHECK-NEXT:            <key>line</key><integer>290</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5373,12 +5340,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>275</integer>
+// CHECK-NEXT:            <key>line</key><integer>291</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>275</integer>
+// CHECK-NEXT:            <key>line</key><integer>291</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5390,7 +5357,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>275</integer>
+// CHECK-NEXT:       <key>line</key><integer>291</integer>
 // CHECK-NEXT:       <key>col</key><integer>5</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5398,12 +5365,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>275</integer>
+// CHECK-NEXT:          <key>line</key><integer>291</integer>
 // CHECK-NEXT:          <key>col</key><integer>19</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>275</integer>
+// CHECK-NEXT:          <key>line</key><integer>291</integer>
 // CHECK-NEXT:          <key>col</key><integer>41</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5427,7 +5394,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>4</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>275</integer>
+// CHECK-NEXT:    <key>line</key><integer>291</integer>
 // CHECK-NEXT:    <key>col</key><integer>5</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -5439,7 +5406,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>284</integer>
+// CHECK-NEXT:       <key>line</key><integer>300</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5447,12 +5414,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>284</integer>
+// CHECK-NEXT:          <key>line</key><integer>300</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>284</integer>
+// CHECK-NEXT:          <key>line</key><integer>300</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5472,12 +5439,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>284</integer>
+// CHECK-NEXT:            <key>line</key><integer>300</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>284</integer>
+// CHECK-NEXT:            <key>line</key><integer>300</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5485,12 +5452,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>285</integer>
+// CHECK-NEXT:            <key>line</key><integer>301</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>285</integer>
+// CHECK-NEXT:            <key>line</key><integer>301</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5506,12 +5473,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>285</integer>
+// CHECK-NEXT:            <key>line</key><integer>301</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>285</integer>
+// CHECK-NEXT:            <key>line</key><integer>301</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5519,12 +5486,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>285</integer>
+// CHECK-NEXT:            <key>line</key><integer>301</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>285</integer>
+// CHECK-NEXT:            <key>line</key><integer>301</integer>
 // CHECK-NEXT:            <key>col</key><integer>16</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5536,7 +5503,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>285</integer>
+// CHECK-NEXT:       <key>line</key><integer>301</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5544,12 +5511,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>285</integer>
+// CHECK-NEXT:          <key>line</key><integer>301</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>285</integer>
+// CHECK-NEXT:          <key>line</key><integer>301</integer>
 // CHECK-NEXT:          <key>col</key><integer>23</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5569,12 +5536,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>285</integer>
+// CHECK-NEXT:            <key>line</key><integer>301</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>285</integer>
+// CHECK-NEXT:            <key>line</key><integer>301</integer>
 // CHECK-NEXT:            <key>col</key><integer>16</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5582,12 +5549,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>286</integer>
+// CHECK-NEXT:            <key>line</key><integer>302</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>286</integer>
+// CHECK-NEXT:            <key>line</key><integer>302</integer>
 // CHECK-NEXT:            <key>col</key><integer>5</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5599,7 +5566,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>286</integer>
+// CHECK-NEXT:       <key>line</key><integer>302</integer>
 // CHECK-NEXT:       <key>col</key><integer>5</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5607,12 +5574,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>286</integer>
+// CHECK-NEXT:          <key>line</key><integer>302</integer>
 // CHECK-NEXT:          <key>col</key><integer>19</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>286</integer>
+// CHECK-NEXT:          <key>line</key><integer>302</integer>
 // CHECK-NEXT:          <key>col</key><integer>41</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5636,7 +5603,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>286</integer>
+// CHECK-NEXT:    <key>line</key><integer>302</integer>
 // CHECK-NEXT:    <key>col</key><integer>5</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -5652,12 +5619,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>310</integer>
+// CHECK-NEXT:            <key>line</key><integer>326</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>310</integer>
+// CHECK-NEXT:            <key>line</key><integer>326</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5665,12 +5632,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>311</integer>
+// CHECK-NEXT:            <key>line</key><integer>327</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>311</integer>
+// CHECK-NEXT:            <key>line</key><integer>327</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5686,12 +5653,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>311</integer>
+// CHECK-NEXT:            <key>line</key><integer>327</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>311</integer>
+// CHECK-NEXT:            <key>line</key><integer>327</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5699,12 +5666,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>311</integer>
+// CHECK-NEXT:            <key>line</key><integer>327</integer>
 // CHECK-NEXT:            <key>col</key><integer>28</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>311</integer>
+// CHECK-NEXT:            <key>line</key><integer>327</integer>
 // CHECK-NEXT:            <key>col</key><integer>28</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5716,7 +5683,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>311</integer>
+// CHECK-NEXT:       <key>line</key><integer>327</integer>
 // CHECK-NEXT:       <key>col</key><integer>28</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5724,12 +5691,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>311</integer>
+// CHECK-NEXT:          <key>line</key><integer>327</integer>
 // CHECK-NEXT:          <key>col</key><integer>28</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>311</integer>
+// CHECK-NEXT:          <key>line</key><integer>327</integer>
 // CHECK-NEXT:          <key>col</key><integer>39</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5745,7 +5712,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>311</integer>
+// CHECK-NEXT:       <key>line</key><integer>327</integer>
 // CHECK-NEXT:       <key>col</key><integer>28</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5753,12 +5720,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>311</integer>
+// CHECK-NEXT:          <key>line</key><integer>327</integer>
 // CHECK-NEXT:          <key>col</key><integer>28</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>311</integer>
+// CHECK-NEXT:          <key>line</key><integer>327</integer>
 // CHECK-NEXT:          <key>col</key><integer>39</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5782,7 +5749,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>311</integer>
+// CHECK-NEXT:    <key>line</key><integer>327</integer>
 // CHECK-NEXT:    <key>col</key><integer>28</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -5798,12 +5765,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>310</integer>
+// CHECK-NEXT:            <key>line</key><integer>326</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>310</integer>
+// CHECK-NEXT:            <key>line</key><integer>326</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5811,12 +5778,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>312</integer>
+// CHECK-NEXT:            <key>line</key><integer>328</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>312</integer>
+// CHECK-NEXT:            <key>line</key><integer>328</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5832,12 +5799,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>312</integer>
+// CHECK-NEXT:            <key>line</key><integer>328</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>312</integer>
+// CHECK-NEXT:            <key>line</key><integer>328</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5845,12 +5812,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>312</integer>
+// CHECK-NEXT:            <key>line</key><integer>328</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>312</integer>
+// CHECK-NEXT:            <key>line</key><integer>328</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5862,7 +5829,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>312</integer>
+// CHECK-NEXT:       <key>line</key><integer>328</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5870,12 +5837,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>312</integer>
+// CHECK-NEXT:          <key>line</key><integer>328</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>312</integer>
+// CHECK-NEXT:          <key>line</key><integer>328</integer>
 // CHECK-NEXT:          <key>col</key><integer>19</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5891,7 +5858,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>312</integer>
+// CHECK-NEXT:       <key>line</key><integer>328</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5899,12 +5866,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>312</integer>
+// CHECK-NEXT:          <key>line</key><integer>328</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>312</integer>
+// CHECK-NEXT:          <key>line</key><integer>328</integer>
 // CHECK-NEXT:          <key>col</key><integer>19</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5928,7 +5895,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>312</integer>
+// CHECK-NEXT:    <key>line</key><integer>328</integer>
 // CHECK-NEXT:    <key>col</key><integer>7</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -5940,7 +5907,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>316</integer>
+// CHECK-NEXT:       <key>line</key><integer>332</integer>
 // CHECK-NEXT:       <key>col</key><integer>13</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -5948,12 +5915,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>316</integer>
+// CHECK-NEXT:          <key>line</key><integer>332</integer>
 // CHECK-NEXT:          <key>col</key><integer>13</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>316</integer>
+// CHECK-NEXT:          <key>line</key><integer>332</integer>
 // CHECK-NEXT:          <key>col</key><integer>15</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -5973,12 +5940,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>316</integer>
+// CHECK-NEXT:            <key>line</key><integer>332</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>316</integer>
+// CHECK-NEXT:            <key>line</key><integer>332</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -5986,12 +5953,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>317</integer>
+// CHECK-NEXT:            <key>line</key><integer>333</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>317</integer>
+// CHECK-NEXT:            <key>line</key><integer>333</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6007,12 +5974,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>317</integer>
+// CHECK-NEXT:            <key>line</key><integer>333</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>317</integer>
+// CHECK-NEXT:            <key>line</key><integer>333</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6020,12 +5987,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>317</integer>
+// CHECK-NEXT:            <key>line</key><integer>333</integer>
 // CHECK-NEXT:            <key>col</key><integer>18</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>317</integer>
+// CHECK-NEXT:            <key>line</key><integer>333</integer>
 // CHECK-NEXT:            <key>col</key><integer>21</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6037,7 +6004,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>317</integer>
+// CHECK-NEXT:       <key>line</key><integer>333</integer>
 // CHECK-NEXT:       <key>col</key><integer>18</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6045,12 +6012,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>317</integer>
+// CHECK-NEXT:          <key>line</key><integer>333</integer>
 // CHECK-NEXT:          <key>col</key><integer>18</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>317</integer>
+// CHECK-NEXT:          <key>line</key><integer>333</integer>
 // CHECK-NEXT:          <key>col</key><integer>21</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6074,7 +6041,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>317</integer>
+// CHECK-NEXT:    <key>line</key><integer>333</integer>
 // CHECK-NEXT:    <key>col</key><integer>18</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -6090,12 +6057,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>329</integer>
+// CHECK-NEXT:            <key>line</key><integer>345</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>329</integer>
+// CHECK-NEXT:            <key>line</key><integer>345</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6103,12 +6070,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>330</integer>
+// CHECK-NEXT:            <key>line</key><integer>346</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>330</integer>
+// CHECK-NEXT:            <key>line</key><integer>346</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6120,7 +6087,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>330</integer>
+// CHECK-NEXT:       <key>line</key><integer>346</integer>
 // CHECK-NEXT:       <key>col</key><integer>10</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6128,12 +6095,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>330</integer>
+// CHECK-NEXT:          <key>line</key><integer>346</integer>
 // CHECK-NEXT:          <key>col</key><integer>10</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>330</integer>
+// CHECK-NEXT:          <key>line</key><integer>346</integer>
 // CHECK-NEXT:          <key>col</key><integer>29</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6153,12 +6120,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>330</integer>
+// CHECK-NEXT:            <key>line</key><integer>346</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>330</integer>
+// CHECK-NEXT:            <key>line</key><integer>346</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6166,12 +6133,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>331</integer>
+// CHECK-NEXT:            <key>line</key><integer>347</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>331</integer>
+// CHECK-NEXT:            <key>line</key><integer>347</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6187,12 +6154,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>331</integer>
+// CHECK-NEXT:            <key>line</key><integer>347</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>331</integer>
+// CHECK-NEXT:            <key>line</key><integer>347</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6200,12 +6167,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>331</integer>
+// CHECK-NEXT:            <key>line</key><integer>347</integer>
 // CHECK-NEXT:            <key>col</key><integer>30</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>331</integer>
+// CHECK-NEXT:            <key>line</key><integer>347</integer>
 // CHECK-NEXT:            <key>col</key><integer>30</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6217,7 +6184,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>331</integer>
+// CHECK-NEXT:       <key>line</key><integer>347</integer>
 // CHECK-NEXT:       <key>col</key><integer>30</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6225,12 +6192,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>331</integer>
+// CHECK-NEXT:          <key>line</key><integer>347</integer>
 // CHECK-NEXT:          <key>col</key><integer>30</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>331</integer>
+// CHECK-NEXT:          <key>line</key><integer>347</integer>
 // CHECK-NEXT:          <key>col</key><integer>30</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6254,7 +6221,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>331</integer>
+// CHECK-NEXT:    <key>line</key><integer>347</integer>
 // CHECK-NEXT:    <key>col</key><integer>30</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -6270,12 +6237,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>335</integer>
+// CHECK-NEXT:            <key>line</key><integer>351</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>335</integer>
+// CHECK-NEXT:            <key>line</key><integer>351</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6283,12 +6250,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>336</integer>
+// CHECK-NEXT:            <key>line</key><integer>352</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>336</integer>
+// CHECK-NEXT:            <key>line</key><integer>352</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6300,7 +6267,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>336</integer>
+// CHECK-NEXT:       <key>line</key><integer>352</integer>
 // CHECK-NEXT:       <key>col</key><integer>16</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6308,12 +6275,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>336</integer>
+// CHECK-NEXT:          <key>line</key><integer>352</integer>
 // CHECK-NEXT:          <key>col</key><integer>16</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>336</integer>
+// CHECK-NEXT:          <key>line</key><integer>352</integer>
 // CHECK-NEXT:          <key>col</key><integer>35</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6333,12 +6300,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>336</integer>
+// CHECK-NEXT:            <key>line</key><integer>352</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>336</integer>
+// CHECK-NEXT:            <key>line</key><integer>352</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6346,12 +6313,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>337</integer>
+// CHECK-NEXT:            <key>line</key><integer>353</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>337</integer>
+// CHECK-NEXT:            <key>line</key><integer>353</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6367,12 +6334,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>337</integer>
+// CHECK-NEXT:            <key>line</key><integer>353</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>337</integer>
+// CHECK-NEXT:            <key>line</key><integer>353</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6380,12 +6347,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>337</integer>
+// CHECK-NEXT:            <key>line</key><integer>353</integer>
 // CHECK-NEXT:            <key>col</key><integer>30</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>337</integer>
+// CHECK-NEXT:            <key>line</key><integer>353</integer>
 // CHECK-NEXT:            <key>col</key><integer>30</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6397,7 +6364,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>337</integer>
+// CHECK-NEXT:       <key>line</key><integer>353</integer>
 // CHECK-NEXT:       <key>col</key><integer>30</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6405,12 +6372,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>337</integer>
+// CHECK-NEXT:          <key>line</key><integer>353</integer>
 // CHECK-NEXT:          <key>col</key><integer>30</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>337</integer>
+// CHECK-NEXT:          <key>line</key><integer>353</integer>
 // CHECK-NEXT:          <key>col</key><integer>30</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6434,7 +6401,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>337</integer>
+// CHECK-NEXT:    <key>line</key><integer>353</integer>
 // CHECK-NEXT:    <key>col</key><integer>30</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -6450,12 +6417,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>341</integer>
+// CHECK-NEXT:            <key>line</key><integer>357</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>341</integer>
+// CHECK-NEXT:            <key>line</key><integer>357</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6463,12 +6430,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>342</integer>
+// CHECK-NEXT:            <key>line</key><integer>358</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>342</integer>
+// CHECK-NEXT:            <key>line</key><integer>358</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6484,12 +6451,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>342</integer>
+// CHECK-NEXT:            <key>line</key><integer>358</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>342</integer>
+// CHECK-NEXT:            <key>line</key><integer>358</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6497,12 +6464,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>342</integer>
+// CHECK-NEXT:            <key>line</key><integer>358</integer>
 // CHECK-NEXT:            <key>col</key><integer>14</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>342</integer>
+// CHECK-NEXT:            <key>line</key><integer>358</integer>
 // CHECK-NEXT:            <key>col</key><integer>14</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6514,7 +6481,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>342</integer>
+// CHECK-NEXT:       <key>line</key><integer>358</integer>
 // CHECK-NEXT:       <key>col</key><integer>14</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6522,12 +6489,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>342</integer>
+// CHECK-NEXT:          <key>line</key><integer>358</integer>
 // CHECK-NEXT:          <key>col</key><integer>14</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>342</integer>
+// CHECK-NEXT:          <key>line</key><integer>358</integer>
 // CHECK-NEXT:          <key>col</key><integer>33</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6543,7 +6510,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>342</integer>
+// CHECK-NEXT:       <key>line</key><integer>358</integer>
 // CHECK-NEXT:       <key>col</key><integer>14</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6551,12 +6518,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>342</integer>
+// CHECK-NEXT:          <key>line</key><integer>358</integer>
 // CHECK-NEXT:          <key>col</key><integer>14</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>342</integer>
+// CHECK-NEXT:          <key>line</key><integer>358</integer>
 // CHECK-NEXT:          <key>col</key><integer>33</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6580,7 +6547,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>342</integer>
+// CHECK-NEXT:    <key>line</key><integer>358</integer>
 // CHECK-NEXT:    <key>col</key><integer>14</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -6596,12 +6563,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>347</integer>
+// CHECK-NEXT:            <key>line</key><integer>363</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>347</integer>
+// CHECK-NEXT:            <key>line</key><integer>363</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6609,12 +6576,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>348</integer>
+// CHECK-NEXT:            <key>line</key><integer>364</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>348</integer>
+// CHECK-NEXT:            <key>line</key><integer>364</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6626,7 +6593,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>348</integer>
+// CHECK-NEXT:       <key>line</key><integer>364</integer>
 // CHECK-NEXT:       <key>col</key><integer>10</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6634,12 +6601,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>348</integer>
+// CHECK-NEXT:          <key>line</key><integer>364</integer>
 // CHECK-NEXT:          <key>col</key><integer>10</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>348</integer>
+// CHECK-NEXT:          <key>line</key><integer>364</integer>
 // CHECK-NEXT:          <key>col</key><integer>29</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6659,12 +6626,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>348</integer>
+// CHECK-NEXT:            <key>line</key><integer>364</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>348</integer>
+// CHECK-NEXT:            <key>line</key><integer>364</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6672,12 +6639,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>349</integer>
+// CHECK-NEXT:            <key>line</key><integer>365</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>349</integer>
+// CHECK-NEXT:            <key>line</key><integer>365</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6693,12 +6660,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>349</integer>
+// CHECK-NEXT:            <key>line</key><integer>365</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>349</integer>
+// CHECK-NEXT:            <key>line</key><integer>365</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6706,12 +6673,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>349</integer>
+// CHECK-NEXT:            <key>line</key><integer>365</integer>
 // CHECK-NEXT:            <key>col</key><integer>19</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>349</integer>
+// CHECK-NEXT:            <key>line</key><integer>365</integer>
 // CHECK-NEXT:            <key>col</key><integer>19</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6723,7 +6690,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>349</integer>
+// CHECK-NEXT:       <key>line</key><integer>365</integer>
 // CHECK-NEXT:       <key>col</key><integer>19</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6731,12 +6698,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>349</integer>
+// CHECK-NEXT:          <key>line</key><integer>365</integer>
 // CHECK-NEXT:          <key>col</key><integer>19</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>349</integer>
+// CHECK-NEXT:          <key>line</key><integer>365</integer>
 // CHECK-NEXT:          <key>col</key><integer>19</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6760,7 +6727,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>349</integer>
+// CHECK-NEXT:    <key>line</key><integer>365</integer>
 // CHECK-NEXT:    <key>col</key><integer>19</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -6772,7 +6739,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>358</integer>
+// CHECK-NEXT:       <key>line</key><integer>374</integer>
 // CHECK-NEXT:       <key>col</key><integer>12</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6780,12 +6747,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>358</integer>
+// CHECK-NEXT:          <key>line</key><integer>374</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>358</integer>
+// CHECK-NEXT:          <key>line</key><integer>374</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6805,12 +6772,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>358</integer>
+// CHECK-NEXT:            <key>line</key><integer>374</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>358</integer>
+// CHECK-NEXT:            <key>line</key><integer>374</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6818,12 +6785,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>359</integer>
+// CHECK-NEXT:            <key>line</key><integer>375</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>359</integer>
+// CHECK-NEXT:            <key>line</key><integer>375</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6839,12 +6806,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>359</integer>
+// CHECK-NEXT:            <key>line</key><integer>375</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>359</integer>
+// CHECK-NEXT:            <key>line</key><integer>375</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6852,12 +6819,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>359</integer>
+// CHECK-NEXT:            <key>line</key><integer>375</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>359</integer>
+// CHECK-NEXT:            <key>line</key><integer>375</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6869,7 +6836,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>359</integer>
+// CHECK-NEXT:       <key>line</key><integer>375</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6877,12 +6844,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>359</integer>
+// CHECK-NEXT:          <key>line</key><integer>375</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>359</integer>
+// CHECK-NEXT:          <key>line</key><integer>375</integer>
 // CHECK-NEXT:          <key>col</key><integer>9</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6906,7 +6873,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>359</integer>
+// CHECK-NEXT:    <key>line</key><integer>375</integer>
 // CHECK-NEXT:    <key>col</key><integer>7</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -6918,7 +6885,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>358</integer>
+// CHECK-NEXT:       <key>line</key><integer>374</integer>
 // CHECK-NEXT:       <key>col</key><integer>12</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6926,12 +6893,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>358</integer>
+// CHECK-NEXT:          <key>line</key><integer>374</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>358</integer>
+// CHECK-NEXT:          <key>line</key><integer>374</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -6951,12 +6918,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>358</integer>
+// CHECK-NEXT:            <key>line</key><integer>374</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>358</integer>
+// CHECK-NEXT:            <key>line</key><integer>374</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6964,12 +6931,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>364</integer>
+// CHECK-NEXT:            <key>line</key><integer>380</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>364</integer>
+// CHECK-NEXT:            <key>line</key><integer>380</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -6981,7 +6948,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>364</integer>
+// CHECK-NEXT:       <key>line</key><integer>380</integer>
 // CHECK-NEXT:       <key>col</key><integer>3</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -6989,12 +6956,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>364</integer>
+// CHECK-NEXT:          <key>line</key><integer>380</integer>
 // CHECK-NEXT:          <key>col</key><integer>16</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>364</integer>
+// CHECK-NEXT:          <key>line</key><integer>380</integer>
 // CHECK-NEXT:          <key>col</key><integer>38</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -7018,7 +6985,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>7</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>364</integer>
+// CHECK-NEXT:    <key>line</key><integer>380</integer>
 // CHECK-NEXT:    <key>col</key><integer>3</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -7034,12 +7001,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>373</integer>
+// CHECK-NEXT:            <key>line</key><integer>389</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>373</integer>
+// CHECK-NEXT:            <key>line</key><integer>389</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -7047,12 +7014,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>375</integer>
+// CHECK-NEXT:            <key>line</key><integer>391</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>375</integer>
+// CHECK-NEXT:            <key>line</key><integer>391</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -7068,12 +7035,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>375</integer>
+// CHECK-NEXT:            <key>line</key><integer>391</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>375</integer>
+// CHECK-NEXT:            <key>line</key><integer>391</integer>
 // CHECK-NEXT:            <key>col</key><integer>9</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -7081,12 +7048,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>375</integer>
+// CHECK-NEXT:            <key>line</key><integer>391</integer>
 // CHECK-NEXT:            <key>col</key><integer>70</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>375</integer>
+// CHECK-NEXT:            <key>line</key><integer>391</integer>
 // CHECK-NEXT:            <key>col</key><integer>79</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -7098,7 +7065,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>375</integer>
+// CHECK-NEXT:       <key>line</key><integer>391</integer>
 // CHECK-NEXT:       <key>col</key><integer>70</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -7106,12 +7073,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>375</integer>
+// CHECK-NEXT:          <key>line</key><integer>391</integer>
 // CHECK-NEXT:          <key>col</key><integer>70</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>375</integer>
+// CHECK-NEXT:          <key>line</key><integer>391</integer>
 // CHECK-NEXT:          <key>col</key><integer>79</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -7135,7 +7102,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>375</integer>
+// CHECK-NEXT:    <key>line</key><integer>391</integer>
 // CHECK-NEXT:    <key>col</key><integer>70</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
diff --git a/test/Analysis/html_diagnostics/relevant_lines/goto.c b/test/Analysis/html_diagnostics/relevant_lines/goto.c
new file mode 100644
index 0000000..fc90f13
--- /dev/null
+++ b/test/Analysis/html_diagnostics/relevant_lines/goto.c
@@ -0,0 +1,13 @@
+int goto_test(int input) {
+  int *p = 0;
+  if (input)
+    goto mylabel;
+  return 0;
+mylabel:
+  return *p;
+}
+
+// RUN: rm -rf %t.output
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker=core -analyzer-output html -o %t.output %s
+// RUN: cat %t.output/* | FileCheck %s --match-full-lines
+// CHECK: var relevant_lines = {"1": {"1": 1,  "2": 1, "3": 1, "4": 1, "6": 1, "7": 1}};
diff --git a/test/Analysis/html_diagnostics/relevant_lines/header.h b/test/Analysis/html_diagnostics/relevant_lines/header.h
new file mode 100644
index 0000000..579b66a
--- /dev/null
+++ b/test/Analysis/html_diagnostics/relevant_lines/header.h
@@ -0,0 +1,12 @@
+#define deref(X) (*X)
+
+char helper(
+    char *out,
+    int doDereference) {
+  if (doDereference) {
+    return deref(out);
+  } else {
+    return 'x';
+  }
+  return 'c';
+}
diff --git a/test/Analysis/html_diagnostics/relevant_lines/macros_same_file.c b/test/Analysis/html_diagnostics/relevant_lines/macros_same_file.c
new file mode 100644
index 0000000..5b35565
--- /dev/null
+++ b/test/Analysis/html_diagnostics/relevant_lines/macros_same_file.c
@@ -0,0 +1,15 @@
+#define deref(X) (*X)
+
+int f(int coin) {
+  if (coin) {
+    int *x = 0;
+    return deref(x);
+  } else {
+    return 0;
+  }
+}
+
+// RUN: rm -rf %t.output
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker=core -analyzer-output html -o %t.output %s
+// RUN: cat %t.output/* | FileCheck %s --match-full-lines
+// CHECK: var relevant_lines = {"1": {"3": 1, "4": 1, "5": 1, "6": 1}};
diff --git a/test/Analysis/html_diagnostics/relevant_lines/multifile.c b/test/Analysis/html_diagnostics/relevant_lines/multifile.c
new file mode 100644
index 0000000..3abffd6
--- /dev/null
+++ b/test/Analysis/html_diagnostics/relevant_lines/multifile.c
@@ -0,0 +1,14 @@
+#include "header.h"
+
+int f(int coin) {
+  char *p = 0;
+  if (coin) {
+    return helper(p, coin);
+  }
+  return 0;
+}
+
+// RUN: rm -rf %t.output
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker=core -analyzer-output html -o %t.output %s
+// RUN: cat %t.output/* | FileCheck %s --match-full-lines
+// CHECK: var relevant_lines = {"1": {"3": 1, "4": 1, "5": 1, "6": 1}, "3": {"3": 1, "4": 1, "5": 1, "6": 1, "7": 1}};
diff --git a/test/Analysis/html_diagnostics/relevant_lines/multiline_func_def.c b/test/Analysis/html_diagnostics/relevant_lines/multiline_func_def.c
new file mode 100644
index 0000000..35158a4
--- /dev/null
+++ b/test/Analysis/html_diagnostics/relevant_lines/multiline_func_def.c
@@ -0,0 +1,16 @@
+int f(
+    int coin,
+    int paramA,
+    int paramB) {
+  if (coin) {
+    int *x = 0;
+    return *x;
+  } else {
+    return 0;
+  }
+}
+
+// RUN: rm -rf %t.output
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker=core -analyzer-output html -o %t.output %s
+// RUN: cat %t.output/* | FileCheck %s --match-full-lines
+// CHECK: var relevant_lines = {"1": {"1": 1, "2": 1, "3": 1, "4": 1, "5": 1, "6": 1, "7": 1}};
diff --git a/test/Analysis/html_diagnostics/relevant_lines/notexecutedlines.c b/test/Analysis/html_diagnostics/relevant_lines/notexecutedlines.c
new file mode 100644
index 0000000..3c723aa
--- /dev/null
+++ b/test/Analysis/html_diagnostics/relevant_lines/notexecutedlines.c
@@ -0,0 +1,12 @@
+int f() {
+  int zzz = 200;
+  zzz += 100;
+  return 0;
+}
+
+// Show line with the warning even if it wasn't executed (e.g. warning given
+// by path-insensitive analysis).
+// RUN: rm -rf %t.output
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker=core,deadcode -analyzer-output html -o %t.output %s
+// RUN: cat %t.output/* | FileCheck %s --match-full-lines
+// CHECK: var relevant_lines = {"1": {"3": 1}};
diff --git a/test/Analysis/html_diagnostics/relevant_lines/objcmethods.m b/test/Analysis/html_diagnostics/relevant_lines/objcmethods.m
new file mode 100644
index 0000000..41a4c1d
--- /dev/null
+++ b/test/Analysis/html_diagnostics/relevant_lines/objcmethods.m
@@ -0,0 +1,19 @@
+@interface I
+- (int)func;
+@end
+
+@implementation I
+- (int)func:(int *)param {
+  return *param;
+}
+@end
+
+void foo(I *i) {
+  int *x = 0;
+  [i func:x];
+}
+
+// RUN: rm -rf %t.output
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker=core -analyzer-output html -o %t.output -Wno-objc-root-class %s
+// RUN: cat %t.output/* | FileCheck %s
+// CHECK: var relevant_lines = {"1": {"6": 1, "7": 1, "11": 1, "12": 1, "13": 1}};
diff --git a/test/Analysis/html_diagnostics/relevant_lines/simple_conditional.c b/test/Analysis/html_diagnostics/relevant_lines/simple_conditional.c
new file mode 100644
index 0000000..769859d
--- /dev/null
+++ b/test/Analysis/html_diagnostics/relevant_lines/simple_conditional.c
@@ -0,0 +1,13 @@
+int f(int coin) {
+  if (coin) {
+    int *x = 0;
+    return *x;
+  } else {
+    return 0;
+  }
+}
+
+// RUN: rm -rf %t.output
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker=core -analyzer-output html -o %t.output %s
+// RUN: cat %t.output/* | FileCheck %s --match-full-lines
+// CHECK: var relevant_lines = {"1": {"1": 1, "2": 1, "3": 1, "4": 1}};
diff --git a/test/Analysis/html_diagnostics/relevant_lines/switch.c b/test/Analysis/html_diagnostics/relevant_lines/switch.c
new file mode 100644
index 0000000..e9032cd
--- /dev/null
+++ b/test/Analysis/html_diagnostics/relevant_lines/switch.c
@@ -0,0 +1,20 @@
+enum E {
+  A, B, C
+};
+
+int f(enum E input) {
+  int *x = 0;
+  switch (input) {
+    case A:
+      return 1;
+    case B:
+      return 0;
+    case C:
+      return *x;
+  }
+}
+
+// RUN: rm -rf %t.output
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker=core -analyzer-output html -o %t.output %s
+// RUN: cat %t.output/* | FileCheck %s --match-full-lines
+// CHECK: var relevant_lines = {"1": {"5": 1, "6": 1, "7": 1, "12": 1, "13": 1}};
diff --git a/test/Analysis/html_diagnostics/relevant_lines/switch_default.c b/test/Analysis/html_diagnostics/relevant_lines/switch_default.c
new file mode 100644
index 0000000..b14e3f9
--- /dev/null
+++ b/test/Analysis/html_diagnostics/relevant_lines/switch_default.c
@@ -0,0 +1,20 @@
+enum E {
+  A, B, C
+};
+
+int f(enum E input) {
+  int *x = 0;
+  switch (input) {
+    case A:
+      return 1;
+    case B:
+      return 0;
+    default:
+      return *x;
+  }
+}
+
+// RUN: rm -rf %t.output
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker=core -analyzer-output html -o %t.output %s
+// RUN: cat %t.output/* | FileCheck %s --match-full-lines
+// CHECK: var relevant_lines = {"1": {"5": 1, "6": 1, "7": 1, "12": 1, "13": 1}};
diff --git a/test/Analysis/html_diagnostics/relevant_lines/unused_header.c b/test/Analysis/html_diagnostics/relevant_lines/unused_header.c
new file mode 100644
index 0000000..4b77c65
--- /dev/null
+++ b/test/Analysis/html_diagnostics/relevant_lines/unused_header.c
@@ -0,0 +1,19 @@
+#include "header.h"
+
+int f(int coin) {
+  if (coin) {
+    int *x = 0;
+    return *x;
+  } else {
+    return 0;
+  }
+}
+
+int v(int coin) {
+  return coin;
+}
+
+// RUN: rm -rf %t.output
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker=core -analyzer-output html -o %t.output %s
+// RUN: cat %t.output/* | FileCheck %s --match-full-lines
+// CHECK: var relevant_lines = {"1": {"3": 1, "4": 1, "5": 1, "6": 1}};
diff --git a/test/Analysis/implicit-ctor-undef-value.cpp b/test/Analysis/implicit-ctor-undef-value.cpp
new file mode 100644
index 0000000..87824c0
--- /dev/null
+++ b/test/Analysis/implicit-ctor-undef-value.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core -analyzer-output=text -verify %s
+
+namespace implicit_constructor {
+struct S {
+public:
+  S() {}
+  S(const S &) {}
+};
+
+// Warning is in a weird position because the body of the constructor is
+// missing. Specify which field is being assigned.
+class C { // expected-warning{{Value assigned to field 'y' in implicit constructor is garbage or undefined}}
+          // expected-note@-1{{Value assigned to field 'y' in implicit constructor is garbage or undefined}}
+  int x, y;
+  S s;
+
+public:
+  C(): x(0) {}
+};
+
+void test() {
+  C c1;
+  C c2(c1); // expected-note{{Calling implicit copy constructor for 'C'}}
+}
+} // end namespace implicit_constructor
+
+
+namespace explicit_constructor {
+class C {
+  int x, y;
+
+public:
+  C(): x(0) {}
+  // It is not necessary to specify which field is being assigned to.
+  C(const C &c):
+    x(c.x),
+    y(c.y) // expected-warning{{Assigned value is garbage or undefined}}
+           // expected-note@-1{{Assigned value is garbage or undefined}}
+  {}
+};
+
+void test() {
+  C c1;
+  C c2(c1); // expected-note{{Calling copy constructor for 'C'}}
+}
+} // end namespace explicit_constructor
+
+
+namespace base_class_constructor {
+struct S {
+public:
+  S() {}
+  S(const S &) {}
+};
+
+class C { // expected-warning{{Value assigned to field 'y' in implicit constructor is garbage or undefined}}
+          // expected-note@-1{{Value assigned to field 'y' in implicit constructor is garbage or undefined}}
+  int x, y;
+  S s;
+
+public:
+  C(): x(0) {}
+};
+
+class D: public C {
+public:
+  D(): C() {}
+};
+
+void test() {
+  D d1;
+  D d2(d1); // expected-note   {{Calling implicit copy constructor for 'D'}}
+            // expected-note@-1{{Calling implicit copy constructor for 'C'}}
+}
+} // end namespace base_class_constructor
diff --git a/test/Analysis/initializer.cpp b/test/Analysis/initializer.cpp
index e9658a0..b73a94f 100644
--- a/test/Analysis/initializer.cpp
+++ b/test/Analysis/initializer.cpp
@@ -1,4 +1,7 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDeleteLeaks,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDeleteLeaks,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++17 -DCPLUSPLUS17 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDeleteLeaks,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -DTEST_INLINABLE_ALLOCATORS -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDeleteLeaks,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++17 -DCPLUSPLUS17 -DTEST_INLINABLE_ALLOCATORS -verify %s
 
 void clang_analyzer_eval(bool);
 
@@ -211,7 +214,7 @@
 struct C {
   C(std::initializer_list<int *> list);
 };
-void foo() {
+void testPointerEscapeIntoLists() {
   C empty{}; // no-crash
 
   // Do not warn that 'x' leaks. It might have been deleted by
@@ -219,4 +222,47 @@
   int *x = new int;
   C c{x}; // no-warning
 }
+
+void testPassListsWithExplicitConstructors() {
+  (void)(std::initializer_list<int>){12}; // no-crash
+}
+}
+
+namespace CXX17_aggregate_construction {
+struct A {
+  A();
+};
+
+struct B: public A {
+};
+
+struct C: public B {
+};
+
+struct D: public virtual A {
+};
+
+// In C++17, classes B and C are aggregates, so they will be constructed
+// without actually calling their trivial constructor. Used to crash.
+void foo() {
+  B b = {}; // no-crash
+  const B &bl = {}; // no-crash
+  B &&br = {}; // no-crash
+
+  C c = {}; // no-crash
+  const C &cl = {}; // no-crash
+  C &&cr = {}; // no-crash
+
+  D d = {}; // no-crash
+
+#ifdef CPLUSPLUS17
+  C cd = {{}}; // no-crash
+  const C &cdl = {{}}; // no-crash
+  C &&cdr = {{}}; // no-crash
+
+  const B &bll = {{}}; // no-crash
+  const B &bcl = C({{}}); // no-crash
+  B &&bcr = C({{}}); // no-crash
+#endif
+}
 }
diff --git a/test/Analysis/initializers-cfg-output.cpp b/test/Analysis/initializers-cfg-output.cpp
index ccf4db5..8d1039d 100644
--- a/test/Analysis/initializers-cfg-output.cpp
+++ b/test/Analysis/initializers-cfg-output.cpp
@@ -1,4 +1,14 @@
-// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=debug.DumpCFG %s 2>&1 | FileCheck %s
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=debug.DumpCFG -analyzer-config cfg-rich-constructors=false %s 2>&1 | FileCheck -check-prefixes=CHECK,WARNINGS %s
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=debug.DumpCFG -analyzer-config cfg-rich-constructors=true %s 2>&1 | FileCheck -check-prefixes=CHECK,ANALYZER %s
+
+// This file tests how we construct two different flavors of the Clang CFG -
+// the CFG used by the Sema analysis-based warnings and the CFG used by the
+// static analyzer. The difference in the behavior is checked via FileCheck
+// prefixes (WARNINGS and ANALYZER respectively). When introducing new analyzer
+// flags, no new run lines should be added - just these flags would go to the
+// respective line depending on where is it turned on and where is it turned
+// off. Feel free to add tests that test only one of the CFG flavors if you're
+// not sure how the other flavor is supposed to work in your case.
 
 class A {
 public:
@@ -53,20 +63,25 @@
 // CHECK:  [B2 (ENTRY)]
 // CHECK:    Succs (1): B1
 // CHECK:  [B1]
-// CHECK:    1:  (CXXConstructExpr, class A)
+// WARNINGS:    1:  (CXXConstructExpr, class A)
+// ANALYZER:    1:  (CXXConstructExpr, A() (Base initializer), class A)
 // CHECK:    2: A([B1.1]) (Base initializer)
-// CHECK:    3:  (CXXConstructExpr, class C)
+// WARNINGS:    3:  (CXXConstructExpr, class C)
+// ANALYZER:    3:  (CXXConstructExpr, C() (Base initializer), class C)
 // CHECK:    4: C([B1.3]) (Base initializer)
-// CHECK:    5:  (CXXConstructExpr, class B)
+// WARNINGS:    5:  (CXXConstructExpr, class B)
+// ANALYZER:    5:  (CXXConstructExpr, B() (Base initializer), class B)
 // CHECK:    6: B([B1.5]) (Base initializer)
-// CHECK:    7:  (CXXConstructExpr, class A)
+// WARNINGS:    7:  (CXXConstructExpr, class A)
+// ANALYZER:    7:  (CXXConstructExpr, A() (Base initializer), class A)
 // CHECK:    8: A([B1.7]) (Base initializer)
 // CHECK:    9: /*implicit*/(int)0
 // CHECK:   10: i([B1.9]) (Member initializer)
 // CHECK:   11: this
 // CHECK:   12: [B1.11]->i
 // CHECK:   13: r([B1.12]) (Member initializer)
-// CHECK:   14:  (CXXConstructExpr, class A)
+// WARNINGS:   14:  (CXXConstructExpr, class A)
+// ANALYZER:   14:  (CXXConstructExpr, [B1.15], class A)
 // CHECK:   15: A a;
 // CHECK:    Preds (1): B2
 // CHECK:    Succs (1): B0
@@ -107,7 +122,8 @@
 // CHECK:  [B1]
 // CHECK:    1: 2
 // CHECK:    2: 3
-// CHECK:    3: [B1.1], [B1.2] (CXXConstructExpr, class TestDelegating)
+// WARNINGS:    3: [B1.1], [B1.2] (CXXConstructExpr, class TestDelegating)
+// ANALYZER:    3: [B1.1], [B1.2] (CXXConstructExpr, TestDelegating([B1.1], [B1.2]) (Delegating initializer), class TestDelegating)
 // CHECK:    4: TestDelegating([B1.3]) (Delegating initializer)
 // CHECK:    Preds (1): B2
 // CHECK:    Succs (1): B0
diff --git a/test/Analysis/inline.cpp b/test/Analysis/inline.cpp
index 76eee5b..c362fc9 100644
--- a/test/Analysis/inline.cpp
+++ b/test/Analysis/inline.cpp
@@ -315,17 +315,13 @@
     int value;
 
     IntWrapper(int input) : value(input) {
-      // We don't want this constructor to be inlined unless we can actually
-      // use the proper region for operator new.
-      // See PR12014 and <rdar://problem/12180598>.
-      clang_analyzer_checkInlined(false); // no-warning
+      clang_analyzer_checkInlined(true); // expected-warning{{TRUE}}
     }
   };
 
   void test() {
     IntWrapper *obj = new IntWrapper(42);
-    // should be TRUE
-    clang_analyzer_eval(obj->value == 42); // expected-warning{{UNKNOWN}}
+    clang_analyzer_eval(obj->value == 42); // expected-warning{{TRUE}}
     delete obj;
   }
 
@@ -335,8 +331,9 @@
 
     clang_analyzer_eval(alias == obj); // expected-warning{{TRUE}}
 
-    // should be TRUE
-    clang_analyzer_eval(obj->value == 42); // expected-warning{{UNKNOWN}}
+    clang_analyzer_eval(obj->value == 42); // expected-warning{{TRUE}}
+    // Because malloc() was never free()d:
+    // expected-warning@-2{{Potential leak of memory pointed to by 'alias'}}
   }
 }
 
diff --git a/test/Analysis/inlining/containers.cpp b/test/Analysis/inlining/containers.cpp
index 16e006b..ffa3b51 100644
--- a/test/Analysis/inlining/containers.cpp
+++ b/test/Analysis/inlining/containers.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-config c++-inlining=destructors -analyzer-config c++-container-inlining=false -verify %s
 // RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-config c++-inlining=destructors -analyzer-config c++-container-inlining=true -DINLINE=1 -verify %s
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-config c++-inlining=destructors -analyzer-config c++-container-inlining=false -DTEST_INLINABLE_ALLOCATORS -verify %s
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-config c++-inlining=destructors -analyzer-config c++-container-inlining=true -DTEST_INLINABLE_ALLOCATORS -DINLINE=1 -verify %s
 
 #ifndef HEADER
 
diff --git a/test/Analysis/inlining/false-positive-suppression.c b/test/Analysis/inlining/false-positive-suppression.c
index 4931695..4d472e0 100644
--- a/test/Analysis/inlining/false-positive-suppression.c
+++ b/test/Analysis/inlining/false-positive-suppression.c
@@ -111,14 +111,12 @@
   *p = 1; // no-warning
 }
 
-// If there is a check in a macro that is not function-like, don't treat
-// it like a function so don't suppress.
 #define NON_FUNCTION_MACRO_WITH_CHECK ( ((p) != 0) ? *p : 17)
 void testNonFunctionMacro(int *p) {
   int i = NON_FUNCTION_MACRO_WITH_CHECK ;
   (void)i;
 
-  *p = 1; // expected-warning {{Dereference of null pointer (loaded from variable 'p')}}
+  *p = 1; // no-warning
 }
 
 
@@ -163,6 +161,7 @@
 }
 
 
+
 // Here the check is entirely in non-macro code even though the code itself
 // is a macro argument.
 #define MACRO_DO_IT(a) (a)
@@ -171,6 +170,15 @@
   (void)i;
 }
 
+// No warning should be emitted if dereference is performed from a different
+// macro.
+#define MACRO_CHECK(a) if (a) {}
+#define MACRO_DEREF(a) (*a)
+int testDifferentMacro(int *p) {
+  MACRO_CHECK(p);
+  return MACRO_DEREF(p); // no-warning
+}
+
 // --------------------------
 // "Suppression suppression"
 // --------------------------
diff --git a/test/Analysis/inlining/false-positive-suppression.m b/test/Analysis/inlining/false-positive-suppression.m
index f3532e5..25fe40f 100644
--- a/test/Analysis/inlining/false-positive-suppression.m
+++ b/test/Analysis/inlining/false-positive-suppression.m
@@ -5,7 +5,7 @@
 
 #define ARC __has_feature(objc_arc)
 
-#if defined(SUPPRESSED) && !ARC
+#ifdef SUPPRESSED
 // expected-no-diagnostics
 #endif
 
@@ -27,9 +27,8 @@
 
 void testNilReceiverHelperB(int *x) {
   *x = 1;
-// FIXME: Suppression for this case isn't working under ARC. It should.
-#if !defined(SUPPRESSED) || (defined(SUPPRESSED) && ARC)
-  // expected-warning@-3 {{Dereference of null pointer}}
+#if !defined(SUPPRESSED)
+  // expected-warning@-2 {{Dereference of null pointer}}
 #endif
 }
 
diff --git a/test/Analysis/inlining/inline-defensive-checks.c b/test/Analysis/inlining/inline-defensive-checks.c
index 9f211b5..ca61395 100644
--- a/test/Analysis/inlining/inline-defensive-checks.c
+++ b/test/Analysis/inlining/inline-defensive-checks.c
@@ -190,3 +190,54 @@
   idc(s);
   *(&(s->a[0])) = 7; // no-warning
 }
+
+void idcTrackConstraintThroughSymbolicRegion(int **x) {
+  idc(*x);
+  // FIXME: Should not warn.
+  **x = 7; // expected-warning{{Dereference of null pointer}}
+}
+
+void idcTrackConstraintThroughSymbolicRegionAndParens(int **x) {
+  idc(*x);
+  // FIXME: Should not warn.
+  *(*x) = 7; // expected-warning{{Dereference of null pointer}}
+}
+
+int *idcPlainNull(int coin) {
+  if (coin)
+    return 0;
+  static int X;
+  return &X;
+}
+
+void idcTrackZeroValueThroughSymbolicRegion(int coin, int **x) {
+  *x = idcPlainNull(coin);
+  **x = 7; // no-warning
+}
+
+void idcTrackZeroValueThroughSymbolicRegionAndParens(int coin, int **x) {
+  *x = idcPlainNull(coin);
+  *(*x) = 7; // no-warning
+}
+
+struct WithInt {
+  int i;
+};
+
+struct WithArray {
+  struct WithInt arr[1];
+};
+
+struct WithArray *idcPlainNullWithArray(int coin) {
+  if (coin)
+    return 0;
+  static struct WithArray S;
+  return &S;
+}
+
+void idcTrackZeroValueThroughSymbolicRegionWithArray(int coin, struct WithArray **s) {
+  *s = idcPlainNullWithArray(coin);
+  (*s)->arr[0].i = 1; // no-warning
+  // Same thing.
+  (*s)->arr->i = 1; // no-warning
+}
diff --git a/test/Analysis/inlining/path-notes.m b/test/Analysis/inlining/path-notes.m
index 7f60ff8..5bfd41d 100644
--- a/test/Analysis/inlining/path-notes.m
+++ b/test/Analysis/inlining/path-notes.m
@@ -136,11 +136,19 @@
   // expected-note@-1 {{Dereference of null pointer (loaded from variable 'x')}}
 }
 
-void testNilReceiver(id *x) {
-  if (*x) { // expected-note {{Assuming the condition is false}}
+void testNilReceiver(id *x, id *y, id *z) {
+  // FIXME: Should say "Assuming pointer value is null" instead.
+  // For some reason we're displaying different notes for
+  // tracked and untracked pointers.
+  if (*y) {} // expected-note    {{Assuming the condition is false}}
+             // expected-note@-1 {{Taking false branch}}
+  if (*x) { // expected-note {{Assuming pointer value is null}}
     // expected-note@-1 {{Taking false branch}}
     return;
   }
+  // FIXME: Should say "Assuming pointer value is null" instead.
+  if (*z) {} // expected-note    {{Assuming the condition is false}}
+             // expected-note@-1 {{Taking false branch}}
   testNilReceiverHelper([*x getPtr]);
   // expected-note@-1 {{'getPtr' not called because the receiver is nil}}
   // expected-note@-2 {{Passing null pointer value via 1st parameter 'x'}}
@@ -1158,12 +1166,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>140</integer>
+// CHECK-NEXT:            <key>line</key><integer>143</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>140</integer>
+// CHECK-NEXT:            <key>line</key><integer>143</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1171,12 +1179,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>140</integer>
+// CHECK-NEXT:            <key>line</key><integer>143</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>140</integer>
+// CHECK-NEXT:            <key>line</key><integer>143</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1188,7 +1196,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>140</integer>
+// CHECK-NEXT:       <key>line</key><integer>143</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1196,12 +1204,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>140</integer>
+// CHECK-NEXT:          <key>line</key><integer>143</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>140</integer>
+// CHECK-NEXT:          <key>line</key><integer>143</integer>
 // CHECK-NEXT:          <key>col</key><integer>8</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1221,12 +1229,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>140</integer>
+// CHECK-NEXT:            <key>line</key><integer>143</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>140</integer>
+// CHECK-NEXT:            <key>line</key><integer>143</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1234,12 +1242,206 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>144</integer>
+// CHECK-NEXT:            <key>line</key><integer>145</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>144</integer>
+// CHECK-NEXT:            <key>line</key><integer>145</integer>
+// CHECK-NEXT:            <key>col</key><integer>4</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>145</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>145</integer>
+// CHECK-NEXT:            <key>col</key><integer>4</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>145</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>145</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>event</string>
+// CHECK-NEXT:      <key>location</key>
+// CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>line</key><integer>145</integer>
+// CHECK-NEXT:       <key>col</key><integer>7</integer>
+// CHECK-NEXT:       <key>file</key><integer>0</integer>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <key>ranges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:        <array>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>145</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>145</integer>
+// CHECK-NEXT:          <key>col</key><integer>8</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:        </array>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:      <key>depth</key><integer>0</integer>
+// CHECK-NEXT:      <key>extended_message</key>
+// CHECK-NEXT:      <string>Assuming pointer value is null</string>
+// CHECK-NEXT:      <key>message</key>
+// CHECK-NEXT:      <string>Assuming pointer value is null</string>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>145</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>145</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>150</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>150</integer>
+// CHECK-NEXT:            <key>col</key><integer>4</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>150</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>150</integer>
+// CHECK-NEXT:            <key>col</key><integer>4</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>150</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>150</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>event</string>
+// CHECK-NEXT:      <key>location</key>
+// CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>line</key><integer>150</integer>
+// CHECK-NEXT:       <key>col</key><integer>7</integer>
+// CHECK-NEXT:       <key>file</key><integer>0</integer>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <key>ranges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:        <array>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>150</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>line</key><integer>150</integer>
+// CHECK-NEXT:          <key>col</key><integer>8</integer>
+// CHECK-NEXT:          <key>file</key><integer>0</integer>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:        </array>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:      <key>depth</key><integer>0</integer>
+// CHECK-NEXT:      <key>extended_message</key>
+// CHECK-NEXT:      <string>Assuming the condition is false</string>
+// CHECK-NEXT:      <key>message</key>
+// CHECK-NEXT:      <string>Assuming the condition is false</string>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>150</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>150</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>152</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>152</integer>
 // CHECK-NEXT:            <key>col</key><integer>23</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1255,12 +1457,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>144</integer>
+// CHECK-NEXT:            <key>line</key><integer>152</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>144</integer>
+// CHECK-NEXT:            <key>line</key><integer>152</integer>
 // CHECK-NEXT:            <key>col</key><integer>23</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1268,12 +1470,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>144</integer>
+// CHECK-NEXT:            <key>line</key><integer>152</integer>
 // CHECK-NEXT:            <key>col</key><integer>26</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>144</integer>
+// CHECK-NEXT:            <key>line</key><integer>152</integer>
 // CHECK-NEXT:            <key>col</key><integer>26</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1285,7 +1487,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>144</integer>
+// CHECK-NEXT:       <key>line</key><integer>152</integer>
 // CHECK-NEXT:       <key>col</key><integer>26</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1293,12 +1495,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>144</integer>
+// CHECK-NEXT:          <key>line</key><integer>152</integer>
 // CHECK-NEXT:          <key>col</key><integer>26</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>144</integer>
+// CHECK-NEXT:          <key>line</key><integer>152</integer>
 // CHECK-NEXT:          <key>col</key><integer>27</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1318,12 +1520,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>144</integer>
+// CHECK-NEXT:            <key>line</key><integer>152</integer>
 // CHECK-NEXT:            <key>col</key><integer>26</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>144</integer>
+// CHECK-NEXT:            <key>line</key><integer>152</integer>
 // CHECK-NEXT:            <key>col</key><integer>26</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1331,12 +1533,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>144</integer>
+// CHECK-NEXT:            <key>line</key><integer>152</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>144</integer>
+// CHECK-NEXT:            <key>line</key><integer>152</integer>
 // CHECK-NEXT:            <key>col</key><integer>25</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1348,7 +1550,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>144</integer>
+// CHECK-NEXT:       <key>line</key><integer>152</integer>
 // CHECK-NEXT:       <key>col</key><integer>25</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1356,12 +1558,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>144</integer>
+// CHECK-NEXT:          <key>line</key><integer>152</integer>
 // CHECK-NEXT:          <key>col</key><integer>25</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>144</integer>
+// CHECK-NEXT:          <key>line</key><integer>152</integer>
 // CHECK-NEXT:          <key>col</key><integer>35</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1377,7 +1579,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>144</integer>
+// CHECK-NEXT:       <key>line</key><integer>152</integer>
 // CHECK-NEXT:       <key>col</key><integer>3</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1385,12 +1587,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>144</integer>
+// CHECK-NEXT:          <key>line</key><integer>152</integer>
 // CHECK-NEXT:          <key>col</key><integer>3</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>144</integer>
+// CHECK-NEXT:          <key>line</key><integer>152</integer>
 // CHECK-NEXT:          <key>col</key><integer>36</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1541,12 +1743,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>151</integer>
+// CHECK-NEXT:            <key>line</key><integer>159</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>151</integer>
+// CHECK-NEXT:            <key>line</key><integer>159</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1554,12 +1756,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>151</integer>
+// CHECK-NEXT:            <key>line</key><integer>159</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>151</integer>
+// CHECK-NEXT:            <key>line</key><integer>159</integer>
 // CHECK-NEXT:            <key>col</key><integer>11</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1571,7 +1773,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>151</integer>
+// CHECK-NEXT:       <key>line</key><integer>159</integer>
 // CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1579,12 +1781,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>151</integer>
+// CHECK-NEXT:          <key>line</key><integer>159</integer>
 // CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>151</integer>
+// CHECK-NEXT:          <key>line</key><integer>159</integer>
 // CHECK-NEXT:          <key>col</key><integer>11</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1604,12 +1806,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>151</integer>
+// CHECK-NEXT:            <key>line</key><integer>159</integer>
 // CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>151</integer>
+// CHECK-NEXT:            <key>line</key><integer>159</integer>
 // CHECK-NEXT:            <key>col</key><integer>11</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1617,12 +1819,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>153</integer>
+// CHECK-NEXT:            <key>line</key><integer>161</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>153</integer>
+// CHECK-NEXT:            <key>line</key><integer>161</integer>
 // CHECK-NEXT:            <key>col</key><integer>8</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1638,12 +1840,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>153</integer>
+// CHECK-NEXT:            <key>line</key><integer>161</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>153</integer>
+// CHECK-NEXT:            <key>line</key><integer>161</integer>
 // CHECK-NEXT:            <key>col</key><integer>8</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1651,12 +1853,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>153</integer>
+// CHECK-NEXT:            <key>line</key><integer>161</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>153</integer>
+// CHECK-NEXT:            <key>line</key><integer>161</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1668,7 +1870,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>153</integer>
+// CHECK-NEXT:       <key>line</key><integer>161</integer>
 // CHECK-NEXT:       <key>col</key><integer>10</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1676,12 +1878,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>153</integer>
+// CHECK-NEXT:          <key>line</key><integer>161</integer>
 // CHECK-NEXT:          <key>col</key><integer>19</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>153</integer>
+// CHECK-NEXT:          <key>line</key><integer>161</integer>
 // CHECK-NEXT:          <key>col</key><integer>23</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1705,7 +1907,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>153</integer>
+// CHECK-NEXT:    <key>line</key><integer>161</integer>
 // CHECK-NEXT:    <key>col</key><integer>10</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -1721,12 +1923,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>159</integer>
+// CHECK-NEXT:            <key>line</key><integer>167</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>159</integer>
+// CHECK-NEXT:            <key>line</key><integer>167</integer>
 // CHECK-NEXT:            <key>col</key><integer>8</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1734,12 +1936,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>160</integer>
+// CHECK-NEXT:            <key>line</key><integer>168</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>160</integer>
+// CHECK-NEXT:            <key>line</key><integer>168</integer>
 // CHECK-NEXT:            <key>col</key><integer>15</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1755,12 +1957,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>160</integer>
+// CHECK-NEXT:            <key>line</key><integer>168</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>160</integer>
+// CHECK-NEXT:            <key>line</key><integer>168</integer>
 // CHECK-NEXT:            <key>col</key><integer>15</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1768,12 +1970,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1789,12 +1991,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1802,12 +2004,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>12</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>12</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1819,7 +2021,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>162</integer>
+// CHECK-NEXT:       <key>line</key><integer>170</integer>
 // CHECK-NEXT:       <key>col</key><integer>12</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1827,12 +2029,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>162</integer>
+// CHECK-NEXT:          <key>line</key><integer>170</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>162</integer>
+// CHECK-NEXT:          <key>line</key><integer>170</integer>
 // CHECK-NEXT:          <key>col</key><integer>34</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1852,12 +2054,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>12</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>12</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1865,12 +2067,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>11</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>11</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1882,7 +2084,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>162</integer>
+// CHECK-NEXT:       <key>line</key><integer>170</integer>
 // CHECK-NEXT:       <key>col</key><integer>11</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1890,24 +2092,24 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>162</integer>
+// CHECK-NEXT:          <key>line</key><integer>170</integer>
 // CHECK-NEXT:          <key>col</key><integer>11</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>162</integer>
+// CHECK-NEXT:          <key>line</key><integer>170</integer>
 // CHECK-NEXT:          <key>col</key><integer>47</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:        </array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>162</integer>
+// CHECK-NEXT:          <key>line</key><integer>170</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>162</integer>
+// CHECK-NEXT:          <key>line</key><integer>170</integer>
 // CHECK-NEXT:          <key>col</key><integer>34</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1927,12 +2129,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>11</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>11</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1940,12 +2142,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1957,7 +2159,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>162</integer>
+// CHECK-NEXT:       <key>line</key><integer>170</integer>
 // CHECK-NEXT:       <key>col</key><integer>10</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1965,24 +2167,24 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>162</integer>
+// CHECK-NEXT:          <key>line</key><integer>170</integer>
 // CHECK-NEXT:          <key>col</key><integer>10</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>162</integer>
+// CHECK-NEXT:          <key>line</key><integer>170</integer>
 // CHECK-NEXT:          <key>col</key><integer>60</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:        </array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>162</integer>
+// CHECK-NEXT:          <key>line</key><integer>170</integer>
 // CHECK-NEXT:          <key>col</key><integer>11</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>162</integer>
+// CHECK-NEXT:          <key>line</key><integer>170</integer>
 // CHECK-NEXT:          <key>col</key><integer>47</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -2002,12 +2204,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>162</integer>
+// CHECK-NEXT:            <key>line</key><integer>170</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -2015,12 +2217,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>167</integer>
+// CHECK-NEXT:            <key>line</key><integer>175</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>167</integer>
+// CHECK-NEXT:            <key>line</key><integer>175</integer>
 // CHECK-NEXT:            <key>col</key><integer>15</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -2036,12 +2238,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>167</integer>
+// CHECK-NEXT:            <key>line</key><integer>175</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>167</integer>
+// CHECK-NEXT:            <key>line</key><integer>175</integer>
 // CHECK-NEXT:            <key>col</key><integer>15</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -2049,12 +2251,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>169</integer>
+// CHECK-NEXT:            <key>line</key><integer>177</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>169</integer>
+// CHECK-NEXT:            <key>line</key><integer>177</integer>
 // CHECK-NEXT:            <key>col</key><integer>8</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -2066,7 +2268,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>169</integer>
+// CHECK-NEXT:       <key>line</key><integer>177</integer>
 // CHECK-NEXT:       <key>col</key><integer>3</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -2074,12 +2276,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>169</integer>
+// CHECK-NEXT:          <key>line</key><integer>177</integer>
 // CHECK-NEXT:          <key>col</key><integer>3</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>169</integer>
+// CHECK-NEXT:          <key>line</key><integer>177</integer>
 // CHECK-NEXT:          <key>col</key><integer>10</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -2103,7 +2305,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>11</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>169</integer>
+// CHECK-NEXT:    <key>line</key><integer>177</integer>
 // CHECK-NEXT:    <key>col</key><integer>3</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
diff --git a/test/Analysis/inlining/temp-dtors-path-notes.cpp b/test/Analysis/inlining/temp-dtors-path-notes.cpp
new file mode 100644
index 0000000..6d1a42f
--- /dev/null
+++ b/test/Analysis/inlining/temp-dtors-path-notes.cpp
@@ -0,0 +1,64 @@
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker core -analyzer-config cfg-temporary-dtors=true,c++-temp-dtor-inlining=true -analyzer-output=text -verify %s
+
+namespace test_simple_temporary {
+class C {
+  int x;
+
+public:
+  C(int x): x(x) {} // expected-note{{The value 0 is assigned to field 'x'}}
+  ~C() { x = 1 / x; } // expected-warning{{Division by zero}}
+                      // expected-note@-1{{Division by zero}}
+};
+
+void test() {
+  C(0); // expected-note   {{Passing the value 0 via 1st parameter 'x'}}
+        // expected-note@-1{{Calling constructor for 'C'}}
+        // expected-note@-2{{Returning from constructor for 'C'}}
+        // expected-note@-3{{Calling '~C'}}
+}
+} // end namespace test_simple_temporary
+
+namespace test_lifetime_extended_temporary {
+class C {
+  int x;
+
+public:
+  C(int x): x(x) {} // expected-note{{The value 0 is assigned to field 'x'}}
+  void nop() const {}
+  ~C() { x = 1 / x; } // expected-warning{{Division by zero}}
+                      // expected-note@-1{{Division by zero}}
+};
+
+void test(int coin) {
+  // We'd divide by zero in the temporary destructor that goes after the
+  // elidable copy-constructor from C(0) to the lifetime-extended temporary.
+  // So in fact this example has nothing to do with lifetime extension.
+  // Actually, it would probably be better to elide the constructor, and
+  // put the note for the destructor call at the closing brace after nop.
+  const C &c = coin ? C(1) : C(0); // expected-note   {{Assuming 'coin' is 0}}
+                                   // expected-note@-1{{'?' condition is false}}
+                                   // expected-note@-2{{Passing the value 0 via 1st parameter 'x'}}
+                                   // expected-note@-3{{Calling constructor for 'C'}}
+                                   // expected-note@-4{{Returning from constructor for 'C'}}
+                                   // expected-note@-5{{Calling '~C'}}
+  c.nop();
+}
+} // end namespace test_lifetime_extended_temporary
+
+namespace test_bug_after_dtor {
+int glob;
+
+class C {
+public:
+  C() { glob += 1; }
+  ~C() { glob -= 2; } // expected-note{{The value 0 is assigned to 'glob'}}
+};
+
+void test() {
+  glob = 1;
+  C(); // expected-note   {{Calling '~C'}}
+       // expected-note@-1{{Returning from '~C'}}
+  glob = 1 / glob; // expected-warning{{Division by zero}}
+                   // expected-note@-1{{Division by zero}}
+}
+} // end namespace test_bug_after_dtor
diff --git a/test/Analysis/lambdas.cpp b/test/Analysis/lambdas.cpp
index f3ff9b9..38a2e3a 100644
--- a/test/Analysis/lambdas.cpp
+++ b/test/Analysis/lambdas.cpp
@@ -337,6 +337,16 @@
   lambda2();
 }
 
+void testCapturedConstExprFloat() {
+  constexpr float localConstant = 4.0;
+  auto lambda = []{
+    // Don't treat localConstant as containing a garbage value
+    float copy = localConstant; // no-warning
+    (void)copy;
+  };
+
+  lambda();
+}
 
 // CHECK: [B2 (ENTRY)]
 // CHECK:   Succs (1): B1
diff --git a/test/Analysis/lifetime-cfg-output.cpp b/test/Analysis/lifetime-cfg-output.cpp
index 1e6f56d..dc2849d 100644
--- a/test/Analysis/lifetime-cfg-output.cpp
+++ b/test/Analysis/lifetime-cfg-output.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -analyze -analyzer-checker=debug.DumpCFG -analyzer-config cfg-lifetime=true -analyzer-config cfg-implicit-dtors=false %s > %t 2>&1
+// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -analyze -analyzer-checker=debug.DumpCFG -analyzer-config cfg-lifetime=true,cfg-temporary-dtors=false,cfg-rich-constructors=false -analyzer-config cfg-implicit-dtors=false %s > %t 2>&1
 // RUN: FileCheck --input-file=%t %s
 
 extern bool UV;
diff --git a/test/Analysis/lifetime-extension.cpp b/test/Analysis/lifetime-extension.cpp
index 5e3c5dd..53c8edb 100644
--- a/test/Analysis/lifetime-extension.cpp
+++ b/test/Analysis/lifetime-extension.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_analyze_cc1 -Wno-unused -std=c++11 -analyzer-checker=debug.ExprInspection -verify %s
+// RUN: %clang_analyze_cc1 -Wno-unused -std=c++11 -analyzer-checker=core,debug.ExprInspection -analyzer-config cfg-temporary-dtors=false -verify %s
+// RUN: %clang_analyze_cc1 -Wno-unused -std=c++11 -analyzer-checker=core,debug.ExprInspection -analyzer-config cfg-temporary-dtors=true,c++-temp-dtor-inlining=true -DTEMPORARIES -verify %s
 
 void clang_analyzer_eval(bool);
 
@@ -44,3 +45,138 @@
   clang_analyzer_eval(z == 2); // expected-warning{{UNKNOWN}}
 }
 } // end namespace pr19539_crash_on_destroying_an_integer
+
+namespace maintain_original_object_address_on_lifetime_extension {
+class C {
+  C **after, **before;
+
+public:
+  bool x;
+
+  C(bool x, C **after, C **before) : x(x), after(after), before(before) {
+    *before = this;
+  }
+
+  // Don't track copies in our tests.
+  C(const C &c) : x(c.x), after(nullptr), before(nullptr) {}
+
+  ~C() { if (after) *after = this; }
+
+  operator bool() const { return x; }
+};
+
+void f1() {
+  C *after, *before;
+  {
+    const C &c = C(true, &after, &before);
+  }
+  clang_analyzer_eval(after == before);
+#ifdef TEMPORARIES
+  // expected-warning@-2{{TRUE}}
+#else
+  // expected-warning@-4{{UNKNOWN}}
+#endif
+}
+
+void f2() {
+  C *after, *before;
+  C c = C(1, &after, &before);
+  clang_analyzer_eval(after == before);
+#ifdef TEMPORARIES
+  // expected-warning@-2{{TRUE}}
+#else
+  // expected-warning@-4{{UNKNOWN}}
+#endif
+}
+
+void f3(bool coin) {
+  C *after, *before;
+  {
+    const C &c = coin ? C(true, &after, &before) : C(false, &after, &before);
+  }
+  clang_analyzer_eval(after == before);
+#ifdef TEMPORARIES
+  // expected-warning@-2{{TRUE}}
+#else
+  // expected-warning@-4{{UNKNOWN}}
+#endif
+}
+
+void f4(bool coin) {
+  C *after, *before;
+  {
+    // no-crash
+    const C &c = C(coin, &after, &before) ?: C(false, &after, &before);
+  }
+  // FIXME: Add support for lifetime extension through binary conditional
+  // operator. Ideally also add support for the binary conditional operator in
+  // C++. Because for now it calls the constructor for the condition twice.
+  if (coin) {
+    clang_analyzer_eval(after == before);
+#ifdef TEMPORARIES
+  // expected-warning@-2{{The left operand of '==' is a garbage value}}
+#else
+  // expected-warning@-4{{UNKNOWN}}
+#endif
+  } else {
+    clang_analyzer_eval(after == before);
+#ifdef TEMPORARIES
+    // Seems to work at the moment, but also seems accidental.
+    // Feel free to break.
+  // expected-warning@-4{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+  }
+}
+
+void f5() {
+  C *after, *before;
+  {
+    const bool &x = C(true, &after, &before).x; // no-crash
+  }
+  // FIXME: Should be TRUE. Should not warn about garbage value.
+  clang_analyzer_eval(after == before); // expected-warning{{UNKNOWN}}
+}
+} // end namespace maintain_original_object_address_on_lifetime_extension
+
+namespace maintain_original_object_address_on_move {
+class C {
+  int *x;
+
+public:
+  C() : x(nullptr) {}
+  C(int *x) : x(x) {}
+  C(const C &c) = delete;
+  C(C &&c) : x(c.x) { c.x = nullptr; }
+  C &operator=(C &&c) {
+    x = c.x;
+    c.x = nullptr;
+    return *this;
+  }
+  ~C() {
+    // This was triggering the division by zero warning in f1() and f2():
+    // Because move-elision materialization was incorrectly causing the object
+    // to be relocated from one address to another before move, but destructor
+    // was operating on the old address, it was still thinking that 'x' is set.
+    if (x)
+      *x = 0;
+  }
+};
+
+void f1() {
+  int x = 1;
+  // &x is replaced with nullptr in move-constructor before the temporary dies.
+  C c = C(&x);
+  // Hence x was not set to 0 yet.
+  1 / x; // no-warning
+}
+void f2() {
+  int x = 1;
+  C c;
+  // &x is replaced with nullptr in move-assignment before the temporary dies.
+  c = C(&x);
+  // Hence x was not set to 0 yet.
+  1 / x; // no-warning
+}
+} // end namespace maintain_original_object_address_on_move
diff --git a/test/Analysis/loop-unrolling.cpp b/test/Analysis/loop-unrolling.cpp
index 8ea5b82..1507acc 100644
--- a/test/Analysis/loop-unrolling.cpp
+++ b/test/Analysis/loop-unrolling.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true -verify -std=c++11 %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true -verify -std=c++11 -analyzer-config exploration_strategy=unexplored_first_queue %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true,exploration_strategy=dfs -verify -std=c++11 -DDFS=1 %s
 
 void clang_analyzer_numTimesReached();
 void clang_analyzer_warnIfReached();
@@ -234,7 +235,11 @@
 
 int simple_unknown_bound_loop() {
   for (int i = 2; i < getNum(); i++) {
+#ifdef DFS
     clang_analyzer_numTimesReached(); // expected-warning {{10}}
+#else
+    clang_analyzer_numTimesReached(); // expected-warning {{13}}
+#endif
   }
   return 0;
 }
@@ -373,3 +378,9 @@
   return 0;
 }
 
+
+void pr34943() {
+  for (int i = 0; i < 6L; ++i) {
+    clang_analyzer_numTimesReached(); // expected-warning {{6}}
+  }
+}
diff --git a/test/Analysis/loop-widening-notes.cpp b/test/Analysis/loop-widening-notes.cpp
new file mode 100644
index 0000000..56bde74
--- /dev/null
+++ b/test/Analysis/loop-widening-notes.cpp
@@ -0,0 +1,72 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha -analyzer-max-loop 2 -analyzer-config widen-loops=true -analyzer-output=text -verify %s
+
+int *p_a;
+int bar();
+int flag_a;
+int test_for_bug_25609() {
+  if (p_a == 0) // expected-note {{Assuming 'p_a' is equal to null}} 
+                // expected-note@-1 {{Taking true branch}}
+    bar();
+  for (int i = 0;  // expected-note {{Loop condition is true.  Entering loop body}}                    
+                   // expected-note@-1 {{Loop condition is false. Execution continues on line 16}}
+       ++i,        // expected-note {{Value assigned to 'p_a'}} 
+       i < flag_a;
+       ++i) {}
+                                      
+  *p_a = 25609; // no-crash expected-warning {{Dereference of null pointer (loaded from variable 'p_a')}}
+                // expected-note@-1 {{Dereference of null pointer (loaded from variable 'p_a')}}
+  return *p_a;
+}
+
+int flag_b;
+int while_analyzer_output() {
+  flag_b = 100;
+  int num = 10;
+  while (flag_b-- > 0) { // expected-note {{Loop condition is true.  Entering loop body}} 
+                         // expected-note@-1 {{Value assigned to 'num'}} 
+                         // expected-note@-2 {{Loop condition is false. Execution continues on line 30}}
+    num = flag_b;
+  }
+  if (num < 0) // expected-note {{Assuming 'num' is >= 0}} 
+               // expected-note@-1 {{Taking false branch}}
+    flag_b = 0;
+  else if (num >= 1) // expected-note {{Assuming 'num' is < 1}} 
+                     // expected-note@-1 {{Taking false branch}}
+    flag_b = 50;
+  else
+    flag_b = 100;
+  return flag_b / num; // no-crash expected-warning {{Division by zero}} 
+                       // expected-note@-1 {{Division by zero}}
+}
+
+int flag_c;
+int do_while_analyzer_output() {
+  int num = 10;
+  do {   // expected-note {{Loop condition is true. Execution continues on line 47}} 
+         // expected-note@-1 {{Loop condition is false.  Exiting loop}}
+    num--;
+  } while (flag_c-- > 0); //expected-note {{Value assigned to 'num'}}
+  int local = 0;
+  if (num == 0)       // expected-note {{Assuming 'num' is equal to 0}} 
+                      // expected-note@-1 {{Taking true branch}}
+    local = 10 / num; // no-crash expected-warning {{Division by zero}}
+                      // expected-note@-1 {{Division by zero}}
+  return local;
+}
+
+int flag_d;
+int test_for_loop() {
+  int num = 10;
+  for (int i = 0;    // expected-note {{Loop condition is true.  Entering loop body}} 
+                     // expected-note@-1 {{Loop condition is false. Execution continues on line 67}}
+       new int(10),  // expected-note {{Value assigned to 'num'}}
+       i < flag_d;
+       ++i) {         
+    ++num;
+  }
+  if (num == 0) // expected-note {{Assuming 'num' is equal to 0}} 
+                // expected-note@-1 {{Taking true branch}}
+    flag_d += 10;
+  return flag_d / num; // no-crash expected-warning {{Division by zero}} 
+                       // expected-note@-1 {{Division by zero}}
+}
diff --git a/test/Analysis/malloc-custom.c b/test/Analysis/malloc-custom.c
index f33b150..053d0ee 100644
--- a/test/Analysis/malloc-custom.c
+++ b/test/Analysis/malloc-custom.c
@@ -1,6 +1,6 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc -Wno-incompatible-library-redeclaration -verify %s
 
-// Various tests to make the the analyzer is robust against custom
+// Various tests to make the analyzer is robust against custom
 // redeclarations of memory routines.
 //
 // You wouldn't expect to see much of this in normal code, but, for example,
diff --git a/test/Analysis/malloc-fnptr-plist.c b/test/Analysis/malloc-fnptr-plist.c
new file mode 100644
index 0000000..6490eeb
--- /dev/null
+++ b/test/Analysis/malloc-fnptr-plist.c
@@ -0,0 +1,11 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker core,unix.Malloc -analyzer-output=plist -o %t.plist -verify %s
+// RUN: FileCheck --input-file=%t.plist %s
+
+void free(void *);
+void (*fnptr)(int);
+void foo() {
+  free((void *)fnptr); // expected-warning{{Argument to free() is a function pointer}}
+}
+
+// Make sure the bug category is correct.
+// CHECK: <key>category</key><string>Memory error</string>
diff --git a/test/Analysis/malloc-plist.c b/test/Analysis/malloc-plist.c
index e2062e8..513bf77 100644
--- a/test/Analysis/malloc-plist.c
+++ b/test/Analysis/malloc-plist.c
@@ -10,6 +10,7 @@
 void diagnosticTest(int in) {
     if (in > 5) {
         int *p = malloc(12);
+        *p = 0;
         (*p)++;
     }
     in++; // expected-warning {{leak}}
@@ -106,6 +107,7 @@
     int *m = 0;
     int *p;
     p = (int*)malloc(12);
+    *p = 0;
     (*p)++;
     m = p;
     p = 0;
@@ -392,12 +394,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>15</integer>
+// CHECK-NEXT:             <key>line</key><integer>16</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>15</integer>
+// CHECK-NEXT:             <key>line</key><integer>16</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -409,7 +411,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>15</integer>
+// CHECK-NEXT:        <key>line</key><integer>16</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -431,7 +433,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>15</integer>
+// CHECK-NEXT:     <key>line</key><integer>16</integer>
 // CHECK-NEXT:     <key>col</key><integer>5</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -447,12 +449,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>19</integer>
+// CHECK-NEXT:             <key>line</key><integer>20</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>19</integer>
+// CHECK-NEXT:             <key>line</key><integer>20</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -460,12 +462,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>20</integer>
+// CHECK-NEXT:             <key>line</key><integer>21</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>20</integer>
+// CHECK-NEXT:             <key>line</key><integer>21</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -481,12 +483,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>20</integer>
+// CHECK-NEXT:             <key>line</key><integer>21</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>20</integer>
+// CHECK-NEXT:             <key>line</key><integer>21</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -494,12 +496,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>20</integer>
+// CHECK-NEXT:             <key>line</key><integer>21</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>20</integer>
+// CHECK-NEXT:             <key>line</key><integer>21</integer>
 // CHECK-NEXT:             <key>col</key><integer>14</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -511,7 +513,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>20</integer>
+// CHECK-NEXT:        <key>line</key><integer>21</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -519,12 +521,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>20</integer>
+// CHECK-NEXT:           <key>line</key><integer>21</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>20</integer>
+// CHECK-NEXT:           <key>line</key><integer>21</integer>
 // CHECK-NEXT:           <key>col</key><integer>30</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -544,12 +546,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>20</integer>
+// CHECK-NEXT:             <key>line</key><integer>21</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>20</integer>
+// CHECK-NEXT:             <key>line</key><integer>21</integer>
 // CHECK-NEXT:             <key>col</key><integer>14</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -557,12 +559,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>22</integer>
+// CHECK-NEXT:             <key>line</key><integer>23</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>22</integer>
+// CHECK-NEXT:             <key>line</key><integer>23</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -574,7 +576,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>22</integer>
+// CHECK-NEXT:        <key>line</key><integer>23</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -596,7 +598,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>22</integer>
+// CHECK-NEXT:     <key>line</key><integer>23</integer>
 // CHECK-NEXT:     <key>col</key><integer>1</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -612,12 +614,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>25</integer>
+// CHECK-NEXT:             <key>line</key><integer>26</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>25</integer>
+// CHECK-NEXT:             <key>line</key><integer>26</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -625,12 +627,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>25</integer>
+// CHECK-NEXT:             <key>line</key><integer>26</integer>
 // CHECK-NEXT:             <key>col</key><integer>18</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>25</integer>
+// CHECK-NEXT:             <key>line</key><integer>26</integer>
 // CHECK-NEXT:             <key>col</key><integer>23</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -642,7 +644,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>25</integer>
+// CHECK-NEXT:        <key>line</key><integer>26</integer>
 // CHECK-NEXT:        <key>col</key><integer>18</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -650,12 +652,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>25</integer>
+// CHECK-NEXT:           <key>line</key><integer>26</integer>
 // CHECK-NEXT:           <key>col</key><integer>18</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>25</integer>
+// CHECK-NEXT:           <key>line</key><integer>26</integer>
 // CHECK-NEXT:           <key>col</key><integer>28</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -675,12 +677,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>25</integer>
+// CHECK-NEXT:             <key>line</key><integer>26</integer>
 // CHECK-NEXT:             <key>col</key><integer>18</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>25</integer>
+// CHECK-NEXT:             <key>line</key><integer>26</integer>
 // CHECK-NEXT:             <key>col</key><integer>23</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -688,12 +690,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>27</integer>
+// CHECK-NEXT:             <key>line</key><integer>28</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>27</integer>
+// CHECK-NEXT:             <key>line</key><integer>28</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -709,12 +711,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>27</integer>
+// CHECK-NEXT:             <key>line</key><integer>28</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>27</integer>
+// CHECK-NEXT:             <key>line</key><integer>28</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -722,12 +724,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>27</integer>
+// CHECK-NEXT:             <key>line</key><integer>28</integer>
 // CHECK-NEXT:             <key>col</key><integer>18</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>27</integer>
+// CHECK-NEXT:             <key>line</key><integer>28</integer>
 // CHECK-NEXT:             <key>col</key><integer>24</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -739,7 +741,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>27</integer>
+// CHECK-NEXT:        <key>line</key><integer>28</integer>
 // CHECK-NEXT:        <key>col</key><integer>18</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -747,12 +749,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
+// CHECK-NEXT:           <key>line</key><integer>28</integer>
 // CHECK-NEXT:           <key>col</key><integer>18</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>27</integer>
+// CHECK-NEXT:           <key>line</key><integer>28</integer>
 // CHECK-NEXT:           <key>col</key><integer>40</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -772,12 +774,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>27</integer>
+// CHECK-NEXT:             <key>line</key><integer>28</integer>
 // CHECK-NEXT:             <key>col</key><integer>18</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>27</integer>
+// CHECK-NEXT:             <key>line</key><integer>28</integer>
 // CHECK-NEXT:             <key>col</key><integer>24</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -785,12 +787,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -806,12 +808,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -819,12 +821,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -836,7 +838,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>28</integer>
+// CHECK-NEXT:        <key>line</key><integer>29</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -844,12 +846,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>28</integer>
+// CHECK-NEXT:           <key>line</key><integer>29</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>28</integer>
+// CHECK-NEXT:           <key>line</key><integer>29</integer>
 // CHECK-NEXT:           <key>col</key><integer>12</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -869,12 +871,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -882,12 +884,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -899,7 +901,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>28</integer>
+// CHECK-NEXT:        <key>line</key><integer>29</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -907,12 +909,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>28</integer>
+// CHECK-NEXT:           <key>line</key><integer>29</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>28</integer>
+// CHECK-NEXT:           <key>line</key><integer>29</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -932,12 +934,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>28</integer>
+// CHECK-NEXT:             <key>line</key><integer>29</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -945,12 +947,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>29</integer>
+// CHECK-NEXT:             <key>line</key><integer>30</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>29</integer>
+// CHECK-NEXT:             <key>line</key><integer>30</integer>
 // CHECK-NEXT:             <key>col</key><integer>14</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -962,7 +964,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>29</integer>
+// CHECK-NEXT:        <key>line</key><integer>30</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -984,7 +986,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>29</integer>
+// CHECK-NEXT:     <key>line</key><integer>30</integer>
 // CHECK-NEXT:     <key>col</key><integer>9</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -1000,12 +1002,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>44</integer>
+// CHECK-NEXT:             <key>line</key><integer>45</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>44</integer>
+// CHECK-NEXT:             <key>line</key><integer>45</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1013,12 +1015,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>44</integer>
+// CHECK-NEXT:             <key>line</key><integer>45</integer>
 // CHECK-NEXT:             <key>col</key><integer>15</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>44</integer>
+// CHECK-NEXT:             <key>line</key><integer>45</integer>
 // CHECK-NEXT:             <key>col</key><integer>21</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1030,7 +1032,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>44</integer>
+// CHECK-NEXT:        <key>line</key><integer>45</integer>
 // CHECK-NEXT:        <key>col</key><integer>15</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1038,12 +1040,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>44</integer>
+// CHECK-NEXT:           <key>line</key><integer>45</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>44</integer>
+// CHECK-NEXT:           <key>line</key><integer>45</integer>
 // CHECK-NEXT:           <key>col</key><integer>23</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1059,7 +1061,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>35</integer>
+// CHECK-NEXT:        <key>line</key><integer>36</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1077,12 +1079,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>35</integer>
+// CHECK-NEXT:             <key>line</key><integer>36</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>35</integer>
+// CHECK-NEXT:             <key>line</key><integer>36</integer>
 // CHECK-NEXT:             <key>col</key><integer>4</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1090,12 +1092,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>36</integer>
+// CHECK-NEXT:             <key>line</key><integer>37</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>36</integer>
+// CHECK-NEXT:             <key>line</key><integer>37</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1111,12 +1113,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>36</integer>
+// CHECK-NEXT:             <key>line</key><integer>37</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>36</integer>
+// CHECK-NEXT:             <key>line</key><integer>37</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1124,12 +1126,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>36</integer>
+// CHECK-NEXT:             <key>line</key><integer>37</integer>
 // CHECK-NEXT:             <key>col</key><integer>13</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>36</integer>
+// CHECK-NEXT:             <key>line</key><integer>37</integer>
 // CHECK-NEXT:             <key>col</key><integer>18</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1141,7 +1143,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>36</integer>
+// CHECK-NEXT:        <key>line</key><integer>37</integer>
 // CHECK-NEXT:        <key>col</key><integer>13</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1149,12 +1151,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>36</integer>
+// CHECK-NEXT:           <key>line</key><integer>37</integer>
 // CHECK-NEXT:           <key>col</key><integer>13</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>36</integer>
+// CHECK-NEXT:           <key>line</key><integer>37</integer>
 // CHECK-NEXT:           <key>col</key><integer>23</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1174,12 +1176,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>36</integer>
+// CHECK-NEXT:             <key>line</key><integer>37</integer>
 // CHECK-NEXT:             <key>col</key><integer>13</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>36</integer>
+// CHECK-NEXT:             <key>line</key><integer>37</integer>
 // CHECK-NEXT:             <key>col</key><integer>18</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1187,12 +1189,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>38</integer>
+// CHECK-NEXT:             <key>line</key><integer>39</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>38</integer>
+// CHECK-NEXT:             <key>line</key><integer>39</integer>
 // CHECK-NEXT:             <key>col</key><integer>4</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1208,12 +1210,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>38</integer>
+// CHECK-NEXT:             <key>line</key><integer>39</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>38</integer>
+// CHECK-NEXT:             <key>line</key><integer>39</integer>
 // CHECK-NEXT:             <key>col</key><integer>4</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1221,12 +1223,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>38</integer>
+// CHECK-NEXT:             <key>line</key><integer>39</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>38</integer>
+// CHECK-NEXT:             <key>line</key><integer>39</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1238,7 +1240,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>38</integer>
+// CHECK-NEXT:        <key>line</key><integer>39</integer>
 // CHECK-NEXT:        <key>col</key><integer>7</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1246,12 +1248,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>38</integer>
+// CHECK-NEXT:           <key>line</key><integer>39</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>38</integer>
+// CHECK-NEXT:           <key>line</key><integer>39</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1271,12 +1273,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>38</integer>
+// CHECK-NEXT:             <key>line</key><integer>39</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>38</integer>
+// CHECK-NEXT:             <key>line</key><integer>39</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1284,12 +1286,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>39</integer>
+// CHECK-NEXT:             <key>line</key><integer>40</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>39</integer>
+// CHECK-NEXT:             <key>line</key><integer>40</integer>
 // CHECK-NEXT:             <key>col</key><integer>10</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1301,7 +1303,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>44</integer>
+// CHECK-NEXT:        <key>line</key><integer>45</integer>
 // CHECK-NEXT:        <key>col</key><integer>15</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1309,12 +1311,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>44</integer>
+// CHECK-NEXT:           <key>line</key><integer>45</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>44</integer>
+// CHECK-NEXT:           <key>line</key><integer>45</integer>
 // CHECK-NEXT:           <key>col</key><integer>23</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1334,12 +1336,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>44</integer>
+// CHECK-NEXT:             <key>line</key><integer>45</integer>
 // CHECK-NEXT:             <key>col</key><integer>15</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>44</integer>
+// CHECK-NEXT:             <key>line</key><integer>45</integer>
 // CHECK-NEXT:             <key>col</key><integer>21</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1347,12 +1349,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>46</integer>
+// CHECK-NEXT:             <key>line</key><integer>47</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>46</integer>
+// CHECK-NEXT:             <key>line</key><integer>47</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1364,7 +1366,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>46</integer>
+// CHECK-NEXT:        <key>line</key><integer>47</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1386,7 +1388,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>46</integer>
+// CHECK-NEXT:     <key>line</key><integer>47</integer>
 // CHECK-NEXT:     <key>col</key><integer>1</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -1402,12 +1404,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>60</integer>
+// CHECK-NEXT:             <key>line</key><integer>61</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>60</integer>
+// CHECK-NEXT:             <key>line</key><integer>61</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1415,12 +1417,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>61</integer>
+// CHECK-NEXT:             <key>line</key><integer>62</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>61</integer>
+// CHECK-NEXT:             <key>line</key><integer>62</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1432,7 +1434,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>61</integer>
+// CHECK-NEXT:        <key>line</key><integer>62</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1440,12 +1442,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>61</integer>
+// CHECK-NEXT:           <key>line</key><integer>62</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>61</integer>
+// CHECK-NEXT:           <key>line</key><integer>62</integer>
 // CHECK-NEXT:           <key>col</key><integer>28</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1461,7 +1463,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>53</integer>
+// CHECK-NEXT:        <key>line</key><integer>54</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1479,12 +1481,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>53</integer>
+// CHECK-NEXT:             <key>line</key><integer>54</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>53</integer>
+// CHECK-NEXT:             <key>line</key><integer>54</integer>
 // CHECK-NEXT:             <key>col</key><integer>4</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1492,12 +1494,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>54</integer>
+// CHECK-NEXT:             <key>line</key><integer>55</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>54</integer>
+// CHECK-NEXT:             <key>line</key><integer>55</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1513,12 +1515,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>54</integer>
+// CHECK-NEXT:             <key>line</key><integer>55</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>54</integer>
+// CHECK-NEXT:             <key>line</key><integer>55</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1526,12 +1528,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>54</integer>
+// CHECK-NEXT:             <key>line</key><integer>55</integer>
 // CHECK-NEXT:             <key>col</key><integer>10</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>54</integer>
+// CHECK-NEXT:             <key>line</key><integer>55</integer>
 // CHECK-NEXT:             <key>col</key><integer>15</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1543,7 +1545,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>54</integer>
+// CHECK-NEXT:        <key>line</key><integer>55</integer>
 // CHECK-NEXT:        <key>col</key><integer>10</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1551,12 +1553,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>54</integer>
+// CHECK-NEXT:           <key>line</key><integer>55</integer>
 // CHECK-NEXT:           <key>col</key><integer>10</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>54</integer>
+// CHECK-NEXT:           <key>line</key><integer>55</integer>
 // CHECK-NEXT:           <key>col</key><integer>20</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1576,12 +1578,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>54</integer>
+// CHECK-NEXT:             <key>line</key><integer>55</integer>
 // CHECK-NEXT:             <key>col</key><integer>10</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>54</integer>
+// CHECK-NEXT:             <key>line</key><integer>55</integer>
 // CHECK-NEXT:             <key>col</key><integer>15</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1589,12 +1591,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>55</integer>
+// CHECK-NEXT:             <key>line</key><integer>56</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>55</integer>
+// CHECK-NEXT:             <key>line</key><integer>56</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1610,12 +1612,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>55</integer>
+// CHECK-NEXT:             <key>line</key><integer>56</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>55</integer>
+// CHECK-NEXT:             <key>line</key><integer>56</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1623,12 +1625,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>55</integer>
+// CHECK-NEXT:             <key>line</key><integer>56</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>55</integer>
+// CHECK-NEXT:             <key>line</key><integer>56</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1640,7 +1642,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>55</integer>
+// CHECK-NEXT:        <key>line</key><integer>56</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1648,12 +1650,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>55</integer>
+// CHECK-NEXT:           <key>line</key><integer>56</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>55</integer>
+// CHECK-NEXT:           <key>line</key><integer>56</integer>
 // CHECK-NEXT:           <key>col</key><integer>10</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1673,12 +1675,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>55</integer>
+// CHECK-NEXT:             <key>line</key><integer>56</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>55</integer>
+// CHECK-NEXT:             <key>line</key><integer>56</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1686,12 +1688,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>56</integer>
+// CHECK-NEXT:             <key>line</key><integer>57</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>56</integer>
+// CHECK-NEXT:             <key>line</key><integer>57</integer>
 // CHECK-NEXT:             <key>col</key><integer>13</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1703,7 +1705,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>56</integer>
+// CHECK-NEXT:        <key>line</key><integer>57</integer>
 // CHECK-NEXT:        <key>col</key><integer>7</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1711,12 +1713,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
+// CHECK-NEXT:           <key>line</key><integer>57</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
+// CHECK-NEXT:           <key>line</key><integer>57</integer>
 // CHECK-NEXT:           <key>col</key><integer>17</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1732,7 +1734,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>50</integer>
+// CHECK-NEXT:        <key>line</key><integer>51</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1750,12 +1752,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>50</integer>
+// CHECK-NEXT:             <key>line</key><integer>51</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>50</integer>
+// CHECK-NEXT:             <key>line</key><integer>51</integer>
 // CHECK-NEXT:             <key>col</key><integer>4</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1763,12 +1765,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>51</integer>
+// CHECK-NEXT:             <key>line</key><integer>52</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>51</integer>
+// CHECK-NEXT:             <key>line</key><integer>52</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1780,7 +1782,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>51</integer>
+// CHECK-NEXT:        <key>line</key><integer>52</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1788,12 +1790,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>51</integer>
+// CHECK-NEXT:           <key>line</key><integer>52</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>51</integer>
+// CHECK-NEXT:           <key>line</key><integer>52</integer>
 // CHECK-NEXT:           <key>col</key><integer>11</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1809,7 +1811,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>56</integer>
+// CHECK-NEXT:        <key>line</key><integer>57</integer>
 // CHECK-NEXT:        <key>col</key><integer>7</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -1817,12 +1819,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
+// CHECK-NEXT:           <key>line</key><integer>57</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
+// CHECK-NEXT:           <key>line</key><integer>57</integer>
 // CHECK-NEXT:           <key>col</key><integer>17</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1842,12 +1844,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>56</integer>
+// CHECK-NEXT:             <key>line</key><integer>57</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>56</integer>
+// CHECK-NEXT:             <key>line</key><integer>57</integer>
 // CHECK-NEXT:             <key>col</key><integer>13</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1855,75 +1857,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>57</integer>
+// CHECK-NEXT:             <key>line</key><integer>58</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>57</integer>
-// CHECK-NEXT:             <key>col</key><integer>10</integer>
-// CHECK-NEXT:             <key>file</key><integer>0</integer>
-// CHECK-NEXT:            </dict>
-// CHECK-NEXT:           </array>
-// CHECK-NEXT:         </dict>
-// CHECK-NEXT:        </array>
-// CHECK-NEXT:      </dict>
-// CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>kind</key><string>event</string>
-// CHECK-NEXT:       <key>location</key>
-// CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>61</integer>
-// CHECK-NEXT:        <key>col</key><integer>5</integer>
-// CHECK-NEXT:        <key>file</key><integer>0</integer>
-// CHECK-NEXT:       </dict>
-// CHECK-NEXT:       <key>ranges</key>
-// CHECK-NEXT:       <array>
-// CHECK-NEXT:         <array>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>61</integer>
-// CHECK-NEXT:           <key>col</key><integer>5</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>61</integer>
-// CHECK-NEXT:           <key>col</key><integer>28</integer>
-// CHECK-NEXT:           <key>file</key><integer>0</integer>
-// CHECK-NEXT:          </dict>
-// CHECK-NEXT:         </array>
-// CHECK-NEXT:       </array>
-// CHECK-NEXT:       <key>depth</key><integer>0</integer>
-// CHECK-NEXT:       <key>extended_message</key>
-// CHECK-NEXT:       <string>Returning; memory was released via 1st parameter</string>
-// CHECK-NEXT:       <key>message</key>
-// CHECK-NEXT:       <string>Returning; memory was released via 1st parameter</string>
-// CHECK-NEXT:      </dict>
-// CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>kind</key><string>control</string>
-// CHECK-NEXT:       <key>edges</key>
-// CHECK-NEXT:        <array>
-// CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>start</key>
-// CHECK-NEXT:           <array>
-// CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>61</integer>
-// CHECK-NEXT:             <key>col</key><integer>5</integer>
-// CHECK-NEXT:             <key>file</key><integer>0</integer>
-// CHECK-NEXT:            </dict>
-// CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>61</integer>
-// CHECK-NEXT:             <key>col</key><integer>22</integer>
-// CHECK-NEXT:             <key>file</key><integer>0</integer>
-// CHECK-NEXT:            </dict>
-// CHECK-NEXT:           </array>
-// CHECK-NEXT:          <key>end</key>
-// CHECK-NEXT:           <array>
-// CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>62</integer>
-// CHECK-NEXT:             <key>col</key><integer>5</integer>
-// CHECK-NEXT:             <key>file</key><integer>0</integer>
-// CHECK-NEXT:            </dict>
-// CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>62</integer>
+// CHECK-NEXT:             <key>line</key><integer>58</integer>
 // CHECK-NEXT:             <key>col</key><integer>10</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -1944,11 +1883,74 @@
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
 // CHECK-NEXT:           <key>line</key><integer>62</integer>
-// CHECK-NEXT:           <key>col</key><integer>12</integer>
+// CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
 // CHECK-NEXT:           <key>line</key><integer>62</integer>
+// CHECK-NEXT:           <key>col</key><integer>28</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:       <key>depth</key><integer>0</integer>
+// CHECK-NEXT:       <key>extended_message</key>
+// CHECK-NEXT:       <string>Returning; memory was released via 1st parameter</string>
+// CHECK-NEXT:       <key>message</key>
+// CHECK-NEXT:       <string>Returning; memory was released via 1st parameter</string>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>kind</key><string>control</string>
+// CHECK-NEXT:       <key>edges</key>
+// CHECK-NEXT:        <array>
+// CHECK-NEXT:         <dict>
+// CHECK-NEXT:          <key>start</key>
+// CHECK-NEXT:           <array>
+// CHECK-NEXT:            <dict>
+// CHECK-NEXT:             <key>line</key><integer>62</integer>
+// CHECK-NEXT:             <key>col</key><integer>5</integer>
+// CHECK-NEXT:             <key>file</key><integer>0</integer>
+// CHECK-NEXT:            </dict>
+// CHECK-NEXT:            <dict>
+// CHECK-NEXT:             <key>line</key><integer>62</integer>
+// CHECK-NEXT:             <key>col</key><integer>22</integer>
+// CHECK-NEXT:             <key>file</key><integer>0</integer>
+// CHECK-NEXT:            </dict>
+// CHECK-NEXT:           </array>
+// CHECK-NEXT:          <key>end</key>
+// CHECK-NEXT:           <array>
+// CHECK-NEXT:            <dict>
+// CHECK-NEXT:             <key>line</key><integer>63</integer>
+// CHECK-NEXT:             <key>col</key><integer>5</integer>
+// CHECK-NEXT:             <key>file</key><integer>0</integer>
+// CHECK-NEXT:            </dict>
+// CHECK-NEXT:            <dict>
+// CHECK-NEXT:             <key>line</key><integer>63</integer>
+// CHECK-NEXT:             <key>col</key><integer>10</integer>
+// CHECK-NEXT:             <key>file</key><integer>0</integer>
+// CHECK-NEXT:            </dict>
+// CHECK-NEXT:           </array>
+// CHECK-NEXT:         </dict>
+// CHECK-NEXT:        </array>
+// CHECK-NEXT:      </dict>
+// CHECK-NEXT:      <dict>
+// CHECK-NEXT:       <key>kind</key><string>event</string>
+// CHECK-NEXT:       <key>location</key>
+// CHECK-NEXT:       <dict>
+// CHECK-NEXT:        <key>line</key><integer>63</integer>
+// CHECK-NEXT:        <key>col</key><integer>5</integer>
+// CHECK-NEXT:        <key>file</key><integer>0</integer>
+// CHECK-NEXT:       </dict>
+// CHECK-NEXT:       <key>ranges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>63</integer>
+// CHECK-NEXT:           <key>col</key><integer>12</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>63</integer>
 // CHECK-NEXT:           <key>col</key><integer>14</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1972,7 +1974,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>62</integer>
+// CHECK-NEXT:     <key>line</key><integer>63</integer>
 // CHECK-NEXT:     <key>col</key><integer>5</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -1988,12 +1990,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>75</integer>
+// CHECK-NEXT:             <key>line</key><integer>76</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>75</integer>
+// CHECK-NEXT:             <key>line</key><integer>76</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2001,12 +2003,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>75</integer>
+// CHECK-NEXT:             <key>line</key><integer>76</integer>
 // CHECK-NEXT:             <key>col</key><integer>25</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>75</integer>
+// CHECK-NEXT:             <key>line</key><integer>76</integer>
 // CHECK-NEXT:             <key>col</key><integer>30</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2018,7 +2020,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>75</integer>
+// CHECK-NEXT:        <key>line</key><integer>76</integer>
 // CHECK-NEXT:        <key>col</key><integer>25</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2026,12 +2028,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>75</integer>
+// CHECK-NEXT:           <key>line</key><integer>76</integer>
 // CHECK-NEXT:           <key>col</key><integer>25</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>75</integer>
+// CHECK-NEXT:           <key>line</key><integer>76</integer>
 // CHECK-NEXT:           <key>col</key><integer>35</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2051,12 +2053,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>75</integer>
+// CHECK-NEXT:             <key>line</key><integer>76</integer>
 // CHECK-NEXT:             <key>col</key><integer>25</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>75</integer>
+// CHECK-NEXT:             <key>line</key><integer>76</integer>
 // CHECK-NEXT:             <key>col</key><integer>30</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2064,12 +2066,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>76</integer>
+// CHECK-NEXT:             <key>line</key><integer>77</integer>
 // CHECK-NEXT:             <key>col</key><integer>11</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>76</integer>
+// CHECK-NEXT:             <key>line</key><integer>77</integer>
 // CHECK-NEXT:             <key>col</key><integer>20</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2081,7 +2083,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>76</integer>
+// CHECK-NEXT:        <key>line</key><integer>77</integer>
 // CHECK-NEXT:        <key>col</key><integer>11</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2089,12 +2091,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>76</integer>
+// CHECK-NEXT:           <key>line</key><integer>77</integer>
 // CHECK-NEXT:           <key>col</key><integer>11</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>76</integer>
+// CHECK-NEXT:           <key>line</key><integer>77</integer>
 // CHECK-NEXT:           <key>col</key><integer>25</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2110,7 +2112,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>66</integer>
+// CHECK-NEXT:        <key>line</key><integer>67</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2128,12 +2130,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>66</integer>
+// CHECK-NEXT:             <key>line</key><integer>67</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>66</integer>
+// CHECK-NEXT:             <key>line</key><integer>67</integer>
 // CHECK-NEXT:             <key>col</key><integer>4</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2141,12 +2143,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>67</integer>
+// CHECK-NEXT:             <key>line</key><integer>68</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>67</integer>
+// CHECK-NEXT:             <key>line</key><integer>68</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2162,12 +2164,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>67</integer>
+// CHECK-NEXT:             <key>line</key><integer>68</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>67</integer>
+// CHECK-NEXT:             <key>line</key><integer>68</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2175,12 +2177,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>68</integer>
+// CHECK-NEXT:             <key>line</key><integer>69</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>68</integer>
+// CHECK-NEXT:             <key>line</key><integer>69</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2196,12 +2198,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>68</integer>
+// CHECK-NEXT:             <key>line</key><integer>69</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>68</integer>
+// CHECK-NEXT:             <key>line</key><integer>69</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2209,12 +2211,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>68</integer>
+// CHECK-NEXT:             <key>line</key><integer>69</integer>
 // CHECK-NEXT:             <key>col</key><integer>18</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>68</integer>
+// CHECK-NEXT:             <key>line</key><integer>69</integer>
 // CHECK-NEXT:             <key>col</key><integer>24</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2226,7 +2228,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>68</integer>
+// CHECK-NEXT:        <key>line</key><integer>69</integer>
 // CHECK-NEXT:        <key>col</key><integer>18</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2234,12 +2236,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>68</integer>
+// CHECK-NEXT:           <key>line</key><integer>69</integer>
 // CHECK-NEXT:           <key>col</key><integer>18</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>68</integer>
+// CHECK-NEXT:           <key>line</key><integer>69</integer>
 // CHECK-NEXT:           <key>col</key><integer>40</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2259,12 +2261,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>68</integer>
+// CHECK-NEXT:             <key>line</key><integer>69</integer>
 // CHECK-NEXT:             <key>col</key><integer>18</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>68</integer>
+// CHECK-NEXT:             <key>line</key><integer>69</integer>
 // CHECK-NEXT:             <key>col</key><integer>24</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2272,12 +2274,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2293,12 +2295,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2306,12 +2308,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2323,7 +2325,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>69</integer>
+// CHECK-NEXT:        <key>line</key><integer>70</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2331,12 +2333,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>69</integer>
+// CHECK-NEXT:           <key>line</key><integer>70</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>69</integer>
+// CHECK-NEXT:           <key>line</key><integer>70</integer>
 // CHECK-NEXT:           <key>col</key><integer>12</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2356,12 +2358,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2369,12 +2371,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2386,7 +2388,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>69</integer>
+// CHECK-NEXT:        <key>line</key><integer>70</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2394,12 +2396,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>69</integer>
+// CHECK-NEXT:           <key>line</key><integer>70</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>69</integer>
+// CHECK-NEXT:           <key>line</key><integer>70</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2419,12 +2421,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>69</integer>
+// CHECK-NEXT:             <key>line</key><integer>70</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2432,12 +2434,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>70</integer>
+// CHECK-NEXT:             <key>line</key><integer>71</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>70</integer>
+// CHECK-NEXT:             <key>line</key><integer>71</integer>
 // CHECK-NEXT:             <key>col</key><integer>14</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2449,7 +2451,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>76</integer>
+// CHECK-NEXT:        <key>line</key><integer>77</integer>
 // CHECK-NEXT:        <key>col</key><integer>11</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2457,12 +2459,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>76</integer>
+// CHECK-NEXT:           <key>line</key><integer>77</integer>
 // CHECK-NEXT:           <key>col</key><integer>11</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>76</integer>
+// CHECK-NEXT:           <key>line</key><integer>77</integer>
 // CHECK-NEXT:           <key>col</key><integer>25</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2482,12 +2484,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>76</integer>
+// CHECK-NEXT:             <key>line</key><integer>77</integer>
 // CHECK-NEXT:             <key>col</key><integer>11</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>76</integer>
+// CHECK-NEXT:             <key>line</key><integer>77</integer>
 // CHECK-NEXT:             <key>col</key><integer>20</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2495,12 +2497,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>77</integer>
+// CHECK-NEXT:             <key>line</key><integer>78</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>77</integer>
+// CHECK-NEXT:             <key>line</key><integer>78</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2512,7 +2514,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>77</integer>
+// CHECK-NEXT:        <key>line</key><integer>78</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2534,7 +2536,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>77</integer>
+// CHECK-NEXT:     <key>line</key><integer>78</integer>
 // CHECK-NEXT:     <key>col</key><integer>5</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -2550,12 +2552,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>85</integer>
+// CHECK-NEXT:             <key>line</key><integer>86</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>85</integer>
+// CHECK-NEXT:             <key>line</key><integer>86</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2563,12 +2565,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>86</integer>
+// CHECK-NEXT:             <key>line</key><integer>87</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>86</integer>
+// CHECK-NEXT:             <key>line</key><integer>87</integer>
 // CHECK-NEXT:             <key>col</key><integer>26</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2580,7 +2582,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>86</integer>
+// CHECK-NEXT:        <key>line</key><integer>87</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2588,12 +2590,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>86</integer>
+// CHECK-NEXT:           <key>line</key><integer>87</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>86</integer>
+// CHECK-NEXT:           <key>line</key><integer>87</integer>
 // CHECK-NEXT:           <key>col</key><integer>28</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2609,7 +2611,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>81</integer>
+// CHECK-NEXT:        <key>line</key><integer>82</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2627,12 +2629,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>81</integer>
+// CHECK-NEXT:             <key>line</key><integer>82</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>81</integer>
+// CHECK-NEXT:             <key>line</key><integer>82</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2640,12 +2642,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>82</integer>
+// CHECK-NEXT:             <key>line</key><integer>83</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>82</integer>
+// CHECK-NEXT:             <key>line</key><integer>83</integer>
 // CHECK-NEXT:             <key>col</key><integer>10</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2661,12 +2663,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>82</integer>
+// CHECK-NEXT:             <key>line</key><integer>83</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>82</integer>
+// CHECK-NEXT:             <key>line</key><integer>83</integer>
 // CHECK-NEXT:             <key>col</key><integer>10</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2674,12 +2676,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>82</integer>
+// CHECK-NEXT:             <key>line</key><integer>83</integer>
 // CHECK-NEXT:             <key>col</key><integer>19</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>82</integer>
+// CHECK-NEXT:             <key>line</key><integer>83</integer>
 // CHECK-NEXT:             <key>col</key><integer>24</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2691,7 +2693,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>82</integer>
+// CHECK-NEXT:        <key>line</key><integer>83</integer>
 // CHECK-NEXT:        <key>col</key><integer>19</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2699,12 +2701,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>82</integer>
+// CHECK-NEXT:           <key>line</key><integer>83</integer>
 // CHECK-NEXT:           <key>col</key><integer>19</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>82</integer>
+// CHECK-NEXT:           <key>line</key><integer>83</integer>
 // CHECK-NEXT:           <key>col</key><integer>28</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2720,7 +2722,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>86</integer>
+// CHECK-NEXT:        <key>line</key><integer>87</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2728,12 +2730,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>86</integer>
+// CHECK-NEXT:           <key>line</key><integer>87</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>86</integer>
+// CHECK-NEXT:           <key>line</key><integer>87</integer>
 // CHECK-NEXT:           <key>col</key><integer>28</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2753,12 +2755,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>86</integer>
+// CHECK-NEXT:             <key>line</key><integer>87</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>86</integer>
+// CHECK-NEXT:             <key>line</key><integer>87</integer>
 // CHECK-NEXT:             <key>col</key><integer>26</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2766,12 +2768,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>87</integer>
+// CHECK-NEXT:             <key>line</key><integer>88</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>87</integer>
+// CHECK-NEXT:             <key>line</key><integer>88</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2783,7 +2785,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>87</integer>
+// CHECK-NEXT:        <key>line</key><integer>88</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2805,7 +2807,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>87</integer>
+// CHECK-NEXT:     <key>line</key><integer>88</integer>
 // CHECK-NEXT:     <key>col</key><integer>1</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -2821,12 +2823,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>97</integer>
+// CHECK-NEXT:             <key>line</key><integer>98</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>97</integer>
+// CHECK-NEXT:             <key>line</key><integer>98</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2834,12 +2836,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>99</integer>
+// CHECK-NEXT:             <key>line</key><integer>100</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>99</integer>
+// CHECK-NEXT:             <key>line</key><integer>100</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2855,12 +2857,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>99</integer>
+// CHECK-NEXT:             <key>line</key><integer>100</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>99</integer>
+// CHECK-NEXT:             <key>line</key><integer>100</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2868,12 +2870,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>99</integer>
+// CHECK-NEXT:             <key>line</key><integer>100</integer>
 // CHECK-NEXT:             <key>col</key><integer>12</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>99</integer>
+// CHECK-NEXT:             <key>line</key><integer>100</integer>
 // CHECK-NEXT:             <key>col</key><integer>17</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2885,7 +2887,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>99</integer>
+// CHECK-NEXT:        <key>line</key><integer>100</integer>
 // CHECK-NEXT:        <key>col</key><integer>12</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2893,12 +2895,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>99</integer>
+// CHECK-NEXT:           <key>line</key><integer>100</integer>
 // CHECK-NEXT:           <key>col</key><integer>12</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>99</integer>
+// CHECK-NEXT:           <key>line</key><integer>100</integer>
 // CHECK-NEXT:           <key>col</key><integer>30</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2918,12 +2920,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>99</integer>
+// CHECK-NEXT:             <key>line</key><integer>100</integer>
 // CHECK-NEXT:             <key>col</key><integer>12</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>99</integer>
+// CHECK-NEXT:             <key>line</key><integer>100</integer>
 // CHECK-NEXT:             <key>col</key><integer>17</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2931,12 +2933,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>100</integer>
+// CHECK-NEXT:             <key>line</key><integer>101</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>100</integer>
+// CHECK-NEXT:             <key>line</key><integer>101</integer>
 // CHECK-NEXT:             <key>col</key><integer>20</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -2948,7 +2950,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>100</integer>
+// CHECK-NEXT:        <key>line</key><integer>101</integer>
 // CHECK-NEXT:        <key>col</key><integer>3</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2956,12 +2958,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>100</integer>
+// CHECK-NEXT:           <key>line</key><integer>101</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>100</integer>
+// CHECK-NEXT:           <key>line</key><integer>101</integer>
 // CHECK-NEXT:           <key>col</key><integer>35</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2977,7 +2979,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>92</integer>
+// CHECK-NEXT:        <key>line</key><integer>93</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -2995,12 +2997,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>92</integer>
+// CHECK-NEXT:             <key>line</key><integer>93</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>92</integer>
+// CHECK-NEXT:             <key>line</key><integer>93</integer>
 // CHECK-NEXT:             <key>col</key><integer>4</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3008,12 +3010,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>93</integer>
+// CHECK-NEXT:             <key>line</key><integer>94</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>93</integer>
+// CHECK-NEXT:             <key>line</key><integer>94</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3025,7 +3027,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>93</integer>
+// CHECK-NEXT:        <key>line</key><integer>94</integer>
 // CHECK-NEXT:        <key>col</key><integer>3</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3033,12 +3035,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>93</integer>
+// CHECK-NEXT:           <key>line</key><integer>94</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>93</integer>
+// CHECK-NEXT:           <key>line</key><integer>94</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -3054,7 +3056,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>100</integer>
+// CHECK-NEXT:        <key>line</key><integer>101</integer>
 // CHECK-NEXT:        <key>col</key><integer>3</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3062,12 +3064,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>100</integer>
+// CHECK-NEXT:           <key>line</key><integer>101</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>100</integer>
+// CHECK-NEXT:           <key>line</key><integer>101</integer>
 // CHECK-NEXT:           <key>col</key><integer>35</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -3087,12 +3089,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>100</integer>
+// CHECK-NEXT:             <key>line</key><integer>101</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>100</integer>
+// CHECK-NEXT:             <key>line</key><integer>101</integer>
 // CHECK-NEXT:             <key>col</key><integer>20</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3100,12 +3102,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>101</integer>
+// CHECK-NEXT:             <key>line</key><integer>102</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>101</integer>
+// CHECK-NEXT:             <key>line</key><integer>102</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3117,7 +3119,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>101</integer>
+// CHECK-NEXT:        <key>line</key><integer>102</integer>
 // CHECK-NEXT:        <key>col</key><integer>6</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3125,12 +3127,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>101</integer>
+// CHECK-NEXT:           <key>line</key><integer>102</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>101</integer>
+// CHECK-NEXT:           <key>line</key><integer>102</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -3154,7 +3156,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>5</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>101</integer>
+// CHECK-NEXT:     <key>line</key><integer>102</integer>
 // CHECK-NEXT:     <key>col</key><integer>6</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -3170,12 +3172,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>106</integer>
+// CHECK-NEXT:             <key>line</key><integer>107</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>106</integer>
+// CHECK-NEXT:             <key>line</key><integer>107</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3183,12 +3185,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>108</integer>
+// CHECK-NEXT:             <key>line</key><integer>109</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>108</integer>
+// CHECK-NEXT:             <key>line</key><integer>109</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3204,12 +3206,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>108</integer>
+// CHECK-NEXT:             <key>line</key><integer>109</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>108</integer>
+// CHECK-NEXT:             <key>line</key><integer>109</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3217,12 +3219,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>108</integer>
+// CHECK-NEXT:             <key>line</key><integer>109</integer>
 // CHECK-NEXT:             <key>col</key><integer>15</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>108</integer>
+// CHECK-NEXT:             <key>line</key><integer>109</integer>
 // CHECK-NEXT:             <key>col</key><integer>20</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3234,7 +3236,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>108</integer>
+// CHECK-NEXT:        <key>line</key><integer>109</integer>
 // CHECK-NEXT:        <key>col</key><integer>15</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3242,12 +3244,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>108</integer>
+// CHECK-NEXT:           <key>line</key><integer>109</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>108</integer>
+// CHECK-NEXT:           <key>line</key><integer>109</integer>
 // CHECK-NEXT:           <key>col</key><integer>24</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -3267,12 +3269,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>108</integer>
+// CHECK-NEXT:             <key>line</key><integer>109</integer>
 // CHECK-NEXT:             <key>col</key><integer>15</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>108</integer>
+// CHECK-NEXT:             <key>line</key><integer>109</integer>
 // CHECK-NEXT:             <key>col</key><integer>20</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3280,12 +3282,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>113</integer>
+// CHECK-NEXT:             <key>line</key><integer>115</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>113</integer>
+// CHECK-NEXT:             <key>line</key><integer>115</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3297,7 +3299,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>113</integer>
+// CHECK-NEXT:        <key>line</key><integer>115</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3319,7 +3321,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>113</integer>
+// CHECK-NEXT:     <key>line</key><integer>115</integer>
 // CHECK-NEXT:     <key>col</key><integer>5</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -3331,7 +3333,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>121</integer>
+// CHECK-NEXT:        <key>line</key><integer>123</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3339,12 +3341,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>121</integer>
+// CHECK-NEXT:           <key>line</key><integer>123</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>121</integer>
+// CHECK-NEXT:           <key>line</key><integer>123</integer>
 // CHECK-NEXT:           <key>col</key><integer>25</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -3360,7 +3362,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>117</integer>
+// CHECK-NEXT:        <key>line</key><integer>119</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3378,12 +3380,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>117</integer>
+// CHECK-NEXT:             <key>line</key><integer>119</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>117</integer>
+// CHECK-NEXT:             <key>line</key><integer>119</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3391,12 +3393,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>118</integer>
+// CHECK-NEXT:             <key>line</key><integer>120</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>118</integer>
+// CHECK-NEXT:             <key>line</key><integer>120</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3412,12 +3414,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>118</integer>
+// CHECK-NEXT:             <key>line</key><integer>120</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>118</integer>
+// CHECK-NEXT:             <key>line</key><integer>120</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3425,12 +3427,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>118</integer>
+// CHECK-NEXT:             <key>line</key><integer>120</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>118</integer>
+// CHECK-NEXT:             <key>line</key><integer>120</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3442,7 +3444,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>118</integer>
+// CHECK-NEXT:        <key>line</key><integer>120</integer>
 // CHECK-NEXT:        <key>col</key><integer>22</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3450,12 +3452,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>118</integer>
+// CHECK-NEXT:           <key>line</key><integer>120</integer>
 // CHECK-NEXT:           <key>col</key><integer>22</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>118</integer>
+// CHECK-NEXT:           <key>line</key><integer>120</integer>
 // CHECK-NEXT:           <key>col</key><integer>31</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -3475,12 +3477,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>118</integer>
+// CHECK-NEXT:             <key>line</key><integer>120</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>118</integer>
+// CHECK-NEXT:             <key>line</key><integer>120</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3488,12 +3490,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>119</integer>
+// CHECK-NEXT:             <key>line</key><integer>121</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>119</integer>
+// CHECK-NEXT:             <key>line</key><integer>121</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3505,7 +3507,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>119</integer>
+// CHECK-NEXT:        <key>line</key><integer>121</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3527,7 +3529,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>119</integer>
+// CHECK-NEXT:     <key>line</key><integer>121</integer>
 // CHECK-NEXT:     <key>col</key><integer>1</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -3539,7 +3541,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>130</integer>
+// CHECK-NEXT:        <key>line</key><integer>132</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3547,12 +3549,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>130</integer>
+// CHECK-NEXT:           <key>line</key><integer>132</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>130</integer>
+// CHECK-NEXT:           <key>line</key><integer>132</integer>
 // CHECK-NEXT:           <key>col</key><integer>25</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -3568,7 +3570,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>125</integer>
+// CHECK-NEXT:        <key>line</key><integer>127</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3586,12 +3588,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>125</integer>
+// CHECK-NEXT:             <key>line</key><integer>127</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>125</integer>
+// CHECK-NEXT:             <key>line</key><integer>127</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3599,12 +3601,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>126</integer>
+// CHECK-NEXT:             <key>line</key><integer>128</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>126</integer>
+// CHECK-NEXT:             <key>line</key><integer>128</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3620,12 +3622,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>126</integer>
+// CHECK-NEXT:             <key>line</key><integer>128</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>126</integer>
+// CHECK-NEXT:             <key>line</key><integer>128</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3633,12 +3635,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>126</integer>
+// CHECK-NEXT:             <key>line</key><integer>128</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>126</integer>
+// CHECK-NEXT:             <key>line</key><integer>128</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3650,7 +3652,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>126</integer>
+// CHECK-NEXT:        <key>line</key><integer>128</integer>
 // CHECK-NEXT:        <key>col</key><integer>22</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3658,12 +3660,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>126</integer>
+// CHECK-NEXT:           <key>line</key><integer>128</integer>
 // CHECK-NEXT:           <key>col</key><integer>22</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>126</integer>
+// CHECK-NEXT:           <key>line</key><integer>128</integer>
 // CHECK-NEXT:           <key>col</key><integer>31</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -3683,12 +3685,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>126</integer>
+// CHECK-NEXT:             <key>line</key><integer>128</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>126</integer>
+// CHECK-NEXT:             <key>line</key><integer>128</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3696,12 +3698,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>127</integer>
+// CHECK-NEXT:             <key>line</key><integer>129</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>127</integer>
+// CHECK-NEXT:             <key>line</key><integer>129</integer>
 // CHECK-NEXT:             <key>col</key><integer>7</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3713,7 +3715,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>127</integer>
+// CHECK-NEXT:        <key>line</key><integer>129</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3735,7 +3737,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>127</integer>
+// CHECK-NEXT:     <key>line</key><integer>129</integer>
 // CHECK-NEXT:     <key>col</key><integer>5</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -3747,7 +3749,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>139</integer>
+// CHECK-NEXT:        <key>line</key><integer>141</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3755,12 +3757,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>139</integer>
+// CHECK-NEXT:           <key>line</key><integer>141</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>139</integer>
+// CHECK-NEXT:           <key>line</key><integer>141</integer>
 // CHECK-NEXT:           <key>col</key><integer>26</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -3776,7 +3778,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>133</integer>
+// CHECK-NEXT:        <key>line</key><integer>135</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3794,12 +3796,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>133</integer>
+// CHECK-NEXT:             <key>line</key><integer>135</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>133</integer>
+// CHECK-NEXT:             <key>line</key><integer>135</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3807,12 +3809,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>134</integer>
+// CHECK-NEXT:             <key>line</key><integer>136</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>134</integer>
+// CHECK-NEXT:             <key>line</key><integer>136</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3828,12 +3830,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>134</integer>
+// CHECK-NEXT:             <key>line</key><integer>136</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>134</integer>
+// CHECK-NEXT:             <key>line</key><integer>136</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3841,12 +3843,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>134</integer>
+// CHECK-NEXT:             <key>line</key><integer>136</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>134</integer>
+// CHECK-NEXT:             <key>line</key><integer>136</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3858,7 +3860,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>134</integer>
+// CHECK-NEXT:        <key>line</key><integer>136</integer>
 // CHECK-NEXT:        <key>col</key><integer>22</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3866,12 +3868,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>134</integer>
+// CHECK-NEXT:           <key>line</key><integer>136</integer>
 // CHECK-NEXT:           <key>col</key><integer>22</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>134</integer>
+// CHECK-NEXT:           <key>line</key><integer>136</integer>
 // CHECK-NEXT:           <key>col</key><integer>31</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -3891,12 +3893,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>134</integer>
+// CHECK-NEXT:             <key>line</key><integer>136</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>134</integer>
+// CHECK-NEXT:             <key>line</key><integer>136</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3904,12 +3906,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>135</integer>
+// CHECK-NEXT:             <key>line</key><integer>137</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>135</integer>
+// CHECK-NEXT:             <key>line</key><integer>137</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3925,12 +3927,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>135</integer>
+// CHECK-NEXT:             <key>line</key><integer>137</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>135</integer>
+// CHECK-NEXT:             <key>line</key><integer>137</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3938,12 +3940,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>135</integer>
+// CHECK-NEXT:             <key>line</key><integer>137</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>135</integer>
+// CHECK-NEXT:             <key>line</key><integer>137</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -3955,7 +3957,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>135</integer>
+// CHECK-NEXT:        <key>line</key><integer>137</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -3963,12 +3965,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>135</integer>
+// CHECK-NEXT:           <key>line</key><integer>137</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>135</integer>
+// CHECK-NEXT:           <key>line</key><integer>137</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -3988,12 +3990,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>135</integer>
+// CHECK-NEXT:             <key>line</key><integer>137</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>135</integer>
+// CHECK-NEXT:             <key>line</key><integer>137</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4001,12 +4003,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>136</integer>
+// CHECK-NEXT:             <key>line</key><integer>138</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>136</integer>
+// CHECK-NEXT:             <key>line</key><integer>138</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4018,7 +4020,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>136</integer>
+// CHECK-NEXT:        <key>line</key><integer>138</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4040,7 +4042,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>136</integer>
+// CHECK-NEXT:     <key>line</key><integer>138</integer>
 // CHECK-NEXT:     <key>col</key><integer>9</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -4052,7 +4054,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>150</integer>
+// CHECK-NEXT:        <key>line</key><integer>152</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4060,12 +4062,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>150</integer>
+// CHECK-NEXT:           <key>line</key><integer>152</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>150</integer>
+// CHECK-NEXT:           <key>line</key><integer>152</integer>
 // CHECK-NEXT:           <key>col</key><integer>26</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -4081,7 +4083,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>142</integer>
+// CHECK-NEXT:        <key>line</key><integer>144</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4099,12 +4101,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>142</integer>
+// CHECK-NEXT:             <key>line</key><integer>144</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>142</integer>
+// CHECK-NEXT:             <key>line</key><integer>144</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4112,12 +4114,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>143</integer>
+// CHECK-NEXT:             <key>line</key><integer>145</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>143</integer>
+// CHECK-NEXT:             <key>line</key><integer>145</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4133,12 +4135,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>143</integer>
+// CHECK-NEXT:             <key>line</key><integer>145</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>143</integer>
+// CHECK-NEXT:             <key>line</key><integer>145</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4146,12 +4148,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>143</integer>
+// CHECK-NEXT:             <key>line</key><integer>145</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>143</integer>
+// CHECK-NEXT:             <key>line</key><integer>145</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4163,7 +4165,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>143</integer>
+// CHECK-NEXT:        <key>line</key><integer>145</integer>
 // CHECK-NEXT:        <key>col</key><integer>22</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4171,12 +4173,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>143</integer>
+// CHECK-NEXT:           <key>line</key><integer>145</integer>
 // CHECK-NEXT:           <key>col</key><integer>22</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>143</integer>
+// CHECK-NEXT:           <key>line</key><integer>145</integer>
 // CHECK-NEXT:           <key>col</key><integer>31</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -4196,12 +4198,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>143</integer>
+// CHECK-NEXT:             <key>line</key><integer>145</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>143</integer>
+// CHECK-NEXT:             <key>line</key><integer>145</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4209,12 +4211,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>144</integer>
+// CHECK-NEXT:             <key>line</key><integer>146</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>144</integer>
+// CHECK-NEXT:             <key>line</key><integer>146</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4230,12 +4232,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>144</integer>
+// CHECK-NEXT:             <key>line</key><integer>146</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>144</integer>
+// CHECK-NEXT:             <key>line</key><integer>146</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4243,12 +4245,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>144</integer>
+// CHECK-NEXT:             <key>line</key><integer>146</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>144</integer>
+// CHECK-NEXT:             <key>line</key><integer>146</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4260,7 +4262,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>144</integer>
+// CHECK-NEXT:        <key>line</key><integer>146</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4268,12 +4270,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>144</integer>
+// CHECK-NEXT:           <key>line</key><integer>146</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>144</integer>
+// CHECK-NEXT:           <key>line</key><integer>146</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -4293,12 +4295,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>144</integer>
+// CHECK-NEXT:             <key>line</key><integer>146</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>144</integer>
+// CHECK-NEXT:             <key>line</key><integer>146</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4306,12 +4308,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>147</integer>
+// CHECK-NEXT:             <key>line</key><integer>149</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>147</integer>
+// CHECK-NEXT:             <key>line</key><integer>149</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4323,7 +4325,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>147</integer>
+// CHECK-NEXT:        <key>line</key><integer>149</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4345,7 +4347,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>147</integer>
+// CHECK-NEXT:     <key>line</key><integer>149</integer>
 // CHECK-NEXT:     <key>col</key><integer>9</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -4357,7 +4359,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>161</integer>
+// CHECK-NEXT:        <key>line</key><integer>163</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4365,12 +4367,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>161</integer>
+// CHECK-NEXT:           <key>line</key><integer>163</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>161</integer>
+// CHECK-NEXT:           <key>line</key><integer>163</integer>
 // CHECK-NEXT:           <key>col</key><integer>25</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -4386,7 +4388,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>156</integer>
+// CHECK-NEXT:        <key>line</key><integer>158</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4404,12 +4406,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>156</integer>
+// CHECK-NEXT:             <key>line</key><integer>158</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>156</integer>
+// CHECK-NEXT:             <key>line</key><integer>158</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4417,12 +4419,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>157</integer>
+// CHECK-NEXT:             <key>line</key><integer>159</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>157</integer>
+// CHECK-NEXT:             <key>line</key><integer>159</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4438,12 +4440,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>157</integer>
+// CHECK-NEXT:             <key>line</key><integer>159</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>157</integer>
+// CHECK-NEXT:             <key>line</key><integer>159</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4451,12 +4453,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>157</integer>
+// CHECK-NEXT:             <key>line</key><integer>159</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>157</integer>
+// CHECK-NEXT:             <key>line</key><integer>159</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4468,7 +4470,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>157</integer>
+// CHECK-NEXT:        <key>line</key><integer>159</integer>
 // CHECK-NEXT:        <key>col</key><integer>22</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4476,12 +4478,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>157</integer>
+// CHECK-NEXT:           <key>line</key><integer>159</integer>
 // CHECK-NEXT:           <key>col</key><integer>22</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>157</integer>
+// CHECK-NEXT:           <key>line</key><integer>159</integer>
 // CHECK-NEXT:           <key>col</key><integer>31</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -4501,12 +4503,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>157</integer>
+// CHECK-NEXT:             <key>line</key><integer>159</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>157</integer>
+// CHECK-NEXT:             <key>line</key><integer>159</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4514,12 +4516,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>158</integer>
+// CHECK-NEXT:             <key>line</key><integer>160</integer>
 // CHECK-NEXT:             <key>col</key><integer>12</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>158</integer>
+// CHECK-NEXT:             <key>line</key><integer>160</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4531,7 +4533,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>158</integer>
+// CHECK-NEXT:        <key>line</key><integer>160</integer>
 // CHECK-NEXT:        <key>col</key><integer>12</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4553,7 +4555,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>158</integer>
+// CHECK-NEXT:     <key>line</key><integer>160</integer>
 // CHECK-NEXT:     <key>col</key><integer>12</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -4565,7 +4567,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>172</integer>
+// CHECK-NEXT:        <key>line</key><integer>174</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4573,12 +4575,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>172</integer>
+// CHECK-NEXT:           <key>line</key><integer>174</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>172</integer>
+// CHECK-NEXT:           <key>line</key><integer>174</integer>
 // CHECK-NEXT:           <key>col</key><integer>25</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -4594,7 +4596,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>167</integer>
+// CHECK-NEXT:        <key>line</key><integer>169</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4612,12 +4614,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>167</integer>
+// CHECK-NEXT:             <key>line</key><integer>169</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>167</integer>
+// CHECK-NEXT:             <key>line</key><integer>169</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4625,12 +4627,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>168</integer>
+// CHECK-NEXT:             <key>line</key><integer>170</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>168</integer>
+// CHECK-NEXT:             <key>line</key><integer>170</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4646,12 +4648,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>168</integer>
+// CHECK-NEXT:             <key>line</key><integer>170</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>168</integer>
+// CHECK-NEXT:             <key>line</key><integer>170</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4659,12 +4661,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>168</integer>
+// CHECK-NEXT:             <key>line</key><integer>170</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>168</integer>
+// CHECK-NEXT:             <key>line</key><integer>170</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4676,7 +4678,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>168</integer>
+// CHECK-NEXT:        <key>line</key><integer>170</integer>
 // CHECK-NEXT:        <key>col</key><integer>22</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4684,12 +4686,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>168</integer>
+// CHECK-NEXT:           <key>line</key><integer>170</integer>
 // CHECK-NEXT:           <key>col</key><integer>22</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>168</integer>
+// CHECK-NEXT:           <key>line</key><integer>170</integer>
 // CHECK-NEXT:           <key>col</key><integer>31</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -4709,12 +4711,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>168</integer>
+// CHECK-NEXT:             <key>line</key><integer>170</integer>
 // CHECK-NEXT:             <key>col</key><integer>22</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>168</integer>
+// CHECK-NEXT:             <key>line</key><integer>170</integer>
 // CHECK-NEXT:             <key>col</key><integer>27</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4722,12 +4724,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>169</integer>
+// CHECK-NEXT:             <key>line</key><integer>171</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>169</integer>
+// CHECK-NEXT:             <key>line</key><integer>171</integer>
 // CHECK-NEXT:             <key>col</key><integer>20</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4739,7 +4741,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>169</integer>
+// CHECK-NEXT:        <key>line</key><integer>171</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4761,7 +4763,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>169</integer>
+// CHECK-NEXT:     <key>line</key><integer>171</integer>
 // CHECK-NEXT:     <key>col</key><integer>5</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -4773,7 +4775,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>184</integer>
+// CHECK-NEXT:        <key>line</key><integer>186</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4781,12 +4783,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>184</integer>
+// CHECK-NEXT:           <key>line</key><integer>186</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>184</integer>
+// CHECK-NEXT:           <key>line</key><integer>186</integer>
 // CHECK-NEXT:           <key>col</key><integer>25</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -4802,7 +4804,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>180</integer>
+// CHECK-NEXT:        <key>line</key><integer>182</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4820,12 +4822,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>180</integer>
+// CHECK-NEXT:             <key>line</key><integer>182</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>180</integer>
+// CHECK-NEXT:             <key>line</key><integer>182</integer>
 // CHECK-NEXT:             <key>col</key><integer>6</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4833,12 +4835,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>181</integer>
+// CHECK-NEXT:             <key>line</key><integer>183</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>181</integer>
+// CHECK-NEXT:             <key>line</key><integer>183</integer>
 // CHECK-NEXT:             <key>col</key><integer>10</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4854,12 +4856,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>181</integer>
+// CHECK-NEXT:             <key>line</key><integer>183</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>181</integer>
+// CHECK-NEXT:             <key>line</key><integer>183</integer>
 // CHECK-NEXT:             <key>col</key><integer>10</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4867,12 +4869,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>181</integer>
+// CHECK-NEXT:             <key>line</key><integer>183</integer>
 // CHECK-NEXT:             <key>col</key><integer>19</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>181</integer>
+// CHECK-NEXT:             <key>line</key><integer>183</integer>
 // CHECK-NEXT:             <key>col</key><integer>24</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4884,7 +4886,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>181</integer>
+// CHECK-NEXT:        <key>line</key><integer>183</integer>
 // CHECK-NEXT:        <key>col</key><integer>19</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4892,12 +4894,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>181</integer>
+// CHECK-NEXT:           <key>line</key><integer>183</integer>
 // CHECK-NEXT:           <key>col</key><integer>19</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>181</integer>
+// CHECK-NEXT:           <key>line</key><integer>183</integer>
 // CHECK-NEXT:           <key>col</key><integer>28</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -4913,7 +4915,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>184</integer>
+// CHECK-NEXT:        <key>line</key><integer>186</integer>
 // CHECK-NEXT:        <key>col</key><integer>5</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4921,12 +4923,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>184</integer>
+// CHECK-NEXT:           <key>line</key><integer>186</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>184</integer>
+// CHECK-NEXT:           <key>line</key><integer>186</integer>
 // CHECK-NEXT:           <key>col</key><integer>25</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -4946,12 +4948,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>184</integer>
+// CHECK-NEXT:             <key>line</key><integer>186</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>184</integer>
+// CHECK-NEXT:             <key>line</key><integer>186</integer>
 // CHECK-NEXT:             <key>col</key><integer>23</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4959,12 +4961,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>185</integer>
+// CHECK-NEXT:             <key>line</key><integer>187</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>185</integer>
+// CHECK-NEXT:             <key>line</key><integer>187</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -4976,7 +4978,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>185</integer>
+// CHECK-NEXT:        <key>line</key><integer>187</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -4998,7 +5000,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>185</integer>
+// CHECK-NEXT:     <key>line</key><integer>187</integer>
 // CHECK-NEXT:     <key>col</key><integer>1</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -5010,7 +5012,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>194</integer>
+// CHECK-NEXT:        <key>line</key><integer>196</integer>
 // CHECK-NEXT:        <key>col</key><integer>3</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -5018,12 +5020,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>194</integer>
+// CHECK-NEXT:           <key>line</key><integer>196</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>194</integer>
+// CHECK-NEXT:           <key>line</key><integer>196</integer>
 // CHECK-NEXT:           <key>col</key><integer>13</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -5039,7 +5041,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>189</integer>
+// CHECK-NEXT:        <key>line</key><integer>191</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -5057,12 +5059,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>189</integer>
+// CHECK-NEXT:             <key>line</key><integer>191</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>189</integer>
+// CHECK-NEXT:             <key>line</key><integer>191</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5070,12 +5072,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>190</integer>
+// CHECK-NEXT:             <key>line</key><integer>192</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>190</integer>
+// CHECK-NEXT:             <key>line</key><integer>192</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5091,12 +5093,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>190</integer>
+// CHECK-NEXT:             <key>line</key><integer>192</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>190</integer>
+// CHECK-NEXT:             <key>line</key><integer>192</integer>
 // CHECK-NEXT:             <key>col</key><integer>5</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5104,12 +5106,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>190</integer>
+// CHECK-NEXT:             <key>line</key><integer>192</integer>
 // CHECK-NEXT:             <key>col</key><integer>12</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>190</integer>
+// CHECK-NEXT:             <key>line</key><integer>192</integer>
 // CHECK-NEXT:             <key>col</key><integer>17</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5121,7 +5123,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>190</integer>
+// CHECK-NEXT:        <key>line</key><integer>192</integer>
 // CHECK-NEXT:        <key>col</key><integer>12</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -5129,12 +5131,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>190</integer>
+// CHECK-NEXT:           <key>line</key><integer>192</integer>
 // CHECK-NEXT:           <key>col</key><integer>12</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>190</integer>
+// CHECK-NEXT:           <key>line</key><integer>192</integer>
 // CHECK-NEXT:           <key>col</key><integer>21</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -5150,7 +5152,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>194</integer>
+// CHECK-NEXT:        <key>line</key><integer>196</integer>
 // CHECK-NEXT:        <key>col</key><integer>3</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -5158,12 +5160,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>194</integer>
+// CHECK-NEXT:           <key>line</key><integer>196</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>194</integer>
+// CHECK-NEXT:           <key>line</key><integer>196</integer>
 // CHECK-NEXT:           <key>col</key><integer>13</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -5183,12 +5185,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>194</integer>
+// CHECK-NEXT:             <key>line</key><integer>196</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>194</integer>
+// CHECK-NEXT:             <key>line</key><integer>196</integer>
 // CHECK-NEXT:             <key>col</key><integer>11</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5196,12 +5198,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>195</integer>
+// CHECK-NEXT:             <key>line</key><integer>197</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>195</integer>
+// CHECK-NEXT:             <key>line</key><integer>197</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5213,7 +5215,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>195</integer>
+// CHECK-NEXT:        <key>line</key><integer>197</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -5235,7 +5237,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>195</integer>
+// CHECK-NEXT:     <key>line</key><integer>197</integer>
 // CHECK-NEXT:     <key>col</key><integer>1</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
@@ -5247,7 +5249,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>206</integer>
+// CHECK-NEXT:        <key>line</key><integer>208</integer>
 // CHECK-NEXT:        <key>col</key><integer>3</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -5255,12 +5257,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>206</integer>
+// CHECK-NEXT:           <key>line</key><integer>208</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>206</integer>
+// CHECK-NEXT:           <key>line</key><integer>208</integer>
 // CHECK-NEXT:           <key>col</key><integer>25</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -5276,7 +5278,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>200</integer>
+// CHECK-NEXT:        <key>line</key><integer>202</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -5294,12 +5296,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>200</integer>
+// CHECK-NEXT:             <key>line</key><integer>202</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>200</integer>
+// CHECK-NEXT:             <key>line</key><integer>202</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5307,12 +5309,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>201</integer>
+// CHECK-NEXT:             <key>line</key><integer>203</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>201</integer>
+// CHECK-NEXT:             <key>line</key><integer>203</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5328,12 +5330,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>201</integer>
+// CHECK-NEXT:             <key>line</key><integer>203</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>201</integer>
+// CHECK-NEXT:             <key>line</key><integer>203</integer>
 // CHECK-NEXT:             <key>col</key><integer>8</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5341,12 +5343,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>202</integer>
+// CHECK-NEXT:             <key>line</key><integer>204</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>202</integer>
+// CHECK-NEXT:             <key>line</key><integer>204</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5362,12 +5364,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>202</integer>
+// CHECK-NEXT:             <key>line</key><integer>204</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>202</integer>
+// CHECK-NEXT:             <key>line</key><integer>204</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5375,12 +5377,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>202</integer>
+// CHECK-NEXT:             <key>line</key><integer>204</integer>
 // CHECK-NEXT:             <key>col</key><integer>9</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>202</integer>
+// CHECK-NEXT:             <key>line</key><integer>204</integer>
 // CHECK-NEXT:             <key>col</key><integer>14</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5392,7 +5394,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>202</integer>
+// CHECK-NEXT:        <key>line</key><integer>204</integer>
 // CHECK-NEXT:        <key>col</key><integer>9</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -5400,12 +5402,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>202</integer>
+// CHECK-NEXT:           <key>line</key><integer>204</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>202</integer>
+// CHECK-NEXT:           <key>line</key><integer>204</integer>
 // CHECK-NEXT:           <key>col</key><integer>18</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -5421,7 +5423,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>206</integer>
+// CHECK-NEXT:        <key>line</key><integer>208</integer>
 // CHECK-NEXT:        <key>col</key><integer>3</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -5429,12 +5431,12 @@
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>206</integer>
+// CHECK-NEXT:           <key>line</key><integer>208</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>206</integer>
+// CHECK-NEXT:           <key>line</key><integer>208</integer>
 // CHECK-NEXT:           <key>col</key><integer>25</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -5454,12 +5456,12 @@
 // CHECK-NEXT:          <key>start</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>206</integer>
+// CHECK-NEXT:             <key>line</key><integer>208</integer>
 // CHECK-NEXT:             <key>col</key><integer>3</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>206</integer>
+// CHECK-NEXT:             <key>line</key><integer>208</integer>
 // CHECK-NEXT:             <key>col</key><integer>23</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5467,12 +5469,12 @@
 // CHECK-NEXT:          <key>end</key>
 // CHECK-NEXT:           <array>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>207</integer>
+// CHECK-NEXT:             <key>line</key><integer>209</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
 // CHECK-NEXT:            <dict>
-// CHECK-NEXT:             <key>line</key><integer>207</integer>
+// CHECK-NEXT:             <key>line</key><integer>209</integer>
 // CHECK-NEXT:             <key>col</key><integer>1</integer>
 // CHECK-NEXT:             <key>file</key><integer>0</integer>
 // CHECK-NEXT:            </dict>
@@ -5484,7 +5486,7 @@
 // CHECK-NEXT:       <key>kind</key><string>event</string>
 // CHECK-NEXT:       <key>location</key>
 // CHECK-NEXT:       <dict>
-// CHECK-NEXT:        <key>line</key><integer>207</integer>
+// CHECK-NEXT:        <key>line</key><integer>209</integer>
 // CHECK-NEXT:        <key>col</key><integer>1</integer>
 // CHECK-NEXT:        <key>file</key><integer>0</integer>
 // CHECK-NEXT:       </dict>
@@ -5506,7 +5508,7 @@
 // CHECK-NEXT:    <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:    <key>location</key>
 // CHECK-NEXT:    <dict>
-// CHECK-NEXT:     <key>line</key><integer>207</integer>
+// CHECK-NEXT:     <key>line</key><integer>209</integer>
 // CHECK-NEXT:     <key>col</key><integer>1</integer>
 // CHECK-NEXT:     <key>file</key><integer>0</integer>
 // CHECK-NEXT:    </dict>
diff --git a/test/Analysis/malloc.c b/test/Analysis/malloc.c
index 4c364eb..50be4ef 100644
--- a/test/Analysis/malloc.c
+++ b/test/Analysis/malloc.c
@@ -1720,13 +1720,6 @@
   }
 }
 
-char *dupstrWarn(const char *s) {
-  const int len = strlen(s);
-  char *p = (char*) smallocWarn(len + 1);
-  strcpy(p, s); // expected-warning{{String copy function overflows destination buffer}}
-  return p;
-}
-
 int *radar15580979() {
   int *data = (int *)malloc(32);
   int *p = data ?: (int*)malloc(32); // no warning
@@ -1784,6 +1777,27 @@
   free((void *)fnptr); // expected-warning {{Argument to free() is a function pointer}}
 }
 
+// Enabling the malloc checker enables some of the buffer-checking portions
+// of the C-string checker.
+void cstringchecker_bounds_nocrash() {
+  char *p = malloc(2);
+  strncpy(p, "AAA", sizeof("AAA")); // expected-warning {{Size argument is greater than the length of the destination buffer}}
+
+  free(p);
+}
+
+void allocateSomeMemory(void *offendingParameter, void **ptr) {
+  *ptr = malloc(1);
+}
+
+void testNoCrashOnOffendingParameter() {
+  // "extern" is necessary to avoid unrelated warnings 
+  // on passing uninitialized value.
+  extern void *offendingParameter;
+  void* ptr;
+  allocateSomeMemory(offendingParameter, &ptr);
+} // expected-warning {{Potential leak of memory pointed to by 'ptr'}}
+
 // ----------------------------------------------------------------------------
 // False negatives.
 
diff --git a/test/Analysis/malloc.cpp b/test/Analysis/malloc.cpp
index c323754..b93c73e 100644
--- a/test/Analysis/malloc.cpp
+++ b/test/Analysis/malloc.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_analyze_cc1 -w -analyzer-checker=core,alpha.deadcode.UnreachableCode,alpha.core.CastSize,unix.Malloc,cplusplus.NewDelete -analyzer-store=region -verify %s
 // RUN: %clang_analyze_cc1 -triple i386-unknown-linux-gnu -w -analyzer-checker=core,alpha.deadcode.UnreachableCode,alpha.core.CastSize,unix.Malloc,cplusplus.NewDelete -analyzer-store=region -verify %s
+// RUN: %clang_analyze_cc1 -w -analyzer-checker=core,alpha.deadcode.UnreachableCode,alpha.core.CastSize,unix.Malloc,cplusplus.NewDelete -analyzer-store=region -DTEST_INLINABLE_ALLOCATORS -verify %s
+// RUN: %clang_analyze_cc1 -triple i386-unknown-linux-gnu -w -analyzer-checker=core,alpha.deadcode.UnreachableCode,alpha.core.CastSize,unix.Malloc,cplusplus.NewDelete -analyzer-store=region -DTEST_INLINABLE_ALLOCATORS -verify %s
 
 #include "Inputs/system-header-simulator-cxx.h"
 
diff --git a/test/Analysis/mmap-writeexec.c b/test/Analysis/mmap-writeexec.c
new file mode 100644
index 0000000..2d8ea9b
--- /dev/null
+++ b/test/Analysis/mmap-writeexec.c
@@ -0,0 +1,36 @@
+// RUN: %clang_analyze_cc1 -triple i686-unknown-linux -analyzer-checker=alpha.security.MmapWriteExec -analyzer-config alpha.security.MmapWriteExec:MmapProtExec=1 -analyzer-config alpha.security.MmapWriteExec:MmapProtRead=4 -DUSE_ALTERNATIVE_PROT_EXEC_DEFINITION -verify %s
+// RUN: %clang_analyze_cc1 -triple x86_64-unknown-apple-darwin10 -analyzer-checker=alpha.security.MmapWriteExec -verify %s
+
+#define PROT_WRITE  0x02
+#ifndef USE_ALTERNATIVE_PROT_EXEC_DEFINITION
+#define PROT_EXEC   0x04
+#define PROT_READ   0x01
+#else
+#define PROT_EXEC   0x01
+#define PROT_READ   0x04
+#endif
+#define MAP_PRIVATE 0x0002
+#define MAP_ANON    0x1000
+#define MAP_FIXED   0x0010
+#define NULL        ((void *)0)
+
+typedef __typeof(sizeof(int)) size_t;
+void *mmap(void *, size_t, int, int, int, long);
+
+void f1()
+{
+  void *a = mmap(NULL, 16, PROT_READ | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); // no-warning
+  void *b = mmap(a, 16, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED | MAP_ANON, -1, 0); // no-warning
+  void *c = mmap(NULL, 32, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); // expected-warning{{Both PROT_WRITE and PROT_EXEC flags are set. This can lead to exploitable memory regions, which could be overwritten with malicious code}}
+  (void)a;
+  (void)b;
+  (void)c;
+}
+
+void f2()
+{
+  void *(*callm)(void *, size_t, int, int, int, long);
+  callm = mmap;
+  int prot = PROT_WRITE | PROT_EXEC;
+  (void)callm(NULL, 1024, prot, MAP_PRIVATE | MAP_ANON, -1, 0); // expected-warning{{Both PROT_WRITE and PROT_EXEC flags are set. This can lead to exploitable memory regions, which could be overwritten with malicious code}}
+}
diff --git a/test/Analysis/new-ctor-conservative.cpp b/test/Analysis/new-ctor-conservative.cpp
new file mode 100644
index 0000000..b82df9a
--- /dev/null
+++ b/test/Analysis/new-ctor-conservative.cpp
@@ -0,0 +1,46 @@
+// RUN: %clang_analyze_cc1 -w -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-allocator-inlining=true -std=c++11 -verify %s
+
+void clang_analyzer_eval(bool);
+void clang_analyzer_warnIfReached();
+
+struct S {
+  int x;
+  S() : x(1) {}
+  ~S() {}
+};
+
+void checkConstructorInlining() {
+  S *s = new S;
+  clang_analyzer_eval(s->x == 1); // expected-warning{{TRUE}}
+}
+
+void checkNewPOD() {
+  int *i = new int;
+  clang_analyzer_eval(*i == 0); // expected-warning{{UNKNOWN}}
+  int *j = new int();
+  clang_analyzer_eval(*j == 0); // expected-warning{{TRUE}}
+  int *k = new int(5);
+  clang_analyzer_eval(*k == 5); // expected-warning{{TRUE}}
+}
+
+void checkNewArray() {
+  S *s = new S[10];
+  // FIXME: Should be true once we inline array constructors.
+  clang_analyzer_eval(s[0].x == 1); // expected-warning{{UNKNOWN}}
+}
+
+struct NullS {
+  NullS() {
+    if (this) {}
+  }
+  NullS(int x) {
+    if (!this) {
+      clang_analyzer_warnIfReached(); // no-warning
+    }
+  }
+};
+
+void checkNullThis() {
+  NullS *nulls = new NullS(); // no-crash
+  NullS *nulls2 = new NullS(0);
+}
diff --git a/test/Analysis/new-ctor-inlined.cpp b/test/Analysis/new-ctor-inlined.cpp
new file mode 100644
index 0000000..1506bf2
--- /dev/null
+++ b/test/Analysis/new-ctor-inlined.cpp
@@ -0,0 +1,55 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-allocator-inlining=true -std=c++11 -verify %s
+
+void clang_analyzer_eval(bool);
+
+typedef __typeof__(sizeof(int)) size_t;
+
+void *conjure();
+void exit(int);
+
+void *operator new(size_t size) throw() {
+  void *x = conjure();
+  if (x == 0)
+    exit(1);
+  return x;
+}
+
+struct S {
+  int x;
+  S() : x(1) {}
+  ~S() {}
+};
+
+void checkNewAndConstructorInlining() {
+  S *s = new S;
+  // Check that the symbol for 's' is not dying.
+  clang_analyzer_eval(s != 0); // expected-warning{{TRUE}}
+  // Check that bindings are correct (and also not dying).
+  clang_analyzer_eval(s->x == 1); // expected-warning{{TRUE}}
+}
+
+struct Sp {
+  Sp *p;
+  Sp(Sp *p): p(p) {}
+  ~Sp() {}
+};
+
+void checkNestedNew() {
+  Sp *p = new Sp(new Sp(0));
+  clang_analyzer_eval(p->p->p == 0); // expected-warning{{TRUE}}
+}
+
+void checkNewPOD() {
+  int *i = new int;
+  clang_analyzer_eval(*i == 0); // expected-warning{{UNKNOWN}}
+  int *j = new int();
+  clang_analyzer_eval(*j == 0); // expected-warning{{TRUE}}
+  int *k = new int(5);
+  clang_analyzer_eval(*k == 5); // expected-warning{{TRUE}}
+}
+
+void checkTrivialCopy() {
+  S s;
+  S *t = new S(s); // no-crash
+  clang_analyzer_eval(t->x == 1); // expected-warning{{TRUE}}
+}
diff --git a/test/Analysis/new-ctor-malloc.cpp b/test/Analysis/new-ctor-malloc.cpp
new file mode 100644
index 0000000..74b1e21
--- /dev/null
+++ b/test/Analysis/new-ctor-malloc.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection,unix.Malloc -analyzer-config c++-allocator-inlining=true -analyzer-output=text -std=c++11 -verify %s
+
+void clang_analyzer_eval(bool);
+
+typedef __typeof__(sizeof(int)) size_t;
+
+void *malloc(size_t size);
+
+void *operator new(size_t size) throw() {
+  void *x = malloc(size); // expected-note {{Memory is allocated}}
+  if (!x) // expected-note    {{Assuming 'x' is non-null}}
+          // expected-note@-1 {{Taking false branch}}
+    return nullptr;
+  return x;
+}
+
+void checkNewAndConstructorInlining() {
+  int *s = new int; // expected-note   {{Calling 'operator new'}}
+                    // expected-note@-1{{Returning from 'operator new'}}
+} // expected-warning {{Potential leak of memory pointed to by 's'}}
+  // expected-note@-1 {{Potential leak of memory pointed to by 's'}}
diff --git a/test/Analysis/new-ctor-null-throw.cpp b/test/Analysis/new-ctor-null-throw.cpp
new file mode 100644
index 0000000..cdaf01d
--- /dev/null
+++ b/test/Analysis/new-ctor-null-throw.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_analyze_cc1 -w -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-allocator-inlining=true -std=c++11 -verify %s
+
+void clang_analyzer_eval(bool);
+
+typedef __typeof__(sizeof(int)) size_t;
+
+
+// These are ill-formed. One cannot return nullptr from a throwing version of an
+// operator new.
+void *operator new(size_t size) {
+  return nullptr;
+}
+void *operator new[](size_t size) {
+  return nullptr;
+}
+
+struct S {
+  int x;
+  S() : x(1) {}
+  ~S() {}
+};
+
+void testArrays() {
+  S *s = new S[10]; // no-crash
+  s[0].x = 2; // expected-warning{{Dereference of null pointer}}
+}
diff --git a/test/Analysis/new-ctor-null.cpp b/test/Analysis/new-ctor-null.cpp
new file mode 100644
index 0000000..ac2a39a
--- /dev/null
+++ b/test/Analysis/new-ctor-null.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-allocator-inlining=true -std=c++11 -verify %s
+
+void clang_analyzer_eval(bool);
+void clang_analyzer_warnIfReached();
+
+typedef __typeof__(sizeof(int)) size_t;
+
+void *operator new(size_t size) throw() {
+  return nullptr;
+}
+void *operator new[](size_t size) throw() {
+  return nullptr;
+}
+
+struct S {
+  int x;
+  S() : x(1) {
+    // FIXME: Constructor should not be called with null this, even if it was
+    // returned by operator new().
+    clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+  }
+  ~S() {}
+};
+
+void testArrays() {
+  S *s = new S[10]; // no-crash
+  s[0].x = 2; // expected-warning{{Dereference of null pointer}}
+}
+
+int global;
+void testInvalidationOnConstructionIntoNull() {
+  global = 0;
+  S *s = new S();
+  // FIXME: Should be FALSE - we should not invalidate globals.
+  clang_analyzer_eval(global); // expected-warning{{UNKNOWN}}
+}
diff --git a/test/Analysis/new-ctor-recursive.cpp b/test/Analysis/new-ctor-recursive.cpp
new file mode 100644
index 0000000..e0c654b
--- /dev/null
+++ b/test/Analysis/new-ctor-recursive.cpp
@@ -0,0 +1,118 @@
+// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=core,cplusplus.NewDelete,cplusplus.NewDeleteLeaks,debug.ExprInspection -analyzer-config c++-allocator-inlining=true -std=c++11 -verify %s
+
+void clang_analyzer_eval(bool);
+void clang_analyzer_dump(int);
+
+typedef __typeof__(sizeof(int)) size_t;
+
+void *conjure();
+void exit(int);
+
+struct S;
+
+S *global_s;
+
+// Recursive operator kinda placement new.
+void *operator new(size_t size, S *place);
+
+enum class ConstructionKind : char {
+  Garbage,
+  Recursive
+};
+
+struct S {
+public:
+  int x;
+  S(): x(1) {}
+  S(int y): x(y) {}
+
+  S(ConstructionKind k) {
+    switch (k) {
+    case ConstructionKind::Recursive: { // Call one more operator new 'r'ecursively.
+      S *s = new (nullptr) S(5);
+      x = s->x + 1;
+      global_s = s;
+      return;
+    }
+    case ConstructionKind::Garbage: {
+      // Leaves garbage in 'x'.
+    }
+    }
+  }
+  ~S() {}
+};
+
+// Do not try this at home!
+void *operator new(size_t size, S *place) {
+  if (!place)
+    return new S();
+  return place;
+}
+
+void testThatCharConstructorIndeedYieldsGarbage() {
+  S *s = new S(ConstructionKind::Garbage);
+  clang_analyzer_eval(s->x == 0); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(s->x == 1); // expected-warning{{UNKNOWN}}
+  // FIXME: This should warn, but MallocChecker doesn't default-bind regions
+  // returned by standard operator new to garbage.
+  s->x += 1; // no-warning
+  delete s;
+}
+
+
+void testChainedOperatorNew() {
+  S *s;
+  // * Evaluate standard new.
+  // * Evaluate constructor S(3).
+  // * Bind value for standard new.
+  // * Evaluate our custom new.
+  // * Evaluate constructor S(Garbage).
+  // * Bind value for our custom new.
+  s = new (new S(3)) S(ConstructionKind::Garbage);
+  clang_analyzer_eval(s->x == 3); // expected-warning{{TRUE}}
+  // expected-warning@+9{{Potential leak of memory pointed to by 's'}}
+
+  // * Evaluate standard new.
+  // * Evaluate constructor S(Garbage).
+  // * Bind value for standard new.
+  // * Evaluate our custom new.
+  // * Evaluate constructor S(4).
+  // * Bind value for our custom new.
+  s = new (new S(ConstructionKind::Garbage)) S(4);
+  clang_analyzer_eval(s->x == 4); // expected-warning{{TRUE}}
+  delete s;
+
+  // -> Enter our custom new (nullptr).
+  //   * Evaluate standard new.
+  //   * Inline constructor S().
+  //   * Bind value for standard new.
+  // <- Exit our custom new (nullptr).
+  // * Evaluate constructor S(Garbage).
+  // * Bind value for our custom new.
+  s = new (nullptr) S(ConstructionKind::Garbage);
+  clang_analyzer_eval(s->x == 1); // expected-warning{{TRUE}}
+  delete s;
+
+  // -> Enter our custom new (nullptr).
+  //   * Evaluate standard new.
+  //   * Inline constructor S().
+  //   * Bind value for standard new.
+  // <- Exit our custom new (nullptr).
+  // -> Enter constructor S(Recursive).
+  //   -> Enter our custom new (nullptr).
+  //     * Evaluate standard new.
+  //     * Inline constructor S().
+  //     * Bind value for standard new.
+  //   <- Exit our custom new (nullptr).
+  //   * Evaluate constructor S(5).
+  //   * Bind value for our custom new (nullptr).
+  //   * Assign that value to global_s.
+  // <- Exit constructor S(Recursive).
+  // * Bind value for our custom new (nullptr).
+  global_s = nullptr;
+  s = new (nullptr) S(ConstructionKind::Recursive);
+  clang_analyzer_eval(global_s); // expected-warning{{TRUE}}
+  clang_analyzer_eval(global_s->x == 5); // expected-warning{{TRUE}}
+  clang_analyzer_eval(s->x == 6); // expected-warning{{TRUE}}
+  delete s;
+}
diff --git a/test/Analysis/new-ctor-symbolic.cpp b/test/Analysis/new-ctor-symbolic.cpp
new file mode 100644
index 0000000..8bef2ed
--- /dev/null
+++ b/test/Analysis/new-ctor-symbolic.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-allocator-inlining=true -std=c++11 -verify %s
+
+void clang_analyzer_eval(bool);
+void clang_analyzer_warnOnDeadSymbol(int);
+
+typedef __typeof__(sizeof(int)) size_t;
+
+int conjure();
+void exit(int);
+
+struct S {
+  S() {}
+  ~S() {}
+
+  static S buffer[1000];
+
+  // This operator allocates stuff within the buffer. Additionally, it never
+  // places anything at the beginning of the buffer.
+  void *operator new(size_t size) {
+    int i = conjure();
+    if (i == 0)
+      exit(1);
+    // Let's see if the symbol dies before new-expression is evaluated.
+    // It shouldn't.
+    clang_analyzer_warnOnDeadSymbol(i);
+    return buffer + i;
+  }
+};
+
+void testIndexLiveness() {
+  S *s = new S();
+  clang_analyzer_eval(s == S::buffer); // expected-warning{{FALSE}}
+} // expected-warning{{SYMBOL DEAD}}
diff --git a/test/Analysis/new-dynamic-types.cpp b/test/Analysis/new-dynamic-types.cpp
new file mode 100644
index 0000000..e0a460e
--- /dev/null
+++ b/test/Analysis/new-dynamic-types.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core -std=c++11 -verify %s
+
+// expected-no-diagnostics
+
+typedef __typeof(sizeof(int)) size_t;
+
+void *operator new(size_t size, void *ptr);
+
+struct B {
+  virtual void foo();
+};
+
+struct D : public B {
+  virtual void foo() override {}
+};
+
+void test_ub() {
+  // FIXME: Potentially warn because this code is pretty weird.
+  B b;
+  new (&b) D;
+  b.foo(); // no-crash
+}
+
+void test_non_ub() {
+  char c[sizeof(D)]; // Should be enough storage.
+  new (c) D;
+  ((B *)c)->foo(); // no-crash
+}
diff --git a/test/Analysis/new.cpp b/test/Analysis/new.cpp
index 6cfcb1d..0cd2d08 100644
--- a/test/Analysis/new.cpp
+++ b/test/Analysis/new.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-store region -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-store region -std=c++11 -DTEST_INLINABLE_ALLOCATORS -verify %s
 #include "Inputs/system-header-simulator-cxx.h"
 
 void clang_analyzer_eval(bool);
@@ -34,7 +35,7 @@
 
   void *y = new (x) int;
   clang_analyzer_eval(x == y); // expected-warning{{TRUE}};
-  clang_analyzer_eval(*x == 1); // expected-warning{{UNKNOWN}};
+  clang_analyzer_eval(*x == 1); // expected-warning{{TRUE}};
 
   return y;
 }
@@ -200,8 +201,7 @@
   int n;
   new (&n) int;
 
-  // Should warn that n is uninitialized.
-  if (n) { // no-warning
+  if (n) { // expected-warning{{Branch condition evaluates to a garbage value}}
     return 0;
   }
   return 1;
@@ -311,7 +311,7 @@
 void testArrayDestr() {
   NoReturnDtor *p = new NoReturnDtor[2];
   delete[] p; // Calls the base destructor which aborts, checked below
-  //TODO: clang_analyzer_eval should not be called
+   //TODO: clang_analyzer_eval should not be called
   clang_analyzer_eval(true); // expected-warning{{TRUE}}
 }
 
diff --git a/test/Analysis/nonnullparamchecker-crash.cpp b/test/Analysis/nonnullparamchecker-crash.cpp
new file mode 100644
index 0000000..96e7f21
--- /dev/null
+++ b/test/Analysis/nonnullparamchecker-crash.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s
+class C {};
+
+// expected-no-diagnostics
+void f(C i) {
+  auto lambda = [&] { f(i); };
+  typedef decltype(lambda) T;
+  T* blah = new T(lambda);
+  (*blah)();
+  delete blah;
+}
diff --git a/test/Analysis/novoidtypecrash.c b/test/Analysis/novoidtypecrash.c
new file mode 100644
index 0000000..c04cfca
--- /dev/null
+++ b/test/Analysis/novoidtypecrash.c
@@ -0,0 +1,8 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core %s
+a;
+b(void **c) { // no-crash
+  *c = a;
+  int *d;
+  b(&d);
+  *d;
+}
diff --git a/test/Analysis/nullptr.cpp b/test/Analysis/nullptr.cpp
index b3e61c9..38e099b 100644
--- a/test/Analysis/nullptr.cpp
+++ b/test/Analysis/nullptr.cpp
@@ -142,8 +142,9 @@
                       // expected-note@-1{{Passing null pointer value via 1st parameter 'x'}}
   if (getSymbol()) {  // expected-note  {{Assuming the condition is true}}
                       // expected-note@-1{{Taking true branch}}
-    X *x = Type().x; // expected-note{{'x' initialized to a null pointer value}}
-    x->f(); // expected-warning{{Called C++ object pointer is null}}
+    X *xx = Type().x; // expected-note   {{Null pointer value stored to field 'x'}}
+                      // expected-note@-1{{'xx' initialized to a null pointer value}}
+    xx->f(); // expected-warning{{Called C++ object pointer is null}}
             // expected-note@-1{{Called C++ object pointer is null}}
   }
 }
diff --git a/test/Analysis/number-object-conversion.mm b/test/Analysis/number-object-conversion.mm
new file mode 100644
index 0000000..32628b2
--- /dev/null
+++ b/test/Analysis/number-object-conversion.mm
@@ -0,0 +1,13 @@
+// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 -fblocks -fobjc-arc -w -analyzer-checker=osx.NumberObjectConversion -analyzer-config osx.NumberObjectConversion:Pedantic=true %s -verify
+
+#include "Inputs/system-header-simulator-objc.h"
+
+NSNumber* generateNumber();
+
+// expected-no-diagnostics
+int test_initialization_in_ifstmt() { // Don't warn on initialization in guard.
+  if (NSNumber* number = generateNumber()) { // no-warning
+    return 0;
+  }
+  return 1;
+}
diff --git a/test/Analysis/objc-for.m b/test/Analysis/objc-for.m
index f1d0cf1..f191e86 100644
--- a/test/Analysis/objc-for.m
+++ b/test/Analysis/objc-for.m
@@ -128,7 +128,7 @@
   int count = [A count];
   if (count > 0) {
     int i;
-    int j;
+    int j = 0;
     for (NSString *a in A) {
       i = 1;
       j++;
@@ -141,7 +141,7 @@
 void onlySuppressExitAfterZeroIterations(NSMutableDictionary *D) {
   if (D.count > 0) {
     int *x;
-    int i;
+    int i = 0;
     for (NSString *key in D) {
       x = 0;
       i++;
@@ -155,7 +155,7 @@
 void onlySuppressLoopExitAfterZeroIterations_WithContinue(NSMutableDictionary *D) {
   if (D.count > 0) {
     int *x;
-    int i;
+    int i = 0;
     for (NSString *key in D) {
       x = 0;
       i++;
diff --git a/test/Analysis/offsetofexpr-callback.c b/test/Analysis/offsetofexpr-callback.c
new file mode 100644
index 0000000..0fcb907
--- /dev/null
+++ b/test/Analysis/offsetofexpr-callback.c
@@ -0,0 +1,13 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.AnalysisOrder -analyzer-config debug.AnalysisOrder:PreStmtOffsetOfExpr=true,debug.AnalysisOrder:PostStmtOffsetOfExpr=true %s 2>&1 | FileCheck %s
+#include "Inputs/system-header-simulator.h"
+
+struct S {
+  char c;
+};
+
+void test() {
+  offsetof(struct S, c); 
+}
+
+// CHECK: PreStmt<OffsetOfExpr>
+// CHECK-NEXT: PostStmt<OffsetOfExpr>
\ No newline at end of file
diff --git a/test/Analysis/plist-macros.cpp b/test/Analysis/plist-macros.cpp
index 18d3ce1..821110e 100644
--- a/test/Analysis/plist-macros.cpp
+++ b/test/Analysis/plist-macros.cpp
@@ -65,6 +65,7 @@
   ;
 int useMultiNote(int *p, int y) {;
   y++;
+  if (p) {}
   multiNoteMacro
 
   return *p; // expected-warning {{Dereference of null pointer}}
@@ -1216,7 +1217,41 @@
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
 // CHECK-NEXT:            <key>line</key><integer>68</integer>
-// CHECK-NEXT:            <key>col</key><integer>16</integer>
+// CHECK-NEXT:            <key>col</key><integer>4</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>68</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>68</integer>
+// CHECK-NEXT:            <key>col</key><integer>4</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>68</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>68</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:          </array>
@@ -1228,7 +1263,7 @@
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
 // CHECK-NEXT:       <key>line</key><integer>68</integer>
-// CHECK-NEXT:       <key>col</key><integer>3</integer>
+// CHECK-NEXT:       <key>col</key><integer>7</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
 // CHECK-NEXT:      <key>ranges</key>
@@ -1236,12 +1271,12 @@
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
 // CHECK-NEXT:          <key>line</key><integer>68</integer>
-// CHECK-NEXT:          <key>col</key><integer>3</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
 // CHECK-NEXT:          <key>line</key><integer>68</integer>
-// CHECK-NEXT:          <key>col</key><integer>16</integer>
+// CHECK-NEXT:          <key>col</key><integer>7</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:        </array>
@@ -1253,10 +1288,44 @@
 // CHECK-NEXT:      <string>Assuming &apos;p&apos; is null</string>
 // CHECK-NEXT:     </dict>
 // CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>kind</key><string>control</string>
+// CHECK-NEXT:      <key>edges</key>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>start</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>68</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>68</integer>
+// CHECK-NEXT:            <key>col</key><integer>7</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:         <key>end</key>
+// CHECK-NEXT:          <array>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>69</integer>
+// CHECK-NEXT:            <key>col</key><integer>3</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:           <dict>
+// CHECK-NEXT:            <key>line</key><integer>69</integer>
+// CHECK-NEXT:            <key>col</key><integer>16</integer>
+// CHECK-NEXT:            <key>file</key><integer>0</integer>
+// CHECK-NEXT:           </dict>
+// CHECK-NEXT:          </array>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <dict>
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>68</integer>
+// CHECK-NEXT:       <key>line</key><integer>69</integer>
 // CHECK-NEXT:       <key>col</key><integer>3</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1264,12 +1333,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>68</integer>
+// CHECK-NEXT:          <key>line</key><integer>69</integer>
 // CHECK-NEXT:          <key>col</key><integer>3</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>68</integer>
+// CHECK-NEXT:          <key>line</key><integer>69</integer>
 // CHECK-NEXT:          <key>col</key><integer>16</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1289,12 +1358,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>68</integer>
+// CHECK-NEXT:            <key>line</key><integer>69</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>68</integer>
+// CHECK-NEXT:            <key>line</key><integer>69</integer>
 // CHECK-NEXT:            <key>col</key><integer>16</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1302,12 +1371,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>70</integer>
+// CHECK-NEXT:            <key>line</key><integer>71</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>70</integer>
+// CHECK-NEXT:            <key>line</key><integer>71</integer>
 // CHECK-NEXT:            <key>col</key><integer>8</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1323,12 +1392,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>70</integer>
+// CHECK-NEXT:            <key>line</key><integer>71</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>70</integer>
+// CHECK-NEXT:            <key>line</key><integer>71</integer>
 // CHECK-NEXT:            <key>col</key><integer>8</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1336,12 +1405,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>70</integer>
+// CHECK-NEXT:            <key>line</key><integer>71</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>70</integer>
+// CHECK-NEXT:            <key>line</key><integer>71</integer>
 // CHECK-NEXT:            <key>col</key><integer>10</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1353,7 +1422,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>70</integer>
+// CHECK-NEXT:       <key>line</key><integer>71</integer>
 // CHECK-NEXT:       <key>col</key><integer>10</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1361,12 +1430,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>70</integer>
+// CHECK-NEXT:          <key>line</key><integer>71</integer>
 // CHECK-NEXT:          <key>col</key><integer>11</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>70</integer>
+// CHECK-NEXT:          <key>line</key><integer>71</integer>
 // CHECK-NEXT:          <key>col</key><integer>11</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1387,10 +1456,10 @@
 // CHECK-NEXT:    <key>issue_hash_content_of_line_in_context</key><string>41f58f9549aa1867e461a7996a8d335c</string>
 // CHECK-NEXT:   <key>issue_context_kind</key><string>function</string>
 // CHECK-NEXT:   <key>issue_context</key><string>useMultiNote</string>
-// CHECK-NEXT:   <key>issue_hash_function_offset</key><string>4</string>
+// CHECK-NEXT:   <key>issue_hash_function_offset</key><string>5</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>70</integer>
+// CHECK-NEXT:    <key>line</key><integer>71</integer>
 // CHECK-NEXT:    <key>col</key><integer>10</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
@@ -1402,7 +1471,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>82</integer>
+// CHECK-NEXT:       <key>line</key><integer>83</integer>
 // CHECK-NEXT:       <key>col</key><integer>3</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1410,12 +1479,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>82</integer>
+// CHECK-NEXT:          <key>line</key><integer>83</integer>
 // CHECK-NEXT:          <key>col</key><integer>3</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>82</integer>
+// CHECK-NEXT:          <key>line</key><integer>83</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1431,7 +1500,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>82</integer>
+// CHECK-NEXT:       <key>line</key><integer>83</integer>
 // CHECK-NEXT:       <key>col</key><integer>3</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1439,12 +1508,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>82</integer>
+// CHECK-NEXT:          <key>line</key><integer>83</integer>
 // CHECK-NEXT:          <key>col</key><integer>3</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>82</integer>
+// CHECK-NEXT:          <key>line</key><integer>83</integer>
 // CHECK-NEXT:          <key>col</key><integer>12</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1460,7 +1529,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>75</integer>
+// CHECK-NEXT:       <key>line</key><integer>76</integer>
 // CHECK-NEXT:       <key>col</key><integer>1</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1478,12 +1547,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>75</integer>
+// CHECK-NEXT:            <key>line</key><integer>76</integer>
 // CHECK-NEXT:            <key>col</key><integer>1</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>75</integer>
+// CHECK-NEXT:            <key>line</key><integer>76</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1491,12 +1560,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>76</integer>
+// CHECK-NEXT:            <key>line</key><integer>77</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>76</integer>
+// CHECK-NEXT:            <key>line</key><integer>77</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1512,12 +1581,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>76</integer>
+// CHECK-NEXT:            <key>line</key><integer>77</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>76</integer>
+// CHECK-NEXT:            <key>line</key><integer>77</integer>
 // CHECK-NEXT:            <key>col</key><integer>4</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1525,12 +1594,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>78</integer>
+// CHECK-NEXT:            <key>line</key><integer>79</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>78</integer>
+// CHECK-NEXT:            <key>line</key><integer>79</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1546,12 +1615,12 @@
 // CHECK-NEXT:         <key>start</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>78</integer>
+// CHECK-NEXT:            <key>line</key><integer>79</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>78</integer>
+// CHECK-NEXT:            <key>line</key><integer>79</integer>
 // CHECK-NEXT:            <key>col</key><integer>3</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1559,12 +1628,12 @@
 // CHECK-NEXT:         <key>end</key>
 // CHECK-NEXT:          <array>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>78</integer>
+// CHECK-NEXT:            <key>line</key><integer>79</integer>
 // CHECK-NEXT:            <key>col</key><integer>6</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
 // CHECK-NEXT:           <dict>
-// CHECK-NEXT:            <key>line</key><integer>78</integer>
+// CHECK-NEXT:            <key>line</key><integer>79</integer>
 // CHECK-NEXT:            <key>col</key><integer>6</integer>
 // CHECK-NEXT:            <key>file</key><integer>0</integer>
 // CHECK-NEXT:           </dict>
@@ -1576,7 +1645,7 @@
 // CHECK-NEXT:      <key>kind</key><string>event</string>
 // CHECK-NEXT:      <key>location</key>
 // CHECK-NEXT:      <dict>
-// CHECK-NEXT:       <key>line</key><integer>78</integer>
+// CHECK-NEXT:       <key>line</key><integer>79</integer>
 // CHECK-NEXT:       <key>col</key><integer>6</integer>
 // CHECK-NEXT:       <key>file</key><integer>0</integer>
 // CHECK-NEXT:      </dict>
@@ -1584,12 +1653,12 @@
 // CHECK-NEXT:      <array>
 // CHECK-NEXT:        <array>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>78</integer>
+// CHECK-NEXT:          <key>line</key><integer>79</integer>
 // CHECK-NEXT:          <key>col</key><integer>4</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
 // CHECK-NEXT:         <dict>
-// CHECK-NEXT:          <key>line</key><integer>78</integer>
+// CHECK-NEXT:          <key>line</key><integer>79</integer>
 // CHECK-NEXT:          <key>col</key><integer>4</integer>
 // CHECK-NEXT:          <key>file</key><integer>0</integer>
 // CHECK-NEXT:         </dict>
@@ -1613,7 +1682,7 @@
 // CHECK-NEXT:   <key>issue_hash_function_offset</key><string>3</string>
 // CHECK-NEXT:   <key>location</key>
 // CHECK-NEXT:   <dict>
-// CHECK-NEXT:    <key>line</key><integer>78</integer>
+// CHECK-NEXT:    <key>line</key><integer>79</integer>
 // CHECK-NEXT:    <key>col</key><integer>6</integer>
 // CHECK-NEXT:    <key>file</key><integer>0</integer>
 // CHECK-NEXT:   </dict>
diff --git a/test/Analysis/plist-stats-output.c b/test/Analysis/plist-stats-output.c
new file mode 100644
index 0000000..072fe5e
--- /dev/null
+++ b/test/Analysis/plist-stats-output.c
@@ -0,0 +1,14 @@
+// RUN: %clang_analyze_cc1 %s -analyzer-checker=core -analyzer-output=plist -analyzer-config serialize-stats=true -o %t.plist
+// REQUIRES: asserts
+// RUN: FileCheck --input-file=%t.plist %s
+
+int foo() {}
+
+
+// CHECK:  <key>diagnostics</key>
+// CHECK-NEXT:  <array>
+// CHECK-NEXT:  </array>
+// CHECK-NEXT: <key>statistics</key>
+// CHECK-NEXT: <string>{
+// CHECK: }
+// CHECK-NEXT: </string>
diff --git a/test/Analysis/pointer-to-member.cpp b/test/Analysis/pointer-to-member.cpp
index 0fbb992..6588252 100644
--- a/test/Analysis/pointer-to-member.cpp
+++ b/test/Analysis/pointer-to-member.cpp
@@ -230,3 +230,42 @@
   clang_analyzer_eval(d2.*(static_cast<int D2::*>(static_cast<int R2::*>(static_cast<int R1::*>(&B::f)))) == 4); // expected-warning {{TRUE}}
 }
 } // end of testPointerToMemberDiamond namespace
+
+namespace testAnonymousMember {
+struct A {
+  struct {
+    int x;
+  };
+  struct {
+    struct {
+      int y;
+    };
+  };
+  struct {
+    union {
+      int z;
+    };
+  };
+};
+
+void test() {
+  clang_analyzer_eval(&A::x); // expected-warning{{TRUE}}
+  clang_analyzer_eval(&A::y); // expected-warning{{TRUE}}
+  clang_analyzer_eval(&A::z); // expected-warning{{TRUE}}
+
+  // FIXME: These should be true.
+  int A::*l = &A::x, A::*m = &A::y, A::*n = &A::z;
+  clang_analyzer_eval(l); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(m); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(n); // expected-warning{{UNKNOWN}}
+
+  // FIXME: These should be true as well.
+  A a;
+  a.x = 1;
+  clang_analyzer_eval(a.*l == 1); // expected-warning{{UNKNOWN}}
+  a.y = 2;
+  clang_analyzer_eval(a.*m == 2); // expected-warning{{UNKNOWN}}
+  a.z = 3;
+  clang_analyzer_eval(a.*n == 3); // expected-warning{{UNKNOWN}}
+}
+} // end of testAnonymousMember namespace
diff --git a/test/Analysis/ptr-arith.c b/test/Analysis/ptr-arith.c
index b78ec50..93cb4ee 100644
--- a/test/Analysis/ptr-arith.c
+++ b/test/Analysis/ptr-arith.c
@@ -342,3 +342,8 @@
   clang_analyzer_eval(*ptr3 == 'a'); // expected-warning{{UNKNOWN}}
 }
 
+void test_no_crash_on_pointer_to_label() {
+  char *a = &&label;
+  a[0] = 0;
+label:;
+}
diff --git a/test/Analysis/region-store.cpp b/test/Analysis/region-store.cpp
index cb49f48..ab179ce 100644
--- a/test/Analysis/region-store.cpp
+++ b/test/Analysis/region-store.cpp
@@ -25,4 +25,4 @@
   Builder->setLoc(l);
   return Builder->accessBase();
   
-}
\ No newline at end of file
+}
diff --git a/test/Analysis/region_store_overflow.c b/test/Analysis/region_store_overflow.c
new file mode 100644
index 0000000..6401b83
--- /dev/null
+++ b/test/Analysis/region_store_overflow.c
@@ -0,0 +1,11 @@
+// RUN: %clang_analyze_cc1 -analyze -analyzer-checker=core -verify %s
+
+// expected-no-diagnostics
+int **h;
+int overflow_in_memregion(long j) {
+  for (int l = 0;; ++l) {
+    if (j - l > 0)
+      return h[j - l][0]; // no-crash
+  }
+  return 0;
+}
diff --git a/test/Analysis/retain-release.m b/test/Analysis/retain-release.m
index a1cc62c..4add50e 100644
--- a/test/Analysis/retain-release.m
+++ b/test/Analysis/retain-release.m
@@ -450,6 +450,51 @@
   if (session) NSLog(@"ok");
 }
 
+
+// Handle CoreMedia API
+
+struct CMFoo;
+typedef struct CMFoo *CMFooRef;
+
+CMFooRef CMCreateFooRef();
+CMFooRef CMGetFooRef();
+
+typedef signed long SInt32;
+typedef SInt32  OSStatus;
+OSStatus CMCreateFooAndReturnViaOutParameter(CMFooRef * CF_RETURNS_RETAINED fooOut);
+
+void testLeakCoreMediaReferenceType() {
+  CMFooRef f = CMCreateFooRef(); // expected-warning{{leak}}
+}
+
+void testOverReleaseMediaReferenceType() {
+  CMFooRef f = CMGetFooRef();
+  CFRelease(f); // expected-warning{{Incorrect decrement of the reference count}}
+}
+
+void testOkToReleaseReturnsRetainedOutParameter() {
+  CMFooRef foo = 0;
+  OSStatus status = CMCreateFooAndReturnViaOutParameter(&foo);
+
+  if (status != 0)
+    return;
+
+  CFRelease(foo); // no-warning
+}
+
+void testLeakWithReturnsRetainedOutParameter() {
+  CMFooRef foo = 0;
+  OSStatus status = CMCreateFooAndReturnViaOutParameter(&foo);
+
+  if (status != 0)
+    return;
+
+  // FIXME: Ideally we would report a leak here since it is the caller's
+  // responsibility to release 'foo'. However, we don't currently have
+  // a mechanism in this checker to only require a release when a successful
+  // status is returned.
+}
+
 // Test retain/release checker with CFString and CFMutableArray.
 void f11() {
   // Create the array.
diff --git a/test/Analysis/return-stmt-merge.cpp b/test/Analysis/return-stmt-merge.cpp
new file mode 100644
index 0000000..a6bacc5
--- /dev/null
+++ b/test/Analysis/return-stmt-merge.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.AnalysisOrder,debug.ExprInspection -analyzer-config debug.AnalysisOrder:PreCall=true,debug.AnalysisOrder:PostCall=true,debug.AnalysisOrder:LiveSymbols=true %s 2>&1 | FileCheck %s
+
+// This test ensures that check::LiveSymbols is called as many times on the
+// path through the second "return" as it is through the first "return"
+// (three), and therefore the two paths were not merged prematurely before the
+// respective return statement is evaluated.
+// The paths would still be merged later, so we'd have only one post-call for
+// foo(), but it is incorrect to merge them in the middle of evaluating two
+// different statements.
+int coin();
+
+void foo() {
+  int x = coin();
+  if (x > 0)
+    return;
+  else
+    return;
+}
+
+void bar() {
+  foo();
+}
+
+// CHECK:      LiveSymbols
+// CHECK-NEXT: LiveSymbols
+// CHECK-NEXT: PreCall (foo)
+// CHECK-NEXT: LiveSymbols
+// CHECK-NEXT: LiveSymbols
+// CHECK-NEXT: PreCall (coin)
+// CHECK-NEXT: PostCall (coin)
+// CHECK-NEXT: LiveSymbols
+// CHECK-NEXT: LiveSymbols
+// CHECK-NEXT: LiveSymbols
+// CHECK-NEXT: PostCall (foo)
+// CHECK-NEXT: LiveSymbols
+// CHECK-NEXT: LiveSymbols
+// CHECK-NEXT: LiveSymbols
diff --git a/test/Analysis/security-syntax-checks.m b/test/Analysis/security-syntax-checks.m
index 04a4c7d8..8560ba2 100644
--- a/test/Analysis/security-syntax-checks.m
+++ b/test/Analysis/security-syntax-checks.m
@@ -146,6 +146,16 @@
   strcpy(x, y); //expected-warning{{Call to function 'strcpy' is insecure as it does not provide bounding of the memory buffer. Replace unbounded copy functions with analogous functions that support length arguments such as 'strlcpy'. CWE-119}}
 }
 
+void test_strcpy_2() {
+  char x[4];
+  strcpy(x, "abcd"); //expected-warning{{Call to function 'strcpy' is insecure as it does not provide bounding of the memory buffer. Replace unbounded copy functions with analogous functions that support length arguments such as 'strlcpy'. CWE-119}}
+}
+
+void test_strcpy_safe() {
+  char x[5];
+  strcpy(x, "abcd");
+}
+
 //===----------------------------------------------------------------------===
 // strcat()
 //===----------------------------------------------------------------------===
diff --git a/test/Analysis/stack-capture-leak-arc.mm b/test/Analysis/stack-capture-leak-arc.mm
new file mode 100644
index 0000000..1ffee93
--- /dev/null
+++ b/test/Analysis/stack-capture-leak-arc.mm
@@ -0,0 +1,189 @@
+// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 -analyzer-checker=core,alpha.core.StackAddressAsyncEscape -fblocks -fobjc-arc -verify %s
+
+typedef struct dispatch_queue_s *dispatch_queue_t;
+typedef void (^dispatch_block_t)(void);
+void dispatch_async(dispatch_queue_t queue, dispatch_block_t block);
+typedef long dispatch_once_t;
+void dispatch_once(dispatch_once_t *predicate, dispatch_block_t block);
+typedef long dispatch_time_t;
+void dispatch_after(dispatch_time_t when, dispatch_queue_t queue, dispatch_block_t block);
+void dispatch_barrier_sync(dispatch_queue_t queue, dispatch_block_t block);
+
+extern dispatch_queue_t queue;
+extern dispatch_once_t *predicate;
+extern dispatch_time_t when;
+
+void test_block_expr_async() {
+  int x = 123;
+  int *p = &x;
+
+  dispatch_async(queue, ^{
+    *p = 321;
+  });
+  // expected-warning@-3 {{Address of stack memory associated with local variable 'x' \
+is captured by an asynchronously-executed block}}
+}
+
+void test_block_expr_once_no_leak() {
+  int x = 123;
+  int *p = &x;
+  // synchronous, no warning
+  dispatch_once(predicate, ^{
+    *p = 321;
+  });
+}
+
+void test_block_expr_after() {
+  int x = 123;
+  int *p = &x;
+  dispatch_after(when, queue, ^{
+    *p = 321;
+  });
+  // expected-warning@-3 {{Address of stack memory associated with local variable 'x' \
+is captured by an asynchronously-executed block}}
+}
+
+void test_block_expr_async_no_leak() {
+  int x = 123;
+  int *p = &x;
+  // no leak
+  dispatch_async(queue, ^{
+    int y = x;
+    ++y;
+  });
+}
+
+void test_block_var_async() {
+  int x = 123;
+  int *p = &x;
+  void (^b)(void) = ^void(void) {
+    *p = 1; 
+  };
+  dispatch_async(queue, b);
+  // expected-warning@-1 {{Address of stack memory associated with local variable 'x' \
+is captured by an asynchronously-executed block}}
+}
+
+void test_block_with_ref_async() {
+  int x = 123;
+  int &r = x;
+  void (^b)(void) = ^void(void) {
+    r = 1; 
+  };
+  dispatch_async(queue, b);
+  // expected-warning@-1 {{Address of stack memory associated with local variable 'x' \
+is captured by an asynchronously-executed block}}
+}
+
+dispatch_block_t get_leaking_block() {
+  int leaked_x = 791;
+  int *p = &leaked_x;
+  return ^void(void) {
+    *p = 1; 
+  };
+  // expected-warning@-3 {{Address of stack memory associated with local variable 'leaked_x' \
+is captured by a returned block}}
+}
+
+void test_returned_from_func_block_async() {
+  dispatch_async(queue, get_leaking_block());
+  // expected-warning@-1 {{Address of stack memory associated with local variable 'leaked_x' \
+is captured by an asynchronously-executed block}}
+}
+
+// synchronous, no leak
+void test_block_var_once() {
+  int x = 123;
+  int *p = &x;
+  void (^b)(void) = ^void(void) {
+    *p = 1; 
+  };
+  dispatch_once(predicate, b); // no-warning
+}
+
+void test_block_var_after() {
+  int x = 123;
+  int *p = &x;
+  void (^b)(void) = ^void(void) {
+    *p = 1; 
+  };
+  dispatch_after(when, queue, b);
+  // expected-warning@-1 {{Address of stack memory associated with local variable 'x' \
+is captured by an asynchronously-executed block}}
+}
+
+void test_block_var_async_no_leak() {
+  int x = 123;
+  int *p = &x;
+  void (^b)(void) = ^void(void) {
+    int y = x;
+    ++y; 
+  };
+  dispatch_async(queue, b); // no-warning
+}
+
+void test_block_inside_block_async_no_leak() {
+  int x = 123;
+  int *p = &x;
+  void (^inner)(void) = ^void(void) {
+    int y = x;
+    ++y; 
+  };
+  void (^outer)(void) = ^void(void) {
+    int z = x;
+    ++z;
+    inner(); 
+  };
+  dispatch_async(queue, outer); // no-warning
+}
+
+dispatch_block_t accept_and_pass_back_block(dispatch_block_t block) {
+  block();
+  return block; // no-warning
+}
+
+void test_passing_continuation_no_leak() {
+  int x = 123;
+  int *p = &x;
+  void (^cont)(void) = ^void(void) {
+    *p = 128;
+  };
+  accept_and_pass_back_block(cont); // no-warning
+}
+
+@interface NSObject
+@end
+@protocol OS_dispatch_semaphore
+@end
+typedef NSObject<OS_dispatch_semaphore> *dispatch_semaphore_t;
+dispatch_semaphore_t dispatch_semaphore_create(long value);
+long dispatch_semaphore_wait(dispatch_semaphore_t dsema, dispatch_time_t timeout);
+long dispatch_semaphore_signal(dispatch_semaphore_t dsema);
+
+void test_no_leaks_on_semaphore_pattern() {
+  int x = 0;
+  int *p = &x;
+  dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);
+  dispatch_async(queue, ^{
+    *p = 1;
+    // Some work.
+    dispatch_semaphore_signal(semaphore);
+  }); // no-warning
+
+  // Do some other work concurrently with the asynchronous work
+  // Wait for the asynchronous work to finish
+  dispatch_semaphore_wait(semaphore, 1000);
+}
+
+void test_dispatch_barrier_sync() {
+  int buf[16];
+  for (int n = 0; n < 16; ++n) {
+    int *ptr = &buf[n];
+    // FIXME: Should not warn. The dispatch_barrier_sync() call ensures
+    // that the block does not outlive 'buf'.
+    dispatch_async(queue, ^{ // expected-warning{{Address of stack memory associated with local variable 'buf' is captured by an asynchronously-executed block}}
+      (void)ptr;
+    });
+  }
+  dispatch_barrier_sync(queue, ^{});
+}
diff --git a/test/Analysis/stack-capture-leak-no-arc.mm b/test/Analysis/stack-capture-leak-no-arc.mm
new file mode 100644
index 0000000..33829f5
--- /dev/null
+++ b/test/Analysis/stack-capture-leak-no-arc.mm
@@ -0,0 +1,37 @@
+// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 -analyzer-checker=core,alpha.core.StackAddressAsyncEscape -fblocks -verify %s
+
+typedef struct dispatch_queue_s *dispatch_queue_t;
+typedef void (^dispatch_block_t)(void);
+void dispatch_async(dispatch_queue_t queue, dispatch_block_t block);
+extern dispatch_queue_t queue;
+
+void test_block_inside_block_async_no_leak() {
+  int x = 123;
+  int *p = &x;
+  void (^inner)(void) = ^void(void) {
+    int y = x;
+    ++y; 
+  };
+  // Block_copy(...) copies the captured block ("inner") too,
+  // there is no leak in this case.
+  dispatch_async(queue, ^void(void) {
+    int z = x;
+    ++z;
+    inner(); 
+  }); // no-warning
+}
+
+dispatch_block_t test_block_inside_block_async_leak() {
+  int x = 123;
+  void (^inner)(void) = ^void(void) {
+    int y = x;
+    ++y; 
+  };
+  void (^outer)(void) = ^void(void) {
+    int z = x;
+    ++z;
+    inner(); 
+  }; 
+  return outer; // expected-warning-re{{Address of stack-allocated block declared on line {{.+}} is captured by a returned block}}
+}
+
diff --git a/test/Analysis/string-with-signedness.c b/test/Analysis/string-with-signedness.c
new file mode 100644
index 0000000..1b00971
--- /dev/null
+++ b/test/Analysis/string-with-signedness.c
@@ -0,0 +1,10 @@
+// RUN: %clang_analyze_cc1 -Wno-incompatible-library-redeclaration -analyzer-checker=core,unix.cstring,alpha.unix.cstring -verify %s
+
+// expected-no-diagnostics
+
+void *strcpy(unsigned char *, unsigned char *);
+
+unsigned char a, b;
+void testUnsignedStrcpy() {
+  strcpy(&a, &b);
+}
diff --git a/test/Analysis/taint-generic.c b/test/Analysis/taint-generic.c
index d3ca246..2717e91 100644
--- a/test/Analysis/taint-generic.c
+++ b/test/Analysis/taint-generic.c
@@ -1,10 +1,16 @@
 // RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
 
 int scanf(const char *restrict format, ...);
 int getchar(void);
 
 typedef struct _FILE FILE;
+#ifdef FILE_IS_STRUCT
+extern struct _FILE *stdin;
+#else
 extern FILE *stdin;
+#endif
+
 int fscanf(FILE *restrict stream, const char *restrict format, ...);
 int sprintf(char *str, const char *format, ...);
 void setproctitle(const char *fmt, ...);
diff --git a/test/Analysis/taint-tester.c b/test/Analysis/taint-tester.c
index 1b59e7b..3a8cc18 100644
--- a/test/Analysis/taint-tester.c
+++ b/test/Analysis/taint-tester.c
@@ -189,3 +189,10 @@
 
 }
 
+char *pointer1;
+void *pointer2;
+void noCrashTest() {
+  if (!*pointer1) {
+    __builtin___memcpy_chk(pointer2, pointer1, 0, 0); // no-crash
+  }
+}
diff --git a/test/Analysis/temp-obj-dtors-cfg-output.cpp b/test/Analysis/temp-obj-dtors-cfg-output.cpp
index 372443b..d17c5be 100644
--- a/test/Analysis/temp-obj-dtors-cfg-output.cpp
+++ b/test/Analysis/temp-obj-dtors-cfg-output.cpp
@@ -1,8 +1,23 @@
 // RUN: rm -f %t
-// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG -analyzer-config cfg-temporary-dtors=true -std=c++98 %s > %t 2>&1
-// RUN: FileCheck --input-file=%t -check-prefix=CXX98 -check-prefix=CHECK %s
-// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG -analyzer-config cfg-temporary-dtors=true -std=c++11 %s > %t 2>&1
-// RUN: FileCheck --input-file=%t -check-prefix=CXX11 -check-prefix=CHECK %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG -analyzer-config cfg-temporary-dtors=true,cfg-rich-constructors=false -std=c++98 %s > %t 2>&1
+// RUN: FileCheck --input-file=%t -check-prefixes=CHECK,CXX98,WARNINGS,CXX98-WARNINGS %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG -analyzer-config cfg-temporary-dtors=true,cfg-rich-constructors=false -std=c++11 %s > %t 2>&1
+// RUN: FileCheck --input-file=%t -check-prefixes=CHECK,CXX11,WARNINGS,CXX11-WARNINGS %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG -analyzer-config cfg-temporary-dtors=true,cfg-rich-constructors=true -std=c++98 %s > %t 2>&1
+// RUN: FileCheck --input-file=%t -check-prefixes=CHECK,CXX98,ANALYZER,CXX98-ANALYZER %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG -analyzer-config cfg-temporary-dtors=true,cfg-rich-constructors=true -std=c++11 %s > %t 2>&1
+// RUN: FileCheck --input-file=%t -check-prefixes=CHECK,CXX11,ANALYZER,CXX11-ANALYZER %s
+
+// This file tests how we construct two different flavors of the Clang CFG -
+// the CFG used by the Sema analysis-based warnings and the CFG used by the
+// static analyzer. The difference in the behavior is checked via FileCheck
+// prefixes (WARNINGS and ANALYZER respectively). When introducing new analyzer
+// flags, no new run lines should be added - just these flags would go to the
+// respective line depending on where is it turned on and where is it turned
+// off. Feel free to add tests that test only one of the CFG flavors if you're
+// not sure how the other flavor is supposed to work in your case.
+
+// Additionally, different C++ standards are checked.
 
 class A {
 public:
@@ -201,11 +216,13 @@
 // CHECK:   [B2 (ENTRY)]
 // CHECK:     Succs (1): B1
 // CHECK:   [B1]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B1.2], [B1.4], class A)
 // CHECK:     2: [B1.1] (BindTemporary)
 // CHECK:     3: [B1.2] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     4: [B1.3]
-// CHECK:     5: [B1.4] (CXXConstructExpr, class A)
+// WARNINGS:     5: [B1.4] (CXXConstructExpr, class A)
+// ANALYZER:     5: [B1.4] (CXXConstructExpr, [B1.7], class A)
 // CHECK:     6: ~A() (Temporary object destructor)
 // CHECK:     7: return [B1.5];
 // CHECK:     Preds (1): B2
@@ -259,11 +276,13 @@
 // CHECK:   [B2 (ENTRY)]
 // CHECK:     Succs (1): B1
 // CHECK:   [B1]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B1.2], [B1.4], class A)
 // CHECK:     2: [B1.1] (BindTemporary)
 // CHECK:     3: [B1.2] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     4: [B1.3]
-// CHECK:     5: [B1.4] (CXXConstructExpr, class A)
+// WARNINGS:     5: [B1.4] (CXXConstructExpr, class A)
+// ANALYZER:     5: [B1.4] (CXXConstructExpr, [B1.7], class A)
 // CHECK:     6: ~A() (Temporary object destructor)
 // CHECK:     7: return [B1.5];
 // CHECK:     Preds (1): B2
@@ -273,13 +292,15 @@
 // CHECK:   [B2 (ENTRY)]
 // CHECK:     Succs (1): B1
 // CHECK:   [B1]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B1.2], class A)
 // CHECK:     2: [B1.1] (BindTemporary)
 // CHECK:     3: [B1.2].operator int
 // CHECK:     4: [B1.2]
 // CHECK:     5: [B1.4] (ImplicitCastExpr, UserDefinedConversion, int)
 // CHECK:     6: int([B1.5]) (CXXFunctionalCastExpr, NoOp, int)
-// CHECK:     7: B() (CXXConstructExpr, class B)
+// WARNINGS:     7: B() (CXXConstructExpr, class B)
+// ANALYZER:     7: B() (CXXConstructExpr, [B1.8], class B)
 // CHECK:     8: [B1.7] (BindTemporary)
 // CHECK:     9: [B1.8].operator int
 // CHECK:    10: [B1.8]
@@ -291,13 +312,15 @@
 // CHECK:    16: ~A() (Temporary object destructor)
 // CHECK:    17: foo
 // CHECK:    18: [B1.17] (ImplicitCastExpr, FunctionToPointerDecay, void (*)(int))
-// CHECK:    19: A() (CXXConstructExpr, class A)
+// WARNINGS:    19: A() (CXXConstructExpr, class A)
+// ANALYZER:    19: A() (CXXConstructExpr, [B1.20], class A)
 // CHECK:    20: [B1.19] (BindTemporary)
 // CHECK:    21: [B1.20].operator int
 // CHECK:    22: [B1.20]
 // CHECK:    23: [B1.22] (ImplicitCastExpr, UserDefinedConversion, int)
 // CHECK:    24: int([B1.23]) (CXXFunctionalCastExpr, NoOp, int)
-// CHECK:    25: B() (CXXConstructExpr, class B)
+// WARNINGS:    25: B() (CXXConstructExpr, class B)
+// ANALYZER:    25: B() (CXXConstructExpr, [B1.26], class B)
 // CHECK:    26: [B1.25] (BindTemporary)
 // CHECK:    27: [B1.26].operator int
 // CHECK:    28: [B1.26]
@@ -330,7 +353,8 @@
 // CHECK:     Preds (2): B4 B5
 // CHECK:     Succs (2): B2 B1
 // CHECK:   [B4]
-// CHECK:     1: B() (CXXConstructExpr, class B)
+// WARNINGS:     1: B() (CXXConstructExpr, class B)
+// ANALYZER:     1: B() (CXXConstructExpr, [B4.2], class B)
 // CHECK:     2: [B4.1] (BindTemporary)
 // CHECK:     3: [B4.2].operator bool
 // CHECK:     4: [B4.2]
@@ -341,7 +365,8 @@
 // CHECK:     1: ~A() (Temporary object destructor)
 // CHECK:     2: foo
 // CHECK:     3: [B5.2] (ImplicitCastExpr, FunctionToPointerDecay, void (*)(_Bool))
-// CHECK:     4: A() (CXXConstructExpr, class A)
+// WARNINGS:     4: A() (CXXConstructExpr, class A)
+// ANALYZER:     4: A() (CXXConstructExpr, [B5.5], class A)
 // CHECK:     5: [B5.4] (BindTemporary)
 // CHECK:     6: [B5.5].operator bool
 // CHECK:     7: [B5.5]
@@ -360,7 +385,8 @@
 // CHECK:     Preds (2): B8 B9
 // CHECK:     Succs (2): B6 B5
 // CHECK:   [B8]
-// CHECK:     1: B() (CXXConstructExpr, class B)
+// WARNINGS:     1: B() (CXXConstructExpr, class B)
+// ANALYZER:     1: B() (CXXConstructExpr, [B8.2], class B)
 // CHECK:     2: [B8.1] (BindTemporary)
 // CHECK:     3: [B8.2].operator bool
 // CHECK:     4: [B8.2]
@@ -368,7 +394,8 @@
 // CHECK:     Preds (1): B9
 // CHECK:     Succs (1): B7
 // CHECK:   [B9]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B9.2], class A)
 // CHECK:     2: [B9.1] (BindTemporary)
 // CHECK:     3: [B9.2].operator bool
 // CHECK:     4: [B9.2]
@@ -396,7 +423,8 @@
 // CHECK:     Preds (2): B4 B5
 // CHECK:     Succs (2): B2 B1
 // CHECK:   [B4]
-// CHECK:     1: B() (CXXConstructExpr, class B)
+// WARNINGS:     1: B() (CXXConstructExpr, class B)
+// ANALYZER:     1: B() (CXXConstructExpr, [B4.2], class B)
 // CHECK:     2: [B4.1] (BindTemporary)
 // CHECK:     3: [B4.2].operator bool
 // CHECK:     4: [B4.2]
@@ -407,7 +435,8 @@
 // CHECK:     1: ~A() (Temporary object destructor)
 // CHECK:     2: foo
 // CHECK:     3: [B5.2] (ImplicitCastExpr, FunctionToPointerDecay, void (*)(_Bool))
-// CHECK:     4: A() (CXXConstructExpr, class A)
+// WARNINGS:     4: A() (CXXConstructExpr, class A)
+// ANALYZER:     4: A() (CXXConstructExpr, [B5.5], class A)
 // CHECK:     5: [B5.4] (BindTemporary)
 // CHECK:     6: [B5.5].operator bool
 // CHECK:     7: [B5.5]
@@ -426,7 +455,8 @@
 // CHECK:     Preds (2): B8 B9
 // CHECK:     Succs (2): B6 B5
 // CHECK:   [B8]
-// CHECK:     1: B() (CXXConstructExpr, class B)
+// WARNINGS:     1: B() (CXXConstructExpr, class B)
+// ANALYZER:     1: B() (CXXConstructExpr, [B8.2], class B)
 // CHECK:     2: [B8.1] (BindTemporary)
 // CHECK:     3: [B8.2].operator bool
 // CHECK:     4: [B8.2]
@@ -434,7 +464,8 @@
 // CHECK:     Preds (1): B9
 // CHECK:     Succs (1): B7
 // CHECK:   [B9]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B9.2], class A)
 // CHECK:     2: [B9.1] (BindTemporary)
 // CHECK:     3: [B9.2].operator bool
 // CHECK:     4: [B9.2]
@@ -467,7 +498,8 @@
 // CHECK:     Succs (1): B1
 // CHECK:   [B4]
 // CHECK:     1: ~B() (Temporary object destructor)
-// CHECK:     2: B() (CXXConstructExpr, class B)
+// WARNINGS:     2: B() (CXXConstructExpr, class B)
+// ANALYZER:     2: B() (CXXConstructExpr, [B4.3], class B)
 // CHECK:     3: [B4.2] (BindTemporary)
 // CHECK:     4: [B4.3].operator bool
 // CHECK:     5: [B4.3]
@@ -492,22 +524,26 @@
 // CHECK:     1: [B10.5] ? [B8.6] : [B9.15]
 // CHECK:     2: [B7.1] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     3: [B7.2]
-// CHECK:     4: [B7.3] (CXXConstructExpr, class A)
+// WARNINGS:     4: [B7.3] (CXXConstructExpr, class A)
+// ANALYZER:     4: [B7.3] (CXXConstructExpr, [B7.5], class A)
 // CHECK:     5: A a = B() ? A() : A(B());
 // CHECK:     T: (Temp Dtor) [B9.2]
 // CHECK:     Preds (2): B8 B9
 // CHECK:     Succs (2): B6 B5
 // CHECK:   [B8]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B8.2], [B8.4], class A)
 // CHECK:     2: [B8.1] (BindTemporary)
 // CHECK:     3: [B8.2] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     4: [B8.3]
-// CHECK:     5: [B8.4] (CXXConstructExpr, class A)
+// WARNINGS:     5: [B8.4] (CXXConstructExpr, class A)
+// ANALYZER:     5: [B8.4] (CXXConstructExpr, [B8.6], [B7.3], class A)
 // CHECK:     6: [B8.5] (BindTemporary)
 // CHECK:     Preds (1): B10
 // CHECK:     Succs (1): B7
 // CHECK:   [B9]
-// CHECK:     1: B() (CXXConstructExpr, class B)
+// WARNINGS:     1: B() (CXXConstructExpr, class B)
+// ANALYZER:     1: B() (CXXConstructExpr, [B9.2], class B)
 // CHECK:     2: [B9.1] (BindTemporary)
 // CHECK:     3: [B9.2].operator A
 // CHECK:     4: [B9.2]
@@ -515,17 +551,20 @@
 // CHECK:     6: [B9.5] (BindTemporary)
 // CHECK:     7: [B9.6] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     8: [B9.7]
-// CHECK:     9: [B9.8] (CXXConstructExpr, class A)
+// WARNINGS:     9: [B9.8] (CXXConstructExpr, class A)
+// ANALYZER:     9: [B9.8] (CXXConstructExpr, [B9.10], [B9.13], class A)
 // CHECK:    10: [B9.9] (BindTemporary)
 // CHECK:    11: A([B9.10]) (CXXFunctionalCastExpr, ConstructorConversion, class A)
 // CHECK:    12: [B9.11] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:    13: [B9.12]
-// CHECK:    14: [B9.13] (CXXConstructExpr, class A)
+// WARNINGS:    14: [B9.13] (CXXConstructExpr, class A)
+// ANALYZER:    14: [B9.13] (CXXConstructExpr, [B9.15], [B7.3], class A)
 // CHECK:    15: [B9.14] (BindTemporary)
 // CHECK:     Preds (1): B10
 // CHECK:     Succs (1): B7
 // CHECK:   [B10]
-// CHECK:     1: B() (CXXConstructExpr, class B)
+// WARNINGS:     1: B() (CXXConstructExpr, class B)
+// ANALYZER:     1: B() (CXXConstructExpr, [B10.2], class B)
 // CHECK:     2: [B10.1] (BindTemporary)
 // CHECK:     3: [B10.2].operator bool
 // CHECK:     4: [B10.2]
@@ -592,7 +631,8 @@
 // CHECK:     Preds (1): B3
 // CHECK:     Succs (1): B0
 // CHECK:   [B3]
-// CHECK:     1: C() (CXXConstructExpr, struct C)
+// WARNINGS:     1: C() (CXXConstructExpr, struct C)
+// ANALYZER:     1: C() (CXXConstructExpr, [B3.2], struct C)
 // CHECK:     2: [B3.1] (BindTemporary)
 // CHECK:     3: [B3.2].operator bool
 // CHECK:     4: [B3.2]
@@ -621,11 +661,13 @@
 // CHECK:     Preds (1): B4
 // CHECK:     Succs (1): B0
 // CHECK:   [B4]
-// CHECK:     1: C() (CXXConstructExpr, struct C)
+// WARNINGS:     1: C() (CXXConstructExpr, struct C)
+// ANALYZER:     1: C() (CXXConstructExpr, [B4.2], [B4.4], struct C)
 // CHECK:     2: [B4.1] (BindTemporary)
 // CHECK:     3: [B4.2] (ImplicitCastExpr, NoOp, const struct C)
 // CHECK:     4: [B4.3]
-// CHECK:     5: [B4.4] (CXXConstructExpr, struct C)
+// WARNINGS:     5: [B4.4] (CXXConstructExpr, struct C)
+// ANALYZER:     5: [B4.4] (CXXConstructExpr, [B4.6], struct C)
 // CHECK:     6: C c = C();
 // CHECK:     7: ~C() (Temporary object destructor)
 // CHECK:     8: c
@@ -672,18 +714,23 @@
 // CHECK:     Preds (1): B3
 // CHECK:     Succs (1): B0
 // CHECK:   [B3]
-// CHECK:     1: D() (CXXConstructExpr, struct D)
+// CXX98-WARNINGS:     1: D() (CXXConstructExpr, struct D)
+// CXX98-ANALYZER:     1: D() (CXXConstructExpr, [B3.3], struct D)
 // CXX98:     2: [B3.1] (ImplicitCastExpr, NoOp, const struct D)
 // CXX98:     3: [B3.2]
-// CXX98:     4: [B3.3] (CXXConstructExpr, struct D)
+// CXX98-WARNINGS:     4: [B3.3] (CXXConstructExpr, struct D)
+// CXX98-ANALYZER:     4: [B3.3] (CXXConstructExpr, [B3.5], struct D)
 // CXX98:     5: D d = D();
 // CXX98:     6: d
 // CXX98:     7: [B3.6].operator bool
 // CXX98:     8: [B3.6]
 // CXX98:     9: [B3.8] (ImplicitCastExpr, UserDefinedConversion, _Bool)
 // CXX98:     T: if [B3.9]
+// CXX11-WARNINGS:     1: D() (CXXConstructExpr, struct D)
+// CXX11-ANALYZER:     1: D() (CXXConstructExpr, [B3.2], struct D)
 // CXX11:     2: [B3.1]
-// CXX11:     3: [B3.2] (CXXConstructExpr, struct D)
+// CXX11-WARNINGS:     3: [B3.2] (CXXConstructExpr, struct D)
+// CXX11-ANALYZER:     3: [B3.2] (CXXConstructExpr, [B3.4], struct D)
 // CXX11:     4: D d = D();
 // CXX11:     5: d
 // CXX11:     6: [B3.5].operator bool
@@ -723,16 +770,19 @@
 // CHECK:     Preds (2): B5 B6
 // CHECK:     Succs (2): B3 B2
 // CHECK:   [B5]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B5.2], [B5.4], class A)
 // CHECK:     2: [B5.1] (BindTemporary)
 // CHECK:     3: [B5.2] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     4: [B5.3]
-// CHECK:     5: [B5.4] (CXXConstructExpr, class A)
+// WARNINGS:     5: [B5.4] (CXXConstructExpr, class A)
+// ANALYZER:     5: [B5.4] (CXXConstructExpr, [B5.6], [B4.3], class A)
 // CHECK:     6: [B5.5] (BindTemporary)
 // CHECK:     Preds (1): B7
 // CHECK:     Succs (1): B4
 // CHECK:   [B6]
-// CHECK:     1: B() (CXXConstructExpr, class B)
+// WARNINGS:     1: B() (CXXConstructExpr, class B)
+// ANALYZER:     1: B() (CXXConstructExpr, [B6.2], class B)
 // CHECK:     2: [B6.1] (BindTemporary)
 // CHECK:     3: [B6.2].operator A
 // CHECK:     4: [B6.2]
@@ -740,12 +790,14 @@
 // CHECK:     6: [B6.5] (BindTemporary)
 // CHECK:     7: [B6.6] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     8: [B6.7]
-// CHECK:     9: [B6.8] (CXXConstructExpr, class A)
+// WARNINGS:     9: [B6.8] (CXXConstructExpr, class A)
+// ANALYZER:     9: [B6.8] (CXXConstructExpr, [B6.10], [B6.13], class A)
 // CHECK:    10: [B6.9] (BindTemporary)
 // CHECK:    11: A([B6.10]) (CXXFunctionalCastExpr, ConstructorConversion, class A)
 // CHECK:    12: [B6.11] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:    13: [B6.12]
-// CHECK:    14: [B6.13] (CXXConstructExpr, class A)
+// WARNINGS:    14: [B6.13] (CXXConstructExpr, class A)
+// ANALYZER:    14: [B6.13] (CXXConstructExpr, [B6.15], [B4.3], class A)
 // CHECK:    15: [B6.14] (BindTemporary)
 // CHECK:     Preds (1): B7
 // CHECK:     Succs (1): B4
@@ -753,7 +805,8 @@
 // CHECK:     1: ~B() (Temporary object destructor)
 // CHECK:     2: foo
 // CHECK:     3: [B7.2] (ImplicitCastExpr, FunctionToPointerDecay, void (*)(const class A &))
-// CHECK:     4: B() (CXXConstructExpr, class B)
+// WARNINGS:     4: B() (CXXConstructExpr, class B)
+// ANALYZER:     4: B() (CXXConstructExpr, [B7.5], class B)
 // CHECK:     5: [B7.4] (BindTemporary)
 // CHECK:     6: [B7.5].operator bool
 // CHECK:     7: [B7.5]
@@ -780,16 +833,19 @@
 // CHECK:     Preds (2): B11 B12
 // CHECK:     Succs (2): B9 B8
 // CHECK:   [B11]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B11.2], [B11.4], class A)
 // CHECK:     2: [B11.1] (BindTemporary)
 // CHECK:     3: [B11.2] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     4: [B11.3]
-// CHECK:     5: [B11.4] (CXXConstructExpr, class A)
+// WARNINGS:     5: [B11.4] (CXXConstructExpr, class A)
+// ANALYZER:     5: [B11.4] (CXXConstructExpr, [B11.6], [B10.3], class A)
 // CHECK:     6: [B11.5] (BindTemporary)
 // CHECK:     Preds (1): B13
 // CHECK:     Succs (1): B10
 // CHECK:   [B12]
-// CHECK:     1: B() (CXXConstructExpr, class B)
+// WARNINGS:     1: B() (CXXConstructExpr, class B)
+// ANALYZER:     1: B() (CXXConstructExpr, [B12.2], class B)
 // CHECK:     2: [B12.1] (BindTemporary)
 // CHECK:     3: [B12.2].operator A
 // CHECK:     4: [B12.2]
@@ -797,17 +853,20 @@
 // CHECK:     6: [B12.5] (BindTemporary)
 // CHECK:     7: [B12.6] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     8: [B12.7]
-// CHECK:     9: [B12.8] (CXXConstructExpr, class A)
+// WARNINGS:     9: [B12.8] (CXXConstructExpr, class A)
+// ANALYZER:     9: [B12.8] (CXXConstructExpr, [B12.10], [B12.13], class A)
 // CHECK:    10: [B12.9] (BindTemporary)
 // CHECK:    11: A([B12.10]) (CXXFunctionalCastExpr, ConstructorConversion, class A)
 // CHECK:    12: [B12.11] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:    13: [B12.12]
-// CHECK:    14: [B12.13] (CXXConstructExpr, class A)
+// WARNINGS:    14: [B12.13] (CXXConstructExpr, class A)
+// ANALYZER:    14: [B12.13] (CXXConstructExpr, [B12.15], [B10.3], class A)
 // CHECK:    15: [B12.14] (BindTemporary)
 // CHECK:     Preds (1): B13
 // CHECK:     Succs (1): B10
 // CHECK:   [B13]
-// CHECK:     1: B() (CXXConstructExpr, class B)
+// WARNINGS:     1: B() (CXXConstructExpr, class B)
+// ANALYZER:     1: B() (CXXConstructExpr, [B13.2], class B)
 // CHECK:     2: [B13.1] (BindTemporary)
 // CHECK:     3: [B13.2].operator bool
 // CHECK:     4: [B13.2]
@@ -834,37 +893,53 @@
 // CHECK:     Preds (1): B4
 // CHECK:     Succs (1): B1
 // CHECK:   [B4]
-// CHECK:     1: [B7.2] ?: [B6.6]
+// CXX98:     1: [B7.2] ?: [B6.6]
+// CXX11:     1: [B7.3] ?: [B6.6]
 // CHECK:     2: [B4.1] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     3: [B4.2]
-// CHECK:     4: [B4.3] (CXXConstructExpr, class A)
+// WARNINGS:     4: [B4.3] (CXXConstructExpr, class A)
+// ANALYZER:     4: [B4.3] (CXXConstructExpr, [B4.5], class A)
 // CHECK:     5: A a = A() ?: A();
 // CHECK:     T: (Temp Dtor) [B6.2]
 // CHECK:     Preds (2): B5 B6
 // CHECK:     Succs (2): B3 B2
 // CHECK:   [B5]
-// CHECK:     1: [B7.2] (ImplicitCastExpr, NoOp, const class A)
-// CHECK:     2: [B5.1]
-// CHECK:     3: [B5.2] (CXXConstructExpr, class A)
-// CHECK:     4: [B5.3] (BindTemporary)
+// CXX98:     1: [B7.2] (ImplicitCastExpr, NoOp, const class A)
+// CXX98:     2: [B5.1]
+// WARNINGS-CXX98:     3: [B5.2] (CXXConstructExpr, class A)
+// ANALYZER-CXX98:     3: [B5.2] (CXXConstructExpr, [B5.4], class A)
+// CXX98:     4: [B5.3] (BindTemporary)
+// CXX11:     1: [B7.3] (ImplicitCastExpr, NoOp, const class A)
+// WARNINGS-CXX11:     2: [B5.1] (CXXConstructExpr, class A)
+// ANALYZER-CXX11:     2: [B5.1] (CXXConstructExpr, [B5.3], class A)
+// CXX11:     3: [B5.2] (BindTemporary)
 // CHECK:     Preds (1): B7
 // CHECK:     Succs (1): B4
 // CHECK:   [B6]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B6.2], [B6.4], class A)
 // CHECK:     2: [B6.1] (BindTemporary)
 // CHECK:     3: [B6.2] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     4: [B6.3]
-// CHECK:     5: [B6.4] (CXXConstructExpr, class A)
+// WARNINGS:     5: [B6.4] (CXXConstructExpr, class A)
+// ANALYZER:     5: [B6.4] (CXXConstructExpr, [B6.6], class A)
 // CHECK:     6: [B6.5] (BindTemporary)
 // CHECK:     Preds (1): B7
 // CHECK:     Succs (1): B4
 // CHECK:   [B7]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER-CXX98:     1: A() (CXXConstructExpr, [B7.2], [B7.3], class A)
+// ANALYZER-CXX11:     1: A() (CXXConstructExpr, [B7.2], class A)
 // CHECK:     2: [B7.1] (BindTemporary)
-// CHECK:     3: [B7.2].operator bool
-// CHECK:     4: [B7.2]
-// CHECK:     5: [B7.4] (ImplicitCastExpr, UserDefinedConversion, _Bool)
-// CHECK:     T: [B7.5] ? ... : ...
+// CXX98:     3: [B7.2].operator bool
+// CXX98:     4: [B7.2]
+// CXX98:     5: [B7.4] (ImplicitCastExpr, UserDefinedConversion, _Bool)
+// CXX98:     T: [B7.5] ? ... : ...
+// CXX11:     3: [B7.2]
+// CXX11:     4: [B7.3].operator bool
+// CXX11:     5: [B7.3]
+// CXX11:     6: [B7.5] (ImplicitCastExpr, UserDefinedConversion, _Bool)
+// CXX11:     T: [B7.6] ? ... : ...
 // CHECK:     Preds (1): B8
 // CHECK:     Succs (2): B5 B6
 // CHECK:   [B0 (EXIT)]
@@ -886,7 +961,8 @@
 // CHECK:     Preds (1): B4
 // CHECK:     Succs (1): B1
 // CHECK:   [B4]
-// CHECK:     1: [B7.4] ?: [B6.6]
+// CXX98:     1: [B7.4] ?: [B6.6]
+// CXX11:     1: [B7.5] ?: [B6.6]
 // CHECK:     2: [B4.1] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     3: [B4.2]
 // CHECK:     4: [B7.2]([B4.3])
@@ -894,30 +970,44 @@
 // CHECK:     Preds (2): B5 B6
 // CHECK:     Succs (2): B3 B2
 // CHECK:   [B5]
-// CHECK:     1: [B7.4] (ImplicitCastExpr, NoOp, const class A)
-// CHECK:     2: [B5.1]
-// CHECK:     3: [B5.2] (CXXConstructExpr, class A)
-// CHECK:     4: [B5.3] (BindTemporary)
+// CXX98:     1: [B7.4] (ImplicitCastExpr, NoOp, const class A)
+// CXX98:     2: [B5.1]
+// WARNINGS-CXX98:     3: [B5.2] (CXXConstructExpr, class A)
+// ANALYZER-CXX98:     3: [B5.2] (CXXConstructExpr, [B5.4], class A)
+// CXX98:     4: [B5.3] (BindTemporary)
+// CXX11:     1: [B7.5] (ImplicitCastExpr, NoOp, const class A)
+// WARNINGS-CXX11:     2: [B5.1] (CXXConstructExpr, class A)
+// ANALYZER-CXX11:     2: [B5.1] (CXXConstructExpr, [B5.3], class A)
+// CXX11:     3: [B5.2] (BindTemporary)
 // CHECK:     Preds (1): B7
 // CHECK:     Succs (1): B4
 // CHECK:   [B6]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B6.2], [B6.4], class A)
 // CHECK:     2: [B6.1] (BindTemporary)
 // CHECK:     3: [B6.2] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     4: [B6.3]
-// CHECK:     5: [B6.4] (CXXConstructExpr, class A)
+// WARNINGS:     5: [B6.4] (CXXConstructExpr, class A)
+// ANALYZER:     5: [B6.4] (CXXConstructExpr, [B6.6], class A)
 // CHECK:     6: [B6.5] (BindTemporary)
 // CHECK:     Preds (1): B7
 // CHECK:     Succs (1): B4
 // CHECK:   [B7]
 // CHECK:     1: foo
 // CHECK:     2: [B7.1] (ImplicitCastExpr, FunctionToPointerDecay, void (*)(const class A &))
-// CHECK:     3: A() (CXXConstructExpr, class A)
+// WARNINGS:     3: A() (CXXConstructExpr, class A)
+// ANALYZER-CXX98:     3: A() (CXXConstructExpr, [B7.4], class A)
+// ANALYZER-CXX11:     3: A() (CXXConstructExpr, class A)
 // CHECK:     4: [B7.3] (BindTemporary)
-// CHECK:     5: [B7.4].operator bool
-// CHECK:     6: [B7.4]
-// CHECK:     7: [B7.6] (ImplicitCastExpr, UserDefinedConversion, _Bool)
-// CHECK:     T: [B7.7] ? ... : ...
+// CXX98:     5: [B7.4].operator bool
+// CXX98:     6: [B7.4]
+// CXX98:     7: [B7.6] (ImplicitCastExpr, UserDefinedConversion, _Bool)
+// CXX98:     T: [B7.7] ? ... : ...
+// CXX11:     5: [B7.4]
+// CXX11:     6: [B7.5].operator bool
+// CXX11:     7: [B7.5]
+// CXX11:     8: [B7.7] (ImplicitCastExpr, UserDefinedConversion, _Bool)
+// CXX11:     T: [B7.8] ? ... : ...
 // CHECK:     Preds (2): B8 B9
 // CHECK:     Succs (2): B5 B6
 // CHECK:   [B8]
@@ -925,7 +1015,8 @@
 // CHECK:     Preds (1): B9
 // CHECK:     Succs (1): B7
 // CHECK:   [B9]
-// CHECK:     1: [B12.2] ?: [B11.6]
+// CXX98:     1: [B12.2] ?: [B11.6]
+// CXX11:     1: [B12.3] ?: [B11.6]
 // CHECK:     2: [B9.1] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     3: [B9.2]
 // CHECK:     4: const A &a = A() ?: A();
@@ -933,28 +1024,42 @@
 // CHECK:     Preds (2): B10 B11
 // CHECK:     Succs (2): B8 B7
 // CHECK:   [B10]
-// CHECK:     1: [B12.2] (ImplicitCastExpr, NoOp, const class A)
-// CHECK:     2: [B10.1]
-// CHECK:     3: [B10.2] (CXXConstructExpr, class A)
-// CHECK:     4: [B10.3] (BindTemporary)
+// CXX98:     1: [B12.2] (ImplicitCastExpr, NoOp, const class A)
+// CXX98:     2: [B10.1]
+// WARNINGS-CXX98:     3: [B10.2] (CXXConstructExpr, class A)
+// ANALYZER-CXX98:     3: [B10.2] (CXXConstructExpr, [B10.4], class A)
+// CXX98:     4: [B10.3] (BindTemporary)
+// CXX11:     1: [B12.3] (ImplicitCastExpr, NoOp, const class A)
+// WARNINGS-CXX11:     2: [B10.1] (CXXConstructExpr, class A)
+// ANALYZER-CXX11:     2: [B10.1] (CXXConstructExpr, [B10.3], class A)
+// CXX11:     3: [B10.2] (BindTemporary)
 // CHECK:     Preds (1): B12
 // CHECK:     Succs (1): B9
 // CHECK:   [B11]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS-CHECK:     1: A() (CXXConstructExpr, class A)
+// ANALYZER-CHECK:     1: A() (CXXConstructExpr, [B11.2], class A)
 // CHECK:     2: [B11.1] (BindTemporary)
 // CHECK:     3: [B11.2] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     4: [B11.3]
-// CHECK:     5: [B11.4] (CXXConstructExpr, class A)
+// WARNINGS:     5: [B11.4] (CXXConstructExpr, class A)
+// ANALYZER:     5: [B11.4] (CXXConstructExpr, [B11.6], class A)
 // CHECK:     6: [B11.5] (BindTemporary)
 // CHECK:     Preds (1): B12
 // CHECK:     Succs (1): B9
 // CHECK:   [B12]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER-CXX98:     1: A() (CXXConstructExpr, [B12.2], [B12.3], class A)
+// ANALYZER-CXX11:     1: A() (CXXConstructExpr, [B12.2], class A)
 // CHECK:     2: [B12.1] (BindTemporary)
-// CHECK:     3: [B12.2].operator bool
-// CHECK:     4: [B12.2]
-// CHECK:     5: [B12.4] (ImplicitCastExpr, UserDefinedConversion, _Bool)
-// CHECK:     T: [B12.5] ? ... : ...
+// CXX98:     3: [B12.2].operator bool
+// CXX98:     4: [B12.2]
+// CXX98:     5: [B12.4] (ImplicitCastExpr, UserDefinedConversion, _Bool)
+// CXX98:     T: [B12.5] ? ... : ...
+// CXX11:     3: [B12.2]
+// CXX11:     4: [B12.3].operator bool
+// CXX11:     5: [B12.3]
+// CXX11:     6: [B12.5] (ImplicitCastExpr, UserDefinedConversion, _Bool)
+// CXX11:     T: [B12.6] ? ... : ...
 // CHECK:     Preds (1): B13
 // CHECK:     Succs (2): B10 B11
 // CHECK:   [B0 (EXIT)]
@@ -962,11 +1067,13 @@
 // CHECK:   [B2 (ENTRY)]
 // CHECK:     Succs (1): B1
 // CHECK:   [B1]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B1.2], [B1.4], class A)
 // CHECK:     2: [B1.1] (BindTemporary)
 // CHECK:     3: [B1.2] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     4: [B1.3]
-// CHECK:     5: [B1.4] (CXXConstructExpr, class A)
+// WARNINGS:     5: [B1.4] (CXXConstructExpr, class A)
+// ANALYZER:     5: [B1.4] (CXXConstructExpr, [B1.6], class A)
 // CHECK:     6: A a = A();
 // CHECK:     7: ~A() (Temporary object destructor)
 // CHECK:     8: int b;
@@ -978,14 +1085,16 @@
 // CHECK:   [B2 (ENTRY)]
 // CHECK:     Succs (1): B1
 // CHECK:   [B1]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B1.2], [B1.4], class A)
 // CHECK:     2: [B1.1] (BindTemporary)
 // CHECK:     3: [B1.2] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     4: [B1.3]
 // CHECK:     5: const A &a = A();
 // CHECK:     6: foo
 // CHECK:     7: [B1.6] (ImplicitCastExpr, FunctionToPointerDecay, void (*)(const class A &))
-// CHECK:     8: A() (CXXConstructExpr, class A)
+// WARNINGS:     8: A() (CXXConstructExpr, class A)
+// ANALYZER:     8: A() (CXXConstructExpr, [B1.9], [B1.11], class A)
 // CHECK:     9: [B1.8] (BindTemporary)
 // CHECK:    10: [B1.9] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:    11: [B1.10]
@@ -1006,7 +1115,8 @@
 // CHECK:     4: [B1.3] (BindTemporary)
 // CHECK:     5: [B1.4] (ImplicitCastExpr, NoOp, const class A)
 // CHECK:     6: [B1.5]
-// CHECK:     7: [B1.6] (CXXConstructExpr, class A)
+// WARNINGS:     7: [B1.6] (CXXConstructExpr, class A)
+// ANALYZER:     7: [B1.6] (CXXConstructExpr, [B1.8], class A)
 // CHECK:     8: A a = A::make();
 // CHECK:     9: ~A() (Temporary object destructor)
 // CHECK:    10: int b;
@@ -1045,7 +1155,8 @@
 // CHECK:     Succs (1): B1
 // CHECK:   [B1]
 // CHECK:     1: int a;
-// CHECK:     2: A() (CXXConstructExpr, class A)
+// WARNINGS:     2: A() (CXXConstructExpr, class A)
+// ANALYZER:     2: A() (CXXConstructExpr, [B1.3], class A)
 // CHECK:     3: [B1.2] (BindTemporary)
 // CHECK:     4: [B1.3].operator int
 // CHECK:     5: [B1.3]
@@ -1061,13 +1172,15 @@
 // CHECK:   [B2 (ENTRY)]
 // CHECK:     Succs (1): B1
 // CHECK:   [B1]
-// CHECK:     1: A() (CXXConstructExpr, class A)
+// WARNINGS:     1: A() (CXXConstructExpr, class A)
+// ANALYZER:     1: A() (CXXConstructExpr, [B1.2], class A)
 // CHECK:     2: [B1.1] (BindTemporary)
 // CHECK:     3: [B1.2].operator int
 // CHECK:     4: [B1.2]
 // CHECK:     5: [B1.4] (ImplicitCastExpr, UserDefinedConversion, int)
 // CHECK:     6: int([B1.5]) (CXXFunctionalCastExpr, NoOp, int)
-// CHECK:     7: B() (CXXConstructExpr, class B)
+// WARNINGS:     7: B() (CXXConstructExpr, class B)
+// ANALYZER:     7: B() (CXXConstructExpr, [B1.8], class B)
 // CHECK:     8: [B1.7] (BindTemporary)
 // CHECK:     9: [B1.8].operator int
 // CHECK:    10: [B1.8]
@@ -1091,7 +1204,9 @@
 // CHECK:     Succs (1): B0
 // CHECK:   [B2 (NORETURN)]
 // CHECK:     1: int a;
-// CHECK:     2: NoReturn() (CXXConstructExpr, class NoReturn)
+// WARNINGS:     2: NoReturn() (CXXConstructExpr, class NoReturn)
+// ANALYZER-CXX98:     2: NoReturn() (CXXConstructExpr, [B2.3], [B2.4], class NoReturn)
+// ANALYZER-CXX11:     2: NoReturn() (CXXConstructExpr, [B2.3], class NoReturn)
 // CHECK:     3: [B2.2] (BindTemporary)
 // CHECK:     [[MEMBER:[45]]]: [B2.{{[34]}}].f
 // CHECK:     {{[56]}}: [B2.[[MEMBER]]]()
@@ -1108,7 +1223,8 @@
 // CHECK:     Succs (1): B0
 // CHECK:   [B2 (NORETURN)]
 // CHECK:     1: int a;
-// CHECK:     2: NoReturn() (CXXConstructExpr, class NoReturn)
+// WARNINGS:     2: NoReturn() (CXXConstructExpr, class NoReturn)
+// ANALYZER:     2: NoReturn() (CXXConstructExpr, [B2.3], class NoReturn)
 // CHECK:     3: [B2.2] (BindTemporary)
 // CHECK:     4: 47
 // CHECK:     5: ... , [B2.4]
@@ -1145,7 +1261,8 @@
 // CHECK:   [B6]
 // CHECK:     1: check
 // CHECK:     2: [B6.1] (ImplicitCastExpr, FunctionToPointerDecay, _Bool (*)(const class NoReturn &))
-// CHECK:     3: NoReturn() (CXXConstructExpr, class NoReturn)
+// WARNINGS:     3: NoReturn() (CXXConstructExpr, class NoReturn)
+// ANALYZER:     3: NoReturn() (CXXConstructExpr, [B6.4], [B6.6], class NoReturn)
 // CHECK:     4: [B6.3] (BindTemporary)
 // CHECK:     5: [B6.4] (ImplicitCastExpr, NoOp, const class NoReturn)
 // CHECK:     6: [B6.5]
@@ -1195,7 +1312,8 @@
 // CHECK:   [B6]
 // CHECK:     1: check
 // CHECK:     2: [B6.1] (ImplicitCastExpr, FunctionToPointerDecay, _Bool (*)(const class NoReturn &))
-// CHECK:     3: NoReturn() (CXXConstructExpr, class NoReturn)
+// WARNINGS:     3: NoReturn() (CXXConstructExpr, class NoReturn)
+// ANALYZER:     3: NoReturn() (CXXConstructExpr, [B6.4], [B6.6], class NoReturn)
 // CHECK:     4: [B6.3] (BindTemporary)
 // CHECK:     5: [B6.4] (ImplicitCastExpr, NoOp, const class NoReturn)
 // CHECK:     6: [B6.5]
@@ -1252,7 +1370,8 @@
 // CHECK:   [B6]
 // CHECK:     1: check
 // CHECK:     2: [B6.1] (ImplicitCastExpr, FunctionToPointerDecay, _Bool (*)(const class NoReturn &))
-// CHECK:     3: NoReturn() (CXXConstructExpr, class NoReturn)
+// WARNINGS:     3: NoReturn() (CXXConstructExpr, class NoReturn)
+// ANALYZER:     3: NoReturn() (CXXConstructExpr, [B6.4], [B6.6], class NoReturn)
 // CHECK:     4: [B6.3] (BindTemporary)
 // CHECK:     5: [B6.4] (ImplicitCastExpr, NoOp, const class NoReturn)
 // CHECK:     6: [B6.5]
diff --git a/test/Analysis/temp-obj-dtors-option.cpp b/test/Analysis/temp-obj-dtors-option.cpp
new file mode 100644
index 0000000..49577cd
--- /dev/null
+++ b/test/Analysis/temp-obj-dtors-option.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config cfg-temporary-dtors=true,c++-temp-dtor-inlining=false -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config cfg-temporary-dtors=true,c++-temp-dtor-inlining=true -DINLINE -verify %s
+
+void clang_analyzer_eval(bool);
+
+struct S {
+  int &x;
+
+  S(int &x) : x(x) { ++x; }
+  ~S() { --x; }
+};
+
+void foo() {
+  int x = 0;
+  S(x).x += 1;
+  clang_analyzer_eval(x == 1);
+#ifdef INLINE
+  // expected-warning@-2{{TRUE}}
+#else
+  // expected-warning@-4{{UNKNOWN}}
+#endif
+}
diff --git a/test/Analysis/temporaries.cpp b/test/Analysis/temporaries.cpp
index 99851dd..dce4858 100644
--- a/test/Analysis/temporaries.cpp
+++ b/test/Analysis/temporaries.cpp
@@ -1,9 +1,12 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify -w -std=c++03 %s
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify -w -std=c++11 %s
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -DTEMPORARY_DTORS -verify -w -analyzer-config cfg-temporary-dtors=true %s -std=c++11
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config cfg-temporary-dtors=false -verify -w -std=c++03 %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config cfg-temporary-dtors=false -verify -w -std=c++11 %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -DTEMPORARY_DTORS -verify -w -analyzer-config cfg-temporary-dtors=true,c++-temp-dtor-inlining=true %s -std=c++11
 
 extern bool clang_analyzer_eval(bool);
 extern bool clang_analyzer_warnIfReached();
+void clang_analyzer_checkInlined(bool);
+
+#include "Inputs/system-header-simulator-cxx.h";
 
 struct Trivial {
   Trivial(int x) : value(x) {}
@@ -422,6 +425,10 @@
   struct CtorWithNoReturnDtor {
     CtorWithNoReturnDtor() = default;
 
+    CtorWithNoReturnDtor(int x) {
+      clang_analyzer_checkInlined(false); // no-warning
+    }
+
     ~CtorWithNoReturnDtor() __attribute__((noreturn));
   };
 
@@ -434,11 +441,17 @@
     const CtorWithNoReturnDtor &c = CtorWithNoReturnDtor();
 
     // This represents an (expected) loss of coverage, since the destructor
-    // of the lifetime-exended temporary is executed at at the end of
+    // of the lifetime-exended temporary is executed at the end of
     // scope.
     clang_analyzer_warnIfReached();  // no-warning
   }
 
+#if __cplusplus >= 201103L
+  CtorWithNoReturnDtor returnNoReturnDtor() {
+    return {1}; // no-crash
+  }
+#endif
+
 #endif // TEMPORARY_DTORS
 }
 
@@ -530,3 +543,400 @@
   Sub(i).m();
 }
 }
+
+namespace test_return_temporary {
+class C {
+  int x, y;
+
+public:
+  C(int x, int y) : x(x), y(y) {}
+  int getX() const { return x; }
+  int getY() const { return y; }
+  ~C() {}
+};
+
+class D: public C {
+public:
+  D() : C(1, 2) {}
+  D(const D &d): C(d.getX(), d.getY()) {}
+};
+
+C returnTemporaryWithVariable() { C c(1, 2); return c; }
+C returnTemporaryWithAnotherFunctionWithVariable() {
+  return returnTemporaryWithVariable();
+}
+C returnTemporaryWithCopyConstructionWithVariable() {
+  return C(returnTemporaryWithVariable());
+}
+
+C returnTemporaryWithConstruction() { return C(1, 2); }
+C returnTemporaryWithAnotherFunctionWithConstruction() {
+  return returnTemporaryWithConstruction();
+}
+C returnTemporaryWithCopyConstructionWithConstruction() {
+  return C(returnTemporaryWithConstruction());
+}
+
+D returnTemporaryWithVariableAndNonTrivialCopy() { D d; return d; }
+D returnTemporaryWithAnotherFunctionWithVariableAndNonTrivialCopy() {
+  return returnTemporaryWithVariableAndNonTrivialCopy();
+}
+D returnTemporaryWithCopyConstructionWithVariableAndNonTrivialCopy() {
+  return D(returnTemporaryWithVariableAndNonTrivialCopy());
+}
+
+#if __cplusplus >= 201103L
+C returnTemporaryWithBraces() { return {1, 2}; }
+C returnTemporaryWithAnotherFunctionWithBraces() {
+  return returnTemporaryWithBraces();
+}
+C returnTemporaryWithCopyConstructionWithBraces() {
+  return C(returnTemporaryWithBraces());
+}
+#endif // C++11
+
+void test() {
+  C c1 = returnTemporaryWithVariable();
+  clang_analyzer_eval(c1.getX() == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(c1.getY() == 2); // expected-warning{{TRUE}}
+
+  C c2 = returnTemporaryWithAnotherFunctionWithVariable();
+  clang_analyzer_eval(c2.getX() == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(c2.getY() == 2); // expected-warning{{TRUE}}
+
+  C c3 = returnTemporaryWithCopyConstructionWithVariable();
+  clang_analyzer_eval(c3.getX() == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(c3.getY() == 2); // expected-warning{{TRUE}}
+
+  C c4 = returnTemporaryWithConstruction();
+  clang_analyzer_eval(c4.getX() == 1);
+  clang_analyzer_eval(c4.getY() == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-3{{TRUE}}
+  // expected-warning@-3{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+
+  C c5 = returnTemporaryWithAnotherFunctionWithConstruction();
+  clang_analyzer_eval(c5.getX() == 1);
+  clang_analyzer_eval(c5.getY() == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-3{{TRUE}}
+  // expected-warning@-3{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+
+  C c6 = returnTemporaryWithCopyConstructionWithConstruction();
+  clang_analyzer_eval(c5.getX() == 1);
+  clang_analyzer_eval(c5.getY() == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-3{{TRUE}}
+  // expected-warning@-3{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+
+#if __cplusplus >= 201103L
+
+  C c7 = returnTemporaryWithBraces();
+  clang_analyzer_eval(c7.getX() == 1);
+  clang_analyzer_eval(c7.getY() == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-3{{TRUE}}
+  // expected-warning@-3{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+
+  C c8 = returnTemporaryWithAnotherFunctionWithBraces();
+  clang_analyzer_eval(c8.getX() == 1);
+  clang_analyzer_eval(c8.getY() == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-3{{TRUE}}
+  // expected-warning@-3{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+
+  C c9 = returnTemporaryWithCopyConstructionWithBraces();
+  clang_analyzer_eval(c9.getX() == 1);
+  clang_analyzer_eval(c9.getY() == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-3{{TRUE}}
+  // expected-warning@-3{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+
+#endif // C++11
+
+  D d1 = returnTemporaryWithVariableAndNonTrivialCopy();
+  clang_analyzer_eval(d1.getX() == 1);
+  clang_analyzer_eval(d1.getY() == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-3{{TRUE}}
+  // expected-warning@-3{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+
+  D d2 = returnTemporaryWithAnotherFunctionWithVariableAndNonTrivialCopy();
+  clang_analyzer_eval(d2.getX() == 1);
+  clang_analyzer_eval(d2.getY() == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-3{{TRUE}}
+  // expected-warning@-3{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+
+  D d3 = returnTemporaryWithCopyConstructionWithVariableAndNonTrivialCopy();
+  clang_analyzer_eval(d3.getX() == 1);
+  clang_analyzer_eval(d3.getY() == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-3{{TRUE}}
+  // expected-warning@-3{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+}
+} // namespace test_return_temporary
+
+
+namespace test_temporary_object_expr_without_dtor {
+class C {
+  int x;
+public:
+  C(int x) : x(x) {}
+  int getX() const { return x; }
+};
+
+void test() {
+  clang_analyzer_eval(C(3).getX() == 3); // expected-warning{{TRUE}}
+};
+}
+
+namespace test_temporary_object_expr_with_dtor {
+class C {
+  int x;
+
+public:
+  C(int x) : x(x) {}
+  ~C() {}
+  int getX() const { return x; }
+};
+
+void test(int coin) {
+  clang_analyzer_eval(C(3).getX() == 3);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-2{{TRUE}}
+#else
+  // expected-warning@-4{{UNKNOWN}}
+#endif
+
+  const C &c1 = coin ? C(1) : C(2);
+  if (coin) {
+    clang_analyzer_eval(c1.getX() == 1);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-2{{TRUE}}
+#else
+  // expected-warning@-4{{UNKNOWN}}
+#endif
+  } else {
+    clang_analyzer_eval(c1.getX() == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-2{{TRUE}}
+#else
+  // expected-warning@-4{{UNKNOWN}}
+#endif
+  }
+
+  C c2 = coin ? C(1) : C(2);
+  if (coin) {
+    clang_analyzer_eval(c2.getX() == 1);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-2{{TRUE}}
+#else
+  // expected-warning@-4{{UNKNOWN}}
+#endif
+  } else {
+    clang_analyzer_eval(c2.getX() == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-2{{TRUE}}
+#else
+  // expected-warning@-4{{UNKNOWN}}
+#endif
+  }
+}
+
+} // namespace test_temporary_object_expr
+
+namespace test_match_constructors_and_destructors {
+class C {
+public:
+  int &x, &y;
+  C(int &_x, int &_y) : x(_x), y(_y) { ++x; }
+  C(const C &c): x(c.x), y(c.y) { ++x; }
+  ~C() { ++y; }
+};
+
+void test_simple_temporary() {
+  int x = 0, y = 0;
+  {
+    const C &c = C(x, y);
+  }
+  // One constructor and one destructor.
+  clang_analyzer_eval(x == 1);
+  clang_analyzer_eval(y == 1);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-3{{TRUE}}
+  // expected-warning@-3{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+}
+
+void test_simple_temporary_with_copy() {
+  int x = 0, y = 0;
+  {
+    C c = C(x, y);
+  }
+  // Two constructors (temporary object expr and copy) and two destructors.
+  clang_analyzer_eval(x == 2);
+  clang_analyzer_eval(y == 2);
+#ifdef TEMPORARY_DTORS
+  // expected-warning@-3{{TRUE}}
+  // expected-warning@-3{{TRUE}}
+#else
+  // expected-warning@-6{{UNKNOWN}}
+  // expected-warning@-6{{UNKNOWN}}
+#endif
+}
+
+void test_ternary_temporary(int coin) {
+  int x = 0, y = 0, z = 0, w = 0;
+  {
+    const C &c = coin ? C(x, y) : C(z, w);
+  }
+  // This time each branch contains an additional elidable copy constructor.
+  if (coin) {
+    clang_analyzer_eval(x == 2);
+    clang_analyzer_eval(y == 2);
+#ifdef TEMPORARY_DTORS
+    // expected-warning@-3{{TRUE}}
+    // expected-warning@-3{{TRUE}}
+#else
+    // expected-warning@-6{{UNKNOWN}}
+    // expected-warning@-6{{UNKNOWN}}
+#endif
+    clang_analyzer_eval(z == 0); // expected-warning{{TRUE}}
+    clang_analyzer_eval(w == 0); // expected-warning{{TRUE}}
+
+  } else {
+    clang_analyzer_eval(x == 0); // expected-warning{{TRUE}}
+    clang_analyzer_eval(y == 0); // expected-warning{{TRUE}}
+    clang_analyzer_eval(z == 2);
+    clang_analyzer_eval(w == 2);
+#ifdef TEMPORARY_DTORS
+    // expected-warning@-3{{TRUE}}
+    // expected-warning@-3{{TRUE}}
+#else
+    // expected-warning@-6{{UNKNOWN}}
+    // expected-warning@-6{{UNKNOWN}}
+#endif
+  }
+}
+
+void test_ternary_temporary_with_copy(int coin) {
+  int x = 0, y = 0, z = 0, w = 0;
+  {
+    C c = coin ? C(x, y) : C(z, w);
+  }
+  // Temporary expression, elidable copy within branch,
+  // constructor for variable - 3 total.
+  if (coin) {
+    clang_analyzer_eval(x == 3);
+    clang_analyzer_eval(y == 3);
+#ifdef TEMPORARY_DTORS
+    // expected-warning@-3{{TRUE}}
+    // expected-warning@-3{{TRUE}}
+#else
+    // expected-warning@-6{{UNKNOWN}}
+    // expected-warning@-6{{UNKNOWN}}
+#endif
+    clang_analyzer_eval(z == 0); // expected-warning{{TRUE}}
+    clang_analyzer_eval(w == 0); // expected-warning{{TRUE}}
+
+  } else {
+    clang_analyzer_eval(x == 0); // expected-warning{{TRUE}}
+    clang_analyzer_eval(y == 0); // expected-warning{{TRUE}}
+    clang_analyzer_eval(z == 3);
+    clang_analyzer_eval(w == 3);
+#ifdef TEMPORARY_DTORS
+    // expected-warning@-3{{TRUE}}
+    // expected-warning@-3{{TRUE}}
+#else
+    // expected-warning@-6{{UNKNOWN}}
+    // expected-warning@-6{{UNKNOWN}}
+#endif
+  }
+}
+} // namespace test_match_constructors_and_destructors
+
+namespace dont_forget_destructor_around_logical_op {
+int glob;
+
+class C {
+public:
+  ~C() {
+    glob = 1;
+    clang_analyzer_checkInlined(true);
+#ifdef TEMPORARY_DTORS
+    // expected-warning@-2{{TRUE}}
+#endif
+  }
+};
+
+C get();
+
+bool is(C);
+
+
+void test(int coin) {
+  // Here temporaries are being cleaned up after && is evaluated. There are two
+  // temporaries: the return value of get() and the elidable copy constructor
+  // of that return value into is(). According to the CFG, we need to cleanup
+  // both of them depending on whether the temporary corresponding to the
+  // return value of get() was initialized. However, we didn't track
+  // temporaries returned from functions, so we took the wrong branch.
+  coin && is(get()); // no-crash
+  // FIXME: Should be true once we inline all destructors.
+  clang_analyzer_eval(glob); // expected-warning{{UNKNOWN}}
+}
+} // namespace dont_forget_destructor_around_logical_op
+
+#if __cplusplus >= 201103L
+namespace temporary_list_crash {
+class C {
+public:
+  C() {}
+  ~C() {}
+};
+
+void test() {
+  std::initializer_list<C>{C(), C()}; // no-crash
+}
+} // namespace temporary_list_crash
+#endif // C++11
diff --git a/test/Analysis/uninit-const.c b/test/Analysis/uninit-const.c
index 9a6529a..407c28a 100644
--- a/test/Analysis/uninit-const.c
+++ b/test/Analysis/uninit-const.c
@@ -1,4 +1,6 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=unix.Malloc,core,alpha.core.CallAndMessageUnInitRefArg -analyzer-output=text -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=unix.Malloc,core,alpha.core.CallAndMessageUnInitRefArg,debug.ExprInspection -analyzer-output=text -verify %s
+
+void clang_analyzer_warnIfReached();
 
 // Passing uninitialized const data to function
 #include "Inputs/system-header-simulator.h"
@@ -121,6 +123,32 @@
 
 }
 
+// https://bugs.llvm.org/show_bug.cgi?id=35419
+void f11_0(void) {
+  int x; // expected-note {{'x' declared without an initial value}}
+  x++; // expected-warning {{The expression is an uninitialized value. The computed value will also be garbage}}
+       // expected-note@-1 {{The expression is an uninitialized value. The computed value will also be garbage}}
+  clang_analyzer_warnIfReached(); // no-warning
+}
+void f11_1(void) {
+  int x; // expected-note {{'x' declared without an initial value}}
+  ++x; // expected-warning {{The expression is an uninitialized value. The computed value will also be garbage}}
+       // expected-note@-1 {{The expression is an uninitialized value. The computed value will also be garbage}}
+  clang_analyzer_warnIfReached(); // no-warning
+}
+void f11_2(void) {
+  int x; // expected-note {{'x' declared without an initial value}}
+  x--; // expected-warning {{The expression is an uninitialized value. The computed value will also be garbage}}
+       // expected-note@-1 {{The expression is an uninitialized value. The computed value will also be garbage}}
+  clang_analyzer_warnIfReached(); // no-warning
+}
+void f11_3(void) {
+  int x; // expected-note {{'x' declared without an initial value}}
+  --x; // expected-warning {{The expression is an uninitialized value. The computed value will also be garbage}}
+       // expected-note@-1 {{The expression is an uninitialized value. The computed value will also be garbage}}
+  clang_analyzer_warnIfReached(); // no-warning
+}
+
 int f_malloc_1(void) {
   int *ptr;
 
diff --git a/test/Analysis/uninit-const.cpp b/test/Analysis/uninit-const.cpp
index db969bf..f5166e6 100644
--- a/test/Analysis/uninit-const.cpp
+++ b/test/Analysis/uninit-const.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus.NewDelete,core,alpha.core.CallAndMessageUnInitRefArg -analyzer-output=text -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus.NewDelete,core,alpha.core.CallAndMessageUnInitRefArg -analyzer-output=text -DTEST_INLINABLE_ALLOCATORS -verify %s
 // Passing uninitialized const data to unknown function
 
 #include "Inputs/system-header-simulator-cxx.h"
@@ -52,7 +53,7 @@
 }
 
 void f6_1(void) {
-  int t;
+  int t; // expected-note{{'t' declared without an initial value}}
   int p = f6_1_sub(t); //expected-warning {{Assigned value is garbage or undefined}}
                        //expected-note@-1 {{Calling 'f6_1_sub'}}
                        //expected-note@-2 {{Returning from 'f6_1_sub'}}
diff --git a/test/Analysis/unix-fns.c b/test/Analysis/unix-fns.c
index 481f545..9126e1b 100644
--- a/test/Analysis/unix-fns.c
+++ b/test/Analysis/unix-fns.c
@@ -34,11 +34,40 @@
  struct dispatch_disk_s *_ddisk;
 } dispatch_object_t __attribute__((__transparent_union__));
 typedef void (^dispatch_block_t)(void);
-typedef long dispatch_once_t;
 typedef struct dispatch_queue_s *dispatch_queue_t;
-void dispatch_once(dispatch_once_t *predicate, dispatch_block_t block);
 void dispatch_sync(dispatch_queue_t queue, dispatch_block_t block);
 
+typedef long dispatch_once_t;
+
+extern
+__attribute__((__nonnull__))
+__attribute__((__nothrow__))
+void dispatch_once(dispatch_once_t *predicate,
+                   __attribute__((__noescape__)) dispatch_block_t block);
+
+// Inlined fast-path dispatch_once defers to the real dispatch_once
+// on the slow path.
+static
+__inline__
+__attribute__((__always_inline__))
+__attribute__((__nonnull__))
+__attribute__((__nothrow__))
+void _dispatch_once(dispatch_once_t *predicate,
+                    __attribute__((__noescape__)) dispatch_block_t block)
+{
+ if (__builtin_expect((*predicate), (~0l)) != ~0l) {
+  dispatch_once(predicate, block);
+ } else {
+  __asm__ __volatile__("" ::: "memory");
+ }
+ __builtin_assume(*predicate == ~0l);
+}
+
+// Macro so that user calls to dispatch_once() call the inlined fast-path
+// variant.
+#undef dispatch_once
+#define dispatch_once _dispatch_once
+
 #ifndef O_CREAT
 #define O_CREAT 0x0200
 #define O_RDONLY 0x0000
@@ -181,16 +210,6 @@
   }
 }
 
-// Test dispatch_once being a macro that wraps a call to _dispatch_once, which in turn
-// calls the real dispatch_once.
-
-static inline void _dispatch_once(dispatch_once_t *predicate, dispatch_block_t block)
-{
-  dispatch_once(predicate, block);
-}
-
-#define dispatch_once _dispatch_once
-
 void test_dispatch_once_in_macro() {
   dispatch_once_t pred = 0;
   dispatch_once(&pred, ^(){});  // expected-warning {{Call to 'dispatch_once' uses the local variable 'pred' for the predicate value}}
@@ -206,7 +225,7 @@
   });
 }
 
-// Test inlining if dispatch_once.
+// Test inlining of dispatch_once.
 void test_inline_dispatch_once() {
   static dispatch_once_t pred;
   int *p = 0;
@@ -215,6 +234,17 @@
   });
 }
 
+// Make sure code after call to dispatch once is reached.
+void test_inline_dispatch_once_reachable() {
+  static dispatch_once_t pred;
+  __block int *p;
+  dispatch_once(&pred, ^(void) {
+      p = 0;
+  });
+
+  *p = 7; // expected-warning {{Dereference of null pointer (loaded from variable 'p')}}
+}
+
 // CHECK: <key>diagnostics</key>
 // CHECK-NEXT: <array>
 // CHECK-NEXT:  <dict>
@@ -228,12 +258,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>51</integer>
+// CHECK-NEXT:           <key>line</key><integer>80</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>51</integer>
+// CHECK-NEXT:           <key>line</key><integer>80</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -241,12 +271,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>53</integer>
+// CHECK-NEXT:           <key>line</key><integer>82</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>53</integer>
+// CHECK-NEXT:           <key>line</key><integer>82</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -262,12 +292,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>53</integer>
+// CHECK-NEXT:           <key>line</key><integer>82</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>53</integer>
+// CHECK-NEXT:           <key>line</key><integer>82</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -275,12 +305,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>53</integer>
+// CHECK-NEXT:           <key>line</key><integer>82</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>53</integer>
+// CHECK-NEXT:           <key>line</key><integer>82</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -292,7 +322,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>53</integer>
+// CHECK-NEXT:      <key>line</key><integer>82</integer>
 // CHECK-NEXT:      <key>col</key><integer>7</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -300,12 +330,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>53</integer>
+// CHECK-NEXT:         <key>line</key><integer>82</integer>
 // CHECK-NEXT:         <key>col</key><integer>7</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>53</integer>
+// CHECK-NEXT:         <key>line</key><integer>82</integer>
 // CHECK-NEXT:         <key>col</key><integer>9</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -325,12 +355,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>53</integer>
+// CHECK-NEXT:           <key>line</key><integer>82</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>53</integer>
+// CHECK-NEXT:           <key>line</key><integer>82</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -338,12 +368,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
+// CHECK-NEXT:           <key>line</key><integer>85</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
+// CHECK-NEXT:           <key>line</key><integer>85</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -359,12 +389,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
+// CHECK-NEXT:           <key>line</key><integer>85</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
+// CHECK-NEXT:           <key>line</key><integer>85</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -372,12 +402,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
+// CHECK-NEXT:           <key>line</key><integer>85</integer>
 // CHECK-NEXT:           <key>col</key><integer>8</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>56</integer>
+// CHECK-NEXT:           <key>line</key><integer>85</integer>
 // CHECK-NEXT:           <key>col</key><integer>11</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -389,7 +419,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>56</integer>
+// CHECK-NEXT:      <key>line</key><integer>85</integer>
 // CHECK-NEXT:      <key>col</key><integer>8</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -397,12 +427,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>56</integer>
+// CHECK-NEXT:         <key>line</key><integer>85</integer>
 // CHECK-NEXT:         <key>col</key><integer>19</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>56</integer>
+// CHECK-NEXT:         <key>line</key><integer>85</integer>
 // CHECK-NEXT:         <key>col</key><integer>25</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -426,7 +456,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>6</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>56</integer>
+// CHECK-NEXT:   <key>line</key><integer>85</integer>
 // CHECK-NEXT:   <key>col</key><integer>8</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -442,12 +472,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>62</integer>
+// CHECK-NEXT:           <key>line</key><integer>91</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>62</integer>
+// CHECK-NEXT:           <key>line</key><integer>91</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -455,12 +485,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>64</integer>
+// CHECK-NEXT:           <key>line</key><integer>93</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>64</integer>
+// CHECK-NEXT:           <key>line</key><integer>93</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -476,12 +506,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>64</integer>
+// CHECK-NEXT:           <key>line</key><integer>93</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>64</integer>
+// CHECK-NEXT:           <key>line</key><integer>93</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -489,12 +519,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>64</integer>
+// CHECK-NEXT:           <key>line</key><integer>93</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>64</integer>
+// CHECK-NEXT:           <key>line</key><integer>93</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -506,7 +536,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>64</integer>
+// CHECK-NEXT:      <key>line</key><integer>93</integer>
 // CHECK-NEXT:      <key>col</key><integer>7</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -514,12 +544,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>64</integer>
+// CHECK-NEXT:         <key>line</key><integer>93</integer>
 // CHECK-NEXT:         <key>col</key><integer>7</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>64</integer>
+// CHECK-NEXT:         <key>line</key><integer>93</integer>
 // CHECK-NEXT:         <key>col</key><integer>9</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -539,12 +569,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>64</integer>
+// CHECK-NEXT:           <key>line</key><integer>93</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>64</integer>
+// CHECK-NEXT:           <key>line</key><integer>93</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -552,12 +582,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>67</integer>
+// CHECK-NEXT:           <key>line</key><integer>96</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>67</integer>
+// CHECK-NEXT:           <key>line</key><integer>96</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -573,12 +603,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>67</integer>
+// CHECK-NEXT:           <key>line</key><integer>96</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>67</integer>
+// CHECK-NEXT:           <key>line</key><integer>96</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -586,12 +616,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>67</integer>
+// CHECK-NEXT:           <key>line</key><integer>96</integer>
 // CHECK-NEXT:           <key>col</key><integer>8</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>67</integer>
+// CHECK-NEXT:           <key>line</key><integer>96</integer>
 // CHECK-NEXT:           <key>col</key><integer>13</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -603,7 +633,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>67</integer>
+// CHECK-NEXT:      <key>line</key><integer>96</integer>
 // CHECK-NEXT:      <key>col</key><integer>8</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -611,12 +641,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>67</integer>
+// CHECK-NEXT:         <key>line</key><integer>96</integer>
 // CHECK-NEXT:         <key>col</key><integer>44</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>67</integer>
+// CHECK-NEXT:         <key>line</key><integer>96</integer>
 // CHECK-NEXT:         <key>col</key><integer>50</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -640,7 +670,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>6</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>67</integer>
+// CHECK-NEXT:   <key>line</key><integer>96</integer>
 // CHECK-NEXT:   <key>col</key><integer>8</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -656,12 +686,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>73</integer>
+// CHECK-NEXT:           <key>line</key><integer>102</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>73</integer>
+// CHECK-NEXT:           <key>line</key><integer>102</integer>
 // CHECK-NEXT:           <key>col</key><integer>17</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -669,12 +699,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>74</integer>
+// CHECK-NEXT:           <key>line</key><integer>103</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>74</integer>
+// CHECK-NEXT:           <key>line</key><integer>103</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -690,12 +720,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>74</integer>
+// CHECK-NEXT:           <key>line</key><integer>103</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>74</integer>
+// CHECK-NEXT:           <key>line</key><integer>103</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -703,12 +733,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>74</integer>
+// CHECK-NEXT:           <key>line</key><integer>103</integer>
 // CHECK-NEXT:           <key>col</key><integer>8</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>74</integer>
+// CHECK-NEXT:           <key>line</key><integer>103</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -724,12 +754,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>74</integer>
+// CHECK-NEXT:           <key>line</key><integer>103</integer>
 // CHECK-NEXT:           <key>col</key><integer>8</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>74</integer>
+// CHECK-NEXT:           <key>line</key><integer>103</integer>
 // CHECK-NEXT:           <key>col</key><integer>9</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -737,12 +767,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>74</integer>
+// CHECK-NEXT:           <key>line</key><integer>103</integer>
 // CHECK-NEXT:           <key>col</key><integer>52</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>74</integer>
+// CHECK-NEXT:           <key>line</key><integer>103</integer>
 // CHECK-NEXT:           <key>col</key><integer>64</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -754,7 +784,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>74</integer>
+// CHECK-NEXT:      <key>line</key><integer>103</integer>
 // CHECK-NEXT:      <key>col</key><integer>52</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -762,12 +792,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>74</integer>
+// CHECK-NEXT:         <key>line</key><integer>103</integer>
 // CHECK-NEXT:         <key>col</key><integer>66</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>74</integer>
+// CHECK-NEXT:         <key>line</key><integer>103</integer>
 // CHECK-NEXT:         <key>col</key><integer>72</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -791,7 +821,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>74</integer>
+// CHECK-NEXT:   <key>line</key><integer>103</integer>
 // CHECK-NEXT:   <key>col</key><integer>52</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -807,12 +837,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>84</integer>
+// CHECK-NEXT:           <key>line</key><integer>113</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>84</integer>
+// CHECK-NEXT:           <key>line</key><integer>113</integer>
 // CHECK-NEXT:           <key>col</key><integer>16</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -820,12 +850,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>85</integer>
+// CHECK-NEXT:           <key>line</key><integer>114</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>85</integer>
+// CHECK-NEXT:           <key>line</key><integer>114</integer>
 // CHECK-NEXT:           <key>col</key><integer>14</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -837,7 +867,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>85</integer>
+// CHECK-NEXT:      <key>line</key><integer>114</integer>
 // CHECK-NEXT:      <key>col</key><integer>3</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -845,12 +875,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>85</integer>
+// CHECK-NEXT:         <key>line</key><integer>114</integer>
 // CHECK-NEXT:         <key>col</key><integer>16</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>85</integer>
+// CHECK-NEXT:         <key>line</key><integer>114</integer>
 // CHECK-NEXT:         <key>col</key><integer>20</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -874,7 +904,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>85</integer>
+// CHECK-NEXT:   <key>line</key><integer>114</integer>
 // CHECK-NEXT:   <key>col</key><integer>3</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -890,12 +920,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>94</integer>
+// CHECK-NEXT:           <key>line</key><integer>123</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>94</integer>
+// CHECK-NEXT:           <key>line</key><integer>123</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -903,12 +933,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>94</integer>
+// CHECK-NEXT:           <key>line</key><integer>123</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>94</integer>
+// CHECK-NEXT:           <key>line</key><integer>123</integer>
 // CHECK-NEXT:           <key>col</key><integer>20</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -920,7 +950,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>94</integer>
+// CHECK-NEXT:      <key>line</key><integer>123</integer>
 // CHECK-NEXT:      <key>col</key><integer>15</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -928,12 +958,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>94</integer>
+// CHECK-NEXT:         <key>line</key><integer>123</integer>
 // CHECK-NEXT:         <key>col</key><integer>22</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>94</integer>
+// CHECK-NEXT:         <key>line</key><integer>123</integer>
 // CHECK-NEXT:         <key>col</key><integer>22</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -957,7 +987,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>94</integer>
+// CHECK-NEXT:   <key>line</key><integer>123</integer>
 // CHECK-NEXT:   <key>col</key><integer>15</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -973,12 +1003,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>106</integer>
+// CHECK-NEXT:           <key>line</key><integer>135</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>106</integer>
+// CHECK-NEXT:           <key>line</key><integer>135</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -986,12 +1016,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>106</integer>
+// CHECK-NEXT:           <key>line</key><integer>135</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>106</integer>
+// CHECK-NEXT:           <key>line</key><integer>135</integer>
 // CHECK-NEXT:           <key>col</key><integer>20</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1003,7 +1033,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>106</integer>
+// CHECK-NEXT:      <key>line</key><integer>135</integer>
 // CHECK-NEXT:      <key>col</key><integer>15</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1011,12 +1041,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>106</integer>
+// CHECK-NEXT:         <key>line</key><integer>135</integer>
 // CHECK-NEXT:         <key>col</key><integer>22</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>106</integer>
+// CHECK-NEXT:         <key>line</key><integer>135</integer>
 // CHECK-NEXT:         <key>col</key><integer>22</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1040,7 +1070,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>106</integer>
+// CHECK-NEXT:   <key>line</key><integer>135</integer>
 // CHECK-NEXT:   <key>col</key><integer>15</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -1056,12 +1086,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>112</integer>
+// CHECK-NEXT:           <key>line</key><integer>141</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>112</integer>
+// CHECK-NEXT:           <key>line</key><integer>141</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1069,12 +1099,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>112</integer>
+// CHECK-NEXT:           <key>line</key><integer>141</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>112</integer>
+// CHECK-NEXT:           <key>line</key><integer>141</integer>
 // CHECK-NEXT:           <key>col</key><integer>20</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1086,7 +1116,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>112</integer>
+// CHECK-NEXT:      <key>line</key><integer>141</integer>
 // CHECK-NEXT:      <key>col</key><integer>15</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1094,12 +1124,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>112</integer>
+// CHECK-NEXT:         <key>line</key><integer>141</integer>
 // CHECK-NEXT:         <key>col</key><integer>26</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>112</integer>
+// CHECK-NEXT:         <key>line</key><integer>141</integer>
 // CHECK-NEXT:         <key>col</key><integer>26</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1123,7 +1153,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>112</integer>
+// CHECK-NEXT:   <key>line</key><integer>141</integer>
 // CHECK-NEXT:   <key>col</key><integer>15</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -1139,12 +1169,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>124</integer>
+// CHECK-NEXT:           <key>line</key><integer>153</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>124</integer>
+// CHECK-NEXT:           <key>line</key><integer>153</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1152,12 +1182,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>124</integer>
+// CHECK-NEXT:           <key>line</key><integer>153</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>124</integer>
+// CHECK-NEXT:           <key>line</key><integer>153</integer>
 // CHECK-NEXT:           <key>col</key><integer>21</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1169,7 +1199,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>124</integer>
+// CHECK-NEXT:      <key>line</key><integer>153</integer>
 // CHECK-NEXT:      <key>col</key><integer>15</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1177,12 +1207,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>124</integer>
+// CHECK-NEXT:         <key>line</key><integer>153</integer>
 // CHECK-NEXT:         <key>col</key><integer>28</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>124</integer>
+// CHECK-NEXT:         <key>line</key><integer>153</integer>
 // CHECK-NEXT:         <key>col</key><integer>28</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1206,7 +1236,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>124</integer>
+// CHECK-NEXT:   <key>line</key><integer>153</integer>
 // CHECK-NEXT:   <key>col</key><integer>15</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -1222,12 +1252,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>130</integer>
+// CHECK-NEXT:           <key>line</key><integer>159</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>130</integer>
+// CHECK-NEXT:           <key>line</key><integer>159</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1235,12 +1265,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>130</integer>
+// CHECK-NEXT:           <key>line</key><integer>159</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>130</integer>
+// CHECK-NEXT:           <key>line</key><integer>159</integer>
 // CHECK-NEXT:           <key>col</key><integer>22</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1252,7 +1282,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>130</integer>
+// CHECK-NEXT:      <key>line</key><integer>159</integer>
 // CHECK-NEXT:      <key>col</key><integer>15</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1260,12 +1290,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>130</integer>
+// CHECK-NEXT:         <key>line</key><integer>159</integer>
 // CHECK-NEXT:         <key>col</key><integer>29</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>130</integer>
+// CHECK-NEXT:         <key>line</key><integer>159</integer>
 // CHECK-NEXT:         <key>col</key><integer>29</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1289,7 +1319,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>130</integer>
+// CHECK-NEXT:   <key>line</key><integer>159</integer>
 // CHECK-NEXT:   <key>col</key><integer>15</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -1305,12 +1335,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>148</integer>
+// CHECK-NEXT:           <key>line</key><integer>177</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>148</integer>
+// CHECK-NEXT:           <key>line</key><integer>177</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1318,12 +1348,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>148</integer>
+// CHECK-NEXT:           <key>line</key><integer>177</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>148</integer>
+// CHECK-NEXT:           <key>line</key><integer>177</integer>
 // CHECK-NEXT:           <key>col</key><integer>20</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1335,7 +1365,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>148</integer>
+// CHECK-NEXT:      <key>line</key><integer>177</integer>
 // CHECK-NEXT:      <key>col</key><integer>15</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1343,12 +1373,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>148</integer>
+// CHECK-NEXT:         <key>line</key><integer>177</integer>
 // CHECK-NEXT:         <key>col</key><integer>22</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>148</integer>
+// CHECK-NEXT:         <key>line</key><integer>177</integer>
 // CHECK-NEXT:         <key>col</key><integer>22</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1372,7 +1402,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>148</integer>
+// CHECK-NEXT:   <key>line</key><integer>177</integer>
 // CHECK-NEXT:   <key>col</key><integer>15</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -1388,12 +1418,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>160</integer>
+// CHECK-NEXT:           <key>line</key><integer>189</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>160</integer>
+// CHECK-NEXT:           <key>line</key><integer>189</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1401,12 +1431,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>160</integer>
+// CHECK-NEXT:           <key>line</key><integer>189</integer>
 // CHECK-NEXT:           <key>col</key><integer>16</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>160</integer>
+// CHECK-NEXT:           <key>line</key><integer>189</integer>
 // CHECK-NEXT:           <key>col</key><integer>31</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1418,7 +1448,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>160</integer>
+// CHECK-NEXT:      <key>line</key><integer>189</integer>
 // CHECK-NEXT:      <key>col</key><integer>16</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1426,12 +1456,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>160</integer>
+// CHECK-NEXT:         <key>line</key><integer>189</integer>
 // CHECK-NEXT:         <key>col</key><integer>33</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>160</integer>
+// CHECK-NEXT:         <key>line</key><integer>189</integer>
 // CHECK-NEXT:         <key>col</key><integer>33</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1455,7 +1485,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>160</integer>
+// CHECK-NEXT:   <key>line</key><integer>189</integer>
 // CHECK-NEXT:   <key>col</key><integer>16</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -1471,12 +1501,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>172</integer>
+// CHECK-NEXT:           <key>line</key><integer>201</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>172</integer>
+// CHECK-NEXT:           <key>line</key><integer>201</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1484,12 +1514,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>172</integer>
+// CHECK-NEXT:           <key>line</key><integer>201</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>172</integer>
+// CHECK-NEXT:           <key>line</key><integer>201</integer>
 // CHECK-NEXT:           <key>col</key><integer>20</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1501,7 +1531,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>172</integer>
+// CHECK-NEXT:      <key>line</key><integer>201</integer>
 // CHECK-NEXT:      <key>col</key><integer>15</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1509,12 +1539,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>172</integer>
+// CHECK-NEXT:         <key>line</key><integer>201</integer>
 // CHECK-NEXT:         <key>col</key><integer>22</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>172</integer>
+// CHECK-NEXT:         <key>line</key><integer>201</integer>
 // CHECK-NEXT:         <key>col</key><integer>22</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1538,7 +1568,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>1</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>172</integer>
+// CHECK-NEXT:   <key>line</key><integer>201</integer>
 // CHECK-NEXT:   <key>col</key><integer>15</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -1554,12 +1584,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>195</integer>
+// CHECK-NEXT:           <key>line</key><integer>214</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>195</integer>
+// CHECK-NEXT:           <key>line</key><integer>214</integer>
 // CHECK-NEXT:           <key>col</key><integer>17</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1567,12 +1597,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>196</integer>
+// CHECK-NEXT:           <key>line</key><integer>215</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>196</integer>
+// CHECK-NEXT:           <key>line</key><integer>215</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1584,7 +1614,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>196</integer>
+// CHECK-NEXT:      <key>line</key><integer>215</integer>
 // CHECK-NEXT:      <key>col</key><integer>3</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1592,12 +1622,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>196</integer>
+// CHECK-NEXT:         <key>line</key><integer>215</integer>
 // CHECK-NEXT:         <key>col</key><integer>17</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>196</integer>
+// CHECK-NEXT:         <key>line</key><integer>215</integer>
 // CHECK-NEXT:         <key>col</key><integer>21</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1621,7 +1651,7 @@
 // CHECK-NEXT:  <key>issue_hash_function_offset</key><string>2</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>196</integer>
+// CHECK-NEXT:   <key>line</key><integer>215</integer>
 // CHECK-NEXT:   <key>col</key><integer>3</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -1633,7 +1663,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>201</integer>
+// CHECK-NEXT:      <key>line</key><integer>220</integer>
 // CHECK-NEXT:      <key>col</key><integer>3</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1641,12 +1671,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>201</integer>
+// CHECK-NEXT:         <key>line</key><integer>220</integer>
 // CHECK-NEXT:         <key>col</key><integer>3</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>201</integer>
+// CHECK-NEXT:         <key>line</key><integer>220</integer>
 // CHECK-NEXT:         <key>col</key><integer>8</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1666,12 +1696,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>201</integer>
+// CHECK-NEXT:           <key>line</key><integer>220</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>201</integer>
+// CHECK-NEXT:           <key>line</key><integer>220</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1679,12 +1709,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>202</integer>
+// CHECK-NEXT:           <key>line</key><integer>221</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>202</integer>
+// CHECK-NEXT:           <key>line</key><integer>221</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1700,12 +1730,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>202</integer>
+// CHECK-NEXT:           <key>line</key><integer>221</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>202</integer>
+// CHECK-NEXT:           <key>line</key><integer>221</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1713,12 +1743,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>202</integer>
+// CHECK-NEXT:           <key>line</key><integer>221</integer>
 // CHECK-NEXT:           <key>col</key><integer>24</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>202</integer>
+// CHECK-NEXT:           <key>line</key><integer>221</integer>
 // CHECK-NEXT:           <key>col</key><integer>24</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1730,7 +1760,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>202</integer>
+// CHECK-NEXT:      <key>line</key><integer>221</integer>
 // CHECK-NEXT:      <key>col</key><integer>24</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1738,12 +1768,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>202</integer>
+// CHECK-NEXT:         <key>line</key><integer>221</integer>
 // CHECK-NEXT:         <key>col</key><integer>24</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>206</integer>
+// CHECK-NEXT:         <key>line</key><integer>225</integer>
 // CHECK-NEXT:         <key>col</key><integer>3</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1759,7 +1789,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>202</integer>
+// CHECK-NEXT:      <key>line</key><integer>221</integer>
 // CHECK-NEXT:      <key>col</key><integer>3</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1767,12 +1797,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>202</integer>
+// CHECK-NEXT:         <key>line</key><integer>221</integer>
 // CHECK-NEXT:         <key>col</key><integer>3</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>206</integer>
+// CHECK-NEXT:         <key>line</key><integer>225</integer>
 // CHECK-NEXT:         <key>col</key><integer>4</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1788,7 +1818,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>202</integer>
+// CHECK-NEXT:      <key>line</key><integer>221</integer>
 // CHECK-NEXT:      <key>col</key><integer>3</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1796,12 +1826,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>202</integer>
+// CHECK-NEXT:         <key>line</key><integer>221</integer>
 // CHECK-NEXT:         <key>col</key><integer>3</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>206</integer>
+// CHECK-NEXT:         <key>line</key><integer>225</integer>
 // CHECK-NEXT:         <key>col</key><integer>4</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1817,7 +1847,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>202</integer>
+// CHECK-NEXT:      <key>line</key><integer>221</integer>
 // CHECK-NEXT:      <key>col</key><integer>24</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1835,12 +1865,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>202</integer>
+// CHECK-NEXT:           <key>line</key><integer>221</integer>
 // CHECK-NEXT:           <key>col</key><integer>24</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>202</integer>
+// CHECK-NEXT:           <key>line</key><integer>221</integer>
 // CHECK-NEXT:           <key>col</key><integer>24</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1848,12 +1878,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>203</integer>
+// CHECK-NEXT:           <key>line</key><integer>222</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>203</integer>
+// CHECK-NEXT:           <key>line</key><integer>222</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1869,12 +1899,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>203</integer>
+// CHECK-NEXT:           <key>line</key><integer>222</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>203</integer>
+// CHECK-NEXT:           <key>line</key><integer>222</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1882,12 +1912,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>203</integer>
+// CHECK-NEXT:           <key>line</key><integer>222</integer>
 // CHECK-NEXT:           <key>col</key><integer>8</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>203</integer>
+// CHECK-NEXT:           <key>line</key><integer>222</integer>
 // CHECK-NEXT:           <key>col</key><integer>8</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1899,7 +1929,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>203</integer>
+// CHECK-NEXT:      <key>line</key><integer>222</integer>
 // CHECK-NEXT:      <key>col</key><integer>8</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -1907,12 +1937,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>203</integer>
+// CHECK-NEXT:         <key>line</key><integer>222</integer>
 // CHECK-NEXT:         <key>col</key><integer>8</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>203</integer>
+// CHECK-NEXT:         <key>line</key><integer>222</integer>
 // CHECK-NEXT:         <key>col</key><integer>8</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -1932,12 +1962,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>203</integer>
+// CHECK-NEXT:           <key>line</key><integer>222</integer>
 // CHECK-NEXT:           <key>col</key><integer>8</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>203</integer>
+// CHECK-NEXT:           <key>line</key><integer>222</integer>
 // CHECK-NEXT:           <key>col</key><integer>8</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1945,12 +1975,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>204</integer>
+// CHECK-NEXT:           <key>line</key><integer>223</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>204</integer>
+// CHECK-NEXT:           <key>line</key><integer>223</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1966,12 +1996,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>204</integer>
+// CHECK-NEXT:           <key>line</key><integer>223</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>204</integer>
+// CHECK-NEXT:           <key>line</key><integer>223</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1979,12 +2009,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>204</integer>
+// CHECK-NEXT:           <key>line</key><integer>223</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>204</integer>
+// CHECK-NEXT:           <key>line</key><integer>223</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -1996,7 +2026,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>204</integer>
+// CHECK-NEXT:      <key>line</key><integer>223</integer>
 // CHECK-NEXT:      <key>col</key><integer>6</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -2004,12 +2034,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>204</integer>
+// CHECK-NEXT:         <key>line</key><integer>223</integer>
 // CHECK-NEXT:         <key>col</key><integer>4</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>204</integer>
+// CHECK-NEXT:         <key>line</key><integer>223</integer>
 // CHECK-NEXT:         <key>col</key><integer>4</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -2030,7 +2060,7 @@
 // CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>5d3f4c433004c7a6d4a06aa30cc3ea85</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>204</integer>
+// CHECK-NEXT:   <key>line</key><integer>223</integer>
 // CHECK-NEXT:   <key>col</key><integer>6</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
@@ -2046,12 +2076,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>211</integer>
+// CHECK-NEXT:           <key>line</key><integer>230</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>211</integer>
+// CHECK-NEXT:           <key>line</key><integer>230</integer>
 // CHECK-NEXT:           <key>col</key><integer>8</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2059,12 +2089,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>212</integer>
+// CHECK-NEXT:           <key>line</key><integer>231</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>212</integer>
+// CHECK-NEXT:           <key>line</key><integer>231</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2076,7 +2106,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>212</integer>
+// CHECK-NEXT:      <key>line</key><integer>231</integer>
 // CHECK-NEXT:      <key>col</key><integer>3</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -2084,12 +2114,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>212</integer>
+// CHECK-NEXT:         <key>line</key><integer>231</integer>
 // CHECK-NEXT:         <key>col</key><integer>3</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>212</integer>
+// CHECK-NEXT:         <key>line</key><integer>231</integer>
 // CHECK-NEXT:         <key>col</key><integer>8</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -2109,12 +2139,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>212</integer>
+// CHECK-NEXT:           <key>line</key><integer>231</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>212</integer>
+// CHECK-NEXT:           <key>line</key><integer>231</integer>
 // CHECK-NEXT:           <key>col</key><integer>5</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2122,12 +2152,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>213</integer>
+// CHECK-NEXT:           <key>line</key><integer>232</integer>
 // CHECK-NEXT:           <key>col</key><integer>24</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>213</integer>
+// CHECK-NEXT:           <key>line</key><integer>232</integer>
 // CHECK-NEXT:           <key>col</key><integer>24</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2139,7 +2169,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>213</integer>
+// CHECK-NEXT:      <key>line</key><integer>232</integer>
 // CHECK-NEXT:      <key>col</key><integer>24</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -2147,12 +2177,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>213</integer>
+// CHECK-NEXT:         <key>line</key><integer>232</integer>
 // CHECK-NEXT:         <key>col</key><integer>24</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>215</integer>
+// CHECK-NEXT:         <key>line</key><integer>234</integer>
 // CHECK-NEXT:         <key>col</key><integer>3</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -2168,7 +2198,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>213</integer>
+// CHECK-NEXT:      <key>line</key><integer>232</integer>
 // CHECK-NEXT:      <key>col</key><integer>3</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -2176,12 +2206,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>213</integer>
+// CHECK-NEXT:         <key>line</key><integer>232</integer>
 // CHECK-NEXT:         <key>col</key><integer>3</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>215</integer>
+// CHECK-NEXT:         <key>line</key><integer>234</integer>
 // CHECK-NEXT:         <key>col</key><integer>4</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -2197,7 +2227,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>187</integer>
+// CHECK-NEXT:      <key>line</key><integer>50</integer>
 // CHECK-NEXT:      <key>col</key><integer>1</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -2215,12 +2245,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>187</integer>
+// CHECK-NEXT:           <key>line</key><integer>50</integer>
 // CHECK-NEXT:           <key>col</key><integer>1</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>187</integer>
+// CHECK-NEXT:           <key>line</key><integer>50</integer>
 // CHECK-NEXT:           <key>col</key><integer>6</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2228,12 +2258,46 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>189</integer>
+// CHECK-NEXT:           <key>line</key><integer>58</integer>
+// CHECK-NEXT:           <key>col</key><integer>2</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>58</integer>
+// CHECK-NEXT:           <key>col</key><integer>3</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:       </dict>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>control</string>
+// CHECK-NEXT:     <key>edges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:       <dict>
+// CHECK-NEXT:        <key>start</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>58</integer>
+// CHECK-NEXT:           <key>col</key><integer>2</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>58</integer>
+// CHECK-NEXT:           <key>col</key><integer>3</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:        <key>end</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>59</integer>
 // CHECK-NEXT:           <key>col</key><integer>3</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>189</integer>
+// CHECK-NEXT:           <key>line</key><integer>59</integer>
 // CHECK-NEXT:           <key>col</key><integer>15</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2245,7 +2309,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>189</integer>
+// CHECK-NEXT:      <key>line</key><integer>59</integer>
 // CHECK-NEXT:      <key>col</key><integer>3</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -2253,12 +2317,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>189</integer>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
 // CHECK-NEXT:         <key>col</key><integer>3</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>189</integer>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
 // CHECK-NEXT:         <key>col</key><integer>33</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -2274,7 +2338,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>189</integer>
+// CHECK-NEXT:      <key>line</key><integer>59</integer>
 // CHECK-NEXT:      <key>col</key><integer>3</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -2282,12 +2346,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>189</integer>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
 // CHECK-NEXT:         <key>col</key><integer>3</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>189</integer>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
 // CHECK-NEXT:         <key>col</key><integer>33</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -2303,7 +2367,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>213</integer>
+// CHECK-NEXT:      <key>line</key><integer>232</integer>
 // CHECK-NEXT:      <key>col</key><integer>24</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -2321,12 +2385,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>213</integer>
+// CHECK-NEXT:           <key>line</key><integer>232</integer>
 // CHECK-NEXT:           <key>col</key><integer>24</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>213</integer>
+// CHECK-NEXT:           <key>line</key><integer>232</integer>
 // CHECK-NEXT:           <key>col</key><integer>24</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2334,12 +2398,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>214</integer>
+// CHECK-NEXT:           <key>line</key><integer>233</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>214</integer>
+// CHECK-NEXT:           <key>line</key><integer>233</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2355,12 +2419,12 @@
 // CHECK-NEXT:        <key>start</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>214</integer>
+// CHECK-NEXT:           <key>line</key><integer>233</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>214</integer>
+// CHECK-NEXT:           <key>line</key><integer>233</integer>
 // CHECK-NEXT:           <key>col</key><integer>4</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2368,12 +2432,12 @@
 // CHECK-NEXT:        <key>end</key>
 // CHECK-NEXT:         <array>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>214</integer>
+// CHECK-NEXT:           <key>line</key><integer>233</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
 // CHECK-NEXT:          <dict>
-// CHECK-NEXT:           <key>line</key><integer>214</integer>
+// CHECK-NEXT:           <key>line</key><integer>233</integer>
 // CHECK-NEXT:           <key>col</key><integer>7</integer>
 // CHECK-NEXT:           <key>file</key><integer>0</integer>
 // CHECK-NEXT:          </dict>
@@ -2385,7 +2449,7 @@
 // CHECK-NEXT:     <key>kind</key><string>event</string>
 // CHECK-NEXT:     <key>location</key>
 // CHECK-NEXT:     <dict>
-// CHECK-NEXT:      <key>line</key><integer>214</integer>
+// CHECK-NEXT:      <key>line</key><integer>233</integer>
 // CHECK-NEXT:      <key>col</key><integer>7</integer>
 // CHECK-NEXT:      <key>file</key><integer>0</integer>
 // CHECK-NEXT:     </dict>
@@ -2393,12 +2457,12 @@
 // CHECK-NEXT:     <array>
 // CHECK-NEXT:       <array>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>214</integer>
+// CHECK-NEXT:         <key>line</key><integer>233</integer>
 // CHECK-NEXT:         <key>col</key><integer>5</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
 // CHECK-NEXT:        <dict>
-// CHECK-NEXT:         <key>line</key><integer>214</integer>
+// CHECK-NEXT:         <key>line</key><integer>233</integer>
 // CHECK-NEXT:         <key>col</key><integer>5</integer>
 // CHECK-NEXT:         <key>file</key><integer>0</integer>
 // CHECK-NEXT:        </dict>
@@ -2419,9 +2483,459 @@
 // CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>265c4fd608dafee211bfa93d21c28866</string>
 // CHECK-NEXT:  <key>location</key>
 // CHECK-NEXT:  <dict>
-// CHECK-NEXT:   <key>line</key><integer>214</integer>
+// CHECK-NEXT:   <key>line</key><integer>233</integer>
 // CHECK-NEXT:   <key>col</key><integer>7</integer>
 // CHECK-NEXT:   <key>file</key><integer>0</integer>
 // CHECK-NEXT:  </dict>
 // CHECK-NEXT:  </dict>
+// CHECK-NEXT:  <dict>
+// CHECK-NEXT:   <key>path</key>
+// CHECK-NEXT:   <array>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>event</string>
+// CHECK-NEXT:     <key>location</key>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>line</key><integer>241</integer>
+// CHECK-NEXT:      <key>col</key><integer>3</integer>
+// CHECK-NEXT:      <key>file</key><integer>0</integer>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <key>ranges</key>
+// CHECK-NEXT:     <array>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>241</integer>
+// CHECK-NEXT:         <key>col</key><integer>3</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>243</integer>
+// CHECK-NEXT:         <key>col</key><integer>4</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </array>
+// CHECK-NEXT:     <key>depth</key><integer>0</integer>
+// CHECK-NEXT:     <key>extended_message</key>
+// CHECK-NEXT:     <string>Calling &apos;_dispatch_once&apos;</string>
+// CHECK-NEXT:     <key>message</key>
+// CHECK-NEXT:     <string>Calling &apos;_dispatch_once&apos;</string>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>event</string>
+// CHECK-NEXT:     <key>location</key>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>line</key><integer>50</integer>
+// CHECK-NEXT:      <key>col</key><integer>1</integer>
+// CHECK-NEXT:      <key>file</key><integer>0</integer>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <key>depth</key><integer>1</integer>
+// CHECK-NEXT:     <key>extended_message</key>
+// CHECK-NEXT:     <string>Entered call from &apos;test_inline_dispatch_once_reachable&apos;</string>
+// CHECK-NEXT:     <key>message</key>
+// CHECK-NEXT:     <string>Entered call from &apos;test_inline_dispatch_once_reachable&apos;</string>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>control</string>
+// CHECK-NEXT:     <key>edges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:       <dict>
+// CHECK-NEXT:        <key>start</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>50</integer>
+// CHECK-NEXT:           <key>col</key><integer>1</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>50</integer>
+// CHECK-NEXT:           <key>col</key><integer>6</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:        <key>end</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>58</integer>
+// CHECK-NEXT:           <key>col</key><integer>2</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>58</integer>
+// CHECK-NEXT:           <key>col</key><integer>3</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:       </dict>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>control</string>
+// CHECK-NEXT:     <key>edges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:       <dict>
+// CHECK-NEXT:        <key>start</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>58</integer>
+// CHECK-NEXT:           <key>col</key><integer>2</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>58</integer>
+// CHECK-NEXT:           <key>col</key><integer>3</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:        <key>end</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>59</integer>
+// CHECK-NEXT:           <key>col</key><integer>3</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>59</integer>
+// CHECK-NEXT:           <key>col</key><integer>15</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:       </dict>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>event</string>
+// CHECK-NEXT:     <key>location</key>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>line</key><integer>59</integer>
+// CHECK-NEXT:      <key>col</key><integer>3</integer>
+// CHECK-NEXT:      <key>file</key><integer>0</integer>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <key>ranges</key>
+// CHECK-NEXT:     <array>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
+// CHECK-NEXT:         <key>col</key><integer>3</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
+// CHECK-NEXT:         <key>col</key><integer>33</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </array>
+// CHECK-NEXT:     <key>depth</key><integer>1</integer>
+// CHECK-NEXT:     <key>extended_message</key>
+// CHECK-NEXT:     <string>Calling &apos;dispatch_once&apos;</string>
+// CHECK-NEXT:     <key>message</key>
+// CHECK-NEXT:     <string>Calling &apos;dispatch_once&apos;</string>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>event</string>
+// CHECK-NEXT:     <key>location</key>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>line</key><integer>59</integer>
+// CHECK-NEXT:      <key>col</key><integer>3</integer>
+// CHECK-NEXT:      <key>file</key><integer>0</integer>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <key>ranges</key>
+// CHECK-NEXT:     <array>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
+// CHECK-NEXT:         <key>col</key><integer>3</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
+// CHECK-NEXT:         <key>col</key><integer>33</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </array>
+// CHECK-NEXT:     <key>depth</key><integer>2</integer>
+// CHECK-NEXT:     <key>extended_message</key>
+// CHECK-NEXT:     <string>Calling anonymous block</string>
+// CHECK-NEXT:     <key>message</key>
+// CHECK-NEXT:     <string>Calling anonymous block</string>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>event</string>
+// CHECK-NEXT:     <key>location</key>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>line</key><integer>241</integer>
+// CHECK-NEXT:      <key>col</key><integer>24</integer>
+// CHECK-NEXT:      <key>file</key><integer>0</integer>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <key>depth</key><integer>3</integer>
+// CHECK-NEXT:     <key>extended_message</key>
+// CHECK-NEXT:     <string>Entered call from &apos;dispatch_once&apos;</string>
+// CHECK-NEXT:     <key>message</key>
+// CHECK-NEXT:     <string>Entered call from &apos;dispatch_once&apos;</string>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>control</string>
+// CHECK-NEXT:     <key>edges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:       <dict>
+// CHECK-NEXT:        <key>start</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>241</integer>
+// CHECK-NEXT:           <key>col</key><integer>24</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>241</integer>
+// CHECK-NEXT:           <key>col</key><integer>24</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:        <key>end</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>242</integer>
+// CHECK-NEXT:           <key>col</key><integer>7</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>242</integer>
+// CHECK-NEXT:           <key>col</key><integer>7</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:       </dict>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>event</string>
+// CHECK-NEXT:     <key>location</key>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>line</key><integer>242</integer>
+// CHECK-NEXT:      <key>col</key><integer>7</integer>
+// CHECK-NEXT:      <key>file</key><integer>0</integer>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <key>ranges</key>
+// CHECK-NEXT:     <array>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>242</integer>
+// CHECK-NEXT:         <key>col</key><integer>7</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>242</integer>
+// CHECK-NEXT:         <key>col</key><integer>11</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </array>
+// CHECK-NEXT:     <key>depth</key><integer>3</integer>
+// CHECK-NEXT:     <key>extended_message</key>
+// CHECK-NEXT:     <string>Null pointer value stored to &apos;p&apos;</string>
+// CHECK-NEXT:     <key>message</key>
+// CHECK-NEXT:     <string>Null pointer value stored to &apos;p&apos;</string>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>event</string>
+// CHECK-NEXT:     <key>location</key>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>line</key><integer>59</integer>
+// CHECK-NEXT:      <key>col</key><integer>3</integer>
+// CHECK-NEXT:      <key>file</key><integer>0</integer>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <key>ranges</key>
+// CHECK-NEXT:     <array>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
+// CHECK-NEXT:         <key>col</key><integer>3</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
+// CHECK-NEXT:         <key>col</key><integer>33</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </array>
+// CHECK-NEXT:     <key>depth</key><integer>2</integer>
+// CHECK-NEXT:     <key>extended_message</key>
+// CHECK-NEXT:     <string>Returning to caller</string>
+// CHECK-NEXT:     <key>message</key>
+// CHECK-NEXT:     <string>Returning to caller</string>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>event</string>
+// CHECK-NEXT:     <key>location</key>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>line</key><integer>59</integer>
+// CHECK-NEXT:      <key>col</key><integer>3</integer>
+// CHECK-NEXT:      <key>file</key><integer>0</integer>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <key>ranges</key>
+// CHECK-NEXT:     <array>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
+// CHECK-NEXT:         <key>col</key><integer>3</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>59</integer>
+// CHECK-NEXT:         <key>col</key><integer>33</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </array>
+// CHECK-NEXT:     <key>depth</key><integer>1</integer>
+// CHECK-NEXT:     <key>extended_message</key>
+// CHECK-NEXT:     <string>Returning from &apos;dispatch_once&apos;</string>
+// CHECK-NEXT:     <key>message</key>
+// CHECK-NEXT:     <string>Returning from &apos;dispatch_once&apos;</string>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>control</string>
+// CHECK-NEXT:     <key>edges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:       <dict>
+// CHECK-NEXT:        <key>start</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>59</integer>
+// CHECK-NEXT:           <key>col</key><integer>3</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>59</integer>
+// CHECK-NEXT:           <key>col</key><integer>15</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:        <key>end</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>63</integer>
+// CHECK-NEXT:           <key>col</key><integer>2</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>63</integer>
+// CHECK-NEXT:           <key>col</key><integer>17</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:       </dict>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>event</string>
+// CHECK-NEXT:     <key>location</key>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>line</key><integer>241</integer>
+// CHECK-NEXT:      <key>col</key><integer>3</integer>
+// CHECK-NEXT:      <key>file</key><integer>0</integer>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <key>ranges</key>
+// CHECK-NEXT:     <array>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>241</integer>
+// CHECK-NEXT:         <key>col</key><integer>3</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>243</integer>
+// CHECK-NEXT:         <key>col</key><integer>4</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </array>
+// CHECK-NEXT:     <key>depth</key><integer>0</integer>
+// CHECK-NEXT:     <key>extended_message</key>
+// CHECK-NEXT:     <string>Returning from &apos;_dispatch_once&apos;</string>
+// CHECK-NEXT:     <key>message</key>
+// CHECK-NEXT:     <string>Returning from &apos;_dispatch_once&apos;</string>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>control</string>
+// CHECK-NEXT:     <key>edges</key>
+// CHECK-NEXT:      <array>
+// CHECK-NEXT:       <dict>
+// CHECK-NEXT:        <key>start</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>241</integer>
+// CHECK-NEXT:           <key>col</key><integer>3</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>241</integer>
+// CHECK-NEXT:           <key>col</key><integer>15</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:        <key>end</key>
+// CHECK-NEXT:         <array>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>245</integer>
+// CHECK-NEXT:           <key>col</key><integer>6</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:          <dict>
+// CHECK-NEXT:           <key>line</key><integer>245</integer>
+// CHECK-NEXT:           <key>col</key><integer>6</integer>
+// CHECK-NEXT:           <key>file</key><integer>0</integer>
+// CHECK-NEXT:          </dict>
+// CHECK-NEXT:         </array>
+// CHECK-NEXT:       </dict>
+// CHECK-NEXT:      </array>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:    <dict>
+// CHECK-NEXT:     <key>kind</key><string>event</string>
+// CHECK-NEXT:     <key>location</key>
+// CHECK-NEXT:     <dict>
+// CHECK-NEXT:      <key>line</key><integer>245</integer>
+// CHECK-NEXT:      <key>col</key><integer>6</integer>
+// CHECK-NEXT:      <key>file</key><integer>0</integer>
+// CHECK-NEXT:     </dict>
+// CHECK-NEXT:     <key>ranges</key>
+// CHECK-NEXT:     <array>
+// CHECK-NEXT:       <array>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>245</integer>
+// CHECK-NEXT:         <key>col</key><integer>4</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:        <dict>
+// CHECK-NEXT:         <key>line</key><integer>245</integer>
+// CHECK-NEXT:         <key>col</key><integer>4</integer>
+// CHECK-NEXT:         <key>file</key><integer>0</integer>
+// CHECK-NEXT:        </dict>
+// CHECK-NEXT:       </array>
+// CHECK-NEXT:     </array>
+// CHECK-NEXT:     <key>depth</key><integer>0</integer>
+// CHECK-NEXT:     <key>extended_message</key>
+// CHECK-NEXT:     <string>Dereference of null pointer (loaded from variable &apos;p&apos;)</string>
+// CHECK-NEXT:     <key>message</key>
+// CHECK-NEXT:     <string>Dereference of null pointer (loaded from variable &apos;p&apos;)</string>
+// CHECK-NEXT:    </dict>
+// CHECK-NEXT:   </array>
+// CHECK-NEXT:   <key>description</key><string>Dereference of null pointer (loaded from variable &apos;p&apos;)</string>
+// CHECK-NEXT:   <key>category</key><string>Logic error</string>
+// CHECK-NEXT:   <key>type</key><string>Dereference of null pointer</string>
+// CHECK-NEXT:   <key>check_name</key><string>core.NullDereference</string>
+// CHECK-NEXT:   <!-- This hash is experimental and going to change! -->
+// CHECK-NEXT:   <key>issue_hash_content_of_line_in_context</key><string>1e83bd4361a2351df0b4e77eb3a9109b</string>
+// CHECK-NEXT:  <key>issue_context_kind</key><string>function</string>
+// CHECK-NEXT:  <key>issue_context</key><string>test_inline_dispatch_once_reachable</string>
+// CHECK-NEXT:  <key>issue_hash_function_offset</key><string>7</string>
+// CHECK-NEXT:  <key>location</key>
+// CHECK-NEXT:  <dict>
+// CHECK-NEXT:   <key>line</key><integer>245</integer>
+// CHECK-NEXT:   <key>col</key><integer>6</integer>
+// CHECK-NEXT:   <key>file</key><integer>0</integer>
+// CHECK-NEXT:  </dict>
+// CHECK-NEXT:  </dict>
 // CHECK-NEXT: </array>
diff --git a/test/Analysis/unreachable-code-path.c b/test/Analysis/unreachable-code-path.c
index effa4d9..95cc4ea 100644
--- a/test/Analysis/unreachable-code-path.c
+++ b/test/Analysis/unreachable-code-path.c
@@ -63,6 +63,7 @@
   if (c) return;
   if (!c) return;
   __builtin_unreachable(); // no-warning
+  __builtin_assume(0); // no-warning
 }
 
 // Compile-time constant false positives
diff --git a/test/Analysis/vector.m b/test/Analysis/vector.m
new file mode 100644
index 0000000..e74c487
--- /dev/null
+++ b/test/Analysis/vector.m
@@ -0,0 +1,61 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify %s
+
+typedef int __attribute__((ext_vector_type(2))) V;
+
+void clang_analyzer_warnIfReached();
+void clang_analyzer_numTimesReached();
+void clang_analyzer_eval(int);
+
+int flag;
+
+V pass_through_and_set_flag(V v) {
+  flag = 1;
+  return v;
+}
+
+V dont_crash_and_dont_split_state(V x, V y) {
+  flag = 0;
+  V z = x && pass_through_and_set_flag(y);
+  clang_analyzer_eval(flag); // expected-warning{{TRUE}}
+  // FIXME: For now we treat vector operator && as short-circuit,
+  // but in fact it is not. It should always evaluate
+  // pass_through_and_set_flag(). It should not split state.
+  // Now we also get FALSE on the other path.
+  // expected-warning@-5{{FALSE}}
+
+  // FIXME: Should be 1 since we should not split state.
+  clang_analyzer_numTimesReached(); // expected-warning{{2}}
+  return z;
+}
+
+void test_read() {
+  V x;
+  x[0] = 0;
+  x[1] = 1;
+
+  clang_analyzer_eval(x[0] == 0); // expected-warning{{TRUE}}
+}
+
+V return_vector() {
+  V z;
+  z[0] = 0;
+  z[1] = 0;
+  return z;
+}
+
+int test_vector_access() {
+  return return_vector()[0]; // no-crash no-warning
+}
+
+@interface I
+@property V v;
+@end
+
+// Do not crash on subscript operations into ObjC properties.
+int myfunc(I *i2) {
+  int out = i2.v[0]; // no-crash no-warning
+
+  // Check that the analysis continues.
+  clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+  return out;
+}
diff --git a/test/Analysis/virtualcall.cpp b/test/Analysis/virtualcall.cpp
index c22a846..1929abf 100644
--- a/test/Analysis/virtualcall.cpp
+++ b/test/Analysis/virtualcall.cpp
@@ -262,6 +262,9 @@
 	//expected-note-re@-2 {{{{^}}Calling default constructor for 'M'}}
 #endif
   Y *y = new Y;
+#if !PUREONLY
+  //expected-note-re@-2 {{{{^}}Calling default constructor for 'Y'}}
+#endif
   delete y;
   header::Z z;
 #if !PUREONLY
diff --git a/test/Analysis/yaccignore.c b/test/Analysis/yaccignore.c
new file mode 100644
index 0000000..c9edfad
--- /dev/null
+++ b/test/Analysis/yaccignore.c
@@ -0,0 +1,13 @@
+/* A Bison parser, made by GNU Bison 1.875.  */
+
+// RUN: rm -rf %t.plist
+// RUN: %clang_analyze_cc1 -analyzer-checker=core -analyzer-output=plist -o %t.plist -verify %s
+// RUN: FileCheck --input-file=%t.plist %s
+
+// expected-no-diagnostics
+int foo() {
+  int *x = 0;
+  return *x; // no-warning
+}
+
+// CHECK:   <key>diagnostics</key>
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c1ac9e4..8efe360 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -96,6 +96,7 @@
     llvm-bcanalyzer
     llvm-cat
     llvm-dis
+    llvm-lto2
     llvm-modextract
     llvm-nm
     llvm-objdump
@@ -131,3 +132,12 @@
 add_custom_target(clang-test)
 add_dependencies(clang-test check-clang)
 set_target_properties(clang-test PROPERTIES FOLDER "Clang tests")
+
+# FIXME: This logic can be removed once all buildbots have moved
+# debuginfo-test from clang/test to llvm/projects or monorepo.
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/debuginfo-tests)
+  message(WARNING "Including debuginfo-tests in clang/test is deprecated.  Move to llvm/projects or use monorepo.")
+  if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/debuginfo-tests/CMakeLists.txt)
+    add_subdirectory(debuginfo-tests)
+  endif()
+endif()
diff --git a/test/CXX/concepts-ts/dcl.dcl/lit.cfg.py b/test/CXX/concepts-ts/dcl.dcl/lit.cfg.py
new file mode 100644
index 0000000..c705e3c
--- /dev/null
+++ b/test/CXX/concepts-ts/dcl.dcl/lit.cfg.py
@@ -0,0 +1,26 @@
+# -*- Python -*-
+
+import os
+import lit.formats
+
+from lit.llvm import llvm_config
+
+# Configuration file for the 'lit' test runner.
+
+# name: The name of this test suite.
+config.name = 'Clang-Concepts-TS-Unsupported'
+
+# testFormat: The test format to use to interpret tests.
+#
+# For now we require '&&' between commands, until they get globally killed and
+# the test runner updated.
+config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
+
+# suffixes: A list of file extensions to treat as test files.
+config.suffixes = ['.c', '.cpp', '.cppm', '.m', '.mm', '.cu',
+                   '.ll', '.cl', '.s', '.S', '.modulemap', '.test', '.rs']
+
+config.unsupported = True
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
diff --git a/test/CXX/dcl.dcl/dcl.attr/dcl.attr.depend/p1.cpp b/test/CXX/dcl.dcl/dcl.attr/dcl.attr.depend/p1.cpp
index 9f7ef3a..6cf27af 100644
--- a/test/CXX/dcl.dcl/dcl.attr/dcl.attr.depend/p1.cpp
+++ b/test/CXX/dcl.dcl/dcl.attr/dcl.attr.depend/p1.cpp
@@ -7,8 +7,8 @@
 [[carries_dependency]] void f1(); // FIXME: warn here
 [[carries_dependency]] int f2(); // ok
 int f3(int param [[carries_dependency]]); // ok
-[[carries_dependency]] int (*f4)(); // expected-error {{'carries_dependency' attribute only applies to functions, methods, and parameters}}
-int (*f5 [[carries_dependency]])(); // expected-error {{'carries_dependency' attribute only applies to functions, methods, and parameters}}
+[[carries_dependency]] int (*f4)(); // expected-error {{'carries_dependency' attribute only applies to parameters, Objective-C methods, and functions}}
+int (*f5 [[carries_dependency]])(); // expected-error {{'carries_dependency' attribute only applies to}}
 int (*f6)() [[carries_dependency]]; // expected-error {{'carries_dependency' attribute cannot be applied to types}}
 int (*f7)(int n [[carries_dependency]]); // expected-error {{'[[carries_dependency]]' attribute only allowed on parameter in a function declaration}}
 int (((f8)))(int n [[carries_dependency]]); // ok
@@ -21,7 +21,7 @@
 };
 void f() {
   [[carries_dependency]] int f(int n [[carries_dependency]]); // ok
-  [[carries_dependency]] // expected-error {{'carries_dependency' attribute only applies to functions, methods, and parameters}}
+  [[carries_dependency]] // expected-error {{'carries_dependency' attribute only applies to}}
       int (*p)(int n [[carries_dependency]]); // expected-error {{'[[carries_dependency]]' attribute only allowed on parameter in a function declaration}}
 }
 
diff --git a/test/CXX/dcl.dcl/dcl.attr/dcl.attr.nodiscard/p1.cpp b/test/CXX/dcl.dcl/dcl.attr/dcl.attr.nodiscard/p1.cpp
index e7a2382..4425416 100644
--- a/test/CXX/dcl.dcl/dcl.attr/dcl.attr.nodiscard/p1.cpp
+++ b/test/CXX/dcl.dcl/dcl.attr/dcl.attr.nodiscard/p1.cpp
@@ -7,4 +7,4 @@
 [[nodiscard]] int f();
 enum [[nodiscard]] E {};
 
-namespace [[nodiscard]] N {} // expected-warning {{'nodiscard' attribute only applies to functions, methods, enums, and classes}}
+namespace [[nodiscard]] N {} // expected-warning {{'nodiscard' attribute only applies to Objective-C methods, enums, structs, unions, classes, functions, and function pointers}}
diff --git a/test/CXX/dcl.dcl/dcl.link/p7.cpp b/test/CXX/dcl.dcl/dcl.link/p7.cpp
index 7d80a22..81cb28c 100644
--- a/test/CXX/dcl.dcl/dcl.link/p7.cpp
+++ b/test/CXX/dcl.dcl/dcl.link/p7.cpp
@@ -2,10 +2,10 @@
 
 struct X { };
 
-// CHECK: @x1 = global %struct.X zeroinitializer
-// CHECK: @x4 = global %struct.X zeroinitializer
-// CHECK: @x2 = external global %struct.X
-// CHECK: @x3 = external global %struct.X
+// CHECK: @x1 = {{(dso_local )?}}global %struct.X zeroinitializer
+// CHECK: @x4 = {{(dso_local )?}}global %struct.X zeroinitializer
+// CHECK: @x2 = external {{(dso_local )?}}global %struct.X
+// CHECK: @x3 = external {{(dso_local )?}}global %struct.X
 extern "C" {
   X x1;
 }
diff --git a/test/CXX/dcl.dcl/dcl.spec/dcl.stc/p2.cpp b/test/CXX/dcl.dcl/dcl.spec/dcl.stc/p2.cpp
index 0b7a902..1372151 100644
--- a/test/CXX/dcl.dcl/dcl.spec/dcl.stc/p2.cpp
+++ b/test/CXX/dcl.dcl/dcl.spec/dcl.stc/p2.cpp
@@ -39,6 +39,7 @@
 void foo(auto int ap, register int rp) {
 #if __cplusplus >= 201103L // C++11 or later
 // expected-warning@-2 {{'auto' storage class specifier is not permitted in C++11, and will not be supported in future releases}}
+// expected-warning@-3 {{'register' storage class specifier is deprecated}}
 #endif
   auto int abo;
 #if __cplusplus >= 201103L // C++11 or later
diff --git a/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.simple/p4-cxx0x.cpp b/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.simple/p4-cxx0x.cpp
index 53227ea..ee952ae 100644
--- a/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.simple/p4-cxx0x.cpp
+++ b/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.simple/p4-cxx0x.cpp
@@ -12,13 +12,18 @@
 
 const int&& foo();
 int i;
-struct A { double x; };
+struct A {
+  double x;
+  static int y;
+};
 const A* a = new A();
 
 static_assert(is_same<decltype(foo()), const int&&>::value, "");
 static_assert(is_same<decltype(i), int>::value, "");
 static_assert(is_same<decltype(a->x), double>::value, "");
 static_assert(is_same<decltype((a->x)), const double&>::value, "");
+static_assert(is_same<decltype(a->y), int>::value, "");
+static_assert(is_same<decltype((a->y)), int&>::value, "");
 static_assert(is_same<decltype(static_cast<int&&>(i)), int&&>::value, "");
 
 int f0(int); // expected-note{{possible target}}
diff --git a/test/CXX/dcl.decl/dcl.init/dcl.init.list/p7-0x.cpp b/test/CXX/dcl.decl/dcl.init/dcl.init.list/p7-0x.cpp
index 48c5b23..4436cb0 100644
--- a/test/CXX/dcl.decl/dcl.init/dcl.init.list/p7-0x.cpp
+++ b/test/CXX/dcl.decl/dcl.init/dcl.init.list/p7-0x.cpp
@@ -24,6 +24,10 @@
     { 2, f(2), f(2.0) };  // OK: the double-to-int conversion is not at the top level
 }
 
+enum UnscopedEnum {
+  EnumVal = 300
+};
+
 // Test each rule individually.
 
 template<typename T>
@@ -115,15 +119,21 @@
 void int_to_float() {
   // Not a constant expression.
   char c = 1;
+  UnscopedEnum e = EnumVal;
 
   // Variables.  Yes, even though all char's will fit into any floating type.
   Agg<float> f1 = {c};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
   Agg<double> f2 = {c};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
   Agg<long double> f3 = {c};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
 
+  Agg<float> f4 = {e};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
+  Agg<double> f5 = {e};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
+  Agg<long double> f6 = {e};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
+
   // Constants.
-  Agg<float> f4 = {12345678};  // OK (exactly fits in a float)
-  Agg<float> f5 = {123456789};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
+  Agg<float> f7 = {12345678};  // OK (exactly fits in a float)
+  Agg<float> f8 = {EnumVal};  // OK
+  Agg<float> f9 = {123456789};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
 
   Agg<float> ce1 = { Convert<int>(123456789) }; // expected-error {{constant expression evaluates to 123456789 which cannot be narrowed to type 'float'}} expected-note {{silence}}
   Agg<double> ce2 = { ConvertVar<long long>() }; // expected-error {{non-constant-expression cannot be narrowed from type 'long long' to 'double'}} expected-note {{silence}}
@@ -137,8 +147,10 @@
 void shrink_int() {
   // Not a constant expression.
   short s = 1;
+  UnscopedEnum e = EnumVal;
   unsigned short us = 1;
   Agg<char> c1 = {s};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
+  Agg<char> c2 = {e};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
   Agg<unsigned short> s1 = {s};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
   Agg<short> s2 = {us};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
 
@@ -148,16 +160,19 @@
   // long).
   long l1 = 1;
   Agg<int> i1 = {l1};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
+  Agg<int> i2 = {e};  // OK
   long long ll = 1;
   Agg<long> l2 = {ll};  // OK
 
   // Constants.
-  Agg<char> c2 = {127};  // OK
-  Agg<char> c3 = {300};  // expected-error {{ cannot be narrowed }} expected-note {{silence}} expected-warning {{changes value}}
+  Agg<char> c3 = {127};  // OK
+  Agg<char> c4 = {300};  // expected-error {{ cannot be narrowed }} expected-note {{silence}} expected-warning {{changes value}}
+  Agg<char> c5 = {EnumVal};  // expected-error {{ cannot be narrowed }} expected-note {{silence}} expected-warning {{changes value}}
 
-  Agg<int> i2 = {0x7FFFFFFFU};  // OK
-  Agg<int> i3 = {0x80000000U};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
-  Agg<unsigned int> i4 = {-0x80000000L};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
+  Agg<int> i3 = {0x7FFFFFFFU};  // OK
+  Agg<int> i4 = {EnumVal};  // OK
+  Agg<int> i5 = {0x80000000U};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
+  Agg<unsigned int> i6 = {-0x80000000L};  // expected-error {{ cannot be narrowed }} expected-note {{silence}}
 
   // Bool is also an integer type, but conversions to it are a different AST
   // node.
diff --git a/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p5-examples.cpp b/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p5-examples.cpp
index 052349c..46593b7 100644
--- a/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p5-examples.cpp
+++ b/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p5-examples.cpp
@@ -1,56 +1,56 @@
-// RUN: %clang_cc1 -ast-dump %s 2>&1 | FileCheck %s
-
-// CHECK-LABEL: example0
-void example0() {
-  double d = 2.0;
-  // CHECK: VarDecl{{.*}}rd 'double &'
-  // CHECK-NEXT: DeclRefExpr
-  double &rd = d;
-  // CHECK: VarDecl{{.*}}rcd 'const double &'
-  // CHECK-NEXT: ImplicitCastExpr{{.*}}'const double' lvalue <NoOp>
-  const double &rcd = d;
-}
-
-struct A { };
-struct B : A { } b;
-
-// CHECK-LABEL: example1
-void example1() {
-  // CHECK: VarDecl{{.*}}ra 'struct A &'
-  // CHECK: ImplicitCastExpr{{.*}}'struct A' lvalue <DerivedToBase (A)>
-  A &ra = b;
-  // CHECK: VarDecl{{.*}}rca 'const struct A &'
-  // CHECK: ImplicitCastExpr{{.*}}'const struct A' lvalue <DerivedToBase (A)>
-  // CHECK-NOT: MaterializeTemporaryExpr
-  // CHECK: ImplicitCastExpr{{.*}}'const struct B' lvalue <NoOp>
-  const A& rca = b;
-}
-
-extern B f();
-
-struct X {
-  operator B();
-} x;
-
-// CHECK-LABEL: example2
-void example2() {
-  // CHECK: VarDecl{{.*}}rca 'const struct A &'
-  // CHECK: ImplicitCastExpr{{.*}}'const struct A' lvalue <DerivedToBase (A)>
-  // CHECK: MaterializeTemporaryExpr{{.*}}'const struct B'
-  // CHECK: ImplicitCastExpr{{.*}}'const struct B' <NoOp>
-  // CHECK: CallExpr{{.*}}B
-  const A &rca = f(); 
-  // CHECK: VarDecl{{.*}}r 'const struct A &'
-  // CHECK: ImplicitCastExpr{{.*}}'const struct A' lvalue <DerivedToBase (A)>
-  // CHECK: MaterializeTemporaryExpr{{.*}}'const struct B'
-  // CHECK: ImplicitCastExpr{{.*}}'const struct B' <NoOp>
-  // CHECK: CXXMemberCallExpr{{.*}}'struct B'
-  const A& r = x;
-}
-
-// CHECK-LABEL: example3
-void example3() {
-  // CHECK: VarDecl{{.*}}rcd2 'const double &'
-  // CHECK: ImplicitCastExpr{{.*}} <IntegralToFloating>
-  const double& rcd2 = 2; 
-}
+// RUN: %clang_cc1 -ast-dump %s 2>&1 | FileCheck %s
+
+// CHECK-LABEL: example0
+void example0() {
+  double d = 2.0;
+  // CHECK: VarDecl{{.*}}rd 'double &'
+  // CHECK-NEXT: DeclRefExpr
+  double &rd = d;
+  // CHECK: VarDecl{{.*}}rcd 'const double &'
+  // CHECK-NEXT: ImplicitCastExpr{{.*}}'const double' lvalue <NoOp>
+  const double &rcd = d;
+}
+
+struct A { };
+struct B : A { } b;
+
+// CHECK-LABEL: example1
+void example1() {
+  // CHECK: VarDecl{{.*}}ra 'A &'
+  // CHECK: ImplicitCastExpr{{.*}}'A' lvalue <DerivedToBase (A)>
+  A &ra = b;
+  // CHECK: VarDecl{{.*}}rca 'const A &'
+  // CHECK: ImplicitCastExpr{{.*}}'const A' lvalue <DerivedToBase (A)>
+  // CHECK-NOT: MaterializeTemporaryExpr
+  // CHECK: ImplicitCastExpr{{.*}}'const B' lvalue <NoOp>
+  const A& rca = b;
+}
+
+extern B f();
+
+struct X {
+  operator B();
+} x;
+
+// CHECK-LABEL: example2
+void example2() {
+  // CHECK: VarDecl{{.*}}rca 'const A &'
+  // CHECK: ImplicitCastExpr{{.*}}'const A' lvalue <DerivedToBase (A)>
+  // CHECK: MaterializeTemporaryExpr{{.*}}'const B'
+  // CHECK: ImplicitCastExpr{{.*}}'const B' <NoOp>
+  // CHECK: CallExpr{{.*}}B
+  const A &rca = f();
+  // CHECK: VarDecl{{.*}}r 'const A &'
+  // CHECK: ImplicitCastExpr{{.*}}'const A' lvalue <DerivedToBase (A)>
+  // CHECK: MaterializeTemporaryExpr{{.*}}'const B'
+  // CHECK: ImplicitCastExpr{{.*}}'const B' <NoOp>
+  // CHECK: CXXMemberCallExpr{{.*}}'B'
+  const A& r = x;
+}
+
+// CHECK-LABEL: example3
+void example3() {
+  // CHECK: VarDecl{{.*}}rcd2 'const double &'
+  // CHECK: ImplicitCastExpr{{.*}} <IntegralToFloating>
+  const double& rcd2 = 2;
+}
diff --git a/test/CXX/dcl.decl/dcl.meaning/p1-0x.cpp b/test/CXX/dcl.decl/dcl.meaning/p1-0x.cpp
index 1d04d7d..fbe9c08 100644
--- a/test/CXX/dcl.decl/dcl.meaning/p1-0x.cpp
+++ b/test/CXX/dcl.decl/dcl.meaning/p1-0x.cpp
@@ -101,4 +101,25 @@
   template<> struct N::V<int> {};
   template struct N::V<int*>;
   template struct N::V<char>; // expected-error {{undefined}}
+
+  struct Q {};
+
+  // Perversely, inline anonymous namespaces can cause an ostensibly
+  // external-linkage declaration to acquire internal linkage when
+  // redeclared with a qualified name.
+  inline namespace {
+    struct Q {} q;
+    int f_in_inline();
+    extern int v_in_inline;
+    typedef int t_in_inline;
+  }
+  // FIXME: These "extra qualification" warnings are bogus: the qualification
+  // changes the meaning of the program.
+  int inline_namespaces::f_in_inline() { // expected-warning {{extra qualification}}
+    // Finds <anon>::Q, not inline_namespaces::Q
+    Q x = q;
+    return 0;
+  }
+  int inline_namespaces::v_in_inline = // expected-warning {{extra qualification}}
+    (Q(q), 0);
 }
diff --git a/test/CXX/drs/dr1xx.cpp b/test/CXX/drs/dr1xx.cpp
index d62ed9f..ee9f6a1 100644
--- a/test/CXX/drs/dr1xx.cpp
+++ b/test/CXX/drs/dr1xx.cpp
@@ -238,7 +238,20 @@
   };
 }
 
-namespace dr126 { // dr126: no
+namespace dr126 { // dr126: partial
+  // FIXME: We do not yet generate correct code for this change:
+  // eg:
+  //   catch (void*&) should catch void* but not int*
+  //   catch (void*) and catch (void*const&) should catch both
+  // Likewise:
+  //   catch (Base *&) should catch Base* but not Derived*
+  //   catch (Base *) should catch both
+  // In each case, we emit the same code for both catches.
+  //
+  // The ABI does not let us represent the language rule in the unwind tables.
+  // So, when catching by non-const (or volatile) reference to pointer, we
+  // should compare the exception type to the caught type and only accept an
+  // exact match.
 #if __cplusplus <= 201402L
   struct C {};
   struct D : C {};
@@ -262,12 +275,13 @@
     virtual void gr() throw(C&);
     virtual void hr() throw(C&); // expected-note {{overridden}}
 
-    virtual void pv() throw(void*); // expected-note {{overridden}}
+    virtual void pv() throw(void*);
 
 #if __cplusplus >= 201103L
-    virtual void np() throw(C*); // expected-note {{overridden}}
-    virtual void npm() throw(int C::*); // expected-note {{overridden}}
-    virtual void nr() throw(C&); // expected-note {{overridden}}
+    virtual void np() throw(C*);
+    virtual void npm() throw(int C::*);
+    virtual void nr() throw(C*&); // expected-note {{overridden}}
+    virtual void ncr() throw(C*const&);
 #endif
 
     virtual void ref1() throw(C *const&);
@@ -275,7 +289,7 @@
 
     virtual void v() throw(int);
     virtual void w() throw(const int);
-    virtual void x() throw(int*);
+    virtual void x() throw(int*); // expected-note {{overridden}}
     virtual void y() throw(const int*);
     virtual void z() throw(int); // expected-note {{overridden}}
   };
@@ -294,13 +308,14 @@
     virtual void gr() throw(G&);
     virtual void hr() throw(H&); // expected-error {{more lax}}
 
-    virtual void pv() throw(C*); // expected-error {{more lax}} FIXME: This is valid.
+    virtual void pv() throw(C*);
 
 #if __cplusplus >= 201103L
     using nullptr_t = decltype(nullptr);
-    virtual void np() throw(nullptr_t*); // expected-error {{more lax}} FIXME: This is valid.
-    virtual void npm() throw(nullptr_t*); // expected-error {{more lax}} FIXME: This is valid.
-    virtual void nr() throw(nullptr_t&); // expected-error {{more lax}} This is not.
+    virtual void np() throw(nullptr_t);
+    virtual void npm() throw(nullptr_t&);
+    virtual void nr() throw(nullptr_t); // expected-error {{more lax}}
+    virtual void ncr() throw(nullptr_t);
 #endif
 
     virtual void ref1() throw(D *const &);
@@ -308,7 +323,7 @@
 
     virtual void v() throw(const int);
     virtual void w() throw(int);
-    virtual void x() throw(const int*); // FIXME: 'const int*' is not allowed by A::h.
+    virtual void x() throw(const int*); // expected-error {{more lax}}
     virtual void y() throw(int*); // ok
     virtual void z() throw(long); // expected-error {{more lax}}
   };
diff --git a/test/CXX/drs/dr4xx.cpp b/test/CXX/drs/dr4xx.cpp
index 1a5976e..d27adc4 100644
--- a/test/CXX/drs/dr4xx.cpp
+++ b/test/CXX/drs/dr4xx.cpp
@@ -22,6 +22,9 @@
   class B {
   protected:
     typedef int type; // expected-note {{protected}}
+#if __cplusplus == 199711L
+    // expected-note@-2 {{protected}}
+#endif
   };
 
   class C {
@@ -593,10 +596,10 @@
     U<__builtin_offsetof(A, n)>::type a;
     U<__builtin_offsetof(T, n)>::type b; // expected-error +{{}} expected-warning 0+{{}}
     // as an extension, we allow the member-designator to include array indices
-    g(__builtin_offsetof(A, a[0])).h<int>(); // expected-error {{extension}}
-    g(__builtin_offsetof(A, a[N])).h<int>(); // expected-error {{extension}}
-    U<__builtin_offsetof(A, a[0])>::type c; // expected-error {{extension}}
-    U<__builtin_offsetof(A, a[N])>::type d; // expected-error {{extension}} expected-error +{{}} expected-warning 0+{{}}
+    g(__builtin_offsetof(A, a[0])).h<int>();
+    g(__builtin_offsetof(A, a[N])).h<int>();
+    U<__builtin_offsetof(A, a[0])>::type c;
+    U<__builtin_offsetof(A, a[N])>::type d; // expected-error +{{}} expected-warning 0+{{}}
   }
 }
 
diff --git a/test/CXX/drs/dr6xx.cpp b/test/CXX/drs/dr6xx.cpp
index 8b9a699..c3c867b 100644
--- a/test/CXX/drs/dr6xx.cpp
+++ b/test/CXX/drs/dr6xx.cpp
@@ -1,9 +1,13 @@
-// RUN: %clang_cc1 -std=c++98 %s -verify -fexceptions -fcxx-exceptions -pedantic-errors
-// RUN: %clang_cc1 -std=c++11 %s -verify -fexceptions -fcxx-exceptions -pedantic-errors
-// RUN: %clang_cc1 -std=c++14 %s -verify -fexceptions -fcxx-exceptions -pedantic-errors
-// RUN: %clang_cc1 -std=c++1z %s -verify -fexceptions -fcxx-exceptions -pedantic-errors
+// RUN: %clang_cc1 -std=c++98 %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking
+// RUN: %clang_cc1 -std=c++11 %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking
+// RUN: %clang_cc1 -std=c++14 %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking
+// RUN: %clang_cc1 -std=c++17 %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking
+// RUN: %clang_cc1 -std=c++2a %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking
 
-namespace std { struct type_info {}; }
+namespace std {
+  struct type_info {};
+  __extension__ typedef __SIZE_TYPE__ size_t;
+} // namespace std
 
 namespace dr601 { // dr601: yes
 #if __cplusplus >= 201103L
@@ -354,6 +358,672 @@
   }
 }
 
+namespace dr641 { // dr641: yes
+  namespace std_example {
+    struct abc;
+
+    struct xyz {
+      xyz(); // expected-note 0-1{{candidate}}
+      xyz(xyz &); // expected-note 0-1{{candidate}}
+
+      operator xyz &() = delete; // expected-error 0-1{{extension}} expected-warning {{will never be used}}
+      operator abc &() = delete; // expected-error 0-1{{extension}}
+    };
+
+    struct abc : xyz {};
+
+    template<typename T>
+    void use(T &); // expected-note {{expects an l-value}}
+    void test() {
+      use<xyz>(xyz()); // expected-error {{no match}}
+      use<const xyz>(xyz());
+#if __cplusplus < 201103L
+      // expected-error-re@-2 {{no viable constructor copying parameter of type '{{.*}}xyz'}}
+#endif
+    }
+  }
+
+  template<typename T> struct error { typedef typename T::error type; };
+
+  struct A {
+    template<typename T, typename error<T>::type = 0> operator T() const; // expected-error 0-1{{extension}}
+  };
+  A a;
+  void f(A&); // expected-note 2{{candidate}}
+  void g(const A ca) {
+    f(A()); // expected-error {{no match}}
+    f(ca); // expected-error {{no match}}
+    (void)A();
+    (void)ca;
+  }
+}
+
+namespace dr642 { // dr642: yes
+  void f() {
+    const int i = 2;
+    {
+      char i[i];
+      _Static_assert(sizeof(i) == 2, ""); // expected-error {{C11}}
+    }
+  }
+
+  struct s { int a; };
+  void g(int s) {
+    struct s *p = new struct s;
+    p->a = s;
+  }
+}
+
+#if __cplusplus >= 201103L
+namespace dr643 { // dr643: yes
+  struct A {
+    int x;
+    auto f() -> decltype(this->x);
+    auto f(A &a) -> decltype(a.x);
+    auto g() -> decltype(x);
+    auto h() -> decltype(this->y); // expected-error {{no member named 'y'}}
+    auto h(A &a) -> decltype(a.y); // expected-error {{no member named 'y'}}
+    auto i() -> decltype(y); // expected-error {{undeclared identifier 'y'}}
+    int y;
+  };
+}
+#endif
+
+#if __cplusplus >= 201103L
+namespace dr644 { // dr644: partial
+  struct A {
+    A() = default;
+    int x, y;
+  };
+  static_assert(__is_literal_type(A), "");
+
+  struct B : A {};
+  static_assert(__is_literal_type(B), "");
+
+  struct C : virtual A {};
+  static_assert(!__is_literal_type(C), "");
+
+  struct D { C c; };
+  static_assert(!__is_literal_type(D), "");
+
+  // FIXME: According to DR644, E<C> is a literal type despite having virtual
+  // base classes. This appears to be a wording defect.
+  template<typename T>
+  struct E : T {
+    constexpr E() = default;
+  };
+  static_assert(!__is_literal_type(E<C>), "");
+}
+#endif
+
+// dr645 increases permission to optimize; it's not clear that it's possible to
+// test for this.
+// dr645: na
+
+#if __cplusplus >= 201103L
+namespace dr646 { // dr646: sup 981
+  struct A {
+    constexpr A(const A&) = default; // ok
+  };
+
+  struct B {
+    constexpr B() {}
+    B(B&);
+  };
+  constexpr B b = {}; // ok
+}
+#endif
+
+#if __cplusplus >= 201103L
+namespace dr647 { // dr647: yes
+  // This is partially superseded by dr1358.
+  struct A {
+    constexpr virtual void f() const;
+    constexpr virtual void g() const {} // expected-error {{virtual function cannot be constexpr}}
+  };
+
+  struct X { virtual void f() const; }; // expected-note {{overridden}}
+  struct B : X {
+    constexpr void f() const {} // expected-error {{virtual function cannot be constexpr}}
+  };
+
+  struct NonLiteral { NonLiteral() {} }; // expected-note {{not an aggregate and has no constexpr constructors}}
+
+  struct C {
+    constexpr C(NonLiteral);
+    constexpr C(NonLiteral, int) {} // expected-error {{not a literal type}}
+    constexpr C() try {} catch (...) {} // expected-error {{function try block}}
+  };
+
+  struct D {
+    operator int() const;
+    constexpr D(int) {}
+    D(float); // expected-note 2{{declared here}}
+  };
+  constexpr int get();
+  struct E {
+    int n;
+    D d;
+
+    // FIXME: We should diagnose this, as the conversion function is not
+    // constexpr. However, that part of this issue is supreseded by dr1364 and
+    // others; no diagnostic is required for this any more.
+    constexpr E()
+        : n(D(0)),
+          d(0) {}
+
+    constexpr E(int) // expected-error {{never produces a constant expression}}
+        : n(0),
+          d(0.0f) {} // expected-note {{non-constexpr constructor}}
+    constexpr E(float f) // expected-error {{never produces a constant expression}}
+        : n(get()),
+          d(D(0) + f) {} // expected-note {{non-constexpr constructor}}
+  };
+}
+#endif
+
+#if __cplusplus >= 201103L
+namespace dr648 { // dr648: yes
+  int f();
+  constexpr int a = (true ? 1 : f());
+  constexpr int b = false && f();
+  constexpr int c = true || f();
+}
+#endif
+
+#if __cplusplus >= 201103L
+namespace dr649 { // dr649: yes
+  alignas(0x20000000) int n; // expected-error {{requested alignment}}
+  struct alignas(0x20000000) X {}; // expected-error {{requested alignment}}
+  struct Y { int n alignas(0x20000000); }; // expected-error {{requested alignment}}
+  struct alignas(256) Z {};
+  // This part is superseded by dr2130 and eventually by aligned allocation support.
+  auto *p = new Z;
+}
+#endif
+
+// dr650 FIXME: add codegen test
+
+#if __cplusplus >= 201103L
+namespace dr651 { // dr651: yes
+  struct X {
+    virtual X &f();
+  };
+  struct Y : X {
+    Y &f();
+  };
+  using T = decltype(((X&&)Y()).f());
+  using T = X &;
+}
+#endif
+
+#if __cplusplus >= 201103L
+namespace dr652 { // dr652: yes
+  constexpr int n = 1.2 * 3.4;
+  static_assert(n == 4, "");
+}
+#endif
+
+// dr653 FIXME: add codegen test
+
+#if __cplusplus >= 201103L
+namespace dr654 { // dr654: yes
+  void f() {
+    if (nullptr) {} // expected-warning {{implicit conversion of nullptr constant to 'bool'}}
+    bool b = nullptr; // expected-warning {{implicit conversion of nullptr constant to 'bool'}}
+    if (nullptr == 0) {}
+    if (nullptr != 0) {}
+    if (nullptr <= 0) {} // expected-error {{invalid operands}}
+    if (nullptr == 1) {} // expected-error {{invalid operands}}
+    if (!nullptr) {} // expected-warning {{implicit conversion of nullptr constant to 'bool'}}
+    decltype(nullptr) n = 0;
+    static_cast<int>(nullptr); // expected-error {{not allowed}}
+    (void)static_cast<decltype(nullptr)>(0);
+    static_cast<decltype(nullptr)>(1); // expected-error {{not allowed}}
+    void(true ? nullptr : 0);
+    void(true ? 0 : nullptr);
+  }
+}
+#endif
+
+namespace dr655 { // dr655: yes
+  struct A { A(int); }; // expected-note 2-3{{not viable}}
+  struct B : A {
+    A a;
+    B();
+    B(int) : B() {} // expected-error 0-1 {{C++11}}
+    B(int*) : A() {} // expected-error {{no matching constructor}}
+  };
+}
+
+namespace dr656 { // dr656: yes
+  struct A { A(const A&) = delete; }; // expected-error 0-1 {{C++11}}
+  struct B : A {};
+  struct X { operator B(); } x;
+  const A &r = x;
+  struct Y : private A { // expected-note 2{{here}} expected-note 2{{candidate}}
+    operator B() volatile;
+  };
+  extern Y y;
+  extern volatile Y vy;
+  // Conversion not considered due to reference-related types.
+  const A &s = y; // expected-error {{private base class}}
+  const A &t = vy; // expected-error {{drops 'volatile'}}
+
+  struct C { operator struct D(); } c;
+  struct D : C {};
+  const D &d = c; // ok, D not reference-related to C
+
+  template<typename T> void accept(T); // expected-note {{candidate}}
+  template<typename T> void accept(...) = delete; // expected-error 0-1 {{C++11}} expected-note {{candidate}}
+  void f() {
+    accept<const A&>(x);
+    accept<const A&>(y); // expected-error {{private base class}}
+    accept<const A&>(vy); // expected-error {{call to deleted}} expected-error {{no matching constructor}}
+    accept<const D&>(c);
+  }
+}
+
+namespace dr657 { // dr657: partial
+  struct Abs { virtual void x() = 0; };
+  struct Der : public Abs { virtual void x(); };
+
+  struct Cnvt { template<typename F> Cnvt(F); };
+
+  void foo(Cnvt a);
+  void foo(Abs &a);
+  void f(Abs *a) { foo(*a); }
+
+  void bar(Abs &a);
+  template<typename T> void bar(T);
+  void g(Abs *a) { bar(*a); }
+
+  // FIXME: The following examples demonstrate that we might be accepting the
+  // above cases for the wrong reason.
+
+  // FIXME: We should reject this.
+  struct C { C(Abs) {} };
+  // FIXME: We should reject this.
+  struct Q { operator Abs() { __builtin_unreachable(); } } q;
+#if __cplusplus >= 201703L
+  // FIXME: We should *definitely* reject this.
+  C c = Q().operator Abs();
+#endif
+
+  template<typename F> struct Cnvt2 { Cnvt2(F); typedef int type; };
+
+  // FIXME: We should reject this.
+  void baz(Abs &a);
+  template<typename T> typename Cnvt2<T>::type baz(T);
+  void h(Abs *a) { baz(*a); }
+
+  // FIXME: We should reject this too.
+  Cnvt2<Abs>::type err;
+}
+
+// dr658 FIXME: add codegen test
+
+#if __cplusplus >= 201103L
+namespace dr659 { // dr659: yes
+  static_assert(alignof(char) == alignof(char&), "");
+  static_assert(alignof(int) == alignof(int&), "");
+  int n = alignof(int(&)()); // expected-error {{application of 'alignof' to a function type}}
+  struct A; // expected-note {{forward}}
+  int m = alignof(A&); // expected-error {{application of 'alignof' to an incomplete type}}
+}
+#endif
+
+#if __cplusplus >= 201103L
+namespace dr660 { // dr660: yes
+  enum : int { a };
+  enum class { b }; // expected-error {{requires a name}}
+  auto x = a;
+
+  struct X {
+    enum : int { a };
+    enum class { b }; // expected-error {{requires a name}}
+  };
+  auto y = X::a;
+}
+#endif
+
+// dr661 FIXME: add codegen test
+
+namespace dr662 { // dr662: yes
+  template <typename T> void f(T t) {
+    T &tr = t;
+    T *tp = &t; // expected-error {{pointer to a reference}}
+#if __cplusplus >= 201103L
+    auto *ap = &t;
+#endif
+  }
+  void g(int n) { f<int&>(n); } // expected-note {{instantiation of}}
+}
+
+namespace dr663 { // dr663: yes c++11
+  int ЍЎ = 123;
+#if __cplusplus < 201103L
+  // expected-error@-2 {{non-ASCII}}
+#endif
+}
+
+#if __cplusplus >= 201103L
+namespace dr664 { // dr664: yes
+  struct A { A(const A&) = delete; };
+  A &&f(A &&a, int n) {
+    if (n)
+      return f(static_cast<A&&>(a), n - 1);
+    return static_cast<A&&>(a);
+  }
+}
+#endif
+
+namespace dr665 { // dr665: yes
+  struct A { virtual ~A(); };
+  struct B : A {} *b;
+  struct C : private A {} *c; // expected-note {{here}}
+  struct D : B, C {} *d;
+
+  struct VB : virtual A {} *vb;
+  struct VC : private virtual A {} *vc; // expected-note {{here}}
+  struct VD : VB, VC {} *vd;
+
+  void f() {
+    (void)dynamic_cast<A*>(b);
+    (void)dynamic_cast<A*>(c); // expected-error {{private}}
+    (void)dynamic_cast<A*>(d); // expected-error {{ambiguous}}
+    (void)dynamic_cast<A*>(vb);
+    (void)dynamic_cast<A*>(vc); // expected-error {{private}}, even though it could be valid at runtime
+    (void)dynamic_cast<A*>(vd);
+  }
+}
+
+namespace dr666 { // dr666: yes
+  struct P { friend P operator*(P, P); P(int); } p(0);
+
+  template<int> int f();
+  template<typename T> int f() {
+    T::type *p = 0; // expected-error {{missing 'typename'}}
+    int a(T::type); // expected-error {{missing 'typename'}}
+    return f<T::type>(); // expected-error {{missing 'typename'}}
+  }
+  struct X { static const int type = 0; };
+  struct Y { typedef int type; };
+  int a = f<X>();
+  int b = f<Y>(); // expected-note {{instantiation of}}
+}
+
+// Triviality is entirely different in C++98.
+#if __cplusplus >= 201103L
+namespace dr667 { // dr667: yes
+  struct A {
+    A() = default;
+    int &r;
+  };
+  static_assert(!__is_trivially_constructible(A), "");
+
+  struct B { ~B() = delete; };
+  union C { B b; };
+  static_assert(!__is_trivially_destructible(C), "");
+
+  struct D { D(const D&) = delete; };
+  struct E : D {};
+  static_assert(!__is_trivially_constructible(E, const E&), "");
+
+  struct F { F &operator=(F&&) = delete; };
+  struct G : F {};
+  static_assert(!__is_trivially_assignable(G, G&&), "");
+}
+#endif
+
+// dr668 FIXME: add codegen test
+
+#if __cplusplus >= 201103L
+namespace dr669 { // dr669: yes
+  void f() {
+    int n;
+    using T = decltype(n);
+    using T = int;
+    using U = decltype((n));
+    using U = int &;
+
+    [=] {
+      using V = decltype(n);
+      using V = int;
+      using W = decltype((n));
+      using W = const int&;
+    } ();
+
+    struct X {
+      int n;
+      void f() const {
+        using X = decltype(n);
+        using X = int;
+        using Y = decltype((n));
+        using Y = const int&;
+      }
+    };
+  }
+}
+#endif
+
+namespace dr671 { // dr671: yes
+  enum class E { e }; // expected-error 0-1 {{C++11}}
+  E e = static_cast<E>(0);
+  int n = static_cast<int>(E::e); // expected-error 0-1 {{C++11}}
+  int m = static_cast<int>(e); // expected-error 0-1 {{C++11}}
+}
+
+// dr672 FIXME: add codegen test
+
+namespace dr673 { // dr673: yes
+  template<typename> struct X { static const int n = 0; };
+
+  class A {
+    friend class B *f();
+    class C *f();
+    void f(class D *);
+    enum { e = X<struct E>::n };
+    void g() { extern struct F *p; }
+  };
+  B *b;
+  C *c;
+  D *d;
+  E *e;
+  F *f; // expected-error {{unknown type name}}
+}
+
+namespace dr674 { // dr674: no
+  template<typename T> int f(T);
+
+  int g(int);
+  template<typename T> int g(T);
+
+  int h(int);
+  template<typename T> int h(T);
+
+  class X {
+    // FIXME: This should deduce dr674::f<int>.
+    friend int dr674::f(int); // expected-error {{does not match any}}
+    friend int dr674::g(int);
+    friend int dr674::h<>(int);
+    int n;
+  };
+
+  template<typename T> int f(T) { return X().n; }
+  int g(int) { return X().n; }
+  template<typename T> int g(T) { return X().n; }
+  int h(int) { return X().n; }
+  template<typename T> int h(T) { return X().n; }
+
+  template int f(int);
+  template int g(int);
+  template int h(int);
+}
+
+namespace dr675 { // dr675: dup 739
+  template<typename T> struct A { T n : 1; };
+#if __cplusplus >= 201103L
+  static_assert(A<char>{1}.n < 0, "");
+  static_assert(A<int>{1}.n < 0, "");
+  static_assert(A<long long>{1}.n < 0, "");
+#endif
+}
+
+// dr676: na
+
+namespace dr677 { // dr677: no
+  struct A {
+    void *operator new(std::size_t);
+    void operator delete(void*) = delete; // expected-error 0-1{{C++11}} expected-note {{deleted}}
+  };
+  struct B {
+    void *operator new(std::size_t);
+    void operator delete(void*) = delete; // expected-error 0-1{{C++11}} expected-note 2{{deleted}}
+    virtual ~B();
+  };
+  void f(A *p) { delete p; } // expected-error {{deleted}}
+  // FIXME: This appears to be valid; we shouldn't even be looking up the 'operator delete' here.
+  void f(B *p) { delete p; } // expected-error {{deleted}}
+  B::~B() {} // expected-error {{deleted}}
+}
+
+// dr678 FIXME: check that the modules ODR check catches this
+
+namespace dr679 { // dr679: yes
+  struct X {};
+  template<int> void operator+(X, X);
+  template<> void operator+<0>(X, X) {} // expected-note {{previous}}
+  template<> void operator+<0>(X, X) {} // expected-error {{redefinition}}
+}
+
+// dr680: na
+
+#if __cplusplus >= 201103L
+namespace dr681 { // dr681: partial
+  auto *a() -> int; // expected-error {{must specify return type 'auto', not 'auto *'}}
+  auto (*b)() -> int;
+  // FIXME: The errors here aren't great.
+  auto (*c()) -> int; // expected-error {{expected function body}}
+  auto ((*d)()) -> int; // expected-error {{expected ';'}} expected-error {{requires an initializer}}
+
+  // FIXME: This is definitely wrong. This should be
+  //   "function of () returning pointer to function of () returning int"
+  // not a function with a deduced return type.
+  auto (*e())() -> int; // expected-error 0-1{{C++14}}
+
+  auto f() -> int (*)();
+  auto g() -> auto (*)() -> int;
+}
+#endif
+
+#if __cplusplus >= 201103L
+namespace dr683 { // dr683: yes
+  struct A {
+    A() = default;
+    A(const A&) = default;
+    A(A&);
+  };
+  static_assert(__is_trivially_constructible(A, const A&), "");
+  static_assert(!__is_trivially_constructible(A, A&), "");
+  static_assert(!__is_trivial(A), "");
+
+  struct B : A {};
+  static_assert(__is_trivially_constructible(B, const B&), "");
+  static_assert(__is_trivially_constructible(B, B&), "");
+  static_assert(__is_trivial(B), "");
+}
+#endif
+
+#if __cplusplus >= 201103L
+namespace dr684 { // dr684: sup 1454
+  void f() {
+    int a; // expected-note {{here}}
+    constexpr int *p = &a; // expected-error {{constant expression}} expected-note {{pointer to 'a'}}
+  }
+}
+#endif
+
+#if __cplusplus >= 201103L
+namespace dr685 { // dr685: yes
+  enum E : long { e };
+  void f(int);
+  int f(long);
+  int a = f(e);
+
+  enum G : short { g };
+  int h(short);
+  void h(long);
+  int b = h(g);
+
+  int i(int);
+  void i(long);
+  int c = i(g);
+
+  int j(unsigned int); // expected-note {{candidate}}
+  void j(long); // expected-note {{candidate}}
+  int d = j(g); // expected-error {{ambiguous}}
+
+  int k(short); // expected-note {{candidate}}
+  void k(int); // expected-note {{candidate}}
+  int x = k(g); // expected-error {{ambiguous}}
+}
+#endif
+
+namespace dr686 { // dr686: yes
+  void f() {
+    (void)dynamic_cast<struct A*>(0); // expected-error {{incomplete}} expected-note {{forward}}
+    (void)dynamic_cast<struct A{}*>(0); // expected-error {{cannot be defined in a type specifier}}
+    (void)typeid(struct B*);
+    (void)typeid(struct B{}*); // expected-error {{cannot be defined in a type specifier}}
+    (void)static_cast<struct C*>(0);
+    (void)static_cast<struct C{}*>(0); // expected-error {{cannot be defined in a type specifier}}
+    (void)reinterpret_cast<struct D*>(0);
+    (void)reinterpret_cast<struct D{}*>(0); // expected-error {{cannot be defined in a type specifier}}
+    (void)const_cast<struct E*>(0); // expected-error {{not allowed}}
+    (void)const_cast<struct E{}*>(0); // expected-error {{cannot be defined in a type specifier}}
+    (void)sizeof(struct F*);
+    (void)sizeof(struct F{}*); // expected-error {{cannot be defined in a type specifier}}
+    (void)new struct G*;
+    (void)new struct G{}*; // expected-error {{cannot be defined in a type specifier}}
+#if __cplusplus >= 201103L
+    (void)alignof(struct H*);
+    (void)alignof(struct H{}*); // expected-error {{cannot be defined in a type specifier}}
+#endif
+    (void)(struct I*)0;
+    (void)(struct I{}*)0; // expected-error {{cannot be defined in a type specifier}}
+    if (struct J *p = 0) {}
+    if (struct J {} *p = 0) {} // expected-error {{cannot be defined in a condition}}
+    for (struct K *p = 0; struct L *q = 0; ) {}
+    for (struct K {} *p = 0; struct L {} *q = 0; ) {} // expected-error {{'L' cannot be defined in a condition}}
+#if __cplusplus >= 201103L
+    using M = struct {};
+#endif
+    struct N {
+      operator struct O{}(){}; // expected-error {{cannot be defined in a type specifier}}
+    };
+    try {}
+    catch (struct P *) {} // expected-error {{incomplete}} expected-note {{forward}}
+    catch (struct P {} *) {} // expected-error {{cannot be defined in a type specifier}}
+#if __cplusplus < 201703L
+    void g() throw(struct Q); // expected-error {{incomplete}} expected-note {{forward}}
+    void h() throw(struct Q {}); // expected-error {{cannot be defined in a type specifier}}
+#endif
+  }
+  template<struct R *> struct X;
+  template<struct R {} *> struct Y; // expected-error {{cannot be defined in a type specifier}}
+}
+
+namespace dr687 { // dr687 still open
+  template<typename T> void f(T a) {
+    // FIXME: This is valid in C++20.
+    g<int>(a); // expected-error {{undeclared}} expected-error {{'('}}
+
+    // This is not.
+    template g<int>(a); // expected-error {{expected expression}}
+  }
+}
+
 namespace dr692 { // dr692: no
   namespace temp_func_order_example2 {
     template <typename T, typename U> struct A {};
diff --git a/test/CXX/expr/expr.unary/expr.new/p2-cxx0x.cpp b/test/CXX/expr/expr.unary/expr.new/p2-cxx0x.cpp
index 7305bd1..a3a2f71 100644
--- a/test/CXX/expr/expr.unary/expr.new/p2-cxx0x.cpp
+++ b/test/CXX/expr/expr.unary/expr.new/p2-cxx0x.cpp
@@ -9,12 +9,14 @@
 void f() {
   only<const int*> p = new const auto (0);
   only<double*> q = new (auto) (0.0);
+  only<char*> r = new auto {'a'};
 
   new auto; // expected-error{{new expression for type 'auto' requires a constructor argument}}
   new (const auto)(); // expected-error{{new expression for type 'const auto' requires a constructor argument}}
   new (auto) (1,2,3); // expected-error{{new expression for type 'auto' contains multiple constructor arguments}}
-  new auto {1,2,3}; // expected-error{{new expression for type 'auto' cannot use list-initialization}}
-  new auto ({1,2,3}); // expected-error{{new expression for type 'auto' cannot use list-initialization}}
+  new auto {}; // expected-error{{new expression for type 'auto' requires a constructor argument}}
+  new auto {1,2,3}; // expected-error{{new expression for type 'auto' contains multiple constructor arguments}}
+  new auto ({1,2,3}); // expected-error{{new expression for type 'auto' contains multiple constructor arguments}}
 }
 
 void p2example() {
diff --git a/test/CXX/expr/expr.unary/expr.new/p2-cxx14.cpp b/test/CXX/expr/expr.unary/expr.new/p2-cxx14.cpp
new file mode 100644
index 0000000..70bbc48
--- /dev/null
+++ b/test/CXX/expr/expr.unary/expr.new/p2-cxx14.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++14 -pedantic
+
+void f() {
+  new auto('a');
+  new auto {2}; // expected-warning {{ISO C++ standards before C++17 do not allow new expression for type 'auto' to use list-initialization}}
+  new auto {1, 2}; // expected-error{{new expression for type 'auto' contains multiple constructor arguments}}
+  new auto {}; // expected-error{{new expression for type 'auto' requires a constructor argument}}
+  new decltype(auto)({1}); // expected-warning {{ISO C++ standards before C++17 do not allow new expression for type 'decltype(auto)' to use list-initialization}}
+  new decltype(auto)({1, 2}); // expected-error{{new expression for type 'decltype(auto)' contains multiple constructor arguments}}
+}
diff --git a/test/CXX/expr/expr.unary/expr.new/p2-cxx1z.cpp b/test/CXX/expr/expr.unary/expr.new/p2-cxx1z.cpp
new file mode 100644
index 0000000..6e76075
--- /dev/null
+++ b/test/CXX/expr/expr.unary/expr.new/p2-cxx1z.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++14
+// RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++17 -pedantic
+
+void f() {
+  new auto('a');
+  new auto {2};
+  new auto {1, 2}; // expected-error{{new expression for type 'auto' contains multiple constructor arguments}}
+  new auto {}; // expected-error{{new expression for type 'auto' requires a constructor argument}}
+  new decltype(auto)({1});
+  new decltype(auto)({1, 2}); // expected-error{{new expression for type 'decltype(auto)' contains multiple constructor arguments}}
+}
diff --git a/test/CXX/modules-ts/basic/basic.def.odr/p4/module.cpp b/test/CXX/modules-ts/basic/basic.def.odr/p4/module.cpp
index 79e6865..ad03191 100644
--- a/test/CXX/modules-ts/basic/basic.def.odr/p4/module.cpp
+++ b/test/CXX/modules-ts/basic/basic.def.odr/p4/module.cpp
@@ -1,15 +1,15 @@
 // RUN: %clang_cc1 -fmodules-ts %S/module.cppm -triple %itanium_abi_triple -emit-module-interface -o %t
 // RUN: %clang_cc1 -fmodules-ts %s -triple %itanium_abi_triple -fmodule-file=%t -emit-llvm -o - | FileCheck %s --implicit-check-not=unused --implicit-check-not=global_module
 
-// CHECK-DAG: @extern_var_exported = external global
-// CHECK-DAG: @inline_var_exported = linkonce_odr global
-// CHECK-DAG: @_ZW6ModuleE19static_var_exported = available_externally global i32 0,
-// CHECK-DAG: @const_var_exported = available_externally constant i32 3,
+// CHECK-DAG: @extern_var_exported = external {{(dso_local )?}}global
+// CHECK-DAG: @inline_var_exported = linkonce_odr {{(dso_local )?}}global
+// CHECK-DAG: @_ZW6ModuleE19static_var_exported = available_externally {{(dso_local )?}}global i32 0,
+// CHECK-DAG: @const_var_exported = available_externally {{(dso_local )?}}constant i32 3,
 //
-// CHECK-DAG: @_ZW6ModuleE25extern_var_module_linkage = external global
-// CHECK-DAG: @_ZW6ModuleE25inline_var_module_linkage = linkonce_odr global
-// CHECK-DAG: @_ZW6ModuleE25static_var_module_linkage = available_externally global i32 0,
-// CHECK-DAG: @_ZW6ModuleE24const_var_module_linkage = available_externally constant i32 3,
+// CHECK-DAG: @_ZW6ModuleE25extern_var_module_linkage = external {{(dso_local )?}}global
+// CHECK-DAG: @_ZW6ModuleE25inline_var_module_linkage = linkonce_odr {{(dso_local )?}}global
+// CHECK-DAG: @_ZW6ModuleE25static_var_module_linkage = available_externally {{(dso_local )?}}global i32 0,
+// CHECK-DAG: @_ZW6ModuleE24const_var_module_linkage = available_externally {{(dso_local )?}}constant i32 3,
 
 module Module;
 
diff --git a/test/CXX/modules-ts/basic/basic.def.odr/p4/module.cppm b/test/CXX/modules-ts/basic/basic.def.odr/p4/module.cppm
index 15d1114..0f2c0db 100644
--- a/test/CXX/modules-ts/basic/basic.def.odr/p4/module.cppm
+++ b/test/CXX/modules-ts/basic/basic.def.odr/p4/module.cppm
@@ -1,30 +1,30 @@
 // RUN: %clang_cc1 -fmodules-ts %s -triple %itanium_abi_triple -emit-llvm -o - | FileCheck %s --implicit-check-not unused_inline --implicit-check-not unused_stastic_global_module
 
-// CHECK-DAG: @extern_var_global_module = external global
-// CHECK-DAG: @inline_var_global_module = linkonce_odr global
+// CHECK-DAG: @extern_var_global_module = external {{(dso_local )?}}global
+// CHECK-DAG: @inline_var_global_module = linkonce_odr {{(dso_local )?}}global
 // CHECK-DAG: @_ZL24static_var_global_module = internal global
 // CHECK-DAG: @_ZL23const_var_global_module = internal constant
 //
 // For ABI compatibility, these symbols do not include the module name.
-// CHECK-DAG: @extern_var_exported = external global
+// CHECK-DAG: @extern_var_exported = external {{(dso_local )?}}global
 // FIXME: Should this be 'weak_odr global'? Presumably it must be, since we
 // can discard this global and its initializer (if any), and other TUs are not
 // permitted to run the initializer for this variable.
-// CHECK-DAG: @inline_var_exported = linkonce_odr global
-// CHECK-DAG: @_ZW6ModuleE19static_var_exported = global
-// CHECK-DAG: @const_var_exported = constant
+// CHECK-DAG: @inline_var_exported = linkonce_odr {{(dso_local )?}}global
+// CHECK-DAG: @_ZW6ModuleE19static_var_exported = {{(dso_local )?}}global
+// CHECK-DAG: @const_var_exported = {{(dso_local )?}}constant
 //
-// CHECK-DAG: @_ZW6ModuleE25extern_var_module_linkage = external global
+// CHECK-DAG: @_ZW6ModuleE25extern_var_module_linkage = external {{(dso_local )?}}global
 // FIXME: Should this be 'weak_odr global'? Presumably it must be, since we
 // can discard this global and its initializer (if any), and other TUs are not
 // permitted to run the initializer for this variable.
-// CHECK-DAG: @_ZW6ModuleE25inline_var_module_linkage = linkonce_odr global
-// CHECK-DAG: @_ZW6ModuleE25static_var_module_linkage = global
-// CHECK-DAG: @_ZW6ModuleE24const_var_module_linkage = constant
+// CHECK-DAG: @_ZW6ModuleE25inline_var_module_linkage = linkonce_odr {{(dso_local )?}}global
+// CHECK-DAG: @_ZW6ModuleE25static_var_module_linkage = {{(dso_local )?}}global
+// CHECK-DAG: @_ZW6ModuleE24const_var_module_linkage = {{(dso_local )?}}constant
 //
-// CHECK-DAG: @_ZW6ModuleE25unused_var_module_linkage = global i32 4
-// CHECK-DAG: @_ZW6ModuleE32unused_static_var_module_linkage = global i32 5
-// CHECK-DAG: @_ZW6ModuleE31unused_const_var_module_linkage = constant i32 7
+// CHECK-DAG: @_ZW6ModuleE25unused_var_module_linkage = {{(dso_local )?}}global i32 4
+// CHECK-DAG: @_ZW6ModuleE32unused_static_var_module_linkage = {{(dso_local )?}}global i32 5
+// CHECK-DAG: @_ZW6ModuleE31unused_const_var_module_linkage = {{(dso_local )?}}constant i32 7
 
 static void unused_static_global_module() {}
 static void used_static_global_module() {}
@@ -37,7 +37,7 @@
 static int static_var_global_module;
 const int const_var_global_module = 3;
 
-// CHECK: define void {{.*}}@_Z23noninline_global_modulev
+// CHECK: define {{(dso_local )?}}void {{.*}}@_Z23noninline_global_modulev
 void noninline_global_module() {
   // FIXME: This should be promoted to module linkage and given a
   // module-mangled name, if it's called from an inline function within
@@ -60,9 +60,9 @@
 export {
   // FIXME: These should be ill-formed: you can't export an internal linkage
   // symbol, per [dcl.module.interface]p2.
-  // CHECK: define void {{.*}}@_ZW6ModuleE22unused_static_exportedv
+  // CHECK: define {{(dso_local )?}}void {{.*}}@_ZW6ModuleE22unused_static_exportedv
   static void unused_static_exported() {}
-  // CHECK: define void {{.*}}@_ZW6ModuleE20used_static_exportedv
+  // CHECK: define {{(dso_local )?}}void {{.*}}@_ZW6ModuleE20used_static_exportedv
   static void used_static_exported() {}
 
   inline void unused_inline_exported() {}
@@ -75,7 +75,7 @@
   static int static_var_exported;
   const int const_var_exported = 3;
 
-  // CHECK: define void {{.*}}@_Z18noninline_exportedv
+  // CHECK: define {{(dso_local )?}}void {{.*}}@_Z18noninline_exportedv
   void noninline_exported() {
     used_static_exported();
     // CHECK: define linkonce_odr {{.*}}@_Z20used_inline_exportedv
@@ -91,9 +91,9 @@
 // FIXME: Ideally we wouldn't emit this as its name is not visible outside this
 // TU, but this module interface might contain a template that can use this
 // function so we conservatively emit it for now.
-// CHECK: define void {{.*}}@_ZW6ModuleE28unused_static_module_linkagev
+// CHECK: define {{(dso_local )?}}void {{.*}}@_ZW6ModuleE28unused_static_module_linkagev
 static void unused_static_module_linkage() {}
-// CHECK: define void {{.*}}@_ZW6ModuleE26used_static_module_linkagev
+// CHECK: define {{(dso_local )?}}void {{.*}}@_ZW6ModuleE26used_static_module_linkagev
 static void used_static_module_linkage() {}
 
 inline void unused_inline_module_linkage() {}
@@ -104,7 +104,7 @@
 static int static_var_module_linkage;
 const int const_var_module_linkage = 3;
 
-// CHECK: define void {{.*}}@_ZW6ModuleE24noninline_module_linkagev
+// CHECK: define {{(dso_local )?}}void {{.*}}@_ZW6ModuleE24noninline_module_linkagev
 void noninline_module_linkage() {
   used_static_module_linkage();
   // CHECK: define linkonce_odr {{.*}}@_ZW6ModuleE26used_inline_module_linkagev
@@ -125,5 +125,5 @@
   struct b {};
   struct c {};
 };
-// CHECK: define void @_ZW6ModuleE1fW_0EN1a1bEW_0ENS_1cE(
+// CHECK: define {{(dso_local )?}}void @_ZW6ModuleE1fW_0EN1a1bEW_0ENS_1cE(
 void f(a::b, a::c) {}
diff --git a/test/CXX/modules-ts/basic/basic.def.odr/p4/user.cpp b/test/CXX/modules-ts/basic/basic.def.odr/p4/user.cpp
index 5e9f7ec..97c69fa 100644
--- a/test/CXX/modules-ts/basic/basic.def.odr/p4/user.cpp
+++ b/test/CXX/modules-ts/basic/basic.def.odr/p4/user.cpp
@@ -1,10 +1,10 @@
 // RUN: %clang_cc1 -fmodules-ts %S/module.cppm -triple %itanium_abi_triple -emit-module-interface -o %t
 // RUN: %clang_cc1 -fmodules-ts %s -triple %itanium_abi_triple -fmodule-file=%t -emit-llvm -o - | FileCheck %s --implicit-check-not=unused --implicit-check-not=global_module
 
-// CHECK-DAG: @extern_var_exported = external global
-// CHECK-DAG: @inline_var_exported = linkonce_odr global
-// CHECK-DAG: @_ZW6ModuleE19static_var_exported = available_externally global i32 0
-// CHECK-DAG: @const_var_exported = available_externally constant i32 3
+// CHECK-DAG: @extern_var_exported = external {{(dso_local )?}}global
+// CHECK-DAG: @inline_var_exported = linkonce_odr {{(dso_local )?}}global
+// CHECK-DAG: @_ZW6ModuleE19static_var_exported = available_externally {{(dso_local )?}}global i32 0
+// CHECK-DAG: @const_var_exported = available_externally {{(dso_local )?}}constant i32 3
 
 import Module;
 
diff --git a/test/CXX/modules-ts/dcl.dcl/dcl.module/dcl.module.export/p1.cpp b/test/CXX/modules-ts/dcl.dcl/dcl.module/dcl.module.export/p1.cpp
new file mode 100644
index 0000000..ab8c690
--- /dev/null
+++ b/test/CXX/modules-ts/dcl.dcl/dcl.module/dcl.module.export/p1.cpp
@@ -0,0 +1,40 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+//
+// RUN: echo 'export module a; export class A{};' | %clang_cc1 -x c++ -fmodules-ts -emit-module-interface - -o %t/a.pcm
+// RUN: echo 'export module b; export class B{};' | %clang_cc1 -x c++ -fmodules-ts -emit-module-interface - -o %t/b.pcm
+// RUN: echo 'export module c; export class C{};' | %clang_cc1 -x c++ -fmodules-ts -emit-module-interface - -o %t/c.pcm
+//
+// RUN: %clang_cc1 -fmodules-ts -fprebuilt-module-path=%t -emit-module-interface %s -o %t/aggregate.internal.pcm -DAGGREGATE_INTERNAL
+// RUN: %clang_cc1 -fmodules-ts -fprebuilt-module-path=%t -emit-module-interface %s -o %t/aggregate.pcm -DAGGREGATE
+//
+// RUN: %clang_cc1 -fmodules-ts -fprebuilt-module-path=%t %s -verify -DTEST
+// expected-no-diagnostics
+
+
+#ifdef AGGREGATE_INTERNAL
+export module aggregate.internal;
+export import a;
+export {
+  import b;
+  import c;
+}
+#endif
+
+
+// Export the above aggregate module.
+// This is done to ensure that re-exports are transitive.
+#ifdef AGGREGATE
+export module aggregate;
+export import aggregate.internal;
+#endif
+
+
+// For the actual test, just try using the classes from the exported modules
+// and hope that they're accessible.
+#ifdef TEST
+import aggregate;
+A a;
+B b;
+C c;
+#endif
diff --git a/test/CXX/over/over.match/over.match.best/p1.cpp b/test/CXX/over/over.match/over.match.best/p1.cpp
index fad5bf9..a933f62 100644
--- a/test/CXX/over/over.match/over.match.best/p1.cpp
+++ b/test/CXX/over/over.match/over.match.best/p1.cpp
@@ -25,13 +25,13 @@
 
   // FIXME: The standard's example is wrong; we add a remove_ref<...> here to
   // fix it.
-  template<typename T, int N = remove_ref<T>::value> A(T&&, int*) -> A<T>;
+  template<typename T, int N = remove_ref<T>::value> A(T&&, int*) -> A<T**>;
   A a{1, 0};
   extern A<int> a;
-  A b{a, 0};
+  A b{a, 0}; // uses the implicit ctor that is more specialized
 
   A<int> *pa = &a;
-  A<A<int>&> *pb = &b;
+  A<int> *pb = &b;
 }
 
 // Partial ordering of function template specializations will be tested 
diff --git a/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp b/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp
index cf92545..cb9541c 100644
--- a/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp
+++ b/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp
@@ -6,9 +6,10 @@
   template<typename T> struct A {
     A(T);
     A(T*);
+	A(...);
   };
   template<typename T> A(T) -> A<T>;
-  template<typename T> explicit A(T*) -> A<T>; // expected-note {{explicit}}
+  template<typename T> explicit A(T*) -> A<T**>; // expected-note {{explicit}}
 
   int *p;
   A a(p);
@@ -16,14 +17,15 @@
   A c{p};
   A d = {p}; // expected-error {{selected an explicit deduction guide}}
 
-  using X = A<int>;
-  using Y = A<int*>;
+  using X = A<int**>;
+  using Y = A<int>;  // uses the implicit guide, being more specialized than the eligible user-defined deduction guides.
 
   using X = decltype(a);
   using Y = decltype(b);
   using X = decltype(c);
 }
 
+
 namespace std {
   template<typename T> struct initializer_list {
     const T *ptr;
@@ -54,3 +56,32 @@
   // between X<int> and X<float>.
   X xz = {z}; // expected-error {{no viable constructor or deduction guide}}
 }
+namespace pr34970 {
+//https://bugs.llvm.org/show_bug.cgi?id=34970
+
+template <typename X, typename Y> struct IsSame {
+    static constexpr bool value = false;
+};
+
+template <typename Z> struct IsSame<Z, Z> {
+    static constexpr bool value = true;
+};
+
+template <typename T> struct Optional {
+    template <typename U> Optional(U&&) { }
+};
+
+template <typename A> Optional(A) -> Optional<A>;
+
+int main() {
+    Optional opt(1729);
+    Optional dupe(opt);
+
+    static_assert(IsSame<decltype(opt), Optional<int>>::value);
+    static_assert(IsSame<decltype(dupe), Optional<int>>::value);
+    static_assert(!IsSame<decltype(dupe), Optional<Optional<int>>>::value);
+	return 0;
+}
+
+
+}
\ No newline at end of file
diff --git a/test/CXX/over/over.match/over.match.funcs/p4-0x.cpp b/test/CXX/over/over.match/over.match.funcs/p4-0x.cpp
index 68c7990..df77627 100644
--- a/test/CXX/over/over.match/over.match.funcs/p4-0x.cpp
+++ b/test/CXX/over/over.match/over.match.funcs/p4-0x.cpp
@@ -1,5 +1,4 @@
 // RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
-// expected-no-diagnostics
 
 template<typename T> T &lvalue();
 template<typename T> T &&xvalue();
@@ -20,6 +19,18 @@
 
   void g();
 
+  void c() const; // expected-note {{'c' declared here}}
+  void v() volatile; // expected-note {{'v' declared here}}
+  void r() __restrict__; // expected-note {{'r' declared here}}
+  void cr() const __restrict__; // expected-note {{'cr' declared here}}
+  void cv() const volatile;
+  void vr() volatile __restrict__; // expected-note {{'vr' declared here}}
+  void cvr() const volatile __restrict__;
+
+  void lvalue() &; // expected-note 2 {{'lvalue' declared here}}
+  void const_lvalue() const&;
+  void rvalue() &&; // expected-note {{'rvalue' declared here}}
+
   int &operator+(const X0&) &;
   float &operator+(const X0&) &&;
 
@@ -32,7 +43,7 @@
   float &h2() const&&;
 };
 
-void X0::g() {
+void X0::g() { // expected-note {{'g' declared here}}
   int &ir1 = f();
   int &ir2 = X0::f();
 }
@@ -69,3 +80,26 @@
   float &fr3 = xvalue<X0>().h2();
   float &fr4 = prvalue<X0>().h2();
 }
+
+void test_diagnostics(const volatile X0 &__restrict__ cvr) {
+  cvr.g(); // expected-error {{'this' argument to member function 'g' has type 'const volatile X0', but function is not marked const or volatile}}
+  cvr.c(); // expected-error {{not marked volatile}}
+  cvr.v(); // expected-error {{not marked const}}
+  cvr.r(); // expected-error {{not marked const or volatile}}
+  cvr.cr(); // expected-error {{not marked volatile}}
+  cvr.cv();
+  cvr.vr(); // expected-error {{not marked const}}
+  cvr.cvr();
+
+  lvalue<X0>().lvalue();
+  lvalue<X0>().const_lvalue();
+  lvalue<X0>().rvalue(); // expected-error {{'this' argument to member function 'rvalue' is an lvalue, but function has rvalue ref-qualifier}}
+
+  xvalue<X0>().lvalue(); // expected-error {{'this' argument to member function 'lvalue' is an rvalue, but function has non-const lvalue ref-qualifier}}
+  xvalue<X0>().const_lvalue();
+  xvalue<X0>().rvalue();
+
+  prvalue<X0>().lvalue(); // expected-error {{'this' argument to member function 'lvalue' is an rvalue, but function has non-const lvalue ref-qualifier}}
+  prvalue<X0>().const_lvalue();
+  prvalue<X0>().rvalue();
+}
diff --git a/test/CXX/stmt.stmt/stmt.iter/stmt.ranged/p1.cpp b/test/CXX/stmt.stmt/stmt.iter/stmt.ranged/p1.cpp
index ad60868..3420249 100644
--- a/test/CXX/stmt.stmt/stmt.iter/stmt.ranged/p1.cpp
+++ b/test/CXX/stmt.stmt/stmt.iter/stmt.ranged/p1.cpp
@@ -215,7 +215,7 @@
 template<typename T>
 void i(T t) {
   for (auto u : t) { // expected-error {{invalid range expression of type 'X::A *'; no viable 'begin' function available}} \
-                        expected-error {{member function 'begin' not viable}} \
+                        expected-error {{'this' argument to member function 'begin' has type 'const X::A', but function is not marked const}} \
                         expected-note {{when looking up 'begin' function}}
 
   }
diff --git a/test/CXX/temp/temp.deduct.guide/p3.cpp b/test/CXX/temp/temp.deduct.guide/p3.cpp
index e12f7b6..07d1be0 100644
--- a/test/CXX/temp/temp.deduct.guide/p3.cpp
+++ b/test/CXX/temp/temp.deduct.guide/p3.cpp
@@ -65,8 +65,8 @@
   };
   template<typename T> struct Local {};
   void f() {
-    Local(int) -> Local<int>; // expected-error 2{{expected}} expected-note {{to match}}
+    Local(int) -> Local<int>; // expected-error {{expected}}
     using WrongScope::Local;
-    Local(int) -> Local<int>; // expected-error 2{{expected}} expected-note {{to match}}
+    Local(int) -> Local<int>; // expected-error {{expected}}
   }
 }
diff --git a/test/CXX/temp/temp.param/p2.cpp b/test/CXX/temp/temp.param/p2.cpp
index 4eca057..656bd26 100644
--- a/test/CXX/temp/temp.param/p2.cpp
+++ b/test/CXX/temp/temp.param/p2.cpp
@@ -1,5 +1,6 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
-// expected-no-diagnostics
+// RUN: %clang_cc1 -fsyntax-only -verify %s 
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s -DCPP11
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify %s -DCPP17
 
 // There is no semantic difference between class and typename in a
 // template-parameter. typename followed by an unqualified-id names a
@@ -13,7 +14,31 @@
 template<typename T, typename X<T>::type Value> struct Y1;
 
 // A storage class shall not be specified in a template-parameter declaration.
-template<static int Value> struct Z; // FIXME: expect an error
+template<static int Value> struct Z; //expected-error{{invalid declaration specifier}}
+template<typedef int Value> struct Z0; //expected-error{{expected template parameter}} expected-error{{expected identifier}} expected-error{{extraneous 'template<>' in declaration of struct 'Z0'}} expected-note{{did you mean to use 'typename'?}}
+template<extern inline int Value> struct Z1; //expected-error2{{invalid declaration specifier}}
+template<virtual int Value> struct Z2; //expected-error{{invalid declaration specifier}}
+template<explicit int Value> struct Z3; //expected-error{{invalid declaration specifier}}
+template<inline int Value> struct Z4; //expected-error{{invalid declaration specifier}}
+template<extern int> struct Z5; //expected-error{{invalid declaration specifier}}
+template<static int> struct Z6;  //expected-error{{invalid declaration specifier}}
+template<explicit int Value> struct Z7; //expected-error{{invalid declaration specifier}}
+template<mutable int> struct Z8; //expected-error{{invalid declaration specifier}}
+
+template<const int> struct Z9; // OK
+template<volatile int> struct Z10; // OK
+
+
+
+#ifdef CPP11
+template<thread_local int> struct Z11; //expected-error{{invalid declaration specifier}}
+template<constexpr int> struct Z12; //expected-error{{invalid declaration specifier}}
+
+#endif
+
+#ifdef CPP17
+template<auto> struct Z13; // OK
+#endif
 
 // Make sure that we properly disambiguate non-type template parameters that
 // start with 'class'.
diff --git a/test/CodeCompletion/Inputs/comments.h b/test/CodeCompletion/Inputs/comments.h
new file mode 100644
index 0000000..7b4b5da
--- /dev/null
+++ b/test/CodeCompletion/Inputs/comments.h
@@ -0,0 +1,4 @@
+// PR32732
+struct B {
+  // <- code completion
+};
diff --git a/test/CodeCompletion/call.cpp b/test/CodeCompletion/call.cpp
index 40a72ba..3e06210 100644
--- a/test/CodeCompletion/call.cpp
+++ b/test/CodeCompletion/call.cpp
@@ -19,10 +19,10 @@
   f(Y(), 0, 0);
   // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:19:9 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s
   // CHECK-CC1: COMPLETION: Pattern : dynamic_cast<<#type#>>(<#expression#>)
-  // CHECK-CC1: f(N::Y y, <#int ZZ#>)
+  // CHECK-CC1: f(Y y, <#int ZZ#>)
   // CHECK-CC1-NEXT: f(int i, <#int j#>, int k)
   // CHECK-CC1-NEXT: f(float x, <#float y#>)
   // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:19:13 %s -o - | FileCheck -check-prefix=CHECK-CC2 %s
-  // CHECK-CC2-NOT: f(N::Y y, int ZZ)
+  // CHECK-CC2-NOT: f(Y y, int ZZ)
   // CHECK-CC2: f(int i, int j, <#int k#>)
 }
diff --git a/test/CodeCompletion/comments.cpp b/test/CodeCompletion/comments.cpp
new file mode 100644
index 0000000..21f1465e
--- /dev/null
+++ b/test/CodeCompletion/comments.cpp
@@ -0,0 +1,13 @@
+// Note: the run lines follow their respective tests, since line/column
+// matter in this test.
+
+#include "comments.h"
+
+struct A {
+  // <- code completion
+  /* <- code completion */
+};
+
+// RUN: %clang_cc1 -I %S/Inputs -fsyntax-only -code-completion-at=%s:7:6 %s
+// RUN: %clang_cc1 -I %S/Inputs -fsyntax-only -code-completion-at=%s:8:6 %s
+// RUN: %clang_cc1 -I %S/Inputs -fsyntax-only -code-completion-at=%S/Inputs/comments.h:3:6 %s
diff --git a/test/CodeCompletion/ignore-ns-level-decls.cpp b/test/CodeCompletion/ignore-ns-level-decls.cpp
new file mode 100644
index 0000000..59cee65
--- /dev/null
+++ b/test/CodeCompletion/ignore-ns-level-decls.cpp
@@ -0,0 +1,21 @@
+namespace ns {
+  struct bar {
+  };
+
+  struct baz {
+  };
+
+  int func(int a, bar b, baz c);
+}
+
+void test() {
+  ns::
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:12:7 %s -o - | FileCheck %s --check-prefix=CHECK-1
+// CHECK-1-DAG: COMPLETION: bar : bar
+// CHECK-1-DAG: COMPLETION: baz : baz
+// CHECK-1-DAG: COMPLETION: func : [#int#]func(<#int a#>, <#bar b#>, <#baz c#>)
+
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:12:7 -no-code-completion-ns-level-decls %s -o - | FileCheck %s --allow-empty --check-prefix=CHECK-EMPTY
+// CHECK-EMPTY-NOT: COMPLETION: bar : bar
+// CHECK-EMPTY: {{^}}{{$}}
+}
diff --git a/test/CodeCompletion/inside-macros.cpp b/test/CodeCompletion/inside-macros.cpp
new file mode 100644
index 0000000..dc40c6a
--- /dev/null
+++ b/test/CodeCompletion/inside-macros.cpp
@@ -0,0 +1,13 @@
+#define ID(X) X
+
+void test(bool input_var) {
+  ID(input_var) = true;
+  // Check that input_var shows up when completing at the start, in the middle
+  // and at the end of the identifier.
+  //
+  // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:4:6 %s -o - | FileCheck %s
+  // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:4:8 %s -o - | FileCheck %s
+  // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:4:15 %s -o - | FileCheck %s
+
+  // CHECK: input_var
+}
diff --git a/test/CodeCompletion/member-access.cpp b/test/CodeCompletion/member-access.cpp
index a0dc7b4..c21265e 100644
--- a/test/CodeCompletion/member-access.cpp
+++ b/test/CodeCompletion/member-access.cpp
@@ -16,6 +16,8 @@
 };
 
 struct Derived : Base3 {
+  template <typename T> Derived(T);
+  Derived(int);
   int member4;
   int memfun3(int);
 };
@@ -48,7 +50,7 @@
   }
 };
 
-  // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:29:6 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s
+  // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:31:6 %s -o - | FileCheck -check-prefix=CHECK-CC1 --implicit-check-not="Derived : Derived(" %s
   // CHECK-CC1: Base1 : Base1::
   // CHECK-CC1: member1 : [#int#][#Base1::#]member1
   // CHECK-CC1: member1 : [#int#][#Base2::#]member1
@@ -62,10 +64,10 @@
   // CHECK-CC1: memfun3 : [#int#]memfun3(<#int#>)
 
 // Make sure this doesn't crash
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:36:7 %s -verify
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:38:7 %s -verify
 
 // Make sure this also doesn't crash
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:47:14 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:49:14 %s
 
 
 template<typename T>
@@ -100,8 +102,8 @@
 // CHECK-CC2: overload1 : [#void#]overload1(<#const T &#>)
 // CHECK-CC2: overload1 : [#void#]overload1(<#const S &#>)
 
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:92:10 %s -o - | FileCheck -check-prefix=CHECK-CC2 %s
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:93:12 %s -o - | FileCheck -check-prefix=CHECK-CC2 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:94:10 %s -o - | FileCheck -check-prefix=CHECK-CC2 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:95:12 %s -o - | FileCheck -check-prefix=CHECK-CC2 %s
 }
 
 
@@ -118,8 +120,8 @@
 // CHECK-CC3: overload1 : [#void#]overload1(<#const int &#>)
 // CHECK-CC3: overload1 : [#void#]overload1(<#const double &#>)
 
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:110:10 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:111:12 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:112:10 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:113:12 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s
 }
 
 template <typename T>
@@ -133,17 +135,17 @@
 // CHECK-CC4: BaseTemplate : BaseTemplate::
 // CHECK-CC4: baseTemplateField : [#int#]baseTemplateField
 // CHECK-CC4: baseTemplateFunction : [#int#]baseTemplateFunction()
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:132:8 %s -o - | FileCheck -check-prefix=CHECK-CC4 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:134:8 %s -o - | FileCheck -check-prefix=CHECK-CC4 %s
     o2.baseTemplateField;
 // CHECK-CC5: BaseTemplate : BaseTemplate::
 // CHECK-CC5: baseTemplateField : [#T#]baseTemplateField
 // CHECK-CC5: baseTemplateFunction : [#T#]baseTemplateFunction()
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:137:8 %s -o - | FileCheck -check-prefix=CHECK-CC5 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:139:8 %s -o - | FileCheck -check-prefix=CHECK-CC5 %s
     this->o1;
 // CHECK-CC6: [#void#]function()
 // CHECK-CC6: o1 : [#BaseTemplate<int>#]o1
 // CHECK-CC6: o2 : [#BaseTemplate<T>#]o2
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:142:11 %s -o - | FileCheck -check-prefix=CHECK-CC6 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:144:11 %s -o - | FileCheck -check-prefix=CHECK-CC6 %s
   }
 
   static void staticFn(T &obj);
@@ -160,7 +162,7 @@
 // CHECK-CC7: o2 : [#BaseTemplate<T>#]o2
 // CHECK-CC7: staticFn : [#void#]staticFn(<#T &obj#>)
 // CHECK-CC7: Template : Template
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:156:16 %s -o - | FileCheck -check-prefix=CHECK-CC7 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:158:16 %s -o - | FileCheck -check-prefix=CHECK-CC7 %s
   typename Template<T>::Nested m;
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:164:25 %s -o - | FileCheck -check-prefix=CHECK-CC7 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:166:25 %s -o - | FileCheck -check-prefix=CHECK-CC7 %s
 }
diff --git a/test/CodeCompletion/qualifiers-as-written.cpp b/test/CodeCompletion/qualifiers-as-written.cpp
new file mode 100644
index 0000000..90530ec
--- /dev/null
+++ b/test/CodeCompletion/qualifiers-as-written.cpp
@@ -0,0 +1,31 @@
+struct foo {
+  typedef int type;
+
+  type method(type, foo::type, ::foo::type, ::foo::foo::type);
+};
+
+namespace ns {
+  struct bar {
+  };
+
+  struct baz {
+  };
+
+  int func(foo::type a, bar b, baz c);
+}
+
+typedef ns::bar bar;
+
+int func(foo a, bar b, ns::bar c, ns::baz d);
+using ns::func;
+
+void test() {
+  foo().method(0, 0, 0, 0);
+  // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:23:9 %s -o - | FileCheck %s --check-prefix=CHECK-1
+  // CHECK-1: COMPLETION: method : [#type#]method(<#type#>, <#foo::type#>, <#::foo::type#>, <#::foo::foo::type#>)
+  f
+  // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:26:3 %s -o - | FileCheck %s --check-prefix=CHECK-2
+  // FIXME(ibiryukov): We should get rid of CHECK-DAGs here when completion output is made deterministic (see PR35244).
+  // CHECK-2-DAG: COMPLETION: func : [#int#]func(<#foo a#>, <#bar b#>, <#ns::bar c#>, <#ns::baz d#>
+  // CHECK-2-DAG: COMPLETION: func : [#int#]func(<#foo::type a#>, <#bar b#>, <#baz c#>
+}
diff --git a/test/CodeCompletion/uninstantiated_params.cpp b/test/CodeCompletion/uninstantiated_params.cpp
index 57a520d..643f2f7 100644
--- a/test/CodeCompletion/uninstantiated_params.cpp
+++ b/test/CodeCompletion/uninstantiated_params.cpp
@@ -9,5 +9,5 @@
   unique_ptr<int> x;
   x.
   // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:10:5 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s
-  // CHECK-CC1: [#void#]reset({#<#unique_ptr<int>::pointer ptr = pointer()#>#})
+  // CHECK-CC1: [#void#]reset({#<#pointer ptr = pointer()#>#})
 }
diff --git a/test/CodeGen/2004-03-07-ExternalConstant.c b/test/CodeGen/2004-03-07-ExternalConstant.c
index 2de3a69..36371c0 100644
--- a/test/CodeGen/2004-03-07-ExternalConstant.c
+++ b/test/CodeGen/2004-03-07-ExternalConstant.c
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1  %s -emit-llvm -o - | FileCheck %s
 
-// CHECK: @a = external constan
+// CHECK: @a = external {{(dso_local )?}}constan
 extern const int a[];   // 'a' should be marked constant even though it's external!
 int foo () {
   return a[0];
diff --git a/test/CodeGen/2005-07-20-SqrtNoErrno.c b/test/CodeGen/2005-07-20-SqrtNoErrno.c
deleted file mode 100644
index 96761e4..0000000
--- a/test/CodeGen/2005-07-20-SqrtNoErrno.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck %s
-// llvm.sqrt has undefined behavior on negative inputs, so it is
-// inappropriate to translate C/C++ sqrt to this.
-float sqrtf(float x);
-float foo(float X) {
-  // CHECK: foo
-  // CHECK: call float @sqrtf(float %
-  // Check that this is marked readonly when errno is ignored.
-  return sqrtf(X);
-}
diff --git a/test/CodeGen/2007-11-07-CopyAggregateAlign.c b/test/CodeGen/2007-11-07-CopyAggregateAlign.c
index 08d9770..769a38a 100644
--- a/test/CodeGen/2007-11-07-CopyAggregateAlign.c
+++ b/test/CodeGen/2007-11-07-CopyAggregateAlign.c
@@ -2,6 +2,6 @@
 struct A { char s, t, u, v; short a; };
 // CHECK: %a = alloca %struct.A, align 2
 // CHECK: %b = alloca %struct.A, align 2
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{.*}}, i32 2, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{.*}} align 2 {{.*}} align 2 {{.*}}, i1 false)
 
 void q() { struct A a, b; a = b; }
diff --git a/test/CodeGen/2007-11-07-ZeroAggregateAlign.c b/test/CodeGen/2007-11-07-ZeroAggregateAlign.c
index b059607..2b8cfe1 100644
--- a/test/CodeGen/2007-11-07-ZeroAggregateAlign.c
+++ b/test/CodeGen/2007-11-07-ZeroAggregateAlign.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
 struct A { short s; short t; int i; };
 // CHECK: %a = alloca %struct.A, align 4
-// CHECK: call void @llvm.memset.p0i8.{{.*}}i32 4, i1 false)
+// CHECK: call void @llvm.memset.p0i8.{{.*}} align 4 {{.*}}, i1 false)
 void q() { struct A a = {0}; }
diff --git a/test/CodeGen/2008-07-21-mixed-var-fn-decl.c b/test/CodeGen/2008-07-21-mixed-var-fn-decl.c
index ac13260..3c9591e 100644
--- a/test/CodeGen/2008-07-21-mixed-var-fn-decl.c
+++ b/test/CodeGen/2008-07-21-mixed-var-fn-decl.c
@@ -3,6 +3,6 @@
 int g0, f0();
 int f1(), g1;
 
-// CHECK: @g0 = common global i32 0, align 4
-// CHECK: @g1 = common global i32 0, align 4
+// CHECK: @g0 = common {{(dso_local )?}}global i32 0, align 4
+// CHECK: @g1 = common {{(dso_local )?}}global i32 0, align 4
 
diff --git a/test/CodeGen/64bit-swiftcall.c b/test/CodeGen/64bit-swiftcall.c
index 92ba37c..c5944b9 100644
--- a/test/CodeGen/64bit-swiftcall.c
+++ b/test/CodeGen/64bit-swiftcall.c
@@ -216,10 +216,10 @@
 // CHECK:  [[RET:%.*]] = alloca [[STRUCT:%.*]], align 1
 // CHECK:  [[RES:%.*]] = alloca [[STRUCT]], align 1
 // CHECK:  [[CAST:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8*
-// CHECK:  call void @llvm.memset{{.*}}(i8* [[CAST]], i8 0, i64 5
+// CHECK:  call void @llvm.memset{{.*}}(i8* align 1 [[CAST]], i8 0, i64 5
 // CHECK:  [[CASTRET:%.*]] = bitcast [[STRUCT]]* [[RET]] to i8*
 // CHECK:  [[CASTRES:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8*
-// CHECK:  call void @llvm.memcpy{{.*}}(i8* [[CASTRET]], i8* [[CASTRES]], i64 5
+// CHECK:  call void @llvm.memcpy{{.*}}(i8* align 1 [[CASTRET]], i8* align 1 [[CASTRES]], i64 5
 // CHECK:  [[CAST:%.*]] = bitcast [[STRUCT]]* [[RET]] to { i64 }*
 // CHECK:  [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0
 // CHECK:  [[R0:%.*]] = load i64, i64* [[GEP]], align 1
@@ -269,10 +269,10 @@
 // CHECK:  [[RET:%.*]] = alloca [[UNION:%.*]], align 8
 // CHECK:  [[RES:%.*]] = alloca [[UNION]], align 8
 // CHECK:  [[CAST:%.*]] = bitcast [[UNION]]* [[RES]] to i8*
-// CHECK:  call void @llvm.memcpy{{.*}}(i8* [[CAST]]
+// CHECK:  call void @llvm.memcpy{{.*}}(i8* align 8 [[CAST]]
 // CHECK:  [[CASTRET:%.*]] = bitcast [[UNION]]* [[RET]] to i8*
 // CHECK:  [[CASTRES:%.*]] = bitcast [[UNION]]* [[RES]] to i8*
-// CHECK:  call void @llvm.memcpy{{.*}}(i8* [[CASTRET]], i8* [[CASTRES]]
+// CHECK:  call void @llvm.memcpy{{.*}}(i8* align 8 [[CASTRET]], i8* align 8 [[CASTRES]]
 // CHECK:  [[CAST:%.*]] = bitcast [[UNION]]* [[RET]] to { i64 }*
 // CHECK:  [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0
 // CHECK:  [[R0:%.*]] = load i64, i64* [[GEP]], align 8
diff --git a/test/CodeGen/Inputs/debug-info-embed-source.c b/test/CodeGen/Inputs/debug-info-embed-source.c
new file mode 100644
index 0000000..2fb55ee
--- /dev/null
+++ b/test/CodeGen/Inputs/debug-info-embed-source.c
@@ -0,0 +1 @@
+void foo() { }
diff --git a/test/CodeGen/Inputs/thinlto-distributed-backend-skip.bc b/test/CodeGen/Inputs/thinlto-distributed-backend-skip.bc
new file mode 100644
index 0000000..0243eb6
--- /dev/null
+++ b/test/CodeGen/Inputs/thinlto-distributed-backend-skip.bc
Binary files differ
diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c
index bcb680c..fb9f776 100644
--- a/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-neon-intrinsics.c
@@ -9037,10 +9037,9 @@
 
 // CHECK-LABEL: @test_vld1q_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
-// CHECK:   [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
-// CHECK:   ret <8 x half> [[TMP3]]
+// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
+// CHECK:   [[TMP2:%.*]] = load <8 x half>, <8 x half>* [[TMP1]]
+// CHECK:   ret <8 x half> [[TMP2]]
 float16x8_t test_vld1q_f16(float16_t const *a) {
   return vld1q_f16(a);
 }
@@ -9152,10 +9151,9 @@
 
 // CHECK-LABEL: @test_vld1_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
-// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
-// CHECK:   ret <4 x half> [[TMP3]]
+// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
+// CHECK:   [[TMP2:%.*]] = load <4 x half>, <4 x half>* [[TMP1]]
+// CHECK:   ret <4 x half> [[TMP2]]
 float16x4_t test_vld1_f16(float16_t const *a) {
   return vld1_f16(a);
 }
@@ -9205,7 +9203,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x2_t [[TMP5]]
 uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
@@ -9223,7 +9221,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x2_t [[TMP6]]
 uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
@@ -9241,7 +9239,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x2_t [[TMP6]]
 uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
@@ -9259,7 +9257,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x2_t [[TMP6]]
 uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
@@ -9276,7 +9274,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x2_t [[TMP5]]
 int8x16x2_t test_vld2q_s8(int8_t const *a) {
@@ -9294,7 +9292,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x2_t [[TMP6]]
 int16x8x2_t test_vld2q_s16(int16_t const *a) {
@@ -9312,7 +9310,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x2_t [[TMP6]]
 int32x4x2_t test_vld2q_s32(int32_t const *a) {
@@ -9330,7 +9328,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x2_t [[TMP6]]
 int64x2x2_t test_vld2q_s64(int64_t const *a) {
@@ -9342,13 +9340,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
-// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
+// CHECK:   [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0v8f16(<8 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x2_t [[TMP6]]
 float16x8x2_t test_vld2q_f16(float16_t const *a) {
@@ -9366,7 +9364,7 @@
 // CHECK:   store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x2_t [[TMP6]]
 float32x4x2_t test_vld2q_f32(float32_t const *a) {
@@ -9384,7 +9382,7 @@
 // CHECK:   store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x2_t [[TMP6]]
 float64x2x2_t test_vld2q_f64(float64_t const *a) {
@@ -9401,7 +9399,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x2_t [[TMP5]]
 poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
@@ -9419,7 +9417,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x2_t [[TMP6]]
 poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
@@ -9436,7 +9434,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x2_t [[TMP5]]
 uint8x8x2_t test_vld2_u8(uint8_t const *a) {
@@ -9454,7 +9452,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x2_t [[TMP6]]
 uint16x4x2_t test_vld2_u16(uint16_t const *a) {
@@ -9472,7 +9470,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x2_t [[TMP6]]
 uint32x2x2_t test_vld2_u32(uint32_t const *a) {
@@ -9490,7 +9488,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x2_t [[TMP6]]
 uint64x1x2_t test_vld2_u64(uint64_t const *a) {
@@ -9507,7 +9505,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x2_t [[TMP5]]
 int8x8x2_t test_vld2_s8(int8_t const *a) {
@@ -9525,7 +9523,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x2_t [[TMP6]]
 int16x4x2_t test_vld2_s16(int16_t const *a) {
@@ -9543,7 +9541,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x2_t [[TMP6]]
 int32x2x2_t test_vld2_s32(int32_t const *a) {
@@ -9561,7 +9559,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x2_t [[TMP6]]
 int64x1x2_t test_vld2_s64(int64_t const *a) {
@@ -9573,13 +9571,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
-// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
+// CHECK:   [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x2_t [[TMP6]]
 float16x4x2_t test_vld2_f16(float16_t const *a) {
@@ -9597,7 +9595,7 @@
 // CHECK:   store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x2_t [[TMP6]]
 float32x2x2_t test_vld2_f32(float32_t const *a) {
@@ -9615,7 +9613,7 @@
 // CHECK:   store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x2_t [[TMP6]]
 float64x1x2_t test_vld2_f64(float64_t const *a) {
@@ -9632,7 +9630,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x2_t [[TMP5]]
 poly8x8x2_t test_vld2_p8(poly8_t const *a) {
@@ -9650,7 +9648,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
 poly16x4x2_t test_vld2_p16(poly16_t const *a) {
@@ -9667,7 +9665,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x3_t [[TMP5]]
 uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
@@ -9685,7 +9683,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
 uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
@@ -9703,7 +9701,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
 uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
@@ -9721,7 +9719,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
 uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
@@ -9738,7 +9736,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x3_t [[TMP5]]
 int8x16x3_t test_vld3q_s8(int8_t const *a) {
@@ -9756,7 +9754,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x3_t [[TMP6]]
 int16x8x3_t test_vld3q_s16(int16_t const *a) {
@@ -9774,7 +9772,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x3_t [[TMP6]]
 int32x4x3_t test_vld3q_s32(int32_t const *a) {
@@ -9792,7 +9790,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x3_t [[TMP6]]
 int64x2x3_t test_vld3q_s64(int64_t const *a) {
@@ -9804,13 +9802,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
-// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
+// CHECK:   [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0v8f16(<8 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x3_t [[TMP6]]
 float16x8x3_t test_vld3q_f16(float16_t const *a) {
@@ -9828,7 +9826,7 @@
 // CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x3_t [[TMP6]]
 float32x4x3_t test_vld3q_f32(float32_t const *a) {
@@ -9846,7 +9844,7 @@
 // CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x3_t [[TMP6]]
 float64x2x3_t test_vld3q_f64(float64_t const *a) {
@@ -9863,7 +9861,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x3_t [[TMP5]]
 poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
@@ -9881,7 +9879,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
 poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
@@ -9898,7 +9896,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x3_t [[TMP5]]
 uint8x8x3_t test_vld3_u8(uint8_t const *a) {
@@ -9916,7 +9914,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
 uint16x4x3_t test_vld3_u16(uint16_t const *a) {
@@ -9934,7 +9932,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
 uint32x2x3_t test_vld3_u32(uint32_t const *a) {
@@ -9952,7 +9950,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x3_t [[TMP6]]
 uint64x1x3_t test_vld3_u64(uint64_t const *a) {
@@ -9969,7 +9967,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x3_t [[TMP5]]
 int8x8x3_t test_vld3_s8(int8_t const *a) {
@@ -9987,7 +9985,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x3_t [[TMP6]]
 int16x4x3_t test_vld3_s16(int16_t const *a) {
@@ -10005,7 +10003,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x3_t [[TMP6]]
 int32x2x3_t test_vld3_s32(int32_t const *a) {
@@ -10023,7 +10021,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x3_t [[TMP6]]
 int64x1x3_t test_vld3_s64(int64_t const *a) {
@@ -10035,13 +10033,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
-// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
+// CHECK:   [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x3_t [[TMP6]]
 float16x4x3_t test_vld3_f16(float16_t const *a) {
@@ -10059,7 +10057,7 @@
 // CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x3_t [[TMP6]]
 float32x2x3_t test_vld3_f32(float32_t const *a) {
@@ -10077,7 +10075,7 @@
 // CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x3_t [[TMP6]]
 float64x1x3_t test_vld3_f64(float64_t const *a) {
@@ -10094,7 +10092,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x3_t [[TMP5]]
 poly8x8x3_t test_vld3_p8(poly8_t const *a) {
@@ -10112,7 +10110,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x3_t [[TMP6]]
 poly16x4x3_t test_vld3_p16(poly16_t const *a) {
@@ -10129,7 +10127,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x4_t [[TMP5]]
 uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
@@ -10147,7 +10145,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x4_t [[TMP6]]
 uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
@@ -10165,7 +10163,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x4_t [[TMP6]]
 uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
@@ -10183,7 +10181,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x4_t [[TMP6]]
 uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
@@ -10200,7 +10198,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x4_t [[TMP5]]
 int8x16x4_t test_vld4q_s8(int8_t const *a) {
@@ -10218,7 +10216,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x4_t [[TMP6]]
 int16x8x4_t test_vld4q_s16(int16_t const *a) {
@@ -10236,7 +10234,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x4_t [[TMP6]]
 int32x4x4_t test_vld4q_s32(int32_t const *a) {
@@ -10254,7 +10252,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x4_t [[TMP6]]
 int64x2x4_t test_vld4q_s64(int64_t const *a) {
@@ -10266,13 +10264,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
-// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
+// CHECK:   [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0v8f16(<8 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x4_t [[TMP6]]
 float16x8x4_t test_vld4q_f16(float16_t const *a) {
@@ -10290,7 +10288,7 @@
 // CHECK:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x4_t [[TMP6]]
 float32x4x4_t test_vld4q_f32(float32_t const *a) {
@@ -10308,7 +10306,7 @@
 // CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x4_t [[TMP6]]
 float64x2x4_t test_vld4q_f64(float64_t const *a) {
@@ -10325,7 +10323,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x4_t [[TMP5]]
 poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
@@ -10343,7 +10341,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x4_t [[TMP6]]
 poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
@@ -10360,7 +10358,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x4_t [[TMP5]]
 uint8x8x4_t test_vld4_u8(uint8_t const *a) {
@@ -10378,7 +10376,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x4_t [[TMP6]]
 uint16x4x4_t test_vld4_u16(uint16_t const *a) {
@@ -10396,7 +10394,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x4_t [[TMP6]]
 uint32x2x4_t test_vld4_u32(uint32_t const *a) {
@@ -10414,7 +10412,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x4_t [[TMP6]]
 uint64x1x4_t test_vld4_u64(uint64_t const *a) {
@@ -10431,7 +10429,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x4_t [[TMP5]]
 int8x8x4_t test_vld4_s8(int8_t const *a) {
@@ -10449,7 +10447,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x4_t [[TMP6]]
 int16x4x4_t test_vld4_s16(int16_t const *a) {
@@ -10467,7 +10465,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x4_t [[TMP6]]
 int32x2x4_t test_vld4_s32(int32_t const *a) {
@@ -10485,7 +10483,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x4_t [[TMP6]]
 int64x1x4_t test_vld4_s64(int64_t const *a) {
@@ -10497,13 +10495,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
-// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
+// CHECK:   [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0v4f16(<4 x half>* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x4_t [[TMP6]]
 float16x4x4_t test_vld4_f16(float16_t const *a) {
@@ -10521,7 +10519,7 @@
 // CHECK:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x4_t [[TMP6]]
 float32x2x4_t test_vld4_f32(float32_t const *a) {
@@ -10539,7 +10537,7 @@
 // CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x4_t [[TMP6]]
 float64x1x4_t test_vld4_f64(float64_t const *a) {
@@ -10556,7 +10554,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x4_t [[TMP5]]
 poly8x8x4_t test_vld4_p8(poly8_t const *a) {
@@ -10574,7 +10572,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x4_t [[TMP6]]
 poly16x4x4_t test_vld4_p16(poly16_t const *a) {
@@ -10666,9 +10664,9 @@
 // CHECK-LABEL: @test_vst1q_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
+// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK:   store <8 x half> [[TMP3]], <8 x half>* [[TMP2]]
 // CHECK:   ret void
 void test_vst1q_f16(float16_t *a, float16x8_t b) {
   vst1q_f16(a, b);
@@ -10800,9 +10798,9 @@
 // CHECK-LABEL: @test_vst1_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
+// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK:   store <4 x half> [[TMP3]], <4 x half>* [[TMP2]]
 // CHECK:   ret void
 void test_vst1_f16(float16_t *a, float16x4_t b) {
   vst1_f16(a, b);
@@ -10856,7 +10854,7 @@
 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -10876,7 +10874,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -10901,7 +10899,7 @@
 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -10926,7 +10924,7 @@
 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -10951,7 +10949,7 @@
 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -10971,7 +10969,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -10996,7 +10994,7 @@
 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -11021,7 +11019,7 @@
 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -11046,7 +11044,7 @@
 // CHECK:   store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
@@ -11056,9 +11054,9 @@
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st2.v8f16.p0i8(<8 x half> [[TMP7]], <8 x half> [[TMP8]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
   vst2q_f16(a, b);
@@ -11071,7 +11069,7 @@
 // CHECK:   store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
@@ -11096,7 +11094,7 @@
 // CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
@@ -11121,7 +11119,7 @@
 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -11141,7 +11139,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -11166,7 +11164,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -11186,7 +11184,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -11211,7 +11209,7 @@
 // CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -11236,7 +11234,7 @@
 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -11261,7 +11259,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -11281,7 +11279,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -11306,7 +11304,7 @@
 // CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -11331,7 +11329,7 @@
 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -11356,7 +11354,7 @@
 // CHECK:   store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
@@ -11366,9 +11364,9 @@
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st2.v4f16.p0i8(<4 x half> [[TMP7]], <4 x half> [[TMP8]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst2_f16(float16_t *a, float16x4x2_t b) {
   vst2_f16(a, b);
@@ -11381,7 +11379,7 @@
 // CHECK:   store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
@@ -11406,7 +11404,7 @@
 // CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
@@ -11431,7 +11429,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -11451,7 +11449,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -11476,7 +11474,7 @@
 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -11499,7 +11497,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -11529,7 +11527,7 @@
 // CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -11559,7 +11557,7 @@
 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -11589,7 +11587,7 @@
 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -11612,7 +11610,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -11642,7 +11640,7 @@
 // CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -11672,7 +11670,7 @@
 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -11702,7 +11700,7 @@
 // CHECK:   store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
@@ -11716,10 +11714,10 @@
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
+// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st3.v8f16.p0i8(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
   vst3q_f16(a, b);
@@ -11732,7 +11730,7 @@
 // CHECK:   store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
@@ -11762,7 +11760,7 @@
 // CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
@@ -11792,7 +11790,7 @@
 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -11815,7 +11813,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -11845,7 +11843,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -11868,7 +11866,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -11898,7 +11896,7 @@
 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -11928,7 +11926,7 @@
 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -11958,7 +11956,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -11981,7 +11979,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -12011,7 +12009,7 @@
 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -12041,7 +12039,7 @@
 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -12071,7 +12069,7 @@
 // CHECK:   store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
@@ -12085,10 +12083,10 @@
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
+// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st3.v4f16.p0i8(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst3_f16(float16_t *a, float16x4x3_t b) {
   vst3_f16(a, b);
@@ -12101,7 +12099,7 @@
 // CHECK:   store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
@@ -12131,7 +12129,7 @@
 // CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
@@ -12161,7 +12159,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -12184,7 +12182,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -12214,7 +12212,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -12240,7 +12238,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -12275,7 +12273,7 @@
 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -12310,7 +12308,7 @@
 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -12345,7 +12343,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -12371,7 +12369,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -12406,7 +12404,7 @@
 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -12441,7 +12439,7 @@
 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -12476,7 +12474,7 @@
 // CHECK:   store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
@@ -12494,11 +12492,11 @@
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st4.v8f16.p0i8(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
   vst4q_f16(a, b);
@@ -12511,7 +12509,7 @@
 // CHECK:   store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
@@ -12546,7 +12544,7 @@
 // CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
@@ -12581,7 +12579,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -12607,7 +12605,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -12642,7 +12640,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -12668,7 +12666,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -12703,7 +12701,7 @@
 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -12738,7 +12736,7 @@
 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -12773,7 +12771,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -12799,7 +12797,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -12834,7 +12832,7 @@
 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -12869,7 +12867,7 @@
 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -12904,7 +12902,7 @@
 // CHECK:   store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
@@ -12922,11 +12920,11 @@
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
 // CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st4.v4f16.p0i8(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst4_f16(float16_t *a, float16x4x4_t b) {
   vst4_f16(a, b);
@@ -12939,7 +12937,7 @@
 // CHECK:   store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
@@ -12974,7 +12972,7 @@
 // CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
@@ -13009,7 +13007,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -13035,7 +13033,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -13072,7 +13070,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x2_t [[TMP4]]
 uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) {
@@ -13090,7 +13088,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x2_t [[TMP6]]
 uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) {
@@ -13108,7 +13106,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x2_t [[TMP6]]
 uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) {
@@ -13126,7 +13124,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x2_t [[TMP6]]
 uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) {
@@ -13142,7 +13140,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x2_t [[TMP4]]
 int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
@@ -13160,7 +13158,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x2_t [[TMP6]]
 int16x8x2_t test_vld1q_s16_x2(int16_t const *a) {
@@ -13178,7 +13176,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x2_t [[TMP6]]
 int32x4x2_t test_vld1q_s32_x2(int32_t const *a) {
@@ -13196,7 +13194,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x2_t [[TMP6]]
 int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
@@ -13208,13 +13206,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x2.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half> } [[VLD1XN]], { <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x2_t [[TMP6]]
 float16x8x2_t test_vld1q_f16_x2(float16_t const *a) {
@@ -13232,7 +13230,7 @@
 // CHECK:   store { <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x2_t [[TMP6]]
 float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
@@ -13250,7 +13248,7 @@
 // CHECK:   store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x2_t [[TMP6]]
 float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
@@ -13266,7 +13264,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x2_t [[TMP4]]
 poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
@@ -13284,7 +13282,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x2_t [[TMP6]]
 poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
@@ -13302,7 +13300,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x2_t [[TMP6]]
 poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
@@ -13318,7 +13316,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x2_t [[TMP4]]
 uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
@@ -13336,7 +13334,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x2_t [[TMP6]]
 uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
@@ -13354,7 +13352,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x2_t [[TMP6]]
 uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
@@ -13372,7 +13370,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x2_t [[TMP6]]
 uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
@@ -13388,7 +13386,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x2_t [[TMP4]]
 int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
@@ -13406,7 +13404,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x2_t [[TMP6]]
 int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
@@ -13424,7 +13422,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x2_t [[TMP6]]
 int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
@@ -13442,7 +13440,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x2_t [[TMP6]]
 int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
@@ -13454,13 +13452,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x2.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half> } [[VLD1XN]], { <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x2_t [[TMP6]]
 float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
@@ -13478,7 +13476,7 @@
 // CHECK:   store { <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x2_t [[TMP6]]
 float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
@@ -13496,7 +13494,7 @@
 // CHECK:   store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x2_t [[TMP6]]
 float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
@@ -13512,7 +13510,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x2_t [[TMP4]]
 poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
@@ -13530,7 +13528,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
 poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
@@ -13548,7 +13546,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x2_t [[TMP6]]
 poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
@@ -13564,7 +13562,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x3_t [[TMP4]]
 uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
@@ -13582,7 +13580,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
 uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
@@ -13600,7 +13598,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
 uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) {
@@ -13618,7 +13616,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
 uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) {
@@ -13634,7 +13632,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x3_t [[TMP4]]
 int8x16x3_t test_vld1q_s8_x3(int8_t const *a) {
@@ -13652,7 +13650,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x3_t [[TMP6]]
 int16x8x3_t test_vld1q_s16_x3(int16_t const *a) {
@@ -13670,7 +13668,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x3_t [[TMP6]]
 int32x4x3_t test_vld1q_s32_x3(int32_t const *a) {
@@ -13688,7 +13686,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x3_t [[TMP6]]
 int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
@@ -13700,13 +13698,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x3.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD1XN]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x3_t [[TMP6]]
 float16x8x3_t test_vld1q_f16_x3(float16_t const *a) {
@@ -13724,7 +13722,7 @@
 // CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x3_t [[TMP6]]
 float32x4x3_t test_vld1q_f32_x3(float32_t const *a) {
@@ -13742,7 +13740,7 @@
 // CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x3_t [[TMP6]]
 float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
@@ -13758,7 +13756,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x3_t [[TMP4]]
 poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) {
@@ -13776,7 +13774,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
 poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) {
@@ -13794,7 +13792,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x3_t [[TMP6]]
 poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
@@ -13810,7 +13808,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x3_t [[TMP4]]
 uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) {
@@ -13828,7 +13826,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
 uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) {
@@ -13846,7 +13844,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
 uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) {
@@ -13864,7 +13862,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x3_t [[TMP6]]
 uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) {
@@ -13880,7 +13878,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x3_t [[TMP4]]
 int8x8x3_t test_vld1_s8_x3(int8_t const *a) {
@@ -13898,7 +13896,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x3_t [[TMP6]]
 int16x4x3_t test_vld1_s16_x3(int16_t const *a) {
@@ -13916,7 +13914,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x3_t [[TMP6]]
 int32x2x3_t test_vld1_s32_x3(int32_t const *a) {
@@ -13934,7 +13932,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x3_t [[TMP6]]
 int64x1x3_t test_vld1_s64_x3(int64_t const *a) {
@@ -13946,13 +13944,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x3.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD1XN]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x3_t [[TMP6]]
 float16x4x3_t test_vld1_f16_x3(float16_t const *a) {
@@ -13970,7 +13968,7 @@
 // CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x3_t [[TMP6]]
 float32x2x3_t test_vld1_f32_x3(float32_t const *a) {
@@ -13988,7 +13986,7 @@
 // CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x3_t [[TMP6]]
 float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
@@ -14004,7 +14002,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x3_t [[TMP4]]
 poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) {
@@ -14022,7 +14020,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x3_t [[TMP6]]
 poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) {
@@ -14040,7 +14038,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x3_t [[TMP6]]
 poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
@@ -14056,7 +14054,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x4_t [[TMP4]]
 uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) {
@@ -14074,7 +14072,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x4_t [[TMP6]]
 uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) {
@@ -14092,7 +14090,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x4_t [[TMP6]]
 uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) {
@@ -14110,7 +14108,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x4_t [[TMP6]]
 uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) {
@@ -14126,7 +14124,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x4_t [[TMP4]]
 int8x16x4_t test_vld1q_s8_x4(int8_t const *a) {
@@ -14144,7 +14142,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x4_t [[TMP6]]
 int16x8x4_t test_vld1q_s16_x4(int16_t const *a) {
@@ -14162,7 +14160,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x4_t [[TMP6]]
 int32x4x4_t test_vld1q_s32_x4(int32_t const *a) {
@@ -14180,7 +14178,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x4_t [[TMP6]]
 int64x2x4_t test_vld1q_s64_x4(int64_t const *a) {
@@ -14192,13 +14190,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x4.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD1XN]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x4_t [[TMP6]]
 float16x8x4_t test_vld1q_f16_x4(float16_t const *a) {
@@ -14216,7 +14214,7 @@
 // CHECK:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x4_t [[TMP6]]
 float32x4x4_t test_vld1q_f32_x4(float32_t const *a) {
@@ -14234,7 +14232,7 @@
 // CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x4_t [[TMP6]]
 float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
@@ -14250,7 +14248,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x4_t [[TMP4]]
 poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) {
@@ -14268,7 +14266,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x4_t [[TMP6]]
 poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) {
@@ -14286,7 +14284,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x4_t [[TMP6]]
 poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
@@ -14302,7 +14300,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x4_t [[TMP4]]
 uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) {
@@ -14320,7 +14318,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x4_t [[TMP6]]
 uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) {
@@ -14338,7 +14336,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x4_t [[TMP6]]
 uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) {
@@ -14356,7 +14354,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x4_t [[TMP6]]
 uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) {
@@ -14372,7 +14370,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x4_t [[TMP4]]
 int8x8x4_t test_vld1_s8_x4(int8_t const *a) {
@@ -14390,7 +14388,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x4_t [[TMP6]]
 int16x4x4_t test_vld1_s16_x4(int16_t const *a) {
@@ -14408,7 +14406,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x4_t [[TMP6]]
 int32x2x4_t test_vld1_s32_x4(int32_t const *a) {
@@ -14426,7 +14424,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x4_t [[TMP6]]
 int64x1x4_t test_vld1_s64_x4(int64_t const *a) {
@@ -14438,13 +14436,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD1XN:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x4.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD1XN]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x4_t [[TMP6]]
 float16x4x4_t test_vld1_f16_x4(float16_t const *a) {
@@ -14462,7 +14460,7 @@
 // CHECK:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x4_t [[TMP6]]
 float32x2x4_t test_vld1_f32_x4(float32_t const *a) {
@@ -14480,7 +14478,7 @@
 // CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x4_t [[TMP6]]
 float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
@@ -14496,7 +14494,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x4_t [[TMP4]]
 poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) {
@@ -14514,7 +14512,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x4_t [[TMP6]]
 poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) {
@@ -14532,7 +14530,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x4_t [[TMP6]]
 poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
@@ -14546,7 +14544,7 @@
 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -14566,7 +14564,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -14592,7 +14590,7 @@
 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -14618,7 +14616,7 @@
 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -14644,7 +14642,7 @@
 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -14664,7 +14662,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -14690,7 +14688,7 @@
 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -14716,7 +14714,7 @@
 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -14742,7 +14740,7 @@
 // CHECK:   store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
@@ -14752,10 +14750,10 @@
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
+// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x2.v8f16.p0f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], half* [[TMP9]])
 // CHECK:   ret void
 void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) {
   vst1q_f16_x2(a, b);
@@ -14768,7 +14766,7 @@
 // CHECK:   store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
@@ -14794,7 +14792,7 @@
 // CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
@@ -14820,7 +14818,7 @@
 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -14840,7 +14838,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -14866,7 +14864,7 @@
 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -14892,7 +14890,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -14912,7 +14910,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -14938,7 +14936,7 @@
 // CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -14964,7 +14962,7 @@
 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -14990,7 +14988,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -15010,7 +15008,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -15036,7 +15034,7 @@
 // CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -15062,7 +15060,7 @@
 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -15088,7 +15086,7 @@
 // CHECK:   store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
@@ -15098,10 +15096,10 @@
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
+// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x2.v4f16.p0f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], half* [[TMP9]])
 // CHECK:   ret void
 void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) {
   vst1_f16_x2(a, b);
@@ -15114,7 +15112,7 @@
 // CHECK:   store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
@@ -15140,7 +15138,7 @@
 // CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
@@ -15166,7 +15164,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -15186,7 +15184,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -15212,7 +15210,7 @@
 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -15238,7 +15236,7 @@
 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -15261,7 +15259,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -15292,7 +15290,7 @@
 // CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -15323,7 +15321,7 @@
 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -15354,7 +15352,7 @@
 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -15377,7 +15375,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -15408,7 +15406,7 @@
 // CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -15439,7 +15437,7 @@
 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -15470,7 +15468,7 @@
 // CHECK:   store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
@@ -15484,11 +15482,11 @@
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
+// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x3.v8f16.p0f16(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], half* [[TMP12]])
 // CHECK:   ret void
 void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) {
   vst1q_f16_x3(a, b);
@@ -15501,7 +15499,7 @@
 // CHECK:   store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
@@ -15532,7 +15530,7 @@
 // CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
@@ -15563,7 +15561,7 @@
 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -15586,7 +15584,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -15617,7 +15615,7 @@
 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -15648,7 +15646,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -15671,7 +15669,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -15702,7 +15700,7 @@
 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -15733,7 +15731,7 @@
 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -15764,7 +15762,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -15787,7 +15785,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -15818,7 +15816,7 @@
 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -15849,7 +15847,7 @@
 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -15880,7 +15878,7 @@
 // CHECK:   store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
@@ -15894,11 +15892,11 @@
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
+// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x3.v4f16.p0f16(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], half* [[TMP12]])
 // CHECK:   ret void
 void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) {
   vst1_f16_x3(a, b);
@@ -15911,7 +15909,7 @@
 // CHECK:   store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
@@ -15942,7 +15940,7 @@
 // CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
@@ -15973,7 +15971,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -15996,7 +15994,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -16027,7 +16025,7 @@
 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -16058,7 +16056,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -16084,7 +16082,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -16120,7 +16118,7 @@
 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -16156,7 +16154,7 @@
 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -16192,7 +16190,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -16218,7 +16216,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -16254,7 +16252,7 @@
 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -16290,7 +16288,7 @@
 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -16326,7 +16324,7 @@
 // CHECK:   store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
@@ -16344,12 +16342,12 @@
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x4.v8f16.p0f16(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], half* [[TMP15]])
 // CHECK:   ret void
 void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) {
   vst1q_f16_x4(a, b);
@@ -16362,7 +16360,7 @@
 // CHECK:   store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
@@ -16398,7 +16396,7 @@
 // CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
@@ -16434,7 +16432,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -16460,7 +16458,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -16496,7 +16494,7 @@
 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -16532,7 +16530,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -16558,7 +16556,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -16594,7 +16592,7 @@
 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -16630,7 +16628,7 @@
 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -16666,7 +16664,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -16692,7 +16690,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -16728,7 +16726,7 @@
 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -16764,7 +16762,7 @@
 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -16800,7 +16798,7 @@
 // CHECK:   store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
@@ -16818,12 +16816,12 @@
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
 // CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
-// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to half*
+// CHECK:   call void @llvm.aarch64.neon.st1x4.v4f16.p0f16(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], half* [[TMP15]])
 // CHECK:   ret void
 void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) {
   vst1_f16_x4(a, b);
@@ -16836,7 +16834,7 @@
 // CHECK:   store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
@@ -16872,7 +16870,7 @@
 // CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
@@ -16908,7 +16906,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -16934,7 +16932,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -16970,7 +16968,7 @@
 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
diff --git a/test/CodeGen/aarch64-neon-ldst-one.c b/test/CodeGen/aarch64-neon-ldst-one.c
index 9bd9ab1..5d775ce 100644
--- a/test/CodeGen/aarch64-neon-ldst-one.c
+++ b/test/CodeGen/aarch64-neon-ldst-one.c
@@ -90,12 +90,11 @@
 
 // CHECK-LABEL: define <8 x half> @test_vld1q_dup_f16(half* %a) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]]
-// CHECK:   [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
-// CHECK:   [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half>
-// CHECK:   ret <8 x half> [[TMP4]]
+// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP2:%.*]] = load half, half* [[TMP1]]
+// CHECK:   [[TMP3:%.*]] = insertelement <8 x half> undef, half [[TMP2]], i32 0
+// CHECK:   [[LANE:%.*]] = shufflevector <8 x half> [[TMP3]], <8 x half> [[TMP3]], <8 x i32> zeroinitializer
+// CHECK:   ret <8 x half> [[LANE]]
 float16x8_t test_vld1q_dup_f16(float16_t  *a) {
   return vld1q_dup_f16(a);
 }
@@ -239,12 +238,11 @@
 
 // CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]]
-// CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
-// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half>
-// CHECK:   ret <4 x half> [[TMP4]]
+// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP2:%.*]] = load half, half* [[TMP1]]
+// CHECK:   [[TMP3:%.*]] = insertelement <4 x half> undef, half [[TMP2]], i32 0
+// CHECK:   [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> zeroinitializer
+// CHECK:   ret <4 x half> [[LANE]]
 float16x4_t test_vld1_dup_f16(float16_t  *a) {
   return vld1_dup_f16(a);
 }
@@ -311,7 +309,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x2_t [[TMP4]]
 uint8x16x2_t test_vld2q_dup_u8(uint8_t  *a) {
@@ -329,7 +327,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x2_t [[TMP6]]
 uint16x8x2_t test_vld2q_dup_u16(uint16_t  *a) {
@@ -347,7 +345,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x2_t [[TMP6]]
 uint32x4x2_t test_vld2q_dup_u32(uint32_t  *a) {
@@ -365,7 +363,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x2_t [[TMP6]]
 uint64x2x2_t test_vld2q_dup_u64(uint64_t  *a) {
@@ -381,7 +379,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x2_t [[TMP4]]
 int8x16x2_t test_vld2q_dup_s8(int8_t  *a) {
@@ -399,7 +397,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x2_t [[TMP6]]
 int16x8x2_t test_vld2q_dup_s16(int16_t  *a) {
@@ -417,7 +415,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x2_t [[TMP6]]
 int32x4x2_t test_vld2q_dup_s32(int32_t  *a) {
@@ -435,7 +433,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x2_t [[TMP6]]
 int64x2x2_t test_vld2q_dup_s64(int64_t  *a) {
@@ -447,13 +445,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2r.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x2_t [[TMP6]]
 float16x8x2_t test_vld2q_dup_f16(float16_t  *a) {
@@ -471,7 +469,7 @@
 // CHECK:   store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x2_t [[TMP6]]
 float32x4x2_t test_vld2q_dup_f32(float32_t  *a) {
@@ -489,7 +487,7 @@
 // CHECK:   store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x2_t [[TMP6]]
 float64x2x2_t test_vld2q_dup_f64(float64_t  *a) {
@@ -505,7 +503,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x2_t [[TMP4]]
 poly8x16x2_t test_vld2q_dup_p8(poly8_t  *a) {
@@ -523,7 +521,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x2_t [[TMP6]]
 poly16x8x2_t test_vld2q_dup_p16(poly16_t  *a) {
@@ -541,7 +539,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x2_t [[TMP6]]
 poly64x2x2_t test_vld2q_dup_p64(poly64_t  *a) {
@@ -557,7 +555,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x2_t [[TMP4]]
 uint8x8x2_t test_vld2_dup_u8(uint8_t  *a) {
@@ -575,7 +573,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x2_t [[TMP6]]
 uint16x4x2_t test_vld2_dup_u16(uint16_t  *a) {
@@ -593,7 +591,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x2_t [[TMP6]]
 uint32x2x2_t test_vld2_dup_u32(uint32_t  *a) {
@@ -611,7 +609,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x2_t [[TMP6]]
 uint64x1x2_t test_vld2_dup_u64(uint64_t  *a) {
@@ -627,7 +625,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x2_t [[TMP4]]
 int8x8x2_t test_vld2_dup_s8(int8_t  *a) {
@@ -645,7 +643,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x2_t [[TMP6]]
 int16x4x2_t test_vld2_dup_s16(int16_t  *a) {
@@ -663,7 +661,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x2_t [[TMP6]]
 int32x2x2_t test_vld2_dup_s32(int32_t  *a) {
@@ -681,7 +679,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x2_t [[TMP6]]
 int64x1x2_t test_vld2_dup_s64(int64_t  *a) {
@@ -693,13 +691,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2r.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x2_t [[TMP6]]
 float16x4x2_t test_vld2_dup_f16(float16_t  *a) {
@@ -717,7 +715,7 @@
 // CHECK:   store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x2_t [[TMP6]]
 float32x2x2_t test_vld2_dup_f32(float32_t  *a) {
@@ -735,7 +733,7 @@
 // CHECK:   store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x2_t [[TMP6]]
 float64x1x2_t test_vld2_dup_f64(float64_t  *a) {
@@ -751,7 +749,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x2_t [[TMP4]]
 poly8x8x2_t test_vld2_dup_p8(poly8_t  *a) {
@@ -769,7 +767,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
 poly16x4x2_t test_vld2_dup_p16(poly16_t  *a) {
@@ -787,7 +785,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x2_t [[TMP6]]
 poly64x1x2_t test_vld2_dup_p64(poly64_t  *a) {
@@ -803,7 +801,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x3_t [[TMP4]]
 uint8x16x3_t test_vld3q_dup_u8(uint8_t  *a) {
@@ -822,7 +820,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
 uint16x8x3_t test_vld3q_dup_u16(uint16_t  *a) {
@@ -841,7 +839,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
 uint32x4x3_t test_vld3q_dup_u32(uint32_t  *a) {
@@ -860,7 +858,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
 uint64x2x3_t test_vld3q_dup_u64(uint64_t  *a) {
@@ -877,7 +875,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x3_t [[TMP4]]
 int8x16x3_t test_vld3q_dup_s8(int8_t  *a) {
@@ -896,7 +894,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x3_t [[TMP6]]
 int16x8x3_t test_vld3q_dup_s16(int16_t  *a) {
@@ -915,7 +913,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x3_t [[TMP6]]
 int32x4x3_t test_vld3q_dup_s32(int32_t  *a) {
@@ -934,7 +932,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x3_t [[TMP6]]
 int64x2x3_t test_vld3q_dup_s64(int64_t  *a) {
@@ -947,13 +945,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3r.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x3_t [[TMP6]]
 float16x8x3_t test_vld3q_dup_f16(float16_t  *a) {
@@ -972,7 +970,7 @@
 // CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x3_t [[TMP6]]
 float32x4x3_t test_vld3q_dup_f32(float32_t  *a) {
@@ -991,7 +989,7 @@
 // CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x3_t [[TMP6]]
 float64x2x3_t test_vld3q_dup_f64(float64_t  *a) {
@@ -1008,7 +1006,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x3_t [[TMP4]]
 poly8x16x3_t test_vld3q_dup_p8(poly8_t  *a) {
@@ -1027,7 +1025,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
 poly16x8x3_t test_vld3q_dup_p16(poly16_t  *a) {
@@ -1046,7 +1044,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x3_t [[TMP6]]
 poly64x2x3_t test_vld3q_dup_p64(poly64_t  *a) {
@@ -1063,7 +1061,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x3_t [[TMP4]]
 uint8x8x3_t test_vld3_dup_u8(uint8_t  *a) {
@@ -1082,7 +1080,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
 uint16x4x3_t test_vld3_dup_u16(uint16_t  *a) {
@@ -1101,7 +1099,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
 uint32x2x3_t test_vld3_dup_u32(uint32_t  *a) {
@@ -1120,7 +1118,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x3_t [[TMP6]]
 uint64x1x3_t test_vld3_dup_u64(uint64_t  *a) {
@@ -1137,7 +1135,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x3_t [[TMP4]]
 int8x8x3_t test_vld3_dup_s8(int8_t  *a) {
@@ -1156,7 +1154,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x3_t [[TMP6]]
 int16x4x3_t test_vld3_dup_s16(int16_t  *a) {
@@ -1175,7 +1173,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x3_t [[TMP6]]
 int32x2x3_t test_vld3_dup_s32(int32_t  *a) {
@@ -1194,7 +1192,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x3_t [[TMP6]]
 int64x1x3_t test_vld3_dup_s64(int64_t  *a) {
@@ -1207,13 +1205,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3r.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x3_t [[TMP6]]
 float16x4x3_t test_vld3_dup_f16(float16_t  *a) {
@@ -1232,7 +1230,7 @@
 // CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x3_t [[TMP6]]
 float32x2x3_t test_vld3_dup_f32(float32_t  *a) {
@@ -1251,7 +1249,7 @@
 // CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x3_t [[TMP6]]
 float64x1x3_t test_vld3_dup_f64(float64_t  *a) {
@@ -1268,7 +1266,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x3_t [[TMP4]]
 poly8x8x3_t test_vld3_dup_p8(poly8_t  *a) {
@@ -1287,7 +1285,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x3_t [[TMP6]]
 poly16x4x3_t test_vld3_dup_p16(poly16_t  *a) {
@@ -1306,7 +1304,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x3_t [[TMP6]]
 poly64x1x3_t test_vld3_dup_p64(poly64_t  *a) {
@@ -1323,7 +1321,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x4_t [[TMP4]]
 uint8x16x4_t test_vld4q_dup_u8(uint8_t  *a) {
@@ -1341,7 +1339,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x4_t [[TMP6]]
 uint16x8x4_t test_vld4q_dup_u16(uint16_t  *a) {
@@ -1359,7 +1357,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x4_t [[TMP6]]
 uint32x4x4_t test_vld4q_dup_u32(uint32_t  *a) {
@@ -1377,7 +1375,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x4_t [[TMP6]]
 uint64x2x4_t test_vld4q_dup_u64(uint64_t  *a) {
@@ -1393,7 +1391,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x4_t [[TMP4]]
 int8x16x4_t test_vld4q_dup_s8(int8_t  *a) {
@@ -1411,7 +1409,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x4_t [[TMP6]]
 int16x8x4_t test_vld4q_dup_s16(int16_t  *a) {
@@ -1429,7 +1427,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x4_t [[TMP6]]
 int32x4x4_t test_vld4q_dup_s32(int32_t  *a) {
@@ -1447,7 +1445,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x4_t [[TMP6]]
 int64x2x4_t test_vld4q_dup_s64(int64_t  *a) {
@@ -1459,13 +1457,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4r.v8f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x4_t [[TMP6]]
 float16x8x4_t test_vld4q_dup_f16(float16_t  *a) {
@@ -1483,7 +1481,7 @@
 // CHECK:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x4_t [[TMP6]]
 float32x4x4_t test_vld4q_dup_f32(float32_t  *a) {
@@ -1501,7 +1499,7 @@
 // CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x4_t [[TMP6]]
 float64x2x4_t test_vld4q_dup_f64(float64_t  *a) {
@@ -1517,7 +1515,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x4_t [[TMP4]]
 poly8x16x4_t test_vld4q_dup_p8(poly8_t  *a) {
@@ -1535,7 +1533,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x4_t [[TMP6]]
 poly16x8x4_t test_vld4q_dup_p16(poly16_t  *a) {
@@ -1553,7 +1551,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x4_t [[TMP6]]
 poly64x2x4_t test_vld4q_dup_p64(poly64_t  *a) {
@@ -1569,7 +1567,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x4_t [[TMP4]]
 uint8x8x4_t test_vld4_dup_u8(uint8_t  *a) {
@@ -1587,7 +1585,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x4_t [[TMP6]]
 uint16x4x4_t test_vld4_dup_u16(uint16_t  *a) {
@@ -1605,7 +1603,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x4_t [[TMP6]]
 uint32x2x4_t test_vld4_dup_u32(uint32_t  *a) {
@@ -1623,7 +1621,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x4_t [[TMP6]]
 uint64x1x4_t test_vld4_dup_u64(uint64_t  *a) {
@@ -1639,7 +1637,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x4_t [[TMP4]]
 int8x8x4_t test_vld4_dup_s8(int8_t  *a) {
@@ -1657,7 +1655,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x4_t [[TMP6]]
 int16x4x4_t test_vld4_dup_s16(int16_t  *a) {
@@ -1675,7 +1673,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x4_t [[TMP6]]
 int32x2x4_t test_vld4_dup_s32(int32_t  *a) {
@@ -1693,7 +1691,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x4_t [[TMP6]]
 int64x1x4_t test_vld4_dup_s64(int64_t  *a) {
@@ -1705,13 +1703,13 @@
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
-// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
-// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]])
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
+// CHECK:   [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4r.v4f16.p0f16(half* [[TMP2]])
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x4_t [[TMP6]]
 float16x4x4_t test_vld4_dup_f16(float16_t  *a) {
@@ -1729,7 +1727,7 @@
 // CHECK:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x4_t [[TMP6]]
 float32x2x4_t test_vld4_dup_f32(float32_t  *a) {
@@ -1747,7 +1745,7 @@
 // CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x4_t [[TMP6]]
 float64x1x4_t test_vld4_dup_f64(float64_t  *a) {
@@ -1763,7 +1761,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false)
 // CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x4_t [[TMP4]]
 poly8x8x4_t test_vld4_dup_p8(poly8_t  *a) {
@@ -1781,7 +1779,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x4_t [[TMP6]]
 poly16x4x4_t test_vld4_dup_p16(poly16_t  *a) {
@@ -1799,7 +1797,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x4_t [[TMP6]]
 poly64x1x4_t test_vld4_dup_p64(poly64_t  *a) {
@@ -1897,12 +1895,11 @@
 // CHECK-LABEL: define <8 x half> @test_vld1q_lane_f16(half* %a, <8 x half> %b) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]]
-// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[VLD1_LANE]] to <8 x half>
-// CHECK:   ret <8 x half> [[TMP5]]
+// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP4:%.*]] = load half, half* [[TMP3]]
+// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP4]], i32 7
+// CHECK:   ret <8 x half> [[VLD1_LANE]]
 float16x8_t test_vld1q_lane_f16(float16_t  *a, float16x8_t b) {
   return vld1q_lane_f16(a, b, 7);
 }
@@ -2054,12 +2051,11 @@
 // CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]]
-// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
-// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[VLD1_LANE]] to <4 x half>
-// CHECK:   ret <4 x half> [[TMP5]]
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   [[TMP4:%.*]] = load half, half* [[TMP3]]
+// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP4]], i32 3
+// CHECK:   ret <4 x half> [[VLD1_LANE]]
 float16x4_t test_vld1_lane_f16(float16_t  *a, float16x4_t b) {
   return vld1_lane_f16(a, b, 3);
 }
@@ -2129,7 +2125,7 @@
 // CHECK:   store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[SRC]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
@@ -2142,7 +2138,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]]
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 16 [[TMP7]], i64 32, i1 false)
 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
 int8x16x2_t test_vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src) {
@@ -2158,7 +2154,7 @@
 // CHECK:   store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[SRC]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
@@ -2171,7 +2167,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]]
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 16 [[TMP7]], i64 32, i1 false)
 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
 uint8x16x2_t test_vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src) {
@@ -2187,7 +2183,7 @@
 // CHECK:   store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[SRC]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
@@ -2200,7 +2196,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]]
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 16 [[TMP7]], i64 32, i1 false)
 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
 poly8x16x2_t test_vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src) {
@@ -2216,7 +2212,7 @@
 // CHECK:   store [3 x <16 x i8>] [[SRC]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[SRC]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
@@ -2232,7 +2228,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]]
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP7]], i8* align 16 [[TMP8]], i64 48, i1 false)
 // CHECK:   [[TMP9:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x3_t [[TMP9]]
 int8x16x3_t test_vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src) {
@@ -2248,7 +2244,7 @@
 // CHECK:   store [3 x <16 x i8>] [[SRC]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[SRC]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
@@ -2264,7 +2260,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]]
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP7]], i8* align 16 [[TMP8]], i64 48, i1 false)
 // CHECK:   [[TMP9:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x3_t [[TMP9]]
 uint8x16x3_t test_vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src) {
@@ -2280,7 +2276,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
@@ -2298,7 +2294,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x2_t [[TMP13]]
 uint16x8x2_t test_vld2q_lane_u16(uint16_t  *a, uint16x8x2_t b) {
@@ -2314,7 +2310,7 @@
 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
@@ -2332,7 +2328,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x2_t [[TMP13]]
 uint32x4x2_t test_vld2q_lane_u32(uint32_t  *a, uint32x4x2_t b) {
@@ -2348,7 +2344,7 @@
 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
@@ -2366,7 +2362,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x2_t [[TMP13]]
 uint64x2x2_t test_vld2q_lane_u64(uint64_t  *a, uint64x2x2_t b) {
@@ -2382,7 +2378,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
@@ -2400,7 +2396,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x2_t [[TMP13]]
 int16x8x2_t test_vld2q_lane_s16(int16_t  *a, int16x8x2_t b) {
@@ -2416,7 +2412,7 @@
 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
@@ -2434,7 +2430,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x2_t [[TMP13]]
 int32x4x2_t test_vld2q_lane_s32(int32_t  *a, int32x4x2_t b) {
@@ -2450,7 +2446,7 @@
 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
@@ -2468,7 +2464,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x2_t [[TMP13]]
 int64x2x2_t test_vld2q_lane_s64(int64_t  *a, int64x2x2_t b) {
@@ -2484,7 +2480,7 @@
 // CHECK:   store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
@@ -2495,14 +2491,14 @@
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
 // CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]])
-// CHECK:   [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]]
+// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   [[VLD2_LANE:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2lane.v8f16.p0i8(<8 x half> [[TMP8]], <8 x half> [[TMP9]], i64 7, i8* [[TMP3]])
+// CHECK:   [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half> } [[VLD2_LANE]], { <8 x half>, <8 x half> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x2_t [[TMP13]]
 float16x8x2_t test_vld2q_lane_f16(float16_t  *a, float16x8x2_t b) {
@@ -2518,7 +2514,7 @@
 // CHECK:   store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
@@ -2536,7 +2532,7 @@
 // CHECK:   store { <4 x float>, <4 x float> } [[VLD2_LANE]], { <4 x float>, <4 x float> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x2_t [[TMP13]]
 float32x4x2_t test_vld2q_lane_f32(float32_t  *a, float32x4x2_t b) {
@@ -2552,7 +2548,7 @@
 // CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
@@ -2570,7 +2566,7 @@
 // CHECK:   store { <2 x double>, <2 x double> } [[VLD2_LANE]], { <2 x double>, <2 x double> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x2_t [[TMP13]]
 float64x2x2_t test_vld2q_lane_f64(float64_t  *a, float64x2x2_t b) {
@@ -2586,7 +2582,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
@@ -2604,7 +2600,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x2_t [[TMP13]]
 poly16x8x2_t test_vld2q_lane_p16(poly16_t  *a, poly16x8x2_t b) {
@@ -2620,7 +2616,7 @@
 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
@@ -2638,7 +2634,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x2_t [[TMP13]]
 poly64x2x2_t test_vld2q_lane_p64(poly64_t  *a, poly64x2x2_t b) {
@@ -2654,7 +2650,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
@@ -2667,7 +2663,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]]
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 16, i1 false)
 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
 uint8x8x2_t test_vld2_lane_u8(uint8_t  *a, uint8x8x2_t b) {
@@ -2683,7 +2679,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
@@ -2701,7 +2697,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x2_t [[TMP13]]
 uint16x4x2_t test_vld2_lane_u16(uint16_t  *a, uint16x4x2_t b) {
@@ -2717,7 +2713,7 @@
 // CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
@@ -2735,7 +2731,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x2_t [[TMP13]]
 uint32x2x2_t test_vld2_lane_u32(uint32_t  *a, uint32x2x2_t b) {
@@ -2751,7 +2747,7 @@
 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
@@ -2769,7 +2765,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x2_t [[TMP13]]
 uint64x1x2_t test_vld2_lane_u64(uint64_t  *a, uint64x1x2_t b) {
@@ -2785,7 +2781,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
@@ -2798,7 +2794,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]]
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 16, i1 false)
 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
 int8x8x2_t test_vld2_lane_s8(int8_t  *a, int8x8x2_t b) {
@@ -2814,7 +2810,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
@@ -2832,7 +2828,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x2_t [[TMP13]]
 int16x4x2_t test_vld2_lane_s16(int16_t  *a, int16x4x2_t b) {
@@ -2848,7 +2844,7 @@
 // CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
@@ -2866,7 +2862,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x2_t [[TMP13]]
 int32x2x2_t test_vld2_lane_s32(int32_t  *a, int32x2x2_t b) {
@@ -2882,7 +2878,7 @@
 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
@@ -2900,7 +2896,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x2_t [[TMP13]]
 int64x1x2_t test_vld2_lane_s64(int64_t  *a, int64x1x2_t b) {
@@ -2916,7 +2912,7 @@
 // CHECK:   store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
@@ -2927,14 +2923,14 @@
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
 // CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]])
-// CHECK:   [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]]
+// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   [[VLD2_LANE:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2lane.v4f16.p0i8(<4 x half> [[TMP8]], <4 x half> [[TMP9]], i64 3, i8* [[TMP3]])
+// CHECK:   [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half> } [[VLD2_LANE]], { <4 x half>, <4 x half> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x2_t [[TMP13]]
 float16x4x2_t test_vld2_lane_f16(float16_t  *a, float16x4x2_t b) {
@@ -2950,7 +2946,7 @@
 // CHECK:   store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
@@ -2968,7 +2964,7 @@
 // CHECK:   store { <2 x float>, <2 x float> } [[VLD2_LANE]], { <2 x float>, <2 x float> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x2_t [[TMP13]]
 float32x2x2_t test_vld2_lane_f32(float32_t  *a, float32x2x2_t b) {
@@ -2984,7 +2980,7 @@
 // CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
@@ -3002,7 +2998,7 @@
 // CHECK:   store { <1 x double>, <1 x double> } [[VLD2_LANE]], { <1 x double>, <1 x double> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x2_t [[TMP13]]
 float64x1x2_t test_vld2_lane_f64(float64_t  *a, float64x1x2_t b) {
@@ -3018,7 +3014,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
@@ -3031,7 +3027,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]]
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 16, i1 false)
 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
 poly8x8x2_t test_vld2_lane_p8(poly8_t  *a, poly8x8x2_t b) {
@@ -3047,7 +3043,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
@@ -3065,7 +3061,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x2_t [[TMP13]]
 poly16x4x2_t test_vld2_lane_p16(poly16_t  *a, poly16x4x2_t b) {
@@ -3081,7 +3077,7 @@
 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
@@ -3099,7 +3095,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]]
 // CHECK:   [[TMP11:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP12:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false)
 // CHECK:   [[TMP13:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x2_t [[TMP13]]
 poly64x1x2_t test_vld2_lane_p64(poly64_t  *a, poly64x1x2_t b) {
@@ -3115,7 +3111,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
@@ -3138,7 +3134,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x3_t [[TMP16]]
 uint16x8x3_t test_vld3q_lane_u16(uint16_t  *a, uint16x8x3_t b) {
@@ -3154,7 +3150,7 @@
 // CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
@@ -3177,7 +3173,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x3_t [[TMP16]]
 uint32x4x3_t test_vld3q_lane_u32(uint32_t  *a, uint32x4x3_t b) {
@@ -3193,7 +3189,7 @@
 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
@@ -3216,7 +3212,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x3_t [[TMP16]]
 uint64x2x3_t test_vld3q_lane_u64(uint64_t  *a, uint64x2x3_t b) {
@@ -3232,7 +3228,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
@@ -3255,7 +3251,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x3_t [[TMP16]]
 int16x8x3_t test_vld3q_lane_s16(int16_t  *a, int16x8x3_t b) {
@@ -3271,7 +3267,7 @@
 // CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
@@ -3294,7 +3290,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x3_t [[TMP16]]
 int32x4x3_t test_vld3q_lane_s32(int32_t  *a, int32x4x3_t b) {
@@ -3310,7 +3306,7 @@
 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
@@ -3333,7 +3329,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x3_t [[TMP16]]
 int64x2x3_t test_vld3q_lane_s64(int64_t  *a, int64x2x3_t b) {
@@ -3349,7 +3345,7 @@
 // CHECK:   store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
@@ -3364,15 +3360,15 @@
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
 // CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
-// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK:   [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, i8* [[TMP3]])
-// CHECK:   [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]]
+// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
+// CHECK:   [[VLD3_LANE:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3lane.v8f16.p0i8(<8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i64 7, i8* [[TMP3]])
+// CHECK:   [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD3_LANE]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x3_t [[TMP16]]
 float16x8x3_t test_vld3q_lane_f16(float16_t  *a, float16x8x3_t b) {
@@ -3388,7 +3384,7 @@
 // CHECK:   store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
@@ -3411,7 +3407,7 @@
 // CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x3_t [[TMP16]]
 float32x4x3_t test_vld3q_lane_f32(float32_t  *a, float32x4x3_t b) {
@@ -3427,7 +3423,7 @@
 // CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
@@ -3450,7 +3446,7 @@
 // CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD3_LANE]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x3_t [[TMP16]]
 float64x2x3_t test_vld3q_lane_f64(float64_t  *a, float64x2x3_t b) {
@@ -3466,7 +3462,7 @@
 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
@@ -3482,7 +3478,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]]
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP7]], i8* align 16 [[TMP8]], i64 48, i1 false)
 // CHECK:   [[TMP9:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x3_t [[TMP9]]
 poly8x16x3_t test_vld3q_lane_p8(poly8_t  *a, poly8x16x3_t b) {
@@ -3498,7 +3494,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
@@ -3521,7 +3517,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x3_t [[TMP16]]
 poly16x8x3_t test_vld3q_lane_p16(poly16_t  *a, poly16x8x3_t b) {
@@ -3537,7 +3533,7 @@
 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
@@ -3560,7 +3556,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x3_t [[TMP16]]
 poly64x2x3_t test_vld3q_lane_p64(poly64_t  *a, poly64x2x3_t b) {
@@ -3576,7 +3572,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
@@ -3592,7 +3588,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]]
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 [[TMP8]], i64 24, i1 false)
 // CHECK:   [[TMP9:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x3_t [[TMP9]]
 uint8x8x3_t test_vld3_lane_u8(uint8_t  *a, uint8x8x3_t b) {
@@ -3608,7 +3604,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
@@ -3631,7 +3627,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x3_t [[TMP16]]
 uint16x4x3_t test_vld3_lane_u16(uint16_t  *a, uint16x4x3_t b) {
@@ -3647,7 +3643,7 @@
 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
@@ -3670,7 +3666,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x3_t [[TMP16]]
 uint32x2x3_t test_vld3_lane_u32(uint32_t  *a, uint32x2x3_t b) {
@@ -3686,7 +3682,7 @@
 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
@@ -3709,7 +3705,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x3_t [[TMP16]]
 uint64x1x3_t test_vld3_lane_u64(uint64_t  *a, uint64x1x3_t b) {
@@ -3725,7 +3721,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
@@ -3741,7 +3737,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]]
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 [[TMP8]], i64 24, i1 false)
 // CHECK:   [[TMP9:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x3_t [[TMP9]]
 int8x8x3_t test_vld3_lane_s8(int8_t  *a, int8x8x3_t b) {
@@ -3757,7 +3753,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
@@ -3780,7 +3776,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x3_t [[TMP16]]
 int16x4x3_t test_vld3_lane_s16(int16_t  *a, int16x4x3_t b) {
@@ -3796,7 +3792,7 @@
 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
@@ -3819,7 +3815,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x3_t [[TMP16]]
 int32x2x3_t test_vld3_lane_s32(int32_t  *a, int32x2x3_t b) {
@@ -3835,7 +3831,7 @@
 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
@@ -3858,7 +3854,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x3_t [[TMP16]]
 int64x1x3_t test_vld3_lane_s64(int64_t  *a, int64x1x3_t b) {
@@ -3874,7 +3870,7 @@
 // CHECK:   store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
@@ -3889,15 +3885,15 @@
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
 // CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
-// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK:   [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, i8* [[TMP3]])
-// CHECK:   [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]]
+// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
+// CHECK:   [[VLD3_LANE:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3lane.v4f16.p0i8(<4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i64 3, i8* [[TMP3]])
+// CHECK:   [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x3_t [[TMP16]]
 float16x4x3_t test_vld3_lane_f16(float16_t  *a, float16x4x3_t b) {
@@ -3913,7 +3909,7 @@
 // CHECK:   store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
@@ -3936,7 +3932,7 @@
 // CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x3_t [[TMP16]]
 float32x2x3_t test_vld3_lane_f32(float32_t  *a, float32x2x3_t b) {
@@ -3952,7 +3948,7 @@
 // CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
@@ -3975,7 +3971,7 @@
 // CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD3_LANE]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x3_t [[TMP16]]
 float64x1x3_t test_vld3_lane_f64(float64_t  *a, float64x1x3_t b) {
@@ -3991,7 +3987,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
@@ -4007,7 +4003,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]]
 // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 [[TMP8]], i64 24, i1 false)
 // CHECK:   [[TMP9:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x3_t [[TMP9]]
 poly8x8x3_t test_vld3_lane_p8(poly8_t  *a, poly8x8x3_t b) {
@@ -4023,7 +4019,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
@@ -4046,7 +4042,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x3_t [[TMP16]]
 poly16x4x3_t test_vld3_lane_p16(poly16_t  *a, poly16x4x3_t b) {
@@ -4062,7 +4058,7 @@
 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
@@ -4085,7 +4081,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]]
 // CHECK:   [[TMP14:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP15:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false)
 // CHECK:   [[TMP16:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x3_t [[TMP16]]
 poly64x1x3_t test_vld3_lane_p64(poly64_t  *a, poly64x1x3_t b) {
@@ -4101,7 +4097,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
@@ -4120,7 +4116,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]]
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP9:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP8]], i8* align 16 [[TMP9]], i64 64, i1 false)
 // CHECK:   [[TMP10:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint8x16x4_t [[TMP10]]
 uint8x16x4_t test_vld4q_lane_u8(uint8_t  *a, uint8x16x4_t b) {
@@ -4136,7 +4132,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
@@ -4164,7 +4160,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint16x8x4_t [[TMP19]]
 uint16x8x4_t test_vld4q_lane_u16(uint16_t  *a, uint16x8x4_t b) {
@@ -4180,7 +4176,7 @@
 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
@@ -4208,7 +4204,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint32x4x4_t [[TMP19]]
 uint32x4x4_t test_vld4q_lane_u32(uint32_t  *a, uint32x4x4_t b) {
@@ -4224,7 +4220,7 @@
 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
@@ -4252,7 +4248,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.uint64x2x4_t [[TMP19]]
 uint64x2x4_t test_vld4q_lane_u64(uint64_t  *a, uint64x2x4_t b) {
@@ -4268,7 +4264,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
@@ -4287,7 +4283,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]]
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP9:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP8]], i8* align 16 [[TMP9]], i64 64, i1 false)
 // CHECK:   [[TMP10:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int8x16x4_t [[TMP10]]
 int8x16x4_t test_vld4q_lane_s8(int8_t  *a, int8x16x4_t b) {
@@ -4303,7 +4299,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
@@ -4331,7 +4327,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int16x8x4_t [[TMP19]]
 int16x8x4_t test_vld4q_lane_s16(int16_t  *a, int16x8x4_t b) {
@@ -4347,7 +4343,7 @@
 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
@@ -4375,7 +4371,7 @@
 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int32x4x4_t [[TMP19]]
 int32x4x4_t test_vld4q_lane_s32(int32_t  *a, int32x4x4_t b) {
@@ -4391,7 +4387,7 @@
 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
@@ -4419,7 +4415,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.int64x2x4_t [[TMP19]]
 int64x2x4_t test_vld4q_lane_s64(int64_t  *a, int64x2x4_t b) {
@@ -4435,7 +4431,7 @@
 // CHECK:   store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
@@ -4454,16 +4450,16 @@
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
 // CHECK:   [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK:   [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, i8* [[TMP3]])
-// CHECK:   [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]]
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
+// CHECK:   [[VLD4_LANE:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4lane.v8f16.p0i8(<8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i64 7, i8* [[TMP3]])
+// CHECK:   [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
+// CHECK:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4_LANE]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float16x8x4_t [[TMP19]]
 float16x8x4_t test_vld4q_lane_f16(float16_t  *a, float16x8x4_t b) {
@@ -4479,7 +4475,7 @@
 // CHECK:   store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
@@ -4507,7 +4503,7 @@
 // CHECK:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float32x4x4_t [[TMP19]]
 float32x4x4_t test_vld4q_lane_f32(float32_t  *a, float32x4x4_t b) {
@@ -4523,7 +4519,7 @@
 // CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
@@ -4551,7 +4547,7 @@
 // CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.float64x2x4_t [[TMP19]]
 float64x2x4_t test_vld4q_lane_f64(float64_t  *a, float64x2x4_t b) {
@@ -4567,7 +4563,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
@@ -4586,7 +4582,7 @@
 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]]
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP9:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP8]], i8* align 16 [[TMP9]], i64 64, i1 false)
 // CHECK:   [[TMP10:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly8x16x4_t [[TMP10]]
 poly8x16x4_t test_vld4q_lane_p8(poly8_t  *a, poly8x16x4_t b) {
@@ -4602,7 +4598,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
@@ -4630,7 +4626,7 @@
 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly16x8x4_t [[TMP19]]
 poly16x8x4_t test_vld4q_lane_p16(poly16_t  *a, poly16x8x4_t b) {
@@ -4646,7 +4642,7 @@
 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
@@ -4674,7 +4670,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x4_t [[TMP19]]
 poly64x2x4_t test_vld4q_lane_p64(poly64_t  *a, poly64x2x4_t b) {
@@ -4690,7 +4686,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
@@ -4709,7 +4705,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP9:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 32, i1 false)
 // CHECK:   [[TMP10:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint8x8x4_t [[TMP10]]
 uint8x8x4_t test_vld4_lane_u8(uint8_t  *a, uint8x8x4_t b) {
@@ -4725,7 +4721,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
@@ -4753,7 +4749,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint16x4x4_t [[TMP19]]
 uint16x4x4_t test_vld4_lane_u16(uint16_t  *a, uint16x4x4_t b) {
@@ -4769,7 +4765,7 @@
 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
@@ -4797,7 +4793,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint32x2x4_t [[TMP19]]
 uint32x2x4_t test_vld4_lane_u32(uint32_t  *a, uint32x2x4_t b) {
@@ -4813,7 +4809,7 @@
 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
@@ -4841,7 +4837,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.uint64x1x4_t [[TMP19]]
 uint64x1x4_t test_vld4_lane_u64(uint64_t  *a, uint64x1x4_t b) {
@@ -4857,7 +4853,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
@@ -4876,7 +4872,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP9:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 32, i1 false)
 // CHECK:   [[TMP10:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int8x8x4_t [[TMP10]]
 int8x8x4_t test_vld4_lane_s8(int8_t  *a, int8x8x4_t b) {
@@ -4892,7 +4888,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
@@ -4920,7 +4916,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int16x4x4_t [[TMP19]]
 int16x4x4_t test_vld4_lane_s16(int16_t  *a, int16x4x4_t b) {
@@ -4936,7 +4932,7 @@
 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
@@ -4964,7 +4960,7 @@
 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int32x2x4_t [[TMP19]]
 int32x2x4_t test_vld4_lane_s32(int32_t  *a, int32x2x4_t b) {
@@ -4980,7 +4976,7 @@
 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
@@ -5008,7 +5004,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.int64x1x4_t [[TMP19]]
 int64x1x4_t test_vld4_lane_s64(int64_t  *a, int64x1x4_t b) {
@@ -5024,7 +5020,7 @@
 // CHECK:   store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
@@ -5043,16 +5039,16 @@
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
 // CHECK:   [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK:   [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, i8* [[TMP3]])
-// CHECK:   [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]]
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
+// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
+// CHECK:   [[VLD4_LANE:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4lane.v4f16.p0i8(<4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i64 3, i8* [[TMP3]])
+// CHECK:   [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
+// CHECK:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float16x4x4_t [[TMP19]]
 float16x4x4_t test_vld4_lane_f16(float16_t  *a, float16x4x4_t b) {
@@ -5068,7 +5064,7 @@
 // CHECK:   store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
@@ -5096,7 +5092,7 @@
 // CHECK:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float32x2x4_t [[TMP19]]
 float32x2x4_t test_vld4_lane_f32(float32_t  *a, float32x2x4_t b) {
@@ -5112,7 +5108,7 @@
 // CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
@@ -5140,7 +5136,7 @@
 // CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.float64x1x4_t [[TMP19]]
 float64x1x4_t test_vld4_lane_f64(float64_t  *a, float64x1x4_t b) {
@@ -5156,7 +5152,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
@@ -5175,7 +5171,7 @@
 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
 // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP9:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 32, i1 false)
 // CHECK:   [[TMP10:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly8x8x4_t [[TMP10]]
 poly8x8x4_t test_vld4_lane_p8(poly8_t  *a, poly8x8x4_t b) {
@@ -5191,7 +5187,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
@@ -5219,7 +5215,7 @@
 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly16x4x4_t [[TMP19]]
 poly16x4x4_t test_vld4_lane_p16(poly16_t  *a, poly16x4x4_t b) {
@@ -5235,7 +5231,7 @@
 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
@@ -5263,7 +5259,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]]
 // CHECK:   [[TMP17:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP18:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false)
 // CHECK:   [[TMP19:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x4_t [[TMP19]]
 poly64x1x4_t test_vld4_lane_p64(poly64_t  *a, poly64x1x4_t b) {
@@ -5361,10 +5357,10 @@
 // CHECK-LABEL: define void @test_vst1q_lane_f16(half* %a, <8 x half> %b) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
-// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   store i16 [[TMP3]], i16* [[TMP4]]
+// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK:   [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
+// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   store half [[TMP3]], half* [[TMP4]]
 // CHECK:   ret void
 void test_vst1q_lane_f16(float16_t  *a, float16x8_t b) {
   vst1q_lane_f16(a, b, 7);
@@ -5517,10 +5513,10 @@
 // CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #0 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
-// CHECK:   store i16 [[TMP3]], i16* [[TMP4]]
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK:   [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
+// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
+// CHECK:   store half [[TMP3]], half* [[TMP4]]
 // CHECK:   ret void
 void test_vst1_lane_f16(float16_t  *a, float16x4_t b) {
   vst1_lane_f16(a, b, 3);
@@ -5589,7 +5585,7 @@
 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -5609,7 +5605,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -5634,7 +5630,7 @@
 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -5659,7 +5655,7 @@
 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -5684,7 +5680,7 @@
 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -5704,7 +5700,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -5729,7 +5725,7 @@
 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -5754,7 +5750,7 @@
 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -5779,7 +5775,7 @@
 // CHECK:   store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
@@ -5789,9 +5785,9 @@
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st2lane.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, i8* [[TMP2]])
+// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st2lane.v8f16.p0i8(<8 x half> [[TMP7]], <8 x half> [[TMP8]], i64 7, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst2q_lane_f16(float16_t  *a, float16x8x2_t b) {
   vst2q_lane_f16(a, b, 7);
@@ -5804,7 +5800,7 @@
 // CHECK:   store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
@@ -5829,7 +5825,7 @@
 // CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
@@ -5854,7 +5850,7 @@
 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -5874,7 +5870,7 @@
 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -5899,7 +5895,7 @@
 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -5924,7 +5920,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -5944,7 +5940,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -5969,7 +5965,7 @@
 // CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -5994,7 +5990,7 @@
 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -6019,7 +6015,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -6039,7 +6035,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -6064,7 +6060,7 @@
 // CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -6089,7 +6085,7 @@
 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -6114,7 +6110,7 @@
 // CHECK:   store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
@@ -6124,9 +6120,9 @@
 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st2lane.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, i8* [[TMP2]])
+// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st2lane.v4f16.p0i8(<4 x half> [[TMP7]], <4 x half> [[TMP8]], i64 3, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst2_lane_f16(float16_t  *a, float16x4x2_t b) {
   vst2_lane_f16(a, b, 3);
@@ -6139,7 +6135,7 @@
 // CHECK:   store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
@@ -6164,7 +6160,7 @@
 // CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
@@ -6189,7 +6185,7 @@
 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -6209,7 +6205,7 @@
 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -6234,7 +6230,7 @@
 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -6259,7 +6255,7 @@
 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -6282,7 +6278,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -6312,7 +6308,7 @@
 // CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -6342,7 +6338,7 @@
 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -6372,7 +6368,7 @@
 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -6395,7 +6391,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -6425,7 +6421,7 @@
 // CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -6455,7 +6451,7 @@
 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -6485,7 +6481,7 @@
 // CHECK:   store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
@@ -6499,10 +6495,10 @@
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, i8* [[TMP2]])
+// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st3lane.v8f16.p0i8(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i64 7, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst3q_lane_f16(float16_t  *a, float16x8x3_t b) {
   vst3q_lane_f16(a, b, 7);
@@ -6515,7 +6511,7 @@
 // CHECK:   store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
@@ -6545,7 +6541,7 @@
 // CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
@@ -6575,7 +6571,7 @@
 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -6598,7 +6594,7 @@
 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -6628,7 +6624,7 @@
 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -6658,7 +6654,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -6681,7 +6677,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -6711,7 +6707,7 @@
 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -6741,7 +6737,7 @@
 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -6771,7 +6767,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -6794,7 +6790,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -6824,7 +6820,7 @@
 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -6854,7 +6850,7 @@
 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -6884,7 +6880,7 @@
 // CHECK:   store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
@@ -6898,10 +6894,10 @@
 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, i8* [[TMP2]])
+// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st3lane.v4f16.p0i8(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i64 3, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst3_lane_f16(float16_t  *a, float16x4x3_t b) {
   vst3_lane_f16(a, b, 3);
@@ -6914,7 +6910,7 @@
 // CHECK:   store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
@@ -6944,7 +6940,7 @@
 // CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
@@ -6974,7 +6970,7 @@
 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -6997,7 +6993,7 @@
 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -7027,7 +7023,7 @@
 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -7057,7 +7053,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -7083,7 +7079,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -7118,7 +7114,7 @@
 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -7153,7 +7149,7 @@
 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -7188,7 +7184,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -7214,7 +7210,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -7249,7 +7245,7 @@
 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
@@ -7284,7 +7280,7 @@
 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -7319,7 +7315,7 @@
 // CHECK:   store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
@@ -7337,11 +7333,11 @@
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, i8* [[TMP2]])
+// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
+// CHECK:   call void @llvm.aarch64.neon.st4lane.v8f16.p0i8(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], i64 7, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst4q_lane_f16(float16_t  *a, float16x8x4_t b) {
   vst4q_lane_f16(a, b, 7);
@@ -7354,7 +7350,7 @@
 // CHECK:   store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
@@ -7389,7 +7385,7 @@
 // CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
@@ -7424,7 +7420,7 @@
 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -7450,7 +7446,7 @@
 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
@@ -7485,7 +7481,7 @@
 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
@@ -7520,7 +7516,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -7546,7 +7542,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -7581,7 +7577,7 @@
 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -7616,7 +7612,7 @@
 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -7651,7 +7647,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -7677,7 +7673,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -7712,7 +7708,7 @@
 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
@@ -7747,7 +7743,7 @@
 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
@@ -7782,7 +7778,7 @@
 // CHECK:   store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
@@ -7800,11 +7796,11 @@
 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
 // CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
 // CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK:   call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, i8* [[TMP2]])
+// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
+// CHECK:   call void @llvm.aarch64.neon.st4lane.v4f16.p0i8(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], i64 3, i8* [[TMP2]])
 // CHECK:   ret void
 void test_vst4_lane_f16(float16_t  *a, float16x4x4_t b) {
   vst4_lane_f16(a, b, 3);
@@ -7817,7 +7813,7 @@
 // CHECK:   store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
@@ -7852,7 +7848,7 @@
 // CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
@@ -7887,7 +7883,7 @@
 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -7913,7 +7909,7 @@
 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
@@ -7948,7 +7944,7 @@
 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
diff --git a/test/CodeGen/aarch64-neon-perm.c b/test/CodeGen/aarch64-neon-perm.c
index 471017a..c24447b 100644
--- a/test/CodeGen/aarch64-neon-perm.c
+++ b/test/CodeGen/aarch64-neon-perm.c
@@ -899,7 +899,7 @@
 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
@@ -925,7 +925,7 @@
 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
@@ -951,7 +951,7 @@
 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
@@ -975,7 +975,7 @@
 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
@@ -1001,7 +1001,7 @@
 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
@@ -1027,7 +1027,7 @@
 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
@@ -1053,7 +1053,7 @@
 // CHECK:   store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
@@ -1077,7 +1077,7 @@
 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
@@ -1103,7 +1103,7 @@
 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
@@ -1127,7 +1127,7 @@
 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
@@ -1153,7 +1153,7 @@
 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
@@ -1179,7 +1179,7 @@
 // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
@@ -1203,7 +1203,7 @@
 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
@@ -1229,7 +1229,7 @@
 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
@@ -1255,7 +1255,7 @@
 // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
@@ -1281,7 +1281,7 @@
 // CHECK:   store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
@@ -1305,7 +1305,7 @@
 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
@@ -1331,7 +1331,7 @@
 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
@@ -1355,7 +1355,7 @@
 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
@@ -1381,7 +1381,7 @@
 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
@@ -1407,7 +1407,7 @@
 // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
@@ -1431,7 +1431,7 @@
 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
@@ -1457,7 +1457,7 @@
 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
@@ -1483,7 +1483,7 @@
 // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
@@ -1509,7 +1509,7 @@
 // CHECK:   store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
@@ -1533,7 +1533,7 @@
 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
@@ -1559,7 +1559,7 @@
 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
@@ -1583,7 +1583,7 @@
 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
@@ -1609,7 +1609,7 @@
 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
@@ -1635,7 +1635,7 @@
 // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
@@ -1659,7 +1659,7 @@
 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
@@ -1685,7 +1685,7 @@
 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
@@ -1711,7 +1711,7 @@
 // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
@@ -1737,7 +1737,7 @@
 // CHECK:   store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
@@ -1761,7 +1761,7 @@
 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
@@ -1787,7 +1787,7 @@
 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
@@ -1811,7 +1811,7 @@
 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
@@ -1837,7 +1837,7 @@
 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
@@ -1863,7 +1863,7 @@
 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
@@ -1887,7 +1887,7 @@
 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
@@ -1913,7 +1913,7 @@
 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
@@ -1939,7 +1939,7 @@
 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
@@ -1965,7 +1965,7 @@
 // CHECK:   store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
@@ -1989,7 +1989,7 @@
 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
@@ -2015,7 +2015,7 @@
 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
@@ -2039,7 +2039,7 @@
 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
@@ -2065,7 +2065,7 @@
 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
@@ -2091,7 +2091,7 @@
 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
@@ -2115,7 +2115,7 @@
 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
@@ -2141,7 +2141,7 @@
 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
@@ -2167,7 +2167,7 @@
 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
@@ -2193,7 +2193,7 @@
 // CHECK:   store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
@@ -2217,7 +2217,7 @@
 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2
 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
@@ -2243,7 +2243,7 @@
 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2
 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
diff --git a/test/CodeGen/aarch64-poly64.c b/test/CodeGen/aarch64-poly64.c
index 3fb8048..b70e5f0 100644
--- a/test/CodeGen/aarch64-poly64.c
+++ b/test/CodeGen/aarch64-poly64.c
@@ -248,7 +248,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x2_t [[TMP6]]
 poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
@@ -266,7 +266,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x2_t [[TMP6]]
 poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
@@ -284,7 +284,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x3_t [[TMP6]]
 poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
@@ -302,7 +302,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x3_t [[TMP6]]
 poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
@@ -320,7 +320,7 @@
 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
 // CHECK:   ret %struct.poly64x1x4_t [[TMP6]]
 poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
@@ -338,7 +338,7 @@
 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
 // CHECK:   ret %struct.poly64x2x4_t [[TMP6]]
 poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
@@ -352,7 +352,7 @@
 // CHECK:   store [2 x <1 x i64>] [[VAL]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[VAL]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 0
@@ -377,7 +377,7 @@
 // CHECK:   store [2 x <2 x i64>] [[VAL]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[VAL]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 0
@@ -402,7 +402,7 @@
 // CHECK:   store [3 x <1 x i64>] [[VAL]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[VAL]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 0
@@ -432,7 +432,7 @@
 // CHECK:   store [3 x <2 x i64>] [[VAL]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[VAL]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 0
@@ -462,7 +462,7 @@
 // CHECK:   store [4 x <1 x i64>] [[VAL]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[VAL]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 0
@@ -497,7 +497,7 @@
 // CHECK:   store [4 x <2 x i64>] [[VAL]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[VAL]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
 // CHECK:   [[TMP2:%.*]] = bitcast i64* %ptr to i8*
 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 0
diff --git a/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c b/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
new file mode 100644
index 0000000..0390a87
--- /dev/null
+++ b/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
@@ -0,0 +1,659 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16\
+// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: | opt -S -mem2reg \
+// RUN: | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_fp16.h>
+
+// CHECK-LABEL: test_vabsh_f16
+// CHECK:  [[ABS:%.*]] =  call half @llvm.fabs.f16(half %a)
+// CHECK:  ret half [[ABS]]
+float16_t test_vabsh_f16(float16_t a) {
+  return vabsh_f16(a);
+}
+
+// CHECK-LABEL: test_vceqzh_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oeq half %a, 0xH0000
+// CHECK:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// CHECK:  ret i16 [[TMP2]]
+uint16_t test_vceqzh_f16(float16_t a) {
+  return vceqzh_f16(a);
+}
+
+// CHECK-LABEL: test_vcgezh_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oge half %a, 0xH0000
+// CHECK:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// CHECK:  ret i16 [[TMP2]]
+uint16_t test_vcgezh_f16(float16_t a) {
+  return vcgezh_f16(a);
+}
+
+// CHECK-LABEL: test_vcgtzh_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ogt half %a, 0xH0000
+// CHECK:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// CHECK:  ret i16 [[TMP2]]
+uint16_t test_vcgtzh_f16(float16_t a) {
+  return vcgtzh_f16(a);
+}
+
+// CHECK-LABEL: test_vclezh_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ole half %a, 0xH0000
+// CHECK:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// CHECK:  ret i16 [[TMP2]]
+uint16_t test_vclezh_f16(float16_t a) {
+  return vclezh_f16(a);
+}
+
+// CHECK-LABEL: test_vcltzh_f16
+// CHECK:  [[TMP1:%.*]] = fcmp olt half %a, 0xH0000
+// CHECK:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// CHECK:  ret i16 [[TMP2]]
+uint16_t test_vcltzh_f16(float16_t a) {
+  return vcltzh_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_s16
+// CHECK:  [[VCVT:%.*]] = sitofp i16 %a to half
+// CHECK:  ret half [[VCVT]]
+float16_t test_vcvth_f16_s16 (int16_t a) {
+  return vcvth_f16_s16(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_s32
+// CHECK:  [[VCVT:%.*]] = sitofp i32 %a to half
+// CHECK:  ret half [[VCVT]]
+float16_t test_vcvth_f16_s32 (int32_t a) {
+  return vcvth_f16_s32(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_s64
+// CHECK:  [[VCVT:%.*]] = sitofp i64 %a to half
+// CHECK:  ret half [[VCVT]]
+float16_t test_vcvth_f16_s64 (int64_t a) {
+  return vcvth_f16_s64(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_u16
+// CHECK:  [[VCVT:%.*]] = uitofp i16 %a to half
+// CHECK:  ret half [[VCVT]]
+float16_t test_vcvth_f16_u16 (uint16_t a) {
+  return vcvth_f16_u16(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_u32
+// CHECK:  [[VCVT:%.*]] = uitofp i32 %a to half
+// CHECK:  ret half [[VCVT]]
+float16_t test_vcvth_f16_u32 (uint32_t a) {
+  return vcvth_f16_u32(a);
+}
+
+// CHECK-LABEL: test_vcvth_f16_u64
+// CHECK:  [[VCVT:%.*]] = uitofp i64 %a to half
+// CHECK:  ret half [[VCVT]]
+float16_t test_vcvth_f16_u64 (uint64_t a) {
+  return vcvth_f16_u64(a);
+}
+
+// CHECK-LABEL: test_vcvth_s16_f16
+// CHECK:  [[VCVT:%.*]] = fptosi half %a to i16
+// CHECK:  ret i16 [[VCVT]]
+int16_t test_vcvth_s16_f16 (float16_t a) {
+  return vcvth_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_s32_f16
+// CHECK:  [[VCVT:%.*]] = fptosi half %a to i32
+// CHECK:  ret i32 [[VCVT]]
+int32_t test_vcvth_s32_f16 (float16_t a) {
+  return vcvth_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_s64_f16
+// CHECK:  [[VCVT:%.*]] = fptosi half %a to i64
+// CHECK:  ret i64 [[VCVT]]
+int64_t test_vcvth_s64_f16 (float16_t a) {
+  return vcvth_s64_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_u16_f16
+// CHECK:  [[VCVT:%.*]] = fptoui half %a to i16
+// CHECK:  ret i16 [[VCVT]]
+uint16_t test_vcvth_u16_f16 (float16_t a) {
+  return vcvth_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_u32_f16
+// CHECK:  [[VCVT:%.*]] = fptoui half %a to i32
+// CHECK:  ret i32 [[VCVT]]
+uint32_t test_vcvth_u32_f16 (float16_t a) {
+  return vcvth_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvth_u64_f16
+// CHECK:  [[VCVT:%.*]] = fptoui half %a to i64
+// CHECK:  ret i64 [[VCVT]]
+uint64_t test_vcvth_u64_f16 (float16_t a) {
+  return vcvth_u64_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_s16_f16
+// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
+// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// CHECK: ret i16 [[RET]]
+int16_t test_vcvtah_s16_f16 (float16_t a) {
+  return vcvtah_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtah_s32_f16 (float16_t a) {
+  return vcvtah_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_s64_f16
+// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
+// CHECK: ret i64 [[VCVT]]
+int64_t test_vcvtah_s64_f16 (float16_t a) {
+  return vcvtah_s64_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_u16_f16
+// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
+// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// CHECK: ret i16 [[RET]]
+uint16_t test_vcvtah_u16_f16 (float16_t a) {
+  return vcvtah_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtah_u32_f16 (float16_t a) {
+  return vcvtah_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtah_u64_f16
+// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
+// CHECK: ret i64 [[VCVT]]
+uint64_t test_vcvtah_u64_f16 (float16_t a) {
+  return vcvtah_u64_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_s16_f16
+// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
+// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// CHECK: ret i16 [[RET]]
+int16_t test_vcvtmh_s16_f16 (float16_t a) {
+  return vcvtmh_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtmh_s32_f16 (float16_t a) {
+  return vcvtmh_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_s64_f16
+// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
+// CHECK: ret i64 [[VCVT]]
+int64_t test_vcvtmh_s64_f16 (float16_t a) {
+  return vcvtmh_s64_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_u16_f16
+// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
+// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// CHECK: ret i16 [[RET]]
+uint16_t test_vcvtmh_u16_f16 (float16_t a) {
+  return vcvtmh_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtmh_u32_f16 (float16_t a) {
+  return vcvtmh_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmh_u64_f16
+// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
+// CHECK: ret i64 [[VCVT]]
+uint64_t test_vcvtmh_u64_f16 (float16_t a) {
+  return vcvtmh_u64_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_s16_f16
+// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
+// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// CHECK: ret i16 [[RET]]
+int16_t test_vcvtnh_s16_f16 (float16_t a) {
+  return vcvtnh_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtnh_s32_f16 (float16_t a) {
+  return vcvtnh_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_s64_f16
+// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
+// CHECK: ret i64 [[VCVT]]
+int64_t test_vcvtnh_s64_f16 (float16_t a) {
+  return vcvtnh_s64_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_u16_f16
+// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
+// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// CHECK: ret i16 [[RET]]
+uint16_t test_vcvtnh_u16_f16 (float16_t a) {
+  return vcvtnh_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtnh_u32_f16 (float16_t a) {
+  return vcvtnh_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnh_u64_f16
+// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
+// CHECK: ret i64 [[VCVT]]
+uint64_t test_vcvtnh_u64_f16 (float16_t a) {
+  return vcvtnh_u64_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_s16_f16
+// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
+// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// CHECK: ret i16 [[RET]]
+int16_t test_vcvtph_s16_f16 (float16_t a) {
+  return vcvtph_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_s32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
+// CHECK: ret i32 [[VCVT]]
+int32_t test_vcvtph_s32_f16 (float16_t a) {
+  return vcvtph_s32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_s64_f16
+// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
+// CHECK: ret i64 [[VCVT]]
+int64_t test_vcvtph_s64_f16 (float16_t a) {
+  return vcvtph_s64_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_u16_f16
+// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
+// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// CHECK: ret i16 [[RET]]
+uint16_t test_vcvtph_u16_f16 (float16_t a) {
+  return vcvtph_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_u32_f16
+// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
+// CHECK: ret i32 [[VCVT]]
+uint32_t test_vcvtph_u32_f16 (float16_t a) {
+  return vcvtph_u32_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtph_u64_f16
+// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
+// CHECK: ret i64 [[VCVT]]
+uint64_t test_vcvtph_u64_f16 (float16_t a) {
+  return vcvtph_u64_f16(a);
+}
+
+// CHECK-LABEL: test_vnegh_f16
+// CHECK: [[NEG:%.*]] = fsub half 0xH8000, %a
+// CHECK: ret half [[NEG]]
+float16_t test_vnegh_f16(float16_t a) {
+  return vnegh_f16(a);
+}
+
+// CHECK-LABEL: test_vrecpeh_f16
+// CHECK: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpe.f16(half %a)
+// CHECK: ret half [[VREC]]
+float16_t test_vrecpeh_f16(float16_t a) {
+  return vrecpeh_f16(a);
+}
+
+// CHECK-LABEL: test_vrecpxh_f16
+// CHECK: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpx.f16(half %a)
+// CHECK: ret half [[VREC]]
+float16_t test_vrecpxh_f16(float16_t a) {
+  return vrecpxh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndh_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.trunc.f16(half %a)
+// CHECK:  ret half [[RND]]
+float16_t test_vrndh_f16(float16_t a) {
+  return vrndh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndah_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.round.f16(half %a)
+// CHECK:  ret half [[RND]]
+float16_t test_vrndah_f16(float16_t a) {
+  return vrndah_f16(a);
+}
+
+// CHECK-LABEL: test_vrndih_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.nearbyint.f16(half %a)
+// CHECK:  ret half [[RND]]
+float16_t test_vrndih_f16(float16_t a) {
+  return vrndih_f16(a);
+}
+
+// CHECK-LABEL: test_vrndmh_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.floor.f16(half %a)
+// CHECK:  ret half [[RND]]
+float16_t test_vrndmh_f16(float16_t a) {
+  return vrndmh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndnh_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.aarch64.neon.frintn.f16(half %a)
+// CHECK:  ret half [[RND]]
+float16_t test_vrndnh_f16(float16_t a) {
+  return vrndnh_f16(a);
+}
+
+// CHECK-LABEL: test_vrndph_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.ceil.f16(half %a)
+// CHECK:  ret half [[RND]]
+float16_t test_vrndph_f16(float16_t a) {
+  return vrndph_f16(a);
+}
+
+// CHECK-LABEL: test_vrndxh_f16
+// CHECK:  [[RND:%.*]] =  call half @llvm.rint.f16(half %a)
+// CHECK:  ret half [[RND]]
+float16_t test_vrndxh_f16(float16_t a) {
+  return vrndxh_f16(a);
+}
+
+// CHECK-LABEL: test_vrsqrteh_f16
+// CHECK:  [[RND:%.*]] = call half @llvm.aarch64.neon.frsqrte.f16(half %a)
+// CHECK:  ret half [[RND]]
+float16_t test_vrsqrteh_f16(float16_t a) {
+  return vrsqrteh_f16(a);
+}
+
+// CHECK-LABEL: test_vsqrth_f16
+// CHECK:  [[SQR:%.*]] = call half @llvm.sqrt.f16(half %a)
+// CHECK:  ret half [[SQR]]
+float16_t test_vsqrth_f16(float16_t a) {
+  return vsqrth_f16(a);
+}
+
+// CHECK-LABEL: test_vaddh_f16
+// CHECK:  [[ADD:%.*]] = fadd half %a, %b
+// CHECK:  ret half [[ADD]]
+float16_t test_vaddh_f16(float16_t a, float16_t b) {
+  return vaddh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vabdh_f16
+// CHECK:  [[ABD:%.*]] = call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b)
+// CHECK:  ret half [[ABD]]
+float16_t test_vabdh_f16(float16_t a, float16_t b) {
+  return vabdh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcageh_f16
+// CHECK:  [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f16(half %a, half %b)
+// CHECK: [[RET:%.*]] = trunc i32 [[FACG]] to i16
+// CHECK: ret i16 [[RET]]
+uint16_t test_vcageh_f16(float16_t a, float16_t b) {
+  return vcageh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcagth_f16
+// CHECK:  [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f16(half %a, half %b)
+// CHECK: [[RET:%.*]] = trunc i32 [[FACG]] to i16
+// CHECK: ret i16 [[RET]]
+uint16_t test_vcagth_f16(float16_t a, float16_t b) {
+  return vcagth_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcaleh_f16
+// CHECK:  [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f16(half %b, half %a)
+// CHECK: [[RET:%.*]] = trunc i32 [[FACG]] to i16
+// CHECK: ret i16 [[RET]]
+uint16_t test_vcaleh_f16(float16_t a, float16_t b) {
+  return vcaleh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcalth_f16
+// CHECK:  [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f16(half %b, half %a)
+// CHECK: [[RET:%.*]] = trunc i32 [[FACG]] to i16
+// CHECK: ret i16 [[RET]]
+uint16_t test_vcalth_f16(float16_t a, float16_t b) {
+  return vcalth_f16(a, b);
+}
+
+// CHECK-LABEL: test_vceqh_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oeq half %a, %b
+// CHECK:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// CHECK:  ret i16 [[TMP2]]
+uint16_t test_vceqh_f16(float16_t a, float16_t b) {
+  return vceqh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcgeh_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oge half %a, %b
+// CHECK:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// CHECK:  ret i16 [[TMP2]]
+uint16_t test_vcgeh_f16(float16_t a, float16_t b) {
+  return vcgeh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcgth_f16
+//CHECK:  [[TMP1:%.*]] = fcmp ogt half %a, %b
+// CHECK:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// CHECK:  ret i16 [[TMP2]]
+uint16_t test_vcgth_f16(float16_t a, float16_t b) {
+  return vcgth_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcleh_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ole half %a, %b
+// CHECK:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// CHECK:  ret i16 [[TMP2]]
+uint16_t test_vcleh_f16(float16_t a, float16_t b) {
+  return vcleh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vclth_f16
+// CHECK:  [[TMP1:%.*]] = fcmp olt half %a, %b
+// CHECK:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// CHECK:  ret i16 [[TMP2]]
+uint16_t test_vclth_f16(float16_t a, float16_t b) {
+  return vclth_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_s16
+// CHECK: [[SEXT:%.*]] = sext i16 %a to i32
+// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 [[SEXT]], i32 0)
+// CHECK:  ret half [[CVT]]
+float16_t test_vcvth_n_f16_s16(int16_t a) {
+  return vcvth_n_f16_s16(a, 0);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_s32
+// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 0)
+// CHECK:  ret half [[CVT]]
+float16_t test_vcvth_n_f16_s32(int32_t a) {
+  return vcvth_n_f16_s32(a, 0);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_s64
+// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 0)
+// CHECK:  ret half [[CVT]]
+float16_t test_vcvth_n_f16_s64(int64_t a) {
+  return vcvth_n_f16_s64(a, 0);
+}
+
+// CHECK-LABEL: test_vcvth_n_s16_f16
+// CHECK:  [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 0)
+// CHECK: [[RET:%.*]] = trunc i32 [[CVT]] to i16
+// CHECK: ret i16 [[RET]]
+int16_t test_vcvth_n_s16_f16(float16_t a) {
+  return vcvth_n_s16_f16(a, 0);
+}
+
+// CHECK-LABEL: test_vcvth_n_s32_f16
+// CHECK:  [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 0)
+// CHECK:  ret i32 [[CVT]]
+int32_t test_vcvth_n_s32_f16(float16_t a) {
+  return vcvth_n_s32_f16(a, 0);
+}
+
+// CHECK-LABEL: test_vcvth_n_s64_f16
+// CHECK:  [[CVT:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 0)
+// CHECK:  ret i64 [[CVT]]
+int64_t test_vcvth_n_s64_f16(float16_t a) {
+  return vcvth_n_s64_f16(a, 0);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_u16
+// CHECK: [[SEXT:%.*]] = zext i16 %a to i32
+// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 [[SEXT]], i32 0)
+// CHECK:  ret half [[CVT]]
+float16_t test_vcvth_n_f16_u16(int16_t a) {
+  return vcvth_n_f16_u16(a, 0);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_u32
+// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 0)
+// CHECK:  ret half [[CVT]]
+float16_t test_vcvth_n_f16_u32(int32_t a) {
+  return vcvth_n_f16_u32(a, 0);
+}
+
+// CHECK-LABEL: test_vcvth_n_f16_u64
+// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i64(i64 %a, i32 0)
+// CHECK:  ret half [[CVT]]
+float16_t test_vcvth_n_f16_u64(int64_t a) {
+  return vcvth_n_f16_u64(a, 0);
+}
+
+// CHECK-LABEL: test_vcvth_n_u16_f16
+// CHECK:  [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 0)
+// CHECK: [[RET:%.*]] = trunc i32 [[CVT]] to i16
+// CHECK: ret i16 [[RET]]
+int16_t test_vcvth_n_u16_f16(float16_t a) {
+  return vcvth_n_u16_f16(a, 0);
+}
+
+// CHECK-LABEL: test_vcvth_n_u32_f16
+// CHECK:  [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 0)
+// CHECK:  ret i32 [[CVT]]
+int32_t test_vcvth_n_u32_f16(float16_t a) {
+  return vcvth_n_u32_f16(a, 0);
+}
+
+// CHECK-LABEL: test_vcvth_n_u64_f16
+// CHECK:  [[CVT:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f16(half %a, i32 0)
+// CHECK:  ret i64 [[CVT]]
+int64_t test_vcvth_n_u64_f16(float16_t a) {
+  return vcvth_n_u64_f16(a, 0);
+}
+
+// CHECK-LABEL: test_vdivh_f16
+// CHECK:  [[DIV:%.*]] = fdiv half %a, %b
+// CHECK:  ret half [[DIV]]
+float16_t test_vdivh_f16(float16_t a, float16_t b) {
+  return vdivh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmaxh_f16
+// CHECK:  [[MAX:%.*]] = call half @llvm.aarch64.neon.fmax.f16(half %a, half %b)
+// CHECK:  ret half [[MAX]]
+float16_t test_vmaxh_f16(float16_t a, float16_t b) {
+  return vmaxh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmaxnmh_f16
+// CHECK:  [[MAX:%.*]] = call half @llvm.aarch64.neon.fmaxnm.f16(half %a, half %b)
+// CHECK:  ret half [[MAX]]
+float16_t test_vmaxnmh_f16(float16_t a, float16_t b) {
+  return vmaxnmh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vminh_f16
+// CHECK:  [[MIN:%.*]] = call half @llvm.aarch64.neon.fmin.f16(half %a, half %b)
+// CHECK:  ret half [[MIN]]
+float16_t test_vminh_f16(float16_t a, float16_t b) {
+  return vminh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vminnmh_f16
+// CHECK:  [[MIN:%.*]] = call half @llvm.aarch64.neon.fminnm.f16(half %a, half %b)
+// CHECK:  ret half [[MIN]]
+float16_t test_vminnmh_f16(float16_t a, float16_t b) {
+  return vminnmh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulh_f16
+// CHECK:  [[MUL:%.*]] = fmul half %a, %b
+// CHECK:  ret half [[MUL]]
+float16_t test_vmulh_f16(float16_t a, float16_t b) {
+  return vmulh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulxh_f16
+// CHECK:  [[MUL:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b)
+// CHECK:  ret half [[MUL]]
+float16_t test_vmulxh_f16(float16_t a, float16_t b) {
+  return vmulxh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vrecpsh_f16
+// CHECK: [[RECPS:%.*]] = call half @llvm.aarch64.neon.frecps.f16(half %a, half %b)
+// CHECK: ret half [[RECPS]]
+float16_t test_vrecpsh_f16(float16_t a, float16_t b) {
+  return vrecpsh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vrsqrtsh_f16
+// CHECK:  [[RSQRTS:%.*]] = call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b)
+// CHECK:  ret half [[RSQRTS]]
+float16_t test_vrsqrtsh_f16(float16_t a, float16_t b) {
+  return vrsqrtsh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vsubh_f16
+// CHECK:  [[SUB:%.*]] = fsub half %a, %b
+// CHECK:  ret half [[SUB]]
+float16_t test_vsubh_f16(float16_t a, float16_t b) {
+  return vsubh_f16(a, b);
+}
+
+// CHECK-LABEL: test_vfmah_f16
+// CHECK:  [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half %c, half %a)
+// CHECK:  ret half [[FMA]]
+float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
+  return vfmah_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmsh_f16
+// CHECK:  [[SUB:%.*]] = fsub half 0xH8000, %b
+// CHECK:  [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
+// CHECK:  ret half [[ADD]]
+float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
+  return vfmsh_f16(a, b, c);
+}
+
diff --git a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
new file mode 100644
index 0000000..1a5c3a2
--- /dev/null
+++ b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -0,0 +1,1633 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
+// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: | opt -S -mem2reg \
+// RUN: | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: test_vabs_f16
+// CHECK:  [[ABS:%.*]] =  call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[ABS]]
+float16x4_t test_vabs_f16(float16x4_t a) {
+  return vabs_f16(a);
+}
+
+// CHECK-LABEL: test_vabsq_f16
+// CHECK:  [[ABS:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[ABS]]
+float16x8_t test_vabsq_f16(float16x8_t a) {
+  return vabsq_f16(a);
+}
+
+// CHECK-LABEL: test_vceqz_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oeq <4 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vceqz_f16(float16x4_t a) {
+  return vceqz_f16(a);
+}
+
+// CHECK-LABEL: test_vceqzq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oeq <8 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vceqzq_f16(float16x8_t a) {
+  return vceqzq_f16(a);
+}
+
+// CHECK-LABEL: test_vcgez_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oge <4 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcgez_f16(float16x4_t a) {
+  return vcgez_f16(a);
+}
+
+// CHECK-LABEL: test_vcgezq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oge <8 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcgezq_f16(float16x8_t a) {
+  return vcgezq_f16(a);
+}
+
+// CHECK-LABEL: test_vcgtz_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ogt <4 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcgtz_f16(float16x4_t a) {
+  return vcgtz_f16(a);
+}
+
+// CHECK-LABEL: test_vcgtzq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ogt <8 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcgtzq_f16(float16x8_t a) {
+  return vcgtzq_f16(a);
+}
+
+// CHECK-LABEL: test_vclez_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ole <4 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vclez_f16(float16x4_t a) {
+  return vclez_f16(a);
+}
+
+// CHECK-LABEL: test_vclezq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ole <8 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vclezq_f16(float16x8_t a) {
+  return vclezq_f16(a);
+}
+
+// CHECK-LABEL: test_vcltz_f16
+// CHECK:  [[TMP1:%.*]] = fcmp olt <4 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcltz_f16(float16x4_t a) {
+  return vcltz_f16(a);
+}
+
+// CHECK-LABEL: test_vcltzq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp olt <8 x half> %a, zeroinitializer
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcltzq_f16(float16x8_t a) {
+  return vcltzq_f16(a);
+}
+
+// CHECK-LABEL: test_vcvt_f16_s16
+// CHECK:  [[VCVT:%.*]] = sitofp <4 x i16> %a to <4 x half>
+// CHECK:  ret <4 x half> [[VCVT]]
+float16x4_t test_vcvt_f16_s16 (int16x4_t a) {
+  return vcvt_f16_s16(a);
+}
+
+// CHECK-LABEL: test_vcvtq_f16_s16
+// CHECK:  [[VCVT:%.*]] = sitofp <8 x i16> %a to <8 x half>
+// CHECK:  ret <8 x half> [[VCVT]]
+float16x8_t test_vcvtq_f16_s16 (int16x8_t a) {
+  return vcvtq_f16_s16(a);
+}
+
+// CHECK-LABEL: test_vcvt_f16_u16
+// CHECK:  [[VCVT:%.*]] = uitofp <4 x i16> %a to <4 x half>
+// CHECK:  ret <4 x half> [[VCVT]]
+float16x4_t test_vcvt_f16_u16 (uint16x4_t a) {
+  return vcvt_f16_u16(a);
+}
+
+// CHECK-LABEL: test_vcvtq_f16_u16
+// CHECK:  [[VCVT:%.*]] = uitofp <8 x i16> %a to <8 x half>
+// CHECK:  ret <8 x half> [[VCVT]]
+float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) {
+  return vcvtq_f16_u16(a);
+}
+
+// CHECK-LABEL: test_vcvt_s16_f16
+// CHECK:  [[VCVT:%.*]] = fptosi <4 x half> %a to <4 x i16>
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
+  return vcvt_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtq_s16_f16
+// CHECK:  [[VCVT:%.*]] = fptosi <8 x half> %a to <8 x i16>
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
+  return vcvtq_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvt_u16_f16
+// CHECK:  [[VCVT:%.*]] = fptoui <4 x half> %a to <4 x i16>
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvt_u16_f16 (float16x4_t a) {
+  return vcvt_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtq_u16_f16
+// CHECK:  [[VCVT:%.*]] = fptoui <8 x half> %a to <8 x i16>
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
+  return vcvtq_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvta_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtas.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvta_s16_f16 (float16x4_t a) {
+  return vcvta_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtaq_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtas.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtaq_s16_f16 (float16x8_t a) {
+  return vcvtaq_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtm_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtms.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvtm_s16_f16 (float16x4_t a) {
+  return vcvtm_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmq_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtms.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtmq_s16_f16 (float16x8_t a) {
+  return vcvtmq_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtm_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtmu.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+uint16x4_t test_vcvtm_u16_f16 (float16x4_t a) {
+  return vcvtm_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtmq_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtmu.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+uint16x8_t test_vcvtmq_u16_f16 (float16x8_t a) {
+  return vcvtmq_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtn_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtns.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvtn_s16_f16 (float16x4_t a) {
+  return vcvtn_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnq_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtns.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtnq_s16_f16 (float16x8_t a) {
+  return vcvtnq_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtn_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtnu.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+uint16x4_t test_vcvtn_u16_f16 (float16x4_t a) {
+  return vcvtn_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtnq_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtnu.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+uint16x8_t test_vcvtnq_u16_f16 (float16x8_t a) {
+  return vcvtnq_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtp_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtps.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvtp_s16_f16 (float16x4_t a) {
+  return vcvtp_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtpq_s16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtps.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+int16x8_t test_vcvtpq_s16_f16 (float16x8_t a) {
+  return vcvtpq_s16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtp_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtpu.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+uint16x4_t test_vcvtp_u16_f16 (float16x4_t a) {
+  return vcvtp_u16_f16(a);
+}
+
+// CHECK-LABEL: test_vcvtpq_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtpu.v8i16.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x i16> [[VCVT]]
+uint16x8_t test_vcvtpq_u16_f16 (float16x8_t a) {
+  return vcvtpq_u16_f16(a);
+}
+
+// FIXME: Fix the zero constant when fp16 non-storage-only type becomes available.
+// CHECK-LABEL: test_vneg_f16
+// CHECK:  [[NEG:%.*]] = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
+// CHECK:  ret <4 x half> [[NEG]]
+float16x4_t test_vneg_f16(float16x4_t a) {
+  return vneg_f16(a);
+}
+
+// CHECK-LABEL: test_vnegq_f16
+// CHECK:  [[NEG:%.*]] = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
+// CHECK:  ret <8 x half> [[NEG]]
+float16x8_t test_vnegq_f16(float16x8_t a) {
+  return vnegq_f16(a);
+}
+
+// CHECK-LABEL: test_vrecpe_f16
+// CHECK:  [[RCP:%.*]] = call <4 x half> @llvm.aarch64.neon.frecpe.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RCP]]
+float16x4_t test_vrecpe_f16(float16x4_t a) {
+  return vrecpe_f16(a);
+}
+
+// CHECK-LABEL: test_vrecpeq_f16
+// CHECK:  [[RCP:%.*]] = call <8 x half> @llvm.aarch64.neon.frecpe.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RCP]]
+float16x8_t test_vrecpeq_f16(float16x8_t a) {
+  return vrecpeq_f16(a);
+}
+
+// CHECK-LABEL: test_vrnd_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.trunc.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrnd_f16(float16x4_t a) {
+  return vrnd_f16(a);
+}
+
+// CHECK-LABEL: test_vrndq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndq_f16(float16x8_t a) {
+  return vrndq_f16(a);
+}
+
+// CHECK-LABEL: test_vrnda_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.round.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrnda_f16(float16x4_t a) {
+  return vrnda_f16(a);
+}
+
+// CHECK-LABEL: test_vrndaq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.round.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndaq_f16(float16x8_t a) {
+  return vrndaq_f16(a);
+}
+
+// CHECK-LABEL: test_vrndi_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrndi_f16(float16x4_t a) {
+  return vrndi_f16(a);
+}
+
+// CHECK-LABEL: test_vrndiq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndiq_f16(float16x8_t a) {
+  return vrndiq_f16(a);
+}
+
+// CHECK-LABEL: test_vrndm_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.floor.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrndm_f16(float16x4_t a) {
+  return vrndm_f16(a);
+}
+
+// CHECK-LABEL: test_vrndmq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndmq_f16(float16x8_t a) {
+  return vrndmq_f16(a);
+}
+
+// CHECK-LABEL: test_vrndn_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrndn_f16(float16x4_t a) {
+  return vrndn_f16(a);
+}
+
+// CHECK-LABEL: test_vrndnq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.aarch64.neon.frintn.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndnq_f16(float16x8_t a) {
+  return vrndnq_f16(a);
+}
+
+// CHECK-LABEL: test_vrndp_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.ceil.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrndp_f16(float16x4_t a) {
+  return vrndp_f16(a);
+}
+
+// CHECK-LABEL: test_vrndpq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndpq_f16(float16x8_t a) {
+  return vrndpq_f16(a);
+}
+
+// CHECK-LABEL: test_vrndx_f16
+// CHECK:  [[RND:%.*]] =  call <4 x half> @llvm.rint.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrndx_f16(float16x4_t a) {
+  return vrndx_f16(a);
+}
+
+// CHECK-LABEL: test_vrndxq_f16
+// CHECK:  [[RND:%.*]] =  call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrndxq_f16(float16x8_t a) {
+  return vrndxq_f16(a);
+}
+
+// CHECK-LABEL: test_vrsqrte_f16
+// CHECK:  [[RND:%.*]] = call <4 x half> @llvm.aarch64.neon.frsqrte.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[RND]]
+float16x4_t test_vrsqrte_f16(float16x4_t a) {
+  return vrsqrte_f16(a);
+}
+
+// CHECK-LABEL: test_vrsqrteq_f16
+// CHECK:  [[RND:%.*]] = call <8 x half> @llvm.aarch64.neon.frsqrte.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[RND]]
+float16x8_t test_vrsqrteq_f16(float16x8_t a) {
+  return vrsqrteq_f16(a);
+}
+
+// CHECK-LABEL: test_vsqrt_f16
+// CHECK:  [[SQR:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x half> [[SQR]]
+float16x4_t test_vsqrt_f16(float16x4_t a) {
+  return vsqrt_f16(a);
+}
+
+// CHECK-LABEL: test_vsqrtq_f16
+// CHECK:  [[SQR:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
+// CHECK:  ret <8 x half> [[SQR]]
+float16x8_t test_vsqrtq_f16(float16x8_t a) {
+  return vsqrtq_f16(a);
+}
+
+// CHECK-LABEL: test_vadd_f16
+// CHECK:  [[ADD:%.*]] = fadd <4 x half> %a, %b
+// CHECK:  ret <4 x half> [[ADD]]
+float16x4_t test_vadd_f16(float16x4_t a, float16x4_t b) {
+  return vadd_f16(a, b);
+}
+
+// CHECK-LABEL: test_vaddq_f16
+// CHECK:  [[ADD:%.*]] = fadd <8 x half> %a, %b
+// CHECK:  ret <8 x half> [[ADD]]
+float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b) {
+  return vaddq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vabd_f16
+// CHECK:  [[ABD:%.*]] = call <4 x half> @llvm.aarch64.neon.fabd.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[ABD]]
+float16x4_t test_vabd_f16(float16x4_t a, float16x4_t b) {
+  return vabd_f16(a, b);
+}
+
+// CHECK-LABEL: test_vabdq_f16
+// CHECK:  [[ABD:%.*]] = call <8 x half> @llvm.aarch64.neon.fabd.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[ABD]]
+float16x8_t test_vabdq_f16(float16x8_t a, float16x8_t b) {
+  return vabdq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcage_f16
+// CHECK:  [[ABS:%.*]] = call <4 x i16> @llvm.aarch64.neon.facge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x i16> [[ABS]]
+uint16x4_t test_vcage_f16(float16x4_t a, float16x4_t b) {
+  return vcage_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcageq_f16
+// CHECK:  [[ABS:%.*]] = call <8 x i16> @llvm.aarch64.neon.facge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x i16> [[ABS]]
+uint16x8_t test_vcageq_f16(float16x8_t a, float16x8_t b) {
+  return vcageq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcagt_f16
+// CHECK:  [[ABS:%.*]] = call <4 x i16> @llvm.aarch64.neon.facgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x i16> [[ABS]]
+uint16x4_t test_vcagt_f16(float16x4_t a, float16x4_t b) {
+  return vcagt_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcagtq_f16
+// CHECK:  [[ABS:%.*]] = call <8 x i16> @llvm.aarch64.neon.facgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x i16> [[ABS]]
+uint16x8_t test_vcagtq_f16(float16x8_t a, float16x8_t b) {
+  return vcagtq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcale_f16
+// CHECK:  [[ABS:%.*]] = call <4 x i16> @llvm.aarch64.neon.facge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
+// CHECK:  ret <4 x i16> [[ABS]]
+uint16x4_t test_vcale_f16(float16x4_t a, float16x4_t b) {
+  return vcale_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcaleq_f16
+// CHECK:  [[ABS:%.*]] = call <8 x i16> @llvm.aarch64.neon.facge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
+// CHECK:  ret <8 x i16> [[ABS]]
+uint16x8_t test_vcaleq_f16(float16x8_t a, float16x8_t b) {
+  return vcaleq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcalt_f16
+// CHECK:  [[ABS:%.*]] = call <4 x i16> @llvm.aarch64.neon.facgt.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
+// CHECK:  ret <4 x i16> [[ABS]]
+uint16x4_t test_vcalt_f16(float16x4_t a, float16x4_t b) {
+  return vcalt_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcaltq_f16
+// CHECK:  [[ABS:%.*]] = call <8 x i16> @llvm.aarch64.neon.facgt.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
+// CHECK:  ret <8 x i16> [[ABS]]
+uint16x8_t test_vcaltq_f16(float16x8_t a, float16x8_t b) {
+  return vcaltq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vceq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oeq <4 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vceq_f16(float16x4_t a, float16x4_t b) {
+  return vceq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vceqq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oeq <8 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vceqq_f16(float16x8_t a, float16x8_t b) {
+  return vceqq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcge_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oge <4 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcge_f16(float16x4_t a, float16x4_t b) {
+  return vcge_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcgeq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp oge <8 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcgeq_f16(float16x8_t a, float16x8_t b) {
+  return vcgeq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcgt_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ogt <4 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcgt_f16(float16x4_t a, float16x4_t b) {
+  return vcgt_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcgtq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ogt <8 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcgtq_f16(float16x8_t a, float16x8_t b) {
+  return vcgtq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcle_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ole <4 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vcle_f16(float16x4_t a, float16x4_t b) {
+  return vcle_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcleq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp ole <8 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcleq_f16(float16x8_t a, float16x8_t b) {
+  return vcleq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vclt_f16
+// CHECK:  [[TMP1:%.*]] = fcmp olt <4 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK:  ret <4 x i16> [[TMP2]]
+uint16x4_t test_vclt_f16(float16x4_t a, float16x4_t b) {
+  return vclt_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcltq_f16
+// CHECK:  [[TMP1:%.*]] = fcmp olt <8 x half> %a, %b
+// CHECK:  [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
+// CHECK:  ret <8 x i16> [[TMP2]]
+uint16x8_t test_vcltq_f16(float16x8_t a, float16x8_t b) {
+  return vcltq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vcvt_n_f16_s16
+// CHECK:  [[CVT:%.*]] = call <4 x half> @llvm.aarch64.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %vcvt_n, i32 2)
+// CHECK:  ret <4 x half> [[CVT]]
+float16x4_t test_vcvt_n_f16_s16(int16x4_t a) {
+  return vcvt_n_f16_s16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvtq_n_f16_s16
+// CHECK:  [[CVT:%.*]] = call <8 x half> @llvm.aarch64.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %vcvt_n, i32 2)
+// CHECK:  ret <8 x half> [[CVT]]
+float16x8_t test_vcvtq_n_f16_s16(int16x8_t a) {
+  return vcvtq_n_f16_s16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvt_n_f16_u16
+// CHECK:  [[CVT:%.*]] = call <4 x half> @llvm.aarch64.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %vcvt_n, i32 2)
+// CHECK:  ret <4 x half> [[CVT]]
+float16x4_t test_vcvt_n_f16_u16(uint16x4_t a) {
+  return vcvt_n_f16_u16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvtq_n_f16_u16
+// CHECK:  [[CVT:%.*]] = call <8 x half> @llvm.aarch64.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %vcvt_n, i32 2)
+// CHECK:  ret <8 x half> [[CVT]]
+float16x8_t test_vcvtq_n_f16_u16(uint16x8_t a) {
+  return vcvtq_n_f16_u16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvt_n_s16_f16
+// CHECK:  [[CVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %vcvt_n, i32 2)
+// CHECK:  ret <4 x i16> [[CVT]]
+int16x4_t test_vcvt_n_s16_f16(float16x4_t a) {
+  return vcvt_n_s16_f16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvtq_n_s16_f16
+// CHECK:  [[CVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %vcvt_n, i32 2)
+// CHECK:  ret <8 x i16> [[CVT]]
+int16x8_t test_vcvtq_n_s16_f16(float16x8_t a) {
+  return vcvtq_n_s16_f16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvt_n_u16_f16
+// CHECK:  [[CVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %vcvt_n, i32 2)
+// CHECK:  ret <4 x i16> [[CVT]]
+uint16x4_t test_vcvt_n_u16_f16(float16x4_t a) {
+  return vcvt_n_u16_f16(a, 2);
+}
+
+// CHECK-LABEL: test_vcvtq_n_u16_f16
+// CHECK:  [[CVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %vcvt_n, i32 2)
+// CHECK:  ret <8 x i16> [[CVT]]
+uint16x8_t test_vcvtq_n_u16_f16(float16x8_t a) {
+  return vcvtq_n_u16_f16(a, 2);
+}
+
+// CHECK-LABEL: test_vdiv_f16
+// CHECK:  [[DIV:%.*]] = fdiv <4 x half> %a, %b
+// CHECK:  ret <4 x half> [[DIV]]
+float16x4_t test_vdiv_f16(float16x4_t a, float16x4_t b) {
+  return vdiv_f16(a, b);
+}
+
+// CHECK-LABEL: test_vdivq_f16
+// CHECK:  [[DIV:%.*]] = fdiv <8 x half> %a, %b
+// CHECK:  ret <8 x half> [[DIV]]
+float16x8_t test_vdivq_f16(float16x8_t a, float16x8_t b) {
+  return vdivq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmax_f16
+// CHECK:  [[MAX:%.*]] = call <4 x half> @llvm.aarch64.neon.fmax.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MAX]]
+float16x4_t test_vmax_f16(float16x4_t a, float16x4_t b) {
+  return vmax_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmaxq_f16
+// CHECK:  [[MAX:%.*]] = call <8 x half> @llvm.aarch64.neon.fmax.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MAX]]
+float16x8_t test_vmaxq_f16(float16x8_t a, float16x8_t b) {
+  return vmaxq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmaxnm_f16
+// CHECK:  [[MAX:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MAX]]
+float16x4_t test_vmaxnm_f16(float16x4_t a, float16x4_t b) {
+  return vmaxnm_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmaxnmq_f16
+// CHECK:  [[MAX:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MAX]]
+float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) {
+  return vmaxnmq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmin_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.fmin.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vmin_f16(float16x4_t a, float16x4_t b) {
+  return vmin_f16(a, b);
+}
+
+// CHECK-LABEL: test_vminq_f16
+// CHECK:  [[MIN:%.*]] = call <8 x half> @llvm.aarch64.neon.fmin.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vminq_f16(float16x8_t a, float16x8_t b) {
+  return vminq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vminnm_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vminnm_f16(float16x4_t a, float16x4_t b) {
+  return vminnm_f16(a, b);
+}
+
+// CHECK-LABEL: test_vminnmq_f16
+// CHECK:  [[MIN:%.*]] = call <8 x half> @llvm.aarch64.neon.fminnm.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) {
+  return vminnmq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmul_f16
+// CHECK:  [[MUL:%.*]] = fmul <4 x half> %a, %b
+// CHECK:  ret <4 x half> [[MUL]]
+float16x4_t test_vmul_f16(float16x4_t a, float16x4_t b) {
+  return vmul_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulq_f16
+// CHECK:  [[MUL:%.*]] = fmul <8 x half> %a, %b
+// CHECK:  ret <8 x half> [[MUL]]
+float16x8_t test_vmulq_f16(float16x8_t a, float16x8_t b) {
+  return vmulq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulx_f16
+// CHECK:  [[MUL:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MUL]]
+float16x4_t test_vmulx_f16(float16x4_t a, float16x4_t b) {
+  return vmulx_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulxq_f16
+// CHECK:  [[MUL:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MUL]]
+float16x8_t test_vmulxq_f16(float16x8_t a, float16x8_t b) {
+  return vmulxq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpadd_f16
+// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.addp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[ADD]]
+float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
+  return vpadd_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpaddq_f16
+// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.addp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[ADD]]
+float16x8_t test_vpaddq_f16(float16x8_t a, float16x8_t b) {
+  return vpaddq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpmax_f16
+// CHECK:  [[MAX:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MAX]]
+float16x4_t test_vpmax_f16(float16x4_t a, float16x4_t b) {
+  return vpmax_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpmaxq_f16
+// CHECK:  [[MAX:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MAX]]
+float16x8_t test_vpmaxq_f16(float16x8_t a, float16x8_t b) {
+  return vpmaxq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpmaxnm_f16
+// CHECK:  [[MAX:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxnmp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MAX]]
+float16x4_t test_vpmaxnm_f16(float16x4_t a, float16x4_t b) {
+  return vpmaxnm_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpmaxnmq_f16
+// CHECK:  [[MAX:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MAX]]
+float16x8_t test_vpmaxnmq_f16(float16x8_t a, float16x8_t b) {
+  return vpmaxnmq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpmin_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.fminp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vpmin_f16(float16x4_t a, float16x4_t b) {
+  return vpmin_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpminq_f16
+// CHECK:  [[MIN:%.*]] = call <8 x half> @llvm.aarch64.neon.fminp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vpminq_f16(float16x8_t a, float16x8_t b) {
+  return vpminq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpminnm_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.fminnmp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vpminnm_f16(float16x4_t a, float16x4_t b) {
+  return vpminnm_f16(a, b);
+}
+
+// CHECK-LABEL: test_vpminnmq_f16
+// CHECK:  [[MIN:%.*]] = call <8 x half> @llvm.aarch64.neon.fminnmp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vpminnmq_f16(float16x8_t a, float16x8_t b) {
+  return vpminnmq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vrecps_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.frecps.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vrecps_f16(float16x4_t a, float16x4_t b) {
+  return vrecps_f16(a, b);
+}
+
+// CHECK-LABEL: test_vrecpsq_f16
+// CHECK:  [[MIN:%.*]] =  call <8 x half> @llvm.aarch64.neon.frecps.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vrecpsq_f16(float16x8_t a, float16x8_t b) {
+  return vrecpsq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vrsqrts_f16
+// CHECK:  [[MIN:%.*]] = call <4 x half> @llvm.aarch64.neon.frsqrts.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  ret <4 x half> [[MIN]]
+float16x4_t test_vrsqrts_f16(float16x4_t a, float16x4_t b) {
+  return vrsqrts_f16(a, b);
+}
+
+// CHECK-LABEL: test_vrsqrtsq_f16
+// CHECK:  [[MIN:%.*]] =  call <8 x half> @llvm.aarch64.neon.frsqrts.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  ret <8 x half> [[MIN]]
+float16x8_t test_vrsqrtsq_f16(float16x8_t a, float16x8_t b) {
+  return vrsqrtsq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vsub_f16
+// CHECK:  [[ADD:%.*]] = fsub <4 x half> %a, %b 
+// CHECK:  ret <4 x half> [[ADD]]
+float16x4_t test_vsub_f16(float16x4_t a, float16x4_t b) {
+  return vsub_f16(a, b);
+}
+
+// CHECK-LABEL: test_vsubq_f16
+// CHECK:  [[ADD:%.*]] = fsub <8 x half> %a, %b
+// CHECK:  ret <8 x half> [[ADD]]
+float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b) {
+  return vsubq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vfma_f16
+// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
+// CHECK:  ret <4 x half> [[ADD]]
+float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
+  return vfma_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmaq_f16
+// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
+// CHECK:  ret <8 x half> [[ADD]]
+float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
+  return vfmaq_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfms_f16
+// CHECK:  [[SUB:%.*]] = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a)
+// CHECK:  ret <4 x half> [[ADD]]
+float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
+  return vfms_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmsq_f16
+// CHECK:  [[SUB:%.*]] = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a)
+// CHECK:  ret <8 x half> [[ADD]]
+float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
+  return vfmsq_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfma_lane_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
+// CHECK: ret <4 x half> [[FMLA]]
+float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
+  return vfma_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfmaq_lane_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
+// CHECK: ret <8 x half> [[FMLA]]
+float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
+  return vfmaq_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfma_laneq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
+// CHECK: ret <4 x half> [[FMLA]]
+float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
+  return vfma_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vfmaq_laneq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
+// CHECK: ret <8 x half> [[FMLA]]
+float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
+  return vfmaq_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vfma_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half %c, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
+// CHECK: [[FMA:%.*]]  = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a)
+// CHECK: ret <4 x half> [[FMA]]
+float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
+  return vfma_n_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmaq_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half %c, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
+// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
+// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
+// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
+// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
+// CHECK: [[FMA:%.*]]  = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a)
+// CHECK: ret <8 x half> [[FMA]]
+float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
+  return vfmaq_n_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmah_lane_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3
+// CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
+// CHECK: ret half [[FMA]]
+float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) {
+  return vfmah_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfmah_laneq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7
+// CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
+// CHECK: ret half [[FMA]]
+float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
+  return vfmah_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vfms_lane_f16
+// CHECK: [[SUB:%.*]]  = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
+// CHECK: ret <4 x half> [[FMA]]
+float16x4_t test_vfms_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
+  return vfms_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfmsq_lane_f16
+// CHECK: [[SUB:%.*]]  = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
+// CHECK: ret <8 x half> [[FMLA]]
+float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
+  return vfmsq_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfms_laneq_f16
+// CHECK: [[SUB:%.*]]  = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
+// CHECK: ret <4 x half> [[FMLA]]
+float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
+  return vfms_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vfmsq_laneq_f16
+// CHECK: [[SUB:%.*]]  = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
+// CHECK: ret <8 x half> [[FMLA]]
+float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
+  return vfmsq_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vfms_n_f16
+// CHECK: [[SUB:%.*]]  = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half %c, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
+// CHECK: [[FMA:%.*]]  = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a)
+// CHECK: ret <4 x half> [[FMA]]
+float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
+  return vfms_n_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmsq_n_f16
+// CHECK: [[SUB:%.*]]  = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half %c, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
+// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
+// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
+// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
+// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
+// CHECK: [[FMA:%.*]]  = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a)
+// CHECK: ret <8 x half> [[FMA]]
+float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
+  return vfmsq_n_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vfmsh_lane_f16
+// CHECK: [[TMP0:%.*]] = fpext half %b to float
+// CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]]
+// CHECK: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
+// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP3]], i32 3
+// CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
+// CHECK: ret half [[FMA]]
+float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
+  return vfmsh_lane_f16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vfmsh_laneq_f16
+// CHECK: [[TMP0:%.*]] = fpext half %b to float
+// CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]]
+// CHECK: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
+// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP3]], i32 7
+// CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
+// CHECK: ret half [[FMA]]
+float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
+  return vfmsh_laneq_f16(a, b, c, 7);
+}
+
+// CHECK-LABEL: test_vmul_lane_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]]  = fmul <4 x half> %a, [[TMP0]]
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmul_lane_f16(float16x4_t a, float16x4_t b) {
+  return vmul_lane_f16(a, b, 3);
+}
+
+// CHECK-LABEL: test_vmulq_lane_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]]  = fmul <8 x half> %a, [[TMP0]]
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulq_lane_f16(float16x8_t a, float16x4_t b) {
+  return vmulq_lane_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmul_laneq_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]]  = fmul <4 x half> %a, [[TMP0]]
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmul_laneq_f16(float16x4_t a, float16x8_t b) {
+  return vmul_laneq_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmulq_laneq_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]]  = fmul <8 x half> %a, [[TMP0]]
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulq_laneq_f16(float16x8_t a, float16x8_t b) {
+  return vmulq_laneq_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmul_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half %b, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %b, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %b, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %b, i32 3
+// CHECK: [[MUL:%.*]]  = fmul <4 x half> %a, [[TMP3]]
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmul_n_f16(float16x4_t a, float16_t b) {
+  return vmul_n_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulq_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half %b, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %b, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %b, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %b, i32 3
+// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %b, i32 4
+// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %b, i32 5
+// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %b, i32 6
+// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %b, i32 7
+// CHECK: [[MUL:%.*]]  = fmul <8 x half> %a, [[TMP7]]
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) {
+  return vmulq_n_f16(a, b);
+}
+
+// FIXME: Fix it when fp16 non-storage-only type becomes available.
+// CHECK-LABEL: test_vmulh_lane_f16
+// CHECK: [[CONV0:%.*]] = fpext half %a to float
+// CHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
+// CHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
+// CHECK: [[CONV3:%.*]] = fptrunc float %mul to half
+// CHECK: ret half [[CONV3:%.*]]
+float16_t test_vmulh_lane_f16(float16_t a, float16x4_t b) {
+  return vmulh_lane_f16(a, b, 3);
+}
+
+// CHECK-LABEL: test_vmulh_laneq_f16
+// CHECK: [[CONV0:%.*]] = fpext half %a to float
+// CHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
+// CHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
+// CHECK: [[CONV3:%.*]] = fptrunc float %mul to half
+// CHECK: ret half [[CONV3:%.*]]
+float16_t test_vmulh_laneq_f16(float16_t a, float16x8_t b) {
+  return vmulh_laneq_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmulx_lane_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> [[TMP0]])
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmulx_lane_f16(float16x4_t a, float16x4_t b) {
+  return vmulx_lane_f16(a, b, 3);
+}
+
+// CHECK-LABEL: test_vmulxq_lane_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> [[TMP0]])
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulxq_lane_f16(float16x8_t a, float16x4_t b) {
+  return vmulxq_lane_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmulx_laneq_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]]  = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> [[TMP0]])
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmulx_laneq_f16(float16x4_t a, float16x8_t b) {
+  return vmulx_laneq_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmulxq_laneq_f16
+// CHECK: [[TMP0:%.*]] = shufflevector <8 x half> %b, <8 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: [[MUL:%.*]]  = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> [[TMP0]])
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulxq_laneq_f16(float16x8_t a, float16x8_t b) {
+  return vmulxq_laneq_f16(a, b, 7);
+}
+
+// CHECK-LABEL: test_vmulx_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half %b, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %b, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %b, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %b, i32 3
+// CHECK: [[MUL:%.*]]  = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> [[TMP3]])
+// CHECK: ret <4 x half> [[MUL]]
+float16x4_t test_vmulx_n_f16(float16x4_t a, float16_t b) {
+  return vmulx_n_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmulxq_n_f16
+// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half %b, i32 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %b, i32 1
+// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %b, i32 2
+// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %b, i32 3
+// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %b, i32 4
+// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %b, i32 5
+// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %b, i32 6
+// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %b, i32 7
+// CHECK: [[MUL:%.*]]  = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> [[TMP7]])
+// CHECK: ret <8 x half> [[MUL]]
+float16x8_t test_vmulxq_n_f16(float16x8_t a, float16_t b) {
+  return vmulxq_n_f16(a, b);
+}
+
+/* TODO: Not implemented yet (needs scalar intrinsic from arm_fp16.h)
+// CCHECK-LABEL: test_vmulxh_lane_f16
+// CCHECK: [[CONV0:%.*]] = fpext half %a to float
+// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
+// CCHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
+// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
+// CCHECK: ret half [[CONV3:%.*]]
+float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) {
+  return vmulxh_lane_f16(a, b, 3);
+}
+
+// CCHECK-LABEL: test_vmulxh_laneq_f16
+// CCHECK: [[CONV0:%.*]] = fpext half %a to float
+// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
+// CCHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
+// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
+// CCHECK: ret half [[CONV3:%.*]]
+float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) {
+  return vmulxh_laneq_f16(a, b, 7);
+}
+*/
+
+// CHECK-LABEL: test_vmaxv_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fmaxv.f16.v4f16(<4 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vmaxv_f16(float16x4_t a) {
+  return vmaxv_f16(a);
+}
+
+// CHECK-LABEL: test_vmaxvq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fmaxv.f16.v8f16(<8 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vmaxvq_f16(float16x8_t a) {
+  return vmaxvq_f16(a);
+}
+
+// CHECK-LABEL: test_vminv_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fminv.f16.v4f16(<4 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vminv_f16(float16x4_t a) {
+  return vminv_f16(a);
+}
+
+// CHECK-LABEL: test_vminvq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fminv.f16.v8f16(<8 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vminvq_f16(float16x8_t a) {
+  return vminvq_f16(a);
+}
+
+// CHECK-LABEL: test_vmaxnmv_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fmaxnmv.f16.v4f16(<4 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vmaxnmv_f16(float16x4_t a) {
+  return vmaxnmv_f16(a);
+}
+
+// CHECK-LABEL: test_vmaxnmvq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fmaxnmv.f16.v8f16(<8 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vmaxnmvq_f16(float16x8_t a) {
+  return vmaxnmvq_f16(a);
+}
+
+// CHECK-LABEL: test_vminnmv_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fminnmv.f16.v4f16(<4 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vminnmv_f16(float16x4_t a) {
+  return vminnmv_f16(a);
+}
+
+// CHECK-LABEL: test_vminnmvq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[MAX:%.*]]  = call half @llvm.aarch64.neon.fminnmv.f16.v8f16(<8 x half> [[TMP1]])
+// CHECK: ret half [[MAX]]
+float16_t test_vminnmvq_f16(float16x8_t a) {
+  return vminnmvq_f16(a);
+}
+
+// CHECK-LABEL: test_vbsl_f16
+// CHECK:  [[TMP0:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK:  [[TMP1:%.*]] = bitcast <4 x half> %c to <8 x i8>
+// CHECK:  [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK:  [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK:  [[TMP4:%.*]] = and <4 x i16> %a, [[TMP2]]
+// CHECK:  [[TMP5:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:  [[TMP6:%.*]] = and <4 x i16> [[TMP5]], [[TMP3]]
+// CHECK:  [[TMP7:%.*]] = or <4 x i16> [[TMP4]], [[TMP6]]
+// CHECK:  [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <4 x half>
+// CHECK:  ret <4 x half> [[TMP8]]
+float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
+  return vbsl_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vbslq_f16
+// CHECK:  [[TMP0:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK:  [[TMP1:%.*]] = bitcast <8 x half> %c to <16 x i8>
+// CHECK:  [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK:  [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK:  [[TMP4:%.*]] = and <8 x i16> %a, [[TMP2]]
+// CHECK:  [[TMP5:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:  [[TMP6:%.*]] = and <8 x i16> [[TMP5]], [[TMP3]]
+// CHECK:  [[TMP7:%.*]] = or <8 x i16> [[TMP4]], [[TMP6]]
+// CHECK:  [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <8 x half>
+// CHECK:  ret <8 x half> [[TMP8]]
+float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
+  return vbslq_f16(a, b, c);
+}
+
+// CHECK-LABEL: test_vzip_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <4 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK:   store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]]
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK:   store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false)
+float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
+  return vzip_f16(a, b);
+}
+
+// CHECK-LABEL: test_vzipq_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <8 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK:   store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]]
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK:   store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false)
+float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
+  return vzipq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vuzp_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <4 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK:   store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]]
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK:   store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false)
+float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
+  return vuzp_f16(a, b);
+}
+
+// CHECK-LABEL: test_vuzpq_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <8 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK:   store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]]
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK:   store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false)
+float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
+  return vuzpq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vtrn_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <4 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK:   store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]]
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK:   store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false)
+float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
+  return vtrn_f16(a, b);
+}
+
+// CHECK-LABEL: test_vtrnq_f16
+// CHECK:   [[RETVAL:%.*]]  = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK:   [[TMP0:%.*]]  = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   [[TMP1:%.*]]  = bitcast i8* [[TMP0]] to <8 x half>*
+// CHECK:   [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK:   store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]] 
+// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32>  <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK:   store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false)
+float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
+  return vtrnq_f16(a, b);
+}
+
+// CHECK-LABEL: test_vmov_n_f16
+// CHECK:   [[TMP0:%.*]] = insertelement <4 x half> undef, half %a, i32 0
+// CHECK:   [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %a, i32 1
+// CHECK:   [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %a, i32 2
+// CHECK:   [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %a, i32 3
+// CHECK:   ret <4 x half> [[TMP3]]
+float16x4_t test_vmov_n_f16(float16_t a) {
+  return vmov_n_f16(a);
+}
+
+// CHECK-LABEL: test_vmovq_n_f16
+// CHECK:   [[TMP0:%.*]] = insertelement <8 x half> undef, half %a, i32 0
+// CHECK:   [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %a, i32 1
+// CHECK:   [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %a, i32 2
+// CHECK:   [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %a, i32 3
+// CHECK:   [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %a, i32 4
+// CHECK:   [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %a, i32 5
+// CHECK:   [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %a, i32 6
+// CHECK:   [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %a, i32 7
+// CHECK:   ret <8 x half> [[TMP7]]
+float16x8_t test_vmovq_n_f16(float16_t a) {
+  return vmovq_n_f16(a);
+}
+
+// CHECK-LABEL: test_vdup_n_f16
+// CHECK:   [[TMP0:%.*]] = insertelement <4 x half> undef, half %a, i32 0
+// CHECK:   [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %a, i32 1
+// CHECK:   [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %a, i32 2
+// CHECK:   [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %a, i32 3
+// CHECK:   ret <4 x half> [[TMP3]]
+float16x4_t test_vdup_n_f16(float16_t a) {
+  return vdup_n_f16(a);
+}
+
+// CHECK-LABEL: test_vdupq_n_f16
+// CHECK:   [[TMP0:%.*]] = insertelement <8 x half> undef, half %a, i32 0
+// CHECK:   [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %a, i32 1
+// CHECK:   [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %a, i32 2
+// CHECK:   [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %a, i32 3
+// CHECK:   [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %a, i32 4
+// CHECK:   [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %a, i32 5
+// CHECK:   [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %a, i32 6
+// CHECK:   [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %a, i32 7
+// CHECK:   ret <8 x half> [[TMP7]]
+float16x8_t test_vdupq_n_f16(float16_t a) {
+  return vdupq_n_f16(a);
+}
+
+// CHECK-LABEL: test_vdup_lane_f16
+// CHECK:   [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK:   ret <4 x half> [[SHFL]]
+float16x4_t test_vdup_lane_f16(float16x4_t a) {
+  return vdup_lane_f16(a, 3);
+}
+
+// CHECK-LABEL: test_vdupq_lane_f16
+// CHECK:   [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK:   ret <8 x half> [[SHFL]]
+float16x8_t test_vdupq_lane_f16(float16x4_t a) {
+  return vdupq_lane_f16(a, 7);
+}
+
+// CHECK-LABEL: @test_vext_f16(
+// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK:   [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+// CHECK:   ret <4 x half> [[VEXT]]
+float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) {
+  return vext_f16(a, b, 2);
+}
+
+// CHECK-LABEL: @test_vextq_f16(
+// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK:   [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
+// CHECK:   ret <8 x half> [[VEXT]]
+float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) {
+  return vextq_f16(a, b, 5);
+}
+
+// CHECK-LABEL: @test_vrev64_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK:   ret <4 x half> [[SHFL]]
+float16x4_t test_vrev64_f16(float16x4_t a) {
+  return vrev64_f16(a);
+}
+
+// CHECK-LABEL: @test_vrev64q_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK:   ret <8 x half> [[SHFL]]
+float16x8_t test_vrev64q_f16(float16x8_t a) {
+  return vrev64q_f16(a);
+}
+
+// CHECK-LABEL: @test_vzip1_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK:   ret <4 x half> [[SHFL]]
+float16x4_t test_vzip1_f16(float16x4_t a, float16x4_t b) {
+  return vzip1_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vzip1q_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK:   ret <8 x half> [[SHFL]]
+float16x8_t test_vzip1q_f16(float16x8_t a, float16x8_t b) {
+  return vzip1q_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vzip2_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK:   ret <4 x half> [[SHFL]]
+float16x4_t test_vzip2_f16(float16x4_t a, float16x4_t b) {
+  return vzip2_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vzip2q_f16(
+// CHECK:   [[SHFL:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK:   ret <8 x half> [[SHFL]]
+float16x8_t test_vzip2q_f16(float16x8_t a, float16x8_t b) {
+  return vzip2q_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vuzp1_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK:   ret <4 x half> [[SHUFFLE_I]]
+float16x4_t test_vuzp1_f16(float16x4_t a, float16x4_t b) {
+  return vuzp1_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vuzp1q_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK:   ret <8 x half> [[SHUFFLE_I]]
+float16x8_t test_vuzp1q_f16(float16x8_t a, float16x8_t b) {
+  return vuzp1q_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vuzp2_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK:   ret <4 x half> [[SHUFFLE_I]]
+float16x4_t test_vuzp2_f16(float16x4_t a, float16x4_t b) {
+  return vuzp2_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vuzp2q_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK:   ret <8 x half> [[SHUFFLE_I]]
+float16x8_t test_vuzp2q_f16(float16x8_t a, float16x8_t b) {
+  return vuzp2q_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vtrn1_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK:   ret <4 x half> [[SHUFFLE_I]]
+float16x4_t test_vtrn1_f16(float16x4_t a, float16x4_t b) {
+  return vtrn1_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vtrn1q_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32>  <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK:   ret <8 x half> [[SHUFFLE_I]]
+float16x8_t test_vtrn1q_f16(float16x8_t a, float16x8_t b) {
+  return vtrn1q_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vtrn2_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK:   ret <4 x half> [[SHUFFLE_I]]
+float16x4_t test_vtrn2_f16(float16x4_t a, float16x4_t b) {
+  return vtrn2_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vtrn2q_f16(
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK:   ret <8 x half> [[SHUFFLE_I]]
+float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) {
+  return vtrn2q_f16(a, b);
+}
+
diff --git a/test/CodeGen/aarch64-varargs-ms.c b/test/CodeGen/aarch64-varargs-ms.c
index f3ff960..c27e785 100644
--- a/test/CodeGen/aarch64-varargs-ms.c
+++ b/test/CodeGen/aarch64-varargs-ms.c
@@ -3,7 +3,7 @@
 #include <stdarg.h>
 
 int simple_int(va_list ap) {
-// CHECK-LABEL: define i32 @simple_int
+// CHECK-LABEL: define dso_local i32 @simple_int
   return va_arg(ap, int);
 // CHECK: [[ADDR:%[a-z_0-9]+]] = bitcast i8* %argp.cur to i32*
 // CHECK: [[RESULT:%[a-z_0-9]+]] = load i32, i32* [[ADDR]]
diff --git a/test/CodeGen/address-safety-attr-kasan-hwasan.cpp b/test/CodeGen/address-safety-attr-kasan-hwasan.cpp
new file mode 100644
index 0000000..7a84b79
--- /dev/null
+++ b/test/CodeGen/address-safety-attr-kasan-hwasan.cpp
@@ -0,0 +1,53 @@
+// Make sure the sanitize_address attribute is emitted when using both ASan and KASan.
+// Also document that __attribute__((no_sanitize_address)) doesn't disable KASan instrumentation.
+
+/// RUN: %clang_cc1 -triple i386-unknown-linux -disable-O0-optnone -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-NOASAN %s
+/// RUN: %clang_cc1 -triple i386-unknown-linux -fsanitize=address -disable-O0-optnone -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-ASAN %s
+/// RUN: %clang_cc1 -triple i386-unknown-linux -fsanitize=kernel-address -disable-O0-optnone -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-KASAN %s
+/// RUN: %clang_cc1 -triple i386-unknown-linux -fsanitize=hwaddress -disable-O0-optnone -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-HWASAN %s
+
+int HasSanitizeAddress() {
+  return 1;
+}
+// CHECK-NOASAN: {{Function Attrs: noinline nounwind$}}
+// CHECK-ASAN: Function Attrs: noinline nounwind sanitize_address
+// CHECK-KASAN: Function Attrs: noinline nounwind sanitize_address
+// CHECK-HWASAN: Function Attrs: noinline nounwind sanitize_hwaddress
+
+__attribute__((no_sanitize("address")))
+int NoSanitizeQuoteAddress() {
+  return 0;
+}
+// CHECK-NOASAN: {{Function Attrs: noinline nounwind$}}
+// CHECK-ASAN: {{Function Attrs: noinline nounwind$}}
+// CHECK-KASAN: {{Function Attrs: noinline nounwind sanitize_address$}}
+// CHECK-HWASAN: {{Function Attrs: noinline nounwind sanitize_hwaddress$}}
+
+__attribute__((no_sanitize_address))
+int NoSanitizeAddress() {
+  return 0;
+}
+// CHECK-NOASAN: {{Function Attrs: noinline nounwind$}}
+// CHECK-ASAN: {{Function Attrs: noinline nounwind$}}
+// CHECK-KASAN: {{Function Attrs: noinline nounwind sanitize_address$}}
+// CHECK-HWASAN: {{Function Attrs: noinline nounwind sanitize_hwaddress$}}
+
+__attribute__((no_sanitize("kernel-address")))
+int NoSanitizeKernelAddress() {
+  return 0;
+}
+
+// CHECK-NOASAN: {{Function Attrs: noinline nounwind$}}
+// CHECK-ASAN: {{Function Attrs: noinline nounwind sanitize_address$}}
+// CHECK-KASAN: {{Function Attrs: noinline nounwind$}}
+// CHECK-HWASAN: {{Function Attrs: noinline nounwind sanitize_hwaddress$}}
+
+__attribute__((no_sanitize("hwaddress")))
+int NoSanitizeHWAddress() {
+  return 0;
+}
+
+// CHECK-NOASAN: {{Function Attrs: noinline nounwind$}}
+// CHECK-ASAN: {{Function Attrs: noinline nounwind sanitize_address$}}
+// CHECK-KASAN: {{Function Attrs: noinline nounwind sanitize_address$}}
+// CHECK-HWASAN: {{Function Attrs: noinline nounwind$}}
diff --git a/test/CodeGen/address-sanitizer-and-array-cookie.cpp b/test/CodeGen/address-sanitizer-and-array-cookie.cpp
index ea89537..e2267a1 100644
--- a/test/CodeGen/address-sanitizer-and-array-cookie.cpp
+++ b/test/CodeGen/address-sanitizer-and-array-cookie.cpp
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -emit-llvm -o - %s | FileCheck %s -check-prefix=PLAIN
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -emit-llvm -o - -fsanitize=address %s | FileCheck %s -check-prefix=ASAN
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -emit-llvm -o - -fsanitize=address -fsanitize-address-poison-class-member-array-new-cookie %s | FileCheck %s -check-prefix=ASAN-POISON-ALL-NEW-ARRAY
 
 typedef __typeof__(sizeof(0)) size_t;
 namespace std {
@@ -8,6 +9,7 @@
 }
 void *operator new[](size_t, const std::nothrow_t &) throw();
 void *operator new[](size_t, char *);
+void *operator new[](size_t, int, int);
 
 struct C {
   int x;
@@ -53,3 +55,10 @@
 }
 // ASAN-LABEL: CallPlacementNew
 // ASAN-NOT: __asan_poison_cxx_array_cookie
+
+C *CallNewWithArgs() {
+// ASAN-LABEL: CallNewWithArgs
+// ASAN-NOT: call void @__asan_poison_cxx_array_cookie
+// ASAN-POISON-ALL-NEW-ARRAY: call void @__asan_poison_cxx_array_cookie
+  return new (123, 456) C[20];
+}
diff --git a/test/CodeGen/address-space.c b/test/CodeGen/address-space.c
index 28b3954..d6a40f3 100644
--- a/test/CodeGen/address-space.c
+++ b/test/CodeGen/address-space.c
@@ -1,6 +1,5 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,X86,GIZ %s
-// RUN: %clang_cc1 -triple amdgcn -emit-llvm < %s | FileCheck -check-prefixes=CHECK,PIZ %s
-// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,AMDGIZ,GIZ %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,X86 %s
+// RUN: %clang_cc1 -triple amdgcn -emit-llvm < %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
 
 // CHECK: @foo = common addrspace(1) global
 int foo __attribute__((address_space(1)));
@@ -25,12 +24,10 @@
 
 // CHECK-LABEL: define void @test3()
 // X86: load i32 addrspace(2)*, i32 addrspace(2)** @B
-// AMDGIZ: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 addrspace(2)* addrspace(1)* @B to i32 addrspace(2)**)
-// PIZ: load i32 addrspace(2)*, i32 addrspace(2)* addrspace(4)* addrspacecast (i32 addrspace(2)* addrspace(1)* @B to i32 addrspace(2)* addrspace(4)*)
+// AMDGCN: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 addrspace(2)* addrspace(1)* @B to i32 addrspace(2)**)
 // CHECK: load i32, i32 addrspace(2)*
 // X86: load i32 addrspace(2)*, i32 addrspace(2)** @A
-// AMDGIZ: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 addrspace(2)* addrspace(1)* @A to i32 addrspace(2)**)
-// PIZ: load i32 addrspace(2)*, i32 addrspace(2)* addrspace(4)* addrspacecast (i32 addrspace(2)* addrspace(1)* @A to i32 addrspace(2)* addrspace(4)*)
+// AMDGCN: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 addrspace(2)* addrspace(1)* @A to i32 addrspace(2)**)
 // CHECK: store i32 {{.*}}, i32 addrspace(2)*
 void test3() {
   *A = *B;
@@ -42,10 +39,8 @@
 } MyStruct;
 
 // CHECK-LABEL: define void @test4(
-// GIZ: call void @llvm.memcpy.p0i8.p2i8
-// GIZ: call void @llvm.memcpy.p2i8.p0i8
-// PIZ: call void @llvm.memcpy.p4i8.p2i8
-// PIZ: call void @llvm.memcpy.p2i8.p4i8
+// CHECK: call void @llvm.memcpy.p0i8.p2i8
+// CHECK: call void @llvm.memcpy.p2i8.p0i8
 void test4(MyStruct __attribute__((address_space(2))) *pPtr) {
   MyStruct s = pPtr[0];
   pPtr[0] = s;
diff --git a/test/CodeGen/arm-arguments.c b/test/CodeGen/arm-arguments.c
index ec3e173..ef4e760 100644
--- a/test/CodeGen/arm-arguments.c
+++ b/test/CodeGen/arm-arguments.c
@@ -208,13 +208,13 @@
 // APCS-GNU: %[[a:.*]] = alloca %struct.s35, align 16
 // APCS-GNU: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
 // APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
-// APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
+// APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align {{[0-9]+}} %[[b]], i8* align {{[0-9]+}} %[[c]]
 // APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
 // APCS-GNU: load <4 x float>, <4 x float>* %[[d]], align 16
 // AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 8, %struct.s35* byval align 8)
 // AAPCS: %[[a:.*]] = alloca %struct.s35, align 16
 // AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
 // AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
-// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
+// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %[[b]], i8* align 8 %[[c]]
 // AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
 // AAPCS: load <4 x float>, <4 x float>* %[[d]], align 16
diff --git a/test/CodeGen/arm-fp16-arguments.c b/test/CodeGen/arm-fp16-arguments.c
index 65f076a..d739f4b 100644
--- a/test/CodeGen/arm-fp16-arguments.c
+++ b/test/CodeGen/arm-fp16-arguments.c
@@ -25,3 +25,27 @@
 // HARD: ret float [[BITCAST]]
 // NATIVE: [[LOAD:%.*]] = load half, half* @g
 // NATIVE: ret half [[LOAD]]
+
+_Float16 h;
+
+void t3(_Float16 a) { h = a; }
+// SOFT: define void @t3(i32 [[PARAM:%.*]])
+// SOFT: [[TRUNC:%.*]] = trunc i32 [[PARAM]] to i16
+// HARD: define arm_aapcs_vfpcc void @t3(float [[PARAM:%.*]])
+// HARD: [[BITCAST:%.*]] = bitcast float [[PARAM]] to i32
+// HARD: [[TRUNC:%.*]] = trunc i32 [[BITCAST]] to i16
+// CHECK: store i16 [[TRUNC]], i16* bitcast (half* @h to i16*)
+// NATIVE: define void @t3(half [[PARAM:%.*]])
+// NATIVE: store half [[PARAM]], half* @h
+
+_Float16 t4() { return h; }
+// SOFT: define i32 @t4()
+// HARD: define arm_aapcs_vfpcc float @t4()
+// NATIVE: define half @t4()
+// CHECK: [[LOAD:%.*]] = load i16, i16* bitcast (half* @h to i16*)
+// CHECK: [[ZEXT:%.*]] = zext i16 [[LOAD]] to i32
+// SOFT: ret i32 [[ZEXT]]
+// HARD: [[BITCAST:%.*]] = bitcast i32 [[ZEXT]] to float
+// HARD: ret float [[BITCAST]]
+// NATIVE: [[LOAD:%.*]] = load half, half* @h
+// NATIVE: ret half [[LOAD]]
diff --git a/test/CodeGen/arm64-be-bitfield.c b/test/CodeGen/arm64-be-bitfield.c
index 081eab8..cee59b8 100644
--- a/test/CodeGen/arm64-be-bitfield.c
+++ b/test/CodeGen/arm64-be-bitfield.c
@@ -7,6 +7,6 @@
 // IR: callee_b0f(i64 [[ARG:%.*]])
 // IR: store i64 [[ARG]], i64* [[PTR:%.*]], align 8
 // IR: [[BITCAST:%.*]] = bitcast i64* [[PTR]] to i8*
-// IR: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* [[BITCAST]], i64 4
+// IR: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 8 [[BITCAST]], i64 4
   return bp11.b2;
 }
diff --git a/test/CodeGen/arm_neon_intrinsics.c b/test/CodeGen/arm_neon_intrinsics.c
index 62888dd..28b4f11 100644
--- a/test/CodeGen/arm_neon_intrinsics.c
+++ b/test/CodeGen/arm_neon_intrinsics.c
@@ -4846,7 +4846,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
@@ -4873,7 +4873,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
@@ -4900,7 +4900,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
@@ -4927,7 +4927,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
@@ -4954,7 +4954,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
@@ -4981,7 +4981,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
@@ -5008,7 +5008,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
@@ -5035,7 +5035,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
@@ -5057,7 +5057,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
@@ -5084,7 +5084,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
@@ -5111,7 +5111,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
@@ -5133,7 +5133,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
@@ -5160,7 +5160,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
@@ -5187,7 +5187,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
@@ -5214,7 +5214,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
@@ -5241,7 +5241,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
@@ -5263,7 +5263,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
@@ -5587,7 +5587,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
@@ -5619,7 +5619,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
@@ -5651,7 +5651,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
@@ -5683,7 +5683,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
@@ -5715,7 +5715,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
@@ -5747,7 +5747,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
@@ -5779,7 +5779,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
@@ -5811,7 +5811,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
@@ -5836,7 +5836,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
@@ -5868,7 +5868,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
@@ -5900,7 +5900,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
@@ -5925,7 +5925,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
@@ -5957,7 +5957,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
@@ -5989,7 +5989,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
@@ -6021,7 +6021,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
@@ -6053,7 +6053,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
@@ -6078,7 +6078,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
@@ -6407,7 +6407,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
@@ -6444,7 +6444,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
@@ -6481,7 +6481,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
@@ -6518,7 +6518,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
@@ -6555,7 +6555,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
@@ -6592,7 +6592,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
@@ -6629,7 +6629,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
@@ -6666,7 +6666,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
@@ -6694,7 +6694,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
@@ -6731,7 +6731,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
@@ -6768,7 +6768,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
@@ -6796,7 +6796,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
@@ -6833,7 +6833,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
@@ -6870,7 +6870,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
@@ -6907,7 +6907,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
@@ -6944,7 +6944,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
@@ -6972,7 +6972,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
@@ -16199,7 +16199,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -16220,7 +16220,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -16246,7 +16246,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -16272,7 +16272,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -16293,7 +16293,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -16319,7 +16319,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -16345,7 +16345,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
@@ -16371,7 +16371,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
@@ -16397,7 +16397,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -16418,7 +16418,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -16444,7 +16444,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -16465,7 +16465,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -16491,7 +16491,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -16517,7 +16517,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
@@ -16543,7 +16543,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -16564,7 +16564,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -16590,7 +16590,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -16616,7 +16616,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
@@ -16642,7 +16642,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
@@ -16668,7 +16668,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
@@ -16694,7 +16694,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -16715,7 +16715,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -16741,7 +16741,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -16767,7 +16767,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -16793,7 +16793,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -16819,7 +16819,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -16845,7 +16845,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
@@ -16871,7 +16871,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
@@ -16897,7 +16897,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -16923,7 +16923,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -16944,7 +16944,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -16970,7 +16970,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -16996,7 +16996,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -17017,7 +17017,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -17043,7 +17043,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -17069,7 +17069,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
@@ -17095,7 +17095,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
@@ -17121,7 +17121,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -17142,7 +17142,7 @@
 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -17168,7 +17168,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -17192,7 +17192,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -17223,7 +17223,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -17254,7 +17254,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -17278,7 +17278,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -17309,7 +17309,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -17340,7 +17340,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
@@ -17371,7 +17371,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
@@ -17402,7 +17402,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -17426,7 +17426,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -17457,7 +17457,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -17481,7 +17481,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -17512,7 +17512,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -17543,7 +17543,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
@@ -17574,7 +17574,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -17598,7 +17598,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -17629,7 +17629,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -17660,7 +17660,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
@@ -17691,7 +17691,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
@@ -17722,7 +17722,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
@@ -17753,7 +17753,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -17777,7 +17777,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -17808,7 +17808,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -17839,7 +17839,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -17870,7 +17870,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -17901,7 +17901,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -17932,7 +17932,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
@@ -17963,7 +17963,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
@@ -17994,7 +17994,7 @@
 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -18025,7 +18025,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -18049,7 +18049,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -18080,7 +18080,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -18111,7 +18111,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -18135,7 +18135,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -18166,7 +18166,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -18197,7 +18197,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
@@ -18228,7 +18228,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
@@ -18259,7 +18259,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -18283,7 +18283,7 @@
 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -18314,7 +18314,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -18341,7 +18341,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -18377,7 +18377,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -18413,7 +18413,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -18440,7 +18440,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -18476,7 +18476,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -18512,7 +18512,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
@@ -18548,7 +18548,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
@@ -18584,7 +18584,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
@@ -18611,7 +18611,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -18647,7 +18647,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -18674,7 +18674,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -18710,7 +18710,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -18746,7 +18746,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
@@ -18782,7 +18782,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -18809,7 +18809,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -18845,7 +18845,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -18881,7 +18881,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
@@ -18917,7 +18917,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
@@ -18953,7 +18953,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
@@ -18989,7 +18989,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -19016,7 +19016,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -19052,7 +19052,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -19088,7 +19088,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -19124,7 +19124,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -19160,7 +19160,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
@@ -19196,7 +19196,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
@@ -19232,7 +19232,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
@@ -19268,7 +19268,7 @@
 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
@@ -19304,7 +19304,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -19331,7 +19331,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -19367,7 +19367,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -19403,7 +19403,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -19430,7 +19430,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -19466,7 +19466,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
@@ -19502,7 +19502,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
@@ -19538,7 +19538,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
@@ -19574,7 +19574,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
@@ -19601,7 +19601,7 @@
 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
@@ -20474,7 +20474,7 @@
 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !3
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false)
 // CHECK:   ret void
 int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
   return vtrn_s8(a, b);
@@ -20493,7 +20493,7 @@
 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !6
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
   return vtrn_s16(a, b);
@@ -20512,7 +20512,7 @@
 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !noalias !9
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
   return vtrn_s32(a, b);
@@ -20529,7 +20529,7 @@
 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !12
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false)
 // CHECK:   ret void
 uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
   return vtrn_u8(a, b);
@@ -20548,7 +20548,7 @@
 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !15
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
   return vtrn_u16(a, b);
@@ -20567,7 +20567,7 @@
 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !noalias !18
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
   return vtrn_u32(a, b);
@@ -20586,7 +20586,7 @@
 // CHECK:   store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]], !noalias !21
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
   return vtrn_f32(a, b);
@@ -20603,7 +20603,7 @@
 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !24
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false)
 // CHECK:   ret void
 poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
   return vtrn_p8(a, b);
@@ -20622,7 +20622,7 @@
 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !27
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
   return vtrn_p16(a, b);
@@ -20639,7 +20639,7 @@
 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !30
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false)
 // CHECK:   ret void
 int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
   return vtrnq_s8(a, b);
@@ -20658,7 +20658,7 @@
 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !33
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
   return vtrnq_s16(a, b);
@@ -20677,7 +20677,7 @@
 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !36
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
   return vtrnq_s32(a, b);
@@ -20694,7 +20694,7 @@
 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !39
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false)
 // CHECK:   ret void
 uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
   return vtrnq_u8(a, b);
@@ -20713,7 +20713,7 @@
 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !42
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
   return vtrnq_u16(a, b);
@@ -20732,7 +20732,7 @@
 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !45
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
   return vtrnq_u32(a, b);
@@ -20751,7 +20751,7 @@
 // CHECK:   store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], !noalias !48
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
   return vtrnq_f32(a, b);
@@ -20768,7 +20768,7 @@
 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !51
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false)
 // CHECK:   ret void
 poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
   return vtrnq_p8(a, b);
@@ -20787,7 +20787,7 @@
 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !54
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
   return vtrnq_p16(a, b);
@@ -20968,7 +20968,7 @@
 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !57
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false)
 // CHECK:   ret void
 int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
   return vuzp_s8(a, b);
@@ -20987,7 +20987,7 @@
 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !60
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
   return vuzp_s16(a, b);
@@ -21006,7 +21006,7 @@
 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !noalias !63
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
   return vuzp_s32(a, b);
@@ -21023,7 +21023,7 @@
 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !66
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false)
 // CHECK:   ret void
 uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
   return vuzp_u8(a, b);
@@ -21042,7 +21042,7 @@
 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !69
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
   return vuzp_u16(a, b);
@@ -21061,7 +21061,7 @@
 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !noalias !72
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
   return vuzp_u32(a, b);
@@ -21080,7 +21080,7 @@
 // CHECK:   store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]], !noalias !75
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
   return vuzp_f32(a, b);
@@ -21097,7 +21097,7 @@
 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !78
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false)
 // CHECK:   ret void
 poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
   return vuzp_p8(a, b);
@@ -21116,7 +21116,7 @@
 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !81
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
   return vuzp_p16(a, b);
@@ -21133,7 +21133,7 @@
 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !84
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false)
 // CHECK:   ret void
 int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
   return vuzpq_s8(a, b);
@@ -21152,7 +21152,7 @@
 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !87
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
   return vuzpq_s16(a, b);
@@ -21171,7 +21171,7 @@
 // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !90
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
   return vuzpq_s32(a, b);
@@ -21188,7 +21188,7 @@
 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !93
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false)
 // CHECK:   ret void
 uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
   return vuzpq_u8(a, b);
@@ -21207,7 +21207,7 @@
 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !96
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
   return vuzpq_u16(a, b);
@@ -21226,7 +21226,7 @@
 // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !99
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
   return vuzpq_u32(a, b);
@@ -21245,7 +21245,7 @@
 // CHECK:   store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], !noalias !102
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
   return vuzpq_f32(a, b);
@@ -21262,7 +21262,7 @@
 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !105
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false)
 // CHECK:   ret void
 poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
   return vuzpq_p8(a, b);
@@ -21281,7 +21281,7 @@
 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !108
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
   return vuzpq_p16(a, b);
@@ -21298,7 +21298,7 @@
 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !111
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false)
 // CHECK:   ret void
 int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
   return vzip_s8(a, b);
@@ -21317,7 +21317,7 @@
 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !114
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
   return vzip_s16(a, b);
@@ -21336,7 +21336,7 @@
 // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !noalias !117
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
   return vzip_s32(a, b);
@@ -21353,7 +21353,7 @@
 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !120
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false)
 // CHECK:   ret void
 uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
   return vzip_u8(a, b);
@@ -21372,7 +21372,7 @@
 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !123
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
   return vzip_u16(a, b);
@@ -21391,7 +21391,7 @@
 // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !noalias !126
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
   return vzip_u32(a, b);
@@ -21410,7 +21410,7 @@
 // CHECK:   store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]], !noalias !129
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
   return vzip_f32(a, b);
@@ -21427,7 +21427,7 @@
 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !132
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false)
 // CHECK:   ret void
 poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
   return vzip_p8(a, b);
@@ -21446,7 +21446,7 @@
 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !135
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false)
 // CHECK:   ret void
 poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
   return vzip_p16(a, b);
@@ -21463,7 +21463,7 @@
 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !138
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false)
 // CHECK:   ret void
 int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
   return vzipq_s8(a, b);
@@ -21482,7 +21482,7 @@
 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !141
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
   return vzipq_s16(a, b);
@@ -21501,7 +21501,7 @@
 // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !144
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
   return vzipq_s32(a, b);
@@ -21518,7 +21518,7 @@
 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !147
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false)
 // CHECK:   ret void
 uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
   return vzipq_u8(a, b);
@@ -21537,7 +21537,7 @@
 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !150
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
   return vzipq_u16(a, b);
@@ -21556,7 +21556,7 @@
 // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !153
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
   return vzipq_u32(a, b);
@@ -21575,7 +21575,7 @@
 // CHECK:   store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], !noalias !156
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
   return vzipq_f32(a, b);
@@ -21592,7 +21592,7 @@
 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !159
 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false)
 // CHECK:   ret void
 poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
   return vzipq_p8(a, b);
@@ -21611,7 +21611,7 @@
 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !162
 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false)
 // CHECK:   ret void
 poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
   return vzipq_p16(a, b);
diff --git a/test/CodeGen/array-init.c b/test/CodeGen/array-init.c
new file mode 100644
index 0000000..fa54994
--- /dev/null
+++ b/test/CodeGen/array-init.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -emit-llvm -o - | FileCheck %s
+
+// CHECK: @{{.*}}.a1 = internal constant [5 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0]
+// CHECK: @{{.*}}.a2 = internal constant [5 x i32] zeroinitializer
+// CHECK: @{{.*}}.a3 = internal constant [5 x i32] zeroinitializer
+
+void testConstArrayInits(void)
+{
+  const int a1[5] = {0,1,2};
+  const int a2[5] = {0,0,0};
+  const int a3[5] = {0};
+}
diff --git a/test/CodeGen/artificial.c b/test/CodeGen/artificial.c
new file mode 100644
index 0000000..5db6a67
--- /dev/null
+++ b/test/CodeGen/artificial.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple=x86_64-unknown-linux-gnu -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s
+
+extern void foo();
+// CHECK: !DISubprogram(name: "foo"
+// CHECK-SAME: flags: DIFlagArtificial
+inline void __attribute__((artificial)) foo() {}
+
+void baz() {
+  foo();
+}
diff --git a/test/CodeGen/atomic-arm64.c b/test/CodeGen/atomic-arm64.c
index 5cae3d1..0e79846 100644
--- a/test/CodeGen/atomic-arm64.c
+++ b/test/CodeGen/atomic-arm64.c
@@ -65,7 +65,7 @@
 // CHECK:      [[TEMP:%.*]] = alloca [[QUAD_T:%.*]], align 8
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i8*
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[QUAD_T]]* {{%.*}} to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 32, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T0]], i8* align 8 [[T1]], i64 32, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i256*
 // CHECK-NEXT: [[T1:%.*]] = bitcast i256* [[T0]] to i8*
 // CHECK-NEXT: call void @__atomic_store(i64 32, i8* bitcast ([[QUAD_T]]* @a_pointer_quad to i8*), i8* [[T1]], i32 5)
diff --git a/test/CodeGen/attr-mprefer-vector-width.c b/test/CodeGen/attr-mprefer-vector-width.c
new file mode 100644
index 0000000..30edba5
--- /dev/null
+++ b/test/CodeGen/attr-mprefer-vector-width.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -mprefer-vector-width=128 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK128
+// RUN: %clang_cc1 -mprefer-vector-width=256 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK256
+// RUN: %clang_cc1 -mprefer-vector-width=none -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECKNONE
+
+int baz(int a) { return 4; }
+
+// CHECK128: baz{{.*}} #0
+// CHECK128: #0 = {{.*}}"prefer-vector-width"="128"
+
+// CHECK256: baz{{.*}} #0
+// CHECK256: #0 = {{.*}}"prefer-vector-width"="256"
+
+// CHECKNONE: baz{{.*}} #0
+// CHECKNONE-NOT: #0 = {{.*}}"prefer-vector-width"="none"
diff --git a/test/CodeGen/attr-target-mv-func-ptrs.c b/test/CodeGen/attr-target-mv-func-ptrs.c
new file mode 100644
index 0000000..5df9a92
--- /dev/null
+++ b/test/CodeGen/attr-target-mv-func-ptrs.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+int __attribute__((target("sse4.2"))) foo(int i) { return 0; }
+int __attribute__((target("arch=sandybridge"))) foo(int);
+int __attribute__((target("arch=ivybridge"))) foo(int i) {return 1;}
+int __attribute__((target("default"))) foo(int i) { return 2; }
+
+typedef int (*FuncPtr)(int);
+void func(FuncPtr);
+
+int bar() {
+  func(foo);
+  FuncPtr Free = &foo;
+  FuncPtr Free2 = foo;
+
+  return 0;
+  return Free(1) + Free(2);
+}
+
+// CHECK: @foo.ifunc = ifunc i32 (i32), i32 (i32)* ()* @foo.resolver
+// CHECK: define i32 @foo.sse4.2(
+// CHECK: ret i32 0
+// CHECK: define i32 @foo.arch_ivybridge(
+// CHECK: ret i32 1
+// CHECK: define i32 @foo(
+// CHECK: ret i32 2
+
+// CHECK: define i32 @bar()
+// CHECK: call void @func(i32 (i32)* @foo.ifunc)
+// CHECK: store i32 (i32)* @foo.ifunc
+// CHECK: store i32 (i32)* @foo.ifunc
+
+// CHECK: declare i32 @foo.arch_sandybridge(
diff --git a/test/CodeGen/attr-target-mv-va-args.c b/test/CodeGen/attr-target-mv-va-args.c
new file mode 100644
index 0000000..b33f841
--- /dev/null
+++ b/test/CodeGen/attr-target-mv-va-args.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+int __attribute__((target("sse4.2"))) foo(int i, ...) { return 0; }
+int __attribute__((target("arch=sandybridge"))) foo(int i, ...);
+int __attribute__((target("arch=ivybridge"))) foo(int i, ...) {return 1;}
+int __attribute__((target("default"))) foo(int i, ...) { return 2; }
+
+int bar() {
+  return foo(1, 'a', 1.1) + foo(2, 2.2, "asdf");
+}
+
+// CHECK: @foo.ifunc = ifunc i32 (i32, ...), i32 (i32, ...)* ()* @foo.resolver
+// CHECK: define i32 @foo.sse4.2(i32 %i, ...)
+// CHECK: ret i32 0
+// CHECK: define i32 @foo.arch_ivybridge(i32 %i, ...)
+// CHECK: ret i32 1
+// CHECK: define i32 @foo(i32 %i, ...)
+// CHECK: ret i32 2
+// CHECK: define i32 @bar()
+// CHECK: call i32 (i32, ...) @foo.ifunc(i32 1, i32 97, double
+// CHECK: call i32 (i32, ...) @foo.ifunc(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds 
+// CHECK: define i32 (i32, ...)* @foo.resolver() comdat
+// CHECK: ret i32 (i32, ...)* @foo.arch_sandybridge
+// CHECK: ret i32 (i32, ...)* @foo.arch_ivybridge
+// CHECK: ret i32 (i32, ...)* @foo.sse4.2
+// CHECK: ret i32 (i32, ...)* @foo
+// CHECK: declare i32 @foo.arch_sandybridge(i32, ...)
diff --git a/test/CodeGen/attr-target-mv.c b/test/CodeGen/attr-target-mv.c
new file mode 100644
index 0000000..0085a15
--- /dev/null
+++ b/test/CodeGen/attr-target-mv.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+int __attribute__((target("sse4.2"))) foo(void) { return 0; }
+int __attribute__((target("arch=sandybridge"))) foo(void);
+int __attribute__((target("arch=ivybridge"))) foo(void) {return 1;}
+int __attribute__((target("default"))) foo(void) { return 2; }
+
+int bar() {
+  return foo();
+}
+
+inline int __attribute__((target("sse4.2"))) foo_inline(void) { return 0; }
+inline int __attribute__((target("arch=sandybridge"))) foo_inline(void);
+inline int __attribute__((target("arch=ivybridge"))) foo_inline(void) {return 1;}
+inline int __attribute__((target("default"))) foo_inline(void) { return 2; }
+
+int bar2() {
+  return foo_inline();
+}
+
+inline __attribute__((target("default"))) void foo_decls(void);
+inline __attribute__((target("sse4.2"))) void foo_decls(void);
+void bar3() {
+  foo_decls();
+}
+inline __attribute__((target("default"))) void foo_decls(void) {}
+inline __attribute__((target("sse4.2"))) void foo_decls(void) {}
+
+inline __attribute__((target("default"))) void foo_multi(void) {}
+inline __attribute__((target("avx,sse4.2"))) void foo_multi(void) {}
+inline __attribute__((target("sse4.2,fma4"))) void foo_multi(void) {}
+inline __attribute__((target("arch=ivybridge,fma4,sse4.2"))) void foo_multi(void) {}
+void bar4() {
+  foo_multi();
+}
+
+// CHECK: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver
+// CHECK: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver
+// CHECK: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver
+
+// CHECK: define i32 @foo.sse4.2()
+// CHECK: ret i32 0
+// CHECK: define i32 @foo.arch_ivybridge()
+// CHECK: ret i32 1
+// CHECK: define i32 @foo()
+// CHECK: ret i32 2
+// CHECK: define i32 @bar()
+// CHECK: call i32 @foo.ifunc()
+
+// CHECK: define i32 ()* @foo.resolver() comdat
+// CHECK: call void @__cpu_indicator_init()
+// CHECK: ret i32 ()* @foo.arch_sandybridge
+// CHECK: ret i32 ()* @foo.arch_ivybridge
+// CHECK: ret i32 ()* @foo.sse4.2
+// CHECK: ret i32 ()* @foo
+
+// CHECK: define i32 @bar2()
+// CHECK: call i32 @foo_inline.ifunc()
+
+// CHECK: define i32 ()* @foo_inline.resolver() comdat
+// CHECK: call void @__cpu_indicator_init()
+// CHECK: ret i32 ()* @foo_inline.arch_sandybridge
+// CHECK: ret i32 ()* @foo_inline.arch_ivybridge
+// CHECK: ret i32 ()* @foo_inline.sse4.2
+// CHECK: ret i32 ()* @foo_inline
+
+// CHECK: define void @bar3()
+// CHECK: call void @foo_decls.ifunc()
+
+// CHECK: define void ()* @foo_decls.resolver() comdat
+// CHECK: ret void ()* @foo_decls.sse4.2
+// CHECK: ret void ()* @foo_decls
+
+// CHECK: declare i32 @foo.arch_sandybridge()
+
+// CHECK: define available_externally i32 @foo_inline.sse4.2()
+// CHECK: ret i32 0
+
+// CHECK: declare i32 @foo_inline.arch_sandybridge()
+//
+// CHECK: define available_externally i32 @foo_inline.arch_ivybridge()
+// CHECK: ret i32 1
+// CHECK: define available_externally i32 @foo_inline()
+// CHECK: ret i32 2
+
+// CHECK: define available_externally void @foo_decls()
+// CHECK: define available_externally void @foo_decls.sse4.2()
+
+// CHECK: define available_externally void @foo_multi.avx_sse4.2()
+// CHECK: define available_externally void @foo_multi.fma4_sse4.2()
+// CHECK: define available_externally void @foo_multi.arch_ivybridge_fma4_sse4.2()
+
diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c
index f277767..ca544e4 100644
--- a/test/CodeGen/attr-target-x86.c
+++ b/test/CodeGen/attr-target-x86.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu x86-64 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -emit-llvm %s -o - | FileCheck %s
 
 int baz(int a) { return 4; }
 
@@ -10,6 +10,7 @@
 int __attribute__((target("no-sse2"))) echidna(int a) { return 4; }
 
 int __attribute__((target("sse4"))) panda(int a) { return 4; }
+int __attribute__((target("no-sse4"))) narwhal(int a) { return 4; }
 
 int bar(int a) { return baz(a) + foo(a); }
 
@@ -18,7 +19,18 @@
 
 int __attribute__((target("no-mmx"))) qq(int a) { return 40; }
 
-int __attribute__((target("arch=lakemont"))) lake(int a) { return 4; }
+int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; }
+
+int use_before_def(void);
+int useage(void){
+  return use_before_def();
+}
+
+// Adding the attribute to a definition does update it in IR.
+int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
+  return 5;
+}
+
 
 // Check that we emit the additional subtarget and cpu features for foo and not for baz or bar.
 // CHECK: baz{{.*}} #0
@@ -29,15 +41,18 @@
 // CHECK: koala{{.*}} #0
 // CHECK: echidna{{.*}} #2
 // CHECK: panda{{.*}} #3
+// CHECK: narwhal{{.*}} #4
 // CHECK: bar{{.*}} #0
 // CHECK: qux{{.*}} #1
-// CHECK: qax{{.*}} #4
-// CHECK: qq{{.*}} #5
-// CHECK: lake{{.*}} #6
-// CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
-// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt"
-// CHECK: #3 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: #4 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes"
-// CHECK: #5 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+sse,+sse2,+x87,-3dnow,-3dnowa,-mmx"
-// CHECK: #6 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx,+sse,+sse2"
+// CHECK: qax{{.*}} #5
+// CHECK: qq{{.*}} #6
+// CHECK: lake{{.*}} #7
+// CHECK: use_before_def{{.*}} #7
+// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87"
+// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
+// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx"
+// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx"
diff --git a/test/CodeGen/attr-x86-interrupt.c b/test/CodeGen/attr-x86-interrupt.c
index 0aca1f5..700a575 100644
--- a/test/CodeGen/attr-x86-interrupt.c
+++ b/test/CodeGen/attr-x86-interrupt.c
@@ -23,12 +23,12 @@
 // X86_LINUX: "disable-tail-calls"="true"
 // X86_LINUX-NOT: "disable-tail-calls"="false"
 // X86_64_WIN: @llvm.used = appending global [2 x i8*] [i8* bitcast (void (i32*, i64)* @foo7 to i8*), i8* bitcast (void (i32*)* @foo8 to i8*)], section "llvm.metadata"
-// X86_64_WIN: define x86_intrcc void @foo7(i32* %{{.+}}, i64 %{{.+}})
-// X86_64_WIN: define x86_intrcc void @foo8(i32* %{{.+}})
+// X86_64_WIN: define dso_local x86_intrcc void @foo7(i32* %{{.+}}, i64 %{{.+}})
+// X86_64_WIN: define dso_local x86_intrcc void @foo8(i32* %{{.+}})
 // X86_64_Win: "disable-tail-calls"="true"
 // X86_64_Win-NOT: "disable-tail-calls"="false"
 // X86_WIN: @llvm.used = appending global [2 x i8*] [i8* bitcast (void (i32*, i32)* @foo7 to i8*), i8* bitcast (void (i32*)* @foo8 to i8*)], section "llvm.metadata"
-// X86_WIN: define x86_intrcc void @foo7(i32* %{{.+}}, i32 %{{.+}})
-// X86_WIN: define x86_intrcc void @foo8(i32* %{{.+}})
+// X86_WIN: define dso_local x86_intrcc void @foo7(i32* %{{.+}}, i32 %{{.+}})
+// X86_WIN: define dso_local x86_intrcc void @foo8(i32* %{{.+}})
 // X86_Win: "disable-tail-calls"="true"
 // X86_Win-NOT: "disable-tail-calls"="false"
diff --git a/test/CodeGen/avx512bitalg-builtins.c b/test/CodeGen/avx512bitalg-builtins.c
new file mode 100644
index 0000000..5770c66
--- /dev/null
+++ b/test/CodeGen/avx512bitalg-builtins.c
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m512i test_mm512_popcnt_epi16(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_popcnt_epi16
+  // CHECK: @llvm.ctpop.v32i16
+  return _mm512_popcnt_epi16(__A);
+}
+
+__m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_popcnt_epi16
+  // CHECK: @llvm.ctpop.v32i16
+  // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
+  return _mm512_mask_popcnt_epi16(__A, __U, __B);
+}
+__m512i test_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_popcnt_epi16
+  // CHECK: @llvm.ctpop.v32i16
+  // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
+  return _mm512_maskz_popcnt_epi16(__U, __B);
+}
+
+__m512i test_mm512_popcnt_epi8(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_popcnt_epi8
+  // CHECK: @llvm.ctpop.v64i8
+  return _mm512_popcnt_epi8(__A);
+}
+
+__m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_popcnt_epi8
+  // CHECK: @llvm.ctpop.v64i8
+  // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+  return _mm512_mask_popcnt_epi8(__A, __U, __B);
+}
+__m512i test_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_popcnt_epi8
+  // CHECK: @llvm.ctpop.v64i8
+  // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+  return _mm512_maskz_popcnt_epi8(__U, __B);
+}
+
+__mmask64 test_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_bitshuffle_epi64_mask
+  // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.512
+  return _mm512_mask_bitshuffle_epi64_mask(__U, __A, __B);
+}
+
+__mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_bitshuffle_epi64_mask
+  // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.512
+  return _mm512_bitshuffle_epi64_mask(__A, __B);
+}
+
diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c
index eb7b2a5..3b601f4 100644
--- a/test/CodeGen/avx512bw-builtins.c
+++ b/test/CodeGen/avx512bw-builtins.c
@@ -984,47 +984,53 @@
 
 __m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mulhrs_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.512
+  // CHECK: @llvm.x86.avx512.pmul.hr.sw.512
   return _mm512_mulhrs_epi16(__A,__B); 
 }
 __m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A,        __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_mulhrs_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.512
+  // CHECK: @llvm.x86.avx512.pmul.hr.sw.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_mask_mulhrs_epi16(__W,__U,__A,__B); 
 }
 __m512i test_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_mulhrs_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.512
+  // CHECK: @llvm.x86.avx512.pmul.hr.sw.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_maskz_mulhrs_epi16(__U,__A,__B); 
 }
 __m512i test_mm512_mulhi_epi16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mulhi_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmulh.w.512
+  // CHECK: @llvm.x86.avx512.pmulh.w.512
   return _mm512_mulhi_epi16(__A,__B); 
 }
 __m512i test_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,       __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_mulhi_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmulh.w.512
+  // CHECK: @llvm.x86.avx512.pmulh.w.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_mask_mulhi_epi16(__W,__U,__A,__B); 
 }
 __m512i test_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_mulhi_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmulh.w.512
+  // CHECK: @llvm.x86.avx512.pmulh.w.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_maskz_mulhi_epi16(__U,__A,__B); 
 }
 __m512i test_mm512_mulhi_epu16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mulhi_epu16
-  // CHECK: @llvm.x86.avx512.mask.pmulhu.w.512
+  // CHECK: @llvm.x86.avx512.pmulhu.w.512
   return _mm512_mulhi_epu16(__A,__B); 
 }
 __m512i test_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A,       __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_mulhi_epu16
-  // CHECK: @llvm.x86.avx512.mask.pmulhu.w.512
+  // CHECK: @llvm.x86.avx512.pmulhu.w.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_mask_mulhi_epu16(__W,__U,__A,__B); 
 }
 __m512i test_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_mulhi_epu16
-  // CHECK: @llvm.x86.avx512.mask.pmulhu.w.512
+  // CHECK: @llvm.x86.avx512.pmulhu.w.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_maskz_mulhi_epu16(__U,__A,__B); 
 }
 
@@ -1626,16 +1632,25 @@
   return _mm512_maskz_set1_epi8(__M, __A); 
 }
 
-__mmask64 test_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
+__mmask64 test_mm512_kunpackd(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: @test_mm512_kunpackd
-  // CHECK: @llvm.x86.avx512.kunpck.dq
-  return _mm512_kunpackd(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // CHECK: [[LHS2:%.*]] = shufflevector <64 x i1> [[LHS]], <64 x i1> [[LHS]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  // CHECK: [[RHS2:%.*]] = shufflevector <64 x i1> [[RHS]], <64 x i1> [[RHS]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  // CHECK: [[CONCAT:%.*]] = shufflevector <32 x i1> [[RHS2]], <32 x i1> [[LHS2]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+  // CHECK: bitcast <64 x i1> [[CONCAT]] to i64
+  return _mm512_mask_cmpneq_epu8_mask(_mm512_kunpackd(_mm512_cmpneq_epu8_mask(__B, __A),_mm512_cmpneq_epu8_mask(__C, __D)), __E, __F); 
 }
 
-__mmask32 test_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
+__mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: @test_mm512_kunpackw
-  // CHECK: @llvm.x86.avx512.kunpck.wd
-  return _mm512_kunpackw(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // CHECK: [[LHS2:%.*]] = shufflevector <32 x i1> [[LHS]], <32 x i1> [[LHS]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: [[RHS2:%.*]] = shufflevector <32 x i1> [[RHS]], <32 x i1> [[RHS]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: [[CONCAT:%.*]] = shufflevector <16 x i1> [[RHS2]], <16 x i1> [[LHS2]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  return _mm512_mask_cmpneq_epu16_mask(_mm512_kunpackw(_mm512_cmpneq_epu16_mask(__B, __A),_mm512_cmpneq_epu16_mask(__C, __D)), __E, __F); 
 }
 
 __m512i test_mm512_mask_loadu_epi16(__m512i __W, __mmask32 __U, void const *__P) {
@@ -1668,7 +1683,8 @@
 }
 __mmask64 test_mm512_test_epi8_mask(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_test_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestm.b.512
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <64 x i8> %{{.*}}, %{{.*}}
   return _mm512_test_epi8_mask(__A, __B); 
 }
 
@@ -1679,49 +1695,61 @@
 }
 __mmask64 test_mm512_mask_test_epi8_mask(__mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_test_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestm.b.512
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <64 x i8> %{{.*}}, %{{.*}}
+  // CHECK: and <64 x i1> %{{.*}}, %{{.*}}
   return _mm512_mask_test_epi8_mask(__U, __A, __B); 
 }
 
 __mmask32 test_mm512_test_epi16_mask(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_test_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestm.w.
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <32 x i16> %{{.*}}, %{{.*}}
   return _mm512_test_epi16_mask(__A, __B); 
 }
 
 __mmask32 test_mm512_mask_test_epi16_mask(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_test_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestm.w.
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <32 x i16> %{{.*}}, %{{.*}}
+  // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
   return _mm512_mask_test_epi16_mask(__U, __A, __B); 
 }
 
 __mmask64 test_mm512_testn_epi8_mask(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_testn_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.b.
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <64 x i8> %{{.*}}, %{{.*}}
   return _mm512_testn_epi8_mask(__A, __B); 
 }
 
 __mmask64 test_mm512_mask_testn_epi8_mask(__mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_testn_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.b.
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <64 x i8> %{{.*}}, %{{.*}}
+  // CHECK: and <64 x i1> %{{.*}}, %{{.*}}
   return _mm512_mask_testn_epi8_mask(__U, __A, __B); 
 }
 
 __mmask32 test_mm512_testn_epi16_mask(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_testn_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.w.
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <32 x i16> %{{.*}}, %{{.*}}
   return _mm512_testn_epi16_mask(__A, __B); 
 }
 
 __mmask32 test_mm512_mask_testn_epi16_mask(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_testn_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.w.
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <32 x i16> %{{.*}}, %{{.*}}
+  // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
   return _mm512_mask_testn_epi16_mask(__U, __A, __B); 
 }
 
 __mmask64 test_mm512_movepi8_mask(__m512i __A) {
   // CHECK-LABEL: @test_mm512_movepi8_mask
-  // CHECK: @llvm.x86.avx512.cvtb2mask.512
+  // CHECK: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <64 x i1> [[CMP]] to i64
   return _mm512_movepi8_mask(__A); 
 }
 
@@ -1919,7 +1947,8 @@
 
 __mmask32 test_mm512_movepi16_mask(__m512i __A) {
   // CHECK-LABEL: @test_mm512_movepi16_mask
-  // CHECK: @llvm.x86.avx512.cvtw2mask.512
+  // CHECK: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <32 x i1> [[CMP]] to i32
   return _mm512_movepi16_mask(__A); 
 }
 
diff --git a/test/CodeGen/avx512cdintrin.c b/test/CodeGen/avx512cdintrin.c
index a286018..e01d277 100644
--- a/test/CodeGen/avx512cdintrin.c
+++ b/test/CodeGen/avx512cdintrin.c
@@ -68,14 +68,40 @@
   return _mm512_maskz_lzcnt_epi64(__U,__A); 
 }
 
-__m512i test_mm512_broadcastmb_epi64(__mmask8 __A) {
+__m512i test_mm512_broadcastmb_epi64(__m512i a, __m512i b) {
   // CHECK-LABEL: @test_mm512_broadcastmb_epi64
-  // CHECK: @llvm.x86.avx512.broadcastmb.512
-  return _mm512_broadcastmb_epi64(__A); 
+  // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}}
+  // CHECK: zext i8 %{{.*}} to i64
+  // CHECK: insertelement <8 x i64> undef, i64 %{{.*}}, i32 0
+  // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 1
+  // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 2
+  // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 3
+  // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 4
+  // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 5
+  // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 6
+  // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 7
+  return _mm512_broadcastmb_epi64(_mm512_cmpeq_epu64_mask ( a, b)); 
 }
 
-__m512i test_mm512_broadcastmw_epi32(__mmask16 __A) {
+__m512i test_mm512_broadcastmw_epi32(__m512i a, __m512i b) {
   // CHECK-LABEL: @test_mm512_broadcastmw_epi32
-  // CHECK: @llvm.x86.avx512.broadcastmw.512
-  return _mm512_broadcastmw_epi32(__A); 
+  // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: zext i16 %{{.*}} to i32
+  // CHECK: insertelement <16 x i32> undef, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  return _mm512_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b)); 
 }
diff --git a/test/CodeGen/avx512dq-builtins.c b/test/CodeGen/avx512dq-builtins.c
index 1b21ca3..47943c5 100644
--- a/test/CodeGen/avx512dq-builtins.c
+++ b/test/CodeGen/avx512dq-builtins.c
@@ -923,7 +923,8 @@
 
 __mmask16 test_mm512_movepi32_mask(__m512i __A) {
   // CHECK-LABEL: @test_mm512_movepi32_mask
-  // CHECK: @llvm.x86.avx512.cvtd2mask.512
+  // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <16 x i1> [[CMP]] to i16
   return _mm512_movepi32_mask(__A); 
 }
 
@@ -943,7 +944,8 @@
 
 __mmask8 test_mm512_movepi64_mask(__m512i __A) {
   // CHECK-LABEL: @test_mm512_movepi64_mask
-  // CHECK: @llvm.x86.avx512.cvtq2mask.512
+  // CHECK: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <8 x i1> [[CMP]] to i8
   return _mm512_movepi64_mask(__A); 
 }
 
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index b154bca..5847f31 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -385,7 +385,9 @@
 __mmask16 test_mm512_knot(__mmask16 a)
 {
   // CHECK-LABEL: @test_mm512_knot
-  // CHECK: @llvm.x86.avx512.knot.w
+  // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: bitcast <16 x i1> [[NOT]] to i16
   return _mm512_knot(a);
 }
 
@@ -999,49 +1001,53 @@
 
 __mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmp_round_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
   return _mm512_cmp_round_ps_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask16 test_mm512_mask_cmp_round_ps_mask(__mmask16 m, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_round_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_round_ps_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask16 test_mm512_cmp_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmp_ps_mask
-  // CHECKn: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
   return _mm512_cmp_ps_mask(a, b, 0);
 }
 
 __mmask16 test_mm512_mask_cmp_ps_mask(__mmask16 m, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_ps_mask(m, a, b, 0);
 }
 
 __mmask8 test_mm512_cmp_round_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmp_round_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
   return _mm512_cmp_round_pd_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask8 test_mm512_mask_cmp_round_pd_mask(__mmask8 m, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_round_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_round_pd_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask8 test_mm512_cmp_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
   return _mm512_cmp_pd_mask(a, b, 0);
 }
 
 __mmask8 test_mm512_mask_cmp_pd_mask(__mmask8 m, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_pd_mask(m, a, b, 0);
 }
 
@@ -3857,39 +3863,48 @@
 }
 __mmask16 test_mm512_testn_epi32_mask(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_testn_epi32_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.d.512
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}}
   return _mm512_testn_epi32_mask(__A, __B); 
 }
 
 __mmask16 test_mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_testn_epi32_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.d.512
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
   return _mm512_mask_testn_epi32_mask(__U, __A, __B); 
 }
 
 __mmask8 test_mm512_testn_epi64_mask(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_testn_epi64_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.q.512
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}}
   return _mm512_testn_epi64_mask(__A, __B); 
 }
 
 __mmask8 test_mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_testn_epi64_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.q.512
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
   return _mm512_mask_testn_epi64_mask(__U, __A, __B); 
 }
 
 __mmask16 test_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
 {
   // CHECK-LABEL: @test_mm512_mask_test_epi32_mask 
-  // CHECK: @llvm.x86.avx512.ptestm.d.512
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <16 x i32> %{{.*}}, %{{.*}}
   return _mm512_mask_test_epi32_mask (__U,__A,__B);
 }
 
 __mmask8 test_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
 {
   // CHECK-LABEL: @test_mm512_mask_test_epi64_mask 
-  // CHECK: @llvm.x86.avx512.ptestm.q.512
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <8 x i64> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
   return _mm512_mask_test_epi64_mask (__U,__A,__B);
 }
 
@@ -4484,73 +4499,81 @@
 
 __m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_shuffle_f32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
   return _mm512_shuffle_f32x4(__A, __B, 4); 
 }
 
 __m512 test_mm512_mask_shuffle_f32x4(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_shuffle_f32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_shuffle_f32x4(__W, __U, __A, __B, 4); 
 }
 
 __m512 test_mm512_maskz_shuffle_f32x4(__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_shuffle_f32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_shuffle_f32x4(__U, __A, __B, 4); 
 }
 
 __m512d test_mm512_shuffle_f64x2(__m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_shuffle_f64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
   return _mm512_shuffle_f64x2(__A, __B, 4); 
 }
 
 __m512d test_mm512_mask_shuffle_f64x2(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_shuffle_f64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_shuffle_f64x2(__W, __U, __A, __B, 4); 
 }
 
 __m512d test_mm512_maskz_shuffle_f64x2(__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_shuffle_f64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_shuffle_f64x2(__U, __A, __B, 4); 
 }
 
 __m512i test_mm512_shuffle_i32x4(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_shuffle_i32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
   return _mm512_shuffle_i32x4(__A, __B, 4); 
 }
 
 __m512i test_mm512_mask_shuffle_i32x4(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_shuffle_i32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_shuffle_i32x4(__W, __U, __A, __B, 4); 
 }
 
 __m512i test_mm512_maskz_shuffle_i32x4(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_shuffle_i32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_shuffle_i32x4(__U, __A, __B, 4); 
 }
 
 __m512i test_mm512_shuffle_i64x2(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_shuffle_i64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
   return _mm512_shuffle_i64x2(__A, __B, 4); 
 }
 
 __m512i test_mm512_mask_shuffle_i64x2(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_shuffle_i64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_shuffle_i64x2(__W, __U, __A, __B, 4); 
 }
 
 __m512i test_mm512_maskz_shuffle_i64x2(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_shuffle_i64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_shuffle_i64x2(__U, __A, __B, 4); 
 }
 
@@ -6194,52 +6217,98 @@
   return _mm512_mask_permutexvar_epi32(__W, __M, __X, __Y); 
 }
 
-__mmask16 test_mm512_kand(__mmask16 __A, __mmask16 __B) {
+__mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: @test_mm512_kand
-  // CHECK: @llvm.x86.avx512.kand.w
-  return _mm512_kand(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RES:%.*]] = and <16 x i1> [[LHS]], [[RHS]]
+  // CHECK: bitcast <16 x i1> [[RES]] to i16
+  return _mm512_mask_cmpneq_epu32_mask(_mm512_kand(_mm512_cmpneq_epu32_mask(__A, __B),
+                                                   _mm512_cmpneq_epu32_mask(__C, __D)),
+                                                   __E, __F);
 }
 
-__mmask16 test_mm512_kandn(__mmask16 __A, __mmask16 __B) {
+__mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: @test_mm512_kandn
-  // CHECK: @llvm.x86.avx512.kandn.w
-  return _mm512_kandn(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]]
+  // CHECK: bitcast <16 x i1> [[RES]] to i16
+  return _mm512_mask_cmpneq_epu32_mask(_mm512_kandn(_mm512_cmpneq_epu32_mask(__A, __B),
+                                                    _mm512_cmpneq_epu32_mask(__C, __D)),
+                                                    __E, __F);
 }
 
-__mmask16 test_mm512_kor(__mmask16 __A, __mmask16 __B) {
+__mmask16 test_mm512_kor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: @test_mm512_kor
-  // CHECK: @llvm.x86.avx512.kor.w
-  return _mm512_kor(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RES:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+  // CHECK: bitcast <16 x i1> [[RES]] to i16
+  return _mm512_mask_cmpneq_epu32_mask(_mm512_kor(_mm512_cmpneq_epu32_mask(__A, __B),
+                                                  _mm512_cmpneq_epu32_mask(__C, __D)),
+                                                  __E, __F);
 }
 
-int test_mm512_kortestc(__mmask16 __A, __mmask16 __B) {
+int test_mm512_kortestc(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
   // CHECK-LABEL: @test_mm512_kortestc
-  // CHECK: @llvm.x86.avx512.kortestc.w
-  return _mm512_kortestc(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+  // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
+  // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1
+  // CHECK: zext i1 [[CMP]] to i32
+  return _mm512_kortestc(_mm512_cmpneq_epu32_mask(__A, __B),
+                         _mm512_cmpneq_epu32_mask(__C, __D));
 }
 
-int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) {
+int test_mm512_kortestz(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
   // CHECK-LABEL: @test_mm512_kortestz
-  // CHECK: @llvm.x86.avx512.kortestz.w
-  return _mm512_kortestz(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+  // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
+  // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], 0
+  // CHECK: zext i1 [[CMP]] to i32
+  return _mm512_kortestz(_mm512_cmpneq_epu32_mask(__A, __B),
+                         _mm512_cmpneq_epu32_mask(__C, __D));
 }
 
-__mmask16 test_mm512_kunpackb(__mmask16 __A, __mmask16 __B) {
+__mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: @test_mm512_kunpackb
-  // CHECK: @llvm.x86.avx512.kunpck.bw
-  return _mm512_kunpackb(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[LHS2:%.*]] = shufflevector <16 x i1> [[LHS]], <16 x i1> [[LHS]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: [[RHS2:%.*]] = shufflevector <16 x i1> [[RHS]], <16 x i1> [[RHS]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: [[CONCAT:%.*]] = shufflevector <8 x i1> [[RHS2]], <8 x i1> [[LHS2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: bitcast <16 x i1> [[CONCAT]] to i16
+  return _mm512_mask_cmpneq_epu32_mask(_mm512_kunpackb(_mm512_cmpneq_epu32_mask(__A, __B),
+                                                       _mm512_cmpneq_epu32_mask(__C, __D)),
+                                                       __E, __F);
 }
 
-__mmask16 test_mm512_kxnor(__mmask16 __A, __mmask16 __B) {
+__mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: @test_mm512_kxnor
-  // CHECK: @llvm.x86.avx512.kxnor.w
-  return _mm512_kxnor(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]]
+  // CHECK: bitcast <16 x i1> [[RES]] to i16
+  return _mm512_mask_cmpneq_epu32_mask(_mm512_kxnor(_mm512_cmpneq_epu32_mask(__A, __B),
+                                                    _mm512_cmpneq_epu32_mask(__C, __D)),
+                                                    __E, __F);
 }
 
-__mmask16 test_mm512_kxor(__mmask16 __A, __mmask16 __B) {
+__mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: @test_mm512_kxor
-  // CHECK: @llvm.x86.avx512.kxor.w
-  return _mm512_kxor(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RES:%.*]] = xor <16 x i1> [[LHS]], [[RHS]]
+  // CHECK: bitcast <16 x i1> [[RES]] to i16
+  return _mm512_mask_cmpneq_epu32_mask(_mm512_kxor(_mm512_cmpneq_epu32_mask(__A, __B),
+                                                   _mm512_cmpneq_epu32_mask(__C, __D)),
+                                                   __E, __F);
 }
 
 void test_mm512_stream_si512(__m512i * __P, __m512i __A) {
diff --git a/test/CodeGen/avx512vbmi2-builtins.c b/test/CodeGen/avx512vbmi2-builtins.c
new file mode 100644
index 0000000..4da21e3
--- /dev/null
+++ b/test/CodeGen/avx512vbmi2-builtins.c
@@ -0,0 +1,304 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi2 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m512i test_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) {
+  // CHECK-LABEL: @test_mm512_mask_compress_epi16
+  // CHECK: @llvm.x86.avx512.mask.compress.w.512
+  return _mm512_mask_compress_epi16(__S, __U, __D);
+}
+
+__m512i test_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) {
+  // CHECK-LABEL: @test_mm512_maskz_compress_epi16
+  // CHECK: @llvm.x86.avx512.mask.compress.w.512
+  return _mm512_maskz_compress_epi16(__U, __D);
+}
+
+__m512i test_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) {
+  // CHECK-LABEL: @test_mm512_mask_compress_epi8
+  // CHECK: @llvm.x86.avx512.mask.compress.b.512
+  return _mm512_mask_compress_epi8(__S, __U, __D);
+}
+
+__m512i test_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) {
+  // CHECK-LABEL: @test_mm512_maskz_compress_epi8
+  // CHECK: @llvm.x86.avx512.mask.compress.b.512
+  return _mm512_maskz_compress_epi8(__U, __D);
+}
+
+void test_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) {
+  // CHECK-LABEL: @test_mm512_mask_compressstoreu_epi16
+  // CHECK: @llvm.x86.avx512.mask.compress.store.w.512
+  _mm512_mask_compressstoreu_epi16(__P, __U, __D);
+}
+
+void test_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) {
+  // CHECK-LABEL: @test_mm512_mask_compressstoreu_epi8
+  // CHECK: @llvm.x86.avx512.mask.compress.store.b.512
+  _mm512_mask_compressstoreu_epi8(__P, __U, __D);
+}
+
+__m512i test_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) {
+  // CHECK-LABEL: @test_mm512_mask_expand_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.w.512
+  return _mm512_mask_expand_epi16(__S, __U, __D);
+}
+
+__m512i test_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) {
+  // CHECK-LABEL: @test_mm512_maskz_expand_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.w.512
+  return _mm512_maskz_expand_epi16(__U, __D);
+}
+
+__m512i test_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) {
+  // CHECK-LABEL: @test_mm512_mask_expand_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.b.512
+  return _mm512_mask_expand_epi8(__S, __U, __D);
+}
+
+__m512i test_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) {
+  // CHECK-LABEL: @test_mm512_maskz_expand_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.b.512
+  return _mm512_maskz_expand_epi8(__U, __D);
+}
+
+__m512i test_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm512_mask_expandloadu_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.load.w.512
+  return _mm512_mask_expandloadu_epi16(__S, __U, __P);
+}
+
+__m512i test_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm512_maskz_expandloadu_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.load.w.512
+  return _mm512_maskz_expandloadu_epi16(__U, __P);
+}
+
+__m512i test_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm512_mask_expandloadu_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.load.b.512
+  return _mm512_mask_expandloadu_epi8(__S, __U, __P);
+}
+
+__m512i test_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm512_maskz_expandloadu_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.load.b.512
+  return _mm512_maskz_expandloadu_epi8(__U, __P);
+}
+
+__m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shldi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshld.q.512
+  return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 127);
+}
+
+__m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shldi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshld.q.512
+  return _mm512_maskz_shldi_epi64(__U, __A, __B, 63);
+}
+
+__m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shldi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshld.q.512
+  return _mm512_shldi_epi64(__A, __B, 31);
+}
+
+__m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shldi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshld.d.512
+  return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 127);
+}
+
+__m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shldi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshld.d.512
+  return _mm512_maskz_shldi_epi32(__U, __A, __B, 63);
+}
+
+__m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shldi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshld.d.512
+  return _mm512_shldi_epi32(__A, __B, 31);
+}
+
+__m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shldi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshld.w.512
+  return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 127);
+}
+
+__m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shldi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshld.w.512
+  return _mm512_maskz_shldi_epi16(__U, __A, __B, 63);
+}
+
+__m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shldi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshld.w.512
+  return _mm512_shldi_epi16(__A, __B, 31);
+}
+
+__m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shrdi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.q.512
+  return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 127);
+}
+
+__m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shrdi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.q.512
+  return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63);
+}
+
+__m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shrdi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.q.512
+  return _mm512_shrdi_epi64(__A, __B, 31);
+}
+
+__m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shrdi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.d.512
+  return _mm512_mask_shrdi_epi32(__S, __U, __A, __B, 127);
+}
+
+__m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shrdi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.d.512
+  return _mm512_maskz_shrdi_epi32(__U, __A, __B, 63);
+}
+
+__m512i test_mm512_shrdi_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shrdi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.d.512
+  return _mm512_shrdi_epi32(__A, __B, 31);
+}
+
+__m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shrdi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.w.512
+  return _mm512_mask_shrdi_epi16(__S, __U, __A, __B, 127);
+}
+
+__m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shrdi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.w.512
+  return _mm512_maskz_shrdi_epi16(__U, __A, __B, 63);
+}
+
+__m512i test_mm512_shrdi_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shrdi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.w.512
+  return _mm512_shrdi_epi16(__A, __B, 31);
+}
+
+__m512i test_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shldv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.q.512
+  return _mm512_mask_shldv_epi64(__S, __U, __A, __B);
+}
+
+__m512i test_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shldv_epi64
+  // CHECK: @llvm.x86.avx512.maskz.vpshldv.q.512
+  return _mm512_maskz_shldv_epi64(__U, __S, __A, __B);
+}
+
+__m512i test_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shldv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.q.512
+  return _mm512_shldv_epi64(__S, __A, __B);
+}
+
+__m512i test_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shldv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.d.512
+  return _mm512_mask_shldv_epi32(__S, __U, __A, __B);
+}
+
+__m512i test_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shldv_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpshldv.d.512
+  return _mm512_maskz_shldv_epi32(__U, __S, __A, __B);
+}
+
+__m512i test_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shldv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.d.512
+  return _mm512_shldv_epi32(__S, __A, __B);
+}
+
+__m512i test_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shldv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.w.512
+  return _mm512_mask_shldv_epi16(__S, __U, __A, __B);
+}
+
+__m512i test_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shldv_epi16
+  // CHECK: @llvm.x86.avx512.maskz.vpshldv.w.512
+  return _mm512_maskz_shldv_epi16(__U, __S, __A, __B);
+}
+
+__m512i test_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shldv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.w.512
+  return _mm512_shldv_epi16(__S, __A, __B);
+}
+
+__m512i test_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shrdv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.512
+  return _mm512_mask_shrdv_epi64(__S, __U, __A, __B);
+}
+
+__m512i test_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shrdv_epi64
+  // CHECK: @llvm.x86.avx512.maskz.vpshrdv.q.512
+  return _mm512_maskz_shrdv_epi64(__U, __S, __A, __B);
+}
+
+__m512i test_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shrdv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.512
+  return _mm512_shrdv_epi64(__S, __A, __B);
+}
+
+__m512i test_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shrdv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.512
+  return _mm512_mask_shrdv_epi32(__S, __U, __A, __B);
+}
+
+__m512i test_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shrdv_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpshrdv.d.512
+  return _mm512_maskz_shrdv_epi32(__U, __S, __A, __B);
+}
+
+__m512i test_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shrdv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.512
+  return _mm512_shrdv_epi32(__S, __A, __B);
+}
+
+__m512i test_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shrdv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.512
+  return _mm512_mask_shrdv_epi16(__S, __U, __A, __B);
+}
+
+__m512i test_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shrdv_epi16
+  // CHECK: @llvm.x86.avx512.maskz.vpshrdv.w.512
+  return _mm512_maskz_shrdv_epi16(__U, __S, __A, __B);
+}
+
+__m512i test_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shrdv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.512
+  return _mm512_shrdv_epi16(__S, __A, __B);
+}
+
diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c
index b489033..f8594e5 100644
--- a/test/CodeGen/avx512vl-builtins.c
+++ b/test/CodeGen/avx512vl-builtins.c
@@ -1049,49 +1049,53 @@
 
 __mmask8 test_mm256_cmp_ps_mask(__m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.256
+  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256
   return (__mmask8)_mm256_cmp_ps_mask(__A, __B, 0);
 }
 
 __mmask8 test_mm256_mask_cmp_ps_mask(__mmask8 m, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.256
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm256_mask_cmp_ps_mask(m, __A, __B, 0);
 }
 
 __mmask8 test_mm_cmp_ps_mask(__m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.128
+  // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128
   return (__mmask8)_mm_cmp_ps_mask(__A, __B, 0);
 }
 
 __mmask8 test_mm_mask_cmp_ps_mask(__mmask8 m, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.128
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
   return _mm_mask_cmp_ps_mask(m, __A, __B, 0);
 }
 
 __mmask8 test_mm256_cmp_pd_mask(__m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.256
+  // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256
   return (__mmask8)_mm256_cmp_pd_mask(__A, __B, 0);
 }
 
 __mmask8 test_mm256_mask_cmp_pd_mask(__mmask8 m, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.256
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
   return _mm256_mask_cmp_pd_mask(m, __A, __B, 0);
 }
 
 __mmask8 test_mm_cmp_pd_mask(__m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.128
+  // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128
   return (__mmask8)_mm_cmp_pd_mask(__A, __B, 0);
 }
 
 __mmask8 test_mm_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.128
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
   return _mm_mask_cmp_pd_mask(m, __A, __B, 0);
 }
 
@@ -1763,22 +1767,26 @@
 }
 __m128 test_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_cvtepi32_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.128
+  // CHECK: @llvm.x86.sse2.cvtdq2ps
+  // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
   return _mm_mask_cvtepi32_ps(__W,__U,__A); 
 }
 __m128 test_mm_maskz_cvtepi32_ps(__mmask16 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_maskz_cvtepi32_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.128
+  // CHECK: @llvm.x86.sse2.cvtdq2ps
+  // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
   return _mm_maskz_cvtepi32_ps(__U,__A); 
 }
 __m256 test_mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_mask_cvtepi32_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.256
+  // CHECK: @llvm.x86.avx.cvtdq2.ps.256
+  // CHECK: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}}
   return _mm256_mask_cvtepi32_ps(__W,__U,__A); 
 }
 __m256 test_mm256_maskz_cvtepi32_ps(__mmask16 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtepi32_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.256
+  // CHECK: @llvm.x86.avx.cvtdq2.ps.256
+  // CHECK: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}}
   return _mm256_maskz_cvtepi32_ps(__U,__A); 
 }
 __m128i test_mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A) {
@@ -1793,12 +1801,14 @@
 }
 __m128i test_mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A) {
   // CHECK-LABEL: @test_mm256_mask_cvtpd_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.256
+  // CHECK: @llvm.x86.avx.cvt.pd2dq.256
+  // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
   return _mm256_mask_cvtpd_epi32(__W,__U,__A); 
 }
 __m128i test_mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtpd_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.256
+  // CHECK: @llvm.x86.avx.cvt.pd2dq.256
+  // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
   return _mm256_maskz_cvtpd_epi32(__U,__A); 
 }
 __m128 test_mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A) {
@@ -1813,12 +1823,14 @@
 }
 __m128 test_mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A) {
   // CHECK-LABEL: @test_mm256_mask_cvtpd_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.256
+  // CHECK: @llvm.x86.avx.cvt.pd2.ps.256
+  // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
   return _mm256_mask_cvtpd_ps(__W,__U,__A); 
 }
 __m128 test_mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtpd_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.256
+  // CHECK: @llvm.x86.avx.cvt.pd2.ps.256
+  // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
   return _mm256_maskz_cvtpd_ps(__U,__A); 
 }
 __m128i test_mm_cvtpd_epu32(__m128d __A) {
@@ -1853,42 +1865,50 @@
 }
 __m128i test_mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_mask_cvtps_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvtps2dq.128
+  // CHECK: @llvm.x86.sse2.cvtps2dq
+  // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
   return _mm_mask_cvtps_epi32(__W,__U,__A); 
 }
 __m128i test_mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_maskz_cvtps_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvtps2dq.128
+  // CHECK: @llvm.x86.sse2.cvtps2dq
+  // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
   return _mm_maskz_cvtps_epi32(__U,__A); 
 }
 __m256i test_mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_mask_cvtps_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvtps2dq.256
+  // CHECK: @llvm.x86.avx.cvt.ps2dq.256
+  // CHECK: select <8 x i1> {{.*}}, <8 x i32> {{.*}}, <8 x i32> {{.*}}
   return _mm256_mask_cvtps_epi32(__W,__U,__A); 
 }
 __m256i test_mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtps_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvtps2dq.256
+  // CHECK: @llvm.x86.avx.cvt.ps2dq.256
+  // CHECK: select <8 x i1> {{.*}}, <8 x i32> {{.*}}, <8 x i32> {{.*}}
   return _mm256_maskz_cvtps_epi32(__U,__A); 
 }
 __m128d test_mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_mask_cvtps_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.128
+  // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
+  // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}}
   return _mm_mask_cvtps_pd(__W,__U,__A); 
 }
 __m128d test_mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_maskz_cvtps_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.128
+  // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
+  // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}}
   return _mm_maskz_cvtps_pd(__U,__A); 
 }
 __m256d test_mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm256_mask_cvtps_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.256
+  // CHECK: fpext <4 x float> %{{.*}} to <4 x double>
+  // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}}
   return _mm256_mask_cvtps_pd(__W,__U,__A); 
 }
 __m256d test_mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtps_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.256
+  // CHECK: fpext <4 x float> %{{.*}} to <4 x double>
+  // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}}
   return _mm256_maskz_cvtps_pd(__U,__A); 
 }
 __m128i test_mm_cvtps_epu32(__m128 __A) {
@@ -1933,12 +1953,14 @@
 }
 __m128i test_mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A) {
   // CHECK-LABEL: @test_mm256_mask_cvttpd_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.256
+  // CHECK: @llvm.x86.avx.cvtt.pd2dq.256
+  // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
   return _mm256_mask_cvttpd_epi32(__W,__U,__A); 
 }
 __m128i test_mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A) {
   // CHECK-LABEL: @test_mm256_maskz_cvttpd_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.256
+  // CHECK: @llvm.x86.avx.cvtt.pd2dq.256
+  // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
   return _mm256_maskz_cvttpd_epi32(__U,__A); 
 }
 __m128i test_mm_cvttpd_epu32(__m128d __A) {
@@ -1973,22 +1995,26 @@
 }
 __m128i test_mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_mask_cvttps_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvttps2dq.128
+  // CHECK: @llvm.x86.sse2.cvttps2dq
+  // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
   return _mm_mask_cvttps_epi32(__W,__U,__A); 
 }
 __m128i test_mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_maskz_cvttps_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvttps2dq.128
+  // CHECK: @llvm.x86.sse2.cvttps2dq
+  // CHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
   return _mm_maskz_cvttps_epi32(__U,__A); 
 }
 __m256i test_mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_mask_cvttps_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvttps2dq.256
+  // CHECK: @llvm.x86.avx.cvtt.ps2dq.256
+  // CHECK: select <8 x i1> {{.*}}, <8 x i32> {{.*}}, <8 x i32> {{.*}}
   return _mm256_mask_cvttps_epi32(__W,__U,__A); 
 }
 __m256i test_mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_maskz_cvttps_epi32
-  // CHECK: @llvm.x86.avx512.mask.cvttps2dq.256
+  // CHECK: @llvm.x86.avx.cvtt.ps2dq.256
+  // CHECK: select <8 x i1> {{.*}}, <8 x i32> {{.*}}, <8 x i32> {{.*}}
   return _mm256_maskz_cvttps_epi32(__U,__A); 
 }
 __m128i test_mm_cvttps_epu32(__m128 __A) {
@@ -5188,99 +5214,124 @@
 
 __mmask8 test_mm_test_epi32_mask(__m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_test_epi32_mask
-  // CHECK: @llvm.x86.avx512.ptestm.d.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}}
   return _mm_test_epi32_mask(__A, __B); 
 }
 
 __mmask8 test_mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_test_epi32_mask
-  // CHECK: @llvm.x86.avx512.ptestm.d.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: and <4 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_test_epi32_mask(__U, __A, __B); 
 }
 
 __mmask8 test_mm256_test_epi32_mask(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_test_epi32_mask
-  // CHECK: @llvm.x86.avx512.ptestm.d.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <8 x i32> %{{.*}}, %{{.*}}
   return _mm256_test_epi32_mask(__A, __B); 
 }
 
 __mmask8 test_mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_test_epi32_mask
-  // CHECK: @llvm.x86.avx512.ptestm.d.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
   return _mm256_mask_test_epi32_mask(__U, __A, __B); 
 }
 
 __mmask8 test_mm_test_epi64_mask(__m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_test_epi64_mask
-  // CHECK: @llvm.x86.avx512.ptestm.q.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}}
   return _mm_test_epi64_mask(__A, __B); 
 }
 
 __mmask8 test_mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_test_epi64_mask
-  // CHECK: @llvm.x86.avx512.ptestm.q.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: and <2 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_test_epi64_mask(__U, __A, __B); 
 }
 
 __mmask8 test_mm256_test_epi64_mask(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_test_epi64_mask
-  // CHECK: @llvm.x86.avx512.ptestm.q.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <4 x i64> %{{.*}}, %{{.*}}
   return _mm256_test_epi64_mask(__A, __B); 
 }
 
 __mmask8 test_mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_test_epi64_mask
-  // CHECK: @llvm.x86.avx512.ptestm.q.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: and <4 x i1> %{{.*}}, %{{.*}}
   return _mm256_mask_test_epi64_mask(__U, __A, __B); 
 }
 
 __mmask8 test_mm_testn_epi32_mask(__m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_testn_epi32_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.d.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}
   return _mm_testn_epi32_mask(__A, __B); 
 }
 
 __mmask8 test_mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_testn_epi32_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.d.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: and <4 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_testn_epi32_mask(__U, __A, __B); 
 }
 
 __mmask8 test_mm256_testn_epi32_mask(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_testn_epi32_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.d.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <8 x i32> %{{.*}}, %{{.*}}
   return _mm256_testn_epi32_mask(__A, __B); 
 }
 
 __mmask8 test_mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_testn_epi32_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.d.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
   return _mm256_mask_testn_epi32_mask(__U, __A, __B); 
 }
 
 __mmask8 test_mm_testn_epi64_mask(__m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_testn_epi64_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.q.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}}
   return _mm_testn_epi64_mask(__A, __B); 
 }
 
 __mmask8 test_mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_testn_epi64_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.q.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: and <2 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_testn_epi64_mask(__U, __A, __B); 
 }
 
 __mmask8 test_mm256_testn_epi64_mask(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_testn_epi64_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.q.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}}
   return _mm256_testn_epi64_mask(__A, __B); 
 }
 
 __mmask8 test_mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_testn_epi64_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.q.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: and <4 x i1> %{{.*}}, %{{.*}}
   return _mm256_mask_testn_epi64_mask(__U, __A, __B); 
 }
+
 __m128i test_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_unpackhi_epi32
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -5602,73 +5653,85 @@
 }
 __m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_shuffle_f32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
   return _mm256_shuffle_f32x4(__A, __B, 3); 
 }
 
 __m256 test_mm256_mask_shuffle_f32x4(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_shuffle_f32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_shuffle_f32x4(__W, __U, __A, __B, 3); 
 }
 
 __m256 test_mm256_maskz_shuffle_f32x4(__mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_maskz_shuffle_f32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_shuffle_f32x4(__U, __A, __B, 3); 
 }
 
 __m256d test_mm256_shuffle_f64x2(__m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_shuffle_f64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
   return _mm256_shuffle_f64x2(__A, __B, 3); 
 }
 
 __m256d test_mm256_mask_shuffle_f64x2(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_shuffle_f64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_mask_shuffle_f64x2(__W, __U, __A, __B, 3); 
 }
 
 __m256d test_mm256_maskz_shuffle_f64x2(__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_maskz_shuffle_f64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_maskz_shuffle_f64x2(__U, __A, __B, 3); 
 }
 
 __m256i test_mm256_shuffle_i32x4(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_shuffle_i32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
   return _mm256_shuffle_i32x4(__A, __B, 3); 
 }
 
 __m256i test_mm256_mask_shuffle_i32x4(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_shuffle_i32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_shuffle_i32x4(__W, __U, __A, __B, 3); 
 }
 
 __m256i test_mm256_maskz_shuffle_i32x4(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_shuffle_i32x4
-  // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_shuffle_i32x4(__U, __A, __B, 3); 
 }
 
 __m256i test_mm256_shuffle_i64x2(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_shuffle_i64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
   return _mm256_shuffle_i64x2(__A, __B, 3); 
 }
 
 __m256i test_mm256_mask_shuffle_i64x2(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_shuffle_i64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_mask_shuffle_i64x2(__W, __U, __A, __B, 3); 
 }
 
 __m256i test_mm256_maskz_shuffle_i64x2(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_shuffle_i64x2
-  // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_maskz_shuffle_i64x2(__U, __A, __B, 3); 
 }
 
@@ -6978,37 +7041,40 @@
 
 __m256 test_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) {
   // CHECK-LABEL: @test_mm256_mask_permutexvar_ps
-  // CHECK: @llvm.x86.avx512.mask.permvar.sf.256
+  // CHECK: @llvm.x86.avx2.permps
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_permutexvar_ps(__W, __U, __X, __Y);
 }
 
 __m256 test_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) {
   // CHECK-LABEL: @test_mm256_maskz_permutexvar_ps
-  // CHECK: @llvm.x86.avx512.mask.permvar.sf.256
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_permutexvar_ps(__U, __X, __Y);
 }
 
 __m256 test_mm256_permutexvar_ps(__m256i __X, __m256 __Y) {
   // CHECK-LABEL: @test_mm256_permutexvar_ps
-  // CHECK: @llvm.x86.avx512.mask.permvar.sf.256
+  // CHECK: @llvm.x86.avx2.permps
   return _mm256_permutexvar_ps( __X, __Y);
 }
 
 __m256i test_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) {
   // CHECK-LABEL: @test_mm256_maskz_permutexvar_epi32
-  // CHECK: @llvm.x86.avx512.mask.permvar.si.256
+  // CHECK: @llvm.x86.avx2.permd
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_permutexvar_epi32(__M, __X, __Y);
 }
 
 __m256i test_mm256_permutexvar_epi32(__m256i __X, __m256i __Y) {
   // CHECK-LABEL: @test_mm256_permutexvar_epi32
-  // CHECK: @llvm.x86.avx512.mask.permvar.si.256
+  // CHECK: @llvm.x86.avx2.permd
   return _mm256_permutexvar_epi32(__X, __Y);
 }
 
 __m256i test_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
   // CHECK-LABEL: @test_mm256_mask_permutexvar_epi32
-  // CHECK: @llvm.x86.avx512.mask.permvar.si.256
+  // CHECK: @llvm.x86.avx2.permd
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_permutexvar_epi32(__W, __M, __X, __Y);
 }
 
diff --git a/test/CodeGen/avx512vlbitalg-builtins.c b/test/CodeGen/avx512vlbitalg-builtins.c
new file mode 100644
index 0000000..9b2a1a4
--- /dev/null
+++ b/test/CodeGen/avx512vlbitalg-builtins.c
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m256i test_mm256_popcnt_epi16(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_popcnt_epi16
+  // CHECK: @llvm.ctpop.v16i16
+  return _mm256_popcnt_epi16(__A);
+}
+
+__m256i test_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_popcnt_epi16
+  // CHECK: @llvm.ctpop.v16i16
+  // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}}
+  return _mm256_mask_popcnt_epi16(__A, __U, __B);
+}
+__m256i test_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_popcnt_epi16
+  // CHECK: @llvm.ctpop.v16i16
+  // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}}
+  return _mm256_maskz_popcnt_epi16(__U, __B);
+}
+
+__m128i test_mm128_popcnt_epi16(__m128i __A) {
+  // CHECK-LABEL: @test_mm128_popcnt_epi16
+  // CHECK: @llvm.ctpop.v8i16
+  return _mm128_popcnt_epi16(__A);
+}
+
+__m128i test_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_popcnt_epi16
+  // CHECK: @llvm.ctpop.v8i16
+  // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}}
+  return _mm128_mask_popcnt_epi16(__A, __U, __B);
+}
+__m128i test_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_popcnt_epi16
+  // CHECK: @llvm.ctpop.v8i16
+  // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}}
+  return _mm128_maskz_popcnt_epi16(__U, __B);
+}
+
+__m256i test_mm256_popcnt_epi8(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_popcnt_epi8
+  // CHECK: @llvm.ctpop.v32i8
+  return _mm256_popcnt_epi8(__A);
+}
+
+__m256i test_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_popcnt_epi8
+  // CHECK: @llvm.ctpop.v32i8
+  // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+  return _mm256_mask_popcnt_epi8(__A, __U, __B);
+}
+__m256i test_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_popcnt_epi8
+  // CHECK: @llvm.ctpop.v32i8
+  // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+  return _mm256_maskz_popcnt_epi8(__U, __B);
+}
+
+__m128i test_mm128_popcnt_epi8(__m128i __A) {
+  // CHECK-LABEL: @test_mm128_popcnt_epi8
+  // CHECK: @llvm.ctpop.v16i8
+  return _mm128_popcnt_epi8(__A);
+}
+
+__m128i test_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_popcnt_epi8
+  // CHECK: @llvm.ctpop.v16i8
+  // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+  return _mm128_mask_popcnt_epi8(__A, __U, __B);
+}
+__m128i test_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_popcnt_epi8
+  // CHECK: @llvm.ctpop.v16i8
+  // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+  return _mm128_maskz_popcnt_epi8(__U, __B);
+}
+
+__mmask32 test_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_bitshuffle_epi32_mask
+  // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256
+  return _mm256_mask_bitshuffle_epi32_mask(__U, __A, __B);
+}
+
+__mmask32 test_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_bitshuffle_epi32_mask
+  // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256
+  return _mm256_bitshuffle_epi32_mask(__A, __B);
+}
+
+__mmask16 test_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_bitshuffle_epi16_mask
+  // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128
+  return _mm128_mask_bitshuffle_epi16_mask(__U, __A, __B);
+}
+
+__mmask16 test_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_bitshuffle_epi16_mask
+  // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128
+  return _mm128_bitshuffle_epi16_mask(__A, __B);
+}
+
diff --git a/test/CodeGen/avx512vlbw-builtins.c b/test/CodeGen/avx512vlbw-builtins.c
index ff34d8f..7adc50c 100644
--- a/test/CodeGen/avx512vlbw-builtins.c
+++ b/test/CodeGen/avx512vlbw-builtins.c
@@ -2481,109 +2481,135 @@
 }
 __mmask16 test_mm_test_epi8_mask(__m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_test_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestm.b.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}}
   return _mm_test_epi8_mask(__A, __B); 
 }
 
 __mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_test_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestm.b.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_test_epi8_mask(__U, __A, __B); 
 }
 
 __mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_test_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestm.b.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}}
   return _mm256_test_epi8_mask(__A, __B); 
 }
 
 __mmask32 test_mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_test_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestm.b.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}}
+  // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
   return _mm256_mask_test_epi8_mask(__U, __A, __B); 
 }
 
 __mmask8 test_mm_test_epi16_mask(__m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_test_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestm.w.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}}
   return _mm_test_epi16_mask(__A, __B); 
 }
 
 __mmask8 test_mm_mask_test_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_test_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestm.w.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_test_epi16_mask(__U, __A, __B); 
 }
 
 __mmask16 test_mm256_test_epi16_mask(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_test_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestm.w.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <16 x i16> %{{.*}}, %{{.*}}
   return _mm256_test_epi16_mask(__A, __B); 
 }
 
 __mmask16 test_mm256_mask_test_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_test_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestm.w.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp ne <16 x i16> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
   return _mm256_mask_test_epi16_mask(__U, __A, __B); 
 }
 
 __mmask16 test_mm_testn_epi8_mask(__m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_testn_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.b.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}}
   return _mm_testn_epi8_mask(__A, __B); 
 }
 
 __mmask16 test_mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_testn_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.b.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_testn_epi8_mask(__U, __A, __B); 
 }
 
 __mmask32 test_mm256_testn_epi8_mask(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_testn_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.b.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}}
   return _mm256_testn_epi8_mask(__A, __B); 
 }
 
 __mmask32 test_mm256_mask_testn_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_testn_epi8_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.b.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <32 x i8> %{{.*}}, %{{.*}}
+  // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
   return _mm256_mask_testn_epi8_mask(__U, __A, __B); 
 }
 
 __mmask8 test_mm_testn_epi16_mask(__m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_testn_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.w.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}}
   return _mm_testn_epi16_mask(__A, __B); 
 }
 
 __mmask8 test_mm_mask_testn_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_testn_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.w.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_testn_epi16_mask(__U, __A, __B); 
 }
 
 __mmask16 test_mm256_testn_epi16_mask(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_testn_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.w.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}}
   return _mm256_testn_epi16_mask(__A, __B); 
 }
 
 __mmask16 test_mm256_mask_testn_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_testn_epi16_mask
-  // CHECK: @llvm.x86.avx512.ptestnm.w.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: icmp eq <16 x i16> %{{.*}}, %{{.*}}
+  // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
   return _mm256_mask_testn_epi16_mask(__U, __A, __B); 
 }
 
 __mmask16 test_mm_movepi8_mask(__m128i __A) {
   // CHECK-LABEL: @test_mm_movepi8_mask
-  // CHECK: @llvm.x86.avx512.cvtb2mask.128
+  // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <16 x i1> [[CMP]] to i16
   return _mm_movepi8_mask(__A); 
 }
 
 __mmask32 test_mm256_movepi8_mask(__m256i __A) {
   // CHECK-LABEL: @test_mm256_movepi8_mask
-  // CHECK: @llvm.x86.avx512.cvtb2mask.256
+  // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <32 x i1> [[CMP]] to i32
   return _mm256_movepi8_mask(__A); 
 }
 
@@ -2961,13 +2987,15 @@
 }
 __mmask8 test_mm_movepi16_mask(__m128i __A) {
   // CHECK-LABEL: @test_mm_movepi16_mask
-  // CHECK: @llvm.x86.avx512.cvtw2mask.128
+  // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <8 x i1> [[CMP]] to i8
   return _mm_movepi16_mask(__A); 
 }
 
 __mmask16 test_mm256_movepi16_mask(__m256i __A) {
   // CHECK-LABEL: @test_mm256_movepi16_mask
-  // CHECK: @llvm.x86.avx512.cvtw2mask.256
+  // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <16 x i1> [[CMP]] to i16
   return _mm256_movepi16_mask(__A); 
 }
 
diff --git a/test/CodeGen/avx512vlcd-builtins.c b/test/CodeGen/avx512vlcd-builtins.c
index 643f24f..376a342 100644
--- a/test/CodeGen/avx512vlcd-builtins.c
+++ b/test/CodeGen/avx512vlcd-builtins.c
@@ -3,28 +3,56 @@
 
 #include <immintrin.h>
 
-__m128i test_mm_broadcastmb_epi64(__mmask8 __A) {
+__m128i test_mm_broadcastmb_epi64(__m128i a,__m128i b) {
   // CHECK-LABEL: @test_mm_broadcastmb_epi64
-  // CHECK: @llvm.x86.avx512.broadcastmb.128
-  return _mm_broadcastmb_epi64(__A); 
+  // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: bitcast <8 x i1> %{{.*}} to i8
+  // CHECK: zext i8 %{{.*}} to i64
+  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
+  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
+  return _mm_broadcastmb_epi64(_mm_cmpeq_epi32_mask (a, b)); 
 }
 
-__m256i test_mm256_broadcastmb_epi64(__mmask8 __A) {
+__m256i test_mm256_broadcastmb_epi64(__m256i a, __m256i b) {
   // CHECK-LABEL: @test_mm256_broadcastmb_epi64
-  // CHECK: @llvm.x86.avx512.broadcastmb.256
-  return _mm256_broadcastmb_epi64(__A); 
+  // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: bitcast <8 x i1> %{{.*}} to i8
+  // CHECK: zext i8 %{{.*}} to i64
+  // CHECK: insertelement <4 x i64> undef, i64 %{{.*}}, i32 0
+  // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 1
+  // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 2
+  // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3
+  return _mm256_broadcastmb_epi64(_mm256_cmpeq_epi64_mask ( a, b)); 
 }
 
-__m128i test_mm_broadcastmw_epi32(__mmask16 __A) {
+__m128i test_mm_broadcastmw_epi32(__m512i a, __m512i b) {
   // CHECK-LABEL: @test_mm_broadcastmw_epi32
-  // CHECK: @llvm.x86.avx512.broadcastmw.128
-  return _mm_broadcastmw_epi32(__A); 
+  // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: bitcast <16 x i1> %{{.*}} to i16
+  // CHECK: zext i16 %{{.*}} to i32
+  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
+  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
+  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
+  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
+  return _mm_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b));
 }
 
-__m256i test_mm256_broadcastmw_epi32(__mmask16 __A) {
+__m256i test_mm256_broadcastmw_epi32(__m512i a, __m512i b) {
   // CHECK-LABEL: @test_mm256_broadcastmw_epi32
-  // CHECK: @llvm.x86.avx512.broadcastmw.256
-  return _mm256_broadcastmw_epi32(__A); 
+  // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: bitcast <16 x i1> %{{.*}} to i16
+  // CHECK: zext i16 %{{.*}} to i32
+  // CHECK: insertelement <8 x i32> undef, i32 %{{.*}}, i32 0
+  // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 1
+  // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 2
+  // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 3
+  // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 4
+  // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 5
+  // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 6
+  // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 7
+  return _mm256_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b)); 
 }
 
 __m128i test_mm_conflict_epi64(__m128i __A) {
diff --git a/test/CodeGen/avx512vldq-builtins.c b/test/CodeGen/avx512vldq-builtins.c
index 3ca4b21..0812d68 100644
--- a/test/CodeGen/avx512vldq-builtins.c
+++ b/test/CodeGen/avx512vldq-builtins.c
@@ -853,13 +853,16 @@
 
 __mmask8 test_mm_movepi32_mask(__m128i __A) {
   // CHECK-LABEL: @test_mm_movepi32_mask
-  // CHECK: @llvm.x86.avx512.cvtd2mask.128
+  // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> %{{.*}}, zeroinitializer
+  // CHECK: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: bitcast <8 x i1> [[SHUF]] to i8
   return _mm_movepi32_mask(__A); 
 }
 
 __mmask8 test_mm256_movepi32_mask(__m256i __A) {
   // CHECK-LABEL: @test_mm256_movepi32_mask
-  // CHECK: @llvm.x86.avx512.cvtd2mask.256
+  // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <8 x i1> [[CMP]] to i8
   return _mm256_movepi32_mask(__A); 
 }
 
@@ -896,13 +899,17 @@
 
 __mmask8 test_mm_movepi64_mask(__m128i __A) {
   // CHECK-LABEL: @test_mm_movepi64_mask
-  // CHECK: @llvm.x86.avx512.cvtq2mask.128
+  // CHECK: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer
+  // CHECK: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  // CHECK: bitcast <8 x i1> [[SHUF]] to i8
   return _mm_movepi64_mask(__A); 
 }
 
 __mmask8 test_mm256_movepi64_mask(__m256i __A) {
   // CHECK-LABEL: @test_mm256_movepi64_mask
-  // CHECK: @llvm.x86.avx512.cvtq2mask.256
+  // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
+  // CHECK: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: bitcast <8 x i1> [[SHUF]] to i8
   return _mm256_movepi64_mask(__A); 
 }
 
diff --git a/test/CodeGen/avx512vlvbmi2-builtins.c b/test/CodeGen/avx512vlvbmi2-builtins.c
new file mode 100644
index 0000000..6edc66d
--- /dev/null
+++ b/test/CodeGen/avx512vlvbmi2-builtins.c
@@ -0,0 +1,604 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vl -target-feature +avx512vbmi2 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m128i test_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
+  // CHECK-LABEL: @test_mm128_mask_compress_epi16
+  // CHECK: @llvm.x86.avx512.mask.compress.w.128
+  return _mm128_mask_compress_epi16(__S, __U, __D);
+}
+
+__m128i test_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D) {
+  // CHECK-LABEL: @test_mm128_maskz_compress_epi16
+  // CHECK: @llvm.x86.avx512.mask.compress.w.128
+  return _mm128_maskz_compress_epi16(__U, __D);
+}
+
+__m128i test_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
+  // CHECK-LABEL: @test_mm128_mask_compress_epi8
+  // CHECK: @llvm.x86.avx512.mask.compress.b.128
+  return _mm128_mask_compress_epi8(__S, __U, __D);
+}
+
+__m128i test_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D) {
+  // CHECK-LABEL: @test_mm128_maskz_compress_epi8
+  // CHECK: @llvm.x86.avx512.mask.compress.b.128
+  return _mm128_maskz_compress_epi8(__U, __D);
+}
+
+void test_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) {
+  // CHECK-LABEL: @test_mm128_mask_compressstoreu_epi16
+  // CHECK: @llvm.x86.avx512.mask.compress.store.w.128
+  _mm128_mask_compressstoreu_epi16(__P, __U, __D);
+}
+
+void test_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) {
+  // CHECK-LABEL: @test_mm128_mask_compressstoreu_epi8
+  // CHECK: @llvm.x86.avx512.mask.compress.store.b.128
+  _mm128_mask_compressstoreu_epi8(__P, __U, __D);
+}
+
+__m128i test_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
+  // CHECK-LABEL: @test_mm128_mask_expand_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.w.128
+  return _mm128_mask_expand_epi16(__S, __U, __D);
+}
+
+__m128i test_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D) {
+  // CHECK-LABEL: @test_mm128_maskz_expand_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.w.128
+  return _mm128_maskz_expand_epi16(__U, __D);
+}
+
+__m128i test_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
+  // CHECK-LABEL: @test_mm128_mask_expand_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.b.128
+  return _mm128_mask_expand_epi8(__S, __U, __D);
+}
+
+__m128i test_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D) {
+  // CHECK-LABEL: @test_mm128_maskz_expand_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.b.128
+  return _mm128_maskz_expand_epi8(__U, __D);
+}
+
+__m128i test_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm128_mask_expandloadu_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.load.w.128
+  return _mm128_mask_expandloadu_epi16(__S, __U, __P);
+}
+
+__m128i test_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm128_maskz_expandloadu_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.load.w.128
+  return _mm128_maskz_expandloadu_epi16(__U, __P);
+}
+
+__m128i test_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm128_mask_expandloadu_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.load.b.128
+  return _mm128_mask_expandloadu_epi8(__S, __U, __P);
+}
+
+__m128i test_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm128_maskz_expandloadu_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.load.b.128
+  return _mm128_maskz_expandloadu_epi8(__U, __P);
+}
+
+__m256i test_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) {
+  // CHECK-LABEL: @test_mm256_mask_compress_epi16
+  // CHECK: @llvm.x86.avx512.mask.compress.w.256
+  return _mm256_mask_compress_epi16(__S, __U, __D);
+}
+
+__m256i test_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) {
+  // CHECK-LABEL: @test_mm256_maskz_compress_epi16
+  // CHECK: @llvm.x86.avx512.mask.compress.w.256
+  return _mm256_maskz_compress_epi16(__U, __D);
+}
+
+__m256i test_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) {
+  // CHECK-LABEL: @test_mm256_mask_compress_epi8
+  // CHECK: @llvm.x86.avx512.mask.compress.b.256
+  return _mm256_mask_compress_epi8(__S, __U, __D);
+}
+
+__m256i test_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) {
+  // CHECK-LABEL: @test_mm256_maskz_compress_epi8
+  // CHECK: @llvm.x86.avx512.mask.compress.b.256
+  return _mm256_maskz_compress_epi8(__U, __D);
+}
+
+void test_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) {
+  // CHECK-LABEL: @test_mm256_mask_compressstoreu_epi16
+  // CHECK: @llvm.x86.avx512.mask.compress.store.w.256
+  _mm256_mask_compressstoreu_epi16(__P, __U, __D);
+}
+
+void test_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) {
+  // CHECK-LABEL: @test_mm256_mask_compressstoreu_epi8
+  // CHECK: @llvm.x86.avx512.mask.compress.store.b.256
+  _mm256_mask_compressstoreu_epi8(__P, __U, __D);
+}
+
+__m256i test_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) {
+  // CHECK-LABEL: @test_mm256_mask_expand_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.w.256
+  return _mm256_mask_expand_epi16(__S, __U, __D);
+}
+
+__m256i test_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) {
+  // CHECK-LABEL: @test_mm256_maskz_expand_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.w.256
+  return _mm256_maskz_expand_epi16(__U, __D);
+}
+
+__m256i test_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) {
+  // CHECK-LABEL: @test_mm256_mask_expand_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.b.256
+  return _mm256_mask_expand_epi8(__S, __U, __D);
+}
+
+__m256i test_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) {
+  // CHECK-LABEL: @test_mm256_maskz_expand_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.b.256
+  return _mm256_maskz_expand_epi8(__U, __D);
+}
+
+__m256i test_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm256_mask_expandloadu_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.load.w.256
+  return _mm256_mask_expandloadu_epi16(__S, __U, __P);
+}
+
+__m256i test_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm256_maskz_expandloadu_epi16
+  // CHECK: @llvm.x86.avx512.mask.expand.load.w.256
+  return _mm256_maskz_expandloadu_epi16(__U, __P);
+}
+
+__m256i test_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm256_mask_expandloadu_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.load.b.256
+  return _mm256_mask_expandloadu_epi8(__S, __U, __P);
+}
+
+__m256i test_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const* __P) {
+  // CHECK-LABEL: @test_mm256_maskz_expandloadu_epi8
+  // CHECK: @llvm.x86.avx512.mask.expand.load.b.256
+  return _mm256_maskz_expandloadu_epi8(__U, __P);
+}
+
+__m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shldi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshld.q.256
+  return _mm256_mask_shldi_epi64(__S, __U, __A, __B, 127);
+}
+
+__m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shldi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshld.q.256
+  return _mm256_maskz_shldi_epi64(__U, __A, __B, 63);
+}
+
+__m256i test_mm256_shldi_epi64(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shldi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshld.q.256
+  return _mm256_shldi_epi64(__A, __B, 31);
+}
+
+__m128i test_mm128_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shldi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshld.q.128
+  return _mm128_mask_shldi_epi64(__S, __U, __A, __B, 127);
+}
+
+__m128i test_mm128_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shldi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshld.q.128
+  return _mm128_maskz_shldi_epi64(__U, __A, __B, 63);
+}
+
+__m128i test_mm128_shldi_epi64(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shldi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshld.q.128
+  return _mm128_shldi_epi64(__A, __B, 31);
+}
+
+__m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shldi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshld.d.256
+  return _mm256_mask_shldi_epi32(__S, __U, __A, __B, 127);
+}
+
+__m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shldi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshld.d.256
+  return _mm256_maskz_shldi_epi32(__U, __A, __B, 63);
+}
+
+__m256i test_mm256_shldi_epi32(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shldi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshld.d.256
+  return _mm256_shldi_epi32(__A, __B, 31);
+}
+
+__m128i test_mm128_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shldi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshld.d.128
+  return _mm128_mask_shldi_epi32(__S, __U, __A, __B, 127);
+}
+
+__m128i test_mm128_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shldi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshld.d.128
+  return _mm128_maskz_shldi_epi32(__U, __A, __B, 63);
+}
+
+__m128i test_mm128_shldi_epi32(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shldi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshld.d.128
+  return _mm128_shldi_epi32(__A, __B, 31);
+}
+
+__m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shldi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshld.w.256
+  return _mm256_mask_shldi_epi16(__S, __U, __A, __B, 127);
+}
+
+__m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shldi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshld.w.256
+  return _mm256_maskz_shldi_epi16(__U, __A, __B, 63);
+}
+
+__m256i test_mm256_shldi_epi16(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shldi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshld.w.256
+  return _mm256_shldi_epi16(__A, __B, 31);
+}
+
+__m128i test_mm128_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shldi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshld.w.128
+  return _mm128_mask_shldi_epi16(__S, __U, __A, __B, 127);
+}
+
+__m128i test_mm128_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shldi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshld.w.128
+  return _mm128_maskz_shldi_epi16(__U, __A, __B, 63);
+}
+
+__m128i test_mm128_shldi_epi16(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shldi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshld.w.128
+  return _mm128_shldi_epi16(__A, __B, 31);
+}
+
+__m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shrdi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.q.256
+  return _mm256_mask_shrdi_epi64(__S, __U, __A, __B, 127);
+}
+
+__m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shrdi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.q.256
+  return _mm256_maskz_shrdi_epi64(__U, __A, __B, 63);
+}
+
+__m256i test_mm256_shrdi_epi64(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shrdi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.q.256
+  return _mm256_shrdi_epi64(__A, __B, 31);
+}
+
+__m128i test_mm128_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shrdi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.q.128
+  return _mm128_mask_shrdi_epi64(__S, __U, __A, __B, 127);
+}
+
+__m128i test_mm128_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shrdi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.q.128
+  return _mm128_maskz_shrdi_epi64(__U, __A, __B, 63);
+}
+
+__m128i test_mm128_shrdi_epi64(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shrdi_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.q.128
+  return _mm128_shrdi_epi64(__A, __B, 31);
+}
+
+__m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shrdi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.d.256
+  return _mm256_mask_shrdi_epi32(__S, __U, __A, __B, 127);
+}
+
+__m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shrdi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.d.256
+  return _mm256_maskz_shrdi_epi32(__U, __A, __B, 63);
+}
+
+__m256i test_mm256_shrdi_epi32(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shrdi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.d.256
+  return _mm256_shrdi_epi32(__A, __B, 31);
+}
+
+__m128i test_mm128_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shrdi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.d.128
+  return _mm128_mask_shrdi_epi32(__S, __U, __A, __B, 127);
+}
+
+__m128i test_mm128_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shrdi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.d.128
+  return _mm128_maskz_shrdi_epi32(__U, __A, __B, 63);
+}
+
+__m128i test_mm128_shrdi_epi32(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shrdi_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.d.128
+  return _mm128_shrdi_epi32(__A, __B, 31);
+}
+
+__m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shrdi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.w.256
+  return _mm256_mask_shrdi_epi16(__S, __U, __A, __B, 127);
+}
+
+__m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shrdi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.w.256
+  return _mm256_maskz_shrdi_epi16(__U, __A, __B, 63);
+}
+
+__m256i test_mm256_shrdi_epi16(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shrdi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.w.256
+  return _mm256_shrdi_epi16(__A, __B, 31);
+}
+
+__m128i test_mm128_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shrdi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.w.128
+  return _mm128_mask_shrdi_epi16(__S, __U, __A, __B, 127);
+}
+
+__m128i test_mm128_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shrdi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.w.128
+  return _mm128_maskz_shrdi_epi16(__U, __A, __B, 63);
+}
+
+__m128i test_mm128_shrdi_epi16(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shrdi_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrd.w.128
+  return _mm128_shrdi_epi16(__A, __B, 31);
+}
+
+__m256i test_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shldv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.q.256
+  return _mm256_mask_shldv_epi64(__S, __U, __A, __B);
+}
+
+__m256i test_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shldv_epi64
+  // CHECK: @llvm.x86.avx512.maskz.vpshldv.q.256
+  return _mm256_maskz_shldv_epi64(__U, __S, __A, __B);
+}
+
+__m256i test_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shldv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.q.256
+  return _mm256_shldv_epi64(__S, __A, __B);
+}
+
+__m128i test_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shldv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.q.128
+  return _mm128_mask_shldv_epi64(__S, __U, __A, __B);
+}
+
+__m128i test_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shldv_epi64
+  // CHECK: @llvm.x86.avx512.maskz.vpshldv.q.128
+  return _mm128_maskz_shldv_epi64(__U, __S, __A, __B);
+}
+
+__m128i test_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shldv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.q.128
+  return _mm128_shldv_epi64(__S, __A, __B);
+}
+
+__m256i test_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shldv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.d.256
+  return _mm256_mask_shldv_epi32(__S, __U, __A, __B);
+}
+
+__m256i test_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shldv_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpshldv.d.256
+  return _mm256_maskz_shldv_epi32(__U, __S, __A, __B);
+}
+
+__m256i test_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shldv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.d.256
+  return _mm256_shldv_epi32(__S, __A, __B);
+}
+
+__m128i test_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shldv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.d.128
+  return _mm128_mask_shldv_epi32(__S, __U, __A, __B);
+}
+
+__m128i test_mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shldv_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpshldv.d.128
+  return _mm128_maskz_shldv_epi32(__U, __S, __A, __B);
+}
+
+__m128i test_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shldv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.d.128
+  return _mm128_shldv_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shldv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.w.256
+  return _mm256_mask_shldv_epi16(__S, __U, __A, __B);
+}
+
+__m256i test_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shldv_epi16
+  // CHECK: @llvm.x86.avx512.maskz.vpshldv.w.256
+  return _mm256_maskz_shldv_epi16(__U, __S, __A, __B);
+}
+
+__m256i test_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shldv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.w.256
+  return _mm256_shldv_epi16(__S, __A, __B);
+}
+
+__m128i test_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shldv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.w.128
+  return _mm128_mask_shldv_epi16(__S, __U, __A, __B);
+}
+
+__m128i test_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shldv_epi16
+  // CHECK: @llvm.x86.avx512.maskz.vpshldv.w.128
+  return _mm128_maskz_shldv_epi16(__U, __S, __A, __B);
+}
+
+__m128i test_mm128_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shldv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshldv.w.128
+  return _mm128_shldv_epi16(__S, __A, __B);
+}
+
+__m256i test_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shrdv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.256
+  return _mm256_mask_shrdv_epi64(__S, __U, __A, __B);
+}
+
+__m256i test_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shrdv_epi64
+  // CHECK: @llvm.x86.avx512.maskz.vpshrdv.q.256
+  return _mm256_maskz_shrdv_epi64(__U, __S, __A, __B);
+}
+
+__m256i test_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shrdv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.256
+  return _mm256_shrdv_epi64(__S, __A, __B);
+}
+
+__m128i test_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shrdv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.128
+  return _mm128_mask_shrdv_epi64(__S, __U, __A, __B);
+}
+
+__m128i test_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shrdv_epi64
+  // CHECK: @llvm.x86.avx512.maskz.vpshrdv.q.128
+  return _mm128_maskz_shrdv_epi64(__U, __S, __A, __B);
+}
+
+__m128i test_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shrdv_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.128
+  return _mm128_shrdv_epi64(__S, __A, __B);
+}
+
+__m256i test_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shrdv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.256
+  return _mm256_mask_shrdv_epi32(__S, __U, __A, __B);
+}
+
+__m256i test_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shrdv_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpshrdv.d.256
+  return _mm256_maskz_shrdv_epi32(__U, __S, __A, __B);
+}
+
+__m256i test_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shrdv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.256
+  return _mm256_shrdv_epi32(__S, __A, __B);
+}
+
+__m128i test_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shrdv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.128
+  return _mm128_mask_shrdv_epi32(__S, __U, __A, __B);
+}
+
+__m128i test_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shrdv_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpshrdv.d.128
+  return _mm128_maskz_shrdv_epi32(__U, __S, __A, __B);
+}
+
+__m128i test_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shrdv_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.128
+  return _mm128_shrdv_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shrdv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.256
+  return _mm256_mask_shrdv_epi16(__S, __U, __A, __B);
+}
+
+__m256i test_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shrdv_epi16
+  // CHECK: @llvm.x86.avx512.maskz.vpshrdv.w.256
+  return _mm256_maskz_shrdv_epi16(__U, __S, __A, __B);
+}
+
+__m256i test_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_shrdv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.256
+  return _mm256_shrdv_epi16(__S, __A, __B);
+}
+
+__m128i test_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_shrdv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.128
+  return _mm128_mask_shrdv_epi16(__S, __U, __A, __B);
+}
+
+__m128i test_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_shrdv_epi16
+  // CHECK: @llvm.x86.avx512.maskz.vpshrdv.w.128
+  return _mm128_maskz_shrdv_epi16(__U, __S, __A, __B);
+}
+
+__m128i test_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_shrdv_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.128
+  return _mm128_shrdv_epi16(__S, __A, __B);
+}
+
diff --git a/test/CodeGen/avx512vlvnni-builtins.c b/test/CodeGen/avx512vlvnni-builtins.c
new file mode 100644
index 0000000..861b915
--- /dev/null
+++ b/test/CodeGen/avx512vlvnni-builtins.c
@@ -0,0 +1,148 @@
+//  RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_dpbusd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusd.256
+  return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B);
+}
+
+__m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_dpbusd_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.256
+  return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B);
+}
+
+__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpbusd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusd.256
+  return _mm256_dpbusd_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_dpbusds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusds.256
+  return _mm256_mask_dpbusds_epi32(__S, __U, __A, __B);
+}
+
+__m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_dpbusds_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.256
+  return _mm256_maskz_dpbusds_epi32(__U, __S, __A, __B);
+}
+
+__m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpbusds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusds.256
+  return _mm256_dpbusds_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_dpwssd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssd.256
+  return _mm256_mask_dpwssd_epi32(__S, __U, __A, __B);
+}
+
+__m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_dpwssd_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.256
+  return _mm256_maskz_dpwssd_epi32(__U, __S, __A, __B);
+}
+
+__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpwssd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssd.256
+  return _mm256_dpwssd_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_dpwssds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssds.256
+  return _mm256_mask_dpwssds_epi32(__S, __U, __A, __B);
+}
+
+__m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_dpwssds_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.256
+  return _mm256_maskz_dpwssds_epi32(__U, __S, __A, __B);
+}
+
+__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpwssds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssds.256
+  return _mm256_dpwssds_epi32(__S, __A, __B);
+}
+
+__m128i test_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_dpbusd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusd.128
+  return _mm128_mask_dpbusd_epi32(__S, __U, __A, __B);
+}
+
+__m128i test_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_dpbusd_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.128
+  return _mm128_maskz_dpbusd_epi32(__U, __S, __A, __B);
+}
+
+__m128i test_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_dpbusd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusd.128
+  return _mm128_dpbusd_epi32(__S, __A, __B);
+}
+
+__m128i test_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_dpbusds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusds.128
+  return _mm128_mask_dpbusds_epi32(__S, __U, __A, __B);
+}
+
+__m128i test_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_dpbusds_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.128
+  return _mm128_maskz_dpbusds_epi32(__U, __S, __A, __B);
+}
+
+__m128i test_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_dpbusds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusds.128
+  return _mm128_dpbusds_epi32(__S, __A, __B);
+}
+
+__m128i test_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_dpwssd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssd.128
+  return _mm128_mask_dpwssd_epi32(__S, __U, __A, __B);
+}
+
+__m128i test_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_dpwssd_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.128
+  return _mm128_maskz_dpwssd_epi32(__U, __S, __A, __B);
+}
+
+__m128i test_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_dpwssd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssd.128
+  return _mm128_dpwssd_epi32(__S, __A, __B);
+}
+
+__m128i test_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_mask_dpwssds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssds.128
+  return _mm128_mask_dpwssds_epi32(__S, __U, __A, __B);
+}
+
+__m128i test_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_maskz_dpwssds_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.128
+  return _mm128_maskz_dpwssds_epi32(__U, __S, __A, __B);
+}
+
+__m128i test_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm128_dpwssds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssds.128
+  return _mm128_dpwssds_epi32(__S, __A, __B);
+}
+
diff --git a/test/CodeGen/avx512vnni-builtins.c b/test/CodeGen/avx512vnni-builtins.c
new file mode 100644
index 0000000..d79046a
--- /dev/null
+++ b/test/CodeGen/avx512vnni-builtins.c
@@ -0,0 +1,76 @@
+//  RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_dpbusd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusd.512
+  return _mm512_mask_dpbusd_epi32(__S, __U, __A, __B);
+}
+
+__m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_dpbusd_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.512
+  return _mm512_maskz_dpbusd_epi32(__U, __S, __A, __B);
+}
+
+__m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_dpbusd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusd.512
+  return _mm512_dpbusd_epi32(__S, __A, __B);
+}
+
+__m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_dpbusds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusds.512
+  return _mm512_mask_dpbusds_epi32(__S, __U, __A, __B);
+}
+
+__m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_dpbusds_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.512
+  return _mm512_maskz_dpbusds_epi32(__U, __S, __A, __B);
+}
+
+__m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_dpbusds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpbusds.512
+  return _mm512_dpbusds_epi32(__S, __A, __B);
+}
+
+__m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_dpwssd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssd.512
+  return _mm512_mask_dpwssd_epi32(__S, __U, __A, __B);
+}
+
+__m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_dpwssd_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.512
+  return _mm512_maskz_dpwssd_epi32(__U, __S, __A, __B);
+}
+
+__m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_dpwssd_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssd.512
+  return _mm512_dpwssd_epi32(__S, __A, __B);
+}
+
+__m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_dpwssds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssds.512
+  return _mm512_mask_dpwssds_epi32(__S, __U, __A, __B);
+}
+
+__m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_dpwssds_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.512
+  return _mm512_maskz_dpwssds_epi32(__U, __S, __A, __B);
+}
+
+__m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_dpwssds_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpdpwssds.512
+  return _mm512_dpwssds_epi32(__S, __A, __B);
+}
+
diff --git a/test/CodeGen/avx512vpopcntdqvlintrin.c b/test/CodeGen/avx512vpopcntdqvlintrin.c
new file mode 100644
index 0000000..010cb6b
--- /dev/null
+++ b/test/CodeGen/avx512vpopcntdqvlintrin.c
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m128i test_mm_popcnt_epi64(__m128i __A) {
+  // CHECK-LABEL: @test_mm_popcnt_epi64
+  // CHECK: @llvm.ctpop.v2i64
+  return _mm_popcnt_epi64(__A);
+}
+__m128i test_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_popcnt_epi64
+  // CHECK: @llvm.ctpop.v2i64
+  // CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{[0-9]+}}, <2 x i64> {{.*}}
+  return _mm_mask_popcnt_epi64(__W, __U, __A);
+}
+__m128i test_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_popcnt_epi64
+  // CHECK: @llvm.ctpop.v2i64
+  // CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{[0-9]+}}, <2 x i64> {{.*}}
+  return _mm_maskz_popcnt_epi64(__U, __A);
+}
+__m128i test_mm_popcnt_epi32(__m128i __A) {
+  // CHECK-LABEL: @test_mm_popcnt_epi32
+  // CHECK: @llvm.ctpop.v4i32
+  return _mm_popcnt_epi32(__A);
+}
+__m128i test_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_popcnt_epi32
+  // CHECK: @llvm.ctpop.v4i32
+  // CHECK: select <4 x i1> %{{.+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> {{.*}}
+  return _mm_mask_popcnt_epi32(__W, __U, __A);
+}
+__m128i test_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_popcnt_epi32
+  // CHECK: @llvm.ctpop.v4i32
+  // CHECK: select <4 x i1> %{{.+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> {{.*}}
+  return _mm_maskz_popcnt_epi32(__U, __A);
+}
+
+__m256i test_mm256_popcnt_epi64(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_popcnt_epi64
+  // CHECK: @llvm.ctpop.v4i64
+  return _mm256_popcnt_epi64(__A);
+}
+__m256i test_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_popcnt_epi64
+  // CHECK: @llvm.ctpop.v4i64
+  // CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{[0-9]+}}, <4 x i64> {{.*}}
+  return _mm256_mask_popcnt_epi64(__W, __U, __A);
+}
+__m256i test_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_popcnt_epi64
+  // CHECK: @llvm.ctpop.v4i64
+  // CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{[0-9]+}}, <4 x i64> {{.*}}
+  return _mm256_maskz_popcnt_epi64(__U, __A);
+}
+__m256i test_mm256_popcnt_epi32(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_popcnt_epi32
+  // CHECK: @llvm.ctpop.v8i32
+  return _mm256_popcnt_epi32(__A);
+}
+__m256i test_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_popcnt_epi32
+  // CHECK: @llvm.ctpop.v8i32
+  // CHECK: select <8 x i1> %{{.+}}, <8 x i32> %{{[0-9]+}}, <8 x i32> {{.*}}
+  return _mm256_mask_popcnt_epi32(__W, __U, __A);
+}
+__m256i test_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_popcnt_epi32
+  // CHECK: @llvm.ctpop.v8i32
+  // CHECK: select <8 x i1> %{{.+}}, <8 x i32> %{{[0-9]+}}, <8 x i32> {{.*}}
+  return _mm256_maskz_popcnt_epi32(__U, __A);
+}
diff --git a/test/CodeGen/block-byref-aggr.c b/test/CodeGen/block-byref-aggr.c
index 7d146a2..962f100 100644
--- a/test/CodeGen/block-byref-aggr.c
+++ b/test/CodeGen/block-byref-aggr.c
@@ -24,7 +24,7 @@
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[BYREF]], [[BYREF]]* [[T0]], i32 0, i32 4
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[AGG]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[AGG]]* [[TEMP]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T2]], i8* [[T3]], i64 4, i32 4, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T2]], i8* align 4 [[T3]], i64 4, i1 false)
 //   Verify that there's nothing else significant in the function.
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[BYREF]]* [[A]] to i8*
 // CHECK-NEXT: call void @_Block_object_dispose(i8* [[T0]], i32 8)
@@ -50,14 +50,14 @@
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[B_BYREF]], [[B_BYREF]]* [[T0]], i32 0, i32 4
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[AGG]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[AGG]]* [[TEMP]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T2]], i8* [[T3]], i64 4, i32 4, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T2]], i8* align 4 [[T3]], i64 4, i1 false)
 //   Then for 'a':
 // CHECK-NEXT: [[A_FORWARDING:%.*]] = getelementptr inbounds [[A_BYREF]], [[A_BYREF]]* [[A]], i32 0, i32 1
 // CHECK-NEXT: [[T0:%.*]] = load [[A_BYREF]]*, [[A_BYREF]]** [[A_FORWARDING]]
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[A_BYREF]], [[A_BYREF]]* [[T0]], i32 0, i32 4
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[AGG]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[AGG]]* [[TEMP]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T2]], i8* [[T3]], i64 4, i32 4, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T2]], i8* align 4 [[T3]], i64 4, i1 false)
 //   Verify that there's nothing else significant in the function.
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[B_BYREF]]* [[B]] to i8*
 // CHECK-NEXT: call void @_Block_object_dispose(i8* [[T0]], i32 8)
diff --git a/test/CodeGen/blocks-windows.c b/test/CodeGen/blocks-windows.c
index ced00ef..546c0bf 100644
--- a/test/CodeGen/blocks-windows.c
+++ b/test/CodeGen/blocks-windows.c
@@ -68,7 +68,7 @@
 }
 
 // CHECK-BLOCKS-IN-BLOCKS-DECL: @_NSConcreteStackBlock = external dllexport global i8*
-// CHECK-BLOCKS-IN-BLOCKS-DEFN: @_NSConcreteStackBlock = common dllexport global [5 x i32]
+// CHECK-BLOCKS-IN-BLOCKS-DEFN: @_NSConcreteStackBlock = common dso_local dllexport global [5 x i32]
 // CHECK-BLOCKS-NOT-IN-BLOCKS: @_NSConcreteStackBlock = external dllimport global i8*
 // CHECK-BLOCKS-NOT-IN-BLOCKS-EXTERN: @_NSConcreteStackBlock = external dllimport global i8*
 // CHECK-BLOCKS-NOT-IN-BLOCKS-EXTERN-DLLIMPORT: @_NSConcreteStackBlock = external dllimport global i8*
diff --git a/test/CodeGen/bounds-checking.c b/test/CodeGen/bounds-checking.c
index 90b7f0f..2e6a086 100644
--- a/test/CodeGen/bounds-checking.c
+++ b/test/CodeGen/bounds-checking.c
@@ -1,5 +1,9 @@
 // RUN: %clang_cc1 -fsanitize=local-bounds -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=local-bounds -fexperimental-new-pass-manager -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fsanitize=array-bounds -O -fsanitize-trap=array-bounds -emit-llvm -triple x86_64-apple-darwin10 -DNO_DYNAMIC %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=array-bounds -O -fsanitize-trap=array-bounds -fexperimental-new-pass-manager -emit-llvm -triple x86_64-apple-darwin10 -DNO_DYNAMIC %s -o - | FileCheck %s
+//
+// REQUIRES: x86-registered-target
 
 // CHECK-LABEL: @f
 double f(int b, int i) {
diff --git a/test/CodeGen/builtin-memfns.c b/test/CodeGen/builtin-memfns.c
index 4a06160..d93a5aa 100644
--- a/test/CodeGen/builtin-memfns.c
+++ b/test/CodeGen/builtin-memfns.c
@@ -54,14 +54,14 @@
 int test7(int *p) {
   struct snd_pcm_hw_params_t* hwparams;  // incomplete type.
   
-  // CHECK: call void @llvm.memset{{.*}}256, i32 4, i1 false)
+  // CHECK: call void @llvm.memset{{.*}} align 4 {{.*}}256, i1 false)
   __builtin_memset(p, 0, 256);  // Should be alignment = 4
 
-  // CHECK: call void @llvm.memset{{.*}}256, i32 1, i1 false)
+  // CHECK: call void @llvm.memset{{.*}} align 1 {{.*}}256, i1 false)
   __builtin_memset((char*)p, 0, 256);  // Should be alignment = 1
 
   __builtin_memset(hwparams, 0, 256);  // No crash alignment = 1
-  // CHECK: call void @llvm.memset{{.*}}256, i32 1, i1 false)
+  // CHECK: call void @llvm.memset{{.*}} align 1{{.*}}256, i1 false)
 }
 
 // <rdar://problem/11314941>
@@ -73,13 +73,13 @@
 struct PS ps;
 void test8(int *arg) {
   // CHECK: @test8
-  // CHECK: call void @llvm.memcpy{{.*}} 16, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}} align 4 {{.*}} align 1 {{.*}} 16, i1 false)
   __builtin_memcpy(arg, ps.modes, sizeof(struct PS));
 }
 
 __attribute((aligned(16))) int x[4], y[4];
 void test9() {
   // CHECK: @test9
-  // CHECK: call void @llvm.memcpy{{.*}} 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}} align 16 {{.*}} align 16 {{.*}} 16, i1 false)
   __builtin_memcpy(x, y, sizeof(y));
 }
diff --git a/test/CodeGen/builtin-sqrt.c b/test/CodeGen/builtin-sqrt.c
new file mode 100644
index 0000000..5b275bf
--- /dev/null
+++ b/test/CodeGen/builtin-sqrt.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -fmath-errno -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck %s --check-prefix=HAS_ERRNO
+// RUN: %clang_cc1              -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck %s --check-prefix=NO_ERRNO
+
+float foo(float X) {
+  // HAS_ERRNO: call float @sqrtf(float
+  // NO_ERRNO: call float @llvm.sqrt.f32(float
+  return __builtin_sqrtf(X);
+}
+
+// HAS_ERRNO: declare float @sqrtf(float) [[ATTR:#[0-9]+]]
+// HAS_ERRNO-NOT: attributes [[ATTR]] = {{{.*}} readnone
+
+// NO_ERRNO: declare float @llvm.sqrt.f32(float) [[ATTR:#[0-9]+]]
+// NO_ERRNO: attributes [[ATTR]] = { nounwind readnone {{.*}}}
+
diff --git a/test/CodeGen/builtins-arm.c b/test/CodeGen/builtins-arm.c
index e04349f..020f2b4 100644
--- a/test/CodeGen/builtins-arm.c
+++ b/test/CodeGen/builtins-arm.c
@@ -8,69 +8,85 @@
 }
 
 void f1(char *a, char *b) {
+  // CHECK: call {{.*}} @__clear_cache
 	__clear_cache(a,b);
 }
 
-// CHECK: call {{.*}} @__clear_cache
+float test_vcvtrf0(float f) {
+  // CHECK: call float @llvm.arm.vcvtr.f32(float %f)
+  return __builtin_arm_vcvtr_f(f, 0);
+}
+
+float test_vcvtrf1(float f) {
+  // CHECK: call float @llvm.arm.vcvtru.f32(float %f)
+  return __builtin_arm_vcvtr_f(f, 1);
+}
+
+double test_vcvtrd0(double d) {
+  // CHECK: call float @llvm.arm.vcvtr.f64(double %d)
+  return __builtin_arm_vcvtr_d(d, 0);
+}
+
+double test_vcvtrd1(double d) {
+  // call float @llvm.arm.vcvtru.f64(double %d)
+  return __builtin_arm_vcvtr_d(d, 1);
+}
 
 void test_eh_return_data_regno()
 {
+  // CHECK: store volatile i32 0
+  // CHECK: store volatile i32 1
   volatile int res;
-  res = __builtin_eh_return_data_regno(0);  // CHECK: store volatile i32 0
-  res = __builtin_eh_return_data_regno(1);  // CHECK: store volatile i32 1
+  res = __builtin_eh_return_data_regno(0);
+  res = __builtin_eh_return_data_regno(1);
 }
 
 void nop() {
+  // CHECK: call {{.*}} @llvm.arm.hint(i32 0)
   __builtin_arm_nop();
 }
 
-// CHECK: call {{.*}} @llvm.arm.hint(i32 0)
-
 void yield() {
+  // CHECK: call {{.*}} @llvm.arm.hint(i32 1)
   __builtin_arm_yield();
 }
 
-// CHECK: call {{.*}} @llvm.arm.hint(i32 1)
-
 void wfe() {
+  // CHECK: call {{.*}} @llvm.arm.hint(i32 2)
   __builtin_arm_wfe();
 }
 
-// CHECK: call {{.*}} @llvm.arm.hint(i32 2)
-
 void wfi() {
+  // CHECK: call {{.*}} @llvm.arm.hint(i32 3)
   __builtin_arm_wfi();
 }
 
-// CHECK: call {{.*}} @llvm.arm.hint(i32 3)
-
 void sev() {
+  // CHECK: call {{.*}} @llvm.arm.hint(i32 4)
   __builtin_arm_sev();
 }
 
-// CHECK: call {{.*}} @llvm.arm.hint(i32 4)
-
 void sevl() {
+  // CHECK: call {{.*}} @llvm.arm.hint(i32 5)
   __builtin_arm_sevl();
 }
 
-// CHECK: call {{.*}} @llvm.arm.hint(i32 5)
-
 void dbg() {
+  // CHECK: call {{.*}} @llvm.arm.dbg(i32 0)
   __builtin_arm_dbg(0);
 }
 
-// CHECK: call {{.*}} @llvm.arm.dbg(i32 0)
-
 void test_barrier() {
-  __builtin_arm_dmb(1); //CHECK: call {{.*}} @llvm.arm.dmb(i32 1)
-  __builtin_arm_dsb(2); //CHECK: call {{.*}} @llvm.arm.dsb(i32 2)
-  __builtin_arm_isb(3); //CHECK: call {{.*}} @llvm.arm.isb(i32 3)
+  //CHECK: call {{.*}} @llvm.arm.dmb(i32 1)
+  //CHECK: call {{.*}} @llvm.arm.dsb(i32 2)
+  //CHECK: call {{.*}} @llvm.arm.isb(i32 3)
+  __builtin_arm_dmb(1);
+  __builtin_arm_dsb(2);
+  __builtin_arm_isb(3);
 }
 
-// CHECK: call {{.*}} @llvm.bitreverse.i32(i32 %a)
-
 unsigned rbit(unsigned a) {
+  // CHECK: call {{.*}} @llvm.bitreverse.i32(i32 %a)
   return __builtin_arm_rbit(a);
 }
 
diff --git a/test/CodeGen/builtins-hexagon.c b/test/CodeGen/builtins-hexagon.c
index f9f5d49..22835a2 100644
--- a/test/CodeGen/builtins-hexagon.c
+++ b/test/CodeGen/builtins-hexagon.c
@@ -1,2977 +1,3371 @@
 // REQUIRES: hexagon-registered-target
 // RUN: %clang_cc1 -triple hexagon-unknown-elf -emit-llvm %s -o - | FileCheck %s
 
-void foo() {
-  int v16 __attribute__((__vector_size__(64)));
-  int v32 __attribute__((__vector_size__(128)));
-  int v64 __attribute__((__vector_size__(256)));
+void test() {
+  int v64 __attribute__((__vector_size__(64)));
+  int v128 __attribute__((__vector_size__(128)));
+  int v256 __attribute__((__vector_size__(256)));
 
-  // The circ/brev intrinsics do not have _HEXAGON_ in the name.
-  __builtin_brev_ldb(0, 0, 0);
-  // CHECK: @llvm.hexagon.brev.ldb
-  __builtin_brev_ldd(0, 0, 0);
-  // CHECK: @llvm.hexagon.brev.ldd
-  __builtin_brev_ldh(0, 0, 0);
-  // CHECK: @llvm.hexagon.brev.ldh
-  __builtin_brev_ldub(0, 0, 0);
-  // CHECK: @llvm.hexagon.brev.ldub
-  __builtin_brev_lduh(0, 0, 0);
-  // CHECK: @llvm.hexagon.brev.lduh
-  __builtin_brev_ldw(0, 0, 0);
-  // CHECK: @llvm.hexagon.brev.ldw
-  __builtin_brev_stb(0, 0, 0);
-  // CHECK: @llvm.hexagon.brev.stb
-  __builtin_brev_std(0, 0LL, 0);
-  // CHECK: @llvm.hexagon.brev.std
-  __builtin_brev_sth(0, 0, 0);
-  // CHECK: @llvm.hexagon.brev.sth
-  __builtin_brev_sthhi(0, 0, 0);
-  // CHECK: @llvm.hexagon.brev.sthhi
-  __builtin_brev_stw(0, 0, 0);
-  // CHECK: @llvm.hexagon.brev.stw
-  __builtin_circ_ldb(0, 0, 0, 0);
-  // CHECK: llvm.hexagon.circ.ldb
-  __builtin_circ_ldd(0, 0, 0, 0);
-  // CHECK: llvm.hexagon.circ.ldd
-  __builtin_circ_ldh(0, 0, 0, 0);
-  // CHECK: llvm.hexagon.circ.ldh
-  __builtin_circ_ldub(0, 0, 0, 0);
-  // CHECK: llvm.hexagon.circ.ldub
-  __builtin_circ_lduh(0, 0, 0, 0);
-  // CHECK: llvm.hexagon.circ.lduh
-  __builtin_circ_ldw(0, 0, 0, 0);
-  // CHECK: llvm.hexagon.circ.ldw
-  __builtin_circ_stb(0, 0, 0, 0);
-  // CHECK: llvm.hexagon.circ.stb
-  __builtin_circ_std(0, 0LL, 0, 0);
-  // CHECK: llvm.hexagon.circ.std
-  __builtin_circ_sth(0, 0, 0, 0);
-  // CHECK: llvm.hexagon.circ.sth
-  __builtin_circ_sthhi(0, 0, 0, 0);
-  // CHECK: llvm.hexagon.circ.sthhi
-  __builtin_circ_stw(0, 0, 0, 0);
-  // CHECK: llvm.hexagon.circ.stw
-
-  __builtin_HEXAGON_A2_abs(0);
   // CHECK: @llvm.hexagon.A2.abs
-  __builtin_HEXAGON_A2_absp(0);
+  __builtin_HEXAGON_A2_abs(0);
   // CHECK: @llvm.hexagon.A2.absp
-  __builtin_HEXAGON_A2_abssat(0);
+  __builtin_HEXAGON_A2_absp(0);
   // CHECK: @llvm.hexagon.A2.abssat
-  __builtin_HEXAGON_A2_add(0, 0);
+  __builtin_HEXAGON_A2_abssat(0);
   // CHECK: @llvm.hexagon.A2.add
-  __builtin_HEXAGON_A2_addh_h16_hh(0, 0);
+  __builtin_HEXAGON_A2_add(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.h16.hh
-  __builtin_HEXAGON_A2_addh_h16_hl(0, 0);
+  __builtin_HEXAGON_A2_addh_h16_hh(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.h16.hl
-  __builtin_HEXAGON_A2_addh_h16_lh(0, 0);
+  __builtin_HEXAGON_A2_addh_h16_hl(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.h16.lh
-  __builtin_HEXAGON_A2_addh_h16_ll(0, 0);
+  __builtin_HEXAGON_A2_addh_h16_lh(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.h16.ll
-  __builtin_HEXAGON_A2_addh_h16_sat_hh(0, 0);
+  __builtin_HEXAGON_A2_addh_h16_ll(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.h16.sat.hh
-  __builtin_HEXAGON_A2_addh_h16_sat_hl(0, 0);
+  __builtin_HEXAGON_A2_addh_h16_sat_hh(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.h16.sat.hl
-  __builtin_HEXAGON_A2_addh_h16_sat_lh(0, 0);
+  __builtin_HEXAGON_A2_addh_h16_sat_hl(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.h16.sat.lh
-  __builtin_HEXAGON_A2_addh_h16_sat_ll(0, 0);
+  __builtin_HEXAGON_A2_addh_h16_sat_lh(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.h16.sat.ll
-  __builtin_HEXAGON_A2_addh_l16_hl(0, 0);
+  __builtin_HEXAGON_A2_addh_h16_sat_ll(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.l16.hl
-  __builtin_HEXAGON_A2_addh_l16_ll(0, 0);
+  __builtin_HEXAGON_A2_addh_l16_hl(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.l16.ll
-  __builtin_HEXAGON_A2_addh_l16_sat_hl(0, 0);
+  __builtin_HEXAGON_A2_addh_l16_ll(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.l16.sat.hl
-  __builtin_HEXAGON_A2_addh_l16_sat_ll(0, 0);
+  __builtin_HEXAGON_A2_addh_l16_sat_hl(0, 0);
   // CHECK: @llvm.hexagon.A2.addh.l16.sat.ll
-  __builtin_HEXAGON_A2_addi(0, 0);
+  __builtin_HEXAGON_A2_addh_l16_sat_ll(0, 0);
   // CHECK: @llvm.hexagon.A2.addi
-  __builtin_HEXAGON_A2_addp(0, 0);
+  __builtin_HEXAGON_A2_addi(0, 0);
   // CHECK: @llvm.hexagon.A2.addp
-  __builtin_HEXAGON_A2_addpsat(0, 0);
+  __builtin_HEXAGON_A2_addp(0, 0);
   // CHECK: @llvm.hexagon.A2.addpsat
-  __builtin_HEXAGON_A2_addsat(0, 0);
+  __builtin_HEXAGON_A2_addpsat(0, 0);
   // CHECK: @llvm.hexagon.A2.addsat
-  __builtin_HEXAGON_A2_addsp(0, 0);
+  __builtin_HEXAGON_A2_addsat(0, 0);
   // CHECK: @llvm.hexagon.A2.addsp
-  __builtin_HEXAGON_A2_and(0, 0);
+  __builtin_HEXAGON_A2_addsp(0, 0);
   // CHECK: @llvm.hexagon.A2.and
-  __builtin_HEXAGON_A2_andir(0, 0);
+  __builtin_HEXAGON_A2_and(0, 0);
   // CHECK: @llvm.hexagon.A2.andir
-  __builtin_HEXAGON_A2_andp(0, 0);
+  __builtin_HEXAGON_A2_andir(0, 0);
   // CHECK: @llvm.hexagon.A2.andp
-  __builtin_HEXAGON_A2_aslh(0);
+  __builtin_HEXAGON_A2_andp(0, 0);
   // CHECK: @llvm.hexagon.A2.aslh
-  __builtin_HEXAGON_A2_asrh(0);
+  __builtin_HEXAGON_A2_aslh(0);
   // CHECK: @llvm.hexagon.A2.asrh
-  __builtin_HEXAGON_A2_combine_hh(0, 0);
+  __builtin_HEXAGON_A2_asrh(0);
   // CHECK: @llvm.hexagon.A2.combine.hh
-  __builtin_HEXAGON_A2_combine_hl(0, 0);
+  __builtin_HEXAGON_A2_combine_hh(0, 0);
   // CHECK: @llvm.hexagon.A2.combine.hl
-  __builtin_HEXAGON_A2_combineii(0, 0);
-  // CHECK: @llvm.hexagon.A2.combineii
-  __builtin_HEXAGON_A2_combine_lh(0, 0);
+  __builtin_HEXAGON_A2_combine_hl(0, 0);
   // CHECK: @llvm.hexagon.A2.combine.lh
-  __builtin_HEXAGON_A2_combine_ll(0, 0);
+  __builtin_HEXAGON_A2_combine_lh(0, 0);
   // CHECK: @llvm.hexagon.A2.combine.ll
-  __builtin_HEXAGON_A2_combinew(0, 0);
+  __builtin_HEXAGON_A2_combine_ll(0, 0);
+  // CHECK: @llvm.hexagon.A2.combineii
+  __builtin_HEXAGON_A2_combineii(0, 0);
   // CHECK: @llvm.hexagon.A2.combinew
-  __builtin_HEXAGON_A2_max(0, 0);
+  __builtin_HEXAGON_A2_combinew(0, 0);
   // CHECK: @llvm.hexagon.A2.max
-  __builtin_HEXAGON_A2_maxp(0, 0);
+  __builtin_HEXAGON_A2_max(0, 0);
   // CHECK: @llvm.hexagon.A2.maxp
-  __builtin_HEXAGON_A2_maxu(0, 0);
+  __builtin_HEXAGON_A2_maxp(0, 0);
   // CHECK: @llvm.hexagon.A2.maxu
-  __builtin_HEXAGON_A2_maxup(0, 0);
+  __builtin_HEXAGON_A2_maxu(0, 0);
   // CHECK: @llvm.hexagon.A2.maxup
-  __builtin_HEXAGON_A2_min(0, 0);
+  __builtin_HEXAGON_A2_maxup(0, 0);
   // CHECK: @llvm.hexagon.A2.min
-  __builtin_HEXAGON_A2_minp(0, 0);
+  __builtin_HEXAGON_A2_min(0, 0);
   // CHECK: @llvm.hexagon.A2.minp
-  __builtin_HEXAGON_A2_minu(0, 0);
+  __builtin_HEXAGON_A2_minp(0, 0);
   // CHECK: @llvm.hexagon.A2.minu
-  __builtin_HEXAGON_A2_minup(0, 0);
+  __builtin_HEXAGON_A2_minu(0, 0);
   // CHECK: @llvm.hexagon.A2.minup
-  __builtin_HEXAGON_A2_neg(0);
+  __builtin_HEXAGON_A2_minup(0, 0);
   // CHECK: @llvm.hexagon.A2.neg
-  __builtin_HEXAGON_A2_negp(0);
+  __builtin_HEXAGON_A2_neg(0);
   // CHECK: @llvm.hexagon.A2.negp
-  __builtin_HEXAGON_A2_negsat(0);
+  __builtin_HEXAGON_A2_negp(0);
   // CHECK: @llvm.hexagon.A2.negsat
-  __builtin_HEXAGON_A2_not(0);
+  __builtin_HEXAGON_A2_negsat(0);
   // CHECK: @llvm.hexagon.A2.not
-  __builtin_HEXAGON_A2_notp(0);
+  __builtin_HEXAGON_A2_not(0);
   // CHECK: @llvm.hexagon.A2.notp
-  __builtin_HEXAGON_A2_or(0, 0);
+  __builtin_HEXAGON_A2_notp(0);
   // CHECK: @llvm.hexagon.A2.or
-  __builtin_HEXAGON_A2_orir(0, 0);
+  __builtin_HEXAGON_A2_or(0, 0);
   // CHECK: @llvm.hexagon.A2.orir
-  __builtin_HEXAGON_A2_orp(0, 0);
+  __builtin_HEXAGON_A2_orir(0, 0);
   // CHECK: @llvm.hexagon.A2.orp
-  __builtin_HEXAGON_A2_roundsat(0);
+  __builtin_HEXAGON_A2_orp(0, 0);
   // CHECK: @llvm.hexagon.A2.roundsat
-  __builtin_HEXAGON_A2_sat(0);
+  __builtin_HEXAGON_A2_roundsat(0);
   // CHECK: @llvm.hexagon.A2.sat
-  __builtin_HEXAGON_A2_satb(0);
+  __builtin_HEXAGON_A2_sat(0);
   // CHECK: @llvm.hexagon.A2.satb
-  __builtin_HEXAGON_A2_sath(0);
+  __builtin_HEXAGON_A2_satb(0);
   // CHECK: @llvm.hexagon.A2.sath
-  __builtin_HEXAGON_A2_satub(0);
+  __builtin_HEXAGON_A2_sath(0);
   // CHECK: @llvm.hexagon.A2.satub
-  __builtin_HEXAGON_A2_satuh(0);
+  __builtin_HEXAGON_A2_satub(0);
   // CHECK: @llvm.hexagon.A2.satuh
-  __builtin_HEXAGON_A2_sub(0, 0);
+  __builtin_HEXAGON_A2_satuh(0);
   // CHECK: @llvm.hexagon.A2.sub
-  __builtin_HEXAGON_A2_subh_h16_hh(0, 0);
+  __builtin_HEXAGON_A2_sub(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.h16.hh
-  __builtin_HEXAGON_A2_subh_h16_hl(0, 0);
+  __builtin_HEXAGON_A2_subh_h16_hh(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.h16.hl
-  __builtin_HEXAGON_A2_subh_h16_lh(0, 0);
+  __builtin_HEXAGON_A2_subh_h16_hl(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.h16.lh
-  __builtin_HEXAGON_A2_subh_h16_ll(0, 0);
+  __builtin_HEXAGON_A2_subh_h16_lh(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.h16.ll
-  __builtin_HEXAGON_A2_subh_h16_sat_hh(0, 0);
+  __builtin_HEXAGON_A2_subh_h16_ll(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.h16.sat.hh
-  __builtin_HEXAGON_A2_subh_h16_sat_hl(0, 0);
+  __builtin_HEXAGON_A2_subh_h16_sat_hh(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.h16.sat.hl
-  __builtin_HEXAGON_A2_subh_h16_sat_lh(0, 0);
+  __builtin_HEXAGON_A2_subh_h16_sat_hl(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.h16.sat.lh
-  __builtin_HEXAGON_A2_subh_h16_sat_ll(0, 0);
+  __builtin_HEXAGON_A2_subh_h16_sat_lh(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.h16.sat.ll
-  __builtin_HEXAGON_A2_subh_l16_hl(0, 0);
+  __builtin_HEXAGON_A2_subh_h16_sat_ll(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.l16.hl
-  __builtin_HEXAGON_A2_subh_l16_ll(0, 0);
+  __builtin_HEXAGON_A2_subh_l16_hl(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.l16.ll
-  __builtin_HEXAGON_A2_subh_l16_sat_hl(0, 0);
+  __builtin_HEXAGON_A2_subh_l16_ll(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.l16.sat.hl
-  __builtin_HEXAGON_A2_subh_l16_sat_ll(0, 0);
+  __builtin_HEXAGON_A2_subh_l16_sat_hl(0, 0);
   // CHECK: @llvm.hexagon.A2.subh.l16.sat.ll
-  __builtin_HEXAGON_A2_subp(0, 0);
+  __builtin_HEXAGON_A2_subh_l16_sat_ll(0, 0);
   // CHECK: @llvm.hexagon.A2.subp
-  __builtin_HEXAGON_A2_subri(0, 0);
+  __builtin_HEXAGON_A2_subp(0, 0);
   // CHECK: @llvm.hexagon.A2.subri
-  __builtin_HEXAGON_A2_subsat(0, 0);
+  __builtin_HEXAGON_A2_subri(0, 0);
   // CHECK: @llvm.hexagon.A2.subsat
-  __builtin_HEXAGON_A2_svaddh(0, 0);
+  __builtin_HEXAGON_A2_subsat(0, 0);
   // CHECK: @llvm.hexagon.A2.svaddh
-  __builtin_HEXAGON_A2_svaddhs(0, 0);
+  __builtin_HEXAGON_A2_svaddh(0, 0);
   // CHECK: @llvm.hexagon.A2.svaddhs
-  __builtin_HEXAGON_A2_svadduhs(0, 0);
+  __builtin_HEXAGON_A2_svaddhs(0, 0);
   // CHECK: @llvm.hexagon.A2.svadduhs
-  __builtin_HEXAGON_A2_svavgh(0, 0);
+  __builtin_HEXAGON_A2_svadduhs(0, 0);
   // CHECK: @llvm.hexagon.A2.svavgh
-  __builtin_HEXAGON_A2_svavghs(0, 0);
+  __builtin_HEXAGON_A2_svavgh(0, 0);
   // CHECK: @llvm.hexagon.A2.svavghs
-  __builtin_HEXAGON_A2_svnavgh(0, 0);
+  __builtin_HEXAGON_A2_svavghs(0, 0);
   // CHECK: @llvm.hexagon.A2.svnavgh
-  __builtin_HEXAGON_A2_svsubh(0, 0);
+  __builtin_HEXAGON_A2_svnavgh(0, 0);
   // CHECK: @llvm.hexagon.A2.svsubh
-  __builtin_HEXAGON_A2_svsubhs(0, 0);
+  __builtin_HEXAGON_A2_svsubh(0, 0);
   // CHECK: @llvm.hexagon.A2.svsubhs
-  __builtin_HEXAGON_A2_svsubuhs(0, 0);
+  __builtin_HEXAGON_A2_svsubhs(0, 0);
   // CHECK: @llvm.hexagon.A2.svsubuhs
-  __builtin_HEXAGON_A2_swiz(0);
+  __builtin_HEXAGON_A2_svsubuhs(0, 0);
   // CHECK: @llvm.hexagon.A2.swiz
-  __builtin_HEXAGON_A2_sxtb(0);
+  __builtin_HEXAGON_A2_swiz(0);
   // CHECK: @llvm.hexagon.A2.sxtb
-  __builtin_HEXAGON_A2_sxth(0);
+  __builtin_HEXAGON_A2_sxtb(0);
   // CHECK: @llvm.hexagon.A2.sxth
-  __builtin_HEXAGON_A2_sxtw(0);
+  __builtin_HEXAGON_A2_sxth(0);
   // CHECK: @llvm.hexagon.A2.sxtw
-  __builtin_HEXAGON_A2_tfr(0);
+  __builtin_HEXAGON_A2_sxtw(0);
   // CHECK: @llvm.hexagon.A2.tfr
-  __builtin_HEXAGON_A2_tfrih(0, 0);
+  __builtin_HEXAGON_A2_tfr(0);
   // CHECK: @llvm.hexagon.A2.tfrih
-  __builtin_HEXAGON_A2_tfril(0, 0);
+  __builtin_HEXAGON_A2_tfrih(0, 0);
   // CHECK: @llvm.hexagon.A2.tfril
-  __builtin_HEXAGON_A2_tfrp(0);
+  __builtin_HEXAGON_A2_tfril(0, 0);
   // CHECK: @llvm.hexagon.A2.tfrp
-  __builtin_HEXAGON_A2_tfrpi(0);
+  __builtin_HEXAGON_A2_tfrp(0);
   // CHECK: @llvm.hexagon.A2.tfrpi
-  __builtin_HEXAGON_A2_tfrsi(0);
+  __builtin_HEXAGON_A2_tfrpi(0);
   // CHECK: @llvm.hexagon.A2.tfrsi
-  __builtin_HEXAGON_A2_vabsh(0);
+  __builtin_HEXAGON_A2_tfrsi(0);
   // CHECK: @llvm.hexagon.A2.vabsh
-  __builtin_HEXAGON_A2_vabshsat(0);
+  __builtin_HEXAGON_A2_vabsh(0);
   // CHECK: @llvm.hexagon.A2.vabshsat
-  __builtin_HEXAGON_A2_vabsw(0);
+  __builtin_HEXAGON_A2_vabshsat(0);
   // CHECK: @llvm.hexagon.A2.vabsw
-  __builtin_HEXAGON_A2_vabswsat(0);
+  __builtin_HEXAGON_A2_vabsw(0);
   // CHECK: @llvm.hexagon.A2.vabswsat
-  __builtin_HEXAGON_A2_vaddb_map(0, 0);
+  __builtin_HEXAGON_A2_vabswsat(0);
   // CHECK: @llvm.hexagon.A2.vaddb.map
-  __builtin_HEXAGON_A2_vaddh(0, 0);
+  __builtin_HEXAGON_A2_vaddb_map(0, 0);
   // CHECK: @llvm.hexagon.A2.vaddh
-  __builtin_HEXAGON_A2_vaddhs(0, 0);
+  __builtin_HEXAGON_A2_vaddh(0, 0);
   // CHECK: @llvm.hexagon.A2.vaddhs
-  __builtin_HEXAGON_A2_vaddub(0, 0);
+  __builtin_HEXAGON_A2_vaddhs(0, 0);
   // CHECK: @llvm.hexagon.A2.vaddub
-  __builtin_HEXAGON_A2_vaddubs(0, 0);
+  __builtin_HEXAGON_A2_vaddub(0, 0);
   // CHECK: @llvm.hexagon.A2.vaddubs
-  __builtin_HEXAGON_A2_vadduhs(0, 0);
+  __builtin_HEXAGON_A2_vaddubs(0, 0);
   // CHECK: @llvm.hexagon.A2.vadduhs
-  __builtin_HEXAGON_A2_vaddw(0, 0);
+  __builtin_HEXAGON_A2_vadduhs(0, 0);
   // CHECK: @llvm.hexagon.A2.vaddw
-  __builtin_HEXAGON_A2_vaddws(0, 0);
+  __builtin_HEXAGON_A2_vaddw(0, 0);
   // CHECK: @llvm.hexagon.A2.vaddws
-  __builtin_HEXAGON_A2_vavgh(0, 0);
+  __builtin_HEXAGON_A2_vaddws(0, 0);
   // CHECK: @llvm.hexagon.A2.vavgh
-  __builtin_HEXAGON_A2_vavghcr(0, 0);
+  __builtin_HEXAGON_A2_vavgh(0, 0);
   // CHECK: @llvm.hexagon.A2.vavghcr
-  __builtin_HEXAGON_A2_vavghr(0, 0);
+  __builtin_HEXAGON_A2_vavghcr(0, 0);
   // CHECK: @llvm.hexagon.A2.vavghr
-  __builtin_HEXAGON_A2_vavgub(0, 0);
+  __builtin_HEXAGON_A2_vavghr(0, 0);
   // CHECK: @llvm.hexagon.A2.vavgub
-  __builtin_HEXAGON_A2_vavgubr(0, 0);
+  __builtin_HEXAGON_A2_vavgub(0, 0);
   // CHECK: @llvm.hexagon.A2.vavgubr
-  __builtin_HEXAGON_A2_vavguh(0, 0);
+  __builtin_HEXAGON_A2_vavgubr(0, 0);
   // CHECK: @llvm.hexagon.A2.vavguh
-  __builtin_HEXAGON_A2_vavguhr(0, 0);
+  __builtin_HEXAGON_A2_vavguh(0, 0);
   // CHECK: @llvm.hexagon.A2.vavguhr
-  __builtin_HEXAGON_A2_vavguw(0, 0);
+  __builtin_HEXAGON_A2_vavguhr(0, 0);
   // CHECK: @llvm.hexagon.A2.vavguw
-  __builtin_HEXAGON_A2_vavguwr(0, 0);
+  __builtin_HEXAGON_A2_vavguw(0, 0);
   // CHECK: @llvm.hexagon.A2.vavguwr
-  __builtin_HEXAGON_A2_vavgw(0, 0);
+  __builtin_HEXAGON_A2_vavguwr(0, 0);
   // CHECK: @llvm.hexagon.A2.vavgw
-  __builtin_HEXAGON_A2_vavgwcr(0, 0);
+  __builtin_HEXAGON_A2_vavgw(0, 0);
   // CHECK: @llvm.hexagon.A2.vavgwcr
-  __builtin_HEXAGON_A2_vavgwr(0, 0);
+  __builtin_HEXAGON_A2_vavgwcr(0, 0);
   // CHECK: @llvm.hexagon.A2.vavgwr
-  __builtin_HEXAGON_A2_vcmpbeq(0, 0);
+  __builtin_HEXAGON_A2_vavgwr(0, 0);
   // CHECK: @llvm.hexagon.A2.vcmpbeq
-  __builtin_HEXAGON_A2_vcmpbgtu(0, 0);
+  __builtin_HEXAGON_A2_vcmpbeq(0, 0);
   // CHECK: @llvm.hexagon.A2.vcmpbgtu
-  __builtin_HEXAGON_A2_vcmpheq(0, 0);
+  __builtin_HEXAGON_A2_vcmpbgtu(0, 0);
   // CHECK: @llvm.hexagon.A2.vcmpheq
-  __builtin_HEXAGON_A2_vcmphgt(0, 0);
+  __builtin_HEXAGON_A2_vcmpheq(0, 0);
   // CHECK: @llvm.hexagon.A2.vcmphgt
-  __builtin_HEXAGON_A2_vcmphgtu(0, 0);
+  __builtin_HEXAGON_A2_vcmphgt(0, 0);
   // CHECK: @llvm.hexagon.A2.vcmphgtu
-  __builtin_HEXAGON_A2_vcmpweq(0, 0);
+  __builtin_HEXAGON_A2_vcmphgtu(0, 0);
   // CHECK: @llvm.hexagon.A2.vcmpweq
-  __builtin_HEXAGON_A2_vcmpwgt(0, 0);
+  __builtin_HEXAGON_A2_vcmpweq(0, 0);
   // CHECK: @llvm.hexagon.A2.vcmpwgt
-  __builtin_HEXAGON_A2_vcmpwgtu(0, 0);
+  __builtin_HEXAGON_A2_vcmpwgt(0, 0);
   // CHECK: @llvm.hexagon.A2.vcmpwgtu
-  __builtin_HEXAGON_A2_vconj(0);
+  __builtin_HEXAGON_A2_vcmpwgtu(0, 0);
   // CHECK: @llvm.hexagon.A2.vconj
-  __builtin_HEXAGON_A2_vmaxb(0, 0);
+  __builtin_HEXAGON_A2_vconj(0);
   // CHECK: @llvm.hexagon.A2.vmaxb
-  __builtin_HEXAGON_A2_vmaxh(0, 0);
+  __builtin_HEXAGON_A2_vmaxb(0, 0);
   // CHECK: @llvm.hexagon.A2.vmaxh
-  __builtin_HEXAGON_A2_vmaxub(0, 0);
+  __builtin_HEXAGON_A2_vmaxh(0, 0);
   // CHECK: @llvm.hexagon.A2.vmaxub
-  __builtin_HEXAGON_A2_vmaxuh(0, 0);
+  __builtin_HEXAGON_A2_vmaxub(0, 0);
   // CHECK: @llvm.hexagon.A2.vmaxuh
-  __builtin_HEXAGON_A2_vmaxuw(0, 0);
+  __builtin_HEXAGON_A2_vmaxuh(0, 0);
   // CHECK: @llvm.hexagon.A2.vmaxuw
-  __builtin_HEXAGON_A2_vmaxw(0, 0);
+  __builtin_HEXAGON_A2_vmaxuw(0, 0);
   // CHECK: @llvm.hexagon.A2.vmaxw
-  __builtin_HEXAGON_A2_vminb(0, 0);
+  __builtin_HEXAGON_A2_vmaxw(0, 0);
   // CHECK: @llvm.hexagon.A2.vminb
-  __builtin_HEXAGON_A2_vminh(0, 0);
+  __builtin_HEXAGON_A2_vminb(0, 0);
   // CHECK: @llvm.hexagon.A2.vminh
-  __builtin_HEXAGON_A2_vminub(0, 0);
+  __builtin_HEXAGON_A2_vminh(0, 0);
   // CHECK: @llvm.hexagon.A2.vminub
-  __builtin_HEXAGON_A2_vminuh(0, 0);
+  __builtin_HEXAGON_A2_vminub(0, 0);
   // CHECK: @llvm.hexagon.A2.vminuh
-  __builtin_HEXAGON_A2_vminuw(0, 0);
+  __builtin_HEXAGON_A2_vminuh(0, 0);
   // CHECK: @llvm.hexagon.A2.vminuw
-  __builtin_HEXAGON_A2_vminw(0, 0);
+  __builtin_HEXAGON_A2_vminuw(0, 0);
   // CHECK: @llvm.hexagon.A2.vminw
-  __builtin_HEXAGON_A2_vnavgh(0, 0);
+  __builtin_HEXAGON_A2_vminw(0, 0);
   // CHECK: @llvm.hexagon.A2.vnavgh
-  __builtin_HEXAGON_A2_vnavghcr(0, 0);
+  __builtin_HEXAGON_A2_vnavgh(0, 0);
   // CHECK: @llvm.hexagon.A2.vnavghcr
-  __builtin_HEXAGON_A2_vnavghr(0, 0);
+  __builtin_HEXAGON_A2_vnavghcr(0, 0);
   // CHECK: @llvm.hexagon.A2.vnavghr
-  __builtin_HEXAGON_A2_vnavgw(0, 0);
+  __builtin_HEXAGON_A2_vnavghr(0, 0);
   // CHECK: @llvm.hexagon.A2.vnavgw
-  __builtin_HEXAGON_A2_vnavgwcr(0, 0);
+  __builtin_HEXAGON_A2_vnavgw(0, 0);
   // CHECK: @llvm.hexagon.A2.vnavgwcr
-  __builtin_HEXAGON_A2_vnavgwr(0, 0);
+  __builtin_HEXAGON_A2_vnavgwcr(0, 0);
   // CHECK: @llvm.hexagon.A2.vnavgwr
-  __builtin_HEXAGON_A2_vraddub(0, 0);
+  __builtin_HEXAGON_A2_vnavgwr(0, 0);
   // CHECK: @llvm.hexagon.A2.vraddub
-  __builtin_HEXAGON_A2_vraddub_acc(0, 0, 0);
+  __builtin_HEXAGON_A2_vraddub(0, 0);
   // CHECK: @llvm.hexagon.A2.vraddub.acc
-  __builtin_HEXAGON_A2_vrsadub(0, 0);
+  __builtin_HEXAGON_A2_vraddub_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.A2.vrsadub
-  __builtin_HEXAGON_A2_vrsadub_acc(0, 0, 0);
+  __builtin_HEXAGON_A2_vrsadub(0, 0);
   // CHECK: @llvm.hexagon.A2.vrsadub.acc
-  __builtin_HEXAGON_A2_vsubb_map(0, 0);
+  __builtin_HEXAGON_A2_vrsadub_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.A2.vsubb.map
-  __builtin_HEXAGON_A2_vsubh(0, 0);
+  __builtin_HEXAGON_A2_vsubb_map(0, 0);
   // CHECK: @llvm.hexagon.A2.vsubh
-  __builtin_HEXAGON_A2_vsubhs(0, 0);
+  __builtin_HEXAGON_A2_vsubh(0, 0);
   // CHECK: @llvm.hexagon.A2.vsubhs
-  __builtin_HEXAGON_A2_vsubub(0, 0);
+  __builtin_HEXAGON_A2_vsubhs(0, 0);
   // CHECK: @llvm.hexagon.A2.vsubub
-  __builtin_HEXAGON_A2_vsububs(0, 0);
+  __builtin_HEXAGON_A2_vsubub(0, 0);
   // CHECK: @llvm.hexagon.A2.vsububs
-  __builtin_HEXAGON_A2_vsubuhs(0, 0);
+  __builtin_HEXAGON_A2_vsububs(0, 0);
   // CHECK: @llvm.hexagon.A2.vsubuhs
-  __builtin_HEXAGON_A2_vsubw(0, 0);
+  __builtin_HEXAGON_A2_vsubuhs(0, 0);
   // CHECK: @llvm.hexagon.A2.vsubw
-  __builtin_HEXAGON_A2_vsubws(0, 0);
+  __builtin_HEXAGON_A2_vsubw(0, 0);
   // CHECK: @llvm.hexagon.A2.vsubws
-  __builtin_HEXAGON_A2_xor(0, 0);
+  __builtin_HEXAGON_A2_vsubws(0, 0);
   // CHECK: @llvm.hexagon.A2.xor
-  __builtin_HEXAGON_A2_xorp(0, 0);
+  __builtin_HEXAGON_A2_xor(0, 0);
   // CHECK: @llvm.hexagon.A2.xorp
-  __builtin_HEXAGON_A2_zxtb(0);
+  __builtin_HEXAGON_A2_xorp(0, 0);
   // CHECK: @llvm.hexagon.A2.zxtb
-  __builtin_HEXAGON_A2_zxth(0);
+  __builtin_HEXAGON_A2_zxtb(0);
   // CHECK: @llvm.hexagon.A2.zxth
-  __builtin_HEXAGON_A4_andn(0, 0);
+  __builtin_HEXAGON_A2_zxth(0);
   // CHECK: @llvm.hexagon.A4.andn
-  __builtin_HEXAGON_A4_andnp(0, 0);
+  __builtin_HEXAGON_A4_andn(0, 0);
   // CHECK: @llvm.hexagon.A4.andnp
-  __builtin_HEXAGON_A4_bitsplit(0, 0);
+  __builtin_HEXAGON_A4_andnp(0, 0);
   // CHECK: @llvm.hexagon.A4.bitsplit
-  __builtin_HEXAGON_A4_bitspliti(0, 0);
+  __builtin_HEXAGON_A4_bitsplit(0, 0);
   // CHECK: @llvm.hexagon.A4.bitspliti
-  __builtin_HEXAGON_A4_boundscheck(0, 0);
+  __builtin_HEXAGON_A4_bitspliti(0, 0);
   // CHECK: @llvm.hexagon.A4.boundscheck
-  __builtin_HEXAGON_A4_cmpbeq(0, 0);
+  __builtin_HEXAGON_A4_boundscheck(0, 0);
   // CHECK: @llvm.hexagon.A4.cmpbeq
-  __builtin_HEXAGON_A4_cmpbeqi(0, 0);
+  __builtin_HEXAGON_A4_cmpbeq(0, 0);
   // CHECK: @llvm.hexagon.A4.cmpbeqi
-  __builtin_HEXAGON_A4_cmpbgt(0, 0);
+  __builtin_HEXAGON_A4_cmpbeqi(0, 0);
   // CHECK: @llvm.hexagon.A4.cmpbgt
-  __builtin_HEXAGON_A4_cmpbgti(0, 0);
+  __builtin_HEXAGON_A4_cmpbgt(0, 0);
   // CHECK: @llvm.hexagon.A4.cmpbgti
-  __builtin_HEXAGON_A4_cmpbgtu(0, 0);
+  __builtin_HEXAGON_A4_cmpbgti(0, 0);
   // CHECK: @llvm.hexagon.A4.cmpbgtu
-  __builtin_HEXAGON_A4_cmpbgtui(0, 0);
+  __builtin_HEXAGON_A4_cmpbgtu(0, 0);
   // CHECK: @llvm.hexagon.A4.cmpbgtui
-  __builtin_HEXAGON_A4_cmpheq(0, 0);
+  __builtin_HEXAGON_A4_cmpbgtui(0, 0);
   // CHECK: @llvm.hexagon.A4.cmpheq
-  __builtin_HEXAGON_A4_cmpheqi(0, 0);
+  __builtin_HEXAGON_A4_cmpheq(0, 0);
   // CHECK: @llvm.hexagon.A4.cmpheqi
-  __builtin_HEXAGON_A4_cmphgt(0, 0);
+  __builtin_HEXAGON_A4_cmpheqi(0, 0);
   // CHECK: @llvm.hexagon.A4.cmphgt
-  __builtin_HEXAGON_A4_cmphgti(0, 0);
+  __builtin_HEXAGON_A4_cmphgt(0, 0);
   // CHECK: @llvm.hexagon.A4.cmphgti
-  __builtin_HEXAGON_A4_cmphgtu(0, 0);
+  __builtin_HEXAGON_A4_cmphgti(0, 0);
   // CHECK: @llvm.hexagon.A4.cmphgtu
-  __builtin_HEXAGON_A4_cmphgtui(0, 0);
+  __builtin_HEXAGON_A4_cmphgtu(0, 0);
   // CHECK: @llvm.hexagon.A4.cmphgtui
-  __builtin_HEXAGON_A4_combineir(0, 0);
+  __builtin_HEXAGON_A4_cmphgtui(0, 0);
   // CHECK: @llvm.hexagon.A4.combineir
-  __builtin_HEXAGON_A4_combineri(0, 0);
+  __builtin_HEXAGON_A4_combineir(0, 0);
   // CHECK: @llvm.hexagon.A4.combineri
-  __builtin_HEXAGON_A4_cround_ri(0, 0);
+  __builtin_HEXAGON_A4_combineri(0, 0);
   // CHECK: @llvm.hexagon.A4.cround.ri
-  __builtin_HEXAGON_A4_cround_rr(0, 0);
+  __builtin_HEXAGON_A4_cround_ri(0, 0);
   // CHECK: @llvm.hexagon.A4.cround.rr
-  __builtin_HEXAGON_A4_modwrapu(0, 0);
+  __builtin_HEXAGON_A4_cround_rr(0, 0);
   // CHECK: @llvm.hexagon.A4.modwrapu
-  __builtin_HEXAGON_A4_orn(0, 0);
+  __builtin_HEXAGON_A4_modwrapu(0, 0);
   // CHECK: @llvm.hexagon.A4.orn
-  __builtin_HEXAGON_A4_ornp(0, 0);
+  __builtin_HEXAGON_A4_orn(0, 0);
   // CHECK: @llvm.hexagon.A4.ornp
-  __builtin_HEXAGON_A4_rcmpeq(0, 0);
+  __builtin_HEXAGON_A4_ornp(0, 0);
   // CHECK: @llvm.hexagon.A4.rcmpeq
-  __builtin_HEXAGON_A4_rcmpeqi(0, 0);
+  __builtin_HEXAGON_A4_rcmpeq(0, 0);
   // CHECK: @llvm.hexagon.A4.rcmpeqi
-  __builtin_HEXAGON_A4_rcmpneq(0, 0);
+  __builtin_HEXAGON_A4_rcmpeqi(0, 0);
   // CHECK: @llvm.hexagon.A4.rcmpneq
-  __builtin_HEXAGON_A4_rcmpneqi(0, 0);
+  __builtin_HEXAGON_A4_rcmpneq(0, 0);
   // CHECK: @llvm.hexagon.A4.rcmpneqi
-  __builtin_HEXAGON_A4_round_ri(0, 0);
+  __builtin_HEXAGON_A4_rcmpneqi(0, 0);
   // CHECK: @llvm.hexagon.A4.round.ri
-  __builtin_HEXAGON_A4_round_ri_sat(0, 0);
+  __builtin_HEXAGON_A4_round_ri(0, 0);
   // CHECK: @llvm.hexagon.A4.round.ri.sat
-  __builtin_HEXAGON_A4_round_rr(0, 0);
+  __builtin_HEXAGON_A4_round_ri_sat(0, 0);
   // CHECK: @llvm.hexagon.A4.round.rr
-  __builtin_HEXAGON_A4_round_rr_sat(0, 0);
+  __builtin_HEXAGON_A4_round_rr(0, 0);
   // CHECK: @llvm.hexagon.A4.round.rr.sat
-  __builtin_HEXAGON_A4_tlbmatch(0, 0);
+  __builtin_HEXAGON_A4_round_rr_sat(0, 0);
   // CHECK: @llvm.hexagon.A4.tlbmatch
-  __builtin_HEXAGON_A4_vcmpbeq_any(0, 0);
+  __builtin_HEXAGON_A4_tlbmatch(0, 0);
   // CHECK: @llvm.hexagon.A4.vcmpbeq.any
-  __builtin_HEXAGON_A4_vcmpbeqi(0, 0);
+  __builtin_HEXAGON_A4_vcmpbeq_any(0, 0);
   // CHECK: @llvm.hexagon.A4.vcmpbeqi
-  __builtin_HEXAGON_A4_vcmpbgt(0, 0);
+  __builtin_HEXAGON_A4_vcmpbeqi(0, 0);
   // CHECK: @llvm.hexagon.A4.vcmpbgt
-  __builtin_HEXAGON_A4_vcmpbgti(0, 0);
+  __builtin_HEXAGON_A4_vcmpbgt(0, 0);
   // CHECK: @llvm.hexagon.A4.vcmpbgti
-  __builtin_HEXAGON_A4_vcmpbgtui(0, 0);
+  __builtin_HEXAGON_A4_vcmpbgti(0, 0);
   // CHECK: @llvm.hexagon.A4.vcmpbgtui
-  __builtin_HEXAGON_A4_vcmpheqi(0, 0);
+  __builtin_HEXAGON_A4_vcmpbgtui(0, 0);
   // CHECK: @llvm.hexagon.A4.vcmpheqi
-  __builtin_HEXAGON_A4_vcmphgti(0, 0);
+  __builtin_HEXAGON_A4_vcmpheqi(0, 0);
   // CHECK: @llvm.hexagon.A4.vcmphgti
-  __builtin_HEXAGON_A4_vcmphgtui(0, 0);
+  __builtin_HEXAGON_A4_vcmphgti(0, 0);
   // CHECK: @llvm.hexagon.A4.vcmphgtui
-  __builtin_HEXAGON_A4_vcmpweqi(0, 0);
+  __builtin_HEXAGON_A4_vcmphgtui(0, 0);
   // CHECK: @llvm.hexagon.A4.vcmpweqi
-  __builtin_HEXAGON_A4_vcmpwgti(0, 0);
+  __builtin_HEXAGON_A4_vcmpweqi(0, 0);
   // CHECK: @llvm.hexagon.A4.vcmpwgti
-  __builtin_HEXAGON_A4_vcmpwgtui(0, 0);
+  __builtin_HEXAGON_A4_vcmpwgti(0, 0);
   // CHECK: @llvm.hexagon.A4.vcmpwgtui
-  __builtin_HEXAGON_A4_vrmaxh(0, 0, 0);
+  __builtin_HEXAGON_A4_vcmpwgtui(0, 0);
   // CHECK: @llvm.hexagon.A4.vrmaxh
-  __builtin_HEXAGON_A4_vrmaxuh(0, 0, 0);
+  __builtin_HEXAGON_A4_vrmaxh(0, 0, 0);
   // CHECK: @llvm.hexagon.A4.vrmaxuh
-  __builtin_HEXAGON_A4_vrmaxuw(0, 0, 0);
+  __builtin_HEXAGON_A4_vrmaxuh(0, 0, 0);
   // CHECK: @llvm.hexagon.A4.vrmaxuw
-  __builtin_HEXAGON_A4_vrmaxw(0, 0, 0);
+  __builtin_HEXAGON_A4_vrmaxuw(0, 0, 0);
   // CHECK: @llvm.hexagon.A4.vrmaxw
-  __builtin_HEXAGON_A4_vrminh(0, 0, 0);
+  __builtin_HEXAGON_A4_vrmaxw(0, 0, 0);
   // CHECK: @llvm.hexagon.A4.vrminh
-  __builtin_HEXAGON_A4_vrminuh(0, 0, 0);
+  __builtin_HEXAGON_A4_vrminh(0, 0, 0);
   // CHECK: @llvm.hexagon.A4.vrminuh
-  __builtin_HEXAGON_A4_vrminuw(0, 0, 0);
+  __builtin_HEXAGON_A4_vrminuh(0, 0, 0);
   // CHECK: @llvm.hexagon.A4.vrminuw
-  __builtin_HEXAGON_A4_vrminw(0, 0, 0);
+  __builtin_HEXAGON_A4_vrminuw(0, 0, 0);
   // CHECK: @llvm.hexagon.A4.vrminw
-  __builtin_HEXAGON_A5_vaddhubs(0, 0);
+  __builtin_HEXAGON_A4_vrminw(0, 0, 0);
   // CHECK: @llvm.hexagon.A5.vaddhubs
-  __builtin_HEXAGON_C2_all8(0);
+  __builtin_HEXAGON_A5_vaddhubs(0, 0);
+  // CHECK: @llvm.hexagon.A6.vcmpbeq.notany
+  __builtin_HEXAGON_A6_vcmpbeq_notany(0, 0);
+  // CHECK: @llvm.hexagon.A6.vcmpbeq.notany.128B
+  __builtin_HEXAGON_A6_vcmpbeq_notany_128B(0, 0);
   // CHECK: @llvm.hexagon.C2.all8
-  __builtin_HEXAGON_C2_and(0, 0);
+  __builtin_HEXAGON_C2_all8(0);
   // CHECK: @llvm.hexagon.C2.and
-  __builtin_HEXAGON_C2_andn(0, 0);
+  __builtin_HEXAGON_C2_and(0, 0);
   // CHECK: @llvm.hexagon.C2.andn
-  __builtin_HEXAGON_C2_any8(0);
+  __builtin_HEXAGON_C2_andn(0, 0);
   // CHECK: @llvm.hexagon.C2.any8
-  __builtin_HEXAGON_C2_bitsclr(0, 0);
+  __builtin_HEXAGON_C2_any8(0);
   // CHECK: @llvm.hexagon.C2.bitsclr
-  __builtin_HEXAGON_C2_bitsclri(0, 0);
+  __builtin_HEXAGON_C2_bitsclr(0, 0);
   // CHECK: @llvm.hexagon.C2.bitsclri
-  __builtin_HEXAGON_C2_bitsset(0, 0);
+  __builtin_HEXAGON_C2_bitsclri(0, 0);
   // CHECK: @llvm.hexagon.C2.bitsset
-  __builtin_HEXAGON_C2_cmpeq(0, 0);
+  __builtin_HEXAGON_C2_bitsset(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpeq
-  __builtin_HEXAGON_C2_cmpeqi(0, 0);
+  __builtin_HEXAGON_C2_cmpeq(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpeqi
-  __builtin_HEXAGON_C2_cmpeqp(0, 0);
+  __builtin_HEXAGON_C2_cmpeqi(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpeqp
-  __builtin_HEXAGON_C2_cmpgei(0, 0);
+  __builtin_HEXAGON_C2_cmpeqp(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpgei
-  __builtin_HEXAGON_C2_cmpgeui(0, 0);
+  __builtin_HEXAGON_C2_cmpgei(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpgeui
-  __builtin_HEXAGON_C2_cmpgt(0, 0);
+  __builtin_HEXAGON_C2_cmpgeui(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpgt
-  __builtin_HEXAGON_C2_cmpgti(0, 0);
+  __builtin_HEXAGON_C2_cmpgt(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpgti
-  __builtin_HEXAGON_C2_cmpgtp(0, 0);
+  __builtin_HEXAGON_C2_cmpgti(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpgtp
-  __builtin_HEXAGON_C2_cmpgtu(0, 0);
+  __builtin_HEXAGON_C2_cmpgtp(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpgtu
-  __builtin_HEXAGON_C2_cmpgtui(0, 0);
+  __builtin_HEXAGON_C2_cmpgtu(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpgtui
-  __builtin_HEXAGON_C2_cmpgtup(0, 0);
+  __builtin_HEXAGON_C2_cmpgtui(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpgtup
-  __builtin_HEXAGON_C2_cmplt(0, 0);
+  __builtin_HEXAGON_C2_cmpgtup(0, 0);
   // CHECK: @llvm.hexagon.C2.cmplt
-  __builtin_HEXAGON_C2_cmpltu(0, 0);
+  __builtin_HEXAGON_C2_cmplt(0, 0);
   // CHECK: @llvm.hexagon.C2.cmpltu
-  __builtin_HEXAGON_C2_mask(0);
+  __builtin_HEXAGON_C2_cmpltu(0, 0);
   // CHECK: @llvm.hexagon.C2.mask
-  __builtin_HEXAGON_C2_mux(0, 0, 0);
+  __builtin_HEXAGON_C2_mask(0);
   // CHECK: @llvm.hexagon.C2.mux
-  __builtin_HEXAGON_C2_muxii(0, 0, 0);
+  __builtin_HEXAGON_C2_mux(0, 0, 0);
   // CHECK: @llvm.hexagon.C2.muxii
-  __builtin_HEXAGON_C2_muxir(0, 0, 0);
+  __builtin_HEXAGON_C2_muxii(0, 0, 0);
   // CHECK: @llvm.hexagon.C2.muxir
-  __builtin_HEXAGON_C2_muxri(0, 0, 0);
+  __builtin_HEXAGON_C2_muxir(0, 0, 0);
   // CHECK: @llvm.hexagon.C2.muxri
-  __builtin_HEXAGON_C2_not(0);
+  __builtin_HEXAGON_C2_muxri(0, 0, 0);
   // CHECK: @llvm.hexagon.C2.not
-  __builtin_HEXAGON_C2_or (0, 0);
-  // CHECK: @llvm.hexagon.C2.or 
-  __builtin_HEXAGON_C2_orn(0, 0);
+  __builtin_HEXAGON_C2_not(0);
+  // CHECK: @llvm.hexagon.C2.or
+  __builtin_HEXAGON_C2_or(0, 0);
   // CHECK: @llvm.hexagon.C2.orn
-  __builtin_HEXAGON_C2_pxfer_map(0);
+  __builtin_HEXAGON_C2_orn(0, 0);
   // CHECK: @llvm.hexagon.C2.pxfer.map
-  __builtin_HEXAGON_C2_tfrpr(0);
+  __builtin_HEXAGON_C2_pxfer_map(0);
   // CHECK: @llvm.hexagon.C2.tfrpr
-  __builtin_HEXAGON_C2_tfrrp(0);
+  __builtin_HEXAGON_C2_tfrpr(0);
   // CHECK: @llvm.hexagon.C2.tfrrp
-  __builtin_HEXAGON_C2_vitpack(0, 0);
+  __builtin_HEXAGON_C2_tfrrp(0);
   // CHECK: @llvm.hexagon.C2.vitpack
-  __builtin_HEXAGON_C2_vmux(0, 0, 0);
+  __builtin_HEXAGON_C2_vitpack(0, 0);
   // CHECK: @llvm.hexagon.C2.vmux
-  __builtin_HEXAGON_C2_xor(0, 0);
+  __builtin_HEXAGON_C2_vmux(0, 0, 0);
   // CHECK: @llvm.hexagon.C2.xor
-  __builtin_HEXAGON_C4_and_and(0, 0, 0);
+  __builtin_HEXAGON_C2_xor(0, 0);
   // CHECK: @llvm.hexagon.C4.and.and
-  __builtin_HEXAGON_C4_and_andn(0, 0, 0);
+  __builtin_HEXAGON_C4_and_and(0, 0, 0);
   // CHECK: @llvm.hexagon.C4.and.andn
-  __builtin_HEXAGON_C4_and_or(0, 0, 0);
+  __builtin_HEXAGON_C4_and_andn(0, 0, 0);
   // CHECK: @llvm.hexagon.C4.and.or
-  __builtin_HEXAGON_C4_and_orn(0, 0, 0);
+  __builtin_HEXAGON_C4_and_or(0, 0, 0);
   // CHECK: @llvm.hexagon.C4.and.orn
-  __builtin_HEXAGON_C4_cmplte(0, 0);
+  __builtin_HEXAGON_C4_and_orn(0, 0, 0);
   // CHECK: @llvm.hexagon.C4.cmplte
-  __builtin_HEXAGON_C4_cmpltei(0, 0);
+  __builtin_HEXAGON_C4_cmplte(0, 0);
   // CHECK: @llvm.hexagon.C4.cmpltei
-  __builtin_HEXAGON_C4_cmplteu(0, 0);
+  __builtin_HEXAGON_C4_cmpltei(0, 0);
   // CHECK: @llvm.hexagon.C4.cmplteu
-  __builtin_HEXAGON_C4_cmplteui(0, 0);
+  __builtin_HEXAGON_C4_cmplteu(0, 0);
   // CHECK: @llvm.hexagon.C4.cmplteui
-  __builtin_HEXAGON_C4_cmpneq(0, 0);
+  __builtin_HEXAGON_C4_cmplteui(0, 0);
   // CHECK: @llvm.hexagon.C4.cmpneq
-  __builtin_HEXAGON_C4_cmpneqi(0, 0);
+  __builtin_HEXAGON_C4_cmpneq(0, 0);
   // CHECK: @llvm.hexagon.C4.cmpneqi
-  __builtin_HEXAGON_C4_fastcorner9(0, 0);
+  __builtin_HEXAGON_C4_cmpneqi(0, 0);
   // CHECK: @llvm.hexagon.C4.fastcorner9
-  __builtin_HEXAGON_C4_fastcorner9_not(0, 0);
+  __builtin_HEXAGON_C4_fastcorner9(0, 0);
   // CHECK: @llvm.hexagon.C4.fastcorner9.not
-  __builtin_HEXAGON_C4_nbitsclr(0, 0);
+  __builtin_HEXAGON_C4_fastcorner9_not(0, 0);
   // CHECK: @llvm.hexagon.C4.nbitsclr
-  __builtin_HEXAGON_C4_nbitsclri(0, 0);
+  __builtin_HEXAGON_C4_nbitsclr(0, 0);
   // CHECK: @llvm.hexagon.C4.nbitsclri
-  __builtin_HEXAGON_C4_nbitsset(0, 0);
+  __builtin_HEXAGON_C4_nbitsclri(0, 0);
   // CHECK: @llvm.hexagon.C4.nbitsset
-  __builtin_HEXAGON_C4_or_and(0, 0, 0);
+  __builtin_HEXAGON_C4_nbitsset(0, 0);
   // CHECK: @llvm.hexagon.C4.or.and
-  __builtin_HEXAGON_C4_or_andn(0, 0, 0);
+  __builtin_HEXAGON_C4_or_and(0, 0, 0);
   // CHECK: @llvm.hexagon.C4.or.andn
-  __builtin_HEXAGON_C4_or_or(0, 0, 0);
+  __builtin_HEXAGON_C4_or_andn(0, 0, 0);
   // CHECK: @llvm.hexagon.C4.or.or
-  __builtin_HEXAGON_C4_or_orn(0, 0, 0);
+  __builtin_HEXAGON_C4_or_or(0, 0, 0);
   // CHECK: @llvm.hexagon.C4.or.orn
-  __builtin_HEXAGON_F2_conv_d2df(0);
+  __builtin_HEXAGON_C4_or_orn(0, 0, 0);
   // CHECK: @llvm.hexagon.F2.conv.d2df
-  __builtin_HEXAGON_F2_conv_d2sf(0);
+  __builtin_HEXAGON_F2_conv_d2df(0);
   // CHECK: @llvm.hexagon.F2.conv.d2sf
-  __builtin_HEXAGON_F2_conv_df2d(0.0);
+  __builtin_HEXAGON_F2_conv_d2sf(0);
   // CHECK: @llvm.hexagon.F2.conv.df2d
-  __builtin_HEXAGON_F2_conv_df2d_chop(0.0);
+  __builtin_HEXAGON_F2_conv_df2d(0.0);
   // CHECK: @llvm.hexagon.F2.conv.df2d.chop
-  __builtin_HEXAGON_F2_conv_df2sf(0.0);
+  __builtin_HEXAGON_F2_conv_df2d_chop(0.0);
   // CHECK: @llvm.hexagon.F2.conv.df2sf
-  __builtin_HEXAGON_F2_conv_df2ud(0.0);
+  __builtin_HEXAGON_F2_conv_df2sf(0.0);
   // CHECK: @llvm.hexagon.F2.conv.df2ud
-  __builtin_HEXAGON_F2_conv_df2ud_chop(0.0);
+  __builtin_HEXAGON_F2_conv_df2ud(0.0);
   // CHECK: @llvm.hexagon.F2.conv.df2ud.chop
-  __builtin_HEXAGON_F2_conv_df2uw(0.0);
+  __builtin_HEXAGON_F2_conv_df2ud_chop(0.0);
   // CHECK: @llvm.hexagon.F2.conv.df2uw
-  __builtin_HEXAGON_F2_conv_df2uw_chop(0.0);
+  __builtin_HEXAGON_F2_conv_df2uw(0.0);
   // CHECK: @llvm.hexagon.F2.conv.df2uw.chop
-  __builtin_HEXAGON_F2_conv_df2w(0.0);
+  __builtin_HEXAGON_F2_conv_df2uw_chop(0.0);
   // CHECK: @llvm.hexagon.F2.conv.df2w
-  __builtin_HEXAGON_F2_conv_df2w_chop(0.0);
+  __builtin_HEXAGON_F2_conv_df2w(0.0);
   // CHECK: @llvm.hexagon.F2.conv.df2w.chop
-  __builtin_HEXAGON_F2_conv_sf2d(0.0f);
+  __builtin_HEXAGON_F2_conv_df2w_chop(0.0);
   // CHECK: @llvm.hexagon.F2.conv.sf2d
-  __builtin_HEXAGON_F2_conv_sf2d_chop(0.0f);
+  __builtin_HEXAGON_F2_conv_sf2d(0.0f);
   // CHECK: @llvm.hexagon.F2.conv.sf2d.chop
-  __builtin_HEXAGON_F2_conv_sf2df(0.0f);
+  __builtin_HEXAGON_F2_conv_sf2d_chop(0.0f);
   // CHECK: @llvm.hexagon.F2.conv.sf2df
-  __builtin_HEXAGON_F2_conv_sf2ud(0.0f);
+  __builtin_HEXAGON_F2_conv_sf2df(0.0f);
   // CHECK: @llvm.hexagon.F2.conv.sf2ud
-  __builtin_HEXAGON_F2_conv_sf2ud_chop(0.0f);
+  __builtin_HEXAGON_F2_conv_sf2ud(0.0f);
   // CHECK: @llvm.hexagon.F2.conv.sf2ud.chop
-  __builtin_HEXAGON_F2_conv_sf2uw(0.0f);
+  __builtin_HEXAGON_F2_conv_sf2ud_chop(0.0f);
   // CHECK: @llvm.hexagon.F2.conv.sf2uw
-  __builtin_HEXAGON_F2_conv_sf2uw_chop(0.0f);
+  __builtin_HEXAGON_F2_conv_sf2uw(0.0f);
   // CHECK: @llvm.hexagon.F2.conv.sf2uw.chop
-  __builtin_HEXAGON_F2_conv_sf2w(0.0f);
+  __builtin_HEXAGON_F2_conv_sf2uw_chop(0.0f);
   // CHECK: @llvm.hexagon.F2.conv.sf2w
-  __builtin_HEXAGON_F2_conv_sf2w_chop(0.0f);
+  __builtin_HEXAGON_F2_conv_sf2w(0.0f);
   // CHECK: @llvm.hexagon.F2.conv.sf2w.chop
-  __builtin_HEXAGON_F2_conv_ud2df(0);
+  __builtin_HEXAGON_F2_conv_sf2w_chop(0.0f);
   // CHECK: @llvm.hexagon.F2.conv.ud2df
-  __builtin_HEXAGON_F2_conv_ud2sf(0);
+  __builtin_HEXAGON_F2_conv_ud2df(0);
   // CHECK: @llvm.hexagon.F2.conv.ud2sf
-  __builtin_HEXAGON_F2_conv_uw2df(0);
+  __builtin_HEXAGON_F2_conv_ud2sf(0);
   // CHECK: @llvm.hexagon.F2.conv.uw2df
-  __builtin_HEXAGON_F2_conv_uw2sf(0);
+  __builtin_HEXAGON_F2_conv_uw2df(0);
   // CHECK: @llvm.hexagon.F2.conv.uw2sf
-  __builtin_HEXAGON_F2_conv_w2df(0);
+  __builtin_HEXAGON_F2_conv_uw2sf(0);
   // CHECK: @llvm.hexagon.F2.conv.w2df
-  __builtin_HEXAGON_F2_conv_w2sf(0);
+  __builtin_HEXAGON_F2_conv_w2df(0);
   // CHECK: @llvm.hexagon.F2.conv.w2sf
-  __builtin_HEXAGON_F2_dfclass(0.0, 0);
+  __builtin_HEXAGON_F2_conv_w2sf(0);
   // CHECK: @llvm.hexagon.F2.dfclass
-  __builtin_HEXAGON_F2_dfcmpeq(0.0, 0.0);
+  __builtin_HEXAGON_F2_dfclass(0.0, 0);
   // CHECK: @llvm.hexagon.F2.dfcmpeq
-  __builtin_HEXAGON_F2_dfcmpge(0.0, 0.0);
+  __builtin_HEXAGON_F2_dfcmpeq(0.0, 0.0);
   // CHECK: @llvm.hexagon.F2.dfcmpge
-  __builtin_HEXAGON_F2_dfcmpgt(0.0, 0.0);
+  __builtin_HEXAGON_F2_dfcmpge(0.0, 0.0);
   // CHECK: @llvm.hexagon.F2.dfcmpgt
-  __builtin_HEXAGON_F2_dfcmpuo(0.0, 0.0);
+  __builtin_HEXAGON_F2_dfcmpgt(0.0, 0.0);
   // CHECK: @llvm.hexagon.F2.dfcmpuo
-  __builtin_HEXAGON_F2_dfimm_n(0);
+  __builtin_HEXAGON_F2_dfcmpuo(0.0, 0.0);
   // CHECK: @llvm.hexagon.F2.dfimm.n
-  __builtin_HEXAGON_F2_dfimm_p(0);
+  __builtin_HEXAGON_F2_dfimm_n(0);
   // CHECK: @llvm.hexagon.F2.dfimm.p
-  __builtin_HEXAGON_F2_sfadd(0.0f, 0.0f);
+  __builtin_HEXAGON_F2_dfimm_p(0);
   // CHECK: @llvm.hexagon.F2.sfadd
-  __builtin_HEXAGON_F2_sfclass(0.0f, 0);
+  __builtin_HEXAGON_F2_sfadd(0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sfclass
-  __builtin_HEXAGON_F2_sfcmpeq(0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sfclass(0.0f, 0);
   // CHECK: @llvm.hexagon.F2.sfcmpeq
-  __builtin_HEXAGON_F2_sfcmpge(0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sfcmpeq(0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sfcmpge
-  __builtin_HEXAGON_F2_sfcmpgt(0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sfcmpge(0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sfcmpgt
-  __builtin_HEXAGON_F2_sfcmpuo(0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sfcmpgt(0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sfcmpuo
-  __builtin_HEXAGON_F2_sffixupd(0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sfcmpuo(0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sffixupd
-  __builtin_HEXAGON_F2_sffixupn(0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sffixupd(0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sffixupn
-  __builtin_HEXAGON_F2_sffixupr(0.0f);
+  __builtin_HEXAGON_F2_sffixupn(0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sffixupr
-  __builtin_HEXAGON_F2_sffma(0.0f, 0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sffixupr(0.0f);
   // CHECK: @llvm.hexagon.F2.sffma
-  __builtin_HEXAGON_F2_sffma_lib(0.0f, 0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sffma(0.0f, 0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sffma.lib
-  __builtin_HEXAGON_F2_sffma_sc(0.0f, 0.0f, 0.0f, 0);
+  __builtin_HEXAGON_F2_sffma_lib(0.0f, 0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sffma.sc
-  __builtin_HEXAGON_F2_sffms(0.0f, 0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sffma_sc(0.0f, 0.0f, 0.0f, 0);
   // CHECK: @llvm.hexagon.F2.sffms
-  __builtin_HEXAGON_F2_sffms_lib(0.0f, 0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sffms(0.0f, 0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sffms.lib
-  __builtin_HEXAGON_F2_sfimm_n(0);
+  __builtin_HEXAGON_F2_sffms_lib(0.0f, 0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sfimm.n
-  __builtin_HEXAGON_F2_sfimm_p(0);
+  __builtin_HEXAGON_F2_sfimm_n(0);
   // CHECK: @llvm.hexagon.F2.sfimm.p
-  __builtin_HEXAGON_F2_sfmax(0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sfimm_p(0);
   // CHECK: @llvm.hexagon.F2.sfmax
-  __builtin_HEXAGON_F2_sfmin(0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sfmax(0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sfmin
-  __builtin_HEXAGON_F2_sfmpy(0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sfmin(0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sfmpy
-  __builtin_HEXAGON_F2_sfsub(0.0f, 0.0f);
+  __builtin_HEXAGON_F2_sfmpy(0.0f, 0.0f);
   // CHECK: @llvm.hexagon.F2.sfsub
-  __builtin_HEXAGON_M2_acci(0, 0, 0);
+  __builtin_HEXAGON_F2_sfsub(0.0f, 0.0f);
   // CHECK: @llvm.hexagon.M2.acci
-  __builtin_HEXAGON_M2_accii(0, 0, 0);
+  __builtin_HEXAGON_M2_acci(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.accii
-  __builtin_HEXAGON_M2_cmaci_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_accii(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.cmaci.s0
-  __builtin_HEXAGON_M2_cmacr_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_cmaci_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.cmacr.s0
-  __builtin_HEXAGON_M2_cmacsc_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.cmacsc.s0
-  __builtin_HEXAGON_M2_cmacsc_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.cmacsc.s1
-  __builtin_HEXAGON_M2_cmacs_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_cmacr_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.cmacs.s0
-  __builtin_HEXAGON_M2_cmacs_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_cmacs_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.cmacs.s1
-  __builtin_HEXAGON_M2_cmpyi_s0(0, 0);
+  __builtin_HEXAGON_M2_cmacs_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.cmacsc.s0
+  __builtin_HEXAGON_M2_cmacsc_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.cmacsc.s1
+  __builtin_HEXAGON_M2_cmacsc_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.cmpyi.s0
-  __builtin_HEXAGON_M2_cmpyr_s0(0, 0);
+  __builtin_HEXAGON_M2_cmpyi_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.cmpyr.s0
-  __builtin_HEXAGON_M2_cmpyrsc_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.cmpyrsc.s0
-  __builtin_HEXAGON_M2_cmpyrsc_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.cmpyrsc.s1
-  __builtin_HEXAGON_M2_cmpyrs_s0(0, 0);
+  __builtin_HEXAGON_M2_cmpyr_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.cmpyrs.s0
-  __builtin_HEXAGON_M2_cmpyrs_s1(0, 0);
+  __builtin_HEXAGON_M2_cmpyrs_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.cmpyrs.s1
-  __builtin_HEXAGON_M2_cmpysc_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.cmpysc.s0
-  __builtin_HEXAGON_M2_cmpysc_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.cmpysc.s1
-  __builtin_HEXAGON_M2_cmpys_s0(0, 0);
+  __builtin_HEXAGON_M2_cmpyrs_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.cmpyrsc.s0
+  __builtin_HEXAGON_M2_cmpyrsc_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.cmpyrsc.s1
+  __builtin_HEXAGON_M2_cmpyrsc_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.cmpys.s0
-  __builtin_HEXAGON_M2_cmpys_s1(0, 0);
+  __builtin_HEXAGON_M2_cmpys_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.cmpys.s1
-  __builtin_HEXAGON_M2_cnacsc_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.cnacsc.s0
-  __builtin_HEXAGON_M2_cnacsc_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.cnacsc.s1
-  __builtin_HEXAGON_M2_cnacs_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_cmpys_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.cmpysc.s0
+  __builtin_HEXAGON_M2_cmpysc_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.cmpysc.s1
+  __builtin_HEXAGON_M2_cmpysc_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.cnacs.s0
-  __builtin_HEXAGON_M2_cnacs_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_cnacs_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.cnacs.s1
-  __builtin_HEXAGON_M2_dpmpyss_acc_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_cnacs_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.cnacsc.s0
+  __builtin_HEXAGON_M2_cnacsc_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.cnacsc.s1
+  __builtin_HEXAGON_M2_cnacsc_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.dpmpyss.acc.s0
-  __builtin_HEXAGON_M2_dpmpyss_nac_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_dpmpyss_acc_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.dpmpyss.nac.s0
-  __builtin_HEXAGON_M2_dpmpyss_rnd_s0(0, 0);
+  __builtin_HEXAGON_M2_dpmpyss_nac_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.dpmpyss.rnd.s0
-  __builtin_HEXAGON_M2_dpmpyss_s0(0, 0);
+  __builtin_HEXAGON_M2_dpmpyss_rnd_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.dpmpyss.s0
-  __builtin_HEXAGON_M2_dpmpyuu_acc_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_dpmpyss_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.dpmpyuu.acc.s0
-  __builtin_HEXAGON_M2_dpmpyuu_nac_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_dpmpyuu_acc_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.dpmpyuu.nac.s0
-  __builtin_HEXAGON_M2_dpmpyuu_s0(0, 0);
+  __builtin_HEXAGON_M2_dpmpyuu_nac_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.dpmpyuu.s0
-  __builtin_HEXAGON_M2_hmmpyh_rs1(0, 0);
+  __builtin_HEXAGON_M2_dpmpyuu_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.hmmpyh.rs1
-  __builtin_HEXAGON_M2_hmmpyh_s1(0, 0);
+  __builtin_HEXAGON_M2_hmmpyh_rs1(0, 0);
   // CHECK: @llvm.hexagon.M2.hmmpyh.s1
-  __builtin_HEXAGON_M2_hmmpyl_rs1(0, 0);
+  __builtin_HEXAGON_M2_hmmpyh_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.hmmpyl.rs1
-  __builtin_HEXAGON_M2_hmmpyl_s1(0, 0);
+  __builtin_HEXAGON_M2_hmmpyl_rs1(0, 0);
   // CHECK: @llvm.hexagon.M2.hmmpyl.s1
-  __builtin_HEXAGON_M2_maci(0, 0, 0);
+  __builtin_HEXAGON_M2_hmmpyl_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.maci
-  __builtin_HEXAGON_M2_macsin(0, 0, 0);
+  __builtin_HEXAGON_M2_maci(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.macsin
-  __builtin_HEXAGON_M2_macsip(0, 0, 0);
+  __builtin_HEXAGON_M2_macsin(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.macsip
-  __builtin_HEXAGON_M2_mmachs_rs0(0, 0, 0);
+  __builtin_HEXAGON_M2_macsip(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmachs.rs0
-  __builtin_HEXAGON_M2_mmachs_rs1(0, 0, 0);
+  __builtin_HEXAGON_M2_mmachs_rs0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmachs.rs1
-  __builtin_HEXAGON_M2_mmachs_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mmachs_rs1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmachs.s0
-  __builtin_HEXAGON_M2_mmachs_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mmachs_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmachs.s1
-  __builtin_HEXAGON_M2_mmacls_rs0(0, 0, 0);
+  __builtin_HEXAGON_M2_mmachs_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmacls.rs0
-  __builtin_HEXAGON_M2_mmacls_rs1(0, 0, 0);
+  __builtin_HEXAGON_M2_mmacls_rs0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmacls.rs1
-  __builtin_HEXAGON_M2_mmacls_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mmacls_rs1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmacls.s0
-  __builtin_HEXAGON_M2_mmacls_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mmacls_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmacls.s1
-  __builtin_HEXAGON_M2_mmacuhs_rs0(0, 0, 0);
+  __builtin_HEXAGON_M2_mmacls_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmacuhs.rs0
-  __builtin_HEXAGON_M2_mmacuhs_rs1(0, 0, 0);
+  __builtin_HEXAGON_M2_mmacuhs_rs0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmacuhs.rs1
-  __builtin_HEXAGON_M2_mmacuhs_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mmacuhs_rs1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmacuhs.s0
-  __builtin_HEXAGON_M2_mmacuhs_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mmacuhs_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmacuhs.s1
-  __builtin_HEXAGON_M2_mmaculs_rs0(0, 0, 0);
+  __builtin_HEXAGON_M2_mmacuhs_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmaculs.rs0
-  __builtin_HEXAGON_M2_mmaculs_rs1(0, 0, 0);
+  __builtin_HEXAGON_M2_mmaculs_rs0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmaculs.rs1
-  __builtin_HEXAGON_M2_mmaculs_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mmaculs_rs1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmaculs.s0
-  __builtin_HEXAGON_M2_mmaculs_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mmaculs_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmaculs.s1
-  __builtin_HEXAGON_M2_mmpyh_rs0(0, 0);
+  __builtin_HEXAGON_M2_mmaculs_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyh.rs0
-  __builtin_HEXAGON_M2_mmpyh_rs1(0, 0);
+  __builtin_HEXAGON_M2_mmpyh_rs0(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyh.rs1
-  __builtin_HEXAGON_M2_mmpyh_s0(0, 0);
+  __builtin_HEXAGON_M2_mmpyh_rs1(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyh.s0
-  __builtin_HEXAGON_M2_mmpyh_s1(0, 0);
+  __builtin_HEXAGON_M2_mmpyh_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyh.s1
-  __builtin_HEXAGON_M2_mmpyl_rs0(0, 0);
+  __builtin_HEXAGON_M2_mmpyh_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyl.rs0
-  __builtin_HEXAGON_M2_mmpyl_rs1(0, 0);
+  __builtin_HEXAGON_M2_mmpyl_rs0(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyl.rs1
-  __builtin_HEXAGON_M2_mmpyl_s0(0, 0);
+  __builtin_HEXAGON_M2_mmpyl_rs1(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyl.s0
-  __builtin_HEXAGON_M2_mmpyl_s1(0, 0);
+  __builtin_HEXAGON_M2_mmpyl_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyl.s1
-  __builtin_HEXAGON_M2_mmpyuh_rs0(0, 0);
+  __builtin_HEXAGON_M2_mmpyl_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyuh.rs0
-  __builtin_HEXAGON_M2_mmpyuh_rs1(0, 0);
+  __builtin_HEXAGON_M2_mmpyuh_rs0(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyuh.rs1
-  __builtin_HEXAGON_M2_mmpyuh_s0(0, 0);
+  __builtin_HEXAGON_M2_mmpyuh_rs1(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyuh.s0
-  __builtin_HEXAGON_M2_mmpyuh_s1(0, 0);
+  __builtin_HEXAGON_M2_mmpyuh_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyuh.s1
-  __builtin_HEXAGON_M2_mmpyul_rs0(0, 0);
+  __builtin_HEXAGON_M2_mmpyuh_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyul.rs0
-  __builtin_HEXAGON_M2_mmpyul_rs1(0, 0);
+  __builtin_HEXAGON_M2_mmpyul_rs0(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyul.rs1
-  __builtin_HEXAGON_M2_mmpyul_s0(0, 0);
+  __builtin_HEXAGON_M2_mmpyul_rs1(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyul.s0
-  __builtin_HEXAGON_M2_mmpyul_s1(0, 0);
+  __builtin_HEXAGON_M2_mmpyul_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mmpyul.s1
-  __builtin_HEXAGON_M2_mpy_acc_hh_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mmpyul_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.hh.s0
-  __builtin_HEXAGON_M2_mpy_acc_hh_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_hh_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.hh.s1
-  __builtin_HEXAGON_M2_mpy_acc_hl_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_hh_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.hl.s0
-  __builtin_HEXAGON_M2_mpy_acc_hl_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_hl_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.hl.s1
-  __builtin_HEXAGON_M2_mpy_acc_lh_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_hl_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.lh.s0
-  __builtin_HEXAGON_M2_mpy_acc_lh_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_lh_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.lh.s1
-  __builtin_HEXAGON_M2_mpy_acc_ll_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_lh_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.ll.s0
-  __builtin_HEXAGON_M2_mpy_acc_ll_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_ll_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.ll.s1
-  __builtin_HEXAGON_M2_mpy_acc_sat_hh_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_ll_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.sat.hh.s0
-  __builtin_HEXAGON_M2_mpy_acc_sat_hh_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_sat_hh_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.sat.hh.s1
-  __builtin_HEXAGON_M2_mpy_acc_sat_hl_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_sat_hh_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.sat.hl.s0
-  __builtin_HEXAGON_M2_mpy_acc_sat_hl_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_sat_hl_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.sat.hl.s1
-  __builtin_HEXAGON_M2_mpy_acc_sat_lh_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_sat_hl_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.sat.lh.s0
-  __builtin_HEXAGON_M2_mpy_acc_sat_lh_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_sat_lh_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.sat.lh.s1
-  __builtin_HEXAGON_M2_mpy_acc_sat_ll_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_sat_lh_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.sat.ll.s0
-  __builtin_HEXAGON_M2_mpy_acc_sat_ll_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_sat_ll_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.acc.sat.ll.s1
-  __builtin_HEXAGON_M2_mpyd_acc_hh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.acc.hh.s0
-  __builtin_HEXAGON_M2_mpyd_acc_hh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.acc.hh.s1
-  __builtin_HEXAGON_M2_mpyd_acc_hl_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.acc.hl.s0
-  __builtin_HEXAGON_M2_mpyd_acc_hl_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.acc.hl.s1
-  __builtin_HEXAGON_M2_mpyd_acc_lh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.acc.lh.s0
-  __builtin_HEXAGON_M2_mpyd_acc_lh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.acc.lh.s1
-  __builtin_HEXAGON_M2_mpyd_acc_ll_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.acc.ll.s0
-  __builtin_HEXAGON_M2_mpyd_acc_ll_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.acc.ll.s1
-  __builtin_HEXAGON_M2_mpyd_hh_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.hh.s0
-  __builtin_HEXAGON_M2_mpyd_hh_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.hh.s1
-  __builtin_HEXAGON_M2_mpyd_hl_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.hl.s0
-  __builtin_HEXAGON_M2_mpyd_hl_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.hl.s1
-  __builtin_HEXAGON_M2_mpyd_lh_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.lh.s0
-  __builtin_HEXAGON_M2_mpyd_lh_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.lh.s1
-  __builtin_HEXAGON_M2_mpyd_ll_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.ll.s0
-  __builtin_HEXAGON_M2_mpyd_ll_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.ll.s1
-  __builtin_HEXAGON_M2_mpyd_nac_hh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.nac.hh.s0
-  __builtin_HEXAGON_M2_mpyd_nac_hh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.nac.hh.s1
-  __builtin_HEXAGON_M2_mpyd_nac_hl_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.nac.hl.s0
-  __builtin_HEXAGON_M2_mpyd_nac_hl_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.nac.hl.s1
-  __builtin_HEXAGON_M2_mpyd_nac_lh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.nac.lh.s0
-  __builtin_HEXAGON_M2_mpyd_nac_lh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.nac.lh.s1
-  __builtin_HEXAGON_M2_mpyd_nac_ll_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.nac.ll.s0
-  __builtin_HEXAGON_M2_mpyd_nac_ll_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.nac.ll.s1
-  __builtin_HEXAGON_M2_mpyd_rnd_hh_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.rnd.hh.s0
-  __builtin_HEXAGON_M2_mpyd_rnd_hh_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.rnd.hh.s1
-  __builtin_HEXAGON_M2_mpyd_rnd_hl_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.rnd.hl.s0
-  __builtin_HEXAGON_M2_mpyd_rnd_hl_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.rnd.hl.s1
-  __builtin_HEXAGON_M2_mpyd_rnd_lh_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.rnd.lh.s0
-  __builtin_HEXAGON_M2_mpyd_rnd_lh_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.rnd.lh.s1
-  __builtin_HEXAGON_M2_mpyd_rnd_ll_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.rnd.ll.s0
-  __builtin_HEXAGON_M2_mpyd_rnd_ll_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyd.rnd.ll.s1
-  __builtin_HEXAGON_M2_mpy_hh_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_acc_sat_ll_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.hh.s0
-  __builtin_HEXAGON_M2_mpy_hh_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_hh_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.hh.s1
-  __builtin_HEXAGON_M2_mpy_hl_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_hh_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.hl.s0
-  __builtin_HEXAGON_M2_mpy_hl_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_hl_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.hl.s1
-  __builtin_HEXAGON_M2_mpyi(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyi
-  __builtin_HEXAGON_M2_mpy_lh_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_hl_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.lh.s0
-  __builtin_HEXAGON_M2_mpy_lh_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_lh_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.lh.s1
-  __builtin_HEXAGON_M2_mpy_ll_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_lh_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.ll.s0
-  __builtin_HEXAGON_M2_mpy_ll_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_ll_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.ll.s1
-  __builtin_HEXAGON_M2_mpy_nac_hh_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_ll_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.hh.s0
-  __builtin_HEXAGON_M2_mpy_nac_hh_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_hh_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.hh.s1
-  __builtin_HEXAGON_M2_mpy_nac_hl_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_hh_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.hl.s0
-  __builtin_HEXAGON_M2_mpy_nac_hl_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_hl_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.hl.s1
-  __builtin_HEXAGON_M2_mpy_nac_lh_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_hl_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.lh.s0
-  __builtin_HEXAGON_M2_mpy_nac_lh_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_lh_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.lh.s1
-  __builtin_HEXAGON_M2_mpy_nac_ll_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_lh_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.ll.s0
-  __builtin_HEXAGON_M2_mpy_nac_ll_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_ll_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.ll.s1
-  __builtin_HEXAGON_M2_mpy_nac_sat_hh_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_ll_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.sat.hh.s0
-  __builtin_HEXAGON_M2_mpy_nac_sat_hh_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_sat_hh_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.sat.hh.s1
-  __builtin_HEXAGON_M2_mpy_nac_sat_hl_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_sat_hh_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.sat.hl.s0
-  __builtin_HEXAGON_M2_mpy_nac_sat_hl_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_sat_hl_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.sat.hl.s1
-  __builtin_HEXAGON_M2_mpy_nac_sat_lh_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_sat_hl_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.sat.lh.s0
-  __builtin_HEXAGON_M2_mpy_nac_sat_lh_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_sat_lh_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.sat.lh.s1
-  __builtin_HEXAGON_M2_mpy_nac_sat_ll_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_sat_lh_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.sat.ll.s0
-  __builtin_HEXAGON_M2_mpy_nac_sat_ll_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_sat_ll_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.nac.sat.ll.s1
-  __builtin_HEXAGON_M2_mpy_rnd_hh_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_nac_sat_ll_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.rnd.hh.s0
-  __builtin_HEXAGON_M2_mpy_rnd_hh_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_rnd_hh_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.rnd.hh.s1
-  __builtin_HEXAGON_M2_mpy_rnd_hl_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_rnd_hh_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.rnd.hl.s0
-  __builtin_HEXAGON_M2_mpy_rnd_hl_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_rnd_hl_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.rnd.hl.s1
-  __builtin_HEXAGON_M2_mpy_rnd_lh_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_rnd_hl_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.rnd.lh.s0
-  __builtin_HEXAGON_M2_mpy_rnd_lh_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_rnd_lh_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.rnd.lh.s1
-  __builtin_HEXAGON_M2_mpy_rnd_ll_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_rnd_lh_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.rnd.ll.s0
-  __builtin_HEXAGON_M2_mpy_rnd_ll_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_rnd_ll_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.rnd.ll.s1
-  __builtin_HEXAGON_M2_mpy_sat_hh_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_rnd_ll_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.hh.s0
-  __builtin_HEXAGON_M2_mpy_sat_hh_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_hh_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.hh.s1
-  __builtin_HEXAGON_M2_mpy_sat_hl_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_hh_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.hl.s0
-  __builtin_HEXAGON_M2_mpy_sat_hl_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_hl_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.hl.s1
-  __builtin_HEXAGON_M2_mpy_sat_lh_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_hl_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.lh.s0
-  __builtin_HEXAGON_M2_mpy_sat_lh_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_lh_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.lh.s1
-  __builtin_HEXAGON_M2_mpy_sat_ll_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_lh_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.ll.s0
-  __builtin_HEXAGON_M2_mpy_sat_ll_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_ll_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.ll.s1
-  __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_ll_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.rnd.hh.s0
-  __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.rnd.hh.s1
-  __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.rnd.hl.s0
-  __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.rnd.hl.s1
-  __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.rnd.lh.s0
-  __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.rnd.lh.s1
-  __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s0(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.rnd.ll.s0
-  __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.sat.rnd.ll.s1
-  __builtin_HEXAGON_M2_mpysmi(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpysmi
-  __builtin_HEXAGON_M2_mpysu_up(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpysu.up
-  __builtin_HEXAGON_M2_mpyu_acc_hh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.acc.hh.s0
-  __builtin_HEXAGON_M2_mpyu_acc_hh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.acc.hh.s1
-  __builtin_HEXAGON_M2_mpyu_acc_hl_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.acc.hl.s0
-  __builtin_HEXAGON_M2_mpyu_acc_hl_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.acc.hl.s1
-  __builtin_HEXAGON_M2_mpyu_acc_lh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.acc.lh.s0
-  __builtin_HEXAGON_M2_mpyu_acc_lh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.acc.lh.s1
-  __builtin_HEXAGON_M2_mpyu_acc_ll_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.acc.ll.s0
-  __builtin_HEXAGON_M2_mpyu_acc_ll_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.acc.ll.s1
-  __builtin_HEXAGON_M2_mpyud_acc_hh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.acc.hh.s0
-  __builtin_HEXAGON_M2_mpyud_acc_hh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.acc.hh.s1
-  __builtin_HEXAGON_M2_mpyud_acc_hl_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.acc.hl.s0
-  __builtin_HEXAGON_M2_mpyud_acc_hl_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.acc.hl.s1
-  __builtin_HEXAGON_M2_mpyud_acc_lh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.acc.lh.s0
-  __builtin_HEXAGON_M2_mpyud_acc_lh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.acc.lh.s1
-  __builtin_HEXAGON_M2_mpyud_acc_ll_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.acc.ll.s0
-  __builtin_HEXAGON_M2_mpyud_acc_ll_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.acc.ll.s1
-  __builtin_HEXAGON_M2_mpyud_hh_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.hh.s0
-  __builtin_HEXAGON_M2_mpyud_hh_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.hh.s1
-  __builtin_HEXAGON_M2_mpyud_hl_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.hl.s0
-  __builtin_HEXAGON_M2_mpyud_hl_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.hl.s1
-  __builtin_HEXAGON_M2_mpyud_lh_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.lh.s0
-  __builtin_HEXAGON_M2_mpyud_lh_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.lh.s1
-  __builtin_HEXAGON_M2_mpyud_ll_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.ll.s0
-  __builtin_HEXAGON_M2_mpyud_ll_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.ll.s1
-  __builtin_HEXAGON_M2_mpyud_nac_hh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.nac.hh.s0
-  __builtin_HEXAGON_M2_mpyud_nac_hh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.nac.hh.s1
-  __builtin_HEXAGON_M2_mpyud_nac_hl_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.nac.hl.s0
-  __builtin_HEXAGON_M2_mpyud_nac_hl_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.nac.hl.s1
-  __builtin_HEXAGON_M2_mpyud_nac_lh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.nac.lh.s0
-  __builtin_HEXAGON_M2_mpyud_nac_lh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.nac.lh.s1
-  __builtin_HEXAGON_M2_mpyud_nac_ll_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.nac.ll.s0
-  __builtin_HEXAGON_M2_mpyud_nac_ll_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyud.nac.ll.s1
-  __builtin_HEXAGON_M2_mpyu_hh_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.hh.s0
-  __builtin_HEXAGON_M2_mpyu_hh_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.hh.s1
-  __builtin_HEXAGON_M2_mpyu_hl_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.hl.s0
-  __builtin_HEXAGON_M2_mpyu_hl_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.hl.s1
-  __builtin_HEXAGON_M2_mpyui(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyui
-  __builtin_HEXAGON_M2_mpyu_lh_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.lh.s0
-  __builtin_HEXAGON_M2_mpyu_lh_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.lh.s1
-  __builtin_HEXAGON_M2_mpyu_ll_s0(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.ll.s0
-  __builtin_HEXAGON_M2_mpyu_ll_s1(0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.ll.s1
-  __builtin_HEXAGON_M2_mpyu_nac_hh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.nac.hh.s0
-  __builtin_HEXAGON_M2_mpyu_nac_hh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.nac.hh.s1
-  __builtin_HEXAGON_M2_mpyu_nac_hl_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.nac.hl.s0
-  __builtin_HEXAGON_M2_mpyu_nac_hl_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.nac.hl.s1
-  __builtin_HEXAGON_M2_mpyu_nac_lh_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.nac.lh.s0
-  __builtin_HEXAGON_M2_mpyu_nac_lh_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.nac.lh.s1
-  __builtin_HEXAGON_M2_mpyu_nac_ll_s0(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.nac.ll.s0
-  __builtin_HEXAGON_M2_mpyu_nac_ll_s1(0, 0, 0);
-  // CHECK: @llvm.hexagon.M2.mpyu.nac.ll.s1
-  __builtin_HEXAGON_M2_mpy_up(0, 0);
+  __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.up
-  __builtin_HEXAGON_M2_mpy_up_s1(0, 0);
+  __builtin_HEXAGON_M2_mpy_up(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.up.s1
-  __builtin_HEXAGON_M2_mpy_up_s1_sat(0, 0);
+  __builtin_HEXAGON_M2_mpy_up_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.mpy.up.s1.sat
-  __builtin_HEXAGON_M2_mpyu_up(0, 0);
+  __builtin_HEXAGON_M2_mpy_up_s1_sat(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.acc.hh.s0
+  __builtin_HEXAGON_M2_mpyd_acc_hh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.acc.hh.s1
+  __builtin_HEXAGON_M2_mpyd_acc_hh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.acc.hl.s0
+  __builtin_HEXAGON_M2_mpyd_acc_hl_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.acc.hl.s1
+  __builtin_HEXAGON_M2_mpyd_acc_hl_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.acc.lh.s0
+  __builtin_HEXAGON_M2_mpyd_acc_lh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.acc.lh.s1
+  __builtin_HEXAGON_M2_mpyd_acc_lh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.acc.ll.s0
+  __builtin_HEXAGON_M2_mpyd_acc_ll_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.acc.ll.s1
+  __builtin_HEXAGON_M2_mpyd_acc_ll_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.hh.s0
+  __builtin_HEXAGON_M2_mpyd_hh_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.hh.s1
+  __builtin_HEXAGON_M2_mpyd_hh_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.hl.s0
+  __builtin_HEXAGON_M2_mpyd_hl_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.hl.s1
+  __builtin_HEXAGON_M2_mpyd_hl_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.lh.s0
+  __builtin_HEXAGON_M2_mpyd_lh_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.lh.s1
+  __builtin_HEXAGON_M2_mpyd_lh_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.ll.s0
+  __builtin_HEXAGON_M2_mpyd_ll_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.ll.s1
+  __builtin_HEXAGON_M2_mpyd_ll_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.nac.hh.s0
+  __builtin_HEXAGON_M2_mpyd_nac_hh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.nac.hh.s1
+  __builtin_HEXAGON_M2_mpyd_nac_hh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.nac.hl.s0
+  __builtin_HEXAGON_M2_mpyd_nac_hl_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.nac.hl.s1
+  __builtin_HEXAGON_M2_mpyd_nac_hl_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.nac.lh.s0
+  __builtin_HEXAGON_M2_mpyd_nac_lh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.nac.lh.s1
+  __builtin_HEXAGON_M2_mpyd_nac_lh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.nac.ll.s0
+  __builtin_HEXAGON_M2_mpyd_nac_ll_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.nac.ll.s1
+  __builtin_HEXAGON_M2_mpyd_nac_ll_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.rnd.hh.s0
+  __builtin_HEXAGON_M2_mpyd_rnd_hh_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.rnd.hh.s1
+  __builtin_HEXAGON_M2_mpyd_rnd_hh_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.rnd.hl.s0
+  __builtin_HEXAGON_M2_mpyd_rnd_hl_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.rnd.hl.s1
+  __builtin_HEXAGON_M2_mpyd_rnd_hl_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.rnd.lh.s0
+  __builtin_HEXAGON_M2_mpyd_rnd_lh_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.rnd.lh.s1
+  __builtin_HEXAGON_M2_mpyd_rnd_lh_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.rnd.ll.s0
+  __builtin_HEXAGON_M2_mpyd_rnd_ll_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyd.rnd.ll.s1
+  __builtin_HEXAGON_M2_mpyd_rnd_ll_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyi
+  __builtin_HEXAGON_M2_mpyi(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpysmi
+  __builtin_HEXAGON_M2_mpysmi(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpysu.up
+  __builtin_HEXAGON_M2_mpysu_up(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.acc.hh.s0
+  __builtin_HEXAGON_M2_mpyu_acc_hh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.acc.hh.s1
+  __builtin_HEXAGON_M2_mpyu_acc_hh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.acc.hl.s0
+  __builtin_HEXAGON_M2_mpyu_acc_hl_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.acc.hl.s1
+  __builtin_HEXAGON_M2_mpyu_acc_hl_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.acc.lh.s0
+  __builtin_HEXAGON_M2_mpyu_acc_lh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.acc.lh.s1
+  __builtin_HEXAGON_M2_mpyu_acc_lh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.acc.ll.s0
+  __builtin_HEXAGON_M2_mpyu_acc_ll_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.acc.ll.s1
+  __builtin_HEXAGON_M2_mpyu_acc_ll_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.hh.s0
+  __builtin_HEXAGON_M2_mpyu_hh_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.hh.s1
+  __builtin_HEXAGON_M2_mpyu_hh_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.hl.s0
+  __builtin_HEXAGON_M2_mpyu_hl_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.hl.s1
+  __builtin_HEXAGON_M2_mpyu_hl_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.lh.s0
+  __builtin_HEXAGON_M2_mpyu_lh_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.lh.s1
+  __builtin_HEXAGON_M2_mpyu_lh_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.ll.s0
+  __builtin_HEXAGON_M2_mpyu_ll_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.ll.s1
+  __builtin_HEXAGON_M2_mpyu_ll_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.nac.hh.s0
+  __builtin_HEXAGON_M2_mpyu_nac_hh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.nac.hh.s1
+  __builtin_HEXAGON_M2_mpyu_nac_hh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.nac.hl.s0
+  __builtin_HEXAGON_M2_mpyu_nac_hl_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.nac.hl.s1
+  __builtin_HEXAGON_M2_mpyu_nac_hl_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.nac.lh.s0
+  __builtin_HEXAGON_M2_mpyu_nac_lh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.nac.lh.s1
+  __builtin_HEXAGON_M2_mpyu_nac_lh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.nac.ll.s0
+  __builtin_HEXAGON_M2_mpyu_nac_ll_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyu.nac.ll.s1
+  __builtin_HEXAGON_M2_mpyu_nac_ll_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.mpyu.up
-  __builtin_HEXAGON_M2_nacci(0, 0, 0);
+  __builtin_HEXAGON_M2_mpyu_up(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.acc.hh.s0
+  __builtin_HEXAGON_M2_mpyud_acc_hh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.acc.hh.s1
+  __builtin_HEXAGON_M2_mpyud_acc_hh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.acc.hl.s0
+  __builtin_HEXAGON_M2_mpyud_acc_hl_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.acc.hl.s1
+  __builtin_HEXAGON_M2_mpyud_acc_hl_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.acc.lh.s0
+  __builtin_HEXAGON_M2_mpyud_acc_lh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.acc.lh.s1
+  __builtin_HEXAGON_M2_mpyud_acc_lh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.acc.ll.s0
+  __builtin_HEXAGON_M2_mpyud_acc_ll_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.acc.ll.s1
+  __builtin_HEXAGON_M2_mpyud_acc_ll_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.hh.s0
+  __builtin_HEXAGON_M2_mpyud_hh_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.hh.s1
+  __builtin_HEXAGON_M2_mpyud_hh_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.hl.s0
+  __builtin_HEXAGON_M2_mpyud_hl_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.hl.s1
+  __builtin_HEXAGON_M2_mpyud_hl_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.lh.s0
+  __builtin_HEXAGON_M2_mpyud_lh_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.lh.s1
+  __builtin_HEXAGON_M2_mpyud_lh_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.ll.s0
+  __builtin_HEXAGON_M2_mpyud_ll_s0(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.ll.s1
+  __builtin_HEXAGON_M2_mpyud_ll_s1(0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.nac.hh.s0
+  __builtin_HEXAGON_M2_mpyud_nac_hh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.nac.hh.s1
+  __builtin_HEXAGON_M2_mpyud_nac_hh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.nac.hl.s0
+  __builtin_HEXAGON_M2_mpyud_nac_hl_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.nac.hl.s1
+  __builtin_HEXAGON_M2_mpyud_nac_hl_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.nac.lh.s0
+  __builtin_HEXAGON_M2_mpyud_nac_lh_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.nac.lh.s1
+  __builtin_HEXAGON_M2_mpyud_nac_lh_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.nac.ll.s0
+  __builtin_HEXAGON_M2_mpyud_nac_ll_s0(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyud.nac.ll.s1
+  __builtin_HEXAGON_M2_mpyud_nac_ll_s1(0, 0, 0);
+  // CHECK: @llvm.hexagon.M2.mpyui
+  __builtin_HEXAGON_M2_mpyui(0, 0);
   // CHECK: @llvm.hexagon.M2.nacci
-  __builtin_HEXAGON_M2_naccii(0, 0, 0);
+  __builtin_HEXAGON_M2_nacci(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.naccii
-  __builtin_HEXAGON_M2_subacc(0, 0, 0);
+  __builtin_HEXAGON_M2_naccii(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.subacc
-  __builtin_HEXAGON_M2_vabsdiffh(0, 0);
+  __builtin_HEXAGON_M2_subacc(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vabsdiffh
-  __builtin_HEXAGON_M2_vabsdiffw(0, 0);
+  __builtin_HEXAGON_M2_vabsdiffh(0, 0);
   // CHECK: @llvm.hexagon.M2.vabsdiffw
-  __builtin_HEXAGON_M2_vcmac_s0_sat_i(0, 0, 0);
+  __builtin_HEXAGON_M2_vabsdiffw(0, 0);
   // CHECK: @llvm.hexagon.M2.vcmac.s0.sat.i
-  __builtin_HEXAGON_M2_vcmac_s0_sat_r(0, 0, 0);
+  __builtin_HEXAGON_M2_vcmac_s0_sat_i(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vcmac.s0.sat.r
-  __builtin_HEXAGON_M2_vcmpy_s0_sat_i(0, 0);
+  __builtin_HEXAGON_M2_vcmac_s0_sat_r(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vcmpy.s0.sat.i
-  __builtin_HEXAGON_M2_vcmpy_s0_sat_r(0, 0);
+  __builtin_HEXAGON_M2_vcmpy_s0_sat_i(0, 0);
   // CHECK: @llvm.hexagon.M2.vcmpy.s0.sat.r
-  __builtin_HEXAGON_M2_vcmpy_s1_sat_i(0, 0);
+  __builtin_HEXAGON_M2_vcmpy_s0_sat_r(0, 0);
   // CHECK: @llvm.hexagon.M2.vcmpy.s1.sat.i
-  __builtin_HEXAGON_M2_vcmpy_s1_sat_r(0, 0);
+  __builtin_HEXAGON_M2_vcmpy_s1_sat_i(0, 0);
   // CHECK: @llvm.hexagon.M2.vcmpy.s1.sat.r
-  __builtin_HEXAGON_M2_vdmacs_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_vcmpy_s1_sat_r(0, 0);
   // CHECK: @llvm.hexagon.M2.vdmacs.s0
-  __builtin_HEXAGON_M2_vdmacs_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_vdmacs_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vdmacs.s1
-  __builtin_HEXAGON_M2_vdmpyrs_s0(0, 0);
+  __builtin_HEXAGON_M2_vdmacs_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vdmpyrs.s0
-  __builtin_HEXAGON_M2_vdmpyrs_s1(0, 0);
+  __builtin_HEXAGON_M2_vdmpyrs_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.vdmpyrs.s1
-  __builtin_HEXAGON_M2_vdmpys_s0(0, 0);
+  __builtin_HEXAGON_M2_vdmpyrs_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.vdmpys.s0
-  __builtin_HEXAGON_M2_vdmpys_s1(0, 0);
+  __builtin_HEXAGON_M2_vdmpys_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.vdmpys.s1
-  __builtin_HEXAGON_M2_vmac2(0, 0, 0);
+  __builtin_HEXAGON_M2_vdmpys_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.vmac2
-  __builtin_HEXAGON_M2_vmac2es(0, 0, 0);
+  __builtin_HEXAGON_M2_vmac2(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vmac2es
-  __builtin_HEXAGON_M2_vmac2es_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_vmac2es(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vmac2es.s0
-  __builtin_HEXAGON_M2_vmac2es_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_vmac2es_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vmac2es.s1
-  __builtin_HEXAGON_M2_vmac2s_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_vmac2es_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vmac2s.s0
-  __builtin_HEXAGON_M2_vmac2s_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_vmac2s_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vmac2s.s1
-  __builtin_HEXAGON_M2_vmac2su_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_vmac2s_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vmac2su.s0
-  __builtin_HEXAGON_M2_vmac2su_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_vmac2su_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vmac2su.s1
-  __builtin_HEXAGON_M2_vmpy2es_s0(0, 0);
+  __builtin_HEXAGON_M2_vmac2su_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vmpy2es.s0
-  __builtin_HEXAGON_M2_vmpy2es_s1(0, 0);
+  __builtin_HEXAGON_M2_vmpy2es_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.vmpy2es.s1
-  __builtin_HEXAGON_M2_vmpy2s_s0(0, 0);
+  __builtin_HEXAGON_M2_vmpy2es_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.vmpy2s.s0
-  __builtin_HEXAGON_M2_vmpy2s_s0pack(0, 0);
+  __builtin_HEXAGON_M2_vmpy2s_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.vmpy2s.s0pack
-  __builtin_HEXAGON_M2_vmpy2s_s1(0, 0);
+  __builtin_HEXAGON_M2_vmpy2s_s0pack(0, 0);
   // CHECK: @llvm.hexagon.M2.vmpy2s.s1
-  __builtin_HEXAGON_M2_vmpy2s_s1pack(0, 0);
+  __builtin_HEXAGON_M2_vmpy2s_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.vmpy2s.s1pack
-  __builtin_HEXAGON_M2_vmpy2su_s0(0, 0);
+  __builtin_HEXAGON_M2_vmpy2s_s1pack(0, 0);
   // CHECK: @llvm.hexagon.M2.vmpy2su.s0
-  __builtin_HEXAGON_M2_vmpy2su_s1(0, 0);
+  __builtin_HEXAGON_M2_vmpy2su_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.vmpy2su.s1
-  __builtin_HEXAGON_M2_vraddh(0, 0);
+  __builtin_HEXAGON_M2_vmpy2su_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.vraddh
-  __builtin_HEXAGON_M2_vradduh(0, 0);
+  __builtin_HEXAGON_M2_vraddh(0, 0);
   // CHECK: @llvm.hexagon.M2.vradduh
-  __builtin_HEXAGON_M2_vrcmaci_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_vradduh(0, 0);
   // CHECK: @llvm.hexagon.M2.vrcmaci.s0
-  __builtin_HEXAGON_M2_vrcmaci_s0c(0, 0, 0);
+  __builtin_HEXAGON_M2_vrcmaci_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vrcmaci.s0c
-  __builtin_HEXAGON_M2_vrcmacr_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_vrcmaci_s0c(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vrcmacr.s0
-  __builtin_HEXAGON_M2_vrcmacr_s0c(0, 0, 0);
+  __builtin_HEXAGON_M2_vrcmacr_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vrcmacr.s0c
-  __builtin_HEXAGON_M2_vrcmpyi_s0(0, 0);
+  __builtin_HEXAGON_M2_vrcmacr_s0c(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vrcmpyi.s0
-  __builtin_HEXAGON_M2_vrcmpyi_s0c(0, 0);
+  __builtin_HEXAGON_M2_vrcmpyi_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.vrcmpyi.s0c
-  __builtin_HEXAGON_M2_vrcmpyr_s0(0, 0);
+  __builtin_HEXAGON_M2_vrcmpyi_s0c(0, 0);
   // CHECK: @llvm.hexagon.M2.vrcmpyr.s0
-  __builtin_HEXAGON_M2_vrcmpyr_s0c(0, 0);
+  __builtin_HEXAGON_M2_vrcmpyr_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.vrcmpyr.s0c
-  __builtin_HEXAGON_M2_vrcmpys_acc_s1(0, 0, 0);
+  __builtin_HEXAGON_M2_vrcmpyr_s0c(0, 0);
   // CHECK: @llvm.hexagon.M2.vrcmpys.acc.s1
-  __builtin_HEXAGON_M2_vrcmpys_s1(0, 0);
+  __builtin_HEXAGON_M2_vrcmpys_acc_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vrcmpys.s1
-  __builtin_HEXAGON_M2_vrcmpys_s1rp(0, 0);
+  __builtin_HEXAGON_M2_vrcmpys_s1(0, 0);
   // CHECK: @llvm.hexagon.M2.vrcmpys.s1rp
-  __builtin_HEXAGON_M2_vrmac_s0(0, 0, 0);
+  __builtin_HEXAGON_M2_vrcmpys_s1rp(0, 0);
   // CHECK: @llvm.hexagon.M2.vrmac.s0
-  __builtin_HEXAGON_M2_vrmpy_s0(0, 0);
+  __builtin_HEXAGON_M2_vrmac_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M2.vrmpy.s0
-  __builtin_HEXAGON_M2_xor_xacc(0, 0, 0);
+  __builtin_HEXAGON_M2_vrmpy_s0(0, 0);
   // CHECK: @llvm.hexagon.M2.xor.xacc
-  __builtin_HEXAGON_M4_and_and(0, 0, 0);
+  __builtin_HEXAGON_M2_xor_xacc(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.and.and
-  __builtin_HEXAGON_M4_and_andn(0, 0, 0);
+  __builtin_HEXAGON_M4_and_and(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.and.andn
-  __builtin_HEXAGON_M4_and_or(0, 0, 0);
+  __builtin_HEXAGON_M4_and_andn(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.and.or
-  __builtin_HEXAGON_M4_and_xor(0, 0, 0);
+  __builtin_HEXAGON_M4_and_or(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.and.xor
-  __builtin_HEXAGON_M4_cmpyi_wh(0, 0);
+  __builtin_HEXAGON_M4_and_xor(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.cmpyi.wh
-  __builtin_HEXAGON_M4_cmpyi_whc(0, 0);
+  __builtin_HEXAGON_M4_cmpyi_wh(0, 0);
   // CHECK: @llvm.hexagon.M4.cmpyi.whc
-  __builtin_HEXAGON_M4_cmpyr_wh(0, 0);
+  __builtin_HEXAGON_M4_cmpyi_whc(0, 0);
   // CHECK: @llvm.hexagon.M4.cmpyr.wh
-  __builtin_HEXAGON_M4_cmpyr_whc(0, 0);
+  __builtin_HEXAGON_M4_cmpyr_wh(0, 0);
   // CHECK: @llvm.hexagon.M4.cmpyr.whc
-  __builtin_HEXAGON_M4_mac_up_s1_sat(0, 0, 0);
+  __builtin_HEXAGON_M4_cmpyr_whc(0, 0);
   // CHECK: @llvm.hexagon.M4.mac.up.s1.sat
-  __builtin_HEXAGON_M4_mpyri_addi(0, 0, 0);
+  __builtin_HEXAGON_M4_mac_up_s1_sat(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.mpyri.addi
-  __builtin_HEXAGON_M4_mpyri_addr(0, 0, 0);
+  __builtin_HEXAGON_M4_mpyri_addi(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.mpyri.addr
-  __builtin_HEXAGON_M4_mpyri_addr_u2(0, 0, 0);
+  __builtin_HEXAGON_M4_mpyri_addr(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.mpyri.addr.u2
-  __builtin_HEXAGON_M4_mpyrr_addi(0, 0, 0);
+  __builtin_HEXAGON_M4_mpyri_addr_u2(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.mpyrr.addi
-  __builtin_HEXAGON_M4_mpyrr_addr(0, 0, 0);
+  __builtin_HEXAGON_M4_mpyrr_addi(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.mpyrr.addr
-  __builtin_HEXAGON_M4_nac_up_s1_sat(0, 0, 0);
+  __builtin_HEXAGON_M4_mpyrr_addr(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.nac.up.s1.sat
-  __builtin_HEXAGON_M4_or_and(0, 0, 0);
+  __builtin_HEXAGON_M4_nac_up_s1_sat(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.or.and
-  __builtin_HEXAGON_M4_or_andn(0, 0, 0);
+  __builtin_HEXAGON_M4_or_and(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.or.andn
-  __builtin_HEXAGON_M4_or_or(0, 0, 0);
+  __builtin_HEXAGON_M4_or_andn(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.or.or
-  __builtin_HEXAGON_M4_or_xor(0, 0, 0);
+  __builtin_HEXAGON_M4_or_or(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.or.xor
-  __builtin_HEXAGON_M4_pmpyw(0, 0);
+  __builtin_HEXAGON_M4_or_xor(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.pmpyw
-  __builtin_HEXAGON_M4_pmpyw_acc(0, 0, 0);
+  __builtin_HEXAGON_M4_pmpyw(0, 0);
   // CHECK: @llvm.hexagon.M4.pmpyw.acc
-  __builtin_HEXAGON_M4_vpmpyh(0, 0);
+  __builtin_HEXAGON_M4_pmpyw_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.vpmpyh
-  __builtin_HEXAGON_M4_vpmpyh_acc(0, 0, 0);
+  __builtin_HEXAGON_M4_vpmpyh(0, 0);
   // CHECK: @llvm.hexagon.M4.vpmpyh.acc
-  __builtin_HEXAGON_M4_vrmpyeh_acc_s0(0, 0, 0);
+  __builtin_HEXAGON_M4_vpmpyh_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.vrmpyeh.acc.s0
-  __builtin_HEXAGON_M4_vrmpyeh_acc_s1(0, 0, 0);
+  __builtin_HEXAGON_M4_vrmpyeh_acc_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.vrmpyeh.acc.s1
-  __builtin_HEXAGON_M4_vrmpyeh_s0(0, 0);
+  __builtin_HEXAGON_M4_vrmpyeh_acc_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.vrmpyeh.s0
-  __builtin_HEXAGON_M4_vrmpyeh_s1(0, 0);
+  __builtin_HEXAGON_M4_vrmpyeh_s0(0, 0);
   // CHECK: @llvm.hexagon.M4.vrmpyeh.s1
-  __builtin_HEXAGON_M4_vrmpyoh_acc_s0(0, 0, 0);
+  __builtin_HEXAGON_M4_vrmpyeh_s1(0, 0);
   // CHECK: @llvm.hexagon.M4.vrmpyoh.acc.s0
-  __builtin_HEXAGON_M4_vrmpyoh_acc_s1(0, 0, 0);
+  __builtin_HEXAGON_M4_vrmpyoh_acc_s0(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.vrmpyoh.acc.s1
-  __builtin_HEXAGON_M4_vrmpyoh_s0(0, 0);
+  __builtin_HEXAGON_M4_vrmpyoh_acc_s1(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.vrmpyoh.s0
-  __builtin_HEXAGON_M4_vrmpyoh_s1(0, 0);
+  __builtin_HEXAGON_M4_vrmpyoh_s0(0, 0);
   // CHECK: @llvm.hexagon.M4.vrmpyoh.s1
-  __builtin_HEXAGON_M4_xor_and(0, 0, 0);
+  __builtin_HEXAGON_M4_vrmpyoh_s1(0, 0);
   // CHECK: @llvm.hexagon.M4.xor.and
-  __builtin_HEXAGON_M4_xor_andn(0, 0, 0);
+  __builtin_HEXAGON_M4_xor_and(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.xor.andn
-  __builtin_HEXAGON_M4_xor_or(0, 0, 0);
+  __builtin_HEXAGON_M4_xor_andn(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.xor.or
-  __builtin_HEXAGON_M4_xor_xacc(0, 0, 0);
+  __builtin_HEXAGON_M4_xor_or(0, 0, 0);
   // CHECK: @llvm.hexagon.M4.xor.xacc
-  __builtin_HEXAGON_M5_vdmacbsu(0, 0, 0);
+  __builtin_HEXAGON_M4_xor_xacc(0, 0, 0);
   // CHECK: @llvm.hexagon.M5.vdmacbsu
-  __builtin_HEXAGON_M5_vdmpybsu(0, 0);
+  __builtin_HEXAGON_M5_vdmacbsu(0, 0, 0);
   // CHECK: @llvm.hexagon.M5.vdmpybsu
-  __builtin_HEXAGON_M5_vmacbsu(0, 0, 0);
+  __builtin_HEXAGON_M5_vdmpybsu(0, 0);
   // CHECK: @llvm.hexagon.M5.vmacbsu
-  __builtin_HEXAGON_M5_vmacbuu(0, 0, 0);
+  __builtin_HEXAGON_M5_vmacbsu(0, 0, 0);
   // CHECK: @llvm.hexagon.M5.vmacbuu
-  __builtin_HEXAGON_M5_vmpybsu(0, 0);
+  __builtin_HEXAGON_M5_vmacbuu(0, 0, 0);
   // CHECK: @llvm.hexagon.M5.vmpybsu
-  __builtin_HEXAGON_M5_vmpybuu(0, 0);
+  __builtin_HEXAGON_M5_vmpybsu(0, 0);
   // CHECK: @llvm.hexagon.M5.vmpybuu
-  __builtin_HEXAGON_M5_vrmacbsu(0, 0, 0);
+  __builtin_HEXAGON_M5_vmpybuu(0, 0);
   // CHECK: @llvm.hexagon.M5.vrmacbsu
-  __builtin_HEXAGON_M5_vrmacbuu(0, 0, 0);
+  __builtin_HEXAGON_M5_vrmacbsu(0, 0, 0);
   // CHECK: @llvm.hexagon.M5.vrmacbuu
-  __builtin_HEXAGON_M5_vrmpybsu(0, 0);
+  __builtin_HEXAGON_M5_vrmacbuu(0, 0, 0);
   // CHECK: @llvm.hexagon.M5.vrmpybsu
-  __builtin_HEXAGON_M5_vrmpybuu(0, 0);
+  __builtin_HEXAGON_M5_vrmpybsu(0, 0);
   // CHECK: @llvm.hexagon.M5.vrmpybuu
-  __builtin_HEXAGON_S2_addasl_rrri(0, 0, 0);
+  __builtin_HEXAGON_M5_vrmpybuu(0, 0);
+  // CHECK: @llvm.hexagon.M6.vabsdiffb
+  __builtin_HEXAGON_M6_vabsdiffb(0, 0);
+  // CHECK: @llvm.hexagon.M6.vabsdiffub
+  __builtin_HEXAGON_M6_vabsdiffub(0, 0);
   // CHECK: @llvm.hexagon.S2.addasl.rrri
-  __builtin_HEXAGON_S2_asl_i_p(0, 0);
+  __builtin_HEXAGON_S2_addasl_rrri(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.p
-  __builtin_HEXAGON_S2_asl_i_p_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_i_p(0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.p.acc
-  __builtin_HEXAGON_S2_asl_i_p_and(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_i_p_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.p.and
-  __builtin_HEXAGON_S2_asl_i_p_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_i_p_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.p.nac
-  __builtin_HEXAGON_S2_asl_i_p_or(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_i_p_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.p.or
-  __builtin_HEXAGON_S2_asl_i_p_xacc(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_i_p_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.p.xacc
-  __builtin_HEXAGON_S2_asl_i_r(0, 0);
+  __builtin_HEXAGON_S2_asl_i_p_xacc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.r
-  __builtin_HEXAGON_S2_asl_i_r_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_i_r(0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.r.acc
-  __builtin_HEXAGON_S2_asl_i_r_and(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_i_r_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.r.and
-  __builtin_HEXAGON_S2_asl_i_r_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_i_r_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.r.nac
-  __builtin_HEXAGON_S2_asl_i_r_or(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_i_r_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.r.or
-  __builtin_HEXAGON_S2_asl_i_r_sat(0, 0);
+  __builtin_HEXAGON_S2_asl_i_r_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.r.sat
-  __builtin_HEXAGON_S2_asl_i_r_xacc(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_i_r_sat(0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.r.xacc
-  __builtin_HEXAGON_S2_asl_i_vh(0, 0);
+  __builtin_HEXAGON_S2_asl_i_r_xacc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.vh
-  __builtin_HEXAGON_S2_asl_i_vw(0, 0);
+  __builtin_HEXAGON_S2_asl_i_vh(0, 0);
   // CHECK: @llvm.hexagon.S2.asl.i.vw
-  __builtin_HEXAGON_S2_asl_r_p(0, 0);
+  __builtin_HEXAGON_S2_asl_i_vw(0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.p
-  __builtin_HEXAGON_S2_asl_r_p_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_r_p(0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.p.acc
-  __builtin_HEXAGON_S2_asl_r_p_and(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_r_p_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.p.and
-  __builtin_HEXAGON_S2_asl_r_p_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_r_p_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.p.nac
-  __builtin_HEXAGON_S2_asl_r_p_or(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_r_p_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.p.or
-  __builtin_HEXAGON_S2_asl_r_p_xor(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_r_p_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.p.xor
-  __builtin_HEXAGON_S2_asl_r_r(0, 0);
+  __builtin_HEXAGON_S2_asl_r_p_xor(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.r
-  __builtin_HEXAGON_S2_asl_r_r_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_r_r(0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.r.acc
-  __builtin_HEXAGON_S2_asl_r_r_and(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_r_r_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.r.and
-  __builtin_HEXAGON_S2_asl_r_r_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_r_r_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.r.nac
-  __builtin_HEXAGON_S2_asl_r_r_or(0, 0, 0);
+  __builtin_HEXAGON_S2_asl_r_r_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.r.or
-  __builtin_HEXAGON_S2_asl_r_r_sat(0, 0);
+  __builtin_HEXAGON_S2_asl_r_r_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.r.sat
-  __builtin_HEXAGON_S2_asl_r_vh(0, 0);
+  __builtin_HEXAGON_S2_asl_r_r_sat(0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.vh
-  __builtin_HEXAGON_S2_asl_r_vw(0, 0);
+  __builtin_HEXAGON_S2_asl_r_vh(0, 0);
   // CHECK: @llvm.hexagon.S2.asl.r.vw
-  __builtin_HEXAGON_S2_asr_i_p(0, 0);
+  __builtin_HEXAGON_S2_asl_r_vw(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.p
-  __builtin_HEXAGON_S2_asr_i_p_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_i_p(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.p.acc
-  __builtin_HEXAGON_S2_asr_i_p_and(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_i_p_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.p.and
-  __builtin_HEXAGON_S2_asr_i_p_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_i_p_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.p.nac
-  __builtin_HEXAGON_S2_asr_i_p_or(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_i_p_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.p.or
-  __builtin_HEXAGON_S2_asr_i_p_rnd(0, 0);
+  __builtin_HEXAGON_S2_asr_i_p_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.p.rnd
-  __builtin_HEXAGON_S2_asr_i_p_rnd_goodsyntax(0, 0);
+  __builtin_HEXAGON_S2_asr_i_p_rnd(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.p.rnd.goodsyntax
-  __builtin_HEXAGON_S2_asr_i_r(0, 0);
+  __builtin_HEXAGON_S2_asr_i_p_rnd_goodsyntax(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.r
-  __builtin_HEXAGON_S2_asr_i_r_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_i_r(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.r.acc
-  __builtin_HEXAGON_S2_asr_i_r_and(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_i_r_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.r.and
-  __builtin_HEXAGON_S2_asr_i_r_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_i_r_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.r.nac
-  __builtin_HEXAGON_S2_asr_i_r_or(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_i_r_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.r.or
-  __builtin_HEXAGON_S2_asr_i_r_rnd(0, 0);
+  __builtin_HEXAGON_S2_asr_i_r_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.r.rnd
-  __builtin_HEXAGON_S2_asr_i_r_rnd_goodsyntax(0, 0);
+  __builtin_HEXAGON_S2_asr_i_r_rnd(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.r.rnd.goodsyntax
-  __builtin_HEXAGON_S2_asr_i_svw_trun(0, 0);
+  __builtin_HEXAGON_S2_asr_i_r_rnd_goodsyntax(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.svw.trun
-  __builtin_HEXAGON_S2_asr_i_vh(0, 0);
+  __builtin_HEXAGON_S2_asr_i_svw_trun(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.vh
-  __builtin_HEXAGON_S2_asr_i_vw(0, 0);
+  __builtin_HEXAGON_S2_asr_i_vh(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.i.vw
-  __builtin_HEXAGON_S2_asr_r_p(0, 0);
+  __builtin_HEXAGON_S2_asr_i_vw(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.p
-  __builtin_HEXAGON_S2_asr_r_p_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_r_p(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.p.acc
-  __builtin_HEXAGON_S2_asr_r_p_and(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_r_p_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.p.and
-  __builtin_HEXAGON_S2_asr_r_p_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_r_p_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.p.nac
-  __builtin_HEXAGON_S2_asr_r_p_or(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_r_p_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.p.or
-  __builtin_HEXAGON_S2_asr_r_p_xor(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_r_p_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.p.xor
-  __builtin_HEXAGON_S2_asr_r_r(0, 0);
+  __builtin_HEXAGON_S2_asr_r_p_xor(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.r
-  __builtin_HEXAGON_S2_asr_r_r_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_r_r(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.r.acc
-  __builtin_HEXAGON_S2_asr_r_r_and(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_r_r_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.r.and
-  __builtin_HEXAGON_S2_asr_r_r_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_r_r_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.r.nac
-  __builtin_HEXAGON_S2_asr_r_r_or(0, 0, 0);
+  __builtin_HEXAGON_S2_asr_r_r_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.r.or
-  __builtin_HEXAGON_S2_asr_r_r_sat(0, 0);
+  __builtin_HEXAGON_S2_asr_r_r_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.r.sat
-  __builtin_HEXAGON_S2_asr_r_svw_trun(0, 0);
+  __builtin_HEXAGON_S2_asr_r_r_sat(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.svw.trun
-  __builtin_HEXAGON_S2_asr_r_vh(0, 0);
+  __builtin_HEXAGON_S2_asr_r_svw_trun(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.vh
-  __builtin_HEXAGON_S2_asr_r_vw(0, 0);
+  __builtin_HEXAGON_S2_asr_r_vh(0, 0);
   // CHECK: @llvm.hexagon.S2.asr.r.vw
-  __builtin_HEXAGON_S2_brev(0);
+  __builtin_HEXAGON_S2_asr_r_vw(0, 0);
   // CHECK: @llvm.hexagon.S2.brev
-  __builtin_HEXAGON_S2_brevp(0);
+  __builtin_HEXAGON_S2_brev(0);
   // CHECK: @llvm.hexagon.S2.brevp
-  __builtin_HEXAGON_S2_cabacencbin(0, 0, 0);
+  __builtin_HEXAGON_S2_brevp(0);
   // CHECK: @llvm.hexagon.S2.cabacencbin
-  __builtin_HEXAGON_S2_cl0(0);
+  __builtin_HEXAGON_S2_cabacencbin(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.cl0
-  __builtin_HEXAGON_S2_cl0p(0);
+  __builtin_HEXAGON_S2_cl0(0);
   // CHECK: @llvm.hexagon.S2.cl0p
-  __builtin_HEXAGON_S2_cl1(0);
+  __builtin_HEXAGON_S2_cl0p(0);
   // CHECK: @llvm.hexagon.S2.cl1
-  __builtin_HEXAGON_S2_cl1p(0);
+  __builtin_HEXAGON_S2_cl1(0);
   // CHECK: @llvm.hexagon.S2.cl1p
-  __builtin_HEXAGON_S2_clb(0);
+  __builtin_HEXAGON_S2_cl1p(0);
   // CHECK: @llvm.hexagon.S2.clb
-  __builtin_HEXAGON_S2_clbnorm(0);
+  __builtin_HEXAGON_S2_clb(0);
   // CHECK: @llvm.hexagon.S2.clbnorm
-  __builtin_HEXAGON_S2_clbp(0);
+  __builtin_HEXAGON_S2_clbnorm(0);
   // CHECK: @llvm.hexagon.S2.clbp
-  __builtin_HEXAGON_S2_clrbit_i(0, 0);
+  __builtin_HEXAGON_S2_clbp(0);
   // CHECK: @llvm.hexagon.S2.clrbit.i
-  __builtin_HEXAGON_S2_clrbit_r(0, 0);
+  __builtin_HEXAGON_S2_clrbit_i(0, 0);
   // CHECK: @llvm.hexagon.S2.clrbit.r
-  __builtin_HEXAGON_S2_ct0(0);
+  __builtin_HEXAGON_S2_clrbit_r(0, 0);
   // CHECK: @llvm.hexagon.S2.ct0
-  __builtin_HEXAGON_S2_ct0p(0);
+  __builtin_HEXAGON_S2_ct0(0);
   // CHECK: @llvm.hexagon.S2.ct0p
-  __builtin_HEXAGON_S2_ct1(0);
+  __builtin_HEXAGON_S2_ct0p(0);
   // CHECK: @llvm.hexagon.S2.ct1
-  __builtin_HEXAGON_S2_ct1p(0);
+  __builtin_HEXAGON_S2_ct1(0);
   // CHECK: @llvm.hexagon.S2.ct1p
-  __builtin_HEXAGON_S2_deinterleave(0);
+  __builtin_HEXAGON_S2_ct1p(0);
   // CHECK: @llvm.hexagon.S2.deinterleave
-  __builtin_HEXAGON_S2_extractu(0, 0, 0);
+  __builtin_HEXAGON_S2_deinterleave(0);
   // CHECK: @llvm.hexagon.S2.extractu
-  __builtin_HEXAGON_S2_extractup(0, 0, 0);
-  // CHECK: @llvm.hexagon.S2.extractup
-  __builtin_HEXAGON_S2_extractup_rp(0, 0);
-  // CHECK: @llvm.hexagon.S2.extractup.rp
-  __builtin_HEXAGON_S2_extractu_rp(0, 0);
+  __builtin_HEXAGON_S2_extractu(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.extractu.rp
-  __builtin_HEXAGON_S2_insert(0, 0, 0, 0);
+  __builtin_HEXAGON_S2_extractu_rp(0, 0);
+  // CHECK: @llvm.hexagon.S2.extractup
+  __builtin_HEXAGON_S2_extractup(0, 0, 0);
+  // CHECK: @llvm.hexagon.S2.extractup.rp
+  __builtin_HEXAGON_S2_extractup_rp(0, 0);
   // CHECK: @llvm.hexagon.S2.insert
-  __builtin_HEXAGON_S2_insertp(0, 0, 0, 0);
-  // CHECK: @llvm.hexagon.S2.insertp
-  __builtin_HEXAGON_S2_insertp_rp(0, 0, 0);
-  // CHECK: @llvm.hexagon.S2.insertp.rp
-  __builtin_HEXAGON_S2_insert_rp(0, 0, 0);
+  __builtin_HEXAGON_S2_insert(0, 0, 0, 0);
   // CHECK: @llvm.hexagon.S2.insert.rp
-  __builtin_HEXAGON_S2_interleave(0);
+  __builtin_HEXAGON_S2_insert_rp(0, 0, 0);
+  // CHECK: @llvm.hexagon.S2.insertp
+  __builtin_HEXAGON_S2_insertp(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.S2.insertp.rp
+  __builtin_HEXAGON_S2_insertp_rp(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.interleave
-  __builtin_HEXAGON_S2_lfsp(0, 0);
+  __builtin_HEXAGON_S2_interleave(0);
   // CHECK: @llvm.hexagon.S2.lfsp
-  __builtin_HEXAGON_S2_lsl_r_p(0, 0);
+  __builtin_HEXAGON_S2_lfsp(0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.p
-  __builtin_HEXAGON_S2_lsl_r_p_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_lsl_r_p(0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.p.acc
-  __builtin_HEXAGON_S2_lsl_r_p_and(0, 0, 0);
+  __builtin_HEXAGON_S2_lsl_r_p_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.p.and
-  __builtin_HEXAGON_S2_lsl_r_p_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_lsl_r_p_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.p.nac
-  __builtin_HEXAGON_S2_lsl_r_p_or(0, 0, 0);
+  __builtin_HEXAGON_S2_lsl_r_p_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.p.or
-  __builtin_HEXAGON_S2_lsl_r_p_xor(0, 0, 0);
+  __builtin_HEXAGON_S2_lsl_r_p_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.p.xor
-  __builtin_HEXAGON_S2_lsl_r_r(0, 0);
+  __builtin_HEXAGON_S2_lsl_r_p_xor(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.r
-  __builtin_HEXAGON_S2_lsl_r_r_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_lsl_r_r(0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.r.acc
-  __builtin_HEXAGON_S2_lsl_r_r_and(0, 0, 0);
+  __builtin_HEXAGON_S2_lsl_r_r_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.r.and
-  __builtin_HEXAGON_S2_lsl_r_r_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_lsl_r_r_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.r.nac
-  __builtin_HEXAGON_S2_lsl_r_r_or(0, 0, 0);
+  __builtin_HEXAGON_S2_lsl_r_r_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.r.or
-  __builtin_HEXAGON_S2_lsl_r_vh(0, 0);
+  __builtin_HEXAGON_S2_lsl_r_r_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.vh
-  __builtin_HEXAGON_S2_lsl_r_vw(0, 0);
+  __builtin_HEXAGON_S2_lsl_r_vh(0, 0);
   // CHECK: @llvm.hexagon.S2.lsl.r.vw
-  __builtin_HEXAGON_S2_lsr_i_p(0, 0);
+  __builtin_HEXAGON_S2_lsl_r_vw(0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.p
-  __builtin_HEXAGON_S2_lsr_i_p_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_i_p(0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.p.acc
-  __builtin_HEXAGON_S2_lsr_i_p_and(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_i_p_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.p.and
-  __builtin_HEXAGON_S2_lsr_i_p_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_i_p_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.p.nac
-  __builtin_HEXAGON_S2_lsr_i_p_or(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_i_p_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.p.or
-  __builtin_HEXAGON_S2_lsr_i_p_xacc(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_i_p_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.p.xacc
-  __builtin_HEXAGON_S2_lsr_i_r(0, 0);
+  __builtin_HEXAGON_S2_lsr_i_p_xacc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.r
-  __builtin_HEXAGON_S2_lsr_i_r_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_i_r(0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.r.acc
-  __builtin_HEXAGON_S2_lsr_i_r_and(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_i_r_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.r.and
-  __builtin_HEXAGON_S2_lsr_i_r_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_i_r_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.r.nac
-  __builtin_HEXAGON_S2_lsr_i_r_or(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_i_r_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.r.or
-  __builtin_HEXAGON_S2_lsr_i_r_xacc(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_i_r_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.r.xacc
-  __builtin_HEXAGON_S2_lsr_i_vh(0, 0);
+  __builtin_HEXAGON_S2_lsr_i_r_xacc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.vh
-  __builtin_HEXAGON_S2_lsr_i_vw(0, 0);
+  __builtin_HEXAGON_S2_lsr_i_vh(0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.i.vw
-  __builtin_HEXAGON_S2_lsr_r_p(0, 0);
+  __builtin_HEXAGON_S2_lsr_i_vw(0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.p
-  __builtin_HEXAGON_S2_lsr_r_p_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_r_p(0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.p.acc
-  __builtin_HEXAGON_S2_lsr_r_p_and(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_r_p_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.p.and
-  __builtin_HEXAGON_S2_lsr_r_p_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_r_p_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.p.nac
-  __builtin_HEXAGON_S2_lsr_r_p_or(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_r_p_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.p.or
-  __builtin_HEXAGON_S2_lsr_r_p_xor(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_r_p_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.p.xor
-  __builtin_HEXAGON_S2_lsr_r_r(0, 0);
+  __builtin_HEXAGON_S2_lsr_r_p_xor(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.r
-  __builtin_HEXAGON_S2_lsr_r_r_acc(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_r_r(0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.r.acc
-  __builtin_HEXAGON_S2_lsr_r_r_and(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_r_r_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.r.and
-  __builtin_HEXAGON_S2_lsr_r_r_nac(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_r_r_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.r.nac
-  __builtin_HEXAGON_S2_lsr_r_r_or(0, 0, 0);
+  __builtin_HEXAGON_S2_lsr_r_r_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.r.or
-  __builtin_HEXAGON_S2_lsr_r_vh(0, 0);
+  __builtin_HEXAGON_S2_lsr_r_r_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.vh
-  __builtin_HEXAGON_S2_lsr_r_vw(0, 0);
+  __builtin_HEXAGON_S2_lsr_r_vh(0, 0);
   // CHECK: @llvm.hexagon.S2.lsr.r.vw
-  __builtin_HEXAGON_S2_packhl(0, 0);
+  __builtin_HEXAGON_S2_lsr_r_vw(0, 0);
   // CHECK: @llvm.hexagon.S2.packhl
-  __builtin_HEXAGON_S2_parityp(0, 0);
+  __builtin_HEXAGON_S2_packhl(0, 0);
   // CHECK: @llvm.hexagon.S2.parityp
-  __builtin_HEXAGON_S2_setbit_i(0, 0);
+  __builtin_HEXAGON_S2_parityp(0, 0);
   // CHECK: @llvm.hexagon.S2.setbit.i
-  __builtin_HEXAGON_S2_setbit_r(0, 0);
+  __builtin_HEXAGON_S2_setbit_i(0, 0);
   // CHECK: @llvm.hexagon.S2.setbit.r
-  __builtin_HEXAGON_S2_shuffeb(0, 0);
+  __builtin_HEXAGON_S2_setbit_r(0, 0);
   // CHECK: @llvm.hexagon.S2.shuffeb
-  __builtin_HEXAGON_S2_shuffeh(0, 0);
+  __builtin_HEXAGON_S2_shuffeb(0, 0);
   // CHECK: @llvm.hexagon.S2.shuffeh
-  __builtin_HEXAGON_S2_shuffob(0, 0);
+  __builtin_HEXAGON_S2_shuffeh(0, 0);
   // CHECK: @llvm.hexagon.S2.shuffob
-  __builtin_HEXAGON_S2_shuffoh(0, 0);
+  __builtin_HEXAGON_S2_shuffob(0, 0);
   // CHECK: @llvm.hexagon.S2.shuffoh
-  __builtin_HEXAGON_S2_svsathb(0);
+  __builtin_HEXAGON_S2_shuffoh(0, 0);
   // CHECK: @llvm.hexagon.S2.svsathb
-  __builtin_HEXAGON_S2_svsathub(0);
+  __builtin_HEXAGON_S2_svsathb(0);
   // CHECK: @llvm.hexagon.S2.svsathub
-  __builtin_HEXAGON_S2_tableidxb_goodsyntax(0, 0, 0, 0);
+  __builtin_HEXAGON_S2_svsathub(0);
   // CHECK: @llvm.hexagon.S2.tableidxb.goodsyntax
-  __builtin_HEXAGON_S2_tableidxd_goodsyntax(0, 0, 0, 0);
+  __builtin_HEXAGON_S2_tableidxb_goodsyntax(0, 0, 0, 0);
   // CHECK: @llvm.hexagon.S2.tableidxd.goodsyntax
-  __builtin_HEXAGON_S2_tableidxh_goodsyntax(0, 0, 0, 0);
+  __builtin_HEXAGON_S2_tableidxd_goodsyntax(0, 0, 0, 0);
   // CHECK: @llvm.hexagon.S2.tableidxh.goodsyntax
-  __builtin_HEXAGON_S2_tableidxw_goodsyntax(0, 0, 0, 0);
+  __builtin_HEXAGON_S2_tableidxh_goodsyntax(0, 0, 0, 0);
   // CHECK: @llvm.hexagon.S2.tableidxw.goodsyntax
-  __builtin_HEXAGON_S2_togglebit_i(0, 0);
+  __builtin_HEXAGON_S2_tableidxw_goodsyntax(0, 0, 0, 0);
   // CHECK: @llvm.hexagon.S2.togglebit.i
-  __builtin_HEXAGON_S2_togglebit_r(0, 0);
+  __builtin_HEXAGON_S2_togglebit_i(0, 0);
   // CHECK: @llvm.hexagon.S2.togglebit.r
-  __builtin_HEXAGON_S2_tstbit_i(0, 0);
+  __builtin_HEXAGON_S2_togglebit_r(0, 0);
   // CHECK: @llvm.hexagon.S2.tstbit.i
-  __builtin_HEXAGON_S2_tstbit_r(0, 0);
+  __builtin_HEXAGON_S2_tstbit_i(0, 0);
   // CHECK: @llvm.hexagon.S2.tstbit.r
-  __builtin_HEXAGON_S2_valignib(0, 0, 0);
+  __builtin_HEXAGON_S2_tstbit_r(0, 0);
   // CHECK: @llvm.hexagon.S2.valignib
-  __builtin_HEXAGON_S2_valignrb(0, 0, 0);
+  __builtin_HEXAGON_S2_valignib(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.valignrb
-  __builtin_HEXAGON_S2_vcnegh(0, 0);
+  __builtin_HEXAGON_S2_valignrb(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.vcnegh
-  __builtin_HEXAGON_S2_vcrotate(0, 0);
+  __builtin_HEXAGON_S2_vcnegh(0, 0);
   // CHECK: @llvm.hexagon.S2.vcrotate
-  __builtin_HEXAGON_S2_vrcnegh(0, 0, 0);
+  __builtin_HEXAGON_S2_vcrotate(0, 0);
   // CHECK: @llvm.hexagon.S2.vrcnegh
-  __builtin_HEXAGON_S2_vrndpackwh(0);
+  __builtin_HEXAGON_S2_vrcnegh(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.vrndpackwh
-  __builtin_HEXAGON_S2_vrndpackwhs(0);
+  __builtin_HEXAGON_S2_vrndpackwh(0);
   // CHECK: @llvm.hexagon.S2.vrndpackwhs
-  __builtin_HEXAGON_S2_vsathb(0);
+  __builtin_HEXAGON_S2_vrndpackwhs(0);
   // CHECK: @llvm.hexagon.S2.vsathb
-  __builtin_HEXAGON_S2_vsathb_nopack(0);
+  __builtin_HEXAGON_S2_vsathb(0);
   // CHECK: @llvm.hexagon.S2.vsathb.nopack
-  __builtin_HEXAGON_S2_vsathub(0);
+  __builtin_HEXAGON_S2_vsathb_nopack(0);
   // CHECK: @llvm.hexagon.S2.vsathub
-  __builtin_HEXAGON_S2_vsathub_nopack(0);
+  __builtin_HEXAGON_S2_vsathub(0);
   // CHECK: @llvm.hexagon.S2.vsathub.nopack
-  __builtin_HEXAGON_S2_vsatwh(0);
+  __builtin_HEXAGON_S2_vsathub_nopack(0);
   // CHECK: @llvm.hexagon.S2.vsatwh
-  __builtin_HEXAGON_S2_vsatwh_nopack(0);
+  __builtin_HEXAGON_S2_vsatwh(0);
   // CHECK: @llvm.hexagon.S2.vsatwh.nopack
-  __builtin_HEXAGON_S2_vsatwuh(0);
+  __builtin_HEXAGON_S2_vsatwh_nopack(0);
   // CHECK: @llvm.hexagon.S2.vsatwuh
-  __builtin_HEXAGON_S2_vsatwuh_nopack(0);
+  __builtin_HEXAGON_S2_vsatwuh(0);
   // CHECK: @llvm.hexagon.S2.vsatwuh.nopack
-  __builtin_HEXAGON_S2_vsplatrb(0);
+  __builtin_HEXAGON_S2_vsatwuh_nopack(0);
   // CHECK: @llvm.hexagon.S2.vsplatrb
-  __builtin_HEXAGON_S2_vsplatrh(0);
+  __builtin_HEXAGON_S2_vsplatrb(0);
   // CHECK: @llvm.hexagon.S2.vsplatrh
-  __builtin_HEXAGON_S2_vspliceib(0, 0, 0);
+  __builtin_HEXAGON_S2_vsplatrh(0);
   // CHECK: @llvm.hexagon.S2.vspliceib
-  __builtin_HEXAGON_S2_vsplicerb(0, 0, 0);
+  __builtin_HEXAGON_S2_vspliceib(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.vsplicerb
-  __builtin_HEXAGON_S2_vsxtbh(0);
+  __builtin_HEXAGON_S2_vsplicerb(0, 0, 0);
   // CHECK: @llvm.hexagon.S2.vsxtbh
-  __builtin_HEXAGON_S2_vsxthw(0);
+  __builtin_HEXAGON_S2_vsxtbh(0);
   // CHECK: @llvm.hexagon.S2.vsxthw
-  __builtin_HEXAGON_S2_vtrunehb(0);
+  __builtin_HEXAGON_S2_vsxthw(0);
   // CHECK: @llvm.hexagon.S2.vtrunehb
-  __builtin_HEXAGON_S2_vtrunewh(0, 0);
+  __builtin_HEXAGON_S2_vtrunehb(0);
   // CHECK: @llvm.hexagon.S2.vtrunewh
-  __builtin_HEXAGON_S2_vtrunohb(0);
+  __builtin_HEXAGON_S2_vtrunewh(0, 0);
   // CHECK: @llvm.hexagon.S2.vtrunohb
-  __builtin_HEXAGON_S2_vtrunowh(0, 0);
+  __builtin_HEXAGON_S2_vtrunohb(0);
   // CHECK: @llvm.hexagon.S2.vtrunowh
-  __builtin_HEXAGON_S2_vzxtbh(0);
+  __builtin_HEXAGON_S2_vtrunowh(0, 0);
   // CHECK: @llvm.hexagon.S2.vzxtbh
-  __builtin_HEXAGON_S2_vzxthw(0);
+  __builtin_HEXAGON_S2_vzxtbh(0);
   // CHECK: @llvm.hexagon.S2.vzxthw
-  __builtin_HEXAGON_S4_addaddi(0, 0, 0);
+  __builtin_HEXAGON_S2_vzxthw(0);
   // CHECK: @llvm.hexagon.S4.addaddi
-  __builtin_HEXAGON_S4_addi_asl_ri(0, 0, 0);
+  __builtin_HEXAGON_S4_addaddi(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.addi.asl.ri
-  __builtin_HEXAGON_S4_addi_lsr_ri(0, 0, 0);
+  __builtin_HEXAGON_S4_addi_asl_ri(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.addi.lsr.ri
-  __builtin_HEXAGON_S4_andi_asl_ri(0, 0, 0);
+  __builtin_HEXAGON_S4_addi_lsr_ri(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.andi.asl.ri
-  __builtin_HEXAGON_S4_andi_lsr_ri(0, 0, 0);
+  __builtin_HEXAGON_S4_andi_asl_ri(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.andi.lsr.ri
-  __builtin_HEXAGON_S4_clbaddi(0, 0);
+  __builtin_HEXAGON_S4_andi_lsr_ri(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.clbaddi
-  __builtin_HEXAGON_S4_clbpaddi(0, 0);
+  __builtin_HEXAGON_S4_clbaddi(0, 0);
   // CHECK: @llvm.hexagon.S4.clbpaddi
-  __builtin_HEXAGON_S4_clbpnorm(0);
+  __builtin_HEXAGON_S4_clbpaddi(0, 0);
   // CHECK: @llvm.hexagon.S4.clbpnorm
-  __builtin_HEXAGON_S4_extract(0, 0, 0);
+  __builtin_HEXAGON_S4_clbpnorm(0);
   // CHECK: @llvm.hexagon.S4.extract
-  __builtin_HEXAGON_S4_extractp(0, 0, 0);
-  // CHECK: @llvm.hexagon.S4.extractp
-  __builtin_HEXAGON_S4_extractp_rp(0, 0);
-  // CHECK: @llvm.hexagon.S4.extractp.rp
-  __builtin_HEXAGON_S4_extract_rp(0, 0);
+  __builtin_HEXAGON_S4_extract(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.extract.rp
-  __builtin_HEXAGON_S4_lsli(0, 0);
+  __builtin_HEXAGON_S4_extract_rp(0, 0);
+  // CHECK: @llvm.hexagon.S4.extractp
+  __builtin_HEXAGON_S4_extractp(0, 0, 0);
+  // CHECK: @llvm.hexagon.S4.extractp.rp
+  __builtin_HEXAGON_S4_extractp_rp(0, 0);
   // CHECK: @llvm.hexagon.S4.lsli
-  __builtin_HEXAGON_S4_ntstbit_i(0, 0);
+  __builtin_HEXAGON_S4_lsli(0, 0);
   // CHECK: @llvm.hexagon.S4.ntstbit.i
-  __builtin_HEXAGON_S4_ntstbit_r(0, 0);
+  __builtin_HEXAGON_S4_ntstbit_i(0, 0);
   // CHECK: @llvm.hexagon.S4.ntstbit.r
-  __builtin_HEXAGON_S4_or_andi(0, 0, 0);
+  __builtin_HEXAGON_S4_ntstbit_r(0, 0);
   // CHECK: @llvm.hexagon.S4.or.andi
-  __builtin_HEXAGON_S4_or_andix(0, 0, 0);
+  __builtin_HEXAGON_S4_or_andi(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.or.andix
-  __builtin_HEXAGON_S4_ori_asl_ri(0, 0, 0);
-  // CHECK: @llvm.hexagon.S4.ori.asl.ri
-  __builtin_HEXAGON_S4_ori_lsr_ri(0, 0, 0);
-  // CHECK: @llvm.hexagon.S4.ori.lsr.ri
-  __builtin_HEXAGON_S4_or_ori(0, 0, 0);
+  __builtin_HEXAGON_S4_or_andix(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.or.ori
-  __builtin_HEXAGON_S4_parity(0, 0);
+  __builtin_HEXAGON_S4_or_ori(0, 0, 0);
+  // CHECK: @llvm.hexagon.S4.ori.asl.ri
+  __builtin_HEXAGON_S4_ori_asl_ri(0, 0, 0);
+  // CHECK: @llvm.hexagon.S4.ori.lsr.ri
+  __builtin_HEXAGON_S4_ori_lsr_ri(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.parity
-  __builtin_HEXAGON_S4_subaddi(0, 0, 0);
+  __builtin_HEXAGON_S4_parity(0, 0);
   // CHECK: @llvm.hexagon.S4.subaddi
-  __builtin_HEXAGON_S4_subi_asl_ri(0, 0, 0);
+  __builtin_HEXAGON_S4_subaddi(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.subi.asl.ri
-  __builtin_HEXAGON_S4_subi_lsr_ri(0, 0, 0);
+  __builtin_HEXAGON_S4_subi_asl_ri(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.subi.lsr.ri
-  __builtin_HEXAGON_S4_vrcrotate(0, 0, 0);
+  __builtin_HEXAGON_S4_subi_lsr_ri(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.vrcrotate
-  __builtin_HEXAGON_S4_vrcrotate_acc(0, 0, 0, 0);
+  __builtin_HEXAGON_S4_vrcrotate(0, 0, 0);
   // CHECK: @llvm.hexagon.S4.vrcrotate.acc
-  __builtin_HEXAGON_S4_vxaddsubh(0, 0);
+  __builtin_HEXAGON_S4_vrcrotate_acc(0, 0, 0, 0);
   // CHECK: @llvm.hexagon.S4.vxaddsubh
-  __builtin_HEXAGON_S4_vxaddsubhr(0, 0);
+  __builtin_HEXAGON_S4_vxaddsubh(0, 0);
   // CHECK: @llvm.hexagon.S4.vxaddsubhr
-  __builtin_HEXAGON_S4_vxaddsubw(0, 0);
+  __builtin_HEXAGON_S4_vxaddsubhr(0, 0);
   // CHECK: @llvm.hexagon.S4.vxaddsubw
-  __builtin_HEXAGON_S4_vxsubaddh(0, 0);
+  __builtin_HEXAGON_S4_vxaddsubw(0, 0);
   // CHECK: @llvm.hexagon.S4.vxsubaddh
-  __builtin_HEXAGON_S4_vxsubaddhr(0, 0);
+  __builtin_HEXAGON_S4_vxsubaddh(0, 0);
   // CHECK: @llvm.hexagon.S4.vxsubaddhr
-  __builtin_HEXAGON_S4_vxsubaddw(0, 0);
+  __builtin_HEXAGON_S4_vxsubaddhr(0, 0);
   // CHECK: @llvm.hexagon.S4.vxsubaddw
-  __builtin_HEXAGON_S5_asrhub_rnd_sat_goodsyntax(0, 0);
+  __builtin_HEXAGON_S4_vxsubaddw(0, 0);
   // CHECK: @llvm.hexagon.S5.asrhub.rnd.sat.goodsyntax
-  __builtin_HEXAGON_S5_asrhub_sat(0, 0);
+  __builtin_HEXAGON_S5_asrhub_rnd_sat_goodsyntax(0, 0);
   // CHECK: @llvm.hexagon.S5.asrhub.sat
-  __builtin_HEXAGON_S5_popcountp(0);
+  __builtin_HEXAGON_S5_asrhub_sat(0, 0);
   // CHECK: @llvm.hexagon.S5.popcountp
-  __builtin_HEXAGON_S5_vasrhrnd_goodsyntax(0, 0);
+  __builtin_HEXAGON_S5_popcountp(0);
   // CHECK: @llvm.hexagon.S5.vasrhrnd.goodsyntax
-  __builtin_HEXAGON_S6_rol_i_p(0, 0);
+  __builtin_HEXAGON_S5_vasrhrnd_goodsyntax(0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.p
-  __builtin_HEXAGON_S6_rol_i_p_acc(0, 0, 0);
+  __builtin_HEXAGON_S6_rol_i_p(0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.p.acc
-  __builtin_HEXAGON_S6_rol_i_p_and(0, 0, 0);
+  __builtin_HEXAGON_S6_rol_i_p_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.p.and
-  __builtin_HEXAGON_S6_rol_i_p_nac(0, 0, 0);
+  __builtin_HEXAGON_S6_rol_i_p_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.p.nac
-  __builtin_HEXAGON_S6_rol_i_p_or(0, 0, 0);
+  __builtin_HEXAGON_S6_rol_i_p_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.p.or
-  __builtin_HEXAGON_S6_rol_i_p_xacc(0, 0, 0);
+  __builtin_HEXAGON_S6_rol_i_p_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.p.xacc
-  __builtin_HEXAGON_S6_rol_i_r(0, 0);
+  __builtin_HEXAGON_S6_rol_i_p_xacc(0, 0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.r
-  __builtin_HEXAGON_S6_rol_i_r_acc(0, 0, 0);
+  __builtin_HEXAGON_S6_rol_i_r(0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.r.acc
-  __builtin_HEXAGON_S6_rol_i_r_and(0, 0, 0);
+  __builtin_HEXAGON_S6_rol_i_r_acc(0, 0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.r.and
-  __builtin_HEXAGON_S6_rol_i_r_nac(0, 0, 0);
+  __builtin_HEXAGON_S6_rol_i_r_and(0, 0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.r.nac
-  __builtin_HEXAGON_S6_rol_i_r_or(0, 0, 0);
+  __builtin_HEXAGON_S6_rol_i_r_nac(0, 0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.r.or
-  __builtin_HEXAGON_S6_rol_i_r_xacc(0, 0, 0);
+  __builtin_HEXAGON_S6_rol_i_r_or(0, 0, 0);
   // CHECK: @llvm.hexagon.S6.rol.i.r.xacc
-  __builtin_HEXAGON_V6_extractw_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.extractw.128B
-  __builtin_HEXAGON_V6_extractw(v16, 0);
+  __builtin_HEXAGON_S6_rol_i_r_xacc(0, 0, 0);
+  // CHECK: @llvm.hexagon.S6.vsplatrbp
+  __builtin_HEXAGON_S6_vsplatrbp(0);
+  // CHECK: @llvm.hexagon.S6.vtrunehb.ppp
+  __builtin_HEXAGON_S6_vtrunehb_ppp(0, 0);
+  // CHECK: @llvm.hexagon.S6.vtrunohb.ppp
+  __builtin_HEXAGON_S6_vtrunohb_ppp(0, 0);
   // CHECK: @llvm.hexagon.V6.extractw
-  __builtin_HEXAGON_V6_hi_128B(v64);
-  // CHECK: @llvm.hexagon.V6.hi.128B
-  __builtin_HEXAGON_V6_hi(v32);
+  __builtin_HEXAGON_V6_extractw(v64, 0);
+  // CHECK: @llvm.hexagon.V6.extractw.128B
+  __builtin_HEXAGON_V6_extractw_128B(v128, 0);
   // CHECK: @llvm.hexagon.V6.hi
-  __builtin_HEXAGON_V6_lo_128B(v64);
-  // CHECK: @llvm.hexagon.V6.lo.128B
-  __builtin_HEXAGON_V6_lo(v32);
+  __builtin_HEXAGON_V6_hi(v128);
+  // CHECK: @llvm.hexagon.V6.hi.128B
+  __builtin_HEXAGON_V6_hi_128B(v256);
   // CHECK: @llvm.hexagon.V6.lo
-  __builtin_HEXAGON_V6_lvsplatw(0);
+  __builtin_HEXAGON_V6_lo(v128);
+  // CHECK: @llvm.hexagon.V6.lo.128B
+  __builtin_HEXAGON_V6_lo_128B(v256);
+  // CHECK: @llvm.hexagon.V6.lvsplatb
+  __builtin_HEXAGON_V6_lvsplatb(0);
+  // CHECK: @llvm.hexagon.V6.lvsplatb.128B
+  __builtin_HEXAGON_V6_lvsplatb_128B(0);
+  // CHECK: @llvm.hexagon.V6.lvsplath
+  __builtin_HEXAGON_V6_lvsplath(0);
+  // CHECK: @llvm.hexagon.V6.lvsplath.128B
+  __builtin_HEXAGON_V6_lvsplath_128B(0);
   // CHECK: @llvm.hexagon.V6.lvsplatw
-  __builtin_HEXAGON_V6_lvsplatw_128B(0);
+  __builtin_HEXAGON_V6_lvsplatw(0);
   // CHECK: @llvm.hexagon.V6.lvsplatw.128B
-  __builtin_HEXAGON_V6_pred_and_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.pred.and.128B
-  __builtin_HEXAGON_V6_pred_and_n_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.pred.and.n.128B
-  __builtin_HEXAGON_V6_pred_and_n(v16, v16);
-  // CHECK: @llvm.hexagon.V6.pred.and.n
-  __builtin_HEXAGON_V6_pred_and(v16, v16);
+  __builtin_HEXAGON_V6_lvsplatw_128B(0);
   // CHECK: @llvm.hexagon.V6.pred.and
-  __builtin_HEXAGON_V6_pred_not_128B(v32);
-  // CHECK: @llvm.hexagon.V6.pred.not.128B
-  __builtin_HEXAGON_V6_pred_not(v16);
+  __builtin_HEXAGON_V6_pred_and(v64, v64);
+  // CHECK: @llvm.hexagon.V6.pred.and.128B
+  __builtin_HEXAGON_V6_pred_and_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.pred.and.n
+  __builtin_HEXAGON_V6_pred_and_n(v64, v64);
+  // CHECK: @llvm.hexagon.V6.pred.and.n.128B
+  __builtin_HEXAGON_V6_pred_and_n_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.pred.not
-  __builtin_HEXAGON_V6_pred_or_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.pred.or.128B
-  __builtin_HEXAGON_V6_pred_or_n_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.pred.or.n.128B
-  __builtin_HEXAGON_V6_pred_or_n(v16, v16);
-  // CHECK: @llvm.hexagon.V6.pred.or.n
-  __builtin_HEXAGON_V6_pred_or(v16, v16);
+  __builtin_HEXAGON_V6_pred_not(v64);
+  // CHECK: @llvm.hexagon.V6.pred.not.128B
+  __builtin_HEXAGON_V6_pred_not_128B(v128);
   // CHECK: @llvm.hexagon.V6.pred.or
-  __builtin_HEXAGON_V6_pred_scalar2(0);
+  __builtin_HEXAGON_V6_pred_or(v64, v64);
+  // CHECK: @llvm.hexagon.V6.pred.or.128B
+  __builtin_HEXAGON_V6_pred_or_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.pred.or.n
+  __builtin_HEXAGON_V6_pred_or_n(v64, v64);
+  // CHECK: @llvm.hexagon.V6.pred.or.n.128B
+  __builtin_HEXAGON_V6_pred_or_n_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.pred.scalar2
-  __builtin_HEXAGON_V6_pred_scalar2_128B(0);
+  __builtin_HEXAGON_V6_pred_scalar2(0);
   // CHECK: @llvm.hexagon.V6.pred.scalar2.128B
-  __builtin_HEXAGON_V6_pred_xor_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.pred.xor.128B
-  __builtin_HEXAGON_V6_pred_xor(v16, v16);
+  __builtin_HEXAGON_V6_pred_scalar2_128B(0);
+  // CHECK: @llvm.hexagon.V6.pred.scalar2v2
+  __builtin_HEXAGON_V6_pred_scalar2v2(0);
+  // CHECK: @llvm.hexagon.V6.pred.scalar2v2.128B
+  __builtin_HEXAGON_V6_pred_scalar2v2_128B(0);
   // CHECK: @llvm.hexagon.V6.pred.xor
-  __builtin_HEXAGON_V6_vabsdiffh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vabsdiffh.128B
-  __builtin_HEXAGON_V6_vabsdiffh(v16, v16);
+  __builtin_HEXAGON_V6_pred_xor(v64, v64);
+  // CHECK: @llvm.hexagon.V6.pred.xor.128B
+  __builtin_HEXAGON_V6_pred_xor_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.shuffeqh
+  __builtin_HEXAGON_V6_shuffeqh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.shuffeqh.128B
+  __builtin_HEXAGON_V6_shuffeqh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.shuffeqw
+  __builtin_HEXAGON_V6_shuffeqw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.shuffeqw.128B
+  __builtin_HEXAGON_V6_shuffeqw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai
+  __builtin_HEXAGON_V6_vS32b_nqpred_ai(v64, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai.128B
+  __builtin_HEXAGON_V6_vS32b_nqpred_ai_128B(v128, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai
+  __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai(v64, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B
+  __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B(v128, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai
+  __builtin_HEXAGON_V6_vS32b_nt_qpred_ai(v64, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B
+  __builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B(v128, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vS32b.qpred.ai
+  __builtin_HEXAGON_V6_vS32b_qpred_ai(v64, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vS32b.qpred.ai.128B
+  __builtin_HEXAGON_V6_vS32b_qpred_ai_128B(v128, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vabsb
+  __builtin_HEXAGON_V6_vabsb(v64);
+  // CHECK: @llvm.hexagon.V6.vabsb.128B
+  __builtin_HEXAGON_V6_vabsb_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vabsb.sat
+  __builtin_HEXAGON_V6_vabsb_sat(v64);
+  // CHECK: @llvm.hexagon.V6.vabsb.sat.128B
+  __builtin_HEXAGON_V6_vabsb_sat_128B(v128);
   // CHECK: @llvm.hexagon.V6.vabsdiffh
-  __builtin_HEXAGON_V6_vabsdiffub_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vabsdiffub.128B
-  __builtin_HEXAGON_V6_vabsdiffub(v16, v16);
+  __builtin_HEXAGON_V6_vabsdiffh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vabsdiffh.128B
+  __builtin_HEXAGON_V6_vabsdiffh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vabsdiffub
-  __builtin_HEXAGON_V6_vabsdiffuh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vabsdiffuh.128B
-  __builtin_HEXAGON_V6_vabsdiffuh(v16, v16);
+  __builtin_HEXAGON_V6_vabsdiffub(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vabsdiffub.128B
+  __builtin_HEXAGON_V6_vabsdiffub_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vabsdiffuh
-  __builtin_HEXAGON_V6_vabsdiffw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vabsdiffw.128B
-  __builtin_HEXAGON_V6_vabsdiffw(v16, v16);
+  __builtin_HEXAGON_V6_vabsdiffuh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vabsdiffuh.128B
+  __builtin_HEXAGON_V6_vabsdiffuh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vabsdiffw
-  __builtin_HEXAGON_V6_vabsh_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vabsh.128B
-  __builtin_HEXAGON_V6_vabsh_sat_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vabsh.sat.128B
-  __builtin_HEXAGON_V6_vabsh_sat(v16);
-  // CHECK: @llvm.hexagon.V6.vabsh.sat
-  __builtin_HEXAGON_V6_vabsh(v16);
+  __builtin_HEXAGON_V6_vabsdiffw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vabsdiffw.128B
+  __builtin_HEXAGON_V6_vabsdiffw_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vabsh
-  __builtin_HEXAGON_V6_vabsw_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vabsw.128B
-  __builtin_HEXAGON_V6_vabsw_sat_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vabsw.sat.128B
-  __builtin_HEXAGON_V6_vabsw_sat(v16);
-  // CHECK: @llvm.hexagon.V6.vabsw.sat
-  __builtin_HEXAGON_V6_vabsw(v16);
+  __builtin_HEXAGON_V6_vabsh(v64);
+  // CHECK: @llvm.hexagon.V6.vabsh.128B
+  __builtin_HEXAGON_V6_vabsh_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vabsh.sat
+  __builtin_HEXAGON_V6_vabsh_sat(v64);
+  // CHECK: @llvm.hexagon.V6.vabsh.sat.128B
+  __builtin_HEXAGON_V6_vabsh_sat_128B(v128);
   // CHECK: @llvm.hexagon.V6.vabsw
-  __builtin_HEXAGON_V6_vaddb_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddb.128B
-  __builtin_HEXAGON_V6_vaddb_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vaddb.dv.128B
-  __builtin_HEXAGON_V6_vaddb_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddb.dv
-  __builtin_HEXAGON_V6_vaddbnq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddbnq.128B
-  __builtin_HEXAGON_V6_vaddbnq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vaddbnq
-  __builtin_HEXAGON_V6_vaddbq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddbq.128B
-  __builtin_HEXAGON_V6_vaddbq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vaddbq
-  __builtin_HEXAGON_V6_vaddb(v16, v16);
+  __builtin_HEXAGON_V6_vabsw(v64);
+  // CHECK: @llvm.hexagon.V6.vabsw.128B
+  __builtin_HEXAGON_V6_vabsw_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vabsw.sat
+  __builtin_HEXAGON_V6_vabsw_sat(v64);
+  // CHECK: @llvm.hexagon.V6.vabsw.sat.128B
+  __builtin_HEXAGON_V6_vabsw_sat_128B(v128);
   // CHECK: @llvm.hexagon.V6.vaddb
-  __builtin_HEXAGON_V6_vaddh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddh.128B
-  __builtin_HEXAGON_V6_vaddh_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vaddh.dv.128B
-  __builtin_HEXAGON_V6_vaddh_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddh.dv
-  __builtin_HEXAGON_V6_vaddhnq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddhnq.128B
-  __builtin_HEXAGON_V6_vaddhnq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vaddhnq
-  __builtin_HEXAGON_V6_vaddhq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddhq.128B
-  __builtin_HEXAGON_V6_vaddhq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vaddhq
-  __builtin_HEXAGON_V6_vaddhsat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddhsat.128B
-  __builtin_HEXAGON_V6_vaddhsat_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vaddhsat.dv.128B
-  __builtin_HEXAGON_V6_vaddhsat_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddhsat.dv
-  __builtin_HEXAGON_V6_vaddhsat(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vaddhsat
-  __builtin_HEXAGON_V6_vaddh(v16, v16);
+  __builtin_HEXAGON_V6_vaddb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddb.128B
+  __builtin_HEXAGON_V6_vaddb_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddb.dv
+  __builtin_HEXAGON_V6_vaddb_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddb.dv.128B
+  __builtin_HEXAGON_V6_vaddb_dv_128B(v256, v256);
+  // CHECK: @llvm.hexagon.V6.vaddbnq
+  __builtin_HEXAGON_V6_vaddbnq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddbnq.128B
+  __builtin_HEXAGON_V6_vaddbnq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddbq
+  __builtin_HEXAGON_V6_vaddbq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddbq.128B
+  __builtin_HEXAGON_V6_vaddbq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddbsat
+  __builtin_HEXAGON_V6_vaddbsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddbsat.128B
+  __builtin_HEXAGON_V6_vaddbsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddbsat.dv
+  __builtin_HEXAGON_V6_vaddbsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddbsat.dv.128B
+  __builtin_HEXAGON_V6_vaddbsat_dv_128B(v256, v256);
+  // CHECK: @llvm.hexagon.V6.vaddcarry
+  __builtin_HEXAGON_V6_vaddcarry(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vaddcarry.128B
+  __builtin_HEXAGON_V6_vaddcarry_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vaddclbh
+  __builtin_HEXAGON_V6_vaddclbh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddclbh.128B
+  __builtin_HEXAGON_V6_vaddclbh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddclbw
+  __builtin_HEXAGON_V6_vaddclbw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddclbw.128B
+  __builtin_HEXAGON_V6_vaddclbw_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddh
-  __builtin_HEXAGON_V6_vaddhw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddhw.128B
-  __builtin_HEXAGON_V6_vaddhw(v16, v16);
+  __builtin_HEXAGON_V6_vaddh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddh.128B
+  __builtin_HEXAGON_V6_vaddh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddh.dv
+  __builtin_HEXAGON_V6_vaddh_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddh.dv.128B
+  __builtin_HEXAGON_V6_vaddh_dv_128B(v256, v256);
+  // CHECK: @llvm.hexagon.V6.vaddhnq
+  __builtin_HEXAGON_V6_vaddhnq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddhnq.128B
+  __builtin_HEXAGON_V6_vaddhnq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddhq
+  __builtin_HEXAGON_V6_vaddhq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddhq.128B
+  __builtin_HEXAGON_V6_vaddhq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddhsat
+  __builtin_HEXAGON_V6_vaddhsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddhsat.128B
+  __builtin_HEXAGON_V6_vaddhsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddhsat.dv
+  __builtin_HEXAGON_V6_vaddhsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddhsat.dv.128B
+  __builtin_HEXAGON_V6_vaddhsat_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vaddhw
-  __builtin_HEXAGON_V6_vaddubh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddubh.128B
-  __builtin_HEXAGON_V6_vaddubh(v16, v16);
+  __builtin_HEXAGON_V6_vaddhw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddhw.128B
+  __builtin_HEXAGON_V6_vaddhw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddhw.acc
+  __builtin_HEXAGON_V6_vaddhw_acc(v128, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddhw.acc.128B
+  __builtin_HEXAGON_V6_vaddhw_acc_128B(v256, v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddubh
-  __builtin_HEXAGON_V6_vaddubsat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddubsat.128B
-  __builtin_HEXAGON_V6_vaddubsat_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vaddubsat.dv.128B
-  __builtin_HEXAGON_V6_vaddubsat_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddubsat.dv
-  __builtin_HEXAGON_V6_vaddubsat(v16, v16);
+  __builtin_HEXAGON_V6_vaddubh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddubh.128B
+  __builtin_HEXAGON_V6_vaddubh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddubh.acc
+  __builtin_HEXAGON_V6_vaddubh_acc(v128, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddubh.acc.128B
+  __builtin_HEXAGON_V6_vaddubh_acc_128B(v256, v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddubsat
-  __builtin_HEXAGON_V6_vadduhsat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vadduhsat.128B
-  __builtin_HEXAGON_V6_vadduhsat_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vadduhsat.dv.128B
-  __builtin_HEXAGON_V6_vadduhsat_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vadduhsat.dv
-  __builtin_HEXAGON_V6_vadduhsat(v16, v16);
+  __builtin_HEXAGON_V6_vaddubsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddubsat.128B
+  __builtin_HEXAGON_V6_vaddubsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddubsat.dv
+  __builtin_HEXAGON_V6_vaddubsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddubsat.dv.128B
+  __builtin_HEXAGON_V6_vaddubsat_dv_128B(v256, v256);
+  // CHECK: @llvm.hexagon.V6.vaddububb.sat
+  __builtin_HEXAGON_V6_vaddububb_sat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddububb.sat.128B
+  __builtin_HEXAGON_V6_vaddububb_sat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vadduhsat
-  __builtin_HEXAGON_V6_vadduhw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vadduhw.128B
-  __builtin_HEXAGON_V6_vadduhw(v16, v16);
+  __builtin_HEXAGON_V6_vadduhsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vadduhsat.128B
+  __builtin_HEXAGON_V6_vadduhsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vadduhsat.dv
+  __builtin_HEXAGON_V6_vadduhsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vadduhsat.dv.128B
+  __builtin_HEXAGON_V6_vadduhsat_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vadduhw
-  __builtin_HEXAGON_V6_vaddw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddw.128B
-  __builtin_HEXAGON_V6_vaddw_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vaddw.dv.128B
-  __builtin_HEXAGON_V6_vaddw_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddw.dv
-  __builtin_HEXAGON_V6_vaddwnq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddwnq.128B
-  __builtin_HEXAGON_V6_vaddwnq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vaddwnq
-  __builtin_HEXAGON_V6_vaddwq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddwq.128B
-  __builtin_HEXAGON_V6_vaddwq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vaddwq
-  __builtin_HEXAGON_V6_vaddwsat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddwsat.128B
-  __builtin_HEXAGON_V6_vaddwsat_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vaddwsat.dv.128B
-  __builtin_HEXAGON_V6_vaddwsat_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaddwsat.dv
-  __builtin_HEXAGON_V6_vaddwsat(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vaddwsat
-  __builtin_HEXAGON_V6_vaddw(v16, v16);
+  __builtin_HEXAGON_V6_vadduhw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vadduhw.128B
+  __builtin_HEXAGON_V6_vadduhw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vadduhw.acc
+  __builtin_HEXAGON_V6_vadduhw_acc(v128, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vadduhw.acc.128B
+  __builtin_HEXAGON_V6_vadduhw_acc_128B(v256, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vadduwsat
+  __builtin_HEXAGON_V6_vadduwsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vadduwsat.128B
+  __builtin_HEXAGON_V6_vadduwsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vadduwsat.dv
+  __builtin_HEXAGON_V6_vadduwsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vadduwsat.dv.128B
+  __builtin_HEXAGON_V6_vadduwsat_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vaddw
-  __builtin_HEXAGON_V6_valignb_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.valignb.128B
-  __builtin_HEXAGON_V6_valignbi_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.valignbi.128B
-  __builtin_HEXAGON_V6_valignbi(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.valignbi
-  __builtin_HEXAGON_V6_valignb(v16, v16, 0);
+  __builtin_HEXAGON_V6_vaddw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddw.128B
+  __builtin_HEXAGON_V6_vaddw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddw.dv
+  __builtin_HEXAGON_V6_vaddw_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddw.dv.128B
+  __builtin_HEXAGON_V6_vaddw_dv_128B(v256, v256);
+  // CHECK: @llvm.hexagon.V6.vaddwnq
+  __builtin_HEXAGON_V6_vaddwnq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddwnq.128B
+  __builtin_HEXAGON_V6_vaddwnq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddwq
+  __builtin_HEXAGON_V6_vaddwq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddwq.128B
+  __builtin_HEXAGON_V6_vaddwq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddwsat
+  __builtin_HEXAGON_V6_vaddwsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaddwsat.128B
+  __builtin_HEXAGON_V6_vaddwsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddwsat.dv
+  __builtin_HEXAGON_V6_vaddwsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vaddwsat.dv.128B
+  __builtin_HEXAGON_V6_vaddwsat_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.valignb
-  __builtin_HEXAGON_V6_vand_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vand.128B
-  __builtin_HEXAGON_V6_vandqrt_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vandqrt.128B
-  __builtin_HEXAGON_V6_vandqrt_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vandqrt.acc.128B
-  __builtin_HEXAGON_V6_vandqrt_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vandqrt.acc
-  __builtin_HEXAGON_V6_vandqrt(v16, 0);
-  // CHECK: @llvm.hexagon.V6.vandqrt
-  __builtin_HEXAGON_V6_vand(v16, v16);
+  __builtin_HEXAGON_V6_valignb(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.valignb.128B
+  __builtin_HEXAGON_V6_valignb_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.valignbi
+  __builtin_HEXAGON_V6_valignbi(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.valignbi.128B
+  __builtin_HEXAGON_V6_valignbi_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vand
-  __builtin_HEXAGON_V6_vandvrt_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vandvrt.128B
-  __builtin_HEXAGON_V6_vandvrt_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vandvrt.acc.128B
-  __builtin_HEXAGON_V6_vandvrt_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vandvrt.acc
-  __builtin_HEXAGON_V6_vandvrt(v16, 0);
+  __builtin_HEXAGON_V6_vand(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vand.128B
+  __builtin_HEXAGON_V6_vand_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vandnqrt
+  __builtin_HEXAGON_V6_vandnqrt(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vandnqrt.128B
+  __builtin_HEXAGON_V6_vandnqrt_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vandnqrt.acc
+  __builtin_HEXAGON_V6_vandnqrt_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vandnqrt.acc.128B
+  __builtin_HEXAGON_V6_vandnqrt_acc_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vandqrt
+  __builtin_HEXAGON_V6_vandqrt(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vandqrt.128B
+  __builtin_HEXAGON_V6_vandqrt_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vandqrt.acc
+  __builtin_HEXAGON_V6_vandqrt_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vandqrt.acc.128B
+  __builtin_HEXAGON_V6_vandqrt_acc_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vandvnqv
+  __builtin_HEXAGON_V6_vandvnqv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vandvnqv.128B
+  __builtin_HEXAGON_V6_vandvnqv_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vandvqv
+  __builtin_HEXAGON_V6_vandvqv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vandvqv.128B
+  __builtin_HEXAGON_V6_vandvqv_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vandvrt
-  __builtin_HEXAGON_V6_vaslh_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vaslh.128B
-  __builtin_HEXAGON_V6_vaslhv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaslhv.128B
-  __builtin_HEXAGON_V6_vaslh(v16, 0);
+  __builtin_HEXAGON_V6_vandvrt(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vandvrt.128B
+  __builtin_HEXAGON_V6_vandvrt_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vandvrt.acc
+  __builtin_HEXAGON_V6_vandvrt_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vandvrt.acc.128B
+  __builtin_HEXAGON_V6_vandvrt_acc_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vaslh
-  __builtin_HEXAGON_V6_vaslhv(v16, v16);
+  __builtin_HEXAGON_V6_vaslh(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vaslh.128B
+  __builtin_HEXAGON_V6_vaslh_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vaslh.acc
+  __builtin_HEXAGON_V6_vaslh_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vaslh.acc.128B
+  __builtin_HEXAGON_V6_vaslh_acc_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vaslhv
-  __builtin_HEXAGON_V6_vaslw_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vaslw.128B
-  __builtin_HEXAGON_V6_vaslw_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vaslw.acc.128B
-  __builtin_HEXAGON_V6_vaslw_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vaslw.acc
-  __builtin_HEXAGON_V6_vaslwv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vaslwv.128B
-  __builtin_HEXAGON_V6_vaslw(v16, 0);
+  __builtin_HEXAGON_V6_vaslhv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaslhv.128B
+  __builtin_HEXAGON_V6_vaslhv_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vaslw
-  __builtin_HEXAGON_V6_vaslwv(v16, v16);
+  __builtin_HEXAGON_V6_vaslw(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vaslw.128B
+  __builtin_HEXAGON_V6_vaslw_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vaslw.acc
+  __builtin_HEXAGON_V6_vaslw_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vaslw.acc.128B
+  __builtin_HEXAGON_V6_vaslw_acc_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vaslwv
-  __builtin_HEXAGON_V6_vasrh_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vasrh.128B
-  __builtin_HEXAGON_V6_vasrhbrndsat_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vasrhbrndsat.128B
-  __builtin_HEXAGON_V6_vasrhbrndsat(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vasrhbrndsat
-  __builtin_HEXAGON_V6_vasrhubrndsat_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vasrhubrndsat.128B
-  __builtin_HEXAGON_V6_vasrhubrndsat(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vasrhubrndsat
-  __builtin_HEXAGON_V6_vasrhubsat_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vasrhubsat.128B
-  __builtin_HEXAGON_V6_vasrhubsat(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vasrhubsat
-  __builtin_HEXAGON_V6_vasrhv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vasrhv.128B
-  __builtin_HEXAGON_V6_vasrh(v16, 0);
+  __builtin_HEXAGON_V6_vaslwv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vaslwv.128B
+  __builtin_HEXAGON_V6_vaslwv_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vasrh
-  __builtin_HEXAGON_V6_vasrhv(v16, v16);
+  __builtin_HEXAGON_V6_vasrh(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrh.128B
+  __builtin_HEXAGON_V6_vasrh_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasrh.acc
+  __builtin_HEXAGON_V6_vasrh_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrh.acc.128B
+  __builtin_HEXAGON_V6_vasrh_acc_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasrhbrndsat
+  __builtin_HEXAGON_V6_vasrhbrndsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrhbrndsat.128B
+  __builtin_HEXAGON_V6_vasrhbrndsat_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasrhbsat
+  __builtin_HEXAGON_V6_vasrhbsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrhbsat.128B
+  __builtin_HEXAGON_V6_vasrhbsat_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasrhubrndsat
+  __builtin_HEXAGON_V6_vasrhubrndsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrhubrndsat.128B
+  __builtin_HEXAGON_V6_vasrhubrndsat_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasrhubsat
+  __builtin_HEXAGON_V6_vasrhubsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrhubsat.128B
+  __builtin_HEXAGON_V6_vasrhubsat_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vasrhv
-  __builtin_HEXAGON_V6_vasrw_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vasrw.128B
-  __builtin_HEXAGON_V6_vasrw_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vasrw.acc.128B
-  __builtin_HEXAGON_V6_vasrw_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vasrw.acc
-  __builtin_HEXAGON_V6_vasrwh_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vasrwh.128B
-  __builtin_HEXAGON_V6_vasrwhrndsat_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vasrwhrndsat.128B
-  __builtin_HEXAGON_V6_vasrwhrndsat(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vasrwhrndsat
-  __builtin_HEXAGON_V6_vasrwhsat_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vasrwhsat.128B
-  __builtin_HEXAGON_V6_vasrwhsat(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vasrwhsat
-  __builtin_HEXAGON_V6_vasrwh(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vasrwh
-  __builtin_HEXAGON_V6_vasrwuhsat_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vasrwuhsat.128B
-  __builtin_HEXAGON_V6_vasrwuhsat(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vasrwuhsat
-  __builtin_HEXAGON_V6_vasrwv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vasrwv.128B
-  __builtin_HEXAGON_V6_vasrw(v16, 0);
+  __builtin_HEXAGON_V6_vasrhv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vasrhv.128B
+  __builtin_HEXAGON_V6_vasrhv_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vasruhubrndsat
+  __builtin_HEXAGON_V6_vasruhubrndsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasruhubrndsat.128B
+  __builtin_HEXAGON_V6_vasruhubrndsat_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasruhubsat
+  __builtin_HEXAGON_V6_vasruhubsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasruhubsat.128B
+  __builtin_HEXAGON_V6_vasruhubsat_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasruwuhrndsat
+  __builtin_HEXAGON_V6_vasruwuhrndsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasruwuhrndsat.128B
+  __builtin_HEXAGON_V6_vasruwuhrndsat_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasruwuhsat
+  __builtin_HEXAGON_V6_vasruwuhsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasruwuhsat.128B
+  __builtin_HEXAGON_V6_vasruwuhsat_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vasrw
-  __builtin_HEXAGON_V6_vasrwv(v16, v16);
+  __builtin_HEXAGON_V6_vasrw(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrw.128B
+  __builtin_HEXAGON_V6_vasrw_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasrw.acc
+  __builtin_HEXAGON_V6_vasrw_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrw.acc.128B
+  __builtin_HEXAGON_V6_vasrw_acc_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasrwh
+  __builtin_HEXAGON_V6_vasrwh(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrwh.128B
+  __builtin_HEXAGON_V6_vasrwh_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasrwhrndsat
+  __builtin_HEXAGON_V6_vasrwhrndsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrwhrndsat.128B
+  __builtin_HEXAGON_V6_vasrwhrndsat_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasrwhsat
+  __builtin_HEXAGON_V6_vasrwhsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrwhsat.128B
+  __builtin_HEXAGON_V6_vasrwhsat_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasrwuhrndsat
+  __builtin_HEXAGON_V6_vasrwuhrndsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrwuhrndsat.128B
+  __builtin_HEXAGON_V6_vasrwuhrndsat_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vasrwuhsat
+  __builtin_HEXAGON_V6_vasrwuhsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vasrwuhsat.128B
+  __builtin_HEXAGON_V6_vasrwuhsat_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vasrwv
-  __builtin_HEXAGON_V6_vassign_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vassign.128B
-  __builtin_HEXAGON_V6_vassignp_128B(v64);
-  // CHECK: @llvm.hexagon.V6.vassignp.128B
-  __builtin_HEXAGON_V6_vassignp(v32);
-  // CHECK: @llvm.hexagon.V6.vassignp
-  __builtin_HEXAGON_V6_vassign(v16);
+  __builtin_HEXAGON_V6_vasrwv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vasrwv.128B
+  __builtin_HEXAGON_V6_vasrwv_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vassign
-  __builtin_HEXAGON_V6_vavgh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vavgh.128B
-  __builtin_HEXAGON_V6_vavghrnd_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vavghrnd.128B
-  __builtin_HEXAGON_V6_vavghrnd(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vavghrnd
-  __builtin_HEXAGON_V6_vavgh(v16, v16);
+  __builtin_HEXAGON_V6_vassign(v64);
+  // CHECK: @llvm.hexagon.V6.vassign.128B
+  __builtin_HEXAGON_V6_vassign_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vassignp
+  __builtin_HEXAGON_V6_vassignp(v128);
+  // CHECK: @llvm.hexagon.V6.vassignp.128B
+  __builtin_HEXAGON_V6_vassignp_128B(v256);
+  // CHECK: @llvm.hexagon.V6.vavgb
+  __builtin_HEXAGON_V6_vavgb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavgb.128B
+  __builtin_HEXAGON_V6_vavgb_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vavgbrnd
+  __builtin_HEXAGON_V6_vavgbrnd(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavgbrnd.128B
+  __builtin_HEXAGON_V6_vavgbrnd_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vavgh
-  __builtin_HEXAGON_V6_vavgub_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vavgub.128B
-  __builtin_HEXAGON_V6_vavgubrnd_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vavgubrnd.128B
-  __builtin_HEXAGON_V6_vavgubrnd(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vavgubrnd
-  __builtin_HEXAGON_V6_vavgub(v16, v16);
+  __builtin_HEXAGON_V6_vavgh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavgh.128B
+  __builtin_HEXAGON_V6_vavgh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vavghrnd
+  __builtin_HEXAGON_V6_vavghrnd(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavghrnd.128B
+  __builtin_HEXAGON_V6_vavghrnd_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vavgub
-  __builtin_HEXAGON_V6_vavguh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vavguh.128B
-  __builtin_HEXAGON_V6_vavguhrnd_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vavguhrnd.128B
-  __builtin_HEXAGON_V6_vavguhrnd(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vavguhrnd
-  __builtin_HEXAGON_V6_vavguh(v16, v16);
+  __builtin_HEXAGON_V6_vavgub(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavgub.128B
+  __builtin_HEXAGON_V6_vavgub_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vavgubrnd
+  __builtin_HEXAGON_V6_vavgubrnd(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavgubrnd.128B
+  __builtin_HEXAGON_V6_vavgubrnd_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vavguh
-  __builtin_HEXAGON_V6_vavgw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vavgw.128B
-  __builtin_HEXAGON_V6_vavgwrnd_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vavgwrnd.128B
-  __builtin_HEXAGON_V6_vavgwrnd(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vavgwrnd
-  __builtin_HEXAGON_V6_vavgw(v16, v16);
+  __builtin_HEXAGON_V6_vavguh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavguh.128B
+  __builtin_HEXAGON_V6_vavguh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vavguhrnd
+  __builtin_HEXAGON_V6_vavguhrnd(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavguhrnd.128B
+  __builtin_HEXAGON_V6_vavguhrnd_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vavguw
+  __builtin_HEXAGON_V6_vavguw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavguw.128B
+  __builtin_HEXAGON_V6_vavguw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vavguwrnd
+  __builtin_HEXAGON_V6_vavguwrnd(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavguwrnd.128B
+  __builtin_HEXAGON_V6_vavguwrnd_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vavgw
-  __builtin_HEXAGON_V6_vcl0h_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vcl0h.128B
-  __builtin_HEXAGON_V6_vcl0h(v16);
+  __builtin_HEXAGON_V6_vavgw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavgw.128B
+  __builtin_HEXAGON_V6_vavgw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vavgwrnd
+  __builtin_HEXAGON_V6_vavgwrnd(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vavgwrnd.128B
+  __builtin_HEXAGON_V6_vavgwrnd_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vcl0h
-  __builtin_HEXAGON_V6_vcl0w_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vcl0w.128B
-  __builtin_HEXAGON_V6_vcl0w(v16);
+  __builtin_HEXAGON_V6_vcl0h(v64);
+  // CHECK: @llvm.hexagon.V6.vcl0h.128B
+  __builtin_HEXAGON_V6_vcl0h_128B(v128);
   // CHECK: @llvm.hexagon.V6.vcl0w
-  __builtin_HEXAGON_V6_vcombine_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vcombine.128B
-  __builtin_HEXAGON_V6_vcombine(v16, v16);
+  __builtin_HEXAGON_V6_vcl0w(v64);
+  // CHECK: @llvm.hexagon.V6.vcl0w.128B
+  __builtin_HEXAGON_V6_vcl0w_128B(v128);
   // CHECK: @llvm.hexagon.V6.vcombine
-  __builtin_HEXAGON_V6_vd0_128B();
-  // CHECK: @llvm.hexagon.V6.vd0.128B
-  __builtin_HEXAGON_V6_vd0();
+  __builtin_HEXAGON_V6_vcombine(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vcombine.128B
+  __builtin_HEXAGON_V6_vcombine_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vd0
-  __builtin_HEXAGON_V6_vdealb_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vdealb.128B
-  __builtin_HEXAGON_V6_vdealb4w_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vdealb4w.128B
-  __builtin_HEXAGON_V6_vdealb4w(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vdealb4w
-  __builtin_HEXAGON_V6_vdealb(v16);
+  __builtin_HEXAGON_V6_vd0();
+  // CHECK: @llvm.hexagon.V6.vd0.128B
+  __builtin_HEXAGON_V6_vd0_128B();
+  // CHECK: @llvm.hexagon.V6.vdd0
+  __builtin_HEXAGON_V6_vdd0();
+  // CHECK: @llvm.hexagon.V6.vdd0.128B
+  __builtin_HEXAGON_V6_vdd0_128B();
   // CHECK: @llvm.hexagon.V6.vdealb
-  __builtin_HEXAGON_V6_vdealh_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vdealh.128B
-  __builtin_HEXAGON_V6_vdealh(v16);
+  __builtin_HEXAGON_V6_vdealb(v64);
+  // CHECK: @llvm.hexagon.V6.vdealb.128B
+  __builtin_HEXAGON_V6_vdealb_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vdealb4w
+  __builtin_HEXAGON_V6_vdealb4w(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vdealb4w.128B
+  __builtin_HEXAGON_V6_vdealb4w_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vdealh
-  __builtin_HEXAGON_V6_vdealvdd_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdealvdd.128B
-  __builtin_HEXAGON_V6_vdealvdd(v16, v16, 0);
+  __builtin_HEXAGON_V6_vdealh(v64);
+  // CHECK: @llvm.hexagon.V6.vdealh.128B
+  __builtin_HEXAGON_V6_vdealh_128B(v128);
   // CHECK: @llvm.hexagon.V6.vdealvdd
-  __builtin_HEXAGON_V6_vdelta_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vdelta.128B
-  __builtin_HEXAGON_V6_vdelta(v16, v16);
+  __builtin_HEXAGON_V6_vdealvdd(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vdealvdd.128B
+  __builtin_HEXAGON_V6_vdealvdd_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vdelta
-  __builtin_HEXAGON_V6_vdmpybus_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpybus.128B
-  __builtin_HEXAGON_V6_vdmpybus_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpybus.acc.128B
-  __builtin_HEXAGON_V6_vdmpybus_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpybus.acc
-  __builtin_HEXAGON_V6_vdmpybus_dv_128B(v64, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpybus.dv.128B
-  __builtin_HEXAGON_V6_vdmpybus_dv_acc_128B(v64, v64, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpybus.dv.acc.128B
-  __builtin_HEXAGON_V6_vdmpybus_dv_acc(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpybus.dv.acc
-  __builtin_HEXAGON_V6_vdmpybus_dv(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpybus.dv
-  __builtin_HEXAGON_V6_vdmpybus(v16, 0);
+  __builtin_HEXAGON_V6_vdelta(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vdelta.128B
+  __builtin_HEXAGON_V6_vdelta_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vdmpybus
-  __builtin_HEXAGON_V6_vdmpyhb_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhb.128B
-  __builtin_HEXAGON_V6_vdmpyhb_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhb.acc.128B
-  __builtin_HEXAGON_V6_vdmpyhb_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhb.acc
-  __builtin_HEXAGON_V6_vdmpyhb_dv_128B(v64, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhb.dv.128B
-  __builtin_HEXAGON_V6_vdmpyhb_dv_acc_128B(v64, v64, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhb.dv.acc.128B
-  __builtin_HEXAGON_V6_vdmpyhb_dv_acc(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhb.dv.acc
-  __builtin_HEXAGON_V6_vdmpyhb_dv(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhb.dv
-  __builtin_HEXAGON_V6_vdmpyhb(v16, 0);
+  __builtin_HEXAGON_V6_vdmpybus(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpybus.128B
+  __builtin_HEXAGON_V6_vdmpybus_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpybus.acc
+  __builtin_HEXAGON_V6_vdmpybus_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpybus.acc.128B
+  __builtin_HEXAGON_V6_vdmpybus_acc_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpybus.dv
+  __builtin_HEXAGON_V6_vdmpybus_dv(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpybus.dv.128B
+  __builtin_HEXAGON_V6_vdmpybus_dv_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpybus.dv.acc
+  __builtin_HEXAGON_V6_vdmpybus_dv_acc(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpybus.dv.acc.128B
+  __builtin_HEXAGON_V6_vdmpybus_dv_acc_128B(v256, v256, 0);
   // CHECK: @llvm.hexagon.V6.vdmpyhb
-  __builtin_HEXAGON_V6_vdmpyhisat_128B(v64, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhisat.128B
-  __builtin_HEXAGON_V6_vdmpyhisat_acc_128B(v32, v64, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhisat.acc.128B
-  __builtin_HEXAGON_V6_vdmpyhisat_acc(v16, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhisat.acc
-  __builtin_HEXAGON_V6_vdmpyhisat(v32, 0);
+  __builtin_HEXAGON_V6_vdmpyhb(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhb.128B
+  __builtin_HEXAGON_V6_vdmpyhb_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhb.acc
+  __builtin_HEXAGON_V6_vdmpyhb_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhb.acc.128B
+  __builtin_HEXAGON_V6_vdmpyhb_acc_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhb.dv
+  __builtin_HEXAGON_V6_vdmpyhb_dv(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhb.dv.128B
+  __builtin_HEXAGON_V6_vdmpyhb_dv_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhb.dv.acc
+  __builtin_HEXAGON_V6_vdmpyhb_dv_acc(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhb.dv.acc.128B
+  __builtin_HEXAGON_V6_vdmpyhb_dv_acc_128B(v256, v256, 0);
   // CHECK: @llvm.hexagon.V6.vdmpyhisat
-  __builtin_HEXAGON_V6_vdmpyhsat_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhsat.128B
-  __builtin_HEXAGON_V6_vdmpyhsat_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhsat.acc.128B
-  __builtin_HEXAGON_V6_vdmpyhsat_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhsat.acc
-  __builtin_HEXAGON_V6_vdmpyhsat(v16, 0);
+  __builtin_HEXAGON_V6_vdmpyhisat(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhisat.128B
+  __builtin_HEXAGON_V6_vdmpyhisat_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhisat.acc
+  __builtin_HEXAGON_V6_vdmpyhisat_acc(v64, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhisat.acc.128B
+  __builtin_HEXAGON_V6_vdmpyhisat_acc_128B(v128, v256, 0);
   // CHECK: @llvm.hexagon.V6.vdmpyhsat
-  __builtin_HEXAGON_V6_vdmpyhsuisat_128B(v64, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhsuisat.128B
-  __builtin_HEXAGON_V6_vdmpyhsuisat_acc_128B(v32, v64, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhsuisat.acc.128B
-  __builtin_HEXAGON_V6_vdmpyhsuisat_acc(v16, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhsuisat.acc
-  __builtin_HEXAGON_V6_vdmpyhsuisat(v32, 0);
+  __builtin_HEXAGON_V6_vdmpyhsat(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhsat.128B
+  __builtin_HEXAGON_V6_vdmpyhsat_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhsat.acc
+  __builtin_HEXAGON_V6_vdmpyhsat_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhsat.acc.128B
+  __builtin_HEXAGON_V6_vdmpyhsat_acc_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vdmpyhsuisat
-  __builtin_HEXAGON_V6_vdmpyhsusat_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhsusat.128B
-  __builtin_HEXAGON_V6_vdmpyhsusat_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhsusat.acc.128B
-  __builtin_HEXAGON_V6_vdmpyhsusat_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vdmpyhsusat.acc
-  __builtin_HEXAGON_V6_vdmpyhsusat(v16, 0);
+  __builtin_HEXAGON_V6_vdmpyhsuisat(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhsuisat.128B
+  __builtin_HEXAGON_V6_vdmpyhsuisat_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhsuisat.acc
+  __builtin_HEXAGON_V6_vdmpyhsuisat_acc(v64, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhsuisat.acc.128B
+  __builtin_HEXAGON_V6_vdmpyhsuisat_acc_128B(v128, v256, 0);
   // CHECK: @llvm.hexagon.V6.vdmpyhsusat
-  __builtin_HEXAGON_V6_vdmpyhvsat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vdmpyhvsat.128B
-  __builtin_HEXAGON_V6_vdmpyhvsat_acc_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vdmpyhvsat.acc.128B
-  __builtin_HEXAGON_V6_vdmpyhvsat_acc(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vdmpyhvsat.acc
-  __builtin_HEXAGON_V6_vdmpyhvsat(v16, v16);
+  __builtin_HEXAGON_V6_vdmpyhsusat(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhsusat.128B
+  __builtin_HEXAGON_V6_vdmpyhsusat_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhsusat.acc
+  __builtin_HEXAGON_V6_vdmpyhsusat_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vdmpyhsusat.acc.128B
+  __builtin_HEXAGON_V6_vdmpyhsusat_acc_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vdmpyhvsat
-  __builtin_HEXAGON_V6_vdsaduh_128B(v64, 0);
-  // CHECK: @llvm.hexagon.V6.vdsaduh.128B
-  __builtin_HEXAGON_V6_vdsaduh_acc_128B(v64, v64, 0);
-  // CHECK: @llvm.hexagon.V6.vdsaduh.acc.128B
-  __builtin_HEXAGON_V6_vdsaduh_acc(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vdsaduh.acc
-  __builtin_HEXAGON_V6_vdsaduh(v32, 0);
+  __builtin_HEXAGON_V6_vdmpyhvsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vdmpyhvsat.128B
+  __builtin_HEXAGON_V6_vdmpyhvsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vdmpyhvsat.acc
+  __builtin_HEXAGON_V6_vdmpyhvsat_acc(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vdmpyhvsat.acc.128B
+  __builtin_HEXAGON_V6_vdmpyhvsat_acc_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vdsaduh
-  __builtin_HEXAGON_V6_veqb_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqb.128B
-  __builtin_HEXAGON_V6_veqb_and_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqb.and.128B
-  __builtin_HEXAGON_V6_veqb_and(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.veqb.and
-  __builtin_HEXAGON_V6_veqb_or_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqb.or.128B
-  __builtin_HEXAGON_V6_veqb_or(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.veqb.or
-  __builtin_HEXAGON_V6_veqb(v16, v16);
+  __builtin_HEXAGON_V6_vdsaduh(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdsaduh.128B
+  __builtin_HEXAGON_V6_vdsaduh_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vdsaduh.acc
+  __builtin_HEXAGON_V6_vdsaduh_acc(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vdsaduh.acc.128B
+  __builtin_HEXAGON_V6_vdsaduh_acc_128B(v256, v256, 0);
   // CHECK: @llvm.hexagon.V6.veqb
-  __builtin_HEXAGON_V6_veqb_xor_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqb.xor.128B
-  __builtin_HEXAGON_V6_veqb_xor(v16, v16, v16);
+  __builtin_HEXAGON_V6_veqb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqb.128B
+  __builtin_HEXAGON_V6_veqb_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.veqb.and
+  __builtin_HEXAGON_V6_veqb_and(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqb.and.128B
+  __builtin_HEXAGON_V6_veqb_and_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.veqb.or
+  __builtin_HEXAGON_V6_veqb_or(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqb.or.128B
+  __builtin_HEXAGON_V6_veqb_or_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqb.xor
-  __builtin_HEXAGON_V6_veqh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqh.128B
-  __builtin_HEXAGON_V6_veqh_and_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqh.and.128B
-  __builtin_HEXAGON_V6_veqh_and(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.veqh.and
-  __builtin_HEXAGON_V6_veqh_or_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqh.or.128B
-  __builtin_HEXAGON_V6_veqh_or(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.veqh.or
-  __builtin_HEXAGON_V6_veqh(v16, v16);
+  __builtin_HEXAGON_V6_veqb_xor(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqb.xor.128B
+  __builtin_HEXAGON_V6_veqb_xor_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqh
-  __builtin_HEXAGON_V6_veqh_xor_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqh.xor.128B
-  __builtin_HEXAGON_V6_veqh_xor(v16, v16, v16);
+  __builtin_HEXAGON_V6_veqh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqh.128B
+  __builtin_HEXAGON_V6_veqh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.veqh.and
+  __builtin_HEXAGON_V6_veqh_and(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqh.and.128B
+  __builtin_HEXAGON_V6_veqh_and_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.veqh.or
+  __builtin_HEXAGON_V6_veqh_or(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqh.or.128B
+  __builtin_HEXAGON_V6_veqh_or_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqh.xor
-  __builtin_HEXAGON_V6_veqw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqw.128B
-  __builtin_HEXAGON_V6_veqw_and_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqw.and.128B
-  __builtin_HEXAGON_V6_veqw_and(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.veqw.and
-  __builtin_HEXAGON_V6_veqw_or_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqw.or.128B
-  __builtin_HEXAGON_V6_veqw_or(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.veqw.or
-  __builtin_HEXAGON_V6_veqw(v16, v16);
+  __builtin_HEXAGON_V6_veqh_xor(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqh.xor.128B
+  __builtin_HEXAGON_V6_veqh_xor_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqw
-  __builtin_HEXAGON_V6_veqw_xor_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.veqw.xor.128B
-  __builtin_HEXAGON_V6_veqw_xor(v16, v16, v16);
+  __builtin_HEXAGON_V6_veqw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqw.128B
+  __builtin_HEXAGON_V6_veqw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.veqw.and
+  __builtin_HEXAGON_V6_veqw_and(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqw.and.128B
+  __builtin_HEXAGON_V6_veqw_and_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.veqw.or
+  __builtin_HEXAGON_V6_veqw_or(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqw.or.128B
+  __builtin_HEXAGON_V6_veqw_or_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqw.xor
-  __builtin_HEXAGON_V6_vgtb_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtb.128B
-  __builtin_HEXAGON_V6_vgtb_and_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtb.and.128B
-  __builtin_HEXAGON_V6_vgtb_and(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgtb.and
-  __builtin_HEXAGON_V6_vgtb_or_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtb.or.128B
-  __builtin_HEXAGON_V6_vgtb_or(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgtb.or
-  __builtin_HEXAGON_V6_vgtb(v16, v16);
+  __builtin_HEXAGON_V6_veqw_xor(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.veqw.xor.128B
+  __builtin_HEXAGON_V6_veqw_xor_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgathermh
+  __builtin_HEXAGON_V6_vgathermh(0, 0, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vgathermh.128B
+  __builtin_HEXAGON_V6_vgathermh_128B(0, 0, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vgathermhq
+  __builtin_HEXAGON_V6_vgathermhq(0, v64, 0, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vgathermhq.128B
+  __builtin_HEXAGON_V6_vgathermhq_128B(0, v128, 0, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vgathermhw
+  __builtin_HEXAGON_V6_vgathermhw(0, 0, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vgathermhw.128B
+  __builtin_HEXAGON_V6_vgathermhw_128B(0, 0, 0, v256);
+  // CHECK: @llvm.hexagon.V6.vgathermhwq
+  __builtin_HEXAGON_V6_vgathermhwq(0, v64, 0, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vgathermhwq.128B
+  __builtin_HEXAGON_V6_vgathermhwq_128B(0, v128, 0, 0, v256);
+  // CHECK: @llvm.hexagon.V6.vgathermw
+  __builtin_HEXAGON_V6_vgathermw(0, 0, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vgathermw.128B
+  __builtin_HEXAGON_V6_vgathermw_128B(0, 0, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vgathermwq
+  __builtin_HEXAGON_V6_vgathermwq(0, v64, 0, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vgathermwq.128B
+  __builtin_HEXAGON_V6_vgathermwq_128B(0, v128, 0, 0, v128);
   // CHECK: @llvm.hexagon.V6.vgtb
-  __builtin_HEXAGON_V6_vgtb_xor_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtb.xor.128B
-  __builtin_HEXAGON_V6_vgtb_xor(v16, v16, v16);
+  __builtin_HEXAGON_V6_vgtb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtb.128B
+  __builtin_HEXAGON_V6_vgtb_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgtb.and
+  __builtin_HEXAGON_V6_vgtb_and(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtb.and.128B
+  __builtin_HEXAGON_V6_vgtb_and_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgtb.or
+  __builtin_HEXAGON_V6_vgtb_or(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtb.or.128B
+  __builtin_HEXAGON_V6_vgtb_or_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtb.xor
-  __builtin_HEXAGON_V6_vgth_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgth.128B
-  __builtin_HEXAGON_V6_vgth_and_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgth.and.128B
-  __builtin_HEXAGON_V6_vgth_and(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgth.and
-  __builtin_HEXAGON_V6_vgth_or_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgth.or.128B
-  __builtin_HEXAGON_V6_vgth_or(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgth.or
-  __builtin_HEXAGON_V6_vgth(v16, v16);
+  __builtin_HEXAGON_V6_vgtb_xor(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtb.xor.128B
+  __builtin_HEXAGON_V6_vgtb_xor_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgth
-  __builtin_HEXAGON_V6_vgth_xor_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgth.xor.128B
-  __builtin_HEXAGON_V6_vgth_xor(v16, v16, v16);
+  __builtin_HEXAGON_V6_vgth(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgth.128B
+  __builtin_HEXAGON_V6_vgth_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgth.and
+  __builtin_HEXAGON_V6_vgth_and(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgth.and.128B
+  __builtin_HEXAGON_V6_vgth_and_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgth.or
+  __builtin_HEXAGON_V6_vgth_or(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgth.or.128B
+  __builtin_HEXAGON_V6_vgth_or_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgth.xor
-  __builtin_HEXAGON_V6_vgtub_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtub.128B
-  __builtin_HEXAGON_V6_vgtub_and_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtub.and.128B
-  __builtin_HEXAGON_V6_vgtub_and(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgtub.and
-  __builtin_HEXAGON_V6_vgtub_or_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtub.or.128B
-  __builtin_HEXAGON_V6_vgtub_or(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgtub.or
-  __builtin_HEXAGON_V6_vgtub(v16, v16);
+  __builtin_HEXAGON_V6_vgth_xor(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgth.xor.128B
+  __builtin_HEXAGON_V6_vgth_xor_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtub
-  __builtin_HEXAGON_V6_vgtub_xor_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtub.xor.128B
-  __builtin_HEXAGON_V6_vgtub_xor(v16, v16, v16);
+  __builtin_HEXAGON_V6_vgtub(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtub.128B
+  __builtin_HEXAGON_V6_vgtub_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgtub.and
+  __builtin_HEXAGON_V6_vgtub_and(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtub.and.128B
+  __builtin_HEXAGON_V6_vgtub_and_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgtub.or
+  __builtin_HEXAGON_V6_vgtub_or(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtub.or.128B
+  __builtin_HEXAGON_V6_vgtub_or_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtub.xor
-  __builtin_HEXAGON_V6_vgtuh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtuh.128B
-  __builtin_HEXAGON_V6_vgtuh_and_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtuh.and.128B
-  __builtin_HEXAGON_V6_vgtuh_and(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgtuh.and
-  __builtin_HEXAGON_V6_vgtuh_or_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtuh.or.128B
-  __builtin_HEXAGON_V6_vgtuh_or(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgtuh.or
-  __builtin_HEXAGON_V6_vgtuh(v16, v16);
+  __builtin_HEXAGON_V6_vgtub_xor(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtub.xor.128B
+  __builtin_HEXAGON_V6_vgtub_xor_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuh
-  __builtin_HEXAGON_V6_vgtuh_xor_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtuh.xor.128B
-  __builtin_HEXAGON_V6_vgtuh_xor(v16, v16, v16);
+  __builtin_HEXAGON_V6_vgtuh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtuh.128B
+  __builtin_HEXAGON_V6_vgtuh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgtuh.and
+  __builtin_HEXAGON_V6_vgtuh_and(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtuh.and.128B
+  __builtin_HEXAGON_V6_vgtuh_and_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgtuh.or
+  __builtin_HEXAGON_V6_vgtuh_or(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtuh.or.128B
+  __builtin_HEXAGON_V6_vgtuh_or_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuh.xor
-  __builtin_HEXAGON_V6_vgtuw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtuw.128B
-  __builtin_HEXAGON_V6_vgtuw_and_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtuw.and.128B
-  __builtin_HEXAGON_V6_vgtuw_and(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgtuw.and
-  __builtin_HEXAGON_V6_vgtuw_or_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtuw.or.128B
-  __builtin_HEXAGON_V6_vgtuw_or(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgtuw.or
-  __builtin_HEXAGON_V6_vgtuw(v16, v16);
+  __builtin_HEXAGON_V6_vgtuh_xor(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtuh.xor.128B
+  __builtin_HEXAGON_V6_vgtuh_xor_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuw
-  __builtin_HEXAGON_V6_vgtuw_xor_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtuw.xor.128B
-  __builtin_HEXAGON_V6_vgtuw_xor(v16, v16, v16);
+  __builtin_HEXAGON_V6_vgtuw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtuw.128B
+  __builtin_HEXAGON_V6_vgtuw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgtuw.and
+  __builtin_HEXAGON_V6_vgtuw_and(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtuw.and.128B
+  __builtin_HEXAGON_V6_vgtuw_and_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgtuw.or
+  __builtin_HEXAGON_V6_vgtuw_or(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtuw.or.128B
+  __builtin_HEXAGON_V6_vgtuw_or_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuw.xor
-  __builtin_HEXAGON_V6_vgtw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtw.128B
-  __builtin_HEXAGON_V6_vgtw_and_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtw.and.128B
-  __builtin_HEXAGON_V6_vgtw_and(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgtw.and
-  __builtin_HEXAGON_V6_vgtw_or_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtw.or.128B
-  __builtin_HEXAGON_V6_vgtw_or(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vgtw.or
-  __builtin_HEXAGON_V6_vgtw(v16, v16);
+  __builtin_HEXAGON_V6_vgtuw_xor(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtuw.xor.128B
+  __builtin_HEXAGON_V6_vgtuw_xor_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtw
-  __builtin_HEXAGON_V6_vgtw_xor_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vgtw.xor.128B
-  __builtin_HEXAGON_V6_vgtw_xor(v16, v16, v16);
+  __builtin_HEXAGON_V6_vgtw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtw.128B
+  __builtin_HEXAGON_V6_vgtw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgtw.and
+  __builtin_HEXAGON_V6_vgtw_and(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtw.and.128B
+  __builtin_HEXAGON_V6_vgtw_and_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vgtw.or
+  __builtin_HEXAGON_V6_vgtw_or(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtw.or.128B
+  __builtin_HEXAGON_V6_vgtw_or_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtw.xor
-  __builtin_HEXAGON_V6_vinsertwr_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vinsertwr.128B
-  __builtin_HEXAGON_V6_vinsertwr(v16, 0);
+  __builtin_HEXAGON_V6_vgtw_xor(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vgtw.xor.128B
+  __builtin_HEXAGON_V6_vgtw_xor_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vinsertwr
-  __builtin_HEXAGON_V6_vlalignb_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vlalignb.128B
-  __builtin_HEXAGON_V6_vlalignbi_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vlalignbi.128B
-  __builtin_HEXAGON_V6_vlalignbi(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vlalignbi
-  __builtin_HEXAGON_V6_vlalignb(v16, v16, 0);
+  __builtin_HEXAGON_V6_vinsertwr(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vinsertwr.128B
+  __builtin_HEXAGON_V6_vinsertwr_128B(v128, 0);
   // CHECK: @llvm.hexagon.V6.vlalignb
-  __builtin_HEXAGON_V6_vlsrh_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vlsrh.128B
-  __builtin_HEXAGON_V6_vlsrhv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vlsrhv.128B
-  __builtin_HEXAGON_V6_vlsrh(v16, 0);
+  __builtin_HEXAGON_V6_vlalignb(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlalignb.128B
+  __builtin_HEXAGON_V6_vlalignb_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vlalignbi
+  __builtin_HEXAGON_V6_vlalignbi(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlalignbi.128B
+  __builtin_HEXAGON_V6_vlalignbi_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vlsrb
+  __builtin_HEXAGON_V6_vlsrb(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlsrb.128B
+  __builtin_HEXAGON_V6_vlsrb_128B(v128, 0);
   // CHECK: @llvm.hexagon.V6.vlsrh
-  __builtin_HEXAGON_V6_vlsrhv(v16, v16);
+  __builtin_HEXAGON_V6_vlsrh(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlsrh.128B
+  __builtin_HEXAGON_V6_vlsrh_128B(v128, 0);
   // CHECK: @llvm.hexagon.V6.vlsrhv
-  __builtin_HEXAGON_V6_vlsrw_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vlsrw.128B
-  __builtin_HEXAGON_V6_vlsrwv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vlsrwv.128B
-  __builtin_HEXAGON_V6_vlsrw(v16, 0);
+  __builtin_HEXAGON_V6_vlsrhv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vlsrhv.128B
+  __builtin_HEXAGON_V6_vlsrhv_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vlsrw
-  __builtin_HEXAGON_V6_vlsrwv(v16, v16);
+  __builtin_HEXAGON_V6_vlsrw(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlsrw.128B
+  __builtin_HEXAGON_V6_vlsrw_128B(v128, 0);
   // CHECK: @llvm.hexagon.V6.vlsrwv
-  __builtin_HEXAGON_V6_vlutb_128B(v32, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vlutb.128B
-  __builtin_HEXAGON_V6_vlutb_acc_128B(v32, v32, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vlutb.acc.128B
-  __builtin_HEXAGON_V6_vlutb_acc(v16, v16, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vlutb.acc
-  __builtin_HEXAGON_V6_vlutb_dv_128B(v64, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vlutb.dv.128B
-  __builtin_HEXAGON_V6_vlutb_dv_acc_128B(v64, v64, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vlutb.dv.acc.128B
-  __builtin_HEXAGON_V6_vlutb_dv_acc(v32, v32, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vlutb.dv.acc
-  __builtin_HEXAGON_V6_vlutb_dv(v32, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vlutb.dv
-  __builtin_HEXAGON_V6_vlutb(v16, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vlutb
-  __builtin_HEXAGON_V6_vlutvvb_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vlutvvb.128B
-  __builtin_HEXAGON_V6_vlutvvb_oracc_128B(v32, v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vlutvvb.oracc.128B
-  __builtin_HEXAGON_V6_vlutvvb_oracc(v16, v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vlutvvb.oracc
-  __builtin_HEXAGON_V6_vlutvvb(v16, v16, 0);
+  __builtin_HEXAGON_V6_vlsrwv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vlsrwv.128B
+  __builtin_HEXAGON_V6_vlsrwv_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vlut4
+  __builtin_HEXAGON_V6_vlut4(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlut4.128B
+  __builtin_HEXAGON_V6_vlut4_128B(v128, 0);
   // CHECK: @llvm.hexagon.V6.vlutvvb
-  __builtin_HEXAGON_V6_vlutvwh_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vlutvwh.128B
-  __builtin_HEXAGON_V6_vlutvwh_oracc_128B(v64, v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vlutvwh.oracc.128B
-  __builtin_HEXAGON_V6_vlutvwh_oracc(v32, v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vlutvwh.oracc
-  __builtin_HEXAGON_V6_vlutvwh(v16, v16, 0);
+  __builtin_HEXAGON_V6_vlutvvb(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvvb.128B
+  __builtin_HEXAGON_V6_vlutvvb_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvvb.nm
+  __builtin_HEXAGON_V6_vlutvvb_nm(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvvb.nm.128B
+  __builtin_HEXAGON_V6_vlutvvb_nm_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvvb.oracc
+  __builtin_HEXAGON_V6_vlutvvb_oracc(v64, v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvvb.oracc.128B
+  __builtin_HEXAGON_V6_vlutvvb_oracc_128B(v128, v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvvb.oracci
+  __builtin_HEXAGON_V6_vlutvvb_oracci(v64, v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvvb.oracci.128B
+  __builtin_HEXAGON_V6_vlutvvb_oracci_128B(v128, v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvvbi
+  __builtin_HEXAGON_V6_vlutvvbi(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvvbi.128B
+  __builtin_HEXAGON_V6_vlutvvbi_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vlutvwh
-  __builtin_HEXAGON_V6_vmaxh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmaxh.128B
-  __builtin_HEXAGON_V6_vmaxh(v16, v16);
+  __builtin_HEXAGON_V6_vlutvwh(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvwh.128B
+  __builtin_HEXAGON_V6_vlutvwh_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvwh.nm
+  __builtin_HEXAGON_V6_vlutvwh_nm(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvwh.nm.128B
+  __builtin_HEXAGON_V6_vlutvwh_nm_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvwh.oracc
+  __builtin_HEXAGON_V6_vlutvwh_oracc(v128, v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvwh.oracc.128B
+  __builtin_HEXAGON_V6_vlutvwh_oracc_128B(v256, v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvwh.oracci
+  __builtin_HEXAGON_V6_vlutvwh_oracci(v128, v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvwh.oracci.128B
+  __builtin_HEXAGON_V6_vlutvwh_oracci_128B(v256, v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvwhi
+  __builtin_HEXAGON_V6_vlutvwhi(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vlutvwhi.128B
+  __builtin_HEXAGON_V6_vlutvwhi_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmaskedstorenq
+  __builtin_HEXAGON_V6_vmaskedstorenq(v64, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vmaskedstorenq.128B
+  __builtin_HEXAGON_V6_vmaskedstorenq_128B(v128, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vmaskedstorentnq
+  __builtin_HEXAGON_V6_vmaskedstorentnq(v64, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vmaskedstorentnq.128B
+  __builtin_HEXAGON_V6_vmaskedstorentnq_128B(v128, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vmaskedstorentq
+  __builtin_HEXAGON_V6_vmaskedstorentq(v64, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vmaskedstorentq.128B
+  __builtin_HEXAGON_V6_vmaskedstorentq_128B(v128, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vmaskedstoreq
+  __builtin_HEXAGON_V6_vmaskedstoreq(v64, 0, v64);
+  // CHECK: @llvm.hexagon.V6.vmaskedstoreq.128B
+  __builtin_HEXAGON_V6_vmaskedstoreq_128B(v128, 0, v128);
+  // CHECK: @llvm.hexagon.V6.vmaxb
+  __builtin_HEXAGON_V6_vmaxb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmaxb.128B
+  __builtin_HEXAGON_V6_vmaxb_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vmaxh
-  __builtin_HEXAGON_V6_vmaxub_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmaxub.128B
-  __builtin_HEXAGON_V6_vmaxub(v16, v16);
+  __builtin_HEXAGON_V6_vmaxh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmaxh.128B
+  __builtin_HEXAGON_V6_vmaxh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vmaxub
-  __builtin_HEXAGON_V6_vmaxuh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmaxuh.128B
-  __builtin_HEXAGON_V6_vmaxuh(v16, v16);
+  __builtin_HEXAGON_V6_vmaxub(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmaxub.128B
+  __builtin_HEXAGON_V6_vmaxub_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vmaxuh
-  __builtin_HEXAGON_V6_vmaxw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmaxw.128B
-  __builtin_HEXAGON_V6_vmaxw(v16, v16);
+  __builtin_HEXAGON_V6_vmaxuh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmaxuh.128B
+  __builtin_HEXAGON_V6_vmaxuh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vmaxw
-  __builtin_HEXAGON_V6_vminh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vminh.128B
-  __builtin_HEXAGON_V6_vminh(v16, v16);
+  __builtin_HEXAGON_V6_vmaxw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmaxw.128B
+  __builtin_HEXAGON_V6_vmaxw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vminb
+  __builtin_HEXAGON_V6_vminb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vminb.128B
+  __builtin_HEXAGON_V6_vminb_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vminh
-  __builtin_HEXAGON_V6_vminub_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vminub.128B
-  __builtin_HEXAGON_V6_vminub(v16, v16);
+  __builtin_HEXAGON_V6_vminh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vminh.128B
+  __builtin_HEXAGON_V6_vminh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vminub
-  __builtin_HEXAGON_V6_vminuh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vminuh.128B
-  __builtin_HEXAGON_V6_vminuh(v16, v16);
+  __builtin_HEXAGON_V6_vminub(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vminub.128B
+  __builtin_HEXAGON_V6_vminub_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vminuh
-  __builtin_HEXAGON_V6_vminw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vminw.128B
-  __builtin_HEXAGON_V6_vminw(v16, v16);
+  __builtin_HEXAGON_V6_vminuh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vminuh.128B
+  __builtin_HEXAGON_V6_vminuh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vminw
-  __builtin_HEXAGON_V6_vmpabus_128B(v64, 0);
-  // CHECK: @llvm.hexagon.V6.vmpabus.128B
-  __builtin_HEXAGON_V6_vmpabus_acc_128B(v64, v64, 0);
-  // CHECK: @llvm.hexagon.V6.vmpabus.acc.128B
-  __builtin_HEXAGON_V6_vmpabus_acc(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpabus.acc
-  __builtin_HEXAGON_V6_vmpabusv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vmpabusv.128B
-  __builtin_HEXAGON_V6_vmpabus(v32, 0);
+  __builtin_HEXAGON_V6_vminw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vminw.128B
+  __builtin_HEXAGON_V6_vminw_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpabus
-  __builtin_HEXAGON_V6_vmpabusv(v32, v32);
+  __builtin_HEXAGON_V6_vmpabus(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpabus.128B
+  __builtin_HEXAGON_V6_vmpabus_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vmpabus.acc
+  __builtin_HEXAGON_V6_vmpabus_acc(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpabus.acc.128B
+  __builtin_HEXAGON_V6_vmpabus_acc_128B(v256, v256, 0);
   // CHECK: @llvm.hexagon.V6.vmpabusv
-  __builtin_HEXAGON_V6_vmpabuuv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vmpabuuv.128B
-  __builtin_HEXAGON_V6_vmpabuuv(v32, v32);
+  __builtin_HEXAGON_V6_vmpabusv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpabusv.128B
+  __builtin_HEXAGON_V6_vmpabusv_128B(v256, v256);
+  // CHECK: @llvm.hexagon.V6.vmpabuu
+  __builtin_HEXAGON_V6_vmpabuu(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpabuu.128B
+  __builtin_HEXAGON_V6_vmpabuu_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vmpabuu.acc
+  __builtin_HEXAGON_V6_vmpabuu_acc(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpabuu.acc.128B
+  __builtin_HEXAGON_V6_vmpabuu_acc_128B(v256, v256, 0);
   // CHECK: @llvm.hexagon.V6.vmpabuuv
-  __builtin_HEXAGON_V6_vmpahb_128B(v64, 0);
-  // CHECK: @llvm.hexagon.V6.vmpahb.128B
-  __builtin_HEXAGON_V6_vmpahb_acc_128B(v64, v64, 0);
-  // CHECK: @llvm.hexagon.V6.vmpahb.acc.128B
-  __builtin_HEXAGON_V6_vmpahb_acc(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpahb.acc
-  __builtin_HEXAGON_V6_vmpahb(v32, 0);
+  __builtin_HEXAGON_V6_vmpabuuv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpabuuv.128B
+  __builtin_HEXAGON_V6_vmpabuuv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vmpahb
-  __builtin_HEXAGON_V6_vmpybus_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpybus.128B
-  __builtin_HEXAGON_V6_vmpybus_acc_128B(v64, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpybus.acc.128B
-  __builtin_HEXAGON_V6_vmpybus_acc(v32, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vmpybus.acc
-  __builtin_HEXAGON_V6_vmpybusv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpybusv.128B
-  __builtin_HEXAGON_V6_vmpybus(v16, 0);
+  __builtin_HEXAGON_V6_vmpahb(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpahb.128B
+  __builtin_HEXAGON_V6_vmpahb_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vmpahb.acc
+  __builtin_HEXAGON_V6_vmpahb_acc(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpahb.acc.128B
+  __builtin_HEXAGON_V6_vmpahb_acc_128B(v256, v256, 0);
+  // CHECK: @llvm.hexagon.V6.vmpahhsat
+  __builtin_HEXAGON_V6_vmpahhsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpahhsat.128B
+  __builtin_HEXAGON_V6_vmpahhsat_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpauhb
+  __builtin_HEXAGON_V6_vmpauhb(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpauhb.128B
+  __builtin_HEXAGON_V6_vmpauhb_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vmpauhb.acc
+  __builtin_HEXAGON_V6_vmpauhb_acc(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpauhb.acc.128B
+  __builtin_HEXAGON_V6_vmpauhb_acc_128B(v256, v256, 0);
+  // CHECK: @llvm.hexagon.V6.vmpauhuhsat
+  __builtin_HEXAGON_V6_vmpauhuhsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpauhuhsat.128B
+  __builtin_HEXAGON_V6_vmpauhuhsat_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpsuhuhsat
+  __builtin_HEXAGON_V6_vmpsuhuhsat(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpsuhuhsat.128B
+  __builtin_HEXAGON_V6_vmpsuhuhsat_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vmpybus
-  __builtin_HEXAGON_V6_vmpybusv_acc_128B(v64, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpybusv.acc.128B
-  __builtin_HEXAGON_V6_vmpybusv_acc(v32, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpybusv.acc
-  __builtin_HEXAGON_V6_vmpybusv(v16, v16);
+  __builtin_HEXAGON_V6_vmpybus(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpybus.128B
+  __builtin_HEXAGON_V6_vmpybus_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpybus.acc
+  __builtin_HEXAGON_V6_vmpybus_acc(v128, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpybus.acc.128B
+  __builtin_HEXAGON_V6_vmpybus_acc_128B(v256, v128, 0);
   // CHECK: @llvm.hexagon.V6.vmpybusv
-  __builtin_HEXAGON_V6_vmpybv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpybv.128B
-  __builtin_HEXAGON_V6_vmpybv_acc_128B(v64, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpybv.acc.128B
-  __builtin_HEXAGON_V6_vmpybv_acc(v32, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpybv.acc
-  __builtin_HEXAGON_V6_vmpybv(v16, v16);
+  __builtin_HEXAGON_V6_vmpybusv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpybusv.128B
+  __builtin_HEXAGON_V6_vmpybusv_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpybusv.acc
+  __builtin_HEXAGON_V6_vmpybusv_acc(v128, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpybusv.acc.128B
+  __builtin_HEXAGON_V6_vmpybusv_acc_128B(v256, v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpybv
-  __builtin_HEXAGON_V6_vmpyewuh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyewuh.128B
-  __builtin_HEXAGON_V6_vmpyewuh(v16, v16);
+  __builtin_HEXAGON_V6_vmpybv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpybv.128B
+  __builtin_HEXAGON_V6_vmpybv_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpybv.acc
+  __builtin_HEXAGON_V6_vmpybv_acc(v128, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpybv.acc.128B
+  __builtin_HEXAGON_V6_vmpybv_acc_128B(v256, v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpyewuh
-  __builtin_HEXAGON_V6_vmpyh_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyh.128B
-  __builtin_HEXAGON_V6_vmpyhsat_acc_128B(v64, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyhsat.acc.128B
-  __builtin_HEXAGON_V6_vmpyhsat_acc(v32, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyhsat.acc
-  __builtin_HEXAGON_V6_vmpyhsrs_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyhsrs.128B
-  __builtin_HEXAGON_V6_vmpyhsrs(v16, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyhsrs
-  __builtin_HEXAGON_V6_vmpyhss_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyhss.128B
-  __builtin_HEXAGON_V6_vmpyhss(v16, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyhss
-  __builtin_HEXAGON_V6_vmpyhus_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyhus.128B
-  __builtin_HEXAGON_V6_vmpyhus_acc_128B(v64, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyhus.acc.128B
-  __builtin_HEXAGON_V6_vmpyhus_acc(v32, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpyhus.acc
-  __builtin_HEXAGON_V6_vmpyhus(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpyhus
-  __builtin_HEXAGON_V6_vmpyhv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyhv.128B
-  __builtin_HEXAGON_V6_vmpyh(v16, 0);
+  __builtin_HEXAGON_V6_vmpyewuh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyewuh.128B
+  __builtin_HEXAGON_V6_vmpyewuh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyewuh.64
+  __builtin_HEXAGON_V6_vmpyewuh_64(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyewuh.64.128B
+  __builtin_HEXAGON_V6_vmpyewuh_64_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpyh
-  __builtin_HEXAGON_V6_vmpyhv_acc_128B(v64, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyhv.acc.128B
-  __builtin_HEXAGON_V6_vmpyhv_acc(v32, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpyhv.acc
-  __builtin_HEXAGON_V6_vmpyhvsrs_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyhvsrs.128B
-  __builtin_HEXAGON_V6_vmpyhvsrs(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpyhvsrs
-  __builtin_HEXAGON_V6_vmpyhv(v16, v16);
+  __builtin_HEXAGON_V6_vmpyh(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyh.128B
+  __builtin_HEXAGON_V6_vmpyh_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyh.acc
+  __builtin_HEXAGON_V6_vmpyh_acc(v128, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyh.acc.128B
+  __builtin_HEXAGON_V6_vmpyh_acc_128B(v256, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyhsat.acc
+  __builtin_HEXAGON_V6_vmpyhsat_acc(v128, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyhsat.acc.128B
+  __builtin_HEXAGON_V6_vmpyhsat_acc_128B(v256, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyhsrs
+  __builtin_HEXAGON_V6_vmpyhsrs(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyhsrs.128B
+  __builtin_HEXAGON_V6_vmpyhsrs_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyhss
+  __builtin_HEXAGON_V6_vmpyhss(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyhss.128B
+  __builtin_HEXAGON_V6_vmpyhss_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyhus
+  __builtin_HEXAGON_V6_vmpyhus(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyhus.128B
+  __builtin_HEXAGON_V6_vmpyhus_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyhus.acc
+  __builtin_HEXAGON_V6_vmpyhus_acc(v128, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyhus.acc.128B
+  __builtin_HEXAGON_V6_vmpyhus_acc_128B(v256, v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpyhv
-  __builtin_HEXAGON_V6_vmpyieoh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyieoh.128B
-  __builtin_HEXAGON_V6_vmpyieoh(v16, v16);
+  __builtin_HEXAGON_V6_vmpyhv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyhv.128B
+  __builtin_HEXAGON_V6_vmpyhv_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyhv.acc
+  __builtin_HEXAGON_V6_vmpyhv_acc(v128, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyhv.acc.128B
+  __builtin_HEXAGON_V6_vmpyhv_acc_128B(v256, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyhvsrs
+  __builtin_HEXAGON_V6_vmpyhvsrs(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyhvsrs.128B
+  __builtin_HEXAGON_V6_vmpyhvsrs_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpyieoh
-  __builtin_HEXAGON_V6_vmpyiewh_acc_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyiewh.acc.128B
-  __builtin_HEXAGON_V6_vmpyiewh_acc(v16, v16, v16);
+  __builtin_HEXAGON_V6_vmpyieoh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyieoh.128B
+  __builtin_HEXAGON_V6_vmpyieoh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpyiewh.acc
-  __builtin_HEXAGON_V6_vmpyiewuh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyiewuh.128B
-  __builtin_HEXAGON_V6_vmpyiewuh_acc_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyiewuh.acc.128B
-  __builtin_HEXAGON_V6_vmpyiewuh_acc(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpyiewuh.acc
-  __builtin_HEXAGON_V6_vmpyiewuh(v16, v16);
+  __builtin_HEXAGON_V6_vmpyiewh_acc(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyiewh.acc.128B
+  __builtin_HEXAGON_V6_vmpyiewh_acc_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpyiewuh
-  __builtin_HEXAGON_V6_vmpyih_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyih.128B
-  __builtin_HEXAGON_V6_vmpyih_acc_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyih.acc.128B
-  __builtin_HEXAGON_V6_vmpyih_acc(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpyih.acc
-  __builtin_HEXAGON_V6_vmpyihb_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyihb.128B
-  __builtin_HEXAGON_V6_vmpyihb_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyihb.acc.128B
-  __builtin_HEXAGON_V6_vmpyihb_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyihb.acc
-  __builtin_HEXAGON_V6_vmpyihb(v16, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyihb
-  __builtin_HEXAGON_V6_vmpyih(v16, v16);
+  __builtin_HEXAGON_V6_vmpyiewuh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyiewuh.128B
+  __builtin_HEXAGON_V6_vmpyiewuh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyiewuh.acc
+  __builtin_HEXAGON_V6_vmpyiewuh_acc(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyiewuh.acc.128B
+  __builtin_HEXAGON_V6_vmpyiewuh_acc_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpyih
-  __builtin_HEXAGON_V6_vmpyiowh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyiowh.128B
-  __builtin_HEXAGON_V6_vmpyiowh(v16, v16);
+  __builtin_HEXAGON_V6_vmpyih(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyih.128B
+  __builtin_HEXAGON_V6_vmpyih_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyih.acc
+  __builtin_HEXAGON_V6_vmpyih_acc(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyih.acc.128B
+  __builtin_HEXAGON_V6_vmpyih_acc_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyihb
+  __builtin_HEXAGON_V6_vmpyihb(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyihb.128B
+  __builtin_HEXAGON_V6_vmpyihb_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyihb.acc
+  __builtin_HEXAGON_V6_vmpyihb_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyihb.acc.128B
+  __builtin_HEXAGON_V6_vmpyihb_acc_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vmpyiowh
-  __builtin_HEXAGON_V6_vmpyiwb_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyiwb.128B
-  __builtin_HEXAGON_V6_vmpyiwb_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyiwb.acc.128B
-  __builtin_HEXAGON_V6_vmpyiwb_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyiwb.acc
-  __builtin_HEXAGON_V6_vmpyiwb(v16, 0);
+  __builtin_HEXAGON_V6_vmpyiowh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyiowh.128B
+  __builtin_HEXAGON_V6_vmpyiowh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpyiwb
-  __builtin_HEXAGON_V6_vmpyiwh_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyiwh.128B
-  __builtin_HEXAGON_V6_vmpyiwh_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyiwh.acc.128B
-  __builtin_HEXAGON_V6_vmpyiwh_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyiwh.acc
-  __builtin_HEXAGON_V6_vmpyiwh(v16, 0);
+  __builtin_HEXAGON_V6_vmpyiwb(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyiwb.128B
+  __builtin_HEXAGON_V6_vmpyiwb_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyiwb.acc
+  __builtin_HEXAGON_V6_vmpyiwb_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyiwb.acc.128B
+  __builtin_HEXAGON_V6_vmpyiwb_acc_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vmpyiwh
-  __builtin_HEXAGON_V6_vmpyowh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyowh.128B
-  __builtin_HEXAGON_V6_vmpyowh_rnd_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyowh.rnd.128B
-  __builtin_HEXAGON_V6_vmpyowh_rnd_sacc_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyowh.rnd.sacc.128B
-  __builtin_HEXAGON_V6_vmpyowh_rnd_sacc(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpyowh.rnd.sacc
-  __builtin_HEXAGON_V6_vmpyowh_rnd(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpyowh.rnd
-  __builtin_HEXAGON_V6_vmpyowh_sacc_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyowh.sacc.128B
-  __builtin_HEXAGON_V6_vmpyowh_sacc(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpyowh.sacc
-  __builtin_HEXAGON_V6_vmpyowh(v16, v16);
+  __builtin_HEXAGON_V6_vmpyiwh(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyiwh.128B
+  __builtin_HEXAGON_V6_vmpyiwh_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyiwh.acc
+  __builtin_HEXAGON_V6_vmpyiwh_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyiwh.acc.128B
+  __builtin_HEXAGON_V6_vmpyiwh_acc_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyiwub
+  __builtin_HEXAGON_V6_vmpyiwub(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyiwub.128B
+  __builtin_HEXAGON_V6_vmpyiwub_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyiwub.acc
+  __builtin_HEXAGON_V6_vmpyiwub_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyiwub.acc.128B
+  __builtin_HEXAGON_V6_vmpyiwub_acc_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vmpyowh
-  __builtin_HEXAGON_V6_vmpyub_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyub.128B
-  __builtin_HEXAGON_V6_vmpyub_acc_128B(v64, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyub.acc.128B
-  __builtin_HEXAGON_V6_vmpyub_acc(v32, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyub.acc
-  __builtin_HEXAGON_V6_vmpyubv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyubv.128B
-  __builtin_HEXAGON_V6_vmpyub(v16, 0);
+  __builtin_HEXAGON_V6_vmpyowh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyowh.128B
+  __builtin_HEXAGON_V6_vmpyowh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyowh.64.acc
+  __builtin_HEXAGON_V6_vmpyowh_64_acc(v128, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyowh.64.acc.128B
+  __builtin_HEXAGON_V6_vmpyowh_64_acc_128B(v256, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyowh.rnd
+  __builtin_HEXAGON_V6_vmpyowh_rnd(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyowh.rnd.128B
+  __builtin_HEXAGON_V6_vmpyowh_rnd_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyowh.rnd.sacc
+  __builtin_HEXAGON_V6_vmpyowh_rnd_sacc(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyowh.rnd.sacc.128B
+  __builtin_HEXAGON_V6_vmpyowh_rnd_sacc_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyowh.sacc
+  __builtin_HEXAGON_V6_vmpyowh_sacc(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyowh.sacc.128B
+  __builtin_HEXAGON_V6_vmpyowh_sacc_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpyub
-  __builtin_HEXAGON_V6_vmpyubv_acc_128B(v64, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyubv.acc.128B
-  __builtin_HEXAGON_V6_vmpyubv_acc(v32, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpyubv.acc
-  __builtin_HEXAGON_V6_vmpyubv(v16, v16);
+  __builtin_HEXAGON_V6_vmpyub(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyub.128B
+  __builtin_HEXAGON_V6_vmpyub_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyub.acc
+  __builtin_HEXAGON_V6_vmpyub_acc(v128, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyub.acc.128B
+  __builtin_HEXAGON_V6_vmpyub_acc_128B(v256, v128, 0);
   // CHECK: @llvm.hexagon.V6.vmpyubv
-  __builtin_HEXAGON_V6_vmpyuh_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyuh.128B
-  __builtin_HEXAGON_V6_vmpyuh_acc_128B(v64, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyuh.acc.128B
-  __builtin_HEXAGON_V6_vmpyuh_acc(v32, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vmpyuh.acc
-  __builtin_HEXAGON_V6_vmpyuhv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyuhv.128B
-  __builtin_HEXAGON_V6_vmpyuh(v16, 0);
+  __builtin_HEXAGON_V6_vmpyubv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyubv.128B
+  __builtin_HEXAGON_V6_vmpyubv_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyubv.acc
+  __builtin_HEXAGON_V6_vmpyubv_acc(v128, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyubv.acc.128B
+  __builtin_HEXAGON_V6_vmpyubv_acc_128B(v256, v128, v128);
   // CHECK: @llvm.hexagon.V6.vmpyuh
-  __builtin_HEXAGON_V6_vmpyuhv_acc_128B(v64, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmpyuhv.acc.128B
-  __builtin_HEXAGON_V6_vmpyuhv_acc(v32, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vmpyuhv.acc
-  __builtin_HEXAGON_V6_vmpyuhv(v16, v16);
+  __builtin_HEXAGON_V6_vmpyuh(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyuh.128B
+  __builtin_HEXAGON_V6_vmpyuh_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyuh.acc
+  __builtin_HEXAGON_V6_vmpyuh_acc(v128, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyuh.acc.128B
+  __builtin_HEXAGON_V6_vmpyuh_acc_128B(v256, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyuhe
+  __builtin_HEXAGON_V6_vmpyuhe(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyuhe.128B
+  __builtin_HEXAGON_V6_vmpyuhe_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyuhe.acc
+  __builtin_HEXAGON_V6_vmpyuhe_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vmpyuhe.acc.128B
+  __builtin_HEXAGON_V6_vmpyuhe_acc_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vmpyuhv
-  __builtin_HEXAGON_V6_vmux_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vmux.128B
-  __builtin_HEXAGON_V6_vmux(v16, v16, v16);
+  __builtin_HEXAGON_V6_vmpyuhv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyuhv.128B
+  __builtin_HEXAGON_V6_vmpyuhv_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vmpyuhv.acc
+  __builtin_HEXAGON_V6_vmpyuhv_acc(v128, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmpyuhv.acc.128B
+  __builtin_HEXAGON_V6_vmpyuhv_acc_128B(v256, v128, v128);
   // CHECK: @llvm.hexagon.V6.vmux
-  __builtin_HEXAGON_V6_vnavgh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vnavgh.128B
-  __builtin_HEXAGON_V6_vnavgh(v16, v16);
+  __builtin_HEXAGON_V6_vmux(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vmux.128B
+  __builtin_HEXAGON_V6_vmux_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vnavgb
+  __builtin_HEXAGON_V6_vnavgb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vnavgb.128B
+  __builtin_HEXAGON_V6_vnavgb_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vnavgh
-  __builtin_HEXAGON_V6_vnavgub_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vnavgub.128B
-  __builtin_HEXAGON_V6_vnavgub(v16, v16);
+  __builtin_HEXAGON_V6_vnavgh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vnavgh.128B
+  __builtin_HEXAGON_V6_vnavgh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vnavgub
-  __builtin_HEXAGON_V6_vnavgw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vnavgw.128B
-  __builtin_HEXAGON_V6_vnavgw(v16, v16);
+  __builtin_HEXAGON_V6_vnavgub(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vnavgub.128B
+  __builtin_HEXAGON_V6_vnavgub_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vnavgw
-  __builtin_HEXAGON_V6_vnormamth_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vnormamth.128B
-  __builtin_HEXAGON_V6_vnormamth(v16);
+  __builtin_HEXAGON_V6_vnavgw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vnavgw.128B
+  __builtin_HEXAGON_V6_vnavgw_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vnormamth
-  __builtin_HEXAGON_V6_vnormamtw_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vnormamtw.128B
-  __builtin_HEXAGON_V6_vnormamtw(v16);
+  __builtin_HEXAGON_V6_vnormamth(v64);
+  // CHECK: @llvm.hexagon.V6.vnormamth.128B
+  __builtin_HEXAGON_V6_vnormamth_128B(v128);
   // CHECK: @llvm.hexagon.V6.vnormamtw
-  __builtin_HEXAGON_V6_vnot_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vnot.128B
-  __builtin_HEXAGON_V6_vnot(v16);
+  __builtin_HEXAGON_V6_vnormamtw(v64);
+  // CHECK: @llvm.hexagon.V6.vnormamtw.128B
+  __builtin_HEXAGON_V6_vnormamtw_128B(v128);
   // CHECK: @llvm.hexagon.V6.vnot
-  __builtin_HEXAGON_V6_vor_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vor.128B
-  __builtin_HEXAGON_V6_vor(v16, v16);
+  __builtin_HEXAGON_V6_vnot(v64);
+  // CHECK: @llvm.hexagon.V6.vnot.128B
+  __builtin_HEXAGON_V6_vnot_128B(v128);
   // CHECK: @llvm.hexagon.V6.vor
-  __builtin_HEXAGON_V6_vpackeb_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vpackeb.128B
-  __builtin_HEXAGON_V6_vpackeb(v16, v16);
+  __builtin_HEXAGON_V6_vor(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vor.128B
+  __builtin_HEXAGON_V6_vor_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vpackeb
-  __builtin_HEXAGON_V6_vpackeh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vpackeh.128B
-  __builtin_HEXAGON_V6_vpackeh(v16, v16);
+  __builtin_HEXAGON_V6_vpackeb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vpackeb.128B
+  __builtin_HEXAGON_V6_vpackeb_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vpackeh
-  __builtin_HEXAGON_V6_vpackhb_sat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vpackhb.sat.128B
-  __builtin_HEXAGON_V6_vpackhb_sat(v16, v16);
+  __builtin_HEXAGON_V6_vpackeh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vpackeh.128B
+  __builtin_HEXAGON_V6_vpackeh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vpackhb.sat
-  __builtin_HEXAGON_V6_vpackhub_sat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vpackhub.sat.128B
-  __builtin_HEXAGON_V6_vpackhub_sat(v16, v16);
+  __builtin_HEXAGON_V6_vpackhb_sat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vpackhb.sat.128B
+  __builtin_HEXAGON_V6_vpackhb_sat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vpackhub.sat
-  __builtin_HEXAGON_V6_vpackob_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vpackob.128B
-  __builtin_HEXAGON_V6_vpackob(v16, v16);
+  __builtin_HEXAGON_V6_vpackhub_sat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vpackhub.sat.128B
+  __builtin_HEXAGON_V6_vpackhub_sat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vpackob
-  __builtin_HEXAGON_V6_vpackoh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vpackoh.128B
-  __builtin_HEXAGON_V6_vpackoh(v16, v16);
+  __builtin_HEXAGON_V6_vpackob(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vpackob.128B
+  __builtin_HEXAGON_V6_vpackob_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vpackoh
-  __builtin_HEXAGON_V6_vpackwh_sat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vpackwh.sat.128B
-  __builtin_HEXAGON_V6_vpackwh_sat(v16, v16);
+  __builtin_HEXAGON_V6_vpackoh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vpackoh.128B
+  __builtin_HEXAGON_V6_vpackoh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vpackwh.sat
-  __builtin_HEXAGON_V6_vpackwuh_sat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vpackwuh.sat.128B
-  __builtin_HEXAGON_V6_vpackwuh_sat(v16, v16);
+  __builtin_HEXAGON_V6_vpackwh_sat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vpackwh.sat.128B
+  __builtin_HEXAGON_V6_vpackwh_sat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vpackwuh.sat
-  __builtin_HEXAGON_V6_vpopcounth_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vpopcounth.128B
-  __builtin_HEXAGON_V6_vpopcounth(v16);
+  __builtin_HEXAGON_V6_vpackwuh_sat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vpackwuh.sat.128B
+  __builtin_HEXAGON_V6_vpackwuh_sat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vpopcounth
-  __builtin_HEXAGON_V6_vrdelta_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vrdelta.128B
-  __builtin_HEXAGON_V6_vrdelta(v16, v16);
+  __builtin_HEXAGON_V6_vpopcounth(v64);
+  // CHECK: @llvm.hexagon.V6.vpopcounth.128B
+  __builtin_HEXAGON_V6_vpopcounth_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vprefixqb
+  __builtin_HEXAGON_V6_vprefixqb(v64);
+  // CHECK: @llvm.hexagon.V6.vprefixqb.128B
+  __builtin_HEXAGON_V6_vprefixqb_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vprefixqh
+  __builtin_HEXAGON_V6_vprefixqh(v64);
+  // CHECK: @llvm.hexagon.V6.vprefixqh.128B
+  __builtin_HEXAGON_V6_vprefixqh_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vprefixqw
+  __builtin_HEXAGON_V6_vprefixqw(v64);
+  // CHECK: @llvm.hexagon.V6.vprefixqw.128B
+  __builtin_HEXAGON_V6_vprefixqw_128B(v128);
   // CHECK: @llvm.hexagon.V6.vrdelta
-  __builtin_HEXAGON_V6_vrmpybus_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpybus.128B
-  __builtin_HEXAGON_V6_vrmpybus_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpybus.acc.128B
-  __builtin_HEXAGON_V6_vrmpybus_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpybus.acc
-  __builtin_HEXAGON_V6_vrmpybusi_128B(v64, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpybusi.128B
-  __builtin_HEXAGON_V6_vrmpybusi_acc_128B(v64, v64, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpybusi.acc.128B
-  __builtin_HEXAGON_V6_vrmpybusi_acc(v32, v32, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpybusi.acc
-  __builtin_HEXAGON_V6_vrmpybusi(v32, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpybusi
-  __builtin_HEXAGON_V6_vrmpybusv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vrmpybusv.128B
-  __builtin_HEXAGON_V6_vrmpybus(v16, 0);
+  __builtin_HEXAGON_V6_vrdelta(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vrdelta.128B
+  __builtin_HEXAGON_V6_vrdelta_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vrmpybub.rtt
+  __builtin_HEXAGON_V6_vrmpybub_rtt(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpybub.rtt.128B
+  __builtin_HEXAGON_V6_vrmpybub_rtt_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpybub.rtt.acc
+  __builtin_HEXAGON_V6_vrmpybub_rtt_acc(v128, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpybub.rtt.acc.128B
+  __builtin_HEXAGON_V6_vrmpybub_rtt_acc_128B(v256, v128, 0);
   // CHECK: @llvm.hexagon.V6.vrmpybus
-  __builtin_HEXAGON_V6_vrmpybusv_acc_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vrmpybusv.acc.128B
-  __builtin_HEXAGON_V6_vrmpybusv_acc(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vrmpybusv.acc
-  __builtin_HEXAGON_V6_vrmpybusv(v16, v16);
+  __builtin_HEXAGON_V6_vrmpybus(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpybus.128B
+  __builtin_HEXAGON_V6_vrmpybus_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpybus.acc
+  __builtin_HEXAGON_V6_vrmpybus_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpybus.acc.128B
+  __builtin_HEXAGON_V6_vrmpybus_acc_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpybusi
+  __builtin_HEXAGON_V6_vrmpybusi(v128, 0, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpybusi.128B
+  __builtin_HEXAGON_V6_vrmpybusi_128B(v256, 0, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpybusi.acc
+  __builtin_HEXAGON_V6_vrmpybusi_acc(v128, v128, 0, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpybusi.acc.128B
+  __builtin_HEXAGON_V6_vrmpybusi_acc_128B(v256, v256, 0, 0);
   // CHECK: @llvm.hexagon.V6.vrmpybusv
-  __builtin_HEXAGON_V6_vrmpybv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vrmpybv.128B
-  __builtin_HEXAGON_V6_vrmpybv_acc_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vrmpybv.acc.128B
-  __builtin_HEXAGON_V6_vrmpybv_acc(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vrmpybv.acc
-  __builtin_HEXAGON_V6_vrmpybv(v16, v16);
+  __builtin_HEXAGON_V6_vrmpybusv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vrmpybusv.128B
+  __builtin_HEXAGON_V6_vrmpybusv_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vrmpybusv.acc
+  __builtin_HEXAGON_V6_vrmpybusv_acc(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vrmpybusv.acc.128B
+  __builtin_HEXAGON_V6_vrmpybusv_acc_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vrmpybv
-  __builtin_HEXAGON_V6_vrmpyub_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpyub.128B
-  __builtin_HEXAGON_V6_vrmpyub_acc_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpyub.acc.128B
-  __builtin_HEXAGON_V6_vrmpyub_acc(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpyub.acc
-  __builtin_HEXAGON_V6_vrmpyubi_128B(v64, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpyubi.128B
-  __builtin_HEXAGON_V6_vrmpyubi_acc_128B(v64, v64, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpyubi.acc.128B
-  __builtin_HEXAGON_V6_vrmpyubi_acc(v32, v32, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpyubi.acc
-  __builtin_HEXAGON_V6_vrmpyubi(v32, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vrmpyubi
-  __builtin_HEXAGON_V6_vrmpyubv_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vrmpyubv.128B
-  __builtin_HEXAGON_V6_vrmpyub(v16, 0);
+  __builtin_HEXAGON_V6_vrmpybv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vrmpybv.128B
+  __builtin_HEXAGON_V6_vrmpybv_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vrmpybv.acc
+  __builtin_HEXAGON_V6_vrmpybv_acc(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vrmpybv.acc.128B
+  __builtin_HEXAGON_V6_vrmpybv_acc_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vrmpyub
-  __builtin_HEXAGON_V6_vrmpyubv_acc_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vrmpyubv.acc.128B
-  __builtin_HEXAGON_V6_vrmpyubv_acc(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vrmpyubv.acc
-  __builtin_HEXAGON_V6_vrmpyubv(v16, v16);
+  __builtin_HEXAGON_V6_vrmpyub(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpyub.128B
+  __builtin_HEXAGON_V6_vrmpyub_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpyub.acc
+  __builtin_HEXAGON_V6_vrmpyub_acc(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpyub.acc.128B
+  __builtin_HEXAGON_V6_vrmpyub_acc_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpyub.rtt
+  __builtin_HEXAGON_V6_vrmpyub_rtt(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpyub.rtt.128B
+  __builtin_HEXAGON_V6_vrmpyub_rtt_128B(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpyub.rtt.acc
+  __builtin_HEXAGON_V6_vrmpyub_rtt_acc(v128, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpyub.rtt.acc.128B
+  __builtin_HEXAGON_V6_vrmpyub_rtt_acc_128B(v256, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpyubi
+  __builtin_HEXAGON_V6_vrmpyubi(v128, 0, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpyubi.128B
+  __builtin_HEXAGON_V6_vrmpyubi_128B(v256, 0, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpyubi.acc
+  __builtin_HEXAGON_V6_vrmpyubi_acc(v128, v128, 0, 0);
+  // CHECK: @llvm.hexagon.V6.vrmpyubi.acc.128B
+  __builtin_HEXAGON_V6_vrmpyubi_acc_128B(v256, v256, 0, 0);
   // CHECK: @llvm.hexagon.V6.vrmpyubv
-  __builtin_HEXAGON_V6_vror_128B(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vror.128B
-  __builtin_HEXAGON_V6_vror(v16, 0);
+  __builtin_HEXAGON_V6_vrmpyubv(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vrmpyubv.128B
+  __builtin_HEXAGON_V6_vrmpyubv_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vrmpyubv.acc
+  __builtin_HEXAGON_V6_vrmpyubv_acc(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vrmpyubv.acc.128B
+  __builtin_HEXAGON_V6_vrmpyubv_acc_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vror
-  __builtin_HEXAGON_V6_vroundhb_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vroundhb.128B
-  __builtin_HEXAGON_V6_vroundhb(v16, v16);
+  __builtin_HEXAGON_V6_vror(v64, 0);
+  // CHECK: @llvm.hexagon.V6.vror.128B
+  __builtin_HEXAGON_V6_vror_128B(v128, 0);
   // CHECK: @llvm.hexagon.V6.vroundhb
-  __builtin_HEXAGON_V6_vroundhub_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vroundhub.128B
-  __builtin_HEXAGON_V6_vroundhub(v16, v16);
+  __builtin_HEXAGON_V6_vroundhb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vroundhb.128B
+  __builtin_HEXAGON_V6_vroundhb_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vroundhub
-  __builtin_HEXAGON_V6_vroundwh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vroundwh.128B
-  __builtin_HEXAGON_V6_vroundwh(v16, v16);
+  __builtin_HEXAGON_V6_vroundhub(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vroundhub.128B
+  __builtin_HEXAGON_V6_vroundhub_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vrounduhub
+  __builtin_HEXAGON_V6_vrounduhub(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vrounduhub.128B
+  __builtin_HEXAGON_V6_vrounduhub_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vrounduwuh
+  __builtin_HEXAGON_V6_vrounduwuh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vrounduwuh.128B
+  __builtin_HEXAGON_V6_vrounduwuh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vroundwh
-  __builtin_HEXAGON_V6_vroundwuh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vroundwuh.128B
-  __builtin_HEXAGON_V6_vroundwuh(v16, v16);
+  __builtin_HEXAGON_V6_vroundwh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vroundwh.128B
+  __builtin_HEXAGON_V6_vroundwh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vroundwuh
-  __builtin_HEXAGON_V6_vrsadubi_128B(v64, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vrsadubi.128B
-  __builtin_HEXAGON_V6_vrsadubi_acc_128B(v64, v64, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vrsadubi.acc.128B
-  __builtin_HEXAGON_V6_vrsadubi_acc(v32, v32, 0, 0);
-  // CHECK: @llvm.hexagon.V6.vrsadubi.acc
-  __builtin_HEXAGON_V6_vrsadubi(v32, 0, 0);
+  __builtin_HEXAGON_V6_vroundwuh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vroundwuh.128B
+  __builtin_HEXAGON_V6_vroundwuh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vrsadubi
-  __builtin_HEXAGON_V6_vsathub_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsathub.128B
-  __builtin_HEXAGON_V6_vsathub(v16, v16);
+  __builtin_HEXAGON_V6_vrsadubi(v128, 0, 0);
+  // CHECK: @llvm.hexagon.V6.vrsadubi.128B
+  __builtin_HEXAGON_V6_vrsadubi_128B(v256, 0, 0);
+  // CHECK: @llvm.hexagon.V6.vrsadubi.acc
+  __builtin_HEXAGON_V6_vrsadubi_acc(v128, v128, 0, 0);
+  // CHECK: @llvm.hexagon.V6.vrsadubi.acc.128B
+  __builtin_HEXAGON_V6_vrsadubi_acc_128B(v256, v256, 0, 0);
   // CHECK: @llvm.hexagon.V6.vsathub
-  __builtin_HEXAGON_V6_vsatwh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsatwh.128B
-  __builtin_HEXAGON_V6_vsatwh(v16, v16);
+  __builtin_HEXAGON_V6_vsathub(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsathub.128B
+  __builtin_HEXAGON_V6_vsathub_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsatuwuh
+  __builtin_HEXAGON_V6_vsatuwuh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsatuwuh.128B
+  __builtin_HEXAGON_V6_vsatuwuh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsatwh
-  __builtin_HEXAGON_V6_vsb_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vsb.128B
-  __builtin_HEXAGON_V6_vsb(v16);
+  __builtin_HEXAGON_V6_vsatwh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsatwh.128B
+  __builtin_HEXAGON_V6_vsatwh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsb
-  __builtin_HEXAGON_V6_vsh_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vsh.128B
-  __builtin_HEXAGON_V6_vshufeh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vshufeh.128B
-  __builtin_HEXAGON_V6_vshufeh(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vshufeh
-  __builtin_HEXAGON_V6_vshuffb_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vshuffb.128B
-  __builtin_HEXAGON_V6_vshuffb(v16);
-  // CHECK: @llvm.hexagon.V6.vshuffb
-  __builtin_HEXAGON_V6_vshuffeb_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vshuffeb.128B
-  __builtin_HEXAGON_V6_vshuffeb(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vshuffeb
-  __builtin_HEXAGON_V6_vshuffh_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vshuffh.128B
-  __builtin_HEXAGON_V6_vshuffh(v16);
-  // CHECK: @llvm.hexagon.V6.vshuffh
-  __builtin_HEXAGON_V6_vshuffob_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vshuffob.128B
-  __builtin_HEXAGON_V6_vshuffob(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vshuffob
-  __builtin_HEXAGON_V6_vshuffvdd_128B(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vshuffvdd.128B
-  __builtin_HEXAGON_V6_vshuffvdd(v16, v16, 0);
-  // CHECK: @llvm.hexagon.V6.vshuffvdd
-  __builtin_HEXAGON_V6_vshufoeb_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vshufoeb.128B
-  __builtin_HEXAGON_V6_vshufoeb(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vshufoeb
-  __builtin_HEXAGON_V6_vshufoeh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vshufoeh.128B
-  __builtin_HEXAGON_V6_vshufoeh(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vshufoeh
-  __builtin_HEXAGON_V6_vshufoh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vshufoh.128B
-  __builtin_HEXAGON_V6_vshufoh(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vshufoh
-  __builtin_HEXAGON_V6_vsh(v16);
+  __builtin_HEXAGON_V6_vsb(v64);
+  // CHECK: @llvm.hexagon.V6.vsb.128B
+  __builtin_HEXAGON_V6_vsb_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vscattermh
+  __builtin_HEXAGON_V6_vscattermh(0, 0, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vscattermh.128B
+  __builtin_HEXAGON_V6_vscattermh_128B(0, 0, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vscattermh.add
+  __builtin_HEXAGON_V6_vscattermh_add(0, 0, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vscattermh.add.128B
+  __builtin_HEXAGON_V6_vscattermh_add_128B(0, 0, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vscattermhq
+  __builtin_HEXAGON_V6_vscattermhq(v64, 0, 0, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vscattermhq.128B
+  __builtin_HEXAGON_V6_vscattermhq_128B(v128, 0, 0, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vscattermhw
+  __builtin_HEXAGON_V6_vscattermhw(0, 0, v128, v64);
+  // CHECK: @llvm.hexagon.V6.vscattermhw.128B
+  __builtin_HEXAGON_V6_vscattermhw_128B(0, 0, v256, v128);
+  // CHECK: @llvm.hexagon.V6.vscattermhw.add
+  __builtin_HEXAGON_V6_vscattermhw_add(0, 0, v128, v64);
+  // CHECK: @llvm.hexagon.V6.vscattermhw.add.128B
+  __builtin_HEXAGON_V6_vscattermhw_add_128B(0, 0, v256, v128);
+  // CHECK: @llvm.hexagon.V6.vscattermhwq
+  __builtin_HEXAGON_V6_vscattermhwq(v64, 0, 0, v128, v64);
+  // CHECK: @llvm.hexagon.V6.vscattermhwq.128B
+  __builtin_HEXAGON_V6_vscattermhwq_128B(v128, 0, 0, v256, v128);
+  // CHECK: @llvm.hexagon.V6.vscattermw
+  __builtin_HEXAGON_V6_vscattermw(0, 0, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vscattermw.128B
+  __builtin_HEXAGON_V6_vscattermw_128B(0, 0, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vscattermw.add
+  __builtin_HEXAGON_V6_vscattermw_add(0, 0, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vscattermw.add.128B
+  __builtin_HEXAGON_V6_vscattermw_add_128B(0, 0, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vscattermwq
+  __builtin_HEXAGON_V6_vscattermwq(v64, 0, 0, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vscattermwq.128B
+  __builtin_HEXAGON_V6_vscattermwq_128B(v128, 0, 0, v128, v128);
   // CHECK: @llvm.hexagon.V6.vsh
-  __builtin_HEXAGON_V6_vsubb_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubb.128B
-  __builtin_HEXAGON_V6_vsubb_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vsubb.dv.128B
-  __builtin_HEXAGON_V6_vsubb_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubb.dv
-  __builtin_HEXAGON_V6_vsubbnq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubbnq.128B
-  __builtin_HEXAGON_V6_vsubbnq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vsubbnq
-  __builtin_HEXAGON_V6_vsubbq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubbq.128B
-  __builtin_HEXAGON_V6_vsubbq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vsubbq
-  __builtin_HEXAGON_V6_vsubb(v16, v16);
+  __builtin_HEXAGON_V6_vsh(v64);
+  // CHECK: @llvm.hexagon.V6.vsh.128B
+  __builtin_HEXAGON_V6_vsh_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vshufeh
+  __builtin_HEXAGON_V6_vshufeh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vshufeh.128B
+  __builtin_HEXAGON_V6_vshufeh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vshuffb
+  __builtin_HEXAGON_V6_vshuffb(v64);
+  // CHECK: @llvm.hexagon.V6.vshuffb.128B
+  __builtin_HEXAGON_V6_vshuffb_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vshuffeb
+  __builtin_HEXAGON_V6_vshuffeb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vshuffeb.128B
+  __builtin_HEXAGON_V6_vshuffeb_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vshuffh
+  __builtin_HEXAGON_V6_vshuffh(v64);
+  // CHECK: @llvm.hexagon.V6.vshuffh.128B
+  __builtin_HEXAGON_V6_vshuffh_128B(v128);
+  // CHECK: @llvm.hexagon.V6.vshuffob
+  __builtin_HEXAGON_V6_vshuffob(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vshuffob.128B
+  __builtin_HEXAGON_V6_vshuffob_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vshuffvdd
+  __builtin_HEXAGON_V6_vshuffvdd(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vshuffvdd.128B
+  __builtin_HEXAGON_V6_vshuffvdd_128B(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vshufoeb
+  __builtin_HEXAGON_V6_vshufoeb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vshufoeb.128B
+  __builtin_HEXAGON_V6_vshufoeb_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vshufoeh
+  __builtin_HEXAGON_V6_vshufoeh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vshufoeh.128B
+  __builtin_HEXAGON_V6_vshufoeh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vshufoh
+  __builtin_HEXAGON_V6_vshufoh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vshufoh.128B
+  __builtin_HEXAGON_V6_vshufoh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubb
-  __builtin_HEXAGON_V6_vsubh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubh.128B
-  __builtin_HEXAGON_V6_vsubh_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vsubh.dv.128B
-  __builtin_HEXAGON_V6_vsubh_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubh.dv
-  __builtin_HEXAGON_V6_vsubhnq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubhnq.128B
-  __builtin_HEXAGON_V6_vsubhnq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vsubhnq
-  __builtin_HEXAGON_V6_vsubhq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubhq.128B
-  __builtin_HEXAGON_V6_vsubhq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vsubhq
-  __builtin_HEXAGON_V6_vsubhsat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubhsat.128B
-  __builtin_HEXAGON_V6_vsubhsat_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vsubhsat.dv.128B
-  __builtin_HEXAGON_V6_vsubhsat_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubhsat.dv
-  __builtin_HEXAGON_V6_vsubhsat(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vsubhsat
-  __builtin_HEXAGON_V6_vsubh(v16, v16);
+  __builtin_HEXAGON_V6_vsubb(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubb.128B
+  __builtin_HEXAGON_V6_vsubb_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubb.dv
+  __builtin_HEXAGON_V6_vsubb_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubb.dv.128B
+  __builtin_HEXAGON_V6_vsubb_dv_128B(v256, v256);
+  // CHECK: @llvm.hexagon.V6.vsubbnq
+  __builtin_HEXAGON_V6_vsubbnq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubbnq.128B
+  __builtin_HEXAGON_V6_vsubbnq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubbq
+  __builtin_HEXAGON_V6_vsubbq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubbq.128B
+  __builtin_HEXAGON_V6_vsubbq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubbsat
+  __builtin_HEXAGON_V6_vsubbsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubbsat.128B
+  __builtin_HEXAGON_V6_vsubbsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubbsat.dv
+  __builtin_HEXAGON_V6_vsubbsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubbsat.dv.128B
+  __builtin_HEXAGON_V6_vsubbsat_dv_128B(v256, v256);
+  // CHECK: @llvm.hexagon.V6.vsubcarry
+  __builtin_HEXAGON_V6_vsubcarry(v64, v64, 0);
+  // CHECK: @llvm.hexagon.V6.vsubcarry.128B
+  __builtin_HEXAGON_V6_vsubcarry_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vsubh
-  __builtin_HEXAGON_V6_vsubhw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubhw.128B
-  __builtin_HEXAGON_V6_vsubhw(v16, v16);
+  __builtin_HEXAGON_V6_vsubh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubh.128B
+  __builtin_HEXAGON_V6_vsubh_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubh.dv
+  __builtin_HEXAGON_V6_vsubh_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubh.dv.128B
+  __builtin_HEXAGON_V6_vsubh_dv_128B(v256, v256);
+  // CHECK: @llvm.hexagon.V6.vsubhnq
+  __builtin_HEXAGON_V6_vsubhnq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubhnq.128B
+  __builtin_HEXAGON_V6_vsubhnq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubhq
+  __builtin_HEXAGON_V6_vsubhq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubhq.128B
+  __builtin_HEXAGON_V6_vsubhq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubhsat
+  __builtin_HEXAGON_V6_vsubhsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubhsat.128B
+  __builtin_HEXAGON_V6_vsubhsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubhsat.dv
+  __builtin_HEXAGON_V6_vsubhsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubhsat.dv.128B
+  __builtin_HEXAGON_V6_vsubhsat_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vsubhw
-  __builtin_HEXAGON_V6_vsububh_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsububh.128B
-  __builtin_HEXAGON_V6_vsububh(v16, v16);
+  __builtin_HEXAGON_V6_vsubhw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubhw.128B
+  __builtin_HEXAGON_V6_vsubhw_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsububh
-  __builtin_HEXAGON_V6_vsububsat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsububsat.128B
-  __builtin_HEXAGON_V6_vsububsat_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vsububsat.dv.128B
-  __builtin_HEXAGON_V6_vsububsat_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsububsat.dv
-  __builtin_HEXAGON_V6_vsububsat(v16, v16);
+  __builtin_HEXAGON_V6_vsububh(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsububh.128B
+  __builtin_HEXAGON_V6_vsububh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsububsat
-  __builtin_HEXAGON_V6_vsubuhsat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubuhsat.128B
-  __builtin_HEXAGON_V6_vsubuhsat_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vsubuhsat.dv.128B
-  __builtin_HEXAGON_V6_vsubuhsat_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubuhsat.dv
-  __builtin_HEXAGON_V6_vsubuhsat(v16, v16);
+  __builtin_HEXAGON_V6_vsububsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsububsat.128B
+  __builtin_HEXAGON_V6_vsububsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsububsat.dv
+  __builtin_HEXAGON_V6_vsububsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsububsat.dv.128B
+  __builtin_HEXAGON_V6_vsububsat_dv_128B(v256, v256);
+  // CHECK: @llvm.hexagon.V6.vsubububb.sat
+  __builtin_HEXAGON_V6_vsubububb_sat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubububb.sat.128B
+  __builtin_HEXAGON_V6_vsubububb_sat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubuhsat
-  __builtin_HEXAGON_V6_vsubuhw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubuhw.128B
-  __builtin_HEXAGON_V6_vsubuhw(v16, v16);
+  __builtin_HEXAGON_V6_vsubuhsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubuhsat.128B
+  __builtin_HEXAGON_V6_vsubuhsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubuhsat.dv
+  __builtin_HEXAGON_V6_vsubuhsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubuhsat.dv.128B
+  __builtin_HEXAGON_V6_vsubuhsat_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vsubuhw
-  __builtin_HEXAGON_V6_vsubw_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubw.128B
-  __builtin_HEXAGON_V6_vsubw_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vsubw.dv.128B
-  __builtin_HEXAGON_V6_vsubw_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubw.dv
-  __builtin_HEXAGON_V6_vsubwnq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubwnq.128B
-  __builtin_HEXAGON_V6_vsubwnq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vsubwnq
-  __builtin_HEXAGON_V6_vsubwq_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubwq.128B
-  __builtin_HEXAGON_V6_vsubwq(v16, v16, v16);
-  // CHECK: @llvm.hexagon.V6.vsubwq
-  __builtin_HEXAGON_V6_vsubwsat_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubwsat.128B
-  __builtin_HEXAGON_V6_vsubwsat_dv_128B(v64, v64);
-  // CHECK: @llvm.hexagon.V6.vsubwsat.dv.128B
-  __builtin_HEXAGON_V6_vsubwsat_dv(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vsubwsat.dv
-  __builtin_HEXAGON_V6_vsubwsat(v16, v16);
-  // CHECK: @llvm.hexagon.V6.vsubwsat
-  __builtin_HEXAGON_V6_vsubw(v16, v16);
+  __builtin_HEXAGON_V6_vsubuhw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubuhw.128B
+  __builtin_HEXAGON_V6_vsubuhw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubuwsat
+  __builtin_HEXAGON_V6_vsubuwsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubuwsat.128B
+  __builtin_HEXAGON_V6_vsubuwsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubuwsat.dv
+  __builtin_HEXAGON_V6_vsubuwsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubuwsat.dv.128B
+  __builtin_HEXAGON_V6_vsubuwsat_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vsubw
-  __builtin_HEXAGON_V6_vswap_128B(v32, v32, v32);
-  // CHECK: @llvm.hexagon.V6.vswap.128B
-  __builtin_HEXAGON_V6_vswap(v16, v16, v16);
+  __builtin_HEXAGON_V6_vsubw(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubw.128B
+  __builtin_HEXAGON_V6_vsubw_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubw.dv
+  __builtin_HEXAGON_V6_vsubw_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubw.dv.128B
+  __builtin_HEXAGON_V6_vsubw_dv_128B(v256, v256);
+  // CHECK: @llvm.hexagon.V6.vsubwnq
+  __builtin_HEXAGON_V6_vsubwnq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubwnq.128B
+  __builtin_HEXAGON_V6_vsubwnq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubwq
+  __builtin_HEXAGON_V6_vsubwq(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubwq.128B
+  __builtin_HEXAGON_V6_vsubwq_128B(v128, v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubwsat
+  __builtin_HEXAGON_V6_vsubwsat(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vsubwsat.128B
+  __builtin_HEXAGON_V6_vsubwsat_128B(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubwsat.dv
+  __builtin_HEXAGON_V6_vsubwsat_dv(v128, v128);
+  // CHECK: @llvm.hexagon.V6.vsubwsat.dv.128B
+  __builtin_HEXAGON_V6_vsubwsat_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vswap
-  __builtin_HEXAGON_V6_vtmpyb_128B(v64, 0);
-  // CHECK: @llvm.hexagon.V6.vtmpyb.128B
-  __builtin_HEXAGON_V6_vtmpyb_acc_128B(v64, v64, 0);
-  // CHECK: @llvm.hexagon.V6.vtmpyb.acc.128B
-  __builtin_HEXAGON_V6_vtmpyb_acc(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vtmpyb.acc
-  __builtin_HEXAGON_V6_vtmpybus_128B(v64, 0);
-  // CHECK: @llvm.hexagon.V6.vtmpybus.128B
-  __builtin_HEXAGON_V6_vtmpybus_acc_128B(v64, v64, 0);
-  // CHECK: @llvm.hexagon.V6.vtmpybus.acc.128B
-  __builtin_HEXAGON_V6_vtmpybus_acc(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vtmpybus.acc
-  __builtin_HEXAGON_V6_vtmpybus(v32, 0);
-  // CHECK: @llvm.hexagon.V6.vtmpybus
-  __builtin_HEXAGON_V6_vtmpyb(v32, 0);
+  __builtin_HEXAGON_V6_vswap(v64, v64, v64);
+  // CHECK: @llvm.hexagon.V6.vswap.128B
+  __builtin_HEXAGON_V6_vswap_128B(v128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vtmpyb
-  __builtin_HEXAGON_V6_vtmpyhb_128B(v64, 0);
-  // CHECK: @llvm.hexagon.V6.vtmpyhb.128B
-  __builtin_HEXAGON_V6_vtmpyhb_acc_128B(v64, v64, 0);
-  // CHECK: @llvm.hexagon.V6.vtmpyhb.acc.128B
-  __builtin_HEXAGON_V6_vtmpyhb_acc(v32, v32, 0);
-  // CHECK: @llvm.hexagon.V6.vtmpyhb.acc
-  __builtin_HEXAGON_V6_vtmpyhb(v32, 0);
+  __builtin_HEXAGON_V6_vtmpyb(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vtmpyb.128B
+  __builtin_HEXAGON_V6_vtmpyb_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vtmpyb.acc
+  __builtin_HEXAGON_V6_vtmpyb_acc(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vtmpyb.acc.128B
+  __builtin_HEXAGON_V6_vtmpyb_acc_128B(v256, v256, 0);
+  // CHECK: @llvm.hexagon.V6.vtmpybus
+  __builtin_HEXAGON_V6_vtmpybus(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vtmpybus.128B
+  __builtin_HEXAGON_V6_vtmpybus_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vtmpybus.acc
+  __builtin_HEXAGON_V6_vtmpybus_acc(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vtmpybus.acc.128B
+  __builtin_HEXAGON_V6_vtmpybus_acc_128B(v256, v256, 0);
   // CHECK: @llvm.hexagon.V6.vtmpyhb
-  __builtin_HEXAGON_V6_vunpackb_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vunpackb.128B
-  __builtin_HEXAGON_V6_vunpackb(v16);
+  __builtin_HEXAGON_V6_vtmpyhb(v128, 0);
+  // CHECK: @llvm.hexagon.V6.vtmpyhb.128B
+  __builtin_HEXAGON_V6_vtmpyhb_128B(v256, 0);
+  // CHECK: @llvm.hexagon.V6.vtmpyhb.acc
+  __builtin_HEXAGON_V6_vtmpyhb_acc(v128, v128, 0);
+  // CHECK: @llvm.hexagon.V6.vtmpyhb.acc.128B
+  __builtin_HEXAGON_V6_vtmpyhb_acc_128B(v256, v256, 0);
   // CHECK: @llvm.hexagon.V6.vunpackb
-  __builtin_HEXAGON_V6_vunpackh_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vunpackh.128B
-  __builtin_HEXAGON_V6_vunpackh(v16);
+  __builtin_HEXAGON_V6_vunpackb(v64);
+  // CHECK: @llvm.hexagon.V6.vunpackb.128B
+  __builtin_HEXAGON_V6_vunpackb_128B(v128);
   // CHECK: @llvm.hexagon.V6.vunpackh
-  __builtin_HEXAGON_V6_vunpackob_128B(v64, v32);
-  // CHECK: @llvm.hexagon.V6.vunpackob.128B
-  __builtin_HEXAGON_V6_vunpackob(v32, v16);
+  __builtin_HEXAGON_V6_vunpackh(v64);
+  // CHECK: @llvm.hexagon.V6.vunpackh.128B
+  __builtin_HEXAGON_V6_vunpackh_128B(v128);
   // CHECK: @llvm.hexagon.V6.vunpackob
-  __builtin_HEXAGON_V6_vunpackoh_128B(v64, v32);
-  // CHECK: @llvm.hexagon.V6.vunpackoh.128B
-  __builtin_HEXAGON_V6_vunpackoh(v32, v16);
+  __builtin_HEXAGON_V6_vunpackob(v128, v64);
+  // CHECK: @llvm.hexagon.V6.vunpackob.128B
+  __builtin_HEXAGON_V6_vunpackob_128B(v256, v128);
   // CHECK: @llvm.hexagon.V6.vunpackoh
-  __builtin_HEXAGON_V6_vunpackub_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vunpackub.128B
-  __builtin_HEXAGON_V6_vunpackub(v16);
+  __builtin_HEXAGON_V6_vunpackoh(v128, v64);
+  // CHECK: @llvm.hexagon.V6.vunpackoh.128B
+  __builtin_HEXAGON_V6_vunpackoh_128B(v256, v128);
   // CHECK: @llvm.hexagon.V6.vunpackub
-  __builtin_HEXAGON_V6_vunpackuh_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vunpackuh.128B
-  __builtin_HEXAGON_V6_vunpackuh(v16);
+  __builtin_HEXAGON_V6_vunpackub(v64);
+  // CHECK: @llvm.hexagon.V6.vunpackub.128B
+  __builtin_HEXAGON_V6_vunpackub_128B(v128);
   // CHECK: @llvm.hexagon.V6.vunpackuh
-  __builtin_HEXAGON_V6_vxor_128B(v32, v32);
-  // CHECK: @llvm.hexagon.V6.vxor.128B
-  __builtin_HEXAGON_V6_vxor(v16, v16);
+  __builtin_HEXAGON_V6_vunpackuh(v64);
+  // CHECK: @llvm.hexagon.V6.vunpackuh.128B
+  __builtin_HEXAGON_V6_vunpackuh_128B(v128);
   // CHECK: @llvm.hexagon.V6.vxor
-  __builtin_HEXAGON_V6_vzb_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vzb.128B
-  __builtin_HEXAGON_V6_vzb(v16);
+  __builtin_HEXAGON_V6_vxor(v64, v64);
+  // CHECK: @llvm.hexagon.V6.vxor.128B
+  __builtin_HEXAGON_V6_vxor_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vzb
-  __builtin_HEXAGON_V6_vzh_128B(v32);
-  // CHECK: @llvm.hexagon.V6.vzh.128B
-  __builtin_HEXAGON_V6_vzh(v16);
+  __builtin_HEXAGON_V6_vzb(v64);
+  // CHECK: @llvm.hexagon.V6.vzb.128B
+  __builtin_HEXAGON_V6_vzb_128B(v128);
   // CHECK: @llvm.hexagon.V6.vzh
-  __builtin_HEXAGON_Y2_dccleana(0);
+  __builtin_HEXAGON_V6_vzh(v64);
+  // CHECK: @llvm.hexagon.V6.vzh.128B
+  __builtin_HEXAGON_V6_vzh_128B(v128);
   // CHECK: @llvm.hexagon.Y2.dccleana
-  __builtin_HEXAGON_Y2_dccleaninva(0);
+  __builtin_HEXAGON_Y2_dccleana(0);
   // CHECK: @llvm.hexagon.Y2.dccleaninva
-  __builtin_HEXAGON_Y2_dcinva(0);
+  __builtin_HEXAGON_Y2_dccleaninva(0);
   // CHECK: @llvm.hexagon.Y2.dcinva
-  __builtin_HEXAGON_Y2_dczeroa(0);
+  __builtin_HEXAGON_Y2_dcinva(0);
   // CHECK: @llvm.hexagon.Y2.dczeroa
-  __builtin_HEXAGON_Y4_l2fetch(0, 0);
+  __builtin_HEXAGON_Y2_dczeroa(0);
   // CHECK: @llvm.hexagon.Y4.l2fetch
-  __builtin_HEXAGON_Y5_l2fetch(0, 0);
+  __builtin_HEXAGON_Y4_l2fetch(0, 0);
   // CHECK: @llvm.hexagon.Y5.l2fetch
+  __builtin_HEXAGON_Y5_l2fetch(0, 0);
+  // CHECK: @llvm.hexagon.brev.ldb
+  __builtin_brev_ldb(0, 0, 0);
+  // CHECK: @llvm.hexagon.brev.ldd
+  __builtin_brev_ldd(0, 0, 0);
+  // CHECK: @llvm.hexagon.brev.ldh
+  __builtin_brev_ldh(0, 0, 0);
+  // CHECK: @llvm.hexagon.brev.ldub
+  __builtin_brev_ldub(0, 0, 0);
+  // CHECK: @llvm.hexagon.brev.lduh
+  __builtin_brev_lduh(0, 0, 0);
+  // CHECK: @llvm.hexagon.brev.ldw
+  __builtin_brev_ldw(0, 0, 0);
+  // CHECK: @llvm.hexagon.brev.stb
+  __builtin_brev_stb(0, 0, 0);
+  // CHECK: @llvm.hexagon.brev.std
+  __builtin_brev_std(0, 0, 0);
+  // CHECK: @llvm.hexagon.brev.sth
+  __builtin_brev_sth(0, 0, 0);
+  // CHECK: @llvm.hexagon.brev.sthhi
+  __builtin_brev_sthhi(0, 0, 0);
+  // CHECK: @llvm.hexagon.brev.stw
+  __builtin_brev_stw(0, 0, 0);
+  // CHECK: @llvm.hexagon.circ.ldb
+  __builtin_circ_ldb(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.circ.ldd
+  __builtin_circ_ldd(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.circ.ldh
+  __builtin_circ_ldh(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.circ.ldub
+  __builtin_circ_ldub(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.circ.lduh
+  __builtin_circ_lduh(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.circ.ldw
+  __builtin_circ_ldw(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.circ.stb
+  __builtin_circ_stb(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.circ.std
+  __builtin_circ_std(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.circ.sth
+  __builtin_circ_sth(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.circ.sthhi
+  __builtin_circ_sthhi(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.circ.stw
+  __builtin_circ_stw(0, 0, 0, 0);
+  // CHECK: @llvm.hexagon.prefetch
+  __builtin_HEXAGON_prefetch(0);
 }
diff --git a/test/CodeGen/builtins-ms.c b/test/CodeGen/builtins-ms.c
index a33b57d..915cc17 100644
--- a/test/CodeGen/builtins-ms.c
+++ b/test/CodeGen/builtins-ms.c
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - -fms-extensions -triple i686-pc-win32 | FileCheck %s
 
-// CHECK-LABEL: define void @test_alloca(
+// CHECK-LABEL: define dso_local void @test_alloca(
 void capture(void *);
 void test_alloca(int n) {
   capture(_alloca(n));
@@ -8,7 +8,7 @@
   // CHECK: call void @capture(i8* %[[arg]])
 }
 
-// CHECK-LABEL: define void @test_alloca_with_align(
+// CHECK-LABEL: define dso_local void @test_alloca_with_align(
 void test_alloca_with_align(int n) {
   capture(__builtin_alloca_with_align(n, 64));
   // CHECK: %[[arg:.*]] = alloca i8, i32 %{{.*}}, align 8
diff --git a/test/CodeGen/builtins-nvptx-ptx50.cu b/test/CodeGen/builtins-nvptx-ptx50.cu
new file mode 100644
index 0000000..e85be44
--- /dev/null
+++ b/test/CodeGen/builtins-nvptx-ptx50.cu
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \
+// RUN:            -fcuda-is-device -S -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CHECK %s
+//
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_50 \
+// RUN:   -fcuda-is-device -S -o /dev/null -x cuda -verify %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+// We have to keep all builtins that depend on particular target feature in the
+// same function, because the codegen will stop after the very first function
+// that encounters an error, so -verify will not be able to find errors in
+// subsequent functions.
+
+// CHECK-LABEL: test_fn
+__device__ void test_fn(double d, double* double_ptr) {
+  // CHECK: call double @llvm.nvvm.atomic.load.add.f64.p0f64
+  // expected-error@+1 {{'__nvvm_atom_add_gen_d' needs target feature satom}}
+  __nvvm_atom_add_gen_d(double_ptr, d);
+}
diff --git a/test/CodeGen/builtins-overflow.c b/test/CodeGen/builtins-overflow.c
index c8d828d..57f90eb 100644
--- a/test/CodeGen/builtins-overflow.c
+++ b/test/CodeGen/builtins-overflow.c
@@ -14,7 +14,7 @@
 void overflowed(void);
 
 unsigned test_add_overflow_uint_uint_uint(unsigned x, unsigned y) {
-  // CHECK-LABEL: define i32 @test_add_overflow_uint_uint_uint
+  // CHECK-LABEL: define {{(dso_local )?}}i32 @test_add_overflow_uint_uint_uint
   // CHECK-NOT: ext
   // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
   // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
@@ -28,7 +28,7 @@
 }
 
 int test_add_overflow_int_int_int(int x, int y) {
-  // CHECK-LABEL: define i32 @test_add_overflow_int_int_int
+  // CHECK-LABEL: define {{(dso_local )?}}i32 @test_add_overflow_int_int_int
   // CHECK-NOT: ext
   // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
   // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
@@ -42,7 +42,7 @@
 }
 
 unsigned test_sub_overflow_uint_uint_uint(unsigned x, unsigned y) {
-  // CHECK-LABEL: define i32 @test_sub_overflow_uint_uint_uint
+  // CHECK-LABEL: define {{(dso_local )?}}i32 @test_sub_overflow_uint_uint_uint
   // CHECK-NOT: ext
   // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
   // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
@@ -56,7 +56,7 @@
 }
 
 int test_sub_overflow_int_int_int(int x, int y) {
-  // CHECK-LABEL: define i32 @test_sub_overflow_int_int_int
+  // CHECK-LABEL: define {{(dso_local )?}}i32 @test_sub_overflow_int_int_int
   // CHECK-NOT: ext
   // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
   // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
@@ -70,7 +70,7 @@
 }
 
 unsigned test_mul_overflow_uint_uint_uint(unsigned x, unsigned y) {
-  // CHECK-LABEL: define i32 @test_mul_overflow_uint_uint_uint
+  // CHECK-LABEL: define {{(dso_local )?}}i32 @test_mul_overflow_uint_uint_uint
   // CHECK-NOT: ext
   // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
   // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
@@ -84,7 +84,7 @@
 }
 
 int test_mul_overflow_int_int_int(int x, int y) {
-  // CHECK-LABEL: define i32 @test_mul_overflow_int_int_int
+  // CHECK-LABEL: define {{(dso_local )?}}i32 @test_mul_overflow_int_int_int
   // CHECK-NOT: ext
   // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
   // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
@@ -98,7 +98,7 @@
 }
 
 int test_add_overflow_uint_int_int(unsigned x, int y) {
-  // CHECK-LABEL: define i32 @test_add_overflow_uint_int_int
+  // CHECK-LABEL: define {{(dso_local )?}}i32 @test_add_overflow_uint_int_int
   // CHECK: [[XE:%.+]] = zext i32 %{{.+}} to i33
   // CHECK: [[YE:%.+]] = sext i32 %{{.+}} to i33
   // CHECK: [[S:%.+]] = call { i33, i1 } @llvm.sadd.with.overflow.i33(i33 [[XE]], i33 [[YE]])
@@ -136,7 +136,7 @@
 }
 
 unsigned test_add_overflow_bool_bool_uint(_Bool x, _Bool y) {
-  // CHECK-LABEL: define i32 @test_add_overflow_bool_bool_uint
+  // CHECK-LABEL: define {{(dso_local )?}}i32 @test_add_overflow_bool_bool_uint
   // CHECK: [[XE:%.+]] = zext i1 %{{.+}} to i32
   // CHECK: [[YE:%.+]] = zext i1 %{{.+}} to i32
   // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[XE]], i32 [[YE]])
@@ -165,7 +165,7 @@
 }
 
 int test_add_overflow_volatile(int x, int y) {
-  // CHECK-LABEL: define i32 @test_add_overflow_volatile
+  // CHECK-LABEL: define {{(dso_local )?}}i32 @test_add_overflow_volatile
   // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
   // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
   // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
@@ -338,3 +338,126 @@
     return LongLongErrorCode;
   return result;
 }
+
+int test_mixed_sign_mull_overflow(int x, unsigned y) {
+// CHECK: @test_mixed_sign_mull_overflow
+// CHECK:       [[IsNeg:%.*]] = icmp slt i32 [[Op1:%.*]], 0
+// CHECK-NEXT:  [[Signed:%.*]] = sub i32 0, [[Op1]]
+// CHECK-NEXT:  [[AbsSigned:%.*]] = select i1 [[IsNeg]], i32 [[Signed]], i32 [[Op1]]
+// CHECK-NEXT:  call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[AbsSigned]], i32 %{{.*}})
+// CHECK-NEXT:  [[UnsignedOFlow:%.*]] = extractvalue { i32, i1 } %{{.*}}, 1
+// CHECK-NEXT:  [[UnsignedResult:%.*]] = extractvalue { i32, i1 } %{{.*}}, 0
+// CHECK-NEXT:  [[IsNegZext:%.*]] = zext i1 [[IsNeg]] to i32
+// CHECK-NEXT:  [[MaxResult:%.*]] = add i32 2147483647, [[IsNegZext]]
+// CHECK-NEXT:  [[SignedOFlow:%.*]] = icmp ugt i32 [[UnsignedResult]], [[MaxResult]]
+// CHECK-NEXT:  [[OFlow:%.*]] = or i1 [[UnsignedOFlow]], [[SignedOFlow]]
+// CHECK-NEXT:  [[NegativeResult:%.*]] = sub i32 0, [[UnsignedResult]]
+// CHECK-NEXT:  [[Result:%.*]] = select i1 [[IsNeg]], i32 [[NegativeResult]], i32 [[UnsignedResult]]
+// CHECK-NEXT:  store i32 [[Result]], i32* %{{.*}}, align 4
+// CHECK:       br i1 [[OFlow]]
+
+  int result;
+  if (__builtin_mul_overflow(x, y, &result))
+    return LongErrorCode;
+  return result;
+}
+
+int test_mixed_sign_mull_overflow_unsigned(int x, unsigned y) {
+// CHECK: @test_mixed_sign_mull_overflow_unsigned
+// CHECK:       [[IsNeg:%.*]] = icmp slt i32 [[Op1:%.*]], 0
+// CHECK-NEXT:  [[Signed:%.*]] = sub i32 0, [[Op1]]
+// CHECK-NEXT:  [[AbsSigned:%.*]] = select i1 [[IsNeg]], i32 [[Signed]], i32 [[Op1]]
+// CHECK-NEXT:  call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[AbsSigned]], i32 %{{.*}})
+// CHECK-NEXT:  [[UnsignedOFlow:%.*]] = extractvalue { i32, i1 } %{{.*}}, 1
+// CHECK-NEXT:  [[UnsignedResult:%.*]] = extractvalue { i32, i1 } %{{.*}}, 0
+// CHECK-NEXT:  [[NotNull:%.*]] = icmp ne i32 [[UnsignedResult]], 0
+// CHECK-NEXT:  [[Underflow:%.*]] = and i1 [[IsNeg]], [[NotNull]]
+// CHECK-NEXT:  [[OFlow:%.*]] = or i1 [[UnsignedOFlow]], [[Underflow]]
+// CHECK-NEXT:  [[NegatedResult:%.*]] = sub i32 0, [[UnsignedResult]]
+// CHECK-NEXT:  [[Result:%.*]] = select i1 [[IsNeg]], i32 [[NegatedResult]], i32 [[UnsignedResult]]
+// CHECK-NEXT:  store i32 [[Result]], i32* %{{.*}}, align 4
+// CHECK:       br i1 [[OFlow]]
+
+  unsigned result;
+  if (__builtin_mul_overflow(x, y, &result))
+    return LongErrorCode;
+  return result;
+}
+
+int test_mixed_sign_mull_overflow_swapped(int x, unsigned y) {
+// CHECK: @test_mixed_sign_mull_overflow_swapped
+// CHECK:  call { i32, i1 } @llvm.umul.with.overflow.i32
+// CHECK:  add i32 2147483647
+  int result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mulll_overflow(long long x, unsigned long long y) {
+// CHECK: @test_mixed_sign_mulll_overflow
+// CHECK:  call { i64, i1 } @llvm.umul.with.overflow.i64
+// CHECK:  add i64 92233720368547
+  long long result;
+  if (__builtin_mul_overflow(x, y, &result))
+    return LongLongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mulll_overflow_swapped(long long x, unsigned long long y) {
+// CHECK: @test_mixed_sign_mulll_overflow_swapped
+// CHECK:  call { i64, i1 } @llvm.umul.with.overflow.i64
+// CHECK:  add i64 92233720368547
+  long long result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongLongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mulll_overflow_trunc_signed(long long x, unsigned long long y) {
+// CHECK: @test_mixed_sign_mulll_overflow_trunc_signed
+// CHECK:  call { i64, i1 } @llvm.umul.with.overflow.i64
+// CHECK:  add i64 2147483647
+// CHECK:  trunc
+// CHECK:  store
+  int result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongLongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mulll_overflow_trunc_unsigned(long long x, unsigned long long y) {
+// CHECK: @test_mixed_sign_mulll_overflow_trunc_unsigned
+// CHECK:       call { i64, i1 } @llvm.umul.with.overflow.i64
+// CHECK:       [[NON_ZERO:%.*]] = icmp ne i64 [[UNSIGNED_RESULT:%.*]], 0
+// CHECK-NEXT:  [[UNDERFLOW:%.*]] = and i1 {{.*}}, [[NON_ZERO]]
+// CHECK-NEXT:  [[OVERFLOW_PRE_TRUNC:%.*]] = or i1 {{.*}}, [[UNDERFLOW]]
+// CHECK-NEXT:  [[TRUNC_OVERFLOW:%.*]] = icmp ugt i64 [[UNSIGNED_RESULT]], 4294967295
+// CHECK-NEXT:  [[OVERFLOW:%.*]] = or i1 [[OVERFLOW_PRE_TRUNC]], [[TRUNC_OVERFLOW]]
+// CHECK-NEXT:  [[NEGATED:%.*]] = sub i64 0, [[UNSIGNED_RESULT]]
+// CHECK-NEXT:  [[RESULT:%.*]] = select i1 {{.*}}, i64 [[NEGATED]], i64 [[UNSIGNED_RESULT]]
+// CHECK-NEXT:  trunc i64 [[RESULT]] to i32
+// CHECK-NEXT:  store
+  unsigned result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongLongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mul_overflow_extend_signed(int x, unsigned y) {
+// CHECK: @test_mixed_sign_mul_overflow_extend_signed
+// CHECK:  call { i64, i1 } @llvm.smul.with.overflow.i64
+  long long result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongLongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mul_overflow_extend_unsigned(int x, unsigned y) {
+// CHECK: @test_mixed_sign_mul_overflow_extend_unsigned
+// CHECK:  call { i65, i1 } @llvm.smul.with.overflow.i65
+  unsigned long long result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongLongErrorCode;
+  return result;
+}
diff --git a/test/CodeGen/builtins-wasm.c b/test/CodeGen/builtins-wasm.c
index e0f72d2..39252e7 100644
--- a/test/CodeGen/builtins-wasm.c
+++ b/test/CodeGen/builtins-wasm.c
@@ -3,25 +3,37 @@
 // RUN: %clang_cc1 -triple wasm64-unknown-unknown -O3 -emit-llvm -o - %s \
 // RUN:   | FileCheck %s -check-prefix=WEBASSEMBLY64
 
-__SIZE_TYPE__ f1(void) {
+__SIZE_TYPE__ f0(void) {
+  return __builtin_wasm_mem_size(0);
+// WEBASSEMBLY32: call {{i.*}} @llvm.wasm.mem.size.i32(i32 0)
+// WEBASSEMBLY64: call {{i.*}} @llvm.wasm.mem.size.i64(i32 0)
+}
+
+__SIZE_TYPE__ f1(__SIZE_TYPE__ delta) {
+  return __builtin_wasm_mem_grow(0, delta);
+// WEBASSEMBLY32: call i32 @llvm.wasm.mem.grow.i32(i32 0, i32 %{{.*}})
+// WEBASSEMBLY64: call i64 @llvm.wasm.mem.grow.i64(i32 0, i64 %{{.*}})
+}
+
+__SIZE_TYPE__ f2(void) {
   return __builtin_wasm_current_memory();
 // WEBASSEMBLY32: call {{i.*}} @llvm.wasm.current.memory.i32()
 // WEBASSEMBLY64: call {{i.*}} @llvm.wasm.current.memory.i64()
 }
 
-__SIZE_TYPE__ f2(__SIZE_TYPE__ delta) {
+__SIZE_TYPE__ f3(__SIZE_TYPE__ delta) {
   return __builtin_wasm_grow_memory(delta);
 // WEBASSEMBLY32: call i32 @llvm.wasm.grow.memory.i32(i32 %{{.*}})
 // WEBASSEMBLY64: call i64 @llvm.wasm.grow.memory.i64(i64 %{{.*}})
 }
 
-void f3(unsigned int tag, void *obj) {
+void f4(unsigned int tag, void *obj) {
   return __builtin_wasm_throw(tag, obj);
 // WEBASSEMBLY32: call void @llvm.wasm.throw(i32 %{{.*}}, i8* %{{.*}})
 // WEBASSEMBLY64: call void @llvm.wasm.throw(i32 %{{.*}}, i8* %{{.*}})
 }
 
-void f4() {
+void f5(void) {
   return __builtin_wasm_rethrow();
 // WEBASSEMBLY32: call void @llvm.wasm.rethrow()
 // WEBASSEMBLY64: call void @llvm.wasm.rethrow()
diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c
index 5df0e01..fc3cc44 100644
--- a/test/CodeGen/builtins-x86.c
+++ b/test/CodeGen/builtins-x86.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -emit-llvm -o %t %s
-// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -fsyntax-only -o %t %s
+// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -target-feature +ibt -target-feature +shstk -emit-llvm -o %t %s
+// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +ibt -target-feature +shstk -target-feature +clzero -fsyntax-only -o %t %s
 
 #ifdef USE_ALL
 #define USE_3DNOW
@@ -257,6 +257,19 @@
   tmp_V8c = __builtin_ia32_packuswb(tmp_V4s, tmp_V4s);
   tmp_i = __builtin_ia32_vec_ext_v2si(tmp_V2i, 0);
 
+  __builtin_ia32_incsspd(tmp_Ui);
+  __builtin_ia32_incsspq(tmp_ULLi);
+  tmp_Ui = __builtin_ia32_rdsspd(tmp_Ui);
+  tmp_ULLi = __builtin_ia32_rdsspq(tmp_ULLi);
+  __builtin_ia32_saveprevssp();
+  __builtin_ia32_rstorssp(tmp_vp);
+  __builtin_ia32_wrssd(tmp_Ui, tmp_vp);
+  __builtin_ia32_wrssq(tmp_ULLi, tmp_vp);
+  __builtin_ia32_wrussd(tmp_Ui, tmp_vp);
+  __builtin_ia32_wrussq(tmp_ULLi, tmp_vp);
+  __builtin_ia32_setssbsy();
+  __builtin_ia32_clrssbsy(tmp_vp);
+
   (void) __builtin_ia32_ldmxcsr(tmp_Ui);
   (void) _mm_setcsr(tmp_Ui);
   tmp_Ui = __builtin_ia32_stmxcsr();
diff --git a/test/CodeGen/builtins.c b/test/CodeGen/builtins.c
index 3584585..4f84db0 100644
--- a/test/CodeGen/builtins.c
+++ b/test/CodeGen/builtins.c
@@ -176,6 +176,19 @@
 }
 // CHECK: }
 
+// CHECK-LABEL: define void @test_conditional_bzero
+void test_conditional_bzero() {
+  char dst[20];
+  int _sz = 20, len = 20;
+  return (_sz
+          ? ((_sz >= len)
+              ? __builtin_bzero(dst, len)
+              : foo())
+          : __builtin_bzero(dst, len));
+  // CHECK: call void @llvm.memset
+  // CHECK: call void @llvm.memset
+  // CHECK-NOT: phi
+}
 
 // CHECK-LABEL: define void @test_float_builtins
 void test_float_builtins(float F, double D, long double LD) {
@@ -317,6 +330,15 @@
   resld = __builtin_floorl(LD);
   // CHECK: call x86_fp80 @llvm.floor.f80
 
+  resf = __builtin_sqrtf(F);
+  // CHECK: call float @llvm.sqrt.f32(
+
+  resd = __builtin_sqrt(D);
+  // CHECK: call double @llvm.sqrt.f64(
+
+  resld = __builtin_sqrtl(LD);
+  // CHECK: call x86_fp80 @llvm.sqrt.f80
+
   resf = __builtin_truncf(F);
   // CHECK: call float @llvm.trunc.f32
 
diff --git a/test/CodeGen/c-strings.c b/test/CodeGen/c-strings.c
index 4e14d98..aa428b8 100644
--- a/test/CodeGen/c-strings.c
+++ b/test/CodeGen/c-strings.c
@@ -4,7 +4,7 @@
 // Should be 3 hello strings, two global (of different sizes), the rest are
 // shared.
 
-// CHECK: @align = global i8 [[ALIGN:[0-9]+]]
+// CHECK: @align = {{(dso_local )?}}global i8 [[ALIGN:[0-9]+]]
 // ITANIUM: @.str = private unnamed_addr constant [6 x i8] c"hello\00"
 // MSABI: @"\01??_C@_05CJBACGMB@hello?$AA@" = linkonce_odr unnamed_addr constant [6 x i8] c"hello\00", comdat, align 1
 // ITANIUM: @f1.x = internal global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0)
@@ -13,7 +13,7 @@
 // CHECK: @f3.x = internal global [8 x i8] c"hello\00\00\00", align [[ALIGN]]
 // ITANIUM: @f4.x = internal global %struct.s { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0) }
 // MSABI: @f4.x = internal global %struct.s { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @"\01??_C@_05CJBACGMB@hello?$AA@", i32 0, i32 0) }
-// CHECK: @x = global [3 x i8] c"ola", align [[ALIGN]]
+// CHECK: @x = {{(dso_local )?}}global [3 x i8] c"ola", align [[ALIGN]]
 
 // XFAIL: hexagon
 // Hexagon aligns arrays of size 8+ bytes to a 64-bit boundary, which
diff --git a/test/CodeGen/c11atomics-ios.c b/test/CodeGen/c11atomics-ios.c
index fb731df..0e24ed2 100644
--- a/test/CodeGen/c11atomics-ios.c
+++ b/test/CodeGen/c11atomics-ios.c
@@ -132,7 +132,7 @@
 // CHECK-NEXT: [[T0:%.*]] = load [[S]]*, [[S]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[T1]], i8* align 2 [[T2]], i32 8, i1 false)
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[TMP0]] to i64*
 // CHECK-NEXT: [[T4:%.*]] = load i64, i64* [[T3]], align 8
 // CHECK-NEXT: [[T5:%.*]] = bitcast [[S]]* [[T0]] to i64*
@@ -154,7 +154,7 @@
 
 // CHECK-NEXT: [[P:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[T0]], i8 0, i64 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[P]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T1]], align 8
@@ -165,7 +165,7 @@
   __c11_atomic_init(fp, (PS){1,2,3});
 
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 [[T0]], i8 0, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[X]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T1]], align 8
@@ -183,16 +183,16 @@
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP0]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[T1]], i8* align 8 [[T2]], i32 6, i1 false)
   PS f = *fp;
 
 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 [[T1]], i8 0, i32 8, i1 false)
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP1]], i32 0, i32 0
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[T2]], i8* align 2 [[T3]], i32 6, i1 false)
 // CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[TMP1]] to i64*
 // CHECK-NEXT: [[T5:%.*]] = load i64, i64* [[T4]], align 8
 // CHECK-NEXT: [[T6:%.*]] = bitcast [[APS]]* [[T0]] to i64*
@@ -215,7 +215,7 @@
   // CHECK:   [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
   // CHECK:   [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
   // CHECK:   [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[AGG_RESULT8]], i8* align 8 [[ATOMIC_RES8]], i32 6, i1 false)
 
   return __c11_atomic_load(addr, 5);
 }
@@ -232,11 +232,11 @@
   // CHECK:   [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
   // CHECK:   [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[VAL8]], i32 6, i1 false)
   // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
   // CHECK:   [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_VAL8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false)
   // CHECK:   [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
   // CHECK:   [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8
   // CHECK:   store atomic i64 [[VAL64]], i64* [[ADDR64]] seq_cst, align 8
@@ -257,11 +257,11 @@
   // CHECK:   [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
   // CHECK:   [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[VAL8]], i32 6, i1 false)
   // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
   // CHECK:   [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_VAL8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false)
   // CHECK:   [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
   // CHECK:   [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
   // CHECK:   [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8
@@ -270,7 +270,7 @@
   // CHECK:   [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
   // CHECK:   [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
   // CHECK:   [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[AGG_RESULT8]], i8* align 8 [[ATOMIC_RES8]], i32 6, i1 false)
   return __c11_atomic_exchange(addr, *val, 5);
 }
 
@@ -291,15 +291,15 @@
   // CHECK:   [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
   // CHECK:   [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[NEW8]], i32 6, i1 false)
   // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
   // CHECK:   [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED:%.*]] to i8*
   // CHECK:   [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_DESIRED8]], i8* align 2 [[DESIRED8]], i64 6, i1 false)
   // CHECK:   [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED:%.*]] to i64*
   // CHECK:   [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8*
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_NEW8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false)
   // CHECK:   [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
   // CHECK:   [[ATOMIC_DESIRED_VAL64:%.*]] = load i64, i64* [[ATOMIC_DESIRED64]], align 8
   // CHECK:   [[ATOMIC_NEW_VAL64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 8
diff --git a/test/CodeGen/c11atomics.c b/test/CodeGen/c11atomics.c
index ccb6421..3774ded 100644
--- a/test/CodeGen/c11atomics.c
+++ b/test/CodeGen/c11atomics.c
@@ -282,7 +282,7 @@
 // CHECK-NEXT: [[T0:%.*]] = load [[S]]*, [[S]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[T1]], i8* align 2 [[T2]], i32 8, i1 false)
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8*
 // CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5)
@@ -307,7 +307,7 @@
 
 // CHECK-NEXT: [[P:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[T0]], i8 0, i64 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[P]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T1]], align 8
@@ -318,7 +318,7 @@
   __c11_atomic_init(fp, (PS){1,2,3});
 
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 [[T0]], i8 0, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[X]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T1]], align 8
@@ -335,16 +335,16 @@
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP0]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[T1]], i8* align 8 [[T2]], i32 6, i1 false)
   PS f = *fp;
 
 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 [[T1]], i8 0, i32 8, i1 false)
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP1]], i32 0, i32 0
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[T2]], i8* align 2 [[T3]], i32 6, i1 false)
 // CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8*
 // CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8*
 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5)
@@ -357,7 +357,7 @@
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP3]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[T1]], i8* align 8 [[T2]], i32 6, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS, %struct.PS* [[TMP2]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = load i16, i16* [[T0]], align 2
 // CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32
@@ -381,7 +381,7 @@
   // CHECK:   [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
   // CHECK:   [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
   // CHECK:   [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[AGG_RESULT8]], i8* align 8 [[ATOMIC_RES8]], i32 6, i1 false)
 
   return __c11_atomic_load(addr, 5);
 }
@@ -398,11 +398,11 @@
   // CHECK:   [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
   // CHECK:   [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[VAL8]], i32 6, i1 false)
   // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
   // CHECK:   [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_VAL8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false)
   // CHECK:   [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
   // CHECK:   [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
   // CHECK:   [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2
@@ -423,11 +423,11 @@
   // CHECK:   [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
   // CHECK:   [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[VAL8]], i32 6, i1 false)
   // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
   // CHECK:   [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_VAL8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false)
   // CHECK:   [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
   // CHECK:   [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
   // CHECK:   [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
@@ -437,7 +437,7 @@
   // CHECK:   [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
   // CHECK:   [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
   // CHECK:   [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[AGG_RESULT8]], i8* align 8 [[ATOMIC_RES8]], i32 6, i1 false)
   return __c11_atomic_exchange(addr, *val, 5);
 }
 
@@ -457,15 +457,15 @@
   // CHECK:   [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
   // CHECK:   [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[NEW8]], i32 6, i1 false)
   // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
   // CHECK:   [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i8*
   // CHECK:   [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]]to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_DESIRED8]], i8* align 2 [[DESIRED8]], i64 6, i1 false)
   // CHECK:   [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i64*
   // CHECK:   [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8*
   // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_NEW8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false)
   // CHECK:   [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
   // CHECK:   [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
   // CHECK:   [[ATOMIC_DESIRED8:%.*]] = bitcast i64* [[ATOMIC_DESIRED64]] to i8*
diff --git a/test/CodeGen/cetintrin.c b/test/CodeGen/cetintrin.c
new file mode 100644
index 0000000..4d2f87a
--- /dev/null
+++ b/test/CodeGen/cetintrin.c
@@ -0,0 +1,113 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=i386-apple-darwin -target-feature +shstk -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=I386 --check-prefix=CHECK
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +shstk  -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=X86_64  --check-prefix=CHECK
+
+#include <immintrin.h>
+
+void test_incsspd(int a) {
+  // CHECK-LABEL: @test_incsspd
+  // CHECK:       call void @llvm.x86.incsspd(i32 %{{[0-9]+}})
+  _incsspd(a);
+}
+
+#ifdef __x86_64__
+void test_incsspq(int a) {
+  // X86_64-LABEL: @test_incsspq
+  // X86_64:       call void @llvm.x86.incsspq(i64 %{{[a-z0-9.]+}})
+  _incsspq(a);
+}
+
+void test_inc_ssp(unsigned int a) {
+  // X86_64-LABEL: @test_inc_ssp
+  // X86_64:       call void @llvm.x86.incsspq(i64 %{{[a-z0-9.]+}})
+  _inc_ssp(a);
+}
+#else
+
+void test_inc_ssp(unsigned int a) {
+  // I386-LABEL: @test_inc_ssp
+  // I386:       call void @llvm.x86.incsspd(i32 %{{[0-9]+}})
+  _inc_ssp(a);
+}
+
+#endif
+
+unsigned int test_rdsspd(unsigned int a) {
+  // CHECK-LABEL: @test_rdsspd
+  // CHECK:       call i32 @llvm.x86.rdsspd(i32 %{{[a-z0-9.]+}})
+  return _rdsspd(a);
+}
+
+#ifdef __x86_64__
+unsigned long long test_rdsspq(unsigned long long a) {
+  // X86_64-LABEL: @test_rdsspq
+  // X86_64:       call i64 @llvm.x86.rdsspq(i64 %{{[a-z0-9.]+}})
+  return _rdsspq(a);
+}
+
+unsigned long long test_get_ssp(void) {
+  // X86_64-LABEL: @test_get_ssp
+  // X86_64:       call i64 @llvm.x86.rdsspq(i64 0)
+  return _get_ssp();
+}
+
+#else
+
+unsigned int test_get_ssp(void) {
+  // I386-LABEL: @test_get_ssp
+  // I386:       call i32 @llvm.x86.rdsspd(i32 0)
+  return _get_ssp();
+}
+
+#endif
+
+void  test_saveprevssp() {
+  // CHECK-LABEL: @test_saveprevssp
+  // CHECK:       call void @llvm.x86.saveprevssp()
+  _saveprevssp();
+}
+
+void test_rstorssp(void * __p) {
+  // CHECK-LABEL: @test_rstorssp
+  // CHECK:       call void @llvm.x86.rstorssp(i8* %{{[a-z0-9.]+}})
+  _rstorssp(__p);
+}
+
+void test_wrssd(unsigned int __a, void * __p) {
+  // CHECK-LABEL: @test_wrssd
+  // CHECK:       call void @llvm.x86.wrssd(i32 %{{[a-z0-9.]+}}, i8* %{{[a-z0-9.]+}})
+  _wrssd(__a, __p);
+}
+
+#ifdef __x86_64__
+void test_wrssq(unsigned long long __a, void * __p) {
+  // X86_64-LABEL: @test_wrssq
+  // X86_64:       call void @llvm.x86.wrssq(i64 %{{[a-z0-9.]+}}, i8* %{{[a-z0-9.]+}})
+  _wrssq(__a, __p);
+}
+#endif
+
+void test_wrussd(unsigned int __a, void * __p) {
+  // CHECK-LABEL: @test_wrussd
+  // CHECK:       call void @llvm.x86.wrussd(i32 %{{[a-z0-9.]+}}, i8* %{{[a-z0-9.]+}})
+  _wrussd(__a, __p);
+}
+
+#ifdef __x86_64__
+void test_wrussq(unsigned long long __a, void * __p) {
+  // X86_64-LABEL: @test_wrussq
+  // X86_64:       call void @llvm.x86.wrussq(i64 %{{[a-z0-9.]+}}, i8* %{{[a-z0-9.]+}})
+  _wrussq(__a, __p);
+}
+#endif
+
+void test_setssbsy() {
+  // CHECK-LABEL: @test_setssbsy
+  // CHECK:       call void @llvm.x86.setssbsy()
+  _setssbsy();
+}
+
+void test_clrssbsy(void * __p) {
+  // CHECK-LABEL: @test_clrssbsy
+  // CHECK:       call void @llvm.x86.clrssbsy(i8* %{{[a-z0-9.]+}})
+  _clrssbsy(__p);
+}
diff --git a/test/CodeGen/cfi-icall-cross-dso.c b/test/CodeGen/cfi-icall-cross-dso.c
index 636a9e4..0fdf4a2 100644
--- a/test/CodeGen/cfi-icall-cross-dso.c
+++ b/test/CodeGen/cfi-icall-cross-dso.c
@@ -46,10 +46,10 @@
 // Check that we emit both string and hash based type entries for static void g(),
 // and don't emit them for the declaration of h().
 
-// CHECK: define internal void @g({{.*}} !type [[TVOID:![0-9]+]] !type [[TVOID_ID:![0-9]+]]
+// CHECK: define internal void @g({{.*}} !type [[TVOID:![0-9]+]] !type [[TVOID_GENERALIZED:![0-9]+]] !type [[TVOID_ID:![0-9]+]]
 static void g(void) {}
 
-// CHECK: declare void @h({{[^!]*$}}
+// CHECK: declare {{(dso_local )?}}void @h({{[^!]*$}}
 void h(void);
 
 typedef void (*Fn)(void);
@@ -60,9 +60,9 @@
   return &h;
 }
 
-// CHECK: define void @bar({{.*}} !type [[TNOPROTO:![0-9]+]] !type [[TNOPROTO_ID:![0-9]+]]
+// CHECK: define {{(dso_local )?}}void @bar({{.*}} !type [[TNOPROTO:![0-9]+]] !type [[TNOPROTO_GENERALIZED:![0-9]+]] !type [[TNOPROTO_ID:![0-9]+]]
 // ITANIUM: define available_externally void @foo({{[^!]*$}}
-// MS: define linkonce_odr void @foo({{.*}} !type [[TNOPROTO]] !type [[TNOPROTO_ID]]
+// MS: define linkonce_odr dso_local void @foo({{.*}} !type [[TNOPROTO]] !type [[TNOPROTO_GENERALIZED:![0-9]+]] !type [[TNOPROTO_ID]]
 inline void foo() {}
 void bar() { foo(); }
 
@@ -71,11 +71,15 @@
 // Check that the type entries are correct.
 
 // ITANIUM: [[TVOID]] = !{i64 0, !"_ZTSFvvE"}
+// ITANIUM: [[TVOID_GENERALIZED]] = !{i64 0, !"_ZTSFvvE.generalized"}
 // ITANIUM: [[TVOID_ID]] = !{i64 0, i64 9080559750644022485}
 // ITANIUM: [[TNOPROTO]] = !{i64 0, !"_ZTSFvE"}
+// ITANIUM: [[TNOPROTO_GENERALIZED]] = !{i64 0, !"_ZTSFvE.generalized"}
 // ITANIUM: [[TNOPROTO_ID]] = !{i64 0, i64 6588678392271548388}
 
 // MS: [[TVOID]] = !{i64 0, !"?6AXXZ"}
+// MS: [[TVOID_GENERALIZED]] = !{i64 0, !"?6AXXZ.generalized"}
 // MS: [[TVOID_ID]] = !{i64 0, i64 5113650790573562461}
 // MS: [[TNOPROTO]] = !{i64 0, !"?6AX@Z"}
+// MS: [[TNOPROTO_GENERALIZED]] = !{i64 0, !"?6AX@Z.generalized"}
 // MS: [[TNOPROTO_ID]] = !{i64 0, i64 4195979634929632483}
diff --git a/test/CodeGen/cfi-icall-generalize.c b/test/CodeGen/cfi-icall-generalize.c
new file mode 100644
index 0000000..c7c7b30
--- /dev/null
+++ b/test/CodeGen/cfi-icall-generalize.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fsanitize=cfi-icall -fsanitize-trap=cfi-icall -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=UNGENERALIZED %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fsanitize=cfi-icall -fsanitize-trap=cfi-icall -fsanitize-cfi-icall-generalize-pointers -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=GENERALIZED %s
+
+// Test that const char* is generalized to const void* and that const char** is
+// generalized to void*
+
+// CHECK: define i32** @f({{.*}} !type [[TYPE:![0-9]+]] !type [[TYPE_GENERALIZED:![0-9]+]]
+int** f(const char *a, const char **b) {
+  return (int**)0;
+}
+
+void g(int** (*fp)(const char *, const char **)) {
+  // UNGENERALIZED: call i1 @llvm.type.test(i8* {{.*}}, metadata !"_ZTSFPPiPKcPS2_E")
+  // GENERALIZED: call i1 @llvm.type.test(i8* {{.*}}, metadata !"_ZTSFPvPKvS_E.generalized")
+  fp(0, 0);
+}
+
+// CHECK: [[TYPE]] = !{i64 0, !"_ZTSFPPiPKcPS2_E"}
+// CHECK: [[TYPE_GENERALIZED]] = !{i64 0, !"_ZTSFPvPKvS_E.generalized"}
diff --git a/test/CodeGen/cfi-icall.c b/test/CodeGen/cfi-icall.c
index ed34f4f..83fc8f8 100644
--- a/test/CodeGen/cfi-icall.c
+++ b/test/CodeGen/cfi-icall.c
@@ -3,22 +3,26 @@
 
 // Tests that we assign appropriate identifiers to unprototyped functions.
 
-// CHECK: define void @f({{.*}} !type [[TVOID:![0-9]+]]
+// CHECK: define {{(dso_local )?}}void @f({{.*}} !type [[TVOID:![0-9]+]] !type [[TVOID_GENERALIZED:![0-9]+]]
 void f() {
 }
 
 void xf();
 
-// CHECK: define void @g({{.*}} !type [[TINT:![0-9]+]]
+// CHECK: define {{(dso_local )?}}void @g({{.*}} !type [[TINT:![0-9]+]] !type [[TINT_GENERALIZED:![0-9]+]]
 void g(int b) {
   void (*fp)() = b ? f : xf;
   // ITANIUM: call i1 @llvm.type.test(i8* {{.*}}, metadata !"_ZTSFvE")
   fp();
 }
 
-// CHECK: declare !type [[TVOID:![0-9]+]] void @xf({{.*}}
+// CHECK: declare !type [[TVOID]] !type [[TVOID_GENERALIZED]] {{(dso_local )?}}void @xf({{.*}}
 
 // ITANIUM-DAG: [[TVOID]] = !{i64 0, !"_ZTSFvE"}
+// ITANIUM-DAG: [[TVOID_GENERALIZED]] = !{i64 0, !"_ZTSFvE.generalized"}
 // ITANIUM-DAG: [[TINT]] = !{i64 0, !"_ZTSFviE"}
+// ITANIUM-DAG: [[TINT_GENERALIZED]] = !{i64 0, !"_ZTSFviE.generalized"}
 // MS-DAG: [[TVOID]] = !{i64 0, !"?6AX@Z"}
+// MS-DAG: [[TVOID_GENERALIZED]] = !{i64 0, !"?6AX@Z.generalized"}
 // MS-DAG: [[TINT]] = !{i64 0, !"?6AXH@Z"}
+// MS-DAG: [[TINT_GENERALIZED]] = !{i64 0, !"?6AXH@Z.generalized"}
diff --git a/test/CodeGen/cfstring-windows.c b/test/CodeGen/cfstring-windows.c
index e54c860..554c4d9 100644
--- a/test/CodeGen/cfstring-windows.c
+++ b/test/CodeGen/cfstring-windows.c
@@ -32,7 +32,7 @@
 const CFStringRef string = (CFStringRef)__builtin___CFStringMakeConstantString("string");
 
 // CHECK-CF-IN-CF-DECL: @__CFConstantStringClassReference = external dllexport global [0 x i32]
-// CHECK-CF-IN-CF-DEFN: @__CFConstantStringClassReference = common dllexport global [32 x i32]
+// CHECK-CF-IN-CF-DEFN: @__CFConstantStringClassReference = common dso_local dllexport global [32 x i32]
 // CHECK-CF: @__CFConstantStringClassReference = external dllimport global [0 x i32]
 // CHECK-CF-EXTERN: @__CFConstantStringClassReference = external dllimport global [0 x i32]
 // CHECK-CF-EXTERN-DLLIMPORT: @__CFConstantStringClassReference = external dllimport global [0 x i32]
diff --git a/test/CodeGen/code-coverage.c b/test/CodeGen/code-coverage.c
index 80307f4..4fbfd5b 100644
--- a/test/CodeGen/code-coverage.c
+++ b/test/CodeGen/code-coverage.c
@@ -2,7 +2,16 @@
 // RUN: %clang_cc1 -emit-llvm -disable-red-zone -femit-coverage-data -coverage-no-function-names-in-data %s -o - | FileCheck %s --check-prefix WITHOUTNAMES
 // RUN: %clang_cc1 -emit-llvm -disable-red-zone -femit-coverage-data -coverage-notes-file=aaa.gcno -coverage-data-file=bbb.gcda -dwarf-column-info -debug-info-kind=limited -dwarf-version=4 %s -o - | FileCheck %s --check-prefix GCOV_FILE_INFO
 
-// <rdar://problem/12843084>
+// RUN: %clang_cc1 -emit-llvm-bc -o /dev/null -fexperimental-new-pass-manager -fdebug-pass-manager -femit-coverage-data %s 2>&1 | FileCheck --check-prefix=NEWPM %s
+// RUN: %clang_cc1 -emit-llvm-bc -o /dev/null -fexperimental-new-pass-manager -fdebug-pass-manager -femit-coverage-data -O3 %s 2>&1 | FileCheck --check-prefix=NEWPM-O3 %s
+
+// NEWPM-NOT: Running pass
+// NEWPM: Running pass: GCOVProfilerPass
+
+// NEWPM-O3-NOT: Running pass
+// NEWPM-O3: Running pass: ForceFunctionAttrsPass
+// NEWPM-O3: Running pass: GCOVProfilerPass
+
 
 int test1(int a) {
   switch (a % 2) {
diff --git a/test/CodeGen/complex-builtins.c b/test/CodeGen/complex-builtins.c
new file mode 100644
index 0000000..dbf3b59
--- /dev/null
+++ b/test/CodeGen/complex-builtins.c
@@ -0,0 +1,206 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -w -S -o - -emit-llvm              %s | FileCheck %s -check-prefix=NO__ERRNO
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s -check-prefix=HAS_ERRNO
+
+// Test attributes and codegen of complex builtins.
+
+void foo(float f) {
+  __builtin_cabs(f);       __builtin_cabsf(f);      __builtin_cabsl(f);
+
+// NO__ERRNO: declare double @cabs(double, double) [[READNONE:#[0-9]+]]
+// NO__ERRNO: declare float @cabsf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE:#[0-9]+]]
+// HAS_ERRNO: declare double @cabs(double, double) [[NOT_READNONE:#[0-9]+]]
+// HAS_ERRNO: declare float @cabsf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_cacos(f);      __builtin_cacosf(f);     __builtin_cacosl(f);
+
+// NO__ERRNO: declare { double, double } @cacos(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @cacosf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @cacos(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @cacosf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_cacosh(f);     __builtin_cacoshf(f);    __builtin_cacoshl(f);
+
+// NO__ERRNO: declare { double, double } @cacosh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @cacoshf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @cacosh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @cacoshf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_carg(f);       __builtin_cargf(f);      __builtin_cargl(f);
+
+// NO__ERRNO: declare double @carg(double, double) [[READNONE]]
+// NO__ERRNO: declare float @cargf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @carg(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @cargf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_casin(f);      __builtin_casinf(f);     __builtin_casinl(f);
+
+// NO__ERRNO: declare { double, double } @casin(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @casinf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @casin(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @casinf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_casinh(f);     __builtin_casinhf(f);    __builtin_casinhl(f); 
+
+// NO__ERRNO: declare { double, double } @casinh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @casinhf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @casinh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @casinhf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_catan(f);      __builtin_catanf(f);     __builtin_catanl(f); 
+
+// NO__ERRNO: declare { double, double } @catan(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @catanf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @catan(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @catanf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_catanh(f);     __builtin_catanhf(f);    __builtin_catanhl(f);
+
+// NO__ERRNO: declare { double, double } @catanh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @catanhf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @catanh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @catanhf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_ccos(f);       __builtin_ccosf(f);      __builtin_ccosl(f);
+
+// NO__ERRNO: declare { double, double } @ccos(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @ccosf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @ccos(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @ccosf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_ccosh(f);      __builtin_ccoshf(f);     __builtin_ccoshl(f);
+
+// NO__ERRNO: declare { double, double } @ccosh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @ccoshf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @ccosh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @ccoshf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_cexp(f);       __builtin_cexpf(f);      __builtin_cexpl(f);
+
+// NO__ERRNO: declare { double, double } @cexp(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @cexpf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @cexp(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @cexpf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_cimag(f);      __builtin_cimagf(f);     __builtin_cimagl(f);
+
+// NO__ERRNO-NOT: .cimag
+// NO__ERRNO-NOT: @cimag
+// HAS_ERRNO-NOT: .cimag
+// HAS_ERRNO-NOT: @cimag
+
+  __builtin_conj(f);       __builtin_conjf(f);      __builtin_conjl(f);
+
+// NO__ERRNO-NOT: .conj
+// NO__ERRNO-NOT: @conj
+// HAS_ERRNO-NOT: .conj
+// HAS_ERRNO-NOT: @conj
+
+  __builtin_clog(f);       __builtin_clogf(f);      __builtin_clogl(f);
+
+// NO__ERRNO: declare { double, double } @clog(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @clogf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @clog(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @clogf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_cproj(f);      __builtin_cprojf(f);     __builtin_cprojl(f); 
+
+// NO__ERRNO: declare { double, double } @cproj(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @cprojf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @cproj(double, double) [[READNONE:#[0-9]+]]
+// HAS_ERRNO: declare <2 x float> @cprojf(<2 x float>) [[READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_cpow(f,f);       __builtin_cpowf(f,f);      __builtin_cpowl(f,f);
+
+// NO__ERRNO: declare { double, double } @cpow(double, double, double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval align 16, { x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @cpow(double, double, double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval align 16, { x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_creal(f);      __builtin_crealf(f);     __builtin_creall(f);
+
+// NO__ERRNO-NOT: .creal
+// NO__ERRNO-NOT: @creal
+// HAS_ERRNO-NOT: .creal
+// HAS_ERRNO-NOT: @creal
+
+  __builtin_csin(f);       __builtin_csinf(f);      __builtin_csinl(f);
+
+// NO__ERRNO: declare { double, double } @csin(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @csinf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @csin(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @csinf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_csinh(f);      __builtin_csinhf(f);     __builtin_csinhl(f);
+
+// NO__ERRNO: declare { double, double } @csinh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @csinhf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @csinh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @csinhf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_csqrt(f);      __builtin_csqrtf(f);     __builtin_csqrtl(f);  
+
+// NO__ERRNO: declare { double, double } @csqrt(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @csqrtf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @csqrt(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @csqrtf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_ctan(f);       __builtin_ctanf(f);      __builtin_ctanl(f);
+
+// NO__ERRNO: declare { double, double } @ctan(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @ctanf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @ctan(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @ctanf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  __builtin_ctanh(f);      __builtin_ctanhf(f);     __builtin_ctanhl(f); 
+
+// NO__ERRNO: declare { double, double } @ctanh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @ctanh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+};
+
+
+// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} }
+// NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind "correctly{{.*}} }
+
+// HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind "correctly{{.*}} }
+// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} }
+
diff --git a/test/CodeGen/complex-libcalls.c b/test/CodeGen/complex-libcalls.c
new file mode 100644
index 0000000..db56628
--- /dev/null
+++ b/test/CodeGen/complex-libcalls.c
@@ -0,0 +1,208 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -w -S -o - -emit-llvm              %s | FileCheck %s -check-prefix=NO__ERRNO
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s -check-prefix=HAS_ERRNO
+
+// Test attributes and builtin codegen of complex library calls. 
+
+void foo(float f) {
+  cabs(f);       cabsf(f);      cabsl(f);
+
+// NO__ERRNO: declare double @cabs(double, double) [[READNONE:#[0-9]+]]
+// NO__ERRNO: declare float @cabsf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE:#[0-9]+]]
+// HAS_ERRNO: declare double @cabs(double, double) [[NOT_READNONE:#[0-9]+]]
+// HAS_ERRNO: declare float @cabsf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  cacos(f);      cacosf(f);     cacosl(f);
+
+// NO__ERRNO: declare { double, double } @cacos(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @cacosf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @cacos(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @cacosf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  cacosh(f);     cacoshf(f);    cacoshl(f);
+
+// NO__ERRNO: declare { double, double } @cacosh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @cacoshf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @cacosh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @cacoshf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  carg(f);       cargf(f);      cargl(f);
+
+// NO__ERRNO: declare double @carg(double, double) [[READNONE]]
+// NO__ERRNO: declare float @cargf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @carg(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @cargf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  casin(f);      casinf(f);     casinl(f);
+
+// NO__ERRNO: declare { double, double } @casin(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @casinf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @casin(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @casinf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  casinh(f);     casinhf(f);    casinhl(f); 
+
+// NO__ERRNO: declare { double, double } @casinh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @casinhf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @casinh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @casinhf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  catan(f);      catanf(f);     catanl(f); 
+
+// NO__ERRNO: declare { double, double } @catan(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @catanf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @catan(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @catanf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  catanh(f);     catanhf(f);    catanhl(f);
+
+// NO__ERRNO: declare { double, double } @catanh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @catanhf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @catanh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @catanhf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  ccos(f);       ccosf(f);      ccosl(f);
+
+// NO__ERRNO: declare { double, double } @ccos(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @ccosf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @ccos(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @ccosf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  ccosh(f);      ccoshf(f);     ccoshl(f);
+
+// NO__ERRNO: declare { double, double } @ccosh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @ccoshf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @ccosh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @ccoshf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  cexp(f);       cexpf(f);      cexpl(f);
+
+// NO__ERRNO: declare { double, double } @cexp(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @cexpf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @cexp(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @cexpf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  cimag(f);      cimagf(f);     cimagl(f);
+
+// NO__ERRNO-NOT: .cimag
+// NO__ERRNO-NOT: @cimag
+// HAS_ERRNO-NOT: .cimag
+// HAS_ERRNO-NOT: @cimag
+
+  conj(f);       conjf(f);      conjl(f);
+
+// NO__ERRNO: declare { double, double } @conj(double, double) [[READNONE:#[0-9]+]]
+// NO__ERRNO: declare <2 x float> @conjf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @conjl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @conj(double, double) [[READNONE:#[0-9]+]]
+// HAS_ERRNO: declare <2 x float> @conjf(<2 x float>) [[READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @conjl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  clog(f);       clogf(f);      clogl(f);
+
+// NO__ERRNO: declare { double, double } @clog(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @clogf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @clog(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @clogf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  cproj(f);      cprojf(f);     cprojl(f); 
+
+// NO__ERRNO: declare { double, double } @cproj(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @cprojf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @cproj(double, double) [[READNONE]]
+// HAS_ERRNO: declare <2 x float> @cprojf(<2 x float>) [[READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  cpow(f,f);       cpowf(f,f);      cpowl(f,f);
+
+// NO__ERRNO: declare { double, double } @cpow(double, double, double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval align 16, { x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @cpow(double, double, double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval align 16, { x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  creal(f);      crealf(f);     creall(f);
+
+// NO__ERRNO-NOT: .creal
+// NO__ERRNO-NOT: @creal
+// HAS_ERRNO-NOT: .creal
+// HAS_ERRNO-NOT: @creal
+
+  csin(f);       csinf(f);      csinl(f);
+
+// NO__ERRNO: declare { double, double } @csin(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @csinf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @csin(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @csinf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  csinh(f);      csinhf(f);     csinhl(f);
+
+// NO__ERRNO: declare { double, double } @csinh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @csinhf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @csinh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @csinhf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  csqrt(f);      csqrtf(f);     csqrtl(f);  
+
+// NO__ERRNO: declare { double, double } @csqrt(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @csqrtf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @csqrt(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @csqrtf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  ctan(f);       ctanf(f);      ctanl(f);
+
+// NO__ERRNO: declare { double, double } @ctan(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @ctanf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @ctan(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @ctanf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+
+  ctanh(f);      ctanhf(f);     ctanhl(f); 
+
+// NO__ERRNO: declare { double, double } @ctanh(double, double) [[READNONE]]
+// NO__ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { double, double } @ctanh(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+};
+
+
+// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} }
+// NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind "correctly{{.*}} }
+
+// HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind "correctly{{.*}} }
+// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} }
+
diff --git a/test/CodeGen/complex-math.c b/test/CodeGen/complex-math.c
index 8792ca1..5fd25d0 100644
--- a/test/CodeGen/complex-math.c
+++ b/test/CodeGen/complex-math.c
@@ -5,6 +5,7 @@
 // RUN %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabi -o - | FileCheck %s --check-prefix=ARM
 // RUN: %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s --check-prefix=ARMHF
 // RUN: %clang_cc1 %s -O1 -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s --check-prefix=ARM7K
+// RUN: %clang_cc1 %s -O1 -emit-llvm -triple aarch64-unknown-unknown -ffast-math -o - | FileCheck %s --check-prefix=AARCH64-FASTMATH
 
 float _Complex add_float_rr(float a, float b) {
   // X86-LABEL: @add_float_rr(
@@ -128,6 +129,29 @@
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divsc3(
   // X86: ret
+
+  // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
+  // AARCH64-FASTMATH-LABEL: @div_float_rc(float %a, [2 x float] %b.coerce)
+  // A = a
+  // B = 0
+  // AARCH64-FASTMATH: [[C:%.*]] = extractvalue [2 x float] %b.coerce, 0
+  // AARCH64-FASTMATH: [[D:%.*]] = extractvalue [2 x float] %b.coerce, 1
+  //
+  // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast float [[C]], %a
+  // BD = 0
+  // ACpBD = AC
+  //
+  // AARCH64-FASTMATH: [[CC:%.*]] = fmul fast float [[C]], [[C]]
+  // AARCH64-FASTMATH: [[DD:%.*]] = fmul fast float [[D]], [[D]]
+  // AARCH64-FASTMATH: [[CCpDD:%.*]] = fadd fast float [[CC]], [[DD]]
+  //
+  // BC = 0
+  // AARCH64-FASTMATH: [[AD:%.*]] = fmul fast float [[D]], %a
+  // AARCH64-FASTMATH: [[BCmAD:%.*]] = fsub fast float -0.000000e+00, [[AD]]
+  //
+  // AARCH64-FASTMATH: fdiv fast float [[AC]], [[CCpDD]]
+  // AARCH64-FASTMATH: fdiv fast float [[BCmAD]], [[CCpDD]]
+  // AARCH64-FASTMATH: ret
   return a / b;
 }
 float _Complex div_float_cc(float _Complex a, float _Complex b) {
@@ -135,6 +159,29 @@
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divsc3(
   // X86: ret
+
+  // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
+  // AARCH64-FASTMATH-LABEL: @div_float_cc([2 x float] %a.coerce, [2 x float] %b.coerce)
+  // AARCH64-FASTMATH: [[A:%.*]] = extractvalue [2 x float] %a.coerce, 0
+  // AARCH64-FASTMATH: [[B:%.*]] = extractvalue [2 x float] %a.coerce, 1
+  // AARCH64-FASTMATH: [[C:%.*]] = extractvalue [2 x float] %b.coerce, 0
+  // AARCH64-FASTMATH: [[D:%.*]] = extractvalue [2 x float] %b.coerce, 1
+  //
+  // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast float [[C]], [[A]]
+  // AARCH64-FASTMATH: [[BD:%.*]] = fmul fast float [[D]], [[B]]
+  // AARCH64-FASTMATH: [[ACpBD:%.*]] = fadd fast float [[AC]], [[BD]]
+  //
+  // AARCH64-FASTMATH: [[CC:%.*]] = fmul fast float [[C]], [[C]]
+  // AARCH64-FASTMATH: [[DD:%.*]] = fmul fast float [[D]], [[D]]
+  // AARCH64-FASTMATH: [[CCpDD:%.*]] = fadd fast float [[CC]], [[DD]]
+  //
+  // AARCH64-FASTMATH: [[BC:%.*]] = fmul fast float [[C]], [[B]]
+  // AARCH64-FASTMATH: [[AD:%.*]] = fmul fast float [[D]], [[A]]
+  // AARCH64-FASTMATH: [[BCmAD:%.*]] = fsub fast float [[BC]], [[AD]]
+  //
+  // AARCH64-FASTMATH: fdiv fast float [[ACpBD]], [[CCpDD]]
+  // AARCH64-FASTMATH: fdiv fast float [[BCmAD]], [[CCpDD]]
+  // AARCH64-FASTMATH: ret
   return a / b;
 }
 
@@ -260,6 +307,29 @@
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divdc3(
   // X86: ret
+
+  // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
+  // AARCH64-FASTMATH-LABEL: @div_double_rc(double %a, [2 x double] %b.coerce)
+  // A = a
+  // B = 0
+  // AARCH64-FASTMATH: [[C:%.*]] = extractvalue [2 x double] %b.coerce, 0
+  // AARCH64-FASTMATH: [[D:%.*]] = extractvalue [2 x double] %b.coerce, 1
+  //
+  // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast double [[C]], %a
+  // BD = 0
+  // ACpBD = AC
+  //
+  // AARCH64-FASTMATH: [[CC:%.*]] = fmul fast double [[C]], [[C]]
+  // AARCH64-FASTMATH: [[DD:%.*]] = fmul fast double [[D]], [[D]]
+  // AARCH64-FASTMATH: [[CCpDD:%.*]] = fadd fast double [[CC]], [[DD]]
+  //
+  // BC = 0
+  // AARCH64-FASTMATH: [[AD:%.*]] = fmul fast double [[D]], %a
+  // AARCH64-FASTMATH: [[BCmAD:%.*]] = fsub fast double -0.000000e+00, [[AD]]
+  //
+  // AARCH64-FASTMATH: fdiv fast double [[AC]], [[CCpDD]]
+  // AARCH64-FASTMATH: fdiv fast double [[BCmAD]], [[CCpDD]]
+  // AARCH64-FASTMATH: ret
   return a / b;
 }
 double _Complex div_double_cc(double _Complex a, double _Complex b) {
@@ -267,6 +337,29 @@
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divdc3(
   // X86: ret
+
+  // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
+  // AARCH64-FASTMATH-LABEL: @div_double_cc([2 x double] %a.coerce, [2 x double] %b.coerce)
+  // AARCH64-FASTMATH: [[A:%.*]] = extractvalue [2 x double] %a.coerce, 0
+  // AARCH64-FASTMATH: [[B:%.*]] = extractvalue [2 x double] %a.coerce, 1
+  // AARCH64-FASTMATH: [[C:%.*]] = extractvalue [2 x double] %b.coerce, 0
+  // AARCH64-FASTMATH: [[D:%.*]] = extractvalue [2 x double] %b.coerce, 1
+  //
+  // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast double [[C]], [[A]]
+  // AARCH64-FASTMATH: [[BD:%.*]] = fmul fast double [[D]], [[B]]
+  // AARCH64-FASTMATH: [[ACpBD:%.*]] = fadd fast double [[AC]], [[BD]]
+  //
+  // AARCH64-FASTMATH: [[CC:%.*]] = fmul fast double [[C]], [[C]]
+  // AARCH64-FASTMATH: [[DD:%.*]] = fmul fast double [[D]], [[D]]
+  // AARCH64-FASTMATH: [[CCpDD:%.*]] = fadd fast double [[CC]], [[DD]]
+  //
+  // AARCH64-FASTMATH: [[BC:%.*]] = fmul fast double [[C]], [[B]]
+  // AARCH64-FASTMATH: [[AD:%.*]] = fmul fast double [[D]], [[A]]
+  // AARCH64-FASTMATH: [[BCmAD:%.*]] = fsub fast double [[BC]], [[AD]]
+  //
+  // AARCH64-FASTMATH: fdiv fast double [[ACpBD]], [[CCpDD]]
+  // AARCH64-FASTMATH: fdiv fast double [[BCmAD]], [[CCpDD]]
+  // AARCH64-FASTMATH: ret
   return a / b;
 }
 
@@ -410,6 +503,29 @@
   // PPC-NOT: fdiv
   // PPC: call {{.*}} @__divtc3(
   // PPC: ret
+
+  // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
+  // AARCH64-FASTMATH-LABEL: @div_long_double_rc(fp128 %a, [2 x fp128] %b.coerce)
+  // A = a
+  // B = 0
+  // AARCH64-FASTMATH: [[C:%.*]] = extractvalue [2 x fp128] %b.coerce, 0
+  // AARCH64-FASTMATH: [[D:%.*]] = extractvalue [2 x fp128] %b.coerce, 1
+  //
+  // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast fp128 [[C]], %a
+  // BD = 0
+  // ACpBD = AC
+  //
+  // AARCH64-FASTMATH: [[CC:%.*]] = fmul fast fp128 [[C]], [[C]]
+  // AARCH64-FASTMATH: [[DD:%.*]] = fmul fast fp128 [[D]], [[D]]
+  // AARCH64-FASTMATH: [[CCpDD:%.*]] = fadd fast fp128 [[CC]], [[DD]]
+  //
+  // BC = 0
+  // AARCH64-FASTMATH: [[AD:%.*]] = fmul fast fp128 [[D]], %a
+  // AARCH64-FASTMATH: [[BCmAD:%.*]] = fsub fast fp128 0xL00000000000000008000000000000000, [[AD]]
+  //
+  // AARCH64-FASTMATH: fdiv fast fp128 [[AC]], [[CCpDD]]
+  // AARCH64-FASTMATH: fdiv fast fp128 [[BCmAD]], [[CCpDD]]
+  // AARCH64-FASTMATH: ret
   return a / b;
 }
 long double _Complex div_long_double_cc(long double _Complex a, long double _Complex b) {
@@ -421,6 +537,29 @@
   // PPC-NOT: fdiv
   // PPC: call {{.*}} @__divtc3(
   // PPC: ret
+
+  // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
+  // AARCH64-FASTMATH-LABEL: @div_long_double_cc([2 x fp128] %a.coerce, [2 x fp128] %b.coerce)
+  // AARCH64-FASTMATH: [[A:%.*]] = extractvalue [2 x fp128] %a.coerce, 0
+  // AARCH64-FASTMATH: [[B:%.*]] = extractvalue [2 x fp128] %a.coerce, 1
+  // AARCH64-FASTMATH: [[C:%.*]] = extractvalue [2 x fp128] %b.coerce, 0
+  // AARCH64-FASTMATH: [[D:%.*]] = extractvalue [2 x fp128] %b.coerce, 1
+  //
+  // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast fp128 [[C]], [[A]]
+  // AARCH64-FASTMATH: [[BD:%.*]] = fmul fast fp128 [[D]], [[B]]
+  // AARCH64-FASTMATH: [[ACpBD:%.*]] = fadd fast fp128 [[AC]], [[BD]]
+  //
+  // AARCH64-FASTMATH: [[CC:%.*]] = fmul fast fp128 [[C]], [[C]]
+  // AARCH64-FASTMATH: [[DD:%.*]] = fmul fast fp128 [[D]], [[D]]
+  // AARCH64-FASTMATH: [[CCpDD:%.*]] = fadd fast fp128 [[CC]], [[DD]]
+  //
+  // AARCH64-FASTMATH: [[BC:%.*]] = fmul fast fp128 [[C]], [[B]]
+  // AARCH64-FASTMATH: [[AD:%.*]] = fmul fast fp128 [[D]], [[A]]
+  // AARCH64-FASTMATH: [[BCmAD:%.*]] = fsub fast fp128 [[BC]], [[AD]]
+  //
+  // AARCH64-FASTMATH: fdiv fast fp128 [[ACpBD]], [[CCpDD]]
+  // AARCH64-FASTMATH: fdiv fast fp128 [[BCmAD]], [[CCpDD]]
+  // AARCH64-FASTMATH: ret
   return a / b;
 }
 
diff --git a/test/CodeGen/compound-literal.c b/test/CodeGen/compound-literal.c
index 6507341..38675a7 100644
--- a/test/CodeGen/compound-literal.c
+++ b/test/CodeGen/compound-literal.c
@@ -33,7 +33,7 @@
   // CHECK-NEXT: store i32 [[TMP]], i32* [[CY]]
   // CHECK-NEXT: [[SI8:%[a-zA-Z0-9.]+]] = bitcast [[STRUCT]]* [[S]] to i8*
   // CHECK-NEXT: [[COMPOUNDLITI8:%[a-zA-Z0-9.]+]] = bitcast [[STRUCT]]* [[COMPOUNDLIT]] to i8*
-  // CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* [[SI8]], i8* [[COMPOUNDLITI8]]
+  // CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} [[SI8]], i8* align {{[0-9]+}} [[COMPOUNDLITI8]]
   s = (S){s.y,s.x};
   // CHECK-NEXT: ret void
 }
@@ -67,7 +67,7 @@
 
   // CHECK-NEXT: [[T0:%.*]] = bitcast i48* [[COERCE_TEMP]] to i8*
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[G]]* [[RESULT]] to i8*
-  // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 6
+  // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} [[T0]], i8* align {{[0-9]+}} [[T1]], i64 6
   // CHECK-NEXT: [[T0:%.*]] = load i48, i48* [[COERCE_TEMP]]
   // CHECK-NEXT: ret i48 [[T0]]
 }
diff --git a/test/CodeGen/darwin-ppc-varargs.c b/test/CodeGen/darwin-ppc-varargs.c
new file mode 100644
index 0000000..c2a0d19
--- /dev/null
+++ b/test/CodeGen/darwin-ppc-varargs.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple powerpc-apple-macosx10.5.0 -target-feature +altivec -Os -emit-llvm -o - %s | FileCheck %s
+
+int f(__builtin_va_list args) { return __builtin_va_arg(args, int); }
+
+// CHECK: @f(i8* {{.*}}[[PARAM:%[a-zA-Z0-9]+]])
+// CHECK: [[BITCAST:%[0-9]+]] = bitcast i8* [[PARAM]] to i32*
+// CHECK: [[VALUE:%[0-9]+]] = load i32, i32* [[BITCAST]], align 4
+// CHECK: ret i32 [[VALUE]]
+
+void h(vector int);
+int g(__builtin_va_list args) {
+  int i = __builtin_va_arg(args, int);
+  h(__builtin_va_arg(args, vector int));
+  int j = __builtin_va_arg(args, int);
+  return i + j;
+}
+
+// CHECK: @g(i8* {{.*}}[[PARAM:%[a-zA-Z0-9]+]])
+// CHECK: [[NEXT:%[-_.a-zA-Z0-9]+]] = getelementptr inbounds i8, i8* [[PARAM]], i32 4
+// CHECK: [[BITCAST:%[0-9]+]] = bitcast i8* [[PARAM]] to i32*
+// CHECK: [[LOAD:%[0-9]+]] = load i32, i32* [[BITCAST]], align 4
+// CHECK: [[PTRTOINT:%[0-9]+]] = ptrtoint i8* [[NEXT]] to i32
+// CHECK: [[ADD:%[0-9]+]] = add i32 [[PTRTOINT]], 15
+// CHECK: [[AND:%[0-9]+]] = and i32 [[ADD]], -16
+// CHECK: [[INTTOPTR:%[0-9]+]] = inttoptr i32 [[AND]] to <4 x i32>*
+// CHECK: [[ARG:%[0-9]]] = load <4 x i32>, <4 x i32>* [[INTTOPTR]], align 16
+// CHECK: call void @h(<4 x i32> [[ARG]]
+
diff --git a/test/CodeGen/debug-info-block-vars.c b/test/CodeGen/debug-info-block-vars.c
new file mode 100644
index 0000000..e0bb61e
--- /dev/null
+++ b/test/CodeGen/debug-info-block-vars.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -x c -fblocks -debug-info-kind=standalone -emit-llvm -O0 \
+// RUN:   -triple x86_64-apple-darwin -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c -fblocks -debug-info-kind=standalone -emit-llvm -O1 \
+// RUN:   -triple x86_64-apple-darwin -o - %s \
+// RUN:   | FileCheck --check-prefix=CHECK-OPT %s
+
+// CHECK: define internal void @__f_block_invoke(i8* %.block_descriptor)
+// CHECK: %.block_descriptor.addr = alloca i8*, align 8
+// CHECK: %block.addr = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8
+// CHECK: store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8
+// CHECK: call void @llvm.dbg.declare(metadata i8** %.block_descriptor.addr,
+// CHECK-SAME:                        metadata !DIExpression())
+// CHECK-OPT-NOT: alloca
+// CHECK-OPT: call void @llvm.dbg.value(metadata i8* %.block_descriptor,
+// CHECK-OPT-SAME:                      metadata !DIExpression())
+void f() {
+  a(^{
+    b();
+  });
+}
diff --git a/test/CodeGen/debug-info-embed-source.c b/test/CodeGen/debug-info-embed-source.c
new file mode 100644
index 0000000..3b607b6
--- /dev/null
+++ b/test/CodeGen/debug-info-embed-source.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1                -debug-info-kind=limited -emit-llvm %p/Inputs/debug-info-embed-source.c -o - | FileCheck %s --check-prefix=NOEMBED
+// RUN: %clang_cc1 -gembed-source -debug-info-kind=limited -emit-llvm %p/Inputs/debug-info-embed-source.c -o - | FileCheck %s --check-prefix=EMBED
+
+// NOEMBED-NOT: !DIFile({{.*}}source:
+// EMBED: !DIFile({{.*}}source: "void foo() { }\0A"
diff --git a/test/CodeGen/debug-info-enum.cpp b/test/CodeGen/debug-info-enum.cpp
new file mode 100644
index 0000000..1237f28
--- /dev/null
+++ b/test/CodeGen/debug-info-enum.cpp
@@ -0,0 +1,100 @@
+// Test enumeration representation in debuig info metadata:
+// * test value representation for each possible underlying integer type
+// * test the integer type is as expected
+// * test the DW_AT_enum_class attribute is present (resp. absent) as expected.
+
+// RUN: %clang -target x86_64-linux -g -S -emit-llvm -o - %s | FileCheck %s
+
+
+enum class E0 : signed char {
+  A0 = -128,
+  B0 = 127,
+} x0;
+// CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "E0"
+// CHECK-SAME: baseType: ![[SCHAR:[0-9]+]]
+// CHECK-SAME: DIFlagFixedEnum
+// CHECK-SAME: elements: ![[ELTS0:[0-9]+]]
+// CHECK: ![[SCHAR]] = !DIBasicType(name: "signed char", size: 8, encoding: DW_ATE_signed_char)
+// CHECK: ![[ELTS0]] = !{![[A0:[0-9]+]], ![[B0:[0-9]+]]}
+// CHECK: ![[A0]] = !DIEnumerator(name: "A0", value: -128)
+// CHECK: ![[B0]] = !DIEnumerator(name: "B0", value: 127)
+
+enum class E1 : unsigned char { A1 = 255 } x1;
+// CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "E1"
+// CHECK-SAME: baseType: ![[UCHAR:[0-9]+]]
+// CHECK-SAME: DIFlagFixedEnum
+// CHECK-SAME: elements: ![[ELTS1:[0-9]+]]
+// CHECK: ![[UCHAR]] = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char)
+// CHECK: ![[ELTS1]] = !{![[A1:[0-9]+]]}
+// CHECK: ![[A1]] = !DIEnumerator(name: "A1", value: 255, isUnsigned: true)
+
+enum class E2 : signed short {
+  A2 = -32768,
+  B2 = 32767,
+} x2;
+// CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "E2"
+// CHECK-SAME: baseType: ![[SHORT:[0-9]+]]
+// CHECK-SAME: DIFlagFixedEnum
+// CHECK-SAME: elements: ![[ELTS2:[0-9]+]]
+// CHECK: ![[SHORT]] = !DIBasicType(name: "short", size: 16, encoding: DW_ATE_signed)
+// CHECK: ![[ELTS2]] = !{![[A2:[0-9]+]], ![[B2:[0-9]+]]}
+// CHECK: ![[A2]] = !DIEnumerator(name: "A2", value: -32768)
+// CHECK: ![[B2]] = !DIEnumerator(name: "B2", value: 32767)
+
+enum class E3 : unsigned short { A3 = 65535 } x3;
+// CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "E3"
+// CHECK-SAME: baseType: ![[USHORT:[0-9]+]]
+// CHECK-SAME: DIFlagFixedEnum
+// CHECK-SAME: elements: ![[ELTS3:[0-9]+]]
+// CHECK: ![[USHORT]] = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned)
+// CHECK: ![[ELTS3]] = !{![[A3:[0-9]+]]}
+// CHECK: ![[A3]] = !DIEnumerator(name: "A3", value: 65535, isUnsigned: true)
+
+enum class E4 : signed int { A4 = -2147483648, B4 = 2147483647 } x4;
+// CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "E4"
+// CHECK-SAME: baseType: ![[INT:[0-9]+]]
+// CHECK-SAME: DIFlagFixedEnum
+// CHECK-SAME: elements: ![[ELTS4:[0-9]+]]
+// CHECK: ![[INT]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+// CHECK: ![[ELTS4]] = !{![[A4:[0-9]+]], ![[B4:[0-9]+]]}
+// CHECK: ![[A4]] = !DIEnumerator(name: "A4", value: -2147483648)
+// CHECK: ![[B4]] = !DIEnumerator(name: "B4", value: 2147483647)
+
+enum class E5 : unsigned int { A5 = 4294967295 } x5;
+// CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "E5"
+// CHECK-SAME: baseType: ![[UINT:[0-9]+]]
+// CHECK-SAME: DIFlagFixedEnum
+// CHECK-SAME: elements: ![[ELTS5:[0-9]+]]
+// CHECK: ![[UINT]] = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+// CHECK: ![[ELTS5]] = !{![[A5:[0-9]+]]}
+// CHECK: ![[A5]] = !DIEnumerator(name: "A5", value: 4294967295, isUnsigned: true)
+
+enum class E6 : signed long long {
+  A6 = -9223372036854775807LL - 1,
+  B6 = 9223372036854775807LL
+} x6;
+// CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "E6"
+// CHECK-SAME: baseType: ![[LONG:[0-9]+]]
+// CHECK-SAME: DIFlagFixedEnum
+// CHECK-SAME: elements: ![[ELTS6:[0-9]+]]
+// CHECK: ![[LONG]] = !DIBasicType(name: "long long int", size: 64, encoding: DW_ATE_signed)
+// CHECK: ![[ELTS6]] = !{![[A6:[0-9]+]], ![[B6:[0-9]+]]}
+// CHECK: ![[A6]] = !DIEnumerator(name: "A6", value: -9223372036854775808)
+// CHECK: ![[B6]] = !DIEnumerator(name: "B6", value: 9223372036854775807)
+
+enum class E7 : unsigned long long { A7 = 18446744073709551615ULL } x7;
+// CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "E7"
+// CHECK-SAME: baseType: ![[ULONG:[0-9]+]]
+// CHECK-SAME: DIFlagFixedEnum
+// CHECK-SAME: elements: ![[ELTS7:[0-9]+]]
+// CHECK: ![[ULONG]] = !DIBasicType(name: "long long unsigned int", size: 64, encoding: DW_ATE_unsigned)
+// CHECK: ![[ELTS7]] = !{![[A7:[0-9]+]]}
+// CHECK: ![[A7]] = !DIEnumerator(name: "A7", value: 18446744073709551615, isUnsigned: true)
+
+// Also test the FixedEnum flag is not present for old-style enumerations.
+enum E8 { A8 = -128, B8 = 127 } x8;
+// CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "E8"
+// CHECK-SAME: baseType: ![[INT]]
+// CHECK-NOT: DIFlagFixedEnum
+// CHECK: !DIEnumerator(name: "A8", value: -128)
+
diff --git a/test/CodeGen/debug-info-file-checksum.c b/test/CodeGen/debug-info-file-checksum.c
index 2750800..d644aac 100644
--- a/test/CodeGen/debug-info-file-checksum.c
+++ b/test/CodeGen/debug-info-file-checksum.c
@@ -1,4 +1,5 @@
 // RUN: %clang -emit-llvm -S -g -gcodeview -x c %S/Inputs/debug-info-file-checksum.c -o - | FileCheck %s
+// RUN: %clang -emit-llvm -S -gdwarf-5 -x c %S/Inputs/debug-info-file-checksum.c -o - | FileCheck %s
 
 // Check that "checksum" is created correctly for the compiled file.
 
diff --git a/test/CodeGen/debug-info-vla.c b/test/CodeGen/debug-info-vla.c
index 7928ca7..8bd6a6d 100644
--- a/test/CodeGen/debug-info-vla.c
+++ b/test/CodeGen/debug-info-vla.c
@@ -2,9 +2,11 @@
 
 void testVLAwithSize(int s)
 {
-// CHECK: dbg.declare
-// CHECK: dbg.declare({{.*}}, metadata ![[VAR:.*]], metadata !DIExpression())
-// CHECK: ![[VAR]] = !DILocalVariable(name: "vla",{{.*}} line: [[@LINE+1]]
+// CHECK-DAG: dbg.declare({{.*}} %__vla_expr, metadata ![[VLAEXPR:[0-9]+]]
+// CHECK-DAG: dbg.declare({{.*}} %vla, metadata ![[VAR:[0-9]+]]
+// CHECK-DAG: ![[VLAEXPR]] = !DILocalVariable(name: "__vla_expr", {{.*}} flags: DIFlagArtificial
+// CHECK-DAG: ![[VAR]] = !DILocalVariable(name: "vla",{{.*}} line: [[@LINE+2]]
+// CHECK-DAG: !DISubrange(count: ![[VLAEXPR]])
   int vla[s];
   int i;
   for (i = 0; i < s; i++) {
diff --git a/test/CodeGen/decl.c b/test/CodeGen/decl.c
index 2065e33..ffee1e3 100644
--- a/test/CodeGen/decl.c
+++ b/test/CodeGen/decl.c
@@ -2,12 +2,12 @@
 
 // CHECK: @test1.x = internal constant [12 x i32] [i32 1
 // CHECK: @test2.x = private unnamed_addr constant [13 x i32] [i32 1,
-// CHECK: @test5w = global { i32, [4 x i8] } { i32 2, [4 x i8] undef }
-// CHECK: @test5y = global { double } { double 7.300000e+0{{[0]*}}1 }
+// CHECK: @test5w = {{(dso_local )?}}global { i32, [4 x i8] } { i32 2, [4 x i8] undef }
+// CHECK: @test5y = {{(dso_local )?}}global { double } { double 7.300000e+0{{[0]*}}1 }
 
 // CHECK: @test6.x = private unnamed_addr constant %struct.SelectDest { i8 1, i8 2, i32 3, i32 0 }
 
-// CHECK: @test7 = global [2 x %struct.test7s] [%struct.test7s { i32 1, i32 2 }, %struct.test7s { i32 4, i32 0 }]
+// CHECK: @test7 = {{(dso_local )?}}global [2 x %struct.test7s] [%struct.test7s { i32 1, i32 2 }, %struct.test7s { i32 4, i32 0 }]
 
 void test1() {
   // This should codegen as a "@test1.x" global.
diff --git a/test/CodeGen/default-address-space.c b/test/CodeGen/default-address-space.c
index b7f4058..5450609 100644
--- a/test/CodeGen/default-address-space.c
+++ b/test/CodeGen/default-address-space.c
@@ -1,39 +1,27 @@
-// RUN: %clang_cc1 -triple amdgcn -emit-llvm < %s | FileCheck -check-prefixes=PIZ,COM %s
 // RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,COM %s
 
-// PIZ-DAG: @foo = common addrspace(1) global i32 0
 // CHECK-DAG: @foo = common addrspace(1) global i32 0
 int foo;
 
-// PIZ-DAG: @ban = common addrspace(1) global [10 x i32] zeroinitializer
 // CHECK-DAG: @ban = common addrspace(1) global [10 x i32] zeroinitializer
 int ban[10];
 
-// PIZ-DAG: @A = common addrspace(1) global i32 addrspace(4)* null
-// PIZ-DAG: @B = common addrspace(1) global i32 addrspace(4)* null
 // CHECK-DAG: @A = common addrspace(1) global i32* null
 // CHECK-DAG: @B = common addrspace(1) global i32* null
 int *A;
 int *B;
 
 // COM-LABEL: define i32 @test1()
-// PIZ: load i32, i32 addrspace(4)* addrspacecast{{[^@]+}} @foo
 // CHECK: load i32, i32* addrspacecast{{[^@]+}} @foo
 int test1() { return foo; }
 
 // COM-LABEL: define i32 @test2(i32 %i)
 // COM: %[[addr:.*]] = getelementptr
-// PIZ: load i32, i32 addrspace(4)* %[[addr]]
-// PIZ-NEXT: ret i32
 // CHECK: load i32, i32* %[[addr]]
 // CHECK-NEXT: ret i32
 int test2(int i) { return ban[i]; }
 
 // COM-LABEL: define void @test3()
-// PIZ: load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* addrspacecast{{[^@]+}} @B
-// PIZ: load i32, i32 addrspace(4)*
-// PIZ: load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* addrspacecast{{[^@]+}} @A
-// PIZ: store i32 {{.*}}, i32 addrspace(4)*
 // CHECK: load i32*, i32** addrspacecast{{.*}} @B
 // CHECK: load i32, i32*
 // CHECK: load i32*, i32** addrspacecast{{.*}} @A
@@ -42,13 +30,6 @@
   *A = *B;
 }
 
-// PIZ-LABEL: define void @test4(i32 addrspace(4)* %a)
-// PIZ: %[[alloca:.*]] = alloca i32 addrspace(4)*
-// PIZ: %[[a_addr:.*]] = addrspacecast{{.*}} %[[alloca]] to i32 addrspace(4)* addrspace(4)*
-// PIZ: store i32 addrspace(4)* %a, i32 addrspace(4)* addrspace(4)* %[[a_addr]]
-// PIZ: %[[r0:.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %[[a_addr]]
-// PIZ: %[[arrayidx:.*]] = getelementptr inbounds i32, i32 addrspace(4)* %[[r0]]
-// PIZ: store i32 0, i32 addrspace(4)* %[[arrayidx]]
 // CHECK-LABEL: define void @test4(i32* %a)
 // CHECK: %[[alloca:.*]] = alloca i32*, align 8, addrspace(5)
 // CHECK: %[[a_addr:.*]] = addrspacecast{{.*}} %[[alloca]] to i32**
diff --git a/test/CodeGen/dllexport.c b/test/CodeGen/dllexport.c
index 17c2ce9..f27012e 100644
--- a/test/CodeGen/dllexport.c
+++ b/test/CodeGen/dllexport.c
@@ -14,24 +14,24 @@
 __declspec(dllexport) extern int ExternGlobalDecl;
 
 // dllexport implies a definition.
-// CHECK-DAG: @GlobalDef = common dllexport global i32 0, align 4
+// CHECK-DAG: @GlobalDef = common dso_local dllexport global i32 0, align 4
 __declspec(dllexport) int GlobalDef;
 
 // Export definition.
-// CHECK-DAG: @GlobalInit = dllexport global i32 1, align 4
+// CHECK-DAG: @GlobalInit = dso_local dllexport global i32 1, align 4
 __declspec(dllexport) int GlobalInit = 1;
 
 // Declare, then export definition.
-// CHECK-DAG: @GlobalDeclInit = dllexport global i32 1, align 4
+// CHECK-DAG: @GlobalDeclInit = dso_local dllexport global i32 1, align 4
 __declspec(dllexport) extern int GlobalDeclInit;
 int GlobalDeclInit = 1;
 
 // Redeclarations
-// CHECK-DAG: @GlobalRedecl1 = common dllexport global i32 0, align 4
+// CHECK-DAG: @GlobalRedecl1 = common dso_local dllexport global i32 0, align 4
 __declspec(dllexport) extern int GlobalRedecl1;
 __declspec(dllexport)        int GlobalRedecl1;
 
-// CHECK-DAG: @GlobalRedecl2 = common dllexport global i32 0, align 4
+// CHECK-DAG: @GlobalRedecl2 = common dso_local dllexport global i32 0, align 4
 __declspec(dllexport) extern int GlobalRedecl2;
                              int GlobalRedecl2;
 
@@ -44,22 +44,22 @@
 // Declarations are not exported.
 
 // Export function definition.
-// CHECK-DAG: define dllexport void @def()
+// CHECK-DAG: define dso_local dllexport void @def()
 __declspec(dllexport) void def(void) {}
 
 // Export inline function.
-// CHECK-DAG: define weak_odr dllexport void @inlineFunc()
-// CHECK-DAG: define weak_odr dllexport void @externInlineFunc()
+// CHECK-DAG: define weak_odr dso_local dllexport void @inlineFunc()
+// CHECK-DAG: define weak_odr dso_local dllexport void @externInlineFunc()
 __declspec(dllexport) inline void inlineFunc(void) {}
 __declspec(dllexport) inline void externInlineFunc(void) {}
 extern void externInlineFunc(void);
 
 // Redeclarations
-// CHECK-DAG: define dllexport void @redecl1()
+// CHECK-DAG: define dso_local dllexport void @redecl1()
 __declspec(dllexport) void redecl1(void);
 __declspec(dllexport) void redecl1(void) {}
 
-// CHECK-DAG: define dllexport void @redecl2()
+// CHECK-DAG: define dso_local dllexport void @redecl2()
 __declspec(dllexport) void redecl2(void);
                       void redecl2(void) {}
 
@@ -70,46 +70,46 @@
 //===----------------------------------------------------------------------===//
 
 // dllexport takes precedence over the dllimport if both are specified.
-// CHECK-DAG: @PrecedenceGlobal1A = common dllexport global i32 0, align 4
-// CHECK-DAG: @PrecedenceGlobal1B = common dllexport global i32 0, align 4
+// CHECK-DAG: @PrecedenceGlobal1A = common dso_local dllexport global i32 0, align 4
+// CHECK-DAG: @PrecedenceGlobal1B = common dso_local dllexport global i32 0, align 4
 __attribute__((dllimport, dllexport))       int PrecedenceGlobal1A;
 __declspec(dllimport) __declspec(dllexport) int PrecedenceGlobal1B;
 
-// CHECK-DAG: @PrecedenceGlobal2A = common dllexport global i32 0, align 4
-// CHECK-DAG: @PrecedenceGlobal2B = common dllexport global i32 0, align 4
+// CHECK-DAG: @PrecedenceGlobal2A = common dso_local dllexport global i32 0, align 4
+// CHECK-DAG: @PrecedenceGlobal2B = common dso_local dllexport global i32 0, align 4
 __attribute__((dllexport, dllimport))       int PrecedenceGlobal2A;
 __declspec(dllexport) __declspec(dllimport) int PrecedenceGlobal2B;
 
-// CHECK-DAG: @PrecedenceGlobalRedecl1 = dllexport global i32 0, align 4
+// CHECK-DAG: @PrecedenceGlobalRedecl1 = dso_local dllexport global i32 0, align 4
 __declspec(dllexport) extern int PrecedenceGlobalRedecl1;
 __declspec(dllimport)        int PrecedenceGlobalRedecl1 = 0;
 
-// CHECK-DAG: @PrecedenceGlobalRedecl2 = common dllexport global i32 0, align 4
+// CHECK-DAG: @PrecedenceGlobalRedecl2 = common dso_local dllexport global i32 0, align 4
 __declspec(dllimport) extern int PrecedenceGlobalRedecl2;
 __declspec(dllexport)        int PrecedenceGlobalRedecl2;
 
-// CHECK-DAG: @PrecedenceGlobalMixed1 = dllexport global i32 1, align 4
+// CHECK-DAG: @PrecedenceGlobalMixed1 = dso_local dllexport global i32 1, align 4
 __attribute__((dllexport)) extern int PrecedenceGlobalMixed1;
 __declspec(dllimport)             int PrecedenceGlobalMixed1 = 1;
 
-// CHECK-DAG: @PrecedenceGlobalMixed2 = common dllexport global i32 0, align 4
+// CHECK-DAG: @PrecedenceGlobalMixed2 = common dso_local dllexport global i32 0, align 4
 __attribute__((dllimport)) extern int PrecedenceGlobalMixed2;
 __declspec(dllexport)             int PrecedenceGlobalMixed2;
 
-// CHECK-DAG: define dllexport void @precedence1A()
-// CHECK-DAG: define dllexport void @precedence1B()
+// CHECK-DAG: define dso_local dllexport void @precedence1A()
+// CHECK-DAG: define dso_local dllexport void @precedence1B()
 void __attribute__((dllimport, dllexport))       precedence1A(void) {}
 void __declspec(dllimport) __declspec(dllexport) precedence1B(void) {}
 
-// CHECK-DAG: define dllexport void @precedence2A()
-// CHECK-DAG: define dllexport void @precedence2B()
+// CHECK-DAG: define dso_local dllexport void @precedence2A()
+// CHECK-DAG: define dso_local dllexport void @precedence2B()
 void __attribute__((dllexport, dllimport))       precedence2A(void) {}
 void __declspec(dllexport) __declspec(dllimport) precedence2B(void) {}
 
-// CHECK-DAG: define dllexport void @precedenceRedecl1()
+// CHECK-DAG: define dso_local dllexport void @precedenceRedecl1()
 void __declspec(dllimport) precedenceRedecl1(void);
 void __declspec(dllexport) precedenceRedecl1(void) {}
 
-// CHECK-DAG: define dllexport void @precedenceRedecl2()
+// CHECK-DAG: define dso_local dllexport void @precedenceRedecl2()
 void __declspec(dllexport) precedenceRedecl2(void);
 void __declspec(dllimport) precedenceRedecl2(void) {}
diff --git a/test/CodeGen/dllimport.c b/test/CodeGen/dllimport.c
index f70048e..61d6957 100644
--- a/test/CodeGen/dllimport.c
+++ b/test/CodeGen/dllimport.c
@@ -39,14 +39,14 @@
 
 // NB: MSVC issues a warning and makes GlobalRedecl3 dllexport. We follow GCC
 // and drop the dllimport with a warning.
-// CHECK: @GlobalRedecl3 = external global i32
+// CHECK: @GlobalRedecl3 = external dso_local global i32
 __declspec(dllimport) extern int GlobalRedecl3;
                       extern int GlobalRedecl3; // dllimport ignored
 USEVAR(GlobalRedecl3)
 
 // Make sure this works even if the decl has been used before it's defined (PR20792).
-// MS: @GlobalRedecl4 = common dllexport global i32
-// GNU: @GlobalRedecl4 = common global i32
+// MS: @GlobalRedecl4 = common dso_local dllexport global i32
+// GNU: @GlobalRedecl4 = common dso_local global i32
 __declspec(dllimport) extern int GlobalRedecl4;
 USEVAR(GlobalRedecl4)
                       int GlobalRedecl4; // dllimport ignored
@@ -76,22 +76,22 @@
 __declspec(dllimport) void decl(void);
 
 // Initialize use_decl with the address of the thunk.
-// CHECK-DAG: @use_decl = global void ()* @decl
+// CHECK-DAG: @use_decl = dso_local global void ()* @decl
 void (*use_decl)(void) = &decl;
 
 // Import inline function.
 // MS-DAG: declare dllimport void @inlineFunc()
 // MO1-DAG: define available_externally dllimport void @inlineFunc()
-// GNU-DAG: declare void @inlineFunc()
-// GO1-DAG: define available_externally void @inlineFunc()
+// GNU-DAG: declare dso_local void @inlineFunc()
+// GO1-DAG: define available_externally dso_local void @inlineFunc()
 __declspec(dllimport) inline void inlineFunc(void) {}
 USE(inlineFunc)
 
 // inline attributes
 // MS-DAG: declare dllimport void @noinline()
 // MO1-DAG: define available_externally dllimport void @noinline()
-// GNU-DAG: declare void @noinline()
-// GO1-DAG: define available_externally void @noinline()
+// GNU-DAG: declare dso_local void @noinline()
+// GO1-DAG: define available_externally dso_local void @noinline()
 // CHECK-NOT: @alwaysInline()
 // O1-NOT: @alwaysInline()
 __declspec(dllimport) __attribute__((noinline)) inline void noinline(void) {}
@@ -107,20 +107,20 @@
 
 // NB: MSVC issues a warning and makes redecl2/redecl3 dllexport. We follow GCC
 // and drop the dllimport with a warning.
-// CHECK-DAG: declare void @redecl2()
+// CHECK-DAG: declare dso_local void @redecl2()
 __declspec(dllimport) void redecl2(void);
                       void redecl2(void);
 USE(redecl2)
 
-// MS: define dllexport void @redecl3()
-// GNU: define void @redecl3()
+// MS: define dso_local dllexport void @redecl3()
+// GNU: define dso_local void @redecl3()
 __declspec(dllimport) void redecl3(void);
                       void redecl3(void) {} // dllimport ignored
 USE(redecl3)
 
 // Make sure this works even if the decl is used before it's defined (PR20792).
-// MS: define dllexport void @redecl4()
-// GNU: define void @redecl4()
+// MS: define dso_local dllexport void @redecl4()
+// GNU: define dso_local void @redecl4()
 __declspec(dllimport) void redecl4(void);
 USE(redecl4)
                       void redecl4(void) {} // dllimport ignored
diff --git a/test/CodeGen/dso-local-executable.c b/test/CodeGen/dso-local-executable.c
new file mode 100644
index 0000000..44d5ecf
--- /dev/null
+++ b/test/CodeGen/dso-local-executable.c
@@ -0,0 +1,112 @@
+// RUN: %clang_cc1 -triple x86_64-pc-win32 -emit-llvm %s -o - | FileCheck --check-prefix=COFF %s
+// COFF-DAG: @bar = external dso_local global i32
+// COFF-DAG: @weak_bar = extern_weak dso_local global i32
+// COFF-DAG: declare dso_local void @foo()
+// COFF-DAG: @baz = dso_local global i32 42
+// COFF-DAG: define dso_local i32* @zed()
+// COFF-DAG: @thread_var = external dso_local thread_local global i32
+// COFF-DAG: @local_thread_var = dso_local thread_local global i32 42
+// COFF-DAG: @import_var = external dllimport global i32
+// COFF-DAG: declare dllimport void @import_func()
+
+// RUN: %clang_cc1 -triple x86_64-pc-linux -emit-llvm -mrelocation-model static %s -o - | FileCheck --check-prefix=STATIC %s
+// STATIC-DAG: @bar = external dso_local global i32
+// STATIC-DAG: @weak_bar = extern_weak dso_local global i32
+// STATIC-DAG: declare dso_local void @foo()
+// STATIC-DAG: @baz = dso_local global i32 42
+// STATIC-DAG: define dso_local i32* @zed()
+// STATIC-DAG: @thread_var = external thread_local global i32
+// STATIC-DAG: @local_thread_var = dso_local thread_local global i32 42
+// STATIC-DAG: @import_var = external dso_local global i32
+// STATIC-DAG: declare dso_local void @import_func()
+
+// RUN: %clang_cc1 -triple x86_64-pc-linux -emit-llvm -pic-is-pie -mpie-copy-relocations %s -o - | FileCheck --check-prefix=PIE-COPY %s
+// PIE-COPY-DAG: @bar = external dso_local global i32
+// PIE-COPY-DAG: @weak_bar = extern_weak global i32
+// PIE-COPY-DAG: declare void @foo()
+// PIE-COPY-DAG: @baz = dso_local global i32 42
+// PIE-COPY-DAG: define dso_local i32* @zed()
+// PIE-COPY-DAG: @thread_var = external thread_local global i32
+// PIE-COPY-DAG: @local_thread_var = dso_local thread_local global i32 42
+// PIE-COPY-DAG: @import_var = external dso_local global i32
+// PIE-COPY-DAG: declare void @import_func()
+
+// RUN: %clang_cc1 -triple x86_64-pc-linux -emit-llvm -pic-is-pie %s -o - | FileCheck --check-prefix=PIE %s
+// PIE-DAG: @bar = external global i32
+// PIE-DAG: @weak_bar = extern_weak global i32
+// PIE-DAG: declare void @foo()
+// PIE-DAG: @baz = dso_local global i32 42
+// PIE-DAG: define dso_local i32* @zed()
+// PIE-DAG: @thread_var = external thread_local global i32
+// PIE-DAG: @local_thread_var = dso_local thread_local global i32 42
+// PIE-DAG: @import_var = external global i32
+// PIE-DAG: declare void @import_func()
+
+// RUN: %clang_cc1 -triple x86_64-pc-linux -emit-llvm -mrelocation-model static -fno-plt %s -o - | FileCheck --check-prefix=NOPLT %s
+// NOPLT-DAG: @bar = external dso_local global i32
+// NOPLT-DAG: @weak_bar = extern_weak dso_local global i32
+// NOPLT-DAG: declare void @foo()
+// NOPLT-DAG: @baz = dso_local global i32 42
+// NOPLT-DAG: define dso_local i32* @zed()
+// NOPLT-DAG: @thread_var = external thread_local global i32
+// NOPLT-DAG: @local_thread_var = dso_local thread_local global i32 42
+// NOPLT-DAG: @import_var = external dso_local global i32
+// NOPLT-DAG: declare void @import_func()
+
+// RUN: %clang_cc1 -triple x86_64-pc-linux -emit-llvm -fno-plt -pic-is-pie -mpie-copy-relocations %s -o - | FileCheck --check-prefix=PIE-COPY-NOPLT %s
+// PIE-COPY-NOPLT-DAG: @bar = external dso_local global i32
+// PIE-COPY-NOPLT-DAG: @weak_bar = extern_weak global i32
+// PIE-COPY-NOPLT-DAG: declare void @foo()
+// PIE-COPY-NOPLT-DAG: @baz = dso_local global i32 42
+// PIE-COPY-NOPLT-DAG: define dso_local i32* @zed()
+// PIE-COPY-NOPLT-DAG: @thread_var = external thread_local global i32
+// PIE-COPY-NOPLT-DAG: @local_thread_var = dso_local thread_local global i32 42
+// PIE-COPY-NOPLT-DAG: @import_var = external dso_local global i32
+// PIE-COPY-NOPLT-DAG: declare void @import_func()
+
+// RUN: %clang_cc1 -triple x86_64-pc-linux -emit-llvm -pic-is-pie -fno-plt %s -o - | FileCheck --check-prefix=PIE-NO-PLT %s
+// RUN: %clang_cc1 -triple powerpc64le-pc-linux -emit-llvm -mrelocation-model static %s -o - | FileCheck --check-prefix=PIE-NO-PLT %s
+// PIE-NO-PLT-DAG: @bar = external global i32
+// PIE-NO-PLT-DAG: @weak_bar = extern_weak global i32
+// PIE-NO-PLT-DAG: declare void @foo()
+// PIE-NO-PLT-DAG: @baz = dso_local global i32 42
+// PIE-NO-PLT-DAG: define dso_local i32* @zed()
+// PIE-NO-PLT-DAG: @thread_var = external thread_local global i32
+// PIE-NO-PLT-DAG: @local_thread_var = dso_local thread_local global i32 42
+// PIE-NO-PLT-DAG: @import_var = external global i32
+// PIE-NO-PLT-DAG: declare void @import_func()
+
+// RUN: %clang_cc1 -triple x86_64-pc-linux -emit-llvm %s -o - | FileCheck --check-prefix=SHARED %s
+// SHARED-DAG: @bar = external global i32
+// SHARED-DAG: @weak_bar = extern_weak global i32
+// SHARED-DAG: declare void @foo()
+// SHARED-DAG: @baz = global i32 42
+// SHARED-DAG: define i32* @zed()
+// SHARED-DAG: @thread_var = external thread_local global i32
+// SHARED-DAG: @local_thread_var = thread_local global i32 42
+// PIE-NO-PLT-DAG: @import_var = external global i32
+// PIE-NO-PLT-DAG: declare void @import_func()
+
+__attribute__((dllimport)) extern int import_var;
+__attribute__((dllimport)) void import_func(void);
+
+int *use_import() {
+  import_func();
+  return &import_var;
+}
+
+extern int bar;
+__attribute__((weak)) extern int weak_bar;
+void foo(void);
+
+int baz = 42;
+int *zed() {
+  foo();
+  return baz ? &weak_bar : &bar;
+}
+
+extern __thread int thread_var;
+__thread int local_thread_var = 42;
+int *get_thread_var(int a) {
+  return a ? &thread_var : &local_thread_var;
+}
diff --git a/test/CodeGen/elf-linker-options.c b/test/CodeGen/elf-linker-options.c
new file mode 100644
index 0000000..cf2d1b9
--- /dev/null
+++ b/test/CodeGen/elf-linker-options.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple i686---elf -emit-llvm %s -o - | FileCheck %s
+
+#pragma comment(lib, "alpha")
+
+// CHECK: !llvm.linker.options = !{[[NODE:![0-9]+]]}
+// CHECK: [[NODE]] = !{!"lib", !"alpha"}
+
diff --git a/test/CodeGen/exceptions-seh-finally.c b/test/CodeGen/exceptions-seh-finally.c
index 0f2123b..ecb6f76 100644
--- a/test/CodeGen/exceptions-seh-finally.c
+++ b/test/CodeGen/exceptions-seh-finally.c
@@ -13,7 +13,7 @@
   }
 }
 
-// CHECK-LABEL: define void @basic_finally()
+// CHECK-LABEL: define dso_local void @basic_finally()
 // CHECK: invoke void @might_crash()
 // CHECK:     to label %[[invoke_cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
 //
@@ -53,7 +53,7 @@
   }
 }
 
-// CHECK-LABEL: define void @label_in_finally()
+// CHECK-LABEL: define dso_local void @label_in_finally()
 // CHECK: invoke void @might_crash()
 // CHECK:     to label %[[invoke_cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
 //
@@ -81,7 +81,7 @@
   }
 }
 
-// CHECK-LABEL: define void @use_abnormal_termination()
+// CHECK-LABEL: define dso_local void @use_abnormal_termination()
 // CHECK: invoke void @might_crash()
 // CHECK:     to label %[[invoke_cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
 //
@@ -110,7 +110,7 @@
   }
 }
 
-// CHECK-LABEL: define void @noreturn_noop_finally()
+// CHECK-LABEL: define dso_local void @noreturn_noop_finally()
 // CHECK: call void @"\01?fin$0@0@noreturn_noop_finally@@"({{.*}})
 // CHECK: ret void
 
@@ -127,7 +127,7 @@
   }
 }
 
-// CHECK-LABEL: define void @noreturn_finally()
+// CHECK-LABEL: define dso_local void @noreturn_finally()
 // CHECK: invoke void @might_crash()
 // CHECK:     to label %[[cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
 //
@@ -151,7 +151,7 @@
   } __finally {
   }
 }
-// CHECK-LABEL: define i32 @finally_with_return()
+// CHECK-LABEL: define dso_local i32 @finally_with_return()
 // CHECK: call void @"\01?fin$0@0@finally_with_return@@"({{.*}})
 // CHECK-NEXT: ret i32 42
 
@@ -173,7 +173,7 @@
   return 0;
 }
 
-// CHECK-LABEL: define i32 @nested___finally___finally
+// CHECK-LABEL: define dso_local i32 @nested___finally___finally
 // CHECK: invoke void @"\01?fin$1@0@nested___finally___finally@@"({{.*}})
 // CHECK:          to label %[[outercont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
 //
@@ -208,7 +208,7 @@
   }
   return 912;
 }
-// CHECK-LABEL: define i32 @nested___finally___finally_with_eh_edge
+// CHECK-LABEL: define dso_local i32 @nested___finally___finally_with_eh_edge
 // CHECK: invoke void @might_crash()
 // CHECK-NEXT: to label %[[invokecont:[^ ]*]] unwind label %[[lpad1:[^ ]*]]
 //
@@ -252,7 +252,7 @@
   }
 }
 
-// CHECK-LABEL: define void @finally_within_finally(
+// CHECK-LABEL: define dso_local void @finally_within_finally(
 // CHECK: invoke void @might_crash(
 
 // CHECK: call void @"\01?fin$0@0@finally_within_finally@@"(
diff --git a/test/CodeGen/exceptions-seh-leave.c b/test/CodeGen/exceptions-seh-leave.c
index 087fadb..e61c8b3 100644
--- a/test/CodeGen/exceptions-seh-leave.c
+++ b/test/CodeGen/exceptions-seh-leave.c
@@ -17,7 +17,7 @@
   }
   return 1;
 }
-// CHECK-LABEL: define i32 @__leave_with___except_simple()
+// CHECK-LABEL: define dso_local i32 @__leave_with___except_simple()
 // CHECK: store i32 15, i32* %myres
 // CHECK-NEXT: br label %[[tryleave:[^ ]*]]
 // CHECK-NOT: store i32 23
@@ -37,7 +37,7 @@
   }
   return 1;
 }
-// CHECK-LABEL: define i32 @__leave_with___except()
+// CHECK-LABEL: define dso_local i32 @__leave_with___except()
 // CHECK: invoke void @g()
 // CHECK-NEXT:       to label %[[cont:.*]] unwind label %{{.*}}
 // For __excepts, instead of an explicit __try.__leave label, we could use
@@ -69,7 +69,7 @@
   }
   return 1;
 }
-// CHECK-LABEL: define i32 @__leave_with___finally_simple()
+// CHECK-LABEL: define dso_local i32 @__leave_with___finally_simple()
 // CHECK: store i32 15, i32* %myres
 // CHECK-NEXT: br label %[[tryleave:[^ ]*]]
 // CHECK-NOT: store i32 23
@@ -89,7 +89,7 @@
   }
   return 1;
 }
-// CHECK-LABEL: define i32 @__leave_with___finally_noreturn()
+// CHECK-LABEL: define dso_local i32 @__leave_with___finally_noreturn()
 // CHECK: store i32 15, i32* %myres
 // CHECK-NEXT: br label %[[tryleave:[^ ]*]]
 // CHECK-NOT: store i32 23
@@ -109,7 +109,7 @@
   }
   return 1;
 }
-// CHECK-LABEL: define i32 @__leave_with___finally()
+// CHECK-LABEL: define dso_local i32 @__leave_with___finally()
 // CHECK: invoke void @g()
 // CHECK-NEXT:       to label %[[cont:.*]] unwind label %{{.*}}
 // For __finally, there needs to be an explicit __try.__leave, because
@@ -142,7 +142,7 @@
   }
   return 1;
 }
-// CHECK-LABEL: define i32 @nested___except___finally()
+// CHECK-LABEL: define dso_local i32 @nested___except___finally()
 
 // CHECK-LABEL: invoke void @g()
 // CHECK-NEXT:       to label %[[g1_cont1:.*]] unwind label %[[g1_lpad:.*]]
@@ -194,7 +194,7 @@
   return 1;
 }
 // The order of basic blocks in the below doesn't matter.
-// CHECK-LABEL: define i32 @nested___except___except()
+// CHECK-LABEL: define dso_local i32 @nested___except___except()
 
 // CHECK-LABEL: invoke void @g()
 // CHECK-NEXT:       to label %[[g1_cont:.*]] unwind label %[[g1_lpad:.*]]
@@ -247,7 +247,7 @@
   return 1;
 }
 // The order of basic blocks in the below doesn't matter.
-// CHECK-LABEL: define i32 @nested___finally___except()
+// CHECK-LABEL: define dso_local i32 @nested___finally___except()
 
 // CHECK-LABEL: invoke void @g()
 // CHECK-NEXT:       to label %[[g1_cont:.*]] unwind label %[[g1_lpad:.*]]
@@ -302,7 +302,7 @@
   return 1;
 }
 // The order of basic blocks in the below doesn't matter.
-// CHECK-LABEL: define i32 @nested___finally___finally()
+// CHECK-LABEL: define dso_local i32 @nested___finally___finally()
 
 // CHECK: invoke void @g()
 // CHECK-NEXT:       to label %[[g1_cont:.*]] unwind label %[[g1_lpad:.*]]
diff --git a/test/CodeGen/exceptions-seh.c b/test/CodeGen/exceptions-seh.c
index a0a1dbc..085a9a1 100644
--- a/test/CodeGen/exceptions-seh.c
+++ b/test/CodeGen/exceptions-seh.c
@@ -10,7 +10,7 @@
 void try_body(int numerator, int denominator, int *myres) {
   *myres = numerator / denominator;
 }
-// CHECK-LABEL: define void @try_body(i32 %numerator, i32 %denominator, i32* %myres)
+// CHECK-LABEL: define dso_local void @try_body(i32 %numerator, i32 %denominator, i32* %myres)
 // CHECK: sdiv i32
 // CHECK: store i32 %{{.*}}, i32*
 // CHECK: ret void
@@ -27,7 +27,7 @@
   return success;
 }
 
-// CHECK-LABEL: define i32 @safe_div(i32 %numerator, i32 %denominator, i32* %res)
+// CHECK-LABEL: define dso_local i32 @safe_div(i32 %numerator, i32 %denominator, i32* %res)
 // X64-SAME:      personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
 // X86-SAME:      personality i8* bitcast (i32 (...)* @_except_handler3 to i8*)
 // CHECK: invoke void @try_body(i32 %{{.*}}, i32 %{{.*}}, i32* %{{.*}}) #[[NOINLINE:[0-9]+]]
@@ -57,9 +57,9 @@
 // X86: ret i32 1
 
 // Mingw uses msvcrt, so it can also use _except_handler3.
-// X86-GNU-LABEL: define i32 @safe_div(i32 %numerator, i32 %denominator, i32* %res)
+// X86-GNU-LABEL: define dso_local i32 @safe_div(i32 %numerator, i32 %denominator, i32* %res)
 // X86-GNU-SAME:      personality i8* bitcast (i32 (...)* @_except_handler3 to i8*)
-// X64-GNU-LABEL: define i32 @safe_div(i32 %numerator, i32 %denominator, i32* %res)
+// X64-GNU-LABEL: define dso_local i32 @safe_div(i32 %numerator, i32 %denominator, i32* %res)
 // X64-GNU-SAME:      personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
 
 void j(void);
@@ -74,7 +74,7 @@
   return r;
 }
 
-// CHECK-LABEL: define i32 @filter_expr_capture()
+// CHECK-LABEL: define dso_local i32 @filter_expr_capture()
 // X64-SAME: personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
 // X86-SAME: personality i8* bitcast (i32 (...)* @_except_handler3 to i8*)
 // X64: call void (...) @llvm.localescape(i32* %[[r:[^ ,]*]])
@@ -114,7 +114,7 @@
   }
   return r;
 }
-// CHECK-LABEL: define i32 @nested_try()
+// CHECK-LABEL: define dso_local i32 @nested_try()
 // X64-SAME: personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
 // X86-SAME: personality i8* bitcast (i32 (...)* @_except_handler3 to i8*)
 // CHECK: store i32 42, i32* %[[r:[^ ,]*]]
@@ -174,7 +174,7 @@
   }
   return g;
 }
-// CHECK-LABEL: define i32 @basic_finally(i32 %g)
+// CHECK-LABEL: define dso_local i32 @basic_finally(i32 %g)
 // X64-SAME: personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
 // X86-SAME: personality i8* bitcast (i32 (...)* @_except_handler3 to i8*)
 // CHECK: %[[g_addr:[^ ]*]] = alloca i32, align 4
@@ -211,7 +211,7 @@
     return 42;
   }
 }
-// CHECK-LABEL: define i32 @except_return()
+// CHECK-LABEL: define dso_local i32 @except_return()
 // CHECK: %[[tmp:[^ ]*]] = invoke i32 @returns_int()
 // CHECK:       to label %[[cont:[^ ]*]] unwind label %[[catchpad:[^ ]*]]
 //
@@ -240,7 +240,7 @@
   }
 }
 //
-// CHECK-LABEL: define void @finally_capture_twice(
+// CHECK-LABEL: define dso_local void @finally_capture_twice(
 // CHECK:         [[X:%.*]] = alloca i32, align 4
 // CHECK:         call void (...) @llvm.localescape(i32* [[X]])
 // CHECK-NEXT:    store i32 {{.*}}, i32* [[X]], align 4
@@ -267,7 +267,7 @@
   }
 }
 
-// CHECK-LABEL: define i32 @exception_code_in_except()
+// CHECK-LABEL: define dso_local i32 @exception_code_in_except()
 // CHECK: %[[ret_slot:[^ ]*]] = alloca i32
 // CHECK: %[[code_slot:[^ ]*]] = alloca i32
 // CHECK: invoke void @try_body(i32 0, i32 0, i32* null)
diff --git a/test/CodeGen/ext-vector.c b/test/CodeGen/ext-vector.c
index beb5882..faa6ede 100644
--- a/test/CodeGen/ext-vector.c
+++ b/test/CodeGen/ext-vector.c
@@ -5,10 +5,10 @@
 typedef __attribute__(( ext_vector_type(4) )) int int4;
 typedef __attribute__(( ext_vector_type(4) )) unsigned int uint4;
 
-// CHECK: @foo = global <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+// CHECK: @foo = {{(dso_local )?}}global <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
 float4 foo = (float4){ 1.0, 2.0, 3.0, 4.0 };
 
-// CHECK: @bar = constant <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 0x7FF0000000000000>
+// CHECK: @bar = {{(dso_local )?}}constant <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 0x7FF0000000000000>
 const float4 bar = (float4){ 1.0, 2.0, 3.0, __builtin_inff() };
 
 // CHECK: @test1
diff --git a/test/CodeGen/fentry.c b/test/CodeGen/fentry.c
index b913318..43586c4 100644
--- a/test/CodeGen/fentry.c
+++ b/test/CodeGen/fentry.c
@@ -7,5 +7,12 @@
   return 0;
 }
 
-//CHECK: attributes #{{[0-9]+}} = { {{.*}}"fentry-call"="true"{{.*}} }
-//NOPG-NOT: attributes #{{[0-9]+}} = { {{.*}}"fentry-call"{{.*}} }
+int __attribute__((no_instrument_function)) no_instrument(void) {
+  return foo();
+}
+
+//CHECK: attributes #0 = { {{.*}}"fentry-call"="true"{{.*}} }
+//CHECK: attributes #1 = { {{.*}} }
+//CHECK-NOT: attributes #1 = { {{.*}}"fentry-call"="true"{{.*}} }
+//NOPG-NOT: attributes #0 = { {{.*}}"fentry-call"{{.*}} }
+//NOPG-NOT: attributes #1 = { {{.*}}"fentry-call"{{.*}} }
diff --git a/test/CodeGen/finite-math.c b/test/CodeGen/finite-math.c
index 90a83fa..d1a2956 100644
--- a/test/CodeGen/finite-math.c
+++ b/test/CodeGen/finite-math.c
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -ffinite-math-only -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=FINITE
 // RUN: %clang_cc1 -fno-signed-zeros -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK  -check-prefix=NSZ
 // RUN: %clang_cc1 -freciprocal-math -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK  -check-prefix=RECIP
+// RUN: %clang_cc1 -mreassociate -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK  -check-prefix=REASSOC
 
 float f0, f1, f2;
 
@@ -10,6 +11,7 @@
   // FINITE: fadd nnan ninf
   // NSZ: fadd nsz
   // RECIP: fadd arcp
+  // REASSOC: fadd reassoc
   f0 = f1 + f2;
 
   // CHECK: ret
diff --git a/test/CodeGen/fixup-depth-overflow.c b/test/CodeGen/fixup-depth-overflow.c
index af452d0..1ae2a41 100644
--- a/test/CodeGen/fixup-depth-overflow.c
+++ b/test/CodeGen/fixup-depth-overflow.c
@@ -22,5 +22,5 @@
   return;
 }
 
-// CHECK-LABEL: define void @f
+// CHECK-LABEL: define {{(dso_local )?}}void @f
 // CHECK-NOT: cleanup
diff --git a/test/CodeGen/fma-builtins.c b/test/CodeGen/fma-builtins.c
index 91a2efe..6f792a7 100644
--- a/test/CodeGen/fma-builtins.c
+++ b/test/CodeGen/fma-builtins.c
@@ -4,161 +4,221 @@
 #include <immintrin.h>
 
 __m128 test_mm_fmadd_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_fmadd_ps
   // CHECK: @llvm.x86.fma.vfmadd.ps
   return _mm_fmadd_ps(a, b, c);
 }
 
 __m128d test_mm_fmadd_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_fmadd_pd
   // CHECK: @llvm.x86.fma.vfmadd.pd
   return _mm_fmadd_pd(a, b, c);
 }
 
 __m128 test_mm_fmadd_ss(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_fmadd_ss
   // CHECK: @llvm.x86.fma.vfmadd.ss
   return _mm_fmadd_ss(a, b, c);
 }
 
 __m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_fmadd_sd
   // CHECK: @llvm.x86.fma.vfmadd.sd
   return _mm_fmadd_sd(a, b, c);
 }
 
 __m128 test_mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
-  // CHECK: @llvm.x86.fma.vfmsub.ps
+  // CHECK-LABEL: test_mm_fmsub_ps
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
   return _mm_fmsub_ps(a, b, c);
 }
 
 __m128d test_mm_fmsub_pd(__m128d a, __m128d b, __m128d c) {
-  // CHECK: @llvm.x86.fma.vfmsub.pd
+  // CHECK-LABEL: test_mm_fmsub_pd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
   return _mm_fmsub_pd(a, b, c);
 }
 
 __m128 test_mm_fmsub_ss(__m128 a, __m128 b, __m128 c) {
-  // CHECK: @llvm.x86.fma.vfmsub.ss
+  // CHECK-LABEL: test_mm_fmsub_ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
   return _mm_fmsub_ss(a, b, c);
 }
 
 __m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) {
-  // CHECK: @llvm.x86.fma.vfmsub.sd
+  // CHECK-LABEL: test_mm_fmsub_sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
   return _mm_fmsub_sd(a, b, c);
 }
 
 __m128 test_mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
-  // CHECK: @llvm.x86.fma.vfnmadd.ps
+  // CHECK-LABEL: test_mm_fnmadd_ps
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> %{{.+}})
   return _mm_fnmadd_ps(a, b, c);
 }
 
 __m128d test_mm_fnmadd_pd(__m128d a, __m128d b, __m128d c) {
-  // CHECK: @llvm.x86.fma.vfnmadd.pd
+  // CHECK-LABEL: test_mm_fnmadd_pd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> %{{.+}})
   return _mm_fnmadd_pd(a, b, c);
 }
 
 __m128 test_mm_fnmadd_ss(__m128 a, __m128 b, __m128 c) {
-  // CHECK: @llvm.x86.fma.vfnmadd.ss
+  // CHECK-LABEL: test_mm_fnmadd_ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> [[NEG]], <4 x float> %{{.+}})
   return _mm_fnmadd_ss(a, b, c);
 }
 
 __m128d test_mm_fnmadd_sd(__m128d a, __m128d b, __m128d c) {
-  // CHECK: @llvm.x86.fma.vfnmadd.sd
+  // CHECK-LABEL: test_mm_fnmadd_sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> [[NEG]], <2 x double> %{{.+}})
   return _mm_fnmadd_sd(a, b, c);
 }
 
 __m128 test_mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
-  // CHECK: @llvm.x86.fma.vfnmsub.ps
+  // CHECK-LABEL: test_mm_fnmsub_ps
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> [[NEG2]])
   return _mm_fnmsub_ps(a, b, c);
 }
 
 __m128d test_mm_fnmsub_pd(__m128d a, __m128d b, __m128d c) {
-  // CHECK: @llvm.x86.fma.vfnmsub.pd
+  // CHECK-LABEL: test_mm_fnmsub_pd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> [[NEG2]])
   return _mm_fnmsub_pd(a, b, c);
 }
 
 __m128 test_mm_fnmsub_ss(__m128 a, __m128 b, __m128 c) {
-  // CHECK: @llvm.x86.fma.vfnmsub.ss
+  // CHECK-LABEL: test_mm_fnmsub_ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> [[NEG]], <4 x float> [[NEG2]])
   return _mm_fnmsub_ss(a, b, c);
 }
 
 __m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) {
-  // CHECK: @llvm.x86.fma.vfnmsub.sd
+  // CHECK-LABEL: test_mm_fnmsub_sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> [[NEG]], <2 x double> [[NEG2]])
   return _mm_fnmsub_sd(a, b, c);
 }
 
 __m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_fmaddsub_ps
   // CHECK: @llvm.x86.fma.vfmaddsub.ps
   return _mm_fmaddsub_ps(a, b, c);
 }
 
 __m128d test_mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_fmaddsub_pd
   // CHECK: @llvm.x86.fma.vfmaddsub.pd
   return _mm_fmaddsub_pd(a, b, c);
 }
 
 __m128 test_mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) {
-  // CHECK: @llvm.x86.fma.vfmsubadd.ps
+  // CHECK-LABEL: test_mm_fmsubadd_ps
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
   return _mm_fmsubadd_ps(a, b, c);
 }
 
 __m128d test_mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c) {
-  // CHECK: @llvm.x86.fma.vfmsubadd.pd
+  // CHECK-LABEL: test_mm_fmsubadd_pd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
   return _mm_fmsubadd_pd(a, b, c);
 }
 
 __m256 test_mm256_fmadd_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK-LABEL: test_mm256_fmadd_ps
   // CHECK: @llvm.x86.fma.vfmadd.ps.256
   return _mm256_fmadd_ps(a, b, c);
 }
 
 __m256d test_mm256_fmadd_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK-LABEL: test_mm256_fmadd_pd
   // CHECK: @llvm.x86.fma.vfmadd.pd.256
   return _mm256_fmadd_pd(a, b, c);
 }
 
 __m256 test_mm256_fmsub_ps(__m256 a, __m256 b, __m256 c) {
-  // CHECK: @llvm.x86.fma.vfmsub.ps.256
+  // CHECK-LABEL: test_mm256_fmsub_ps
+  // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> %{{.+}}, <8 x float> %{{.+}}, <8 x float> [[NEG]])
   return _mm256_fmsub_ps(a, b, c);
 }
 
 __m256d test_mm256_fmsub_pd(__m256d a, __m256d b, __m256d c) {
-  // CHECK: @llvm.x86.fma.vfmsub.pd.256
+  // CHECK-LABEL: test_mm256_fmsub_pd
+  // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x double> [[NEG]])
   return _mm256_fmsub_pd(a, b, c);
 }
 
 __m256 test_mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c) {
-  // CHECK: @llvm.x86.fma.vfnmadd.ps.256
+  // CHECK-LABEL: test_mm256_fnmadd_ps
+  // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> [[NEG]], <8 x float> %{{.+}}, <8 x float> %{{.+}})
   return _mm256_fnmadd_ps(a, b, c);
 }
 
 __m256d test_mm256_fnmadd_pd(__m256d a, __m256d b, __m256d c) {
-  // CHECK: @llvm.x86.fma.vfnmadd.pd.256
+  // CHECK-LABEL: test_mm256_fnmadd_pd
+  // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> [[NEG]], <4 x double> %{{.+}}, <4 x double> %{{.+}})
   return _mm256_fnmadd_pd(a, b, c);
 }
 
 __m256 test_mm256_fnmsub_ps(__m256 a, __m256 b, __m256 c) {
-  // CHECK: @llvm.x86.fma.vfnmsub.ps.256
+  // CHECK-LABEL: test_mm256_fnmsub_ps
+  // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+  // CHECK: [[NEG2:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> [[NEG]], <8 x float> %{{.+}}, <8 x float> [[NEG2]])
   return _mm256_fnmsub_ps(a, b, c);
 }
 
 __m256d test_mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c) {
-  // CHECK: @llvm.x86.fma.vfnmsub.pd.256
+  // CHECK-LABEL: test_mm256_fnmsub_pd
+  // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> [[NEG]], <4 x double> %{{.+}}, <4 x double> [[NEG2]])
   return _mm256_fnmsub_pd(a, b, c);
 }
 
 __m256 test_mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK-LABEL: test_mm256_fmaddsub_ps
   // CHECK: @llvm.x86.fma.vfmaddsub.ps.256
   return _mm256_fmaddsub_ps(a, b, c);
 }
 
 __m256d test_mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK-LABEL: test_mm256_fmaddsub_pd
   // CHECK: @llvm.x86.fma.vfmaddsub.pd.256
   return _mm256_fmaddsub_pd(a, b, c);
 }
 
 __m256 test_mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c) {
-  // CHECK: @llvm.x86.fma.vfmsubadd.ps.256
+  // CHECK-LABEL: test_mm256_fmsubadd_ps
+  // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+  // CHECK: @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.+}}, <8 x float> [[NEG]])
   return _mm256_fmsubadd_ps(a, b, c);
 }
 
 __m256d test_mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c) {
-  // CHECK: @llvm.x86.fma.vfmsubadd.pd.256
+  // CHECK-LABEL: test_mm256_fmsubadd_pd
+  // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x double> [[NEG]])
   return _mm256_fmsubadd_pd(a, b, c);
 }
diff --git a/test/CodeGen/fma4-builtins.c b/test/CodeGen/fma4-builtins.c
index 3ca5fac..c848d4a 100644
--- a/test/CodeGen/fma4-builtins.c
+++ b/test/CodeGen/fma4-builtins.c
@@ -17,85 +17,101 @@
 
 __m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_macc_ss
-  // CHECK: @llvm.x86.fma.vfmadd.ss
+  // CHECK: @llvm.x86.fma4.vfmadd.ss
   return _mm_macc_ss(a, b, c);
 }
 
 __m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_macc_sd
-  // CHECK: @llvm.x86.fma.vfmadd.sd
+  // CHECK: @llvm.x86.fma4.vfmadd.sd
   return _mm_macc_sd(a, b, c);
 }
 
 __m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_msub_ps
-  // CHECK: @llvm.x86.fma.vfmsub.ps
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
   return _mm_msub_ps(a, b, c);
 }
 
 __m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_msub_pd
-  // CHECK: @llvm.x86.fma.vfmsub.pd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
   return _mm_msub_pd(a, b, c);
 }
 
 __m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_msub_ss
-  // CHECK: @llvm.x86.fma.vfmsub.ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
   return _mm_msub_ss(a, b, c);
 }
 
 __m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_msub_sd
-  // CHECK: @llvm.x86.fma.vfmsub.sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
   return _mm_msub_sd(a, b, c);
 }
 
 __m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_nmacc_ps
-  // CHECK: @llvm.x86.fma.vfnmadd.ps
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> %{{.+}})
   return _mm_nmacc_ps(a, b, c);
 }
 
 __m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_nmacc_pd
-  // CHECK: @llvm.x86.fma.vfnmadd.pd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> %{{.+}})
   return _mm_nmacc_pd(a, b, c);
 }
 
 __m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_nmacc_ss
-  // CHECK: @llvm.x86.fma.vfnmadd.ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> %{{.+}})
   return _mm_nmacc_ss(a, b, c);
 }
 
 __m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_nmacc_sd
-  // CHECK: @llvm.x86.fma.vfnmadd.sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> %{{.+}})
   return _mm_nmacc_sd(a, b, c);
 }
 
 __m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_nmsub_ps
-  // CHECK: @llvm.x86.fma.vfnmsub.ps
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> [[NEG2]])
   return _mm_nmsub_ps(a, b, c);
 }
 
 __m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_nmsub_pd
-  // CHECK: @llvm.x86.fma.vfnmsub.pd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> [[NEG2]])
   return _mm_nmsub_pd(a, b, c);
 }
 
 __m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_nmsub_ss
-  // CHECK: @llvm.x86.fma.vfnmsub.ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> [[NEG2]])
   return _mm_nmsub_ss(a, b, c);
 }
 
 __m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_nmsub_sd
-  // CHECK: @llvm.x86.fma.vfnmsub.sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> [[NEG2]])
   return _mm_nmsub_sd(a, b, c);
 }
 
@@ -113,13 +129,15 @@
 
 __m128 test_mm_msubadd_ps(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_msubadd_ps
-  // CHECK: @llvm.x86.fma.vfmsubadd.ps
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
   return _mm_msubadd_ps(a, b, c);
 }
 
 __m128d test_mm_msubadd_pd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_msubadd_pd
-  // CHECK: @llvm.x86.fma.vfmsubadd.pd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
   return _mm_msubadd_pd(a, b, c);
 }
 
@@ -137,37 +155,45 @@
 
 __m256 test_mm256_msub_ps(__m256 a, __m256 b, __m256 c) {
   // CHECK-LABEL: test_mm256_msub_ps
-  // CHECK: @llvm.x86.fma.vfmsub.ps.256
+  // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> %{{.+}}, <8 x float> %{{.+}}, <8 x float> [[NEG]])
   return _mm256_msub_ps(a, b, c);
 }
 
 __m256d test_mm256_msub_pd(__m256d a, __m256d b, __m256d c) {
   // CHECK-LABEL: test_mm256_msub_pd
-  // CHECK: @llvm.x86.fma.vfmsub.pd.256
+  // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x double> [[NEG]])
   return _mm256_msub_pd(a, b, c);
 }
 
 __m256 test_mm256_nmacc_ps(__m256 a, __m256 b, __m256 c) {
   // CHECK-LABEL: test_mm256_nmacc_ps
-  // CHECK: @llvm.x86.fma.vfnmadd.ps.256
+  // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> [[NEG]], <8 x float> %{{.+}}, <8 x float> %{{.+}})
   return _mm256_nmacc_ps(a, b, c);
 }
 
 __m256d test_mm256_nmacc_pd(__m256d a, __m256d b, __m256d c) {
   // CHECK-LABEL: test_mm256_nmacc_pd
-  // CHECK: @llvm.x86.fma.vfnmadd.pd.256
+  // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> [[NEG]], <4 x double> %{{.+}}, <4 x double> %{{.+}})
   return _mm256_nmacc_pd(a, b, c);
 }
 
 __m256 test_mm256_nmsub_ps(__m256 a, __m256 b, __m256 c) {
   // CHECK-LABEL: test_mm256_nmsub_ps
-  // CHECK: @llvm.x86.fma.vfnmsub.ps.256
+  // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+  // CHECK: [[NEG2:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+  // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> [[NEG]], <8 x float> %{{.+}}, <8 x float> [[NEG2]])
   return _mm256_nmsub_ps(a, b, c);
 }
 
 __m256d test_mm256_nmsub_pd(__m256d a, __m256d b, __m256d c) {
   // CHECK-LABEL: test_mm256_nmsub_pd
-  // CHECK: @llvm.x86.fma.vfnmsub.pd.256
+  // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> [[NEG]], <4 x double> %{{.+}}, <4 x double> [[NEG2]])
   return _mm256_nmsub_pd(a, b, c);
 }
 
@@ -185,12 +211,14 @@
 
 __m256 test_mm256_msubadd_ps(__m256 a, __m256 b, __m256 c) {
   // CHECK-LABEL: test_mm256_msubadd_ps
-  // CHECK: @llvm.x86.fma.vfmsubadd.ps.256
+  // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+  // CHECK: @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.+}}, <8 x float> [[NEG]])
   return _mm256_msubadd_ps(a, b, c);
 }
 
 __m256d test_mm256_msubadd_pd(__m256d a, __m256d b, __m256d c) {
   // CHECK-LABEL: test_mm256_msubadd_pd
-  // CHECK: @llvm.x86.fma.vfmsubadd.pd.256
+  // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x double> [[NEG]])
   return _mm256_msubadd_pd(a, b, c);
 }
diff --git a/test/CodeGen/fp16-ops.c b/test/CodeGen/fp16-ops.c
index c96727f..f2ed667 100644
--- a/test/CodeGen/fp16-ops.c
+++ b/test/CodeGen/fp16-ops.c
@@ -1,8 +1,9 @@
 // REQUIRES: arm-registered-target
-// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-linux-gnu %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
 // RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fnative-half-type %s \
 // RUN:   | FileCheck %s --check-prefix=NATIVE-HALF
 // RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fnative-half-type %s \
@@ -16,20 +17,19 @@
 volatile __fp16 h0 = 0.0, h1 = 1.0, h2;
 volatile float f0, f1, f2;
 volatile double d0;
+short s0;
 
 void foo(void) {
   // CHECK-LABEL: define void @foo()
 
   // Check unary ops
 
-  // NOHALF: [[F16TOF32:call float @llvm.convert.from.fp16.f32]]
-  // HALF: [[F16TOF32:fpext half]]
+  // NOTNATIVE: [[F16TOF32:fpext half]]
   // CHECK: fptoui float
   // NATIVE-HALF: fptoui half
   test = (h0);
   // CHECK: uitofp i32
-  // NOHALF: [[F32TOF16:call i16 @llvm.convert.to.fp16.f32]]
-  // HALF: [[F32TOF16:fptrunc float]]
+  // NOTNATIVE: [[F32TOF16:fptrunc float]]
   // NATIVE-HALF: uitofp i32 {{.*}} to half
   h0 = (test);
   // CHECK: [[F16TOF32]]
@@ -38,8 +38,7 @@
   test = (!h1);
   // CHECK: [[F16TOF32]]
   // CHECK: fsub float
-  // NOHALF: [[F32TOF16]]
-  // HALF: [[F32TOF16]]
+  // NOTNATIVE: [[F32TOF16]]
   // NATIVE-HALF: fsub half
   h1 = -h1;
   // CHECK: [[F16TOF32]]
@@ -76,8 +75,6 @@
   // NATIVE-HALF: fmul half
   h1 = h0 * h2;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F32TOF16]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fmul float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fmul half
@@ -107,7 +104,6 @@
   // NATIVE-HALF: fdiv half
   h1 = (h0 / h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fdiv float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fdiv half
@@ -137,7 +133,6 @@
   // NATIVE-HALF: fadd half
   h1 = (h2 + h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fadd float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fadd half
@@ -167,7 +162,6 @@
   // NATIVE-HALF: fsub half
   h1 = (h2 - h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fsub float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fsub half
@@ -196,7 +190,6 @@
   // NATIVE-HALF: fcmp olt half
   test = (h2 < h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp olt float
   // NATIVE-HALF: fcmp olt half
   test = (h2 < (__fp16)42.0);
@@ -225,7 +218,6 @@
   // NATIVE-HALF: fcmp ogt half
   test = (h0 > h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp ogt float
   // NATIVE-HALF: fcmp ogt half
   test = ((__fp16)42.0 > h2);
@@ -254,7 +246,6 @@
   // NATIVE-HALF: fcmp ole half
   test = (h2 <= h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp ole float
   // NATIVE-HALF: fcmp ole half
   test = (h2 <= (__fp16)42.0);
@@ -284,7 +275,6 @@
   // NATIVE-HALF: fcmp oge half
   test = (h0 >= h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp oge float
   // NATIVE-HALF: fcmp oge half
   test = (h0 >= (__fp16)-2.0);
@@ -313,7 +303,6 @@
   // NATIVE-HALF: fcmp oeq half
   test = (h1 == h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp oeq float
   // NATIVE-HALF: fcmp oeq half
   test = (h1 == (__fp16)1.0);
@@ -342,7 +331,6 @@
   // NATIVE-HALF: fcmp une half
   test = (h1 != h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp une float
   // NATIVE-HALF: fcmp une half
   test = (h1 != (__fp16)1.0);
@@ -374,8 +362,7 @@
   h1 = (h1 ? h2 : h0);
   // Check assignments (inc. compound)
   h0 = h1;
-  // NOHALF: [[F32TOF16]]
-  // HALF: store {{.*}} half 0xHC000
+  // NOTNATIVE: store {{.*}} half 0xHC000
   // NATIVE-HALF: store {{.*}} half 0xHC000
   h0 = (__fp16)-2.0f;
   // CHECK: [[F32TOF16]]
@@ -398,7 +385,6 @@
   // NATIVE-HALF: fadd half
   h0 += h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fadd float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fadd half
@@ -433,7 +419,6 @@
   // NATIVE-HALF: fsub half
   h0 -= h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fsub float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fsub half
@@ -468,7 +453,6 @@
   // NATIVE-HALF: fmul half
   h0 *= h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fmul float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fmul half
@@ -503,7 +487,6 @@
   // NATIVE-HALF: fdiv half
   h0 /= h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fdiv float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fdiv half
@@ -532,27 +515,29 @@
   h0 /= i0;
 
   // Check conversions to/from double
-  // NOHALF: call i16 @llvm.convert.to.fp16.f64(
-  // HALF: fptrunc double {{.*}} to half
+  // NOTNATIVE: fptrunc double {{.*}} to half
   // NATIVE-HALF: fptrunc double {{.*}} to half
   h0 = d0;
 
   // CHECK: [[MID:%.*]] = fptrunc double {{%.*}} to float
-  // NOHALF: call i16 @llvm.convert.to.fp16.f32(float [[MID]])
-  // HALF: fptrunc float [[MID]] to half
+  // NOTNATIVE: fptrunc float [[MID]] to half
   // NATIVE-HALF: [[MID:%.*]] = fptrunc double {{%.*}} to float
   // NATIVE-HALF: fptrunc float {{.*}} to half
   h0 = (float)d0;
 
-  // NOHALF: call double @llvm.convert.from.fp16.f64(
-  // HALF: fpext half {{.*}} to double
+  // NOTNATIVE: fpext half {{.*}} to double
   // NATIVE-HALF: fpext half {{.*}} to double
   d0 = h0;
 
-  // NOHALF: [[MID:%.*]] = call float @llvm.convert.from.fp16.f32(
-  // HALF: [[MID:%.*]] = fpext half {{.*}} to float
+  // NOTNATIVE: [[MID:%.*]] = fpext half {{.*}} to float
   // CHECK: fpext float [[MID]] to double
   // NATIVE-HALF: [[MID:%.*]] = fpext half {{.*}} to float
   // NATIVE-HALF: fpext float [[MID]] to double
   d0 = (float)h0;
+
+  // NOTNATIVE: [[V1:%.*]] = load i16, i16* @s0
+  // NOTNATIVE: [[CONV:%.*]] = sitofp i16 [[V1]] to float
+  // NOTNATIVE: [[TRUNC:%.*]] = fptrunc float [[CONV]] to half
+  // NOTNATIVE: store volatile half [[TRUNC]], half* @h0
+  h0 = s0;
 }
diff --git a/test/CodeGen/fp16vec-ops.c b/test/CodeGen/fp16vec-ops.c
index a99be41..2eb75a4 100644
--- a/test/CodeGen/fp16vec-ops.c
+++ b/test/CodeGen/fp16vec-ops.c
@@ -1,6 +1,7 @@
 // REQUIRES: arm-registered-target
 // RUN: %clang_cc1 -triple arm64-apple-ios9 -emit-llvm -o - -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=CHECK
 // RUN: %clang_cc1 -triple armv7-apple-ios9 -emit-llvm -o - -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.13 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK
 
 typedef __fp16 half4 __attribute__ ((vector_size (8)));
 typedef short short4 __attribute__ ((vector_size (8)));
diff --git a/test/CodeGen/gfni-builtins.c b/test/CodeGen/gfni-builtins.c
new file mode 100644
index 0000000..95cfd4f
--- /dev/null
+++ b/test/CodeGen/gfni-builtins.c
@@ -0,0 +1,182 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -emit-llvm -o - | FileCheck %s --check-prefix SSE
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX -target-feature +gfni -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX512 -target-feature +gfni -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512
+
+#include <immintrin.h>
+
+__m128i test_mm_gf2p8affineinv_epi64_epi8(__m128i A, __m128i B) {
+  // SSE-LABEL: @test_mm_gf2p8affineinv_epi64_epi8
+  // SSE: @llvm.x86.vgf2p8affineinvqb.128
+  return _mm_gf2p8affineinv_epi64_epi8(A, B, 1);
+}
+
+__m128i test_mm_gf2p8affine_epi64_epi8(__m128i A, __m128i B) {
+  // SSE-LABEL: @test_mm_gf2p8affine_epi64_epi8
+  // SSE: @llvm.x86.vgf2p8affineqb.128
+  return _mm_gf2p8affine_epi64_epi8(A, B, 1);
+}
+
+__m128i test_mm_gf2p8mul_epi8(__m128i A, __m128i B) {
+  // SSE-LABEL: @test_mm_gf2p8mul_epi8
+  // SSE: @llvm.x86.vgf2p8mulb.128
+  return _mm_gf2p8mul_epi8(A, B);
+}
+
+#if defined(AVX) || defined(AVX512)
+__m256i test_mm256_gf2p8affineinv_epi64_epi8(__m256i A, __m256i B) {
+  // AVX-LABEL: @test_mm256_gf2p8affineinv_epi64_epi8
+  // AVX: @llvm.x86.vgf2p8affineinvqb.256
+  return _mm256_gf2p8affineinv_epi64_epi8(A, B, 1);
+}
+
+__m256i test_mm256_gf2p8affine_epi64_epi8(__m256i A, __m256i B) {
+  // AVX-LABEL: @test_mm256_gf2p8affine_epi64_epi8
+  // AVX: @llvm.x86.vgf2p8affineqb.256
+  return _mm256_gf2p8affine_epi64_epi8(A, B, 1);
+}
+
+__m256i test_mm256_gf2p8mul_epi8(__m256i A, __m256i B) {
+  // AVX-LABEL: @test_mm256_gf2p8mul_epi8
+  // AVX: @llvm.x86.vgf2p8mulb.256
+  return _mm256_gf2p8mul_epi8(A, B);
+}
+#endif // AVX
+
+#ifdef AVX512
+__m512i test_mm512_gf2p8affineinv_epi64_epi8(__m512i A, __m512i B) {
+  // AVX512-LABEL: @test_mm512_gf2p8affineinv_epi64_epi8
+  // AVX512: @llvm.x86.vgf2p8affineinvqb.512
+  return _mm512_gf2p8affineinv_epi64_epi8(A, B, 1);
+}
+
+__m512i test_mm512_mask_gf2p8affineinv_epi64_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) {
+  // AVX512-LABEL: @test_mm512_mask_gf2p8affineinv_epi64_epi8
+  // AVX512: @llvm.x86.vgf2p8affineinvqb.512
+  // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+  return _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1);
+}
+
+__m512i test_mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 U, __m512i A, __m512i B) {
+  // AVX512-LABEL: @test_mm512_maskz_gf2p8affineinv_epi64_epi8
+  // AVX512: @llvm.x86.vgf2p8affineinvqb.512
+  // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+  return _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1);
+}
+
+__m256i test_mm256_mask_gf2p8affineinv_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) {
+  // AVX256-LABEL: @test_mm256_mask_gf2p8affineinv_epi64_epi8
+  // AVX256: @llvm.x86.vgf2p8affineinvqb.256
+  // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+  return _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1);
+}
+
+__m256i test_mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 U, __m256i A, __m256i B) {
+  // AVX256-LABEL: @test_mm256_maskz_gf2p8affineinv_epi64_epi8
+  // AVX256: @llvm.x86.vgf2p8affineinvqb.256
+  // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+  return _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1);
+}
+
+__m128i test_mm_mask_gf2p8affineinv_epi64_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) {
+  // AVX512-LABEL: @test_mm_mask_gf2p8affineinv_epi64_epi8
+  // AVX512: @llvm.x86.vgf2p8affineinvqb.128
+  // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+  return _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1);
+}
+
+__m128i test_mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 U, __m128i A, __m128i B) {
+  // AVX512-LABEL: @test_mm_maskz_gf2p8affineinv_epi64_epi8
+  // AVX512: @llvm.x86.vgf2p8affineinvqb.128
+  // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+  return _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1);
+}
+
+__m512i test_mm512_gf2p8affine_epi64_epi8(__m512i A, __m512i B) {
+  // AVX512-LABEL: @test_mm512_gf2p8affine_epi64_epi8
+  // AVX512: @llvm.x86.vgf2p8affineqb.512
+  return _mm512_gf2p8affine_epi64_epi8(A, B, 1);
+}
+
+__m512i test_mm512_mask_gf2p8affine_epi64_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) {
+  // AVX512-LABEL: @test_mm512_mask_gf2p8affine_epi64_epi8
+  // AVX512: @llvm.x86.vgf2p8affineqb.512
+  // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+  return _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1);
+}
+
+__m512i test_mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 U, __m512i A, __m512i B) {
+  // AVX512-LABEL: @test_mm512_maskz_gf2p8affine_epi64_epi8
+  // AVX512: @llvm.x86.vgf2p8affineqb.512
+  // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+  return _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, 1);
+}
+
+__m256i test_mm256_mask_gf2p8affine_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) {
+  // AVX256-LABEL: @test_mm256_mask_gf2p8affine_epi64_epi8
+  // AVX256: @llvm.x86.vgf2p8affineqb.256
+  // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+  return _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1);
+}
+
+__m256i test_mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 U, __m256i A, __m256i B) {
+  // AVX256-LABEL: @test_mm256_maskz_gf2p8affine_epi64_epi8
+  // AVX256: @llvm.x86.vgf2p8affineqb.256
+  // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+  return _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, 1);
+}
+
+__m128i test_mm_mask_gf2p8affine_epi64_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) {
+  // AVX512-LABEL: @test_mm_mask_gf2p8affine_epi64_epi8
+  // AVX512: @llvm.x86.vgf2p8affineqb.128
+  // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+  return _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1);
+}
+
+__m128i test_mm_maskz_gf2p8affine_epi64_epi8(__mmask16 U, __m128i A, __m128i B) {
+  // AVX512-LABEL: @test_mm_maskz_gf2p8affine_epi64_epi8
+  // AVX512: @llvm.x86.vgf2p8affineqb.128
+  // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+  return _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, 1);
+}
+
+__m512i test_mm512_gf2p8mul_epi8(__m512i A, __m512i B) {
+  // AVX512-LABEL: @test_mm512_gf2p8mul_epi8
+  // AVX512: @llvm.x86.vgf2p8mulb.512
+  return _mm512_gf2p8mul_epi8(A, B);
+}
+
+__m512i test_mm512_mask_gf2p8mul_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) {
+  // AVX512-LABEL: @test_mm512_mask_gf2p8mul_epi8
+  // AVX512: @llvm.x86.vgf2p8mulb.512
+  // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+  return _mm512_mask_gf2p8mul_epi8(S, U, A, B);
+}
+
+__m512i test_mm512_maskz_gf2p8mul_epi8(__mmask64 U, __m512i A, __m512i B) {
+  // AVX512-LABEL: @test_mm512_maskz_gf2p8mul_epi8
+  // AVX512: @llvm.x86.vgf2p8mulb.512
+  // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+  return _mm512_maskz_gf2p8mul_epi8(U, A, B);
+}
+
+__m256i test_mm256_mask_gf2p8mul_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) {
+  // AVX256-LABEL: @test_mm256_mask_gf2p8mul_epi8
+  // AVX256: @llvm.x86.vgf2p8mulb.256
+  // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+  return _mm256_mask_gf2p8mul_epi8(S, U, A, B);
+}
+
+__m256i test_mm256_maskz_gf2p8mul_epi8(__mmask32 U, __m256i A, __m256i B) {
+  // AVX256-LABEL: @test_mm256_maskz_gf2p8mul_epi8
+  // AVX256: @llvm.x86.vgf2p8mulb.256
+  // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+  return _mm256_maskz_gf2p8mul_epi8(U, A, B);
+}
+
+__m128i test_mm_mask_gf2p8mul_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) {
+  // AVX512-LABEL: @test_mm_mask_gf2p8mul_epi8
+  // AVX512: @llvm.x86.vgf2p8mulb.128
+  // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+  return _mm_mask_gf2p8mul_epi8(S, U, A, B);
+}
+#endif // AVX512
diff --git a/test/CodeGen/init.c b/test/CodeGen/init.c
index 5d08672..9924662 100644
--- a/test/CodeGen/init.c
+++ b/test/CodeGen/init.c
@@ -8,8 +8,9 @@
 // CHECK-DAG: %struct.M = type { [2 x %struct.I] }
 // CHECK-DAG: %struct.I = type { [3 x i32] }
 
-// CHECK: [1 x %struct.M] [%struct.M { [2 x %struct.I] [%struct.I { [3 x i32] [i32 4, i32 4, i32 0] }, %struct.I { [3 x i32] [i32 4, i32 4, i32 5] }] }],
-// CHECK: [2 x [3 x i32]] {{[[][[]}}3 x i32] [i32 2222, i32 2222, i32 0], [3 x i32] [i32 2222, i32 2222, i32 3333]],
+// CHECK-DAG: [1 x %struct.M] [%struct.M { [2 x %struct.I] [%struct.I { [3 x i32] [i32 4, i32 4, i32 0] }, %struct.I { [3 x i32] [i32 4, i32 4, i32 5] }] }],
+// CHECK-DAG: [2 x [3 x i32]] {{[[][[]}}3 x i32] [i32 2222, i32 2222, i32 0], [3 x i32] [i32 2222, i32 2222, i32 3333]],
+// CHECK-DAG: [[INIT14:.*]] = private global [16 x i32] [i32 0, i32 0, i32 0, i32 0, i32 0, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 0, i32 0, i32 0, i32 0], align 4
 
 void f1() {
   // Scalars in braces.
@@ -33,8 +34,8 @@
 }
 
 // Constants
-// CHECK: @g3 = constant i32 10
-// CHECK: @f4.g4 = internal constant i32 12
+// CHECK-DAG: @g3 = constant i32 10
+// CHECK-DAG: @f4.g4 = internal constant i32 12
 const int g3 = 10;
 int f4() {
   static const int g4 = 12;
@@ -61,7 +62,7 @@
 
 
 
-// CHECK: @test7 = global{{.*}}{ i32 0, [4 x i8] c"bar\00" }
+// CHECK-DAG: @test7 = global{{.*}}{ i32 0, [4 x i8] c"bar\00" }
 // PR8217
 struct a7 {
   int  b;
@@ -151,3 +152,15 @@
   // CHECK: memcpy{{.*}}getelementptr inbounds ([3 x i8], [3 x i8]* @
   bar((char[3]) {""});
 }
+
+// Test that we initialize large member arrays by copying from a global and not
+// with a series of stores.
+struct S14 { int a[16]; };
+
+void test14(struct S14 *s14) {
+// CHECK-LABEL: @test14
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 {{.*}}, i8* align 4 {{.*}} [[INIT14]] {{.*}}, i32 64, i1 false)
+// CHECK-NOT: store
+// CHECK: ret void
+  *s14 = (struct S14) { { [5 ... 11] = 17 } };
+}
diff --git a/test/CodeGen/inline.c b/test/CodeGen/inline.c
index e17b251..ea6e9d7 100644
--- a/test/CodeGen/inline.c
+++ b/test/CodeGen/inline.c
@@ -61,13 +61,13 @@
 // RUN: %clang_cc1 %s -triple i386-pc-win32 -O1 -disable-llvm-passes -emit-llvm -o - -std=c99 | FileCheck %s --check-prefix=CHECK4
 // RUN: %clang_cc1 %s -triple i386-pc-win32 -fexperimental-new-pass-manager -O1 -disable-llvm-passes -emit-llvm -o - -std=c99 | FileCheck %s --check-prefix=CHECK4
 // CHECK4-NOT: define weak_odr void @_Exit(
-// CHECK4-LABEL: define weak_odr i32 @ei()
-// CHECK4-LABEL: define i32 @bar()
+// CHECK4-LABEL: define weak_odr dso_local i32 @ei()
+// CHECK4-LABEL: define dso_local i32 @bar()
 // CHECK4-NOT: unreferenced1
-// CHECK4-LABEL: define weak_odr void @unreferenced2()
-// CHECK4-LABEL: define void @gnu_inline()
-// CHECK4-LABEL: define linkonce_odr i32 @foo()
-// CHECK4-LABEL: define available_externally void @gnu_ei_inline()
+// CHECK4-LABEL: define weak_odr dso_local void @unreferenced2()
+// CHECK4-LABEL: define dso_local void @gnu_inline()
+// CHECK4-LABEL: define linkonce_odr dso_local i32 @foo()
+// CHECK4-LABEL: define available_externally dso_local void @gnu_ei_inline()
 
 __attribute__((noreturn)) void __cdecl _exit(int _Code);
 __inline void __cdecl _Exit(int status) { _exit(status); }
diff --git a/test/CodeGen/instrument-functions.c b/test/CodeGen/instrument-functions.c
index 454dc4d..c075c39 100644
--- a/test/CodeGen/instrument-functions.c
+++ b/test/CodeGen/instrument-functions.c
@@ -1,18 +1,34 @@
-// RUN: %clang_cc1 -S -debug-info-kind=standalone -emit-llvm -o - %s -finstrument-functions | FileCheck %s
+// RUN: %clang_cc1 -S -debug-info-kind=standalone -emit-llvm -o - %s -finstrument-functions -disable-llvm-passes | FileCheck %s
+// RUN: %clang_cc1 -S -debug-info-kind=standalone -emit-llvm -o - %s -finstrument-function-entry-bare -disable-llvm-passes | FileCheck -check-prefix=BARE %s
 
-// CHECK: @test1
 int test1(int x) {
-// CHECK: call void @__cyg_profile_func_enter({{.*}}, !dbg
-// CHECK: call void @__cyg_profile_func_exit({{.*}}, !dbg
+// CHECK: @test1(i32 {{.*}}%x) #[[ATTR1:[0-9]+]]
 // CHECK: ret
+
+// BARE: @test1(i32 {{.*}}%x) #[[ATTR1:[0-9]+]]
+// BARE: ret
   return x;
 }
 
-// CHECK: @test2
 int test2(int) __attribute__((no_instrument_function));
 int test2(int x) {
-// CHECK-NOT: __cyg_profile_func_enter
-// CHECK-NOT: __cyg_profile_func_exit
+// CHECK: @test2(i32 {{.*}}%x) #[[ATTR2:[0-9]+]]
 // CHECK: ret
+
+// BARE: @test2(i32 {{.*}}%x) #[[ATTR2:[0-9]+]]
+// BARE: ret
   return x;
 }
+
+// CHECK: attributes #[[ATTR1]] =
+// CHECK-SAME: "instrument-function-entry"="__cyg_profile_func_enter"
+// CHECK-SAME: "instrument-function-exit"="__cyg_profile_func_exit"
+
+// BARE: attributes #[[ATTR1]] =
+// BARE-SAME: "instrument-function-entry-inlined"="__cyg_profile_func_enter_bare"
+
+// CHECK: attributes #[[ATTR2]] =
+// CHECK-NOT: "instrument-function-entry"
+
+// BARE: attributes #[[ATTR2]] =
+// BARE-NOT: "instrument-function-entry"
diff --git a/test/CodeGen/kr-func-promote.c b/test/CodeGen/kr-func-promote.c
index 8e55dc9..122e643 100644
--- a/test/CodeGen/kr-func-promote.c
+++ b/test/CodeGen/kr-func-promote.c
@@ -1,6 +1,12 @@
 // RUN: %clang_cc1 -triple i386-unknown-unknown %s -emit-llvm -o - | FileCheck %s
-// CHECK: i32 @a(i32)
 
+// CHECK: i32 @a(i32
 int a();
 int a(x) short x; {return x;}
 
+// CHECK: void @b(double
+// CHECK: %[[ADDR:.*]] = alloca float, align 4
+// CHECK: %[[TRUNC:.*]] = fptrunc double %0 to float
+// CHECK: store float %[[TRUNC]], float* %[[ADDR]], align 4
+void b();
+void b(f) float f; {}
diff --git a/test/CodeGen/le32-vaarg.c b/test/CodeGen/le32-vaarg.c
index c02af27..7e1dd8a 100644
--- a/test/CodeGen/le32-vaarg.c
+++ b/test/CodeGen/le32-vaarg.c
@@ -23,7 +23,7 @@
 // CHECK: [[RESULT:%[a-z_0-9]+]] = va_arg {{.*}}, %struct.Foo{{$}}
 // CHECK: store %struct.Foo [[RESULT]], %struct.Foo* [[LOC:%[a-z_0-9]+]]
 // CHECK: [[LOC2:%[a-z_0-9]+]] = bitcast {{.*}} [[LOC]] to i8*
-// CHECK: call void @llvm.memcpy{{.*}}@dest{{.*}}, i8* [[LOC2]]
+// CHECK: call void @llvm.memcpy{{.*}}@dest{{.*}}, i8* align {{[0-9]+}} [[LOC2]]
 
 void skip_struct(va_list *args) {
   va_arg(*args, struct Foo);
diff --git a/test/CodeGen/libcall-declarations.c b/test/CodeGen/libcall-declarations.c
index 8a93b40..7492995 100644
--- a/test/CodeGen/libcall-declarations.c
+++ b/test/CodeGen/libcall-declarations.c
@@ -569,50 +569,50 @@
 // CHECK-ERRNO: declare double @trunc(double) [[NUWRN]]
 // CHECK-ERRNO: declare float @truncf(float) [[NUWRN]]
 // CHECK-ERRNO: declare x86_fp80 @truncl(x86_fp80) [[NUWRN]]
-// CHECK-ERRNO: declare double @cabs(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare float @cabsf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @cacos(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @cacosf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @cacosh(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @cacoshf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare double @carg(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare float @cargf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @casin(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @casinf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @casinh(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @casinhf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @catan(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @catanf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @catanh(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @catanhf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @ccos(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @ccosf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @ccosh(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @ccoshf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @cexp(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @cexpf(<2 x float>) [[NUWRN]]
+// CHECK-ERRNO: declare double @cabs(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare float @cabsf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @cacos(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @cacosf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @cacosh(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @cacoshf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare double @carg(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare float @cargf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @casin(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @casinf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @casinh(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @casinhf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @catan(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @catanf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @catanh(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @catanhf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @ccos(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @ccosf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @ccosh(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @ccoshf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @cexp(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @cexpf(<2 x float>) [[NONCONST]]
 // CHECK-ERRNO: declare double @cimag(double, double) [[NUWRN]]
 // CHECK-ERRNO: declare float @cimagf(<2 x float>) [[NUWRN]]
 // CHECK-ERRNO: declare { double, double } @conj(double, double) [[NUWRN]]
 // CHECK-ERRNO: declare <2 x float> @conjf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @clog(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @clogf(<2 x float>) [[NUWRN]]
+// CHECK-ERRNO: declare { double, double } @clog(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @clogf(<2 x float>) [[NONCONST]]
 // CHECK-ERRNO: declare { double, double } @cproj(double, double) [[NUWRN]]
 // CHECK-ERRNO: declare <2 x float> @cprojf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @cpow(double, double, double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[NUWRN]]
+// CHECK-ERRNO: declare { double, double } @cpow(double, double, double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[NONCONST]]
 // CHECK-ERRNO: declare double @creal(double, double) [[NUWRN]]
 // CHECK-ERRNO: declare float @crealf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @csin(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @csinf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @csinh(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @csinhf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @csqrt(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @csqrtf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @ctan(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @ctanf(<2 x float>) [[NUWRN]]
-// CHECK-ERRNO: declare { double, double } @ctanh(double, double) [[NUWRN]]
-// CHECK-ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[NUWRN]]
+// CHECK-ERRNO: declare { double, double } @csin(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @csinf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @csinh(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @csinhf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @csqrt(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @csqrtf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @ctan(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @ctanf(<2 x float>) [[NONCONST]]
+// CHECK-ERRNO: declare { double, double } @ctanh(double, double) [[NONCONST]]
+// CHECK-ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[NONCONST]]
 
 // CHECK-NOERRNO: attributes [[NUWRN]] = { nounwind readnone{{.*}} }
 // CHECK-NOERRNO: attributes [[NUWRO]] = { nounwind readonly{{.*}} }
diff --git a/test/CodeGen/libcalls.c b/test/CodeGen/libcalls.c
index 3a8207b..1b314f7 100644
--- a/test/CodeGen/libcalls.c
+++ b/test/CodeGen/libcalls.c
@@ -6,29 +6,28 @@
 // CHECK-NO-LABEL: define void @test_sqrt
 // CHECK-FAST-LABEL: define void @test_sqrt
 void test_sqrt(float a0, double a1, long double a2) {
-  // Following llvm-gcc's lead, we never emit these as intrinsics;
-  // no-math-errno isn't good enough.  We could probably use intrinsics
-  // with appropriate guards if it proves worthwhile.
-
   // CHECK-YES: call float @sqrtf
-  // CHECK-NO: call float @sqrtf
+  // CHECK-NO: call float @llvm.sqrt.f32(float
+  // CHECK-FAST: call float @llvm.sqrt.f32(float
   float l0 = sqrtf(a0);
 
   // CHECK-YES: call double @sqrt
-  // CHECK-NO: call double @sqrt
+  // CHECK-NO: call double @llvm.sqrt.f64(double
+  // CHECK-FAST: call double @llvm.sqrt.f64(double
   double l1 = sqrt(a1);
 
   // CHECK-YES: call x86_fp80 @sqrtl
-  // CHECK-NO: call x86_fp80 @sqrtl
+  // CHECK-NO: call x86_fp80 @llvm.sqrt.f80(x86_fp80
+  // CHECK-FAST: call x86_fp80 @llvm.sqrt.f80(x86_fp80
   long double l2 = sqrtl(a2);
 }
 
 // CHECK-YES: declare float @sqrtf(float)
 // CHECK-YES: declare double @sqrt(double)
 // CHECK-YES: declare x86_fp80 @sqrtl(x86_fp80)
-// CHECK-NO: declare float @sqrtf(float) [[NUW_RN:#[0-9]+]]
-// CHECK-NO: declare double @sqrt(double) [[NUW_RN]]
-// CHECK-NO: declare x86_fp80 @sqrtl(x86_fp80) [[NUW_RN]]
+// CHECK-NO: declare float @llvm.sqrt.f32(float)
+// CHECK-NO: declare double @llvm.sqrt.f64(double)
+// CHECK-NO: declare x86_fp80 @llvm.sqrt.f80(x86_fp80)
 // CHECK-FAST: declare float @llvm.sqrt.f32(float)
 // CHECK-FAST: declare double @llvm.sqrt.f64(double)
 // CHECK-FAST: declare x86_fp80 @llvm.sqrt.f80(x86_fp80)
@@ -59,22 +58,22 @@
 // CHECK-YES-LABEL: define void @test_fma
 // CHECK-NO-LABEL: define void @test_fma
 void test_fma(float a0, double a1, long double a2) {
-    // CHECK-YES: call float @llvm.fma.f32
+    // CHECK-YES: call float @fmaf
     // CHECK-NO: call float @llvm.fma.f32
     float l0 = fmaf(a0, a0, a0);
 
-    // CHECK-YES: call double @llvm.fma.f64
+    // CHECK-YES: call double @fma
     // CHECK-NO: call double @llvm.fma.f64
     double l1 = fma(a1, a1, a1);
 
-    // CHECK-YES: call x86_fp80 @llvm.fma.f80
+    // CHECK-YES: call x86_fp80 @fmal
     // CHECK-NO: call x86_fp80 @llvm.fma.f80
     long double l2 = fmal(a2, a2, a2);
 }
 
-// CHECK-YES: declare float @llvm.fma.f32(float, float, float) [[NUW_RN:#[0-9]+]]
-// CHECK-YES: declare double @llvm.fma.f64(double, double, double) [[NUW_RN]]
-// CHECK-YES: declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) [[NUW_RN]]
+// CHECK-YES: declare float @fmaf(float, float, float)
+// CHECK-YES: declare double @fma(double, double, double)
+// CHECK-YES: declare x86_fp80 @fmal(x86_fp80, x86_fp80, x86_fp80)
 // CHECK-NO: declare float @llvm.fma.f32(float, float, float) [[NUW_RN2:#[0-9]+]]
 // CHECK-NO: declare double @llvm.fma.f64(double, double, double) [[NUW_RN2]]
 // CHECK-NO: declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) [[NUW_RN2]]
@@ -86,7 +85,7 @@
   double atan_ = atan(d);
   long double atanl_ = atanl(ld);
   float atanf_ = atanf(f);
-// CHECK-NO: declare double @atan(double) [[NUW_RN]]
+// CHECK-NO: declare double @atan(double) [[NUW_RN:#[0-9]+]]
 // CHECK-NO: declare x86_fp80 @atanl(x86_fp80) [[NUW_RN]]
 // CHECK-NO: declare float @atanf(float) [[NUW_RN]]
 // CHECK-YES-NOT: declare double @atan(double) [[NUW_RN]]
@@ -106,9 +105,9 @@
   double exp_ = exp(d);
   long double expl_ = expl(ld);
   float expf_ = expf(f);
-// CHECK-NO: declare double @exp(double) [[NUW_RN]]
-// CHECK-NO: declare x86_fp80 @expl(x86_fp80) [[NUW_RN]]
-// CHECK-NO: declare float @expf(float) [[NUW_RN]]
+// CHECK-NO: declare double @llvm.exp.f64(double) [[NUW_RNI]]
+// CHECK-NO: declare x86_fp80 @llvm.exp.f80(x86_fp80) [[NUW_RNI]]
+// CHECK-NO: declare float @llvm.exp.f32(float) [[NUW_RNI]]
 // CHECK-YES-NOT: declare double @exp(double) [[NUW_RN]]
 // CHECK-YES-NOT: declare x86_fp80 @expl(x86_fp80) [[NUW_RN]]
 // CHECK-YES-NOT: declare float @expf(float) [[NUW_RN]]
@@ -116,15 +115,13 @@
   double log_ = log(d);
   long double logl_ = logl(ld);
   float logf_ = logf(f);
-// CHECK-NO: declare double @log(double) [[NUW_RN]]
-// CHECK-NO: declare x86_fp80 @logl(x86_fp80) [[NUW_RN]]
-// CHECK-NO: declare float @logf(float) [[NUW_RN]]
+// CHECK-NO: declare double @llvm.log.f64(double) [[NUW_RNI]]
+// CHECK-NO: declare x86_fp80 @llvm.log.f80(x86_fp80) [[NUW_RNI]]
+// CHECK-NO: declare float @llvm.log.f32(float) [[NUW_RNI]]
 // CHECK-YES-NOT: declare double @log(double) [[NUW_RN]]
 // CHECK-YES-NOT: declare x86_fp80 @logl(x86_fp80) [[NUW_RN]]
 // CHECK-YES-NOT: declare float @logf(float) [[NUW_RN]]
 }
 
-// CHECK-YES: attributes [[NUW_RN]] = { nounwind readnone speculatable }
-
-// CHECK-NO: attributes [[NUW_RN]] = { nounwind readnone{{.*}} }
-// CHECK-NO: attributes [[NUW_RNI]] = { nounwind readnone speculatable }
+// CHECK-NO-DAG: attributes [[NUW_RN]] = { nounwind readnone{{.*}} }
+// CHECK-NO-DAG: attributes [[NUW_RNI]] = { nounwind readnone speculatable }
diff --git a/test/CodeGen/mangle-ms.c b/test/CodeGen/mangle-ms.c
index 042c72e..0b05b7b 100644
--- a/test/CodeGen/mangle-ms.c
+++ b/test/CodeGen/mangle-ms.c
@@ -1,13 +1,13 @@
 // RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s
 
-// CHECK: define void @"\01?f@@$$J0YAXP6AX@Z@Z"
+// CHECK: define dso_local void @"\01?f@@$$J0YAXP6AX@Z@Z"
 __attribute__((overloadable)) void f(void (*x)()) {}
 
-// CHECK: define void @f
+// CHECK: define dso_local void @f
 void f(void (*x)(int)) {}
 
-// CHECK: define void @g
+// CHECK: define dso_local void @g
 void g(void (*x)(int)) {}
 
-// CHECK: define void @"\01?g@@$$J0YAXP6AX@Z@Z"
+// CHECK: define dso_local void @"\01?g@@$$J0YAXP6AX@Z@Z"
 __attribute__((overloadable)) void g(void (*x)()) {}
diff --git a/test/CodeGen/mangle-windows-rtd.c b/test/CodeGen/mangle-windows-rtd.c
index fc6f309..a22cba2 100644
--- a/test/CodeGen/mangle-windows-rtd.c
+++ b/test/CodeGen/mangle-windows-rtd.c
@@ -1,10 +1,10 @@
 // RUN: %clang_cc1 -emit-llvm -mrtd %s -o - -triple=i386-mingw32 | FileCheck %s
 
 void f1(void) {}
-// CHECK: define x86_stdcallcc void @"\01_f1@0"
+// CHECK: define dso_local x86_stdcallcc void @"\01_f1@0"
 
 void __stdcall f2(void) {}
-// CHECK: define x86_stdcallcc void @"\01_f2@0"
+// CHECK: define dso_local x86_stdcallcc void @"\01_f2@0"
 
 void __fastcall f3(void) {}
-// CHECK: define x86_fastcallcc void @"\01@f3@0"
+// CHECK: define dso_local x86_fastcallcc void @"\01@f3@0"
diff --git a/test/CodeGen/mangle-windows.c b/test/CodeGen/mangle-windows.c
index db39425..6dd14d9 100644
--- a/test/CodeGen/mangle-windows.c
+++ b/test/CodeGen/mangle-windows.c
@@ -11,73 +11,73 @@
 // ELF64: target datalayout = "e-m:e-{{.*}}"
 
 void __stdcall f1(void) {}
-// CHECK: define x86_stdcallcc void @"\01_f1@0"
-// X64: define void @f1(
+// CHECK: define dso_local x86_stdcallcc void @"\01_f1@0"
+// X64: define dso_local void @f1(
 // ELF32: define x86_stdcallcc void @"\01_f1@0"
 // ELF64: define void @f1(
 
 void __fastcall f2(void) {}
-// CHECK: define x86_fastcallcc void @"\01@f2@0"
-// X64: define void @f2(
+// CHECK: define dso_local x86_fastcallcc void @"\01@f2@0"
+// X64: define dso_local void @f2(
 // ELF32: define x86_fastcallcc void @"\01@f2@0"
 // ELF64: define void @f2(
 
 void __stdcall f3() {}
-// CHECK: define x86_stdcallcc void @"\01_f3@0"
-// X64: define void @f3(
+// CHECK: define dso_local x86_stdcallcc void @"\01_f3@0"
+// X64: define dso_local void @f3(
 
 void __fastcall f4(char a) {}
-// CHECK: define x86_fastcallcc void @"\01@f4@4"
-// X64: define void @f4(
+// CHECK: define dso_local x86_fastcallcc void @"\01@f4@4"
+// X64: define dso_local void @f4(
 
 void __fastcall f5(short a) {}
-// CHECK: define x86_fastcallcc void @"\01@f5@4"
-// X64: define void @f5(
+// CHECK: define dso_local x86_fastcallcc void @"\01@f5@4"
+// X64: define dso_local void @f5(
 
 void __fastcall f6(int a) {}
-// CHECK: define x86_fastcallcc void @"\01@f6@4"
-// X64: define void @f6(
+// CHECK: define dso_local x86_fastcallcc void @"\01@f6@4"
+// X64: define dso_local void @f6(
 
 void __fastcall f7(long a) {}
-// CHECK: define x86_fastcallcc void @"\01@f7@4"
-// X64: define void @f7(
+// CHECK: define dso_local x86_fastcallcc void @"\01@f7@4"
+// X64: define dso_local void @f7(
 
 void __fastcall f8(long long a) {}
-// CHECK: define x86_fastcallcc void @"\01@f8@8"
-// X64: define void @f8(
+// CHECK: define dso_local x86_fastcallcc void @"\01@f8@8"
+// X64: define dso_local void @f8(
 
 void __fastcall f9(long long a, char b, char c, short d) {}
-// CHECK: define x86_fastcallcc void @"\01@f9@20"(i64 %a, i8 signext %b, i8 signext %c, i16 signext %d)
-// X64: define void @f9(
+// CHECK: define dso_local x86_fastcallcc void @"\01@f9@20"(i64 %a, i8 signext %b, i8 signext %c, i16 signext %d)
+// X64: define dso_local void @f9(
 
 void f12(void) {}
-// CHECK: define void @f12(
-// X64: define void @f12(
+// CHECK: define dso_local void @f12(
+// X64: define dso_local void @f12(
 
 void __vectorcall v1(void) {}
-// CHECK: define x86_vectorcallcc void @"\01v1@@0"(
-// X64: define x86_vectorcallcc void @"\01v1@@0"(
+// CHECK: define dso_local x86_vectorcallcc void @"\01v1@@0"(
+// X64: define dso_local x86_vectorcallcc void @"\01v1@@0"(
 // ELF32: define x86_vectorcallcc void @"\01v1@@0"(
 // ELF64: define x86_vectorcallcc void @"\01v1@@0"(
 
 void __vectorcall v2(char a) {}
-// CHECK: define x86_vectorcallcc void @"\01v2@@4"(
-// X64: define x86_vectorcallcc void @"\01v2@@8"(
+// CHECK: define dso_local x86_vectorcallcc void @"\01v2@@4"(
+// X64: define dso_local x86_vectorcallcc void @"\01v2@@8"(
 // ELF32: define x86_vectorcallcc void @"\01v2@@4"(
 // ELF64: define x86_vectorcallcc void @"\01v2@@8"(
 
 void __vectorcall v3(short a) {}
-// CHECK: define x86_vectorcallcc void @"\01v3@@4"(
-// X64: define x86_vectorcallcc void @"\01v3@@8"(
+// CHECK: define dso_local x86_vectorcallcc void @"\01v3@@4"(
+// X64: define dso_local x86_vectorcallcc void @"\01v3@@8"(
 
 void __vectorcall v4(int a) {}
-// CHECK: define x86_vectorcallcc void @"\01v4@@4"(
-// X64: define x86_vectorcallcc void @"\01v4@@8"(
+// CHECK: define dso_local x86_vectorcallcc void @"\01v4@@4"(
+// X64: define dso_local x86_vectorcallcc void @"\01v4@@8"(
 
 void __vectorcall v5(long long a) {}
-// CHECK: define x86_vectorcallcc void @"\01v5@@8"(
-// X64: define x86_vectorcallcc void @"\01v5@@8"(
+// CHECK: define dso_local x86_vectorcallcc void @"\01v5@@8"(
+// X64: define dso_local x86_vectorcallcc void @"\01v5@@8"(
 
 void __vectorcall v6(char a, char b) {}
-// CHECK: define x86_vectorcallcc void @"\01v6@@8"(
-// X64: define x86_vectorcallcc void @"\01v6@@16"(
+// CHECK: define dso_local x86_vectorcallcc void @"\01v6@@8"(
+// X64: define dso_local x86_vectorcallcc void @"\01v6@@16"(
diff --git a/test/CodeGen/math-builtins.c b/test/CodeGen/math-builtins.c
new file mode 100644
index 0000000..89be56d
--- /dev/null
+++ b/test/CodeGen/math-builtins.c
@@ -0,0 +1,586 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -w -S -o - -emit-llvm              %s | FileCheck %s -check-prefix=NO__ERRNO
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s -check-prefix=HAS_ERRNO
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown-gnu -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s --check-prefix=HAS_ERRNO_GNU
+// RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s --check-prefix=HAS_ERRNO_WIN
+
+// Test attributes and codegen of math builtins.
+
+void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
+  f = __builtin_fmod(f,f);    f = __builtin_fmodf(f,f);   f =  __builtin_fmodl(f,f);
+
+// NO__ERRNO: frem double
+// NO__ERRNO: frem float
+// NO__ERRNO: frem x86_fp80
+// HAS_ERRNO: declare double @fmod(double, double) [[NOT_READNONE:#[0-9]+]]
+// HAS_ERRNO: declare float @fmodf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @fmodl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  __builtin_atan2(f,f);    __builtin_atan2f(f,f) ;  __builtin_atan2l(f, f);
+
+// NO__ERRNO: declare double @atan2(double, double) [[READNONE:#[0-9]+]]
+// NO__ERRNO: declare float @atan2f(float, float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @atan2l(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @atan2(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @atan2f(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @atan2l(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  __builtin_copysign(f,f); __builtin_copysignf(f,f); __builtin_copysignl(f,f); __builtin_copysignf128(f,f);
+
+// NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]]
+// NO__ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare fp128 @llvm.copysign.f128(fp128, fp128) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]]
+// HAS_ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare fp128 @llvm.copysign.f128(fp128, fp128) [[READNONE_INTRINSIC]]
+
+  __builtin_fabs(f);       __builtin_fabsf(f);      __builtin_fabsl(f); __builtin_fabsf128(f);
+
+// NO__ERRNO: declare double @llvm.fabs.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.fabs.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.fabs.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare fp128 @llvm.fabs.f128(fp128) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.fabs.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.fabs.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.fabs.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare fp128 @llvm.fabs.f128(fp128) [[READNONE_INTRINSIC]]
+
+  __builtin_frexp(f,i);    __builtin_frexpf(f,i);   __builtin_frexpl(f,i);
+
+// NO__ERRNO: declare double @frexp(double, i32*) [[NOT_READNONE:#[0-9]+]]
+// NO__ERRNO: declare float @frexpf(float, i32*) [[NOT_READNONE]]
+// NO__ERRNO: declare x86_fp80 @frexpl(x86_fp80, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @frexp(double, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @frexpf(float, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @frexpl(x86_fp80, i32*) [[NOT_READNONE]]
+
+  __builtin_huge_val();    __builtin_huge_valf();   __builtin_huge_vall(); __builtin_huge_valf128();
+
+// NO__ERRNO-NOT: .huge
+// NO__ERRNO-NOT: @huge
+// HAS_ERRNO-NOT: .huge
+// HAS_ERRNO-NOT: @huge
+
+  __builtin_inf();    __builtin_inff();   __builtin_infl(); __builtin_inff128();
+
+// NO__ERRNO-NOT: .inf
+// NO__ERRNO-NOT: @inf
+// HAS_ERRNO-NOT: .inf
+// HAS_ERRNO-NOT: @inf
+
+  __builtin_ldexp(f,f);    __builtin_ldexpf(f,f);   __builtin_ldexpl(f,f);  
+
+// NO__ERRNO: declare double @ldexp(double, i32) [[READNONE]]
+// NO__ERRNO: declare float @ldexpf(float, i32) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @ldexpl(x86_fp80, i32) [[READNONE]]
+// HAS_ERRNO: declare double @ldexp(double, i32) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @ldexpf(float, i32) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @ldexpl(x86_fp80, i32) [[NOT_READNONE]]
+
+  __builtin_modf(f,d);       __builtin_modff(f,fp);      __builtin_modfl(f,l); 
+
+// NO__ERRNO: declare double @modf(double, double*) [[NOT_READNONE]]
+// NO__ERRNO: declare float @modff(float, float*) [[NOT_READNONE]]
+// NO__ERRNO: declare x86_fp80 @modfl(x86_fp80, x86_fp80*) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @modf(double, double*) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @modff(float, float*) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @modfl(x86_fp80, x86_fp80*) [[NOT_READNONE]]
+
+  __builtin_nan(c);        __builtin_nanf(c);       __builtin_nanl(c); __builtin_nanf128(c);
+
+// NO__ERRNO: declare double @nan(i8*) [[READNONE]]
+// NO__ERRNO: declare float @nanf(i8*) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @nanl(i8*) [[READNONE]]
+// NO__ERRNO: declare fp128 @nanf128(i8*) [[READNONE]]
+// HAS_ERRNO: declare double @nan(i8*) [[READNONE:#[0-9]+]]
+// HAS_ERRNO: declare float @nanf(i8*) [[READNONE]]
+// HAS_ERRNO: declare x86_fp80 @nanl(i8*) [[READNONE]]
+// HAS_ERRNO: declare fp128 @nanf128(i8*) [[READNONE]]
+
+  __builtin_nans(c);        __builtin_nansf(c);       __builtin_nansl(c); __builtin_nansf128(c);
+
+// NO__ERRNO: declare double @nans(i8*) [[READNONE]]
+// NO__ERRNO: declare float @nansf(i8*) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @nansl(i8*) [[READNONE]]
+// NO__ERRNO: declare fp128 @nansf128(i8*) [[READNONE]]
+// HAS_ERRNO: declare double @nans(i8*) [[READNONE]]
+// HAS_ERRNO: declare float @nansf(i8*) [[READNONE]]
+// HAS_ERRNO: declare x86_fp80 @nansl(i8*) [[READNONE]]
+// HAS_ERRNO: declare fp128 @nansf128(i8*) [[READNONE]]
+
+  __builtin_pow(f,f);        __builtin_powf(f,f);       __builtin_powl(f,f);
+
+// NO__ERRNO: declare double @llvm.pow.f64(double, double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.pow.f32(float, float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @pow(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @powf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @powl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  __builtin_powi(f,f);        __builtin_powif(f,f);       __builtin_powil(f,f);
+
+// NO__ERRNO: declare double @llvm.powi.f64(double, i32) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.powi.f32(float, i32) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.powi.f80(x86_fp80, i32) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.powi.f64(double, i32) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.powi.f32(float, i32) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.powi.f80(x86_fp80, i32) [[READNONE_INTRINSIC]]
+
+  /* math */
+  __builtin_acos(f);       __builtin_acosf(f);      __builtin_acosl(f);
+
+// NO__ERRNO: declare double @acos(double) [[READNONE]]
+// NO__ERRNO: declare float @acosf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @acosl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @acos(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @acosf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @acosl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_acosh(f);      __builtin_acoshf(f);     __builtin_acoshl(f);  
+
+// NO__ERRNO: declare double @acosh(double) [[READNONE]]
+// NO__ERRNO: declare float @acoshf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @acoshl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @acosh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @acoshf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @acoshl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_asin(f);       __builtin_asinf(f);      __builtin_asinl(f); 
+
+// NO__ERRNO: declare double @asin(double) [[READNONE]]
+// NO__ERRNO: declare float @asinf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @asinl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @asin(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @asinf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @asinl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_asinh(f);      __builtin_asinhf(f);     __builtin_asinhl(f);
+
+// NO__ERRNO: declare double @asinh(double) [[READNONE]]
+// NO__ERRNO: declare float @asinhf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @asinhl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @asinh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @asinhf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @asinhl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_atan(f);       __builtin_atanf(f);      __builtin_atanl(f);
+
+// NO__ERRNO: declare double @atan(double) [[READNONE]]
+// NO__ERRNO: declare float @atanf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @atanl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @atan(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @atanf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @atanl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_atanh(f);      __builtin_atanhf(f);     __builtin_atanhl(f); 
+
+// NO__ERRNO: declare double @atanh(double) [[READNONE]]
+// NO__ERRNO: declare float @atanhf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @atanhl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @atanh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @atanhf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @atanhl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_cbrt(f);       __builtin_cbrtf(f);      __builtin_cbrtl(f);
+
+// NO__ERRNO: declare double @cbrt(double) [[READNONE]]
+// NO__ERRNO: declare float @cbrtf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @cbrtl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @cbrt(double) [[READNONE]]
+// HAS_ERRNO: declare float @cbrtf(float) [[READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cbrtl(x86_fp80) [[READNONE]]
+
+  __builtin_ceil(f);       __builtin_ceilf(f);      __builtin_ceill(f);
+
+// NO__ERRNO: declare double @llvm.ceil.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.ceil.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.ceil.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.ceil.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.ceil.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.ceil.f80(x86_fp80) [[READNONE_INTRINSIC]]
+
+  __builtin_cos(f);        __builtin_cosf(f);       __builtin_cosl(f); 
+
+// NO__ERRNO: declare double @llvm.cos.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.cos.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.cos.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @cos(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @cosf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cosl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_cosh(f);       __builtin_coshf(f);      __builtin_coshl(f);
+
+// NO__ERRNO: declare double @cosh(double) [[READNONE]]
+// NO__ERRNO: declare float @coshf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @coshl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @cosh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @coshf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @coshl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_erf(f);        __builtin_erff(f);       __builtin_erfl(f);
+
+// NO__ERRNO: declare double @erf(double) [[READNONE]]
+// NO__ERRNO: declare float @erff(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @erfl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @erf(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @erff(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @erfl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_erfc(f);       __builtin_erfcf(f);      __builtin_erfcl(f);
+
+// NO__ERRNO: declare double @erfc(double) [[READNONE]]
+// NO__ERRNO: declare float @erfcf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @erfcl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @erfc(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @erfcf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @erfcl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_exp(f);        __builtin_expf(f);       __builtin_expl(f);
+
+// NO__ERRNO: declare double @llvm.exp.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.exp.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.exp.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @exp(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @expf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @expl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_exp2(f);       __builtin_exp2f(f);      __builtin_exp2l(f); 
+
+// NO__ERRNO: declare double @llvm.exp2.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.exp2.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.exp2.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @exp2(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @exp2f(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @exp2l(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_expm1(f);      __builtin_expm1f(f);     __builtin_expm1l(f);
+
+// NO__ERRNO: declare double @expm1(double) [[READNONE]]
+// NO__ERRNO: declare float @expm1f(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @expm1l(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @expm1(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @expm1f(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @expm1l(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_fdim(f,f);       __builtin_fdimf(f,f);      __builtin_fdiml(f,f);
+
+// NO__ERRNO: declare double @fdim(double, double) [[READNONE]]
+// NO__ERRNO: declare float @fdimf(float, float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @fdiml(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @fdim(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @fdimf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @fdiml(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  __builtin_floor(f);      __builtin_floorf(f);     __builtin_floorl(f);
+
+// NO__ERRNO: declare double @llvm.floor.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.floor.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.floor.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.floor.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.floor.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.floor.f80(x86_fp80) [[READNONE_INTRINSIC]]
+
+  __builtin_fma(f,f,f);        __builtin_fmaf(f,f,f);       __builtin_fmal(f,f,f);
+
+// NO__ERRNO: declare double @llvm.fma.f64(double, double, double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.fma.f32(float, float, float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @fma(double, double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @fmaf(float, float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @fmal(x86_fp80, x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+// On GNU or Win, fma never sets errno, so we can convert to the intrinsic.
+
+// HAS_ERRNO_GNU: declare double @llvm.fma.f64(double, double, double) [[READNONE_INTRINSIC:#[0-9]+]]
+// HAS_ERRNO_GNU: declare float @llvm.fma.f32(float, float, float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO_GNU: declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+
+// HAS_ERRNO_WIN: declare double @llvm.fma.f64(double, double, double) [[READNONE_INTRINSIC:#[0-9]+]]
+// HAS_ERRNO_WIN: declare float @llvm.fma.f32(float, float, float) [[READNONE_INTRINSIC]]
+// Long double is just double on win, so no f80 use/declaration.
+// HAS_ERRNO_WIN-NOT: declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80)
+
+  __builtin_fmax(f,f);       __builtin_fmaxf(f,f);      __builtin_fmaxl(f,f);
+
+// NO__ERRNO: declare double @llvm.maxnum.f64(double, double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.maxnum.f32(float, float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.maxnum.f64(double, double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.maxnum.f32(float, float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+
+  __builtin_fmin(f,f);       __builtin_fminf(f,f);      __builtin_fminl(f,f);
+
+// NO__ERRNO: declare double @llvm.minnum.f64(double, double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.minnum.f32(float, float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.minnum.f64(double, double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.minnum.f32(float, float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+
+  __builtin_hypot(f,f);      __builtin_hypotf(f,f);     __builtin_hypotl(f,f);
+
+// NO__ERRNO: declare double @hypot(double, double) [[READNONE]]
+// NO__ERRNO: declare float @hypotf(float, float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @hypotl(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @hypot(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @hypotf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @hypotl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  __builtin_ilogb(f);      __builtin_ilogbf(f);     __builtin_ilogbl(f); 
+
+// NO__ERRNO: declare i32 @ilogb(double) [[READNONE]]
+// NO__ERRNO: declare i32 @ilogbf(float) [[READNONE]]
+// NO__ERRNO: declare i32 @ilogbl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare i32 @ilogb(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare i32 @ilogbf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare i32 @ilogbl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_lgamma(f);     __builtin_lgammaf(f);    __builtin_lgammal(f);
+
+// NO__ERRNO: declare double @lgamma(double) [[NOT_READNONE]]
+// NO__ERRNO: declare float @lgammaf(float) [[NOT_READNONE]]
+// NO__ERRNO: declare x86_fp80 @lgammal(x86_fp80) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @lgamma(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @lgammaf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @lgammal(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_llrint(f);     __builtin_llrintf(f);    __builtin_llrintl(f);
+
+// NO__ERRNO: declare i64 @llrint(double) [[READNONE]]
+// NO__ERRNO: declare i64 @llrintf(float) [[READNONE]]
+// NO__ERRNO: declare i64 @llrintl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare i64 @llrint(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @llrintf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @llrintl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_llround(f);    __builtin_llroundf(f);   __builtin_llroundl(f);
+
+// NO__ERRNO: declare i64 @llround(double) [[READNONE]]
+// NO__ERRNO: declare i64 @llroundf(float) [[READNONE]]
+// NO__ERRNO: declare i64 @llroundl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare i64 @llround(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @llroundf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @llroundl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_log(f);        __builtin_logf(f);       __builtin_logl(f);
+
+// NO__ERRNO: declare double @llvm.log.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.log.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.log.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @log(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @logf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @logl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_log10(f);      __builtin_log10f(f);     __builtin_log10l(f);
+
+// NO__ERRNO: declare double @llvm.log10.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.log10.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.log10.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @log10(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @log10f(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @log10l(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_log1p(f);      __builtin_log1pf(f);     __builtin_log1pl(f);
+
+// NO__ERRNO: declare double @log1p(double) [[READNONE]]
+// NO__ERRNO: declare float @log1pf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @log1pl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @log1p(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @log1pf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @log1pl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_log2(f);       __builtin_log2f(f);      __builtin_log2l(f);
+
+// NO__ERRNO: declare double @llvm.log2.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.log2.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.log2.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @log2(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @log2f(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @log2l(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_logb(f);       __builtin_logbf(f);      __builtin_logbl(f);
+
+// NO__ERRNO: declare double @logb(double) [[READNONE]]
+// NO__ERRNO: declare float @logbf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @logbl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @logb(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @logbf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @logbl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_lrint(f);      __builtin_lrintf(f);     __builtin_lrintl(f);
+
+// NO__ERRNO: declare i64 @lrint(double) [[READNONE]]
+// NO__ERRNO: declare i64 @lrintf(float) [[READNONE]]
+// NO__ERRNO: declare i64 @lrintl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare i64 @lrint(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @lrintf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @lrintl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_lround(f);     __builtin_lroundf(f);    __builtin_lroundl(f);
+
+// NO__ERRNO: declare i64 @lround(double) [[READNONE]]
+// NO__ERRNO: declare i64 @lroundf(float) [[READNONE]]
+// NO__ERRNO: declare i64 @lroundl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare i64 @lround(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @lroundf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @lroundl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_nearbyint(f);  __builtin_nearbyintf(f); __builtin_nearbyintl(f);
+
+// NO__ERRNO: declare double @llvm.nearbyint.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.nearbyint.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.nearbyint.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.nearbyint.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.nearbyint.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.nearbyint.f80(x86_fp80) [[READNONE_INTRINSIC]]
+
+  __builtin_nextafter(f,f);  __builtin_nextafterf(f,f); __builtin_nextafterl(f,f);
+
+// NO__ERRNO: declare double @nextafter(double, double) [[READNONE]]
+// NO__ERRNO: declare float @nextafterf(float, float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @nextafterl(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @nextafter(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @nextafterf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @nextafterl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  __builtin_nexttoward(f,f); __builtin_nexttowardf(f,f);__builtin_nexttowardl(f,f);
+
+// NO__ERRNO: declare double @nexttoward(double, x86_fp80) [[READNONE]]
+// NO__ERRNO: declare float @nexttowardf(float, x86_fp80) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @nexttowardl(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @nexttoward(double, x86_fp80) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @nexttowardf(float, x86_fp80) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @nexttowardl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  __builtin_remainder(f,f);  __builtin_remainderf(f,f); __builtin_remainderl(f,f);
+
+// NO__ERRNO: declare double @remainder(double, double) [[READNONE]]
+// NO__ERRNO: declare float @remainderf(float, float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @remainderl(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @remainder(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @remainderf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @remainderl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  __builtin_remquo(f,f,i);  __builtin_remquof(f,f,i); __builtin_remquol(f,f,i);
+
+// NO__ERRNO: declare double @remquo(double, double, i32*) [[NOT_READNONE]]
+// NO__ERRNO: declare float @remquof(float, float, i32*) [[NOT_READNONE]]
+// NO__ERRNO: declare x86_fp80 @remquol(x86_fp80, x86_fp80, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @remquo(double, double, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @remquof(float, float, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @remquol(x86_fp80, x86_fp80, i32*) [[NOT_READNONE]]
+
+  __builtin_rint(f);       __builtin_rintf(f);      __builtin_rintl(f);
+
+// NO__ERRNO: declare double @llvm.rint.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.rint.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.rint.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.rint.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.rint.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.rint.f80(x86_fp80) [[READNONE_INTRINSIC]]
+
+  __builtin_round(f);      __builtin_roundf(f);     __builtin_roundl(f);
+
+// NO__ERRNO: declare double @llvm.round.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.round.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.round.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.round.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.round.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.round.f80(x86_fp80) [[READNONE_INTRINSIC]]
+
+  __builtin_scalbln(f,f);    __builtin_scalblnf(f,f);   __builtin_scalblnl(f,f);
+
+// NO__ERRNO: declare double @scalbln(double, i64) [[READNONE]]
+// NO__ERRNO: declare float @scalblnf(float, i64) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @scalblnl(x86_fp80, i64) [[READNONE]]
+// HAS_ERRNO: declare double @scalbln(double, i64) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @scalblnf(float, i64) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @scalblnl(x86_fp80, i64) [[NOT_READNONE]]
+
+  __builtin_scalbn(f,f);     __builtin_scalbnf(f,f);    __builtin_scalbnl(f,f);
+
+// NO__ERRNO: declare double @scalbn(double, i32) [[READNONE]]
+// NO__ERRNO: declare float @scalbnf(float, i32) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @scalbnl(x86_fp80, i32) [[READNONE]]
+// HAS_ERRNO: declare double @scalbn(double, i32) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @scalbnf(float, i32) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @scalbnl(x86_fp80, i32) [[NOT_READNONE]]
+
+  __builtin_sin(f);        __builtin_sinf(f);       __builtin_sinl(f);
+
+// NO__ERRNO: declare double @llvm.sin.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.sin.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.sin.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @sin(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @sinf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @sinl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_sinh(f);       __builtin_sinhf(f);      __builtin_sinhl(f);
+
+// NO__ERRNO: declare double @sinh(double) [[READNONE]]
+// NO__ERRNO: declare float @sinhf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @sinhl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @sinh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @sinhf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @sinhl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_sqrt(f);       __builtin_sqrtf(f);      __builtin_sqrtl(f); 
+
+// NO__ERRNO: declare double @llvm.sqrt.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.sqrt.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.sqrt.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @sqrt(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @sqrtf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @sqrtl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_tan(f);        __builtin_tanf(f);       __builtin_tanl(f);
+
+// NO__ERRNO: declare double @tan(double) [[READNONE]]
+// NO__ERRNO: declare float @tanf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @tanl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @tan(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @tanf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @tanl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_tanh(f);       __builtin_tanhf(f);      __builtin_tanhl(f);
+
+// NO__ERRNO: declare double @tanh(double) [[READNONE]]
+// NO__ERRNO: declare float @tanhf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @tanhl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @tanh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @tanhf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @tanhl(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_tgamma(f);     __builtin_tgammaf(f);    __builtin_tgammal(f);
+
+// NO__ERRNO: declare double @tgamma(double) [[READNONE]]
+// NO__ERRNO: declare float @tgammaf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @tgammal(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @tgamma(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @tgammaf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @tgammal(x86_fp80) [[NOT_READNONE]]
+
+  __builtin_trunc(f);      __builtin_truncf(f);     __builtin_truncl(f);
+
+// NO__ERRNO: declare double @llvm.trunc.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.trunc.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.trunc.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.trunc.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC]]
+};
+
+
+// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} }
+// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} }
+// NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind "correctly{{.*}} }
+
+// HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind "correctly{{.*}} }
+// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} }
+// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} }
+
+// HAS_ERRNO_GNU: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} }
+// HAS_ERRNO_WIN: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} }
+
diff --git a/test/CodeGen/math-libcalls.c b/test/CodeGen/math-libcalls.c
new file mode 100644
index 0000000..39bcb44
--- /dev/null
+++ b/test/CodeGen/math-libcalls.c
@@ -0,0 +1,547 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown     -w -S -o - -emit-llvm              %s | FileCheck %s --check-prefix=NO__ERRNO
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown     -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s --check-prefix=HAS_ERRNO
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown-gnu -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s --check-prefix=HAS_ERRNO_GNU
+// RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s --check-prefix=HAS_ERRNO_WIN
+
+// Test attributes and builtin codegen of math library calls.
+
+void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
+  f = fmod(f,f);     f = fmodf(f,f);    f = fmodl(f,f);
+
+// NO__ERRNO: frem double
+// NO__ERRNO: frem float
+// NO__ERRNO: frem x86_fp80
+// HAS_ERRNO: declare double @fmod(double, double) [[NOT_READNONE:#[0-9]+]]
+// HAS_ERRNO: declare float @fmodf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @fmodl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  atan2(f,f);    atan2f(f,f) ;  atan2l(f, f);
+
+// NO__ERRNO: declare double @atan2(double, double) [[READNONE:#[0-9]+]]
+// NO__ERRNO: declare float @atan2f(float, float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @atan2l(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @atan2(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @atan2f(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @atan2l(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  copysign(f,f); copysignf(f,f);copysignl(f,f);
+
+// NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]]
+// NO__ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]]
+// HAS_ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+
+  fabs(f);       fabsf(f);      fabsl(f);
+
+// NO__ERRNO: declare double @llvm.fabs.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.fabs.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.fabs.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.fabs.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.fabs.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.fabs.f80(x86_fp80) [[READNONE_INTRINSIC]]
+
+  frexp(f,i);    frexpf(f,i);   frexpl(f,i);
+
+// NO__ERRNO: declare double @frexp(double, i32*) [[NOT_READNONE:#[0-9]+]]
+// NO__ERRNO: declare float @frexpf(float, i32*) [[NOT_READNONE]]
+// NO__ERRNO: declare x86_fp80 @frexpl(x86_fp80, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @frexp(double, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @frexpf(float, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @frexpl(x86_fp80, i32*) [[NOT_READNONE]]
+
+  ldexp(f,f);    ldexpf(f,f);   ldexpl(f,f);  
+
+// NO__ERRNO: declare double @ldexp(double, i32) [[READNONE]]
+// NO__ERRNO: declare float @ldexpf(float, i32) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @ldexpl(x86_fp80, i32) [[READNONE]]
+// HAS_ERRNO: declare double @ldexp(double, i32) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @ldexpf(float, i32) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @ldexpl(x86_fp80, i32) [[NOT_READNONE]]
+
+  modf(f,d);       modff(f,fp);      modfl(f,l); 
+
+// NO__ERRNO: declare double @modf(double, double*) [[NOT_READNONE]]
+// NO__ERRNO: declare float @modff(float, float*) [[NOT_READNONE]]
+// NO__ERRNO: declare x86_fp80 @modfl(x86_fp80, x86_fp80*) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @modf(double, double*) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @modff(float, float*) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @modfl(x86_fp80, x86_fp80*) [[NOT_READNONE]]
+
+  nan(c);        nanf(c);       nanl(c);  
+
+// NO__ERRNO: declare double @nan(i8*) [[READONLY:#[0-9]+]]
+// NO__ERRNO: declare float @nanf(i8*) [[READONLY]]
+// NO__ERRNO: declare x86_fp80 @nanl(i8*) [[READONLY]]
+// HAS_ERRNO: declare double @nan(i8*) [[READONLY:#[0-9]+]]
+// HAS_ERRNO: declare float @nanf(i8*) [[READONLY]]
+// HAS_ERRNO: declare x86_fp80 @nanl(i8*) [[READONLY]]
+
+  pow(f,f);        powf(f,f);       powl(f,f);
+
+// NO__ERRNO: declare double @llvm.pow.f64(double, double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.pow.f32(float, float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @pow(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @powf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @powl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  /* math */
+  acos(f);       acosf(f);      acosl(f);
+
+// NO__ERRNO: declare double @acos(double) [[READNONE]]
+// NO__ERRNO: declare float @acosf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @acosl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @acos(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @acosf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @acosl(x86_fp80) [[NOT_READNONE]]
+
+  acosh(f);      acoshf(f);     acoshl(f);  
+
+// NO__ERRNO: declare double @acosh(double) [[READNONE]]
+// NO__ERRNO: declare float @acoshf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @acoshl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @acosh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @acoshf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @acoshl(x86_fp80) [[NOT_READNONE]]
+
+  asin(f);       asinf(f);      asinl(f); 
+
+// NO__ERRNO: declare double @asin(double) [[READNONE]]
+// NO__ERRNO: declare float @asinf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @asinl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @asin(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @asinf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @asinl(x86_fp80) [[NOT_READNONE]]
+
+  asinh(f);      asinhf(f);     asinhl(f);
+
+// NO__ERRNO: declare double @asinh(double) [[READNONE]]
+// NO__ERRNO: declare float @asinhf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @asinhl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @asinh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @asinhf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @asinhl(x86_fp80) [[NOT_READNONE]]
+
+  atan(f);       atanf(f);      atanl(f);
+
+// NO__ERRNO: declare double @atan(double) [[READNONE]]
+// NO__ERRNO: declare float @atanf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @atanl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @atan(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @atanf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @atanl(x86_fp80) [[NOT_READNONE]]
+
+  atanh(f);      atanhf(f);     atanhl(f); 
+
+// NO__ERRNO: declare double @atanh(double) [[READNONE]]
+// NO__ERRNO: declare float @atanhf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @atanhl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @atanh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @atanhf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @atanhl(x86_fp80) [[NOT_READNONE]]
+
+  cbrt(f);       cbrtf(f);      cbrtl(f);
+
+// NO__ERRNO: declare double @cbrt(double) [[READNONE]]
+// NO__ERRNO: declare float @cbrtf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @cbrtl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @cbrt(double) [[READNONE:#[0-9]+]]
+// HAS_ERRNO: declare float @cbrtf(float) [[READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cbrtl(x86_fp80) [[READNONE]]
+
+  ceil(f);       ceilf(f);      ceill(f);
+
+// NO__ERRNO: declare double @llvm.ceil.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.ceil.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.ceil.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.ceil.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.ceil.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.ceil.f80(x86_fp80) [[READNONE_INTRINSIC]]
+
+  cos(f);        cosf(f);       cosl(f); 
+
+// NO__ERRNO: declare double @llvm.cos.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.cos.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.cos.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @cos(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @cosf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cosl(x86_fp80) [[NOT_READNONE]]
+
+  cosh(f);       coshf(f);      coshl(f);
+
+// NO__ERRNO: declare double @cosh(double) [[READNONE]]
+// NO__ERRNO: declare float @coshf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @coshl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @cosh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @coshf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @coshl(x86_fp80) [[NOT_READNONE]]
+
+  erf(f);        erff(f);       erfl(f);
+
+// NO__ERRNO: declare double @erf(double) [[READNONE]]
+// NO__ERRNO: declare float @erff(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @erfl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @erf(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @erff(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @erfl(x86_fp80) [[NOT_READNONE]]
+
+  erfc(f);       erfcf(f);      erfcl(f);
+
+// NO__ERRNO: declare double @erfc(double) [[READNONE]]
+// NO__ERRNO: declare float @erfcf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @erfcl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @erfc(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @erfcf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @erfcl(x86_fp80) [[NOT_READNONE]]
+
+  exp(f);        expf(f);       expl(f);
+
+// NO__ERRNO: declare double @llvm.exp.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.exp.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.exp.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @exp(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @expf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @expl(x86_fp80) [[NOT_READNONE]]
+
+  exp2(f);       exp2f(f);      exp2l(f); 
+
+// NO__ERRNO: declare double @llvm.exp2.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.exp2.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.exp2.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @exp2(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @exp2f(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @exp2l(x86_fp80) [[NOT_READNONE]]
+
+  expm1(f);      expm1f(f);     expm1l(f);
+
+// NO__ERRNO: declare double @expm1(double) [[READNONE]]
+// NO__ERRNO: declare float @expm1f(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @expm1l(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @expm1(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @expm1f(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @expm1l(x86_fp80) [[NOT_READNONE]]
+
+  fdim(f,f);       fdimf(f,f);      fdiml(f,f);
+
+// NO__ERRNO: declare double @fdim(double, double) [[READNONE]]
+// NO__ERRNO: declare float @fdimf(float, float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @fdiml(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @fdim(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @fdimf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @fdiml(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  floor(f);      floorf(f);     floorl(f);
+
+// NO__ERRNO: declare double @llvm.floor.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.floor.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.floor.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.floor.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.floor.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.floor.f80(x86_fp80) [[READNONE_INTRINSIC]]
+
+  fma(f,f,f);        fmaf(f,f,f);       fmal(f,f,f);
+
+// NO__ERRNO: declare double @llvm.fma.f64(double, double, double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.fma.f32(float, float, float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @fma(double, double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @fmaf(float, float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @fmal(x86_fp80, x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+// On GNU or Win, fma never sets errno, so we can convert to the intrinsic.
+
+// HAS_ERRNO_GNU: declare double @llvm.fma.f64(double, double, double) [[READNONE_INTRINSIC:#[0-9]+]]
+// HAS_ERRNO_GNU: declare float @llvm.fma.f32(float, float, float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO_GNU: declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+
+// HAS_ERRNO_WIN: declare double @llvm.fma.f64(double, double, double) [[READNONE_INTRINSIC:#[0-9]+]]
+// HAS_ERRNO_WIN: declare float @llvm.fma.f32(float, float, float) [[READNONE_INTRINSIC]]
+// Long double is just double on win, so no f80 use/declaration.
+// HAS_ERRNO_WIN-NOT: declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80)
+
+  fmax(f,f);       fmaxf(f,f);      fmaxl(f,f);
+
+// NO__ERRNO: declare double @llvm.maxnum.f64(double, double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.maxnum.f32(float, float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.maxnum.f64(double, double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.maxnum.f32(float, float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+
+  fmin(f,f);       fminf(f,f);      fminl(f,f);
+
+// NO__ERRNO: declare double @llvm.minnum.f64(double, double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.minnum.f32(float, float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.minnum.f64(double, double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.minnum.f32(float, float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
+
+  hypot(f,f);      hypotf(f,f);     hypotl(f,f);
+
+// NO__ERRNO: declare double @hypot(double, double) [[READNONE]]
+// NO__ERRNO: declare float @hypotf(float, float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @hypotl(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @hypot(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @hypotf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @hypotl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  ilogb(f);      ilogbf(f);     ilogbl(f); 
+
+// NO__ERRNO: declare i32 @ilogb(double) [[READNONE]]
+// NO__ERRNO: declare i32 @ilogbf(float) [[READNONE]]
+// NO__ERRNO: declare i32 @ilogbl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare i32 @ilogb(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare i32 @ilogbf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare i32 @ilogbl(x86_fp80) [[NOT_READNONE]]
+
+  lgamma(f);     lgammaf(f);    lgammal(f);
+
+// NO__ERRNO: declare double @lgamma(double) [[NOT_READNONE]]
+// NO__ERRNO: declare float @lgammaf(float) [[NOT_READNONE]]
+// NO__ERRNO: declare x86_fp80 @lgammal(x86_fp80) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @lgamma(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @lgammaf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @lgammal(x86_fp80) [[NOT_READNONE]]
+
+  llrint(f);     llrintf(f);    llrintl(f);
+
+// NO__ERRNO: declare i64 @llrint(double) [[READNONE]]
+// NO__ERRNO: declare i64 @llrintf(float) [[READNONE]]
+// NO__ERRNO: declare i64 @llrintl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare i64 @llrint(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @llrintf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @llrintl(x86_fp80) [[NOT_READNONE]]
+
+  llround(f);    llroundf(f);   llroundl(f);
+
+// NO__ERRNO: declare i64 @llround(double) [[READNONE]]
+// NO__ERRNO: declare i64 @llroundf(float) [[READNONE]]
+// NO__ERRNO: declare i64 @llroundl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare i64 @llround(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @llroundf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @llroundl(x86_fp80) [[NOT_READNONE]]
+
+  log(f);        logf(f);       logl(f);
+
+// NO__ERRNO: declare double @llvm.log.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.log.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.log.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @log(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @logf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @logl(x86_fp80) [[NOT_READNONE]]
+
+  log10(f);      log10f(f);     log10l(f);
+
+// NO__ERRNO: declare double @llvm.log10.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.log10.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.log10.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @log10(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @log10f(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @log10l(x86_fp80) [[NOT_READNONE]]
+
+  log1p(f);      log1pf(f);     log1pl(f);
+
+// NO__ERRNO: declare double @log1p(double) [[READNONE]]
+// NO__ERRNO: declare float @log1pf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @log1pl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @log1p(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @log1pf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @log1pl(x86_fp80) [[NOT_READNONE]]
+
+  log2(f);       log2f(f);      log2l(f);
+
+// NO__ERRNO: declare double @llvm.log2.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.log2.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.log2.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @log2(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @log2f(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @log2l(x86_fp80) [[NOT_READNONE]]
+
+  logb(f);       logbf(f);      logbl(f);
+
+// NO__ERRNO: declare double @logb(double) [[READNONE]]
+// NO__ERRNO: declare float @logbf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @logbl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @logb(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @logbf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @logbl(x86_fp80) [[NOT_READNONE]]
+
+  lrint(f);      lrintf(f);     lrintl(f);
+
+// NO__ERRNO: declare i64 @lrint(double) [[READNONE]]
+// NO__ERRNO: declare i64 @lrintf(float) [[READNONE]]
+// NO__ERRNO: declare i64 @lrintl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare i64 @lrint(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @lrintf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @lrintl(x86_fp80) [[NOT_READNONE]]
+
+  lround(f);     lroundf(f);    lroundl(f);
+
+// NO__ERRNO: declare i64 @lround(double) [[READNONE]]
+// NO__ERRNO: declare i64 @lroundf(float) [[READNONE]]
+// NO__ERRNO: declare i64 @lroundl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare i64 @lround(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @lroundf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare i64 @lroundl(x86_fp80) [[NOT_READNONE]]
+
+  nearbyint(f);  nearbyintf(f); nearbyintl(f);
+
+// NO__ERRNO: declare double @llvm.nearbyint.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.nearbyint.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.nearbyint.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.nearbyint.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.nearbyint.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.nearbyint.f80(x86_fp80) [[READNONE_INTRINSIC]]
+
+  nextafter(f,f);  nextafterf(f,f); nextafterl(f,f);
+
+// NO__ERRNO: declare double @nextafter(double, double) [[READNONE]]
+// NO__ERRNO: declare float @nextafterf(float, float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @nextafterl(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @nextafter(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @nextafterf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @nextafterl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  nexttoward(f,f); nexttowardf(f,f);nexttowardl(f,f);
+
+// NO__ERRNO: declare double @nexttoward(double, x86_fp80) [[READNONE]]
+// NO__ERRNO: declare float @nexttowardf(float, x86_fp80) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @nexttowardl(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @nexttoward(double, x86_fp80) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @nexttowardf(float, x86_fp80) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @nexttowardl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  remainder(f,f);  remainderf(f,f); remainderl(f,f);
+
+// NO__ERRNO: declare double @remainder(double, double) [[READNONE]]
+// NO__ERRNO: declare float @remainderf(float, float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @remainderl(x86_fp80, x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @remainder(double, double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @remainderf(float, float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @remainderl(x86_fp80, x86_fp80) [[NOT_READNONE]]
+
+  remquo(f,f,i);  remquof(f,f,i); remquol(f,f,i);
+
+// NO__ERRNO: declare double @remquo(double, double, i32*) [[NOT_READNONE]]
+// NO__ERRNO: declare float @remquof(float, float, i32*) [[NOT_READNONE]]
+// NO__ERRNO: declare x86_fp80 @remquol(x86_fp80, x86_fp80, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare double @remquo(double, double, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @remquof(float, float, i32*) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @remquol(x86_fp80, x86_fp80, i32*) [[NOT_READNONE]]
+
+  rint(f);       rintf(f);      rintl(f);
+
+// NO__ERRNO: declare double @llvm.rint.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.rint.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.rint.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.rint.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.rint.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.rint.f80(x86_fp80) [[READNONE_INTRINSIC]]
+
+  round(f);      roundf(f);     roundl(f);
+
+// NO__ERRNO: declare double @llvm.round.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.round.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.round.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.round.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.round.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.round.f80(x86_fp80) [[READNONE_INTRINSIC]]
+
+  scalbln(f,f);    scalblnf(f,f);   scalblnl(f,f);
+
+// NO__ERRNO: declare double @scalbln(double, i64) [[READNONE]]
+// NO__ERRNO: declare float @scalblnf(float, i64) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @scalblnl(x86_fp80, i64) [[READNONE]]
+// HAS_ERRNO: declare double @scalbln(double, i64) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @scalblnf(float, i64) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @scalblnl(x86_fp80, i64) [[NOT_READNONE]]
+
+  scalbn(f,f);     scalbnf(f,f);    scalbnl(f,f);
+
+// NO__ERRNO: declare double @scalbn(double, i32) [[READNONE]]
+// NO__ERRNO: declare float @scalbnf(float, i32) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @scalbnl(x86_fp80, i32) [[READNONE]]
+// HAS_ERRNO: declare double @scalbn(double, i32) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @scalbnf(float, i32) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @scalbnl(x86_fp80, i32) [[NOT_READNONE]]
+
+  sin(f);        sinf(f);       sinl(f);
+
+// NO__ERRNO: declare double @llvm.sin.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.sin.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.sin.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @sin(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @sinf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @sinl(x86_fp80) [[NOT_READNONE]]
+
+  sinh(f);       sinhf(f);      sinhl(f);
+
+// NO__ERRNO: declare double @sinh(double) [[READNONE]]
+// NO__ERRNO: declare float @sinhf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @sinhl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @sinh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @sinhf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @sinhl(x86_fp80) [[NOT_READNONE]]
+
+  sqrt(f);       sqrtf(f);      sqrtl(f); 
+
+// NO__ERRNO: declare double @llvm.sqrt.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.sqrt.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.sqrt.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @sqrt(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @sqrtf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @sqrtl(x86_fp80) [[NOT_READNONE]]
+
+  tan(f);        tanf(f);       tanl(f);
+
+// NO__ERRNO: declare double @tan(double) [[READNONE]]
+// NO__ERRNO: declare float @tanf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @tanl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @tan(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @tanf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @tanl(x86_fp80) [[NOT_READNONE]]
+
+  tanh(f);       tanhf(f);      tanhl(f);
+
+// NO__ERRNO: declare double @tanh(double) [[READNONE]]
+// NO__ERRNO: declare float @tanhf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @tanhl(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @tanh(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @tanhf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @tanhl(x86_fp80) [[NOT_READNONE]]
+
+  tgamma(f);     tgammaf(f);    tgammal(f);
+
+// NO__ERRNO: declare double @tgamma(double) [[READNONE]]
+// NO__ERRNO: declare float @tgammaf(float) [[READNONE]]
+// NO__ERRNO: declare x86_fp80 @tgammal(x86_fp80) [[READNONE]]
+// HAS_ERRNO: declare double @tgamma(double) [[NOT_READNONE]]
+// HAS_ERRNO: declare float @tgammaf(float) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @tgammal(x86_fp80) [[NOT_READNONE]]
+
+  trunc(f);      truncf(f);     truncl(f);
+
+// NO__ERRNO: declare double @llvm.trunc.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.trunc.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.trunc.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.trunc.f32(float) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC]]
+};
+
+
+// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} }
+// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} }
+// NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind "correctly{{.*}} }
+// NO__ERRNO: attributes [[READONLY]] = { {{.*}}readonly{{.*}} }
+
+// HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind "correctly{{.*}} }
+// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} }
+// HAS_ERRNO: attributes [[READONLY]] = { {{.*}}readonly{{.*}} }
+// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} }
+
+// HAS_ERRNO_GNU: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} }
+// HAS_ERRNO_WIN: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} }
diff --git a/test/CodeGen/may-alias.c b/test/CodeGen/may-alias.c
index 41eda85..5f72d05 100644
--- a/test/CodeGen/may-alias.c
+++ b/test/CodeGen/may-alias.c
@@ -1,18 +1,24 @@
-// RUN: %clang_cc1 -Werror -triple i386-unknown-unknown -emit-llvm -O1 -no-struct-path-tbaa -disable-llvm-passes -o - %s | FileCheck %s
-// RUN: %clang_cc1 -Werror -triple i386-unknown-unknown -emit-llvm -O1 -disable-llvm-passes -o - %s | FileCheck %s -check-prefix=PATH
+// RUN: %clang_cc1 -Werror -triple i386-unknown-unknown -emit-llvm -O1 \
+// RUN:     -no-struct-path-tbaa -disable-llvm-passes -o - %s | \
+// RUN:     FileCheck %s -check-prefixes=CHECK,SCALAR
+// RUN: %clang_cc1 -Werror -triple i386-unknown-unknown -emit-llvm -O1 \
+// RUN:     -disable-llvm-passes -o - %s | \
+// RUN:     FileCheck %s -check-prefixes=CHECK,OLD-PATH
+// RUN: %clang_cc1 -Werror -triple i386-unknown-unknown -emit-llvm -O1 \
+// RUN:     -new-struct-path-tbaa -disable-llvm-passes -o - %s | \
+// RUN:     FileCheck %s -check-prefixes=CHECK,NEW-PATH
 
 // Types with the may_alias attribute should be considered equivalent
 // to char for aliasing.
 
 typedef int __attribute__((may_alias)) aliasing_int;
 
-void test0(aliasing_int *ai, int *i)
-{
-// CHECK: store i32 0, i32* %{{.*}}, !tbaa [[TAG_CHAR:!.*]]
-// PATH: store i32 0, i32* %{{.*}}, !tbaa [[TAG_CHAR:!.*]]
+void test0(aliasing_int *ai, int *i) {
+// CHECK-LABEL: test0
+// CHECK: store i32 0, {{.*}}, !tbaa [[TAG_alias_int:!.*]]
   *ai = 0;
-// CHECK: store i32 1, i32* %{{.*}}, !tbaa [[TAG_INT:!.*]]
-// PATH: store i32 1, i32* %{{.*}}, !tbaa [[TAG_INT:!.*]]
+
+// CHECK: store i32 1, {{.*}}, !tbaa [[TAG_int:!.*]]
   *i = 1;
 }
 
@@ -20,23 +26,36 @@
 struct Test1 { int x; };
 struct Test1MA { int x; } __attribute__((may_alias));
 void test1(struct Test1MA *p1, struct Test1 *p2) {
-  // CHECK: store i32 2, i32* {{%.*}}, !tbaa [[TAG_CHAR]]
-  // PATH: store i32 2, i32* {{%.*}}, !tbaa [[TAG_CHAR]]
+// CHECK-LABEL: test1
+// CHECK: store i32 2, {{.*}}, !tbaa [[TAG_alias_test1_x:!.*]]
   p1->x = 2;
-  // CHECK: store i32 3, i32* {{%.*}}, !tbaa [[TAG_INT]]
-  // PATH: store i32 3, i32* {{%.*}}, !tbaa [[TAG_test1_x:!.*]]
+
+// CHECK: store i32 3, {{.*}}, !tbaa [[TAG_test1_x:!.*]]
   p2->x = 3;
 }
-// CHECK:  !"any pointer", [[TYPE_CHAR:!.*]],
-// CHECK: [[TYPE_CHAR]] = !{!"omnipotent char", [[TAG_CXX_TBAA:!.*]],
-// CHECK: [[TAG_CXX_TBAA]] = !{!"Simple C/C++ TBAA"}
-// CHECK: [[TAG_CHAR]] = !{[[TYPE_CHAR]], [[TYPE_CHAR]], i64 0}
-// CHECK: [[TAG_INT]] = !{[[TYPE_INT:!.*]], [[TYPE_INT]], i64 0}
-// CHECK: [[TYPE_INT]] = !{!"int", [[TYPE_CHAR]]
 
-// PATH: [[TYPE_CHAR:!.*]] = !{!"omnipotent char", !{{.*}}
-// PATH: [[TAG_CHAR]] = !{[[TYPE_CHAR]], [[TYPE_CHAR]], i64 0}
-// PATH: [[TAG_INT]] = !{[[TYPE_INT:!.*]], [[TYPE_INT]], i64 0}
-// PATH: [[TYPE_INT]] = !{!"int", [[TYPE_CHAR]]
-// PATH: [[TAG_test1_x]] = !{[[TYPE_test1:!.*]], [[TYPE_INT]], i64 0}
-// PATH: [[TYPE_test1]] = !{!"Test1", [[TYPE_INT]], i64 0}
+// SCALAR-DAG: [[ROOT:!.*]] = !{!"Simple C/C++ TBAA"}
+// SCALAR-DAG: [[TYPE_char:!.*]] = !{!"omnipotent char", [[ROOT]], i64 0}
+// SCALAR-DAG: [[TAG_alias_int]] = !{[[TYPE_char]], [[TYPE_char]], i64 0}
+// SCALAR-DAG: [[TAG_alias_test1_x]] = !{[[TYPE_char]], [[TYPE_char]], i64 0}
+// SCALAR-DAG: [[TYPE_int:!.*]] = !{!"int", [[TYPE_char]], i64 0}
+// SCALAR-DAG: [[TAG_int]] = !{[[TYPE_int]], [[TYPE_int]], i64 0}
+// SCALAR-DAG: [[TAG_test1_x]] = !{[[TYPE_int]], [[TYPE_int]], i64 0}
+
+// OLD-PATH-DAG: [[ROOT:!.*]] = !{!"Simple C/C++ TBAA"}
+// OLD-PATH-DAG: [[TYPE_char:!.*]] = !{!"omnipotent char", [[ROOT]], i64 0}
+// OLD-PATH-DAG: [[TAG_alias_int]] = !{[[TYPE_char]], [[TYPE_char]], i64 0}
+// OLD-PATH-DAG: [[TAG_alias_test1_x]] = !{[[TYPE_char]], [[TYPE_char]], i64 0}
+// OLD-PATH-DAG: [[TYPE_int:!.*]] = !{!"int", [[TYPE_char]], i64 0}
+// OLD-PATH-DAG: [[TAG_int]] = !{[[TYPE_int]], [[TYPE_int]], i64 0}
+// OLD-PATH-DAG: [[TYPE_test1:!.*]] = !{!"Test1", [[TYPE_int]], i64 0}
+// OLD-PATH-DAG: [[TAG_test1_x]] = !{[[TYPE_test1]], [[TYPE_int]], i64 0}
+
+// NEW-PATH-DAG: [[ROOT:!.*]] = !{!"Simple C/C++ TBAA"}
+// NEW-PATH-DAG: [[TYPE_char:!.*]] = !{[[ROOT]], i64 1, !"omnipotent char"}
+// NEW-PATH-DAG: [[TAG_alias_int]] = !{[[TYPE_char]], [[TYPE_char]], i64 0, i64 0}
+// NEW-PATH-DAG: [[TAG_alias_test1_x]] = !{[[TYPE_char]], [[TYPE_char]], i64 0, i64 0}
+// NEW-PATH-DAG: [[TYPE_int:!.*]] = !{[[TYPE_char]], i64 4, !"int"}
+// NEW-PATH-DAG: [[TAG_int]] = !{[[TYPE_int]], [[TYPE_int]], i64 0, i64 4}
+// NEW-PATH-DAG: [[TYPE_test1:!.*]] = !{[[TYPE_char]], i64 4, !"Test1", [[TYPE_int]], i64 0, i64 4}
+// NEW-PATH-DAG: [[TAG_test1_x]] = !{[[TYPE_test1]], [[TYPE_int]], i64 0, i64 4}
diff --git a/test/CodeGen/mbackchain-2.c b/test/CodeGen/mbackchain-2.c
index e76afaf..bea46d2 100644
--- a/test/CodeGen/mbackchain-2.c
+++ b/test/CodeGen/mbackchain-2.c
@@ -1,6 +1,6 @@
 // RUN: %clang -mbackchain --target=s390x-linux -S -emit-llvm -o - %s | FileCheck %s
 
-// CHECK: define void @foo() [[NUW:#[0-9]+]]
+// CHECK: define dso_local void @foo() [[NUW:#[0-9]+]]
 void foo(void) {
 }
 
diff --git a/test/CodeGen/mbackchain-3.c b/test/CodeGen/mbackchain-3.c
index b115861..5ff0e16 100644
--- a/test/CodeGen/mbackchain-3.c
+++ b/test/CodeGen/mbackchain-3.c
@@ -1,6 +1,6 @@
 // RUN: %clang -mno-backchain --target=s390x-linux -S -emit-llvm -o - %s | FileCheck %s
 
-// CHECK: define void @foo() [[NUW:#[0-9]+]]
+// CHECK: define dso_local void @foo() [[NUW:#[0-9]+]]
 void foo(void) {
 }
 
diff --git a/test/CodeGen/mcount.c b/test/CodeGen/mcount.c
index 2284aca..98a2a6b 100644
--- a/test/CodeGen/mcount.c
+++ b/test/CodeGen/mcount.c
@@ -35,9 +35,9 @@
   return no_instrument();
 }
 
-// CHECK: attributes #0 = { {{.*}}"counting-function"="mcount"{{.*}} }
+// CHECK: attributes #0 = { {{.*}}"instrument-function-entry-inlined"="mcount"{{.*}} }
 // CHECK: attributes #1 = { {{.*}} }
-// CHECK-PREFIXED: attributes #0 = { {{.*}}"counting-function"="_mcount"{{.*}} }
+// CHECK-PREFIXED: attributes #0 = { {{.*}}"instrument-function-entry-inlined"="_mcount"{{.*}} }
 // CHECK-PREFIXED: attributes #1 = { {{.*}} }
-// NO-MCOUNT-NOT: attributes #{{[0-9]}} = { {{.*}}"counting-function"={{.*}} }
-// NO-MCOUNT1-NOT: attributes #1 = { {{.*}}"counting-function"={{.*}} }
+// NO-MCOUNT-NOT: attributes #{{[0-9]}} = { {{.*}}"instrument-function-entry-inlined"={{.*}} }
+// NO-MCOUNT1-NOT: attributes #1 = { {{.*}}"instrument-function-entry-inlined"={{.*}} }
diff --git a/test/CodeGen/microsoft-call-conv-x64.c b/test/CodeGen/microsoft-call-conv-x64.c
index 6475dfa..5be748f 100644
--- a/test/CodeGen/microsoft-call-conv-x64.c
+++ b/test/CodeGen/microsoft-call-conv-x64.c
@@ -3,12 +3,12 @@
 void __fastcall f1(void);
 void __stdcall f2(void);
 void __fastcall f4(void) {
-// CHECK-LABEL: define void @f4()
+// CHECK-LABEL: define dso_local void @f4()
   f1();
 // CHECK: call void @f1()
 }
 void __stdcall f5(void) {
-// CHECK-LABEL: define void @f5()
+// CHECK-LABEL: define dso_local void @f5()
   f2();
 // CHECK: call void @f2()
 }
diff --git a/test/CodeGen/mingw-long-double.c b/test/CodeGen/mingw-long-double.c
index 1c7c31f..d344d9f 100644
--- a/test/CodeGen/mingw-long-double.c
+++ b/test/CodeGen/mingw-long-double.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple i686-windows-gnu -emit-llvm -o - %s \
 // RUN:    | FileCheck %s --check-prefix=GNU32
+// RUN: %clang_cc1 -triple i686-windows-gnu -emit-llvm -o - %s -mms-bitfields \
+// RUN:    | FileCheck %s --check-prefix=GNU32
 // RUN: %clang_cc1 -triple x86_64-windows-gnu -emit-llvm -o - %s \
 // RUN:    | FileCheck %s --check-prefix=GNU64
 // RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -o - %s \
@@ -10,32 +12,32 @@
   long double ldb;
 } agggregate_LD = {};
 // GNU32: %struct.anon = type { i8, x86_fp80 }
-// GNU32: @agggregate_LD = global %struct.anon zeroinitializer, align 4
+// GNU32: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 4
 // GNU64: %struct.anon = type { i8, x86_fp80 }
-// GNU64: @agggregate_LD = global %struct.anon zeroinitializer, align 16
+// GNU64: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 16
 // MSC64: %struct.anon = type { i8, double }
-// MSC64: @agggregate_LD = global %struct.anon zeroinitializer, align 8
+// MSC64: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 8
 
 long double dataLD = 1.0L;
-// GNU32: @dataLD = global x86_fp80 0xK3FFF8000000000000000, align 4
-// GNU64: @dataLD = global x86_fp80 0xK3FFF8000000000000000, align 16
-// MSC64: @dataLD = global double 1.000000e+00, align 8
+// GNU32: @dataLD = dso_local global x86_fp80 0xK3FFF8000000000000000, align 4
+// GNU64: @dataLD = dso_local global x86_fp80 0xK3FFF8000000000000000, align 16
+// MSC64: @dataLD = dso_local global double 1.000000e+00, align 8
 
 long double _Complex dataLDC = {1.0L, 1.0L};
-// GNU32: @dataLDC = global { x86_fp80, x86_fp80 } { x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000 }, align 4
-// GNU64: @dataLDC = global { x86_fp80, x86_fp80 } { x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000 }, align 16
-// MSC64: @dataLDC = global { double, double } { double 1.000000e+00, double 1.000000e+00 }, align 8
+// GNU32: @dataLDC = dso_local global { x86_fp80, x86_fp80 } { x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000 }, align 4
+// GNU64: @dataLDC = dso_local global { x86_fp80, x86_fp80 } { x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000 }, align 16
+// MSC64: @dataLDC = dso_local global { double, double } { double 1.000000e+00, double 1.000000e+00 }, align 8
 
 long double TestLD(long double x) {
   return x * x;
 }
-// GNU32: define x86_fp80 @TestLD(x86_fp80 %x)
-// GNU64: define void @TestLD(x86_fp80* noalias sret %agg.result, x86_fp80*)
-// MSC64: define double @TestLD(double %x)
+// GNU32: define dso_local x86_fp80 @TestLD(x86_fp80 %x)
+// GNU64: define dso_local void @TestLD(x86_fp80* noalias sret %agg.result, x86_fp80*)
+// MSC64: define dso_local double @TestLD(double %x)
 
 long double _Complex TestLDC(long double _Complex x) {
   return x * x;
 }
-// GNU32: define void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %x)
-// GNU64: define void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* %x)
-// MSC64: define void @TestLDC({ double, double }* noalias sret %agg.result, { double, double }* %x)
+// GNU32: define dso_local void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %x)
+// GNU64: define dso_local void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* %x)
+// MSC64: define dso_local void @TestLDC({ double, double }* noalias sret %agg.result, { double, double }* %x)
diff --git a/test/CodeGen/mips-vector-return.c b/test/CodeGen/mips-vector-return.c
index 8af4998..dd3c400 100644
--- a/test/CodeGen/mips-vector-return.c
+++ b/test/CodeGen/mips-vector-return.c
@@ -8,13 +8,13 @@
 typedef double v4df __attribute__ ((__vector_size__ (32)));
 typedef int v4i32 __attribute__ ((__vector_size__ (16)));
 
-// O32-LABEL: define void @test_v4sf(<4 x float>* noalias nocapture sret
+// O32-LABEL: define dso_local void @test_v4sf(<4 x float>* noalias nocapture sret
 // N64: define inreg { i64, i64 } @test_v4sf
 v4sf test_v4sf(float a) {
   return (v4sf){0.0f, a, 0.0f, 0.0f};
 }
 
-// O32-LABEL: define void @test_v4df(<4 x double>* noalias nocapture sret
+// O32-LABEL: define dso_local void @test_v4df(<4 x double>* noalias nocapture sret
 // N64-LABEL: define void @test_v4df(<4 x double>* noalias nocapture sret
 v4df test_v4df(double a) {
   return (v4df){0.0, a, 0.0, 0.0};
@@ -23,7 +23,7 @@
 // O32 returns integer vectors whose size is equal to or smaller than 16-bytes
 // in integer registers.
 //
-// O32: define inreg { i32, i32, i32, i32 } @test_v4i32
+// O32: define dso_local inreg { i32, i32, i32, i32 } @test_v4i32
 // N64: define inreg { i64, i64 } @test_v4i32
 v4i32 test_v4i32(int a) {
   return (v4i32){0, a, 0, 0};
diff --git a/test/CodeGen/mms-bitfields.c b/test/CodeGen/mms-bitfields.c
index 1617e8a..d4604fb 100644
--- a/test/CodeGen/mms-bitfields.c
+++ b/test/CodeGen/mms-bitfields.c
@@ -20,3 +20,46 @@
 } s3;
 
 // CHECK: %struct.s3 = type { i32, [4 x i8], %struct.s1 }
+
+// PR32482:
+
+#pragma pack (push,1)
+
+typedef unsigned int UINT32;
+
+struct Inner {
+  UINT32    A    :  1;
+  UINT32    B    :  1;
+  UINT32    C    :  1;
+  UINT32    D    : 30;
+} Inner;
+
+#pragma pack (pop)
+
+// CHECK: %struct.Inner = type { i32, i32 }
+
+// CHECK: %struct.A = type { i32, i32, i32 }
+
+#pragma pack(push, 1)
+
+union HEADER {
+  struct A {
+    int                                         :  3;  // Bits 2:0
+    int a                                       :  9;  // Bits 11:3
+    int                                         :  12;  // Bits 23:12
+    int b                                       :  17;  // Bits 40:24
+    int                                         :  7;  // Bits 47:41
+    int c                                       :  4;  // Bits 51:48
+    int                                         :  4;  // Bits 55:52
+    int d                                       :  3;  // Bits 58:56
+    int                                         :  5;  // Bits 63:59
+  } Bits;
+} HEADER;
+
+#pragma pack(pop)
+
+struct Inner variable = { 1,0,1, 21 };
+union HEADER hdr = {{1,2,3,4}};
+
+// CHECK: @variable = global { i8, [3 x i8], i8, i8, i8, i8 } { i8 5, [3 x i8] undef, i8 21, i8 0, i8 0, i8 0 }, align 1
+// CHECK: @hdr = global { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } } { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } { i8 8, i8 0, [2 x i8] undef, i8 2, i8 0, i8 0, i8 3, i8 4, [3 x i8] undef } }, align 1
diff --git a/test/CodeGen/ms-align-tentative.c b/test/CodeGen/ms-align-tentative.c
index eb68e69..ae9b4b1 100644
--- a/test/CodeGen/ms-align-tentative.c
+++ b/test/CodeGen/ms-align-tentative.c
@@ -1,25 +1,25 @@
 // RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -fms-compatibility -o - < %s | FileCheck %s
 
 char __declspec(align(8192)) x;
-// CHECK-DAG: @x = global i8 0, align 8192
+// CHECK-DAG: @x = dso_local global i8 0, align 8192
 
 typedef char __declspec(align(8192)) T;
 T y;
-// CHECK-DAG: @y = global i8 0, align 8192
+// CHECK-DAG: @y = dso_local global i8 0, align 8192
 
 T __declspec(align(8192)) z;
-// CHECK-DAG: @z = global i8 0, align 8192
+// CHECK-DAG: @z = dso_local global i8 0, align 8192
 
 int __declspec(align(16)) redef;
 int __declspec(align(32)) redef = 8;
-// CHECK-DAG: @redef = global i32 8, align 32
+// CHECK-DAG: @redef = dso_local global i32 8, align 32
 
 struct __declspec(align(64)) S {
   char fd;
 } s;
-// CHECK-DAG: @s = global %struct.S zeroinitializer, align 64
+// CHECK-DAG: @s = dso_local global %struct.S zeroinitializer, align 64
 
 struct Wrap {
   struct S x;
 } w;
-// CHECK-DAG: @w = global %struct.Wrap zeroinitializer, align 64
+// CHECK-DAG: @w = dso_local global %struct.Wrap zeroinitializer, align 64
diff --git a/test/CodeGen/ms-annotation.c b/test/CodeGen/ms-annotation.c
index 6f4a20c..8ad4836 100644
--- a/test/CodeGen/ms-annotation.c
+++ b/test/CodeGen/ms-annotation.c
@@ -11,7 +11,7 @@
   __annotation(L"unicode: \u0ca0_\u0ca0");
 }
 
-// CHECK-LABEL: define void @test1()
+// CHECK-LABEL: define dso_local void @test1()
 // CHECK: call void @llvm.codeview.annotation(metadata ![[A1:[0-9]+]])
 // CHECK: call void @llvm.codeview.annotation(metadata ![[A2:[0-9]+]])
 // CHECK: call void @llvm.codeview.annotation(metadata ![[A3:[0-9]+]])
diff --git a/test/CodeGen/ms-barriers-intrinsics.c b/test/CodeGen/ms-barriers-intrinsics.c
index c7da50c..7f87c90 100644
--- a/test/CodeGen/ms-barriers-intrinsics.c
+++ b/test/CodeGen/ms-barriers-intrinsics.c
@@ -12,26 +12,26 @@
 #include <intrin.h>
 
 void test_ReadWriteBarrier() { _ReadWriteBarrier(); }
-// CHECK-LABEL: define void @test_ReadWriteBarrier
+// CHECK-LABEL: define dso_local void @test_ReadWriteBarrier
 // CHECK:   fence syncscope("singlethread") seq_cst
 // CHECK:   ret void
 // CHECK: }
 
 void test_ReadBarrier() { _ReadBarrier(); }
-// CHECK-LABEL: define void @test_ReadBarrier
+// CHECK-LABEL: define dso_local void @test_ReadBarrier
 // CHECK:   fence syncscope("singlethread") seq_cst
 // CHECK:   ret void
 // CHECK: }
 
 void test_WriteBarrier() { _WriteBarrier(); }
-// CHECK-LABEL: define void @test_WriteBarrier
+// CHECK-LABEL: define dso_local void @test_WriteBarrier
 // CHECK:   fence syncscope("singlethread") seq_cst
 // CHECK:   ret void
 // CHECK: }
 
 #if defined(__x86_64__)
 void test__faststorefence() { __faststorefence(); }
-// CHECK-X64-LABEL: define void @test__faststorefence
+// CHECK-X64-LABEL: define dso_local void @test__faststorefence
 // CHECK-X64:   fence seq_cst
 // CHECK-X64:   ret void
 // CHECK-X64: }
diff --git a/test/CodeGen/ms-declspecs.c b/test/CodeGen/ms-declspecs.c
index 05810bb..68d9640 100644
--- a/test/CodeGen/ms-declspecs.c
+++ b/test/CodeGen/ms-declspecs.c
@@ -2,14 +2,14 @@
 
 __declspec(selectany) int x1 = 1;
 const __declspec(selectany) int x2 = 2;
-// CHECK: @x1 = weak_odr global i32 1, comdat, align 4
-// CHECK: @x2 = weak_odr constant i32 2, comdat, align 4
+// CHECK: @x1 = weak_odr dso_local global i32 1, comdat, align 4
+// CHECK: @x2 = weak_odr dso_local constant i32 2, comdat, align 4
 
 // selectany turns extern variable declarations into definitions.
 __declspec(selectany) int x3;
 extern __declspec(selectany) int x4;
-// CHECK: @x3 = weak_odr global i32 0, comdat, align 4
-// CHECK: @x4 = weak_odr global i32 0, comdat, align 4
+// CHECK: @x3 = weak_odr dso_local global i32 0, comdat, align 4
+// CHECK: @x4 = weak_odr dso_local global i32 0, comdat, align 4
 
 struct __declspec(align(16)) S {
   char x;
@@ -19,14 +19,14 @@
 // CHECK: @u = {{.*}}zeroinitializer, align 16
 
 
-// CHECK: define void @t3() [[NAKED:#[0-9]+]] {
+// CHECK: define dso_local void @t3() [[NAKED:#[0-9]+]] {
 __declspec(naked) void t3() {}
 
-// CHECK: define void @t22() [[NUW:#[0-9]+]]
+// CHECK: define dso_local void @t22() [[NUW:#[0-9]+]]
 void __declspec(nothrow) t22();
 void t22() {}
 
-// CHECK: define void @t2() [[NI:#[0-9]+]] {
+// CHECK: define dso_local void @t2() [[NI:#[0-9]+]] {
 __declspec(noinline) void t2() {}
 
 // CHECK: call void @f20_t() [[NR:#[0-9]+]]
diff --git a/test/CodeGen/ms-declspecs.cpp b/test/CodeGen/ms-declspecs.cpp
index decf5d6..06f45a2 100644
--- a/test/CodeGen/ms-declspecs.cpp
+++ b/test/CodeGen/ms-declspecs.cpp
@@ -8,8 +8,8 @@
 __declspec(selectany) int x4;
 }
 __declspec(selectany) int x5;
-// CHECK: @"\01?x1@@3HA" = weak_odr global i32 0, comdat, align 4
-// CHECK: @x2 = weak_odr global i32 0, comdat, align 4
-// CHECK: @"\01?x3@@3HA"  = weak_odr global i32 0, comdat, align 4
-// CHECK: @x4 = weak_odr global i32 0, comdat, align 4
-// CHECK: @"\01?x5@@3HA"  = weak_odr global i32 0, comdat, align 4
+// CHECK: @"\01?x1@@3HA" = weak_odr dso_local global i32 0, comdat, align 4
+// CHECK: @x2 = weak_odr dso_local global i32 0, comdat, align 4
+// CHECK: @"\01?x3@@3HA"  = weak_odr dso_local global i32 0, comdat, align 4
+// CHECK: @x4 = weak_odr dso_local global i32 0, comdat, align 4
+// CHECK: @"\01?x5@@3HA"  = weak_odr dso_local global i32 0, comdat, align 4
diff --git a/test/CodeGen/ms-inline-asm-align.c b/test/CodeGen/ms-inline-asm-align.c
index de896b8..4786b2f 100644
--- a/test/CodeGen/ms-inline-asm-align.c
+++ b/test/CodeGen/ms-inline-asm-align.c
@@ -21,7 +21,7 @@
 // DARWIN-SAME: .align 8
 // DARWIN-SAME: "~{dirflag},~{fpsr},~{flags}"()
 
-// WINDOWS-LABEL: define void @align_test()
+// WINDOWS-LABEL: define dso_local void @align_test()
 // WINDOWS: call void asm sideeffect inteldialect
 // WINDOWS-SAME: .align 8
 // WINDOWS-SAME: .align 16
diff --git a/test/CodeGen/ms-inline-asm-avx512.c b/test/CodeGen/ms-inline-asm-avx512.c
index 6189e50..76c0a57 100644
--- a/test/CodeGen/ms-inline-asm-avx512.c
+++ b/test/CodeGen/ms-inline-asm-avx512.c
@@ -21,7 +21,7 @@
 }
 
 void ignore_fe_size() {
-  // CHECK-LABEL: define void @ignore_fe_size()
+  // CHECK-LABEL: define dso_local void @ignore_fe_size()
   char c;
   // CHECK: vaddps xmm1, xmm2, $1{1to4}
   __asm vaddps xmm1, xmm2, [c]{1to4}
diff --git a/test/CodeGen/ms-inline-asm.c b/test/CodeGen/ms-inline-asm.c
index c03f0de..5c3e3ff 100644
--- a/test/CodeGen/ms-inline-asm.c
+++ b/test/CodeGen/ms-inline-asm.c
@@ -577,17 +577,17 @@
 void t41(unsigned short a) {
 // CHECK-LABEL: define void @t41(i16 zeroext %a)
   __asm mov cs, a;
-// CHECK: mov cs, word ptr $0
+// CHECK: mov cs, $0
   __asm mov ds, a;
-// CHECK: mov ds, word ptr $1
+// CHECK: mov ds, $1
   __asm mov es, a;
-// CHECK: mov es, word ptr $2
+// CHECK: mov es, $2
   __asm mov fs, a;
-// CHECK: mov fs, word ptr $3
+// CHECK: mov fs, $3
   __asm mov gs, a;
-// CHECK: mov gs, word ptr $4
+// CHECK: mov gs, $4
   __asm mov ss, a;
-// CHECK: mov ss, word ptr $5
+// CHECK: mov ss, $5
 // CHECK: "*m,*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"(i16* {{.*}}, i16* {{.*}}, i16* {{.*}}, i16* {{.*}}, i16* {{.*}}, i16* {{.*}})
 }
 
@@ -661,6 +661,17 @@
   // CHECK: call void asm sideeffect inteldialect "add eax, [eax + $$-128]", "~{eax},~{flags},~{dirflag},~{fpsr},~{flags}"()
 }
 
+void t47() {
+  // CHECK-LABEL: define void @t47
+  __asm {
+    bndmk bnd0, dword ptr [eax]
+    bndmk bnd1, dword ptr [ebx]
+    bndmk bnd2, dword ptr [ecx]
+    bndmk bnd3, dword ptr [edx]
+  }
+  // CHECK: call void asm sideeffect inteldialect "bndmk bnd0, dword ptr [eax]\0A\09bndmk bnd1, dword ptr [ebx]\0A\09bndmk bnd2, dword ptr [ecx]\0A\09bndmk bnd3, dword ptr [edx]", "~{bnd0},~{bnd1},~{bnd2},~{bnd3},~{dirflag},~{fpsr},~{flags}"()
+}
+
 void dot_operator(){
   // CHECK-LABEL: define void @dot_operator
 	__asm { mov eax, 3[ebx]A.b}
diff --git a/test/CodeGen/ms-intrinsics.c b/test/CodeGen/ms-intrinsics.c
index 818be7f..7f434b4 100644
--- a/test/CodeGen/ms-intrinsics.c
+++ b/test/CodeGen/ms-intrinsics.c
@@ -5,7 +5,7 @@
 // RUN:         -triple thumbv7--windows -Oz -emit-llvm %s -o - \
 // RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-ARM,CHECK-ARM-X64
 // RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
-// RUN:         -triple x86_64--windows -Oz -emit-llvm %s -o - \
+// RUN:         -triple x86_64--windows -Oz -emit-llvm -target-feature +cx16 %s -o - \
 // RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-X64,CHECK-ARM-X64,CHECK-INTEL
 
 // intrin.h needs size_t, but -ffreestanding prevents us from getting it from
@@ -20,12 +20,12 @@
 }
 
 // CHECK-I386: define{{.*}}void @test__stosb
-// CHECK-I386:   tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 %Count, i32 1, i1 true)
+// CHECK-I386:   tail call void @llvm.memset.p0i8.i32(i8* align 1 %Dest, i8 %Data, i32 %Count, i1 true)
 // CHECK-I386:   ret void
 // CHECK-I386: }
 
 // CHECK-X64: define{{.*}}void @test__stosb
-// CHECK-X64:   tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 %Count, i32 1, i1 true)
+// CHECK-X64:   tail call void @llvm.memset.p0i8.i64(i8* align 1 %Dest, i8 %Data, i64 %Count, i1 true)
 // CHECK-X64:   ret void
 // CHECK-X64: }
 
@@ -55,7 +55,7 @@
 void *test_AddressOfReturnAddress() {
   return _AddressOfReturnAddress();
 }
-// CHECK-INTEL-LABEL: define i8* @test_AddressOfReturnAddress()
+// CHECK-INTEL-LABEL: define dso_local i8* @test_AddressOfReturnAddress()
 // CHECK-INTEL: = tail call i8* @llvm.addressofreturnaddress()
 // CHECK-INTEL: ret i8*
 #endif
@@ -329,6 +329,27 @@
 // CHECK: ret i64 [[RESULT]]
 // CHECK: }
 
+#if defined(__x86_64__)
+unsigned char test_InterlockedCompareExchange128(__int64 volatile *Destination, __int64 ExchangeHigh, __int64 ExchangeLow, __int64* ComparandResult) {
+  return _InterlockedCompareExchange128(Destination, ExchangeHigh, ExchangeLow, ComparandResult);
+}
+// CHECK-X64: define{{.*}}i8 @test_InterlockedCompareExchange128(i64*{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%ExchangeHigh, i64{{[a-z_ ]*}}%ExchangeLow, i64*{{[a-z_ ]*}}%ComparandResult){{.*}}{
+// CHECK-X64: [[DST:%[0-9]+]] = bitcast i64* %Destination to i128*
+// CHECK-X64: [[EH:%[0-9]+]] = zext i64 %ExchangeHigh to i128
+// CHECK-X64: [[EL:%[0-9]+]] = zext i64 %ExchangeLow to i128
+// CHECK-X64: [[CNR:%[0-9]+]] = bitcast i64* %ComparandResult to i128*
+// CHECK-X64: [[EHS:%[0-9]+]] = shl nuw i128 [[EH]], 64
+// CHECK-X64: [[EXP:%[0-9]+]] = or i128 [[EHS]], [[EL]]
+// CHECK-X64: [[ORG:%[0-9]+]] = load i128, i128* [[CNR]], align 16
+// CHECK-X64: [[RES:%[0-9]+]] = cmpxchg volatile i128* [[DST]], i128 [[ORG]], i128 [[EXP]] seq_cst seq_cst
+// CHECK-X64: [[OLD:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 0
+// CHECK-X64: store i128 [[OLD]], i128* [[CNR]], align 16
+// CHECK-X64: [[SUC1:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 1
+// CHECK-X64: [[SUC8:%[0-9]+]] = zext i1 [[SUC1]] to i8
+// CHECK-X64: ret i8 [[SUC8]]
+// CHECK-X64: }
+#endif
+
 short test_InterlockedIncrement16(short volatile *Addend) {
   return _InterlockedIncrement16(Addend);
 }
diff --git a/test/CodeGen/ms-setjmp.c b/test/CodeGen/ms-setjmp.c
index 675c8dd..b9c67eb 100644
--- a/test/CodeGen/ms-setjmp.c
+++ b/test/CodeGen/ms-setjmp.c
@@ -13,11 +13,11 @@
 
 int test_setjmp() {
   return _setjmp(jb);
-  // I386-LABEL: define i32 @test_setjmp
+  // I386-LABEL: define dso_local i32 @test_setjmp
   // I386:       %[[call:.*]] = call i32 (i8*, i32, ...) @_setjmp3(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i32 0, i32 0), i32 0)
   // I386-NEXT:  ret i32 %[[call]]
 
-  // X64-LABEL: define i32 @test_setjmp
+  // X64-LABEL: define dso_local i32 @test_setjmp
   // X64:       %[[addr:.*]] = call i8* @llvm.frameaddress(i32 0)
   // X64:       %[[call:.*]] = call i32 @_setjmp(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i32 0, i32 0), i8* %[[addr]])
   // X64-NEXT:  ret i32 %[[call]]
@@ -25,7 +25,7 @@
 
 int test_setjmpex() {
   return _setjmpex(jb);
-  // X64-LABEL: define i32 @test_setjmpex
+  // X64-LABEL: define dso_local i32 @test_setjmpex
   // X64:       %[[addr:.*]] = call i8* @llvm.frameaddress(i32 0)
   // X64:       %[[call:.*]] = call i32 @_setjmpex(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i32 0, i32 0), i8* %[[addr]])
   // X64-NEXT:  ret i32 %[[call]]
diff --git a/test/CodeGen/ms-x86-intrinsics.c b/test/CodeGen/ms-x86-intrinsics.c
index 51520d1..450a134 100644
--- a/test/CodeGen/ms-x86-intrinsics.c
+++ b/test/CodeGen/ms-x86-intrinsics.c
@@ -9,7 +9,7 @@
 char test__readfsbyte(unsigned long Offset) {
   return __readfsbyte(Offset);
 }
-// CHECK-I386-LABEL: define signext i8 @test__readfsbyte(i32 %Offset)
+// CHECK-I386-LABEL: define dso_local signext i8 @test__readfsbyte(i32 %Offset)
 // CHECK-I386:   [[PTR:%[0-9]+]] = inttoptr i32 %Offset to i8 addrspace(257)*
 // CHECK-I386:   [[VALUE:%[0-9]+]] = load volatile i8, i8 addrspace(257)* [[PTR]], align 1
 // CHECK-I386:   ret i8 [[VALUE:%[0-9]+]]
@@ -17,7 +17,7 @@
 short test__readfsword(unsigned long Offset) {
   return __readfsword(Offset);
 }
-// CHECK-I386-LABEL: define signext i16 @test__readfsword(i32 %Offset)
+// CHECK-I386-LABEL: define dso_local signext i16 @test__readfsword(i32 %Offset)
 // CHECK-I386:   [[PTR:%[0-9]+]] = inttoptr i32 %Offset to i16 addrspace(257)*
 // CHECK-I386:   [[VALUE:%[0-9]+]] = load volatile i16, i16 addrspace(257)* [[PTR]], align 2
 // CHECK-I386:   ret i16 [[VALUE:%[0-9]+]]
@@ -25,7 +25,7 @@
 long test__readfsdword(unsigned long Offset) {
   return __readfsdword(Offset);
 }
-// CHECK-I386-LABEL: define i32 @test__readfsdword(i32 %Offset)
+// CHECK-I386-LABEL: define dso_local i32 @test__readfsdword(i32 %Offset)
 // CHECK-I386:   [[PTR:%[0-9]+]] = inttoptr i32 %Offset to i32 addrspace(257)*
 // CHECK-I386:   [[VALUE:%[0-9]+]] = load volatile i32, i32 addrspace(257)* [[PTR]], align 4
 // CHECK-I386:   ret i32 [[VALUE:%[0-9]+]]
@@ -33,7 +33,7 @@
 long long test__readfsqword(unsigned long Offset) {
   return __readfsqword(Offset);
 }
-// CHECK-I386-LABEL: define i64 @test__readfsqword(i32 %Offset)
+// CHECK-I386-LABEL: define dso_local i64 @test__readfsqword(i32 %Offset)
 // CHECK-I386:   [[PTR:%[0-9]+]] = inttoptr i32 %Offset to i64 addrspace(257)*
 // CHECK-I386:   [[VALUE:%[0-9]+]] = load volatile i64, i64 addrspace(257)* [[PTR]], align 8
 // CHECK-I386:   ret i64 [[VALUE:%[0-9]+]]
@@ -42,7 +42,7 @@
 __int64 test__emul(int a, int b) {
   return __emul(a, b);
 }
-// CHECK-LABEL: define i64 @test__emul(i32 %a, i32 %b)
+// CHECK-LABEL: define dso_local i64 @test__emul(i32 %a, i32 %b)
 // CHECK: [[X:%[0-9]+]] = sext i32 %a to i64
 // CHECK: [[Y:%[0-9]+]] = sext i32 %b to i64
 // CHECK: [[RES:%[0-9]+]] = mul nsw i64 [[Y]], [[X]]
@@ -51,7 +51,7 @@
 unsigned __int64 test__emulu(unsigned int a, unsigned int b) {
   return __emulu(a, b);
 }
-// CHECK-LABEL: define i64 @test__emulu(i32 %a, i32 %b)
+// CHECK-LABEL: define dso_local i64 @test__emulu(i32 %a, i32 %b)
 // CHECK: [[X:%[0-9]+]] = zext i32 %a to i64
 // CHECK: [[Y:%[0-9]+]] = zext i32 %b to i64
 // CHECK: [[RES:%[0-9]+]] = mul nuw i64 [[Y]], [[X]]
@@ -62,7 +62,7 @@
 char test__readgsbyte(unsigned long Offset) {
   return __readgsbyte(Offset);
 }
-// CHECK-X64-LABEL: define i8 @test__readgsbyte(i32 %Offset)
+// CHECK-X64-LABEL: define dso_local i8 @test__readgsbyte(i32 %Offset)
 // CHECK-X64:   [[ZEXT:%[0-9]+]] = zext i32 %Offset to i64
 // CHECK-X64:   [[PTR:%[0-9]+]] = inttoptr i64 [[ZEXT]] to i8 addrspace(256)*
 // CHECK-X64:   [[VALUE:%[0-9]+]] = load volatile i8, i8 addrspace(256)* [[PTR]], align 1
@@ -71,7 +71,7 @@
 short test__readgsword(unsigned long Offset) {
   return __readgsword(Offset);
 }
-// CHECK-X64-LABEL: define i16 @test__readgsword(i32 %Offset)
+// CHECK-X64-LABEL: define dso_local i16 @test__readgsword(i32 %Offset)
 // CHECK-X64:   [[ZEXT:%[0-9]+]] = zext i32 %Offset to i64
 // CHECK-X64:   [[PTR:%[0-9]+]] = inttoptr i64 [[ZEXT]] to i16 addrspace(256)*
 // CHECK-X64:   [[VALUE:%[0-9]+]] = load volatile i16, i16 addrspace(256)* [[PTR]], align 2
@@ -80,7 +80,7 @@
 long test__readgsdword(unsigned long Offset) {
   return __readgsdword(Offset);
 }
-// CHECK-X64-LABEL: define i32 @test__readgsdword(i32 %Offset)
+// CHECK-X64-LABEL: define dso_local i32 @test__readgsdword(i32 %Offset)
 // CHECK-X64:   [[ZEXT:%[0-9]+]] = zext i32 %Offset to i64
 // CHECK-X64:   [[PTR:%[0-9]+]] = inttoptr i64 [[ZEXT]] to i32 addrspace(256)*
 // CHECK-X64:   [[VALUE:%[0-9]+]] = load volatile i32, i32 addrspace(256)* [[PTR]], align 4
@@ -89,7 +89,7 @@
 long long test__readgsqword(unsigned long Offset) {
   return __readgsqword(Offset);
 }
-// CHECK-X64-LABEL: define i64 @test__readgsqword(i32 %Offset)
+// CHECK-X64-LABEL: define dso_local i64 @test__readgsqword(i32 %Offset)
 // CHECK-X64:   [[ZEXT:%[0-9]+]] = zext i32 %Offset to i64
 // CHECK-X64:   [[PTR:%[0-9]+]] = inttoptr i64 [[ZEXT]] to i64 addrspace(256)*
 // CHECK-X64:   [[VALUE:%[0-9]+]] = load volatile i64, i64 addrspace(256)* [[PTR]], align 8
@@ -98,13 +98,13 @@
 __int64 test__mulh(__int64 a, __int64 b) {
   return __mulh(a, b);
 }
-// CHECK-X64-LABEL: define i64 @test__mulh(i64 %a, i64 %b)
+// CHECK-X64-LABEL: define dso_local i64 @test__mulh(i64 %a, i64 %b)
 // CHECK-X64: = mul nsw i128 %
 
 unsigned __int64 test__umulh(unsigned __int64 a, unsigned __int64 b) {
   return __umulh(a, b);
 }
-// CHECK-X64-LABEL: define i64 @test__umulh(i64 %a, i64 %b)
+// CHECK-X64-LABEL: define dso_local i64 @test__umulh(i64 %a, i64 %b)
 // CHECK-X64: = mul nuw i128 %
 
 __int64 test_mul128(__int64 Multiplier,
@@ -112,7 +112,7 @@
                     __int64 *HighProduct) {
   return _mul128(Multiplier, Multiplicand, HighProduct);
 }
-// CHECK-X64-LABEL: define i64 @test_mul128(i64 %Multiplier, i64 %Multiplicand, i64*{{[a-z_ ]*}}%HighProduct)
+// CHECK-X64-LABEL: define dso_local i64 @test_mul128(i64 %Multiplier, i64 %Multiplicand, i64*{{[a-z_ ]*}}%HighProduct)
 // CHECK-X64: = sext i64 %Multiplier to i128
 // CHECK-X64: = sext i64 %Multiplicand to i128
 // CHECK-X64: = mul nsw i128 %
@@ -124,7 +124,7 @@
                               unsigned __int64 *HighProduct) {
   return _umul128(Multiplier, Multiplicand, HighProduct);
 }
-// CHECK-X64-LABEL: define i64 @test_umul128(i64 %Multiplier, i64 %Multiplicand, i64*{{[a-z_ ]*}}%HighProduct)
+// CHECK-X64-LABEL: define dso_local i64 @test_umul128(i64 %Multiplier, i64 %Multiplicand, i64*{{[a-z_ ]*}}%HighProduct)
 // CHECK-X64: = zext i64 %Multiplier to i128
 // CHECK-X64: = zext i64 %Multiplicand to i128
 // CHECK-X64: = mul nuw i128 %
diff --git a/test/CodeGen/ms_abi.c b/test/CodeGen/ms_abi.c
index 407087e..75e1caf 100644
--- a/test/CodeGen/ms_abi.c
+++ b/test/CodeGen/ms_abi.c
@@ -13,7 +13,7 @@
 void __attribute__((sysv_abi)) f2(void);
 void f3(void) {
   // FREEBSD-LABEL: define void @f3()
-  // WIN64-LABEL: define void @f3()
+  // WIN64-LABEL: define dso_local void @f3()
   f1();
   // FREEBSD: call win64cc void @f1()
   // WIN64: call void @f1()
@@ -23,13 +23,13 @@
 }
 // FREEBSD: declare win64cc void @f1()
 // FREEBSD: declare void @f2()
-// WIN64: declare void @f1()
-// WIN64: declare x86_64_sysvcc void @f2()
+// WIN64: declare dso_local void @f1()
+// WIN64: declare dso_local x86_64_sysvcc void @f2()
 
 // Win64 ABI varargs
 void __attribute__((ms_abi)) f4(int a, ...) {
   // FREEBSD-LABEL: define win64cc void @f4
-  // WIN64-LABEL: define void @f4
+  // WIN64-LABEL: define dso_local void @f4
   __builtin_ms_va_list ap;
   __builtin_ms_va_start(ap, a);
   // FREEBSD: %[[AP:.*]] = alloca i8*
@@ -79,7 +79,7 @@
 
 // Let's verify that normal va_lists work right on Win64, too.
 void f5(int a, ...) {
-  // WIN64-LABEL: define void @f5
+  // WIN64-LABEL: define dso_local void @f5
   __builtin_va_list ap;
   __builtin_va_start(ap, a);
   // WIN64: %[[AP:.*]] = alloca i8*
@@ -110,7 +110,7 @@
 void __attribute__((sysv_abi)) f6(__builtin_ms_va_list ap) {
   // FREEBSD-LABEL: define void @f6
   // FREEBSD: store i8* %ap, i8** %[[AP:.*]]
-  // WIN64-LABEL: define x86_64_sysvcc void @f6
+  // WIN64-LABEL: define dso_local x86_64_sysvcc void @f6
   // WIN64: store i8* %ap, i8** %[[AP:.*]]
   int b = __builtin_va_arg(ap, int);
   // FREEBSD: %[[AP_CUR:.*]] = load i8*, i8** %[[AP]]
@@ -146,3 +146,16 @@
   // WIN64: %[[AP_VAL:.*]] = load i8*, i8** %[[AP]]
   // WIN64-NEXT: store i8* %[[AP_VAL]], i8** %[[AP2:.*]]
 }
+
+// This test checks if structs are passed according to Win64 calling convention
+// when it's enforced by __attribute((ms_abi)).
+struct i128 {
+  unsigned long long a;
+  unsigned long long b;
+};
+
+__attribute__((ms_abi)) struct i128 f7(struct i128 a) {
+  // WIN64: define dso_local void @f7(%struct.i128* noalias sret %agg.result, %struct.i128* %a)
+  // FREEBSD: define win64cc void @f7(%struct.i128* noalias sret %agg.result, %struct.i128* %a)
+  return a;
+}
diff --git a/test/CodeGen/ms_abi_aarch64.c b/test/CodeGen/ms_abi_aarch64.c
index 4aa93f0..5b6cd6a 100644
--- a/test/CodeGen/ms_abi_aarch64.c
+++ b/test/CodeGen/ms_abi_aarch64.c
@@ -5,7 +5,7 @@
 void f2(void);
 void f3(void) {
   // LINUX-LABEL: define void @f3()
-  // WIN64-LABEL: define void @f3()
+  // WIN64-LABEL: define dso_local void @f3()
   f1();
   // LINUX: call win64cc void @f1()
   // WIN64: call void @f1()
@@ -15,13 +15,13 @@
 }
 // LINUX: declare win64cc void @f1()
 // LINUX: declare void @f2()
-// WIN64: declare void @f1()
-// WIN64: declare void @f2()
+// WIN64: declare dso_local void @f1()
+// WIN64: declare dso_local void @f2()
 
 // Win64 ABI varargs
 void __attribute__((ms_abi)) f4(int a, ...) {
   // LINUX-LABEL: define win64cc void @f4
-  // WIN64-LABEL: define void @f4
+  // WIN64-LABEL: define dso_local void @f4
   __builtin_ms_va_list ap;
   __builtin_ms_va_start(ap, a);
   // LINUX: %[[AP:.*]] = alloca i8*
@@ -50,7 +50,7 @@
 
 // Let's verify that normal va_lists work right on Win64, too.
 void f5(int a, ...) {
-  // WIN64-LABEL: define void @f5
+  // WIN64-LABEL: define dso_local void @f5
   __builtin_va_list ap;
   __builtin_va_start(ap, a);
   // WIN64: %[[AP:.*]] = alloca i8*
diff --git a/test/CodeGen/ms_struct-long-double.c b/test/CodeGen/ms_struct-long-double.c
new file mode 100644
index 0000000..9b3ea79
--- /dev/null
+++ b/test/CodeGen/ms_struct-long-double.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -emit-llvm-only -triple i686-windows-gnu -fdump-record-layouts %s | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm-only -triple i686-linux -fdump-record-layouts -Wno-incompatible-ms-struct %s | FileCheck %s
+// RUN: not %clang_cc1 -emit-llvm-only -triple i686-linux -fdump-record-layouts %s 2>&1 | FileCheck %s -check-prefix=ERROR
+
+struct ldb_struct {
+  char c;
+  long double ldb;
+} __attribute__((__ms_struct__));
+
+struct ldb_struct a;
+
+// CHECK:             0 | struct ldb_struct
+// CHECK-NEXT:        0 |   char c
+// CHECK-NEXT:        4 |   long double ldb
+// CHECK-NEXT:          | [sizeof=16, align=4]
+
+// ERROR: error: ms_struct may not produce Microsoft-compatible layouts with fundamental data types with sizes that aren't a power of two
diff --git a/test/CodeGen/ms_this.cpp b/test/CodeGen/ms_this.cpp
index 8647a5b..3776a6e 100644
--- a/test/CodeGen/ms_this.cpp
+++ b/test/CodeGen/ms_this.cpp
@@ -13,7 +13,7 @@
   void runc();
 };
 
-// CHECK: define void @"\01?runc@t2@@
+// CHECK: define dso_local void @"\01?runc@t2@@
 void t2::runc() {
   double num = 0;
   __asm {
@@ -26,7 +26,7 @@
 	   };
 }
 
-// CHECK: define void @"\01?runc@t1@@
+// CHECK: define dso_local void @"\01?runc@t1@@
 void t1::runc() {
   double num = 0;
   __asm {
@@ -41,7 +41,7 @@
 
 struct s {
   int a;
-  // CHECK: define linkonce_odr void @"\01?func@s@@
+  // CHECK: define linkonce_odr dso_local void @"\01?func@s@@
   void func() {
     __asm mov rax, [this]
     // CHECK: [[THIS_ADDR_S:%.+]] = alloca %struct.s*
diff --git a/test/CodeGen/no-bitfield-type-align.c b/test/CodeGen/no-bitfield-type-align.c
new file mode 100644
index 0000000..830cd58
--- /dev/null
+++ b/test/CodeGen/no-bitfield-type-align.c
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -fno-bitfield-type-align -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: %[[STRUCT_S:.*]] = type { i32 }
+
+struct S {
+  unsigned short:   0;
+  unsigned short  f1:15;
+  unsigned short:   0;
+  unsigned short  f2:15;
+};
+
+// CHECK: define void @test_zero_width_bitfield(%[[STRUCT_S]]* %[[A:.*]])
+// CHECK: %[[BF_LOAD:.*]] = load i32, i32* %[[V1:.*]], align 1
+// CHECK: %[[BF_CLEAR:.*]] = and i32 %[[BF_LOAD]], 32767
+// CHECK: %[[BF_CAST:.*]] = trunc i32 %[[BF_CLEAR]] to i16
+// CHECK: %[[CONV:.*]] = zext i16 %[[BF_CAST]] to i32
+// CHECK: %[[ADD:.*]] = add nsw i32 %[[CONV]], 1
+// CHECK: %[[CONV1:.*]] = trunc i32 %[[ADD]] to i16
+// CHECK: %[[V2:.*]] = zext i16 %[[CONV1]] to i32
+// CHECK: %[[BF_LOAD2:.*]] = load i32, i32* %[[V1]], align 1
+// CHECK: %[[BF_VALUE:.*]] = and i32 %[[V2]], 32767
+// CHECK: %[[BF_CLEAR3:.*]] = and i32 %[[BF_LOAD2]], -32768
+// CHECK: %[[BF_SET:.*]] = or i32 %[[BF_CLEAR3]], %[[BF_VALUE]]
+// CHECK: store i32 %[[BF_SET]], i32* %[[V1]], align 1
+
+// CHECK: %[[BF_LOAD4:.*]] = load i32, i32* %[[V4:.*]], align 1
+// CHECK: %[[BF_LSHR:.*]] = lshr i32 %[[BF_LOAD4]], 15
+// CHECK: %[[BF_CLEAR5:.*]] = and i32 %[[BF_LSHR]], 32767
+// CHECK: %[[BF_CAST6:.*]] = trunc i32 %[[BF_CLEAR5]] to i16
+// CHECK: %[[CONV7:.*]] = zext i16 %[[BF_CAST6]] to i32
+// CHECK: %[[ADD8:.*]] = add nsw i32 %[[CONV7]], 2
+// CHECK: %[[CONV9:.*]] = trunc i32 %[[ADD8]] to i16
+// CHECK: %[[V5:.*]] = zext i16 %[[CONV9]] to i32
+// CHECK: %[[BF_LOAD10:.*]] = load i32, i32* %[[V4]], align 1
+// CHECK: %[[BF_VALUE11:.*]] = and i32 %[[V5]], 32767
+// CHECK: %[[BF_SHL:.*]] = shl i32 %[[BF_VALUE11]], 15
+// CHECK: %[[BF_CLEAR12:.*]] = and i32 %[[BF_LOAD10]], -1073709057
+// CHECK: %[[BF_SET13:.*]] = or i32 %[[BF_CLEAR12]], %[[BF_SHL]]
+// CHECK: store i32 %[[BF_SET13]], i32* %[[V4]], align 1
+
+void test_zero_width_bitfield(struct S *a) {
+  a->f1 += 1;
+  a->f2 += 2;
+}
diff --git a/test/CodeGen/no-common.c b/test/CodeGen/no-common.c
index 8d2c4d7..f3a6a33 100644
--- a/test/CodeGen/no-common.c
+++ b/test/CodeGen/no-common.c
@@ -1,15 +1,15 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-DEFAULT
 // RUN: %clang_cc1 %s -fno-common -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-NOCOMMON
 
-// CHECK-DEFAULT: @x = common global
-// CHECK-NOCOMMON: @x = global
+// CHECK-DEFAULT: @x = common {{(dso_local )?}}global
+// CHECK-NOCOMMON: @x = {{(dso_local )?}}global
 int x;
 
-// CHECK-DEFAULT: @ABC = global
-// CHECK-NOCOMMON: @ABC = global
+// CHECK-DEFAULT: @ABC = {{(dso_local )?}}global
+// CHECK-NOCOMMON: @ABC = {{(dso_local )?}}global
 typedef void* (*fn_t)(long a, long b, char *f, int c);
 fn_t ABC __attribute__ ((nocommon));
 
-// CHECK-DEFAULT: @y = common global
-// CHECK-NOCOMMON: @y = common global
+// CHECK-DEFAULT: @y = common {{(dso_local )?}}global
+// CHECK-NOCOMMON: @y = common {{(dso_local )?}}global
 int y __attribute__((common));
diff --git a/test/CodeGen/no-opt-volatile-memcpy.c b/test/CodeGen/no-opt-volatile-memcpy.c
index bf98df3..2781475 100644
--- a/test/CodeGen/no-opt-volatile-memcpy.c
+++ b/test/CodeGen/no-opt-volatile-memcpy.c
@@ -18,10 +18,10 @@
 // CHECK: %[[LS:.*]] = alloca %struct.s, align 4
 // CHECK-NEXT: %[[ZERO:.*]] = bitcast %struct.s* %[[LS]] to i8*
 // CHECK-NEXT: %[[ONE:.*]] = bitcast %struct.s* %[[LS]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* %[[ZERO]], i8* %[[ONE]], i64 132, i32 4, i1 true)
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 %[[ZERO]], i8* align 4 %[[ONE]], i64 132, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true)
 // CHECK-NEXT: %[[TWO:.*]] = bitcast %struct.s* %[[LS]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* %[[TWO]], i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 %[[TWO]], i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true)
 
 
 struct s1 {
@@ -35,6 +35,6 @@
   s.y = gs;
 }
 // CHECK-LABEL: define void @fee()
-// CHECK: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
+// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i64 132, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true)
 
diff --git a/test/CodeGen/noplt.c b/test/CodeGen/noplt.c
new file mode 100644
index 0000000..20e2e07
--- /dev/null
+++ b/test/CodeGen/noplt.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -emit-llvm -fno-plt %s -o - | FileCheck %s -check-prefix=CHECK-NOPLT -check-prefix=CHECK-NOPLT-METADATA
+
+// CHECK-NOPLT: Function Attrs: nonlazybind
+// CHECK-NOPLT-NEXT: declare {{.*}}i32 @foo
+// CHECK-NOPLT-METADATA: !"RtLibUseGOT"
+int foo();
+
+int bar() {
+  return foo();
+}
diff --git a/test/CodeGen/overloadable.c b/test/CodeGen/overloadable.c
index 1feb12f..f4fd0b3 100644
--- a/test/CodeGen/overloadable.c
+++ b/test/CodeGen/overloadable.c
@@ -41,7 +41,7 @@
 void addrof_single(char *a) __attribute__((overloadable, enable_if(0, "")));
 void addrof_single(char *a) __attribute__((overloadable));
 
-// CHECK-LABEL: define void @foo
+// CHECK-LABEL: define {{(dso_local )?}}void @foo
 void foo() {
   // CHECK: store void (i8*)* @_Z11addrof_manyPc
   void (*p1)(char *) = &addrof_many;
@@ -64,7 +64,7 @@
 void ovl_bar(char *) __attribute__((overloadable));
 void ovl_bar(int) __attribute__((overloadable));
 
-// CHECK-LABEL: define void @bar
+// CHECK-LABEL: define {{(dso_local )?}}void @bar
 void bar() {
   char charbuf[1];
   unsigned char ucharbuf[1];
@@ -79,7 +79,7 @@
 void ovl_baz(unsigned int *, unsigned int) __attribute__((overloadable));
 void ovl_baz2(int, int *) __attribute__((overloadable));
 void ovl_baz2(unsigned int, unsigned int *) __attribute__((overloadable));
-// CHECK-LABEL: define void @baz
+// CHECK-LABEL: define {{(dso_local )?}}void @baz
 void baz() {
   unsigned int j;
   // Initial rules for incompatible pointer conversions made this overload
diff --git a/test/CodeGen/packed-nest-unpacked.c b/test/CodeGen/packed-nest-unpacked.c
index e2bbd41..4124a8a 100644
--- a/test/CodeGen/packed-nest-unpacked.c
+++ b/test/CodeGen/packed-nest-unpacked.c
@@ -9,26 +9,26 @@
 // <rdar://problem/10463337>
 struct X test1() {
   // CHECK: @test1
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false)
   return g.y;
 }
 struct X test2() {
   // CHECK: @test2
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false)
   struct X a = g.y;
   return a;
 }
 
 void test3(struct X a) {
   // CHECK: @test3
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* {{.*}}, i64 24, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* {{.*}}, i64 24, i1 false)
   g.y = a;
 }
 
 // <rdar://problem/10530444>
 void test4() {
   // CHECK: @test4
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false)
   f(g.y);
 }
 
@@ -42,7 +42,7 @@
 // <rdar://problem/11220251>
 void test6() {
   // CHECK: @test6
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* %{{.*}}, i64 24, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* align 4 %{{.*}}, i64 24, i1 false)
   g.y = foo();
 }
 
diff --git a/test/CodeGen/packed-structure.c b/test/CodeGen/packed-structure.c
index 7d1183d..91cacb4 100644
--- a/test/CodeGen/packed-structure.c
+++ b/test/CodeGen/packed-structure.c
@@ -29,7 +29,7 @@
 // CHECK-FUNCTIONS: ret i32 [[s0_load_y]]
 int s0_load_y(struct s0 *a) { return a->y; }
 // CHECK-FUNCTIONS-LABEL: define void @s0_copy
-// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i32 4, i1 false)
+// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 {{.*}}, i8* align 4 {{.*}}, i64 8, i1 false)
 void s0_copy(struct s0 *a, struct s0 *b) { *b = *a; }
 
 //
@@ -55,7 +55,7 @@
 // CHECK-FUNCTIONS: ret i32 [[s1_load_y]]
 int s1_load_y(struct s1 *a) { return a->y; }
 // CHECK-FUNCTIONS-LABEL: define void @s1_copy
-// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i32 1, i1 false)
+// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
 void s1_copy(struct s1 *a, struct s1 *b) { *b = *a; }
 
 //
@@ -83,7 +83,7 @@
 // CHECK-FUNCTIONS: ret i32 [[s2_load_y]]
 int s2_load_y(struct s2 *a) { return a->y; }
 // CHECK-FUNCTIONS-LABEL: define void @s2_copy
-// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i32 2, i1 false)
+// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 {{.*}}, i8* align 2 {{.*}}, i64 8, i1 false)
 void s2_copy(struct s2 *a, struct s2 *b) { *b = *a; }
 
 struct __attribute__((packed, aligned)) s3 {
diff --git a/test/CodeGen/partial-reinitialization2.c b/test/CodeGen/partial-reinitialization2.c
index c4f6567..9c3ce99 100644
--- a/test/CodeGen/partial-reinitialization2.c
+++ b/test/CodeGen/partial-reinitialization2.c
@@ -15,7 +15,7 @@
 // CHECK-LABEL: test1
 void test1(void)
 {
-  // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i32 {{[0-9]}}, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i1 false)
   // CHECK: store i8 120, i8* %
 
   struct LP1 l = { .p1 = g1, .p1.x[2] = 'x' };
@@ -24,7 +24,7 @@
 // CHECK-LABEL: test2
 void test2(void)
 {
-  // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i32 {{[0-9]}}, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i1 false)
   // CHECK: store i8 114, i8* %
 
   struct LP1 l = { .p1 = g1, .p1.x[1] = 'r' };
@@ -33,7 +33,7 @@
 // CHECK-LABEL: test3
 void test3(void)
 {
-  // CHECK: call void @llvm.memcpy{{.*}}%struct.P2* @g2{{.*}}i64 12, i32 {{[0-9]}}, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}}%struct.P2* @g2{{.*}}i64 12, i1 false)
   // CHECK: store i32 10, i32* %
 
   struct LP2 l = { .p2 = g2, .p2.b = 10 };
@@ -66,7 +66,7 @@
   // CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get123()
   // CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9]+]]
   // CHECK: [[TMP1:%[a-z0-9]+]] = bitcast {{.*}}[[TMP0]]
-  // CHECK: call void @llvm.memcpy{{.*}}[[TMP1]], i64 12, i32 {{[0-9]}}, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}}[[TMP1]], i64 12, i1 false)
   // CHECK: store i32 100, i32* %
 
   struct LUP2 { union UP2 up; } var = { get123(), .up.p2.a = 100 };
@@ -76,12 +76,12 @@
 void test5(void)
 {
   // .l3 = g3
-  // CHECK: call void @llvm.memcpy{{.*}}%struct.LP3, %struct.LP3* @g3{{.*}}i64 12, i32 {{[0-9]}}, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}}%struct.LP3, %struct.LP3* @g3{{.*}}i64 12, i1 false)
 
   // .l3.p1 = { [0] = g1 } implicitly sets [1] to zero
-  // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i32 {{[0-9]}}, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i1 false)
   // CHECK: getelementptr{{.*}}%struct.P1, %struct.P1*{{.*}}i64 1
-  // CHECK: call void @llvm.memset{{.*}}i8 0, i64 6, i32 {{[0-9]}}, i1 false)
+  // CHECK: call void @llvm.memset{{.*}}i8 0, i64 6, i1 false)
 
   // .l3.p1[1].x[1] = 'x'
   // CHECK: store i8 120, i8* %
@@ -98,7 +98,7 @@
   // CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get235()
   // CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9]+]]
   // CHECK: [[TMP1:%[a-z0-9]+]] = bitcast {{.*}}[[TMP0]]
-  // CHECK: call void @llvm.memcpy{{.*}}[[TMP1]], i64 12, i32 {{[0-9]}}, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}}[[TMP1]], i64 12, i1 false)
 
   // CHECK: store i32 10, i32* %
 
diff --git a/test/CodeGen/pgo-sample-thinlto-summary.c b/test/CodeGen/pgo-sample-thinlto-summary.c
index 7045db0..eae35a0 100644
--- a/test/CodeGen/pgo-sample-thinlto-summary.c
+++ b/test/CodeGen/pgo-sample-thinlto-summary.c
@@ -13,8 +13,8 @@
     g += baz(i);
 }
 
-// SAMPLEPGO-LABEL: define void @bar
-// THINLTO-LABEL: define void @bar
+// SAMPLEPGO-LABEL: define {{(dso_local )?}}void @bar
+// THINLTO-LABEL: define {{(dso_local )?}}void @bar
 // SAMPLEPGO-NOT: call{{.*}}foo
 // THINLTO: call{{.*}}foo
 void bar(int n) {
@@ -23,8 +23,8 @@
 }
 
 // Checks if loop unroll is invoked by normal compile, but not thinlto compile.
-// SAMPLEPGO-LABEL: define void @unroll
-// THINLTO-LABEL: define void @unroll
+// SAMPLEPGO-LABEL: define {{(dso_local )?}}void @unroll
+// THINLTO-LABEL: define {{(dso_local )?}}void @unroll
 // SAMPLEPGO: call{{.*}}baz
 // SAMPLEPGO: call{{.*}}baz
 // THINLTO: call{{.*}}baz
@@ -35,8 +35,8 @@
 }
 
 // Checks that icp is not invoked for ThinLTO, but invoked for normal samplepgo.
-// SAMPLEPGO-LABEL: define void @icp
-// THINLTO-LABEL: define void @icp
+// SAMPLEPGO-LABEL: define {{(dso_local )?}}void @icp
+// THINLTO-LABEL: define {{(dso_local )?}}void @icp
 // SAMPLEPGO: if.true.direct_targ
 // FIXME: the following condition needs to be reversed once
 //        LTOPreLinkDefaultPipeline is customized.
diff --git a/test/CodeGen/ppc-varargs-struct.c b/test/CodeGen/ppc-varargs-struct.c
index d7936a1..c201074 100644
--- a/test/CodeGen/ppc-varargs-struct.c
+++ b/test/CodeGen/ppc-varargs-struct.c
@@ -54,7 +54,7 @@
 // CHECK-PPC-NEXT:  [[AGGR:%[a-z0-9]+]] = load %struct.x*, %struct.x** [[VAARG_ADDR]]
 // CHECK-PPC-NEXT:  [[DEST:%[0-9]+]] = bitcast %struct.x* %t to i8*
 // CHECK-PPC-NEXT:  [[SRC:%.+]] = bitcast %struct.x* [[AGGR]] to i8*
-// CHECK-PPC-NEXT:  call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DEST]], i8* [[SRC]], i32 16, i32 8, i1 false)
+// CHECK-PPC-NEXT:  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[DEST]], i8* align 8 [[SRC]], i32 16, i1 false)
 
   int v = va_arg (ap, int);
   
diff --git a/test/CodeGen/ppc64-align-struct.c b/test/CodeGen/ppc64-align-struct.c
index 5894a6a..3cf3e04 100644
--- a/test/CodeGen/ppc64-align-struct.c
+++ b/test/CodeGen/ppc64-align-struct.c
@@ -56,7 +56,7 @@
 // CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test1*
 // CHECK: [[DEST:%.*]] = bitcast %struct.test1* %y to i8*
 // CHECK: [[SRC:%.*]] = bitcast %struct.test1* [[T0]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 8, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST]], i8* align 8 [[SRC]], i64 8, i1 false)
 struct test1 test1va (int x, ...)
 {
   struct test1 y;
@@ -79,7 +79,7 @@
 // CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test2*
 // CHECK: [[DEST:%.*]] = bitcast %struct.test2* %y to i8*
 // CHECK: [[SRC:%.*]] = bitcast %struct.test2* [[T0]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[DEST]], i8* align 16 [[SRC]], i64 16, i1 false)
 struct test2 test2va (int x, ...)
 {
   struct test2 y;
@@ -102,7 +102,7 @@
 // CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test3*
 // CHECK: [[DEST:%.*]] = bitcast %struct.test3* %y to i8*
 // CHECK: [[SRC:%.*]] = bitcast %struct.test3* [[T0]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 [[DEST]], i8* align 16 [[SRC]], i64 32, i1 false)
 struct test3 test3va (int x, ...)
 {
   struct test3 y;
@@ -121,7 +121,7 @@
 // CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test4*
 // CHECK: [[DEST:%.*]] = bitcast %struct.test4* %y to i8*
 // CHECK: [[SRC:%.*]] = bitcast %struct.test4* [[T0]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 12, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST]], i8* align 8 [[SRC]], i64 12, i1 false)
 struct test4 test4va (int x, ...)
 {
   struct test4 y;
@@ -140,7 +140,7 @@
 // CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test_longdouble*
 // CHECK: [[DEST:%.*]] = bitcast %struct.test_longdouble* %y to i8*
 // CHECK: [[SRC:%.*]] = bitcast %struct.test_longdouble* [[T0]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[DEST]], i8* align 8 [[SRC]], i64 16, i1 false)
 struct test_longdouble { long double x; };
 struct test_longdouble testva_longdouble (int x, ...)
 {
@@ -164,7 +164,7 @@
 // CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test_vector*
 // CHECK: [[DEST:%.*]] = bitcast %struct.test_vector* %y to i8*
 // CHECK: [[SRC:%.*]] = bitcast %struct.test_vector* [[T0]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[DEST]], i8* align 16 [[SRC]], i64 16, i1 false)
 struct test_vector { vector int x; };
 struct test_vector testva_vector (int x, ...)
 {
diff --git a/test/CodeGen/ppc64-soft-float.c b/test/CodeGen/ppc64-soft-float.c
index 95b1829..84ac2d5 100644
--- a/test/CodeGen/ppc64-soft-float.c
+++ b/test/CodeGen/ppc64-soft-float.c
@@ -92,7 +92,7 @@
 // CHECK-BE: %[[TMP0:[^ ]+]] = alloca %struct.f3, align 4
 // CHECK: %[[TMP1:[^ ]+]] = alloca [2 x i64]
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [2 x i64]* %[[TMP1]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f3* @global_f3 to i8*), i64 12, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f3* @global_f3 to i8*), i64 12, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [2 x i64], [2 x i64]* %[[TMP1]]
 // CHECK-LE: call { i64, i64 } @func_f3([2 x i64] %[[TMP3]])
 // CHECK-BE: call void @func_f3(%struct.f3* sret %[[TMP0]], [2 x i64] %[[TMP3]])
@@ -111,7 +111,7 @@
 // CHECK: %[[TMP0:[^ ]+]] = alloca %struct.f5, align 4
 // CHECK: %[[TMP1:[^ ]+]] = alloca [3 x i64]
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [3 x i64]* %[[TMP1]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f5* @global_f5 to i8*), i64 20, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f5* @global_f5 to i8*), i64 20, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [3 x i64], [3 x i64]* %[[TMP1]]
 // CHECK: call void @func_f5(%struct.f5* sret %[[TMP0]], [3 x i64] %[[TMP3]])
 struct f5 global_f5;
@@ -128,7 +128,7 @@
 // CHECK: %[[TMP0:[^ ]+]] = alloca %struct.f7, align 4
 // CHECK: %[[TMP1:[^ ]+]] = alloca [4 x i64], align 8
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [4 x i64]* %[[TMP1]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f7* @global_f7 to i8*), i64 28, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f7* @global_f7 to i8*), i64 28, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [4 x i64], [4 x i64]* %[[TMP1]], align 8
 // CHECK: call void @func_f7(%struct.f7* sret %[[TMP0]], [4 x i64] %[[TMP3]])
 struct f7 global_f7;
@@ -144,7 +144,7 @@
 // CHECK-LABEL: @call_f9
 // CHECK: %[[TMP1:[^ ]+]] = alloca [5 x i64]
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f9* @global_f9 to i8*), i64 36, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [5 x i64], [5 x i64]* %[[TMP1]]
 // CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]])
 struct f9 global_f9;
@@ -162,7 +162,7 @@
 // CHECK-BE: %[[TMPX:[^ ]+]] = alloca %struct.fabc, align 4
 // CHECK: %[[TMP0:[^ ]+]] = alloca [2 x i64], align 8
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [2 x i64]* %[[TMP0]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.fabc* @global_fabc to i8*), i64 12, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.fabc* @global_fabc to i8*), i64 12, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [2 x i64], [2 x i64]* %[[TMP0]], align 8
 // CHECK-LE: %call = call { i64, i64 } @func_fabc([2 x i64] %[[TMP3]])
 // CHECK-BE: call void @func_fabc(%struct.fabc* sret %[[TMPX]], [2 x i64] %[[TMP3]])
diff --git a/test/CodeGen/ppc64le-aggregates.c b/test/CodeGen/ppc64le-aggregates.c
index f78f26a..a7780be 100644
--- a/test/CodeGen/ppc64le-aggregates.c
+++ b/test/CodeGen/ppc64le-aggregates.c
@@ -104,7 +104,7 @@
 // CHECK-LABEL: @call_f9
 // CHECK: %[[TMP1:[^ ]+]] = alloca [5 x i64]
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f9* @global_f9 to i8*), i64 36, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [5 x i64], [5 x i64]* %[[TMP1]]
 // CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]])
 struct f9 global_f9;
diff --git a/test/CodeGen/pr19841.cpp b/test/CodeGen/pr19841.cpp
index 8fef683..282b2e6 100644
--- a/test/CodeGen/pr19841.cpp
+++ b/test/CodeGen/pr19841.cpp
@@ -12,7 +12,7 @@
   unsigned char _highlightColorTableVGA[];
   static const unsigned char b[];
 };
-// CHECK: [[Common_A_b:@[^ ]+]] = constant [1 x i8] zeroinitializer
+// CHECK: [[Common_A_b:@[^ ]+]] = {{(dso_local )?}}constant [1 x i8] zeroinitializer
 class B {
 public:
   Common::RenderMode _configRenderMode;
diff --git a/test/CodeGen/pr4349.c b/test/CodeGen/pr4349.c
index e39dc2c..9ec6f2f 100644
--- a/test/CodeGen/pr4349.c
+++ b/test/CodeGen/pr4349.c
@@ -16,22 +16,22 @@
 {
     void *ptr;
 };
-// CHECK: @svars1 = global [1 x %struct.svar] [%struct.svar { i8* bitcast (%struct.cpu* @cpu to i8*) }]
+// CHECK: @svars1 = {{(dso_local )?}}global [1 x %struct.svar] [%struct.svar { i8* bitcast (%struct.cpu* @cpu to i8*) }]
 struct svar svars1[] =
 {
     { &((cpu.pc).w[0]) }
 };
-// CHECK: @svars2 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8, i8* bitcast (%struct.cpu* @cpu to i8*), i64 1) }]
+// CHECK: @svars2 = {{(dso_local )?}}global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8, i8* bitcast (%struct.cpu* @cpu to i8*), i64 1) }]
 struct svar svars2[] =
 {
     { &((cpu.pc).b[0][1]) }
 };
-// CHECK: @svars3 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8, i8* bitcast (%struct.cpu* @cpu to i8*), i64 2) }]
+// CHECK: @svars3 = {{(dso_local )?}}global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8, i8* bitcast (%struct.cpu* @cpu to i8*), i64 2) }]
 struct svar svars3[] =
 {
     { &((cpu.pc).w[1]) }
 };
-// CHECK: @svars4 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8, i8* bitcast (%struct.cpu* @cpu to i8*), i64 3) }]
+// CHECK: @svars4 = {{(dso_local )?}}global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8, i8* bitcast (%struct.cpu* @cpu to i8*), i64 3) }]
 struct svar svars4[] =
 {
     { &((cpu.pc).b[1][1]) }
diff --git a/test/CodeGen/pragma-comment.c b/test/CodeGen/pragma-comment.c
index fae9b8f..1896e5c 100644
--- a/test/CodeGen/pragma-comment.c
+++ b/test/CodeGen/pragma-comment.c
@@ -23,10 +23,9 @@
 // CHECK: ![[bar]] = !{!" /bar=2"}
 // CHECK: ![[foo]] = !{!" /foo=\22foo bar\22"}
 
-// LINUX: !{!"-lmsvcrt.lib"}
-// LINUX: !{!"-lkernel32"}
-// LINUX: !{!"-lUSER32.LIB"}
-// LINUX: !{!" /bar=2"}
+// LINUX: !{!"lib", !"msvcrt.lib"}
+// LINUX: !{!"lib", !"kernel32"}
+// LINUX: !{!"lib", !"USER32.LIB"}
 
 // PS4: !{!"\01msvcrt.lib"}
 // PS4: !{!"\01kernel32"}
diff --git a/test/CodeGen/preserve-call-conv.c b/test/CodeGen/preserve-call-conv.c
index b67e29f..2dff34e 100644
--- a/test/CodeGen/preserve-call-conv.c
+++ b/test/CodeGen/preserve-call-conv.c
@@ -8,13 +8,13 @@
 // is lowered to the corresponding calling convention attrribute at the LLVM IR
 // level.
 void foo() __attribute__((preserve_most)) {
-  // CHECK-LABEL: define preserve_mostcc void @foo()
+  // CHECK-LABEL: define {{(dso_local )?}}preserve_mostcc void @foo()
 }
 
 // Check that the preserve_most calling convention attribute at the source level
 // is lowered to the corresponding calling convention attrribute at the LLVM IR
 // level.
 void boo() __attribute__((preserve_all)) {
-  // CHECK-LABEL: define preserve_allcc void @boo()
+  // CHECK-LABEL: define {{(dso_local )?}}preserve_allcc void @boo()
 }
 
diff --git a/test/CodeGen/push-hidden-visibility-subclass.cpp b/test/CodeGen/push-hidden-visibility-subclass.cpp
new file mode 100644
index 0000000..82bf653
--- /dev/null
+++ b/test/CodeGen/push-hidden-visibility-subclass.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-linux-unknown -emit-llvm %s -o - | FileCheck %s
+
+#pragma GCC visibility push(hidden)
+
+struct Base {
+  virtual ~Base() = default;
+  virtual void* Alloc() = 0;
+};
+
+class Child : public Base {
+public:
+  Child() = default;
+  void* Alloc();
+};
+
+void test() {
+  Child x;
+}
+
+// CHECK: @_ZTV5Child = external hidden unnamed_addr constant
diff --git a/test/CodeGen/rdpid-builtins.c b/test/CodeGen/rdpid-builtins.c
new file mode 100644
index 0000000..35516b0
--- /dev/null
+++ b/test/CodeGen/rdpid-builtins.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -target-feature +rdpid -emit-llvm -o - %s | FileCheck %s
+
+
+#include <x86intrin.h>
+
+unsigned int test_rdpid_u32(void) {
+// CHECK-LABEL: @test_rdpid_u32
+// CHECK: call i32 @llvm.x86.rdpid
+  return _rdpid_u32();
+}
diff --git a/test/CodeGen/regcall.c b/test/CodeGen/regcall.c
index 350a82d..b738990 100644
--- a/test/CodeGen/regcall.c
+++ b/test/CodeGen/regcall.c
@@ -6,34 +6,34 @@
 #include <xmmintrin.h>
 
 void __regcall v1(int a, int b) {}
-// Win32: define x86_regcallcc void @__regcall3__v1(i32 inreg %a, i32 inreg %b)
-// Win64: define x86_regcallcc void @__regcall3__v1(i32 %a, i32 %b)
+// Win32: define dso_local x86_regcallcc void @__regcall3__v1(i32 inreg %a, i32 inreg %b)
+// Win64: define dso_local x86_regcallcc void @__regcall3__v1(i32 %a, i32 %b)
 // Lin32: define x86_regcallcc void @__regcall3__v1(i32 inreg %a, i32 inreg %b)
 // Lin64: define x86_regcallcc void @__regcall3__v1(i32 %a, i32 %b)
 
 void __attribute__((regcall)) v1b(int a, int b) {}
-// Win32: define x86_regcallcc void @__regcall3__v1b(i32 inreg %a, i32 inreg %b)
-// Win64: define x86_regcallcc void @__regcall3__v1b(i32 %a, i32 %b)
+// Win32: define dso_local x86_regcallcc void @__regcall3__v1b(i32 inreg %a, i32 inreg %b)
+// Win64: define dso_local x86_regcallcc void @__regcall3__v1b(i32 %a, i32 %b)
 // Lin32: define x86_regcallcc void @__regcall3__v1b(i32 inreg %a, i32 inreg %b)
 // Lin64: define x86_regcallcc void @__regcall3__v1b(i32 %a, i32 %b)
 
 void __regcall v2(char a, char b) {}
-// Win32: define x86_regcallcc void @__regcall3__v2(i8 inreg signext %a, i8 inreg signext %b)
-// Win64: define x86_regcallcc void @__regcall3__v2(i8 %a, i8 %b)
+// Win32: define dso_local x86_regcallcc void @__regcall3__v2(i8 inreg signext %a, i8 inreg signext %b)
+// Win64: define dso_local x86_regcallcc void @__regcall3__v2(i8 %a, i8 %b)
 // Lin32: define x86_regcallcc void @__regcall3__v2(i8 inreg signext %a, i8 inreg signext %b)
 // Lin64: define x86_regcallcc void @__regcall3__v2(i8 signext %a, i8 signext %b)
 
 struct Small { int x; };
 void __regcall v3(int a, struct Small b, int c) {}
-// Win32: define x86_regcallcc void @__regcall3__v3(i32 inreg %a, i32 %b.0, i32 inreg %c)
-// Win64: define x86_regcallcc void @__regcall3__v3(i32 %a, i32 %b.coerce, i32 %c)
+// Win32: define dso_local x86_regcallcc void @__regcall3__v3(i32 inreg %a, i32 %b.0, i32 inreg %c)
+// Win64: define dso_local x86_regcallcc void @__regcall3__v3(i32 %a, i32 %b.coerce, i32 %c)
 // Lin32: define x86_regcallcc void @__regcall3__v3(i32 inreg %a, i32 inreg, i32 %b.0, i32 inreg %c)
 // Lin64: define x86_regcallcc void @__regcall3__v3(i32 %a, i32 %b.coerce, i32 %c)
 
 struct Large { int a[5]; };
 void __regcall v4(int a, struct Large b, int c) {}
-// Win32: define x86_regcallcc void @__regcall3__v4(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
-// Win64: define x86_regcallcc void @__regcall3__v4(i32 %a, %struct.Large* %b, i32 %c)
+// Win32: define dso_local x86_regcallcc void @__regcall3__v4(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
+// Win64: define dso_local x86_regcallcc void @__regcall3__v4(i32 %a, %struct.Large* %b, i32 %c)
 // Lin32: define x86_regcallcc void @__regcall3__v4(i32 inreg %a, %struct.Large* byval align 4 %b, i32 %c)
 // Lin64: define x86_regcallcc void @__regcall3__v4(i32 %a, [5 x i32] %b.coerce, i32 %c)
 
@@ -42,8 +42,8 @@
 struct HFA5 { double v, w, x, y, z; };
 
 void __regcall hfa1(int a, struct HFA4 b, int c) {}
-// Win32: define x86_regcallcc void @__regcall3__hfa1(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c)
-// Win64: define x86_regcallcc void @__regcall3__hfa1(i32 %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 %c)
+// Win32: define dso_local x86_regcallcc void @__regcall3__hfa1(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c)
+// Win64: define dso_local x86_regcallcc void @__regcall3__hfa1(i32 %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 %c)
 // Lin32: define x86_regcallcc void @__regcall3__hfa1(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c)
 // Lin64: define x86_regcallcc void @__regcall3__hfa1(i32 %a, double %b.coerce0, double %b.coerce1, double %b.coerce2, double %b.coerce3, i32 %c)
 
@@ -51,16 +51,16 @@
 // indirectly. Additional vector arguments can consume the rest of the SSE
 // registers.
 void __regcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
-// Win32: define x86_regcallcc void @__regcall3__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double* inreg)
-// Win64: define x86_regcallcc void @__regcall3__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double %c)
+// Win32: define dso_local x86_regcallcc void @__regcall3__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double* inreg)
+// Win64: define dso_local x86_regcallcc void @__regcall3__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double %c)
 // Lin32: define x86_regcallcc void @__regcall3__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double* inreg)
 // Lin64: define x86_regcallcc void @__regcall3__hfa2(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %b.coerce0, double %b.coerce1, double %b.coerce2, double %b.coerce3, double %c)
 
 // Ensure that we pass builtin types directly while counting them against the
 // SSE register usage.
 void __regcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
-// Win32: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1)
-// Win64: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1)
+// Win32: define dso_local x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1)
+// Win64: define dso_local x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1)
 // Lin32: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1)
 // Lin64: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.coerce0, double %f.coerce1)
 
@@ -68,16 +68,16 @@
 // Because they are not classified as homogeneous, they don't get special
 // handling to ensure alignment.
 void __regcall hfa4(struct HFA5 a) {}
-// Win32: define x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* byval align 4)
-// Win64: define x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* %a)
+// Win32: define dso_local x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* byval align 4)
+// Win64: define dso_local x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* %a)
 // Lin32: define x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* byval align 4 %a)
 // Lin64: define x86_regcallcc void @__regcall3__hfa4(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %a.coerce4)
 
 // Return HFAs of 4 or fewer elements in registers.
 static struct HFA2 g_hfa2;
 struct HFA2 __regcall hfa5(void) { return g_hfa2; }
-// Win32: define x86_regcallcc %struct.HFA2 @__regcall3__hfa5()
-// Win64: define x86_regcallcc %struct.HFA2 @__regcall3__hfa5()
+// Win32: define dso_local x86_regcallcc %struct.HFA2 @__regcall3__hfa5()
+// Win64: define dso_local x86_regcallcc %struct.HFA2 @__regcall3__hfa5()
 // Lin32: define x86_regcallcc %struct.HFA2 @__regcall3__hfa5()
 // Lin64: define x86_regcallcc %struct.HFA2 @__regcall3__hfa5()
 
@@ -86,20 +86,20 @@
 struct HVA4 { v4f32 w, x, y, z; };
 
 void __regcall hva1(int a, struct HVA4 b, int c) {}
-// Win32: define x86_regcallcc void @__regcall3__hva1(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c)
-// Win64: define x86_regcallcc void @__regcall3__hva1(i32 %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 %c)
+// Win32: define dso_local x86_regcallcc void @__regcall3__hva1(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c)
+// Win64: define dso_local x86_regcallcc void @__regcall3__hva1(i32 %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 %c)
 // Lin32: define x86_regcallcc void @__regcall3__hva1(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c)
 // Lin64: define x86_regcallcc void @__regcall3__hva1(i32 %a, <4 x float> %b.coerce0, <4 x float> %b.coerce1, <4 x float> %b.coerce2, <4 x float> %b.coerce3, i32 %c)
 
 void __regcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {}
-// Win32: define x86_regcallcc void @__regcall3__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float>* inreg)
-// Win64: define x86_regcallcc void @__regcall3__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c)
+// Win32: define dso_local x86_regcallcc void @__regcall3__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float>* inreg)
+// Win64: define dso_local x86_regcallcc void @__regcall3__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c)
 // Lin32: define x86_regcallcc void @__regcall3__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float>* inreg)
 // Lin64: define x86_regcallcc void @__regcall3__hva2(<4 x float> %a.coerce0, <4 x float> %a.coerce1, <4 x float> %a.coerce2, <4 x float> %a.coerce3, <4 x float> %b.coerce0, <4 x float> %b.coerce1, <4 x float> %b.coerce2, <4 x float> %b.coerce3, <4 x float> %c)
 
 void __regcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {}
-// Win32: define x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1)
-// Win64: define x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1)
+// Win32: define dso_local x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1)
+// Win64: define dso_local x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1)
 // Lin32: define x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1)
 // Lin64: define x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.coerce0, <4 x float> %f.coerce1)
 
@@ -107,14 +107,14 @@
 struct OddSizeHVA { v3f32 x, y; };
 
 void __regcall odd_size_hva(struct OddSizeHVA a) {}
-// Win32: define x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1)
-// Win64: define x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1)
+// Win32: define dso_local x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1)
+// Win64: define dso_local x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1)
 // Lin32: define x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1)
 // Lin64: define x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.coerce0, <3 x float> %a.coerce1)
 
 struct HFA6 { __m128 f[4]; };
 struct HFA6 __regcall ret_reg_reused(struct HFA6 a, struct HFA6 b, struct HFA6 c, struct HFA6 d){ struct HFA6 h; return h;}
-// Win32: define x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, %struct.HFA6* inreg %c, %struct.HFA6* inreg %d)
-// Win64: define x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c.0, <4 x float> %c.1, <4 x float> %c.2, <4 x float> %c.3, <4 x float> %d.0, <4 x float> %d.1, <4 x float> %d.2, <4 x float> %d.3)
+// Win32: define dso_local x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, %struct.HFA6* inreg %c, %struct.HFA6* inreg %d)
+// Win64: define dso_local x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c.0, <4 x float> %c.1, <4 x float> %c.2, <4 x float> %c.3, <4 x float> %d.0, <4 x float> %d.1, <4 x float> %d.2, <4 x float> %d.3)
 // Lin32: define x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, %struct.HFA6* inreg %c, %struct.HFA6* inreg %d)
 // Lin64: define x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused([4 x <4 x float>] %a.coerce, [4 x <4 x float>] %b.coerce, [4 x <4 x float>] %c.coerce, [4 x <4 x float>] %d.coerce)
diff --git a/test/CodeGen/riscv32-abi.c b/test/CodeGen/riscv32-abi.c
new file mode 100644
index 0000000..04eceb3
--- /dev/null
+++ b/test/CodeGen/riscv32-abi.c
@@ -0,0 +1,430 @@
+// RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple riscv32 -emit-llvm -fforce-enable-int128 %s -o - \
+// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-FORCEINT128
+
+#include <stddef.h>
+#include <stdint.h>
+
+// CHECK-LABEL: define void @f_void()
+void f_void(void) {}
+
+// Scalar arguments and return values smaller than the word size are extended
+// according to the sign of their type, up to 32 bits
+
+// CHECK-LABEL: define zeroext i1 @f_scalar_0(i1 zeroext %x)
+_Bool f_scalar_0(_Bool x) { return x; }
+
+// CHECK-LABEL: define signext i8 @f_scalar_1(i8 signext %x)
+int8_t f_scalar_1(int8_t x) { return x; }
+
+// CHECK-LABEL: define zeroext i8 @f_scalar_2(i8 zeroext %x)
+uint8_t f_scalar_2(uint8_t x) { return x; }
+
+// CHECK-LABEL: define i32 @f_scalar_3(i32 %x)
+int32_t f_scalar_3(int32_t x) { return x; }
+
+// CHECK-LABEL: define i64 @f_scalar_4(i64 %x)
+int64_t f_scalar_4(int64_t x) { return x; }
+
+#ifdef __SIZEOF_INT128__
+// CHECK-FORCEINT128-LABEL: define i128 @f_scalar_5(i128 %x)
+__int128_t f_scalar_5(__int128_t x) { return x; }
+#endif
+
+// CHECK-LABEL: define float @f_fp_scalar_1(float %x)
+float f_fp_scalar_1(float x) { return x; }
+
+// CHECK-LABEL: define double @f_fp_scalar_2(double %x)
+double f_fp_scalar_2(double x) { return x; }
+
+// Scalars larger than 2*xlen are passed/returned indirect. However, the
+// RISC-V LLVM backend can handle this fine, so the function doesn't need to
+// be modified.
+
+// CHECK-LABEL: define fp128 @f_fp_scalar_3(fp128 %x)
+long double f_fp_scalar_3(long double x) { return x; }
+
+// Empty structs or unions are ignored.
+
+struct empty_s {};
+
+// CHECK-LABEL: define void @f_agg_empty_struct()
+struct empty_s f_agg_empty_struct(struct empty_s x) {
+  return x;
+}
+
+union empty_u {};
+
+// CHECK-LABEL: define void @f_agg_empty_union()
+union empty_u f_agg_empty_union(union empty_u x) {
+  return x;
+}
+
+// Aggregates <= 2*xlen may be passed in registers, so will be coerced to
+// integer arguments. The rules for return are the same.
+
+struct tiny {
+  uint8_t a, b, c, d;
+};
+
+// CHECK-LABEL: define void @f_agg_tiny(i32 %x.coerce)
+void f_agg_tiny(struct tiny x) {
+  x.a += x.b;
+  x.c += x.d;
+}
+
+// CHECK-LABEL: define i32 @f_agg_tiny_ret()
+struct tiny f_agg_tiny_ret() {
+  return (struct tiny){1, 2, 3, 4};
+}
+
+typedef uint8_t v4i8 __attribute__((vector_size(4)));
+typedef int32_t v1i32 __attribute__((vector_size(4)));
+
+// CHECK-LABEL: define void @f_vec_tiny_v4i8(i32 %x.coerce)
+void f_vec_tiny_v4i8(v4i8 x) {
+  x[0] = x[1];
+  x[2] = x[3];
+}
+
+// CHECK-LABEL: define i32 @f_vec_tiny_v4i8_ret()
+v4i8 f_vec_tiny_v4i8_ret() {
+  return (v4i8){1, 2, 3, 4};
+}
+
+// CHECK-LABEL: define void @f_vec_tiny_v1i32(i32 %x.coerce)
+void f_vec_tiny_v1i32(v1i32 x) {
+  x[0] = 114;
+}
+
+// CHECK-LABEL: define i32 @f_vec_tiny_v1i32_ret()
+v1i32 f_vec_tiny_v1i32_ret() {
+  return (v1i32){1};
+}
+
+struct small {
+  int32_t a, *b;
+};
+
+// CHECK-LABEL: define void @f_agg_small([2 x i32] %x.coerce)
+void f_agg_small(struct small x) {
+  x.a += *x.b;
+  x.b = &x.a;
+}
+
+// CHECK-LABEL: define [2 x i32] @f_agg_small_ret()
+struct small f_agg_small_ret() {
+  return (struct small){1, 0};
+}
+
+typedef uint8_t v8i8 __attribute__((vector_size(8)));
+typedef int64_t v1i64 __attribute__((vector_size(8)));
+
+// CHECK-LABEL: define void @f_vec_small_v8i8(i64 %x.coerce)
+void f_vec_small_v8i8(v8i8 x) {
+  x[0] = x[7];
+}
+
+// CHECK-LABEL: define i64 @f_vec_small_v8i8_ret()
+v8i8 f_vec_small_v8i8_ret() {
+  return (v8i8){1, 2, 3, 4, 5, 6, 7, 8};
+}
+
+// CHECK-LABEL: define void @f_vec_small_v1i64(i64 %x.coerce)
+void f_vec_small_v1i64(v1i64 x) {
+  x[0] = 114;
+}
+
+// CHECK-LABEL: define i64 @f_vec_small_v1i64_ret()
+v1i64 f_vec_small_v1i64_ret() {
+  return (v1i64){1};
+}
+
+// Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
+// single 2*xlen-sized argument, to ensure that alignment can be maintained if
+// passed on the stack.
+
+struct small_aligned {
+  int64_t a;
+};
+
+// CHECK-LABEL: define void @f_agg_small_aligned(i64 %x.coerce)
+void f_agg_small_aligned(struct small_aligned x) {
+  x.a += x.a;
+}
+
+// CHECK-LABEL: define i64 @f_agg_small_aligned_ret(i64 %x.coerce)
+struct small_aligned f_agg_small_aligned_ret(struct small_aligned x) {
+  return (struct small_aligned){10};
+}
+
+// Aggregates greater > 2*xlen will be passed and returned indirectly
+struct large {
+  int32_t a, b, c, d;
+};
+
+// CHECK-LABEL: define void @f_agg_large(%struct.large* %x)
+void f_agg_large(struct large x) {
+  x.a = x.b + x.c + x.d;
+}
+
+// The address where the struct should be written to will be the first
+// argument
+// CHECK-LABEL: define void @f_agg_large_ret(%struct.large* noalias sret %agg.result, i32 %i, i8 signext %j)
+struct large f_agg_large_ret(int32_t i, int8_t j) {
+  return (struct large){1, 2, 3, 4};
+}
+
+typedef unsigned char v16i8 __attribute__((vector_size(16)));
+
+// CHECK-LABEL: define void @f_vec_large_v16i8(<16 x i8>*)
+void f_vec_large_v16i8(v16i8 x) {
+  x[0] = x[7];
+}
+
+// CHECK-LABEL: define void @f_vec_large_v16i8_ret(<16 x i8>* noalias sret %agg.result)
+v16i8 f_vec_large_v16i8_ret() {
+  return (v16i8){1, 2, 3, 4, 5, 6, 7, 8};
+}
+
+// Scalars passed on the stack should have signext/zeroext attributes (they
+// are anyext).
+
+// CHECK-LABEL: define i32 @f_scalar_stack_1(i32 %a.coerce, [2 x i32] %b.coerce, i64 %c.coerce, %struct.large* %d, i8 zeroext %e, i8 signext %f, i8 %g, i8 %h)
+int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c,
+                     struct large d, uint8_t e, int8_t f, uint8_t g, int8_t h) {
+  return g + h;
+}
+
+// CHECK-LABEL: define i32 @f_scalar_stack_2(i32 %a, i64 %b, float %c, double %d, fp128 %e, i8 zeroext %f, i8 %g, i8 %h)
+int f_scalar_stack_2(int32_t a, int64_t b, float c, double d, long double e,
+                     uint8_t f, int8_t g, uint8_t h) {
+  return g + h;
+}
+
+// Ensure that scalars passed on the stack are still determined correctly in
+// the presence of large return values that consume a register due to the need
+// to pass a pointer.
+
+// CHECK-LABEL: define void @f_scalar_stack_3(%struct.large* noalias sret %agg.result, i32 %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
+struct large f_scalar_stack_3(int32_t a, int64_t b, double c, long double d,
+                              uint8_t e, int8_t f, uint8_t g) {
+  return (struct large){a, e, f, g};
+}
+
+// CHECK-LABEL: define fp128 @f_scalar_stack_4(i32 %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
+long double f_scalar_stack_4(int32_t a, int64_t b, double c, long double d,
+                             uint8_t e, int8_t f, uint8_t g) {
+  return d;
+}
+
+// Aggregates and >=XLen scalars passed on the stack should be lowered just as
+// they would be if passed via registers.
+
+// CHECK-LABEL: define void @f_scalar_stack_5(double %a, i64 %b, double %c, i64 %d, i32 %e, i64 %f, float %g, double %h, fp128 %i)
+void f_scalar_stack_5(double a, int64_t b, double c, int64_t d, int e,
+                      int64_t f, float g, double h, long double i) {}
+
+// CHECK-LABEL: define void @f_agg_stack(double %a, i64 %b, double %c, i64 %d, i32 %e.coerce, [2 x i32] %f.coerce, i64 %g.coerce, %struct.large* %h)
+void f_agg_stack(double a, int64_t b, double c, int64_t d, struct tiny e,
+                 struct small f, struct small_aligned g, struct large h) {}
+
+// Ensure that ABI lowering happens as expected for vararg calls. For RV32
+// with the base integer calling convention there will be no observable
+// differences in the lowered IR for a call with varargs vs without.
+
+int f_va_callee(int, ...);
+
+// CHECK-LABEL: define void @f_va_caller()
+// CHECK: call i32 (i32, ...) @f_va_callee(i32 1, i32 2, i64 3, double 4.000000e+00, double 5.000000e+00, i32 {{%.*}}, [2 x i32] {{%.*}}, i64 {{%.*}}, %struct.large* {{%.*}})
+void f_va_caller() {
+  f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct tiny){6, 7, 8, 9},
+              (struct small){10, NULL}, (struct small_aligned){11},
+              (struct large){12, 13, 14, 15});
+}
+
+// CHECK-LABEL: define i32 @f_va_1(i8* %fmt, ...) {{.*}} {
+// CHECK:   [[FMT_ADDR:%.*]] = alloca i8*, align 4
+// CHECK:   [[VA:%.*]] = alloca i8*, align 4
+// CHECK:   [[V:%.*]] = alloca i32, align 4
+// CHECK:   store i8* %fmt, i8** [[FMT_ADDR]], align 4
+// CHECK:   [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK:   call void @llvm.va_start(i8* [[VA1]])
+// CHECK:   [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4
+// CHECK:   [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 4
+// CHECK:   store i8* [[ARGP_NEXT]], i8** [[VA]], align 4
+// CHECK:   [[TMP0:%.*]] = bitcast i8* [[ARGP_CUR]] to i32*
+// CHECK:   [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK:   store i32 [[TMP1]], i32* [[V]], align 4
+// CHECK:   [[VA2:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK:   call void @llvm.va_end(i8* [[VA2]])
+// CHECK:   [[TMP2:%.*]] = load i32, i32* [[V]], align 4
+// CHECK:   ret i32 [[TMP2]]
+// CHECK: }
+int f_va_1(char *fmt, ...) {
+  __builtin_va_list va;
+
+  __builtin_va_start(va, fmt);
+  int v = __builtin_va_arg(va, int);
+  __builtin_va_end(va);
+
+  return v;
+}
+
+// An "aligned" register pair (where the first register is even-numbered) is
+// used to pass varargs with 2x xlen alignment and 2x xlen size. Ensure the
+// correct offsets are used.
+
+// CHECK-LABEL: @f_va_2(
+// CHECK:         [[FMT_ADDR:%.*]] = alloca i8*, align 4
+// CHECK-NEXT:    [[VA:%.*]] = alloca i8*, align 4
+// CHECK-NEXT:    [[V:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4
+// CHECK-NEXT:    [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_start(i8* [[VA1]])
+// CHECK-NEXT:    [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i8* [[ARGP_CUR]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 7
+// CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], -8
+// CHECK-NEXT:    [[ARGP_CUR_ALIGNED:%.*]] = inttoptr i32 [[TMP2]] to i8*
+// CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT:    store i8* [[ARGP_NEXT]], i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[ARGP_CUR_ALIGNED]] to double*
+// CHECK-NEXT:    [[TMP4:%.*]] = load double, double* [[TMP3]], align 8
+// CHECK-NEXT:    store double [[TMP4]], double* [[V]], align 8
+// CHECK-NEXT:    [[VA2:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_end(i8* [[VA2]])
+// CHECK-NEXT:    [[TMP5:%.*]] = load double, double* [[V]], align 8
+// CHECK-NEXT:    ret double [[TMP5]]
+double f_va_2(char *fmt, ...) {
+  __builtin_va_list va;
+
+  __builtin_va_start(va, fmt);
+  double v = __builtin_va_arg(va, double);
+  __builtin_va_end(va);
+
+  return v;
+}
+
+// Two "aligned" register pairs.
+
+// CHECK-LABEL: @f_va_3(
+// CHECK:         [[FMT_ADDR:%.*]] = alloca i8*, align 4
+// CHECK-NEXT:    [[VA:%.*]] = alloca i8*, align 4
+// CHECK-NEXT:    [[V:%.*]] = alloca double, align 8
+// CHECK-NEXT:    [[W:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[X:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4
+// CHECK-NEXT:    [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_start(i8* [[VA1]])
+// CHECK-NEXT:    [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i8* [[ARGP_CUR]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 7
+// CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], -8
+// CHECK-NEXT:    [[ARGP_CUR_ALIGNED:%.*]] = inttoptr i32 [[TMP2]] to i8*
+// CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT:    store i8* [[ARGP_NEXT]], i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[ARGP_CUR_ALIGNED]] to double*
+// CHECK-NEXT:    [[TMP4:%.*]] = load double, double* [[TMP3]], align 8
+// CHECK-NEXT:    store double [[TMP4]], double* [[V]], align 8
+// CHECK-NEXT:    [[ARGP_CUR2:%.*]] = load i8*, i8** [[VA]], align 4
+// CHECK-NEXT:    [[ARGP_NEXT3:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR2]], i32 4
+// CHECK-NEXT:    store i8* [[ARGP_NEXT3]], i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[ARGP_CUR2]] to i32*
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+// CHECK-NEXT:    store i32 [[TMP6]], i32* [[W]], align 4
+// CHECK-NEXT:    [[ARGP_CUR4:%.*]] = load i8*, i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint i8* [[ARGP_CUR4]] to i32
+// CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP7]], 7
+// CHECK-NEXT:    [[TMP9:%.*]] = and i32 [[TMP8]], -8
+// CHECK-NEXT:    [[ARGP_CUR4_ALIGNED:%.*]] = inttoptr i32 [[TMP9]] to i8*
+// CHECK-NEXT:    [[ARGP_NEXT5:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR4_ALIGNED]], i32 8
+// CHECK-NEXT:    store i8* [[ARGP_NEXT5]], i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[ARGP_CUR4_ALIGNED]] to double*
+// CHECK-NEXT:    [[TMP11:%.*]] = load double, double* [[TMP10]], align 8
+// CHECK-NEXT:    store double [[TMP11]], double* [[X]], align 8
+// CHECK-NEXT:    [[VA6:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_end(i8* [[VA6]])
+// CHECK-NEXT:    [[TMP12:%.*]] = load double, double* [[V]], align 8
+// CHECK-NEXT:    [[TMP13:%.*]] = load double, double* [[X]], align 8
+// CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]]
+// CHECK-NEXT:    ret double [[ADD]]
+double f_va_3(char *fmt, ...) {
+  __builtin_va_list va;
+
+  __builtin_va_start(va, fmt);
+  double v = __builtin_va_arg(va, double);
+  int w = __builtin_va_arg(va, int);
+  double x = __builtin_va_arg(va, double);
+  __builtin_va_end(va);
+
+  return v + x;
+}
+
+// CHECK-LABEL: define i32 @f_va_4(i8* %fmt, ...) {{.*}} {
+// CHECK:         [[FMT_ADDR:%.*]] = alloca i8*, align 4
+// CHECK-NEXT:    [[VA:%.*]] = alloca i8*, align 4
+// CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LD:%.*]] = alloca fp128, align 16
+// CHECK-NEXT:    [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1
+// CHECK-NEXT:    [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4
+// CHECK-NEXT:    [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
+// CHECK-NEXT:    [[RET:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4
+// CHECK-NEXT:    [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_start(i8* [[VA1]])
+// CHECK-NEXT:    [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4
+// CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 4
+// CHECK-NEXT:    store i8* [[ARGP_NEXT]], i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[ARGP_CUR]] to i32*
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], i32* [[V]], align 4
+// CHECK-NEXT:    [[ARGP_CUR2:%.*]] = load i8*, i8** [[VA]], align 4
+// CHECK-NEXT:    [[ARGP_NEXT3:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR2]], i32 4
+// CHECK-NEXT:    store i8* [[ARGP_NEXT3]], i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[ARGP_CUR2]] to fp128**
+// CHECK-NEXT:    [[TMP3:%.*]] = load fp128*, fp128** [[TMP2]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load fp128, fp128* [[TMP3]], align 16
+// CHECK-NEXT:    store fp128 [[TMP4]], fp128* [[LD]], align 16
+// CHECK-NEXT:    [[ARGP_CUR4:%.*]] = load i8*, i8** [[VA]], align 4
+// CHECK-NEXT:    [[ARGP_NEXT5:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR4]], i32 4
+// CHECK-NEXT:    store i8* [[ARGP_NEXT5]], i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[ARGP_CUR4]] to %struct.tiny*
+// CHECK-NEXT:    [[TMP6:%.*]] = bitcast %struct.tiny* [[TS]] to i8*
+// CHECK-NEXT:    [[TMP7:%.*]] = bitcast %struct.tiny* [[TMP5]] to i8*
+// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[TMP6]], i8* align 4 [[TMP7]], i32 4, i1 false)
+// CHECK-NEXT:    [[ARGP_CUR6:%.*]] = load i8*, i8** [[VA]], align 4
+// CHECK-NEXT:    [[ARGP_NEXT7:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR6]], i32 8
+// CHECK-NEXT:    store i8* [[ARGP_NEXT7]], i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[ARGP_CUR6]] to %struct.small*
+// CHECK-NEXT:    [[TMP9:%.*]] = bitcast %struct.small* [[SS]] to i8*
+// CHECK-NEXT:    [[TMP10:%.*]] = bitcast %struct.small* [[TMP8]] to i8*
+// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false)
+// CHECK-NEXT:    [[ARGP_CUR8:%.*]] = load i8*, i8** [[VA]], align 4
+// CHECK-NEXT:    [[ARGP_NEXT9:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR8]], i32 4
+// CHECK-NEXT:    store i8* [[ARGP_NEXT9]], i8** [[VA]], align 4
+// CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8* [[ARGP_CUR8]] to %struct.large**
+// CHECK-NEXT:    [[TMP12:%.*]] = load %struct.large*, %struct.large** [[TMP11]], align 4
+// CHECK-NEXT:    [[TMP13:%.*]] = bitcast %struct.large* [[LS]] to i8*
+// CHECK-NEXT:    [[TMP14:%.*]] = bitcast %struct.large* [[TMP12]] to i8*
+// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 16, i1 false)
+// CHECK-NEXT:    [[VA10:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_end(i8* [[VA10]])
+int f_va_4(char *fmt, ...) {
+  __builtin_va_list va;
+
+  __builtin_va_start(va, fmt);
+  int v = __builtin_va_arg(va, int);
+  long double ld = __builtin_va_arg(va, long double);
+  struct tiny ts = __builtin_va_arg(va, struct tiny);
+  struct small ss = __builtin_va_arg(va, struct small);
+  struct large ls = __builtin_va_arg(va, struct large);
+  __builtin_va_end(va);
+
+  int ret = (int)((long double)v + ld);
+  ret = ret + ts.a + ts.b + ts.c + ts.d;
+  ret = ret + ss.a + (int)ss.b;
+  ret = ret + ls.a + ls.b + ls.c + ls.d;
+
+  return ret;
+}
diff --git a/test/CodeGen/riscv64-abi.c b/test/CodeGen/riscv64-abi.c
new file mode 100644
index 0000000..7a0f065
--- /dev/null
+++ b/test/CodeGen/riscv64-abi.c
@@ -0,0 +1,422 @@
+// RUN: %clang_cc1 -triple riscv64 -emit-llvm %s -o - | FileCheck %s
+
+#include <stddef.h>
+#include <stdint.h>
+
+// CHECK-LABEL: define void @f_void()
+void f_void(void) {}
+
+// Scalar arguments and return values smaller than the word size are extended
+// according to the sign of their type, up to 32 bits
+
+// CHECK-LABEL: define zeroext i1 @f_scalar_0(i1 zeroext %x)
+_Bool f_scalar_0(_Bool x) { return x; }
+
+// CHECK-LABEL: define signext i8 @f_scalar_1(i8 signext %x)
+int8_t f_scalar_1(int8_t x) { return x; }
+
+// CHECK-LABEL: define zeroext i8 @f_scalar_2(i8 zeroext %x)
+uint8_t f_scalar_2(uint8_t x) { return x; }
+
+// CHECK-LABEL: define signext i32 @f_scalar_3(i32 signext %x)
+uint32_t f_scalar_3(int32_t x) { return x; }
+
+// CHECK-LABEL: define i64 @f_scalar_4(i64 %x)
+int64_t f_scalar_4(int64_t x) { return x; }
+
+// CHECK-LABEL: define float @f_fp_scalar_1(float %x)
+float f_fp_scalar_1(float x) { return x; }
+
+// CHECK-LABEL: define double @f_fp_scalar_2(double %x)
+double f_fp_scalar_2(double x) { return x; }
+
+// CHECK-LABEL: define fp128 @f_fp_scalar_3(fp128 %x)
+long double f_fp_scalar_3(long double x) { return x; }
+
+// Empty structs or unions are ignored.
+
+struct empty_s {};
+
+// CHECK-LABEL: define void @f_agg_empty_struct()
+struct empty_s f_agg_empty_struct(struct empty_s x) {
+  return x;
+}
+
+union empty_u {};
+
+// CHECK-LABEL: define void @f_agg_empty_union()
+union empty_u f_agg_empty_union(union empty_u x) {
+  return x;
+}
+
+// Aggregates <= 2*xlen may be passed in registers, so will be coerced to
+// integer arguments. The rules for return are the same.
+
+struct tiny {
+  uint16_t a, b, c, d;
+};
+
+// CHECK-LABEL: define void @f_agg_tiny(i64 %x.coerce)
+void f_agg_tiny(struct tiny x) {
+  x.a += x.b;
+  x.c += x.d;
+}
+
+// CHECK-LABEL: define i64 @f_agg_tiny_ret()
+struct tiny f_agg_tiny_ret() {
+  return (struct tiny){1, 2, 3, 4};
+}
+
+typedef uint16_t v4i16 __attribute__((vector_size(8)));
+typedef int64_t v1i64 __attribute__((vector_size(8)));
+
+// CHECK-LABEL: define void @f_vec_tiny_v4i16(i64 %x.coerce)
+void f_vec_tiny_v4i16(v4i16 x) {
+  x[0] = x[1];
+  x[2] = x[3];
+}
+
+// CHECK-LABEL: define i64 @f_vec_tiny_v4i16_ret()
+v4i16 f_vec_tiny_v4i16_ret() {
+  return (v4i16){1, 2, 3, 4};
+}
+
+// CHECK-LABEL: define void @f_vec_tiny_v1i64(i64 %x.coerce)
+void f_vec_tiny_v1i64(v1i64 x) {
+  x[0] = 114;
+}
+
+// CHECK-LABEL: define i64 @f_vec_tiny_v1i64_ret()
+v1i64 f_vec_tiny_v1i64_ret() {
+  return (v1i64){1};
+}
+
+struct small {
+  int64_t a, *b;
+};
+
+// CHECK-LABEL: define void @f_agg_small([2 x i64] %x.coerce)
+void f_agg_small(struct small x) {
+  x.a += *x.b;
+  x.b = &x.a;
+}
+
+// CHECK-LABEL: define [2 x i64] @f_agg_small_ret()
+struct small f_agg_small_ret() {
+  return (struct small){1, 0};
+}
+
+typedef uint16_t v8i16 __attribute__((vector_size(16)));
+typedef __int128_t v1i128 __attribute__((vector_size(16)));
+
+// CHECK-LABEL: define void @f_vec_small_v8i16(i128 %x.coerce)
+void f_vec_small_v8i16(v8i16 x) {
+  x[0] = x[7];
+}
+
+// CHECK-LABEL: define i128 @f_vec_small_v8i16_ret()
+v8i16 f_vec_small_v8i16_ret() {
+  return (v8i16){1, 2, 3, 4, 5, 6, 7, 8};
+}
+
+// CHECK-LABEL: define void @f_vec_small_v1i128(i128 %x.coerce)
+void f_vec_small_v1i128(v1i128 x) {
+  x[0] = 114;
+}
+
+// CHECK-LABEL: define i128 @f_vec_small_v1i128_ret()
+v1i128 f_vec_small_v1i128_ret() {
+  return (v1i128){1};
+}
+
+// Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
+// single 2*xlen-sized argument, to ensure that alignment can be maintained if
+// passed on the stack.
+
+struct small_aligned {
+  __int128_t a;
+};
+
+// CHECK-LABEL: define void @f_agg_small_aligned(i128 %x.coerce)
+void f_agg_small_aligned(struct small_aligned x) {
+  x.a += x.a;
+}
+
+// CHECK-LABEL: define i128 @f_agg_small_aligned_ret(i128 %x.coerce)
+struct small_aligned f_agg_small_aligned_ret(struct small_aligned x) {
+  return (struct small_aligned){10};
+}
+
+// Aggregates greater > 2*xlen will be passed and returned indirectly
+struct large {
+  int64_t a, b, c, d;
+};
+
+// CHECK-LABEL: define void @f_agg_large(%struct.large* %x)
+void f_agg_large(struct large x) {
+  x.a = x.b + x.c + x.d;
+}
+
+// The address where the struct should be written to will be the first
+// argument
+// CHECK-LABEL: define void @f_agg_large_ret(%struct.large* noalias sret %agg.result, i32 signext %i, i8 signext %j)
+struct large f_agg_large_ret(int32_t i, int8_t j) {
+  return (struct large){1, 2, 3, 4};
+}
+
+typedef unsigned char v32i8 __attribute__((vector_size(32)));
+
+// CHECK-LABEL: define void @f_vec_large_v32i8(<32 x i8>*)
+void f_vec_large_v32i8(v32i8 x) {
+  x[0] = x[7];
+}
+
+// CHECK-LABEL: define void @f_vec_large_v32i8_ret(<32 x i8>* noalias sret %agg.result)
+v32i8 f_vec_large_v32i8_ret() {
+  return (v32i8){1, 2, 3, 4, 5, 6, 7, 8};
+}
+
+// Scalars passed on the stack should have signext/zeroext attributes (they
+// are anyext).
+
+// CHECK-LABEL: define signext i32 @f_scalar_stack_1(i64 %a.coerce, [2 x i64] %b.coerce, i128 %c.coerce, %struct.large* %d, i8 zeroext %e, i8 signext %f, i8 %g, i8 %h)
+int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c,
+                     struct large d, uint8_t e, int8_t f, uint8_t g, int8_t h) {
+  return g + h;
+}
+
+// CHECK-LABEL: define signext i32 @f_scalar_stack_2(i32 signext %a, i128 %b, float %c, fp128 %d, <32 x i8>*, i8 zeroext %f, i8 %g, i8 %h)
+int f_scalar_stack_2(int32_t a, __int128_t b, float c, long double d, v32i8 e,
+                     uint8_t f, int8_t g, uint8_t h) {
+  return g + h;
+}
+
+// Ensure that scalars passed on the stack are still determined correctly in
+// the presence of large return values that consume a register due to the need
+// to pass a pointer.
+
+// CHECK-LABEL: define void @f_scalar_stack_3(%struct.large* noalias sret %agg.result, i32 signext %a, i128 %b, fp128 %c, <32 x i8>*, i8 zeroext %e, i8 %f, i8 %g)
+struct large f_scalar_stack_3(uint32_t a, __int128_t b, long double c, v32i8 d,
+                              uint8_t e, int8_t f, uint8_t g) {
+  return (struct large){a, e, f, g};
+}
+
+// Ensure that ABI lowering happens as expected for vararg calls.
+// Specifically, ensure that signext is emitted for varargs that will be
+// passed in registers but not on the stack. Ensure this takes into account
+// the use of "aligned" register pairs for varargs with 2*xlen alignment.
+
+int f_va_callee(int, ...);
+
+// CHECK-LABEL: define void @f_va_caller()
+void f_va_caller() {
+  // CHECK: call signext i32 (i32, ...) @f_va_callee(i32 signext 1, i32 signext 2, i64 3, double 4.000000e+00, double 5.000000e+00, i64 {{%.*}}, [2 x i64] {{%.*}}, i128 {{%.*}}, %struct.large* {{%.*}})
+  f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct tiny){6, 7, 8, 9},
+              (struct small){10, NULL}, (struct small_aligned){11},
+              (struct large){12, 13, 14, 15});
+  // CHECK: call signext i32 (i32, ...) @f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, fp128 0xL00000000000000004001400000000000, i32 signext 6, i32 signext 7, i32 8, i32 9)
+  f_va_callee(1, 2, 3, 4, 5.0L, 6, 7, 8, 9);
+  // CHECK: call signext i32 (i32, ...) @f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i128 {{%.*}}, i32 signext 6, i32 signext 7, i32 8, i32 9)
+  f_va_callee(1, 2, 3, 4, (struct small_aligned){5}, 6, 7, 8, 9);
+  // CHECK: call signext i32 (i32, ...) @f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, [2 x i64] {{%.*}}, i32 signext 6, i32 signext 7, i32 8, i32 9)
+  f_va_callee(1, 2, 3, 4, (struct small){5, NULL}, 6, 7, 8, 9);
+  // CHECK: call signext i32 (i32, ...) @f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, fp128 0xL00000000000000004001800000000000, i32 7, i32 8, i32 9)
+  f_va_callee(1, 2, 3, 4, 5, 6.0L, 7, 8, 9);
+  // CHECK: call signext i32 (i32, ...) @f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i128 {{%.*}}, i32 7, i32 8, i32 9)
+  f_va_callee(1, 2, 3, 4, 5, (struct small_aligned){6}, 7, 8, 9);
+  // CHECK: call signext i32 (i32, ...) @f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, [2 x i64] {{%.*}}, i32 signext 7, i32 8, i32 9)
+  f_va_callee(1, 2, 3, 4, 5, (struct small){6, NULL}, 7, 8, 9);
+  // CHECK: call signext i32 (i32, ...) @f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, fp128 0xL00000000000000004001C00000000000, i32 8, i32 9)
+  f_va_callee(1, 2, 3, 4, 5, 6, 7.0L, 8, 9);
+  // CHECK: call signext i32 (i32, ...) @f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, i128 {{%.*}}, i32 8, i32 9)
+  f_va_callee(1, 2, 3, 4, 5, 6, (struct small_aligned){7}, 8, 9);
+  // CHECK: call signext i32 (i32, ...) @f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, [2 x i64] {{.*}}, i32 8, i32 9)
+  f_va_callee(1, 2, 3, 4, 5, 6, (struct small){7, NULL}, 8, 9);
+}
+
+// CHECK-LABEL: define signext i32 @f_va_1(i8* %fmt, ...) {{.*}} {
+// CHECK:   [[FMT_ADDR:%.*]] = alloca i8*, align 8
+// CHECK:   [[VA:%.*]] = alloca i8*, align 8
+// CHECK:   [[V:%.*]] = alloca i32, align 4
+// CHECK:   store i8* %fmt, i8** [[FMT_ADDR]], align 8
+// CHECK:   [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK:   call void @llvm.va_start(i8* [[VA1]])
+// CHECK:   [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 8
+// CHECK:   [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i64 8
+// CHECK:   store i8* [[ARGP_NEXT]], i8** [[VA]], align 8
+// CHECK:   [[TMP0:%.*]] = bitcast i8* [[ARGP_CUR]] to i32*
+// CHECK:   [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 8
+// CHECK:   store i32 [[TMP1]], i32* [[V]], align 4
+// CHECK:   [[VA2:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK:   call void @llvm.va_end(i8* [[VA2]])
+// CHECK:   [[TMP2:%.*]] = load i32, i32* [[V]], align 4
+// CHECK:   ret i32 [[TMP2]]
+// CHECK: }
+int f_va_1(char *fmt, ...) {
+  __builtin_va_list va;
+
+  __builtin_va_start(va, fmt);
+  int v = __builtin_va_arg(va, int);
+  __builtin_va_end(va);
+
+  return v;
+}
+
+// An "aligned" register pair (where the first register is even-numbered) is
+// used to pass varargs with 2x xlen alignment and 2x xlen size. Ensure the
+// correct offsets are used.
+
+// CHECK-LABEL: @f_va_2(
+// CHECK:         [[FMT_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VA:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[V:%.*]] = alloca fp128, align 16
+// CHECK-NEXT:    store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 8
+// CHECK-NEXT:    [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_start(i8* [[VA1]])
+// CHECK-NEXT:    [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i8* [[ARGP_CUR]] to i64
+// CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], 15
+// CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -16
+// CHECK-NEXT:    [[ARGP_CUR_ALIGNED:%.*]] = inttoptr i64 [[TMP2]] to i8*
+// CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR_ALIGNED]], i64 16
+// CHECK-NEXT:    store i8* [[ARGP_NEXT]], i8** [[VA]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[ARGP_CUR_ALIGNED]] to fp128*
+// CHECK-NEXT:    [[TMP4:%.*]] = load fp128, fp128* [[TMP3]], align 16
+// CHECK-NEXT:    store fp128 [[TMP4]], fp128* [[V]], align 16
+// CHECK-NEXT:    [[VA2:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_end(i8* [[VA2]])
+// CHECK-NEXT:    [[TMP5:%.*]] = load fp128, fp128* [[V]], align 16
+// CHECK-NEXT:    ret fp128 [[TMP5]]
+long double f_va_2(char *fmt, ...) {
+  __builtin_va_list va;
+
+  __builtin_va_start(va, fmt);
+  long double v = __builtin_va_arg(va, long double);
+  __builtin_va_end(va);
+
+  return v;
+}
+
+// Two "aligned" register pairs.
+
+// CHECK-LABEL: @f_va_3(
+// CHECK:         [[FMT_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VA:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[V:%.*]] = alloca fp128, align 16
+// CHECK-NEXT:    [[W:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[X:%.*]] = alloca fp128, align 16
+// CHECK-NEXT:    store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 8
+// CHECK-NEXT:    [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_start(i8* [[VA1]])
+// CHECK-NEXT:    [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i8* [[ARGP_CUR]] to i64
+// CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], 15
+// CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -16
+// CHECK-NEXT:    [[ARGP_CUR_ALIGNED:%.*]] = inttoptr i64 [[TMP2]] to i8*
+// CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR_ALIGNED]], i64 16
+// CHECK-NEXT:    store i8* [[ARGP_NEXT]], i8** [[VA]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[ARGP_CUR_ALIGNED]] to fp128*
+// CHECK-NEXT:    [[TMP4:%.*]] = load fp128, fp128* [[TMP3]], align 16
+// CHECK-NEXT:    store fp128 [[TMP4]], fp128* [[V]], align 16
+// CHECK-NEXT:    [[ARGP_CUR2:%.*]] = load i8*, i8** [[VA]], align 8
+// CHECK-NEXT:    [[ARGP_NEXT3:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR2]], i64 8
+// CHECK-NEXT:    store i8* [[ARGP_NEXT3]], i8** [[VA]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[ARGP_CUR2]] to i32*
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK-NEXT:    store i32 [[TMP6]], i32* [[W]], align 4
+// CHECK-NEXT:    [[ARGP_CUR4:%.*]] = load i8*, i8** [[VA]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint i8* [[ARGP_CUR4]] to i64
+// CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 15
+// CHECK-NEXT:    [[TMP9:%.*]] = and i64 [[TMP8]], -16
+// CHECK-NEXT:    [[ARGP_CUR4_ALIGNED:%.*]] = inttoptr i64 [[TMP9]] to i8*
+// CHECK-NEXT:    [[ARGP_NEXT5:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR4_ALIGNED]], i64 16
+// CHECK-NEXT:    store i8* [[ARGP_NEXT5]], i8** [[VA]], align 8
+// CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[ARGP_CUR4_ALIGNED]] to fp128*
+// CHECK-NEXT:    [[TMP11:%.*]] = load fp128, fp128* [[TMP10]], align 16
+// CHECK-NEXT:    store fp128 [[TMP11]], fp128* [[X]], align 16
+// CHECK-NEXT:    [[VA6:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_end(i8* [[VA6]])
+// CHECK-NEXT:    [[TMP12:%.*]] = load fp128, fp128* [[V]], align 16
+// CHECK-NEXT:    [[TMP13:%.*]] = load fp128, fp128* [[X]], align 16
+// CHECK-NEXT:    [[ADD:%.*]] = fadd fp128 [[TMP12]], [[TMP13]]
+// CHECK-NEXT:    ret fp128 [[ADD]]
+long double f_va_3(char *fmt, ...) {
+  __builtin_va_list va;
+
+  __builtin_va_start(va, fmt);
+  long double v = __builtin_va_arg(va, long double);
+  int w = __builtin_va_arg(va, int);
+  long double x = __builtin_va_arg(va, long double);
+  __builtin_va_end(va);
+
+  return v + x;
+}
+
+// CHECK-LABEL: @f_va_4(
+// CHECK:         [[FMT_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VA:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 2
+// CHECK-NEXT:    [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 8
+// CHECK-NEXT:    [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 8
+// CHECK-NEXT:    [[RET:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 8
+// CHECK-NEXT:    [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_start(i8* [[VA1]])
+// CHECK-NEXT:    [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 8
+// CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i64 8
+// CHECK-NEXT:    store i8* [[ARGP_NEXT]], i8** [[VA]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[ARGP_CUR]] to i32*
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 8
+// CHECK-NEXT:    store i32 [[TMP1]], i32* [[V]], align 4
+// CHECK-NEXT:    [[ARGP_CUR2:%.*]] = load i8*, i8** [[VA]], align 8
+// CHECK-NEXT:    [[ARGP_NEXT3:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR2]], i64 8
+// CHECK-NEXT:    store i8* [[ARGP_NEXT3]], i8** [[VA]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[ARGP_CUR2]] to %struct.tiny*
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast %struct.tiny* [[TS]] to i8*
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast %struct.tiny* [[TMP2]] to i8*
+// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP3]], i8* align 8 [[TMP4]], i64 8, i1 false)
+// CHECK-NEXT:    [[ARGP_CUR4:%.*]] = load i8*, i8** [[VA]], align 8
+// CHECK-NEXT:    [[ARGP_NEXT5:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR4]], i64 16
+// CHECK-NEXT:    store i8* [[ARGP_NEXT5]], i8** [[VA]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[ARGP_CUR4]] to %struct.small*
+// CHECK-NEXT:    [[TMP6:%.*]] = bitcast %struct.small* [[SS]] to i8*
+// CHECK-NEXT:    [[TMP7:%.*]] = bitcast %struct.small* [[TMP5]] to i8*
+// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 16, i1 false)
+// CHECK-NEXT:    [[ARGP_CUR6:%.*]] = load i8*, i8** [[VA]], align 8
+// CHECK-NEXT:    [[ARGP_NEXT7:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR6]], i64 8
+// CHECK-NEXT:    store i8* [[ARGP_NEXT7]], i8** [[VA]], align 8
+// CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[ARGP_CUR6]] to %struct.large**
+// CHECK-NEXT:    [[TMP9:%.*]] = load %struct.large*, %struct.large** [[TMP8]], align 8
+// CHECK-NEXT:    [[TMP10:%.*]] = bitcast %struct.large* [[LS]] to i8*
+// CHECK-NEXT:    [[TMP11:%.*]] = bitcast %struct.large* [[TMP9]] to i8*
+// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 32, i1 false)
+// CHECK-NEXT:    [[VA8:%.*]] = bitcast i8** [[VA]] to i8*
+// CHECK-NEXT:    call void @llvm.va_end(i8* [[VA8]])
+// CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], %struct.tiny* [[TS]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[A]], align 2
+// CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP12]] to i64
+// CHECK-NEXT:    [[A9:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], %struct.small* [[SS]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP13:%.*]] = load i64, i64* [[A9]], align 8
+// CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[CONV]], [[TMP13]]
+// CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], %struct.large* [[LS]], i32 0, i32 2
+// CHECK-NEXT:    [[TMP14:%.*]] = load i64, i64* [[C]], align 8
+// CHECK-NEXT:    [[ADD10:%.*]] = add nsw i64 [[ADD]], [[TMP14]]
+// CHECK-NEXT:    [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32
+// CHECK-NEXT:    store i32 [[CONV11]], i32* [[RET]], align 4
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[RET]], align 4
+// CHECK-NEXT:    ret i32 [[TMP15]]
+int f_va_4(char *fmt, ...) {
+  __builtin_va_list va;
+
+  __builtin_va_start(va, fmt);
+  int v = __builtin_va_arg(va, int);
+  struct tiny ts = __builtin_va_arg(va, struct tiny);
+  struct small ss = __builtin_va_arg(va, struct small);
+  struct large ls = __builtin_va_arg(va, struct large);
+  __builtin_va_end(va);
+
+  int ret = ts.a + ss.a + ls.c;
+
+  return ret;
+}
diff --git a/test/CodeGen/sparc-vaarg.c b/test/CodeGen/sparc-vaarg.c
index 3e4dd7c..920b9a1 100644
--- a/test/CodeGen/sparc-vaarg.c
+++ b/test/CodeGen/sparc-vaarg.c
@@ -19,7 +19,7 @@
 // CHECK-LABEL: define void @get_struct
 // CHECK: [[RESULT:%[a-z_0-9]+]] = va_arg {{.*}}, %struct.Foo*{{$}}
 // CHECK: [[RESULT2:%[a-z_0-9]+]] = bitcast {{.*}} [[RESULT]] to i8*
-// CHECK: call void @llvm.memcpy{{.*}}@dest{{.*}}, i8* [[RESULT2]]
+// CHECK: call void @llvm.memcpy{{.*}}@dest{{.*}}, i8* align {{[0-9]+}} [[RESULT2]]
 void get_struct(va_list *args) {
  dest = va_arg(*args, struct Foo);
 }
diff --git a/test/CodeGen/split-stacks.c b/test/CodeGen/split-stacks.c
index bf4cf0f..d9ff1f9 100644
--- a/test/CodeGen/split-stacks.c
+++ b/test/CodeGen/split-stacks.c
@@ -14,13 +14,13 @@
   return foo();
 }
 
-// CHECK-SEGSTK: define i32 @foo() [[SS:#[0-9]+]] {
-// CHECK-SEGSTK: define i32 @nosplit() [[NSS:#[0-9]+]] {
-// CHECK-SEGSTK: define i32 @main() [[SS]] {
+// CHECK-SEGSTK: define dso_local i32 @foo() [[SS:#[0-9]+]] {
+// CHECK-SEGSTK: define dso_local i32 @nosplit() [[NSS:#[0-9]+]] {
+// CHECK-SEGSTK: define dso_local i32 @main() [[SS]] {
 // CHECK-SEGSTK-NOT: [[NSS]] = { {{.*}} "split-stack" {{.*}} }
 // CHECK-SEGSTK: [[SS]] = { {{.*}} "split-stack" {{.*}} }
 // CHECK-SEGSTK-NOT: [[NSS]] = { {{.*}} "split-stack" {{.*}} }
 
-// CHECK-NOSEGSTK: define i32 @foo() #0 {
-// CHECK-NOSEGSTK: define i32 @main() #0 {
+// CHECK-NOSEGSTK: define dso_local i32 @foo() #0 {
+// CHECK-NOSEGSTK: define dso_local i32 @main() #0 {
 // CHECK-NOSEGSTK-NOT: #0 = { {{.*}} "split-stack" {{.*}} }
diff --git a/test/CodeGen/stack-arg-probe.c b/test/CodeGen/stack-arg-probe.c
new file mode 100644
index 0000000..d806db6
--- /dev/null
+++ b/test/CodeGen/stack-arg-probe.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -triple=i686-windows-msvc -emit-llvm -o - -mno-stack-arg-probe | FileCheck %s -check-prefix=NO-STACKPROBE
+// RUN: %clang_cc1 %s -triple=i686-windows-msvc -emit-llvm -o - | FileCheck %s -check-prefix=STACKPROBE
+
+// NO-STACKPROBE: attributes #{{[0-9]+}} = {{{.*}} "no-stack-arg-probe"
+// STACKPROBE-NOT: attributes #{{[0-9]+}} = {{{.*}} "no-stack-arg-probe"
+
+void test1() {
+}
diff --git a/test/CodeGen/stack-size-section.c b/test/CodeGen/stack-size-section.c
new file mode 100644
index 0000000..504a42d
--- /dev/null
+++ b/test/CodeGen/stack-size-section.c
@@ -0,0 +1,9 @@
+// REQUIRES: x86-registered-target
+
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -S -o - | FileCheck %s --check-prefix=CHECK-ABSENT
+// CHECK-ABSENT-NOT: section .stack_sizes
+
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fstack-size-section %s -S -o - | FileCheck %s --check-prefix=CHECK-PRESENT
+// CHECK-PRESENT: section .stack_sizes
+
+int foo() { return 42; }
diff --git a/test/CodeGen/target-builtin-noerror.c b/test/CodeGen/target-builtin-noerror.c
index 63fc7c9..37820b3 100644
--- a/test/CodeGen/target-builtin-noerror.c
+++ b/test/CodeGen/target-builtin-noerror.c
@@ -92,6 +92,7 @@
   (void)__builtin_cpu_is("broadwell");
   (void)__builtin_cpu_is("btver1");
   (void)__builtin_cpu_is("btver2");
+  (void)__builtin_cpu_is("cannonlake");
   (void)__builtin_cpu_is("core2");
   (void)__builtin_cpu_is("corei7");
   (void)__builtin_cpu_is("haswell");
@@ -99,6 +100,7 @@
   (void)__builtin_cpu_is("istanbul");
   (void)__builtin_cpu_is("ivybridge");
   (void)__builtin_cpu_is("knl");
+  (void)__builtin_cpu_is("knm");
   (void)__builtin_cpu_is("nehalem");
   (void)__builtin_cpu_is("sandybridge");
   (void)__builtin_cpu_is("shanghai");
diff --git a/test/CodeGen/target-data.c b/test/CodeGen/target-data.c
index 3869afe..9349cec 100644
--- a/test/CodeGen/target-data.c
+++ b/test/CodeGen/target-data.c
@@ -124,20 +124,20 @@
 
 // RUN: %clang_cc1 -triple r600-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=R600
-// R600: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+// R600: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
 
 // RUN: %clang_cc1 -triple r600-unknown -target-cpu cayman -o - -emit-llvm %s \
 // RUN: | FileCheck %s -check-prefix=R600D
-// R600D: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+// R600D: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
 
 // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \
 // RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
 
 // Test default -target-cpu
 // RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \
 // RUN: | FileCheck %s -check-prefix=R600SIDefault
-// R600SIDefault: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
 
 // RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=AARCH64
diff --git a/test/CodeGen/tbaa-array.cpp b/test/CodeGen/tbaa-array.cpp
new file mode 100644
index 0000000..4a6576e
--- /dev/null
+++ b/test/CodeGen/tbaa-array.cpp
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s \
+// RUN:     -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s \
+// RUN:     -new-struct-path-tbaa -emit-llvm -o - | \
+// RUN:     FileCheck -check-prefix=CHECK-NEW %s
+//
+// Check that we generate correct TBAA information for accesses to array
+// elements.
+
+struct A { int i; };
+struct B { A a[1]; };
+struct C { int i; int x[3]; };
+
+int foo(B *b) {
+// CHECK-LABEL: _Z3fooP1B
+// CHECK: load i32, {{.*}}, !tbaa [[TAG_A_i:!.*]]
+// CHECK-NEW-LABEL: _Z3fooP1B
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_A_i:!.*]]
+  return b->a->i;
+}
+
+// Check that members of array types are represented correctly.
+int bar(C *c) {
+// CHECK-NEW-LABEL: _Z3barP1C
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_C_i:!.*]]
+  return c->i;
+}
+
+int bar2(C *c) {
+// CHECK-NEW-LABEL: _Z4bar2P1C
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+  return c->x[2];
+}
+
+int bar3(C *c, int j) {
+// CHECK-NEW-LABEL: _Z4bar3P1Ci
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+  return c->x[j];
+}
+
+// CHECK-DAG: [[TAG_A_i]] = !{[[TYPE_A:!.*]], [[TYPE_int:!.*]], i64 0}
+// CHECK-DAG: [[TYPE_A]] = !{!"_ZTS1A", !{{.*}}, i64 0}
+// CHECK-DAG: [[TYPE_int]] = !{!"int", !{{.*}}, i64 0}
+
+// CHECK-NEW-DAG: [[TYPE_char:!.*]] = !{{{.*}}, i64 1, !"omnipotent char"}
+// CHECK-NEW-DAG: [[TYPE_int:!.*]] = !{[[TYPE_char]], i64 4, !"int"}
+// CHECK-NEW-DAG: [[TAG_int]] = !{[[TYPE_int]], [[TYPE_int]], i64 0, i64 4}
+// CHECK-NEW-DAG: [[TYPE_pointer:!.*]] = !{[[TYPE_char]], i64 8, !"any pointer"}
+// CHECK-NEW-DAG: [[TYPE_A:!.*]] = !{[[TYPE_char]], i64 4, !"_ZTS1A", [[TYPE_int]], i64 0, i64 4}
+// CHECK-NEW-DAG: [[TAG_A_i]] = !{[[TYPE_A]], [[TYPE_int]], i64 0, i64 4}
+// CHECK-NEW-DAG: [[TYPE_C:!.*]] = !{[[TYPE_char]], i64 16, !"_ZTS1C", [[TYPE_int]], i64 0, i64 4, [[TYPE_int]], i64 4, i64 12}
+// CHECK-NEW-DAG: [[TAG_C_i]] = !{[[TYPE_C:!.*]], [[TYPE_int:!.*]], i64 0, i64 4}
diff --git a/test/CodeGen/tbaa-base.cpp b/test/CodeGen/tbaa-base.cpp
new file mode 100644
index 0000000..175c55f
--- /dev/null
+++ b/test/CodeGen/tbaa-base.cpp
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -O1 %s -emit-llvm -o - | FileCheck %s
+//
+// Test generating of TBAA metadata for accesses to members of base classes.
+
+struct A {
+  int x, y, z;
+};
+
+struct B : A {
+  int i;
+};
+
+struct C {
+  int i;
+  B b;
+  int j;
+};
+
+int f1(B *b) {
+// CHECK-LABEL: _Z2f1P1B
+// CHECK: load i32, {{.*}}, !tbaa [[TAG_A_y:!.*]]
+  return b->y;
+}
+
+int f2(C *c) {
+// CHECK-LABEL: _Z2f2P1C
+// CHECK: load i32, {{.*}}, !tbaa [[TAG_A_y]]
+  return (&(c->b))->y;
+}
+
+struct D : virtual A
+{};
+
+struct E {
+  D d;
+};
+
+int f3(D *d) {
+// CHECK-LABEL: _Z2f3P1D
+// CHECK: load i32, {{.*}}, !tbaa [[TAG_A_y]]
+  return d->y;
+}
+
+int f4(E *e) {
+// CHECK-LABEL: _Z2f4P1E
+// CHECK: load i32, {{.*}}, !tbaa [[TAG_A_y]]
+  return (&(e->d))->y;
+}
+
+// CHECK-DAG: [[TYPE_char:!.*]] = !{!"omnipotent char", {{.*}}, i64 0}
+// CHECK-DAG: [[TYPE_int:!.*]] = !{!"int", [[TYPE_char]], i64 0}
+// CHECK-DAG: [[TYPE_A:!.*]] = !{!"_ZTS1A", [[TYPE_int]], i64 0, [[TYPE_int]], i64 4, [[TYPE_int]], i64 8}
+// CHECK-DAG: [[TAG_A_y]] = !{[[TYPE_A]], [[TYPE_int]], i64 4}
diff --git a/test/CodeGen/tbaa-reference.cpp b/test/CodeGen/tbaa-reference.cpp
index aae6a82..ecdbfbe 100644
--- a/test/CodeGen/tbaa-reference.cpp
+++ b/test/CodeGen/tbaa-reference.cpp
@@ -6,24 +6,32 @@
 
 struct B {
   S &s;
-  B(S &s) : s(s) {}
-  void bar();
+  B(S &s);
+  S &get();
 };
 
-void foo(S &s) {
-  B b(s);
-  b.bar();
+B::B(S &s) : s(s) {
+// CHECK-LABEL: _ZN1BC2ER1S
+// Check initialization of the reference parameter.
+// CHECK: store %struct.S* {{.*}}, %struct.S** {{.*}}, !tbaa [[TAG_pointer:!.*]]
+
+// Check loading of the reference parameter.
+// CHECK: load %struct.S*, %struct.S** {{.*}}, !tbaa [[TAG_pointer]]
+
+// Check initialization of the reference member.
+// CHECK: store %struct.S* {{.*}}, %struct.S** {{.*}}, !tbaa [[TAG_pointer]]
 }
 
-// CHECK-LABEL: _Z3fooR1S
-// Check initialization of the reference parameter in foo().
-// CHECK: store %struct.S* {{.*}}, %struct.S** {{.*}}, !tbaa [[TAG_pointer:!.*]]
-//
-// CHECK-LABEL: _ZN1BC2ER1S
-// TODO: Check loading of the reference parameter in B::B(S&).
-// Check initialization of B::s in B::B(S&).
-// CHECK: store %struct.S* {{.*}}, %struct.S** {{.*}}, !tbaa [[TAG_pointer]]
-//
+S &B::get() {
+// CHECK-LABEL: _ZN1B3getEv
+// Check that we access the reference as a structure member.
+// CHECK: load %struct.S*, %struct.S** {{.*}}, !tbaa [[TAG_B_s:!.*]]
+  return s;
+}
+
 // CHECK-DAG: [[TAG_pointer]] = !{[[TYPE_pointer:!.*]], [[TYPE_pointer]], i64 0}
+// CHECK-DAG: [[TAG_B_s]] = !{[[TYPE_B:!.*]], [[TYPE_pointer]], i64 0}
+//
+// CHECK-DAG: [[TYPE_B]] = !{!"_ZTS1B", [[TYPE_pointer]], i64 0}
 // CHECK-DAG: [[TYPE_pointer]] = !{!"any pointer", [[TYPE_char:!.*]], i64 0}
 // CHECK-DAG: [[TYPE_char]] = !{!"omnipotent char", {{!.*}}, i64 0}
diff --git a/test/CodeGen/tbaa-struct.cpp b/test/CodeGen/tbaa-struct.cpp
index 2623c42..9e5429b 100644
--- a/test/CodeGen/tbaa-struct.cpp
+++ b/test/CodeGen/tbaa-struct.cpp
@@ -1,6 +1,11 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - -O1 %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - -O1 %s | \
+// RUN:     FileCheck -check-prefixes=CHECK,CHECK-OLD %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -new-struct-path-tbaa \
+// RUN:     -emit-llvm -o - -O1 %s | \
+// RUN:     FileCheck -check-prefixes=CHECK,CHECK-NEW %s
 //
-// Check that we generate !tbaa.struct metadata for struct copies.
+// Check that we generate TBAA metadata for struct copies correctly.
+
 struct A {
   short s;
   int i;
@@ -8,68 +13,117 @@
   int j;
 };
 
-void copy(struct A *a, struct A *b) {
-  *a = *b;
-}
+typedef A __attribute__((may_alias)) AA;
 
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 16, i32 4, i1 false), !tbaa.struct [[TS:!.*]]
+void copy(A *a1, A *a2) {
+// CHECK-LABEL: _Z4copyP1AS0_
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i64 16, i1 false)
+// CHECK-OLD-SAME: !tbaa.struct [[TS:!.*]]
+// CHECK-NEW-SAME: !tbaa [[TAG_A:![0-9]*]]
+  *a1 = *a2;
+}
 
 struct B {
-  char c1;
-  struct A a;
-  int ii;
+  char c;
+  A a;
+  int i;
 };
 
-void copy2(struct B *a, struct B *b) {
-  *a = *b;
+void copy2(B *b1, B *b2) {
+// CHECK-LABEL: _Z5copy2P1BS0_
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i64 24, i1 false)
+// CHECK-OLD-SAME: !tbaa.struct [[TS2:!.*]]
+// CHECK-NEW-SAME: !tbaa [[TAG_B:![0-9]*]]
+  *b1 = *b2;
 }
 
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 24, i32 4, i1 false), !tbaa.struct [[TS2:!.*]]
+struct S {
+  _Complex char cc;
+  _Complex int ci;
+};
 
-typedef _Complex int T2;
-typedef _Complex char T5;
-typedef _Complex int T7;
-typedef struct T4 { T5 field0; T7 field1; } T4;
-typedef union T1 { T2 field0; T4 field1; } T1;
+union U {
+  _Complex int ci;
+  S s;
+};
 
-void copy3 (T1 *a, T1 *b) {
-  *a = *b;
+void copy3(U *u1, U *u2) {
+// CHECK-LABEL: _Z5copy3P1US0_
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i64 12, i1 false)
+// CHECK-OLD-SAME: !tbaa.struct [[TS3:!.*]]
+// CHECK-NEW-SAME: !tbaa [[TAG_U:![0-9]*]]
+  *u1 = *u2;
 }
 
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 12, i32 4, i1 false), !tbaa.struct [[TS3:!.*]]
-
 // Make sure that zero-length bitfield works.
-#define ATTR __attribute__ ((ms_struct))
-struct five {
+struct C {
   char a;
-  int :0;        /* ignored; prior field is not a bitfield. */
+  int : 0;  // Shall not be ignored; see r185018.
   char b;
   char c;
-} ATTR;
-void copy4(struct five *a, struct five *b) {
-  *a = *b;
-}
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 3, i32 1, i1 false), !tbaa.struct [[TS4:!.*]]
+} __attribute__((ms_struct));
 
-struct six {
+void copy4(C *c1, C *c2) {
+// CHECK-LABEL: _Z5copy4P1CS0_
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 3, i1 false)
+// CHECK-OLD-SAME: !tbaa.struct [[TS4:!.*]]
+// CHECK-NEW-SAME: !tbaa [[TAG_C:![0-9]*]]
+  *c1 = *c2;
+}
+
+struct D {
   char a;
-  int :0;
+  int : 0;
   char b;
   char c;
 };
-void copy5(struct six *a, struct six *b) {
-  *a = *b;
-}
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 6, i32 1, i1 false), !tbaa.struct [[TS5:!.*]]
 
-// CHECK: [[TS]] = !{i64 0, i64 2, !{{.*}}, i64 4, i64 4, !{{.*}}, i64 8, i64 1, !{{.*}}, i64 12, i64 4, !{{.*}}}
-// CHECK: [[CHAR:!.*]] = !{!"omnipotent char", !{{.*}}}
-// CHECK: [[TAG_INT:!.*]] = !{[[INT:!.*]], [[INT]], i64 0}
-// CHECK: [[INT]] = !{!"int", [[CHAR]]
-// CHECK: [[TAG_CHAR:!.*]] = !{[[CHAR]], [[CHAR]], i64 0}
+void copy5(D *d1, D *d2) {
+// CHECK-LABEL: _Z5copy5P1DS0_
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 6, i1 false)
+// CHECK-OLD-SAME: !tbaa.struct [[TS5:!.*]]
+// CHECK-NEW-SAME: !tbaa [[TAG_D:![0-9]*]]
+  *d1 = *d2;
+}
+
+void copy6(AA *a1, A *a2) {
+// CHECK-LABEL: _Z5copy6P1AS0_
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i64 16, i1 false)
+// CHECK-OLD-SAME: !tbaa.struct [[TS]]
+// CHECK-NEW-SAME: !tbaa [[TAG_char:![0-9]*]]
+  *a1 = *a2;
+}
+
+void copy7(A *a1, AA *a2) {
+// CHECK-LABEL: _Z5copy7P1AS0_
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i64 16, i1 false)
+// CHECK-OLD-SAME: !tbaa.struct [[TS]]
+// CHECK-NEW-SAME: !tbaa [[TAG_char]]
+  *a1 = *a2;
+}
+
+// CHECK-OLD: [[TS]] = !{i64 0, i64 2, !{{.*}}, i64 4, i64 4, !{{.*}}, i64 8, i64 1, !{{.*}}, i64 12, i64 4, !{{.*}}}
+// CHECK-OLD: [[CHAR:!.*]] = !{!"omnipotent char", !{{.*}}}
+// CHECK-OLD: [[TAG_INT:!.*]] = !{[[INT:!.*]], [[INT]], i64 0}
+// CHECK-OLD: [[INT]] = !{!"int", [[CHAR]]
+// CHECK-OLD: [[TAG_CHAR:!.*]] = !{[[CHAR]], [[CHAR]], i64 0}
 // (offset, size) = (0,1) char; (4,2) short; (8,4) int; (12,1) char; (16,4) int; (20,4) int
-// CHECK: [[TS2]] = !{i64 0, i64 1, !{{.*}}, i64 4, i64 2, !{{.*}}, i64 8, i64 4, !{{.*}}, i64 12, i64 1, !{{.*}}, i64 16, i64 4, {{.*}}, i64 20, i64 4, {{.*}}}
+// CHECK-OLD: [[TS2]] = !{i64 0, i64 1, !{{.*}}, i64 4, i64 2, !{{.*}}, i64 8, i64 4, !{{.*}}, i64 12, i64 1, !{{.*}}, i64 16, i64 4, {{.*}}, i64 20, i64 4, {{.*}}}
 // (offset, size) = (0,8) char; (0,2) char; (4,8) char
-// CHECK: [[TS3]] = !{i64 0, i64 8, !{{.*}}, i64 0, i64 2, !{{.*}}, i64 4, i64 8, !{{.*}}}
-// CHECK: [[TS4]] = !{i64 0, i64 1, [[TAG_CHAR]], i64 1, i64 4, [[TAG_INT]], i64 1, i64 1, [[TAG_CHAR]], i64 2, i64 1, [[TAG_CHAR]]}
-// CHECK: [[TS5]] = !{i64 0, i64 1, [[TAG_CHAR]], i64 4, i64 4, [[TAG_INT]], i64 4, i64 1, [[TAG_CHAR]], i64 5, i64 1, [[TAG_CHAR]]}
+// CHECK-OLD: [[TS3]] = !{i64 0, i64 8, !{{.*}}, i64 0, i64 2, !{{.*}}, i64 4, i64 8, !{{.*}}}
+// CHECK-OLD: [[TS4]] = !{i64 0, i64 1, [[TAG_CHAR]], i64 1, i64 4, [[TAG_INT]], i64 1, i64 1, [[TAG_CHAR]], i64 2, i64 1, [[TAG_CHAR]]}
+// CHECK-OLD: [[TS5]] = !{i64 0, i64 1, [[TAG_CHAR]], i64 4, i64 4, [[TAG_INT]], i64 4, i64 1, [[TAG_CHAR]], i64 5, i64 1, [[TAG_CHAR]]}
+
+// CHECK-NEW-DAG: [[TYPE_char:!.*]] = !{{{.*}}, i64 1, !"omnipotent char"}
+// CHECK-NEW-DAG: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0, i64 0}
+// CHECK-NEW-DAG: [[TYPE_short:!.*]] = !{[[TYPE_char]], i64 2, !"short"}
+// CHECK-NEW-DAG: [[TYPE_int:!.*]] = !{[[TYPE_char]], i64 4, !"int"}
+// CHECK-NEW-DAG: [[TYPE_A:!.*]] = !{[[TYPE_char]], i64 16, !"_ZTS1A", [[TYPE_short]], i64 0, i64 2, [[TYPE_int]], i64 4, i64 4, [[TYPE_char]], i64 8, i64 1, [[TYPE_int]], i64 12, i64 4}
+// CHECK-NEW-DAG: [[TAG_A]] = !{[[TYPE_A]], [[TYPE_A]], i64 0, i64 16}
+// CHECK-NEW-DAG: [[TYPE_B:!.*]] = !{[[TYPE_char]], i64 24, !"_ZTS1B", [[TYPE_char]], i64 0, i64 1, [[TYPE_A]], i64 4, i64 16, [[TYPE_int]], i64 20, i64 4}
+// CHECK-NEW-DAG: [[TAG_B]] = !{[[TYPE_B]], [[TYPE_B]], i64 0, i64 24}
+// CHECK-NEW-DAG: [[TAG_U]] = !{[[TYPE_char]], [[TYPE_char]], i64 0, i64 12}
+// CHECK-NEW-DAG: [[TYPE_C:!.*]] = !{[[TYPE_char]], i64 3, !"_ZTS1C", [[TYPE_char]], i64 0, i64 1, [[TYPE_int]], i64 1, i64 4, [[TYPE_char]], i64 1, i64 1, [[TYPE_char]], i64 2, i64 1}
+// CHECK-NEW-DAG: [[TAG_C]] = !{[[TYPE_C]], [[TYPE_C]], i64 0, i64 3}
+// CHECK-NEW-DAG: [[TYPE_D:!.*]] = !{[[TYPE_char]], i64 6, !"_ZTS1D", [[TYPE_char]], i64 0, i64 1, [[TYPE_int]], i64 4, i64 4, [[TYPE_char]], i64 4, i64 1, [[TYPE_char]], i64 5, i64 1}
+// CHECK-NEW-DAG: [[TAG_D]] = !{[[TYPE_D]], [[TYPE_D]], i64 0, i64 6}
diff --git a/test/CodeGen/tbaa.cpp b/test/CodeGen/tbaa.cpp
index 10e6487..c8a9e69 100644
--- a/test/CodeGen/tbaa.cpp
+++ b/test/CodeGen/tbaa.cpp
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -no-struct-path-tbaa -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefix=PATH
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefixes=PATH,OLD-PATH
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -new-struct-path-tbaa -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefixes=PATH,NEW-PATH
 // RUN: %clang_cc1 -triple x86_64-apple-darwin -O0 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefix=NO-TBAA
 // RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -relaxed-aliasing -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefix=NO-TBAA
 // Test TBAA metadata generated by front-end.
@@ -248,29 +249,57 @@
 // CHECK: [[TYPE_i16]] = !{!"short", [[TYPE_char]],
 // CHECK: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0}
 
-// PATH: [[TYPE_CHAR:!.*]] = !{!"omnipotent char", !
-// PATH: [[TAG_i32]] = !{[[TYPE_INT:!.*]], [[TYPE_INT]], i64 0}
-// PATH: [[TYPE_INT]] = !{!"int", [[TYPE_CHAR]]
-// PATH: [[TAG_A_f32]] = !{[[TYPE_A:!.*]], [[TYPE_INT]], i64 4}
-// PATH: [[TYPE_A]] = !{!"_ZTS7StructA", [[TYPE_SHORT:!.*]], i64 0, [[TYPE_INT]], i64 4, [[TYPE_SHORT]], i64 8, [[TYPE_INT]], i64 12}
-// PATH: [[TYPE_SHORT:!.*]] = !{!"short", [[TYPE_CHAR]]
-// PATH: [[TAG_A_f16]] = !{[[TYPE_A]], [[TYPE_SHORT]], i64 0}
-// PATH: [[TAG_B_a_f32]] = !{[[TYPE_B:!.*]], [[TYPE_INT]], i64 8}
-// PATH: [[TYPE_B]] = !{!"_ZTS7StructB", [[TYPE_SHORT]], i64 0, [[TYPE_A]], i64 4, [[TYPE_INT]], i64 20}
-// PATH: [[TAG_B_a_f16]] = !{[[TYPE_B]], [[TYPE_SHORT]], i64 4}
-// PATH: [[TAG_B_f32]] = !{[[TYPE_B]], [[TYPE_INT]], i64 20}
-// PATH: [[TAG_B_a_f32_2]] = !{[[TYPE_B]], [[TYPE_INT]], i64 16}
-// PATH: [[TAG_S_f32]] = !{[[TYPE_S:!.*]], [[TYPE_INT]], i64 4}
-// PATH: [[TYPE_S]] = !{!"_ZTS7StructS", [[TYPE_SHORT]], i64 0, [[TYPE_INT]], i64 4}
-// PATH: [[TAG_S_f16]] = !{[[TYPE_S]], [[TYPE_SHORT]], i64 0}
-// PATH: [[TAG_S2_f32]] = !{[[TYPE_S2:!.*]], [[TYPE_INT]], i64 4}
-// PATH: [[TYPE_S2]] = !{!"_ZTS8StructS2", [[TYPE_SHORT]], i64 0, [[TYPE_INT]], i64 4}
-// PATH: [[TAG_S2_f16]] = !{[[TYPE_S2]], [[TYPE_SHORT]], i64 0}
-// PATH: [[TAG_C_b_a_f32]] = !{[[TYPE_C:!.*]], [[TYPE_INT]], i64 12}
-// PATH: [[TYPE_C]] = !{!"_ZTS7StructC", [[TYPE_SHORT]], i64 0, [[TYPE_B]], i64 4, [[TYPE_INT]], i64 28}
-// PATH: [[TAG_D_b_a_f32]] = !{[[TYPE_D:!.*]], [[TYPE_INT]], i64 12}
-// PATH: [[TYPE_D]] = !{!"_ZTS7StructD", [[TYPE_SHORT]], i64 0, [[TYPE_B]], i64 4, [[TYPE_INT]], i64 28, [[TYPE_CHAR]], i64 32}
-// PATH: [[TAG_five_b]] = !{[[TYPE_five:!.*]], [[TYPE_CHAR]], i64 1}
-// PATH: [[TYPE_five]] = !{!"_ZTS4five", [[TYPE_CHAR]], i64 0, [[TYPE_INT]], i64 1, [[TYPE_CHAR]], i64 1, [[TYPE_CHAR]], i64 2}
-// PATH: [[TAG_six_b]] = !{[[TYPE_six:!.*]], [[TYPE_CHAR]], i64 4}
-// PATH: [[TYPE_six]] = !{!"_ZTS3six", [[TYPE_CHAR]], i64 0, [[TYPE_INT]], i64 4, [[TYPE_CHAR]], i64 4, [[TYPE_CHAR]], i64 5}
+// OLD-PATH: [[TYPE_CHAR:!.*]] = !{!"omnipotent char", !
+// OLD-PATH: [[TAG_i32]] = !{[[TYPE_INT:!.*]], [[TYPE_INT]], i64 0}
+// OLD-PATH: [[TYPE_INT]] = !{!"int", [[TYPE_CHAR]]
+// OLD-PATH: [[TAG_A_f32]] = !{[[TYPE_A:!.*]], [[TYPE_INT]], i64 4}
+// OLD-PATH: [[TYPE_A]] = !{!"_ZTS7StructA", [[TYPE_SHORT:!.*]], i64 0, [[TYPE_INT]], i64 4, [[TYPE_SHORT]], i64 8, [[TYPE_INT]], i64 12}
+// OLD-PATH: [[TYPE_SHORT:!.*]] = !{!"short", [[TYPE_CHAR]]
+// OLD-PATH: [[TAG_A_f16]] = !{[[TYPE_A]], [[TYPE_SHORT]], i64 0}
+// OLD-PATH: [[TAG_B_a_f32]] = !{[[TYPE_B:!.*]], [[TYPE_INT]], i64 8}
+// OLD-PATH: [[TYPE_B]] = !{!"_ZTS7StructB", [[TYPE_SHORT]], i64 0, [[TYPE_A]], i64 4, [[TYPE_INT]], i64 20}
+// OLD-PATH: [[TAG_B_a_f16]] = !{[[TYPE_B]], [[TYPE_SHORT]], i64 4}
+// OLD-PATH: [[TAG_B_f32]] = !{[[TYPE_B]], [[TYPE_INT]], i64 20}
+// OLD-PATH: [[TAG_B_a_f32_2]] = !{[[TYPE_B]], [[TYPE_INT]], i64 16}
+// OLD-PATH: [[TAG_S_f32]] = !{[[TYPE_S:!.*]], [[TYPE_INT]], i64 4}
+// OLD-PATH: [[TYPE_S]] = !{!"_ZTS7StructS", [[TYPE_SHORT]], i64 0, [[TYPE_INT]], i64 4}
+// OLD-PATH: [[TAG_S_f16]] = !{[[TYPE_S]], [[TYPE_SHORT]], i64 0}
+// OLD-PATH: [[TAG_S2_f32]] = !{[[TYPE_S2:!.*]], [[TYPE_INT]], i64 4}
+// OLD-PATH: [[TYPE_S2]] = !{!"_ZTS8StructS2", [[TYPE_SHORT]], i64 0, [[TYPE_INT]], i64 4}
+// OLD-PATH: [[TAG_S2_f16]] = !{[[TYPE_S2]], [[TYPE_SHORT]], i64 0}
+// OLD-PATH: [[TAG_C_b_a_f32]] = !{[[TYPE_C:!.*]], [[TYPE_INT]], i64 12}
+// OLD-PATH: [[TYPE_C]] = !{!"_ZTS7StructC", [[TYPE_SHORT]], i64 0, [[TYPE_B]], i64 4, [[TYPE_INT]], i64 28}
+// OLD-PATH: [[TAG_D_b_a_f32]] = !{[[TYPE_D:!.*]], [[TYPE_INT]], i64 12}
+// OLD-PATH: [[TYPE_D]] = !{!"_ZTS7StructD", [[TYPE_SHORT]], i64 0, [[TYPE_B]], i64 4, [[TYPE_INT]], i64 28, [[TYPE_CHAR]], i64 32}
+// OLD-PATH: [[TAG_five_b]] = !{[[TYPE_five:!.*]], [[TYPE_CHAR]], i64 1}
+// OLD-PATH: [[TYPE_five]] = !{!"_ZTS4five", [[TYPE_CHAR]], i64 0, [[TYPE_INT]], i64 1, [[TYPE_CHAR]], i64 1, [[TYPE_CHAR]], i64 2}
+// OLD-PATH: [[TAG_six_b]] = !{[[TYPE_six:!.*]], [[TYPE_CHAR]], i64 4}
+// OLD-PATH: [[TYPE_six]] = !{!"_ZTS3six", [[TYPE_CHAR]], i64 0, [[TYPE_INT]], i64 4, [[TYPE_CHAR]], i64 4, [[TYPE_CHAR]], i64 5}
+
+// NEW-PATH-DAG: [[ROOT:!.*]] = !{!"Simple C++ TBAA"}
+// NEW-PATH-DAG: [[TYPE_char:!.*]] = !{[[ROOT]], i64 1, !"omnipotent char"}
+// NEW-PATH-DAG: [[TYPE_short:!.*]] = !{[[TYPE_char]], i64 2, !"short"}
+// NEW-PATH-DAG: [[TYPE_int:!.*]] = !{[[TYPE_char]], i64 4, !"int"}
+// NEW-PATH-DAG: [[TAG_i32:!.*]] = !{[[TYPE_int]], [[TYPE_int]], i64 0, i64 4}
+// NEW-PATH-DAG: [[TYPE_A:!.*]] = !{[[TYPE_char]], i64 16, !"_ZTS7StructA", [[TYPE_short]], i64 0, i64 2, [[TYPE_int]], i64 4, i64 4, !12, i64 8, i64 2, [[TYPE_int]], i64 12, i64 4}
+// NEW-PATH-DAG: [[TAG_A_f16]] = !{[[TYPE_A]], [[TYPE_short]], i64 0, i64 2}
+// NEW-PATH-DAG: [[TAG_A_f32]] = !{[[TYPE_A]], [[TYPE_int]], i64 4, i64 4}
+// NEW-PATH-DAG: [[TYPE_B:!.*]] = !{[[TYPE_char]], i64 24, !"_ZTS7StructB", [[TYPE_short]], i64 0, i64 2, [[TYPE_A]], i64 4, i64 16, [[TYPE_int]], i64 20, i64 4}
+// NEW-PATH-DAG: [[TAG_B_a_f16]] = !{[[TYPE_B]], [[TYPE_short]], i64 4, i64 2}
+// NEW-PATH-DAG: [[TAG_B_a_f32]] = !{[[TYPE_B]], [[TYPE_int]], i64 8, i64 4}
+// NEW-PATH-DAG: [[TAG_B_f32]] = !{[[TYPE_B]], [[TYPE_int]], i64 20, i64 4}
+// NEW-PATH-DAG: [[TAG_B_a_f32_2]] = !{[[TYPE_B]], [[TYPE_int]], i64 16, i64 4}
+// NEW-PATH-DAG: [[TYPE_S:!.*]] = !{[[TYPE_char]], i64 8, !"_ZTS7StructS", [[TYPE_short]], i64 0, i64 2, [[TYPE_int]], i64 4, i64 4}
+// NEW-PATH-DAG: [[TAG_S_f16]] = !{[[TYPE_S]], [[TYPE_short]], i64 0, i64 2}
+// NEW-PATH-DAG: [[TAG_S_f32]] = !{[[TYPE_S]], [[TYPE_int]], i64 4, i64 4}
+// NEW-PATH-DAG: [[TYPE_S2:!.*]] = !{[[TYPE_char]], i64 8, !"_ZTS8StructS2", [[TYPE_short]], i64 0, i64 2, [[TYPE_int]], i64 4, i64 4}
+// NEW-PATH-DAG: [[TAG_S2_f16]] = !{[[TYPE_S2]], [[TYPE_short]], i64 0, i64 2}
+// NEW-PATH-DAG: [[TAG_S2_f32]] = !{[[TYPE_S2]], [[TYPE_int]], i64 4, i64 4}
+// NEW-PATH-DAG: [[TYPE_C:!.*]] = !{[[TYPE_char]], i64 32, !"_ZTS7StructC", [[TYPE_short]], i64 0, i64 2, [[TYPE_B]], i64 4, i64 24, [[TYPE_int]], i64 28, i64 4}
+// NEW-PATH-DAG: [[TAG_C_b_a_f32]] = !{[[TYPE_C]], [[TYPE_int]], i64 12, i64 4}
+// NEW-PATH-DAG: [[TYPE_D:!.*]] = !{[[TYPE_char]], i64 36, !"_ZTS7StructD", [[TYPE_short]], i64 0, i64 2, [[TYPE_B]], i64 4, i64 24, [[TYPE_int]], i64 28, i64 4, [[TYPE_char]], i64 32, i64 1}
+// NEW-PATH-DAG: [[TAG_D_b_a_f32]] = !{[[TYPE_D]], [[TYPE_int]], i64 12, i64 4}
+// NEW-PATH-DAG: [[TYPE_five:!.*]] = !{[[TYPE_char]], i64 3, !"_ZTS4five", [[TYPE_char]], i64 0, i64 1, [[TYPE_int]], i64 1, i64 4, [[TYPE_char]], i64 1, i64 1, [[TYPE_char]], i64 2, i64 1}
+// NEW-PATH-DAG: [[TAG_five_b]] = !{[[TYPE_five]], [[TYPE_char]], i64 1, i64 1}
+// NEW-PATH-DAG: [[TYPE_six:!.*]] = !{[[TYPE_char]], i64 6, !"_ZTS3six", [[TYPE_char]], i64 0, i64 1, [[TYPE_int]], i64 4, i64 4, [[TYPE_char]], i64 4, i64 1, [[TYPE_char]], i64 5, i64 1}
+// NEW-PATH-DAG: [[TAG_six_b]] = !{[[TYPE_six]], [[TYPE_char]], i64 4, i64 1}
diff --git a/test/CodeGen/tentative-decls.c b/test/CodeGen/tentative-decls.c
index d88c346..3217787 100644
--- a/test/CodeGen/tentative-decls.c
+++ b/test/CodeGen/tentative-decls.c
@@ -1,16 +1,16 @@
-// RUN: %clang_cc1 -emit-llvm -o %t %s
+// RUN: %clang_cc1 -emit-llvm -w -o - %s | FileCheck %s
 
-// RUN: grep '@r = common global \[1 x .*\] zeroinitializer' %t
+// CHECK-DAG: @r = common {{(dso_local )?}}global [1 x {{.*}}] zeroinitializer
 
 int r[];
 int (*a)[] = &r;
 
 struct s0;
 struct s0 x;
-// RUN: grep '@x = common global .struct.s0 zeroinitializer' %t
+// CHECK-DAG: @x = common {{(dso_local )?}}global %struct.s0 zeroinitializer
 
 struct s0 y;
-// RUN: grep '@y = common global .struct.s0 zeroinitializer' %t
+// CHECK-DAG: @y = common {{(dso_local )?}}global %struct.s0 zeroinitializer
 struct s0 *f0() {
   return &y;
 }
@@ -19,19 +19,19 @@
   int x;
 };
 
-// RUN: grep '@b = common global \[1 x .*\] zeroinitializer' %t
+// CHECK-DAG: @b = common {{(dso_local )?}}global [1 x {{.*}}] zeroinitializer
 int b[];
 int *f1() {
   return b;
 }
 
 // Check that the most recent tentative definition wins.
-// RUN: grep '@c = common global \[4 x .*\] zeroinitializer' %t
+// CHECK-DAG: @c = common {{(dso_local )?}}global [4 x {{.*}}] zeroinitializer
 int c[];
 int c[4];
 
 // Check that we emit static tentative definitions
-// RUN: grep '@c5 = internal global \[1 x .*\] zeroinitializer' %t
+// CHECK-DAG: @c5 = internal global [1 x {{.*}}] zeroinitializer
 static int c5[];
 static int func() { return c5[0]; }
 int callfunc() { return func(); }
diff --git a/test/CodeGen/thinlto-debug-pm.c b/test/CodeGen/thinlto-debug-pm.c
new file mode 100644
index 0000000..2accde1
--- /dev/null
+++ b/test/CodeGen/thinlto-debug-pm.c
@@ -0,0 +1,10 @@
+// Test to ensure -fdebug-pass-manager works when invoking the
+// ThinLTO backend path with the new PM.
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -o %t.o -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s
+// RUN: llvm-lto -thinlto -o %t %t.o
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s
+// CHECK: Running pass:
+
+void foo() {
+}
diff --git a/test/CodeGen/thinlto-distributed-backend-skip.ll b/test/CodeGen/thinlto-distributed-backend-skip.ll
new file mode 100644
index 0000000..d9fa47d
--- /dev/null
+++ b/test/CodeGen/thinlto-distributed-backend-skip.ll
@@ -0,0 +1,21 @@
+; REQUIRES: x86-registered-target
+
+; Check that ThinLTO backend respects "SkipModuleByDistributedBackend"
+; flag which can be set by indexing.
+
+; RUN: opt -thinlto-bc -o %t.o %s
+
+; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \
+; RUN:   -fthinlto-index=%S/Inputs/thinlto-distributed-backend-skip.bc \
+; RUN:   -emit-llvm -o - -x ir %t.o | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+; CHECK: "empty"
+; CHECK: target triple =
+; CHECK-NOT: @main
+define i32 @main() {
+entry:
+  ret i32 0
+}
diff --git a/test/CodeGen/thinlto-distributed-cfi-devirt.ll b/test/CodeGen/thinlto-distributed-cfi-devirt.ll
new file mode 100644
index 0000000..bd35524
--- /dev/null
+++ b/test/CodeGen/thinlto-distributed-cfi-devirt.ll
@@ -0,0 +1,102 @@
+; REQUIRES: x86-registered-target
+
+; Backend test for distribute ThinLTO with CFI.
+; It additionally enables -fwhole-program-vtables to get more information in
+; TYPE_IDs of GLOBALVAL_SUMMARY_BLOCK.
+
+; RUN: opt -thinlto-bc -o %t.o %s
+
+; RUN: llvm-lto2 run -thinlto-distributed-indexes %t.o \
+; RUN:   -o %t2.index \
+; RUN:   -r=%t.o,test,px \
+; RUN:   -r=%t.o,_ZN1A1nEi,p \
+; RUN:   -r=%t.o,_ZN1B1fEi,p \
+; RUN:   -r=%t.o,_ZN1C1fEi,p \
+; RUN:   -r=%t.o,_ZTV1B, \
+; RUN:   -r=%t.o,_ZTV1C, \
+; RUN:   -r=%t.o,_ZN1A1nEi, \
+; RUN:   -r=%t.o,_ZN1B1fEi, \
+; RUN:   -r=%t.o,_ZN1C1fEi, \
+; RUN:   -r=%t.o,_ZTV1B,px \
+; RUN:   -r=%t.o,_ZTV1C,px
+
+; Ensure that typeids are in the index.
+; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s
+; CHECK-LABEL: <GLOBALVAL_SUMMARY_BLOCK
+; CHECK: <TYPE_ID op0=0 op1=6 op2=4 op3=7 op4=0 op5=0 op6=0 op7=0 op8=0 op9=0 op10=6 op11=0 op12=0 op13=8 op14=1 op15=6 op16=9 op17=0/>
+; CHECK-LABEL: </GLOBALVAL_SUMMARY_BLOCK
+; CHECK-LABEL: <STRTAB_BLOCK
+; CHECK: blob data = '_ZTS1A_ZN1A1nEi'
+; CHECK-LABEL: </STRTAB_BLOCK
+
+; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \
+; RUN:   -emit-obj -fthinlto-index=%t.o.thinlto.bc \
+; RUN:   -emit-llvm -o - -x ir %t.o | FileCheck %s --check-prefixes=CHECK-IR
+
+; Check that backend does not fail generating native code.
+; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \
+; RUN:   -emit-obj -fthinlto-index=%t.o.thinlto.bc \
+; RUN:   -o %t.native.o -x ir %t.o
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+%struct.C = type { %struct.A }
+
+@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1
+@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2
+
+; CHECK-IR-LABEL: define i32 @test
+define i32 @test(%struct.A* %obj, i32 %a) {
+entry:
+  %0 = bitcast %struct.A* %obj to i8**
+  %vtable5 = load i8*, i8** %0
+
+  %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 8, metadata !"_ZTS1A")
+  %2 = extractvalue { i8*, i1 } %1, 1
+  br i1 %2, label %cont, label %trap
+
+trap:
+  tail call void @llvm.trap()
+  unreachable
+
+cont:
+  %3 = extractvalue { i8*, i1 } %1, 0
+  %4 = bitcast i8* %3 to i32 (%struct.A*, i32)*
+
+  ; Check that the call was devirtualized.
+  ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi
+  %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a)
+  %vtable16 = load i8*, i8** %0
+  %5 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable16, i32 0, metadata !"_ZTS1A")
+  %6 = extractvalue { i8*, i1 } %5, 1
+  br i1 %6, label %cont2, label %trap
+
+cont2:
+  %7 = extractvalue { i8*, i1 } %5, 0
+  %8 = bitcast i8* %7 to i32 (%struct.A*, i32)*
+
+  ; Check that traps are conditional. Invalid TYPE_ID can cause
+  ; unconditional traps.
+  ; CHECK-IR: br i1 {{.*}}, label %trap
+
+  ; We still have to call it as virtual.
+  ; CHECK-IR: %call3 = tail call i32 %8
+  %call3 = tail call i32 %8(%struct.A* nonnull %obj, i32 %call)
+  ret i32 %call3
+}
+; CHECK-IR-LABEL: ret i32
+; CHECK-IR-LABEL: }
+
+declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata)
+declare void @llvm.trap()
+
+declare i32 @_ZN1B1fEi(%struct.B* %this, i32 %a)
+declare i32 @_ZN1A1nEi(%struct.A* %this, i32 %a)
+declare i32 @_ZN1C1fEi(%struct.C* %this, i32 %a)
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}
+!2 = !{i64 16, !"_ZTS1C"}
diff --git a/test/CodeGen/thinlto-distributed-cfi.ll b/test/CodeGen/thinlto-distributed-cfi.ll
new file mode 100644
index 0000000..5e8e39d
--- /dev/null
+++ b/test/CodeGen/thinlto-distributed-cfi.ll
@@ -0,0 +1,67 @@
+; REQUIRES: x86-registered-target
+
+; Backend test for distribute ThinLTO with CFI.
+
+; RUN: opt -thinlto-bc -o %t.o %s
+
+; RUN: llvm-lto2 run -thinlto-distributed-indexes %t.o \
+; RUN:   -o %t2.index \
+; RUN:   -r=%t.o,test,px \
+; RUN:   -r=%t.o,_ZTV1B, \
+; RUN:   -r=%t.o,_ZN1B1fEi, \
+; RUN:   -r=%t.o,_ZTV1B,px
+
+; Check that typeids are in the index.
+; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s
+; CHECK-LABEL: <GLOBALVAL_SUMMARY_BLOCK
+; CHECK: <TYPE_ID op0=0 op1=6 op2=3 op3=0 op4=0 op5=0 op6=0 op7=0/>
+; CHECK-LABEL: </GLOBALVAL_SUMMARY_BLOCK
+; CHECK-LABEL: <STRTAB_BLOCK
+; CHECK: blob data = '_ZTS1A'
+; CHECK-LABEL: </STRTAB_BLOCK
+
+; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \
+; RUN:   -emit-obj -fthinlto-index=%t.o.thinlto.bc \
+; RUN:   -emit-llvm -o - -x ir %t.o | FileCheck %s --check-prefixes=CHECK-IR
+
+; Ensure that backend does not fail generating native code.
+; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \
+; RUN:   -emit-obj -fthinlto-index=%t.o.thinlto.bc \
+; RUN:   -o %t.native.o -x ir %t.o
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.B = type { %struct.A }
+%struct.A = type { i32 (...)** }
+
+@_ZTV1B = constant { [3 x i8*] } { [3 x i8*] [i8* undef, i8* undef, i8* undef] }, !type !0
+
+; CHECK-IR-LABEL: define void @test
+define void @test(i8* %b) {
+entry:
+  ; Ensure that traps are conditional. Invalid TYPE_ID can cause
+  ; unconditional traps.
+  ; CHECK-IR: br i1 {{.*}}, label %trap
+  %0 = bitcast i8* %b to i8**
+  %vtable2 = load i8*, i8** %0
+  %1 = tail call i1 @llvm.type.test(i8* %vtable2, metadata !"_ZTS1A")
+  br i1 %1, label %cont, label %trap
+
+trap:
+  tail call void @llvm.trap()
+  unreachable
+
+cont:
+  ; CHECK-IR-LABEL: ret void
+  ret void
+}
+; CHECK-IR-LABEL: }
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.trap()
+
+declare i32 @_ZN1B1fEi(%struct.B* %this, i32 %a)
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}
diff --git a/test/CodeGen/thinlto-distributed.ll b/test/CodeGen/thinlto-distributed.ll
new file mode 100644
index 0000000..ae21d7b
--- /dev/null
+++ b/test/CodeGen/thinlto-distributed.ll
@@ -0,0 +1,21 @@
+; REQUIRES: x86-registered-target
+
+; Trivial test for distributes ThinLTO
+
+; RUN: opt -thinlto-bc -o %t.o %s
+
+; RUN: llvm-lto2 run -thinlto-distributed-indexes %t.o \
+; RUN:   -o %t2.index \
+; RUN:   -r=%t.o,main,px
+
+; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \
+; RUN:   -emit-obj -fthinlto-index=%t.o.thinlto.bc \
+; RUN:   -o %t.native.o -x ir %t.o
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+define i32 @main() {
+entry:
+  ret i32 0
+}
diff --git a/test/CodeGen/thinlto_backend.ll b/test/CodeGen/thinlto_backend.ll
index 86f30c0..40530ec 100644
--- a/test/CodeGen/thinlto_backend.ll
+++ b/test/CodeGen/thinlto_backend.ll
@@ -20,6 +20,12 @@
 ; CHECK-OBJ-IGNORE-EMPTY: T f1
 ; CHECK-OBJ-IGNORE-EMPTY: U f2
 
+; Ensure we don't fail with index and non-ThinLTO object file, and output must
+; be empty file.
+; RUN: opt -o %t5.o %s
+; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t5.o -c -fthinlto-index=%t.thinlto.bc
+; RUN: llvm-nm %t4.o | count 0
+
 ; Ensure f2 was imported
 ; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc
 ; RUN: llvm-nm %t3.o | FileCheck --check-prefix=CHECK-OBJ %s
diff --git a/test/CodeGen/transparent-union-redecl.c b/test/CodeGen/transparent-union-redecl.c
new file mode 100644
index 0000000..3119208
--- /dev/null
+++ b/test/CodeGen/transparent-union-redecl.c
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -Werror -triple i386-linux -emit-llvm -o - %s | FileCheck %s
+
+// Test that different order of declarations is acceptable and that
+// implementing different redeclarations is acceptable.
+// rdar://problem/34949329
+
+typedef union {
+  int i;
+  float f;
+} TU __attribute__((transparent_union));
+
+// CHECK-LABEL: define void @f0(i32 %tu.coerce)
+// CHECK: %tu = alloca %union.TU, align 4
+// CHECK: %coerce.dive = getelementptr inbounds %union.TU, %union.TU* %tu, i32 0, i32 0
+// CHECK: store i32 %tu.coerce, i32* %coerce.dive, align 4
+void f0(TU tu) {}
+void f0(int i);
+
+// CHECK-LABEL: define void @f1(i32 %tu.coerce)
+// CHECK: %tu = alloca %union.TU, align 4
+// CHECK: %coerce.dive = getelementptr inbounds %union.TU, %union.TU* %tu, i32 0, i32 0
+// CHECK: store i32 %tu.coerce, i32* %coerce.dive, align 4
+void f1(int i);
+void f1(TU tu) {}
+
+// CHECK-LABEL: define void @f2(i32 %i)
+// CHECK: %i.addr = alloca i32, align 4
+// CHECK: store i32 %i, i32* %i.addr, align 4
+void f2(TU tu);
+void f2(int i) {}
+
+// CHECK-LABEL: define void @f3(i32 %i)
+// CHECK: %i.addr = alloca i32, align 4
+// CHECK: store i32 %i, i32* %i.addr, align 4
+void f3(int i) {}
+void f3(TU tu);
+
+// Also test functions with parameters specified K&R style.
+// CHECK-LABEL: define void @knrStyle(i32 %tu.coerce)
+// CHECK: %tu = alloca %union.TU, align 4
+// CHECK: %coerce.dive = getelementptr inbounds %union.TU, %union.TU* %tu, i32 0, i32 0
+// CHECK: store i32 %tu.coerce, i32* %coerce.dive, align 4
+void knrStyle(int i);
+void knrStyle(tu) TU tu; {}
diff --git a/test/CodeGen/ubsan-noreturn.c b/test/CodeGen/ubsan-noreturn.c
new file mode 100644
index 0000000..7de24e0
--- /dev/null
+++ b/test/CodeGen/ubsan-noreturn.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -emit-llvm -fsanitize=unreachable -o - | FileCheck %s
+
+// CHECK-LABEL: @f(
+void __attribute__((noreturn)) f() {
+  // CHECK: __ubsan_handle_builtin_unreachable
+  // CHECK: unreachable
+}
diff --git a/test/CodeGen/ubsan-pass-object-size.c b/test/CodeGen/ubsan-pass-object-size.c
new file mode 100644
index 0000000..d5d4f5a
--- /dev/null
+++ b/test/CodeGen/ubsan-pass-object-size.c
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 %s -emit-llvm -w -triple x86_64-apple-darwin10 -fsanitize=array-bounds -o - | FileCheck %s
+
+// CHECK-LABEL: define i32 @foo(
+int foo(int *const p __attribute__((pass_object_size(0))), int n) {
+  int x = (p)[n];
+
+  // CHECK: [[SIZE_ALLOCA:%.*]] = alloca i64, align 8
+  // CHECK: store i64 %{{.*}}, i64* [[SIZE_ALLOCA]], align 8
+  // CHECK: [[LOAD_SIZE:%.*]] = load i64, i64* [[SIZE_ALLOCA]], align 8, !nosanitize
+  // CHECK-NEXT: [[SCALED_SIZE:%.*]] = udiv i64 [[LOAD_SIZE]], 4, !nosanitize
+  // CHECK-NEXT: [[SEXT_N:%.*]] = sext i32 %{{.*}} to i64, !nosanitize
+  // CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[SEXT_N]], [[SCALED_SIZE]], !nosanitize
+  // CHECK-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  // CHECK: __ubsan_handle_out_of_bounds
+
+  {
+    int **p = &p; // Shadow the parameter. The pass_object_size info is lost.
+    // CHECK-NOT: __ubsan_handle_out_of_bounds
+    x = *p[n];
+  }
+
+  // CHECK: ret i32
+  return x;
+}
+
+typedef struct {} ZeroSizedType;
+
+// CHECK-LABEL: define void @bar(
+ZeroSizedType bar(ZeroSizedType *const p __attribute__((pass_object_size(0))), int n) {
+  // CHECK-NOT: __ubsan_handle_out_of_bounds
+  // CHECK: ret void
+  return p[n];
+}
+
+// CHECK-LABEL: define i32 @baz(
+int baz(int *const p __attribute__((pass_object_size(1))), int n) {
+  // CHECK: __ubsan_handle_out_of_bounds
+  // CHECK: ret i32
+  return p[n];
+}
+
+// CHECK-LABEL: define i32 @mat(
+int mat(int *const p __attribute__((pass_object_size(2))), int n) {
+  // CHECK-NOT: __ubsan_handle_out_of_bounds
+  // CHECK: ret i32
+  return p[n];
+}
+
+// CHECK-LABEL: define i32 @pat(
+int pat(int *const p __attribute__((pass_object_size(3))), int n) {
+  // CHECK-NOT: __ubsan_handle_out_of_bounds
+  // CHECK: ret i32
+  return p[n];
+}
+
+// CHECK-LABEL: define i32 @cat(
+int cat(int p[static 10], int n) {
+  // CHECK-NOT: __ubsan_handle_out_of_bounds
+  // CHECK: ret i32
+  return p[n];
+}
+
+// CHECK-LABEL: define i32 @bat(
+int bat(int n, int p[n]) {
+  // CHECK-NOT: __ubsan_handle_out_of_bounds
+  // CHECK: ret i32
+  return p[n];
+}
diff --git a/test/CodeGen/vaes-builtins.c b/test/CodeGen/vaes-builtins.c
new file mode 100644
index 0000000..df160aa
--- /dev/null
+++ b/test/CodeGen/vaes-builtins.c
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vaes -emit-llvm -o - | FileCheck %s --check-prefix AVX
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX512 -target-feature +vaes -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes AVX,AVX512
+
+#include <immintrin.h>
+
+__m256i test_mm256_aesenc_epi128(__m256i __A, __m256i __B) {
+  // AVX-LABEL: @test_mm256_aesenc_epi128
+  // AVX: @llvm.x86.aesni.aesenc.256
+  return _mm256_aesenc_epi128(__A, __B);
+}
+
+__m256i test_mm256_aesenclast_epi128(__m256i __A, __m256i __B) {
+  // AVX-LABEL: @test_mm256_aesenclast_epi128
+  // AVX: @llvm.x86.aesni.aesenclast.256
+  return _mm256_aesenclast_epi128(__A, __B);
+}
+
+__m256i test_mm256_aesdec_epi128(__m256i __A, __m256i __B) {
+  // AVX-LABEL: @test_mm256_aesdec_epi128
+  // AVX: @llvm.x86.aesni.aesdec.256
+  return _mm256_aesdec_epi128(__A, __B);
+}
+
+__m256i test_mm256_aesdeclast_epi128(__m256i __A, __m256i __B) {
+  // AVX-LABEL: @test_mm256_aesdeclast_epi128
+  // AVX: @llvm.x86.aesni.aesdeclast.256
+  return _mm256_aesdeclast_epi128(__A, __B);
+}
+
+#ifdef AVX512
+__m512i test_mm512_aesenc_epi128(__m512i __A, __m512i __B) {
+  // AVX512-LABEL: @test_mm512_aesenc_epi128
+  // AVX512: @llvm.x86.aesni.aesenc.512
+  return _mm512_aesenc_epi128(__A, __B);
+}
+
+__m512i test_mm512_aesenclast_epi128(__m512i __A, __m512i __B) {
+  // AVX512-LABEL: @test_mm512_aesenclast_epi128
+  // AVX512: @llvm.x86.aesni.aesenclast.512
+  return _mm512_aesenclast_epi128(__A, __B);
+}
+
+__m512i test_mm512_aesdec_epi128(__m512i __A, __m512i __B) {
+  // AVX512-LABEL: @test_mm512_aesdec_epi128
+  // AVX512: @llvm.x86.aesni.aesdec.512
+  return _mm512_aesdec_epi128(__A, __B);
+}
+
+__m512i test_mm512_aesdeclast_epi128(__m512i __A, __m512i __B) {
+  // AVX512-LABEL: @test_mm512_aesdeclast_epi128
+  // AVX512: @llvm.x86.aesni.aesdeclast.512
+  return _mm512_aesdeclast_epi128(__A, __B);
+}
+#endif
+
diff --git a/test/CodeGen/variadic-null-win64.c b/test/CodeGen/variadic-null-win64.c
index 23c85b8..a52fb89 100644
--- a/test/CodeGen/variadic-null-win64.c
+++ b/test/CodeGen/variadic-null-win64.c
@@ -15,7 +15,7 @@
   v(f, 1, 2, 3, NULL);
   kr(f, 1, 2, 3, 0);
 }
-// WINDOWS: define void @f(i8* %f)
+// WINDOWS: define dso_local void @f(i8* %f)
 // WINDOWS: call void (i8*, ...) @v(i8* {{.*}}, i32 1, i32 2, i32 3, i64 0)
 // WINDOWS: call void bitcast (void (...)* @kr to void (i8*, i32, i32, i32, i32)*)(i8* {{.*}}, i32 1, i32 2, i32 3, i32 0)
 // LINUX: define void @f(i8* %f)
diff --git a/test/CodeGen/vector-scalar.c b/test/CodeGen/vector-scalar.c
new file mode 100644
index 0000000..0c973cd
--- /dev/null
+++ b/test/CodeGen/vector-scalar.c
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
+
+// PR27085
+
+typedef unsigned char uchar4  __attribute__ ((vector_size (4)));
+
+// CHECK: @add2
+// CHECK: add <4 x i8> {{.*}}, <i8 2, i8 2, i8 2, i8 2>
+uchar4 add2(uchar4 v)
+{
+  return v + 2;
+}
+
+// CHECK: @sub2
+// CHECK: sub <4 x i8> {{.*}}, <i8 2, i8 2, i8 2, i8 2>
+uchar4 sub2(uchar4 v)
+{
+  return v - 2;
+}
+
+// CHECK: @mul2
+// CHECK: mul <4 x i8> {{.*}}, <i8 2, i8 2, i8 2, i8 2>
+uchar4 mul2(uchar4 v)
+{
+  return v * 2;
+}
+
+// CHECK: @div2
+// CHECK: udiv <4 x i8> {{.*}}, <i8 2, i8 2, i8 2, i8 2>
+uchar4 div2(uchar4 v)
+{
+  return v / 2;
+}
+
+typedef __attribute__(( ext_vector_type(4) )) unsigned char uchar4_ext;
+
+// CHECK: @div3_ext
+// CHECK: udiv <4 x i8> %{{.*}}, <i8 3, i8 3, i8 3, i8 3>
+uchar4_ext div3_ext(uchar4_ext v)
+{
+  return v / 3;
+}
diff --git a/test/CodeGen/vectorcall.c b/test/CodeGen/vectorcall.c
index fa244fb..564b41e 100644
--- a/test/CodeGen/vectorcall.c
+++ b/test/CodeGen/vectorcall.c
@@ -2,56 +2,56 @@
 // RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32     | FileCheck %s --check-prefix=X64
 
 void __vectorcall v1(int a, int b) {}
-// X32: define x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b)
-// X64: define x86_vectorcallcc void @"\01v1@@16"(i32 %a, i32 %b)
+// X32: define dso_local x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b)
+// X64: define dso_local x86_vectorcallcc void @"\01v1@@16"(i32 %a, i32 %b)
 
 void __vectorcall v2(char a, char b) {}
-// X32: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b)
-// X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b)
+// X32: define dso_local x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b)
+// X64: define dso_local x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b)
 
 struct Small { int x; };
 void __vectorcall v3(int a, struct Small b, int c) {}
-// X32: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c)
-// X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c)
+// X32: define dso_local x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c)
+// X64: define dso_local x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c)
 
 struct Large { int a[5]; };
 void __vectorcall v4(int a, struct Large b, int c) {}
-// X32: define x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
-// X64: define x86_vectorcallcc void @"\01v4@@40"(i32 %a, %struct.Large* %b, i32 %c)
+// X32: define dso_local x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
+// X64: define dso_local x86_vectorcallcc void @"\01v4@@40"(i32 %a, %struct.Large* %b, i32 %c)
 
 struct HFA2 { double x, y; };
 struct HFA4 { double w, x, y, z; };
 struct HFA5 { double v, w, x, y, z; };
 
 void __vectorcall hfa1(int a, struct HFA4 b, int c) {}
-// X32: define x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, %struct.HFA4 inreg %b.coerce, i32 inreg %c)
-// X64: define x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, %struct.HFA4 inreg %b.coerce, i32 %c)
+// X32: define dso_local x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, %struct.HFA4 inreg %b.coerce, i32 inreg %c)
+// X64: define dso_local x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, %struct.HFA4 inreg %b.coerce, i32 %c)
 
 // HFAs that would require more than six total SSE registers are passed
 // indirectly. Additional vector arguments can consume the rest of the SSE
 // registers.
 void __vectorcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
-// X32: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* inreg %b, double %c)
-// X64: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* %b, double %c)
+// X32: define dso_local x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* inreg %b, double %c)
+// X64: define dso_local x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* %b, double %c)
 
 // Ensure that we pass builtin types directly while counting them against the
 // SSE register usage.
 void __vectorcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
-// X32: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
-// X64: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* %f)
+// X32: define dso_local x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
+// X64: define dso_local x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* %f)
 
 // Aggregates with more than four elements are not HFAs and are passed byval.
 // Because they are not classified as homogeneous, they don't get special
 // handling to ensure alignment.
 void __vectorcall hfa4(struct HFA5 a) {}
-// X32: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4)
-// X64: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* %a)
+// X32: define dso_local x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4)
+// X64: define dso_local x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* %a)
 
 // Return HFAs of 4 or fewer elements in registers.
 static struct HFA2 g_hfa2;
 struct HFA2 __vectorcall hfa5(void) { return g_hfa2; }
-// X32: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
-// X64: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
+// X32: define dso_local x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
+// X64: define dso_local x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
 
 typedef float __attribute__((vector_size(16))) v4f32;
 struct HVA2 { v4f32 x, y; };
@@ -60,52 +60,52 @@
 struct HVA5 { v4f32 w, x, y, z, p; };
 
 v4f32 __vectorcall hva1(int a, struct HVA4 b, int c) {return b.w;}
-// X32: define x86_vectorcallcc <4 x float> @"\01hva1@@72"(i32 inreg %a, %struct.HVA4 inreg %b.coerce, i32 inreg %c)
-// X64: define x86_vectorcallcc <4 x float> @"\01hva1@@80"(i32 %a, %struct.HVA4 inreg %b.coerce, i32 %c)
+// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva1@@72"(i32 inreg %a, %struct.HVA4 inreg %b.coerce, i32 inreg %c)
+// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva1@@80"(i32 %a, %struct.HVA4 inreg %b.coerce, i32 %c)
 
 v4f32 __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {return c;}
-// X32: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b, <4 x float> %c)
-// X64: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b, <4 x float> %c)
+// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b, <4 x float> %c)
+// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b, <4 x float> %c)
 
 v4f32 __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {return f.x;}
-// X32: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
-// X64: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f)
+// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
+// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f)
 
 // vector types have higher priority then HVA structures, So vector types are allocated first
 // and HVAs are allocated if enough registers are available
 v4f32 __vectorcall hva4(struct HVA4 a, struct HVA2 b, v4f32 c) {return b.y;}
-// X32: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* inreg %b, <4 x float> %c)
-// X64: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* %b, <4 x float> %c)
+// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* inreg %b, <4 x float> %c)
+// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* %b, <4 x float> %c)
 
 v4f32 __vectorcall hva5(struct HVA3 a, struct HVA3 b, v4f32 c, struct HVA2 d) {return d.y;}
-// X32: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* inreg %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
-// X64: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
+// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* inreg %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
+// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
 
 struct HVA4 __vectorcall hva6(struct HVA4 a, struct HVA4 b) { return b;}
-// X32: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b)
-// X64: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b)
+// X32: define dso_local x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b)
+// X64: define dso_local x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b)
 
 struct HVA5 __vectorcall hva7() {struct HVA5 a = {}; return a;}
-// X32: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* inreg noalias sret %agg.result)
-// X64: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* noalias sret %agg.result)
+// X32: define dso_local x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* inreg noalias sret %agg.result)
+// X64: define dso_local x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* noalias sret %agg.result)
 
 v4f32 __vectorcall hva8(v4f32 a, v4f32 b, v4f32 c, v4f32 d, int e, v4f32 f) {return f;}
-// X32: define x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 inreg %e, <4 x float> %f)
-// X64: define x86_vectorcallcc <4 x float> @"\01hva8@@88"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f)
+// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 inreg %e, <4 x float> %f)
+// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva8@@88"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f)
 
 typedef float __attribute__((ext_vector_type(3))) v3f32;
 struct OddSizeHVA { v3f32 x, y; };
 
 void __vectorcall odd_size_hva(struct OddSizeHVA a) {}
-// X32: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
-// X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
+// X32: define dso_local x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
+// X64: define dso_local x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
 
 // The Vectorcall ABI only allows passing the first 6 items in registers in x64, so this shouldn't
 // consider 'p7' as a register.  Instead p5 gets put into the register on the second pass.
 // x86 should pass p2, p6 and p7 in registers, then p1 in the second pass.
 struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3, int p4, struct HFA2 p5, float p6, float p7, int p8){ return p1;}
-// X32: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@84"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2* %p5, float %p6, float %p7, i32 %p8)
-// X64: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@104"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7, i32 %p8)
+// X32: define dso_local x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@84"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2* %p5, float %p6, float %p7, i32 %p8)
+// X64: define dso_local x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@104"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7, i32 %p8)
 
 // Vectorcall in both architectures allows passing of an HVA as long as there is room,
 // even if it is not one of the first 6 arguments.  First pass puts p4 into a
@@ -113,6 +113,6 @@
 // in a register, does NOT put p7 in a register (since theres no room), then puts 
 // p8 in a register.
 void __vectorcall HVAAnywhere(struct HFA2 p1, int p2, int p3, float p4, int p5, int p6, struct HFA4 p7, struct HFA2 p8, float p9){}
-// X32: define x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg %p2, i32 inreg %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
-// X64: define x86_vectorcallcc void @"\01HVAAnywhere@@112"(%struct.HFA2 inreg %p1.coerce, i32 %p2, i32 %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9) 
+// X32: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg %p2, i32 inreg %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
+// X64: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@112"(%struct.HFA2 inreg %p1.coerce, i32 %p2, i32 %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9) 
 
diff --git a/test/CodeGen/volatile-1.c b/test/CodeGen/volatile-1.c
index f63274b..c62a8fd 100644
--- a/test/CodeGen/volatile-1.c
+++ b/test/CodeGen/volatile-1.c
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -Wno-return-type -Wno-unused-value -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -Wno-return-type -Wno-unused-value -emit-llvm %s -w -o - | FileCheck %s
 
-// CHECK: @i = common global [[INT:i[0-9]+]] 0
+// CHECK: @i = common {{(dso_local )?}}global [[INT:i[0-9]+]] 0
 volatile int i, j, k;
 volatile int ar[5];
 volatile char c;
-// CHECK: @ci = common global [[CINT:.*]] zeroinitializer
+// CHECK: @ci = common {{(dso_local )?}}global [[CINT:.*]] zeroinitializer
 volatile _Complex int ci;
 volatile struct S {
 #ifdef __cplusplus
diff --git a/test/CodeGen/volatile.c b/test/CodeGen/volatile.c
index 52915f6..0f58bb6 100644
--- a/test/CodeGen/volatile.c
+++ b/test/CodeGen/volatile.c
@@ -199,7 +199,7 @@
 // CHECK: store volatile i32 {{.*}}, i32* @vtS
   (void)vF2;
   // From vF2 to a temporary
-// CHECK: call void @llvm.memcpy.{{.*}}(i8* %{{.*}}, i8* {{.*}} @vF2 {{.*}}, i1 true)
+// CHECK: call void @llvm.memcpy.{{.*}}(i8* align {{[0-9]+}} %{{.*}}, i8* {{.*}} @vF2 {{.*}}, i1 true)
   vF2 = vF2;
   // vF2 to itself
 // CHECK: call void @llvm.memcpy.{{.*}}(i8* {{.*@vF2.*}}, i8* {{.*@vF2.*}}, i1 true)
@@ -209,6 +209,6 @@
 // CHECK: call void @llvm.memcpy.{{.*}}(i8* {{.*@vF2.*}}, i8* {{.*@vF2.*}}, i1 true)
   vF2 = (vF2, vF2);
   // vF2 to a temporary, then vF2 to itself
-// CHECK: call void @llvm.memcpy.{{.*}}(i8* %{{.*}}, i8* {{.*@vF2.*}}, i1 true)
+// CHECK: call void @llvm.memcpy.{{.*}}(i8* align {{[0-9]+}} %{{.*}}, i8* {{.*@vF2.*}}, i1 true)
 // CHECK: call void @llvm.memcpy.{{.*}}(i8* {{.*@vF2.*}}, i8* {{.*@vF2.*}}, i1 true)
 }
diff --git a/test/CodeGen/vpclmulqdq-builtins.c b/test/CodeGen/vpclmulqdq-builtins.c
new file mode 100644
index 0000000..8c610e2
--- /dev/null
+++ b/test/CodeGen/vpclmulqdq-builtins.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - | FileCheck %s --check-prefix AVX
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX512 -target-feature +vpclmulqdq -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes AVX,AVX512
+
+#include <immintrin.h>
+
+__m256i test_mm256_clmulepi64_epi128(__m256i A, __m256i B) {
+  // AVX: @llvm.x86.pclmulqdq.256
+  return _mm256_clmulepi64_epi128(A, B, 0);
+}
+
+#ifdef AVX512
+__m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) {
+  // AVX512: @llvm.x86.pclmulqdq.512
+  return _mm512_clmulepi64_epi128(A, B, 0);
+}
+#endif
+
diff --git a/test/CodeGen/wasm-arguments.c b/test/CodeGen/wasm-arguments.c
index 723632b..9283dd5 100644
--- a/test/CodeGen/wasm-arguments.c
+++ b/test/CodeGen/wasm-arguments.c
@@ -24,7 +24,7 @@
 // Single-element structs should be returned as the one element.
 // WEBASSEMBLY32: define i32 @f2()
 // WEBASSEMBLY64: define i32 @f2()
-s2 f2() {
+s2 f2(void) {
   s2 foo;
   return foo;
 }
@@ -36,7 +36,7 @@
 // Structs should be returned sret and not simplified by the frontend.
 // WEBASSEMBLY32: define void @f3(%struct.s3* noalias sret %agg.result)
 // WEBASSEMBLY64: define void @f3(%struct.s3* noalias sret %agg.result)
-s3 f3() {
+s3 f3(void) {
   s3 foo;
   return foo;
 }
diff --git a/test/CodeGen/wasm-varargs.c b/test/CodeGen/wasm-varargs.c
index b8e488e..3883549 100644
--- a/test/CodeGen/wasm-varargs.c
+++ b/test/CodeGen/wasm-varargs.c
@@ -93,11 +93,11 @@
 // CHECK:   [[R3:%[^,=]+]] = bitcast i8* [[ARGP_CUR]] to [[STRUCT_S]]*
 // CHECK:   [[R4:%[^,=]+]] = bitcast [[STRUCT_S]]* [[V]] to i8*
 // CHECK:   [[R5:%[^,=]+]] = bitcast [[STRUCT_S]]* [[R3]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[R4]], i8* [[R5]], i32 12, i32 4, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[R4]], i8* align 4 [[R5]], i32 12, i1 false)
 // CHECK:   [[VA2:%[^,=]+]] = bitcast i8** [[VA]] to i8*
 // CHECK:   call void @llvm.va_end(i8* [[VA2]])
 // CHECK:   [[R6:%[^,=]+]] = bitcast [[STRUCT_S]]* %agg.result to i8*
 // CHECK:   [[R7:%[^,=]+]] = bitcast [[STRUCT_S]]* [[V]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[R6]], i8* [[R7]], i32 12, i32 4, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[R6]], i8* align 4 [[R7]], i32 12, i1 false)
 // CHECK:   ret void
 // CHECK: }
diff --git a/test/CodeGen/windows-itanium.c b/test/CodeGen/windows-itanium.c
index 7f0e7b1..5bcd6dc 100644
--- a/test/CodeGen/windows-itanium.c
+++ b/test/CodeGen/windows-itanium.c
@@ -8,8 +8,8 @@
   return 32;
 }
 
-// CHECK-C: define i32 @function() {{.*}} {
-// CHECK-CXX: define i32 @_Z8functionv() {{.*}} {
+// CHECK-C: define dso_local i32 @function() {{.*}} {
+// CHECK-CXX: define dso_local i32 @_Z8functionv() {{.*}} {
 // CHECK:   ret i32 32
 // CHECK: }
 
diff --git a/test/CodeGen/windows-on-arm-dllimport-dllexport.c b/test/CodeGen/windows-on-arm-dllimport-dllexport.c
index 4b2e29e..a2ebbf1 100644
--- a/test/CodeGen/windows-on-arm-dllimport-dllexport.c
+++ b/test/CodeGen/windows-on-arm-dllimport-dllexport.c
@@ -17,9 +17,9 @@
 }
 
 // CHECK: @import_int = external dllimport global i32
-// CHECK: @export_int = common dllexport global i32 0, align 4
+// CHECK: @export_int = common dso_local dllexport global i32 0, align 4
 
-// CHECK: define dllexport arm_aapcs_vfpcc void @export_implemented_function()
+// CHECK: define dso_local dllexport arm_aapcs_vfpcc void @export_implemented_function()
 
 // CHECK: declare dllimport arm_aapcs_vfpcc void @import_function(i32)
 
diff --git a/test/CodeGen/windows-struct-abi.c b/test/CodeGen/windows-struct-abi.c
index 1631f61..5ffc4fa 100644
--- a/test/CodeGen/windows-struct-abi.c
+++ b/test/CodeGen/windows-struct-abi.c
@@ -6,11 +6,11 @@
 
 struct f1 return_f1(void) { while (1); }
 
-// CHECK: define i32 @return_f1()
+// CHECK: define dso_local i32 @return_f1()
 
 void receive_f1(struct f1 a0) { }
 
-// CHECK: define void @receive_f1(float %a0.0)
+// CHECK: define dso_local void @receive_f1(float %a0.0)
 
 struct f2 {
   float f;
@@ -19,11 +19,11 @@
 
 struct f2 return_f2(void) { while (1); }
 
-// CHECK: define i64 @return_f2()
+// CHECK: define dso_local i64 @return_f2()
 
 void receive_f2(struct f2 a0) { }
 
-// CHECK: define void @receive_f2(float %a0.0, float %a0.1)
+// CHECK: define dso_local void @receive_f2(float %a0.0, float %a0.1)
 
 struct f4 {
   float f;
@@ -34,9 +34,9 @@
 
 struct f4 return_f4(void) { while (1); }
 
-// CHECK: define void @return_f4(%struct.f4* noalias sret %agg.result)
+// CHECK: define dso_local void @return_f4(%struct.f4* noalias sret %agg.result)
 
 void receive_f4(struct f4 a0) { }
 
-// CHECK: define void @receive_f4(float %a0.0, float %a0.1, float %a0.2, float %a0.3)
+// CHECK: define dso_local void @receive_f4(float %a0.0, float %a0.1, float %a0.2, float %a0.3)
 
diff --git a/test/CodeGen/windows-swiftcall.c b/test/CodeGen/windows-swiftcall.c
index f76b2fd..a7cfb4d 100644
--- a/test/CodeGen/windows-swiftcall.c
+++ b/test/CodeGen/windows-swiftcall.c
@@ -40,7 +40,7 @@
   float *error;
   context_error_1(&x, &error);
 }
-// CHECK-LABEL: define void @test_context_error_1()
+// CHECK-LABEL: define dso_local void @test_context_error_1()
 // CHECK:       [[X:%.*]] = alloca i32, align 4
 // CHECK:       [[ERROR:%.*]] = alloca float*, align 8
 // CHECK:       [[TEMP:%.*]] = alloca swifterror float*, align 8
@@ -97,7 +97,7 @@
   float f1;
 } struct_1;
 TEST(struct_1);
-// CHECK-LABEL: define swiftcc { i64, i64 } @return_struct_1() {{.*}}{
+// CHECK-LABEL: define dso_local swiftcc { i64, i64 } @return_struct_1() {{.*}}{
 // CHECK:   [[RET:%.*]] = alloca [[STRUCT1:%.*]], align 4
 // CHECK:   [[VAR:%.*]] = alloca [[STRUCT1]], align 4
 // CHECK:   call void @llvm.memset
@@ -111,7 +111,7 @@
 // CHECK:   [[R1:%.*]] = insertvalue { i64, i64 } [[R0]], i64 [[T1]], 1
 // CHECK:   ret { i64, i64 } [[R1]]
 // CHECK: }
-// CHECK-LABEL: define swiftcc void @take_struct_1(i64, i64) {{.*}}{
+// CHECK-LABEL: define dso_local swiftcc void @take_struct_1(i64, i64) {{.*}}{
 // CHECK:   [[V:%.*]] = alloca [[STRUCT1:%.*]], align 4
 // CHECK:   [[CAST:%.*]] = bitcast [[STRUCT1]]* [[V]] to { i64, i64 }*
 // CHECK:   [[GEP0:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[CAST]], i32 0, i32 0
@@ -120,7 +120,7 @@
 // CHECK:   store i64 %1, i64* [[GEP1]], align 4
 // CHECK:   ret void
 // CHECK: }
-// CHECK-LABEL: define void @test_struct_1() {{.*}}{
+// CHECK-LABEL: define dso_local void @test_struct_1() {{.*}}{
 // CHECK:   [[AGG:%.*]] = alloca [[STRUCT1:%.*]], align 4
 // CHECK:   [[RET:%.*]] = call swiftcc { i64, i64 } @return_struct_1()
 // CHECK:   [[CAST:%.*]] = bitcast [[STRUCT1]]* [[AGG]] to { i64, i64 }*
@@ -147,7 +147,7 @@
   float f1;
 } struct_2;
 TEST(struct_2);
-// CHECK-LABEL: define swiftcc { i64, i64 } @return_struct_2() {{.*}}{
+// CHECK-LABEL: define dso_local swiftcc { i64, i64 } @return_struct_2() {{.*}}{
 // CHECK:   [[RET:%.*]] = alloca [[STRUCT2_TYPE]], align 4
 // CHECK:   [[VAR:%.*]] = alloca [[STRUCT2_TYPE]], align 4
 // CHECK:   [[CASTVAR:%.*]] = bitcast {{.*}} [[VAR]]
@@ -164,7 +164,7 @@
 // CHECK:   [[R1:%.*]] = insertvalue { i64, i64 } [[R0]], i64 [[T1]], 1
 // CHECK:   ret { i64, i64 } [[R1]]
 // CHECK: }
-// CHECK-LABEL: define swiftcc void @take_struct_2(i64, i64) {{.*}}{
+// CHECK-LABEL: define dso_local swiftcc void @take_struct_2(i64, i64) {{.*}}{
 // CHECK:   [[V:%.*]] = alloca [[STRUCT:%.*]], align 4
 // CHECK:   [[CAST:%.*]] = bitcast [[STRUCT]]* [[V]] to { i64, i64 }*
 // CHECK:   [[GEP0:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[CAST]], i32 0, i32 0
@@ -173,7 +173,7 @@
 // CHECK:   store i64 %1, i64* [[GEP1]], align 4
 // CHECK:   ret void
 // CHECK: }
-// CHECK-LABEL: define void @test_struct_2() {{.*}} {
+// CHECK-LABEL: define dso_local void @test_struct_2() {{.*}} {
 // CHECK:   [[TMP:%.*]] = alloca [[STRUCT2_TYPE]], align 4
 // CHECK:   [[CALL:%.*]] = call swiftcc { i64, i64 } @return_struct_2()
 // CHECK:   [[CAST_TMP:%.*]] = bitcast [[STRUCT2_TYPE]]* [[TMP]] to { i64, i64 }*
@@ -203,27 +203,27 @@
   __attribute__((packed)) float f;
 } struct_misaligned_1;
 TEST(struct_misaligned_1)
-// CHECK-LABEL: define swiftcc i64 @return_struct_misaligned_1()
+// CHECK-LABEL: define dso_local swiftcc i64 @return_struct_misaligned_1()
 // CHECK:  [[RET:%.*]] = alloca [[STRUCT:%.*]], align 1
 // CHECK:  [[RES:%.*]] = alloca [[STRUCT]], align 1
 // CHECK:  [[CAST:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8*
-// CHECK:  call void @llvm.memset{{.*}}(i8* [[CAST]], i8 0, i64 5
+// CHECK:  call void @llvm.memset{{.*}}(i8* align 1 [[CAST]], i8 0, i64 5
 // CHECK:  [[CASTRET:%.*]] = bitcast [[STRUCT]]* [[RET]] to i8*
 // CHECK:  [[CASTRES:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8*
-// CHECK:  call void @llvm.memcpy{{.*}}(i8* [[CASTRET]], i8* [[CASTRES]], i64 5
+// CHECK:  call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} [[CASTRET]], i8* align {{[0-9]+}} [[CASTRES]], i64 5
 // CHECK:  [[CAST:%.*]] = bitcast [[STRUCT]]* [[RET]] to { i64 }*
 // CHECK:  [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0
 // CHECK:  [[R0:%.*]] = load i64, i64* [[GEP]], align 1
 // CHECK:  ret i64 [[R0]]
 // CHECK:}
-// CHECK-LABEL: define swiftcc void @take_struct_misaligned_1(i64) {{.*}}{
+// CHECK-LABEL: define dso_local swiftcc void @take_struct_misaligned_1(i64) {{.*}}{
 // CHECK:   [[V:%.*]] = alloca [[STRUCT:%.*]], align 1
 // CHECK:   [[CAST:%.*]] = bitcast [[STRUCT]]* [[V]] to { i64 }*
 // CHECK:   [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0
 // CHECK:   store i64 %0, i64* [[GEP]], align 1
 // CHECK:   ret void
 // CHECK: }
-// CHECK: define void @test_struct_misaligned_1() {{.*}}{
+// CHECK: define dso_local void @test_struct_misaligned_1() {{.*}}{
 // CHECK:   [[AGG:%.*]] = alloca [[STRUCT:%.*]], align 1
 // CHECK:   [[CALL:%.*]] = call swiftcc i64 @return_struct_misaligned_1()
 // CHECK:   [[T0:%.*]] = bitcast [[STRUCT]]* [[AGG]] to { i64 }*
@@ -256,26 +256,26 @@
   double d;
 } union_het_fp;
 TEST(union_het_fp)
-// CHECK-LABEL: define swiftcc i64 @return_union_het_fp()
+// CHECK-LABEL: define dso_local swiftcc i64 @return_union_het_fp()
 // CHECK:  [[RET:%.*]] = alloca [[UNION:%.*]], align 8
 // CHECK:  [[RES:%.*]] = alloca [[UNION]], align 8
 // CHECK:  [[CAST:%.*]] = bitcast [[UNION]]* [[RES]] to i8*
-// CHECK:  call void @llvm.memcpy{{.*}}(i8* [[CAST]]
+// CHECK:  call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} [[CAST]]
 // CHECK:  [[CASTRET:%.*]] = bitcast [[UNION]]* [[RET]] to i8*
 // CHECK:  [[CASTRES:%.*]] = bitcast [[UNION]]* [[RES]] to i8*
-// CHECK:  call void @llvm.memcpy{{.*}}(i8* [[CASTRET]], i8* [[CASTRES]]
+// CHECK:  call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} [[CASTRET]], i8* align {{[0-9]+}} [[CASTRES]]
 // CHECK:  [[CAST:%.*]] = bitcast [[UNION]]* [[RET]] to { i64 }*
 // CHECK:  [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0
 // CHECK:  [[R0:%.*]] = load i64, i64* [[GEP]], align 8
 // CHECK:  ret i64 [[R0]]
-// CHECK-LABEL: define swiftcc void @take_union_het_fp(i64) {{.*}}{
+// CHECK-LABEL: define dso_local swiftcc void @take_union_het_fp(i64) {{.*}}{
 // CHECK:   [[V:%.*]] = alloca [[UNION:%.*]], align 8
 // CHECK:   [[CAST:%.*]] = bitcast [[UNION]]* [[V]] to { i64 }*
 // CHECK:   [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0
 // CHECK:   store i64 %0, i64* [[GEP]], align 8
 // CHECK:   ret void
 // CHECK: }
-// CHECK-LABEL: define void @test_union_het_fp() {{.*}}{
+// CHECK-LABEL: define dso_local void @test_union_het_fp() {{.*}}{
 // CHECK:   [[AGG:%.*]] = alloca [[UNION:%.*]], align 8
 // CHECK:   [[CALL:%.*]] = call swiftcc i64 @return_union_het_fp()
 // CHECK:   [[T0:%.*]] = bitcast [[UNION]]* [[AGG]] to { i64 }*
@@ -294,7 +294,7 @@
   float f2;
 } union_hom_fp;
 TEST(union_hom_fp)
-// CHECK-LABEL: define void @test_union_hom_fp()
+// CHECK-LABEL: define dso_local void @test_union_hom_fp()
 // CHECK:   [[TMP:%.*]] = alloca [[REC:%.*]], align 4
 // CHECK:   [[CALL:%.*]] = call [[SWIFTCC]] float @return_union_hom_fp()
 // CHECK:   [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG:{ float }]]*
@@ -311,7 +311,7 @@
   float4 fv2;
 } union_hom_fp_partial;
 TEST(union_hom_fp_partial)
-// CHECK: define void @test_union_hom_fp_partial()
+// CHECK: define dso_local void @test_union_hom_fp_partial()
 // CHECK:   [[AGG:%.*]] = alloca [[UNION:%.*]], align 16
 // CHECK:   [[CALL:%.*]] = call swiftcc { i64, i64 } @return_union_hom_fp_partial()
 // CHECK:   [[CAST:%.*]] = bitcast [[UNION]]* [[AGG]] to { i64, i64 }*
@@ -335,7 +335,7 @@
   float4 fv2;
 } union_het_fpv_partial;
 TEST(union_het_fpv_partial)
-// CHECK-LABEL: define void @test_union_het_fpv_partial()
+// CHECK-LABEL: define dso_local void @test_union_het_fpv_partial()
 // CHECK:   [[AGG:%.*]] = alloca [[UNION:%.*]], align 16
 // CHECK:   [[CALL:%.*]] = call swiftcc { i64, i64 } @return_union_het_fpv_partial()
 // CHECK:   [[CAST:%.*]] = bitcast [[UNION]]* [[AGG]] to { i64, i64 }*
@@ -385,7 +385,7 @@
 // CHECK:   [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1
 // CHECK:   store <4 x i32> %1, <4 x i32>* [[T0]], align
 // CHECK:   ret void
-// CHECK-LABEL: define void @test_int8()
+// CHECK-LABEL: define dso_local void @test_int8()
 // CHECK:   [[TMP1:%.*]] = alloca [[REC]], align
 // CHECK:   [[TMP2:%.*]] = alloca [[REC]], align
 // CHECK:   [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG]] @return_int8()
@@ -429,7 +429,7 @@
 // CHECK:   [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1
 // CHECK:   store i32 %1, i32* [[T0]], align
 // CHECK:   ret void
-// CHECK-LABEL: define void @test_int5()
+// CHECK-LABEL: define dso_local void @test_int5()
 // CHECK:   [[TMP1:%.*]] = alloca [[REC]], align
 // CHECK:   [[TMP2:%.*]] = alloca [[REC]], align
 // CHECK:   [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG]] @return_int5()
@@ -455,4 +455,4 @@
   int3 v __attribute__((packed));
 } misaligned_int3;
 TEST(misaligned_int3)
-// CHECK-LABEL: define swiftcc void @take_misaligned_int3(i64, i64)
+// CHECK-LABEL: define dso_local swiftcc void @take_misaligned_int3(i64, i64)
diff --git a/test/CodeGen/wmemcmp.c b/test/CodeGen/wmemcmp.c
new file mode 100644
index 0000000..ad08861
--- /dev/null
+++ b/test/CodeGen/wmemcmp.c
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 %s -triple x86_64-pc-win32 -emit-llvm -o - | FileCheck %s
+
+typedef __SIZE_TYPE__ size_t;
+typedef __WCHAR_TYPE__ wchar_t;
+
+int wmemcmp_test(const wchar_t *s1, const wchar_t *s2, size_t n) {
+  // CHECK: [[S1:%.*]] = load
+  // CHECK: [[S2:%.*]] = load
+  // CHECK: [[N:%.*]] = load
+  // CHECK: [[N0:%.*]] = icmp eq i64 [[N]], 0
+  // CHECK: br i1 [[N0]], label %[[EXIT:.*]], label %[[GT:.*]]
+
+  // CHECK: [[GT]]:
+  // CHECK: [[S1P:%.*]] = phi i16* [ [[S1]], %[[ENTRY:.*]] ], [ [[S1N:.*]], %[[NEXT:.*]] ]
+  // CHECK: [[S2P:%.*]] = phi i16* [ [[S2]], %[[ENTRY]] ], [ [[S2N:.*]], %[[NEXT]] ]
+  // CHECK: [[NP:%.*]] = phi i64 [ [[N]], %[[ENTRY]] ], [ [[NN:.*]], %[[NEXT]] ]
+  // CHECK: [[S1L:%.*]] = load i16, i16* [[S1P]], align 2
+  // CHECK: [[S2L:%.*]] = load i16, i16* [[S2P]], align 2
+  // CHECK: [[CMPGT:%.*]] = icmp ugt i16 [[S1L]], [[S2L]]
+  // CHECK: br i1 [[CMPGT]], label %[[EXIT]], label %[[LT:.*]]
+
+  // CHECK: [[LT]]:
+  // CHECK: [[CMPLT:%.*]] = icmp ult i16 [[S1L]], [[S2L]]
+  // CHECK: br i1 [[CMPLT]], label %[[EXIT]], label %[[NEXT:.*]]
+
+  // CHECK: [[NEXT]]:
+  // CHECK: [[S1N]] = getelementptr inbounds i16, i16* [[S1P]], i32 1
+  // CHECK: [[S2N]] = getelementptr inbounds i16, i16* [[S2P]], i32 1
+  // CHECK: [[NN]] = sub i64 [[NP]], 1
+  // CHECK: [[NN0:%.*]] = icmp eq i64 [[NN]], 0
+  // CHECK: br i1 [[NN0]], label %[[EXIT]], label %[[GT]]
+  //
+  // CHECK: [[EXIT]]:
+  // CHECK: [[RV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[GT]] ], [ -1, %[[LT]] ], [ 0, %[[NEXT]] ]
+  // CHECK: ret i32 [[RV]]
+  return __builtin_wmemcmp(s1, s2, n);
+}
diff --git a/test/CodeGen/x86-atomic-long_double.c b/test/CodeGen/x86-atomic-long_double.c
index 9857c67..130ff45 100644
--- a/test/CodeGen/x86-atomic-long_double.c
+++ b/test/CodeGen/x86-atomic-long_double.c
@@ -15,12 +15,12 @@
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
   // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
   // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[INC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
@@ -46,10 +46,10 @@
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK32: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
   // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
@@ -77,12 +77,12 @@
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
   // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
   // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
@@ -108,10 +108,10 @@
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK32: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
   // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
@@ -140,12 +140,12 @@
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK: [[SUB_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]],
   // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
   // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[SUB_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
@@ -177,10 +177,10 @@
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK32: [[INC_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]],
   // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
@@ -203,7 +203,7 @@
   // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8
   // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8
   // CHECK: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 16
   // CHECK: [[STORE_TEMP_INT_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i128*
   // CHECK: [[STORE_TEMP_INT:%.+]] = load i128, i128* [[STORE_TEMP_INT_PTR]], align 16
@@ -213,7 +213,7 @@
   // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4
   // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4
   // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 4
   // CHECK32: [[ADDR_VOID:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i8*
@@ -250,12 +250,12 @@
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
   // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
   // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[INC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
@@ -281,10 +281,10 @@
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK32: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
   // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
@@ -311,12 +311,12 @@
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
   // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
   // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
@@ -342,10 +342,10 @@
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK32: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
   // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
@@ -373,12 +373,12 @@
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK: [[SUB_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]],
   // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
   // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 [[SUB_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
@@ -409,10 +409,10 @@
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
   // CHECK32: [[INC_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]],
   // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
@@ -435,7 +435,7 @@
   // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8
   // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8
   // CHECK: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i1 false)
   // CHECK: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 16
   // CHECK: [[STORE_TEMP_INT_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i128*
   // CHECK: [[STORE_TEMP_INT:%.+]] = load i128, i128* [[STORE_TEMP_INT_PTR]], align 16
@@ -445,7 +445,7 @@
   // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4
   // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4
   // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8*
-  // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i32 4, i1 false)
+  // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i1 false)
   // CHECK32: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 4
   // CHECK32: [[ADDR_VOID:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i8*
diff --git a/test/CodeGen/x86-cf-protection.c b/test/CodeGen/x86-cf-protection.c
new file mode 100644
index 0000000..c853791
--- /dev/null
+++ b/test/CodeGen/x86-cf-protection.c
@@ -0,0 +1,6 @@
+// RUN: not %clang_cc1 -fsyntax-only -S -emit-llvm -o %t -triple i386-unknown-unknown -fcf-protection=return %s 2>&1 | FileCheck %s --check-prefix=RETURN
+// RUN: not %clang_cc1 -fsyntax-only -S -emit-llvm -o %t -triple i386-unknown-unknown -fcf-protection=branch %s 2>&1 | FileCheck %s --check-prefix=BRANCH
+
+// RETURN: error: option 'cf-protection=return' cannot be specified without '-mshstk'
+// BRANCH: error: option 'cf-protection=branch' cannot be specified without '-mibt'
+void foo() {}
diff --git a/test/CodeGen/x86_32-arguments-realign.c b/test/CodeGen/x86_32-arguments-realign.c
index 768e1cc..b99523b 100644
--- a/test/CodeGen/x86_32-arguments-realign.c
+++ b/test/CodeGen/x86_32-arguments-realign.c
@@ -2,7 +2,7 @@
 // RUN: FileCheck < %t %s
 
 // CHECK-LABEL: define void @f0(%struct.s0* byval align 4)
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 16, i32 4, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %{{.*}}, i8* align 4 %{{.*}}, i32 16, i1 false)
 // CHECK: }
 struct s0 { long double a; };
 void f0(struct s0 a0) {
diff --git a/test/CodeGen/x86_32-arguments-win32.c b/test/CodeGen/x86_32-arguments-win32.c
index 7b27fc7..65e25f3 100644
--- a/test/CodeGen/x86_32-arguments-win32.c
+++ b/test/CodeGen/x86_32-arguments-win32.c
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -w -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s
 
-// CHECK-LABEL: define i64 @f1_1()
-// CHECK-LABEL: define void @f1_2(i32 %a0.0, i32 %a0.1)
+// CHECK-LABEL: define dso_local i64 @f1_1()
+// CHECK-LABEL: define dso_local void @f1_2(i32 %a0.0, i32 %a0.1)
 struct s1 {
   int a;
   int b;
@@ -9,37 +9,37 @@
 struct s1 f1_1(void) { while (1) {} }
 void f1_2(struct s1 a0) {}
 
-// CHECK-LABEL: define i32 @f2_1()
+// CHECK-LABEL: define dso_local i32 @f2_1()
 struct s2 {
   short a;
   short b;
 };
 struct s2 f2_1(void) { while (1) {} }
 
-// CHECK-LABEL: define i16 @f3_1()
+// CHECK-LABEL: define dso_local i16 @f3_1()
 struct s3 {
   char a;
   char b;
 };
 struct s3 f3_1(void) { while (1) {} }
 
-// CHECK-LABEL: define i8 @f4_1()
+// CHECK-LABEL: define dso_local i8 @f4_1()
 struct s4 {
   char a:4;
   char b:4;
 };
 struct s4 f4_1(void) { while (1) {} }
 
-// CHECK-LABEL: define i64 @f5_1()
-// CHECK-LABEL: define void @f5_2(double %a0.0)
+// CHECK-LABEL: define dso_local i64 @f5_1()
+// CHECK-LABEL: define dso_local void @f5_2(double %a0.0)
 struct s5 {
   double a;
 };
 struct s5 f5_1(void) { while (1) {} }
 void f5_2(struct s5 a0) {}
 
-// CHECK-LABEL: define i32 @f6_1()
-// CHECK-LABEL: define void @f6_2(float %a0.0)
+// CHECK-LABEL: define dso_local i32 @f6_1()
+// CHECK-LABEL: define dso_local void @f6_2(float %a0.0)
 struct s6 {
   float a;
 };
diff --git a/test/CodeGen/x86_32-fpcc-struct-return.c b/test/CodeGen/x86_32-fpcc-struct-return.c
index 9f61599..2cfd437 100644
--- a/test/CodeGen/x86_32-fpcc-struct-return.c
+++ b/test/CodeGen/x86_32-fpcc-struct-return.c
@@ -17,15 +17,15 @@
 // CHECK: ret void
 Big returnBig(Big x) { return x; }
 
-// CHECK-PCC-LABEL: define void @returnSmall
+// CHECK-PCC-LABEL: define {{(dso_local )?}}void @returnSmall
 // CHECK-PCC: ret void
-// CHECK-REG-LABEL: define i32 @returnSmall
+// CHECK-REG-LABEL: define {{(dso_local )?}}i32 @returnSmall
 // CHECK-REG: ret i32
 Small returnSmall(Small x) { return x; }
 
-// CHECK-PCC-LABEL: define void @returnShort
+// CHECK-PCC-LABEL: define {{(dso_local )?}}void @returnShort
 // CHECK-PCC: ret void
-// CHECK-REG-LABEL: define i16 @returnShort
+// CHECK-REG-LABEL: define {{(dso_local )?}}i16 @returnShort
 // CHECK-REG: ret i16
 Short returnShort(Short x) { return x; }
 
diff --git a/test/CodeGen/x86_64-arguments-win32.c b/test/CodeGen/x86_64-arguments-win32.c
index 7731ead..b43107c 100644
--- a/test/CodeGen/x86_64-arguments-win32.c
+++ b/test/CodeGen/x86_64-arguments-win32.c
@@ -3,29 +3,29 @@
 // To be ABI compatible with code generated by MSVC, there shouldn't be any
 // sign/zero extensions on types smaller than 64bit.
 
-// CHECK-LABEL: define void @f1(i8 %a)
+// CHECK-LABEL: define dso_local void @f1(i8 %a)
 void f1(char a) {}
 
-// CHECK-LABEL: define void @f2(i8 %a)
+// CHECK-LABEL: define dso_local void @f2(i8 %a)
 void f2(unsigned char a) {}
 
-// CHECK-LABEL: define void @f3(i16 %a)
+// CHECK-LABEL: define dso_local void @f3(i16 %a)
 void f3(short a) {}
 
-// CHECK-LABEL: define void @f4(i16 %a)
+// CHECK-LABEL: define dso_local void @f4(i16 %a)
 void f4(unsigned short a) {}
 
 // For ABI compatibility with ICC, _Complex should be passed/returned
 // as if it were a struct with two elements.
 
-// CHECK-LABEL: define void @f5(i64 %a.coerce)
+// CHECK-LABEL: define dso_local void @f5(i64 %a.coerce)
 void f5(_Complex float a) {}
 
-// CHECK-LABEL: define void @f6({ double, double }* %a)
+// CHECK-LABEL: define dso_local void @f6({ double, double }* %a)
 void f6(_Complex double a) {}
 
-// CHECK-LABEL: define i64 @f7()
+// CHECK-LABEL: define dso_local i64 @f7()
 _Complex float f7() { return 1.0; }
 
-// CHECK-LABEL: define void @f8({ double, double }* noalias sret %agg.result)
+// CHECK-LABEL: define dso_local void @f8({ double, double }* noalias sret %agg.result)
 _Complex double f8() { return 1.0; }
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c
index d24ea4d..548980b 100644
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -434,7 +434,7 @@
 // CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]]
 // CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8*
 // CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CASTED_TMP_ADDR]], i8* [[RECASTED_VALUE_ADDR]], i64 16, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[CASTED_TMP_ADDR]], i8* align 8 [[RECASTED_VALUE_ADDR]], i64 16, i1 false)
 // CHECK-NEXT: add i32 {{.*}}, 16
 // CHECK-NEXT: store i32 {{.*}}, i32* {{.*}}
 // CHECK-NEXT: br label
diff --git a/test/CodeGen/x86_64-floatvectors.c b/test/CodeGen/x86_64-floatvectors.c
new file mode 100644
index 0000000..389a06a
--- /dev/null
+++ b/test/CodeGen/x86_64-floatvectors.c
@@ -0,0 +1,131 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
+// RUN:   FileCheck %s
+
+// This test validates that the inreg branch generation for __builtin_va_arg 
+// does not exceed the alloca size of the type, which can cause the SROA pass to
+// eliminate the assignment.
+
+typedef struct { float x, y, z; } vec3f;
+
+double Vec3FTest(__builtin_va_list ap) {
+  vec3f vec = __builtin_va_arg(ap, vec3f);
+  return vec.x + vec.y + vec.z;
+}
+// CHECK: define double @Vec3FTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec3FLoad1:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec3FGEP1:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* {{%.*}}, i32 0, i32 0
+// CHECK: store <2 x float> [[Vec3FLoad1]], <2 x float>* [[Vec3FGEP1]]
+// CHECK: [[Vec3FLoad2:%.*]] = load float, float*
+// CHECK: [[Vec3FGEP2:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* {{%.*}}, i32 0, i32 1
+// CHECK: store float [[Vec3FLoad2]], float* [[Vec3FGEP2]]
+// CHECK: vaarg.in_mem:
+
+
+typedef struct { float x, y, z, q; } vec4f;
+
+double Vec4FTest(__builtin_va_list ap) {
+  vec4f vec = __builtin_va_arg(ap, vec4f);
+  return vec.x + vec.y + vec.z + vec.q;
+}
+// CHECK: define double @Vec4FTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec4FLoad1:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec4FGEP1:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* {{%.*}}, i32 0, i32 0
+// CHECK: store <2 x float> [[Vec4FLoad1]], <2 x float>* [[Vec4FGEP1]]
+// CHECK: [[Vec4FLoad2:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec4FGEP2:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* {{%.*}}, i32 0, i32 1
+// CHECK: store <2 x float> [[Vec4FLoad2]], <2 x float>* [[Vec4FGEP2]]
+// CHECK: vaarg.in_mem:
+
+typedef struct { double x, y; } vec2d;
+
+double Vec2DTest(__builtin_va_list ap) {
+  vec2d vec = __builtin_va_arg(ap, vec2d);
+  return vec.x + vec.y;
+}
+// CHECK: define double @Vec2DTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec2DLoad1:%.*]] = load double, double*
+// CHECK: [[Vec2DGEP1:%.*]] = getelementptr inbounds { double, double }, { double, double }* {{%.*}}, i32 0, i32 0
+// CHECK: store double [[Vec2DLoad1]], double* [[Vec2DGEP1]]
+// CHECK: [[Vec2DLoad2:%.*]] = load double, double*
+// CHECK: [[Vec2DGEP2:%.*]] = getelementptr inbounds { double, double }, { double, double }* {{%.*}}, i32 0, i32 1
+// CHECK: store double [[Vec2DLoad2]], double* [[Vec2DGEP2]]
+// CHECK: vaarg.in_mem:
+
+typedef struct {
+  float x, y;
+  double z;
+} vec2f1d;
+
+double Vec2F1DTest(__builtin_va_list ap) {
+  vec2f1d vec = __builtin_va_arg(ap, vec2f1d);
+  return vec.x + vec.y + vec.z;
+}
+// CHECK: define double @Vec2F1DTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec2F1DLoad1:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec2F1DGEP1:%.*]] = getelementptr inbounds { <2 x float>, double }, { <2 x float>, double }* {{%.*}}, i32 0, i32 0
+// CHECK: store <2 x float> [[Vec2F1DLoad1]], <2 x float>* [[Vec2F1DGEP1]]
+// CHECK: [[Vec2F1DLoad2:%.*]] = load double, double*
+// CHECK: [[Vec2F1DGEP2:%.*]] = getelementptr inbounds { <2 x float>, double }, { <2 x float>, double }* {{%.*}}, i32 0, i32 1
+// CHECK: store double [[Vec2F1DLoad2]], double* [[Vec2F1DGEP2]]
+// CHECK: vaarg.in_mem:
+
+typedef struct {
+  double x;
+  float y, z;
+} vec1d2f;
+
+double Vec1D2FTest(__builtin_va_list ap) {
+  vec1d2f vec = __builtin_va_arg(ap, vec1d2f);
+  return vec.x + vec.y + vec.z;
+}
+// CHECK: define double @Vec1D2FTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec1D2FLoad1:%.*]] = load double, double*
+// CHECK: [[Vec1D2FGEP1:%.*]] = getelementptr inbounds { double, <2 x float> }, { double, <2 x float> }* {{%.*}}, i32 0, i32 0
+// CHECK: store double [[Vec1D2FLoad1]], double* [[Vec1D2FGEP1]]
+// CHECK: [[Vec1D2FLoad2:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec1D2FGEP2:%.*]] = getelementptr inbounds { double, <2 x float> }, { double, <2 x float> }* {{%.*}}, i32 0, i32 1
+// CHECK: store <2 x float> [[Vec1D2FLoad2]], <2 x float>* [[Vec1D2FGEP2]]
+// CHECK: vaarg.in_mem:
+
+typedef struct {
+  float x;
+  double z;
+} vec1f1d;
+
+double Vec1F1DTest(__builtin_va_list ap) {
+  vec1f1d vec = __builtin_va_arg(ap, vec1f1d);
+  return vec.x  + vec.z;
+}
+// CHECK: define double @Vec1F1DTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec1F1DLoad1:%.*]] = load float, float*
+// CHECK: [[Vec1F1DGEP1:%.*]] = getelementptr inbounds { float, double }, { float, double }* {{%.*}}, i32 0, i32 0
+// CHECK: store float [[Vec1F1DLoad1]], float* [[Vec1F1DGEP1]]
+// CHECK: [[Vec1F1DLoad2:%.*]] = load double, double*
+// CHECK: [[Vec1F1DGEP2:%.*]] = getelementptr inbounds { float, double }, { float, double }* {{%.*}}, i32 0, i32 1
+// CHECK: store double [[Vec1F1DLoad2]], double* [[Vec1F1DGEP2]]
+// CHECK: vaarg.in_mem:
+
+typedef struct {
+  double x;
+  float z;
+} vec1d1f;
+
+double Vec1D1FTest(__builtin_va_list ap) {
+  vec1d1f vec = __builtin_va_arg(ap, vec1d1f);
+  return vec.x  + vec.z;
+}
+// CHECK: define double @Vec1D1FTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec1D1FLoad1:%.*]] = load double, double*
+// CHECK: [[Vec1D1FGEP1:%.*]] = getelementptr inbounds { double, float }, { double, float }* {{%.*}}, i32 0, i32 0
+// CHECK: store double [[Vec1D1FLoad1]], double* [[Vec1D1FGEP1]]
+// CHECK: [[Vec1D1FLoad2:%.*]] = load float, float*
+// CHECK: [[Vec1D1FGEP2:%.*]] = getelementptr inbounds { double, float }, { double, float }* {{%.*}}, i32 0, i32 1
+// CHECK: store float [[Vec1D1FLoad2]], float* [[Vec1D1FGEP2]]
+// CHECK: vaarg.in_mem:
diff --git a/test/CodeGen/x86_64-instrument-functions.c b/test/CodeGen/x86_64-instrument-functions.c
new file mode 100644
index 0000000..686d9aa
--- /dev/null
+++ b/test/CodeGen/x86_64-instrument-functions.c
@@ -0,0 +1,38 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -S -finstrument-functions -O2 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -S -finstrument-functions-after-inlining -O2 -o - %s | FileCheck -check-prefix=NOINLINE %s
+
+// It's not so nice having asm tests in Clang, but we need to check that we set
+// up the pipeline correctly in order to have the instrumentation inserted.
+
+int leaf(int x) {
+  return x;
+// CHECK-LABEL: leaf:
+// CHECK: callq __cyg_profile_func_enter
+// CHECK-NOT: cyg_profile
+// CHECK: callq __cyg_profile_func_exit
+// CHECK-NOT: cyg_profile
+// CHECK: ret
+}
+
+int root(int x) {
+  return leaf(x);
+// CHECK-LABEL: root:
+// CHECK: callq __cyg_profile_func_enter
+// CHECK-NOT: cyg_profile
+
+// Inlined from leaf():
+// CHECK: callq __cyg_profile_func_enter
+// CHECK-NOT: cyg_profile
+// CHECK: callq __cyg_profile_func_exit
+
+// CHECK-NOT: cyg_profile
+// CHECK: callq __cyg_profile_func_exit
+// CHECK: ret
+
+// NOINLINE-LABEL: root:
+// NOINLINE: callq __cyg_profile_func_enter
+// NOINLINE-NOT: cyg_profile
+// NOINLINE: callq __cyg_profile_func_exit
+// NOINLINE: ret
+}
diff --git a/test/CodeGen/xcore-abi.c b/test/CodeGen/xcore-abi.c
index 2bac78d..90306ce 100644
--- a/test/CodeGen/xcore-abi.c
+++ b/test/CodeGen/xcore-abi.c
@@ -80,7 +80,7 @@
   // CHECK: store i8* [[IN]], i8** [[AP]]
   // CHECK: [[V1:%[a-z0-9]+]] = bitcast %struct.x* [[V:%[a-z0-9]+]] to i8*
   // CHECK: [[P1:%[a-z0-9]+]] = bitcast %struct.x* [[P]] to i8*
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[V1]], i8* [[P1]], i32 20, i32 4, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[V1]], i8* align 4 [[P1]], i32 20, i1 false)
   // CHECK: [[V2:%[a-z0-9]+]] = bitcast %struct.x* [[V]] to i8*
   // CHECK: call void @f(i8* [[V2]])
 
@@ -93,7 +93,7 @@
   // CHECK: store i8* [[IN]], i8** [[AP]]
   // CHECK: [[V1:%[a-z0-9]+]] = bitcast [4 x i32]* [[V0:%[a-z0-9]+]] to i8*
   // CHECK: [[P1:%[a-z0-9]+]] = bitcast [4 x i32]* [[P]] to i8*
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[V1]], i8* [[P1]], i32 16, i32 4, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[V1]], i8* align 4 [[P1]], i32 16, i1 false)
   // CHECK: [[V2:%[a-z0-9]+]] = getelementptr inbounds [4 x i32], [4 x i32]* [[V0]], i32 0, i32 0
   // CHECK: store i32* [[V2]], i32** [[V:%[a-z0-9]+]], align 4
   // CHECK: [[V3:%[a-z0-9]+]] = load i32*, i32** [[V]], align 4
diff --git a/test/CodeGen/xray-always-emit-customevent.cpp b/test/CodeGen/xray-always-emit-customevent.cpp
new file mode 100644
index 0000000..8ac22f2
--- /dev/null
+++ b/test/CodeGen/xray-always-emit-customevent.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -fxray-instrument -fxray-always-emit-customevents -x c++ \
+// RUN:     -std=c++11 -triple x86_64-unknown-unknown -emit-llvm -o - %s \
+// RUN:     | FileCheck %s
+
+// CHECK-LABEL: @_Z15neverInstrumentv
+[[clang::xray_never_instrument]] void neverInstrument() {
+  static constexpr char kPhase[] = "never";
+  __xray_customevent(kPhase, 5);
+  // CHECK: call void @llvm.xray.customevent(i8*{{.*}}, i32 5)
+}
diff --git a/test/CodeGenCUDA/filter-decl.cu b/test/CodeGenCUDA/filter-decl.cu
index bc744a0..0f4691f 100644
--- a/test/CodeGenCUDA/filter-decl.cu
+++ b/test/CodeGenCUDA/filter-decl.cu
@@ -10,18 +10,18 @@
 __asm__("file scope asm is host only");
 
 // CHECK-HOST: constantdata = internal global
-// CHECK-DEVICE: constantdata = externally_initialized global
+// CHECK-DEVICE: constantdata = {{(dso_local )?}}externally_initialized global
 __constant__ char constantdata[256];
 
 // CHECK-HOST: devicedata = internal global
-// CHECK-DEVICE: devicedata = externally_initialized global
+// CHECK-DEVICE: devicedata = {{(dso_local )?}}externally_initialized global
 __device__ char devicedata[256];
 
 // CHECK-HOST: shareddata = internal global
-// CHECK-DEVICE: shareddata = global
+// CHECK-DEVICE: shareddata = {{(dso_local )?}}global
 __shared__ char shareddata[256];
 
-// CHECK-HOST: hostdata = global
+// CHECK-HOST: hostdata = {{(dso_local )?}}global
 // CHECK-DEVICE-NOT: hostdata = global
 char hostdata[256];
 
diff --git a/test/CodeGenCUDA/library-builtin.cu b/test/CodeGenCUDA/library-builtin.cu
new file mode 100644
index 0000000..4804c75
--- /dev/null
+++ b/test/CodeGenCUDA/library-builtin.cu
@@ -0,0 +1,22 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \
+// RUN:  FileCheck --check-prefixes=HOST,BOTH %s
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda \
+// RUN:   -emit-llvm -o - %s | FileCheck %s --check-prefixes=DEVICE,BOTH
+
+// BOTH-LABEL: define float @logf(float
+
+// logf() should be calling itself recursively as we don't have any standard
+// library on device side.
+// DEVICE: call float @logf(float
+extern "C" __attribute__((device)) float logf(float __x) { return logf(__x); }
+
+// NOTE: this case is to illustrate the expected differences in behavior between
+// the host and device. In general we do not mess with host-side standard
+// library.
+//
+// Host is assumed to have standard library, so logf() calls LLVM intrinsic.
+// HOST: call float @llvm.log.f32(float
+extern "C" float logf(float __x) { return logf(__x); }
diff --git a/test/CodeGenCXX/PR19955.cpp b/test/CodeGenCXX/PR19955.cpp
index 1001084..601ccc6 100644
--- a/test/CodeGenCXX/PR19955.cpp
+++ b/test/CodeGenCXX/PR19955.cpp
@@ -6,13 +6,13 @@
 
 extern int *varp;
 int *varp = &var;
-// CHECK-DAG: @"\01?varp@@3PAHA" = global i32* null
-// X64-DAG: @"\01?varp@@3PEAHEA" = global i32* null
+// CHECK-DAG: @"\01?varp@@3PAHA" = dso_local global i32* null
+// X64-DAG: @"\01?varp@@3PEAHEA" = dso_local global i32* null
 
 extern void (*funp)();
 void (*funp)() = &fun;
-// CHECK-DAG: @"\01?funp@@3P6AXXZA" = global void ()* null
-// X64-DAG: @"\01?funp@@3P6AXXZEA" = global void ()* null
+// CHECK-DAG: @"\01?funp@@3P6AXXZA" = dso_local global void ()* null
+// X64-DAG: @"\01?funp@@3P6AXXZEA" = dso_local global void ()* null
 
 // CHECK-LABEL: @"\01??__Evarp@@YAXXZ"
 // CHECK-DAG: store i32* @"\01?var@@3HA", i32** @"\01?varp@@3PAHA"
diff --git a/test/CodeGenCXX/alignment.cpp b/test/CodeGenCXX/alignment.cpp
index 4c44bad..49cb344 100644
--- a/test/CodeGenCXX/alignment.cpp
+++ b/test/CodeGenCXX/alignment.cpp
@@ -204,7 +204,7 @@
     // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[A]], [[A]]* [[A_P]], i32 0, i32 0
     // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8*
     // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8*
-    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 16, i1 false)
+    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 16 [[T1]], i64 16, i1 false)
     AlignedArray result = a.aArray;
   }
 
@@ -223,7 +223,7 @@
     // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[A]], [[A]]* [[A_P]], i32 0, i32 0
     // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8*
     // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8*
-    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 16, i1 false)
+    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 16 [[T1]], i64 16, i1 false)
     AlignedArray result = b.aArray;
   }
 
@@ -234,7 +234,7 @@
     // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[B]], [[B]]* [[B_P]], i32 0, i32 2
     // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8*
     // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8*
-    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 8, i1 false)
+    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 8 [[T1]], i64 16, i1 false)
     AlignedArray result = b.bArray;
   }
 
@@ -245,7 +245,7 @@
     // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[B]], [[B]]* [[B_P]], i32 0, i32 2
     // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8*
     // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8*
-    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 8, i1 false)
+    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 8 [[T1]], i64 16, i1 false)
     AlignedArray result = b->bArray;
   }
 
@@ -256,7 +256,7 @@
     // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[B]], [[B]]* [[B_P]], i32 0, i32 2
     // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8*
     // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8*
-    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 16, i1 false)
+    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 16 [[T1]], i64 16, i1 false)
     B b;
     AlignedArray result = b.bArray;
   }
@@ -277,7 +277,7 @@
     // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[A]], [[A]]* [[A_P]], i32 0, i32 0
     // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8*
     // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8*
-    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 16, i1 false)
+    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 16 [[T1]], i64 16, i1 false)
     D d;
     AlignedArray result = d.aArray;
   }
@@ -292,7 +292,7 @@
     // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[B]], [[B]]* [[B_P]], i32 0, i32 2
     // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8*
     // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8*
-    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 8, i1 false)
+    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 8 [[T1]], i64 16, i1 false)
     D d;
     AlignedArray result = d.bArray;
   }
diff --git a/test/CodeGenCXX/alloc-size.cpp b/test/CodeGenCXX/alloc-size.cpp
index e93e231..275ffe6 100644
--- a/test/CodeGenCXX/alloc-size.cpp
+++ b/test/CodeGenCXX/alloc-size.cpp
@@ -69,4 +69,34 @@
          __builtin_object_size(dependent_calloc<7, 8>(), 0) +
          __builtin_object_size(dependent_calloc2<int, 9>(), 0);
 }
+} // namespace templated_alloc_size
+
+class C {
+public:
+  void *my_malloc(int N) __attribute__((alloc_size(2)));
+  void *my_calloc(int N, int M) __attribute__((alloc_size(2, 3)));
+};
+
+// CHECK-LABEL: define i32 @_Z16callMemberMallocv
+int callMemberMalloc() {
+  // CHECK: ret i32 16
+  return __builtin_object_size(C().my_malloc(16), 0);
+}
+
+// CHECK-LABEL: define i32 @_Z16callMemberCallocv
+int callMemberCalloc() {
+  // CHECK: ret i32 32
+  return __builtin_object_size(C().my_calloc(16, 2), 0);
+}
+
+struct D {
+  ~D();
+  void *my_malloc(int N) __attribute__((alloc_size(2)));
+};
+
+// CHECK-LABEL: define i32 @_Z20callExprWithCleanupsv
+int callExprWithCleanups() {
+  int *const p = (int *)D().my_malloc(3);
+  // CHECK: ret i32 3
+  return __builtin_object_size(p, 0);
 }
diff --git a/test/CodeGenCXX/array-default-argument.cpp b/test/CodeGenCXX/array-default-argument.cpp
index a07e390..23bc9fd 100644
--- a/test/CodeGenCXX/array-default-argument.cpp
+++ b/test/CodeGenCXX/array-default-argument.cpp
@@ -12,7 +12,7 @@
 };
 
 void f();
-// CHECK-LABEL: define void @_Z1gv()
+// CHECK-LABEL: define {{(dso_local )?}}void @_Z1gv()
 void g() {
   // CHECK: br label %[[LOOP:.*]]
 
diff --git a/test/CodeGenCXX/assign-construct-memcpy.cpp b/test/CodeGenCXX/assign-construct-memcpy.cpp
index 3d42051..5e52b16 100644
--- a/test/CodeGenCXX/assign-construct-memcpy.cpp
+++ b/test/CodeGenCXX/assign-construct-memcpy.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin12 -emit-llvm -o - -std=c++11 %s -DPOD | FileCheck %s -check-prefix=CHECK-POD
+/// RUN: %clang_cc1 -triple x86_64-apple-darwin12 -emit-llvm -o - -std=c++11 %s -DPOD | FileCheck %s -check-prefix=CHECK-POD
 // RUN: %clang_cc1 -triple x86_64-apple-darwin12 -emit-llvm -o - -std=c++11 %s | FileCheck %s -check-prefix=CHECK-NONPOD
 
 // Declare the reserved placement operators.
@@ -23,47 +23,47 @@
 foo *test1(void *f, const foo &x) {
   return new (f) foo(x);
 // CHECK-POD: test1
-// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24
 
 // CHECK-NONPOD: test1
-// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24
 }
 
 foo *test2(const foo &x) {
   return new foo(x);
 // CHECK-POD: test2
-// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 16 {{.*}} align 8 {{.*}}i64 24
 
 // CHECK-NONPOD: test2
-// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 16 {{.*}} align 8 {{.*}}i64 24
 }
 
 foo test3(const foo &x) {
   foo f = x;
   return f;
 // CHECK-POD: test3
-// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24
 
 // CHECK-NONPOD: test3
-// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24
 }
 
 foo *test4(foo &&x) {
   return new foo(x);
 // CHECK-POD: test4
-// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 16 {{.*}} align 8 {{.*}}i64 24
 
 // CHECK-NONPOD: test4
-// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 16 {{.*}} align 8 {{.*}}i64 24
 }
 
 void test5(foo &f, const foo &x) {
   f = x;
 // CHECK-POD: test5
-// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24
 
 // CHECK-NONPOD: test5
-// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 17, i32 8
+// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 17
 }
 
 extern foo globtest;
@@ -71,10 +71,10 @@
 void test6(foo &&x) {
   globtest = move(x);
 // CHECK-POD: test6
-// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24
 
 // CHECK-NONPOD: test6
-// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 17, i32 8
+// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 17
 }
 
 void byval(foo f);
@@ -82,8 +82,8 @@
 void test7(const foo &x) {
   byval(x);
 // CHECK-POD: test7
-// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24
 
 // CHECK-NONPOD: test7
-// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8
+// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24
 }
diff --git a/test/CodeGenCXX/atomic-dllexport.cpp b/test/CodeGenCXX/atomic-dllexport.cpp
index ae14222..8ba7c3d 100644
--- a/test/CodeGenCXX/atomic-dllexport.cpp
+++ b/test/CodeGenCXX/atomic-dllexport.cpp
@@ -3,7 +3,7 @@
 
 struct __declspec(dllexport) SomeStruct {
   // Copy assignment operator should be produced, and exported:
-  // M32: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.SomeStruct* @"\01??4SomeStruct@@QAEAAU0@ABU0@@Z"
-  // M64: define weak_odr dllexport                dereferenceable({{[0-9]+}}) %struct.SomeStruct* @"\01??4SomeStruct@@QEAAAEAU0@AEBU0@@Z"
+  // M32: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.SomeStruct* @"\01??4SomeStruct@@QAEAAU0@ABU0@@Z"
+  // M64: define weak_odr dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.SomeStruct* @"\01??4SomeStruct@@QEAAAEAU0@AEBU0@@Z"
   _Atomic(int) mData;
 };
diff --git a/test/CodeGenCXX/atomic-inline.cpp b/test/CodeGenCXX/atomic-inline.cpp
index fe72758..327f85d 100644
--- a/test/CodeGenCXX/atomic-inline.cpp
+++ b/test/CodeGenCXX/atomic-inline.cpp
@@ -1,6 +1,52 @@
 // RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu | FileCheck %s
 // RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu -target-cpu core2 | FileCheck %s --check-prefix=CORE2
-// Check the atomic code generation for cpu targets w/wo cx16 support.
+// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=i386-linux-gnu -target-cpu i386 | FileCheck %s --check-prefix=I386
+// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=i386-linux-gnu -target-cpu i486 | FileCheck %s --check-prefix=I486
+// Check the atomic code generation for cpu targets w/wo cx, cx8 and cx16 support.
+
+struct alignas(4) AM4 {
+  short f1, f2;
+};
+AM4 m4;
+AM4 load4() {
+  AM4 am;
+  // CHECK-LABEL: @_Z5load4v
+  // CHECK: load atomic i32, {{.*}} monotonic
+  // CORE2-LABEL: @_Z5load4v
+  // CORE2: load atomic i32, {{.*}} monotonic
+  // I386-LABEL: @_Z5load4v
+  // I386: call i32 @__atomic_load_4
+  // I486-LABEL: @_Z5load4v
+  // I486: load atomic i32, {{.*}} monotonic
+  __atomic_load(&m4, &am, 0);
+  return am;
+}
+
+AM4 s4;
+void store4() {
+  // CHECK-LABEL: @_Z6store4v
+  // CHECK: store atomic i32 {{.*}} monotonic
+  // CORE2-LABEL: @_Z6store4v
+  // CORE2: store atomic i32 {{.*}} monotonic
+  // I386-LABEL: @_Z6store4v
+  // I386: call void @__atomic_store_4
+  // I486-LABEL: @_Z6store4v
+  // I486: store atomic i32 {{.*}} monotonic
+  __atomic_store(&m4, &s4, 0);
+}
+
+bool cmpxchg4() {
+  AM4 am;
+  // CHECK-LABEL: @_Z8cmpxchg4v
+  // CHECK: cmpxchg i32* {{.*}} monotonic
+  // CORE2-LABEL: @_Z8cmpxchg4v
+  // CORE2: cmpxchg i32* {{.*}} monotonic
+  // I386-LABEL: @_Z8cmpxchg4v
+  // I386: call zeroext i1 @__atomic_compare_exchange_4
+  // I486-LABEL: @_Z8cmpxchg4v
+  // I486: cmpxchg i32* {{.*}} monotonic
+  return __atomic_compare_exchange(&m4, &s4, &am, 0, 0, 0);
+}
 
 struct alignas(8) AM8 {
   int f1, f2;
@@ -12,6 +58,10 @@
   // CHECK: load atomic i64, {{.*}} monotonic
   // CORE2-LABEL: @_Z5load8v
   // CORE2: load atomic i64, {{.*}} monotonic
+  // I386-LABEL: @_Z5load8v
+  // I386: call i64 @__atomic_load_8
+  // I486-LABEL: @_Z5load8v
+  // I486: call i64 @__atomic_load_8
   __atomic_load(&m8, &am, 0);
   return am;
 }
@@ -22,6 +72,10 @@
   // CHECK: store atomic i64 {{.*}} monotonic
   // CORE2-LABEL: @_Z6store8v
   // CORE2: store atomic i64 {{.*}} monotonic
+  // I386-LABEL: @_Z6store8v
+  // I386: call void @__atomic_store_8
+  // I486-LABEL: @_Z6store8v
+  // I486: call void @__atomic_store_8
   __atomic_store(&m8, &s8, 0);
 }
 
@@ -31,6 +85,10 @@
   // CHECK: cmpxchg i64* {{.*}} monotonic
   // CORE2-LABEL: @_Z8cmpxchg8v
   // CORE2: cmpxchg i64* {{.*}} monotonic
+  // I386-LABEL: @_Z8cmpxchg8v
+  // I386: call zeroext i1 @__atomic_compare_exchange_8
+  // I486-LABEL: @_Z8cmpxchg8v
+  // I486: call zeroext i1 @__atomic_compare_exchange_8
   return __atomic_compare_exchange(&m8, &s8, &am, 0, 0, 0);
 }
 
@@ -66,4 +124,3 @@
   // CORE2: cmpxchg i128* {{.*}} monotonic
   return __atomic_compare_exchange(&m16, &s16, &am, 0, 0, 0);
 }
-
diff --git a/test/CodeGenCXX/attr-target-mv-diff-ns.cpp b/test/CodeGenCXX/attr-target-mv-diff-ns.cpp
new file mode 100644
index 0000000..4dc2b67
--- /dev/null
+++ b/test/CodeGenCXX/attr-target-mv-diff-ns.cpp
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// Test ensures that this properly differentiates between types in different 
+// namespaces.
+int __attribute__((target("sse4.2"))) foo(int) { return 0; }
+int __attribute__((target("arch=sandybridge"))) foo(int);
+int __attribute__((target("arch=ivybridge"))) foo(int) {return 1;}
+int __attribute__((target("default"))) foo(int) { return 2; }
+
+namespace ns {
+int __attribute__((target("sse4.2"))) foo(int) { return 0; }
+int __attribute__((target("arch=sandybridge"))) foo(int);
+int __attribute__((target("arch=ivybridge"))) foo(int) {return 1;}
+int __attribute__((target("default"))) foo(int) { return 2; }
+}
+
+int bar() {
+  return foo(1) + ns::foo(2);
+}
+
+// CHECK: @_Z3fooi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z3fooi.resolver
+// CHECK: @_ZN2ns3fooEi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_ZN2ns3fooEi.resolver
+
+// CHECK: define i32 @_Z3fooi.sse4.2(i32)
+// CHECK: ret i32 0
+// CHECK: define i32 @_Z3fooi.arch_ivybridge(i32)
+// CHECK: ret i32 1
+// CHECK: define i32 @_Z3fooi(i32)
+// CHECK: ret i32 2
+
+// CHECK: define i32 @_ZN2ns3fooEi.sse4.2(i32)
+// CHECK: ret i32 0
+// CHECK: define i32 @_ZN2ns3fooEi.arch_ivybridge(i32)
+// CHECK: ret i32 1
+// CHECK: define i32 @_ZN2ns3fooEi(i32)
+// CHECK: ret i32 2
+
+// CHECK: define i32 @_Z3barv()
+// CHECK: call i32 @_Z3fooi.ifunc(i32 1)
+// CHECK: call i32 @_ZN2ns3fooEi.ifunc(i32 2)
+
+// CHECK: define i32 (i32)* @_Z3fooi.resolver() comdat
+// CHECK: ret i32 (i32)* @_Z3fooi.arch_sandybridge
+// CHECK: ret i32 (i32)* @_Z3fooi.arch_ivybridge
+// CHECK: ret i32 (i32)* @_Z3fooi.sse4.2
+// CHECK: ret i32 (i32)* @_Z3fooi
+//
+// CHECK: define i32 (i32)* @_ZN2ns3fooEi.resolver() comdat
+// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.arch_sandybridge
+// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.arch_ivybridge
+// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.sse4.2
+// CHECK: ret i32 (i32)* @_ZN2ns3fooEi
+
+// CHECK: declare i32 @_Z3fooi.arch_sandybridge(i32)
+// CHECK: declare i32 @_ZN2ns3fooEi.arch_sandybridge(i32)
diff --git a/test/CodeGenCXX/attr-target-mv-func-ptrs.cpp b/test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
new file mode 100644
index 0000000..290d6b5
--- /dev/null
+++ b/test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+void temp();
+void temp(int);
+using FP = void(*)(int);
+void b() {
+  FP f = temp; 
+}
+
+int __attribute__((target("sse4.2"))) foo(int) { return 0; }
+int __attribute__((target("arch=sandybridge"))) foo(int);
+int __attribute__((target("arch=ivybridge"))) foo(int) {return 1;}
+int __attribute__((target("default"))) foo(int) { return 2; }
+
+struct S {
+int __attribute__((target("sse4.2"))) foo(int) { return 0; }
+int __attribute__((target("arch=sandybridge"))) foo(int);
+int __attribute__((target("arch=ivybridge"))) foo(int) {return 1;}
+int __attribute__((target("default"))) foo(int) { return 2; }
+};
+
+using FuncPtr = int (*)(int);
+using MemFuncPtr = int (S::*)(int);
+
+void f(FuncPtr, MemFuncPtr);
+
+int bar() {
+  FuncPtr Free = &foo;
+  MemFuncPtr Member = &S::foo;
+  S s;
+  f(foo, &S::foo);
+  return Free(1) + (s.*Member)(2);
+}
+
+
+// CHECK: @_Z3fooi.ifunc 
+// CHECK: @_ZN1S3fooEi.ifunc
+
+// CHECK: define i32 @_Z3barv()
+// Store to Free of ifunc
+// CHECK: store i32 (i32)* @_Z3fooi.ifunc
+// Store to Member of ifunc
+// CHECK: store { i64, i64 } { i64 ptrtoint (i32 (%struct.S*, i32)* @_ZN1S3fooEi.ifunc to i64), i64 0 }, { i64, i64 }* [[MEMBER:%[a-z]+]]
+
+// Call to 'f' with the ifunc
+// CHECK: call void @_Z1fPFiiEM1SFiiE(i32 (i32)* @_Z3fooi.ifunc
diff --git a/test/CodeGenCXX/attr-target-mv-member-funcs.cpp b/test/CodeGenCXX/attr-target-mv-member-funcs.cpp
new file mode 100644
index 0000000..2e5db3b
--- /dev/null
+++ b/test/CodeGenCXX/attr-target-mv-member-funcs.cpp
@@ -0,0 +1,137 @@
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+struct S {
+  int __attribute__((target("sse4.2"))) foo(int) { return 0; }
+  int __attribute__((target("arch=sandybridge"))) foo(int);
+  int __attribute__((target("arch=ivybridge"))) foo(int) { return 1; }
+  int __attribute__((target("default"))) foo(int) { return 2; }
+
+  S &__attribute__((target("arch=ivybridge"))) operator=(const S &) {
+    return *this;
+  }
+  S &__attribute__((target("default"))) operator=(const S &) {
+    return *this;
+  }
+};
+
+struct ConvertTo {
+  __attribute__((target("arch=ivybridge"))) operator S() const {
+    return S{};
+  }
+  __attribute__((target("default"))) operator S() const {
+    return S{};
+  }
+};
+
+int bar() {
+  S s;
+  S s2;
+  s2 = s;
+
+  ConvertTo C;
+  s2 = static_cast<S>(C);
+
+  return s.foo(0);
+}
+
+struct S2 {
+  int __attribute__((target("sse4.2"))) foo(int);
+  int __attribute__((target("arch=sandybridge"))) foo(int);
+  int __attribute__((target("arch=ivybridge"))) foo(int);
+  int __attribute__((target("default"))) foo(int);
+};
+
+int bar2() {
+  S2 s;
+  return s.foo(0);
+}
+
+int __attribute__((target("sse4.2"))) S2::foo(int) { return 0; }
+int __attribute__((target("arch=ivybridge"))) S2::foo(int) { return 1; }
+int __attribute__((target("default"))) S2::foo(int) { return 2; }
+
+template<typename T>
+struct templ {
+  int __attribute__((target("sse4.2"))) foo(int) { return 0; }
+  int __attribute__((target("arch=sandybridge"))) foo(int);
+  int __attribute__((target("arch=ivybridge"))) foo(int) { return 1; }
+  int __attribute__((target("default"))) foo(int) { return 2; }
+};
+
+int templ_use() {
+  templ<int> a;
+  templ<double> b;
+  return a.foo(1) + b.foo(2);
+}
+
+// CHECK: @_ZN1SaSERKS_.ifunc = ifunc %struct.S* (%struct.S*, %struct.S*), %struct.S* (%struct.S*, %struct.S*)* ()* @_ZN1SaSERKS_.resolver
+// CHECK: @_ZNK9ConvertTocv1SEv.ifunc = ifunc void (%struct.ConvertTo*), void (%struct.ConvertTo*)* ()* @_ZNK9ConvertTocv1SEv.resolver
+// CHECK: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver
+// CHECK: @_ZN2S23fooEi.ifunc = ifunc i32 (%struct.S2*, i32), i32 (%struct.S2*, i32)* ()* @_ZN2S23fooEi.resolver
+// Templates:
+// CHECK: @_ZN5templIiE3fooEi.ifunc = ifunc i32 (%struct.templ*, i32), i32 (%struct.templ*, i32)* ()* @_ZN5templIiE3fooEi.resolver
+// CHECK: @_ZN5templIdE3fooEi.ifunc = ifunc i32 (%struct.templ.0*, i32), i32 (%struct.templ.0*, i32)* ()* @_ZN5templIdE3fooEi.resolver
+
+// CHECK: define i32 @_Z3barv()
+// CHECK: %s = alloca %struct.S, align 1
+// CHECK: %s2 = alloca %struct.S, align 1
+// CHECK: %C = alloca %struct.ConvertTo, align 1
+// CHECK: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2
+// CHECK: call void @_ZNK9ConvertTocv1SEv.ifunc(%struct.ConvertTo* %C)
+// CHECK: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2
+// CHECK: call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0)
+
+// CHECK: define %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.resolver() comdat
+// CHECK: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.arch_ivybridge
+// CHECK: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_
+
+// CHECK: define void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.resolver() comdat
+// CHECK: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.arch_ivybridge
+// CHECK: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv
+
+// CHECK: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat 
+// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge
+// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge
+// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2
+// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi
+
+// CHECK: define i32 @_Z4bar2v()
+// CHECK:call i32 @_ZN2S23fooEi.ifunc
+// define i32 (%struct.S2*, i32)* @_ZN2S23fooEi.resolver() comdat
+// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_sandybridge
+// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_ivybridge
+// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.sse4.2
+// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi
+
+// CHECK: define i32 @_ZN2S23fooEi.sse4.2(%struct.S2* %this, i32)
+// CHECK: define i32 @_ZN2S23fooEi.arch_ivybridge(%struct.S2* %this, i32)
+// CHECK: define i32 @_ZN2S23fooEi(%struct.S2* %this, i32)
+
+// CHECK: define i32 @_Z9templ_usev()
+// CHECK:  call i32 @_ZN5templIiE3fooEi.ifunc
+// CHECK:  call i32 @_ZN5templIdE3fooEi.ifunc
+
+
+// CHECK: define i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.resolver() comdat
+// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_sandybridge
+// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_ivybridge
+// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.sse4.2
+// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi
+//
+// CHECK: define i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.resolver() comdat
+// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_sandybridge
+// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_ivybridge
+// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.sse4.2
+// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi
+
+// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32)
+// CHECK: ret i32 0
+
+// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32)
+
+// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32)
+// CHECK: ret i32 1
+
+// CHECK: define linkonce_odr i32 @_ZN1S3fooEi(%struct.S* %this, i32)
+// CHECK: ret i32 2
+
diff --git a/test/CodeGenCXX/attr-target-mv-modules.cpp b/test/CodeGenCXX/attr-target-mv-modules.cpp
new file mode 100644
index 0000000..6ff2046
--- /dev/null
+++ b/test/CodeGenCXX/attr-target-mv-modules.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -fmodules -emit-llvm %s -o - | FileCheck %s
+#pragma clang module build A
+module A {}
+#pragma clang module contents
+#pragma clang module begin A
+__attribute__((target("default"))) void f();
+__attribute__((target("sse4.2"))) void f();
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module build B
+module B {}
+#pragma clang module contents
+#pragma clang module begin B
+__attribute__((target("default"))) void f();
+__attribute__((target("sse4.2"))) void f();
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module import A
+#pragma clang module import B
+void g() { f(); }
+
+// Negative tests to validate that the resolver only calls each 1x.
+// CHECK: define void ()* @_Z1fv.resolver
+// CHECK: ret void ()* @_Z1fv.sse4.2
+// CHECK-NOT: ret void ()* @_Z1fv.sse4.2
+// CHECK: ret void ()* @_Z1fv
+// CHECK-NOT: ret void ()* @_Z1fv
diff --git a/test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp b/test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
new file mode 100644
index 0000000..63353c1
--- /dev/null
+++ b/test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+struct S {
+  int __attribute__((target("sse4.2"))) foo(int);
+  int __attribute__((target("arch=sandybridge"))) foo(int);
+  int __attribute__((target("arch=ivybridge"))) foo(int);
+  int __attribute__((target("default"))) foo(int);
+};
+
+int __attribute__((target("default"))) S::foo(int) { return 2; }
+int __attribute__((target("sse4.2"))) S::foo(int) { return 0; }
+int __attribute__((target("arch=ivybridge"))) S::foo(int) { return 1; }
+
+int bar() {
+  S s;
+  return s.foo(0);
+}
+
+// CHECK: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver
+
+// CHECK: define i32 @_ZN1S3fooEi(%struct.S* %this, i32)
+// CHECK: ret i32 2
+
+// CHECK: define i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32)
+// CHECK: ret i32 0
+
+// CHECK: define i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32)
+// CHECK: ret i32 1
+
+// CHECK: define i32 @_Z3barv()
+// CHECK: %s = alloca %struct.S, align 1
+// CHECK: %call = call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0)
+
+// CHECK: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat
+// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge
+// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge
+// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2
+// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi
+
+// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32)
diff --git a/test/CodeGenCXX/attr-target-mv-overloads.cpp b/test/CodeGenCXX/attr-target-mv-overloads.cpp
new file mode 100644
index 0000000..c72ea77
--- /dev/null
+++ b/test/CodeGenCXX/attr-target-mv-overloads.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; }
+int __attribute__((target("arch=sandybridge"))) foo_overload(int);
+int __attribute__((target("arch=ivybridge"))) foo_overload(int) {return 1;}
+int __attribute__((target("default"))) foo_overload(int) { return 2; }
+int __attribute__((target("sse4.2"))) foo_overload(void) { return 0; }
+int __attribute__((target("arch=sandybridge"))) foo_overload(void);
+int __attribute__((target("arch=ivybridge"))) foo_overload(void) {return 1;}
+int __attribute__((target("default"))) foo_overload(void) { return 2; }
+
+int bar2() {
+  return foo_overload() + foo_overload(1);
+}
+
+// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
+// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver
+
+
+// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32)
+// CHECK: ret i32 0
+// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
+// CHECK: ret i32 1
+// CHECK: define i32 @_Z12foo_overloadi(i32)
+// CHECK: ret i32 2
+// CHECK: define i32 @_Z12foo_overloadv.sse4.2()
+// CHECK: ret i32 0
+// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge()
+// CHECK: ret i32 1
+// CHECK: define i32 @_Z12foo_overloadv()
+// CHECK: ret i32 2
+
+// CHECK: define i32 @_Z4bar2v()
+// CHECK: call i32 @_Z12foo_overloadv.ifunc()
+// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1)
+
+// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat
+// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge
+// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge
+// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2
+// CHECK: ret i32 ()* @_Z12foo_overloadv
+
+// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat
+// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge
+// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge
+// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2
+// CHECK: ret i32 (i32)* @_Z12foo_overloadi
+
+// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge()
+// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32)
diff --git a/test/CodeGenCXX/attr-x86-interrupt.cpp b/test/CodeGenCXX/attr-x86-interrupt.cpp
index 5000104..30cf58b 100644
--- a/test/CodeGenCXX/attr-x86-interrupt.cpp
+++ b/test/CodeGenCXX/attr-x86-interrupt.cpp
@@ -26,10 +26,10 @@
 // X86_LINUX: define x86_intrcc void @{{.*}}foo8{{.*}}(i32* %{{.+}})
 // X86_LINUX: define linkonce_odr x86_intrcc void @{{.*}}foo9{{.*}}(i32* %{{.+}})
 // X86_64_WIN: @llvm.used = appending global [3 x i8*] [i8* bitcast (void (i32*, i64)* @{{.*}}foo7{{.*}} to i8*), i8* bitcast (void (i32*)* @{{.*}}foo8{{.*}} to i8*), i8* bitcast (void (i32*)* @{{.*}}foo9{{.*}} to i8*)], section "llvm.metadata"
-// X86_64_WIN: define x86_intrcc void @{{.*}}foo7{{.*}}(i32* %{{.+}}, i64 %{{.+}})
-// X86_64_WIN: define x86_intrcc void @{{.*}}foo8{{.*}}(i32* %{{.+}})
-// X86_64_WIN: define linkonce_odr x86_intrcc void @{{.*}}foo9{{.*}}(i32* %{{.+}})
+// X86_64_WIN: define dso_local x86_intrcc void @{{.*}}foo7{{.*}}(i32* %{{.+}}, i64 %{{.+}})
+// X86_64_WIN: define dso_local x86_intrcc void @{{.*}}foo8{{.*}}(i32* %{{.+}})
+// X86_64_WIN: define linkonce_odr dso_local x86_intrcc void @{{.*}}foo9{{.*}}(i32* %{{.+}})
 // X86_WIN: @llvm.used = appending global [3 x i8*] [i8* bitcast (void (i32*, i32)* @{{.*}}foo7{{.*}} to i8*), i8* bitcast (void (i32*)* @{{.*}}foo8{{.*}} to i8*), i8* bitcast (void (i32*)* @{{.*}}foo9{{.*}} to i8*)], section "llvm.metadata"
-// X86_WIN: define x86_intrcc void @{{.*}}foo7{{.*}}(i32* %{{.+}}, i32 %{{.+}})
-// X86_WIN: define x86_intrcc void @{{.*}}foo8{{.*}}(i32* %{{.+}})
-// X86_WIN: define linkonce_odr x86_intrcc void @{{.*}}foo9{{.*}}(i32* %{{.+}})
+// X86_WIN: define dso_local x86_intrcc void @{{.*}}foo7{{.*}}(i32* %{{.+}}, i32 %{{.+}})
+// X86_WIN: define dso_local x86_intrcc void @{{.*}}foo8{{.*}}(i32* %{{.+}})
+// X86_WIN: define linkonce_odr dso_local x86_intrcc void @{{.*}}foo9{{.*}}(i32* %{{.+}})
diff --git a/test/CodeGenCXX/block-inalloca.cpp b/test/CodeGenCXX/block-inalloca.cpp
new file mode 100644
index 0000000..b827dde
--- /dev/null
+++ b/test/CodeGenCXX/block-inalloca.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple i686-unknown-windows-msvc -fblocks -emit-llvm -o - %s | FileCheck %s
+
+struct S {
+  S(const struct S &) {}
+};
+
+void (^b)(S) = ^(S) {};
+
+// CHECK: [[DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, %struct.S, [3 x i8] }>, <{ i8*, %struct.S, [3 x i8] }>* %0, i32 0, i32 0
+// CHECK: load i8*, i8** [[DESCRIPTOR]]
+
diff --git a/test/CodeGenCXX/blocks.cpp b/test/CodeGenCXX/blocks.cpp
index 5b7c7e6..37219d3 100644
--- a/test/CodeGenCXX/blocks.cpp
+++ b/test/CodeGenCXX/blocks.cpp
@@ -122,7 +122,6 @@
   // CHECK-LABEL: define internal void @___ZN5test44testEv_block_invoke
   // CHECK: [[TMP:%.*]] = alloca [[A:%.*]], align 1
   // CHECK-NEXT: store i8* [[BLOCKDESC:%.*]], i8** {{.*}}, align 8
-  // CHECK-NEXT: load i8*, i8**
   // CHECK-NEXT: bitcast i8* [[BLOCKDESC]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*
   // CHECK:      call void @_ZN5test41AC1Ev([[A]]* [[TMP]])
   // CHECK-NEXT: call void @_ZN5test43fooENS_1AE([[A]]* [[TMP]])
diff --git a/test/CodeGenCXX/captured-statements.cpp b/test/CodeGenCXX/captured-statements.cpp
index 95e73e5..c8832f1 100644
--- a/test/CodeGenCXX/captured-statements.cpp
+++ b/test/CodeGenCXX/captured-statements.cpp
@@ -182,7 +182,7 @@
 }
 
 inline int test_captured_linkage() {
-  // CHECK-7: @_ZZ21test_captured_linkagevE1i = linkonce_odr global i32 0
+  // CHECK-7: @_ZZ21test_captured_linkagevE1i = linkonce_odr {{(dso_local )?}}global i32 0
   int j;
   #pragma clang __debug captured
   {
diff --git a/test/CodeGenCXX/catch-undef-behavior.cpp b/test/CodeGenCXX/catch-undef-behavior.cpp
index e828753..786c6da 100644
--- a/test/CodeGenCXX/catch-undef-behavior.cpp
+++ b/test/CodeGenCXX/catch-undef-behavior.cpp
@@ -371,7 +371,7 @@
 void downcast_pointer(B *b) {
   (void) static_cast<C*>(b);
   // Alignment check from EmitTypeCheck(TCK_DowncastPointer, ...)
-  // CHECK: [[SUB:%[.a-z0-9]*]] = getelementptr i8, i8* {{.*}}, i64 -16
+  // CHECK: [[SUB:%[.a-z0-9]*]] = getelementptr inbounds i8, i8* {{.*}}, i64 -16
   // CHECK-NEXT: [[C:%.+]] = bitcast i8* [[SUB]] to %class.C*
   // null check goes here
   // CHECK: [[FROM_PHI:%.+]] = phi %class.C* [ [[C]], {{.*}} ], {{.*}}
@@ -388,7 +388,7 @@
 void downcast_reference(B &b) {
   (void) static_cast<C&>(b);
   // Alignment check from EmitTypeCheck(TCK_DowncastReference, ...)
-  // CHECK:      [[SUB:%[.a-z0-9]*]] = getelementptr i8, i8* {{.*}}, i64 -16
+  // CHECK:      [[SUB:%[.a-z0-9]*]] = getelementptr inbounds i8, i8* {{.*}}, i64 -16
   // CHECK-NEXT: [[C:%.+]] = bitcast i8* [[SUB]] to %class.C*
   // Objectsize check goes here
   // CHECK:      [[C_INT:%.+]] = ptrtoint %class.C* [[C]] to i64
diff --git a/test/CodeGenCXX/cfi-icall.cpp b/test/CodeGenCXX/cfi-icall.cpp
index c3c6ed3..5f5778f 100644
--- a/test/CodeGenCXX/cfi-icall.cpp
+++ b/test/CodeGenCXX/cfi-icall.cpp
@@ -8,19 +8,22 @@
 
 struct S {};
 
-void f(S *s) {
+void f(S s) {
 }
 
 }
 
 void g() {
-  void (*fp)(S *) = f;
-  // CHECK: call i1 @llvm.type.test(i8* {{.*}}, metadata [[VOIDS:![0-9]+]])
-  fp(0);
+  struct S s;
+  void (*fp)(S) = f;
+  // CHECK: call i1 @llvm.type.test(i8* {{.*}}, metadata [[VOIDS1:![0-9]+]])
+  fp(s);
 }
 
-// ITANIUM: define internal void @_ZN12_GLOBAL__N_11fEPNS_1SE({{.*}} !type [[TS:![0-9]+]]
-// MS: define internal void @"\01?f@?A@@YAXPEAUS@?A@@@Z"({{.*}} !type [[TS:![0-9]+]]
+// ITANIUM: define internal void @_ZN12_GLOBAL__N_11fENS_1SE({{.*}} !type [[TS1:![0-9]+]] !type [[TS2:![0-9]+]]
+// MS: define internal void @"\01?f@?A@@YAXUS@?A@@@Z"({{.*}} !type [[TS1:![0-9]+]] !type [[TS2:![0-9]+]]
 
-// CHECK: [[VOIDS]] = distinct !{}
-// CHECK: [[TS]] = !{i64 0, [[VOIDS]]}
+// CHECK: [[VOIDS1]] = distinct !{}
+// CHECK: [[TS1]] = !{i64 0, [[VOIDS1]]}
+// CHECK: [[TS2]] = !{i64 0, [[VOIDS2:![0-9]+]]}
+// CHECK: [[VOIDS2]] = distinct !{}
diff --git a/test/CodeGenCXX/cfi-ms-vbase-derived-cast.cpp b/test/CodeGenCXX/cfi-ms-vbase-derived-cast.cpp
new file mode 100644
index 0000000..3276d8f
--- /dev/null
+++ b/test/CodeGenCXX/cfi-ms-vbase-derived-cast.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -flto -flto-unit -emit-llvm -o - -triple=x86_64-pc-win32 %s -fsanitize=cfi-derived-cast -fsanitize-trap=cfi-derived-cast | FileCheck %s
+
+struct foo {
+  virtual ~foo() {}
+  virtual void f() = 0;
+};
+
+template <typename T>
+struct bar : virtual public foo {
+  void f() {
+    // CHECK: define{{.*}}@"\01?f@?$bar@Ubaz@@@@UEAAXXZ"
+    // Load "this", vbtable, vbase offset and vtable.
+    // CHECK: load
+    // CHECK: load
+    // CHECK: load
+    // CHECK: load
+    // CHECK: @llvm.type.test{{.*}}!"?AUfoo@@"
+    static_cast<T&>(*this);
+  }
+};
+
+struct baz : public bar<baz> {
+  virtual ~baz() {}
+};
+
+int main() {
+  baz *z = new baz;
+  z->f();
+}
diff --git a/test/CodeGenCXX/cfi-ms-vbase-nvcall.cpp b/test/CodeGenCXX/cfi-ms-vbase-nvcall.cpp
new file mode 100644
index 0000000..ab61062
--- /dev/null
+++ b/test/CodeGenCXX/cfi-ms-vbase-nvcall.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -flto -flto-unit -emit-llvm -o - -triple=x86_64-pc-win32 %s -fsanitize=cfi-nvcall -fsanitize-trap=cfi-nvcall | FileCheck %s
+
+struct foo {
+  virtual ~foo() {}
+  virtual void f() = 0;
+};
+
+template <typename T>
+struct bar : virtual public foo {
+  void f() {}
+};
+
+struct baz : public bar<baz> {
+  virtual ~baz() {}
+  void g() {}
+};
+
+void f(baz *z) {
+  // CHECK: define{{.*}}@"\01?f@@YAXPEAUbaz@@@Z"
+  // Load z, vbtable, vbase offset and vtable.
+  // CHECK: load
+  // CHECK: load
+  // CHECK: load
+  // CHECK: load
+  // CHECK: @llvm.type.test{{.*}}!"?AUfoo@@"
+  z->g();
+}
diff --git a/test/CodeGenCXX/cfi-vcall-check-after-args.cpp b/test/CodeGenCXX/cfi-vcall-check-after-args.cpp
new file mode 100644
index 0000000..4c2ea8c
--- /dev/null
+++ b/test/CodeGenCXX/cfi-vcall-check-after-args.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -emit-llvm -o - %s | FileCheck %s
+
+struct A {
+  virtual void f(int);
+};
+
+int g();
+void f(A *a) {
+  // CHECK: call i32 @_Z1gv()
+  // CHECK: call i1 @llvm.type.test
+  a->f(g());
+}
diff --git a/test/CodeGenCXX/const-base-cast.cpp b/test/CodeGenCXX/const-base-cast.cpp
index dd980d5..bb08b9d 100644
--- a/test/CodeGenCXX/const-base-cast.cpp
+++ b/test/CodeGenCXX/const-base-cast.cpp
@@ -7,4 +7,4 @@
 struct C : A,B {};
 unsigned char x = ((char*)(B*)(C*)0x1000) - (char*)0x1000;
 
-// CHECK: @x = global i8 1
+// CHECK: @x = {{(dso_local )?}}global i8 1
diff --git a/test/CodeGenCXX/const-global-linkage.cpp b/test/CodeGenCXX/const-global-linkage.cpp
index e1e9321..2ec64cf 100644
--- a/test/CodeGenCXX/const-global-linkage.cpp
+++ b/test/CodeGenCXX/const-global-linkage.cpp
@@ -4,7 +4,7 @@
 const int y = 20;
 const volatile int z = 30;
 // CHECK-NOT: @x
-// CHECK: @z = constant i32 30
+// CHECK: @z = {{(dso_local )?}}constant i32 30
 // CHECK: @_ZL1y = internal constant i32 20
 const int& b() { return y; }
 
@@ -12,6 +12,6 @@
 const char z2[] = "zxcv";
 const volatile char z3[] = "zxcv";
 // CHECK-NOT: @z1
-// CHECK: @z3 = constant
+// CHECK: @z3 = {{(dso_local )?}}constant
 // CHECK: @_ZL2z2 = internal constant
 const char* b2() { return z2; }
diff --git a/test/CodeGenCXX/constructor-destructor-return-this.cpp b/test/CodeGenCXX/constructor-destructor-return-this.cpp
index 164fda3..ddb715d 100644
--- a/test/CodeGenCXX/constructor-destructor-return-this.cpp
+++ b/test/CodeGenCXX/constructor-destructor-return-this.cpp
@@ -45,8 +45,8 @@
 // CHECKIOS5-LABEL: define %class.B* @_ZN1BD2Ev(%class.B* %this)
 // CHECKIOS5-LABEL: define %class.B* @_ZN1BD1Ev(%class.B* %this)
 
-// CHECKMS-LABEL: define x86_thiscallcc %class.B* @"\01??0B@@QAE@PAH@Z"(%class.B* returned %this, i32* %i)
-// CHECKMS-LABEL: define x86_thiscallcc void @"\01??1B@@UAE@XZ"(%class.B* %this)
+// CHECKMS-LABEL: define dso_local x86_thiscallcc %class.B* @"\01??0B@@QAE@PAH@Z"(%class.B* returned %this, i32* %i)
+// CHECKMS-LABEL: define dso_local x86_thiscallcc void @"\01??1B@@UAE@XZ"(%class.B* %this)
 
 class C : public A, public B {
 public:
@@ -83,8 +83,8 @@
 // CHECKIOS5-LABEL: define void @_ZN1CD0Ev(%class.C* %this)
 // CHECKIOS5-LABEL: define void @_ZThn8_N1CD0Ev(%class.C* %this)
 
-// CHECKMS-LABEL: define x86_thiscallcc %class.C* @"\01??0C@@QAE@PAHPAD@Z"(%class.C* returned %this, i32* %i, i8* %c)
-// CHECKMS-LABEL: define x86_thiscallcc void @"\01??1C@@UAE@XZ"(%class.C* %this)
+// CHECKMS-LABEL: define dso_local x86_thiscallcc %class.C* @"\01??0C@@QAE@PAHPAD@Z"(%class.C* returned %this, i32* %i, i8* %c)
+// CHECKMS-LABEL: define dso_local x86_thiscallcc void @"\01??1C@@UAE@XZ"(%class.C* %this)
 
 class D : public virtual A {
 public:
@@ -110,8 +110,8 @@
 // CHECKIOS5-LABEL: define %class.D* @_ZN1DD2Ev(%class.D* %this, i8** %vtt)
 // CHECKIOS5-LABEL: define %class.D* @_ZN1DD1Ev(%class.D* %this)
 
-// CHECKMS-LABEL: define x86_thiscallcc %class.D* @"\01??0D@@QAE@XZ"(%class.D* returned %this, i32 %is_most_derived)
-// CHECKMS-LABEL: define x86_thiscallcc void @"\01??1D@@UAE@XZ"(%class.D* %this)
+// CHECKMS-LABEL: define dso_local x86_thiscallcc %class.D* @"\01??0D@@QAE@XZ"(%class.D* returned %this, i32 %is_most_derived)
+// CHECKMS-LABEL: define dso_local x86_thiscallcc void @"\01??1D@@UAE@XZ"(%class.D* %this)
 
 class E {
 public:
diff --git a/test/CodeGenCXX/constructor-direct-call.cpp b/test/CodeGenCXX/constructor-direct-call.cpp
index 9567d09..bcddc0f 100644
--- a/test/CodeGenCXX/constructor-direct-call.cpp
+++ b/test/CodeGenCXX/constructor-direct-call.cpp
@@ -9,7 +9,7 @@
   Test1 var;
   var.Test1::Test1();
 
-  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 4, i32 4, i1 false)
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i32 4, i1 false)
   var.Test1::Test1(var);
 }
 
@@ -28,7 +28,7 @@
   // CHECK-NEXT:  call x86_thiscallcc void @_ZN5Test2C1Ev(%class.Test2* %var)
   var.Test2::Test2();
 
-  // CHECK:  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 8, i32 4, i1 false)
+  // CHECK:  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i32 8, i1 false)
   var.Test2::Test2(var);
 }
 
diff --git a/test/CodeGenCXX/copy-constructor-elim.cpp b/test/CodeGenCXX/copy-constructor-elim.cpp
index 4abe456..7ba2310 100644
--- a/test/CodeGenCXX/copy-constructor-elim.cpp
+++ b/test/CodeGenCXX/copy-constructor-elim.cpp
@@ -56,4 +56,4 @@
 // Make sure that we obey the destination's alignment requirements when emitting
 // the copy.
 // CHECK-LABEL: define {{.*}} @f(
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}({{.*}}, i8* bitcast (%struct.V* @gv1 to i8*), {{i64|i32}} 4, i32 4, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}({{.*}}align 4{{.*}}, i8* align 8 bitcast (%struct.V* @gv1 to i8*), {{i64|i32}} 4, i1 false)
diff --git a/test/CodeGenCXX/copy-constructor-synthesis-2.cpp b/test/CodeGenCXX/copy-constructor-synthesis-2.cpp
index 9d7e18e..7940101 100644
--- a/test/CodeGenCXX/copy-constructor-synthesis-2.cpp
+++ b/test/CodeGenCXX/copy-constructor-synthesis-2.cpp
@@ -10,15 +10,15 @@
 
 PR23373 pr23373_b(pr23373_a);
 // CHECK-LABEL: define {{.*}} @__cxx_global_var_init(
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i32|i64}}({{.*}} @pr23373_b{{.*}}, {{.*}} @pr23373_a{{.*}}, [[W:i32|i64]] 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i32|i64}}({{.*}}align 4{{.*}}@pr23373_b{{.*}}, {{.*}}align 4{{.*}} @pr23373_a{{.*}}, [[W:i32|i64]] 4, i1 false)
 
 PR23373 pr23373_f() { return pr23373_a; }
 // CHECK-LABEL: define {{.*}} @_Z9pr23373_fv(
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.[[W]]({{.*}}, [[W]] 4, i32 4, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.[[W]]({{.*}}align 4{{.*}}align 4{{.*}}, [[W]] 4, i1 false)
 
 void pr23373_g(PR23373 &a, PR23373 &b) { a = b; }
 // CHECK-LABEL: define {{.*}} @_Z9pr23373_g
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.[[W]]({{.*}}, [[W]] 4, i32 4, i1 false)
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.[[W]]({{.*}}align 4{{.*}}align 4{{.*}}, [[W]] 4, i1 false)
 
 struct A { virtual void a(); };
 A x(A& y) { return y; }
diff --git a/test/CodeGenCXX/copy-constructor-synthesis.cpp b/test/CodeGenCXX/copy-constructor-synthesis.cpp
index 5a2cc14..4fdd8a3 100644
--- a/test/CodeGenCXX/copy-constructor-synthesis.cpp
+++ b/test/CodeGenCXX/copy-constructor-synthesis.cpp
@@ -143,7 +143,7 @@
 // CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [[A]], [[A]]* [[OTHER]], i32 0, i32 1
 // CHECK-NEXT: [[T4:%.*]] = bitcast i16* [[T0]] to i8*
 // CHECK-NEXT: [[T5:%.*]] = bitcast i16* [[T2]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T4]], i8* [[T5]], i64 8, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T4]], i8* align 8 [[T5]], i64 8, i1 false)
 // CHECK-NEXT: ret [[A]]* [[THIS]]
 
 // CHECK-LABEL: define linkonce_odr void @_ZN6PR66281BC2ERKS0_(%"struct.PR6628::B"* %this, %"struct.PR6628::B"* dereferenceable({{[0-9]+}})) unnamed_addr
@@ -172,7 +172,7 @@
 // CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [[A]], [[A]]* [[OTHER]], i32 0, i32 1
 // CHECK-NEXT: [[T4:%.*]] = bitcast i16* [[T0]] to i8*
 // CHECK-NEXT: [[T5:%.*]] = bitcast i16* [[T2]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T4]], i8* [[T5]], i64 8, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T4]], i8* align 8 [[T5]], i64 8, i1 false)
 // CHECK-NEXT: ret void
 }
 
diff --git a/test/CodeGenCXX/cxx0x-delegating-ctors.cpp b/test/CodeGenCXX/cxx0x-delegating-ctors.cpp
index dcc0556..614983d 100644
--- a/test/CodeGenCXX/cxx0x-delegating-ctors.cpp
+++ b/test/CodeGenCXX/cxx0x-delegating-ctors.cpp
@@ -66,7 +66,7 @@
   X::X(int) : X() {}
 }
 // CHECK: define {{.*}} @_ZN7PR128901XC1Ei(%"class.PR12890::X"* %this, i32)
-// CHECK: call void @llvm.memset.p0i8.{{i32|i64}}(i8* {{.*}}, i8 0, {{i32|i64}} 4, i32 4, i1 false)
+// CHECK: call void @llvm.memset.p0i8.{{i32|i64}}(i8* align 4 {{.*}}, i8 0, {{i32|i64}} 4, i1 false)
 
 namespace PR14588 {
   void other();
diff --git a/test/CodeGenCXX/cxx0x-initializer-array.cpp b/test/CodeGenCXX/cxx0x-initializer-array.cpp
index de10aee..4a7e452 100644
--- a/test/CodeGenCXX/cxx0x-initializer-array.cpp
+++ b/test/CodeGenCXX/cxx0x-initializer-array.cpp
@@ -39,7 +39,7 @@
     // CHECK: store i32 -1,
 
     Agg2 b = { n };
-    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* bitcast ([3 x i32]* @[[THREE_NULL_MEMPTRS]] to i8*), i32 12, i32 4, i1 false)
+    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %{{.*}}, i8* align 4 bitcast ([3 x i32]* @[[THREE_NULL_MEMPTRS]] to i8*), i32 12, i1 false)
   }
 
   // Test dynamic initialization.
diff --git a/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp b/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
index 67215ef..e1c1816 100644
--- a/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
+++ b/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -std=c++11 -triple x86_64-none-linux-gnu -emit-llvm -o - %s | FileCheck -check-prefixes=X86,CHECK %s
-// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa-amdgiz -DNO_TLS -emit-llvm -o - %s | FileCheck -check-prefixes=AMD,CHECK %s
+// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa -DNO_TLS -emit-llvm -o - %s | FileCheck -check-prefixes=AMDGCN,CHECK %s
 
 namespace std {
   typedef decltype(sizeof(int)) size_t;
@@ -49,8 +49,8 @@
 };
 // X86: @_ZGR15globalInitList1_ = internal constant [3 x i32] [i32 1, i32 2, i32 3]
 // X86: @globalInitList1 = global %{{[^ ]+}} { i32* getelementptr inbounds ([3 x i32], [3 x i32]* @_ZGR15globalInitList1_, i32 0, i32 0), i{{32|64}} 3 }
-// AMD: @_ZGR15globalInitList1_ = internal addrspace(1) constant [3 x i32] [i32 1, i32 2, i32 3]
-// AMD: @globalInitList1 = addrspace(1) global %{{[^ ]+}} { i32* addrspacecast (i32 addrspace(1)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(1)* @_ZGR15globalInitList1_, i32 0, i32 0) to i32*), i{{32|64}} 3 }
+// AMDGCN: @_ZGR15globalInitList1_ = internal addrspace(1) constant [3 x i32] [i32 1, i32 2, i32 3]
+// AMDGCN: @globalInitList1 = addrspace(1) global %{{[^ ]+}} { i32* addrspacecast (i32 addrspace(1)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(1)* @_ZGR15globalInitList1_, i32 0, i32 0) to i32*), i{{32|64}} 3 }
 std::initializer_list<int> globalInitList1 = {1, 2, 3};
 
 #ifndef NO_TLS
@@ -67,8 +67,8 @@
 
 // X86: @globalInitList2 = global %{{[^ ]+}} zeroinitializer
 // X86: @_ZGR15globalInitList2_ = internal global [2 x %[[WITHARG:[^ ]*]]] zeroinitializer
-// AMD: @globalInitList2 = addrspace(1) global %{{[^ ]+}} zeroinitializer
-// AMD: @_ZGR15globalInitList2_ = internal addrspace(1) global [2 x %[[WITHARG:[^ ]*]]] zeroinitializer
+// AMDGCN: @globalInitList2 = addrspace(1) global %{{[^ ]+}} zeroinitializer
+// AMDGCN: @_ZGR15globalInitList2_ = internal addrspace(1) global [2 x %[[WITHARG:[^ ]*]]] zeroinitializer
 
 // X86: @_ZN15partly_constant1kE = global i32 0, align 4
 // X86: @_ZN15partly_constant2ilE = global {{.*}} null, align 8
@@ -77,18 +77,18 @@
 // X86: @[[PARTLY_CONSTANT_FIRST:_ZGRN15partly_constant2ilE.*]] = internal constant [3 x i32] [i32 1, i32 2, i32 3], align 4
 // X86: @[[PARTLY_CONSTANT_SECOND:_ZGRN15partly_constant2ilE.*]] = internal global [2 x i32] zeroinitializer, align 4
 // X86: @[[PARTLY_CONSTANT_THIRD:_ZGRN15partly_constant2ilE.*]] = internal constant [4 x i32] [i32 5, i32 6, i32 7, i32 8], align 4
-// AMD: @_ZN15partly_constant1kE = addrspace(1) global i32 0, align 4
-// AMD: @_ZN15partly_constant2ilE = addrspace(2) global {{.*}} null, align 8
-// AMD: @[[PARTLY_CONSTANT_OUTER:_ZGRN15partly_constant2ilE.*]] = internal addrspace(2) global {{.*}} zeroinitializer, align 8
-// AMD: @[[PARTLY_CONSTANT_INNER:_ZGRN15partly_constant2ilE.*]] = internal addrspace(2) global [3 x {{.*}}] zeroinitializer, align 8
-// AMD: @[[PARTLY_CONSTANT_FIRST:_ZGRN15partly_constant2ilE.*]] = internal addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3], align 4
-// AMD: @[[PARTLY_CONSTANT_SECOND:_ZGRN15partly_constant2ilE.*]] = internal addrspace(2) global [2 x i32] zeroinitializer, align 4
-// AMD: @[[PARTLY_CONSTANT_THIRD:_ZGRN15partly_constant2ilE.*]] = internal addrspace(2) constant [4 x i32] [i32 5, i32 6, i32 7, i32 8], align 4
+// AMDGCN: @_ZN15partly_constant1kE = addrspace(1) global i32 0, align 4
+// AMDGCN: @_ZN15partly_constant2ilE = addrspace(4) global {{.*}} null, align 8
+// AMDGCN: @[[PARTLY_CONSTANT_OUTER:_ZGRN15partly_constant2ilE.*]] = internal addrspace(4) global {{.*}} zeroinitializer, align 8
+// AMDGCN: @[[PARTLY_CONSTANT_INNER:_ZGRN15partly_constant2ilE.*]] = internal addrspace(4) global [3 x {{.*}}] zeroinitializer, align 8
+// AMDGCN: @[[PARTLY_CONSTANT_FIRST:_ZGRN15partly_constant2ilE.*]] = internal addrspace(4) constant [3 x i32] [i32 1, i32 2, i32 3], align 4
+// AMDGCN: @[[PARTLY_CONSTANT_SECOND:_ZGRN15partly_constant2ilE.*]] = internal addrspace(4) global [2 x i32] zeroinitializer, align 4
+// AMDGCN: @[[PARTLY_CONSTANT_THIRD:_ZGRN15partly_constant2ilE.*]] = internal addrspace(4) constant [4 x i32] [i32 5, i32 6, i32 7, i32 8], align 4
 
 // X86: @[[REFTMP1:.*]] = private constant [2 x i32] [i32 42, i32 43], align 4
 // X86: @[[REFTMP2:.*]] = private constant [3 x %{{.*}}] [%{{.*}} { i32 1 }, %{{.*}} { i32 2 }, %{{.*}} { i32 3 }], align 4
-// AMD: @[[REFTMP1:.*]] = private addrspace(2) constant [2 x i32] [i32 42, i32 43], align 4
-// AMD: @[[REFTMP2:.*]] = private addrspace(2) constant [3 x %{{.*}}] [%{{.*}} { i32 1 }, %{{.*}} { i32 2 }, %{{.*}} { i32 3 }], align 4
+// AMDGCN: @[[REFTMP1:.*]] = private addrspace(4) constant [2 x i32] [i32 42, i32 43], align 4
+// AMDGCN: @[[REFTMP2:.*]] = private addrspace(4) constant [3 x %{{.*}}] [%{{.*}} { i32 1 }, %{{.*}} { i32 2 }, %{{.*}} { i32 3 }], align 4
 
 // CHECK: appending global
 
@@ -101,15 +101,15 @@
 // CHECK-LABEL: define internal void @__cxx_global_var_init
 // X86: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* @_ZGR15globalInitList2_, i{{32|64}} 0, i{{32|64}} 0
 // X86: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* @_ZGR15globalInitList2_, i{{32|64}} 0, i{{32|64}} 1
-// AMD: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* addrspacecast ({{[^@]+}} @_ZGR15globalInitList2_ {{[^)]+}}), i{{32|64}} 0, i{{32|64}} 0
-// AMD: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* addrspacecast ({{[^@]+}} @_ZGR15globalInitList2_ {{[^)]+}}), i{{32|64}} 0, i{{32|64}} 1
+// AMDGCN: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* addrspacecast ({{[^@]+}} @_ZGR15globalInitList2_ {{[^)]+}}), i{{32|64}} 0, i{{32|64}} 0
+// AMDGCN: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* addrspacecast ({{[^@]+}} @_ZGR15globalInitList2_ {{[^)]+}}), i{{32|64}} 0, i{{32|64}} 1
 // CHECK: call i32 @__cxa_atexit
 // X86: store %[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* @_ZGR15globalInitList2_, i64 0, i64 0),
 // X86:       %[[WITHARG]]** getelementptr inbounds (%{{.*}}, %{{.*}}* @globalInitList2, i32 0, i32 0), align 8
 // X86: store i64 2, i64* getelementptr inbounds (%{{.*}}, %{{.*}}* @globalInitList2, i32 0, i32 1), align 8
-// AMD: store %[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* addrspacecast ({{[^@]+}} @_ZGR15globalInitList2_ {{[^)]+}}), i64 0, i64 0),
-// AMD:       %[[WITHARG]]** getelementptr inbounds (%{{.*}}, %{{.*}}* addrspacecast ({{[^@]+}} @globalInitList2 {{[^)]+}}), i32 0, i32 0), align 8
-// AMD: store i64 2, i64* getelementptr inbounds (%{{.*}}, %{{.*}}* addrspacecast ({{[^@]+}} @globalInitList2 {{[^)]+}}), i32 0, i32 1), align 8
+// AMDGCN: store %[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* addrspacecast ({{[^@]+}} @_ZGR15globalInitList2_ {{[^)]+}}), i64 0, i64 0),
+// AMDGCN:       %[[WITHARG]]** getelementptr inbounds (%{{.*}}, %{{.*}}* addrspacecast ({{[^@]+}} @globalInitList2 {{[^)]+}}), i32 0, i32 0), align 8
+// AMDGCN: store i64 2, i64* getelementptr inbounds (%{{.*}}, %{{.*}}* addrspacecast ({{[^@]+}} @globalInitList2 {{[^)]+}}), i32 0, i32 1), align 8
 // CHECK: call void @_ZN10destroyme1D1Ev
 // CHECK-NEXT: call void @_ZN10destroyme1D1Ev
 // CHECK-NEXT: ret void
@@ -121,8 +121,8 @@
   // CHECK-LABEL: define void @_Z3fn1i
   // temporary array
   // X86: [[array:%[^ ]+]] = alloca [3 x i32]
-  // AMD: [[alloca:%[^ ]+]] = alloca [3 x i32], align 4, addrspace(5)
-  // AMD: [[array:%[^ ]+]] = addrspacecast [3 x i32] addrspace(5)* [[alloca]] to [3 x i32]*
+  // AMDGCN: [[alloca:%[^ ]+]] = alloca [3 x i32], align 4, addrspace(5)
+  // AMDGCN: [[array:%[^ ]+]] = addrspacecast [3 x i32] addrspace(5)* [[alloca]] to [3 x i32]*
   // CHECK: getelementptr inbounds [3 x i32], [3 x i32]* [[array]], i{{32|64}} 0
   // CHECK-NEXT: store i32 1, i32*
   // CHECK-NEXT: getelementptr
@@ -518,12 +518,12 @@
     // CHECK-LABEL: @_ZN9B197730102f1Ev
     testcase a{{"", ENUM_CONSTANT}};
     // X86: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* bitcast ([1 x { i8*, i32 }]* @.ref.tmp{{.*}} to [1 x %"struct.B19773010::pair"]*), i64 0, i64 0), %"struct.B19773010::pair"** %{{.*}}, align 8
-    // AMD: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* addrspacecast{{.*}} bitcast ([1 x { i8*, i32 }] addrspace(2)* @.ref.tmp{{.*}} to [1 x %"struct.B19773010::pair"] addrspace(2)*){{.*}}, i64 0, i64 0), %"struct.B19773010::pair"** %{{.*}}, align 8
+    // AMDGCN: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* addrspacecast{{.*}} bitcast ([1 x { i8*, i32 }] addrspace(4)* @.ref.tmp{{.*}} to [1 x %"struct.B19773010::pair"] addrspace(4)*){{.*}}, i64 0, i64 0), %"struct.B19773010::pair"** %{{.*}}, align 8
   }
   void f2() {
     // CHECK-LABEL: @_ZN9B197730102f2Ev
     // X86: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* bitcast ([1 x { i8*, i32 }]* @_ZGRZN9B197730102f2EvE1p_ to [1 x %"struct.B19773010::pair"]*), i64 0, i64 0), %"struct.B19773010::pair"** getelementptr inbounds ([2 x %"class.std::initializer_list.10"], [2 x %"class.std::initializer_list.10"]* @_ZZN9B197730102f2EvE1p, i64 0, i64 1, i32 0), align 16
-    // AMD: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* addrspacecast{{.*}} bitcast ([1 x { i8*, i32 }] addrspace(1)* @_ZGRZN9B197730102f2EvE1p_ to [1 x %"struct.B19773010::pair"] addrspace(1)*){{.*}}, i64 0, i64 0), %"struct.B19773010::pair"** getelementptr inbounds ([2 x %"class.std::initializer_list.10"], [2 x %"class.std::initializer_list.10"]* addrspacecast{{.*}}@_ZZN9B197730102f2EvE1p{{.*}}, i64 0, i64 1, i32 0), align 8
+    // AMDGCN: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* addrspacecast{{.*}} bitcast ([1 x { i8*, i32 }] addrspace(1)* @_ZGRZN9B197730102f2EvE1p_ to [1 x %"struct.B19773010::pair"] addrspace(1)*){{.*}}, i64 0, i64 0), %"struct.B19773010::pair"** getelementptr inbounds ([2 x %"class.std::initializer_list.10"], [2 x %"class.std::initializer_list.10"]* addrspacecast{{.*}}@_ZZN9B197730102f2EvE1p{{.*}}, i64 0, i64 1, i32 0), align 8
     static std::initializer_list<pair<const char *, E>> a, p[2] =
         {a, {{"", ENUM_CONSTANT}}};
   }
diff --git a/test/CodeGenCXX/cxx11-initializer-array-new.cpp b/test/CodeGenCXX/cxx11-initializer-array-new.cpp
index 59f9603..6e24212 100644
--- a/test/CodeGenCXX/cxx11-initializer-array-new.cpp
+++ b/test/CodeGenCXX/cxx11-initializer-array-new.cpp
@@ -154,7 +154,7 @@
 //
 // CHECK: %[[SIZE:.*]] = sub i64 %{{.*}}, 24
 // CHECK: %[[REST:.*]] = bitcast %[[T]]* %[[T_2_AS_T]] to i8*
-// CHECK: call void @llvm.memset.p0i8.i64(i8* %[[REST]], i8 0, i64 %[[SIZE]], i32 4, i1 false)
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %[[REST]], i8 0, i64 %[[SIZE]], i1 false)
 //
 // CHECK: }
 
diff --git a/test/CodeGenCXX/cxx11-special-members.cpp b/test/CodeGenCXX/cxx11-special-members.cpp
index 037e59a..96109ba 100644
--- a/test/CodeGenCXX/cxx11-special-members.cpp
+++ b/test/CodeGenCXX/cxx11-special-members.cpp
@@ -39,7 +39,9 @@
   C<0> a;
   D b;
 }
-// CHECK: define {{.*}} @_ZN1CILi0EEC1Ev
+// Trivial default ctor, might or might not be defined, but we must not expect
+// someone else ot define it.
+// CHECK-NOT: declare {{.*}} @_ZN1CILi0EEC1Ev
 // CHECK: define {{.*}} @_ZN1DC1Ev
 
 // CHECK: define {{.*}} @_ZN1BC2EOS_(
diff --git a/test/CodeGenCXX/cxx1y-variable-template.cpp b/test/CodeGenCXX/cxx1y-variable-template.cpp
index cd73817..dd8f28e 100644
--- a/test/CodeGenCXX/cxx1y-variable-template.cpp
+++ b/test/CodeGenCXX/cxx1y-variable-template.cpp
@@ -18,6 +18,12 @@
 template<typename T> template<typename U> template<typename V> int Outer<T>::Inner<U>::arr[sizeof(T) + sizeof(U) + sizeof(V)] = { init_arr() };
 int *p = Outer<char[100]>::Inner<char[20]>::arr<char[3]>;
 
+namespace PR35456 {
+// CHECK: @_ZN7PR354561nILi0EEE = linkonce_odr global i32 0
+template<int> int n;
+int *p = &n<0>;
+}
+
 // CHECK: @_ZN5OuterIA100_cE5InnerIA20_cE3arrIA3_cEE = linkonce_odr global [123 x i32] zeroinitializer
 // CHECK: @_ZGVN5OuterIA100_cE5InnerIA20_cE3arrIA3_cEE = linkonce_odr global
 
diff --git a/test/CodeGenCXX/cxx1z-aligned-allocation.cpp b/test/CodeGenCXX/cxx1z-aligned-allocation.cpp
index 437597d..4c57997 100644
--- a/test/CodeGenCXX/cxx1z-aligned-allocation.cpp
+++ b/test/CodeGenCXX/cxx1z-aligned-allocation.cpp
@@ -4,6 +4,8 @@
 // RUN: %clang_cc1 -std=c++14 -fexceptions -fsized-deallocation -faligned-allocation %s -emit-llvm -triple x86_64-linux-gnu -o - | FileCheck %s
 // RUN: %clang_cc1 -std=c++1z -fexceptions -fsized-deallocation %s -emit-llvm -triple x86_64-linux-gnu -o - | FileCheck %s
 
+// RUN: %clang_cc1 -std=c++1z -fexceptions -fsized-deallocation %s -emit-llvm -triple x86_64-windows-msvc -o - | FileCheck %s --check-prefix=CHECK-MS
+
 // Check that we don't used aligned (de)allocation without -faligned-allocation or C++1z.
 // RUN: %clang_cc1 -std=c++14 -DUNALIGNED -fexceptions %s -emit-llvm -triple x86_64-linux-gnu -o - | FileCheck %s --check-prefix=CHECK-UNALIGNED
 // RUN: %clang_cc1 -std=c++1z -DUNALIGNED -fexceptions -fno-aligned-allocation %s -emit-llvm -triple x86_64-linux-gnu -o - | FileCheck %s --check-prefix=CHECK-UNALIGNED
@@ -27,14 +29,28 @@
 // CHECK-LABEL: define {{.*}} @_Z2a0v()
 // CHECK: %[[ALLOC:.*]] = call i8* @_ZnwmSt11align_val_t(i64 512, i64 32)
 // CHECK: call void @_ZdlPvSt11align_val_t(i8* %[[ALLOC]], i64 32)
+// CHECK-MS-LABEL: define {{.*}} @"\01?a0@@YAPEAXXZ"()
+// CHECK-MS: %[[ALLOC:.*]] = call i8* @"\01??2@YAPEAX_KW4align_val_t@std@@@Z"(i64 512, i64 32)
+// CHECK-MS: cleanuppad
+// CHECK-MS: call void @"\01??3@YAXPEAXW4align_val_t@std@@@Z"(i8* %[[ALLOC]], i64 32)
 void *a0() { return new A; }
 
+// FIXME: Why don't we call the sized array deallocation overload in this case?
+// The size is known.
+//
 // CHECK-LABEL: define {{.*}} @_Z2a1l(
 // CHECK: %[[ALLOC:.*]] = call i8* @_ZnamSt11align_val_t(i64 %{{.*}}, i64 32)
 // No array cookie.
 // CHECK-NOT: store
 // CHECK: invoke void @_ZN1AC1Ev(
 // CHECK: call void @_ZdaPvSt11align_val_t(i8* %[[ALLOC]], i64 32)
+// CHECK-MS-LABEL: define {{.*}} @"\01?a1@@YAPEAXJ@Z"(
+// CHECK-MS: %[[ALLOC:.*]] = call i8* @"\01??_U@YAPEAX_KW4align_val_t@std@@@Z"(i64 %{{.*}}, i64 32)
+// No array cookie.
+// CHECK-MS-NOT: store
+// CHECK-MS: invoke %struct.A* @"\01??0A@@QEAA@XZ"(
+// CHECK-MS: cleanuppad
+// CHECK-MS: call void @"\01??_V@YAXPEAXW4align_val_t@std@@@Z"(i8* %[[ALLOC]], i64 32)
 void *a1(long n) { return new A[n]; }
 
 // CHECK-LABEL: define {{.*}} @_Z2a2P1A(
diff --git a/test/CodeGenCXX/cxx1z-copy-omission.cpp b/test/CodeGenCXX/cxx1z-copy-omission.cpp
index 234e4b1..b33a218 100644
--- a/test/CodeGenCXX/cxx1z-copy-omission.cpp
+++ b/test/CodeGenCXX/cxx1z-copy-omission.cpp
@@ -6,6 +6,8 @@
   A(const A&);
   ~A();
 
+  operator bool();
+
   int arr[10];
 };
 
@@ -79,3 +81,27 @@
 
   // CHECK-LABEL: }
 }
+
+// CHECK-LABEL: define {{.*}} @_Z1jv(
+void j() {
+  // CHECK:   alloca %{{.*}}*
+  // CHECK:   %[[OUTERTEMP:.*]] = alloca %{{.*}}
+  // CHECK:   %[[INNERTEMP:.*]] = alloca %{{.*}}
+  // CHECK:   call void @_ZN1AC1Ei(%{{.*}} %[[INNERTEMP]], i32 1)
+  // CHECK:   call zeroext i1 @_ZN1AcvbEv(%{{.*}} %[[INNERTEMP]])
+  // CHECK:   br i1
+  //
+  // CHECK:   call void @_ZN1AC1EOS_(%{{.*}} %[[OUTERTEMP]], %{{.*}} %[[INNERTEMP]])
+  // CHECK:   br label
+  //
+  // CHECK:   call void @_ZN1AC1Ei(%{{.*}} %[[OUTERTEMP]], i32 2)
+  // CHECK:   br label
+  //
+  // CHECK:   call void @_ZN1AD1Ev(%{{.*}} %[[INNERTEMP]])
+  A &&a = A(1) ?: A(2);
+
+  // CHECK:   call void @_Z1iv()
+  i();
+
+  // CHECK:   call void @_ZN1AD1Ev(%{{.*}} %[[OUTERTEMP]])
+}
diff --git a/test/CodeGenCXX/cxx1z-initializer-aggregate.cpp b/test/CodeGenCXX/cxx1z-initializer-aggregate.cpp
index 9110e49..f59ec51 100644
--- a/test/CodeGenCXX/cxx1z-initializer-aggregate.cpp
+++ b/test/CodeGenCXX/cxx1z-initializer-aggregate.cpp
@@ -112,3 +112,21 @@
   //   A_CLEANUP:
   // CHECK: call void @_ZN7Dynamic1AD1Ev({{.*}} @_ZN7Dynamic2d3E
 }
+
+namespace Instantiated1 {
+  struct A { A(); };
+  struct B : A { using A::A; };
+  template<int> B v({});
+  template B v<0>;
+  // CHECK-LABEL: define {{.*}}global_var_init{{.*}} comdat($_ZN13Instantiated11vILi0EEE) {
+  // CHECK: call void @_ZN13Instantiated11BC1Ev(%{{.*}}* @_ZN13Instantiated11vILi0EEE)
+}
+
+namespace Instantiated2 {
+  struct A { A(); };
+  struct B : A {};
+  template<int> B v({});
+  template B v<0>;
+  // CHECK-LABEL: define {{.*}}global_var_init{{.*}} comdat($_ZN13Instantiated21vILi0EEE) {
+  // CHECK: call void @_ZN13Instantiated21AC2Ev(
+}
diff --git a/test/CodeGenCXX/cxx1z-inline-variables.cpp b/test/CodeGenCXX/cxx1z-inline-variables.cpp
index 1837093..50eab3b 100644
--- a/test/CodeGenCXX/cxx1z-inline-variables.cpp
+++ b/test/CodeGenCXX/cxx1z-inline-variables.cpp
@@ -31,31 +31,61 @@
   static constexpr int b = 2;
   static constexpr int c = 3;
   static inline constexpr int d = 4;
+  static const int e = 5;
+  static const int f = 6;
+  static const int g = 7;
 };
 const int &compat_use_before_redecl = compat::b;
 const int compat::a;
 const int compat::b;
 const int compat::c;
 const int compat::d;
+const int compat::e;
+constexpr int compat::f;
+constexpr inline int compat::g;
 const int &compat_use_after_redecl1 = compat::c;
 const int &compat_use_after_redecl2 = compat::d;
-// CHECK: @_ZN6compat1bE = weak_odr constant i32 2
-// CHECK: @_ZN6compat1aE = weak_odr constant i32 1
-// CHECK: @_ZN6compat1cE = weak_odr constant i32 3
-// CHECK: @_ZN6compat1dE = linkonce_odr constant i32 4
+const int &compat_use_after_redecl3 = compat::g;
+// CHECK-DAG: @_ZN6compat1bE = weak_odr constant i32 2
+// CHECK-DAG: @_ZN6compat1aE = weak_odr constant i32 1
+// CHECK-DAG: @_ZN6compat1cE = weak_odr constant i32 3
+// CHECK-DAG: @_ZN6compat1dE = linkonce_odr constant i32 4
+// CHECK-DAG: @_ZN6compat1eE = constant i32 5
+// CHECK-DAG: @_ZN6compat1fE = weak_odr constant i32 6
+// CHECK-DAG: @_ZN6compat1gE = linkonce_odr constant i32 7
 
 template<typename T> struct X {
   static int a;
   static inline int b;
   static int c;
+  static const int d;
+  static int e;
 };
 // CHECK: @_ZN1XIiE1aE = linkonce_odr global i32 10
 // CHECK: @_ZN1XIiE1bE = global i32 20
 // CHECK-NOT: @_ZN1XIiE1cE
+// CHECK: @_ZN1XIiE1dE = linkonce_odr constant i32 40
+// CHECK: @_ZN1XIiE1eE = linkonce_odr global i32 50
 template<> inline int X<int>::a = 10;
 int &use3 = X<int>::a;
 template<> int X<int>::b = 20;
 template<> inline int X<int>::c = 30;
+template<typename T> constexpr int X<T>::d = 40;
+template<typename T> inline int X<T>::e = 50;
+const int *use_x_int_d = &X<int>::d;
+const int *use_x_int_e = &X<int>::e;
+
+template<typename T> struct Y;
+template<> struct Y<int> {
+  static constexpr int a = 123;
+  static constexpr int b = 456;
+  static constexpr int c = 789;
+};
+// CHECK: @_ZN1YIiE1aE = weak_odr constant i32 123
+constexpr int Y<int>::a;
+// CHECK: @_ZN1YIiE1bE = linkonce_odr constant i32 456
+const int &yib = Y<int>::b;
+// CHECK-NOT: @_ZN1YIiE1cE
 
 // CHECK-LABEL: define {{.*}}global_var_init
 // CHECK: call i32 @_Z1fv
diff --git a/test/CodeGenCXX/cxx1z-lambda-star-this.cpp b/test/CodeGenCXX/cxx1z-lambda-star-this.cpp
index a7e4aad..379dbfd 100644
--- a/test/CodeGenCXX/cxx1z-lambda-star-this.cpp
+++ b/test/CodeGenCXX/cxx1z-lambda-star-this.cpp
@@ -16,7 +16,7 @@
 //CHECK: %[[I1:.+]] = bitcast %struct.A* %[[I0]] to i8*
 //CHECK: %[[I2:.+]] = bitcast %struct.A* %this1 to i8*
 // copy the contents ...
-//CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[I1]], i8* %[[I2]], i32 8, i32 8, i1 false)
+//CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %[[I1]], i8* align 8 %[[I2]], i32 8, i1 false)
 
 struct B {
   double b = 222;
diff --git a/test/CodeGenCXX/cxx2a-destroying-delete.cpp b/test/CodeGenCXX/cxx2a-destroying-delete.cpp
index 2e4d407..9c6db1b 100644
--- a/test/CodeGenCXX/cxx2a-destroying-delete.cpp
+++ b/test/CodeGenCXX/cxx2a-destroying-delete.cpp
@@ -78,15 +78,15 @@
 // CHECK: br i1
 //
 // CHECK-NOT: call
-// CHECK: %[[VTABLE:.*]] = load
-// CHECK: %[[DTOR:.*]] = load
-//
 // For MS, we don't add a new vtable slot to the primary vtable for the virtual
 // destructor. Instead we cast to the VDel base class.
 // CHECK-MSABI: bitcast {{.*}} %[[d]]
 // CHECK-MSABI-NEXT: getelementptr {{.*}}, i64 8
 // CHECK-MSABI-NEXT: %[[d:.*]] = bitcast i8*
 //
+// CHECK: %[[VTABLE:.*]] = load
+// CHECK: %[[DTOR:.*]] = load
+//
 // CHECK: call {{void|i8\*}} %[[DTOR]](%{{.*}}* %[[d]]
 // CHECK-MSABI-SAME: , i32 1)
 // CHECK-NOT: call
diff --git a/test/CodeGenCXX/cxx2a-three-way-comparison.cpp b/test/CodeGenCXX/cxx2a-three-way-comparison.cpp
new file mode 100644
index 0000000..fd72d4a
--- /dev/null
+++ b/test/CodeGenCXX/cxx2a-three-way-comparison.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -std=c++2a -emit-llvm %s -o - -triple %itanium_abi_triple | FileCheck %s --check-prefix=ITANIUM
+// RUN: not %clang_cc1 -std=c++2a -emit-llvm %s -o - -triple %ms_abi_triple 2>&1 | FileCheck %s --check-prefix=MSABI
+// RUN: not %clang_cc1 -std=c++2a -emit-llvm %s -o - -triple %itanium_abi_triple -DBUILTIN 2>&1 | FileCheck %s --check-prefix=BUILTIN
+// MSABI: cannot mangle this three-way comparison operator yet
+
+struct A {
+  void operator<=>(int);
+};
+
+// ITANIUM: define {{.*}}@_ZN1AssEi(
+void A::operator<=>(int) {}
+
+// ITANIUM: define {{.*}}@_Zssi1A(
+void operator<=>(int, A) {}
+
+int operator<=>(A, A);
+
+// ITANIUM: define {{.*}}_Z1f1A(
+int f(A a) {
+  // ITANIUM: %[[RET:.*]] = call {{.*}}_Zss1AS_(
+  // ITANIUM: ret i32 %[[RET]]
+  return a <=> a;
+}
+
+#ifdef BUILTIN
+void builtin(int a) {
+  a <=> a; // BUILTIN: cannot compile this scalar expression yet
+}
+#endif
diff --git a/test/CodeGenCXX/debug-info-composite-cc.cpp b/test/CodeGenCXX/debug-info-composite-cc.cpp
new file mode 100644
index 0000000..2f09116
--- /dev/null
+++ b/test/CodeGenCXX/debug-info-composite-cc.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=standalone -triple %itanium_abi_triple %s -o - | FileCheck %s
+
+// Not trivially copyable because of the explicit destructor.
+// CHECK-DAG: !DICompositeType({{.*}}, name: "RefDtor",{{.*}}flags: DIFlagTypePassByReference
+struct RefDtor {
+  int i;
+  ~RefDtor() {}
+} refDtor;
+
+// Not trivially copyable because of the explicit copy constructor.
+// CHECK-DAG: !DICompositeType({{.*}}, name: "RefCopy",{{.*}}flags: DIFlagTypePassByReference
+struct RefCopy {
+  int i;
+  RefCopy() = default;
+  RefCopy(RefCopy &Copy) {}
+} refCopy;
+
+// POD-like type even though it defines a destructor.
+// CHECK-DAG: !DICompositeType({{.*}}, name: "Podlike", {{.*}}flags: DIFlagTypePassByValue
+struct Podlike {
+  int i;
+  Podlike() = default;
+  Podlike(Podlike &&Move) = default;
+  ~Podlike() = default;
+} podlike;
+
+
+// This is a POD type.
+// CHECK-DAG: !DICompositeType({{.*}}, name: "Pod",{{.*}}flags: DIFlagTypePassByValue
+struct Pod {
+  int i;
+} pod;
+
+// This is definitely not a POD type.
+// CHECK-DAG: !DICompositeType({{.*}}, name: "Complex",{{.*}}flags: DIFlagTypePassByReference
+struct Complex {
+  Complex() {}
+  Complex(Complex &Copy) : i(Copy.i) {};
+  int i;
+} complex;
diff --git a/test/CodeGenCXX/debug-info-enum-class.cpp b/test/CodeGenCXX/debug-info-enum-class.cpp
index b615d5b..e857bb1 100644
--- a/test/CodeGenCXX/debug-info-enum-class.cpp
+++ b/test/CodeGenCXX/debug-info-enum-class.cpp
@@ -15,7 +15,7 @@
 // CHECK-SAME:             baseType: ![[INT:[0-9]+]]
 // CHECK-SAME:             size: 32
 // CHECK-NOT:              offset:
-// CHECK-NOT:              flags:
+// CHECK-SAME:             flags: DIFlagFixedEnum
 // CHECK-SAME:             ){{$}}
 // CHECK: ![[INT]] = !DIBasicType(name: "int"
 // CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "B"
@@ -23,12 +23,12 @@
 // CHECK-SAME:             baseType: ![[ULONG:[0-9]+]]
 // CHECK-SAME:             size: 64
 // CHECK-NOT:              offset:
-// CHECK-NOT:              flags:
+// CHECK-SAME:             flags: DIFlagFixedEnum
 // CHECK-SAME:             ){{$}}
 // CHECK: ![[ULONG]] = !DIBasicType(name: "long unsigned int"
 // CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "C"
 // CHECK-SAME:             line: 5
-// CHECK-NOT:              baseType:
+// CHECK-SAME:             baseType: ![[ULONG:[0-9]+]]
 // CHECK-SAME:             size: 32
 // CHECK-NOT:              offset:
 // CHECK-NOT:              flags:
diff --git a/test/CodeGenCXX/debug-info-enum.cpp b/test/CodeGenCXX/debug-info-enum.cpp
index 8f54f9d..447edba 100644
--- a/test/CodeGenCXX/debug-info-enum.cpp
+++ b/test/CodeGenCXX/debug-info-enum.cpp
@@ -11,7 +11,7 @@
 // CHECK-SAME:                      identifier: "_ZTSN5test11eE"
 // CHECK: [[TEST1]] = !DINamespace(name: "test1"
 // CHECK: [[TEST1_ENUMS]] = !{[[TEST1_E:![0-9]*]]}
-// CHECK: [[TEST1_E]] = !DIEnumerator(name: "E", value: 0)
+// CHECK: [[TEST1_E]] = !DIEnumerator(name: "E", value: 0, isUnsigned: true)
 enum e { E };
 void foo() {
   int v = E;
diff --git a/test/CodeGenCXX/debug-info-range-for-var-names.cpp b/test/CodeGenCXX/debug-info-range-for-var-names.cpp
new file mode 100644
index 0000000..6cd70fe
--- /dev/null
+++ b/test/CodeGenCXX/debug-info-range-for-var-names.cpp
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s
+
+struct vec {
+  using itr = int*;
+  itr begin() { return nullptr; }
+  itr end() { return nullptr; }
+};
+
+void test() {
+  vec as, bs, cs;
+
+  for (auto a : as)
+    for (auto b : bs)
+      for (auto c : cs) {
+      }
+}
+
+// CHECK: call void @llvm.dbg.declare(metadata %struct.vec** {{.*}}, metadata ![[RANGE1:[0-9]+]]
+// CHECK: call void @llvm.dbg.declare(metadata i32** {{.*}}, metadata ![[BEGIN1:[0-9]+]]
+// CHECK: call void @llvm.dbg.declare(metadata i32** {{.*}}, metadata ![[END1:[0-9]+]]
+// CHECK: call void @llvm.dbg.declare(metadata %struct.vec** {{.*}}, metadata ![[RANGE2:[0-9]+]]
+// CHECK: call void @llvm.dbg.declare(metadata i32** {{.*}}, metadata ![[BEGIN2:[0-9]+]]
+// CHECK: call void @llvm.dbg.declare(metadata i32** {{.*}}, metadata ![[END2:[0-9]+]]
+// CHECK: call void @llvm.dbg.declare(metadata %struct.vec** {{.*}}, metadata ![[RANGE3:[0-9]+]]
+// CHECK: call void @llvm.dbg.declare(metadata i32** {{.*}}, metadata ![[BEGIN3:[0-9]+]]
+// CHECK: call void @llvm.dbg.declare(metadata i32** {{.*}}, metadata ![[END3:[0-9]+]]
+// CHECK: ![[RANGE1]] = !DILocalVariable(name: "__range1",
+// CHECK: ![[BEGIN1]] = !DILocalVariable(name: "__begin1",
+// CHECK: ![[END1]] = !DILocalVariable(name: "__end1",
+// CHECK: ![[RANGE2]] = !DILocalVariable(name: "__range2",
+// CHECK: ![[BEGIN2]] = !DILocalVariable(name: "__begin2",
+// CHECK: ![[END2]] = !DILocalVariable(name: "__end2",
+// CHECK: ![[RANGE3]] = !DILocalVariable(name: "__range3",
+// CHECK: ![[BEGIN3]] = !DILocalVariable(name: "__begin3",
+// CHECK: ![[END3]] = !DILocalVariable(name: "__end3",
diff --git a/test/CodeGenCXX/debug-info-scope.cpp b/test/CodeGenCXX/debug-info-scope.cpp
index e81eccc..1124282 100644
--- a/test/CodeGenCXX/debug-info-scope.cpp
+++ b/test/CodeGenCXX/debug-info-scope.cpp
@@ -58,7 +58,7 @@
   }
 
   int x[] = {1, 2};
-  // CHECK: = !DILocalVariable(name: "__range"
+  // CHECK: = !DILocalVariable(name: "__range1"
   // CHECK-SAME:               scope: [[RANGE_FOR:![0-9]*]]
   // CHECK-NOT:                line:
   // CHECK-SAME:               ){{$}}
diff --git a/test/CodeGenCXX/debug-info-static-member.cpp b/test/CodeGenCXX/debug-info-static-member.cpp
index 3537754..702d1f8 100644
--- a/test/CodeGenCXX/debug-info-static-member.cpp
+++ b/test/CodeGenCXX/debug-info-static-member.cpp
@@ -3,9 +3,9 @@
 // RUN: %clangxx -target x86_64-unknown-unknown -g -std=c++11 %s -emit-llvm -S -o - | FileCheck %s
 // PR14471
 
-// CHECK: @_ZN1C1aE = global i32 4, align 4, !dbg [[A:![0-9]+]]
-// CHECK: @_ZN1C1bE = global i32 2, align 4, !dbg [[B:![0-9]+]]
-// CHECK: @_ZN1C1cE = global i32 1, align 4, !dbg [[C:![0-9]+]]
+// CHECK: @_ZN1C1aE = dso_local global i32 4, align 4, !dbg [[A:![0-9]+]]
+// CHECK: @_ZN1C1bE = dso_local global i32 2, align 4, !dbg [[B:![0-9]+]]
+// CHECK: @_ZN1C1cE = dso_local global i32 1, align 4, !dbg [[C:![0-9]+]]
 
 enum X {
   Y
diff --git a/test/CodeGenCXX/debug-info-template.cpp b/test/CodeGenCXX/debug-info-template.cpp
index 0c34145..4b330a0 100644
--- a/test/CodeGenCXX/debug-info-template.cpp
+++ b/test/CodeGenCXX/debug-info-template.cpp
@@ -1,8 +1,8 @@
 // RUN: %clang -S -emit-llvm -target x86_64-unknown_unknown -g %s -o - -std=c++11 | FileCheck %s
 
-// CHECK: @tci = global %"struct.TC<unsigned int, 2, &glb, &foo::e, &foo::f, &foo::g, 1, 2, 3>::nested" zeroinitializer, align 1, !dbg [[TCI:![0-9]+]]
-// CHECK: @tcn = global %struct.TC zeroinitializer, align 1, !dbg [[TCN:![0-9]+]]
-// CHECK: @nn = global %struct.NN zeroinitializer, align 1, !dbg [[NN:![0-9]+]]
+// CHECK: @tci = dso_local global %"struct.TC<unsigned int, 2, &glb, &foo::e, &foo::f, &foo::g, 1, 2, 3>::nested" zeroinitializer, align 1, !dbg [[TCI:![0-9]+]]
+// CHECK: @tcn = dso_local global %struct.TC zeroinitializer, align 1, !dbg [[TCN:![0-9]+]]
+// CHECK: @nn = dso_local global %struct.NN zeroinitializer, align 1, !dbg [[NN:![0-9]+]]
 
 // CHECK: !DICompileUnit(
 // CHECK: [[EMPTY:![0-9]*]] = !{}
diff --git a/test/CodeGenCXX/debug-info-vla.cpp b/test/CodeGenCXX/debug-info-vla.cpp
index 88f3a3f..6c35f30 100644
--- a/test/CodeGenCXX/debug-info-vla.cpp
+++ b/test/CodeGenCXX/debug-info-vla.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang -target x86_64-unknown-unknown -fverbose-asm -g -O0 -S -emit-llvm %s -o - -std=c++11 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited -std=c++11 -triple x86_64-unknown-unknown %s -o - | FileCheck %s
 
 
 void f(int m) {
@@ -13,8 +13,10 @@
 // CHECK: [[ELEM_TYPE]] = !{[[NOCOUNT:.*]]}
 // CHECK: [[NOCOUNT]] = !DISubrange(count: -1)
 //
+// CHECK: [[VAR:![0-9]+]] = !DILocalVariable(name: "__vla_expr", {{.*}}flags: DIFlagArtificial
 // CHECK: !DICompositeType(tag: DW_TAG_array_type,
 // CHECK-NOT:                               size:
 // CHECK-SAME:                              elements: [[ELEM_TYPE:![0-9]+]]
-// CHECK: [[ELEM_TYPE]] = !{[[THREE:.*]], [[NOCOUNT]]}
+// CHECK: [[ELEM_TYPE]] = !{[[THREE:.*]], [[VARRANGE:![0-9]+]]}
 // CHECK: [[THREE]] = !DISubrange(count: 3)
+// CHECK: [[VARRANGE]] = !DISubrange(count: [[VAR]])
diff --git a/test/CodeGenCXX/default_calling_conv.cpp b/test/CodeGenCXX/default_calling_conv.cpp
index 95c214a..b5b0f47 100644
--- a/test/CodeGenCXX/default_calling_conv.cpp
+++ b/test/CodeGenCXX/default_calling_conv.cpp
@@ -3,13 +3,23 @@
 // RUN: %clang_cc1 -triple i486-unknown-linux-gnu -fdefault-calling-conv=stdcall -emit-llvm -o - %s | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL
 // RUN: %clang_cc1 -triple i486-unknown-linux-gnu -mrtd -emit-llvm -o - %s | FileCheck %s --check-prefix=STDCALL --check-prefix=ALL
 // RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=vectorcall -emit-llvm -o - %s | FileCheck %s --check-prefix=VECTORCALL --check-prefix=ALL
+// RUN: %clang_cc1 -triple i986-unknown-linux-gnu -fdefault-calling-conv=regcall -emit-llvm -o - %s | FileCheck %s --check-prefix=REGCALL --check-prefix=ALL
 
 // CDECL: define void @_Z5test1v
 // FASTCALL: define x86_fastcallcc void @_Z5test1v
 // STDCALL: define x86_stdcallcc void @_Z5test1v
 // VECTORCALL: define x86_vectorcallcc void @_Z5test1v
+// REGCALL: define x86_regcallcc void @_Z17__regcall3__test1v
 void test1() {}
 
+// fastcall, stdcall, vectorcall and regcall do not support variadic functions.
+// CDECL: define void @_Z12testVariadicz
+// FASTCALL: define void @_Z12testVariadicz
+// STDCALL: define void @_Z12testVariadicz
+// VECTORCALL: define void @_Z12testVariadicz
+// REGCALL: define void @_Z12testVariadicz
+void testVariadic(...){}
+
 // ALL: define void @_Z5test2v
 void __attribute__((cdecl)) test2() {}
 
@@ -22,6 +32,9 @@
 // ALL: define  x86_vectorcallcc void @_Z5test5v
 void __attribute__((vectorcall)) test5() {}
 
+// ALL: define x86_regcallcc void @_Z17__regcall3__test6v
+void __attribute__((regcall)) test6() {}
+
 // ALL: define linkonce_odr void @_ZN1A11test_memberEv
 class A {
 public:
@@ -32,3 +45,8 @@
   A a;
   a.test_member();
 }
+
+// ALL: define i32 @main
+int main() {
+  return 1;
+}
diff --git a/test/CodeGenCXX/derived-cast.cpp b/test/CodeGenCXX/derived-cast.cpp
new file mode 100644
index 0000000..bf2b258
--- /dev/null
+++ b/test/CodeGenCXX/derived-cast.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+class A {
+    int a;
+};
+
+class B {
+    int b;
+public:
+    A *getAsA();
+};
+
+class X : public A, public B {
+    int x;
+};
+
+// PR35909 - https://bugs.llvm.org/show_bug.cgi?id=35909
+
+A *B::getAsA() {
+  return static_cast<X*>(this);
+
+  // CHECK-LABEL: define %class.A* @_ZN1B6getAsAEv
+  // CHECK: %[[THIS:.*]] = load %class.B*, %class.B**
+  // CHECK-NEXT: %[[BC:.*]] = bitcast %class.B* %[[THIS]] to i8*
+  // CHECK-NEXT: getelementptr inbounds i8, i8* %[[BC]], i64 -4
+}
+
diff --git a/test/CodeGenCXX/discard-name-values.cpp b/test/CodeGenCXX/discard-name-values.cpp
index 49cb7d2..d4d7527 100644
--- a/test/CodeGenCXX/discard-name-values.cpp
+++ b/test/CodeGenCXX/discard-name-values.cpp
@@ -1,10 +1,29 @@
-// RUN: %clang_cc1 -emit-llvm  -triple=armv7-apple-darwin -emit-llvm -std=c++11 %s -o - -O1 | FileCheck %s
-// RUN: %clang_cc1 -emit-llvm  -triple=armv7-apple-darwin -emit-llvm -std=c++11 %s -o - -O1 -discard-value-names | FileCheck %s --check-prefix=DISCARDVALUE
+// RUN: %clang_cc1 -emit-llvm  -triple=armv7-apple-darwin -std=c++11 %s -o - -O1 \
+// RUN:    | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -triple=armv7-apple-darwin -std=c++11 %s -o - -O1 \
+// RUN:    -discard-value-names | FileCheck %s --check-prefix=DISCARDVALUE
 
-int foo(int bar) {
-  return bar;
+extern "C" void branch();
+
+bool test(bool pred) {
+  // DISCARDVALUE: br i1 %0, label %2, label %3
+  // CHECK: br i1 %pred, label %if.then, label %if.end
+
+  if (pred) {
+    // DISCARDVALUE: ; <label>:2:
+    // DISCARDVALUE-NEXT: tail call void @branch()
+    // DISCARDVALUE-NEXT: br label %3
+
+    // CHECK: if.then:
+    // CHECK-NEXT: tail call void @branch()
+    // CHECK-NEXT: br label %if.end
+    branch();
+  }
+
+  // DISCARDVALUE: ; <label>:3:
+  // DISCARDVALUE-NEXT: ret i1 %0
+
+  // CHECK: if.end:
+  // CHECK-NEXT: ret i1 %pred
+  return pred;
 }
-
-// CHECK: ret i32 %bar
-// DISCARDVALUE: ret i32 %0
-
diff --git a/test/CodeGenCXX/dllexport-alias.cpp b/test/CodeGenCXX/dllexport-alias.cpp
index a3dc61e..65f2946 100644
--- a/test/CodeGenCXX/dllexport-alias.cpp
+++ b/test/CodeGenCXX/dllexport-alias.cpp
@@ -14,5 +14,5 @@
 
 A::~A() {}
 
-// CHECK: @_ZN1AC1Ev = dllexport alias void (%class.A*), void (%class.A*)* @_ZN1AC2Ev
-// CHECK: @_ZN1AD1Ev = dllexport alias void (%class.A*), void (%class.A*)* @_ZN1AD2Ev
+// CHECK: @_ZN1AC1Ev = dso_local dllexport alias void (%class.A*), void (%class.A*)* @_ZN1AC2Ev
+// CHECK: @_ZN1AD1Ev = dso_local dllexport alias void (%class.A*), void (%class.A*)* @_ZN1AD2Ev
diff --git a/test/CodeGenCXX/dllexport-ctor-closure.cpp b/test/CodeGenCXX/dllexport-ctor-closure.cpp
index 4fae7e1..3a48a47 100644
--- a/test/CodeGenCXX/dllexport-ctor-closure.cpp
+++ b/test/CodeGenCXX/dllexport-ctor-closure.cpp
@@ -5,7 +5,7 @@
 
 struct CtorWithClosure {
   __declspec(dllexport) CtorWithClosure(...) {}
-// CHECK-LABEL: define weak_odr dllexport x86_thiscallcc void @"\01??_FCtorWithClosure@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
+// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_FCtorWithClosure@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
 // CHECK:   %[[this_addr:.*]] = alloca %struct.CtorWithClosure*, align 4
 // CHECK:   store %struct.CtorWithClosure* %this, %struct.CtorWithClosure** %[[this_addr]], align 4
 // CHECK:   %[[this:.*]] = load %struct.CtorWithClosure*, %struct.CtorWithClosure** %[[this_addr]]
@@ -17,7 +17,7 @@
   __declspec(dllexport) CtorWithClosureOutOfLine(...);
 };
 CtorWithClosureOutOfLine::CtorWithClosureOutOfLine(...) {}
-// CHECK-LABEL: define weak_odr dllexport x86_thiscallcc void @"\01??_FCtorWithClosureOutOfLine@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
+// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_FCtorWithClosureOutOfLine@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
 
 #define DELETE_IMPLICIT_MEMBERS(ClassName) \
     ClassName(ClassName &&) = delete; \
@@ -28,7 +28,7 @@
 struct __declspec(dllexport) ClassWithClosure {
   DELETE_IMPLICIT_MEMBERS(ClassWithClosure);
   ClassWithClosure(...) {}
-// CHECK-LABEL: define weak_odr dllexport x86_thiscallcc void @"\01??_FClassWithClosure@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
+// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_FClassWithClosure@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
 // CHECK:   %[[this_addr:.*]] = alloca %struct.ClassWithClosure*, align 4
 // CHECK:   store %struct.ClassWithClosure* %this, %struct.ClassWithClosure** %[[this_addr]], align 4
 // CHECK:   %[[this:.*]] = load %struct.ClassWithClosure*, %struct.ClassWithClosure** %[[this_addr]]
@@ -44,10 +44,10 @@
 extern template struct TemplateWithClosure<int>;
 template struct __declspec(dllexport) TemplateWithClosure<int>;
 
-// CHECK-LABEL: define weak_odr dllexport x86_thiscallcc void @"\01??_F?$TemplateWithClosure@D@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
+// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_F?$TemplateWithClosure@D@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
 // CHECK:   call {{.*}} @"\01??0?$TemplateWithClosure@D@@QAE@H@Z"({{.*}}, i32 1)
 
-// CHECK-LABEL: define weak_odr dllexport x86_thiscallcc void @"\01??_F?$TemplateWithClosure@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
+// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_F?$TemplateWithClosure@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
 // CHECK:   call {{.*}} @"\01??0?$TemplateWithClosure@H@@QAE@H@Z"({{.*}}, i32 4)
 
 struct __declspec(dllexport) NestedOuter {
@@ -59,8 +59,8 @@
   };
 };
 
-// CHECK-LABEL: define weak_odr dllexport x86_thiscallcc void @"\01??_FNestedOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
-// CHECK-LABEL: define weak_odr dllexport x86_thiscallcc void @"\01??_FNestedInner@NestedOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
+// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_FNestedOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
+// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_FNestedInner@NestedOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
 
 struct HasDtor {
   ~HasDtor();
@@ -76,7 +76,7 @@
 };
 CtorClosureOutOfLine::CtorClosureOutOfLine(const HasImplicitDtor2 &v) {}
 
-// CHECK-LABEL: define weak_odr dllexport x86_thiscallcc void @"\01??_FCtorClosureInline@@QAEXXZ"
-// CHECK-LABEL: define linkonce_odr x86_thiscallcc void @"\01??1HasImplicitDtor1@@QAE@XZ"
-// CHECK-LABEL: define weak_odr dllexport x86_thiscallcc void @"\01??_FCtorClosureOutOfLine@@QAEXXZ"
-// CHECK-LABEL: define linkonce_odr x86_thiscallcc void @"\01??1HasImplicitDtor2@@QAE@XZ"
+// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_FCtorClosureInline@@QAEXXZ"
+// CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc void @"\01??1HasImplicitDtor1@@QAE@XZ"
+// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_FCtorClosureOutOfLine@@QAEXXZ"
+// CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc void @"\01??1HasImplicitDtor2@@QAE@XZ"
diff --git a/test/CodeGenCXX/dllexport-dtor-thunks.cpp b/test/CodeGenCXX/dllexport-dtor-thunks.cpp
index 52f9901..45cce8b 100644
--- a/test/CodeGenCXX/dllexport-dtor-thunks.cpp
+++ b/test/CodeGenCXX/dllexport-dtor-thunks.cpp
@@ -6,5 +6,5 @@
 C::~C() {}
 
 // This thunk should *not* be dllexport.
-// CHECK: define linkonce_odr i8* @"\01??_EC@@W7EAAPEAXI@Z"
-// CHECK: define dllexport void @"\01??1C@@UEAA@XZ"
+// CHECK: define linkonce_odr dso_local i8* @"\01??_EC@@W7EAAPEAXI@Z"
+// CHECK: define dso_local dllexport void @"\01??1C@@UEAA@XZ"
diff --git a/test/CodeGenCXX/dllexport-members.cpp b/test/CodeGenCXX/dllexport-members.cpp
index 1c56251..569ef25 100644
--- a/test/CodeGenCXX/dllexport-members.cpp
+++ b/test/CodeGenCXX/dllexport-members.cpp
@@ -26,101 +26,101 @@
 struct ExportMembers {
   struct Nested;
 
-  // M32-DAG: define          dllexport x86_thiscallcc void @"\01?normalDef@ExportMembers@@QAEXXZ"(%struct.ExportMembers* %this)
-  // M64-DAG: define          dllexport                void @"\01?normalDef@ExportMembers@@QEAAXXZ"(%struct.ExportMembers* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?normalInclass@ExportMembers@@QAEXXZ"(%struct.ExportMembers* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?normalInclass@ExportMembers@@QEAAXXZ"(%struct.ExportMembers* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?normalInlineDef@ExportMembers@@QAEXXZ"(%struct.ExportMembers* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?normalInlineDef@ExportMembers@@QEAAXXZ"(%struct.ExportMembers* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?normalInlineDecl@ExportMembers@@QAEXXZ"(%struct.ExportMembers* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?normalInlineDecl@ExportMembers@@QEAAXXZ"(%struct.ExportMembers* %this)
-  // G32-DAG: define          dllexport x86_thiscallcc void @_ZN13ExportMembers9normalDefEv(%struct.ExportMembers* %this)
-  // G64-DAG: define          dllexport                void @_ZN13ExportMembers9normalDefEv(%struct.ExportMembers* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers13normalInclassEv(%struct.ExportMembers* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers13normalInclassEv(%struct.ExportMembers* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers15normalInlineDefEv(%struct.ExportMembers* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers15normalInlineDefEv(%struct.ExportMembers* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers16normalInlineDeclEv(%struct.ExportMembers* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers16normalInlineDeclEv(%struct.ExportMembers* %this)
-  // M32-DAG: define linkonce_odr       x86_thiscallcc void @"\01?referencedNonExportedInClass@ExportMembers@@QAEXXZ"
+  // M32-DAG: define          dso_local dllexport x86_thiscallcc void @"\01?normalDef@ExportMembers@@QAEXXZ"(%struct.ExportMembers* %this)
+  // M64-DAG: define          dso_local dllexport                void @"\01?normalDef@ExportMembers@@QEAAXXZ"(%struct.ExportMembers* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?normalInclass@ExportMembers@@QAEXXZ"(%struct.ExportMembers* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?normalInclass@ExportMembers@@QEAAXXZ"(%struct.ExportMembers* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?normalInlineDef@ExportMembers@@QAEXXZ"(%struct.ExportMembers* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?normalInlineDef@ExportMembers@@QEAAXXZ"(%struct.ExportMembers* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?normalInlineDecl@ExportMembers@@QAEXXZ"(%struct.ExportMembers* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?normalInlineDecl@ExportMembers@@QEAAXXZ"(%struct.ExportMembers* %this)
+  // G32-DAG: define          dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers9normalDefEv(%struct.ExportMembers* %this)
+  // G64-DAG: define          dso_local dllexport                void @_ZN13ExportMembers9normalDefEv(%struct.ExportMembers* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers13normalInclassEv(%struct.ExportMembers* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers13normalInclassEv(%struct.ExportMembers* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers15normalInlineDefEv(%struct.ExportMembers* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers15normalInlineDefEv(%struct.ExportMembers* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers16normalInlineDeclEv(%struct.ExportMembers* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers16normalInlineDeclEv(%struct.ExportMembers* %this)
+  // M32-DAG: define linkonce_odr dso_local       x86_thiscallcc void @"\01?referencedNonExportedInClass@ExportMembers@@QAEXXZ"
   __declspec(dllexport)                void normalDef();
   __declspec(dllexport)                void normalInclass() { referencedNonExportedInClass(); }
   __declspec(dllexport)                void normalInlineDef();
   __declspec(dllexport)         inline void normalInlineDecl();
                                        void referencedNonExportedInClass() {}
 
-  // M32-DAG: define          dllexport x86_thiscallcc void @"\01?virtualDef@ExportMembers@@UAEXXZ"(%struct.ExportMembers* %this)
-  // M64-DAG: define          dllexport                void @"\01?virtualDef@ExportMembers@@UEAAXXZ"(%struct.ExportMembers* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?virtualInclass@ExportMembers@@UAEXXZ"(%struct.ExportMembers* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?virtualInclass@ExportMembers@@UEAAXXZ"(%struct.ExportMembers* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?virtualInlineDef@ExportMembers@@UAEXXZ"(%struct.ExportMembers* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?virtualInlineDef@ExportMembers@@UEAAXXZ"(%struct.ExportMembers* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?virtualInlineDecl@ExportMembers@@UAEXXZ"(%struct.ExportMembers* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?virtualInlineDecl@ExportMembers@@UEAAXXZ"(%struct.ExportMembers* %this)
-  // G32-DAG: define          dllexport x86_thiscallcc void @_ZN13ExportMembers10virtualDefEv(%struct.ExportMembers* %this)
-  // G64-DAG: define          dllexport                void @_ZN13ExportMembers10virtualDefEv(%struct.ExportMembers* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers14virtualInclassEv(%struct.ExportMembers* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers14virtualInclassEv(%struct.ExportMembers* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers16virtualInlineDefEv(%struct.ExportMembers* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers16virtualInlineDefEv(%struct.ExportMembers* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers17virtualInlineDeclEv(%struct.ExportMembers* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers17virtualInlineDeclEv(%struct.ExportMembers* %this)
+  // M32-DAG: define          dso_local dllexport x86_thiscallcc void @"\01?virtualDef@ExportMembers@@UAEXXZ"(%struct.ExportMembers* %this)
+  // M64-DAG: define          dso_local dllexport                void @"\01?virtualDef@ExportMembers@@UEAAXXZ"(%struct.ExportMembers* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?virtualInclass@ExportMembers@@UAEXXZ"(%struct.ExportMembers* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?virtualInclass@ExportMembers@@UEAAXXZ"(%struct.ExportMembers* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?virtualInlineDef@ExportMembers@@UAEXXZ"(%struct.ExportMembers* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?virtualInlineDef@ExportMembers@@UEAAXXZ"(%struct.ExportMembers* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?virtualInlineDecl@ExportMembers@@UAEXXZ"(%struct.ExportMembers* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?virtualInlineDecl@ExportMembers@@UEAAXXZ"(%struct.ExportMembers* %this)
+  // G32-DAG: define          dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers10virtualDefEv(%struct.ExportMembers* %this)
+  // G64-DAG: define          dso_local dllexport                void @_ZN13ExportMembers10virtualDefEv(%struct.ExportMembers* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers14virtualInclassEv(%struct.ExportMembers* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers14virtualInclassEv(%struct.ExportMembers* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers16virtualInlineDefEv(%struct.ExportMembers* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers16virtualInlineDefEv(%struct.ExportMembers* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers17virtualInlineDeclEv(%struct.ExportMembers* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers17virtualInlineDeclEv(%struct.ExportMembers* %this)
   __declspec(dllexport) virtual        void virtualDef();
   __declspec(dllexport) virtual        void virtualInclass() {}
   __declspec(dllexport) virtual        void virtualInlineDef();
   __declspec(dllexport) virtual inline void virtualInlineDecl();
 
-  // MSC-DAG: define          dllexport                void @"\01?staticDef@ExportMembers@@SAXXZ"()
-  // MSC-DAG: define weak_odr dllexport                void @"\01?staticInclass@ExportMembers@@SAXXZ"()
-  // MSC-DAG: define weak_odr dllexport                void @"\01?staticInlineDef@ExportMembers@@SAXXZ"()
-  // MSC-DAG: define weak_odr dllexport                void @"\01?staticInlineDecl@ExportMembers@@SAXXZ"()
-  // GNU-DAG: define          dllexport                void @_ZN13ExportMembers9staticDefEv()
-  // GNU-DAG: define weak_odr dllexport                void @_ZN13ExportMembers13staticInclassEv()
-  // GNU-DAG: define weak_odr dllexport                void @_ZN13ExportMembers15staticInlineDefEv()
-  // GNU-DAG: define weak_odr dllexport                void @_ZN13ExportMembers16staticInlineDeclEv()
+  // MSC-DAG: define          dso_local dllexport                void @"\01?staticDef@ExportMembers@@SAXXZ"()
+  // MSC-DAG: define weak_odr dso_local dllexport                void @"\01?staticInclass@ExportMembers@@SAXXZ"()
+  // MSC-DAG: define weak_odr dso_local dllexport                void @"\01?staticInlineDef@ExportMembers@@SAXXZ"()
+  // MSC-DAG: define weak_odr dso_local dllexport                void @"\01?staticInlineDecl@ExportMembers@@SAXXZ"()
+  // GNU-DAG: define          dso_local dllexport                void @_ZN13ExportMembers9staticDefEv()
+  // GNU-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers13staticInclassEv()
+  // GNU-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers15staticInlineDefEv()
+  // GNU-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers16staticInlineDeclEv()
   __declspec(dllexport) static         void staticDef();
   __declspec(dllexport) static         void staticInclass() {}
   __declspec(dllexport) static         void staticInlineDef();
   __declspec(dllexport) static  inline void staticInlineDecl();
 
-  // M32-DAG: define          dllexport x86_thiscallcc void @"\01?protectedDef@ExportMembers@@IAEXXZ"(%struct.ExportMembers* %this)
-  // M64-DAG: define          dllexport                void @"\01?protectedDef@ExportMembers@@IEAAXXZ"(%struct.ExportMembers* %this)
-  // G32-DAG: define          dllexport x86_thiscallcc void @_ZN13ExportMembers12protectedDefEv(%struct.ExportMembers* %this)
-  // G64-DAG: define          dllexport                void @_ZN13ExportMembers12protectedDefEv(%struct.ExportMembers* %this)
-  // MSC-DAG: define          dllexport                void @"\01?protectedStaticDef@ExportMembers@@KAXXZ"()
-  // GNU-DAG: define          dllexport                void @_ZN13ExportMembers18protectedStaticDefEv()
+  // M32-DAG: define          dso_local dllexport x86_thiscallcc void @"\01?protectedDef@ExportMembers@@IAEXXZ"(%struct.ExportMembers* %this)
+  // M64-DAG: define          dso_local dllexport                void @"\01?protectedDef@ExportMembers@@IEAAXXZ"(%struct.ExportMembers* %this)
+  // G32-DAG: define          dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers12protectedDefEv(%struct.ExportMembers* %this)
+  // G64-DAG: define          dso_local dllexport                void @_ZN13ExportMembers12protectedDefEv(%struct.ExportMembers* %this)
+  // MSC-DAG: define          dso_local dllexport                void @"\01?protectedStaticDef@ExportMembers@@KAXXZ"()
+  // GNU-DAG: define          dso_local dllexport                void @_ZN13ExportMembers18protectedStaticDefEv()
 protected:
   __declspec(dllexport)                void protectedDef();
   __declspec(dllexport) static         void protectedStaticDef();
 
-  // M32-DAG: define          dllexport x86_thiscallcc void @"\01?privateDef@ExportMembers@@AAEXXZ"(%struct.ExportMembers* %this)
-  // M64-DAG: define          dllexport                void @"\01?privateDef@ExportMembers@@AEAAXXZ"(%struct.ExportMembers* %this)
-  // G32-DAG: define          dllexport x86_thiscallcc void @_ZN13ExportMembers10privateDefEv(%struct.ExportMembers* %this)
-  // G64-DAG: define          dllexport                void @_ZN13ExportMembers10privateDefEv(%struct.ExportMembers* %this)
-  // MSC-DAG: define          dllexport                void @"\01?privateStaticDef@ExportMembers@@CAXXZ"()
-  // GNU-DAG: define          dllexport                void @_ZN13ExportMembers16privateStaticDefEv()
+  // M32-DAG: define          dso_local dllexport x86_thiscallcc void @"\01?privateDef@ExportMembers@@AAEXXZ"(%struct.ExportMembers* %this)
+  // M64-DAG: define          dso_local dllexport                void @"\01?privateDef@ExportMembers@@AEAAXXZ"(%struct.ExportMembers* %this)
+  // G32-DAG: define          dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers10privateDefEv(%struct.ExportMembers* %this)
+  // G64-DAG: define          dso_local dllexport                void @_ZN13ExportMembers10privateDefEv(%struct.ExportMembers* %this)
+  // MSC-DAG: define          dso_local dllexport                void @"\01?privateStaticDef@ExportMembers@@CAXXZ"()
+  // GNU-DAG: define          dso_local dllexport                void @_ZN13ExportMembers16privateStaticDefEv()
 private:
   __declspec(dllexport)                void privateDef();
   __declspec(dllexport) static         void privateStaticDef();
 
-  // M32-DAG: define                    x86_thiscallcc void @"\01?ignored@ExportMembers@@QAEXXZ"(%struct.ExportMembers* %this)
-  // M64-DAG: define                                   void @"\01?ignored@ExportMembers@@QEAAXXZ"(%struct.ExportMembers* %this)
-  // G32-DAG: define                    x86_thiscallcc void @_ZN13ExportMembers7ignoredEv(%struct.ExportMembers* %this)
-  // G64-DAG: define                                   void @_ZN13ExportMembers7ignoredEv(%struct.ExportMembers* %this)
+  // M32-DAG: define          dso_local x86_thiscallcc void @"\01?ignored@ExportMembers@@QAEXXZ"(%struct.ExportMembers* %this)
+  // M64-DAG: define          dso_local                void @"\01?ignored@ExportMembers@@QEAAXXZ"(%struct.ExportMembers* %this)
+  // G32-DAG: define          dso_local x86_thiscallcc void @_ZN13ExportMembers7ignoredEv(%struct.ExportMembers* %this)
+  // G64-DAG: define          dso_local                void @_ZN13ExportMembers7ignoredEv(%struct.ExportMembers* %this)
 public:
   void ignored();
 
-  // MSC-DAG: @"\01?StaticField@ExportMembers@@2HA"               = dllexport global i32 1, align 4
-  // MSC-DAG: @"\01?StaticConstField@ExportMembers@@2HB"          = dllexport constant i32 1, align 4
-  // MSC-DAG: @"\01?StaticConstFieldEqualInit@ExportMembers@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-  // MSC-DAG: @"\01?StaticConstFieldBraceInit@ExportMembers@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-  // MSC-DAG: @"\01?StaticConstFieldRefNotDef@ExportMembers@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-  // MSC-DAG: @"\01?ConstexprField@ExportMembers@@2HB"            = weak_odr dllexport constant i32 1, comdat, align 4
-  // GNU-DAG: @_ZN13ExportMembers11StaticFieldE                   = dllexport global i32 1, align 4
-  // GNU-DAG: @_ZN13ExportMembers16StaticConstFieldE              = dllexport constant i32 1, align 4
-  // GNU-DAG: @_ZN13ExportMembers25StaticConstFieldEqualInitE     = dllexport constant i32 1, align 4
-  // GNU-DAG: @_ZN13ExportMembers25StaticConstFieldBraceInitE     = dllexport constant i32 1, align 4
-  // GNU-DAG: @_ZN13ExportMembers14ConstexprFieldE                = dllexport constant i32 1, align 4
+  // MSC-DAG: @"\01?StaticField@ExportMembers@@2HA"               = dso_local dllexport global i32 1, align 4
+  // MSC-DAG: @"\01?StaticConstField@ExportMembers@@2HB"          = dso_local dllexport constant i32 1, align 4
+  // MSC-DAG: @"\01?StaticConstFieldEqualInit@ExportMembers@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+  // MSC-DAG: @"\01?StaticConstFieldBraceInit@ExportMembers@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+  // MSC-DAG: @"\01?StaticConstFieldRefNotDef@ExportMembers@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+  // MSC-DAG: @"\01?ConstexprField@ExportMembers@@2HB"            = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+  // GNU-DAG: @_ZN13ExportMembers11StaticFieldE                   = dso_local dllexport global i32 1, align 4
+  // GNU-DAG: @_ZN13ExportMembers16StaticConstFieldE              = dso_local dllexport constant i32 1, align 4
+  // GNU-DAG: @_ZN13ExportMembers25StaticConstFieldEqualInitE     = dso_local dllexport constant i32 1, align 4
+  // GNU-DAG: @_ZN13ExportMembers25StaticConstFieldBraceInitE     = dso_local dllexport constant i32 1, align 4
+  // GNU-DAG: @_ZN13ExportMembers14ConstexprFieldE                = dso_local dllexport constant i32 1, align 4
   __declspec(dllexport) static         int  StaticField;
   __declspec(dllexport) static  const  int  StaticConstField;
   __declspec(dllexport) static  const  int  StaticConstFieldEqualInit = 1;
@@ -154,99 +154,99 @@
 
 // Export individual members of a nested class.
 struct ExportMembers::Nested {
-  // M32-DAG: define          dllexport x86_thiscallcc void @"\01?normalDef@Nested@ExportMembers@@QAEXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M64-DAG: define          dllexport                void @"\01?normalDef@Nested@ExportMembers@@QEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?normalInclass@Nested@ExportMembers@@QAEXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?normalInclass@Nested@ExportMembers@@QEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?normalInlineDef@Nested@ExportMembers@@QAEXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?normalInlineDef@Nested@ExportMembers@@QEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?normalInlineDecl@Nested@ExportMembers@@QAEXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?normalInlineDecl@Nested@ExportMembers@@QEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // G32-DAG: define          dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested9normalDefEv(%"struct.ExportMembers::Nested"* %this)
-  // G64-DAG: define          dllexport                void @_ZN13ExportMembers6Nested9normalDefEv(%"struct.ExportMembers::Nested"* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested13normalInclassEv(%"struct.ExportMembers::Nested"* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers6Nested13normalInclassEv(%"struct.ExportMembers::Nested"* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested15normalInlineDefEv(%"struct.ExportMembers::Nested"* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers6Nested15normalInlineDefEv(%"struct.ExportMembers::Nested"* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested16normalInlineDeclEv(%"struct.ExportMembers::Nested"* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers6Nested16normalInlineDeclEv(%"struct.ExportMembers::Nested"* %this)
+  // M32-DAG: define          dso_local dllexport x86_thiscallcc void @"\01?normalDef@Nested@ExportMembers@@QAEXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M64-DAG: define          dso_local dllexport                void @"\01?normalDef@Nested@ExportMembers@@QEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?normalInclass@Nested@ExportMembers@@QAEXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?normalInclass@Nested@ExportMembers@@QEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?normalInlineDef@Nested@ExportMembers@@QAEXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?normalInlineDef@Nested@ExportMembers@@QEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?normalInlineDecl@Nested@ExportMembers@@QAEXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?normalInlineDecl@Nested@ExportMembers@@QEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // G32-DAG: define          dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested9normalDefEv(%"struct.ExportMembers::Nested"* %this)
+  // G64-DAG: define          dso_local dllexport                void @_ZN13ExportMembers6Nested9normalDefEv(%"struct.ExportMembers::Nested"* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested13normalInclassEv(%"struct.ExportMembers::Nested"* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers6Nested13normalInclassEv(%"struct.ExportMembers::Nested"* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested15normalInlineDefEv(%"struct.ExportMembers::Nested"* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers6Nested15normalInlineDefEv(%"struct.ExportMembers::Nested"* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested16normalInlineDeclEv(%"struct.ExportMembers::Nested"* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers6Nested16normalInlineDeclEv(%"struct.ExportMembers::Nested"* %this)
   __declspec(dllexport)                void normalDef();
   __declspec(dllexport)                void normalInclass() {}
   __declspec(dllexport)                void normalInlineDef();
   __declspec(dllexport)         inline void normalInlineDecl();
 
-  // M32-DAG: define          dllexport x86_thiscallcc void @"\01?virtualDef@Nested@ExportMembers@@UAEXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M64-DAG: define          dllexport                void @"\01?virtualDef@Nested@ExportMembers@@UEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?virtualInclass@Nested@ExportMembers@@UAEXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?virtualInclass@Nested@ExportMembers@@UEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?virtualInlineDef@Nested@ExportMembers@@UAEXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?virtualInlineDef@Nested@ExportMembers@@UEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?virtualInlineDecl@Nested@ExportMembers@@UAEXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01?virtualInlineDecl@Nested@ExportMembers@@UEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // G32-DAG: define          dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested10virtualDefEv(%"struct.ExportMembers::Nested"* %this)
-  // G64-DAG: define          dllexport                void @_ZN13ExportMembers6Nested10virtualDefEv(%"struct.ExportMembers::Nested"* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested14virtualInclassEv(%"struct.ExportMembers::Nested"* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers6Nested14virtualInclassEv(%"struct.ExportMembers::Nested"* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested16virtualInlineDefEv(%"struct.ExportMembers::Nested"* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers6Nested16virtualInlineDefEv(%"struct.ExportMembers::Nested"* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested17virtualInlineDeclEv(%"struct.ExportMembers::Nested"* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN13ExportMembers6Nested17virtualInlineDeclEv(%"struct.ExportMembers::Nested"* %this)
+  // M32-DAG: define          dso_local dllexport x86_thiscallcc void @"\01?virtualDef@Nested@ExportMembers@@UAEXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M64-DAG: define          dso_local dllexport                void @"\01?virtualDef@Nested@ExportMembers@@UEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?virtualInclass@Nested@ExportMembers@@UAEXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?virtualInclass@Nested@ExportMembers@@UEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?virtualInlineDef@Nested@ExportMembers@@UAEXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?virtualInlineDef@Nested@ExportMembers@@UEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?virtualInlineDecl@Nested@ExportMembers@@UAEXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01?virtualInlineDecl@Nested@ExportMembers@@UEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // G32-DAG: define          dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested10virtualDefEv(%"struct.ExportMembers::Nested"* %this)
+  // G64-DAG: define          dso_local dllexport                void @_ZN13ExportMembers6Nested10virtualDefEv(%"struct.ExportMembers::Nested"* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested14virtualInclassEv(%"struct.ExportMembers::Nested"* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers6Nested14virtualInclassEv(%"struct.ExportMembers::Nested"* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested16virtualInlineDefEv(%"struct.ExportMembers::Nested"* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers6Nested16virtualInlineDefEv(%"struct.ExportMembers::Nested"* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested17virtualInlineDeclEv(%"struct.ExportMembers::Nested"* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers6Nested17virtualInlineDeclEv(%"struct.ExportMembers::Nested"* %this)
   __declspec(dllexport) virtual        void virtualDef();
   __declspec(dllexport) virtual        void virtualInclass() {}
   __declspec(dllexport) virtual        void virtualInlineDef();
   __declspec(dllexport) virtual inline void virtualInlineDecl();
 
-  // MSC-DAG: define          dllexport                void @"\01?staticDef@Nested@ExportMembers@@SAXXZ"()
-  // MSC-DAG: define weak_odr dllexport                void @"\01?staticInclass@Nested@ExportMembers@@SAXXZ"()
-  // MSC-DAG: define weak_odr dllexport                void @"\01?staticInlineDef@Nested@ExportMembers@@SAXXZ"()
-  // MSC-DAG: define weak_odr dllexport                void @"\01?staticInlineDecl@Nested@ExportMembers@@SAXXZ"()
-  // GNU-DAG: define          dllexport                void @_ZN13ExportMembers6Nested9staticDefEv()
-  // GNU-DAG: define weak_odr dllexport                void @_ZN13ExportMembers6Nested13staticInclassEv()
-  // GNU-DAG: define weak_odr dllexport                void @_ZN13ExportMembers6Nested15staticInlineDefEv()
-  // GNU-DAG: define weak_odr dllexport                void @_ZN13ExportMembers6Nested16staticInlineDeclEv()
+  // MSC-DAG: define          dso_local dllexport                void @"\01?staticDef@Nested@ExportMembers@@SAXXZ"()
+  // MSC-DAG: define weak_odr dso_local dllexport                void @"\01?staticInclass@Nested@ExportMembers@@SAXXZ"()
+  // MSC-DAG: define weak_odr dso_local dllexport                void @"\01?staticInlineDef@Nested@ExportMembers@@SAXXZ"()
+  // MSC-DAG: define weak_odr dso_local dllexport                void @"\01?staticInlineDecl@Nested@ExportMembers@@SAXXZ"()
+  // GNU-DAG: define          dso_local dllexport                void @_ZN13ExportMembers6Nested9staticDefEv()
+  // GNU-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers6Nested13staticInclassEv()
+  // GNU-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers6Nested15staticInlineDefEv()
+  // GNU-DAG: define weak_odr dso_local dllexport                void @_ZN13ExportMembers6Nested16staticInlineDeclEv()
   __declspec(dllexport) static         void staticDef();
   __declspec(dllexport) static         void staticInclass() {}
   __declspec(dllexport) static         void staticInlineDef();
   __declspec(dllexport) static  inline void staticInlineDecl();
 
-  // M32-DAG: define          dllexport x86_thiscallcc void @"\01?protectedDef@Nested@ExportMembers@@IAEXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M64-DAG: define          dllexport                void @"\01?protectedDef@Nested@ExportMembers@@IEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // G32-DAG: define          dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested12protectedDefEv(%"struct.ExportMembers::Nested"* %this)
-  // G64-DAG: define          dllexport                void @_ZN13ExportMembers6Nested12protectedDefEv(%"struct.ExportMembers::Nested"* %this)
-  // MSC-DAG: define          dllexport                void @"\01?protectedStaticDef@Nested@ExportMembers@@KAXXZ"()
-  // GNU-DAG: define          dllexport                void @_ZN13ExportMembers6Nested18protectedStaticDefEv()
+  // M32-DAG: define          dso_local dllexport x86_thiscallcc void @"\01?protectedDef@Nested@ExportMembers@@IAEXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M64-DAG: define          dso_local dllexport                void @"\01?protectedDef@Nested@ExportMembers@@IEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // G32-DAG: define          dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested12protectedDefEv(%"struct.ExportMembers::Nested"* %this)
+  // G64-DAG: define          dso_local dllexport                void @_ZN13ExportMembers6Nested12protectedDefEv(%"struct.ExportMembers::Nested"* %this)
+  // MSC-DAG: define          dso_local dllexport                void @"\01?protectedStaticDef@Nested@ExportMembers@@KAXXZ"()
+  // GNU-DAG: define          dso_local dllexport                void @_ZN13ExportMembers6Nested18protectedStaticDefEv()
 protected:
   __declspec(dllexport)                void protectedDef();
   __declspec(dllexport) static         void protectedStaticDef();
 
-  // M32-DAG: define          dllexport x86_thiscallcc void @"\01?privateDef@Nested@ExportMembers@@AAEXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M64-DAG: define          dllexport                void @"\01?privateDef@Nested@ExportMembers@@AEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // G32-DAG: define          dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested10privateDefEv(%"struct.ExportMembers::Nested"* %this)
-  // G64-DAG: define          dllexport                void @_ZN13ExportMembers6Nested10privateDefEv(%"struct.ExportMembers::Nested"* %this)
-  // MSC-DAG: define          dllexport                void @"\01?privateStaticDef@Nested@ExportMembers@@CAXXZ"()
-  // GNU-DAG: define          dllexport                void @_ZN13ExportMembers6Nested16privateStaticDefEv()
+  // M32-DAG: define          dso_local dllexport x86_thiscallcc void @"\01?privateDef@Nested@ExportMembers@@AAEXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M64-DAG: define          dso_local dllexport                void @"\01?privateDef@Nested@ExportMembers@@AEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // G32-DAG: define          dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested10privateDefEv(%"struct.ExportMembers::Nested"* %this)
+  // G64-DAG: define          dso_local dllexport                void @_ZN13ExportMembers6Nested10privateDefEv(%"struct.ExportMembers::Nested"* %this)
+  // MSC-DAG: define          dso_local dllexport                void @"\01?privateStaticDef@Nested@ExportMembers@@CAXXZ"()
+  // GNU-DAG: define          dso_local dllexport                void @_ZN13ExportMembers6Nested16privateStaticDefEv()
 private:
   __declspec(dllexport)                void privateDef();
   __declspec(dllexport) static         void privateStaticDef();
 
-  // M32-DAG: define                    x86_thiscallcc void @"\01?ignored@Nested@ExportMembers@@QAEXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // M64-DAG: define                                   void @"\01?ignored@Nested@ExportMembers@@QEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
-  // G32-DAG: define                    x86_thiscallcc void @_ZN13ExportMembers6Nested7ignoredEv(%"struct.ExportMembers::Nested"* %this)
-  // G64-DAG: define                                   void @_ZN13ExportMembers6Nested7ignoredEv(%"struct.ExportMembers::Nested"* %this)
+  // M32-DAG: define          dso_local x86_thiscallcc void @"\01?ignored@Nested@ExportMembers@@QAEXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // M64-DAG: define          dso_local                void @"\01?ignored@Nested@ExportMembers@@QEAAXXZ"(%"struct.ExportMembers::Nested"* %this)
+  // G32-DAG: define          dso_local x86_thiscallcc void @_ZN13ExportMembers6Nested7ignoredEv(%"struct.ExportMembers::Nested"* %this)
+  // G64-DAG: define          dso_local                void @_ZN13ExportMembers6Nested7ignoredEv(%"struct.ExportMembers::Nested"* %this)
 public:
   void ignored();
 
-  // MSC-DAG: @"\01?StaticField@Nested@ExportMembers@@2HA"               = dllexport global i32 1, align 4
-  // MSC-DAG: @"\01?StaticConstField@Nested@ExportMembers@@2HB"          = dllexport constant i32 1, align 4
-  // MSC-DAG: @"\01?StaticConstFieldEqualInit@Nested@ExportMembers@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-  // MSC-DAG: @"\01?StaticConstFieldBraceInit@Nested@ExportMembers@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-  // MSC-DAG: @"\01?StaticConstFieldRefNotDef@Nested@ExportMembers@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-  // MSC-DAG: @"\01?ConstexprField@Nested@ExportMembers@@2HB"            = weak_odr dllexport constant i32 1, comdat, align 4
-  // GNU-DAG: @_ZN13ExportMembers6Nested11StaticFieldE                   = dllexport global i32 1, align 4
-  // GNU-DAG: @_ZN13ExportMembers6Nested16StaticConstFieldE              = dllexport constant i32 1, align 4
-  // GNU-DAG: @_ZN13ExportMembers6Nested25StaticConstFieldEqualInitE     = dllexport constant i32 1, align 4
-  // GNU-DAG: @_ZN13ExportMembers6Nested25StaticConstFieldBraceInitE     = dllexport constant i32 1, align 4
-  // GNU-DAG: @_ZN13ExportMembers6Nested14ConstexprFieldE                = dllexport constant i32 1, align 4
+  // MSC-DAG: @"\01?StaticField@Nested@ExportMembers@@2HA"               = dso_local dllexport global i32 1, align 4
+  // MSC-DAG: @"\01?StaticConstField@Nested@ExportMembers@@2HB"          = dso_local dllexport constant i32 1, align 4
+  // MSC-DAG: @"\01?StaticConstFieldEqualInit@Nested@ExportMembers@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+  // MSC-DAG: @"\01?StaticConstFieldBraceInit@Nested@ExportMembers@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+  // MSC-DAG: @"\01?StaticConstFieldRefNotDef@Nested@ExportMembers@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+  // MSC-DAG: @"\01?ConstexprField@Nested@ExportMembers@@2HB"            = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+  // GNU-DAG: @_ZN13ExportMembers6Nested11StaticFieldE                   = dso_local dllexport global i32 1, align 4
+  // GNU-DAG: @_ZN13ExportMembers6Nested16StaticConstFieldE              = dso_local dllexport constant i32 1, align 4
+  // GNU-DAG: @_ZN13ExportMembers6Nested25StaticConstFieldEqualInitE     = dso_local dllexport constant i32 1, align 4
+  // GNU-DAG: @_ZN13ExportMembers6Nested25StaticConstFieldBraceInitE     = dso_local dllexport constant i32 1, align 4
+  // GNU-DAG: @_ZN13ExportMembers6Nested14ConstexprFieldE                = dso_local dllexport constant i32 1, align 4
   __declspec(dllexport) static         int  StaticField;
   __declspec(dllexport) static  const  int  StaticConstField;
   __declspec(dllexport) static  const  int  StaticConstFieldEqualInit = 1;
@@ -280,48 +280,48 @@
 
 // Export special member functions.
 struct ExportSpecials {
-  // M32-DAG: define dllexport x86_thiscallcc %struct.ExportSpecials* @"\01??0ExportSpecials@@QAE@XZ"(%struct.ExportSpecials* returned %this)
-  // M64-DAG: define dllexport                %struct.ExportSpecials* @"\01??0ExportSpecials@@QEAA@XZ"(%struct.ExportSpecials* returned %this)
-  // G32-DAG: define dllexport x86_thiscallcc void @_ZN14ExportSpecialsC1Ev(%struct.ExportSpecials* %this)
-  // G64-DAG: define dllexport                void @_ZN14ExportSpecialsC1Ev(%struct.ExportSpecials* %this)
-  // G32-DAG: define dllexport x86_thiscallcc void @_ZN14ExportSpecialsC2Ev(%struct.ExportSpecials* %this)
-  // G64-DAG: define dllexport                void @_ZN14ExportSpecialsC2Ev(%struct.ExportSpecials* %this)
+  // M32-DAG: define dso_local dllexport x86_thiscallcc %struct.ExportSpecials* @"\01??0ExportSpecials@@QAE@XZ"(%struct.ExportSpecials* returned %this)
+  // M64-DAG: define dso_local dllexport                %struct.ExportSpecials* @"\01??0ExportSpecials@@QEAA@XZ"(%struct.ExportSpecials* returned %this)
+  // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC1Ev(%struct.ExportSpecials* %this)
+  // G64-DAG: define dso_local dllexport                void @_ZN14ExportSpecialsC1Ev(%struct.ExportSpecials* %this)
+  // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC2Ev(%struct.ExportSpecials* %this)
+  // G64-DAG: define dso_local dllexport                void @_ZN14ExportSpecialsC2Ev(%struct.ExportSpecials* %this)
   __declspec(dllexport) ExportSpecials();
 
-  // M32-DAG: define dllexport x86_thiscallcc void @"\01??1ExportSpecials@@QAE@XZ"(%struct.ExportSpecials* %this)
-  // M64-DAG: define dllexport                void @"\01??1ExportSpecials@@QEAA@XZ"(%struct.ExportSpecials* %this)
-  // G32-DAG: define dllexport x86_thiscallcc void @_ZN14ExportSpecialsD1Ev(%struct.ExportSpecials* %this)
-  // G64-DAG: define dllexport                void @_ZN14ExportSpecialsD1Ev(%struct.ExportSpecials* %this)
-  // G32-DAG: define dllexport x86_thiscallcc void @_ZN14ExportSpecialsD2Ev(%struct.ExportSpecials* %this)
-  // G64-DAG: define dllexport                void @_ZN14ExportSpecialsD2Ev(%struct.ExportSpecials* %this)
+  // M32-DAG: define dso_local dllexport x86_thiscallcc void @"\01??1ExportSpecials@@QAE@XZ"(%struct.ExportSpecials* %this)
+  // M64-DAG: define dso_local dllexport                void @"\01??1ExportSpecials@@QEAA@XZ"(%struct.ExportSpecials* %this)
+  // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsD1Ev(%struct.ExportSpecials* %this)
+  // G64-DAG: define dso_local dllexport                void @_ZN14ExportSpecialsD1Ev(%struct.ExportSpecials* %this)
+  // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsD2Ev(%struct.ExportSpecials* %this)
+  // G64-DAG: define dso_local dllexport                void @_ZN14ExportSpecialsD2Ev(%struct.ExportSpecials* %this)
   __declspec(dllexport) ~ExportSpecials();
 
-  // M32-DAG: define dllexport x86_thiscallcc %struct.ExportSpecials* @"\01??0ExportSpecials@@QAE@ABU0@@Z"(%struct.ExportSpecials* returned %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // M64-DAG: define dllexport                %struct.ExportSpecials* @"\01??0ExportSpecials@@QEAA@AEBU0@@Z"(%struct.ExportSpecials* returned %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define dllexport x86_thiscallcc void @_ZN14ExportSpecialsC1ERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define dllexport                void @_ZN14ExportSpecialsC1ERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define dllexport x86_thiscallcc void @_ZN14ExportSpecialsC2ERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define dllexport                void @_ZN14ExportSpecialsC2ERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // M32-DAG: define dso_local dllexport x86_thiscallcc %struct.ExportSpecials* @"\01??0ExportSpecials@@QAE@ABU0@@Z"(%struct.ExportSpecials* returned %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // M64-DAG: define dso_local dllexport                %struct.ExportSpecials* @"\01??0ExportSpecials@@QEAA@AEBU0@@Z"(%struct.ExportSpecials* returned %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC1ERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define dso_local dllexport                void @_ZN14ExportSpecialsC1ERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC2ERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define dso_local dllexport                void @_ZN14ExportSpecialsC2ERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
   __declspec(dllexport) ExportSpecials(const ExportSpecials&);
 
-  // M32-DAG: define dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @"\01??4ExportSpecials@@QAEAAU0@ABU0@@Z"(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // M64-DAG: define dllexport                dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @"\01??4ExportSpecials@@QEAAAEAU0@AEBU0@@Z"(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @_ZN14ExportSpecialsaSERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define dllexport                dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @_ZN14ExportSpecialsaSERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // M32-DAG: define dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @"\01??4ExportSpecials@@QAEAAU0@ABU0@@Z"(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // M64-DAG: define dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @"\01??4ExportSpecials@@QEAAAEAU0@AEBU0@@Z"(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @_ZN14ExportSpecialsaSERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @_ZN14ExportSpecialsaSERKS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
   __declspec(dllexport) ExportSpecials& operator=(const ExportSpecials&);
 
-  // M32-DAG: define dllexport x86_thiscallcc %struct.ExportSpecials* @"\01??0ExportSpecials@@QAE@$$QAU0@@Z"(%struct.ExportSpecials* returned %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // M64-DAG: define dllexport                %struct.ExportSpecials* @"\01??0ExportSpecials@@QEAA@$$QEAU0@@Z"(%struct.ExportSpecials* returned %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define dllexport x86_thiscallcc void @_ZN14ExportSpecialsC1EOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define dllexport                void @_ZN14ExportSpecialsC1EOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define dllexport x86_thiscallcc void @_ZN14ExportSpecialsC2EOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define dllexport                void @_ZN14ExportSpecialsC2EOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // M32-DAG: define dso_local dllexport x86_thiscallcc %struct.ExportSpecials* @"\01??0ExportSpecials@@QAE@$$QAU0@@Z"(%struct.ExportSpecials* returned %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // M64-DAG: define dso_local dllexport                %struct.ExportSpecials* @"\01??0ExportSpecials@@QEAA@$$QEAU0@@Z"(%struct.ExportSpecials* returned %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC1EOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define dso_local dllexport                void @_ZN14ExportSpecialsC1EOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC2EOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define dso_local dllexport                void @_ZN14ExportSpecialsC2EOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
   __declspec(dllexport) ExportSpecials(ExportSpecials&&);
 
-  // M32-DAG: define dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @"\01??4ExportSpecials@@QAEAAU0@$$QAU0@@Z"(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // M64-DAG: define dllexport                dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @"\01??4ExportSpecials@@QEAAAEAU0@$$QEAU0@@Z"(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @_ZN14ExportSpecialsaSEOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define dllexport                dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @_ZN14ExportSpecialsaSEOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // M32-DAG: define dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @"\01??4ExportSpecials@@QAEAAU0@$$QAU0@@Z"(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // M64-DAG: define dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @"\01??4ExportSpecials@@QEAAAEAU0@$$QEAU0@@Z"(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @_ZN14ExportSpecialsaSEOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportSpecials* @_ZN14ExportSpecialsaSEOS_(%struct.ExportSpecials* %this, %struct.ExportSpecials* dereferenceable({{[0-9]+}}))
   __declspec(dllexport) ExportSpecials& operator=(ExportSpecials&&);
 };
 ExportSpecials::ExportSpecials() {}
@@ -334,40 +334,40 @@
 
 // Export class with inline special member functions.
 struct ExportInlineSpecials {
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QAE@XZ"(%struct.ExportInlineSpecials* returned %this)
-  // M64-DAG: define weak_odr dllexport                %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QEAA@XZ"(
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsC1Ev(
-  // G64-DAG: define weak_odr dllexport                void @_ZN20ExportInlineSpecialsC1Ev(
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QAE@XZ"(%struct.ExportInlineSpecials* returned %this)
+  // M64-DAG: define weak_odr dso_local dllexport                %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QEAA@XZ"(
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsC1Ev(
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN20ExportInlineSpecialsC1Ev(
   __declspec(dllexport) ExportInlineSpecials() {}
 
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??1ExportInlineSpecials@@QAE@XZ"(
-  // M64-DAG: define weak_odr dllexport                void @"\01??1ExportInlineSpecials@@QEAA@XZ"(
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsD1Ev(
-  // G64-DAG: define weak_odr dllexport                void @_ZN20ExportInlineSpecialsD1Ev(
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??1ExportInlineSpecials@@QAE@XZ"(
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01??1ExportInlineSpecials@@QEAA@XZ"(
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsD1Ev(
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN20ExportInlineSpecialsD1Ev(
   __declspec(dllexport) ~ExportInlineSpecials() {}
 
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QAE@ABU0@@Z"(
-  // M64-DAG: define weak_odr dllexport                %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QEAA@AEBU0@@Z"(
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsC1ERKS_(
-  // G64-DAG: define weak_odr dllexport                void @_ZN20ExportInlineSpecialsC1ERKS_(
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QAE@ABU0@@Z"(
+  // M64-DAG: define weak_odr dso_local dllexport                %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QEAA@AEBU0@@Z"(
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsC1ERKS_(
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN20ExportInlineSpecialsC1ERKS_(
   __declspec(dllexport) inline ExportInlineSpecials(const ExportInlineSpecials&);
 
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @"\01??4ExportInlineSpecials@@QAEAAU0@ABU0@@Z"(
-  // M64-DAG: define weak_odr dllexport                dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @"\01??4ExportInlineSpecials@@QEAAAEAU0@AEBU0@@Z"(
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @_ZN20ExportInlineSpecialsaSERKS_(
-  // G64-DAG: define weak_odr dllexport                dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @_ZN20ExportInlineSpecialsaSERKS_(
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @"\01??4ExportInlineSpecials@@QAEAAU0@ABU0@@Z"(
+  // M64-DAG: define weak_odr dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @"\01??4ExportInlineSpecials@@QEAAAEAU0@AEBU0@@Z"(
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @_ZN20ExportInlineSpecialsaSERKS_(
+  // G64-DAG: define weak_odr dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @_ZN20ExportInlineSpecialsaSERKS_(
   __declspec(dllexport) ExportInlineSpecials& operator=(const ExportInlineSpecials&);
 
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QAE@$$QAU0@@Z"(
-  // M64-DAG: define weak_odr dllexport                %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QEAA@$$QEAU0@@Z"(
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsC1EOS_(
-  // G64-DAG: define weak_odr dllexport                void @_ZN20ExportInlineSpecialsC1EOS_(
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QAE@$$QAU0@@Z"(
+  // M64-DAG: define weak_odr dso_local dllexport                %struct.ExportInlineSpecials* @"\01??0ExportInlineSpecials@@QEAA@$$QEAU0@@Z"(
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsC1EOS_(
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN20ExportInlineSpecialsC1EOS_(
   __declspec(dllexport) ExportInlineSpecials(ExportInlineSpecials&&) {}
 
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @"\01??4ExportInlineSpecials@@QAEAAU0@$$QAU0@@Z"(
-  // M64-DAG: define weak_odr dllexport                dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @"\01??4ExportInlineSpecials@@QEAAAEAU0@$$QEAU0@@Z"(
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @_ZN20ExportInlineSpecialsaSEOS_(
-  // G64-DAG: define weak_odr dllexport                dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @_ZN20ExportInlineSpecialsaSEOS_(
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @"\01??4ExportInlineSpecials@@QAEAAU0@$$QAU0@@Z"(
+  // M64-DAG: define weak_odr dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @"\01??4ExportInlineSpecials@@QEAAAEAU0@$$QEAU0@@Z"(
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @_ZN20ExportInlineSpecialsaSEOS_(
+  // G64-DAG: define weak_odr dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportInlineSpecials* @_ZN20ExportInlineSpecialsaSEOS_(
   __declspec(dllexport) ExportInlineSpecials& operator=(ExportInlineSpecials&&) { return *this; }
 };
 ExportInlineSpecials::ExportInlineSpecials(const ExportInlineSpecials&) {}
@@ -384,74 +384,74 @@
   __declspec(dllexport) ExportDefaultedDefs& operator=(ExportDefaultedDefs&&);
 };
 
-// M32-DAG: define dllexport x86_thiscallcc %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QAE@XZ"(%struct.ExportDefaultedDefs* returned %this)
-// M64-DAG: define dllexport                %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QEAA@XZ"(%struct.ExportDefaultedDefs* returned %this)
-// G32-DAG: define dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC1Ev(%struct.ExportDefaultedDefs* %this)
-// G64-DAG: define dllexport                void @_ZN19ExportDefaultedDefsC1Ev(%struct.ExportDefaultedDefs* %this)
-// G32-DAG: define dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC2Ev(%struct.ExportDefaultedDefs* %this)
-// G64-DAG: define dllexport                void @_ZN19ExportDefaultedDefsC2Ev(%struct.ExportDefaultedDefs* %this)
+// M32-DAG: define dso_local dllexport x86_thiscallcc %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QAE@XZ"(%struct.ExportDefaultedDefs* returned %this)
+// M64-DAG: define dso_local dllexport                %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QEAA@XZ"(%struct.ExportDefaultedDefs* returned %this)
+// G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC1Ev(%struct.ExportDefaultedDefs* %this)
+// G64-DAG: define dso_local dllexport                void @_ZN19ExportDefaultedDefsC1Ev(%struct.ExportDefaultedDefs* %this)
+// G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC2Ev(%struct.ExportDefaultedDefs* %this)
+// G64-DAG: define dso_local dllexport                void @_ZN19ExportDefaultedDefsC2Ev(%struct.ExportDefaultedDefs* %this)
 __declspec(dllexport) ExportDefaultedDefs::ExportDefaultedDefs() = default;
 
-// M32-DAG: define dllexport x86_thiscallcc void @"\01??1ExportDefaultedDefs@@QAE@XZ"(%struct.ExportDefaultedDefs* %this)
-// M64-DAG: define dllexport                void @"\01??1ExportDefaultedDefs@@QEAA@XZ"(%struct.ExportDefaultedDefs* %this)
-// G32-DAG: define dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsD1Ev(%struct.ExportDefaultedDefs* %this)
-// G64-DAG: define dllexport                void @_ZN19ExportDefaultedDefsD1Ev(%struct.ExportDefaultedDefs* %this)
-// G32-DAG: define dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsD2Ev(%struct.ExportDefaultedDefs* %this)
-// G64-DAG: define dllexport                void @_ZN19ExportDefaultedDefsD2Ev(%struct.ExportDefaultedDefs* %this)
+// M32-DAG: define dso_local dllexport x86_thiscallcc void @"\01??1ExportDefaultedDefs@@QAE@XZ"(%struct.ExportDefaultedDefs* %this)
+// M64-DAG: define dso_local dllexport                void @"\01??1ExportDefaultedDefs@@QEAA@XZ"(%struct.ExportDefaultedDefs* %this)
+// G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsD1Ev(%struct.ExportDefaultedDefs* %this)
+// G64-DAG: define dso_local dllexport                void @_ZN19ExportDefaultedDefsD1Ev(%struct.ExportDefaultedDefs* %this)
+// G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsD2Ev(%struct.ExportDefaultedDefs* %this)
+// G64-DAG: define dso_local dllexport                void @_ZN19ExportDefaultedDefsD2Ev(%struct.ExportDefaultedDefs* %this)
 ExportDefaultedDefs::~ExportDefaultedDefs() = default;
 
-// M32-DAG: define weak_odr dllexport x86_thiscallcc %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QAE@ABU0@@Z"(%struct.ExportDefaultedDefs* returned %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// M64-DAG: define weak_odr dllexport                %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QEAA@AEBU0@@Z"(%struct.ExportDefaultedDefs* returned %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC1ERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G64-DAG: define weak_odr dllexport                void @_ZN19ExportDefaultedDefsC1ERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC2ERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G64-DAG: define weak_odr dllexport                void @_ZN19ExportDefaultedDefsC2ERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QAE@ABU0@@Z"(%struct.ExportDefaultedDefs* returned %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M64-DAG: define weak_odr dso_local dllexport                %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QEAA@AEBU0@@Z"(%struct.ExportDefaultedDefs* returned %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC1ERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G64-DAG: define weak_odr dso_local dllexport                void @_ZN19ExportDefaultedDefsC1ERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC2ERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G64-DAG: define weak_odr dso_local dllexport                void @_ZN19ExportDefaultedDefsC2ERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
 __declspec(dllexport) ExportDefaultedDefs::ExportDefaultedDefs(const ExportDefaultedDefs&) = default;
 
-// M32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @"\01??4ExportDefaultedDefs@@QAEAAU0@ABU0@@Z"(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// M64-DAG: define weak_odr dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @"\01??4ExportDefaultedDefs@@QEAAAEAU0@AEBU0@@Z"(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @_ZN19ExportDefaultedDefsaSERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G64-DAG: define weak_odr dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @_ZN19ExportDefaultedDefsaSERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @"\01??4ExportDefaultedDefs@@QAEAAU0@ABU0@@Z"(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M64-DAG: define weak_odr dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @"\01??4ExportDefaultedDefs@@QEAAAEAU0@AEBU0@@Z"(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @_ZN19ExportDefaultedDefsaSERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G64-DAG: define weak_odr dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @_ZN19ExportDefaultedDefsaSERKS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
 inline ExportDefaultedDefs& ExportDefaultedDefs::operator=(const ExportDefaultedDefs&) = default;
 
-// M32-DAG: define dllexport x86_thiscallcc %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QAE@$$QAU0@@Z"(%struct.ExportDefaultedDefs* returned %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// M64-DAG: define dllexport                %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QEAA@$$QEAU0@@Z"(%struct.ExportDefaultedDefs* returned %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G32-DAG: define dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC1EOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G64-DAG: define dllexport                void @_ZN19ExportDefaultedDefsC1EOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G32-DAG: define dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC2EOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G64-DAG: define dllexport                void @_ZN19ExportDefaultedDefsC2EOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M32-DAG: define dso_local dllexport x86_thiscallcc %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QAE@$$QAU0@@Z"(%struct.ExportDefaultedDefs* returned %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M64-DAG: define dso_local dllexport                %struct.ExportDefaultedDefs* @"\01??0ExportDefaultedDefs@@QEAA@$$QEAU0@@Z"(%struct.ExportDefaultedDefs* returned %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC1EOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G64-DAG: define dso_local dllexport                void @_ZN19ExportDefaultedDefsC1EOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC2EOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G64-DAG: define dso_local dllexport                void @_ZN19ExportDefaultedDefsC2EOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
 __declspec(dllexport) ExportDefaultedDefs::ExportDefaultedDefs(ExportDefaultedDefs&&) = default;
 
-// M32-DAG: define dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @"\01??4ExportDefaultedDefs@@QAEAAU0@$$QAU0@@Z"(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// M64-DAG: define dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @"\01??4ExportDefaultedDefs@@QEAAAEAU0@$$QEAU0@@Z"(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G32-DAG: define dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @_ZN19ExportDefaultedDefsaSEOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G64-DAG: define dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @_ZN19ExportDefaultedDefsaSEOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M32-DAG: define dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @"\01??4ExportDefaultedDefs@@QAEAAU0@$$QAU0@@Z"(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M64-DAG: define dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @"\01??4ExportDefaultedDefs@@QEAAAEAU0@$$QEAU0@@Z"(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G32-DAG: define dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @_ZN19ExportDefaultedDefsaSEOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G64-DAG: define dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedDefs* @_ZN19ExportDefaultedDefsaSEOS_(%struct.ExportDefaultedDefs* %this, %struct.ExportDefaultedDefs* dereferenceable({{[0-9]+}}))
 ExportDefaultedDefs& ExportDefaultedDefs::operator=(ExportDefaultedDefs&&) = default;
 
 
 // Export defaulted member function definitions declared inside class.
 struct ExportDefaultedInclassDefs {
   __declspec(dllexport) ExportDefaultedInclassDefs() = default;
-  // M32VS2013-DAG: define weak_odr dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
-  // M64VS2013-DAG: define weak_odr dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
-  // M32VS2015-NOT: define weak_odr dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
-  // M64VS2015-NOT: define weak_odr dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
+  // M32VS2013-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
+  // M64VS2013-DAG: define weak_odr dso_local dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
+  // M32VS2015-NOT: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
+  // M64VS2015-NOT: define weak_odr dso_local dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
 
   __declspec(dllexport) ~ExportDefaultedInclassDefs() = default;
-  // M32VS2013-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??1ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* %this)
-  // M64VS2013-DAG: define weak_odr dllexport                void @"\01??1ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* %this)
-  // M32VS2015-NOT: define weak_odr dllexport x86_thiscallcc void @"\01??1ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* %this)
-  // M64VS2015-NOT: define weak_odr dllexport                void @"\01??1ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* %this)
+  // M32VS2013-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??1ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* %this)
+  // M64VS2013-DAG: define weak_odr dso_local dllexport                void @"\01??1ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* %this)
+  // M32VS2015-NOT: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??1ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* %this)
+  // M64VS2015-NOT: define weak_odr dso_local dllexport                void @"\01??1ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* %this)
 
   __declspec(dllexport) ExportDefaultedInclassDefs(const ExportDefaultedInclassDefs&) = default;
-  // M32VS2013-DAG: define weak_odr dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
-  // M64VS2013-DAG: define weak_odr dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
-  // M32VS2015-NOT: define weak_odr dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
-  // M64VS2015-NOT: define weak_odr dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M32VS2013-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M64VS2013-DAG: define weak_odr dso_local dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M32VS2015-NOT: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M64VS2015-NOT: define weak_odr dso_local dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
 
   __declspec(dllexport) ExportDefaultedInclassDefs& operator=(const ExportDefaultedInclassDefs&) = default;
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedInclassDefs* @"\01??4ExportDefaultedInclassDefs@@QAEAAU0@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
-  // M64-DAG: define weak_odr dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedInclassDefs* @"\01??4ExportDefaultedInclassDefs@@QEAAAEAU0@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedInclassDefs* @"\01??4ExportDefaultedInclassDefs@@QAEAAU0@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M64-DAG: define weak_odr dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedInclassDefs* @"\01??4ExportDefaultedInclassDefs@@QEAAAEAU0@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
 };
 
 
@@ -463,28 +463,28 @@
   __declspec(dllexport) void operator delete[](void*);
 };
 
-// M32-DAG: define dllexport i8* @"\01??2ExportAlloc@@SAPAXI@Z"(i32 %n)
-// M64-DAG: define dllexport i8* @"\01??2ExportAlloc@@SAPEAX_K@Z"(i64 %n)
-// G32-DAG: define dllexport i8* @_ZN11ExportAllocnwEj(i32 %n)
-// G64-DAG: define dllexport i8* @_ZN11ExportAllocnwEy(i64 %n)
+// M32-DAG: define dso_local dllexport i8* @"\01??2ExportAlloc@@SAPAXI@Z"(i32 %n)
+// M64-DAG: define dso_local dllexport i8* @"\01??2ExportAlloc@@SAPEAX_K@Z"(i64 %n)
+// G32-DAG: define dso_local dllexport i8* @_ZN11ExportAllocnwEj(i32 %n)
+// G64-DAG: define dso_local dllexport i8* @_ZN11ExportAllocnwEy(i64 %n)
 void* ExportAlloc::operator new(__SIZE_TYPE__ n) { return malloc(n); }
 
-// M32-DAG: define dllexport i8* @"\01??_UExportAlloc@@SAPAXI@Z"(i32 %n)
-// M64-DAG: define dllexport i8* @"\01??_UExportAlloc@@SAPEAX_K@Z"(i64 %n)
-// G32-DAG: define dllexport i8* @_ZN11ExportAllocnaEj(i32 %n)
-// G64-DAG: define dllexport i8* @_ZN11ExportAllocnaEy(i64 %n)
+// M32-DAG: define dso_local dllexport i8* @"\01??_UExportAlloc@@SAPAXI@Z"(i32 %n)
+// M64-DAG: define dso_local dllexport i8* @"\01??_UExportAlloc@@SAPEAX_K@Z"(i64 %n)
+// G32-DAG: define dso_local dllexport i8* @_ZN11ExportAllocnaEj(i32 %n)
+// G64-DAG: define dso_local dllexport i8* @_ZN11ExportAllocnaEy(i64 %n)
 void* ExportAlloc::operator new[](__SIZE_TYPE__ n) { return malloc(n); }
 
-// M32-DAG: define dllexport void @"\01??3ExportAlloc@@SAXPAX@Z"(i8* %p)
-// M64-DAG: define dllexport void @"\01??3ExportAlloc@@SAXPEAX@Z"(i8* %p)
-// G32-DAG: define dllexport void @_ZN11ExportAllocdlEPv(i8* %p)
-// G64-DAG: define dllexport void @_ZN11ExportAllocdlEPv(i8* %p)
+// M32-DAG: define dso_local dllexport void @"\01??3ExportAlloc@@SAXPAX@Z"(i8* %p)
+// M64-DAG: define dso_local dllexport void @"\01??3ExportAlloc@@SAXPEAX@Z"(i8* %p)
+// G32-DAG: define dso_local dllexport void @_ZN11ExportAllocdlEPv(i8* %p)
+// G64-DAG: define dso_local dllexport void @_ZN11ExportAllocdlEPv(i8* %p)
 void ExportAlloc::operator delete(void* p) { free(p); }
 
-// M32-DAG: define dllexport void @"\01??_VExportAlloc@@SAXPAX@Z"(i8* %p)
-// M64-DAG: define dllexport void @"\01??_VExportAlloc@@SAXPEAX@Z"(i8* %p)
-// G32-DAG: define dllexport void @_ZN11ExportAllocdaEPv(i8* %p)
-// G64-DAG: define dllexport void @_ZN11ExportAllocdaEPv(i8* %p)
+// M32-DAG: define dso_local dllexport void @"\01??_VExportAlloc@@SAXPAX@Z"(i8* %p)
+// M64-DAG: define dso_local dllexport void @"\01??_VExportAlloc@@SAXPEAX@Z"(i8* %p)
+// G32-DAG: define dso_local dllexport void @_ZN11ExportAllocdaEPv(i8* %p)
+// G64-DAG: define dso_local dllexport void @_ZN11ExportAllocdaEPv(i8* %p)
 void ExportAlloc::operator delete[](void* p) { free(p); }
 
 
@@ -501,125 +501,125 @@
 
 // Export implicit instantiation of an exported member function template.
 void useMemFunTmpl() {
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??$exportedNormal@UImplicitInst_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
-  // M64-DAG: define weak_odr dllexport                void @"\01??$exportedNormal@UImplicitInst_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
-  // G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI21ImplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
-  // G64-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl14exportedNormalI21ImplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??$exportedNormal@UImplicitInst_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
+  // M64-DAG: define weak_odr dso_local dllexport                void @"\01??$exportedNormal@UImplicitInst_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
+  // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI21ImplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
+  // G64-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl14exportedNormalI21ImplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
   MemFunTmpl().exportedNormal<ImplicitInst_Exported>();
 
-  // MSC-DAG: define weak_odr dllexport                void @"\01??$exportedStatic@UImplicitInst_Exported@@@MemFunTmpl@@SAXXZ"()
-  // GNU-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl14exportedStaticI21ImplicitInst_ExportedEEvv()
+  // MSC-DAG: define weak_odr dso_local dllexport                void @"\01??$exportedStatic@UImplicitInst_Exported@@@MemFunTmpl@@SAXXZ"()
+  // GNU-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl14exportedStaticI21ImplicitInst_ExportedEEvv()
   MemFunTmpl().exportedStatic<ImplicitInst_Exported>();
 }
 
 
 // Export explicit instantiation declaration of an exported member function
 // template.
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??$exportedNormal@UExplicitDecl_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
-// M64-DAG: define weak_odr dllexport                void @"\01??$exportedNormal@UExplicitDecl_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI21ExplicitDecl_ExportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl14exportedNormalI21ExplicitDecl_ExportedEEvv(%struct.MemFunTmpl* %this)
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??$exportedNormal@UExplicitDecl_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
+// M64-DAG: define weak_odr dso_local dllexport                void @"\01??$exportedNormal@UExplicitDecl_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI21ExplicitDecl_ExportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl14exportedNormalI21ExplicitDecl_ExportedEEvv(%struct.MemFunTmpl* %this)
 extern template void MemFunTmpl::exportedNormal<ExplicitDecl_Exported>();
        template void MemFunTmpl::exportedNormal<ExplicitDecl_Exported>();
 
-// MSC-DAG: define weak_odr dllexport                void @"\01??$exportedStatic@UExplicitDecl_Exported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl14exportedStaticI21ExplicitDecl_ExportedEEvv()
+// MSC-DAG: define weak_odr dso_local dllexport                void @"\01??$exportedStatic@UExplicitDecl_Exported@@@MemFunTmpl@@SAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl14exportedStaticI21ExplicitDecl_ExportedEEvv()
 extern template void MemFunTmpl::exportedStatic<ExplicitDecl_Exported>();
        template void MemFunTmpl::exportedStatic<ExplicitDecl_Exported>();
 
 
 // Export explicit instantiation definition of an exported member function
 // template.
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??$exportedNormal@UExplicitInst_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
-// M64-DAG: define weak_odr dllexport                void @"\01??$exportedNormal@UExplicitInst_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI21ExplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl14exportedNormalI21ExplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??$exportedNormal@UExplicitInst_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
+// M64-DAG: define weak_odr dso_local dllexport                void @"\01??$exportedNormal@UExplicitInst_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI21ExplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl14exportedNormalI21ExplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
 template void MemFunTmpl::exportedNormal<ExplicitInst_Exported>();
 
-// MSC-DAG: define weak_odr dllexport                void @"\01??$exportedStatic@UExplicitInst_Exported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl14exportedStaticI21ExplicitInst_ExportedEEvv()
+// MSC-DAG: define weak_odr dso_local dllexport                void @"\01??$exportedStatic@UExplicitInst_Exported@@@MemFunTmpl@@SAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl14exportedStaticI21ExplicitInst_ExportedEEvv()
 template void MemFunTmpl::exportedStatic<ExplicitInst_Exported>();
 
 
 // Export specialization of an exported member function template.
-// M32-DAG: define          dllexport x86_thiscallcc void @"\01??$exportedNormal@UExplicitSpec_Def_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
-// M64-DAG: define          dllexport                void @"\01??$exportedNormal@UExplicitSpec_Def_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
-// G32-DAG: define          dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI25ExplicitSpec_Def_ExportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define          dllexport                void @_ZN10MemFunTmpl14exportedNormalI25ExplicitSpec_Def_ExportedEEvv(%struct.MemFunTmpl* %this)
+// M32-DAG: define          dso_local dllexport x86_thiscallcc void @"\01??$exportedNormal@UExplicitSpec_Def_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
+// M64-DAG: define          dso_local dllexport                void @"\01??$exportedNormal@UExplicitSpec_Def_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
+// G32-DAG: define          dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI25ExplicitSpec_Def_ExportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define          dso_local dllexport                void @_ZN10MemFunTmpl14exportedNormalI25ExplicitSpec_Def_ExportedEEvv(%struct.MemFunTmpl* %this)
 template<> __declspec(dllexport) void MemFunTmpl::exportedNormal<ExplicitSpec_Def_Exported>() {}
 
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??$exportedNormal@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
-// M64-DAG: define weak_odr dllexport                void @"\01??$exportedNormal@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI31ExplicitSpec_InlineDef_ExportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl14exportedNormalI31ExplicitSpec_InlineDef_ExportedEEvv(%struct.MemFunTmpl* %this)
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??$exportedNormal@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
+// M64-DAG: define weak_odr dso_local dllexport                void @"\01??$exportedNormal@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI31ExplicitSpec_InlineDef_ExportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl14exportedNormalI31ExplicitSpec_InlineDef_ExportedEEvv(%struct.MemFunTmpl* %this)
 template<> __declspec(dllexport) inline void MemFunTmpl::exportedNormal<ExplicitSpec_InlineDef_Exported>() {}
 
-// MSC-DAG: define          dllexport                void @"\01??$exportedStatic@UExplicitSpec_Def_Exported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define          dllexport                void @_ZN10MemFunTmpl14exportedStaticI25ExplicitSpec_Def_ExportedEEvv()
+// MSC-DAG: define          dso_local dllexport                void @"\01??$exportedStatic@UExplicitSpec_Def_Exported@@@MemFunTmpl@@SAXXZ"()
+// GNU-DAG: define          dso_local dllexport                void @_ZN10MemFunTmpl14exportedStaticI25ExplicitSpec_Def_ExportedEEvv()
 template<> __declspec(dllexport) void MemFunTmpl::exportedStatic<ExplicitSpec_Def_Exported>() {}
 
-// MSC-DAG: define weak_odr dllexport                void @"\01??$exportedStatic@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl14exportedStaticI31ExplicitSpec_InlineDef_ExportedEEvv()
+// MSC-DAG: define weak_odr dso_local dllexport                void @"\01??$exportedStatic@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@SAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl14exportedStaticI31ExplicitSpec_InlineDef_ExportedEEvv()
 template<> __declspec(dllexport) inline void MemFunTmpl::exportedStatic<ExplicitSpec_InlineDef_Exported>() {}
 
 
 // Not exporting specialization of an exported member function template without
-// explicit dllexport.
-// M32-DAG: define                    x86_thiscallcc void @"\01??$exportedNormal@UExplicitSpec_NotExported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
-// M64-DAG: define                                   void @"\01??$exportedNormal@UExplicitSpec_NotExported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
-// G32-DAG: define                    x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI24ExplicitSpec_NotExportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define                                   void @_ZN10MemFunTmpl14exportedNormalI24ExplicitSpec_NotExportedEEvv(%struct.MemFunTmpl* %this)
+// explicit dso_local dllexport.
+// M32-DAG: define          dso_local x86_thiscallcc void @"\01??$exportedNormal@UExplicitSpec_NotExported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
+// M64-DAG: define          dso_local                void @"\01??$exportedNormal@UExplicitSpec_NotExported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
+// G32-DAG: define          dso_local x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI24ExplicitSpec_NotExportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define          dso_local                void @_ZN10MemFunTmpl14exportedNormalI24ExplicitSpec_NotExportedEEvv(%struct.MemFunTmpl* %this)
 template<> void MemFunTmpl::exportedNormal<ExplicitSpec_NotExported>() {}
 
-// M32-DAG: define                                   void @"\01??$exportedStatic@UExplicitSpec_NotExported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define                                   void @_ZN10MemFunTmpl14exportedStaticI24ExplicitSpec_NotExportedEEvv()
+// M32-DAG: define          dso_local                void @"\01??$exportedStatic@UExplicitSpec_NotExported@@@MemFunTmpl@@SAXXZ"()
+// GNU-DAG: define          dso_local                void @_ZN10MemFunTmpl14exportedStaticI24ExplicitSpec_NotExportedEEvv()
 template<> void MemFunTmpl::exportedStatic<ExplicitSpec_NotExported>() {}
 
 
 // Export explicit instantiation declaration of a non-exported member function
 // template.
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??$normalDef@UExplicitDecl_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
-// M64-DAG: define weak_odr dllexport                void @"\01??$normalDef@UExplicitDecl_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ExportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ExportedEEvv(%struct.MemFunTmpl* %this)
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??$normalDef@UExplicitDecl_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
+// M64-DAG: define weak_odr dso_local dllexport                void @"\01??$normalDef@UExplicitDecl_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ExportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ExportedEEvv(%struct.MemFunTmpl* %this)
 extern template __declspec(dllexport) void MemFunTmpl::normalDef<ExplicitDecl_Exported>();
        template __declspec(dllexport) void MemFunTmpl::normalDef<ExplicitDecl_Exported>();
 
-// M32-DAG: define weak_odr dllexport                void @"\01??$staticDef@UExplicitDecl_Exported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl9staticDefI21ExplicitDecl_ExportedEEvv()
+// M32-DAG: define weak_odr dso_local dllexport                void @"\01??$staticDef@UExplicitDecl_Exported@@@MemFunTmpl@@SAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl9staticDefI21ExplicitDecl_ExportedEEvv()
 extern template __declspec(dllexport) void MemFunTmpl::staticDef<ExplicitDecl_Exported>();
        template __declspec(dllexport) void MemFunTmpl::staticDef<ExplicitDecl_Exported>();
 
 
 // Export explicit instantiation definition of a non-exported member function
 // template.
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??$normalDef@UExplicitInst_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
-// M64-DAG: define weak_odr dllexport                void @"\01??$normalDef@UExplicitInst_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI21ExplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl9normalDefI21ExplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??$normalDef@UExplicitInst_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
+// M64-DAG: define weak_odr dso_local dllexport                void @"\01??$normalDef@UExplicitInst_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI21ExplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl9normalDefI21ExplicitInst_ExportedEEvv(%struct.MemFunTmpl* %this)
 template __declspec(dllexport) void MemFunTmpl::normalDef<ExplicitInst_Exported>();
 
-// MSC-DAG: define weak_odr dllexport                void @"\01??$staticDef@UExplicitInst_Exported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl9staticDefI21ExplicitInst_ExportedEEvv()
+// MSC-DAG: define weak_odr dso_local dllexport                void @"\01??$staticDef@UExplicitInst_Exported@@@MemFunTmpl@@SAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl9staticDefI21ExplicitInst_ExportedEEvv()
 template __declspec(dllexport) void MemFunTmpl::staticDef<ExplicitInst_Exported>();
 
 
 // Export specialization of a non-exported member function template.
-// M32-DAG: define          dllexport x86_thiscallcc void @"\01??$normalDef@UExplicitSpec_Def_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
-// M64-DAG: define          dllexport                void @"\01??$normalDef@UExplicitSpec_Def_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??$normalDef@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
-// M64-DAG: define weak_odr dllexport                void @"\01??$normalDef@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
-// G32-DAG: define          dllexport x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI25ExplicitSpec_Def_ExportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define          dllexport                void @_ZN10MemFunTmpl9normalDefI25ExplicitSpec_Def_ExportedEEvv(%struct.MemFunTmpl* %this)
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ExportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ExportedEEvv(%struct.MemFunTmpl* %this)
+// M32-DAG: define          dso_local dllexport x86_thiscallcc void @"\01??$normalDef@UExplicitSpec_Def_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
+// M64-DAG: define          dso_local dllexport                void @"\01??$normalDef@UExplicitSpec_Def_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??$normalDef@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
+// M64-DAG: define weak_odr dso_local dllexport                void @"\01??$normalDef@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
+// G32-DAG: define          dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI25ExplicitSpec_Def_ExportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define          dso_local dllexport                void @_ZN10MemFunTmpl9normalDefI25ExplicitSpec_Def_ExportedEEvv(%struct.MemFunTmpl* %this)
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ExportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ExportedEEvv(%struct.MemFunTmpl* %this)
 template<> __declspec(dllexport) void MemFunTmpl::normalDef<ExplicitSpec_Def_Exported>() {}
 template<> __declspec(dllexport) inline void MemFunTmpl::normalDef<ExplicitSpec_InlineDef_Exported>() {}
 
-// MSC-DAG: define          dllexport                void @"\01??$staticDef@UExplicitSpec_Def_Exported@@@MemFunTmpl@@SAXXZ"()
-// MSC-DAG: define weak_odr dllexport                void @"\01??$staticDef@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define          dllexport                void @_ZN10MemFunTmpl9staticDefI25ExplicitSpec_Def_ExportedEEvv()
-// GNU-DAG: define weak_odr dllexport                void @_ZN10MemFunTmpl9staticDefI31ExplicitSpec_InlineDef_ExportedEEvv()
+// MSC-DAG: define          dso_local dllexport                void @"\01??$staticDef@UExplicitSpec_Def_Exported@@@MemFunTmpl@@SAXXZ"()
+// MSC-DAG: define weak_odr dso_local dllexport                void @"\01??$staticDef@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@SAXXZ"()
+// GNU-DAG: define          dso_local dllexport                void @_ZN10MemFunTmpl9staticDefI25ExplicitSpec_Def_ExportedEEvv()
+// GNU-DAG: define weak_odr dso_local dllexport                void @_ZN10MemFunTmpl9staticDefI31ExplicitSpec_InlineDef_ExportedEEvv()
 template<> __declspec(dllexport) void MemFunTmpl::staticDef<ExplicitSpec_Def_Exported>() {}
 template<> __declspec(dllexport) inline void MemFunTmpl::staticDef<ExplicitSpec_InlineDef_Exported>() {}
 
@@ -633,49 +633,49 @@
 template<typename T> const int MemVarTmpl::ExportedStaticVar;
 
 // Export implicit instantiation of an exported member variable template.
-// MSC-DAG: @"\01??$ExportedStaticVar@UImplicitInst_Exported@@@MemVarTmpl@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-// GNU-DAG: @_ZN10MemVarTmpl17ExportedStaticVarI21ImplicitInst_ExportedEE       = weak_odr dllexport constant i32 1, comdat, align 4
+// MSC-DAG: @"\01??$ExportedStaticVar@UImplicitInst_Exported@@@MemVarTmpl@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+// GNU-DAG: @_ZN10MemVarTmpl17ExportedStaticVarI21ImplicitInst_ExportedEE       = weak_odr dso_local dllexport constant i32 1, comdat, align 4
 int useMemVarTmpl() { return MemVarTmpl::ExportedStaticVar<ImplicitInst_Exported>; }
 
 // Export explicit instantiation declaration of an exported member variable
 // template.
-// MSC-DAG: @"\01??$ExportedStaticVar@UExplicitDecl_Exported@@@MemVarTmpl@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-// GNU-DAG: @_ZN10MemVarTmpl17ExportedStaticVarI21ExplicitDecl_ExportedEE       = weak_odr dllexport constant i32 1, comdat, align 4
+// MSC-DAG: @"\01??$ExportedStaticVar@UExplicitDecl_Exported@@@MemVarTmpl@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+// GNU-DAG: @_ZN10MemVarTmpl17ExportedStaticVarI21ExplicitDecl_ExportedEE       = weak_odr dso_local dllexport constant i32 1, comdat, align 4
 extern template const int MemVarTmpl::ExportedStaticVar<ExplicitDecl_Exported>;
        template const int MemVarTmpl::ExportedStaticVar<ExplicitDecl_Exported>;
 
 // Export explicit instantiation definition of an exported member variable
 // template.
-// MSC-DAG: @"\01??$ExportedStaticVar@UExplicitInst_Exported@@@MemVarTmpl@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-// GNU-DAG: @_ZN10MemVarTmpl17ExportedStaticVarI21ExplicitInst_ExportedEE       = weak_odr dllexport constant i32 1, comdat, align 4
+// MSC-DAG: @"\01??$ExportedStaticVar@UExplicitInst_Exported@@@MemVarTmpl@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+// GNU-DAG: @_ZN10MemVarTmpl17ExportedStaticVarI21ExplicitInst_ExportedEE       = weak_odr dso_local dllexport constant i32 1, comdat, align 4
 template const int MemVarTmpl::ExportedStaticVar<ExplicitInst_Exported>;
 
 // Export specialization of an exported member variable template.
-// MSC-DAG: @"\01??$ExportedStaticVar@UExplicitSpec_Def_Exported@@@MemVarTmpl@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-// GNU-DAG: @_ZN10MemVarTmpl17ExportedStaticVarI25ExplicitSpec_Def_ExportedEE       = dllexport constant i32 1, align 4
+// MSC-DAG: @"\01??$ExportedStaticVar@UExplicitSpec_Def_Exported@@@MemVarTmpl@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+// GNU-DAG: @_ZN10MemVarTmpl17ExportedStaticVarI25ExplicitSpec_Def_ExportedEE       = dso_local dllexport constant i32 1, align 4
 template<> __declspec(dllexport) const int MemVarTmpl::ExportedStaticVar<ExplicitSpec_Def_Exported> = 1;
 
 // Not exporting specialization of an exported member variable template without
 // explicit dllexport.
-// MSC-DAG: @"\01??$ExportedStaticVar@UExplicitSpec_NotExported@@@MemVarTmpl@@2HB" = weak_odr constant i32 1, comdat, align 4
-// GNU-DAG: @_ZN10MemVarTmpl17ExportedStaticVarI24ExplicitSpec_NotExportedEE       = constant i32 1, align 4
+// MSC-DAG: @"\01??$ExportedStaticVar@UExplicitSpec_NotExported@@@MemVarTmpl@@2HB" = weak_odr dso_local constant i32 1, comdat, align 4
+// GNU-DAG: @_ZN10MemVarTmpl17ExportedStaticVarI24ExplicitSpec_NotExportedEE       = dso_local constant i32 1, align 4
 template<> const int MemVarTmpl::ExportedStaticVar<ExplicitSpec_NotExported> = 1;
 
 
 // Export explicit instantiation declaration of a non-exported member variable
 // template.
-// MSC-DAG: @"\01??$StaticVar@UExplicitDecl_Exported@@@MemVarTmpl@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-// GNU-DAG: @_ZN10MemVarTmpl9StaticVarI21ExplicitDecl_ExportedEE        = weak_odr dllexport constant i32 1, comdat, align 4
+// MSC-DAG: @"\01??$StaticVar@UExplicitDecl_Exported@@@MemVarTmpl@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+// GNU-DAG: @_ZN10MemVarTmpl9StaticVarI21ExplicitDecl_ExportedEE        = weak_odr dso_local dllexport constant i32 1, comdat, align 4
 extern template __declspec(dllexport) const int MemVarTmpl::StaticVar<ExplicitDecl_Exported>;
        template __declspec(dllexport) const int MemVarTmpl::StaticVar<ExplicitDecl_Exported>;
 
 // Export explicit instantiation definition of a non-exported member variable
 // template.
-// MSC-DAG: @"\01??$StaticVar@UExplicitInst_Exported@@@MemVarTmpl@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-// GNU-DAG: @_ZN10MemVarTmpl9StaticVarI21ExplicitInst_ExportedEE        = weak_odr dllexport constant i32 1, comdat, align 4
+// MSC-DAG: @"\01??$StaticVar@UExplicitInst_Exported@@@MemVarTmpl@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+// GNU-DAG: @_ZN10MemVarTmpl9StaticVarI21ExplicitInst_ExportedEE        = weak_odr dso_local dllexport constant i32 1, comdat, align 4
 template __declspec(dllexport) const int MemVarTmpl::StaticVar<ExplicitInst_Exported>;
 
 // Export specialization of a non-exported member variable template.
-// MSC-DAG: @"\01??$StaticVar@UExplicitSpec_Def_Exported@@@MemVarTmpl@@2HB" = weak_odr dllexport constant i32 1, comdat, align 4
-// GNU-DAG: @_ZN10MemVarTmpl9StaticVarI25ExplicitSpec_Def_ExportedEE        = dllexport constant i32 1, align 4
+// MSC-DAG: @"\01??$StaticVar@UExplicitSpec_Def_Exported@@@MemVarTmpl@@2HB" = weak_odr dso_local dllexport constant i32 1, comdat, align 4
+// GNU-DAG: @_ZN10MemVarTmpl9StaticVarI25ExplicitSpec_Def_ExportedEE        = dso_local dllexport constant i32 1, align 4
 template<> __declspec(dllexport) const int MemVarTmpl::StaticVar<ExplicitSpec_Def_Exported> = 1;
diff --git a/test/CodeGenCXX/dllexport-ms-friend.cpp b/test/CodeGenCXX/dllexport-ms-friend.cpp
index 7bcf590..73379f3 100644
--- a/test/CodeGenCXX/dllexport-ms-friend.cpp
+++ b/test/CodeGenCXX/dllexport-ms-friend.cpp
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -triple %ms_abi_triple -fms-extensions -emit-llvm -O0 -o - %s | FileCheck %s
 
 // Friend functions defined in classes are emitted.
-// CHECK: define weak_odr dllexport void @"\01?friend1@@YAXXZ"()
+// CHECK: define weak_odr dso_local dllexport void @"\01?friend1@@YAXXZ"()
 struct FuncFriend1 {
   friend __declspec(dllexport) void friend1() {}
 };
diff --git a/test/CodeGenCXX/dllexport-pr26549.cpp b/test/CodeGenCXX/dllexport-pr26549.cpp
index ceb2e06..e9a62e2 100644
--- a/test/CodeGenCXX/dllexport-pr26549.cpp
+++ b/test/CodeGenCXX/dllexport-pr26549.cpp
@@ -3,7 +3,7 @@
 template <typename> struct MessageT { };
 extern template struct MessageT<int>;
 
-// CHECK: define weak_odr dllexport {{.*}} %struct.MessageT* @"\01??4?$MessageT@H@@QEAAAEAU0@AEBU0@@Z"(
+// CHECK: define weak_odr dso_local dllexport {{.*}} %struct.MessageT* @"\01??4?$MessageT@H@@QEAAAEAU0@AEBU0@@Z"(
 template struct __declspec(dllexport) MessageT<int>;
 // Previously we crashed when this dllexport was the last thing in the file.
 // DO NOT ADD MORE TESTS AFTER THIS LINE!
diff --git a/test/CodeGenCXX/dllexport-vtable-thunks.cpp b/test/CodeGenCXX/dllexport-vtable-thunks.cpp
index 81811ac..5294784 100644
--- a/test/CodeGenCXX/dllexport-vtable-thunks.cpp
+++ b/test/CodeGenCXX/dllexport-vtable-thunks.cpp
@@ -11,7 +11,7 @@
   virtual void m();
 };
 void C::m() {}
-// CHECK: define dllexport void @_ZThn8_N1C1mEv
+// CHECK: define dso_local dllexport void @_ZThn8_N1C1mEv
 
 struct Base {
   virtual void m();
@@ -20,4 +20,4 @@
   virtual void m();
 };
 void Derived::m() {}
-// CHECK: define dllexport void @_ZTv0_n24_N7Derived1mEv
+// CHECK: define dso_local dllexport void @_ZTv0_n24_N7Derived1mEv
diff --git a/test/CodeGenCXX/dllexport.cpp b/test/CodeGenCXX/dllexport.cpp
index 1c96de9..4155940 100644
--- a/test/CodeGenCXX/dllexport.cpp
+++ b/test/CodeGenCXX/dllexport.cpp
@@ -29,6 +29,8 @@
 // The vftable for struct W is comdat largest because we have RTTI.
 // M32-DAG: $"\01??_7W@@6B@" = comdat largest
 
+// M32-DAG: $"\01?smember@?$Base@H@PR32992@@0HA" = comdat any
+
 
 //===----------------------------------------------------------------------===//
 // Globals
@@ -40,42 +42,42 @@
 __declspec(dllexport) extern int ExternGlobalDecl;
 
 // dllexport implies a definition.
-// MSC-DAG: @"\01?GlobalDef@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @GlobalDef            = dllexport global i32 0, align 4
+// MSC-DAG: @"\01?GlobalDef@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @GlobalDef            = dso_local dllexport global i32 0, align 4
 __declspec(dllexport) int GlobalDef;
 
 // Export definition.
-// MSC-DAG: @"\01?GlobalInit1@@3HA" = dllexport global i32 1, align 4
-// GNU-DAG: @GlobalInit1            = dllexport global i32 1, align 4
+// MSC-DAG: @"\01?GlobalInit1@@3HA" = dso_local dllexport global i32 1, align 4
+// GNU-DAG: @GlobalInit1            = dso_local dllexport global i32 1, align 4
 __declspec(dllexport) int GlobalInit1 = 1;
 
-// MSC-DAG: @"\01?GlobalInit2@@3HA" = dllexport global i32 1, align 4
-// GNU-DAG: @GlobalInit2            = dllexport global i32 1, align 4
+// MSC-DAG: @"\01?GlobalInit2@@3HA" = dso_local dllexport global i32 1, align 4
+// GNU-DAG: @GlobalInit2            = dso_local dllexport global i32 1, align 4
 int __declspec(dllexport) GlobalInit2 = 1;
 
 // Declare, then export definition.
-// MSC-DAG: @"\01?GlobalDeclInit@@3HA" = dllexport global i32 1, align 4
-// GNU-DAG: @GlobalDeclInit            = dllexport global i32 1, align 4
+// MSC-DAG: @"\01?GlobalDeclInit@@3HA" = dso_local dllexport global i32 1, align 4
+// GNU-DAG: @GlobalDeclInit            = dso_local dllexport global i32 1, align 4
 __declspec(dllexport) extern int GlobalDeclInit;
 int GlobalDeclInit = 1;
 
 // Redeclarations
-// MSC-DAG: @"\01?GlobalRedecl1@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @GlobalRedecl1            = dllexport global i32 0, align 4
+// MSC-DAG: @"\01?GlobalRedecl1@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @GlobalRedecl1            = dso_local dllexport global i32 0, align 4
 __declspec(dllexport) extern int GlobalRedecl1;
 __declspec(dllexport)        int GlobalRedecl1;
 
-// MSC-DAG: @"\01?GlobalRedecl2@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @GlobalRedecl2            = dllexport global i32 0, align 4
+// MSC-DAG: @"\01?GlobalRedecl2@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @GlobalRedecl2            = dso_local dllexport global i32 0, align 4
 __declspec(dllexport) extern int GlobalRedecl2;
                              int GlobalRedecl2;
 
-// MSC-DAG: @"\01?ExternalGlobal@ns@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @_ZN2ns14ExternalGlobalE      = dllexport global i32 0, align 4
+// MSC-DAG: @"\01?ExternalGlobal@ns@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @_ZN2ns14ExternalGlobalE      = dso_local dllexport global i32 0, align 4
 namespace ns { __declspec(dllexport) int ExternalGlobal; }
 
-// MSC-DAG: @"\01?ExternalAutoTypeGlobal@@3UExternal@@A" = dllexport global %struct.External zeroinitializer, align 4
-// GNU-DAG: @ExternalAutoTypeGlobal                      = dllexport global %struct.External zeroinitializer, align 4
+// MSC-DAG: @"\01?ExternalAutoTypeGlobal@@3UExternal@@A" = dso_local dllexport global %struct.External zeroinitializer, align 4
+// GNU-DAG: @ExternalAutoTypeGlobal                      = dso_local dllexport global %struct.External zeroinitializer, align 4
 __declspec(dllexport) auto ExternalAutoTypeGlobal = External();
 
 int f();
@@ -86,7 +88,7 @@
   return x++;
 };
 
-// MSC-DAG: @"\01?x@?1??inlineStaticLocalsFunc@@YAHXZ@4HA" = weak_odr dllexport global i32 0, comdat
+// MSC-DAG: @"\01?x@?1??inlineStaticLocalsFunc@@YAHXZ@4HA" = weak_odr dso_local dllexport global i32 0, comdat
 // MSC-DAG: @"\01??_B?1??inlineStaticLocalsFunc@@YAHXZ@51" = weak_odr dllexport global i32 0, comdat
 // Note: MinGW doesn't seem to export the static local here.
 inline int __declspec(dllexport) inlineStaticLocalsFunc() {
@@ -94,7 +96,35 @@
   return x++;
 }
 
+namespace PR32992 {
+// Static data members of a instantiated base class should be exported.
+template <class T>
+class Base {
+  virtual void myfunc() {}
+  static int smember;
+};
+// MSC-DAG: @"\01?smember@?$Base@H@PR32992@@0HA" = weak_odr dso_local dllexport global i32 77, comdat, align 4
+template <class T> int Base<T>::smember = 77;
+template <class T>
+class __declspec(dllexport) Derived2 : Base<T> {
+  void myfunc() {}
+};
+class Derived : public Derived2<int> {
+  void myfunc() {}
+};
+}  // namespace PR32992
 
+namespace PR32992_1 {
+namespace a { enum b { c }; }
+template <typename> class d {
+   static constexpr a::b e = a::c;
+};
+namespace f {
+  template <typename g = int> class h : d<g> {};
+}
+using f::h;
+class __declspec(dllexport) i : h<> {};
+}
 
 //===----------------------------------------------------------------------===//
 // Variable templates
@@ -102,55 +132,49 @@
 
 // Declarations are not exported.
 
-// dllexport implies a definition.
-// MSC-NOT: @"\01??$VarTmplDef@UExplicitInst_Exported@@@@3HA"
-// GNU-NOT: @_Z10VarTmplDefI21ExplicitInst_ExportedE
-template<typename T> __declspec(dllexport) int VarTmplDef;
-INSTVAR(VarTmplDef<ExplicitInst_Exported>)
-
-// MSC-DAG: @"\01??$VarTmplImplicitDef@UImplicitInst_Exported@@@@3HA" = external global
-// GNU-DAG: @_Z18VarTmplImplicitDefI21ImplicitInst_ExportedE          = external global
-template<typename T> __declspec(dllexport) int VarTmplImplicitDef;
+// MSC-DAG: @"\01??$VarTmplImplicitDef@UImplicitInst_Exported@@@@3HA" = external dso_local global
+// GNU-DAG: @_Z18VarTmplImplicitDefI21ImplicitInst_ExportedE          = external dso_local global
+template<typename T> __declspec(dllexport) extern int VarTmplImplicitDef;
 USEVAR(VarTmplImplicitDef<ImplicitInst_Exported>)
 
 // Export definition.
-// MSC-DAG: @"\01??$VarTmplInit1@UExplicitInst_Exported@@@@3HA" = weak_odr dllexport global i32 1, comdat, align 4
-// GNU-DAG: @_Z12VarTmplInit1I21ExplicitInst_ExportedE          = weak_odr dllexport global i32 1, comdat, align 4
+// MSC-DAG: @"\01??$VarTmplInit1@UExplicitInst_Exported@@@@3HA" = weak_odr dso_local dllexport global i32 1, comdat, align 4
+// GNU-DAG: @_Z12VarTmplInit1I21ExplicitInst_ExportedE          = weak_odr dso_local dllexport global i32 1, comdat, align 4
 template<typename T> __declspec(dllexport) int VarTmplInit1 = 1;
 INSTVAR(VarTmplInit1<ExplicitInst_Exported>)
 
-// MSC-DAG: @"\01??$VarTmplInit2@UExplicitInst_Exported@@@@3HA" = weak_odr dllexport global i32 1, comdat, align 4
-// GNU-DAG: @_Z12VarTmplInit2I21ExplicitInst_ExportedE          = weak_odr dllexport global i32 1, comdat, align 4
+// MSC-DAG: @"\01??$VarTmplInit2@UExplicitInst_Exported@@@@3HA" = weak_odr dso_local dllexport global i32 1, comdat, align 4
+// GNU-DAG: @_Z12VarTmplInit2I21ExplicitInst_ExportedE          = weak_odr dso_local dllexport global i32 1, comdat, align 4
 template<typename T> int __declspec(dllexport) VarTmplInit2 = 1;
 INSTVAR(VarTmplInit2<ExplicitInst_Exported>)
 
 // Declare, then export definition.
-// MSC-DAG: @"\01??$VarTmplDeclInit@UExplicitInst_Exported@@@@3HA" = weak_odr dllexport global i32 1, comdat, align 4
-// GNU-DAG: @_Z15VarTmplDeclInitI21ExplicitInst_ExportedE          = weak_odr dllexport global i32 1, comdat, align 4
+// MSC-DAG: @"\01??$VarTmplDeclInit@UExplicitInst_Exported@@@@3HA" = weak_odr dso_local dllexport global i32 1, comdat, align 4
+// GNU-DAG: @_Z15VarTmplDeclInitI21ExplicitInst_ExportedE          = weak_odr dso_local dllexport global i32 1, comdat, align 4
 template<typename T> __declspec(dllexport) extern int VarTmplDeclInit;
 template<typename T>                              int VarTmplDeclInit = 1;
 INSTVAR(VarTmplDeclInit<ExplicitInst_Exported>)
 
 // Redeclarations
-// MSC-DAG: @"\01??$VarTmplRedecl1@UExplicitInst_Exported@@@@3HA" = weak_odr dllexport global i32 1, comdat, align 4
-// GNU-DAG: @_Z14VarTmplRedecl1I21ExplicitInst_ExportedE          = weak_odr dllexport global i32 1, comdat, align 4
+// MSC-DAG: @"\01??$VarTmplRedecl1@UExplicitInst_Exported@@@@3HA" = weak_odr dso_local dllexport global i32 1, comdat, align 4
+// GNU-DAG: @_Z14VarTmplRedecl1I21ExplicitInst_ExportedE          = weak_odr dso_local dllexport global i32 1, comdat, align 4
 template<typename T> __declspec(dllexport) extern int VarTmplRedecl1;
 template<typename T> __declspec(dllexport)        int VarTmplRedecl1 = 1;
 INSTVAR(VarTmplRedecl1<ExplicitInst_Exported>)
 
-// MSC-DAG: @"\01??$VarTmplRedecl2@UExplicitInst_Exported@@@@3HA" = weak_odr dllexport global i32 1, comdat, align 4
-// GNU-DAG: @_Z14VarTmplRedecl2I21ExplicitInst_ExportedE          = weak_odr dllexport global i32 1, comdat, align 4
+// MSC-DAG: @"\01??$VarTmplRedecl2@UExplicitInst_Exported@@@@3HA" = weak_odr dso_local dllexport global i32 1, comdat, align 4
+// GNU-DAG: @_Z14VarTmplRedecl2I21ExplicitInst_ExportedE          = weak_odr dso_local dllexport global i32 1, comdat, align 4
 template<typename T> __declspec(dllexport) extern int VarTmplRedecl2;
 template<typename T>                              int VarTmplRedecl2 = 1;
 INSTVAR(VarTmplRedecl2<ExplicitInst_Exported>)
 
-// MSC-DAG: @"\01??$ExternalVarTmpl@UExplicitInst_Exported@@@ns@@3HA" = weak_odr dllexport global i32 1, comdat, align 4
-// GNU-DAG: @_ZN2ns15ExternalVarTmplI21ExplicitInst_ExportedEE        = weak_odr dllexport global i32 1, comdat, align 4
+// MSC-DAG: @"\01??$ExternalVarTmpl@UExplicitInst_Exported@@@ns@@3HA" = weak_odr dso_local dllexport global i32 1, comdat, align 4
+// GNU-DAG: @_ZN2ns15ExternalVarTmplI21ExplicitInst_ExportedEE        = weak_odr dso_local dllexport global i32 1, comdat, align 4
 namespace ns { template<typename T> __declspec(dllexport) int ExternalVarTmpl = 1; }
 INSTVAR(ns::ExternalVarTmpl<ExplicitInst_Exported>)
 
-// MSC-DAG: @"\01??$ExternalAutoTypeVarTmpl@UExplicitInst_Exported@@@@3UExternal@@A" = weak_odr dllexport global %struct.External zeroinitializer, comdat, align 4
-// GNU-DAG: @_Z23ExternalAutoTypeVarTmplI21ExplicitInst_ExportedE                    = weak_odr dllexport global %struct.External zeroinitializer, comdat, align 4
+// MSC-DAG: @"\01??$ExternalAutoTypeVarTmpl@UExplicitInst_Exported@@@@3UExternal@@A" = weak_odr dso_local dllexport global %struct.External zeroinitializer, comdat, align 4
+// GNU-DAG: @_Z23ExternalAutoTypeVarTmplI21ExplicitInst_ExportedE                    = weak_odr dso_local dllexport global %struct.External zeroinitializer, comdat, align 4
 template<typename T> __declspec(dllexport) auto ExternalAutoTypeVarTmpl = External();
 template External ExternalAutoTypeVarTmpl<ExplicitInst_Exported>;
 
@@ -159,55 +183,55 @@
 template<typename T> __declspec(dllexport) int ExportedVarTmpl = 1;
 
 // Export implicit instantiation of an exported variable template.
-// MSC-DAG: @"\01??$ExportedVarTmpl@UImplicitInst_Exported@@@@3HA" = weak_odr dllexport global i32 1, comdat, align 4
-// GNU-DAG: @_Z15ExportedVarTmplI21ImplicitInst_ExportedE          = weak_odr dllexport global i32 1, comdat, align 4
+// MSC-DAG: @"\01??$ExportedVarTmpl@UImplicitInst_Exported@@@@3HA" = weak_odr dso_local dllexport global i32 1, comdat, align 4
+// GNU-DAG: @_Z15ExportedVarTmplI21ImplicitInst_ExportedE          = weak_odr dso_local dllexport global i32 1, comdat, align 4
 USEVAR(ExportedVarTmpl<ImplicitInst_Exported>)
 
 // Export explicit instantiation declaration of an exported variable template.
-// MSC-DAG: @"\01??$ExportedVarTmpl@UImplicitInst_Exported@@@@3HA" = weak_odr dllexport global i32 1, comdat, align 4
-// GNU-DAG: @_Z15ExportedVarTmplI21ExplicitDecl_ExportedE          = weak_odr dllexport global i32 1, comdat, align 4
+// MSC-DAG: @"\01??$ExportedVarTmpl@UImplicitInst_Exported@@@@3HA" = weak_odr dso_local dllexport global i32 1, comdat, align 4
+// GNU-DAG: @_Z15ExportedVarTmplI21ExplicitDecl_ExportedE          = weak_odr dso_local dllexport global i32 1, comdat, align 4
 extern template int ExportedVarTmpl<ExplicitDecl_Exported>;
        template int ExportedVarTmpl<ExplicitDecl_Exported>;
 
 // Export explicit instantiation definition of an exported variable template.
-// MSC-DAG: @"\01??$ExportedVarTmpl@UImplicitInst_Exported@@@@3HA" = weak_odr dllexport global i32 1, comdat, align 4
-// GNU-DAG: @_Z15ExportedVarTmplI21ExplicitInst_ExportedE          = weak_odr dllexport global i32 1, comdat, align 4
+// MSC-DAG: @"\01??$ExportedVarTmpl@UImplicitInst_Exported@@@@3HA" = weak_odr dso_local dllexport global i32 1, comdat, align 4
+// GNU-DAG: @_Z15ExportedVarTmplI21ExplicitInst_ExportedE          = weak_odr dso_local dllexport global i32 1, comdat, align 4
 template __declspec(dllexport) int ExportedVarTmpl<ExplicitInst_Exported>;
 
 // Export specialization of an exported variable template.
-// MSC-DAG: @"\01??$ExportedVarTmpl@UExplicitSpec_Exported@@@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @_Z15ExportedVarTmplI21ExplicitSpec_ExportedE          = dllexport global i32 0, align 4
+// MSC-DAG: @"\01??$ExportedVarTmpl@UExplicitSpec_Exported@@@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @_Z15ExportedVarTmplI21ExplicitSpec_ExportedE          = dso_local dllexport global i32 0, align 4
 template<> __declspec(dllexport) int ExportedVarTmpl<ExplicitSpec_Exported>;
 
-// MSC-DAG: @"\01??$ExportedVarTmpl@UExplicitSpec_Def_Exported@@@@3HA" = dllexport global i32 1, align 4
-// GNU-DAG: @_Z15ExportedVarTmplI25ExplicitSpec_Def_ExportedE          = dllexport global i32 1, align 4
+// MSC-DAG: @"\01??$ExportedVarTmpl@UExplicitSpec_Def_Exported@@@@3HA" = dso_local dllexport global i32 1, align 4
+// GNU-DAG: @_Z15ExportedVarTmplI25ExplicitSpec_Def_ExportedE          = dso_local dllexport global i32 1, align 4
 template<> __declspec(dllexport) int ExportedVarTmpl<ExplicitSpec_Def_Exported> = 1;
 
 // Not exporting specialization of an exported variable template without
 // explicit dllexport.
-// MSC-DAG: @"\01??$ExportedVarTmpl@UExplicitSpec_NotExported@@@@3HA" = global i32 0, align 4
-// GNU-DAG: @_Z15ExportedVarTmplI24ExplicitSpec_NotExportedE          = global i32 0, align 4
+// MSC-DAG: @"\01??$ExportedVarTmpl@UExplicitSpec_NotExported@@@@3HA" = dso_local global i32 0, align 4
+// GNU-DAG: @_Z15ExportedVarTmplI24ExplicitSpec_NotExportedE          = dso_local global i32 0, align 4
 template<> int ExportedVarTmpl<ExplicitSpec_NotExported>;
 
 
 // Export explicit instantiation declaration of a non-exported variable template.
-// MSC-DAG: @"\01??$VarTmpl@UExplicitDecl_Exported@@@@3HA" = weak_odr dllexport global i32 1, comdat, align 4
-// GNU-DAG: @_Z7VarTmplI21ExplicitDecl_ExportedE           = weak_odr dllexport global i32 1, comdat, align 4
+// MSC-DAG: @"\01??$VarTmpl@UExplicitDecl_Exported@@@@3HA" = weak_odr dso_local dllexport global i32 1, comdat, align 4
+// GNU-DAG: @_Z7VarTmplI21ExplicitDecl_ExportedE           = weak_odr dso_local dllexport global i32 1, comdat, align 4
 extern template __declspec(dllexport) int VarTmpl<ExplicitDecl_Exported>;
        template __declspec(dllexport) int VarTmpl<ExplicitDecl_Exported>;
 
 // Export explicit instantiation definition of a non-exported variable template.
-// MSC-DAG: @"\01??$VarTmpl@UExplicitInst_Exported@@@@3HA" = weak_odr dllexport global i32 1, comdat, align 4
-// GNU-DAG: @_Z7VarTmplI21ExplicitInst_ExportedE           = weak_odr dllexport global i32 1, comdat, align 4
+// MSC-DAG: @"\01??$VarTmpl@UExplicitInst_Exported@@@@3HA" = weak_odr dso_local dllexport global i32 1, comdat, align 4
+// GNU-DAG: @_Z7VarTmplI21ExplicitInst_ExportedE           = weak_odr dso_local dllexport global i32 1, comdat, align 4
 template __declspec(dllexport) int VarTmpl<ExplicitInst_Exported>;
 
 // Export specialization of a non-exported variable template.
-// MSC-DAG: @"\01??$VarTmpl@UExplicitSpec_Exported@@@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @_Z7VarTmplI21ExplicitSpec_ExportedE           = dllexport global i32 0, align 4
+// MSC-DAG: @"\01??$VarTmpl@UExplicitSpec_Exported@@@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @_Z7VarTmplI21ExplicitSpec_ExportedE           = dso_local dllexport global i32 0, align 4
 template<> __declspec(dllexport) int VarTmpl<ExplicitSpec_Exported>;
 
-// MSC-DAG: @"\01??$VarTmpl@UExplicitSpec_Def_Exported@@@@3HA" = dllexport global i32 1, align 4
-// GNU-DAG: @_Z7VarTmplI25ExplicitSpec_Def_ExportedE           = dllexport global i32 1, align 4
+// MSC-DAG: @"\01??$VarTmpl@UExplicitSpec_Def_Exported@@@@3HA" = dso_local dllexport global i32 1, align 4
+// GNU-DAG: @_Z7VarTmplI25ExplicitSpec_Def_ExportedE           = dso_local dllexport global i32 1, align 4
 template<> __declspec(dllexport) int VarTmpl<ExplicitSpec_Def_Exported> = 1;
 
 
@@ -219,46 +243,46 @@
 // Declarations are not exported.
 
 // Export function definition.
-// MSC-DAG: define dllexport void @"\01?def@@YAXXZ"()
-// GNU-DAG: define dllexport void @_Z3defv()
+// MSC-DAG: define dso_local dllexport void @"\01?def@@YAXXZ"()
+// GNU-DAG: define dso_local dllexport void @_Z3defv()
 __declspec(dllexport) void def() {}
 
 // extern "C"
-// MSC-DAG: define dllexport void @externC()
-// GNU-DAG: define dllexport void @externC()
+// MSC-DAG: define dso_local dllexport void @externC()
+// GNU-DAG: define dso_local dllexport void @externC()
 extern "C" __declspec(dllexport) void externC() {}
 
 // Export inline function.
-// MSC-DAG: define weak_odr dllexport void @"\01?inlineFunc@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z10inlineFuncv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01?inlineFunc@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z10inlineFuncv()
 __declspec(dllexport) inline void inlineFunc() {}
 
-// MSC-DAG: define weak_odr dllexport void @"\01?inlineDecl@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z10inlineDeclv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01?inlineDecl@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z10inlineDeclv()
 __declspec(dllexport) inline void inlineDecl();
                              void inlineDecl() {}
 
-// MSC-DAG: define weak_odr dllexport void @"\01?inlineDef@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z9inlineDefv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01?inlineDef@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z9inlineDefv()
 __declspec(dllexport) void inlineDef();
                inline void inlineDef() {}
 
 // Redeclarations
-// MSC-DAG: define dllexport void @"\01?redecl1@@YAXXZ"()
-// GNU-DAG: define dllexport void @_Z7redecl1v()
+// MSC-DAG: define dso_local dllexport void @"\01?redecl1@@YAXXZ"()
+// GNU-DAG: define dso_local dllexport void @_Z7redecl1v()
 __declspec(dllexport) void redecl1();
 __declspec(dllexport) void redecl1() {}
 
-// MSC-DAG: define dllexport void @"\01?redecl2@@YAXXZ"()
-// GNU-DAG: define dllexport void @_Z7redecl2v()
+// MSC-DAG: define dso_local dllexport void @"\01?redecl2@@YAXXZ"()
+// GNU-DAG: define dso_local dllexport void @_Z7redecl2v()
 __declspec(dllexport) void redecl2();
                       void redecl2() {}
 
 // Friend functions
-// MSC-DAG: define dllexport void @"\01?friend1@@YAXXZ"()
-// GNU-DAG: define dllexport void @_Z7friend1v()
-// MSC-DAG: define dllexport void @"\01?friend2@@YAXXZ"()
-// GNU-DAG: define dllexport void @_Z7friend2v()
+// MSC-DAG: define dso_local dllexport void @"\01?friend1@@YAXXZ"()
+// GNU-DAG: define dso_local dllexport void @_Z7friend1v()
+// MSC-DAG: define dso_local dllexport void @"\01?friend2@@YAXXZ"()
+// GNU-DAG: define dso_local dllexport void @_Z7friend2v()
 struct FuncFriend {
   friend __declspec(dllexport) void friend1();
   friend __declspec(dllexport) void friend2();
@@ -267,13 +291,13 @@
                       void friend2() {}
 
 // Implicit declarations can be redeclared with dllexport.
-// MSC-DAG: define dllexport noalias i8* @"\01??2@{{YAPAXI|YAPEAX_K}}@Z"(
-// GNU-DAG: define dllexport noalias i8* @_Znw{{[yj]}}(
+// MSC-DAG: define dso_local dllexport noalias i8* @"\01??2@{{YAPAXI|YAPEAX_K}}@Z"(
+// GNU-DAG: define dso_local dllexport noalias i8* @_Znw{{[yj]}}(
 void* alloc(__SIZE_TYPE__ n);
 __declspec(dllexport) void* operator new(__SIZE_TYPE__ n) { return alloc(n); }
 
-// MSC-DAG: define dllexport void @"\01?externalFunc@ns@@YAXXZ"()
-// GNU-DAG: define dllexport void @_ZN2ns12externalFuncEv()
+// MSC-DAG: define dso_local dllexport void @"\01?externalFunc@ns@@YAXXZ"()
+// GNU-DAG: define dso_local dllexport void @_ZN2ns12externalFuncEv()
 namespace ns { __declspec(dllexport) void externalFunc() {} }
 
 
@@ -283,60 +307,60 @@
 //===----------------------------------------------------------------------===//
 
 // Export function template definition.
-// MSC-DAG: define weak_odr dllexport void @"\01??$funcTmplDef@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z11funcTmplDefI21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$funcTmplDef@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z11funcTmplDefI21ExplicitInst_ExportedEvv()
 template<typename T> __declspec(dllexport) void funcTmplDef() {}
 INST(funcTmplDef<ExplicitInst_Exported>)
 
 // Export inline function template.
-// MSC-DAG: define weak_odr dllexport void @"\01??$inlineFuncTmpl1@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z15inlineFuncTmpl1I21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$inlineFuncTmpl1@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z15inlineFuncTmpl1I21ExplicitInst_ExportedEvv()
 template<typename T> __declspec(dllexport) inline void inlineFuncTmpl1() {}
 INST(inlineFuncTmpl1<ExplicitInst_Exported>)
 
-// MSC-DAG: define weak_odr dllexport void @"\01??$inlineFuncTmpl2@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z15inlineFuncTmpl2I21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$inlineFuncTmpl2@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z15inlineFuncTmpl2I21ExplicitInst_ExportedEvv()
 template<typename T> inline void __attribute__((dllexport)) inlineFuncTmpl2() {}
 INST(inlineFuncTmpl2<ExplicitInst_Exported>)
 
-// MSC-DAG: define weak_odr dllexport void @"\01??$inlineFuncTmplDecl@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z18inlineFuncTmplDeclI21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$inlineFuncTmplDecl@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z18inlineFuncTmplDeclI21ExplicitInst_ExportedEvv()
 template<typename T> __declspec(dllexport) inline void inlineFuncTmplDecl();
 template<typename T>                              void inlineFuncTmplDecl() {}
 INST(inlineFuncTmplDecl<ExplicitInst_Exported>)
 
-// MSC-DAG: define weak_odr dllexport void @"\01??$inlineFuncTmplDef@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z17inlineFuncTmplDefI21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$inlineFuncTmplDef@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z17inlineFuncTmplDefI21ExplicitInst_ExportedEvv()
 template<typename T> __declspec(dllexport) void inlineFuncTmplDef();
 template<typename T>                inline void inlineFuncTmplDef() {}
 INST(inlineFuncTmplDef<ExplicitInst_Exported>)
 
 
 // Redeclarations
-// MSC-DAG: define weak_odr dllexport void @"\01??$funcTmplRedecl1@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z15funcTmplRedecl1I21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$funcTmplRedecl1@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z15funcTmplRedecl1I21ExplicitInst_ExportedEvv()
 template<typename T> __declspec(dllexport) void funcTmplRedecl1();
 template<typename T> __declspec(dllexport) void funcTmplRedecl1() {}
 INST(funcTmplRedecl1<ExplicitInst_Exported>)
 
-// MSC-DAG: define weak_odr dllexport void @"\01??$funcTmplRedecl2@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z15funcTmplRedecl2I21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$funcTmplRedecl2@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z15funcTmplRedecl2I21ExplicitInst_ExportedEvv()
 template<typename T> __declspec(dllexport) void funcTmplRedecl2();
 template<typename T>                       void funcTmplRedecl2() {}
 INST(funcTmplRedecl2<ExplicitInst_Exported>)
 
-// MSC-DAG: define weak_odr dllexport void @"\01??$funcTmplRedecl3@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z15funcTmplRedecl3I21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$funcTmplRedecl3@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z15funcTmplRedecl3I21ExplicitInst_ExportedEvv()
 template<typename T> __declspec(dllexport) void funcTmplRedecl3();
 template<typename T>                       void funcTmplRedecl3() {}
 INST(funcTmplRedecl3<ExplicitInst_Exported>)
 
 
 // Function template friends
-// MSC-DAG: define weak_odr dllexport void @"\01??$funcTmplFriend1@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z15funcTmplFriend1I21ExplicitInst_ExportedEvv()
-// MSC-DAG: define weak_odr dllexport void @"\01??$funcTmplFriend2@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z15funcTmplFriend2I21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$funcTmplFriend1@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z15funcTmplFriend1I21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$funcTmplFriend2@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z15funcTmplFriend2I21ExplicitInst_ExportedEvv()
 struct FuncTmplFriend {
   template<typename T> friend __declspec(dllexport) void funcTmplFriend1();
   template<typename T> friend __declspec(dllexport) void funcTmplFriend2();
@@ -346,8 +370,8 @@
 INST(funcTmplFriend1<ExplicitInst_Exported>)
 INST(funcTmplFriend2<ExplicitInst_Exported>)
 
-// MSC-DAG: define weak_odr dllexport void @"\01??$externalFuncTmpl@UExplicitInst_Exported@@@ns@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_ZN2ns16externalFuncTmplI21ExplicitInst_ExportedEEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$externalFuncTmpl@UExplicitInst_Exported@@@ns@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_ZN2ns16externalFuncTmplI21ExplicitInst_ExportedEEvv()
 namespace ns { template<typename T> __declspec(dllexport) void externalFuncTmpl() {} }
 INST(ns::externalFuncTmpl<ExplicitInst_Exported>)
 
@@ -356,55 +380,55 @@
 template<typename T> __declspec(dllexport) void exportedFuncTmpl() {}
 
 // Export implicit instantiation of an exported function template.
-// MSC-DAG: define weak_odr dllexport void @"\01??$exportedFuncTmpl@UImplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z16exportedFuncTmplI21ImplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$exportedFuncTmpl@UImplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z16exportedFuncTmplI21ImplicitInst_ExportedEvv()
 USE(exportedFuncTmpl<ImplicitInst_Exported>)
 
 // Export explicit instantiation declaration of an exported function template.
-// MSC-DAG: define weak_odr dllexport void @"\01??$exportedFuncTmpl@UExplicitDecl_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z16exportedFuncTmplI21ExplicitDecl_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$exportedFuncTmpl@UExplicitDecl_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z16exportedFuncTmplI21ExplicitDecl_ExportedEvv()
 extern template void exportedFuncTmpl<ExplicitDecl_Exported>();
        template void exportedFuncTmpl<ExplicitDecl_Exported>();
 
 // Export explicit instantiation definition of an exported function template.
-// MSC-DAG: define weak_odr dllexport void @"\01??$exportedFuncTmpl@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z16exportedFuncTmplI21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$exportedFuncTmpl@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z16exportedFuncTmplI21ExplicitInst_ExportedEvv()
 template void exportedFuncTmpl<ExplicitInst_Exported>();
 
 // Export specialization of an exported function template.
-// MSC-DAG: define dllexport void @"\01??$exportedFuncTmpl@UExplicitSpec_Def_Exported@@@@YAXXZ"()
-// GNU-DAG: define dllexport void @_Z16exportedFuncTmplI25ExplicitSpec_Def_ExportedEvv()
+// MSC-DAG: define dso_local dllexport void @"\01??$exportedFuncTmpl@UExplicitSpec_Def_Exported@@@@YAXXZ"()
+// GNU-DAG: define dso_local dllexport void @_Z16exportedFuncTmplI25ExplicitSpec_Def_ExportedEvv()
 template<> __declspec(dllexport) void exportedFuncTmpl<ExplicitSpec_Def_Exported>() {}
 
-// MSC-DAG: define weak_odr dllexport void @"\01??$exportedFuncTmpl@UExplicitSpec_InlineDef_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z16exportedFuncTmplI31ExplicitSpec_InlineDef_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$exportedFuncTmpl@UExplicitSpec_InlineDef_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z16exportedFuncTmplI31ExplicitSpec_InlineDef_ExportedEvv()
 template<> __declspec(dllexport) inline void exportedFuncTmpl<ExplicitSpec_InlineDef_Exported>() {}
 
 // Not exporting specialization of an exported function template without
 // explicit dllexport.
-// MSC-DAG: define void @"\01??$exportedFuncTmpl@UExplicitSpec_NotExported@@@@YAXXZ"()
-// GNU-DAG: define void @_Z16exportedFuncTmplI24ExplicitSpec_NotExportedEvv()
+// MSC-DAG: define dso_local void @"\01??$exportedFuncTmpl@UExplicitSpec_NotExported@@@@YAXXZ"()
+// GNU-DAG: define dso_local void @_Z16exportedFuncTmplI24ExplicitSpec_NotExportedEvv()
 template<> void exportedFuncTmpl<ExplicitSpec_NotExported>() {}
 
 
 // Export explicit instantiation declaration of a non-exported function template.
-// MSC-DAG: define weak_odr dllexport void @"\01??$funcTmpl@UExplicitDecl_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z8funcTmplI21ExplicitDecl_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$funcTmpl@UExplicitDecl_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z8funcTmplI21ExplicitDecl_ExportedEvv()
 extern template __declspec(dllexport) void funcTmpl<ExplicitDecl_Exported>();
        template __declspec(dllexport) void funcTmpl<ExplicitDecl_Exported>();
 
 // Export explicit instantiation definition of a non-exported function template.
-// MSC-DAG: define weak_odr dllexport void @"\01??$funcTmpl@UExplicitInst_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z8funcTmplI21ExplicitInst_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$funcTmpl@UExplicitInst_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z8funcTmplI21ExplicitInst_ExportedEvv()
 template __declspec(dllexport) void funcTmpl<ExplicitInst_Exported>();
 
 // Export specialization of a non-exported function template.
-// MSC-DAG: define dllexport void @"\01??$funcTmpl@UExplicitSpec_Def_Exported@@@@YAXXZ"()
-// GNU-DAG: define dllexport void @_Z8funcTmplI25ExplicitSpec_Def_ExportedEvv()
+// MSC-DAG: define dso_local dllexport void @"\01??$funcTmpl@UExplicitSpec_Def_Exported@@@@YAXXZ"()
+// GNU-DAG: define dso_local dllexport void @_Z8funcTmplI25ExplicitSpec_Def_ExportedEvv()
 template<> __declspec(dllexport) void funcTmpl<ExplicitSpec_Def_Exported>() {}
 
-// MSC-DAG: define weak_odr dllexport void @"\01??$funcTmpl@UExplicitSpec_InlineDef_Exported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr dllexport void @_Z8funcTmplI31ExplicitSpec_InlineDef_ExportedEvv()
+// MSC-DAG: define weak_odr dso_local dllexport void @"\01??$funcTmpl@UExplicitSpec_InlineDef_Exported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local dllexport void @_Z8funcTmplI31ExplicitSpec_InlineDef_ExportedEvv()
 template<> __declspec(dllexport) inline void funcTmpl<ExplicitSpec_InlineDef_Exported>() {}
 
 
@@ -414,61 +438,61 @@
 //===----------------------------------------------------------------------===//
 
 // dllexport takes precedence over the dllimport if both are specified.
-// MSC-DAG: @"\01?PrecedenceGlobal1A@@3HA" = dllexport global i32 0, align 4
-// MSC-DAG: @"\01?PrecedenceGlobal1B@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @PrecedenceGlobal1A            = dllexport global i32 0, align 4
-// GNU-DAG: @PrecedenceGlobal1B            = dllexport global i32 0, align 4
+// MSC-DAG: @"\01?PrecedenceGlobal1A@@3HA" = dso_local dllexport global i32 0, align 4
+// MSC-DAG: @"\01?PrecedenceGlobal1B@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @PrecedenceGlobal1A            = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @PrecedenceGlobal1B            = dso_local dllexport global i32 0, align 4
 __attribute__((dllimport, dllexport))       int PrecedenceGlobal1A; // dllimport ignored
 __declspec(dllimport) __declspec(dllexport) int PrecedenceGlobal1B; // dllimport ignored
 
-// MSC-DAG: @"\01?PrecedenceGlobal2A@@3HA" = dllexport global i32 0, align 4
-// MSC-DAG: @"\01?PrecedenceGlobal2B@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @PrecedenceGlobal2A            = dllexport global i32 0, align 4
-// GNU-DAG: @PrecedenceGlobal2B            = dllexport global i32 0, align 4
+// MSC-DAG: @"\01?PrecedenceGlobal2A@@3HA" = dso_local dllexport global i32 0, align 4
+// MSC-DAG: @"\01?PrecedenceGlobal2B@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @PrecedenceGlobal2A            = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @PrecedenceGlobal2B            = dso_local dllexport global i32 0, align 4
 __attribute__((dllexport, dllimport))       int PrecedenceGlobal2A; // dllimport ignored
 __declspec(dllexport) __declspec(dllimport) int PrecedenceGlobal2B; // dllimport ignored
 
-// MSC-DAG: @"\01?PrecedenceGlobalRedecl1@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @PrecedenceGlobalRedecl1            = dllexport global i32 0, align 4
+// MSC-DAG: @"\01?PrecedenceGlobalRedecl1@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @PrecedenceGlobalRedecl1            = dso_local dllexport global i32 0, align 4
 __declspec(dllexport) extern int PrecedenceGlobalRedecl1;
 __declspec(dllimport)        int PrecedenceGlobalRedecl1 = 0;
 
-// MSC-DAG: @"\01?PrecedenceGlobalRedecl2@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @PrecedenceGlobalRedecl2            = dllexport global i32 0, align 4
+// MSC-DAG: @"\01?PrecedenceGlobalRedecl2@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @PrecedenceGlobalRedecl2            = dso_local dllexport global i32 0, align 4
 __declspec(dllimport) extern int PrecedenceGlobalRedecl2;
 __declspec(dllexport)        int PrecedenceGlobalRedecl2;
 
-// MSC-DAG: @"\01?PrecedenceGlobalMixed1@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @PrecedenceGlobalMixed1            = dllexport global i32 0, align 4
+// MSC-DAG: @"\01?PrecedenceGlobalMixed1@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @PrecedenceGlobalMixed1            = dso_local dllexport global i32 0, align 4
 __attribute__((dllexport)) extern int PrecedenceGlobalMixed1;
 __declspec(dllimport)             int PrecedenceGlobalMixed1 = 0;
 
-// MSC-DAG: @"\01?PrecedenceGlobalMixed2@@3HA" = dllexport global i32 0, align 4
-// GNU-DAG: @PrecedenceGlobalMixed2            = dllexport global i32 0, align 4
+// MSC-DAG: @"\01?PrecedenceGlobalMixed2@@3HA" = dso_local dllexport global i32 0, align 4
+// GNU-DAG: @PrecedenceGlobalMixed2            = dso_local dllexport global i32 0, align 4
 __attribute__((dllimport)) extern int PrecedenceGlobalMixed2;
 __declspec(dllexport)             int PrecedenceGlobalMixed2;
 
-// MSC-DAG: define dllexport void @"\01?precedence1A@@YAXXZ"
-// MSC-DAG: define dllexport void @"\01?precedence1B@@YAXXZ"
-// GNU-DAG: define dllexport void @_Z12precedence1Av()
-// GNU-DAG: define dllexport void @_Z12precedence1Bv()
+// MSC-DAG: define dso_local dllexport void @"\01?precedence1A@@YAXXZ"
+// MSC-DAG: define dso_local dllexport void @"\01?precedence1B@@YAXXZ"
+// GNU-DAG: define dso_local dllexport void @_Z12precedence1Av()
+// GNU-DAG: define dso_local dllexport void @_Z12precedence1Bv()
 void __attribute__((dllimport, dllexport))       precedence1A() {}
 void __declspec(dllimport) __declspec(dllexport) precedence1B() {}
 
-// MSC-DAG: define dllexport void @"\01?precedence2A@@YAXXZ"
-// MSC-DAG: define dllexport void @"\01?precedence2B@@YAXXZ"
-// GNU-DAG: define dllexport void @_Z12precedence2Av()
-// GNU-DAG: define dllexport void @_Z12precedence2Bv()
+// MSC-DAG: define dso_local dllexport void @"\01?precedence2A@@YAXXZ"
+// MSC-DAG: define dso_local dllexport void @"\01?precedence2B@@YAXXZ"
+// GNU-DAG: define dso_local dllexport void @_Z12precedence2Av()
+// GNU-DAG: define dso_local dllexport void @_Z12precedence2Bv()
 void __attribute__((dllexport, dllimport))       precedence2A() {}
 void __declspec(dllexport) __declspec(dllimport) precedence2B() {}
 
-// MSC-DAG: define dllexport void @"\01?precedenceRedecl1@@YAXXZ"
-// GNU-DAG: define dllexport void @_Z17precedenceRedecl1v()
+// MSC-DAG: define dso_local dllexport void @"\01?precedenceRedecl1@@YAXXZ"
+// GNU-DAG: define dso_local dllexport void @_Z17precedenceRedecl1v()
 void __declspec(dllimport) precedenceRedecl1();
 void __declspec(dllexport) precedenceRedecl1() {}
 
-// MSC-DAG: define dllexport void @"\01?precedenceRedecl2@@YAXXZ"
-// GNU-DAG: define dllexport void @_Z17precedenceRedecl2v()
+// MSC-DAG: define dso_local dllexport void @"\01?precedenceRedecl2@@YAXXZ"
+// GNU-DAG: define dso_local dllexport void @_Z17precedenceRedecl2v()
 void __declspec(dllexport) precedenceRedecl2();
 void __declspec(dllimport) precedenceRedecl2() {}
 
@@ -480,11 +504,11 @@
 
 struct S {
   void __declspec(dllexport) a() {}
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?a@S@@QAEXXZ"
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?a@S@@QAEXXZ"
 
   struct T {
     void __declspec(dllexport) a() {}
-    // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?a@T@S@@QAEXXZ"
+    // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?a@T@S@@QAEXXZ"
   };
 };
 
@@ -493,11 +517,11 @@
   SomeTemplate(T o = T()) : o(o) {}
   T o;
 };
-// MSVC2015-DAG: define weak_odr dllexport {{.+}} @"\01??4?$SomeTemplate@H@@Q{{.+}}@$$Q{{.+}}@@Z"
-// MSVC2013-DAG: define weak_odr dllexport {{.+}} @"\01??4?$SomeTemplate@H@@Q{{.+}}0@A{{.+}}0@@Z"
+// MSVC2015-DAG: define weak_odr dso_local dllexport {{.+}} @"\01??4?$SomeTemplate@H@@Q{{.+}}@$$Q{{.+}}@@Z"
+// MSVC2013-DAG: define weak_odr dso_local dllexport {{.+}} @"\01??4?$SomeTemplate@H@@Q{{.+}}0@A{{.+}}0@@Z"
 struct __declspec(dllexport) InheritFromTemplate : SomeTemplate<int> {};
 
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_F?$SomeTemplate@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_F?$SomeTemplate@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
 
 namespace PR23801 {
 template <typename>
@@ -514,32 +538,32 @@
 
 }
 //
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FB@PR23801@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_FB@PR23801@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
 
 struct __declspec(dllexport) T {
   // Copy assignment operator:
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.T* @"\01??4T@@QAEAAU0@ABU0@@Z"
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.T* @"\01??4T@@QAEAAU0@ABU0@@Z"
 
   // Explicitly defaulted copy constructur:
   T(const T&) = default;
-  // M32MSVC2013-DAG: define weak_odr dllexport x86_thiscallcc %struct.T* @"\01??0T@@QAE@ABU0@@Z"
+  // M32MSVC2013-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.T* @"\01??0T@@QAE@ABU0@@Z"
 
   void a() {}
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?a@T@@QAEXXZ"
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?a@T@@QAEXXZ"
 
   static int b;
-  // M32-DAG: @"\01?b@T@@2HA" = external global i32
+  // M32-DAG: @"\01?b@T@@2HA" = external dso_local global i32
 
   static int c;
-  // M32-DAG: @"\01?c@T@@2HA" = dllexport global i32 0, align 4
+  // M32-DAG: @"\01?c@T@@2HA" = dso_local dllexport global i32 0, align 4
 };
 
 USEVAR(T::b)
 int T::c;
 
 // Export template class with static member variable
-// MSC-DAG: @"\01?StaticClassVarExpTmplClass@?$TmplClass@H@@2HA" = weak_odr dllexport global i32 0, comdat, align 4
-// GNU-DAG: @_ZN9TmplClassIiE26StaticClassVarExpTmplClassE = weak_odr dllexport global i32 0, comdat, align 4
+// MSC-DAG: @"\01?StaticClassVarExpTmplClass@?$TmplClass@H@@2HA" = weak_odr dso_local dllexport global i32 0, comdat, align 4
+// GNU-DAG: @_ZN9TmplClassIiE26StaticClassVarExpTmplClassE = weak_odr dso_local dllexport global i32 0, comdat, align 4
 template<typename T>
 struct __declspec(dllexport) TmplClass
 {
@@ -550,8 +574,8 @@
 T TmplClass<T>::StaticClassVarExpTmplClass;
 
 // Export a definition of a template function.
-// MSC-DAG: define weak_odr dllexport i32 @"\01??$TypeFunTmpl@H@@YAHH@Z"
-// GNU-DAG: define weak_odr dllexport i32 @_Z11TypeFunTmplIiET_S0_
+// MSC-DAG: define weak_odr dso_local dllexport i32 @"\01??$TypeFunTmpl@H@@YAHH@Z"
+// GNU-DAG: define weak_odr dso_local dllexport i32 @_Z11TypeFunTmplIiET_S0_
 template<typename T>
 T __declspec(dllexport) TypeFunTmpl(T t) { return t + t; }
 
@@ -564,18 +588,18 @@
 template <typename T> struct __declspec(dllexport) U { void foo() {} };
 struct __declspec(dllexport) V : public U<int> { };
 // U<int>'s assignment operator is emitted.
-// M32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.U* @"\01??4?$U@H@@QAEAAU0@ABU0@@Z"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.U* @"\01??4?$U@H@@QAEAAU0@ABU0@@Z"
 
 struct __declspec(dllexport) W { virtual void foo(); };
 void W::foo() {}
 // Default ctor:
-// M32-DAG: define weak_odr dllexport x86_thiscallcc %struct.W* @"\01??0W@@QAE@XZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.W* @"\01??0W@@QAE@XZ"
 // Copy ctor:
-// M32-DAG: define weak_odr dllexport x86_thiscallcc %struct.W* @"\01??0W@@QAE@ABU0@@Z"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.W* @"\01??0W@@QAE@ABU0@@Z"
 // vftable:
 // M32-DAG: [[W_VTABLE:@.*]] = private unnamed_addr constant { [2 x i8*] } { [2 x i8*] [i8* bitcast (%rtti.CompleteObjectLocator* @"\01??_R4W@@6B@" to i8*), i8* bitcast (void (%struct.W*)* @"\01?foo@W@@UAEXXZ" to i8*)] }, comdat($"\01??_7W@@6B@")
 // M32-DAG: @"\01??_7W@@6B@" = dllexport unnamed_addr alias i8*, getelementptr inbounds ({ [2 x i8*] }, { [2 x i8*] }* [[W_VTABLE]], i32 0, i32 0, i32 1)
-// G32-DAG: @_ZTV1W = dllexport unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1W to i8*), i8* bitcast (void (%struct.W*)* @_ZN1W3fooEv to i8*)] }
+// G32-DAG: @_ZTV1W = dso_local dllexport unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1W to i8*), i8* bitcast (void (%struct.W*)* @_ZN1W3fooEv to i8*)] }
 
 struct __declspec(dllexport) X : public virtual W {};
 // vbtable:
@@ -583,19 +607,19 @@
 
 struct __declspec(dllexport) Y {
   // Move assignment operator:
-  // MSVC2015-DAG: define weak_odr dllexport {{.+}} @"\01??4Y@@Q{{.+}}@$$Q{{.+}}@@Z"
-  // MSVC2013-DAG: define weak_odr dllexport {{.+}} @"\01??4Y@@Q{{.+}}0@A{{.+}}0@@Z"
+  // MSVC2015-DAG: define weak_odr dso_local dllexport {{.+}} @"\01??4Y@@Q{{.+}}@$$Q{{.+}}@@Z"
+  // MSVC2013-DAG: define weak_odr dso_local dllexport {{.+}} @"\01??4Y@@Q{{.+}}0@A{{.+}}0@@Z"
 
   int x;
 };
 
 struct __declspec(dllexport) Z { virtual ~Z() {} };
 // The scalar deleting dtor does not get exported:
-// M32-DAG: define linkonce_odr x86_thiscallcc i8* @"\01??_GZ@@UAEPAXI@Z"
+// M32-DAG: define linkonce_odr dso_local x86_thiscallcc i8* @"\01??_GZ@@UAEPAXI@Z"
 
 
 // The user-defined dtor does get exported:
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??1Z@@UAE@XZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??1Z@@UAE@XZ"
 
 namespace UseDtorAlias {
   struct __declspec(dllexport) A { ~A(); };
@@ -603,39 +627,39 @@
   A::~A() { }
   B::~B() { }
   // Emit a alias definition of B's constructor.
-  // M32-DAG: @"\01??1B@UseDtorAlias@@QAE@XZ" = dllexport alias {{.*}} @"\01??1A@UseDtorAlias@@QAE@XZ"
+  // M32-DAG: @"\01??1B@UseDtorAlias@@QAE@XZ" = dso_local dllexport alias {{.*}} @"\01??1A@UseDtorAlias@@QAE@XZ"
 }
 
 struct __declspec(dllexport) DefaultedCtorsDtors {
   DefaultedCtorsDtors() = default;
-  // M32MSVC2013-DAG: define weak_odr dllexport x86_thiscallcc %struct.DefaultedCtorsDtors* @"\01??0DefaultedCtorsDtors@@QAE@XZ"
+  // M32MSVC2013-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.DefaultedCtorsDtors* @"\01??0DefaultedCtorsDtors@@QAE@XZ"
   ~DefaultedCtorsDtors() = default;
-  // M32MSVC2013-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??1DefaultedCtorsDtors@@QAE@XZ"
+  // M32MSVC2013-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??1DefaultedCtorsDtors@@QAE@XZ"
 };
 
 // Export defaulted member function definitions declared inside class.
 struct __declspec(dllexport) ExportDefaultedInclassDefs {
   ExportDefaultedInclassDefs() = default;
-  // M32VS2013-DAG: define weak_odr dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
-  // M64VS2013-DAG: define weak_odr dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
-  // M32VS2015-NOT: define weak_odr dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
-  // M64VS2015-NOT: define weak_odr dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
+  // M32VS2013-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
+  // M64VS2013-DAG: define weak_odr dso_local dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
+  // M32VS2015-NOT: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
+  // M64VS2015-NOT: define weak_odr dso_local dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* returned %this)
 
   ~ExportDefaultedInclassDefs() = default;
-  // M32VS2013-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??1ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* %this)
-  // M64VS2013-DAG: define weak_odr dllexport                void @"\01??1ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* %this)
-  // M32VS2015-NOT: define weak_odr dllexport x86_thiscallcc void @"\01??1ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* %this)
-  // M64VS2015-NOT: define weak_odr dllexport                void @"\01??1ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* %this)
+  // M32VS2013-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??1ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* %this)
+  // M64VS2013-DAG: define weak_odr dso_local dllexport                void @"\01??1ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* %this)
+  // M32VS2015-NOT: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??1ExportDefaultedInclassDefs@@QAE@XZ"(%struct.ExportDefaultedInclassDefs* %this)
+  // M64VS2015-NOT: define weak_odr dso_local dllexport                void @"\01??1ExportDefaultedInclassDefs@@QEAA@XZ"(%struct.ExportDefaultedInclassDefs* %this)
 
   ExportDefaultedInclassDefs(const ExportDefaultedInclassDefs&) = default;
-  // M32VS2013-DAG: define weak_odr dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
-  // M64VS2013-DAG: define weak_odr dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
-  // M32VS2015-NOT: define weak_odr dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
-  // M64VS2015-NOT: define weak_odr dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M32VS2013-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M64VS2013-DAG: define weak_odr dso_local dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M32VS2015-NOT: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QAE@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M64VS2015-NOT: define weak_odr dso_local dllexport                %struct.ExportDefaultedInclassDefs* @"\01??0ExportDefaultedInclassDefs@@QEAA@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* returned %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
 
   ExportDefaultedInclassDefs& operator=(const ExportDefaultedInclassDefs&) = default;
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedInclassDefs* @"\01??4ExportDefaultedInclassDefs@@QAEAAU0@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
-  // M64-DAG: define weak_odr dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedInclassDefs* @"\01??4ExportDefaultedInclassDefs@@QEAAAEAU0@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ExportDefaultedInclassDefs* @"\01??4ExportDefaultedInclassDefs@@QAEAAU0@ABU0@@Z"(%struct.ExportDefaultedInclassDefs* %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
+  // M64-DAG: define weak_odr dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ExportDefaultedInclassDefs* @"\01??4ExportDefaultedInclassDefs@@QEAAAEAU0@AEBU0@@Z"(%struct.ExportDefaultedInclassDefs* %this, %struct.ExportDefaultedInclassDefs* dereferenceable({{[0-9]+}}))
 };
 
 namespace ReferencedInlineMethodInNestedClass {
@@ -648,8 +672,8 @@
     };
     T *t;
   };
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?foo@S@ReferencedInlineMethodInNestedClass@@QAEXXZ"
-  // M32-DAG: define linkonce_odr x86_thiscallcc void @"\01?bar@T@S@ReferencedInlineMethodInNestedClass@@QAEXXZ"
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?foo@S@ReferencedInlineMethodInNestedClass@@QAEXXZ"
+  // M32-DAG: define linkonce_odr dso_local x86_thiscallcc void @"\01?bar@T@S@ReferencedInlineMethodInNestedClass@@QAEXXZ"
 }
 
 // MS ignores DLL attributes on partial specializations.
@@ -657,37 +681,37 @@
 template <typename T> struct __declspec(dllexport) PartiallySpecializedClassTemplate<T*> { void f(); };
 template <typename T> void PartiallySpecializedClassTemplate<T*>::f() {}
 USEMEMFUNC(PartiallySpecializedClassTemplate<void*>, f);
-// M32-DAG: define linkonce_odr x86_thiscallcc void @"\01?f@?$PartiallySpecializedClassTemplate@PAX@@QAEXXZ"
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN33PartiallySpecializedClassTemplateIPvE1fEv
+// M32-DAG: define linkonce_odr dso_local x86_thiscallcc void @"\01?f@?$PartiallySpecializedClassTemplate@PAX@@QAEXXZ"
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN33PartiallySpecializedClassTemplateIPvE1fEv
 
 // Attributes on explicit specializations are honored.
 template <typename T> struct ExplicitlySpecializedClassTemplate {};
 template <> struct __declspec(dllexport) ExplicitlySpecializedClassTemplate<void*> { void f(); };
 void ExplicitlySpecializedClassTemplate<void*>::f() {}
 USEMEMFUNC(ExplicitlySpecializedClassTemplate<void*>, f);
-// M32-DAG: define dllexport x86_thiscallcc void @"\01?f@?$ExplicitlySpecializedClassTemplate@PAX@@QAEXXZ"
-// G32-DAG: define dllexport x86_thiscallcc void @_ZN34ExplicitlySpecializedClassTemplateIPvE1fEv
+// M32-DAG: define dso_local dllexport x86_thiscallcc void @"\01?f@?$ExplicitlySpecializedClassTemplate@PAX@@QAEXXZ"
+// G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN34ExplicitlySpecializedClassTemplateIPvE1fEv
 
 // MS inherits DLL attributes to partial specializations.
 template <typename T> struct __declspec(dllexport) PartiallySpecializedExportedClassTemplate {};
 template <typename T> struct PartiallySpecializedExportedClassTemplate<T*> { void f() {} };
 USEMEMFUNC(PartiallySpecializedExportedClassTemplate<void*>, f);
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?f@?$PartiallySpecializedExportedClassTemplate@PAX@@QAEXXZ"
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN41PartiallySpecializedExportedClassTemplateIPvE1fEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?f@?$PartiallySpecializedExportedClassTemplate@PAX@@QAEXXZ"
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN41PartiallySpecializedExportedClassTemplateIPvE1fEv
 
 // MS ignores DLL attributes on partial specializations; inheritance still works though.
 template <typename T> struct __declspec(dllexport) PartiallySpecializedExportedClassTemplate2 {};
 template <typename T> struct __declspec(dllimport) PartiallySpecializedExportedClassTemplate2<T*> { void f(); };
 template <typename T> void PartiallySpecializedExportedClassTemplate2<T*>::f() {}
 USEMEMFUNC(PartiallySpecializedExportedClassTemplate2<void*>, f);
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?f@?$PartiallySpecializedExportedClassTemplate2@PAX@@QAEXXZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?f@?$PartiallySpecializedExportedClassTemplate2@PAX@@QAEXXZ"
 // G32-DAG: declare dllimport x86_thiscallcc void @_ZN42PartiallySpecializedExportedClassTemplate2IPvE1fEv
 
 // Attributes on the instantiation take precedence over attributes on the template.
 template <typename T> struct __declspec(dllimport) ExplicitlyInstantiatedWithDifferentAttr { void f() {} };
 template struct __declspec(dllexport) ExplicitlyInstantiatedWithDifferentAttr<int>;
 USEMEMFUNC(ExplicitlyInstantiatedWithDifferentAttr<int>, f);
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?f@?$ExplicitlyInstantiatedWithDifferentAttr@H@@QAEXXZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?f@?$ExplicitlyInstantiatedWithDifferentAttr@H@@QAEXXZ"
 
 // Don't create weak dllexport aliases. (PR21373)
 struct NonExportedBaseClass {
@@ -696,62 +720,62 @@
 NonExportedBaseClass::~NonExportedBaseClass() {}
 
 struct __declspec(dllexport) ExportedDerivedClass : NonExportedBaseClass {};
-// M32-DAG: weak_odr dllexport x86_thiscallcc void @"\01??1ExportedDerivedClass@@UAE@XZ"
+// M32-DAG: weak_odr dso_local dllexport x86_thiscallcc void @"\01??1ExportedDerivedClass@@UAE@XZ"
 
 // Do not assert about generating code for constexpr functions twice during explicit instantiation (PR21718).
 template <typename T> struct ExplicitInstConstexprMembers {
   // Copy assignment operator
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable(1) %struct.ExplicitInstConstexprMembers* @"\01??4?$ExplicitInstConstexprMembers@X@@QAEAAU0@ABU0@@Z"
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable(1) %struct.ExplicitInstConstexprMembers* @"\01??4?$ExplicitInstConstexprMembers@X@@QAEAAU0@ABU0@@Z"
 
   constexpr ExplicitInstConstexprMembers() {}
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc %struct.ExplicitInstConstexprMembers* @"\01??0?$ExplicitInstConstexprMembers@X@@QAE@XZ"
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExplicitInstConstexprMembers* @"\01??0?$ExplicitInstConstexprMembers@X@@QAE@XZ"
 
   ExplicitInstConstexprMembers(const ExplicitInstConstexprMembers&) = default;
-  // M32MSVC2013-DAG: define weak_odr dllexport x86_thiscallcc %struct.ExplicitInstConstexprMembers* @"\01??0?$ExplicitInstConstexprMembers@X@@QAE@ABU0@@Z"
+  // M32MSVC2013-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExplicitInstConstexprMembers* @"\01??0?$ExplicitInstConstexprMembers@X@@QAE@ABU0@@Z"
 
   constexpr int f() const { return 42; }
-  // M32-DAG: define weak_odr dllexport x86_thiscallcc i32 @"\01?f@?$ExplicitInstConstexprMembers@X@@QBEHXZ"
+  // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc i32 @"\01?f@?$ExplicitInstConstexprMembers@X@@QBEHXZ"
 };
 template struct __declspec(dllexport) ExplicitInstConstexprMembers<void>;
 
 template <typename T> struct ExplicitInstantiationDeclTemplate { void f() {} };
 extern template struct __declspec(dllexport) ExplicitInstantiationDeclTemplate<int>;
 USEMEMFUNC(ExplicitInstantiationDeclTemplate<int>, f);
-// M32-DAG: {{declare|define available_externally}} x86_thiscallcc void @"\01?f@?$ExplicitInstantiationDeclTemplate@H@@QAEXXZ"
+// M32-DAG: {{declare|define available_externally}} dso_local x86_thiscallcc void @"\01?f@?$ExplicitInstantiationDeclTemplate@H@@QAEXXZ"
 
 template <typename T> struct __declspec(dllexport) ExplicitInstantiationDeclExportedTemplate { void f() {} };
 extern template struct ExplicitInstantiationDeclExportedTemplate<int>;
 USEMEMFUNC(ExplicitInstantiationDeclExportedTemplate<int>, f);
-// M32-DAG: {{declare|define available_externally}} x86_thiscallcc void @"\01?f@?$ExplicitInstantiationDeclExportedTemplate@H@@QAEXXZ"
+// M32-DAG: {{declare|define available_externally}} dso_local x86_thiscallcc void @"\01?f@?$ExplicitInstantiationDeclExportedTemplate@H@@QAEXXZ"
 
 template <typename T> struct ExplicitInstantiationDeclExportedDefTemplate { void f() {} ExplicitInstantiationDeclExportedDefTemplate() {} };
 extern template struct ExplicitInstantiationDeclExportedDefTemplate<int>;
 template struct __declspec(dllexport) ExplicitInstantiationDeclExportedDefTemplate<int>;
 USEMEMFUNC(ExplicitInstantiationDeclExportedDefTemplate<int>, f);
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?f@?$ExplicitInstantiationDeclExportedDefTemplate@H@@QAEXXZ"
-// M32-DAG: define weak_odr dllexport x86_thiscallcc %struct.ExplicitInstantiationDeclExportedDefTemplate* @"\01??0?$ExplicitInstantiationDeclExportedDefTemplate@H@@QAE@XZ"
-// G32-DAG: define weak_odr x86_thiscallcc void @_ZN44ExplicitInstantiationDeclExportedDefTemplateIiE1fEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?f@?$ExplicitInstantiationDeclExportedDefTemplate@H@@QAEXXZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %struct.ExplicitInstantiationDeclExportedDefTemplate* @"\01??0?$ExplicitInstantiationDeclExportedDefTemplate@H@@QAE@XZ"
+// G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN44ExplicitInstantiationDeclExportedDefTemplateIiE1fEv
 
 template <typename T> struct ImplicitInstantiationExportedExplicitInstantiationDefTemplate { virtual void f() {} };
 ImplicitInstantiationExportedExplicitInstantiationDefTemplate<int> ImplicitInstantiationExportedExplicitInstantiationDefTemplateInstance;
 template struct __declspec(dllexport) ImplicitInstantiationExportedExplicitInstantiationDefTemplate<int>;
 USEMEMFUNC(ImplicitInstantiationExportedExplicitInstantiationDefTemplate<int>, f);
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?f@?$ImplicitInstantiationExportedExplicitInstantiationDefTemplate@H@@UAEXXZ"
-// G32-DAG: define weak_odr x86_thiscallcc void @_ZN61ImplicitInstantiationExportedExplicitInstantiationDefTemplateIiE1fEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?f@?$ImplicitInstantiationExportedExplicitInstantiationDefTemplate@H@@UAEXXZ"
+// G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN61ImplicitInstantiationExportedExplicitInstantiationDefTemplateIiE1fEv
 
 template <typename T> struct __declspec(dllexport) ImplicitInstantiationExplicitInstantiationDefExportedTemplate { virtual void f() {} };
 ImplicitInstantiationExplicitInstantiationDefExportedTemplate<int> ImplicitInstantiationExplicitInstantiationDefExportedTemplateInstance;
 template struct ImplicitInstantiationExplicitInstantiationDefExportedTemplate<int>;
 USEMEMFUNC(ImplicitInstantiationExplicitInstantiationDefExportedTemplate<int>, f);
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?f@?$ImplicitInstantiationExplicitInstantiationDefExportedTemplate@H@@UAEXXZ"
-// G32-DAG: define weak_odr x86_thiscallcc void @_ZN61ImplicitInstantiationExplicitInstantiationDefExportedTemplateIiE1fEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?f@?$ImplicitInstantiationExplicitInstantiationDefExportedTemplate@H@@UAEXXZ"
+// G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN61ImplicitInstantiationExplicitInstantiationDefExportedTemplateIiE1fEv
 
 template <typename T> struct __declspec(dllexport) ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplate { virtual void f() {} };
 ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplate<int> ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplateInstance;
 template struct __declspec(dllexport) ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplate<int>;
 USEMEMFUNC(ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplate<int>, f);
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?f@?$ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplate@H@@UAEXXZ"
-// G32-DAG: define weak_odr x86_thiscallcc void @_ZN69ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplateIiE1fEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?f@?$ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplate@H@@UAEXXZ"
+// G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN69ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplateIiE1fEv
 
 namespace { struct InternalLinkageType {}; }
 struct __declspec(dllexport) PR23308 {
@@ -765,14 +789,14 @@
 template <typename T> struct PR23770DerivedTemplate : PR23770BaseTemplate<int> {};
 extern template struct PR23770DerivedTemplate<int>;
 template struct __declspec(dllexport) PR23770DerivedTemplate<int>;
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?f@?$PR23770BaseTemplate@H@@QAEXXZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?f@?$PR23770BaseTemplate@H@@QAEXXZ"
 
 namespace InClassInits {
 
 struct __declspec(dllexport) S {
   int x = 42;
 };
-// M32-DAG: define weak_odr dllexport x86_thiscallcc %"struct.InClassInits::S"* @"\01??0S@InClassInits@@QAE@XZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %"struct.InClassInits::S"* @"\01??0S@InClassInits@@QAE@XZ"
 
 // dllexport an already instantiated class template.
 template <typename T> struct Base {
@@ -780,7 +804,7 @@
 };
 Base<int> base;
 struct __declspec(dllexport) T : Base<int> { };
-// M32-DAG: define weak_odr dllexport x86_thiscallcc %"struct.InClassInits::Base"* @"\01??0?$Base@H@InClassInits@@QAE@XZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %"struct.InClassInits::Base"* @"\01??0?$Base@H@InClassInits@@QAE@XZ"
 
 struct A { A(int); };
 struct __declspec(dllexport) U {
@@ -788,7 +812,7 @@
   U(A = 0) {}
   int x = 0;
 };
-// M32-DAG: define weak_odr dllexport x86_thiscallcc %"struct.InClassInits::U"* @"\01??0U@InClassInits@@QAE@UA@1@@Z"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %"struct.InClassInits::U"* @"\01??0U@InClassInits@@QAE@UA@1@@Z"
 
 struct Evil {
   template <typename T> struct Base {
@@ -800,7 +824,7 @@
   // the default ctor must still be delayed.
   struct __declspec(dllexport) T : Base<int> {};
 };
-// M32-DAG: define weak_odr dllexport x86_thiscallcc %"struct.InClassInits::Evil::Base"* @"\01??0?$Base@H@Evil@InClassInits@@QAE@XZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %"struct.InClassInits::Evil::Base"* @"\01??0?$Base@H@Evil@InClassInits@@QAE@XZ"
 
 template <typename T> struct Foo {};
 template <typename T> struct Bar {
@@ -812,7 +836,7 @@
 // After parsing Baz, in ActOnFinishCXXNonNestedClass we would synthesize
 // Baz's operator=, causing instantiation of Foo<int> after which
 // ActOnFinishCXXNonNestedClass is called, and we would bite our own tail.
-// M32-DAG: define weak_odr dllexport x86_thiscallcc dereferenceable(1) %"struct.InClassInits::Baz"* @"\01??4Baz@InClassInits@@QAEAAU01@ABU01@@Z"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc dereferenceable(1) %"struct.InClassInits::Baz"* @"\01??4Baz@InClassInits@@QAEAAU01@ABU01@@Z"
 }
 
 // We had an issue where instantiating A would force emission of B's delayed
@@ -823,28 +847,28 @@
   B(int = 0) {}
   A<int> m_fn1() {}
 };
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FB@pr26490@@QAEXXZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01??_FB@pr26490@@QAEXXZ"
 }
 
-// dllexport trumps dllexport on an explicit instantiation.
+// dllexport trumps dllimport on an explicit instantiation.
 template <typename T> struct ExplicitInstantiationTwoAttributes { void f() {} };
 template struct __declspec(dllexport) __declspec(dllimport) ExplicitInstantiationTwoAttributes<int>;
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?f@?$ExplicitInstantiationTwoAttributes@H@@QAEXXZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?f@?$ExplicitInstantiationTwoAttributes@H@@QAEXXZ"
 
 namespace pr34849 {
 // Specializations of exported class template member functions get exported.
 template <typename> struct __declspec(dllexport) ExportedClassTemplate { void foo(); };
 template<> void ExportedClassTemplate<int>::foo() {}
 template struct ExportedClassTemplate<int>;
-// M32-DAG: define dllexport x86_thiscallcc void @"\01?foo@?$ExportedClassTemplate@H@pr34849@@QAEXXZ"
+// M32-DAG: define dso_local dllexport x86_thiscallcc void @"\01?foo@?$ExportedClassTemplate@H@pr34849@@QAEXXZ"
 
 // Specializations of exported class member template functions do not get exported.
 struct __declspec(dllexport) ExportedClass { template <typename> void bar() ; };
 template<> void ExportedClass::bar<int>() {}
-// M32-DAG: define x86_thiscallcc void @"\01??$bar@H@ExportedClass@pr34849@@QAEXXZ"
+// M32-DAG: define dso_local x86_thiscallcc void @"\01??$bar@H@ExportedClass@pr34849@@QAEXXZ"
 template <typename> struct __declspec(dllexport) ExportedClassTemplate2 { template <typename> void baz(); };
 template<> template<> void ExportedClassTemplate2<int>::baz<int>() {}
-// M32-DAG: define x86_thiscallcc void @"\01??$baz@H@?$ExportedClassTemplate2@H@pr34849@@QAEXXZ"
+// M32-DAG: define dso_local x86_thiscallcc void @"\01??$baz@H@?$ExportedClassTemplate2@H@pr34849@@QAEXXZ"
 }
 
 //===----------------------------------------------------------------------===//
@@ -880,14 +904,14 @@
 // MS: ClassTemplate<int> gets exported.
 struct __declspec(dllexport) DerivedFromTemplate : public ClassTemplate<int> {};
 USEMEMFUNC(DerivedFromTemplate, func)
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?func@?$ClassTemplate@H@@QAEXXZ"
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ClassTemplateIiE4funcEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?func@?$ClassTemplate@H@@QAEXXZ"
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ClassTemplateIiE4funcEv
 
 // ExportedTemplate is explicitly exported.
 struct __declspec(dllexport) DerivedFromExportedTemplate : public ExportedClassTemplate<int> {};
 USEMEMFUNC(DerivedFromExportedTemplate, func)
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?func@?$ExportedClassTemplate@H@@QAEXXZ"
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN21ExportedClassTemplateIiE4funcEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?func@?$ExportedClassTemplate@H@@QAEXXZ"
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN21ExportedClassTemplateIiE4funcEv
 
 // ImportedClassTemplate is explicitly imported.
 struct __declspec(dllexport) DerivedFromImportedTemplate : public ImportedClassTemplate<int> {};
@@ -899,27 +923,27 @@
 struct DerivedFromTemplateD : public ClassTemplate<double> {};
 struct __declspec(dllexport) DerivedFromTemplateD2 : public ClassTemplate<double> {};
 USEMEMFUNC(DerivedFromTemplateD2, func)
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?func@?$ClassTemplate@N@@QAEXXZ"
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ClassTemplateIdE4funcEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?func@?$ClassTemplate@N@@QAEXXZ"
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ClassTemplateIdE4funcEv
 
 // MS: Base class already instantiated with different dll attribute.
 struct __declspec(dllimport) DerivedFromTemplateB : public ClassTemplate<bool> {};
 struct __declspec(dllexport) DerivedFromTemplateB2 : public ClassTemplate<bool> {};
 USEMEMFUNC(DerivedFromTemplateB2, func)
 // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc void @"\01?func@?$ClassTemplate@_N@@QAEXXZ"
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ClassTemplateIbE4funcEv
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ClassTemplateIbE4funcEv
 
 // Base class already specialized without dll attribute.
 struct __declspec(dllexport) DerivedFromExplicitlySpecializedTemplate : public ExplicitlySpecializedTemplate<int> {};
 USEMEMFUNC(DerivedFromExplicitlySpecializedTemplate, func)
-// M32-DAG: define x86_thiscallcc void @"\01?func@?$ExplicitlySpecializedTemplate@H@@QAEXXZ"
-// G32-DAG: define x86_thiscallcc void @_ZN29ExplicitlySpecializedTemplateIiE4funcEv
+// M32-DAG: define dso_local x86_thiscallcc void @"\01?func@?$ExplicitlySpecializedTemplate@H@@QAEXXZ"
+// G32-DAG: define dso_local x86_thiscallcc void @_ZN29ExplicitlySpecializedTemplateIiE4funcEv
 
 // Base class alredy specialized with export attribute.
 struct __declspec(dllexport) DerivedFromExplicitlyExportSpecializedTemplate : public ExplicitlyExportSpecializedTemplate<int> {};
 USEMEMFUNC(DerivedFromExplicitlyExportSpecializedTemplate, func)
-// M32-DAG: define dllexport x86_thiscallcc void @"\01?func@?$ExplicitlyExportSpecializedTemplate@H@@QAEXXZ"
-// G32-DAG: define dllexport x86_thiscallcc void @_ZN35ExplicitlyExportSpecializedTemplateIiE4funcEv
+// M32-DAG: define dso_local dllexport x86_thiscallcc void @"\01?func@?$ExplicitlyExportSpecializedTemplate@H@@QAEXXZ"
+// G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN35ExplicitlyExportSpecializedTemplateIiE4funcEv
 
 // Base class already specialized with import attribute.
 struct __declspec(dllexport) DerivedFromExplicitlyImportSpecializedTemplate : public ExplicitlyImportSpecializedTemplate<int> {};
@@ -930,14 +954,14 @@
 // Base class already instantiated without dll attribute.
 struct __declspec(dllexport) DerivedFromExplicitlyInstantiatedTemplate : public ExplicitlyInstantiatedTemplate<int> {};
 USEMEMFUNC(DerivedFromExplicitlyInstantiatedTemplate, func)
-// M32-DAG: define weak_odr x86_thiscallcc void @"\01?func@?$ExplicitlyInstantiatedTemplate@H@@QAEXXZ"
-// G32-DAG: define weak_odr x86_thiscallcc void @_ZN30ExplicitlyInstantiatedTemplateIiE4funcEv
+// M32-DAG: define weak_odr dso_local x86_thiscallcc void @"\01?func@?$ExplicitlyInstantiatedTemplate@H@@QAEXXZ"
+// G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN30ExplicitlyInstantiatedTemplateIiE4funcEv
 
 // Base class already instantiated with export attribute.
 struct __declspec(dllexport) DerivedFromExplicitlyExportInstantiatedTemplate : public ExplicitlyExportInstantiatedTemplate<int> {};
 USEMEMFUNC(DerivedFromExplicitlyExportInstantiatedTemplate, func)
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?func@?$ExplicitlyExportInstantiatedTemplate@H@@QAEXXZ"
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN36ExplicitlyExportInstantiatedTemplateIiE4funcEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?func@?$ExplicitlyExportInstantiatedTemplate@H@@QAEXXZ"
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN36ExplicitlyExportInstantiatedTemplateIiE4funcEv
 
 // Base class already instantiated with import attribute.
 struct __declspec(dllexport) DerivedFromExplicitlyImportInstantiatedTemplate : public ExplicitlyImportInstantiatedTemplate<int> {};
@@ -950,15 +974,15 @@
 template <typename T> struct MiddleClass : public TopClass<T> { };
 struct __declspec(dllexport) BottomClass : public MiddleClass<int> { };
 USEMEMFUNC(BottomClass, func)
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?func@?$TopClass@H@@QAEXXZ"
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN8TopClassIiE4funcEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?func@?$TopClass@H@@QAEXXZ"
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN8TopClassIiE4funcEv
 
 template <typename T> struct ExplicitInstantiationDeclTemplateBase { void func() {} };
 extern template struct ExplicitInstantiationDeclTemplateBase<int>;
 struct __declspec(dllexport) DerivedFromExplicitInstantiationDeclTemplateBase : public ExplicitInstantiationDeclTemplateBase<int> {};
 template struct ExplicitInstantiationDeclTemplateBase<int>;
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?func@?$ExplicitInstantiationDeclTemplateBase@H@@QAEXXZ"
-// G32-DAG: define weak_odr x86_thiscallcc void @_ZN37ExplicitInstantiationDeclTemplateBaseIiE4funcEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?func@?$ExplicitInstantiationDeclTemplateBase@H@@QAEXXZ"
+// G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN37ExplicitInstantiationDeclTemplateBaseIiE4funcEv
 
 // PR26076
 struct LayerSelectionBound;
@@ -971,8 +995,8 @@
   };
   LayerSelection foo;
 };
-// M32-DAG: define weak_odr dllexport x86_thiscallcc %"struct.LayerTreeImpl::ElementLayers"* @"\01??0ElementLayers@LayerTreeImpl@@QAE@XZ"
-// M64-DAG: define weak_odr dllexport %"struct.LayerTreeImpl::ElementLayers"* @"\01??0ElementLayers@LayerTreeImpl@@QEAA@XZ"
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %"struct.LayerTreeImpl::ElementLayers"* @"\01??0ElementLayers@LayerTreeImpl@@QAE@XZ"
+// M64-DAG: define weak_odr dso_local dllexport %"struct.LayerTreeImpl::ElementLayers"* @"\01??0ElementLayers@LayerTreeImpl@@QEAA@XZ"
 
 class __declspec(dllexport) ACE_Shared_Object {
 public:
@@ -980,6 +1004,6 @@
 };
 class __declspec(dllexport) ACE_Service_Object : public ACE_Shared_Object {};
 // Implicit move constructor declaration.
-// MSVC2015-DAG: define weak_odr dllexport {{.+}}ACE_Service_Object@@Q{{.+}}@$$Q
+// MSVC2015-DAG: define weak_odr dso_local dllexport {{.+}}ACE_Service_Object@@Q{{.+}}@$$Q
 // The declarations should not be exported.
-// MSVC2013-NOT: define weak_odr dllexport {{.+}}ACE_Service_Object@@Q{{.+}}@$$Q
+// MSVC2013-NOT: define weak_odr dso_local dllexport {{.+}}ACE_Service_Object@@Q{{.+}}@$$Q
diff --git a/test/CodeGenCXX/dllimport-dtor-thunks.cpp b/test/CodeGenCXX/dllimport-dtor-thunks.cpp
index b381fff..a1ff34c 100644
--- a/test/CodeGenCXX/dllimport-dtor-thunks.cpp
+++ b/test/CodeGenCXX/dllimport-dtor-thunks.cpp
@@ -39,11 +39,11 @@
 
 // The destructors are called in reverse order of construction. Only the third
 // needs the complete destructor (_D).
-// CHECK-LABEL: define void @testit()
+// CHECK-LABEL: define dso_local void @testit()
 // CHECK:  call void @"\01??_DImportVBaseOverrideVDtor@@QEAAXXZ"(%struct.ImportVBaseOverrideVDtor* %{{.*}})
 // CHECK:  call void @"\01??1ImportOverrideVDtor@@UEAA@XZ"(%struct.ImportOverrideVDtor* %{{.*}})
 // CHECK:  call void @"\01??1ImportIntroVDtor@@UEAA@XZ"(%struct.ImportIntroVDtor* %{{.*}})
 
-// CHECK-LABEL: define linkonce_odr void @"\01??_DImportVBaseOverrideVDtor@@QEAAXXZ"
+// CHECK-LABEL: define linkonce_odr dso_local void @"\01??_DImportVBaseOverrideVDtor@@QEAAXXZ"
 // CHECK-LABEL: declare dllimport void @"\01??1ImportOverrideVDtor@@UEAA@XZ"
 // CHECK-LABEL: declare dllimport void @"\01??1ImportIntroVDtor@@UEAA@XZ"
diff --git a/test/CodeGenCXX/dllimport-members.cpp b/test/CodeGenCXX/dllimport-members.cpp
index ff78683..c0986d5 100644
--- a/test/CodeGenCXX/dllimport-members.cpp
+++ b/test/CodeGenCXX/dllimport-members.cpp
@@ -63,86 +63,86 @@
 struct ImportMembers {
   struct Nested;
 
-  // M32-DAG: define  dllexport   x86_thiscallcc void @"\01?normalDef@ImportMembers@@QAEXXZ"(%struct.ImportMembers* %this)
-  // M64-DAG: define  dllexport                  void @"\01?normalDef@ImportMembers@@QEAAXXZ"(%struct.ImportMembers* %this)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?normalDecl@ImportMembers@@QAEXXZ"(%struct.ImportMembers*)
-  // M64-DAG: declare dllimport                  void @"\01?normalDecl@ImportMembers@@QEAAXXZ"(%struct.ImportMembers*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?normalInclass@ImportMembers@@QAEXXZ"(%struct.ImportMembers*)
-  // M64-DAG: declare dllimport                  void @"\01?normalInclass@ImportMembers@@QEAAXXZ"(%struct.ImportMembers*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?normalInlineDef@ImportMembers@@QAEXXZ"(%struct.ImportMembers*)
-  // M64-DAG: declare dllimport                  void @"\01?normalInlineDef@ImportMembers@@QEAAXXZ"(%struct.ImportMembers*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?normalInlineDecl@ImportMembers@@QAEXXZ"(%struct.ImportMembers*)
-  // M64-DAG: declare dllimport                  void @"\01?normalInlineDecl@ImportMembers@@QEAAXXZ"(%struct.ImportMembers*)
-  // G32-DAG: define              x86_thiscallcc void @_ZN13ImportMembers9normalDefEv(%struct.ImportMembers* %this)
-  // G64-DAG: define                             void @_ZN13ImportMembers9normalDefEv(%struct.ImportMembers* %this)
-  // G32-DAG: declare dllimport   x86_thiscallcc void @_ZN13ImportMembers10normalDeclEv(%struct.ImportMembers*)
-  // G64-DAG: declare dllimport                  void @_ZN13ImportMembers10normalDeclEv(%struct.ImportMembers*)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers13normalInclassEv(%struct.ImportMembers* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers13normalInclassEv(%struct.ImportMembers* %this)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers15normalInlineDefEv(%struct.ImportMembers* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers15normalInlineDefEv(%struct.ImportMembers* %this)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers16normalInlineDeclEv(%struct.ImportMembers* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers16normalInlineDeclEv(%struct.ImportMembers* %this)
+  // M32-DAG: define  dso_local dllexport   x86_thiscallcc void @"\01?normalDef@ImportMembers@@QAEXXZ"(%struct.ImportMembers* %this)
+  // M64-DAG: define  dso_local dllexport                  void @"\01?normalDef@ImportMembers@@QEAAXXZ"(%struct.ImportMembers* %this)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?normalDecl@ImportMembers@@QAEXXZ"(%struct.ImportMembers*)
+  // M64-DAG: declare           dllimport                  void @"\01?normalDecl@ImportMembers@@QEAAXXZ"(%struct.ImportMembers*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?normalInclass@ImportMembers@@QAEXXZ"(%struct.ImportMembers*)
+  // M64-DAG: declare           dllimport                  void @"\01?normalInclass@ImportMembers@@QEAAXXZ"(%struct.ImportMembers*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?normalInlineDef@ImportMembers@@QAEXXZ"(%struct.ImportMembers*)
+  // M64-DAG: declare           dllimport                  void @"\01?normalInlineDef@ImportMembers@@QEAAXXZ"(%struct.ImportMembers*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?normalInlineDecl@ImportMembers@@QAEXXZ"(%struct.ImportMembers*)
+  // M64-DAG: declare           dllimport                  void @"\01?normalInlineDecl@ImportMembers@@QEAAXXZ"(%struct.ImportMembers*)
+  // G32-DAG: define  dso_local             x86_thiscallcc void @_ZN13ImportMembers9normalDefEv(%struct.ImportMembers* %this)
+  // G64-DAG: define  dso_local                            void @_ZN13ImportMembers9normalDefEv(%struct.ImportMembers* %this)
+  // G32-DAG: declare           dllimport   x86_thiscallcc void @_ZN13ImportMembers10normalDeclEv(%struct.ImportMembers*)
+  // G64-DAG: declare           dllimport                  void @_ZN13ImportMembers10normalDeclEv(%struct.ImportMembers*)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers13normalInclassEv(%struct.ImportMembers* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN13ImportMembers13normalInclassEv(%struct.ImportMembers* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers15normalInlineDefEv(%struct.ImportMembers* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN13ImportMembers15normalInlineDefEv(%struct.ImportMembers* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers16normalInlineDeclEv(%struct.ImportMembers* %this)
+  // G64-DAG: define linkonce_odr dso_local                 void @_ZN13ImportMembers16normalInlineDeclEv(%struct.ImportMembers* %this)
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?normalInclass@ImportMembers@@QAEXXZ"(
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?normalInlineDef@ImportMembers@@QAEXXZ"(
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?normalInlineDecl@ImportMembers@@QAEXXZ"(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers13normalInclassEv(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers15normalInlineDefEv(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers16normalInlineDeclEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers13normalInclassEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers15normalInlineDefEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers16normalInlineDeclEv(
   __declspec(dllimport)                void normalDef(); // dllimport ignored
   __declspec(dllimport)                void normalDecl();
   __declspec(dllimport)                void normalInclass() {}
   __declspec(dllimport)                void normalInlineDef();
   __declspec(dllimport)         inline void normalInlineDecl();
 
-  // M32-DAG: define  dllexport   x86_thiscallcc void @"\01?virtualDef@ImportMembers@@UAEXXZ"(%struct.ImportMembers* %this)
-  // M64-DAG: define  dllexport                  void @"\01?virtualDef@ImportMembers@@UEAAXXZ"(%struct.ImportMembers* %this)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?virtualDecl@ImportMembers@@UAEXXZ"(%struct.ImportMembers*)
-  // M64-DAG: declare dllimport                  void @"\01?virtualDecl@ImportMembers@@UEAAXXZ"(%struct.ImportMembers*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?virtualInclass@ImportMembers@@UAEXXZ"(%struct.ImportMembers*)
-  // M64-DAG: declare dllimport                  void @"\01?virtualInclass@ImportMembers@@UEAAXXZ"(%struct.ImportMembers*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?virtualInlineDef@ImportMembers@@UAEXXZ"(%struct.ImportMembers*)
-  // M64-DAG: declare dllimport                  void @"\01?virtualInlineDef@ImportMembers@@UEAAXXZ"(%struct.ImportMembers*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?virtualInlineDecl@ImportMembers@@UAEXXZ"(%struct.ImportMembers*)
-  // M64-DAG: declare dllimport                  void @"\01?virtualInlineDecl@ImportMembers@@UEAAXXZ"(%struct.ImportMembers*)
-  // G32-DAG: define              x86_thiscallcc void @_ZN13ImportMembers10virtualDefEv(%struct.ImportMembers* %this)
-  // G64-DAG: define                             void @_ZN13ImportMembers10virtualDefEv(%struct.ImportMembers* %this)
+  // M32-DAG: define  dso_local dllexport   x86_thiscallcc void @"\01?virtualDef@ImportMembers@@UAEXXZ"(%struct.ImportMembers* %this)
+  // M64-DAG: define  dso_local dllexport                  void @"\01?virtualDef@ImportMembers@@UEAAXXZ"(%struct.ImportMembers* %this)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?virtualDecl@ImportMembers@@UAEXXZ"(%struct.ImportMembers*)
+  // M64-DAG: declare           dllimport                  void @"\01?virtualDecl@ImportMembers@@UEAAXXZ"(%struct.ImportMembers*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?virtualInclass@ImportMembers@@UAEXXZ"(%struct.ImportMembers*)
+  // M64-DAG: declare           dllimport                  void @"\01?virtualInclass@ImportMembers@@UEAAXXZ"(%struct.ImportMembers*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?virtualInlineDef@ImportMembers@@UAEXXZ"(%struct.ImportMembers*)
+  // M64-DAG: declare           dllimport                  void @"\01?virtualInlineDef@ImportMembers@@UEAAXXZ"(%struct.ImportMembers*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?virtualInlineDecl@ImportMembers@@UAEXXZ"(%struct.ImportMembers*)
+  // M64-DAG: declare           dllimport                  void @"\01?virtualInlineDecl@ImportMembers@@UEAAXXZ"(%struct.ImportMembers*)
+  // G32-DAG: define  dso_local             x86_thiscallcc void @_ZN13ImportMembers10virtualDefEv(%struct.ImportMembers* %this)
+  // G64-DAG: define  dso_local                            void @_ZN13ImportMembers10virtualDefEv(%struct.ImportMembers* %this)
   // G32-DAG: declare dllimport   x86_thiscallcc void @_ZN13ImportMembers11virtualDeclEv(%struct.ImportMembers*)
   // G64-DAG: declare dllimport                  void @_ZN13ImportMembers11virtualDeclEv(%struct.ImportMembers*)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers14virtualInclassEv(%struct.ImportMembers* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers14virtualInclassEv(%struct.ImportMembers* %this)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers16virtualInlineDefEv(%struct.ImportMembers* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers16virtualInlineDefEv(%struct.ImportMembers* %this)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers17virtualInlineDeclEv(%struct.ImportMembers* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers17virtualInlineDeclEv(%struct.ImportMembers* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers14virtualInclassEv(%struct.ImportMembers* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN13ImportMembers14virtualInclassEv(%struct.ImportMembers* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers16virtualInlineDefEv(%struct.ImportMembers* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN13ImportMembers16virtualInlineDefEv(%struct.ImportMembers* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers17virtualInlineDeclEv(%struct.ImportMembers* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN13ImportMembers17virtualInlineDeclEv(%struct.ImportMembers* %this)
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?virtualInclass@ImportMembers@@UAEXXZ"(
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?virtualInlineDef@ImportMembers@@UAEXXZ"(
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?virtualInlineDecl@ImportMembers@@UAEXXZ"(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers14virtualInclassEv(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers16virtualInlineDefEv(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers17virtualInlineDeclEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers14virtualInclassEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers16virtualInlineDefEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers17virtualInlineDeclEv(
   __declspec(dllimport) virtual        void virtualDef(); // dllimport ignored
   __declspec(dllimport) virtual        void virtualDecl();
   __declspec(dllimport) virtual        void virtualInclass() {}
   __declspec(dllimport) virtual        void virtualInlineDef();
   __declspec(dllimport) virtual inline void virtualInlineDecl();
 
-  // MSC-DAG: define  dllexport                void @"\01?staticDef@ImportMembers@@SAXXZ"()
-  // MSC-DAG: declare dllimport                void @"\01?staticDecl@ImportMembers@@SAXXZ"()
-  // MSC-DAG: declare dllimport                void @"\01?staticInclass@ImportMembers@@SAXXZ"()
-  // MSC-DAG: declare dllimport                void @"\01?staticInlineDef@ImportMembers@@SAXXZ"()
-  // MSC-DAG: declare dllimport                void @"\01?staticInlineDecl@ImportMembers@@SAXXZ"()
-  // GNU-DAG: define                           void @_ZN13ImportMembers9staticDefEv()
-  // GNU-DAG: declare dllimport                void @_ZN13ImportMembers10staticDeclEv()
-  // GNU-DAG: define linkonce_odr              void @_ZN13ImportMembers13staticInclassEv()
-  // GNU-DAG: define linkonce_odr              void @_ZN13ImportMembers15staticInlineDefEv()
-  // GNU-DAG: define linkonce_odr              void @_ZN13ImportMembers16staticInlineDeclEv()
+  // MSC-DAG: define  dso_local dllexport                void @"\01?staticDef@ImportMembers@@SAXXZ"()
+  // MSC-DAG: declare           dllimport                void @"\01?staticDecl@ImportMembers@@SAXXZ"()
+  // MSC-DAG: declare           dllimport                void @"\01?staticInclass@ImportMembers@@SAXXZ"()
+  // MSC-DAG: declare           dllimport                void @"\01?staticInlineDef@ImportMembers@@SAXXZ"()
+  // MSC-DAG: declare           dllimport                void @"\01?staticInlineDecl@ImportMembers@@SAXXZ"()
+  // GNU-DAG: define  dso_local                          void @_ZN13ImportMembers9staticDefEv()
+  // GNU-DAG: declare           dllimport                void @_ZN13ImportMembers10staticDeclEv()
+  // GNU-DAG: define linkonce_odr dso_local               void @_ZN13ImportMembers13staticInclassEv()
+  // GNU-DAG: define linkonce_odr dso_local              void @_ZN13ImportMembers15staticInlineDefEv()
+  // GNU-DAG: define linkonce_odr dso_local              void @_ZN13ImportMembers16staticInlineDeclEv()
   // MO1-DAG: define available_externally dllimport void @"\01?staticInclass@ImportMembers@@SAXXZ"()
   // MO1-DAG: define available_externally dllimport void @"\01?staticInlineDef@ImportMembers@@SAXXZ"()
   // MO1-DAG: define available_externally dllimport void @"\01?staticInlineDecl@ImportMembers@@SAXXZ"()
-  // GO1-DAG: define linkonce_odr              void @_ZN13ImportMembers13staticInclassEv()
-  // GO1-DAG: define linkonce_odr              void @_ZN13ImportMembers15staticInlineDefEv()
-  // GO1-DAG: define linkonce_odr              void @_ZN13ImportMembers16staticInlineDeclEv()
+  // GO1-DAG: define linkonce_odr dso_local              void @_ZN13ImportMembers13staticInclassEv()
+  // GO1-DAG: define linkonce_odr dso_local              void @_ZN13ImportMembers15staticInlineDefEv()
+  // GO1-DAG: define linkonce_odr dso_local              void @_ZN13ImportMembers16staticInlineDeclEv()
   __declspec(dllimport) static         void staticDef(); // dllimport ignored
   __declspec(dllimport) static         void staticDecl();
   __declspec(dllimport) static         void staticInclass() {}
@@ -169,10 +169,10 @@
   __declspec(dllimport)                void privateNormalDecl();
   __declspec(dllimport) static         void privateStaticDecl();
 
-  // M32-DAG: declare           x86_thiscallcc void @"\01?ignored@ImportMembers@@QAEXXZ"(%struct.ImportMembers*)
-  // M64-DAG: declare                          void @"\01?ignored@ImportMembers@@QEAAXXZ"(%struct.ImportMembers*)
-  // G32-DAG: declare           x86_thiscallcc void @_ZN13ImportMembers7ignoredEv(%struct.ImportMembers*)
-  // G64-DAG: declare                          void @_ZN13ImportMembers7ignoredEv(%struct.ImportMembers*)
+  // M32-DAG: declare dso_local          x86_thiscallcc void @"\01?ignored@ImportMembers@@QAEXXZ"(%struct.ImportMembers*)
+  // M64-DAG: declare dso_local                         void @"\01?ignored@ImportMembers@@QEAAXXZ"(%struct.ImportMembers*)
+  // G32-DAG: declare dso_local          x86_thiscallcc void @_ZN13ImportMembers7ignoredEv(%struct.ImportMembers*)
+  // G64-DAG: declare dso_local                         void @_ZN13ImportMembers7ignoredEv(%struct.ImportMembers*)
 public:
   void ignored();
 
@@ -235,87 +235,87 @@
 
 // Import individual members of a nested class.
 struct ImportMembers::Nested {
-  // M32-DAG: define  dllexport   x86_thiscallcc void @"\01?normalDef@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"* %this)
-  // M64-DAG: define  dllexport                  void @"\01?normalDef@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"* %this)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?normalDecl@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"*)
-  // M64-DAG: declare dllimport                  void @"\01?normalDecl@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?normalInclass@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"*)
-  // M64-DAG: declare dllimport                  void @"\01?normalInclass@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?normalInlineDef@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"*)
-  // M64-DAG: declare dllimport                  void @"\01?normalInlineDef@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?normalInlineDecl@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"*)
-  // M64-DAG: declare dllimport                  void @"\01?normalInlineDecl@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"*)
-  // G32-DAG: define              x86_thiscallcc void @_ZN13ImportMembers6Nested9normalDefEv(%"struct.ImportMembers::Nested"* %this)
-  // G64-DAG: define                             void @_ZN13ImportMembers6Nested9normalDefEv(%"struct.ImportMembers::Nested"* %this)
+  // M32-DAG: define  dso_local dllexport   x86_thiscallcc void @"\01?normalDef@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"* %this)
+  // M64-DAG: define  dso_local dllexport                  void @"\01?normalDef@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"* %this)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?normalDecl@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"*)
+  // M64-DAG: declare           dllimport                  void @"\01?normalDecl@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?normalInclass@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"*)
+  // M64-DAG: declare           dllimport                  void @"\01?normalInclass@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?normalInlineDef@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"*)
+  // M64-DAG: declare           dllimport                  void @"\01?normalInlineDef@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?normalInlineDecl@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"*)
+  // M64-DAG: declare           dllimport                  void @"\01?normalInlineDecl@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"*)
+  // G32-DAG: define  dso_local             x86_thiscallcc void @_ZN13ImportMembers6Nested9normalDefEv(%"struct.ImportMembers::Nested"* %this)
+  // G64-DAG: define  dso_local                            void @_ZN13ImportMembers6Nested9normalDefEv(%"struct.ImportMembers::Nested"* %this)
   // G32-DAG: declare dllimport   x86_thiscallcc void @_ZN13ImportMembers6Nested10normalDeclEv(%"struct.ImportMembers::Nested"*)
   // G64-DAG: declare dllimport                  void @_ZN13ImportMembers6Nested10normalDeclEv(%"struct.ImportMembers::Nested"*)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers6Nested13normalInclassEv(%"struct.ImportMembers::Nested"* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers6Nested13normalInclassEv(%"struct.ImportMembers::Nested"* %this)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers6Nested15normalInlineDefEv(%"struct.ImportMembers::Nested"* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers6Nested15normalInlineDefEv(%"struct.ImportMembers::Nested"* %this)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers6Nested16normalInlineDeclEv(%"struct.ImportMembers::Nested"* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers6Nested16normalInlineDeclEv(%"struct.ImportMembers::Nested"* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested13normalInclassEv(%"struct.ImportMembers::Nested"* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN13ImportMembers6Nested13normalInclassEv(%"struct.ImportMembers::Nested"* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested15normalInlineDefEv(%"struct.ImportMembers::Nested"* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN13ImportMembers6Nested15normalInlineDefEv(%"struct.ImportMembers::Nested"* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested16normalInlineDeclEv(%"struct.ImportMembers::Nested"* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN13ImportMembers6Nested16normalInlineDeclEv(%"struct.ImportMembers::Nested"* %this)
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?normalInclass@Nested@ImportMembers@@QAEXXZ"(
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?normalInlineDef@Nested@ImportMembers@@QAEXXZ"(
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?normalInlineDecl@Nested@ImportMembers@@QAEXXZ"(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers6Nested13normalInclassEv(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers6Nested15normalInlineDefEv(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers6Nested16normalInlineDeclEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested13normalInclassEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested15normalInlineDefEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested16normalInlineDeclEv(
   __declspec(dllimport)                void normalDef(); // dllimport ignored
   __declspec(dllimport)                void normalDecl();
   __declspec(dllimport)                void normalInclass() {}
   __declspec(dllimport)                void normalInlineDef();
   __declspec(dllimport)         inline void normalInlineDecl();
 
-  // M32-DAG: define  dllexport   x86_thiscallcc void @"\01?virtualDef@Nested@ImportMembers@@UAEXXZ"(%"struct.ImportMembers::Nested"* %this)
-  // M64-DAG: define  dllexport                  void @"\01?virtualDef@Nested@ImportMembers@@UEAAXXZ"(%"struct.ImportMembers::Nested"* %this)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?virtualDecl@Nested@ImportMembers@@UAEXXZ"(%"struct.ImportMembers::Nested"*)
-  // M64-DAG: declare dllimport                  void @"\01?virtualDecl@Nested@ImportMembers@@UEAAXXZ"(%"struct.ImportMembers::Nested"*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?virtualInclass@Nested@ImportMembers@@UAEXXZ"(%"struct.ImportMembers::Nested"*)
-  // M64-DAG: declare dllimport                  void @"\01?virtualInclass@Nested@ImportMembers@@UEAAXXZ"(%"struct.ImportMembers::Nested"*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?virtualInlineDef@Nested@ImportMembers@@UAEXXZ"(%"struct.ImportMembers::Nested"*)
-  // M64-DAG: declare dllimport                  void @"\01?virtualInlineDef@Nested@ImportMembers@@UEAAXXZ"(%"struct.ImportMembers::Nested"*)
-  // M32-DAG: declare dllimport   x86_thiscallcc void @"\01?virtualInlineDecl@Nested@ImportMembers@@UAEXXZ"(%"struct.ImportMembers::Nested"*)
-  // M64-DAG: declare dllimport                  void @"\01?virtualInlineDecl@Nested@ImportMembers@@UEAAXXZ"(%"struct.ImportMembers::Nested"*)
-  // G32-DAG: define              x86_thiscallcc void @_ZN13ImportMembers6Nested10virtualDefEv(%"struct.ImportMembers::Nested"* %this)
-  // G64-DAG: define                             void @_ZN13ImportMembers6Nested10virtualDefEv(%"struct.ImportMembers::Nested"* %this)
+  // M32-DAG: define  dso_local dllexport   x86_thiscallcc void @"\01?virtualDef@Nested@ImportMembers@@UAEXXZ"(%"struct.ImportMembers::Nested"* %this)
+  // M64-DAG: define  dso_local dllexport                  void @"\01?virtualDef@Nested@ImportMembers@@UEAAXXZ"(%"struct.ImportMembers::Nested"* %this)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?virtualDecl@Nested@ImportMembers@@UAEXXZ"(%"struct.ImportMembers::Nested"*)
+  // M64-DAG: declare           dllimport                  void @"\01?virtualDecl@Nested@ImportMembers@@UEAAXXZ"(%"struct.ImportMembers::Nested"*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?virtualInclass@Nested@ImportMembers@@UAEXXZ"(%"struct.ImportMembers::Nested"*)
+  // M64-DAG: declare           dllimport                  void @"\01?virtualInclass@Nested@ImportMembers@@UEAAXXZ"(%"struct.ImportMembers::Nested"*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?virtualInlineDef@Nested@ImportMembers@@UAEXXZ"(%"struct.ImportMembers::Nested"*)
+  // M64-DAG: declare           dllimport                  void @"\01?virtualInlineDef@Nested@ImportMembers@@UEAAXXZ"(%"struct.ImportMembers::Nested"*)
+  // M32-DAG: declare           dllimport   x86_thiscallcc void @"\01?virtualInlineDecl@Nested@ImportMembers@@UAEXXZ"(%"struct.ImportMembers::Nested"*)
+  // M64-DAG: declare           dllimport                  void @"\01?virtualInlineDecl@Nested@ImportMembers@@UEAAXXZ"(%"struct.ImportMembers::Nested"*)
+  // G32-DAG: define  dso_local             x86_thiscallcc void @_ZN13ImportMembers6Nested10virtualDefEv(%"struct.ImportMembers::Nested"* %this)
+  // G64-DAG: define  dso_local                            void @_ZN13ImportMembers6Nested10virtualDefEv(%"struct.ImportMembers::Nested"* %this)
   // G32-DAG: declare dllimport   x86_thiscallcc void @_ZN13ImportMembers6Nested11virtualDeclEv(%"struct.ImportMembers::Nested"*)
   // G64-DAG: declare dllimport                  void @_ZN13ImportMembers6Nested11virtualDeclEv(%"struct.ImportMembers::Nested"*)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers6Nested14virtualInclassEv(%"struct.ImportMembers::Nested"* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers6Nested14virtualInclassEv(%"struct.ImportMembers::Nested"* %this)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers6Nested16virtualInlineDefEv(%"struct.ImportMembers::Nested"* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers6Nested16virtualInlineDefEv(%"struct.ImportMembers::Nested"* %this)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ImportMembers6Nested17virtualInlineDeclEv(%"struct.ImportMembers::Nested"* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN13ImportMembers6Nested17virtualInlineDeclEv(%"struct.ImportMembers::Nested"* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested14virtualInclassEv(%"struct.ImportMembers::Nested"* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN13ImportMembers6Nested14virtualInclassEv(%"struct.ImportMembers::Nested"* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested16virtualInlineDefEv(%"struct.ImportMembers::Nested"* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN13ImportMembers6Nested16virtualInlineDefEv(%"struct.ImportMembers::Nested"* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested17virtualInlineDeclEv(%"struct.ImportMembers::Nested"* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN13ImportMembers6Nested17virtualInlineDeclEv(%"struct.ImportMembers::Nested"* %this)
 
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?virtualInclass@Nested@ImportMembers@@UAEXXZ"(
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?virtualInlineDef@Nested@ImportMembers@@UAEXXZ"(
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01?virtualInlineDecl@Nested@ImportMembers@@UAEXXZ"(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc                   void @_ZN13ImportMembers6Nested14virtualInclassEv(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc                   void @_ZN13ImportMembers6Nested16virtualInlineDefEv(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc                   void @_ZN13ImportMembers6Nested17virtualInlineDeclEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc                   void @_ZN13ImportMembers6Nested14virtualInclassEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc                   void @_ZN13ImportMembers6Nested16virtualInlineDefEv(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc                   void @_ZN13ImportMembers6Nested17virtualInlineDeclEv(
   __declspec(dllimport) virtual        void virtualDef(); // dllimport ignored
   __declspec(dllimport) virtual        void virtualDecl();
   __declspec(dllimport) virtual        void virtualInclass() {}
   __declspec(dllimport) virtual        void virtualInlineDef();
   __declspec(dllimport) virtual inline void virtualInlineDecl();
 
-  // MSC-DAG: define  dllexport                void @"\01?staticDef@Nested@ImportMembers@@SAXXZ"()
-  // MSC-DAG: declare dllimport                void @"\01?staticDecl@Nested@ImportMembers@@SAXXZ"()
-  // MSC-DAG: declare dllimport                void @"\01?staticInclass@Nested@ImportMembers@@SAXXZ"()
-  // MSC-DAG: declare dllimport                void @"\01?staticInlineDef@Nested@ImportMembers@@SAXXZ"()
-  // MSC-DAG: declare dllimport                void @"\01?staticInlineDecl@Nested@ImportMembers@@SAXXZ"()
-  // GNU-DAG: define                           void @_ZN13ImportMembers6Nested9staticDefEv()
-  // GNU-DAG: declare dllimport                void @_ZN13ImportMembers6Nested10staticDeclEv()
-  // GNU-DAG: define linkonce_odr              void @_ZN13ImportMembers6Nested13staticInclassEv()
-  // GNU-DAG: define linkonce_odr              void @_ZN13ImportMembers6Nested15staticInlineDefEv()
-  // GNU-DAG: define linkonce_odr              void @_ZN13ImportMembers6Nested16staticInlineDeclEv()
+  // MSC-DAG: define  dso_local dllexport                void @"\01?staticDef@Nested@ImportMembers@@SAXXZ"()
+  // MSC-DAG: declare           dllimport                void @"\01?staticDecl@Nested@ImportMembers@@SAXXZ"()
+  // MSC-DAG: declare           dllimport                void @"\01?staticInclass@Nested@ImportMembers@@SAXXZ"()
+  // MSC-DAG: declare           dllimport                void @"\01?staticInlineDef@Nested@ImportMembers@@SAXXZ"()
+  // MSC-DAG: declare           dllimport                void @"\01?staticInlineDecl@Nested@ImportMembers@@SAXXZ"()
+  // GNU-DAG: define  dso_local                          void @_ZN13ImportMembers6Nested9staticDefEv()
+  // GNU-DAG: declare           dllimport                void @_ZN13ImportMembers6Nested10staticDeclEv()
+  // GNU-DAG: define linkonce_odr dso_local               void @_ZN13ImportMembers6Nested13staticInclassEv()
+  // GNU-DAG: define linkonce_odr dso_local              void @_ZN13ImportMembers6Nested15staticInlineDefEv()
+  // GNU-DAG: define linkonce_odr dso_local              void @_ZN13ImportMembers6Nested16staticInlineDeclEv()
   // MO1-DAG: define available_externally dllimport void @"\01?staticInclass@Nested@ImportMembers@@SAXXZ"()
   // MO1-DAG: define available_externally dllimport void @"\01?staticInlineDef@Nested@ImportMembers@@SAXXZ"()
   // MO1-DAG: define available_externally dllimport void @"\01?staticInlineDecl@Nested@ImportMembers@@SAXXZ"()
-  // GO1-DAG: define linkonce_odr              void @_ZN13ImportMembers6Nested13staticInclassEv()
-  // GO1-DAG: define linkonce_odr              void @_ZN13ImportMembers6Nested15staticInlineDefEv()
-  // GO1-DAG: define linkonce_odr              void @_ZN13ImportMembers6Nested16staticInlineDeclEv()
+  // GO1-DAG: define linkonce_odr dso_local              void @_ZN13ImportMembers6Nested13staticInclassEv()
+  // GO1-DAG: define linkonce_odr dso_local              void @_ZN13ImportMembers6Nested15staticInlineDefEv()
+  // GO1-DAG: define linkonce_odr dso_local              void @_ZN13ImportMembers6Nested16staticInlineDeclEv()
   __declspec(dllimport) static         void staticDef(); // dllimport ignored
   __declspec(dllimport) static         void staticDecl();
   __declspec(dllimport) static         void staticInclass() {}
@@ -342,10 +342,10 @@
   __declspec(dllimport)                void privateNormalDecl();
   __declspec(dllimport) static         void privateStaticDecl();
 
-  // M32-DAG: declare           x86_thiscallcc void @"\01?ignored@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"*)
-  // M64-DAG: declare                          void @"\01?ignored@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"*)
-  // G32-DAG: declare           x86_thiscallcc void @_ZN13ImportMembers6Nested7ignoredEv(%"struct.ImportMembers::Nested"*)
-  // G64-DAG: declare                          void @_ZN13ImportMembers6Nested7ignoredEv(%"struct.ImportMembers::Nested"*)
+  // M32-DAG: declare dso_local           x86_thiscallcc void @"\01?ignored@Nested@ImportMembers@@QAEXXZ"(%"struct.ImportMembers::Nested"*)
+  // M64-DAG: declare dso_local                          void @"\01?ignored@Nested@ImportMembers@@QEAAXXZ"(%"struct.ImportMembers::Nested"*)
+  // G32-DAG: declare dso_local           x86_thiscallcc void @_ZN13ImportMembers6Nested7ignoredEv(%"struct.ImportMembers::Nested"*)
+  // G64-DAG: declare dso_local                          void @_ZN13ImportMembers6Nested7ignoredEv(%"struct.ImportMembers::Nested"*)
 public:
   void ignored();
 
@@ -451,50 +451,50 @@
 struct ImportInlineSpecials {
   // M32-DAG: declare dllimport   x86_thiscallcc %struct.ImportInlineSpecials* @"\01??0ImportInlineSpecials@@QAE@XZ"(%struct.ImportInlineSpecials* returned)
   // M64-DAG: declare dllimport                  %struct.ImportInlineSpecials* @"\01??0ImportInlineSpecials@@QEAA@XZ"(%struct.ImportInlineSpecials* returned)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN20ImportInlineSpecialsC1Ev(%struct.ImportInlineSpecials* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN20ImportInlineSpecialsC1Ev(%struct.ImportInlineSpecials* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1Ev(%struct.ImportInlineSpecials* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN20ImportInlineSpecialsC1Ev(%struct.ImportInlineSpecials* %this)
   // MO1-DAG: define available_externally dllimport x86_thiscallcc %struct.ImportInlineSpecials* @"\01??0ImportInlineSpecials@@QAE@XZ"(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN20ImportInlineSpecialsC1Ev(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1Ev(
   __declspec(dllimport) ImportInlineSpecials() {}
 
   // M32-DAG: declare dllimport   x86_thiscallcc void @"\01??1ImportInlineSpecials@@QAE@XZ"(%struct.ImportInlineSpecials*)
   // M64-DAG: declare dllimport                  void @"\01??1ImportInlineSpecials@@QEAA@XZ"(%struct.ImportInlineSpecials*)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN20ImportInlineSpecialsD1Ev(%struct.ImportInlineSpecials* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN20ImportInlineSpecialsD1Ev(%struct.ImportInlineSpecials* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsD1Ev(%struct.ImportInlineSpecials* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN20ImportInlineSpecialsD1Ev(%struct.ImportInlineSpecials* %this)
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01??1ImportInlineSpecials@@QAE@XZ"(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN20ImportInlineSpecialsD1Ev(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsD1Ev(
   __declspec(dllimport) ~ImportInlineSpecials() {}
 
   // M32-DAG: declare dllimport   x86_thiscallcc %struct.ImportInlineSpecials* @"\01??0ImportInlineSpecials@@QAE@ABU0@@Z"(%struct.ImportInlineSpecials* returned, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
   // M64-DAG: declare dllimport                  %struct.ImportInlineSpecials* @"\01??0ImportInlineSpecials@@QEAA@AEBU0@@Z"(%struct.ImportInlineSpecials* returned, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN20ImportInlineSpecialsC1ERKS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define linkonce_odr                void @_ZN20ImportInlineSpecialsC1ERKS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1ERKS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN20ImportInlineSpecialsC1ERKS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
   // MO1-DAG: define available_externally dllimport x86_thiscallcc %struct.ImportInlineSpecials* @"\01??0ImportInlineSpecials@@QAE@ABU0@@Z"(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN20ImportInlineSpecialsC1ERKS_(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1ERKS_(
   __declspec(dllimport) inline ImportInlineSpecials(const ImportInlineSpecials&);
 
   // M32-DAG: declare dllimport   x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @"\01??4ImportInlineSpecials@@QAEAAU0@ABU0@@Z"(%struct.ImportInlineSpecials*, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
   // M64-DAG: declare dllimport                  dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @"\01??4ImportInlineSpecials@@QEAAAEAU0@AEBU0@@Z"(%struct.ImportInlineSpecials*, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define linkonce_odr x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSERKS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define linkonce_odr                dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSERKS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSERKS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define linkonce_odr dso_local                dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSERKS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
   // MO1-DAG: define available_externally dllimport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @"\01??4ImportInlineSpecials@@QAEAAU0@ABU0@@Z"(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSERKS_(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSERKS_(
   __declspec(dllimport) ImportInlineSpecials& operator=(const ImportInlineSpecials&);
 
   // M32-DAG: declare dllimport   x86_thiscallcc %struct.ImportInlineSpecials* @"\01??0ImportInlineSpecials@@QAE@$$QAU0@@Z"(%struct.ImportInlineSpecials* returned, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
   // M64-DAG: declare dllimport                  %struct.ImportInlineSpecials* @"\01??0ImportInlineSpecials@@QEAA@$$QEAU0@@Z"(%struct.ImportInlineSpecials* returned, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN20ImportInlineSpecialsC1EOS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define linkonce_odr                void @_ZN20ImportInlineSpecialsC1EOS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1EOS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN20ImportInlineSpecialsC1EOS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
   // MO1-DAG: define available_externally dllimport x86_thiscallcc %struct.ImportInlineSpecials* @"\01??0ImportInlineSpecials@@QAE@$$QAU0@@Z"(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN20ImportInlineSpecialsC1EOS_(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1EOS_(
   __declspec(dllimport) ImportInlineSpecials(ImportInlineSpecials&&) {}
 
   // M32-DAG: declare dllimport   x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @"\01??4ImportInlineSpecials@@QAEAAU0@$$QAU0@@Z"(%struct.ImportInlineSpecials*, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
   // M64-DAG: declare dllimport                  dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @"\01??4ImportInlineSpecials@@QEAAAEAU0@$$QEAU0@@Z"(%struct.ImportInlineSpecials*, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define linkonce_odr x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSEOS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define linkonce_odr                dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSEOS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSEOS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define linkonce_odr dso_local                dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSEOS_(%struct.ImportInlineSpecials* %this, %struct.ImportInlineSpecials* dereferenceable({{[0-9]+}}))
   // MO1-DAG: define available_externally dllimport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @"\01??4ImportInlineSpecials@@QAEAAU0@$$QAU0@@Z"(
-  // GO1-DAG: define linkonce_odr x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSEOS_(
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportInlineSpecials* @_ZN20ImportInlineSpecialsaSEOS_(
   __declspec(dllimport) ImportInlineSpecials& operator=(ImportInlineSpecials&&) { return *this; }
 };
 ImportInlineSpecials::ImportInlineSpecials(const ImportInlineSpecials&) {}
@@ -506,50 +506,50 @@
 struct ImportDefaulted {
   // M32-DAG: declare dllimport   x86_thiscallcc %struct.ImportDefaulted* @"\01??0ImportDefaulted@@QAE@XZ"(%struct.ImportDefaulted* returned)
   // M64-DAG: declare dllimport                  %struct.ImportDefaulted* @"\01??0ImportDefaulted@@QEAA@XZ"(%struct.ImportDefaulted* returned)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void                     @_ZN15ImportDefaultedC1Ev(%struct.ImportDefaulted* %this)
-  // G64-DAG: define linkonce_odr                void                     @_ZN15ImportDefaultedC1Ev(%struct.ImportDefaulted* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void                     @_ZN15ImportDefaultedC1Ev(%struct.ImportDefaulted* %this)
+  // G64-DAG: define linkonce_odr dso_local                void                     @_ZN15ImportDefaultedC1Ev(%struct.ImportDefaulted* %this)
   // MO1-DAG: define available_externally dllimport x86_thiscallcc %struct.ImportDefaulted* @"\01??0ImportDefaulted@@QAE@XZ"(%struct.ImportDefaulted* returned %this)
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN15ImportDefaultedC1Ev(%struct.ImportDefaulted* %this)
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedC1Ev(%struct.ImportDefaulted* %this)
   __declspec(dllimport) ImportDefaulted() = default;
 
   // M32-DAG: declare dllimport   x86_thiscallcc void @"\01??1ImportDefaulted@@QAE@XZ"(%struct.ImportDefaulted*)
   // M64-DAG: declare dllimport                  void @"\01??1ImportDefaulted@@QEAA@XZ"(%struct.ImportDefaulted*)
-  // G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN15ImportDefaultedD1Ev(%struct.ImportDefaulted* %this)
-  // G64-DAG: define linkonce_odr                void @_ZN15ImportDefaultedD1Ev(%struct.ImportDefaulted* %this)
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedD1Ev(%struct.ImportDefaulted* %this)
+  // G64-DAG: define linkonce_odr dso_local                void @_ZN15ImportDefaultedD1Ev(%struct.ImportDefaulted* %this)
   // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"\01??1ImportDefaulted@@QAE@XZ"(%struct.ImportDefaulted* %this)
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN15ImportDefaultedD1Ev(%struct.ImportDefaulted* %this)
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedD1Ev(%struct.ImportDefaulted* %this)
   __declspec(dllimport) ~ImportDefaulted() = default;
 
   // M32-DAG: declare dllimport   x86_thiscallcc %struct.ImportDefaulted* @"\01??0ImportDefaulted@@QAE@ABU0@@Z"(%struct.ImportDefaulted* returned, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   // M64-DAG: declare dllimport                  %struct.ImportDefaulted* @"\01??0ImportDefaulted@@QEAA@AEBU0@@Z"(%struct.ImportDefaulted* returned, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define linkonce_odr x86_thiscallcc void                     @_ZN15ImportDefaultedC1ERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define linkonce_odr                void                     @_ZN15ImportDefaultedC1ERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void                     @_ZN15ImportDefaultedC1ERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define linkonce_odr dso_local                void                     @_ZN15ImportDefaultedC1ERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   // MO1-DAG: define available_externally dllimport x86_thiscallcc %struct.ImportDefaulted* @"\01??0ImportDefaulted@@QAE@ABU0@@Z"(%struct.ImportDefaulted* returned %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN15ImportDefaultedC1ERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedC1ERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   __declspec(dllimport) ImportDefaulted(const ImportDefaulted&) = default;
 
   // M32-DAG: declare dllimport   x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @"\01??4ImportDefaulted@@QAEAAU0@ABU0@@Z"(%struct.ImportDefaulted*, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   // M64-DAG: declare dllimport                  dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @"\01??4ImportDefaulted@@QEAAAEAU0@AEBU0@@Z"(%struct.ImportDefaulted*, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define linkonce_odr x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define linkonce_odr                dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define linkonce_odr dso_local                dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   // MO1-DAG: define available_externally dllimport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @"\01??4ImportDefaulted@@QAEAAU0@ABU0@@Z"(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // GO1-DAG: define linkonce_odr x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSERKS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   __declspec(dllimport) ImportDefaulted& operator=(const ImportDefaulted&) = default;
 
   // M32-DAG: declare dllimport   x86_thiscallcc %struct.ImportDefaulted* @"\01??0ImportDefaulted@@QAE@$$QAU0@@Z"(%struct.ImportDefaulted* returned, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   // M64-DAG: declare dllimport                  %struct.ImportDefaulted* @"\01??0ImportDefaulted@@QEAA@$$QEAU0@@Z"(%struct.ImportDefaulted* returned, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define linkonce_odr x86_thiscallcc void                     @_ZN15ImportDefaultedC1EOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define linkonce_odr                void                     @_ZN15ImportDefaultedC1EOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void                     @_ZN15ImportDefaultedC1EOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define linkonce_odr dso_local                void                     @_ZN15ImportDefaultedC1EOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   // MO1-DAG: define available_externally dllimport x86_thiscallcc %struct.ImportDefaulted* @"\01??0ImportDefaulted@@QAE@$$QAU0@@Z"(%struct.ImportDefaulted* returned %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // GO1-DAG: define linkonce_odr x86_thiscallcc void @_ZN15ImportDefaultedC1EOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedC1EOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   __declspec(dllimport) ImportDefaulted(ImportDefaulted&&) = default;
 
   // M32-DAG: declare dllimport   x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @"\01??4ImportDefaulted@@QAEAAU0@$$QAU0@@Z"(%struct.ImportDefaulted*, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   // M64-DAG: declare dllimport                  dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @"\01??4ImportDefaulted@@QEAAAEAU0@$$QEAU0@@Z"(%struct.ImportDefaulted*, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // G32-DAG: define linkonce_odr x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSEOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // G64-DAG: define linkonce_odr                dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSEOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // G32-DAG: define linkonce_odr dso_local x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSEOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // G64-DAG: define linkonce_odr dso_local                dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSEOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   // MO1-DAG: define available_externally dllimport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @"\01??4ImportDefaulted@@QAEAAU0@$$QAU0@@Z"(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
-  // GO1-DAG: define linkonce_odr x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSEOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
+  // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaulted* @_ZN15ImportDefaultedaSEOS_(%struct.ImportDefaulted* %this, %struct.ImportDefaulted* dereferenceable({{[0-9]+}}))
   __declspec(dllimport) ImportDefaulted& operator=(ImportDefaulted&&) = default;
 
   ForceNonTrivial v; // ensure special members are non-trivial
@@ -585,28 +585,28 @@
 
 // M32-DAG: declare dllimport   x86_thiscallcc %struct.ImportDefaultedDefs* @"\01??0ImportDefaultedDefs@@QAE@ABU0@@Z"(%struct.ImportDefaultedDefs* returned, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
 // M64-DAG: declare dllimport                  %struct.ImportDefaultedDefs* @"\01??0ImportDefaultedDefs@@QEAA@AEBU0@@Z"(%struct.ImportDefaultedDefs* returned, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN19ImportDefaultedDefsC1ERKS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G64-DAG: define linkonce_odr                void @_ZN19ImportDefaultedDefsC1ERKS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN19ImportDefaultedDefsC1ERKS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G64-DAG: define linkonce_odr dso_local                 void @_ZN19ImportDefaultedDefsC1ERKS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
 inline ImportDefaultedDefs::ImportDefaultedDefs(const ImportDefaultedDefs&) = default;
 
 // M32-DAG: declare dllimport   x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @"\01??4ImportDefaultedDefs@@QAEAAU0@ABU0@@Z"(%struct.ImportDefaultedDefs*, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
 // M64-DAG: declare dllimport                  dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @"\01??4ImportDefaultedDefs@@QEAAAEAU0@AEBU0@@Z"(%struct.ImportDefaultedDefs*, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G32-DAG: define linkonce_odr x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @_ZN19ImportDefaultedDefsaSERKS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G64-DAG: define linkonce_odr                dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @_ZN19ImportDefaultedDefsaSERKS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @_ZN19ImportDefaultedDefsaSERKS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G64-DAG: define linkonce_odr dso_local                dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @_ZN19ImportDefaultedDefsaSERKS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
 inline ImportDefaultedDefs& ImportDefaultedDefs::operator=(const ImportDefaultedDefs&) = default;
 
-// M32-DAG: define dllexport x86_thiscallcc %struct.ImportDefaultedDefs* @"\01??0ImportDefaultedDefs@@QAE@$$QAU0@@Z"(%struct.ImportDefaultedDefs* returned %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// M64-DAG: define dllexport                %struct.ImportDefaultedDefs* @"\01??0ImportDefaultedDefs@@QEAA@$$QEAU0@@Z"(%struct.ImportDefaultedDefs* returned %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G32-DAG: define x86_thiscallcc void @_ZN19ImportDefaultedDefsC1EOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G64-DAG: define                void @_ZN19ImportDefaultedDefsC1EOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G32-DAG: define x86_thiscallcc void @_ZN19ImportDefaultedDefsC2EOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G64-DAG: define                void @_ZN19ImportDefaultedDefsC2EOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M32-DAG: define dso_local dllexport x86_thiscallcc %struct.ImportDefaultedDefs* @"\01??0ImportDefaultedDefs@@QAE@$$QAU0@@Z"(%struct.ImportDefaultedDefs* returned %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M64-DAG: define dso_local dllexport                %struct.ImportDefaultedDefs* @"\01??0ImportDefaultedDefs@@QEAA@$$QEAU0@@Z"(%struct.ImportDefaultedDefs* returned %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G32-DAG: define dso_local x86_thiscallcc void @_ZN19ImportDefaultedDefsC1EOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G64-DAG: define dso_local                void @_ZN19ImportDefaultedDefsC1EOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G32-DAG: define dso_local x86_thiscallcc void @_ZN19ImportDefaultedDefsC2EOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G64-DAG: define dso_local                void @_ZN19ImportDefaultedDefsC2EOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
 ImportDefaultedDefs::ImportDefaultedDefs(ImportDefaultedDefs&&) = default; // dllimport ignored
 
-// M32-DAG: define dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @"\01??4ImportDefaultedDefs@@QAEAAU0@$$QAU0@@Z"(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// M64-DAG: define dllexport                dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @"\01??4ImportDefaultedDefs@@QEAAAEAU0@$$QEAU0@@Z"(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G32-DAG: define x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @_ZN19ImportDefaultedDefsaSEOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
-// G64-DAG: define                dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @_ZN19ImportDefaultedDefsaSEOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M32-DAG: define dso_local dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @"\01??4ImportDefaultedDefs@@QAEAAU0@$$QAU0@@Z"(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// M64-DAG: define dso_local dllexport                dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @"\01??4ImportDefaultedDefs@@QEAAAEAU0@$$QEAU0@@Z"(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G32-DAG: define dso_local x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @_ZN19ImportDefaultedDefsaSEOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
+// G64-DAG: define dso_local                dereferenceable({{[0-9]+}}) %struct.ImportDefaultedDefs* @_ZN19ImportDefaultedDefsaSEOS_(%struct.ImportDefaultedDefs* %this, %struct.ImportDefaultedDefs* dereferenceable({{[0-9]+}}))
 ImportDefaultedDefs& ImportDefaultedDefs::operator=(ImportDefaultedDefs&&) = default; // dllimport ignored
 
 USESPECIALS(ImportDefaultedDefs)
@@ -659,12 +659,12 @@
 // Import implicit instantiation of an imported member function template.
 // M32-DAG: declare dllimport   x86_thiscallcc void @"\01??$importedNormal@UImplicitInst_Imported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl*)
 // M64-DAG: declare dllimport                  void @"\01??$importedNormal@UImplicitInst_Imported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl*)
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI21ImplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define linkonce_odr                void @_ZN10MemFunTmpl14importedNormalI21ImplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI21ImplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define linkonce_odr dso_local                void @_ZN10MemFunTmpl14importedNormalI21ImplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
 USEMF(MemFunTmpl, importedNormal<ImplicitInst_Imported>)
 
 // MSC-DAG: declare dllimport                void @"\01??$importedStatic@UImplicitInst_Imported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define linkonce_odr              void @_ZN10MemFunTmpl14importedStaticI21ImplicitInst_ImportedEEvv()
+// GNU-DAG: define linkonce_odr dso_local              void @_ZN10MemFunTmpl14importedStaticI21ImplicitInst_ImportedEEvv()
 USE(MemFunTmpl::importedStatic<ImplicitInst_Imported>)
 
 
@@ -672,13 +672,13 @@
 // template.
 // M32-DAG: declare dllimport x86_thiscallcc void @"\01??$importedNormal@UExplicitDecl_Imported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl*)
 // M64-DAG: declare dllimport                void @"\01??$importedNormal@UExplicitDecl_Imported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl*)
-// G32-DAG: declare x86_thiscallcc           void @_ZN10MemFunTmpl14importedNormalI21ExplicitDecl_ImportedEEvv(%struct.MemFunTmpl*)
-// G64-DAG: declare                          void @_ZN10MemFunTmpl14importedNormalI21ExplicitDecl_ImportedEEvv(%struct.MemFunTmpl*)
+// G32-DAG: declare dso_local x86_thiscallcc           void @_ZN10MemFunTmpl14importedNormalI21ExplicitDecl_ImportedEEvv(%struct.MemFunTmpl*)
+// G64-DAG: declare dso_local                          void @_ZN10MemFunTmpl14importedNormalI21ExplicitDecl_ImportedEEvv(%struct.MemFunTmpl*)
 extern template void MemFunTmpl::importedNormal<ExplicitDecl_Imported>();
 USEMF(MemFunTmpl, importedNormal<ExplicitDecl_Imported>)
 
 // MSC-DAG: declare dllimport                void @"\01??$importedStatic@UExplicitDecl_Imported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: declare                          void @_ZN10MemFunTmpl14importedStaticI21ExplicitDecl_ImportedEEvv()
+// GNU-DAG: declare dso_local                void @_ZN10MemFunTmpl14importedStaticI21ExplicitDecl_ImportedEEvv()
 extern template void MemFunTmpl::importedStatic<ExplicitDecl_Imported>();
 USE(MemFunTmpl::importedStatic<ExplicitDecl_Imported>)
 
@@ -687,13 +687,13 @@
 // template.
 // M32-DAG: declare dllimport x86_thiscallcc void @"\01??$importedNormal@UExplicitInst_Imported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl*)
 // M64-DAG: declare dllimport                void @"\01??$importedNormal@UExplicitInst_Imported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl*)
-// G32-DAG: define weak_odr x86_thiscallcc   void @_ZN10MemFunTmpl14importedNormalI21ExplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define weak_odr                  void @_ZN10MemFunTmpl14importedNormalI21ExplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
+// G32-DAG: define weak_odr dso_local x86_thiscallcc   void @_ZN10MemFunTmpl14importedNormalI21ExplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define weak_odr dso_local                  void @_ZN10MemFunTmpl14importedNormalI21ExplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
 template void MemFunTmpl::importedNormal<ExplicitInst_Imported>();
 USEMF(MemFunTmpl, importedNormal<ExplicitInst_Imported>)
 
 // MSC-DAG: declare dllimport                void @"\01??$importedStatic@UExplicitInst_Imported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define weak_odr                  void @_ZN10MemFunTmpl14importedStaticI21ExplicitInst_ImportedEEvv()
+// GNU-DAG: define weak_odr dso_local        void @_ZN10MemFunTmpl14importedStaticI21ExplicitInst_ImportedEEvv()
 template void MemFunTmpl::importedStatic<ExplicitInst_Imported>();
 USE(MemFunTmpl::importedStatic<ExplicitInst_Imported>)
 
@@ -715,8 +715,8 @@
 
 // M32-DAG: declare dllimport   x86_thiscallcc void @"\01??$importedNormal@UExplicitSpec_InlineDef_Imported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl*)
 // M64-DAG: declare dllimport                  void @"\01??$importedNormal@UExplicitSpec_InlineDef_Imported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl*)
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI31ExplicitSpec_InlineDef_ImportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define linkonce_odr                void @_ZN10MemFunTmpl14importedNormalI31ExplicitSpec_InlineDef_ImportedEEvv(%struct.MemFunTmpl* %this)
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI31ExplicitSpec_InlineDef_ImportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define linkonce_odr dso_local                void @_ZN10MemFunTmpl14importedNormalI31ExplicitSpec_InlineDef_ImportedEEvv(%struct.MemFunTmpl* %this)
 template<> __declspec(dllimport) inline void MemFunTmpl::importedNormal<ExplicitSpec_InlineDef_Imported>() {}
 USEMF(MemFunTmpl, importedNormal<ExplicitSpec_InlineDef_Imported>)
 
@@ -733,22 +733,22 @@
 #endif
 
 // MSC-DAG: declare dllimport                void @"\01??$importedStatic@UExplicitSpec_InlineDef_Imported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define linkonce_odr              void @_ZN10MemFunTmpl14importedStaticI31ExplicitSpec_InlineDef_ImportedEEvv()
+// GNU-DAG: define linkonce_odr dso_local    void @_ZN10MemFunTmpl14importedStaticI31ExplicitSpec_InlineDef_ImportedEEvv()
 template<> __declspec(dllimport) inline void MemFunTmpl::importedStatic<ExplicitSpec_InlineDef_Imported>() {}
 USE(MemFunTmpl::importedStatic<ExplicitSpec_InlineDef_Imported>)
 
 
 // Not importing specialization of an imported member function template without
 // explicit dllimport.
-// M32-DAG: define x86_thiscallcc void @"\01??$importedNormal@UExplicitSpec_NotImported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
-// M64-DAG: define                void @"\01??$importedNormal@UExplicitSpec_NotImported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
-// G32-DAG: define x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI24ExplicitSpec_NotImportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define                void @_ZN10MemFunTmpl14importedNormalI24ExplicitSpec_NotImportedEEvv(%struct.MemFunTmpl* %this)
+// M32-DAG: define dso_local x86_thiscallcc void @"\01??$importedNormal@UExplicitSpec_NotImported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl* %this)
+// M64-DAG: define dso_local                void @"\01??$importedNormal@UExplicitSpec_NotImported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl* %this)
+// G32-DAG: define dso_local x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI24ExplicitSpec_NotImportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define dso_local                void @_ZN10MemFunTmpl14importedNormalI24ExplicitSpec_NotImportedEEvv(%struct.MemFunTmpl* %this)
 template<> void MemFunTmpl::importedNormal<ExplicitSpec_NotImported>() {}
 USEMF(MemFunTmpl, importedNormal<ExplicitSpec_NotImported>)
 
-// MSC-DAG: define                void @"\01??$importedStatic@UExplicitSpec_NotImported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define                void @_ZN10MemFunTmpl14importedStaticI24ExplicitSpec_NotImportedEEvv()
+// MSC-DAG: define dso_local                void @"\01??$importedStatic@UExplicitSpec_NotImported@@@MemFunTmpl@@SAXXZ"()
+// GNU-DAG: define dso_local                void @_ZN10MemFunTmpl14importedStaticI24ExplicitSpec_NotImportedEEvv()
 template<> void MemFunTmpl::importedStatic<ExplicitSpec_NotImported>() {}
 USE(MemFunTmpl::importedStatic<ExplicitSpec_NotImported>)
 
@@ -757,13 +757,13 @@
 // template.
 // M32-DAG: declare dllimport x86_thiscallcc void @"\01??$normalDef@UExplicitDecl_Imported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl*)
 // M64-DAG: declare dllimport                void @"\01??$normalDef@UExplicitDecl_Imported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl*)
-// G32-DAG: declare x86_thiscallcc           void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ImportedEEvv(%struct.MemFunTmpl*)
-// G64-DAG: declare                          void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ImportedEEvv(%struct.MemFunTmpl*)
+// G32-DAG: declare dso_local x86_thiscallcc           void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ImportedEEvv(%struct.MemFunTmpl*)
+// G64-DAG: declare dso_local                          void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ImportedEEvv(%struct.MemFunTmpl*)
 extern template __declspec(dllimport) void MemFunTmpl::normalDef<ExplicitDecl_Imported>();
 USEMF(MemFunTmpl, normalDef<ExplicitDecl_Imported>)
 
 // MSC-DAG: declare dllimport                void @"\01??$staticDef@UExplicitDecl_Imported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: declare                          void @_ZN10MemFunTmpl9staticDefI21ExplicitDecl_ImportedEEvv()
+// GNU-DAG: declare dso_local                void @_ZN10MemFunTmpl9staticDefI21ExplicitDecl_ImportedEEvv()
 extern template __declspec(dllimport) void MemFunTmpl::staticDef<ExplicitDecl_Imported>();
 USE(MemFunTmpl::staticDef<ExplicitDecl_Imported>)
 
@@ -772,13 +772,13 @@
 // template.
 // M32-DAG: declare dllimport x86_thiscallcc void @"\01??$normalDef@UExplicitInst_Imported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl*)
 // M64-DAG: declare dllimport                void @"\01??$normalDef@UExplicitInst_Imported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl*)
-// G32-DAG: define weak_odr x86_thiscallcc   void @_ZN10MemFunTmpl9normalDefI21ExplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define weak_odr                  void @_ZN10MemFunTmpl9normalDefI21ExplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
+// G32-DAG: define weak_odr dso_local x86_thiscallcc   void @_ZN10MemFunTmpl9normalDefI21ExplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define weak_odr dso_local                  void @_ZN10MemFunTmpl9normalDefI21ExplicitInst_ImportedEEvv(%struct.MemFunTmpl* %this)
 template __declspec(dllimport) void MemFunTmpl::normalDef<ExplicitInst_Imported>();
 USEMF(MemFunTmpl, normalDef<ExplicitInst_Imported>)
 
 // MSC-DAG: declare dllimport                void @"\01??$staticDef@UExplicitInst_Imported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define weak_odr                  void @_ZN10MemFunTmpl9staticDefI21ExplicitInst_ImportedEEvv()
+// GNU-DAG: define weak_odr dso_local                  void @_ZN10MemFunTmpl9staticDefI21ExplicitInst_ImportedEEvv()
 template __declspec(dllimport) void MemFunTmpl::staticDef<ExplicitInst_Imported>();
 USE(MemFunTmpl::staticDef<ExplicitInst_Imported>)
 
@@ -800,8 +800,8 @@
 
 // M32-DAG: declare dllimport   x86_thiscallcc void @"\01??$normalDef@UExplicitSpec_InlineDef_Imported@@@MemFunTmpl@@QAEXXZ"(%struct.MemFunTmpl*)
 // M64-DAG: declare dllimport                  void @"\01??$normalDef@UExplicitSpec_InlineDef_Imported@@@MemFunTmpl@@QEAAXXZ"(%struct.MemFunTmpl*)
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ImportedEEvv(%struct.MemFunTmpl* %this)
-// G64-DAG: define linkonce_odr                void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ImportedEEvv(%struct.MemFunTmpl* %this)
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ImportedEEvv(%struct.MemFunTmpl* %this)
+// G64-DAG: define linkonce_odr dso_local                void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ImportedEEvv(%struct.MemFunTmpl* %this)
 template<> __declspec(dllimport) inline void MemFunTmpl::normalDef<ExplicitSpec_InlineDef_Imported>() {}
 USEMF(MemFunTmpl, normalDef<ExplicitSpec_InlineDef_Imported>)
 
@@ -818,7 +818,7 @@
 #endif
 
 // MSC-DAG: declare dllimport void @"\01??$staticDef@UExplicitSpec_InlineDef_Imported@@@MemFunTmpl@@SAXXZ"()
-// GNU-DAG: define linkonce_odr void @_ZN10MemFunTmpl9staticDefI31ExplicitSpec_InlineDef_ImportedEEvv()
+// GNU-DAG: define linkonce_odr dso_local void @_ZN10MemFunTmpl9staticDefI31ExplicitSpec_InlineDef_ImportedEEvv()
 template<> __declspec(dllimport) inline void MemFunTmpl::staticDef<ExplicitSpec_InlineDef_Imported>() {}
 USE(MemFunTmpl::staticDef<ExplicitSpec_InlineDef_Imported>)
 
@@ -853,8 +853,8 @@
 
 // Not importing specialization of a member variable template without explicit
 // dllimport.
-// MSC-DAG: @"\01??$ImportedStaticVar@UExplicitSpec_NotImported@@@MemVarTmpl@@2HB" = external constant i32
-// GNU-DAG: @_ZN10MemVarTmpl17ImportedStaticVarI24ExplicitSpec_NotImportedEE       = external constant i32
+// MSC-DAG: @"\01??$ImportedStaticVar@UExplicitSpec_NotImported@@@MemVarTmpl@@2HB" = external dso_local constant i32
+// GNU-DAG: @_ZN10MemVarTmpl17ImportedStaticVarI24ExplicitSpec_NotImportedEE       = external dso_local constant i32
 template<> const int MemVarTmpl::ImportedStaticVar<ExplicitSpec_NotImported>;
 USEMV(MemVarTmpl, ImportedStaticVar<ExplicitSpec_NotImported>)
 
diff --git a/test/CodeGenCXX/dllimport-memptr-global.cpp b/test/CodeGenCXX/dllimport-memptr-global.cpp
index e64537b..5c77d36 100644
--- a/test/CodeGenCXX/dllimport-memptr-global.cpp
+++ b/test/CodeGenCXX/dllimport-memptr-global.cpp
@@ -40,15 +40,15 @@
 
 // All of the non-virtual globals need dynamic initializers.
 
-// CHECK: @"\01?mp_single_nv@@3P8Single@@AEXXZQ1@" = global i8* null, align 4
-// CHECK: @"\01?mp_multi_nv@@3P8Multi@@AEXXZQ1@" = global { i8*, i32 } zeroinitializer, align 4
-// CHECK: @"\01?mp_virtual_nv@@3P8Virtual@@AEXXZQ1@" = global { i8*, i32, i32 } zeroinitializer, align 4
-// CHECK: @"\01?mp_general_nv@@3P8General@@AEXXZQ1@" = global { i8*, i32, i32, i32 } zeroinitializer, align 4
+// CHECK: @"\01?mp_single_nv@@3P8Single@@AEXXZQ1@" = dso_local global i8* null, align 4
+// CHECK: @"\01?mp_multi_nv@@3P8Multi@@AEXXZQ1@" = dso_local global { i8*, i32 } zeroinitializer, align 4
+// CHECK: @"\01?mp_virtual_nv@@3P8Virtual@@AEXXZQ1@" = dso_local global { i8*, i32, i32 } zeroinitializer, align 4
+// CHECK: @"\01?mp_general_nv@@3P8General@@AEXXZQ1@" = dso_local global { i8*, i32, i32, i32 } zeroinitializer, align 4
 
-// CHECK: @"\01?mp_single_v@@3P8Single@@AEXXZQ1@" = global i8* bitcast (void (%struct.Single*, ...)* @"\01??_9Single@@$BA@AE" to i8*), align 4
-// CHECK: @"\01?mp_multi_v@@3P8Multi@@AEXXZQ1@" = global { i8*, i32 } { i8* bitcast (void (%struct.Multi*, ...)* @"\01??_9Multi@@$BA@AE" to i8*), i32 0 }, align 4
-// CHECK: @"\01?mp_virtual_v@@3P8Virtual@@AEXXZQ1@" = global { i8*, i32, i32 } { i8* bitcast (void (%struct.Virtual*, ...)* @"\01??_9Virtual@@$BA@AE" to i8*), i32 0, i32 0 }, align 4
-// CHECK: @"\01?mp_general_v@@3P8General@@AEXXZQ1@" = global { i8*, i32, i32, i32 } { i8* bitcast (void (%struct.General*, ...)* @"\01??_9General@@$BA@AE" to i8*), i32 0, i32 0, i32 0 }, align 4
+// CHECK: @"\01?mp_single_v@@3P8Single@@AEXXZQ1@" = dso_local global i8* bitcast (void (%struct.Single*, ...)* @"\01??_9Single@@$BA@AE" to i8*), align 4
+// CHECK: @"\01?mp_multi_v@@3P8Multi@@AEXXZQ1@" = dso_local global { i8*, i32 } { i8* bitcast (void (%struct.Multi*, ...)* @"\01??_9Multi@@$BA@AE" to i8*), i32 0 }, align 4
+// CHECK: @"\01?mp_virtual_v@@3P8Virtual@@AEXXZQ1@" = dso_local global { i8*, i32, i32 } { i8* bitcast (void (%struct.Virtual*, ...)* @"\01??_9Virtual@@$BA@AE" to i8*), i32 0, i32 0 }, align 4
+// CHECK: @"\01?mp_general_v@@3P8General@@AEXXZQ1@" = dso_local global { i8*, i32, i32, i32 } { i8* bitcast (void (%struct.General*, ...)* @"\01??_9General@@$BA@AE" to i8*), i32 0, i32 0, i32 0 }, align 4
 
 // CHECK: define internal void @_GLOBAL__sub_I{{.*}}() {{.*}} {
 // CHECK:   call void @"\01??__Emp_single_nv@@YAXXZ"()
diff --git a/test/CodeGenCXX/dllimport-missing-key.cpp b/test/CodeGenCXX/dllimport-missing-key.cpp
new file mode 100644
index 0000000..d8ef7aa
--- /dev/null
+++ b/test/CodeGenCXX/dllimport-missing-key.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple i686-windows-gnu -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=GNU %s
+
+class __declspec(dllimport) QObjectData {
+public:
+    virtual ~QObjectData() = 0;
+    void *ptr;
+
+    int method() const;
+};
+
+class LocalClass : public QObjectData {
+};
+
+void call() {
+    (new LocalClass())->method();
+}
+
+// GNU-DAG: @_ZTV11QObjectData = available_externally dllimport
+// GNU-DAG: @_ZTS11QObjectData = linkonce_odr
+// GNU-DAG: @_ZTI11QObjectData = linkonce_odr
diff --git a/test/CodeGenCXX/dllimport-rtti.cpp b/test/CodeGenCXX/dllimport-rtti.cpp
index 91e747a..619d956 100644
--- a/test/CodeGenCXX/dllimport-rtti.cpp
+++ b/test/CodeGenCXX/dllimport-rtti.cpp
@@ -12,7 +12,7 @@
 // MSVC-DAG: @"\01??_R3S@@8" = linkonce_odr
 
 // GNU-DAG: @_ZTV1S = available_externally dllimport
-// GNU-DAG: @_ZTI1S = external dllimport
+// GNU-DAG: @_ZTI1S = linkonce_odr dso_local
 
 struct U : S {
 } u;
@@ -21,13 +21,13 @@
   virtual void f();
 } v;
 // GNU-DAG: @_ZTV1V = available_externally dllimport
-// GNU-DAG: @_ZTS1V = linkonce_odr
-// GNU-DAG: @_ZTI1V = linkonce_odr
+// GNU-DAG: @_ZTS1V = linkonce_odr dso_local
+// GNU-DAG: @_ZTI1V = linkonce_odr dso_local
 
 struct W {
   __declspec(dllimport) virtual void f();
   virtual void g();
 } w;
-// GNU-DAG: @_ZTV1W = linkonce_odr
-// GNU-DAG: @_ZTS1W = linkonce_odr
-// GNU-DAG: @_ZTI1W = linkonce_odr
+// GNU-DAG: @_ZTV1W = linkonce_odr dso_local
+// GNU-DAG: @_ZTS1W = linkonce_odr dso_local
+// GNU-DAG: @_ZTI1W = linkonce_odr dso_local
diff --git a/test/CodeGenCXX/dllimport.cpp b/test/CodeGenCXX/dllimport.cpp
index 372a96b..4ac4f5c 100644
--- a/test/CodeGenCXX/dllimport.cpp
+++ b/test/CodeGenCXX/dllimport.cpp
@@ -77,8 +77,8 @@
 
 // NB: MSC issues a warning and makes GlobalRedecl3 dllexport. We follow GCC
 // and drop the dllimport with a warning.
-// MSC-DAG: @"\01?GlobalRedecl3@@3HA" = external global i32
-// GNU-DAG: @GlobalRedecl3            = external global i32
+// MSC-DAG: @"\01?GlobalRedecl3@@3HA" = external dso_local global i32
+// GNU-DAG: @GlobalRedecl3            = external dso_local global i32
 __declspec(dllimport) extern int GlobalRedecl3;
                       extern int GlobalRedecl3; // dllimport ignored
 USEVAR(GlobalRedecl3)
@@ -136,8 +136,8 @@
 template<typename T> __declspec(dllimport) int VarTmplRedecl2;
 USEVAR(VarTmplRedecl2<ImplicitInst_Imported>)
 
-// MSC-DAG: @"\01??$VarTmplRedecl3@UImplicitInst_Imported@@@@3HA" = external global i32
-// GNU-DAG: @_Z14VarTmplRedecl3I21ImplicitInst_ImportedE          = external global i32
+// MSC-DAG: @"\01??$VarTmplRedecl3@UImplicitInst_Imported@@@@3HA" = external dso_local global i32
+// GNU-DAG: @_Z14VarTmplRedecl3I21ImplicitInst_ImportedE          = external dso_local global i32
 template<typename T> __declspec(dllimport) extern int VarTmplRedecl3;
 template<typename T>                       extern int VarTmplRedecl3; // dllimport ignored
 USEVAR(VarTmplRedecl3<ImplicitInst_Imported>)
@@ -174,8 +174,8 @@
 
 // Not importing specialization of an imported variable template without
 // explicit dllimport.
-// MSC-DAG: @"\01??$ImportedVarTmpl@UExplicitSpec_NotImported@@@@3HA" = global i32 0, align 4
-// GNU-DAG: @_Z15ImportedVarTmplI24ExplicitSpec_NotImportedE          = global i32 0, align 4
+// MSC-DAG: @"\01??$ImportedVarTmpl@UExplicitSpec_NotImported@@@@3HA" = dso_local global i32 0, align 4
+// GNU-DAG: @_Z15ImportedVarTmplI24ExplicitSpec_NotImportedE          = dso_local global i32 0, align 4
 template<> int ImportedVarTmpl<ExplicitSpec_NotImported>;
 USEVAR(ImportedVarTmpl<ExplicitSpec_NotImported>)
 
@@ -217,31 +217,31 @@
 
 // Import inline function.
 // MSC-DAG: declare dllimport void @"\01?inlineFunc@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z10inlineFuncv()
+// GNU-DAG: define linkonce_odr dso_local void @_Z10inlineFuncv()
 // MO1-DAG: define available_externally dllimport void @"\01?inlineFunc@@YAXXZ"()
-// GO1-DAG: define linkonce_odr void @_Z10inlineFuncv()
+// GO1-DAG: define linkonce_odr dso_local void @_Z10inlineFuncv()
 __declspec(dllimport) inline void inlineFunc() {}
 USE(inlineFunc)
 
 // MSC-DAG: declare dllimport void @"\01?inlineDecl@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z10inlineDeclv()
+// GNU-DAG: define linkonce_odr dso_local void @_Z10inlineDeclv()
 // MO1-DAG: define available_externally dllimport void @"\01?inlineDecl@@YAXXZ"()
-// GO1-DAG: define linkonce_odr void @_Z10inlineDeclv()
+// GO1-DAG: define linkonce_odr dso_local void @_Z10inlineDeclv()
 __declspec(dllimport) inline void inlineDecl();
                              void inlineDecl() {}
 USE(inlineDecl)
 
 // MSC-DAG: declare dllimport void @"\01?inlineDef@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z9inlineDefv()
+// GNU-DAG: define linkonce_odr dso_local void @_Z9inlineDefv()
 // MO1-DAG: define available_externally dllimport void @"\01?inlineDef@@YAXXZ"()
-// GO1-DAG: define linkonce_odr void @_Z9inlineDefv()
+// GO1-DAG: define linkonce_odr dso_local void @_Z9inlineDefv()
 __declspec(dllimport) void inlineDef();
                inline void inlineDef() {}
 USE(inlineDef)
 
 // inline attributes
 // MSC-DAG: declare dllimport void @"\01?noinline@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z8noinlinev()
+// GNU-DAG: define linkonce_odr dso_local void @_Z8noinlinev()
 __declspec(dllimport) __attribute__((noinline)) inline void noinline() {}
 USE(noinline)
 
@@ -259,30 +259,30 @@
 
 // NB: MSC issues a warning and makes redecl2/redecl3 dllexport. We follow GCC
 // and drop the dllimport with a warning.
-// MSC-DAG: declare void @"\01?redecl2@@YAXXZ"()
-// GNU-DAG: declare void @_Z7redecl2v()
+// MSC-DAG: declare dso_local void @"\01?redecl2@@YAXXZ"()
+// GNU-DAG: declare dso_local void @_Z7redecl2v()
 __declspec(dllimport) void redecl2();
                       void redecl2();
 USE(redecl2)
 
-// MSC-DAG: define dllexport void @"\01?redecl3@@YAXXZ"()
-// GNU-DAG: define void @_Z7redecl3v()
+// MSC-DAG: define dso_local dllexport void @"\01?redecl3@@YAXXZ"()
+// GNU-DAG: define dso_local void @_Z7redecl3v()
 __declspec(dllimport) void redecl3();
                       void redecl3() {} // dllimport ignored
 USE(redecl3)
 
 
 // Friend functions
-// MSC-DAG: declare dllimport void @"\01?friend1@@YAXXZ"()
-// GNU-DAG: declare dllimport void @_Z7friend1v()
-// MSC-DAG: declare           void @"\01?friend2@@YAXXZ"()
-// GNU-DAG: declare           void @_Z7friend2v()
-// MSC-DAG: define  dllexport void @"\01?friend3@@YAXXZ"()
-// GNU-DAG: define            void @_Z7friend3v()
-// MSC-DAG: declare           void @"\01?friend4@@YAXXZ"()
-// GNU-DAG: declare           void @_Z7friend4v()
-// MSC-DAG: declare dllimport void @"\01?friend5@@YAXXZ"()
-// GNU-DAG: declare dllimport void @_Z7friend5v()
+// MSC-DAG: declare           dllimport void @"\01?friend1@@YAXXZ"()
+// GNU-DAG: declare           dllimport void @_Z7friend1v()
+// MSC-DAG: declare dso_local           void @"\01?friend2@@YAXXZ"()
+// GNU-DAG: declare dso_local           void @_Z7friend2v()
+// MSC-DAG: define  dso_local dllexport void @"\01?friend3@@YAXXZ"()
+// GNU-DAG: define  dso_local           void @_Z7friend3v()
+// MSC-DAG: declare dso_local           void @"\01?friend4@@YAXXZ"()
+// GNU-DAG: declare dso_local           void @_Z7friend4v()
+// MSC-DAG: declare           dllimport void @"\01?friend5@@YAXXZ"()
+// GNU-DAG: declare           dllimport void @_Z7friend5v()
 
 struct FuncFriend {
   friend __declspec(dllimport) void friend1();
@@ -399,31 +399,31 @@
 
 // Import inline function template.
 // MSC-DAG: declare dllimport void @"\01??$inlineFuncTmpl1@UImplicitInst_Imported@@@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z15inlineFuncTmpl1I21ImplicitInst_ImportedEvv()
+// GNU-DAG: define linkonce_odr dso_local void @_Z15inlineFuncTmpl1I21ImplicitInst_ImportedEvv()
 // MO1-DAG: define available_externally dllimport void @"\01??$inlineFuncTmpl1@UImplicitInst_Imported@@@@YAXXZ"()
-// GO1-DAG: define linkonce_odr void @_Z15inlineFuncTmpl1I21ImplicitInst_ImportedEvv()
+// GO1-DAG: define linkonce_odr dso_local void @_Z15inlineFuncTmpl1I21ImplicitInst_ImportedEvv()
 template<typename T> __declspec(dllimport) inline void inlineFuncTmpl1() {}
 USE(inlineFuncTmpl1<ImplicitInst_Imported>)
 
 // MSC-DAG: declare dllimport void @"\01??$inlineFuncTmpl2@UImplicitInst_Imported@@@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z15inlineFuncTmpl2I21ImplicitInst_ImportedEvv()
+// GNU-DAG: define linkonce_odr dso_local void @_Z15inlineFuncTmpl2I21ImplicitInst_ImportedEvv()
 // MO1-DAG: define available_externally dllimport void @"\01??$inlineFuncTmpl2@UImplicitInst_Imported@@@@YAXXZ"()
-// GO1-DAG: define linkonce_odr void @_Z15inlineFuncTmpl2I21ImplicitInst_ImportedEvv()
+// GO1-DAG: define linkonce_odr dso_local void @_Z15inlineFuncTmpl2I21ImplicitInst_ImportedEvv()
 template<typename T> inline void __attribute__((dllimport)) inlineFuncTmpl2() {}
 USE(inlineFuncTmpl2<ImplicitInst_Imported>)
 
-// MSC-DAG: define linkonce_odr void @"\01??$inlineFuncTmplDecl@UImplicitInst_Imported@@@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z18inlineFuncTmplDeclI21ImplicitInst_ImportedEvv()
-// MO1-DAG: define linkonce_odr void @"\01??$inlineFuncTmplDecl@UImplicitInst_Imported@@@@YAXXZ"()
-// GO1-DAG: define linkonce_odr void @_Z18inlineFuncTmplDeclI21ImplicitInst_ImportedEvv()
+// MSC-DAG: define linkonce_odr dso_local void @"\01??$inlineFuncTmplDecl@UImplicitInst_Imported@@@@YAXXZ"()
+// GNU-DAG: define linkonce_odr dso_local void @_Z18inlineFuncTmplDeclI21ImplicitInst_ImportedEvv()
+// MO1-DAG: define linkonce_odr dso_local void @"\01??$inlineFuncTmplDecl@UImplicitInst_Imported@@@@YAXXZ"()
+// GO1-DAG: define linkonce_odr dso_local void @_Z18inlineFuncTmplDeclI21ImplicitInst_ImportedEvv()
 template<typename T> __declspec(dllimport) inline void inlineFuncTmplDecl();
 template<typename T>                              void inlineFuncTmplDecl() {}
 USE(inlineFuncTmplDecl<ImplicitInst_Imported>)
 
-// MSC-DAG: define linkonce_odr void @"\01??$inlineFuncTmplDef@UImplicitInst_Imported@@@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z17inlineFuncTmplDefI21ImplicitInst_ImportedEvv()
-// MO1-DAG: define linkonce_odr void @"\01??$inlineFuncTmplDef@UImplicitInst_Imported@@@@YAXXZ"()
-// GO1-DAG: define linkonce_odr void @_Z17inlineFuncTmplDefI21ImplicitInst_ImportedEvv()
+// MSC-DAG: define linkonce_odr dso_local void @"\01??$inlineFuncTmplDef@UImplicitInst_Imported@@@@YAXXZ"()
+// GNU-DAG: define linkonce_odr dso_local void @_Z17inlineFuncTmplDefI21ImplicitInst_ImportedEvv()
+// MO1-DAG: define linkonce_odr dso_local void @"\01??$inlineFuncTmplDef@UImplicitInst_Imported@@@@YAXXZ"()
+// GO1-DAG: define linkonce_odr dso_local void @_Z17inlineFuncTmplDefI21ImplicitInst_ImportedEvv()
 template<typename T> __declspec(dllimport) void inlineFuncTmplDef();
 template<typename T>                inline void inlineFuncTmplDef() {}
 USE(inlineFuncTmplDef<ImplicitInst_Imported>)
@@ -436,14 +436,14 @@
 template<typename T> __declspec(dllimport) void funcTmplRedecl1();
 USE(funcTmplRedecl1<ImplicitInst_Imported>)
 
-// MSC-DAG: declare void @"\01??$funcTmplRedecl2@UImplicitInst_NotImported@@@@YAXXZ"()
-// GNU-DAG: declare void @_Z15funcTmplRedecl2I24ImplicitInst_NotImportedEvv()
+// MSC-DAG: declare dso_local void @"\01??$funcTmplRedecl2@UImplicitInst_NotImported@@@@YAXXZ"()
+// GNU-DAG: declare dso_local void @_Z15funcTmplRedecl2I24ImplicitInst_NotImportedEvv()
 template<typename T> __declspec(dllimport) void funcTmplRedecl2();
 template<typename T>                       void funcTmplRedecl2(); // dllimport ignored
 USE(funcTmplRedecl2<ImplicitInst_NotImported>)
 
-// MSC-DAG: define linkonce_odr void @"\01??$funcTmplRedecl3@UImplicitInst_NotImported@@@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z15funcTmplRedecl3I24ImplicitInst_NotImportedEvv()
+// MSC-DAG: define linkonce_odr dso_local void @"\01??$funcTmplRedecl3@UImplicitInst_NotImported@@@@YAXXZ"()
+// GNU-DAG: define linkonce_odr dso_local void @_Z15funcTmplRedecl3I24ImplicitInst_NotImportedEvv()
 template<typename T> __declspec(dllimport) void funcTmplRedecl3();
 template<typename T>                       void funcTmplRedecl3() {} // dllimport ignored
 USE(funcTmplRedecl3<ImplicitInst_NotImported>)
@@ -452,12 +452,12 @@
 // Function template friends
 // MSC-DAG: declare dllimport   void @"\01??$funcTmplFriend1@UImplicitInst_Imported@@@@YAXXZ"()
 // GNU-DAG: declare dllimport   void @_Z15funcTmplFriend1I21ImplicitInst_ImportedEvv()
-// MSC-DAG: declare             void @"\01??$funcTmplFriend2@UImplicitInst_NotImported@@@@YAXXZ"()
-// GNU-DAG: declare             void @_Z15funcTmplFriend2I24ImplicitInst_NotImportedEvv()
-// MSC-DAG: define linkonce_odr void @"\01??$funcTmplFriend3@UImplicitInst_NotImported@@@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z15funcTmplFriend3I24ImplicitInst_NotImportedEvv()
-// MSC-DAG: define linkonce_odr void @"\01??$funcTmplFriend4@UImplicitInst_Imported@@@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z15funcTmplFriend4I21ImplicitInst_ImportedEvv()
+// MSC-DAG: declare             dso_local void @"\01??$funcTmplFriend2@UImplicitInst_NotImported@@@@YAXXZ"()
+// GNU-DAG: declare             dso_local void @_Z15funcTmplFriend2I24ImplicitInst_NotImportedEvv()
+// MSC-DAG: define linkonce_odr dso_local void @"\01??$funcTmplFriend3@UImplicitInst_NotImported@@@@YAXXZ"()
+// GNU-DAG: define linkonce_odr dso_local void @_Z15funcTmplFriend3I24ImplicitInst_NotImportedEvv()
+// MSC-DAG: define linkonce_odr dso_local void @"\01??$funcTmplFriend4@UImplicitInst_Imported@@@@YAXXZ"()
+// GNU-DAG: define linkonce_odr dso_local void @_Z15funcTmplFriend4I21ImplicitInst_ImportedEvv()
 struct FuncTmplFriend {
   template<typename T> friend __declspec(dllimport) void funcTmplFriend1();
   template<typename T> friend __declspec(dllimport) void funcTmplFriend2();
@@ -490,24 +490,24 @@
 USE(importedFuncTmplDecl<ImplicitInst_Imported>)
 
 // MSC-DAG: declare dllimport void @"\01??$importedFuncTmpl@UImplicitInst_Imported@@@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z16importedFuncTmplI21ImplicitInst_ImportedEvv()
+// GNU-DAG: define linkonce_odr dso_local void @_Z16importedFuncTmplI21ImplicitInst_ImportedEvv()
 // MO1-DAG: define available_externally dllimport void @"\01??$importedFuncTmpl@UImplicitInst_Imported@@@@YAXXZ"()
-// GO1-DAG: define linkonce_odr void @_Z16importedFuncTmplI21ImplicitInst_ImportedEvv()
+// GO1-DAG: define linkonce_odr dso_local void @_Z16importedFuncTmplI21ImplicitInst_ImportedEvv()
 USE(importedFuncTmpl<ImplicitInst_Imported>)
 
 // Import explicit instantiation declaration of an imported function template.
 // MSC-DAG: declare dllimport void @"\01??$importedFuncTmpl@UExplicitDecl_Imported@@@@YAXXZ"()
-// GNU-DAG: declare void @_Z16importedFuncTmplI21ExplicitDecl_ImportedEvv()
+// GNU-DAG: declare dso_local void @_Z16importedFuncTmplI21ExplicitDecl_ImportedEvv()
 // MO1-DAG: define available_externally dllimport void @"\01??$importedFuncTmpl@UExplicitDecl_Imported@@@@YAXXZ"()
-// GO1-DAG: define available_externally void @_Z16importedFuncTmplI21ExplicitDecl_ImportedEvv()
+// GO1-DAG: define available_externally dso_local void @_Z16importedFuncTmplI21ExplicitDecl_ImportedEvv()
 extern template void importedFuncTmpl<ExplicitDecl_Imported>();
 USE(importedFuncTmpl<ExplicitDecl_Imported>)
 
 // Import explicit instantiation definition of an imported function template.
 // MSC-DAG: declare dllimport void @"\01??$importedFuncTmpl@UExplicitInst_Imported@@@@YAXXZ"()
-// GNU-DAG: define weak_odr void @_Z16importedFuncTmplI21ExplicitInst_ImportedEvv()
+// GNU-DAG: define weak_odr dso_local void @_Z16importedFuncTmplI21ExplicitInst_ImportedEvv()
 // MO1-DAG: define available_externally dllimport void @"\01??$importedFuncTmpl@UExplicitInst_Imported@@@@YAXXZ"()
-// GO1-DAG: define weak_odr void @_Z16importedFuncTmplI21ExplicitInst_ImportedEvv()
+// GO1-DAG: define weak_odr dso_local void @_Z16importedFuncTmplI21ExplicitInst_ImportedEvv()
 template void importedFuncTmpl<ExplicitInst_Imported>();
 USE(importedFuncTmpl<ExplicitInst_Imported>)
 
@@ -526,9 +526,9 @@
 #endif
 
 // MSC-DAG: declare dllimport void @"\01??$importedFuncTmplDecl@UExplicitSpec_InlineDef_Imported@@@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z20importedFuncTmplDeclI31ExplicitSpec_InlineDef_ImportedEvv()
+// GNU-DAG: define linkonce_odr dso_local void @_Z20importedFuncTmplDeclI31ExplicitSpec_InlineDef_ImportedEvv()
 // MO1-DAG: define available_externally dllimport void @"\01??$importedFuncTmplDecl@UExplicitSpec_InlineDef_Imported@@@@YAXXZ"()
-// GO1-DAG: define linkonce_odr void @_Z20importedFuncTmplDeclI31ExplicitSpec_InlineDef_ImportedEvv()
+// GO1-DAG: define linkonce_odr dso_local void @_Z20importedFuncTmplDeclI31ExplicitSpec_InlineDef_ImportedEvv()
 template<> __declspec(dllimport) inline void importedFuncTmplDecl<ExplicitSpec_InlineDef_Imported>() {}
 USE(importedFuncTmplDecl<ExplicitSpec_InlineDef_Imported>)
 
@@ -546,17 +546,17 @@
 #endif
 
 // MSC-DAG: declare dllimport void @"\01??$importedFuncTmpl@UExplicitSpec_InlineDef_Imported@@@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z16importedFuncTmplI31ExplicitSpec_InlineDef_ImportedEvv()
+// GNU-DAG: define linkonce_odr dso_local void @_Z16importedFuncTmplI31ExplicitSpec_InlineDef_ImportedEvv()
 // MO1-DAG: define available_externally dllimport void @"\01??$importedFuncTmpl@UExplicitSpec_InlineDef_Imported@@@@YAXXZ"()
-// GO1-DAG: define linkonce_odr void @_Z16importedFuncTmplI31ExplicitSpec_InlineDef_ImportedEvv()
+// GO1-DAG: define linkonce_odr dso_local void @_Z16importedFuncTmplI31ExplicitSpec_InlineDef_ImportedEvv()
 template<> __declspec(dllimport) inline void importedFuncTmpl<ExplicitSpec_InlineDef_Imported>() {}
 USE(importedFuncTmpl<ExplicitSpec_InlineDef_Imported>)
 
 
 // Not importing specialization of an imported function template without
 // explicit dllimport.
-// MSC-DAG: define void @"\01??$importedFuncTmpl@UExplicitSpec_NotImported@@@@YAXXZ"()
-// GNU-DAG: define void @_Z16importedFuncTmplI24ExplicitSpec_NotImportedEvv()
+// MSC-DAG: define dso_local void @"\01??$importedFuncTmpl@UExplicitSpec_NotImported@@@@YAXXZ"()
+// GNU-DAG: define dso_local void @_Z16importedFuncTmplI24ExplicitSpec_NotImportedEvv()
 template<> void importedFuncTmpl<ExplicitSpec_NotImported>() {}
 USE(importedFuncTmpl<ExplicitSpec_NotImported>)
 
@@ -565,9 +565,9 @@
 // MSC-DAG: declare dllimport void @"\01??$funcTmpl@UExplicitDecl_Imported@@@@YAXXZ"()
 // MSC-DAG: declare dllimport void @"\01??$inlineFuncTmpl@UExplicitDecl_Imported@@@@YAXXZ"()
 // GNU-DAG: declare dllimport void @_Z8funcTmplI21ExplicitDecl_ImportedEvv()
-// GNU-DAG: declare void @_Z14inlineFuncTmplI21ExplicitDecl_ImportedEvv()
+// GNU-DAG: declare dso_local void @_Z14inlineFuncTmplI21ExplicitDecl_ImportedEvv()
 // MO1-DAG: define available_externally dllimport void @"\01??$inlineFuncTmpl@UExplicitDecl_Imported@@@@YAXXZ"()
-// GO1-DAG: define available_externally void @_Z14inlineFuncTmplI21ExplicitDecl_ImportedEvv()
+// GO1-DAG: define available_externally dso_local void @_Z14inlineFuncTmplI21ExplicitDecl_ImportedEvv()
 extern template __declspec(dllimport) void funcTmpl<ExplicitDecl_Imported>();
 extern template __declspec(dllimport) void inlineFuncTmpl<ExplicitDecl_Imported>();
 USE(funcTmpl<ExplicitDecl_Imported>)
@@ -578,11 +578,11 @@
 // MSC-DAG: declare dllimport void @"\01??$funcTmpl@UExplicitInst_Imported@@@@YAXXZ"()
 // MSC-DAG: declare dllimport void @"\01??$inlineFuncTmpl@UExplicitInst_Imported@@@@YAXXZ"()
 // GNU-DAG: declare dllimport void @_Z8funcTmplI21ExplicitInst_ImportedEvv()
-// GNU-DAG: define weak_odr void @_Z14inlineFuncTmplI21ExplicitInst_ImportedEvv()
-// MO1-DAG: define available_externally dllimport void @"\01??$funcTmpl@UExplicitInst_Imported@@@@YAXXZ"()
+// GNU-DAG: define weak_odr dso_local void @_Z14inlineFuncTmplI21ExplicitInst_ImportedEvv()
+// MO1-DAG: declare dllimport void @"\01??$funcTmpl@UExplicitInst_Imported@@@@YAXXZ"()
 // MO1-DAG: define available_externally dllimport void @"\01??$inlineFuncTmpl@UExplicitInst_Imported@@@@YAXXZ"()
 // GO1-DAG: define available_externally dllimport void @_Z8funcTmplI21ExplicitInst_ImportedEvv()
-// GO1-DAG: define weak_odr void @_Z14inlineFuncTmplI21ExplicitInst_ImportedEvv()
+// GO1-DAG: define weak_odr dso_local void @_Z14inlineFuncTmplI21ExplicitInst_ImportedEvv()
 template __declspec(dllimport) void funcTmpl<ExplicitInst_Imported>();
 template __declspec(dllimport) void inlineFuncTmpl<ExplicitInst_Imported>();
 USE(funcTmpl<ExplicitInst_Imported>)
@@ -603,12 +603,21 @@
 #endif
 
 // MSC-DAG: declare dllimport void @"\01??$funcTmpl@UExplicitSpec_InlineDef_Imported@@@@YAXXZ"()
-// GNU-DAG: define linkonce_odr void @_Z8funcTmplI31ExplicitSpec_InlineDef_ImportedEvv()
+// GNU-DAG: define linkonce_odr dso_local void @_Z8funcTmplI31ExplicitSpec_InlineDef_ImportedEvv()
 // MO1-DAG: define available_externally dllimport void @"\01??$funcTmpl@UExplicitSpec_InlineDef_Imported@@@@YAXXZ"()
-// GO1-DAG: define linkonce_odr void @_Z8funcTmplI31ExplicitSpec_InlineDef_ImportedEvv()
+// GO1-DAG: define linkonce_odr dso_local void @_Z8funcTmplI31ExplicitSpec_InlineDef_ImportedEvv()
 template<> __declspec(dllimport) inline void funcTmpl<ExplicitSpec_InlineDef_Imported>() {}
 USE(funcTmpl<ExplicitSpec_InlineDef_Imported>)
 
+#ifdef MSABI
+namespace pr35435 {
+struct X;
+template <typename T> struct __declspec(dllimport) S {
+  void foo(T *t) { t->problem(); }
+};
+template void S<X>::foo(X*); // Cannot be instantiated because X is incomplete; dllimport means it's treated as an instantiation decl.
+}
+#endif
 
 
 //===----------------------------------------------------------------------===//
@@ -631,7 +640,7 @@
 
   T& operator=(T&&) = default;
   // Note: Don't mark inline move operators dllimport because current MSVC versions don't export them.
-  // M18-DAG: define linkonce_odr x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.T* @"\01??4T@@QAEAAU0@$$QAU0@@Z"
+  // M18-DAG: define linkonce_odr dso_local x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.T* @"\01??4T@@QAEAAU0@$$QAU0@@Z"
   // M19-DAG: define available_externally dllimport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.T* @"\01??4T@@QAEAAU0@$$QAU0@@Z"
 };
 USEMEMFUNC(T, a)
@@ -684,7 +693,7 @@
 
 namespace Vtordisp {
   // Don't dllimport the vtordisp.
-  // MO1-DAG: define linkonce_odr x86_thiscallcc void @"\01?f@?$C@H@Vtordisp@@$4PPPPPPPM@A@AEXXZ"
+  // MO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @"\01?f@?$C@H@Vtordisp@@$4PPPPPPPM@A@AEXXZ"
 
   class __declspec(dllimport) Base {
     virtual void f() {}
@@ -791,7 +800,7 @@
 template <typename T> struct PartiallySpecializedClassTemplate {};
 template <typename T> struct __declspec(dllimport) PartiallySpecializedClassTemplate<T*> { void f(); };
 USEMEMFUNC(PartiallySpecializedClassTemplate<void*>, f);
-// M32-DAG: declare x86_thiscallcc void @"\01?f@?$PartiallySpecializedClassTemplate@PAX@@QAEXXZ"
+// M32-DAG: declare dso_local x86_thiscallcc void @"\01?f@?$PartiallySpecializedClassTemplate@PAX@@QAEXXZ"
 // G32-DAG: declare dllimport x86_thiscallcc void @_ZN33PartiallySpecializedClassTemplateIPvE1fEv
 
 // Attributes on explicit specializations are honored.
@@ -806,7 +815,7 @@
 template <typename T> struct PartiallySpecializedImportedClassTemplate<T*> { void f() {} };
 USEMEMFUNC(PartiallySpecializedImportedClassTemplate<void*>, f);
 // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc void @"\01?f@?$PartiallySpecializedImportedClassTemplate@PAX@@QAEXXZ"
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN41PartiallySpecializedImportedClassTemplateIPvE1fEv
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN41PartiallySpecializedImportedClassTemplateIPvE1fEv
 
 // Attributes on the instantiation take precedence over attributes on the template.
 template <typename T> struct __declspec(dllexport) ExplicitlyInstantiatedWithDifferentAttr { void f() {} };
@@ -821,7 +830,7 @@
 USEMEMFUNC(ExplicitInstantiationDeclImportedDefTemplate<int>, f);
 // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc void @"\01?f@?$ExplicitInstantiationDeclImportedDefTemplate@H@@QAEXXZ"
 // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc %struct.ExplicitInstantiationDeclImportedDefTemplate* @"\01??0?$ExplicitInstantiationDeclImportedDefTemplate@H@@QAE@XZ"
-// G32-DAG: define weak_odr x86_thiscallcc void @_ZN44ExplicitInstantiationDeclImportedDefTemplateIiE1fEv
+// G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN44ExplicitInstantiationDeclImportedDefTemplateIiE1fEv
 
 template <typename T> struct __declspec(dllimport) ExplicitInstantiationDeclExportedDefImportedTemplate { void f() {} ExplicitInstantiationDeclExportedDefImportedTemplate() {} };
 extern template struct __declspec(dllimport) ExplicitInstantiationDeclExportedDefImportedTemplate <int>;
@@ -852,7 +861,7 @@
   // functions are emitted unless they are used.
 
   USEMEMFUNC(basic_ostream<char>::sentry, foo);
-  // M32-DAG: define linkonce_odr x86_thiscallcc void @"\01?foo@sentry@?$basic_ostream@D@PR27810@@QAEXXZ"
+  // M32-DAG: define linkonce_odr dso_local x86_thiscallcc void @"\01?foo@sentry@?$basic_ostream@D@PR27810@@QAEXXZ"
   // M32-NOT: ??0sentry@?$basic_ostream@D@PR27810@@QAE@XZ
 }
 
@@ -898,7 +907,7 @@
 struct __declspec(dllimport) DerivedFromTemplate : public ClassTemplate<int> {};
 USEMEMFUNC(ClassTemplate<int>, func)
 // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc void @"\01?func@?$ClassTemplate@H@@QAEXXZ"
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ClassTemplateIiE4funcEv
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ClassTemplateIiE4funcEv
 
 // ImportedTemplate is explicitly imported.
 struct __declspec(dllimport) DerivedFromImportedTemplate : public ImportedClassTemplate<int> {};
@@ -909,34 +918,34 @@
 // ExportedTemplate is explicitly exported.
 struct __declspec(dllimport) DerivedFromExportedTemplate : public ExportedClassTemplate<int> {};
 USEMEMFUNC(ExportedClassTemplate<int>, func)
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?func@?$ExportedClassTemplate@H@@QAEXXZ"
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN21ExportedClassTemplateIiE4funcEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?func@?$ExportedClassTemplate@H@@QAEXXZ"
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN21ExportedClassTemplateIiE4funcEv
 
 // Base class already implicitly instantiated without attribute.
 struct DerivedFromTemplateD : public ClassTemplate<double> {};
 struct __declspec(dllimport) DerivedFromTemplateD2 : public ClassTemplate<double> {};
 USEMEMFUNC(ClassTemplate<double>, func)
 // M32-DAG: declare dllimport x86_thiscallcc void @"\01?func@?$ClassTemplate@N@@QAEXXZ"
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ClassTemplateIdE4funcEv
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ClassTemplateIdE4funcEv
 
 // MS: Base class already instantiated with dfferent attribute.
 struct __declspec(dllexport) DerivedFromTemplateB : public ClassTemplate<bool> {};
 struct __declspec(dllimport) DerivedFromTemplateB2 : public ClassTemplate<bool> {};
 USEMEMFUNC(ClassTemplate<bool>, func)
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?func@?$ClassTemplate@_N@@QAEXXZ"
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN13ClassTemplateIbE4funcEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?func@?$ClassTemplate@_N@@QAEXXZ"
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ClassTemplateIbE4funcEv
 
 // Base class already specialized without dll attribute.
 struct __declspec(dllimport) DerivedFromExplicitlySpecializedTemplate : public ExplicitlySpecializedTemplate<int> {};
 USEMEMFUNC(ExplicitlySpecializedTemplate<int>, func)
-// M32-DAG: define linkonce_odr x86_thiscallcc void @"\01?func@?$ExplicitlySpecializedTemplate@H@@QAEXXZ"
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN29ExplicitlySpecializedTemplateIiE4funcEv
+// M32-DAG: define linkonce_odr dso_local x86_thiscallcc void @"\01?func@?$ExplicitlySpecializedTemplate@H@@QAEXXZ"
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN29ExplicitlySpecializedTemplateIiE4funcEv
 
 // Base class alredy specialized with export attribute.
 struct __declspec(dllimport) DerivedFromExplicitlyExportSpecializedTemplate : public ExplicitlyExportSpecializedTemplate<int> {};
 USEMEMFUNC(ExplicitlyExportSpecializedTemplate<int>, func)
-// M32-DAG: define dllexport x86_thiscallcc void @"\01?func@?$ExplicitlyExportSpecializedTemplate@H@@QAEXXZ"
-// G32-DAG: define dllexport x86_thiscallcc void @_ZN35ExplicitlyExportSpecializedTemplateIiE4funcEv
+// M32-DAG: define dso_local dllexport x86_thiscallcc void @"\01?func@?$ExplicitlyExportSpecializedTemplate@H@@QAEXXZ"
+// G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN35ExplicitlyExportSpecializedTemplateIiE4funcEv
 
 // Base class already specialized with import attribute.
 struct __declspec(dllimport) DerivedFromExplicitlyImportSpecializedTemplate : public ExplicitlyImportSpecializedTemplate<int> {};
@@ -947,14 +956,14 @@
 // Base class already instantiated without dll attribute.
 struct __declspec(dllimport) DerivedFromExplicitlyInstantiatedTemplate : public ExplicitlyInstantiatedTemplate<int> {};
 USEMEMFUNC(ExplicitlyInstantiatedTemplate<int>, func)
-// M32-DAG: define weak_odr x86_thiscallcc void @"\01?func@?$ExplicitlyInstantiatedTemplate@H@@QAEXXZ"
-// G32-DAG: define weak_odr x86_thiscallcc void @_ZN30ExplicitlyInstantiatedTemplateIiE4funcEv
+// M32-DAG: define weak_odr dso_local x86_thiscallcc void @"\01?func@?$ExplicitlyInstantiatedTemplate@H@@QAEXXZ"
+// G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN30ExplicitlyInstantiatedTemplateIiE4funcEv
 
 // Base class already instantiated with export attribute.
 struct __declspec(dllimport) DerivedFromExplicitlyExportInstantiatedTemplate : public ExplicitlyExportInstantiatedTemplate<int> {};
 USEMEMFUNC(ExplicitlyExportInstantiatedTemplate<int>, func)
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01?func@?$ExplicitlyExportInstantiatedTemplate@H@@QAEXXZ"
-// G32-DAG: define weak_odr dllexport x86_thiscallcc void @_ZN36ExplicitlyExportInstantiatedTemplateIiE4funcEv
+// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"\01?func@?$ExplicitlyExportInstantiatedTemplate@H@@QAEXXZ"
+// G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN36ExplicitlyExportInstantiatedTemplateIiE4funcEv
 
 // Base class already instantiated with import attribute.
 struct __declspec(dllimport) DerivedFromExplicitlyImportInstantiatedTemplate : public ExplicitlyImportInstantiatedTemplate<int> {};
@@ -968,7 +977,7 @@
 struct __declspec(dllimport) BottomClass : public MiddleClass<int> { };
 USEMEMFUNC(TopClass<int>, func)
 // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc void @"\01?func@?$TopClass@H@@QAEXXZ"
-// G32-DAG: define linkonce_odr x86_thiscallcc void @_ZN8TopClassIiE4funcEv
+// G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN8TopClassIiE4funcEv
 
 template <typename T> struct ExplicitInstantiationDeclTemplateBase { void func() {} };
 extern template struct ExplicitInstantiationDeclTemplateBase<int>;
@@ -976,7 +985,7 @@
 template struct ExplicitInstantiationDeclTemplateBase<int>;
 USEMEMFUNC(ExplicitInstantiationDeclTemplateBase<int>, func)
 // M32-DAG: declare dllimport x86_thiscallcc void @"\01?func@?$ExplicitInstantiationDeclTemplateBase@H@@QAEXXZ"
-// G32-DAG: define weak_odr x86_thiscallcc void @_ZN37ExplicitInstantiationDeclTemplateBaseIiE4funcEv
+// G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN37ExplicitInstantiationDeclTemplateBaseIiE4funcEv
 
 template <typename T> struct ExplicitInstantiationDeclTemplateBase2 { void func() {} };
 extern template struct ExplicitInstantiationDeclTemplateBase2<int>;
@@ -984,4 +993,4 @@
 template struct __declspec(dllexport) ExplicitInstantiationDeclTemplateBase2<int>;
 USEMEMFUNC(ExplicitInstantiationDeclTemplateBase2<int>, func)
 // M32-DAG: declare dllimport x86_thiscallcc void @"\01?func@?$ExplicitInstantiationDeclTemplateBase2@H@@QAEXXZ"
-// G32-DAG: define weak_odr x86_thiscallcc void @_ZN38ExplicitInstantiationDeclTemplateBase2IiE4funcEv
+// G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN38ExplicitInstantiationDeclTemplateBase2IiE4funcEv
diff --git a/test/CodeGenCXX/dso-local-executable.cpp b/test/CodeGenCXX/dso-local-executable.cpp
new file mode 100644
index 0000000..cc8d50f
--- /dev/null
+++ b/test/CodeGenCXX/dso-local-executable.cpp
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux -mrelocation-model static -O1 -emit-llvm %s -o - | FileCheck --check-prefix=STATIC %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux -mrelocation-model static -fno-plt -O1 -emit-llvm %s -o - | FileCheck --check-prefix=NOPLT %s
+
+// STATIC-DAG: @_ZTV1C = linkonce_odr dso_local unnamed_addr constant
+// STATIC-DAG: @_ZTS1C = linkonce_odr dso_local constant
+// STATIC-DAG: @_ZTI1C = linkonce_odr dso_local constant
+// STATIC-DAG: @_ZZ14useStaticLocalvE3obj = linkonce_odr dso_local global
+// STATIC-DAG: @_ZGVZN5guard1gEvE1a = linkonce_odr dso_local global
+// STATIC-DAG: define dso_local void @_ZN1CC2Ev(
+// STATIC-DAG: define dso_local void @_ZN1CC1Ev(
+// STATIC-DAG: define linkonce_odr dso_local void @_ZN1C3fooEv(
+
+// NOPLT-DAG: @_ZTV1C = linkonce_odr dso_local unnamed_addr constant
+// NOPLT-DAG: @_ZTS1C = linkonce_odr dso_local constant
+// NOPLT-DAG: @_ZTI1C = linkonce_odr dso_local constant
+// NOPLT-DAG: @_ZZ14useStaticLocalvE3obj = linkonce_odr dso_local global
+// NOPLT-DAG: @_ZGVZN5guard1gEvE1a = linkonce_odr dso_local global
+// NOPLT-DAG: define dso_local void @_ZN1CC2Ev(
+// NOPLT-DAG: define dso_local void @_ZN1CC1Ev(
+// NOPLT-DAG: define linkonce_odr dso_local void @_ZN1C3fooEv(
+
+struct C {
+  C();
+  virtual void foo() {}
+};
+C::C() {}
+
+struct HasVTable {
+  virtual void f();
+};
+inline HasVTable &useStaticLocal() {
+  static HasVTable obj;
+  return obj;
+}
+void useit() {
+  useStaticLocal();
+}
+
+namespace guard {
+int f();
+inline int g() {
+  static int a = f();
+  return a;
+}
+int h() {
+  return g();
+}
+} // namespace guard
+
+
+// STATIC-DAG: define available_externally dso_local void @_ZN5test23barIcEC1Ev(
+// NOPLT-DAG: define available_externally void @_ZN5test23barIcEC1Ev(
+namespace test2 {
+void foo();
+template <typename T>
+struct bar {
+  virtual void zed();
+  bar() { foo(); }
+};
+extern template class bar<char>;
+bar<char> abc;
+} // namespace test2
diff --git a/test/CodeGenCXX/duplicate-mangled-name.cpp b/test/CodeGenCXX/duplicate-mangled-name.cpp
index 8c8f6e0..cebba02 100644
--- a/test/CodeGenCXX/duplicate-mangled-name.cpp
+++ b/test/CodeGenCXX/duplicate-mangled-name.cpp
@@ -34,7 +34,7 @@
 namespace nm {
   float abc = 2;
 }
-// CHECK: @_ZN2nm3abcE = global float
+// CHECK: @_ZN2nm3abcE = {{(dso_local )?}}global float
 
 float foo() {
   _ZN1TD1Ev();
diff --git a/test/CodeGenCXX/eh.cpp b/test/CodeGenCXX/eh.cpp
index dfbe48c..d393ee1 100644
--- a/test/CodeGenCXX/eh.cpp
+++ b/test/CodeGenCXX/eh.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -triple x86_64-apple-darwin -std=c++11 -emit-llvm %s -o %t.ll
-// RUN: FileCheck --input-file=%t.ll %s
+// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -triple x86_64-apple-darwin -std=c++11 -emit-llvm %s -o - \
+// RUN:   | FileCheck %s
 
 struct test1_D {
   double d;
@@ -13,7 +13,7 @@
 // CHECK:       [[EXNOBJ:%.*]] = call i8* @__cxa_allocate_exception(i64 8)
 // CHECK-NEXT:  [[EXN:%.*]] = bitcast i8* [[EXNOBJ]] to [[DSTAR:%[^*]*\*]]
 // CHECK-NEXT:  [[EXN2:%.*]] = bitcast [[DSTAR]] [[EXN]] to i8*
-// CHECK-NEXT:  call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[EXN2]], i8* bitcast ([[DSTAR]] @d1 to i8*), i64 8, i32 8, i1 false)
+// CHECK-NEXT:  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[EXN2]], i8* align 8 bitcast ([[DSTAR]] @d1 to i8*), i64 8, i1 false)
 // CHECK-NEXT:  call void @__cxa_throw(i8* [[EXNOBJ]], i8* bitcast ({ i8*, i8* }* @_ZTI7test1_D to i8*), i8* null) [[NR:#[0-9]+]]
 // CHECK-NEXT:  unreachable
 
@@ -466,7 +466,7 @@
   // CHECK: [[T0:%.*]] = call i8* @__cxa_allocate_exception(i64 16)
   // CHECK-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to %"class.test17::DerivedException"*
   // CHECK-NEXT: [[T2:%.*]] = bitcast %"class.test17::DerivedException"* [[T1]] to i8*
-  // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T2]], i8 0, i64 16, i32 16, i1 false)
+  // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[T2]], i8 0, i64 16, i1 false)
 }
 }
 
diff --git a/test/CodeGenCXX/exceptions-cxx-ehsc.cpp b/test/CodeGenCXX/exceptions-cxx-ehsc.cpp
index c660d14..e98542e 100644
--- a/test/CodeGenCXX/exceptions-cxx-ehsc.cpp
+++ b/test/CodeGenCXX/exceptions-cxx-ehsc.cpp
@@ -11,7 +11,7 @@
   may_throw();
 }
 }
-// CHECK-LABEL: define void @"\01?caller@test1@@YAXXZ"(
+// CHECK-LABEL: define dso_local void @"\01?caller@test1@@YAXXZ"(
 // CHECK: call void @never_throws(
 // CHECK: invoke void @"\01?may_throw@test1@@YAXXZ"(
 
@@ -26,6 +26,6 @@
   may_throw();
 }
 }
-// CHECK-LABEL: define void @"\01?caller@test2@@YAXXZ"(
+// CHECK-LABEL: define dso_local void @"\01?caller@test2@@YAXXZ"(
 // CHECK: invoke void @throws_int(
 // CHECK: invoke void @"\01?may_throw@test2@@YAXXZ"(
diff --git a/test/CodeGenCXX/exceptions-cxx-new.cpp b/test/CodeGenCXX/exceptions-cxx-new.cpp
index 9836da8..3a8ce7d 100644
--- a/test/CodeGenCXX/exceptions-cxx-new.cpp
+++ b/test/CodeGenCXX/exceptions-cxx-new.cpp
@@ -12,7 +12,7 @@
   }
 }
 
-// CHECK-LABEL: define void @"\01?test_catch@@YAXXZ"(
+// CHECK-LABEL: define dso_local void @"\01?test_catch@@YAXXZ"(
 // CHECK:   invoke i32 @"\01?f@@YAHH@Z"(i32 1)
 // CHECK:         to label %[[NORMAL:.*]] unwind label %[[CATCHSWITCH:.*]]
 
@@ -50,7 +50,7 @@
   f(1);
 }
 
-// CHECK-LABEL: define {{.*}} @"\01?test_cleanup@@YAXXZ"(
+// CHECK-LABEL: define dso_local {{.*}} @"\01?test_cleanup@@YAXXZ"(
 // CHECK:   invoke i32 @"\01?f@@YAHH@Z"(i32 1)
 // CHECK:           to label %[[LEAVE_FUNC:.*]] unwind label %[[CLEANUP:.*]]
 
diff --git a/test/CodeGenCXX/exceptions-seh-filter-captures.cpp b/test/CodeGenCXX/exceptions-seh-filter-captures.cpp
index ab75a87..1f4749f 100644
--- a/test/CodeGenCXX/exceptions-seh-filter-captures.cpp
+++ b/test/CodeGenCXX/exceptions-seh-filter-captures.cpp
@@ -14,7 +14,7 @@
   }
 }
 
-// CHECK-LABEL: define void @test_freefunc(i32 %p1)
+// CHECK-LABEL: define dso_local void @test_freefunc(i32 %p1)
 // CHECK: @llvm.localescape(i32* %[[p1_ptr:[^, ]*]], i32* %[[l1_ptr:[^, ]*]])
 // CHECK: store i32 %p1, i32* %[[p1_ptr]], align 4
 // CHECK: store i32 13, i32* %[[l1_ptr]], align 4
@@ -45,7 +45,7 @@
   }
 }
 
-// CHECK-LABEL: define void @"\01?test_method@S@@QEAAXXZ"(%struct.S* %this)
+// CHECK-LABEL: define dso_local void @"\01?test_method@S@@QEAAXXZ"(%struct.S* %this)
 // CHECK: @llvm.localescape(i32* %[[l1_addr:[^, ]*]])
 // CHECK: store i32 13, i32* %[[l1_addr]], align 4
 // CHECK: invoke void @might_crash()
diff --git a/test/CodeGenCXX/exceptions-seh.cpp b/test/CodeGenCXX/exceptions-seh.cpp
index fa2d834..84159eb 100644
--- a/test/CodeGenCXX/exceptions-seh.cpp
+++ b/test/CodeGenCXX/exceptions-seh.cpp
@@ -21,7 +21,7 @@
 
 // Make sure we use __CxxFrameHandler3 for C++ EH.
 
-// CXXEH-LABEL: define void @use_cxx()
+// CXXEH-LABEL: define dso_local void @use_cxx()
 // CXXEH-SAME:  personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
 // CXXEH: call %struct.HasCleanup* @"\01??0HasCleanup@@QEAA@XZ"(%struct.HasCleanup* %{{.*}})
 // CXXEH: invoke void @might_throw()
@@ -36,7 +36,7 @@
 // CXXEH: call void @"\01??1HasCleanup@@QEAA@XZ"(%struct.HasCleanup* %{{.*}})
 // CXXEH: cleanupret
 
-// NOCXX-LABEL: define void @use_cxx()
+// NOCXX-LABEL: define dso_local void @use_cxx()
 // NOCXX-NOT: invoke
 // NOCXX: call %struct.HasCleanup* @"\01??0HasCleanup@@QEAA@XZ"(%struct.HasCleanup* %{{.*}})
 // NOCXX-NOT: invoke
@@ -55,7 +55,7 @@
 
 // Make sure we use __C_specific_handler for SEH.
 
-// CHECK-LABEL: define void @use_seh()
+// CHECK-LABEL: define dso_local void @use_seh()
 // CHECK-SAME:  personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
 // CHECK: invoke void @might_throw() #[[NOINLINE:[0-9]+]]
 // CHECK:       to label %[[cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
@@ -87,7 +87,7 @@
   }
 }
 
-// CHECK-LABEL: define void @nested_finally() #{{[0-9]+}}
+// CHECK-LABEL: define dso_local void @nested_finally() #{{[0-9]+}}
 // CHECK-SAME:  personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
 // CHECK: invoke void @might_throw()
 // CHECK: call void @"\01?fin$0@0@nested_finally@@"(i8 1, i8* {{.*}})
@@ -108,11 +108,11 @@
   might_throw();
 }
 
-// CXXEH-LABEL: define void @"\01?use_seh_in_lambda@@YAXXZ"()
+// CXXEH-LABEL: define dso_local void @"\01?use_seh_in_lambda@@YAXXZ"()
 // CXXEH-SAME:  personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
 // CXXEH: cleanuppad
 
-// NOCXX-LABEL: define void @"\01?use_seh_in_lambda@@YAXXZ"()
+// NOCXX-LABEL: define dso_local void @"\01?use_seh_in_lambda@@YAXXZ"()
 // NOCXX-NOT: invoke
 // NOCXX: ret void
 
@@ -139,7 +139,7 @@
   use_seh_in_inline_func();
 }
 
-// CHECK-LABEL: define linkonce_odr void @use_seh_in_inline_func() #{{[0-9]+}}
+// CHECK-LABEL: define linkonce_odr dso_local void @use_seh_in_inline_func() #{{[0-9]+}}
 // CHECK-SAME:  personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
 // CHECK: invoke void @might_throw()
 //
diff --git a/test/CodeGenCXX/explicit-instantiation.cpp b/test/CodeGenCXX/explicit-instantiation.cpp
index 85857fb..a7fba38 100644
--- a/test/CodeGenCXX/explicit-instantiation.cpp
+++ b/test/CodeGenCXX/explicit-instantiation.cpp
@@ -119,14 +119,14 @@
   // definition of Inner.
   template struct Outer<int>;
   // CHECK: define weak_odr void @_ZN13NestedClasses5OuterIiE5Inner1fEv
-  // CHECK-MS: define weak_odr x86_thiscallcc void @"\01?f@Inner@?$Outer@H@NestedClasses@@QAEXXZ"
+  // CHECK-MS: define weak_odr dso_local x86_thiscallcc void @"\01?f@Inner@?$Outer@H@NestedClasses@@QAEXXZ"
 
   // Explicit instantiation declaration of Outer causes explicit instantiation
   // declaration of Inner, but not in MSVC mode.
   extern template struct Outer<char>;
   auto use = &Outer<char>::Inner::f;
   // CHECK: {{declare|define available_externally}} void @_ZN13NestedClasses5OuterIcE5Inner1fEv
-  // CHECK-MS: define linkonce_odr x86_thiscallcc void @"\01?f@Inner@?$Outer@D@NestedClasses@@QAEXXZ"
+  // CHECK-MS: define linkonce_odr dso_local x86_thiscallcc void @"\01?f@Inner@?$Outer@D@NestedClasses@@QAEXXZ"
 }
 
 // Check that we emit definitions from explicit instantiations even when they
@@ -170,3 +170,22 @@
   f<int>();
 }
 }
+
+namespace DefaultedMembers {
+  struct B { B(); B(const B&); ~B(); };
+  template<typename T> struct A : B {
+    A() = default;
+    ~A() = default;
+  };
+  extern template struct A<int>;
+
+  // CHECK-LABEL: define {{.*}} @_ZN16DefaultedMembers1AIiEC2Ev
+  // CHECK-LABEL: define {{.*}} @_ZN16DefaultedMembers1AIiED2Ev
+  A<int> ai;
+
+  // CHECK-LABEL: define {{.*}} @_ZN16DefaultedMembers1AIiEC2ERKS1_
+  A<int> ai2(ai);
+
+  // CHECK-NOT: @_ZN16DefaultedMembers1AIcE
+  template struct A<char>;
+}
diff --git a/test/CodeGenCXX/extern-c.cpp b/test/CodeGenCXX/extern-c.cpp
index 1046915..b80d325 100644
--- a/test/CodeGenCXX/extern-c.cpp
+++ b/test/CodeGenCXX/extern-c.cpp
@@ -7,7 +7,7 @@
 // CHECK-NOT: @_ZN3foo1bE = global
 extern int b;
 
-// CHECK: @_ZN3foo1cE = global
+// CHECK: @_ZN3foo1cE = {{(dso_local )?}}global
 int c = 5;
 
 // CHECK-NOT: @_ZN3foo1dE
@@ -16,21 +16,21 @@
 // CHECK-NOT: should_not_appear
 extern "C++" int should_not_appear;
 
-// CHECK: @_ZN3foo10extern_cxxE = global
+// CHECK: @_ZN3foo10extern_cxxE = {{(dso_local )?}}global
 extern "C++" int extern_cxx = 0;
 
 }
 
-// CHECK-NOT: @global_a = global
+// CHECK-NOT: @global_a = {{(dso_local )?}}global
 extern "C" int global_a;
 
-// CHECK: @global_b = global
+// CHECK: @global_b = {{(dso_local )?}}global
 extern "C" int global_b = 0;
 
 // CHECK-NOT: should_not_appear
 extern "C++" int should_not_appear;
 
-// CHECK: @extern_cxx = global
+// CHECK: @extern_cxx = {{(dso_local )?}}global
 extern "C++" int extern_cxx = 0;
 
 namespace test1 {
@@ -38,11 +38,11 @@
     struct X {};
   }
   extern "C" {
-    // CHECK: @test1_b = global
+    // CHECK: @test1_b = {{(dso_local )?}}global
     X test1_b = X();
   }
   void *use = &test1_b;
-  // CHECK: @_ZN5test13useE = global
+  // CHECK: @_ZN5test13useE = {{(dso_local )?}}global
 }
 
 namespace test2 {
@@ -50,7 +50,7 @@
     struct X {};
   }
 
-  // CHECK: @test2_b = global
+  // CHECK: @test2_b = {{(dso_local )?}}global
   extern "C" X test2_b;
   X test2_b;
 }
diff --git a/test/CodeGenCXX/float16-declarations.cpp b/test/CodeGenCXX/float16-declarations.cpp
index b97f9aa..e82c05e 100644
--- a/test/CodeGenCXX/float16-declarations.cpp
+++ b/test/CodeGenCXX/float16-declarations.cpp
@@ -11,16 +11,14 @@
 // CHECK-DAG: @_ZN12_GLOBAL__N_13f1nE = internal global half 0xH0000, align 2
 
   _Float16 f2n = 33.f16;
-// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2
-// CHECK-X86-DAG:     @_ZN12_GLOBAL__N_13f2nE = internal global i16 20512, align 2
+// CHECK-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2
 
   _Float16 arr1n[10];
 // CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 2
 // CHECK-X86-DAG:     @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 16
 
   _Float16 arr2n[] = { 1.2, 3.0, 3.e4 };
-// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2
-// CHECK-X86-DAG:     @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x i16] [i16 15565, i16 16896, i16 30547], align 2
+// CHECK-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2
 
   const volatile _Float16 func1n(const _Float16 &arg) {
     return arg + f2n + arr1n[4] - arr2n[1];
@@ -31,20 +29,18 @@
 /* File */
 
 _Float16 f1f;
-// CHECK-AARCH64-DAG: @f1f = global half 0xH0000, align 2
-// CHECK-X86-DAG: @f1f = global half 0xH0000, align 2
+// CHECK-AARCH64-DAG: @f1f = dso_local global half 0xH0000, align 2
+// CHECK-X86-DAG: @f1f = dso_local global half 0xH0000, align 2
 
 _Float16 f2f = 32.4;
-// CHECK-AARCH64-DAG: @f2f = global half 0xH500D, align 2
-// CHECK-X86-DAG: @f2f = global i16 20493, align 2
+// CHECK-DAG: @f2f = dso_local global half 0xH500D, align 2
 
 _Float16 arr1f[10];
-// CHECK-AARCH64-DAG: @arr1f = global [10 x half] zeroinitializer, align 2
-// CHECK-X86-DAG: @arr1f = global [10 x half] zeroinitializer, align 16
+// CHECK-AARCH64-DAG: @arr1f = dso_local global [10 x half] zeroinitializer, align 2
+// CHECK-X86-DAG: @arr1f = dso_local global [10 x half] zeroinitializer, align 16
 
 _Float16 arr2f[] = { -1.2, -3.0, -3.e4 };
-// CHECK-AARCH64-DAG: @arr2f = global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2
-// CHECK-X86-DAG: @arr2f = global [3 x i16] [i16 -17203, i16 -15872, i16 -2221], align 2
+// CHECK-DAG: @arr2f = dso_local global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2
 
 _Float16 func1f(_Float16 arg);
 
@@ -55,24 +51,24 @@
   _Float16 f1c;
 
   static const _Float16 f2c;
-// CHECK-DAG: @_ZN2C13f2cE = external constant half, align 2
+// CHECK-DAG: @_ZN2C13f2cE = external dso_local constant half, align 2
 
   volatile _Float16 f3c;
 
 public:
   C1(_Float16 arg) : f1c(arg), f3c(arg) { }
 // Check that we mangle _Float16 to DF16_
-// CHECK-DAG: define linkonce_odr void @_ZN2C1C2EDF16_(%class.C1*{{.*}}, half{{.*}})
+// CHECK-DAG: define linkonce_odr dso_local void @_ZN2C1C2EDF16_(%class.C1*{{.*}}, half{{.*}})
 
   _Float16 func1c(_Float16 arg ) {
     return f1c + arg;
   }
-// CHECK-DAG: define linkonce_odr half @_ZN2C16func1cEDF16_(%class.C1*{{.*}}, half{{.*}})
+// CHECK-DAG: define linkonce_odr dso_local half @_ZN2C16func1cEDF16_(%class.C1*{{.*}}, half{{.*}})
 
   static _Float16 func2c(_Float16 arg) {
     return arg * C1::f2c;
   }
-// CHECK-DAG: define linkonce_odr half @_ZN2C16func2cEDF16_(half{{.*}})
+// CHECK-DAG: define linkonce_odr dso_local half @_ZN2C16func2cEDF16_(half{{.*}})
 };
 
 /*  Template */
@@ -80,7 +76,7 @@
 template <class C> C func1t(C arg) {
   return arg * 2.f16;
 }
-// CHECK-DAG: define linkonce_odr half @_Z6func1tIDF16_ET_S0_(half{{.*}})
+// CHECK-DAG: define linkonce_odr dso_local half @_Z6func1tIDF16_ET_S0_(half{{.*}})
 
 template <class C> struct S1 {
   C mem1;
@@ -110,11 +106,9 @@
 // CHECK-DAG:  call void @_ZN2C1C2EDF16_(%class.C1* %{{.*}}, half %{{.*}})
 
   S1<_Float16> s1 = { 132.f16 };
-// CHECK-AARCH64-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2
-// CHECK-X86-DAG:     @_ZZ4mainE2s1 = private unnamed_addr constant { i16 } { i16 22560 }, align 2
+// CHECK-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2
 // CHECK-DAG:  [[S1:%[0-9]+]] = bitcast %struct.S1* %{{.*}} to i8*
-// CHECK-AARCH64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false)
-// CHECK-X86-DAG:     call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* bitcast ({ i16 }* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false)
+// CHECK-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[S1]], i8* align 2 bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i1 false)
 
   _Float16 f4l = func1n(f1l)  + func1f(f2l) + c1.func1c(f3l) + c1.func2c(f1l) +
     func1t(f1l) + s1.mem2 - f1n + f2n;
@@ -129,8 +123,7 @@
 // CHECK-DAG:  store half [[INC]], half* %{{.*}}, align 2
 
   _Float16 arr1l[] = { -1.f16, -0.f16, -11.f16 };
-// CHECK-AARCH64-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2
-// CHECK-X86-DAG:     @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x i16] [i16 -17408, i16 -32768, i16 -13952], align 2
+// CHECK-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2
 
   float cvtf = f2n;
 //CHECK-DAG: [[H2F:%[a-z0-9]+]] = fpext half {{%[0-9]+}} to float
diff --git a/test/CodeGenCXX/fp16-mangle.cpp b/test/CodeGenCXX/fp16-mangle.cpp
index bd5a319..5827fd5 100644
--- a/test/CodeGenCXX/fp16-mangle.cpp
+++ b/test/CodeGenCXX/fp16-mangle.cpp
@@ -4,9 +4,9 @@
 template <typename T, typename U> struct S { static int i; };
 template <> int S<__fp16, __fp16>::i = 3;
 
-// CHECK-LABEL: define void @_Z1fPDh(i16* %x)
+// CHECK-LABEL: define void @_Z1fPDh(half* %x)
 void f (__fp16 *x) { }
 
-// CHECK-LABEL: define void @_Z1gPDhS_(i16* %x, i16* %y)
+// CHECK-LABEL: define void @_Z1gPDhS_(half* %x, half* %y)
 void g (__fp16 *x, __fp16 *y) { }
 
diff --git a/test/CodeGenCXX/global-llvm-constant.cpp b/test/CodeGenCXX/global-llvm-constant.cpp
index 55933ee..877683e 100644
--- a/test/CodeGenCXX/global-llvm-constant.cpp
+++ b/test/CodeGenCXX/global-llvm-constant.cpp
@@ -15,7 +15,7 @@
 
 int add(int x, int y) { return x + y; }
 
-// CHECK: @x2 = constant
+// CHECK: @x2 = {{(dso_local )?}}constant
 extern const X x2;
 const X x2 = { &add };
 
@@ -27,6 +27,6 @@
   X1 array[3];
 };
 
-// CHECK: @x2b = global
+// CHECK: @x2b = {{(dso_local )?}}global
 extern const X2 x2b;
 const X2 x2b = { { { 1 }, { 2 }, { 3 } } };
diff --git a/test/CodeGenCXX/hidden-dllimport.cpp b/test/CodeGenCXX/hidden-dllimport.cpp
new file mode 100644
index 0000000..9de0d71
--- /dev/null
+++ b/test/CodeGenCXX/hidden-dllimport.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm -fvisibility-inlines-hidden -o - %s | FileCheck %s
+
+// We used to declare this hidden dllimport, which is contradictory.
+
+// CHECK: declare dllimport void @"\01?bar@foo@@QEAAXXZ"(%struct.foo*)
+
+struct __attribute__((dllimport)) foo {
+  void bar() {}
+};
+void zed(foo *p) { p->bar(); }
diff --git a/test/CodeGenCXX/homogeneous-aggregates.cpp b/test/CodeGenCXX/homogeneous-aggregates.cpp
index 1338b25..05fb7f1 100644
--- a/test/CodeGenCXX/homogeneous-aggregates.cpp
+++ b/test/CodeGenCXX/homogeneous-aggregates.cpp
@@ -41,13 +41,13 @@
 // PPC: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, [3 x i64] %x.coerce)
 // ARM32: define arm_aapcs_vfpcc void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, [3 x i64] %x.coerce)
 // ARM64: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, %struct.D1* %x)
-// X64: define x86_vectorcallcc void @"\01_Z7func_D12D1@@24"(%struct.D1* noalias sret %agg.result, %struct.D1* %x)
+// X64: define dso_local x86_vectorcallcc void @"\01_Z7func_D12D1@@24"(%struct.D1* noalias sret %agg.result, %struct.D1* %x)
 D1 CC func_D1(D1 x) { return x; }
 
 // PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
 // ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce)
 // ARM64: define %struct.D2 @_Z7func_D22D2([3 x double] %x.coerce)
-// X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(%struct.D2 inreg %x.coerce)
+// X64: define dso_local x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(%struct.D2 inreg %x.coerce)
 D2 CC func_D2(D2 x) { return x; }
 
 // PPC: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce)
@@ -92,7 +92,7 @@
 void CC with_empty_base(HVAWithEmptyBase a) {}
 
 // FIXME: MSVC doesn't consider this an HVA because of the empty base.
-// X64: define x86_vectorcallcc void @"\01_Z15with_empty_base16HVAWithEmptyBase@@16"(%struct.HVAWithEmptyBase inreg %a.coerce)
+// X64: define dso_local x86_vectorcallcc void @"\01_Z15with_empty_base16HVAWithEmptyBase@@16"(%struct.HVAWithEmptyBase inreg %a.coerce)
 
 struct HVAWithEmptyBitField : Float1, Float2 {
   int : 0; // Takes no space.
@@ -102,5 +102,5 @@
 // PPC: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
 // ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
 // ARM32: define arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce)
-// X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(%struct.HVAWithEmptyBitField inreg %a.coerce)
+// X64: define dso_local x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(%struct.HVAWithEmptyBitField inreg %a.coerce)
 void CC with_empty_bitfield(HVAWithEmptyBitField a) {}
diff --git a/test/CodeGenCXX/initializer-list-ctor-order.cpp b/test/CodeGenCXX/initializer-list-ctor-order.cpp
index 390fe4f..70f264c 100644
--- a/test/CodeGenCXX/initializer-list-ctor-order.cpp
+++ b/test/CodeGenCXX/initializer-list-ctor-order.cpp
@@ -15,7 +15,7 @@
   A a{f(), g()};
 }
 // CHECK-ITANIUM-LABEL: define void @_Z3foov
-// CHECK-MS-LABEL: define void @"\01?foo@@YAXXZ"
+// CHECK-MS-LABEL: define dso_local void @"\01?foo@@YAXXZ"
 // CHECK: call i32 @f()
 // CHECK: call i32 @g()
 
@@ -24,6 +24,6 @@
 };
 B::B() : A{f(), g()} {}
 // CHECK-ITANIUM-LABEL: define void @_ZN1BC2Ev
-// CHECK-MS-LABEL: define x86_thiscallcc %struct.B* @"\01??0B@@QAE@XZ"
+// CHECK-MS-LABEL: define dso_local x86_thiscallcc %struct.B* @"\01??0B@@QAE@XZ"
 // CHECK: call i32 @f()
 // CHECK: call i32 @g()
diff --git a/test/CodeGenCXX/inline-dllexport-member.cpp b/test/CodeGenCXX/inline-dllexport-member.cpp
index a98f560..ee0e8ce 100644
--- a/test/CodeGenCXX/inline-dllexport-member.cpp
+++ b/test/CodeGenCXX/inline-dllexport-member.cpp
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -triple i686-windows-win32 -fms-extensions -debug-info-kind=limited -emit-llvm %s -o - \
 // RUN:    | FileCheck %s
 
-// CHECK: @"\01?ui@s@@2IB" = weak_odr dllexport constant i32 0, comdat, align 4, !dbg [[UI:![0-9]+]]
+// CHECK: @"\01?ui@s@@2IB" = weak_odr dso_local dllexport constant i32 0, comdat, align 4, !dbg [[UI:![0-9]+]]
 
 struct __declspec(dllexport) s {
   static const unsigned int ui = 0;
diff --git a/test/CodeGenCXX/inline-functions.cpp b/test/CodeGenCXX/inline-functions.cpp
index f1169f9..9fd8d14 100644
--- a/test/CodeGenCXX/inline-functions.cpp
+++ b/test/CodeGenCXX/inline-functions.cpp
@@ -23,14 +23,14 @@
 // We need a final CHECK line here.
 
 // NORMAL-LABEL: define void @_Z1fv
-// MSVCCOMPAT-LABEL: define void @"\01?f@@YAXXZ"
+// MSVCCOMPAT-LABEL: define dso_local void @"\01?f@@YAXXZ"
 void f() { }
 
 // <rdar://problem/8740363>
 inline void f1(int);
 
 // NORMAL-LABEL: define linkonce_odr void @_Z2f1i
-// MSVCCOMPAT-LABEL: define linkonce_odr void @"\01?f1@@YAXH@Z"
+// MSVCCOMPAT-LABEL: define linkonce_odr dso_local void @"\01?f1@@YAXH@Z"
 void f1(int) { }
 
 void test_f1() { f1(17); }
@@ -44,7 +44,7 @@
   };
 
   // NORMAL-LABEL: define linkonce_odr void @_ZN5test11C4funcEv(
-  // MSVCCOMPAT-LABEL: define linkonce_odr void @"\01?func@C@test1@@QEAAXXZ"(
+  // MSVCCOMPAT-LABEL: define linkonce_odr dso_local void @"\01?func@C@test1@@QEAAXXZ"(
 
   class C {
   public:
@@ -72,36 +72,36 @@
     f(a);
   }
   // NORMAL-LABEL: define linkonce_odr void @_ZN5test21fERKNS_1AE
-  // MSVCCOMPAT-LABEL: define linkonce_odr void @"\01?f@test2@@YAXAEBUA@1@@Z"
+  // MSVCCOMPAT-LABEL: define linkonce_odr dso_local void @"\01?f@test2@@YAXAEBUA@1@@Z"
 }
 
 // NORMAL-NOT: _Z17ExternAndInlineFnv
-// MSVCCOMPAT-LABEL: define weak_odr void @"\01?ExternAndInlineFn@@YAXXZ"
+// MSVCCOMPAT-LABEL: define weak_odr dso_local void @"\01?ExternAndInlineFn@@YAXXZ"
 extern inline void ExternAndInlineFn() {}
 
 // NORMAL-NOT: _Z18InlineThenExternFnv
-// MSVCCOMPAT-LABEL: define weak_odr void @"\01?InlineThenExternFn@@YAXXZ"
+// MSVCCOMPAT-LABEL: define weak_odr dso_local void @"\01?InlineThenExternFn@@YAXXZ"
 inline void InlineThenExternFn() {}
 extern void InlineThenExternFn();
 
 // NORMAL-LABEL: define void @_Z18ExternThenInlineFnv
-// MSVCCOMPAT-LABEL: define void @"\01?ExternThenInlineFn@@YAXXZ"
+// MSVCCOMPAT-LABEL: define dso_local void @"\01?ExternThenInlineFn@@YAXXZ"
 extern void ExternThenInlineFn() {}
 
 // NORMAL-NOT: _Z25ExternThenInlineThenDefFnv
-// MSVCCOMPAT-LABEL: define weak_odr void @"\01?ExternThenInlineThenDefFn@@YAXXZ"
+// MSVCCOMPAT-LABEL: define weak_odr dso_local void @"\01?ExternThenInlineThenDefFn@@YAXXZ"
 extern void ExternThenInlineThenDefFn();
 inline void ExternThenInlineThenDefFn();
 void ExternThenInlineThenDefFn() {}
 
 // NORMAL-NOT: _Z25InlineThenExternThenDefFnv
-// MSVCCOMPAT-LABEL: define weak_odr void @"\01?InlineThenExternThenDefFn@@YAXXZ"
+// MSVCCOMPAT-LABEL: define weak_odr dso_local void @"\01?InlineThenExternThenDefFn@@YAXXZ"
 inline void InlineThenExternThenDefFn();
 extern void InlineThenExternThenDefFn();
 void InlineThenExternThenDefFn() {}
 
 // NORMAL-NOT: _Z17ExternAndConstexprFnv
-// MSVCCOMPAT-LABEL: define weak_odr i32 @"\01?ExternAndConstexprFn@@YAHXZ"
+// MSVCCOMPAT-LABEL: define weak_odr dso_local i32 @"\01?ExternAndConstexprFn@@YAHXZ"
 extern constexpr int ExternAndConstexprFn() { return 0; }
 
 // NORMAL-NOT: _Z11ConstexprFnv
@@ -112,7 +112,7 @@
 extern inline void ExternInlineOnPrimaryTemplate(T);
 
 // NORMAL-LABEL: define void @_Z29ExternInlineOnPrimaryTemplateIiEvT_
-// MSVCCOMPAT-LABEL: define void @"\01??$ExternInlineOnPrimaryTemplate@H@@YAXH@Z"
+// MSVCCOMPAT-LABEL: define dso_local void @"\01??$ExternInlineOnPrimaryTemplate@H@@YAXH@Z"
 template <>
 void ExternInlineOnPrimaryTemplate(int) {}
 
@@ -120,7 +120,7 @@
 extern inline void ExternInlineOnPrimaryTemplateAndSpecialization(T);
 
 // NORMAL-NOT: _Z46ExternInlineOnPrimaryTemplateAndSpecializationIiEvT_
-// MSVCCOMPAT-LABEL: define weak_odr void @"\01??$ExternInlineOnPrimaryTemplateAndSpecialization@H@@YAXH@Z"
+// MSVCCOMPAT-LABEL: define weak_odr dso_local void @"\01??$ExternInlineOnPrimaryTemplateAndSpecialization@H@@YAXH@Z"
 template <>
 extern inline void ExternInlineOnPrimaryTemplateAndSpecialization(int) {}
 
@@ -146,5 +146,5 @@
 
 __attribute__((used)) inline S<int> Foo() { return S<int>(); }
 // NORMAL-LABEL: define linkonce_odr void @_ZN7PR229593FooEv(
-// MSVCCOMPAT-LABEL: define linkonce_odr i8 @"\01?Foo@PR22959@@YA?AU?$S@H@1@XZ"(
+// MSVCCOMPAT-LABEL: define linkonce_odr dso_local i8 @"\01?Foo@PR22959@@YA?AU?$S@H@1@XZ"(
 }
diff --git a/test/CodeGenCXX/instrument-functions.cpp b/test/CodeGenCXX/instrument-functions.cpp
index 587b638..45ae482 100644
--- a/test/CodeGenCXX/instrument-functions.cpp
+++ b/test/CodeGenCXX/instrument-functions.cpp
@@ -1,22 +1,26 @@
-// RUN: %clang_cc1 -S -emit-llvm -triple %itanium_abi_triple -o - %s -finstrument-functions | FileCheck %s
+// RUN: %clang_cc1 -S -emit-llvm -triple %itanium_abi_triple -o - %s -finstrument-functions -disable-llvm-passes | FileCheck %s
 
-// CHECK: @_Z5test1i
 int test1(int x) {
-// CHECK: __cyg_profile_func_enter
-// CHECK: __cyg_profile_func_exit
+// CHECK: @_Z5test1i(i32 {{.*}}%x) #[[ATTR1:[0-9]+]]
 // CHECK: ret
   return x;
 }
 
-// CHECK: @_Z5test2i
 int test2(int) __attribute__((no_instrument_function));
 int test2(int x) {
-// CHECK-NOT: __cyg_profile_func_enter
-// CHECK-NOT: __cyg_profile_func_exit
+// CHECK: @_Z5test2i(i32 {{.*}}%x) #[[ATTR2:[0-9]+]]
 // CHECK: ret
   return x;
 }
 
+// CHECK: attributes #[[ATTR1]] =
+// CHECK-SAME: "instrument-function-entry"="__cyg_profile_func_enter"
+// CHECK-SAME: "instrument-function-exit"="__cyg_profile_func_exit"
+
+// CHECK: attributes #[[ATTR2]] =
+// CHECK-NOT: "instrument-function-entry"
+
+
 // This test case previously crashed code generation.  It exists solely
 // to test -finstrument-function does not crash codegen for this trivial
 // case.
diff --git a/test/CodeGenCXX/internal-linkage.cpp b/test/CodeGenCXX/internal-linkage.cpp
index 77b1670..f3c0ad1 100644
--- a/test/CodeGenCXX/internal-linkage.cpp
+++ b/test/CodeGenCXX/internal-linkage.cpp
@@ -51,7 +51,7 @@
 
 char const * *test4()
 {
-    // CHECK: @extern_nonconst_xyzzy = global
+    // CHECK: @extern_nonconst_xyzzy = {{(dso_local )?}}global
     return &extern_nonconst_xyzzy;
 }
 
diff --git a/test/CodeGenCXX/invariant.group-for-vptrs.cpp b/test/CodeGenCXX/invariant.group-for-vptrs.cpp
index 4e5d9b4..45671e5 100644
--- a/test/CodeGenCXX/invariant.group-for-vptrs.cpp
+++ b/test/CodeGenCXX/invariant.group-for-vptrs.cpp
@@ -56,7 +56,7 @@
 
 // Checking D::D()
 // CHECK-LABEL: define linkonce_odr void @_ZN1DC2Ev(
-// CHECK:  = call i8* @llvm.invariant.group.barrier(i8*
+// CHECK:  = call i8* @llvm.invariant.group.barrier.p0i8(i8*
 // CHECK:  call void @_ZN1AC2Ev(%struct.A*
 // CHECK: store {{.*}} !invariant.group ![[MD]]
 
diff --git a/test/CodeGenCXX/mangle-abi-tag.cpp b/test/CodeGenCXX/mangle-abi-tag.cpp
index a653ff4..5d84096 100644
--- a/test/CodeGenCXX/mangle-abi-tag.cpp
+++ b/test/CodeGenCXX/mangle-abi-tag.cpp
@@ -145,7 +145,7 @@
   f13();
 }
 // f13()::L::foo[abi:C][abi:D]()
-// CHECK-DAG: define linkonce_odr %struct.E* @_ZZ3f13vEN1L3fooB1CB1DEv(
+// CHECK-DAG: define linkonce_odr {{(dso_local )?}}%struct.E* @_ZZ3f13vEN1L3fooB1CB1DEv(
 
 // f13()::L::foo[abi:C][abi:D]()::a[abi:A][abi:B]
 // CHECK-DAG: @_ZZZ3f13vEN1L3fooB1CB1DEvE1aB1AB1B =
@@ -218,7 +218,7 @@
 void f19_test(N19::C<N19::A,  &N19::foo>, N19::F, N19::D) {
 }
 // f19_test(N19::C<N19::A, &N19::foo[abi:B]>, N19::F, N19::D)
-// CHECK-DAG: define void @_Z8f19_testN3N191CINS_1AEXadL_ZNS_3fooB1BES1_NS_1DEEEEENS_1FES2_(
+// CHECK-DAG: define {{(dso_local )?}}void @_Z8f19_testN3N191CINS_1AEXadL_ZNS_3fooB1BES1_NS_1DEEEEENS_1FES2_(
 
 namespace pr30440 {
 
diff --git a/test/CodeGenCXX/mangle-ms-cxx11.cpp b/test/CodeGenCXX/mangle-ms-cxx11.cpp
index b22c046..f79518d 100644
--- a/test/CodeGenCXX/mangle-ms-cxx11.cpp
+++ b/test/CodeGenCXX/mangle-ms-cxx11.cpp
@@ -322,13 +322,13 @@
 
 namespace PR31197 {
 struct A {
-  // CHECK-DAG: define linkonce_odr x86_thiscallcc i32* @"\01??R<lambda_1>@x@A@PR31197@@QBE@XZ"(
+  // CHECK-DAG: define linkonce_odr dso_local x86_thiscallcc i32* @"\01??R<lambda_1>@x@A@PR31197@@QBE@XZ"(
   int *x = []() {
     static int white;
     // CHECK-DAG: @"\01?white@?1???R<lambda_1>@x@A@PR31197@@QBE@XZ@4HA"
     return &white;
   }();
-  // CHECK-DAG: define linkonce_odr x86_thiscallcc i32* @"\01??R<lambda_1>@y@A@PR31197@@QBE@XZ"(
+  // CHECK-DAG: define linkonce_odr dso_local x86_thiscallcc i32* @"\01??R<lambda_1>@y@A@PR31197@@QBE@XZ"(
   int *y = []() {
     static int black;
     // CHECK-DAG: @"\01?black@?1???R<lambda_1>@y@A@PR31197@@QBE@XZ@4HA"
diff --git a/test/CodeGenCXX/mangle-ms-md5.cpp b/test/CodeGenCXX/mangle-ms-md5.cpp
index aef2683..6843c3c 100644
--- a/test/CodeGenCXX/mangle-ms-md5.cpp
+++ b/test/CodeGenCXX/mangle-ms-md5.cpp
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -emit-llvm -o - -triple i686-pc-win32 %s | FileCheck %s
 int xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;
-// CHECK-DAG: @"\01??@bf7ea7b95f260b0b24e7f1e8fc8370ab@" = global i32 0, align 4
+// CHECK-DAG: @"\01??@bf7ea7b95f260b0b24e7f1e8fc8370ab@" = dso_local global i32 0, align 4
 
 struct yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy {
   yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy();
diff --git a/test/CodeGenCXX/mangle-ms-templates-memptrs-2.cpp b/test/CodeGenCXX/mangle-ms-templates-memptrs-2.cpp
index b01d609..d26558b 100644
--- a/test/CodeGenCXX/mangle-ms-templates-memptrs-2.cpp
+++ b/test/CodeGenCXX/mangle-ms-templates-memptrs-2.cpp
@@ -57,4 +57,4 @@
 
 // Test that we mangle in the vbptr offset, which is 12 here.
 //
-// CHECK: define weak_odr x86_thiscallcc %struct.ClassTemplate* @"\01??0?$ClassTemplate@$J??_9MostGeneral@@$BA@AEA@M@3@@QAE@XZ"
+// CHECK: define weak_odr dso_local x86_thiscallcc %struct.ClassTemplate* @"\01??0?$ClassTemplate@$J??_9MostGeneral@@$BA@AEA@M@3@@QAE@XZ"
diff --git a/test/CodeGenCXX/mangle-ms-vector-types.cpp b/test/CodeGenCXX/mangle-ms-vector-types.cpp
index 53a1a43..6974ee1 100644
--- a/test/CodeGenCXX/mangle-ms-vector-types.cpp
+++ b/test/CodeGenCXX/mangle-ms-vector-types.cpp
@@ -5,33 +5,33 @@
 #include <immintrin.h>
 
 void foo64(__m64) {}
-// CHECK: define void @"\01?foo64@@YAXT__m64@@@Z"
+// CHECK: define dso_local void @"\01?foo64@@YAXT__m64@@@Z"
 
 void foo128(__m128) {}
-// CHECK: define void @"\01?foo128@@YAXT__m128@@@Z"
+// CHECK: define dso_local void @"\01?foo128@@YAXT__m128@@@Z"
 
 void foo128d(__m128d) {}
-// CHECK: define void @"\01?foo128d@@YAXU__m128d@@@Z"
+// CHECK: define dso_local void @"\01?foo128d@@YAXU__m128d@@@Z"
 
 void foo128i(__m128i) {}
-// CHECK: define void @"\01?foo128i@@YAXT__m128i@@@Z"
+// CHECK: define dso_local void @"\01?foo128i@@YAXT__m128i@@@Z"
 
 void foo256(__m256) {}
-// CHECK: define void @"\01?foo256@@YAXT__m256@@@Z"
+// CHECK: define dso_local void @"\01?foo256@@YAXT__m256@@@Z"
 
 void foo256d(__m256d) {}
-// CHECK: define void @"\01?foo256d@@YAXU__m256d@@@Z"
+// CHECK: define dso_local void @"\01?foo256d@@YAXU__m256d@@@Z"
 
 void foo256i(__m256i) {}
-// CHECK: define void @"\01?foo256i@@YAXT__m256i@@@Z"
+// CHECK: define dso_local void @"\01?foo256i@@YAXT__m256i@@@Z"
 
 // We have a custom mangling for vector types not standardized by Intel.
 void foov8hi(__v8hi) {}
-// CHECK: define void @"\01?foov8hi@@YAXT?$__vector@F$07@__clang@@@Z"
+// CHECK: define dso_local void @"\01?foov8hi@@YAXT?$__vector@F$07@__clang@@@Z"
 
 typedef __attribute__((ext_vector_type(4))) int vi4b;
 void foovi4b(vi4b) {}
-// CHECK: define void @"\01?foovi4b@@YAXT?$__vector@H$03@__clang@@@Z"
+// CHECK: define dso_local void @"\01?foovi4b@@YAXT?$__vector@H$03@__clang@@@Z"
 
 // Clang does not support vectors of complex types, so we can't test the
 // mangling of them.
diff --git a/test/CodeGenCXX/mangle-ms.cpp b/test/CodeGenCXX/mangle-ms.cpp
index 7e2f17f..77fde7a 100644
--- a/test/CodeGenCXX/mangle-ms.cpp
+++ b/test/CodeGenCXX/mangle-ms.cpp
@@ -27,8 +27,8 @@
 // X64-DAG:   @"\01?_c@@YAHXZ"
 
 const int &NeedsReferenceTemporary = 2;
-// CHECK-DAG: @"\01?NeedsReferenceTemporary@@3ABHB" = constant i32* @"\01?$RT1@NeedsReferenceTemporary@@3ABHB"
-// X64-DAG: @"\01?NeedsReferenceTemporary@@3AEBHEB" = constant i32* @"\01?$RT1@NeedsReferenceTemporary@@3AEBHEB"
+// CHECK-DAG: @"\01?NeedsReferenceTemporary@@3ABHB" = dso_local constant i32* @"\01?$RT1@NeedsReferenceTemporary@@3ABHB"
+// X64-DAG: @"\01?NeedsReferenceTemporary@@3AEBHEB" = dso_local constant i32* @"\01?$RT1@NeedsReferenceTemporary@@3AEBHEB"
 
 class foo {
   static const short d;
@@ -448,22 +448,22 @@
 //   void foo(void *const, __clang::__pass_object_size0);
 // where __clang is a top-level namespace.
 
-// CHECK-DAG: define i32 @"\01?foo@PassObjectSize@@YAHQAHW4__pass_object_size0@__clang@@@Z"
+// CHECK-DAG: define dso_local i32 @"\01?foo@PassObjectSize@@YAHQAHW4__pass_object_size0@__clang@@@Z"
 int foo(int *const i __attribute__((pass_object_size(0)))) { return 0; }
-// CHECK-DAG: define i32 @"\01?bar@PassObjectSize@@YAHQAHW4__pass_object_size1@__clang@@@Z"
+// CHECK-DAG: define dso_local i32 @"\01?bar@PassObjectSize@@YAHQAHW4__pass_object_size1@__clang@@@Z"
 int bar(int *const i __attribute__((pass_object_size(1)))) { return 0; }
-// CHECK-DAG: define i32 @"\01?qux@PassObjectSize@@YAHQAHW4__pass_object_size1@__clang@@0W4__pass_object_size0@3@@Z"
+// CHECK-DAG: define dso_local i32 @"\01?qux@PassObjectSize@@YAHQAHW4__pass_object_size1@__clang@@0W4__pass_object_size0@3@@Z"
 int qux(int *const i __attribute__((pass_object_size(1))), int *const j __attribute__((pass_object_size(0)))) { return 0; }
-// CHECK-DAG: define i32 @"\01?zot@PassObjectSize@@YAHQAHW4__pass_object_size1@__clang@@01@Z"
+// CHECK-DAG: define dso_local i32 @"\01?zot@PassObjectSize@@YAHQAHW4__pass_object_size1@__clang@@01@Z"
 int zot(int *const i __attribute__((pass_object_size(1))), int *const j __attribute__((pass_object_size(1)))) { return 0; }
 }
 
 namespace Atomic {
-// CHECK-DAG: define void @"\01?f@Atomic@@YAXU?$_Atomic@H@__clang@@@Z"(
+// CHECK-DAG: define dso_local void @"\01?f@Atomic@@YAXU?$_Atomic@H@__clang@@@Z"(
 void f(_Atomic(int)) {}
 }
 namespace Complex {
-// CHECK-DAG: define void @"\01?f@Complex@@YAXU?$_Complex@H@__clang@@@Z"(
+// CHECK-DAG: define dso_local void @"\01?f@Complex@@YAXU?$_Complex@H@__clang@@@Z"(
 void f(_Complex int) {}
 }
 
diff --git a/test/CodeGenCXX/mangle-windows.cpp b/test/CodeGenCXX/mangle-windows.cpp
index 8564447..51e4bc8 100644
--- a/test/CodeGenCXX/mangle-windows.cpp
+++ b/test/CodeGenCXX/mangle-windows.cpp
@@ -5,20 +5,20 @@
 // RUN:     FileCheck --check-prefix=ITANIUM %s
 
 void __stdcall f1(void) {}
-// WIN: define x86_stdcallcc void @"\01?f1@@YGXXZ"
-// ITANIUM: define x86_stdcallcc void @"\01__Z2f1v@0"
+// WIN: define dso_local x86_stdcallcc void @"\01?f1@@YGXXZ"
+// ITANIUM: define dso_local x86_stdcallcc void @"\01__Z2f1v@0"
 
 void __fastcall f2(void) {}
-// WIN: define x86_fastcallcc void @"\01?f2@@YIXXZ"
-// ITANIUM: define x86_fastcallcc void @"\01@_Z2f2v@0"
+// WIN: define dso_local x86_fastcallcc void @"\01?f2@@YIXXZ"
+// ITANIUM: define dso_local x86_fastcallcc void @"\01@_Z2f2v@0"
 
 extern "C" void __stdcall f3(void) {}
-// WIN: define x86_stdcallcc void @"\01_f3@0"
-// ITANIUM: define x86_stdcallcc void @"\01_f3@0"
+// WIN: define dso_local x86_stdcallcc void @"\01_f3@0"
+// ITANIUM: define dso_local x86_stdcallcc void @"\01_f3@0"
 
 extern "C" void __fastcall f4(void) {}
-// WIN: define x86_fastcallcc void @"\01@f4@0"
-// ITANIUM: define x86_fastcallcc void @"\01@f4@0"
+// WIN: define dso_local x86_fastcallcc void @"\01@f4@0"
+// ITANIUM: define dso_local x86_fastcallcc void @"\01@f4@0"
 
 struct Foo {
   void __stdcall foo();
@@ -26,17 +26,17 @@
 };
 
 void Foo::foo() {}
-// WIN: define x86_stdcallcc void @"\01?foo@Foo@@QAGXXZ"
-// ITANIUM: define x86_stdcallcc void @"\01__ZN3Foo3fooEv@4"
+// WIN: define dso_local x86_stdcallcc void @"\01?foo@Foo@@QAGXXZ"
+// ITANIUM: define dso_local x86_stdcallcc void @"\01__ZN3Foo3fooEv@4"
 
 void Foo::bar() {}
-// WIN: define x86_stdcallcc void @"\01?bar@Foo@@SGXXZ"
-// ITANIUM: define x86_stdcallcc void @"\01__ZN3Foo3barEv@0"
+// WIN: define dso_local x86_stdcallcc void @"\01?bar@Foo@@SGXXZ"
+// ITANIUM: define dso_local x86_stdcallcc void @"\01__ZN3Foo3barEv@0"
 
 // Mostly a test that we don't crash and that the names start with a \01.
 // gcc on mingw produces __Zpp@4
 // cl produces _++@4
 extern "C" void __stdcall operator++(Foo &x) {
 }
-// WIN: define x86_stdcallcc void @"\01??E@YGXAAUFoo@@@Z"
-// ITANIUM: define x86_stdcallcc void @"\01__ZppR3Foo@4"
+// WIN: define dso_local x86_stdcallcc void @"\01??E@YGXAAUFoo@@@Z"
+// ITANIUM: define dso_local x86_stdcallcc void @"\01__ZppR3Foo@4"
diff --git a/test/CodeGenCXX/member-function-pointer-calls.cpp b/test/CodeGenCXX/member-function-pointer-calls.cpp
index 67417ef..198119b 100644
--- a/test/CodeGenCXX/member-function-pointer-calls.cpp
+++ b/test/CodeGenCXX/member-function-pointer-calls.cpp
@@ -11,7 +11,7 @@
 
 // CHECK-LABEL: define i32 @_Z2g1v()
 // CHECK: ret i32 1
-// MINGW64-LABEL: define i32 @_Z2g1v()
+// MINGW64-LABEL: define dso_local i32 @_Z2g1v()
 // MINGW64: call i32 @_Z1fP1AMS_FivE(%struct.A* %{{.*}}, { i64, i64 }* %{{.*}})
 int g1() {
   A a;
@@ -20,7 +20,7 @@
 
 // CHECK-LABEL: define i32 @_Z2g2v()
 // CHECK: ret i32 2
-// MINGW64-LABEL: define i32 @_Z2g2v()
+// MINGW64-LABEL: define dso_local i32 @_Z2g2v()
 // MINGW64: call i32 @_Z1fP1AMS_FivE(%struct.A* %{{.*}}, { i64, i64 }* %{{.*}})
 int g2() {
   A a;
diff --git a/test/CodeGenCXX/microsoft-abi-arg-order.cpp b/test/CodeGenCXX/microsoft-abi-arg-order.cpp
index 68c141f..84c77bd 100644
--- a/test/CodeGenCXX/microsoft-abi-arg-order.cpp
+++ b/test/CodeGenCXX/microsoft-abi-arg-order.cpp
@@ -13,7 +13,7 @@
 
 // Order of destruction should be left to right.
 //
-// X86-LABEL: define void @"\01?foo@@YAXUA@@00@Z"
+// X86-LABEL: define dso_local void @"\01?foo@@YAXUA@@00@Z"
 // X86:          ([[argmem_ty:<{ %struct.A, %struct.A, %struct.A }>]]* inalloca)
 // X86: %[[a:[^ ]*]] = getelementptr inbounds [[argmem_ty]], [[argmem_ty]]* %0, i32 0, i32 0
 // X86: %[[b:[^ ]*]] = getelementptr inbounds [[argmem_ty]], [[argmem_ty]]* %0, i32 0, i32 1
@@ -23,7 +23,7 @@
 // X86: call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* %[[c]])
 // X86: ret void
 
-// X64-LABEL: define void @"\01?foo@@YAXUA@@00@Z"
+// X64-LABEL: define dso_local void @"\01?foo@@YAXUA@@00@Z"
 // X64:         (%struct.A* %[[a:[^,]*]], %struct.A* %[[b:[^,]*]], %struct.A* %[[c:[^)]*]])
 // X64: call void @"\01??1A@@QEAA@XZ"(%struct.A* %[[a]])
 // X64: call void @"\01??1A@@QEAA@XZ"(%struct.A* %[[b]])
@@ -38,7 +38,7 @@
 // Order of evaluation should be right to left, and we should clean up the right
 // things as we unwind.
 //
-// X86-LABEL: define void @"\01?call_foo@@YAXXZ"()
+// X86-LABEL: define dso_local void @"\01?call_foo@@YAXXZ"()
 // X86: call i8* @llvm.stacksave()
 // X86: %[[argmem:[^ ]*]] = alloca inalloca [[argmem_ty]]
 // X86: %[[arg3:[^ ]*]] = getelementptr inbounds [[argmem_ty]], [[argmem_ty]]* %[[argmem]], i32 0, i32 2
@@ -59,7 +59,7 @@
 //   ehcleanup:
 // X86: call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* %[[arg3]])
 
-// X64-LABEL: define void @"\01?call_foo@@YAXXZ"()
+// X64-LABEL: define dso_local void @"\01?call_foo@@YAXXZ"()
 // X64: call %struct.A* @"\01??0A@@QEAA@H@Z"(%struct.A* %[[arg3:[^,]*]], i32 3)
 // X64: invoke %struct.A* @"\01??0A@@QEAA@H@Z"(%struct.A* %[[arg2:[^,]*]], i32 2)
 // X64: invoke %struct.A* @"\01??0A@@QEAA@H@Z"(%struct.A* %[[arg1:[^,]*]], i32 1)
diff --git a/test/CodeGenCXX/microsoft-abi-array-cookies.cpp b/test/CodeGenCXX/microsoft-abi-array-cookies.cpp
index 146a6c8..349d5a0 100644
--- a/test/CodeGenCXX/microsoft-abi-array-cookies.cpp
+++ b/test/CodeGenCXX/microsoft-abi-array-cookies.cpp
@@ -5,7 +5,7 @@
 };
 
 void check_array_no_cookies() {
-// CHECK: define void @"\01?check_array_no_cookies@@YAXXZ"() [[NUW:#[0-9]+]]
+// CHECK: define dso_local void @"\01?check_array_no_cookies@@YAXXZ"() [[NUW:#[0-9]+]]
 
 // CHECK: call i8* @"\01??_U@YAPAXI@Z"(i32 42)
   ClassWithoutDtor *array = new ClassWithoutDtor[42];
@@ -62,7 +62,7 @@
 struct S {
   char x[42];
   void operator delete[](void *p, __SIZE_TYPE__);
-  // CHECK-LABEL: define void @"\01?delete_s@PR23990@@YAXPAUS@1@@Z"(
+  // CHECK-LABEL: define dso_local void @"\01?delete_s@PR23990@@YAXPAUS@1@@Z"(
   // CHECK: call void @"\01??_VS@PR23990@@SAXPAXI@Z"(i8* {{.*}}, i32 42)
 };
 void delete_s(S *s) { delete[] s; }
diff --git a/test/CodeGenCXX/microsoft-abi-byval-sret.cpp b/test/CodeGenCXX/microsoft-abi-byval-sret.cpp
index 57ac795..83b4312 100644
--- a/test/CodeGenCXX/microsoft-abi-byval-sret.cpp
+++ b/test/CodeGenCXX/microsoft-abi-byval-sret.cpp
@@ -18,7 +18,7 @@
   return x;
 }
 
-// CHECK-LABEL: define x86_thiscallcc %struct.A* @"\01?foo@B@@QAE?AUA@@U2@@Z"
+// CHECK-LABEL: define dso_local x86_thiscallcc %struct.A* @"\01?foo@B@@QAE?AUA@@U2@@Z"
 // CHECK:       (%struct.B* %this, <{ %struct.A*, %struct.A }>* inalloca)
 // CHECK:   getelementptr inbounds <{ %struct.A*, %struct.A }>, <{ %struct.A*, %struct.A }>* %{{.*}}, i32 0, i32 0
 // CHECK:   load %struct.A*, %struct.A**
@@ -28,7 +28,7 @@
   return x;
 }
 
-// CHECK-LABEL: define %struct.A* @"\01?bar@B@@QAA?AUA@@U2@@Z"
+// CHECK-LABEL: define dso_local %struct.A* @"\01?bar@B@@QAA?AUA@@U2@@Z"
 // CHECK:       (<{ %struct.B*, %struct.A*, %struct.A }>* inalloca)
 // CHECK:   getelementptr inbounds <{ %struct.B*, %struct.A*, %struct.A }>, <{ %struct.B*, %struct.A*, %struct.A }>* %{{.*}}, i32 0, i32 1
 // CHECK:   load %struct.A*, %struct.A**
@@ -38,7 +38,7 @@
   return x;
 }
 
-// CHECK-LABEL: define x86_stdcallcc %struct.A* @"\01?baz@B@@QAG?AUA@@U2@@Z"
+// CHECK-LABEL: define dso_local x86_stdcallcc %struct.A* @"\01?baz@B@@QAG?AUA@@U2@@Z"
 // CHECK:       (<{ %struct.B*, %struct.A*, %struct.A }>* inalloca)
 // CHECK:   getelementptr inbounds <{ %struct.B*, %struct.A*, %struct.A }>, <{ %struct.B*, %struct.A*, %struct.A }>* %{{.*}}, i32 0, i32 1
 // CHECK:   load %struct.A*, %struct.A**
@@ -48,7 +48,7 @@
   return x;
 }
 
-// CHECK-LABEL: define x86_fastcallcc void @"\01?qux@B@@QAI?AUA@@U2@@Z"
+// CHECK-LABEL: define dso_local x86_fastcallcc void @"\01?qux@B@@QAI?AUA@@U2@@Z"
 // CHECK:       (%struct.B* inreg %this, %struct.A* inreg noalias sret %agg.result, <{ %struct.A }>* inalloca)
 // CHECK:   ret void
 
diff --git a/test/CodeGenCXX/microsoft-abi-byval-thunks.cpp b/test/CodeGenCXX/microsoft-abi-byval-thunks.cpp
index 8ae85c0..2ddd376 100644
--- a/test/CodeGenCXX/microsoft-abi-byval-thunks.cpp
+++ b/test/CodeGenCXX/microsoft-abi-byval-thunks.cpp
@@ -14,14 +14,14 @@
 struct C : A, B { C(); virtual void foo(Agg x); };
 C::C() {} // force emission
 
-// CHECK32-LABEL: define linkonce_odr x86_thiscallcc void @"\01?foo@C@byval_thunk@@W3AEXUAgg@2@@Z"
+// CHECK32-LABEL: define linkonce_odr dso_local x86_thiscallcc void @"\01?foo@C@byval_thunk@@W3AEXUAgg@2@@Z"
 // CHECK32:             (%"struct.byval_thunk::C"* %this, <{ %"struct.byval_thunk::Agg" }>* inalloca)
 // CHECK32:   getelementptr i8, i8* %{{.*}}, i32 -4
 // CHECK32:   musttail call x86_thiscallcc void @"\01?foo@C@byval_thunk@@UAEXUAgg@2@@Z"
 // CHECK32:       (%"struct.byval_thunk::C"* %{{.*}}, <{ %"struct.byval_thunk::Agg" }>* inalloca %0)
 // CHECK32-NEXT: ret void
 
-// CHECK64-LABEL: define linkonce_odr void @"\01?foo@C@byval_thunk@@W7EAAXUAgg@2@@Z"
+// CHECK64-LABEL: define linkonce_odr dso_local void @"\01?foo@C@byval_thunk@@W7EAAXUAgg@2@@Z"
 // CHECK64:             (%"struct.byval_thunk::C"* %this, %"struct.byval_thunk::Agg"* %x)
 // CHECK64:   getelementptr i8, i8* %{{.*}}, i32 -8
 // CHECK64:   call void @"\01?foo@C@byval_thunk@@UEAAXUAgg@2@@Z"
@@ -43,7 +43,7 @@
 struct C : A, B { C(); virtual void __stdcall foo(Agg x); };
 C::C() {} // force emission
 
-// CHECK32-LABEL: define linkonce_odr x86_stdcallcc void @"\01?foo@C@stdcall_thunk@@W3AGXUAgg@2@@Z"
+// CHECK32-LABEL: define linkonce_odr dso_local x86_stdcallcc void @"\01?foo@C@stdcall_thunk@@W3AGXUAgg@2@@Z"
 // CHECK32:             (<{ %"struct.stdcall_thunk::C"*, %"struct.stdcall_thunk::Agg" }>* inalloca)
 // CHECK32:   %[[this_slot:[^ ]*]] = getelementptr inbounds <{ %"struct.stdcall_thunk::C"*, %"struct.stdcall_thunk::Agg" }>, <{ %"struct.stdcall_thunk::C"*, %"struct.stdcall_thunk::Agg" }>* %0, i32 0, i32 0
 // CHECK32:   load %"struct.stdcall_thunk::C"*, %"struct.stdcall_thunk::C"** %[[this_slot]]
@@ -53,7 +53,7 @@
 // CHECK32:       (<{ %"struct.stdcall_thunk::C"*, %"struct.stdcall_thunk::Agg" }>*  inalloca %0)
 // CHECK32-NEXT: ret void
 
-// CHECK64-LABEL: define linkonce_odr void @"\01?foo@C@stdcall_thunk@@W7EAAXUAgg@2@@Z"
+// CHECK64-LABEL: define linkonce_odr dso_local void @"\01?foo@C@stdcall_thunk@@W7EAAXUAgg@2@@Z"
 // CHECK64:             (%"struct.stdcall_thunk::C"* %this, %"struct.stdcall_thunk::Agg"* %x)
 // CHECK64:   getelementptr i8, i8* %{{.*}}, i32 -8
 // CHECK64:   call void @"\01?foo@C@stdcall_thunk@@UEAAXUAgg@2@@Z"
@@ -75,7 +75,7 @@
 struct C : A, B { C(); virtual Agg __cdecl foo(Agg x); };
 C::C() {} // force emission
 
-// CHECK32-LABEL: define linkonce_odr %"struct.sret_thunk::Agg"* @"\01?foo@C@sret_thunk@@W3AA?AUAgg@2@U32@@Z"
+// CHECK32-LABEL: define linkonce_odr dso_local %"struct.sret_thunk::Agg"* @"\01?foo@C@sret_thunk@@W3AA?AUAgg@2@U32@@Z"
 // CHECK32:             (<{ %"struct.sret_thunk::C"*, %"struct.sret_thunk::Agg"*, %"struct.sret_thunk::Agg" }>* inalloca)
 // CHECK32:   %[[this_slot:[^ ]*]] = getelementptr inbounds <{ %"struct.sret_thunk::C"*, %"struct.sret_thunk::Agg"*, %"struct.sret_thunk::Agg" }>, <{ %"struct.sret_thunk::C"*, %"struct.sret_thunk::Agg"*, %"struct.sret_thunk::Agg" }>* %0, i32 0, i32 0
 // CHECK32:   load %"struct.sret_thunk::C"*, %"struct.sret_thunk::C"** %[[this_slot]]
@@ -85,7 +85,7 @@
 // CHECK32:       (<{ %"struct.sret_thunk::C"*, %"struct.sret_thunk::Agg"*, %"struct.sret_thunk::Agg" }>*  inalloca %0)
 // CHECK32-NEXT: ret %"struct.sret_thunk::Agg"* %[[rv]]
 
-// CHECK64-LABEL: define linkonce_odr void @"\01?foo@C@sret_thunk@@W7EAA?AUAgg@2@U32@@Z"
+// CHECK64-LABEL: define linkonce_odr dso_local void @"\01?foo@C@sret_thunk@@W7EAA?AUAgg@2@U32@@Z"
 // CHECK64:             (%"struct.sret_thunk::C"* %this, %"struct.sret_thunk::Agg"* noalias sret %agg.result, %"struct.sret_thunk::Agg"* %x)
 // CHECK64:   getelementptr i8, i8* %{{.*}}, i32 -8
 // CHECK64:   call void @"\01?foo@C@sret_thunk@@UEAA?AUAgg@2@U32@@Z"
diff --git a/test/CodeGenCXX/microsoft-abi-byval-vararg.cpp b/test/CodeGenCXX/microsoft-abi-byval-vararg.cpp
index 6a0a860..9b3b144 100644
--- a/test/CodeGenCXX/microsoft-abi-byval-vararg.cpp
+++ b/test/CodeGenCXX/microsoft-abi-byval-vararg.cpp
@@ -19,12 +19,12 @@
   return sum;
 }
 
-// CHECK-LABEL: define i32 @"\01?foo@@YAHUA@@ZZ"(<{ %struct.A }>* inalloca, ...)
+// CHECK-LABEL: define dso_local i32 @"\01?foo@@YAHUA@@ZZ"(<{ %struct.A }>* inalloca, ...)
 
 int main() {
   return foo(A(3), 1, 2, 3);
 }
-// CHECK-LABEL: define i32 @main()
+// CHECK-LABEL: define dso_local i32 @main()
 // CHECK: %[[argmem:[^ ]*]] = alloca inalloca <{ %struct.A, i32, i32, i32 }>
 // CHECK: call i32 {{.*bitcast.*}}@"\01?foo@@YAHUA@@ZZ"{{.*}}(<{ %struct.A, i32, i32, i32 }>* inalloca %[[argmem]])
 
@@ -40,13 +40,13 @@
   varargs_three(1, 2, 3, x);
 }
 
-// CHECK-LABEL: define void @"\01?call_var_args@@YAXXZ"()
+// CHECK-LABEL: define dso_local void @"\01?call_var_args@@YAXXZ"()
 // CHECK: call void {{.*bitcast.*varargs_zero.*}}(<{ %struct.A }>* inalloca %{{.*}})
 // CHECK: call void {{.*bitcast.*varargs_one.*}}(<{ i32, %struct.A }>* inalloca %{{.*}})
 // CHECK: call void {{.*bitcast.*varargs_two.*}}(<{ i32, i32, %struct.A }>* inalloca %{{.*}})
 // CHECK: call void {{.*bitcast.*varargs_three.*}}(<{ i32, i32, i32, %struct.A }>* inalloca %{{.*}})
 
-// CHECK-LABEL: declare void @"\01?varargs_zero@@YAXZZ"(...)
-// CHECK-LABEL: declare void @"\01?varargs_one@@YAXHZZ"(i32, ...)
-// CHECK-LABEL: declare void @"\01?varargs_two@@YAXHHZZ"(i32, i32, ...)
-// CHECK-LABEL: declare void @"\01?varargs_three@@YAXHHHZZ"(i32, i32, i32, ...)
+// CHECK-LABEL: declare dso_local void @"\01?varargs_zero@@YAXZZ"(...)
+// CHECK-LABEL: declare dso_local void @"\01?varargs_one@@YAXHZZ"(i32, ...)
+// CHECK-LABEL: declare dso_local void @"\01?varargs_two@@YAXHHZZ"(i32, i32, ...)
+// CHECK-LABEL: declare dso_local void @"\01?varargs_three@@YAXHHHZZ"(i32, i32, i32, ...)
diff --git a/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp b/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
index 6da7a50..3e15ba2 100644
--- a/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
+++ b/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
@@ -19,9 +19,9 @@
 S C::cdecl_sret() { return S(); }
 S C::byval_and_sret(S a) { return S(); }
 
-// CHECK: define void @"\01?variadic_sret@C@@QAA?AUS@@PBDZZ"(%struct.C* %this, %struct.S* noalias sret %agg.result, i8* %f, ...)
-// CHECK: define void @"\01?cdecl_sret@C@@QAA?AUS@@XZ"(%struct.C* %this, %struct.S* noalias sret %agg.result)
-// CHECK: define void @"\01?byval_and_sret@C@@QAA?AUS@@U2@@Z"(%struct.C* %this, %struct.S* noalias sret %agg.result, %struct.S* byval align 4 %a)
+// CHECK: define dso_local void @"\01?variadic_sret@C@@QAA?AUS@@PBDZZ"(%struct.C* %this, %struct.S* noalias sret %agg.result, i8* %f, ...)
+// CHECK: define dso_local void @"\01?cdecl_sret@C@@QAA?AUS@@XZ"(%struct.C* %this, %struct.S* noalias sret %agg.result)
+// CHECK: define dso_local void @"\01?byval_and_sret@C@@QAA?AUS@@U2@@Z"(%struct.C* %this, %struct.S* noalias sret %agg.result, %struct.S* byval align 4 %a)
 
 int main() {
   C c;
@@ -29,7 +29,7 @@
   c.cdecl_sret();
   c.byval_and_sret(S());
 }
-// CHECK-LABEL: define i32 @main()
+// CHECK-LABEL: define dso_local i32 @main()
 // CHECK: call void {{.*}} @"\01?variadic_sret@C@@QAA?AUS@@PBDZZ"
 // CHECK: call void @"\01?cdecl_sret@C@@QAA?AUS@@XZ"
 // CHECK: call void @"\01?byval_and_sret@C@@QAA?AUS@@U2@@Z"
@@ -41,4 +41,4 @@
 S A::f(int x) {
   return S();
 }
-// CHECK-LABEL: define x86_fastcallcc void @"\01?f@A@@QAI?AUS@@H@Z"(%struct.A* inreg %this, %struct.S* inreg noalias sret %agg.result, i32 %x)
+// CHECK-LABEL: define dso_local x86_fastcallcc void @"\01?f@A@@QAI?AUS@@H@Z"(%struct.A* inreg %this, %struct.S* inreg noalias sret %agg.result, i32 %x)
diff --git a/test/CodeGenCXX/microsoft-abi-constexpr-vs-inheritance.cpp b/test/CodeGenCXX/microsoft-abi-constexpr-vs-inheritance.cpp
index 6972dac..a65f42a 100644
--- a/test/CodeGenCXX/microsoft-abi-constexpr-vs-inheritance.cpp
+++ b/test/CodeGenCXX/microsoft-abi-constexpr-vs-inheritance.cpp
@@ -7,7 +7,7 @@
 };
 
 A a(42);
-// CHECK: @"\01?a@@3UA@@A" = global { { [1 x i8*] }*, i32 } { { [1 x i8*] }* @"\01??_7A@@6B@", i32 42 }, align 4
+// CHECK: @"\01?a@@3UA@@A" = dso_local global { { [1 x i8*] }*, i32 } { { [1 x i8*] }* @"\01??_7A@@6B@", i32 42 }, align 4
 
 struct B {
   constexpr B(int y) : y(y) {}
@@ -20,4 +20,4 @@
 };
 
 C c;
-// CHECK: @"\01?c@@3UC@@A" = global { { [1 x i8*] }*, i32, { [1 x i8*] }*, i32 } { { [1 x i8*] }* @"\01??_7C@@6BA@@@", i32 777, { [1 x i8*] }* @"\01??_7C@@6BB@@@", i32 13 }
+// CHECK: @"\01?c@@3UC@@A" = dso_local global { { [1 x i8*] }*, i32, { [1 x i8*] }*, i32 } { { [1 x i8*] }* @"\01??_7C@@6BA@@@", i32 777, { [1 x i8*] }* @"\01??_7C@@6BB@@@", i32 13 }
diff --git a/test/CodeGenCXX/microsoft-abi-default-cc.cpp b/test/CodeGenCXX/microsoft-abi-default-cc.cpp
index 6259a53..9247cc4 100644
--- a/test/CodeGenCXX/microsoft-abi-default-cc.cpp
+++ b/test/CodeGenCXX/microsoft-abi-default-cc.cpp
@@ -13,13 +13,13 @@
 void __cdecl foo();
 void __cdecl foo() {}
 // GCABI-LABEL: define void @_Z3foov()
-// MSABI: define void @"\01?foo@@YAXXZ"
+// MSABI: define dso_local void @"\01?foo@@YAXXZ"
 
 void __cdecl bar();
 void bar();
 void bar() {}
 // GCABI-LABEL: define void @_Z3barv()
-// MSABI: define void @"\01?bar@@YAXXZ"
+// MSABI: define dso_local void @"\01?bar@@YAXXZ"
 
 // Test that it's OK to mark either the method declaration or method definition
 // with a default CC explicitly.
@@ -34,23 +34,23 @@
 
 void METHOD_CC A::baz() {}
 // GCABI-LABEL: define void @_ZN1A3bazEv
-// MSABI: define x86_thiscallcc void @"\01?baz@A@@QAEXXZ"
+// MSABI: define dso_local x86_thiscallcc void @"\01?baz@A@@QAEXXZ"
 void A::qux() {}
 // GCABI-LABEL: define void @_ZN1A3quxEv
-// MSABI: define x86_thiscallcc void @"\01?qux@A@@QAEXXZ"
+// MSABI: define dso_local x86_thiscallcc void @"\01?qux@A@@QAEXXZ"
 
 void __cdecl static_baz() {}
 // GCABI-LABEL: define void @_Z10static_bazv
-// MSABI: define void @"\01?static_baz@@YAXXZ"
+// MSABI: define dso_local void @"\01?static_baz@@YAXXZ"
 void static_qux() {}
 // GCABI-LABEL: define void @_Z10static_quxv
-// MSABI: define void @"\01?static_qux@@YAXXZ"
+// MSABI: define dso_local void @"\01?static_qux@@YAXXZ"
 
 namespace PR31656 {
 template <int I>
 void __cdecl callee(int args[I]);
 // GCABI-LABEL: declare void @_ZN7PR316566calleeILi1EEEvPi(
-// MSABI: declare void @"\01??$callee@$00@PR31656@@YAXQAH@Z"(
+// MSABI: declare dso_local void @"\01??$callee@$00@PR31656@@YAXQAH@Z"(
 
 void caller() { callee<1>(0); }
 }
diff --git a/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp b/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp
index 8d75ed4..07356e4 100644
--- a/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp
+++ b/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp
@@ -7,18 +7,18 @@
 struct T {};
 
 T* test0() { return dynamic_cast<T*>((B*)0); }
-// CHECK-LABEL: define noalias %struct.T* @"\01?test0@@YAPAUT@@XZ"()
+// CHECK-LABEL: define dso_local noalias %struct.T* @"\01?test0@@YAPAUT@@XZ"()
 // CHECK:   ret %struct.T* null
 
 T* test1(V* x) { return &dynamic_cast<T&>(*x); }
-// CHECK-LABEL: define %struct.T* @"\01?test1@@YAPAUT@@PAUV@@@Z"(%struct.V* %x)
+// CHECK-LABEL: define dso_local %struct.T* @"\01?test1@@YAPAUT@@PAUV@@@Z"(%struct.V* %x)
 // CHECK:        [[CAST:%.*]] = bitcast %struct.V* %x to i8*
 // CHECK-NEXT:   [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"\01??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"\01??_R0?AUT@@@8" to i8*), i32 1)
 // CHECK-NEXT:   [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T*
 // CHECK-NEXT:   ret %struct.T* [[RET]]
 
 T* test2(A* x) { return &dynamic_cast<T&>(*x); }
-// CHECK-LABEL: define %struct.T* @"\01?test2@@YAPAUT@@PAUA@@@Z"(%struct.A* %x)
+// CHECK-LABEL: define dso_local %struct.T* @"\01?test2@@YAPAUT@@PAUA@@@Z"(%struct.A* %x)
 // CHECK:        [[CAST:%.*]] = bitcast %struct.A* %x to i8*
 // CHECK-NEXT:   [[VBPTRPTR:%.*]] = getelementptr inbounds %struct.A, %struct.A* %x, i32 0, i32 0
 // CHECK-NEXT:   [[VBTBL:%.*]] = load i32*, i32** [[VBPTRPTR]], align 4
@@ -30,7 +30,7 @@
 // CHECK-NEXT:   ret %struct.T* [[RET]]
 
 T* test3(B* x) { return &dynamic_cast<T&>(*x); }
-// CHECK-LABEL: define %struct.T* @"\01?test3@@YAPAUT@@PAUB@@@Z"(%struct.B* %x)
+// CHECK-LABEL: define dso_local %struct.T* @"\01?test3@@YAPAUT@@PAUB@@@Z"(%struct.B* %x)
 // CHECK:        [[VOIDP:%.*]] = getelementptr inbounds %struct.B, %struct.B* %x, i32 0, i32 0, i32 0
 // CHECK-NEXT:   [[VBPTR:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 4
 // CHECK-NEXT:   [[VBPTRPTR:%.*]] = bitcast i8* [[VBPTR:%.*]] to i32**
@@ -44,14 +44,14 @@
 // CHECK-NEXT:   ret %struct.T* [[RET]]
 
 T* test4(V* x) { return dynamic_cast<T*>(x); }
-// CHECK-LABEL: define %struct.T* @"\01?test4@@YAPAUT@@PAUV@@@Z"(%struct.V* %x)
+// CHECK-LABEL: define dso_local %struct.T* @"\01?test4@@YAPAUT@@PAUV@@@Z"(%struct.V* %x)
 // CHECK:        [[CAST:%.*]] = bitcast %struct.V* %x to i8*
 // CHECK-NEXT:   [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"\01??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"\01??_R0?AUT@@@8" to i8*), i32 0)
 // CHECK-NEXT:   [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T*
 // CHECK-NEXT:   ret %struct.T* [[RET]]
 
 T* test5(A* x) { return dynamic_cast<T*>(x); }
-// CHECK-LABEL: define %struct.T* @"\01?test5@@YAPAUT@@PAUA@@@Z"(%struct.A* %x)
+// CHECK-LABEL: define dso_local %struct.T* @"\01?test5@@YAPAUT@@PAUA@@@Z"(%struct.A* %x)
 // CHECK:        [[CHECK:%.*]] = icmp eq %struct.A* %x, null
 // CHECK-NEXT:   br i1 [[CHECK]]
 // CHECK:        [[VOIDP:%.*]] = bitcast %struct.A* %x to i8*
@@ -67,7 +67,7 @@
 // CHECK-NEXT:   ret %struct.T* [[RET]]
 
 T* test6(B* x) { return dynamic_cast<T*>(x); }
-// CHECK-LABEL: define %struct.T* @"\01?test6@@YAPAUT@@PAUB@@@Z"(%struct.B* %x)
+// CHECK-LABEL: define dso_local %struct.T* @"\01?test6@@YAPAUT@@PAUB@@@Z"(%struct.B* %x)
 // CHECK:        [[CHECK:%.*]] = icmp eq %struct.B* %x, null
 // CHECK-NEXT:   br i1 [[CHECK]]
 // CHECK:        [[CAST:%.*]] = getelementptr inbounds %struct.B, %struct.B* %x, i32 0, i32 0, i32 0
@@ -85,13 +85,13 @@
 // CHECK-NEXT:   ret %struct.T* [[RET]]
 
 void* test7(V* x) { return dynamic_cast<void*>(x); }
-// CHECK-LABEL: define i8* @"\01?test7@@YAPAXPAUV@@@Z"(%struct.V* %x)
+// CHECK-LABEL: define dso_local i8* @"\01?test7@@YAPAXPAUV@@@Z"(%struct.V* %x)
 // CHECK:        [[CAST:%.*]] = bitcast %struct.V* %x to i8*
 // CHECK-NEXT:   [[RET:%.*]] = tail call i8* @__RTCastToVoid(i8* [[CAST]])
 // CHECK-NEXT:   ret i8* [[RET]]
 
 void* test8(A* x) { return dynamic_cast<void*>(x); }
-// CHECK-LABEL: define i8* @"\01?test8@@YAPAXPAUA@@@Z"(%struct.A* %x)
+// CHECK-LABEL: define dso_local i8* @"\01?test8@@YAPAXPAUA@@@Z"(%struct.A* %x)
 // CHECK:        [[CHECK:%.*]] = icmp eq %struct.A* %x, null
 // CHECK-NEXT:   br i1 [[CHECK]]
 // CHECK:        [[VOIDP:%.*]] = bitcast %struct.A* %x to i8*
@@ -106,7 +106,7 @@
 // CHECK-NEXT:   ret i8* [[RET]]
 
 void* test9(B* x) { return dynamic_cast<void*>(x); }
-// CHECK-LABEL: define i8* @"\01?test9@@YAPAXPAUB@@@Z"(%struct.B* %x)
+// CHECK-LABEL: define dso_local i8* @"\01?test9@@YAPAXPAUB@@@Z"(%struct.B* %x)
 // CHECK:        [[CHECK:%.*]] = icmp eq %struct.B* %x, null
 // CHECK-NEXT:   br i1 [[CHECK]]
 // CHECK:        [[CAST:%.*]] = getelementptr inbounds %struct.B, %struct.B* %x, i32 0, i32 0, i32 0
@@ -134,7 +134,7 @@
   Cleanup c;
   return dynamic_cast<S3 *>(&s);
 }
-// CHECK-LABEL: define %"struct.PR25606::S3"* @"\01?f@PR25606@@YAPAUS3@1@AAUS2@1@@Z"(
+// CHECK-LABEL: define dso_local %"struct.PR25606::S3"* @"\01?f@PR25606@@YAPAUS3@1@AAUS2@1@@Z"(
 // CHECK:    [[CALL:%.*]] = invoke i8* @__RTDynamicCast
 
 // CHECK:    [[BC:%.*]] = bitcast i8* [[CALL]] to %"struct.PR25606::S3"*
diff --git a/test/CodeGenCXX/microsoft-abi-eh-catch.cpp b/test/CodeGenCXX/microsoft-abi-eh-catch.cpp
index 6e18b92..52351cf 100644
--- a/test/CodeGenCXX/microsoft-abi-eh-catch.cpp
+++ b/test/CodeGenCXX/microsoft-abi-eh-catch.cpp
@@ -17,7 +17,7 @@
   }
 }
 
-// WIN64-LABEL: define void @catch_all()
+// WIN64-LABEL: define dso_local void @catch_all()
 // WIN64: invoke void @might_throw()
 // WIN64-NEXT: to label %[[cont:[^ ]*]] unwind label %[[catchswitch_lpad:[^ ]*]]
 //
@@ -46,7 +46,7 @@
   }
 }
 
-// WIN64-LABEL: define void @catch_int()
+// WIN64-LABEL: define dso_local void @catch_int()
 // WIN64: catchpad within %{{[^ ]*}} [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i32* %[[e_addr:[^\]]*]]]
 //
 // The catchpad instruction starts the lifetime of 'e'. Unfortunately, that
@@ -68,7 +68,7 @@
   }
 }
 
-// WIN64-LABEL: define void @catch_int_unnamed()
+// WIN64-LABEL: define dso_local void @catch_int_unnamed()
 // WIN64: catchpad within %{{.*}} [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i8* null]
 // WIN64: catchret
 
@@ -94,7 +94,7 @@
   }
 }
 
-// WIN64-LABEL: define void @catch_a_byval()
+// WIN64-LABEL: define dso_local void @catch_a_byval()
 // WIN64: %[[e_addr:[^ ]*]] = alloca %struct.A
 // WIN64: catchpad within %{{[^ ]*}} [%rtti.TypeDescriptor7* @"\01??_R0?AUA@@@8", i32 0, %struct.A* %[[e_addr]]]
 // WIN64: %[[e_i8:[^ ]*]] = bitcast %struct.A* %[[e_addr]] to i8*
@@ -109,7 +109,7 @@
   }
 }
 
-// WIN64-LABEL: define void @catch_a_ref()
+// WIN64-LABEL: define dso_local void @catch_a_ref()
 // WIN64: %[[e_addr:[^ ]*]] = alloca %struct.A*
 // WIN64: catchpad within %{{[^ ]*}} [%rtti.TypeDescriptor7* @"\01??_R0?AUA@@@8", i32 8, %struct.A** %[[e_addr]]]
 // WIN64: %[[eptr:[^ ]*]] = load %struct.A*, %struct.A** %[[e_addr]]
@@ -121,7 +121,7 @@
   might_throw();
 }
 
-// WIN64-LABEL: define void @fn_with_exc_spec()
+// WIN64-LABEL: define dso_local void @fn_with_exc_spec()
 // WIN64: call void @might_throw()
 // WIN64-NEXT: ret void
 
@@ -137,7 +137,7 @@
   }
 }
 
-// WIN64-LABEL: define void @catch_nested()
+// WIN64-LABEL: define dso_local void @catch_nested()
 // WIN64: invoke void @might_throw()
 // WIN64-NEXT: to label %{{.*}} unwind label %[[catchswitch_outer:[^ ]*]]
 //
diff --git a/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp b/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
index 96b4fa3..01ea9a8 100644
--- a/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
+++ b/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
@@ -15,7 +15,7 @@
 }
 
 // With exceptions, we need to clean up at least one of these temporaries.
-// WIN32-LABEL: define void @"\01?HasEHCleanup@@YAXXZ"() {{.*}} {
+// WIN32-LABEL: define dso_local void @"\01?HasEHCleanup@@YAXXZ"() {{.*}} {
 // WIN32:   %[[base:.*]] = call i8* @llvm.stacksave()
 //    If this call throws, we have to restore the stack.
 // WIN32:   call void @"\01?getA@@YA?AUA@@XZ"(%struct.A* sret %{{.*}})
@@ -37,7 +37,7 @@
   return TakesTwo((TakeRef(A()), A()), (TakeRef(A()), A()));
 }
 
-// WIN32-LABEL: define i32 @"\01?HasDeactivatedCleanups@@YAHXZ"() {{.*}} {
+// WIN32-LABEL: define dso_local i32 @"\01?HasDeactivatedCleanups@@YAHXZ"() {{.*}} {
 // WIN32:   %[[isactive:.*]] = alloca i1
 // WIN32:   call i8* @llvm.stacksave()
 // WIN32:   %[[argmem:.*]] = alloca inalloca [[argmem_ty:<{ %struct.A, %struct.A }>]]
@@ -72,7 +72,7 @@
   return (cond ? TakesTwo(A(), A()) : CouldThrow());
 }
 
-// WIN32-LABEL: define i32 @"\01?HasConditionalCleanup@@YAH_N@Z"(i1 zeroext %{{.*}}) {{.*}} {
+// WIN32-LABEL: define dso_local i32 @"\01?HasConditionalCleanup@@YAH_N@Z"(i1 zeroext %{{.*}}) {{.*}} {
 // WIN32:   store i1 false
 // WIN32:   br i1
 // WIN32:   call i8* @llvm.stacksave()
@@ -95,7 +95,7 @@
   return (cond ? TakesTwo((TakeRef(A()), A()), (TakeRef(A()), A())) : CouldThrow());
 }
 
-// WIN32-O0-LABEL: define i32 @"\01?HasConditionalDeactivatedCleanups@@YAH_N@Z"{{.*}} {
+// WIN32-O0-LABEL: define dso_local i32 @"\01?HasConditionalDeactivatedCleanups@@YAH_N@Z"{{.*}} {
 // WIN32-O0:   alloca i1
 // WIN32-O0:   %[[arg1_cond:.*]] = alloca i1
 //        Start all four cleanups as deactivated.
@@ -130,7 +130,7 @@
 // WIN32-O0:   call x86_thiscallcc void @"\01??1A@@QAE@XZ"({{.*}})
 // WIN32-O0: }
 
-// WIN32-O3-LABEL: define i32 @"\01?HasConditionalDeactivatedCleanups@@YAH_N@Z"{{.*}} {
+// WIN32-O3-LABEL: define dso_local i32 @"\01?HasConditionalDeactivatedCleanups@@YAH_N@Z"{{.*}} {
 // WIN32-O3:   alloca i1
 // WIN32-O3:   alloca i1
 // WIN32-O3:   %[[arg1_cond:.*]] = alloca i1
@@ -187,7 +187,7 @@
 
 // Verify that we don't bother with a vbtable lookup when adjusting the this
 // pointer to call a base destructor from a constructor while unwinding.
-// WIN32-LABEL: define {{.*}} @"\01??0C@crash_on_partial_destroy@@QAE@XZ"{{.*}} {
+// WIN32-LABEL: define dso_local {{.*}} @"\01??0C@crash_on_partial_destroy@@QAE@XZ"{{.*}} {
 // WIN32:      cleanuppad
 //
 //        We shouldn't do any vbptr loads, just constant GEPs.
@@ -217,7 +217,7 @@
   g();
 }
 
-// WIN32-LABEL: define void @"\01?f@dont_call_terminate@@YAXXZ"()
+// WIN32-LABEL: define dso_local void @"\01?f@dont_call_terminate@@YAXXZ"()
 // WIN32: invoke void @"\01?g@dont_call_terminate@@YAXXZ"()
 // WIN32-NEXT: to label %[[cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
 //
@@ -239,7 +239,7 @@
 }
 }
 
-// WIN32-LABEL: define void @"\01?f@noexcept_false_dtor@@YAXXZ"()
+// WIN32-LABEL: define dso_local void @"\01?f@noexcept_false_dtor@@YAXXZ"()
 // WIN32: invoke i32 @"\01?CouldThrow@@YAHXZ"()
 // WIN32: call x86_thiscallcc void @"\01??1D@noexcept_false_dtor@@QAE@XZ"(%"struct.noexcept_false_dtor::D"* %{{.*}})
 // WIN32: cleanuppad
@@ -256,7 +256,7 @@
   g();
 }
 
-// WIN32-LIFETIME-LABEL: define void @"\01?f@lifetime_marker@@YAXXZ"()
+// WIN32-LIFETIME-LABEL: define dso_local void @"\01?f@lifetime_marker@@YAXXZ"()
 // WIN32-LIFETIME: %[[c:.*]] = alloca %"struct.lifetime_marker::C"
 // WIN32-LIFETIME: %[[bc0:.*]] = bitcast %"struct.lifetime_marker::C"* %c to i8*
 // WIN32-LIFETIME: call void @llvm.lifetime.start.p0i8(i64 1, i8* %[[bc0]])
@@ -293,7 +293,7 @@
 };
 
 class_0::class_0() {
-  // WIN32: define x86_thiscallcc %struct.class_0* @"\01??0class_0@@QAE@XZ"(%struct.class_0* returned %this, i32 %is_most_derived) 
+  // WIN32: define dso_local x86_thiscallcc %struct.class_0* @"\01??0class_0@@QAE@XZ"(%struct.class_0* returned %this, i32 %is_most_derived) 
   // WIN32: store i32 %is_most_derived, i32* %[[IS_MOST_DERIVED_VAR:.*]], align 4
   // WIN32: %[[IS_MOST_DERIVED_VAL:.*]] = load i32, i32* %[[IS_MOST_DERIVED_VAR]]
   // WIN32: %[[SHOULD_CALL_VBASE_CTORS:.*]] = icmp ne i32 %[[IS_MOST_DERIVED_VAL]], 0
diff --git a/test/CodeGenCXX/microsoft-abi-eh-inlineasm.cpp b/test/CodeGenCXX/microsoft-abi-eh-inlineasm.cpp
new file mode 100644
index 0000000..4179508
--- /dev/null
+++ b/test/CodeGenCXX/microsoft-abi-eh-inlineasm.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -std=c++11 -emit-llvm %s -o - -triple=x86_64-pc-windows-msvc \
+// RUN:     -fexceptions -fcxx-exceptions | FileCheck %s
+
+// Make sure calls to inline asm have funclet bundles.
+
+extern "C" void might_throw();
+extern "C" void foo() {
+  try {
+    might_throw();
+  } catch (int) {
+    __asm__("nop");
+  }
+}
+
+// CHECK-LABEL: define dso_local void @foo()
+// CHECK: invoke void @might_throw()
+// CHECK: %[[CATCHPAD:[^ ]*]] = catchpad within
+// CHECK: call void asm sideeffect "nop", {{.*}} [ "funclet"(token %[[CATCHPAD]]) ]
diff --git a/test/CodeGenCXX/microsoft-abi-eh-terminate.cpp b/test/CodeGenCXX/microsoft-abi-eh-terminate.cpp
index 7836dcf..9c60ceb 100644
--- a/test/CodeGenCXX/microsoft-abi-eh-terminate.cpp
+++ b/test/CodeGenCXX/microsoft-abi-eh-terminate.cpp
@@ -6,7 +6,7 @@
   may_throw();
 }
 
-// CHECK-LABEL: define void @"\01?never_throws@@YAXXZ"()
+// CHECK-LABEL: define dso_local void @"\01?never_throws@@YAXXZ"()
 // CHECK-SAME:          personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
 // CHECK:      invoke void @"\01?may_throw@@YAXXZ"()
 // CHECK:      %[[cp:.*]] = cleanuppad within none []
diff --git a/test/CodeGenCXX/microsoft-abi-emit-dependent.cpp b/test/CodeGenCXX/microsoft-abi-emit-dependent.cpp
new file mode 100644
index 0000000..e74ebc8
--- /dev/null
+++ b/test/CodeGenCXX/microsoft-abi-emit-dependent.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -emit-llvm-only -fmodules -triple x86_64-windows %s
+// PR36181
+#pragma clang module build foo
+module foo {}
+#pragma clang module contents
+template <typename T> struct A {
+  friend void f(A<T>) {}
+};
+#pragma clang module endbuild
+#pragma clang module import foo
+void g() { f(A<int>()); }
diff --git a/test/CodeGenCXX/microsoft-abi-extern-template.cpp b/test/CodeGenCXX/microsoft-abi-extern-template.cpp
index c69b4f5..2576875 100644
--- a/test/CodeGenCXX/microsoft-abi-extern-template.cpp
+++ b/test/CodeGenCXX/microsoft-abi-extern-template.cpp
@@ -7,13 +7,13 @@
 // CHECK-SAME:   i8* bitcast (i8* (%struct.Foo*, i32)* @"\01??_G?$Foo@H@@UEAAPEAXI@Z" to i8*)
 // CHECK-SAME: ] }, comdat
 
-// CHECK-LABEL: define %struct.Foo* @"\01?f@@YAPEAU?$Foo@H@@XZ"()
+// CHECK-LABEL: define dso_local %struct.Foo* @"\01?f@@YAPEAU?$Foo@H@@XZ"()
 // CHECK: call %struct.Foo* @"\01??0?$Foo@H@@QEAA@XZ"(%struct.Foo* %{{.*}})
 
-// CHECK: define available_externally %struct.Foo* @"\01??0?$Foo@H@@QEAA@XZ"(%struct.Foo* returned %this)
+// CHECK: define available_externally dso_local %struct.Foo* @"\01??0?$Foo@H@@QEAA@XZ"(%struct.Foo* returned %this)
 // CHECK:   store {{.*}} @"\01??_7?$Foo@H@@6B@"
 
-// CHECK: define linkonce_odr i8* @"\01??_G?$Foo@H@@UEAAPEAXI@Z"(%struct.Foo* %this, i32 %should_call_delete)
+// CHECK: define linkonce_odr dso_local i8* @"\01??_G?$Foo@H@@UEAAPEAXI@Z"(%struct.Foo* %this, i32 %should_call_delete)
 
 struct Base {
   virtual ~Base();
diff --git a/test/CodeGenCXX/microsoft-abi-member-pointers.cpp b/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
index a3985ba..47ee9ac 100644
--- a/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
+++ b/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
@@ -15,17 +15,17 @@
 struct PR26313_Z;
 int PR26313_Z::**a = nullptr;
 int PR26313_Z::*b = *a;
-// CHECK-DAG: @"\01?a@@3PAPQPR26313_Z@@HA" = global %0* null, align 4
-// CHECK-DAG: @"\01?b@@3PQPR26313_Z@@HQ1@" = global { i32, i32, i32 } { i32 0, i32 0, i32 -1 }, align 4
+// CHECK-DAG: @"\01?a@@3PAPQPR26313_Z@@HA" = dso_local global %0* null, align 4
+// CHECK-DAG: @"\01?b@@3PQPR26313_Z@@HQ1@" = dso_local global { i32, i32, i32 } { i32 0, i32 0, i32 -1 }, align 4
 
 namespace PR20947 {
 struct A;
 int A::**a = nullptr;
-// CHECK-DAG: @"\01?a@PR20947@@3PAPQA@1@HA" = global %{{.*}}* null, align 4
+// CHECK-DAG: @"\01?a@PR20947@@3PAPQA@1@HA" = dso_local global %{{.*}}* null, align 4
 
 struct B;
 int B::*&b = b;
-// CHECK-DAG: @"\01?b@PR20947@@3AAPQB@1@HA" = global %{{.*}}* null, align 4
+// CHECK-DAG: @"\01?b@PR20947@@3AAPQB@1@HA" = dso_local global %{{.*}}* null, align 4
 }
 
 namespace PR20017 {
@@ -35,7 +35,7 @@
 };
 struct B;
 auto a = &A<B>::m_fn1;
-// CHECK-DAG: @"\01?a@PR20017@@3P8?$A@UB@PR20017@@@1@AEPQB@1@HXZQ21@" = global i8* bitcast ({ i32, i32, i32 } ({{.*}}*)* @"\01?m_fn1@?$A@UB@PR20017@@@PR20017@@QAEPQB@2@HXZ" to i8*), align 4
+// CHECK-DAG: @"\01?a@PR20017@@3P8?$A@UB@PR20017@@@1@AEPQB@1@HXZQ21@" = dso_local global i8* bitcast ({ i32, i32, i32 } ({{.*}}*)* @"\01?m_fn1@?$A@UB@PR20017@@@PR20017@@QAEPQB@2@HXZ" to i8*), align 4
 }
 
 #ifndef INCOMPLETE_VIRTUAL
@@ -89,24 +89,24 @@
 int NonZeroVBPtr::*n_d_memptr;
 int Unspecified::*u_d_memptr;
 int UnspecSingle::*us_d_memptr;
-// CHECK: @"\01?s_d_memptr@@3PQSingle@@HQ1@" = global i32 -1, align 4
-// CHECK: @"\01?p_d_memptr@@3PQPolymorphic@@HQ1@" = global i32 0, align 4
-// CHECK: @"\01?m_d_memptr@@3PQMultiple@@HQ1@" = global i32 -1, align 4
-// CHECK: @"\01?v_d_memptr@@3PQVirtual@@HQ1@" = global { i32, i32 }
+// CHECK: @"\01?s_d_memptr@@3PQSingle@@HQ1@" = dso_local global i32 -1, align 4
+// CHECK: @"\01?p_d_memptr@@3PQPolymorphic@@HQ1@" = dso_local global i32 0, align 4
+// CHECK: @"\01?m_d_memptr@@3PQMultiple@@HQ1@" = dso_local global i32 -1, align 4
+// CHECK: @"\01?v_d_memptr@@3PQVirtual@@HQ1@" = dso_local global { i32, i32 }
 // CHECK:   { i32 0, i32 -1 }, align 4
-// CHECK: @"\01?n_d_memptr@@3PQNonZeroVBPtr@@HQ1@" = global { i32, i32 }
+// CHECK: @"\01?n_d_memptr@@3PQNonZeroVBPtr@@HQ1@" = dso_local global { i32, i32 }
 // CHECK:   { i32 0, i32 -1 }, align 4
-// CHECK: @"\01?u_d_memptr@@3PQUnspecified@@HQ1@" = global { i32, i32, i32 }
+// CHECK: @"\01?u_d_memptr@@3PQUnspecified@@HQ1@" = dso_local global { i32, i32, i32 }
 // CHECK:   { i32 0, i32 0, i32 -1 }, align 4
-// CHECK: @"\01?us_d_memptr@@3PQUnspecSingle@@HQ1@" = global { i32, i32, i32 }
+// CHECK: @"\01?us_d_memptr@@3PQUnspecSingle@@HQ1@" = dso_local global { i32, i32, i32 }
 // CHECK:   { i32 0, i32 0, i32 -1 }, align 4
 
 void (Single  ::*s_f_memptr)();
 void (Multiple::*m_f_memptr)();
 void (Virtual ::*v_f_memptr)();
-// CHECK: @"\01?s_f_memptr@@3P8Single@@AEXXZQ1@" = global i8* null, align 4
-// CHECK: @"\01?m_f_memptr@@3P8Multiple@@AEXXZQ1@" = global { i8*, i32 } zeroinitializer, align 4
-// CHECK: @"\01?v_f_memptr@@3P8Virtual@@AEXXZQ1@" = global { i8*, i32, i32 } zeroinitializer, align 4
+// CHECK: @"\01?s_f_memptr@@3P8Single@@AEXXZQ1@" = dso_local global i8* null, align 4
+// CHECK: @"\01?m_f_memptr@@3P8Multiple@@AEXXZQ1@" = dso_local global { i8*, i32 } zeroinitializer, align 4
+// CHECK: @"\01?v_f_memptr@@3P8Virtual@@AEXXZQ1@" = dso_local global { i8*, i32, i32 } zeroinitializer, align 4
 
 // We can define Unspecified after locking in the inheritance model.
 struct Unspecified : Multiple, Virtual {
@@ -170,11 +170,11 @@
 // Try a cast that changes the inheritance model.  Null for D is 0, but null for
 // C is -1.  We need the cast to long in order to hit the non-APValue path.
 int C::*ptr4 = (int C::*) (int D::*) (long D::*) 0;
-// CHECK: @"\01?ptr4@CastParam@@3PQC@1@HQ21@" = global i32 -1, align 4
+// CHECK: @"\01?ptr4@CastParam@@3PQC@1@HQ21@" = dso_local global i32 -1, align 4
 
 // MSVC rejects this but we accept it.
 int C::*ptr5 = (int C::*) (long D::*) 0;
-// CHECK: @"\01?ptr5@CastParam@@3PQC@1@HQ21@" = global i32 -1, align 4
+// CHECK: @"\01?ptr5@CastParam@@3PQC@1@HQ21@" = dso_local global i32 -1, align 4
 }
 
 struct UnspecWithVBPtr;
@@ -191,7 +191,7 @@
   void (Virtual    ::*v_f_memptr)() = &Virtual::foo;
   void (Unspecified::*u_f_memptr)() = &Unspecified::foo;
   void (UnspecWithVBPtr::*u2_f_memptr)() = &UnspecWithVBPtr::foo;
-// CHECK: define void @"\01?EmitNonVirtualMemberPointers@@YAXXZ"() {{.*}} {
+// CHECK: define dso_local void @"\01?EmitNonVirtualMemberPointers@@YAXXZ"() {{.*}} {
 // CHECK:   alloca i8*, align 4
 // CHECK:   alloca { i8*, i32 }, align 4
 // CHECK:   alloca { i8*, i32, i32 }, align 4
@@ -221,7 +221,7 @@
   if (memptr)
     memptr = 0;
 // Check that member pointers use the right offsets and that null is -1.
-// CHECK:      define void @"\01?podMemPtrs@@YAXXZ"() {{.*}} {
+// CHECK:      define dso_local void @"\01?podMemPtrs@@YAXXZ"() {{.*}} {
 // CHECK:        %[[memptr:.*]] = alloca i32, align 4
 // CHECK-NEXT:   store i32 0, i32* %[[memptr]], align 4
 // CHECK-NEXT:   store i32 4, i32* %[[memptr]], align 4
@@ -241,7 +241,7 @@
     memptr = 0;
 // Member pointers for polymorphic classes include the vtable slot in their
 // offset and use 0 to represent null.
-// CHECK:      define void @"\01?polymorphicMemPtrs@@YAXXZ"() {{.*}} {
+// CHECK:      define dso_local void @"\01?polymorphicMemPtrs@@YAXXZ"() {{.*}} {
 // CHECK:        %[[memptr:.*]] = alloca i32, align 4
 // CHECK-NEXT:   store i32 4, i32* %[[memptr]], align 4
 // CHECK-NEXT:   store i32 8, i32* %[[memptr]], align 4
@@ -255,7 +255,7 @@
 
 bool nullTestDataUnspecified(int Unspecified::*mp) {
   return mp;
-// CHECK: define zeroext i1 @"\01?nullTestDataUnspecified@@YA_NPQUnspecified@@H@Z"{{.*}} {
+// CHECK: define dso_local zeroext i1 @"\01?nullTestDataUnspecified@@YA_NPQUnspecified@@H@Z"{{.*}} {
 // CHECK:   %{{.*}} = load { i32, i32, i32 }, { i32, i32, i32 }* %{{.*}}, align 4
 // CHECK:   store { i32, i32, i32 } {{.*}} align 4
 // CHECK:   %[[mp:.*]] = load { i32, i32, i32 }, { i32, i32, i32 }* %{{.*}}, align 4
@@ -271,13 +271,13 @@
 // CHECK: }
 
 // Pass this large type indirectly.
-// X64-LABEL: define zeroext i1 @"\01?nullTestDataUnspecified@@
+// X64-LABEL: define dso_local zeroext i1 @"\01?nullTestDataUnspecified@@
 // X64:             ({ i32, i32, i32 }*)
 }
 
 bool nullTestFunctionUnspecified(void (Unspecified::*mp)()) {
   return mp;
-// CHECK: define zeroext i1 @"\01?nullTestFunctionUnspecified@@YA_NP8Unspecified@@AEXXZ@Z"{{.*}} {
+// CHECK: define dso_local zeroext i1 @"\01?nullTestFunctionUnspecified@@YA_NP8Unspecified@@AEXXZ@Z"{{.*}} {
 // CHECK:   %{{.*}} = load { i8*, i32, i32, i32 }, { i8*, i32, i32, i32 }* %{{.*}}, align 4
 // CHECK:   store { i8*, i32, i32, i32 } {{.*}} align 4
 // CHECK:   %[[mp:.*]] = load { i8*, i32, i32, i32 }, { i8*, i32, i32, i32 }* %{{.*}}, align 4
@@ -291,7 +291,7 @@
   return o->*memptr;
 // Test that we can unpack this aggregate member pointer and load the member
 // data pointer.
-// CHECK: define i32 @"\01?loadDataMemberPointerVirtual@@YAHPAUVirtual@@PQ1@H@Z"{{.*}} {
+// CHECK: define dso_local i32 @"\01?loadDataMemberPointerVirtual@@YAHPAUVirtual@@PQ1@H@Z"{{.*}} {
 // CHECK:   %[[o:.*]] = load %{{.*}}*, %{{.*}}** %{{.*}}, align 4
 // CHECK:   %[[memptr:.*]] = load { i32, i32 }, { i32, i32 }* %{{.*}}, align 4
 // CHECK:   %[[memptr0:.*]] = extractvalue { i32, i32 } %[[memptr:.*]], 0
@@ -312,7 +312,7 @@
 
 // A two-field data memptr on x64 gets coerced to i64 and is passed in a
 // register or memory.
-// X64-LABEL: define i32 @"\01?loadDataMemberPointerVirtual@@YAHPEAUVirtual@@PEQ1@H@Z"
+// X64-LABEL: define dso_local i32 @"\01?loadDataMemberPointerVirtual@@YAHPEAUVirtual@@PEQ1@H@Z"
 // X64:             (%struct.Virtual* %o, i64 %memptr.coerce)
 }
 
@@ -320,7 +320,7 @@
   return o->*memptr;
 // Test that we can unpack this aggregate member pointer and load the member
 // data pointer.
-// CHECK: define i32 @"\01?loadDataMemberPointerUnspecified@@YAHPAUUnspecified@@PQ1@H@Z"{{.*}} {
+// CHECK: define dso_local i32 @"\01?loadDataMemberPointerUnspecified@@YAHPAUUnspecified@@PQ1@H@Z"{{.*}} {
 // CHECK:   %[[o:.*]] = load %{{.*}}*, %{{.*}}** %{{.*}}, align 4
 // CHECK:   %[[memptr:.*]] = load { i32, i32, i32 }, { i32, i32, i32 }* %{{.*}}, align 4
 // CHECK:   %[[memptr0:.*]] = extractvalue { i32, i32, i32 } %[[memptr:.*]], 0
@@ -351,12 +351,12 @@
 void callMemberPointerSingle(Single *o, void (Single::*memptr)()) {
   (o->*memptr)();
 // Just look for an indirect thiscall.
-// CHECK: define void @"\01?callMemberPointerSingle@@{{.*}} {{.*}} {
+// CHECK: define dso_local void @"\01?callMemberPointerSingle@@{{.*}} {{.*}} {
 // CHECK:   call x86_thiscallcc void %{{.*}}(%{{.*}} %{{.*}})
 // CHECK:   ret void
 // CHECK: }
 
-// X64-LABEL: define void @"\01?callMemberPointerSingle@@
+// X64-LABEL: define dso_local void @"\01?callMemberPointerSingle@@
 // X64:           (%struct.Single* %o, i8* %memptr)
 // X64:   bitcast i8* %{{[^ ]*}} to void (%struct.Single*)*
 // X64:   ret void
@@ -364,7 +364,7 @@
 
 void callMemberPointerMultiple(Multiple *o, void (Multiple::*memptr)()) {
   (o->*memptr)();
-// CHECK: define void @"\01?callMemberPointerMultiple@@{{.*}} {
+// CHECK: define dso_local void @"\01?callMemberPointerMultiple@@{{.*}} {
 // CHECK:   %[[memptr0:.*]] = extractvalue { i8*, i32 } %{{.*}}, 0
 // CHECK:   %[[memptr1:.*]] = extractvalue { i8*, i32 } %{{.*}}, 1
 // CHECK:   %[[this_adjusted:.*]] = getelementptr inbounds i8, i8* %{{.*}}, i32 %[[memptr1]]
@@ -378,7 +378,7 @@
 void callMemberPointerVirtualBase(Virtual *o, void (Virtual::*memptr)()) {
   (o->*memptr)();
 // This shares a lot with virtual data member pointers.
-// CHECK: define void @"\01?callMemberPointerVirtualBase@@{{.*}} {
+// CHECK: define dso_local void @"\01?callMemberPointerVirtualBase@@{{.*}} {
 // CHECK:   %[[memptr0:.*]] = extractvalue { i8*, i32, i32 } %{{.*}}, 0
 // CHECK:   %[[memptr1:.*]] = extractvalue { i8*, i32, i32 } %{{.*}}, 1
 // CHECK:   %[[memptr2:.*]] = extractvalue { i8*, i32, i32 } %{{.*}}, 2
@@ -400,21 +400,21 @@
 bool compareSingleFunctionMemptr(void (Single::*l)(), void (Single::*r)()) {
   return l == r;
 // Should only be one comparison here.
-// CHECK: define zeroext i1 @"\01?compareSingleFunctionMemptr@@YA_NP8Single@@AEXXZ0@Z"{{.*}} {
+// CHECK: define dso_local zeroext i1 @"\01?compareSingleFunctionMemptr@@YA_NP8Single@@AEXXZ0@Z"{{.*}} {
 // CHECK-NOT: icmp
 // CHECK:   %[[r:.*]] = icmp eq
 // CHECK-NOT: icmp
 // CHECK:   ret i1 %[[r]]
 // CHECK: }
 
-// X64-LABEL: define zeroext i1 @"\01?compareSingleFunctionMemptr@@
+// X64-LABEL: define dso_local zeroext i1 @"\01?compareSingleFunctionMemptr@@
 // X64:             (i8* %{{[^,]*}}, i8* %{{[^)]*}})
 }
 
 bool compareNeqSingleFunctionMemptr(void (Single::*l)(), void (Single::*r)()) {
   return l != r;
 // Should only be one comparison here.
-// CHECK: define zeroext i1 @"\01?compareNeqSingleFunctionMemptr@@YA_NP8Single@@AEXXZ0@Z"{{.*}} {
+// CHECK: define dso_local zeroext i1 @"\01?compareNeqSingleFunctionMemptr@@YA_NP8Single@@AEXXZ0@Z"{{.*}} {
 // CHECK-NOT: icmp
 // CHECK:   %[[r:.*]] = icmp ne
 // CHECK-NOT: icmp
@@ -424,7 +424,7 @@
 
 bool unspecFuncMemptrEq(void (Unspecified::*l)(), void (Unspecified::*r)()) {
   return l == r;
-// CHECK: define zeroext i1 @"\01?unspecFuncMemptrEq@@YA_NP8Unspecified@@AEXXZ0@Z"{{.*}} {
+// CHECK: define dso_local zeroext i1 @"\01?unspecFuncMemptrEq@@YA_NP8Unspecified@@AEXXZ0@Z"{{.*}} {
 // CHECK:   %[[lhs0:.*]] = extractvalue { i8*, i32, i32, i32 } %[[l:.*]], 0
 // CHECK:   %{{.*}} = extractvalue { i8*, i32, i32, i32 } %[[r:.*]], 0
 // CHECK:   %[[cmp0:.*]] = icmp eq i8* %[[lhs0]], %{{.*}}
@@ -445,13 +445,13 @@
 // CHECK:   ret i1 %{{.*}}
 // CHECK: }
 
-// X64-LABEL: define zeroext i1 @"\01?unspecFuncMemptrEq@@
+// X64-LABEL: define dso_local zeroext i1 @"\01?unspecFuncMemptrEq@@
 // X64:             ({ i8*, i32, i32, i32 }*, { i8*, i32, i32, i32 }*)
 }
 
 bool unspecFuncMemptrNeq(void (Unspecified::*l)(), void (Unspecified::*r)()) {
   return l != r;
-// CHECK: define zeroext i1 @"\01?unspecFuncMemptrNeq@@YA_NP8Unspecified@@AEXXZ0@Z"{{.*}} {
+// CHECK: define dso_local zeroext i1 @"\01?unspecFuncMemptrNeq@@YA_NP8Unspecified@@AEXXZ0@Z"{{.*}} {
 // CHECK:   %[[lhs0:.*]] = extractvalue { i8*, i32, i32, i32 } %[[l:.*]], 0
 // CHECK:   %{{.*}} = extractvalue { i8*, i32, i32, i32 } %[[r:.*]], 0
 // CHECK:   %[[cmp0:.*]] = icmp ne i8* %[[lhs0]], %{{.*}}
@@ -475,7 +475,7 @@
 
 bool unspecDataMemptrEq(int Unspecified::*l, int Unspecified::*r) {
   return l == r;
-// CHECK: define zeroext i1 @"\01?unspecDataMemptrEq@@YA_NPQUnspecified@@H0@Z"{{.*}} {
+// CHECK: define dso_local zeroext i1 @"\01?unspecDataMemptrEq@@YA_NPQUnspecified@@H0@Z"{{.*}} {
 // CHECK:   extractvalue { i32, i32, i32 } %{{.*}}, 0
 // CHECK:   extractvalue { i32, i32, i32 } %{{.*}}, 0
 // CHECK:   icmp eq i32
@@ -490,13 +490,13 @@
 // CHECK:   ret i1
 // CHECK: }
 
-// X64-LABEL: define zeroext i1 @"\01?unspecDataMemptrEq@@
+// X64-LABEL: define dso_local zeroext i1 @"\01?unspecDataMemptrEq@@
 // X64:             ({ i32, i32, i32 }*, { i32, i32, i32 }*)
 }
 
 void (Multiple::*convertB2FuncToMultiple(void (B2::*mp)()))() {
   return mp;
-// CHECK: define i64 @"\01?convertB2FuncToMultiple@@YAP8Multiple@@AEXXZP8B2@@AEXXZ@Z"{{.*}} {
+// CHECK: define dso_local i64 @"\01?convertB2FuncToMultiple@@YAP8Multiple@@AEXXZP8B2@@AEXXZ@Z"{{.*}} {
 // CHECK:   store
 // CHECK:   %[[mp:.*]] = load i8*, i8** %{{.*}}, align 4
 // CHECK:   icmp ne i8* %[[mp]], null
@@ -520,7 +520,7 @@
 // LLVM from optimizing away the branch.  This is likely a bug in
 // lib/CodeGen/TargetInfo.cpp with how we classify memptr types for returns.
 //
-// CHECK: define i32 @"\01?convertMultipleFuncToB2@@YAP8B2@@AEXXZP8Multiple@@AEXXZ@Z"{{.*}} {
+// CHECK: define dso_local i32 @"\01?convertMultipleFuncToB2@@YAP8B2@@AEXXZP8Multiple@@AEXXZ@Z"{{.*}} {
 // CHECK:   store
 // CHECK:   %[[src:.*]] = load { i8*, i32 }, { i8*, i32 }* %{{.*}}, align 4
 // CHECK:   extractvalue { i8*, i32 } %[[src]], 0
@@ -545,7 +545,7 @@
 
 void (D::*convertCToD(void (C::*mp)()))() {
   return mp;
-// CHECK: define void @"\01?convertCToD@Test1@@YAP8D@1@AEXXZP8C@1@AEXXZ@Z"{{.*}} {
+// CHECK: define dso_local void @"\01?convertCToD@Test1@@YAP8D@1@AEXXZP8C@1@AEXXZ@Z"{{.*}} {
 // CHECK:   store
 // CHECK:   load { i8*, i32, i32 }, { i8*, i32, i32 }* %{{.*}}, align 4
 // CHECK:   extractvalue { i8*, i32, i32 } %{{.*}}, 0
@@ -585,7 +585,7 @@
 
 int A::*reinterpret(int B::*mp) {
   return reinterpret_cast<int A::*>(mp);
-// CHECK: define i32 @"\01?reinterpret@Test2@@YAPQA@1@HPQB@1@H@Z"{{.*}}  {
+// CHECK: define dso_local i32 @"\01?reinterpret@Test2@@YAPQA@1@HPQB@1@H@Z"{{.*}}  {
 // CHECK-NOT: select
 // CHECK:   ret i32
 // CHECK: }
@@ -593,7 +593,7 @@
 
 int A::*reinterpret(int C::*mp) {
   return reinterpret_cast<int A::*>(mp);
-// CHECK: define i32 @"\01?reinterpret@Test2@@YAPQA@1@HPQC@1@H@Z"{{.*}}  {
+// CHECK: define dso_local i32 @"\01?reinterpret@Test2@@YAPQA@1@HPQC@1@H@Z"{{.*}}  {
 // CHECK:   %[[mp:.*]] = load i32, i32*
 // CHECK:   %[[cmp:.*]] = icmp ne i32 %[[mp]], 0
 // CHECK:   select i1 %[[cmp]], i32 %[[mp]], i32 -1
@@ -612,7 +612,7 @@
 
 int *load_data(A *a, int A::*mp) {
   return &(a->*mp);
-// CHECK-LABEL: define i32* @"\01?load_data@Test3@@YAPAHPAUA@1@PQ21@H@Z"{{.*}}  {
+// CHECK-LABEL: define dso_local i32* @"\01?load_data@Test3@@YAPAHPAUA@1@PQ21@H@Z"{{.*}}  {
 // CHECK:    %[[a:.*]] = load %"struct.Test3::A"*, %"struct.Test3::A"** %{{.*}}, align 4
 // CHECK:    %[[mp:.*]] = load i32, i32* %{{.*}}, align 4
 // CHECK:    %[[a_i8:.*]] = bitcast %"struct.Test3::A"* %[[a]] to i8*
@@ -631,7 +631,7 @@
 void (C::*getmp())() {
   return &C::g;
 }
-// CHECK-LABEL: define i64 @"\01?getmp@Test4@@YAP8C@1@AEXXZXZ"()
+// CHECK-LABEL: define dso_local i64 @"\01?getmp@Test4@@YAP8C@1@AEXXZXZ"()
 // CHECK: store { i8*, i32 } { i8* bitcast (void (%"struct.Test4::C"*, ...)* @"\01??_9C@Test4@@$BA@AE" to i8*), i32 4 }, { i8*, i32 }* %{{.*}}
 //
 
@@ -651,7 +651,7 @@
 };
 struct B : public A {};
 void test() { void (B::*a)() = &B::f; }
-// CHECK-LABEL: define void @"\01?test@pr20007@@YAXXZ"
+// CHECK-LABEL: define dso_local void @"\01?test@pr20007@@YAXXZ"
 // CHECK: store i8* bitcast (void (%"struct.pr20007::A"*)* @"\01?f@A@pr20007@@QAEXXZ" to i8*)
 }
 
@@ -663,7 +663,7 @@
 struct __single_inheritance B;
 struct B : public A {};
 void test() { void (B::*a)() = &B::f; }
-// CHECK-LABEL: define void @"\01?test@pr20007_kw@@YAXXZ"
+// CHECK-LABEL: define dso_local void @"\01?test@pr20007_kw@@YAXXZ"
 // CHECK: store i8* bitcast (void (%"struct.pr20007_kw::A"*)* @"\01?f@A@pr20007_kw@@QAEXXZ" to i8*)
 }
 
@@ -678,7 +678,7 @@
 static_assert(sizeof(int B::*) == 4, "");
 static_assert(sizeof(int A::*) == 4, "");
 #pragma pointers_to_members(best_case)
-// CHECK-LABEL: define void @"\01?test@pr20007_pragma@@YAXXZ"
+// CHECK-LABEL: define dso_local void @"\01?test@pr20007_pragma@@YAXXZ"
 }
 
 namespace pr20007_pragma2 {
@@ -692,7 +692,7 @@
 static_assert(sizeof(int B::*) == 4, "");
 static_assert(sizeof(int A::*) == 12, "");
 #pragma pointers_to_members(best_case)
-// CHECK-LABEL: define void @"\01?test@pr20007_pragma2@@YAXXZ"
+// CHECK-LABEL: define dso_local void @"\01?test@pr20007_pragma2@@YAXXZ"
 }
 
 namespace pr23823 {
@@ -738,7 +738,7 @@
 
 typedef void (D::*DMemPtrTy)();
 
-// CHECK-LABEL: define void @"\01?get_memptr@pr23878@@YAP8D@1@AEXXZXZ"
+// CHECK-LABEL: define dso_local void @"\01?get_memptr@pr23878@@YAP8D@1@AEXXZXZ"
 // CHECK: @"\01??_9C@pr23878@@$BA@AE" to i8*), i32 0, i32 4
 DMemPtrTy get_memptr() { return &D::f; }
 }
@@ -762,7 +762,7 @@
 struct S;
 
 void f(int S::*&p) {}
-// CHECK-LABEL: define void @"\01?f@PR24703@@YAXAAPQS@1@H@Z"(
+// CHECK-LABEL: define dso_local void @"\01?f@PR24703@@YAXAAPQS@1@H@Z"(
 }
 
 namespace ReferenceToMPTWithIncompleteClass {
diff --git a/test/CodeGenCXX/microsoft-abi-methods.cpp b/test/CodeGenCXX/microsoft-abi-methods.cpp
index e58d103..6dbbf45 100644
--- a/test/CodeGenCXX/microsoft-abi-methods.cpp
+++ b/test/CodeGenCXX/microsoft-abi-methods.cpp
@@ -22,7 +22,7 @@
 // CHECK: ret
 
 // Make sure that the definition uses the right calling convention:
-// CHECK: define linkonce_odr x86_thiscallcc void @"\01?simple_method@C@@QAEXXZ"
+// CHECK: define linkonce_odr dso_local x86_thiscallcc void @"\01?simple_method@C@@QAEXXZ"
 // CHECK: ret
 }
 
@@ -34,7 +34,7 @@
 // CHECK: ret
 
 // Make sure that the definition uses the right calling convention:
-// CHECK: define linkonce_odr void @"\01?cdecl_method@C@@QAAXXZ"
+// CHECK: define linkonce_odr dso_local void @"\01?cdecl_method@C@@QAAXXZ"
 // CHECK: ret
 }
 
@@ -46,7 +46,7 @@
 // CHECK: ret
 
 // Make sure that the definition uses the right calling convention:
-// CHECK: define linkonce_odr void @"\01?vararg_method@C@@QAAXPBDZZ"
+// CHECK: define linkonce_odr dso_local void @"\01?vararg_method@C@@QAAXPBDZZ"
 }
 
 void call_static_method() {
@@ -56,7 +56,7 @@
 // CHECK: ret
 
 // Make sure that the definition uses the right calling convention:
-// CHECK: define linkonce_odr void @"\01?static_method@C@@SAXXZ"
+// CHECK: define linkonce_odr dso_local void @"\01?static_method@C@@SAXXZ"
 }
 
 class Base {
@@ -71,19 +71,19 @@
   Child c;
 // Make sure that the Base constructor call in the Child constructor uses
 // the right calling convention:
-// CHECK: define linkonce_odr x86_thiscallcc %class.Child* @"\01??0Child@@QAE@XZ"
+// CHECK: define linkonce_odr dso_local x86_thiscallcc %class.Child* @"\01??0Child@@QAE@XZ"
 // CHECK: %{{[.0-9A-Z_a-z]+}} = call x86_thiscallcc %class.Base* @"\01??0Base@@QAE@XZ"
 // CHECK: ret
 
 // Make sure that the Base constructor definition uses the right CC:
-// CHECK: define linkonce_odr x86_thiscallcc %class.Base* @"\01??0Base@@QAE@XZ"
+// CHECK: define linkonce_odr dso_local x86_thiscallcc %class.Base* @"\01??0Base@@QAE@XZ"
 
 // Make sure that the Base destructor call in the Child denstructor uses
 // the right calling convention:
-// CHECK: define linkonce_odr x86_thiscallcc void @"\01??1Child@@QAE@XZ"
+// CHECK: define linkonce_odr dso_local x86_thiscallcc void @"\01??1Child@@QAE@XZ"
 // CHECK: call x86_thiscallcc void @"\01??1Base@@QAE@XZ"
 // CHECK: ret
 
 // Make sure that the Base destructor definition uses the right CC:
-// CHECK: define linkonce_odr x86_thiscallcc void @"\01??1Base@@QAE@XZ"
+// CHECK: define linkonce_odr dso_local x86_thiscallcc void @"\01??1Base@@QAE@XZ"
 }
diff --git a/test/CodeGenCXX/microsoft-abi-multiple-nonvirtual-inheritance.cpp b/test/CodeGenCXX/microsoft-abi-multiple-nonvirtual-inheritance.cpp
index f9db268..6dc0158 100644
--- a/test/CodeGenCXX/microsoft-abi-multiple-nonvirtual-inheritance.cpp
+++ b/test/CodeGenCXX/microsoft-abi-multiple-nonvirtual-inheritance.cpp
@@ -19,7 +19,7 @@
 extern "C" void foo(void *);
 
 void call_left_no_override(ChildNoOverride *child) {
-// CHECK: define void @"\01?call_left_no_override
+// CHECK-LABEL: define dso_local void @"\01?call_left_no_override
 // CHECK: %[[CHILD:.*]] = load %struct.ChildNoOverride
 
   child->left();
@@ -34,7 +34,8 @@
 }
 
 void ChildOverride::left() {
-// CHECK: define x86_thiscallcc void @"\01?left@ChildOverride@@UAEXXZ"(%struct.ChildOverride* %[[THIS:.*]])
+// CHECK-LABEL: define dso_local x86_thiscallcc void @"\01?left@ChildOverride@@UAEXXZ"
+// CHECK-SAME: (%struct.ChildOverride* %[[THIS:.*]])
 //
 // No need to adjust 'this' as the ChildOverride's layout begins with Left.
 // CHECK: %[[THIS_ADDR:.*]] = alloca %struct.ChildOverride*, align 4
@@ -48,7 +49,7 @@
 }
 
 void call_left_override(ChildOverride *child) {
-// CHECK: define void @"\01?call_left_override
+// CHECK-LABEL: define dso_local void @"\01?call_left_override
 // CHECK: %[[CHILD:.*]] = load %struct.ChildOverride
 
   child->left();
@@ -62,7 +63,7 @@
 }
 
 void call_right_no_override(ChildNoOverride *child) {
-// CHECK: define void @"\01?call_right_no_override
+// CHECK-LABEL: define dso_local void @"\01?call_right_no_override
 // CHECK: %[[CHILD:.*]] = load %struct.ChildNoOverride
 
   child->right();
@@ -82,25 +83,30 @@
 }
 
 void ChildOverride::right() {
-// CHECK: define x86_thiscallcc void @"\01?right@ChildOverride@@UAEXXZ"(i8*
+// CHECK-LABEL: define dso_local x86_thiscallcc void @"\01?right@ChildOverride@@UAEXXZ"(i8*
 //
 // ChildOverride::right gets 'this' cast to Right* in ECX (i.e. this+4) so we
 // need to adjust 'this' before use.
 //
+// CHECK: %[[THIS_STORE:.*]] = alloca %struct.ChildOverride*, align 4
 // CHECK: %[[THIS_ADDR:.*]] = alloca %struct.ChildOverride*, align 4
-// CHECK: %[[THIS_i8:.*]] = getelementptr inbounds i8, i8* %[[ECX:.*]], i32 -4
-// CHECK: %[[THIS:.*]] = bitcast i8* %[[THIS_i8]] to %struct.ChildOverride*
-// CHECK: store %struct.ChildOverride* %[[THIS]], %struct.ChildOverride** %[[THIS_ADDR]], align 4
+// CHECK: %[[COERCE_VAL:.*]] = bitcast i8* %[[ECX:.*]] to %struct.ChildOverride*
+// CHECK: store %struct.ChildOverride* %[[COERCE_VAL]], %struct.ChildOverride** %[[THIS_STORE]], align 4
+// CHECK: %[[THIS_INIT:.*]] = load %struct.ChildOverride*, %struct.ChildOverride** %[[THIS_STORE]], align 4
+// CHECK: store %struct.ChildOverride* %[[THIS_INIT]], %struct.ChildOverride** %[[THIS_ADDR]], align 4
+// CHECK: %[[THIS_RELOAD:.*]] = load %struct.ChildOverride*, %struct.ChildOverride** %[[THIS_ADDR]]
+// CHECK: %[[THIS_i8:.*]] = bitcast %struct.ChildOverride* %[[THIS_RELOAD]] to i8*
+// CHECK: %[[THIS_ADJUSTED:.*]] = getelementptr inbounds i8, i8* %[[THIS_i8]], i32 -4
+// CHECK: %[[THIS:.*]] = bitcast i8* %[[THIS_ADJUSTED]] to %struct.ChildOverride*
 
   foo(this);
-// CHECK: %[[THIS:.*]] = load %struct.ChildOverride*, %struct.ChildOverride** %[[THIS_ADDR]]
 // CHECK: %[[THIS_PARAM:.*]] = bitcast %struct.ChildOverride* %[[THIS]] to i8*
 // CHECK: call void @foo(i8* %[[THIS_PARAM]])
 // CHECK: ret
 }
 
 void call_right_override(ChildOverride *child) {
-// CHECK: define void @"\01?call_right_override
+// CHECK-LABEL: define dso_local void @"\01?call_right_override
 // CHECK: %[[CHILD:.*]] = load %struct.ChildOverride
 
   child->right();
@@ -108,16 +114,15 @@
 // the caller site.
 //
 // CHECK: %[[CHILD_i8:.*]] = bitcast %struct.ChildOverride* %[[CHILD]] to i8*
+// CHECK: %[[RIGHT:.*]] = getelementptr inbounds i8, i8* %[[CHILD_i8]], i32 4
 //
+// CHECK: %[[CHILD_i8:.*]] = bitcast %struct.ChildOverride* %[[CHILD]] to i8*
 // CHECK: %[[VFPTR_i8:.*]] = getelementptr inbounds i8, i8* %[[CHILD_i8]], i32 4
 // CHECK: %[[VFPTR:.*]] = bitcast i8* %[[VFPTR_i8]] to void (i8*)***
 // CHECK: %[[VFTABLE:.*]] = load void (i8*)**, void (i8*)*** %[[VFPTR]]
 // CHECK: %[[VFUN:.*]] = getelementptr inbounds void (i8*)*, void (i8*)** %[[VFTABLE]], i64 0
 // CHECK: %[[VFUN_VALUE:.*]] = load void (i8*)*, void (i8*)** %[[VFUN]]
 //
-// CHECK: %[[CHILD_i8:.*]] = bitcast %struct.ChildOverride* %[[CHILD]] to i8*
-// CHECK: %[[RIGHT:.*]] = getelementptr inbounds i8, i8* %[[CHILD_i8]], i32 4
-//
 // CHECK: call x86_thiscallcc void %[[VFUN_VALUE]](i8* %[[RIGHT]])
 // CHECK: ret
 }
@@ -127,15 +132,20 @@
 };
 
 void GrandchildOverride::right() {
-// CHECK: define x86_thiscallcc void @"\01?right@GrandchildOverride@@UAEXXZ"(i8*
+// CHECK-LABEL: define dso_local x86_thiscallcc void @"\01?right@GrandchildOverride@@UAEXXZ"(i8*
 //
+// CHECK: %[[THIS_STORE:.*]] = alloca %struct.GrandchildOverride*, align 4
 // CHECK: %[[THIS_ADDR:.*]] = alloca %struct.GrandchildOverride*, align 4
-// CHECK: %[[THIS_i8:.*]] = getelementptr inbounds i8, i8* %[[ECX:.*]], i32 -4
-// CHECK: %[[THIS:.*]] = bitcast i8* %[[THIS_i8]] to %struct.GrandchildOverride*
-// CHECK: store %struct.GrandchildOverride* %[[THIS]], %struct.GrandchildOverride** %[[THIS_ADDR]], align 4
+// CHECK: %[[COERCE_VAL:.*]] = bitcast i8* %[[ECX:.*]] to %struct.GrandchildOverride*
+// CHECK: store %struct.GrandchildOverride* %[[COERCE_VAL]], %struct.GrandchildOverride** %[[THIS_STORE]], align 4
+// CHECK: %[[THIS_INIT:.*]] = load %struct.GrandchildOverride*, %struct.GrandchildOverride** %[[THIS_STORE]], align 4
+// CHECK: store %struct.GrandchildOverride* %[[THIS_INIT]], %struct.GrandchildOverride** %[[THIS_ADDR]], align 4
+// CHECK: %[[THIS_RELOAD:.*]] = load %struct.GrandchildOverride*, %struct.GrandchildOverride** %[[THIS_ADDR]]
+// CHECK: %[[THIS_i8:.*]] = bitcast %struct.GrandchildOverride* %[[THIS_RELOAD]] to i8*
+// CHECK: %[[THIS_ADJUSTED:.*]] = getelementptr inbounds i8, i8* %[[THIS_i8]], i32 -4
+// CHECK: %[[THIS:.*]] = bitcast i8* %[[THIS_ADJUSTED]] to %struct.GrandchildOverride*
 
   foo(this);
-// CHECK: %[[THIS:.*]] = load %struct.GrandchildOverride*, %struct.GrandchildOverride** %[[THIS_ADDR]]
 // CHECK: %[[THIS_PARAM:.*]] = bitcast %struct.GrandchildOverride* %[[THIS]] to i8*
 // CHECK: call void @foo(i8* %[[THIS_PARAM]])
 // CHECK: ret
@@ -148,19 +158,19 @@
 
 void emit_ctors() {
   Left l;
-  // CHECK: define {{.*}} @"\01??0Left@@QAE@XZ"
+  // CHECK-LABEL: define {{.*}} @"\01??0Left@@QAE@XZ"
   // CHECK-NOT: getelementptr
   // CHECK:   store i32 (...)** bitcast ({ [1 x i8*] }* @"\01??_7Left@@6B@" to i32 (...)**)
   // CHECK: ret
 
   Right r;
-  // CHECK: define {{.*}} @"\01??0Right@@QAE@XZ"
+  // CHECK-LABEL: define {{.*}} @"\01??0Right@@QAE@XZ"
   // CHECK-NOT: getelementptr
   // CHECK:   store i32 (...)** bitcast ({ [1 x i8*] }* @"\01??_7Right@@6B@" to i32 (...)**)
   // CHECK: ret
 
   ChildOverride co;
-  // CHECK: define {{.*}} @"\01??0ChildOverride@@QAE@XZ"
+  // CHECK-LABEL: define {{.*}} @"\01??0ChildOverride@@QAE@XZ"
   // CHECK:   %[[THIS:.*]] = load %struct.ChildOverride*, %struct.ChildOverride**
   // CHECK:   %[[VFPTR:.*]] = bitcast %struct.ChildOverride* %[[THIS]] to i32 (...)***
   // CHECK:   store i32 (...)** bitcast ({ [1 x i8*] }* @"\01??_7ChildOverride@@6BLeft@@@" to i32 (...)**), i32 (...)*** %[[VFPTR]]
@@ -171,7 +181,7 @@
   // CHECK: ret
 
   GrandchildOverride gc;
-  // CHECK: define {{.*}} @"\01??0GrandchildOverride@@QAE@XZ"
+  // CHECK-LABEL: define {{.*}} @"\01??0GrandchildOverride@@QAE@XZ"
   // CHECK:   %[[THIS:.*]] = load %struct.GrandchildOverride*, %struct.GrandchildOverride**
   // CHECK:   %[[VFPTR:.*]] = bitcast %struct.GrandchildOverride* %[[THIS]] to i32 (...)***
   // CHECK:   store i32 (...)** bitcast ({ [1 x i8*] }* @"\01??_7GrandchildOverride@@6BLeft@@@" to i32 (...)**), i32 (...)*** %[[VFPTR]]
@@ -192,7 +202,7 @@
 };
 
 void call_asymmetric_child_complete_dtor() {
-  // CHECK-LABEL: define void @"\01?call_asymmetric_child_complete_dtor@@YAXXZ"
+  // CHECK-LABEL: define dso_local void @"\01?call_asymmetric_child_complete_dtor@@YAXXZ"
   AsymmetricChild obj;
   // CHECK: call x86_thiscallcc %struct.AsymmetricChild* @"\01??0AsymmetricChild@@QAE@XZ"(%struct.AsymmetricChild* %[[OBJ:.*]])
   // CHECK-NOT: getelementptr
diff --git a/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp b/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
index eaae349..acd8653 100644
--- a/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
+++ b/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
@@ -68,7 +68,7 @@
   int bb;
 };
 
-// WIN32: declare void @"{{.*take_bools_and_chars.*}}"
+// WIN32: declare dso_local void @"{{.*take_bools_and_chars.*}}"
 // WIN32:       (<{ i8, [3 x i8], i8, [3 x i8], %struct.SmallWithDtor,
 // WIN32:           i8, [3 x i8], i8, [3 x i8], i32, i8, [3 x i8] }>* inalloca)
 void take_bools_and_chars(char a, char b, SmallWithDtor c, char d, bool e, int f, bool g);
@@ -79,93 +79,93 @@
 // Returning structs that fit into a register.
 Small small_return() { return Small(); }
 // LINUX-LABEL: define void @_Z12small_returnv(%struct.Small* noalias sret %agg.result)
-// WIN32: define i32 @"\01?small_return@@YA?AUSmall@@XZ"()
-// WIN64: define i32 @"\01?small_return@@YA?AUSmall@@XZ"()
+// WIN32: define dso_local i32 @"\01?small_return@@YA?AUSmall@@XZ"()
+// WIN64: define dso_local i32 @"\01?small_return@@YA?AUSmall@@XZ"()
 
 Medium medium_return() { return Medium(); }
 // LINUX-LABEL: define void @_Z13medium_returnv(%struct.Medium* noalias sret %agg.result)
-// WIN32: define i64 @"\01?medium_return@@YA?AUMedium@@XZ"()
-// WIN64: define i64 @"\01?medium_return@@YA?AUMedium@@XZ"()
+// WIN32: define dso_local i64 @"\01?medium_return@@YA?AUMedium@@XZ"()
+// WIN64: define dso_local i64 @"\01?medium_return@@YA?AUMedium@@XZ"()
 
 // Returning structs that fit into a register but are not POD.
 SmallCpp11NotCpp03Pod small_non_pod_return() { return SmallCpp11NotCpp03Pod(); }
 // LINUX-LABEL: define void @_Z20small_non_pod_returnv(%struct.SmallCpp11NotCpp03Pod* noalias sret %agg.result)
-// WIN32: define void @"\01?small_non_pod_return@@YA?AUSmallCpp11NotCpp03Pod@@XZ"(%struct.SmallCpp11NotCpp03Pod* noalias sret %agg.result)
-// WIN64: define void @"\01?small_non_pod_return@@YA?AUSmallCpp11NotCpp03Pod@@XZ"(%struct.SmallCpp11NotCpp03Pod* noalias sret %agg.result)
+// WIN32: define dso_local void @"\01?small_non_pod_return@@YA?AUSmallCpp11NotCpp03Pod@@XZ"(%struct.SmallCpp11NotCpp03Pod* noalias sret %agg.result)
+// WIN64: define dso_local void @"\01?small_non_pod_return@@YA?AUSmallCpp11NotCpp03Pod@@XZ"(%struct.SmallCpp11NotCpp03Pod* noalias sret %agg.result)
 
 SmallWithCtor small_with_ctor_return() { return SmallWithCtor(); }
 // LINUX-LABEL: define void @_Z22small_with_ctor_returnv(%struct.SmallWithCtor* noalias sret %agg.result)
-// WIN32: define void @"\01?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret %agg.result)
-// WIN64: define void @"\01?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret %agg.result)
+// WIN32: define dso_local void @"\01?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret %agg.result)
+// WIN64: define dso_local void @"\01?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret %agg.result)
 // FIXME: The 'sret' mark here doesn't seem to be enough to convince LLVM to
 // preserve the hidden sret pointer in R0 across the function.
-// WOA: define arm_aapcs_vfpcc void @"\01?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret %agg.result)
+// WOA: define dso_local arm_aapcs_vfpcc void @"\01?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret %agg.result)
 
 SmallWithVftable small_with_vftable_return() { return SmallWithVftable(); }
 // LINUX-LABEL: define void @_Z25small_with_vftable_returnv(%struct.SmallWithVftable* noalias sret %agg.result)
-// WIN32: define void @"\01?small_with_vftable_return@@YA?AUSmallWithVftable@@XZ"(%struct.SmallWithVftable* noalias sret %agg.result)
-// WIN64: define void @"\01?small_with_vftable_return@@YA?AUSmallWithVftable@@XZ"(%struct.SmallWithVftable* noalias sret %agg.result)
+// WIN32: define dso_local void @"\01?small_with_vftable_return@@YA?AUSmallWithVftable@@XZ"(%struct.SmallWithVftable* noalias sret %agg.result)
+// WIN64: define dso_local void @"\01?small_with_vftable_return@@YA?AUSmallWithVftable@@XZ"(%struct.SmallWithVftable* noalias sret %agg.result)
 
 MediumWithCopyCtor medium_with_copy_ctor_return() { return MediumWithCopyCtor(); }
 // LINUX-LABEL: define void @_Z28medium_with_copy_ctor_returnv(%struct.MediumWithCopyCtor* noalias sret %agg.result)
-// WIN32: define void @"\01?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret %agg.result)
-// WIN64: define void @"\01?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret %agg.result)
-// WOA: define arm_aapcs_vfpcc void @"\01?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret %agg.result)
+// WIN32: define dso_local void @"\01?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret %agg.result)
+// WIN64: define dso_local void @"\01?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret %agg.result)
+// WOA: define dso_local arm_aapcs_vfpcc void @"\01?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret %agg.result)
 
 // Returning a large struct that doesn't fit into a register.
 Big big_return() { return Big(); }
 // LINUX-LABEL: define void @_Z10big_returnv(%struct.Big* noalias sret %agg.result)
-// WIN32: define void @"\01?big_return@@YA?AUBig@@XZ"(%struct.Big* noalias sret %agg.result)
-// WIN64: define void @"\01?big_return@@YA?AUBig@@XZ"(%struct.Big* noalias sret %agg.result)
+// WIN32: define dso_local void @"\01?big_return@@YA?AUBig@@XZ"(%struct.Big* noalias sret %agg.result)
+// WIN64: define dso_local void @"\01?big_return@@YA?AUBig@@XZ"(%struct.Big* noalias sret %agg.result)
 
 
 void small_arg(Small s) {}
 // LINUX-LABEL: define void @_Z9small_arg5Small(i32 %s.0)
-// WIN32: define void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.0)
-// WIN64: define void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.coerce)
-// WOA: define arm_aapcs_vfpcc void @"\01?small_arg@@YAXUSmall@@@Z"([1 x i32] %s.coerce)
+// WIN32: define dso_local void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.0)
+// WIN64: define dso_local void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.coerce)
+// WOA: define dso_local arm_aapcs_vfpcc void @"\01?small_arg@@YAXUSmall@@@Z"([1 x i32] %s.coerce)
 
 void medium_arg(Medium s) {}
 // LINUX-LABEL: define void @_Z10medium_arg6Medium(i32 %s.0, i32 %s.1)
-// WIN32: define void @"\01?medium_arg@@YAXUMedium@@@Z"(i32 %s.0, i32 %s.1)
-// WIN64: define void @"\01?medium_arg@@YAXUMedium@@@Z"(i64 %s.coerce)
-// WOA: define arm_aapcs_vfpcc void @"\01?medium_arg@@YAXUMedium@@@Z"([2 x i32] %s.coerce)
+// WIN32: define dso_local void @"\01?medium_arg@@YAXUMedium@@@Z"(i32 %s.0, i32 %s.1)
+// WIN64: define dso_local void @"\01?medium_arg@@YAXUMedium@@@Z"(i64 %s.coerce)
+// WOA: define dso_local arm_aapcs_vfpcc void @"\01?medium_arg@@YAXUMedium@@@Z"([2 x i32] %s.coerce)
 
 void base_no_byval_arg(BaseNoByval s) {}
 // LINUX-LABEL: define void @_Z17base_no_byval_arg11BaseNoByval(%struct.BaseNoByval* byval align 4 %s)
-// WIN32: define void @"\01?base_no_byval_arg@@YAXUBaseNoByval@@@Z"(i32 %s.0, i32 %s.1)
-// WIN64: define void @"\01?base_no_byval_arg@@YAXUBaseNoByval@@@Z"(i64 %s.coerce)
-// WOA: define arm_aapcs_vfpcc void @"\01?base_no_byval_arg@@YAXUBaseNoByval@@@Z"([2 x i32] %s.coerce)
+// WIN32: define dso_local void @"\01?base_no_byval_arg@@YAXUBaseNoByval@@@Z"(i32 %s.0, i32 %s.1)
+// WIN64: define dso_local void @"\01?base_no_byval_arg@@YAXUBaseNoByval@@@Z"(i64 %s.coerce)
+// WOA: define dso_local arm_aapcs_vfpcc void @"\01?base_no_byval_arg@@YAXUBaseNoByval@@@Z"([2 x i32] %s.coerce)
 
 void small_arg_with_ctor(SmallWithCtor s) {}
 // LINUX-LABEL: define void @_Z19small_arg_with_ctor13SmallWithCtor(%struct.SmallWithCtor* byval align 4 %s)
-// WIN32: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.0)
-// WIN64: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.coerce)
-// WOA: define arm_aapcs_vfpcc void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"([1 x i32] %s.coerce)
+// WIN32: define dso_local void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.0)
+// WIN64: define dso_local void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.coerce)
+// WOA: define dso_local arm_aapcs_vfpcc void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"([1 x i32] %s.coerce)
 
 // FIXME: We could coerce to a series of i32s here if we wanted to.
 void multibyte_arg(Multibyte s) {}
 // LINUX-LABEL: define void @_Z13multibyte_arg9Multibyte(%struct.Multibyte* byval align 4 %s)
-// WIN32: define void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(%struct.Multibyte* byval align 4 %s)
-// WIN64: define void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(i32 %s.coerce)
-// WOA: define arm_aapcs_vfpcc void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"([1 x i32] %s.coerce)
+// WIN32: define dso_local void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(%struct.Multibyte* byval align 4 %s)
+// WIN64: define dso_local void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(i32 %s.coerce)
+// WOA: define dso_local arm_aapcs_vfpcc void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"([1 x i32] %s.coerce)
 
 void packed_arg(Packed s) {}
 // LINUX-LABEL: define void @_Z10packed_arg6Packed(%struct.Packed* byval align 4 %s)
-// WIN32: define void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* byval align 4 %s)
-// WIN64: define void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* %s)
+// WIN32: define dso_local void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* byval align 4 %s)
+// WIN64: define dso_local void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* %s)
 
 // Test that dtors are invoked in the callee.
 void small_arg_with_dtor(SmallWithDtor s) {}
-// WIN32: define void @"\01?small_arg_with_dtor@@YAXUSmallWithDtor@@@Z"(<{ %struct.SmallWithDtor }>* inalloca) {{.*}} {
+// WIN32: define dso_local void @"\01?small_arg_with_dtor@@YAXUSmallWithDtor@@@Z"(<{ %struct.SmallWithDtor }>* inalloca) {{.*}} {
 // WIN32:   call x86_thiscallcc void @"\01??1SmallWithDtor@@QAE@XZ"
 // WIN32: }
-// WIN64: define void @"\01?small_arg_with_dtor@@YAXUSmallWithDtor@@@Z"(i32 %s.coerce) {{.*}} {
+// WIN64: define dso_local void @"\01?small_arg_with_dtor@@YAXUSmallWithDtor@@@Z"(i32 %s.coerce) {{.*}} {
 // WIN64:   call void @"\01??1SmallWithDtor@@QEAA@XZ"
 // WIN64: }
 
 // FIXME: MSVC incompatible!
-// WOA: define arm_aapcs_vfpcc void @"\01?small_arg_with_dtor@@YAXUSmallWithDtor@@@Z"(%struct.SmallWithDtor* %s) {{.*}} {
+// WOA: define dso_local arm_aapcs_vfpcc void @"\01?small_arg_with_dtor@@YAXUSmallWithDtor@@@Z"(%struct.SmallWithDtor* %s) {{.*}} {
 // WOA:   call arm_aapcs_vfpcc void @"\01??1SmallWithDtor@@QAA@XZ"(%struct.SmallWithDtor* %s)
 // WOA: }
 
@@ -174,7 +174,7 @@
 }
 // The temporary is copied, so it's destroyed in the caller as well as the
 // callee.
-// WIN64-LABEL: define void @"\01?call_small_arg_with_dtor@@YAXXZ"()
+// WIN64-LABEL: define dso_local void @"\01?call_small_arg_with_dtor@@YAXXZ"()
 // WIN64:   call %struct.SmallWithDtor* @"\01??0SmallWithDtor@@QEAA@XZ"
 // WIN64:   call void @"\01?small_arg_with_dtor@@YAXUSmallWithDtor@@@Z"(i32 %{{.*}})
 // WIN64:   call void @"\01??1SmallWithDtor@@QEAA@XZ"
@@ -182,13 +182,13 @@
 
 // Test that references aren't destroyed in the callee.
 void ref_small_arg_with_dtor(const SmallWithDtor &s) { }
-// WIN32: define void @"\01?ref_small_arg_with_dtor@@YAXABUSmallWithDtor@@@Z"(%struct.SmallWithDtor* dereferenceable({{[0-9]+}}) %s) {{.*}} {
+// WIN32: define dso_local void @"\01?ref_small_arg_with_dtor@@YAXABUSmallWithDtor@@@Z"(%struct.SmallWithDtor* dereferenceable({{[0-9]+}}) %s) {{.*}} {
 // WIN32-NOT:   call x86_thiscallcc void @"\01??1SmallWithDtor@@QAE@XZ"
 // WIN32: }
-// WIN64-LABEL: define void @"\01?ref_small_arg_with_dtor@@YAXAEBUSmallWithDtor@@@Z"(%struct.SmallWithDtor* dereferenceable({{[0-9]+}}) %s)
+// WIN64-LABEL: define dso_local void @"\01?ref_small_arg_with_dtor@@YAXAEBUSmallWithDtor@@@Z"(%struct.SmallWithDtor* dereferenceable({{[0-9]+}}) %s)
 
 void big_arg_with_dtor(BigWithDtor s) {}
-// WIN64-LABEL: define void @"\01?big_arg_with_dtor@@YAXUBigWithDtor@@@Z"(%struct.BigWithDtor* %s)
+// WIN64-LABEL: define dso_local void @"\01?big_arg_with_dtor@@YAXUBigWithDtor@@@Z"(%struct.BigWithDtor* %s)
 // WIN64:   call void @"\01??1BigWithDtor@@QEAA@XZ"
 // WIN64: }
 
@@ -197,7 +197,7 @@
 }
 // We can elide the copy of the temporary in the caller, because this object is
 // larger than 8 bytes and is passed indirectly.
-// WIN64-LABEL: define void @"\01?call_big_arg_with_dtor@@YAXXZ"()
+// WIN64-LABEL: define dso_local void @"\01?call_big_arg_with_dtor@@YAXXZ"()
 // WIN64:   call %struct.BigWithDtor* @"\01??0BigWithDtor@@QEAA@XZ"
 // WIN64:   call void @"\01?big_arg_with_dtor@@YAXUBigWithDtor@@@Z"(%struct.BigWithDtor* %{{.*}})
 // WIN64-NOT: call void @"\01??1BigWithDtor@@QEAA@XZ"
@@ -207,7 +207,7 @@
 void temporary_ref_with_dtor() {
   ref_small_arg_with_dtor(SmallWithDtor());
 }
-// WIN32: define void @"\01?temporary_ref_with_dtor@@YAXXZ"() {{.*}} {
+// WIN32: define dso_local void @"\01?temporary_ref_with_dtor@@YAXXZ"() {{.*}} {
 // WIN32:   call x86_thiscallcc %struct.SmallWithDtor* @"\01??0SmallWithDtor@@QAE@XZ"
 // WIN32:   call void @"\01?ref_small_arg_with_dtor@@YAXABUSmallWithDtor@@@Z"
 // WIN32:   call x86_thiscallcc void @"\01??1SmallWithDtor@@QAE@XZ"
@@ -219,7 +219,7 @@
 }
 //   When exceptions are off, we don't have any cleanups.  See
 //   microsoft-abi-exceptions.cpp for these cleanups.
-// WIN32: define void @"\01?eh_cleanup_arg_with_dtor@@YAXXZ"() {{.*}} {
+// WIN32: define dso_local void @"\01?eh_cleanup_arg_with_dtor@@YAXXZ"() {{.*}} {
 // WIN32:   call x86_thiscallcc %struct.SmallWithDtor* @"\01??0SmallWithDtor@@QAE@XZ"
 // WIN32:   call x86_thiscallcc %struct.SmallWithDtor* @"\01??0SmallWithDtor@@QAE@XZ"
 // WIN32:   call void @"\01?takes_two_by_val_with_dtor@@YAXUSmallWithDtor@@0@Z"
@@ -228,19 +228,19 @@
 
 void small_arg_with_vftable(SmallWithVftable s) {}
 // LINUX-LABEL: define void @_Z22small_arg_with_vftable16SmallWithVftable(%struct.SmallWithVftable* %s)
-// WIN32: define void @"\01?small_arg_with_vftable@@YAXUSmallWithVftable@@@Z"(<{ %struct.SmallWithVftable }>* inalloca)
-// WIN64: define void @"\01?small_arg_with_vftable@@YAXUSmallWithVftable@@@Z"(%struct.SmallWithVftable* %s)
+// WIN32: define dso_local void @"\01?small_arg_with_vftable@@YAXUSmallWithVftable@@@Z"(<{ %struct.SmallWithVftable }>* inalloca)
+// WIN64: define dso_local void @"\01?small_arg_with_vftable@@YAXUSmallWithVftable@@@Z"(%struct.SmallWithVftable* %s)
 
 void medium_arg_with_copy_ctor(MediumWithCopyCtor s) {}
 // LINUX-LABEL: define void @_Z25medium_arg_with_copy_ctor18MediumWithCopyCtor(%struct.MediumWithCopyCtor* %s)
-// WIN32: define void @"\01?medium_arg_with_copy_ctor@@YAXUMediumWithCopyCtor@@@Z"(<{ %struct.MediumWithCopyCtor }>* inalloca)
-// WIN64: define void @"\01?medium_arg_with_copy_ctor@@YAXUMediumWithCopyCtor@@@Z"(%struct.MediumWithCopyCtor* %s)
-// WOA: define arm_aapcs_vfpcc void @"\01?medium_arg_with_copy_ctor@@YAXUMediumWithCopyCtor@@@Z"(%struct.MediumWithCopyCtor* %s)
+// WIN32: define dso_local void @"\01?medium_arg_with_copy_ctor@@YAXUMediumWithCopyCtor@@@Z"(<{ %struct.MediumWithCopyCtor }>* inalloca)
+// WIN64: define dso_local void @"\01?medium_arg_with_copy_ctor@@YAXUMediumWithCopyCtor@@@Z"(%struct.MediumWithCopyCtor* %s)
+// WOA: define dso_local arm_aapcs_vfpcc void @"\01?medium_arg_with_copy_ctor@@YAXUMediumWithCopyCtor@@@Z"(%struct.MediumWithCopyCtor* %s)
 
 void big_arg(Big s) {}
 // LINUX-LABEL: define void @_Z7big_arg3Big(%struct.Big* byval align 4 %s)
-// WIN32: define void @"\01?big_arg@@YAXUBig@@@Z"(%struct.Big* byval align 4 %s)
-// WIN64: define void @"\01?big_arg@@YAXUBig@@@Z"(%struct.Big* %s)
+// WIN32: define dso_local void @"\01?big_arg@@YAXUBig@@@Z"(%struct.Big* byval align 4 %s)
+// WIN64: define dso_local void @"\01?big_arg@@YAXUBig@@@Z"(%struct.Big* %s)
 
 // PR27607: We would attempt to load i32 value out of the reference instead of
 // just loading the pointer from the struct during argument expansion.
@@ -250,8 +250,8 @@
 };
 void takes_ref_field(RefField s) {}
 // LINUX-LABEL: define void @_Z15takes_ref_field8RefField(%struct.RefField* byval align 4 %s)
-// WIN32: define void @"\01?takes_ref_field@@YAXURefField@@@Z"(i32* %s.0)
-// WIN64: define void @"\01?takes_ref_field@@YAXURefField@@@Z"(i64 %s.coerce)
+// WIN32: define dso_local void @"\01?takes_ref_field@@YAXURefField@@@Z"(i32* %s.0)
+// WIN64: define dso_local void @"\01?takes_ref_field@@YAXURefField@@@Z"(i64 %s.coerce)
 
 void pass_ref_field() {
   int x;
@@ -259,9 +259,9 @@
 }
 // LINUX-LABEL: define void @_Z14pass_ref_fieldv()
 // LINUX: call void @_Z15takes_ref_field8RefField(%struct.RefField* byval align 4 %{{.*}})
-// WIN32-LABEL: define void @"\01?pass_ref_field@@YAXXZ"()
+// WIN32-LABEL: define dso_local void @"\01?pass_ref_field@@YAXXZ"()
 // WIN32: call void @"\01?takes_ref_field@@YAXURefField@@@Z"(i32* %{{.*}})
-// WIN64-LABEL: define void @"\01?pass_ref_field@@YAXXZ"()
+// WIN64-LABEL: define dso_local void @"\01?pass_ref_field@@YAXXZ"()
 // WIN64: call void @"\01?takes_ref_field@@YAXURefField@@@Z"(i64 %{{.*}})
 
 class Class {
@@ -269,47 +269,47 @@
   Small thiscall_method_small() { return Small(); }
   // LINUX: define {{.*}} void @_ZN5Class21thiscall_method_smallEv(%struct.Small* noalias sret %agg.result, %class.Class* %this)
   // WIN32: define {{.*}} x86_thiscallcc void @"\01?thiscall_method_small@Class@@QAE?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret %agg.result)
-  // WIN64: define linkonce_odr void @"\01?thiscall_method_small@Class@@QEAA?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret %agg.result)
+  // WIN64: define linkonce_odr dso_local void @"\01?thiscall_method_small@Class@@QEAA?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret %agg.result)
 
   SmallWithCtor thiscall_method_small_with_ctor() { return SmallWithCtor(); }
   // LINUX: define {{.*}} void @_ZN5Class31thiscall_method_small_with_ctorEv(%struct.SmallWithCtor* noalias sret %agg.result, %class.Class* %this)
   // WIN32: define {{.*}} x86_thiscallcc void @"\01?thiscall_method_small_with_ctor@Class@@QAE?AUSmallWithCtor@@XZ"(%class.Class* %this, %struct.SmallWithCtor* noalias sret %agg.result)
-  // WIN64: define linkonce_odr void @"\01?thiscall_method_small_with_ctor@Class@@QEAA?AUSmallWithCtor@@XZ"(%class.Class* %this, %struct.SmallWithCtor* noalias sret %agg.result)
+  // WIN64: define linkonce_odr dso_local void @"\01?thiscall_method_small_with_ctor@Class@@QEAA?AUSmallWithCtor@@XZ"(%class.Class* %this, %struct.SmallWithCtor* noalias sret %agg.result)
 
   Small __cdecl cdecl_method_small() { return Small(); }
   // LINUX: define {{.*}} void @_ZN5Class18cdecl_method_smallEv(%struct.Small* noalias sret %agg.result, %class.Class* %this)
   // WIN32: define {{.*}} void @"\01?cdecl_method_small@Class@@QAA?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret %agg.result)
-  // WIN64: define linkonce_odr void @"\01?cdecl_method_small@Class@@QEAA?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret %agg.result)
+  // WIN64: define linkonce_odr dso_local void @"\01?cdecl_method_small@Class@@QEAA?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret %agg.result)
 
   Big __cdecl cdecl_method_big() { return Big(); }
   // LINUX: define {{.*}} void @_ZN5Class16cdecl_method_bigEv(%struct.Big* noalias sret %agg.result, %class.Class* %this)
   // WIN32: define {{.*}} void @"\01?cdecl_method_big@Class@@QAA?AUBig@@XZ"(%class.Class* %this, %struct.Big* noalias sret %agg.result)
-  // WIN64: define linkonce_odr void @"\01?cdecl_method_big@Class@@QEAA?AUBig@@XZ"(%class.Class* %this, %struct.Big* noalias sret %agg.result)
+  // WIN64: define linkonce_odr dso_local void @"\01?cdecl_method_big@Class@@QEAA?AUBig@@XZ"(%class.Class* %this, %struct.Big* noalias sret %agg.result)
 
   void thiscall_method_arg(Empty s) {}
   // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE5Empty(%class.Class* %this)
   // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUEmpty@@@Z"(%class.Class* %this, %struct.Empty* byval align 4 %s)
-  // WIN64: define linkonce_odr void @"\01?thiscall_method_arg@Class@@QEAAXUEmpty@@@Z"(%class.Class* %this, i8 %s.coerce)
+  // WIN64: define linkonce_odr dso_local void @"\01?thiscall_method_arg@Class@@QEAAXUEmpty@@@Z"(%class.Class* %this, i8 %s.coerce)
 
   void thiscall_method_arg(EmptyWithCtor s) {}
   // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE13EmptyWithCtor(%class.Class* %this)
   // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUEmptyWithCtor@@@Z"(%class.Class* %this, %struct.EmptyWithCtor* byval align 4 %s)
-  // WIN64: define linkonce_odr void @"\01?thiscall_method_arg@Class@@QEAAXUEmptyWithCtor@@@Z"(%class.Class* %this, i8 %s.coerce)
+  // WIN64: define linkonce_odr dso_local void @"\01?thiscall_method_arg@Class@@QEAAXUEmptyWithCtor@@@Z"(%class.Class* %this, i8 %s.coerce)
 
   void thiscall_method_arg(Small s) {}
   // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE5Small(%class.Class* %this, i32 %s.0)
   // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmall@@@Z"(%class.Class* %this, i32 %s.0)
-  // WIN64: define linkonce_odr void @"\01?thiscall_method_arg@Class@@QEAAXUSmall@@@Z"(%class.Class* %this, i32 %s.coerce)
+  // WIN64: define linkonce_odr dso_local void @"\01?thiscall_method_arg@Class@@QEAAXUSmall@@@Z"(%class.Class* %this, i32 %s.coerce)
 
   void thiscall_method_arg(SmallWithCtor s) {}
   // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE13SmallWithCtor(%class.Class* %this, %struct.SmallWithCtor* byval align 4 %s)
   // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.0)
-  // WIN64: define linkonce_odr void @"\01?thiscall_method_arg@Class@@QEAAXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.coerce)
+  // WIN64: define linkonce_odr dso_local void @"\01?thiscall_method_arg@Class@@QEAAXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.coerce)
 
   void thiscall_method_arg(Big s) {}
   // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE3Big(%class.Class* %this, %struct.Big* byval align 4 %s)
   // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUBig@@@Z"(%class.Class* %this, %struct.Big* byval align 4 %s)
-  // WIN64: define linkonce_odr void @"\01?thiscall_method_arg@Class@@QEAAXUBig@@@Z"(%class.Class* %this, %struct.Big* %s)
+  // WIN64: define linkonce_odr dso_local void @"\01?thiscall_method_arg@Class@@QEAAXUBig@@@Z"(%class.Class* %this, %struct.Big* %s)
 };
 
 void use_class() {
@@ -333,13 +333,13 @@
 };
 void g(X) {
 }
-// WIN32: define void @"\01?g@@YAXUX@@@Z"(<{ %struct.X, [3 x i8] }>* inalloca) {{.*}} {
+// WIN32: define dso_local void @"\01?g@@YAXUX@@@Z"(<{ %struct.X, [3 x i8] }>* inalloca) {{.*}} {
 // WIN32:   call x86_thiscallcc void @"\01??1X@@QAE@XZ"(%struct.X* {{.*}})
 // WIN32: }
 void f() {
   g(X());
 }
-// WIN32: define void @"\01?f@@YAXXZ"() {{.*}} {
+// WIN32: define dso_local void @"\01?f@@YAXXZ"() {{.*}} {
 // WIN32-NOT: call {{.*}} @"\01??1X@@QAE@XZ"
 // WIN32: }
 
@@ -362,7 +362,7 @@
   b.b = 13;
   int c = foo(NonTrivial(), b);
 }
-// WIN32-LABEL: define void @"\01?bar@test2@@YAXXZ"() {{.*}} {
+// WIN32-LABEL: define dso_local void @"\01?bar@test2@@YAXXZ"() {{.*}} {
 // WIN32:   %[[argmem:[^ ]*]] = alloca inalloca [[argmem_ty:<{ %"struct.test2::NonTrivial", %"struct.test2::POD" }>]]
 // WIN32:   getelementptr inbounds [[argmem_ty]], [[argmem_ty]]* %[[argmem]], i32 0, i32 1
 // WIN32:   call void @llvm.memcpy
@@ -384,7 +384,7 @@
   int a;
 };
 void foo(NonTrivial a, bool b) { }
-// WIN32-LABEL: define void @"\01?foo@test3@@YAXUNonTrivial@1@_N@Z"(<{ %"struct.test3::NonTrivial", i8, [3 x i8] }>* inalloca)
+// WIN32-LABEL: define dso_local void @"\01?foo@test3@@YAXUNonTrivial@1@_N@Z"(<{ %"struct.test3::NonTrivial", i8, [3 x i8] }>* inalloca)
 
 }
 
@@ -399,13 +399,13 @@
 struct ForwardDeclare1 {};
 
 void fn2(FnPtr1 a, SmallWithDtor b) { fn1(a, b); };
-// WIN32-LABEL: define void @"\01?fn2@@YAXP6AXUForwardDeclare1@@@ZUSmallWithDtor@@@Z"
+// WIN32-LABEL: define dso_local void @"\01?fn2@@YAXP6AXUForwardDeclare1@@@ZUSmallWithDtor@@@Z"
 // WIN32:   %[[a:[^ ]*]] = getelementptr inbounds [[argmem_ty:<{ {}\*, %struct.SmallWithDtor }>]], [[argmem_ty:<{ {}\*, %struct.SmallWithDtor }>]]* %{{.*}}, i32 0, i32 0
 // WIN32:   %[[a1:[^ ]*]] = bitcast {}** %[[a]] to void [[dst_ty:\(%struct.ForwardDeclare1\*\)\*]]*
 // WIN32:   %[[argmem:[^ ]*]] = alloca inalloca [[argmem_ty]]
 // WIN32:   %[[gep1:[^ ]*]] = getelementptr inbounds [[argmem_ty]], [[argmem_ty]]* %[[argmem]], i32 0, i32 1
 // WIN32:   %[[bc1:[^ ]*]] = bitcast %struct.SmallWithDtor* %[[gep1]] to i8*
-// WIN32:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[bc1]], i8* {{.*}}, i32 4, i32 4, i1 false)
+// WIN32:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %[[bc1]], i8* align 4 {{.*}}, i32 4, i1 false)
 // WIN32:   %[[a2:[^ ]*]] = load void [[dst_ty]], void [[dst_ty]]* %[[a1]], align 4
 // WIN32:   %[[gep2:[^ ]*]] = getelementptr inbounds [[argmem_ty]], [[argmem_ty]]* %[[argmem]], i32 0, i32 0
 // WIN32:   %[[addr:[^ ]*]] = bitcast {}** %[[gep2]] to void [[dst_ty]]*
@@ -430,11 +430,11 @@
 };
 void C::g() { return h(SmallWithDtor()); }
 
-// WIN32-LABEL: define x86_thiscallcc void @"\01?g@C@pr30293@@QAEXXZ"(%"struct.pr30293::C"* %this)
+// WIN32-LABEL: define dso_local x86_thiscallcc void @"\01?g@C@pr30293@@QAEXXZ"(%"struct.pr30293::C"* %this)
 // WIN32: call x86_thiscallcc %struct.SmallWithDtor* @"\01??0SmallWithDtor@@QAE@XZ"
 // WIN32: call void @"\01?h@C@pr30293@@UAAXUSmallWithDtor@@@Z"(<{ i8*, %struct.SmallWithDtor }>* inalloca %{{[^,)]*}})
-// WIN32: declare void @"\01?h@C@pr30293@@UAAXUSmallWithDtor@@@Z"(<{ i8*, %struct.SmallWithDtor }>* inalloca)
+// WIN32: declare dso_local void @"\01?h@C@pr30293@@UAAXUSmallWithDtor@@@Z"(<{ i8*, %struct.SmallWithDtor }>* inalloca)
 
-// WIN64-LABEL: define void @"\01?g@C@pr30293@@QEAAXXZ"(%"struct.pr30293::C"* %this)
-// WIN64: declare void @"\01?h@C@pr30293@@UEAAXUSmallWithDtor@@@Z"(i8*, i32)
+// WIN64-LABEL: define dso_local void @"\01?g@C@pr30293@@QEAAXXZ"(%"struct.pr30293::C"* %this)
+// WIN64: declare dso_local void @"\01?h@C@pr30293@@UEAAXUSmallWithDtor@@@Z"(i8*, i32)
 }
diff --git a/test/CodeGenCXX/microsoft-abi-static-initializers.cpp b/test/CodeGenCXX/microsoft-abi-static-initializers.cpp
index 0b84f07..fe0c371 100644
--- a/test/CodeGenCXX/microsoft-abi-static-initializers.cpp
+++ b/test/CodeGenCXX/microsoft-abi-static-initializers.cpp
@@ -28,11 +28,11 @@
 // See @llvm.global_ctors above.
 __declspec(selectany) S selectany1;
 __declspec(selectany) S selectany2;
-// CHECK: define linkonce_odr void @"\01??__Eselectany1@@YAXXZ"() {{.*}} comdat
+// CHECK: define linkonce_odr dso_local void @"\01??__Eselectany1@@YAXXZ"() {{.*}} comdat
 // CHECK-NOT: @"\01??_Bselectany1
 // CHECK: call x86_thiscallcc %struct.S* @"\01??0S@@QAE@XZ"
 // CHECK: ret void
-// CHECK: define linkonce_odr void @"\01??__Eselectany2@@YAXXZ"() {{.*}} comdat
+// CHECK: define linkonce_odr dso_local void @"\01??__Eselectany2@@YAXXZ"() {{.*}} comdat
 // CHECK-NOT: @"\01??_Bselectany2
 // CHECK: call x86_thiscallcc %struct.S* @"\01??0S@@QAE@XZ"
 // CHECK: ret void
@@ -51,7 +51,7 @@
   static S TheS;
 }
 
-// CHECK-LABEL: define void @"\01?StaticLocal@@YAXXZ"()
+// CHECK-LABEL: define dso_local void @"\01?StaticLocal@@YAXXZ"()
 // CHECK: load i32, i32* @"\01?$S1@?1??StaticLocal@@YAXXZ@4IA"
 // CHECK: store i32 {{.*}}, i32* @"\01?$S1@?1??StaticLocal@@YAXXZ@4IA"
 // CHECK: ret
@@ -93,7 +93,7 @@
   static S S34;
   static S S35;
 }
-// CHECK-LABEL: define void @"\01?MultipleStatics@@YAXXZ"()
+// CHECK-LABEL: define dso_local void @"\01?MultipleStatics@@YAXXZ"()
 // CHECK: load i32, i32* @"\01?$S1@?1??MultipleStatics@@YAXXZ@4IA"
 // CHECK: and i32 {{.*}}, 1
 // CHECK: and i32 {{.*}}, 2
@@ -133,7 +133,7 @@
   return s;
 }
 
-// CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.S* @"\01?UnreachableStatic@@YAAAUS@@XZ"() {{.*}} comdat
+// CHECK-LABEL: define linkonce_odr dso_local dereferenceable({{[0-9]+}}) %struct.S* @"\01?UnreachableStatic@@YAAAUS@@XZ"() {{.*}} comdat
 // CHECK: and i32 {{.*}}, 2
 // CHECK: or i32 {{.*}}, 2
 // CHECK: ret
@@ -143,7 +143,7 @@
   return TheS;
 }
 
-// CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.S* @"\01?getS@@YAAAUS@@XZ"() {{.*}} comdat
+// CHECK-LABEL: define linkonce_odr dso_local dereferenceable({{[0-9]+}}) %struct.S* @"\01?getS@@YAAAUS@@XZ"() {{.*}} comdat
 // CHECK: load i32, i32* @"\01??_B?1??getS@@YAAAUS@@XZ@51"
 // CHECK: and i32 {{.*}}, 1
 // CHECK: icmp eq i32 {{.*}}, 0
@@ -158,7 +158,7 @@
 // CHECK: ret %struct.S* @"\01?TheS@?1??getS@@YAAAUS@@XZ@4U2@A"
 
 inline int enum_in_function() {
-  // CHECK-LABEL: define linkonce_odr i32 @"\01?enum_in_function@@YAHXZ"() {{.*}} comdat
+  // CHECK-LABEL: define linkonce_odr dso_local i32 @"\01?enum_in_function@@YAHXZ"() {{.*}} comdat
   static enum e { foo, bar, baz } x;
   // CHECK: @"\01?x@?1??enum_in_function@@YAHXZ@4W4e@?1??1@YAHXZ@A"
   static int y;
@@ -169,7 +169,7 @@
 struct T {
   enum e { foo, bar, baz };
   int enum_in_struct() {
-    // CHECK-LABEL: define linkonce_odr x86_thiscallcc i32 @"\01?enum_in_struct@T@@QAEHXZ"({{.*}}) {{.*}} comdat
+    // CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc i32 @"\01?enum_in_struct@T@@QAEHXZ"({{.*}}) {{.*}} comdat
     static int x;
     // CHECK: @"\01?x@?1??enum_in_struct@T@@QAEHXZ@4HA"
     return x++;
@@ -177,7 +177,7 @@
 };
 
 inline int switch_test(int x) {
-  // CHECK-LABEL: define linkonce_odr i32 @"\01?switch_test@@YAHH@Z"(i32 %x) {{.*}} comdat
+  // CHECK-LABEL: define linkonce_odr dso_local i32 @"\01?switch_test@@YAHH@Z"(i32 %x) {{.*}} comdat
   switch (x) {
     static int a;
     // CHECK: @"\01?a@?3??switch_test@@YAHH@Z@4HA"
@@ -198,7 +198,7 @@
 
 int f();
 inline void switch_test2() {
-  // CHECK-LABEL: define linkonce_odr void @"\01?switch_test2@@YAXXZ"() {{.*}} comdat
+  // CHECK-LABEL: define linkonce_odr dso_local void @"\01?switch_test2@@YAXXZ"() {{.*}} comdat
   // CHECK: @"\01?x@?2??switch_test2@@YAXXZ@4HA"
   switch (1) default: static int x = f();
 }
@@ -213,7 +213,7 @@
   template struct __declspec(dllimport) A<int>;
 
   inline int switch_test3() {
-    // CHECK-LABEL: define linkonce_odr i32 @"\01?switch_test3@DynamicDLLImportInitVSMangling@@YAHXZ"() {{.*}} comdat
+    // CHECK-LABEL: define linkonce_odr dso_local i32 @"\01?switch_test3@DynamicDLLImportInitVSMangling@@YAHXZ"() {{.*}} comdat
     static int local;
     // CHECK: @"\01?local@?1??switch_test3@DynamicDLLImportInitVSMangling@@YAHXZ@4HA"
     return local++;
@@ -231,16 +231,16 @@
   DynamicDLLImportInitVSMangling::switch_test3();
 }
 
-// CHECK: define linkonce_odr void @"\01??__Efoo@?$B@H@@2VA@@A@YAXXZ"() {{.*}} comdat
+// CHECK: define linkonce_odr dso_local void @"\01??__Efoo@?$B@H@@2VA@@A@YAXXZ"() {{.*}} comdat
 // CHECK-NOT: and
 // CHECK-NOT: ?_Bfoo@
 // CHECK: call x86_thiscallcc %class.A* @"\01??0A@@QAE@XZ"
 // CHECK: call i32 @atexit(void ()* @"\01??__Ffoo@?$B@H@@2VA@@A@YAXXZ")
 // CHECK: ret void
 
-// CHECK: define linkonce_odr x86_thiscallcc %class.A* @"\01??0A@@QAE@XZ"({{.*}}) {{.*}} comdat
+// CHECK: define linkonce_odr dso_local x86_thiscallcc %class.A* @"\01??0A@@QAE@XZ"({{.*}}) {{.*}} comdat
 
-// CHECK: define linkonce_odr x86_thiscallcc void @"\01??1A@@QAE@XZ"({{.*}}) {{.*}} comdat
+// CHECK: define linkonce_odr dso_local x86_thiscallcc void @"\01??1A@@QAE@XZ"({{.*}}) {{.*}} comdat
 
 // CHECK: define internal void @"\01??__Ffoo@?$B@H@@2VA@@A@YAXXZ"
 // CHECK: call x86_thiscallcc void @"\01??1A@@QAE@XZ"{{.*}}foo
diff --git a/test/CodeGenCXX/microsoft-abi-structors-alias.cpp b/test/CodeGenCXX/microsoft-abi-structors-alias.cpp
index 4eb757a..4c0a9be 100644
--- a/test/CodeGenCXX/microsoft-abi-structors-alias.cpp
+++ b/test/CodeGenCXX/microsoft-abi-structors-alias.cpp
@@ -5,7 +5,7 @@
   ~A() {}
 };
 template class A<char>;
-// CHECK-DAG: define weak_odr x86_thiscallcc void @"\01??1?$A@D@test1@@AAE@XZ"
+// CHECK-DAG: define weak_odr dso_local x86_thiscallcc void @"\01??1?$A@D@test1@@AAE@XZ"
 }
 
 namespace test2 {
@@ -22,14 +22,14 @@
 void foo() {
   B b;
 }
-// CHECK-DAG: @"\01??1B@test2@@UAE@XZ" = alias void (%"struct.test2::B"*), bitcast (void (%"struct.test2::A"*)* @"\01??1A@test2@@UAE@XZ" to void (%"struct.test2::B"*)*)
+// CHECK-DAG: @"\01??1B@test2@@UAE@XZ" = dso_local alias void (%"struct.test2::B"*), bitcast (void (%"struct.test2::A"*)* @"\01??1A@test2@@UAE@XZ" to void (%"struct.test2::B"*)*)
 }
 
 namespace test3 {
 struct A { virtual ~A(); };
 A::~A() {}
 }
-// CHECK-DAG: define x86_thiscallcc void @"\01??1A@test3@@UAE@XZ"(
+// CHECK-DAG: define dso_local x86_thiscallcc void @"\01??1A@test3@@UAE@XZ"(
 namespace test3 {
 template <typename T>
 struct B : A {
@@ -39,4 +39,4 @@
 }
 // This has to be weak, and emitting weak aliases is fragile, so we don't do the
 // aliasing.
-// CHECK-DAG: define weak_odr x86_thiscallcc void @"\01??1?$B@H@test3@@UAE@XZ"(
+// CHECK-DAG: define weak_odr dso_local x86_thiscallcc void @"\01??1?$B@H@test3@@UAE@XZ"(
diff --git a/test/CodeGenCXX/microsoft-abi-structors.cpp b/test/CodeGenCXX/microsoft-abi-structors.cpp
index 4de6c33..138af04 100644
--- a/test/CodeGenCXX/microsoft-abi-structors.cpp
+++ b/test/CodeGenCXX/microsoft-abi-structors.cpp
@@ -20,7 +20,7 @@
 void no_constructor_destructor_infinite_recursion() {
   A a;
 
-// CHECK:      define linkonce_odr x86_thiscallcc %"class.basic::A"* @"\01??0A@basic@@QAE@XZ"(%"class.basic::A"* returned %this) {{.*}} comdat {{.*}} {
+// CHECK:      define linkonce_odr dso_local x86_thiscallcc %"class.basic::A"* @"\01??0A@basic@@QAE@XZ"(%"class.basic::A"* returned %this) {{.*}} comdat {{.*}} {
 // CHECK:        [[THIS_ADDR:%[.0-9A-Z_a-z]+]] = alloca %"class.basic::A"*, align 4
 // CHECK-NEXT:   store %"class.basic::A"* %this, %"class.basic::A"** [[THIS_ADDR]], align 4
 // CHECK-NEXT:   [[T1:%[.0-9A-Z_a-z]+]] = load %"class.basic::A"*, %"class.basic::A"** [[THIS_ADDR]]
@@ -41,13 +41,13 @@
 
 // Tests that we can define constructors outside the class (PR12784).
 B::B() {
-  // CHECK: define x86_thiscallcc %"struct.basic::B"* @"\01??0B@basic@@QAE@XZ"(%"struct.basic::B"* returned %this)
+  // CHECK: define dso_local x86_thiscallcc %"struct.basic::B"* @"\01??0B@basic@@QAE@XZ"(%"struct.basic::B"* returned %this)
   // CHECK: ret
 }
 
 struct C {
   virtual ~C() {
-// DTORS:      define linkonce_odr x86_thiscallcc i8* @"\01??_GC@basic@@UAEPAXI@Z"(%"struct.basic::C"* %this, i32 %should_call_delete) {{.*}} comdat {{.*}} {
+// DTORS:      define linkonce_odr dso_local x86_thiscallcc i8* @"\01??_GC@basic@@UAEPAXI@Z"(%"struct.basic::C"* %this, i32 %should_call_delete) {{.*}} comdat {{.*}} {
 // DTORS:        store i32 %should_call_delete, i32* %[[SHOULD_DELETE_VAR:[0-9a-z._]+]], align 4
 // DTORS:        store i8* %{{.*}}, i8** %[[RETVAL:[0-9a-z._]+]]
 // DTORS:        %[[SHOULD_DELETE_VALUE:[0-9a-z._]+]] = load i32, i32* %[[SHOULD_DELETE_VAR]]
@@ -81,7 +81,7 @@
 }
 
 void call_complete_dtor(C *obj_ptr) {
-// CHECK: define void @"\01?call_complete_dtor@basic@@YAXPAUC@1@@Z"(%"struct.basic::C"* %obj_ptr)
+// CHECK: define dso_local void @"\01?call_complete_dtor@basic@@YAXPAUC@1@@Z"(%"struct.basic::C"* %obj_ptr)
   obj_ptr->~C();
 // CHECK: %[[OBJ_PTR_VALUE:.*]] = load %"struct.basic::C"*, %"struct.basic::C"** %{{.*}}, align 4
 // CHECK-NEXT: %[[PVTABLE:.*]] = bitcast %"struct.basic::C"* %[[OBJ_PTR_VALUE]] to i8* (%"struct.basic::C"*, i32)***
@@ -93,7 +93,7 @@
 }
 
 void call_deleting_dtor(C *obj_ptr) {
-// CHECK: define void @"\01?call_deleting_dtor@basic@@YAXPAUC@1@@Z"(%"struct.basic::C"* %obj_ptr)
+// CHECK: define dso_local void @"\01?call_deleting_dtor@basic@@YAXPAUC@1@@Z"(%"struct.basic::C"* %obj_ptr)
   delete obj_ptr;
 // CHECK:      %[[OBJ_PTR_VALUE:.*]] = load %"struct.basic::C"*, %"struct.basic::C"** %{{.*}}, align 4
 // CHECK:      br i1 {{.*}}, label %[[DELETE_NULL:.*]], label %[[DELETE_NOTNULL:.*]]
@@ -108,7 +108,7 @@
 }
 
 void call_deleting_dtor_and_global_delete(C *obj_ptr) {
-// CHECK: define void @"\01?call_deleting_dtor_and_global_delete@basic@@YAXPAUC@1@@Z"(%"struct.basic::C"* %obj_ptr)
+// CHECK: define dso_local void @"\01?call_deleting_dtor_and_global_delete@basic@@YAXPAUC@1@@Z"(%"struct.basic::C"* %obj_ptr)
   ::delete obj_ptr;
 // CHECK:      %[[OBJ_PTR_VALUE:.*]] = load %"struct.basic::C"*, %"struct.basic::C"** %{{.*}}, align 4
 // CHECK:      br i1 {{.*}}, label %[[DELETE_NULL:.*]], label %[[DELETE_NOTNULL:.*]]
@@ -155,7 +155,7 @@
 };
 
 C::~C() {
-// CHECK-LABEL: define x86_thiscallcc void @"\01??1C@dtor_in_second_nvbase@@UAE@XZ"
+// CHECK-LABEL: define dso_local x86_thiscallcc void @"\01??1C@dtor_in_second_nvbase@@UAE@XZ"
 // CHECK:       (%"struct.dtor_in_second_nvbase::C"* %this)
 //      No this adjustment!
 // CHECK-NOT: getelementptr
@@ -172,7 +172,7 @@
 void foo() {
   C c;
 }
-// DTORS2-LABEL: define linkonce_odr x86_thiscallcc i8* @"\01??_EC@dtor_in_second_nvbase@@W3AEPAXI@Z"
+// DTORS2-LABEL: define linkonce_odr dso_local x86_thiscallcc i8* @"\01??_EC@dtor_in_second_nvbase@@W3AEPAXI@Z"
 // DTORS2:       (%"struct.dtor_in_second_nvbase::C"* %this, i32 %should_call_delete)
 //      Do an adjustment from B* to C*.
 // DTORS2:   getelementptr i8, i8* %{{.*}}, i32 -4
@@ -196,7 +196,7 @@
 struct F : D, E { ~F(); int f; };
 
 F::~F() {
-// CHECK-LABEL: define x86_thiscallcc void @"\01??1F@test2@@UAE@XZ"(%"struct.test2::F"*{{[^,]*}})
+// CHECK-LABEL: define dso_local x86_thiscallcc void @"\01??1F@test2@@UAE@XZ"(%"struct.test2::F"*{{[^,]*}})
 //      Do an adjustment from C vbase subobject to F as though F was the
 //      complete type.
 // CHECK:   getelementptr inbounds i8, i8* %{{.*}}, i32 -20
@@ -207,7 +207,7 @@
 void foo() {
   F f;
 }
-// DTORS3-LABEL: define linkonce_odr x86_thiscallcc void @"\01??_DF@test2@@QAEXXZ"({{.*}} {{.*}} comdat
+// DTORS3-LABEL: define linkonce_odr dso_local x86_thiscallcc void @"\01??_DF@test2@@QAEXXZ"({{.*}} {{.*}} comdat
 //      Do an adjustment from C* to F*.
 // DTORS3:   getelementptr i8, i8* %{{.*}}, i32 20
 // DTORS3:   bitcast i8* %{{.*}} to %"struct.test2::F"*
@@ -228,7 +228,7 @@
 };
 
 B::B() {
-  // CHECK: define x86_thiscallcc %"struct.constructors::B"* @"\01??0B@constructors@@QAE@XZ"(%"struct.constructors::B"* returned %this)
+  // CHECK: define dso_local x86_thiscallcc %"struct.constructors::B"* @"\01??0B@constructors@@QAE@XZ"(%"struct.constructors::B"* returned %this)
   // CHECK: call x86_thiscallcc %"struct.constructors::A"* @"\01??0A@constructors@@QAE@XZ"(%"struct.constructors::A"* %{{.*}})
   // CHECK: ret
 }
@@ -238,7 +238,7 @@
 };
 
 C::C() {
-  // CHECK: define x86_thiscallcc %"struct.constructors::C"* @"\01??0C@constructors@@QAE@XZ"(%"struct.constructors::C"* returned %this, i32 %is_most_derived)
+  // CHECK: define dso_local x86_thiscallcc %"struct.constructors::C"* @"\01??0C@constructors@@QAE@XZ"(%"struct.constructors::C"* returned %this, i32 %is_most_derived)
   // TODO: make sure this works in the Release build too;
   // CHECK: store i32 %is_most_derived, i32* %[[IS_MOST_DERIVED_VAR:.*]], align 4
   // CHECK: %[[IS_MOST_DERIVED_VAL:.*]] = load i32, i32* %[[IS_MOST_DERIVED_VAR]]
@@ -264,7 +264,7 @@
 
 void create_C() {
   C c;
-  // CHECK: define void @"\01?create_C@constructors@@YAXXZ"()
+  // CHECK: define dso_local void @"\01?create_C@constructors@@YAXXZ"()
   // CHECK: call x86_thiscallcc %"struct.constructors::C"* @"\01??0C@constructors@@QAE@XZ"(%"struct.constructors::C"* %c, i32 1)
   // CHECK: ret
 }
@@ -274,7 +274,7 @@
 };
 
 D::D() {
-  // CHECK: define x86_thiscallcc %"struct.constructors::D"* @"\01??0D@constructors@@QAE@XZ"(%"struct.constructors::D"* returned %this, i32 %is_most_derived) unnamed_addr
+  // CHECK: define dso_local x86_thiscallcc %"struct.constructors::D"* @"\01??0D@constructors@@QAE@XZ"(%"struct.constructors::D"* returned %this, i32 %is_most_derived) unnamed_addr
   // CHECK: store i32 %is_most_derived, i32* %[[IS_MOST_DERIVED_VAR:.*]], align 4
   // CHECK: %[[IS_MOST_DERIVED_VAL:.*]] = load i32, i32* %[[IS_MOST_DERIVED_VAR]]
   // CHECK: %[[SHOULD_CALL_VBASE_CTORS:.*]] = icmp ne i32 %[[IS_MOST_DERIVED_VAL]], 0
@@ -301,7 +301,7 @@
 };
 
 E::E() {
-  // CHECK: define x86_thiscallcc %"struct.constructors::E"* @"\01??0E@constructors@@QAE@XZ"(%"struct.constructors::E"* returned %this, i32 %is_most_derived) unnamed_addr
+  // CHECK: define dso_local x86_thiscallcc %"struct.constructors::E"* @"\01??0E@constructors@@QAE@XZ"(%"struct.constructors::E"* returned %this, i32 %is_most_derived) unnamed_addr
   // CHECK: store i32 %is_most_derived, i32* %[[IS_MOST_DERIVED_VAR:.*]], align 4
   // CHECK: %[[IS_MOST_DERIVED_VAL:.*]] = load i32, i32* %[[IS_MOST_DERIVED_VAR]]
   // CHECK: %[[SHOULD_CALL_VBASE_CTORS:.*]] = icmp ne i32 %[[IS_MOST_DERIVED_VAL]], 0
@@ -333,7 +333,7 @@
 };
 
 F::F() {}
-// CHECK: define x86_thiscallcc %"struct.constructors::F"* @"\01??0F@constructors@@QAE@XZ"
+// CHECK: define dso_local x86_thiscallcc %"struct.constructors::F"* @"\01??0F@constructors@@QAE@XZ"
 
 } // end namespace constructors
 
@@ -345,12 +345,12 @@
 
 void call_nv_complete(A *a) {
   a->~A();
-// CHECK: define void @"\01?call_nv_complete@dtors@@YAXPAUA@1@@Z"
+// CHECK: define dso_local void @"\01?call_nv_complete@dtors@@YAXPAUA@1@@Z"
 // CHECK: call x86_thiscallcc void @"\01??1A@dtors@@QAE@XZ"
 // CHECK: ret
 }
 
-// CHECK: declare x86_thiscallcc void @"\01??1A@dtors@@QAE@XZ"
+// CHECK: declare dso_local x86_thiscallcc void @"\01??1A@dtors@@QAE@XZ"
 
 // Now try some virtual bases, where we need the complete dtor.
 
@@ -360,13 +360,13 @@
 
 void call_vbase_complete(D *d) {
   d->~D();
-// CHECK: define void @"\01?call_vbase_complete@dtors@@YAXPAUD@1@@Z"
+// CHECK: define dso_local void @"\01?call_vbase_complete@dtors@@YAXPAUD@1@@Z"
 // CHECK: call x86_thiscallcc void @"\01??_DD@dtors@@QAEXXZ"(%"struct.dtors::D"* %{{[^,]+}})
 // CHECK: ret
 }
 
 // The complete dtor should call the base dtors for D and the vbase A (once).
-// CHECK: define linkonce_odr x86_thiscallcc void @"\01??_DD@dtors@@QAEXXZ"({{.*}}) {{.*}} comdat
+// CHECK: define linkonce_odr dso_local x86_thiscallcc void @"\01??_DD@dtors@@QAEXXZ"({{.*}}) {{.*}} comdat
 // CHECK-NOT: call
 // CHECK: call x86_thiscallcc void @"\01??1D@dtors@@QAE@XZ"
 // CHECK-NOT: call
@@ -376,7 +376,7 @@
 
 void destroy_d_complete() {
   D d;
-// CHECK: define void @"\01?destroy_d_complete@dtors@@YAXXZ"
+// CHECK: define dso_local void @"\01?destroy_d_complete@dtors@@YAXXZ"
 // CHECK: call x86_thiscallcc void @"\01??_DD@dtors@@QAEXXZ"(%"struct.dtors::D"* %{{[^,]+}})
 // CHECK: ret
 }
@@ -386,7 +386,7 @@
 // a vftable.
 void call_nv_deleting_dtor(D *d) {
   delete d;
-// CHECK: define void @"\01?call_nv_deleting_dtor@dtors@@YAXPAUD@1@@Z"
+// CHECK: define dso_local void @"\01?call_nv_deleting_dtor@dtors@@YAXPAUD@1@@Z"
 // CHECK: call x86_thiscallcc void @"\01??_DD@dtors@@QAEXXZ"(%"struct.dtors::D"* %{{[^,]+}})
 // CHECK: call void @"\01??3@YAXPAX@Z"
 // CHECK: ret
@@ -404,11 +404,11 @@
 B::B(int *a) {}
 B::B(const char *a, ...) {}
 B::B(short *a) {}
-// CHECK: define x86_thiscallcc %"struct.test1::B"* @"\01??0B@test1@@QAE@PAH@Z"
+// CHECK: define dso_local x86_thiscallcc %"struct.test1::B"* @"\01??0B@test1@@QAE@PAH@Z"
 // CHECK:               (%"struct.test1::B"* returned %this, i32* %a, i32 %is_most_derived)
-// CHECK: define %"struct.test1::B"* @"\01??0B@test1@@QAA@PBDZZ"
+// CHECK: define dso_local %"struct.test1::B"* @"\01??0B@test1@@QAA@PBDZZ"
 // CHECK:               (%"struct.test1::B"* returned %this, i32 %is_most_derived, i8* %a, ...)
-// CHECK: define x86_thiscallcc %"struct.test1::B"* @"\01??0B@test1@@QAE@PAF@Z"
+// CHECK: define dso_local x86_thiscallcc %"struct.test1::B"* @"\01??0B@test1@@QAE@PAF@Z"
 // CHECK:               (%"struct.test1::B"* returned %this, i16* %a, i32 %is_most_derived)
 
 void construct_b() {
@@ -416,7 +416,7 @@
   B b1(&a);
   B b2("%d %d", 1, 2);
 }
-// CHECK-LABEL: define void @"\01?construct_b@test1@@YAXXZ"()
+// CHECK-LABEL: define dso_local void @"\01?construct_b@test1@@YAXXZ"()
 // CHECK: call x86_thiscallcc %"struct.test1::B"* @"\01??0B@test1@@QAE@PAH@Z"
 // CHECK:               (%"struct.test1::B"* {{.*}}, i32* {{.*}}, i32 1)
 // CHECK: call %"struct.test1::B"* (%"struct.test1::B"*, i32, i8*, ...) @"\01??0B@test1@@QAA@PBDZZ"
@@ -451,7 +451,7 @@
 };
 X::X(int) : X() {}
 }
-// CHECK: define x86_thiscallcc %"struct.delegating_ctor::X"* @"\01??0X@delegating_ctor@@QAE@H@Z"(
+// CHECK: define dso_local x86_thiscallcc %"struct.delegating_ctor::X"* @"\01??0X@delegating_ctor@@QAE@H@Z"(
 // CHECK:  %[[is_most_derived_addr:.*]] = alloca i32, align 4
 // CHECK:  store i32 %is_most_derived, i32* %[[is_most_derived_addr]]
 // CHECK:  %[[is_most_derived:.*]] = load i32, i32* %[[is_most_derived_addr]]
@@ -477,9 +477,9 @@
 class G {
  public:
   __stdcall G() {};
-// DTORS4: define linkonce_odr x86_thiscallcc %class.G* @"\01??0G@@QAE@XZ"
+// DTORS4: define linkonce_odr dso_local x86_thiscallcc %class.G* @"\01??0G@@QAE@XZ"
   __stdcall ~G() {};
-// DTORS4: define linkonce_odr x86_thiscallcc void @"\01??1G@@QAE@XZ"
+// DTORS4: define linkonce_odr dso_local x86_thiscallcc void @"\01??1G@@QAE@XZ"
 };
 
 extern void testG() {
diff --git a/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp b/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp
index 3f53e63..767286e 100644
--- a/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp
+++ b/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp
@@ -6,14 +6,14 @@
   ~S();
 };
 
-// CHECK-DAG: @"\01?s@?1??f@@YAAAUS@@XZ@4U2@A" = linkonce_odr thread_local global %struct.S zeroinitializer
+// CHECK-DAG: @"\01?s@?1??f@@YAAAUS@@XZ@4U2@A" = linkonce_odr dso_local thread_local global %struct.S zeroinitializer
 // CHECK-DAG: @"\01??__J?1??f@@YAAAUS@@XZ@51" = linkonce_odr thread_local global i32 0
-// CHECK-DAG: @"\01?s@?1??g@@YAAAUS@@XZ@4U2@A" = linkonce_odr global %struct.S zeroinitializer
+// CHECK-DAG: @"\01?s@?1??g@@YAAAUS@@XZ@4U2@A" = linkonce_odr dso_local global %struct.S zeroinitializer
 // CHECK-DAG: @"\01?$TSS0@?1??g@@YAAAUS@@XZ@4HA" = linkonce_odr global i32 0
 // CHECK-DAG: @_Init_thread_epoch = external thread_local global i32, align 4
-// CHECK-DAG: @"\01?j@?1??h@@YAAAUS@@_N@Z@4U2@A" = linkonce_odr thread_local global %struct.S zeroinitializer
+// CHECK-DAG: @"\01?j@?1??h@@YAAAUS@@_N@Z@4U2@A" = linkonce_odr dso_local thread_local global %struct.S zeroinitializer
 // CHECK-DAG: @"\01??__J?1??h@@YAAAUS@@_N@Z@51" = linkonce_odr thread_local global i32 0
-// CHECK-DAG: @"\01?i@?1??h@@YAAAUS@@_N@Z@4U2@A" = linkonce_odr global %struct.S zeroinitializer
+// CHECK-DAG: @"\01?i@?1??h@@YAAAUS@@_N@Z@4U2@A" = linkonce_odr dso_local global %struct.S zeroinitializer
 // CHECK-DAG: @"\01?$TSS0@?1??h@@YAAAUS@@_N@Z@4HA" = linkonce_odr global i32 0
 // CHECK-DAG: @"\01?i@?1??g1@@YAHXZ@4HA" = internal global i32 0, align 4
 // CHECK-DAG: @"\01?$TSS0@?1??g1@@YAHXZ@4HA" = internal global i32 0, align 4
@@ -89,7 +89,7 @@
   return b ? j : i;
 }
 
-// CHECK-LABEL: define i32 @"\01?g1@@YAHXZ"()
+// CHECK-LABEL: define dso_local i32 @"\01?g1@@YAHXZ"()
 int f1();
 int g1() {
   static int i = f1();
diff --git a/test/CodeGenCXX/microsoft-abi-thunks.cpp b/test/CodeGenCXX/microsoft-abi-thunks.cpp
index 8230334..e6fb45d 100644
--- a/test/CodeGenCXX/microsoft-abi-thunks.cpp
+++ b/test/CodeGenCXX/microsoft-abi-thunks.cpp
@@ -61,13 +61,13 @@
 
 C::C() {}  // Emits vftable and forces thunk generation.
 
-// CODEGEN-LABEL: define linkonce_odr x86_thiscallcc i8* @"\01??_EC@@W3AEPAXI@Z"(%struct.C* %this, i32 %should_call_delete) {{.*}} comdat
+// CODEGEN-LABEL: define linkonce_odr dso_local x86_thiscallcc i8* @"\01??_EC@@W3AEPAXI@Z"(%struct.C* %this, i32 %should_call_delete) {{.*}} comdat
 // CODEGEN:   getelementptr i8, i8* {{.*}}, i32 -4
 // FIXME: should actually call _EC, not _GC.
 // CODEGEN:   call x86_thiscallcc i8* @"\01??_GC@@UAEPAXI@Z"
 // CODEGEN: ret
 
-// CODEGEN-LABEL: define linkonce_odr x86_thiscallcc void @"\01?public_f@C@@W3AEXXZ"(%struct.C*
+// CODEGEN-LABEL: define linkonce_odr dso_local x86_thiscallcc void @"\01?public_f@C@@W3AEXXZ"(%struct.C*
 // CODEGEN:   getelementptr i8, i8* {{.*}}, i32 -4
 // CODEGEN:   call x86_thiscallcc void @"\01?public_f@C@@UAEXXZ"(%struct.C*
 // CODEGEN: ret
@@ -91,7 +91,7 @@
 
 E::E() {}  // Emits vftable and forces thunk generation.
 
-// CODEGEN-LABEL: define weak_odr x86_thiscallcc %struct.C* @"\01?goo@E@@QAEPAUB@@XZ"{{.*}} comdat
+// CODEGEN-LABEL: define weak_odr dso_local x86_thiscallcc %struct.C* @"\01?goo@E@@QAEPAUB@@XZ"{{.*}} comdat
 // CODEGEN:   call x86_thiscallcc %struct.C* @"\01?goo@E@@UAEPAUC@@XZ"
 // CODEGEN:   getelementptr inbounds i8, i8* {{.*}}, i32 4
 // CODEGEN: ret
@@ -124,7 +124,7 @@
 
 I::I() {}  // Emits vftable and forces thunk generation.
 
-// CODEGEN-LABEL: define weak_odr x86_thiscallcc %struct.{{[BF]}}* @"\01?goo@I@@QAEPAUB@@XZ"{{.*}} comdat
+// CODEGEN-LABEL: define weak_odr dso_local x86_thiscallcc %struct.{{[BF]}}* @"\01?goo@I@@QAEPAUB@@XZ"{{.*}} comdat
 // CODEGEN: %[[ORIG_RET:.*]] = call x86_thiscallcc %struct.F* @"\01?goo@I@@UAEPAUF@@XZ"
 // CODEGEN: %[[ORIG_RET_i8:.*]] = bitcast %struct.F* %[[ORIG_RET]] to i8*
 // CODEGEN: %[[VBPTR_i8:.*]] = getelementptr inbounds i8, i8* %[[ORIG_RET_i8]], i32 4
diff --git a/test/CodeGenCXX/microsoft-abi-try-throw.cpp b/test/CodeGenCXX/microsoft-abi-try-throw.cpp
index bf1834e..caf68b6 100644
--- a/test/CodeGenCXX/microsoft-abi-try-throw.cpp
+++ b/test/CodeGenCXX/microsoft-abi-try-throw.cpp
@@ -33,7 +33,7 @@
 }
 
 #ifdef TRY
-// TRY-LABEL: define void @"\01?qual_catch@@YAXXZ"
+// TRY-LABEL: define dso_local void @"\01?qual_catch@@YAXXZ"
 void qual_catch() {
   try {
     external();
diff --git a/test/CodeGenCXX/microsoft-abi-typeid.cpp b/test/CodeGenCXX/microsoft-abi-typeid.cpp
index d73f848..3152c31 100644
--- a/test/CodeGenCXX/microsoft-abi-typeid.cpp
+++ b/test/CodeGenCXX/microsoft-abi-typeid.cpp
@@ -12,19 +12,19 @@
 A* fn();
 
 const std::type_info* test0_typeid() { return &typeid(int); }
-// CHECK-LABEL: define %struct.type_info* @"\01?test0_typeid@@YAPBUtype_info@@XZ"()
+// CHECK-LABEL: define dso_local %struct.type_info* @"\01?test0_typeid@@YAPBUtype_info@@XZ"()
 // CHECK:   ret %struct.type_info* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to %struct.type_info*)
 
 const std::type_info* test1_typeid() { return &typeid(A); }
-// CHECK-LABEL: define %struct.type_info* @"\01?test1_typeid@@YAPBUtype_info@@XZ"()
+// CHECK-LABEL: define dso_local %struct.type_info* @"\01?test1_typeid@@YAPBUtype_info@@XZ"()
 // CHECK:   ret %struct.type_info* bitcast (%rtti.TypeDescriptor7* @"\01??_R0?AUA@@@8" to %struct.type_info*)
 
 const std::type_info* test2_typeid() { return &typeid(&a); }
-// CHECK-LABEL: define %struct.type_info* @"\01?test2_typeid@@YAPBUtype_info@@XZ"()
+// CHECK-LABEL: define dso_local %struct.type_info* @"\01?test2_typeid@@YAPBUtype_info@@XZ"()
 // CHECK:   ret %struct.type_info* bitcast (%rtti.TypeDescriptor7* @"\01??_R0PAUA@@@8" to %struct.type_info*)
 
 const std::type_info* test3_typeid() { return &typeid(*fn()); }
-// CHECK-LABEL: define %struct.type_info* @"\01?test3_typeid@@YAPBUtype_info@@XZ"()
+// CHECK-LABEL: define dso_local %struct.type_info* @"\01?test3_typeid@@YAPBUtype_info@@XZ"()
 // CHECK:        [[CALL:%.*]] = tail call %struct.A* @"\01?fn@@YAPAUA@@XZ"()
 // CHECK-NEXT:   [[CMP:%.*]] = icmp eq %struct.A* [[CALL]], null
 // CHECK-NEXT:   br i1 [[CMP]]
@@ -41,11 +41,11 @@
 // CHECK-NEXT:   ret %struct.type_info* [[RET]]
 
 const std::type_info* test4_typeid() { return &typeid(b); }
-// CHECK: define %struct.type_info* @"\01?test4_typeid@@YAPBUtype_info@@XZ"()
+// CHECK: define dso_local %struct.type_info* @"\01?test4_typeid@@YAPBUtype_info@@XZ"()
 // CHECK:   ret %struct.type_info* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to %struct.type_info*)
 
 const std::type_info* test5_typeid() { return &typeid(v); }
-// CHECK: define %struct.type_info* @"\01?test5_typeid@@YAPBUtype_info@@XZ"()
+// CHECK: define dso_local %struct.type_info* @"\01?test5_typeid@@YAPBUtype_info@@XZ"()
 // CHECK:        [[RT:%.*]] = tail call i8* @__RTtypeid(i8* bitcast (%struct.V* @"\01?v@@3UV@@A" to i8*))
 // CHECK-NEXT:   [[RET:%.*]] = bitcast i8* [[RT]] to %struct.type_info*
 // CHECK-NEXT:   ret %struct.type_info* [[RET]]
@@ -63,7 +63,7 @@
     typeid(poly);
   }
 }
-// CHECK-LABEL: define void @"\01?f@PR26329@@YAXABUPolymorphic@1@@Z"(
+// CHECK-LABEL: define dso_local void @"\01?f@PR26329@@YAXABUPolymorphic@1@@Z"(
 // CHECK: %[[cs:.*]] = catchswitch within none [label %{{.*}}] unwind to caller
 // CHECK: %[[cp:.*]] = catchpad within %[[cs]] [i8* null, i32 64, i8* null]
 // CHECK: invoke i8* @__RTtypeid(i8* {{.*}}) [ "funclet"(token %[[cp]]) ]
diff --git a/test/CodeGenCXX/microsoft-abi-virtual-inheritance-vtordisps.cpp b/test/CodeGenCXX/microsoft-abi-virtual-inheritance-vtordisps.cpp
index bb73f87..bb18d99 100644
--- a/test/CodeGenCXX/microsoft-abi-virtual-inheritance-vtordisps.cpp
+++ b/test/CodeGenCXX/microsoft-abi-virtual-inheritance-vtordisps.cpp
@@ -23,7 +23,9 @@
 
 D::D() {}  // Forces vftable emission.
 
-// CHECK-LABEL: define linkonce_odr x86_thiscallcc void @"\01?f@D@@$4PPPPPPPM@A@AEXXZ"
+// CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc void @"\01?f@D@@$4PPPPPPPM@A@AEXXZ"
+// Note that the vtordisp is applied before really adjusting to D*.
+// CHECK: %[[COERCE_LOAD:.*]] = load %struct.D*, %struct.D** %{{.*}}
 // CHECK: %[[ECX:.*]] = load %struct.D*, %struct.D** %{{.*}}
 // CHECK: %[[ECX_i8:.*]] = bitcast %struct.D* %[[ECX]] to i8*
 // CHECK: %[[VTORDISP_PTR_i8:.*]] = getelementptr inbounds i8, i8* %[[ECX_i8]], i32 -4
@@ -34,7 +36,8 @@
 // CHECK: call x86_thiscallcc void @"\01?f@D@@UAEXXZ"(i8* %[[ADJUSTED_i8]])
 // CHECK: ret void
 
-// CHECK-LABEL: define linkonce_odr x86_thiscallcc void @"\01?f@D@@$4PPPPPPPI@3AEXXZ"
+// CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc void @"\01?f@D@@$4PPPPPPPI@3AEXXZ"
+// CHECK: %[[COERCE_LOAD:.*]] = load %struct.D*, %struct.D** %{{.*}}
 // CHECK: %[[ECX:.*]] = load %struct.D*, %struct.D** %{{.*}}
 // CHECK: %[[ECX_i8:.*]] = bitcast %struct.D* %[[ECX]] to i8*
 // CHECK: %[[VTORDISP_PTR_i8:.*]] = getelementptr inbounds i8, i8* %[[ECX_i8]], i32 -8
@@ -63,7 +66,8 @@
 
 G::G() {}  // Forces vftable emission.
 
-// CHECK-LABEL: define linkonce_odr x86_thiscallcc void @"\01?f@E@@$R4BA@M@PPPPPPPM@7AEXXZ"(i8*)
+// CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc void @"\01?f@E@@$R4BA@M@PPPPPPPM@7AEXXZ"(i8*
+// CHECK: %[[COERCE_LOAD:.*]] = load %struct.E*, %struct.E** %{{.*}}
 // CHECK: %[[ECX:.*]] = load %struct.E*, %struct.E** %{{.*}}
 // CHECK: %[[ECX_i8:.*]] = bitcast %struct.E* %[[ECX]] to i8*
 // CHECK: %[[VTORDISP_PTR_i8:.*]] = getelementptr inbounds i8, i8* %[[ECX_i8]], i32 -4
diff --git a/test/CodeGenCXX/microsoft-abi-virtual-inheritance.cpp b/test/CodeGenCXX/microsoft-abi-virtual-inheritance.cpp
index 20ecaf4..394d967 100644
--- a/test/CodeGenCXX/microsoft-abi-virtual-inheritance.cpp
+++ b/test/CodeGenCXX/microsoft-abi-virtual-inheritance.cpp
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 %s -fno-rtti -triple=i386-pc-win32 -emit-llvm -o %t
+// RUN: %clang_cc1 %s -fno-rtti -std=c++11 -Wno-inaccessible-base -triple=i386-pc-win32 -emit-llvm -o %t
 // RUN: FileCheck %s < %t
 // RUN: FileCheck --check-prefix=CHECK2 %s < %t
 
 // For now, just make sure x86_64 doesn't crash.
-// RUN: %clang_cc1 %s -fno-rtti -triple=x86_64-pc-win32 -emit-llvm -o %t
+// RUN: %clang_cc1 %s -fno-rtti -std=c++11 -Wno-inaccessible-base -triple=x86_64-pc-win32 -emit-llvm -o %t
 
 struct VBase {
   virtual ~VBase();
@@ -20,7 +20,7 @@
 };
 
 B::B() {
-  // CHECK-LABEL: define x86_thiscallcc %struct.B* @"\01??0B@@QAE@XZ"
+  // CHECK-LABEL: define dso_local x86_thiscallcc %struct.B* @"\01??0B@@QAE@XZ"
   // CHECK:   %[[THIS:.*]] = load %struct.B*, %struct.B**
   // CHECK:   br i1 %{{.*}}, label %[[INIT_VBASES:.*]], label %[[SKIP_VBASES:.*]]
 
@@ -51,13 +51,15 @@
 }
 
 B::~B() {
-  // CHECK-LABEL: define x86_thiscallcc void @"\01??1B@@UAE@XZ"
-  // Adjust the this parameter:
-  // CHECK:   %[[THIS_PARAM_i8:.*]] = bitcast %struct.B* {{.*}} to i8*
-  // CHECK:   %[[THIS_i8:.*]] = getelementptr inbounds i8, i8* %[[THIS_PARAM_i8]], i32 -8
-  // CHECK:   %[[THIS:.*]] = bitcast i8* %[[THIS_i8]] to %struct.B*
-  // CHECK:   store %struct.B* %[[THIS]], %struct.B** %[[THIS_ADDR:.*]], align 4
-  // CHECK:   %[[THIS:.*]] = load %struct.B*, %struct.B** %[[THIS_ADDR]]
+  // CHECK-LABEL: define dso_local x86_thiscallcc void @"\01??1B@@UAE@XZ"
+  // Store initial this:
+  // CHECK:   %[[THIS_ADDR:.*]] = alloca %struct.B*
+  // CHECK:   store %struct.B* %{{.*}}, %struct.B** %[[THIS_ADDR]], align 4
+  // Reload and adjust the this parameter:
+  // CHECK:   %[[THIS_RELOAD:.*]] = load %struct.B*, %struct.B** %[[THIS_ADDR]]
+  // CHECK:   %[[THIS_UNADJ_i8:.*]] = bitcast %struct.B* %[[THIS_RELOAD]] to i8*
+  // CHECK:   %[[THIS_ADJ_i8:.*]] = getelementptr inbounds i8, i8* %[[THIS_UNADJ_i8]], i32 -8
+  // CHECK:   %[[THIS:.*]] = bitcast i8* %[[THIS_ADJ_i8]] to %struct.B*
 
   // Restore the vfptr that could have been changed by a subclass.
   // CHECK:   %[[THIS_i8:.*]] = bitcast %struct.B* %[[THIS]] to i8*
@@ -84,7 +86,7 @@
 
   // CHECK: ret
 
-  // CHECK2-LABEL: define linkonce_odr x86_thiscallcc void @"\01??_DB@@QAEXXZ"(%struct.B*
+  // CHECK2-LABEL: define linkonce_odr dso_local x86_thiscallcc void @"\01??_DB@@QAEXXZ"(%struct.B*
   // CHECK2: %[[THIS:.*]] = load %struct.B*, %struct.B** {{.*}}
   // CHECK2: %[[THIS_i8:.*]] = bitcast %struct.B* %[[THIS]] to i8*
   // CHECK2: %[[B_i8:.*]] = getelementptr i8, i8* %[[THIS_i8]], i32 8
@@ -96,30 +98,40 @@
   // CHECK2: call x86_thiscallcc void @"\01??1VBase@@UAE@XZ"(%struct.VBase* %[[VBASE]])
   // CHECK2: ret
 
-  // CHECK2-LABEL: define linkonce_odr x86_thiscallcc i8* @"\01??_GB@@UAEPAXI@Z"
-  // CHECK2:   %[[THIS_PARAM_i8:.*]] = bitcast %struct.B* {{.*}} to i8*
+  // CHECK2-LABEL: define linkonce_odr dso_local x86_thiscallcc i8* @"\01??_GB@@UAEPAXI@Z"
+  // CHECK2:   store %struct.B* %{{.*}}, %struct.B** %[[THIS_ADDR:.*]], align 4
+  // CHECK2:   %[[THIS:.*]] = load %struct.B*, %struct.B** %[[THIS_ADDR]]
+  // CHECK2:   %[[THIS_PARAM_i8:.*]] = bitcast %struct.B* %[[THIS]] to i8*
   // CHECK2:   %[[THIS_i8:.*]] = getelementptr inbounds i8, i8* %[[THIS_PARAM_i8:.*]], i32 -8
   // CHECK2:   %[[THIS:.*]] = bitcast i8* %[[THIS_i8]] to %struct.B*
-  // CHECK2:   store %struct.B* %[[THIS]], %struct.B** %[[THIS_ADDR:.*]], align 4
-  // CHECK2:   %[[THIS:.*]] = load %struct.B*, %struct.B** %[[THIS_ADDR]]
   // CHECK2:   call x86_thiscallcc void @"\01??_DB@@QAEXXZ"(%struct.B* %[[THIS]])
   // ...
   // CHECK2: ret
 }
 
 void B::foo() {
-// CHECK-LABEL: define x86_thiscallcc void @"\01?foo@B@@UAEXXZ"(i8*
+// CHECK-LABEL: define dso_local x86_thiscallcc void @"\01?foo@B@@UAEXXZ"(i8*
 //
 // B::foo gets 'this' cast to VBase* in ECX (i.e. this+8) so we
 // need to adjust 'this' before use.
 //
-// CHECK: %[[THIS_ADDR:.*]] = alloca %struct.B*, align 4
-// CHECK: %[[THIS_i8:.*]] = getelementptr inbounds i8, i8* %[[ECX:.*]], i32 -8
-// CHECK: %[[THIS:.*]] = bitcast i8* %[[THIS_i8]] to %struct.B*
-// CHECK: store %struct.B* %[[THIS]], %struct.B** %[[THIS_ADDR]], align 4
+// Coerce this to correct type:
+// CHECK:   %[[THIS_STORE:.*]] = alloca %struct.B*
+// CHECK:   %[[THIS_ADDR:.*]] = alloca %struct.B*
+// CHECK:   %[[COERCE_VAL:.*]] = bitcast i8* %{{.*}} to %struct.B*
+// CHECK:   store %struct.B* %[[COERCE_VAL]], %struct.B** %[[THIS_STORE]], align 4
+//
+// Store initial this:
+// CHECK:   %[[THIS_INIT:.*]] = load %struct.B*, %struct.B** %[[THIS_STORE]]
+// CHECK:   store %struct.B* %[[THIS_INIT]], %struct.B** %[[THIS_ADDR]], align 4
+//
+// Reload and adjust the this parameter:
+// CHECK:   %[[THIS_RELOAD:.*]] = load %struct.B*, %struct.B** %[[THIS_ADDR]]
+// CHECK:   %[[THIS_UNADJ_i8:.*]] = bitcast %struct.B* %[[THIS_RELOAD]] to i8*
+// CHECK:   %[[THIS_ADJ_i8:.*]] = getelementptr inbounds i8, i8* %[[THIS_UNADJ_i8]], i32 -8
+// CHECK:   %[[THIS:.*]] = bitcast i8* %[[THIS_ADJ_i8]] to %struct.B*
 
   field = 42;
-// CHECK: %[[THIS:.*]] = load %struct.B*, %struct.B** %[[THIS_ADDR]]
 // CHECK: %[[THIS8:.*]] = bitcast %struct.B* %[[THIS]] to i8*
 // CHECK: %[[VBPTR:.*]] = getelementptr inbounds i8, i8* %[[THIS8]], i32 0
 // CHECK: %[[VBPTR8:.*]] = bitcast i8* %[[VBPTR]] to i32**
@@ -137,7 +149,7 @@
 }
 
 void call_vbase_bar(B *obj) {
-// CHECK-LABEL: define void @"\01?call_vbase_bar@@YAXPAUB@@@Z"(%struct.B* %obj)
+// CHECK-LABEL: define dso_local void @"\01?call_vbase_bar@@YAXPAUB@@@Z"(%struct.B* %obj)
 // CHECK: %[[OBJ:.*]] = load %struct.B
 
   obj->bar();
@@ -151,12 +163,31 @@
 // CHECK: %[[VBENTRY:.*]] = getelementptr inbounds i32, i32* %[[VBTABLE]], i32 1
 // CHECK: %[[VBOFFSET32:.*]] = load i32, i32* %[[VBENTRY]]
 // CHECK: %[[VBOFFSET:.*]] = add nsw i32 0, %[[VBOFFSET32]]
+// CHECK: %[[VBASE:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 %[[VBOFFSET]]
+//
+// CHECK: %[[OBJ_i8:.*]] = bitcast %struct.B* %[[OBJ]] to i8*
+// CHECK: %[[VBPTR:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 0
+// CHECK: %[[VBPTR8:.*]] = bitcast i8* %[[VBPTR]] to i32**
+// CHECK: %[[VBTABLE:.*]] = load i32*, i32** %[[VBPTR8]]
+// CHECK: %[[VBENTRY:.*]] = getelementptr inbounds i32, i32* %[[VBTABLE]], i32 1
+// CHECK: %[[VBOFFSET32:.*]] = load i32, i32* %[[VBENTRY]]
+// CHECK: %[[VBOFFSET:.*]] = add nsw i32 0, %[[VBOFFSET32]]
 // CHECK: %[[VBASE_i8:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 %[[VBOFFSET]]
 // CHECK: %[[VFPTR:.*]] = bitcast i8* %[[VBASE_i8]] to void (i8*)***
 // CHECK: %[[VFTABLE:.*]] = load void (i8*)**, void (i8*)*** %[[VFPTR]]
 // CHECK: %[[VFUN:.*]] = getelementptr inbounds void (i8*)*, void (i8*)** %[[VFTABLE]], i64 2
 // CHECK: %[[VFUN_VALUE:.*]] = load void (i8*)*, void (i8*)** %[[VFUN]]
 //
+// CHECK: call x86_thiscallcc void %[[VFUN_VALUE]](i8* %[[VBASE]])
+//
+// CHECK: ret void
+}
+
+void delete_B(B *obj) {
+// CHECK-LABEL: define dso_local void @"\01?delete_B@@YAXPAUB@@@Z"(%struct.B* %obj)
+// CHECK: %[[OBJ:.*]] = load %struct.B
+
+  delete obj;
 // CHECK: %[[OBJ_i8:.*]] = bitcast %struct.B* %[[OBJ]] to i8*
 // CHECK: %[[VBPTR:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 0
 // CHECK: %[[VBPTR8:.*]] = bitcast i8* %[[VBPTR]] to i32**
@@ -164,18 +195,9 @@
 // CHECK: %[[VBENTRY:.*]] = getelementptr inbounds i32, i32* %[[VBTABLE]], i32 1
 // CHECK: %[[VBOFFSET32:.*]] = load i32, i32* %[[VBENTRY]]
 // CHECK: %[[VBOFFSET:.*]] = add nsw i32 0, %[[VBOFFSET32]]
-// CHECK: %[[VBASE:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 %[[VBOFFSET]]
+// CHECK: %[[VBASE_i8:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 %[[VBOFFSET]]
+// CHECK: %[[VBASE:.*]] = bitcast i8* %[[VBASE_i8]] to %struct.B*
 //
-// CHECK: call x86_thiscallcc void %[[VFUN_VALUE]](i8* %[[VBASE]])
-//
-// CHECK: ret void
-}
-
-void delete_B(B *obj) {
-// CHECK-LABEL: define void @"\01?delete_B@@YAXPAUB@@@Z"(%struct.B* %obj)
-// CHECK: %[[OBJ:.*]] = load %struct.B
-
-  delete obj;
 // CHECK: %[[OBJ_i8:.*]] = bitcast %struct.B* %[[OBJ]] to i8*
 // CHECK: %[[VBPTR:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 0
 // CHECK: %[[VBPTR8:.*]] = bitcast i8* %[[VBPTR]] to i32**
@@ -189,22 +211,12 @@
 // CHECK: %[[VFUN:.*]] = getelementptr inbounds i8* (%struct.B*, i32)*, i8* (%struct.B*, i32)** %[[VFTABLE]], i64 0
 // CHECK: %[[VFUN_VALUE:.*]] = load i8* (%struct.B*, i32)*, i8* (%struct.B*, i32)** %[[VFUN]]
 //
-// CHECK: %[[OBJ_i8:.*]] = bitcast %struct.B* %[[OBJ]] to i8*
-// CHECK: %[[VBPTR:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 0
-// CHECK: %[[VBPTR8:.*]] = bitcast i8* %[[VBPTR]] to i32**
-// CHECK: %[[VBTABLE:.*]] = load i32*, i32** %[[VBPTR8]]
-// CHECK: %[[VBENTRY:.*]] = getelementptr inbounds i32, i32* %[[VBTABLE]], i32 1
-// CHECK: %[[VBOFFSET32:.*]] = load i32, i32* %[[VBENTRY]]
-// CHECK: %[[VBOFFSET:.*]] = add nsw i32 0, %[[VBOFFSET32]]
-// CHECK: %[[VBASE_i8:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 %[[VBOFFSET]]
-// CHECK: %[[VBASE:.*]] = bitcast i8* %[[VBASE_i8]] to %struct.B*
-//
 // CHECK: call x86_thiscallcc i8* %[[VFUN_VALUE]](%struct.B* %[[VBASE]], i32 1)
 // CHECK: ret void
 }
 
 void call_complete_dtor() {
-  // CHECK-LABEL: define void @"\01?call_complete_dtor@@YAXXZ"
+  // CHECK-LABEL: define dso_local void @"\01?call_complete_dtor@@YAXXZ"
   B b;
   // CHECK: call x86_thiscallcc %struct.B* @"\01??0B@@QAE@XZ"(%struct.B* %[[B:.*]], i32 1)
   // CHECK-NOT: getelementptr
@@ -219,7 +231,7 @@
 
 // Used to crash on an assertion.
 C::C() {
-// CHECK-LABEL: define x86_thiscallcc %struct.C* @"\01??0C@@QAE@XZ"
+// CHECK-LABEL: define dso_local x86_thiscallcc %struct.C* @"\01??0C@@QAE@XZ"
 }
 
 namespace multiple_vbases {
@@ -243,7 +255,7 @@
 };
 
 D::D() {
-  // CHECK-LABEL: define x86_thiscallcc %"struct.multiple_vbases::D"* @"\01??0D@multiple_vbases@@QAE@XZ"
+  // CHECK-LABEL: define dso_local x86_thiscallcc %"struct.multiple_vbases::D"* @"\01??0D@multiple_vbases@@QAE@XZ"
   // Just make sure we emit 3 vtordisps after initializing vfptrs.
   // CHECK: store i32 (...)** bitcast ({ [1 x i8*] }* @"\01??_7D@multiple_vbases@@6BA@1@@" to i32 (...)**), i32 (...)*** %{{.*}}
   // CHECK: store i32 (...)** bitcast ({ [1 x i8*] }* @"\01??_7D@multiple_vbases@@6BB@1@@" to i32 (...)**), i32 (...)*** %{{.*}}
@@ -283,12 +295,17 @@
 } d;
 
 D::~D() {
-  // CHECK-LABEL: define x86_thiscallcc void @"\01??1D@diamond@@UAE@XZ"(%"struct.diamond::D"*{{.*}})
-  // CHECK: %[[ARG_i8:.*]] = bitcast %"struct.diamond::D"* %{{.*}} to i8*
-  // CHECK: %[[THIS_i8:.*]] = getelementptr inbounds i8, i8* %[[ARG_i8]], i32 -24
-  // CHECK: %[[THIS:.*]] = bitcast i8* %[[THIS_i8]] to %"struct.diamond::D"*
-  // CHECK: store %"struct.diamond::D"* %[[THIS]], %"struct.diamond::D"** %[[THIS_VAL:.*]], align 4
-  // CHECK: %[[THIS:.*]] = load %"struct.diamond::D"*, %"struct.diamond::D"** %[[THIS_VAL]]
+  // CHECK-LABEL: define dso_local x86_thiscallcc void @"\01??1D@diamond@@UAE@XZ"(%"struct.diamond::D"*{{.*}})
+  // Store initial this:
+  // CHECK: %[[THIS_ADDR:.*]] = alloca %"struct.diamond::D"*
+  // CHECK: store %"struct.diamond::D"* %{{.*}}, %"struct.diamond::D"** %[[THIS_ADDR]], align 4
+  //
+  // Reload and adjust the this parameter:
+  // CHECK: %[[THIS_RELOAD:.*]] = load %"struct.diamond::D"*, %"struct.diamond::D"** %[[THIS_ADDR]]
+  // CHECK: %[[THIS_UNADJ_i8:.*]] = bitcast %"struct.diamond::D"* %[[THIS_RELOAD]] to i8*
+  // CHECK: %[[THIS_ADJ_i8:.*]] = getelementptr inbounds i8, i8* %[[THIS_UNADJ_i8]], i32 -24
+  // CHECK: %[[THIS:.*]] = bitcast i8* %[[THIS_ADJ_i8]] to %"struct.diamond::D"*
+  //
   // CHECK: %[[D_i8:.*]] = bitcast %"struct.diamond::D"* %[[THIS]] to i8*
   // CHECK: %[[C_i8:.*]] = getelementptr inbounds i8, i8* %[[D_i8]], i32 4
   // CHECK: %[[C:.*]] = bitcast i8* %[[C_i8]] to %"struct.diamond::C"*
@@ -317,7 +334,7 @@
 // call to B() from C().
 void callC() { C x; }
 
-// CHECK-LABEL: define linkonce_odr x86_thiscallcc %"struct.test2::C"* @"\01??0C@test2@@QAE@XZ"
+// CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc %"struct.test2::C"* @"\01??0C@test2@@QAE@XZ"
 // CHECK:           (%"struct.test2::C"* returned %this, i32 %is_most_derived)
 // CHECK: br i1
 //   Virtual bases
@@ -328,7 +345,7 @@
 // CHECK: call x86_thiscallcc %"struct.test2::A"* @"\01??0A@test2@@QAE@XZ"(%"struct.test2::A"* %{{.*}})
 // CHECK: ret
 
-// CHECK2-LABEL: define linkonce_odr x86_thiscallcc %"struct.test2::B"* @"\01??0B@test2@@QAE@XZ"
+// CHECK2-LABEL: define linkonce_odr dso_local x86_thiscallcc %"struct.test2::B"* @"\01??0B@test2@@QAE@XZ"
 // CHECK2:           (%"struct.test2::B"* returned %this, i32 %is_most_derived)
 // CHECK2: call x86_thiscallcc %"struct.test2::A"* @"\01??0A@test2@@QAE@XZ"(%"struct.test2::A"* %{{.*}})
 // CHECK2: ret
@@ -357,7 +374,7 @@
 };
 
 void D::bar() {
-  // CHECK-LABEL: define x86_thiscallcc void @"\01?bar@D@test3@@UAEXXZ"(%"struct.test3::D"* %this)
+  // CHECK-LABEL: define dso_local x86_thiscallcc void @"\01?bar@D@test3@@UAEXXZ"(%"struct.test3::D"* %this)
 
   C::foo();
   // Shouldn't need any vbtable lookups.  All we have to do is adjust to C*,
@@ -391,7 +408,7 @@
 void foo(void*);
 
 C::~C() {
-  // CHECK-LABEL: define x86_thiscallcc void @"\01??1C@test4@@UAE@XZ"(%"struct.test4::C"* %this)
+  // CHECK-LABEL: define dso_local x86_thiscallcc void @"\01??1C@test4@@UAE@XZ"(%"struct.test4::C"* %this)
 
   // In this case "this" points to the most derived class, so no GEPs needed.
   // CHECK-NOT: getelementptr
@@ -404,7 +421,7 @@
 }
 
 void destroy(C *obj) {
-  // CHECK-LABEL: define void @"\01?destroy@test4@@YAXPAUC@1@@Z"(%"struct.test4::C"* %obj)
+  // CHECK-LABEL: define dso_local void @"\01?destroy@test4@@YAXPAUC@1@@Z"(%"struct.test4::C"* %obj)
 
   delete obj;
   // CHECK: %[[VPTR:.*]] = bitcast %"struct.test4::C"* %[[OBJ:.*]] to i8* (%"struct.test4::C"*, i32)***
@@ -426,7 +443,7 @@
 };
 
 E::~E() {
-  // CHECK-LABEL: define x86_thiscallcc void @"\01??1E@test4@@UAE@XZ"(%"struct.test4::E"* %this)
+  // CHECK-LABEL: define dso_local x86_thiscallcc void @"\01??1E@test4@@UAE@XZ"(%"struct.test4::E"* %this)
 
   // In this case "this" points to the most derived class, so no GEPs needed.
   // CHECK-NOT: getelementptr
@@ -437,19 +454,19 @@
 }
 
 void destroy(E *obj) {
-  // CHECK-LABEL: define void @"\01?destroy@test4@@YAXPAUE@1@@Z"(%"struct.test4::E"* %obj)
+  // CHECK-LABEL: define dso_local void @"\01?destroy@test4@@YAXPAUE@1@@Z"(%"struct.test4::E"* %obj)
 
   // CHECK-NOT: getelementptr
+  // CHECK: %[[OBJ_i8:.*]] = bitcast %"struct.test4::E"* %[[OBJ]] to i8*
+  // CHECK: %[[B_i8:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 4
+  // FIXME: in fact, the call should take i8* and the bitcast is redundant.
+  // CHECK: %[[B_as_E:.*]] = bitcast i8* %[[B_i8]] to %"struct.test4::E"*
   // CHECK: %[[OBJ_i8:.*]] = bitcast %"struct.test4::E"* %[[OBJ:.*]] to i8*
   // CHECK: %[[B_i8:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 4
   // CHECK: %[[VPTR:.*]] = bitcast i8* %[[B_i8]] to i8* (%"struct.test4::E"*, i32)***
   // CHECK: %[[VFTABLE:.*]] = load i8* (%"struct.test4::E"*, i32)**, i8* (%"struct.test4::E"*, i32)*** %[[VPTR]]
   // CHECK: %[[VFTENTRY:.*]] = getelementptr inbounds i8* (%"struct.test4::E"*, i32)*, i8* (%"struct.test4::E"*, i32)** %[[VFTABLE]], i64 0
   // CHECK: %[[VFUN:.*]] = load i8* (%"struct.test4::E"*, i32)*, i8* (%"struct.test4::E"*, i32)** %[[VFTENTRY]]
-  // CHECK: %[[OBJ_i8:.*]] = bitcast %"struct.test4::E"* %[[OBJ]] to i8*
-  // CHECK: %[[B_i8:.*]] = getelementptr inbounds i8, i8* %[[OBJ_i8]], i32 4
-  // FIXME: in fact, the call should take i8* and the bitcast is redundant.
-  // CHECK: %[[B_as_E:.*]] = bitcast i8* %[[B_i8]] to %"struct.test4::E"*
   // CHECK: call x86_thiscallcc i8* %[[VFUN]](%"struct.test4::E"* %[[B_as_E]], i32 1)
   delete obj;
 }
@@ -471,7 +488,7 @@
 };
 
 C::C() : B() {}
-// CHECK-LABEL: define x86_thiscallcc %"struct.test5::C"* @"\01??0C@test5@@QAE@XZ"(
+// CHECK-LABEL: define dso_local x86_thiscallcc %"struct.test5::C"* @"\01??0C@test5@@QAE@XZ"(
 // CHECK:   %[[THIS:.*]] = load %"struct.test5::C"*, %"struct.test5::C"**
 // CHECK:   br i1 %{{.*}}, label %[[INIT_VBASES:.*]], label %[[SKIP_VBASES:.*]]
 
@@ -479,7 +496,7 @@
 // CHECK:   %[[B:.*]] = bitcast %"struct.test5::C"* %[[THIS]] to %"struct.test5::B"*
 // CHECK:   %[[B_i8:.*]] = bitcast %"struct.test5::B"* %[[B]] to i8*
 // CHECK:   %[[FIELD:.*]] = getelementptr inbounds i8, i8* %[[B_i8]], i32 4
-// CHECK:   call void @llvm.memset.p0i8.i32(i8* %[[FIELD]], i8 0, i32 4, i32 4, i1 false)
+// CHECK:   call void @llvm.memset.p0i8.i32(i8* align 4 %[[FIELD]], i8 0, i32 4, i1 false)
 }
 
 namespace pr27621 {
@@ -495,7 +512,7 @@
 void callit(C *p) {
   static_cast<B*>(p)->g();
 }
-// CHECK-LABEL: define void @"\01?callit@pr27621@@YAXPAUC@1@@Z"(%"struct.pr27621::C"* %{{.*}})
+// CHECK-LABEL: define dso_local void @"\01?callit@pr27621@@YAXPAUC@1@@Z"(%"struct.pr27621::C"* %{{.*}})
 // CHECK: %[[B_i8:.*]] = getelementptr i8, i8* %{{.*}}, i32 4
 // CHECK: call x86_thiscallcc void @"\01?g@C@pr27621@@UAEXXZ"(i8* %[[B_i8]])
 }
@@ -511,7 +528,7 @@
   D();
 };
 D::D() : C() {}
-// CHECK-LABEL: define x86_thiscallcc %"class.test6::D"* @"\01??0D@test6@@AAE@XZ"(
+// CHECK-LABEL: define dso_local x86_thiscallcc %"class.test6::D"* @"\01??0D@test6@@AAE@XZ"(
 // CHECK:   %[[THIS:.*]] = load %"class.test6::D"*, %"class.test6::D"**
 // CHECK:   br i1 %{{.*}}, label %[[INIT_VBASES:.*]], label %[[SKIP_VBASES:.*]]
 
@@ -519,5 +536,5 @@
 // CHECK:   %[[C:.*]] = bitcast %"class.test6::D"* %[[THIS]] to %"class.test6::C"*
 // CHECK:   %[[C_i8:.*]] = bitcast %"class.test6::C"* %[[C]] to i8*
 // CHECK:   %[[FIELD:.*]] = getelementptr inbounds i8, i8* %[[C_i8]], i32 8
-// CHECK:   call void @llvm.memset.p0i8.i32(i8* %[[FIELD]], i8 0, i32 4, i32 4, i1 false)
+// CHECK:   call void @llvm.memset.p0i8.i32(i8* align 4 %[[FIELD]], i8 0, i32 4, i1 false)
 }
diff --git a/test/CodeGenCXX/microsoft-abi-virtual-member-pointers.cpp b/test/CodeGenCXX/microsoft-abi-virtual-member-pointers.cpp
index af930c8..0c37570 100644
--- a/test/CodeGenCXX/microsoft-abi-virtual-member-pointers.cpp
+++ b/test/CodeGenCXX/microsoft-abi-virtual-member-pointers.cpp
@@ -53,14 +53,14 @@
   auto ptr7 = &C::plugh;
 
 
-// CHECK32-LABEL: define void @"\01?f@@YAXXZ"()
+// CHECK32-LABEL: define dso_local void @"\01?f@@YAXXZ"()
 // CHECK32: store i8* bitcast (void (%struct.C*, ...)* @"\01??_9C@@$BA@AE" to i8*), i8** %ptr
 // CHECK32: store i8* bitcast (void (%struct.C*, ...)* @"\01??_9C@@$B3AE" to i8*), i8** %ptr2
 // CHECK32: store i8* bitcast (void (%struct.C*, ...)* @"\01??_9C@@$B7AE" to i8*), i8** %ptr3
 // CHECK32: store i8* bitcast (void (%"struct.(anonymous namespace)::D"*, ...)* @"\01??_9D@?A@@$BA@AE" to i8*), i8** %ptr4
 // CHECK32: }
 //
-// CHECK64-LABEL: define void @"\01?f@@YAXXZ"()
+// CHECK64-LABEL: define dso_local void @"\01?f@@YAXXZ"()
 // CHECK64: store i8* bitcast (void (%struct.C*, ...)* @"\01??_9C@@$BA@AA" to i8*), i8** %ptr
 // CHECK64: store i8* bitcast (void (%struct.C*, ...)* @"\01??_9C@@$B7AA" to i8*), i8** %ptr2
 // CHECK64: store i8* bitcast (void (%struct.C*, ...)* @"\01??_9C@@$BBA@AA" to i8*), i8** %ptr3
diff --git a/test/CodeGenCXX/microsoft-abi-vmemptr-conflicts.cpp b/test/CodeGenCXX/microsoft-abi-vmemptr-conflicts.cpp
index f8a12e6..058458b 100644
--- a/test/CodeGenCXX/microsoft-abi-vmemptr-conflicts.cpp
+++ b/test/CodeGenCXX/microsoft-abi-vmemptr-conflicts.cpp
@@ -19,7 +19,7 @@
 }
 }
 
-// CHECK-LABEL: define void @"\01?f@num_params@@YAXPAUC@1@@Z"(%"struct.num_params::C"* %c)
+// CHECK-LABEL: define dso_local void @"\01?f@num_params@@YAXPAUC@1@@Z"(%"struct.num_params::C"* %c)
 // CHECK: call x86_thiscallcc void bitcast (void (%"struct.num_params::C"*, ...)* @"\01??_9C@num_params@@$BA@AE" to void (%"struct.num_params::C"*, i32)*)(%"struct.num_params::C"* %{{.*}}, i32 0)
 // CHECK: call x86_thiscallcc void bitcast (void (%"struct.num_params::C"*, ...)* @"\01??_9C@num_params@@$BA@AE" to void (%"struct.num_params::C"*, i32, i32)*)(%"struct.num_params::C"* %{{.*}}, i32 0, i32 0)
 
@@ -41,7 +41,7 @@
 }
 }
 
-// CHECK-LABEL: define i64 @"\01?f@i64_return@@YA_JPAUC@1@@Z"(%"struct.i64_return::C"* %c)
+// CHECK-LABEL: define dso_local i64 @"\01?f@i64_return@@YA_JPAUC@1@@Z"(%"struct.i64_return::C"* %c)
 // CHECK: call x86_thiscallcc i32 bitcast (void (%"struct.i64_return::C"*, ...)* @"\01??_9C@i64_return@@$BA@AE" to i32 (%"struct.i64_return::C"*)*)(%"struct.i64_return::C"* %{{.*}})
 // CHECK: call x86_thiscallcc i64 bitcast (void (%"struct.i64_return::C"*, ...)* @"\01??_9C@i64_return@@$BA@AE" to i64 (%"struct.i64_return::C"*)*)(%"struct.i64_return::C"* %{{.*}})
 
@@ -63,7 +63,7 @@
 }
 }
 
-// CHECK-LABEL: define void @"\01?f@sret@@YAXPAUC@1@@Z"(%"struct.sret::C"* %c)
+// CHECK-LABEL: define dso_local void @"\01?f@sret@@YAXPAUC@1@@Z"(%"struct.sret::C"* %c)
 // CHECK: call x86_thiscallcc i32 bitcast (void (%"struct.sret::C"*, ...)* @"\01??_9C@sret@@$BA@AE" to i32 (%"struct.sret::C"*)*)(%"struct.sret::C"* %{{.*}})
 // CHECK: call x86_thiscallcc void bitcast (void (%"struct.sret::C"*, ...)* @"\01??_9C@sret@@$BA@AE" to void (%"struct.sret::C"*, %"struct.sret::Big"*)*)(%"struct.sret::C"* %{{.*}}, %"struct.sret::Big"* sret %{{.*}})
 
@@ -92,7 +92,7 @@
 }
 }
 
-// CHECK-LABEL: define void @"\01?f@cdecl_inalloca@@YAXPAUC@1@@Z"(%"struct.cdecl_inalloca::C"* %c)
+// CHECK-LABEL: define dso_local void @"\01?f@cdecl_inalloca@@YAXPAUC@1@@Z"(%"struct.cdecl_inalloca::C"* %c)
 // CHECK: call void bitcast (void (%"struct.cdecl_inalloca::C"*, ...)* @"\01??_9C@cdecl_inalloca@@$BA@AA" to void (%"struct.cdecl_inalloca::C"*)*)(%"struct.cdecl_inalloca::C"* %{{.*}})
 // CHECK: call void bitcast (void (%"struct.cdecl_inalloca::C"*, ...)* @"\01??_9C@cdecl_inalloca@@$BA@AA" to void (<{ %"struct.cdecl_inalloca::C"*, %"struct.cdecl_inalloca::Big" }>*)*)(<{ %"struct.cdecl_inalloca::C"*, %"struct.cdecl_inalloca::Big" }>* inalloca %{{.*}})
 
diff --git a/test/CodeGenCXX/microsoft-abi-vmemptr-vbase.cpp b/test/CodeGenCXX/microsoft-abi-vmemptr-vbase.cpp
index 85cc84f..d23a4a9 100644
--- a/test/CodeGenCXX/microsoft-abi-vmemptr-vbase.cpp
+++ b/test/CodeGenCXX/microsoft-abi-vmemptr-vbase.cpp
@@ -8,5 +8,5 @@
     virtual void f();
 };
 void (B::*MemPtr)(void) = &B::f;
-// CHECK-DAG: @"\01?MemPtr@PR23452@@3P8B@1@AEXXZQ21@" = global { i8*, i32, i32 } { i8* bitcast ({{.*}} @"\01??_9B@PR23452@@$BA@AE" to i8*), i32 0, i32 4 }
+// CHECK-DAG: @"\01?MemPtr@PR23452@@3P8B@1@AEXXZQ21@" = dso_local global { i8*, i32, i32 } { i8* bitcast ({{.*}} @"\01??_9B@PR23452@@$BA@AE" to i8*), i32 0, i32 4 }
 }
diff --git a/test/CodeGenCXX/microsoft-abi-vtables-multiple-nonvirtual-inheritance-this-adjustment.cpp b/test/CodeGenCXX/microsoft-abi-vtables-multiple-nonvirtual-inheritance-this-adjustment.cpp
index b97c432..7679c55 100644
--- a/test/CodeGenCXX/microsoft-abi-vtables-multiple-nonvirtual-inheritance-this-adjustment.cpp
+++ b/test/CodeGenCXX/microsoft-abi-vtables-multiple-nonvirtual-inheritance-this-adjustment.cpp
@@ -156,23 +156,17 @@
 
 // BITCODE-LABEL: define {{.*}}\01?ffun@test4@@YAXAAUC@1@@Z
 void ffun(C &c) {
-  // BITCODE: load
-  // BITCODE: bitcast
-  // BITCODE: bitcast
   // BITCODE: [[THIS1:%.+]] = bitcast %"struct.test4::C"* {{.*}} to i8*
   // BITCODE: [[THIS2:%.+]] = getelementptr inbounds i8, i8* [[THIS1]], i32 4
-  // BITCODE-NEXT: call x86_thiscallcc {{.*}}(i8* [[THIS2]])
+  // BITCODE: call x86_thiscallcc {{.*}}(i8* [[THIS2]])
   c.bar();
 }
 
 // BITCODE-LABEL: define {{.*}}\01?fop@test4@@YAXAAUC@1@@Z
 void fop(C &c) {
-  // BITCODE: load
-  // BITCODE: bitcast
-  // BITCODE: bitcast
   // BITCODE: [[THIS1:%.+]] = bitcast %"struct.test4::C"* {{.*}} to i8*
   // BITCODE: [[THIS2:%.+]] = getelementptr inbounds i8, i8* [[THIS1]], i32 4
-  // BITCODE-NEXT: call x86_thiscallcc {{.*}}(i8* [[THIS2]])
+  // BITCODE: call x86_thiscallcc {{.*}}(i8* [[THIS2]])
   -c;
 }
 
@@ -195,8 +189,12 @@
   whatsthis = this;
 }
 
-// BITCODE-LABEL: define void @"\01?g@C@pr30293@@UAAXUNonTrivial@2@@Z"(<{ i8*, %"struct.pr30293::NonTrivial" }>* inalloca)
-// BITCODE: %[[this1:[^ ]*]] = load i8*, i8** %[[thisaddr:[^ ]*]], align 4
-// BITCODE-NEXT: %[[this2:[^ ]*]] = getelementptr inbounds i8, i8* %[[this1]], i32 -4
-// BITCODE-NEXT: store i8* %[[this2]], i8** %[[thisaddr]], align 4
+// BITCODE-LABEL: define dso_local void @"\01?g@C@pr30293@@UAAXUNonTrivial@2@@Z"(<{ i8*, %"struct.pr30293::NonTrivial" }>* inalloca)
+// BITCODE: %[[thisaddr:[^ ]*]] = getelementptr inbounds <{ i8*, %"struct.pr30293::NonTrivial" }>, <{ i8*, %"struct.pr30293::NonTrivial" }>* {{.*}}, i32 0, i32 0
+// BITCODE: %[[thisaddr1:[^ ]*]] = bitcast i8** %[[thisaddr]] to %"struct.pr30293::C"**
+// BITCODE: %[[this1:[^ ]*]] = load %"struct.pr30293::C"*, %"struct.pr30293::C"** %[[thisaddr1]], align 4
+// BITCODE: %[[this2:[^ ]*]] = bitcast %"struct.pr30293::C"* %[[this1]] to i8*
+// BITCODE: %[[this3:[^ ]*]] = getelementptr inbounds i8, i8* %[[this2]], i32 -4
+// BITCODE: %[[this4:[^ ]*]] = bitcast i8* %[[this3]] to %"struct.pr30293::C"*
+// BITCODE: store %"struct.pr30293::C"* %[[this4]], %"struct.pr30293::C"** @"\01?whatsthis@pr30293@@3PAUC@1@A", align 4
 }
diff --git a/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance.cpp b/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance.cpp
index 342a584..3633d7c 100644
--- a/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance.cpp
+++ b/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance.cpp
@@ -145,7 +145,7 @@
   // MANGLING-DAG: @"\01??_7X@Test4@@6B@"
 
   // Also check the mangling of the thunk.
-  // MANGLING-DAG: define linkonce_odr x86_thiscallcc void @"\01?f@C@@WPPPPPPPI@AEXXZ"
+  // MANGLING-DAG: define linkonce_odr dso_local x86_thiscallcc void @"\01?f@C@@WPPPPPPPI@AEXXZ"
 };
 
 X::X() {}
diff --git a/test/CodeGenCXX/microsoft-compatibility.cpp b/test/CodeGenCXX/microsoft-compatibility.cpp
index 3676024..0fd2e4d 100644
--- a/test/CodeGenCXX/microsoft-compatibility.cpp
+++ b/test/CodeGenCXX/microsoft-compatibility.cpp
@@ -8,7 +8,7 @@
 template <>
 const int S<char>::x[] = {1};
 
-// CHECK-LABEL: @"\01?x@?$S@D@@2QBHB" = weak_odr constant [1 x i32] [i32 1], comdat
+// CHECK-LABEL: @"\01?x@?$S@D@@2QBHB" = weak_odr dso_local constant [1 x i32] [i32 1], comdat
 
 template<class T>
 void destroy(T *p) {
@@ -20,11 +20,11 @@
   destroy((void*)&a);
 }
 
-// CHECK-LABEL: define void @f()
+// CHECK-LABEL: define dso_local void @f()
 // CHECK: call void @"\01??$destroy@X@@YAXPAX@Z"
 // CHECK: ret void
 
-// CHECK-LABEL: define linkonce_odr void @"\01??$destroy@X@@YAXPAX@Z"(i8* %p)
+// CHECK-LABEL: define linkonce_odr dso_local void @"\01??$destroy@X@@YAXPAX@Z"(i8* %p)
 //    The pseudo-dtor expr should not generate calls to anything.
 // CHECK-NOT: call
 // CHECK-NOT: invoke
diff --git a/test/CodeGenCXX/microsoft-inaccessible-base.cpp b/test/CodeGenCXX/microsoft-inaccessible-base.cpp
new file mode 100644
index 0000000..d8af8ef
--- /dev/null
+++ b/test/CodeGenCXX/microsoft-inaccessible-base.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -fms-compatibility -triple x86_64-windows-msvc %s -emit-llvm -o - | FileCheck %s
+
+// Make sure we choose the *direct* base path when doing these conversions.
+
+// CHECK: %struct.C = type { %struct.A, %struct.B }
+// CHECK: %struct.D = type { %struct.B, %struct.A }
+
+struct A { int a; };
+struct B : A { int b; };
+
+struct C : A, B { };
+extern "C" A *a_from_c(C *p) { return p; }
+// CHECK-LABEL: define dso_local %struct.A* @a_from_c(%struct.C* %{{.*}})
+// CHECK: bitcast %struct.C* %{{.*}} to %struct.A*
+
+struct D : B, A { };
+extern "C" A *a_from_d(D *p) { return p; }
+// CHECK-LABEL: define dso_local %struct.A* @a_from_d(%struct.D* %{{.*}})
+// CHECK: %[[p_i8:[^ ]*]] = bitcast %struct.D* %{{.*}} to i8*
+// CHECK: getelementptr inbounds i8, i8* %[[p_i8]], i64 8
diff --git a/test/CodeGenCXX/microsoft-interface.cpp b/test/CodeGenCXX/microsoft-interface.cpp
index 5f3a94a..74a5209 100644
--- a/test/CodeGenCXX/microsoft-interface.cpp
+++ b/test/CodeGenCXX/microsoft-interface.cpp
@@ -17,24 +17,24 @@
   return s.test();
 }
 
-// CHECK: @_ZTV1S = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1S to i8*), i8* bitcast (i32 (%struct.S*)* @_ZN1S4testEv to i8*)] }
+// CHECK: @_ZTV1S = linkonce_odr dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1S to i8*), i8* bitcast (i32 (%struct.S*)* @_ZN1S4testEv to i8*)] }
 
-// CHECK-LABEL: define i32 @_Z2fnv()
+// CHECK-LABEL: define dso_local i32 @_Z2fnv()
 // CHECK:   call x86_thiscallcc void @_ZN1SC1Ev(%struct.S* %s)
 // CHECK:   %{{[.0-9A-Z_a-z]+}} = call x86_thiscallcc i32 @_ZN1S4testEv(%struct.S* %s)
 
-// CHECK-LABEL: define linkonce_odr x86_thiscallcc void @_ZN1SC1Ev(%struct.S* %this)
+// CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc void @_ZN1SC1Ev(%struct.S* %this)
 // CHECK:   call x86_thiscallcc void @_ZN1SC2Ev(%struct.S* %{{[.0-9A-Z_a-z]+}})
 
-// CHECK-LABEL: define linkonce_odr x86_thiscallcc i32 @_ZN1S4testEv(%struct.S* %this)
+// CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc i32 @_ZN1S4testEv(%struct.S* %this)
 // CHECK:   %{{[.0-9A-Z_a-z]+}} = call x86_thiscallcc i32 @_ZN1I4testEv(%__interface.I* %{{[.0-9A-Z_a-z]+}})
 
-// CHECK-LABEL: define linkonce_odr x86_thiscallcc void @_ZN1SC2Ev(%struct.S* %this)
+// CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc void @_ZN1SC2Ev(%struct.S* %this)
 // CHECK:   call x86_thiscallcc void @_ZN1IC2Ev(%__interface.I* %{{[.0-9A-Z_a-z]+}})
 // CHECK:   store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1S, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %{{[.0-9A-Z_a-z]+}}
 
-// CHECK-LABEL: define linkonce_odr x86_thiscallcc void @_ZN1IC2Ev(%__interface.I* %this)
+// CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc void @_ZN1IC2Ev(%__interface.I* %this)
 // CHECK:   store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1I, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %{{[.0-9A-Z_a-z]+}}
 
-// CHECK-LABEL: define linkonce_odr x86_thiscallcc i32 @_ZN1I4testEv(%__interface.I* %this)
+// CHECK-LABEL: define linkonce_odr dso_local x86_thiscallcc i32 @_ZN1I4testEv(%__interface.I* %this)
 // CHECK:   ret i32 1
diff --git a/test/CodeGenCXX/microsoft-uuidof.cpp b/test/CodeGenCXX/microsoft-uuidof.cpp
index 9b4ff68..f8d2da7 100644
--- a/test/CodeGenCXX/microsoft-uuidof.cpp
+++ b/test/CodeGenCXX/microsoft-uuidof.cpp
@@ -63,12 +63,12 @@
 // CHECK: @_GUID_87654321_4321_4321_4321_ba0987654321 = linkonce_odr constant { i32, i16, i16, [8 x i8] } { i32 -2023406815, i16 17185, i16 17185, [8 x i8] c"C!\BA\09\87eC!" }, comdat
 
 // The static initializer for thing.
-// CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast (%struct._GUID* @thing to i8*), i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ac to i8*), i32 16, i32 4, i1 false)
-// CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast (%struct._GUID* @thing to i8*), i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ac to i8*), i32 4, i32 4, i1 false)
+// CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (%struct._GUID* @thing to i8*), i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ac to i8*), i32 16, i1 false)
+// CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (%struct._GUID* @thing to i8*), i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ac to i8*), i32 4, i1 false)
 
 // The static initializer for g.
-// CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast (%struct._GUID* @g to i8*), i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i32 4, i1 false)
-// CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast (%struct._GUID* @g to i8*), i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i32 4, i1 false)
+// CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (%struct._GUID* @g to i8*), i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i1 false)
+// CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (%struct._GUID* @g to i8*), i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i1 false)
 
 #ifdef DEFINE_GUID
 void fun() {
@@ -81,20 +81,20 @@
 
   // CHECK-DEFINE-GUID: [[U1:%.+]] = bitcast %struct._GUID* %s1_1 to i8*
   // CHECK-DEFINE-WRONG-GUID: [[U1:%.+]] = bitcast %struct._GUID* %s1_1 to i8*
-  // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U1]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i32 4, i1 false)
-  // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U1]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i32 4, i1 false)
+  // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U1]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i1 false)
+  // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U1]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i1 false)
   GUID s1_1 = __uuidof(S1);
 
   // CHECK-DEFINE-GUID: [[U2:%.+]] = bitcast %struct._GUID* %s1_2 to i8*
   // CHECK-DEFINE-WRONG-GUID: [[U2:%.+]] = bitcast %struct._GUID* %s1_2 to i8*
-  // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U2]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i32 4, i1 false)
-  // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U2]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i32 4, i1 false)
+  // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U2]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i1 false)
+  // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U2]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i1 false)
   GUID s1_2 = __uuidof(S1);
 
   // CHECK-DEFINE-GUID: [[U3:%.+]] = bitcast %struct._GUID* %s1_3 to i8*
   // CHECK-DEFINE-WRONG-GUID: [[U3:%.+]] = bitcast %struct._GUID* %s1_3 to i8*
-  // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U3]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i32 4, i1 false)
-  // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U3]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i32 4, i1 false)
+  // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U3]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i1 false)
+  // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U3]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i1 false)
   GUID s1_3 = __uuidof(s1);
 }
 #endif
diff --git a/test/CodeGenCXX/mingw-new-abi.cpp b/test/CodeGenCXX/mingw-new-abi.cpp
index 2b05253..fe98a1f 100644
--- a/test/CodeGenCXX/mingw-new-abi.cpp
+++ b/test/CodeGenCXX/mingw-new-abi.cpp
@@ -3,8 +3,8 @@
 
 namespace test1 {
   struct foo {
-    //  MINGW: declare x86_thiscallcc void @_ZN5test13foo1fEv
-    //  CYGWIN: declare void @_ZN5test13foo1fEv
+    //  MINGW: declare dso_local x86_thiscallcc void @_ZN5test13foo1fEv
+    //  CYGWIN: declare dso_local void @_ZN5test13foo1fEv
     void f();
   };
   void g(foo *x) {
diff --git a/test/CodeGenCXX/mingw-w64-exceptions.c b/test/CodeGenCXX/mingw-w64-exceptions.c
new file mode 100644
index 0000000..e980ebd
--- /dev/null
+++ b/test/CodeGenCXX/mingw-w64-exceptions.c
@@ -0,0 +1,22 @@
+// RUN: %clang -target x86_64-windows-gnu -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SEH
+// RUN: %clang -target i686-windows-gnu -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DWARF
+
+// RUN: %clang -target x86_64-windows-gnu -fsjlj-exceptions -c %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-SJLJ
+
+// RUN: %clang -target x86_64-windows-gnu -fdwarf-exceptions -c %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-DWARF
+
+// RUN: %clang -target x86_64-windows-gnu -fsjlj-exceptions -fseh-exceptions -c %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-SEH
+
+// RUN: %clang -target x86_64-windows-gnu -fseh-exceptions -fsjlj-exceptions -c %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-SJLJ
+
+// RUN: %clang -target x86_64-windows-gnu -fseh-exceptions -fdwarf-exceptions -c %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-DWARF
+
+// CHECK-SEH: "-fseh-exceptions"
+// CHECK-SJLJ: "-fsjlj-exceptions"
+// CHECK-DWARF-NOT: "-fsjlj-exceptions"
+// CHECK-DWARF-NOT: "-fseh-exceptions"
diff --git a/test/CodeGenCXX/mingw-w64-seh-exceptions.cpp b/test/CodeGenCXX/mingw-w64-seh-exceptions.cpp
index 9025f87..f3c3dca 100644
--- a/test/CodeGenCXX/mingw-w64-seh-exceptions.cpp
+++ b/test/CodeGenCXX/mingw-w64-seh-exceptions.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -fexceptions -emit-llvm -triple x86_64-w64-windows-gnu -o - | FileCheck %s --check-prefix=X64
+// RUN: %clang_cc1 %s -fexceptions -fseh-exceptions -emit-llvm -triple x86_64-w64-windows-gnu -o - | FileCheck %s --check-prefix=X64
+// RUN: %clang_cc1 %s -fexceptions -fdwarf-exceptions -emit-llvm -triple i686-w64-windows-gnu -o - | FileCheck %s --check-prefix=X86
 // RUN: %clang_cc1 %s -fexceptions -emit-llvm -triple i686-w64-windows-gnu -o - | FileCheck %s --check-prefix=X86
 
 extern "C" void foo();
@@ -15,12 +16,12 @@
   foo();
 }
 
-// X64: define void @test()
+// X64: define dso_local void @test()
 // X64-SAME: personality i8* bitcast (i32 (...)* @__gxx_personality_seh0 to i8*)
 // X64: invoke void @foo()
 // X64: landingpad { i8*, i32 }
 
-// X86: define void @test()
+// X86: define dso_local void @test()
 // X86-SAME: personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
 // X86: invoke void @foo()
 // X86: landingpad { i8*, i32 }
diff --git a/test/CodeGenCXX/ms-eh-personality.cpp b/test/CodeGenCXX/ms-eh-personality.cpp
new file mode 100644
index 0000000..36661b7
--- /dev/null
+++ b/test/CodeGenCXX/ms-eh-personality.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -fexceptions -fcxx-exceptions %s -emit-llvm -o - | FileCheck %s --check-prefix=MSVC
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -fexceptions -fcxx-exceptions -fsjlj-exceptions %s -emit-llvm -o - | FileCheck %s --check-prefix=SJLJ
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -fexceptions -fcxx-exceptions -fseh-exceptions %s -emit-llvm -o - | FileCheck %s --check-prefix=MSVC
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -fexceptions -fcxx-exceptions -fdwarf-exceptions %s -emit-llvm -o - | FileCheck %s --check-prefix=DWARF
+
+// MSVC: define dso_local void @f(){{.*}}@__CxxFrameHandler3
+// SJLJ: define dso_local void @f(){{.*}}@__gxx_personality_sj0
+// DWARF: define dso_local void @f(){{.*}}@__gxx_personality_v0
+
+struct Cleanup {
+  Cleanup();
+  ~Cleanup();
+  int x = 0;
+};
+
+void g();
+extern "C" void f() {
+  Cleanup c;
+  g();
+}
diff --git a/test/CodeGenCXX/ms-inline-asm-return.cpp b/test/CodeGenCXX/ms-inline-asm-return.cpp
index 837f1b4..4a7165a 100644
--- a/test/CodeGenCXX/ms-inline-asm-return.cpp
+++ b/test/CodeGenCXX/ms-inline-asm-return.cpp
@@ -12,7 +12,7 @@
     mov edx, 1
   }
 }
-// CHECK-LABEL: define i64 @f_i64()
+// CHECK-LABEL: define dso_local i64 @f_i64()
 // CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=A,~{eax},{{.*}}"
 // CHECK: ret i64 %[[r]]
 
@@ -22,7 +22,7 @@
     mov edx, 1
   }
 }
-// CHECK-LABEL: define i32 @f_i32()
+// CHECK-LABEL: define dso_local i32 @f_i32()
 // CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
 // CHECK: ret i32 %[[r]]
 
@@ -32,7 +32,7 @@
     mov edx, 1
   }
 }
-// CHECK-LABEL: define signext i16 @f_i16()
+// CHECK-LABEL: define dso_local signext i16 @f_i16()
 // CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
 // CHECK: %[[r_i16:[^ ]*]] = trunc i32 %[[r]] to i16
 // CHECK: ret i16 %[[r_i16]]
@@ -43,7 +43,7 @@
     mov edx, 1
   }
 }
-// CHECK-LABEL: define signext i8 @f_i8()
+// CHECK-LABEL: define dso_local signext i8 @f_i8()
 // CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
 // CHECK: %[[r_i8:[^ ]*]] = trunc i32 %[[r]] to i8
 // CHECK: ret i8 %[[r_i8]]
@@ -54,7 +54,7 @@
     mov edx, 1U
   }
 }
-// CHECK-LABEL: define zeroext i1 @f_i1()
+// CHECK-LABEL: define dso_local zeroext i1 @f_i1()
 // CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
 // CHECK: %[[r_i8:[^ ]*]] = trunc i32 %[[r]] to i8
 // CHECK: store i8 %[[r_i8]], i8* %{{.*}}
@@ -69,7 +69,7 @@
     mov eax, 0x01010101
   }
 }
-// CHECK-LABEL: define i32 @f_s4()
+// CHECK-LABEL: define dso_local i32 @f_s4()
 // CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$16843009", "={eax},~{eax},{{.*}}"
 // CHECK: store i32 %[[r]], i32* %{{.*}}
 // CHECK: %[[r_i32:[^ ]*]] = load i32, i32* %{{.*}}
@@ -84,7 +84,7 @@
     mov edx, 01010101b
   }
 }
-// CHECK-LABEL: define i64 @f_s8()
+// CHECK-LABEL: define dso_local i64 @f_s8()
 // CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$16843009\0A\09mov edx, $$85", "=A,~{eax},{{.*}}"
 // CHECK: store i64 %[[r]], i64* %{{.*}}
 // CHECK: %[[r_i64:[^ ]*]] = load i64, i64* %{{.*}}
@@ -95,6 +95,6 @@
 int main() {
   __asm xor eax, eax
 }
-// CHECK-LABEL: define i32 @main()
+// CHECK-LABEL: define dso_local i32 @main()
 // CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "xor eax, eax", "={eax},{{.*}}"
 // CHECK: ret i32 %[[r]]
diff --git a/test/CodeGenCXX/ms-integer-static-data-members-exported.cpp b/test/CodeGenCXX/ms-integer-static-data-members-exported.cpp
index 78bb3a2..d043fc1 100644
--- a/test/CodeGenCXX/ms-integer-static-data-members-exported.cpp
+++ b/test/CodeGenCXX/ms-integer-static-data-members-exported.cpp
@@ -17,6 +17,6 @@
   };
 };
 
-// CHECK: @"\01?x@S@@2FB" = weak_odr dllexport constant i16 42, comdat, align 2
-// CHECK: @"\01?y@S@@2W4Enum@@B" = weak_odr dllexport constant i32 2, comdat, align 4
+// CHECK: @"\01?x@S@@2FB" = weak_odr dso_local dllexport constant i16 42, comdat, align 2
+// CHECK: @"\01?y@S@@2W4Enum@@B" = weak_odr dso_local dllexport constant i32 2, comdat, align 4
 // CHECK-NOT: NonExported
diff --git a/test/CodeGenCXX/ms-integer-static-data-members.cpp b/test/CodeGenCXX/ms-integer-static-data-members.cpp
index b2bfc92..1ac5648 100644
--- a/test/CodeGenCXX/ms-integer-static-data-members.cpp
+++ b/test/CodeGenCXX/ms-integer-static-data-members.cpp
@@ -33,20 +33,20 @@
 
 
 // No initialization.
-// CHECK-DAG: @"\01?NoInit_Ref@S@@2HB" = external constant i32
+// CHECK-DAG: @"\01?NoInit_Ref@S@@2HB" = external dso_local constant i32
 
 // Inline initialization, no real definiton, not referenced.
 // CHECK-NOT: @"\01?Inline_NotDef_NotRef@S@@2HB" = {{.*}} constant i32 5
 
 // Inline initialization, no real definiton, referenced.
-// CHECK-DAG: @"\01?Inline_NotDef_Ref@S@@2HB" = linkonce_odr constant i32 5, comdat, align 4
+// CHECK-DAG: @"\01?Inline_NotDef_Ref@S@@2HB" = linkonce_odr dso_local constant i32 5, comdat, align 4
 
 // Inline initialization, real definiton, not referenced.
-// CHECK-NOT: @"\01?Inline_Def_NotRef@S@@2HB" = constant i32 5, align 4
+// CHECK-NOT: @"\01?Inline_Def_NotRef@S@@2HB" = dso_local constant i32 5, align 4
 
 // Inline initialization, real definiton, referenced.
-// CHECK-DAG: @"\01?Inline_Def_Ref@S@@2HB" = linkonce_odr constant i32 5, comdat, align 4
+// CHECK-DAG: @"\01?Inline_Def_Ref@S@@2HB" = linkonce_odr dso_local constant i32 5, comdat, align 4
 
 // Out-of-line initialization.
-// CHECK-DAG: @"\01?OutOfLine_Def_NotRef@S@@2HB" = constant i32 5, align 4
-// CHECK-DAG: @"\01?OutOfLine_Def_Ref@S@@2HB" = constant i32 5, align 4
+// CHECK-DAG: @"\01?OutOfLine_Def_NotRef@S@@2HB" = dso_local constant i32 5, align 4
+// CHECK-DAG: @"\01?OutOfLine_Def_Ref@S@@2HB" = dso_local constant i32 5, align 4
diff --git a/test/CodeGenCXX/ms-property.cpp b/test/CodeGenCXX/ms-property.cpp
index 49e957b..8c1c7a6 100644
--- a/test/CodeGenCXX/ms-property.cpp
+++ b/test/CodeGenCXX/ms-property.cpp
@@ -102,7 +102,7 @@
   return Test1::GetTest1()->X;
 }
 
-// CHECK: define linkonce_odr void @"\01??$foo@H@@YAXHH@Z"(i32 %{{.+}}, i32 %{{.+}})
+// CHECK: define linkonce_odr dso_local void @"\01??$foo@H@@YAXHH@Z"(i32 %{{.+}}, i32 %{{.+}})
 // CHECK: call i32 @"\01?GetX@?$St@H@@QEAAHHH@Z"(%class.St{{.+}}* [[BAR:%.+]], i32 %{{.+}} i32 %{{.+}})
 // CHECK: call i32 @"\01?PutX@?$St@H@@QEAAHHHH@Z"(%class.St{{.+}}* [[BAR]], i32 %{{.+}}, i32 %{{.+}}, i32 %{{.+}})
 // CHECK: call i32 @"\01?GetX@?$St@H@@QEAAHHH@Z"(%class.St{{.+}}* [[BAR]], i32 %{{.+}} i32 %{{.+}})
diff --git a/test/CodeGenCXX/ms-thread_local.cpp b/test/CodeGenCXX/ms-thread_local.cpp
index dc7958d..793e528 100644
--- a/test/CodeGenCXX/ms-thread_local.cpp
+++ b/test/CodeGenCXX/ms-thread_local.cpp
@@ -6,12 +6,12 @@
 };
 
 // CHECK-DAG: $"\01??$a@X@@3UA@@A" = comdat any
-// CHECK-DAG: @"\01??$a@X@@3UA@@A" = linkonce_odr thread_local global %struct.A zeroinitializer, comdat, align 1
+// CHECK-DAG: @"\01??$a@X@@3UA@@A" = linkonce_odr dso_local thread_local global %struct.A zeroinitializer, comdat, align 1
 // CHECK-DAG: @"\01??__E?$a@X@@YAXXZ$initializer$" = internal constant void ()* @"\01??__E?$a@X@@YAXXZ", section ".CRT$XDU", comdat($"\01??$a@X@@3UA@@A")
 template <typename T>
 thread_local A a = A();
 
-// CHECK-DAG: @"\01?b@@3UA@@A" = thread_local global %struct.A zeroinitializer, align 1
+// CHECK-DAG: @"\01?b@@3UA@@A" = dso_local thread_local global %struct.A zeroinitializer, align 1
 // CHECK-DAG: @"__tls_init$initializer$" = internal constant void ()* @__tls_init, section ".CRT$XDU"
 thread_local A b;
 
diff --git a/test/CodeGenCXX/msabi-swiftcall-cc.cpp b/test/CodeGenCXX/msabi-swiftcall-cc.cpp
new file mode 100644
index 0000000..5a5453a
--- /dev/null
+++ b/test/CodeGenCXX/msabi-swiftcall-cc.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple i686-unknown-windows-msvc -fdeclspec -emit-llvm %s -o - | FileCheck %s
+
+void __attribute__((__swiftcall__)) f() {}
+// CHECK-DAG: @"\01?f@@YSXXZ"
+
+void (__attribute__((__swiftcall__)) *p)();
+// CHECK-DAG: @"\01?p@@3P6SXXZA"
+
+namespace {
+void __attribute__((__swiftcall__)) __attribute__((__used__)) f() { }
+// CHECK-DAG: "\01?f@?A@@YSXXZ"
+}
+
+namespace n {
+void __attribute__((__swiftcall__)) f() {}
+// CHECK-DAG: "\01?f@n@@YSXXZ"
+}
+
+struct __declspec(dllexport) S {
+  S(const S &) = delete;
+  S & operator=(const S &) = delete;
+  void __attribute__((__swiftcall__)) m() { }
+  // CHECK-DAG: "\01?m@S@@QASXXZ"
+};
+
+void f(void (__attribute__((__swiftcall__))())) {}
+// CHECK-DAG: "\01?f@@YAXP6SXXZ@Z"
+
diff --git a/test/CodeGenCXX/naked.cpp b/test/CodeGenCXX/naked.cpp
index 34f22b3..c6fe133 100644
--- a/test/CodeGenCXX/naked.cpp
+++ b/test/CodeGenCXX/naked.cpp
@@ -7,7 +7,7 @@
 };
 
 __attribute__((naked)) void TestNaked::NakedFunction() {
-  // CHECK-LABEL: define {{(x86_thiscallcc )?}}void @
+  // CHECK-LABEL: define {{(dso_local )?}}{{(x86_thiscallcc )?}}void @
   // CHECK: call void asm sideeffect
   asm("");
 }
diff --git a/test/CodeGenCXX/new-array-init.cpp b/test/CodeGenCXX/new-array-init.cpp
index ccc218e..90cd600 100644
--- a/test/CodeGenCXX/new-array-init.cpp
+++ b/test/CodeGenCXX/new-array-init.cpp
@@ -50,10 +50,10 @@
   // FIXME: Conditionally throw an exception rather than passing -1 to alloc function
   // CHECK: select
   // CHECK: %[[PTR:.*]] = call i8* @_Zna{{.}}(i{{32|64}}
-  // CHECK: call void @llvm.memcpy{{.*}}(i8* %[[PTR]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @[[ABC4]], i32 0, i32 0), i32 4,
+  // CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %[[PTR]], i8* align {{[0-9]+}} getelementptr inbounds ([4 x i8], [4 x i8]* @[[ABC4]], i32 0, i32 0), i32 4,
   // CHECK: %[[REST:.*]] = getelementptr inbounds i8, i8* %[[PTR]], i32 4
   // CHECK: %[[RESTSIZE:.*]] = sub {{.*}}, 4
-  // CHECK: call void @llvm.memset{{.*}}(i8* %[[REST]], i8 0, i{{32|64}} %[[RESTSIZE]],
+  // CHECK: call void @llvm.memset{{.*}}(i8* align {{[0-9]+}} %[[REST]], i8 0, i{{32|64}} %[[RESTSIZE]],
   new char[n] { "abc" };
 }
 
@@ -61,7 +61,7 @@
 void string_exact() {
   // CHECK-NOT: icmp
   // CHECK: %[[PTR:.*]] = call i8* @_Zna{{.}}(i{{32|64}} 4)
-  // CHECK: call void @llvm.memcpy{{.*}}(i8* %[[PTR]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @[[ABC4]], i32 0, i32 0), i32 4,
+  // CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %[[PTR]], i8* align {{[0-9]+}} getelementptr inbounds ([4 x i8], [4 x i8]* @[[ABC4]], i32 0, i32 0), i32 4,
   // CHECK-NOT: memset
   new char[4] { "abc" };
 }
@@ -71,7 +71,7 @@
   // CHECK-NOT: icmp
   // CHECK: %[[PTR:.*]] = call i8* @_Zna{{.}}(i{{32|64}} 15)
   // FIXME: For very large arrays, it would be preferable to emit a small copy and a memset.
-  // CHECK: call void @llvm.memcpy{{.*}}(i8* %[[PTR]], i8* getelementptr inbounds ([15 x i8], [15 x i8]* @[[ABC15]], i32 0, i32 0), i32 15,
+  // CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %[[PTR]], i8* align {{[0-9]+}} getelementptr inbounds ([15 x i8], [15 x i8]* @[[ABC15]], i32 0, i32 0), i32 15,
   // CHECK-NOT: memset
   new char[15] { "abc" };
 }
@@ -113,7 +113,7 @@
   // CHECK: %[[PTR2:.*]] = getelementptr inbounds %[[AGGR]], %[[AGGR]]* %[[PTR1]], i32 1{{$}}
   // CHECK: %[[REMAIN:.*]] = sub i32 {{.*}}, 16
   // CHECK: %[[MEM:.*]] = bitcast %[[AGGR]]* %[[PTR2]] to i8*
-  // CHECK: call void @llvm.memset{{.*}}(i8* %[[MEM]], i8 0, i32 %[[REMAIN]],
+  // CHECK: call void @llvm.memset{{.*}}(i8* align {{[0-9]+}} %[[MEM]], i8 0, i32 %[[REMAIN]],
   struct Aggr { int a, b; };
   new Aggr[n] { 1, 2, 3 };
 }
diff --git a/test/CodeGenCXX/new-overflow.cpp b/test/CodeGenCXX/new-overflow.cpp
index 0c4c3c8..b27984f 100644
--- a/test/CodeGenCXX/new-overflow.cpp
+++ b/test/CodeGenCXX/new-overflow.cpp
@@ -85,9 +85,7 @@
 
   // CHECK:    define [[A:%.*]]* @_ZN5test44testEs(i16 signext
   // CHECK:      [[N:%.*]] = sext i16 {{%.*}} to i32
-  // CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[N]], 0
-  // CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 -1, i32 [[N]]
-  // CHECK-NEXT: call i8* @_Znaj(i32 [[T1]])
+  // CHECK-NEXT: call i8* @_Znaj(i32 [[N]])
   // CHECK:      getelementptr inbounds {{.*}}, i32 [[N]]
   elt *test(short s) {
     return new elt[s];
@@ -104,9 +102,7 @@
 
   // CHECK:    define [[A:%.*]]* @_ZN5test54testEi(i32
   // CHECK:      [[N:%.*]] = load i32, i32*
-  // CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[N]], 0
-  // CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 -1, i32 [[N]]
-  // CHECK-NEXT: call i8* @_Znaj(i32 [[T1]])
+  // CHECK-NEXT: call i8* @_Znaj(i32 [[N]])
   // CHECK:      getelementptr inbounds {{.*}}, i32 [[N]]
   elt *test(int s) {
     return new elt[s];
@@ -169,13 +165,11 @@
 
   // CHECK:    define [[A:%.*]]* @_ZN5test84testEx(i64
   // CHECK:      [[N:%.*]] = load i64, i64*
-  // CHECK-NEXT: [[T0:%.*]] = icmp uge i64 [[N]], 4294967296
   // CHECK-NEXT: [[T1:%.*]] = trunc i64 [[N]] to i32
   // CHECK-NEXT: [[T2:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[T1]], i32 4)
   // CHECK-NEXT: [[T3:%.*]] = extractvalue { i32, i1 } [[T2]], 1
-  // CHECK-NEXT: [[T4:%.*]] = or i1 [[T0]], [[T3]]
   // CHECK-NEXT: [[T5:%.*]] = extractvalue { i32, i1 } [[T2]], 0
-  // CHECK-NEXT: [[T6:%.*]] = select i1 [[T4]], i32 -1, i32 [[T5]]
+  // CHECK-NEXT: [[T6:%.*]] = select i1 [[T3]], i32 -1, i32 [[T5]]
   // CHECK-NEXT: call i8* @_Znaj(i32 [[T6]])
   // CHECK:      getelementptr inbounds {{.*}}, i32 [[T1]]
   elt *test(long long s) {
@@ -194,13 +188,11 @@
 
   // CHECK:    define [[A:%.*]]* @_ZN5test94testEy(i64
   // CHECK:      [[N:%.*]] = load i64, i64*
-  // CHECK-NEXT: [[T0:%.*]] = icmp uge i64 [[N]], 4294967296
   // CHECK-NEXT: [[T1:%.*]] = trunc i64 [[N]] to i32
   // CHECK-NEXT: [[T2:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[T1]], i32 4)
   // CHECK-NEXT: [[T3:%.*]] = extractvalue { i32, i1 } [[T2]], 1
-  // CHECK-NEXT: [[T4:%.*]] = or i1 [[T0]], [[T3]]
   // CHECK-NEXT: [[T5:%.*]] = extractvalue { i32, i1 } [[T2]], 0
-  // CHECK-NEXT: [[T6:%.*]] = select i1 [[T4]], i32 -1, i32 [[T5]]
+  // CHECK-NEXT: [[T6:%.*]] = select i1 [[T3]], i32 -1, i32 [[T5]]
   // CHECK-NEXT: call i8* @_Znaj(i32 [[T6]])
   // CHECK:      getelementptr inbounds {{.*}}, i32 [[T1]]
   elt *test(unsigned long long s) {
diff --git a/test/CodeGenCXX/new.cpp b/test/CodeGenCXX/new.cpp
index ae2ec15..3bebc2a 100644
--- a/test/CodeGenCXX/new.cpp
+++ b/test/CodeGenCXX/new.cpp
@@ -255,8 +255,6 @@
   // CHECK-LABEL:    define void @_ZN6test155test2EPvi(
   // CHECK:      [[N:%.*]] = load i32, i32*
   // CHECK-NEXT: [[T0:%.*]] = sext i32 [[N]] to i64
-  // CHECK-NEXT: [[T1:%.*]] = icmp slt i64 [[T0]], 0
-  // CHECK-NEXT: [[T2:%.*]] = select i1 [[T1]], i64 -1, i64 [[T0]]
   // CHECK-NEXT: [[P:%.*]] = load i8*, i8**
   // CHECK:      [[BEGIN:%.*]] = bitcast i8* [[P]] to [[A:%.*]]*
   // CHECK-NEXT: [[ISEMPTY:%.*]] = icmp eq i64 [[T0]], 0
diff --git a/test/CodeGenCXX/no-opt-volatile-memcpy.cpp b/test/CodeGenCXX/no-opt-volatile-memcpy.cpp
index a061daa..a51421c 100644
--- a/test/CodeGenCXX/no-opt-volatile-memcpy.cpp
+++ b/test/CodeGenCXX/no-opt-volatile-memcpy.cpp
@@ -18,10 +18,10 @@
 // CHECK: %[[LS:.*]] = alloca %struct.s, align 4
 // CHECK-NEXT: %[[ZERO:.*]] = bitcast %struct.s* %[[LS]] to i8*
 // CHECK-NEXT:  %[[ONE:.*]] = bitcast %struct.s* %[[LS]] to i8*
-// CHECK-NEXT:  call void @llvm.memcpy.{{.*}}(i8* %[[ZERO]], i8* %[[ONE]], i64 132, i32 4, i1 true)
-// CHECK-NEXT:  call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
+// CHECK-NEXT:  call void @llvm.memcpy.{{.*}}(i8* align 4 %[[ZERO]], i8* align 4 %[[ONE]], i64 132, i1 true)
+// CHECK-NEXT:  call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true)
 // CHECK-NEXT:  %[[TWO:.*]] = bitcast %struct.s* %[[LS]] to i8*
-// CHECK-NEXT:  call void @llvm.memcpy.{{.*}}(i8* %[[TWO]], i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
+// CHECK-NEXT:  call void @llvm.memcpy.{{.*}}(i8* align 4 %[[TWO]], i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true)
 
 
 struct s1 {
@@ -35,8 +35,8 @@
   s.y = gs;
 }
 // CHECK-LABEL: define void @_Z3feev()
-// CHECK: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
+// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i64 132, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true)
 
 struct d : s1 {
 };
@@ -47,4 +47,4 @@
   gd = gd;
 }
 // CHECK-LABEL: define void @_Z4gorfv()
-// CHECK:   call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.d, %struct.d* @gd, i32 0, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.d, %struct.d* @gd, i32 0, i32 0, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
+// CHECK:   call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.d, %struct.d* @gd, i32 0, i32 0, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.d, %struct.d* @gd, i32 0, i32 0, i32 0, i32 0, i32 0), i64 132, i1 true)
diff --git a/test/CodeGenCXX/pod-member-memcpys.cpp b/test/CodeGenCXX/pod-member-memcpys.cpp
index 97d203f..9facb8a 100644
--- a/test/CodeGenCXX/pod-member-memcpys.cpp
+++ b/test/CodeGenCXX/pod-member-memcpys.cpp
@@ -116,59 +116,59 @@
 
 // Basic copy-assignment:
 // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.Basic* @_ZN5BasicaSERKS_(%struct.Basic* %this, %struct.Basic* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret %struct.Basic*
 
 // PODMember copy-assignment:
 // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.PODMember* @_ZN9PODMemberaSERKS_(%struct.PODMember* %this, %struct.PODMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}})
 // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret %struct.PODMember*
 
 // PODLikeMember copy-assignment:
 // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.PODLikeMember* @_ZN13PODLikeMemberaSERKS_(%struct.PODLikeMember* %this, %struct.PODLikeMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}})
 // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret %struct.PODLikeMember*
 
 // ArrayMember copy-assignment:
 // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.ArrayMember* @_ZN11ArrayMemberaSERKS_(%struct.ArrayMember* %this, %struct.ArrayMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 64, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 64, i1 {{.*}})
 // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 64, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 64, i1 {{.*}})
 // CHECK: ret %struct.ArrayMember*
 
 // VolatileMember copy-assignment:
 // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.VolatileMember* @_ZN14VolatileMemberaSERKS_(%struct.VolatileMember* %this, %struct.VolatileMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: load volatile i32, i32* {{.*}}, align 4
 // CHECK: store volatile i32 {{.*}}, align 4
 // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret %struct.VolatileMember*
 
 // BitfieldMember copy-assignment:
 // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.BitfieldMember* @_ZN14BitfieldMemberaSERKS_(%struct.BitfieldMember* %this, %struct.BitfieldMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 3, i32 1{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 1 {{.*}} align 1 {{.*}}i64 3, i1 {{.*}})
 // CHECK: ret %struct.BitfieldMember*
 
 // InnerClass copy-assignment:
 // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.InnerClassMember* @_ZN16InnerClassMemberaSERKS_(%struct.InnerClassMember* %this, %struct.InnerClassMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}})
 // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret %struct.InnerClassMember*
 
 // PackedMembers copy-assignment:
 // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.PackedMembers* @_ZN13PackedMembersaSERKS_(%struct.PackedMembers* %this, %struct.PackedMembers* dereferenceable({{[0-9]+}}))
 // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 1{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 1 {{.*}} align 1 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret %struct.PackedMembers*
 
 // COPY-CONSTRUCTORS:
@@ -183,70 +183,70 @@
 // PackedMembers copy-assignment:
 // CHECK-LABEL: define linkonce_odr void @_ZN13PackedMembersC2ERKS_(%struct.PackedMembers* %this, %struct.PackedMembers* dereferenceable({{[0-9]+}}))
 // CHECK: call void @_ZN6NonPODC1ERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 1{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 1 {{.*}} align 1 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret void
 
 CALL_CC(BitfieldMember2)
 // BitfieldMember2 copy-constructor:
 // CHECK-2-LABEL: define linkonce_odr void @_ZN15BitfieldMember2C2ERKS_(%struct.BitfieldMember2* %this, %struct.BitfieldMember2* dereferenceable({{[0-9]+}}))
-// CHECK-2: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4, i1 false)
+// CHECK-2: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 false)
 // CHECK-2: call void @_ZN6NonPODC1ERKS_
 // CHECK-2: ret void
 
 CALL_CC(BitfieldMember3)
 // BitfieldMember3 copy-constructor:
 // CHECK-LABEL: define linkonce_odr void @_ZN15BitfieldMember3C2ERKS_(%struct.BitfieldMember3* %this, %struct.BitfieldMember3* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 8, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 8, i1 false)
 // CHECK: ret void
 
 CALL_CC(ReferenceMember)
 // ReferenceMember copy-constructor:
 // CHECK-LABEL: define linkonce_odr void @_ZN15ReferenceMemberC2ERKS_(%struct.ReferenceMember* %this, %struct.ReferenceMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 8{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 16, i1 {{.*}})
 // CHECK: call void @_ZN6NonPODC1ERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 8{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret void
 
 CALL_CC(InnerClassMember)
 // InnerClass copy-constructor:
 // CHECK-LABEL: define linkonce_odr void @_ZN16InnerClassMemberC2ERKS_(%struct.InnerClassMember* %this, %struct.InnerClassMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}})
 // CHECK: call void @_ZN6NonPODC1ERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret void
 
 CALL_CC(BitfieldMember)
 // BitfieldMember copy-constructor:
 // CHECK-LABEL: define linkonce_odr void @_ZN14BitfieldMemberC2ERKS_(%struct.BitfieldMember* %this, %struct.BitfieldMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: call void @_ZN6NonPODC1ERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 3, i32 1{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 1 {{.*}} align 1 {{.*}}i64 3, i1 {{.*}})
 // CHECK: ret void
 
 CALL_CC(VolatileMember)
 // VolatileMember copy-constructor:
 // CHECK-LABEL: define linkonce_odr void @_ZN14VolatileMemberC2ERKS_(%struct.VolatileMember* %this, %struct.VolatileMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: load volatile i32, i32* {{.*}}, align 4
 // CHECK: store volatile i32 {{.*}}, align 4
 // CHECK: call void @_ZN6NonPODC1ERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret void
 
 CALL_CC(ArrayMember)
 // ArrayMember copy-constructor:
 // CHECK-LABEL: define linkonce_odr void @_ZN11ArrayMemberC2ERKS_(%struct.ArrayMember* %this, %struct.ArrayMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 64, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 64, i1 {{.*}})
 // CHECK: call void @_ZN6NonPODC1ERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 64, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 64, i1 {{.*}})
 // CHECK: ret void
 
 CALL_CC(PODLikeMember)
 // PODLikeMember copy-constructor:
 // CHECK-LABEL: define linkonce_odr void @_ZN13PODLikeMemberC2ERKS_(%struct.PODLikeMember* %this, %struct.PODLikeMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}})
 // CHECK: invoke void @_ZN6NonPODC1ERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret void
 // CHECK: landingpad
 // CHECK: invoke void @_ZN7PODLikeD1Ev
@@ -254,15 +254,15 @@
 CALL_CC(PODMember)
 // PODMember copy-constructor:
 // CHECK-LABEL: define linkonce_odr void @_ZN9PODMemberC2ERKS_(%struct.PODMember* %this, %struct.PODMember* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}})
 // CHECK: call void @_ZN6NonPODC1ERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret void
 
 CALL_CC(Basic)
 // Basic copy-constructor:
 // CHECK-LABEL: define linkonce_odr void @_ZN5BasicC2ERKS_(%struct.Basic* %this, %struct.Basic* dereferenceable({{[0-9]+}}))
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: call void @_ZN6NonPODC1ERKS_
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}})
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}})
 // CHECK: ret void
diff --git a/test/CodeGenCXX/pr20897.cpp b/test/CodeGenCXX/pr20897.cpp
index 9a7fdb9..16e122b 100644
--- a/test/CodeGenCXX/pr20897.cpp
+++ b/test/CodeGenCXX/pr20897.cpp
@@ -3,13 +3,13 @@
 
 // __declspec(dllexport) causes us to export the implicit constructor.
 struct __declspec(dllexport) Derived : virtual Base {
-// CHECK-LABEL: define weak_odr dllexport x86_thiscallcc %struct.Derived* @"\01??0Derived@@QAE@ABU0@@Z"
+// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc %struct.Derived* @"\01??0Derived@@QAE@ABU0@@Z"
 // CHECK:      %[[this:.*]] = load %struct.Derived*, %struct.Derived** {{.*}}
 // CHECK-NEXT: store %struct.Derived* %[[this]], %struct.Derived** %[[retval:.*]]
 // CHECK:      %[[dest_a_gep:.*]] = getelementptr inbounds %struct.Derived, %struct.Derived* %[[this]], i32 0, i32 1
 // CHECK-NEXT: %[[src_load:.*]]   = load %struct.Derived*, %struct.Derived** {{.*}}
 // CHECK-NEXT: %[[src_a_gep:.*]]  = getelementptr inbounds %struct.Derived, %struct.Derived* %[[src_load:.*]], i32 0, i32 1
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[dest_a_gep]], i8* %[[src_a_gep]], i64 1, i32 4, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[dest_a_gep]], i8* align 4 %[[src_a_gep]], i64 1, i1 false)
 // CHECK-NEXT: %[[dest_this:.*]] = load %struct.Derived*, %struct.Derived** %[[retval]]
 // CHECK-NEXT: ret %struct.Derived* %[[dest_this]]
   bool a : 1;
@@ -18,7 +18,7 @@
 
 // __declspec(dllexport) causes us to export the implicit copy constructor.
 struct __declspec(dllexport) Derived2 : virtual Base {
-// CHECK-LABEL: define weak_odr dllexport x86_thiscallcc %struct.Derived2* @"\01??0Derived2@@QAE@ABU0@@Z"
+// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc %struct.Derived2* @"\01??0Derived2@@QAE@ABU0@@Z"
 // CHECK:      %[[this:.*]] = load %struct.Derived2*, %struct.Derived2** {{.*}}
 // CHECK-NEXT: store %struct.Derived2* %[[this]], %struct.Derived2** %[[retval:.*]]
 // CHECK:      %[[dest_a_gep:.*]] = getelementptr inbounds %struct.Derived2, %struct.Derived2* %[[this]], i32 0, i32 1
@@ -26,7 +26,7 @@
 // CHECK-NEXT: %[[src_a_gep:.*]]  = getelementptr inbounds %struct.Derived2, %struct.Derived2* %[[src_load:.*]], i32 0, i32 1
 // CHECK-NEXT: %[[dest_a_bitcast:.*]]  = bitcast [1 x i32]* %[[dest_a_gep]] to i8*
 // CHECK-NEXT: %[[src_a_bitcast:.*]] = bitcast [1 x i32]* %[[src_a_gep]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[dest_a_bitcast]], i8* %[[src_a_bitcast]], i32 4, i32 4, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %[[dest_a_bitcast]], i8* align 4 %[[src_a_bitcast]], i32 4, i1 false)
 // CHECK-NEXT: %[[dest_this:.*]] = load %struct.Derived2*, %struct.Derived2** %[[retval]]
 // CHECK-NEXT: ret %struct.Derived2* %[[dest_this]]
   int Array[1];
diff --git a/test/CodeGenCXX/pr27030.cpp b/test/CodeGenCXX/pr27030.cpp
index 5c24051..ce83c89 100644
--- a/test/CodeGenCXX/pr27030.cpp
+++ b/test/CodeGenCXX/pr27030.cpp
@@ -5,7 +5,7 @@
 extern int B::*a;
 void test1() { (int A::*)(a); }
 }
-// CHECK-LABEL: define void @test1(
+// CHECK-LABEL: define dso_local void @test1(
 // CHECK: %[[load:.*]]       = load i32, i32* @a
 // CHECK: %[[memptr_cmp:.*]] = icmp ne i32 %[[load]], -1
 // CHECK: br i1 %[[memptr_cmp]]
diff --git a/test/CodeGenCXX/pr28360.cpp b/test/CodeGenCXX/pr28360.cpp
index 5d7e1ae..af226df 100644
--- a/test/CodeGenCXX/pr28360.cpp
+++ b/test/CodeGenCXX/pr28360.cpp
@@ -10,7 +10,7 @@
 
 void Baz() { Bar(&A::Foo); }
 
-// CHECK-LABEL: define void @"\01?Baz@@YAXXZ"(
+// CHECK-LABEL: define dso_local void @"\01?Baz@@YAXXZ"(
 // CHECK:  %[[ref_tmp:.*]] = alloca i8*, align 4
 // CHECK: store i8* bitcast (void (%struct.A*)* @"\01?Foo@A@@QAEXXZ" to i8*), i8** %[[ref_tmp]], align 4
 // CHECK: call void @"\01?Bar@@YAXABQ8A@@AEXXZ@Z"(i8** dereferenceable(4) %[[ref_tmp]])
diff --git a/test/CodeGenCXX/pr30731.cpp b/test/CodeGenCXX/pr30731.cpp
index 078f21c..f985c26 100644
--- a/test/CodeGenCXX/pr30731.cpp
+++ b/test/CodeGenCXX/pr30731.cpp
@@ -16,6 +16,6 @@
 
 void f(S* s) { s->f(); }
 
-// CHECK-LABEL: define void @"\01?f@@YAXPAUS@@@Z"
+// CHECK-LABEL: define dso_local void @"\01?f@@YAXPAUS@@@Z"
 // CHECK: call
 // CHECK: ret void
diff --git a/test/CodeGenCXX/pr33080.cpp b/test/CodeGenCXX/pr33080.cpp
index a744bca..782327e 100644
--- a/test/CodeGenCXX/pr33080.cpp
+++ b/test/CodeGenCXX/pr33080.cpp
@@ -1,25 +1,25 @@
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fms-extensions -emit-llvm -o- %s | FileCheck %s
 
 void fa(__unaligned struct A *) {}
-// CHECK: define void @_Z2faPU11__unaligned1A(
+// CHECK: define {{(dso_local )?}}void @_Z2faPU11__unaligned1A(
 
 void ga(struct A *, struct A *) {}
-// CHECK: define void @_Z2gaP1AS0_(
+// CHECK: define {{(dso_local )?}}void @_Z2gaP1AS0_(
 
 void gb(__unaligned struct A *, struct A *) {}
-// CHECK: define void @_Z2gbPU11__unaligned1APS_(
+// CHECK: define {{(dso_local )?}}void @_Z2gbPU11__unaligned1APS_(
 
 void gc(struct A *, __unaligned struct A *) {}
-// CHECK: define void @_Z2gcP1APU11__unalignedS_(
+// CHECK: define {{(dso_local )?}}void @_Z2gcP1APU11__unalignedS_(
 
 void gd(__unaligned struct A *, __unaligned struct A *) {}
-// CHECK: define void @_Z2gdPU11__unaligned1AS1_(
+// CHECK: define {{(dso_local )?}}void @_Z2gdPU11__unaligned1AS1_(
 
 void hb(__unaligned struct A *, __unaligned const struct A *) {}
-// CHECK: define void @_Z2hbPU11__unaligned1APU11__unalignedKS_(
+// CHECK: define {{(dso_local )?}}void @_Z2hbPU11__unaligned1APU11__unalignedKS_(
 
 void ja(__unaligned struct A *, __unaligned struct A *__unaligned *, __unaligned struct A *__unaligned *__unaligned *) {}
-// CHECK: define void @_Z2jaPU11__unaligned1APU11__unalignedS1_PU11__unalignedS3_(
+// CHECK: define {{(dso_local )?}}void @_Z2jaPU11__unaligned1APU11__unalignedS1_PU11__unalignedS3_(
 
 void jb(__unaligned struct A *, __unaligned struct A **, __unaligned struct A *__unaligned *__unaligned *) {}
 // CHECK: @_Z2jbPU11__unaligned1APS1_PU11__unalignedPU11__unalignedS1_(
diff --git a/test/CodeGenCXX/pragma-init_seg.cpp b/test/CodeGenCXX/pragma-init_seg.cpp
index 1ed841f..8fecb1e 100644
--- a/test/CodeGenCXX/pragma-init_seg.cpp
+++ b/test/CodeGenCXX/pragma-init_seg.cpp
@@ -9,17 +9,17 @@
 namespace simple_init {
 #pragma init_seg(compiler)
 int x = f();
-// CHECK: @"\01?x@simple_init@@3HA" = global i32 0, align 4
+// CHECK: @"\01?x@simple_init@@3HA" = dso_local global i32 0, align 4
 // CHECK: @__cxx_init_fn_ptr = private constant void ()* @"\01??__Ex@simple_init@@YAXXZ", section ".CRT$XCC"
 
 #pragma init_seg(lib)
 int y = f();
-// CHECK: @"\01?y@simple_init@@3HA" = global i32 0, align 4
+// CHECK: @"\01?y@simple_init@@3HA" = dso_local global i32 0, align 4
 // CHECK: @__cxx_init_fn_ptr.1 = private constant void ()* @"\01??__Ey@simple_init@@YAXXZ", section ".CRT$XCL"
 
 #pragma init_seg(user)
 int z = f();
-// CHECK: @"\01?z@simple_init@@3HA" = global i32 0, align 4
+// CHECK: @"\01?z@simple_init@@3HA" = dso_local global i32 0, align 4
 // No function pointer!  This one goes on @llvm.global_ctors.
 }
 
@@ -35,7 +35,7 @@
 
 namespace selectany_init {
 int __declspec(selectany) x = f();
-// CHECK: @"\01?x@selectany_init@@3HA" = weak_odr global i32 0, comdat, align 4
+// CHECK: @"\01?x@selectany_init@@3HA" = weak_odr dso_local global i32 0, comdat, align 4
 // CHECK: @__cxx_init_fn_ptr.3 = private constant void ()* @"\01??__Ex@selectany_init@@YAXXZ", section ".asdf", comdat($"\01?x@selectany_init@@3HA")
 }
 
@@ -43,7 +43,7 @@
 template <typename T> struct A { static const int x; };
 template <typename T> const int A<T>::x = f();
 template struct A<int>;
-// CHECK: @"\01?x@?$A@H@explicit_template_instantiation@@2HB" = weak_odr global i32 0, comdat, align 4
+// CHECK: @"\01?x@?$A@H@explicit_template_instantiation@@2HB" = weak_odr dso_local global i32 0, comdat, align 4
 // CHECK: @__cxx_init_fn_ptr.4 = private constant void ()* @"\01??__Ex@?$A@H@explicit_template_instantiation@@2HB@YAXXZ", section ".asdf", comdat($"\01?x@?$A@H@explicit_template_instantiation@@2HB")
 }
 
@@ -51,7 +51,7 @@
 template <typename T> struct A { static const int x; };
 template <typename T> const int A<T>::x = f();
 int g() { return A<int>::x; }
-// CHECK: @"\01?x@?$A@H@implicit_template_instantiation@@2HB" = linkonce_odr global i32 0, comdat, align 4
+// CHECK: @"\01?x@?$A@H@implicit_template_instantiation@@2HB" = linkonce_odr dso_local global i32 0, comdat, align 4
 // CHECK: @__cxx_init_fn_ptr.5 = private constant void ()* @"\01??__Ex@?$A@H@implicit_template_instantiation@@2HB@YAXXZ", section ".asdf", comdat($"\01?x@?$A@H@implicit_template_instantiation@@2HB")
 }
 
diff --git a/test/CodeGenCXX/pragma-weak.cpp b/test/CodeGenCXX/pragma-weak.cpp
index caab266..83d66bc 100644
--- a/test/CodeGenCXX/pragma-weak.cpp
+++ b/test/CodeGenCXX/pragma-weak.cpp
@@ -5,7 +5,7 @@
 // GCC produces a weak symbol for this because it matches mangled names.
 // Different c++ ABIs may or may not mangle this, so we produce a strong
 // symbol.
-// CHECK: @zex = global i32
+// CHECK: @zex = {{(dso_local )?}}global i32
 
 #pragma weak foo
 struct S {  void foo(); };
diff --git a/test/CodeGenCXX/reference-init.cpp b/test/CodeGenCXX/reference-init.cpp
index ba7b282..c62e034 100644
--- a/test/CodeGenCXX/reference-init.cpp
+++ b/test/CodeGenCXX/reference-init.cpp
@@ -31,6 +31,6 @@
 struct SelfReference { SelfReference &r; };
 extern SelfReference self_reference_1;
 SelfReference self_reference_2 = {self_reference_1};
-// CHECK-CXX98: @self_reference_2 = global %[[SELF_REF:.*]] { %[[SELF_REF]]* @self_reference_1 }
-// CHECK-CXX11: @self_reference_2 = global %[[SELF_REF:.*]] zeroinitializer
+// CHECK-CXX98: @self_reference_2 = {{.*}}global %[[SELF_REF:.*]] { %[[SELF_REF]]* @self_reference_1 }
+// CHECK-CXX11: @self_reference_2 = {{(dso_local )?}}global %[[SELF_REF:.*]] zeroinitializer
 // CHECK-CXX11: call {{.*}}memcpy{{.*}} @self_reference_2 {{.*}} @self_reference_1
diff --git a/test/CodeGenCXX/regcall.cpp b/test/CodeGenCXX/regcall.cpp
index 2b7b9e2..ea1f878 100644
--- a/test/CodeGenCXX/regcall.cpp
+++ b/test/CodeGenCXX/regcall.cpp
@@ -22,8 +22,8 @@
   return i;
 }
 // CHECK-LIN: define x86_regcallcc {{.+}}@_Z15__regcall3__foo
-// CHECK-WIN64: define x86_regcallcc {{.+}}@"\01?foo@@YwHH@Z"
-// CHECK-WIN32: define x86_regcallcc {{.+}}@"\01?foo@@YwHH@Z"
+// CHECK-WIN64: define dso_local x86_regcallcc {{.+}}@"\01?foo@@YwHH@Z"
+// CHECK-WIN32: define dso_local x86_regcallcc {{.+}}@"\01?foo@@YwHH@Z"
 
 // used to give a body to test_class functions
 static int x = 0;
@@ -37,8 +37,8 @@
   // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classC1Ev
   // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classC2Ev
   // Windows ignores calling convention on constructor/destructors.
-  // CHECK-WIN64-DAG: define linkonce_odr %class.test_class* @"\01??0test_class@@QEAA@XZ"
-  // CHECK-WIN32-DAG: define linkonce_odr x86_thiscallcc %class.test_class* @"\01??0test_class@@QAE@XZ"
+  // CHECK-WIN64-DAG: define linkonce_odr dso_local %class.test_class* @"\01??0test_class@@QEAA@XZ"
+  // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_thiscallcc %class.test_class* @"\01??0test_class@@QAE@XZ"
 
 #ifndef WIN_TEST
   __regcall 
@@ -47,31 +47,31 @@
   // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classD2Ev
   // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classD1Ev
   // Windows ignores calling convention on constructor/destructors.
-  // CHECK-WIN64-DAG: define linkonce_odr void @"\01??_Dtest_class@@QEAAXXZ"
-  // CHECK-WIN32-DAG: define linkonce_odr x86_thiscallcc void @"\01??_Dtest_class@@QAEXXZ"
+  // CHECK-WIN64-DAG: define linkonce_odr dso_local void @"\01??_Dtest_class@@QEAAXXZ"
+  // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_thiscallcc void @"\01??_Dtest_class@@QAEXXZ"
   
   test_class& __regcall operator+=(const test_class&){
     return *this;
   }
   // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc dereferenceable(4) %class.test_class* @_ZN10test_classpLERKS_
-  // CHECK-WIN64-DAG: define linkonce_odr x86_regcallcc dereferenceable(4) %class.test_class* @"\01??Ytest_class@@QEAwAEAV0@AEBV0@@Z"
-  // CHECK-WIN32-DAG: define linkonce_odr x86_regcallcc dereferenceable(4) %class.test_class* @"\01??Ytest_class@@QAwAAV0@ABV0@@Z"
+  // CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc dereferenceable(4) %class.test_class* @"\01??Ytest_class@@QEAwAEAV0@AEBV0@@Z"
+  // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc dereferenceable(4) %class.test_class* @"\01??Ytest_class@@QAwAAV0@ABV0@@Z"
   void __regcall do_thing(){}
   // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_class20__regcall3__do_thingEv
-  // CHECK-WIN64-DAG: define linkonce_odr x86_regcallcc void @"\01?do_thing@test_class@@QEAwXXZ"
-  // CHECK-WIN32-DAG: define linkonce_odr x86_regcallcc void @"\01?do_thing@test_class@@QAwXXZ"
+  // CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc void @"\01?do_thing@test_class@@QEAwXXZ"
+  // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc void @"\01?do_thing@test_class@@QAwXXZ"
   
   template<typename T>
   void __regcall tempFunc(T i){}
   // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_class20__regcall3__tempFuncIiEEvT_
-  // CHECK-WIN64-DAG: define linkonce_odr x86_regcallcc void @"\01??$freeTempFunc@H@@YwXH@Z"
-  // CHECK-WIN32-DAG: define linkonce_odr x86_regcallcc void @"\01??$freeTempFunc@H@@YwXH@Z"
+  // CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc void @"\01??$freeTempFunc@H@@YwXH@Z"
+  // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc void @"\01??$freeTempFunc@H@@YwXH@Z"
 };
 
 bool __regcall operator ==(const test_class&, const test_class&){ --x; return false;}
 // CHECK-LIN-DAG: define x86_regcallcc zeroext i1 @_ZeqRK10test_classS1_
-// CHECK-WIN64-DAG: define x86_regcallcc zeroext i1 @"\01??8@Yw_NAEBVtest_class@@0@Z"
-// CHECK-WIN32-DAG: define x86_regcallcc zeroext i1 @"\01??8@Yw_NABVtest_class@@0@Z"
+// CHECK-WIN64-DAG: define dso_local x86_regcallcc zeroext i1 @"\01??8@Yw_NAEBVtest_class@@0@Z"
+// CHECK-WIN32-DAG: define dso_local x86_regcallcc zeroext i1 @"\01??8@Yw_NABVtest_class@@0@Z"
 
 test_class __regcall operator""_test_class (unsigned long long) { ++x; return test_class{};}
 // CHECK-LIN64-DAG: define x86_regcallcc void @_Zli11_test_classy(%class.test_class* noalias sret %agg.result, i64)
@@ -82,8 +82,8 @@
 template<typename T>
 void __regcall freeTempFunc(T i){}
 // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_Z24__regcall3__freeTempFuncIiEvT_
-// CHECK-WIN64-DAG: define linkonce_odr x86_regcallcc void @"\01??$freeTempFunc@H@@YwXH@Z"
-// CHECK-WIN32-DAG: define linkonce_odr x86_regcallcc void @"\01??$freeTempFunc@H@@YwXH@Z"
+// CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc void @"\01??$freeTempFunc@H@@YwXH@Z"
+// CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc void @"\01??$freeTempFunc@H@@YwXH@Z"
 
 // class to force generation of functions
 void force_gen() {
@@ -101,5 +101,5 @@
 }
 // CHECK-LIN64-DAG: define x86_regcallcc void @_Z15__regcall3__fooCe({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 16 %f)
 // CHECK-LIN32-DAG: define x86_regcallcc void @_Z15__regcall3__fooCe({ x86_fp80, x86_fp80 }* inreg noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %f)
-// CHECK-WIN64-DAG: define x86_regcallcc { double, double } @"\01?foo@@YwU?$_Complex@O@__clang@@U12@@Z"(double %f.0, double %f.1)
-// CHECK-WIN32-DAG: define x86_regcallcc { double, double } @"\01?foo@@YwU?$_Complex@O@__clang@@U12@@Z"(double %f.0, double %f.1)
+// CHECK-WIN64-DAG: define dso_local x86_regcallcc { double, double } @"\01?foo@@YwU?$_Complex@O@__clang@@U12@@Z"(double %f.0, double %f.1)
+// CHECK-WIN32-DAG: define dso_local x86_regcallcc { double, double } @"\01?foo@@YwU?$_Complex@O@__clang@@U12@@Z"(double %f.0, double %f.1)
diff --git a/test/CodeGenCXX/rtti-mingw64.cpp b/test/CodeGenCXX/rtti-mingw64.cpp
index 9f3e039..628a4d0 100644
--- a/test/CodeGenCXX/rtti-mingw64.cpp
+++ b/test/CodeGenCXX/rtti-mingw64.cpp
@@ -7,8 +7,8 @@
 };
 C::~C() {}
 
-// CHECK: @_ZTI1C = linkonce_odr
-// CHECK: @_ZTI1B = linkonce_odr constant { i8*, i8*, i32, i32, i8*, i64 }
+// CHECK: @_ZTI1C = linkonce_odr dso_local
+// CHECK: @_ZTI1B = linkonce_odr dso_local constant { i8*, i8*, i32, i32, i8*, i64 }
 // CHECK-SAME:  i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2) to i8*),
 // CHECK-SAME:  i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1B, i32 0, i32 0),
 // CHECK-SAME:  i32 0,
diff --git a/test/CodeGenCXX/runtime-dllstorage.cpp b/test/CodeGenCXX/runtime-dllstorage.cpp
index 76c002c..4fcc903 100644
--- a/test/CodeGenCXX/runtime-dllstorage.cpp
+++ b/test/CodeGenCXX/runtime-dllstorage.cpp
@@ -12,7 +12,7 @@
 // %clang_cc1 -triple i686-windows-itanium -std=c++11 -fdeclspec -fms-compatibility -fexceptions -fcxx-exceptions -fno-use-cxa-atexit -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-IA -check-prefix CHECK-DYNAMIC-IA -check-prefix CHECK-DYNAMIC-IA-ATEXIT
 // %clang_cc1 -triple i686-windows-itanium -std=c++11 -fdeclspec -fms-compatibility -fexceptions -fcxx-exceptions -fno-use-cxa-atexit -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-IA -check-prefix CHECK-STATIC-IA -check-prefix CHECK-STATIC-IA-ATEXIT
 
-// RUN: %clang_cc1 -triple i686-windows-gnu -std=c++11 -fdeclspec -fms-compatibility -fexceptions -fcxx-exceptions -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-IA -check-prefix CHECK-DYNAMIC-IA
+// RUN: %clang_cc1 -triple i686-windows-gnu -std=c++11 -fdeclspec -fms-compatibility -fexceptions -fcxx-exceptions -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-IA -check-prefix CHECK-STATIC-IA
 // RUN: %clang_cc1 -triple i686-windows-gnu -std=c++11 -fdeclspec -fms-compatibility -fexceptions -fcxx-exceptions -flto-visibility-public-std -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-IA -check-prefix CHECK-STATIC-IA
 // RUN: %clang_cc1 -triple i686-windows-cygnus -std=c++11 -fdeclspec -fms-compatibility -fexceptions -fcxx-exceptions -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-IA -check-prefix CHECK-DYNAMIC-IA
 // RUN: %clang_cc1 -triple i686-windows-cygnus -std=c++11 -fdeclspec -fms-compatibility -fexceptions -fcxx-exceptions -flto-visibility-public-std -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-IA -check-prefix CHECK-STATIC-IA
@@ -110,7 +110,7 @@
 // CHECK-MS-DAG: declare i32 @atexit(void ()*)
 // CHECK-MS-DYNAMIC-DAG: declare dllimport {{.*}} void @_CxxThrowException
 // CHECK-MS-STATIC-DAG: declare {{.*}} void @_CxxThrowException
-// CHECK-MS-DAG: declare noalias i8* @"\01??2@YAPAXI@Z"
+// CHECK-MS-DAG: declare dso_local noalias i8* @"\01??2@YAPAXI@Z"
 // CHECK-MS-DAG: declare void @_Init_thread_header(i32*)
 // CHECK-MS-DAG: declare void @_Init_thread_footer(i32*)
 
@@ -128,7 +128,7 @@
 // CHECK-DYNAMIC-NODECL-IA-DAG: declare dllimport i32 @__cxa_guard_acquire(i64*)
 // CHECK-DYNAMIC-IMPORT-IA-DAG: declare dllimport i32 @__cxa_guard_acquire(i64*)
 // CHECK-DYNAMIC-EXPORT-IA-DAG: declare dllexport i32 @__cxa_guard_acquire(i64*)
-// CHECK-IA-DAG: declare noalias i8* @_Znwj(i32)
+// CHECK-IA-DAG: declare dso_local noalias i8* @_Znwj(i32)
 // CHECK-DYNAMIC-DECL-IA-DAG: declare dllimport void @__cxa_guard_release(i64*)
 // CHECK-DYNAMIC-NODECL-IA-DAG: declare dllimport void @__cxa_guard_release(i64*)
 // CHECK-DYNAMIC-IMPORT-IA-DAG: declare dllimport void @__cxa_guard_release(i64*)
@@ -136,7 +136,7 @@
 // CHECK-DYANMIC-IA-DAG: declare dllimport void @_ZSt9terminatev()
 // CHECK-DYNAMIC-NODECL-IA-DAG: declare void @_ZSt9terminatev()
 // CHECK-DYNAMIC-IMPORT-IA-DAG: declare dllimport void @_ZSt9terminatev()
-// CHECK-DYNAMIC-EXPORT-IA-DAG: declare dllexport void @_ZSt9terminatev()
+// CHECK-DYNAMIC-EXPORT-IA-DAG: declare dso_local dllexport void @_ZSt9terminatev()
 
 // CHECK-STATIC-IA-DAG: declare i32 @__cxa_thread_atexit(void (i8*)*, i8*, i8*)
 // CHECK-STATIC-IA-DAG: declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
@@ -145,14 +145,14 @@
 // CHECK-STATIC-DECL-IA-DAG: declare i32 @__cxa_guard_acquire(i64*)
 // CHECK-STATIC-NODECL-IA-DAG: declare i32 @__cxa_guard_acquire(i64*)
 // CHECK-STATIC-IMPORT-IA-DAG: declare i32 @__cxa_guard_acquire(i64*)
-// CHECK-STATIC-EXPORT-IA-DAG: declare i32 @__cxa_guard_acquire(i64*)
-// CHECK-IA-DAG: declare noalias i8* @_Znwj(i32)
+// CHECK-STATIC-EXPORT-IA-DAG: declare dso_local i32 @__cxa_guard_acquire(i64*)
+// CHECK-IA-DAG: declare dso_local noalias i8* @_Znwj(i32)
 // CHECK-STATIC-DECL-IA-DAG: declare void @__cxa_guard_release(i64*)
 // CHECK-STATIC-NODECL-IA-DAG: declare void @__cxa_guard_release(i64*)
 // CHECK-STATIC-IMPORT-IA-DAG: declare void @__cxa_guard_release(i64*)
-// CHECK-STATIC-EXPORT-IA-DAG: declare void @__cxa_guard_release(i64*)
+// CHECK-STATIC-EXPORT-IA-DAG: declare dso_local void @__cxa_guard_release(i64*)
 // CHECK-STATIC-IA-DAG: declare void @_ZSt9terminatev()
 // CHECK-STATIC-NODECL-IA-DAG: declare void @_ZSt9terminatev()
 // CHECK-STATIC-IMPORT-IA-DAG: declare void @_ZSt9terminatev()
-// CHECK-STATIC-EXPORT-IA-DAG: declare dllexport void @_ZSt9terminatev()
+// CHECK-STATIC-EXPORT-IA-DAG: declare dso_local dllexport void @_ZSt9terminatev()
 
diff --git a/test/CodeGenCXX/sanitize-no-dtor-callback.cpp b/test/CodeGenCXX/sanitize-no-dtor-callback.cpp
index 2c35576..afc5382 100644
--- a/test/CodeGenCXX/sanitize-no-dtor-callback.cpp
+++ b/test/CodeGenCXX/sanitize-no-dtor-callback.cpp
@@ -1,8 +1,9 @@
-// Test without the flag -fsanitize-memory-use-after-dtor, to ensure that
+// Test with the flag -fno-sanitize-memory-use-after-dtor, to ensure that
 // instrumentation is not erroneously inserted
-// RUN: %clang_cc1 -fsanitize=memory -triple=x86_64-pc-linux -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=memory -fno-sanitize-memory-use-after-dtor -triple=x86_64-pc-linux -emit-llvm -o - %s | FileCheck %s
 
 struct Simple {
+  int x;
   ~Simple() {}
 };
 Simple s;
@@ -10,6 +11,7 @@
 // CHECK-NOT: call void @__sanitizer_dtor_callback
 
 struct Inlined {
+  int x;
   inline ~Inlined() {}
 };
 Inlined i;
diff --git a/test/CodeGenCXX/sections.cpp b/test/CodeGenCXX/sections.cpp
index c33871a..899bdfa 100644
--- a/test/CodeGenCXX/sections.cpp
+++ b/test/CodeGenCXX/sections.cpp
@@ -76,26 +76,26 @@
 __declspec(allocate("short_section")) short short_var = 42;
 }
 
-//CHECK: @D = global i32 1
-//CHECK: @a = global i32 1, section ".data"
-//CHECK: @b = constant i32 1, section ".my_const"
+//CHECK: @D = dso_local global i32 1
+//CHECK: @a = dso_local global i32 1, section ".data"
+//CHECK: @b = dso_local constant i32 1, section ".my_const"
 //CHECK: @[[MYSTR:.*]] = {{.*}} unnamed_addr constant [11 x i8] c"my string!\00"
-//CHECK: @s = global i8* getelementptr inbounds ([11 x i8], [11 x i8]* @[[MYSTR]], i32 0, i32 0), section ".data2"
-//CHECK: @c = global i32 1, section ".my_seg"
-//CHECK: @d = global i32 1, section ".data"
-//CHECK: @e = global i32 0, section ".my_bss"
-//CHECK: @f = global i32 0, section ".c"
-//CHECK: @i = global i32 0
-//CHECK: @TEST1 = global i32 0
-//CHECK: @TEST2 = global i32 0, section ".bss1"
-//CHECK: @TEST3 = global i32 0, section ".bss1"
-//CHECK: @d2 = global i32 1, section ".data"
-//CHECK: @b2 = constant i32 1, section ".my_const"
-//CHECK: @unreferenced = constant i32 0, section "read_flag_section"
-//CHECK: @referenced = constant i32 42, section "read_flag_section"
-//CHECK: @implicitly_read_write = global i32 42, section "no_section_attributes"
-//CHECK: @long_var = global i32 42, section "long_section"
-//CHECK: @short_var = global i16 42, section "short_section"
-//CHECK: define void @g()
-//CHECK: define void @h() {{.*}} section ".my_code"
-//CHECK: define void @h2() {{.*}} section ".my_code"
+//CHECK: @s = dso_local global i8* getelementptr inbounds ([11 x i8], [11 x i8]* @[[MYSTR]], i32 0, i32 0), section ".data2"
+//CHECK: @c = dso_local global i32 1, section ".my_seg"
+//CHECK: @d = dso_local global i32 1, section ".data"
+//CHECK: @e = dso_local global i32 0, section ".my_bss"
+//CHECK: @f = dso_local global i32 0, section ".c"
+//CHECK: @i = dso_local global i32 0
+//CHECK: @TEST1 = dso_local global i32 0
+//CHECK: @TEST2 = dso_local global i32 0, section ".bss1"
+//CHECK: @TEST3 = dso_local global i32 0, section ".bss1"
+//CHECK: @d2 = dso_local global i32 1, section ".data"
+//CHECK: @b2 = dso_local constant i32 1, section ".my_const"
+//CHECK: @unreferenced = dso_local constant i32 0, section "read_flag_section"
+//CHECK: @referenced = dso_local constant i32 42, section "read_flag_section"
+//CHECK: @implicitly_read_write = dso_local global i32 42, section "no_section_attributes"
+//CHECK: @long_var = dso_local global i32 42, section "long_section"
+//CHECK: @short_var = dso_local global i16 42, section "short_section"
+//CHECK: define dso_local void @g()
+//CHECK: define dso_local void @h() {{.*}} section ".my_code"
+//CHECK: define dso_local void @h2() {{.*}} section ".my_code"
diff --git a/test/CodeGenCXX/specialized-static-data-mem-init.cpp b/test/CodeGenCXX/specialized-static-data-mem-init.cpp
index 6879962..d793242 100644
--- a/test/CodeGenCXX/specialized-static-data-mem-init.cpp
+++ b/test/CodeGenCXX/specialized-static-data-mem-init.cpp
@@ -2,8 +2,8 @@
 // rdar: // 8562966
 // pr8409
 
-// CHECK: @_ZN1CIiE11needs_guardE = linkonce_odr global
-// CHECK: @_ZGVN1CIiE11needs_guardE = linkonce_odr global
+// CHECK: @_ZN1CIiE11needs_guardE = linkonce_odr {{(dso_local )?}}global
+// CHECK: @_ZGVN1CIiE11needs_guardE = linkonce_odr {{(dso_local )?}}global
 
 struct K
 {
diff --git a/test/CodeGenCXX/split-stacks.cpp b/test/CodeGenCXX/split-stacks.cpp
index 76e1b79..2373bcc 100644
--- a/test/CodeGenCXX/split-stacks.cpp
+++ b/test/CodeGenCXX/split-stacks.cpp
@@ -16,18 +16,18 @@
   return tnosplit<int>();
 }
 
-// CHECK-SEGSTK: define i32 @_Z3foov() [[SS:#[0-9]+]] {
-// CHECK-SEGSTK: define i32 @_Z7nosplitv() [[NSS1:#[0-9]+]] {
-// CHECK-SEGSTK: define linkonce_odr i32 @_Z8tnosplitIiEiv() [[NSS2:#[0-9]+]] comdat {
+// CHECK-SEGSTK: define dso_local i32 @_Z3foov() [[SS:#[0-9]+]] {
+// CHECK-SEGSTK: define dso_local i32 @_Z7nosplitv() [[NSS1:#[0-9]+]] {
+// CHECK-SEGSTK: define linkonce_odr dso_local i32 @_Z8tnosplitIiEiv() [[NSS2:#[0-9]+]] comdat {
 // CHECK-SEGSTK-NOT: [[NSS1]] = { {{.*}} "split-stack" {{.*}} }
 // CHECK-SEGSTK-NOT: [[NSS2]] = { {{.*}} "split-stack" {{.*}} }
 // CHECK-SEGSTK: [[SS]] = { {{.*}} "split-stack" {{.*}} }
 // CHECK-SEGSTK-NOT: [[NSS1]] = { {{.*}} "split-stack" {{.*}} }
 // CHECK-SEGSTK-NOT: [[NSS2]] = { {{.*}} "split-stack" {{.*}} }
 
-// CHECK-NOSEGSTK: define i32 @_Z3foov() [[NSS0:#[0-9]+]] {
-// CHECK-NOSEGSTK: define i32 @_Z7nosplitv() [[NSS1:#[0-9]+]] {
-// CHECK-NOSEGSTK: define linkonce_odr i32 @_Z8tnosplitIiEiv() [[NSS2:#[0-9]+]] comdat {
+// CHECK-NOSEGSTK: define dso_local i32 @_Z3foov() [[NSS0:#[0-9]+]] {
+// CHECK-NOSEGSTK: define dso_local i32 @_Z7nosplitv() [[NSS1:#[0-9]+]] {
+// CHECK-NOSEGSTK: define linkonce_odr dso_local i32 @_Z8tnosplitIiEiv() [[NSS2:#[0-9]+]] comdat {
 // CHECK-NOSEGSTK-NOT: [[NSS1]] = { {{.*}} "split-stack" {{.*}} }
 // CHECK-NOSEGSTK-NOT: [[NSS2]] = { {{.*}} "split-stack" {{.*}} }
 // CHECK-NOSEGSTK-NOT: [[NSS3]] = { {{.*}} "split-stack" {{.*}} }
diff --git a/test/CodeGenCXX/static-init-wasm.cpp b/test/CodeGenCXX/static-init-wasm.cpp
index 5f2f94f..68d1391 100644
--- a/test/CodeGenCXX/static-init-wasm.cpp
+++ b/test/CodeGenCXX/static-init-wasm.cpp
@@ -43,12 +43,12 @@
 
 A theA;
 
-// WEBASSEMBLY32: define internal void @__cxx_global_var_init() #3 section ".text.__startup" {
+// WEBASSEMBLY32: define internal void @__cxx_global_var_init() #3 {
 // WEBASSEMBLY32: call %struct.A* @_ZN1AC1Ev(%struct.A* @theA)
-// WEBASSEMBLY32: define internal void @_GLOBAL__sub_I_static_init_wasm.cpp() #3 section ".text.__startup" {
+// WEBASSEMBLY32: define internal void @_GLOBAL__sub_I_static_init_wasm.cpp() #3 {
 // WEBASSEMBLY32: call void @__cxx_global_var_init()
 //
-// WEBASSEMBLY64: define internal void @__cxx_global_var_init() #3 section ".text.__startup" {
+// WEBASSEMBLY64: define internal void @__cxx_global_var_init() #3 {
 // WEBASSEMBLY64: call %struct.A* @_ZN1AC1Ev(%struct.A* @theA)
-// WEBASSEMBLY64: define internal void @_GLOBAL__sub_I_static_init_wasm.cpp() #3 section ".text.__startup" {
+// WEBASSEMBLY64: define internal void @_GLOBAL__sub_I_static_init_wasm.cpp() #3 {
 // WEBASSEMBLY64: call void @__cxx_global_var_init()
diff --git a/test/CodeGenCXX/strict-vtable-pointers.cpp b/test/CodeGenCXX/strict-vtable-pointers.cpp
index c379892..c45e7a6 100644
--- a/test/CodeGenCXX/strict-vtable-pointers.cpp
+++ b/test/CodeGenCXX/strict-vtable-pointers.cpp
@@ -53,7 +53,7 @@
 };
 
 // CHECK-NEW-LABEL: define void @_Z12LocalObjectsv()
-// CHECK-NEW-NOT: @llvm.invariant.group.barrier(
+// CHECK-NEW-NOT: @llvm.invariant.group.barrier.p0i8(
 // CHECK-NEW-LABEL: {{^}}}
 void LocalObjects() {
   DynamicBase1 DB;
@@ -81,20 +81,20 @@
 
 struct DynamicFromVirtualStatic1;
 // CHECK-CTORS-LABEL: define linkonce_odr void @_ZN25DynamicFromVirtualStatic1C1Ev
-// CHECK-CTORS-NOT: @llvm.invariant.group.barrier(
+// CHECK-CTORS-NOT: @llvm.invariant.group.barrier.p0i8(
 // CHECK-CTORS-LABEL: {{^}}}
 
 struct DynamicFrom2Virtuals;
 // CHECK-CTORS-LABEL: define linkonce_odr void @_ZN20DynamicFrom2VirtualsC1Ev
-// CHECK-CTORS: call i8* @llvm.invariant.group.barrier(
+// CHECK-CTORS: call i8* @llvm.invariant.group.barrier.p0i8(
 // CHECK-CTORS-LABEL: {{^}}}
 
 
 // CHECK-NEW-LABEL: define void @_Z9Pointers1v()
-// CHECK-NEW-NOT: @llvm.invariant.group.barrier(
+// CHECK-NEW-NOT: @llvm.invariant.group.barrier.p0i8(
 // CHECK-NEW-LABEL: call void @_ZN12DynamicBase1C1Ev(
 
-// CHECK-NEW: %[[THIS3:.*]] = call i8* @llvm.invariant.group.barrier(i8* %[[THIS2:.*]])
+// CHECK-NEW: %[[THIS3:.*]] = call i8* @llvm.invariant.group.barrier.p0i8(i8* %[[THIS2:.*]])
 // CHECK-NEW: %[[THIS4:.*]] = bitcast i8* %[[THIS3]] to %[[DynamicDerived:.*]]*
 // CHECK-NEW: call void @_ZN14DynamicDerivedC1Ev(%[[DynamicDerived:.*]]* %[[THIS4]])
 // CHECK-NEW-LABEL: {{^}}}
@@ -109,9 +109,9 @@
 
 // CHECK-NEW-LABEL: define void @_Z14HackingObjectsv()
 // CHECK-NEW:  call void @_ZN12DynamicBase1C1Ev
-// CHECK-NEW:  call i8* @llvm.invariant.group.barrier(
+// CHECK-NEW:  call i8* @llvm.invariant.group.barrier.p0i8(
 // CHECK-NEW:  call void @_ZN14DynamicDerivedC1Ev(
-// CHECK-NEW:  call i8* @llvm.invariant.group.barrier(
+// CHECK-NEW:  call i8* @llvm.invariant.group.barrier.p0i8(
 // CHECK-NEW: call void @_ZN12DynamicBase1C1Ev(
 // CHECK-NEW-LABEL: {{^}}}
 void HackingObjects() {
@@ -131,7 +131,7 @@
 /*** Testing Constructors ***/
 struct DynamicBase1;
 // CHECK-CTORS-LABEL: define linkonce_odr void @_ZN12DynamicBase1C2Ev(
-// CHECK-CTORS-NOT: call i8* @llvm.invariant.group.barrier(
+// CHECK-CTORS-NOT: call i8* @llvm.invariant.group.barrier.p0i8(
 // CHECK-CTORS-LABEL: {{^}}}
 
 
@@ -140,7 +140,7 @@
 // CHECK-CTORS-LABEL: define linkonce_odr void @_ZN14DynamicDerivedC2Ev(
 // CHECK-CTORS: %[[THIS0:.*]] = load %[[DynamicDerived:.*]]*, %[[DynamicDerived]]** {{.*}}
 // CHECK-CTORS: %[[THIS1:.*]] = bitcast %[[DynamicDerived:.*]]* %[[THIS0]] to i8*
-// CHECK-CTORS: %[[THIS2:.*]] = call i8* @llvm.invariant.group.barrier(i8* %[[THIS1:.*]])
+// CHECK-CTORS: %[[THIS2:.*]] = call i8* @llvm.invariant.group.barrier.p0i8(i8* %[[THIS1:.*]])
 // CHECK-CTORS: %[[THIS3:.*]] = bitcast i8* %[[THIS2]] to %[[DynamicDerived]]*
 // CHECK-CTORS: %[[THIS4:.*]] = bitcast %[[DynamicDerived]]* %[[THIS3]] to %[[DynamicBase:.*]]*
 // CHECK-CTORS: call void @_ZN12DynamicBase1C2Ev(%[[DynamicBase]]* %[[THIS4]])
@@ -154,15 +154,15 @@
 
 // CHECK-CTORS: %[[THIS0:.*]] = load %[[CLASS:.*]]*, %[[CLASS]]** {{.*}}
 // CHECK-CTORS: %[[THIS1:.*]] = bitcast %[[CLASS:.*]]* %[[THIS0]] to i8*
-// CHECK-CTORS: %[[THIS2:.*]] = call i8* @llvm.invariant.group.barrier(i8* %[[THIS1]])
+// CHECK-CTORS: %[[THIS2:.*]] = call i8* @llvm.invariant.group.barrier.p0i8(i8* %[[THIS1]])
 // CHECK-CTORS: %[[THIS3:.*]] = bitcast i8* %[[THIS2]] to %[[CLASS]]*
 // CHECK-CTORS: %[[THIS4:.*]] = bitcast %[[CLASS]]* %[[THIS3]] to %[[BASE_CLASS:.*]]*
 // CHECK-CTORS: call void @_ZN12DynamicBase1C2Ev(%[[BASE_CLASS]]* %[[THIS4]])
 
-// CHECK-CTORS: call i8* @llvm.invariant.group.barrier(
+// CHECK-CTORS: call i8* @llvm.invariant.group.barrier.p0i8(
 
 // CHECK-CTORS: call void @_ZN12DynamicBase2C2Ev(
-// CHECK-CTORS-NOT: @llvm.invariant.group.barrier
+// CHECK-CTORS-NOT: @llvm.invariant.group.barrier.p0i8
 
 
 // CHECK-CTORS: %[[THIS10:.*]] = bitcast %struct.DynamicDerivedMultiple* %[[THIS0]] to i32 (...)***
@@ -177,7 +177,7 @@
 
 struct DynamicFromStatic;
 // CHECK-CTORS-LABEL: define linkonce_odr void @_ZN17DynamicFromStaticC2Ev(
-// CHECK-CTORS-NOT: @llvm.invariant.group.barrier(
+// CHECK-CTORS-NOT: @llvm.invariant.group.barrier.p0i8(
 // CHECK-CTORS-LABEL: {{^}}}
 
 struct A {
@@ -206,15 +206,15 @@
 void UnionsBarriers(U *u) {
   // CHECK-NEW: call void @_Z9changeToBP1U(
   changeToB(u);
-  // CHECK-NEW: call i8* @llvm.invariant.group.barrier(i8*
+  // CHECK-NEW: call i8* @llvm.invariant.group.barrier.p0i8(i8*
   // CHECK-NEW: call void @_Z2g2P1A(%struct.A*
   g2(&u->b);
   // CHECK-NEW: call void @_Z9changeToAP1U(%union.U* 
   changeToA(u);
-  // CHECK-NEW: call i8* @llvm.invariant.group.barrier(i8*
+  // CHECK-NEW: call i8* @llvm.invariant.group.barrier.p0i8(i8*
   // call void @_Z2g2P1A(%struct.A* %a)
   g2(&u->a);
-  // CHECK-NEW-NOT: call i8* @llvm.invariant.group.barrier(i8*
+  // CHECK-NEW-NOT: call i8* @llvm.invariant.group.barrier.p0i8(i8*
 }
 
 struct HoldingVirtuals {
@@ -233,10 +233,10 @@
 
 // CHECK-NEW-LABEL: noBarriers
 void noBarriers(NoVptrs &noVptrs) {
-  // CHECK-NEW-NOT: call i8* @llvm.invariant.group.barrier(i8*
+  // CHECK-NEW-NOT: call i8* @llvm.invariant.group.barrier.p0i8(i8*
   // CHECK-NEW: 42
   noVptrs.a += 42;
-  // CHECK-NEW-NOT: call i8* @llvm.invariant.group.barrier(i8*
+  // CHECK-NEW-NOT: call i8* @llvm.invariant.group.barrier.p0i8(i8*
   // CHECK-NEW: call void @_Z4takeR12AnotherEmpty(
   take(noVptrs.empty);
 }
@@ -249,10 +249,10 @@
 
 // CHECK-NEW-LABEL: define void @_Z15UnionsBarriers2R2U2
 void UnionsBarriers2(U2 &u) {
-  // CHECK-NEW-NOT: call i8* @llvm.invariant.group.barrier(i8*
+  // CHECK-NEW-NOT: call i8* @llvm.invariant.group.barrier.p0i8(i8*
   // CHECK-NEW: 42
   u.z += 42;
-  // CHECK-NEW: call i8* @llvm.invariant.group.barrier(i8*
+  // CHECK-NEW: call i8* @llvm.invariant.group.barrier.p0i8(i8*
   // CHECK-NEW: call void @_Z4takeR15HoldingVirtuals(
   take(u.h);
 }
@@ -279,24 +279,24 @@
 void take(VirtualInheritance &);
 
 void UnionsBarrier3(U3 &u) {
-  // CHECK-NEW-NOT: call i8* @llvm.invariant.group.barrier(i8*
+  // CHECK-NEW-NOT: call i8* @llvm.invariant.group.barrier.p0i8(i8*
   // CHECK-NEW: 42
   u.z += 42;
-  // CHECK-NEW: call i8* @llvm.invariant.group.barrier(i8*
+  // CHECK-NEW: call i8* @llvm.invariant.group.barrier.p0i8(i8*
   // CHECK-NEW: call void @_Z4takeR13VirtualInBase(
   take(u.v1);
-  // CHECK-NEW: call i8* @llvm.invariant.group.barrier(i8*
+  // CHECK-NEW: call i8* @llvm.invariant.group.barrier.p0i8(i8*
   // CHECK-NEW: call void @_Z4takeR13VirtualInBase(
   take(u.v2);
 
-  // CHECK-NEW: call i8* @llvm.invariant.group.barrier(i8*
+  // CHECK-NEW: call i8* @llvm.invariant.group.barrier.p0i8(i8*
   // CHECK-NEW: call void @_Z4takeR18VirtualInheritance(
   take(u.v3);
 }
 
 /** DTORS **/
 // CHECK-DTORS-LABEL: define linkonce_odr void @_ZN10StaticBaseD2Ev(
-// CHECK-DTORS-NOT: call i8* @llvm.invariant.group.barrier(
+// CHECK-DTORS-NOT: call i8* @llvm.invariant.group.barrier.p0i8(
 // CHECK-DTORS-LABEL: {{^}}}
 
 
@@ -305,22 +305,22 @@
 // CHECK-DTORS-LABEL: {{^}}}
 
 // CHECK-DTORS-LABEL: define linkonce_odr void @_ZN17DynamicFromStaticD2Ev
-// CHECK-DTORS-NOT: call i8* @llvm.invariant.group.barrier(
+// CHECK-DTORS-NOT: call i8* @llvm.invariant.group.barrier.p0i8(
 // CHECK-DTORS-LABEL: {{^}}}
 
 
 // CHECK-DTORS-LABEL: define linkonce_odr void @_ZN22DynamicDerivedMultipleD2Ev(
 
 // CHECK-DTORS-LABEL: define linkonce_odr void @_ZN12DynamicBase2D2Ev(
-// CHECK-DTORS: call i8* @llvm.invariant.group.barrier(
+// CHECK-DTORS: call i8* @llvm.invariant.group.barrier.p0i8(
 // CHECK-DTORS-LABEL: {{^}}}
 
 // CHECK-DTORS-LABEL: define linkonce_odr void @_ZN12DynamicBase1D2Ev
-// CHECK-DTORS: call i8* @llvm.invariant.group.barrier(
+// CHECK-DTORS: call i8* @llvm.invariant.group.barrier.p0i8(
 // CHECK-DTORS-LABEL: {{^}}}
 
 // CHECK-DTORS-LABEL: define linkonce_odr void @_ZN14DynamicDerivedD2Ev
-// CHECK-DTORS-NOT: call i8* @llvm.invariant.group.barrier(
+// CHECK-DTORS-NOT: call i8* @llvm.invariant.group.barrier.p0i8(
 // CHECK-DTORS-LABEL: {{^}}}
 
 
diff --git a/test/CodeGenCXX/tmp-md-nodes1.cpp b/test/CodeGenCXX/tmp-md-nodes1.cpp
new file mode 100644
index 0000000..41a0159
--- /dev/null
+++ b/test/CodeGenCXX/tmp-md-nodes1.cpp
@@ -0,0 +1,18 @@
+// REQUIRES: asserts
+// RUN: %clang_cc1 -O0 -triple %itanium_abi_triple -debug-info-kind=limited -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s
+
+// This test simply checks that the varargs thunk is created. The failing test
+// case asserts.
+
+struct Alpha {
+  virtual void bravo(...);
+};
+struct Charlie {
+  virtual ~Charlie() {}
+};
+struct CharlieImpl : Charlie, Alpha {
+  void bravo(...) {}
+} delta;
+
+// CHECK: define {{.*}} void @_ZThn{{[48]}}_N11CharlieImpl5bravoEz(
diff --git a/test/CodeGenCXX/tmp-md-nodes2.cpp b/test/CodeGenCXX/tmp-md-nodes2.cpp
new file mode 100644
index 0000000..e50220c
--- /dev/null
+++ b/test/CodeGenCXX/tmp-md-nodes2.cpp
@@ -0,0 +1,33 @@
+// REQUIRES: asserts
+// RUN: %clang_cc1 -O0 -triple %itanium_abi_triple -debug-info-kind=limited -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s
+
+// This test simply checks that the varargs thunk is created. The failing test
+// case asserts.
+
+typedef signed char __int8_t;
+typedef int BOOL;
+class CMsgAgent;
+
+class CFs {
+public:
+  typedef enum {} CACHE_HINT;
+  virtual BOOL ReqCacheHint( CMsgAgent* p_ma, CACHE_HINT hint, ... ) ;
+};
+
+typedef struct {} _Lldiv_t;
+
+class CBdVfs {
+public:
+  virtual ~CBdVfs( ) {}
+};
+
+class CBdVfsImpl : public CBdVfs, public CFs {
+  BOOL ReqCacheHint( CMsgAgent* p_ma, CACHE_HINT hint, ... );
+};
+
+BOOL CBdVfsImpl::ReqCacheHint( CMsgAgent* p_ma, CACHE_HINT hint, ... ) {
+  return true;
+}
+
+// CHECK: define {{.*}} @_ZThn{{[48]}}_N10CBdVfsImpl12ReqCacheHintEP9CMsgAgentN3CFs10CACHE_HINTEz(
diff --git a/test/CodeGenCXX/trap-fnattr.cpp b/test/CodeGenCXX/trap-fnattr.cpp
index b73ea43..79fb813 100644
--- a/test/CodeGenCXX/trap-fnattr.cpp
+++ b/test/CodeGenCXX/trap-fnattr.cpp
@@ -1,10 +1,10 @@
 // RUN: %clang_cc1 -O0 -emit-llvm -ftrapv -ftrap-function=mytrap %s -o - | FileCheck %s -check-prefix=TRAPFUNC
 // RUN: %clang_cc1 -O0 -emit-llvm -ftrapv %s -o - | FileCheck %s -check-prefix=NOOPTION
 
-// TRAPFUNC-LABEL: define void @{{_Z12test_builtinv|\"\\01\?test_builtin@@YAXXZ\"}}
+// TRAPFUNC-LABEL: define {{(dso_local )?}}void @{{_Z12test_builtinv|\"\\01\?test_builtin@@YAXXZ\"}}
 // TRAPFUNC: call void @llvm.trap() [[ATTR0:#[0-9]+]]
 
-// NOOPTION-LABEL: define void @{{_Z12test_builtinv|\"\\01\?test_builtin@@YAXXZ\"}}
+// NOOPTION-LABEL: define {{(dso_local )?}}void @{{_Z12test_builtinv|\"\\01\?test_builtin@@YAXXZ\"}}
 // NOOPTION: call void @llvm.trap(){{$}}
 
 void test_builtin(void) {
diff --git a/test/CodeGenCXX/trivial_abi.cpp b/test/CodeGenCXX/trivial_abi.cpp
new file mode 100644
index 0000000..2cf07b2
--- /dev/null
+++ b/test/CodeGenCXX/trivial_abi.cpp
@@ -0,0 +1,239 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++11 -fcxx-exceptions -fexceptions -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++11 -fcxx-exceptions -fexceptions -fclang-abi-compat=4.0 -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: %[[STRUCT_SMALL:.*]] = type { i32* }
+// CHECK: %[[STRUCT_LARGE:.*]] = type { i32*, [128 x i32] }
+// CHECK: %[[STRUCT_TRIVIAL:.*]] = type { i32 }
+// CHECK: %[[STRUCT_NONTRIVIAL:.*]] = type { i32 }
+
+struct __attribute__((trivial_abi)) Small {
+  int *p;
+  Small();
+  ~Small();
+  Small(const Small &) noexcept;
+  Small &operator=(const Small &);
+};
+
+struct __attribute__((trivial_abi)) Large {
+  int *p;
+  int a[128];
+  Large();
+  ~Large();
+  Large(const Large &) noexcept;
+  Large &operator=(const Large &);
+};
+
+struct Trivial {
+  int a;
+};
+
+struct NonTrivial {
+  NonTrivial();
+  ~NonTrivial();
+  int a;
+};
+
+struct HasTrivial {
+  Small s;
+  Trivial m;
+};
+
+struct HasNonTrivial {
+  Small s;
+  NonTrivial m;
+};
+
+// CHECK: define void @_Z14testParamSmall5Small(i64 %[[A_COERCE:.*]])
+// CHECK: %[[A:.*]] = alloca %[[STRUCT_SMALL]], align 8
+// CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_SMALL]], %[[STRUCT_SMALL]]* %[[A]], i32 0, i32 0
+// CHECK: %[[COERCE_VAL_IP:.*]] = inttoptr i64 %[[A_COERCE]] to i32*
+// CHECK: store i32* %[[COERCE_VAL_IP]], i32** %[[COERCE_DIVE]], align 8
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_SMALL]]* @_ZN5SmallD1Ev(%[[STRUCT_SMALL]]* %[[A]])
+// CHECK: ret void
+// CHECK: }
+
+void testParamSmall(Small a) noexcept {
+}
+
+// CHECK: define i64 @_Z15testReturnSmallv()
+// CHECK: %[[RETVAL:.*]] = alloca %[[STRUCT_SMALL:.*]], align 8
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_SMALL]]* @_ZN5SmallC1Ev(%[[STRUCT_SMALL]]* %[[RETVAL]])
+// CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_SMALL]], %[[STRUCT_SMALL]]* %[[RETVAL]], i32 0, i32 0
+// CHECK: %[[V0:.*]] = load i32*, i32** %[[COERCE_DIVE]], align 8
+// CHECK: %[[COERCE_VAL_PI:.*]] = ptrtoint i32* %[[V0]] to i64
+// CHECK: ret i64 %[[COERCE_VAL_PI]]
+// CHECK: }
+
+Small testReturnSmall() {
+  Small t;
+  return t;
+}
+
+// CHECK: define void @_Z14testCallSmall0v()
+// CHECK: %[[T:.*]] = alloca %[[STRUCT_SMALL:.*]], align 8
+// CHECK: %[[AGG_TMP:.*]] = alloca %[[STRUCT_SMALL]], align 8
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_SMALL]]* @_ZN5SmallC1Ev(%[[STRUCT_SMALL]]* %[[T]])
+// CHECK: %[[CALL1:.*]] = call %[[STRUCT_SMALL]]* @_ZN5SmallC1ERKS_(%[[STRUCT_SMALL]]* %[[AGG_TMP]], %[[STRUCT_SMALL]]* dereferenceable(8) %[[T]])
+// CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_SMALL]], %[[STRUCT_SMALL]]* %[[AGG_TMP]], i32 0, i32 0
+// CHECK: %[[V0:.*]] = load i32*, i32** %[[COERCE_DIVE]], align 8
+// CHECK: %[[COERCE_VAL_PI:.*]] = ptrtoint i32* %[[V0]] to i64
+// CHECK: call void @_Z14testParamSmall5Small(i64 %[[COERCE_VAL_PI]])
+// CHECK: %[[CALL2:.*]] = call %[[STRUCT_SMALL]]* @_ZN5SmallD1Ev(%[[STRUCT_SMALL]]* %[[T]])
+// CHECK: ret void
+// CHECK: }
+
+void testCallSmall0() {
+  Small t;
+  testParamSmall(t);
+}
+
+// CHECK: define void @_Z14testCallSmall1v()
+// CHECK: %[[AGG_TMP:.*]] = alloca %[[STRUCT_SMALL:.*]], align 8
+// CHECK: %[[CALL:.*]] = call i64 @_Z15testReturnSmallv()
+// CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_SMALL]], %[[STRUCT_SMALL]]* %[[AGG_TMP]], i32 0, i32 0
+// CHECK: %[[COERCE_VAL_IP:.*]] = inttoptr i64 %[[CALL]] to i32*
+// CHECK: store i32* %[[COERCE_VAL_IP]], i32** %[[COERCE_DIVE]], align 8
+// CHECK: %[[COERCE_DIVE1:.*]] = getelementptr inbounds %[[STRUCT_SMALL]], %[[STRUCT_SMALL]]* %[[AGG_TMP]], i32 0, i32 0
+// CHECK: %[[V0:.*]] = load i32*, i32** %[[COERCE_DIVE1]], align 8
+// CHECK: %[[COERCE_VAL_PI:.*]] = ptrtoint i32* %[[V0]] to i64
+// CHECK: call void @_Z14testParamSmall5Small(i64 %[[COERCE_VAL_PI]])
+// CHECK: ret void
+// CHECK: }
+
+void testCallSmall1() {
+  testParamSmall(testReturnSmall());
+}
+
+// CHECK: define void @_Z16testIgnoredSmallv()
+// CHECK: %[[AGG_TMP_ENSURED:.*]] = alloca %[[STRUCT_SMALL:.*]], align 8
+// CHECK: %[[CALL:.*]] = call i64 @_Z15testReturnSmallv()
+// CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_SMALL]], %[[STRUCT_SMALL]]* %[[AGG_TMP_ENSURED]], i32 0, i32 0
+// CHECK: %[[COERCE_VAL_IP:.*]] = inttoptr i64 %[[CALL]] to i32*
+// CHECK: store i32* %[[COERCE_VAL_IP]], i32** %[[COERCE_DIVE]], align 8
+// CHECK: %[[CALL1:.*]] = call %[[STRUCT_SMALL]]* @_ZN5SmallD1Ev(%[[STRUCT_SMALL]]* %[[AGG_TMP_ENSURED]])
+// CHECK: ret void
+// CHECK: }
+
+void testIgnoredSmall() {
+  testReturnSmall();
+}
+
+// CHECK: define void @_Z14testParamLarge5Large(%[[STRUCT_LARGE:.*]]* %[[A:.*]])
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_LARGE]]* @_ZN5LargeD1Ev(%[[STRUCT_LARGE]]* %[[A]])
+// CHECK: ret void
+// CHECK: }
+
+void testParamLarge(Large a) noexcept {
+}
+
+// CHECK: define void @_Z15testReturnLargev(%[[STRUCT_LARGE:.*]]* noalias sret %[[AGG_RESULT:.*]])
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_LARGE]]* @_ZN5LargeC1Ev(%[[STRUCT_LARGE]]* %[[AGG_RESULT]])
+// CHECK: ret void
+// CHECK: }
+
+Large testReturnLarge() {
+  Large t;
+  return t;
+}
+
+// CHECK: define void @_Z14testCallLarge0v()
+// CHECK: %[[T:.*]] = alloca %[[STRUCT_LARGE:.*]], align 8
+// CHECK: %[[AGG_TMP:.*]] = alloca %[[STRUCT_LARGE]], align 8
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_LARGE]]* @_ZN5LargeC1Ev(%[[STRUCT_LARGE]]* %[[T]])
+// CHECK: %[[CALL1:.*]] = call %[[STRUCT_LARGE]]* @_ZN5LargeC1ERKS_(%[[STRUCT_LARGE]]* %[[AGG_TMP]], %[[STRUCT_LARGE]]* dereferenceable(520) %[[T]])
+// CHECK: call void @_Z14testParamLarge5Large(%[[STRUCT_LARGE]]* %[[AGG_TMP]])
+// CHECK: %[[CALL2:.*]] = call %[[STRUCT_LARGE]]* @_ZN5LargeD1Ev(%[[STRUCT_LARGE]]* %[[T]])
+// CHECK: ret void
+// CHECK: }
+
+void testCallLarge0() {
+  Large t;
+  testParamLarge(t);
+}
+
+// CHECK: define void @_Z14testCallLarge1v()
+// CHECK: %[[AGG_TMP:.*]] = alloca %[[STRUCT_LARGE:.*]], align 8
+// CHECK: call void @_Z15testReturnLargev(%[[STRUCT_LARGE]]* sret %[[AGG_TMP]])
+// CHECK: call void @_Z14testParamLarge5Large(%[[STRUCT_LARGE]]* %[[AGG_TMP]])
+// CHECK: ret void
+// CHECK: }
+
+void testCallLarge1() {
+  testParamLarge(testReturnLarge());
+}
+
+// CHECK: define void @_Z16testIgnoredLargev()
+// CHECK: %[[AGG_TMP_ENSURED:.*]] = alloca %[[STRUCT_LARGE:.*]], align 8
+// CHECK: call void @_Z15testReturnLargev(%[[STRUCT_LARGE]]* sret %[[AGG_TMP_ENSURED]])
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_LARGE]]* @_ZN5LargeD1Ev(%[[STRUCT_LARGE]]* %[[AGG_TMP_ENSURED]])
+// CHECK: ret void
+// CHECK: }
+
+void testIgnoredLarge() {
+  testReturnLarge();
+}
+
+// CHECK: define i64 @_Z20testReturnHasTrivialv()
+// CHECK: %[[RETVAL:.*]] = alloca %[[STRUCT_TRIVIAL:.*]], align 4
+// CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_TRIVIAL]], %[[STRUCT_TRIVIAL]]* %[[RETVAL]], i32 0, i32 0
+// CHECK: %[[V0:.*]] = load i32, i32* %[[COERCE_DIVE]], align 4
+// CHECK: %[[COERCE_VAL_II:.*]] = zext i32 %[[V0]] to i64
+// CHECK: ret i64 %[[COERCE_VAL_II]]
+// CHECK: }
+
+Trivial testReturnHasTrivial() {
+  Trivial t;
+  return t;
+}
+
+// CHECK: define void @_Z23testReturnHasNonTrivialv(%[[STRUCT_NONTRIVIAL:.*]]* noalias sret %[[AGG_RESULT:.*]])
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_NONTRIVIAL]]* @_ZN10NonTrivialC1Ev(%[[STRUCT_NONTRIVIAL]]* %[[AGG_RESULT]])
+// CHECK: ret void
+// CHECK: }
+
+NonTrivial testReturnHasNonTrivial() {
+  NonTrivial t;
+  return t;
+}
+
+// CHECK: define void @_Z18testExceptionSmallv()
+// CHECK: %[[AGG_TMP:.*]] = alloca %[[STRUCT_SMALL]], align 8
+// CHECK: %[[AGG_TMP1:.*]] = alloca %[[STRUCT_SMALL]], align 8
+// CHECK: call %[[STRUCT_SMALL]]* @_ZN5SmallC1Ev(%[[STRUCT_SMALL]]* %[[AGG_TMP]])
+// CHECK: invoke %[[STRUCT_SMALL]]* @_ZN5SmallC1Ev(%[[STRUCT_SMALL]]* %[[AGG_TMP1]])
+
+// CHECK: call void @_Z20calleeExceptionSmall5SmallS_(i64 %{{.*}}, i64 %{{.*}})
+// CHECK-NEXT: ret void
+
+// CHECK: landingpad { i8*, i32 }
+// CHECK: call %[[STRUCT_SMALL]]* @_ZN5SmallD1Ev(%[[STRUCT_SMALL]]* %[[AGG_TMP]])
+// CHECK: br
+
+// CHECK: resume { i8*, i32 }
+
+void calleeExceptionSmall(Small, Small);
+
+void testExceptionSmall() {
+  calleeExceptionSmall(Small(), Small());
+}
+
+// CHECK: define void @_Z18testExceptionLargev()
+// CHECK: %[[AGG_TMP:.*]] = alloca %[[STRUCT_LARGE]], align 8
+// CHECK: %[[AGG_TMP1:.*]] = alloca %[[STRUCT_LARGE]], align 8
+// CHECK: call %[[STRUCT_LARGE]]* @_ZN5LargeC1Ev(%[[STRUCT_LARGE]]* %[[AGG_TMP]])
+// CHECK: invoke %[[STRUCT_LARGE]]* @_ZN5LargeC1Ev(%[[STRUCT_LARGE]]* %[[AGG_TMP1]])
+
+// CHECK: call void @_Z20calleeExceptionLarge5LargeS_(%[[STRUCT_LARGE]]* %[[AGG_TMP]], %[[STRUCT_LARGE]]* %[[AGG_TMP1]])
+// CHECK-NEXT: ret void
+
+// CHECK: landingpad { i8*, i32 }
+// CHECK: call %[[STRUCT_LARGE]]* @_ZN5LargeD1Ev(%[[STRUCT_LARGE]]* %[[AGG_TMP]])
+// CHECK: br
+
+// CHECK: resume { i8*, i32 }
+
+void calleeExceptionLarge(Large, Large);
+
+void testExceptionLarge() {
+  calleeExceptionLarge(Large(), Large());
+}
diff --git a/test/CodeGenCXX/type-metadata.cpp b/test/CodeGenCXX/type-metadata.cpp
index 6821e36..172636c 100644
--- a/test/CodeGenCXX/type-metadata.cpp
+++ b/test/CodeGenCXX/type-metadata.cpp
@@ -104,7 +104,7 @@
 }
 
 // ITANIUM: define hidden void @_Z2afP1A
-// MS: define void @"\01?af@@YAXPEAUA@@@Z"
+// MS: define dso_local void @"\01?af@@YAXPEAUA@@@Z"
 void af(A *a) {
   // TT-ITANIUM: [[P:%[^ ]*]] = call i1 @llvm.type.test(i8* [[VT:%[^ ]*]], metadata !"_ZTS1A")
   // TT-MS: [[P:%[^ ]*]] = call i1 @llvm.type.test(i8* [[VT:%[^ ]*]], metadata !"?AUA@@")
@@ -214,7 +214,7 @@
 };
 
 // ITANIUM: define hidden void @_ZN5test21fEPNS_1DE
-// MS: define void @"\01?f@test2@@YAXPEAUD@1@@Z"
+// MS: define dso_local void @"\01?f@test2@@YAXPEAUD@1@@Z"
 void f(D *d) {
   // TT-ITANIUM: {{%[^ ]*}} = call i1 @llvm.type.test(i8* {{%[^ ]*}}, metadata !"_ZTSN5test21DE")
   // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(i8* {{%[^ ]*}}, metadata !"?AUA@test2@@")
diff --git a/test/CodeGenCXX/ubsan-devirtualized-calls.cpp b/test/CodeGenCXX/ubsan-devirtualized-calls.cpp
index f4ccdbf..1510a4a 100644
--- a/test/CodeGenCXX/ubsan-devirtualized-calls.cpp
+++ b/test/CodeGenCXX/ubsan-devirtualized-calls.cpp
@@ -32,7 +32,7 @@
 // CHECK: [[UBSAN_TI_DERIVED4_1:@[0-9]+]] = private unnamed_addr global {{.*}} i8* bitcast {{.*}} @_ZTI8Derived4 to i8*
 // CHECK: [[UBSAN_TI_DERIVED4_2:@[0-9]+]] = private unnamed_addr global {{.*}} i8* bitcast {{.*}} @_ZTI8Derived4 to i8*
 
-// CHECK-LABEL: define void @_Z2t1v
+// CHECK-LABEL: define {{(dso_local )?}}void @_Z2t1v
 void t1() {
   Derived1 d1;
   static_cast<Base1 *>(&d1)->f1(); //< Devirt Base1::f1 to Derived1::f1.
@@ -40,7 +40,7 @@
   // CHECK-NEXT: call void @__ubsan_handle_dynamic_type_cache{{[_a-z]*}}({{.*}} [[UBSAN_TI_DERIVED1_1]] {{.*}}, i{{[0-9]+}} %[[D1]]
 }
 
-// CHECK-LABEL: define void @_Z2t2v
+// CHECK-LABEL: define {{(dso_local )?}}void @_Z2t2v
 void t2() {
   Derived2 d2;
   static_cast<Base1 *>(&d2)->f1(); //< Devirt Base1::f1 to Derived2::f1.
@@ -48,7 +48,7 @@
   // CHECK-NEXT: call void @__ubsan_handle_dynamic_type_cache{{[_a-z]*}}({{.*}} [[UBSAN_TI_DERIVED2_1]] {{.*}}, i{{[0-9]+}} %[[D2_1]]
 }
 
-// CHECK-LABEL: define void @_Z2t3v
+// CHECK-LABEL: define {{(dso_local )?}}void @_Z2t3v
 void t3() {
   Derived2 d2;
   static_cast<Base2 *>(&d2)->f1(); //< Devirt Base2::f1 to Derived2::f1.
@@ -56,7 +56,7 @@
   // CHECK-NEXT: call void @__ubsan_handle_dynamic_type_cache{{[_a-z]*}}({{.*}} [[UBSAN_TI_DERIVED2_2]] {{.*}}, i{{[0-9]+}} %[[D2_2]]
 }
 
-// CHECK-LABEL: define void @_Z2t4v
+// CHECK-LABEL: define {{(dso_local )?}}void @_Z2t4v
 void t4() {
   Base1 p;
   Derived3 *badp = static_cast<Derived3 *>(&p); //< Check that &p isa Derived3.
@@ -73,7 +73,7 @@
   // CHECK-NEXT: call void @__ubsan_handle_dynamic_type_cache{{[_a-z]*}}({{.*}} [[UBSAN_TI_BASE1]] {{.*}}, i{{[0-9]+}} %[[BADP1]]
 }
 
-// CHECK-LABEL: define void @_Z2t5v
+// CHECK-LABEL: define {{(dso_local )?}}void @_Z2t5v
 void t5() {
   Base1 p;
   Derived4 *badp = static_cast<Derived4 *>(&p); //< Check that &p isa Derived4.
diff --git a/test/CodeGenCXX/ubsan-function-noexcept.cpp b/test/CodeGenCXX/ubsan-function-noexcept.cpp
new file mode 100644
index 0000000..45c2764
--- /dev/null
+++ b/test/CodeGenCXX/ubsan-function-noexcept.cpp
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -std=c++17 -fsanitize=function -emit-llvm -triple x86_64-linux-gnu %s -o - | FileCheck %s
+
+// Check that typeinfo recorded in function prolog doesn't have "Do" noexcept
+// qualifier in its mangled name.
+// CHECK: @[[RTTI:[0-9]+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*)
+// CHECK: define void @_Z1fv() #{{.*}} prologue <{ i32, i32 }> <{ i32 {{.*}}, i32 trunc (i64 sub (i64 ptrtoint (i8** @[[RTTI]] to i64), i64 ptrtoint (void ()* @_Z1fv to i64)) to i32) }>
+void f() noexcept {}
+
+// CHECK: define void @_Z1gPDoFvvE
+void g(void (*p)() noexcept) {
+  // Check that reference typeinfo at call site doesn't have "Do" noexcept
+  // qualifier in its mangled name, either.
+  // CHECK: icmp eq i8* %{{.*}}, bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*), !nosanitize
+  p();
+}
diff --git a/test/CodeGenCXX/ubsan-unreachable.cpp b/test/CodeGenCXX/ubsan-unreachable.cpp
new file mode 100644
index 0000000..32a7804
--- /dev/null
+++ b/test/CodeGenCXX/ubsan-unreachable.cpp
@@ -0,0 +1,81 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -emit-llvm -o - %s -fsanitize=unreachable | FileCheck %s
+
+extern void __attribute__((noreturn)) abort();
+
+// CHECK-LABEL: define void @_Z14calls_noreturnv
+void calls_noreturn() {
+  abort();
+
+  // Check that there are no attributes on the call site.
+  // CHECK-NOT: call void @_Z5abortv{{.*}}#
+
+  // CHECK: __ubsan_handle_builtin_unreachable
+  // CHECK: unreachable
+}
+
+struct A {
+  // CHECK: declare void @_Z5abortv{{.*}} [[ABORT_ATTR:#[0-9]+]]
+
+  // CHECK-LABEL: define linkonce_odr void @_ZN1A5call1Ev
+  void call1() {
+    // CHECK-NOT: call void @_ZN1A16does_not_return2Ev{{.*}}#
+    does_not_return2();
+
+    // CHECK: __ubsan_handle_builtin_unreachable
+    // CHECK: unreachable
+  }
+
+  // Test static members.
+  static void __attribute__((noreturn)) does_not_return1() {
+    // CHECK-NOT: call void @_Z5abortv{{.*}}#
+    abort();
+  }
+
+  // CHECK-LABEL: define linkonce_odr void @_ZN1A5call2Ev
+  void call2() {
+    // CHECK-NOT: call void @_ZN1A16does_not_return1Ev{{.*}}#
+    does_not_return1();
+
+    // CHECK: __ubsan_handle_builtin_unreachable
+    // CHECK: unreachable
+  }
+
+  // Test calls through pointers to non-static member functions.
+  typedef void __attribute__((noreturn)) (A::*MemFn)();
+
+  // CHECK-LABEL: define linkonce_odr void @_ZN1A5call3Ev
+  void call3() {
+    MemFn MF = &A::does_not_return2;
+    (this->*MF)();
+
+    // CHECK-NOT: call void %{{.*}}#
+    // CHECK: __ubsan_handle_builtin_unreachable
+    // CHECK: unreachable
+  }
+
+  // Test regular members.
+  // CHECK-LABEL: define linkonce_odr void @_ZN1A16does_not_return2Ev({{.*}})
+  // CHECK-SAME: [[DOES_NOT_RETURN_ATTR:#[0-9]+]]
+  void __attribute__((noreturn)) does_not_return2() {
+    // CHECK-NOT: call void @_Z5abortv(){{.*}}#
+    abort();
+
+    // CHECK: call void @__ubsan_handle_builtin_unreachable
+    // CHECK: unreachable
+
+    // CHECK: call void @__ubsan_handle_builtin_unreachable
+    // CHECK: unreachable
+  }
+};
+
+// CHECK: define linkonce_odr void @_ZN1A16does_not_return1Ev() [[DOES_NOT_RETURN_ATTR]]
+
+void force_irgen() {
+  A a;
+  a.call1();
+  a.call2();
+  a.call3();
+}
+
+// CHECK-NOT: [[ABORT_ATTR]] = {{[^}]+}}noreturn
+// CHECK-NOT: [[DOES_NOT_RETURN_ATTR]] = {{[^}]+}}noreturn
diff --git a/test/CodeGenCXX/ubsan-vtable-checks.cpp b/test/CodeGenCXX/ubsan-vtable-checks.cpp
index 5e17913..494455b 100644
--- a/test/CodeGenCXX/ubsan-vtable-checks.cpp
+++ b/test/CodeGenCXX/ubsan-vtable-checks.cpp
@@ -15,7 +15,7 @@
 U::~U() {}
 
 // ITANIUM: define i32 @_Z5get_vP1T
-// MSABI: define i32 @"\01?get_v
+// MSABI: define dso_local i32 @"\01?get_v
 int get_v(T* t) {
   // First, we check that vtable is not loaded before a type check.
   // CHECK-NULL-NOT: load {{.*}} (%struct.T*{{.*}})**, {{.*}} (%struct.T*{{.*}})***
@@ -28,7 +28,7 @@
 }
 
 // ITANIUM: define void @_Z9delete_itP1T
-// MSABI: define void @"\01?delete_it
+// MSABI: define dso_local void @"\01?delete_it
 void delete_it(T *t) {
   // First, we check that vtable is not loaded before a type check.
   // CHECK-VPTR-NOT: load {{.*}} (%struct.T*{{.*}})**, {{.*}} (%struct.T*{{.*}})***
@@ -38,3 +38,15 @@
   // CHECK-VPTR: load {{.*}} (%struct.T*{{.*}})**, {{.*}} (%struct.T*{{.*}})***
   delete t;
 }
+
+// ITANIUM: define %struct.U* @_Z7dyncastP1T
+// MSABI: define dso_local %struct.U* @"\01?dyncast
+U* dyncast(T *t) {
+  // First, we check that dynamic_cast is not called before a type check.
+  // CHECK-VPTR-NOT: call i8* @__{{dynamic_cast|RTDynamicCast}}
+  // CHECK-VPTR: br i1 {{.*}} label %{{.*}}
+  // CHECK-VPTR: call void @__ubsan_handle_dynamic_type_cache_miss_abort
+  // Second, we check that dynamic_cast is actually called once the type check is done.
+  // CHECK-VPTR: call i8* @__{{dynamic_cast|RTDynamicCast}}
+  return dynamic_cast<U*>(t);
+}
diff --git a/test/CodeGenCXX/unaligned-member-qualifier.cpp b/test/CodeGenCXX/unaligned-member-qualifier.cpp
index 0d326a6..57cfbd9 100644
--- a/test/CodeGenCXX/unaligned-member-qualifier.cpp
+++ b/test/CodeGenCXX/unaligned-member-qualifier.cpp
@@ -8,13 +8,13 @@
 };
 
 void A::foo() __unaligned {}
-// CHECK: define [[THISCALL:(x86_thiscallcc )?]]void @_ZNU11__unaligned1A3fooEv(
+// CHECK: define {{(dso_local )?}}[[THISCALL:(x86_thiscallcc )?]]void @_ZNU11__unaligned1A3fooEv(
 
 void A::foo() const __unaligned {}
-// CHECK: define [[THISCALL]]void @_ZNU11__unalignedK1A3fooEv(
+// CHECK: define {{(dso_local )?}}[[THISCALL]]void @_ZNU11__unalignedK1A3fooEv(
 
 void A::foo() volatile __unaligned {}
-// CHECK: define [[THISCALL]]void @_ZNU11__unalignedV1A3fooEv(
+// CHECK: define {{(dso_local )?}}[[THISCALL]]void @_ZNU11__unalignedV1A3fooEv(
 
 void A::foo() const volatile __unaligned {}
-// CHECK: define [[THISCALL]]void @_ZNU11__unalignedVK1A3fooEv(
+// CHECK: define {{(dso_local )?}}[[THISCALL]]void @_ZNU11__unalignedVK1A3fooEv(
diff --git a/test/CodeGenCXX/uncopyable-args.cpp b/test/CodeGenCXX/uncopyable-args.cpp
index 66d67e4..4c0241d 100644
--- a/test/CodeGenCXX/uncopyable-args.cpp
+++ b/test/CodeGenCXX/uncopyable-args.cpp
@@ -19,7 +19,7 @@
 // CHECK: call void @_ZN7trivial3fooENS_1AE(i8* %{{.*}})
 // CHECK-LABEL: declare void @_ZN7trivial3fooENS_1AE(i8*)
 
-// WIN64-LABEL: declare void @"\01?foo@trivial@@YAXUA@1@@Z"(i64)
+// WIN64-LABEL: declare dso_local void @"\01?foo@trivial@@YAXUA@1@@Z"(i64)
 }
 
 namespace default_ctor {
@@ -40,7 +40,7 @@
 // CHECK: call void @_ZN12default_ctor3fooENS_1AE(i8* %{{.*}})
 // CHECK-LABEL: declare void @_ZN12default_ctor3fooENS_1AE(i8*)
 
-// WIN64-LABEL: declare void @"\01?foo@default_ctor@@YAXUA@1@@Z"(i64)
+// WIN64-LABEL: declare dso_local void @"\01?foo@default_ctor@@YAXUA@1@@Z"(i64)
 }
 
 namespace move_ctor {
@@ -63,7 +63,7 @@
 // NEWABI-LABEL: declare void @_ZN9move_ctor3fooENS_1AE(%"struct.move_ctor::A"*)
 // OLDABI-LABEL: declare void @_ZN9move_ctor3fooENS_1AE(i8*)
 
-// WIN64-LABEL: declare void @"\01?foo@move_ctor@@YAXUA@1@@Z"(%"struct.move_ctor::A"*)
+// WIN64-LABEL: declare dso_local void @"\01?foo@move_ctor@@YAXUA@1@@Z"(%"struct.move_ctor::A"*)
 }
 
 namespace all_deleted {
@@ -85,7 +85,7 @@
 // NEWABI-LABEL: declare void @_ZN11all_deleted3fooENS_1AE(%"struct.all_deleted::A"*)
 // OLDABI-LABEL: declare void @_ZN11all_deleted3fooENS_1AE(i8*)
 
-// WIN64-LABEL: declare void @"\01?foo@all_deleted@@YAXUA@1@@Z"(%"struct.all_deleted::A"*)
+// WIN64-LABEL: declare dso_local void @"\01?foo@all_deleted@@YAXUA@1@@Z"(%"struct.all_deleted::A"*)
 }
 
 namespace implicitly_deleted {
@@ -107,8 +107,8 @@
 // OLDABI-LABEL: declare void @_ZN18implicitly_deleted3fooENS_1AE(i8*)
 
 // In MSVC 2013, the copy ctor is not deleted by a move assignment. In MSVC 2015, it is.
-// WIN64-18-LABEL: declare void @"\01?foo@implicitly_deleted@@YAXUA@1@@Z"(i64
-// WIN64-19-LABEL: declare void @"\01?foo@implicitly_deleted@@YAXUA@1@@Z"(%"struct.implicitly_deleted::A"*)
+// WIN64-18-LABEL: declare dso_local void @"\01?foo@implicitly_deleted@@YAXUA@1@@Z"(i64
+// WIN64-19-LABEL: declare dso_local void @"\01?foo@implicitly_deleted@@YAXUA@1@@Z"(%"struct.implicitly_deleted::A"*)
 }
 
 namespace one_deleted {
@@ -129,7 +129,7 @@
 // NEWABI-LABEL: declare void @_ZN11one_deleted3fooENS_1AE(%"struct.one_deleted::A"*)
 // OLDABI-LABEL: declare void @_ZN11one_deleted3fooENS_1AE(i8*)
 
-// WIN64-LABEL: declare void @"\01?foo@one_deleted@@YAXUA@1@@Z"(%"struct.one_deleted::A"*)
+// WIN64-LABEL: declare dso_local void @"\01?foo@one_deleted@@YAXUA@1@@Z"(%"struct.one_deleted::A"*)
 }
 
 namespace copy_defaulted {
@@ -149,7 +149,7 @@
 // CHECK: call void @_ZN14copy_defaulted3fooENS_1AE(i8* %{{.*}})
 // CHECK-LABEL: declare void @_ZN14copy_defaulted3fooENS_1AE(i8*)
 
-// WIN64-LABEL: declare void @"\01?foo@copy_defaulted@@YAXUA@1@@Z"(i64)
+// WIN64-LABEL: declare dso_local void @"\01?foo@copy_defaulted@@YAXUA@1@@Z"(i64)
 }
 
 namespace move_defaulted {
@@ -169,7 +169,7 @@
 // CHECK: call void @_ZN14move_defaulted3fooENS_1AE(i8* %{{.*}})
 // CHECK-LABEL: declare void @_ZN14move_defaulted3fooENS_1AE(i8*)
 
-// WIN64-LABEL: declare void @"\01?foo@move_defaulted@@YAXUA@1@@Z"(%"struct.move_defaulted::A"*)
+// WIN64-LABEL: declare dso_local void @"\01?foo@move_defaulted@@YAXUA@1@@Z"(%"struct.move_defaulted::A"*)
 }
 
 namespace trivial_defaulted {
@@ -188,7 +188,7 @@
 // CHECK: call void @_ZN17trivial_defaulted3fooENS_1AE(i8* %{{.*}})
 // CHECK-LABEL: declare void @_ZN17trivial_defaulted3fooENS_1AE(i8*)
 
-// WIN64-LABEL: declare void @"\01?foo@trivial_defaulted@@YAXUA@1@@Z"(i64)
+// WIN64-LABEL: declare dso_local void @"\01?foo@trivial_defaulted@@YAXUA@1@@Z"(i64)
 }
 
 namespace two_copy_ctors {
@@ -211,7 +211,7 @@
 // NEWABI-LABEL: declare void @_ZN14two_copy_ctors3fooENS_1BE(%"struct.two_copy_ctors::B"*)
 // OLDABI-LABEL: declare void @_ZN14two_copy_ctors3fooENS_1BE(%"struct.two_copy_ctors::B"* byval
 
-// WIN64-LABEL: declare void @"\01?foo@two_copy_ctors@@YAXUB@1@@Z"(%"struct.two_copy_ctors::B"*)
+// WIN64-LABEL: declare dso_local void @"\01?foo@two_copy_ctors@@YAXUB@1@@Z"(%"struct.two_copy_ctors::B"*)
 }
 
 namespace definition_only {
@@ -223,7 +223,7 @@
 void *foo(A a) { return a.p; }
 // NEWABI-LABEL: define i8* @_ZN15definition_only3fooENS_1AE(%"struct.definition_only::A"*
 // OLDABI-LABEL: define i8* @_ZN15definition_only3fooENS_1AE(i8*
-// WIN64-LABEL: define i8* @"\01?foo@definition_only@@YAPEAXUA@1@@Z"(%"struct.definition_only::A"*
+// WIN64-LABEL: define dso_local i8* @"\01?foo@definition_only@@YAPEAXUA@1@@Z"(%"struct.definition_only::A"*
 }
 
 namespace deleted_by_member {
@@ -239,7 +239,7 @@
 void *foo(A a) { return a.b.p; }
 // NEWABI-LABEL: define i8* @_ZN17deleted_by_member3fooENS_1AE(%"struct.deleted_by_member::A"*
 // OLDABI-LABEL: define i8* @_ZN17deleted_by_member3fooENS_1AE(i8*
-// WIN64-LABEL: define i8* @"\01?foo@deleted_by_member@@YAPEAXUA@1@@Z"(%"struct.deleted_by_member::A"*
+// WIN64-LABEL: define dso_local i8* @"\01?foo@deleted_by_member@@YAPEAXUA@1@@Z"(%"struct.deleted_by_member::A"*
 }
 
 namespace deleted_by_base {
@@ -254,7 +254,7 @@
 void *foo(A a) { return a.p; }
 // NEWABI-LABEL: define i8* @_ZN15deleted_by_base3fooENS_1AE(%"struct.deleted_by_base::A"*
 // OLDABI-LABEL: define i8* @_ZN15deleted_by_base3fooENS_1AE(i8*
-// WIN64-LABEL: define i8* @"\01?foo@deleted_by_base@@YAPEAXUA@1@@Z"(%"struct.deleted_by_base::A"*
+// WIN64-LABEL: define dso_local i8* @"\01?foo@deleted_by_base@@YAPEAXUA@1@@Z"(%"struct.deleted_by_base::A"*
 }
 
 namespace deleted_by_member_copy {
@@ -270,7 +270,7 @@
 void *foo(A a) { return a.b.p; }
 // NEWABI-LABEL: define i8* @_ZN22deleted_by_member_copy3fooENS_1AE(%"struct.deleted_by_member_copy::A"*
 // OLDABI-LABEL: define i8* @_ZN22deleted_by_member_copy3fooENS_1AE(i8*
-// WIN64-LABEL: define i8* @"\01?foo@deleted_by_member_copy@@YAPEAXUA@1@@Z"(%"struct.deleted_by_member_copy::A"*
+// WIN64-LABEL: define dso_local i8* @"\01?foo@deleted_by_member_copy@@YAPEAXUA@1@@Z"(%"struct.deleted_by_member_copy::A"*
 }
 
 namespace deleted_by_base_copy {
@@ -285,7 +285,7 @@
 void *foo(A a) { return a.p; }
 // NEWABI-LABEL: define i8* @_ZN20deleted_by_base_copy3fooENS_1AE(%"struct.deleted_by_base_copy::A"*
 // OLDABI-LABEL: define i8* @_ZN20deleted_by_base_copy3fooENS_1AE(i8*
-// WIN64-LABEL: define i8* @"\01?foo@deleted_by_base_copy@@YAPEAXUA@1@@Z"(%"struct.deleted_by_base_copy::A"*
+// WIN64-LABEL: define dso_local i8* @"\01?foo@deleted_by_base_copy@@YAPEAXUA@1@@Z"(%"struct.deleted_by_base_copy::A"*
 }
 
 namespace explicit_delete {
@@ -296,7 +296,7 @@
 };
 // NEWABI-LABEL: define i8* @_ZN15explicit_delete3fooENS_1AE(%"struct.explicit_delete::A"*
 // OLDABI-LABEL: define i8* @_ZN15explicit_delete3fooENS_1AE(i8*
-// WIN64-LABEL: define i8* @"\01?foo@explicit_delete@@YAPEAXUA@1@@Z"(%"struct.explicit_delete::A"*
+// WIN64-LABEL: define dso_local i8* @"\01?foo@explicit_delete@@YAPEAXUA@1@@Z"(%"struct.explicit_delete::A"*
 void *foo(A a) { return a.p; }
 }
 
diff --git a/test/CodeGenCXX/value-init.cpp b/test/CodeGenCXX/value-init.cpp
index fc4e0d3..348cbf7 100644
--- a/test/CodeGenCXX/value-init.cpp
+++ b/test/CodeGenCXX/value-init.cpp
@@ -244,7 +244,7 @@
   struct C : B { C(); };      
   C::C() : A(3), B() {}
   // CHECK-LABEL: define void @_ZN7PR111241CC1Ev
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 12, i32 8, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 12, i1 false)
   // CHECK-NEXT: call void @_ZN7PR111241BC2Ev
   // Make sure C::C doesn't overwrite parts of A while it is zero-initializing B
 
@@ -252,7 +252,7 @@
   struct C2 : B2 { C2(); };      
   C2::C2() : A(3), B2() {}
   // CHECK-LABEL: define void @_ZN7PR111242C2C1Ev
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* {{.*}}, i64 16, i32 8, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 {{.*}}, i64 16, i1 false)
   // CHECK-NEXT: call void @_ZN7PR111242B2C2Ev
 }
 
diff --git a/test/CodeGenCXX/vararg-non-pod-ms-compat.cpp b/test/CodeGenCXX/vararg-non-pod-ms-compat.cpp
index 64b1c52..f63f9bf 100644
--- a/test/CodeGenCXX/vararg-non-pod-ms-compat.cpp
+++ b/test/CodeGenCXX/vararg-non-pod-ms-compat.cpp
@@ -10,7 +10,7 @@
 void vararg(...);
 
 void test(X x) {
-  // CHECK-LABEL: define void @"\01?test@@YAXUX@@@Z"
+  // CHECK-LABEL: define dso_local void @"\01?test@@YAXUX@@@Z"
 
   // X86: %[[argmem:[^ ]*]] = alloca inalloca <{ %struct.X }>
   // X86: call void (<{ %struct.X }>*, ...) bitcast (void (...)* @"\01?vararg@@YAXZZ" to void (<{ %struct.X }>*, ...)*)(<{ %struct.X }>* inalloca %[[argmem]])
diff --git a/test/CodeGenCXX/varargs.cpp b/test/CodeGenCXX/varargs.cpp
index e016599..1f82d60 100644
--- a/test/CodeGenCXX/varargs.cpp
+++ b/test/CodeGenCXX/varargs.cpp
@@ -35,7 +35,7 @@
   // CHECK-NEXT: [[TMP:%.*]] = alloca [[A]], align 4
   // CHECK-NEXT: [[T0:%.*]] = bitcast [[A]]* [[TMP]] to i8*
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[X]] to i8*
-  // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 8, i32 4, i1 false)
+  // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T0]], i8* align 4 [[T1]], i64 8, i1 false)
   // CHECK-NEXT: [[T0:%.*]] = bitcast [[A]]* [[TMP]] to i64*
   // CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[T0]], align 4
   // CHECK-NEXT: call void (...) @_ZN5test13fooEz(i64 [[T1]])
diff --git a/test/CodeGenCXX/virt-template-vtable.cpp b/test/CodeGenCXX/virt-template-vtable.cpp
index a71db48..66d2332 100644
--- a/test/CodeGenCXX/virt-template-vtable.cpp
+++ b/test/CodeGenCXX/virt-template-vtable.cpp
@@ -16,10 +16,10 @@
 template class A<short>;
 
 
-// CHECK: @_ZTV1B = linkonce_odr unnamed_addr constant
-// CHECK: @_ZTV1AIlE = weak_odr unnamed_addr constant
-// CHECK: @_ZTV1AIsE = weak_odr unnamed_addr constant
-// CHECK: @_ZTV1AIiE = linkonce_odr unnamed_addr constant
+// CHECK: @_ZTV1B = linkonce_odr {{(dso_local )?}}unnamed_addr constant
+// CHECK: @_ZTV1AIlE = weak_odr {{(dso_local )?}}unnamed_addr constant
+// CHECK: @_ZTV1AIsE = weak_odr {{(dso_local )?}}unnamed_addr constant
+// CHECK: @_ZTV1AIiE = linkonce_odr {{(dso_local )?}}unnamed_addr constant
 
 template<class T> struct C {
   virtual void c() {}
diff --git a/test/CodeGenCXX/virtual-base-ctor.cpp b/test/CodeGenCXX/virtual-base-ctor.cpp
index 8c8c310..e32296f 100644
--- a/test/CodeGenCXX/virtual-base-ctor.cpp
+++ b/test/CodeGenCXX/virtual-base-ctor.cpp
@@ -8,4 +8,4 @@
 struct B : virtual A { void* x; };    
 B x;
 
-// CHECK: @y = local_unnamed_addr global i8 2
+// CHECK: @y = {{(dso_local )?}}local_unnamed_addr global i8 2
diff --git a/test/CodeGenCXX/virtual-bases.cpp b/test/CodeGenCXX/virtual-bases.cpp
index e9c568c..18d4bf0 100644
--- a/test/CodeGenCXX/virtual-bases.cpp
+++ b/test/CodeGenCXX/virtual-bases.cpp
@@ -46,3 +46,37 @@
 D::D() { }
 
 }
+
+namespace virtualBaseAlignment {
+
+// Check that the store to B::x in the base constructor has an 8-byte alignment.
+
+// CHECK: define linkonce_odr void @_ZN20virtualBaseAlignment1BC1Ev(%[[STRUCT_B:.*]]* %[[THIS:.*]])
+// CHECK: %[[THIS_ADDR:.*]] = alloca %[[STRUCT_B]]*, align 8
+// CHECK: store %[[STRUCT_B]]* %[[THIS]], %[[STRUCT_B]]** %[[THIS_ADDR]], align 8
+// CHECK: %[[THIS1:.*]] = load %[[STRUCT_B]]*, %[[STRUCT_B]]** %[[THIS_ADDR]], align 8
+// CHECK: %[[X:.*]] = getelementptr inbounds %[[STRUCT_B]], %[[STRUCT_B]]* %[[THIS1]], i32 0, i32 2
+// CHECK: store i32 123, i32* %[[X]], align 16
+
+// CHECK: define linkonce_odr void @_ZN20virtualBaseAlignment1BC2Ev(%[[STRUCT_B]]* %[[THIS:.*]], i8** %{{.*}})
+// CHECK: %[[THIS_ADDR:.*]] = alloca %[[STRUCT_B]]*, align 8
+// CHECK: store %[[STRUCT_B]]* %[[THIS]], %[[STRUCT_B]]** %[[THIS_ADDR]], align 8
+// CHECK: %[[THIS1:.*]] = load %[[STRUCT_B]]*, %[[STRUCT_B]]** %[[THIS_ADDR]], align 8
+// CHECK: %[[X:.*]] = getelementptr inbounds %[[STRUCT_B]], %[[STRUCT_B]]* %[[THIS1]], i32 0, i32 2
+// CHECK: store i32 123, i32* %[[X]], align 8
+
+struct A {
+  __attribute__((aligned(16))) double data1;
+};
+
+struct B : public virtual A {
+  B() : x(123) {}
+  double a;
+  int x;
+};
+
+struct C : public virtual B {};
+
+void test() { B b; C c; }
+
+}
diff --git a/test/CodeGenCXX/virtual-function-attrs.cpp b/test/CodeGenCXX/virtual-function-attrs.cpp
index 3a9a1a2..873412d 100644
--- a/test/CodeGenCXX/virtual-function-attrs.cpp
+++ b/test/CodeGenCXX/virtual-function-attrs.cpp
@@ -8,7 +8,7 @@
 
 void A::f() {}
 
-// CHECK: define [[CC:(x86_thiscallcc )?]]void @_ZN1A1fEv({{.*}}) unnamed_addr
-// CHECK: declare [[CC]]void @_ZN1A1gEv({{.*}}) unnamed_addr
+// CHECK: define {{(dso_local )?}}[[CC:(x86_thiscallcc )?]]void @_ZN1A1fEv({{.*}}) unnamed_addr
+// CHECK: declare {{(dso_local )?}}[[CC]]void @_ZN1A1gEv({{.*}}) unnamed_addr
 // CHECK: declare {{.*}} @_ZN1AD1Ev({{.*}}) unnamed_addr
-// CHECK: declare [[CC]]void @_ZN1AD0Ev({{.*}}) unnamed_addr
+// CHECK: declare {{(dso_local )?}}[[CC]]void @_ZN1AD0Ev({{.*}}) unnamed_addr
diff --git a/test/CodeGenCXX/virtual-function-calls.cpp b/test/CodeGenCXX/virtual-function-calls.cpp
index 22944d9..61b2fba 100644
--- a/test/CodeGenCXX/virtual-function-calls.cpp
+++ b/test/CodeGenCXX/virtual-function-calls.cpp
@@ -44,7 +44,7 @@
   };
 
   // CHECK-LABEL: @_ZN15VirtualNoreturn1f
-  // CHECK-INVARIANT-LABEL: define void @_ZN15VirtualNoreturn1f
+  // CHECK-INVARIANT-LABEL: define {{(dso_local )?}}void @_ZN15VirtualNoreturn1f
   void f(A *p) {
     p->f();
     // CHECK: call {{.*}}void %{{[^#]*$}}
diff --git a/test/CodeGenCXX/vla-consruct.cpp b/test/CodeGenCXX/vla-consruct.cpp
index fd8314a..87191fe 100644
--- a/test/CodeGenCXX/vla-consruct.cpp
+++ b/test/CodeGenCXX/vla-consruct.cpp
@@ -21,6 +21,8 @@
   // CHECK: define void {{.*test.*}}(i32 [[n:%.+]]) #
   // CHECK: [[n_addr:%.+]] = alloca
   // CHECK-NEXT: [[saved_stack:%.+]] = alloca
+  // CHECK-NEXT: [[vla_expr:%.+]] = alloca i64, align 8
+  // CHECK-NEXT: [[vla_expr1:%.+]] = alloca i64, align 8
   // CHECK-NEXT: [[sizeof_S:%.+]] = alloca
   // CHECK-NEXT: [[sizeof_array_t_0_0:%.+]] = alloca
   // CHECK-NEXT: [[sizeof_array_t_0:%.+]] = alloca
@@ -37,6 +39,8 @@
   // CHECK-NEXT: store i8* [[t4]], i8** [[saved_stack]]
   // CHECK-NEXT: [[t5:%.+]] = mul nuw i64 [[t1]], [[t3]]
   // CHECK-NEXT: [[vla:%.+]] = alloca [[struct_S]], i64 [[t5]]
+  // CHECK-NEXT: store i64 [[t1]], i64* [[vla_expr]]
+  // CHECK-NEXT: store i64 [[t3]], i64* [[vla_expr1]]
   // CHECK-NEXT: [[t6:%.+]] = mul nuw i64 [[t1]], [[t3]]
   // CHECK-NEXT: [[isempty:%.+]] = icmp eq i64 [[t6]], 0
   // CHECK-NEXT: br i1 [[isempty]], label %[[arrayctor_cont:.+]], label %[[new_ctorloop:.+]]
diff --git a/test/CodeGenCXX/vla.cpp b/test/CodeGenCXX/vla.cpp
index 957a9f9..81ff62d 100644
--- a/test/CodeGenCXX/vla.cpp
+++ b/test/CodeGenCXX/vla.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck -check-prefixes=X64,CHECK %s
+// RUN: %clang_cc1 -std=c++11 -triple amdgcn %s -emit-llvm -o - | FileCheck -check-prefixes=AMDGCN,CHECK %s
 
 template<typename T>
 struct S {
@@ -9,7 +10,7 @@
 int f() {
   // Make sure that the reference here is enough to trigger the instantiation of
   // the static data member.
-  // CHECK: @_ZN1SIiE1nE = linkonce_odr global i32 5
+  // CHECK: @_ZN1SIiE1nE = linkonce_odr{{.*}} global i32 5
   int a[S<int>::n];
   return sizeof a;
 }
@@ -17,10 +18,18 @@
 // rdar://problem/9506377
 void test0(void *array, int n) {
   // CHECK-LABEL: define void @_Z5test0Pvi(
-  // CHECK:      [[ARRAY:%.*]] = alloca i8*, align 8
-  // CHECK-NEXT: [[N:%.*]] = alloca i32, align 4
-  // CHECK-NEXT: [[REF:%.*]] = alloca i16*, align 8
-  // CHECK-NEXT: [[S:%.*]] = alloca i16, align 2
+  // X64:        [[ARRAY:%.*]] = alloca i8*, align 8
+  // AMDGCN:        [[ARRAY0:%.*]] = alloca i8*, align 8, addrspace(5)
+  // AMDGCN-NEXT:   [[ARRAY:%.*]] = addrspacecast i8* addrspace(5)* [[ARRAY0]] to i8**
+  // X64-NEXT:   [[N:%.*]] = alloca i32, align 4
+  // AMDGCN:        [[N0:%.*]] = alloca i32, align 4, addrspace(5)
+  // AMDGCN-NEXT:   [[N:%.*]] = addrspacecast i32 addrspace(5)* [[N0]] to i32*
+  // X64-NEXT:   [[REF:%.*]] = alloca i16*, align 8
+  // AMDGCN:        [[REF0:%.*]] = alloca i16*, align 8, addrspace(5)
+  // AMDGCN-NEXT:   [[REF:%.*]] = addrspacecast i16* addrspace(5)* [[REF0]] to i16**
+  // X64-NEXT:   [[S:%.*]] = alloca i16, align 2
+  // AMDGCN:        [[S0:%.*]] = alloca i16, align 2, addrspace(5)
+  // AMDGCN-NEXT:   [[S:%.*]] = addrspacecast i16 addrspace(5)* [[S0]] to i16*
   // CHECK-NEXT: store i8* 
   // CHECK-NEXT: store i32
 
@@ -59,6 +68,8 @@
 void test2(int b) {
   // CHECK-LABEL: define void {{.*}}test2{{.*}}(i32 %b)
   int varr[b];
+  // AMDGCN: %__end1 = alloca i32*, align 8, addrspace(5)
+  // AMDGCN: [[END:%.*]] = addrspacecast i32* addrspace(5)* %__end1 to i32**
   // get the address of %b by checking the first store that stores it 
   //CHECK: store i32 %b, i32* [[PTR_B:%.*]]
 
@@ -75,13 +86,16 @@
   //CHECK: [[VLA_SIZEOF:%.*]] = mul nuw i64 4, [[VLA_NUM_ELEMENTS_PRE]]
   //CHECK-NEXT: [[VLA_NUM_ELEMENTS_POST:%.*]] = udiv i64 [[VLA_SIZEOF]], 4
   //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, i32* {{%.*}}, i64 [[VLA_NUM_ELEMENTS_POST]]
-  //CHECK-NEXT: store i32* [[VLA_END_PTR]], i32** %__end
+  //X64-NEXT: store i32* [[VLA_END_PTR]], i32** %__end1
+  //AMDGCN-NEXT: store i32* [[VLA_END_PTR]], i32** [[END]]
   for (int d : varr) 0;
 }
 
 void test3(int b, int c) {
   // CHECK-LABEL: define void {{.*}}test3{{.*}}(i32 %b, i32 %c)
   int varr[b][c];
+  // AMDGCN: %__end1 = alloca i32*, align 8, addrspace(5)
+  // AMDGCN: [[END:%.*]] = addrspacecast i32* addrspace(5)* %__end1 to i32**
   // get the address of %b by checking the first store that stores it 
   //CHECK: store i32 %b, i32* [[PTR_B:%.*]]
   //CHECK-NEXT: store i32 %c, i32* [[PTR_C:%.*]]
@@ -105,7 +119,8 @@
   //CHECK-NEXT: [[VLA_NUM_ELEMENTS:%.*]] = udiv i64 [[VLA_SIZEOF]], [[VLA_SIZEOF_DIM2]]
   //CHECK-NEXT: [[VLA_END_INDEX:%.*]] = mul nsw i64 [[VLA_NUM_ELEMENTS]], [[VLA_DIM2_PRE]]
   //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, i32* {{%.*}}, i64 [[VLA_END_INDEX]]
-  //CHECK-NEXT: store i32* [[VLA_END_PTR]], i32** %__end
+  //X64-NEXT: store i32* [[VLA_END_PTR]], i32** %__end
+  //AMDGCN-NEXT: store i32* [[VLA_END_PTR]], i32** [[END]]
  
   for (auto &d : varr) 0;
 }
diff --git a/test/CodeGenCXX/volatile-1.cpp b/test/CodeGenCXX/volatile-1.cpp
index 75fb0d2..525e828 100644
--- a/test/CodeGenCXX/volatile-1.cpp
+++ b/test/CodeGenCXX/volatile-1.cpp
@@ -1,11 +1,11 @@
 // RUN: %clang_cc1 -Wno-unused-value -triple %itanium_abi_triple -emit-llvm %s -std=c++98 -o - | FileCheck %s
 // RUN: %clang_cc1 -Wno-unused-value -triple %itanium_abi_triple -emit-llvm %s -std=c++11 -o - | FileCheck -check-prefix=CHECK -check-prefix=CHECK11 %s
 
-// CHECK: @i = global [[INT:i[0-9]+]] 0
+// CHECK: @i = {{(dso_local )?}}global [[INT:i[0-9]+]] 0
 volatile int i, j, k;
 volatile int ar[5];
 volatile char c;
-// CHECK: @ci = global [[CINT:.*]] zeroinitializer
+// CHECK: @ci = {{(dso_local )?}}global [[CINT:.*]] zeroinitializer
 volatile _Complex int ci;
 volatile struct S {
 #ifdef __cplusplus
diff --git a/test/CodeGenCXX/vtable-assume-load.cpp b/test/CodeGenCXX/vtable-assume-load.cpp
index b0857c2..9ca77ed 100644
--- a/test/CodeGenCXX/vtable-assume-load.cpp
+++ b/test/CodeGenCXX/vtable-assume-load.cpp
@@ -163,7 +163,7 @@
 void g(S &s) { s.foo(); }
 
 // if struct has novtable specifier, then we can't generate assumes
-// CHECK-MS-LABEL: define void @"\01?test@testMS@@YAXXZ"()
+// CHECK-MS-LABEL: define dso_local void @"\01?test@testMS@@YAXXZ"()
 // CHECK-MS: call x86_thiscallcc %"struct.testMS::S"* @"\01??0S@testMS@@QAE@XZ"(
 // CHECK-MS-NOT: @llvm.assume
 // CHECK-MS-LABEL: {{^}}}
diff --git a/test/CodeGenCXX/vtable-available-externally.cpp b/test/CodeGenCXX/vtable-available-externally.cpp
index 2e2cdbb..0e7e8b4 100644
--- a/test/CodeGenCXX/vtable-available-externally.cpp
+++ b/test/CodeGenCXX/vtable-available-externally.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 %s -I%S -triple=x86_64-apple-darwin10 -emit-llvm -o %t
-// RUN: %clang_cc1 %s -I%S -triple=x86_64-apple-darwin10 -O2 -disable-llvm-passes -emit-llvm -o %t.opt
+// RUN: %clang_cc1 %s -I%S -triple=x86_64-apple-darwin10 -std=c++98 -emit-llvm -o %t
+// RUN: %clang_cc1 %s -I%S -triple=x86_64-apple-darwin10 -std=c++98 -O2 -disable-llvm-passes -emit-llvm -o %t.opt
 // RUN: FileCheck --check-prefix=CHECK-TEST1 %s < %t
 // RUN: FileCheck --check-prefix=CHECK-TEST2 %s < %t
 // RUN: FileCheck --check-prefix=CHECK-TEST5 %s < %t
diff --git a/test/CodeGenCXX/vtable-key-function-ios.cpp b/test/CodeGenCXX/vtable-key-function-ios.cpp
index 8a3466b..a119c78 100644
--- a/test/CodeGenCXX/vtable-key-function-ios.cpp
+++ b/test/CodeGenCXX/vtable-key-function-ios.cpp
@@ -28,8 +28,8 @@
 
 // V-table should be defined externally.
 Test0a::Test0a() { use(typeid(Test0a)); }
-// CHECK: @_ZTV6Test0a = external unnamed_addr constant 
-// CHECK: @_ZTI6Test0a = external constant
+// CHECK: @_ZTV6Test0a = external {{(dso_local )?}}unnamed_addr constant 
+// CHECK: @_ZTI6Test0a = external {{(dso_local )?}}constant
 
 // This is not a key function.
 void Test0a::foo() {}
@@ -47,8 +47,8 @@
 
 // V-table should be defined externally.
 Test0b::Test0b() { use(typeid(Test0b)); }
-// CHECK: @_ZTV6Test0b = external unnamed_addr constant 
-// CHECK: @_ZTI6Test0b = external constant
+// CHECK: @_ZTV6Test0b = external {{(dso_local )?}}unnamed_addr constant 
+// CHECK: @_ZTI6Test0b = external {{(dso_local )?}}constant
 
 /*** Test1a ******************************************************************/
 
@@ -60,9 +60,9 @@
 
 // V-table needs to be defined weakly.
 Test1a::Test1a() { use(typeid(Test1a)); }
-// CHECK:      @_ZTV6Test1a = linkonce_odr unnamed_addr constant 
-// CHECK-LATE: @_ZTS6Test1a = linkonce_odr constant
-// CHECK-LATE: @_ZTI6Test1a = linkonce_odr constant
+// CHECK:      @_ZTV6Test1a = linkonce_odr {{(dso_local )?}}unnamed_addr constant 
+// CHECK-LATE: @_ZTS6Test1a = linkonce_odr {{(dso_local )?}}constant
+// CHECK-LATE: @_ZTI6Test1a = linkonce_odr {{(dso_local )?}}constant
 
 // This defines the key function.
 inline void Test1a::foo() {}
@@ -80,9 +80,9 @@
 
 // V-table should be defined weakly..
 Test1b::Test1b() { use(typeid(Test1b)); }
-// CHECK: @_ZTV6Test1b = linkonce_odr unnamed_addr constant 
-// CHECK: @_ZTS6Test1b = linkonce_odr constant
-// CHECK: @_ZTI6Test1b = linkonce_odr constant
+// CHECK: @_ZTV6Test1b = linkonce_odr {{(dso_local )?}}unnamed_addr constant 
+// CHECK: @_ZTS6Test1b = linkonce_odr {{(dso_local )?}}constant
+// CHECK: @_ZTI6Test1b = linkonce_odr {{(dso_local )?}}constant
 
 /*** Test2a ******************************************************************/
 
@@ -94,9 +94,9 @@
 
 // V-table should be defined with weak linkage.
 Test2a::Test2a() { use(typeid(Test2a)); }
-// CHECK:      @_ZTV6Test2a = linkonce_odr unnamed_addr constant
-// CHECK-LATE: @_ZTS6Test2a = linkonce_odr constant
-// CHECK-LATE: @_ZTI6Test2a = linkonce_odr constant
+// CHECK:      @_ZTV6Test2a = linkonce_odr {{(dso_local )?}}unnamed_addr constant
+// CHECK-LATE: @_ZTS6Test2a = linkonce_odr {{(dso_local )?}}constant
+// CHECK-LATE: @_ZTI6Test2a = linkonce_odr {{(dso_local )?}}constant
 
 void Test2a::bar() {}
 inline void Test2a::foo() {}
@@ -113,9 +113,9 @@
 
 // V-table should be defined with weak linkage.
 Test2b::Test2b() { use(typeid(Test2b)); }
-// CHECK:      @_ZTV6Test2b = linkonce_odr unnamed_addr constant
-// CHECK-LATE: @_ZTS6Test2b = linkonce_odr constant
-// CHECK-LATE: @_ZTI6Test2b = linkonce_odr constant
+// CHECK:      @_ZTV6Test2b = linkonce_odr {{(dso_local )?}}unnamed_addr constant
+// CHECK-LATE: @_ZTS6Test2b = linkonce_odr {{(dso_local )?}}constant
+// CHECK-LATE: @_ZTI6Test2b = linkonce_odr {{(dso_local )?}}constant
 
 inline void Test2b::foo() {}
 
@@ -132,9 +132,9 @@
 
 // V-table should be defined with weak linkage.
 Test2c::Test2c() { use(typeid(Test2c)); }
-// CHECK: @_ZTV6Test2c = linkonce_odr unnamed_addr constant
-// CHECK: @_ZTS6Test2c = linkonce_odr constant
-// CHECK: @_ZTI6Test2c = linkonce_odr constant
+// CHECK: @_ZTV6Test2c = linkonce_odr {{(dso_local )?}}unnamed_addr constant
+// CHECK: @_ZTS6Test2c = linkonce_odr {{(dso_local )?}}constant
+// CHECK: @_ZTI6Test2c = linkonce_odr {{(dso_local )?}}constant
 
 /*** Test3a ******************************************************************/
 
@@ -146,9 +146,9 @@
 
 // V-table should be defined with weak linkage.
 Test3a::Test3a() { use(typeid(Test3a)); }
-// CHECK:      @_ZTV6Test3a = linkonce_odr unnamed_addr constant
-// CHECK-LATE: @_ZTS6Test3a = linkonce_odr constant
-// CHECK-LATE: @_ZTI6Test3a = linkonce_odr constant
+// CHECK:      @_ZTV6Test3a = linkonce_odr {{(dso_local )?}}unnamed_addr constant
+// CHECK-LATE: @_ZTS6Test3a = linkonce_odr {{(dso_local )?}}constant
+// CHECK-LATE: @_ZTI6Test3a = linkonce_odr {{(dso_local )?}}constant
 
 // This defines the key function.
 inline void Test3a::bar() {}
@@ -166,9 +166,9 @@
 
 // V-table should be defined with weak linkage.
 Test3b::Test3b() { use(typeid(Test3b)); }
-// CHECK:      @_ZTV6Test3b = linkonce_odr unnamed_addr constant
-// CHECK-LATE: @_ZTS6Test3b = linkonce_odr constant
-// CHECK-LATE: @_ZTI6Test3b = linkonce_odr constant
+// CHECK:      @_ZTV6Test3b = linkonce_odr {{(dso_local )?}}unnamed_addr constant
+// CHECK-LATE: @_ZTS6Test3b = linkonce_odr {{(dso_local )?}}constant
+// CHECK-LATE: @_ZTI6Test3b = linkonce_odr {{(dso_local )?}}constant
 
 // This defines the key function.
 inline void Test3b::foo() {}
@@ -187,6 +187,6 @@
 
 // V-table should be defined with weak linkage.
 Test3c::Test3c() { use(typeid(Test3c)); }
-// CHECK: @_ZTV6Test3c = linkonce_odr unnamed_addr constant
-// CHECK: @_ZTS6Test3c = linkonce_odr constant
-// CHECK: @_ZTI6Test3c = linkonce_odr constant
+// CHECK: @_ZTV6Test3c = linkonce_odr {{(dso_local )?}}unnamed_addr constant
+// CHECK: @_ZTS6Test3c = linkonce_odr {{(dso_local )?}}constant
+// CHECK: @_ZTI6Test3c = linkonce_odr {{(dso_local )?}}constant
diff --git a/test/CodeGenCXX/vtable-key-function-win-comdat.cpp b/test/CodeGenCXX/vtable-key-function-win-comdat.cpp
index 3dd1be7..e7c5be9 100644
--- a/test/CodeGenCXX/vtable-key-function-win-comdat.cpp
+++ b/test/CodeGenCXX/vtable-key-function-win-comdat.cpp
@@ -20,6 +20,6 @@
 // CHECK-NOT: $_ZTS6Test1a.1 = comdat any
 // CHECK-NOT: $_ZTI6Test1a.1 = comdat any
 
-// CHECK: @_ZTV6Test1a = linkonce_odr unnamed_addr constant {{.*}} ({ i8*, i8* }* @_ZTI6Test1a to i8*)
-// CHECK: @_ZTS6Test1a = linkonce_odr constant
-// CHECK: @_ZTI6Test1a = linkonce_odr constant {{.*}} [8 x i8]* @_ZTS6Test1a
+// CHECK: @_ZTV6Test1a = linkonce_odr dso_local unnamed_addr constant {{.*}} ({ i8*, i8* }* @_ZTI6Test1a to i8*)
+// CHECK: @_ZTS6Test1a = linkonce_odr dso_local constant
+// CHECK: @_ZTI6Test1a = linkonce_odr dso_local constant {{.*}} [8 x i8]* @_ZTS6Test1a
diff --git a/test/CodeGenCXX/weak-extern-typeinfo.cpp b/test/CodeGenCXX/weak-extern-typeinfo.cpp
index 38f6a3e..6bbb473 100644
--- a/test/CodeGenCXX/weak-extern-typeinfo.cpp
+++ b/test/CodeGenCXX/weak-extern-typeinfo.cpp
@@ -31,17 +31,17 @@
 void V1::foo() { }
 void V2::foo() { }
 
-// CHECK: @_ZTS1A = weak_odr constant
-// CHECK: @_ZTI1A = weak_odr constant
-// CHECK: @_ZTS1B = weak_odr constant
-// CHECK: @_ZTI1B = weak_odr constant
-// CHECK: @_ZTS1C = weak_odr constant
-// CHECK: @_ZTS2T1 = linkonce_odr constant
-// CHECK: @_ZTI2T1 = linkonce_odr constant
-// CHECK: @_ZTS1T = linkonce_odr constant
-// CHECK: @_ZTI1T = linkonce_odr constant
-// CHECK: @_ZTI1C = weak_odr constant
-// CHECK: @_ZTS2V1 = weak_odr constant
-// CHECK: @_ZTI2V1 = weak_odr constant
-// CHECK: @_ZTS2V2 = weak_odr constant
-// CHECK: @_ZTI2V2 = weak_odr constant
+// CHECK: @_ZTS1A = weak_odr {{(dso_local )?}}constant
+// CHECK: @_ZTI1A = weak_odr {{(dso_local )?}}constant
+// CHECK: @_ZTS1B = weak_odr {{(dso_local )?}}constant
+// CHECK: @_ZTI1B = weak_odr {{(dso_local )?}}constant
+// CHECK: @_ZTS1C = weak_odr {{(dso_local )?}}constant
+// CHECK: @_ZTS2T1 = linkonce_odr {{(dso_local )?}}constant
+// CHECK: @_ZTI2T1 = linkonce_odr {{(dso_local )?}}constant
+// CHECK: @_ZTS1T = linkonce_odr {{(dso_local )?}}constant
+// CHECK: @_ZTI1T = linkonce_odr {{(dso_local )?}}constant
+// CHECK: @_ZTI1C = weak_odr {{(dso_local )?}}constant
+// CHECK: @_ZTS2V1 = weak_odr {{(dso_local )?}}constant
+// CHECK: @_ZTI2V1 = weak_odr {{(dso_local )?}}constant
+// CHECK: @_ZTS2V2 = weak_odr {{(dso_local )?}}constant
+// CHECK: @_ZTI2V2 = weak_odr {{(dso_local )?}}constant
diff --git a/test/CodeGenCXX/windows-itanium-type-info.cpp b/test/CodeGenCXX/windows-itanium-type-info.cpp
index 285b598..20bd78d 100644
--- a/test/CodeGenCXX/windows-itanium-type-info.cpp
+++ b/test/CodeGenCXX/windows-itanium-type-info.cpp
@@ -24,17 +24,17 @@
   throw base();
 }
 
-// CHECK-DAG: @_ZTIi = dllexport constant
-// CHECK-DAG: @_ZTSi = dllexport constant
+// CHECK-DAG: @_ZTIi = dso_local dllexport constant
+// CHECK-DAG: @_ZTSi = dso_local dllexport constant
 
-// CHECK-DAG: @_ZTI7derived = dllexport constant
-// CHECK-DAG: @_ZTS7derived = dllexport constant
-// CHECK-DAG: @_ZTV7derived = dllexport unnamed_addr constant
+// CHECK-DAG: @_ZTI7derived = dso_local dllexport constant
+// CHECK-DAG: @_ZTS7derived = dso_local dllexport constant
+// CHECK-DAG: @_ZTV7derived = dso_local dllexport unnamed_addr constant
 
 // CHECK-DAG: @_ZTI4base = external dllimport constant
 
-// CHECK-EH-IMPORT: @_ZTS4base = linkonce_odr constant
-// CHECK-EH-IMPORT: @_ZTI4base = linkonce_odr constant
+// CHECK-EH-IMPORT: @_ZTS4base = linkonce_odr dso_local constant
+// CHECK-EH-IMPORT: @_ZTI4base = linkonce_odr dso_local constant
 
 struct __declspec(dllimport) gatekeeper {};
 struct zuul : gatekeeper {
@@ -42,5 +42,5 @@
 };
 zuul::~zuul() {}
 
-// CHECK-DAG: @_ZTI10gatekeeper = linkonce_odr constant
-// CHECK-DAG: @_ZTS10gatekeeper = linkonce_odr constant
+// CHECK-DAG: @_ZTI10gatekeeper = linkonce_odr dso_local constant
+// CHECK-DAG: @_ZTS10gatekeeper = linkonce_odr dso_local constant
diff --git a/test/CodeGenCoroutines/coro-alloc.cpp b/test/CodeGenCoroutines/coro-alloc.cpp
index 820201d..1cf9b8c 100644
--- a/test/CodeGenCoroutines/coro-alloc.cpp
+++ b/test/CodeGenCoroutines/coro-alloc.cpp
@@ -106,6 +106,34 @@
   co_return;
 }
 
+struct promise_matching_placement_new_tag {};
+
+template<>
+struct std::experimental::coroutine_traits<void, promise_matching_placement_new_tag, int, float, double> {
+  struct promise_type {
+    void *operator new(unsigned long, promise_matching_placement_new_tag,
+                       int, float, double);
+    void get_return_object() {}
+    suspend_always initial_suspend() { return {}; }
+    suspend_always final_suspend() { return {}; }
+    void return_void() {}
+  };
+};
+
+// CHECK-LABEL: f1a(
+extern "C" void f1a(promise_matching_placement_new_tag, int x, float y , double z) {
+  // CHECK: store i32 %x, i32* %x.addr, align 4
+  // CHECK: store float %y, float* %y.addr, align 4
+  // CHECK: store double %z, double* %z.addr, align 8
+  // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
+  // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
+  // CHECK: %[[INT:.+]] = load i32, i32* %x.addr, align 4
+  // CHECK: %[[FLOAT:.+]] = load float, float* %y.addr, align 4
+  // CHECK: %[[DOUBLE:.+]] = load double, double* %z.addr, align 8
+  // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS1_ifd(i64 %[[SIZE]], i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]])
+  co_return;
+}
+
 struct promise_delete_tag {};
 
 template<>
@@ -173,6 +201,7 @@
 // CHECK-LABEL: f4(
 extern "C" int f4(promise_on_alloc_failure_tag) {
   // CHECK: %[[RetVal:.+]] = alloca i32
+  // CHECK: %[[Gro:.+]] = alloca i32
   // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16
   // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64()
   // CHECK: %[[MEM:.+]] = call i8* @_ZnwmRKSt9nothrow_t(i64 %[[SIZE]], %"struct.std::nothrow_t"* dereferenceable(1) @_ZStL7nothrow)
@@ -186,7 +215,11 @@
 
   // CHECK: [[OKBB]]:
   // CHECK:   %[[OkRet:.+]] = call i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type17get_return_objectEv(
-  // CHECK:   store i32 %[[OkRet]], i32* %[[RetVal]]
+  // CHECK:   store i32 %[[OkRet]], i32* %[[Gro]]
+
+  // CHECK: %[[Tmp1:.*]] = load i32, i32* %[[Gro]]
+  // CHECK-NEXT: store i32 %[[Tmp1]], i32* %[[RetVal]]
+  // CHECK-NEXT: br label %[[RetBB]]
 
   // CHECK: [[RetBB]]:
   // CHECK:   %[[LoadRet:.+]] = load i32, i32* %[[RetVal]], align 4
diff --git a/test/CodeGenCoroutines/coro-dest-slot.cpp b/test/CodeGenCoroutines/coro-dest-slot.cpp
new file mode 100644
index 0000000..4c7395b
--- /dev/null
+++ b/test/CodeGenCoroutines/coro-dest-slot.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s
+
+#include "Inputs/coroutine.h"
+
+using namespace std::experimental;
+
+struct coro {
+  struct promise_type {
+    coro get_return_object();
+    suspend_always initial_suspend();
+    suspend_never final_suspend();
+    void return_void();
+    static void unhandled_exception();
+  };
+};
+
+extern "C" coro f(int) { co_return; }
+// Verify that cleanup.dest.slot is eliminated in a coroutine.
+// CHECK-LABEL: f(
+// CHECK: call void @_ZNSt12experimental13coroutines_v113suspend_never12await_resumeEv(
+// CHECK: %[[CLEANUP_DEST:.+]] = phi i32 [ 0, %{{.+}} ], [ 2, %{{.+}} ], [ 2, %{{.+}} ]
+// CHECK: call i8* @llvm.coro.free(
+// CHECK: switch i32 %cleanup.dest.slot.0, label %{{.+}} [
+// CHECK-NEXT: i32 0
+// CHECK-NEXT: i32 2
+// CHECK-NEXT: ]
diff --git a/test/CodeGenCoroutines/coro-gro-nrvo.cpp b/test/CodeGenCoroutines/coro-gro-nrvo.cpp
new file mode 100644
index 0000000..48931cb
--- /dev/null
+++ b/test/CodeGenCoroutines/coro-gro-nrvo.cpp
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s
+
+#include "Inputs/coroutine.h"
+
+using namespace std::experimental;
+
+namespace std {
+
+struct nothrow_t {};
+constexpr nothrow_t nothrow = {};
+
+} // end namespace std
+
+// Required when get_return_object_on_allocation_failure() is defined by
+// the promise.
+void* operator new(__SIZE_TYPE__ __sz, const std::nothrow_t&) noexcept;
+void  operator delete(void* __p, const std::nothrow_t&) noexcept;
+
+
+template <class RetObject>
+struct promise_type {
+    RetObject get_return_object();
+    suspend_always initial_suspend();
+    suspend_never final_suspend();
+    void return_void();
+    static void unhandled_exception();
+};
+
+struct coro {
+  using promise_type = promise_type<coro>;
+  coro(coro const&);
+  struct Impl;
+  Impl *impl;
+};
+
+// Verify that the NRVO is applied to the Gro object.
+// CHECK-LABEL: define void @_Z1fi(%struct.coro* noalias sret %agg.result, i32)
+coro f(int) {
+// CHECK: %call = call i8* @_Znwm(
+// CHECK-NEXT: br label %[[CoroInit:.*]]
+
+// CHECK: {{.*}}[[CoroInit]]:
+// CHECK: store i1 false, i1* %gro.active
+// CHECK: call void @{{.*get_return_objectEv}}(%struct.coro* sret %agg.result
+// CHECK-NEXT: store i1 true, i1* %gro.active
+  co_return;
+}
+
+
+template <class RetObject>
+struct promise_type_with_on_alloc_failure {
+    static RetObject get_return_object_on_allocation_failure();
+    RetObject get_return_object();
+    suspend_always initial_suspend();
+    suspend_never final_suspend();
+    void return_void();
+    static void unhandled_exception();
+};
+
+struct coro_two {
+  using promise_type = promise_type_with_on_alloc_failure<coro_two>;
+  coro_two(coro_two const&);
+  struct Impl;
+  Impl *impl;
+};
+
+// Verify that the NRVO is applied to the Gro object.
+// CHECK-LABEL: define void @_Z1hi(%struct.coro_two* noalias sret %agg.result, i32)
+ coro_two h(int) {
+
+// CHECK: %call = call i8* @_ZnwmRKSt9nothrow_t
+// CHECK-NEXT: %[[CheckNull:.*]] = icmp ne i8* %call, null
+// CHECK-NEXT: br i1 %[[CheckNull]], label %[[InitOnSuccess:.*]], label %[[InitOnFailure:.*]]
+
+// CHECK: {{.*}}[[InitOnFailure]]:
+// CHECK-NEXT: call void @{{.*get_return_object_on_allocation_failureEv}}(%struct.coro_two* sret %agg.result
+// CHECK-NEXT: br label %[[RetLabel:.*]]
+
+// CHECK: {{.*}}[[InitOnSuccess]]:
+// CHECK: store i1 false, i1* %gro.active
+// CHECK: call void @{{.*get_return_objectEv}}(%struct.coro_two* sret %agg.result
+// CHECK-NEXT: store i1 true, i1* %gro.active
+
+// CHECK: [[RetLabel]]:
+// CHECK-NEXT: ret void
+  co_return;
+}
diff --git a/test/CodeGenCoroutines/coro-params.cpp b/test/CodeGenCoroutines/coro-params.cpp
index 540f845..078d7ee 100644
--- a/test/CodeGenCoroutines/coro-params.cpp
+++ b/test/CodeGenCoroutines/coro-params.cpp
@@ -1,6 +1,7 @@
 // Verifies that parameters are copied with move constructors
 // Verifies that parameter copies are destroyed
 // Vefifies that parameter copies are used in the body of the coroutine
+// Verifies that parameter copies are used to construct the promise type, if that type has a matching constructor
 // RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -disable-llvm-passes -fexceptions | FileCheck %s
 
 namespace std::experimental {
@@ -68,12 +69,12 @@
   // CHECK: store i32 %val, i32* %[[ValAddr:.+]]
 
   // CHECK: call i8* @llvm.coro.begin(
-  // CHECK-NEXT: call void @_ZN8MoveOnlyC1EOS_(%struct.MoveOnly* %[[MoCopy]], %struct.MoveOnly* dereferenceable(4) %[[MoParam]])
+  // CHECK: call void @_ZN8MoveOnlyC1EOS_(%struct.MoveOnly* %[[MoCopy]], %struct.MoveOnly* dereferenceable(4) %[[MoParam]])
   // CHECK-NEXT: call void @_ZN11MoveAndCopyC1EOS_(%struct.MoveAndCopy* %[[McCopy]], %struct.MoveAndCopy* dereferenceable(4) %[[McParam]]) #
   // CHECK-NEXT: invoke void @_ZNSt12experimental16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeC1Ev(
 
   // CHECK: call void @_ZN14suspend_always12await_resumeEv(
-  // CHECK: %[[IntParam:.+]] = load i32, i32* %val.addr
+  // CHECK: %[[IntParam:.+]] = load i32, i32* %val1
   // CHECK: %[[MoGep:.+]] = getelementptr inbounds %struct.MoveOnly, %struct.MoveOnly* %[[MoCopy]], i32 0, i32 0
   // CHECK: %[[MoVal:.+]] = load i32, i32* %[[MoGep]]
   // CHECK: %[[McGep:.+]] =  getelementptr inbounds %struct.MoveAndCopy, %struct.MoveAndCopy* %[[McCopy]], i32 0, i32 0
@@ -127,3 +128,31 @@
 void call_dependent_params() {
   dependent_params(A{}, B{}, B{});
 }
+
+// Test that, when the promise type has a constructor whose signature matches
+// that of the coroutine function, that constructor is used. This is an
+// experimental feature that will be proposed for the Coroutines TS.
+
+struct promise_matching_constructor {};
+
+template<>
+struct std::experimental::coroutine_traits<void, promise_matching_constructor, int, float, double> {
+  struct promise_type {
+    promise_type(promise_matching_constructor, int, float, double) {}
+    promise_type() = delete;
+    void get_return_object() {}
+    suspend_always initial_suspend() { return {}; }
+    suspend_always final_suspend() { return {}; }
+    void return_void() {}
+    void unhandled_exception() {}
+  };
+};
+
+// CHECK-LABEL: void @_Z38coroutine_matching_promise_constructor28promise_matching_constructorifd(i32, float, double)
+void coroutine_matching_promise_constructor(promise_matching_constructor, int, float, double) {
+  // CHECK: %[[INT:.+]] = load i32, i32* %5, align 4
+  // CHECK: %[[FLOAT:.+]] = load float, float* %6, align 4
+  // CHECK: %[[DOUBLE:.+]] = load double, double* %7, align 8
+  // CHECK: invoke void @_ZNSt12experimental16coroutine_traitsIJv28promise_matching_constructorifdEE12promise_typeC1ES1_ifd(%"struct.std::experimental::coroutine_traits<void, promise_matching_constructor, int, float, double>::promise_type"* %__promise, i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]])
+  co_return;
+}
diff --git a/test/CodeGenCoroutines/coro-promise-dtor.cpp b/test/CodeGenCoroutines/coro-promise-dtor.cpp
index 4142cebe..5cc4720 100644
--- a/test/CodeGenCoroutines/coro-promise-dtor.cpp
+++ b/test/CodeGenCoroutines/coro-promise-dtor.cpp
@@ -28,7 +28,7 @@
   co_return;
 }
 
-// CHECK-LABEL: define void @"\01?f@@YA?AUcoro_t@@XZ"(
+// CHECK-LABEL: define dso_local void @"\01?f@@YA?AUcoro_t@@XZ"(
 // CHECK:  %gro.active = alloca i1
 // CHECK:  store i1 false, i1* %gro.active
 
diff --git a/test/CodeGenObjC/arc-foreach.m b/test/CodeGenObjC/arc-foreach.m
index 77cb068..ffeb372 100644
--- a/test/CodeGenObjC/arc-foreach.m
+++ b/test/CodeGenObjC/arc-foreach.m
@@ -42,7 +42,7 @@
 
 // Initialize the fast enumaration state.
 // CHECK-LP64-NEXT: [[T0:%.*]] = bitcast [[STATE_T]]* [[STATE]] to i8*
-// CHECK-LP64-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 64, i32 8, i1 false)
+// CHECK-LP64-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[T0]], i8 0, i64 64, i1 false)
 
 // Evaluate the collection expression and retain.
 // CHECK-LP64-NEXT: [[T0:%.*]] = load [[ARRAY_T]]*, [[ARRAY_T]]** [[ARRAY]], align 8
diff --git a/test/CodeGenObjC/arc.m b/test/CodeGenObjC/arc.m
index d34156e..7871369 100644
--- a/test/CodeGenObjC/arc.m
+++ b/test/CodeGenObjC/arc.m
@@ -7,30 +7,30 @@
 // RUN: %clang_cc1 -fobjc-runtime=macosx-10.7.0 -triple x86_64-apple-darwin11 -Wno-objc-root-class -Wno-incompatible-pointer-types -Wno-arc-unsafe-retained-assign -emit-llvm -fblocks -fobjc-arc -fobjc-runtime-has-weak -o - %s | FileCheck -check-prefix=ARC-NATIVE %s
 
 // ARC-ALIEN: declare extern_weak void @objc_storeStrong(i8**, i8*)
-// ARC-ALIEN: declare extern_weak i8* @objc_retain(i8* returned)
-// ARC-ALIEN: declare extern_weak i8* @objc_autoreleaseReturnValue(i8* returned)
+// ARC-ALIEN: declare extern_weak i8* @objc_retain(i8*)
+// ARC-ALIEN: declare extern_weak i8* @objc_autoreleaseReturnValue(i8*)
 // ARC-ALIEN: declare i8* @objc_msgSend(i8*, i8*, ...) [[NLB:#[0-9]+]]
 // ARC-ALIEN: declare extern_weak void @objc_release(i8*)
-// ARC-ALIEN: declare extern_weak i8* @objc_retainAutoreleasedReturnValue(i8* returned)
+// ARC-ALIEN: declare extern_weak i8* @objc_retainAutoreleasedReturnValue(i8*)
 // ARC-ALIEN: declare extern_weak i8* @objc_initWeak(i8**, i8*)
 // ARC-ALIEN: declare extern_weak i8* @objc_storeWeak(i8**, i8*)
 // ARC-ALIEN: declare extern_weak i8* @objc_loadWeakRetained(i8**)
 // ARC-ALIEN: declare extern_weak void @objc_destroyWeak(i8**)
-// declare extern_weak i8* @objc_autorelease(i8*)
-// ARC-ALIEN: declare extern_weak i8* @objc_retainAutorelease(i8* returned)
+// ARC-ALIEN: declare extern_weak i8* @objc_autorelease(i8*)
+// ARC-ALIEN: declare extern_weak i8* @objc_retainAutorelease(i8*)
 
 // ARC-NATIVE: declare void @objc_storeStrong(i8**, i8*)
-// ARC-NATIVE: declare i8* @objc_retain(i8* returned) [[NLB:#[0-9]+]]
-// ARC-NATIVE: declare i8* @objc_autoreleaseReturnValue(i8* returned)
+// ARC-NATIVE: declare i8* @objc_retain(i8*) [[NLB:#[0-9]+]]
+// ARC-NATIVE: declare i8* @objc_autoreleaseReturnValue(i8*)
 // ARC-NATIVE: declare i8* @objc_msgSend(i8*, i8*, ...) [[NLB]]
 // ARC-NATIVE: declare void @objc_release(i8*) [[NLB]]
-// ARC-NATIVE: declare i8* @objc_retainAutoreleasedReturnValue(i8* returned)
+// ARC-NATIVE: declare i8* @objc_retainAutoreleasedReturnValue(i8*)
 // ARC-NATIVE: declare i8* @objc_initWeak(i8**, i8*)
 // ARC-NATIVE: declare i8* @objc_storeWeak(i8**, i8*)
 // ARC-NATIVE: declare i8* @objc_loadWeakRetained(i8**)
 // ARC-NATIVE: declare void @objc_destroyWeak(i8**)
-// declare i8* @objc_autorelease(i8*)
-// ARC-NATIVE: declare i8* @objc_retainAutorelease(i8* returned)
+// ARC-NATIVE: declare i8* @objc_autorelease(i8*)
+// ARC-NATIVE: declare i8* @objc_retainAutorelease(i8*)
 
 // CHECK-LABEL: define void @test0
 void test0(id x) {
@@ -507,7 +507,7 @@
   // CHECK:      [[X:%.*]] = alloca [5 x i8*], align 16
   // CHECK: call void @llvm.lifetime.start
   // CHECK-NEXT: [[T0:%.*]] = bitcast [5 x i8*]* [[X]] to i8*
-  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 40, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[T0]], i8 0, i64 40, i1 false)
   id x[5];
 
   extern id test19_helper(void);
@@ -538,6 +538,7 @@
   // CHECK-LABEL: define void @test20
   // CHECK:      [[N:%.*]] = alloca i32, align 4
   // CHECK-NEXT: [[SAVED_STACK:%.*]] = alloca i8*
+  // CHECK-NEXT: [[VLA_EXPR:%.*]] = alloca i64, align 8
   // CHECK-NEXT: store i32 {{%.*}}, i32* [[N]], align 4
 
   id x[n];
@@ -553,10 +554,13 @@
   // Allocate the VLA.
   // CHECK-NEXT: [[VLA:%.*]] = alloca i8*, i64 [[DIM]], align 16
 
+  // Store the VLA #elements expression.
+  // CHECK-NEXT: store i64 [[DIM]], i64* [[VLA_EXPR]], align 8
+
   // Zero-initialize.
   // CHECK-NEXT: [[T0:%.*]] = bitcast i8** [[VLA]] to i8*
   // CHECK-NEXT: [[T1:%.*]] = mul nuw i64 [[DIM]], 8
-  // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 [[T1]], i32 16, i1 false)
+  // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[T0]], i8 0, i64 [[T1]], i1 false)
 
   // Destroy.
   // CHECK-NEXT: [[END:%.*]] = getelementptr inbounds i8*, i8** [[VLA]], i64 [[DIM]]
@@ -579,6 +583,7 @@
   // CHECK-LABEL: define void @test21
   // CHECK:      [[N:%.*]] = alloca i32, align 4
   // CHECK-NEXT: [[SAVED_STACK:%.*]] = alloca i8*
+  // CHECK-NEXT: [[VLA_EXPR:%.*]] = alloca i64, align 8
   // CHECK-NEXT: store i32 {{%.*}}, i32* [[N]], align 4
 
   id x[2][n][3];
@@ -595,11 +600,14 @@
   // CHECK-NEXT: [[T0:%.*]] = mul nuw i64 2, [[DIM]]
   // CHECK-NEXT: [[VLA:%.*]] = alloca [3 x i8*], i64 [[T0]], align 16
 
+  // Store the VLA #elements expression.
+  // CHECK-NEXT: store i64 [[DIM]], i64* [[VLA_EXPR]], align 8
+
   // Zero-initialize.
   // CHECK-NEXT: [[T0:%.*]] = bitcast [3 x i8*]* [[VLA]] to i8*
   // CHECK-NEXT: [[T1:%.*]] = mul nuw i64 2, [[DIM]]
   // CHECK-NEXT: [[T2:%.*]] = mul nuw i64 [[T1]], 24
-  // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 [[T2]], i32 16, i1 false)
+  // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[T0]], i8 0, i64 [[T2]], i1 false)
 
   // Destroy.
   // CHECK-NEXT: [[T0:%.*]] = mul nuw i64 2, [[DIM]]
@@ -1504,9 +1512,7 @@
 // CHECK:      [[SELF:%.*]] = alloca [[TEST69:%.*]]*, align 8
 // CHECK:      [[T0:%.*]] = load [[TEST69]]*, [[TEST69]]** [[SELF]], align 8
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST69]]* [[T0]] to i8*
-// CHECK-NEXT: [[RETAIN:%.*]] = call i8* @objc_retain(i8* [[T1]])
-// CHECK-NEXT: [[AUTORELEASE:%.*]] = tail call i8* @objc_autoreleaseReturnValue(i8* [[RETAIN]])
-// CHECK-NEXT: ret i8* [[AUTORELEASE]]
+// CHECK-NEXT: ret i8* [[T1]]
 
 // rdar://problem/10907547
 void test70(id i) {
diff --git a/test/CodeGenObjC/attr-exception.m b/test/CodeGenObjC/attr-exception.m
index 00ebb0f..30b637c 100644
--- a/test/CodeGenObjC/attr-exception.m
+++ b/test/CodeGenObjC/attr-exception.m
@@ -13,8 +13,8 @@
 
 @implementation A
 @end
-// CHECK: @"OBJC_EHTYPE_$_A" = global {{%.*}} { i8** getelementptr (i8*, i8** @objc_ehtype_vtable, i32 2)
-// CHECK-HIDDEN: @"OBJC_EHTYPE_$_A" = hidden global {{%.*}} { i8** getelementptr (i8*, i8** @objc_ehtype_vtable, i32 2)
+// CHECK: @"OBJC_EHTYPE_$_A" = global {{%.*}} { i8** getelementptr inbounds (i8*, i8** @objc_ehtype_vtable, i32 2)
+// CHECK-HIDDEN: @"OBJC_EHTYPE_$_A" = hidden global {{%.*}} { i8** getelementptr inbounds (i8*, i8** @objc_ehtype_vtable, i32 2)
 
 __attribute__((objc_exception))
 __attribute__((visibility("default")))
@@ -23,5 +23,5 @@
 
 @implementation B
 @end
-// CHECK: @"OBJC_EHTYPE_$_B" = global {{%.*}} { i8** getelementptr (i8*, i8** @objc_ehtype_vtable, i32 2)
-// CHECK-HIDDEN: @"OBJC_EHTYPE_$_B" = global {{%.*}} { i8** getelementptr (i8*, i8** @objc_ehtype_vtable, i32 2)
+// CHECK: @"OBJC_EHTYPE_$_B" = global {{%.*}} { i8** getelementptr inbounds (i8*, i8** @objc_ehtype_vtable, i32 2)
+// CHECK-HIDDEN: @"OBJC_EHTYPE_$_B" = global {{%.*}} { i8** getelementptr inbounds (i8*, i8** @objc_ehtype_vtable, i32 2)
diff --git a/test/CodeGenObjC/availability-dso-local.m b/test/CodeGenObjC/availability-dso-local.m
new file mode 100644
index 0000000..8ff41d8
--- /dev/null
+++ b/test/CodeGenObjC/availability-dso-local.m
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -triple thumbv7-apple-ios10.0.0 -emit-llvm -fvisibility hidden -w %s -o - | FileCheck %s
+
+// CHECK: @"OBJC_CLASS_$_a" = hidden global %struct._class_t
+
+__attribute__((availability(ios, introduced = 11.0))) @interface a @end
+    @implementation a @end
diff --git a/test/CodeGenObjC/builtin-memfns.m b/test/CodeGenObjC/builtin-memfns.m
index bb425c0..6cc91b1 100644
--- a/test/CodeGenObjC/builtin-memfns.m
+++ b/test/CodeGenObjC/builtin-memfns.m
@@ -5,6 +5,6 @@
 // PR13697
 void test1(int *a, id b) {
   // CHECK: @test1
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
   memcpy(a, b, 8);
 }
diff --git a/test/CodeGenObjC/debug-info-block-captured-self.m b/test/CodeGenObjC/debug-info-block-captured-self.m
deleted file mode 100644
index 302a011..0000000
--- a/test/CodeGenObjC/debug-info-block-captured-self.m
+++ /dev/null
@@ -1,72 +0,0 @@
-// RUN: %clang_cc1 -fblocks -debug-info-kind=limited -emit-llvm -triple x86_64-apple-darwin -o - %s | FileCheck %s
-//
-// Test that debug location is generated for a captured "self" inside
-// a block.
-//
-// This test is split into two parts, this one for the frontend, and
-// then llvm/test/DebugInfo/debug-info-block-captured-self.ll to
-// ensure that DW_AT_location is generated for the captured self.
-@class T;
-@interface S
-@end
-@interface Mode
--(int) count;
-@end
-@interface Context
-@end
-@interface ViewController
-@property (nonatomic, readwrite, strong) Context *context;
-@end
-typedef enum {
-    Unknown = 0,
-} State;
-@interface Main : ViewController
-{
-    T * t1;
-    T * t2;
-}
-@property(readwrite, nonatomic) State state;
-@end
-@implementation Main
-- (id) initWithContext:(Context *) context
-{
-    t1 = [self.context withBlock:^(id obj){
-        id *mode1;
-	t2 = [mode1 withBlock:^(id object){
-	    Mode *mode2 = object;
-	    if ([mode2 count] != 0) {
-	      self.state = 0;
-	    }
-	  }];
-      }];
-}
-@end
-// The important part of this test is that there is a dbg.value
-// intrinsic associated with the implicit .block_descriptor argument
-// of the block. We also test that this value gets alloca'd, so the
-// register llocator won't accidentally kill it.
-
-// outer block:
-// CHECK: define internal void {{.*}}_block_invoke{{.*}}
-
-// inner block:
-// CHECK: define internal void {{.*}}_block_invoke{{.*}}
-// CHECK:        %[[MEM1:.*]] = alloca i8*, align 8
-// CHECK-NEXT:   %[[MEM2:.*]] = alloca i8*, align 8
-// CHECK-NEXT:   [[DBGADDR:%.*]] = alloca [[BLOCK_T:<{.*}>]]*, align 8
-// CHECK:        store i8* [[BLOCK_DESC:%.*]], i8** %[[MEM1]], align 8
-// CHECK:        %[[TMP0:.*]] = load i8*, i8** %[[MEM1]]
-// CHECK:        call void @llvm.dbg.value(metadata i8* %[[TMP0]], metadata ![[BDMD:[0-9]+]], metadata !{{.*}})
-// CHECK:        call void @llvm.dbg.declare(metadata i8* [[BLOCK_DESC]], metadata ![[BDMD:[0-9]+]], metadata !{{.*}})
-// CHECK:        store [[BLOCK_T]]* {{%.*}}, [[BLOCK_T]]** [[DBGADDR]], align 8
-// CHECK:        call void @llvm.dbg.declare(metadata [[BLOCK_T]]** [[DBGADDR]], metadata ![[SELF:.*]], metadata !{{.*}})
-// make sure we are still in the same function
-// CHECK: define {{.*}}__copy_helper_block_
-// Metadata
-// CHECK: ![[MAIN:.*]] = !DICompositeType(tag: DW_TAG_structure_type, name: "Main"
-// CHECK-SAME:                            line: 23,
-// CHECK: ![[PMAIN:.*]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[MAIN]],
-// CHECK: ![[BDMD]] = !DILocalVariable(name: ".block_descriptor", arg:
-// CHECK: ![[SELF]] = !DILocalVariable(name: "self"
-// CHECK-NOT:                          arg:
-// CHECK-SAME:                         line: 40,
diff --git a/test/CodeGenObjC/debug-info-blocks.m b/test/CodeGenObjC/debug-info-blocks.m
index 0bf5663..848e389 100644
--- a/test/CodeGenObjC/debug-info-blocks.m
+++ b/test/CodeGenObjC/debug-info-blocks.m
@@ -10,23 +10,20 @@
 // CHECK-NEXT: call void @llvm.dbg.declare(metadata <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>** %[[ALLOCA]], metadata ![[SELF:[0-9]+]], metadata !{{.*}})
 // CHECK-NEXT: call void @llvm.dbg.declare(metadata %1** %d, metadata ![[D:[0-9]+]], metadata !{{.*}})
 
-// rdar://problem/14386148
-// Test that we don't emit bogus line numbers for the helper functions.
-// Test that we do emit scope info for the helper functions.
+// Test that we do emit scope info for the helper functions, and that the
+// parameters to these functions are marked as artificial (so the debugger
+// doesn't accidentally step into the function).
 // CHECK: define {{.*}} @__copy_helper_block_{{.*}}(i8*, i8*)
 // CHECK-NOT: ret
 // CHECK: call {{.*}}, !dbg ![[DBG_LINE:[0-9]+]]
 // CHECK-NOT: ret
 // CHECK: load {{.*}}, !dbg ![[COPY_LINE:[0-9]+]]
+// CHECK: ret void, !dbg ![[COPY_LINE]]
 // CHECK: define {{.*}} @__destroy_helper_block_{{.*}}(i8*)
 // CHECK-NOT: ret
 // CHECK: load {{.*}}, !dbg ![[DESTROY_LINE:[0-9]+]]
+// CHECK: ret void, !dbg ![[DESTROY_LINE]]
 
-// CHECK-DAG: [[DBG_LINE]] = !DILocation(line: 0, scope: ![[COPY_SP:[0-9]+]])
-// CHECK-DAG: [[COPY_LINE]] = !DILocation(line: 0, scope: ![[COPY_SP:[0-9]+]])
-// CHECK-DAG: [[COPY_SP]] = distinct !DISubprogram(name: "__copy_helper_block_"
-// CHECK-DAG: [[DESTROY_LINE]] = !DILocation(line: 0, scope: ![[DESTROY_SP:[0-9]+]])
-// CHECK-DAG: [[DESTROY_SP]] = distinct !DISubprogram(name: "__destroy_helper_block_"
 typedef unsigned int NSUInteger;
 
 @protocol NSObject
@@ -60,6 +57,14 @@
 - (id)init
 {
     if ((self = [super init])) {
+      // CHECK-DAG: [[DBG_LINE]] = !DILocation(line: 0, scope: ![[COPY_SP:[0-9]+]])
+      // CHECK-DAG: [[COPY_LINE]] = !DILocation(line: [[@LINE+7]], scope: ![[COPY_SP:[0-9]+]])
+      // CHECK-DAG: [[COPY_SP]] = distinct !DISubprogram(name: "__copy_helper_block_"
+      // CHECK-DAG: [[DESTROY_LINE]] = !DILocation(line: [[@LINE+5]], scope: ![[DESTROY_SP:[0-9]+]])
+      // CHECK-DAG: [[DESTROY_SP]] = distinct !DISubprogram(name: "__destroy_helper_block_"
+      // CHECK-DAG: !DILocalVariable(arg: 1, scope: ![[COPY_SP]], {{.*}}, flags: DIFlagArtificial)
+      // CHECK-DAG: !DILocalVariable(arg: 2, scope: ![[COPY_SP]], {{.*}}, flags: DIFlagArtificial)
+      // CHECK-DAG: !DILocalVariable(arg: 1, scope: ![[DESTROY_SP]], {{.*}}, flags: DIFlagArtificial)
       run(^{
           // CHECK-DAG: ![[SELF]] = !DILocalVariable(name: "self", scope:{{.*}}, line: [[@LINE+4]],
           // CHECK-DAG: ![[D]] = !DILocalVariable(name: "d", scope:{{.*}}, line: [[@LINE+1]],
diff --git a/test/CodeGenObjC/disable-tail-call-escaping-block.m b/test/CodeGenObjC/disable-tail-call-escaping-block.m
new file mode 100644
index 0000000..48b0eda
--- /dev/null
+++ b/test/CodeGenObjC/disable-tail-call-escaping-block.m
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -fblocks -fno-escaping-block-tail-calls -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: define void @test(
+// CHECK: store i8* bitcast (void (i8*)* @[[TEST_BLOCK_INVOKE0:.*]] to i8*)
+// CHECK: store i8* bitcast (void (i8*)* @[[TEST_BLOCK_INVOKE1:.*]] to i8*)
+// CHECK: store i8* bitcast (void (i8*)* @[[TEST_BLOCK_INVOKE2:.*]] to i8*)
+// CHECK: store i8* bitcast (void (i8*)* @[[TEST_BLOCK_INVOKE3:.*]] to i8*)
+// CHECK: store i8* bitcast (void (i8*)* @[[TEST_BLOCK_INVOKE4:.*]] to i8*)
+// CHECK: store i8* bitcast (void (i8*)* @[[TEST_BLOCK_INVOKE5:.*]] to i8*)
+// CHECK: store i8* bitcast (void (i8*)* @[[TEST_BLOCK_INVOKE6:.*]] to i8*)
+
+// CHECK: define internal void @[[TEST_BLOCK_INVOKE0]]({{.*}}) #[[DISABLEATTR:.*]] {
+// CHECK: define internal void @[[TEST_BLOCK_INVOKE1]]({{.*}}) #[[ENABLEATTR:.*]] {
+// CHECK: define internal void @[[TEST_BLOCK_INVOKE2]]({{.*}}) #[[DISABLEATTR]] {
+// CHECK: define internal void @[[TEST_BLOCK_INVOKE3]]({{.*}}) #[[DISABLEATTR]] {
+// CHECK: define internal void @[[TEST_BLOCK_INVOKE4]]({{.*}}) #[[ENABLEATTR]] {
+// CHECK: define internal void @[[TEST_BLOCK_INVOKE5]]({{.*}}) #[[DISABLEATTR]] {
+// CHECK: define internal void @[[TEST_BLOCK_INVOKE6]]({{.*}}) #[[ENABLEATTR]] {
+
+// CHECK: attributes #[[ENABLEATTR]] = {{{.*}}"disable-tail-calls"="false"{{.*}}}
+// CHECK: attributes #[[DISABLEATTR]] = {{{.*}}"disable-tail-calls"="true"{{.*}}}
+
+typedef void (^BlockTy)(void);
+typedef void (*NoEscapeFnTy)(__attribute__((noescape)) BlockTy);
+
+void callee0(__attribute__((noescape)) BlockTy);
+void callee1(BlockTy);
+
+__attribute__((objc_root_class))
+@interface C0
+-(void)m0:(__attribute__((noescape)) BlockTy)p;
+-(void)m1:(BlockTy)p;
+@end
+
+@implementation C0
+-(void)m0:(__attribute__((noescape)) BlockTy)p {}
+-(void)m1:(BlockTy)p {}
+@end
+
+NoEscapeFnTy noescapefunc;
+
+void test(id a, C0 *c0) {
+  BlockTy b0 = ^{ (void)a; }; // disable tail-call optimization.
+  callee0(b0);
+  callee0(^{ (void)a; }); // enable tail-call optimization.
+  callee1(^{ (void)a; }); // disable tail-call optimization.
+
+  BlockTy b1 = ^{ (void)a; }; // disable tail-call optimization.
+  [c0 m0:b1];
+  [c0 m0:^{ (void)a; }]; // enable tail-call optimization.
+  [c0 m1:^{ (void)a; }]; // disable tail-call optimization.
+
+  noescapefunc(^{ (void)a; }); // enable tail-call optimization.
+}
diff --git a/test/CodeGenObjC/dllstorage.m b/test/CodeGenObjC/dllstorage.m
index 4bdbd50..32aceb6 100644
--- a/test/CodeGenObjC/dllstorage.m
+++ b/test/CodeGenObjC/dllstorage.m
@@ -1,3 +1,4 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -fdeclspec -fobjc-runtime=ios -fobjc-exceptions -S -emit-llvm -o - %s | FileCheck -check-prefix CHECK-IR %s
 // RUN: %clang_cc1 -triple i686-windows-itanium -fms-extensions -fobjc-runtime=macosx -fdeclspec -fobjc-exceptions -S -emit-llvm -o - %s | FileCheck -check-prefix CHECK-IR %s
 // RUN: %clang_cc1 -triple i686-windows-itanium -fms-extensions -fobjc-runtime=objfw -fdeclspec -fobjc-exceptions -S -emit-llvm -o - %s | FileCheck -check-prefix CHECK-FW %s
 
@@ -15,11 +16,11 @@
 @interface J : I
 @end
 
-// CHECK-IR-DAG: @"OBJC_METACLASS_$_J" = dllexport global %struct._class_t
-// CHECK-IR-DAG: @"OBJC_CLASS_$_J" = dllexport global %struct._class_t
+// CHECK-IR-DAG: @"OBJC_METACLASS_$_J" = dso_local dllexport global %struct._class_t
+// CHECK-IR-DAG: @"OBJC_CLASS_$_J" = dso_local dllexport global %struct._class_t
 
-// CHECK-FW-DAG: @_OBJC_METACLASS_J = dllexport global
-// CHECK-FW-DAG: @_OBJC_CLASS_J = dllexport global
+// CHECK-FW-DAG: @_OBJC_METACLASS_J = dso_local dllexport global
+// CHECK-FW-DAG: @_OBJC_CLASS_J = dso_local dllexport global
 
 @implementation J {
   id _ivar;
@@ -31,11 +32,11 @@
 @interface K : J
 @end
 
-// CHECK-IR-DAG: @"OBJC_METACLASS_$_K" = global %struct._class_t
-// CHECK-IR-DAG: @"OBJC_CLASS_$_K" = global %struct._class_t
+// CHECK-IR-DAG: @"OBJC_METACLASS_$_K" = dso_local global %struct._class_t
+// CHECK-IR-DAG: @"OBJC_CLASS_$_K" = dso_local global %struct._class_t
 
-// CHECK-FW-DAG: @_OBJC_METACLASS_K = global
-// CHECK-FW-DAG: @_OBJC_CLASS_K = global
+// CHECK-FW-DAG: @_OBJC_METACLASS_K = dso_local global
+// CHECK-FW-DAG: @_OBJC_CLASS_K = dso_local global
 
 @implementation K {
   id _ivar;
@@ -48,11 +49,11 @@
 @interface L : K
 @end
 
-// CHECK-IR-DAG: @"OBJC_METACLASS_$_L" = dllexport global %struct._class_t
-// CHECK-IR-DAG: @"OBJC_CLASS_$_L" = dllexport global %struct._class_t
+// CHECK-IR-DAG: @"OBJC_METACLASS_$_L" = dso_local dllexport global %struct._class_t
+// CHECK-IR-DAG: @"OBJC_CLASS_$_L" = dso_local dllexport global %struct._class_t
 
-// CHECK-FW-DAG: @_OBJC_METACLASS_L = dllexport global
-// CHECK-FW-DAG: @_OBJC_CLASS_L = dllexport global
+// CHECK-FW-DAG: @_OBJC_METACLASS_L = dso_local dllexport global
+// CHECK-FW-DAG: @_OBJC_CLASS_L = dso_local dllexport global
 
 @implementation L {
   id _none;
@@ -93,13 +94,13 @@
 @interface N : I
 @end
 
-// CHECK-FW-DAG: @_OBJC_METACLASS_N = dllexport global
-// CHECK-FW-DAG: @_OBJC_CLASS_N = dllexport global
+// CHECK-FW-DAG: @_OBJC_METACLASS_N = dso_local dllexport global
+// CHECK-FW-DAG: @_OBJC_CLASS_N = dso_local dllexport global
 
 @implementation N : I
 @end
 
-// CHECK-IR-DAG: @"OBJC_EHTYPE_$_N" = dllexport global %struct._objc_typeinfo
+// CHECK-IR-DAG: @"OBJC_EHTYPE_$_N" = dso_local dllexport global %struct._objc_typeinfo
 
 __declspec(dllimport)
 __attribute__((__objc_exception__))
@@ -112,7 +113,16 @@
 @interface P : I
 @end
 
-// CHECK-IR-DAG: @"OBJC_EHTYPE_$_P" = external global %struct._objc_typeinfo
+// CHECK-IR-DAG: @"OBJC_EHTYPE_$_P" = external dso_local global %struct._objc_typeinfo
+
+@interface Q : M
+@end
+
+id f(Q *q) {
+  return q->_ivar;
+}
+
+// CHECK-IR-DAG: @"OBJC_IVAR_$_M._ivar" = external dllimport global i32
 
 int g() {
   @autoreleasepool {
diff --git a/test/CodeGenObjC/forward-protocol-metadata-symbols.m b/test/CodeGenObjC/forward-protocol-metadata-symbols.m
index 78c51e4..16d33ec 100644
--- a/test/CodeGenObjC/forward-protocol-metadata-symbols.m
+++ b/test/CodeGenObjC/forward-protocol-metadata-symbols.m
@@ -23,4 +23,17 @@
 // CHECK: @"\01l_OBJC_LABEL_PROTOCOL_$_P0" = weak hidden global
 // CHECK: @"\01l_OBJC_PROTOCOL_REFERENCE_$_P0" = weak hidden global
 
-// CHECK: llvm.compiler.used = appending global [10 x i8*] {{[^"]*}}OBJC_CLASS_NAME_{{[^"]*}}OBJC_METH_VAR_NAME_{{[^"]*}}OBJC_METH_VAR_TYPE_{{[^"]*}}"\01l_OBJC_$_CLASS_METHODS_A"{{[^"]*}}"\01l_OBJC_CLASS_PROTOCOLS_$_A"{{[^"]*}}OBJC_CLASS_NAME_.1{{[^"]*}}"\01l_OBJC_PROTOCOL_$_P0"{{[^"]*}}"\01l_OBJC_LABEL_PROTOCOL_$_P0"{{[^"]*}}"\01l_OBJC_PROTOCOL_REFERENCE_$_P0"{{[^"]*}}"OBJC_LABEL_CLASS_$"{{[^"]*}} section "llvm.metadata"
+// CHECK: llvm.used = appending global [3 x i8*]
+// CHECK-SAME: "\01l_OBJC_PROTOCOL_$_P0"
+// CHECK-SAME: "\01l_OBJC_LABEL_PROTOCOL_$_P0"
+// CHECK-SAME: "\01l_OBJC_PROTOCOL_REFERENCE_$_P0"
+
+// CHECK: llvm.compiler.used = appending global [7 x i8*]
+// CHECK-SAME: OBJC_CLASS_NAME_
+// CHECK-SAME: OBJC_METH_VAR_NAME_
+// CHECK-SAME: OBJC_METH_VAR_TYPE_
+// CHECK-SAME: "\01l_OBJC_$_CLASS_METHODS_A"
+// CHECK-SAME: "\01l_OBJC_CLASS_PROTOCOLS_$_A"
+// CHECK-SAME: OBJC_CLASS_NAME_.1
+// CHECK-SAME: "OBJC_LABEL_CLASS_$"
+// CHECK-SAME: section "llvm.metadata"
diff --git a/test/CodeGenObjC/ivar-layout-flexible-array.m b/test/CodeGenObjC/ivar-layout-flexible-array.m
new file mode 100644
index 0000000..28849c8
--- /dev/null
+++ b/test/CodeGenObjC/ivar-layout-flexible-array.m
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -Wno-objc-root-class -fobjc-arc -emit-llvm -o - %s | FileCheck %s
+
+// rdar://problem/21054495
+@interface FlexibleArrayMember {
+  char flexible_array[][4][2];
+}
+@end
+@implementation FlexibleArrayMember
+@end
+// CHECK: @OBJC_METH_VAR_NAME_{{.*}} = private unnamed_addr constant {{.*}} c"flexible_array\00"
+// CHECK-NEXT: @OBJC_METH_VAR_TYPE_{{.*}} = private unnamed_addr constant {{.*}} c"^[4[2c]]\00"
+
+
+typedef char FlexibleArray[];
+
+struct Packet {
+  int size;
+  FlexibleArray data;
+};
+
+@interface VariableSizeIvar {
+  struct Packet flexible_struct;
+}
+@end
+@implementation VariableSizeIvar
+@end
+// CHECK: @OBJC_METH_VAR_NAME_{{.*}} = private unnamed_addr constant {{.*}} c"flexible_struct\00"
+// CHECK-NEXT: @OBJC_METH_VAR_TYPE_{{.*}} = private unnamed_addr constant {{.*}} c"{Packet=\22size\22i\22data\22[0c]}\00"
diff --git a/test/CodeGenObjC/messages-2.m b/test/CodeGenObjC/messages-2.m
index be66f71..38c5d50 100644
--- a/test/CodeGenObjC/messages-2.m
+++ b/test/CodeGenObjC/messages-2.m
@@ -157,7 +157,7 @@
   // CHECK:         call {{.*}} @objc_msgSend_stret to
   // CHECK-NEXT:    br label
   // CHECK:         [[T0:%.*]] = bitcast [[POINT_T]]* [[POINT]] to i8*
-  // CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 48, i32 4, i1 false)
+  // CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[T0]], i8 0, i64 48, i1 false)
   // CHECK-NEXT:    br label
 
   // CHECK-NF:      [[X:%.*]] = alloca [[A]]*
@@ -169,7 +169,7 @@
   // CHECK-NF:      call {{.*}} @objc_msgSend_stret to
   // CHECK-NF-NEXT: br label
   // CHECK-NF:      [[T0:%.*]] = bitcast [[POINT_T]]* [[POINT]] to i8*
-  // CHECK-NF-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 48, i32 4, i1 false)
+  // CHECK-NF-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[T0]], i8 0, i64 48, i1 false)
   // CHECK-NF-NEXT: br label
   MyPoint point = [x returnAPoint];
 }
diff --git a/test/CodeGenObjC/no-sanitize.m b/test/CodeGenObjC/no-sanitize.m
index 39f8575..07a196b 100644
--- a/test/CodeGenObjC/no-sanitize.m
+++ b/test/CodeGenObjC/no-sanitize.m
@@ -1,8 +1,9 @@
-// RUN: %clang_cc1 %s -emit-llvm -fsanitize=address -o - | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -fsanitize=address -fblocks -o - | FileCheck %s
 
 @interface I0 @end
 @implementation I0
 // CHECK-NOT: sanitize_address
 - (void) im0: (int) a0 __attribute__((no_sanitize("address"))) {
+  int (^blockName)() = ^int() { return 0; };
 }
 @end
diff --git a/test/CodeGenObjC/nontrivial-c-struct-exception.m b/test/CodeGenObjC/nontrivial-c-struct-exception.m
new file mode 100644
index 0000000..a926a6d
--- /dev/null
+++ b/test/CodeGenObjC/nontrivial-c-struct-exception.m
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks -fobjc-runtime=ios-11.0 -fobjc-exceptions -fexceptions -fobjc-arc-exceptions -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: %[[STRUCT_STRONG:.*]] = type { i32, i8* }
+
+typedef struct {
+  int i;
+  id f1;
+} Strong;
+
+// CHECK: define void @testStrongException()
+// CHECK: %[[AGG_TMP:.*]] = alloca %[[STRUCT_STRONG]], align 8
+// CHECK: %[[AGG_TMP1:.*]] = alloca %[[STRUCT_STRONG]], align 8
+// CHECK: %[[CALL:.*]] = call [2 x i64] @genStrong()
+// CHECK: %[[V0:.*]] = bitcast %[[STRUCT_STRONG]]* %[[AGG_TMP]] to [2 x i64]*
+// CHECK: store [2 x i64] %[[CALL]], [2 x i64]* %[[V0]], align 8
+// CHECK: invoke [2 x i64] @genStrong()
+
+// CHECK: call void @calleeStrong([2 x i64] %{{.*}}, [2 x i64] %{{.*}})
+// CHECK-NEXT: ret void
+
+// CHECK: landingpad { i8*, i32 }
+// CHECK: %[[V9:.*]] = bitcast %[[STRUCT_STRONG]]* %[[AGG_TMP]] to i8**
+// CHECK: call void @__destructor_8_s8(i8** %[[V9]])
+// CHECK: br label
+
+// CHECK: resume
+
+Strong genStrong(void);
+void calleeStrong(Strong, Strong);
+
+void testStrongException(void) {
+  calleeStrong(genStrong(), genStrong());
+}
diff --git a/test/CodeGenObjC/nontrivial-c-struct-func-name-collision.m b/test/CodeGenObjC/nontrivial-c-struct-func-name-collision.m
new file mode 100644
index 0000000..bf512ec
--- /dev/null
+++ b/test/CodeGenObjC/nontrivial-c-struct-func-name-collision.m
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks  -fobjc-runtime=ios-11.0 -emit-llvm -verify -o - %s
+
+typedef struct { // expected-error {{special function __default_constructor_8_s8 for non-trivial C struct has incorrect type}}
+  int i;
+  id f1;
+} StrongSmall;
+
+int __default_constructor_8_s8(double a) {
+  return 0;
+}
+
+void testIncorrectFunctionType(void) {
+  StrongSmall x;
+}
diff --git a/test/CodeGenObjC/objc-asm-attribute-neg-test.m b/test/CodeGenObjC/objc-asm-attribute-neg-test.m
index e9bef4c..de809d8 100644
--- a/test/CodeGenObjC/objc-asm-attribute-neg-test.m
+++ b/test/CodeGenObjC/objc-asm-attribute-neg-test.m
@@ -7,15 +7,15 @@
 
 __attribute__((objc_runtime_name("MySecretNamespace.Message")))
 @interface Message <Protocol> { 
-__attribute__((objc_runtime_name("MySecretNamespace.Message"))) // expected-error {{'objc_runtime_name' attribute only applies to interface or protocol declarations}}
+__attribute__((objc_runtime_name("MySecretNamespace.Message"))) // expected-error {{'objc_runtime_name' attribute only applies to Objective-C interfaces and Objective-C protocols}}
   id MyIVAR;
 }
 __attribute__((objc_runtime_name("MySecretNamespace.Message")))
 @property int MyProperty; // expected-error {{prefix attribute must be followed by an interface or protocol}}}}
 
-- (int) getMyProperty __attribute__((objc_runtime_name("MySecretNamespace.Message"))); // expected-error {{'objc_runtime_name' attribute only applies to interface or protocol declarations}}
+- (int) getMyProperty __attribute__((objc_runtime_name("MySecretNamespace.Message"))); // expected-error {{'objc_runtime_name' attribute only applies to}}
 
-- (void) setMyProperty : (int) arg __attribute__((objc_runtime_name("MySecretNamespace.Message"))); // expected-error {{'objc_runtime_name' attribute only applies to interface or protocol declarations}}
+- (void) setMyProperty : (int) arg __attribute__((objc_runtime_name("MySecretNamespace.Message"))); // expected-error {{'objc_runtime_name' attribute only applies to}}
 
 @end
 
diff --git a/test/CodeGenObjC/stret-1.m b/test/CodeGenObjC/stret-1.m
index 2314d5a..1122c28 100644
--- a/test/CodeGenObjC/stret-1.m
+++ b/test/CodeGenObjC/stret-1.m
@@ -16,7 +16,7 @@
     s = [(id)(argc&~255) method];
     // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret [[T0:%[^,]+]]
     // CHECK: [[T0P:%.*]] = bitcast %struct.stret* [[T0]] to i8*
-    // CHECK: call void @llvm.memset.p0i8.i64(i8* [[T0P]], i8 0, i64 400, i32 4, i1 false)
+    // CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 [[T0P]], i8 0, i64 400, i1 false)
 
     s = [Test method];
     // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret [[T1:%[^,]+]]
diff --git a/test/CodeGenObjC/strong-in-c-struct.m b/test/CodeGenObjC/strong-in-c-struct.m
new file mode 100644
index 0000000..c48c663
--- /dev/null
+++ b/test/CodeGenObjC/strong-in-c-struct.m
@@ -0,0 +1,522 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks  -fobjc-runtime=ios-11.0 -emit-llvm -o - %s | FileCheck %s
+
+typedef void (^BlockTy)(void);
+
+typedef struct {
+  int a[4];
+} Trivial;
+
+typedef struct {
+  Trivial f0;
+  id f1;
+} Strong;
+
+typedef struct {
+  int i;
+  id f1;
+} StrongSmall;
+
+typedef struct {
+  Strong f0;
+  id f1;
+  double d;
+} StrongOuter;
+
+typedef struct {
+  int f0;
+  volatile id f1;
+} StrongVolatile;
+
+typedef struct {
+  BlockTy f0;
+} StrongBlock;
+
+typedef struct {
+  int i;
+  id f0[2][2];
+} IDArray;
+
+typedef struct {
+  double d;
+  Strong f0[2][2];
+} StructArray;
+
+typedef struct {
+  id f0;
+  int i : 9;
+} Bitfield0;
+
+typedef struct {
+  char c;
+  int i0 : 2;
+  int i1 : 4;
+  id f0;
+  int i2 : 31;
+  int i3 : 1;
+  id f1;
+  int : 0;
+  int a[3];
+  id f2;
+  double d;
+  int i4 : 1;
+  volatile int i5 : 2;
+  volatile char i6;
+} Bitfield1;
+
+StrongSmall getStrongSmall(void);
+StrongOuter getStrongOuter(void);
+void calleeStrongSmall(StrongSmall);
+void func(Strong *);
+
+// CHECK: %[[STRUCT_BITFIELD1:.*]] = type { i8, i8, i8*, i32, i8*, [3 x i32], i8*, double, i8, i8 }
+
+// CHECK: define void @test_constructor_destructor_StrongOuter()
+// CHECK: %[[T:.*]] = alloca %[[STRUCT_STRONGOUTER:.*]], align 8
+// CHECK: %[[V0:.*]] = bitcast %[[STRUCT_STRONGOUTER]]* %[[T]] to i8**
+// CHECK: call void @__default_constructor_8_s16_s24(i8** %[[V0]])
+// CHECK: %[[V1:.*]] = bitcast %[[STRUCT_STRONGOUTER]]* %[[T]] to i8**
+// CHECK: call void @__destructor_8_s16_s24(i8** %[[V1]])
+// CHECK: ret void
+
+// CHECK: define linkonce_odr hidden void @__default_constructor_8_s16_s24(i8** %[[DST:.*]])
+// CHECK: %[[DST_ADDR:.*]] = alloca i8**, align 8
+// CHECK: store i8** %[[DST]], i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load i8**, i8*** %[[DST_ADDR]], align 8
+// CHECK: call void @__default_constructor_8_s16(i8** %[[V0]])
+// CHECK: %[[V1:.*]] = bitcast i8** %[[V0]] to i8*
+// CHECK: %[[V2:.*]] = getelementptr inbounds i8, i8* %[[V1]], i64 24
+// CHECK: %[[V3:.*]] = bitcast i8* %[[V2]] to i8**
+// CHECK: %[[V4:.*]] = bitcast i8** %[[V3]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %[[V4]], i8 0, i64 8, i1 false)
+// CHECK: ret void
+
+// CHECK: define linkonce_odr hidden void @__default_constructor_8_s16(i8** %[[DST:.*]])
+// CHECK: %[[DST_ADDR:.*]] = alloca i8**, align 8
+// CHECK: store i8** %[[DST]], i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load i8**, i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V1:.*]] = bitcast i8** %[[V0]] to i8*
+// CHECK: %[[V2:.*]] = getelementptr inbounds i8, i8* %[[V1]], i64 16
+// CHECK: %[[V3:.*]] = bitcast i8* %[[V2]] to i8**
+// CHECK: %[[V4:.*]] = bitcast i8** %[[V3]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %[[V4]], i8 0, i64 8, i1 false)
+// CHECK: ret void
+
+// CHECK: define linkonce_odr hidden void @__destructor_8_s16_s24(i8** %[[DST:.*]])
+// CHECK: %[[DST_ADDR:.*]] = alloca i8**, align 8
+// CHECK: store i8** %[[DST]], i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load i8**, i8*** %[[DST_ADDR]], align 8
+// CHECK: call void @__destructor_8_s16(i8** %[[V0]])
+// CHECK: %[[V1:.*]] = bitcast i8** %[[V0]] to i8*
+// CHECK: %[[V2:.*]] = getelementptr inbounds i8, i8* %[[V1]], i64 24
+// CHECK: %[[V3:.*]] = bitcast i8* %[[V2]] to i8**
+// CHECK: call void @objc_storeStrong(i8** %[[V3]], i8* null)
+// CHECK: ret void
+
+// CHECK: define linkonce_odr hidden void @__destructor_8_s16(i8** %[[DST:.*]])
+// CHECK: %[[DST_ADDR:.*]] = alloca i8**, align 8
+// CHECK: store i8** %[[DST]], i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load i8**, i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V1:.*]] = bitcast i8** %[[V0]] to i8*
+// CHECK: %[[V2:.*]] = getelementptr inbounds i8, i8* %[[V1]], i64 16
+// CHECK: %[[V3:.*]] = bitcast i8* %[[V2]] to i8**
+// CHECK: call void @objc_storeStrong(i8** %[[V3]], i8* null)
+// CHECK: ret void
+
+void test_constructor_destructor_StrongOuter(void) {
+  StrongOuter t;
+}
+
+// CHECK: define void @test_copy_constructor_StrongOuter(%[[STRUCT_STRONGOUTER:.*]]* %[[S:.*]])
+// CHECK: %[[S_ADDR:.*]] = alloca %[[STRUCT_STRONGOUTER]]*, align 8
+// CHECK: %[[T:.*]] = alloca %[[STRUCT_STRONGOUTER]], align 8
+// CHECK: store %[[STRUCT_STRONGOUTER]]* %[[S]], %[[STRUCT_STRONGOUTER]]** %[[S_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load %[[STRUCT_STRONGOUTER]]*, %[[STRUCT_STRONGOUTER]]** %[[S_ADDR]], align 8
+// CHECK: %[[V1:.*]] = bitcast %[[STRUCT_STRONGOUTER]]* %[[T]] to i8**
+// CHECK: %[[V2:.*]] = bitcast %[[STRUCT_STRONGOUTER]]* %[[V0]] to i8**
+// CHECK: call void @__copy_constructor_8_8_t0w16_s16_s24_t32w8(i8** %[[V1]], i8** %[[V2]])
+// CHECK: %[[V3:.*]] = bitcast %[[STRUCT_STRONGOUTER]]* %[[T]] to i8**
+// CHECK: call void @__destructor_8_s16_s24(i8** %[[V3]])
+// CHECK: ret void
+
+// CHECK: define linkonce_odr hidden void @__copy_constructor_8_8_t0w16_s16_s24_t32w8(i8** %[[DST:.*]], i8** %[[SRC:.*]])
+// CHECK: %[[DST_ADDR:.*]] = alloca i8**, align 8
+// CHECK: %[[SRC_ADDR:.*]] = alloca i8**, align 8
+// CHECK: store i8** %[[DST]], i8*** %[[DST_ADDR]], align 8
+// CHECK: store i8** %[[SRC]], i8*** %[[SRC_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load i8**, i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V1:.*]] = load i8**, i8*** %[[SRC_ADDR]], align 8
+// CHECK: call void @__copy_constructor_8_8_t0w16_s16(i8** %[[V0]], i8** %[[V1]])
+// CHECK: %[[V2:.*]] = bitcast i8** %[[V0]] to i8*
+// CHECK: %[[V3:.*]] = getelementptr inbounds i8, i8* %[[V2]], i64 24
+// CHECK: %[[V4:.*]] = bitcast i8* %[[V3]] to i8**
+// CHECK: %[[V5:.*]] = bitcast i8** %[[V1]] to i8*
+// CHECK: %[[V6:.*]] = getelementptr inbounds i8, i8* %[[V5]], i64 24
+// CHECK: %[[V7:.*]] = bitcast i8* %[[V6]] to i8**
+// CHECK: %[[V8:.*]] = load i8*, i8** %[[V7]], align 8
+// CHECK: %[[V9:.*]] = call i8* @objc_retain(i8* %[[V8]])
+// CHECK: store i8* %[[V9]], i8** %[[V4]], align 8
+// CHECK: %[[V10:.*]] = bitcast i8** %[[V0]] to i8*
+// CHECK: %[[V11:.*]] = getelementptr inbounds i8, i8* %[[V10]], i64 32
+// CHECK: %[[V12:.*]] = bitcast i8* %[[V11]] to i8**
+// CHECK: %[[V13:.*]] = bitcast i8** %[[V1]] to i8*
+// CHECK: %[[V14:.*]] = getelementptr inbounds i8, i8* %[[V13]], i64 32
+// CHECK: %[[V15:.*]] = bitcast i8* %[[V14]] to i8**
+// CHECK: %[[V16:.*]] = bitcast i8** %[[V12]] to i64*
+// CHECK: %[[V17:.*]] = bitcast i8** %[[V15]] to i64*
+// CHECK: %[[V18:.*]] = load i64, i64* %[[V17]], align 8
+// CHECK: store i64 %[[V18]], i64* %[[V16]], align 8
+// CHECK: ret void
+
+// CHECK: define linkonce_odr hidden void @__copy_constructor_8_8_t0w16_s16(i8** %[[DST:.*]], i8** %[[SRC:.*]])
+// CHECK: %[[DST_ADDR:.*]] = alloca i8**, align 8
+// CHECK: %[[SRC_ADDR:.*]] = alloca i8**, align 8
+// CHECK: store i8** %[[DST]], i8*** %[[DST_ADDR]], align 8
+// CHECK: store i8** %[[SRC]], i8*** %[[SRC_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load i8**, i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V1:.*]] = load i8**, i8*** %[[SRC_ADDR]], align 8
+// CHECK: %[[V2:.*]] = bitcast i8** %[[V0]] to i8*
+// CHECK: %[[V3:.*]] = bitcast i8** %[[V1]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[V2]], i8* align 8 %[[V3]], i64 16, i1 false)
+// CHECK: %[[V4:.*]] = bitcast i8** %[[V0]] to i8*
+// CHECK: %[[V5:.*]] = getelementptr inbounds i8, i8* %[[V4]], i64 16
+// CHECK: %[[V6:.*]] = bitcast i8* %[[V5]] to i8**
+// CHECK: %[[V7:.*]] = bitcast i8** %[[V1]] to i8*
+// CHECK: %[[V8:.*]] = getelementptr inbounds i8, i8* %[[V7]], i64 16
+// CHECK: %[[V9:.*]] = bitcast i8* %[[V8]] to i8**
+// CHECK: %[[V10:.*]] = load i8*, i8** %[[V9]], align 8
+// CHECK: %[[V11:.*]] = call i8* @objc_retain(i8* %[[V10]])
+// CHECK: store i8* %[[V11]], i8** %[[V6]], align 8
+// CHECK: ret void
+
+void test_copy_constructor_StrongOuter(StrongOuter *s) {
+  StrongOuter t = *s;
+}
+
+/// CHECK: define linkonce_odr hidden void @__copy_assignment_8_8_t0w16_s16_s24_t32w8(i8** %[[DST:.*]], i8** %[[SRC:.*]])
+// CHECK: %[[DST_ADDR:.*]] = alloca i8**, align 8
+// CHECK: %[[SRC_ADDR:.*]] = alloca i8**, align 8
+// CHECK: store i8** %[[DST]], i8*** %[[DST_ADDR]], align 8
+// CHECK: store i8** %[[SRC]], i8*** %[[SRC_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load i8**, i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V1:.*]] = load i8**, i8*** %[[SRC_ADDR]], align 8
+// CHECK: %[[V2:.*]] = bitcast i8** %[[V0]] to i8*
+// CHECK: %[[V3:.*]] = getelementptr inbounds i8, i8* %[[V2]], i64 24
+// CHECK: %[[V4:.*]] = bitcast i8* %[[V3]] to i8**
+// CHECK: %[[V5:.*]] = bitcast i8** %[[V1]] to i8*
+// CHECK: %[[V6:.*]] = getelementptr inbounds i8, i8* %[[V5]], i64 24
+// CHECK: %[[V7:.*]] = bitcast i8* %[[V6]] to i8**
+// CHECK: %[[V8:.*]] = load i8*, i8** %[[V7]], align 8
+// CHECK: call void @objc_storeStrong(i8** %[[V4]], i8* %[[V8]])
+
+void test_copy_assignment_StrongOuter(StrongOuter *d, StrongOuter *s) {
+  *d = *s;
+}
+
+// CHECK: define void @test_move_constructor_StrongOuter()
+// CHECK: %[[T1:.*]] = getelementptr inbounds %[[STRUCT_BLOCK_BYREF_T:.*]], %[[STRUCT_BLOCK_BYREF_T]]* %{{.*}}, i32 0, i32 7
+// CHECK: %[[V1:.*]] = bitcast %[[STRUCT_STRONGOUTER]]* %[[T1]] to i8**
+// CHECK: call void @__default_constructor_8_s16_s24(i8** %[[V1]])
+// CHECK: %[[T2:.*]] = getelementptr inbounds %[[STRUCT_BLOCK_BYREF_T]], %[[STRUCT_BLOCK_BYREF_T]]* %{{.*}}, i32 0, i32 7
+// CHECK: %[[V9:.*]] = bitcast %[[STRUCT_STRONGOUTER]]* %[[T2]] to i8**
+// CHECK: call void @__destructor_8_s16_s24(i8** %[[V9]])
+
+// CHECK: define internal void @__Block_byref_object_copy_(i8*, i8*)
+// CHECK: call void @__move_constructor_8_8_t0w16_s16_s24_t32w8(
+
+// CHECK: define linkonce_odr hidden void @__move_constructor_8_8_t0w16_s16_s24_t32w8(i8** %[[DST:.*]], i8** %[[SRC:.*]])
+// CHECK: %[[DST_ADDR:.*]] = alloca i8**, align 8
+// CHECK: %[[SRC_ADDR:.*]] = alloca i8**, align 8
+// CHECK: store i8** %[[DST]], i8*** %[[DST_ADDR]], align 8
+// CHECK: store i8** %[[SRC]], i8*** %[[SRC_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load i8**, i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V1:.*]] = load i8**, i8*** %[[SRC_ADDR]], align 8
+// CHECK: call void @__move_constructor_8_8_t0w16_s16(i8** %[[V0]], i8** %[[V1]])
+// CHECK: %[[V2:.*]] = bitcast i8** %[[V0]] to i8*
+// CHECK: %[[V3:.*]] = getelementptr inbounds i8, i8* %[[V2]], i64 24
+// CHECK: %[[V4:.*]] = bitcast i8* %[[V3]] to i8**
+// CHECK: %[[V5:.*]] = bitcast i8** %[[V1]] to i8*
+// CHECK: %[[V6:.*]] = getelementptr inbounds i8, i8* %[[V5]], i64 24
+// CHECK: %[[V7:.*]] = bitcast i8* %[[V6]] to i8**
+// CHECK: %[[V8:.*]] = load i8*, i8** %[[V7]], align 8
+// CHECK: store i8* null, i8** %[[V7]], align 8
+// CHECK: store i8* %[[V8]], i8** %[[V4]], align 8
+
+// CHECK: define internal void @__Block_byref_object_dispose_(i8*)
+// CHECK: call void @__destructor_8_s16_s24(
+
+void test_move_constructor_StrongOuter(void) {
+  __block StrongOuter t;
+  BlockTy b = ^{ (void)t; };
+}
+
+// CHECK: define linkonce_odr hidden void @__move_assignment_8_8_t0w16_s16_s24_t32w8(i8** %[[DST:.*]], i8** %[[SRC:.*]])
+// CHECK: %[[DST_ADDR:.*]] = alloca i8**, align 8
+// CHECK: %[[SRC_ADDR:.*]] = alloca i8**, align 8
+// CHECK: store i8** %[[DST]], i8*** %[[DST_ADDR]], align 8
+// CHECK: store i8** %[[SRC]], i8*** %[[SRC_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load i8**, i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V1:.*]] = load i8**, i8*** %[[SRC_ADDR]], align 8
+// CHECK: call void @__move_assignment_8_8_t0w16_s16(i8** %[[V0]], i8** %[[V1]])
+// CHECK: %[[V2:.*]] = bitcast i8** %[[V0]] to i8*
+// CHECK: %[[V3:.*]] = getelementptr inbounds i8, i8* %[[V2]], i64 24
+// CHECK: %[[V4:.*]] = bitcast i8* %[[V3]] to i8**
+// CHECK: %[[V5:.*]] = bitcast i8** %[[V1]] to i8*
+// CHECK: %[[V6:.*]] = getelementptr inbounds i8, i8* %[[V5]], i64 24
+// CHECK: %[[V7:.*]] = bitcast i8* %[[V6]] to i8**
+// CHECK: %[[V8:.*]] = load i8*, i8** %[[V7]], align 8
+// CHECK: store i8* null, i8** %[[V7]], align 8
+// CHECK: %[[V9:.*]] = load i8*, i8** %[[V4]], align 8
+// CHECK: store i8* %[[V8]], i8** %[[V4]], align 8
+// CHECK: call void @objc_release(i8* %[[V9]])
+
+void test_move_assignment_StrongOuter(StrongOuter *p) {
+  *p = getStrongOuter();
+}
+
+// CHECK: define void @test_parameter_StrongSmall([2 x i64] %[[A_COERCE:.*]])
+// CHECK: %[[A:.*]] = alloca %[[STRUCT_STRONG:.*]], align 8
+// CHECK: %[[V0:.*]] = bitcast %[[STRUCT_STRONG]]* %[[A]] to [2 x i64]*
+// CHECK: store [2 x i64] %[[A_COERCE]], [2 x i64]* %[[V0]], align 8
+// CHECK: %[[V1:.*]] = bitcast %[[STRUCT_STRONG]]* %[[A]] to i8**
+// CHECK: call void @__destructor_8_s8(i8** %[[V1]])
+// CHECK: ret void
+
+void test_parameter_StrongSmall(StrongSmall a) {
+}
+
+// CHECK: define void @test_argument_StrongSmall([2 x i64] %[[A_COERCE:.*]])
+// CHECK: %[[A:.*]] = alloca %[[STRUCT_STRONGSMALL:.*]], align 8
+// CHECK: %[[TEMP_LVALUE:.*]] = alloca %[[STRUCT_STRONGSMALL]], align 8
+// CHECK: %[[V0:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[A]] to [2 x i64]*
+// CHECK: store [2 x i64] %[[A_COERCE]], [2 x i64]* %[[V0]], align 8
+// CHECK: %[[V1:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[TEMP_LVALUE]] to i8**
+// CHECK: %[[V2:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[A]] to i8**
+// CHECK: call void @__copy_constructor_8_8_t0w4_s8(i8** %[[V1]], i8** %[[V2]])
+// CHECK: %[[V3:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[TEMP_LVALUE]] to [2 x i64]*
+// CHECK: %[[V4:.*]] = load [2 x i64], [2 x i64]* %[[V3]], align 8
+// CHECK: call void @calleeStrongSmall([2 x i64] %[[V4]])
+// CHECK: %[[V5:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[A]] to i8**
+// CHECK: call void @__destructor_8_s8(i8** %[[V5]])
+// CHECK: ret void
+
+void test_argument_StrongSmall(StrongSmall a) {
+  calleeStrongSmall(a);
+}
+
+// CHECK: define [2 x i64] @test_return_StrongSmall([2 x i64] %[[A_COERCE:.*]])
+// CHECK: %[[RETVAL:.*]] = alloca %[[STRUCT_STRONGSMALL:.*]], align 8
+// CHECK: %[[A:.*]] = alloca %[[STRUCT_STRONGSMALL]], align 8
+// CHECK: %[[V0:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[A]] to [2 x i64]*
+// CHECK: store [2 x i64] %[[A_COERCE]], [2 x i64]* %[[V0]], align 8
+// CHECK: %[[V1:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[RETVAL]] to i8**
+// CHECK: %[[V2:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[A]] to i8**
+// CHECK: call void @__copy_constructor_8_8_t0w4_s8(i8** %[[V1]], i8** %[[V2]])
+// CHECK: %[[V3:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[A]] to i8**
+// CHECK: call void @__destructor_8_s8(i8** %[[V3]])
+// CHECK: %[[V4:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[RETVAL]] to [2 x i64]*
+// CHECK: %[[V5:.*]] = load [2 x i64], [2 x i64]* %[[V4]], align 8
+// CHECK: ret [2 x i64] %[[V5]]
+
+StrongSmall test_return_StrongSmall(StrongSmall a) {
+  return a;
+}
+
+// CHECK: define void @test_destructor_ignored_result()
+// CHECK: %[[COERCE:.*]] = alloca %[[STRUCT_STRONGSMALL:.*]], align 8
+// CHECK: %[[CALL:.*]] = call [2 x i64] @getStrongSmall()
+// CHECK: %[[V0:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[COERCE]] to [2 x i64]*
+// CHECK: store [2 x i64] %[[CALL]], [2 x i64]* %[[V0]], align 8
+// CHECK: %[[V1:.*]] = bitcast %[[STRUCT_STRONGSMALL]]* %[[COERCE]] to i8**
+// CHECK: call void @__destructor_8_s8(i8** %[[V1]])
+// CHECK: ret void
+
+void test_destructor_ignored_result(void) {
+  getStrongSmall();
+}
+
+// CHECK: define void @test_copy_constructor_StrongBlock(
+// CHECK: call void @__copy_constructor_8_8_sb0(
+// CHECK: call void @__destructor_8_sb0(
+// CHECK: ret void
+
+// CHECK: define linkonce_odr hidden void @__copy_constructor_8_8_sb0(i8** %[[DST:.*]], i8** %[[SRC:.*]])
+// CHECK: %[[DST_ADDR:.*]] = alloca i8**, align 8
+// CHECK: %[[SRC_ADDR:.*]] = alloca i8**, align 8
+// CHECK: store i8** %[[DST]], i8*** %[[DST_ADDR]], align 8
+// CHECK: store i8** %[[SRC]], i8*** %[[SRC_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load i8**, i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V1:.*]] = load i8**, i8*** %[[SRC_ADDR]], align 8
+// CHECK: %[[V2:.*]] = load i8*, i8** %[[V1]], align 8
+// CHECK: %[[V3:.*]] = call i8* @objc_retainBlock(i8* %[[V2]])
+// CHECK: store i8* %[[V3]], i8** %[[V0]], align 8
+// CHECK: ret void
+
+void test_copy_constructor_StrongBlock(StrongBlock *s) {
+  StrongBlock t = *s;
+}
+
+// CHECK: define void @test_copy_assignment_StrongBlock(%[[STRUCT_STRONGBLOCK:.*]]* %[[D:.*]], %[[STRUCT_STRONGBLOCK]]* %[[S:.*]])
+// CHECK: call void @__copy_assignment_8_8_sb0(
+
+// CHECK: define linkonce_odr hidden void @__copy_assignment_8_8_sb0(i8** %[[DST:.*]], i8** %[[SRC:.*]])
+// CHECK: %[[DST_ADDR:.*]] = alloca i8**, align 8
+// CHECK: %[[SRC_ADDR:.*]] = alloca i8**, align 8
+// CHECK: store i8** %[[DST]], i8*** %[[DST_ADDR]], align 8
+// CHECK: store i8** %[[SRC]], i8*** %[[SRC_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load i8**, i8*** %[[DST_ADDR]], align 8
+// CHECK: %[[V1:.*]] = load i8**, i8*** %[[SRC_ADDR]], align 8
+// CHECK: %[[V2:.*]] = load i8*, i8** %[[V1]], align 8
+// CHECK: %[[V3:.*]] = call i8* @objc_retainBlock(i8* %[[V2]])
+// CHECK: %[[V4:.*]] = load i8*, i8** %[[V0]], align 8
+// CHECK: store i8* %[[V3]], i8** %[[V0]], align 8
+// CHECK: call void @objc_release(i8* %[[V4]])
+// CHECK: ret void
+
+void test_copy_assignment_StrongBlock(StrongBlock *d, StrongBlock *s) {
+  *d = *s;
+}
+
+// CHECK: define void @test_copy_constructor_StrongVolatile0(
+// CHECK: call void @__copy_constructor_8_8_t0w4_sv8(
+// CHECK: call void @__destructor_8_sv8(
+
+// CHECK: define linkonce_odr hidden void @__copy_constructor_8_8_t0w4_sv8(
+// CHECK: %[[V8:.*]] = load volatile i8*, i8** %{{.*}}, align 8
+// CHECK: %[[V9:.*]] = call i8* @objc_retain(i8* %[[V8]])
+// CHECK: store volatile i8* %[[V9]], i8** %{{.*}}, align 8
+
+void test_copy_constructor_StrongVolatile0(StrongVolatile *s) {
+  StrongVolatile t = *s;
+}
+
+// CHECK: define void @test_copy_constructor_StrongVolatile1(
+// CHECK: call void @__copy_constructor_8_8_tv0w128_sv16(
+
+void test_copy_constructor_StrongVolatile1(Strong *s) {
+  volatile Strong t = *s;
+}
+
+// CHECK: define void @test_block_capture_Strong()
+// CHECK: call void @__default_constructor_8_s16(
+// CHECK: call void @__copy_constructor_8_8_t0w16_s16(
+// CHECK: call void @__destructor_8_s16(
+// CHECK: call void @__destructor_8_s16(
+// CHECK: ret void
+
+// CHECK: define internal void @__copy_helper_block_.1(i8*, i8*)
+// CHECK: call void @__copy_constructor_8_8_t0w16_s16(
+// CHECK: ret void
+
+// CHECK: define internal void @__destroy_helper_block_.2(
+// CHECK: call void @__destructor_8_s16(
+// CHECK: ret void
+
+void test_block_capture_Strong(void) {
+  Strong t;
+  BlockTy b = ^(){ (void)t; };
+}
+
+// CHECK: define void @test_variable_length_array(i32 %[[N:.*]])
+// CHECK: %[[N_ADDR:.*]] = alloca i32, align 4
+// CHECK: store i32 %[[N]], i32* %[[N_ADDR]], align 4
+// CHECK: %[[V0:.*]] = load i32, i32* %[[N_ADDR]], align 4
+// CHECK: %[[V1:.*]] = zext i32 %[[V0]] to i64
+// CHECK: %[[VLA:.*]] = alloca %[[STRUCT_STRONG:.*]], i64 %[[V1]], align 8
+// CHECK: %[[V3:.*]] = bitcast %[[STRUCT_STRONG]]* %[[VLA]] to i8**
+// CHECK: %[[V4:.*]] = mul nuw i64 24, %[[V1]]
+// CHECK: %[[V5:.*]] = bitcast i8** %[[V3]] to i8*
+// CHECK: %[[V6:.*]] = getelementptr inbounds i8, i8* %[[V5]], i64 %[[V4]]
+// CHECK: %[[DSTARRAY_END:.*]] = bitcast i8* %[[V6]] to i8**
+// CHECK: br label
+
+// CHECK: %[[DSTADDR_CUR:.*]] = phi i8** [ %[[V3]], {{.*}} ], [ %[[V7:.*]], {{.*}} ]
+// CHECK: %[[DONE:.*]] = icmp eq i8** %[[DSTADDR_CUR]], %[[DSTARRAY_END]]
+// CHECK: br i1 %[[DONE]], label
+
+// CHECK: call void @__default_constructor_8_s16(i8** %[[DSTADDR_CUR]])
+// CHECK: %[[V8:.*]] = bitcast i8** %[[DSTADDR_CUR]] to i8*
+// CHECK: %[[V9:.*]] = getelementptr inbounds i8, i8* %[[V8]], i64 24
+// CHECK: %[[V7]] = bitcast i8* %[[V9]] to i8**
+// CHECK: br label
+
+// CHECK: call void @func(%[[STRUCT_STRONG]]* %[[VLA]])
+// CHECK: %[[V10:.*]] = getelementptr inbounds %[[STRUCT_STRONG]], %[[STRUCT_STRONG]]* %[[VLA]], i64 %[[V1]]
+// CHECK: %[[ARRAYDESTROY_ISEMPTY:.*]] = icmp eq %[[STRUCT_STRONG]]* %[[VLA]], %[[V10]]
+// CHECK: br i1 %[[ARRAYDESTROY_ISEMPTY]], label
+
+// CHECK: %[[ARRAYDESTROY_ELEMENTPAST:.*]] = phi %[[STRUCT_STRONG]]* [ %[[V10]], {{.*}} ], [ %[[ARRAYDESTROY_ELEMENT:.*]], {{.*}} ]
+// CHECK: %[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds %[[STRUCT_STRONG]], %[[STRUCT_STRONG]]* %[[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK: %[[V11:.*]] = bitcast %[[STRUCT_STRONG]]* %[[ARRAYDESTROY_ELEMENT]] to i8**
+// CHECK: call void @__destructor_8_s16(i8** %[[V11]])
+// CHECK: %[[ARRAYDESTROY_DONE:.*]] = icmp eq %[[STRUCT_STRONG]]* %[[ARRAYDESTROY_ELEMENT]], %[[VLA]]
+// CHECK: br i1 %[[ARRAYDESTROY_DONE]], label
+
+// CHECK: ret void
+
+void test_variable_length_array(int n) {
+  Strong a[n];
+  func(a);
+}
+
+// CHECK: define linkonce_odr hidden void @__default_constructor_8_AB8s8n4_s8_AE(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %{{.*}}, i8 0, i64 32, i1 false)
+void test_constructor_destructor_IDArray(void) {
+  IDArray t;
+}
+
+// CHECK: define linkonce_odr hidden void @__default_constructor_8_AB8s24n4_s24_AE(
+void test_constructor_destructor_StructArray(void) {
+  StructArray t;
+}
+
+// Check that IRGen copies the 9-bit bitfield emitting i16 load and store.
+
+// CHECK: define linkonce_odr hidden void @__copy_constructor_8_8_s0_t8w2(
+// CHECK: %[[V4:.*]] = bitcast i8** %{{.*}} to i8*
+// CHECK: %[[V5:.*]] = getelementptr inbounds i8, i8* %[[V4]], i64 8
+// CHECK: %[[V6:.*]] = bitcast i8* %[[V5]] to i8**
+// CHECK: %[[V7:.*]] = bitcast i8** %{{.*}} to i8*
+// CHECK: %[[V8:.*]] = getelementptr inbounds i8, i8* %[[V7]], i64 8
+// CHECK: %[[V9:.*]] = bitcast i8* %[[V8]] to i8**
+// CHECK: %[[V10:.*]] = bitcast i8** %[[V6]] to i16*
+// CHECK: %[[V11:.*]] = bitcast i8** %[[V9]] to i16*
+// CHECK: %[[V12:.*]] = load i16, i16* %[[V11]], align 8
+// CHECK: store i16 %[[V12]], i16* %[[V10]], align 8
+// CHECK: ret void
+
+void test_copy_constructor_Bitfield0(Bitfield0 *a) {
+  Bitfield0 t = *a;
+}
+
+// CHECK: define linkonce_odr hidden void @__copy_constructor_8_8_t0w2_s8_t16w4_s24_t32w12_s48_t56w9_tv513w2_tv520w8
+// CHECK: %[[V4:.*]] = load i16, i16* %{{.*}}, align 8
+// CHECK: store i16 %[[V4]], i16* %{{.*}}, align 8
+// CHECK: %[[V21:.*]] = load i32, i32* %{{.*}}, align 8
+// CHECK: store i32 %[[V21]], i32* %{{.*}}, align 8
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i64 12, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i64 9, i1 false)
+// CHECK: %[[V54:.*]] = bitcast i8** %[[V0:.*]] to %[[STRUCT_BITFIELD1]]*
+// CHECK: %[[I5:.*]] = getelementptr inbounds %[[STRUCT_BITFIELD1]], %[[STRUCT_BITFIELD1]]* %[[V54]], i32 0, i32 8
+// CHECK: %[[V55:.*]] = bitcast i8** %[[V1:.*]] to %[[STRUCT_BITFIELD1]]*
+// CHECK: %[[I51:.*]] = getelementptr inbounds %[[STRUCT_BITFIELD1]], %[[STRUCT_BITFIELD1]]* %[[V55]], i32 0, i32 8
+// CHECK: %[[BF_LOAD:.*]] = load volatile i8, i8* %[[I51]], align 8
+// CHECK: %[[BF_SHL:.*]] = shl i8 %[[BF_LOAD]], 5
+// CHECK: %[[BF_ASHR:.*]] = ashr i8 %[[BF_SHL]], 6
+// CHECK: %[[BF_CAST:.*]] = sext i8 %[[BF_ASHR]] to i32
+// CHECK: %[[V56:.*]] = trunc i32 %[[BF_CAST]] to i8
+// CHECK: %[[BF_LOAD2:.*]] = load volatile i8, i8* %[[I5]], align 8
+// CHECK: %[[BF_VALUE:.*]] = and i8 %[[V56]], 3
+// CHECK: %[[BF_SHL3:.*]] = shl i8 %[[BF_VALUE]], 1
+// CHECK: %[[BF_CLEAR:.*]] = and i8 %[[BF_LOAD2]], -7
+// CHECK: %[[BF_SET:.*]] = or i8 %[[BF_CLEAR]], %[[BF_SHL3]]
+// CHECK: store volatile i8 %[[BF_SET]], i8* %[[I5]], align 8
+// CHECK: %[[V57:.*]] = bitcast i8** %[[V0]] to %[[STRUCT_BITFIELD1]]*
+// CHECK: %[[I6:.*]] = getelementptr inbounds %[[STRUCT_BITFIELD1]], %[[STRUCT_BITFIELD1]]* %[[V57]], i32 0, i32 9
+// CHECK: %[[V58:.*]] = bitcast i8** %[[V1]] to %[[STRUCT_BITFIELD1]]*
+// CHECK: %[[I64:.*]] = getelementptr inbounds %[[STRUCT_BITFIELD1]], %[[STRUCT_BITFIELD1]]* %[[V58]], i32 0, i32 9
+// CHECK: %[[V59:.*]] = load volatile i8, i8* %[[I64]], align 1
+// CHECK: store volatile i8 %[[V59]], i8* %[[I6]], align 1
+
+void test_copy_constructor_Bitfield1(Bitfield1 *a) {
+  Bitfield1 t = *a;
+}
diff --git a/test/CodeGenObjCXX/arc-exceptions.mm b/test/CodeGenObjCXX/arc-exceptions.mm
index 0ae3069..3d50461 100644
--- a/test/CodeGenObjCXX/arc-exceptions.mm
+++ b/test/CodeGenObjCXX/arc-exceptions.mm
@@ -101,7 +101,7 @@
   //   Construct array.
   // CHECK-NEXT: [[ARRAY:%.*]] = getelementptr inbounds [[A]], [[A]]* [[THIS]], i32 0, i32 1
   // CHECK-NEXT: [[T0:%.*]] = bitcast [2 x [3 x i8*]]* [[ARRAY]] to i8*
-  // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 48, i32 8, i1 false)
+  // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[T0]], i8 0, i64 48, i1 false)
   //   throw 0;
   // CHECK:      invoke void @__cxa_throw(
   //   Landing pad from throw site:
diff --git a/test/CodeGenObjCXX/arc-forwarded-lambda-call.mm b/test/CodeGenObjCXX/arc-forwarded-lambda-call.mm
new file mode 100644
index 0000000..35abe19
--- /dev/null
+++ b/test/CodeGenObjCXX/arc-forwarded-lambda-call.mm
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.12.0 -emit-llvm -disable-llvm-passes -O3 -fblocks -fobjc-arc -fobjc-runtime-has-weak -std=c++11 -o - %s | FileCheck %s
+
+void test0(id x) {
+  extern void test0_helper(id (^)(void));
+  test0_helper([=]() { return x; });
+  // CHECK-LABEL: define internal i8* @___Z5test0P11objc_object_block_invoke
+  // CHECK: [[T0:%.*]] = call i8* @"_ZZ5test0P11objc_objectENK3$_0clEv"
+  // CHECK-NEXT: [[T1:%.*]] = call i8* @objc_retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T2:%.*]] = tail call i8* @objc_autoreleaseReturnValue(i8* [[T1]])
+  // CHECK-NEXT: ret i8* [[T2]]
+}
+
+id test1_rv;
+
+void test1() {
+  extern void test1_helper(id (*)(void));
+  test1_helper([](){ return test1_rv; });
+  // CHECK-LABEL: define internal i8* @"_ZZ5test1vEN3$_18__invokeEv"
+  // CHECK: [[T0:%.*]] = call i8* @"_ZZ5test1vENK3$_1clEv"
+  // CHECK-NEXT: [[T1:%.*]] = call i8* @objc_retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T2:%.*]] = tail call i8* @objc_autoreleaseReturnValue(i8* [[T1]])
+  // CHECK-NEXT: ret i8* [[T2]]
+}
diff --git a/test/CodeGenObjCXX/microsoft-abi-arc-param-order.mm b/test/CodeGenObjCXX/microsoft-abi-arc-param-order.mm
index cb71bcf..45a8f63 100644
--- a/test/CodeGenObjCXX/microsoft-abi-arc-param-order.mm
+++ b/test/CodeGenObjCXX/microsoft-abi-arc-param-order.mm
@@ -9,7 +9,7 @@
 
 // Verify that we destruct things from left to right in the MS C++ ABI: a, b, c, d.
 //
-// CHECK-LABEL: define void @"\01?test_arc_order@@YAXUA@@PAUobjc_object@@01@Z"
+// CHECK-LABEL: define dso_local void @"\01?test_arc_order@@YAXUA@@PAUobjc_object@@01@Z"
 // CHECK:                       (<{ %struct.A, i8*, %struct.A, i8* }>* inalloca)
 void test_arc_order(A a, id __attribute__((ns_consumed)) b , A c, id __attribute__((ns_consumed)) d) {
   // CHECK: call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* %{{.*}})
diff --git a/test/CodeGenObjCXX/msabi-objc-extensions.mm b/test/CodeGenObjCXX/msabi-objc-extensions.mm
new file mode 100644
index 0000000..e6a6813
--- /dev/null
+++ b/test/CodeGenObjCXX/msabi-objc-extensions.mm
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -triple thumbv7-windows-msvc -fobjc-runtime=ios-6.0 -fobjc-arc -o - -emit-llvm %s | FileCheck %s
+
+@protocol P;
+@protocol Q;
+
+@class I;
+
+void f(id<P>, id, id<P>, id) {}
+// CHECK-LABEL: "\01?f@@YAXPAU?$objc_object@U?$Protocol@UP@@@__ObjC@@@@PAUobjc_object@@01@Z"
+
+void f(id, id<P>, id<P>, id) {}
+// CHECK-LABEL: "\01?f@@YAXPAUobjc_object@@PAU?$objc_object@U?$Protocol@UP@@@__ObjC@@@@10@Z"
+
+void f(id<P>, id<P>) {}
+// CHECK-LABEL: "\01?f@@YAXPAU?$objc_object@U?$Protocol@UP@@@__ObjC@@@@0@Z"
+
+void f(id<P>) {}
+// CHECK-LABEL: "\01?f@@YAXPAU?$objc_object@U?$Protocol@UP@@@__ObjC@@@@@Z"
+
+void f(id<P, Q>) {}
+// CHECK-LABEL: "\01?f@@YAXPAU?$objc_object@U?$Protocol@UP@@@__ObjC@@U?$Protocol@UQ@@@2@@@@Z"
+
+void f(Class<P>) {}
+// CHECK-LABEL: "\01?f@@YAXPAU?$objc_class@U?$Protocol@UP@@@__ObjC@@@@@Z"
+
+void f(Class<P, Q>) {}
+// CHECK-LABEL: "\01?f@@YAXPAU?$objc_class@U?$Protocol@UP@@@__ObjC@@U?$Protocol@UQ@@@2@@@@Z"
+
+void f(I<P> *) {}
+// CHECK-LABEL: "\01?f@@YAXPAU?$I@U?$Protocol@UP@@@__ObjC@@@@@Z"
+
+void f(I<P, Q> *) {}
+// CHECK-LABEL: "\01?f@@YAXPAU?$I@U?$Protocol@UP@@@__ObjC@@U?$Protocol@UQ@@@2@@@@Z"
+
+template <typename>
+struct S {};
+
+void f(S<__unsafe_unretained id>) {}
+// CHECK-LABEL: "\01?f@@YAXU?$S@PAUobjc_object@@@@@Z"
+
+void f(S<__autoreleasing id>) {}
+// CHECK-LABEL: "\01?f@@YAXU?$S@U?$Autoreleasing@PAUobjc_object@@@__ObjC@@@@@Z"
+
+void f(S<__strong id>) {}
+// CHECK-LABEL: "\01?f@@YAXU?$S@U?$Strong@PAUobjc_object@@@__ObjC@@@@@Z"
+
+void f(S<__weak id>) {}
+// CHECK-LABEL: "\01?f@@YAXU?$S@U?$Weak@PAUobjc_object@@@__ObjC@@@@@Z"
+
+void w(__weak id) {}
+// CHECK-LABEL: "\01?w@@YAXPAUobjc_object@@@Z"
+
+void s(__strong id) {}
+// CHECK-LABEL: "\01?s@@YAXPAUobjc_object@@@Z"
+
+void a(__autoreleasing id) {}
+// CHECK-LABEL: "\01?a@@YAXPAUobjc_object@@@Z"
+
+void u(__unsafe_unretained id) {}
+// CHECK-LABEL: "\01?u@@YAXPAUobjc_object@@@Z"
+
+S<__autoreleasing id> g() { return S<__autoreleasing id>(); }
+// CHECK-LABEL: "\01?g@@YA?AU?$S@U?$Autoreleasing@PAUobjc_object@@@__ObjC@@@@XZ"
+
+__autoreleasing id h() { return nullptr; }
+// CHECK-LABEL: "\01?h@@YAPAUobjc_object@@XZ"
diff --git a/test/CodeGenObjCXX/msabi-objc-types.mm b/test/CodeGenObjCXX/msabi-objc-types.mm
index 013a9c8..28b9f9d 100644
--- a/test/CodeGenObjCXX/msabi-objc-types.mm
+++ b/test/CodeGenObjCXX/msabi-objc-types.mm
@@ -3,13 +3,13 @@
 @class I;
 
 id kid;
-// CHECK: @"\01?kid@@3PAUobjc_object@@A" = global
+// CHECK: @"\01?kid@@3PAUobjc_object@@A" =  dso_local global
 
 Class klass;
-// CHECK: @"\01?klass@@3PAUobjc_class@@A" = global
+// CHECK: @"\01?klass@@3PAUobjc_class@@A" = dso_local global
 
 I *kI;
-// CHECK: @"\01?kI@@3PAUI@@A" = global
+// CHECK: @"\01?kI@@3PAUI@@A" = dso_local global
 
 void f(I *) {}
 // CHECK-LABEL: "\01?f@@YAXPAUI@@@Z"
@@ -33,7 +33,7 @@
 // CHECK-LABEL: "\01?g@@YAXAAPAUobjc_object@@@Z"
 
 void g(const id &) {}
-// CHECK-LABEL: "\01?g@@YAXABPAUobjc_object@@@Z"
+// CHECK-LABEL: "\01?g@@YAXABQAUobjc_object@@@Z"
 
 void g(id &&) {}
 // CHECK-LABEL: "\01?g@@YAX$$QAPAUobjc_object@@@Z"
@@ -45,7 +45,7 @@
 // CHECK-LABEL: "\01?h@@YAXAAPAUobjc_class@@@Z"
 
 void h(const Class &) {}
-// CHECK-LABEL: "\01?h@@YAXABPAUobjc_class@@@Z"
+// CHECK-LABEL: "\01?h@@YAXABQAUobjc_class@@@Z"
 
 void h(Class &&) {}
 // CHECK-LABEL: "\01?h@@YAX$$QAPAUobjc_class@@@Z"
@@ -62,6 +62,30 @@
 const I &l() { return *kI; }
 // CHECK-LABEL: "\01?l@@YAABUI@@XZ"
 
+void m(const id) {}
+// CHECK-LABEL: "\01?m@@YAXQAUobjc_object@@@Z"
+
+void m(const I *) {}
+// CHECK-LABEL: "\01?m@@YAXPBUI@@@Z"
+
+void n(SEL) {}
+// CHECK-LABEL: "\01?n@@YAXPAUobjc_selector@@@Z"
+
+void n(SEL *) {}
+// CHECK-LABEL: "\01?n@@YAXPAPAUobjc_selector@@@Z"
+
+void n(const SEL *) {}
+// CHECK-LABEL: "\01?n@@YAXPBQAUobjc_selector@@@Z"
+
+void n(SEL &) {}
+// CHECK-LABEL: "\01?n@@YAXAAPAUobjc_selector@@@Z"
+
+void n(const SEL &) {}
+// CHECK-LABEL: "\01?n@@YAXABQAUobjc_selector@@@Z"
+
+void n(SEL &&) {}
+// CHECK-LABEL: "\01?n@@YAX$$QAPAUobjc_selector@@@Z"
+
 struct __declspec(dllexport) s {
   struct s &operator=(const struct s &) = delete;
 
@@ -93,16 +117,16 @@
   // CHECK-LABEL: "\01?m@s@@QAAX$$QAPAUobjc_object@@@Z"
 
   void m(const id &) {}
-  // CHECK-LABEL: "\01?m@s@@QAAXABPAUobjc_object@@@Z"
+  // CHECK-LABEL: "\01?m@s@@QAAXABQAUobjc_object@@@Z"
 
   void m(const id &&) {}
-  // CHECK-LABEL: "\01?m@s@@QAAX$$QBPAUobjc_object@@@Z"
+  // CHECK-LABEL: "\01?m@s@@QAAX$$QBQAUobjc_object@@@Z"
 
   void m(Class *) {}
   // CHECK-LABEL: "\01?m@s@@QAAXPAPAUobjc_class@@@Z"
 
   void m(const Class *) {}
-  // CHECK-LABEL: "\01?m@s@@QAAXPBPAUobjc_class@@@Z"
+  // CHECK-LABEL: "\01?m@s@@QAAXPBQAUobjc_class@@@Z"
 
   void m(Class) {}
   // CHECK-LABEL: "\01?m@s@@QAAXPAUobjc_class@@@Z"
@@ -111,12 +135,58 @@
   // CHECK-LABEL: "\01?m@s@@QAAXAAPAUobjc_class@@@Z"
 
   void m(const Class &) {}
-  // CHECK-LABEL: "\01?m@s@@QAAXABPAUobjc_class@@@Z"
+  // CHECK-LABEL: "\01?m@s@@QAAXABQAUobjc_class@@@Z"
 
   void m(Class &&) {}
   // CHECK-LABEL: "\01?m@s@@QAAX$$QAPAUobjc_class@@@Z"
 
   void m(const Class &&) {}
-  // CHECK-LABEL: "\01?m@s@@QAAX$$QBPAUobjc_class@@@Z"
+  // CHECK-LABEL: "\01?m@s@@QAAX$$QBQAUobjc_class@@@Z"
+
+  void m(SEL) {}
+  // CHECK-LABEL: "\01?m@s@@QAAXPAUobjc_selector@@@Z"
+
+  void m(SEL *) {}
+  // CHECK-LABEL: "\01?m@s@@QAAXPAPAUobjc_selector@@@Z"
+
+  void m(const SEL *) {}
+  // CHECK-LABEL: "\01?m@s@@QAAXPBQAUobjc_selector@@@Z"
+
+  void m(SEL &) {}
+  // CHECK-LABEL: "\01?m@s@@QAAXAAPAUobjc_selector@@@Z"
+
+  void m(const SEL &) {}
+  // CHECK-LABEL: "\01?m@s@@QAAXABQAUobjc_selector@@@Z"
+
+  void m(SEL &&) {}
+  // CHECK-LABEL: "\01?m@s@@QAAX$$QAPAUobjc_selector@@@Z"
+
+  void m(const SEL &&) {}
+  // CHECK-LABEL: "\01?m@s@@QAAX$$QBQAUobjc_selector@@@Z"
 };
 
+template <typename T>
+struct remove_pointer { typedef T type; };
+
+template <typename T>
+struct remove_pointer<T *> {
+  typedef T type;
+};
+
+template <typename T>
+struct t {
+  t() {}
+};
+
+template struct t<id>;
+// CHECK-LABEL: "\01??0?$t@PAUobjc_object@@@@QAA@XZ"
+
+template struct t<remove_pointer<id>::type>;
+// CHECK-LABEL: "\01??0?$t@Uobjc_object@@@@QAA@XZ"
+
+template struct t<SEL>;
+// CHECK-LABEL: "\01??0?$t@PAUobjc_selector@@@@QAA@XZ"
+
+template struct t<remove_pointer<SEL>::type>;
+// CHECK-LABEL: "\01??0?$t@Uobjc_selector@@@@QAA@XZ"
+
diff --git a/test/CodeGenObjCXX/msabi-stret.mm b/test/CodeGenObjCXX/msabi-stret.mm
new file mode 100644
index 0000000..765c238
--- /dev/null
+++ b/test/CodeGenObjCXX/msabi-stret.mm
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple i686-unknown-windows-msvc -fobjc-runtime=ios-6.0 -Os -S -emit-llvm -o - %s -mdisable-fp-elim | FileCheck %s
+
+struct S {
+  S() = default;
+  S(const S &) {}
+};
+
+@interface I
++ (S)m:(S)s;
+@end
+
+S f() {
+  return [I m:S()];
+}
+
+// CHECK: declare dllimport void @objc_msgSend_stret(i8*, i8*, ...)
+// CHECK-NOT: declare dllimport void @objc_msgSend(i8*, i8*, ...)
+
diff --git a/test/CodeGenObjCXX/trivial_abi.mm b/test/CodeGenObjCXX/trivial_abi.mm
new file mode 100644
index 0000000..6a20578d
--- /dev/null
+++ b/test/CodeGenObjCXX/trivial_abi.mm
@@ -0,0 +1,103 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++11 -fobjc-arc  -fobjc-weak -fobjc-runtime-has-weak -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++11 -fobjc-arc  -fobjc-weak -fobjc-runtime-has-weak -fclang-abi-compat=4.0 -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: %[[STRUCT_STRONGWEAK:.*]] = type { i8*, i8* }
+// CHECK: %[[STRUCT_STRONG:.*]] = type { i8* }
+// CHECK: %[[STRUCT_S:.*]] = type { i8* }
+
+struct __attribute__((trivial_abi)) StrongWeak {
+  id fstrong;
+  __weak id fweak;
+};
+
+struct __attribute__((trivial_abi)) Strong {
+  id fstrong;
+};
+
+template<class T>
+struct __attribute__((trivial_abi)) S {
+  T a;
+};
+
+// CHECK: define void @_Z19testParamStrongWeak10StrongWeak(%[[STRUCT_STRONGWEAK]]* %{{.*}})
+// CHECK-NOT: call
+// CHECK: ret void
+
+void testParamStrongWeak(StrongWeak a) {
+}
+
+// CHECK: define void @_Z18testCallStrongWeakP10StrongWeak(%[[STRUCT_STRONGWEAK]]* %[[A:.*]])
+// CHECK: %[[A_ADDR:.*]] = alloca %[[STRUCT_STRONGWEAK]]*, align 8
+// CHECK: %[[AGG_TMP:.*]] = alloca %[[STRUCT_STRONGWEAK]], align 8
+// CHECK: store %[[STRUCT_STRONGWEAK]]* %[[A]], %[[STRUCT_STRONGWEAK]]** %[[A_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load %[[STRUCT_STRONGWEAK]]*, %[[STRUCT_STRONGWEAK]]** %[[A_ADDR]], align 8
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_STRONGWEAK]]* @_ZN10StrongWeakC1ERKS_(%[[STRUCT_STRONGWEAK]]* %[[AGG_TMP]], %[[STRUCT_STRONGWEAK]]* dereferenceable(16) %[[V0]])
+// CHECK: call void @_Z19testParamStrongWeak10StrongWeak(%[[STRUCT_STRONGWEAK]]* %[[AGG_TMP]])
+// CHECK: %[[CALL1:.*]] = call %[[STRUCT_STRONGWEAK]]* @_ZN10StrongWeakD1Ev(%[[STRUCT_STRONGWEAK]]* %[[AGG_TMP]])
+// CHECK: ret void
+
+void testCallStrongWeak(StrongWeak *a) {
+  testParamStrongWeak(*a);
+}
+
+// CHECK: define void @_Z20testReturnStrongWeakP10StrongWeak(%[[STRUCT_STRONGWEAK:.*]]* noalias sret %[[AGG_RESULT:.*]], %[[STRUCT_STRONGWEAK]]* %[[A:.*]])
+// CHECK: %[[A_ADDR:.*]] = alloca %[[STRUCT_STRONGWEAK]]*, align 8
+// CHECK: store %[[STRUCT_STRONGWEAK]]* %[[A]], %[[STRUCT_STRONGWEAK]]** %[[A_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load %[[STRUCT_STRONGWEAK]]*, %[[STRUCT_STRONGWEAK]]** %[[A_ADDR]], align 8
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_STRONGWEAK]]* @_ZN10StrongWeakC1ERKS_(%[[STRUCT_STRONGWEAK]]* %[[AGG_RESULT]], %[[STRUCT_STRONGWEAK]]* dereferenceable(16) %[[V0]])
+// CHECK: ret void
+
+StrongWeak testReturnStrongWeak(StrongWeak *a) {
+  return *a;
+}
+
+// CHECK: define void @_Z15testParamStrong6Strong(i64 %[[A_COERCE:.*]])
+// CHECK: %[[A:.*]] = alloca %[[STRUCT_STRONG]], align 8
+// CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_STRONG]], %[[STRUCT_STRONG]]* %[[A]], i32 0, i32 0
+// CHECK: %[[COERCE_VAL_IP:.*]] = inttoptr i64 %[[A_COERCE]] to i8*
+// CHECK: store i8* %[[COERCE_VAL_IP]], i8** %[[COERCE_DIVE]], align 8
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_STRONG]]* @_ZN6StrongD1Ev(%[[STRUCT_STRONG]]* %[[A]])
+// CHECK: ret void
+
+// CHECK: define linkonce_odr %[[STRUCT_STRONG]]* @_ZN6StrongD1Ev(
+
+void testParamStrong(Strong a) {
+}
+
+// CHECK: define void @_Z14testCallStrongP6Strong(%[[STRUCT_STRONG]]* %[[A:.*]])
+// CHECK: %[[A_ADDR:.*]] = alloca %[[STRUCT_STRONG]]*, align 8
+// CHECK: %[[AGG_TMP:.*]] = alloca %[[STRUCT_STRONG]], align 8
+// CHECK: store %[[STRUCT_STRONG]]* %[[A]], %[[STRUCT_STRONG]]** %[[A_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load %[[STRUCT_STRONG]]*, %[[STRUCT_STRONG]]** %[[A_ADDR]], align 8
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_STRONG]]* @_ZN6StrongC1ERKS_(%[[STRUCT_STRONG]]* %[[AGG_TMP]], %[[STRUCT_STRONG]]* dereferenceable(8) %[[V0]])
+// CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_STRONG]], %[[STRUCT_STRONG]]* %[[AGG_TMP]], i32 0, i32 0
+// CHECK: %[[V1:.*]] = load i8*, i8** %[[COERCE_DIVE]], align 8
+// CHECK: %[[COERCE_VAL_PI:.*]] = ptrtoint i8* %[[V1]] to i64
+// CHECK: call void @_Z15testParamStrong6Strong(i64 %[[COERCE_VAL_PI]])
+// CHECK: ret void
+
+void testCallStrong(Strong *a) {
+  testParamStrong(*a);
+}
+
+// CHECK: define i64 @_Z16testReturnStrongP6Strong(%[[STRUCT_STRONG]]* %[[A:.*]])
+// CHECK: %[[RETVAL:.*]] = alloca %[[STRUCT_STRONG]], align 8
+// CHECK: %[[A_ADDR:.*]] = alloca %[[STRUCT_STRONG]]*, align 8
+// CHECK: store %[[STRUCT_STRONG]]* %[[A]], %[[STRUCT_STRONG]]** %[[A_ADDR]], align 8
+// CHECK: %[[V0:.*]] = load %[[STRUCT_STRONG]]*, %[[STRUCT_STRONG]]** %[[A_ADDR]], align 8
+// CHECK: %[[CALL:.*]] = call %[[STRUCT_STRONG]]* @_ZN6StrongC1ERKS_(%[[STRUCT_STRONG]]* %[[RETVAL]], %[[STRUCT_STRONG]]* dereferenceable(8) %[[V0]])
+// CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_STRONG]], %[[STRUCT_STRONG]]* %[[RETVAL]], i32 0, i32 0
+// CHECK: %[[V1:.*]] = load i8*, i8** %[[COERCE_DIVE]], align 8
+// CHECK: %[[COERCE_VAL_PI:.*]] = ptrtoint i8* %[[V1]] to i64
+// CHECK: ret i64 %[[COERCE_VAL_PI]]
+
+Strong testReturnStrong(Strong *a) {
+  return *a;
+}
+
+// CHECK: define void @_Z21testParamWeakTemplate1SIU6__weakP11objc_objectE(%[[STRUCT_S]]* %{{.*}})
+// CHECK-NOT: call
+// CHECK: ret void
+
+void testParamWeakTemplate(S<__weak id> a) {
+}
diff --git a/test/CodeGenOpenCL/addr-space-struct-arg.cl b/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 68fc803..01ae1b3 100644
--- a/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn-amdhsa-amd-amdgizcl | FileCheck -enable-var-scope -check-prefixes=COM,AMD %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn-amdhsa-amd | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s
 
 typedef struct {
   int cells[9];
@@ -37,7 +37,7 @@
 
 
 // X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval align 4 %in)
-// AMD-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
+// AMDGCN-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
   Mat4X4 out;
   return out;
@@ -49,15 +49,15 @@
 // X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
 // X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
 
-// AMD: load [9 x i32], [9 x i32] addrspace(1)*
-// AMD: call %struct.Mat4X4 @foo([9 x i32]
-// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
+// AMDGCN: load [9 x i32], [9 x i32] addrspace(1)*
+// AMDGCN: call %struct.Mat4X4 @foo([9 x i32]
+// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
 kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
   out[0] = foo(in[1]);
 }
 
 // X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval align 4 %in)
-// AMD-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval align 4 %in)
+// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval align 4 %in)
 Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
   Mat64X64 out;
   return out;
@@ -68,66 +68,66 @@
 // the return value.
 // X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
 // X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
-// AMD: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
-// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
+// AMDGCN: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
+// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
 kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
   out[0] = foo_large(in[1]);
 }
 
-// AMD-LABEL: define void @FuncOneMember(<2 x i32> %u.coerce)
+// AMDGCN-LABEL: define void @FuncOneMember(<2 x i32> %u.coerce)
 void FuncOneMember(struct StructOneMember u) {
   u.x = (int2)(0, 0);
 }
 
-// AMD-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u)
 void FuncOneLargeMember(struct LargeStructOneMember u) {
   u.x[0] = (int2)(0, 0);
 }
 
-// AMD-LABEL: define amdgpu_kernel void @KernelOneMember
-// AMD-SAME:  (<2 x i32> %[[u_coerce:.*]])
-// AMD:  %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
-// AMD:  %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
-// AMD:  store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
-// AMD:  call void @FuncOneMember(<2 x i32>
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelOneMember
+// AMDGCN-SAME:  (<2 x i32> %[[u_coerce:.*]])
+// AMDGCN:  %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
+// AMDGCN:  %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
+// AMDGCN:  store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
+// AMDGCN:  call void @FuncOneMember(<2 x i32>
 kernel void KernelOneMember(struct StructOneMember u) {
   FuncOneMember(u);
 }
 
-// AMD-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
-// AMD:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
-// AMD:  store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
-// AMD:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[U]])
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
+// AMDGCN:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN:  store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
+// AMDGCN:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[U]])
 kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
   FuncOneLargeMember(u);
 }
 
-// AMD-LABEL: define void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
+// AMDGCN-LABEL: define void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
 void FuncTwoMember(struct StructTwoMember u) {
   u.y = (int2)(0, 0);
 }
 
-// AMD-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %u)
 void FuncLargeTwoMember(struct LargeStructTwoMember u) {
   u.y[0] = (int2)(0, 0);
 }
 
 
-// AMD-LABEL: define amdgpu_kernel void @KernelTwoMember
-// AMD-SAME:  (%struct.StructTwoMember %[[u_coerce:.*]])
-// AMD:  %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
-// AMD: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
-// AMD: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
-// AMD: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelTwoMember
+// AMDGCN-SAME:  (%struct.StructTwoMember %[[u_coerce:.*]])
+// AMDGCN:  %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
+// AMDGCN: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
+// AMDGCN: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
+// AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
 kernel void KernelTwoMember(struct StructTwoMember u) {
   FuncTwoMember(u);
 }
 
-// AMD-LABEL: define amdgpu_kernel void @KernelLargeTwoMember
-// AMD-SAME:  (%struct.LargeStructTwoMember %[[u_coerce:.*]])
-// AMD:  %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
-// AMD:  store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
-// AMD:  call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %[[u]])
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeTwoMember
+// AMDGCN-SAME:  (%struct.LargeStructTwoMember %[[u_coerce:.*]])
+// AMDGCN:  %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
+// AMDGCN:  store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
+// AMDGCN:  call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %[[u]])
 kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
   FuncLargeTwoMember(u);
 }
diff --git a/test/CodeGenOpenCL/address-space-constant-initializers.cl b/test/CodeGenOpenCL/address-space-constant-initializers.cl
index a03d939..da971cf 100644
--- a/test/CodeGenOpenCL/address-space-constant-initializers.cl
+++ b/test/CodeGenOpenCL/address-space-constant-initializers.cl
@@ -1,6 +1,5 @@
-// RUN: %clang_cc1 %s -ffake-address-space-map -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa-opencl -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -ffake-address-space-map -emit-llvm -o - | FileCheck -check-prefix=FAKE %s
+// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck -check-prefix=AMDGCN %s
 
 typedef struct {
     int i;
@@ -13,10 +12,22 @@
     __constant float* constant_float_ptr;
 } ConstantArrayPointerStruct;
 
-// CHECK: %struct.ConstantArrayPointerStruct = type { float addrspace(2)* }
-// CHECK: addrspace(2) constant %struct.ConstantArrayPointerStruct { float addrspace(2)* bitcast (i8 addrspace(2)* getelementptr (i8, i8 addrspace(2)* bitcast (%struct.ArrayStruct addrspace(2)* @constant_array_struct to i8 addrspace(2)*), i64 4) to float addrspace(2)*) }
+// FAKE: %struct.ConstantArrayPointerStruct = type { float addrspace(2)* }
+// FAKE: addrspace(2) constant %struct.ConstantArrayPointerStruct { float addrspace(2)* bitcast (i8 addrspace(2)* getelementptr (i8, i8 addrspace(2)* bitcast (%struct.ArrayStruct addrspace(2)* @constant_array_struct to i8 addrspace(2)*), i64 4) to float addrspace(2)*) }
+// AMDGCN: %struct.ConstantArrayPointerStruct = type { float addrspace(4)* }
+// AMDGCN: addrspace(4) constant %struct.ConstantArrayPointerStruct { float addrspace(4)* bitcast (i8 addrspace(4)* getelementptr (i8, i8 addrspace(4)* bitcast (%struct.ArrayStruct addrspace(4)* @constant_array_struct to i8 addrspace(4)*), i64 4) to float addrspace(4)*) }
 // Bug  18567
 __constant ConstantArrayPointerStruct constant_array_pointer_struct = {
     &constant_array_struct.f
 };
 
+__kernel void initializer_cast_is_valid_crash()
+{
+  unsigned char v512[64] = {
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00
+  };
+
+}
diff --git a/test/CodeGenOpenCL/address-spaces.cl b/test/CodeGenOpenCL/address-spaces.cl
index cb641f5..60f5e30 100644
--- a/test/CodeGenOpenCL/address-spaces.cl
+++ b/test/CodeGenOpenCL/address-spaces.cl
@@ -1,11 +1,9 @@
 // RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR
 // RUN: %clang_cc1 %s -O0 -DCL20 -cl-std=CL2.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa-opencl -emit-llvm -o - | FileCheck --check-prefixes=CHECK,SPIR %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa-opencl -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | FileCheck %s -check-prefixes=CHECK,GIZ
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa-amdgizcl -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20GIZ
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,SPIR %s
-// RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,SPIR %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20AMDGCN
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
+// RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
 
 // SPIR: %struct.S = type { i32, i32, i32* }
 // CL20SPIR: %struct.S = type { i32, i32, i32 addrspace(4)* }
@@ -15,18 +13,18 @@
   int *z;
 };
 
-// CL20-DAG: @g_extern_var = external addrspace(1) global float
-// CL20-DAG: @l_extern_var = external addrspace(1) global float
+// CL20-DAG: @g_extern_var = external {{(dso_local )?}}addrspace(1) global float
+// CL20-DAG: @l_extern_var = external {{(dso_local )?}}addrspace(1) global float
 // CL20-DAG: @test_static.l_static_var = internal addrspace(1) global float 0.000000e+00
 // CL20-DAG: @g_static_var = internal addrspace(1) global float 0.000000e+00
 
 #ifdef CL20
-// CL20-DAG: @g_s = common addrspace(1) global %struct.S zeroinitializer
+// CL20-DAG: @g_s = common {{(dso_local )?}}addrspace(1) global %struct.S zeroinitializer
 struct S g_s;
 #endif
 
 // SPIR: i32* %arg
-// GIZ: i32 addrspace(5)* %arg
+// AMDGCN: i32 addrspace(5)* %arg
 void f__p(__private int *arg) {}
 
 // CHECK: i32 addrspace(1)* %arg
@@ -35,11 +33,12 @@
 // CHECK: i32 addrspace(3)* %arg
 void f__l(__local int *arg) {}
 
-// CHECK: i32 addrspace(2)* %arg
+// SPIR: i32 addrspace(2)* %arg
+// AMDGCN: i32 addrspace(4)* %arg
 void f__c(__constant int *arg) {}
 
 // SPIR: i32* %arg
-// GIZ: i32 addrspace(5)* %arg
+// AMDGCN: i32 addrspace(5)* %arg
 void fp(private int *arg) {}
 
 // CHECK: i32 addrspace(1)* %arg
@@ -48,28 +47,29 @@
 // CHECK: i32 addrspace(3)* %arg
 void fl(local int *arg) {}
 
-// CHECK: i32 addrspace(2)* %arg
+// SPIR: i32 addrspace(2)* %arg
+// AMDGCN: i32 addrspace(4)* %arg
 void fc(constant int *arg) {}
 
 #ifdef CL20
 int i;
-// CL20-DAG: @i = common addrspace(1) global i32 0
+// CL20-DAG: @i = common {{(dso_local )?}}addrspace(1) global i32 0
 int *ptr;
-// CL20SPIR-DAG: @ptr = common addrspace(1) global i32 addrspace(4)* null
-// CL20GIZ-DAG: @ptr = common addrspace(1) global i32* null
+// CL20SPIR-DAG: @ptr = common {{(dso_local )?}}addrspace(1) global i32 addrspace(4)* null
+// CL20AMDGCN-DAG: @ptr = common {{(dso_local )?}}addrspace(1) global i32* null
 #endif
 
 // SPIR: i32* %arg
-// GIZ: i32 addrspace(5)* %arg
+// AMDGCN: i32 addrspace(5)* %arg
 // CL20SPIR-DAG: i32 addrspace(4)* %arg
-// CL20GIZ-DAG: i32* %arg
+// CL20AMDGCN-DAG: i32* %arg
 void f(int *arg) {
 
   int i;
 // SPIR: %i = alloca i32,
-// GIZ: %i = alloca i32{{.*}}addrspace(5)
+// AMDGCN: %i = alloca i32{{.*}}addrspace(5)
 // CL20SPIR-DAG: %i = alloca i32,
-// CL20GIZ-DAG: %i = alloca i32{{.*}}addrspace(5)
+// CL20AMDGCN-DAG: %i = alloca i32{{.*}}addrspace(5)
 
 #ifdef CL20
   static int ii;
@@ -79,13 +79,13 @@
 
 typedef int int_td;
 typedef int *intp_td;
-// SPIR: define void @test_typedef(i32 addrspace(1)* %x, i32 addrspace(2)* %y, i32* %z)
+// SPIR: define {{(dso_local )?}}void @test_typedef(i32 addrspace(1)* %x, i32 addrspace(2)* %y, i32* %z)
 void test_typedef(global int_td *x, constant int_td *y, intp_td z) {
   *x = *y;
   *z = 0;
 }
 
-// SPIR: define void @test_struct()
+// SPIR: define {{(dso_local )?}}void @test_struct()
 void test_struct() {
   // SPIR: %ps = alloca %struct.S*
   // CL20SPIR: %ps = alloca %struct.S addrspace(4)*
@@ -99,7 +99,7 @@
 #endif
 }
 
-// SPIR-LABEL: define void @test_void_par()
+// SPIR-LABEL: define {{(dso_local )?}}void @test_void_par()
 void test_void_par(void) {}
 
 // On ppc64 returns signext i32.
diff --git a/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
index 19287c7..59f38f8 100644
--- a/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
+++ b/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
@@ -58,3 +58,11 @@
   const int lvc = 4;
   lv1 = lvc;
 }
+
+// CHECK-LABEL: define void @func3()
+// CHECK: %a = alloca [16 x [1 x float]], align 4, addrspace(5)
+// CHECK: %[[CAST:.+]] = bitcast [16 x [1 x float]] addrspace(5)* %a to i8 addrspace(5)*
+// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 4 %[[CAST]], i8 0, i64 64, i1 false)
+void func3(void) {
+  float a[16][1] = {{0.}};
+}
diff --git a/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index a9d74ab..aec00e7 100644
--- a/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -309,7 +309,7 @@
 // CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
 void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
 
-// CHECK: void @func_flexible_array_arg(%struct.flexible_array* byval nocapture align 4 %arg)
+// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg)
 void func_flexible_array_arg(flexible_array arg) { }
 
 // CHECK: define float @func_f32_ret()
@@ -404,14 +404,14 @@
   return s;
 }
 
-// CHECK: define void @func_ret_struct_arr32(%struct.struct_arr32* noalias nocapture sret %agg.result)
+// CHECK: define void @func_ret_struct_arr32(%struct.struct_arr32 addrspace(5)* noalias nocapture sret %agg.result)
 struct_arr32 func_ret_struct_arr32()
 {
   struct_arr32 s = { 0 };
   return s;
 }
 
-// CHECK: define void @func_ret_struct_arr33(%struct.struct_arr33* noalias nocapture sret %agg.result)
+// CHECK: define void @func_ret_struct_arr33(%struct.struct_arr33 addrspace(5)* noalias nocapture sret %agg.result)
 struct_arr33 func_ret_struct_arr33()
 {
   struct_arr33 s = { 0 };
@@ -425,7 +425,7 @@
   return s;
 }
 
-// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #0 {
+// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #1 {
 // CHECK: ret i32 0
 transparent_u func_transparent_union_ret()
 {
@@ -440,7 +440,7 @@
   return s;
 }
 
-// CHECK: define void @func_flexible_array_ret(%struct.flexible_array* noalias nocapture sret %agg.result)
+// CHECK: define void @func_flexible_array_ret(%struct.flexible_array addrspace(5)* noalias nocapture sret %agg.result)
 flexible_array func_flexible_array_ret()
 {
   flexible_array s = { 0 };
@@ -450,11 +450,11 @@
 // CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
 void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
 
-// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg* byval nocapture align 4 %s)
+// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s)
 void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
 
 // XXX - Why don't the inner structs flatten?
-// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct* byval nocapture align 8 %arg4)
+// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4)
 void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
 
 // CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -469,7 +469,7 @@
 // CHECK: define void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
 void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
 
-// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding* byval nocapture readonly align 8 %arg)
+// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* byval nocapture readonly align 8 %arg)
 void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
   *out = arg;
 }
@@ -479,7 +479,7 @@
 
 // Function signature from blender, nothing should be passed byval. The v3i32
 // should not count as 4 passed registers.
-// CHECK: define void @v3i32_pair_reg_count(%struct.int3_pair* nocapture %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> %arg4, float %arg5)
+// CHECK: define void @v3i32_pair_reg_count(%struct.int3_pair addrspace(5)* nocapture %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> %arg4, float %arg5)
 void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair arg3, int3 arg4, float arg5) { }
 
 // Each short4 should fit pack into 2 registers.
@@ -487,7 +487,7 @@
 void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                      short4 arg4, short4 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
+// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
 void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                                short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
 
@@ -495,7 +495,7 @@
 void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                      short3 arg4, short3 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
+// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
 void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                           short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
 
@@ -505,7 +505,7 @@
                      short2 arg8, short2 arg9, short2 arg10, short2 arg11,
                      struct_4regs arg13) { }
 
-// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs* byval nocapture align 4 %arg13)
+// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg13)
 void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                           short2 arg4, short2 arg5, short2 arg6, short2 arg7,
                           short2 arg8, short2 arg9, short2 arg10, short2 arg11,
@@ -515,7 +515,7 @@
 void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                     char2 arg4, char2 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
+// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
diff --git a/test/CodeGenOpenCL/amdgpu-alignment.cl b/test/CodeGenOpenCL/amdgpu-alignment.cl
index 70a22c9..b5dc47a 100644
--- a/test/CodeGenOpenCL/amdgpu-alignment.cl
+++ b/test/CodeGenOpenCL/amdgpu-alignment.cl
@@ -336,91 +336,91 @@
 }
 
 // CHECK-LABEL: @private_memory_alignment_alloca(
-// CHECK: %private_i8 = alloca [4 x i8], align 1
-// CHECK: %private_v2i8 = alloca [4 x <2 x i8>], align 2
-// CHECK: %private_v3i8 = alloca [4 x <3 x i8>], align 4
-// CHECK: %private_v4i8 = alloca [4 x <4 x i8>], align 4
-// CHECK: %private_v8i8 = alloca [4 x <8 x i8>], align 8
-// CHECK: %private_v16i8 = alloca [4 x <16 x i8>], align 16
-// CHECK: %private_i16 = alloca [4 x i16], align 2
-// CHECK: %private_v2i16 = alloca [4 x <2 x i16>], align 4
-// CHECK: %private_v3i16 = alloca [4 x <3 x i16>], align 8
-// CHECK: %private_v4i16 = alloca [4 x <4 x i16>], align 8
-// CHECK: %private_v8i16 = alloca [4 x <8 x i16>], align 16
-// CHECK: %private_v16i16 = alloca [4 x <16 x i16>], align 32
-// CHECK: %private_i32 = alloca [4 x i32], align 4
-// CHECK: %private_v2i32 = alloca [4 x <2 x i32>], align 8
-// CHECK: %private_v3i32 = alloca [4 x <3 x i32>], align 16
-// CHECK: %private_v4i32 = alloca [4 x <4 x i32>], align 16
-// CHECK: %private_v8i32 = alloca [4 x <8 x i32>], align 32
-// CHECK: %private_v16i32 = alloca [4 x <16 x i32>], align 64
-// CHECK: %private_i64 = alloca [4 x i64], align 8
-// CHECK: %private_v2i64 = alloca [4 x <2 x i64>], align 16
-// CHECK: %private_v3i64 = alloca [4 x <3 x i64>], align 32
-// CHECK: %private_v4i64 = alloca [4 x <4 x i64>], align 32
-// CHECK: %private_v8i64 = alloca [4 x <8 x i64>], align 64
-// CHECK: %private_v16i64 = alloca [4 x <16 x i64>], align 128
-// CHECK: %private_f16 = alloca [4 x half], align 2
-// CHECK: %private_v2f16 = alloca [4 x <2 x half>], align 4
-// CHECK: %private_v3f16 = alloca [4 x <3 x half>], align 8
-// CHECK: %private_v4f16 = alloca [4 x <4 x half>], align 8
-// CHECK: %private_v8f16 = alloca [4 x <8 x half>], align 16
-// CHECK: %private_v16f16 = alloca [4 x <16 x half>], align 32
-// CHECK: %private_f32 = alloca [4 x float], align 4
-// CHECK: %private_v2f32 = alloca [4 x <2 x float>], align 8
-// CHECK: %private_v3f32 = alloca [4 x <3 x float>], align 16
-// CHECK: %private_v4f32 = alloca [4 x <4 x float>], align 16
-// CHECK: %private_v8f32 = alloca [4 x <8 x float>], align 32
-// CHECK: %private_v16f32 = alloca [4 x <16 x float>], align 64
-// CHECK: %private_f64 = alloca [4 x double], align 8
-// CHECK: %private_v2f64 = alloca [4 x <2 x double>], align 16
-// CHECK: %private_v3f64 = alloca [4 x <3 x double>], align 32
-// CHECK: %private_v4f64 = alloca [4 x <4 x double>], align 32
-// CHECK: %private_v8f64 = alloca [4 x <8 x double>], align 64
-// CHECK: %private_v16f64 = alloca [4 x <16 x double>], align 128
+// CHECK: %private_i8 = alloca [4 x i8], align 1, addrspace(5)
+// CHECK: %private_v2i8 = alloca [4 x <2 x i8>], align 2, addrspace(5)
+// CHECK: %private_v3i8 = alloca [4 x <3 x i8>], align 4, addrspace(5)
+// CHECK: %private_v4i8 = alloca [4 x <4 x i8>], align 4, addrspace(5)
+// CHECK: %private_v8i8 = alloca [4 x <8 x i8>], align 8, addrspace(5)
+// CHECK: %private_v16i8 = alloca [4 x <16 x i8>], align 16, addrspace(5)
+// CHECK: %private_i16 = alloca [4 x i16], align 2, addrspace(5)
+// CHECK: %private_v2i16 = alloca [4 x <2 x i16>], align 4, addrspace(5)
+// CHECK: %private_v3i16 = alloca [4 x <3 x i16>], align 8, addrspace(5)
+// CHECK: %private_v4i16 = alloca [4 x <4 x i16>], align 8, addrspace(5)
+// CHECK: %private_v8i16 = alloca [4 x <8 x i16>], align 16, addrspace(5)
+// CHECK: %private_v16i16 = alloca [4 x <16 x i16>], align 32, addrspace(5)
+// CHECK: %private_i32 = alloca [4 x i32], align 4, addrspace(5)
+// CHECK: %private_v2i32 = alloca [4 x <2 x i32>], align 8, addrspace(5)
+// CHECK: %private_v3i32 = alloca [4 x <3 x i32>], align 16, addrspace(5)
+// CHECK: %private_v4i32 = alloca [4 x <4 x i32>], align 16, addrspace(5)
+// CHECK: %private_v8i32 = alloca [4 x <8 x i32>], align 32, addrspace(5)
+// CHECK: %private_v16i32 = alloca [4 x <16 x i32>], align 64, addrspace(5)
+// CHECK: %private_i64 = alloca [4 x i64], align 8, addrspace(5)
+// CHECK: %private_v2i64 = alloca [4 x <2 x i64>], align 16, addrspace(5)
+// CHECK: %private_v3i64 = alloca [4 x <3 x i64>], align 32, addrspace(5)
+// CHECK: %private_v4i64 = alloca [4 x <4 x i64>], align 32, addrspace(5)
+// CHECK: %private_v8i64 = alloca [4 x <8 x i64>], align 64, addrspace(5)
+// CHECK: %private_v16i64 = alloca [4 x <16 x i64>], align 128, addrspace(5)
+// CHECK: %private_f16 = alloca [4 x half], align 2, addrspace(5)
+// CHECK: %private_v2f16 = alloca [4 x <2 x half>], align 4, addrspace(5)
+// CHECK: %private_v3f16 = alloca [4 x <3 x half>], align 8, addrspace(5)
+// CHECK: %private_v4f16 = alloca [4 x <4 x half>], align 8, addrspace(5)
+// CHECK: %private_v8f16 = alloca [4 x <8 x half>], align 16, addrspace(5)
+// CHECK: %private_v16f16 = alloca [4 x <16 x half>], align 32, addrspace(5)
+// CHECK: %private_f32 = alloca [4 x float], align 4, addrspace(5)
+// CHECK: %private_v2f32 = alloca [4 x <2 x float>], align 8, addrspace(5)
+// CHECK: %private_v3f32 = alloca [4 x <3 x float>], align 16, addrspace(5)
+// CHECK: %private_v4f32 = alloca [4 x <4 x float>], align 16, addrspace(5)
+// CHECK: %private_v8f32 = alloca [4 x <8 x float>], align 32, addrspace(5)
+// CHECK: %private_v16f32 = alloca [4 x <16 x float>], align 64, addrspace(5)
+// CHECK: %private_f64 = alloca [4 x double], align 8, addrspace(5)
+// CHECK: %private_v2f64 = alloca [4 x <2 x double>], align 16, addrspace(5)
+// CHECK: %private_v3f64 = alloca [4 x <3 x double>], align 32, addrspace(5)
+// CHECK: %private_v4f64 = alloca [4 x <4 x double>], align 32, addrspace(5)
+// CHECK: %private_v8f64 = alloca [4 x <8 x double>], align 64, addrspace(5)
+// CHECK: %private_v16f64 = alloca [4 x <16 x double>], align 128, addrspace(5)
 
-// CHECK: store volatile i8 0, i8* %arraydecay, align 1
-// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8>* %arraydecay{{[0-9]+}}, align 2
-// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8>* %storetmp, align 4
-// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8>* %arraydecay{{[0-9]+}}, align 4
-// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8>* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile i16 0, i16* %arraydecay{{[0-9]+}}, align 2
-// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16>* %arraydecay{{[0-9]+}}, align 4
-// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16>* %storetmp{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16>* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile i32 0, i32* %arraydecay{{[0-9]+}}, align 4
-// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32>* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32>* %storetmp16, align 16
-// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32>* %arraydecay{{[0-9]+}}, align 64
-// CHECK: store volatile i64 0, i64* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64>* %storetmp23, align 32
-// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64>* %arraydecay{{[0-9]+}}, align 64
-// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64>* %arraydecay{{[0-9]+}}, align 128
-// CHECK: store volatile half 0xH0000, half* %arraydecay{{[0-9]+}}, align 2
-// CHECK: store volatile <2 x half> zeroinitializer, <2 x half>* %arraydecay{{[0-9]+}}, align 4
-// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half>* %storetmp{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x half> zeroinitializer, <4 x half>* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <8 x half> zeroinitializer, <8 x half>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <16 x half> zeroinitializer, <16 x half>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile float 0.000000e+00, float* %arraydecay34, align 4
-// CHECK: store volatile <2 x float> zeroinitializer, <2 x float>* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float>* %storetmp{{[0-9]+}}, align 16
-// CHECK: store volatile <4 x float> zeroinitializer, <4 x float>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <8 x float> zeroinitializer, <8 x float>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile <16 x float> zeroinitializer, <16 x float>* %arraydecay{{[0-9]+}}, align 64
-// CHECK: store volatile double 0.000000e+00, double* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <2 x double> zeroinitializer, <2 x double>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double>* %storetmp{{[0-9]+}}, align 32
-// CHECK: store volatile <4 x double> zeroinitializer, <4 x double>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile <8 x double> zeroinitializer, <8 x double>* %arraydecay{{[0-9]+}}, align 64
-// CHECK: store volatile <16 x double> zeroinitializer, <16 x double>* %arraydecay{{[0-9]+}}, align 128
+// CHECK: store volatile i8 0, i8 addrspace(5)* %arraydecay, align 1
+// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 2
+// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(5)* %storetmp, align 4
+// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile i16 0, i16 addrspace(5)* %arraydecay{{[0-9]+}}, align 2
+// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(5)* %storetmp{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile i32 0, i32 addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(5)* %storetmp16, align 16
+// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile i64 0, i64 addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(5)* %storetmp23, align 32
+// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
+// CHECK: store volatile half 0xH0000, half addrspace(5)* %arraydecay{{[0-9]+}}, align 2
+// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(5)* %storetmp{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile float 0.000000e+00, float addrspace(5)* %arraydecay34, align 4
+// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(5)* %storetmp{{[0-9]+}}, align 16
+// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile double 0.000000e+00, double addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(5)* %storetmp{{[0-9]+}}, align 32
+// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
 kernel void private_memory_alignment_alloca()
 {
   volatile private char private_i8[4];
diff --git a/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl b/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
index 4d46b40..894611e 100644
--- a/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
+++ b/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
@@ -51,31 +51,31 @@
 
 kernel void kernel1(
     // CHECK-DAG: ![[KERNELARG0:[0-9]+]] = !DILocalVariable(name: "KernelArg0", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-    // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)** {{.*}}, metadata ![[KERNELARG0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+    // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* {{.*}}, metadata ![[KERNELARG0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
     global int *KernelArg0,
     // CHECK-DAG: ![[KERNELARG1:[0-9]+]] = !DILocalVariable(name: "KernelArg1", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-    // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(2)** {{.*}}, metadata ![[KERNELARG1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+    // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)* addrspace(5)* {{.*}}, metadata ![[KERNELARG1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
     constant int *KernelArg1,
     // CHECK-DAG: ![[KERNELARG2:[0-9]+]] = !DILocalVariable(name: "KernelArg2", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-    // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)** {{.*}}, metadata ![[KERNELARG2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+    // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)* addrspace(5)* {{.*}}, metadata ![[KERNELARG2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
     local int *KernelArg2) {
   private int *Tmp0;
   int *Tmp1;
 
   // CHECK-DAG: ![[FUNCVAR0:[0-9]+]] = !DILocalVariable(name: "FuncVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)** {{.*}}, metadata ![[FUNCVAR0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
   global int *FuncVar0 = KernelArg0;
   // CHECK-DAG: ![[FUNCVAR1:[0-9]+]] = !DILocalVariable(name: "FuncVar1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(2)** {{.*}}, metadata ![[FUNCVAR1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
   constant int *FuncVar1 = KernelArg1;
   // CHECK-DAG: ![[FUNCVAR2:[0-9]+]] = !DILocalVariable(name: "FuncVar2", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)** {{.*}}, metadata ![[FUNCVAR2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
   local int *FuncVar2 = KernelArg2;
   // CHECK-DAG: ![[FUNCVAR3:[0-9]+]] = !DILocalVariable(name: "FuncVar3", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32** {{.*}}, metadata ![[FUNCVAR3]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(5)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR3]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
   private int *FuncVar3 = Tmp0;
   // CHECK-DAG: ![[FUNCVAR4:[0-9]+]] = !DILocalVariable(name: "FuncVar4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)** {{.*}}, metadata ![[FUNCVAR4]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32* addrspace(5)* {{.*}}, metadata ![[FUNCVAR4]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
   int *FuncVar4 = Tmp1;
 
   // CHECK-DAG: ![[FUNCVAR5:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
@@ -111,18 +111,18 @@
   int *local FuncVar14; FuncVar14 = Tmp1;
 
   // CHECK-DAG: ![[FUNCVAR15:[0-9]+]] = !DILocalVariable(name: "FuncVar15", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)** {{.*}}, metadata ![[FUNCVAR15]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR15]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
   global int *private FuncVar15 = KernelArg0;
   // CHECK-DAG: ![[FUNCVAR16:[0-9]+]] = !DILocalVariable(name: "FuncVar16", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(2)** {{.*}}, metadata ![[FUNCVAR16]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR16]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
   constant int *private FuncVar16 = KernelArg1;
   // CHECK-DAG: ![[FUNCVAR17:[0-9]+]] = !DILocalVariable(name: "FuncVar17", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)** {{.*}}, metadata ![[FUNCVAR17]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR17]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
   local int *private FuncVar17 = KernelArg2;
   // CHECK-DAG: ![[FUNCVAR18:[0-9]+]] = !DILocalVariable(name: "FuncVar18", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32** {{.*}}, metadata ![[FUNCVAR18]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(5)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR18]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
   private int *private FuncVar18 = Tmp0;
   // CHECK-DAG: ![[FUNCVAR19:[0-9]+]] = !DILocalVariable(name: "FuncVar19", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)** {{.*}}, metadata ![[FUNCVAR19]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32* addrspace(5)* {{.*}}, metadata ![[FUNCVAR19]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
   int *private FuncVar19 = Tmp1;
 }
diff --git a/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index b2db4d7..875c35c 100644
--- a/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -2,6 +2,10 @@
 
 typedef struct {int a;} ndrange_t;
 
+void callee(long id, global long *out) {
+  out[id] = id;
+}
+
 // CHECK-LABEL: define amdgpu_kernel void @test
 kernel void test(global char *a, char b, global long *c, long d) {
   queue_t default_queue;
@@ -24,22 +28,31 @@
                  c[0] = d;
                  ((local int*)lp)[0] = 1;
                  }, 100);
+
+  void (^block)(void) = ^{
+    callee(d, c);
+  };
+
+  enqueue_kernel(default_queue, flags, ndrange, block);
 }
 
-// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>)
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i8 }>)
 // CHECK-SAME: #[[ATTR:[0-9]+]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
 // CHECK: entry:
-// CHECK:  %1 = alloca <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, align 8
-// CHECK:  store <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %1, align 8
-// CHECK:  %2 = addrspacecast <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %1 to i8 addrspace(4)*
-// CHECK:  call void @__test_block_invoke(i8 addrspace(4)* %2)
+// CHECK:  %1 = alloca <{ i32, i32, i8*, i8 addrspace(1)*, i8 }>, align 8, addrspace(5)
+// CHECK:  store <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> addrspace(5)* %1, align 8
+// CHECK:  %2 = addrspacecast <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> addrspace(5)* %1 to i8*
+// CHECK:  call void @__test_block_invoke(i8* %2)
 // CHECK:  ret void
 // CHECK:}
 
-// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)
 // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
 
-// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
+// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
+
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i8*, i64, i64 addrspace(1)* }>)
 // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
 
 // CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" }
diff --git a/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
new file mode 100644
index 0000000..f6c6f32
--- /dev/null
+++ b/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
+
+// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
+void foo(void) {}
diff --git a/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl b/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl
deleted file mode 100644
index 4cb8d95..0000000
--- a/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl
+++ /dev/null
@@ -1,9 +0,0 @@
-// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgiz -emit-llvm -o - | FileCheck -check-prefix=GIZ %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgizcl -emit-llvm -o - | FileCheck -check-prefix=GIZ %s
-
-// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-// GIZ: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
-void foo(void) {}
-
diff --git a/test/CodeGenOpenCL/amdgpu-nullptr.cl b/test/CodeGenOpenCL/amdgpu-nullptr.cl
index 513d56c..13008c5 100644
--- a/test/CodeGenOpenCL/amdgpu-nullptr.cl
+++ b/test/CodeGenOpenCL/amdgpu-nullptr.cl
@@ -21,48 +21,48 @@
 
 // Test 0 as initializer.
 
-// CHECK: @private_p = local_unnamed_addr addrspace(1) global i8* null, align 4
+// CHECK: @private_p = local_unnamed_addr addrspace(1) global i8 addrspace(5)* null, align 4
 private char *private_p = 0;
 
-// CHECK: @local_p = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
+// CHECK: @local_p = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
 local char *local_p = 0;
 
 // CHECK: @global_p = local_unnamed_addr addrspace(1) global i8 addrspace(1)* null, align 8
 global char *global_p = 0;
 
-// CHECK: @constant_p = local_unnamed_addr addrspace(1) global i8 addrspace(2)* null, align 8
+// CHECK: @constant_p = local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
 constant char *constant_p = 0;
 
-// CHECK: @generic_p = local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
+// CHECK: @generic_p = local_unnamed_addr addrspace(1) global i8* null, align 8
 generic char *generic_p = 0;
 
 // Test NULL as initializer.
 
-// CHECK: @private_p_NULL = local_unnamed_addr addrspace(1) global i8* null, align 4
+// CHECK: @private_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(5)* null, align 4
 private char *private_p_NULL = NULL;
 
-// CHECK: @local_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
+// CHECK: @local_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
 local char *local_p_NULL = NULL;
 
 // CHECK: @global_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(1)* null, align 8
 global char *global_p_NULL = NULL;
 
-// CHECK: @constant_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(2)* null, align 8
+// CHECK: @constant_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
 constant char *constant_p_NULL = NULL;
 
-// CHECK: @generic_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
+// CHECK: @generic_p_NULL = local_unnamed_addr addrspace(1) global i8* null, align 8
 generic char *generic_p_NULL = NULL;
 
 // Test constant folding of null pointer.
 // A null pointer should be folded to a null pointer in the target address space.
 
-// CHECK: @fold_generic = local_unnamed_addr addrspace(1) global i32 addrspace(4)* null, align 8
+// CHECK: @fold_generic = local_unnamed_addr addrspace(1) global i32* null, align 8
 generic int *fold_generic = (global int*)(generic float*)(private char*)0;
 
-// CHECK: @fold_priv = local_unnamed_addr addrspace(1) global i16* null, align 4
+// CHECK: @fold_priv = local_unnamed_addr addrspace(1) global i16 addrspace(5)* null, align 4
 private short *fold_priv = (private short*)(generic int*)(global void*)0;
 
-// CHECK: @fold_priv_arith = local_unnamed_addr addrspace(1) global i8* inttoptr (i32 10 to i8*), align 4
+// CHECK: @fold_priv_arith = local_unnamed_addr addrspace(1) global i8 addrspace(5)* inttoptr (i32 10 to i8 addrspace(5)*), align 4
 private char *fold_priv_arith = (private char*)0 + 10;
 
 // CHECK: @fold_int = local_unnamed_addr addrspace(1) global i32 14, align 4
@@ -99,12 +99,12 @@
 
 // Test static variable initialization.
 
-// NOOPT: @test_static_var_private.sp1 = internal addrspace(1) global i8* null, align 4
-// NOOPT: @test_static_var_private.sp2 = internal addrspace(1) global i8* null, align 4
-// NOOPT: @test_static_var_private.sp3 = internal addrspace(1) global i8* null, align 4
-// NOOPT: @test_static_var_private.sp4 = internal addrspace(1) global i8* null, align 4
-// NOOPT: @test_static_var_private.sp5 = internal addrspace(1) global i8* null, align 4
-// NOOPT: @test_static_var_private.SS1 = internal addrspace(1) global %struct.StructTy1 { i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(2)* null, i8 addrspace(1)* null, i8 addrspace(4)* null }, align 8
+// NOOPT: @test_static_var_private.sp1 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp2 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp3 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp4 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp5 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.SS1 = internal addrspace(1) global %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, align 8
 // NOOPT: @test_static_var_private.SS2 = internal addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
 
 void test_static_var_private(void) {
@@ -118,12 +118,12 @@
   static StructTy2 SS2;
 }
 
-// NOOPT: @test_static_var_local.sp1 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
-// NOOPT: @test_static_var_local.sp2 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
-// NOOPT: @test_static_var_local.sp3 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
+// NOOPT: @test_static_var_local.sp1 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
+// NOOPT: @test_static_var_local.sp2 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
+// NOOPT: @test_static_var_local.sp3 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
 // NOOPT: @test_static_var_local.sp4 = internal addrspace(1) global i8 addrspace(3)* null, align 4
 // NOOPT: @test_static_var_local.sp5 = internal addrspace(1) global i8 addrspace(3)* null, align 4
-// NOOPT: @test_static_var_local.SS1 = internal addrspace(1) global %struct.StructTy1 { i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(2)* null, i8 addrspace(1)* null, i8 addrspace(4)* null }, align 8
+// NOOPT: @test_static_var_local.SS1 = internal addrspace(1) global %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, align 8
 // NOOPT: @test_static_var_local.SS2 = internal addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
 void test_static_var_local(void) {
   static local char *sp1 = 0;
@@ -138,14 +138,14 @@
 
 // Test function-scope variable initialization.
 // NOOPT-LABEL: @test_func_scope_var_private(
-// NOOPT: store i8* null, i8** %sp1, align 4
-// NOOPT: store i8* null, i8** %sp2, align 4
-// NOOPT: store i8* null, i8** %sp3, align 4
-// NOOPT: store i8* null, i8** %sp4, align 4
-// NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1* %SS1 to i8*
-// NOOPT: call void @llvm.memcpy.p0i8.p2i8.i64(i8* %[[SS1]], i8 addrspace(2)* bitcast (%struct.StructTy1 addrspace(2)* @test_func_scope_var_private.SS1 to i8 addrspace(2)*), i64 32, i32 8, i1 false)
-// NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2* %SS2 to i8*
-// NOOPT: call void @llvm.memset.p0i8.i64(i8* %[[SS2]], i8 0, i64 24, i32 8, i1 false)
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp1, align 4
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp2, align 4
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp3, align 4
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp4, align 4
+// NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1 addrspace(5)* %SS1 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 %[[SS1]], i8 addrspace(4)* align 8 bitcast (%struct.StructTy1 addrspace(4)* @test_func_scope_var_private.SS1 to i8 addrspace(4)*), i64 32, i1 false)
+// NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2 addrspace(5)* %SS2 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 8 %[[SS2]], i8 0, i64 24, i1 false)
 void test_func_scope_var_private(void) {
   private char *sp1 = 0;
   private char *sp2 = NULL;
@@ -158,14 +158,14 @@
 
 // Test function-scope variable initialization.
 // NOOPT-LABEL: @test_func_scope_var_local(
-// NOOPT: store i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(3)** %sp1, align 4
-// NOOPT: store i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(3)** %sp2, align 4
-// NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)** %sp3, align 4
-// NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)** %sp4, align 4
-// NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1* %SS1 to i8*
-// NOOPT: call void @llvm.memcpy.p0i8.p2i8.i64(i8* %[[SS1]], i8 addrspace(2)* bitcast (%struct.StructTy1 addrspace(2)* @test_func_scope_var_local.SS1 to i8 addrspace(2)*), i64 32, i32 8, i1 false)
-// NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2* %SS2 to i8*
-// NOOPT: call void @llvm.memset.p0i8.i64(i8* %[[SS2]], i8 0, i64 24, i32 8, i1 false)
+// NOOPT: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)* addrspace(5)* %sp1, align 4
+// NOOPT: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)* addrspace(5)* %sp2, align 4
+// NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(5)* %sp3, align 4
+// NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(5)* %sp4, align 4
+// NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1 addrspace(5)* %SS1 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 %[[SS1]], i8 addrspace(4)* align 8 bitcast (%struct.StructTy1 addrspace(4)* @test_func_scope_var_local.SS1 to i8 addrspace(4)*), i64 32, i1 false)
+// NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2 addrspace(5)* %SS2 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 8 %[[SS2]], i8 0, i64 24, i1 false)
 void test_func_scope_var_local(void) {
   local char *sp1 = 0;
   local char *sp2 = NULL;
@@ -183,31 +183,31 @@
 // cannot have common linkage since common linkage requires zero initialization
 // and does not have explicit section.
 
-// CHECK: @p1 = common local_unnamed_addr addrspace(1) global i8* null, align 4
+// CHECK: @p1 = common local_unnamed_addr addrspace(1) global i8 addrspace(5)* null, align 4
 private char *p1;
 
-// CHECK: @p2 = weak local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
+// CHECK: @p2 = weak local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
 local char *p2;
 
-// CHECK: @p3 = common local_unnamed_addr addrspace(1) global i8 addrspace(2)* null, align 8
+// CHECK: @p3 = common local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
 constant char *p3;
 
 // CHECK: @p4 = common local_unnamed_addr addrspace(1) global i8 addrspace(1)* null, align 8
 global char *p4;
 
-// CHECK: @p5 = common local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
+// CHECK: @p5 = common local_unnamed_addr addrspace(1) global i8* null, align 8
 generic char *p5;
 
 // Test default initialization of sturcture.
 
-// CHECK: @S1 = weak local_unnamed_addr addrspace(1) global %struct.StructTy1 { i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(2)* null, i8 addrspace(1)* null, i8 addrspace(4)* null }, align 8
+// CHECK: @S1 = weak local_unnamed_addr addrspace(1) global %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, align 8
 StructTy1 S1;
 
 // CHECK: @S2 = common local_unnamed_addr addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
 StructTy2 S2;
 
 // Test default initialization of array.
-// CHECK: @A1 = weak local_unnamed_addr addrspace(1) global [2 x %struct.StructTy1] [%struct.StructTy1 { i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(2)* null, i8 addrspace(1)* null, i8 addrspace(4)* null }, %struct.StructTy1 { i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(2)* null, i8 addrspace(1)* null, i8 addrspace(4)* null }], align 8
+// CHECK: @A1 = weak local_unnamed_addr addrspace(1) global [2 x %struct.StructTy1] [%struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }], align 8
 StructTy1 A1[2];
 
 // CHECK: @A2 = common local_unnamed_addr addrspace(1) global [2 x %struct.StructTy2] zeroinitializer, align 8
@@ -216,14 +216,14 @@
 // Test comparison with 0.
 
 // CHECK-LABEL: cmp_private
-// CHECK: icmp eq i8* %p, null
+// CHECK: icmp eq i8 addrspace(5)* %p, null
 void cmp_private(private char* p) {
   if (p != 0)
     *p = 0;
 }
 
 // CHECK-LABEL: cmp_local
-// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*)
+// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
 void cmp_local(local char* p) {
   if (p != 0)
     *p = 0;
@@ -237,7 +237,7 @@
 }
 
 // CHECK-LABEL: cmp_constant
-// CHECK: icmp eq i8 addrspace(2)* %p, null
+// CHECK: icmp eq i8 addrspace(4)* %p, null
 char cmp_constant(constant char* p) {
   if (p != 0)
     return *p;
@@ -246,7 +246,7 @@
 }
 
 // CHECK-LABEL: cmp_generic
-// CHECK: icmp eq i8 addrspace(4)* %p, null
+// CHECK: icmp eq i8* %p, null
 void cmp_generic(generic char* p) {
   if (p != 0)
     *p = 0;
@@ -255,14 +255,14 @@
 // Test comparison with NULL.
 
 // CHECK-LABEL: cmp_NULL_private
-// CHECK: icmp eq i8* %p, null
+// CHECK: icmp eq i8 addrspace(5)* %p, null
 void cmp_NULL_private(private char* p) {
   if (p != NULL)
     *p = 0;
 }
 
 // CHECK-LABEL: cmp_NULL_local
-// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*)
+// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
 void cmp_NULL_local(local char* p) {
   if (p != NULL)
     *p = 0;
@@ -276,7 +276,7 @@
 }
 
 // CHECK-LABEL: cmp_NULL_constant
-// CHECK: icmp eq i8 addrspace(2)* %p, null
+// CHECK: icmp eq i8 addrspace(4)* %p, null
 char cmp_NULL_constant(constant char* p) {
   if (p != NULL)
     return *p;
@@ -285,7 +285,7 @@
 }
 
 // CHECK-LABEL: cmp_NULL_generic
-// CHECK: icmp eq i8 addrspace(4)* %p, null
+// CHECK: icmp eq i8* %p, null
 void cmp_NULL_generic(generic char* p) {
   if (p != NULL)
     *p = 0;
@@ -293,11 +293,11 @@
 
 // Test storage 0 as null pointer.
 // CHECK-LABEL: test_storage_null_pointer
-// CHECK: store i8* null, i8* addrspace(4)* %arg_private
-// CHECK: store i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(3)* addrspace(4)* %arg_local
-// CHECK: store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(4)* %arg_global
-// CHECK: store i8 addrspace(2)* null, i8 addrspace(2)* addrspace(4)* %arg_constant
-// CHECK: store i8 addrspace(4)* null, i8 addrspace(4)* addrspace(4)* %arg_generic
+// CHECK: store i8 addrspace(5)* null, i8 addrspace(5)** %arg_private
+// CHECK: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)** %arg_local
+// CHECK: store i8 addrspace(1)* null, i8 addrspace(1)** %arg_global
+// CHECK: store i8 addrspace(4)* null, i8 addrspace(4)** %arg_constant
+// CHECK: store i8* null, i8** %arg_generic
 void test_storage_null_pointer(private char** arg_private,
                                local char** arg_local,
                                global char** arg_global,
@@ -312,11 +312,11 @@
 
 // Test storage NULL as null pointer.
 // CHECK-LABEL: test_storage_null_pointer_NULL
-// CHECK: store i8* null, i8* addrspace(4)* %arg_private
-// CHECK: store i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(3)* addrspace(4)* %arg_local
-// CHECK: store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(4)* %arg_global
-// CHECK: store i8 addrspace(2)* null, i8 addrspace(2)* addrspace(4)* %arg_constant
-// CHECK: store i8 addrspace(4)* null, i8 addrspace(4)* addrspace(4)* %arg_generic
+// CHECK: store i8 addrspace(5)* null, i8 addrspace(5)** %arg_private
+// CHECK: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)** %arg_local
+// CHECK: store i8 addrspace(1)* null, i8 addrspace(1)** %arg_global
+// CHECK: store i8 addrspace(4)* null, i8 addrspace(4)** %arg_constant
+// CHECK: store i8* null, i8** %arg_generic
 void test_storage_null_pointer_NULL(private char** arg_private,
                                     local char** arg_local,
                                     global char** arg_global,
@@ -337,8 +337,8 @@
                                       generic char* arg_generic);
 
 // CHECK-LABEL: test_pass_null_pointer_arg
-// CHECK: call void @test_pass_null_pointer_arg_calee(i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(1)* null, i8 addrspace(2)* null, i8 addrspace(4)* null)
-// CHECK: call void @test_pass_null_pointer_arg_calee(i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(1)* null, i8 addrspace(2)* null, i8 addrspace(4)* null)
+// CHECK: call void @test_pass_null_pointer_arg_calee(i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(1)* null, i8 addrspace(4)* null, i8* null)
+// CHECK: call void @test_pass_null_pointer_arg_calee(i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(1)* null, i8 addrspace(4)* null, i8* null)
 void test_pass_null_pointer_arg(void) {
   test_pass_null_pointer_arg_calee(0, 0, 0, 0, 0);
   test_pass_null_pointer_arg_calee(NULL, NULL, NULL, NULL, NULL);
@@ -352,8 +352,8 @@
                                            size_t arg_generic);
 
 // CHECK-LABEL: test_cast_null_pointer_to_sizet
-// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 0, i64 ptrtoint (i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*) to i64), i64 0, i64 0, i64 0)
-// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 0, i64 ptrtoint (i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*) to i64), i64 0, i64 0, i64 0)
+// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 0, i64 ptrtoint (i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*) to i64), i64 0, i64 0, i64 0)
+// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 0, i64 ptrtoint (i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*) to i64), i64 0, i64 0, i64 0)
 void test_cast_null_pointer_to_sizet(void) {
   test_cast_null_pointer_to_sizet_calee((size_t)((private char*)0),
                                         (size_t)((local char*)0),
@@ -465,14 +465,14 @@
 // Test cast to bool.
 
 // CHECK-LABEL: cast_bool_private
-// CHECK: icmp eq i8* %p, null
+// CHECK: icmp eq i8 addrspace(5)* %p, null
 void cast_bool_private(private char* p) {
   if (p)
     *p = 0;
 }
 
 // CHECK-LABEL: cast_bool_local
-// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*)
+// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
 void cast_bool_local(local char* p) {
   if (p)
     *p = 0;
@@ -486,7 +486,7 @@
 }
 
 // CHECK-LABEL: cast_bool_constant
-// CHECK: icmp eq i8 addrspace(2)* %p, null
+// CHECK: icmp eq i8 addrspace(4)* %p, null
 char cast_bool_constant(constant char* p) {
   if (p)
     return *p;
@@ -495,7 +495,7 @@
 }
 
 // CHECK-LABEL: cast_bool_generic
-// CHECK: icmp eq i8 addrspace(4)* %p, null
+// CHECK: icmp eq i8* %p, null
 void cast_bool_generic(generic char* p) {
   if (p)
     *p = 0;
@@ -510,7 +510,7 @@
 } StructTy3;
 
 // CHECK-LABEL: test_memset_private
-// CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull {{.*}}, i8 0, i64 40, i32 8, i1 false)
+// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 8 {{.*}}, i8 0, i64 40, i1 false)
 void test_memset_private(private StructTy3 *ptr) {
   StructTy3 S3 = {0, 0, 0, 0, 0};
   *ptr = S3;
@@ -520,13 +520,13 @@
 // A 0 literal casted to pointer should become a null pointer.
 
 // CHECK-LABEL: test_cast_0_to_local_ptr
-// CHECK: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+// CHECK: ret i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
 local int* test_cast_0_to_local_ptr(void) {
   return (local int*)0;
 }
 
 // CHECK-LABEL: test_cast_0_to_private_ptr
-// CHECK: ret i32* null
+// CHECK: ret i32 addrspace(5)* null
 private int* test_cast_0_to_private_ptr(void) {
   return (private int*)0;
 }
@@ -536,7 +536,7 @@
 // zero value.
 
 // CHECK-LABEL: test_cast_int_to_ptr1_private
-// CHECK: ret i32* null
+// CHECK: ret i32 addrspace(5)* null
 private int* test_cast_int_to_ptr1_private(void) {
   return (private int*)((void)0, 0);
 }
@@ -548,7 +548,7 @@
 }
 
 // CHECK-LABEL: test_cast_int_to_ptr2
-// CHECK: ret i32* null
+// CHECK: ret i32 addrspace(5)* null
 private int* test_cast_int_to_ptr2(void) {
   int x = 0;
   return (private int*)x;
@@ -568,7 +568,7 @@
 }
 
 // CHECK-LABEL: test_not_private_ptr
-// CHECK: %[[lnot:.*]] = icmp eq i8* %p, null
+// CHECK: %[[lnot:.*]] = icmp eq i8 addrspace(5)* %p, null
 // CHECK: %[[lnot_ext:.*]] = zext i1 %[[lnot]] to i32
 // CHECK: ret i32 %[[lnot_ext]]
 int test_not_private_ptr(private char* p) {
@@ -576,7 +576,7 @@
 }
 
 // CHECK-LABEL: test_not_local_ptr
-// CHECK: %[[lnot:.*]] = icmp eq i8 addrspace(3)* %p, addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*)
+// CHECK: %[[lnot:.*]] = icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
 // CHECK: %[[lnot_ext:.*]] = zext i1 %[[lnot]] to i32
 // CHECK: ret i32 %[[lnot_ext]]
 int test_not_local_ptr(local char* p) {
@@ -585,8 +585,8 @@
 
 
 // CHECK-LABEL: test_and_ptr
-// CHECK: %[[tobool:.*]] = icmp ne i8* %p1, null
-// CHECK: %[[tobool1:.*]] = icmp ne i8 addrspace(3)* %p2, addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*)
+// CHECK: %[[tobool:.*]] = icmp ne i8 addrspace(5)* %p1, null
+// CHECK: %[[tobool1:.*]] = icmp ne i8 addrspace(3)* %p2, addrspacecast (i8* null to i8 addrspace(3)*)
 // CHECK: %[[res:.*]] = and i1 %[[tobool]], %[[tobool1]]
 // CHECK: %[[land_ext:.*]] = zext i1 %[[res]] to i32
 // CHECK: ret i32 %[[land_ext]]
@@ -597,7 +597,7 @@
 // Test folding of null pointer in function scope.
 // NOOPT-LABEL: test_fold_private
 // NOOPT: call void @test_fold_callee
-// NOOPT: store i32 addrspace(1)* null, i32 addrspace(1)** %glob, align 8
+// NOOPT: store i32 addrspace(1)* null, i32 addrspace(1)* addrspace(5)* %glob, align 8
 // NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
 // NOOPT: call void @test_fold_callee
 // NOOPT: %{{.*}} = add nsw i64 %{{.*}}, 0
@@ -612,10 +612,10 @@
 
 // NOOPT-LABEL: test_fold_local
 // NOOPT: call void @test_fold_callee
-// NOOPT: store i32 addrspace(1)* null, i32 addrspace(1)** %glob, align 8
+// NOOPT: store i32 addrspace(1)* null, i32 addrspace(1)* addrspace(5)* %glob, align 8
 // NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
 // NOOPT: call void @test_fold_callee
-// NOOPT: %{{.*}} = add nsw i64 %{{.*}}, sext (i32 ptrtoint (i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) to i32) to i64)
+// NOOPT: %{{.*}} = add nsw i64 %{{.*}}, sext (i32 ptrtoint (i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*) to i32) to i64)
 // NOOPT: %{{.*}} = sub nsw i64 %{{.*}}, 1
 void test_fold_local(void) {
   global int* glob = (test_fold_callee(), (global int*)(generic char*)0);
diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
index 146d9dc..5e7a3db 100644
--- a/test/CodeGenOpenCL/blocks.cl
+++ b/test/CodeGenOpenCL/blocks.cl
@@ -1,12 +1,14 @@
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck -check-prefixes=COMMON,SPIR %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa-opencl | FileCheck -check-prefixes=COMMON,AMD %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=COMMON,AMDGCN %s
 
-// COMMON: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
+// SPIR: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
+// AMDGCN: %struct.__opencl_block_literal_generic = type { i32, i32, i8* }
 // SPIR: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) to i8 addrspace(4)*) }
-// AMD: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) to i8 addrspace(4)*) }
+// AMDGCN: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8* } { i32 16, i32 8, i8* bitcast (void (i8*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) }
 // COMMON-NOT: .str
 
-// COMMON-LABEL: define internal {{.*}}void @block_A_block_invoke(i8 addrspace(4)* %.block_descriptor, i8 addrspace(3)* %a)
+// SPIR-LABEL: define internal {{.*}}void @block_A_block_invoke(i8 addrspace(4)* %.block_descriptor, i8 addrspace(3)* %a)
+// AMDGCN-LABEL: define internal {{.*}}void @block_A_block_invoke(i8* %.block_descriptor, i8 addrspace(3)* %a)
 void (^block_A)(local void *) = ^(local void *a) {
   return;
 };
@@ -18,27 +20,44 @@
   // COMMON-NOT: %block.flags
   // COMMON-NOT: %block.reserved
   // COMMON-NOT: %block.descriptor
-  // COMMON: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 0
+  // SPIR: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 0
+  // AMDGCN: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %block, i32 0, i32 0
   // SPIR: store i32 16, i32* %[[block_size]]
-  // AMD: store i32 20, i32* %[[block_size]]
-  // COMMON: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 1
+  // AMDGCN: store i32 20, i32 addrspace(5)* %[[block_size]]
+  // SPIR: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 1
+  // AMDGCN: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %block, i32 0, i32 1
   // SPIR: store i32 4, i32* %[[block_align]]
-  // AMD: store i32 8, i32* %[[block_align]]
-  // COMMON: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block:.*]], i32 0, i32 2
-  // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %[[block_invoke]]
-  // COMMON: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
-  // COMMON: %[[i_value:.*]] = load i32, i32* %i
-  // COMMON: store i32 %[[i_value]], i32* %[[block_captured]],
-  // COMMON: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
-  // COMMON: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
-  // COMMON: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
-  // COMMON: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
-  // COMMON: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
-  // COMMON: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
-  // COMMON: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
-  // COMMON: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
-  // COMMON: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
-  // COMMON: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
+  // AMDGCN: store i32 8, i32 addrspace(5)* %[[block_align]]
+  // SPIR: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block:.*]], i32 0, i32 2
+  // SPIR: store i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %[[block_invoke]]
+  // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
+  // SPIR: %[[i_value:.*]] = load i32, i32* %i
+  // SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
+  // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
+  // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
+  // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
+  // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
+  // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
+  // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
+  // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
+  // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
+  // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
+  // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
+  // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
+  // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
+  // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
+  // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
+  // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
+  // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
+  // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
+  // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
+  // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
+  // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic*
+  // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
+  // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
+  // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
+  // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
+  // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
 
   int (^ block_B)(void) = ^{
     return i;
@@ -46,9 +65,13 @@
   block_B();
 }
 
-// COMMON-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor)
-// COMMON:  %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)*
-// COMMON:  %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)* %[[block]], i32 0, i32 3
-// COMMON:  %[[block_capture:.*]] = load i32, i32 addrspace(4)* %[[block_capture_addr]]
+// SPIR-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor)
+// SPIR:  %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)*
+// SPIR:  %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)* %[[block]], i32 0, i32 3
+// SPIR:  %[[block_capture:.*]] = load i32, i32 addrspace(4)* %[[block_capture_addr]]
+// AMDGCN-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8* %.block_descriptor)
+// AMDGCN:  %[[block:.*]] = bitcast i8* %.block_descriptor to <{ i32, i32, i8*, i32 }>*
+// AMDGCN:  %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }>* %[[block]], i32 0, i32 3
+// AMDGCN:  %[[block_capture:.*]] = load i32, i32* %[[block_capture_addr]]
 
 // COMMON-NOT: define{{.*}}@__foo_block_invoke_kernel
diff --git a/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 1dad674..caa76e2 100644
--- a/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,20 @@
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fadd(__attribute__((address_space(3))) float *out, float src) {
+  *out = __builtin_amdgcn_ds_fadd(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmin
+// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmin(__attribute__((address_space(3))) float *out, float src) {
+  *out = __builtin_amdgcn_ds_fmin(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmax
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmax(__attribute__((address_space(3))) float *out, float src) {
+  *out = __builtin_amdgcn_ds_fmax(out, src, 0, 0, false);
+}
diff --git a/test/CodeGenOpenCL/builtins-amdgcn.cl b/test/CodeGenOpenCL/builtins-amdgcn.cl
index 9f03654..2015f36 100644
--- a/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -434,22 +434,22 @@
 }
 
 // CHECK-LABEL: @test_dispatch_ptr
-// CHECK: call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
-void test_dispatch_ptr(__attribute__((address_space(2))) unsigned char ** out)
+// CHECK: call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+void test_dispatch_ptr(__attribute__((address_space(4))) unsigned char ** out)
 {
   *out = __builtin_amdgcn_dispatch_ptr();
 }
 
 // CHECK-LABEL: @test_kernarg_segment_ptr
-// CHECK: call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
-void test_kernarg_segment_ptr(__attribute__((address_space(2))) unsigned char ** out)
+// CHECK: call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+void test_kernarg_segment_ptr(__attribute__((address_space(4))) unsigned char ** out)
 {
   *out = __builtin_amdgcn_kernarg_segment_ptr();
 }
 
 // CHECK-LABEL: @test_implicitarg_ptr
-// CHECK: call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
-void test_implicitarg_ptr(__attribute__((address_space(2))) unsigned char ** out)
+// CHECK: call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+void test_implicitarg_ptr(__attribute__((address_space(4))) unsigned char ** out)
 {
   *out = __builtin_amdgcn_implicitarg_ptr();
 }
diff --git a/test/CodeGenOpenCL/byval.cl b/test/CodeGenOpenCL/byval.cl
index 90afdfa..592d96f 100644
--- a/test/CodeGenOpenCL/byval.cl
+++ b/test/CodeGenOpenCL/byval.cl
@@ -1,6 +1,5 @@
 // RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck %s
 // RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn---opencl %s | FileCheck %s
-// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn---amdgizcl %s | FileCheck %s -check-prefix=AMDGIZ
 
 struct A {
   int x[100];
@@ -10,10 +9,8 @@
 
 int g() {
   struct A a;
-  // CHECK: call i32 @f(%struct.A* byval{{.*}}%a)
-  // AMDGIZ: call i32 @f(%struct.A addrspace(5)* byval{{.*}}%a)
+  // CHECK: call i32 @f(%struct.A addrspace(5)* byval{{.*}}%a)
   return f(a);
 }
 
-// CHECK: declare i32 @f(%struct.A* byval{{.*}})
-// AMDGIZ: declare i32 @f(%struct.A addrspace(5)* byval{{.*}})
+// CHECK: declare i32 @f(%struct.A addrspace(5)* byval{{.*}})
diff --git a/test/CodeGenOpenCL/cast_image.cl b/test/CodeGenOpenCL/cast_image.cl
index 479404a..d4e24b4 100644
--- a/test/CodeGenOpenCL/cast_image.cl
+++ b/test/CodeGenOpenCL/cast_image.cl
@@ -4,7 +4,7 @@
 #ifdef __AMDGCN__
 
 constant int* convert(image2d_t img) {
-  // AMDGCN: bitcast %opencl.image2d_ro_t addrspace(2)* %img to i32 addrspace(2)*
+  // AMDGCN: bitcast %opencl.image2d_ro_t addrspace(4)* %img to i32 addrspace(4)*
   return __builtin_astype(img, constant int*);
 }
 
diff --git a/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/test/CodeGenOpenCL/cl-uniform-wg-size.cl
new file mode 100644
index 0000000..76ace5d
--- /dev/null
+++ b/test/CodeGenOpenCL/cl-uniform-wg-size.cl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
+
+kernel void ker() {};
+// CHECK: define{{.*}}@ker() #0
+
+void foo() {};
+// CHECK: define{{.*}}@foo() #1
+
+// CHECK-LABEL: attributes #0
+// CHECK-UNIFORM: "uniform-work-group-size"="true"
+// CHECK-NONUNIFORM: "uniform-work-group-size"="false"
+
+// CHECK-LABEL: attributes #1
+// CHECK-NOT: uniform-work-group-size
diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 0bf87c2..005bce7 100644
--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -29,6 +29,10 @@
 // COMMON: define internal spir_func void [[INV_G]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}})
 const bl_t block_G = (bl_t) ^ (local void *a) {};
 
+void callee(int id, __global int *out) {
+  out[id] = id;
+}
+
 // COMMON-LABEL: define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
 kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COMMON: %default_queue = alloca %opencl.queue_t*
@@ -282,6 +286,21 @@
   // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
+  void (^block_C)(void) = ^{
+    callee(i, a);
+  };
+
+  // Emits block literal on stack and block kernel [[INVLK3]].
+  // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
+  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
+  enqueue_kernel(default_queue, flags, ndrange, block_C);
+
   // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INV9]].
   // COMMON: call i32 @__get_kernel_work_group_size_impl(
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
@@ -333,6 +352,7 @@
 // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
 // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
diff --git a/test/CodeGenOpenCL/convergent.cl b/test/CodeGenOpenCL/convergent.cl
index c46205b..a011920 100644
--- a/test/CodeGenOpenCL/convergent.cl
+++ b/test/CodeGenOpenCL/convergent.cl
@@ -126,9 +126,17 @@
 
 // CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
 
+// CHECK-LABEL: @assume_convergent_asm
+// CHECK: tail call void asm sideeffect "s_barrier", ""() #5
+kernel void assume_convergent_asm()
+{
+  __asm__ volatile("s_barrier");
+}
+
 // CHECK: attributes #0 = { noinline norecurse nounwind "
 // CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
 // CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
 // CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
 // CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
+// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #6 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
diff --git a/test/CodeGenOpenCL/func-call-dbg-loc.cl b/test/CodeGenOpenCL/func-call-dbg-loc.cl
new file mode 100644
index 0000000..4ed082f
--- /dev/null
+++ b/test/CodeGenOpenCL/func-call-dbg-loc.cl
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -debug-info-kind=limited -O0 -emit-llvm -o - %s | FileCheck %s
+
+typedef struct
+{
+    int a;
+} Struct;
+
+Struct func1();
+
+void func2(Struct S);
+
+void func3()
+{
+    // CHECK: call i32 @func1() #{{[0-9]+}}, !dbg ![[LOC:[0-9]+]]
+    // CHECK: call void @func2(i32 %{{[0-9]+}}) #{{[0-9]+}}, !dbg ![[LOC]]
+    func2(func1());
+}
+
diff --git a/test/CodeGenOpenCL/kernel-attributes.cl b/test/CodeGenOpenCL/kernel-attributes.cl
index eff2e57..c9ecb14 100644
--- a/test/CodeGenOpenCL/kernel-attributes.cl
+++ b/test/CodeGenOpenCL/kernel-attributes.cl
@@ -3,13 +3,13 @@
 typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
 
 kernel  __attribute__((vec_type_hint(int))) __attribute__((reqd_work_group_size(1,2,4))) void kernel1(int a) {}
-// CHECK: define spir_kernel void @kernel1(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD1:[0-9]+]] !reqd_work_group_size ![[MD2:[0-9]+]]
+// CHECK: define {{(dso_local )?}}spir_kernel void @kernel1(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD1:[0-9]+]] !reqd_work_group_size ![[MD2:[0-9]+]]
 
 kernel __attribute__((vec_type_hint(uint4))) __attribute__((work_group_size_hint(8,16,32))) void kernel2(int a) {}
-// CHECK: define spir_kernel void @kernel2(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD3:[0-9]+]] !work_group_size_hint ![[MD4:[0-9]+]]
+// CHECK: define {{(dso_local )?}}spir_kernel void @kernel2(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD3:[0-9]+]] !work_group_size_hint ![[MD4:[0-9]+]]
 
 kernel __attribute__((intel_reqd_sub_group_size(8))) void kernel3(int a) {}
-// CHECK: define spir_kernel void @kernel3(i32 {{[^%]*}}%a) {{[^{]+}} !intel_reqd_sub_group_size ![[MD5:[0-9]+]]
+// CHECK: define {{(dso_local )?}}spir_kernel void @kernel3(i32 {{[^%]*}}%a) {{[^{]+}} !intel_reqd_sub_group_size ![[MD5:[0-9]+]]
 
 // CHECK: [[MD1]] = !{i32 undef, i32 1}
 // CHECK: [[MD2]] = !{i32 1, i32 2, i32 4}
diff --git a/test/CodeGenOpenCL/kernel-metadata.cl b/test/CodeGenOpenCL/kernel-metadata.cl
index 95be430..cdec97b 100644
--- a/test/CodeGenOpenCL/kernel-metadata.cl
+++ b/test/CodeGenOpenCL/kernel-metadata.cl
@@ -6,5 +6,5 @@
 __kernel void kernel_function() {
 }
 
-// CHECK: define spir_kernel void @kernel_function() {{[^{]+}} !kernel_arg_addr_space ![[MD:[0-9]+]] !kernel_arg_access_qual ![[MD]] !kernel_arg_type ![[MD]] !kernel_arg_base_type ![[MD]] !kernel_arg_type_qual ![[MD]] {
+// CHECK: define {{.*}}spir_kernel void @kernel_function() {{[^{]+}} !kernel_arg_addr_space ![[MD:[0-9]+]] !kernel_arg_access_qual ![[MD]] !kernel_arg_type ![[MD]] !kernel_arg_base_type ![[MD]] !kernel_arg_type_qual ![[MD]] {
 // CHECK: ![[MD]] = !{}
diff --git a/test/CodeGenOpenCL/lifetime.cl b/test/CodeGenOpenCL/lifetime.cl
index 430e058..ed9f32f 100644
--- a/test/CodeGenOpenCL/lifetime.cl
+++ b/test/CodeGenOpenCL/lifetime.cl
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn---amdgizcl %s | FileCheck %s -check-prefix=AMDGIZ
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck %s -check-prefix=AMDGCN
 
 void use(char *a);
 
@@ -10,6 +10,6 @@
 
 void lifetime_test() {
 // CHECK: @llvm.lifetime.start.p0i
-// AMDGIZ: @llvm.lifetime.start.p5i
+// AMDGCN: @llvm.lifetime.start.p5i
   helper_no_markers();
 }
diff --git a/test/CodeGenOpenCL/opencl_types.cl b/test/CodeGenOpenCL/opencl_types.cl
index 3501f9f..ee3c8d7 100644
--- a/test/CodeGenOpenCL/opencl_types.cl
+++ b/test/CodeGenOpenCL/opencl_types.cl
@@ -11,37 +11,39 @@
 
 void fnc1(image1d_t img) {}
 // CHECK-SPIR: @fnc1(%opencl.image1d_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc1(%opencl.image1d_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc1(%opencl.image1d_ro_t addrspace(4)*
 
 void fnc1arr(image1d_array_t img) {}
 // CHECK-SPIR: @fnc1arr(%opencl.image1d_array_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc1arr(%opencl.image1d_array_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc1arr(%opencl.image1d_array_ro_t addrspace(4)*
 
 void fnc1buff(image1d_buffer_t img) {}
 // CHECK-SPIR: @fnc1buff(%opencl.image1d_buffer_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc1buff(%opencl.image1d_buffer_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc1buff(%opencl.image1d_buffer_ro_t addrspace(4)*
 
 void fnc2(image2d_t img) {}
 // CHECK-SPIR: @fnc2(%opencl.image2d_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc2(%opencl.image2d_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc2(%opencl.image2d_ro_t addrspace(4)*
 
 void fnc2arr(image2d_array_t img) {}
 // CHECK-SPIR: @fnc2arr(%opencl.image2d_array_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc2arr(%opencl.image2d_array_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc2arr(%opencl.image2d_array_ro_t addrspace(4)*
 
 void fnc3(image3d_t img) {}
 // CHECK-SPIR: @fnc3(%opencl.image3d_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc3(%opencl.image3d_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc3(%opencl.image3d_ro_t addrspace(4)*
 
 void fnc4smp(sampler_t s) {}
 // CHECK-SPIR-LABEL: define {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
-// CHECK-AMDGCN-LABEL: define {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+// CHECK-AMDGCN-LABEL: define {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(4)*
 
 kernel void foo(image1d_t img) {
   sampler_t smp = CLK_ADDRESS_CLAMP_TO_EDGE|CLK_NORMALIZED_COORDS_TRUE|CLK_FILTER_LINEAR;
-  // CHECK-COM: alloca %opencl.sampler_t addrspace(2)*
+  // CHECK-SPIR: alloca %opencl.sampler_t addrspace(2)*
+  // CHECK-AMDGCN: alloca %opencl.sampler_t addrspace(4)*
   event_t evt;
-  // CHECK-COM: alloca %opencl.event_t*
+  // CHECK-SPIR: alloca %opencl.event_t*
+  // CHECK-AMDGCN: alloca %opencl.event_t addrspace(5)*
   clk_event_t clk_evt;
   // CHECK-SPIR: alloca %opencl.clk_event_t*
   // CHECK-AMDGCN: alloca %opencl.clk_event_t addrspace(1)*
@@ -51,11 +53,14 @@
   reserve_id_t rid;
   // CHECK-SPIR: alloca %opencl.reserve_id_t*
   // CHECK-AMDGCN: alloca %opencl.reserve_id_t addrspace(1)*
-  // CHECK-COM: store %opencl.sampler_t addrspace(2)*
+  // CHECK-SPIR: store %opencl.sampler_t addrspace(2)*
+  // CHECK-AMDGCN: store %opencl.sampler_t addrspace(4)*
   fnc4smp(smp);
-  // CHECK-COM: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+  // CHECK-SPIR: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+  // CHECK-AMDGCN: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(4)*
   fnc4smp(glb_smp);
-  // CHECK-COM: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+  // CHECK-SPIR: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+  // CHECK-AMDGCN: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(4)*
 }
 
 kernel void foo_pipe(read_only pipe int p) {}
@@ -64,4 +69,4 @@
 
 void __attribute__((overloadable)) bad1(image1d_t b, image2d_t c, image2d_t d) {}
 // CHECK-SPIR-LABEL: @{{_Z4bad114ocl_image1d_ro14ocl_image2d_roS0_|"\\01\?bad1@@\$\$J0YAXPAUocl_image1d_ro@@PAUocl_image2d_ro@@1@Z"}}
-// CHECK-AMDGCN-LABEL: @{{_Z4bad114ocl_image1d_ro14ocl_image2d_roS0_|"\\01\?bad1@@\$\$J0YAXPAUocl_image1d_ro@@PAUocl_image2d_ro@@1@Z"}}(%opencl.image1d_ro_t addrspace(2)*{{.*}}%opencl.image2d_ro_t addrspace(2)*{{.*}}%opencl.image2d_ro_t addrspace(2)*{{.*}})
+// CHECK-AMDGCN-LABEL: @{{_Z4bad114ocl_image1d_ro14ocl_image2d_roS0_|"\\01\?bad1@@\$\$J0YAXPAUocl_image1d_ro@@PAUocl_image2d_ro@@1@Z"}}(%opencl.image1d_ro_t addrspace(4)*{{.*}}%opencl.image2d_ro_t addrspace(4)*{{.*}}%opencl.image2d_ro_t addrspace(4)*{{.*}})
diff --git a/test/CodeGenOpenCL/partial_initializer.cl b/test/CodeGenOpenCL/partial_initializer.cl
index 454c82f..cdc469f 100644
--- a/test/CodeGenOpenCL/partial_initializer.cl
+++ b/test/CodeGenOpenCL/partial_initializer.cl
@@ -36,7 +36,7 @@
   // CHECK: %[[V2:.*]] = alloca <4 x i32>, align 16
 
   // CHECK: %[[v0:.*]] = bitcast [6 x [6 x float]]* %A to i8*
-  // CHECK: call void @llvm.memset.p0i8.i32(i8* %[[v0]], i8 0, i32 144, i32 4, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %[[v0]], i8 0, i32 144, i1 false)
   // CHECK: %[[v1:.*]] = bitcast i8* %[[v0]] to [6 x [6 x float]]*
   // CHECK: %[[v2:.*]] = getelementptr [6 x [6 x float]], [6 x [6 x float]]* %[[v1]], i32 0, i32 0
   // CHECK: %[[v3:.*]] = getelementptr [6 x float], [6 x float]* %[[v2]], i32 0, i32 0
@@ -46,7 +46,7 @@
   float A[6][6]  = {1.0f, 2.0f};
 
   // CHECK: %[[v5:.*]] = bitcast %struct.StrucTy* %S to i8*
-  // CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* %[[v5]], i8 addrspace(2)* bitcast (%struct.StrucTy addrspace(2)* @f.S to i8 addrspace(2)*), i32 12, i32 4, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[v5]], i8 addrspace(2)* align 4 bitcast (%struct.StrucTy addrspace(2)* @f.S to i8 addrspace(2)*), i32 12, i1 false)
   StrucTy S = {1, 2};
 
   // CHECK: store <2 x i32> <i32 1, i32 2>, <2 x i32>* %[[compoundliteral1]], align 8
diff --git a/test/CodeGenOpenCL/private-array-initialization.cl b/test/CodeGenOpenCL/private-array-initialization.cl
index a7d4a98..9aa058d 100644
--- a/test/CodeGenOpenCL/private-array-initialization.cl
+++ b/test/CodeGenOpenCL/private-array-initialization.cl
@@ -1,9 +1,33 @@
-// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -emit-llvm -o - | FileCheck -check-prefix=PRIVATE0 %s
+// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa-unknown -O0 -emit-llvm -o - | FileCheck -check-prefix=PRIVATE5 %s
 
 // CHECK: @test.arr = private unnamed_addr addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3], align 4
 
 void test() {
   __private int arr[] = {1, 2, 3};
-// CHECK:  %[[arr_i8_ptr:[0-9]+]] = bitcast [3 x i32]* %arr to i8*
-// CHECK:  call void @llvm.memcpy.p0i8.p2i8.i32(i8* %[[arr_i8_ptr]], i8 addrspace(2)* bitcast ([3 x i32] addrspace(2)* @test.arr to i8 addrspace(2)*), i32 12, i32 4, i1 false)
+// PRIVATE0:  %[[arr_i8_ptr:[0-9]+]] = bitcast [3 x i32]* %arr to i8*
+// PRIVATE0:  call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[arr_i8_ptr]], i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @test.arr to i8 addrspace(2)*), i32 12, i1 false)
+
+// PRIVATE5: %arr = alloca [3 x i32], align 4, addrspace(5)
+// PRIVATE5: %0 = bitcast [3 x i32] addrspace(5)* %arr to i8 addrspace(5)*
+// PRIVATE5: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %0, i8 addrspace(4)* align 4 bitcast ([3 x i32] addrspace(4)* @test.arr to i8 addrspace(4)*), i64 12, i1 false)
+}
+
+__kernel void initializer_cast_is_valid_crash() {
+// PRIVATE0: %v512 = alloca [64 x i8], align 1
+// PRIVATE0: %0 = bitcast [64 x i8]* %v512 to i8*
+// PRIVATE0: call void @llvm.memset.p0i8.i32(i8* align 1 %0, i8 0, i32 64, i1 false)
+// PRIVATE0: %1 = bitcast i8* %0 to [64 x i8]*
+
+
+// PRIVATE5: %v512 = alloca [64 x i8], align 1, addrspace(5)
+// PRIVATE5: %0 = bitcast [64 x i8] addrspace(5)* %v512 to i8 addrspace(5)*
+// PRIVATE5: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 1 %0, i8 0, i64 64, i1 false)
+// PRIVATE5: %1 = bitcast i8 addrspace(5)* %0 to [64 x i8] addrspace(5)*
+  unsigned char v512[64] = {
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00
+  };
 }
diff --git a/test/CodeGenOpenCL/sampler.cl b/test/CodeGenOpenCL/sampler.cl
index 3a7319c..22976c5 100644
--- a/test/CodeGenOpenCL/sampler.cl
+++ b/test/CodeGenOpenCL/sampler.cl
@@ -20,6 +20,8 @@
 constant sampler_t glb_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
 // CHECK-NOT: glb_smp
 
+int get_sampler_initializer(void);
+
 void fnc4smp(sampler_t s) {}
 // CHECK: define spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* %
 
@@ -58,4 +60,20 @@
   fnc4smp(5);
   // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 5)
   // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+  const sampler_t const_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
+  fnc4smp(const_smp);
+   // CHECK: [[CONST_SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
+  // CHECK: store %opencl.sampler_t addrspace(2)* [[CONST_SAMP]], %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR:%[a-zA-Z0-9]+]]
+  fnc4smp(const_smp);
+  // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR]]
+  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+  constant sampler_t constant_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
+  fnc4smp(constant_smp);
+  // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
+  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+  // TODO: enable sampler initialization with non-constant integer.
+  //const sampler_t const_smp_func_init = get_sampler_initializer();
 }
diff --git a/test/CodeGenOpenCL/size_t.cl b/test/CodeGenOpenCL/size_t.cl
index 02950bb..63a0622 100644
--- a/test/CodeGenOpenCL/size_t.cl
+++ b/test/CodeGenOpenCL/size_t.cl
@@ -1,12 +1,14 @@
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple spir-unknown-unknown -o - | FileCheck --check-prefix=SZ32 %s
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple spir64-unknown-unknown -o - | FileCheck --check-prefix=SZ64 --check-prefix=SZ64ONLY %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple amdgcn -o - | FileCheck --check-prefix=SZ64 --check-prefix=AMDONLY %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple amdgcn---opencl -o - | FileCheck --check-prefix=SZ64 --check-prefix=AMDONLY %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple amdgcn -o - | FileCheck --check-prefix=SZ64 --check-prefix=AMDGCN %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple amdgcn---opencl -o - | FileCheck --check-prefix=SZ64 --check-prefix=AMDGCN %s
 
 //SZ32: define{{.*}} i32 @test_ptrtoint_private(i8* %x)
 //SZ32: ptrtoint i8* %{{.*}} to i32
-//SZ64: define{{.*}} i64 @test_ptrtoint_private(i8* %x)
-//SZ64: ptrtoint i8* %{{.*}} to i64
+//SZ64ONLY: define{{.*}} i64 @test_ptrtoint_private(i8* %x)
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_ptrtoint_private(i8 addrspace(5)* %x)
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
 size_t test_ptrtoint_private(private char* x) {
   return (size_t)x;
 }
@@ -21,8 +23,10 @@
 
 //SZ32: define{{.*}} i32 @test_ptrtoint_constant(i8 addrspace(2)* %x)
 //SZ32: ptrtoint i8 addrspace(2)* %{{.*}} to i32
-//SZ64: define{{.*}} i64 @test_ptrtoint_constant(i8 addrspace(2)* %x)
-//SZ64: ptrtoint i8 addrspace(2)* %{{.*}} to i64
+//SZ64ONLY: define{{.*}} i64 @test_ptrtoint_constant(i8 addrspace(2)* %x)
+//SZ64ONLY: ptrtoint i8 addrspace(2)* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_ptrtoint_constant(i8 addrspace(4)* %x)
+//AMDGCN: ptrtoint i8 addrspace(4)* %{{.*}} to i64
 uintptr_t test_ptrtoint_constant(constant char* x) {
   return (uintptr_t)x;
 }
@@ -37,18 +41,21 @@
 
 //SZ32: define{{.*}} i32 @test_ptrtoint_generic(i8 addrspace(4)* %x)
 //SZ32: ptrtoint i8 addrspace(4)* %{{.*}} to i32
-//SZ64: define{{.*}} i64 @test_ptrtoint_generic(i8 addrspace(4)* %x)
-//SZ64: ptrtoint i8 addrspace(4)* %{{.*}} to i64
+//SZ64ONLY: define{{.*}} i64 @test_ptrtoint_generic(i8 addrspace(4)* %x)
+//SZ64ONLY: ptrtoint i8 addrspace(4)* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_ptrtoint_generic(i8* %x)
+//AMDGCN: ptrtoint i8* %{{.*}} to i64
 size_t test_ptrtoint_generic(generic char* x) {
   return (size_t)x;
 }
 
 //SZ32: define{{.*}} i8* @test_inttoptr_private(i32 %x)
 //SZ32: inttoptr i32 %{{.*}} to i8*
-//SZ64: define{{.*}} i8* @test_inttoptr_private(i64 %x)
-//AMDONLY: trunc i64 %{{.*}} to i32
-//AMDONLY: inttoptr i32 %{{.*}} to i8*
+//SZ64ONLY: define{{.*}} i8* @test_inttoptr_private(i64 %x)
 //SZ64ONLY: inttoptr i64 %{{.*}} to i8*
+//AMDGCN: define{{.*}} i8 addrspace(5)* @test_inttoptr_private(i64 %x)
+//AMDGCN: trunc i64 %{{.*}} to i32
+//AMDGCN: inttoptr i32 %{{.*}} to i8 addrspace(5)*
 private char* test_inttoptr_private(size_t x) {
   return (private char*)x;
 }
@@ -64,8 +71,8 @@
 //SZ32: define{{.*}} i8 addrspace(3)* @test_add_local(i8 addrspace(3)* %x, i32 %y)
 //SZ32: getelementptr inbounds i8, i8 addrspace(3)* %{{.*}}, i32
 //SZ64: define{{.*}} i8 addrspace(3)* @test_add_local(i8 addrspace(3)* %x, i64 %y)
-//AMDONLY: trunc i64 %{{.*}} to i32
-//AMDONLY: getelementptr inbounds i8, i8 addrspace(3)* %{{.*}}, i32
+//AMDGCN: trunc i64 %{{.*}} to i32
+//AMDGCN: getelementptr inbounds i8, i8 addrspace(3)* %{{.*}}, i32
 //SZ64ONLY: getelementptr inbounds i8, i8 addrspace(3)* %{{.*}}, i64
 local char* test_add_local(local char* x, ptrdiff_t y) {
   return x + y;
@@ -92,9 +99,12 @@
 //SZ32: define{{.*}} i32 @test_sub_private(i8* %x, i8* %y)
 //SZ32: ptrtoint i8* %{{.*}} to i32
 //SZ32: ptrtoint i8* %{{.*}} to i32
-//SZ64: define{{.*}} i64 @test_sub_private(i8* %x, i8* %y)
-//SZ64: ptrtoint i8* %{{.*}} to i64
-//SZ64: ptrtoint i8* %{{.*}} to i64
+//SZ64ONLY: define{{.*}} i64 @test_sub_private(i8* %x, i8* %y)
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_sub_private(i8 addrspace(5)* %x, i8 addrspace(5)* %y)
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
 ptrdiff_t test_sub_private(private char* x, private char *y) {
   return x - y;
 }
@@ -102,9 +112,12 @@
 //SZ32: define{{.*}} i32 @test_sub_mix(i8* %x, i8 addrspace(4)* %y)
 //SZ32: ptrtoint i8* %{{.*}} to i32
 //SZ32: ptrtoint i8 addrspace(4)* %{{.*}} to i32
-//SZ64: define{{.*}} i64 @test_sub_mix(i8* %x, i8 addrspace(4)* %y)
-//SZ64: ptrtoint i8* %{{.*}} to i64
-//SZ64: ptrtoint i8 addrspace(4)* %{{.*}} to i64
+//SZ64ONLY: define{{.*}} i64 @test_sub_mix(i8* %x, i8 addrspace(4)* %y)
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//SZ64ONLY: ptrtoint i8 addrspace(4)* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_sub_mix(i8 addrspace(5)* %x, i8* %y)
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
+//AMDGCN: ptrtoint i8* %{{.*}} to i64
 ptrdiff_t test_sub_mix(private char* x, generic char *y) {
   return x - y;
 }
diff --git a/test/CodeGenOpenCL/str_literals.cl b/test/CodeGenOpenCL/str_literals.cl
index 8fe5cef..436af83 100644
--- a/test/CodeGenOpenCL/str_literals.cl
+++ b/test/CodeGenOpenCL/str_literals.cl
@@ -5,5 +5,5 @@
 
 // CHECK: unnamed_addr addrspace(2) constant
 // CHECK-NOT: addrspace(2) unnamed_addr constant
-// CHECK: @x = addrspace(2) constant i8 addrspace(2)*
-// CHECK: @y = addrspace(2) constant i8 addrspace(2)*
+// CHECK: @x = {{(dso_local )?}}addrspace(2) constant i8 addrspace(2)*
+// CHECK: @y = {{(dso_local )?}}addrspace(2) constant i8 addrspace(2)*
diff --git a/test/CodeGenOpenCL/vla.cl b/test/CodeGenOpenCL/vla.cl
index 5d3599f..f3d868a 100644
--- a/test/CodeGenOpenCL/vla.cl
+++ b/test/CodeGenOpenCL/vla.cl
@@ -1,13 +1,14 @@
 // RUN: %clang_cc1 -emit-llvm -triple "spir-unknown-unknown" -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,SPIR %s
-// RUN: %clang_cc1 -emit-llvm -triple amdgcn-amd-amdhsa-opencl -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,SPIR %s
-// RUN: %clang_cc1 -emit-llvm -triple amdgcn-amd-amdhsa-amdgizcl -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,GIZ %s
+// RUN: %clang_cc1 -emit-llvm -triple amdgcn-amd-amdhsa -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
 
 constant int sz0 = 5;
-// CHECK: @sz0 = addrspace(2) constant i32 5
+// SPIR: @sz0 = addrspace(2) constant i32 5
+// AMDGCN: @sz0 = addrspace(4) constant i32 5
 const global int sz1 = 16;
 // CHECK: @sz1 = addrspace(1) constant i32 16
 const constant int sz2 = 8;
-// CHECK: @sz2 = addrspace(2) constant i32 8
+// SPIR: @sz2 = addrspace(2) constant i32 8
+// AMDGCN: @sz2 = addrspace(4) constant i32 8
 // CHECK: @testvla.vla2 = internal addrspace(3) global [8 x i16] undef
 
 kernel void testvla()
@@ -15,10 +16,10 @@
   int vla0[sz0];
 // SPIR: %vla0 = alloca [5 x i32]
 // SPIR-NOT: %vla0 = alloca [5 x i32]{{.*}}addrspace
-// GIZ: %vla0 = alloca [5 x i32]{{.*}}addrspace(5)
+// AMDGCN: %vla0 = alloca [5 x i32]{{.*}}addrspace(5)
   char vla1[sz1];
 // SPIR: %vla1 = alloca [16 x i8]
 // SPIR-NOT: %vla1 = alloca [16 x i8]{{.*}}addrspace
-// GIZ: %vla1 = alloca [16 x i8]{{.*}}addrspace(5)
+// AMDGCN: %vla1 = alloca [16 x i8]{{.*}}addrspace(5)
   local short vla2[sz2];
 }
diff --git a/test/CoverageMapping/Inputs/deferred-region-helper.h b/test/CoverageMapping/Inputs/deferred-region-helper.h
new file mode 100644
index 0000000..c3e5c16
--- /dev/null
+++ b/test/CoverageMapping/Inputs/deferred-region-helper.h
@@ -0,0 +1,5 @@
+void included_func() {
+  if (false)
+    return;
+  return;
+}
diff --git a/test/CoverageMapping/break.c b/test/CoverageMapping/break.c
index ee41271..08461d7 100644
--- a/test/CoverageMapping/break.c
+++ b/test/CoverageMapping/break.c
@@ -2,32 +2,43 @@
 
 int main() {         // CHECK: File 0, [[@LINE]]:12 -> {{[0-9]+}}:2 = #0
   int cnt = 0;       // CHECK-NEXT: File 0, [[@LINE+1]]:9 -> [[@LINE+1]]:18 = #0
-  while(cnt < 100) { // CHECK-NEXT: File 0, [[@LINE]]:20 -> [[@LINE+3]]:4 = #1
+  while(cnt < 100) { // CHECK: File 0, [[@LINE]]:20 -> [[@LINE+3]]:4 = #1
     break;
     ++cnt;           // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE+1]]:4 = 0
   }                  // CHECK-NEXT: File 0, [[@LINE+1]]:9 -> [[@LINE+1]]:18 = #0
-  while(cnt < 100) { // CHECK-NEXT: File 0, [[@LINE]]:20 -> [[@LINE+6]]:4 = #2
+  while(cnt < 100) { // CHECK: File 0, [[@LINE]]:20 -> [[@LINE+6]]:4 = #2
     {
       break;
       ++cnt;         // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE+3]]:4 = 0
     }
     ++cnt;
   }                  // CHECK-NEXT: File 0, [[@LINE+1]]:9 -> [[@LINE+1]]:18 = ((#0 + #3) - #4)
-  while(cnt < 100) { // CHECK-NEXT: File 0, [[@LINE]]:20 -> [[@LINE+7]]:4 = #3
+  while(cnt < 100) { // CHECK: File 0, [[@LINE]]:20 -> [[@LINE+7]]:4 = #3
                      // CHECK-NEXT: File 0, [[@LINE+1]]:8 -> [[@LINE+1]]:16 = #3
-    if(cnt == 0) {   // CHECK-NEXT: File 0, [[@LINE]]:18 -> [[@LINE+3]]:6 = #4
+    if(cnt == 0) {   // CHECK: File 0, [[@LINE]]:18 -> [[@LINE+3]]:6 = #4
       break;
       ++cnt;         // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE+1]]:6 = 0
     }
     ++cnt;           // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE+1]]:4 = (#3 - #4)
   }                  // CHECK-NEXT: File 0, [[@LINE+1]]:9 -> [[@LINE+1]]:18 = (#0 + #6)
-  while(cnt < 100) { // CHECK-NEXT: File 0, [[@LINE]]:20 -> [[@LINE+8]]:4 = #5
+  while(cnt < 100) { // CHECK: File 0, [[@LINE]]:20 -> [[@LINE+8]]:4 = #5
                      // CHECK-NEXT: File 0, [[@LINE+1]]:8 -> [[@LINE+1]]:16 = #5
-    if(cnt == 0) {   // CHECK-NEXT: File 0, [[@LINE]]:18 -> [[@LINE+2]]:6 = #6
-      ++cnt;
+    if(cnt == 0) {   // CHECK: File 0, [[@LINE]]:18 -> [[@LINE+2]]:6 = #6
+      ++cnt;         // CHECK-NEXT: Gap,File 0, [[@LINE+1]]:6 -> [[@LINE+1]]:12 = (#5 - #6)
     } else {         // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+2]]:6 = (#5 - #6)
       break;
     }
     ++cnt;
   }
 }
+
+// CHECK-LABEL: break_continue_in_increment:
+// CHECK:  [[@LINE+6]]:11 -> [[@LINE+6]]:45 = #1
+// CHECK:  [[@LINE+5]]:18 -> [[@LINE+5]]:19 = #1
+// CHECK:  [[@LINE+4]]:21 -> [[@LINE+4]]:26 = #2
+// CHECK:  [[@LINE+3]]:33 -> [[@LINE+3]]:41 = (#1 - #2)
+// CHECK:  [[@LINE+3]]:5 -> [[@LINE+3]]:6 = #1
+void break_continue_in_increment(int x) {
+  for (;; ({ if (x) break; else continue; }))
+    ;
+}
diff --git a/test/CoverageMapping/casts.c b/test/CoverageMapping/casts.c
index d295f31..6f479fd 100644
--- a/test/CoverageMapping/casts.c
+++ b/test/CoverageMapping/casts.c
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only -main-file-name casts.c %s | FileCheck %s
 
 int main() {                                                   // CHECK: File 0, [[@LINE]]:12 -> [[@LINE+4]]:2 = #0
-                                                               // CHECK-NEXT: File 0, [[@LINE+1]]:41 -> [[@LINE+1]]:54 = #1
+                                                               // CHECK: File 0, [[@LINE+1]]:41 -> [[@LINE+1]]:54 = #1
   int window_size = (sizeof(int) <= 2 ? (unsigned)512 : 1024); // CHECK-NEXT: File 0, [[@LINE]]:57 -> [[@LINE]]:61 = (#0 - #1)
   return 0;
 }
diff --git a/test/CoverageMapping/classtemplate.cpp b/test/CoverageMapping/classtemplate.cpp
index 0ccdcb2..0dbb0c0 100644
--- a/test/CoverageMapping/classtemplate.cpp
+++ b/test/CoverageMapping/classtemplate.cpp
@@ -2,6 +2,7 @@
 // RUN: FileCheck -input-file %tmapping %s --check-prefix=CHECK-CONSTRUCTOR
 // RUN: FileCheck -input-file %tmapping %s --check-prefix=CHECK-GETTER
 // RUN: FileCheck -input-file %tmapping %s --check-prefix=CHECK-SETTER
+// RUN: FileCheck -input-file %tmapping %s --check-prefix=CHECK-INIT-LIST
 
 template<class TT>
 class Test {
@@ -44,11 +45,51 @@
   void unmangleable(UninstantiatedClassWithTraits<T> x) {}
 };
 
+void abort() __attribute__((noreturn));
+
+namespace std {
+typedef decltype(sizeof(int)) size_t;
+
+template <typename E> struct initializer_list {
+  const E *p;
+  size_t n;
+  initializer_list(const E *p, size_t n) : p(p), n(n) {}
+};
+
+template <typename F, typename S> struct pair {
+  F f;
+  S s;
+  pair(const F &f, const S &s) : f(f), s(s) {}
+};
+
+struct string {
+  const char *str;
+  string() { abort(); }
+  string(const char *S) : str(S) {}
+  ~string() { abort(); }
+};
+
+template<typename K, typename V>
+struct map {
+  using T = pair<K, V>;
+  map(initializer_list<T> i, const string &s = string()) {}
+  ~map() { abort(); }
+};
+
+}; // namespace std
+
+// CHECK-INIT-LIST-LABEL: _Z5Test4v:
+std::map<int, int> Test4() { // CHECK-INIT-LIST: File 0, [[@LINE]]:28 -> [[@LINE+3]]:2 = #0
+  abort();
+  return std::map<int, int>{{0, 0}}; // CHECK-INIT-LIST-NEXT: [[@LINE]]:3 -> [[@LINE]]:36 = 0
+}
+
 int main() {
   Test<unsigned> t;
   t.set(Test<unsigned>::A, 5.5);
   t.set(Test<unsigned>::T, 5.6);
   t.set(Test<unsigned>::G, 5.7);
   t.set(Test<unsigned>::C, 5.8);
+  Test4();
   return 0;
 }
diff --git a/test/CoverageMapping/continue.c b/test/CoverageMapping/continue.c
index 7ea03fb..9864c91 100644
--- a/test/CoverageMapping/continue.c
+++ b/test/CoverageMapping/continue.c
@@ -3,21 +3,21 @@
 int main() {                    // CHECK: File 0, [[@LINE]]:12 -> [[@LINE+21]]:2 = #0
   int j = 0;                    // CHECK-NEXT: File 0, [[@LINE+2]]:18 -> [[@LINE+2]]:24 = (#0 + #1)
                                 // CHECK-NEXT: File 0, [[@LINE+1]]:26 -> [[@LINE+1]]:29 = #1
-  for(int i = 0; i < 20; ++i) { // CHECK-NEXT: File 0, [[@LINE]]:31 -> [[@LINE+17]]:4 = #1
+  for(int i = 0; i < 20; ++i) { // CHECK: File 0, [[@LINE]]:31 -> [[@LINE+17]]:4 = #1
     if(i < 10) {                // CHECK: File 0, [[@LINE]]:16 -> [[@LINE+13]]:6 = #2
       if(i < 5) {               // CHECK: File 0, [[@LINE]]:17 -> [[@LINE+3]]:8 = #3
         continue;
         j = 1;                   // CHECK-NEXT: File 0, [[@LINE]]:9 -> [[@LINE+1]]:8 = 0
-      } else {                   // CHECK-NEXT: File 0, [[@LINE]]:14 -> [[@LINE+2]]:8 = (#2 - #3)
+      } else {                   // CHECK: File 0, [[@LINE]]:14 -> [[@LINE+2]]:8 = (#2 - #3)
         j = 2;
       }
       j = 3;                     // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE+6]]:6 = (#2 - #3)
       if(i < 7) {                // CHECK: File 0, [[@LINE]]:17 -> [[@LINE+3]]:8 = #4
         continue;
         j = 4;                   // CHECK-NEXT: File 0, [[@LINE]]:9 -> [[@LINE+1]]:8 = 0
-      } else j = 5;              // CHECK-NEXT: File 0, [[@LINE]]:14 -> [[@LINE]]:19 = ((#2 - #3) - #4)
+      } else j = 5;              // CHECK: File 0, [[@LINE]]:14 -> [[@LINE]]:19 = ((#2 - #3) - #4)
       j = 6;                     // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE+1]]:6 = ((#2 - #3) - #4)
-    } else                       // CHECK-NEXT: File 0, [[@LINE+1]]:7 -> [[@LINE+1]]:12 = (#1 - #2)
+    } else                       // CHECK: File 0, [[@LINE+1]]:7 -> [[@LINE+1]]:12 = (#1 - #2)
       j = 7;
     j = 8;                       // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE+1]]:4 = ((#1 - #3) - #4)
   }
diff --git a/test/CoverageMapping/deferred-region.cpp b/test/CoverageMapping/deferred-region.cpp
index 3504588..5a104f7 100644
--- a/test/CoverageMapping/deferred-region.cpp
+++ b/test/CoverageMapping/deferred-region.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c++11 -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -fexceptions -fcxx-exceptions -emit-llvm-only -triple %itanium_abi_triple -main-file-name deferred-region.cpp %s | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -fexceptions -fcxx-exceptions -emit-llvm-only -triple %itanium_abi_triple -main-file-name deferred-region.cpp -I %S/Inputs %s | FileCheck %s
 
 #define IF if
 #define STMT(S) S
@@ -170,6 +170,16 @@
 	return; // CHECK: Gap,File 0, [[@LINE]]:8 -> [[@LINE+1]]:2 = 0
 }
 
+#include "deferred-region-helper.h"
+// CHECK-LABEL: _Z13included_funcv:
+// CHECK:  Gap,File 0, 2:13 -> 3:5 = #1
+// CHECK:  Gap,File 0, 3:11 -> 4:3 = (#0 - #1)
+
+// CHECK-LABEL: _Z7includev:
+void include() {
+  included_func();
+}
+
 int main() {
   foo(0);
   foo(1);
@@ -189,5 +199,6 @@
   for_loop();
   while_loop();
   gotos();
+  include();
   return 0;
 }
diff --git a/test/CoverageMapping/if.cpp b/test/CoverageMapping/if.cpp
index 95e6d8a..e3d6f4e 100644
--- a/test/CoverageMapping/if.cpp
+++ b/test/CoverageMapping/if.cpp
@@ -3,37 +3,50 @@
 int nop() { return 0; }
 
 // CHECK-LABEL: _Z3foov:
-void foo() {                    // CHECK-NEXT: [[@LINE]]:12 -> [[@LINE+5]]:2 = #0
+                                // CHECK-NEXT: [[@LINE+1]]:12 -> [[@LINE+6]]:2 = #0
+void foo() {                    // CHECK-NEXT: Gap,File 0, [[@LINE+1]]:20 -> [[@LINE+1]]:22 = #2
   if (int j = true ? nop()      // CHECK-NEXT: [[@LINE]]:22 -> [[@LINE]]:27 = #2
                    : nop();     // CHECK-NEXT: [[@LINE]]:22 -> [[@LINE]]:27 = (#0 - #2)
       j)                        // CHECK-NEXT: [[@LINE]]:7 -> [[@LINE]]:8 = #0
-    ++j;                        // CHECK-NEXT: [[@LINE]]:5 -> [[@LINE]]:8 = #1
-}
+    ++j;                        // CHECK-NEXT: [[@LINE-1]]:9 -> [[@LINE]]:5 = #1
+}                               // CHECK-NEXT: [[@LINE-1]]:5 -> [[@LINE-1]]:8 = #1
 
 // CHECK-LABEL: main:
 int main() {                    // CHECK: File 0, [[@LINE]]:12 -> {{[0-9]+}}:2 = #0
   int i = 0;
-                                // CHECK-NEXT: File 0, [[@LINE+1]]:6 -> [[@LINE+1]]:12 = #0
+                                // CHECK-NEXT: File 0, [[@LINE+2]]:6 -> [[@LINE+2]]:12 = #0
+                                // CHECK-NEXT: Gap,File 0, [[@LINE+1]]:13 -> [[@LINE+1]]:14 = #1
   if(i == 0) i = 1;             // CHECK-NEXT: File 0, [[@LINE]]:14 -> [[@LINE]]:19 = #1
+
                                 // CHECK-NEXT: File 0, [[@LINE+1]]:6 -> [[@LINE+1]]:12 = #0
-  if(i == 1)
+  if(i == 1)                    // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE+1]]:5 = #2
     i = 2;                      // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE]]:10 = #2
+
                                 // CHECK-NEXT: File 0, [[@LINE+1]]:6 -> [[@LINE+1]]:12 = #0
-  if(i == 0) { i = 1;           // CHECK-NEXT: File 0, [[@LINE]]:14 -> [[@LINE+2]]:4 = #3
-    i = 2;
+  if(i == 0) { i = 1;           // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE]]:14 = #3
+    i = 2;                      // CHECK-NEXT: File 0, [[@LINE-1]]:14 -> [[@LINE+1]]:4 = #3
   }
                                 // CHECK-NEXT: File 0, [[@LINE+1]]:6 -> [[@LINE+1]]:12 = #0
-  if(i != 0) {                  // CHECK-NEXT: File 0, [[@LINE]]:14 -> [[@LINE+2]]:4 = #4
-    i = 1;
-  } else {                      // CHECK-NEXT: File 0, [[@LINE]]:10 -> [[@LINE+2]]:4 = (#0 - #4)
-    i = 3;
+  if(i != 0) {                  // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE]]:14 = #4
+    i = 1;                      // CHECK-NEXT: File 0, [[@LINE-1]]:14 -> [[@LINE+1]]:4 = #4
+  } else {                      // CHECK-NEXT: Gap,File 0, [[@LINE]]:4 -> [[@LINE]]:10 = (#0 - #4)
+    i = 3;                      // CHECK-NEXT: File 0, [[@LINE-1]]:10 -> [[@LINE+1]]:4 = (#0 - #4)
   }
 
-  i = i == 0?
+  i = i == 0?                   // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE+1]]:9 = #5
         i + 1 :                 // CHECK-NEXT: File 0, [[@LINE]]:9 -> [[@LINE]]:14 = #5
         i + 2;                  // CHECK-NEXT: File 0, [[@LINE]]:9 -> [[@LINE]]:14 = (#0 - #5)
+
+                                // CHECK-NEXT: Gap,File 0, [[@LINE+2]]:13 -> [[@LINE+2]]:14 = #6
                                 // CHECK-NEXT: File 0, [[@LINE+1]]:14 -> [[@LINE+1]]:20 = #6
   i = i == 0?i + 12:i + 10;     // CHECK-NEXT: File 0, [[@LINE]]:21 -> [[@LINE]]:27 = (#0 - #6)
 
   return 0;
 }
+
+#define FOO true
+
+// CHECK-LABEL: _Z7ternaryv:
+void ternary() {
+  true ? FOO : FOO; // CHECK-NOT: Gap,{{.*}}, [[@LINE]]:8 ->
+}
diff --git a/test/CoverageMapping/includehell.cpp b/test/CoverageMapping/includehell.cpp
index 9ad3683..fd08d6a 100644
--- a/test/CoverageMapping/includehell.cpp
+++ b/test/CoverageMapping/includehell.cpp
@@ -37,43 +37,43 @@
 // CHECK-MAIN-NEXT: File [[MAIN]], 16:35 -> 17:33 = #9
 // CHECK-MAIN-NEXT: Expansion,File [[MAIN]], 17:12 -> 17:33 = #9
 
-// CHECK-START:      File [[START1:[0-9]]], 1:1 -> 5:1 = #0
-// CHECK-START-NEXT: File [[START1]], 4:17 -> 4:22 = (#0 + #1)
-// CHECK-START-NEXT: File [[START1]], 4:24 -> 4:27 = #1
-// CHECK-START-NEXT: File [[START1]], 4:29 -> 5:1 = #1
-// CHECK-START:      File [[START2:[0-9]]], 1:1 -> 5:1 = #0
-// CHECK-START-NEXT: File [[START2]], 4:17 -> 4:22 = (#0 + #5)
-// CHECK-START-NEXT: File [[START2]], 4:24 -> 4:27 = #5
-// CHECK-START-NEXT: File [[START2]], 4:29 -> 5:1 = #5
-// CHECK-START:      File [[START3:[0-9]]], 1:1 -> 5:1 = #0
-// CHECK-START-NEXT: File [[START3]], 4:17 -> 4:22 = (#0 + #9)
-// CHECK-START-NEXT: File [[START3]], 4:24 -> 4:27 = #9
-// CHECK-START-NEXT: File [[START3]], 4:29 -> 5:1 = #9
+// CHECK-START: File [[START1:[0-9]]], 1:1 -> 5:1 = #0
+// CHECK-START: File [[START1]], 4:17 -> 4:22 = (#0 + #1)
+// CHECK-START: File [[START1]], 4:24 -> 4:27 = #1
+// CHECK-START: File [[START1]], 4:29 -> 5:1 = #1
+// CHECK-START: File [[START2:[0-9]]], 1:1 -> 5:1 = #0
+// CHECK-START: File [[START2]], 4:17 -> 4:22 = (#0 + #5)
+// CHECK-START: File [[START2]], 4:24 -> 4:27 = #5
+// CHECK-START: File [[START2]], 4:29 -> 5:1 = #5
+// CHECK-START: File [[START3:[0-9]]], 1:1 -> 5:1 = #0
+// CHECK-START: File [[START3]], 4:17 -> 4:22 = (#0 + #9)
+// CHECK-START: File [[START3]], 4:24 -> 4:27 = #9
+// CHECK-START: File [[START3]], 4:29 -> 5:1 = #9
 
 // CHECK-CODE:      File [[CODE1:[0-9]]], 1:1 -> 14:1 = #1
 // CHECK-CODE-NEXT: File [[CODE1]], 4:5 -> 4:11 = #1
-// CHECK-CODE-NEXT: File [[CODE1]], 4:13 -> 6:2 = #2
-// CHECK-CODE-NEXT: File [[CODE1]], 6:8 -> 8:2 = (#1 - #2)
+// CHECK-CODE: File [[CODE1]], 4:13 -> 6:2 = #2
+// CHECK-CODE: File [[CODE1]], 6:8 -> 8:2 = (#1 - #2)
 // CHECK-CODE-NEXT: File [[CODE1]], 9:5 -> 9:9 = #1
-// CHECK-CODE-NEXT: File [[CODE1]], 9:11 -> 11:2 = #3
-// CHECK-CODE-NEXT: File [[CODE1]], 11:8 -> 13:2 = (#1 - #3)
+// CHECK-CODE: File [[CODE1]], 9:11 -> 11:2 = #3
+// CHECK-CODE: File [[CODE1]], 11:8 -> 13:2 = (#1 - #3)
 // CHECK-CODE:      File [[CODE2:[0-9]]], 1:1 -> 14:1 = #5
 // CHECK-CODE-NEXT: File [[CODE2]], 4:5 -> 4:11 = #5
-// CHECK-CODE-NEXT: File [[CODE2]], 4:13 -> 6:2 = #6
-// CHECK-CODE-NEXT: File [[CODE2]], 6:8 -> 8:2 = (#5 - #6)
+// CHECK-CODE: File [[CODE2]], 4:13 -> 6:2 = #6
+// CHECK-CODE: File [[CODE2]], 6:8 -> 8:2 = (#5 - #6)
 // CHECK-CODE-NEXT: File [[CODE2]], 9:5 -> 9:9 = #5
-// CHECK-CODE-NEXT: File [[CODE2]], 9:11 -> 11:2 = #7
-// CHECK-CODE-NEXT: File [[CODE2]], 11:8 -> 13:2 = (#5 - #7)
+// CHECK-CODE: File [[CODE2]], 9:11 -> 11:2 = #7
+// CHECK-CODE: File [[CODE2]], 11:8 -> 13:2 = (#5 - #7)
 
-// CHECK-END:      File [[END1:[0-9]]], 1:1 -> 3:2 = #1
-// CHECK-END-NEXT: File [[END1]], 1:1 -> 6:1 = #0
-// CHECK-END-NEXT: File [[END1]], 5:5 -> 5:9 = #0
-// CHECK-END-NEXT: File [[END1]], 5:11 -> 5:16 = #4
-// CHECK-END:      File [[END2:[0-9]]], 1:1 -> 3:2 = #5
-// CHECK-END-NEXT: File [[END2]], 1:1 -> 6:1 = #0
-// CHECK-END-NEXT: File [[END2]], 5:5 -> 5:9 = #0
-// CHECK-END-NEXT: File [[END2]], 5:11 -> 5:16 = #8
-// CHECK-END:      File [[END3:[0-9]]], 1:1 -> 3:2 = #9
-// CHECK-END-NEXT: File [[END3]], 1:1 -> 6:1 = #0
-// CHECK-END-NEXT: File [[END3]], 5:5 -> 5:9 = #0
-// CHECK-END-NEXT: File [[END3]], 5:11 -> 5:16 = #10
+// CHECK-END: File [[END1:[0-9]]], 1:1 -> 3:2 = #1
+// CHECK-END: File [[END1]], 1:1 -> 6:1 = #0
+// CHECK-END: File [[END1]], 5:5 -> 5:9 = #0
+// CHECK-END: File [[END1]], 5:11 -> 5:16 = #4
+// CHECK-END: File [[END2:[0-9]]], 1:1 -> 3:2 = #5
+// CHECK-END: File [[END2]], 1:1 -> 6:1 = #0
+// CHECK-END: File [[END2]], 5:5 -> 5:9 = #0
+// CHECK-END: File [[END2]], 5:11 -> 5:16 = #8
+// CHECK-END: File [[END3:[0-9]]], 1:1 -> 3:2 = #9
+// CHECK-END: File [[END3]], 1:1 -> 6:1 = #0
+// CHECK-END: File [[END3]], 5:5 -> 5:9 = #0
+// CHECK-END: File [[END3]], 5:11 -> 5:16 = #10
diff --git a/test/CoverageMapping/label.cpp b/test/CoverageMapping/label.cpp
index aec5e4f..8cac5b0 100644
--- a/test/CoverageMapping/label.cpp
+++ b/test/CoverageMapping/label.cpp
@@ -4,32 +4,32 @@
 void func() {                // CHECK-NEXT: File 0, [[@LINE]]:13 -> {{[0-9]+}}:2 = #0
   int i = 0;                 // CHECK-NEXT: File 0, [[@LINE+2]]:14 -> [[@LINE+2]]:20 = (#0 + #3)
                              // CHECK-NEXT: File 0, [[@LINE+1]]:22 -> [[@LINE+1]]:25 = #3
-  for(i = 0; i < 10; ++i) {  // CHECK-NEXT: File 0, [[@LINE]]:27 -> [[@LINE+11]]:4 = #1
+  for(i = 0; i < 10; ++i) {  // CHECK: File 0, [[@LINE]]:27 -> [[@LINE+11]]:4 = #1
                              // CHECK-NEXT: File 0, [[@LINE+1]]:8 -> [[@LINE+1]]:13 = #1
-    if(i < 5) {              // CHECK-NEXT: File 0, [[@LINE]]:15 -> [[@LINE+6]]:6 = #2
+    if(i < 5) {              // CHECK: File 0, [[@LINE]]:15 -> [[@LINE+6]]:6 = #2
       {
         x:                   // CHECK-NEXT: File 0, [[@LINE]]:9 -> [[@LINE+4]]:6 = #3
           int j = 1;
       }
       int m = 2;
     } else
-      goto x;                // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:13 = (#1 - #2)
+      goto x;                // CHECK: File 0, [[@LINE]]:7 -> [[@LINE]]:13 = (#1 - #2)
     int k = 3;               // CHECK-NEXT: File 0, [[@LINE-1]]:13 -> [[@LINE]]:5 = #3
   }                          // CHECK-NEXT: File 0, [[@LINE-1]]:5 -> [[@LINE]]:4 = #3
   static int j = 0;          // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+5]]:2 = ((#0 + #3) - #1)
   ++j;
   if(j == 1)                 // CHECK-NEXT: File 0, [[@LINE]]:6 -> [[@LINE]]:12 = ((#0 + #3) - #1)
-    goto x;                  // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE]]:11 = #4
+    goto x;                  // CHECK: File 0, [[@LINE]]:5 -> [[@LINE]]:11 = #4
                              // CHECK-NEXT: File 0, [[@LINE-1]]:11 -> [[@LINE+1]]:2 = (((#0 + #3) - #1) - #4)
 }
 
                              // CHECK-NEXT: test1
 void test1(int x) {          // CHECK-NEXT: File 0, [[@LINE]]:19 -> {{[0-9]+}}:2 = #0
   if(x == 0)                 // CHECK-NEXT: File 0, [[@LINE]]:6 -> [[@LINE]]:12 = #0
-    goto a;                  // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE]]:11 = #1
+    goto a;                  // CHECK: File 0, [[@LINE]]:5 -> [[@LINE]]:11 = #1
                              // CHECK-NEXT: File 0, [[@LINE-1]]:11 -> [[@LINE+1]]:3 = (#0 - #1)
-  goto b;                    // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+5]]:2 = (#0 - #1)
-                             // CHECK-NEXT: File 0, [[@LINE-1]]:3 -> [[@LINE+4]]:2 = #3
+  goto b;                    // CHECK: Gap,File 0, [[@LINE]]:3 -> [[@LINE+5]]:2 = #3
+                             // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:9 -> [[@LINE+1]]:1 = #2
 a:                           // CHECK-NEXT: File 0, [[@LINE]]:1 -> [[@LINE+3]]:2 = #2
 b:                           // CHECK-NEXT: File 0, [[@LINE]]:1 -> [[@LINE+2]]:2 = #3
   x = x + 1;
@@ -38,11 +38,11 @@
                              // CHECK-NEXT: test2
 void test2(int x) {          // CHECK-NEXT: File 0, [[@LINE]]:19 -> {{[0-9]+}}:2 = #0
   if(x == 0)                 // CHECK-NEXT: File 0, [[@LINE]]:6 -> [[@LINE]]:12 = #0
-    goto a;                  // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE]]:11 = #1
-                             // CHECK-NEXT: File 0, [[@LINE-1]]:11 -> [[@LINE+3]]:8 = #0
+    goto a;                  // CHECK: File 0, [[@LINE]]:5 -> [[@LINE]]:11 = #1
+                             // CHECK: Gap,File 0, [[@LINE-1]]:12 -> [[@LINE+3]]:8 = (#0 - #1)
                              // CHECK-NEXT: File 0, [[@LINE+2]]:8 -> [[@LINE+3]]:11 = (#0 - #1)
                              // CHECK-NEXT: File 0, [[@LINE+1]]:11 -> [[@LINE+1]]:17 = (#0 - #1)
-  else if(x == 1)            // CHECK-NEXT: File 0, [[@LINE+1]]:5 -> [[@LINE+1]]:11 = #2
+  else if(x == 1)            // CHECK: File 0, [[@LINE+1]]:5 -> [[@LINE+1]]:11 = #2
     goto b;                  // CHECK-NEXT: File 0, [[@LINE]]:11 -> [[@LINE+1]]:1 = #3
 a:                           // CHECK-NEXT: File 0, [[@LINE]]:1 -> [[@LINE+3]]:2 = #3
 b:                           // CHECK-NEXT: File 0, [[@LINE]]:1 -> [[@LINE+2]]:2 = #4
@@ -55,10 +55,10 @@
   for(int i = 0; i < 10; ++i) { // CHECK: File 0, [[@LINE]]:31 -> [[@LINE+13]]:4 = #1
   a:                         // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+12]]:4 = #2
     if(i < 3)                // CHECK-NEXT: File 0, [[@LINE]]:8 -> [[@LINE]]:13 = #2
-      goto e;                // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:13 = #3
+      goto e;                // CHECK: File 0, [[@LINE]]:7 -> [[@LINE]]:13 = #3
                              // CHECK-NEXT: File 0, [[@LINE-1]]:13 -> [[@LINE+1]]:5 = (#2 - #3)
     goto c;                  // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE+8]]:4 = (#2 - #3)
-
+                             // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:11 -> [[@LINE+1]]:3 = #4
   b:                         // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+6]]:4 = #4
     j = 2;
   c:                         // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+4]]:4 = #5
diff --git a/test/CoverageMapping/loops.cpp b/test/CoverageMapping/loops.cpp
index cb7d777..ff7aafd 100644
--- a/test/CoverageMapping/loops.cpp
+++ b/test/CoverageMapping/loops.cpp
@@ -3,7 +3,7 @@
                                     // CHECK: rangedFor
 void rangedFor() {                  // CHECK-NEXT: File 0, [[@LINE]]:18 -> {{[0-9]+}}:2 = #0
   int arr[] = { 1, 2, 3, 4, 5 };
-  int sum = 0;
+  int sum = 0;                      // CHECK: Gap,File 0, [[@LINE+1]]:20 -> [[@LINE+1]]:21 = #1
   for(auto i : arr) {               // CHECK: File 0, [[@LINE]]:21 -> [[@LINE+6]]:4 = #1
     if (i == 3)
       continue;                     // CHECK: File 0, [[@LINE]]:7 -> [[@LINE]]:15 = #2
@@ -17,24 +17,27 @@
 }
 
                                     // CHECK: main:
-int main() {                        // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+24]]:2 = #0
+int main() {                        // CHECK-NEXT: File 0, [[@LINE]]:12 -> {{.*}}:2 = #0
                                     // CHECK-NEXT: File 0, [[@LINE+1]]:18 -> [[@LINE+1]]:24 = (#0 + #1)
   for(int i = 0; i < 10; ++i)       // CHECK-NEXT: File 0, [[@LINE]]:26 -> [[@LINE]]:29 = #1
-     ;                              // CHECK-NEXT: File 0, [[@LINE]]:6 -> [[@LINE]]:7 = #1
+     ;                              // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:30 -> [[@LINE]]:6 = #1
+                                    // CHECK-NEXT: File 0, [[@LINE-1]]:6 -> [[@LINE-1]]:7 = #1
   for(int i = 0;
       i < 10;                       // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:13 = (#0 + #2)
       ++i)                          // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:10 = #2
-  {                                 // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:4 = #2
-    int x = 0;
+  {                                 // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:11 -> [[@LINE]]:3 = #2
+    int x = 0;                      // CHECK-NEXT: File 0, [[@LINE-1]]:3 -> [[@LINE+1]]:4 = #2
   }
   int j = 0;                        // CHECK-NEXT: File 0, [[@LINE+1]]:9 -> [[@LINE+1]]:14 = (#0 + #3)
-  while(j < 5) ++j;                 // CHECK-NEXT: File 0, [[@LINE]]:16 -> [[@LINE]]:19 = #3
+  while(j < 5) ++j;                 // CHECK-NEXT: Gap,File 0, [[@LINE]]:15 -> [[@LINE]]:16 = #3
+                                    // CHECK-NEXT: File 0, [[@LINE-1]]:16 -> [[@LINE-1]]:19 = #3
+
   do {                              // CHECK-NEXT: File 0, [[@LINE]]:6 -> [[@LINE+2]]:4 = (#0 + #4)
     ++j;
   } while(j < 10);                  // CHECK-NEXT: File 0, [[@LINE]]:11 -> [[@LINE]]:17 = (#0 + #4)
   j = 0;
-  while
-   (j < 5)                          // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE]]:10 = (#0 + #5)
+  while                             // CHECK-NEXT: File 0, [[@LINE+1]]:5 -> [[@LINE+1]]:10 = (#0 + #5)
+   (j < 5)                          // CHECK-NEXT: Gap,File 0, [[@LINE]]:11 -> [[@LINE+1]]:6 = #5
      ++j;                           // CHECK-NEXT: File 0, [[@LINE]]:6 -> [[@LINE]]:9 = #5
   do
     ++j;                            // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE]]:8 = (#0 + #6)
diff --git a/test/CoverageMapping/macro-expansion.c b/test/CoverageMapping/macro-expansion.c
index 4e52558..1e4a28b 100644
--- a/test/CoverageMapping/macro-expansion.c
+++ b/test/CoverageMapping/macro-expansion.c
@@ -4,18 +4,18 @@
 // CHECK:      File 1, [[@LINE+5]]:12 -> [[@LINE+5]]:38 = #0
 // CHECK-NEXT: File 1, [[@LINE+4]]:15 -> [[@LINE+4]]:28 = (#0 + #2)
 // CHECK-NEXT: File 1, [[@LINE+3]]:21 -> [[@LINE+3]]:22 = (#0 + #2)
-// CHECK-NEXT: File 1, [[@LINE+2]]:24 -> [[@LINE+2]]:26 = #3
+// CHECK: File 1, [[@LINE+2]]:24 -> [[@LINE+2]]:26 = #3
 // CHECK-NEXT: File 1, [[@LINE+1]]:36 -> [[@LINE+1]]:37 = (#0 + #2)
 #define M1 do { if (0) {} } while (0)
 // CHECK-NEXT: File 2, [[@LINE+10]]:15 -> [[@LINE+10]]:41 = #0
 // CHECK-NEXT: File 2, [[@LINE+9]]:18 -> [[@LINE+9]]:31 = (#0 + #4)
 // CHECK-NEXT: File 2, [[@LINE+8]]:24 -> [[@LINE+8]]:25 = (#0 + #4)
-// CHECK-NEXT: File 2, [[@LINE+7]]:27 -> [[@LINE+7]]:29 = #5
+// CHECK: File 2, [[@LINE+7]]:27 -> [[@LINE+7]]:29 = #5
 // CHECK-NEXT: File 2, [[@LINE+6]]:39 -> [[@LINE+6]]:40 = (#0 + #4)
 // CHECK-NEXT: File 3, [[@LINE+5]]:15 -> [[@LINE+5]]:41 = #0
 // CHECK-NEXT: File 3, [[@LINE+4]]:18 -> [[@LINE+4]]:31 = (#0 + #6)
 // CHECK-NEXT: File 3, [[@LINE+3]]:24 -> [[@LINE+3]]:25 = (#0 + #6)
-// CHECK-NEXT: File 3, [[@LINE+2]]:27 -> [[@LINE+2]]:29 = #7
+// CHECK: File 3, [[@LINE+2]]:27 -> [[@LINE+2]]:29 = #7
 // CHECK-NEXT: File 3, [[@LINE+1]]:39 -> [[@LINE+1]]:40 = (#0 + #6)
 #define M2(x) do { if (x) {} } while (0)
 // CHECK-NEXT: File 4, [[@LINE+4]]:15 -> [[@LINE+4]]:38 = #0
@@ -40,7 +40,7 @@
 // CHECK-NEXT: File 9, {{[0-9]+}}:15 -> {{[0-9]+}}:41 = (#0 + #8)
 // CHECK-NEXT: File 9, {{[0-9]+}}:18 -> {{[0-9]+}}:31 = ((#0 + #8) + #9)
 // CHECK-NEXT: File 9, {{[0-9]+}}:24 -> {{[0-9]+}}:25 = ((#0 + #8) + #9)
-// CHECK-NEXT: File 9, {{[0-9]+}}:27 -> {{[0-9]+}}:29 = #10
+// CHECK: File 9, {{[0-9]+}}:27 -> {{[0-9]+}}:29 = #10
 // CHECK-NEXT: File 9, {{[0-9]+}}:39 -> {{[0-9]+}}:40 = ((#0 + #8) + #9)
 
 void func(int x) {
diff --git a/test/CoverageMapping/macro-expressions.cpp b/test/CoverageMapping/macro-expressions.cpp
index 3eba869..26d70c6 100644
--- a/test/CoverageMapping/macro-expressions.cpp
+++ b/test/CoverageMapping/macro-expressions.cpp
@@ -54,19 +54,19 @@
 // CHECK-NEXT: File 0, [[@LINE+1]]:17 -> {{[0-9]+}}:2 = #0
 void foo(int i) {
   // CHECK-NEXT: File 0, [[@LINE+2]]:7 -> [[@LINE+2]]:8 = #0
-  // CHECK-NEXT: File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:12 = #1
+  // CHECK: File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:12 = #1
   if (0) {}
 
   // CHECK-NEXT: Expansion,File 0, [[@LINE+2]]:7 -> [[@LINE+2]]:11 = #0
   // CHECK-NEXT: File 0, [[@LINE+1]]:16 -> [[@LINE+1]]:18 = #2
   if (EXPR(i)) {}
   // CHECK-NEXT: Expansion,File 0, [[@LINE+2]]:9 -> [[@LINE+2]]:14 = (#0 + #3)
-  // CHECK-NEXT: File 0, [[@LINE+1]]:20 -> [[@LINE+1]]:22 = #3
+  // CHECK: File 0, [[@LINE+1]]:20 -> [[@LINE+1]]:22 = #3
   for (;NEXPR(i);) {}
   // CHECK-NEXT: Expansion,File 0, [[@LINE+4]]:8 -> [[@LINE+4]]:14 = #0
   // CHECK-NEXT: Expansion,File 0, [[@LINE+3]]:33 -> [[@LINE+3]]:35 = (#0 + #4)
   // CHECK-NEXT: Expansion,File 0, [[@LINE+2]]:43 -> [[@LINE+2]]:46 = #4
-  // CHECK-NEXT: File 0, [[@LINE+1]]:51 -> [[@LINE+1]]:53 = #4
+  // CHECK: File 0, [[@LINE+1]]:51 -> [[@LINE+1]]:53 = #4
   for (ASSIGN(DECL(int, j), 0); LT(j, i); INC(j)) {}
   // CHECK-NEXT: Expansion,File 0, [[@LINE+1]]:3 -> [[@LINE+1]]:9 = #0
   ASSIGN(DECL(int, k), 0);
@@ -79,17 +79,17 @@
   do {} while (NEXPR(i));
   // CHECK-NEXT: Expansion,File 0, [[@LINE+3]]:8 -> [[@LINE+3]]:12 = #0
   // CHECK-NEXT: Expansion,File 0, [[@LINE+2]]:23 -> [[@LINE+2]]:26 = #0
-  // CHECK-NEXT: File 0, [[@LINE+1]]:42 -> [[@LINE+1]]:44 = #7
+  // CHECK: File 0, [[@LINE+1]]:42 -> [[@LINE+1]]:44 = #7
   for (DECL(int, j) : ARR(int, 1, 2, 3)) {}
 
   // CHECK-NEXT: Expansion,File 0, [[@LINE+2]]:14 -> [[@LINE+2]]:20 = #0
   // CHECK-NEXT: Expansion,File 0, [[@LINE+1]]:23 -> [[@LINE+1]]:29 = #0
   (void)(i ? PRIo64 : PRIu64);
 
-  // CHECK-NEXT: File 0, [[@LINE+5]]:14 -> [[@LINE+5]]:15 = #9
+  // CHECK: File 0, [[@LINE+5]]:14 -> [[@LINE+5]]:15 = #9
   // CHECK-NEXT: Expansion,File 0, [[@LINE+4]]:18 -> [[@LINE+4]]:22 = (#0 - #9)
   // CHECK-NEXT: File 0, [[@LINE+3]]:22 -> [[@LINE+3]]:33 = (#0 - #9)
-  // CHECK-NEXT: File 0, [[@LINE+2]]:28 -> [[@LINE+2]]:29 = #10
+  // CHECK: File 0, [[@LINE+2]]:28 -> [[@LINE+2]]:29 = #10
   // CHECK-NEXT: File 0, [[@LINE+1]]:32 -> [[@LINE+1]]:33 = ((#0 - #9) - #10)
   (void)(i ? i : EXPR(i) ? i : 0);
   // CHECK-NEXT: Expansion,File 0, [[@LINE+3]]:15 -> [[@LINE+3]]:19 = (#0 - #11)
diff --git a/test/CoverageMapping/macros.c b/test/CoverageMapping/macros.c
index 60164be..95fe37e 100644
--- a/test/CoverageMapping/macros.c
+++ b/test/CoverageMapping/macros.c
@@ -40,7 +40,7 @@
 void func4() { // CHECK-NEXT: File 0, [[@LINE]]:14 -> [[@LINE+6]]:2 = #0
   int i = 0;
   while (i++ < 10) // CHECK-NEXT: File 0, [[@LINE]]:10 -> [[@LINE]]:18 = (#0 + #1)
-    if (i < 5) // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE+2]]:14 = #1
+    if (i < 5) // CHECK: File 0, [[@LINE]]:5 -> [[@LINE+2]]:14 = #1
                // CHECK-NEXT: File 0, [[@LINE-1]]:9 -> [[@LINE-1]]:14 = #1
       MACRO_2; // CHECK-NEXT: Expansion,File 0, [[@LINE]]:7 -> [[@LINE]]:14 = #2
 }
diff --git a/test/CoverageMapping/macroscopes.cpp b/test/CoverageMapping/macroscopes.cpp
index f5fd55c..3f5f65e 100644
--- a/test/CoverageMapping/macroscopes.cpp
+++ b/test/CoverageMapping/macroscopes.cpp
@@ -89,10 +89,10 @@
 // CHECK-NEXT: File 1, 3:52 -> 3:53 = #1
 // CHECK-NEXT: File 2, 10:3 -> 20:4 = #1
 // CHECK-NEXT: File 2, 11:7 -> 11:13 = #1
-// CHECK-NEXT: File 2, 11:15 -> 13:4 = #2
+// CHECK: File 2, 11:15 -> 13:4 = #2
 // CHECK-NEXT: File 2, 13:10 -> 15:4 = (#1 - #2)
 // CHECK-NEXT: File 2, 16:7 -> 16:11 = #1
-// CHECK-NEXT: File 2, 16:13 -> 18:4 = #3
+// CHECK: File 2, 16:13 -> 18:4 = #3
 // CHECK-NEXT: File 2, 18:10 -> 20:4 = (#1 - #3)
 // CHECK-NEXT: File 3, 6:3 -> 7:4 = #1
 // CHECK-NEXT: File 4, 3:24 -> 3:53 = #0
@@ -101,10 +101,10 @@
 // CHECK-NEXT: File 4, 3:52 -> 3:53 = #4
 // CHECK-NEXT: File 5, 10:3 -> 20:4 = #4
 // CHECK-NEXT: File 5, 11:7 -> 11:13 = #4
-// CHECK-NEXT: File 5, 11:15 -> 13:4 = #5
+// CHECK: File 5, 11:15 -> 13:4 = #5
 // CHECK-NEXT: File 5, 13:10 -> 15:4 = (#4 - #5)
 // CHECK-NEXT: File 5, 16:7 -> 16:11 = #4
-// CHECK-NEXT: File 5, 16:13 -> 18:4 = #6
+// CHECK: File 5, 16:13 -> 18:4 = #6
 // CHECK-NEXT: File 5, 18:10 -> 20:4 = (#4 - #6)
 // CHECK-NEXT: File 6, 6:3 -> 7:4 = #4
 // CHECK-NEXT: File 7, 3:24 -> 3:53 = #0
diff --git a/test/CoverageMapping/moremacros.c b/test/CoverageMapping/moremacros.c
index 630b75d..88411f3 100644
--- a/test/CoverageMapping/moremacros.c
+++ b/test/CoverageMapping/moremacros.c
@@ -7,7 +7,7 @@
 // CHECK-NEXT: File 0, [[@LINE+1]]:40 -> {{[0-9]+}}:2 = #0
 int main(int argc, const char *argv[]) {
   // CHECK-NEXT: File 0, [[@LINE+1]]:7 -> [[@LINE+1]]:12 = #0
-  if (!argc) {} // CHECK-NEXT: File 0, [[@LINE]]:14 -> [[@LINE]]:16 = #1
+  if (!argc) {} // CHECK: File 0, [[@LINE]]:14 -> [[@LINE]]:16 = #1
 
   // CHECK-NEXT: File 0, [[@LINE+3]]:7 -> [[@LINE+3]]:12 = #0
   // CHECK-NEXT: Expansion,File 0, [[@LINE+2]]:14 -> [[@LINE+2]]:19 = #2
@@ -27,7 +27,7 @@
 
   // CHECK-NEXT: File 0, [[@LINE+3]]:3 -> [[@LINE+7]]:2 = ((#0 - #2) - #3)
   // CHECK-NEXT: File 0, [[@LINE+2]]:7 -> [[@LINE+2]]:12 = ((#0 - #2) - #3)
-  // CHECK-NEXT: File 0, [[@LINE+1]]:14 -> [[@LINE+4]]:8 = #4
+  // CHECK: File 0, [[@LINE+1]]:14 -> [[@LINE+4]]:8 = #4
   if (!argc) {
     return 0;
   // CHECK-NEXT: Expansion,File 0, [[@LINE+1]]:3 -> [[@LINE+1]]:8 = #4
diff --git a/test/CoverageMapping/objc.m b/test/CoverageMapping/objc.m
index 55c7545..4e4c184 100644
--- a/test/CoverageMapping/objc.m
+++ b/test/CoverageMapping/objc.m
@@ -7,7 +7,7 @@
                       // CHECK: func
 void func(A *a) {     // CHECK-NEXT: File 0, [[@LINE]]:17 -> [[@LINE+3]]:2 = #0
   if (a)              // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:8 = #0
-    [a bork:  20  ];  // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE]]:20 = #1
+    [a bork:  20  ];  // CHECK: File 0, [[@LINE]]:5 -> [[@LINE]]:20 = #1
 }
 
 @interface NSArray
@@ -17,12 +17,12 @@
 
                                // CHECK: func2
 void func2(NSArray *array) {   // CHECK-NEXT: File 0, [[@LINE]]:28 -> {{[0-9]+}}:2 = #0
-  int i = 0;
+  int i = 0;                   // CHECK-NEXT: Gap,File 0, [[@LINE+1]]:28 -> [[@LINE+1]]:29 = #1
   for (NSArray *x in array) {  // CHECK-NEXT: File 0, [[@LINE]]:29 -> [[@LINE+7]]:4 = #1
                                // CHECK-NEXT: File 0, [[@LINE+1]]:9 -> [[@LINE+1]]:10 = #1
-    if (x) {                   // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+2]]:6 = #2
+    if (x) {                   // CHECK: File 0, [[@LINE]]:12 -> [[@LINE+2]]:6 = #2
       i = 1;
-    } else {                   // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+2]]:6 = (#1 - #2)
+    } else {                   // CHECK: File 0, [[@LINE]]:12 -> [[@LINE+2]]:6 = (#1 - #2)
       i = -1;
     }
   }
diff --git a/test/CoverageMapping/preprocessor.c b/test/CoverageMapping/preprocessor.c
index cce8b67..b3ebc7b 100644
--- a/test/CoverageMapping/preprocessor.c
+++ b/test/CoverageMapping/preprocessor.c
@@ -19,7 +19,7 @@
 
 #if 1
                  // CHECK-NEXT: File 0, [[@LINE+1]]:6 -> [[@LINE+1]]:12 = #0
-  if(i == 0) {   // CHECK-NEXT: File 0, [[@LINE]]:14 -> [[@LINE+2]]:4 = #1
+  if(i == 0) {   // CHECK: File 0, [[@LINE]]:14 -> [[@LINE+2]]:4 = #1
     i = 1;
   }
 #else            // CHECK-NEXT: Skipped,File 0, [[@LINE]]:1 -> [[@LINE+6]]:1 = 0
diff --git a/test/CoverageMapping/return.c b/test/CoverageMapping/return.c
index 3cc271b..440acb5 100644
--- a/test/CoverageMapping/return.c
+++ b/test/CoverageMapping/return.c
@@ -10,15 +10,15 @@
 void func2() {                  // CHECK-NEXT: File 0, [[@LINE]]:14 -> {{[0-9]+}}:2 = #0
                                 // CHECK-NEXT: File 0, [[@LINE+2]]:18 -> [[@LINE+2]]:24 = ((#0 + #1) - #2)
                                 // CHECK-NEXT: File 0, [[@LINE+1]]:26 -> [[@LINE+1]]:29 = (#1 - #2)
-  for(int i = 0; i < 10; ++i) { // CHECK-NEXT: File 0, [[@LINE]]:31 -> {{[0-9]+}}:4 = #1
+  for(int i = 0; i < 10; ++i) { // CHECK: File 0, [[@LINE]]:31 -> {{[0-9]+}}:4 = #1
                                 // CHECK-NEXT: File 0, [[@LINE+1]]:8 -> [[@LINE+1]]:13 = #1
-    if(i > 2) {                 // CHECK-NEXT: File 0, [[@LINE]]:15 -> [[@LINE+2]]:6 = #2
+    if(i > 2) {                 // CHECK: File 0, [[@LINE]]:15 -> [[@LINE+2]]:6 = #2
       return;                   // CHECK-NEXT: File 0, [[@LINE+1]]:6 -> [[@LINE+3]]:5 = (#1 - #2)
     }                           // CHECK-NEXT: File 0, [[@LINE+2]]:5 -> {{[0-9]+}}:4 = (#1 - #2)
                                 // CHECK-NEXT: File 0, [[@LINE+1]]:8 -> [[@LINE+1]]:14 = (#1 - #2)
-    if(i == 3) {                // CHECK-NEXT: File 0, [[@LINE]]:16 -> [[@LINE+2]]:6 = #3
+    if(i == 3) {                // CHECK: File 0, [[@LINE]]:16 -> [[@LINE+2]]:6 = #3
       int j = 1;
-    } else {                    // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+2]]:6 = ((#1 - #2) - #3)
+    } else {                    // CHECK: File 0, [[@LINE]]:12 -> [[@LINE+2]]:6 = ((#1 - #2) - #3)
       int j = 2;
     }
   }
@@ -27,9 +27,9 @@
                                // CHECK-NEXT: func3
 void func3(int x) {            // CHECK-NEXT: File 0, [[@LINE]]:19 -> {{[0-9]+}}:2 = #0
                                // CHECK-NEXT: File 0, [[@LINE+1]]:6 -> [[@LINE+1]]:11 = #0
-  if(x > 5) {                  // CHECK-NEXT: File 0, [[@LINE]]:13 -> [[@LINE+6]]:4 = #1
+  if(x > 5) {                  // CHECK: File 0, [[@LINE]]:13 -> [[@LINE+6]]:4 = #1
     while(x >= 9) {            // CHECK-NEXT: File 0, [[@LINE]]:11 -> [[@LINE]]:17 = #1
-      return;                  // CHECK-NEXT: File 0, [[@LINE-1]]:19 -> [[@LINE+2]]:6 = #2
+      return;                  // CHECK: File 0, [[@LINE-1]]:19 -> [[@LINE+2]]:6 = #2
       --x;                     // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE+1]]:6 = 0
     }
     int i = 0;                 // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE+1]]:4 = (#1 - #2)
diff --git a/test/CoverageMapping/switch.cpp b/test/CoverageMapping/switch.cpp
index d347e66..30c6492 100644
--- a/test/CoverageMapping/switch.cpp
+++ b/test/CoverageMapping/switch.cpp
@@ -38,7 +38,7 @@
 
                     // CHECK: baz
 void baz() {        // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+5]]:2 = #0
-  switch (int i = true ? nop()  // CHECK-NEXT: [[@LINE]]:26 -> [[@LINE]]:31 = #2
+  switch (int i = true ? nop()  // CHECK: [[@LINE]]:26 -> [[@LINE]]:31 = #2
                        : nop(); // CHECK-NEXT: [[@LINE]]:26 -> [[@LINE]]:31 = (#0 - #2)
           i) {}
   nop();            // CHECK-NEXT: [[@LINE]]:3 -> [[@LINE+1]]:2 = #1
diff --git a/test/CoverageMapping/switchmacro.c b/test/CoverageMapping/switchmacro.c
index 8fb1b83..f4c14f7 100644
--- a/test/CoverageMapping/switchmacro.c
+++ b/test/CoverageMapping/switchmacro.c
@@ -7,13 +7,13 @@
   switch (i) {
   default:       // CHECK-NEXT: File 0, [[@LINE]]:3 -> {{[0-9]+}}:11 = #2
     if (i == 1)  // CHECK-NEXT: File 0, [[@LINE]]:9 -> [[@LINE]]:15 = #2
-      return 0;  // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:15 = #3
+      return 0;  // CHECK: File 0, [[@LINE]]:7 -> [[@LINE]]:15 = #3
                  // CHECK-NEXT: File 0, [[@LINE-1]]:15 -> [[@LINE+3]]:5 = (#2 - #3)
     // CHECK-NEXT: Expansion,File 0, [[@LINE+2]]:5 -> [[@LINE+2]]:8 = (#2 - #3) (Expanded file = 1)
     // CHECK-NEXT: File 0, [[@LINE+1]]:8 -> {{[0-9]+}}:11 = (#2 - #3)
     FOO(1);
   case 0:        // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:13 = ((#2 + #4) - #3)
-    return 2;
+    return 2;    // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE+6]]:3 = #5
 
   // CHECK-NEXT: Expansion,File 0, [[@LINE+2]]:3 -> [[@LINE+2]]:6 = 0
   // CHECK-NEXT: File 0, [[@LINE+1]]:6 -> {{[0-9]+}}:11 = 0
diff --git a/test/CoverageMapping/test.c b/test/CoverageMapping/test.c
index 5affbaa..ae73fcb 100644
--- a/test/CoverageMapping/test.c
+++ b/test/CoverageMapping/test.c
@@ -7,7 +7,7 @@
 int main() {                     // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+7]]:2 = #0
                                  // CHECK-NEXT: File 0, [[@LINE+1]]:18 -> [[@LINE+1]]:24 = (#0 + #1)
   for(int i = 0; i < 10; ++i) {  // CHECK-NEXT: File 0, [[@LINE]]:26 -> [[@LINE]]:29 = #1
-    bar();                       // CHECK-NEXT: File 0, [[@LINE-1]]:31 -> [[@LINE+1]]:4 = #1
+    bar();                       // CHECK: File 0, [[@LINE-1]]:31 -> [[@LINE+1]]:4 = #1
   }
   static_func();
   return 0;
@@ -16,7 +16,7 @@
                                  // CHECK-NEXT: foo
 void foo() {                     // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+5]]:2 = #0
                                  // CHECK-NEXT: File 0, [[@LINE+1]]:6 -> [[@LINE+1]]:7 = #0
-  if(1) {                        // CHECK-NEXT: File 0, [[@LINE]]:9 -> [[@LINE+2]]:4 = #1
+  if(1) {                        // CHECK: File 0, [[@LINE]]:9 -> [[@LINE+2]]:4 = #1
     int i = 0;
   }
 }
diff --git a/test/CoverageMapping/trycatch.cpp b/test/CoverageMapping/trycatch.cpp
index 37b35d3..5603543 100644
--- a/test/CoverageMapping/trycatch.cpp
+++ b/test/CoverageMapping/trycatch.cpp
@@ -12,12 +12,12 @@
                                       // CHECK: func
 void func(int i) {                    // CHECK-NEXT: File 0, [[@LINE]]:18 -> {{[0-9]+}}:2 = #0
                                       // CHECK-NEXT: File 0, [[@LINE+1]]:6 -> [[@LINE+1]]:11 = #0
-  if(i % 2) {                         // CHECK-NEXT: File 0, [[@LINE]]:13 -> [[@LINE+4]]:4 = #1
+  if(i % 2) {                         // CHECK: File 0, [[@LINE]]:13 -> [[@LINE+4]]:4 = #1
     throw Error();
     int j = 0;                        // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE+2]]:4 = 0
-                                      // CHECK-NEXT: File 0, [[@LINE+1]]:10 -> [[@LINE+2]]:27 = (#0 - #1)
+                                      // CHECK: File 0, [[@LINE+1]]:10 -> [[@LINE+2]]:27 = (#0 - #1)
   } else if(i == 8)                   // CHECK-NEXT: File 0, [[@LINE]]:13 -> [[@LINE]]:19 = (#0 - #1)
-    throw ImportantError();           // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE]]:27 = #2
+    throw ImportantError();           // CHECK: File 0, [[@LINE]]:5 -> [[@LINE]]:27 = #2
 }                                     // CHECK-NEXT: File 0, [[@LINE-1]]:27 -> [[@LINE]]:2 = ((#0 - #1) - #2)
 
                                       // CHECK-NEXT: main
diff --git a/test/CoverageMapping/while.c b/test/CoverageMapping/while.c
index 7f09e4b..616ecf6 100644
--- a/test/CoverageMapping/while.c
+++ b/test/CoverageMapping/while.c
@@ -3,10 +3,10 @@
                                     // CHECK: main
 int main() {                        // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+8]]:2 = #0
   int j = 0;                        // CHECK-NEXT: File 0, [[@LINE+1]]:9 -> [[@LINE+1]]:14 = (#0 + #1)
-  while(j < 5) ++j;                 // CHECK-NEXT: File 0, [[@LINE]]:16 -> [[@LINE]]:19 = #1
-  j = 0;
-  while
-   (j < 5)                          // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE]]:10 = (#0 + #2)
+  while(j < 5) ++j;                 // CHECK-NEXT: File 0, [[@LINE]]:15 -> [[@LINE]]:16 = #1
+  j = 0;                            // CHECK-NEXT: File 0, [[@LINE-1]]:16 -> [[@LINE-1]]:19 = #1
+  while                             // CHECK-NEXT: File 0, [[@LINE+1]]:5 -> [[@LINE+1]]:10 = (#0 + #2)
+   (j < 5)                          // CHECK-NEXT: Gap,File 0, [[@LINE]]:11 -> [[@LINE+1]]:6 = #2
      ++j;                           // CHECK-NEXT: File 0, [[@LINE]]:6 -> [[@LINE]]:9 = #2
   return 0;
 }
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o b/test/Driver/Inputs/CUDA-nolibdevice/usr/local/cuda/bin/ptxas
old mode 100644
new mode 100755
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o
copy to test/Driver/Inputs/CUDA-nolibdevice/usr/local/cuda/bin/ptxas
diff --git a/test/Driver/Inputs/resource_dir/asan_blacklist.txt b/test/Driver/Inputs/CUDA-symlinks/opt/cuda/bin/ptxas
old mode 100644
new mode 100755
similarity index 100%
copy from test/Driver/Inputs/resource_dir/asan_blacklist.txt
copy to test/Driver/Inputs/CUDA-symlinks/opt/cuda/bin/ptxas
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/CUDA-symlinks/opt/cuda/include/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/CUDA-symlinks/opt/cuda/include/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/CUDA-symlinks/opt/cuda/lib/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/CUDA-symlinks/opt/cuda/lib/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o b/test/Driver/Inputs/CUDA-symlinks/opt/cuda/nvvm/libdevice/libdevice.compute_30.10.bc
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o
copy to test/Driver/Inputs/CUDA-symlinks/opt/cuda/nvvm/libdevice/libdevice.compute_30.10.bc
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o b/test/Driver/Inputs/CUDA-symlinks/opt/cuda/nvvm/libdevice/libdevice.compute_35.10.bc
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o
copy to test/Driver/Inputs/CUDA-symlinks/opt/cuda/nvvm/libdevice/libdevice.compute_35.10.bc
diff --git a/test/Driver/Inputs/CUDA-symlinks/usr/bin/ptxas b/test/Driver/Inputs/CUDA-symlinks/usr/bin/ptxas
new file mode 120000
index 0000000..59eefd9
--- /dev/null
+++ b/test/Driver/Inputs/CUDA-symlinks/usr/bin/ptxas
@@ -0,0 +1 @@
+../../opt/cuda/bin/ptxas
\ No newline at end of file
diff --git a/test/Driver/Inputs/resource_dir/asan_blacklist.txt b/test/Driver/Inputs/CUDA/usr/local/cuda/bin/ptxas
old mode 100644
new mode 100755
similarity index 100%
copy from test/Driver/Inputs/resource_dir/asan_blacklist.txt
copy to test/Driver/Inputs/CUDA/usr/local/cuda/bin/ptxas
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/CUDA_90/usr/local/cuda/bin/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/CUDA_90/usr/local/cuda/bin/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/CUDA_90/usr/local/cuda/include/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/CUDA_90/usr/local/cuda/include/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/CUDA_90/usr/local/cuda/lib/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/CUDA_90/usr/local/cuda/lib/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/CUDA_90/usr/local/cuda/lib64/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/CUDA_90/usr/local/cuda/lib64/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o b/test/Driver/Inputs/CUDA_90/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o
copy to test/Driver/Inputs/CUDA_90/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
diff --git a/test/Driver/Inputs/CUDA_90/usr/local/cuda/version.txt b/test/Driver/Inputs/CUDA_90/usr/local/cuda/version.txt
new file mode 100644
index 0000000..24e1fd4
--- /dev/null
+++ b/test/Driver/Inputs/CUDA_90/usr/local/cuda/version.txt
@@ -0,0 +1 @@
+CUDA Version 9.0.103
diff --git a/test/Driver/Inputs/resource_dir/asan_blacklist.txt b/test/Driver/Inputs/Windows/usr/bin/ld.bfd
old mode 100644
new mode 100755
similarity index 100%
copy from test/Driver/Inputs/resource_dir/asan_blacklist.txt
copy to test/Driver/Inputs/Windows/usr/bin/ld.bfd
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o b/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/crtbegin.o
similarity index 100%
rename from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o
rename to test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/crtbegin.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o b/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/crtend.o
similarity index 100%
rename from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o
rename to test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/crtend.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crti.o b/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/crti.o
similarity index 100%
rename from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crti.o
rename to test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/crti.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtn.o b/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/crtn.o
similarity index 100%
rename from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtn.o
rename to test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/crtn.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-rtems/include/c++/6.3.0/.keep
similarity index 100%
rename from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
rename to test/Driver/Inputs/basic_myriad_tree/sparc-myriad-rtems/include/c++/6.3.0/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o b/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-rtems/lib/crt0.o
similarity index 100%
rename from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o
rename to test/Driver/Inputs/basic_myriad_tree/sparc-myriad-rtems/lib/crt0.o
diff --git a/test/Driver/Inputs/config-1.cfg b/test/Driver/Inputs/config-1.cfg
new file mode 100644
index 0000000..7d1326f
--- /dev/null
+++ b/test/Driver/Inputs/config-1.cfg
@@ -0,0 +1,6 @@
+
+# Empty lines and line started with # are ignored
+-Werror
+
+    # Language
+    -std=c99
diff --git a/test/Driver/Inputs/config-2.cfg b/test/Driver/Inputs/config-2.cfg
new file mode 100644
index 0000000..c803bd2
--- /dev/null
+++ b/test/Driver/Inputs/config-2.cfg
@@ -0,0 +1,2 @@
+# nested inclusion
+@config-3.cfg
diff --git a/test/Driver/Inputs/config-2a.cfg b/test/Driver/Inputs/config-2a.cfg
new file mode 100644
index 0000000..ecdc555
--- /dev/null
+++ b/test/Driver/Inputs/config-2a.cfg
@@ -0,0 +1,2 @@
+# nested inclusion
+@config/config-4.cfg
diff --git a/test/Driver/Inputs/config-3.cfg b/test/Driver/Inputs/config-3.cfg
new file mode 100644
index 0000000..d5086a8
--- /dev/null
+++ b/test/Driver/Inputs/config-3.cfg
@@ -0,0 +1 @@
+-Wundefined-func-template
diff --git a/test/Driver/Inputs/config-4.cfg b/test/Driver/Inputs/config-4.cfg
new file mode 100644
index 0000000..d8a022a
--- /dev/null
+++ b/test/Driver/Inputs/config-4.cfg
@@ -0,0 +1,2 @@
+-L/usr/local/lib
+-stdlib=libc++
\ No newline at end of file
diff --git a/test/Driver/Inputs/config-5.cfg b/test/Driver/Inputs/config-5.cfg
new file mode 100644
index 0000000..09787ae
--- /dev/null
+++ b/test/Driver/Inputs/config-5.cfg
@@ -0,0 +1,2 @@
+--serialize-diagnostics diag.ser
+-target
diff --git a/test/Driver/Inputs/config-6.cfg b/test/Driver/Inputs/config-6.cfg
new file mode 100644
index 0000000..24d93cf
--- /dev/null
+++ b/test/Driver/Inputs/config-6.cfg
@@ -0,0 +1 @@
+--config config-5
diff --git a/test/Driver/Inputs/config/config-4.cfg b/test/Driver/Inputs/config/config-4.cfg
new file mode 100644
index 0000000..033e86a
--- /dev/null
+++ b/test/Driver/Inputs/config/config-4.cfg
@@ -0,0 +1 @@
+-isysroot /opt/data
diff --git a/test/Driver/Inputs/config/i386-qqq.cfg b/test/Driver/Inputs/config/i386-qqq.cfg
new file mode 100644
index 0000000..41e1a94
--- /dev/null
+++ b/test/Driver/Inputs/config/i386-qqq.cfg
@@ -0,0 +1 @@
+-target i386
diff --git a/test/Driver/Inputs/config/i386-qqq3.cfg b/test/Driver/Inputs/config/i386-qqq3.cfg
new file mode 100644
index 0000000..59928c1
--- /dev/null
+++ b/test/Driver/Inputs/config/i386-qqq3.cfg
@@ -0,0 +1 @@
+-target i383
diff --git a/test/Driver/Inputs/config/x86_64-qqq.cfg b/test/Driver/Inputs/config/x86_64-qqq.cfg
new file mode 100644
index 0000000..9d8565a
--- /dev/null
+++ b/test/Driver/Inputs/config/x86_64-qqq.cfg
@@ -0,0 +1 @@
+-target x86_64
diff --git a/test/Driver/Inputs/config/x86_64-qqq2.cfg b/test/Driver/Inputs/config/x86_64-qqq2.cfg
new file mode 100644
index 0000000..9d8565a
--- /dev/null
+++ b/test/Driver/Inputs/config/x86_64-qqq2.cfg
@@ -0,0 +1 @@
+-target x86_64
diff --git a/test/Driver/Inputs/config/x86_64.cfg b/test/Driver/Inputs/config/x86_64.cfg
new file mode 100644
index 0000000..9d8565a
--- /dev/null
+++ b/test/Driver/Inputs/config/x86_64.cfg
@@ -0,0 +1 @@
+-target x86_64
diff --git a/test/Driver/Inputs/config2/config-4.cfg b/test/Driver/Inputs/config2/config-4.cfg
new file mode 100644
index 0000000..bd866b6
--- /dev/null
+++ b/test/Driver/Inputs/config2/config-4.cfg
@@ -0,0 +1 @@
+-Wall
diff --git a/test/Driver/Inputs/config2/i386.cfg b/test/Driver/Inputs/config2/i386.cfg
new file mode 100644
index 0000000..41e1a94
--- /dev/null
+++ b/test/Driver/Inputs/config2/i386.cfg
@@ -0,0 +1 @@
+-target i386
diff --git a/test/Driver/Inputs/resource_dir/asan_blacklist.txt b/test/Driver/Inputs/fuse_ld_windows/ld.foo.exe
old mode 100644
new mode 100755
similarity index 100%
copy from test/Driver/Inputs/resource_dir/asan_blacklist.txt
copy to test/Driver/Inputs/fuse_ld_windows/ld.foo.exe
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/multilib_riscv_linux_sdk/bin/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/bin/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/multilib_riscv_linux_sdk/include/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/include/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o b/test/Driver/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/crtbegin.o
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/crtbegin.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o b/test/Driver/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib32/ilp32/crtbegin.o
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib32/ilp32/crtbegin.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o b/test/Driver/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib32/ilp32d/crtbegin.o
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib32/ilp32d/crtbegin.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o b/test/Driver/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib64/lp64/crtbegin.o
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib64/lp64/crtbegin.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o b/test/Driver/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib64/lp64d/crtbegin.o
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib64/lp64d/crtbegin.o
diff --git a/test/Driver/Inputs/multilib_riscv_linux_sdk/riscv64-unknown-linux-gnu/bin/ld b/test/Driver/Inputs/multilib_riscv_linux_sdk/riscv64-unknown-linux-gnu/bin/ld
new file mode 100755
index 0000000..b23e556
--- /dev/null
+++ b/test/Driver/Inputs/multilib_riscv_linux_sdk/riscv64-unknown-linux-gnu/bin/ld
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/lib/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/lib/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/lib32/ilp32/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/lib32/ilp32/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/lib32/ilp32d/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/lib32/ilp32d/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/lib64/lp64/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/lib64/lp64/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/lib64/lp64d/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/lib64/lp64d/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib32/ilp32/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib32/ilp32/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib32/ilp32d/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib32/ilp32d/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib64/lp64/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib64/lp64/.keep
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep b/test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib64/lp64d/.keep
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/include/c++/4.8.2/.keep
copy to test/Driver/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib64/lp64d/.keep
diff --git a/test/Driver/Inputs/resource_dir/asan_blacklist.txt b/test/Driver/Inputs/resource_dir/lib/linux/libclang_rt.hwasan-aarch64.a.syms
similarity index 100%
copy from test/Driver/Inputs/resource_dir/asan_blacklist.txt
copy to test/Driver/Inputs/resource_dir/lib/linux/libclang_rt.hwasan-aarch64.a.syms
diff --git a/test/Driver/Inputs/resource_dir/asan_blacklist.txt b/test/Driver/Inputs/resource_dir/share/asan_blacklist.txt
similarity index 100%
rename from test/Driver/Inputs/resource_dir/asan_blacklist.txt
rename to test/Driver/Inputs/resource_dir/share/asan_blacklist.txt
diff --git a/test/Driver/Inputs/resource_dir/asan_blacklist.txt b/test/Driver/Inputs/resource_dir/share/hwasan_blacklist.txt
similarity index 100%
copy from test/Driver/Inputs/resource_dir/asan_blacklist.txt
copy to test/Driver/Inputs/resource_dir/share/hwasan_blacklist.txt
diff --git a/test/Driver/Inputs/resource_dir/ubsan_blacklist.txt b/test/Driver/Inputs/resource_dir/share/ubsan_blacklist.txt
similarity index 100%
rename from test/Driver/Inputs/resource_dir/ubsan_blacklist.txt
rename to test/Driver/Inputs/resource_dir/share/ubsan_blacklist.txt
diff --git a/test/Driver/Inputs/resource_dir/vtables_blacklist.txt b/test/Driver/Inputs/resource_dir/share/vtables_blacklist.txt
similarity index 100%
rename from test/Driver/Inputs/resource_dir/vtables_blacklist.txt
rename to test/Driver/Inputs/resource_dir/share/vtables_blacklist.txt
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o b/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/include/c++/4.8.2/sparc-sun-solaris2.11/bits/gthr.h
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o
copy to test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/include/c++/4.8.2/sparc-sun-solaris2.11/bits/gthr.h
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o b/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/include/c++/4.8.2/typeinfo
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o
copy to test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/include/c++/4.8.2/typeinfo
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crt1.o b/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crt1.o
similarity index 100%
rename from test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crt1.o
rename to test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crt1.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crtbegin.o b/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crtbegin.o
similarity index 100%
rename from test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crtbegin.o
rename to test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crtbegin.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crtend.o b/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crtend.o
similarity index 100%
rename from test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crtend.o
rename to test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crtend.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crt1.o b/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9/crt1.o
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crt1.o
copy to test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9/crt1.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o b/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9/crtbegin.o
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o
copy to test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9/crtbegin.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o b/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9/crtend.o
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o
copy to test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9/crtend.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o b/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/libatomic.a
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o
copy to test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/libatomic.a
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o b/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/sparcv9/libatomic.a
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o
copy to test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/sparcv9/libatomic.a
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o b/test/Driver/Inputs/solaris_sparc_tree/usr/lib/crti.o
similarity index 100%
rename from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o
rename to test/Driver/Inputs/solaris_sparc_tree/usr/lib/crti.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crtn.o b/test/Driver/Inputs/solaris_sparc_tree/usr/lib/crtn.o
similarity index 100%
rename from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crtn.o
rename to test/Driver/Inputs/solaris_sparc_tree/usr/lib/crtn.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/ld.so.1 b/test/Driver/Inputs/solaris_sparc_tree/usr/lib/ld.so.1
similarity index 100%
rename from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/ld.so.1
rename to test/Driver/Inputs/solaris_sparc_tree/usr/lib/ld.so.1
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o b/test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/crti.o
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o
copy to test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/crti.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crtn.o b/test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/crtn.o
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crtn.o
copy to test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/crtn.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/ld.so.1 b/test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/ld.so.1
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/ld.so.1
copy to test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/ld.so.1
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o b/test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/include/c++/4.9.4/i386-pc-solaris2.11/bits/gthr.h
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/include/c++/4.9.4/i386-pc-solaris2.11/bits/gthr.h
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o b/test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/include/c++/4.9.4/typeinfo
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/include/c++/4.9.4/typeinfo
diff --git a/test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o b/test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/amd64/libatomic.a
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/sparc-myriad-elf/lib/crt0.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/amd64/libatomic.a
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o b/test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/amd64/crtbegin.o
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/amd64/crtbegin.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o b/test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/amd64/crtend.o
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/amd64/crtend.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o b/test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/crtbegin.o
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtbegin.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/crtbegin.o
diff --git a/test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o b/test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/crtend.o
similarity index 100%
copy from test/Driver/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/crtend.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/crtend.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o b/test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/libatomic.a
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/gcc/4.9/lib/libatomic.a
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crt1.o b/test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/crt1.o
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crt1.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/crt1.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o b/test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/crti.o
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/crti.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crtn.o b/test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/crtn.o
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crtn.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/crtn.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/ld.so.1 b/test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/ld.so.1
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/ld.so.1
copy to test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/ld.so.1
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crt1.o b/test/Driver/Inputs/solaris_x86_tree/usr/lib/crt1.o
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crt1.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/lib/crt1.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o b/test/Driver/Inputs/solaris_x86_tree/usr/lib/crti.o
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crti.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/lib/crti.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crtn.o b/test/Driver/Inputs/solaris_x86_tree/usr/lib/crtn.o
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/crtn.o
copy to test/Driver/Inputs/solaris_x86_tree/usr/lib/crtn.o
diff --git a/test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/ld.so.1 b/test/Driver/Inputs/solaris_x86_tree/usr/lib/ld.so.1
similarity index 100%
copy from test/Driver/Inputs/sparc-sun-solaris2.11/usr/lib/ld.so.1
copy to test/Driver/Inputs/solaris_x86_tree/usr/lib/ld.so.1
diff --git a/test/Driver/XRay/lit.local.cfg b/test/Driver/XRay/lit.local.cfg
index 7cc6c70..41bd413 100644
--- a/test/Driver/XRay/lit.local.cfg
+++ b/test/Driver/XRay/lit.local.cfg
@@ -1,15 +1,16 @@
+import platform
 target_triple_components = config.target_triple.split('-')
 config.available_features.update(target_triple_components)
 
 # Only run the tests in platforms where XRay instrumentation is supported.
 supported_targets = [
-    'x86_64', 'x86_64h', 'arm', 'aarch64', 'arm64', 'powerpc64le', 'mips',
+    'amd64', 'x86_64', 'x86_64h', 'arm', 'aarch64', 'arm64', 'powerpc64le', 'mips',
     'mipsel', 'mips64', 'mips64el'
 ]
 
 # Only on platforms we support.
 supported_oses = [
-    'linux'
+    'Linux', 'FreeBSD'
 ]
 
 triple_set = set(target_triple_components)
@@ -17,7 +18,7 @@
   config.unsupported = True
 
 # Do not run for 'android' despite being linux.
-if len(triple_set.intersection(supported_oses)) == 0 or 'android' in triple_set:
+if platform.system() not in supported_oses or 'android' in triple_set:
   config.unsupported = True
 
 if config.enable_shared:
diff --git a/test/Driver/aarch64-cpus.c b/test/Driver/aarch64-cpus.c
index f644d2b..229484b 100644
--- a/test/Driver/aarch64-cpus.c
+++ b/test/Driver/aarch64-cpus.c
@@ -21,165 +21,188 @@
 // RUN: %clang -target aarch64 -mcpu=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35 %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35 %s
 // RUN: %clang -target aarch64_be -mlittle-endian -mcpu=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35 %s
-// RUN: %clang -target aarch64 -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35 %s
-// RUN: %clang -target aarch64 -mlittle-endian -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35 %s
-// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35 %s
+// RUN: %clang -target aarch64 -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-TUNE %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-TUNE %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-TUNE %s
 // CA35: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a35"
+// CA35-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA35 %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA35 %s
-// RUN: %clang -target arm64 -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA35 %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA35 %s
+// RUN: %clang -target arm64 -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA35-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA35-TUNE %s
 // ARM64-CA35: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a35"
-
+// ARM64-CA35-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53 %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53 %s
 // RUN: %clang -target aarch64_be -mlittle-endian -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53 %s
-// RUN: %clang -target aarch64 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53 %s
-// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53 %s
+// RUN: %clang -target aarch64 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-TUNE %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-TUNE %s
 // CA53: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a53"
+// CA53-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA53 %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA53 %s
-// RUN: %clang -target arm64 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA53 %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA53 %s
+// RUN: %clang -target arm64 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA53-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA53-TUNE %s
 // ARM64-CA53: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a53"
+// ARM64-CA53-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55 %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55 %s
 // RUN: %clang -target aarch64_be -mlittle-endian -mcpu=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55 %s
-// RUN: %clang -target aarch64 -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55 %s
-// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55 %s
+// RUN: %clang -target aarch64 -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55-TUNE %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55-TUNE %s
 // CA55: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a55"
+// CA55-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA55 %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA55 %s
-// RUN: %clang -target arm64 -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA55 %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA55 %s
+// RUN: %clang -target arm64 -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA55-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA55-TUNE %s
 // ARM64-CA55: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a55"
+// ARM64-CA55-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57 %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57 %s
 // RUN: %clang -target aarch64_be -mlittle-endian -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57 %s
-// RUN: %clang -target aarch64 -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57 %s
-// RUN: %clang -target aarch64 -mlittle-endian -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57 %s
-// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57 %s
+// RUN: %clang -target aarch64 -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-TUNE %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-TUNE %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-TUNE %s
 // CA57: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a57"
+// CA57-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA57 %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA57 %s
-// RUN: %clang -target arm64 -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA57 %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA57 %s
+// RUN: %clang -target arm64 -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA57-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA57-TUNE %s
 // ARM64-CA57: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a57"
+// ARM64-CA57-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72 %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72 %s
 // RUN: %clang -target aarch64_be -mlittle-endian -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72 %s
-// RUN: %clang -target aarch64 -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72 %s
-// RUN: %clang -target aarch64 -mlittle-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72 %s
-// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72 %s
+// RUN: %clang -target aarch64 -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-TUNE %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-TUNE %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-TUNE %s
 // CA72: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a72"
+// CA72-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA72 %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA72 %s
-// RUN: %clang -target arm64 -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA72 %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA72 %s
+// RUN: %clang -target arm64 -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA72-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA72-TUNE %s
 // ARM64-CA72: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a72"
+// ARM64-CA72-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73 %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73 %s
 // RUN: %clang -target aarch64_be -mlittle-endian -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73 %s
-// RUN: %clang -target aarch64 -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73 %s
-// RUN: %clang -target aarch64 -mlittle-endian -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73 %s
-// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73 %s
+// RUN: %clang -target aarch64 -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-TUNE %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-TUNE %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-TUNE %s
 // CORTEX-A73: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a73"
+// CORTEX-A73-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A73 %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A73 %s
-// RUN: %clang -target arm64 -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A73 %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A73 %s
+// RUN: %clang -target arm64 -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A73-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A73-TUNE %s
 // ARM64-CORTEX-A73: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a73"
+// ARM64-CORTEX-A73-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A75 %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A75 %s
 // RUN: %clang -target aarch64_be -mlittle-endian -mcpu=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A75 %s
-// RUN: %clang -target aarch64 -mtune=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A75 %s
-// RUN: %clang -target aarch64 -mlittle-endian -mtune=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A75 %s
-// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A75 %s
+// RUN: %clang -target aarch64 -mtune=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A75-TUNE %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A75-TUNE %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A75-TUNE %s
 // CORTEX-A75: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a75"
+// CORTEX-A75-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A75 %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A75 %s
-// RUN: %clang -target arm64 -mtune=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A75 %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A75 %s
+// RUN: %clang -target arm64 -mtune=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A75-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CORTEX-A75-TUNE %s
 // ARM64-CORTEX-A75: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a75"
+// ARM64-CORTEX-A75-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
 // RUN: %clang -target aarch64_be -mlittle-endian -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
-// RUN: %clang -target aarch64 -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
-// RUN: %clang -target aarch64 -mlittle-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
-// RUN: %clang -target aarch64_be -mlittle-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
+// RUN: %clang -target aarch64 -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-TUNE %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-TUNE %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-TUNE %s
 // M1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "exynos-m1"
+// M1-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2 %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2 %s
 // RUN: %clang -target aarch64_be -mlittle-endian -mcpu=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2 %s
-// RUN: %clang -target aarch64 -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2 %s
-// RUN: %clang -target aarch64 -mlittle-endian -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2 %s
-// RUN: %clang -target aarch64_be -mlittle-endian -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2 %s
+// RUN: %clang -target aarch64 -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-TUNE %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-TUNE %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-TUNE %s
 // M2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "exynos-m2"
+// M2-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64_be -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s
-// RUN: %clang -target aarch64_be -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s
-// RUN: %clang -target aarch64 -mbig-endian -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s
-// RUN: %clang -target aarch64_be -mbig-endian -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s
+// RUN: %clang -target aarch64_be -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-TUNE %s
 // M3: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "exynos-m3"
+// M3-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1 %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1 %s
-// RUN: %clang -target arm64 -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1 %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1 %s
+// RUN: %clang -target arm64 -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1-TUNE %s
 // ARM64-M1: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "exynos-m1"
+// ARM64-M1-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M2 %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M2 %s
-// RUN: %clang -target arm64 -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M2 %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M2 %s
+// RUN: %clang -target arm64 -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M2-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M2-TUNE %s
 // ARM64-M2: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "exynos-m2"
+// ARM64-M2-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M3 %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M3 %s
-// RUN: %clang -target arm64 -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M3 %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M3 %s
+// RUN: %clang -target arm64 -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M3-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M3-TUNE %s
 // ARM64-M3: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "exynos-m3"
+// ARM64-M3-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=falkor -### -c %s 2>&1 | FileCheck -check-prefix=FALKOR %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=falkor -### -c %s 2>&1 | FileCheck -check-prefix=FALKOR %s
-// RUN: %clang -target aarch64 -mtune=falkor -### -c %s 2>&1 | FileCheck -check-prefix=FALKOR %s
-// RUN: %clang -target aarch64 -mlittle-endian -mtune=falkor -### -c %s 2>&1 | FileCheck -check-prefix=FALKOR %s
+// RUN: %clang -target aarch64 -mtune=falkor -### -c %s 2>&1 | FileCheck -check-prefix=FALKOR-TUNE %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=falkor -### -c %s 2>&1 | FileCheck -check-prefix=FALKOR-TUNE %s
 // FALKOR: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "falkor"
+// FALKOR-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=falkor -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-FALKOR %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=falkor -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-FALKOR %s
-// RUN: %clang -target arm64 -mtune=falkor -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-FALKOR %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=falkor -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-FALKOR %s
+// RUN: %clang -target arm64 -mtune=falkor -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-FALKOR-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=falkor -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-FALKOR-TUNE %s
 // ARM64-FALKOR: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "falkor"
+// ARM64-FALKOR-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=kryo -### -c %s 2>&1 | FileCheck -check-prefix=KRYO %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=kryo -### -c %s 2>&1 | FileCheck -check-prefix=KRYO %s
-// RUN: %clang -target aarch64 -mtune=kryo -### -c %s 2>&1 | FileCheck -check-prefix=KRYO %s
-// RUN: %clang -target aarch64 -mlittle-endian -mtune=kryo -### -c %s 2>&1 | FileCheck -check-prefix=KRYO %s
+// RUN: %clang -target aarch64 -mtune=kryo -### -c %s 2>&1 | FileCheck -check-prefix=KRYO-TUNE %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=kryo -### -c %s 2>&1 | FileCheck -check-prefix=KRYO-TUNE %s
 // KRYO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "kryo"
+// KRYO-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=kryo -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-KRYO %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=kryo -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-KRYO %s
-// RUN: %clang -target arm64 -mtune=kryo -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-KRYO %s
-// RUN: %clang -target arm64 -mlittle-endian -mtune=kryo -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-KRYO %s
+// RUN: %clang -target arm64 -mtune=kryo -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-KRYO-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=kryo -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-KRYO-TUNE %s
 // ARM64-KRYO: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "kryo"
+// ARM64-KRYO-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99 %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99 %s
@@ -188,7 +211,7 @@
 // RUN: %clang -target aarch64 -mlittle-endian -mtune=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-TUNE %s
 // RUN: %clang -target aarch64_be -mlittle-endian -mtune=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-TUNE %s
 // THUNDERX2T99: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "thunderx2t99" "-target-feature" "+v8.1a"
-// THUNDERX2T99-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "thunderx2t99"
+// THUNDERX2T99-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 // THUNDERX2T99-TUNE-NOT: +v8.1a
 
 // RUN: %clang -target arm64 -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-THUNDERX2T99 %s
@@ -196,7 +219,7 @@
 // RUN: %clang -target arm64 -mtune=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-THUNDERX2T99-TUNE %s
 // RUN: %clang -target arm64 -mlittle-endian -mtune=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-THUNDERX2T99-TUNE %s
 // ARM64-THUNDERX2T99: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "thunderx2t99" "-target-feature" "+v8.1a"
-// ARM64-THUNDERX2T99-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "thunderx2t99"
+// ARM64-THUNDERX2T99-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 // ARM64-THUNDERX2T99-TUNE-NOT: +v8.1a
 
 // RUN: %clang -target aarch64_be -### -c %s 2>&1 | FileCheck -check-prefix=GENERIC-BE %s
@@ -207,91 +230,104 @@
 // RUN: %clang -target aarch64_be -mcpu=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-BE %s
-// RUN: %clang -target aarch64_be -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-BE %s
-// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-BE %s
-// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-BE %s
+// RUN: %clang -target aarch64_be -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-BE-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-BE-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CA35-BE-TUNE %s
 // CA35-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "cortex-a35"
+// CA35-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64_be -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE %s
-// RUN: %clang -target aarch64_be -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE %s
-// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE %s
-// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE %s
+// RUN: %clang -target aarch64_be -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE-TUNE %s
 // CA53-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "cortex-a53"
+// CA53-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64_be -mcpu=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55-BE %s
-// RUN: %clang -target aarch64_be -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55-BE %s
-// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55-BE %s
-// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55-BE %s
+// RUN: %clang -target aarch64_be -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55-BE-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55-BE-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CA55-BE-TUNE %s
 // CA55-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "cortex-a55"
+// CA55-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64_be -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE %s
-// RUN: %clang -target aarch64_be -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE %s
-// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE %s
-// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE %s
+// RUN: %clang -target aarch64_be -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE-TUNE %s
 // CA57-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "cortex-a57"
+// CA57-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64_be -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-BE %s
-// RUN: %clang -target aarch64_be -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-BE %s
-// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-BE %s
-// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-BE %s
+// RUN: %clang -target aarch64_be -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-BE-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-BE-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-BE-TUNE %s
 // CA72-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "cortex-a72"
+// CA72-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64_be -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-BE %s
-// RUN: %clang -target aarch64_be -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-BE %s
-// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-BE %s
-// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-BE %s
+// RUN: %clang -target aarch64_be -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-BE-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-BE-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A73-BE-TUNE %s
 // CORTEX-A73-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "cortex-a73"
+// CORTEX-A73-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64_be -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
-// RUN: %clang -target aarch64_be -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
-// RUN: %clang -target aarch64 -mbig-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
-// RUN: %clang -target aarch64_be -mbig-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
+// RUN: %clang -target aarch64_be -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE-TUNE %s
 // M1-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "exynos-m1"
+// M1-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64_be -mcpu=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-BE %s
-// RUN: %clang -target aarch64_be -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-BE %s
-// RUN: %clang -target aarch64 -mbig-endian -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-BE %s
-// RUN: %clang -target aarch64_be -mbig-endian -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-BE %s
+// RUN: %clang -target aarch64_be -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-BE-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-BE-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=M2-BE-TUNE %s
 // M2-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "exynos-m2"
+// M2-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64_be -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-BE %s
-// RUN: %clang -target aarch64_be -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-BE %s
-// RUN: %clang -target aarch64 -mbig-endian -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-BE %s
-// RUN: %clang -target aarch64_be -mbig-endian -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-BE %s
+// RUN: %clang -target aarch64_be -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-BE-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-BE-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3-BE-TUNE %s
 // M3-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "exynos-m3"
+// M3-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64_be -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE %s
-// RUN: %clang -target aarch64_be -mtune=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE %s
-// RUN: %clang -target aarch64 -mbig-endian -mtune=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE %s
-// RUN: %clang -target aarch64_be -mbig-endian -mtune=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE %s
+// RUN: %clang -target aarch64_be -mtune=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE-TUNE %s
 // THUNDERX2T99-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "thunderx2t99"
+// THUNDERX2T99-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 
-// RUN: %clang -target aarch64 -mcpu=cortex-a57 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE %s
-// RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=cortex-a57  -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE %s
-// RUN: %clang -target aarch64 -mcpu=cortex-a72 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE %s
-// RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=cortex-a72  -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE %s
-// RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=cortex-a73     -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE %s
-// RUN: %clang -target aarch64 -mcpu=thunderx2t99 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE %s
-// RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=thunderx2t99  -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE %s
-// MCPU-MTUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a53"
+// RUN: %clang -target aarch64 -mcpu=cortex-a57 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-A57 %s
+// RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=cortex-a57  -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-A57 %s
+// RUN: %clang -target aarch64 -mcpu=cortex-a72 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-A72 %s
+// RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=cortex-a72  -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-A72 %s
+// RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=cortex-a73     -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-A73 %s
+// RUN: %clang -target aarch64 -mcpu=thunderx2t99 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-THUNDERX2T99 %s
+// RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=thunderx2t99  -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-THUNDERX2T99 %s
+// MCPU-MTUNE-A57: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a57"
+// MCPU-MTUNE-A72: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a72"
+// MCPU-MTUNE-A73: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a73"
+// MCPU-MTUNE-THUNDERX2T99: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "thunderx2t99"
 
 // RUN: %clang -target aarch64 -march=armv8.1a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV81A %s
 // RUN: %clang -target aarch64 -march=armv8.1-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV81A %s
@@ -342,17 +378,21 @@
 
 // ================== Check whether -mcpu and -mtune accept mixed-case values.
 // RUN: %clang -target aarch64 -mcpu=Cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-CA53 %s
-// RUN: %clang -target aarch64 -mtune=Cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-CA53 %s
+// RUN: %clang -target aarch64 -mtune=Cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-CA53-TUNE %s
 // CASE-INSENSITIVE-CA53: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a53"
+// CASE-INSENSITIVE-CA53-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=cortex-A53 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-ARM64-CA53 %s
-// RUN: %clang -target arm64 -mtune=cortex-A53 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-ARM64-CA53 %s
+// RUN: %clang -target arm64 -mtune=cortex-A53 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-ARM64-CA53-TUNE %s
 // CASE-INSENSITIVE-ARM64-CA53: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a53"
+// CASE-INSENSITIVE-ARM64-CA53-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target aarch64 -mcpu=CORTEX-A57 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-CA57 %s
-// RUN: %clang -target aarch64 -mtune=CORTEX-A57 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-CA57 %s
+// RUN: %clang -target aarch64 -mtune=CORTEX-A57 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-CA57-TUNE %s
 // CASE-INSENSITIVE-CA57: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a57"
+// CASE-INSENSITIVE-CA57-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 
 // RUN: %clang -target arm64 -mcpu=Cortex-A57 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-ARM64-CA57 %s
-// RUN: %clang -target arm64 -mtune=Cortex-A57 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-ARM64-CA57 %s
+// RUN: %clang -target arm64 -mtune=Cortex-A57 -### -c %s 2>&1 | FileCheck -check-prefix=CASE-INSENSITIVE-ARM64-CA57-TUNE %s
 // CASE-INSENSITIVE-ARM64-CA57: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a57"
+// CASE-INSENSITIVE-ARM64-CA57-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
diff --git a/test/Driver/amdgcn-toolchain-pic.cl b/test/Driver/amdgcn-toolchain-pic.cl
new file mode 100644
index 0000000..fc2978a
--- /dev/null
+++ b/test/Driver/amdgcn-toolchain-pic.cl
@@ -0,0 +1,7 @@
+// RUN: %clang -no-canonical-prefixes -### -target amdgcn-- -mcpu=gfx803 %s 2>&1 | FileCheck %s
+// RUN: %clang -no-canonical-prefixes -### -target amdgcn-amd- -mcpu=gfx803 %s 2>&1 | FileCheck %s
+// RUN: %clang -no-canonical-prefixes -### -target amdgcn-amd-amdhsa -mcpu=gfx803 %s 2>&1 | FileCheck %s
+// RUN: %clang -no-canonical-prefixes -### -target amdgcn-amd-amdpal -mcpu=gfx803 %s 2>&1 | FileCheck %s
+// RUN: %clang -no-canonical-prefixes -### -target amdgcn-amd-mesa3d -mcpu=gfx803 %s 2>&1 | FileCheck %s
+
+// CHECK: clang{{.*}} "-mrelocation-model" "pic" "-pic-level" "1"
diff --git a/test/Driver/amdgpu-features.c b/test/Driver/amdgpu-features.c
index 495cadb..23ba72b 100644
--- a/test/Driver/amdgpu-features.c
+++ b/test/Driver/amdgpu-features.c
@@ -5,3 +5,9 @@
 // RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=kaveri -mamdgpu-debugger-abi=1.0 %s -o - 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MAMDGPU-DEBUGGER-ABI-1-0 %s
 // CHECK-MAMDGPU-DEBUGGER-ABI-1-0: "-target-feature" "+amdgpu-debugger-insert-nops" "-target-feature" "+amdgpu-debugger-reserve-regs" "-target-feature" "+amdgpu-debugger-emit-prologue"
+
+// RUN: %clang -### -target amdgcn -mcpu=gfx700 -mxnack %s 2>&1 | FileCheck --check-prefix=XNACK %s
+// XNACK: "-target-feature" "+xnack"
+
+// RUN: %clang -### -target amdgcn -mcpu=gfx700 -mno-xnack %s 2>&1 | FileCheck --check-prefix=NO-XNACK %s
+// NO-XNACK: "-target-feature" "-xnack"
diff --git a/test/Driver/amdgpu-macros.cl b/test/Driver/amdgpu-macros.cl
new file mode 100644
index 0000000..a047a7c
--- /dev/null
+++ b/test/Driver/amdgpu-macros.cl
@@ -0,0 +1,263 @@
+// Check that appropriate macros are defined for every supported AMDGPU
+// "-target" and "-mcpu" options.
+
+//
+// R600-based processors.
+//
+
+// RUN: %clang -E -dM -target r600 -mcpu=r600 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,R600 %s
+// RUN: %clang -E -dM -target r600 -mcpu=rv630 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,R600 %s
+// RUN: %clang -E -dM -target r600 -mcpu=rv635 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,R600 %s
+// RUN: %clang -E -dM -target r600 -mcpu=r630 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,R630 %s
+// RUN: %clang -E -dM -target r600 -mcpu=rs780 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,RS880 %s
+// RUN: %clang -E -dM -target r600 -mcpu=rs880 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,RS880 %s
+// RUN: %clang -E -dM -target r600 -mcpu=rv610 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,RS880 %s
+// RUN: %clang -E -dM -target r600 -mcpu=rv620 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,RS880 %s
+// RUN: %clang -E -dM -target r600 -mcpu=rv670 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,RV670 %s
+// RUN: %clang -E -dM -target r600 -mcpu=rv710 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,RV710 %s
+// RUN: %clang -E -dM -target r600 -mcpu=rv730 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,RV730 %s
+// RUN: %clang -E -dM -target r600 -mcpu=rv740 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,RV770 %s
+// RUN: %clang -E -dM -target r600 -mcpu=rv770 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,RV770 %s
+// RUN: %clang -E -dM -target r600 -mcpu=cedar %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,CEDAR %s
+// RUN: %clang -E -dM -target r600 -mcpu=palm %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,CEDAR %s
+// RUN: %clang -E -dM -target r600 -mcpu=cypress %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,CYPRESS %s
+// RUN: %clang -E -dM -target r600 -mcpu=hemlock %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,CYPRESS %s
+// RUN: %clang -E -dM -target r600 -mcpu=juniper %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,JUNIPER %s
+// RUN: %clang -E -dM -target r600 -mcpu=redwood %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,REDWOOD %s
+// RUN: %clang -E -dM -target r600 -mcpu=sumo %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,SUMO %s
+// RUN: %clang -E -dM -target r600 -mcpu=sumo2 %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,SUMO %s
+// RUN: %clang -E -dM -target r600 -mcpu=barts %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,BARTS %s
+// RUN: %clang -E -dM -target r600 -mcpu=caicos %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,CAICOS %s
+// RUN: %clang -E -dM -target r600 -mcpu=aruba %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,CAYMAN %s
+// RUN: %clang -E -dM -target r600 -mcpu=cayman %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,CAYMAN %s
+// RUN: %clang -E -dM -target r600 -mcpu=turks %s 2>&1 | FileCheck --check-prefixes=ARCH-R600,TURKS %s
+
+// R600-NOT:    #define FP_FAST_FMA 1
+// R630-NOT:    #define FP_FAST_FMA 1
+// RS880-NOT:   #define FP_FAST_FMA 1
+// RV670-NOT:   #define FP_FAST_FMA 1
+// RV710-NOT:   #define FP_FAST_FMA 1
+// RV730-NOT:   #define FP_FAST_FMA 1
+// RV770-NOT:   #define FP_FAST_FMA 1
+// CEDAR-NOT:   #define FP_FAST_FMA 1
+// CYPRESS-NOT: #define FP_FAST_FMA 1
+// JUNIPER-NOT: #define FP_FAST_FMA 1
+// REDWOOD-NOT: #define FP_FAST_FMA 1
+// SUMO-NOT:    #define FP_FAST_FMA 1
+// BARTS-NOT:   #define FP_FAST_FMA 1
+// CAICOS-NOT:  #define FP_FAST_FMA 1
+// CAYMAN-NOT:  #define FP_FAST_FMA 1
+// TURKS-NOT:   #define FP_FAST_FMA 1
+
+// R600-NOT:    #define FP_FAST_FMAF 1
+// R630-NOT:    #define FP_FAST_FMAF 1
+// RS880-NOT:   #define FP_FAST_FMAF 1
+// RV670-NOT:   #define FP_FAST_FMAF 1
+// RV710-NOT:   #define FP_FAST_FMAF 1
+// RV730-NOT:   #define FP_FAST_FMAF 1
+// RV770-NOT:   #define FP_FAST_FMAF 1
+// CEDAR-NOT:   #define FP_FAST_FMAF 1
+// CYPRESS-NOT: #define FP_FAST_FMAF 1
+// JUNIPER-NOT: #define FP_FAST_FMAF 1
+// REDWOOD-NOT: #define FP_FAST_FMAF 1
+// SUMO-NOT:    #define FP_FAST_FMAF 1
+// BARTS-NOT:   #define FP_FAST_FMAF 1
+// CAICOS-NOT:  #define FP_FAST_FMAF 1
+// CAYMAN-NOT:  #define FP_FAST_FMAF 1
+// TURKS-NOT:   #define FP_FAST_FMAF 1
+
+// ARCH-R600-DAG: #define __AMDGPU__ 1
+// ARCH-R600-DAG: #define __AMD__ 1
+
+// R600-NOT:    #define __HAS_FMAF__ 1
+// R630-NOT:    #define __HAS_FMAF__ 1
+// RS880-NOT:   #define __HAS_FMAF__ 1
+// RV670-NOT:   #define __HAS_FMAF__ 1
+// RV710-NOT:   #define __HAS_FMAF__ 1
+// RV730-NOT:   #define __HAS_FMAF__ 1
+// RV770-NOT:   #define __HAS_FMAF__ 1
+// CEDAR-NOT:   #define __HAS_FMAF__ 1
+// CYPRESS-DAG: #define __HAS_FMAF__ 1
+// JUNIPER-NOT: #define __HAS_FMAF__ 1
+// REDWOOD-NOT: #define __HAS_FMAF__ 1
+// SUMO-NOT:    #define __HAS_FMAF__ 1
+// BARTS-NOT:   #define __HAS_FMAF__ 1
+// CAICOS-NOT:  #define __HAS_FMAF__ 1
+// CAYMAN-DAG:  #define __HAS_FMAF__ 1
+// TURKS-NOT:   #define __HAS_FMAF__ 1
+
+// R600-NOT:    #define __HAS_FP64__ 1
+// R630-NOT:    #define __HAS_FP64__ 1
+// RS880-NOT:   #define __HAS_FP64__ 1
+// RV670-NOT:   #define __HAS_FP64__ 1
+// RV710-NOT:   #define __HAS_FP64__ 1
+// RV730-NOT:   #define __HAS_FP64__ 1
+// RV770-NOT:   #define __HAS_FP64__ 1
+// CEDAR-NOT:   #define __HAS_FP64__ 1
+// CYPRESS-NOT: #define __HAS_FP64__ 1
+// JUNIPER-NOT: #define __HAS_FP64__ 1
+// REDWOOD-NOT: #define __HAS_FP64__ 1
+// SUMO-NOT:    #define __HAS_FP64__ 1
+// BARTS-NOT:   #define __HAS_FP64__ 1
+// CAICOS-NOT:  #define __HAS_FP64__ 1
+// CAYMAN-NOT:  #define __HAS_FP64__ 1
+// TURKS-NOT:   #define __HAS_FP64__ 1
+
+// R600-NOT:    #define __HAS_LDEXPF__ 1
+// R630-NOT:    #define __HAS_LDEXPF__ 1
+// RS880-NOT:   #define __HAS_LDEXPF__ 1
+// RV670-NOT:   #define __HAS_LDEXPF__ 1
+// RV710-NOT:   #define __HAS_LDEXPF__ 1
+// RV730-NOT:   #define __HAS_LDEXPF__ 1
+// RV770-NOT:   #define __HAS_LDEXPF__ 1
+// CEDAR-NOT:   #define __HAS_LDEXPF__ 1
+// CYPRESS-NOT: #define __HAS_LDEXPF__ 1
+// JUNIPER-NOT: #define __HAS_LDEXPF__ 1
+// REDWOOD-NOT: #define __HAS_LDEXPF__ 1
+// SUMO-NOT:    #define __HAS_LDEXPF__ 1
+// BARTS-NOT:   #define __HAS_LDEXPF__ 1
+// CAICOS-NOT:  #define __HAS_LDEXPF__ 1
+// CAYMAN-NOT:  #define __HAS_LDEXPF__ 1
+// TURKS-NOT:   #define __HAS_LDEXPF__ 1
+
+// ARCH-R600-DAG: #define __R600__ 1
+
+// R600-DAG:    #define __r600__ 1
+// R630-DAG:    #define __r630__ 1
+// RS880-DAG:   #define __rs880__ 1
+// RV670-DAG:   #define __rv670__ 1
+// RV710-DAG:   #define __rv710__ 1
+// RV730-DAG:   #define __rv730__ 1
+// RV770-DAG:   #define __rv770__ 1
+// CEDAR-DAG:   #define __cedar__ 1
+// CYPRESS-DAG: #define __cypress__ 1
+// JUNIPER-DAG: #define __juniper__ 1
+// REDWOOD-DAG: #define __redwood__ 1
+// SUMO-DAG:    #define __sumo__ 1
+// BARTS-DAG:   #define __barts__ 1
+// CAICOS-DAG:  #define __caicos__ 1
+// CAYMAN-DAG:  #define __cayman__ 1
+// TURKS-DAG:   #define __turks__ 1
+
+//
+// AMDGCN-based processors.
+//
+
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx600 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX600 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX600 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx601 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX601 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=hainan %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX601 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=oland %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX601 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=pitcairn %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX601 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=verde %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX601 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx700 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX700 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=kaveri %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX700 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx701 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX701 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=hawaii %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX701 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx702 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX702 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx703 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX703 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=kabini %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX703 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=mullins %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX703 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx704 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX704 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX704 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx801 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX801 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=carrizo %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX801 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX802 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=iceland %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX802 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=tonga %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX802 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx803 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX803 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX803 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=polaris10 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX803 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=polaris11 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX803 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX810 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=stoney %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX810 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX900 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx902 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX902 %s
+
+// GFX600-DAG: #define FP_FAST_FMA 1
+// GFX601-DAG: #define FP_FAST_FMA 1
+// GFX700-DAG: #define FP_FAST_FMA 1
+// GFX701-DAG: #define FP_FAST_FMA 1
+// GFX702-DAG: #define FP_FAST_FMA 1
+// GFX703-DAG: #define FP_FAST_FMA 1
+// GFX704-DAG: #define FP_FAST_FMA 1
+// GFX801-DAG: #define FP_FAST_FMA 1
+// GFX802-DAG: #define FP_FAST_FMA 1
+// GFX803-DAG: #define FP_FAST_FMA 1
+// GFX810-DAG: #define FP_FAST_FMA 1
+// GFX900-DAG: #define FP_FAST_FMA 1
+// GFX902-DAG: #define FP_FAST_FMA 1
+
+// GFX600-DAG: #define FP_FAST_FMAF 1
+// GFX601-NOT: #define FP_FAST_FMAF 1
+// GFX700-NOT: #define FP_FAST_FMAF 1
+// GFX701-DAG: #define FP_FAST_FMAF 1
+// GFX702-DAG: #define FP_FAST_FMAF 1
+// GFX703-NOT: #define FP_FAST_FMAF 1
+// GFX704-NOT: #define FP_FAST_FMAF 1
+// GFX801-DAG: #define FP_FAST_FMAF 1
+// GFX802-NOT: #define FP_FAST_FMAF 1
+// GFX803-NOT: #define FP_FAST_FMAF 1
+// GFX810-NOT: #define FP_FAST_FMAF 1
+// GFX900-DAG: #define FP_FAST_FMAF 1
+// GFX902-DAG: #define FP_FAST_FMAF 1
+
+// ARCH-GCN-DAG: #define __AMDGCN__ 1
+// ARCH-GCN-DAG: #define __AMDGPU__ 1
+// ARCH-GCN-DAG: #define __AMD__ 1
+
+// GFX600-DAG: #define __HAS_FMAF__ 1
+// GFX601-DAG: #define __HAS_FMAF__ 1
+// GFX700-DAG: #define __HAS_FMAF__ 1
+// GFX701-DAG: #define __HAS_FMAF__ 1
+// GFX702-DAG: #define __HAS_FMAF__ 1
+// GFX703-DAG: #define __HAS_FMAF__ 1
+// GFX704-DAG: #define __HAS_FMAF__ 1
+// GFX801-DAG: #define __HAS_FMAF__ 1
+// GFX802-DAG: #define __HAS_FMAF__ 1
+// GFX803-DAG: #define __HAS_FMAF__ 1
+// GFX810-DAG: #define __HAS_FMAF__ 1
+// GFX900-DAG: #define __HAS_FMAF__ 1
+// GFX902-DAG: #define __HAS_FMAF__ 1
+
+// GFX600-DAG: #define __HAS_FP64__ 1
+// GFX601-DAG: #define __HAS_FP64__ 1
+// GFX700-DAG: #define __HAS_FP64__ 1
+// GFX701-DAG: #define __HAS_FP64__ 1
+// GFX702-DAG: #define __HAS_FP64__ 1
+// GFX703-DAG: #define __HAS_FP64__ 1
+// GFX704-DAG: #define __HAS_FP64__ 1
+// GFX801-DAG: #define __HAS_FP64__ 1
+// GFX802-DAG: #define __HAS_FP64__ 1
+// GFX803-DAG: #define __HAS_FP64__ 1
+// GFX810-DAG: #define __HAS_FP64__ 1
+// GFX900-DAG: #define __HAS_FP64__ 1
+// GFX902-DAG: #define __HAS_FP64__ 1
+
+// GFX600-DAG: #define __HAS_LDEXPF__ 1
+// GFX601-DAG: #define __HAS_LDEXPF__ 1
+// GFX700-DAG: #define __HAS_LDEXPF__ 1
+// GFX701-DAG: #define __HAS_LDEXPF__ 1
+// GFX702-DAG: #define __HAS_LDEXPF__ 1
+// GFX703-DAG: #define __HAS_LDEXPF__ 1
+// GFX704-DAG: #define __HAS_LDEXPF__ 1
+// GFX801-DAG: #define __HAS_LDEXPF__ 1
+// GFX802-DAG: #define __HAS_LDEXPF__ 1
+// GFX803-DAG: #define __HAS_LDEXPF__ 1
+// GFX810-DAG: #define __HAS_LDEXPF__ 1
+// GFX900-DAG: #define __HAS_LDEXPF__ 1
+// GFX902-DAG: #define __HAS_LDEXPF__ 1
+
+// GFX600-DAG: #define __gfx600__ 1
+// GFX601-DAG: #define __gfx601__ 1
+// GFX700-DAG: #define __gfx700__ 1
+// GFX701-DAG: #define __gfx701__ 1
+// GFX702-DAG: #define __gfx702__ 1
+// GFX703-DAG: #define __gfx703__ 1
+// GFX704-DAG: #define __gfx704__ 1
+// GFX801-DAG: #define __gfx801__ 1
+// GFX802-DAG: #define __gfx802__ 1
+// GFX803-DAG: #define __gfx803__ 1
+// GFX810-DAG: #define __gfx810__ 1
+// GFX900-DAG: #define __gfx900__ 1
+// GFX902-DAG: #define __gfx902__ 1
diff --git a/test/Driver/amdgpu-mcpu.cl b/test/Driver/amdgpu-mcpu.cl
index 26137ac..265d0f7 100644
--- a/test/Driver/amdgpu-mcpu.cl
+++ b/test/Driver/amdgpu-mcpu.cl
@@ -1,112 +1,115 @@
-t// Check that -mcpu works for all supported GPUs
+// Check that -mcpu works for all supported GPUs.
 
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=r600 %s -o - 2>&1 | FileCheck --check-prefix=R600-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=rv630 %s -o - 2>&1 | FileCheck --check-prefix=R600-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=rv635 %s -o - 2>&1 | FileCheck --check-prefix=R600-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=rv610 %s -o - 2>&1 | FileCheck --check-prefix=RS880-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=rv620 %s -o - 2>&1 | FileCheck --check-prefix=RS880-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=rs780 %s -o - 2>&1 | FileCheck --check-prefix=RS880-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=rs880 %s -o - 2>&1 | FileCheck --check-prefix=RS880-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=rv670 %s -o - 2>&1 | FileCheck --check-prefix=RV670-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=rv710 %s -o - 2>&1 | FileCheck --check-prefix=RV710-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=rv730 %s -o - 2>&1 | FileCheck --check-prefix=RV730-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=rv740 %s -o - 2>&1 | FileCheck --check-prefix=RV770-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=rv770 %s -o - 2>&1 | FileCheck --check-prefix=RV770-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=palm %s -o - 2>&1 | FileCheck --check-prefix=CEDAR-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=cedar %s -o - 2>&1 | FileCheck --check-prefix=CEDAR-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=sumo %s -o - 2>&1 | FileCheck --check-prefix=SUMO-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=sumo2 %s -o - 2>&1 | FileCheck --check-prefix=SUMO-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=redwood %s -o - 2>&1 | FileCheck --check-prefix=REDWOOD-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=juniper %s -o - 2>&1 | FileCheck --check-prefix=JUNIPER-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=juniper %s -o - 2>&1 | FileCheck --check-prefix=JUNIPER-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=hemlock %s -o - 2>&1 | FileCheck --check-prefix=CYPRESS-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=cypress %s -o - 2>&1 | FileCheck --check-prefix=CYPRESS-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=barts %s -o - 2>&1 | FileCheck --check-prefix=BARTS-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=turks %s -o - 2>&1 | FileCheck --check-prefix=TURKS-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=caicos %s -o - 2>&1 | FileCheck --check-prefix=CAICOS-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=cayman %s -o - 2>&1 | FileCheck --check-prefix=CAYMAN-CHECK %s
-// RUN: %clang -### -target r600 -x cl -S -emit-llvm -mcpu=aruba %s -o - 2>&1 | FileCheck --check-prefix=CAYMAN-CHECK %s
+//
+// R600-based processors.
+//
 
-// R600-CHECK:  "-target-cpu" "r600"
-// RS880-CHECK: "-target-cpu" "rs880"
-// RV670-CHECK: "-target-cpu" "rv670"
-// RV710-CHECK: "-target-cpu" "rv710"
-// RV730-CHECK: "-target-cpu" "rv730"
-// RV770-CHECK: "-target-cpu" "rv770"
-// CEDAR-CHECK: "-target-cpu" "cedar"
-// REDWOOD-CHECK: "-target-cpu" "redwood"
-// SUMO-CHECK: "-target-cpu" "sumo"
-// JUNIPER-CHECK: "-target-cpu" "juniper"
-// CYPRESS-CHECK: "-target-cpu" "cypress"
-// BARTS-CHECK: "-target-cpu" "barts"
-// TURKS-CHECK: "-target-cpu" "turks"
-// CAICOS-CHECK: "-target-cpu" "caicos"
-// CAYMAN-CHECK: "-target-cpu" "cayman"
+// RUN: %clang -### -target r600 -mcpu=r600 %s 2>&1 | FileCheck --check-prefix=R600 %s
+// RUN: %clang -### -target r600 -mcpu=rv630 %s 2>&1 | FileCheck --check-prefix=R600 %s
+// RUN: %clang -### -target r600 -mcpu=rv635 %s 2>&1 | FileCheck --check-prefix=R600 %s
+// RUN: %clang -### -target r600 -mcpu=r630 %s 2>&1 | FileCheck --check-prefix=R630 %s
+// RUN: %clang -### -target r600 -mcpu=rs780 %s 2>&1 | FileCheck --check-prefix=RS880 %s
+// RUN: %clang -### -target r600 -mcpu=rs880 %s 2>&1 | FileCheck --check-prefix=RS880 %s
+// RUN: %clang -### -target r600 -mcpu=rv610 %s 2>&1 | FileCheck --check-prefix=RS880 %s
+// RUN: %clang -### -target r600 -mcpu=rv620 %s 2>&1 | FileCheck --check-prefix=RS880 %s
+// RUN: %clang -### -target r600 -mcpu=rv670 %s 2>&1 | FileCheck --check-prefix=RV670 %s
+// RUN: %clang -### -target r600 -mcpu=rv710 %s 2>&1 | FileCheck --check-prefix=RV710 %s
+// RUN: %clang -### -target r600 -mcpu=rv730 %s 2>&1 | FileCheck --check-prefix=RV730 %s
+// RUN: %clang -### -target r600 -mcpu=rv740 %s 2>&1 | FileCheck --check-prefix=RV770 %s
+// RUN: %clang -### -target r600 -mcpu=rv770 %s 2>&1 | FileCheck --check-prefix=RV770 %s
+// RUN: %clang -### -target r600 -mcpu=cedar %s 2>&1 | FileCheck --check-prefix=CEDAR %s
+// RUN: %clang -### -target r600 -mcpu=palm %s 2>&1 | FileCheck --check-prefix=CEDAR %s
+// RUN: %clang -### -target r600 -mcpu=cypress %s 2>&1 | FileCheck --check-prefix=CYPRESS %s
+// RUN: %clang -### -target r600 -mcpu=hemlock %s 2>&1 | FileCheck --check-prefix=CYPRESS %s
+// RUN: %clang -### -target r600 -mcpu=juniper %s 2>&1 | FileCheck --check-prefix=JUNIPER %s
+// RUN: %clang -### -target r600 -mcpu=redwood %s 2>&1 | FileCheck --check-prefix=REDWOOD %s
+// RUN: %clang -### -target r600 -mcpu=sumo %s 2>&1 | FileCheck --check-prefix=SUMO %s
+// RUN: %clang -### -target r600 -mcpu=sumo2 %s 2>&1 | FileCheck --check-prefix=SUMO %s
+// RUN: %clang -### -target r600 -mcpu=barts %s 2>&1 | FileCheck --check-prefix=BARTS %s
+// RUN: %clang -### -target r600 -mcpu=caicos %s 2>&1 | FileCheck --check-prefix=CAICOS %s
+// RUN: %clang -### -target r600 -mcpu=aruba %s 2>&1 | FileCheck --check-prefix=CAYMAN %s
+// RUN: %clang -### -target r600 -mcpu=cayman %s 2>&1 | FileCheck --check-prefix=CAYMAN %s
+// RUN: %clang -### -target r600 -mcpu=turks %s 2>&1 | FileCheck --check-prefix=TURKS %s
 
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx600 %s -o - 2>&1 | FileCheck --check-prefix=GFX600-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=tahiti %s -o - 2>&1 | FileCheck --check-prefix=TAHITI-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx601 %s -o - 2>&1 | FileCheck --check-prefix=GFX601-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=pitcairn %s -o - 2>&1 | FileCheck --check-prefix=PITCAIRN-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=verde %s -o - 2>&1 | FileCheck --check-prefix=VERDE-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=oland %s -o - 2>&1 | FileCheck --check-prefix=OLAND-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=hainan %s -o - 2>&1 | FileCheck --check-prefix=HAINAN-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx700 %s -o - 2>&1 | FileCheck --check-prefix=GFX700-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=bonaire %s -o - 2>&1 | FileCheck --check-prefix=BONAIRE-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=kaveri %s -o - 2>&1 | FileCheck --check-prefix=KAVERI-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx701 %s -o - 2>&1 | FileCheck --check-prefix=GFX701-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=hawaii %s -o - 2>&1 | FileCheck --check-prefix=HAWAII-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx702 %s -o - 2>&1 | FileCheck --check-prefix=GFX702-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx703 %s -o - 2>&1 | FileCheck --check-prefix=GFX703-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=kabini %s -o - 2>&1 | FileCheck --check-prefix=KABINI-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=mullins %s -o - 2>&1 | FileCheck --check-prefix=MULLINS-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx800 %s -o - 2>&1 | FileCheck --check-prefix=GFX800-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=iceland %s -o - 2>&1 | FileCheck --check-prefix=ICELAND-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx801 %s -o - 2>&1 | FileCheck --check-prefix=GFX801-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=carrizo %s -o - 2>&1 | FileCheck --check-prefix=CARRIZO-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx802 %s -o - 2>&1 | FileCheck --check-prefix=GFX802-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=tonga %s -o - 2>&1 | FileCheck --check-prefix=TONGA-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx803 %s -o - 2>&1 | FileCheck --check-prefix=GFX803-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=fiji %s -o - 2>&1 | FileCheck --check-prefix=FIJI-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=polaris10 %s -o - 2>&1 | FileCheck --check-prefix=POLARIS10-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=polaris11 %s -o - 2>&1 | FileCheck --check-prefix=POLARIS11-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx804 %s -o - 2>&1 | FileCheck --check-prefix=GFX804-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx810 %s -o - 2>&1 | FileCheck --check-prefix=GFX810-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=stoney %s -o - 2>&1 | FileCheck --check-prefix=STONEY-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx900 %s -o - 2>&1 | FileCheck --check-prefix=GFX900-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx901 %s -o - 2>&1 | FileCheck --check-prefix=GFX901-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx902 %s -o - 2>&1 | FileCheck --check-prefix=GFX902-CHECK %s
-// RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=gfx903 %s -o - 2>&1 | FileCheck --check-prefix=GFX903-CHECK %s
+// R600:    "-target-cpu" "r600"
+// R630:    "-target-cpu" "r630"
+// RS880:   "-target-cpu" "rs880"
+// RV670:   "-target-cpu" "rv670"
+// RV710:   "-target-cpu" "rv710"
+// RV730:   "-target-cpu" "rv730"
+// RV770:   "-target-cpu" "rv770"
+// CEDAR:   "-target-cpu" "cedar"
+// CYPRESS: "-target-cpu" "cypress"
+// JUNIPER: "-target-cpu" "juniper"
+// REDWOOD: "-target-cpu" "redwood"
+// SUMO:    "-target-cpu" "sumo"
+// BARTS:   "-target-cpu" "barts"
+// CAICOS:  "-target-cpu" "caicos"
+// CAYMAN:  "-target-cpu" "cayman"
+// TURKS:   "-target-cpu" "turks"
 
-// GFX600-CHECK: "-target-cpu" "gfx600"
-// TAHITI-CHECK: "-target-cpu" "tahiti"
-// GFX601-CHECK: "-target-cpu" "gfx601"
-// PITCAIRN-CHECK: "-target-cpu" "pitcairn"
-// VERDE-CHECK: "-target-cpu" "verde"
-// OLAND-CHECK: "-target-cpu" "oland"
-// HAINAN-CHECK: "-target-cpu" "hainan"
-// GFX700-CHECK: "-target-cpu" "gfx700"
-// BONAIRE-CHECK: "-target-cpu" "bonaire"
-// KAVERI-CHECK: "-target-cpu" "kaveri"
-// GFX701-CHECK: "-target-cpu" "gfx701"
-// HAWAII-CHECK: "-target-cpu" "hawaii"
-// GFX702-CHECK: "-target-cpu" "gfx702"
-// GFX703-CHECK: "-target-cpu" "gfx703"
-// KABINI-CHECK: "-target-cpu" "kabini"
-// MULLINS-CHECK: "-target-cpu" "mullins"
-// GFX800-CHECK: "-target-cpu" "gfx800"
-// ICELAND-CHECK: "-target-cpu" "iceland"
-// GFX801-CHECK: "-target-cpu" "gfx801"
-// CARRIZO-CHECK: "-target-cpu" "carrizo"
-// GFX802-CHECK: "-target-cpu" "gfx802"
-// TONGA-CHECK: "-target-cpu" "tonga"
-// GFX803-CHECK: "-target-cpu" "gfx803"
-// FIJI-CHECK: "-target-cpu" "fiji"
-// POLARIS10-CHECK: "-target-cpu" "polaris10"
-// POLARIS11-CHECK: "-target-cpu" "polaris11"
-// GFX804-CHECK: "-target-cpu" "gfx804"
-// GFX810-CHECK: "-target-cpu" "gfx810"
-// STONEY-CHECK: "-target-cpu" "stoney"
-// GFX900-CHECK: "-target-cpu" "gfx900"
-// GFX901-CHECK: "-target-cpu" "gfx901"
-// GFX902-CHECK: "-target-cpu" "gfx902"
-// GFX903-CHECK: "-target-cpu" "gfx903"
+//
+// AMDGCN-based processors.
+//
+
+// RUN: %clang -### -target amdgcn -mcpu=gfx600 %s 2>&1 | FileCheck --check-prefix=GFX600 %s
+// RUN: %clang -### -target amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefix=TAHITI %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx601 %s 2>&1 | FileCheck --check-prefix=GFX601 %s
+// RUN: %clang -### -target amdgcn -mcpu=hainan %s 2>&1 | FileCheck --check-prefix=HAINAN %s
+// RUN: %clang -### -target amdgcn -mcpu=oland %s 2>&1 | FileCheck --check-prefix=OLAND %s
+// RUN: %clang -### -target amdgcn -mcpu=pitcairn %s 2>&1 | FileCheck --check-prefix=PITCAIRN %s
+// RUN: %clang -### -target amdgcn -mcpu=verde %s 2>&1 | FileCheck --check-prefix=VERDE %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx700 %s 2>&1 | FileCheck --check-prefix=GFX700 %s
+// RUN: %clang -### -target amdgcn -mcpu=kaveri %s 2>&1 | FileCheck --check-prefix=KAVERI %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx701 %s 2>&1 | FileCheck --check-prefix=GFX701 %s
+// RUN: %clang -### -target amdgcn -mcpu=hawaii %s 2>&1 | FileCheck --check-prefix=HAWAII %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx702 %s 2>&1 | FileCheck --check-prefix=GFX702 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx703 %s 2>&1 | FileCheck --check-prefix=GFX703 %s
+// RUN: %clang -### -target amdgcn -mcpu=kabini %s 2>&1 | FileCheck --check-prefix=KABINI %s
+// RUN: %clang -### -target amdgcn -mcpu=mullins %s 2>&1 | FileCheck --check-prefix=MULLINS %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx704 %s 2>&1 | FileCheck --check-prefix=GFX704 %s
+// RUN: %clang -### -target amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --check-prefix=BONAIRE %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx801 %s 2>&1 | FileCheck --check-prefix=GFX801 %s
+// RUN: %clang -### -target amdgcn -mcpu=carrizo %s 2>&1 | FileCheck --check-prefix=CARRIZO %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=GFX802 %s
+// RUN: %clang -### -target amdgcn -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=ICELAND %s
+// RUN: %clang -### -target amdgcn -mcpu=tonga %s 2>&1 | FileCheck --check-prefix=TONGA %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx803 %s 2>&1 | FileCheck --check-prefix=GFX803 %s
+// RUN: %clang -### -target amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=FIJI %s
+// RUN: %clang -### -target amdgcn -mcpu=polaris10 %s 2>&1 | FileCheck --check-prefix=POLARIS10 %s
+// RUN: %clang -### -target amdgcn -mcpu=polaris11 %s 2>&1 | FileCheck --check-prefix=POLARIS11 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck --check-prefix=GFX810 %s
+// RUN: %clang -### -target amdgcn -mcpu=stoney %s 2>&1 | FileCheck --check-prefix=STONEY %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=GFX900 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx902 %s 2>&1 | FileCheck --check-prefix=GFX902 %s
+
+// GFX600:    "-target-cpu" "gfx600"
+// TAHITI:    "-target-cpu" "tahiti"
+// GFX601:    "-target-cpu" "gfx601"
+// HAINAN:    "-target-cpu" "hainan"
+// OLAND:     "-target-cpu" "oland"
+// PITCAIRN:  "-target-cpu" "pitcairn"
+// VERDE:     "-target-cpu" "verde"
+// GFX700:    "-target-cpu" "gfx700"
+// KAVERI:    "-target-cpu" "kaveri"
+// GFX701:    "-target-cpu" "gfx701"
+// HAWAII:    "-target-cpu" "hawaii"
+// GFX702:    "-target-cpu" "gfx702"
+// GFX703:    "-target-cpu" "gfx703"
+// KABINI:    "-target-cpu" "kabini"
+// MULLINS:   "-target-cpu" "mullins"
+// GFX704:    "-target-cpu" "gfx704"
+// BONAIRE:   "-target-cpu" "bonaire"
+// GFX801:    "-target-cpu" "gfx801"
+// CARRIZO:   "-target-cpu" "carrizo"
+// GFX802:    "-target-cpu" "gfx802"
+// ICELAND:   "-target-cpu" "iceland"
+// TONGA:     "-target-cpu" "tonga"
+// GFX803:    "-target-cpu" "gfx803"
+// FIJI:      "-target-cpu" "fiji"
+// POLARIS10: "-target-cpu" "polaris10"
+// POLARIS11: "-target-cpu" "polaris11"
+// GFX810:    "-target-cpu" "gfx810"
+// STONEY:    "-target-cpu" "stoney"
+// GFX900:    "-target-cpu" "gfx900"
+// GFX902:    "-target-cpu" "gfx902"
diff --git a/test/Driver/amdgpu-toolchain.c b/test/Driver/amdgpu-toolchain.c
index 52a7197..bd6bf7e 100644
--- a/test/Driver/amdgpu-toolchain.c
+++ b/test/Driver/amdgpu-toolchain.c
@@ -3,4 +3,4 @@
 // AS_LINK: ld.lld{{.*}} "-shared"
 
 // RUN: %clang -### -g -target amdgcn--amdhsa -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
-// DWARF_VER: "-dwarf-version=2"
+// DWARF_VER: "-dwarf-version=5"
diff --git a/test/Driver/ananas.c b/test/Driver/ananas.c
index 2a5b35e..4edc2a1 100644
--- a/test/Driver/ananas.c
+++ b/test/Driver/ananas.c
@@ -7,3 +7,11 @@
 // CHECK-STATIC: crtbegin.o
 // CHECK-STATIC: crtend.o
 // CHECK-STATIC: crtn.o
+
+// RUN: %clang -no-canonical-prefixes -target x86_64-unknown-ananas -shared %s \
+// RUN:   --sysroot=%S/Inputs/ananas-tree -### 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SHARED %s
+// CHECK-SHARED: crti.o
+// CHECK-SHARED: crtbeginS.o
+// CHECK-SHARED: crtendS.o
+// CHECK-SHARED: crtn.o
diff --git a/test/Driver/android-pie.c b/test/Driver/android-pie.c
new file mode 100644
index 0000000..2569c55
--- /dev/null
+++ b/test/Driver/android-pie.c
@@ -0,0 +1,66 @@
+// NO-PIE-NOT: "-pie"
+// PIE: "-pie"
+
+// RUN: %clang %s -### -o %t.o 2>&1 --target=arm-linux-androideabi \
+// RUN:   | FileCheck --check-prefix=NO-PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=arm-linux-android \
+// RUN:   | FileCheck --check-prefix=NO-PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=arm-linux-android14 \
+// RUN:   | FileCheck --check-prefix=NO-PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=arm-linux-android16 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=arm-linux-android24 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+
+// RUN: %clang %s -### -o %t.o 2>&1 --target=mipsel-linux-android \
+// RUN:   | FileCheck --check-prefix=NO-PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=mipsel-linux-android14 \
+// RUN:   | FileCheck --check-prefix=NO-PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=mipsel-linux-android16 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=mipsel-linux-android24 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+
+// RUN: %clang %s -### -o %t.o 2>&1 --target=i686-linux-android \
+// RUN:   | FileCheck --check-prefix=NO-PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=i686-linux-android14 \
+// RUN:   | FileCheck --check-prefix=NO-PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=i686-linux-android16 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=i686-linux-android24 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+
+// RUN: %clang %s -### -o %t.o 2>&1 --target=aarch64-linux-android \
+// RUN:   | FileCheck --check-prefix=PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=aarch64-linux-android24 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+
+// RUN: %clang %s -### -o %t.o 2>&1 --target=arm64-linux-android \
+// RUN:   | FileCheck --check-prefix=PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=arm64-linux-android24 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+
+// RUN: %clang %s -### -o %t.o 2>&1 --target=mips64el-linux-android \
+// RUN:   | FileCheck --check-prefix=PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=mips64el-linux-android24 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+
+// RUN: %clang %s -### -o %t.o 2>&1 --target=x86_64-linux-android \
+// RUN:   | FileCheck --check-prefix=PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 --target=x86_64-linux-android24 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+
+// Override toolchain default setting.
+// RUN: %clang %s -### -o %t.o 2>&1 -pie --target=arm-linux-androideabi \
+// RUN:   | FileCheck --check-prefix=PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 -pie --target=arm-linux-androideabi14 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 -no-pie -pie --target=arm-linux-androideabi24 \
+// RUN:   | FileCheck --check-prefix=PIE %s
+
+// RUN: %clang %s -### -o %t.o 2>&1 -no-pie --target=arm-linux-androideabi24 \
+// RUN:   | FileCheck --check-prefix=NO-PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 -nopie --target=arm-linux-androideabi24 \
+// RUN:   | FileCheck --check-prefix=NO-PIE %s
+// RUN: %clang %s -### -o %t.o 2>&1 -pie -no-pie --target=arm-linux-androideabi24 \
+// RUN:   | FileCheck --check-prefix=NO-PIE %s
diff --git a/test/Driver/arclite-link.c b/test/Driver/arclite-link.c
index 3471bf6..2cc0271 100644
--- a/test/Driver/arclite-link.c
+++ b/test/Driver/arclite-link.c
@@ -1,6 +1,6 @@
 // RUN: touch %t.o
-// RUN: %clang -### -target x86_64-apple-darwin10 -fobjc-link-runtime -lfoo -mmacosx-version-min=10.7 %t.o 2>&1 | FileCheck -check-prefix=CHECK-ARCLITE-OSX %s
-// RUN: %clang -### -target x86_64-apple-darwin10 -fobjc-link-runtime -mmacosx-version-min=10.8 %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOARCLITE %s
+// RUN: %clang -### -target x86_64-apple-darwin10 -fobjc-link-runtime -lfoo -mmacosx-version-min=10.10 %t.o 2>&1 | FileCheck -check-prefix=CHECK-ARCLITE-OSX %s
+// RUN: %clang -### -target x86_64-apple-darwin10 -fobjc-link-runtime -mmacosx-version-min=10.11 %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOARCLITE %s
 // RUN: %clang -### -target i386-apple-darwin10 -fobjc-link-runtime -mmacosx-version-min=10.7 %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOARCLITE %s
 // RUN: %clang -### -target x86_64-apple-darwin10 -fobjc-link-runtime -nostdlib %t.o 2>&1 | FileCheck -check-prefix=CHECK-NOSTDLIB %s
 
@@ -12,6 +12,6 @@
 // CHECK-NOARCLITE-NOT: libarclite
 // CHECK-NOSTDLIB-NOT: -lobjc
 
-// RUN: %clang -### -target x86_64-apple-darwin10 -fobjc-link-runtime -fobjc-arc -mmacosx-version-min=10.7 %s 2>&1 | FileCheck -check-prefix=CHECK-UNUSED %s
+// RUN: %clang -### -target x86_64-apple-darwin10 -fobjc-link-runtime -fobjc-arc -mmacosx-version-min=10.10 %s 2>&1 | FileCheck -check-prefix=CHECK-UNUSED %s
 
 // CHECK-UNUSED-NOT: warning: argument unused during compilation: '-fobjc-link-runtime'
diff --git a/test/Driver/arm-cortex-cpus.c b/test/Driver/arm-cortex-cpus.c
index 8f81782..3c1eb2e 100644
--- a/test/Driver/arm-cortex-cpus.c
+++ b/test/Driver/arm-cortex-cpus.c
@@ -284,13 +284,13 @@
 // RUN: %clang -target arm -march=armebv8.2-a -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V82A-THUMB %s
 // CHECK-BE-V82A-THUMB: "-cc1"{{.*}} "-triple" "thumbebv8.2a-{{.*}}" "-target-cpu" "generic"
 
-// RUN: %clang -target armv8a -march=armv8.2-a+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-V82A-FP16 %s
+// RUN: %clang -target armv8a-linux-eabi -march=armv8.2-a+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-V82A-FP16 %s
 // CHECK-V82A-FP16: "-cc1"{{.*}} "-triple" "armv8.2{{.*}}" "-target-cpu" "generic" {{.*}}"-target-feature" "+fullfp16"
 
 // Once we have CPUs with optional v8.2-A FP16, we will need a way to turn it
 // on and off. Cortex-A53 is a placeholder for now.
-// RUN: %clang -target armv8a -mcpu=cortex-a53+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-CORTEX-A53-FP16 %s
-// RUN: %clang -target armv8a -mcpu=cortex-a53+nofp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-CORTEX-A53-NOFP16 %s
+// RUN: %clang -target armv8a-linux-eabi -mcpu=cortex-a53+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-CORTEX-A53-FP16 %s
+// RUN: %clang -target armv8a-linux-eabi -mcpu=cortex-a53+nofp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-CORTEX-A53-NOFP16 %s
 // CHECK-CORTEX-A53-FP16: "-cc1" {{.*}}"-target-cpu" "cortex-a53" {{.*}}"-target-feature" "+fullfp16"
 // CHECK-CORTEX-A53-NOFP16: "-cc1" {{.*}}"-target-cpu" "cortex-a53" {{.*}}"-target-feature" "-fullfp16"
 
diff --git a/test/Driver/arm-dotprod.c b/test/Driver/arm-dotprod.c
index a895d30..971e0de 100644
--- a/test/Driver/arm-dotprod.c
+++ b/test/Driver/arm-dotprod.c
@@ -4,8 +4,21 @@
 // RUN: %clang -### -target arm -march=armv8.3a %s 2>&1 | FileCheck %s --check-prefix=CHECK-NONE
 // CHECK-NONE-NOT: "-target-feature" "+dotprod"
 
-// RUN: %clang -### -target arm -march=armv8.2a+dotprod %s 2>&1 | FileCheck %s
-// RUN: %clang -### -target arm -march=armv8.3a+dotprod %s 2>&1 | FileCheck %s
-// RUN: %clang -### -target arm -mcpu=cortex-a75 %s 2>&1 | FileCheck %s
-// RUN: %clang -### -target arm -mcpu=cortex-a55 %s 2>&1 | FileCheck %s
+// RUN: %clang -### -target arm-linux-eabi -march=armv8.2a+dotprod %s 2>&1 | FileCheck %s
+// RUN: %clang -### -target arm-linux-eabi -march=armv8.3a+dotprod %s 2>&1 | FileCheck %s
+// RUN: %clang -### -target arm-linux-eabi -mcpu=cortex-a75 %s 2>&1 | FileCheck %s
+// RUN: %clang -### -target arm-linux-eabi -mcpu=cortex-a55 %s 2>&1 | FileCheck %s
 // CHECK: "+dotprod"
+
+// The following default to -msoft-float
+// RUN: %clang -### -target arm -march=armv8.2a+dotprod %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-NO-DOTPROD
+// RUN: %clang -### -target arm -march=armv8.3a+dotprod %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-NO-DOTPROD
+// RUN: %clang -### -target arm -mcpu=cortex-a75 %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-NO-DOTPROD
+// RUN: %clang -### -target arm -mcpu=cortex-a55 %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-NO-DOTPROD
+// We rely on the backend disabling dotprod as it depends on neon, so check that
+// neon is disabled after the dotprod was enabled.
+// CHECK-NO-DOTPROD-NOT: "+dotprod"
diff --git a/test/Driver/arm-mfpu.c b/test/Driver/arm-mfpu.c
index 2e1c00d..2f8d64f 100644
--- a/test/Driver/arm-mfpu.c
+++ b/test/Driver/arm-mfpu.c
@@ -2,6 +2,8 @@
 
 // RUN: %clang -target arm-linux-eabi %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-DEFAULT %s
+// CHECK-DEFAULT-NOT: "-target-feature" "+soft-float"
+// CHECK-DEFAULT: "-target-feature" "+soft-float-abi"
 // CHECK-DEFAULT-NOT: "-target-feature" "+vfp2"
 // CHECK-DEFAULT-NOT: "-target-feature" "+vfp3"
 // CHECK-DEFAULT-NOT: "-target-feature" "+d16"
@@ -19,23 +21,49 @@
 
 // RUN: %clang -target arm-linux-eabi -mfpu=vfp %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP %s
+// RUN: %clang -target arm-linux-eabi -mfpu=vfp %s -mfloat-abi=soft -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-2 %s
+// CHECK-VFP-NOT: "-target-feature" "+soft-float"
+// CHECK-VFP: "-target-feature" "+soft-float-abi"
 // CHECK-VFP: "-target-feature" "+vfp2"
 // CHECK-VFP: "-target-feature" "-vfp3"
 // CHECK-VFP: "-target-feature" "-vfp4"
 // CHECK-VFP: "-target-feature" "-fp-armv8"
 // CHECK-VFP: "-target-feature" "-neon"
+// CHECK-SOFT-ABI-FP-2: "-target-feature" "+soft-float-abi"
+// CHECK-SOFT-ABI-FP-2: "-target-feature" "-vfp3"
+// CHECK-SOFT-ABI-FP-2: "-target-feature" "-vfp4"
+// CHECK-SOFT-ABI-FP-2: "-target-feature" "-fp-armv8"
+// CHECK-SOFT-ABI-FP-2: "-target-feature" "-neon"
+// CHECK-SOFT-ABI-FP-2: "-target-feature" "-crypto"
+// CHECK-SOFT-ABI-FP-2: "-target-feature" "-vfp2"
 
 // RUN: %clang -target arm-linux-eabi -mfpu=vfp3 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP3 %s
 // RUN: %clang -target arm-linux-eabi -mfpu=vfpv3 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP3 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv3 -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s
+// CHECK-VFP3-NOT: "-target-feature" "+soft-float"
+// CHECK-VFP3: "-target-feature" "+soft-float-abi"
 // CHECK-VFP3: "-target-feature" "+vfp3"
 // CHECK-VFP3: "-target-feature" "-vfp4"
 // CHECK-VFP3: "-target-feature" "-fp-armv8"
 // CHECK-VFP3: "-target-feature" "-neon"
+// CHECK-SOFT-ABI-FP-3: "-target-feature" "+soft-float-abi"
+// CHECK-SOFT-ABI-FP-3: "-target-feature" "-vfp2"
+// CHECK-SOFT-ABI-FP-3: "-target-feature" "-vfp4"
+// CHECK-SOFT-ABI-FP-3: "-target-feature" "-fp-armv8"
+// CHECK-SOFT-ABI-FP-3: "-target-feature" "-neon"
+// CHECK-SOFT-ABI-FP-3: "-target-feature" "-crypto"
+// CHECK-SOFT-ABI-FP-3: "-target-feature" "-vfp3"
 
 // RUN: %clang -target arm-linux-eabi -mfpu=vfpv3-fp16 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP3-FP16 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv3-fp16 -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s
+// CHECK-VFP3-FP16-NOT: "-target-feature" "+soft-float"
+// CHECK-VFP3-FP16: "-target-feature" "+soft-float-abi"
 // CHECK-VFP3-FP16: "-target-feature" "-fp-only-sp"
 // CHECK-VFP3-FP16: "-target-feature" "-d16"
 // CHECK-VFP3-FP16: "-target-feature" "+vfp3"
@@ -49,6 +77,10 @@
 // RUN:   | FileCheck --check-prefix=CHECK-VFP3-D16 %s
 // RUN: %clang -target arm-linux-eabi -mfpu=vfpv3-d16 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP3-D16 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv3-d16 -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s
+// CHECK-VFP3-D16-NOT: "-target-feature" "+soft-float"
+// CHECK-VFP3-D16: "-target-feature" "+soft-float-abi"
 // CHECK-VFP3-D16: "-target-feature" "-fp-only-sp"
 // CHECK-VFP3-D16: "-target-feature" "+d16"
 // CHECK-VFP3-D16: "-target-feature" "+vfp3"
@@ -58,6 +90,10 @@
 
 // RUN: %clang -target arm-linux-eabi -mfpu=vfpv3-d16-fp16 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP3-D16-FP16 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv3-d16-fp16 -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s
+// CHECK-VFP3-D16-FP16-NOT: "-target-feature" "+soft-float"
+// CHECK-VFP3-D16-FP16: "-target-feature" "+soft-float-abi"
 // CHECK-VFP3-D16-FP16: "-target-feature" "-fp-only-sp"
 // CHECK-VFP3-D16-FP16: "-target-feature" "+d16"
 // CHECK-VFP3-D16-FP16: "-target-feature" "+vfp3"
@@ -69,6 +105,10 @@
 
 // RUN: %clang -target arm-linux-eabi -mfpu=vfpv3xd %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP3XD %s
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv3xd -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s
+// CHECK-VFP3XD-NOT: "-target-feature" "+soft-float"
+// CHECK-VFP3XD: "-target-feature" "+soft-float-abi"
 // CHECK-VFP3XD: "-target-feature" "+fp-only-sp"
 // CHECK-VFP3XD: "-target-feature" "+d16"
 // CHECK-VFP3XD: "-target-feature" "+vfp3"
@@ -80,6 +120,10 @@
 
 // RUN: %clang -target arm-linux-eabi -mfpu=vfpv3xd-fp16 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP3XD-FP16 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv3xd-fp16 -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s
+// CHECK-VFP3XD-FP16-NOT: "-target-feature" "+soft-float"
+// CHECK-VFP3XD-FP16: "-target-feature" "+soft-float-abi"
 // CHECK-VFP3XD-FP16: "-target-feature" "+fp-only-sp"
 // CHECK-VFP3XD-FP16: "-target-feature" "+d16"
 // CHECK-VFP3XD-FP16: "-target-feature" "+vfp3"
@@ -93,14 +137,29 @@
 // RUN:   | FileCheck --check-prefix=CHECK-VFP4 %s
 // RUN: %clang -target arm-linux-eabi -mfpu=vfpv4 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP4 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv4 -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-4 %s
+// CHECK-VFP4-NOT: "-target-feature" "+soft-float"
+// CHECK-VFP4: "-target-feature" "+soft-float-abi"
 // CHECK-VFP4: "-target-feature" "+vfp4"
 // CHECK-VFP4: "-target-feature" "-fp-armv8"
 // CHECK-VFP4: "-target-feature" "-neon"
+// CHECK-SOFT-ABI-FP-4: "-target-feature" "+soft-float-abi"
+// CHECK-SOFT-ABI-FP-4: "-target-feature" "-vfp2"
+// CHECK-SOFT-ABI-FP-4: "-target-feature" "-vfp3"
+// CHECK-SOFT-ABI-FP-4: "-target-feature" "-fp-armv8"
+// CHECK-SOFT-ABI-FP-4: "-target-feature" "-neon"
+// CHECK-SOFT-ABI-FP-4: "-target-feature" "-crypto"
+// CHECK-SOFT-ABI-FP-4: "-target-feature" "-vfp4"
 
 // RUN: %clang -target arm-linux-eabi -mfpu=vfp4-d16 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP4-D16 %s
 // RUN: %clang -target arm-linux-eabi -mfpu=vfpv4-d16 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP4-D16 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv4-d16 -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-4 %s
+// CHECK-VFP4-D16-NOT: "-target-feature" "+soft-float"
+// CHECK-VFP4-D16: "-target-feature" "+soft-float-abi"
 // CHECK-VFP4-D16: "-target-feature" "-fp-only-sp"
 // CHECK-VFP4-D16: "-target-feature" "+d16"
 // CHECK-VFP4-D16: "-target-feature" "+vfp4"
@@ -111,6 +170,10 @@
 // RUN:   | FileCheck --check-prefix=CHECK-FP4-SP-D16 %s
 // RUN: %clang -target arm-linux-eabi -mfpu=fpv4-sp-d16 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-FP4-SP-D16 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=fpv4-sp-d16 -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-4 %s
+// CHECK-FP4-SP-D16-NOT: "-target-feature" "+soft-float"
+// CHECK-FP4-SP-D16: "-target-feature" "+soft-float-abi"
 // CHECK-FP4-SP-D16: "-target-feature" "+fp-only-sp"
 // CHECK-FP4-SP-D16: "-target-feature" "+d16"
 // CHECK-FP4-SP-D16: "-target-feature" "+vfp4"
@@ -121,6 +184,10 @@
 // RUN:   | FileCheck --check-prefix=CHECK-FP5-SP-D16 %s
 // RUN: %clang -target arm-linux-eabi -mfpu=fpv5-sp-d16 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-FP5-SP-D16 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=fp-armv8-sp-d16 -mfloat-abi=soft %s -### -o %t.o \
+// RUN:   2>&1 | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
+// CHECK-FP5-SP-D16-NOT: "-target-feature" "+soft-float"
+// CHECK-FP5-SP-D16: "-target-feature" "+soft-float-abi"
 // CHECK-FP5-SP-D16: "-target-feature" "+fp-only-sp"
 // CHECK-FP5-SP-D16: "-target-feature" "+d16"
 // CHECK-FP5-SP-D16: "-target-feature" "+fp-armv8"
@@ -131,18 +198,44 @@
 // RUN:   | FileCheck --check-prefix=CHECK-FP5-DP-D16 %s
 // RUN: %clang -target arm-linux-eabi -mfpu=fpv5-dp-d16 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-FP5-DP-D16 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=fpv5-dp-d16 %s -mfloat-abi=soft -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-5 %s
+// CHECK-FP5-DP-D16-NOT: "-target-feature" "+soft-float"
+// CHECK-FP5-DP-D16: "-target-feature" "+soft-float-abi"
 // CHECK-FP5-DP-D16: "-target-feature" "-fp-only-sp"
 // CHECK-FP5-DP-D16: "-target-feature" "+d16"
 // CHECK-FP5-DP-D16: "-target-feature" "+fp-armv8"
 // CHECK-FP5-DP-D16: "-target-feature" "-neon"
 // CHECK-FP5-DP-D16: "-target-feature" "-crypto"
+// CHECK-SOFT-ABI-FP-5: "-target-feature" "+soft-float"
+// CHECK-SOFT-ABI-FP-5: "-target-feature" "+soft-float-abi"
+// CHECK-SOFT-ABI-FP-5: "-target-feature" "-vfp2"
+// CHECK-SOFT-ABI-FP-5: "-target-feature" "-vfp3"
+// CHECK-SOFT-ABI-FP-5: "-target-feature" "-vfp4"
+// CHECK-SOFT-ABI-FP-5: "-target-feature" "-neon"
+// CHECK-SOFT-ABI-FP-5: "-target-feature" "-crypto"
+// CHECK-SOFT-ABI-FP-5: "-target-feature" "-fp-armv8"
 
 // RUN: %clang -target arm-linux-eabi -mfpu=neon %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NEON %s
+// RUN: %clang -target arm-linux-eabi -mfpu=neon -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-6 %s
+// CHECK-NEON-NOT: "-target-feature" "+soft-float"
 // CHECK-NEON: "-target-feature" "+neon"
+// CHECK-SOFT-ABI-FP-6: "-target-feature" "+soft-float-abi"
+// CHECK-SOFT-ABI-FP-6: "-target-feature" "-vfp2"
+// CHECK-SOFT-ABI-FP-6: "-target-feature" "-vfp4"
+// CHECK-SOFT-ABI-FP-6: "-target-feature" "-fp-armv8"
+// CHECK-SOFT-ABI-FP-6: "-target-feature" "-crypto"
+// CHECK-SOFT-ABI-FP-6: "-target-feature" "-vfp3"
+// CHECK-SOFT-ABI-FP-6: "-target-feature" "-neon"
 
 // RUN: %clang -target arm-linux-eabi -mfpu=neon-fp16 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NEON-FP16 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=neon-fp16 -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-6 %s
+// CHECK-NEON-FP16-NOT: "-target-feature" "+soft-float"
+// CHECK-NEON-FP16: "-target-feature" "+soft-float-abi"
 // CHECK-NEON-FP16: "-target-feature" "-fp-only-sp"
 // CHECK-NEON-FP16: "-target-feature" "-d16"
 // CHECK-NEON-FP16: "-target-feature" "+vfp3"
@@ -154,48 +247,78 @@
 
 // RUN: %clang -target arm-linux-eabi -mfpu=neon-vfpv3 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NEON-VFPV3 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=neon-vfpv3 -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-6 %s
+// CHECK-NEON-VFPV3-NOT: "-target-feature" "+soft-float"
+// CHECK-NEON-VFPV3: "-target-feature" "+soft-float-abi"
+// CHECK-NEON-VFPV3: "-target-feature" "+vfp3"
 // CHECK-NEON-VFPV3: "-target-feature" "+neon"
 
 // RUN: %clang -target arm-linux-eabi -mfpu=neon-vfpv4 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NEON-VFPV4 %s
+// RUN: %clang -target arm-linux-eabi -mfpu=neon-vfpv4 -mfloat-abi=soft %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-7 %s
+// CHECK-NEON-VFPV4-NOT: "-target-feature" "+soft-float"
+// CHECK-NEON-VFPV4: "-target-feature" "+soft-float-abi"
 // CHECK-NEON-VFPV4: "-target-feature" "+vfp4"
 // CHECK-NEON-VFPV4: "-target-feature" "+neon"
+// CHECK-SOFT-ABI-FP-7: "-target-feature" "+soft-float-abi"
+// CHECK-SOFT-ABI-FP-7: "-target-feature" "-vfp2"
+// CHECK-SOFT-ABI-FP-7: "-target-feature" "-vfp3"
+// CHECK-SOFT-ABI-FP-7: "-target-feature" "-fp-armv8"
+// CHECK-SOFT-ABI-FP-7: "-target-feature" "-crypto"
+// CHECK-SOFT-ABI-FP-7: "-target-feature" "-vfp4"
+// CHECK-SOFT-ABI-FP-7: "-target-feature" "-neon"
 
 // RUN: %clang -target arm-linux-eabi -msoft-float %s -### -o %t.o 2>&1 \
-// RUN:   | FileCheck --check-prefix=CHECK-SOFT-FLOAT %s
-// CHECK-SOFT-FLOAT: "-target-feature" "-neon"
-
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
 // RUN: %clang -target armv8 %s -### 2>&1 \
-// RUN:   | FileCheck --check-prefix=CHECK-ARMV8-DEFAULT-SOFT-FP %s
-// CHECK-ARMV8-DEFAULT-SOFT-FP: "-target-feature" "-neon"
-// CHECK-ARMV8-DEFAULT-SOFT-FP: "-target-feature" "-crypto"
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
+// RUN: %clang -target armv8a -mfpu=neon %s -### -c 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-8 %s
+// CHECK-SOFT-ABI-FP-8: "-target-feature" "+soft-float-abi"
+// CHECK-SOFT-ABI-FP-8: "-target-feature" "-vfp2"
+// CHECK-SOFT-ABI-FP-8: "-target-feature" "-vfp4"
+// CHECK-SOFT-ABI-FP-8: "-target-feature" "-fp-armv8"
+// CHECK-SOFT-ABI-FP-8: "-target-feature" "-crypto"
+// CHECK-SOFT-ABI-FP-8: "-target-feature" "-vfp3"
+// CHECK-SOFT-ABI-FP-8: "-target-feature" "-neon"
 
 // RUN: %clang -target armv8 -mfpu=fp-armv8 %s -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ARMV8-SOFT-FLOAT %s
-// CHECK-ARMV8-SOFT-FLOAT: "-target-feature" "+fp-armv8"
-// CHECK-ARMV8-SOFT-FLOAT: "-target-feature" "-neon"
+// CHECK-ARMV8-SOFT-FLOAT: "-target-feature" "+soft-float"
+// CHECK-ARMV8-SOFT-FLOAT: "-target-feature" "+soft-float-abi"
+// NOT-CHECK-ARMV8-SOFT-FLOAT: "-target-feature" "+fp-armv8"
+// CHECK-ARMV9-SOFT-FLOAT: "-target-feature" "-neon"
 // CHECK-ARMV8-SOFT-FLOAT: "-target-feature" "-crypto"
 
 // RUN: %clang -target armv8-linux-gnueabihf -mfpu=fp-armv8 %s -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-FP-ARMV8 %s
-// CHECK-FP-ARMV8-NOT: "-target-feature" "+neon"
+// CHECK-FP-ARMV8-NOT: "-target-feature" "+soft-float"
+// CHECK-FP-ARMV8-NOT: "-target-feature" "+soft-float-abi"
 // CHECK-FP-ARMV8: "-target-feature" "+fp-armv8"
 // CHECK-FP-ARMV8: "-target-feature" "-neon"
 // CHECK-FP-ARMV8: "-target-feature" "-crypto"
 
 // RUN: %clang -target armv8-linux-gnueabihf -mfpu=neon-fp-armv8 %s -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NEON-FP-ARMV8 %s
+// CHECK-NEON-FP-ARMV8-NOT: "-target-feature" "+soft-float"
+// CHECK-NEON-FP-ARMV8-NOT: "-target-feature" "+soft-float-abi"
 // CHECK-NEON-FP-ARMV8: "-target-feature" "+fp-armv8"
 // CHECK-NEON-FP-ARMV8: "-target-feature" "+neon"
 // CHECK-NEON-FP-ARMV8: "-target-feature" "-crypto"
 
 // RUN: %clang -target armv8-linux-gnueabihf -mfpu=crypto-neon-fp-armv8 %s -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-CRYPTO-NEON-FP-ARMV8 %s
+// CHECK-CRYPTO-NEON-FP-ARMV8-NOT: "-target-feature" "+soft-float"
+// CHECK-CRYPTO-NEON-FP-ARMV8-NOT: "-target-feature" "+soft-float-abi"
 // CHECK-CRYPTO-NEON-FP-ARMV8: "-target-feature" "+fp-armv8"
 // CHECK-CRYPTO-NEON-FP-ARMV8: "-target-feature" "+crypto"
 
 // RUN: %clang -target armv8-linux-gnueabi -mfpu=none %s -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FP %s
+// CHECK-NO-FP-NOT: "-target-feature" "+soft-float"
+// CHECK-NO-FP: "-target-feature" "+soft-float-abi"
 // CHECK-NO-FP: "-target-feature" "-fp-only-sp"
 // CHECK-NO-FP: "-target-feature" "-d16"
 // CHECK-NO-FP: "-target-feature" "-vfp2"
@@ -209,8 +332,35 @@
 // RUN:   | FileCheck --check-prefix=CHECK-HF %s
 // RUN: %clang -target arm-linux-musleabihf %s -### 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-HF %s
+// CHECK-HF-NOT: "-target-feature" "+soft-float"
+// CHECK-HF-NOT: "-target-feature" "+soft-float-abi"
 // CHECK-HF: "-target-cpu" "arm1176jzf-s"
 
 // RUN: %clang -target armv7-apple-darwin -x assembler %s -### -c 2>&1 \
 // RUN:   | FileCheck --check-prefix=ASM %s
 // ASM-NOT: -target-feature
+
+// RUN: %clang -target armv8-linux-gnueabi -mfloat-abi=soft -mfpu=none %s -### -c 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
+// RUN: %clang -target armv7-linux-gnueabi -mfloat-abi=soft -mfpu=none %s -### -c 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
+// RUN: %clang -target armv6-linux-gnueabi -mfloat-abi=soft -mfpu=none %s -### -c 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
+// RUN: %clang -target armv5-linux-gnueabi -mfloat-abi=soft -mfpu=none %s -### -c 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
+// RUN: %clang -target armv4-linux-gnueabi -mfloat-abi=soft -mfpu=none %s -### -c 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
+// RUN: %clang -target armv8-linux-gnueabi -msoft-float -mfpu=none %s -### -c 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
+// RUN: %clang -target armv8-linux-gnueabi -mfloat-abi=soft %s -### -c 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
+// RUN: %clang -target armv8-linux-gnueabi -msoft-float %s -### -c 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
+// CHECK-SOFT-ABI-FP: "-target-feature" "+soft-float"
+// CHECK-SOFT-ABI-FP: "-target-feature" "+soft-float-abi"
+// CHECK-SOFT-ABI-FP: "-target-feature" "-vfp2"
+// CHECK-SOFT-ABI-FP: "-target-feature" "-vfp3"
+// CHECK-SOFT-ABI-FP: "-target-feature" "-vfp4"
+// CHECK-SOFT-ABI-FP: "-target-feature" "-fp-armv8"
+// CHECK-SOFT-ABI-FP: "-target-feature" "-neon"
+// CHECK-SOFT-ABI-FP: "-target-feature" "-crypto"
diff --git a/test/Driver/arm-target-as-mthumb.s b/test/Driver/arm-target-as-mthumb.s
new file mode 100644
index 0000000..c2c0fd0
--- /dev/null
+++ b/test/Driver/arm-target-as-mthumb.s
@@ -0,0 +1,17 @@
+// Make sure -mthumb does not affect assembler triple, but -Wa,-mthumb or
+// -Xassembler -mthumb does. Also check that -Wa,-mthumb or -Xassembler -mthumb
+// does not affect non assembler files.
+
+// RUN: %clang -target armv7a-linux-gnueabi -### -c -mthumb %s 2>&1 | \
+// RUN: FileCheck -check-prefix=TRIPLE-ARM %s
+// RUN: %clang -target armv7a-linux-gnueabi -### -c -Wa,-mthumb \
+// RUN: %S/Inputs/wildcard1.c  2>&1 | FileCheck -check-prefix=TRIPLE-ARM %s
+
+// TRIPLE-ARM: "-triple" "armv7--linux-gnueabi"
+
+// RUN: %clang -target armv7a-linux-gnueabi -### -c -Wa,-mthumb %s 2>&1 | \
+// RUN: FileCheck -check-prefix=TRIPLE-THUMB %s
+// RUN: %clang -target armv7a-linux-gnueabi -### -c -Xassembler -mthumb %s \
+// RUN: 2>&1 | FileCheck -check-prefix=TRIPLE-THUMB %s
+
+// TRIPLE-THUMB: "-triple" "thumbv7--linux-gnueabi"
diff --git a/test/Driver/arm-wchar_t-defaults.c b/test/Driver/arm-wchar_t-defaults.c
new file mode 100644
index 0000000..8b2ae29
--- /dev/null
+++ b/test/Driver/arm-wchar_t-defaults.c
@@ -0,0 +1,53 @@
+// RUN: %clang -### -target armv7-unknown-none-eabi -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7-unknown-none-eabi -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-UNSIGNED %s
+// RUN: %clang -### -target armv7-unknown-none-eabi -mthumb -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7-unknown-none-eabi -mthumb -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-UNSIGNED %s
+// RUN: %clang -### -target armv7eb-unknown-none-eabi -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7eb-unknown-none-eabi -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-UNSIGNED %s
+// RUN: %clang -### -target armv7eb-unknown-none-eabi -mthumb -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7eb-unknown-none-eabi -mthumb -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-UNSIGNED %s
+// RUN: %clang -### -target aarch64-unknown-none-eabi -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target aarch64-unknown-none-eabi -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-UNSIGNED %s
+// RUN: %clang -### -target aarch64_be-unknown-none-eabi -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target aarch64_be-unknown-none-eabi -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-UNSIGNED %s
+
+// RUN: %clang -### -target armv7-unknown-netbsd -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7-unknown-netbsd -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+// RUN: %clang -### -target armv7-unknown-netbsd -mthumb -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7-unknown-netbsd -mthumb -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+// RUN: %clang -### -target armv7eb-unknown-netbsd -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7eb-unknown-netbsd -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+// RUN: %clang -### -target armv7eb-unknown-netbsd -mthumb -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7eb-unknown-netbsd -mthumb -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+// RUN: %clang -### -target aarch64-unknown-netbsd -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target aarch64-unknown-netbsd -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+// RUN: %clang -### -target aarch64_be-unknown-netbsd -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target aarch64_be-unknown-netbsd -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+
+// RUN: %clang -### -target armv7-unknown-openbsd -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7-unknown-openbsd -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+// RUN: %clang -### -target armv7-unknown-openbsd -mthumb -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7-unknown-openbsd -mthumb -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+// RUN: %clang -### -target armv7eb-unknown-openbsd -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7eb-unknown-openbsd -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+// RUN: %clang -### -target armv7eb-unknown-openbsd -mthumb -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7eb-unknown-openbsd -mthumb -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+// RUN: %clang -### -target aarch64-unknown-openbsd -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target aarch64-unknown-openbsd -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+// RUN: %clang -### -target aarch64_be-unknown-openbsd -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target aarch64_be-unknown-openbsd -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+
+// RUN: %clang -### -target armv7-unknown-windows-msvc -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target armv7-unknown-windows-msvc -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+// RUN: %clang -### -target aarch64-unknown-windows-msvc -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHORT %s
+// RUN: %clang -### -target aarch64-unknown-windows-msvc -fno-short-wchar -c %s -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-LONG-SIGNED %s
+
+// CHECK-SHORT-NOT: "-fwchar-type=int"
+// CHECK-SHORT-NOT: "-fno-signed-wchar"
+
+// CHECK-LONG-UNSIGNED: "-fwchar-type=int"
+// CHECK-LONG-UNSIGNED: "-fno-signed-wchar"
+
+// CHECK-LONG-SIGNED: "-fwchar-type=int"
+// CHECK-LONG-SIGNED: "-fsigned-wchar"
+
diff --git a/test/Driver/as-mcpu.c b/test/Driver/as-mcpu.c
new file mode 100644
index 0000000..23528b1
--- /dev/null
+++ b/test/Driver/as-mcpu.c
@@ -0,0 +1,11 @@
+// ================== Check that krait is substituted by cortex-a15 when invoking
+// the assembler
+// RUN: %clang -target arm-linux -mcpu=krait -### -c %s -v -fno-integrated-as 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A15 %s
+// CHECK-CORTEX-A15: as{{(.exe)?}}" "{{.*}}-mcpu=cortex-a15
+
+// ================== Check that kryo is substituted by cortex-a57 when invoking
+// the assembler
+// RUN: %clang -target arm-linux -mcpu=kryo -### -c %s -v -fno-integrated-as 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A57 %s
+// RUN: %clang -target aarch64-linux -mcpu=kryo -### -c %s -v -fno-integrated-as 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A57 %s
+
+// CHECK-CORTEX-A57: as{{(.exe)?}}" "{{.*}}-mcpu=cortex-a57
diff --git a/test/Driver/asan.c b/test/Driver/asan.c
index 4db103d..288666d 100644
--- a/test/Driver/asan.c
+++ b/test/Driver/asan.c
@@ -6,8 +6,13 @@
 // RUN: %clang -O1 -target i386-unknown-linux -fsanitize=kernel-address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KASAN
 // RUN: %clang -O2 -target i386-unknown-linux -fsanitize=kernel-address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KASAN
 // RUN: %clang -O3 -target i386-unknown-linux -fsanitize=kernel-address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KASAN
+// RUN: %clang     -target aarch64-unknown-linux -fsanitize=hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-HWASAN
+// RUN: %clang -O1 -target aarch64-unknown-linux -fsanitize=hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-HWASAN
+// RUN: %clang -O2 -target aarch64-unknown-linux -fsanitize=hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-HWASAN
+// RUN: %clang -O3 -target aarch64-unknown-linux -fsanitize=hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-HWASAN
 // Verify that -fsanitize={address,kernel-address} invoke ASan and KASan instrumentation.
 
 int foo(int *a) { return *a; }
 // CHECK-ASAN: __asan_init
 // CHECK-KASAN: __asan_load4_noabort
+// CHECK-HWASAN: __hwasan_init
diff --git a/test/Driver/autocomplete.c b/test/Driver/autocomplete.c
index 8b6d6cf..1a96474 100644
--- a/test/Driver/autocomplete.c
+++ b/test/Driver/autocomplete.c
@@ -3,13 +3,8 @@
 // add/modify flags, change HelpTexts or the values of some flags.
 
 // Some corner cases.
-// RUN: %clang --autocomplete= | FileCheck %s -check-prefix=ALL_FLAGS
-// RUN: %clang --autocomplete=# | FileCheck %s -check-prefix=ALL_FLAGS
-// Let's pick some example flags that are hopefully unlikely to change.
-// ALL_FLAGS: -fast
-// ALL_FLAGS: -fastcp
-// ALL_FLAGS: -fastf
 // Just test that this doesn't crash:
+// RUN: %clang --autocomplete=
 // RUN: %clang --autocomplete=,
 // RUN: %clang --autocomplete==
 // RUN: %clang --autocomplete=,,
@@ -17,27 +12,27 @@
 
 // RUN: %clang --autocomplete=-fsyn | FileCheck %s -check-prefix=FSYN
 // FSYN: -fsyntax-only
-// RUN: %clang --autocomplete=-std= | FileCheck %s -check-prefix=STD
+// RUN: %clang --autocomplete=-std | FileCheck %s -check-prefix=STD
 // STD: -std= Language standard to compile for
 // RUN: %clang --autocomplete=foo | FileCheck %s -check-prefix=FOO
 // FOO-NOT: foo
 // RUN: %clang --autocomplete=-stdlib=,l | FileCheck %s -check-prefix=STDLIB
 // STDLIB: libc++
 // STDLIB-NEXT: libstdc++
-// RUN: %clang --autocomplete=-stdlib=, | FileCheck %s -check-prefix=STDLIBALL
+// RUN: %clang --autocomplete=-stdlib= | FileCheck %s -check-prefix=STDLIBALL
 // STDLIBALL: libc++
 // STDLIBALL-NEXT: libstdc++
 // STDLIBALL-NEXT: platform
 // RUN: %clang --autocomplete=-meabi,d | FileCheck %s -check-prefix=MEABI
 // MEABI: default
-// RUN: %clang --autocomplete=-meabi, | FileCheck %s -check-prefix=MEABIALL
+// RUN: %clang --autocomplete=-meabi | FileCheck %s -check-prefix=MEABIALL
 // MEABIALL: 4
 // MEABIALL-NEXT: 5
 // MEABIALL-NEXT: default
 // MEABIALL-NEXT: gnu
 // RUN: %clang --autocomplete=-cl-std=,CL2 | FileCheck %s -check-prefix=CLSTD
 // CLSTD: CL2.0
-// RUN: %clang --autocomplete=-cl-std=, | FileCheck %s -check-prefix=CLSTDALL
+// RUN: %clang --autocomplete=-cl-std= | FileCheck %s -check-prefix=CLSTDALL
 // CLSTDALL: cl
 // CLSTDALL-NEXT: CL
 // CLSTDALL-NEXT: cl1.1
@@ -48,7 +43,7 @@
 // CLSTDALL-NEXT: CL2.0
 // RUN: %clang --autocomplete=-fno-sanitize-coverage=,f | FileCheck %s -check-prefix=FNOSANICOVER
 // FNOSANICOVER: func
-// RUN: %clang --autocomplete=-fno-sanitize-coverage=, | FileCheck %s -check-prefix=FNOSANICOVERALL
+// RUN: %clang --autocomplete=-fno-sanitize-coverage= | FileCheck %s -check-prefix=FNOSANICOVERALL
 // FNOSANICOVERALL: 8bit-counters
 // FNOSANICOVERALL-NEXT: bb
 // FNOSANICOVERALL-NEXT: edge
@@ -62,41 +57,37 @@
 // FNOSANICOVERALL-NEXT: trace-gep
 // FNOSANICOVERALL-NEXT: trace-pc
 // FNOSANICOVERALL-NEXT: trace-pc-guard
-// RUN: %clang --autocomplete=-ffp-contract=, | FileCheck %s -check-prefix=FFPALL
+// RUN: %clang --autocomplete=-ffp-contract= | FileCheck %s -check-prefix=FFPALL
 // FFPALL: fast
 // FFPALL-NEXT: off
 // FFPALL-NEXT: on
-// RUN: %clang --autocomplete=-flto=, | FileCheck %s -check-prefix=FLTOALL
+// RUN: %clang --autocomplete=-flto= | FileCheck %s -check-prefix=FLTOALL
 // FLTOALL: full
 // FLTOALL-NEXT: thin
-// RUN: %clang --autocomplete=-fveclib=, | FileCheck %s -check-prefix=FVECLIBALL
+// RUN: %clang --autocomplete=-fveclib= | FileCheck %s -check-prefix=FVECLIBALL
 // FVECLIBALL: Accelerate
 // FVECLIBALL-NEXT: none
 // FVECLIBALL-NEXT: SVML
-// RUN: %clang --autocomplete=-fshow-overloads=, | FileCheck %s -check-prefix=FSOVERALL
+// RUN: %clang --autocomplete=-fshow-overloads= | FileCheck %s -check-prefix=FSOVERALL
 // FSOVERALL: all
 // FSOVERALL-NEXT: best
-// RUN: %clang --autocomplete=-fvisibility=, | FileCheck %s -check-prefix=FVISIBILITYALL
+// RUN: %clang --autocomplete=-fvisibility= | FileCheck %s -check-prefix=FVISIBILITYALL
 // FVISIBILITYALL: default
 // FVISIBILITYALL-NEXT: hidden
-// RUN: %clang --autocomplete=-mfloat-abi=, | FileCheck %s -check-prefix=MFLOATABIALL
+// RUN: %clang --autocomplete=-mfloat-abi= | FileCheck %s -check-prefix=MFLOATABIALL
 // MFLOATABIALL: hard
 // MFLOATABIALL-NEXT: soft
 // MFLOATABIALL-NEXT: softfp
-// RUN: %clang --autocomplete=-mthread-model, | FileCheck %s -check-prefix=MTHREADMODELALL
+// RUN: %clang --autocomplete=-mthread-model | FileCheck %s -check-prefix=MTHREADMODELALL
 // MTHREADMODELALL: posix
 // MTHREADMODELALL-NEXT: single
-// RUN: %clang --autocomplete=-mrelocation-model, | FileCheck %s -check-prefix=MRELOCMODELALL
+// RUN: %clang --autocomplete=-mrelocation-model | FileCheck %s -check-prefix=MRELOCMODELALL
 // MRELOCMODELALL: dynamic-no-pic
 // MRELOCMODELALL-NEXT: pic
 // MRELOCMODELALL-NEXT: ropi
 // MRELOCMODELALL-NEXT: ropi-rwpi
 // MRELOCMODELALL-NEXT: rwpi
 // MRELOCMODELALL-NEXT: static
-// RUN: %clang --autocomplete=-mrelocation-mode | FileCheck %s -check-prefix=MRELOCMODEL_CLANG
-// MRELOCMODEL_CLANG-NOT: -mrelocation-model
-// RUN: %clang --autocomplete=#-mrelocation-mode | FileCheck %s -check-prefix=MRELOCMODEL_CC1
-// MRELOCMODEL_CC1: -mrelocation-model
 // RUN: %clang --autocomplete=-Wma | FileCheck %s -check-prefix=WARNING
 // WARNING: -Wmacro-redefined
 // WARNING-NEXT: -Wmain
@@ -106,7 +97,20 @@
 // WARNING-NEXT: -Wmax-unsigned-zero
 // RUN: %clang --autocomplete=-Wno-invalid-pp- | FileCheck %s -check-prefix=NOWARNING
 // NOWARNING: -Wno-invalid-pp-token
-// RUN: %clang --autocomplete=-analyzer-checker, | FileCheck %s -check-prefix=ANALYZER
+// RUN: %clang --autocomplete=-analyzer-checker | FileCheck %s -check-prefix=ANALYZER
 // ANALYZER: unix.Malloc
-// RUN: %clang --autocomplete=-std=, | FileCheck %s -check-prefix=STDVAL
+// RUN: %clang --autocomplete=-std= | FileCheck %s -check-prefix=STDVAL
 // STDVAL: c99
+//
+// Clang shouldn't autocomplete CC1 options unless -cc1 or -Xclang were provided
+// RUN: %clang --autocomplete=-mrelocation-mode | FileCheck %s -check-prefix=MRELOCMODEL_CLANG
+// MRELOCMODEL_CLANG-NOT: -mrelocation-model
+// RUN: %clang --autocomplete=-Xclang,-mrelocation-mode | FileCheck %s -check-prefix=MRELOCMODEL_CC1
+// RUN: %clang --autocomplete=-cc1,-mrelocation-mode | FileCheck %s -check-prefix=MRELOCMODEL_CC1
+// MRELOCMODEL_CC1: -mrelocation-model
+// Make sure it ignores passed flags unlesss they are -Xclang or -cc1
+// RUN: %clang --autocomplete=foo,bar,,-fsyn | FileCheck %s -check-prefix=FSYN-CORON
+// FSYN-CORON: -fsyntax-only
+// Check if they can autocomplete values with coron
+// RUN: %clang --autocomplete=foo,bar,,,-fno-sanitize-coverage=,f | FileCheck %s -check-prefix=FNOSANICOVER-CORON
+// FNOSANICOVER-CORON: func
diff --git a/test/Driver/cl-cc-flags.c b/test/Driver/cl-cc-flags.c
index 76f116e..d74062a 100644
--- a/test/Driver/cl-cc-flags.c
+++ b/test/Driver/cl-cc-flags.c
@@ -13,6 +13,9 @@
 // RUN: %clang_cl --target=i686-windows-msvc /Gv -### -- %s 2>&1 | FileCheck --check-prefix=VECTORCALL %s
 // VECTORCALL: -fdefault-calling-conv=vectorcall
 
+// RUN: %clang_cl --target=i686-windows-msvc /Gregcall -### -- %s 2>&1 | FileCheck --check-prefix=REGCALL %s
+// REGCALL: -fdefault-calling-conv=regcall
+
 // Last one should win:
 
 // RUN: %clang_cl --target=i686-windows-msvc /Gd /Gv -### -- %s 2>&1 | FileCheck --check-prefix=LASTWINS_VECTOR %s
diff --git a/test/Driver/cl-include.c b/test/Driver/cl-include.c
index d3dc006..a69265d 100644
--- a/test/Driver/cl-include.c
+++ b/test/Driver/cl-include.c
@@ -10,5 +10,16 @@
 // RUN: env INCLUDE=/my/system/inc %clang_cl -### -- %s 2>&1 | FileCheck %s --check-prefix=STDINC
 // STDINC: "-internal-isystem" "/my/system/inc"
 
-// RUN: env INCLUDE=/my/system/inc %clang_cl -nostdinc -### -- %s 2>&1 | FileCheck %s --check-prefix=NOSTDINC
+// -nostdinc suppresses all of %INCLUDE%, clang resource dirs, and -imsvc dirs.
+// RUN: env INCLUDE=/my/system/inc %clang_cl -nostdinc -imsvc /my/other/inc -### -- %s 2>&1 | FileCheck %s --check-prefix=NOSTDINC
+// NOSTDINC: argument unused{{.*}}-imsvc
 // NOSTDINC-NOT: "-internal-isystem" "/my/system/inc"
+// NOSTDINC-NOT: "-internal-isystem" "{{.*lib.*clang.*include}}"
+// NOSTDINC-NOT: "-internal-isystem" "/my/other/inc"
+
+// /X suppresses %INCLUDE% but not clang resource dirs or -imsvc dirs.
+// RUN: env INCLUDE=/my/system/inc %clang_cl /X -imsvc /my/other/inc -### -- %s 2>&1 | FileCheck %s --check-prefix=SLASHX
+// SLASHX-NOT: "argument unused{{.*}}-imsvc"
+// SLASHX-NOT: "-internal-isystem" "/my/system/inc"
+// SLASHX: "-internal-isystem" "{{.*lib.*clang.*include}}"
+// SLASHX: "-internal-isystem" "/my/other/inc"
diff --git a/test/Driver/cl-options.c b/test/Driver/cl-options.c
index b78bb9d..21d8264 100644
--- a/test/Driver/cl-options.c
+++ b/test/Driver/cl-options.c
@@ -197,8 +197,12 @@
 
 // RUN: %clang_cl /E /showIncludes -### -- %s 2>&1 | FileCheck -check-prefix=showIncludes_E %s
 // RUN: %clang_cl /EP /showIncludes -### -- %s 2>&1 | FileCheck -check-prefix=showIncludes_E %s
+// RUN: %clang_cl /E /EP /showIncludes -### -- %s 2>&1 | FileCheck -check-prefix=showIncludes_E %s
 // showIncludes_E: warning: argument unused during compilation: '--show-includes'
 
+// RUN: %clang_cl /EP /P /showIncludes -### -- %s 2>&1 | FileCheck -check-prefix=showIncludes_E_And_P %s
+// showIncludes_E_And_P-NOT: warning: argument unused during compilation: '--show-includes'
+
 // /source-charset: should warn on everything except UTF-8.
 // RUN: %clang_cl /source-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=source-charset-utf-16 %s
 // source-charset-utf-16: invalid value 'utf-16'
@@ -251,9 +255,10 @@
 // RUN: %clang_cl /W2 -### -- %s 2>&1 | FileCheck -check-prefix=W1 %s
 // RUN: %clang_cl /W3 -### -- %s 2>&1 | FileCheck -check-prefix=W1 %s
 // RUN: %clang_cl /W4 -### -- %s 2>&1 | FileCheck -check-prefix=W4 %s
-// RUN: %clang_cl /Wall -### -- %s 2>&1 | FileCheck -check-prefix=W4 %s
+// RUN: %clang_cl /Wall -### -- %s 2>&1 | FileCheck -check-prefix=Weverything %s
 // W1: -Wall
 // W4: -WCL4
+// Weverything: -Weverything
 
 // RUN: %clang_cl /WX -### -- %s 2>&1 | FileCheck -check-prefix=WX %s
 // WX: -Werror
@@ -288,6 +293,9 @@
 // RUN: %clang_cl /TP /c /GX /GX- -### -- %s 2>&1 | FileCheck -check-prefix=GX_ %s
 // GX_-NOT: "-fcxx-exceptions" "-fexceptions"
 
+// RUN: %clang_cl /d1PP -### -- %s 2>&1 | FileCheck -check-prefix=d1PP %s
+// d1PP: -dD
+
 // We forward any unrecognized -W diagnostic options to cc1.
 // RUN: %clang_cl -Wunused-pragmas -### -- %s 2>&1 | FileCheck -check-prefix=WJoined %s
 // WJoined: "-cc1"
@@ -339,6 +347,7 @@
 // RUN:    /kernel- \
 // RUN:    /nologo \
 // RUN:    /openmp- \
+// RUN:    /permissive- \
 // RUN:    /RTC1 \
 // RUN:    /sdl \
 // RUN:    /sdl- \
@@ -371,6 +380,8 @@
 // (/Zs is for syntax-only)
 // RUN: %clang_cl /Zs \
 // RUN:     /AIfoo \
+// RUN:     /Bt \
+// RUN:     /Bt+ \
 // RUN:     /clr:pure \
 // RUN:     /docname \
 // RUN:     /EHsc \
@@ -570,6 +581,7 @@
 // RUN:     -fstandalone-debug \
 // RUN:     -flimit-debug-info \
 // RUN:     -flto \
+// RUN:     --version \
 // RUN:     -Werror /Zs -- %s 2>&1
 
 
diff --git a/test/Driver/cl-pch-search.cpp b/test/Driver/cl-pch-search.cpp
index ca62668..5c072c3 100644
--- a/test/Driver/cl-pch-search.cpp
+++ b/test/Driver/cl-pch-search.cpp
@@ -2,5 +2,5 @@
 // command-line option, e.g. on Mac where %s is commonly under /Users.
 
 // REQUIRES: x86-registered-target
-// Check that pchfile.h next to to pchfile.cc is found correctly.
-// RUN: %clang_cl -Werror --target=x86_64 /Ycpchfile.h /FIpchfile.h /c /Fo%t.obj /Fp%t.pch -- %S/Inputs/pchfile.cpp
+// Check that pchfile.h next to pchfile.cc is found correctly.
+// RUN: %clang_cl -Werror --target=x86_64-windows /Ycpchfile.h /FIpchfile.h /c /Fo%t.obj /Fp%t.pch -- %S/Inputs/pchfile.cpp
diff --git a/test/Driver/cl-pch-showincludes.cpp b/test/Driver/cl-pch-showincludes.cpp
index 7e0e109..75a634b 100644
--- a/test/Driver/cl-pch-showincludes.cpp
+++ b/test/Driver/cl-pch-showincludes.cpp
@@ -8,14 +8,14 @@
 
 // When building the pch, header1.h (included by header2.h), header2.h (the pch
 // input itself) and header3.h (included directly, above) should be printed.
-// RUN: %clang_cl -Werror --target=x86_64 /showIncludes /I%S/Inputs /Ycheader2.h /FIheader2.h /Fp%t.pch /c /Fo%t -- %s \
+// RUN: %clang_cl -Werror --target=x86_64-windows /showIncludes /I%S/Inputs /Ycheader2.h /FIheader2.h /Fp%t.pch /c /Fo%t -- %s \
 // RUN:   | FileCheck --strict-whitespace -check-prefix=CHECK-YC %s
 // CHECK-YC: Note: including file: {{[^ ]*header2.h}}
 // CHECK-YC: Note: including file:  {{[^ ]*header1.h}}
 // CHECK-YC: Note: including file: {{[^ ]*header3.h}}
 
 // When using the pch, only the direct include is printed.
-// RUN: %clang_cl -Werror --target=x86_64 /showIncludes /I%S/Inputs /Yuheader2.h /FIheader2.h /Fp%t.pch /c /Fo%t -- %s \
+// RUN: %clang_cl -Werror --target=x86_64-windows /showIncludes /I%S/Inputs /Yuheader2.h /FIheader2.h /Fp%t.pch /c /Fo%t -- %s \
 // RUN:   | FileCheck --strict-whitespace -check-prefix=CHECK-YU %s
 // CHECK-YU-NOT: Note: including file: {{.*pch}}
 // CHECK-YU-NOT: Note: including file: {{.*header1.h}}
@@ -23,7 +23,7 @@
 // CHECK-YU: Note: including file: {{[^ ]*header3.h}}
 
 // When not using pch at all, all the /FI files are printed.
-// RUN: %clang_cl -Werror --target=x86_64 /showIncludes /I%S/Inputs /FIheader2.h /c /Fo%t -- %s \
+// RUN: %clang_cl -Werror --target=x86_64-windows /showIncludes /I%S/Inputs /FIheader2.h /c /Fo%t -- %s \
 // RUN:   | FileCheck --strict-whitespace -check-prefix=CHECK-FI %s
 // CHECK-FI: Note: including file: {{[^ ]*header2.h}}
 // CHECK-FI: Note: including file:  {{[^ ]*header1.h}}
@@ -32,7 +32,7 @@
 // Also check that /FI arguments before the /Yc / /Yu flags are printed right.
 
 // /FI flags before the /Yc arg should be printed, /FI flags after it shouldn't.
-// RUN: %clang_cl -Werror --target=x86_64 /showIncludes /I%S/Inputs /Ycheader2.h /FIheader0.h /FIheader2.h /FIheader4.h /Fp%t.pch /c /Fo%t -- %s \
+// RUN: %clang_cl -Werror --target=x86_64-windows /showIncludes /I%S/Inputs /Ycheader2.h /FIheader0.h /FIheader2.h /FIheader4.h /Fp%t.pch /c /Fo%t -- %s \
 // RUN:   | FileCheck --strict-whitespace -check-prefix=CHECK-YCFI %s
 // CHECK-YCFI: Note: including file: {{[^ ]*header0.h}}
 // CHECK-YCFI: Note: including file: {{[^ ]*header2.h}}
@@ -40,7 +40,7 @@
 // CHECK-YCFI: Note: including file: {{[^ ]*header4.h}}
 // CHECK-YCFI: Note: including file: {{[^ ]*header3.h}}
 
-// RUN: %clang_cl -Werror --target=x86_64 /showIncludes /I%S/Inputs /Yuheader2.h /FIheader0.h /FIheader2.h /FIheader4.h /Fp%t.pch /c /Fo%t -- %s \
+// RUN: %clang_cl -Werror --target=x86_64-windows /showIncludes /I%S/Inputs /Yuheader2.h /FIheader0.h /FIheader2.h /FIheader4.h /Fp%t.pch /c /Fo%t -- %s \
 // RUN:   | FileCheck --strict-whitespace -check-prefix=CHECK-YUFI %s
 // CHECK-YUFI-NOT: Note: including file: {{.*pch}}
 // CHECK-YUFI-NOT: Note: including file: {{.*header0.h}}
diff --git a/test/Driver/cl-x86-flags.c b/test/Driver/cl-x86-flags.c
index 32cd072..595959b 100644
--- a/test/Driver/cl-x86-flags.c
+++ b/test/Driver/cl-x86-flags.c
@@ -1,88 +1,132 @@
 // REQUIRES: x86-registered-target
 
+// expected-no-diagnostics
+
 // We support -m32 and -m64.  We support all x86 CPU feature flags in gcc's -m
 // flag space.
 // RUN: %clang_cl /Zs /WX -m32 -m64 -msse3 -msse4.1 -mavx -mno-avx \
 // RUN:     --target=i386-pc-win32 -### -- 2>&1 %s | FileCheck -check-prefix=MFLAGS %s
 // MFLAGS-NOT: argument unused during compilation
 
-// RUN: %clang_cl -m32 -arch:IA32 --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=IA32 %s
-// IA32: "-target-cpu" "i386"
-// IA32-NOT: -target-feature
-// IA32-NOT: argument unused during compilation
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_32_ARCH_IA32 -- %s
+#if defined(TEST_32_ARCH_IA32)
+#if _M_IX86_FP || __AVX__ || __AVX2__ || __AVX512F__  || __AVX512BW__
+#error fail
+#endif
+#endif
 
-// RUN: %clang_cl -m32 -arch:ia32 --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=ia32 %s
+// arch: args are case-sensitive.
+// RUN: %clang_cl -m32 -arch:ia32 --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=ia32 %s
 // ia32: argument unused during compilation
-// ia32-NOT: -target-feature
 
-// RUN: %clang_cl -m64 -arch:IA32 --target=x86_64 -### -- 2>&1 %s | FileCheck -check-prefix=IA3264 %s
+// RUN: %clang_cl -m64 -arch:IA32 --target=x86_64-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=IA3264 %s
 // IA3264: argument unused during compilation
-// IA3264-NOT: -target-feature
 
-// RUN: %clang_cl -m32 -arch:SSE --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=SSE %s
-// SSE: "-target-cpu" "pentium3"
-// SSE: -target-feature
-// SSE: +sse
-// SSE-NOT: argument unused during compilation
+// RUN: %clang_cl -m32 -arch:SSE --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_32_ARCH_SSE -- %s
+#if defined(TEST_32_ARCH_SSE)
+#if _M_IX86_FP != 1 || __AVX__ || __AVX2__ || __AVX512F__  || __AVX512BW__
+#error fail
+#endif
+#endif
 
-// RUN: %clang_cl -m32 -arch:sse --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=sse %s
+// RUN: %clang_cl -m32 -arch:sse --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=sse %s
 // sse: argument unused during compilation
-// sse-NOT: -target-feature
 
-// RUN: %clang_cl -m32 -arch:SSE2 --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=SSE2 %s
-// SSE2: "-target-cpu" "pentium4"
-// SSE2: -target-feature
-// SSE2: +sse2
-// SSE2-NOT: argument unused during compilation
+// RUN: %clang_cl -m32 -arch:SSE2 --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_32_ARCH_SSE2 -- %s
+#if defined(TEST_32_ARCH_SSE2)
+#if _M_IX86_FP != 2 || __AVX__ || __AVX2__ || __AVX512F__  || __AVX512BW__
+#error fail
+#endif
+#endif
 
-// RUN: %clang_cl -m32 -arch:sse2 --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=sse %s
+// RUN: %clang_cl -m32 -arch:sse2 --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=sse %s
 // sse2: argument unused during compilation
-// sse2-NOT: -target-feature
 
-// RUN: %clang_cl -m64 -arch:SSE --target=x86_64 -### -- 2>&1 %s | FileCheck -check-prefix=SSE64 %s
+// RUN: %clang_cl -m64 -arch:SSE --target=x86_64-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=SSE64 %s
 // SSE64: argument unused during compilation
-// SSE64-NOT: -target-feature
-// SSE64-NOT: pentium3
 
-// RUN: %clang_cl -m64 -arch:SSE2 --target=x86_64 -### -- 2>&1 %s | FileCheck -check-prefix=SSE264 %s
+// RUN: %clang_cl -m64 -arch:SSE2 --target=x86_64-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=SSE264 %s
 // SSE264: argument unused during compilation
-// SSE264-NOT: -target-feature
 
-// RUN: %clang_cl -m32 -arch:AVX --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=AVX %s
-// AVX: "-target-cpu" "sandybridge"
-// AVX: -target-feature
-// AVX: +avx
+// RUN: %clang_cl -m32 -arch:AVX --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_32_ARCH_AVX -- %s
+#if defined(TEST_32_ARCH_AVX)
+#if _M_IX86_FP != 2 || !__AVX__ || __AVX2__ || __AVX512F__  || __AVX512BW__
+#error fail
+#endif
+#endif
 
-// RUN: %clang_cl -m32 -arch:avx --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=avx %s
+// RUN: %clang_cl -m32 -arch:avx --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx %s
 // avx: argument unused during compilation
-// avx-NOT: -target-feature
 
-// RUN: %clang_cl -m32 -arch:AVX2 --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=AVX2 %s
-// AVX2: "-target-cpu" "haswell"
-// AVX2: -target-feature
-// AVX2: +avx2
+// RUN: %clang_cl -m32 -arch:AVX2 --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_32_ARCH_AVX2 -- %s
+#if defined(TEST_32_ARCH_AVX2)
+#if _M_IX86_FP != 2 || !__AVX__ || !__AVX2__ || __AVX512F__  || __AVX512BW__
+#error fail
+#endif
+#endif
 
-// RUN: %clang_cl -m32 -arch:avx2 --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=avx2 %s
+// RUN: %clang_cl -m32 -arch:avx2 --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx2 %s
 // avx2: argument unused during compilation
-// avx2-NOT: -target-feature
 
-// RUN: %clang_cl -m64 -arch:AVX --target=x86_64 -### -- 2>&1 %s | FileCheck -check-prefix=AVX64 %s
-// AVX64: "-target-cpu" "sandybridge"
-// AVX64: -target-feature
-// AVX64: +avx
+// RUN: %clang_cl -m32 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_32_ARCH_AVX512F -- %s
+#if defined(TEST_32_ARCH_AVX512F)
+#if _M_IX86_FP != 2 || !__AVX__ || !__AVX2__ || !__AVX512F__  || __AVX512BW__
+#error fail
+#endif
+#endif
 
-// RUN: %clang_cl -m64 -arch:avx --target=x86_64 -### -- 2>&1 %s | FileCheck -check-prefix=avx64 %s
+// RUN: %clang_cl -m32 -arch:avx512f --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx512f %s
+// avx512f: argument unused during compilation
+
+// RUN: %clang_cl -m32 -arch:AVX512 --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_32_ARCH_AVX512 -- %s
+#if defined(TEST_32_ARCH_AVX512)
+#if _M_IX86_FP != 2 || !__AVX__ || !__AVX2__ || !__AVX512F__  || !__AVX512BW__
+#error fail
+#endif
+#endif
+
+// RUN: %clang_cl -m32 -arch:avx512 --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx512 %s
+// avx512: argument unused during compilation
+
+// RUN: %clang_cl -m64 -arch:AVX --target=x86_64-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_64_ARCH_AVX -- %s
+#if defined(TEST_64_ARCH_AVX)
+#if _M_IX86_FP || !__AVX__ || __AVX2__ || __AVX512F__  || __AVX512BW__
+#error fail
+#endif
+#endif
+
+// RUN: %clang_cl -m64 -arch:avx --target=x86_64-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx64 %s
 // avx64: argument unused during compilation
-// avx64-NOT: -target-feature
 
-// RUN: %clang_cl -m64 -arch:AVX2 --target=x86_64 -### -- 2>&1 %s | FileCheck -check-prefix=AVX264 %s
-// AVX264: "-target-cpu" "haswell"
-// AVX264: -target-feature
-// AVX264: +avx2
+// RUN: %clang_cl -m64 -arch:AVX2 --target=x86_64-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_64_ARCH_AVX2 -- %s
+#if defined(TEST_64_ARCH_AVX2)
+#if _M_IX86_FP || !__AVX__ || !__AVX2__ || __AVX512F__  || __AVX512BW__
+#error fail
+#endif
+#endif
 
-// RUN: %clang_cl -m64 -arch:avx2 --target=x86_64 -### -- 2>&1 %s | FileCheck -check-prefix=avx264 %s
+// RUN: %clang_cl -m64 -arch:avx2 --target=x86_64-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx264 %s
 // avx264: argument unused during compilation
-// avx264-NOT: -target-feature
+
+// RUN: %clang_cl -m64 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_64_ARCH_AVX512F -- %s
+#if defined(TEST_64_ARCH_AVX512F)
+#if _M_IX86_FP || !__AVX__ || !__AVX2__ || !__AVX512F__  || __AVX512BW__
+#error fail
+#endif
+#endif
+
+// RUN: %clang_cl -m64 -arch:avx512f --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx512f64 %s
+// avx512f64: argument unused during compilation
+
+// RUN: %clang_cl -m64 -arch:AVX512 --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_64_ARCH_AVX512 -- %s
+#if defined(TEST_64_ARCH_AVX512)
+#if _M_IX86_FP || !__AVX__ || !__AVX2__ || !__AVX512F__  || !__AVX512BW__
+#error fail
+#endif
+#endif
+
+// RUN: %clang_cl -m64 -arch:avx512 --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx51264 %s
+// avx51264: argument unused during compilation
 
 void f() {
 }
diff --git a/test/Driver/clang_f_opts.c b/test/Driver/clang_f_opts.c
index b22f74f..9c90f0e 100644
--- a/test/Driver/clang_f_opts.c
+++ b/test/Driver/clang_f_opts.c
@@ -503,3 +503,22 @@
 // CHECK-WINDOWS-ISO10646: "-fwchar-type=int"
 // CHECK-WINDOWS-ISO10646: "-fsigned-wchar"
 
+// RUN: %clang -### -S -fcf-protection %s 2>&1 | FileCheck -check-prefix=CHECK-CF-PROTECTION-FULL %s
+// RUN: %clang -### -S %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CF-PROTECTION-FULL %s
+// RUN: %clang -### -S -fcf-protection=full %s 2>&1 | FileCheck -check-prefix=CHECK-CF-PROTECTION-FULL %s
+// RUN: %clang -### -S %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CF-PROTECTION-FULL %s
+// CHECK-CF-PROTECTION-FULL: -fcf-protection=full
+// CHECK-NO-CF-PROTECTION-FULL-NOT: -fcf-protection=full
+// RUN: %clang -### -S -fcf-protection=return %s 2>&1 | FileCheck -check-prefix=CHECK-CF-PROTECTION-RETURN %s
+// RUN: %clang -### -S %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CF-PROTECTION-RETURN %s
+// CHECK-CF-PROTECTION-RETURN: -fcf-protection=return
+// CHECK-NO-CF-PROTECTION-RETURN-NOT: -fcf-protection=return
+// RUN: %clang -### -S -fcf-protection=branch %s 2>&1 | FileCheck -check-prefix=CHECK-CF-PROTECTION-BRANCH %s
+// RUN: %clang -### -S %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CF-PROTECTION-BRANCH %s
+// CHECK-CF-PROTECTION-BRANCH: -fcf-protection=branch
+// CHECK-NO-CF-PROTECTION-BRANCH-NOT: -fcf-protection=branch
+
+// RUN: %clang -### -S -fdiscard-value-names %s 2>&1 | FileCheck -check-prefix=CHECK-DISCARD-NAMES %s
+// RUN: %clang -### -S -fno-discard-value-names %s 2>&1 | FileCheck -check-prefix=CHECK-NO-DISCARD-NAMES %s
+// CHECK-DISCARD-NAMES: "-discard-value-names"
+// CHECK-NO-DISCARD-NAMES-NOT: "-discard-value-names"
diff --git a/test/Driver/codeview-column-info.c b/test/Driver/codeview-column-info.c
new file mode 100644
index 0000000..c5a7dc9
--- /dev/null
+++ b/test/Driver/codeview-column-info.c
@@ -0,0 +1,13 @@
+// Check that -dwarf-column-info does not get added to the cc1 line:
+// 1) When -gcodeview is present via the clang or clang++ driver
+// 2) When /Z7 is present via the cl driver.
+
+// RUN: %clang -### --target=x86_64-windows-msvc -c -g -gcodeview %s 2> %t1
+// RUN: FileCheck < %t1 %s
+// RUN: %clangxx -### --target=x86_64-windows-msvc -c -g -gcodeview %s 2> %t2
+// RUN: FileCheck < %t2 %s
+// RUN: %clang_cl -### --target=x86_64-windows-msvc /c /Z7 -- %s 2> %t2
+// RUN: FileCheck < %t2 %s
+
+// CHECK: "-cc1"
+// CHECK-NOT: "-dwarf-column-info"
diff --git a/test/Driver/config-file-errs.c b/test/Driver/config-file-errs.c
new file mode 100644
index 0000000..8db2ea4
--- /dev/null
+++ b/test/Driver/config-file-errs.c
@@ -0,0 +1,54 @@
+//--- No more than one '--config' may be specified.
+//
+// RUN: not %clang --config 1.cfg --config 2.cfg 2>&1 | FileCheck %s -check-prefix CHECK-DUPLICATE
+// CHECK-DUPLICATE: no more than one option '--config' is allowed
+
+
+//--- '--config' must be followed by config file name.
+//
+// RUN: not %clang --config 2>&1 | FileCheck %s -check-prefix CHECK-MISSING-FILE
+// CHECK-MISSING-FILE: argument to '--config' is missing (expected 1 value)
+
+
+//--- '--config' must not be found in config files.
+//
+// RUN: not %clang --config %S/Inputs/config-6.cfg 2>&1 | FileCheck %s -check-prefix CHECK-NESTED
+// CHECK-NESTED: option '--config' is not allowed inside configuration file
+
+
+//--- Argument of '--config' must be existing file, if it is specified by path.
+//
+// RUN: not %clang --config somewhere/nonexistent-config-file 2>&1 | FileCheck %s -check-prefix CHECK-NONEXISTENT
+// CHECK-NONEXISTENT: configuration file '{{.*}}somewhere/nonexistent-config-file' does not exist
+
+
+//--- Argument of '--config' must exist somewhere in well-known directories, if it is specified by bare name.
+//
+// RUN: not %clang --config-system-dir= --config-user-dir= --config nonexistent-config-file 2>&1 | FileCheck %s -check-prefix CHECK-NOTFOUND0
+// CHECK-NOTFOUND0: configuration file 'nonexistent-config-file.cfg' cannot be found
+// CHECK-NOTFOUND0-NEXT: was searched for in the directory:
+// CHECK-NOTFOUND0-NOT: was searched for in the directory:
+//
+// RUN: not %clang --config-system-dir= --config-user-dir=%S/Inputs/config2 --config nonexistent-config-file 2>&1 | FileCheck %s -check-prefix CHECK-NOTFOUND1
+// CHECK-NOTFOUND1: configuration file 'nonexistent-config-file.cfg' cannot be found
+// CHECK-NOTFOUND1-NEXT: was searched for in the directory: {{.*}}/Inputs/config2
+// CHECK-NOTFOUND1-NEXT: was searched for in the directory:
+// CHECK-NOTFOUND1-NOT: was searched for in the directory:
+//
+// RUN: not %clang --config-system-dir=%S/Inputs/config --config-user-dir= --config nonexistent-config-file 2>&1 | FileCheck %s -check-prefix CHECK-NOTFOUND2
+// CHECK-NOTFOUND2: configuration file 'nonexistent-config-file.cfg' cannot be found
+// CHECK-NOTFOUND2-NEXT: was searched for in the directory: {{.*}}/Inputs/config
+// CHECK-NOTFOUND2-NEXT: was searched for in the directory:
+// CHECK-NOTFOUND2-NOT: was searched for in the directory:
+//
+// RUN: not %clang --config-system-dir=%S/Inputs/config --config-user-dir=%S/Inputs/config2 --config nonexistent-config-file 2>&1 | FileCheck %s -check-prefix CHECK-NOTFOUND3
+// CHECK-NOTFOUND3: configuration file 'nonexistent-config-file.cfg' cannot be found
+// CHECK-NOTFOUND3-NEXT: was searched for in the directory: {{.*}}/Inputs/config2
+// CHECK-NOTFOUND3-NEXT: was searched for in the directory: {{.*}}/Inputs/config
+// CHECK-NOTFOUND3-NEXT: was searched for in the directory:
+
+
+//--- Argument in config file cannot cross the file boundary
+//
+// RUN: not %clang --config %S/Inputs/config-5.cfg x86_64-unknown-linux-gnu -c %s 2>&1 | FileCheck %s -check-prefix CHECK-CROSS
+// CHECK-CROSS: error: argument to '-target' is missing
diff --git a/test/Driver/config-file.c b/test/Driver/config-file.c
new file mode 100644
index 0000000..f9e6ce6
--- /dev/null
+++ b/test/Driver/config-file.c
@@ -0,0 +1,72 @@
+//--- Config file search directories
+//
+// RUN: %clang --config-system-dir=%S/Inputs/config --config-user-dir=%S/Inputs/config2 -o /dev/null -v 2>&1 | FileCheck %s -check-prefix CHECK-DIRS
+// CHECK-DIRS: System configuration file directory: {{.*}}/Inputs/config
+// CHECK-DIRS: User configuration file directory: {{.*}}/Inputs/config2
+
+
+//--- Config file (full path) in output of -###
+//
+// RUN: %clang --config %S/Inputs/config-1.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-HHH
+// CHECK-HHH: Configuration file: {{.*}}Inputs{{.}}config-1.cfg
+// CHECK-HHH: -Werror
+// CHECK-HHH: -std=c99
+
+
+//--- Config file (full path) in output of -v
+//
+// RUN: %clang --config %S/Inputs/config-1.cfg -S %s -o /dev/null -v 2>&1 | FileCheck %s -check-prefix CHECK-V
+// CHECK-V: Configuration file: {{.*}}Inputs{{.}}config-1.cfg
+// CHECK-V: -Werror
+// CHECK-V: -std=c99
+
+
+//--- Config file in output of -###
+//
+// RUN: %clang --config-system-dir=%S/Inputs --config-user-dir= --config config-1.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-HHH2
+// CHECK-HHH2: Configuration file: {{.*}}Inputs{{.}}config-1.cfg
+// CHECK-HHH2: -Werror
+// CHECK-HHH2: -std=c99
+
+
+//--- Config file in output of -v
+//
+// RUN: %clang --config-system-dir=%S/Inputs --config-user-dir= --config config-1.cfg -S %s -o /dev/null -v 2>&1 | FileCheck %s -check-prefix CHECK-V2
+// CHECK-V2: Configuration file: {{.*}}Inputs{{.}}config-1.cfg
+// CHECK-V2: -Werror
+// CHECK-V2: -std=c99
+
+
+//--- Nested config files
+//
+// RUN: %clang --config-system-dir=%S/Inputs --config-user-dir= --config config-2.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-NESTED
+// CHECK-NESTED: Configuration file: {{.*}}Inputs{{.}}config-2.cfg
+// CHECK-NESTED: -Wundefined-func-template
+
+// RUN: %clang --config-system-dir=%S/Inputs --config-user-dir= --config config-2.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-NESTED2
+// CHECK-NESTED2: Configuration file: {{.*}}Inputs{{.}}config-2.cfg
+// CHECK-NESTED2: -Wundefined-func-template
+
+
+// RUN: %clang --config %S/Inputs/config-2a.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-NESTEDa
+// CHECK-NESTEDa: Configuration file: {{.*}}Inputs{{.}}config-2a.cfg
+// CHECK-NESTEDa: -isysroot
+// CHECK-NESTEDa-SAME: /opt/data
+
+// RUN: %clang --config-system-dir=%S/Inputs --config-user-dir= --config config-2a.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-NESTED2a
+// CHECK-NESTED2a: Configuration file: {{.*}}Inputs{{.}}config-2a.cfg
+// CHECK-NESTED2a: -isysroot
+// CHECK-NESTED2a-SAME: /opt/data
+
+
+//--- Unused options in config file do not produce warnings
+//
+// RUN: %clang --config %S/Inputs/config-4.cfg -S %s -o /dev/null -v 2>&1 | FileCheck %s -check-prefix CHECK-UNUSED
+// CHECK-UNUSED-NOT: argument unused during compilation:
+
+
+//--- User directory is searched first.
+//
+// RUN: %clang --config-system-dir=%S/Inputs/config --config-user-dir=%S/Inputs/config2 --config config-4 -S %s -o /dev/null -v 2>&1 | FileCheck %s -check-prefix CHECK-PRECEDENCE
+// CHECK-PRECEDENCE: Configuration file: {{.*}}Inputs{{.}}config2{{.}}config-4.cfg
+// CHECK-PRECEDENCE: -Wall
diff --git a/test/Driver/config-file2.c b/test/Driver/config-file2.c
new file mode 100644
index 0000000..ff19a93
--- /dev/null
+++ b/test/Driver/config-file2.c
@@ -0,0 +1,51 @@
+// REQUIRES: x86-registered-target
+
+//--- Invocation `clang --config x86_64-qqq -m32` loads `i386-qqq.cfg` if the latter exists.
+//
+// RUN: %clang --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq -m32 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD
+// CHECK-RELOAD: Target: i386
+// CHECK-RELOAD: Configuration file: {{.*}}Inputs{{.}}config{{.}}i386-qqq.cfg
+
+
+//--- Invocation `clang --config x86_64-qqq2 -m32` loads `i386.cfg` if the latter exists in another search directory.
+//
+// RUN: %clang --config-system-dir=%S/Inputs/config --config-user-dir=%S/Inputs/config2 --config x86_64-qqq2 -m32 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1
+// CHECK-RELOAD1: Target: i386
+// CHECK-RELOAD1: Configuration file: {{.*}}Inputs{{.}}config2{{.}}i386.cfg
+
+
+//--- Invocation `clang --config x86_64-qqq2 -m32` loads `x86_64-qqq2.cfg` if `i386-qqq2.cfg` and `i386.cfg` do not exist.
+//
+// RUN: %clang --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq2 -m32 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD2
+// note: target is overriden due to -m32
+// CHECK-RELOAD2: Target: i386
+// CHECK-RELOAD2: Configuration file: {{.*}}Inputs{{.}}config{{.}}x86_64-qqq2.cfg
+
+
+//--- Invocation `clang --config i386-qqq3 -m64` loads `x86_64.cfg` if `x86_64-qqq3.cfg` does not exist.
+//
+// RUN: %clang --config-system-dir=%S/Inputs/config --config-user-dir= --config i386-qqq3 -m64 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD3
+// CHECK-RELOAD3: Target: x86_64
+// CHECK-RELOAD3: Configuration file: {{.*}}Inputs{{.}}config{{.}}x86_64.cfg
+
+
+//--- Invocation `clang --config x86_64-qqq -target i386` loads `i386-qqq.cfg` if the latter exists.
+//
+// RUN: %clang --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq -target i386 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD4
+// CHECK-RELOAD4: Target: i386
+// CHECK-RELOAD4: Configuration file: {{.*}}Inputs{{.}}config{{.}}i386-qqq.cfg
+
+
+//--- Invocation `clang --config x86_64-qqq2 -target i386` loads `x86_64-qqq2.cfg` if `i386-qqq2.cfg` and `i386.cfg` do not exist.
+//
+// RUN: %clang --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq2 -target i386 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD5
+// note: target is overriden due to -target i386
+// CHECK-RELOAD5: Target: i386
+// CHECK-RELOAD5: Configuration file: {{.*}}Inputs{{.}}config{{.}}x86_64-qqq2.cfg
+
+
+//--- Invocation `clang --config x86_64-qqq -target i386 -m64` loads `x86_64-qqq.cfg`.
+//
+// RUN: %clang --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq -target i386 -m64 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD6
+// CHECK-RELOAD6: Target: x86_64
+// CHECK-RELOAD6: Configuration file: {{.*}}Inputs{{.}}config{{.}}x86_64-qqq.cfg
diff --git a/test/Driver/config-file3.c b/test/Driver/config-file3.c
new file mode 100644
index 0000000..c1b523c
--- /dev/null
+++ b/test/Driver/config-file3.c
@@ -0,0 +1,98 @@
+// REQUIRES: shell
+// REQUIRES: x86-registered-target
+
+//--- If config file is specified by relative path (workdir/cfg-s2), it is searched for by that path.
+//
+// RUN: mkdir -p %T/workdir
+// RUN: echo "@subdir/cfg-s2" > %T/workdir/cfg-1
+// RUN: mkdir -p %T/workdir/subdir
+// RUN: echo "-Wundefined-var-template" > %T/workdir/subdir/cfg-s2
+//
+// RUN: ( cd %T && %clang --config workdir/cfg-1 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-REL )
+//
+// CHECK-REL: Configuration file: {{.*}}/workdir/cfg-1
+// CHECK-REL: -Wundefined-var-template
+
+
+//--- Invocation qqq-clang-g++ tries to find config file qqq-clang-g++.cfg first.
+//
+// RUN: mkdir -p %T/testdmode
+// RUN: [ ! -s %T/testdmode/qqq-clang-g++ ] || rm %T/testdmode/qqq-clang-g++
+// RUN: ln -s %clang %T/testdmode/qqq-clang-g++
+// RUN: echo "-Wundefined-func-template" > %T/testdmode/qqq-clang-g++.cfg
+// RUN: echo "-Werror" > %T/testdmode/qqq.cfg
+// RUN: %T/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix FULL-NAME
+//
+// FULL-NAME: Configuration file: {{.*}}/testdmode/qqq-clang-g++.cfg
+// FULL-NAME: -Wundefined-func-template
+// FULL-NAME-NOT: -Werror
+//
+//--- File specified by --config overrides config inferred from clang executable.
+//
+// RUN: %T/testdmode/qqq-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config i386-qqq -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-EXPLICIT
+//
+// CHECK-EXPLICIT: Configuration file: {{.*}}/Inputs/config/i386-qqq.cfg
+//
+//--- Invocation qqq-clang-g++ tries to find config file qqq.cfg if qqq-clang-g++.cfg is not found.
+//
+// RUN: rm %T/testdmode/qqq-clang-g++.cfg
+// RUN: %T/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix SHORT-NAME
+//
+// SHORT-NAME: Configuration file: {{.*}}/testdmode/qqq.cfg
+// SHORT-NAME: -Werror
+// SHORT-NAME-NOT: -Wundefined-func-template
+
+
+//--- Config files are searched for in binary directory as well.
+//
+// RUN: mkdir -p %T/testbin
+// RUN: [ ! -s %T/testbin/clang ] || rm %T/testbin/clang
+// RUN: ln -s %clang %T/testbin/clang
+// RUN: echo "-Werror" > %T/testbin/aaa.cfg
+// RUN: %T/testbin/clang --config-system-dir= --config-user-dir= --config aaa.cfg -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-BIN
+//
+// CHECK-BIN: Configuration file: {{.*}}/testbin/aaa.cfg
+// CHECK-BIN: -Werror
+
+
+//--- If command line contains options that change triple (for instance, -m32), clang tries
+//    reloading config file.
+
+//--- When reloading config file, x86_64-clang-g++ tries to find config i386-clang-g++.cfg first.
+//
+// RUN: mkdir -p %T/testreload
+// RUN: [ ! -s %T/testreload/x86_64-clang-g++ ] || rm %T/testreload/x86_64-clang-g++
+// RUN: ln -s %clang %T/testreload/x86_64-clang-g++
+// RUN: echo "-Wundefined-func-template" > %T/testreload/i386-clang-g++.cfg
+// RUN: echo "-Werror" > %T/testreload/i386.cfg
+// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD
+//
+// CHECK-RELOAD: Configuration file: {{.*}}/testreload/i386-clang-g++.cfg
+// CHECK-RELOAD: -Wundefined-func-template
+// CHECK-RELOAD-NOT: -Werror
+
+//--- If config file is specified by --config and its name does not start with architecture, it is used without reloading.
+//
+// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1a
+//
+// CHECK-RELOAD1a: Configuration file: {{.*}}/Inputs/config-3.cfg
+//
+// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -target i386 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1b
+//
+// CHECK-RELOAD1b: Configuration file: {{.*}}/Inputs/config-3.cfg
+
+//--- If config file is specified by --config and its name starts with architecture, it is reloaded.
+//
+// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1c
+//
+// CHECK-RELOAD1c: Configuration file: {{.*}}/Inputs/config/i386-qqq.cfg
+
+//--- x86_64-clang-g++ tries to find config i386.cfg if i386-clang-g++.cfg is not found.
+//
+// RUN: rm %T/testreload/i386-clang-g++.cfg
+// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1d
+//
+// CHECK-RELOAD1d: Configuration file: {{.*}}/testreload/i386.cfg
+// CHECK-RELOAD1d: -Werror
+// CHECK-RELOAD1d-NOT: -Wundefined-func-template
+
diff --git a/test/Driver/constructors.c b/test/Driver/constructors.c
index 39a199a..884fbe8 100644
--- a/test/Driver/constructors.c
+++ b/test/Driver/constructors.c
@@ -6,6 +6,12 @@
 //
 // RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
 // RUN:     -target i386-unknown-linux \
+// RUN:     --sysroot=%S/Inputs/resource_dir \
+// RUN:     --gcc-toolchain="" \
+// RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
+//
+// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1       \
+// RUN:     -target i386-unknown-linux \
 // RUN:     --sysroot=%S/Inputs/fake_install_tree \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=CHECK-INIT-ARRAY %s
diff --git a/test/Driver/coverage.c b/test/Driver/coverage.c
new file mode 100644
index 0000000..f6bfbfb
--- /dev/null
+++ b/test/Driver/coverage.c
@@ -0,0 +1,8 @@
+// Test coverage flag.
+// REQUIRES: system-windows
+//
+// RUN: %clang_cl -Wno-msvc-not-found -### -coverage %s -o foo/bar.o 2>&1 | FileCheck -check-prefix=CLANG-CL-COVERAGE %s
+// CLANG-CL-COVERAGE-NOT: error:
+// CLANG-CL-COVERAGE-NOT: warning:
+// CLANG-CL-COVERAGE-NOT: argument unused
+// CLANG-CL-COVERAGE-NOT: unknown argument
diff --git a/test/Driver/cuda-bad-arch.cu b/test/Driver/cuda-bad-arch.cu
index cbc2d11..a6559b0 100644
--- a/test/Driver/cuda-bad-arch.cu
+++ b/test/Driver/cuda-bad-arch.cu
@@ -12,6 +12,12 @@
 
 // BAD: error: Unsupported CUDA gpu architecture
 
+// RUN: %clang -### -v --target=x86_64-linux-gnu --cuda-gpu-arch=sm_21 \
+// RUN:   --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda %s 2>&1 \
+// RUN: | FileCheck -check-prefix BAD_CUDA9 %s
+
+// BAD_CUDA9: GPU arch sm_21 is supported by CUDA versions between 7.0 and 8.0
+
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_20 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix OK %s
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
diff --git a/test/Driver/cuda-bail-out.cu b/test/Driver/cuda-bail-out.cu
new file mode 100644
index 0000000..aaf7714
--- /dev/null
+++ b/test/Driver/cuda-bail-out.cu
@@ -0,0 +1,54 @@
+// Test clang driver bails out after one error during CUDA compilation.
+
+// REQUIRES: clang-driver
+// REQUIRES: powerpc-registered-target
+// REQUIRES: nvptx-registered-target
+
+#ifdef FORCE_ERROR
+#error compilation failed
+#endif
+
+// RUN: not %clang -target powerpc64le-ibm-linux-gnu -fsyntax-only -nocudalib \
+// RUN:   -nocudainc -DFORCE_ERROR %s 2>&1 | FileCheck %s
+// RUN: not %clang -target powerpc64le-ibm-linux-gnu -fsyntax-only -nocudalib \
+// RUN:   -nocudainc -DFORCE_ERROR --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_60 \
+// RUN:   %s 2>&1 | FileCheck %s
+// RUN: not %clang -target powerpc64le-ibm-linux-gnu -fsyntax-only -nocudalib \
+// RUN:   -nocudainc -DFORCE_ERROR --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_60 \
+// RUN:   --cuda-device-only %s 2>&1 | FileCheck %s
+
+#if defined(ERROR_HOST) && !defined(__CUDA_ARCH__)
+#error compilation failed
+#endif
+
+#if defined(ERROR_SM35) && (__CUDA_ARCH__ == 350)
+#error compilation failed
+#endif
+
+#if defined(ERROR_SM60) && (__CUDA_ARCH__ == 600)
+#error compilation failed
+#endif
+
+// RUN: not %clang -target powerpc64le-ibm-linux-gnu -fsyntax-only -nocudalib \
+// RUN:   -nocudainc -DERROR_HOST --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_60 \
+// RUN:   %s 2>&1 | FileCheck %s
+// RUN: not %clang -target powerpc64le-ibm-linux-gnu -fsyntax-only -nocudalib \
+// RUN:   -nocudainc -DERROR_SM35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_60 \
+// RUN:   --cuda-device-only %s 2>&1 | FileCheck %s
+// RUN: not %clang -target powerpc64le-ibm-linux-gnu -fsyntax-only -nocudalib \
+// RUN:   -nocudainc -DERROR_SM60 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_60 \
+// RUN:   --cuda-device-only %s 2>&1 | FileCheck %s
+
+// RUN: not %clang -target powerpc64le-ibm-linux-gnu -fsyntax-only -nocudalib \
+// RUN:   -nocudainc -DERROR_HOST -DERROR_SM35 --cuda-gpu-arch=sm_35 \
+// RUN:   --cuda-gpu-arch=sm_60 %s 2>&1 | FileCheck %s
+// RUN: not %clang -target powerpc64le-ibm-linux-gnu -fsyntax-only -nocudalib \
+// RUN:   -nocudainc -DERROR_HOST -DERROR_SM60 --cuda-gpu-arch=sm_35 \
+// RUN:   --cuda-gpu-arch=sm_60 %s 2>&1 | FileCheck %s
+// RUN: not %clang -target powerpc64le-ibm-linux-gnu -fsyntax-only -nocudalib \
+// RUN:   -nocudainc -DERROR_SM35 -DERROR_SM60 --cuda-gpu-arch=sm_35 \
+// RUN:   --cuda-gpu-arch=sm_60 %s 2>&1 | FileCheck %s
+
+
+// CHECK: error: compilation failed
+// CHECK-NOT: error: compilation failed
diff --git a/test/Driver/cuda-detect-path.cu b/test/Driver/cuda-detect-path.cu
new file mode 100644
index 0000000..61b9400
--- /dev/null
+++ b/test/Driver/cuda-detect-path.cu
@@ -0,0 +1,83 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+// This tests uses the PATH environment variable.
+// REQUIRES: !system-windows
+
+// RUN: env PATH=%S/Inputs/CUDA/usr/local/cuda/bin \
+// RUN:    %clang -v --target=i386-unknown-linux --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s
+// RUN: env PATH=%S/Inputs/CUDA/usr/local/cuda/bin \
+// RUN:    %clang -v --target=i386-apple-macosx --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s
+// RUN: env PATH=%S/Inputs/CUDA/usr/local/cuda/bin \
+// RUN:    %clang -v --target=x86_64-unknown-linux --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s
+// RUN: env PATH=%S/Inputs/CUDA/usr/local/cuda/bin \
+// RUN:    %clang -v --target=x86_64-apple-macosx --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s
+
+
+// Check that we follow ptxas binaries that are symlinks.
+// RUN: env PATH=%S/Inputs/CUDA-symlinks/usr/bin \
+// RUN:    %clang -v --target=i386-unknown-linux --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s --check-prefix SYMLINKS
+// RUN: env PATH=%S/Inputs/CUDA-symlinks/usr/bin \
+// RUN:    %clang -v --target=i386-apple-macosx --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s --check-prefix SYMLINKS
+// RUN: env PATH=%S/Inputs/CUDA-symlinks/usr/bin \
+// RUN:    %clang -v --target=x86_64-unknown-linux --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s --check-prefix SYMLINKS
+// RUN: env PATH=%S/Inputs/CUDA-symlinks/usr/bin \
+// RUN:    %clang -v --target=x86_64-apple-macosx --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s --check-prefix SYMLINKS
+
+
+// We only take a CUDA installation from PATH if it contains libdevice.
+// RUN: env PATH=%S/Inputs/CUDA-nolibdevice/usr/local/cuda/bin \
+// RUN:    %clang -v --target=i386-unknown-linux --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+// RUN: env PATH=%S/Inputs/CUDA-nolibdevice/usr/local/cuda/bin \
+// RUN:    %clang -v --target=i386-apple-macosx --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+// RUN: env PATH=%S/Inputs/CUDA-nolibdevice/usr/local/cuda/bin \
+// RUN:    %clang -v --target=x86_64-unknown-linux --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+// RUN: env PATH=%S/Inputs/CUDA-nolibdevice/usr/local/cuda/bin \
+// RUN:    %clang -v --target=x86_64-apple-macosx --sysroot=%S/no-cuda-there \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+
+// We even require libdevice if -nocudalib is passed to avoid false positives
+// if the distribution merges CUDA into /usr and ptxas ends up /usr/bin.
+// RUN: env PATH=%S/Inputs/CUDA-nolibdevice/usr/local/cuda/bin \
+// RUN:    %clang -v --target=i386-unknown-linux --sysroot=%S/no-cuda-there -nocudalib \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+// RUN: env PATH=%S/Inputs/CUDA-nolibdevice/usr/local/cuda/bin \
+// RUN:    %clang -v --target=i386-apple-macosx --sysroot=%S/no-cuda-there -nocudalib \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+// RUN: env PATH=%S/Inputs/CUDA-nolibdevice/usr/local/cuda/bin \
+// RUN:    %clang -v --target=x86_64-unknown-linux --sysroot=%S/no-cuda-there -nocudalib \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+// RUN: env PATH=%S/Inputs/CUDA-nolibdevice/usr/local/cuda/bin \
+// RUN:    %clang -v --target=x86_64-apple-macosx --sysroot=%S/no-cuda-there -nocudalib \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+
+
+// Check that the CUDA installation in PATH is not taken when passing
+// the option --cuda-path-ignore-env.
+// RUN: env PATH=%S/Inputs/CUDA/usr/local/cuda/bin \
+// RUN:    %clang -v --target=i386-unknown-linux --sysroot=%S/no-cuda-there --cuda-path-ignore-env \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+// RUN: env PATH=%S/Inputs/CUDA/usr/local/cuda/bin \
+// RUN:    %clang -v --target=i386-apple-macosx --sysroot=%S/no-cuda-there --cuda-path-ignore-env \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+// RUN: env PATH=%S/Inputs/CUDA/usr/local/cuda/bin \
+// RUN:    %clang -v --target=x86_64-unknown-linux --sysroot=%S/no-cuda-there --cuda-path-ignore-env \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+// RUN: env PATH=%S/Inputs/CUDA/usr/local/cuda/bin \
+// RUN:    %clang -v --target=x86_64-apple-macosx --sysroot=%S/no-cuda-there --cuda-path-ignore-env \
+// RUN:    2>&1 | FileCheck %s --check-prefix NOCUDA
+
+// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda
+// SYMLINKS: Found CUDA installation: {{.*}}/Inputs/CUDA-symlinks/opt/cuda
+// NOCUDA-NOT: Found CUDA installation:
diff --git a/test/Driver/cuda-detect.cu b/test/Driver/cuda-detect.cu
index 1db0cfb..f086ba4 100644
--- a/test/Driver/cuda-detect.cu
+++ b/test/Driver/cuda-detect.cu
@@ -4,9 +4,14 @@
 //
 // Check that we properly detect CUDA installation.
 // RUN: %clang -v --target=i386-unknown-linux \
-// RUN:   --sysroot=%S/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA
+// RUN:   --sysroot=%S/no-cuda-there --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NOCUDA
 // RUN: %clang -v --target=i386-apple-macosx \
-// RUN:   --sysroot=%S/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA
+// RUN:   --sysroot=%S/no-cuda-there --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NOCUDA
+// RUN: %clang -v --target=x86_64-unknown-linux \
+// RUN:   --sysroot=%S/no-cuda-there --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NOCUDA
+// RUN: %clang -v --target=x86_64-apple-macosx \
+// RUN:   --sysroot=%S/no-cuda-there --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NOCUDA
+
 
 // RUN: %clang -v --target=i386-unknown-linux \
 // RUN:   --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s
@@ -20,15 +25,23 @@
 
 // Check that we don't find a CUDA installation without libdevice ...
 // RUN: %clang -v --target=i386-unknown-linux \
-// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice 2>&1 | FileCheck %s -check-prefix NOCUDA
+// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NOCUDA
 // RUN: %clang -v --target=i386-apple-macosx \
-// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice 2>&1 | FileCheck %s -check-prefix NOCUDA
+// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NOCUDA
+// RUN: %clang -v --target=x86_64-unknown-linux \
+// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NOCUDA
+// RUN: %clang -v --target=x84_64-apple-macosx \
+// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NOCUDA
 
 // ... unless the user doesn't need libdevice
 // RUN: %clang -v --target=i386-unknown-linux -nocudalib \
-// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice 2>&1 | FileCheck %s -check-prefix NO-LIBDEVICE
+// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NO-LIBDEVICE
 // RUN: %clang -v --target=i386-apple-macosx -nocudalib \
-// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice 2>&1 | FileCheck %s -check-prefix NO-LIBDEVICE
+// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NO-LIBDEVICE
+// RUN: %clang -v --target=x86_64-unknown-linux -nocudalib \
+// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NO-LIBDEVICE
+// RUN: %clang -v --target=x86_64-apple-macosx -nocudalib \
+// RUN:   --sysroot=%S/Inputs/CUDA-nolibdevice --cuda-path-ignore-env 2>&1 | FileCheck %s -check-prefix NO-LIBDEVICE
 
 
 // Make sure we map libdevice bitcode files to proper GPUs. These
diff --git a/test/Driver/cuda-external-tools.cu b/test/Driver/cuda-external-tools.cu
index 63b74fc..c15cc05 100644
--- a/test/Driver/cuda-external-tools.cu
+++ b/test/Driver/cuda-external-tools.cu
@@ -7,112 +7,141 @@
 
 // Regular compiles with -O{0,1,2,3,4,fast}.  -O4 and -Ofast map to ptxas O3.
 // RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
 // RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT1 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT1 %s
 // RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
 // RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
 // RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
 // RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
+// Generating relocatable device code
+// RUN: %clang -### -target x86_64-linux-gnu -fcuda-rdc -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s
 
 // With debugging enabled, ptxas should be run with with no ptxas optimizations.
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix DBG %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,DBG %s
 
 // --no-cuda-noopt-device-debug overrides --cuda-noopt-device-debug.
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug \
 // RUN:   --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
 
 // Regular compile without -O.  This should result in us passing -O0 to ptxas.
 // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
 
 // Regular compiles with -Os and -Oz.  For lack of a better option, we map
 // these to ptxas -O3.
 // RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
 // RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
 
 // Regular compile targeting sm_35.
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
+// Separate compilation targeting sm_35.
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fcuda-rdc -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
 
 // 32-bit compile.
-// RUN: %clang -### -target x86_32-linux-gnu -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH32 -check-prefix SM20 %s
+// RUN: %clang -### -target i386-linux-gnu -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s
+// 32-bit compile when generating relocatable device code.
+// RUN: %clang -### -target i386-linux-gnu -fcuda-rdc -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s
 
 // Compile with -fintegrated-as.  This should still cause us to invoke ptxas.
 // RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
+// Check that we still pass -c when generating relocatable device code.
+// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -fcuda-rdc -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s
 
 // Check -Xcuda-ptxas and -Xcuda-fatbinary
 // RUN: %clang -### -target x86_64-linux-gnu -c -Xcuda-ptxas -foo1 \
 // RUN:   -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \
-// RUN: | FileCheck -check-prefix SM20 -check-prefix PTXAS-EXTRA \
-// RUN:   -check-prefix FATBINARY-EXTRA %s
+// RUN: | FileCheck -check-prefixes=CHECK,SM20,PTXAS-EXTRA,FATBINARY-EXTRA %s
 
 // MacOS spot-checks
 // RUN: %clang -### -target x86_64-apple-macosx -O0 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
 // RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
-// RUN: %clang -### -target x86_32-apple-macosx -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH32 -check-prefix SM20 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
+// RUN: %clang -### -target i386-apple-macosx -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s
+
+// Check relocatable device code generation on MacOS.
+// RUN: %clang -### -target x86_64-apple-macosx -O0 -fcuda-rdc -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s
+// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fcuda-rdc -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
+// RUN: %clang -### -target i386-apple-macosx -fcuda-rdc -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s
 
 // Check that CLANG forwards the -v flag to PTXAS.
 // RUN:   %clang -### -save-temps -no-canonical-prefixes -v %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-PTXAS-VERBOSE %s
 
 // Match clang job that produces PTX assembly.
-// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda"
-// SM20: "-target-cpu" "sm_20"
-// SM35: "-target-cpu" "sm_35"
-// SM20: "-o" "[[PTXFILE:[^"]*]]"
-// SM35: "-o" "[[PTXFILE:[^"]*]]"
+// CHECK: "-cc1"
+// ARCH64-SAME: "-triple" "nvptx64-nvidia-cuda"
+// ARCH32-SAME: "-triple" "nvptx-nvidia-cuda"
+// SM20-SAME: "-target-cpu" "sm_20"
+// SM35-SAME: "-target-cpu" "sm_35"
+// SM20-SAME: "-o" "[[PTXFILE:[^"]*]]"
+// SM35-SAME: "-o" "[[PTXFILE:[^"]*]]"
+// RDC-SAME: "-fcuda-rdc"
+// CHECK-NOT: "-fcuda-rdc"
 
 // Match the call to ptxas (which assembles PTX to SASS).
-// CHECK:ptxas
-// ARCH64: "-m64"
-// ARCH32: "-m32"
-// OPT0: "-O0"
+// CHECK: ptxas
+// ARCH64-SAME: "-m64"
+// ARCH32-SAME: "-m32"
+// OPT0-SAME: "-O0"
 // OPT0-NOT: "-g"
-// OPT1: "-O1"
+// OPT1-SAME: "-O1"
 // OPT1-NOT: "-g"
-// OPT2: "-O2"
+// OPT2-SAME: "-O2"
 // OPT2-NOT: "-g"
-// OPT3: "-O3"
+// OPT3-SAME: "-O3"
 // OPT3-NOT: "-g"
-// DBG: "-g" "--dont-merge-basicblocks" "--return-at-end"
-// SM20: "--gpu-name" "sm_20"
-// SM35: "--gpu-name" "sm_35"
-// SM20: "--output-file" "[[CUBINFILE:[^"]*]]"
-// SM35: "--output-file" "[[CUBINFILE:[^"]*]]"
-// PTXAS-EXTRA: "-foo1"
-// PTXAS-EXTRA-SAME: "-foo2"
+// DBG-SAME: "-g" "--dont-merge-basicblocks" "--return-at-end"
+// SM20-SAME: "--gpu-name" "sm_20"
+// SM35-SAME: "--gpu-name" "sm_35"
+// SM20-SAME: "--output-file" "[[CUBINFILE:[^"]*]]"
+// SM35-SAME: "--output-file" "[[CUBINFILE:[^"]*]]"
 // CHECK-SAME: "[[PTXFILE]]"
+// PTXAS-EXTRA-SAME: "-foo1"
+// PTXAS-EXTRA-SAME: "-foo2"
+// RDC-SAME: "-c"
+// CHECK-NOT: "-c"
 
 // Match the call to fatbinary (which combines all our PTX and SASS into one
 // blob).
-// CHECK:fatbinary
-// CHECK-DAG: "--cuda"
-// ARCH64-DAG: "-64"
-// ARCH32-DAG: "-32"
+// CHECK: fatbinary
+// CHECK-SAME-DAG: "--cuda"
+// ARCH64-SAME-DAG: "-64"
+// ARCH32-SAME-DAG: "-32"
 // CHECK-DAG: "--create" "[[FATBINARY:[^"]*]]"
-// SM20-DAG: "--image=profile=compute_20,file=[[PTXFILE]]"
-// SM35-DAG: "--image=profile=compute_35,file=[[PTXFILE]]"
-// SM20-DAG: "--image=profile=sm_20,file=[[CUBINFILE]]"
-// SM35-DAG: "--image=profile=sm_35,file=[[CUBINFILE]]"
-// FATBINARY-EXTRA: "-bar1"
+// SM20-SAME-DAG: "--image=profile=compute_20,file=[[PTXFILE]]"
+// SM35-SAME-DAG: "--image=profile=compute_35,file=[[PTXFILE]]"
+// SM20-SAME-DAG: "--image=profile=sm_20,file=[[CUBINFILE]]"
+// SM35-SAME-DAG: "--image=profile=sm_35,file=[[CUBINFILE]]"
+// FATBINARY-EXTRA-SAME: "-bar1"
 // FATBINARY-EXTRA-SAME: "-bar2"
 
 // Match the clang job for host compilation.
-// CHECK: "-cc1" "-triple" "x86_64--linux-gnu"
+// CHECK: "-cc1"
+// ARCH64-SAME: "-triple" "x86_64-
+// ARCH32-SAME: "-triple" "i386-
 // CHECK-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
+// RDC-SAME: "-fcuda-rdc"
+// CHECK-NOT: "-fcuda-rdc"
 
-// CHK-PTXAS-VERBOSE: ptxas{{.*}}" "-v"
\ No newline at end of file
+// CHK-PTXAS-VERBOSE: ptxas{{.*}}" "-v"
diff --git a/test/Driver/cuda-no-pgo-or-coverage.cu b/test/Driver/cuda-no-pgo-or-coverage.cu
new file mode 100644
index 0000000..a2cb610
--- /dev/null
+++ b/test/Driver/cuda-no-pgo-or-coverage.cu
@@ -0,0 +1,34 @@
+// Check that profiling/coverage arguments doen't get passed down to device-side
+// compilation.
+//
+// REQUIRES: clang-driver
+//
+// XRUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
+// XRUN:   -fprofile-generate %s 2>&1 | \
+// XRUN: FileCheck --check-prefixes=CHECK,PROF %s
+//
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
+// RUN:   -fprofile-instr-generate %s 2>&1 | \
+// RUN: FileCheck --check-prefixes=CHECK,PROF %s
+//
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
+// RUN:   -coverage %s 2>&1 | \
+// RUN: FileCheck --check-prefixes=CHECK,GCOV %s
+//
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
+// RUN:   -ftest-coverage %s 2>&1 | \
+// RUN: FileCheck --check-prefixes=CHECK,GCOV %s
+//
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_20   \
+// RUN:   -fprofile-instr-generate -fcoverage-mapping %s 2>&1 | \
+// RUN: FileCheck --check-prefixes=CHECK,PROF,GCOV %s
+//
+//
+// CHECK-NOT: error: unsupported option '-fprofile
+// CHECK-NOT: error: invalid argument
+// CHECK-DAG: "-fcuda-is-device"
+// CHECK-NOT: "-f{{[^"]*coverage.*}}"
+// CHECK-NOT: "-fprofile{{[^"]*}}"
+// CHECK: "-triple" "x86_64--linux-gnu"
+// PROF-DAG: "-fprofile{{.*}}"
+// GCOV-DAG: "-f{{(coverage|emit-coverage).*}}"
diff --git a/test/Driver/cuda-not-found.cu b/test/Driver/cuda-not-found.cu
index b63623a..48cf19a 100644
--- a/test/Driver/cuda-not-found.cu
+++ b/test/Driver/cuda-not-found.cu
@@ -3,10 +3,10 @@
 // Check that we raise an error if we're trying to compile CUDA code but can't
 // find a CUDA install, unless -nocudainc was passed.
 
-// RUN: %clang -### --sysroot=%s/no-cuda-there %s 2>&1 | FileCheck %s --check-prefix ERR
+// RUN: %clang -### --sysroot=%s/no-cuda-there --cuda-path-ignore-env %s 2>&1 | FileCheck %s --check-prefix ERR
 // RUN: %clang -### --cuda-path=%s/no-cuda-there %s 2>&1 | FileCheck %s --check-prefix ERR
 // ERR: cannot find CUDA installation
 
-// RUN: %clang -### -nocudainc --sysroot=%s/no-cuda-there %s 2>&1 | FileCheck %s --check-prefix OK
+// RUN: %clang -### -nocudainc --sysroot=%s/no-cuda-there --cuda-path-ignore-env %s 2>&1 | FileCheck %s --check-prefix OK
 // RUN: %clang -### -nocudainc --cuda-path=%s/no-cuda-there %s 2>&1 | FileCheck %s --check-prefix OK
 // OK-NOT: cannot find CUDA installation
diff --git a/test/Driver/cuda-options.cu b/test/Driver/cuda-options.cu
index c4bfda9..67d4fde 100644
--- a/test/Driver/cuda-options.cu
+++ b/test/Driver/cuda-options.cu
@@ -73,11 +73,10 @@
 // and that all results are included on the host side.
 // RUN: %clang -### -target x86_64-linux-gnu \
 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN:    -check-prefix DEVICE2 -check-prefix DEVICE-SM30 \
-// RUN:    -check-prefix DEVICE2-SM35 -check-prefix HOST \
-// RUN:    -check-prefix HOST-NOSAVE -check-prefix INCLUDES-DEVICE \
-// RUN:    -check-prefix NOLINK %s
+// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
+// RUN:             -check-prefixes DEVICE-SM30,DEVICE2-SM35 \
+// RUN:             -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \
+// RUN:             -check-prefixes HOST,HOST-NOSAVE,NOLINK %s
 
 // Verify that device-side results are passed to the correct tool when
 // -save-temps is used.
@@ -182,9 +181,15 @@
 // DEVICE2-SAME: "-aux-triple" "x86_64--linux-gnu"
 // DEVICE2-SAME: "-fcuda-is-device"
 // DEVICE2-SM35-SAME: "-target-cpu" "sm_35"
-// DEVICE2-SAME: "-o" "[[GPUBINARY2:[^"]*]]"
+// DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]"
 // DEVICE2-SAME: "-x" "cuda"
 
+// Match another call to ptxas.
+// DEVICE2: ptxas
+// DEVICE2-SM35-DAG: "--gpu-name" "sm_35"
+// DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]"
+// DEVICE2-DAG: "[[PTXFILE2]]"
+
 // Match no device-side compilation.
 // NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // NODEVICE-NOT: "-fcuda-is-device"
@@ -193,6 +198,8 @@
 // INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
 // INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
 // INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
+// INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
+// INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"
 
 // Match host-side preprocessor job with -save-temps.
 // HOST-SAVE: "-cc1" "-triple" "x86_64--linux-gnu"
@@ -207,7 +214,11 @@
 // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
 // HOST-NOSAVE-SAME: "-x" "cuda"
 // HOST-SAVE-SAME: "-x" "cuda-cpp-output"
+// There is only one GPU binary after combining it with fatbinary!
+// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
 // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
+// There is only one GPU binary after combining it with fatbinary.
+// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
 
 // Match external assembler that uses compilation output.
 // HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]"
diff --git a/test/Driver/cuda-version-check.cu b/test/Driver/cuda-version-check.cu
index 46ca72f..2fdd9c4 100644
--- a/test/Driver/cuda-version-check.cu
+++ b/test/Driver/cuda-version-check.cu
@@ -2,40 +2,40 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: nvptx-registered-target
 
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --sysroot=%S/Inputs/CUDA 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --sysroot=%S/Inputs/CUDA_80 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --sysroot=%S/Inputs/CUDA_80 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
 
 // The installation at Inputs/CUDA is CUDA 7.0, which doesn't support sm_60.
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --sysroot=%S/Inputs/CUDA 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60
 
 // This should only complain about sm_60, not sm_35.
 // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_35 \
-// RUN:    --sysroot=%S/Inputs/CUDA 2>&1 %s | \
+// RUN:    --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60 --check-prefix=OK_SM35
 
 // We should get two errors here, one for sm_60 and one for sm_61.
 // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_61 \
-// RUN:    --sysroot=%S/Inputs/CUDA 2>&1 %s | \
+// RUN:    --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60 --check-prefix=ERR_SM61
 
 // We should still get an error if we pass -nocudainc, because this compilation
 // would invoke ptxas, and we do a version check on that, too.
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --sysroot=%S/Inputs/CUDA 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60
 
 // If with -nocudainc and -E, we don't touch the CUDA install, so we
 // shouldn't get an error.
 // RUN: %clang --target=x86_64-linux -v -### -E --cuda-device-only --cuda-gpu-arch=sm_60 -nocudainc \
-// RUN:    --sysroot=%S/Inputs/CUDA 2>&1 %s | \
+// RUN:    --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
 
 // --no-cuda-version-check should suppress all of these errors.
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --sysroot=%S/Inputs/CUDA 2>&1 \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 \
 // RUN:    --no-cuda-version-check %s | \
 // RUN:    FileCheck %s --check-prefix=OK
 
@@ -43,9 +43,9 @@
 // therefore we should not get an error in host-only mode. We use the -S here
 // to avoid the error being produced in case by the assembler tool, which does
 // the same check.
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --sysroot=%S/Inputs/CUDA -S 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --sysroot=%S/Inputs/CUDA -S 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60
 
 // OK-NOT: error: GPU arch
diff --git a/test/Driver/darwin-ld.c b/test/Driver/darwin-ld.c
index 3ddba02..9275d0f 100644
--- a/test/Driver/darwin-ld.c
+++ b/test/Driver/darwin-ld.c
@@ -342,6 +342,10 @@
 // RUN: FileCheck -check-prefix=PASS_REMARKS_WITH_HOTNESS %s < %t.log
 // PASS_REMARKS_WITH_HOTNESS: "-mllvm" "-lto-pass-remarks-output" "-mllvm" "foo/bar.out.opt.yaml" "-mllvm" "-lto-pass-remarks-with-hotness"
 
+// RUN: %clang -target x86_64-apple-darwin12 %t.o -fsave-optimization-record -fprofile-instr-use=blah -fdiagnostics-hotness-threshold=100 -### -o foo/bar.out 2> %t.log
+// RUN: FileCheck -check-prefix=PASS_REMARKS_WITH_HOTNESS_THRESHOLD %s < %t.log
+// PASS_REMARKS_WITH_HOTNESS_THRESHOLD: "-mllvm" "-lto-pass-remarks-output" "-mllvm" "foo/bar.out.opt.yaml" "-mllvm" "-lto-pass-remarks-with-hotness" "-mllvm" "-lto-pass-remarks-hotness-threshold=100"
+
 // RUN: %clang -target x86_64-apple-ios6.0 -miphoneos-version-min=6.0 -fprofile-instr-generate -### %t.o 2> %t.log
 // RUN: FileCheck -check-prefix=LINK_PROFILE_FIRST %s < %t.log
 // RUN: %clang -target x86_64-apple-darwin12 -fprofile-instr-generate -### %t.o 2> %t.log
diff --git a/test/Driver/darwin-sdkroot.c b/test/Driver/darwin-sdkroot.c
index 4f1fca2..8b3782e 100644
--- a/test/Driver/darwin-sdkroot.c
+++ b/test/Driver/darwin-sdkroot.c
@@ -60,7 +60,7 @@
 //
 // CHECK-SIMULATOR: clang
 // CHECK-SIMULATOR: "-cc1"
-// CHECK-SIMULATOR: "-triple" "x86_64-apple-ios8.0.0"
+// CHECK-SIMULATOR: "-triple" "x86_64-apple-ios8.0.0-simulator"
 // CHECK-SIMULATOR: ld
 // CHECK-SIMULATOR: "-ios_simulator_version_min" "8.0.0"
 //
diff --git a/test/Driver/darwin-simulator-macro.c b/test/Driver/darwin-simulator-macro.c
index 6971e93..cf17299 100644
--- a/test/Driver/darwin-simulator-macro.c
+++ b/test/Driver/darwin-simulator-macro.c
@@ -4,9 +4,8 @@
 // RUN: %clang -target x86_64-apple-darwin10 -arch x86_64 -mappletvsimulator-version-min=6.0.0 -E -dM %s | FileCheck -check-prefix=SIM %s
 // RUN: %clang -target x86_64-apple-darwin10 -arch x86_64 -mwatchos-simulator-version-min=6.0.0 -E -dM %s | FileCheck -check-prefix=SIM %s
 // RUN: %clang -target x86_64-apple-darwin10 -arch x86_64 -mwatchsimulator-version-min=6.0.0 -E -dM %s | FileCheck -check-prefix=SIM %s
-// RUN: %clang -target x86_64-apple-darwin10 -arch x86_64 -mios-version-min=6.0.0 -E -dM %s | FileCheck -check-prefix=DEV %s
-// RUN: %clang -target x86_64-apple-darwin10 -arch x86_64 -mtvos-version-min=6.0.0 -E -dM %s | FileCheck -check-prefix=DEV %s
-// RUN: %clang -target x86_64-apple-darwin10 -arch x86_64 -mwatchos-version-min=6.0.0 -E -dM %s | FileCheck -check-prefix=DEV %s
+// RUN: %clang -target x86_64-apple-darwin10 -arch x86_64 -mios-version-min=6.0.0 -E -dM %s | FileCheck -check-prefix=SIM %s
+// RUN: %clang -target x86_64-apple-darwin10 -arch x86_64 -mtvos-version-min=6.0.0 -E -dM %s | FileCheck -check-prefix=SIM %s
+// RUN: %clang -target x86_64-apple-darwin10 -arch x86_64 -mwatchos-version-min=6.0.0 -E -dM %s | FileCheck -check-prefix=SIM %s
 
 // SIM: #define __APPLE_EMBEDDED_SIMULATOR__ 1
-// DEV-NOT: __APPLE_EMBEDDED_SIMULATOR__
diff --git a/test/Driver/darwin-version.c b/test/Driver/darwin-version.c
index 3ff49ac..411f268 100644
--- a/test/Driver/darwin-version.c
+++ b/test/Driver/darwin-version.c
@@ -12,11 +12,15 @@
 // CHECK-VERSION-IOS3: "armv6k-apple-ios3.0.0"
 
 // RUN: env IPHONEOS_DEPLOYMENT_TARGET=11.0 \
-// RUN:   %clang -target armv7-apple-ios9.0 -c -### %s 2> %t.err
+// RUN:   %clang -target armv7-apple-darwin -c -### %s 2> %t.err
 // RUN:   FileCheck --input-file=%t.err --check-prefix=CHECK-VERSION-IOS4 %s
 // CHECK-VERSION-IOS4: invalid iOS deployment version 'IPHONEOS_DEPLOYMENT_TARGET=11.0'
 
-// RUN: %clang -target armv7-apple-ios9.0 -miphoneos-version-min=11.0 -c -### %s 2> %t.err
+// RUN: %clang -target armv7-apple-ios11.0 -c -### %s 2> %t.err
+// RUN: FileCheck --input-file=%t.err --check-prefix=CHECK-VERSION-IOS41 %s
+// CHECK-VERSION-IOS41: invalid iOS deployment version '--target=armv7-apple-ios11.0'
+
+// RUN: %clang -target armv7-apple-darwin -miphoneos-version-min=11.0 -c -### %s 2> %t.err
 // RUN: FileCheck --input-file=%t.err --check-prefix=CHECK-VERSION-IOS5 %s
 // CHECK-VERSION-IOS5: invalid iOS deployment version '-miphoneos-version-min=11.0'
 
@@ -25,13 +29,14 @@
 // CHECK-VERSION-IOS6: invalid iOS deployment version '-mios-simulator-version-min=11.0'
 
 // RUN: %clang -target armv7-apple-ios11.1 -c -### %s 2>&1 | \
-// RUN: FileCheck --check-prefix=CHECK-VERSION-IOS7 %s
-// RUN: %clang -target armv7-apple-ios9 -Wno-missing-sysroot -isysroot SDKs/iPhoneOS11.0.sdk -c -### %s 2>&1 | \
+// RUN: FileCheck --check-prefix=CHECK-VERSION-IOS71 %s
+// CHECK-VERSION-IOS71: invalid iOS deployment version
+// RUN: %clang -target armv7-apple-darwin -Wno-missing-sysroot -isysroot SDKs/iPhoneOS11.0.sdk -c -### %s 2>&1 | \
 // RUN: FileCheck --check-prefix=CHECK-VERSION-IOS7 %s
 // CHECK-VERSION-IOS7: thumbv7-apple-ios10.99.99
 
 // RUN: env IPHONEOS_DEPLOYMENT_TARGET=11.0 \
-// RUN:   %clang -target arm64-apple-ios11.0 -c -### %s 2>&1 | \
+// RUN:   %clang -target arm64-apple-darwin -c -### %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-IOS8 %s
 // CHECK-VERSION-IOS8: arm64-apple-ios11.0.0
 
@@ -41,7 +46,7 @@
 
 // RUN: %clang -target x86_64-apple-darwin -mios-simulator-version-min=11.0 -c -### %s 2>&1 | \
 // RUN: FileCheck --check-prefix=CHECK-VERSION-IOS10 %s
-// CHECK-VERSION-IOS10: x86_64-apple-ios11.0.0
+// CHECK-VERSION-IOS10: x86_64-apple-ios11.0.0-simulator
 
 // RUN: %clang -target arm64-apple-ios11.1 -c -### %s 2>&1 | \
 // RUN: FileCheck --check-prefix=CHECK-VERSION-IOS11 %s
@@ -49,7 +54,7 @@
 
 // RUN: %clang -target armv7-apple-ios9.0 -miphoneos-version-min=11.0 -c -Wno-invalid-ios-deployment-target -### %s 2>&1 | \
 // RUN: FileCheck --check-prefix=CHECK-VERSION-IOS12 %s
-// CHECK-VERSION-IOS12: thumbv7-apple-ios11.0.0
+// CHECK-VERSION-IOS12: thumbv7-apple-ios9.0.0
 
 // RUN: %clang -target i686-apple-darwin8 -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-OSX4 %s
@@ -68,14 +73,14 @@
 // CHECK-VERSION-OSX6: "i386-apple-macosx10.6.0"
 // RUN: %clang -target x86_64-apple-darwin14 -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-OSX10 %s
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.10 -c %s -### 2>&1 | \
+// RUN: %clang -target x86_64-apple-darwin -mmacosx-version-min=10.10 -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-OSX10 %s
-// RUN: %clang -target x86_64-apple-macosx -mmacos-version-min=10.10 -c %s -### 2>&1 | \
+// RUN: %clang -target x86_64-apple-darwin -mmacos-version-min=10.10 -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-OSX10 %s
 // CHECK-VERSION-OSX10: "x86_64-apple-macosx10.10.0"
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min= -c %s -### 2>&1 | \
+// RUN: %clang -target x86_64-apple-darwin -mmacosx-version-min= -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-MISSING %s
-// RUN: %clang -target x86_64-apple-macosx -mmacos-version-min= -c %s -### 2>&1 | \
+// RUN: %clang -target x86_64-apple-darwin -mmacos-version-min= -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-MISSING %s
 // CHECK-VERSION-MISSING: invalid version number
 // RUN: %clang -target armv7k-apple-darwin -mwatchos-version-min=2.0 -c %s -### 2>&1 | \
@@ -85,13 +90,13 @@
 // CHECK-VERSION-TVOS83: "thumbv7-apple-tvos8.3.0"
 // RUN: %clang -target i386-apple-darwin -mtvos-simulator-version-min=8.3 -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-TVSIM83 %s
-// CHECK-VERSION-TVSIM83: "i386-apple-tvos8.3.0"
+// CHECK-VERSION-TVSIM83: "i386-apple-tvos8.3.0-simulator"
 // RUN: %clang -target armv7k-apple-darwin -mwatchos-version-min=2.0 -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-WATCHOS20 %s
 // CHECK-VERSION-WATCHOS20: "thumbv7k-apple-watchos2.0.0"
 // RUN: %clang -target i386-apple-darwin -mwatchos-simulator-version-min=2.0 -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-WATCHSIM20 %s
-// CHECK-VERSION-WATCHSIM20: "i386-apple-watchos2.0.0"
+// CHECK-VERSION-WATCHSIM20: "i386-apple-watchos2.0.0-simulator"
 
 // Check environment variable gets interpreted correctly
 // RUN: env MACOSX_DEPLOYMENT_TARGET=10.5 IPHONEOS_DEPLOYMENT_TARGET=2.0 \
@@ -117,7 +122,7 @@
 // RUN: env TVOS_DEPLOYMENT_TARGET=8.3.1 \
 // RUN:   %clang -target i386-apple-darwin9 -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-TVOSSIM %s
-// CHECK-VERSION-TVOSSIM: "i386-apple-tvos8.3.1"
+// CHECK-VERSION-TVOSSIM: "i386-apple-tvos8.3.1-simulator"
 
 // RUN: env MACOSX_DEPLOYMENT_TARGET=10.5 WATCHOS_DEPLOYMENT_TARGET=2.0 \
 // RUN:   %clang -target armv7-apple-darwin9 -c %s -### 2>&1 | \
@@ -126,16 +131,177 @@
 // RUN: env WATCHOS_DEPLOYMENT_TARGET=2.0 \
 // RUN:   %clang -target i386-apple-darwin9 -c %s -### 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK-VERSION-WATCHOSSIM %s
-// CHECK-VERSION-WATCHOSSIM: "i386-apple-watchos2.0.0"
+// CHECK-VERSION-WATCHOSSIM: "i386-apple-watchos2.0.0-simulator"
 
 // RUN: %clang -target x86_64-apple-ios11.0.0 -c %s -### 2>&1 | \
 // RUN: FileCheck --check-prefix=CHECK-VERSION-IOS-TARGET %s
-// CHECK-VERSION-IOS-TARGET: "x86_64-apple-ios11.0.0"
+// CHECK-VERSION-IOS-TARGET: "x86_64-apple-ios11.0.0-simulator"
 
 // RUN: %clang -target x86_64-apple-tvos11.0 -c %s -### 2>&1 | \
 // RUN: FileCheck --check-prefix=CHECK-VERSION-TVOS-TARGET %s
-// CHECK-VERSION-TVOS-TARGET: "x86_64-apple-tvos11.0.0"
+// CHECK-VERSION-TVOS-TARGET: "x86_64-apple-tvos11.0.0-simulator"
 
 // RUN: %clang -target x86_64-apple-watchos4.0 -c %s -### 2>&1 | \
 // RUN: FileCheck --check-prefix=CHECK-VERSION-WATCHOS-TARGET %s
-// CHECK-VERSION-WATCHOS-TARGET: "x86_64-apple-watchos4.0.0"
+// CHECK-VERSION-WATCHOS-TARGET: "x86_64-apple-watchos4.0.0-simulator"
+
+// RUN: env MACOSX_DEPLOYMENT_TARGET=1000.1000 \
+// RUN:   %clang -target x86_64-apple-darwin -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-INVALID-ENV %s
+// CHECK-VERSION-INVALID-ENV: invalid version number in 'MACOSX_DEPLOYMENT_TARGET=1000.1000'
+
+
+
+// Target can specify the OS version:
+
+// RUN: %clang -target x86_64-apple-macos10.11.2 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TMAC2 %s
+// CHECK-VERSION-TMAC2: "x86_64-apple-macosx10.11.2"
+
+// RUN: %clang -target arm64-apple-ios11.1 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TIOS1 %s
+// CHECK-VERSION-TIOS1: "arm64-apple-ios11.1.0"
+
+// RUN: %clang -target arm64-apple-tvos10.3 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TTVOS1 %s
+// CHECK-VERSION-TTVOS1: "arm64-apple-tvos10.3.0"
+
+// RUN: %clang -target armv7k-apple-watchos4.1 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TWATCHOS1 %s
+// CHECK-VERSION-TWATCHOS1: "thumbv7k-apple-watchos4.1.0"
+
+// "darwin" always back to the -m<os>version-min and environment:
+
+// RUN: %clang -target x86_64-apple-darwin14 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TDARWIN-FALL1 %s
+// CHECK-VERSION-TDARWIN-FALL1: "x86_64-apple-macosx10.10.0"
+
+// RUN: %clang -target x86_64-apple-darwin14 -miphoneos-version-min=10.1 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TDARWIN-FALL2 %s
+// CHECK-VERSION-TDARWIN-FALL2: "x86_64-apple-ios10.1.0-simulator"
+
+// RUN: env IPHONEOS_DEPLOYMENT_TARGET=9.1 \
+// RUN:   %clang -target arm64-apple-darwin14 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TDARWIN-FALL3 %s
+// CHECK-VERSION-TDARWIN-FALL3: "arm64-apple-ios9.1.0"
+
+// RUN: %clang -target arm64-apple-darwin14 -isysroot SDKs/iPhoneOS11.0.sdk -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TDARWIN-FALL4 %s
+// CHECK-VERSION-TDARWIN-FALL4: "arm64-apple-ios11.0.0"
+
+// RUN: %clang -target unknown-apple-darwin12  -arch armv7  -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TDARWIN-FALL5 %s
+// CHECK-VERSION-TDARWIN-FALL5: "thumbv7-apple-ios5.0.0"
+
+// Warn about -m<os>-version-min when it's used with target:
+
+// RUN: %clang -target x86_64-apple-macos10.11.2 -mmacos-version-min=10.6 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TNO-OSV1 %s
+// CHECK-VERSION-TNO-OSV1: overriding '-mmacosx-version-min=10.6' option with '--target=x86_64-apple-macos10.11.2'
+
+// RUN: %clang -target x86_64-apple-macos -miphoneos-version-min=9.1 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TNO-OSV2 %s
+// CHECK-VERSION-TNO-OSV2: overriding '-miphoneos-version-min=9.1' option with '--target=x86_64-apple-macos'
+
+// RUN: %clang -target x86_64-apple-ios -miphonesimulator-version-min=10.0 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TNO-OSV3 %s
+// CHECK-VERSION-TNO-OSV3: "x86_64-apple-ios10.0.0-simulator"
+// CHECK-VERSION-TNO-OSV3-NOT: overriding '-mios-simulator-version-min
+// CHECK-VERSION-TNO-OSV3-NOT: argument unused during compilation
+
+// RUN: %clang -target arm64-apple-ios10.1.0 -miphoneos-version-min=10.1.0.1 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TNO-OSV4 %s
+// CHECK-VERSION-TNO-OSV4: overriding '-miphoneos-version-min=10.1.0.1' option with '--target=arm64-apple-ios10.1.0'
+
+// RUN: %clang -target x86_64-apple-macos10.6 -mmacos-version-min=10.6 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TNO-SAME %s
+// CHECK-VERSION-TNO-SAME-NOT: overriding
+// CHECK-VERSION-TNO-SAME-NOT: argument unused during compilation
+
+// Target with OS version is not overriden by -m<os>-version-min variables:
+
+// RUN: %clang -target x86_64-apple-macos10.11.2 -mmacos-version-min=10.6 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TIGNORE-OSV1 %s
+// CHECK-VERSION-TIGNORE-OSV1: "x86_64-apple-macosx10.11.2"
+
+// RUN: %clang -target arm64-apple-ios11.0 -mios-version-min=9.0 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TIGNORE-OSV2 %s
+// CHECK-VERSION-TIGNORE-OSV2: "arm64-apple-ios11.0.0"
+
+// RUN: %clang -target arm64-apple-tvos11.0 -mtvos-version-min=9.0 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TIGNORE-OSV3 %s
+// CHECK-VERSION-TIGNORE-OSV3: "arm64-apple-tvos11.0.0"
+
+// RUN: %clang -target armv7k-apple-watchos3 -mwatchos-version-min=4 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TIGNORE-OSV4 %s
+// CHECK-VERSION-TIGNORE-OSV4: "thumbv7k-apple-watchos3.0.0"
+
+// Target without OS version inlcudes the OS given by -m<os>-version-min arguments:
+
+// RUN: %clang -target x86_64-apple-macos -mmacos-version-min=10.11 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-USE-OS-ARG1 %s
+// CHECK-VERSION-USE-OS-ARG1: "x86_64-apple-macosx10.11.0"
+
+// RUN: %clang -target arm64-apple-ios -mios-version-min=9.0 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-USE-OS-ARG2 %s
+// CHECK-VERSION-USE-OS-ARG2: "arm64-apple-ios9.0.0"
+
+// RUN: %clang -target arm64-apple-tvos -mtvos-version-min=10.0 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-USE-OS-ARG3 %s
+// CHECK-VERSION-USE-OS-ARG3: "arm64-apple-tvos10.0.0"
+
+// RUN: %clang -target armv7k-apple-watchos -mwatchos-version-min=4 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-USE-OS-ARG4 %s
+// CHECK-VERSION-USE-OS-ARG4: "thumbv7k-apple-watchos4.0.0"
+
+// Target with OS version is not overriden by environment variables:
+
+// RUN: env MACOSX_DEPLOYMENT_TARGET=10.1 \
+// RUN:   %clang -target i386-apple-macos10.5 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TMACOS-CMD %s
+// CHECK-VERSION-TMACOS-CMD: "i386-apple-macosx10.5.0"
+
+// RUN: env IPHONEOS_DEPLOYMENT_TARGET=10.1 \
+// RUN:   %clang -target arm64-apple-ios11 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TIOS-CMD %s
+// CHECK-VERSION-TIOS-CMD: "arm64-apple-ios11.0.0"
+
+// RUN: env TVOS_DEPLOYMENT_TARGET=8.3.1 \
+// RUN:   %clang -target arm64-apple-tvos9 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TTVOS-CMD %s
+// CHECK-VERSION-TTVOS-CMD: "arm64-apple-tvos9.0.0"
+
+// RUN: env WATCHOS_DEPLOYMENT_TARGET=2 \
+// RUN:   %clang -target armv7k-apple-watchos3 -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TWATCHOS-CMD %s
+// CHECK-VERSION-TWATCHOS-CMD: "thumbv7k-apple-watchos3.0.0"
+
+// Target with OS version is not overriden by the SDK:
+
+// RUN: %clang -target armv7-apple-ios9 -Wno-missing-sysroot -isysroot SDKs/iPhoneOS11.0.sdk -c -### %s 2>&1 | \
+// RUN: FileCheck --check-prefix=CHECK-VERSION-TIOS-SDK %s
+// CHECK-VERSION-TIOS-SDK: thumbv7-apple-ios9
+
+// RUN: %clang -target armv7k-apple-watchos4 -Wno-missing-sysroot -isysroot SDKs/WatchOS3.0.sdk -c -### %s 2>&1 | \
+// RUN: FileCheck --check-prefix=CHECK-VERSION-TWATCHOS-SDK %s
+// CHECK-VERSION-TWATCHOS-SDK: thumbv7k-apple-watchos4
+
+// RUN: %clang -target armv7-apple-tvos9 -Wno-missing-sysroot -isysroot SDKs/AppleTVOS11.0.sdk -c -### %s 2>&1 | \
+// RUN: FileCheck --check-prefix=CHECK-VERSION-TTVOS-SDK %s
+// CHECK-VERSION-TTVOS-SDK: thumbv7-apple-tvos9
+
+// Target with OS version is not overriden by arch:
+
+// RUN: %clang -target uknown-apple-macos10.11.2 -arch=armv7k -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TIGNORE-ARCH1 %s
+// CHECK-VERSION-TIGNORE-ARCH1: "unknown-apple-macosx10.11.2"
+
+// Target can be used to specify the environment:
+
+// RUN: %clang -target x86_64-apple-ios11-simulator -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TENV-SIM1 %s
+// CHECK-VERSION-TENV-SIM1: "x86_64-apple-ios11.0.0-simulator"
+
+// RUN: %clang -target armv7k-apple-ios10.1-simulator -c %s -### 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-VERSION-TENV-SIM2 %s
+// CHECK-VERSION-TENV-SIM2: "thumbv7k-apple-ios10.1.0-simulator"
diff --git a/test/Driver/debug-options.c b/test/Driver/debug-options.c
index 3bec1e1..b3f7aa6 100644
--- a/test/Driver/debug-options.c
+++ b/test/Driver/debug-options.c
@@ -245,3 +245,13 @@
 // RUN: %clang -###                  %s 2>&1 | FileCheck -check-prefix=NOMACRO %s
 // MACRO: "-debug-info-macro"
 // NOMACRO-NOT: "-debug-info-macro"
+//
+// RUN: %clang -### -gdwarf-5 -gembed-source %s 2>&1 | FileCheck -check-prefix=GEMBED_5 %s
+// RUN: %clang -### -gdwarf-2 -gembed-source %s 2>&1 | FileCheck -check-prefix=GEMBED_2 %s
+// RUN: %clang -### -gdwarf-5 -gno-embed-source %s 2>&1 | FileCheck -check-prefix=NOGEMBED_5 %s
+// RUN: %clang -### -gdwarf-2 -gno-embed-source %s 2>&1 | FileCheck -check-prefix=NOGEMBED_2 %s
+//
+// GEMBED_5:  "-gembed-source"
+// GEMBED_2:  error: invalid argument '-gembed-source' only allowed with '-gdwarf-5'
+// NOGEMBED_5-NOT:  "-gembed-source"
+// NOGEMBED_2-NOT:  error: invalid argument '-gembed-source' only allowed with '-gdwarf-5'
diff --git a/test/Driver/emulated-tls.cpp b/test/Driver/emulated-tls.cpp
index 38edc98..20abad1 100644
--- a/test/Driver/emulated-tls.cpp
+++ b/test/Driver/emulated-tls.cpp
@@ -1,7 +1,42 @@
-// Cygwin and OpenBSD use emutls. Clang should pass -femulated-tls to cc1
-// and cc1 should pass EmulatedTLS to LLVM CodeGen.
-// FIXME: Add more targets here to use emutls.
-// RUN: %clang -### -std=c++11 -target i686-pc-cygwin %s 2>&1 | FileCheck %s
-// RUN: %clang -### -std=c++11 -target i686-pc-openbsd %s 2>&1 | FileCheck %s
+// Android, Cygwin and OpenBSD use emutls by default.
+// Clang should pass -femulated-tls or -fno-emulated-tls to cc1 if they are used,
+// and cc1 should set up EmulatedTLS and ExplicitEmulatedTLS to LLVM CodeGen.
+//
+// RUN: %clang -### -target arm-linux-androideabi %s 2>&1 \
+// RUN: | FileCheck -check-prefix=DEFAULT %s
+// RUN: %clang -### -target arm-linux-gnu %s 2>&1 \
+// RUN: | FileCheck -check-prefix=DEFAULT %s
+// RUN: %clang -### -target i686-pc-cygwin %s 2>&1 \
+// RUN: | FileCheck -check-prefix=DEFAULT %s
+// RUN: %clang -### -target i686-pc-openbsd %s 2>&1 \
+// RUN: | FileCheck -check-prefix=DEFAULT %s
 
-// CHECK: "-cc1" {{.*}}"-femulated-tls"
+// RUN: %clang -### -target arm-linux-androideabi -fno-emulated-tls -femulated-tls %s 2>&1 \
+// RUN: | FileCheck -check-prefix=EMU %s
+// RUN: %clang -### -target arm-linux-gnu %s -fno-emulated-tls -femulated-tls 2>&1 \
+// RUN: | FileCheck -check-prefix=EMU %s
+// RUN: %clang -### -target i686-pc-cygwin %s -fno-emulated-tls -femulated-tls 2>&1 \
+// RUN: | FileCheck -check-prefix=EMU %s
+// RUN: %clang -### -target i686-pc-openbsd %s -fno-emulated-tls -femulated-tls 2>&1 \
+// RUN: | FileCheck -check-prefix=EMU %s
+
+// RUN: %clang -### -target arm-linux-androideabi -femulated-tls -fno-emulated-tls %s 2>&1 \
+// RUN: | FileCheck -check-prefix=NOEMU %s
+// RUN: %clang -### -target arm-linux-gnu %s -femulated-tls -fno-emulated-tls 2>&1 \
+// RUN: | FileCheck -check-prefix=NOEMU %s
+// RUN: %clang -### -target i686-pc-cygwin %s -femulated-tls -fno-emulated-tls 2>&1 \
+// RUN: | FileCheck -check-prefix=NOEMU %s
+// RUN: %clang -### -target i686-pc-openbsd %s -femulated-tls -fno-emulated-tls 2>&1 \
+// RUN: | FileCheck -check-prefix=NOEMU %s
+
+
+// Default without -f[no-]emulated-tls, will be decided by the target triple.
+// DEFAULT-NOT: "-cc1" {{.*}}"-femulated-tls"
+// DEFAULT-NOT: "-cc1" {{.*}}"-fno-emulated-tls"
+
+// Explicit and last -f[no-]emulated-tls flag will be passed to cc1.
+// EMU: "-cc1" {{.*}}"-femulated-tls"
+// EMU-NOT: "-cc1" {{.*}}"-fno-emulated-tls"
+
+// NOEMU: "-cc1" {{.*}}"-fno-emulated-tls"
+// NOEMU-NOT: "-cc1" {{.*}}"-femulated-tls"
diff --git a/test/Driver/fast-math.c b/test/Driver/fast-math.c
index 5094a5b..7bb057f 100644
--- a/test/Driver/fast-math.c
+++ b/test/Driver/fast-math.c
@@ -121,18 +121,23 @@
 // RUN:   | FileCheck --check-prefix=CHECK-UNSAFE-MATH %s
 // CHECK-UNSAFE-MATH: "-cc1"
 // CHECK-UNSAFE-MATH: "-menable-unsafe-fp-math"
+// CHECK-UNSAFE-MATH: "-mreassociate"
 //
 // RUN: %clang -### -fno-fast-math -fno-math-errno -fassociative-math -freciprocal-math \
 // RUN:     -fno-signed-zeros -fno-trapping-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH-UNSAFE-MATH %s
 // CHECK-NO-FAST-MATH-UNSAFE-MATH: "-cc1"
 // CHECK-NO-FAST-MATH-UNSAFE-MATH: "-menable-unsafe-fp-math"
-//
+// CHECK-NO-FAST-MATH-UNSAFE-MATH: "-mreassociate"
+
+// The 2nd -fno-fast-math overrides -fassociative-math.
+
 // RUN: %clang -### -fno-fast-math -fno-math-errno -fassociative-math -freciprocal-math \
 // RUN:     -fno-fast-math -fno-signed-zeros -fno-trapping-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-UNSAFE-MATH-NO-FAST-MATH %s
 // CHECK-UNSAFE-MATH-NO-FAST-MATH: "-cc1"
 // CHECK-UNSAFE-MATH-NO-FAST-MATH-NOT: "-menable-unsafe-fp-math"
+// CHECK-UNSAFE-MATH-NO-FAST-MATH-NOT: "-mreassociate"
 //
 // Check that various umbrella flags also enable these frontend options.
 // RUN: %clang -### -ffast-math -c %s 2>&1 \
@@ -151,7 +156,7 @@
 // One umbrella flag is *really* weird and also changes the semantics of the
 // program by adding a special preprocessor macro. Check that the frontend flag
 // modeling this semantic change is provided. Also check that the flag is not
-// present if any of the optimization is disabled.
+// present if any of the optimizations are disabled.
 // RUN: %clang -### -ffast-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-FAST-MATH %s
 // RUN: %clang -### -fno-fast-math -ffast-math -c %s 2>&1 \
@@ -175,8 +180,11 @@
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
 // RUN: %clang -### -ffast-math -fmath-errno -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -fno-associative-math -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH --check-prefix=CHECK-ASSOC-MATH %s
 // CHECK-NO-FAST-MATH: "-cc1"
 // CHECK-NO-FAST-MATH-NOT: "-ffast-math"
+// CHECK-ASSOC-MATH-NOT: "-mreassociate"
 //
 // Check various means of disabling these flags, including disabling them after
 // they've been enabled via an umbrella flag.
@@ -209,21 +217,16 @@
 // CHECK-NO-NO-NANS-NOT: "-menable-no-nans"
 // CHECK-NO-NO-NANS-NOT: "-ffinite-math-only"
 // CHECK-NO-NO-NANS: "-o"
-//
+
+// A later inverted option overrides an earlier option.
+
 // RUN: %clang -### -fassociative-math -freciprocal-math -fno-signed-zeros \
 // RUN:     -fno-trapping-math -fno-associative-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
-// RUN: %clang -### -fassociative-math -freciprocal-math -fno-signed-zeros \
-// RUN:     -fno-trapping-math -fno-reciprocal-math -c %s 2>&1 \
-// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
-// RUN: %clang -### -fassociative-math -freciprocal-math -fno-signed-zeros \
-// RUN:     -fno-trapping-math -fsigned-zeros -c %s 2>&1 \
-// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
-// RUN: %clang -### -fassociative-math -freciprocal-math -fno-signed-zeros \
-// RUN:     -fno-trapping-math -ftrapping-math -c %s 2>&1 \
-// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+
 // RUN: %clang -### -funsafe-math-optimizations -fno-associative-math -c %s \
 // RUN:   2>&1 | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+
 // RUN: %clang -### -funsafe-math-optimizations -fno-reciprocal-math -c %s \
 // RUN:   2>&1 | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
 // RUN: %clang -### -funsafe-math-optimizations -fsigned-zeros -c %s 2>&1 \
@@ -235,6 +238,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
 // RUN: %clang -### -ffast-math -fno-associative-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+
 // RUN: %clang -### -ffast-math -fno-reciprocal-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
 // RUN: %clang -### -ffast-math -fsigned-zeros -c %s 2>&1 \
@@ -243,10 +247,43 @@
 // RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
 // RUN: %clang -### -ffast-math -fno-unsafe-math-optimizations -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+
 // CHECK-NO-UNSAFE-MATH: "-cc1"
 // CHECK-NO-UNSAFE-MATH-NOT: "-menable-unsafe-fp-math"
+// CHECK-NO_UNSAFE-MATH-NOT: "-mreassociate"
 // CHECK-NO-UNSAFE-MATH: "-o"
-//
+
+
+// Reassociate is allowed because it does not require reciprocal-math.
+
+// RUN: %clang -### -fassociative-math -freciprocal-math -fno-signed-zeros \
+// RUN:     -fno-trapping-math -fno-reciprocal-math -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-REASSOC-NO-UNSAFE-MATH %s
+
+// CHECK-REASSOC-NO-UNSAFE-MATH: "-cc1"
+// CHECK-REASSOC-NO-UNSAFE-MATH-NOT: "-menable-unsafe-fp-math"
+// CHECK-REASSOC-NO_UNSAFE-MATH: "-mreassociate"
+// CHECK-REASSOC-NO-UNSAFE-MATH-NOT: "-menable-unsafe-fp-math"
+// CHECK-REASSOC-NO-UNSAFE-MATH: "-o"
+
+
+// In these runs, reassociate is not allowed because both no-signed-zeros and no-trapping-math are required.
+
+// RUN: %clang -### -fassociative-math -freciprocal-math -fno-signed-zeros \
+// RUN:     -fno-trapping-math -fsigned-zeros -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-REASSOC-NO-UNSAFE-MATH %s
+
+// RUN: %clang -### -fassociative-math -freciprocal-math -fno-signed-zeros \
+// RUN:     -fno-trapping-math -ftrapping-math -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-REASSOC-NO-UNSAFE-MATH %s
+
+// CHECK-NO-REASSOC-NO-UNSAFE-MATH: "-cc1"
+// CHECK-NO-REASSOC-NO-UNSAFE-MATH-NOT: "-menable-unsafe-fp-math"
+// CHECK-NO-REASSOC-NO_UNSAFE-MATH-NOT: "-mreassociate"
+// CHECK-NO-REASSOC-NO-UNSAFE-MATH-NOT: "-menable-unsafe-fp-math"
+// CHECK-NO-REASSOC-NO-UNSAFE-MATH: "-o"
+
+
 // RUN: %clang -### -ftrapping-math -fno-trapping-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-TRAPPING-MATH %s
 // CHECK-NO-TRAPPING-MATH: "-fno-trapping-math"
diff --git a/test/Driver/fno-escaping-block-tail-calls.c b/test/Driver/fno-escaping-block-tail-calls.c
new file mode 100644
index 0000000..2d1b0a3
--- /dev/null
+++ b/test/Driver/fno-escaping-block-tail-calls.c
@@ -0,0 +1,7 @@
+// RUN: %clang -### %s -fescaping-block-tail-calls -fno-escaping-block-tail-calls 2> %t
+// RUN: FileCheck --check-prefix=CHECK-DISABLE < %t %s
+// CHECK-DISABLE: "-fno-escaping-block-tail-calls"
+
+// RUN: %clang -### %s -fno-escaping-block-tail-calls -fescaping-block-tail-calls 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-DISABLE < %t %s
+// CHECK-NO-DISABLE-NOT: "-fno-escaping-block-tail-calls"
diff --git a/test/Driver/fno-rtti-data.cpp b/test/Driver/fno-rtti-data.cpp
new file mode 100644
index 0000000..cc2d5b6
--- /dev/null
+++ b/test/Driver/fno-rtti-data.cpp
@@ -0,0 +1,2 @@
+// RUN: %clang -### -fno-rtti-data %s 2>&1 | FileCheck %s
+// CHECK: -fno-rtti-data
diff --git a/test/Driver/frame-pointer.c b/test/Driver/frame-pointer.c
index ecb16af..cf5c07d 100644
--- a/test/Driver/frame-pointer.c
+++ b/test/Driver/frame-pointer.c
@@ -33,6 +33,18 @@
 // RUN: %clang -target mips64el-linux-gnu -### -S -O0 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK0-32 %s
 // RUN: %clang -target mips64el-linux-gnu -### -S -O1 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK1-32 %s
 
+// RUN: %clang -target riscv32-unknown-elf -### -S -O0 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK0-32 %s
+// RUN: %clang -target riscv32-unknown-elf -### -S -O1 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK1-32 %s
+// RUN: %clang -target riscv32-unknown-elf -### -S -O2 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK2-32 %s
+// RUN: %clang -target riscv32-unknown-elf -### -S -O3 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK3-32 %s
+// RUN: %clang -target riscv32-unknown-elf -### -S -Os %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECKs-32 %s
+
+// RUN: %clang -target riscv64-unknown-elf -### -S -O0 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK0-64 %s
+// RUN: %clang -target riscv64-unknown-elf -### -S -O1 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK1-64 %s
+// RUN: %clang -target riscv64-unknown-elf -### -S -O2 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK2-64 %s
+// RUN: %clang -target riscv64-unknown-elf -### -S -O3 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK3-64 %s
+// RUN: %clang -target riscv64-unknown-elf -### -S -Os %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECKs-64 %s
+
 // CHECK0-32: -mdisable-fp-elim
 // CHECK1-32-NOT: -mdisable-fp-elim
 // CHECK2-32-NOT: -mdisable-fp-elim
diff --git a/test/Driver/freebsd.c b/test/Driver/freebsd.c
index 0f89d25..ac663ff 100644
--- a/test/Driver/freebsd.c
+++ b/test/Driver/freebsd.c
@@ -36,6 +36,12 @@
 // RUN:   | FileCheck --check-prefix=CHECK-LIB32PATHS %s
 // CHECK-LIB32PATHS: libraries: ={{.*:?}}/usr/lib32
 //
+// Check that O32 MIPS uses /usr/lib32 on a 64-bit tree.
+//
+// RUN: %clang -target mips-freebsd12 %s \
+// RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -print-search-dirs 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-LIB32PATHS %s
+//
 // Check that the new linker flags are passed to FreeBSD
 // RUN: %clang -no-canonical-prefixes -target x86_64-pc-freebsd8 -m32 %s \
 // RUN:   --sysroot=%S/Inputs/multiarch_freebsd64_tree -### 2>&1 \
diff --git a/test/Driver/fsanitize-blacklist.c b/test/Driver/fsanitize-blacklist.c
index 9268bb2..0dce1a2 100644
--- a/test/Driver/fsanitize-blacklist.c
+++ b/test/Driver/fsanitize-blacklist.c
@@ -13,6 +13,7 @@
 // RUN: echo "badline" > %t.bad
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-blacklist=%t.good -fsanitize-blacklist=%t.second %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-BLACKLIST
+// RUN: %clang -target aarch64-linux-gnu -fsanitize=hwaddress -fsanitize-blacklist=%t.good -fsanitize-blacklist=%t.second %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-BLACKLIST
 // CHECK-BLACKLIST: -fsanitize-blacklist={{.*}}.good" "-fsanitize-blacklist={{.*}}.second
 
 // Now, check for -fdepfile-entry flags.
@@ -20,15 +21,20 @@
 // CHECK-BLACKLIST2: -fdepfile-entry={{.*}}.good" "-fdepfile-entry={{.*}}.second
 
 // Check that the default blacklist is not added as an extra dependency.
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-BLACKLIST --implicit-check-not=fdepfile-entry
-// CHECK-DEFAULT-BLACKLIST: -fsanitize-blacklist={{.*}}asan_blacklist.txt
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-BLACKLIST-ASAN --implicit-check-not=fdepfile-entry --implicit-check-not=-fsanitize-blacklist=
+// CHECK-DEFAULT-BLACKLIST-ASAN: -fsanitize-blacklist={{.*[^w]}}asan_blacklist.txt
+// RUN: %clang -target aarch64-linux-gnu -fsanitize=hwaddress -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-BLACKLIST-HWASAN --implicit-check-not=fdepfile-entry --implicit-check-not=-fsanitize-blacklist=
+// CHECK-DEFAULT-BLACKLIST-HWASAN: -fsanitize-blacklist={{.*}}hwasan_blacklist.txt
 
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=integer -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-UBSAN-BLACKLIST --implicit-check-not=fdepfile-entry
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=nullability -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-UBSAN-BLACKLIST --implicit-check-not=fdepfile-entry
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-UBSAN-BLACKLIST --implicit-check-not=fdepfile-entry
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=alignment -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-UBSAN-BLACKLIST --implicit-check-not=fdepfile-entry
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=integer -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-UBSAN-BLACKLIST --implicit-check-not=fdepfile-entry --implicit-check-not=-fsanitize-blacklist=
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=nullability -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-UBSAN-BLACKLIST --implicit-check-not=fdepfile-entry --implicit-check-not=-fsanitize-blacklist=
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-UBSAN-BLACKLIST --implicit-check-not=fdepfile-entry --implicit-check-not=-fsanitize-blacklist=
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=alignment -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-UBSAN-BLACKLIST --implicit-check-not=fdepfile-entry --implicit-check-not=-fsanitize-blacklist=
 // CHECK-DEFAULT-UBSAN-BLACKLIST: -fsanitize-blacklist={{.*}}ubsan_blacklist.txt
 
+// Check that combining ubsan and another sanitizer results in both blacklists being used.
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined,address -resource-dir=%S/Inputs/resource_dir %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT-UBSAN-BLACKLIST --check-prefix=CHECK-DEFAULT-ASAN-BLACKLIST --implicit-check-not=fdepfile-entry --implicit-check-not=-fsanitize-blacklist=
+
 // Ignore -fsanitize-blacklist flag if there is no -fsanitize flag.
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-blacklist=%t.good %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-SANITIZE --check-prefix=DELIMITERS
 // CHECK-NO-SANITIZE-NOT: -fsanitize-blacklist
@@ -51,7 +57,7 @@
 // CHECK-BAD-BLACKLIST: error: malformed sanitizer blacklist: 'error parsing file '{{.*}}.bad': malformed line 1: 'badline''
 
 // -fno-sanitize-blacklist disables all blacklists specified earlier.
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-blacklist=%t.good -fno-sanitize-blacklist -fsanitize-blacklist=%t.second %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ONLY-FIRST-DISABLED
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-blacklist=%t.good -fno-sanitize-blacklist -fsanitize-blacklist=%t.second %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ONLY-FIRST-DISABLED --implicit-check-not=-fsanitize-blacklist=
 // CHECK-ONLY_FIRST-DISABLED-NOT: good
 // CHECK-ONLY-FIRST-DISABLED: -fsanitize-blacklist={{.*}}.second
 // CHECK-ONLY_FIRST-DISABLED-NOT: good
diff --git a/test/Driver/fsanitize-coverage.c b/test/Driver/fsanitize-coverage.c
index cd05cfd..4073ea4 100644
--- a/test/Driver/fsanitize-coverage.c
+++ b/test/Driver/fsanitize-coverage.c
@@ -6,6 +6,7 @@
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=kernel-address -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=hwaddress -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=memory -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=leak -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC
diff --git a/test/Driver/fsanitize.c b/test/Driver/fsanitize.c
index 0f37070..d92df55 100644
--- a/test/Driver/fsanitize.c
+++ b/test/Driver/fsanitize.c
@@ -3,6 +3,7 @@
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize-undefined-trap-on-error %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-TRAP
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined-trap -fsanitize-undefined-trap-on-error %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-TRAP
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-undefined-trap-on-error -fsanitize=undefined-trap %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-TRAP
+// CHECK-UNDEFINED-TRAP-NOT: -fsanitize-recover
 // CHECK-UNDEFINED-TRAP: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|builtin|returns-nonnull-attribute|nonnull-attribute|function),?){19}"}}
 // CHECK-UNDEFINED-TRAP: "-fsanitize-trap=alignment,array-bounds,bool,builtin,enum,float-cast-overflow,float-divide-by-zero,function,integer-divide-by-zero,nonnull-attribute,null,pointer-overflow,return,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,vla-bound"
 // CHECK-UNDEFINED-TRAP2: "-fsanitize-trap=alignment,array-bounds,bool,builtin,enum,float-cast-overflow,float-divide-by-zero,function,integer-divide-by-zero,nonnull-attribute,null,pointer-overflow,return,returns-nonnull-attribute,shift-base,shift-exponent,unreachable,vla-bound"
@@ -13,9 +14,6 @@
 // RUN: %clang -target x86_64-apple-darwin10 -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-DARWIN
 // CHECK-UNDEFINED-DARWIN: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|function|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|builtin|returns-nonnull-attribute|nonnull-attribute),?){19}"}}
 
-// RUN: %clang -target i386-unknown-openbsd -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-OPENBSD
-// CHECK-UNDEFINED-OPENBSD: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|builtin|returns-nonnull-attribute|nonnull-attribute),?){18}"}}
-
 // RUN: %clang -target i386-pc-win32 -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-WIN --check-prefix=CHECK-UNDEFINED-WIN32
 // RUN: %clang -target i386-pc-win32 -fsanitize=undefined -x c++ %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-WIN --check-prefix=CHECK-UNDEFINED-WIN32 --check-prefix=CHECK-UNDEFINED-WIN-CXX
 // RUN: %clang -target x86_64-pc-win32 -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-WIN --check-prefix=CHECK-UNDEFINED-WIN64
@@ -85,6 +83,15 @@
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=kernel-address,address -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANKA-SANA
 // CHECK-SANKA-SANA: '-fsanitize=kernel-address' not allowed with '-fsanitize=address'
 
+// RUN: %clang -target aarch64-linux-gnu -fsanitize=hwaddress,thread -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANHA-SANT
+// CHECK-SANHA-SANT: '-fsanitize=hwaddress' not allowed with '-fsanitize=thread'
+
+// RUN: %clang -target aarch64-linux-gnu -fsanitize=hwaddress,memory -pie -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANHA-SANM
+// CHECK-SANHA-SANM: '-fsanitize=hwaddress' not allowed with '-fsanitize=memory'
+
+// RUN: %clang -target aarch64-linux-gnu -fsanitize=hwaddress,address -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANHA-SANA
+// CHECK-SANHA-SANA: '-fsanitize=hwaddress' not allowed with '-fsanitize=address'
+
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=kernel-address,leak -pie -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANKA-SANL
 // CHECK-SANKA-SANL: '-fsanitize=kernel-address' not allowed with '-fsanitize=leak'
 
@@ -174,11 +181,11 @@
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=memory -fsanitize-memory-use-after-dtor %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-USE-AFTER-DTOR
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=memory -fno-sanitize-memory-use-after-dtor -fsanitize-memory-use-after-dtor %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-USE-AFTER-DTOR
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=memory %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-USE-AFTER-DTOR
 // CHECK-USE-AFTER-DTOR: -cc1{{.*}}-fsanitize-memory-use-after-dtor
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=memory -fno-sanitize-memory-use-after-dtor %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-USE-AFTER-DTOR-OFF
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=memory -fsanitize-memory-use-after-dtor -fno-sanitize-memory-use-after-dtor %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-USE-AFTER-DTOR-OFF
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=memory %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-USE-AFTER-DTOR-OFF
 // CHECK-USE-AFTER-DTOR-OFF-NOT: -cc1{{.*}}memory-use-after-dtor
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-address-field-padding=0 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-FIELD-PADDING-0
@@ -202,7 +209,9 @@
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=memory %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-PIE
 // RUN: %clang -target x86_64-unknown-freebsd -fsanitize=memory %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-PIE
 // RUN: %clang -target aarch64-linux-gnu -fsanitize=memory %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-PIE
-// RUN: %clang -target arm-linux-androideabi -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-PIE
+// RUN: %clang -target arm-linux-androideabi -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-PIC-NO-PIE
+// RUN: %clang -target arm-linux-androideabi24 -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-PIE
+// RUN: %clang -target aarch64-linux-android -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-PIE
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-PIE
 // RUN: %clang -target i386-linux-gnu -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-PIE
 
@@ -210,6 +219,10 @@
 // CHECK-NO-PIE: "-mrelocation-model" "static"
 // CHECK-NO-PIE-NOT: "-pie"
 
+// CHECK-PIC-NO-PIE-NOT: "-pie"
+// CHECK-PIC-NO-PIE: "-mrelocation-model" "pic"
+// CHECK-PIC-NO-PIE-NOT: "-pie"
+
 // CHECK-PIE: "-mrelocation-model" "pic" "-pic-level" "2" "-pic-is-pie"
 // CHECK-PIE: "-pie"
 
@@ -280,6 +293,9 @@
 // RUN: %clang -target mips-unknown-linux -fsanitize=leak %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANL-MIPS
 // CHECK-SANL-MIPS: unsupported option '-fsanitize=leak' for target 'mips-unknown-linux'
 
+// RUN: %clang -target powerpc64-unknown-linux -fsanitize=leak %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANL-PPC64
+// RUN: %clang -target powerpc64le-unknown-linux -fsanitize=leak %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANL-PPC64
+// CHECK-SANL-PPC64: "-fsanitize=leak"
 // RUN: %clang -target powerpc-unknown-linux -fsanitize=leak %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANL-PPC
 // CHECK-SANL-PPC: unsupported option '-fsanitize=leak' for target 'powerpc-unknown-linux'
 
@@ -325,10 +341,10 @@
 // CHECK-TSAN-ARM-IOS: unsupported option '-fsanitize=thread' for target 'arm-apple-ios'
 
 // RUN: %clang -target i386-apple-iossimulator -fsanitize=thread %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-TSAN-I386-IOSSIMULATOR
-// CHECK-TSAN-I386-IOSSIMULATOR: unsupported option '-fsanitize=thread' for target 'i386-apple-iossimulator'
+// CHECK-TSAN-I386-IOSSIMULATOR: unsupported option '-fsanitize=thread' for target 'i386-apple-iossimulator-simulator'
 
 // RUN: %clang -target i386-apple-tvossimulator -fsanitize=thread %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-TSAN-I386-TVOSSIMULATOR
-// CHECK-TSAN-I386-TVOSSIMULATOR: unsupported option '-fsanitize=thread' for target 'i386-apple-tvossimulator'
+// CHECK-TSAN-I386-TVOSSIMULATOR: unsupported option '-fsanitize=thread' for target 'i386-apple-tvossimulator-simulator'
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=thread -fsanitize-thread-memory-access %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-TSAN-MEMORY-ACCESS
 // CHECK-TSAN-MEMORY-ACCESS-NOT: -cc1{{.*}}tsan-instrument-memory-accesses=0
@@ -372,9 +388,6 @@
 // RUN: %clang -target armv7-apple-ios7 -miphoneos-version-min=7.0 -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-IOS
 // CHECK-ASAN-IOS: -fsanitize=address
 
-// RUN: %clang -target i386-pc-openbsd -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-OPENBSD
-// CHECK-ASAN-OPENBSD: unsupported option '-fsanitize=address' for target 'i386-pc-openbsd'
-
 // RUN: %clang -target x86_64-apple-darwin -fsanitize=leak %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-LSAN-X86-64-DARWIN
 // CHECK-LSAN-X86-64-DARWIN-NOT: unsupported option
 
@@ -414,11 +427,11 @@
 
 // RUN: %clang -target i386-apple-iossimulator -fsanitize=efficiency-cache-frag %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ESAN-I386-IOSSIMULATOR
 // RUN: %clang -target i386-apple-iossimulator -fsanitize=efficiency-working-set %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ESAN-I386-IOSSIMULATOR
-// CHECK-ESAN-I386-IOSSIMULATOR: unsupported option '-fsanitize=efficiency-{{.*}}' for target 'i386-apple-iossimulator'
+// CHECK-ESAN-I386-IOSSIMULATOR: unsupported option '-fsanitize=efficiency-{{.*}}' for target 'i386-apple-iossimulator-simulator'
 
 // RUN: %clang -target i386-apple-tvossimulator -fsanitize=efficiency-cache-frag %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ESAN-I386-TVOSSIMULATOR
 // RUN: %clang -target i386-apple-tvossimulator -fsanitize=efficiency-working-set %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ESAN-I386-TVOSSIMULATOR
-// CHECK-ESAN-I386-TVOSSIMULATOR: unsupported option '-fsanitize=efficiency-{{.*}}' for target 'i386-apple-tvossimulator'
+// CHECK-ESAN-I386-TVOSSIMULATOR: unsupported option '-fsanitize=efficiency-{{.*}}' for target 'i386-apple-tvossimulator-simulator'
 
 
 
@@ -455,6 +468,9 @@
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-trap=address -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-TRAP
 // CHECK-ASAN-TRAP: error: unsupported argument 'address' to option '-fsanitize-trap'
 
+// RUN: %clang -target aarch64-linux-gnu -fsanitize-trap=hwaddress -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HWASAN-TRAP
+// CHECK-HWASAN-TRAP: error: unsupported argument 'hwaddress' to option '-fsanitize-trap'
+
 // RUN: %clang -target x86_64-apple-darwin10 -mmacosx-version-min=10.7 -flto -fsanitize=cfi-vcall -fno-sanitize-trap=cfi -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI-NOTRAP-OLD-MACOS
 // CHECK-CFI-NOTRAP-OLD-MACOS: error: unsupported option '-fno-sanitize-trap=cfi-vcall' for target 'x86_64-apple-darwin10'
 
@@ -471,6 +487,14 @@
 // CHECK-CFI-NO-CROSS-DSO: -emit-llvm-bc
 // CHECK-CFI-NO-CROSS-DSO-NOT: -fsanitize-cfi-cross-dso
 
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=cfi-icall -fsanitize-cfi-icall-generalize-pointers -fvisibility=hidden -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI-GENERALIZE-POINTERS
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=cfi-icall -fvisibility=hidden -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-CFI-GENERALIZE-POINTERS
+// CHECK-CFI-GENERALIZE-POINTERS: -fsanitize-cfi-icall-generalize-pointers
+// CHECK-NO-CFI-GENERALIZE-POINTERS-NOT: -fsanitize-cfi-icall-generalize-pointers
+
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=cfi-icall -fsanitize-cfi-icall-generalize-pointers -fsanitize-cfi-cross-dso -fvisibility=hidden -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI-GENERALIZE-AND-CROSS-DSO
+// CHECK-CFI-GENERALIZE-AND-CROSS-DSO: error: invalid argument '-fsanitize-cfi-cross-dso' not allowed with '-fsanitize-cfi-icall-generalize-pointers'
+
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=cfi -fsanitize-stats -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI-STATS
 // CHECK-CFI-STATS: -fsanitize-stats
 
@@ -496,7 +520,7 @@
 // NOSP-NOT: "-fsanitize=safe-stack"
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=address,safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP-ASAN
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=address,safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP-ASAN
 // RUN: %clang -target x86_64-linux-gnu -fstack-protector -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=safe-stack -fstack-protector-all -### %s 2>&1 | FileCheck %s -check-prefix=SP
 // RUN: %clang -target arm-linux-androideabi -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP
@@ -504,14 +528,11 @@
 // RUN: %clang -target i386-contiki-unknown -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP
 // NO-SP-NOT: stack-protector
 // NO-SP: "-fsanitize=safe-stack"
+// SP-ASAN: error: invalid argument '-fsanitize=safe-stack' not allowed with '-fsanitize=address'
 // SP: "-fsanitize=safe-stack"
 // SP: -stack-protector
 // NO-SP-NOT: stack-protector
 
-// NO-SP-ASAN-NOT: stack-protector
-// NO-SP-ASAN: "-fsanitize=address,safe-stack"
-// NO-SP-ASAN-NOT: stack-protector
-
 // RUN: %clang -target powerpc64-unknown-linux-gnu -fsanitize=memory %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-SANM
 // RUN: %clang -target powerpc64le-unknown-linux-gnu -fsanitize=memory %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-SANM
 // CHECK-SANM: "-fsanitize=memory"
@@ -578,6 +599,8 @@
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-minimal-runtime -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-UBSAN-MINIMAL
 // CHECK-ASAN-UBSAN-MINIMAL: error: invalid argument '-fsanitize-minimal-runtime' not allowed with '-fsanitize=address'
+// RUN: %clang -target aarch64-linux-gnu -fsanitize=hwaddress -fsanitize-minimal-runtime %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-HWASAN-MINIMAL
+// CHECK-HWASAN-MINIMAL: error: invalid argument '-fsanitize-minimal-runtime' not allowed with '-fsanitize=hwaddress'
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=cfi -flto -fvisibility=hidden -fsanitize-minimal-runtime %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI-MINIMAL
 // CHECK-CFI-MINIMAL: "-fsanitize=cfi-derived-cast,cfi-icall,cfi-unrelated-cast,cfi-nvcall,cfi-vcall"
@@ -591,3 +614,38 @@
 // CHECK-CFI-NOICALL-MINIMAL: "-fsanitize=cfi-derived-cast,cfi-unrelated-cast,cfi-nvcall,cfi-vcall"
 // CHECK-CFI-NOICALL-MINIMAL: "-fsanitize-trap=cfi-derived-cast,cfi-unrelated-cast,cfi-nvcall,cfi-vcall"
 // CHECK-CFI-NOICALL-MINIMAL: "-fsanitize-minimal-runtime"
+
+// RUN: %clang -target aarch64-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO
+// RUN: %clang -target arm-linux-androideabi -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO
+// RUN: %clang -target i386-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO
+// RUN: %clang -target mips64-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO
+// RUN: %clang -target mips64el-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO
+// RUN: %clang -target mips-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO
+// RUN: %clang -target mipsel-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO
+// CHECK-SCUDO: "-fsanitize=scudo"
+
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO-PIE
+// RUN: %clang -target arm-linux-androideabi -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO-PIE
+// CHECK-SCUDO-PIE: "-mrelocation-model" "pic" "-pic-level" "2" "-pic-is-pie"
+// CHECK-SCUDO-PIE: "-pie"
+
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=scudo,undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO-UBSAN
+// CHECK-SCUDO-UBSAN: "-fsanitize={{.*}}scudo"
+
+// RUN: %clang -target powerpc-unknown-linux -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-SCUDO
+// CHECK-NO-SCUDO: unsupported option
+
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=scudo,address  %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO-ASAN
+// CHECK-SCUDO-ASAN: error: invalid argument '-fsanitize=scudo' not allowed with '-fsanitize=address'
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=scudo,leak  %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO-LSAN
+// CHECK-SCUDO-LSAN: error: invalid argument '-fsanitize=scudo' not allowed with '-fsanitize=leak'
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=scudo,memory  %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO-MSAN
+// CHECK-SCUDO-MSAN: error: invalid argument '-fsanitize=scudo' not allowed with '-fsanitize=memory'
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=scudo,thread  %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO-TSAN
+// CHECK-SCUDO-TSAN: error: invalid argument '-fsanitize=scudo' not allowed with '-fsanitize=thread'
+// RUN: %clang -target aarch64-linux-gnu -fsanitize=scudo,hwaddress  %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO-HWASAN
+// CHECK-SCUDO-HWASAN: error: invalid argument '-fsanitize=scudo' not allowed with '-fsanitize=hwaddress'
+
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=hwaddress %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANHA-X86_64
+// CHECK-SANHA-X86_64: unsupported option '-fsanitize=hwaddress' for target
diff --git a/test/Driver/fuchsia.c b/test/Driver/fuchsia.c
index d3af79e..57dbc73 100644
--- a/test/Driver/fuchsia.c
+++ b/test/Driver/fuchsia.c
@@ -1,16 +1,17 @@
 // RUN: %clang %s -### -no-canonical-prefixes --target=x86_64-unknown-fuchsia \
-// RUN:     --sysroot=%S/platform -fuse-ld=ld 2>&1 \
+// RUN:     --sysroot=%S/platform 2>&1 \
 // RUN:     | FileCheck -check-prefixes=CHECK,CHECK-X86_64 %s
 // RUN: %clang %s -### -no-canonical-prefixes --target=aarch64-unknown-fuchsia \
-// RUN:     --sysroot=%S/platform -fuse-ld=ld 2>&1 \
+// RUN:     --sysroot=%S/platform 2>&1 \
 // RUN:     | FileCheck -check-prefixes=CHECK,CHECK-AARCH64 %s
 // CHECK: {{.*}}clang{{.*}}" "-cc1"
+// CHECK: "--mrelax-relocations"
 // CHECK: "-munwind-tables"
 // CHECK: "-fuse-init-array"
 // CHECK: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK: "-internal-externc-isystem" "[[SYSROOT]]{{/|\\\\}}include"
-// CHECK: {{.*}}lld{{.*}}" "-flavor" "gnu"
-// CHECK: "-z" "rodynamic"
+// CHECK: "-fno-common"
+// CHECK: {{.*}}ld.lld{{.*}}" "-z" "rodynamic"
 // CHECK: "--sysroot=[[SYSROOT]]"
 // CHECK: "-pie"
 // CHECK: "--build-id"
@@ -79,3 +80,35 @@
 // CHECK-ASAN-SHARED: "-fsanitize-address-globals-dead-stripping"
 // CHECK-ASAN-SHARED: "{{.*[/\\]}}libclang_rt.asan-x86_64.so"
 // CHECK-ASAN-SHARED-NOT: "{{.*[/\\]}}libclang_rt.asan-preinit-x86_64.a"
+
+// RUN: %clang %s -### --target=x86_64-fuchsia \
+// RUN:     -fsanitize=fuzzer 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-FUZZER-X86
+// CHECK-FUZZER-X86: "-fsanitize=fuzzer,fuzzer-no-link"
+// CHECK-FUZZER-X86: "{{.*[/\\]}}libclang_rt.fuzzer-x86_64.a"
+
+// RUN: %clang %s -### --target=aarch64-fuchsia \
+// RUN:     -fsanitize=fuzzer 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-FUZZER-AARCH64
+// CHECK-FUZZER-AARCH64: "-fsanitize=fuzzer,fuzzer-no-link"
+// CHECK-FUZZER-AARCH64: "{{.*[/\\]}}libclang_rt.fuzzer-aarch64.a"
+
+// RUN: %clang %s -### --target=x86_64-fuchsia \
+// RUN:     -fsanitize=scudo 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-SCUDO-X86
+// CHECK-SCUDO-X86: "-fsanitize=scudo"
+// CHECK-SCUDO-X86: "-pie"
+// CHECK-SCUDO-X86: "{{.*[/\\]}}libclang_rt.scudo-x86_64.so"
+
+// RUN: %clang %s -### --target=aarch64-fuchsia \
+// RUN:     -fsanitize=scudo 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-SCUDO-AARCH64
+// CHECK-SCUDO-AARCH64: "-fsanitize=scudo"
+// CHECK-SCUDO-AARCH64: "-pie"
+// CHECK-SCUDO-AARCH64: "{{.*[/\\]}}libclang_rt.scudo-aarch64.so"
+
+// RUN: %clang %s -### --target=x86_64-fuchsia \
+// RUN:     -fsanitize=scudo -fPIC -shared 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-SCUDO-SHARED
+// CHECK-SCUDO-SHARED: "-fsanitize=scudo"
+// CHECK-SCUDO-SHARED: "{{.*[/\\]}}libclang_rt.scudo-x86_64.so"
diff --git a/test/Driver/fuchsia.cpp b/test/Driver/fuchsia.cpp
index ab0a901..cbd08ad 100644
--- a/test/Driver/fuchsia.cpp
+++ b/test/Driver/fuchsia.cpp
@@ -1,13 +1,12 @@
 // RUN: %clangxx %s -### -no-canonical-prefixes --target=x86_64-unknown-fuchsia \
-// RUN:     --sysroot=%S/platform 2>&1 -fuse-ld=ld | FileCheck %s
+// RUN:     --sysroot=%S/platform 2>&1 | FileCheck %s
 // CHECK: {{.*}}clang{{.*}}" "-cc1"
 // CHECK: "-triple" "x86_64-fuchsia"
 // CHECK: "-fuse-init-array"
 // CHECK: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK: "-internal-isystem" "{{.*[/\\]}}x86_64-fuchsia{{/|\\\\}}include{{/|\\\\}}c++{{/|\\\\}}v1"
 // CHECK: "-internal-externc-isystem" "[[SYSROOT]]{{/|\\\\}}include"
-// CHECK: {{.*}}lld{{.*}}" "-flavor" "gnu"
-// CHECK: "-z" "rodynamic"
+// CHECK: {{.*}}ld.lld{{.*}}" "-z" "rodynamic"
 // CHECK: "--sysroot=[[SYSROOT]]"
 // CHECK: "-pie"
 // CHECK: "--build-id"
diff --git a/test/Driver/fuse-ld-windows.c b/test/Driver/fuse-ld-windows.c
new file mode 100644
index 0000000..089f296
--- /dev/null
+++ b/test/Driver/fuse-ld-windows.c
@@ -0,0 +1,25 @@
+// REQUIRES: system-windows
+
+// We used to require adding ".exe" suffix when cross-compiling on Windows.
+// RUN: %clang %s -### -o %t.o -target i386-unknown-linux \
+// RUN:     -B %S/Inputs/fuse_ld_windows -fuse-ld=foo 2>&1 \
+// RUN:   | FileCheck %s
+
+// Check that the old variant still works.
+// RUN: %clang %s -### -o %t.o -target i386-unknown-linux \
+// RUN:     -B %S/Inputs/fuse_ld_windows -fuse-ld=foo.exe 2>&1 \
+// RUN:   | FileCheck %s
+
+// With the full path, the extension can be omitted, too,
+// because Windows allows that.
+// RUN: %clang %s -### -o %t.o -target i386-unknown-linux \
+// RUN:     -fuse-ld=%S/Inputs/fuse_ld_windows/ld.foo 2>&1 \
+// RUN:   | FileCheck %s
+
+// Check that the full path with the extension works too.
+// RUN: %clang %s -### -o %t.o -target i386-unknown-linux \
+// RUN:     -fuse-ld=%S/Inputs/fuse_ld_windows/ld.foo.exe 2>&1 \
+// RUN:   | FileCheck %s
+
+// CHECK-NOT: invalid linker name
+// CHECK: /Inputs/fuse_ld_windows{{/|\\\\}}ld.foo
diff --git a/test/Driver/fuse-ld.c b/test/Driver/fuse-ld.c
index bd8c9a5..b043ce6 100644
--- a/test/Driver/fuse-ld.c
+++ b/test/Driver/fuse-ld.c
@@ -67,3 +67,29 @@
 // RUN:     -gcc-toolchain %S/Inputs/basic_android_tree 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=CHECK-ANDROID-ARM-GOLD-TC
 // CHECK-ANDROID-ARM-GOLD-TC: Inputs/basic_android_tree/lib/gcc/arm-linux-androideabi/4.4.3/../../../../arm-linux-androideabi/bin{{/|\\+}}ld.gold
+
+
+// RUN: %clang %s -### -fuse-ld=link \
+// RUN:     -target i686-unknown-windows-msvc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix CHECK-WINDOWS-MSVC-LINK
+// CHECK-WINDOWS-MSVC-LINK: "{{.*}}link.exe"
+// CHECK-WINDOWS-MSVC-LINK-SAME: "-out:{{.*}}"
+
+// RUN: %clang %s -### -fuse-ld=lld \
+// RUN:     -target i686-unknown-windows-msvc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix CHECK-WINDOWS-MSVC-LLD
+// CHECK-WINDOWS-MSVC-LLD: "{{.*}}lld-link"
+// CHECK-WINDOWS-MSVC-LLD-SAME: "-out:{{.*}}"
+
+// RUN: %clang %s -### -fuse-ld=lld-link \
+// RUN:     -target i686-unknown-windows-msvc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix CHECK-WINDOWS-MSVC-LLD-LINK
+// CHECK-WINDOWS-MSVC-LLD-LINK: "{{.*}}lld-link"
+// CHECK-WINDOWS-MSVC-LLD-LINK-SAME: "-out:{{.*}}"
+
+// RUN: %clang %s -### -fuse-ld=bfd \
+// RUN:     -target i686-unknown-windows-msvc \
+// RUN:     -B %S/Inputs/Windows/usr/bin 2>&1 \
+// RUN:   | FileCheck %s --check-prefix CHECK-WINDOWS-MSVC-BFD
+// CHECK-WINDOWS-MSVC-BFD: "{{.*}}ld.bfd"
+// CHECK-WINDOWS-MSVC-BFD-SAME: "-o"
diff --git a/test/Driver/global-isel.c b/test/Driver/global-isel.c
new file mode 100644
index 0000000..f4fc6a7
--- /dev/null
+++ b/test/Driver/global-isel.c
@@ -0,0 +1,24 @@
+// REQUIRES: x86-registered-target,aarch64-registered-target
+
+// RUN: %clang -fexperimental-isel -S -### %s 2>&1 | FileCheck --check-prefix=ENABLED %s
+// RUN: %clang -fno-experimental-isel -S -### %s 2>&1 | FileCheck --check-prefix=DISABLED %s
+
+// RUN: %clang -target aarch64 -fexperimental-isel -S %s -### 2>&1 | FileCheck --check-prefix=ARM64-DEFAULT %s
+// RUN: %clang -target aarch64 -fexperimental-isel -S -O0 %s -### 2>&1 | FileCheck --check-prefix=ARM64-O0 %s
+// RUN: %clang -target aarch64 -fexperimental-isel -S -O2 %s -### 2>&1 | FileCheck --check-prefix=ARM64-O2 %s
+// RUN: %clang -target aarch64 -fexperimental-isel -Wno-experimental-isel -S -O2 %s -### 2>&1 | FileCheck --check-prefix=ARM64-O2-NOWARN %s
+
+// RUN: %clang -target x86_64 -fexperimental-isel -S %s -### 2>&1 | FileCheck --check-prefix=X86_64 %s
+
+// ENABLED: "-mllvm" "-global-isel=1"
+// DISABLED: "-mllvm" "-global-isel=0"
+
+// ARM64-DEFAULT-NOT: warning: -fexperimental-sel
+// ARM64-DEFAULT-NOT: "-global-isel-abort=2"
+// ARM64-O0-NOT: warning: -fexperimental-sel
+// ARM64-O2: warning: -fexperimental-isel support is incomplete for this architecture at the current optimization level
+// ARM64-O2: "-mllvm" "-global-isel-abort=2"
+// ARM64-O2-NOWARN-NOT: warning: -fexperimental-isel
+
+// X86_64: -fexperimental-isel support for the 'x86_64' architecture is incomplete
+// X86_64: "-mllvm" "-global-isel-abort=2"
diff --git a/test/Driver/hexagon-hvx.c b/test/Driver/hexagon-hvx.c
index f648e49..6163559 100644
--- a/test/Driver/hexagon-hvx.c
+++ b/test/Driver/hexagon-hvx.c
@@ -2,18 +2,34 @@
 // Tests for the hvx features and warnings.
 // -----------------------------------------------------------------------------
 
+// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx \
+// RUN:  2>&1 | FileCheck -check-prefix=CHECKHVX165 %s
+// CHECKHVX165: "-target-feature" "+hvxv65"
+
 // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx \
 // RUN:  2>&1 | FileCheck -check-prefix=CHECKHVX162 %s
 // CHECKHVX162: "-target-feature" "+hvxv62"
 
-// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx \
+// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx \
 // RUN:  -mhvx-double 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s
 
 // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx \
+// RUN:  -mhvx-double 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s
+
+// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx \
 // RUN:  -mhvx-length=128B 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s
+
+// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx \
+// RUN:  -mhvx-length=128B 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s
+
+// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx \
+// RUN:  -mhvx-length=128b 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s
 // CHECKHVX2-NOT: "-target-feature" "+hvx-length64b"
 // CHECKHVX2: "-target-feature" "+hvx-length128b"
 
+// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 2>&1 \
+// RUN:  | FileCheck -check-prefix=CHECKHVX3 %s
+
 // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 2>&1 \
 // RUN:  | FileCheck -check-prefix=CHECKHVX3 %s
 // CHECKHVX3-NOT: "-target-feature" "+hvx
@@ -52,7 +68,7 @@
 // RUN:  2>&1 | FileCheck -check-prefix=CHECK-HVXEQ %s
 // CHECK-HVXEQ: "-target-feature" "+hvxv62"
 
-// Honor the last occured -mhvx=, -mhvx flag.
+// Honor the last occurred -mhvx=, -mhvx flag.
 // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx=v62 -mhvx\
 // RUN:  2>&1 | FileCheck -check-prefix=CHECK-HVXEQ-PRE %s
 // CHECK-HVXEQ-PRE-NOT: "-target-feature" "+hvxv62"
@@ -66,8 +82,10 @@
 // The default mode on v60,v62 is 64B.
 // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \
 // RUN:  2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-64B %s
-// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx -mhvx-length=64B\
-// RUN:  2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-64B %s
+// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \
+// RUN:  -mhvx-length=64b 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-64B %s
+// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \
+// RUN:  -mhvx-length=64B 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-64B %s
 // CHECK-HVXLENGTH-64B: "-target-feature" "+hvx{{.*}}" "-target-feature" "+hvx-length64b"
 // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx -mhvx-length=128B\
 // RUN:  2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-128B %s
diff --git a/test/Driver/hexagon-toolchain-elf.c b/test/Driver/hexagon-toolchain-elf.c
index 9858245..8f4c320 100644
--- a/test/Driver/hexagon-toolchain-elf.c
+++ b/test/Driver/hexagon-toolchain-elf.c
@@ -99,19 +99,27 @@
 
 // RUN: %clang -### -target hexagon-unknown-elf \
 // RUN:   -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \
-// RUN:   -O3 \
+// RUN:   -mcpu=hexagonv65 \
 // RUN:   %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK025 %s
-// CHECK025: "-ffp-contract=fast"
-// CHECK025: hexagon-link
+// CHECK025: "-cc1" {{.*}} "-target-cpu" "hexagonv65"
+// CHECK025: hexagon-link{{.*}}/Inputs/hexagon_tree/Tools/bin/../target/hexagon/lib/v65/crt0
+
+// RUN: %clang -### -target hexagon-unknown-elf \
+// RUN:   -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \
+// RUN:   -O3 \
+// RUN:   %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK026 %s
+// CHECK026: "-ffp-contract=fast"
+// CHECK026: hexagon-link
 
 // RUN: %clang -### -target hexagon-unknown-elf \
 // RUN:   -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \
 // RUN:   -O3 -ffp-contract=off \
 // RUN:   %s 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHECK026 %s
-// CHECK026-NOT: "-ffp-contract=fast"
-// CHECK026: hexagon-link
+// RUN:   | FileCheck -check-prefix=CHECK027 %s
+// CHECK027-NOT: "-ffp-contract=fast"
+// CHECK027: hexagon-link
 
 // -----------------------------------------------------------------------------
 // Test Linker related args
@@ -496,12 +504,22 @@
 // CHECK060-NEXT: hexagon-link
 
 // -----------------------------------------------------------------------------
+// ffixed-r19
+// -----------------------------------------------------------------------------
+// RUN: %clang -### -target hexagon-unknown-elf -ffixed-r19 %s 2>&1 \
+// RUN:        | FileCheck --check-prefix=CHECK070 %s
+// CHECK070: "-target-feature" "+reserved-r19"
+// RUN: %clang -### -target hexagon-unknown-elf %s 2>&1 \
+// RUN:        | FileCheck --check-prefix=CHECK071 %s
+// CHECK071-NOT: "+reserved-r19"
+
+// -----------------------------------------------------------------------------
 // Misc Defaults
 // -----------------------------------------------------------------------------
 // RUN: %clang -### -target hexagon-unknown-elf \
 // RUN:   -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \
 // RUN:   -mcpu=hexagonv60 \
 // RUN:   %s 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHECK070 %s
-// CHECK070:      "-cc1"
-// CHECK070:      "-Wreturn-type"
+// RUN:   | FileCheck -check-prefix=CHECK080 %s
+// CHECK080:      "-cc1"
+// CHECK080:      "-Wreturn-type"
diff --git a/test/Driver/lanai-unknown-unknown.cpp b/test/Driver/lanai-unknown-unknown.cpp
index 5ce0adf..220a84f 100644
--- a/test/Driver/lanai-unknown-unknown.cpp
+++ b/test/Driver/lanai-unknown-unknown.cpp
@@ -11,25 +11,25 @@
 
 extern "C" {
 
-// CHECK: @align_c = global i32 1
+// CHECK: @align_c = dso_local global i32 1
 int align_c = __alignof(char);
 
-// CHECK: @align_s = global i32 2
+// CHECK: @align_s = dso_local global i32 2
 int align_s = __alignof(short);
 
-// CHECK: @align_i = global i32 4
+// CHECK: @align_i = dso_local global i32 4
 int align_i = __alignof(int);
 
-// CHECK: @align_l = global i32 4
+// CHECK: @align_l = dso_local global i32 4
 int align_l = __alignof(long);
 
-// CHECK: @align_ll = global i32 8
+// CHECK: @align_ll = dso_local global i32 8
 int align_ll = __alignof(long long);
 
-// CHECK: @align_p = global i32 4
+// CHECK: @align_p = dso_local global i32 4
 int align_p = __alignof(void*);
 
-// CHECK: @align_vl = global i32 4
+// CHECK: @align_vl = dso_local global i32 4
 int align_vl = __alignof(va_list);
 
 // Check types
diff --git a/test/Driver/le32-unknown-nacl.cpp b/test/Driver/le32-unknown-nacl.cpp
index 379ddb6..9bbcdec 100644
--- a/test/Driver/le32-unknown-nacl.cpp
+++ b/test/Driver/le32-unknown-nacl.cpp
@@ -10,34 +10,34 @@
 
 extern "C" {
 
-// CHECK: @align_c = global i32 1
+// CHECK: @align_c = dso_local global i32 1
 int align_c = __alignof(char);
 
-// CHECK: @align_s = global i32 2
+// CHECK: @align_s = dso_local global i32 2
 int align_s = __alignof(short);
 
-// CHECK: @align_i = global i32 4
+// CHECK: @align_i = dso_local global i32 4
 int align_i = __alignof(int);
 
-// CHECK: @align_l = global i32 4
+// CHECK: @align_l = dso_local global i32 4
 int align_l = __alignof(long);
 
-// CHECK: @align_ll = global i32 8
+// CHECK: @align_ll = dso_local global i32 8
 int align_ll = __alignof(long long);
 
-// CHECK: @align_p = global i32 4
+// CHECK: @align_p = dso_local global i32 4
 int align_p = __alignof(void*);
 
-// CHECK: @align_f = global i32 4
+// CHECK: @align_f = dso_local global i32 4
 int align_f = __alignof(float);
 
-// CHECK: @align_d = global i32 8
+// CHECK: @align_d = dso_local global i32 8
 int align_d = __alignof(double);
 
-// CHECK: @align_ld = global i32 8
+// CHECK: @align_ld = dso_local global i32 8
 int align_ld = __alignof(long double);
 
-// CHECK: @align_vl = global i32 4
+// CHECK: @align_vl = dso_local global i32 4
 int align_vl = __alignof(va_list);
 
 // CHECK: __LITTLE_ENDIAN__defined
diff --git a/test/Driver/le64-unknown-unknown.cpp b/test/Driver/le64-unknown-unknown.cpp
index d0a5859..8cee9dc 100644
--- a/test/Driver/le64-unknown-unknown.cpp
+++ b/test/Driver/le64-unknown-unknown.cpp
@@ -9,34 +9,34 @@
 
 extern "C" {
 
-// CHECK: @align_c = global i32 1
+// CHECK: @align_c = dso_local global i32 1
 int align_c = __alignof(char);
 
-// CHECK: @align_s = global i32 2
+// CHECK: @align_s = dso_local global i32 2
 int align_s = __alignof(short);
 
-// CHECK: @align_i = global i32 4
+// CHECK: @align_i = dso_local global i32 4
 int align_i = __alignof(int);
 
-// CHECK: @align_l = global i32 8
+// CHECK: @align_l = dso_local global i32 8
 int align_l = __alignof(long);
 
-// CHECK: @align_ll = global i32 8
+// CHECK: @align_ll = dso_local global i32 8
 int align_ll = __alignof(long long);
 
-// CHECK: @align_p = global i32 8
+// CHECK: @align_p = dso_local global i32 8
 int align_p = __alignof(void*);
 
-// CHECK: @align_f = global i32 4
+// CHECK: @align_f = dso_local global i32 4
 int align_f = __alignof(float);
 
-// CHECK: @align_d = global i32 8
+// CHECK: @align_d = dso_local global i32 8
 int align_d = __alignof(double);
 
-// CHECK: @align_ld = global i32 8
+// CHECK: @align_ld = dso_local global i32 8
 int align_ld = __alignof(long double);
 
-// CHECK: @align_vl = global i32 4
+// CHECK: @align_vl = dso_local global i32 4
 int align_vl = __alignof(va_list);
 
 // CHECK: __LITTLE_ENDIAN__defined
diff --git a/test/Driver/linux-ld.c b/test/Driver/linux-ld.c
index a47afe3..f9f9385 100644
--- a/test/Driver/linux-ld.c
+++ b/test/Driver/linux-ld.c
@@ -156,7 +156,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-LD-64-STATIC %s
 // CHECK-LD-64-STATIC-NOT: warning:
 // CHECK-LD-64-STATIC: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]"
-// CHECK-LD-64-STATIC-NOT: "--eh-frame-hdr"
+// CHECK-LD-64-STATIC: "--eh-frame-hdr"
 // CHECK-LD-64-STATIC: "-m" "elf_x86_64"
 // CHECK-LD-64-STATIC-NOT: "-dynamic-linker"
 // CHECK-LD-64-STATIC: "-static"
@@ -1133,6 +1133,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID %s
 // CHECK-ANDROID: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]"
+// CHECK-ANDROID: "--enable-new-dtags"
 // CHECK-ANDROID: "{{.*}}{{/|\\\\}}crtbegin_dynamic.o"
 // CHECK-ANDROID: "-L[[SYSROOT]]/usr/lib"
 // CHECK-ANDROID-NOT: "gcc_s"
@@ -1307,31 +1308,6 @@
 // CHECK-ANDROID-PIE: "{{.*}}{{/|\\\\}}crtend_android.o"
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
 // RUN:     --target=arm-linux-androideabi \
-// RUN:   | FileCheck --check-prefix=CHECK-ANDROID-NO-DEFAULT-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     --target=arm-linux-android \
-// RUN:   | FileCheck --check-prefix=CHECK-ANDROID-NO-DEFAULT-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     --target=aarch64-linux-android \
-// RUN:   | FileCheck --check-prefix=CHECK-ANDROID-NO-DEFAULT-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     --target=arm64-linux-android \
-// RUN:   | FileCheck --check-prefix=CHECK-ANDROID-NO-DEFAULT-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     --target=mipsel-linux-android \
-// RUN:   | FileCheck --check-prefix=CHECK-ANDROID-NO-DEFAULT-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     --target=mips64el-linux-android \
-// RUN:   | FileCheck --check-prefix=CHECK-ANDROID-NO-DEFAULT-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     --target=i686-linux-android \
-// RUN:   | FileCheck --check-prefix=CHECK-ANDROID-NO-DEFAULT-PIE %s
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     --target=x86_64-linux-android \
-// RUN:   | FileCheck --check-prefix=CHECK-ANDROID-NO-DEFAULT-PIE %s
-// CHECK-ANDROID-NO-DEFAULT-PIE-NOT: -pie
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     --target=arm-linux-androideabi \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-32 %s
diff --git a/test/Driver/masm.c b/test/Driver/masm.c
index 17c6393..5c7251a 100644
--- a/test/Driver/masm.c
+++ b/test/Driver/masm.c
@@ -2,11 +2,13 @@
 // RUN: %clang -target i386-unknown-linux -masm=att -S %s -### 2>&1 | FileCheck --check-prefix=CHECK-ATT %s
 // RUN: %clang -target i386-unknown-linux -S -masm=somerequired %s -### 2>&1 | FileCheck --check-prefix=CHECK-SOMEREQUIRED %s
 // RUN: %clang -target arm-unknown-eabi -S -masm=intel %s -### 2>&1 | FileCheck --check-prefix=CHECK-ARM %s
+// RUN: %clang_cl --target=x86_64 /FA -### -- %s 2>&1 | FileCheck --check-prefix=CHECK-CL %s
 
 int f() {
 // CHECK-INTEL: -x86-asm-syntax=intel
 // CHECK-ATT: -x86-asm-syntax=att
 // CHECK-SOMEREQUIRED: error: unsupported argument 'somerequired' to option 'masm='
 // CHECK-ARM: warning: argument unused during compilation: '-masm=intel'
+// CHECK-CL: -x86-asm-syntax=intel
   return 0;
 }
diff --git a/test/Driver/mingw-libgcc.c b/test/Driver/mingw-libgcc.c
index 1d45c91..bfe2360 100644
--- a/test/Driver/mingw-libgcc.c
+++ b/test/Driver/mingw-libgcc.c
@@ -2,11 +2,11 @@
 // Verified with gcc version 5.1.0 (i686-posix-dwarf-rev0, Built by MinGW-W64 project).
 
 // gcc, static
-// RUN: %clang -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefix=CHECK_STATIC %s
-// RUN: %clang -static -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefix=CHECK_STATIC %s
-// RUN: %clang -static-libgcc -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefix=CHECK_STATIC %s
-// RUN: %clang -static -shared -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefix=CHECK_STATIC %s
-// RUN: %clang -static-libgcc -shared -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefix=CHECK_STATIC %s
+// RUN: %clang -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefixes=CHECK_STATIC,CHECK_BDYNAMIC %s
+// RUN: %clang -static -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefixes=CHECK_STATIC,CHECK_BSTATIC %s
+// RUN: %clang -static-libgcc -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefixes=CHECK_STATIC,CHECK_BDYNAMIC %s
+// RUN: %clang -static -shared -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefixes=CHECK_STATIC,CHECK_SHARED,CHECK_BSTATIC %s
+// RUN: %clang -static-libgcc -shared -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefixes=CHECK_STATIC,CHECK_SHARED,CHECK_BDYNAMIC %s
 
 // gcc, dynamic
 // RUN: %clang -shared -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefix=CHECK_DYNAMIC %s
@@ -21,5 +21,8 @@
 // RUN: %clang --driver-mode=g++ -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefix=CHECK_DYNAMIC %s
 // RUN: %clang -shared --driver-mode=g++ -v -target i686-pc-windows-gnu -rtlib=platform -### %s 2>&1 | FileCheck -check-prefix=CHECK_DYNAMIC %s
 
+// CHECK_SHARED: "--shared"
+// CHECK_BSTATIC: "-Bstatic"
+// CHECK_BDYNAMIC: "-Bdynamic"
 // CHECK_STATIC: "-lgcc" "-lgcc_eh"
 // CHECK_DYNAMIC: "-lgcc_s" "-lgcc"
diff --git a/test/Driver/mingw-msvcrt.c b/test/Driver/mingw-msvcrt.c
index b7c6584..f83a8c2 100644
--- a/test/Driver/mingw-msvcrt.c
+++ b/test/Driver/mingw-msvcrt.c
@@ -2,4 +2,5 @@
 // RUN: %clang -v -target i686-pc-windows-gnu -lmsvcr120 -### %s 2>&1 | FileCheck -check-prefix=CHECK_MSVCR120 %s
 
 // CHECK_DEFAULT: "-lmingwex" "-lmsvcrt" "-ladvapi32"
-// CHECK_MSVCR120: "-lmingwex" "-ladvapi32"
+// CHECK_MSVCR120: "-lmsvcr120"
+// CHECK_MSVCR120-SAME: "-lmingwex" "-ladvapi32"
diff --git a/test/Driver/mingw-useld.c b/test/Driver/mingw-useld.c
deleted file mode 100644
index a0ab5a9..0000000
--- a/test/Driver/mingw-useld.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// RUN: %clang -### -target i686-pc-windows-gnu --sysroot=%S/Inputs/mingw_clang_tree/mingw32 %s 2>&1 | FileCheck -check-prefix=CHECK_LD_32 %s
-// CHECK_LD_32: ld{{(.exe)?}}"
-// CHECK_LD_32: "i386pe"
-// CHECK_LD_32-NOT: "-flavor" "gnu"
-
-// RUN: %clang -### -target i686-pc-windows-gnu --sysroot=%S/Inputs/mingw_clang_tree/mingw32 %s -fuse-ld=lld 2>&1 | FileCheck -check-prefix=CHECK_LLD_32 %s
-// CHECK_LLD_32-NOT: invalid linker name in argument
-// CHECK_LLD_32: lld{{(.exe)?}}" "-flavor" "gnu"
-// CHECK_LLD_32: "i386pe"
-
-// RUN: %clang -### -target x86_64-pc-windows-gnu --sysroot=%S/Inputs/mingw_clang_tree/mingw32 %s -fuse-ld=lld 2>&1 | FileCheck -check-prefix=CHECK_LLD_64 %s
-// CHECK_LLD_64-NOT: invalid linker name in argument
-// CHECK_LLD_64: lld{{(.exe)?}}" "-flavor" "gnu"
-// CHECK_LLD_64: "i386pep"
-
-// RUN: %clang -### -target arm-pc-windows-gnu --sysroot=%S/Inputs/mingw_clang_tree/mingw32 %s -fuse-ld=lld 2>&1 | FileCheck -check-prefix=CHECK_LLD_ARM %s
-// CHECK_LLD_ARM-NOT: invalid linker name in argument
-// CHECK_LLD_ARM: lld{{(.exe)?}}" "-flavor" "gnu"
-// CHECK_LLD_ARM: "thumb2pe"
diff --git a/test/Driver/mips-features.c b/test/Driver/mips-features.c
index a9db0f1..d9bde7d 100644
--- a/test/Driver/mips-features.c
+++ b/test/Driver/mips-features.c
@@ -402,3 +402,9 @@
 // RUN: %clang -target -mips-mti-linux-gnu -### -c %s -mno-branch-likely 2>&1 \
 // RUN:   | FileCheck --check-prefix=NO-BRANCH-LIKELY %s
 // NO-BRANCH-LIKELY: argument unused during compilation: '-mno-branch-likely'
+
+// -mindirect-jump=hazard
+// RUN: %clang -target mips-unknown-linux-gnu -### -c %s \
+// RUN:        -mindirect-jump=hazard 2>&1 \
+// RUN:   | FileCheck --check-prefix=INDIRECT-BH %s
+// INDIRECT-BH: "-target-feature" "+use-indirect-jump-hazard"
diff --git a/test/Driver/mips-indirect-branch.c b/test/Driver/mips-indirect-branch.c
new file mode 100644
index 0000000..64e85d5
--- /dev/null
+++ b/test/Driver/mips-indirect-branch.c
@@ -0,0 +1,23 @@
+// REQUIRES: mips-registered-target
+// -mindirect-jump=hazard -mips32
+// RUN: %clang -target mips-unknown-linux-gnu -mips32 -### -c %s \
+// RUN:        -mindirect-jump=hazard 2>&1 | FileCheck %s --check-prefix=MIPS32
+// MIPS32: error: '-mindirect-jump=hazard' is unsupported with the 'mips32' architecture
+
+// -mindirect-jump=hazard -mmicromips
+// RUN: %clang -target mips-unknown-linux-gnu -mmicromips -### -c %s \
+// RUN:        -mindirect-jump=hazard 2>&1 | FileCheck %s --check-prefix=MICROMIPS
+// MICROMIPS: error: '-mindirect-jump=hazard' is unsupported with the 'micromips' architecture
+
+// -mindirect-jump=hazard -mips16
+// RUN: %clang -target mips-unknown-linux-gnu -mips16 -### -c %s \
+// RUN:        -mindirect-jump=hazard 2>&1 | FileCheck %s --check-prefix=MIPS16
+// MIPS16: error: '-mindirect-jump=hazard' is unsupported with the 'mips16' architecture
+
+// RUN: %clang -target mips-unknown-linux-gnu  -### -c %s \
+// RUN:        -mindirect-jump=retopline 2>&1 | FileCheck %s --check-prefix=RETOPLINE
+// RETOPLINE: error: unknown '-mindirect-jump=' option 'retopline'
+
+// RUN: %clang -target mips-unknown-linux-gnu  -### -mips32 -c %s \
+// RUN:        -mindirect-jump=retopline 2>&1 | FileCheck %s --check-prefix=MIXED
+// MIXED: error: unknown '-mindirect-jump=' option 'retopline'
diff --git a/test/Driver/mprefer-vector-width.c b/test/Driver/mprefer-vector-width.c
new file mode 100644
index 0000000..d927b35
--- /dev/null
+++ b/test/Driver/mprefer-vector-width.c
@@ -0,0 +1,24 @@
+////
+//// Verify that valid options for the -mprefer-vector-width flag are passed through and invalid options cause an error.
+////
+
+//// If there are no options, convert to 'all'.
+
+// RUN: %clang -### -S %s -mprefer-vector-width=none  2>&1 | FileCheck --check-prefix=WIDTHNONE %s
+// WIDTHNONE: "-mprefer-vector-width=none"
+
+//// Check options that cover all types.
+
+// RUN: %clang -### -S %s -mprefer-vector-width=128  2>&1 | FileCheck --check-prefix=WIDTH128 %s
+// WIDTH128: "-mprefer-vector-width=128"
+
+//// Check invalid parameters.
+
+// RUN: %clang -### -S %s -mprefer-vector-width=one  2>&1 | FileCheck --check-prefix=WIDTHONE %s
+// WIDTHONE: invalid value 'one' in 'mprefer-vector-width='
+
+// RUN: %clang -### -S %s -mprefer-vector-width=128.5  2>&1 | FileCheck --check-prefix=WIDTH128p5 %s
+// WIDTH128p5: invalid value '128.5' in 'mprefer-vector-width='
+
+// RUN: %clang -### -S %s -mprefer-vector-width=-128  2>&1 | FileCheck --check-prefix=WIDTHNEG128 %s
+// WIDTHNEG128: invalid value '-128' in 'mprefer-vector-width='
diff --git a/test/Driver/myriad-toolchain.c b/test/Driver/myriad-toolchain.c
index 7bd215b..215a02f 100644
--- a/test/Driver/myriad-toolchain.c
+++ b/test/Driver/myriad-toolchain.c
@@ -1,19 +1,19 @@
-// RUN: %clang -no-canonical-prefixes -### -target sparc-myriad-rtems-elf %s \
+// RUN: %clang -no-canonical-prefixes -### -target sparc-myriad-rtems %s \
 // RUN: -ccc-install-dir %S/Inputs/basic_myriad_tree/bin \
 // RUN: --gcc-toolchain=%S/Inputs/basic_myriad_tree 2>&1 | FileCheck %s -check-prefix=LINK_WITH_RTEMS
 // LINK_WITH_RTEMS: Inputs{{.*}}crti.o
 // LINK_WITH_RTEMS: Inputs{{.*}}crtbegin.o
-// LINK_WITH_RTEMS: "-L{{.*}}Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2"
-// LINK_WITH_RTEMS: "-L{{.*}}Inputs/basic_myriad_tree/bin/../sparc-myriad-elf/lib"
+// LINK_WITH_RTEMS: "-L{{.*}}Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0"
+// LINK_WITH_RTEMS: "-L{{.*}}Inputs/basic_myriad_tree/bin/../sparc-myriad-rtems/lib"
 // LINK_WITH_RTEMS: "--start-group" "-lc" "-lgcc" "-lrtemscpu" "-lrtemsbsp" "--end-group"
 // LINK_WITH_RTEMS: Inputs{{.*}}crtend.o
 // LINK_WITH_RTEMS: Inputs{{.*}}crtn.o
 
-// RUN: %clang -c -no-canonical-prefixes -### -target sparc-myriad-rtems-elf -x c++ %s \
+// RUN: %clang -c -no-canonical-prefixes -### -target sparc-myriad-rtems -x c++ %s \
 // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/basic_myriad_tree 2>&1 | FileCheck %s -check-prefix=COMPILE_CXX
-// COMPILE_CXX: "-internal-isystem" "{{.*}}/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/../../../../sparc-myriad-elf/include/c++/4.8.2"
-// COMPILE_CXX: "-internal-isystem" "{{.*}}/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/../../../../sparc-myriad-elf/include/c++/4.8.2/sparc-myriad-elf"
-// COMPILE_CXX: "-internal-isystem" "{{.*}}/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-elf/4.8.2/../../../../sparc-myriad-elf/include/c++/4.8.2/backward"
+// COMPILE_CXX: "-internal-isystem" "{{.*}}/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/../../../../sparc-myriad-rtems/include/c++/6.3.0"
+// COMPILE_CXX: "-internal-isystem" "{{.*}}/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/../../../../sparc-myriad-rtems/include/c++/6.3.0/sparc-myriad-rtems"
+// COMPILE_CXX: "-internal-isystem" "{{.*}}/Inputs/basic_myriad_tree/lib/gcc/sparc-myriad-rtems/6.3.0/../../../../sparc-myriad-rtems/include/c++/6.3.0/backward"
 
 // RUN: %clang -### -E -target sparc-myriad --sysroot=/yow %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=SLASH_INCLUDE
@@ -41,7 +41,7 @@
 // RUN:   | FileCheck %s -check-prefix=MOVICOMPILE
 // MOVICOMPILE: moviCompile{{(.exe)?}}" "-S" "-fno-exceptions" "-DMYRIAD2" "-mcpu=myriad2.2" "-isystem" "somewhere" "-I" "common"
 // MOVICOMPILE: moviAsm{{(.exe)?}}" "-no6thSlotCompression" "-cv:myriad2.2" "-noSPrefixing" "-a"
-// MOVICOMPILE: "-yippee" "-i:somewhere" "-i:common" "-elf"
+// MOVICOMPILE: "-yippee" "-i:somewhere" "-i:common"
 
 // RUN: %clang -target shave-myriad -c -### %s -DEFINE_ME -UNDEFINE_ME 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=DEFINES
@@ -85,6 +85,6 @@
 // RUN: %clang -### -c -g %s -target sparc-myriad 2>&1 | FileCheck -check-prefix=G_SPARC %s
 // G_SPARC: "-debug-info-kind=limited" "-dwarf-version=2"
 
-// RUN: %clang -### -c %s -target sparc-myriad-elf -fuse-init-array 2>&1 \
+// RUN: %clang -### -c %s -target sparc-myriad-rtems -fuse-init-array 2>&1 \
 // RUN: | FileCheck -check-prefix=USE-INIT-ARRAY %s
 // USE-INIT-ARRAY-NOT: argument unused
diff --git a/test/Driver/objc-weak.m b/test/Driver/objc-weak.m
index 68ae26e..85cfcd8 100644
--- a/test/Driver/objc-weak.m
+++ b/test/Driver/objc-weak.m
@@ -1,27 +1,27 @@
 // Check miscellaneous Objective-C options.
 
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.7 -S -### %s -fobjc-arc -fobjc-weak 2>&1 | FileCheck %s --check-prefix ARC-WEAK
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.7 -S -### %s -fno-objc-weak -fobjc-weak -fobjc-arc  2>&1 | FileCheck %s --check-prefix ARC-WEAK
+// RUN: %clang -target x86_64-apple-macosx10.7 -S -### %s -fobjc-arc -fobjc-weak 2>&1 | FileCheck %s --check-prefix ARC-WEAK
+// RUN: %clang -target x86_64-apple-macosx10.7 -S -### %s -fno-objc-weak -fobjc-weak -fobjc-arc  2>&1 | FileCheck %s --check-prefix ARC-WEAK
 // ARC-WEAK: -fobjc-arc
 // ARC-WEAK: -fobjc-weak
 
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.7 -S -### %s -fobjc-arc -fno-objc-weak 2>&1 | FileCheck %s --check-prefix ARC-NO-WEAK
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.7 -S -### %s -fobjc-weak -fno-objc-weak -fobjc-arc  2>&1 | FileCheck %s --check-prefix ARC-NO-WEAK
+// RUN: %clang -target x86_64-apple-macos10.7 -S -### %s -fobjc-arc -fno-objc-weak 2>&1 | FileCheck %s --check-prefix ARC-NO-WEAK
+// RUN: %clang -target x86_64-apple-macos10.7 -S -### %s -fobjc-weak -fno-objc-weak -fobjc-arc  2>&1 | FileCheck %s --check-prefix ARC-NO-WEAK
 // ARC-NO-WEAK: -fobjc-arc
 // ARC-NO-WEAK: -fno-objc-weak
 
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.5 -S -### %s -fobjc-arc -fobjc-weak 2>&1 | FileCheck %s --check-prefix ARC-WEAK-NOTSUPPORTED
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.5 -S -### %s -fno-objc-weak -fobjc-weak -fobjc-arc  2>&1 | FileCheck %s --check-prefix ARC-WEAK-NOTSUPPORTED
+// RUN: %clang -target x86_64-apple-macosx10.5 -S -### %s -fobjc-arc -fobjc-weak 2>&1 | FileCheck %s --check-prefix ARC-WEAK-NOTSUPPORTED
+// RUN: %clang -target x86_64-apple-macosx10.5 -S -### %s -fno-objc-weak -fobjc-weak -fobjc-arc  2>&1 | FileCheck %s --check-prefix ARC-WEAK-NOTSUPPORTED
 // ARC-WEAK-NOTSUPPORTED: error: -fobjc-weak is not supported on the current deployment target
 
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.7 -S -### %s -fobjc-weak 2>&1 | FileCheck %s --check-prefix MRC-WEAK
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.7 -S -### %s -fno-objc-weak -fobjc-weak 2>&1 | FileCheck %s --check-prefix MRC-WEAK
+// RUN: %clang -target x86_64-apple-macos10.7 -S -### %s -fobjc-weak 2>&1 | FileCheck %s --check-prefix MRC-WEAK
+// RUN: %clang -target x86_64-apple-macos10.7 -S -### %s -fno-objc-weak -fobjc-weak 2>&1 | FileCheck %s --check-prefix MRC-WEAK
 // MRC-WEAK: -fobjc-weak
 
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.7 -S -### %s -fno-objc-weak 2>&1 | FileCheck %s --check-prefix MRC-NO-WEAK
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.7 -S -### %s -fobjc-weak -fno-objc-weak 2>&1 | FileCheck %s --check-prefix MRC-NO-WEAK
+// RUN: %clang -target x86_64-apple-macosx10.7 -S -### %s -fno-objc-weak 2>&1 | FileCheck %s --check-prefix MRC-NO-WEAK
+// RUN: %clang -target x86_64-apple-macosx10.7 -S -### %s -fobjc-weak -fno-objc-weak 2>&1 | FileCheck %s --check-prefix MRC-NO-WEAK
 // MRC-NO-WEAK: -fno-objc-weak
 
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.5 -S -### %s -fobjc-weak 2>&1 | FileCheck %s --check-prefix MRC-WEAK-NOTSUPPORTED
-// RUN: %clang -target x86_64-apple-macosx -mmacosx-version-min=10.5 -S -### %s -fno-objc-weak -fobjc-weak 2>&1 | FileCheck %s --check-prefix MRC-WEAK-NOTSUPPORTED
+// RUN: %clang -target x86_64-apple-macosx10.5 -S -### %s -fobjc-weak 2>&1 | FileCheck %s --check-prefix MRC-WEAK-NOTSUPPORTED
+// RUN: %clang -target x86_64-apple-macosx10.5 -S -### %s -fno-objc-weak -fobjc-weak 2>&1 | FileCheck %s --check-prefix MRC-WEAK-NOTSUPPORTED
 // MRC-WEAK-NOTSUPPORTED: error: -fobjc-weak is not supported on the current deployment target
diff --git a/test/Driver/opencl.cl b/test/Driver/opencl.cl
index d68d424..8c421be 100644
--- a/test/Driver/opencl.cl
+++ b/test/Driver/opencl.cl
@@ -13,6 +13,7 @@
 // RUN: %clang -S -### -cl-no-signed-zeros %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SIGNED-ZEROS %s
 // RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 | FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s
 // RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 | FileCheck --check-prefix=CHECK-ROUND-DIV %s
+// RUN: %clang -S -### -cl-uniform-work-group-size %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s
 // RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s
 // RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s
 
@@ -31,6 +32,7 @@
 // CHECK-NO-SIGNED-ZEROS: "-cc1" {{.*}} "-cl-no-signed-zeros"
 // CHECK-DENORMS-ARE-ZERO: "-cc1" {{.*}} "-cl-denorms-are-zero"
 // CHECK-ROUND-DIV: "-cc1" {{.*}} "-cl-fp32-correctly-rounded-divide-sqrt"
+// CHECK-UNIFORM-WG: "-cc1" {{.*}} "-cl-uniform-work-group-size"
 // CHECK-C99: error: invalid value 'c99' in '-cl-std=c99'
 // CHECK-INVALID: error: invalid value 'invalid' in '-cl-std=invalid'
 
diff --git a/test/Driver/openmp-offload-gpu.c b/test/Driver/openmp-offload-gpu.c
index abef552..16c321b 100644
--- a/test/Driver/openmp-offload-gpu.c
+++ b/test/Driver/openmp-offload-gpu.c
@@ -10,7 +10,8 @@
 /// ###########################################################################
 
 /// Check -Xopenmp-target uses one of the archs provided when several archs are used.
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
+// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:          -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s
 
 // CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60"
@@ -19,7 +20,9 @@
 /// ###########################################################################
 
 /// Check -Xopenmp-target -march=sm_35 works as expected when two triples are present.
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
+// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp \
+// RUN:          -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \
+// RUN:          -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s
 
 // CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35"
@@ -28,46 +31,73 @@
 /// ###########################################################################
 
 /// Check cubin file generation and usage by nvlink
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-CUBIN %s
+// RUN:   %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
+// RUN:          -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
+/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+// RUN:   %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
+// RUN:          -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
 
-// CHK-CUBIN: clang{{.*}}" "-o" "{{.*}}.s"
-// CHK-CUBIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s"
-// CHK-CUBIN-NEXT: nvlink" {{.*}}.cubin"
-
+// CHK-CUBIN-NVLINK: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
+// CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
 
 /// ###########################################################################
 
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
-// RUN:   %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-CUBIN-DARWIN %s
+/// Check unbundlink of assembly file, cubin file generation and usage by nvlink
+// RUN:   touch %t.s
+// RUN:   %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:          -no-canonical-prefixes -save-temps %t.s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK %s
 
-// CHK-CUBIN-DARWIN: clang{{.*}}" "-o" "{{.*}}.s"
-// CHK-CUBIN-DARWIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s"
-// CHK-CUBIN-DARWIN-NEXT: nvlink" {{.*}}.cubin"
+/// Use DAG to ensure that assembly file has been unbundled.
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX:.*\.s]]"
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=s" {{.*}}"-outputs={{.*}}[[PTX]]
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG-SAME: "-unbundle"
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
+
+/// ###########################################################################
+
+/// Check cubin file generation and bundling
+// RUN:   %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:          -no-canonical-prefixes -save-temps %s -c 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PTXAS-CUBIN-BUNDLING %s
+
+// CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-PTXAS-CUBIN-BUNDLING-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
+// CHK-PTXAS-CUBIN-BUNDLING: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-inputs={{.*}}[[CUBIN]]
+
+/// ###########################################################################
+
+/// Check cubin file unbundling and usage by nvlink
+// RUN:   touch %t.o
+// RUN:   %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:          -no-canonical-prefixes -save-temps %t.o 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s
+
+/// Use DAG to ensure that cubin file has been unbundled.
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: nvlink{{.*}}" {{.*}}"[[CUBIN:.*\.cubin]]"
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-outputs={{.*}}[[CUBIN]]
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG-SAME: "-unbundle"
 
 /// ###########################################################################
 
 /// Check cubin file generation and usage by nvlink
 // RUN:   touch %t1.o
 // RUN:   touch %t2.o
-// RUN:   %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN:   %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
+// RUN:          -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-TWOCUBIN %s
+/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+// RUN:   %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
+// RUN:          -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-TWOCUBIN %s
 
 // CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
 
 /// ###########################################################################
 
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
-// RUN:   touch %t1.o
-// RUN:   touch %t2.o
-// RUN:   %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-TWOCUBIN-DARWIN %s
-
-// CHK-TWOCUBIN-DARWIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
-
-/// ###########################################################################
-
 /// Check PTXAS is passed -c flag when offloading to an NVIDIA device using OpenMP.
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -no-canonical-prefixes %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-PTXAS-DEFAULT %s
@@ -77,7 +107,8 @@
 /// ###########################################################################
 
 /// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it.
-// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \
+// RUN:          -save-temps -no-canonical-prefixes %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-PTXAS-NORELO %s
 
 // CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c"
@@ -86,7 +117,8 @@
 
 /// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP
 /// Check that the flag is passed when -fopenmp-relocatable-target is used.
-// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \
+// RUN:          -save-temps -no-canonical-prefixes %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-PTXAS-RELO %s
 
 // CHK-PTXAS-RELO: ptxas{{.*}}" "-c"
diff --git a/test/Driver/opt-record.c b/test/Driver/opt-record.c
index 5e8a28f..7b4ec48 100644
--- a/test/Driver/opt-record.c
+++ b/test/Driver/opt-record.c
@@ -9,6 +9,8 @@
 // RUN: %clang -### -S -fsave-optimization-record -x cuda -nocudainc -nocudalib %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV
 // RUN: %clang -### -fsave-optimization-record -x cuda -nocudainc -nocudalib %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV
 // RUN: %clang -### -S -o FOO -fsave-optimization-record -foptimization-record-file=BAR.txt %s 2>&1 | FileCheck %s -check-prefix=CHECK-EQ
+// RUN: %clang -### -S -o FOO -foptimization-record-file=BAR.txt %s 2>&1 | FileCheck %s -check-prefix=CHECK-EQ
+// RUN: %clang -### -S -o FOO -foptimization-record-file=BAR.txt -fno-save-optimization-record %s 2>&1 | FileCheck %s --check-prefix=CHECK-FOPT-DISABLE
 
 // CHECK: "-cc1"
 // CHECK: "-opt-record-file" "FOO.opt.yaml"
@@ -20,3 +22,4 @@
 // CHECK-EQ: "-cc1"
 // CHECK-EQ: "-opt-record-file" "BAR.txt"
 
+// CHECK-FOPT-DISABLE-NOT: "-fno-save-optimization-record"
diff --git a/test/Driver/output-file-cleanup.c b/test/Driver/output-file-cleanup.c
index 74d64b0..e3f2bdb 100644
--- a/test/Driver/output-file-cleanup.c
+++ b/test/Driver/output-file-cleanup.c
@@ -41,18 +41,3 @@
 // RUN: not %clang -S %t-dir/1.c %t-dir/2.c
 // RUN: test -f %t-dir/1.s
 // RUN: test ! -f %t-dir/2.s
-
-// When given multiple .c files to compile, clang compiles them in order until
-// it hits an error, at which point it stops.
-//
-// RUN: touch %t-dir/1.c
-// RUN: echo "invalid C code" > %t-dir/2.c
-// RUN: touch %t-dir/3.c
-// RUN: echo "invalid C code" > %t-dir/4.c
-// RUN: touch %t-dir/5.c
-// RUN: not %clang -S %t-dir/1.c %t-dir/2.c %t-dir/3.c %t-dir/4.c %t-dir/5.c
-// RUN: test -f %t-dir/1.s
-// RUN: test ! -f %t-dir/2.s
-// RUN: test ! -f %t-dir/3.s
-// RUN: test ! -f %t-dir/4.s
-// RUN: test ! -f %t-dir/5.s
diff --git a/test/Driver/pic.c b/test/Driver/pic.c
index 6b01c58..1b4257c 100644
--- a/test/Driver/pic.c
+++ b/test/Driver/pic.c
@@ -152,6 +152,22 @@
 // RUN: %clang %s -target i386-unknown-linux -shared -pie -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-NO-PIE
 //
+// On Musl Linux, PIE is enabled by default, but can be disabled.
+// RUN: %clang -c %s -target x86_64-linux-musl -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+// RUN: %clang -c %s -target i686-linux-musl -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+// RUN: %clang -c %s -target armv6-linux-musleabihf -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+// RUN: %clang -c %s -target armv7-linux-musleabihf -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+// RUN: %clang %s -target x86_64-linux-musl -nopie -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-NO-PIE
+// RUN: %clang %s -target x86_64-linux-musl -pie -nopie -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-NO-PIE
+// RUN: %clang %s -target x86_64-linux-musl -nopie -pie -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+//
 // Darwin is a beautiful and unique snowflake when it comes to these flags.
 // When targeting a 32-bit darwin system, only level 2 is supported. On 64-bit
 // targets, there is simply nothing you can do, there is no PIE, there is only
@@ -205,19 +221,19 @@
 //
 // Checks for ARM+Apple+IOS including -fapple-kext, -mkernel, and iphoneos
 // version boundaries.
-// RUN: %clang -c %s -target armv7-apple-ios -fapple-kext -miphoneos-version-min=6.0.0 -### 2>&1 \
+// RUN: %clang -c %s -target armv7-apple-ios6 -fapple-kext -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-PIC2
-// RUN: %clang -c %s -target armv7-apple-ios -mkernel -miphoneos-version-min=6.0.0 -### 2>&1 \
+// RUN: %clang -c %s -target armv7-apple-ios6 -mkernel -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-PIC2
-// RUN: %clang -c %s -target arm64-apple-ios -mkernel -miphoneos-version-min=7.0.0 -### 2>&1 \
+// RUN: %clang -c %s -target arm64-apple-ios7 -mkernel -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-PIC2
-// RUN: %clang -x assembler -c %s -target arm64-apple-ios -mkernel -miphoneos-version-min=7.0.0 -no-integrated-as -### 2>&1 \
+// RUN: %clang -x assembler -c %s -target arm64-apple-ios7 -mkernel -no-integrated-as -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-NO-STATIC
-// RUN: %clang -c %s -target armv7k-apple-watchos -fapple-kext -mwatchos-version-min=1.0.0 -### 2>&1 \
+// RUN: %clang -c %s -target armv7k-apple-watchos1 -fapple-kext -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-PIC2
-// RUN: %clang -c %s -target armv7-apple-ios -fapple-kext -miphoneos-version-min=5.0.0 -### 2>&1 \
+// RUN: %clang -c %s -target armv7-apple-ios5 -fapple-kext -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-NO-PIC
-// RUN: %clang -c %s -target armv7-apple-ios -fapple-kext -miphoneos-version-min=6.0.0 -static -### 2>&1 \
+// RUN: %clang -c %s -target armv7-apple-ios6 -fapple-kext -static -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-NO-PIC
 // RUN: %clang -c %s -target armv7-apple-unknown-macho -static -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-NO-PIC
@@ -251,17 +267,54 @@
 // RUN: %clang %s -target i386-pc-openbsd -no-pie -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-NOPIE-LD
 //
-// On Android PIC is enabled by default
+// On Android PIC is enabled by default, and PIE is enabled by default starting
+// with API16.
 // RUN: %clang -c %s -target i686-linux-android -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-PIC2
+// RUN: %clang -c %s -target i686-linux-android14 -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIC2
+// RUN: %clang -c %s -target i686-linux-android16 -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+// RUN: %clang -c %s -target i686-linux-android24 -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+//
 // RUN: %clang -c %s -target arm-linux-androideabi -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-PIC1
+// RUN: %clang -c %s -target arm-linux-androideabi14 -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIC1
+// RUN: %clang -c %s -target arm-linux-androideabi16 -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+// RUN: %clang -c %s -target arm-linux-androideabi24 -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+//
 // RUN: %clang -c %s -target mipsel-linux-android -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-PIC1
+// RUN: %clang -c %s -target mipsel-linux-android14 -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIC1
+// RUN: %clang -c %s -target mipsel-linux-android16 -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+// RUN: %clang -c %s -target mipsel-linux-android24 -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+//
+// 64-bit Android targets are always PIE.
 // RUN: %clang -c %s -target aarch64-linux-android -### 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=CHECK-PIC1
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+// RUN: %clang -c %s -target aarch64-linux-android24 -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
 // RUN: %clang -c %s -target arm64-linux-android -### 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=CHECK-PIC1
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+//
+// Default value of PIE can be overwritten, even on 64-bit targets.
+// RUN: %clang -c %s -target arm-linux-androideabi -fPIE -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+// RUN: %clang -c %s -target i686-linux-android14 -fPIE -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIE2
+// RUN: %clang -c %s -target i686-linux-android16 -fno-PIE -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-NO-PIC
+// RUN: %clang -c %s -target aarch64-linux-android -fno-PIE -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-NO-PIC
+// RUN: %clang -c %s -target aarch64-linux-android24 -fno-PIE -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-NO-PIC
 //
 // On Windows-X64 PIC is enabled by default
 // RUN: %clang -c %s -target x86_64-pc-windows-msvc18.0.0 -### 2>&1 \
diff --git a/test/Driver/riscv-abi.c b/test/Driver/riscv-abi.c
new file mode 100644
index 0000000..8b79c846
--- /dev/null
+++ b/test/Driver/riscv-abi.c
@@ -0,0 +1,47 @@
+// RUN: %clang -target riscv32-unknown-elf %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ILP32 %s
+// RUN: %clang -target riscv32-unknown-elf %s -### -o %t.o -mabi=ilp32 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ILP32 %s
+
+// CHECK-ILP32: "-target-abi" "ilp32"
+
+// TODO: ilp32f support.
+// RUN: not %clang -target riscv32-unknown-elf %s -o %t.o -mabi=ilp32f 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ILP32F %s
+
+// CHECK-ILP32F: error: unknown target ABI 'ilp32f'
+
+// TODO: ilp32d support.
+// RUN: not %clang -target riscv32-unknown-elf %s -o %t.o -mabi=ilp32d 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ILP32D %s
+
+// CHECK-ILP32D: error: unknown target ABI 'ilp32d'
+
+// RUN: not %clang -target riscv32-unknown-elf %s -o %t.o -mabi=lp64 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-RV32-LP64 %s
+
+// CHECK-RV32-LP64: error: unknown target ABI 'lp64'
+
+// RUN: %clang -target riscv64-unknown-elf %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-LP64 %s
+// RUN: %clang -target riscv64-unknown-elf %s -### -o %t.o -mabi=lp64 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-LP64 %s
+
+// CHECK-LP64: "-target-abi" "lp64"
+
+// TODO: lp64f support.
+// RUN: not %clang -target riscv64-unknown-elf %s -o %t.o -mabi=lp64f 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-LP64F %s
+
+// CHECK-LP64F: error: unknown target ABI 'lp64f'
+
+// TODO: lp64d support.
+// RUN: not %clang -target riscv64-unknown-elf %s -o %t.o -mabi=lp64d 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-LP64D %s
+
+// CHECK-LP64D: error: unknown target ABI 'lp64d'
+
+// RUN: not %clang -target riscv64-unknown-elf %s -o %t.o -mabi=ilp32 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-RV64-ILP32 %s
+
+// CHECK-RV64-ILP32: error: unknown target ABI 'ilp32'
diff --git a/test/Driver/riscv-features.c b/test/Driver/riscv-features.c
new file mode 100644
index 0000000..565596c
--- /dev/null
+++ b/test/Driver/riscv-features.c
@@ -0,0 +1,4 @@
+// RUN: %clang -target riscv32-unknown-elf -### %s -fsyntax-only 2>&1 | FileCheck %s
+// RUN: %clang -target riscv64-unknown-elf -### %s -fsyntax-only 2>&1 | FileCheck %s
+
+// CHECK: fno-signed-char
diff --git a/test/Driver/riscv-gnutools.c b/test/Driver/riscv-gnutools.c
new file mode 100644
index 0000000..afcb505
--- /dev/null
+++ b/test/Driver/riscv-gnutools.c
@@ -0,0 +1,19 @@
+// Check gnutools are invoked with propagated values for -mabi and -march.
+
+// RUN: %clang -target riscv32 -fno-integrated-as %s -###  -c \
+// RUN: 2>&1 | FileCheck -check-prefix=MABI-ILP32 %s
+
+// RUN: %clang -target riscv32 -fno-integrated-as -march=rv32g %s -### -c \
+// RUN: 2>&1 | FileCheck -check-prefix=MABI-ILP32-MARCH-G %s
+
+// RUN: %clang -target riscv64 -fno-integrated-as %s -###  -c \
+// RUN: 2>&1 | FileCheck -check-prefix=MABI-ILP64 %s
+
+// RUN: %clang -target riscv64 -fno-integrated-as -march=rv64g %s -### -c \
+// RUN: 2>&1 | FileCheck -check-prefix=MABI-ILP64-MARCH-G %s
+
+// MABI-ILP32: "{{.*}}as{{(.exe)?}}" "-mabi" "ilp32"
+// MABI-ILP32-MARCH-G: "{{.*}}as{{(.exe)?}}" "-mabi" "ilp32" "-march" "rv32g"
+
+// MABI-ILP64: "{{.*}}as{{(.exe)?}}" "-mabi" "lp64"
+// MABI-ILP64-MARCH-G: "{{.*}}as{{(.exe)?}}" "-mabi" "lp64" "-march" "rv64g"
diff --git a/test/Driver/riscv32-toolchain.c b/test/Driver/riscv32-toolchain.c
new file mode 100644
index 0000000..36ba4b6
--- /dev/null
+++ b/test/Driver/riscv32-toolchain.c
@@ -0,0 +1,122 @@
+// A basic clang -cc1 command-line, and simple environment check.
+
+// RUN: %clang %s -### -no-canonical-prefixes -target riscv32 2>&1 | FileCheck -check-prefix=CC1 %s
+// CC1: clang{{.*}} "-cc1" "-triple" "riscv32"
+
+
+// RUN: %clang %s -### -no-canonical-prefixes -fuse-ld=ld \
+// RUN:   -target riscv32-linux-unknown-elf \
+// RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_linux_sdk \
+// RUN:   --sysroot=%S/Inputs/multilib_riscv_linux_sdk/sysroot 2>&1 \
+// RUN:   | FileCheck -check-prefix=CC1-RV32-LINUX-ILP32 %s
+
+// CC1-RV32-LINUX-ILP32: "{{.*}}/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/../../../../riscv64-unknown-linux-gnu/bin{{/|\\\\}}ld"
+// CC1-RV32-LINUX-ILP32: "--sysroot={{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot"
+// CC1-RV32-LINUX-ILP32: "-m" "elf32lriscv"
+// CC1-RV32-LINUX-ILP32: "-dynamic-linker" "/lib/ld-linux-riscv32-ilp32.so.1"
+// CC1-RV32-LINUX-ILP32: "{{.*}}/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib32/ilp32{{/|\\\\}}crtbegin.o"
+// CC1-RV32-LINUX-ILP32: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib32/ilp32"
+// CC1-RV32-LINUX-ILP32: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/lib32/ilp32"
+// CC1-RV32-LINUX-ILP32: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib32/ilp32"
+
+// RUN: %clang %s -### -no-canonical-prefixes -fuse-ld=ld \
+// RUN:   -target riscv32-linux-unknown-elf -march=rv32imafd -mabi=ilp32d \
+// RUN:   --gcc-toolchain=%S/Inputs/multilib_riscv_linux_sdk \
+// RUN:   --sysroot=%S/Inputs/multilib_riscv_linux_sdk/sysroot 2>&1 \
+// RUN:   | FileCheck -check-prefix=CC1-RV32-LINUX-ILP32D %s
+
+// CC1-RV32-LINUX-ILP32D: "{{.*}}/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/../../../../riscv64-unknown-linux-gnu/bin{{/|\\\\}}ld"
+// CC1-RV32-LINUX-ILP32D: "--sysroot={{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot"
+// CC1-RV32-LINUX-ILP32D: "-m" "elf32lriscv"
+// CC1-RV32-LINUX-ILP32D: "-dynamic-linker" "/lib/ld-linux-riscv32-ilp32d.so.1"
+// CC1-RV32-LINUX-ILP32D: "{{.*}}/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib32/ilp32d{{/|\\\\}}crtbegin.o"
+// CC1-RV32-LINUX-ILP32D: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/lib/gcc/riscv64-unknown-linux-gnu/7.2.0/lib32/ilp32d"
+// CC1-RV32-LINUX-ILP32D: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/lib32/ilp32d"
+// CC1-RV32-LINUX-ILP32D: "-L{{.*}}/Inputs/multilib_riscv_linux_sdk/sysroot/usr/lib32/ilp32d"
+
+// RUN: %clang -target riscv32 %s -emit-llvm -S -o - | FileCheck %s
+
+typedef __builtin_va_list va_list;
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+typedef __WCHAR_TYPE__ wchar_t;
+
+// CHECK: @align_c = dso_local global i32 1
+int align_c = __alignof(char);
+
+// CHECK: @align_s = dso_local global i32 2
+int align_s = __alignof(short);
+
+// CHECK: @align_i = dso_local global i32 4
+int align_i = __alignof(int);
+
+// CHECK: @align_wc = dso_local global i32 4
+int align_wc = __alignof(wchar_t);
+
+// CHECK: @align_l = dso_local global i32 4
+int align_l = __alignof(long);
+
+// CHECK: @align_ll = dso_local global i32 8
+int align_ll = __alignof(long long);
+
+// CHECK: @align_p = dso_local global i32 4
+int align_p = __alignof(void*);
+
+// CHECK: @align_f = dso_local global i32 4
+int align_f = __alignof(float);
+
+// CHECK: @align_d = dso_local global i32 8
+int align_d = __alignof(double);
+
+// CHECK: @align_ld = dso_local global i32 16
+int align_ld = __alignof(long double);
+
+// CHECK: @align_vl = dso_local global i32 4
+int align_vl = __alignof(va_list);
+
+// Check types
+
+// CHECK: zeroext i8 @check_char()
+char check_char() { return 0; }
+
+// CHECK: define dso_local signext i16 @check_short()
+short check_short() { return 0; }
+
+// CHECK: define dso_local i32 @check_int()
+int check_int() { return 0; }
+
+// CHECK: define dso_local i32 @check_wchar_t()
+int check_wchar_t() { return 0; }
+
+// CHECK: define dso_local i32 @check_long()
+long check_long() { return 0; }
+
+// CHECK: define dso_local i64 @check_longlong()
+long long check_longlong() { return 0; }
+
+// CHECK: define dso_local zeroext i8 @check_uchar()
+unsigned char check_uchar() { return 0; }
+
+// CHECK: define dso_local zeroext i16 @check_ushort()
+unsigned short check_ushort() { return 0; }
+
+// CHECK: define dso_local i32 @check_uint()
+unsigned int check_uint() { return 0; }
+
+// CHECK: define dso_local i32 @check_ulong()
+unsigned long check_ulong() { return 0; }
+
+// CHECK: define dso_local i64 @check_ulonglong()
+unsigned long long check_ulonglong() { return 0; }
+
+// CHECK: define dso_local i32 @check_size_t()
+size_t check_size_t() { return 0; }
+
+// CHECK: define dso_local float @check_float()
+float check_float() { return 0; }
+
+// CHECK: define dso_local double @check_double()
+double check_double() { return 0; }
+
+// CHECK: define dso_local fp128 @check_longdouble()
+long double check_longdouble() { return 0; }
diff --git a/test/Driver/riscv64-toolchain.c b/test/Driver/riscv64-toolchain.c
new file mode 100644
index 0000000..44dcc93
--- /dev/null
+++ b/test/Driver/riscv64-toolchain.c
@@ -0,0 +1,91 @@
+// A basic clang -cc1 command-line, and simple environment check.
+
+// RUN: %clang %s -### -no-canonical-prefixes -target riscv64 2>&1 | FileCheck -check-prefix=CC1 %s
+// CC1: clang{{.*}} "-cc1" "-triple" "riscv64"
+
+// RUN: %clang -target riscv64 %s -emit-llvm -S -o - | FileCheck %s
+
+typedef __builtin_va_list va_list;
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+typedef __WCHAR_TYPE__ wchar_t;
+
+// CHECK: @align_c = dso_local global i32 1
+int align_c = __alignof(char);
+
+// CHECK: @align_s = dso_local global i32 2
+int align_s = __alignof(short);
+
+// CHECK: @align_i = dso_local global i32 4
+int align_i = __alignof(int);
+
+// CHECK: @align_wc = dso_local global i32 4
+int align_wc = __alignof(wchar_t);
+
+// CHECK: @align_l = dso_local global i32 8
+int align_l = __alignof(long);
+
+// CHECK: @align_ll = dso_local global i32 8
+int align_ll = __alignof(long long);
+
+// CHECK: @align_p = dso_local global i32 8
+int align_p = __alignof(void*);
+
+// CHECK: @align_f = dso_local global i32 4
+int align_f = __alignof(float);
+
+// CHECK: @align_d = dso_local global i32 8
+int align_d = __alignof(double);
+
+// CHECK: @align_ld = dso_local global i32 16
+int align_ld = __alignof(long double);
+
+// CHECK: @align_vl = dso_local global i32 8
+int align_vl = __alignof(va_list);
+
+// Check types
+
+// CHECK: define dso_local zeroext i8 @check_char()
+char check_char() { return 0; }
+
+// CHECK: define dso_local signext i16 @check_short()
+short check_short() { return 0; }
+
+// CHECK: define dso_local signext i32 @check_int()
+int check_int() { return 0; }
+
+// CHECK: define dso_local signext i32 @check_wchar_t()
+int check_wchar_t() { return 0; }
+
+// CHECK: define dso_local i64 @check_long()
+long check_long() { return 0; }
+
+// CHECK: define dso_local i64 @check_longlong()
+long long check_longlong() { return 0; }
+
+// CHECK: define dso_local zeroext i8 @check_uchar()
+unsigned char check_uchar() { return 0; }
+
+// CHECK: define dso_local zeroext i16 @check_ushort()
+unsigned short check_ushort() { return 0; }
+
+// CHECK: define dso_local signext i32 @check_uint()
+unsigned int check_uint() { return 0; }
+
+// CHECK: define dso_local i64 @check_ulong()
+unsigned long check_ulong() { return 0; }
+
+// CHECK: define dso_local i64 @check_ulonglong()
+unsigned long long check_ulonglong() { return 0; }
+
+// CHECK: define dso_local i64 @check_size_t()
+size_t check_size_t() { return 0; }
+
+// CHECK: define dso_local float @check_float()
+float check_float() { return 0; }
+
+// CHECK: define dso_local double @check_double()
+double check_double() { return 0; }
+
+// CHECK: define dso_local fp128 @check_longdouble()
+long double check_longdouble() { return 0; }
diff --git a/test/Driver/sanitize_unwind_tables.c b/test/Driver/sanitize_unwind_tables.c
index b78843e..e74c158 100644
--- a/test/Driver/sanitize_unwind_tables.c
+++ b/test/Driver/sanitize_unwind_tables.c
@@ -9,5 +9,7 @@
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=dataflow %s -### 2>&1 |  FileCheck %s
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=efficiency-cache-frag %s -### 2>&1 |  FileCheck %s
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=efficiency-working-set %s -### 2>&1 |  FileCheck %s
+// RUN: %clang -target aarch64-linux-gnu -fsanitize=hwaddress %s -### 2>&1 |  FileCheck %s
+// RUN: %clang -target aarch64-linux-android -fsanitize=hwaddress %s -### 2>&1 |  FileCheck %s
 
 // CHECK: -munwind-tables
diff --git a/test/Driver/sanitizer-ld.c b/test/Driver/sanitizer-ld.c
index 68eab30..107cf9d 100644
--- a/test/Driver/sanitizer-ld.c
+++ b/test/Driver/sanitizer-ld.c
@@ -9,9 +9,9 @@
 // CHECK-ASAN-LINUX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-ASAN-LINUX-NOT: "-lc"
 // CHECK-ASAN-LINUX: libclang_rt.asan-i386.a"
-// CHECK-ASAN-LINUX-NOT: "-export-dynamic"
+// CHECK-ASAN-LINUX-NOT: "--export-dynamic"
 // CHECK-ASAN-LINUX: "--dynamic-list={{.*}}libclang_rt.asan-i386.a.syms"
-// CHECK-ASAN-LINUX-NOT: "-export-dynamic"
+// CHECK-ASAN-LINUX-NOT: "--export-dynamic"
 // CHECK-ASAN-LINUX: "-lpthread"
 // CHECK-ASAN-LINUX: "-lrt"
 // CHECK-ASAN-LINUX: "-ldl"
@@ -39,11 +39,11 @@
 // CHECK-SHARED-ASAN-LINUX-NOT: "-lc"
 // CHECK-SHARED-ASAN-LINUX-NOT: libclang_rt.asan-i386.a"
 // CHECK-SHARED-ASAN-LINUX: libclang_rt.asan-i386.so"
-// CHECK-SHARED-ASAN-LINUX: "-whole-archive" "{{.*}}libclang_rt.asan-preinit-i386.a" "-no-whole-archive"
+// CHECK-SHARED-ASAN-LINUX: "--whole-archive" "{{.*}}libclang_rt.asan-preinit-i386.a" "--no-whole-archive"
 // CHECK-SHARED-ASAN-LINUX-NOT: "-lpthread"
 // CHECK-SHARED-ASAN-LINUX-NOT: "-lrt"
 // CHECK-SHARED-ASAN-LINUX-NOT: "-ldl"
-// CHECK-SHARED-ASAN-LINUX-NOT: "-export-dynamic"
+// CHECK-SHARED-ASAN-LINUX-NOT: "--export-dynamic"
 // CHECK-SHARED-ASAN-LINUX-NOT: "--dynamic-list"
 
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.so -shared 2>&1 \
@@ -60,7 +60,7 @@
 // CHECK-DSO-SHARED-ASAN-LINUX-NOT: "-lpthread"
 // CHECK-DSO-SHARED-ASAN-LINUX-NOT: "-lrt"
 // CHECK-DSO-SHARED-ASAN-LINUX-NOT: "-ldl"
-// CHECK-DSO-SHARED-ASAN-LINUX-NOT: "-export-dynamic"
+// CHECK-DSO-SHARED-ASAN-LINUX-NOT: "--export-dynamic"
 // CHECK-DSO-SHARED-ASAN-LINUX-NOT: "--dynamic-list"
 
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
@@ -75,7 +75,7 @@
 // CHECK-ASAN-FREEBSD: freebsd{{/|\\+}}libclang_rt.asan-i386.a"
 // CHECK-ASAN-FREEBSD-NOT: libclang_rt.asan_cxx
 // CHECK-ASAN-FREEBSD-NOT: "--dynamic-list"
-// CHECK-ASAN-FREEBSD: "-export-dynamic"
+// CHECK-ASAN-FREEBSD: "--export-dynamic"
 // CHECK-ASAN-FREEBSD: "-lpthread"
 // CHECK-ASAN-FREEBSD: "-lrt"
 
@@ -96,10 +96,10 @@
 //
 // CHECK-ASAN-LINUX-CXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-ASAN-LINUX-CXX-NOT: "-lc"
-// CHECK-ASAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.asan-i386.a" "-no-whole-archive"
-// CHECK-ASAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.asan_cxx-i386.a" "-no-whole-archive"
+// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan-i386.a" "--no-whole-archive"
+// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan_cxx-i386.a" "--no-whole-archive"
 // CHECK-ASAN-LINUX-CXX-NOT: "--dynamic-list"
-// CHECK-ASAN-LINUX-CXX: "-export-dynamic"
+// CHECK-ASAN-LINUX-CXX: "--export-dynamic"
 // CHECK-ASAN-LINUX-CXX: stdc++
 // CHECK-ASAN-LINUX-CXX: "-lpthread"
 // CHECK-ASAN-LINUX-CXX: "-lrt"
@@ -112,7 +112,7 @@
 //
 // CHECK-ASAN-LINUX-CXX-STATIC: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-ASAN-LINUX-CXX-STATIC-NOT: stdc++
-// CHECK-ASAN-LINUX-CXX-STATIC: "-whole-archive" "{{.*}}libclang_rt.asan-i386.a" "-no-whole-archive"
+// CHECK-ASAN-LINUX-CXX-STATIC: "--whole-archive" "{{.*}}libclang_rt.asan-i386.a" "--no-whole-archive"
 // CHECK-ASAN-LINUX-CXX-STATIC: stdc++
 
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
@@ -140,7 +140,7 @@
 //
 // CHECK-ASAN-ANDROID: "{{(.*[^.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-ASAN-ANDROID-NOT: "-lc"
-// CHECK-ASAN-ANDROID: "-pie"
+// CHECK-ASAN-ANDROID-NOT: "-pie"
 // CHECK-ASAN-ANDROID-NOT: "-lpthread"
 // CHECK-ASAN-ANDROID: libclang_rt.asan-arm-android.so"
 // CHECK-ASAN-ANDROID-NOT: "-lpthread"
@@ -185,7 +185,7 @@
 //
 // CHECK-ASAN-ANDROID-X86: "{{(.*[^.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-ASAN-ANDROID-X86-NOT: "-lc"
-// CHECK-ASAN-ANDROID-X86: "-pie"
+// CHECK-ASAN-ANDROID-X86-NOT: "-pie"
 // CHECK-ASAN-ANDROID-X86-NOT: "-lpthread"
 // CHECK-ASAN-ANDROID-X86: libclang_rt.asan-i686-android.so"
 // CHECK-ASAN-ANDROID-X86-NOT: "-lpthread"
@@ -227,11 +227,11 @@
 //
 // CHECK-TSAN-LINUX-CXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-TSAN-LINUX-CXX-NOT: stdc++
-// CHECK-TSAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.tsan-x86_64.a" "-no-whole-archive"
+// CHECK-TSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.tsan-x86_64.a" "--no-whole-archive"
 // CHECK-TSAN-LINUX-CXX: "--dynamic-list={{.*}}libclang_rt.tsan-x86_64.a.syms"
-// CHECK-TSAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.tsan_cxx-x86_64.a" "-no-whole-archive"
+// CHECK-TSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.tsan_cxx-x86_64.a" "--no-whole-archive"
 // CHECK-TSAN-LINUX-CXX: "--dynamic-list={{.*}}libclang_rt.tsan_cxx-x86_64.a.syms"
-// CHECK-TSAN-LINUX-CXX-NOT: "-export-dynamic"
+// CHECK-TSAN-LINUX-CXX-NOT: "--export-dynamic"
 // CHECK-TSAN-LINUX-CXX: stdc++
 // CHECK-TSAN-LINUX-CXX: "-lpthread"
 // CHECK-TSAN-LINUX-CXX: "-lrt"
@@ -246,11 +246,11 @@
 //
 // CHECK-MSAN-LINUX-CXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-MSAN-LINUX-CXX-NOT: stdc++
-// CHECK-MSAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.msan-x86_64.a" "-no-whole-archive"
+// CHECK-MSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.msan-x86_64.a" "--no-whole-archive"
 // CHECK-MSAN-LINUX-CXX: "--dynamic-list={{.*}}libclang_rt.msan-x86_64.a.syms"
-// CHECK-MSAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.msan_cxx-x86_64.a" "-no-whole-archive"
+// CHECK-MSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.msan_cxx-x86_64.a" "--no-whole-archive"
 // CHECK-MSAN-LINUX-CXX: "--dynamic-list={{.*}}libclang_rt.msan_cxx-x86_64.a.syms"
-// CHECK-MSAN-LINUX-CXX-NOT: "-export-dynamic"
+// CHECK-MSAN-LINUX-CXX-NOT: "--export-dynamic"
 // CHECK-MSAN-LINUX-CXX: stdc++
 // CHECK-MSAN-LINUX-CXX: "-lpthread"
 // CHECK-MSAN-LINUX-CXX: "-lrt"
@@ -270,7 +270,7 @@
 // CHECK-UBSAN-LINUX: "{{.*}}ld{{(.exe)?}}"
 // CHECK-UBSAN-LINUX-NOT: libclang_rt.asan
 // CHECK-UBSAN-LINUX-NOT: libclang_rt.ubsan_standalone_cxx
-// CHECK-UBSAN-LINUX: "-whole-archive" "{{.*}}libclang_rt.ubsan_standalone-i386.a" "-no-whole-archive"
+// CHECK-UBSAN-LINUX: "--whole-archive" "{{.*}}libclang_rt.ubsan_standalone-i386.a" "--no-whole-archive"
 // CHECK-UBSAN-LINUX-NOT: libclang_rt.asan
 // CHECK-UBSAN-LINUX-NOT: libclang_rt.ubsan_standalone_cxx
 // CHECK-UBSAN-LINUX-NOT: "-lstdc++"
@@ -302,7 +302,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-LINUX-LINK-CXX %s
 // CHECK-UBSAN-LINUX-LINK-CXX-NOT: "-lstdc++"
-// CHECK-UBSAN-LINUX-LINK-CXX: "-whole-archive" "{{.*}}libclang_rt.ubsan_standalone_cxx-i386.a" "-no-whole-archive"
+// CHECK-UBSAN-LINUX-LINK-CXX: "--whole-archive" "{{.*}}libclang_rt.ubsan_standalone_cxx-i386.a" "--no-whole-archive"
 // CHECK-UBSAN-LINUX-LINK-CXX-NOT: "-lstdc++"
 
 // RUN: %clangxx -fsanitize=undefined %s -### -o %t.o 2>&1 \
@@ -312,9 +312,9 @@
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-LINUX-CXX %s
 // CHECK-UBSAN-LINUX-CXX: "{{.*}}ld{{(.exe)?}}"
 // CHECK-UBSAN-LINUX-CXX-NOT: libclang_rt.asan
-// CHECK-UBSAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.ubsan_standalone-i386.a" "-no-whole-archive"
+// CHECK-UBSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.ubsan_standalone-i386.a" "--no-whole-archive"
 // CHECK-UBSAN-LINUX-CXX-NOT: libclang_rt.asan
-// CHECK-UBSAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.ubsan_standalone_cxx-i386.a" "-no-whole-archive"
+// CHECK-UBSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.ubsan_standalone_cxx-i386.a" "--no-whole-archive"
 // CHECK-UBSAN-LINUX-CXX-NOT: libclang_rt.asan
 // CHECK-UBSAN-LINUX-CXX: "-lstdc++"
 // CHECK-UBSAN-LINUX-CXX-NOT: libclang_rt.asan
@@ -325,7 +325,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-MINIMAL-LINUX %s
 // CHECK-UBSAN-MINIMAL-LINUX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-UBSAN-MINIMAL-LINUX: "-whole-archive" "{{.*}}libclang_rt.ubsan_minimal-i386.a" "-no-whole-archive"
+// CHECK-UBSAN-MINIMAL-LINUX: "--whole-archive" "{{.*}}libclang_rt.ubsan_minimal-i386.a" "--no-whole-archive"
 // CHECK-UBSAN-MINIMAL-LINUX: "-lpthread"
 
 // RUN: %clang -fsanitize=undefined -fsanitize-minimal-runtime %s -### -o %t.o 2>&1 \
@@ -347,7 +347,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-UBSAN-LINUX %s
 // CHECK-ASAN-UBSAN-LINUX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-ASAN-UBSAN-LINUX: "-whole-archive" "{{.*}}libclang_rt.asan-i386.a" "-no-whole-archive"
+// CHECK-ASAN-UBSAN-LINUX: "--whole-archive" "{{.*}}libclang_rt.asan-i386.a" "--no-whole-archive"
 // CHECK-ASAN-UBSAN-LINUX-NOT: libclang_rt.ubsan
 // CHECK-ASAN-UBSAN-LINUX-NOT: "-lstdc++"
 // CHECK-ASAN-UBSAN-LINUX: "-lpthread"
@@ -357,8 +357,8 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-UBSAN-LINUX-CXX %s
 // CHECK-ASAN-UBSAN-LINUX-CXX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-ASAN-UBSAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.asan-i386.a" "-no-whole-archive"
-// CHECK-ASAN-UBSAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.asan_cxx-i386.a" "-no-whole-archive"
+// CHECK-ASAN-UBSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan-i386.a" "--no-whole-archive"
+// CHECK-ASAN-UBSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan_cxx-i386.a" "--no-whole-archive"
 // CHECK-ASAN-UBSAN-LINUX-CXX-NOT: libclang_rt.ubsan
 // CHECK-ASAN-UBSAN-LINUX-CXX: "-lstdc++"
 // CHECK-ASAN-UBSAN-LINUX-CXX: "-lpthread"
@@ -368,7 +368,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-MSAN-UBSAN-LINUX-CXX %s
 // CHECK-MSAN-UBSAN-LINUX-CXX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-MSAN-UBSAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.msan-x86_64.a" "-no-whole-archive"
+// CHECK-MSAN-UBSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.msan-x86_64.a" "--no-whole-archive"
 // CHECK-MSAN-UBSAN-LINUX-CXX-NOT: libclang_rt.ubsan
 
 // RUN: %clangxx -fsanitize=thread,undefined %s -### -o %t.o 2>&1 \
@@ -376,7 +376,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-TSAN-UBSAN-LINUX-CXX %s
 // CHECK-TSAN-UBSAN-LINUX-CXX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-TSAN-UBSAN-LINUX-CXX: "-whole-archive" "{{.*}}libclang_rt.tsan-x86_64.a" "-no-whole-archive"
+// CHECK-TSAN-UBSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.tsan-x86_64.a" "--no-whole-archive"
 // CHECK-TSAN-UBSAN-LINUX-CXX-NOT: libclang_rt.ubsan
 
 // RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \
@@ -429,7 +429,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-ASAN-COV-LINUX %s
 // CHECK-ASAN-COV-LINUX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-ASAN-COV-LINUX: "-whole-archive" "{{.*}}libclang_rt.asan-x86_64.a" "-no-whole-archive"
+// CHECK-ASAN-COV-LINUX: "--whole-archive" "{{.*}}libclang_rt.asan-x86_64.a" "--no-whole-archive"
 // CHECK-ASAN-COV-LINUX-NOT: libclang_rt.ubsan
 // CHECK-ASAN-COV-LINUX-NOT: "-lstdc++"
 // CHECK-ASAN-COV-LINUX: "-lpthread"
@@ -439,7 +439,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-MSAN-COV-LINUX %s
 // CHECK-MSAN-COV-LINUX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-MSAN-COV-LINUX: "-whole-archive" "{{.*}}libclang_rt.msan-x86_64.a" "-no-whole-archive"
+// CHECK-MSAN-COV-LINUX: "--whole-archive" "{{.*}}libclang_rt.msan-x86_64.a" "--no-whole-archive"
 // CHECK-MSAN-COV-LINUX-NOT: libclang_rt.ubsan
 // CHECK-MSAN-COV-LINUX-NOT: "-lstdc++"
 // CHECK-MSAN-COV-LINUX: "-lpthread"
@@ -449,7 +449,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-DFSAN-COV-LINUX %s
 // CHECK-DFSAN-COV-LINUX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-DFSAN-COV-LINUX: "-whole-archive" "{{.*}}libclang_rt.dfsan-x86_64.a" "-no-whole-archive"
+// CHECK-DFSAN-COV-LINUX: "--whole-archive" "{{.*}}libclang_rt.dfsan-x86_64.a" "--no-whole-archive"
 // CHECK-DFSAN-COV-LINUX-NOT: libclang_rt.ubsan
 // CHECK-DFSAN-COV-LINUX-NOT: "-lstdc++"
 // CHECK-DFSAN-COV-LINUX: "-lpthread"
@@ -459,7 +459,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-UBSAN-COV-LINUX %s
 // CHECK-UBSAN-COV-LINUX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-UBSAN-COV-LINUX: "-whole-archive" "{{.*}}libclang_rt.ubsan_standalone-x86_64.a" "-no-whole-archive"
+// CHECK-UBSAN-COV-LINUX: "--whole-archive" "{{.*}}libclang_rt.ubsan_standalone-x86_64.a" "--no-whole-archive"
 // CHECK-UBSAN-COV-LINUX-NOT: "-lstdc++"
 // CHECK-UBSAN-COV-LINUX: "-lpthread"
 
@@ -468,7 +468,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-COV-LINUX %s
 // CHECK-COV-LINUX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-COV-LINUX: "-whole-archive" "{{.*}}libclang_rt.ubsan_standalone-x86_64.a" "-no-whole-archive"
+// CHECK-COV-LINUX: "--whole-archive" "{{.*}}libclang_rt.ubsan_standalone-x86_64.a" "--no-whole-archive"
 // CHECK-COV-LINUX-NOT: "-lstdc++"
 // CHECK-COV-LINUX: "-lpthread"
 
@@ -487,7 +487,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-DIAG-LINUX %s
 // CHECK-CFI-DIAG-LINUX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-CFI-DIAG-LINUX: "-whole-archive" "{{[^"]*}}libclang_rt.ubsan_standalone-x86_64.a" "-no-whole-archive"
+// CHECK-CFI-DIAG-LINUX: "--whole-archive" "{{[^"]*}}libclang_rt.ubsan_standalone-x86_64.a" "--no-whole-archive"
 
 // Cross-DSO CFI links the CFI runtime.
 // RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso %s -### -o %t.o 2>&1 \
@@ -495,7 +495,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-LINUX %s
 // CHECK-CFI-CROSS-DSO-LINUX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-CFI-CROSS-DSO-LINUX: "-whole-archive" "{{[^"]*}}libclang_rt.cfi-x86_64.a" "-no-whole-archive"
+// CHECK-CFI-CROSS-DSO-LINUX: "--whole-archive" "{{[^"]*}}libclang_rt.cfi-x86_64.a" "--no-whole-archive"
 // CHECK-CFI-CROSS-DSO-LINUX: -export-dynamic
 
 // Cross-DSO CFI with diagnostics links just the CFI runtime.
@@ -505,7 +505,7 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-DIAG-LINUX %s
 // CHECK-CFI-CROSS-DSO-DIAG-LINUX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-CFI-CROSS-DSO-DIAG-LINUX: "-whole-archive" "{{[^"]*}}libclang_rt.cfi_diag-x86_64.a" "-no-whole-archive"
+// CHECK-CFI-CROSS-DSO-DIAG-LINUX: "--whole-archive" "{{[^"]*}}libclang_rt.cfi_diag-x86_64.a" "--no-whole-archive"
 // CHECK-CFI-CROSS-DSO-DIAG-LINUX: -export-dynamic
 
 // Cross-DSO CFI on Android does not link runtime libraries.
@@ -562,8 +562,8 @@
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-STATS-LINUX %s
 // CHECK-CFI-STATS-LINUX: "{{.*}}ld{{(.exe)?}}"
-// CHECK-CFI-STATS-LINUX: "-whole-archive" "{{[^"]*}}libclang_rt.stats_client-x86_64.a" "-no-whole-archive"
-// CHECK-CFI-STATS-LINUX-NOT: "-whole-archive"
+// CHECK-CFI-STATS-LINUX: "--whole-archive" "{{[^"]*}}libclang_rt.stats_client-x86_64.a" "--no-whole-archive"
+// CHECK-CFI-STATS-LINUX-NOT: "--whole-archive"
 // CHECK-CFI-STATS-LINUX: "{{[^"]*}}libclang_rt.stats-x86_64.a"
 
 // RUN: %clang -fsanitize=cfi -fsanitize-stats %s -### -o %t.o 2>&1 \
@@ -644,3 +644,106 @@
 //
 // CHECK-ESAN-LINUX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-ESAN-LINUX: libclang_rt.esan-x86_64.a
+
+// RUN: %clang -fsanitize=scudo %s -### -o %t.o 2>&1 \
+// RUN:     -target i386-unknown-linux -fuse-ld=ld \
+// RUN:     --sysroot=%S/Inputs/basic_linux_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-SCUDO-LINUX %s
+// CHECK-SCUDO-LINUX: "{{.*}}ld{{(.exe)?}}"
+// CHECK-SCUDO-LINUX: "-pie"
+// CHECK-SCUDO-LINUX: "--whole-archive" "{{.*}}libclang_rt.scudo-i386.a" "--no-whole-archive"
+// CHECK-SCUDO-LINUX-NOT: "-lstdc++"
+// CHECK-SCUDO-LINUX: "-lpthread"
+// CHECK-SCUDO-LINUX: "-ldl"
+
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.so -shared 2>&1 \
+// RUN:     -target i386-unknown-linux -fuse-ld=ld -fsanitize=scudo -shared-libsan \
+// RUN:     -resource-dir=%S/Inputs/resource_dir \
+// RUN:     --sysroot=%S/Inputs/basic_linux_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-SCUDO-SHARED-LINUX %s
+//
+// CHECK-SCUDO-SHARED-LINUX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
+// CHECK-SCUDO-SHARED-LINUX-NOT: "-lc"
+// CHECK-SCUDO-SHARED-LINUX-NOT: libclang_rt.scudo-i386.a"
+// CHECK-SCUDO-SHARED-LINUX: libclang_rt.scudo-i386.so"
+// CHECK-SCUDO-SHARED-LINUX-NOT: "-lpthread"
+// CHECK-SCUDO-SHARED-LINUX-NOT: "-lrt"
+// CHECK-SCUDO-SHARED-LINUX-NOT: "-ldl"
+// CHECK-SCUDO-SHARED-LINUX-NOT: "--export-dynamic"
+// CHECK-SCUDO-SHARED-LINUX-NOT: "--dynamic-list"
+
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -target arm-linux-androideabi -fuse-ld=ld -fsanitize=scudo \
+// RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
+// RUN:   | FileCheck --check-prefix=CHECK-SCUDO-ANDROID %s
+//
+// CHECK-SCUDO-ANDROID: "{{(.*[^.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
+// CHECK-SCUDO-ANDROID-NOT: "-lc"
+// CHECK-SCUDO-ANDROID: "-pie"
+// CHECK-SCUDO-ANDROID-NOT: "-lpthread"
+// CHECK-SCUDO-ANDROID: libclang_rt.scudo-arm-android.so"
+// CHECK-SCUDO-ANDROID-NOT: "-lpthread"
+
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -target arm-linux-androideabi -fuse-ld=ld -fsanitize=scudo \
+// RUN:     --sysroot=%S/Inputs/basic_android_tree/sysroot \
+// RUN:     -static-libsan \
+// RUN:   | FileCheck --check-prefix=CHECK-SCUDO-ANDROID-STATIC %s
+// CHECK-SCUDO-ANDROID-STATIC: "{{(.*[^.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
+// CHECK-SCUDO-ANDROID-STATIC: "-pie"
+// CHECK-SCUDO-ANDROID-STATIC: "--whole-archive" "{{.*}}libclang_rt.scudo-arm-android.a" "--no-whole-archive"
+// CHECK-SCUDO-ANDROID-STATIC-NOT: "-lstdc++"
+// CHECK-SCUDO-ANDROID-STATIC: "-lpthread"
+
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -target aarch64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
+// RUN:     -resource-dir=%S/Inputs/resource_dir \
+// RUN:     --sysroot=%S/Inputs/basic_linux_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-HWASAN-LINUX %s
+//
+// CHECK-HWASAN-LINUX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
+// CHECK-HWASAN-LINUX-NOT: "-lc"
+// CHECK-HWASAN-LINUX: libclang_rt.hwasan-aarch64.a"
+// CHECK-HWASAN-LINUX-NOT: "--export-dynamic"
+// CHECK-HWASAN-LINUX: "--dynamic-list={{.*}}libclang_rt.hwasan-aarch64.a.syms"
+// CHECK-HWASAN-LINUX-NOT: "--export-dynamic"
+// CHECK-HWASAN-LINUX: "-lpthread"
+// CHECK-HWASAN-LINUX: "-lrt"
+// CHECK-HWASAN-LINUX: "-ldl"
+
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -target aarch64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress -shared-libsan \
+// RUN:     -resource-dir=%S/Inputs/resource_dir \
+// RUN:     --sysroot=%S/Inputs/basic_linux_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-SHARED-HWASAN-LINUX %s
+
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -target aarch64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress \
+// RUN:     -shared-libsan \
+// RUN:     -resource-dir=%S/Inputs/resource_dir \
+// RUN:     --sysroot=%S/Inputs/basic_linux_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-SHARED-HWASAN-LINUX %s
+//
+// CHECK-SHARED-HWASAN-LINUX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
+// CHECK-SHARED-HWASAN-LINUX-NOT: "-lc"
+// CHECK-SHARED-HWASAN-LINUX: libclang_rt.hwasan-aarch64.so"
+// CHECK-SHARED-HWASAN-LINUX-NOT: "-lpthread"
+// CHECK-SHARED-HWASAN-LINUX-NOT: "-lrt"
+// CHECK-SHARED-HWASAN-LINUX-NOT: "-ldl"
+// CHECK-SHARED-HWASAN-LINUX-NOT: "--export-dynamic"
+// CHECK-SHARED-HWASAN-LINUX-NOT: "--dynamic-list"
+
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.so -shared 2>&1 \
+// RUN:     -target aarch64-unknown-linux -fuse-ld=ld -fsanitize=hwaddress -shared-libsan \
+// RUN:     -resource-dir=%S/Inputs/resource_dir \
+// RUN:     --sysroot=%S/Inputs/basic_linux_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-DSO-SHARED-HWASAN-LINUX %s
+//
+// CHECK-DSO-SHARED-HWASAN-LINUX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
+// CHECK-DSO-SHARED-HWASAN-LINUX-NOT: "-lc"
+// CHECK-DSO-SHARED-HWASAN-LINUX: libclang_rt.hwasan-aarch64.so"
+// CHECK-DSO-SHARED-HWASAN-LINUX-NOT: "-lpthread"
+// CHECK-DSO-SHARED-HWASAN-LINUX-NOT: "-lrt"
+// CHECK-DSO-SHARED-HWASAN-LINUX-NOT: "-ldl"
+// CHECK-DSO-SHARED-HWASAN-LINUX-NOT: "--export-dynamic"
+// CHECK-DSO-SHARED-HWASAN-LINUX-NOT: "--dynamic-list"
diff --git a/test/Driver/solaris-header-search.cpp b/test/Driver/solaris-header-search.cpp
index 667b2c0..d5c3c0d 100644
--- a/test/Driver/solaris-header-search.cpp
+++ b/test/Driver/solaris-header-search.cpp
@@ -1,11 +1,41 @@
-// Test that the C++ headers are found.
+// Test that the C++ headers are found on Solaris with gcc toolchain detection
 //
-// RUN: %clang -no-canonical-prefixes %s -### 2>&1 \
-// RUN:     --target=sparc-sun-solaris2.11 \
-// RUN:     --gcc-toolchain="" \
-// RUN:     --sysroot=%S/Inputs/sparc-sun-solaris2.11 \
-// RUN:   | FileCheck %s
-// CHECK: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
-// CHECK: "-internal-isystem" "{{.*}}/usr/include/c++/v1/support/solaris"
-// CHECK: "-internal-isystem" "{{.*}}/usr/gcc/4.8/include/c++/4.8.2"
-// CHECK: "-internal-isystem" "{{.*}}/usr/gcc/4.8/include/c++/4.8.2/sparc-sun-solaris2.11"
+// Sparc, 32bit
+// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
+// RUN:     --target=sparc-sun-solaris2.11 --stdlib=platform \
+// RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
+// RUN:   | FileCheck --check-prefix=CHECK_SOLARIS_SPARC %s
+// CHECK_SOLARIS_SPARC: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK_SOLARIS_SPARC-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
+// CHECK_SOLARIS_SPARC-SAME: "-internal-isystem" "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/../../../../include/c++/4.8.2"
+// CHECK_SOLARIS_SPARC-SAME: "-internal-isystem" "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/../../../../include/c++/4.8.2/sparc-sun-solaris2.11"
+
+// Sparc, 64bit
+// RUN: %clang -no-canonical-prefixes -m64 %s -### -fsyntax-only 2>&1 \
+// RUN:     --target=sparc-sun-solaris2.11 --stdlib=platform \
+// RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
+// RUN:   | FileCheck --check-prefix=CHECK_SOLARIS_SPARC64 %s
+// CHECK_SOLARIS_SPARC64: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK_SOLARIS_SPARC64-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
+// CHECK_SOLARIS_SPARC64-SAME: "-internal-isystem" "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/../../../../include/c++/4.8.2"
+// CHECK_SOLARIS_SPARC64-SAME: "-internal-isystem" "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/../../../../include/c++/4.8.2/sparc-sun-solaris2.11/sparcv9"
+
+// Intel, 32bit
+// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \
+// RUN:     --target=i386-pc-solaris2.11 --stdlib=platform \
+// RUN:     --sysroot=%S/Inputs/solaris_x86_tree \
+// RUN:   | FileCheck --check-prefix=CHECK_SOLARIS_X86 %s
+// CHECK_SOLARIS_X86: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK_SOLARIS_X86-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
+// CHECK_SOLARIS_X86-SAME: "-internal-isystem" "{{.*}}/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/../../../../include/c++/4.9.4"
+// CHECK_SOLARIS_X86-SAME: "-internal-isystem" "{{.*}}/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/../../../../include/c++/4.9.4/i386-pc-solaris2.11"
+
+// Intel, 64bit
+// RUN: %clang -no-canonical-prefixes -m64 %s -### -fsyntax-only 2>&1 \
+// RUN:     --target=i386-pc-solaris2.11 --stdlib=platform \
+// RUN:     --sysroot=%S/Inputs/solaris_x86_tree \
+// RUN:   | FileCheck --check-prefix=CHECK_SOLARIS_X64 %s
+// CHECK_SOLARIS_X64: "{{[^"]*}}clang{{[^"]*}}" "-cc1"
+// CHECK_SOLARIS_X64-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
+// CHECK_SOLARIS_X64-SAME: "-internal-isystem" "[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/../../../../include/c++/4.9.4"
+// CHECK_SOLARIS_X64-SAME: "-internal-isystem" "[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/../../../../include/c++/4.9.4/i386-pc-solaris2.11/amd64"
diff --git a/test/Driver/solaris-ld.c b/test/Driver/solaris-ld.c
index d871b59..2fc5c91 100644
--- a/test/Driver/solaris-ld.c
+++ b/test/Driver/solaris-ld.c
@@ -1,33 +1,110 @@
-// Test ld invocation on Solaris targets.
+// General tests that ld invocations on Solaris targets sane. Note that we use
+// sysroot to make these tests independent of the host system.
 
-// Check sparc-sun-solaris2.1
+// Check sparc-sun-solaris2.11, 32bit
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
-// RUN:     --sysroot=%S/Inputs/sparc-sun-solaris2.11 \
-// RUN:   | FileCheck %s
-// CHECK: "-cc1" "-triple" "sparc-sun-solaris2.11"
-// CHECK: ld{{.*}}"
-// CHECK: "--dynamic-linker" "{{.*}}/usr/lib/ld.so.1"
-// CHECK: "{{.*}}/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2{{/|\\\\}}crt1.o"
-// CHECK: "{{.*}}/usr/lib/crti.o"
-// CHECK: "{{.*}}/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2{{/|\\\\}}crtbegin.o"
-// CHECK: "{{.*}}/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2{{/|\\\\}}crtend.o"
-// CHECK: "{{.*}}/usr/lib/crtn.o"
-// CHECK "-lc"
-// CHECK "-lgcc_s"
-// CHECK "-lgcc"
-// CHECK "-lm"
+// RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-LD-SPARC32 %s
+// CHECK-LD-SPARC32-NOT: warning:
+// CHECK-LD-SPARC32: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "sparc-sun-solaris2.11"
+// CHECK-LD-SPARC32-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
+// CHECK-LD-SPARC32: "{{.*}}ld{{(.exe)?}}"
+// CHECK-LD-SPARC32-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib{{/|\\\\}}ld.so.1"
+// CHECK-LD-SPARC32-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2{{/|\\\\}}crt1.o"
+// CHECK-LD-SPARC32-SAME: "[[SYSROOT]]/usr/lib{{/|\\\\}}crti.o"
+// CHECK-LD-SPARC32-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2{{/|\\\\}}crtbegin.o"
+// CHECK-LD-SPARC32-SAME: "-L[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2"
+// CHECK-LD-SPARC32-SAME: "-L[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/../../.."
+// CHECK-LD-SPARC32-SAME: "-L[[SYSROOT]]/usr/lib"
+// CHECK-LD-SPARC32-SAME: "-lgcc_s"
+// CHECK-LD-SPARC32-SAME: "-lc"
+// CHECK-LD-SPARC32-SAME: "-lgcc"
+// CHECK-LD-SPARC32-SAME: "-lm"
+// CHECK-LD-SPARC32-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2{{/|\\\\}}crtend.o"
+// CHECK-LD-SPARC32-SAME: "[[SYSROOT]]/usr/lib{{/|\\\\}}crtn.o"
+
+// Check sparc-sun-solaris2.11, 64bit
+// RUN: %clang -no-canonical-prefixes -m64 %s -### -o %t.o 2>&1 \
+// RUN:     --target=sparc-sun-solaris2.11 \
+// RUN:     --gcc-toolchain="" \
+// RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-LD-SPARC64 %s
+// CHECK-LD-SPARC64-NOT: warning:
+// CHECK-LD-SPARC64: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "sparcv9-sun-solaris2.11"
+// CHECK-LD-SPARC64-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
+// CHECK-LD-SPARC64: "{{.*}}ld{{(.exe)?}}"
+// CHECK-LD-SPARC64-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib/sparcv9{{/|\\\\}}ld.so.1"
+// CHECK-LD-SPARC64-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9{{/|\\\\}}crt1.o"
+// CHECK-LD-SPARC64-SAME: "[[SYSROOT]]/usr/lib/sparcv9{{/|\\\\}}crti.o"
+// CHECK-LD-SPARC64-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9{{/|\\\\}}crtbegin.o"
+// CHECK-LD-SPARC64-SAME: "-L[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9"
+// CHECK-LD-SPARC64-SAME: "-L[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/../../../sparcv9"
+// CHECK-LD-SPARC64-SAME: "-L[[SYSROOT]]/usr/lib/sparcv9"
+// CHECK-LD-SPARC64-SAME: "-lgcc_s"
+// CHECK-LD-SPARC64-SAME: "-lc"
+// CHECK-LD-SPARC64-SAME: "-lgcc"
+// CHECK-LD-SPARC64-SAME: "-lm"
+// CHECK-LD-SPARC64-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9{{/|\\\\}}crtend.o"
+// CHECK-LD-SPARC64-SAME: "[[SYSROOT]]/usr/lib/sparcv9{{/|\\\\}}crtn.o"
+
+// Check i386-pc-solaris2.11, 32bit
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     --target=i386-pc-solaris2.11 \
+// RUN:     --gcc-toolchain="" \
+// RUN:     --sysroot=%S/Inputs/solaris_x86_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-LD-X32 %s
+// CHECK-LD-X32-NOT: warning:
+// CHECK-LD-X32: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "i386-pc-solaris2.11"
+// CHECK-LD-X32-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
+// CHECK-LD-X32: "{{.*}}ld{{(.exe)?}}"
+// CHECK-LD-X32-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib{{/|\\\\}}ld.so.1"
+// CHECK-LD-X32-SAME: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt1.o"
+// CHECK-LD-X32-SAME: "[[SYSROOT]]/usr/lib{{/|\\\\}}crti.o"
+// CHECK-LD-X32-SAME: "[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4{{/|\\\\}}crtbegin.o"
+// CHECK-LD-X32-SAME: "-L[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4"
+// CHECK-LD-X32-SAME: "-L[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/../../.."
+// CHECK-LD-X32-SAME: "-L[[SYSROOT]]/usr/lib"
+// CHECK-LD-X32-SAME: "-lgcc_s"
+// CHECK-LD-X32-SAME: "-lc"
+// CHECK-LD-X32-SAME: "-lgcc"
+// CHECK-LD-X32-SAME: "-lm"
+// CHECK-LD-X32-SAME: "[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4{{/|\\\\}}crtend.o"
+// CHECK-LD-X32-SAME: "[[SYSROOT]]/usr/lib{{/|\\\\}}crtn.o"
+
+// Check i386-pc-solaris2.11, 64bit
+// RUN: %clang -no-canonical-prefixes -m64 %s -### -o %t.o 2>&1 \
+// RUN:     --target=i386-pc-solaris2.11 \
+// RUN:     --gcc-toolchain="" \
+// RUN:     --sysroot=%S/Inputs/solaris_x86_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-LD-X64 %s
+// CHECK-LD-X64-NOT: warning:
+// CHECK-LD-X64: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "x86_64-pc-solaris2.11"
+// CHECK-LD-X64-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
+// CHECK-LD-X64: "{{.*}}ld{{(.exe)?}}"
+// CHECK-LD-X64-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib/amd64{{/|\\\\}}ld.so.1"
+// CHECK-LD-X64-SAME: "[[SYSROOT]]/usr/lib/amd64{{/|\\\\}}crt1.o"
+// CHECK-LD-X64-SAME: "[[SYSROOT]]/usr/lib/amd64{{/|\\\\}}crti.o"
+// CHECK-LD-X64-SAME: "[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/amd64{{/|\\\\}}crtbegin.o"
+// CHECK-LD-X64-SAME: "-L[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/amd64"
+// CHECK-LD-X64-SAME: "-L[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/../../../amd64"
+// CHECK-LD-X64-SAME: "-L[[SYSROOT]]/usr/lib/amd64"
+// CHECK-LD-X64-SAME: "-lgcc_s"
+// CHECK-LD-X64-SAME: "-lc"
+// CHECK-LD-X64-SAME: "-lgcc"
+// CHECK-LD-X64-SAME: "-lm"
+// CHECK-LD-X64-SAME: "[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/amd64{{/|\\\\}}crtend.o"
+// CHECK-LD-X64-SAME: "[[SYSROOT]]/usr/lib/amd64{{/|\\\\}}crtn.o"
 
 // Check the right -l flags are present with -shared
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o -shared 2>&1 \
 // RUN:     --target=sparc-sun-solaris2.11 \
 // RUN:     --gcc-toolchain="" \
-// RUN:     --sysroot=%S/Inputs/sparc-sun-solaris2.11 \
-// RUN:   | FileCheck --check-prefix=CHECK-SHARED %s
-
-// CHECK-SHARED: ld{{.*}}"
-// CHECK-SHARED "-lc"
-// CHECK-SHARED "-lgcc_s"
-// CHECK-SHARED-NOT "-lgcc"
-// CHECK-SHARED-NOT: "-lm"
+// RUN:     --sysroot=%S/Inputs/solaris_sparc_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-SPARC32-SHARED %s
+// CHECK-SPARC32-SHARED: "{{.*}}ld{{(.exe)?}}"
+// CHECK-SPARC32-SHARED-SAME: "-lgcc_s"
+// CHECK-SPARC32-SHARED-SAME: "-lc"
+// CHECK-SPARC32-SHARED-NOT: "-lgcc"
+// CHECK-SPARC32-SHARED-NOT: "-lm"
diff --git a/test/Driver/stack-arg-probe.c b/test/Driver/stack-arg-probe.c
new file mode 100644
index 0000000..97d2396
--- /dev/null
+++ b/test/Driver/stack-arg-probe.c
@@ -0,0 +1,7 @@
+// RUN: %clang -### %s 2>&1 | FileCheck %s -check-prefix=STACKPROBE
+// RUN: %clang -### -mno-stack-arg-probe -mstack-arg-probe %s 2>&1 | FileCheck %s -check-prefix=STACKPROBE
+// RUN: %clang -### -mstack-arg-probe -mno-stack-arg-probe %s 2>&1 | FileCheck %s -check-prefix=NO-STACKPROBE
+// REQUIRES: clang-driver
+
+// NO-STACKPROBE: -mno-stack-arg-probe
+// STACKPROBE-NOT: -mno-stack-arg-probe
diff --git a/test/Driver/stack-size-section.c b/test/Driver/stack-size-section.c
new file mode 100644
index 0000000..461f0b5
--- /dev/null
+++ b/test/Driver/stack-size-section.c
@@ -0,0 +1,9 @@
+// RUN: %clang -target x86_64-unknown %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT
+// RUN: %clang -target x86_64-scei-ps4 -fno-stack-size-section %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT
+// CHECK-ABSENT-NOT: -fstack-size-section
+
+// RUN: %clang -target x86_64-unknown -fstack-size-section -### 2>&1 | FileCheck %s --check-prefix=CHECK-PRESENT
+// RUN: %clang -target x86_64-scei-ps4 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-PRESENT
+// CHECK-PRESENT: -fstack-size-section
+
+int foo() { return 42; }
diff --git a/test/Driver/types.c b/test/Driver/types.c
new file mode 100644
index 0000000..03fe105
--- /dev/null
+++ b/test/Driver/types.c
@@ -0,0 +1,18 @@
+// Check whether __int128_t and __uint128_t are supported.
+
+// RUN: not %clang -c --target=riscv32-unknown-linux-gnu -fsyntax-only %s \
+// RUN: 2>&1 | FileCheck %s
+
+// RUN: %clang -c --target=riscv32-unknown-linux-gnu -fsyntax-only %s \
+// RUN: -fno-force-enable-int128 -fforce-enable-int128
+
+// RUN: not %clang -c --target=riscv32-unknown-linux-gnu -fsyntax-only %s \
+// RUN: -fforce-enable-int128 -fno-force-enable-int128
+
+void a() {
+  __int128_t s;
+  __uint128_t t;
+}
+
+// CHECK: error: use of undeclared identifier '__int128_t'
+// CHECK: error: use of undeclared identifier '__uint128_t'
diff --git a/test/Driver/unavailable_aligned_allocation.cpp b/test/Driver/unavailable_aligned_allocation.cpp
index cd75c08..d5820b0 100644
--- a/test/Driver/unavailable_aligned_allocation.cpp
+++ b/test/Driver/unavailable_aligned_allocation.cpp
@@ -10,15 +10,15 @@
 // RUN: %clang -target thumbv7-apple-watchos3 -c -### %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=UNAVAILABLE
 //
-// RUN: %clang -target x86_64-apple-macosx10.13 -mios-simulator-version-min=10 \
+// RUN: %clang -target x86_64-apple-darwin -mios-simulator-version-min=10 \
 // RUN:  -c -### %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=UNAVAILABLE
 //
-// RUN: %clang -target x86_64-apple-macosx10.13 -mtvos-simulator-version-min=10 \
+// RUN: %clang -target x86_64-apple-darwin -mtvos-simulator-version-min=10 \
 // RUN: -c -### %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=UNAVAILABLE
 //
-// RUN: %clang -target x86_64-apple-macosx10.13 -mwatchos-simulator-version-min=3 \
+// RUN: %clang -target x86_64-apple-darwin -mwatchos-simulator-version-min=3 \
 // RUN: -c -### %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=UNAVAILABLE
 //
@@ -39,15 +39,15 @@
 // RUN: %clang -target x86_64-unknown-linux-gnu -c -### %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=AVAILABLE
 //
-// RUN: %clang -target x86_64-apple-macosx10.12 -mios-simulator-version-min=11 \
+// RUN: %clang -target x86_64-apple-darwin -mios-simulator-version-min=11 \
 // RUN:  -c -### %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=AVAILABLE
 //
-// RUN: %clang -target x86_64-apple-macosx10.12 -mtvos-simulator-version-min=11 \
+// RUN: %clang -target x86_64-apple-darwin -mtvos-simulator-version-min=11 \
 // RUN: -c -### %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=AVAILABLE
 //
-// RUN: %clang -target x86_64-apple-macosx10.12 -mwatchos-simulator-version-min=4 \
+// RUN: %clang -target x86_64-apple-darwin -mwatchos-simulator-version-min=4 \
 // RUN: -c -### %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=AVAILABLE
 //
diff --git a/test/Driver/unix-conformance.c b/test/Driver/unix-conformance.c
new file mode 100644
index 0000000..ddf77c7
--- /dev/null
+++ b/test/Driver/unix-conformance.c
@@ -0,0 +1,24 @@
+// Check UNIX conformance for cc/c89/c99
+// When c99 encounters a compilation error that causes an object file not to be
+// created, it shall write a diagnostic to standard error and continue to
+// compile other source code operands, but it shall not perform the link phase
+// and it shall return a non-zero exit status.
+
+// When given multiple .c files to compile, clang compiles them in order until
+// it hits an error, at which point it stops.
+//
+// RUN: rm -rf %t-dir
+// RUN: mkdir -p %t-dir
+// RUN: cd %t-dir
+//
+// RUN: touch %t-dir/1.c
+// RUN: echo "invalid C code" > %t-dir/2.c
+// RUN: touch %t-dir/3.c
+// RUN: echo "invalid C code" > %t-dir/4.c
+// RUN: touch %t-dir/5.c
+// RUN: not %clang -S %t-dir/1.c %t-dir/2.c %t-dir/3.c %t-dir/4.c %t-dir/5.c
+// RUN: test -f %t-dir/1.s
+// RUN: test ! -f %t-dir/2.s
+// RUN: test -f %t-dir/3.s
+// RUN: test ! -f %t-dir/4.s
+// RUN: test -f %t-dir/5.s
diff --git a/test/Driver/unknown-arg.c b/test/Driver/unknown-arg.c
index 9bba74a..befd063 100644
--- a/test/Driver/unknown-arg.c
+++ b/test/Driver/unknown-arg.c
@@ -1,11 +1,23 @@
-// RUN: not %clang %s -cake-is-lie -%0 -%d -HHHH -munknown-to-clang-option -print-stats -funknown-to-clang-option -### 2>&1 | \
+// RUN: not %clang %s -cake-is-lie -%0 -%d -HHHH -munknown-to-clang-option -print-stats -funknown-to-clang-option -ifoo -imultilib dir -### 2>&1 | \
 // RUN: FileCheck %s
+// RUN: %clang %s -imultilib dir -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=MULTILIB
+// RUN: not %clang %s -stdlibs=foo -hell -version -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=DID-YOU-MEAN
 // RUN: %clang_cl -cake-is-lie -%0 -%d -HHHH -munknown-to-clang-option -print-stats -funknown-to-clang-option -### -c -- %s 2>&1 | \
 // RUN: FileCheck %s --check-prefix=CL
+// RUN: %clang_cl -Brepo -### -- %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CL-DID-YOU-MEAN
 // RUN: not %clang_cl -cake-is-lie -%0 -%d -HHHH -munknown-to-clang-option -print-stats -funknown-to-clang-option -c -Werror=unknown-argument -### -- %s 2>&1 | \
 // RUN: FileCheck %s --check-prefix=CL-ERROR
+// RUN: not %clang_cl -helo -Werror=unknown-argument -### -- %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CL-ERROR-DID-YOU-MEAN
 // RUN: %clang_cl -cake-is-lie -%0 -%d -HHHH -munknown-to-clang-option -print-stats -funknown-to-clang-option -c -Wno-unknown-argument -### -- %s 2>&1 | \
 // RUN: FileCheck %s --check-prefix=SILENT
+// RUN: not %clang -cc1as -hell --version -debug-info-macros 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1AS-DID-YOU-MEAN
+// RUN: not %clang -cc1asphalt -help 2>&1 | \
+// RUN: FileCheck %s --check-prefix=UNKNOWN-INTEGRATED
 
 // CHECK: error: unknown argument: '-cake-is-lie'
 // CHECK: error: unknown argument: '-%0'
@@ -14,6 +26,11 @@
 // CHECK: error: unknown argument: '-munknown-to-clang-option'
 // CHECK: error: unknown argument: '-print-stats'
 // CHECK: error: unknown argument: '-funknown-to-clang-option'
+// CHECK: error: unknown argument: '-ifoo'
+// MULTILIB: warning: argument unused during compilation: '-imultilib dir'
+// DID-YOU-MEAN: error: unknown argument '-stdlibs=foo', did you mean '-stdlib=foo'?
+// DID-YOU-MEAN: error: unknown argument '-hell', did you mean '-help'?
+// DID-YOU-MEAN: error: unknown argument '-version', did you mean '--version'?
 // CL: warning: unknown argument ignored in clang-cl: '-cake-is-lie'
 // CL: warning: unknown argument ignored in clang-cl: '-%0'
 // CL: warning: unknown argument ignored in clang-cl: '-%d'
@@ -21,6 +38,7 @@
 // CL: warning: unknown argument ignored in clang-cl: '-munknown-to-clang-option'
 // CL: warning: unknown argument ignored in clang-cl: '-print-stats'
 // CL: warning: unknown argument ignored in clang-cl: '-funknown-to-clang-option'
+// CL-DID-YOU-MEAN: warning: unknown argument ignored in clang-cl '-Brepo' (did you mean '-Brepro'?)
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-cake-is-lie'
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-%0'
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-%d'
@@ -28,9 +46,13 @@
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-munknown-to-clang-option'
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-print-stats'
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-funknown-to-clang-option'
+// CL-ERROR-DID-YOU-MEAN: error: unknown argument ignored in clang-cl '-helo' (did you mean '-help'?)
 // SILENT-NOT: error:
 // SILENT-NOT: warning:
-
+// CC1AS-DID-YOU-MEAN: error: unknown argument '-hell', did you mean '-help'?
+// CC1AS-DID-YOU-MEAN: error: unknown argument '--version', did you mean '-version'?
+// CC1AS-DID-YOU-MEAN: error: unknown argument '-debug-info-macros', did you mean '-debug-info-macro'?
+// UNKNOWN-INTEGRATED: error: unknown integrated tool 'asphalt'. Valid tools include '-cc1' and '-cc1as'.
 
 // RUN: %clang -S %s -o %t.s  -Wunknown-to-clang-option 2>&1 | FileCheck --check-prefix=IGNORED %s
 
diff --git a/test/Driver/unknown-std.c b/test/Driver/unknown-std.c
index 41736ec..9ef70a4 100644
--- a/test/Driver/unknown-std.c
+++ b/test/Driver/unknown-std.c
@@ -14,6 +14,8 @@
 // CHECK-NEXT: note: use 'gnu99' for 'ISO C 1999 with GNU extensions' standard
 // CHECK-NEXT: note: use 'c11' or 'iso9899:2011' for 'ISO C 2011' standard
 // CHECK-NEXT: note: use 'gnu11' for 'ISO C 2011 with GNU extensions' standard
+// CHECK-NEXT: note: use 'c17' or 'iso9899:2017' for 'ISO C 2017' standard
+// CHECK-NEXT: note: use 'gnu17' for 'ISO C 2017 with GNU extensions' standard
 
 // Make sure that no other output is present.
 // CHECK-NOT: {{^.+$}}
diff --git a/test/Driver/unknown-std.cpp b/test/Driver/unknown-std.cpp
index a7aae51..2122a74 100644
--- a/test/Driver/unknown-std.cpp
+++ b/test/Driver/unknown-std.cpp
@@ -4,7 +4,7 @@
 
 // RUN: not %clang %s -std=foobar -c 2>&1 | FileCheck --match-full-lines %s
 // RUN: not %clang -x objective-c++ %s -std=foobar -c 2>&1 | FileCheck --match-full-lines %s
-// RUN: not %clang -x cuda -nocudainc -nocudalib %s -std=foobar -c 2>&1 | FileCheck --match-full-lines --check-prefix=CHECK --check-prefix=CUDA %s
+// RUN: not %clang -x cuda -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s -std=foobar -c 2>&1 | FileCheck --match-full-lines --check-prefix=CHECK --check-prefix=CUDA %s
 
 // CHECK: error: invalid value 'foobar' in '-std=foobar'
 // CHECK-NEXT: note: use 'c++98' or 'c++03' for 'ISO C++ 1998 with amendments' standard
diff --git a/test/Driver/unsupported-option.c b/test/Driver/unsupported-option.c
new file mode 100644
index 0000000..39f135e
--- /dev/null
+++ b/test/Driver/unsupported-option.c
@@ -0,0 +1,7 @@
+// RUN: not %clang %s --hedonism -### 2>&1 | \
+// RUN: FileCheck %s
+// RUN: not %clang %s --hell -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=DID-YOU-MEAN
+
+// CHECK: error: unsupported option '--hedonism'
+// DID-YOU-MEAN: error: unsupported option '--hell', did you mean '--help'?
diff --git a/test/Driver/wasm-toolchain.c b/test/Driver/wasm-toolchain.c
index 39401c9..b1f5f48 100644
--- a/test/Driver/wasm-toolchain.c
+++ b/test/Driver/wasm-toolchain.c
@@ -1,21 +1,8 @@
 // A basic clang -cc1 command-line. WebAssembly is somewhat special in
-// enabling -ffunction-sections, -fdata-sections, and -fvisibility=hidden by
-// default.
+// enabling -fvisibility=hidden by default.
 
 // RUN: %clang %s -### -no-canonical-prefixes -target wasm32-unknown-unknown 2>&1 | FileCheck -check-prefix=CC1 %s
-// CC1: clang{{.*}} "-cc1" "-triple" "wasm32-unknown-unknown" {{.*}} "-fvisibility" "hidden" {{.*}} "-ffunction-sections" "-fdata-sections"
-
-// Ditto, but ensure that a user -fno-function-sections disables the
-// default -ffunction-sections.
-
-// RUN: %clang %s -### -target wasm32-unknown-unknown -fno-function-sections 2>&1 | FileCheck -check-prefix=NO_FUNCTION_SECTIONS %s
-// NO_FUNCTION_SECTIONS-NOT: function-sections
-
-// Ditto, but ensure that a user -fno-data-sections disables the
-// default -fdata-sections.
-
-// RUN: %clang %s -### -target wasm32-unknown-unknown -fno-data-sections 2>&1 | FileCheck -check-prefix=NO_DATA_SECTIONS %s
-// NO_DATA_SECTIONS-NOT: data-sections
+// CC1: clang{{.*}} "-cc1" "-triple" "wasm32-unknown-unknown" {{.*}} "-fvisibility" "hidden" {{.*}}
 
 // Ditto, but ensure that a user -fvisibility=default disables the default
 // -fvisibility=hidden.
@@ -27,10 +14,10 @@
 
 // RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s 2>&1 | FileCheck -check-prefix=LINK %s
 // LINK: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// LINK: lld{{.*}}" "-flavor" "wasm" "-L/foo/lib" "[[temp]]" "-allow-undefined-file" "wasm.syms" "-lc" "-lcompiler_rt" "-o" "a.out"
+// LINK: lld{{.*}}" "-flavor" "wasm" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C link command-line with optimization.
 
 // RUN: %clang -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s 2>&1 | FileCheck -check-prefix=LINK_OPT %s
 // LINK_OPT: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// LINK_OPT: lld{{.*}}" "-flavor" "wasm" "-L/foo/lib" "[[temp]]" "-allow-undefined-file" "wasm.syms" "-lc" "-lcompiler_rt" "-o" "a.out"
+// LINK_OPT: lld{{.*}}" "-flavor" "wasm" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
diff --git a/test/Driver/wasm-toolchain.cpp b/test/Driver/wasm-toolchain.cpp
new file mode 100644
index 0000000..ea3aa31
--- /dev/null
+++ b/test/Driver/wasm-toolchain.cpp
@@ -0,0 +1,23 @@
+// A basic clang -cc1 command-line. WebAssembly is somewhat special in
+// enabling -fvisibility=hidden by default.
+
+// RUN: %clangxx %s -### -no-canonical-prefixes -target wasm32-unknown-unknown 2>&1 | FileCheck -check-prefix=CC1 %s
+// CC1: clang{{.*}} "-cc1" "-triple" "wasm32-unknown-unknown" {{.*}} "-fvisibility" "hidden" {{.*}}
+
+// Ditto, but ensure that a user -fvisibility=default disables the default
+// -fvisibility=hidden.
+
+// RUN: %clangxx %s -### -target wasm32-unknown-unknown -fvisibility=default 2>&1 | FileCheck -check-prefix=FVISIBILITY_DEFAULT %s
+// FVISIBILITY_DEFAULT-NOT: hidden
+
+// A basic C++ link command-line.
+
+// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo --stdlib=c++ %s 2>&1 | FileCheck -check-prefix=LINK %s
+// LINK: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK: lld{{.*}}" "-flavor" "wasm" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
+
+// A basic C++ link command-line with optimization.
+
+// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s --stdlib=c++ 2>&1 | FileCheck -check-prefix=LINK_OPT %s
+// LINK_OPT: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
+// LINK_OPT: lld{{.*}}" "-flavor" "wasm" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
diff --git a/test/Driver/windows-cross.c b/test/Driver/windows-cross.c
index 4b52e99..d96b028 100644
--- a/test/Driver/windows-cross.c
+++ b/test/Driver/windows-cross.c
@@ -3,6 +3,11 @@
 
 // CHECK-BASIC-LIBCXX: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{[^"]*}}.o" "-lmsvcrt"
 
+// RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -stdlib=libstdc++ -rtlib=compiler-rt -static -o /dev/null %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefix CHECK-STATIC
+
+// CHECK-STATIC: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bstatic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{[^"]*}}.o" "-lmsvcrt"
+
 // RUN: %clang -### -target armv7-windows-itanium --sysroot %s/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libstdc++ -o /dev/null %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-RTLIB
 
@@ -23,6 +28,11 @@
 
 // CHECK-SHARED: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
 
+// RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -static -o shared.dll %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefix CHECK-SHARED-STATIC
+
+// CHECK-SHARED-STATIC: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bstatic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
+
 // RUN: %clang -### -target armv7-windows-itanium --sysroot %s/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -nostartfiles -o shared.dll %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-NOSTARTFILES
 
diff --git a/test/Driver/x86-march.c b/test/Driver/x86-march.c
index 8236b2f..9c9235d 100644
--- a/test/Driver/x86-march.c
+++ b/test/Driver/x86-march.c
@@ -60,6 +60,10 @@
 // RUN:   | FileCheck %s -check-prefix=cannonlake
 // cannonlake: "-target-cpu" "cannonlake"
 //
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=icelake 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=icelake
+// icelake: "-target-cpu" "icelake"
+//
 // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=lakemont 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=lakemont
 // lakemont: "-target-cpu" "lakemont"
@@ -72,6 +76,10 @@
 // RUN:   | FileCheck %s -check-prefix=silvermont
 // silvermont: "-target-cpu" "silvermont"
 //
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=goldmont 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=goldmont
+// goldmont: "-target-cpu" "goldmont"
+//
 // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=k8 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=k8
 // k8: "-target-cpu" "k8"
diff --git a/test/Driver/x86-target-features.c b/test/Driver/x86-target-features.c
index dc32f6c..530ba04 100644
--- a/test/Driver/x86-target-features.c
+++ b/test/Driver/x86-target-features.c
@@ -20,10 +20,10 @@
 // SSE4-AES: "-target-feature" "+sse4.2" "-target-feature" "+aes"
 // NO-SSE4-AES: "-target-feature" "-sse4.1" "-target-feature" "-aes"
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512er -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512ifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX %s
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512ifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX %s
-// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512er" "-target-feature" "+avx512pf" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512ifma"
-// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512er" "-target-feature" "-avx512pf" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512ifma"
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512er -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX %s
+// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512er" "-target-feature" "+avx512pf" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512vbmi2" "-target-feature" "+avx512ifma"
+// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512er" "-target-feature" "-avx512pf" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512vbmi2" "-target-feature" "-avx512ifma"
 
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mpclmul -mrdrnd -mfsgsbase -mbmi -mbmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BMI %s
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-pclmul -mno-rdrnd -mno-fsgsbase -mno-bmi -mno-bmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-BMI %s
@@ -70,6 +70,16 @@
 // MPX: "-target-feature" "+mpx"
 // NO-MPX: "-target-feature" "-mpx"
 
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mshstk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CETSS %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-shstk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CETSS %s
+// CETSS: "-target-feature" "+shstk"
+// NO-CETSS: "-target-feature" "-shstk"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mibt %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CETIBT %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-ibt %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CETIBT %s
+// CETIBT: "-target-feature" "+ibt"
+// NO-CETIBT: "-target-feature" "-ibt"
+
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -msgx %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SGX %s
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-sgx %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-SGX %s
 // SGX: "-target-feature" "+sgx"
@@ -84,3 +94,48 @@
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-clzero %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CLZERO %s
 // CLZERO: "-target-feature" "+clzero"
 // NO-CLZERO: "-target-feature" "-clzero"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mvaes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VAES %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-vaes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VAES %s
+// VAES: "-target-feature" "+vaes"
+// NO-VAES: "-target-feature" "-vaes"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mgfni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=GFNI %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-gfni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-GFNI %s
+// GFNI: "-target-feature" "+gfni"
+// NO-GFNI: "-target-feature" "-gfni
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mvpclmulqdq %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VPCLMULQDQ %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-vpclmulqdq %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VPCLMULQDQ %s
+// VPCLMULQDQ: "-target-feature" "+vpclmulqdq"
+// NO-VPCLMULQDQ: "-target-feature" "-vpclmulqdq"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BITALG %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-BITALG %s
+// BITALG: "-target-feature" "+avx512bitalg"
+// NO-BITALG: "-target-feature" "-avx512bitalg"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vnni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VNNI %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512vnni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VNNI %s
+// VNNI: "-target-feature" "+avx512vnni"
+// NO-VNNI: "-target-feature" "-avx512vnni"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vbmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VBMI2 %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512vbmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VBMI2 %s
+// VBMI2: "-target-feature" "+avx512vbmi2"
+// NO-VBMI2: "-target-feature" "-avx512vbmi2"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mrdpid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=RDPID %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-rdpid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-RDPID %s
+// RDPID: "-target-feature" "+rdpid"
+// NO-RDPID: "-target-feature" "-rdpid"
+
+// RUN: %clang -target i386-linux-gnu -mretpoline %s -### -o %t.o 2>&1 | FileCheck -check-prefix=RETPOLINE %s
+// RUN: %clang -target i386-linux-gnu -mno-retpoline %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-RETPOLINE %s
+// RETPOLINE: "-target-feature" "+retpoline"
+// NO-RETPOLINE: "-target-feature" "-retpoline"
+
+// RUN: %clang -target i386-linux-gnu -mretpoline -mretpoline-external-thunk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=RETPOLINE-EXTERNAL-THUNK %s
+// RUN: %clang -target i386-linux-gnu -mretpoline -mno-retpoline-external-thunk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-RETPOLINE-EXTERNAL-THUNK %s
+// RETPOLINE-EXTERNAL-THUNK: "-target-feature" "+retpoline-external-thunk"
+// NO-RETPOLINE-EXTERNAL-THUNK: "-target-feature" "-retpoline-external-thunk"
diff --git a/test/FixIt/fixit-typedef-instead-of-typename-typo.cpp b/test/FixIt/fixit-typedef-instead-of-typename-typo.cpp
new file mode 100644
index 0000000..9a6f116
--- /dev/null
+++ b/test/FixIt/fixit-typedef-instead-of-typename-typo.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+template <typename A, typedef B> struct Foo {
+  // expected-error@-1 {{expected template parameter}} expected-note@-1 {{did you mean to use 'typename'?}}
+
+  // Check that we are speculatively (with fixit applied) trying to parse the rest.
+
+  // Should not produce error about type since parsing speculatively with fixit applied.
+  B member;
+
+  a // expected-error {{unknown type name 'a'}} // expected-error@+1 {{expected member name or ';' after declaration specifiers}}
+};
+
+
+// RUN: not %clang_cc1 -fsyntax-only -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+// CHECK: fix-it:{{.*}}:{3:23-3:30}:"typename"
diff --git a/test/Format/dump-config-cxx.h b/test/Format/dump-config-cxx.h
new file mode 100644
index 0000000..57f3c97
--- /dev/null
+++ b/test/Format/dump-config-cxx.h
@@ -0,0 +1,3 @@
+// RUN: clang-format -dump-config %s | FileCheck %s
+
+// CHECK: Language: Cpp
diff --git a/test/Format/dump-config-objc.h b/test/Format/dump-config-objc.h
new file mode 100644
index 0000000..d33fd36
--- /dev/null
+++ b/test/Format/dump-config-objc.h
@@ -0,0 +1,5 @@
+// RUN: clang-format -dump-config %s | FileCheck %s
+
+// CHECK: Language: ObjC
+@interface Foo
+@end
diff --git a/test/Format/lit.local.cfg b/test/Format/lit.local.cfg
new file mode 100644
index 0000000..abf4e8a
--- /dev/null
+++ b/test/Format/lit.local.cfg
@@ -0,0 +1,4 @@
+# Suffixes supported by clang-format.
+config.suffixes = ['.c', '.cc', '.cpp', '.h', '.m', '.mm', '.java', '.js',
+                   '.ts', '.proto', '.protodevel', '.pb.txt', '.textproto',
+                   '.textpb', '.asciipb', '.td']
diff --git a/test/Frontend/Inputs/optimization-remark-with-hotness-sample.proftext b/test/Frontend/Inputs/optimization-remark-with-hotness-sample.proftext
index 730dc4a..0eef9bd 100644
--- a/test/Frontend/Inputs/optimization-remark-with-hotness-sample.proftext
+++ b/test/Frontend/Inputs/optimization-remark-with-hotness-sample.proftext
@@ -1,7 +1,7 @@
-foo:0:0
- 0: 0
+foo:29:29
+ 0: 29
 bar:29:29
- 9: foo:0
-main:0:0
- 0: 0 bar:0
+ 8: 29 foo:29
+main:29:1
+ 3: 29 bar:29
 
diff --git a/test/Frontend/Inputs/optimization-remark-with-hotness.proftext b/test/Frontend/Inputs/optimization-remark-with-hotness.proftext
index af111d6..4ed20ab 100644
--- a/test/Frontend/Inputs/optimization-remark-with-hotness.proftext
+++ b/test/Frontend/Inputs/optimization-remark-with-hotness.proftext
@@ -1,6 +1,6 @@
 foo
 # Func Hash:
-0
+24
 # Num Counters:
 1
 # Counter Values:
@@ -16,7 +16,7 @@
 
 main
 # Func Hash:
-4
+1160280
 # Num Counters:
 2
 # Counter Values:
diff --git a/test/Frontend/ast-codegen.c b/test/Frontend/ast-codegen.c
index 4a3f8a3..e1140fd 100644
--- a/test/Frontend/ast-codegen.c
+++ b/test/Frontend/ast-codegen.c
@@ -5,9 +5,9 @@
 // CHECK: module asm "foo"
 __asm__("foo");
 
-// CHECK: @g0 = common global i32 0, align 4
+// CHECK: @g0 = common dso_local global i32 0, align 4
 int g0;
 
-// CHECK: define i32 @f0()
+// CHECK: define dso_local i32 @f0()
 int f0() {
 }
diff --git a/test/Frontend/diagnostics-order.c b/test/Frontend/diagnostics-order.c
new file mode 100644
index 0000000..37c0cd9
--- /dev/null
+++ b/test/Frontend/diagnostics-order.c
@@ -0,0 +1,12 @@
+// Make sure a note stays with its associated command-line argument diagnostic.
+// Previously, these diagnostics were grouped by diagnostic level with all
+// notes last.
+//
+// RUN: not %clang_cc1 -O999 -std=bogus -verify=-foo %s 2> %t
+// RUN: FileCheck < %t %s
+//
+// CHECK:      error: invalid value '-foo' in '-verify='
+// CHECK-NEXT: note: -verify prefixes must start with a letter and contain only alphanumeric characters, hyphens, and underscores
+// CHECK-NEXT: warning: optimization level '-O999' is not supported
+// CHECK-NEXT: error: invalid value 'bogus' in '-std=bogus'
+// CHECK-NEXT: note: use {{.*}} for {{.*}} standard
diff --git a/test/Frontend/float16.cpp b/test/Frontend/float16.cpp
index febd6b8..aa65270 100644
--- a/test/Frontend/float16.cpp
+++ b/test/Frontend/float16.cpp
@@ -1,326 +1,326 @@
-// RUN: %clang_cc1 -std=c++11 -ast-dump %s | FileCheck %s --strict-whitespace
-// RUN: %clang_cc1 -std=c++11 -ast-dump -fnative-half-type %s | FileCheck %s --check-prefix=CHECK-NATIVE --strict-whitespace
-
-/*  Various contexts where type _Float16 can appear. */
-
-/*  Namespace */
-namespace {
-  _Float16 f1n;
-  _Float16 f2n = 33.f16;
-  _Float16 arr1n[10];
-  _Float16 arr2n[] = { 1.2, 3.0, 3.e4 };
-  const volatile _Float16 func1n(const _Float16 &arg) {
-    return arg + f2n + arr1n[4] - arr2n[1];
-  }
-}
-
-//CHECK:      |-NamespaceDecl
-//CHECK-NEXT: | |-VarDecl {{.*}} f1n '_Float16'
-//CHECK-NEXT: | |-VarDecl {{.*}} f2n '_Float16' cinit
-//CHECK-NEXT: | | `-FloatingLiteral {{.*}} '_Float16' 3.300000e+01
-//CHECK-NEXT: | |-VarDecl {{.*}} arr1n '_Float16 [10]'
-//CHECK-NEXT: | |-VarDecl {{.*}} arr2n '_Float16 [3]' cinit
-//CHECK-NEXT: | | `-InitListExpr {{.*}} '_Float16 [3]'
-//CHECK-NEXT: | |   |-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
-//CHECK-NEXT: | |   | `-FloatingLiteral {{.*}} 'double' 1.200000e+00
-//CHECK-NEXT: | |   |-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
-//CHECK-NEXT: | |   | `-FloatingLiteral {{.*}} 'double' 3.000000e+00
-//CHECK-NEXT: | |   `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
-//CHECK-NEXT: | |     `-FloatingLiteral {{.*}} 'double' 3.000000e+04
-//CHECK-NEXT: | `-FunctionDecl {{.*}} func1n 'const volatile _Float16 (const _Float16 &)'
-
-/* File */
-_Float16 f1f;
-_Float16 f2f = 32.4;
-_Float16 arr1f[10];
-_Float16 arr2f[] = { -1.2, -3.0, -3.e4 };
-_Float16 func1f(_Float16 arg);
-
-//CHECK:      |-VarDecl {{.*}} f1f '_Float16'
-//CHECK-NEXT: |-VarDecl {{.*}} f2f '_Float16' cinit
-//CHECK-NEXT: | `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
-//CHECK-NEXT: |   `-FloatingLiteral {{.*}} 'double' 3.240000e+01
-//CHECK-NEXT: |-VarDecl {{.*}} arr1f '_Float16 [10]'
-//CHECK-NEXT: |-VarDecl {{.*}} arr2f '_Float16 [3]' cinit
-//CHECK-NEXT: | `-InitListExpr {{.*}} '_Float16 [3]'
-//CHECK-NEXT: |   |-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
-//CHECK-NEXT: |   | `-UnaryOperator {{.*}} 'double' prefix '-'
-//CHECK-NEXT: |   |   `-FloatingLiteral {{.*}} 'double' 1.200000e+00
-//CHECK-NEXT: |   |-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
-//CHECK-NEXT: |   | `-UnaryOperator {{.*}} 'double' prefix '-'
-//CHECK-NEXT: |   |   `-FloatingLiteral {{.*}} 'double' 3.000000e+00
-//CHECK-NEXT: |   `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
-//CHECK-NEXT: |     `-UnaryOperator {{.*}} 'double' prefix '-'
-//CHECK-NEXT: |       `-FloatingLiteral {{.*}} 'double' 3.000000e+04
-//CHECK-NEXT: |-FunctionDecl {{.*}} func1f '_Float16 (_Float16)'
-//CHECK-NEXT: | `-ParmVarDecl {{.*}} arg '_Float16'
-
-
-// Mixing __fp16 and Float16 types:
-// The _Float16 type is first converted to __fp16 type and then the operation
-// is completed as if both operands were of __fp16 type.
-
-__fp16 B = -0.1;
-auto C = -1.0f16 + B;
-
-// When we do *not* have native half types, we expect __fp16 to be promoted to
-// float, and consequently also _Float16 promotions to float:
-
-//CHECK:      -VarDecl {{.*}} used B '__fp16' cinit
-//CHECK-NEXT: | `-ImplicitCastExpr {{.*}} '__fp16' <FloatingCast>
-//CHECK-NEXT: |   `-UnaryOperator {{.*}} 'double' prefix '-'
-//CHECK-NEXT: |     `-FloatingLiteral {{.*}} 'double' 1.000000e-01
-//CHECK-NEXT: |-VarDecl {{.*}} C 'float':'float' cinit
-//CHECK-NEXT: | `-BinaryOperator {{.*}} 'float' '+'
-//CHECK-NEXT: |   |-ImplicitCastExpr {{.*}} 'float' <FloatingCast>
-//CHECK-NEXT: |   | `-UnaryOperator {{.*}} '_Float16' prefix '-'
-//CHECK-NEXT: |   |   `-FloatingLiteral {{.*}} '_Float16' 1.000000e+00
-//CHECK-NEXT: |   `-ImplicitCastExpr {{.*}} 'float' <FloatingCast>
-//CHECK-NEXT: |     `-ImplicitCastExpr {{.*}} '__fp16' <LValueToRValue>
-//CHECK-NEXT: |       `-DeclRefExpr {{.*}} '__fp16' lvalue Var 0x{{.*}} 'B' '__fp16'
-
-// When do have native half types, we expect to see promotions to fp16:
-
-//CHECK-NATIVE: |-VarDecl {{.*}} used B '__fp16' cinit
-//CHECK-NATIVE: | `-ImplicitCastExpr {{.*}} '__fp16' <FloatingCast>
-//CHECK-NATIVE: |   `-UnaryOperator {{.*}} 'double' prefix '-'
-//CHECK-NATIVE: |     `-FloatingLiteral {{.*}} 'double' 1.000000e-01
-//CHECK-NATIVE: |-VarDecl {{.*}} C '__fp16':'__fp16' cinit
-//CHECK-NATIVE: | `-BinaryOperator {{.*}} '__fp16' '+'
-//CHECK-NATIVE: |   |-ImplicitCastExpr {{.*}} '__fp16' <FloatingCast>
-//CHECK-NATIVE: |   | `-UnaryOperator {{.*}} '_Float16' prefix '-'
-//CHECK-NATIVE: |   |   `-FloatingLiteral {{.*}} '_Float16' 1.000000e+00
-//CHECK-NATIVE: |   `-ImplicitCastExpr {{.*}} '__fp16' <LValueToRValue>
-//CHECK-NATIVE: |     `-DeclRefExpr {{.*}} '__fp16' lvalue Var 0x{{.*}} 'B' '__fp16'
-
-
-/* Class */
-
-class C1 {
-  _Float16 f1c;
-  static const _Float16 f2c;
-  volatile _Float16 f3c;
-public:
-  C1(_Float16 arg) : f1c(arg), f3c(arg) { }
-  _Float16 func1c(_Float16 arg ) {
-    return f1c + arg;
-  }
-  static _Float16 func2c(_Float16 arg) {
-    return arg * C1::f2c;
-  }
-};
-
-//CHECK:      |-CXXRecordDecl {{.*}} referenced class C1 definition
-//CHECK:      | |-CXXRecordDecl {{.*}} implicit referenced class C1
-//CHECK-NEXT: | |-FieldDecl {{.*}} referenced f1c '_Float16'
-//CHECK-NEXT: | |-VarDecl {{.*}} used f2c 'const _Float16' static
-//CHECK-NEXT: | |-FieldDecl {{.*}} f3c 'volatile _Float16'
-//CHECK-NEXT: | |-AccessSpecDecl
-//CHECK-NEXT: | |-CXXConstructorDecl {{.*}} used C1 'void (_Float16)
-//CHECK-NEXT: | | |-ParmVarDecl {{.*}} used arg '_Float16'
-//CHECK-NEXT: | | |-CXXCtorInitializer Field {{.*}} 'f1c' '_Float16'
-//CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT: | | |   `-DeclRefExpr {{.*}} '_Float16' lvalue ParmVar 0x{{.*}} 'arg' '_Float16'
-//CHECK-NEXT: | | |-CXXCtorInitializer Field {{.*}} 'f3c' 'volatile _Float16'
-//CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT: | | |   `-DeclRefExpr {{.*}} '_Float16' lvalue ParmVar 0x{{.*}} 'arg' '_Float16'
-//CHECK-NEXT: | | `-CompoundStmt
-//CHECK-NEXT: | |-CXXMethodDecl {{.*}} used func1c '_Float16 (_Float16)
-//CHECK-NEXT: | | |-ParmVarDecl {{.*}} used arg '_Float16'
-//CHECK-NEXT: | | `-CompoundStmt
-//CHECK-NEXT: | |   `-ReturnStmt
-//CHECK-NEXT: | |     `-BinaryOperator {{.*}} '_Float16' '+'
-//CHECK-NEXT: | |       |-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT: | |       | `-MemberExpr {{.*}} '_Float16' lvalue ->f1c 0x{{.*}}
-//CHECK-NEXT: | |       |   `-CXXThisExpr {{.*}} 'class C1 *' this
-//CHECK-NEXT: | |       `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT: | |         `-DeclRefExpr {{.*}} '_Float16' lvalue ParmVar 0x{{.*}} 'arg' '_Float16'
-//CHECK-NEXT: | |-CXXMethodDecl {{.*}} used func2c '_Float16 (_Float16)' static
-//CHECK-NEXT: | | |-ParmVarDecl {{.*}} used arg '_Float16'
-//CHECK-NEXT: | | `-CompoundStmt
-//CHECK-NEXT: | |   `-ReturnStmt
-//CHECK-NEXT: | |     `-BinaryOperator {{.*}} '_Float16' '*'
-//CHECK-NEXT: | |       |-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT: | |       | `-DeclRefExpr {{.*}} '_Float16' lvalue ParmVar 0x{{.*}} 'arg' '_Float16'
-//CHECK-NEXT: | |       `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT: | |         `-DeclRefExpr {{.*}} 'const _Float16' lvalue Var 0x{{.*}} 'f2c' 'const _Float16'
-
-
-/*  Template */
-
-template <class C> C func1t(C arg) {
-  return arg * 2.f16;
-}
-
-//CHECK:      |-FunctionTemplateDecl {{.*}} func1t
-//CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} C
-//CHECK-NEXT: | |-FunctionDecl {{.*}} func1t 'C (C)'
-//CHECK-NEXT: | | |-ParmVarDecl {{.*}} referenced arg 'C'
-//CHECK-NEXT: | | `-CompoundStmt
-//CHECK-NEXT: | |   `-ReturnStmt
-//CHECK-NEXT: | |     `-BinaryOperator {{.*}} '<dependent type>' '*'
-//CHECK-NEXT: | |       |-DeclRefExpr {{.*}} 'C' lvalue ParmVar {{.*}} 'arg' 'C'
-//CHECK-NEXT: | |       `-FloatingLiteral {{.*}} '_Float16' 2.000000e+00
-//CHECK-NEXT: | `-FunctionDecl {{.*}} used func1t '_Float16 (_Float16)'
-//CHECK-NEXT: |   |-TemplateArgument type '_Float16'
-//CHECK-NEXT: |   |-ParmVarDecl {{.*}} used arg '_Float16':'_Float16'
-//CHECK-NEXT: |   `-CompoundStmt
-//CHECK-NEXT: |     `-ReturnStmt
-//CHECK-NEXT: |       `-BinaryOperator {{.*}} '_Float16' '*'
-//CHECK-NEXT: |         |-ImplicitCastExpr {{.*}} '_Float16':'_Float16' <LValueToRValue>
-//CHECK-NEXT: |         | `-DeclRefExpr {{.*}} '_Float16':'_Float16' lvalue ParmVar {{.*}} 'arg' '_Float16':'_Float16'
-//CHECK-NEXT: |         `-FloatingLiteral {{.*}} '_Float16' 2.000000e+00
-
-
-template <class C> struct S1 {
-  C mem1;
-};
-
-//CHECK:      |-ClassTemplateDecl {{.*}} S1
-//CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} referenced class depth 0 index 0 C
-//CHECK-NEXT: | |-CXXRecordDecl {{.*}} struct S1 definition
-//CHECK:      | | |-CXXRecordDecl {{.*}} implicit struct S1
-//CHECK-NEXT: | | `-FieldDecl {{.*}} mem1 'C'
-//CHECK-NEXT: | `-ClassTemplateSpecialization {{.*}} 'S1'
-
-template <> struct S1<_Float16> {
-  _Float16 mem2;
-};
-
-
-/* Local */
-
-extern int printf (const char *__restrict __format, ...);
-
-int main(void) {
-  _Float16 f1l = 1e3f16;
-//CHECK:       | `-VarDecl {{.*}} used f1l '_Float16' cinit
-//CHECK-NEXT:  |   `-FloatingLiteral {{.*}} '_Float16' 1.000000e+03
-
-  _Float16 f2l = -0.f16;
-//CHECK:       | `-VarDecl {{.*}} used f2l '_Float16' cinit
-//CHECK-NEXT:  |   `-UnaryOperator {{.*}} '_Float16' prefix '-'
-//CHECK-NEXT:  |     `-FloatingLiteral {{.*}} '_Float16' 0.000000e+00
-
-  _Float16 f3l = 1.000976562;
-//CHECK:       | `-VarDecl {{.*}} used f3l '_Float16' cinit
-//CHECK-NEXT:  |   `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
-//CHECK-NEXT:  |     `-FloatingLiteral {{.*}} 'double' 1.000977e+00
-
-  C1 c1(f1l);
-//CHECK:       | `-VarDecl{{.*}} used c1 'class C1' callinit
-//CHECK-NEXT:  |   `-CXXConstructExpr {{.*}} 'class C1' 'void (_Float16)
-//CHECK-NEXT:  |     `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:  |       `-DeclRefExpr {{.*}} '_Float16' lvalue Var 0x{{.*}} 'f1l' '_Float16'
-
-  S1<_Float16> s1 = { 132.f16 };
-//CHECK:       | `-VarDecl {{.*}} used s1 'S1<_Float16>':'struct S1<_Float16>' cinit
-//CHECK-NEXT:  |   `-InitListExpr {{.*}} 'S1<_Float16>':'struct S1<_Float16>'
-//CHECK-NEXT:  |     `-FloatingLiteral {{.*}} '_Float16' 1.320000e+02
-
-  _Float16 f4l = func1n(f1l)  + func1f(f2l) + c1.func1c(f3l) + c1.func2c(f1l) +
-    func1t(f1l) + s1.mem2 - f1n + f2n;
-//CHECK:       | `-VarDecl {{.*}} used f4l '_Float16' cinit
-//CHECK-NEXT:  |   `-BinaryOperator {{.*}} '_Float16' '+'
-//CHECK-NEXT:  |     |-BinaryOperator {{.*}} '_Float16' '-'
-//CHECK-NEXT:  |     | |-BinaryOperator {{.*}} '_Float16' '+'
-//CHECK-NEXT:  |     | | |-BinaryOperator {{.*}} '_Float16' '+'
-//CHECK-NEXT:  |     | | | |-BinaryOperator {{.*}} '_Float16' '+'
-//CHECK-NEXT:  |     | | | | |-BinaryOperator {{.*}} '_Float16' '+'
-//CHECK-NEXT:  |     | | | | | |-BinaryOperator {{.*}} '_Float16' '+'
-//CHECK-NEXT:  |     | | | | | | |-CallExpr {{.*}} '_Float16'
-//CHECK-NEXT:  |     | | | | | | | |-ImplicitCastExpr {{.*}} 'const volatile _Float16 (*)(const _Float16 &)' <FunctionToPointerDecay>
-//CHECK-NEXT:  |     | | | | | | | | `-DeclRefExpr {{.*}} 'const volatile _Float16 (const _Float16 &)' lvalue Function {{.*}} 'func1n' 'const volatile _Float16 (const _Float16 &)'
-//CHECK-NEXT:  |     | | | | | | | `-ImplicitCastExpr {{.*}} 'const _Float16' lvalue <NoOp>
-//CHECK-NEXT:  |     | | | | | | |   `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f1l' '_Float16'
-//CHECK-NEXT:  |     | | | | | | `-CallExpr {{.*}} '_Float16'
-//CHECK-NEXT:  |     | | | | | |   |-ImplicitCastExpr {{.*}} '_Float16 (*)(_Float16)' <FunctionToPointerDecay>
-//CHECK-NEXT:  |     | | | | | |   | `-DeclRefExpr {{.*}} '_Float16 (_Float16)' lvalue Function {{.*}} 'func1f' '_Float16 (_Float16)'
-//CHECK-NEXT:  |     | | | | | |   `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:  |     | | | | | |     `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2l' '_Float16'
-//CHECK-NEXT:  |     | | | | | `-CXXMemberCallExpr {{.*}} '_Float16'
-//CHECK-NEXT:  |     | | | | |   |-MemberExpr {{.*}} '<bound member function type>' .func1c {{.*}}
-//CHECK-NEXT:  |     | | | | |   | `-DeclRefExpr {{.*}} 'class C1' lvalue Var {{.*}} 'c1' 'class C1'
-//CHECK-NEXT:  |     | | | | |   `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:  |     | | | | |     `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f3l' '_Float16'
-//CHECK-NEXT:  |     | | | | `-CallExpr {{.*}} '_Float16'
-//CHECK-NEXT:  |     | | | |   |-ImplicitCastExpr {{.*}} '_Float16 (*)(_Float16)' <FunctionToPointerDecay>
-//CHECK-NEXT:  |     | | | |   | `-MemberExpr {{.*}} '_Float16 (_Float16)' lvalue .func2c {{.*}}
-//CHECK-NEXT:  |     | | | |   |   `-DeclRefExpr {{.*}} 'class C1' lvalue Var {{.*}} 'c1' 'class C1'
-//CHECK-NEXT:  |     | | | |   `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:  |     | | | |     `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f1l' '_Float16'
-//CHECK-NEXT:  |     | | | `-CallExpr {{.*}} '_Float16':'_Float16'
-//CHECK-NEXT:  |     | | |   |-ImplicitCastExpr {{.*}} '_Float16 (*)(_Float16)' <FunctionToPointerDecay>
-//CHECK-NEXT:  |     | | |   | `-DeclRefExpr {{.*}} '_Float16 (_Float16)' lvalue Function {{.*}} 'func1t' '_Float16 (_Float16)' (FunctionTemplate {{.*}} 'func1t')
-//CHECK-NEXT:  |     | | |   `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:  |     | | |     `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f1l' '_Float16'
-//CHECK-NEXT:  |     | | `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:  |     | |   `-MemberExpr {{.*}} '_Float16' lvalue .mem2 {{.*}}
-//CHECK-NEXT:  |     | |     `-DeclRefExpr {{.*}} 'S1<_Float16>':'struct S1<_Float16>' lvalue Var {{.*}} 's1' 'S1<_Float16>':'struct S1<_Float16>'
-//CHECK-NEXT:  |     | `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:  |     |   `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f1n' '_Float16'
-//CHECK-NEXT:  |     `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:  |       `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2n' '_Float16'
-
-  auto f5l = -1.f16, *f6l = &f2l, f7l = func1t(f3l);
-//CHECK:       | |-VarDecl {{.*}} f5l '_Float16':'_Float16' cinit
-//CHECK-NEXT:  | | `-UnaryOperator {{.*}} '_Float16' prefix '-'
-//CHECK-NEXT:  | |   `-FloatingLiteral {{.*}} '_Float16' 1.000000e+00
-//CHECK-NEXT:  | |-VarDecl {{.*}} f6l '_Float16 *' cinit
-//CHECK-NEXT:  | | `-UnaryOperator {{.*}} '_Float16 *' prefix '&'
-//CHECK-NEXT:  | |   `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2l' '_Float16'
-//CHECK-NEXT:  | `-VarDecl {{.*}} f7l '_Float16':'_Float16' cinit
-//CHECK-NEXT:  |   `-CallExpr {{.*}} '_Float16':'_Float16'
-//CHECK-NEXT:  |     |-ImplicitCastExpr {{.*}} '_Float16 (*)(_Float16)' <FunctionToPointerDecay>
-//CHECK-NEXT:  |     | `-DeclRefExpr {{.*}} '_Float16 (_Float16)' lvalue Function {{.*}} 'func1t' '_Float16 (_Float16)' (FunctionTemplate {{.*}} 'func1t')
-//CHECK-NEXT:  |     `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:  |       `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f3l' '_Float16'
-
-  _Float16 f8l = f4l++;
-//CHECK:       | `-VarDecl {{.*}} f8l '_Float16' cinit
-//CHECK-NEXT:  |   `-UnaryOperator {{.*}} '_Float16' postfix '++'
-//CHECK-NEXT:  |     `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f4l' '_Float16'
-
-  _Float16 arr1l[] = { -1.f16, -0.f16, -11.f16 };
-//CHECK:       `-VarDecl {{.*}} arr1l '_Float16 [3]' cinit
-//CHECK-NEXT:    `-InitListExpr {{.*}} '_Float16 [3]'
-//CHECK-NEXT:      |-UnaryOperator {{.*}} '_Float16' prefix '-'
-//CHECK-NEXT:      | `-FloatingLiteral {{.*}} '_Float16' 1.000000e+00
-//CHECK-NEXT:      |-UnaryOperator {{.*}} '_Float16' prefix '-'
-//CHECK-NEXT:      | `-FloatingLiteral {{.*}} '_Float16' 0.000000e+00
-//CHECK-NEXT:      `-UnaryOperator {{.*}} '_Float16' prefix '-'
-//CHECK-NEXT:        `-FloatingLiteral {{.*}} '_Float16' 1.100000e+01
-
-  float cvtf = f2n;
-//CHECK:       `-VarDecl {{.*}} cvtf 'float' cinit
-//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} 'float' <FloatingCast>
-//CHECK-NEXT:      `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:        `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2n' '_Float16'
-
-  double cvtd = f2n;
-//CHECK:       `-VarDecl {{.*}} cvtd 'double' cinit
-//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} 'double' <FloatingCast>
-//CHECK-NEXT:      `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:        `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2n' '_Float16'
-
-  long double cvtld = f2n;
-//CHECK:       `-VarDecl {{.*}} cvtld 'long double' cinit
-//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} 'long double' <FloatingCast>
-//CHECK-NEXT:      `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
-//CHECK-NEXT:        `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2n' '_Float16'
-
-  _Float16 f2h = 42.0f;
-//CHECK:       `-VarDecl {{.*}} f2h '_Float16' cinit
-//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
-//CHECK-NEXT:      `-FloatingLiteral {{.*}} 'float' 4.200000e+01
-
-  _Float16 d2h = 42.0;
-//CHECK:       `-VarDecl {{.*}} d2h '_Float16' cinit
-//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
-//CHECK-NEXT:      `-FloatingLiteral {{.*}} 'double' 4.200000e+01
-
-  _Float16 ld2h = 42.0l;
-//CHECK:       `-VarDecl {{.*}} ld2h '_Float16' cinit
-//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
-//CHECK-NEXT:      `-FloatingLiteral {{.*}} 'long double' 4.200000e+01
-}
+// RUN: %clang_cc1 -std=c++11 -ast-dump %s | FileCheck %s --strict-whitespace
+// RUN: %clang_cc1 -std=c++11 -ast-dump -fnative-half-type %s | FileCheck %s --check-prefix=CHECK-NATIVE --strict-whitespace
+
+/*  Various contexts where type _Float16 can appear. */
+
+/*  Namespace */
+namespace {
+  _Float16 f1n;
+  _Float16 f2n = 33.f16;
+  _Float16 arr1n[10];
+  _Float16 arr2n[] = { 1.2, 3.0, 3.e4 };
+  const volatile _Float16 func1n(const _Float16 &arg) {
+    return arg + f2n + arr1n[4] - arr2n[1];
+  }
+}
+
+//CHECK:      |-NamespaceDecl
+//CHECK-NEXT: | |-VarDecl {{.*}} f1n '_Float16'
+//CHECK-NEXT: | |-VarDecl {{.*}} f2n '_Float16' cinit
+//CHECK-NEXT: | | `-FloatingLiteral {{.*}} '_Float16' 3.300000e+01
+//CHECK-NEXT: | |-VarDecl {{.*}} arr1n '_Float16 [10]'
+//CHECK-NEXT: | |-VarDecl {{.*}} arr2n '_Float16 [3]' cinit
+//CHECK-NEXT: | | `-InitListExpr {{.*}} '_Float16 [3]'
+//CHECK-NEXT: | |   |-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
+//CHECK-NEXT: | |   | `-FloatingLiteral {{.*}} 'double' 1.200000e+00
+//CHECK-NEXT: | |   |-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
+//CHECK-NEXT: | |   | `-FloatingLiteral {{.*}} 'double' 3.000000e+00
+//CHECK-NEXT: | |   `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
+//CHECK-NEXT: | |     `-FloatingLiteral {{.*}} 'double' 3.000000e+04
+//CHECK-NEXT: | `-FunctionDecl {{.*}} func1n 'const volatile _Float16 (const _Float16 &)'
+
+/* File */
+_Float16 f1f;
+_Float16 f2f = 32.4;
+_Float16 arr1f[10];
+_Float16 arr2f[] = { -1.2, -3.0, -3.e4 };
+_Float16 func1f(_Float16 arg);
+
+//CHECK:      |-VarDecl {{.*}} f1f '_Float16'
+//CHECK-NEXT: |-VarDecl {{.*}} f2f '_Float16' cinit
+//CHECK-NEXT: | `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
+//CHECK-NEXT: |   `-FloatingLiteral {{.*}} 'double' 3.240000e+01
+//CHECK-NEXT: |-VarDecl {{.*}} arr1f '_Float16 [10]'
+//CHECK-NEXT: |-VarDecl {{.*}} arr2f '_Float16 [3]' cinit
+//CHECK-NEXT: | `-InitListExpr {{.*}} '_Float16 [3]'
+//CHECK-NEXT: |   |-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
+//CHECK-NEXT: |   | `-UnaryOperator {{.*}} 'double' prefix '-'
+//CHECK-NEXT: |   |   `-FloatingLiteral {{.*}} 'double' 1.200000e+00
+//CHECK-NEXT: |   |-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
+//CHECK-NEXT: |   | `-UnaryOperator {{.*}} 'double' prefix '-'
+//CHECK-NEXT: |   |   `-FloatingLiteral {{.*}} 'double' 3.000000e+00
+//CHECK-NEXT: |   `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
+//CHECK-NEXT: |     `-UnaryOperator {{.*}} 'double' prefix '-'
+//CHECK-NEXT: |       `-FloatingLiteral {{.*}} 'double' 3.000000e+04
+//CHECK-NEXT: |-FunctionDecl {{.*}} func1f '_Float16 (_Float16)'
+//CHECK-NEXT: | `-ParmVarDecl {{.*}} arg '_Float16'
+
+
+// Mixing __fp16 and Float16 types:
+// The _Float16 type is first converted to __fp16 type and then the operation
+// is completed as if both operands were of __fp16 type.
+
+__fp16 B = -0.1;
+auto C = -1.0f16 + B;
+
+// When we do *not* have native half types, we expect __fp16 to be promoted to
+// float, and consequently also _Float16 promotions to float:
+
+//CHECK:      -VarDecl {{.*}} used B '__fp16' cinit
+//CHECK-NEXT: | `-ImplicitCastExpr {{.*}} '__fp16' <FloatingCast>
+//CHECK-NEXT: |   `-UnaryOperator {{.*}} 'double' prefix '-'
+//CHECK-NEXT: |     `-FloatingLiteral {{.*}} 'double' 1.000000e-01
+//CHECK-NEXT: |-VarDecl {{.*}} C 'float':'float' cinit
+//CHECK-NEXT: | `-BinaryOperator {{.*}} 'float' '+'
+//CHECK-NEXT: |   |-ImplicitCastExpr {{.*}} 'float' <FloatingCast>
+//CHECK-NEXT: |   | `-UnaryOperator {{.*}} '_Float16' prefix '-'
+//CHECK-NEXT: |   |   `-FloatingLiteral {{.*}} '_Float16' 1.000000e+00
+//CHECK-NEXT: |   `-ImplicitCastExpr {{.*}} 'float' <FloatingCast>
+//CHECK-NEXT: |     `-ImplicitCastExpr {{.*}} '__fp16' <LValueToRValue>
+//CHECK-NEXT: |       `-DeclRefExpr {{.*}} '__fp16' lvalue Var 0x{{.*}} 'B' '__fp16'
+
+// When do have native half types, we expect to see promotions to fp16:
+
+//CHECK-NATIVE: |-VarDecl {{.*}} used B '__fp16' cinit
+//CHECK-NATIVE: | `-ImplicitCastExpr {{.*}} '__fp16' <FloatingCast>
+//CHECK-NATIVE: |   `-UnaryOperator {{.*}} 'double' prefix '-'
+//CHECK-NATIVE: |     `-FloatingLiteral {{.*}} 'double' 1.000000e-01
+//CHECK-NATIVE: |-VarDecl {{.*}} C '__fp16':'__fp16' cinit
+//CHECK-NATIVE: | `-BinaryOperator {{.*}} '__fp16' '+'
+//CHECK-NATIVE: |   |-ImplicitCastExpr {{.*}} '__fp16' <FloatingCast>
+//CHECK-NATIVE: |   | `-UnaryOperator {{.*}} '_Float16' prefix '-'
+//CHECK-NATIVE: |   |   `-FloatingLiteral {{.*}} '_Float16' 1.000000e+00
+//CHECK-NATIVE: |   `-ImplicitCastExpr {{.*}} '__fp16' <LValueToRValue>
+//CHECK-NATIVE: |     `-DeclRefExpr {{.*}} '__fp16' lvalue Var 0x{{.*}} 'B' '__fp16'
+
+
+/* Class */
+
+class C1 {
+  _Float16 f1c;
+  static const _Float16 f2c;
+  volatile _Float16 f3c;
+public:
+  C1(_Float16 arg) : f1c(arg), f3c(arg) { }
+  _Float16 func1c(_Float16 arg ) {
+    return f1c + arg;
+  }
+  static _Float16 func2c(_Float16 arg) {
+    return arg * C1::f2c;
+  }
+};
+
+//CHECK:      |-CXXRecordDecl {{.*}} referenced class C1 definition
+//CHECK:      | |-CXXRecordDecl {{.*}} implicit referenced class C1
+//CHECK-NEXT: | |-FieldDecl {{.*}} referenced f1c '_Float16'
+//CHECK-NEXT: | |-VarDecl {{.*}} used f2c 'const _Float16' static
+//CHECK-NEXT: | |-FieldDecl {{.*}} f3c 'volatile _Float16'
+//CHECK-NEXT: | |-AccessSpecDecl
+//CHECK-NEXT: | |-CXXConstructorDecl {{.*}} used C1 'void (_Float16)
+//CHECK-NEXT: | | |-ParmVarDecl {{.*}} used arg '_Float16'
+//CHECK-NEXT: | | |-CXXCtorInitializer Field {{.*}} 'f1c' '_Float16'
+//CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT: | | |   `-DeclRefExpr {{.*}} '_Float16' lvalue ParmVar 0x{{.*}} 'arg' '_Float16'
+//CHECK-NEXT: | | |-CXXCtorInitializer Field {{.*}} 'f3c' 'volatile _Float16'
+//CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT: | | |   `-DeclRefExpr {{.*}} '_Float16' lvalue ParmVar 0x{{.*}} 'arg' '_Float16'
+//CHECK-NEXT: | | `-CompoundStmt
+//CHECK-NEXT: | |-CXXMethodDecl {{.*}} used func1c '_Float16 (_Float16)
+//CHECK-NEXT: | | |-ParmVarDecl {{.*}} used arg '_Float16'
+//CHECK-NEXT: | | `-CompoundStmt
+//CHECK-NEXT: | |   `-ReturnStmt
+//CHECK-NEXT: | |     `-BinaryOperator {{.*}} '_Float16' '+'
+//CHECK-NEXT: | |       |-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT: | |       | `-MemberExpr {{.*}} '_Float16' lvalue ->f1c 0x{{.*}}
+//CHECK-NEXT: | |       |   `-CXXThisExpr {{.*}} 'C1 *' this
+//CHECK-NEXT: | |       `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT: | |         `-DeclRefExpr {{.*}} '_Float16' lvalue ParmVar 0x{{.*}} 'arg' '_Float16'
+//CHECK-NEXT: | |-CXXMethodDecl {{.*}} used func2c '_Float16 (_Float16)' static
+//CHECK-NEXT: | | |-ParmVarDecl {{.*}} used arg '_Float16'
+//CHECK-NEXT: | | `-CompoundStmt
+//CHECK-NEXT: | |   `-ReturnStmt
+//CHECK-NEXT: | |     `-BinaryOperator {{.*}} '_Float16' '*'
+//CHECK-NEXT: | |       |-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT: | |       | `-DeclRefExpr {{.*}} '_Float16' lvalue ParmVar 0x{{.*}} 'arg' '_Float16'
+//CHECK-NEXT: | |       `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT: | |         `-DeclRefExpr {{.*}} 'const _Float16' lvalue Var 0x{{.*}} 'f2c' 'const _Float16'
+
+
+/*  Template */
+
+template <class C> C func1t(C arg) {
+  return arg * 2.f16;
+}
+
+//CHECK:      |-FunctionTemplateDecl {{.*}} func1t
+//CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} C
+//CHECK-NEXT: | |-FunctionDecl {{.*}} func1t 'C (C)'
+//CHECK-NEXT: | | |-ParmVarDecl {{.*}} referenced arg 'C'
+//CHECK-NEXT: | | `-CompoundStmt
+//CHECK-NEXT: | |   `-ReturnStmt
+//CHECK-NEXT: | |     `-BinaryOperator {{.*}} '<dependent type>' '*'
+//CHECK-NEXT: | |       |-DeclRefExpr {{.*}} 'C' lvalue ParmVar {{.*}} 'arg' 'C'
+//CHECK-NEXT: | |       `-FloatingLiteral {{.*}} '_Float16' 2.000000e+00
+//CHECK-NEXT: | `-FunctionDecl {{.*}} used func1t '_Float16 (_Float16)'
+//CHECK-NEXT: |   |-TemplateArgument type '_Float16'
+//CHECK-NEXT: |   |-ParmVarDecl {{.*}} used arg '_Float16':'_Float16'
+//CHECK-NEXT: |   `-CompoundStmt
+//CHECK-NEXT: |     `-ReturnStmt
+//CHECK-NEXT: |       `-BinaryOperator {{.*}} '_Float16' '*'
+//CHECK-NEXT: |         |-ImplicitCastExpr {{.*}} '_Float16':'_Float16' <LValueToRValue>
+//CHECK-NEXT: |         | `-DeclRefExpr {{.*}} '_Float16':'_Float16' lvalue ParmVar {{.*}} 'arg' '_Float16':'_Float16'
+//CHECK-NEXT: |         `-FloatingLiteral {{.*}} '_Float16' 2.000000e+00
+
+
+template <class C> struct S1 {
+  C mem1;
+};
+
+//CHECK:      |-ClassTemplateDecl {{.*}} S1
+//CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} referenced class depth 0 index 0 C
+//CHECK-NEXT: | |-CXXRecordDecl {{.*}} struct S1 definition
+//CHECK:      | | |-CXXRecordDecl {{.*}} implicit struct S1
+//CHECK-NEXT: | | `-FieldDecl {{.*}} mem1 'C'
+//CHECK-NEXT: | `-ClassTemplateSpecialization {{.*}} 'S1'
+
+template <> struct S1<_Float16> {
+  _Float16 mem2;
+};
+
+
+/* Local */
+
+extern int printf (const char *__restrict __format, ...);
+
+int main(void) {
+  _Float16 f1l = 1e3f16;
+//CHECK:       | `-VarDecl {{.*}} used f1l '_Float16' cinit
+//CHECK-NEXT:  |   `-FloatingLiteral {{.*}} '_Float16' 1.000000e+03
+
+  _Float16 f2l = -0.f16;
+//CHECK:       | `-VarDecl {{.*}} used f2l '_Float16' cinit
+//CHECK-NEXT:  |   `-UnaryOperator {{.*}} '_Float16' prefix '-'
+//CHECK-NEXT:  |     `-FloatingLiteral {{.*}} '_Float16' 0.000000e+00
+
+  _Float16 f3l = 1.000976562;
+//CHECK:       | `-VarDecl {{.*}} used f3l '_Float16' cinit
+//CHECK-NEXT:  |   `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
+//CHECK-NEXT:  |     `-FloatingLiteral {{.*}} 'double' 1.000977e+00
+
+  C1 c1(f1l);
+//CHECK:       | `-VarDecl{{.*}} used c1 'C1' callinit
+//CHECK-NEXT:  |   `-CXXConstructExpr {{.*}} 'C1' 'void (_Float16)
+//CHECK-NEXT:  |     `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:  |       `-DeclRefExpr {{.*}} '_Float16' lvalue Var 0x{{.*}} 'f1l' '_Float16'
+
+  S1<_Float16> s1 = { 132.f16 };
+//CHECK:       | `-VarDecl {{.*}} used s1 'S1<_Float16>':'S1<_Float16>' cinit
+//CHECK-NEXT:  |   `-InitListExpr {{.*}} 'S1<_Float16>':'S1<_Float16>'
+//CHECK-NEXT:  |     `-FloatingLiteral {{.*}} '_Float16' 1.320000e+02
+
+  _Float16 f4l = func1n(f1l)  + func1f(f2l) + c1.func1c(f3l) + c1.func2c(f1l) +
+    func1t(f1l) + s1.mem2 - f1n + f2n;
+//CHECK:       | `-VarDecl {{.*}} used f4l '_Float16' cinit
+//CHECK-NEXT:  |   `-BinaryOperator {{.*}} '_Float16' '+'
+//CHECK-NEXT:  |     |-BinaryOperator {{.*}} '_Float16' '-'
+//CHECK-NEXT:  |     | |-BinaryOperator {{.*}} '_Float16' '+'
+//CHECK-NEXT:  |     | | |-BinaryOperator {{.*}} '_Float16' '+'
+//CHECK-NEXT:  |     | | | |-BinaryOperator {{.*}} '_Float16' '+'
+//CHECK-NEXT:  |     | | | | |-BinaryOperator {{.*}} '_Float16' '+'
+//CHECK-NEXT:  |     | | | | | |-BinaryOperator {{.*}} '_Float16' '+'
+//CHECK-NEXT:  |     | | | | | | |-CallExpr {{.*}} '_Float16'
+//CHECK-NEXT:  |     | | | | | | | |-ImplicitCastExpr {{.*}} 'const volatile _Float16 (*)(const _Float16 &)' <FunctionToPointerDecay>
+//CHECK-NEXT:  |     | | | | | | | | `-DeclRefExpr {{.*}} 'const volatile _Float16 (const _Float16 &)' lvalue Function {{.*}} 'func1n' 'const volatile _Float16 (const _Float16 &)'
+//CHECK-NEXT:  |     | | | | | | | `-ImplicitCastExpr {{.*}} 'const _Float16' lvalue <NoOp>
+//CHECK-NEXT:  |     | | | | | | |   `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f1l' '_Float16'
+//CHECK-NEXT:  |     | | | | | | `-CallExpr {{.*}} '_Float16'
+//CHECK-NEXT:  |     | | | | | |   |-ImplicitCastExpr {{.*}} '_Float16 (*)(_Float16)' <FunctionToPointerDecay>
+//CHECK-NEXT:  |     | | | | | |   | `-DeclRefExpr {{.*}} '_Float16 (_Float16)' lvalue Function {{.*}} 'func1f' '_Float16 (_Float16)'
+//CHECK-NEXT:  |     | | | | | |   `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:  |     | | | | | |     `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2l' '_Float16'
+//CHECK-NEXT:  |     | | | | | `-CXXMemberCallExpr {{.*}} '_Float16'
+//CHECK-NEXT:  |     | | | | |   |-MemberExpr {{.*}} '<bound member function type>' .func1c {{.*}}
+//CHECK-NEXT:  |     | | | | |   | `-DeclRefExpr {{.*}} 'C1' lvalue Var {{.*}} 'c1' 'C1'
+//CHECK-NEXT:  |     | | | | |   `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:  |     | | | | |     `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f3l' '_Float16'
+//CHECK-NEXT:  |     | | | | `-CallExpr {{.*}} '_Float16'
+//CHECK-NEXT:  |     | | | |   |-ImplicitCastExpr {{.*}} '_Float16 (*)(_Float16)' <FunctionToPointerDecay>
+//CHECK-NEXT:  |     | | | |   | `-MemberExpr {{.*}} '_Float16 (_Float16)' lvalue .func2c {{.*}}
+//CHECK-NEXT:  |     | | | |   |   `-DeclRefExpr {{.*}} 'C1' lvalue Var {{.*}} 'c1' 'C1'
+//CHECK-NEXT:  |     | | | |   `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:  |     | | | |     `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f1l' '_Float16'
+//CHECK-NEXT:  |     | | | `-CallExpr {{.*}} '_Float16':'_Float16'
+//CHECK-NEXT:  |     | | |   |-ImplicitCastExpr {{.*}} '_Float16 (*)(_Float16)' <FunctionToPointerDecay>
+//CHECK-NEXT:  |     | | |   | `-DeclRefExpr {{.*}} '_Float16 (_Float16)' lvalue Function {{.*}} 'func1t' '_Float16 (_Float16)' (FunctionTemplate {{.*}} 'func1t')
+//CHECK-NEXT:  |     | | |   `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:  |     | | |     `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f1l' '_Float16'
+//CHECK-NEXT:  |     | | `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:  |     | |   `-MemberExpr {{.*}} '_Float16' lvalue .mem2 {{.*}}
+//CHECK-NEXT:  |     | |     `-DeclRefExpr {{.*}} 'S1<_Float16>':'S1<_Float16>' lvalue Var {{.*}} 's1' 'S1<_Float16>':'S1<_Float16>'
+//CHECK-NEXT:  |     | `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:  |     |   `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f1n' '_Float16'
+//CHECK-NEXT:  |     `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:  |       `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2n' '_Float16'
+
+  auto f5l = -1.f16, *f6l = &f2l, f7l = func1t(f3l);
+//CHECK:       | |-VarDecl {{.*}} f5l '_Float16':'_Float16' cinit
+//CHECK-NEXT:  | | `-UnaryOperator {{.*}} '_Float16' prefix '-'
+//CHECK-NEXT:  | |   `-FloatingLiteral {{.*}} '_Float16' 1.000000e+00
+//CHECK-NEXT:  | |-VarDecl {{.*}} f6l '_Float16 *' cinit
+//CHECK-NEXT:  | | `-UnaryOperator {{.*}} '_Float16 *' prefix '&'
+//CHECK-NEXT:  | |   `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2l' '_Float16'
+//CHECK-NEXT:  | `-VarDecl {{.*}} f7l '_Float16':'_Float16' cinit
+//CHECK-NEXT:  |   `-CallExpr {{.*}} '_Float16':'_Float16'
+//CHECK-NEXT:  |     |-ImplicitCastExpr {{.*}} '_Float16 (*)(_Float16)' <FunctionToPointerDecay>
+//CHECK-NEXT:  |     | `-DeclRefExpr {{.*}} '_Float16 (_Float16)' lvalue Function {{.*}} 'func1t' '_Float16 (_Float16)' (FunctionTemplate {{.*}} 'func1t')
+//CHECK-NEXT:  |     `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:  |       `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f3l' '_Float16'
+
+  _Float16 f8l = f4l++;
+//CHECK:       | `-VarDecl {{.*}} f8l '_Float16' cinit
+//CHECK-NEXT:  |   `-UnaryOperator {{.*}} '_Float16' postfix '++'
+//CHECK-NEXT:  |     `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f4l' '_Float16'
+
+  _Float16 arr1l[] = { -1.f16, -0.f16, -11.f16 };
+//CHECK:       `-VarDecl {{.*}} arr1l '_Float16 [3]' cinit
+//CHECK-NEXT:    `-InitListExpr {{.*}} '_Float16 [3]'
+//CHECK-NEXT:      |-UnaryOperator {{.*}} '_Float16' prefix '-'
+//CHECK-NEXT:      | `-FloatingLiteral {{.*}} '_Float16' 1.000000e+00
+//CHECK-NEXT:      |-UnaryOperator {{.*}} '_Float16' prefix '-'
+//CHECK-NEXT:      | `-FloatingLiteral {{.*}} '_Float16' 0.000000e+00
+//CHECK-NEXT:      `-UnaryOperator {{.*}} '_Float16' prefix '-'
+//CHECK-NEXT:        `-FloatingLiteral {{.*}} '_Float16' 1.100000e+01
+
+  float cvtf = f2n;
+//CHECK:       `-VarDecl {{.*}} cvtf 'float' cinit
+//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} 'float' <FloatingCast>
+//CHECK-NEXT:      `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:        `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2n' '_Float16'
+
+  double cvtd = f2n;
+//CHECK:       `-VarDecl {{.*}} cvtd 'double' cinit
+//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} 'double' <FloatingCast>
+//CHECK-NEXT:      `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:        `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2n' '_Float16'
+
+  long double cvtld = f2n;
+//CHECK:       `-VarDecl {{.*}} cvtld 'long double' cinit
+//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} 'long double' <FloatingCast>
+//CHECK-NEXT:      `-ImplicitCastExpr {{.*}} '_Float16' <LValueToRValue>
+//CHECK-NEXT:        `-DeclRefExpr {{.*}} '_Float16' lvalue Var {{.*}} 'f2n' '_Float16'
+
+  _Float16 f2h = 42.0f;
+//CHECK:       `-VarDecl {{.*}} f2h '_Float16' cinit
+//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
+//CHECK-NEXT:      `-FloatingLiteral {{.*}} 'float' 4.200000e+01
+
+  _Float16 d2h = 42.0;
+//CHECK:       `-VarDecl {{.*}} d2h '_Float16' cinit
+//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
+//CHECK-NEXT:      `-FloatingLiteral {{.*}} 'double' 4.200000e+01
+
+  _Float16 ld2h = 42.0l;
+//CHECK:       `-VarDecl {{.*}} ld2h '_Float16' cinit
+//CHECK-NEXT:    `-ImplicitCastExpr {{.*}} '_Float16' <FloatingCast>
+//CHECK-NEXT:      `-FloatingLiteral {{.*}} 'long double' 4.200000e+01
+}
diff --git a/test/Frontend/gnu-mcount.c b/test/Frontend/gnu-mcount.c
index 9872c0d..3953d24 100644
--- a/test/Frontend/gnu-mcount.c
+++ b/test/Frontend/gnu-mcount.c
@@ -43,36 +43,36 @@
 
 // CHECK-LABEL: f
 // TODO: add profiling support for arm-baremetal
-// CHECK-ARM-BAREMETAL-EABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01mcount"{{.*}} }
-// CHECK-ARM-BAREMETAL-EABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM64-BAREMETAL-EABI: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="mcount"{{.*}} }
-// CHECK-ARM64-BAREMETAL-EABI-MEABI-GNU: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01_mcount"{{.*}} }
-// CHECK-ARM-IOS-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="_mcount"{{.*}} }
-// CHECK-ARM-IOS-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM-EABI: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01mcount"{{.*}} }
-// CHECK-ARM-EABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM64-EABI: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="mcount"{{.*}} }
-// CHECK-ARM64-EABI-MEABI-GNU: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01_mcount"{{.*}} }
-// CHECK-ARM64-EABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM64-EABI-LINUX: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01_mcount"{{.*}} }
-// CHECK-ARM-EABI-FREEBSD: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="__mcount"{{.*}} }
-// CHECK-ARM-EABI-FREEBSD-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM64-EABI-FREEBSD: attributes #{{[0-9]+}} = { {{.*}}"counting-function"=".mcount"{{.*}} }
-// CHECK-ARM64-EABI-FREEBSD-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM-EABI-NETBSD: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="_mcount"{{.*}} }
-// CHECK-ARM-EABI-NETBSD-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM-EABI-OPENBSD: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="__mcount"{{.*}} }
-// CHECK-ARM-EABI-OPENBSD-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM64-EABI-OPENBSD: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="__mcount"{{.*}} }
-// CHECK-ARM64-EABI-OPENBSD-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM-EABI-MEABI-GNU-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="mcount"{{.*}} }
-// CHECK-ARM-EABI-MEABI-GNU: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM-EABI-RTEMS: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="mcount"{{.*}} }
-// CHECK-ARM-EABI-RTEMS-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM64-EABI-RTEMS: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="mcount"{{.*}} }
-// CHECK-ARM64-EABI-RTEMS-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM-EABI-CLOUDABI: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="mcount"{{.*}} }
-// CHECK-ARM-EABI-CLOUDABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
-// CHECK-ARM64-EABI-CLOUDABI: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="mcount"{{.*}} }
-// CHECK-ARM64-EABI-CLOUDABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"counting-function"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM-BAREMETAL-EABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01mcount"{{.*}} }
+// CHECK-ARM-BAREMETAL-EABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM64-BAREMETAL-EABI: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="mcount"{{.*}} }
+// CHECK-ARM64-BAREMETAL-EABI-MEABI-GNU: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01_mcount"{{.*}} }
+// CHECK-ARM-IOS-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="_mcount"{{.*}} }
+// CHECK-ARM-IOS-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM-EABI: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01mcount"{{.*}} }
+// CHECK-ARM-EABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM64-EABI: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="mcount"{{.*}} }
+// CHECK-ARM64-EABI-MEABI-GNU: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01_mcount"{{.*}} }
+// CHECK-ARM64-EABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM64-EABI-LINUX: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01_mcount"{{.*}} }
+// CHECK-ARM-EABI-FREEBSD: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="__mcount"{{.*}} }
+// CHECK-ARM-EABI-FREEBSD-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM64-EABI-FREEBSD: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"=".mcount"{{.*}} }
+// CHECK-ARM64-EABI-FREEBSD-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM-EABI-NETBSD: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="_mcount"{{.*}} }
+// CHECK-ARM-EABI-NETBSD-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM-EABI-OPENBSD: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="__mcount"{{.*}} }
+// CHECK-ARM-EABI-OPENBSD-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM64-EABI-OPENBSD: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="__mcount"{{.*}} }
+// CHECK-ARM64-EABI-OPENBSD-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM-EABI-MEABI-GNU-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="mcount"{{.*}} }
+// CHECK-ARM-EABI-MEABI-GNU: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM-EABI-RTEMS: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="mcount"{{.*}} }
+// CHECK-ARM-EABI-RTEMS-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM64-EABI-RTEMS: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="mcount"{{.*}} }
+// CHECK-ARM64-EABI-RTEMS-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM-EABI-CLOUDABI: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="mcount"{{.*}} }
+// CHECK-ARM-EABI-CLOUDABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
+// CHECK-ARM64-EABI-CLOUDABI: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="mcount"{{.*}} }
+// CHECK-ARM64-EABI-CLOUDABI-NOT: attributes #{{[0-9]+}} = { {{.*}}"instrument-function-entry-inlined"="\01__gnu_mcount_nc"{{.*}} }
 
diff --git a/test/Frontend/optimization-remark-with-hotness.c b/test/Frontend/optimization-remark-with-hotness.c
index 5e31ce9..6d3ab06 100644
--- a/test/Frontend/optimization-remark-with-hotness.c
+++ b/test/Frontend/optimization-remark-with-hotness.c
@@ -11,18 +11,22 @@
 // RUN:     -fprofile-instrument-use-path=%t.profdata -Rpass=inline \
 // RUN:     -Rpass-analysis=inline -Rpass-missed=inline \
 // RUN:     -fdiagnostics-show-hotness -verify
+// The clang version of the previous test.
+// RUN: %clang -target x86_64-apple-macosx10.9 %s -c -emit-llvm -o /dev/null \
+// RUN:     -fprofile-instr-use=%t.profdata -Rpass=inline \
+// RUN:     -Rpass-analysis=inline -Rpass-missed=inline \
+// RUN:     -fdiagnostics-show-hotness -Xclang -verify
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \
 // RUN:     optimization-remark-with-hotness.c %s -emit-llvm-only \
 // RUN:     -fprofile-sample-use=%t-sample.profdata -Rpass=inline \
 // RUN:     -Rpass-analysis=inline -Rpass-missed=inline \
 // RUN:     -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 \
 // RUN:     -verify
-// The clang version of the previous test.
-// RUN: %clang -target x86_64-apple-macosx10.9 %s -c -emit-llvm -o /dev/null \
-// RUN:     -fprofile-instr-use=%t.profdata -Rpass=inline \
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \
+// RUN:     optimization-remark-with-hotness.c %s -emit-llvm-only \
+// RUN:     -fprofile-instrument-use-path=%t.profdata -Rpass=inline \
 // RUN:     -Rpass-analysis=inline -Rpass-missed=inline \
-// RUN:     -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 \
-// RUN:     -Xclang -verify
+// RUN:     -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 -verify
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \
 // RUN:     optimization-remark-with-hotness.c %s -emit-llvm-only \
 // RUN:     -fprofile-instrument-use-path=%t.profdata -Rpass=inline \
@@ -56,13 +60,13 @@
   // THRESHOLD-NOT: hotness
   // NO_PGO: '-fdiagnostics-show-hotness' requires profile-guided optimization information
   // NO_PGO: '-fdiagnostics-hotness-threshold=' requires profile-guided optimization information
-  // expected-remark@+1 {{foo inlined into bar with cost=always}}
+  // expected-remark@+1 {{foo inlined into bar with cost=always (hotness:}}
   sum += foo(x, x - 2);
 }
 
 int main(int argc, const char *argv[]) {
   for (int i = 0; i < 30; i++)
-    // expected-remark@+1 {{bar not inlined into main because it should never be inlined}}
+    // expected-remark@+1 {{bar not inlined into main because it should never be inlined (cost=never) (hotness:}}
     bar(argc);
   return sum;
 }
diff --git a/test/Frontend/region-pragmas.c b/test/Frontend/region-pragmas.c
new file mode 100644
index 0000000..8d929e8
--- /dev/null
+++ b/test/Frontend/region-pragmas.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -Wall -verify %s
+// expected-no-diagnostics
+
+#pragma region foo
+#pragma endregion foo
diff --git a/test/Frontend/rewrite-includes-messages.c b/test/Frontend/rewrite-includes-messages.c
index f93fe72..a3a5cb2 100644
--- a/test/Frontend/rewrite-includes-messages.c
+++ b/test/Frontend/rewrite-includes-messages.c
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -E -frewrite-includes %s -I%S/Inputs/ | %clang_cc1 -Wall -fsyntax-only -Wunused-macros -x c - 2>&1 > %t.1
 // RUN: %clang_cc1 -I%S/Inputs/ -Wall -Wunused-macros -fsyntax-only %s 2>&1 > %t.2
-// RUN: diff %t.1 %t.2 -u
+// RUN: diff -u %t.1 %t.2
 // expected-no-diagnostics
 
 #include "rewrite-includes-messages.h"
diff --git a/test/Frontend/unknown-arg.c b/test/Frontend/unknown-arg.c
new file mode 100644
index 0000000..00f2da6
--- /dev/null
+++ b/test/Frontend/unknown-arg.c
@@ -0,0 +1,9 @@
+// RUN: not %clang_cc1 %s -E --helium 2>&1 | \
+// RUN: FileCheck %s
+// RUN: not %clang_cc1 %s -E --hel[ 2>&1 | \
+// RUN: FileCheck %s --check-prefix=DID-YOU-MEAN
+// RUN: not %clang %s -E -Xclang --hel[ 2>&1 | \
+// RUN: FileCheck %s --check-prefix=DID-YOU-MEAN
+
+// CHECK: error: unknown argument: '--helium'
+// DID-YOU-MEAN: error: unknown argument '--hel[', did you mean '--help'?
diff --git a/test/Frontend/verify-prefixes.c b/test/Frontend/verify-prefixes.c
new file mode 100644
index 0000000..c5b545c
--- /dev/null
+++ b/test/Frontend/verify-prefixes.c
@@ -0,0 +1,118 @@
+#if GC
+# define GCONST const
+#else
+# define GCONST
+#endif
+
+// gconst-note@8 {{variable 'glb' declared const here}}
+GCONST int glb = 5;
+
+
+// Check various correct prefix spellings and combinations.
+//
+// RUN: %clang_cc1             -DGC           -verify=gconst                 %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC      -verify=lconst                 %s
+// RUN: %clang_cc1                       -DSC -verify=expected               %s
+// RUN: %clang_cc1                       -DSC -verify                        %s
+// RUN: %clang_cc1                       -DSC -verify -verify                %s
+// RUN: %clang_cc1                            -verify=nconst                 %s
+// RUN: %clang_cc1                            -verify=n-const                %s
+// RUN: %clang_cc1                            -verify=n_const                %s
+// RUN: %clang_cc1                            -verify=NConst                 %s
+// RUN: %clang_cc1                            -verify=NConst2                %s
+// RUN: %clang_cc1 -Wcast-qual -DGC -DLC      -verify=gconst,lconst          %s
+// RUN: %clang_cc1 -Wcast-qual -DGC -DLC -DSC -verify=gconst,lconst,expected %s
+// RUN: %clang_cc1 -Wcast-qual -DGC -DLC      -verify=gconst -verify=lconst  %s
+// RUN: %clang_cc1 -Wcast-qual -DGC -DLC -DSC -verify=gconst,lconst -verify  %s
+// RUN: %clang_cc1             -DGC      -DSC -verify -verify=gconst -verify %s
+//
+// Duplicate prefixes.
+// RUN: %clang_cc1 -Wcast-qual -DGC -DLC      -verify=gconst,lconst,gconst         %s
+// RUN: %clang_cc1             -DGC           -verify=gconst -verify=gconst,gconst %s
+// RUN: %clang_cc1                       -DSC -verify=expected -verify=expected    %s
+// RUN: %clang_cc1                       -DSC -verify -verify=expected             %s
+//
+// Various tortured cases: multiple directives with different prefixes per
+// line, prefixes used as comments, prefixes prefixing prefixes, and prefixes
+// with special suffixes.
+// RUN: %clang_cc1 -Wcast-qual      -DLC      -verify=foo                    %s
+// RUN: %clang_cc1                       -DSC -verify=bar                    %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC -DSC -verify=foo,bar                %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC -DSC -verify=bar,foo                %s
+// RUN: %clang_cc1                       -DSC -verify=foo-bar                %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC      -verify=bar-foo                %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC -DSC -verify=foo,foo-bar            %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC -DSC -verify=foo-bar,foo            %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC -DSC -verify=bar,bar-foo            %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC -DSC -verify=bar-foo,bar            %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC -DSC -verify=foo-bar,bar-foo        %s
+// RUN: %clang_cc1                       -DSC -verify=foo-warning            %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC      -verify=bar-warning-re         %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC -DSC -verify=foo,foo-warning        %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC -DSC -verify=foo-warning,foo        %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC -DSC -verify=bar,bar-warning-re     %s
+// RUN: %clang_cc1 -Wcast-qual      -DLC -DSC -verify=bar-warning-re,bar     %s
+
+
+// Check invalid prefixes.  Check that there's no additional output, which
+// might indicate that diagnostic verification became enabled even though it
+// was requested incorrectly.  Check that prefixes are reported in command-line
+// order.
+//
+// RUN: not %clang_cc1 -verify=5abc,-xy,foo,_k -verify='#a,b$' %s 2> %t
+// RUN: FileCheck --check-prefixes=ERR %s < %t
+//
+// ERR-NOT:  {{.}}
+// ERR:      error: invalid value '5abc' in '-verify='
+// ERR-NEXT: note: -verify prefixes must start with a letter and contain only alphanumeric characters, hyphens, and underscores
+// ERR-NEXT: error: invalid value '-xy' in '-verify='
+// ERR-NEXT: note: -verify prefixes must start with a letter and contain only alphanumeric characters, hyphens, and underscores
+// ERR-NEXT: error: invalid value '_k' in '-verify='
+// ERR-NEXT: note: -verify prefixes must start with a letter and contain only alphanumeric characters, hyphens, and underscores
+// ERR-NEXT: error: invalid value '#a' in '-verify='
+// ERR-NEXT: note: -verify prefixes must start with a letter and contain only alphanumeric characters, hyphens, and underscores
+// ERR-NEXT: error: invalid value 'b$' in '-verify='
+// ERR-NEXT: note: -verify prefixes must start with a letter and contain only alphanumeric characters, hyphens, and underscores
+// ERR-NOT:  {{.}}
+
+
+// Check that our test code actually has expected diagnostics when there's no
+// -verify.
+//
+// RUN: not %clang_cc1 -Wcast-qual -DGC -DLC -DSC %s 2> %t
+// RUN: FileCheck --check-prefix=ALL %s < %t
+//
+// ALL: cannot assign to variable 'glb' with const-qualified type 'const int'
+// ALL: variable 'glb' declared const here
+// ALL: cast from 'const int *' to 'int *' drops const qualifier
+// ALL: initializing 'int *' with an expression of type 'const int *' discards qualifiers
+
+
+#if LC
+# define LCONST const
+#else
+# define LCONST
+#endif
+
+#if SC
+# define SCONST const
+#else
+# define SCONST
+#endif
+
+void foo() {
+  LCONST int loc = 5;
+  SCONST static int sta = 5;
+  // We don't actually expect 1-2 occurrences of this error.  We're just
+  // checking the parsing.
+  glb = 6; // gconst-error1-2 {{cannot assign to variable 'glb' with const-qualified type 'const int'}}
+  *(int*)(&loc) = 6; // lconst-warning {{cast from 'const int *' to 'int *' drops const qualifier}}
+  ; // Code, comments, and many directives with different prefixes per line, including cases where some prefixes (foo and bar) prefix others (such as foo-bar and bar-foo), such that some prefixes appear as normal comments and some have special suffixes (-warning and -re): foo-warning@-1 {{cast from 'const int *' to 'int *' drops const qualifier}} foo-bar-warning@+1 {{initializing 'int *' with an expression of type 'const int *' discards qualifiers}} foo-warning-warning@+1 {{initializing 'int *' with an expression of type 'const int *' discards qualifiers}} bar-warning-re-warning@-1 {{cast from 'const int *' to 'int *' drops const qualifier}} bar-foo-warning@-1 {{cast from 'const int *' to 'int *' drops const qualifier}} bar-warning@+1 {{initializing 'int *' with an expression of type 'const int *' discards qualifiers}}
+  int *p = &sta; // expected-warning {{initializing 'int *' with an expression of type 'const int *' discards qualifiers}}
+}
+
+// nconst-no-diagnostics
+// n-const-no-diagnostics
+// n_const-no-diagnostics
+// NConst-no-diagnostics
+// NConst2-no-diagnostics
diff --git a/test/Frontend/x86-target-cpu.c b/test/Frontend/x86-target-cpu.c
index 4e0db55..b5103d2 100644
--- a/test/Frontend/x86-target-cpu.c
+++ b/test/Frontend/x86-target-cpu.c
@@ -13,7 +13,9 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu skylake-avx512 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu skx -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu cannonlake -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu icelake -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knl -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knm -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu bonnell -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu silvermont -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu k8 -verify %s
diff --git a/test/Headers/mm3dnow.c b/test/Headers/mm3dnow.c
new file mode 100644
index 0000000..255483c
--- /dev/null
+++ b/test/Headers/mm3dnow.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -fsyntax-only -ffreestanding %s -verify
+// RUN: %clang_cc1 -fsyntax-only -ffreestanding -x c++ %s -verify
+// expected-no-diagnostics
+
+#if defined(i386) || defined(__x86_64__)
+#include <mm3dnow.h>
+
+int __attribute__((__target__(("3dnow")))) foo(int a) {
+  _m_femms();
+  return 4;
+}
+
+__m64 __attribute__((__target__(("3dnowa")))) bar(__m64 a) {
+  return _m_pf2iw(a);
+}
+#endif
diff --git a/test/Headers/stdbool.cpp b/test/Headers/stdbool.cpp
index 7c927db..0110a45 100644
--- a/test/Headers/stdbool.cpp
+++ b/test/Headers/stdbool.cpp
@@ -1,13 +1,19 @@
-// RUN: %clang_cc1 -E -dM %s | FileCheck --check-prefix=CHECK-GNU-COMPAT %s
+// RUN: %clang_cc1 -std=gnu++98 -E -dM %s | FileCheck --check-prefix=CHECK-GNU-COMPAT-98 %s
+// RUN: %clang_cc1 -std=gnu++11 -E -dM %s | FileCheck --check-prefix=CHECK-GNU-COMPAT-11 %s
 // RUN: %clang_cc1 -std=c++98 -E -dM %s | FileCheck --check-prefix=CHECK-CONFORMING %s
 // RUN: %clang_cc1 -fsyntax-only -std=gnu++98 -verify -Weverything %s
 #include <stdbool.h>
 #define zzz
 
-// CHECK-GNU-COMPAT: #define _Bool bool
-// CHECK-GNU-COMPAT: #define bool bool
-// CHECK-GNU-COMPAT: #define false false
-// CHECK-GNU-COMPAT: #define true true
+// CHECK-GNU-COMPAT-98: #define _Bool bool
+// CHECK-GNU-COMPAT-98: #define bool bool
+// CHECK-GNU-COMPAT-98: #define false false
+// CHECK-GNU-COMPAT-98: #define true true
+
+// CHECK-GNU-COMPAT-11: #define _Bool bool
+// CHECK-GNU-COMPAT-11-NOT: #define bool bool
+// CHECK-GNU-COMPAT-11-NOT: #define false false
+// CHECK-GNU-COMPAT-11-NOT: #define true true
 
 // CHECK-CONFORMING-NOT: #define _Bool
 // CHECK-CONFORMING: #define __CHAR_BIT__
diff --git a/test/Import/template-specialization/Inputs/T.cpp b/test/Import/template-specialization/Inputs/T.cpp
index b31e243..7eea958 100644
--- a/test/Import/template-specialization/Inputs/T.cpp
+++ b/test/Import/template-specialization/Inputs/T.cpp
@@ -12,3 +12,7 @@
     int g;
   };
 };
+
+
+template <typename T> constexpr int f() { return 0; }
+template <> constexpr int f<int>() { return 4; }
diff --git a/test/Import/template-specialization/test.cpp b/test/Import/template-specialization/test.cpp
index 43996c5..30df818 100644
--- a/test/Import/template-specialization/test.cpp
+++ b/test/Import/template-specialization/test.cpp
@@ -1,7 +1,10 @@
 // RUN: clang-import-test -import %S/Inputs/T.cpp -expression %s
-// XFAIL: *
+
 void expr() {
   A<int>::B b1;
   A<bool>::B b2;
   b1.f + b2.g;
 }
+
+static_assert(f<char>() == 0, "");
+static_assert(f<int>() == 4, "");
diff --git a/test/Index/Core/index-pch.cpp b/test/Index/Core/index-pch.cpp
new file mode 100644
index 0000000..8c5a92b61
--- /dev/null
+++ b/test/Index/Core/index-pch.cpp
@@ -0,0 +1,17 @@
+// RUN: c-index-test core -print-source-symbols -- %s -std=c++14 | FileCheck %s
+// RUN: %clang_cc1 -emit-pch %s -std=c++14 -o %t.pch
+// RUN: c-index-test core -print-source-symbols -module-file %t.pch | FileCheck %s
+
+// CHECK: [[@LINE+2]]:8 | struct(Gen)/C++ | DETECTOR | [[DETECTOR_USR:.*]] | {{.*}} | Def | rel: 0
+template <class _Default, class _AlwaysVoid, template <class...> class _Op, class... _Args>
+struct DETECTOR {
+ using value_t = int;
+};
+
+struct nonesuch {};
+
+// CHECK: [[@LINE+4]]:9 | type-alias/C++ | is_detected
+// CHECK: [[@LINE+3]]:32 | struct(Gen)/C++ | DETECTOR | [[DETECTOR_USR]] | {{.*}} | Ref,RelCont | rel: 1
+// CHECK-NEXT:	RelCont | is_detected
+template <template<class...> class _Op, class... _Args>
+  using is_detected = typename DETECTOR<nonesuch, void, _Op, _Args...>::value_t;
diff --git a/test/Index/Core/index-source.cpp b/test/Index/Core/index-source.cpp
index f9c88d5..6f485fe 100644
--- a/test/Index/Core/index-source.cpp
+++ b/test/Index/Core/index-source.cpp
@@ -201,8 +201,8 @@
 
   template<typename U>
   class InnerClass;
-// CHECK: [[@LINE-1]]:9 | class(Gen)/C++ | InnerClass | c:@S@PseudoOverridesInSpecializations>#d#I@ST>1#T@InnerClass | <no-cgname> | Ref,RelCont,RelSpecialization | rel: 2
-// CHECK-NEXT: RelCont
+// CHECK: [[@LINE-1]]:9 | class(Gen)/C++ | InnerClass | c:@S@PseudoOverridesInSpecializations>#d#I@ST>1#T@InnerClass | <no-cgname> | Decl,RelChild,RelSpecialization | rel: 2
+// CHECK-NEXT: RelChild
 // CHECK-NEXT: RelSpecialization | InnerClass | c:@ST>2#T#T@PseudoOverridesInSpecializations@ST>1#T@InnerClass
 };
 
@@ -274,7 +274,7 @@
 
 template<typename T>
 class SpecializationDecl;
-// CHECK: [[@LINE-1]]:7 | class(Gen)/C++ | SpecializationDecl | c:@ST>1#T@SpecializationDecl | <no-cgname> | Ref | rel: 0
+// CHECK: [[@LINE-1]]:7 | class(Gen)/C++ | SpecializationDecl | c:@ST>1#T@SpecializationDecl | <no-cgname> | Decl | rel: 0
 
 template<typename T>
 class SpecializationDecl { };
@@ -525,3 +525,36 @@
 // CHECK-NEXT: RelCont | Nested | c:@N@rd33122110@S@Outer@S@Nested>#I
 // CHECK: [[@LINE-3]]:20 | struct/C++ | Outer | c:@N@rd33122110@S@Outer | <no-cgname> | Ref,RelCont | rel: 1
 // CHECK-NEXT: RelCont | Nested | c:@N@rd33122110@S@Outer@S@Nested>#I
+
+namespace index_offsetof {
+
+struct Struct {
+  int field;
+};
+
+struct Struct2 {
+  Struct array[4][2];
+};
+
+void foo() {
+  __builtin_offsetof(Struct, field);
+// CHECK: [[@LINE-1]]:30 | field/C | field | c:@N@index_offsetof@S@Struct@FI@field | <no-cgname> | Ref,RelCont | rel: 1
+// CHECK-NEXT: RelCont | foo | c:@N@index_offsetof@F@foo#
+  __builtin_offsetof(Struct2, array[1][0].field);
+// CHECK: [[@LINE-1]]:31 | field/C | array | c:@N@index_offsetof@S@Struct2@FI@array | <no-cgname> | Ref,RelCont | rel: 1
+// CHECK-NEXT: RelCont | foo | c:@N@index_offsetof@F@foo#
+// CHECK: [[@LINE-3]]:43 | field/C | field | c:@N@index_offsetof@S@Struct@FI@field | <no-cgname> | Ref,RelCont | rel: 1
+// CHECK-NEXT: RelCont | foo | c:@N@index_offsetof@F@foo#
+}
+
+#define OFFSET_OF_(X, Y) __builtin_offsetof(X, Y)
+
+class SubclassOffsetof : public Struct {
+  void foo() {
+    OFFSET_OF_(SubclassOffsetof, field);
+// CHECK: [[@LINE-1]]:34 | field/C | field | c:@N@index_offsetof@S@Struct@FI@field | <no-cgname> | Ref,RelCont | rel: 1
+// CHECK-NEXT: RelCont | foo | c:@N@index_offsetof@S@SubclassOffsetof@F@foo#
+  }
+};
+
+}
diff --git a/test/Index/Inputs/crash-preamble-classes.h b/test/Index/Inputs/crash-preamble-classes.h
new file mode 100644
index 0000000..a8fb5ce
--- /dev/null
+++ b/test/Index/Inputs/crash-preamble-classes.h
@@ -0,0 +1,9 @@
+struct Incomplete;
+
+struct X : Incomplete {
+  X();
+};
+
+struct Y : X {
+  using X::X;
+};
diff --git a/test/Index/Inputs/record-parsing-invocation-remap.c b/test/Index/Inputs/record-parsing-invocation-remap.c
new file mode 100644
index 0000000..4a32ca6
--- /dev/null
+++ b/test/Index/Inputs/record-parsing-invocation-remap.c
@@ -0,0 +1,2 @@
+
+#pragma clang __debug parser_crash
diff --git a/test/Index/USR/linkage.cpp b/test/Index/USR/linkage.cpp
new file mode 100644
index 0000000..fec80da
--- /dev/null
+++ b/test/Index/USR/linkage.cpp
@@ -0,0 +1,7 @@
+// RUN: c-index-test core -print-source-symbols -- %s | FileCheck %s
+
+// Linkage decls are skipped in USRs for enclosed items.
+// Linkage decls themselves don't have USRs (no lines between ns and X).
+// CHECK: {{[0-9]+}}:11 | namespace/C++ | ns | c:@N@ns |
+// CHECK-NEXT: {{[0-9]+}}:33 | variable/C | X | c:@N@ns@X |
+namespace ns { extern "C" { int X; } }
diff --git a/test/Index/annotate-tokens.cpp b/test/Index/annotate-tokens.cpp
index 460ab51..67bdf63 100644
--- a/test/Index/annotate-tokens.cpp
+++ b/test/Index/annotate-tokens.cpp
@@ -37,7 +37,9 @@
   ~C();
 };
 
-// RUN: c-index-test -test-annotate-tokens=%s:1:1:38:1 %s -fno-delayed-template-parsing | FileCheck %s
+auto test5(X) -> X;
+
+// RUN: c-index-test -test-annotate-tokens=%s:1:1:41:1 %s -std=c++14 -fno-delayed-template-parsing | FileCheck %s
 // CHECK: Keyword: "struct" [1:1 - 1:7] StructDecl=bonk:1:8 (Definition)
 // CHECK: Identifier: "bonk" [1:8 - 1:12] StructDecl=bonk:1:8 (Definition)
 // CHECK: Punctuation: "{" [1:13 - 1:14] StructDecl=bonk:1:8 (Definition)
@@ -184,6 +186,14 @@
 // CHECK: Punctuation: "}" [29:22 - 29:23] CompoundStmt=
 // CHECK: Punctuation: "~" [37:3 - 37:4] CXXDestructor=~C:37:3
 // CHECK: Identifier: "C" [37:4 - 37:5] CXXDestructor=~C:37:3
+// CHECK: Keyword: "auto" [40:1 - 40:5] FunctionDecl=test5:40:6
+// CHECK: Identifier: "test5" [40:6 - 40:11] FunctionDecl=test5:40:6
+// CHECK: Punctuation: "(" [40:11 - 40:12] FunctionDecl=test5:40:6
+// CHECK: Identifier: "X" [40:12 - 40:13] TypeRef=struct X:7:8
+// CHECK: Punctuation: ")" [40:13 - 40:14] FunctionDecl=test5:40:6
+// CHECK: Punctuation: "->" [40:15 - 40:17] FunctionDecl=test5:40:6
+// CHECK: Identifier: "X" [40:18 - 40:19] TypeRef=struct X:7:8
+// CHECK: Punctuation: ";" [40:19 - 40:20]
 
 // RUN: env LIBCLANG_DISABLE_CRASH_RECOVERY=1 c-index-test -test-annotate-tokens=%s:32:1:32:13 %s | FileCheck %s -check-prefix=CHECK2
 // CHECK2: Keyword: "if" [32:3 - 32:5] IfStmt=
diff --git a/test/Index/comment-cplus-decls.cpp b/test/Index/comment-cplus-decls.cpp
index 6e32c60..c4b08eb 100644
--- a/test/Index/comment-cplus-decls.cpp
+++ b/test/Index/comment-cplus-decls.cpp
@@ -46,7 +46,7 @@
     data* reserved;
 };
 // CHECK: <Declaration>class Test {}</Declaration>
-// CHECK: <Declaration>Test() : reserved(new Test::data()) {}</Declaration>
+// CHECK: <Declaration>Test()</Declaration>
 // CHECK: <Declaration>unsigned int getID() const</Declaration>
 // CHECK: <Declaration>~Test(){{( noexcept)?}}</Declaration>
 // CHECK: <Declaration>Test::data *reserved</Declaration>
diff --git a/test/Index/complete-call.cpp b/test/Index/complete-call.cpp
index 9750bd6..ca11648 100644
--- a/test/Index/complete-call.cpp
+++ b/test/Index/complete-call.cpp
@@ -94,6 +94,24 @@
   s.foo_7(42,);
 }
 
+struct Bar {
+  static void foo_1();
+  void foo_1(float);
+  static void foo_1(int);
+};
+
+void test() {
+  Bar::foo_1();
+  Bar b;
+  b.foo_1();
+}
+
+struct Bar2 : public Bar {
+  Bar2() {
+    Bar::foo_1();
+  }
+};
+
 // RUN: c-index-test -code-completion-at=%s:47:9 %s | FileCheck -check-prefix=CHECK-CC1 %s
 // CHECK-CC1: OverloadCandidate:{ResultType void}{Text foo_1}{LeftParen (}{RightParen )} (1)
 // CHECK-CC1: Completion contexts:
@@ -803,3 +821,46 @@
 // CHECK-CC59-NEXT: Class name
 // CHECK-CC59-NEXT: Nested name specifier
 // CHECK-CC59-NEXT: Objective-C interface
+
+// RUN: c-index-test -code-completion-at=%s:104:14 %s | FileCheck -check-prefix=CHECK-CC60 %s
+// CHECK-CC60: OverloadCandidate:{ResultType void}{Text foo_1}{LeftParen (}{RightParen )} (1)
+// CHECK-CC60: OverloadCandidate:{ResultType void}{Text foo_1}{LeftParen (}{CurrentParameter float}{RightParen )} (1)
+// CHECK-CC60: OverloadCandidate:{ResultType void}{Text foo_1}{LeftParen (}{CurrentParameter int}{RightParen )} (1)
+// CHECK-CC60: Completion contexts:
+// CHECK-CC60-NEXT: Any type
+// CHECK-CC60-NEXT: Any value
+// CHECK-CC60-NEXT: Enum tag
+// CHECK-CC60-NEXT: Union tag
+// CHECK-CC60-NEXT: Struct tag
+// CHECK-CC60-NEXT: Class name
+// CHECK-CC60-NEXT: Nested name specifier
+// CHECK-CC60-NEXT: Objective-C interface
+
+// RUN: c-index-test -code-completion-at=%s:106:11 %s | FileCheck -check-prefix=CHECK-CC61 %s
+// CHECK-CC61: OverloadCandidate:{ResultType void}{Text foo_1}{LeftParen (}{RightParen )} (1)
+// CHECK-CC61: OverloadCandidate:{ResultType void}{Text foo_1}{LeftParen (}{CurrentParameter float}{RightParen )} (1)
+// CHECK-CC61: OverloadCandidate:{ResultType void}{Text foo_1}{LeftParen (}{CurrentParameter int}{RightParen )} (1)
+// CHECK-CC61: Completion contexts:
+// CHECK-CC61-NEXT: Any type
+// CHECK-CC61-NEXT: Any value
+// CHECK-CC61-NEXT: Enum tag
+// CHECK-CC61-NEXT: Union tag
+// CHECK-CC61-NEXT: Struct tag
+// CHECK-CC61-NEXT: Class name
+// CHECK-CC61-NEXT: Nested name specifier
+// CHECK-CC61-NEXT: Objective-C interface
+
+// RUN: c-index-test -code-completion-at=%s:111:16 %s | FileCheck -check-prefix=CHECK-CC62 %s
+// CHECK-CC62: OverloadCandidate:{ResultType void}{Text foo_1}{LeftParen (}{RightParen )} (1)
+// CHECK-CC62: OverloadCandidate:{ResultType void}{Text foo_1}{LeftParen (}{CurrentParameter float}{RightParen )} (1)
+// CHECK-CC62: OverloadCandidate:{ResultType void}{Text foo_1}{LeftParen (}{CurrentParameter int}{RightParen )} (1)
+// CHECK-CC62: Completion contexts:
+// CHECK-CC62-NEXT: Any type
+// CHECK-CC62-NEXT: Any value
+// CHECK-CC62-NEXT: Enum tag
+// CHECK-CC62-NEXT: Union tag
+// CHECK-CC62-NEXT: Struct tag
+// CHECK-CC62-NEXT: Class name
+// CHECK-CC62-NEXT: Nested name specifier
+// CHECK-CC62-NEXT: Objective-C interface
+
diff --git a/test/Index/complete-cxx-inline-methods.cpp b/test/Index/complete-cxx-inline-methods.cpp
index c0a4a8d..0f78e8c 100644
--- a/test/Index/complete-cxx-inline-methods.cpp
+++ b/test/Index/complete-cxx-inline-methods.cpp
@@ -25,7 +25,7 @@
 
 // RUN: c-index-test -code-completion-at=%s:4:9 -std=c++98 %s | FileCheck %s
 // RUN: c-index-test -code-completion-at=%s:13:7 -std=c++98 %s | FileCheck %s
-// CHECK:      CXXMethod:{ResultType MyCls::Vec &}{TypedText operator=}{LeftParen (}{Placeholder const MyCls::Vec &}{RightParen )} (79)
+// CHECK:      CXXMethod:{ResultType Vec &}{TypedText operator=}{LeftParen (}{Placeholder const Vec &}{RightParen )} (79)
 // CHECK-NEXT: StructDecl:{TypedText Vec}{Text ::} (75)
 // CHECK-NEXT: FieldDecl:{ResultType int}{TypedText x} (35)
 // CHECK-NEXT: FieldDecl:{ResultType int}{TypedText y} (35)
diff --git a/test/Index/complete-exprs.cpp b/test/Index/complete-exprs.cpp
index 3f3bd5c..fc60cc5 100644
--- a/test/Index/complete-exprs.cpp
+++ b/test/Index/complete-exprs.cpp
@@ -58,7 +58,7 @@
 // CHECK-CC1: CXXConstructor:{TypedText string}{LeftParen (}{Placeholder const char *}{Comma , }{Placeholder int n}{RightParen )} (50)
 // CHECK-CC1: ClassTemplate:{TypedText vector}{LeftAngle <}{Placeholder typename T}{RightAngle >} (50)
 // CHECK-CC1: CXXConstructor:{TypedText vector}{LeftAngle <}{Placeholder typename T}{RightAngle >}{LeftParen (}{Placeholder const T &}{Comma , }{Placeholder unsigned int n}{RightParen )} (50)
-// CHECK-CC1: FunctionTemplate:{ResultType void}{TypedText vector}{LeftAngle <}{Placeholder typename T}{RightAngle >}{LeftParen (}{Placeholder InputIterator first}{Comma , }{Placeholder InputIterator last}{RightParen )} (50)
+// CHECK-CC1: FunctionTemplate:{TypedText vector}{LeftAngle <}{Placeholder typename T}{RightAngle >}{LeftParen (}{Placeholder InputIterator first}{Comma , }{Placeholder InputIterator last}{RightParen )} (50)
 
 // RUN: c-index-test -code-completion-at=%s:19:1 %s | FileCheck -check-prefix=CHECK-CC2 %s
 // RUN: env CINDEXTEST_EDITING=1 CINDEXTEST_COMPLETION_CACHING=1 c-index-test -code-completion-at=%s:19:1 %s | FileCheck -check-prefix=CHECK-CC2 %s
@@ -72,7 +72,7 @@
 // CHECK-CC3: FunctionDecl:{ResultType void}{TypedText g}{LeftParen (}{RightParen )} (50)
 // CHECK-CC3: ClassTemplate:{TypedText vector}{LeftAngle <}{Placeholder typename T}{RightAngle >} (50)
 // CHECK-CC3: CXXConstructor:{TypedText vector}{LeftAngle <}{Placeholder typename T}{RightAngle >}{LeftParen (}{Placeholder const T &}{Comma , }{Placeholder unsigned int n}{RightParen )} (50)
-// CHECK-CC3: FunctionTemplate:{ResultType void}{TypedText vector}{LeftAngle <}{Placeholder typename T}{RightAngle >}{LeftParen (}{Placeholder InputIterator first}{Comma , }{Placeholder InputIterator last}{RightParen )} (50)
+// CHECK-CC3: FunctionTemplate:{TypedText vector}{LeftAngle <}{Placeholder typename T}{RightAngle >}{LeftParen (}{Placeholder InputIterator first}{Comma , }{Placeholder InputIterator last}{RightParen )} (50)
 
 // RUN: c-index-test -code-completion-at=%s:34:1 %s -std=c++0x | FileCheck -check-prefix=CHECK-CC4 %s
 // CHECK-CC4: NotImplemented:{ResultType const X *}{TypedText this} (40)
diff --git a/test/Index/complete-interfaces.m b/test/Index/complete-interfaces.m
index 2f86c3d..3e90407 100644
--- a/test/Index/complete-interfaces.m
+++ b/test/Index/complete-interfaces.m
@@ -45,3 +45,19 @@
 
 
 // RUN: env CINDEXTEST_EDITING=1 CINDEXTEST_COMPLETION_CACHING=1 c-index-test -code-completion-at=%s:11:12 %s | FileCheck -check-prefix=CHECK-CC2 %s
+
+
+void useClasses() {
+  int i = 0;
+  [Int3 message:1];
+}
+
+// RUN: c-index-test -code-completion-at=%s:51:11 %s | FileCheck -check-prefix=CHECK-USE %s
+// RUN: c-index-test -code-completion-at=%s:52:17 %s | FileCheck -check-prefix=CHECK-USE %s
+// CHECK-USE: ObjCInterfaceDecl:{TypedText Int2} (50)
+// CHECK-USE: ObjCInterfaceDecl:{TypedText Int3} (50)
+// CHECK-USE-NOT: Int1
+// CHECK-USE-NOT: Int4
+
+// Caching should work too:
+// RUN: env CINDEXTEST_EDITING=1 CINDEXTEST_COMPLETION_CACHING=1 c-index-test -code-completion-at=%s:51:11 %s | FileCheck -check-prefix=CHECK-USE %s
diff --git a/test/Index/complete-method-decls.m b/test/Index/complete-method-decls.m
index 8a17142..f561f81 100644
--- a/test/Index/complete-method-decls.m
+++ b/test/Index/complete-method-decls.m
@@ -236,3 +236,25 @@
 // RUN: c-index-test -code-completion-at=%s:107:2 %s -target x86_64-apple-macosx10.7 | FileCheck -check-prefix=CHECK-NULLABILITY2 %s
 // CHECK-NULLABILITY2: ObjCInstanceMethodDecl:{LeftParen (}{Text instancetype}{RightParen )}{TypedText getI3} (40)
 // CHECK-NULLABILITY2: ObjCInstanceMethodDecl:{LeftParen (}{Text I3 *}{RightParen )}{TypedText produceI3}{TypedText :}{LeftParen (}{Text I3 *}{RightParen )}{Text i3} (40)
+
+@interface CompleteWithoutLeadingPrefix
+
+- (void)aMethod;
++ (int)aClassMethod:(int)x;
+@property int p;
+
+@end
+
+@implementation CompleteWithoutLeadingPrefix
+
+
+
+@end
+
+// RUN: c-index-test -code-completion-at=%s:250:1 %s | FileCheck -check-prefix=CHECK-COMP-NO-PREFIX %s
+// CHECK-COMP-NO-PREFIX: NotImplemented:{TypedText @end} (40)
+// CHECK-COMP-NO-PREFIX: ObjCClassMethodDecl:{Text +}{HorizontalSpace  }{LeftParen (}{Text int}{RightParen )}{TypedText aClassMethod}{TypedText :}{LeftParen (}{Text int}{RightParen )}{Text x} (40)
+// CHECK-COMP-NO-PREFIX: ObjCInstanceMethodDecl:{Text -}{HorizontalSpace  }{LeftParen (}{Text void}{RightParen )}{TypedText aMethod} (40)
+// CHECK-COMP-NO-PREFIX: ObjCInterfaceDecl:{TypedText I1}
+// CHECK-COMP-NO-PREFIX: ObjCInstanceMethodDecl:{Text -}{HorizontalSpace  }{LeftParen (}{Text int}{RightParen )}{TypedText p} (40)
+// CHECK-COMP-NO-PREFIX: ObjCInstanceMethodDecl:{Text -}{HorizontalSpace  }{LeftParen (}{Text void}{RightParen )}{TypedText setP}{TypedText :}{LeftParen (}{Text int}{RightParen )}{Text p} (40)
diff --git a/test/Index/complete-pch-skip.cpp b/test/Index/complete-pch-skip.cpp
new file mode 100644
index 0000000..6abf40d
--- /dev/null
+++ b/test/Index/complete-pch-skip.cpp
@@ -0,0 +1,30 @@
+namespace ns {
+int bar;
+}
+
+int main() { return ns:: }
+int main2() { return ns::foo(). }
+
+// RUN: echo "namespace ns { struct foo { int baz }; }" > %t.h
+// RUN: c-index-test -write-pch %t.h.pch -x c++-header %t.h
+//
+// RUN: c-index-test -code-completion-at=%s:5:26 -include %t.h %s | FileCheck -check-prefix=WITH-PCH %s
+// WITH-PCH: {TypedText bar}
+// WITH-PCH: {TypedText foo}
+
+// RUN: env CINDEXTEST_COMPLETION_SKIP_PREAMBLE=1 c-index-test -code-completion-at=%s:5:26 -include %t.h %s | FileCheck -check-prefix=SKIP-PCH %s
+// SKIP-PCH-NOT: foo
+// SKIP-PCH: {TypedText bar}
+// SKIP-PCH-NOT: foo
+
+// Verify that with *no* preamble (no -include flag) we still get local results.
+// SkipPreamble used to break this, by making lookup *too* lazy.
+// RUN: env CINDEXTEST_COMPLETION_SKIP_PREAMBLE=1 c-index-test -code-completion-at=%s:5:26 %s | FileCheck -check-prefix=NO-PCH %s
+// NO-PCH-NOT: foo
+// NO-PCH: {TypedText bar}
+// NO-PCH-NOT: foo
+
+// Verify that we still get member results from the preamble.
+// RUN: env CINDEXTEST_COMPLETION_SKIP_PREAMBLE=1 c-index-test -code-completion-at=%s:6:32 -include %t.h %s | FileCheck -check-prefix=MEMBER %s
+// MEMBER: {TypedText baz}
+
diff --git a/test/Index/complete-super.cpp b/test/Index/complete-super.cpp
index 9ffa7c8..92d3f7f 100644
--- a/test/Index/complete-super.cpp
+++ b/test/Index/complete-super.cpp
@@ -40,3 +40,8 @@
 // CHECK-ACCESS-PATTERN: NotImplemented:{TypedText private}{Colon :} (40)
 // CHECK-ACCESS-PATTERN: NotImplemented:{TypedText protected}{Colon :} (40)
 // CHECK-ACCESS-PATTERN: NotImplemented:{TypedText public}{Colon :} (40)
+
+// RUN: env CINDEXTEST_CODE_COMPLETE_PATTERNS=1 c-index-test -code-completion-at=%s:10:12 %s | FileCheck -check-prefix=CHECK-INHERITANCE-PATTERN %s
+// CHECK-INHERITANCE-PATTERN: NotImplemented:{TypedText private} (40)
+// CHECK-INHERITANCE-PATTERN: NotImplemented:{TypedText protected} (40)
+// CHECK-INHERITANCE-PATTERN: NotImplemented:{TypedText public} (40)
diff --git a/test/Index/crash-preamble-classes.cpp b/test/Index/crash-preamble-classes.cpp
new file mode 100644
index 0000000..e7d4bd7
--- /dev/null
+++ b/test/Index/crash-preamble-classes.cpp
@@ -0,0 +1,8 @@
+#include "crash-preamble-classes.h"
+
+struct Z : Y {
+  Z() {}
+};
+
+// RUN: env CINDEXTEST_EDITING=1 \
+// RUN: c-index-test -test-load-source-reparse 5 local -I %S/Inputs %s
diff --git a/test/Index/get-cursor.cpp b/test/Index/get-cursor.cpp
index 8aa12d5..e997fc4 100644
--- a/test/Index/get-cursor.cpp
+++ b/test/Index/get-cursor.cpp
@@ -152,6 +152,11 @@
 void f_dynamic_noexcept() throw(int);
 void f_dynamic_noexcept_any() throw(...);
 
+enum EnumType { Enumerator };
+struct Z {
+    EnumType e = Enumerator;
+};
+
 // RUN: c-index-test -cursor-at=%s:6:4 %s | FileCheck -check-prefix=CHECK-COMPLETION-1 %s
 // CHECK-COMPLETION-1: CXXConstructor=X:6:3
 // CHECK-COMPLETION-1-NEXT: Completion string: {TypedText X}{LeftParen (}{Placeholder int}{Comma , }{Placeholder int}{RightParen )}
@@ -275,3 +280,6 @@
 // CHECK-FORRANGE: 141:18 DeclRefExpr=coll:140:20 Extent=[141:18 - 141:22] Spelling=coll ([141:18 - 141:22])
 // CHECK-FORRANGE: 142:11 DeclRefExpr=lv:141:13 Extent=[142:11 - 142:13] Spelling=lv ([142:11 - 142:13])
 
+// RUN: c-index-test -cursor-at=%s:157:18 -std=c++11 %s | FileCheck -check-prefix=CHECK-INCLASSINITIALIZER %s
+// CHECK-INCLASSINITIALIZER: 157:18 DeclRefExpr=Enumerator:155:17 Extent=[157:18 - 157:28] Spelling=Enumerator ([157:18 - 157:28])
+
diff --git a/test/Index/index-refs.cpp b/test/Index/index-refs.cpp
index d8bede0..760e4cf 100644
--- a/test/Index/index-refs.cpp
+++ b/test/Index/index-refs.cpp
@@ -67,6 +67,9 @@
 
 void foo5() {
   struct S2 s = { .y = 1, .x = 4};
+  s.y = s.x + 1;
+  (void)&foo3;
+  foo4(s.y);
 }
 
 int ginitlist[] = {EnumVal};
@@ -105,7 +108,7 @@
 // CHECK:      [indexDeclaration]: kind: c++-class-template | name: TS | {{.*}} | loc: 47:8
 // CHECK-NEXT: [indexDeclaration]: kind: struct-template-partial-spec | name: TS | USR: c:@SP>1#T@TS>#t0.0#I | {{.*}} | loc: 50:8
 // CHECK-NEXT: [indexDeclaration]: kind: typedef | name: MyInt | USR: c:index-refs.cpp@SP>1#T@TS>#t0.0#I@T@MyInt | {{.*}} | loc: 51:15 | semantic-container: [TS:50:8] | lexical-container: [TS:50:8]
-// CHECK-NEXT: [indexEntityReference]: kind: c++-class-template | name: TS | USR: c:@ST>2#T#T@TS | lang: C++ | cursor: TemplateRef=TS:47:8 | loc: 50:8 | <parent>:: <<NULL>> | container: [TU] | refkind: direct
+// CHECK-NEXT: [indexEntityReference]: kind: c++-class-template | name: TS | USR: c:@ST>2#T#T@TS | lang: C++ | cursor: TemplateRef=TS:47:8 | loc: 50:8 | <parent>:: <<NULL>> | container: [TU] | refkind: direct | role: ref
 /* when indexing implicit instantiations
   [indexDeclaration]: kind: struct-template-spec | name: TS | USR: c:@S@TS>#I | {{.*}} | loc: 50:8
   [indexDeclaration]: kind: typedef | name: MyInt | USR: c:index-refs.cpp@593@S@TS>#I@T@MyInt | {{.*}} | loc: 51:15 | semantic-container: [TS:50:8] | lexical-container: [TS:50:8]
@@ -117,7 +120,7 @@
 // CHECK-NEXT: [indexEntityReference]: kind: c++-class-template | name: TS | USR: c:@ST>2#T#T@TS | {{.*}} | loc: 55:3
 
 // CHECK:      [indexEntityReference]: kind: variable | name: array_size | {{.*}} | loc: 59:22
-// CHECK:      [indexEntityReference]: kind: variable | name: default_param | {{.*}} | loc: 62:19
+// CHECK:      [indexEntityReference]: kind: variable | name: default_param | {{.*}} | loc: 62:19 | {{.*}} | role: ref read
 // CHECK-NOT:  [indexEntityReference]: kind: variable | name: default_param | {{.*}} | loc: 62:19
 
 // CHECK:      [indexEntityReference]: kind: field | name: y | {{.*}} | loc: 69:20
@@ -125,6 +128,11 @@
 // CHECK-NOT:  [indexEntityReference]: kind: field | name: y | {{.*}} | loc: 69:20
 // CHECK-NOT:  [indexEntityReference]: kind: field | name: x | {{.*}} | loc: 69:28
 
+// CHECK:      [indexEntityReference]: kind: field | name: y | {{.*}} | loc: 70:5 | {{.*}} | role: ref write
+// CHECK:      [indexEntityReference]: kind: field | name: x | {{.*}} | loc: 70:11 | {{.*}} | role: ref read
+// CHECK:      [indexEntityReference]: kind: function | name: foo3 | {{.*}} | loc: 71:10 | {{.*}} | role: ref addr
+// CHECK:      [indexEntityReference]: kind: function | name: foo4 | {{.*}} | loc: 72:3 | {{.*}} | role: ref call
+
 // CHECK:      [indexDeclaration]: kind: variable | name: ginitlist |
-// CHECK:      [indexEntityReference]: kind: enumerator | name: EnumVal | {{.*}} | loc: 72:20
-// CHECK-NOT:  [indexEntityReference]: kind: enumerator | name: EnumVal | {{.*}} | loc: 72:20
+// CHECK:      [indexEntityReference]: kind: enumerator | name: EnumVal | {{.*}} | loc: 75:20
+// CHECK-NOT:  [indexEntityReference]: kind: enumerator | name: EnumVal | {{.*}} | loc: 75:20
diff --git a/test/Index/index-subscripting-literals.m b/test/Index/index-subscripting-literals.m
index 4ecad0b..08b936a 100644
--- a/test/Index/index-subscripting-literals.m
+++ b/test/Index/index-subscripting-literals.m
@@ -42,7 +42,7 @@
 // RUN: c-index-test -index-file -target x86_64-apple-macosx10.7 %s | FileCheck %s
 
 // CHECK:      [indexEntityReference]: kind: variable | name: idx | USR: c:@idx | lang: C | cursor: DeclRefExpr=idx:22:5 | loc: 27:9
-// CHECK-NEXT: [indexEntityReference]: kind: variable | name: p | USR: c:@p | lang: C | cursor: DeclRefExpr=p:23:4 | loc: 27:16 | <parent>:: kind: function | name: testArray | USR: c:@F@testArray | lang: C | container: [testArray:25:4] | refkind: direct
+// CHECK-NEXT: [indexEntityReference]: kind: variable | name: p | USR: c:@p | lang: C | cursor: DeclRefExpr=p:23:4 | loc: 27:16 | <parent>:: kind: function | name: testArray | USR: c:@F@testArray | lang: C | container: [testArray:25:4] | refkind: direct | role: ref
 // CHECK-NEXT: [indexEntityReference]: kind: objc-instance-method | name: setObject:atIndexedSubscript:
 // CHECK-NEXT: [indexEntityReference]: kind: objc-class | name: NSArray
 // CHECK-NEXT: [indexEntityReference]: kind: objc-class-method | name: arrayWithObjects:count: 
diff --git a/test/Index/load-classes.cpp b/test/Index/load-classes.cpp
index 8b1ed31..b6c25b4 100644
--- a/test/Index/load-classes.cpp
+++ b/test/Index/load-classes.cpp
@@ -29,7 +29,7 @@
 }
 
 // RUN: c-index-test -test-load-source all %s | FileCheck %s
-// CHECK: load-classes.cpp:3:8: StructDecl=X:3:8 (Definition) Extent=[3:1 - 26:2]
+// CHECK: load-classes.cpp:3:8: StructDecl=X:3:8 (Definition) (abstract) Extent=[3:1 - 26:2]
 // CHECK: load-classes.cpp:4:3: CXXConstructor=X:4:3 (converting constructor) Extent=[4:3 - 4:15] [access=public]
 // FIXME: missing TypeRef in the constructor name
 // CHECK: load-classes.cpp:4:9: ParmDecl=value:4:9 (Definition) Extent=[4:5 - 4:14]
diff --git a/test/Index/opencl-types.cl b/test/Index/opencl-types.cl
index fe0042a..d71893a 100644
--- a/test/Index/opencl-types.cl
+++ b/test/Index/opencl-types.cl
@@ -16,11 +16,11 @@
   double4 vectorDouble;
 }
 
-// CHECK: VarDecl=scalarHalf:11:8 (Definition) [type=half] [typekind=Half] [isPOD=1]
+// CHECK: VarDecl=scalarHalf:11:8 (Definition){{( \(invalid\))?}} [type=half] [typekind=Half] [isPOD=1]
 // CHECK: VarDecl=vectorHalf:12:9 (Definition) [type=half4] [typekind=Typedef] [canonicaltype=half __attribute__((ext_vector_type(4)))] [canonicaltypekind=Unexposed] [isPOD=1]
 // CHECK: VarDecl=scalarFloat:13:9 (Definition) [type=float] [typekind=Float] [isPOD=1]
 // CHECK: VarDecl=vectorFloat:14:10 (Definition) [type=float4] [typekind=Typedef] [canonicaltype=float __attribute__((ext_vector_type(4)))] [canonicaltypekind=Unexposed] [isPOD=1]
-// CHECK: VarDecl=scalarDouble:15:10 (Definition) [type=double] [typekind=Double] [isPOD=1]
+// CHECK: VarDecl=scalarDouble:15:10 (Definition){{( \(invalid\))?}} [type=double] [typekind=Double] [isPOD=1]
 // CHECK: VarDecl=vectorDouble:16:11 (Definition) [type=double4] [typekind=Typedef] [canonicaltype=double __attribute__((ext_vector_type(4)))] [canonicaltypekind=Unexposed] [isPOD=1]
 
 #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable
@@ -45,10 +45,10 @@
 // CHECK: ParmDecl=scalarOCLImage2dArrayRO:32:61 (Definition) [type=__read_only image2d_array_t] [typekind=OCLImage2dArrayRO] [isPOD=1]
 // CHECK: ParmDecl=scalarOCLImage2dDepthRO:33:61 (Definition) [type=__read_only image2d_depth_t] [typekind=OCLImage2dDepthRO] [isPOD=1]
 // CHECK: ParmDecl=scalarOCLImage2dArrayDepthRO:34:72 (Definition) [type=__read_only image2d_array_depth_t] [typekind=OCLImage2dArrayDepthRO] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dMSAARO:35:59 (Definition) [type=__read_only image2d_msaa_t] [typekind=OCLImage2dMSAARO] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dArrayMSAARO:36:70 (Definition) [type=__read_only image2d_array_msaa_t] [typekind=OCLImage2dArrayMSAARO] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dMSAADepthRO:37:70 (Definition) [type=__read_only image2d_msaa_depth_t] [typekind=OCLImage2dMSAADepthRO] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dArrayMSAADepthRO:38:81 (Definition) [type=__read_only image2d_array_msaa_depth_t] [typekind=OCLImage2dArrayMSAADepthRO] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dMSAARO:35:59 (Definition){{( \(invalid\))?}} [type=__read_only image2d_msaa_t] [typekind=OCLImage2dMSAARO] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dArrayMSAARO:36:70 (Definition){{( \(invalid\))?}} [type=__read_only image2d_array_msaa_t] [typekind=OCLImage2dArrayMSAARO] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dMSAADepthRO:37:70 (Definition){{( \(invalid\))?}} [type=__read_only image2d_msaa_depth_t] [typekind=OCLImage2dMSAADepthRO] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dArrayMSAADepthRO:38:81 (Definition){{( \(invalid\))?}} [type=__read_only image2d_array_msaa_depth_t] [typekind=OCLImage2dArrayMSAADepthRO] [isPOD=1]
 // CHECK: ParmDecl=scalarOCLImage3dRO:39:50 (Definition) [type=__read_only image3d_t] [typekind=OCLImage3dRO] [isPOD=1]
 
 void kernel OCLImage1dWOTest(write_only image1d_t scalarOCLImage1dWO);
@@ -71,11 +71,11 @@
 // CHECK: ParmDecl=scalarOCLImage2dArrayWO:58:62 (Definition) [type=__write_only image2d_array_t] [typekind=OCLImage2dArrayWO] [isPOD=1]
 // CHECK: ParmDecl=scalarOCLImage2dDepthWO:59:62 (Definition) [type=__write_only image2d_depth_t] [typekind=OCLImage2dDepthWO] [isPOD=1]
 // CHECK: ParmDecl=scalarOCLImage2dArrayDepthWO:60:73 (Definition) [type=__write_only image2d_array_depth_t] [typekind=OCLImage2dArrayDepthWO] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dMSAAWO:61:60 (Definition) [type=__write_only image2d_msaa_t] [typekind=OCLImage2dMSAAWO] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dArrayMSAAWO:62:71 (Definition) [type=__write_only image2d_array_msaa_t] [typekind=OCLImage2dArrayMSAAWO] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dMSAADepthWO:63:71 (Definition) [type=__write_only image2d_msaa_depth_t] [typekind=OCLImage2dMSAADepthWO] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dArrayMSAADepthWO:64:82 (Definition) [type=__write_only image2d_array_msaa_depth_t] [typekind=OCLImage2dArrayMSAADepthWO] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage3dWO:65:51 (Definition) [type=__write_only image3d_t] [typekind=OCLImage3dWO] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dMSAAWO:61:60 (Definition){{( \(invalid\))?}} [type=__write_only image2d_msaa_t] [typekind=OCLImage2dMSAAWO] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dArrayMSAAWO:62:71 (Definition){{( \(invalid\))?}} [type=__write_only image2d_array_msaa_t] [typekind=OCLImage2dArrayMSAAWO] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dMSAADepthWO:63:71 (Definition){{( \(invalid\))?}} [type=__write_only image2d_msaa_depth_t] [typekind=OCLImage2dMSAADepthWO] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dArrayMSAADepthWO:64:82 (Definition){{( \(invalid\))?}} [type=__write_only image2d_array_msaa_depth_t] [typekind=OCLImage2dArrayMSAADepthWO] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage3dWO:65:51 (Definition){{( \(invalid\))?}} [type=__write_only image3d_t] [typekind=OCLImage3dWO] [isPOD=1]
 
 void kernel OCLImage1dRWTest(read_write image1d_t scalarOCLImage1dRW);
 void kernel OCLImage1dArrayRWTest(read_write image1d_array_t scalarOCLImage1dArrayRW);
@@ -97,10 +97,10 @@
 // CHECK: ParmDecl=scalarOCLImage2dArrayRW:84:62 (Definition) [type=__read_write image2d_array_t] [typekind=OCLImage2dArrayRW] [isPOD=1]
 // CHECK: ParmDecl=scalarOCLImage2dDepthRW:85:62 (Definition) [type=__read_write image2d_depth_t] [typekind=OCLImage2dDepthRW] [isPOD=1]
 // CHECK: ParmDecl=scalarOCLImage2dArrayDepthRW:86:73 (Definition) [type=__read_write image2d_array_depth_t] [typekind=OCLImage2dArrayDepthRW] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dMSAARW:87:60 (Definition) [type=__read_write image2d_msaa_t] [typekind=OCLImage2dMSAARW] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dArrayMSAARW:88:71 (Definition) [type=__read_write image2d_array_msaa_t] [typekind=OCLImage2dArrayMSAARW] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dMSAADepthRW:89:71 (Definition) [type=__read_write image2d_msaa_depth_t] [typekind=OCLImage2dMSAADepthRW] [isPOD=1]
-// CHECK: ParmDecl=scalarOCLImage2dArrayMSAADepthRW:90:82 (Definition) [type=__read_write image2d_array_msaa_depth_t] [typekind=OCLImage2dArrayMSAADepthRW] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dMSAARW:87:60 (Definition){{( \(invalid\))?}} [type=__read_write image2d_msaa_t] [typekind=OCLImage2dMSAARW] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dArrayMSAARW:88:71 (Definition){{( \(invalid\))?}} [type=__read_write image2d_array_msaa_t] [typekind=OCLImage2dArrayMSAARW] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dMSAADepthRW:89:71 (Definition){{( \(invalid\))?}} [type=__read_write image2d_msaa_depth_t] [typekind=OCLImage2dMSAADepthRW] [isPOD=1]
+// CHECK: ParmDecl=scalarOCLImage2dArrayMSAADepthRW:90:82 (Definition){{( \(invalid\))?}} [type=__read_write image2d_array_msaa_depth_t] [typekind=OCLImage2dArrayMSAADepthRW] [isPOD=1]
 // CHECK: ParmDecl=scalarOCLImage3dRW:91:51 (Definition) [type=__read_write image3d_t] [typekind=OCLImage3dRW] [isPOD=1]
 
 void kernel intPipeTestRO(read_only pipe int scalarPipe);
diff --git a/test/Index/pipe-size.cl b/test/Index/pipe-size.cl
index cfe9793..78a8f43 100644
--- a/test/Index/pipe-size.cl
+++ b/test/Index/pipe-size.cl
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=X86
 // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR
 // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64
-// RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple amdgcn-amd-amdhsa-amdgizcl %s -o - | FileCheck %s --check-prefix=AMD
+// RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN
 __kernel void testPipe( pipe int test )
 {
     int s = sizeof(test);
@@ -11,6 +11,6 @@
     // SPIR: store i32 4, i32* %s, align 4
     // SPIR64: store %opencl.pipe_t addrspace(1)* %test, %opencl.pipe_t addrspace(1)** %test.addr, align 8
     // SPIR64: store i32 8, i32* %s, align 4
-    // AMD: store %opencl.pipe_t addrspace(1)* %test, %opencl.pipe_t addrspace(1)* addrspace(5)* %test.addr, align 8
-    // AMD: store i32 8, i32 addrspace(5)* %s, align 4
+    // AMDGCN: store %opencl.pipe_t addrspace(1)* %test, %opencl.pipe_t addrspace(1)* addrspace(5)* %test.addr, align 8
+    // AMDGCN: store i32 8, i32 addrspace(5)* %s, align 4
 }
diff --git a/test/Index/preamble-conditionals-inverted.cpp b/test/Index/preamble-conditionals-inverted.cpp
index 1d67ccb..f5cf120 100644
--- a/test/Index/preamble-conditionals-inverted.cpp
+++ b/test/Index/preamble-conditionals-inverted.cpp
@@ -3,6 +3,8 @@
 // RUN: | FileCheck %s --implicit-check-not "error:"
 #ifdef FOO_H
 
-void foo();
+void foo() {}
 
 #endif
+
+int foo() { return 0; }
diff --git a/test/Index/preamble-conditionals-skipping.cpp b/test/Index/preamble-conditionals-skipping.cpp
new file mode 100644
index 0000000..fa5764c
--- /dev/null
+++ b/test/Index/preamble-conditionals-skipping.cpp
@@ -0,0 +1,16 @@
+// RUN: env CINDEXTEST_EDITING=1 c-index-test -test-load-source-reparse 5 \
+// RUN:                                       local -std=c++14 %s 2>&1 \
+// RUN: | FileCheck %s --implicit-check-not "error:"
+
+#ifdef MYCPLUSPLUS
+extern "C" {
+#endif
+
+#ifdef MYCPLUSPLUS
+}
+#endif
+
+int main()
+{
+    return 0;
+}
diff --git a/test/Index/print-display-names.cpp b/test/Index/print-display-names.cpp
index 94fe466..5ba10e4 100644
--- a/test/Index/print-display-names.cpp
+++ b/test/Index/print-display-names.cpp
@@ -12,9 +12,20 @@
 
 template<> void g<int>(ClassTmpl<int, int>);
 
-// RUN: c-index-test -test-load-source all-display %s | FileCheck %s
-// CHECK: print-display-names.cpp:2:7: ClassTemplate=ClassTmpl<T, typename>:2:7
-// CHECK: print-display-names.cpp:6:16: ClassDecl=ClassTmpl<Integer, Integer>:6:16 (Definition)
-// CHECK: print-display-names.cpp:8:6: FunctionDecl=f(ClassTmpl<float, Integer>):8:6
-// CHECK: print-display-names.cpp:11:6: FunctionTemplate=g(ClassTmpl<T, T>):11:6
-// CHECK: print-display-names.cpp:13:17: FunctionDecl=g<>(ClassTmpl<int, int>):13:17 [Specialization of g:11:6]
+// RUN: c-index-test -test-load-source all-display %s | FileCheck %s --check-prefix=DISPLAY_NAME
+// DISPLAY_NAME: print-display-names.cpp:2:7: ClassTemplate=ClassTmpl<T, typename>:2:7
+// DISPLAY_NAME: print-display-names.cpp:6:16: ClassDecl=ClassTmpl<Integer, Integer>:6:16 (Definition)
+// DISPLAY_NAME: print-display-names.cpp:8:6: FunctionDecl=f(ClassTmpl<float, Integer>):8:6
+// DISPLAY_NAME: print-display-names.cpp:11:6: FunctionTemplate=g(ClassTmpl<T, T>):11:6
+// DISPLAY_NAME: print-display-names.cpp:13:17: FunctionDecl=g<>(ClassTmpl<int, int>):13:17 [Specialization of g:11:6]
+
+// RUN: env CINDEXTEST_PRINTINGPOLICY_TERSEOUTPUT=1 c-index-test -test-load-source all-pretty %s | FileCheck %s --check-prefix=PRETTY
+// PRETTY: print-display-names.cpp:2:7: ClassTemplate=template <typename T, typename > class ClassTmpl {}:2:7 (Definition) Extent=[1:1 - 2:20]
+// PRETTY: print-display-names.cpp:4:13: TypedefDecl=typedef int Integer:4:13 (Definition) Extent=[4:1 - 4:20]
+// PRETTY: print-display-names.cpp:6:16: ClassDecl=template<> class ClassTmpl<int, int> {}:6:16 (Definition) [Specialization of ClassTmpl:2:7] Extent=[6:1 - 6:43]
+// PRETTY: print-display-names.cpp:8:6: FunctionDecl=void f(ClassTmpl<float, Integer> p):8:6 Extent=[8:1 - 8:36]
+// PRETTY: print-display-names.cpp:8:34: ParmDecl=ClassTmpl<float, Integer> p:8:34 (Definition) Extent=[8:8 - 8:35]
+// PRETTY: print-display-names.cpp:11:6: FunctionTemplate=template <typename T> void g(ClassTmpl<T, T>):11:6 Extent=[10:1 - 11:24]
+// PRETTY: print-display-names.cpp:11:23: ParmDecl=ClassTmpl<T, T>:11:23 (Definition) Extent=[11:8 - 11:23]
+// PRETTY: print-display-names.cpp:13:17: FunctionDecl=template<> void g<int>(ClassTmpl<int, int>):13:17 [Specialization of g:11:6] [Template arg 0: kind: 1, type: int] Extent=[13:1 - 13:44]
+// PRETTY: print-display-names.cpp:13:43: ParmDecl=ClassTmpl<int, int>:13:43 (Definition) Extent=[13:24 - 13:43]
diff --git a/test/Index/print-type-size.cpp b/test/Index/print-type-size.cpp
index 45de93f..1ea5346 100644
--- a/test/Index/print-type-size.cpp
+++ b/test/Index/print-type-size.cpp
@@ -4,8 +4,8 @@
 
 namespace basic {
 
-// CHECK64: VarDecl=v:[[@LINE+2]]:6 (Definition) [type=void] [typekind=Void]
-// CHECK32: VarDecl=v:[[@LINE+1]]:6 (Definition) [type=void] [typekind=Void]
+// CHECK64: VarDecl=v:[[@LINE+2]]:6 (Definition) (invalid) [type=void] [typekind=Void]
+// CHECK32: VarDecl=v:[[@LINE+1]]:6 (Definition) (invalid) [type=void] [typekind=Void]
 void v;
 
 // CHECK64: VarDecl=v1:[[@LINE+2]]:7 (Definition) [type=void *] [typekind=Pointer] [sizeof=8] [alignof=8]
diff --git a/test/Index/record-completion-invocation.c b/test/Index/record-completion-invocation.c
new file mode 100644
index 0000000..c249565
--- /dev/null
+++ b/test/Index/record-completion-invocation.c
@@ -0,0 +1,11 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: env CINDEXTEST_INVOCATION_EMISSION_PATH=%t not c-index-test -code-completion-at=%s:10:1 "-remap-file=%s,%S/Inputs/record-parsing-invocation-remap.c" %s
+// RUN: cat %t/libclang-* | FileCheck %s
+
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: env LIBCLANG_DISABLE_CRASH_RECOVERY=1 CINDEXTEST_INVOCATION_EMISSION_PATH=%t not --crash c-index-test -code-completion-at=%s:10:1 "-remap-file=%s,%S/Inputs/record-parsing-invocation-remap.c" %s
+// RUN: cat %t/libclang-* | FileCheck %s
+
+// CHECK: {"toolchain":"{{.*}}","libclang.operation":"complete","libclang.opts":1,"args":["clang","-fno-spell-checking","{{.*}}record-completion-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"],"invocation-args":["-code-completion-at={{.*}}record-completion-invocation.c:10:1"],"unsaved_file_hashes":[{"name":"{{.*}}record-completion-invocation.c","md5":"aee23773de90e665992b48209351d70e"}]}
diff --git a/test/Index/record-parsing-invocation.c b/test/Index/record-parsing-invocation.c
new file mode 100644
index 0000000..3254e58
--- /dev/null
+++ b/test/Index/record-parsing-invocation.c
@@ -0,0 +1,28 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: env CINDEXTEST_INVOCATION_EMISSION_PATH=%t not c-index-test -test-load-source all %s
+// RUN: cat %t/libclang-* | FileCheck %s
+
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: env LIBCLANG_DISABLE_CRASH_RECOVERY=1 CINDEXTEST_INVOCATION_EMISSION_PATH=%t not --crash c-index-test -test-load-source all %s
+// RUN: cat %t/libclang-* | FileCheck %s
+
+// Verify that the file is removed for successful operation:
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: env CINDEXTEST_INVOCATION_EMISSION_PATH=%t c-index-test -test-load-source all %s -DAVOID_CRASH
+// RUN: ls %t | count 0
+
+// Make sure we record the unsaved file hash.
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: env CINDEXTEST_INVOCATION_EMISSION_PATH=%t not c-index-test -test-load-source all "-remap-file=%s,%S/Inputs/record-parsing-invocation-remap.c" %s
+// RUN: cat %t/libclang-* | FileCheck --check-prefix=CHECK-UNSAVED %s
+
+#ifndef AVOID_CRASH
+#  pragma clang __debug parser_crash
+#endif
+
+// CHECK: {"toolchain":"{{.*}}","libclang.operation":"parse","libclang.opts":1,"args":["clang","-fno-spell-checking","{{.*}}record-parsing-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"]}
+// CHECK-UNSAVED: {"toolchain":"{{.*}}","libclang.operation":"parse","libclang.opts":1,"args":["clang","-fno-spell-checking","{{.*}}record-parsing-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"],"unsaved_file_hashes":[{"name":"{{.*}}record-parsing-invocation.c","md5":"aee23773de90e665992b48209351d70e"}]}
diff --git a/test/Index/skipped-bodies-ctors.cpp b/test/Index/skipped-bodies-ctors.cpp
new file mode 100644
index 0000000..8a559ee
--- /dev/null
+++ b/test/Index/skipped-bodies-ctors.cpp
@@ -0,0 +1,16 @@
+// RUN: env CINDEXTEST_SKIP_FUNCTION_BODIES=1 c-index-test -test-load-source all %s 2>&1 \
+// RUN: | FileCheck --implicit-check-not "error:" %s
+
+
+template <class T>
+struct Foo {
+  template <class = int>
+  Foo(int &a) : a(a) {
+  }
+
+  int &a;
+};
+
+
+int bar = Foo<int>(bar).a + Foo<int>(bar).a;
+// CHECK-NOT: error: constructor for 'Foo<int>' must explicitly initialize the reference
diff --git a/test/Index/skipped-bodies-templates.cpp b/test/Index/skipped-bodies-templates.cpp
new file mode 100644
index 0000000..d8fbc28
--- /dev/null
+++ b/test/Index/skipped-bodies-templates.cpp
@@ -0,0 +1,27 @@
+// RUN: env CINDEXTEST_SKIP_FUNCTION_BODIES=1 c-index-test -test-load-source all %s 2>&1 \
+// RUN: | FileCheck %s
+
+
+template <class T>
+struct Foo {
+  inline int with_body() {
+    return 100;
+  }
+
+  inline int without_body();
+};
+
+
+int bar = Foo<int>().with_body() + Foo<int>().without_body();
+// CHECK-NOT: warning: inline function 'Foo<int>::with_body' is not defined
+// CHECK: warning: inline function 'Foo<int>::without_body' is not defined
+
+template <class T>
+inline int with_body() { return 10; }
+
+template <class T>
+inline int without_body();
+
+int baz = with_body<int>() + without_body<int>();
+// CHECK-NOT: warning: inline function 'with_body<int>' is not defined
+// CHECK: warning: inline function 'without_body<int>' is not defined
diff --git a/test/Index/skipped-function-bodies.cpp b/test/Index/skipped-function-bodies.cpp
new file mode 100644
index 0000000..9378f66
--- /dev/null
+++ b/test/Index/skipped-function-bodies.cpp
@@ -0,0 +1,9 @@
+// RUN: env CINDEXTEST_SKIP_FUNCTION_BODIES=1 c-index-test -test-load-source all %s 2>&1 \
+// RUN: | FileCheck %s
+
+inline int with_body() { return 10; }
+inline int without_body();
+
+int x = with_body() + without_body();
+// CHECK-NOT: warning: inline function 'with_body' is not defined
+// CHECK: warning: inline function 'without_body' is not defined
diff --git a/test/Lexer/cxx-features.cpp b/test/Lexer/cxx-features.cpp
index 04821bd..a7d12e2 100644
--- a/test/Lexer/cxx-features.cpp
+++ b/test/Lexer/cxx-features.cpp
@@ -4,8 +4,8 @@
 // RUN: %clang_cc1 -std=c++14 -fcxx-exceptions -fsized-deallocation -verify %s
 // RUN: %clang_cc1 -std=c++1z -fcxx-exceptions -fsized-deallocation -verify %s
 // RUN: %clang_cc1 -std=c++1z -fcxx-exceptions -fsized-deallocation -fconcepts-ts -DCONCEPTS_TS=1 -verify %s
-// RUN: %clang_cc1 -fno-rtti -fno-threadsafe-statics -verify %s -DNO_EXCEPTIONS -DNO_RTTI -DNO_THREADSAFE_STATICS
-// RUN: %clang_cc1 -fcoroutines-ts -DNO_EXCEPTIONS -DCOROUTINES -verify %s
+// RUN: %clang_cc1 -fno-rtti -fno-threadsafe-statics -verify %s -DNO_EXCEPTIONS -DNO_RTTI -DNO_THREADSAFE_STATICS -fsized-deallocation
+// RUN: %clang_cc1 -fcoroutines-ts -DNO_EXCEPTIONS -DCOROUTINES -verify -fsized-deallocation %s
 
 // expected-no-diagnostics
 
diff --git a/test/Lexer/cxx1y_digit_separators.cpp b/test/Lexer/cxx1y_digit_separators.cpp
index 5536634..67dcd7c 100644
--- a/test/Lexer/cxx1y_digit_separators.cpp
+++ b/test/Lexer/cxx1y_digit_separators.cpp
@@ -51,6 +51,8 @@
   float u = 0x.'p1f; // expected-error {{hexadecimal floating literal requires a significand}}
   float v = 0e'f; // expected-error {{exponent has no digits}}
   float w = 0x0p'f; // expected-error {{exponent has no digits}}
+  float x = 0'e+1; // expected-error {{digit separator cannot appear at end of digit sequence}}
+  float y = 0x0'p+1; // expected-error {{digit separator cannot appear at end of digit sequence}}
 }
 
 #line 123'456
diff --git a/test/Lexer/cxx2a-spaceship.cpp b/test/Lexer/cxx2a-spaceship.cpp
new file mode 100644
index 0000000..604575e
--- /dev/null
+++ b/test/Lexer/cxx2a-spaceship.cpp
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -std=c++17 %s -verify
+// RUN: %clang_cc1 -std=c++2a %s -verify
+// RUN: %clang_cc1 -std=c++2a %s -verify -Wc++17-compat -DCOMPAT
+//
+// RUN: %clang_cc1 -std=c++17 %s -E -o - | FileCheck %s --check-prefix=CXX17
+// RUN: %clang_cc1 -std=c++2a %s -E -o - | FileCheck %s --check-prefix=CXX20
+
+namespace N {
+
+struct A {};
+void operator<=(A, A);
+#if __cplusplus > 201703L
+void operator<=>(A, A);
+#ifdef COMPAT
+// expected-warning@-2 {{'<=>' operator is incompatible with C++ standards before C++2a}}
+#endif
+#endif
+
+template<auto> struct X {};
+X<operator<=>
+#if __cplusplus <= 201703L
+  // expected-warning@-2 {{'<=>' is a single token in C++2a; add a space to avoid a change in behavior}}
+#else
+  >
+#endif
+#ifdef COMPAT
+// expected-warning@-7 {{'<=>' operator is incompatible with C++ standards before C++2a}}
+#endif
+  x;
+}
+
+// <=> can be formed by pasting other comparison operators.
+#if __cplusplus > 201703L
+#define STR(x) #x
+#define STR_EXPANDED(x) STR(x)
+#define PASTE(x, y) x ## y
+constexpr char a[] = STR_EXPANDED(PASTE(<, =>));
+constexpr char b[] = STR_EXPANDED(PASTE(<=, >));
+static_assert(__builtin_strcmp(a, "<=>") == 0);
+static_assert(__builtin_strcmp(b, "<=>") == 0);
+#endif
+
+// -E must not accidentally form a <=> token.
+
+// CXX17: preprocess1: < =>
+// CXX17: preprocess2: <=>
+// CXX17: preprocess3: < =>
+// CXX17: preprocess4: <=>=
+// CXX17: preprocess5: <=>>
+// CXX17: preprocess6: <=>>=
+// CXX17: preprocess7: <=>
+// CXX17: preprocess8: <=>=
+//
+// CXX20: preprocess1: < =>
+// CXX20: preprocess2: <= >
+// CXX20: preprocess3: < =>
+// CXX20: preprocess4: <= >=
+// CXX20: preprocess5: <= >>
+// CXX20: preprocess6: <= >>=
+// CXX20: preprocess7: <=>
+// CXX20: preprocess8: <=>=
+
+#define ID(x) x
+[[some_vendor::some_attribute( // expected-warning {{unknown attribute}}
+preprocess1: ID(<)ID(=>),
+preprocess2: ID(<=)ID(>),
+preprocess3: ID(<)ID(=)ID(>),
+preprocess4: ID(<=)ID(>=),
+preprocess5: ID(<=)ID(>>),
+preprocess6: ID(<=)ID(>>=),
+preprocess7: ID(<=>) // expected-warning 0-1{{'<=>'}}
+preprocess8: ID(<=>=) // expected-warning 0-1{{'<=>'}}
+)]];
diff --git a/test/Lexer/half-literal.cpp b/test/Lexer/half-literal.cpp
index b87eae4..8e0034d 100644
--- a/test/Lexer/half-literal.cpp
+++ b/test/Lexer/half-literal.cpp
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -pedantic %s
-float a = 1.0h; // expected-error{{invalid suffix 'h' on floating constant}}
+float a = 1.0h; // expected-error{{no matching literal operator for call to 'operator""h' with argument of type 'long double' or 'const char *', and no matching literal operator template}}
 float b = 1.0H; // expected-error{{invalid suffix 'H' on floating constant}}
 
 _Float16 c = 1.f166; // expected-error{{invalid suffix 'f166' on floating constant}}
diff --git a/test/Lexer/has_feature_address_sanitizer.cpp b/test/Lexer/has_feature_address_sanitizer.cpp
index 406a2ab..b5b4de8 100644
--- a/test/Lexer/has_feature_address_sanitizer.cpp
+++ b/test/Lexer/has_feature_address_sanitizer.cpp
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -E -fsanitize=address %s -o - | FileCheck --check-prefix=CHECK-ASAN %s
 // RUN: %clang_cc1 -E -fsanitize=kernel-address %s -o - | FileCheck --check-prefix=CHECK-ASAN %s
+// RUN: %clang_cc1 -E -fsanitize=hwaddress %s -o - | FileCheck --check-prefix=CHECK-HWASAN %s
 // RUN: %clang_cc1 -E  %s -o - | FileCheck --check-prefix=CHECK-NO-ASAN %s
 
 #if __has_feature(address_sanitizer)
@@ -8,5 +9,17 @@
 int AddressSanitizerDisabled();
 #endif
 
+#if __has_feature(hwaddress_sanitizer)
+int HWAddressSanitizerEnabled();
+#else
+int HWAddressSanitizerDisabled();
+#endif
+
 // CHECK-ASAN: AddressSanitizerEnabled
+// CHECK-ASAN: HWAddressSanitizerDisabled
+
+// CHECK-HWASAN: AddressSanitizerDisabled
+// CHECK-HWASAN: HWAddressSanitizerEnabled
+
 // CHECK-NO-ASAN: AddressSanitizerDisabled
+// CHECK-NO-ASAN: HWAddressSanitizerDisabled
diff --git a/test/Lexer/has_feature_objc_arc.m b/test/Lexer/has_feature_objc_arc.m
index 279b91a..d5c8493 100644
--- a/test/Lexer/has_feature_objc_arc.m
+++ b/test/Lexer/has_feature_objc_arc.m
@@ -13,8 +13,16 @@
 void no_objc_arc_weak_feature();
 #endif
 
+#if __has_feature(objc_arc_fields)
+void has_objc_arc_fields();
+#else
+void no_objc_arc_fields();
+#endif
+
 // CHECK-ARC: void has_objc_arc_feature();
 // CHECK-ARC: void has_objc_arc_weak_feature();
+// CHECK-ARC: void has_objc_arc_fields();
 
 // CHECK-ARCLITE: void has_objc_arc_feature();
 // CHECK-ARCLITE: void no_objc_arc_weak_feature();
+// CHECK-ARCLITE: void has_objc_arc_fields();
diff --git a/test/Lexer/null-character-in-literal.c b/test/Lexer/null-character-in-literal.c
new file mode 100644
index 0000000..a479547
--- /dev/null
+++ b/test/Lexer/null-character-in-literal.c
Binary files differ
diff --git a/test/Lexer/unicode.c b/test/Lexer/unicode.c
index 30805d1..30e353f 100644
--- a/test/Lexer/unicode.c
+++ b/test/Lexer/unicode.c
@@ -33,3 +33,8 @@
   int 🌷 = 🌵(🌹);
   return 🌷;
 }
+
+int n; = 3; // expected-warning {{treating Unicode character <U+037E> as identifier character rather than as ';' symbol}}
+int *n꞉꞉v = &n;; // expected-warning 2{{treating Unicode character <U+A789> as identifier character rather than as ':' symbol}}
+                 // expected-warning@-1 {{treating Unicode character <U+037E> as identifier character rather than as ';' symbol}}
+int v＝［＝］（auto）｛return～x；｝（）; // expected-warning 12{{treating Unicode character}}
diff --git a/test/Misc/ast-dump-attr.cpp b/test/Misc/ast-dump-attr.cpp
index bf94295..1e4eb7c 100644
--- a/test/Misc/ast-dump-attr.cpp
+++ b/test/Misc/ast-dump-attr.cpp
@@ -1,211 +1,211 @@
-// RUN: %clang_cc1 -triple x86_64-pc-linux -std=c++11 -Wno-deprecated-declarations -ast-dump -ast-dump-filter Test %s | FileCheck --strict-whitespace %s
-
-int TestLocation
-__attribute__((unused));
-// CHECK:      VarDecl{{.*}}TestLocation
-// CHECK-NEXT:   UnusedAttr 0x{{[^ ]*}} <line:[[@LINE-2]]:16>
-
-int TestIndent
-__attribute__((unused));
-// CHECK:      {{^}}VarDecl{{.*TestIndent[^()]*$}}
-// CHECK-NEXT: {{^}}`-UnusedAttr{{[^()]*$}}
-
-void TestAttributedStmt() {
-  switch (1) {
-  case 1:
-    [[clang::fallthrough]];
-  case 2:
-    ;
-  }
-}
-// CHECK:      FunctionDecl{{.*}}TestAttributedStmt
-// CHECK:      AttributedStmt
-// CHECK-NEXT:   FallThroughAttr
-// CHECK-NEXT:   NullStmt
-
-[[clang::warn_unused_result]] int TestCXX11DeclAttr();
-// CHECK:      FunctionDecl{{.*}}TestCXX11DeclAttr
-// CHECK-NEXT:   WarnUnusedResultAttr
-
-int TestAlignedNull __attribute__((aligned));
-// CHECK:      VarDecl{{.*}}TestAlignedNull
-// CHECK-NEXT:   AlignedAttr {{.*}} aligned
-// CHECK-NEXT:     <<<NULL>>>
-
-int TestAlignedExpr __attribute__((aligned(4)));
-// CHECK:      VarDecl{{.*}}TestAlignedExpr
-// CHECK-NEXT:   AlignedAttr {{.*}} aligned
-// CHECK-NEXT:     IntegerLiteral
-
-int TestEnum __attribute__((visibility("default")));
-// CHECK:      VarDecl{{.*}}TestEnum
-// CHECK-NEXT:   VisibilityAttr{{.*}} Default
-
-class __attribute__((lockable)) Mutex {
-} mu1, mu2;
-int TestExpr __attribute__((guarded_by(mu1)));
-// CHECK:      VarDecl{{.*}}TestExpr
-// CHECK-NEXT:   GuardedByAttr
-// CHECK-NEXT:     DeclRefExpr{{.*}}mu1
-
-class Mutex TestVariadicExpr __attribute__((acquired_after(mu1, mu2)));
-// CHECK:      VarDecl{{.*}}TestVariadicExpr
-// CHECK:        AcquiredAfterAttr
-// CHECK-NEXT:     DeclRefExpr{{.*}}mu1
-// CHECK-NEXT:     DeclRefExpr{{.*}}mu2
-
-void function1(void *) {
-  int TestFunction __attribute__((cleanup(function1)));
-}
-// CHECK:      VarDecl{{.*}}TestFunction
-// CHECK-NEXT:   CleanupAttr{{.*}} Function{{.*}}function1
-
-void TestIdentifier(void *, int)
-__attribute__((pointer_with_type_tag(ident1,1,2)));
-// CHECK: FunctionDecl{{.*}}TestIdentifier
-// CHECK:   ArgumentWithTypeTagAttr{{.*}} pointer_with_type_tag ident1
-
-void TestBool(void *, int)
-__attribute__((pointer_with_type_tag(bool1,1,2)));
-// CHECK: FunctionDecl{{.*}}TestBool
-// CHECK:   ArgumentWithTypeTagAttr{{.*}}pointer_with_type_tag bool1 0 1 IsPointer
-
-void TestUnsigned(void *, int)
-__attribute__((pointer_with_type_tag(unsigned1,1,2)));
-// CHECK: FunctionDecl{{.*}}TestUnsigned
-// CHECK:   ArgumentWithTypeTagAttr{{.*}} pointer_with_type_tag unsigned1 0 1
-
-void TestInt(void) __attribute__((constructor(123)));
-// CHECK:      FunctionDecl{{.*}}TestInt
-// CHECK-NEXT:   ConstructorAttr{{.*}} 123
-
-static int TestString __attribute__((alias("alias1")));
-// CHECK:      VarDecl{{.*}}TestString
-// CHECK-NEXT:   AliasAttr{{.*}} "alias1"
-
-extern struct s1 TestType
-__attribute__((type_tag_for_datatype(ident1,int)));
-// CHECK:      VarDecl{{.*}}TestType
-// CHECK-NEXT:   TypeTagForDatatypeAttr{{.*}} int
-
-void TestLabel() {
-L: __attribute__((unused)) int i;
-// CHECK: LabelStmt{{.*}}'L'
-// CHECK: VarDecl{{.*}}i 'int'
-// CHECK-NEXT: UnusedAttr{{.*}}
-
-M: __attribute(()) int j;
-// CHECK: LabelStmt {{.*}} 'M'
-// CHECK-NEXT: DeclStmt
-// CHECK-NEXT: VarDecl {{.*}} j 'int'
-
-N: __attribute(()) ;
-// CHECK: LabelStmt {{.*}} 'N'
-// CHECK-NEXT: NullStmt
-}
-
-namespace Test {
-extern "C" int printf(const char *format, ...);
-// CHECK: FunctionDecl{{.*}}printf
-// CHECK-NEXT: ParmVarDecl{{.*}}format{{.*}}'const char *'
-// CHECK-NEXT: FormatAttr{{.*}}Implicit printf 1 2
-
-alignas(8) extern int x;
-extern int x;
-// CHECK: VarDecl{{.*}} x 'int'
-// CHECK: VarDecl{{.*}} x 'int'
-// CHECK-NEXT: AlignedAttr{{.*}} Inherited
-}
-
-int __attribute__((cdecl)) TestOne(void), TestTwo(void);
-// CHECK: FunctionDecl{{.*}}TestOne{{.*}}__attribute__((cdecl))
-// CHECK: FunctionDecl{{.*}}TestTwo{{.*}}__attribute__((cdecl))
-
-void func() {
-  auto Test = []() __attribute__((no_thread_safety_analysis)) {};
-  // CHECK: CXXMethodDecl{{.*}}operator() 'void (void) const'
-  // CHECK: NoThreadSafetyAnalysisAttr
-
-  // Because GNU's noreturn applies to the function type, and this lambda does
-  // not have a capture list, the call operator and the function pointer
-  // conversion should both be noreturn, but the method should not contain a
-  // NoReturnAttr because the attribute applied to the type.
-  auto Test2 = []() __attribute__((noreturn)) { while(1); };
-  // CHECK: CXXMethodDecl{{.*}}operator() 'void (void) __attribute__((noreturn)) const'
-  // CHECK-NOT: NoReturnAttr
-  // CHECK: CXXConversionDecl{{.*}}operator void (*)() __attribute__((noreturn))
-}
-
-namespace PR20930 {
-struct S {
-  struct { int Test __attribute__((deprecated)); };
-  // CHECK: FieldDecl{{.*}}Test 'int'
-  // CHECK-NEXT: DeprecatedAttr
-};
-
-void f() {
-  S s;
-  s.Test = 1;
-  // CHECK: IndirectFieldDecl{{.*}}Test 'int'
-  // CHECK: DeprecatedAttr
-}
-}
-
-struct __attribute__((objc_bridge_related(NSParagraphStyle,,))) TestBridgedRef;
-// CHECK: CXXRecordDecl{{.*}} struct TestBridgedRef
-// CHECK-NEXT: ObjCBridgeRelatedAttr{{.*}} NSParagraphStyle
-
-void TestExternalSourceSymbolAttr1()
-__attribute__((external_source_symbol(language="Swift", defined_in="module", generated_declaration)));
-// CHECK: FunctionDecl{{.*}} TestExternalSourceSymbolAttr1
-// CHECK-NEXT: ExternalSourceSymbolAttr{{.*}} "Swift" "module" GeneratedDeclaration
-
-void TestExternalSourceSymbolAttr2()
-__attribute__((external_source_symbol(defined_in="module", language="Swift")));
-// CHECK: FunctionDecl{{.*}} TestExternalSourceSymbolAttr2
-// CHECK-NEXT: ExternalSourceSymbolAttr{{.*}} "Swift" "module"{{$}}
-
-void TestExternalSourceSymbolAttr3()
-__attribute__((external_source_symbol(generated_declaration, language="Objective-C++", defined_in="module")));
-// CHECK: FunctionDecl{{.*}} TestExternalSourceSymbolAttr3
-// CHECK-NEXT: ExternalSourceSymbolAttr{{.*}} "Objective-C++" "module" GeneratedDeclaration
-
-void TestExternalSourceSymbolAttr4()
-__attribute__((external_source_symbol(defined_in="Some external file.cs", generated_declaration, language="C Sharp")));
-// CHECK: FunctionDecl{{.*}} TestExternalSourceSymbolAttr4
-// CHECK-NEXT: ExternalSourceSymbolAttr{{.*}} "C Sharp" "Some external file.cs" GeneratedDeclaration
-
-void TestExternalSourceSymbolAttr5()
-__attribute__((external_source_symbol(generated_declaration, defined_in="module", language="Swift")));
-// CHECK: FunctionDecl{{.*}} TestExternalSourceSymbolAttr5
-// CHECK-NEXT: ExternalSourceSymbolAttr{{.*}} "Swift" "module" GeneratedDeclaration
-
-namespace TestNoEscape {
-  void noescapeFunc(int *p0, __attribute__((noescape)) int *p1) {}
-  // CHECK: `-FunctionDecl{{.*}} noescapeFunc 'void (int *, __attribute__((noescape)) int *)'
-  // CHECK-NEXT: ParmVarDecl
-  // CHECK-NEXT: ParmVarDecl
-  // CHECK-NEXT: NoEscapeAttr
-}
-
-namespace TestSuppress {
-  [[gsl::suppress("at-namespace")]];
-  // CHECK: NamespaceDecl{{.*}} TestSuppress
-  // CHECK-NEXT: EmptyDecl{{.*}}
-  // CHECK-NEXT: SuppressAttr{{.*}} at-namespace
-  [[gsl::suppress("on-decl")]]
-  void TestSuppressFunction();
-  // CHECK: FunctionDecl{{.*}} TestSuppressFunction
-  // CHECK-NEXT SuppressAttr{{.*}} on-decl
-
-  void f() {
-      int *i;
-
-      [[gsl::suppress("on-stmt")]] {
-      // CHECK: AttributedStmt
-      // CHECK-NEXT: SuppressAttr{{.*}} on-stmt
-      // CHECK-NEXT: CompoundStmt
-        i = reinterpret_cast<int*>(7);
-      }
-    }
-}
+// RUN: %clang_cc1 -triple x86_64-pc-linux -std=c++11 -Wno-deprecated-declarations -ast-dump -ast-dump-filter Test %s | FileCheck --strict-whitespace %s
+
+int TestLocation
+__attribute__((unused));
+// CHECK:      VarDecl{{.*}}TestLocation
+// CHECK-NEXT:   UnusedAttr 0x{{[^ ]*}} <line:[[@LINE-2]]:16>
+
+int TestIndent
+__attribute__((unused));
+// CHECK:      {{^}}VarDecl{{.*TestIndent[^()]*$}}
+// CHECK-NEXT: {{^}}`-UnusedAttr{{[^()]*$}}
+
+void TestAttributedStmt() {
+  switch (1) {
+  case 1:
+    [[clang::fallthrough]];
+  case 2:
+    ;
+  }
+}
+// CHECK:      FunctionDecl{{.*}}TestAttributedStmt
+// CHECK:      AttributedStmt
+// CHECK-NEXT:   FallThroughAttr
+// CHECK-NEXT:   NullStmt
+
+[[clang::warn_unused_result]] int TestCXX11DeclAttr();
+// CHECK:      FunctionDecl{{.*}}TestCXX11DeclAttr
+// CHECK-NEXT:   WarnUnusedResultAttr
+
+int TestAlignedNull __attribute__((aligned));
+// CHECK:      VarDecl{{.*}}TestAlignedNull
+// CHECK-NEXT:   AlignedAttr {{.*}} aligned
+// CHECK-NEXT:     <<<NULL>>>
+
+int TestAlignedExpr __attribute__((aligned(4)));
+// CHECK:      VarDecl{{.*}}TestAlignedExpr
+// CHECK-NEXT:   AlignedAttr {{.*}} aligned
+// CHECK-NEXT:     IntegerLiteral
+
+int TestEnum __attribute__((visibility("default")));
+// CHECK:      VarDecl{{.*}}TestEnum
+// CHECK-NEXT:   VisibilityAttr{{.*}} Default
+
+class __attribute__((lockable)) Mutex {
+} mu1, mu2;
+int TestExpr __attribute__((guarded_by(mu1)));
+// CHECK:      VarDecl{{.*}}TestExpr
+// CHECK-NEXT:   GuardedByAttr
+// CHECK-NEXT:     DeclRefExpr{{.*}}mu1
+
+class Mutex TestVariadicExpr __attribute__((acquired_after(mu1, mu2)));
+// CHECK:      VarDecl{{.*}}TestVariadicExpr
+// CHECK:        AcquiredAfterAttr
+// CHECK-NEXT:     DeclRefExpr{{.*}}mu1
+// CHECK-NEXT:     DeclRefExpr{{.*}}mu2
+
+void function1(void *) {
+  int TestFunction __attribute__((cleanup(function1)));
+}
+// CHECK:      VarDecl{{.*}}TestFunction
+// CHECK-NEXT:   CleanupAttr{{.*}} Function{{.*}}function1
+
+void TestIdentifier(void *, int)
+__attribute__((pointer_with_type_tag(ident1,1,2)));
+// CHECK: FunctionDecl{{.*}}TestIdentifier
+// CHECK:   ArgumentWithTypeTagAttr{{.*}} pointer_with_type_tag ident1
+
+void TestBool(void *, int)
+__attribute__((pointer_with_type_tag(bool1,1,2)));
+// CHECK: FunctionDecl{{.*}}TestBool
+// CHECK:   ArgumentWithTypeTagAttr{{.*}}pointer_with_type_tag bool1 1 2 IsPointer
+
+void TestUnsigned(void *, int)
+__attribute__((pointer_with_type_tag(unsigned1,1,2)));
+// CHECK: FunctionDecl{{.*}}TestUnsigned
+// CHECK:   ArgumentWithTypeTagAttr{{.*}} pointer_with_type_tag unsigned1 1 2
+
+void TestInt(void) __attribute__((constructor(123)));
+// CHECK:      FunctionDecl{{.*}}TestInt
+// CHECK-NEXT:   ConstructorAttr{{.*}} 123
+
+static int TestString __attribute__((alias("alias1")));
+// CHECK:      VarDecl{{.*}}TestString
+// CHECK-NEXT:   AliasAttr{{.*}} "alias1"
+
+extern struct s1 TestType
+__attribute__((type_tag_for_datatype(ident1,int)));
+// CHECK:      VarDecl{{.*}}TestType
+// CHECK-NEXT:   TypeTagForDatatypeAttr{{.*}} int
+
+void TestLabel() {
+L: __attribute__((unused)) int i;
+// CHECK: LabelStmt{{.*}}'L'
+// CHECK: VarDecl{{.*}}i 'int'
+// CHECK-NEXT: UnusedAttr{{.*}}
+
+M: __attribute(()) int j;
+// CHECK: LabelStmt {{.*}} 'M'
+// CHECK-NEXT: DeclStmt
+// CHECK-NEXT: VarDecl {{.*}} j 'int'
+
+N: __attribute(()) ;
+// CHECK: LabelStmt {{.*}} 'N'
+// CHECK-NEXT: NullStmt
+}
+
+namespace Test {
+extern "C" int printf(const char *format, ...);
+// CHECK: FunctionDecl{{.*}}printf
+// CHECK-NEXT: ParmVarDecl{{.*}}format{{.*}}'const char *'
+// CHECK-NEXT: FormatAttr{{.*}}Implicit printf 1 2
+
+alignas(8) extern int x;
+extern int x;
+// CHECK: VarDecl{{.*}} x 'int'
+// CHECK: VarDecl{{.*}} x 'int'
+// CHECK-NEXT: AlignedAttr{{.*}} Inherited
+}
+
+int __attribute__((cdecl)) TestOne(void), TestTwo(void);
+// CHECK: FunctionDecl{{.*}}TestOne{{.*}}__attribute__((cdecl))
+// CHECK: FunctionDecl{{.*}}TestTwo{{.*}}__attribute__((cdecl))
+
+void func() {
+  auto Test = []() __attribute__((no_thread_safety_analysis)) {};
+  // CHECK: CXXMethodDecl{{.*}}operator() 'void () const'
+  // CHECK: NoThreadSafetyAnalysisAttr
+
+  // Because GNU's noreturn applies to the function type, and this lambda does
+  // not have a capture list, the call operator and the function pointer
+  // conversion should both be noreturn, but the method should not contain a
+  // NoReturnAttr because the attribute applied to the type.
+  auto Test2 = []() __attribute__((noreturn)) { while(1); };
+  // CHECK: CXXMethodDecl{{.*}}operator() 'void () __attribute__((noreturn)) const'
+  // CHECK-NOT: NoReturnAttr
+  // CHECK: CXXConversionDecl{{.*}}operator void (*)() __attribute__((noreturn))
+}
+
+namespace PR20930 {
+struct S {
+  struct { int Test __attribute__((deprecated)); };
+  // CHECK: FieldDecl{{.*}}Test 'int'
+  // CHECK-NEXT: DeprecatedAttr
+};
+
+void f() {
+  S s;
+  s.Test = 1;
+  // CHECK: IndirectFieldDecl{{.*}}Test 'int'
+  // CHECK: DeprecatedAttr
+}
+}
+
+struct __attribute__((objc_bridge_related(NSParagraphStyle,,))) TestBridgedRef;
+// CHECK: CXXRecordDecl{{.*}} struct TestBridgedRef
+// CHECK-NEXT: ObjCBridgeRelatedAttr{{.*}} NSParagraphStyle
+
+void TestExternalSourceSymbolAttr1()
+__attribute__((external_source_symbol(language="Swift", defined_in="module", generated_declaration)));
+// CHECK: FunctionDecl{{.*}} TestExternalSourceSymbolAttr1
+// CHECK-NEXT: ExternalSourceSymbolAttr{{.*}} "Swift" "module" GeneratedDeclaration
+
+void TestExternalSourceSymbolAttr2()
+__attribute__((external_source_symbol(defined_in="module", language="Swift")));
+// CHECK: FunctionDecl{{.*}} TestExternalSourceSymbolAttr2
+// CHECK-NEXT: ExternalSourceSymbolAttr{{.*}} "Swift" "module"{{$}}
+
+void TestExternalSourceSymbolAttr3()
+__attribute__((external_source_symbol(generated_declaration, language="Objective-C++", defined_in="module")));
+// CHECK: FunctionDecl{{.*}} TestExternalSourceSymbolAttr3
+// CHECK-NEXT: ExternalSourceSymbolAttr{{.*}} "Objective-C++" "module" GeneratedDeclaration
+
+void TestExternalSourceSymbolAttr4()
+__attribute__((external_source_symbol(defined_in="Some external file.cs", generated_declaration, language="C Sharp")));
+// CHECK: FunctionDecl{{.*}} TestExternalSourceSymbolAttr4
+// CHECK-NEXT: ExternalSourceSymbolAttr{{.*}} "C Sharp" "Some external file.cs" GeneratedDeclaration
+
+void TestExternalSourceSymbolAttr5()
+__attribute__((external_source_symbol(generated_declaration, defined_in="module", language="Swift")));
+// CHECK: FunctionDecl{{.*}} TestExternalSourceSymbolAttr5
+// CHECK-NEXT: ExternalSourceSymbolAttr{{.*}} "Swift" "module" GeneratedDeclaration
+
+namespace TestNoEscape {
+  void noescapeFunc(int *p0, __attribute__((noescape)) int *p1) {}
+  // CHECK: `-FunctionDecl{{.*}} noescapeFunc 'void (int *, __attribute__((noescape)) int *)'
+  // CHECK-NEXT: ParmVarDecl
+  // CHECK-NEXT: ParmVarDecl
+  // CHECK-NEXT: NoEscapeAttr
+}
+
+namespace TestSuppress {
+  [[gsl::suppress("at-namespace")]];
+  // CHECK: NamespaceDecl{{.*}} TestSuppress
+  // CHECK-NEXT: EmptyDecl{{.*}}
+  // CHECK-NEXT: SuppressAttr{{.*}} at-namespace
+  [[gsl::suppress("on-decl")]]
+  void TestSuppressFunction();
+  // CHECK: FunctionDecl{{.*}} TestSuppressFunction
+  // CHECK-NEXT SuppressAttr{{.*}} on-decl
+
+  void f() {
+      int *i;
+
+      [[gsl::suppress("on-stmt")]] {
+      // CHECK: AttributedStmt
+      // CHECK-NEXT: SuppressAttr{{.*}} on-stmt
+      // CHECK-NEXT: CompoundStmt
+        i = reinterpret_cast<int*>(7);
+      }
+    }
+}
diff --git a/test/Misc/ast-dump-attr.m b/test/Misc/ast-dump-attr.m
new file mode 100644
index 0000000..8775d40
--- /dev/null
+++ b/test/Misc/ast-dump-attr.m
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -fdouble-square-bracket-attributes -triple x86_64-apple-macosx10.10.0 -ast-dump -ast-dump-filter Test %s | FileCheck --strict-whitespace %s
+
+@interface NSObject
+@end
+
+[[clang::objc_exception]]
+@interface Test1 {
+// CHECK: ObjCInterfaceDecl{{.*}} Test1
+// CHECK-NEXT: ObjCExceptionAttr{{.*}}
+  [[clang::iboutlet]] NSObject *Test2;
+// CHECK: ObjCIvarDecl{{.*}} Test2
+// CHECK-NEXT: IBOutletAttr
+}
+@property (readonly) [[clang::objc_returns_inner_pointer]] void *Test3, *Test4;
+// CHECK: ObjCPropertyDecl{{.*}} Test3 'void *' readonly
+// CHECK-NEXT: ObjCReturnsInnerPointerAttr
+// CHECK-NEXT: ObjCPropertyDecl{{.*}} Test4 'void *' readonly
+// CHECK-NEXT: ObjCReturnsInnerPointerAttr
+
+@property (readonly) [[clang::iboutlet]] NSObject *Test5;
+// CHECK: ObjCPropertyDecl{{.*}} Test5 'NSObject *' readonly
+// CHECK-NEXT: IBOutletAttr
+
+// CHECK: ObjCMethodDecl{{.*}} implicit{{.*}} Test3
+// CHECK-NEXT: ObjCReturnsInnerPointerAttr
+// CHECK: ObjCMethodDecl{{.*}} implicit{{.*}} Test4
+// CHECK-NEXT: ObjCReturnsInnerPointerAttr
+// CHECK: ObjCMethodDecl{{.*}} implicit{{.*}} Test5
+// CHECK-NOT: IBOutletAttr
+@end
+
+[[clang::objc_runtime_name("name")]] @protocol Test6;
+// CHECK: ObjCProtocolDecl{{.*}} Test6
+// CHECK-NEXT: ObjCRuntimeNameAttr{{.*}} "name"
+
+[[clang::objc_protocol_requires_explicit_implementation]]
+@protocol Test7
+// CHECK: ObjCProtocolDecl{{.*}} Test7
+// CHECK-NEXT: ObjCExplicitProtocolImplAttr
+@end
+
+@interface Test8
+// CHECK: ObjCInterfaceDecl{{.*}} Test8
+-(void)Test9 [[clang::ns_consumes_self]];
+// CHECK: ObjCMethodDecl{{.*}} Test9 'void'
+// CHECK-NEXT: NSConsumesSelfAttr
+-(void) [[clang::ns_consumes_self]] Test10: (int)Test11;
+// CHECK: ObjCMethodDecl{{.*}} Test10: 'void'
+// CHECK-NEXT: |-ParmVarDecl{{.*}} Test11 'int'
+// CHECK-NEXT: `-NSConsumesSelfAttr
+-(void)Test12: (int *) [[clang::noescape]] Test13  to:(int)Test14 [[clang::ns_consumes_self]];
+// CHECK: ObjCMethodDecl{{.*}} Test12:to: 'void'
+// CHECK-NEXT: |-ParmVarDecl{{.*}} Test13 'int *'
+// CHECK-NEXT: | `-NoEscapeAttr
+// CHECK-NEXT: |-ParmVarDecl{{.*}} Test14 'int'
+// CHECK-NEXT: `-NSConsumesSelfAttr
+@end
diff --git a/test/Misc/ast-dump-color.cpp b/test/Misc/ast-dump-color.cpp
index bc01d49..ad7ea30 100644
--- a/test/Misc/ast-dump-color.cpp
+++ b/test/Misc/ast-dump-color.cpp
@@ -32,7 +32,7 @@
 //CHECK: {{^}}[[GREEN:.\[0;1;32m]]TranslationUnitDecl[[RESET:.\[0m]][[Yellow:.\[0;33m]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]<invalid sloc>[[RESET]]> [[Yellow]]<invalid sloc>[[RESET]]{{$}}
 //CHECK: {{^}}[[Blue:.\[0;34m]]|-[[RESET]][[GREEN]]TypedefDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]<invalid sloc>[[RESET]]> [[Yellow]]<invalid sloc>[[RESET]] implicit[[CYAN:.\[0;1;36m]] __int128_t[[RESET]] [[Green:.\[0;32m]]'__int128'[[RESET]]{{$}}
 //CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]TypedefDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]<invalid sloc>[[RESET]]> [[Yellow]]<invalid sloc>[[RESET]] implicit[[CYAN]] __uint128_t[[RESET]] [[Green]]'unsigned __int128'[[RESET]]{{$}}
-//CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]TypedefDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]<invalid sloc>[[RESET]]> [[Yellow]]<invalid sloc>[[RESET]] implicit[[CYAN]] __builtin_va_list[[RESET]] [[Green]]'struct __va_list_tag [1]'[[RESET]]{{$}}
+//CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]TypedefDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]<invalid sloc>[[RESET]]> [[Yellow]]<invalid sloc>[[RESET]] implicit[[CYAN]] __builtin_va_list[[RESET]] [[Green]]'__va_list_tag [1]'[[RESET]]{{$}}
 //CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]VarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]{{.*}}ast-dump-color.cpp:6:1[[RESET]], [[Yellow]]col:5[[RESET]]> [[Yellow]]col:5[[RESET]][[CYAN]] Test[[RESET]] [[Green]]'int'[[RESET]]
 //CHECK: {{^}}[[Blue]]| |-[[RESET]][[BLUE:.\[0;1;34m]]UnusedAttr[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:25[[RESET]]> unused{{$}}
 //CHECK: {{^}}[[Blue]]| `-[[RESET]][[Blue]]FullComment[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:4:4[[RESET]], [[Yellow]]line:5:8[[RESET]]>{{$}}
@@ -43,7 +43,7 @@
 //CHECK: {{^}}[[Blue]]|     |-[[RESET]][[Blue]]HTMLEndTagComment[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:13[[RESET]], [[Yellow]]col:16[[RESET]]> Name="a"{{$}}
 //CHECK: {{^}}[[Blue]]|     |-[[RESET]][[Blue]]TextComment[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:5:4[[RESET]]> Text=" "{{$}}
 //CHECK: {{^}}[[Blue]]|     `-[[RESET]][[Blue]]HTMLStartTagComment[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:5[[RESET]], [[Yellow]]col:8[[RESET]]> Name="br" SelfClosing{{$}}
-//CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]FunctionDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:9:1[[RESET]], [[Yellow]]line:16:1[[RESET]]> [[Yellow]]line:9:6[[RESET]][[CYAN]] TestAttributedStmt[[RESET]] [[Green]]'void (void)'[[RESET]]{{$}}
+//CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]FunctionDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:9:1[[RESET]], [[Yellow]]line:16:1[[RESET]]> [[Yellow]]line:9:6[[RESET]][[CYAN]] TestAttributedStmt[[RESET]] [[Green]]'void ()'[[RESET]]{{$}}
 //CHECK: {{^}}[[Blue]]| |-[[RESET]][[MAGENTA:.\[0;1;35m]]CompoundStmt[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:27[[RESET]], [[Yellow]]line:16:1[[RESET]]>{{$}}
 //CHECK: {{^}}[[Blue]]| | `-[[RESET]][[MAGENTA]]SwitchStmt[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:10:3[[RESET]], [[Yellow]]line:15:3[[RESET]]>{{$}}
 //CHECK: {{^}}[[Blue]]| |   |-[[RESET]][[Blue:.\[0;34m]]<<<NULL>>>[[RESET]]{{$}}
@@ -75,29 +75,29 @@
 //CHECK: {{^}}[[Blue]]| |   | `-[[RESET]][[Blue]]TextComment[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:6[[RESET]], [[Yellow]]col:22[[RESET]]> Text=" Another variable"{{$}}
 //CHECK: {{^}}[[Blue]]| |   `-[[RESET]][[Blue]]ParagraphComment[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:23:6[[RESET]], [[Yellow]]col:44[[RESET]]>{{$}}
 //CHECK: {{^}}[[Blue]]| |     `-[[RESET]][[Blue]]TextComment[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:6[[RESET]], [[Yellow]]col:44[[RESET]]> Text=" Like the other variable, but different"{{$}}
-//CHECK: {{^}}[[Blue]]| |-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:18:33[[RESET]]> [[Yellow]]col:33[[RESET]] implicit used[[CYAN]] Mutex[[RESET]] [[Green]]'void (void) noexcept'[[RESET]] inline{{.*$}}
+//CHECK: {{^}}[[Blue]]| |-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:18:33[[RESET]]> [[Yellow]]col:33[[RESET]] implicit used[[CYAN]] Mutex[[RESET]] [[Green]]'void () noexcept'[[RESET]] inline{{.*$}}
 //CHECK: {{^}}[[Blue]]| | `-[[RESET]][[MAGENTA]]CompoundStmt[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:33[[RESET]]>{{$}}
-//CHECK: {{^}}[[Blue]]| |-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:33[[RESET]]> [[Yellow]]col:33[[RESET]] implicit constexpr[[CYAN]] Mutex[[RESET]] [[Green]]'void (const class Mutex &)'[[RESET]] inline{{ .*$}}
-//CHECK: {{^}}[[Blue]]| | `-[[RESET]][[GREEN]]ParmVarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:33[[RESET]]> [[Yellow]]col:33[[RESET]] [[Green]]'const class Mutex &'[[RESET]]{{$}}
-//CHECK: {{^}}[[Blue]]| `-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:33[[RESET]]> [[Yellow]]col:33[[RESET]] implicit constexpr[[CYAN]] Mutex[[RESET]] [[Green]]'void (class Mutex &&)'[[RESET]] inline{{ .*$}}
-//CHECK: {{^}}[[Blue]]|   `-[[RESET]][[GREEN]]ParmVarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:33[[RESET]]> [[Yellow]]col:33[[RESET]] [[Green]]'class Mutex &&'[[RESET]]{{$}}
-//CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]VarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:1[[RESET]], [[Yellow]]line:25:3[[RESET]]> [[Yellow]]col:3[[RESET]] referenced[[CYAN]] mu1[[RESET]] [[Green]]'class Mutex':'class Mutex'[[RESET]]
-//CHECK: {{^}}[[Blue]]| `-[[RESET]][[MAGENTA]]CXXConstructExpr[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:3[[RESET]]> [[Green]]'class Mutex':'class Mutex'[[RESET]][[Cyan]][[RESET]][[Cyan]][[RESET]] [[Green]]'void (void) noexcept'[[RESET]]{{$}}
-//CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]VarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:18:1[[RESET]], [[Yellow]]line:25:8[[RESET]]> [[Yellow]]col:8[[RESET]][[CYAN]] mu2[[RESET]] [[Green]]'class Mutex':'class Mutex'[[RESET]]
-//CHECK: {{^}}[[Blue]]| `-[[RESET]][[MAGENTA]]CXXConstructExpr[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:8[[RESET]]> [[Green]]'class Mutex':'class Mutex'[[RESET]][[Cyan]][[RESET]][[Cyan]][[RESET]] [[Green]]'void (void) noexcept'[[RESET]]{{$}}
+//CHECK: {{^}}[[Blue]]| |-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:33[[RESET]]> [[Yellow]]col:33[[RESET]] implicit constexpr[[CYAN]] Mutex[[RESET]] [[Green]]'void (const Mutex &)'[[RESET]] inline{{ .*$}}
+//CHECK: {{^}}[[Blue]]| | `-[[RESET]][[GREEN]]ParmVarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:33[[RESET]]> [[Yellow]]col:33[[RESET]] [[Green]]'const Mutex &'[[RESET]]{{$}}
+//CHECK: {{^}}[[Blue]]| `-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:33[[RESET]]> [[Yellow]]col:33[[RESET]] implicit constexpr[[CYAN]] Mutex[[RESET]] [[Green]]'void (Mutex &&)'[[RESET]] inline{{ .*$}}
+//CHECK: {{^}}[[Blue]]|   `-[[RESET]][[GREEN]]ParmVarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:33[[RESET]]> [[Yellow]]col:33[[RESET]] [[Green]]'Mutex &&'[[RESET]]{{$}}
+//CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]VarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:1[[RESET]], [[Yellow]]line:25:3[[RESET]]> [[Yellow]]col:3[[RESET]] referenced[[CYAN]] mu1[[RESET]] [[Green]]'class Mutex':'Mutex'[[RESET]]
+//CHECK: {{^}}[[Blue]]| `-[[RESET]][[MAGENTA]]CXXConstructExpr[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:3[[RESET]]> [[Green]]'class Mutex':'Mutex'[[RESET]][[Cyan]][[RESET]][[Cyan]][[RESET]] [[Green]]'void () noexcept'[[RESET]]{{$}}
+//CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]VarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:18:1[[RESET]], [[Yellow]]line:25:8[[RESET]]> [[Yellow]]col:8[[RESET]][[CYAN]] mu2[[RESET]] [[Green]]'class Mutex':'Mutex'[[RESET]]
+//CHECK: {{^}}[[Blue]]| `-[[RESET]][[MAGENTA]]CXXConstructExpr[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:8[[RESET]]> [[Green]]'class Mutex':'Mutex'[[RESET]][[Cyan]][[RESET]][[Cyan]][[RESET]] [[Green]]'void () noexcept'[[RESET]]{{$}}
 //CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]VarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:26:1[[RESET]], [[Yellow]]col:5[[RESET]]> [[Yellow]]col:5[[RESET]][[CYAN]] TestExpr[[RESET]] [[Green]]'int'[[RESET]]
 //CHECK: {{^}}[[Blue]]| `-[[RESET]][[BLUE]]GuardedByAttr[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:29[[RESET]], [[Yellow]]col:43[[RESET]]>{{$}}
-//CHECK: {{^}}[[Blue]]|   `-[[RESET]][[MAGENTA]]DeclRefExpr[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:40[[RESET]]> [[Green]]'class Mutex':'class Mutex'[[RESET]][[Cyan]] lvalue[[RESET]][[Cyan]][[RESET]] [[GREEN]]Var[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]][[CYAN]] 'mu1'[[RESET]] [[Green]]'class Mutex':'class Mutex'[[RESET]]{{$}}
+//CHECK: {{^}}[[Blue]]|   `-[[RESET]][[MAGENTA]]DeclRefExpr[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:40[[RESET]]> [[Green]]'class Mutex':'Mutex'[[RESET]][[Cyan]] lvalue[[RESET]][[Cyan]][[RESET]] [[GREEN]]Var[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]][[CYAN]] 'mu1'[[RESET]] [[Green]]'class Mutex':'Mutex'[[RESET]]{{$}}
 //CHECK: {{^}}[[Blue]]|-[[RESET]][[GREEN]]CXXRecordDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:28:1[[RESET]], [[Yellow]]line:30:1[[RESET]]> [[Yellow]]line:28:8[[RESET]] struct[[CYAN]] Invalid[[RESET]] definition
 //CHECK: {{^}}[[Blue]]| |-[[RESET]][[GREEN]]CXXRecordDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:1[[RESET]], [[Yellow]]col:8[[RESET]]> [[Yellow]]col:8[[RESET]] implicit referenced struct[[CYAN]] Invalid[[RESET]]
 //CHECK: {{^}}[[Blue]]| |-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:29:3[[RESET]], [[Yellow]]col:42[[RESET]]> [[Yellow]]col:29[[RESET]] invalid[[CYAN]] Invalid[[RESET]] [[Green]]'void (int)'[[RESET]]
 //CHECK: {{^}}[[Blue]]| | |-[[RESET]][[GREEN]]ParmVarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:37[[RESET]], [[Yellow]]<invalid sloc>[[RESET]]> [[Yellow]]col:42[[RESET]] invalid [[Green]]'int'[[RESET]]
 //CHECK: {{^}}[[Blue]]| | `-[[RESET]][[BLUE]]NoInlineAttr[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:18[[RESET]]>
-//CHECK: {{^}}[[Blue]]| |-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:28:8[[RESET]]> [[Yellow]]col:8[[RESET]] implicit used constexpr[[CYAN]] Invalid[[RESET]] [[Green]]'void (void) noexcept'[[RESET]] inline
+//CHECK: {{^}}[[Blue]]| |-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]line:28:8[[RESET]]> [[Yellow]]col:8[[RESET]] implicit used constexpr[[CYAN]] Invalid[[RESET]] [[Green]]'void () noexcept'[[RESET]] inline
 //CHECK: {{^}}[[Blue]]| | `-[[RESET]][[MAGENTA]]CompoundStmt[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:8[[RESET]]>
-//CHECK: {{^}}[[Blue]]| |-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:8[[RESET]]> [[Yellow]]col:8[[RESET]] implicit constexpr[[CYAN]] Invalid[[RESET]] [[Green]]'void (const struct Invalid &)'[[RESET]] inline default trivial noexcept-unevaluated 0x{{[0-9a-fA-F]*}}
-//CHECK: {{^}}[[Blue]]| | `-[[RESET]][[GREEN]]ParmVarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:8[[RESET]]> [[Yellow]]col:8[[RESET]] [[Green]]'const struct Invalid &'[[RESET]]
-//CHECK: {{^}}[[Blue]]| `-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:8[[RESET]]> [[Yellow]]col:8[[RESET]] implicit constexpr[[CYAN]] Invalid[[RESET]] [[Green]]'void (struct Invalid &&)'[[RESET]] inline default trivial noexcept-unevaluated 0x{{[0-9a-fA-F]*}}
-//CHECK: {{^}}[[Blue]]|   `-[[RESET]][[GREEN]]ParmVarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:8[[RESET]]> [[Yellow]]col:8[[RESET]] [[Green]]'struct Invalid &&'[[RESET]]
-//CHECK: {{^}}[[Blue]]`-[[RESET]][[GREEN]]VarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:1[[RESET]], [[Yellow]]line:30:3[[RESET]]> [[Yellow]]col:3[[RESET]][[CYAN]] Invalid[[RESET]] [[Green]]'struct Invalid':'struct Invalid'[[RESET]]
-//CHECK: {{^}}[[Blue]]  `-[[RESET]][[MAGENTA]]CXXConstructExpr[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:3[[RESET]]> [[Green]]'struct Invalid':'struct Invalid'[[RESET]][[Cyan]][[RESET]][[Cyan]][[RESET]] [[Green]]'void (void) noexcept'[[RESET]]
+//CHECK: {{^}}[[Blue]]| |-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:8[[RESET]]> [[Yellow]]col:8[[RESET]] implicit constexpr[[CYAN]] Invalid[[RESET]] [[Green]]'void (const Invalid &)'[[RESET]] inline default trivial noexcept-unevaluated 0x{{[0-9a-fA-F]*}}
+//CHECK: {{^}}[[Blue]]| | `-[[RESET]][[GREEN]]ParmVarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:8[[RESET]]> [[Yellow]]col:8[[RESET]] [[Green]]'const Invalid &'[[RESET]]
+//CHECK: {{^}}[[Blue]]| `-[[RESET]][[GREEN]]CXXConstructorDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:8[[RESET]]> [[Yellow]]col:8[[RESET]] implicit constexpr[[CYAN]] Invalid[[RESET]] [[Green]]'void (Invalid &&)'[[RESET]] inline default trivial noexcept-unevaluated 0x{{[0-9a-fA-F]*}}
+//CHECK: {{^}}[[Blue]]|   `-[[RESET]][[GREEN]]ParmVarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:8[[RESET]]> [[Yellow]]col:8[[RESET]] [[Green]]'Invalid &&'[[RESET]]
+//CHECK: {{^}}[[Blue]]`-[[RESET]][[GREEN]]VarDecl[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:1[[RESET]], [[Yellow]]line:30:3[[RESET]]> [[Yellow]]col:3[[RESET]][[CYAN]] Invalid[[RESET]] [[Green]]'struct Invalid':'Invalid'[[RESET]]
+//CHECK: {{^}}[[Blue]]  `-[[RESET]][[MAGENTA]]CXXConstructExpr[[RESET]][[Yellow]] 0x{{[0-9a-fA-F]*}}[[RESET]] <[[Yellow]]col:3[[RESET]]> [[Green]]'struct Invalid':'Invalid'[[RESET]][[Cyan]][[RESET]][[Cyan]][[RESET]] [[Green]]'void () noexcept'[[RESET]]
diff --git a/test/Misc/ast-dump-decl.cpp b/test/Misc/ast-dump-decl.cpp
index 0df8a5a..c689149 100644
--- a/test/Misc/ast-dump-decl.cpp
+++ b/test/Misc/ast-dump-decl.cpp
@@ -1,539 +1,539 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -fms-extensions -ast-dump -ast-dump-filter Test %s | FileCheck -strict-whitespace %s
-
-class testEnumDecl {
-  enum class TestEnumDeclScoped;
-  enum TestEnumDeclFixed : int;
-};
-// CHECK: EnumDecl{{.*}} class TestEnumDeclScoped 'int'
-// CHECK: EnumDecl{{.*}} TestEnumDeclFixed 'int'
-
-class testFieldDecl {
-  int TestFieldDeclInit = 0;
-};
-// CHECK:      FieldDecl{{.*}} TestFieldDeclInit 'int'
-// CHECK-NEXT:   IntegerLiteral
-
-namespace testVarDeclNRVO {
-  class A { };
-  A foo() {
-    A TestVarDeclNRVO;
-    return TestVarDeclNRVO;
-  }
-}
-// CHECK: VarDecl{{.*}} TestVarDeclNRVO 'class testVarDeclNRVO::A' nrvo
-
-void testParmVarDeclInit(int TestParmVarDeclInit = 0);
-// CHECK:      ParmVarDecl{{.*}} TestParmVarDeclInit 'int'
-// CHECK-NEXT:   IntegerLiteral{{.*}}
-
-namespace TestNamespaceDecl {
-  int i;
-}
-// CHECK:      NamespaceDecl{{.*}} TestNamespaceDecl
-// CHECK-NEXT:   VarDecl
-
-namespace TestNamespaceDecl {
-  int j;
-}
-// CHECK:      NamespaceDecl{{.*}} TestNamespaceDecl
-// CHECK-NEXT:   original Namespace
-// CHECK-NEXT:   VarDecl
-
-inline namespace TestNamespaceDeclInline {
-}
-// CHECK:      NamespaceDecl{{.*}} TestNamespaceDeclInline inline
-
-namespace testUsingDirectiveDecl {
-  namespace A {
-  }
-}
-namespace TestUsingDirectiveDecl {
-  using namespace testUsingDirectiveDecl::A;
-}
-// CHECK:      NamespaceDecl{{.*}} TestUsingDirectiveDecl
-// CHECK-NEXT:   UsingDirectiveDecl{{.*}} Namespace{{.*}} 'A'
-
-namespace testNamespaceAlias {
-  namespace A {
-  }
-}
-namespace TestNamespaceAlias = testNamespaceAlias::A;
-// CHECK:      NamespaceAliasDecl{{.*}} TestNamespaceAlias
-// CHECK-NEXT:   Namespace{{.*}} 'A'
-
-using TestTypeAliasDecl = int;
-// CHECK: TypeAliasDecl{{.*}} TestTypeAliasDecl 'int'
-
-namespace testTypeAliasTemplateDecl {
-  template<typename T> class A;
-  template<typename T> using TestTypeAliasTemplateDecl = A<T>;
-}
-// CHECK:      TypeAliasTemplateDecl{{.*}} TestTypeAliasTemplateDecl
-// CHECK-NEXT:   TemplateTypeParmDecl
-// CHECK-NEXT:   TypeAliasDecl{{.*}} TestTypeAliasTemplateDecl 'A<T>'
-
-namespace testCXXRecordDecl {
-  class TestEmpty {};
-// CHECK:      CXXRecordDecl{{.*}} class TestEmpty
-// CHECK-NEXT:   DefinitionData pass_in_registers empty aggregate standard_layout trivially_copyable pod trivial literal has_constexpr_non_copy_move_ctor can_const_default_init
-// CHECK-NEXT:     DefaultConstructor exists trivial constexpr
-// CHECK-NEXT:     CopyConstructor simple trivial has_const_param
-// CHECK-NEXT:     MoveConstructor exists simple trivial
-// CHECK-NEXT:     CopyAssignment trivial has_const_param
-// CHECK-NEXT:     MoveAssignment exists simple trivial
-// CHECK-NEXT:     Destructor simple irrelevant trivial
-
-  class A { };
-  class B { };
-  class TestCXXRecordDecl : virtual A, public B {
-    int i;
-  };
-}
-// CHECK:      CXXRecordDecl{{.*}} class TestCXXRecordDecl
-// CHECK-NEXT:   DefinitionData{{$}}
-// CHECK-NEXT:     DefaultConstructor exists non_trivial
-// CHECK-NEXT:     CopyConstructor simple non_trivial has_const_param
-// CHECK-NEXT:     MoveConstructor exists simple non_trivial
-// CHECK-NEXT:     CopyAssignment non_trivial has_const_param
-// CHECK-NEXT:     MoveAssignment exists simple non_trivial
-// CHECK-NEXT:     Destructor simple irrelevant trivial
-// CHECK-NEXT:   virtual private 'class testCXXRecordDecl::A'
-// CHECK-NEXT:   public 'class testCXXRecordDecl::B'
-// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestCXXRecordDecl
-// CHECK-NEXT:   FieldDecl
-
-template<class...T>
-class TestCXXRecordDeclPack : public T... {
-};
-// CHECK:      CXXRecordDecl{{.*}} class TestCXXRecordDeclPack
-// CHECK:        public 'T'...
-// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestCXXRecordDeclPack
-
-thread_local int TestThreadLocalInt;
-// CHECK: TestThreadLocalInt {{.*}} tls_dynamic
-
-class testCXXMethodDecl {
-  virtual void TestCXXMethodDeclPure() = 0;
-  void TestCXXMethodDeclDelete() = delete;
-  void TestCXXMethodDeclThrow() throw();
-  void TestCXXMethodDeclThrowType() throw(int);
-};
-// CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclPure 'void (void)' virtual pure
-// CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclDelete 'void (void)' delete
-// CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclThrow 'void (void) throw()'
-// CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclThrowType 'void (void) throw(int)'
-
-namespace testCXXConstructorDecl {
-  class A { };
-  class TestCXXConstructorDecl : public A {
-    int I;
-    TestCXXConstructorDecl(A &a, int i) : A(a), I(i) { }
-    TestCXXConstructorDecl(A &a) : TestCXXConstructorDecl(a, 0) { }
-  };
-}
-// CHECK:      CXXConstructorDecl{{.*}} TestCXXConstructorDecl 'void {{.*}}'
-// CHECK-NEXT:   ParmVarDecl{{.*}} a
-// CHECK-NEXT:   ParmVarDecl{{.*}} i
-// CHECK-NEXT:   CXXCtorInitializer{{.*}}A
-// CHECK-NEXT:     Expr
-// CHECK:        CXXCtorInitializer{{.*}}I
-// CHECK-NEXT:     Expr
-// CHECK:        CompoundStmt
-// CHECK:      CXXConstructorDecl{{.*}} TestCXXConstructorDecl 'void {{.*}}'
-// CHECK-NEXT:   ParmVarDecl{{.*}} a
-// CHECK-NEXT:   CXXCtorInitializer{{.*}}TestCXXConstructorDecl
-// CHECK-NEXT:     CXXConstructExpr{{.*}}TestCXXConstructorDecl
-
-class TestCXXDestructorDecl {
-  ~TestCXXDestructorDecl() { }
-};
-// CHECK:      CXXDestructorDecl{{.*}} ~TestCXXDestructorDecl 'void (void) noexcept'
-// CHECK-NEXT:   CompoundStmt
-
-// Test that the range of a defaulted members is computed correctly.
-class TestMemberRanges {
-public:
-  TestMemberRanges() = default;
-  TestMemberRanges(const TestMemberRanges &Other) = default;
-  TestMemberRanges(TestMemberRanges &&Other) = default;
-  ~TestMemberRanges() = default;
-  TestMemberRanges &operator=(const TestMemberRanges &Other) = default;
-  TestMemberRanges &operator=(TestMemberRanges &&Other) = default;
-};
-void SomeFunction() {
-  TestMemberRanges A;
-  TestMemberRanges B(A);
-  B = A;
-  A = static_cast<TestMemberRanges &&>(B);
-  TestMemberRanges C(static_cast<TestMemberRanges &&>(A));
-}
-// CHECK:      CXXConstructorDecl{{.*}} <line:{{.*}}:3, col:30>
-// CHECK:      CXXConstructorDecl{{.*}} <line:{{.*}}:3, col:59>
-// CHECK:      CXXConstructorDecl{{.*}} <line:{{.*}}:3, col:54>
-// CHECK:      CXXDestructorDecl{{.*}} <line:{{.*}}:3, col:31>
-// CHECK:      CXXMethodDecl{{.*}} <line:{{.*}}:3, col:70>
-// CHECK:      CXXMethodDecl{{.*}} <line:{{.*}}:3, col:65>
-
-class TestCXXConversionDecl {
-  operator int() { return 0; }
-};
-// CHECK:      CXXConversionDecl{{.*}} operator int 'int (void)'
-// CHECK-NEXT:   CompoundStmt
-
-namespace TestStaticAssertDecl {
-  static_assert(true, "msg");
-}
-// CHECK:      NamespaceDecl{{.*}} TestStaticAssertDecl
-// CHECK-NEXT:   StaticAssertDecl{{.*> .*$}}
-// CHECK-NEXT:     CXXBoolLiteralExpr
-// CHECK-NEXT:     StringLiteral
-
-namespace testFunctionTemplateDecl {
-  class A { };
-  class B { };
-  class C { };
-  class D { };
-  template<typename T> void TestFunctionTemplate(T) { }
-
-  // implicit instantiation
-  void bar(A a) { TestFunctionTemplate(a); }
-
-  // explicit specialization
-  template<> void TestFunctionTemplate(B);
-
-  // explicit instantiation declaration
-  extern template void TestFunctionTemplate(C);
-
-  // explicit instantiation definition
-  template void TestFunctionTemplate(D);
-}
-// CHECK:      FunctionTemplateDecl{{.*}} TestFunctionTemplate
-// CHECK-NEXT:   TemplateTypeParmDecl
-// CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate 'void (T)'
-// CHECK-NEXT:     ParmVarDecl{{.*}} 'T'
-// CHECK-NEXT:     CompoundStmt
-// CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate {{.*}}A
-// CHECK-NEXT:     TemplateArgument
-// CHECK-NEXT:     ParmVarDecl
-// CHECK-NEXT:     CompoundStmt
-// CHECK-NEXT:   Function{{.*}} 'TestFunctionTemplate' {{.*}}B
-// CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate {{.*}}C
-// CHECK-NEXT:     TemplateArgument
-// CHECK-NEXT:     ParmVarDecl
-// CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate {{.*}}D
-// CHECK-NEXT:     TemplateArgument
-// CHECK-NEXT:     ParmVarDecl
-// CHECK-NEXT:     CompoundStmt
-// CHECK:      FunctionDecl{{.*}} TestFunctionTemplate {{.*}}B
-// CHECK-NEXT:   TemplateArgument
-// CHECK-NEXT:   ParmVarDecl
-
-namespace testClassTemplateDecl {
-  class A { };
-  class B { };
-  class C { };
-  class D { };
-
-  template<typename T> class TestClassTemplate {
-  public:
-    TestClassTemplate();
-    ~TestClassTemplate();
-    int j();
-    int i;
-  };
-
-  // implicit instantiation
-  TestClassTemplate<A> a;
-
-  // explicit specialization
-  template<> class TestClassTemplate<B> {
-    int j;
-  };
-
-  // explicit instantiation declaration
-  extern template class TestClassTemplate<C>;
-
-  // explicit instantiation definition
-  template class TestClassTemplate<D>;
-
-  // partial explicit specialization
-  template<typename T1, typename T2> class TestClassTemplatePartial {
-    int i;
-  };
-  template<typename T1> class TestClassTemplatePartial<T1, A> {
-    int j;
-  };
-}
-// CHECK:      ClassTemplateDecl{{.*}} TestClassTemplate
-// CHECK-NEXT:   TemplateTypeParmDecl
-// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplate
-// CHECK:          CXXRecordDecl{{.*}} class TestClassTemplate
-// CHECK-NEXT:     AccessSpecDecl{{.*}} public
-// CHECK-NEXT:     CXXConstructorDecl{{.*}} <line:{{.*}}:5, col:23>
-// CHECK-NEXT:     CXXDestructorDecl{{.*}} <line:{{.*}}:5, col:24>
-// CHECK-NEXT:     CXXMethodDecl{{.*}} <line:{{.*}}:5, col:11>
-// CHECK-NEXT:     FieldDecl{{.*}} i
-// CHECK-NEXT:   ClassTemplateSpecializationDecl{{.*}} class TestClassTemplate
-// CHECK:          TemplateArgument{{.*}}A
-// CHECK-NEXT:     CXXRecordDecl{{.*}} class TestClassTemplate
-// CHECK-NEXT:     AccessSpecDecl{{.*}} public
-// CHECK-NEXT:     CXXConstructorDecl{{.*}} <line:{{.*}}:5, col:23>
-// CHECK-NEXT:     CXXDestructorDecl{{.*}} <line:{{.*}}:5, col:24>
-// CHECK-NEXT:     CXXMethodDecl{{.*}} <line:{{.*}}:5, col:11>
-// CHECK-NEXT:     FieldDecl{{.*}} i
-// CHECK:        ClassTemplateSpecialization{{.*}} 'TestClassTemplate'
-// CHECK-NEXT:   ClassTemplateSpecialization{{.*}} 'TestClassTemplate'
-// CHECK-NEXT:   ClassTemplateSpecialization{{.*}} 'TestClassTemplate'
-
-// CHECK:      ClassTemplateSpecializationDecl{{.*}} class TestClassTemplate
-// CHECK-NEXT:   DefinitionData
-// CHECK:        TemplateArgument{{.*}}B
-// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplate
-// CHECK-NEXT:   FieldDecl{{.*}} j
-
-// CHECK:      ClassTemplateSpecializationDecl{{.*}} class TestClassTemplate
-// CHECK:        TemplateArgument{{.*}}C
-// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplate
-// CHECK-NEXT:   AccessSpecDecl{{.*}} public
-// CHECK-NEXT:   CXXConstructorDecl{{.*}} <line:{{.*}}:5, col:23>
-// CHECK-NEXT:   CXXDestructorDecl{{.*}} <line:{{.*}}:5, col:24>
-// CHECK-NEXT:   CXXMethodDecl{{.*}} <line:{{.*}}:5, col:11>
-// CHECK-NEXT:   FieldDecl{{.*}} i
-
-// CHECK:      ClassTemplateSpecializationDecl{{.*}} class TestClassTemplate
-// CHECK:        TemplateArgument{{.*}}D
-// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplate
-// CHECK-NEXT:   AccessSpecDecl{{.*}} public
-// CHECK-NEXT:   CXXConstructorDecl{{.*}} <line:{{.*}}:5, col:23>
-// CHECK-NEXT:   CXXDestructorDecl{{.*}} <line:{{.*}}:5, col:24>
-// CHECK-NEXT:   CXXMethodDecl{{.*}} <line:{{.*}}:5, col:11>
-// CHECK-NEXT:   FieldDecl{{.*}} i
-
-// CHECK:      ClassTemplatePartialSpecializationDecl{{.*}} class TestClassTemplatePartial
-// CHECK:        TemplateArgument
-// CHECK-NEXT:   TemplateArgument{{.*}}A
-// CHECK-NEXT:   TemplateTypeParmDecl
-// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplatePartial
-// CHECK-NEXT:   FieldDecl{{.*}} j
-
-// PR15220 dump instantiation only once
-namespace testCanonicalTemplate {
-  class A {};
-
-  template<typename T> void TestFunctionTemplate(T);
-  template<typename T> void TestFunctionTemplate(T);
-  void bar(A a) { TestFunctionTemplate(a); }
-  // CHECK:      FunctionTemplateDecl{{.*}} TestFunctionTemplate
-  // CHECK-NEXT:   TemplateTypeParmDecl
-  // CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate 'void (T)'
-  // CHECK-NEXT:     ParmVarDecl{{.*}} 'T'
-  // CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate {{.*}}A
-  // CHECK-NEXT:     TemplateArgument
-  // CHECK-NEXT:     ParmVarDecl
-  // CHECK:      FunctionTemplateDecl{{.*}} TestFunctionTemplate
-  // CHECK-NEXT:   TemplateTypeParmDecl
-  // CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate 'void (T)'
-  // CHECK-NEXT:     ParmVarDecl{{.*}} 'T'
-  // CHECK-NEXT:   Function{{.*}} 'TestFunctionTemplate'
-  // CHECK-NOT:      TemplateArgument
-
-  template<typename T1> class TestClassTemplate {
-    template<typename T2> friend class TestClassTemplate;
-  };
-  TestClassTemplate<A> a;
-  // CHECK:      ClassTemplateDecl{{.*}} TestClassTemplate
-  // CHECK-NEXT:   TemplateTypeParmDecl
-  // CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplate
-  // CHECK:          CXXRecordDecl{{.*}} class TestClassTemplate
-  // CHECK-NEXT:     FriendDecl
-  // CHECK-NEXT:       ClassTemplateDecl{{.*}} TestClassTemplate
-  // CHECK-NEXT:         TemplateTypeParmDecl
-  // CHECK-NEXT:         CXXRecordDecl{{.*}} class TestClassTemplate
-  // CHECK-NEXT:   ClassTemplateSpecializationDecl{{.*}} class TestClassTemplate
-  // CHECK:          TemplateArgument{{.*}}A
-  // CHECK-NEXT:     CXXRecordDecl{{.*}} class TestClassTemplate
-}
-
-template <class T>
-class TestClassScopeFunctionSpecialization {
-  template<class U> void foo(U a) { }
-  template<> void foo<int>(int a) { }
-};
-// CHECK:      ClassScopeFunctionSpecializationDecl
-// CHECK-NEXT:   CXXMethod{{.*}} 'foo' 'void (int)'
-// CHECK-NEXT:   TemplateArgument{{.*}} 'int'
-
-namespace TestTemplateTypeParmDecl {
-  template<typename ... T, class U = int> void foo();
-}
-// CHECK:      NamespaceDecl{{.*}} TestTemplateTypeParmDecl
-// CHECK-NEXT:   FunctionTemplateDecl
-// CHECK-NEXT:     TemplateTypeParmDecl{{.*}} typename depth 0 index 0 ... T
-// CHECK-NEXT:     TemplateTypeParmDecl{{.*}} class depth 0 index 1 U
-// CHECK-NEXT:       TemplateArgument type 'int'
-
-namespace TestNonTypeTemplateParmDecl {
-  template<int I = 1, int ... J> void foo();
-}
-// CHECK:      NamespaceDecl{{.*}} TestNonTypeTemplateParmDecl
-// CHECK-NEXT:   FunctionTemplateDecl
-// CHECK-NEXT:     NonTypeTemplateParmDecl{{.*}} 'int' depth 0 index 0 I
-// CHECK-NEXT:       TemplateArgument expr
-// CHECK-NEXT:         IntegerLiteral{{.*}} 'int' 1
-// CHECK-NEXT:     NonTypeTemplateParmDecl{{.*}} 'int' depth 0 index 1 ... J
-
-namespace TestTemplateTemplateParmDecl {
-  template<typename T> class A;
-  template <template <typename> class T = A, template <typename> class ... U> void foo();
-}
-// CHECK:      NamespaceDecl{{.*}} TestTemplateTemplateParmDecl
-// CHECK:        FunctionTemplateDecl
-// CHECK-NEXT:     TemplateTemplateParmDecl{{.*}} T
-// CHECK-NEXT:       TemplateTypeParmDecl{{.*}} typename
-// CHECK-NEXT:       TemplateArgument{{.*}} template A
-// CHECK-NEXT:     TemplateTemplateParmDecl{{.*}} ... U
-// CHECK-NEXT:       TemplateTypeParmDecl{{.*}} typename
-
-namespace TestTemplateArgument {
-  template<typename> class A { };
-  template<template<typename> class ...> class B { };
-  int foo();
-
-  template<typename> class testType { };
-  template class testType<int>;
-  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testType
-  // CHECK:        TemplateArgument{{.*}} type 'int'
-
-  template<int fp(void)> class testDecl { };
-  template class testDecl<foo>;
-  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testDecl
-  // CHECK:        TemplateArgument{{.*}} decl
-  // CHECK-NEXT:     Function{{.*}}foo
-
-  template class testDecl<nullptr>;
-  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testDecl
-  // CHECK:        TemplateArgument{{.*}} nullptr
-
-  template<int> class testIntegral { };
-  template class testIntegral<1>;
-  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testIntegral
-  // CHECK:        TemplateArgument{{.*}} integral 1
-
-  template<template<typename> class> class testTemplate { };
-  template class testTemplate<A>;
-  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testTemplate
-  // CHECK:        TemplateArgument{{.*}} A
-
-  template<template<typename> class ...T> class C {
-    B<T...> testTemplateExpansion;
-  };
-  // FIXME: Need TemplateSpecializationType dumping to test TemplateExpansion.
-
-  template<int, int = 0> class testExpr;
-  template<int I> class testExpr<I> { };
-  // CHECK:      ClassTemplatePartialSpecializationDecl{{.*}} class testExpr
-  // CHECK:        TemplateArgument{{.*}} expr
-  // CHECK-NEXT:     DeclRefExpr{{.*}}I
-
-  template<int, int ...> class testPack { };
-  template class testPack<0, 1, 2>;
-  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testPack
-  // CHECK:        TemplateArgument{{.*}} integral 0
-  // CHECK-NEXT:   TemplateArgument{{.*}} pack
-  // CHECK-NEXT:     TemplateArgument{{.*}} integral 1
-  // CHECK-NEXT:     TemplateArgument{{.*}} integral 2
-}
-
-namespace testUsingDecl {
-  int i;
-}
-namespace TestUsingDecl {
-  using testUsingDecl::i;
-}
-// CHECK:      NamespaceDecl{{.*}} TestUsingDecl
-// CHECK-NEXT:   UsingDecl{{.*}} testUsingDecl::i
-// CHECK-NEXT:   UsingShadowDecl{{.*}} Var{{.*}} 'i' 'int'
-
-namespace testUnresolvedUsing {
-  class A { };
-  template<class T> class B {
-  public:
-    A a;
-  };
-  template<class T> class TestUnresolvedUsing : public B<T> {
-    using typename B<T>::a;
-    using B<T>::a;
-  };
-}
-// CHECK: CXXRecordDecl{{.*}} TestUnresolvedUsing
-// CHECK:   UnresolvedUsingTypenameDecl{{.*}} B<T>::a
-// CHECK:   UnresolvedUsingValueDecl{{.*}} B<T>::a
-
-namespace TestLinkageSpecDecl {
-  extern "C" void test1();
-  extern "C++" void test2();
-}
-// CHECK:      NamespaceDecl{{.*}} TestLinkageSpecDecl
-// CHECK-NEXT:   LinkageSpecDecl{{.*}} C
-// CHECK-NEXT:     FunctionDecl
-// CHECK-NEXT:   LinkageSpecDecl{{.*}} C++
-// CHECK-NEXT:     FunctionDecl
-
-class TestAccessSpecDecl {
-public:
-private:
-protected:
-};
-// CHECK:      CXXRecordDecl{{.*}} class TestAccessSpecDecl
-// CHECK:         CXXRecordDecl{{.*}} class TestAccessSpecDecl
-// CHECK-NEXT:    AccessSpecDecl{{.*}} public
-// CHECK-NEXT:    AccessSpecDecl{{.*}} private
-// CHECK-NEXT:    AccessSpecDecl{{.*}} protected
-
-template<typename T> class TestFriendDecl {
-  friend int foo();
-  friend class A;
-  friend T;
-};
-// CHECK:      CXXRecord{{.*}} TestFriendDecl
-// CHECK:        CXXRecord{{.*}} TestFriendDecl
-// CHECK-NEXT:   FriendDecl
-// CHECK-NEXT:     FunctionDecl{{.*}} foo
-// CHECK-NEXT:   FriendDecl{{.*}} 'class A':'class A'
-// CHECK-NEXT:   FriendDecl{{.*}} 'T'
-
-namespace TestFileScopeAsmDecl {
-  asm("ret");
-}
-// CHECK:      NamespaceDecl{{.*}} TestFileScopeAsmDecl{{$}}
-// CHECK:        FileScopeAsmDecl{{.*> .*$}}
-// CHECK-NEXT:     StringLiteral
-
-namespace TestFriendDecl2 {
-  void f();
-  struct S {
-    friend void f();
-  };
-}
-// CHECK: NamespaceDecl [[TestFriendDecl2:0x.*]] <{{.*}}> {{.*}} TestFriendDecl2
-// CHECK: |-FunctionDecl [[TestFriendDecl2_f:0x.*]] <{{.*}}> {{.*}} f 'void (void)'
-// CHECK: `-CXXRecordDecl {{.*}} struct S
-// CHECK:   |-CXXRecordDecl {{.*}} struct S
-// CHECK:   `-FriendDecl
-// CHECK:     `-FunctionDecl {{.*}} parent [[TestFriendDecl2]] prev [[TestFriendDecl2_f]] <{{.*}}> {{.*}} f 'void (void)'
-
-namespace Comment {
-  extern int Test;
-  /// Something here.
-  extern int Test;
-  extern int Test;
-}
-
-// CHECK: VarDecl {{.*}} Test 'int' extern
-// CHECK-NOT: FullComment
-// CHECK: VarDecl {{.*}} Test 'int' extern
-// CHECK: `-FullComment
-// CHECK:   `-ParagraphComment
-// CHECK:       `-TextComment
-// CHECK: VarDecl {{.*}} Test 'int' extern
-// CHECK-NOT: FullComment
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -fms-extensions -ast-dump -ast-dump-filter Test %s | FileCheck -strict-whitespace %s
+
+class testEnumDecl {
+  enum class TestEnumDeclScoped;
+  enum TestEnumDeclFixed : int;
+};
+// CHECK: EnumDecl{{.*}} class TestEnumDeclScoped 'int'
+// CHECK: EnumDecl{{.*}} TestEnumDeclFixed 'int'
+
+class testFieldDecl {
+  int TestFieldDeclInit = 0;
+};
+// CHECK:      FieldDecl{{.*}} TestFieldDeclInit 'int'
+// CHECK-NEXT:   IntegerLiteral
+
+namespace testVarDeclNRVO {
+  class A { };
+  A foo() {
+    A TestVarDeclNRVO;
+    return TestVarDeclNRVO;
+  }
+}
+// CHECK: VarDecl{{.*}} TestVarDeclNRVO 'testVarDeclNRVO::A' nrvo
+
+void testParmVarDeclInit(int TestParmVarDeclInit = 0);
+// CHECK:      ParmVarDecl{{.*}} TestParmVarDeclInit 'int'
+// CHECK-NEXT:   IntegerLiteral{{.*}}
+
+namespace TestNamespaceDecl {
+  int i;
+}
+// CHECK:      NamespaceDecl{{.*}} TestNamespaceDecl
+// CHECK-NEXT:   VarDecl
+
+namespace TestNamespaceDecl {
+  int j;
+}
+// CHECK:      NamespaceDecl{{.*}} TestNamespaceDecl
+// CHECK-NEXT:   original Namespace
+// CHECK-NEXT:   VarDecl
+
+inline namespace TestNamespaceDeclInline {
+}
+// CHECK:      NamespaceDecl{{.*}} TestNamespaceDeclInline inline
+
+namespace testUsingDirectiveDecl {
+  namespace A {
+  }
+}
+namespace TestUsingDirectiveDecl {
+  using namespace testUsingDirectiveDecl::A;
+}
+// CHECK:      NamespaceDecl{{.*}} TestUsingDirectiveDecl
+// CHECK-NEXT:   UsingDirectiveDecl{{.*}} Namespace{{.*}} 'A'
+
+namespace testNamespaceAlias {
+  namespace A {
+  }
+}
+namespace TestNamespaceAlias = testNamespaceAlias::A;
+// CHECK:      NamespaceAliasDecl{{.*}} TestNamespaceAlias
+// CHECK-NEXT:   Namespace{{.*}} 'A'
+
+using TestTypeAliasDecl = int;
+// CHECK: TypeAliasDecl{{.*}} TestTypeAliasDecl 'int'
+
+namespace testTypeAliasTemplateDecl {
+  template<typename T> class A;
+  template<typename T> using TestTypeAliasTemplateDecl = A<T>;
+}
+// CHECK:      TypeAliasTemplateDecl{{.*}} TestTypeAliasTemplateDecl
+// CHECK-NEXT:   TemplateTypeParmDecl
+// CHECK-NEXT:   TypeAliasDecl{{.*}} TestTypeAliasTemplateDecl 'A<T>'
+
+namespace testCXXRecordDecl {
+  class TestEmpty {};
+// CHECK:      CXXRecordDecl{{.*}} class TestEmpty
+// CHECK-NEXT:   DefinitionData pass_in_registers empty aggregate standard_layout trivially_copyable pod trivial literal has_constexpr_non_copy_move_ctor can_const_default_init
+// CHECK-NEXT:     DefaultConstructor exists trivial constexpr
+// CHECK-NEXT:     CopyConstructor simple trivial has_const_param
+// CHECK-NEXT:     MoveConstructor exists simple trivial
+// CHECK-NEXT:     CopyAssignment trivial has_const_param
+// CHECK-NEXT:     MoveAssignment exists simple trivial
+// CHECK-NEXT:     Destructor simple irrelevant trivial
+
+  class A { };
+  class B { };
+  class TestCXXRecordDecl : virtual A, public B {
+    int i;
+  };
+}
+// CHECK:      CXXRecordDecl{{.*}} class TestCXXRecordDecl
+// CHECK-NEXT:   DefinitionData{{$}}
+// CHECK-NEXT:     DefaultConstructor exists non_trivial
+// CHECK-NEXT:     CopyConstructor simple non_trivial has_const_param
+// CHECK-NEXT:     MoveConstructor exists simple non_trivial
+// CHECK-NEXT:     CopyAssignment non_trivial has_const_param
+// CHECK-NEXT:     MoveAssignment exists simple non_trivial
+// CHECK-NEXT:     Destructor simple irrelevant trivial
+// CHECK-NEXT:   virtual private 'testCXXRecordDecl::A'
+// CHECK-NEXT:   public 'testCXXRecordDecl::B'
+// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestCXXRecordDecl
+// CHECK-NEXT:   FieldDecl
+
+template<class...T>
+class TestCXXRecordDeclPack : public T... {
+};
+// CHECK:      CXXRecordDecl{{.*}} class TestCXXRecordDeclPack
+// CHECK:        public 'T'...
+// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestCXXRecordDeclPack
+
+thread_local int TestThreadLocalInt;
+// CHECK: TestThreadLocalInt {{.*}} tls_dynamic
+
+class testCXXMethodDecl {
+  virtual void TestCXXMethodDeclPure() = 0;
+  void TestCXXMethodDeclDelete() = delete;
+  void TestCXXMethodDeclThrow() throw();
+  void TestCXXMethodDeclThrowType() throw(int);
+};
+// CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclPure 'void ()' virtual pure
+// CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclDelete 'void ()' delete
+// CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclThrow 'void () throw()'
+// CHECK: CXXMethodDecl{{.*}} TestCXXMethodDeclThrowType 'void () throw(int)'
+
+namespace testCXXConstructorDecl {
+  class A { };
+  class TestCXXConstructorDecl : public A {
+    int I;
+    TestCXXConstructorDecl(A &a, int i) : A(a), I(i) { }
+    TestCXXConstructorDecl(A &a) : TestCXXConstructorDecl(a, 0) { }
+  };
+}
+// CHECK:      CXXConstructorDecl{{.*}} TestCXXConstructorDecl 'void {{.*}}'
+// CHECK-NEXT:   ParmVarDecl{{.*}} a
+// CHECK-NEXT:   ParmVarDecl{{.*}} i
+// CHECK-NEXT:   CXXCtorInitializer{{.*}}A
+// CHECK-NEXT:     Expr
+// CHECK:        CXXCtorInitializer{{.*}}I
+// CHECK-NEXT:     Expr
+// CHECK:        CompoundStmt
+// CHECK:      CXXConstructorDecl{{.*}} TestCXXConstructorDecl 'void {{.*}}'
+// CHECK-NEXT:   ParmVarDecl{{.*}} a
+// CHECK-NEXT:   CXXCtorInitializer{{.*}}TestCXXConstructorDecl
+// CHECK-NEXT:     CXXConstructExpr{{.*}}TestCXXConstructorDecl
+
+class TestCXXDestructorDecl {
+  ~TestCXXDestructorDecl() { }
+};
+// CHECK:      CXXDestructorDecl{{.*}} ~TestCXXDestructorDecl 'void () noexcept'
+// CHECK-NEXT:   CompoundStmt
+
+// Test that the range of a defaulted members is computed correctly.
+class TestMemberRanges {
+public:
+  TestMemberRanges() = default;
+  TestMemberRanges(const TestMemberRanges &Other) = default;
+  TestMemberRanges(TestMemberRanges &&Other) = default;
+  ~TestMemberRanges() = default;
+  TestMemberRanges &operator=(const TestMemberRanges &Other) = default;
+  TestMemberRanges &operator=(TestMemberRanges &&Other) = default;
+};
+void SomeFunction() {
+  TestMemberRanges A;
+  TestMemberRanges B(A);
+  B = A;
+  A = static_cast<TestMemberRanges &&>(B);
+  TestMemberRanges C(static_cast<TestMemberRanges &&>(A));
+}
+// CHECK:      CXXConstructorDecl{{.*}} <line:{{.*}}:3, col:30>
+// CHECK:      CXXConstructorDecl{{.*}} <line:{{.*}}:3, col:59>
+// CHECK:      CXXConstructorDecl{{.*}} <line:{{.*}}:3, col:54>
+// CHECK:      CXXDestructorDecl{{.*}} <line:{{.*}}:3, col:31>
+// CHECK:      CXXMethodDecl{{.*}} <line:{{.*}}:3, col:70>
+// CHECK:      CXXMethodDecl{{.*}} <line:{{.*}}:3, col:65>
+
+class TestCXXConversionDecl {
+  operator int() { return 0; }
+};
+// CHECK:      CXXConversionDecl{{.*}} operator int 'int ()'
+// CHECK-NEXT:   CompoundStmt
+
+namespace TestStaticAssertDecl {
+  static_assert(true, "msg");
+}
+// CHECK:      NamespaceDecl{{.*}} TestStaticAssertDecl
+// CHECK-NEXT:   StaticAssertDecl{{.*> .*$}}
+// CHECK-NEXT:     CXXBoolLiteralExpr
+// CHECK-NEXT:     StringLiteral
+
+namespace testFunctionTemplateDecl {
+  class A { };
+  class B { };
+  class C { };
+  class D { };
+  template<typename T> void TestFunctionTemplate(T) { }
+
+  // implicit instantiation
+  void bar(A a) { TestFunctionTemplate(a); }
+
+  // explicit specialization
+  template<> void TestFunctionTemplate(B);
+
+  // explicit instantiation declaration
+  extern template void TestFunctionTemplate(C);
+
+  // explicit instantiation definition
+  template void TestFunctionTemplate(D);
+}
+// CHECK:      FunctionTemplateDecl{{.*}} TestFunctionTemplate
+// CHECK-NEXT:   TemplateTypeParmDecl
+// CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate 'void (T)'
+// CHECK-NEXT:     ParmVarDecl{{.*}} 'T'
+// CHECK-NEXT:     CompoundStmt
+// CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate {{.*}}A
+// CHECK-NEXT:     TemplateArgument
+// CHECK-NEXT:     ParmVarDecl
+// CHECK-NEXT:     CompoundStmt
+// CHECK-NEXT:   Function{{.*}} 'TestFunctionTemplate' {{.*}}B
+// CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate {{.*}}C
+// CHECK-NEXT:     TemplateArgument
+// CHECK-NEXT:     ParmVarDecl
+// CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate {{.*}}D
+// CHECK-NEXT:     TemplateArgument
+// CHECK-NEXT:     ParmVarDecl
+// CHECK-NEXT:     CompoundStmt
+// CHECK:      FunctionDecl{{.*}} TestFunctionTemplate {{.*}}B
+// CHECK-NEXT:   TemplateArgument
+// CHECK-NEXT:   ParmVarDecl
+
+namespace testClassTemplateDecl {
+  class A { };
+  class B { };
+  class C { };
+  class D { };
+
+  template<typename T> class TestClassTemplate {
+  public:
+    TestClassTemplate();
+    ~TestClassTemplate();
+    int j();
+    int i;
+  };
+
+  // implicit instantiation
+  TestClassTemplate<A> a;
+
+  // explicit specialization
+  template<> class TestClassTemplate<B> {
+    int j;
+  };
+
+  // explicit instantiation declaration
+  extern template class TestClassTemplate<C>;
+
+  // explicit instantiation definition
+  template class TestClassTemplate<D>;
+
+  // partial explicit specialization
+  template<typename T1, typename T2> class TestClassTemplatePartial {
+    int i;
+  };
+  template<typename T1> class TestClassTemplatePartial<T1, A> {
+    int j;
+  };
+}
+// CHECK:      ClassTemplateDecl{{.*}} TestClassTemplate
+// CHECK-NEXT:   TemplateTypeParmDecl
+// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplate
+// CHECK:          CXXRecordDecl{{.*}} class TestClassTemplate
+// CHECK-NEXT:     AccessSpecDecl{{.*}} public
+// CHECK-NEXT:     CXXConstructorDecl{{.*}} <line:{{.*}}:5, col:23>
+// CHECK-NEXT:     CXXDestructorDecl{{.*}} <line:{{.*}}:5, col:24>
+// CHECK-NEXT:     CXXMethodDecl{{.*}} <line:{{.*}}:5, col:11>
+// CHECK-NEXT:     FieldDecl{{.*}} i
+// CHECK-NEXT:   ClassTemplateSpecializationDecl{{.*}} class TestClassTemplate
+// CHECK:          TemplateArgument{{.*}}A
+// CHECK-NEXT:     CXXRecordDecl{{.*}} class TestClassTemplate
+// CHECK-NEXT:     AccessSpecDecl{{.*}} public
+// CHECK-NEXT:     CXXConstructorDecl{{.*}} <line:{{.*}}:5, col:23>
+// CHECK-NEXT:     CXXDestructorDecl{{.*}} <line:{{.*}}:5, col:24>
+// CHECK-NEXT:     CXXMethodDecl{{.*}} <line:{{.*}}:5, col:11>
+// CHECK-NEXT:     FieldDecl{{.*}} i
+// CHECK:        ClassTemplateSpecialization{{.*}} 'TestClassTemplate'
+// CHECK-NEXT:   ClassTemplateSpecialization{{.*}} 'TestClassTemplate'
+// CHECK-NEXT:   ClassTemplateSpecialization{{.*}} 'TestClassTemplate'
+
+// CHECK:      ClassTemplateSpecializationDecl{{.*}} class TestClassTemplate
+// CHECK-NEXT:   DefinitionData
+// CHECK:        TemplateArgument{{.*}}B
+// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplate
+// CHECK-NEXT:   FieldDecl{{.*}} j
+
+// CHECK:      ClassTemplateSpecializationDecl{{.*}} class TestClassTemplate
+// CHECK:        TemplateArgument{{.*}}C
+// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplate
+// CHECK-NEXT:   AccessSpecDecl{{.*}} public
+// CHECK-NEXT:   CXXConstructorDecl{{.*}} <line:{{.*}}:5, col:23>
+// CHECK-NEXT:   CXXDestructorDecl{{.*}} <line:{{.*}}:5, col:24>
+// CHECK-NEXT:   CXXMethodDecl{{.*}} <line:{{.*}}:5, col:11>
+// CHECK-NEXT:   FieldDecl{{.*}} i
+
+// CHECK:      ClassTemplateSpecializationDecl{{.*}} class TestClassTemplate
+// CHECK:        TemplateArgument{{.*}}D
+// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplate
+// CHECK-NEXT:   AccessSpecDecl{{.*}} public
+// CHECK-NEXT:   CXXConstructorDecl{{.*}} <line:{{.*}}:5, col:23>
+// CHECK-NEXT:   CXXDestructorDecl{{.*}} <line:{{.*}}:5, col:24>
+// CHECK-NEXT:   CXXMethodDecl{{.*}} <line:{{.*}}:5, col:11>
+// CHECK-NEXT:   FieldDecl{{.*}} i
+
+// CHECK:      ClassTemplatePartialSpecializationDecl{{.*}} class TestClassTemplatePartial
+// CHECK:        TemplateArgument
+// CHECK-NEXT:   TemplateArgument{{.*}}A
+// CHECK-NEXT:   TemplateTypeParmDecl
+// CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplatePartial
+// CHECK-NEXT:   FieldDecl{{.*}} j
+
+// PR15220 dump instantiation only once
+namespace testCanonicalTemplate {
+  class A {};
+
+  template<typename T> void TestFunctionTemplate(T);
+  template<typename T> void TestFunctionTemplate(T);
+  void bar(A a) { TestFunctionTemplate(a); }
+  // CHECK:      FunctionTemplateDecl{{.*}} TestFunctionTemplate
+  // CHECK-NEXT:   TemplateTypeParmDecl
+  // CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate 'void (T)'
+  // CHECK-NEXT:     ParmVarDecl{{.*}} 'T'
+  // CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate {{.*}}A
+  // CHECK-NEXT:     TemplateArgument
+  // CHECK-NEXT:     ParmVarDecl
+  // CHECK:      FunctionTemplateDecl{{.*}} TestFunctionTemplate
+  // CHECK-NEXT:   TemplateTypeParmDecl
+  // CHECK-NEXT:   FunctionDecl{{.*}} TestFunctionTemplate 'void (T)'
+  // CHECK-NEXT:     ParmVarDecl{{.*}} 'T'
+  // CHECK-NEXT:   Function{{.*}} 'TestFunctionTemplate'
+  // CHECK-NOT:      TemplateArgument
+
+  template<typename T1> class TestClassTemplate {
+    template<typename T2> friend class TestClassTemplate;
+  };
+  TestClassTemplate<A> a;
+  // CHECK:      ClassTemplateDecl{{.*}} TestClassTemplate
+  // CHECK-NEXT:   TemplateTypeParmDecl
+  // CHECK-NEXT:   CXXRecordDecl{{.*}} class TestClassTemplate
+  // CHECK:          CXXRecordDecl{{.*}} class TestClassTemplate
+  // CHECK-NEXT:     FriendDecl
+  // CHECK-NEXT:       ClassTemplateDecl{{.*}} TestClassTemplate
+  // CHECK-NEXT:         TemplateTypeParmDecl
+  // CHECK-NEXT:         CXXRecordDecl{{.*}} class TestClassTemplate
+  // CHECK-NEXT:   ClassTemplateSpecializationDecl{{.*}} class TestClassTemplate
+  // CHECK:          TemplateArgument{{.*}}A
+  // CHECK-NEXT:     CXXRecordDecl{{.*}} class TestClassTemplate
+}
+
+template <class T>
+class TestClassScopeFunctionSpecialization {
+  template<class U> void foo(U a) { }
+  template<> void foo<int>(int a) { }
+};
+// CHECK:      ClassScopeFunctionSpecializationDecl
+// CHECK-NEXT:   CXXMethod{{.*}} 'foo' 'void (int)'
+// CHECK-NEXT:   TemplateArgument{{.*}} 'int'
+
+namespace TestTemplateTypeParmDecl {
+  template<typename ... T, class U = int> void foo();
+}
+// CHECK:      NamespaceDecl{{.*}} TestTemplateTypeParmDecl
+// CHECK-NEXT:   FunctionTemplateDecl
+// CHECK-NEXT:     TemplateTypeParmDecl{{.*}} typename depth 0 index 0 ... T
+// CHECK-NEXT:     TemplateTypeParmDecl{{.*}} class depth 0 index 1 U
+// CHECK-NEXT:       TemplateArgument type 'int'
+
+namespace TestNonTypeTemplateParmDecl {
+  template<int I = 1, int ... J> void foo();
+}
+// CHECK:      NamespaceDecl{{.*}} TestNonTypeTemplateParmDecl
+// CHECK-NEXT:   FunctionTemplateDecl
+// CHECK-NEXT:     NonTypeTemplateParmDecl{{.*}} 'int' depth 0 index 0 I
+// CHECK-NEXT:       TemplateArgument expr
+// CHECK-NEXT:         IntegerLiteral{{.*}} 'int' 1
+// CHECK-NEXT:     NonTypeTemplateParmDecl{{.*}} 'int' depth 0 index 1 ... J
+
+namespace TestTemplateTemplateParmDecl {
+  template<typename T> class A;
+  template <template <typename> class T = A, template <typename> class ... U> void foo();
+}
+// CHECK:      NamespaceDecl{{.*}} TestTemplateTemplateParmDecl
+// CHECK:        FunctionTemplateDecl
+// CHECK-NEXT:     TemplateTemplateParmDecl{{.*}} T
+// CHECK-NEXT:       TemplateTypeParmDecl{{.*}} typename
+// CHECK-NEXT:       TemplateArgument{{.*}} template A
+// CHECK-NEXT:     TemplateTemplateParmDecl{{.*}} ... U
+// CHECK-NEXT:       TemplateTypeParmDecl{{.*}} typename
+
+namespace TestTemplateArgument {
+  template<typename> class A { };
+  template<template<typename> class ...> class B { };
+  int foo();
+
+  template<typename> class testType { };
+  template class testType<int>;
+  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testType
+  // CHECK:        TemplateArgument{{.*}} type 'int'
+
+  template<int fp(void)> class testDecl { };
+  template class testDecl<foo>;
+  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testDecl
+  // CHECK:        TemplateArgument{{.*}} decl
+  // CHECK-NEXT:     Function{{.*}}foo
+
+  template class testDecl<nullptr>;
+  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testDecl
+  // CHECK:        TemplateArgument{{.*}} nullptr
+
+  template<int> class testIntegral { };
+  template class testIntegral<1>;
+  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testIntegral
+  // CHECK:        TemplateArgument{{.*}} integral 1
+
+  template<template<typename> class> class testTemplate { };
+  template class testTemplate<A>;
+  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testTemplate
+  // CHECK:        TemplateArgument{{.*}} A
+
+  template<template<typename> class ...T> class C {
+    B<T...> testTemplateExpansion;
+  };
+  // FIXME: Need TemplateSpecializationType dumping to test TemplateExpansion.
+
+  template<int, int = 0> class testExpr;
+  template<int I> class testExpr<I> { };
+  // CHECK:      ClassTemplatePartialSpecializationDecl{{.*}} class testExpr
+  // CHECK:        TemplateArgument{{.*}} expr
+  // CHECK-NEXT:     DeclRefExpr{{.*}}I
+
+  template<int, int ...> class testPack { };
+  template class testPack<0, 1, 2>;
+  // CHECK:      ClassTemplateSpecializationDecl{{.*}} class testPack
+  // CHECK:        TemplateArgument{{.*}} integral 0
+  // CHECK-NEXT:   TemplateArgument{{.*}} pack
+  // CHECK-NEXT:     TemplateArgument{{.*}} integral 1
+  // CHECK-NEXT:     TemplateArgument{{.*}} integral 2
+}
+
+namespace testUsingDecl {
+  int i;
+}
+namespace TestUsingDecl {
+  using testUsingDecl::i;
+}
+// CHECK:      NamespaceDecl{{.*}} TestUsingDecl
+// CHECK-NEXT:   UsingDecl{{.*}} testUsingDecl::i
+// CHECK-NEXT:   UsingShadowDecl{{.*}} Var{{.*}} 'i' 'int'
+
+namespace testUnresolvedUsing {
+  class A { };
+  template<class T> class B {
+  public:
+    A a;
+  };
+  template<class T> class TestUnresolvedUsing : public B<T> {
+    using typename B<T>::a;
+    using B<T>::a;
+  };
+}
+// CHECK: CXXRecordDecl{{.*}} TestUnresolvedUsing
+// CHECK:   UnresolvedUsingTypenameDecl{{.*}} B<T>::a
+// CHECK:   UnresolvedUsingValueDecl{{.*}} B<T>::a
+
+namespace TestLinkageSpecDecl {
+  extern "C" void test1();
+  extern "C++" void test2();
+}
+// CHECK:      NamespaceDecl{{.*}} TestLinkageSpecDecl
+// CHECK-NEXT:   LinkageSpecDecl{{.*}} C
+// CHECK-NEXT:     FunctionDecl
+// CHECK-NEXT:   LinkageSpecDecl{{.*}} C++
+// CHECK-NEXT:     FunctionDecl
+
+class TestAccessSpecDecl {
+public:
+private:
+protected:
+};
+// CHECK:      CXXRecordDecl{{.*}} class TestAccessSpecDecl
+// CHECK:         CXXRecordDecl{{.*}} class TestAccessSpecDecl
+// CHECK-NEXT:    AccessSpecDecl{{.*}} public
+// CHECK-NEXT:    AccessSpecDecl{{.*}} private
+// CHECK-NEXT:    AccessSpecDecl{{.*}} protected
+
+template<typename T> class TestFriendDecl {
+  friend int foo();
+  friend class A;
+  friend T;
+};
+// CHECK:      CXXRecord{{.*}} TestFriendDecl
+// CHECK:        CXXRecord{{.*}} TestFriendDecl
+// CHECK-NEXT:   FriendDecl
+// CHECK-NEXT:     FunctionDecl{{.*}} foo
+// CHECK-NEXT:   FriendDecl{{.*}} 'class A':'A'
+// CHECK-NEXT:   FriendDecl{{.*}} 'T'
+
+namespace TestFileScopeAsmDecl {
+  asm("ret");
+}
+// CHECK:      NamespaceDecl{{.*}} TestFileScopeAsmDecl{{$}}
+// CHECK:        FileScopeAsmDecl{{.*> .*$}}
+// CHECK-NEXT:     StringLiteral
+
+namespace TestFriendDecl2 {
+  void f();
+  struct S {
+    friend void f();
+  };
+}
+// CHECK: NamespaceDecl [[TestFriendDecl2:0x.*]] <{{.*}}> {{.*}} TestFriendDecl2
+// CHECK: |-FunctionDecl [[TestFriendDecl2_f:0x.*]] <{{.*}}> {{.*}} f 'void ()'
+// CHECK: `-CXXRecordDecl {{.*}} struct S
+// CHECK:   |-CXXRecordDecl {{.*}} struct S
+// CHECK:   `-FriendDecl
+// CHECK:     `-FunctionDecl {{.*}} parent [[TestFriendDecl2]] prev [[TestFriendDecl2_f]] <{{.*}}> {{.*}} f 'void ()'
+
+namespace Comment {
+  extern int Test;
+  /// Something here.
+  extern int Test;
+  extern int Test;
+}
+
+// CHECK: VarDecl {{.*}} Test 'int' extern
+// CHECK-NOT: FullComment
+// CHECK: VarDecl {{.*}} Test 'int' extern
+// CHECK: `-FullComment
+// CHECK:   `-ParagraphComment
+// CHECK:       `-TextComment
+// CHECK: VarDecl {{.*}} Test 'int' extern
+// CHECK-NOT: FullComment
diff --git a/test/Misc/ast-dump-invalid.cpp b/test/Misc/ast-dump-invalid.cpp
index 842b057..3cc8f8b 100644
--- a/test/Misc/ast-dump-invalid.cpp
+++ b/test/Misc/ast-dump-invalid.cpp
@@ -1,64 +1,64 @@
-// RUN: not %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -fms-extensions -ast-dump -ast-dump-filter Test %s | FileCheck -check-prefix CHECK -strict-whitespace %s
-
-namespace TestInvalidRParenOnCXXUnresolvedConstructExpr {
-template <class T>
-void f(T i, T j) {
-  return T (i, j;
-}
-}
-
-// CHECK: NamespaceDecl {{.*}} <{{.*}}> {{.*}} TestInvalidRParenOnCXXUnresolvedConstructExpr
-// CHECK-NEXT: `-FunctionTemplateDecl
-// CHECK-NEXT:   |-TemplateTypeParmDecl
-// CHECK-NEXT:   `-FunctionDecl
-// CHECK-NEXT:     |-ParmVarDecl
-// CHECK-NEXT:     |-ParmVarDecl
-// CHECK-NEXT:     `-CompoundStmt
-// CHECK-NEXT:       `-ReturnStmt
-// CHECK-NEXT:         `-CXXUnresolvedConstructExpr {{.*}} <col:10, col:16> 'T'
-// CHECK-NEXT:           |-DeclRefExpr {{.*}} <col:13> 'T' lvalue ParmVar {{.*}} 'i' 'T'
-// CHECK-NEXT:           `-DeclRefExpr {{.*}} <col:16> 'T' lvalue ParmVar {{.*}} 'j' 'T'
-
-
-namespace TestInvalidIf {
-int g(int i) {
-  if (invalid_condition)
-    return 4;
-  else
-    return i;
-}
-}
-// CHECK: NamespaceDecl {{.*}} <{{.*}}> {{.*}} TestInvalidIf
-// CHECK-NEXT: `-FunctionDecl
-// CHECK-NEXT:   |-ParmVarDecl
-// CHECK-NEXT:   `-CompoundStmt
-// CHECK-NEXT:     `-IfStmt {{.*}} <line:25:3, line:28:12>
-// CHECK-NEXT:       |-<<<NULL>>>
-// CHECK-NEXT:       |-<<<NULL>>>
-// CHECK-NEXT:       |-OpaqueValueExpr {{.*}} <<invalid sloc>> '_Bool'
-// CHECK-NEXT:       |-ReturnStmt {{.*}} <line:26:5, col:12>
-// CHECK-NEXT:       | `-IntegerLiteral {{.*}} <col:12> 'int' 4
-// CHECK-NEXT:       `-ReturnStmt {{.*}} <line:28:5, col:12>
-// CHECK-NEXT:         `-ImplicitCastExpr {{.*}} <col:12> 'int' <LValueToRValue>
-// CHECK-NEXT:           `-DeclRefExpr {{.*}} <col:12> 'int' lvalue ParmVar {{.*}} 'i' 'int'
-
-namespace TestInvalidFunctionDecl {
-struct Str {
-   double foo1(double, invalid_type);
-};
-double Str::foo1(double, invalid_type)
-{ return 45; }
-}
-// CHECK: NamespaceDecl {{.*}} <{{.*}}> {{.*}} TestInvalidFunctionDecl
-// CHECK-NEXT: |-CXXRecordDecl {{.*}} <line:46:1, line:48:1> line:46:8 struct Str definition
-// CHECK:      | |-CXXRecordDecl {{.*}} <col:1, col:8> col:8 implicit struct Str
-// CHECK-NEXT: | `-CXXMethodDecl {{.*}} <line:47:4, col:36> col:11 invalid foo1 'double (double, int)'
-// CHECK-NEXT: |   |-ParmVarDecl {{.*}} <col:16> col:22 'double'
-// CHECK-NEXT: |   `-ParmVarDecl {{.*}} <col:24, <invalid sloc>> col:36 invalid 'int'
-// CHECK-NEXT: `-CXXMethodDecl {{.*}} parent {{.*}} <line:49:1, line:50:14> line:49:13 invalid foo1 'double (double, int)'
-// CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:18> col:24 'double'
-// CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:26, <invalid sloc>> col:38 invalid 'int'
-// CHECK-NEXT:   `-CompoundStmt {{.*}} <line:50:1, col:14>
-// CHECK-NEXT:     `-ReturnStmt {{.*}} <col:3, col:10>
-// CHECK-NEXT:       `-ImplicitCastExpr {{.*}} <col:10> 'double' <IntegralToFloating>
-// CHECK-NEXT:         `-IntegerLiteral {{.*}} <col:10> 'int' 45
+// RUN: not %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -fms-extensions -ast-dump -ast-dump-filter Test %s | FileCheck -check-prefix CHECK -strict-whitespace %s
+
+namespace TestInvalidRParenOnCXXUnresolvedConstructExpr {
+template <class T>
+void f(T i, T j) {
+  return T (i, j;
+}
+}
+
+// CHECK: NamespaceDecl {{.*}} <{{.*}}> {{.*}} TestInvalidRParenOnCXXUnresolvedConstructExpr
+// CHECK-NEXT: `-FunctionTemplateDecl
+// CHECK-NEXT:   |-TemplateTypeParmDecl
+// CHECK-NEXT:   `-FunctionDecl
+// CHECK-NEXT:     |-ParmVarDecl
+// CHECK-NEXT:     |-ParmVarDecl
+// CHECK-NEXT:     `-CompoundStmt
+// CHECK-NEXT:       `-ReturnStmt
+// CHECK-NEXT:         `-CXXUnresolvedConstructExpr {{.*}} <col:10, col:16> 'T'
+// CHECK-NEXT:           |-DeclRefExpr {{.*}} <col:13> 'T' lvalue ParmVar {{.*}} 'i' 'T'
+// CHECK-NEXT:           `-DeclRefExpr {{.*}} <col:16> 'T' lvalue ParmVar {{.*}} 'j' 'T'
+
+
+namespace TestInvalidIf {
+int g(int i) {
+  if (invalid_condition)
+    return 4;
+  else
+    return i;
+}
+}
+// CHECK: NamespaceDecl {{.*}} <{{.*}}> {{.*}} TestInvalidIf
+// CHECK-NEXT: `-FunctionDecl
+// CHECK-NEXT:   |-ParmVarDecl
+// CHECK-NEXT:   `-CompoundStmt
+// CHECK-NEXT:     `-IfStmt {{.*}} <line:25:3, line:28:12>
+// CHECK-NEXT:       |-<<<NULL>>>
+// CHECK-NEXT:       |-<<<NULL>>>
+// CHECK-NEXT:       |-OpaqueValueExpr {{.*}} <<invalid sloc>> 'bool'
+// CHECK-NEXT:       |-ReturnStmt {{.*}} <line:26:5, col:12>
+// CHECK-NEXT:       | `-IntegerLiteral {{.*}} <col:12> 'int' 4
+// CHECK-NEXT:       `-ReturnStmt {{.*}} <line:28:5, col:12>
+// CHECK-NEXT:         `-ImplicitCastExpr {{.*}} <col:12> 'int' <LValueToRValue>
+// CHECK-NEXT:           `-DeclRefExpr {{.*}} <col:12> 'int' lvalue ParmVar {{.*}} 'i' 'int'
+
+namespace TestInvalidFunctionDecl {
+struct Str {
+   double foo1(double, invalid_type);
+};
+double Str::foo1(double, invalid_type)
+{ return 45; }
+}
+// CHECK: NamespaceDecl {{.*}} <{{.*}}> {{.*}} TestInvalidFunctionDecl
+// CHECK-NEXT: |-CXXRecordDecl {{.*}} <line:46:1, line:48:1> line:46:8 struct Str definition
+// CHECK:      | |-CXXRecordDecl {{.*}} <col:1, col:8> col:8 implicit struct Str
+// CHECK-NEXT: | `-CXXMethodDecl {{.*}} <line:47:4, col:36> col:11 invalid foo1 'double (double, int)'
+// CHECK-NEXT: |   |-ParmVarDecl {{.*}} <col:16> col:22 'double'
+// CHECK-NEXT: |   `-ParmVarDecl {{.*}} <col:24, <invalid sloc>> col:36 invalid 'int'
+// CHECK-NEXT: `-CXXMethodDecl {{.*}} parent {{.*}} <line:49:1, line:50:14> line:49:13 invalid foo1 'double (double, int)'
+// CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:18> col:24 'double'
+// CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:26, <invalid sloc>> col:38 invalid 'int'
+// CHECK-NEXT:   `-CompoundStmt {{.*}} <line:50:1, col:14>
+// CHECK-NEXT:     `-ReturnStmt {{.*}} <col:3, col:10>
+// CHECK-NEXT:       `-ImplicitCastExpr {{.*}} <col:10> 'double' <IntegralToFloating>
+// CHECK-NEXT:         `-IntegerLiteral {{.*}} <col:10> 'int' 45
diff --git a/test/Misc/ast-dump-stmt.c b/test/Misc/ast-dump-stmt.c
index 1f21cf0..4618931 100644
--- a/test/Misc/ast-dump-stmt.c
+++ b/test/Misc/ast-dump-stmt.c
@@ -30,6 +30,38 @@
 // CHECK-NEXT:   IntegerLiteral
 // CHECK-NEXT:   OpaqueValueExpr
 // CHECK-NEXT:     IntegerLiteral
-// CHECK-NEXT:   OpaqueValueExpr
-// CHECK-NEXT:     IntegerLiteral
-// CHECK-NEXT:   IntegerLiteral
+// CHECK-NEXT:   OpaqueValueExpr
+// CHECK-NEXT:     IntegerLiteral
+// CHECK-NEXT:   IntegerLiteral
+
+void TestUnaryOperatorExpr(void) {
+  char T1 = 1;
+  int T2 = 1;
+
+  T1++;
+  T2++;
+  // CHECK:      UnaryOperator{{.*}}postfix '++' cannot overflow
+  // CHECK-NEXT:   DeclRefExpr{{.*}}'T1' 'char'
+  // CHECK-NOT:  UnaryOperator{{.*}}postfix '++' cannot overflow
+  // CHECK:        DeclRefExpr{{.*}}'T2' 'int'
+
+  -T1;
+  -T2;
+  // CHECK:      UnaryOperator{{.*}}prefix '-' cannot overflow
+  // CHECK-NEXT:   ImplicitCastExpr
+  // CHECK-NEXT:     ImplicitCastExpr
+  // CHECK-NEXT:       DeclRefExpr{{.*}}'T1' 'char'
+  // CHECK-NOT:  UnaryOperator{{.*}}prefix '-' cannot overflow
+  // CHECK:        ImplicitCastExpr
+  // CHECK:          DeclRefExpr{{.*}}'T2' 'int'
+
+  ~T1;
+  ~T2;
+  // CHECK:      UnaryOperator{{.*}}prefix '~' cannot overflow
+  // CHECK-NEXT:   ImplicitCastExpr
+  // CHECK-NEXT:     ImplicitCastExpr
+  // CHECK-NEXT:       DeclRefExpr{{.*}}'T1' 'char'
+  // CHECK:  	 UnaryOperator{{.*}}prefix '~' cannot overflow
+  // CHECK-NEXT:     ImplicitCastExpr
+  // CHECK-NEXT:       DeclRefExpr{{.*}}'T2' 'int'
+}
diff --git a/test/Misc/ast-print-pragmas.cpp b/test/Misc/ast-print-pragmas.cpp
index 5840c1a..f1a7b24 100644
--- a/test/Misc/ast-print-pragmas.cpp
+++ b/test/Misc/ast-print-pragmas.cpp
@@ -4,7 +4,7 @@
 // FIXME: A bug in ParsedAttributes causes the order of the attributes to be
 // reversed. The checks are consequently in the reverse order below.
 
-// CHECK: #pragma clang loop interleave_count(8)
+// CHECK: #pragma clang loop interleave_count(8){{$}}
 // CHECK-NEXT: #pragma clang loop vectorize_width(4)
 
 void test(int *List, int Length) {
@@ -61,7 +61,7 @@
 
 #ifdef MS_EXT
 #pragma init_seg(compiler)
-// MS-EXT: #pragma init_seg (.CRT$XCC)
+// MS-EXT: #pragma init_seg (.CRT$XCC){{$}}
 // MS-EXT-NEXT: int x = 3 __declspec(thread);
 int __declspec(thread) x = 3;
 #endif //MS_EXT
diff --git a/test/Misc/pragma-attribute-supported-attributes-list.test b/test/Misc/pragma-attribute-supported-attributes-list.test
index fcc2c5c..1004459 100644
--- a/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -2,7 +2,7 @@
 
 // The number of supported attributes should never go down!
 
-// CHECK: #pragma clang attribute supports 66 attributes:
+// CHECK: #pragma clang attribute supports 67 attributes:
 // CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function)
 // CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function)
 // CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function)
@@ -66,6 +66,7 @@
 // CHECK-NEXT: TLSModel (SubjectMatchRule_variable_is_thread_local)
 // CHECK-NEXT: Target (SubjectMatchRule_function)
 // CHECK-NEXT: TestTypestate (SubjectMatchRule_function_is_member)
+// CHECK-NEXT: TrivialABI (SubjectMatchRule_record)
 // CHECK-NEXT: WarnUnusedResult (SubjectMatchRule_objc_method, SubjectMatchRule_enum, SubjectMatchRule_record, SubjectMatchRule_hasType_functionType)
-// CHECK-NEXT: XRayInstrument (SubjectMatchRule_function_is_member, SubjectMatchRule_objc_method, SubjectMatchRule_function)
-// CHECK-NEXT: XRayLogArgs (SubjectMatchRule_function_is_member, SubjectMatchRule_objc_method, SubjectMatchRule_function)
+// CHECK-NEXT: XRayInstrument (SubjectMatchRule_function, SubjectMatchRule_objc_method)
+// CHECK-NEXT: XRayLogArgs (SubjectMatchRule_function, SubjectMatchRule_objc_method)
diff --git a/test/Misc/target-invalid-cpu-note.c b/test/Misc/target-invalid-cpu-note.c
new file mode 100644
index 0000000..6b98ac9
--- /dev/null
+++ b/test/Misc/target-invalid-cpu-note.c
@@ -0,0 +1,162 @@
+// RUN: not %clang_cc1 -triple armv5--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix ARM
+// ARM: error: unknown target CPU 'not-a-cpu'
+// ARM: note: valid target CPU values are:
+// ARM-SAME: arm2
+
+// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64
+// AARCH64: error: unknown target CPU 'not-a-cpu'
+// AARCH64: note: valid target CPU values are:
+// AARCH64-SAME: cortex-a35,
+
+// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
+// X86: error: unknown target CPU 'not-a-cpu'
+// X86: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3,
+// X86-SAME: i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3,
+// X86-SAME: pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott,
+// X86-SAME: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont,
+// X86-SAME: nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge,
+// X86-SAME: core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512,
+// X86-SAME: skx, cannonlake, icelake, knl, knm, lakemont, k6, k6-2, k6-3,
+// X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64,
+// X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10,
+// X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1,
+// X86-SAME: x86-64, geode
+
+// RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64
+// X86_64: error: unknown target CPU 'not-a-cpu'
+// X86_64: note: valid target CPU values are: nocona, core2, penryn, bonnell,
+// X86_64-SAME: atom, silvermont, slm, goldmont, nehalem, corei7, westmere,
+// X86_64-SAME: sandybridge, corei7-avx, ivybridge, core-avx-i, haswell,
+// X86_64-SAME: core-avx2, broadwell, skylake, skylake-avx512, skx, cannonlake,
+// X86_64-SAME: icelake, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3,
+// X86_64-SAME: athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1,
+// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, x86-64
+
+// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
+// NVPTX: error: unknown target CPU 'not-a-cpu'
+// NVPTX: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35,
+// NVPTX-SAME: sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72
+
+// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
+// R600: error: unknown target CPU 'not-a-cpu'
+// R600: note: valid target CPU values are: r600, rv630, rv635, r630, rs780, 
+// R600-SAME: rs880, rv610, rv620, rv670, rv710, rv730, rv740, rv770, cedar, 
+// R600-SAME: palm, cypress, hemlock, juniper, redwood, sumo, sumo2, barts, 
+// R600-SAME: caicos, aruba, cayman, turks
+
+
+// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
+// AMDGCN: error: unknown target CPU 'not-a-cpu'
+// AMDGCN: note: valid target CPU values are: gfx600, tahiti, gfx601, hainan,
+// AMDGCN-SAME: oland, pitcairn, verde, gfx700, kaveri, gfx701, hawaii, gfx702,
+// AMDGCN-SAME: gfx703, kabini, mullins, gfx704, bonaire, gfx801, carrizo, 
+// AMDGCN-SAME: gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11,
+// AMDGCN-SAME: gfx810, stoney, gfx900, gfx902
+
+// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
+// WEBASM: error: unknown target CPU 'not-a-cpu'
+// WEBASM: note: valid target CPU values are: mvp, bleeding-edge, generic
+
+// RUN: not %clang_cc1 -triple systemz--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SYSTEMZ
+// SYSTEMZ: error: unknown target CPU 'not-a-cpu'
+// SYSTEMZ: note: valid target CPU values are: arch8, z10, arch9, z196, arch10,
+// SYSTEMZ-SAME: zEC12, arch11, z13, arch12, z14
+
+// RUN: not %clang_cc1 -triple sparc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SPARC
+// SPARC: error: unknown target CPU 'not-a-cpu'
+// SPARC: note: valid target CPU values are: v8, supersparc, sparclite, f934,
+// SPARC-SAME: hypersparc, sparclite86x, sparclet, tsc701, v9, ultrasparc,
+// SPARC-SAME: ultrasparc3, niagara, niagara2, niagara3, niagara4, ma2100,
+// SPARC-SAME: ma2150, ma2155, ma2450, ma2455, ma2x5x, ma2080, ma2085, ma2480,
+// SPARC-SAME: ma2485, ma2x8x, myriad2, myriad2.1, myriad2.2, myriad2.3, leon2,
+// SPARC-SAME: at697e, at697f, leon3, ut699, gr712rc, leon4, gr740
+
+// RUN: not %clang_cc1 -triple sparcv9--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SPARCV9
+// SPARCV9: error: unknown target CPU 'not-a-cpu'
+// SPARCV9: note: valid target CPU values are: v9, ultrasparc, ultrasparc3, niagara, niagara2, niagara3, niagara4
+
+// RUN: not %clang_cc1 -triple powerpc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix PPC
+// PPC: error: unknown target CPU 'not-a-cpu'
+// PPC: note: valid target CPU values are: generic, 440, 450, 601, 602, 603,
+// PPC-SAME: 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750,
+// PPC-SAME: 970, g5, a2, a2q, e500mc, e5500, power3, pwr3, power4, pwr4,
+// PPC-SAME: power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, power7,
+// PPC-SAME: pwr7, power8, pwr8, power9, pwr9, powerpc, ppc, powerpc64, ppc64,
+// PPC-SAME: powerpc64le, ppc64le
+
+// RUN: not %clang_cc1 -triple nios2--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NIOS
+// NIOS: error: unknown target CPU 'not-a-cpu'
+// NIOS: note: valid target CPU values are: nios2r1, nios2r2
+
+// RUN: not %clang_cc1 -triple mips--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix MIPS
+// MIPS: error: unknown target CPU 'not-a-cpu'
+// MIPS: note: valid target CPU values are: mips1, mips2, mips3, mips4, mips5,
+// MIPS-SAME: mips32, mips32r2, mips32r3, mips32r5, mips32r6, mips64, mips64r2,
+// MIPS-SAME: mips64r3, mips64r5, mips64r6, octeon, p5600
+
+// RUN: not %clang_cc1 -triple lanai--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix LANAI
+// LANAI: error: unknown target CPU 'not-a-cpu'
+// LANAI: note: valid target CPU values are: v11
+
+// RUN: not %clang_cc1 -triple hexagon--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix HEXAGON
+// HEXAGON: error: unknown target CPU 'not-a-cpu'
+// HEXAGON: note: valid target CPU values are: hexagonv4, hexagonv5, hexagonv55,
+// HEXAGON-SAME: hexagonv60, hexagonv62, hexagonv65
+
+// RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF
+// BPF: error: unknown target CPU 'not-a-cpu'
+// BPF: note: valid target CPU values are: generic, v1, v2, probe
+
+// RUN: not %clang_cc1 -triple avr--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AVR
+// AVR: error: unknown target CPU 'not-a-cpu'
+// AVR: note: valid target CPU values are: avr1, avr2, avr25, avr3, avr31,
+// AVR-SAME: avr35, avr4, avr5, avr51, avr6, avrxmega1, avrxmega2, avrxmega3,
+// AVR-SAME: avrxmega4, avrxmega5, avrxmega6, avrxmega7, avrtiny, at90s1200,
+// AVR-SAME: attiny11, attiny12, attiny15, attiny28, at90s2313, at90s2323,
+// AVR-SAME: at90s2333, at90s2343, attiny22, attiny26, at86rf401, at90s4414,
+// AVR-SAME: t90s4433, at90s4434, at90s8515, at90c8534, at90s8535, ata5272,
+// AVR-SAME: ttiny13, attiny13a, attiny2313, attiny2313a, attiny24, attiny24a,
+// AVR-SAME: ttiny4313, attiny44, attiny44a, attiny84, attiny84a, attiny25,
+// AVR-SAME: ttiny45, attiny85, attiny261, attiny261a, attiny461, attiny461a,
+// AVR-SAME: ttiny861, attiny861a, attiny87, attiny43u, attiny48, attiny88,
+// AVR-SAME: ttiny828, at43usb355, at76c711, atmega103, at43usb320, attiny167,
+// AVR-SAME: t90usb82, at90usb162, ata5505, atmega8u2, atmega16u2,
+// AVR-SAME: atmega32u2, attiny1634, atmega8, ata6289, atmega8a, ata6285,
+// AVR-SAME: ata6286, atmega48, atmega48a, atmega48pa, atmega48p, atmega88,
+// AVR-SAME: atmega88a, atmega88p, atmega88pa, atmega8515, atmega8535,
+// AVR-SAME: atmega8hva, at90pwm1, at90pwm2, at90pwm2b, at90pwm3, at90pwm3b,
+// AVR-SAME: at90pwm81, ata5790, ata5795, atmega16, atmega16a, atmega161,
+// AVR-SAME: atmega162, atmega163, atmega164a, atmega164p, atmega164pa,
+// AVR-SAME: atmega165, atmega165a, atmega165p, atmega165pa, atmega168,
+// AVR-SAME: atmega168a, atmega168p, atmega168pa, atmega169, atmega169a,
+// AVR-SAME: atmega169p, atmega169pa, atmega32, atmega32a, atmega323,
+// AVR-SAME: atmega324a, atmega324p, atmega324pa, atmega325, atmega325a,
+// AVR-SAME: atmega325p, atmega325pa, atmega3250, atmega3250a, atmega3250p,
+// AVR-SAME: atmega3250pa, atmega328, atmega328p, atmega329, atmega329a,
+// AVR-SAME: atmega329p, atmega329pa, atmega3290, atmega3290a, atmega3290p,
+// AVR-SAME: atmega3290pa, atmega406, atmega64, atmega64a, atmega640, atmega644,
+// AVR-SAME: atmega644a, atmega644p, atmega644pa, atmega645, atmega645a,
+// AVR-SAME: tmega645p, atmega649, atmega649a, atmega649p, atmega6450,
+// AVR-SAME: tmega6450a, atmega6450p, atmega6490, atmega6490a, atmega6490p,
+// AVR-SAME: tmega64rfr2, atmega644rfr2, atmega16hva, atmega16hva2,
+// AVR-SAME: tmega16hvb, atmega16hvbrevb, atmega32hvb, atmega32hvbrevb,
+// AVR-SAME: tmega64hve, at90can32, at90can64, at90pwm161, at90pwm216,
+// AVR-SAME: t90pwm316, atmega32c1, atmega64c1, atmega16m1, atmega32m1,
+// AVR-SAME: tmega64m1, atmega16u4, atmega32u4, atmega32u6, at90usb646,
+// AVR-SAME: t90usb647, at90scr100, at94k, m3000, atmega128, atmega128a,
+// AVR-SAME: tmega1280, atmega1281, atmega1284, atmega1284p, atmega128rfa1,
+// AVR-SAME: tmega128rfr2, atmega1284rfr2, at90can128, at90usb1286,
+// AVR-SAME: t90usb1287, atmega2560, atmega2561, atmega256rfr2, atmega2564rfr2,
+// AVR-SAME: txmega16a4, atxmega16a4u, atxmega16c4, atxmega16d4, atxmega32a4,
+// AVR-SAME: txmega32a4u, atxmega32c4, atxmega32d4, atxmega32e5, atxmega16e5,
+// AVR-SAME: txmega8e5, atxmega32x1, atxmega64a3, atxmega64a3u, atxmega64a4u,
+// AVR-SAME: txmega64b1, atxmega64b3, atxmega64c3, atxmega64d3, atxmega64d4,
+// AVR-SAME: txmega64a1, atxmega64a1u, atxmega128a3, atxmega128a3u,
+// AVR-SAME: txmega128b1, atxmega128b3, atxmega128c3, atxmega128d3,
+// AVR-SAME: txmega128d4, atxmega192a3, atxmega192a3u, atxmega192c3,
+// AVR-SAME: txmega192d3, atxmega256a3, atxmega256a3u, atxmega256a3b,
+// AVR-SAME: txmega256a3bu, atxmega256c3, atxmega256d3, atxmega384c3,
+// AVR-SAME: txmega384d3, atxmega128a1, atxmega128a1u, atxmega128a4u,
+// AVR-SAME: ttiny4, attiny5, attiny9, attiny10, attiny20, attiny40, attiny102,
+// AVR-SAME: attiny104
+
diff --git a/test/Modules/ExtDebugInfo.cpp b/test/Modules/ExtDebugInfo.cpp
index 97386bc..c57f1f0 100644
--- a/test/Modules/ExtDebugInfo.cpp
+++ b/test/Modules/ExtDebugInfo.cpp
@@ -187,7 +187,7 @@
 
 // CHECK: !DIGlobalVariable(name: "anon_enum", {{.*}}, type: ![[ANON_ENUM:[0-9]+]]
 // CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, scope: ![[NS]],
-// CHECK-SAME:             line: 16
+// CHECK-SAME:             line: 19
 
 // CHECK: !DIGlobalVariable(name: "GlobalUnion",
 // CHECK-SAME:              type: ![[GLOBAL_UNION:[0-9]+]]
diff --git a/test/Modules/Inputs/DebugCXX.h b/test/Modules/Inputs/DebugCXX.h
index 1ccf8d3..8f83c0b 100644
--- a/test/Modules/Inputs/DebugCXX.h
+++ b/test/Modules/Inputs/DebugCXX.h
@@ -1,4 +1,7 @@
 /* -*- C++ -*- */
+
+#include "dummy.h"
+
 namespace DebugCXX {
   // Records.
   struct Struct {
diff --git a/test/Modules/Inputs/DependsOnModule.framework/module.map b/test/Modules/Inputs/DependsOnModule.framework/module.map
index 4d468f2..948a1ef 100644
--- a/test/Modules/Inputs/DependsOnModule.framework/module.map
+++ b/test/Modules/Inputs/DependsOnModule.framework/module.map
@@ -37,4 +37,22 @@
       export *
     }
   }
+  explicit module CXX11 {
+    requires cplusplus11
+  }
+  explicit module CXX14 {
+    requires cplusplus14
+  }
+  explicit module CXX17 {
+    requires cplusplus17
+  }
+  explicit module C99 {
+    requires c99
+  }
+  explicit module C11 {
+    requires c11
+  }
+  explicit module C17 {
+    requires c17
+  }
 }
diff --git a/test/Modules/Inputs/codegen/foo.h b/test/Modules/Inputs/codegen/foo.h
index bd3b648..76ad655 100644
--- a/test/Modules/Inputs/codegen/foo.h
+++ b/test/Modules/Inputs/codegen/foo.h
@@ -29,4 +29,7 @@
   inst<float>();
 }
 
+__attribute__((always_inline)) inline void always_inl() {
+}
+
 asm("narf");
diff --git a/test/Modules/Inputs/codegen/use.cpp b/test/Modules/Inputs/codegen/use.cpp
index cd1a4a6..3e55188 100644
--- a/test/Modules/Inputs/codegen/use.cpp
+++ b/test/Modules/Inputs/codegen/use.cpp
@@ -6,3 +6,6 @@
 void use_of_instantiated_declaration_without_definition() {
   inst<int>();
 }
+void call_always_inline() {
+  always_inl();
+}
diff --git a/test/Modules/Inputs/declare-use/h.h b/test/Modules/Inputs/declare-use/h.h
index 379e501..8984727 100644
--- a/test/Modules/Inputs/declare-use/h.h
+++ b/test/Modules/Inputs/declare-use/h.h
@@ -1,7 +1,7 @@
 #ifndef H_H
 #define H_H
 #include "c.h"
-#include "d.h" // expected-error {{does not depend on a module exporting}}
+#include "d.h" // expected-error {{module XH does not depend on a module exporting}}
 #include "h1.h"
 const int h1 = aux_h*c*7*d;
 #endif
diff --git a/test/Modules/Inputs/diag_flags.h b/test/Modules/Inputs/diag_flags.h
index 3b85c84..55eeab1 100644
--- a/test/Modules/Inputs/diag_flags.h
+++ b/test/Modules/Inputs/diag_flags.h
@@ -1 +1,4 @@
+#define pragma _Pragma("clang diagnostic push") _Pragma("clang diagnostic error \"-Wpadded\"") _Pragma("clang diagnostic pop")
+pragma
+
 struct Padded { char x; int y; };
diff --git a/test/Modules/Inputs/implicit-private-canonical/A.framework/Headers/a.h b/test/Modules/Inputs/implicit-private-canonical/A.framework/Headers/a.h
new file mode 100644
index 0000000..8b4b198
--- /dev/null
+++ b/test/Modules/Inputs/implicit-private-canonical/A.framework/Headers/a.h
@@ -0,0 +1 @@
+extern int APUBLIC;
diff --git a/test/Modules/Inputs/implicit-private-with-different-name/A.framework/Headers/aprivate.h b/test/Modules/Inputs/implicit-private-canonical/A.framework/Headers/aprivate.h
similarity index 100%
copy from test/Modules/Inputs/implicit-private-with-different-name/A.framework/Headers/aprivate.h
copy to test/Modules/Inputs/implicit-private-canonical/A.framework/Headers/aprivate.h
diff --git a/test/Modules/Inputs/implicit-private-canonical/A.framework/Modules/module.modulemap b/test/Modules/Inputs/implicit-private-canonical/A.framework/Modules/module.modulemap
new file mode 100644
index 0000000..e8455fb
--- /dev/null
+++ b/test/Modules/Inputs/implicit-private-canonical/A.framework/Modules/module.modulemap
@@ -0,0 +1,7 @@
+framework module A {
+  header "a.h"
+
+  module Pirate {}
+
+  export *
+}
diff --git a/test/Modules/Inputs/implicit-private-canonical/A.framework/Modules/module.private.modulemap b/test/Modules/Inputs/implicit-private-canonical/A.framework/Modules/module.private.modulemap
new file mode 100644
index 0000000..a7606f9
--- /dev/null
+++ b/test/Modules/Inputs/implicit-private-canonical/A.framework/Modules/module.private.modulemap
@@ -0,0 +1,4 @@
+framework module A_Private {
+  header "aprivate.h"
+  export *
+}
diff --git a/test/Modules/Inputs/implicit-private-with-different-name/A.framework/Headers/aprivate.h b/test/Modules/Inputs/implicit-private-canonical/A.framework/PrivateHeaders/aprivate.h
similarity index 100%
copy from test/Modules/Inputs/implicit-private-with-different-name/A.framework/Headers/aprivate.h
copy to test/Modules/Inputs/implicit-private-canonical/A.framework/PrivateHeaders/aprivate.h
diff --git a/test/Modules/Inputs/implicit-private-with-different-name/A.framework/Headers/aprivate.h b/test/Modules/Inputs/implicit-private-with-different-name/A.framework/PrivateHeaders/aprivate.h
similarity index 100%
rename from test/Modules/Inputs/implicit-private-with-different-name/A.framework/Headers/aprivate.h
rename to test/Modules/Inputs/implicit-private-with-different-name/A.framework/PrivateHeaders/aprivate.h
diff --git a/test/Modules/Inputs/implicit-private-with-submodule/A.framework/Headers/a.h b/test/Modules/Inputs/implicit-private-with-submodule/A.framework/Headers/a.h
new file mode 100644
index 0000000..8b4b198
--- /dev/null
+++ b/test/Modules/Inputs/implicit-private-with-submodule/A.framework/Headers/a.h
@@ -0,0 +1 @@
+extern int APUBLIC;
diff --git a/test/Modules/Inputs/implicit-private-with-different-name/A.framework/Headers/aprivate.h b/test/Modules/Inputs/implicit-private-with-submodule/A.framework/Headers/aprivate.h
similarity index 100%
copy from test/Modules/Inputs/implicit-private-with-different-name/A.framework/Headers/aprivate.h
copy to test/Modules/Inputs/implicit-private-with-submodule/A.framework/Headers/aprivate.h
diff --git a/test/Modules/Inputs/implicit-private-with-submodule/A.framework/Modules/module.modulemap b/test/Modules/Inputs/implicit-private-with-submodule/A.framework/Modules/module.modulemap
new file mode 100644
index 0000000..95eabf9
--- /dev/null
+++ b/test/Modules/Inputs/implicit-private-with-submodule/A.framework/Modules/module.modulemap
@@ -0,0 +1,4 @@
+framework module A {
+  header "a.h"
+  export *
+}
diff --git a/test/Modules/Inputs/implicit-private-with-submodule/A.framework/Modules/module.private.modulemap b/test/Modules/Inputs/implicit-private-with-submodule/A.framework/Modules/module.private.modulemap
new file mode 100644
index 0000000..4018296
--- /dev/null
+++ b/test/Modules/Inputs/implicit-private-with-submodule/A.framework/Modules/module.private.modulemap
@@ -0,0 +1,4 @@
+framework module A.Private {
+  header "aprivate.h"
+  export *
+}
diff --git a/test/Modules/Inputs/implicit-private-with-different-name/A.framework/Headers/aprivate.h b/test/Modules/Inputs/implicit-private-with-submodule/A.framework/PrivateHeaders/aprivate.h
similarity index 100%
copy from test/Modules/Inputs/implicit-private-with-different-name/A.framework/Headers/aprivate.h
copy to test/Modules/Inputs/implicit-private-with-submodule/A.framework/PrivateHeaders/aprivate.h
diff --git a/test/Modules/Inputs/odr_hash-Friend/Bad.h b/test/Modules/Inputs/odr_hash-Friend/Bad.h
new file mode 100644
index 0000000..c0a6d1f
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/Bad.h
@@ -0,0 +1,17 @@
+template <class T>
+struct iterator {
+  void Compare(const iterator &x) { return; }
+  friend void Check(iterator) { return; }
+};
+
+template <class T = int> struct Box {
+  iterator<T> I;
+
+  void test() {
+    Check(I);
+    I.Compare(I);
+  }
+};
+
+// Force instantiation of Box<int>
+Box<> B;
diff --git a/test/Modules/Inputs/odr_hash-Friend/Box.h b/test/Modules/Inputs/odr_hash-Friend/Box.h
new file mode 100644
index 0000000..01ab90d
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/Box.h
@@ -0,0 +1,14 @@
+template <class T>
+struct iterator {
+  void Compare(const iterator &x) { }
+  friend void Check(iterator) {}
+};
+
+template <class T = int> struct Box {
+  iterator<T> I;
+
+  void test() {
+    Check(I);
+    I.Compare(I);
+  }
+};
diff --git a/test/Modules/Inputs/odr_hash-Friend/Good.h b/test/Modules/Inputs/odr_hash-Friend/Good.h
new file mode 100644
index 0000000..4c38392
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/Good.h
@@ -0,0 +1,17 @@
+template <class T>
+struct iterator {
+  void Compare(const iterator &x) { }
+  friend void Check(iterator) {}
+};
+
+template <class T = int> struct Box {
+  iterator<T> I;
+
+  void test() {
+    Check(I);
+    I.Compare(I);
+  }
+};
+
+// Force instantiation of Box<int>
+Box<> B;
diff --git a/test/Modules/Inputs/odr_hash-Friend/M1.h b/test/Modules/Inputs/odr_hash-Friend/M1.h
new file mode 100644
index 0000000..202ad06
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/M1.h
@@ -0,0 +1,6 @@
+#include "Box.h"
+
+void Peek() {
+  Box<> Gift;
+  Gift.test();
+}
diff --git a/test/Modules/Inputs/odr_hash-Friend/M2.h b/test/Modules/Inputs/odr_hash-Friend/M2.h
new file mode 100644
index 0000000..69f08a9
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/M2.h
@@ -0,0 +1,5 @@
+#include "Box.h"
+void x() {
+  Box<> Unused;
+  //Unused.test();
+}
diff --git a/test/Modules/Inputs/odr_hash-Friend/M3.h b/test/Modules/Inputs/odr_hash-Friend/M3.h
new file mode 100644
index 0000000..ab457e0
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/M3.h
@@ -0,0 +1,7 @@
+#include "Box.h"
+#include "M2.h"
+
+void Party() {
+  Box<> Present;
+  Present.test();
+}
diff --git a/test/Modules/Inputs/odr_hash-Friend/module.modulemap b/test/Modules/Inputs/odr_hash-Friend/module.modulemap
new file mode 100644
index 0000000..449d51e
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/module.modulemap
@@ -0,0 +1,23 @@
+module Box {
+  header "Box.h"
+}
+
+module Module1 {
+  header "M1.h"
+}
+
+module Module2 {
+  header "M2.h"
+}
+
+module Module3 {
+  header "M3.h"
+}
+
+module Good {
+  header "Good.h"
+}
+
+module Bad {
+  header "Bad.h"
+}
diff --git a/test/Modules/Inputs/odr_hash-Unresolved/Module2/include.h b/test/Modules/Inputs/odr_hash-Unresolved/Module2/include.h
new file mode 100644
index 0000000..fa7c017
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Unresolved/Module2/include.h
@@ -0,0 +1,3 @@
+// include.h
+#include "Sub1/Z.h"
+
diff --git a/test/Modules/Inputs/odr_hash-Unresolved/Module2/not-include.h b/test/Modules/Inputs/odr_hash-Unresolved/Module2/not-include.h
new file mode 100644
index 0000000..98cd1ef
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Unresolved/Module2/not-include.h
@@ -0,0 +1,5 @@
+// not-include.h
+
+#include "function.h"
+#include "class.h"
+
diff --git a/test/Modules/Inputs/odr_hash-Unresolved/Sub1/X.h b/test/Modules/Inputs/odr_hash-Unresolved/Sub1/X.h
new file mode 100644
index 0000000..f610e66
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Unresolved/Sub1/X.h
@@ -0,0 +1,3 @@
+// X.h
+#include "Sub1/Z.h"
+
diff --git a/test/Modules/Inputs/odr_hash-Unresolved/Sub1/Y.h b/test/Modules/Inputs/odr_hash-Unresolved/Sub1/Y.h
new file mode 100644
index 0000000..f41ddd6
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Unresolved/Sub1/Y.h
@@ -0,0 +1,4 @@
+// Y.h
+#include "Sub1/Z.h"
+#include "class.h"
+
diff --git a/test/Modules/Inputs/odr_hash-Unresolved/Sub1/Z.h b/test/Modules/Inputs/odr_hash-Unresolved/Sub1/Z.h
new file mode 100644
index 0000000..b6bdc14
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Unresolved/Sub1/Z.h
@@ -0,0 +1,4 @@
+// Z.h
+#include "Sub2/A.h"
+#include "Sub2/B.h"
+
diff --git a/test/Modules/Inputs/odr_hash-Unresolved/Sub2/A.h b/test/Modules/Inputs/odr_hash-Unresolved/Sub2/A.h
new file mode 100644
index 0000000..a65b370
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Unresolved/Sub2/A.h
@@ -0,0 +1,3 @@
+// A.h
+#include "function.h"
+
diff --git a/test/Modules/Inputs/odr_hash-Unresolved/Sub2/B.h b/test/Modules/Inputs/odr_hash-Unresolved/Sub2/B.h
new file mode 100644
index 0000000..fa89f21
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Unresolved/Sub2/B.h
@@ -0,0 +1,3 @@
+// B.h
+#include "function.h"
+
diff --git a/test/Modules/Inputs/odr_hash-Unresolved/class.h b/test/Modules/Inputs/odr_hash-Unresolved/class.h
new file mode 100644
index 0000000..fe3a711
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Unresolved/class.h
@@ -0,0 +1,11 @@
+#ifndef Class
+#define Class
+template <class T>
+class S {
+  int Field;
+  void run() {
+    int x;
+    A::Check(&Field, 1);
+  }
+};
+#endif
diff --git a/test/Modules/Inputs/odr_hash-Unresolved/function.h b/test/Modules/Inputs/odr_hash-Unresolved/function.h
new file mode 100644
index 0000000..de75b2c
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Unresolved/function.h
@@ -0,0 +1,6 @@
+#ifndef Function
+#define Function
+namespace A {
+static void Check(int*, int) {}
+}
+#endif
diff --git a/test/Modules/Inputs/odr_hash-Unresolved/module.modulemap b/test/Modules/Inputs/odr_hash-Unresolved/module.modulemap
new file mode 100644
index 0000000..ac7fc43
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Unresolved/module.modulemap
@@ -0,0 +1,21 @@
+module Module1 {
+  module Sub1 {
+    umbrella "Sub1"
+    module * { export * }
+  }
+
+  module Sub2 {
+    umbrella "Sub2"
+    module * { export * }
+  }
+}
+
+module Module2 {
+  umbrella "Module2"
+  module * { export * }
+}
+
+module Other {
+  textual header "function.h"
+  textual header "class.h"
+}
diff --git a/test/Modules/Inputs/shadow/A1/A.h b/test/Modules/Inputs/shadow/A1/A.h
new file mode 100644
index 0000000..f07c681
--- /dev/null
+++ b/test/Modules/Inputs/shadow/A1/A.h
@@ -0,0 +1 @@
+#define A1_A_h
diff --git a/test/Modules/Inputs/shadow/A1/module.modulemap b/test/Modules/Inputs/shadow/A1/module.modulemap
new file mode 100644
index 0000000..9439a43
--- /dev/null
+++ b/test/Modules/Inputs/shadow/A1/module.modulemap
@@ -0,0 +1,5 @@
+module A {
+  header "A.h"
+}
+
+module A1 {}
diff --git a/test/Modules/Inputs/shadow/A2/A.h b/test/Modules/Inputs/shadow/A2/A.h
new file mode 100644
index 0000000..9880ed0
--- /dev/null
+++ b/test/Modules/Inputs/shadow/A2/A.h
@@ -0,0 +1 @@
+#define A2_A_h
diff --git a/test/Modules/Inputs/shadow/A2/module.modulemap b/test/Modules/Inputs/shadow/A2/module.modulemap
new file mode 100644
index 0000000..935d89b
--- /dev/null
+++ b/test/Modules/Inputs/shadow/A2/module.modulemap
@@ -0,0 +1,5 @@
+module A {
+  header "A.h"
+}
+
+module A2 {}
diff --git a/test/Modules/Inputs/shadowed-submodule/A1/Foo.h b/test/Modules/Inputs/shadowed-submodule/A1/Foo.h
new file mode 100644
index 0000000..57775d1
--- /dev/null
+++ b/test/Modules/Inputs/shadowed-submodule/A1/Foo.h
@@ -0,0 +1 @@
+#include <stdarg.h> // expected-error {{could not build module 'A'}}
diff --git a/test/Modules/Inputs/shadowed-submodule/A1/module.modulemap b/test/Modules/Inputs/shadowed-submodule/A1/module.modulemap
new file mode 100644
index 0000000..7afbc47
--- /dev/null
+++ b/test/Modules/Inputs/shadowed-submodule/A1/module.modulemap
@@ -0,0 +1,14 @@
+module A [system] { // expected-note {{previous definition is here}}
+  module sub {
+    header "sys/A.h"
+  }
+  module sub2 {
+    header "sys/A2.h"
+  }
+  module stdarg {
+    header "stdarg.h"
+    export *
+  }
+}
+
+module A2 {}
diff --git a/test/Modules/Inputs/shadowed-submodule/A1/sys/A.h b/test/Modules/Inputs/shadowed-submodule/A1/sys/A.h
new file mode 100644
index 0000000..4fc3e8e
--- /dev/null
+++ b/test/Modules/Inputs/shadowed-submodule/A1/sys/A.h
@@ -0,0 +1 @@
+#include <sys/A2.h>
diff --git a/test/Modules/Inputs/shadowed-submodule/A1/sys/A2.h b/test/Modules/Inputs/shadowed-submodule/A1/sys/A2.h
new file mode 100644
index 0000000..e9b6a44
--- /dev/null
+++ b/test/Modules/Inputs/shadowed-submodule/A1/sys/A2.h
@@ -0,0 +1 @@
+// nothing
diff --git a/test/Modules/Inputs/shadowed-submodule/A2/Foo.h b/test/Modules/Inputs/shadowed-submodule/A2/Foo.h
new file mode 100644
index 0000000..38ade6d
--- /dev/null
+++ b/test/Modules/Inputs/shadowed-submodule/A2/Foo.h
@@ -0,0 +1 @@
+#include <stdarg.h>
diff --git a/test/Modules/Inputs/shadowed-submodule/A2/module.modulemap b/test/Modules/Inputs/shadowed-submodule/A2/module.modulemap
new file mode 100644
index 0000000..c4e44b0
--- /dev/null
+++ b/test/Modules/Inputs/shadowed-submodule/A2/module.modulemap
@@ -0,0 +1,14 @@
+module A [system] {
+  module sub {
+    header "sys/A.h"
+  }
+  module sub2 { // expected-error {{build a shadowed submodule 'A.sub2'}}
+    header "sys/A2.h"
+  }
+  module stdarg {
+    header "stdarg.h"
+    export *
+  }
+}
+
+module A2 {}
diff --git a/test/Modules/Inputs/shadowed-submodule/A2/sys/A.h b/test/Modules/Inputs/shadowed-submodule/A2/sys/A.h
new file mode 100644
index 0000000..4fc3e8e
--- /dev/null
+++ b/test/Modules/Inputs/shadowed-submodule/A2/sys/A.h
@@ -0,0 +1 @@
+#include <sys/A2.h>
diff --git a/test/Modules/Inputs/shadowed-submodule/A2/sys/A2.h b/test/Modules/Inputs/shadowed-submodule/A2/sys/A2.h
new file mode 100644
index 0000000..e9b6a44
--- /dev/null
+++ b/test/Modules/Inputs/shadowed-submodule/A2/sys/A2.h
@@ -0,0 +1 @@
+// nothing
diff --git a/test/Modules/Inputs/shadowed-submodule/Foo/module.modulemap b/test/Modules/Inputs/shadowed-submodule/Foo/module.modulemap
new file mode 100644
index 0000000..11db9cb
--- /dev/null
+++ b/test/Modules/Inputs/shadowed-submodule/Foo/module.modulemap
@@ -0,0 +1,3 @@
+module Foo {
+  header "../A1/Foo.h"
+}
diff --git a/test/Modules/ModuleDebugInfo.cpp b/test/Modules/ModuleDebugInfo.cpp
index 008b3e4..116aa5f 100644
--- a/test/Modules/ModuleDebugInfo.cpp
+++ b/test/Modules/ModuleDebugInfo.cpp
@@ -5,12 +5,13 @@
 
 // Modules:
 // RUN: rm -rf %t
-// RUN: %clang_cc1 -triple %itanium_abi_triple -x objective-c++ -std=c++11 -debug-info-kind=limited -fmodules -fmodule-format=obj -fimplicit-module-maps -DMODULES -fmodules-cache-path=%t %s -I %S/Inputs -I %t -emit-llvm -o %t.ll -mllvm -debug-only=pchcontainer &>%t-mod.ll
+// RUN: %clang_cc1 -triple %itanium_abi_triple -x objective-c++ -std=c++11 -debugger-tuning=lldb -debug-info-kind=limited -fmodules -fmodule-format=obj -fimplicit-module-maps -DMODULES -fmodules-cache-path=%t %s -I %S/Inputs -I %t -emit-llvm -o %t.ll -mllvm -debug-only=pchcontainer &>%t-mod.ll
 // RUN: cat %t-mod.ll | FileCheck %s
 // RUN: cat %t-mod.ll | FileCheck --check-prefix=CHECK-NEG %s
+// RUN: cat %t-mod.ll | FileCheck --check-prefix=CHECK-MOD %s
 
 // PCH:
-// RUN: %clang_cc1 -triple %itanium_abi_triple -x c++ -std=c++11 -emit-pch -fmodule-format=obj -I %S/Inputs -o %t.pch %S/Inputs/DebugCXX.h -mllvm -debug-only=pchcontainer &>%t-pch.ll
+// RUN: %clang_cc1 -triple %itanium_abi_triple -x c++ -std=c++11  -debugger-tuning=lldb -emit-pch -fmodule-format=obj -I %S/Inputs -o %t.pch %S/Inputs/DebugCXX.h -mllvm -debug-only=pchcontainer &>%t-pch.ll
 // RUN: cat %t-pch.ll | FileCheck %s
 // RUN: cat %t-pch.ll | FileCheck --check-prefix=CHECK-NEG %s
 
@@ -18,6 +19,9 @@
 @import DebugCXX;
 #endif
 
+// CHECK-MOD: distinct !DICompileUnit(language: DW_LANG_{{.*}}C_plus_plus,
+// CHECK-MOD: distinct !DICompileUnit(language: DW_LANG_{{.*}}C_plus_plus,
+
 // CHECK: distinct !DICompileUnit(language: DW_LANG_{{.*}}C_plus_plus,
 // CHECK-SAME:                    isOptimized: false,
 // CHECK-NOT:                     splitDebugFilename:
@@ -27,6 +31,8 @@
 // CHECK-SAME:             identifier: "_ZTSN8DebugCXX4EnumE")
 // CHECK: !DINamespace(name: "DebugCXX"
 
+// CHECK-MOD: ![[DEBUGCXX:.*]] = !DIModule(scope: null, name: "DebugCXX
+
 // CHECK: !DICompositeType(tag: DW_TAG_enumeration_type,
 // CHECK-NOT:              name:
 // CHECK-SAME:             )
@@ -42,7 +48,7 @@
 // CHECK: !DICompositeType(tag: DW_TAG_enumeration_type,
 // CHECK-NOT:              name:
 // CHECK-SAME:             )
-// CHECK: !DIEnumerator(name: "e5", value: 5)
+// CHECK: !DIEnumerator(name: "e5", value: 5, isUnsigned: true)
 
 // CHECK: !DIDerivedType(tag: DW_TAG_typedef, name: "B",
 // no mangled name here yet.
@@ -150,4 +156,11 @@
 // CHECK-SAME:                             name: "WithSpecializedBase<float>",
 // CHECK-SAME:                             flags: DIFlagFwdDecl,
 
+// CHECK-MOD: !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: ![[DEBUGCXX]],
+// CHECK-MOD-SAME:              entity: ![[DUMMY:[0-9]+]],
+// CHECK-MOD-SAME:              line: 3)
+// CHECK-MOD: ![[DUMMY]] = !DIModule(scope: null, name: "dummy",
+// CHECK-MOD: distinct !DICompileUnit(language: DW_LANG_ObjC_plus_plus,
+// CHECK-MOD-SAME:  splitDebugFilename: "{{.*}}dummy{{.*}}.pcm",
+
 // CHECK-NEG-NOT: !DICompositeType(tag: DW_TAG_structure_type, name: "PureForwardDecl"
diff --git a/test/Modules/add-remove-private.m b/test/Modules/add-remove-private.m
index dc73a09..5e7a5a9 100644
--- a/test/Modules/add-remove-private.m
+++ b/test/Modules/add-remove-private.m
@@ -4,7 +4,7 @@
 // RUN: cp -r %S/Inputs/AddRemovePrivate.framework %t/AddRemovePrivate.framework
 
 // Build with module.private.modulemap
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -fdisable-module-hash -F %t %s -verify -DP
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -fdisable-module-hash -F %t %s -verify -DP -Wno-private-module
 // RUN: cp %t.mcp/AddRemovePrivate.pcm %t/with.pcm
 
 // Build without module.private.modulemap
@@ -17,7 +17,7 @@
 
 // Build with module.private.modulemap (again)
 // RUN: cp %S/Inputs/AddRemovePrivate.framework/Modules/module.private.modulemap %t/AddRemovePrivate.framework/Modules/module.private.modulemap
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -fdisable-module-hash -F %t %s -verify -DP
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -fdisable-module-hash -F %t %s -verify -DP -Wno-private-module
 // RUN: not diff %t.mcp/AddRemovePrivate.pcm %t/without.pcm
 
 // expected-no-diagnostics
diff --git a/test/Modules/auto-module-import.m b/test/Modules/auto-module-import.m
index 9a34c92..f6127ad 100644
--- a/test/Modules/auto-module-import.m
+++ b/test/Modules/auto-module-import.m
@@ -1,7 +1,7 @@
 // RUN: rm -rf %t
-// RUN: %clang_cc1 -Wauto-import -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs %s -verify -DERRORS
-// RUN: %clang_cc1 -Wauto-import -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs %s -verify
-// RUN: %clang_cc1 -Wauto-import -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -xobjective-c++ %s -verify
+// RUN: %clang_cc1 -Wauto-import -Wno-private-module -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs %s -verify -DERRORS
+// RUN: %clang_cc1 -Wauto-import -Wno-private-module -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs %s -verify
+// RUN: %clang_cc1 -Wauto-import -Wno-private-module -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -xobjective-c++ %s -verify
 // 
 // Test both with and without the declarations that refer to unimported
 // entities. For error recovery, those cases implicitly trigger an import.
diff --git a/test/Modules/codegen-opt.test b/test/Modules/codegen-opt.test
index 2f4997a..f62cf38 100644
--- a/test/Modules/codegen-opt.test
+++ b/test/Modules/codegen-opt.test
@@ -25,7 +25,13 @@
 FOO: declare void @_Z2f1Ri(i32*
 FOO-NOT: {{define|declare}}
 
-FIXME: this internal function should be weak_odr, comdat, and with a new mangling
+Internal functions are not modularly code generated - they are
+internal wherever they're used. This might not be ideal, but
+continues to workaround/support some oddities that backwards
+compatible modules have seen and supported in the wild.  To remove
+the duplication here, the internal functions would need to be
+promoted to weak_odr, placed in comdat and given a new mangling -
+this would be needed for the C++ Modules TS anyway.
 FOO: define internal void @_ZL2f2v() #{{[0-9]+}}
 FOO-NOT: {{define|declare}}
 
@@ -45,7 +51,7 @@
 BAR-CMN-NOT: {{define|declare}}
 BAR-OPT: declare void @_Z2f1Ri(i32*
 BAR-OPT-NOT: {{define|declare}}
-BAR-OPT: define available_externally void @_ZL2f2v()
+BAR-OPT: define internal void @_ZL2f2v()
 BAR-OPT-NOT: {{define|declare}}
 
 
@@ -61,5 +67,5 @@
 USE-OPT-NOT: {{define|declare}}
 USE-OPT: declare void @_Z2f1Ri(i32*
 USE-OPT-NOT: {{define|declare}}
-USE-OPT: define available_externally void @_ZL2f2v()
+USE-OPT: define internal void @_ZL2f2v()
 USE-OPT-NOT: {{define|declare}}
diff --git a/test/Modules/codegen.test b/test/Modules/codegen.test
index 1bdea5d..a919933 100644
--- a/test/Modules/codegen.test
+++ b/test/Modules/codegen.test
@@ -4,7 +4,7 @@
 RUN: %clang_cc1 -triple=x86_64-linux-gnu -fmodules-codegen -fmodules-debuginfo -x c++ -fmodules -emit-module -fmodule-name=foo %S/Inputs/codegen/foo.modulemap -o %t/foo.pcm
 
 RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -debug-info-kind=limited -o - %t/foo.pcm | FileCheck --check-prefix=FOO --check-prefix=BOTH %s
-RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -debug-info-kind=limited -o - -fmodules -fmodule-file=%t/foo.pcm %S/Inputs/codegen/use.cpp | FileCheck --check-prefix=BOTH --check-prefix=USE %s
+RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -debug-info-kind=limited -o - -fmodules -disable-llvm-passes -fmodule-file=%t/foo.pcm %S/Inputs/codegen/use.cpp | FileCheck --check-prefix=BOTH --check-prefix=USE %s
 
 For want of any better definition, inline asm goes "everywhere" the same as it
 if it were in a header (with the disadvantage that the inline asm will be
@@ -23,6 +23,7 @@
 FOO: $_Z2f1PKcz = comdat any
 FOO: $_ZN13implicit_dtorD1Ev = comdat any
 USE: $_Z4instIiEvv = comdat any
+USE: $_Z10always_inlv = comdat any
 FOO: $_ZN13implicit_dtorD2Ev = comdat any
 FOO: define weak_odr void @_Z2f1PKcz(i8* %fmt, ...) #{{[0-9]+}} comdat
 FOO:   call void @llvm.va_start(i8* %{{[a-zA-Z0-9]*}})
@@ -38,6 +39,7 @@
 
 USE: define linkonce_odr void @_ZN20uninst_implicit_dtorD1Ev
 USE: define linkonce_odr void @_Z4instIiEvv
+USE: define linkonce_odr void @_Z10always_inlv
 USE: define linkonce_odr void @_ZN20uninst_implicit_dtorD2Ev
 
 Modular debug info puts the definition of a class defined in a module in that
diff --git a/test/Modules/crash-vfs-path-emptydir-entries.m b/test/Modules/crash-vfs-path-emptydir-entries.m
index d96adbb..a50ea86 100644
--- a/test/Modules/crash-vfs-path-emptydir-entries.m
+++ b/test/Modules/crash-vfs-path-emptydir-entries.m
@@ -8,7 +8,7 @@
 
 // RUN: rm -rf %t
 // RUN: mkdir -p %t/i %t/m %t %t/sysroot
-// RUN: cp -a %S/Inputs/crash-recovery/usr %t/i/
+// RUN: cp -R %S/Inputs/crash-recovery/usr %t/i/
 
 // RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \
 // RUN: %clang -fsyntax-only %s -I %/t/i -isysroot %/t/sysroot/ \
diff --git a/test/Modules/crash-vfs-path-symlink-component.m b/test/Modules/crash-vfs-path-symlink-component.m
index 4723a77..565a64f 100644
--- a/test/Modules/crash-vfs-path-symlink-component.m
+++ b/test/Modules/crash-vfs-path-symlink-component.m
@@ -8,7 +8,7 @@
 
 // RUN: rm -rf %t
 // RUN: mkdir -p %t/i %t/m %t %t/sysroot
-// RUN: cp -a %S/Inputs/crash-recovery/usr %t/i/
+// RUN: cp -R %S/Inputs/crash-recovery/usr %t/i/
 // RUN: ln -s include/tcl-private %t/i/usr/x
 
 // RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \
diff --git a/test/Modules/crash-vfs-path-symlink-topheader.m b/test/Modules/crash-vfs-path-symlink-topheader.m
index 8e0c2d4..fea1f01 100644
--- a/test/Modules/crash-vfs-path-symlink-topheader.m
+++ b/test/Modules/crash-vfs-path-symlink-topheader.m
@@ -8,7 +8,7 @@
 
 // RUN: rm -rf %t
 // RUN: mkdir -p %t/i %t/m %t %t/sysroot
-// RUN: cp -a %S/Inputs/crash-recovery/usr %t/i/
+// RUN: cp -R %S/Inputs/crash-recovery/usr %t/i/
 // RUN: rm -f %t/i/usr/include/pthread_impl.h
 // RUN: ln -s pthread/pthread_impl.h %t/i/usr/include/pthread_impl.h
 
diff --git a/test/Modules/crash-vfs-umbrella-frameworks.m b/test/Modules/crash-vfs-umbrella-frameworks.m
index 0c3981d..a18acf5 100644
--- a/test/Modules/crash-vfs-umbrella-frameworks.m
+++ b/test/Modules/crash-vfs-umbrella-frameworks.m
@@ -5,7 +5,7 @@
 
 // RUN: rm -rf %t
 // RUN: mkdir -p %t/i %t/m %t
-// RUN: cp -a %S/Inputs/crash-recovery/Frameworks %t/i/
+// RUN: cp -R %S/Inputs/crash-recovery/Frameworks %t/i/
 // RUN: mkdir -p %t/i/Frameworks/A.framework/Frameworks
 // RUN: ln -s ../../B.framework %t/i/Frameworks/A.framework/Frameworks/B.framework
 
diff --git a/test/Modules/cxx-irgen.cpp b/test/Modules/cxx-irgen.cpp
index 6cc32ad..01e4384 100644
--- a/test/Modules/cxx-irgen.cpp
+++ b/test/Modules/cxx-irgen.cpp
@@ -12,15 +12,15 @@
 
 // Keep these two namespace definitions separate; merging them hides the bug.
 namespace EmitInlineMethods {
-  // CHECK-DAG: define linkonce_odr [[CC:([0-9_a-z]*cc[ ]+)?]]void @_ZN17EmitInlineMethods1C1fEPNS_1AE(
-  // CHECK-DAG: declare [[CC]]void @_ZN17EmitInlineMethods1A1gEv(
+  // CHECK-DAG: define linkonce_odr {{(dso_local )?}}[[CC:([0-9_a-z]*cc[ ]+)?]]void @_ZN17EmitInlineMethods1C1fEPNS_1AE(
+  // CHECK-DAG: declare {{(dso_local )?}}[[CC]]void @_ZN17EmitInlineMethods1A1gEv(
   struct C {
     __attribute__((used)) void f(A *p) { p->g(); }
   };
 }
 namespace EmitInlineMethods {
-  // CHECK-DAG: define linkonce_odr [[CC]]void @_ZN17EmitInlineMethods1D1fEPNS_1BE(
-  // CHECK-DAG: define linkonce_odr [[CC]]void @_ZN17EmitInlineMethods1B1gEv(
+  // CHECK-DAG: define linkonce_odr {{(dso_local )?}}[[CC]]void @_ZN17EmitInlineMethods1D1fEPNS_1BE(
+  // CHECK-DAG: define linkonce_odr {{(dso_local )?}}[[CC]]void @_ZN17EmitInlineMethods1B1gEv(
   struct D {
     __attribute__((used)) void f(B *p) { p->g(); }
   };
diff --git a/test/Modules/cxx17-inline-variables.cpp b/test/Modules/cxx17-inline-variables.cpp
new file mode 100644
index 0000000..be6a190
--- /dev/null
+++ b/test/Modules/cxx17-inline-variables.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -fmodules %s
+
+#pragma clang module build a
+module a {}
+#pragma clang module contents
+#pragma clang module begin a
+
+template <class c, c e> struct ak { static constexpr c value = e; };
+ak<bool, true> instantiate_class_definition;
+
+#pragma clang module end /* a */
+#pragma clang module endbuild
+
+
+#pragma clang module build o
+module o {}
+#pragma clang module contents
+#pragma clang module begin o
+#pragma clang module import a
+
+inline int instantiate_var_definition() { return ak<bool, true>::value; }
+
+#pragma clang module end
+#pragma clang module endbuild
+
+
+#pragma clang module import o
+#pragma clang module import a
+
+int main() { return ak<bool, true>::value; }
diff --git a/test/Modules/diag-flags.cpp b/test/Modules/diag-flags.cpp
index ada90d2..14067c6 100644
--- a/test/Modules/diag-flags.cpp
+++ b/test/Modules/diag-flags.cpp
@@ -3,24 +3,24 @@
 // For an implicit module, all that matters are the warning flags in the user.
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -emit-module -fmodules-cache-path=%t -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts
-// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -Wpadded
-// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DERROR -Wpadded -Werror
-// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DERROR -Werror=padded
+// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -DLOCAL_WARNING -Wpadded
+// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DERROR -DLOCAL_ERROR -Wpadded -Werror
+// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DERROR -DLOCAL_ERROR -Werror=padded
 //
 // For an explicit module, all that matters are the warning flags in the module build.
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -emit-module -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts -o %t/nodiag.pcm
-// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -fmodule-file=%t/nodiag.pcm -Wpadded
-// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -fmodule-file=%t/nodiag.pcm -Werror -Wpadded
+// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -fmodule-file=%t/nodiag.pcm -Wpadded -DLOCAL_WARNING
+// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -fmodule-file=%t/nodiag.pcm -Werror -Wpadded -DLOCAL_ERROR
 //
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -emit-module -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts -o %t/warning.pcm -Wpadded
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/warning.pcm
-// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/warning.pcm -Werror=padded
+// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/warning.pcm -Werror=padded -DLOCAL_ERROR
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/warning.pcm -Werror
 //
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -emit-module -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts -o %t/werror-no-error.pcm -Werror -Wpadded -Wno-error=padded
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/werror-no-error.pcm
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/werror-no-error.pcm -Wno-padded
-// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/werror-no-error.pcm -Werror=padded
+// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/werror-no-error.pcm -Werror=padded -DLOCAL_ERROR
 //
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -emit-module -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts -o %t/error.pcm -Werror=padded
 // RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DERROR -fmodule-file=%t/error.pcm -Wno-padded
@@ -35,10 +35,22 @@
 // Diagnostic flags from the module user make no difference to diagnostics
 // emitted within the module when using an explicitly-loaded module.
 #if ERROR
-// expected-error@diag_flags.h:14 {{padding struct}}
+// expected-error@diag_flags.h:4 {{padding struct}}
 #elif WARNING
-// expected-warning@diag_flags.h:14 {{padding struct}}
-#else
-// expected-no-diagnostics
+// expected-warning@diag_flags.h:4 {{padding struct}}
 #endif
 int arr[sizeof(Padded)];
+
+// Diagnostic flags from the module make no difference to diagnostics emitted
+// in the module user.
+#if LOCAL_ERROR
+// expected-error@+4 {{padding struct}}
+#elif LOCAL_WARNING
+// expected-warning@+2 {{padding struct}}
+#endif
+struct Padded2 { char x; int y; };
+int arr2[sizeof(Padded2)];
+
+#if !ERROR && !WARNING && !LOCAL_ERROR && !LOCAL_WARNING
+// expected-no-diagnostics
+#endif
diff --git a/test/Modules/friend-definition.cpp b/test/Modules/friend-definition.cpp
new file mode 100644
index 0000000..8588cbd
--- /dev/null
+++ b/test/Modules/friend-definition.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -fmodules -std=c++14 %s -verify
+// expected-no-diagnostics
+
+#pragma clang module build A
+module A {}
+#pragma clang module contents
+#pragma clang module begin A
+template<typename T> struct A {
+  friend A operator+(const A&, const A&) { return {}; }
+};
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module build B
+module B {}
+#pragma clang module contents
+#pragma clang module begin B
+#pragma clang module import A
+inline void f() { A<int> a; }
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module build C
+module C {}
+#pragma clang module contents
+#pragma clang module begin C
+#pragma clang module import A
+inline void g() { A<int> a; }
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module import A
+#pragma clang module import B
+#pragma clang module import C
+
+void h() {
+  A<int> a;
+  a + a;
+}
diff --git a/test/Modules/global_index.m b/test/Modules/global_index.m
index 64a70f2..e94c69a 100644
--- a/test/Modules/global_index.m
+++ b/test/Modules/global_index.m
@@ -1,12 +1,12 @@
 // RUN: rm -rf %t
 // Run without global module index
-// RUN: %clang_cc1 -Wauto-import -fmodules-cache-path=%t -fdisable-module-hash -fmodules -fimplicit-module-maps -fno-modules-global-index -F %S/Inputs %s -verify
+// RUN: %clang_cc1 -Wauto-import -Wno-private-module -fmodules-cache-path=%t -fdisable-module-hash -fmodules -fimplicit-module-maps -fno-modules-global-index -F %S/Inputs %s -verify
 // RUN: ls %t|not grep modules.idx
 // Run and create the global module index
-// RUN: %clang_cc1 -Wauto-import -fmodules-cache-path=%t -fdisable-module-hash -fmodules -fimplicit-module-maps -F %S/Inputs %s -verify
+// RUN: %clang_cc1 -Wauto-import -Wno-private-module -fmodules-cache-path=%t -fdisable-module-hash -fmodules -fimplicit-module-maps -F %S/Inputs %s -verify
 // RUN: ls %t|grep modules.idx
 // Run and use the global module index
-// RUN: %clang_cc1 -Wauto-import -fmodules-cache-path=%t -fdisable-module-hash -fmodules -fimplicit-module-maps -F %S/Inputs %s -verify -print-stats 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -Wauto-import -Wno-private-module -fmodules-cache-path=%t -fdisable-module-hash -fmodules -fimplicit-module-maps -F %S/Inputs %s -verify -print-stats 2>&1 | FileCheck %s
 
 // expected-no-diagnostics
 @import DependsOnModule;
diff --git a/test/Modules/implicit-map-dot-private.m b/test/Modules/implicit-map-dot-private.m
new file mode 100644
index 0000000..9fd8677
--- /dev/null
+++ b/test/Modules/implicit-map-dot-private.m
@@ -0,0 +1,12 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-canonical -emit-pch -o %t-A.pch %s -Wprivate-module
+
+#ifndef HEADER
+#define HEADER
+
+@import A.Private; // expected-warning {{no submodule named 'Private' in module 'A'; using top level 'A_Private'}}
+// expected-note@Inputs/implicit-private-canonical/A.framework/Modules/module.private.modulemap:1{{module defined here}}
+
+const int *y = &APRIVATE;
+
+#endif
diff --git a/test/Modules/implicit-private-canonical.m b/test/Modules/implicit-private-canonical.m
new file mode 100644
index 0000000..96b6c4a
--- /dev/null
+++ b/test/Modules/implicit-private-canonical.m
@@ -0,0 +1,35 @@
+// RUN: rm -rf %t
+// Build PCH using A, with adjacent private module APrivate, which winds up being implicitly referenced
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-canonical -emit-pch -o %t-A.pch %s -Wprivate-module -DNO_AT_IMPORT
+// Use the PCH with no explicit way to resolve APrivate, still pick it up by automatic second-chance search for "A" with "Private" appended
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-canonical -include-pch %t-A.pch %s -fsyntax-only -Wprivate-module -DNO_AT_IMPORT
+
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-canonical -emit-pch -o %t-A.pch %s -Wprivate-module -DUSE_AT_IMPORT_PRIV
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-canonical -include-pch %t-A.pch %s -fsyntax-only -Wprivate-module -DUSE_AT_IMPORT_PRIV
+
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-canonical -emit-pch -o %t-A.pch %s -Wprivate-module -DUSE_AT_IMPORT_BOTH
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-canonical -include-pch %t-A.pch %s -fsyntax-only -Wprivate-module -DUSE_AT_IMPORT_BOTH
+
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+#ifdef NO_AT_IMPORT
+#import "A/aprivate.h"
+#endif
+
+#ifdef USE_AT_IMPORT_PRIV
+@import A_Private;
+#endif
+
+#ifdef USE_AT_IMPORT_BOTH
+@import A;
+@import A_Private;
+#endif
+
+const int *y = &APRIVATE;
+
+#endif
diff --git a/test/Modules/implicit-private-with-different-name.m b/test/Modules/implicit-private-with-different-name.m
index c09d397..7ee8453 100644
--- a/test/Modules/implicit-private-with-different-name.m
+++ b/test/Modules/implicit-private-with-different-name.m
@@ -1,17 +1,17 @@
 // RUN: rm -rf %t
 
 // Build PCH using A, with adjacent private module APrivate, which winds up being implicitly referenced
-// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-with-different-name -emit-pch -o %t-A.pch %s
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-with-different-name -emit-pch -o %t-A.pch %s -Wprivate-module
 
 // Use the PCH with no explicit way to resolve APrivate, still pick it up by automatic second-chance search for "A" with "Private" appended
-// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-with-different-name -include-pch %t-A.pch %s -fsyntax-only
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-with-different-name -include-pch %t-A.pch %s -fsyntax-only -Wprivate-module
 
 // Check the fixit
-// RUN: %clang_cc1  -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-with-different-name -include-pch %t-A.pch %s -fsyntax-only -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-with-different-name -include-pch %t-A.pch %s -fsyntax-only -fdiagnostics-parseable-fixits -Wprivate-module %s 2>&1 | FileCheck %s
 
-// expected-warning@Inputs/implicit-private-with-different-name/A.framework/Modules/module.private.modulemap:1{{top-level module 'APrivate' in private module map, expected a submodule of 'A'}}
-// expected-note@Inputs/implicit-private-with-different-name/A.framework/Modules/module.private.modulemap:1{{make 'APrivate' a submodule of 'A' to ensure it can be found by name}}
-// CHECK: fix-it:"{{.*}}module.private.modulemap":{1:18-1:26}:"A.Private"
+// expected-warning@Inputs/implicit-private-with-different-name/A.framework/Modules/module.private.modulemap:1{{expected canonical name for private module 'APrivate'}}
+// expected-note@Inputs/implicit-private-with-different-name/A.framework/Modules/module.private.modulemap:1{{rename 'APrivate' to ensure it can be found by name}}
+// CHECK: fix-it:"{{.*}}module.private.modulemap":{1:18-1:26}:"A_Private"
 
 #ifndef HEADER
 #define HEADER
diff --git a/test/Modules/implicit-private-with-submodule.m b/test/Modules/implicit-private-with-submodule.m
new file mode 100644
index 0000000..1779341
--- /dev/null
+++ b/test/Modules/implicit-private-with-submodule.m
@@ -0,0 +1,36 @@
+// RUN: rm -rf %t
+// Build PCH using A, with private submodule A.Private
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-with-submodule -emit-pch -o %t-A.pch %s -DNO_AT_IMPORT
+
+// RUN: rm -rf %t
+// Build PCH using A, with private submodule A.Private, check the fixit
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-with-submodule -emit-pch -o %t-A.pch %s -fdiagnostics-parseable-fixits -DNO_AT_IMPORT 2>&1 | FileCheck %s
+
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-with-submodule -emit-pch -o %t-A.pch %s -DUSE_AT_IMPORT_PRIV
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-with-submodule -emit-pch -o %t-A.pch %s -DUSE_AT_IMPORT_BOTH
+
+// expected-warning@Inputs/implicit-private-with-submodule/A.framework/Modules/module.private.modulemap:1{{private submodule 'A.Private' in private module map, expected top-level module}}
+// expected-note@Inputs/implicit-private-with-submodule/A.framework/Modules/module.private.modulemap:1{{rename 'A.Private' to ensure it can be found by name}}
+// CHECK: fix-it:"{{.*}}module.private.modulemap":{1:20-1:27}:"A_Private"
+
+#ifndef HEADER
+#define HEADER
+
+#ifdef NO_AT_IMPORT
+#import "A/aprivate.h"
+#endif
+
+#ifdef USE_AT_IMPORT_PRIV
+@import A.Private;
+#endif
+
+#ifdef USE_AT_IMPORT_BOTH
+@import A;
+@import A.Private;
+#endif
+
+const int *y = &APRIVATE;
+
+#endif
diff --git a/test/Modules/merge-anon-in-extern_c.cpp b/test/Modules/merge-anon-in-extern_c.cpp
new file mode 100644
index 0000000..1443251
--- /dev/null
+++ b/test/Modules/merge-anon-in-extern_c.cpp
@@ -0,0 +1,19 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t -verify %s
+// expected-no-diagnostics
+
+#pragma clang module build sys_types
+module sys_types {}
+#pragma clang module contents
+#pragma clang module begin sys_types
+extern "C" {
+  typedef union { bool b; } pthread_mutex_t;
+}
+#pragma clang module end
+#pragma clang module endbuild
+
+typedef union { bool b; } pthread_mutex_t;
+#pragma clang module import sys_types
+
+const pthread_mutex_t *m;
+
diff --git a/test/Modules/modify-module.m b/test/Modules/modify-module.m
index f9a70b2..d59a479 100644
--- a/test/Modules/modify-module.m
+++ b/test/Modules/modify-module.m
@@ -3,9 +3,9 @@
 
 // RUN: rm -rf %t
 // RUN: mkdir -p %t/include
-// RUN: cp %S/Inputs/Modified/A.h %t/include
-// RUN: cp %S/Inputs/Modified/B.h %t/include
-// RUN: cp %S/Inputs/Modified/module.map %t/include
+// RUN: cat %S/Inputs/Modified/A.h > %t/include/A.h
+// RUN: cat %S/Inputs/Modified/B.h > %t/include/B.h
+// RUN: cat %S/Inputs/Modified/module.map > %t/include/module.map
 // RUN: %clang_cc1 -fdisable-module-hash -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t/include %s -verify
 // RUN: echo '' >> %t/include/B.h
 // RUN: %clang_cc1 -fdisable-module-hash -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t/include %s -verify
diff --git a/test/Modules/module-imported-by-pch-path.m b/test/Modules/module-imported-by-pch-path.m
new file mode 100644
index 0000000..04c6caf
--- /dev/null
+++ b/test/Modules/module-imported-by-pch-path.m
@@ -0,0 +1,17 @@
+// RUN: rm -rf %t.dst %t.cache
+// RUN: mkdir -p %t.dst/folder-with-modulemap %t.dst/pch-folder
+// RUN: echo '#import "folder-with-modulemap/included.h"' > %t.dst/header.h
+// RUN: echo 'extern int MyModuleVersion;' > %t.dst/folder-with-modulemap/MyModule.h
+// RUN: echo '@import MyModule;' > %t.dst/folder-with-modulemap/included.h
+// RUN: echo 'module MyModule { header "MyModule.h" }' > %t.dst/folder-with-modulemap/module.modulemap
+
+// RUN: %clang -o %t.dst/pch-folder/header.pch -x objective-c-header -fmodules-cache-path=%t.cache -fmodules %t.dst/header.h
+// RUN: not %clang -fsyntax-only -fmodules-cache-path=%t.cache -fmodules %s -include-pch %t.dst/pch-folder/header.pch 2>&1 | FileCheck %s
+
+void test() {
+  (void)MyModuleVersion; // should be found by implicit import
+}
+
+// CHECK: module 'MyModule' in AST file '{{.*MyModule.*pcm}}' (imported by AST file '[[PCH:.*header.pch]]') is not defined in any loaded module map file; maybe you need to load '[[PATH:.*folder-with-modulemap]]
+// CHECK: consider adding '[[PATH]]' to the header search path
+// CHECK: imported by '[[PCH]]'
diff --git a/test/Modules/module-name-private.m b/test/Modules/module-name-private.m
new file mode 100644
index 0000000..33bd3c1
--- /dev/null
+++ b/test/Modules/module-name-private.m
@@ -0,0 +1,12 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -verify -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/implicit-private-canonical -fsyntax-only %s -Wprivate-module -fmodule-name=A -Rmodule-build
+
+// Because of -fmodule-name=A, no module (A or A_Private) is supposed to be
+// built and -Rmodule-build should not produce any output.
+
+// expected-no-diagnostics
+
+#import <A/a.h>
+#import <A/aprivate.h>
+
+int foo() { return APRIVATE; }
diff --git a/test/Modules/modulemap-locations.m b/test/Modules/modulemap-locations.m
index 3c80db5..c99bb14 100644
--- a/test/Modules/modulemap-locations.m
+++ b/test/Modules/modulemap-locations.m
@@ -1,5 +1,5 @@
 // RUN: rm -rf %t 
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -I %S/Inputs/ModuleMapLocations/Module_ModuleMap -I %S/Inputs/ModuleMapLocations/Both -F %S/Inputs/ModuleMapLocations -I %S/Inputs/ModuleMapLocations -F %S/Inputs -x objective-c -fsyntax-only %s -verify
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -I %S/Inputs/ModuleMapLocations/Module_ModuleMap -I %S/Inputs/ModuleMapLocations/Both -F %S/Inputs/ModuleMapLocations -I %S/Inputs/ModuleMapLocations -F %S/Inputs -x objective-c -fsyntax-only %s -verify -Wno-private-module
 
 // regular
 @import module_modulemap;
diff --git a/test/Modules/odr.cpp b/test/Modules/odr.cpp
index 9cdbb4f..c470011 100644
--- a/test/Modules/odr.cpp
+++ b/test/Modules/odr.cpp
@@ -18,6 +18,6 @@
 // expected-note@a.h:3 {{declaration of 'f' does not match}}
 // expected-note@a.h:1 {{definition has no member 'm'}}
 
-// expected-error@b.h:5 {{'E::e2' from module 'b' is not present in definition of 'E' in module 'a'}}
+// expected-error@b.h:5 {{'e2' from module 'b' is not present in definition of 'E' in module 'a'}}
 // expected-error@b.h:3 {{'Y::f' from module 'b' is not present in definition of 'Y' in module 'a'}}
 // expected-error@b.h:2 {{'Y::m' from module 'b' is not present in definition of 'Y' in module 'a'}}
diff --git a/test/Modules/odr_hash-Friend.cpp b/test/Modules/odr_hash-Friend.cpp
new file mode 100644
index 0000000..8a2513e
--- /dev/null
+++ b/test/Modules/odr_hash-Friend.cpp
@@ -0,0 +1,90 @@
+// RUN: rm -rf %t
+
+// PR35939: MicrosoftMangle.cpp triggers an assertion failure on this test.
+// UNSUPPORTED: system-windows
+
+// RUN: %clang_cc1 \
+// RUN:  -I %S/Inputs/odr_hash-Friend \
+// RUN:  -emit-obj -o /dev/null \
+// RUN:  -fmodules \
+// RUN:  -fimplicit-module-maps \
+// RUN:  -fmodules-cache-path=%t/modules.cache \
+// RUN:  -std=c++11 -x c++ %s -verify -DTEST1 -fcolor-diagnostics
+
+// RUN: %clang_cc1 \
+// RUN:  -I %S/Inputs/odr_hash-Friend \
+// RUN:  -emit-obj -o /dev/null \
+// RUN:  -fmodules \
+// RUN:  -fimplicit-module-maps \
+// RUN:  -fmodules-cache-path=%t/modules.cache \
+// RUN:  -std=c++11 -x c++ %s -verify -DTEST2 -fcolor-diagnostics
+
+// RUN: %clang_cc1 \
+// RUN:  -I %S/Inputs/odr_hash-Friend \
+// RUN:  -emit-obj -o /dev/null \
+// RUN:  -fmodules \
+// RUN:  -fimplicit-module-maps \
+// RUN:  -fmodules-cache-path=%t/modules.cache \
+// RUN:  -std=c++11 -x c++ %s -verify -DTEST3 -fcolor-diagnostics
+
+// RUN: %clang_cc1 \
+// RUN:  -I %S/Inputs/odr_hash-Friend \
+// RUN:  -emit-obj -o /dev/null \
+// RUN:  -fmodules \
+// RUN:  -fimplicit-module-maps \
+// RUN:  -fmodules-cache-path=%t/modules.cache \
+// RUN:  -std=c++11 -x c++ %s -verify -DTEST3 -fcolor-diagnostics
+
+// RUN: %clang_cc1 \
+// RUN:  -I %S/Inputs/odr_hash-Friend \
+// RUN:  -emit-obj -o /dev/null \
+// RUN:  -fmodules \
+// RUN:  -fimplicit-module-maps \
+// RUN:  -fmodules-cache-path=%t/modules.cache \
+// RUN:  -std=c++11 -x c++ %s -verify -DTEST3 -fcolor-diagnostics
+
+#if defined(TEST1)
+#include "Box.h"
+#include "M1.h"
+#include "M3.h"
+// expected-no-diagnostics
+#endif
+
+#if defined(TEST2)
+#include "Box.h"
+#include "M1.h"
+#include "M3.h"
+#include "Good.h"
+// expected-no-diagnostics
+#endif
+
+#if defined(TEST3)
+#include "Good.h"
+#include "Box.h"
+#include "M1.h"
+#include "M3.h"
+// expected-no-diagnostics
+#endif
+
+#if defined(TEST4)
+#include "Box.h"
+#include "M1.h"
+#include "M3.h"
+#include "Bad.h"
+// expected-error@Bad.h:* {{'Check' has different definitions in different modules; definition in module 'Bad' first difference is function body}}
+// expected-note@Box.h:* {{but in 'Box' found a different body}}
+#endif
+
+#if defined(TEST5)
+#include "Bad.h"
+#include "Box.h"
+#include "M1.h"
+#include "M3.h"
+// expected-error@Bad.h:* {{'Check' has different definitions in different modules; definition in module 'Bad' first difference is function body}}
+// expected-note@Box.h:* {{but in 'Box' found a different body}}
+#endif
+
+
+void Run() {
+  Box<> Present;
+}
diff --git a/test/Modules/odr_hash-Unresolved.cpp b/test/Modules/odr_hash-Unresolved.cpp
new file mode 100644
index 0000000..0c73b50
--- /dev/null
+++ b/test/Modules/odr_hash-Unresolved.cpp
@@ -0,0 +1,14 @@
+// RUN: rm -rf %t
+
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/modules.cache \
+// RUN:  -I %S/Inputs/odr_hash-Unresolved \
+// RUN:  -fmodules \
+// RUN:  -fimplicit-module-maps \
+// RUN:  -fmodules-cache-path=%t/modules.cache \
+// RUN:  -fmodules-local-submodule-visibility \
+// RUN:  -std=c++11 -x c++ %s -fsyntax-only
+
+// Note: There is no -verify in the run line because some error messages are
+// not captured from the module building stage.
+
+#include "Module2/include.h"
diff --git a/test/Modules/odr_hash-blocks.cpp b/test/Modules/odr_hash-blocks.cpp
new file mode 100644
index 0000000..07dfa4c
--- /dev/null
+++ b/test/Modules/odr_hash-blocks.cpp
@@ -0,0 +1,119 @@
+// Clear and create directories
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: mkdir %t/cache
+// RUN: mkdir %t/Inputs
+
+// Build first header file
+// RUN: echo "#define FIRST" >> %t/Inputs/first.h
+// RUN: cat %s               >> %t/Inputs/first.h
+
+// Build second header file
+// RUN: echo "#define SECOND" >> %t/Inputs/second.h
+// RUN: cat %s                >> %t/Inputs/second.h
+
+// Test that each header can compile
+// RUN: %clang_cc1 -fsyntax-only -x c++ -std=c++11 -fblocks %t/Inputs/first.h
+// RUN: %clang_cc1 -fsyntax-only -x c++ -std=c++11 -fblocks %t/Inputs/second.h
+
+// Build module map file
+// RUN: echo "module FirstModule {"     >> %t/Inputs/module.map
+// RUN: echo "    header \"first.h\""   >> %t/Inputs/module.map
+// RUN: echo "}"                        >> %t/Inputs/module.map
+// RUN: echo "module SecondModule {"    >> %t/Inputs/module.map
+// RUN: echo "    header \"second.h\""  >> %t/Inputs/module.map
+// RUN: echo "}"                        >> %t/Inputs/module.map
+
+// Run test
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps \
+// RUN:            -fmodules-cache-path=%t/cache -x c++ -I%t/Inputs \
+// RUN:            -verify %s -std=c++11 -fblocks
+
+#if !defined(FIRST) && !defined(SECOND)
+#include "first.h"
+#include "second.h"
+#endif
+
+// Used for testing
+#if defined(FIRST)
+#define ACCESS public:
+#elif defined(SECOND)
+#define ACCESS private:
+#endif
+
+// TODO: S1, S2, and S3 should generate errors.
+namespace Blocks {
+#if defined(FIRST)
+struct S1 {
+  void (^block)(int x) = ^(int x) { };
+};
+#elif defined(SECOND)
+struct S1 {
+  void (^block)(int x) = ^(int y) { };
+};
+#else
+S1 s1;
+#endif
+
+#if defined(FIRST)
+struct S2 {
+  int (^block)(int x) = ^(int x) { return x + 1; };
+};
+#elif defined(SECOND)
+struct S2 {
+  int (^block)(int x) = ^(int x) { return x; };
+};
+#else
+S2 s2;
+#endif
+
+#if defined(FIRST)
+struct S3 {
+  void run(int (^block)(int x));
+};
+#elif defined(SECOND)
+struct S3 {
+  void run(int (^block)(int x, int y));
+};
+#else
+S3 s3;
+#endif
+
+#define DECLS                                       \
+  int (^block)(int x) = ^(int x) { return x + x; }; \
+  void run(int (^block)(int x, int y));
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'Blocks::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+
+#undef DECLS
+}
+
+// Keep macros contained to one file.
+#ifdef FIRST
+#undef FIRST
+#endif
+
+#ifdef SECOND
+#undef SECOND
+#endif
+
+#ifdef ACCESS
+#undef ACCESS
+#endif
diff --git a/test/Modules/odr_hash.cpp b/test/Modules/odr_hash.cpp
index 8ff95d2..5991dd0 100644
--- a/test/Modules/odr_hash.cpp
+++ b/test/Modules/odr_hash.cpp
@@ -12,6 +12,10 @@
 // RUN: echo "#define SECOND" >> %t/Inputs/second.h
 // RUN: cat %s                >> %t/Inputs/second.h
 
+// Test that each header can compile
+// RUN: %clang_cc1 -fsyntax-only -x c++ -std=c++1z %t/Inputs/first.h
+// RUN: %clang_cc1 -fsyntax-only -x c++ -std=c++1z %t/Inputs/second.h
+
 // Build module map file
 // RUN: echo "module FirstModule {"     >> %t/Inputs/module.map
 // RUN: echo "    header \"first.h\""   >> %t/Inputs/module.map
@@ -28,6 +32,13 @@
 #include "second.h"
 #endif
 
+// Used for testing
+#if defined(FIRST)
+#define ACCESS public:
+#elif defined(SECOND)
+#define ACCESS private:
+#endif
+
 namespace AccessSpecifiers {
 #if defined(FIRST)
 struct S1 {
@@ -55,6 +66,32 @@
 // expected-error@second.h:* {{'AccessSpecifiers::S2' has different definitions in different modules; first difference is definition in module 'SecondModule' found protected access specifier}}
 // expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
 #endif
+
+#define DECLS \
+public:       \
+private:      \
+protected:
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'AccessSpecifiers::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+
+#undef DECLS
 } // namespace AccessSpecifiers
 
 namespace StaticAssert {
@@ -113,7 +150,31 @@
 // expected-error@second.h:* {{'StaticAssert::S4' has different definitions in different modules; first difference is definition in module 'SecondModule' found public access specifier}}
 // expected-note@first.h:* {{but in 'FirstModule' found static assert}}
 #endif
-}
+
+#define DECLS                       \
+  static_assert(4 == 4, "Message"); \
+  static_assert(5 == 5);
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'StaticAssert::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
+}  // namespace StaticAssert
 
 namespace Field {
 #if defined(FIRST)
@@ -302,6 +363,38 @@
 // expected-error@first.h:* {{'Field::S13::x' from module 'FirstModule' is not present in definition of 'Field::S13' in module 'SecondModule'}}
 // expected-note@second.h:* {{declaration of 'x' does not match}}
 #endif
+
+#define DECLS         \
+  int a;              \
+  int b : 3;          \
+  unsigned c : 1 + 2; \
+  s d;                \
+  double e = 1.0;     \
+  long f[5];
+
+#if defined(FIRST) || defined(SECOND)
+typedef short s;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'Field::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
 }  // namespace Field
 
 namespace Method {
@@ -464,11 +557,11 @@
 
 #if defined(FIRST)
 struct S11 {
-  void A(int x) {}
+  void A(int x);
 };
 #elif defined(SECOND)
 struct S11 {
-  void A(int y) {}
+  void A(int y);
 };
 #else
 S11 s11;
@@ -478,11 +571,11 @@
 
 #if defined(FIRST)
 struct S12 {
-  void A(int x) {}
+  void A(int x);
 };
 #elif defined(SECOND)
 struct S12 {
-  void A(int x = 1) {}
+  void A(int x = 1);
 };
 #else
 S12 s12;
@@ -492,11 +585,11 @@
 
 #if defined(FIRST)
 struct S13 {
-  void A(int x = 1 + 0) {}
+  void A(int x = 1 + 0);
 };
 #elif defined(SECOND)
 struct S13 {
-  void A(int x = 1) {}
+  void A(int x = 1);
 };
 #else
 S13 s13;
@@ -506,11 +599,11 @@
 
 #if defined(FIRST)
 struct S14 {
-  void A(int x[2]) {}
+  void A(int x[2]);
 };
 #elif defined(SECOND)
 struct S14 {
-  void A(int x[3]) {}
+  void A(int x[3]);
 };
 #else
 S14 s14;
@@ -531,6 +624,40 @@
 // expected-error@first.h:* {{'Method::S15::A' from module 'FirstModule' is not present in definition of 'Method::S15' in module 'SecondModule'}}
 // expected-note@second.h:* {{declaration of 'A' does not match}}
 #endif
+
+#define DECLS            \
+  void A();              \
+  static void B();       \
+  virtual void C();      \
+  virtual void D() = 0;  \
+  inline void E();       \
+  void F() const;        \
+  void G() volatile;     \
+  void H(int x);         \
+  void I(int x = 5 + 5); \
+  void J(int);           \
+  void K(int x[2]);      \
+  int L();
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1* v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1* i1;
+// expected-error@second.h:* {{'Method::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
 }  // namespace Method
 
 namespace Constructor {
@@ -565,6 +692,31 @@
 // expected-error@second.h:* {{'Constructor::S2' has different definitions in different modules; first difference is definition in module 'SecondModule' found constructor that has 2 parameters}}
 // expected-note@first.h:* {{but in 'FirstModule' found constructor that has 1 parameter}}
 #endif
+
+#define DECLS(CLASS) \
+  CLASS(int);        \
+  CLASS(double);     \
+  CLASS(int, int);
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS(Valid1)
+};
+#else
+Valid1* v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS(Invalid1)
+  ACCESS
+};
+#else
+Invalid1* i1;
+// expected-error@second.h:* {{'Constructor::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
 }  // namespace Constructor
 
 namespace Destructor {
@@ -600,32 +752,44 @@
 // expected-note@first.h:* {{but in 'FirstModule' found destructor is virtual}}
 #endif
 
-}  // namespace Destructor
-
-// Naive parsing of AST can lead to cycles in processing.  Ensure
-// self-references don't trigger an endless cycles of AST node processing.
-namespace SelfReference {
-#if defined(FIRST)
-template <template <int> class T> class Wrapper {};
-
-template <int N> class S {
-  S(Wrapper<::SelfReference::S> &Ref) {}
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  ~Valid1();
 };
-
-struct Xx {
-  struct Yy {
-  };
-};
-
-Xx::Xx::Xx::Yy yy;
-
-namespace NNS {
-template <typename> struct Foo;
-template <template <class> class T = NNS::Foo>
-struct NestedNamespaceSpecifier {};
-}
+#else
+Valid1 v1;
 #endif
-}  // namespace SelfReference
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  ~Invalid1();
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'Destructor::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid2 {
+  virtual ~Valid2();
+};
+#else
+Valid2 v2;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid2 {
+  virtual ~Invalid2();
+  ACCESS
+};
+#else
+Invalid2 i2;
+// expected-error@second.h:* {{'Destructor::Invalid2' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+}  // namespace Destructor
 
 namespace TypeDef {
 #if defined(FIRST)
@@ -722,6 +886,35 @@
 // expected-error@second.h:* {{'TypeDef::S6' has different definitions in different modules; first difference is definition in module 'SecondModule' found typedef 'b' with underlying type 'float'}}
 // expected-note@first.h:* {{but in 'FirstModule' found typedef 'b' with different underlying type 'TypeDef::F' (aka 'float')}}
 #endif
+
+#define DECLS       \
+  typedef int A;    \
+  typedef double B; \
+  typedef I C;
+
+#if defined(FIRST) || defined(SECOND)
+typedef int I;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'TypeDef::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
 }  // namespace TypeDef
 
 namespace Using {
@@ -819,6 +1012,35 @@
 // expected-error@second.h:* {{'Using::S6' has different definitions in different modules; first difference is definition in module 'SecondModule' found type alias 'b' with underlying type 'float'}}
 // expected-note@first.h:* {{but in 'FirstModule' found type alias 'b' with different underlying type 'Using::F' (aka 'float')}}
 #endif
+
+#if defined(FIRST) || defined(SECOND)
+using I = int;
+#endif
+
+#define DECLS       \
+  using A = int;    \
+  using B = double; \
+  using C = I;
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'Using::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
 }  // namespace Using
 
 namespace RecordType {
@@ -837,7 +1059,34 @@
 // expected-error@first.h:* {{'RecordType::S1::x' from module 'FirstModule' is not present in definition of 'RecordType::S1' in module 'SecondModule'}}
 // expected-note@second.h:* {{declaration of 'x' does not match}}
 #endif
-}
+
+#define DECLS \
+  Foo F;
+
+#if defined(FIRST) || defined(SECOND)
+struct Foo {};
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'RecordType::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
+}  // namespace RecordType
 
 namespace DependentType {
 #if defined(FIRST)
@@ -856,7 +1105,34 @@
 // expected-error@first.h:* {{'DependentType::S1::x' from module 'FirstModule' is not present in definition of 'S1<T>' in module 'SecondModule'}}
 // expected-note@second.h:* {{declaration of 'x' does not match}}
 #endif
-}
+
+#define DECLS \
+  typename T::typeA x;
+
+#if defined(FIRST) || defined(SECOND)
+template <class T>
+struct Valid1 {
+  DECLS
+};
+#else
+template <class T>
+using V1 = Valid1<T>;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+template <class T>
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+template <class T>
+using I1 = Invalid1<T>;
+// expected-error@second.h:* {{'DependentType::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
+}  // namespace DependentType
 
 namespace ElaboratedType {
 #if defined(FIRST)
@@ -874,7 +1150,34 @@
 // expected-error@first.h:* {{'ElaboratedType::S1::x' from module 'FirstModule' is not present in definition of 'ElaboratedType::S1' in module 'SecondModule'}}
 // expected-note@second.h:* {{declaration of 'x' does not match}}
 #endif
-}
+
+#define DECLS \
+  NS::type x;
+
+#if defined(FIRST) || defined(SECOND)
+namespace NS { using type = float; }
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'ElaboratedType::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
+}  // namespace ElaboratedType
 
 namespace Enum {
 #if defined(FIRST)
@@ -892,6 +1195,33 @@
 // expected-error@first.h:* {{'Enum::S1::x' from module 'FirstModule' is not present in definition of 'Enum::S1' in module 'SecondModule'}}
 // expected-note@second.h:* {{declaration of 'x' does not match}}
 #endif
+
+#define DECLS \
+  E e = E1;
+
+#if defined(FIRST) || defined(SECOND)
+enum E { E1, E2 };
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'Enum::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
 }
 
 namespace NestedNamespaceSpecifier {
@@ -1069,7 +1399,73 @@
 // expected-note@first.h:* {{declaration of 'x' does not match}}
 #endif
 }
+
+#define DECLS       \
+  NS1::Type a;      \
+  NS1::NS2::Type b; \
+  NS1::S c;         \
+  NS3::Type d;
+
+#if defined(FIRST) || defined(SECOND)
+namespace NS1 {
+  using Type = int;
+  namespace NS2 {
+    using Type = double;
+  }
+  struct S {};
 }
+namespace NS3 = NS1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'NestedNamespaceSpecifier::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
+
+#define DECLS               \
+  typename T::type *x = {}; \
+  int y = x->T::foo();      \
+  int z = U::template X<int>::value;
+
+#if defined(FIRST) || defined(SECOND)
+template <class T, class U>
+struct Valid2 {
+  DECLS
+};
+#else
+template <class T, class U>
+using V2 = Valid2<T, U>;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+template <class T, class U>
+struct Invalid2 {
+  DECLS
+  ACCESS
+};
+#else
+template <class T, class U>
+using I2 = Invalid2<T, U>;
+// expected-error@second.h:* {{'NestedNamespaceSpecifier::Invalid2' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
+}  // namespace NestedNamespaceSpecifier
 
 namespace TemplateSpecializationType {
 #if defined(FIRST)
@@ -1103,7 +1499,40 @@
 // expected-error@first.h:* {{'TemplateSpecializationType::S2::u' from module 'FirstModule' is not present in definition of 'TemplateSpecializationType::S2' in module 'SecondModule'}}
 // expected-note@second.h:* {{declaration of 'u' does not match}}
 #endif
-}
+
+#define DECLS                       \
+  OneTemplateArg<int> x;            \
+  OneTemplateArg<double> y;         \
+  OneTemplateArg<char *> z;         \
+  TwoTemplateArgs<int, int> a;      \
+  TwoTemplateArgs<double, float> b; \
+  TwoTemplateArgs<short *, char> c;
+
+#if defined(FIRST) || defined(SECOND)
+template <class T> struct OneTemplateArg {};
+template <class T, class U> struct TwoTemplateArgs {};
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+DECLS
+ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'TemplateSpecializationType::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
+}  // namespace TemplateSpecializationType
 
 namespace TemplateArgument {
 #if defined(FIRST)
@@ -1205,7 +1634,43 @@
 // expected-error@second.h:* {{'TemplateArgument::S6' has different definitions in different modules; first difference is definition in module 'SecondModule' found field 'y'}}
 // expected-note@first.h:* {{but in 'FirstModule' found field 'x'}}
 #endif
-}
+
+#define DECLS                   \
+  OneClass<int> a;              \
+  OneInt<1> b;                  \
+  using c = OneClass<float>;    \
+  using d = OneInt<2>;          \
+  using e = OneInt<2 + 2>;      \
+  OneTemplateClass<OneClass> f; \
+  OneTemplateInt<OneInt> g;
+
+#if defined(FIRST) || defined(SECOND)
+template <class> struct OneClass{};
+template <int> struct OneInt{};
+template <template <class> class> struct OneTemplateClass{};
+template <template <int> class> struct OneTemplateInt{};
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+DECLS
+ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'TemplateArgument::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
+}  // namespace TemplateArgument
 
 namespace TemplateTypeParmType {
 #if defined(FIRST)
@@ -1247,7 +1712,41 @@
 // expected-error@first.h:* {{'TemplateTypeParmType::S2::type' from module 'FirstModule' is not present in definition of 'S2<T, U>' in module 'SecondModule'}}
 // expected-note@second.h:* {{declaration of 'type' does not match}}
 #endif
-}
+
+#define DECLS            \
+  T t;                   \
+  U u;                   \
+  ParameterPack<T> a;    \
+  ParameterPack<T, U> b; \
+  ParameterPack<U> c;    \
+  ParameterPack<U, T> d;
+
+#if defined(FIRST) || defined(SECOND)
+template <class ...Ts> struct ParameterPack {};
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+template <class T, class U>
+struct Valid1 {
+  DECLS
+};
+#else
+using TemplateTypeParmType::Valid1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+template <class T, class U>
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+using TemplateTypeParmType::Invalid1;
+// expected-error@second.h:* {{'TemplateTypeParmType::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
+}  // namespace TemplateTypeParmType
 
 namespace VarDecl {
 #if defined(FIRST)
@@ -1381,46 +1880,36 @@
 // expected-note@second.h:* {{declaration of 'x' does not match}}
 #endif
 
-#if defined(FIRST)
-template <typename T>
-struct S {
-  struct R {
-    void foo(T x = 0) {}
-  };
-};
-#elif defined(SECOND)
-template <typename T>
-struct S {
-  struct R {
-    void foo(T x = 1) {}
-  };
-};
-#else
-void run() {
-  S<int>::R().foo();
-}
-// expected-error@second.h:* {{'VarDecl::S::R' has different definitions in different modules; first difference is definition in module 'SecondModule' found method 'foo' with 1st parameter with a default argument}}
-// expected-note@first.h:* {{but in 'FirstModule' found method 'foo' with 1st parameter with a different default argument}}
+#define DECLS             \
+  static int a;           \
+  static I b;             \
+  static const int c = 1; \
+  static constexpr int d = 5;
+
+#if defined(FIRST) || defined(SECOND)
+using I = int;
 #endif
 
-#if defined(FIRST)
-template <typename alpha> struct Bravo {
-  void charlie(bool delta = false) {}
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
 };
-typedef Bravo<char> echo;
-echo foxtrot;
-#elif defined(SECOND)
-template <typename alpha> struct Bravo {
-  void charlie(bool delta = (false)) {}
-};
-typedef Bravo<char> echo;
-echo foxtrot;
 #else
-Bravo<char> golf;
-// expected-error@second.h:* {{'VarDecl::Bravo' has different definitions in different modules; first difference is definition in module 'SecondModule' found method 'charlie' with 1st parameter with a default argument}}
-// expected-note@first.h:* {{but in 'FirstModule' found method 'charlie' with 1st parameter with a different default argument}}
+Valid1 v1;
 #endif
-}
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'VarDecl::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
+}  // namespace VarDecl
 
 namespace Friend {
 #if defined(FIRST)
@@ -1499,7 +1988,41 @@
 // expected-error@second.h:* {{'Friend::S5' has different definitions in different modules; first difference is definition in module 'SecondModule' found friend function 'T5b'}}
 // expected-note@first.h:* {{but in 'FirstModule' found friend function 'T5a'}}
 #endif
-}
+
+#define DECLS            \
+  friend class FriendA;  \
+  friend struct FriendB; \
+  friend FriendC;        \
+  friend const FriendD;  \
+  friend void Function();
+
+#if defined(FIRST) || defined(SECOND)
+class FriendA {};
+class FriendB {};
+class FriendC {};
+class FriendD {};
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'Friend::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
+}  // namespace Friend
 
 namespace TemplateParameters {
 #if defined(FIRST)
@@ -1575,6 +2098,38 @@
 // expected-error@second.h:* {{'TemplateParameters::S6' has different definitions in different modules; first difference is definition in module 'SecondModule' found unnamed template parameter}}
 // expected-note@first.h:* {{but in 'FirstModule' found template parameter 'A'}}
 #endif
+
+#define DECLS
+
+#if defined(FIRST) || defined(SECOND)
+template <class> class DefaultArg;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+template <int, class, template <class> class,
+          int A, class B, template <int> class C,
+          int D = 1, class E = int, template <class F> class = DefaultArg>
+struct Valid1 {
+  DECLS
+};
+#else
+using TemplateParameters::Valid1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+template <int, class, template <class> class,
+          int A, class B, template <int> class C,
+          int D = 1, class E = int, template <class F> class = DefaultArg>
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+using TemplateParameters::Invalid1;
+// expected-error@second.h:* {{'TemplateParameters::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
 }  // namespace TemplateParameters
 
 namespace BaseClass {
@@ -1695,68 +2250,69 @@
 // expected-error@second.h:* {{'BaseClass::S10' has different definitions in different modules; first difference is definition in module 'SecondModule' found 1st base class 'BaseClass::B10a' with protected access specifier}}
 // expected-note@first.h:* {{but in 'FirstModule' found 1st base class 'BaseClass::B10a' with no access specifier}}
 #endif
+
+#define DECLS
+
+#if defined(FIRST) || defined(SECOND)
+struct Base1 {};
+struct Base2 {};
+struct Base3 {};
+struct Base4 {};
+struct Base5 {};
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 :
+  Base1, virtual Base2, protected Base3, public Base4, private Base5 {
+
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 :
+  Base1, virtual Base2, protected Base3, public Base4, private Base5 {
+
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'BaseClass::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+#undef DECLS
 }  // namespace BaseClass
 
-// Interesting cases that should not cause errors.  struct S should not error
-// while struct T should error at the access specifier mismatch at the end.
-namespace AllDecls {
-#define CREATE_ALL_DECL_STRUCT(NAME, ACCESS)               \
-  typedef int INT;                                         \
-  struct NAME {                                            \
-  public:                                                  \
-  private:                                                 \
-  protected:                                               \
-    static_assert(1 == 1, "Message");                      \
-    static_assert(2 == 2);                                 \
-                                                           \
-    int x;                                                 \
-    double y;                                              \
-                                                           \
-    INT z;                                                 \
-                                                           \
-    unsigned a : 1;                                        \
-    unsigned b : 2 * 2 + 5 / 2;                            \
-                                                           \
-    mutable int c = sizeof(x + y);                         \
-                                                           \
-    void method() {}                                       \
-    static void static_method() {}                         \
-    virtual void virtual_method() {}                       \
-    virtual void pure_virtual_method() = 0;                \
-    inline void inline_method() {}                         \
-    void volatile_method() volatile {}                     \
-    void const_method() const {}                           \
-                                                           \
-    typedef int typedef_int;                               \
-    using using_int = int;                                 \
-                                                           \
-    void method_one_arg(int x) {}                          \
-    void method_one_arg_default_argument(int x = 5 + 5) {} \
-    void method_decayed_type(int x[5]) {}                  \
-                                                           \
-    int constant_arr[5];                                   \
-                                                           \
-    ACCESS:                                                \
+
+// Collection of interesting cases below.
+
+// Naive parsing of AST can lead to cycles in processing.  Ensure
+// self-references don't trigger an endless cycles of AST node processing.
+namespace SelfReference {
+#if defined(FIRST)
+template <template <int> class T> class Wrapper {};
+
+template <int N> class S {
+  S(Wrapper<::SelfReference::S> &Ref) {}
+};
+
+struct Xx {
+  struct Yy {
   };
+};
 
-#if defined(FIRST)
-CREATE_ALL_DECL_STRUCT(S, public)
-#elif defined(SECOND)
-CREATE_ALL_DECL_STRUCT(S, public)
-#else
-S *s;
-#endif
+Xx::Xx::Xx::Yy yy;
 
-#if defined(FIRST)
-CREATE_ALL_DECL_STRUCT(T, private)
-#elif defined(SECOND)
-CREATE_ALL_DECL_STRUCT(T, public)
-#else
-T *t;
-// expected-error@second.h:* {{'AllDecls::T' has different definitions in different modules; first difference is definition in module 'SecondModule' found public access specifier}}
-// expected-note@first.h:* {{but in 'FirstModule' found private access specifier}}
-#endif
+namespace NNS {
+template <typename> struct Foo;
+template <template <class> class T = NNS::Foo>
+struct NestedNamespaceSpecifier {};
 }
+#endif
+}  // namespace SelfReference
 
 namespace FriendFunction {
 #if defined(FIRST)
@@ -1825,7 +2381,7 @@
 // expected-note@second.h:* {{but in 'SecondModule' found public access specifier}}
 #endif
 
-}  // namespace ImplicitDelc
+}  // namespace ImplicitDecl
 
 namespace TemplatedClass {
 #if defined(FIRST)
@@ -2051,7 +2607,7 @@
   S<int>::R().foo();
 }
 #endif
-}
+}  // namespace LateParsedDefaultArgument
 
 namespace LateParsedDefaultArgument {
 #if defined(FIRST)
@@ -2065,7 +2621,7 @@
 #elif defined(SECOND)
 #else
 #endif
-}
+}  // LateParsedDefaultArgument
 
 namespace DifferentParameterNameInTemplate {
 #if defined(FIRST) || defined(SECOND)
@@ -2111,7 +2667,7 @@
 #else
 Alpha::Alpha() {}
 #endif
-}
+}  // DifferentParameterNameInTemplate
 
 namespace ParameterTest {
 #if defined(FIRST)
@@ -2138,7 +2694,7 @@
 #else
 S<X> s;
 #endif
-}
+}  // ParameterTest
 
 namespace MultipleTypedefs {
 #if defined(FIRST)
@@ -2188,12 +2744,210 @@
 #else
 S3 s3;
 #endif
+}  // MultipleTypedefs
+
+namespace DefaultArguments {
+#if defined(FIRST)
+template <typename T>
+struct S {
+  struct R {
+    void foo(T x = 0);
+  };
+};
+#elif defined(SECOND)
+template <typename T>
+struct S {
+  struct R {
+    void foo(T x = 1);
+  };
+};
+#else
+void run() {
+  S<int>::R().foo();
+}
+// expected-error@second.h:* {{'DefaultArguments::S::R' has different definitions in different modules; first difference is definition in module 'SecondModule' found method 'foo' with 1st parameter with a default argument}}
+// expected-note@first.h:* {{but in 'FirstModule' found method 'foo' with 1st parameter with a different default argument}}
+#endif
+
+#if defined(FIRST)
+template <typename alpha> struct Bravo {
+  void charlie(bool delta = false);
+};
+typedef Bravo<char> echo;
+echo foxtrot;
+#elif defined(SECOND)
+template <typename alpha> struct Bravo {
+  void charlie(bool delta = (false));
+};
+typedef Bravo<char> echo;
+echo foxtrot;
+#else
+Bravo<char> golf;
+// expected-error@second.h:* {{'DefaultArguments::Bravo' has different definitions in different modules; first difference is definition in module 'SecondModule' found method 'charlie' with 1st parameter with a default argument}}
+// expected-note@first.h:* {{but in 'FirstModule' found method 'charlie' with 1st parameter with a different default argument}}
+#endif
+}  // namespace DefaultArguments
+
+namespace FunctionDecl {
+#if defined(FIRST)
+struct S1 {};
+S1 s1a;
+#elif defined(SECOND)
+struct S1 {};
+#else
+S1 s1;
+#endif
+
+#if defined(FIRST)
+struct S2 {
+  S2() = default;
+};
+S2 s2a = S2();
+#elif defined(SECOND)
+struct S2 {
+  S2() = default;
+};
+#else
+S2 s2;
+#endif
+
+#if defined(FIRST)
+struct S3 {
+  S3() = delete;
+};
+S3* s3c;
+#elif defined(SECOND)
+struct S3 {
+  S3() = delete;
+};
+#else
+S3* s3;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+int F1(int x, float y = 2.7) { return 1; }
+#else
+int I1 = F1(1);
+#endif
+
+#if defined(FIRST)
+int F2() { return 1; }
+#elif defined(SECOND)
+double F2() { return 1; }
+#else
+int I2 = F2();
+// expected-error@-1 {{call to 'F2' is ambiguous}}
+// expected-note@first.h:* {{candidate function}}
+// expected-note@second.h:* {{candidate function}}
+#endif
+
+#if defined(FIRST)
+int F3(float) { return 1; }
+#elif defined(SECOND)
+int F3(double) { return 1; }
+#else
+int I3 = F3(1);
+// expected-error@-1 {{call to 'F3' is ambiguous}}
+// expected-note@first.h:* {{candidate function}}
+// expected-note@second.h:* {{candidate function}}
+#endif
+
+#if defined(FIRST)
+int F4(int x) { return 1; }
+#elif defined(SECOND)
+int F4(int y) { return 1; }
+#else
+int I4 = F4(1);
+// expected-error@second.h:* {{'FunctionDecl::F4' has different definitions in different modules; definition in module 'SecondModule' first difference is 1st parameter with name 'y'}}
+// expected-note@first.h:* {{but in 'FirstModule' found 1st parameter with name 'x'}}
+#endif
+
+#if defined(FIRST)
+int F5(int x) { return 1; }
+#elif defined(SECOND)
+int F5(int x = 1) { return 1; }
+#else
+int I5 = F6(1);
+// expected-error@second.h:* {{'FunctionDecl::F5' has different definitions in different modules; definition in module 'SecondModule' first difference is 1st parameter without a default argument}}
+// expected-note@first.h:* {{but in 'FirstModule' found 1st parameter with a default argument}}
+#endif
+
+#if defined(FIRST)
+int F6(int x = 2) { return 1; }
+#elif defined(SECOND)
+int F6(int x = 1) { return 1; }
+#else
+int I6 = F6(1);
+// expected-error@second.h:* {{'FunctionDecl::F6' has different definitions in different modules; definition in module 'SecondModule' first difference is 1st parameter with a default argument}}
+// expected-note@first.h:* {{but in 'FirstModule' found 1st parameter with a different default argument}}
+#endif
+
+using I = int;
+#if defined(FIRST)
+I F7() { return 0; }
+#elif defined(SECOND)
+int F7() { return 0; }
+#else
+int I7 = F7();
+// expected-error@second.h:* {{'FunctionDecl::F7' has different definitions in different modules; definition in module 'SecondModule' first difference is return type is 'int'}}
+// expected-note@first.h:* {{but in 'FirstModule' found different return type 'FunctionDecl::I' (aka 'int')}}
+#endif
+
+#if defined(FIRST)
+int F8(int) { return 0; }
+#elif defined(SECOND)
+int F8(I) { return 0; }
+#else
+int I8 = F8(1);
+// expected-error@second.h:* {{'FunctionDecl::F8' has different definitions in different modules; definition in module 'SecondModule' first difference is 1st parameter with type 'FunctionDecl::I' (aka 'int')}}
+// expected-note@first.h:* {{but in 'FirstModule' found 1st parameter with type 'int'}}
+#endif
+
+#if defined(FIRST)
+int F9(int[1]) { return 0; }
+#elif defined(SECOND)
+int F9(int[2]) { return 0; }
+#else
+int I9 = F9(nullptr);
+// expected-error@second.h:* {{'FunctionDecl::F9' has different definitions in different modules; definition in module 'SecondModule' first difference is 1st parameter with type 'int *' decayed from 'int [2]'}}
+// expected-note@first.h:* {{but in 'FirstModule' found 1st parameter with type 'int *' decayed from 'int [1]'}}
+#endif
+
+#if defined(FIRST)
+int F10() { return 1; }
+#elif defined(SECOND)
+int F10() { return 2; }
+#else
+int I10 = F10();
+#endif
+// expected-error@second.h:* {{'FunctionDecl::F10' has different definitions in different modules; definition in module 'SecondModule' first difference is function body}}
+// expected-note@first.h:* {{but in 'FirstModule' found a different body}}
+}  // namespace FunctionDecl
+
+namespace DeclTemplateArguments {
+#if defined(FIRST)
+int foo() { return 1; }
+int bar() { return foo(); }
+#elif defined(SECOND)
+template <class T = int>
+int foo() { return 2; }
+int bar() { return foo<>(); }
+#else
+int num = bar();
+// expected-error@second.h:* {{'DeclTemplateArguments::bar' has different definitions in different modules; definition in module 'SecondModule' first difference is function body}}
+// expected-note@first.h:* {{but in 'FirstModule' found a different body}}
+#endif
 }
 
 // Keep macros contained to one file.
 #ifdef FIRST
 #undef FIRST
 #endif
+
 #ifdef SECOND
 #undef SECOND
 #endif
+
+#ifdef ACCESS
+#undef ACCESS
+#endif
diff --git a/test/Modules/prune.m b/test/Modules/prune.m
index 58992f9..97a2fd7 100644
--- a/test/Modules/prune.m
+++ b/test/Modules/prune.m
@@ -8,8 +8,8 @@
 // Clear out the module cache
 // RUN: rm -rf %t
 // Run Clang twice so we end up creating the timestamp file (the second time).
-// RUN: %clang_cc1 -DIMPORT_DEPENDS_ON_MODULE -fmodules-ignore-macro=DIMPORT_DEPENDS_ON_MODULE -fmodules -fimplicit-module-maps -F %S/Inputs -fmodules-cache-path=%t %s -verify
-// RUN: %clang_cc1 -DIMPORT_DEPENDS_ON_MODULE -fmodules-ignore-macro=DIMPORT_DEPENDS_ON_MODULE -fmodules -fimplicit-module-maps -F %S/Inputs -fmodules-cache-path=%t %s -verify
+// RUN: %clang_cc1 -DIMPORT_DEPENDS_ON_MODULE -Wno-private-module -fmodules-ignore-macro=DIMPORT_DEPENDS_ON_MODULE -fmodules -fimplicit-module-maps -F %S/Inputs -fmodules-cache-path=%t %s -verify
+// RUN: %clang_cc1 -DIMPORT_DEPENDS_ON_MODULE -Wno-private-module -fmodules-ignore-macro=DIMPORT_DEPENDS_ON_MODULE -fmodules -fimplicit-module-maps -F %S/Inputs -fmodules-cache-path=%t %s -verify
 // RUN: ls %t | grep modules.timestamp
 // RUN: ls -R %t | grep ^Module.*pcm
 // RUN: ls -R %t | grep DependsOnModule.*pcm
@@ -17,7 +17,7 @@
 // Set the timestamp back more than two days. We should try to prune,
 // but nothing gets pruned because the module files are new enough.
 // RUN: touch -m -a -t 201101010000 %t/modules.timestamp 
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -F %S/Inputs -fmodules-cache-path=%t -fmodules -fmodules-prune-interval=172800 -fmodules-prune-after=345600 %s -verify
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -Wno-private-module -F %S/Inputs -fmodules-cache-path=%t -fmodules -fmodules-prune-interval=172800 -fmodules-prune-after=345600 %s -verify
 // RUN: ls %t | grep modules.timestamp
 // RUN: ls -R %t | grep ^Module.*pcm
 // RUN: ls -R %t | grep DependsOnModule.*pcm
@@ -26,7 +26,7 @@
 // This shouldn't prune anything, because the timestamp has been updated, so
 // the pruning mechanism won't fire.
 // RUN: find %t -name DependsOnModule*.pcm | sed -e 's/\\/\//g' | xargs touch -a -t 201101010000
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -F %S/Inputs -fmodules-cache-path=%t -fmodules -fmodules-prune-interval=172800 -fmodules-prune-after=345600 %s -verify
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -F %S/Inputs -Wno-private-module -fmodules-cache-path=%t -fmodules -fmodules-prune-interval=172800 -fmodules-prune-after=345600 %s -verify
 // RUN: ls %t | grep modules.timestamp
 // RUN: ls -R %t | grep ^Module.*pcm
 // RUN: ls -R %t | grep DependsOnModule.*pcm
@@ -35,7 +35,7 @@
 // This should trigger pruning, which will remove DependsOnModule but not Module.
 // RUN: touch -m -a -t 201101010000 %t/modules.timestamp 
 // RUN: find %t -name DependsOnModule*.pcm | sed -e 's/\\/\//g' | xargs touch -a -t 201101010000
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -F %S/Inputs -fmodules-cache-path=%t -fmodules -fmodules-prune-interval=172800 -fmodules-prune-after=345600 %s -verify
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -F %S/Inputs -Wno-private-module -fmodules-cache-path=%t -fmodules -fmodules-prune-interval=172800 -fmodules-prune-after=345600 %s -verify
 // RUN: ls %t | grep modules.timestamp
 // RUN: ls -R %t | grep ^Module.*pcm
 // RUN: ls -R %t | not grep DependsOnModule.*pcm
diff --git a/test/Modules/redefinition-c-tagtypes.m b/test/Modules/redefinition-c-tagtypes.m
index a01f11b..eb469e0 100644
--- a/test/Modules/redefinition-c-tagtypes.m
+++ b/test/Modules/redefinition-c-tagtypes.m
@@ -1,8 +1,8 @@
 // RUN: rm -rf %t.cache
 // RUN: %clang_cc1 -fsyntax-only %s -fmodules -fmodules-cache-path=%t.cache \
-// RUN:   -fimplicit-module-maps -F%S/Inputs -verify
+// RUN:   -fimplicit-module-maps -Wno-private-module -F%S/Inputs -verify
 // RUN: %clang_cc1 -fsyntax-only %s -fmodules -fmodules-cache-path=%t.cache \
-// RUN:   -fimplicit-module-maps -F%S/Inputs -DCHANGE_TAGS -verify
+// RUN:   -fimplicit-module-maps -Wno-private-module -F%S/Inputs -DCHANGE_TAGS -verify
 #include "F/F.h"
 
 #ifndef CHANGE_TAGS
diff --git a/test/Modules/requires-coroutines.mm b/test/Modules/requires-coroutines.mm
index 8e25a3c..4e9c9d1 100644
--- a/test/Modules/requires-coroutines.mm
+++ b/test/Modules/requires-coroutines.mm
@@ -1,6 +1,6 @@
 // RUN: rm -rf %t
-// RUN: %clang_cc1 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -I %S/Inputs/DependsOnModule.framework %s -verify
-// RUN: %clang_cc1 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -I %S/Inputs/DependsOnModule.framework %s -verify -fcoroutines-ts -DCOROUTINES
+// RUN: %clang_cc1 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -Wno-private-module -F %S/Inputs -I %S/Inputs/DependsOnModule.framework %s -verify
+// RUN: %clang_cc1 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -Wno-private-module -F %S/Inputs -I %S/Inputs/DependsOnModule.framework %s -verify -fcoroutines-ts -DCOROUTINES
 
 #ifdef COROUTINES
 @import DependsOnModule.Coroutines;
diff --git a/test/Modules/requires.m b/test/Modules/requires.m
index d61de6b..d8f54b4 100644
--- a/test/Modules/requires.m
+++ b/test/Modules/requires.m
@@ -1,6 +1,7 @@
 // RUN: rm -rf %t
-// RUN: %clang_cc1 -Wauto-import -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -I %S/Inputs %s -verify -fmodule-feature custom_req1
-
+// RUN: %clang_cc1 -Wauto-import -Wno-private-module -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -I %S/Inputs %s -verify -fmodule-feature custom_req1
+// RUN: %clang_cc1 -Wauto-import -Wno-private-module -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -I %S/Inputs %s -verify -std=c89 -DTEST_C_FEATURES
+#ifndef TEST_C_FEATURES
 // expected-error@DependsOnModule.framework/module.map:7 {{module 'DependsOnModule.CXX' requires feature 'cplusplus'}}
 @import DependsOnModule.CXX; // expected-note {{module imported here}}
 @import DependsOnModule.NotCXX;
@@ -15,3 +16,17 @@
 @import RequiresWithMissingHeader.HeaderBefore; // expected-note {{module imported here}}
 // expected-error@module.map:* {{module 'RequiresWithMissingHeader.HeaderAfter' requires feature 'missing'}}
 @import RequiresWithMissingHeader.HeaderAfter; // expected-note {{module imported here}}
+// expected-error@DependsOnModule.framework/module.map:40 {{module 'DependsOnModule.CXX11' requires feature 'cplusplus11'}}
+@import DependsOnModule.CXX11; // expected-note {{module imported here}}
+// expected-error@DependsOnModule.framework/module.map:43 {{module 'DependsOnModule.CXX14' requires feature 'cplusplus14'}}
+@import DependsOnModule.CXX14; // expected-note {{module imported here}}
+// expected-error@DependsOnModule.framework/module.map:46 {{module 'DependsOnModule.CXX17' requires feature 'cplusplus17'}}
+@import DependsOnModule.CXX17; // expected-note {{module imported here}}
+#else
+// expected-error@DependsOnModule.framework/module.map:49 {{module 'DependsOnModule.C99' requires feature 'c99'}}
+@import DependsOnModule.C99; // expected-note {{module imported here}}
+// expected-error@DependsOnModule.framework/module.map:52 {{module 'DependsOnModule.C11' requires feature 'c11'}}
+@import DependsOnModule.C11; // expected-note {{module imported here}}
+// expected-error@DependsOnModule.framework/module.map:55 {{module 'DependsOnModule.C17' requires feature 'c17'}}
+@import DependsOnModule.C17; // expected-note {{module imported here}}
+#endif
diff --git a/test/Modules/requires.mm b/test/Modules/requires.mm
index f90622e..b4237cb 100644
--- a/test/Modules/requires.mm
+++ b/test/Modules/requires.mm
@@ -1,5 +1,5 @@
 // RUN: rm -rf %t
-// RUN: %clang_cc1 -Wauto-import -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -I %S/Inputs/DependsOnModule.framework %s -verify
+// RUN: %clang_cc1 -Wauto-import -Wno-private-module -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -I %S/Inputs/DependsOnModule.framework %s -verify
 
 @import DependsOnModule.CXX;
 // expected-error@module.map:11 {{module 'DependsOnModule.NotCXX' is incompatible with feature 'cplusplus'}}
diff --git a/test/Modules/shadow.m b/test/Modules/shadow.m
new file mode 100644
index 0000000..44320af
--- /dev/null
+++ b/test/Modules/shadow.m
@@ -0,0 +1,21 @@
+// RUN: rm -rf %t
+// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -I %S/Inputs/shadow/A1 -I %S/Inputs/shadow/A2 %s -fsyntax-only 2>&1 | FileCheck %s -check-prefix=REDEFINITION
+// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -fmodule-map-file=%S/Inputs/shadow/A1/module.modulemap -fmodule-map-file=%S/Inputs/shadow/A2/module.modulemap %s -fsyntax-only 2>&1 | FileCheck %s -check-prefix=REDEFINITION
+// REDEFINITION: error: redefinition of module 'A'
+// REDEFINITION: note: previously defined
+
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -fmodule-map-file=%S/Inputs/shadow/A1/module.modulemap -I %S/Inputs/shadow %s -verify
+
+@import A1;
+@import A2;
+@import A;
+
+#import "A2/A.h" // expected-note {{implicitly imported}}
+// expected-error@A2/module.modulemap:1 {{import of shadowed module 'A'}}
+// expected-note@A1/module.modulemap:1 {{previous definition}}
+
+#if defined(A2_A_h)
+#error got the wrong definition of module A
+#elif !defined(A1_A_h)
+#error missing definition from A1
+#endif
diff --git a/test/Modules/shadowed-submodule.m b/test/Modules/shadowed-submodule.m
new file mode 100644
index 0000000..c9c77bd
--- /dev/null
+++ b/test/Modules/shadowed-submodule.m
@@ -0,0 +1,5 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -I %S/Inputs/shadowed-submodule/Foo -I %S/Inputs/shadowed-submodule/A2 %s -verify
+
+@import Foo; // expected-error {{module 'A' was built in directory}}
+             // expected-note@shadowed-submodule.m:4 {{imported by module 'Foo'}}
diff --git a/test/Modules/subframework-from-intermediate-path.m b/test/Modules/subframework-from-intermediate-path.m
index 394cc45..1543861 100644
--- a/test/Modules/subframework-from-intermediate-path.m
+++ b/test/Modules/subframework-from-intermediate-path.m
@@ -1,5 +1,5 @@
 // RUN: rm -rf %t
-// RUN: %clang_cc1 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -F %S/Inputs/DependsOnModule.framework/Frameworks %s -verify
+// RUN: %clang_cc1 -Wno-private-module -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -F %S/Inputs/DependsOnModule.framework/Frameworks %s -verify
 
 @import DependsOnModule;
 @import SubFramework; // expected-error{{module 'SubFramework' not found}}
diff --git a/test/Modules/subframeworks.m b/test/Modules/subframeworks.m
index 2108184..ce35415 100644
--- a/test/Modules/subframeworks.m
+++ b/test/Modules/subframeworks.m
@@ -1,6 +1,6 @@
 // RUN: rm -rf %t
-// RUN: %clang_cc1 -Wauto-import -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -F %S/Inputs/DependsOnModule.framework/Frameworks %s -verify
-// RUN: %clang_cc1 -x objective-c++ -Wauto-import -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -F %S/Inputs/DependsOnModule.framework/Frameworks %s -verify
+// RUN: %clang_cc1 -Wauto-import -Wno-private-module -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -F %S/Inputs/DependsOnModule.framework/Frameworks %s -verify
+// RUN: %clang_cc1 -x objective-c++ -Wauto-import -Wno-private-module -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -F %S/Inputs/DependsOnModule.framework/Frameworks %s -verify
 
 @import DependsOnModule;
 
diff --git a/test/Modules/templates.mm b/test/Modules/templates.mm
index 79e975a..9c3f327 100644
--- a/test/Modules/templates.mm
+++ b/test/Modules/templates.mm
@@ -77,10 +77,10 @@
   // CHECK: %[[l:.*]] = alloca %[[ListInt:[^ ]*]], align 8
   // CHECK: %[[r:.*]] = alloca %[[ListInt]], align 8
 
-  // CHECK: call {{.*}}memcpy{{.*}}(i8* %{{.*}}, i8* bitcast ({{.*}}* @_ZZ15testMixedStructvE1l to i8*), i64 16,
+  // CHECK: call {{.*}}memcpy{{.*}}(i8* align {{[0-9]+}} %{{.*}}, i8* align {{[0-9]+}} bitcast ({{.*}}* @_ZZ15testMixedStructvE1l to i8*), i64 16,
   ListInt_left l{0, 1};
 
-  // CHECK: call {{.*}}memcpy{{.*}}(i8* %{{.*}}, i8* bitcast ({{.*}}* @_ZZ15testMixedStructvE1r to i8*), i64 16,
+  // CHECK: call {{.*}}memcpy{{.*}}(i8* align {{[0-9]+}} %{{.*}}, i8* align {{[0-9]+}} bitcast ({{.*}}* @_ZZ15testMixedStructvE1r to i8*), i64 16,
   ListInt_right r{0, 2};
 
   // CHECK: call void @_Z10useListIntR4ListIiE(%[[ListInt]]* dereferenceable({{[0-9]+}}) %[[l]])
diff --git a/test/Modules/using-decl-friend-2.cpp b/test/Modules/using-decl-friend-2.cpp
new file mode 100644
index 0000000..0c71c37
--- /dev/null
+++ b/test/Modules/using-decl-friend-2.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -fmodules %s -verify
+// expected-no-diagnostics
+
+#pragma clang module build A
+module A {}
+#pragma clang module contents
+#pragma clang module begin A
+namespace N { class X; }
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module build B
+module B {}
+#pragma clang module contents
+#pragma clang module begin B
+namespace N { class Friendly { friend class X; }; }
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module build C
+module C {}
+#pragma clang module contents
+#pragma clang module begin C
+#pragma clang module import A
+void use_X(N::X *p);
+#pragma clang module import B
+// UsingShadowDecl names the friend declaration
+using N::X;
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module import B
+namespace N { class AlsoFriendly { friend class X; }; }
+#pragma clang module import A
+#pragma clang module import C
+// The friend declaration from N::Friendly is now the first in the redecl
+// chain, so is not ordinarily visible. We need the IDNS of the UsingShadowDecl
+// to still consider it to be visible, though.
+X *p;
diff --git a/test/Modules/using-decl-friend.cpp b/test/Modules/using-decl-friend.cpp
new file mode 100644
index 0000000..f11d002
--- /dev/null
+++ b/test/Modules/using-decl-friend.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -fmodules %s -verify
+// expected-no-diagnostics
+
+#pragma clang module build A
+module A {}
+#pragma clang module contents
+#pragma clang module begin A
+namespace N {
+  class X;
+}
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module build B
+module B {
+  module X {}
+  module Y {}
+}
+#pragma clang module contents
+#pragma clang module begin B.X
+namespace N {
+  class Friendly {
+    friend class X;
+  };
+}
+#pragma clang module end
+#pragma clang module begin B.Y
+namespace N {
+  class X;
+}
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module import A
+#pragma clang module import B.X
+using N::X;
+X *p;
diff --git a/test/Modules/using-directive-redecl.cpp b/test/Modules/using-directive-redecl.cpp
new file mode 100644
index 0000000..94772f3
--- /dev/null
+++ b/test/Modules/using-directive-redecl.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -fmodules -fmodules-local-submodule-visibility -verify %s
+// expected-no-diagnostics
+#pragma clang module build M
+module M { module TDFNodes {} module TDFInterface {} }
+#pragma clang module contents
+  // TDFNodes
+  #pragma clang module begin M.TDFNodes
+  namespace Detail {
+     namespace TDF {
+        class TLoopManager {};
+     }
+  }
+  namespace Internal {
+     namespace TDF {
+        using namespace Detail::TDF;
+     }
+  }
+  #pragma clang module end
+
+  // TDFInterface
+  #pragma clang module begin M.TDFInterface
+    #pragma clang module import M.TDFNodes
+      namespace Internal {
+        namespace TDF {
+          using namespace Detail::TDF;
+        }
+      }
+  #pragma clang module end
+
+#pragma clang module endbuild
+
+#pragma clang module import M.TDFNodes
+namespace Internal {
+  namespace TDF {
+    TLoopManager * use;
+  }
+}
diff --git a/test/Modules/var-templates.cpp b/test/Modules/var-templates.cpp
new file mode 100644
index 0000000..eca2428
--- /dev/null
+++ b/test/Modules/var-templates.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fmodules -std=c++14 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s
+
+#pragma clang module build A
+module A {}
+#pragma clang module contents
+#pragma clang module begin A
+template<int> int n = 42;
+decltype(n<0>) f();
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module build B
+module B {}
+#pragma clang module contents
+#pragma clang module begin B
+#pragma clang module import A
+inline int f() { return n<0>; }
+#pragma clang module end
+#pragma clang module endbuild
+
+#pragma clang module import B
+
+// CHECK: @_Z1nILi0EE = linkonce_odr global i32 42, comdat
+int g() { return f(); }
diff --git a/test/OpenMP/atomic_ast_print.cpp b/test/OpenMP/atomic_ast_print.cpp
index f1fc7c8..c66249b 100644
--- a/test/OpenMP/atomic_ast_print.cpp
+++ b/test/OpenMP/atomic_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -44,7 +48,7 @@
 }
 
 // CHECK: T a = T();
-// CHECK-NEXT: #pragma omp atomic
+// CHECK-NEXT: #pragma omp atomic{{$}}
 // CHECK-NEXT: a++;
 // CHECK-NEXT: #pragma omp atomic read
 // CHECK-NEXT: a = argc;
diff --git a/test/OpenMP/atomic_capture_codegen.cpp b/test/OpenMP/atomic_capture_codegen.cpp
index 306b83f..51c65a0 100644
--- a/test/OpenMP/atomic_capture_codegen.cpp
+++ b/test/OpenMP/atomic_capture_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/atomic_codegen.cpp b/test/OpenMP/atomic_codegen.cpp
index 7f62a9b..c741116 100644
--- a/test/OpenMP/atomic_codegen.cpp
+++ b/test/OpenMP/atomic_codegen.cpp
@@ -2,6 +2,12 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -x c++ -emit-llvm -std=c++98 %s -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -x c++ -emit-llvm -std=c++11 %s -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -x c++ -emit-llvm -std=c++98 %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -x c++ -emit-llvm -std=c++11 %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 int a;
diff --git a/test/OpenMP/atomic_messages.c b/test/OpenMP/atomic_messages.c
index 7b3178b..1234e85 100644
--- a/test/OpenMP/atomic_messages.c
+++ b/test/OpenMP/atomic_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 int foo() {
 L1:
   foo();
diff --git a/test/OpenMP/atomic_messages.cpp b/test/OpenMP/atomic_messages.cpp
index efb368d..d7fa8c9 100644
--- a/test/OpenMP/atomic_messages.cpp
+++ b/test/OpenMP/atomic_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 int foo() {
 L1:
   foo();
diff --git a/test/OpenMP/atomic_read_codegen.c b/test/OpenMP/atomic_read_codegen.c
index 0cfb2d2..55fd03b 100644
--- a/test/OpenMP/atomic_read_codegen.c
+++ b/test/OpenMP/atomic_read_codegen.c
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 // REQUIRES: x86-registered-target
 #ifndef HEADER
diff --git a/test/OpenMP/atomic_update_codegen.cpp b/test/OpenMP/atomic_update_codegen.cpp
index 1343cd8..d314c24 100644
--- a/test/OpenMP/atomic_update_codegen.cpp
+++ b/test/OpenMP/atomic_update_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/atomic_write_codegen.c b/test/OpenMP/atomic_write_codegen.c
index 0c85b6e..242708c 100644
--- a/test/OpenMP/atomic_write_codegen.c
+++ b/test/OpenMP/atomic_write_codegen.c
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 // REQUIRES: x86-registered-target
 #ifndef HEADER
@@ -137,7 +142,7 @@
   dx = dv;
 // CHECK: [[LD:%.+]] = load x86_fp80, x86_fp80*
 // CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i8*
-// CHECK: call void @llvm.memset.p0i8.i64(i8* [[BITCAST]], i8 0, i64 16, i32 16, i1 false)
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[BITCAST]], i8 0, i64 16, i1 false)
 // CHECK: store x86_fp80 [[LD]], x86_fp80* [[LDTEMP]]
 // CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i128*
 // CHECK: [[LD:%.+]] = load i128, i128* [[BITCAST]]
@@ -226,7 +231,7 @@
 // CHECK: load i64, i64*
 // CHECK: [[VAL:%.+]] = uitofp i64 %{{.+}} to x86_fp80
 // CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[TEMP:%.+]] to i8*
-// CHECK: call void @llvm.memset.p0i8.i64(i8* [[BITCAST]], i8 0, i64 16, i32 16, i1 false)
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[BITCAST]], i8 0, i64 16, i1 false)
 // CHECK: store x86_fp80 [[VAL]], x86_fp80* [[TEMP]]
 // CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[TEMP]] to i128*
 // CHECK: [[VAL:%.+]] = load i128, i128* [[BITCAST]]
diff --git a/test/OpenMP/barrier_ast_print.cpp b/test/OpenMP/barrier_ast_print.cpp
index fd03a9a..a8db217 100644
--- a/test/OpenMP/barrier_ast_print.cpp
+++ b/test/OpenMP/barrier_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -24,7 +28,7 @@
   return a + argc;
 }
 // CHECK:      static T a;
-// CHECK-NEXT: #pragma omp barrier
+// CHECK-NEXT: #pragma omp barrier{{$}}
 // CHECK:      static int a;
 // CHECK-NEXT: #pragma omp barrier
 // CHECK:      static char a;
diff --git a/test/OpenMP/barrier_codegen.cpp b/test/OpenMP/barrier_codegen.cpp
index f335935..aa3358c 100644
--- a/test/OpenMP/barrier_codegen.cpp
+++ b/test/OpenMP/barrier_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/barrier_messages.cpp b/test/OpenMP/barrier_messages.cpp
index 7d79445..137453a 100644
--- a/test/OpenMP/barrier_messages.cpp
+++ b/test/OpenMP/barrier_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 template <class T>
 T tmain(T argc) {
 #pragma omp barrier
diff --git a/test/OpenMP/cancel_ast_print.cpp b/test/OpenMP/cancel_ast_print.cpp
index be55084..b376d4e 100644
--- a/test/OpenMP/cancel_ast_print.cpp
+++ b/test/OpenMP/cancel_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -22,7 +26,7 @@
 }
 // CHECK-NEXT: #pragma omp sections
 // CHECK: {
-// CHECK: #pragma omp cancel sections
+// CHECK: #pragma omp cancel sections{{$}}
 // CHECK: }
 #pragma omp for
 for (int i = 0; i < argc; ++i) {
diff --git a/test/OpenMP/cancel_codegen.cpp b/test/OpenMP/cancel_codegen.cpp
index 2ff9dcc..e2aff69 100644
--- a/test/OpenMP/cancel_codegen.cpp
+++ b/test/OpenMP/cancel_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/cancel_codegen_cleanup.cpp b/test/OpenMP/cancel_codegen_cleanup.cpp
index 5a297bd..94d4906 100644
--- a/test/OpenMP/cancel_codegen_cleanup.cpp
+++ b/test/OpenMP/cancel_codegen_cleanup.cpp
@@ -1,5 +1,8 @@
 // RUN: %clang_cc1 -std=c++11 -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s
 
+// RUN: %clang_cc1 -std=c++11 -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 //CHECK: call i32 @__kmpc_cancel
 //CHECK: br {{.*}}label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
 //CHECK: [[EXIT]]:
diff --git a/test/OpenMP/cancel_if_messages.cpp b/test/OpenMP/cancel_if_messages.cpp
index 426ac49..dc24fa4 100644
--- a/test/OpenMP/cancel_if_messages.cpp
+++ b/test/OpenMP/cancel_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/cancel_messages.cpp b/test/OpenMP/cancel_messages.cpp
index f8bbe2c..28f22d1 100644
--- a/test/OpenMP/cancel_messages.cpp
+++ b/test/OpenMP/cancel_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 int main(int argc, char **argv) {
 #pragma omp cancellation       // expected-error {{expected an OpenMP directive}}
 #pragma omp cancel // expected-error {{one of 'for', 'parallel', 'sections' or 'taskgroup' is expected}}
diff --git a/test/OpenMP/cancellation_point_ast_print.cpp b/test/OpenMP/cancellation_point_ast_print.cpp
index c8b133c..d27d2b7 100644
--- a/test/OpenMP/cancellation_point_ast_print.cpp
+++ b/test/OpenMP/cancellation_point_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -14,7 +18,7 @@
 }
 // CHECK: #pragma omp parallel
 // CHECK-NEXT: {
-// CHECK-NEXT: #pragma omp cancellation point parallel
+// CHECK-NEXT: #pragma omp cancellation point parallel{{$}}
 // CHECK-NEXT: }
 #pragma omp sections
 {
diff --git a/test/OpenMP/cancellation_point_codegen.cpp b/test/OpenMP/cancellation_point_codegen.cpp
index de65c11..81a8421 100644
--- a/test/OpenMP/cancellation_point_codegen.cpp
+++ b/test/OpenMP/cancellation_point_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/cancellation_point_messages.cpp b/test/OpenMP/cancellation_point_messages.cpp
index 21b0e9c..8bf5b3d 100644
--- a/test/OpenMP/cancellation_point_messages.cpp
+++ b/test/OpenMP/cancellation_point_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 int main(int argc, char **argv) {
 #pragma omp cancellation       // expected-error {{expected an OpenMP directive}}
 #pragma omp cancellation point // expected-error {{one of 'for', 'parallel', 'sections' or 'taskgroup' is expected}}
diff --git a/test/OpenMP/capturing_in_templates.cpp b/test/OpenMP/capturing_in_templates.cpp
index a4dcbee..97a9352 100644
--- a/test/OpenMP/capturing_in_templates.cpp
+++ b/test/OpenMP/capturing_in_templates.cpp
@@ -1,4 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 template <typename T1, typename T2>
@@ -15,7 +18,7 @@
 
 // CHECK-LABEL: @main
 int main(int argc, char **argv) {
-// CHECK: call i32 @__tgt_target(i32 -1, i8* @{{.+}}.region_id, i32 0, i8** null, i8** null, i64* null, i32* null)
+// CHECK: call i32 @__tgt_target(i64 -1, i8* @{{.+}}.region_id, i32 0, i8** null, i8** null, i64* null, i64* null)
 #pragma omp target
  {
     for (int i = 0; i < 64; ++i) {
diff --git a/test/OpenMP/critical_ast_print.cpp b/test/OpenMP/critical_ast_print.cpp
index 0579d03..f51145e 100644
--- a/test/OpenMP/critical_ast_print.cpp
+++ b/test/OpenMP/critical_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -10,10 +14,10 @@
 
 // CHECK: template <typename T, int N> int tmain(T argc, char **argv)
 // CHECK: static int a;
-// CHECK-NEXT: #pragma omp critical
+// CHECK-NEXT: #pragma omp critical{{$}}
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: ++a;
-// CHECK-NEXT: #pragma omp critical (the_name) hint(N)
+// CHECK-NEXT: #pragma omp critical (the_name) hint(N){{$}}
 // CHECK-NEXT: foo();
 // CHECK-NEXT: return N;
 // CHECK: template<> int tmain<int, 4>(int argc, char **argv)
diff --git a/test/OpenMP/critical_codegen.cpp b/test/OpenMP/critical_codegen.cpp
index 964c91f..ba54005 100644
--- a/test/OpenMP/critical_codegen.cpp
+++ b/test/OpenMP/critical_codegen.cpp
@@ -2,6 +2,12 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -78,7 +84,7 @@
 void parallel_critical() {
 #pragma omp parallel
 #pragma omp critical
-  // TERM_DEBUG:     __kmpc_global_thread_num
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
   // TERM_DEBUG:     call void @__kmpc_critical({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
   // TERM_DEBUG:     invoke void {{.*}}foo{{.*}}()
   // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
diff --git a/test/OpenMP/critical_messages.cpp b/test/OpenMP/critical_messages.cpp
index bb5ec00..a04d2e5 100644
--- a/test/OpenMP/critical_messages.cpp
+++ b/test/OpenMP/critical_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 int foo();
 
 template<typename T, int N>
diff --git a/test/OpenMP/debug-info-openmp-array.cpp b/test/OpenMP/debug-info-openmp-array.cpp
index 19bf2a2..e6de476 100644
--- a/test/OpenMP/debug-info-openmp-array.cpp
+++ b/test/OpenMP/debug-info-openmp-array.cpp
@@ -1,4 +1,7 @@
 // RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 void f(int m) {
diff --git a/test/OpenMP/declare_reduction_ast_print.c b/test/OpenMP/declare_reduction_ast_print.c
index 7b97a7c..239b1cf 100644
--- a/test/OpenMP/declare_reduction_ast_print.c
+++ b/test/OpenMP/declare_reduction_ast_print.c
@@ -1,13 +1,17 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
 #define HEADER
 
 #pragma omp declare reduction(+ : int, char : omp_out *= omp_in)
-// CHECK: #pragma omp declare reduction (+ : int : omp_out *= omp_in)
+// CHECK: #pragma omp declare reduction (+ : int : omp_out *= omp_in){{$}}
 // CHECK-NEXT: #pragma omp declare reduction (+ : char : omp_out *= omp_in)
 
 #pragma omp declare reduction(fun : float : omp_out += omp_in) initializer(omp_priv = omp_orig + 15)
diff --git a/test/OpenMP/declare_reduction_ast_print.cpp b/test/OpenMP/declare_reduction_ast_print.cpp
index 816ff79..a8ca529 100644
--- a/test/OpenMP/declare_reduction_ast_print.cpp
+++ b/test/OpenMP/declare_reduction_ast_print.cpp
@@ -1,13 +1,17 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
 #define HEADER
 
 #pragma omp declare reduction(+ : int, char : omp_out *= omp_in)
-// CHECK: #pragma omp declare reduction (+ : int : omp_out *= omp_in)
+// CHECK: #pragma omp declare reduction (+ : int : omp_out *= omp_in){{$}}
 // CHECK-NEXT: #pragma omp declare reduction (+ : char : omp_out *= omp_in)
 
 template <class T>
diff --git a/test/OpenMP/declare_reduction_codegen.c b/test/OpenMP/declare_reduction_codegen.c
index 579b664..8ccb8cc 100644
--- a/test/OpenMP/declare_reduction_codegen.c
+++ b/test/OpenMP/declare_reduction_codegen.c
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c -emit-llvm %s -triple %itanium_abi_triple -o - -femit-all-decls -disable-llvm-passes | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c -triple %itanium_abi_triple -emit-pch -o %t %s -femit-all-decls -disable-llvm-passes
 // RUN: %clang_cc1 -fopenmp -x c -triple %itanium_abi_triple -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls -disable-llvm-passes | FileCheck --check-prefix=CHECK-LOAD %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c -emit-llvm %s -triple %itanium_abi_triple -o - -femit-all-decls -disable-llvm-passes | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple %itanium_abi_triple -emit-pch -o %t %s -femit-all-decls -disable-llvm-passes
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple %itanium_abi_triple -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls -disable-llvm-passes | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 #ifndef HEADER
diff --git a/test/OpenMP/declare_reduction_codegen.cpp b/test/OpenMP/declare_reduction_codegen.cpp
index ae6e047..b52de52 100644
--- a/test/OpenMP/declare_reduction_codegen.cpp
+++ b/test/OpenMP/declare_reduction_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - -femit-all-decls -disable-llvm-passes | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -femit-all-decls -disable-llvm-passes
 // RUN: %clang_cc1 -fopenmp -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls -disable-llvm-passes | FileCheck --check-prefix=CHECK-LOAD %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - -femit-all-decls -disable-llvm-passes | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -femit-all-decls -disable-llvm-passes
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls -disable-llvm-passes | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 #ifndef HEADER
diff --git a/test/OpenMP/declare_reduction_messages.c b/test/OpenMP/declare_reduction_messages.c
index fb9eacc..39387c7 100644
--- a/test/OpenMP/declare_reduction_messages.c
+++ b/test/OpenMP/declare_reduction_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 int temp; // expected-note 6 {{'temp' declared here}}
 
 #pragma omp declare reduction                                              // expected-error {{expected '(' after 'declare reduction'}}
diff --git a/test/OpenMP/declare_reduction_messages.cpp b/test/OpenMP/declare_reduction_messages.cpp
index 3e5a15e..f22f924 100644
--- a/test/OpenMP/declare_reduction_messages.cpp
+++ b/test/OpenMP/declare_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++11 %s
+
 int temp; // expected-note 7 {{'temp' declared here}}
 
 #pragma omp declare reduction                                              // expected-error {{expected '(' after 'declare reduction'}}
diff --git a/test/OpenMP/declare_simd_ast_print.c b/test/OpenMP/declare_simd_ast_print.c
index 04fd73f..414b010 100644
--- a/test/OpenMP/declare_simd_ast_print.c
+++ b/test/OpenMP/declare_simd_ast_print.c
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -12,10 +16,10 @@
 #pragma omp declare simd notinbranch simdlen(2), uniform(s1, s2) linear(d: s1)
 void add_1(float *d, int s1, float *s2, double b[]) __attribute__((cold));
 
-// CHECK: #pragma omp declare simd notinbranch simdlen(2) uniform(s1, s2) linear(val(d): s1)
-// CHECK-NEXT: #pragma omp declare simd inbranch uniform(d) linear(val(s1): 32) linear(val(s2): 32)
-// CHECK-NEXT: #pragma omp declare simd simdlen(32) aligned(d) aligned(b)
-// CHECK-NEXT: #pragma omp declare simd aligned(b: 64)
+// CHECK: #pragma omp declare simd notinbranch simdlen(2) uniform(s1, s2) linear(val(d): s1){{$}}
+// CHECK-NEXT: #pragma omp declare simd inbranch uniform(d) linear(val(s1): 32) linear(val(s2): 32){{$}}
+// CHECK-NEXT: #pragma omp declare simd simdlen(32) aligned(d) aligned(b){{$}}
+// CHECK-NEXT: #pragma omp declare simd aligned(b: 64){{$}}
 // CHECK-NEXT: void add_1(float *d, int s1, float *s2, double b[]) __attribute__((cold))
 
 #endif
diff --git a/test/OpenMP/declare_simd_ast_print.cpp b/test/OpenMP/declare_simd_ast_print.cpp
index 632e038..c09f8b4 100644
--- a/test/OpenMP/declare_simd_ast_print.cpp
+++ b/test/OpenMP/declare_simd_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -80,8 +84,8 @@
 // CHECK-NEXT: float taddpf(float *a, T *&b) {
 // CHECK-NEXT: return *a + *b;
 // CHECK-NEXT: }
-// CHECK: #pragma omp declare simd
-// CHECK-NEXT: #pragma omp declare simd
+// CHECK: #pragma omp declare simd uniform(this, b)
+// CHECK-NEXT: #pragma omp declare simd{{$}}
 // CHECK-NEXT: int tadd(int b) {
 // CHECK-NEXT: return this->x[b] + b;
 // CHECK-NEXT: }
diff --git a/test/OpenMP/declare_simd_codegen.cpp b/test/OpenMP/declare_simd_codegen.cpp
index 47e9519..4ac0b7f 100644
--- a/test/OpenMP/declare_simd_codegen.cpp
+++ b/test/OpenMP/declare_simd_codegen.cpp
@@ -1,18 +1,38 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
 
+void add_1(float *d);
+
+#pragma omp declare simd linear(d : 8)
+#pragma omp declare simd inbranch simdlen(32)
+#pragma omp declare simd notinbranch
+void add_1(float *d);
+
 #pragma omp declare simd linear(d : 8)
 #pragma omp declare simd inbranch simdlen(32)
 #pragma omp declare simd notinbranch
 void add_1(float *d) {}
 
+void add_1(float *d);
+
+#pragma omp declare simd linear(d : 8)
+#pragma omp declare simd inbranch simdlen(32)
+#pragma omp declare simd notinbranch
+void add_2(float *d);
+
 #pragma omp declare simd aligned(hp, hp2)
 template <class C>
 void h(C *hp, C *hp2, C *hq, C *lin) {
+  add_2(0);
 }
 
 // Explicit specialization with <C=int>.
@@ -110,6 +130,7 @@
 // CHECK-DAG: define {{.+}}@_Z3bax2VVPdi(
 // CHECK-DAG: define {{.+}}@_Z3fooPffi(
 // CHECK-DAG: define {{.+}}@_Z3food(
+// CHECK-DAG: declare {{.+}}@_Z5add_2Pf(
 
 // CHECK-DAG: "_ZGVbM4l8__Z5add_1Pf"
 // CHECK-DAG: "_ZGVbN4l8__Z5add_1Pf"
@@ -277,6 +298,23 @@
 // CHECK-DAG: "_ZGVeM16ua16vl1__Z3fooPffi"
 // CHECK-DAG: "_ZGVeN16ua16vl1__Z3fooPffi"
 
+// CHECK-DAG: "_ZGVbM4l8__Z5add_2Pf"
+// CHECK-DAG: "_ZGVbN4l8__Z5add_2Pf"
+// CHECK-DAG: "_ZGVcM8l8__Z5add_2Pf"
+// CHECK-DAG: "_ZGVcN8l8__Z5add_2Pf"
+// CHECK-DAG: "_ZGVdM8l8__Z5add_2Pf"
+// CHECK-DAG: "_ZGVdN8l8__Z5add_2Pf"
+// CHECK-DAG: "_ZGVeM16l8__Z5add_2Pf"
+// CHECK-DAG: "_ZGVeN16l8__Z5add_2Pf"
+// CHECK-DAG: "_ZGVbM32v__Z5add_2Pf"
+// CHECK-DAG: "_ZGVcM32v__Z5add_2Pf"
+// CHECK-DAG: "_ZGVdM32v__Z5add_2Pf"
+// CHECK-DAG: "_ZGVeM32v__Z5add_2Pf"
+// CHECK-DAG: "_ZGVbN2v__Z5add_2Pf"
+// CHECK-DAG: "_ZGVcN4v__Z5add_2Pf"
+// CHECK-DAG: "_ZGVdN4v__Z5add_2Pf"
+// CHECK-DAG: "_ZGVeN8v__Z5add_2Pf"
+
 // CHECK-DAG: "_ZGVbN2v__Z3food"
 // CHECK-DAG: "_ZGVcN4v__Z3food"
 // CHECK-DAG: "_ZGVdN4v__Z3food"
diff --git a/test/OpenMP/declare_simd_messages.cpp b/test/OpenMP/declare_simd_messages.cpp
index af46283..dab7d05 100644
--- a/test/OpenMP/declare_simd_messages.cpp
+++ b/test/OpenMP/declare_simd_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple=x86_64-pc-win32 -verify -fopenmp -x c++ -std=c++11 -fms-extensions -Wno-pragma-pack %s
 
+// RUN: %clang_cc1 -triple=x86_64-pc-win32 -verify -fopenmp-simd -x c++ -std=c++11 -fms-extensions -Wno-pragma-pack %s
+
 // expected-error@+1 {{expected an OpenMP directive}}
 #pragma omp declare
 
diff --git a/test/OpenMP/declare_target_ast_print.cpp b/test/OpenMP/declare_target_ast_print.cpp
index 55f73cb..bd1acc2 100644
--- a/test/OpenMP/declare_target_ast_print.cpp
+++ b/test/OpenMP/declare_target_ast_print.cpp
@@ -1,17 +1,21 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
 #define HEADER
 
 #pragma omp declare target
-// CHECK: #pragma omp declare target
+// CHECK: #pragma omp declare target{{$}}
 void foo() {}
 // CHECK-NEXT: void foo()
 #pragma omp end declare target
-// CHECK: #pragma omp end declare target
+// CHECK: #pragma omp end declare target{{$}}
 
 extern "C" {
 #pragma omp declare target
@@ -83,46 +87,41 @@
 void f1() {
 }
 #pragma omp declare target (a1, f1)
-// CHECK: #pragma omp declare target
+// CHECK: #pragma omp declare target{{$}}
 // CHECK: int a1;
-// CHECK: #pragma omp end declare target
-// CHECK: #pragma omp declare target
+// CHECK: #pragma omp end declare target{{$}}
+// CHECK: #pragma omp declare target{{$}}
 // CHECK: void f1()
-// CHECK: #pragma omp end declare target
+// CHECK: #pragma omp end declare target{{$}}
 
 int b1, b2, b3;
 void f2() {
 }
 #pragma omp declare target to(b1) to(b2), to(b3, f2)
-// CHECK: #pragma omp declare target
+// CHECK: #pragma omp declare target{{$}}
 // CHECK: int b1;
-// CHECK: #pragma omp end declare target
-// CHECK: #pragma omp declare target
+// CHECK: #pragma omp end declare target{{$}}
+// CHECK: #pragma omp declare target{{$}}
 // CHECK: int b2;
-// CHECK: #pragma omp end declare target
-// CHECK: #pragma omp declare target
+// CHECK: #pragma omp end declare target{{$}}
+// CHECK: #pragma omp declare target{{$}}
 // CHECK: int b3;
-// CHECK: #pragma omp end declare target
-// CHECK: #pragma omp declare target
+// CHECK: #pragma omp end declare target{{$}}
+// CHECK: #pragma omp declare target{{$}}
 // CHECK: void f2()
-// CHECK: #pragma omp end declare target
+// CHECK: #pragma omp end declare target{{$}}
 
 int c1, c2, c3;
-void f3() {
-}
-#pragma omp declare target link(c1) link(c2), link(c3, f3)
-// CHECK: #pragma omp declare target link
+#pragma omp declare target link(c1) link(c2), link(c3)
+// CHECK: #pragma omp declare target link{{$}}
 // CHECK: int c1;
-// CHECK: #pragma omp end declare target
-// CHECK: #pragma omp declare target link
+// CHECK: #pragma omp end declare target{{$}}
+// CHECK: #pragma omp declare target link{{$}}
 // CHECK: int c2;
-// CHECK: #pragma omp end declare target
-// CHECK: #pragma omp declare target link
+// CHECK: #pragma omp end declare target{{$}}
+// CHECK: #pragma omp declare target link{{$}}
 // CHECK: int c3;
-// CHECK: #pragma omp end declare target
-// CHECK: #pragma omp declare target link
-// CHECK: void f3()
-// CHECK: #pragma omp end declare target
+// CHECK: #pragma omp end declare target{{$}}
 
 struct SSSt {
 #pragma omp declare target
diff --git a/test/OpenMP/declare_target_messages.cpp b/test/OpenMP/declare_target_messages.cpp
index bbffc0e..3286a29 100644
--- a/test/OpenMP/declare_target_messages.cpp
+++ b/test/OpenMP/declare_target_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -fnoopenmp-use-tls -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -fnoopenmp-use-tls -ferror-limit 100 -o - %s
+
 #pragma omp end declare target // expected-error {{unexpected OpenMP directive '#pragma omp end declare target'}}
 
 int a, b; // expected-warning {{declaration is not declared in any declare target region}}
@@ -13,8 +15,16 @@
 
 #pragma omp declare target map(a) // expected-error {{unexpected 'map' clause, only 'to' or 'link' clauses expected}}
 
+#pragma omp declare target to(foo1) // expected-error {{use of undeclared identifier 'foo1'}}
+
+#pragma omp declare target link(foo2) // expected-error {{use of undeclared identifier 'foo2'}}
+
 void c(); // expected-warning {{declaration is not declared in any declare target region}}
 
+void func() {} // expected-note {{'func' defined here}}
+
+#pragma omp declare target link(func) // expected-error {{function name is not allowed in 'link' clause}}
+
 extern int b;
 
 struct NonT {
@@ -23,6 +33,33 @@
 
 typedef int sint;
 
+template <typename T>
+T bla1() { return 0; }
+
+#pragma omp declare target
+template <typename T>
+T bla2() { return 0; }
+#pragma omp end declare target
+
+template<>
+float bla2() { return 1.0; }
+
+#pragma omp declare target
+void blub2() {
+  bla2<float>();
+  bla2<int>();
+}
+#pragma omp end declare target
+
+void t2() {
+#pragma omp target
+  {
+    bla2<float>();
+    bla2<long>();
+  }
+}
+
+
 #pragma omp declare target // expected-note {{to match this '#pragma omp declare target'}}
 #pragma omp threadprivate(a) // expected-note {{defined as threadprivate or thread local}}
 extern int b;
diff --git a/test/OpenMP/distribute_ast_print.cpp b/test/OpenMP/distribute_ast_print.cpp
index d4a9df9..f675814 100644
--- a/test/OpenMP/distribute_ast_print.cpp
+++ b/test/OpenMP/distribute_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -83,7 +87,7 @@
   static T a;
 // CHECK: static T a;
 #pragma omp distribute
-// CHECK-NEXT: #pragma omp distribute
+// CHECK-NEXT: #pragma omp distribute{{$}}
   for (int i=0; i < 2; ++i)a=2;
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: a = 2;
diff --git a/test/OpenMP/distribute_codegen.cpp b/test/OpenMP/distribute_codegen.cpp
index 7ea17b1..ea73679 100644
--- a/test/OpenMP/distribute_codegen.cpp
+++ b/test/OpenMP/distribute_codegen.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HCHECK
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target codegen - host bc file has to be created first. (no significant differences with host version of target region)
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/distribute_collapse_messages.cpp b/test/OpenMP/distribute_collapse_messages.cpp
index 4897e69..b852fd2 100644
--- a/test/OpenMP/distribute_collapse_messages.cpp
+++ b/test/OpenMP/distribute_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_dist_schedule_ast_print.cpp b/test/OpenMP/distribute_dist_schedule_ast_print.cpp
index b06a56e..de5e59a 100644
--- a/test/OpenMP/distribute_dist_schedule_ast_print.cpp
+++ b/test/OpenMP/distribute_dist_schedule_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -14,7 +18,7 @@
   static T a;
 // CHECK: static T a;
 #pragma omp distribute dist_schedule(static,10)
-// CHECK-NEXT: #pragma omp distribute dist_schedule(static, 10)
+// CHECK-NEXT: #pragma omp distribute dist_schedule(static, 10){{$}}
   for (int i=0; i < 2; ++i)a=2;
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: a = 2;
diff --git a/test/OpenMP/distribute_dist_schedule_messages.cpp b/test/OpenMP/distribute_dist_schedule_messages.cpp
index 98652a2..9f91f41 100644
--- a/test/OpenMP/distribute_dist_schedule_messages.cpp
+++ b/test/OpenMP/distribute_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_firstprivate_codegen.cpp b/test/OpenMP/distribute_firstprivate_codegen.cpp
index 718fc87..29d44fd 100644
--- a/test/OpenMP/distribute_firstprivate_codegen.cpp
+++ b/test/OpenMP/distribute_firstprivate_codegen.cpp
@@ -5,12 +5,28 @@
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
 
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -63,16 +79,13 @@
     #pragma omp teams
 #pragma omp distribute firstprivate(g, g1, svar, sfvar)
     for (int i = 0; i < 2; ++i) {
-      // LAMBDA-64: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
-      // LAMBDA-32: define internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.*}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
+      // LAMBDA: define internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.*}} [[G_IN:%.+]], double*{{.*}} [[G1_IN:%.+]], i32*{{.*}} [[SVAR_IN:%.+]], float*{{.*}} [[SFVAR_IN:%.+]])
       // Private alloca's for conversion
-      // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
-      // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*,
-      // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
-      // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
-      // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca float*,
       // LAMBDA: [[G1_REF:%.+]] = alloca double*,
-      // LAMBDA: [[TMP:%.+]] = alloca double*,
 
       // Actual private variables to be used in the body (tmp is used for the reference type)
       // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
@@ -82,31 +95,25 @@
       // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
 
       // Store input parameter addresses into private alloca's for conversion
-      // LAMBDA-64: store i{{[0-9]+}} [[G_IN]], i{{[0-9]+}}* [[G_ADDR]],
-      // LAMBDA-32: store double* [[G_IN]], double** [[G_ADDR]],
-      // LAMBDA: store i{{[0-9]+}} [[G1_IN]], i{{[0-9]+}}* [[G1_ADDR]],
-      // LAMBDA: store i{{[0-9]+}} [[SVAR_IN]], i{{[0-9]+}}* [[SVAR_ADDR]],
-      // LAMBDA: store i{{[0-9]+}} [[SFVAR_IN]], i{{[0-9]+}}* [[SFVAR_ADDR]],
+      // LAMBDA: store double* [[G_IN]], double** [[G_ADDR]],
+      // LAMBDA: store double* [[G1_IN]], double** [[G1_ADDR]],
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+      // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_ADDR]],
 
-      // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast i{{[0-9]+}}* [[G_ADDR]] to double*
-      // LAMBDA-32-DAG: [[G_ADDR_VAL:%.+]] = load double*, double** [[G_ADDR]],
-      // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast i{{[0-9]+}}* [[G1_ADDR]] to double*
-      // LAMBDA-DAG: store double* [[G1_CONV]], double** [[G1_REF]],
-      // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[SVAR_ADDR]] to i{{[0-9]+}}*
-      // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[SFVAR_ADDR]] to float*
-      // LAMBDA-DAG: [[G1_REF_VAL:%.+]] = load double*, double** [[G1_REF]],
-      // LAMBDA-DAG: store double* [[G1_REF_VAL]], double** [[TMP]],
-      // LAMBDA-64-DAG: [[G_CONV_VAL:%.+]] = load{{.*}} double, double* [[G_CONV]],
-      // LAMBDA-32-DAG: [[G_CONV_VAL:%.+]] = load{{.*}} double, double* [[G_ADDR_VAL]],
+      // LAMBDA-DAG: [[G_ADDR_VAL:%.+]] = load double*, double** [[G_ADDR]],
+      // LAMBDA-DAG: [[SVAR_ADDR_VAL:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+      // LAMBDA-DAG: [[SFVAR_ADDR_VAL:%.+]] = load float*, float** [[SFVAR_ADDR]],
+      // LAMBDA-DAG: [[G1_ADDR_VAL:%.+]] = load double*, double** [[G1_ADDR]],
+      // LAMBDA-DAG: store double* [[G1_ADDR_VAL]], double** [[G1_REF]],
+      // LAMBDA-DAG: [[G_CONV_VAL:%.+]] = load{{.*}} double, double* [[G_ADDR_VAL]],
       // LAMBDA-DAG: store double [[G_CONV_VAL]], double* [[G_PRIVATE]],
-      // LAMBDA-DAG: [[TMP_VAL:%.+]] = load double*, double** [[TMP]],
+      // LAMBDA-DAG: [[TMP_VAL:%.+]] = load double*, double** [[G1_REF]],
       // LAMBDA-DAG: [[TMP_VAL_VAL:%.+]] = load{{.*}} double, double* [[TMP_VAL]],
       // LAMBDA-DAG: store double [[TMP_VAL_VAL]], double* [[G1_PRIVATE]],
       // LAMBDA-DAG: store double* [[G1_PRIVATE]], double** [[TMP_PRIVATE]],
-      // LAMBDA-64-DAG: [[SVAR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_CONV]],
-      // LAMBDA-32-DAG: [[SVAR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_ADDR]],
+      // LAMBDA-DAG: [[SVAR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_ADDR_VAL]],
       // LAMBDA-DAG: store i{{[0-9]+}} [[SVAR_CONV_VAL]], i{{[0-9]+}}* [[SVAR_PRIVATE]],
-      // LAMBDA-DAG: [[SFVAR_CONV_VAL:%.+]] = load float, float* [[SFVAR_CONV]],
+      // LAMBDA-DAG: [[SFVAR_CONV_VAL:%.+]] = load float, float* [[SFVAR_ADDR_VAL]],
       // LAMBDA-DAG: store float [[SFVAR_CONV_VAL]], float* [[SFVAR_PRIVATE]],
       // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
       g += 1;
@@ -204,16 +211,18 @@
 // CHECK: ret
 
 // CHECK: define{{.+}} [[OFFLOAD_FUN]](
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[OMP_OUTLINED:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}}*)* [[OMP_OUTLINED:@.+]] to void
 // CHECK: ret
 //
-// CHECK: define internal void [[OMP_OUTLINED]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.*}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]])
+// CHECK: define internal void [[OMP_OUTLINED]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.*}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]])
 
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
 // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
-// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
 // CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*,
 
 // discard omp loop variables
@@ -222,6 +231,7 @@
 // CHECK: {{.*}} = alloca i{{[0-9]+}},
 // CHECK: {{.*}} = alloca i{{[0-9]+}},
 // CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
 
 // CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
@@ -230,23 +240,22 @@
 // CHECK-DAG: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
 // CHECK: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 
-// CHECK: store i{{[0-9]+}} [[T_VAR_IN]], i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
 // CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
-// CHECK: store i{{[0-9]+}} [[SVAR_IN]], i{{[0-9]+}}* [[SVAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
 
 // init t_var
-// CHECK-64-DAG: [[T_VAR_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[T_VAR_ADDR]] to i{{[0-9]+}}*
-// CHECK-64-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR_CONV]],
-// CHECK-32-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
-// CHECK-DAG: store i{{[0-9]+}} [[T_VAR_ADDR_CONV_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK-DAG: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK-DAG: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]],
+// CHECK-DAG: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]],
 
 // init vec
 // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK-DAG: [[VEC_ADDR_VAL_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i{{[0-9]+}}*
 // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i{{[0-9]+}}*
-// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* [[VEC_PRIV_BCAST]], i{{[0-9]+}}* [[VEC_ADDR_VAL_BCAST]],{{.+}})
+// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], i{{[0-9]+}}* align {{[0-9]+}} [[VEC_ADDR_VAL_BCAST]],{{.+}})
 
 // init s_arr
 // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
@@ -261,7 +270,7 @@
 // CHECK-DAG: [[S_ARR_DST_PAST:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]],{{.+}} ], [ [[S_ARR_DST:%.+]],{{.+}} ]
 // CHECK-DAG: [[S_ARR_SRC_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_SRC_PAST]] to{{.+}}
 // CHECK-DAG: [[S_ARR_DST_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_DST_PAST]] to{{.+}}
-// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[S_ARR_DST_BCAST]], {{.+}}* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[S_ARR_DST_BCAST]], {{.+}}* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
 // CHECK-DAG: [[S_ARR_SRC]] = getelementptr{{.+}}
 // CHECK-DAG: [[S_ARR_DST]] = getelementptr{{.+}}
 // CHECK-DAG: [[S_ARR_CPY_FIN:%.+]] = icmp{{.+}} [[S_ARR_DST]], [[S_ARR_PRIV_NEXT]]
@@ -274,14 +283,13 @@
 // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_PRIV]] to{{.+}}
 // CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to{{.+}}
-// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_BCAST]],{{.+}})
+// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}})
 // CHECK-DAG: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]],
 
 // init svar
-// CHECK-64-DAG: [[SVAR_ADDR_CONV:%.+]] = bitcast{{.+}} [[SVAR_ADDR]] to{{.+}}
-// CHECK-64-DAG: [[SVAR_CONV_VAL:%.+]] = load{{.+}},{{.+}} [[SVAR_ADDR_CONV]],
-// CHECK-32-DAG: [[SVAR_CONV_VAL:%.+]] = load{{.+}},{{.+}} [[SVAR_ADDR]],
-// CHECK-DAG: store{{.+}} [[SVAR_CONV_VAL]],{{.+}} [[SVAR_PRIV]],
+// CHECK-DAG: [[SVAR_REF:%.+]] = load{{.+}},{{.+}} [[SVAR_ADDR]],
+// CHECK-DAG: [[SVAR_VAL:%.+]] = load{{.+}},{{.+}} [[SVAR_REF]],
+// CHECK-DAG: store{{.+}} [[SVAR_VAL]],{{.+}} [[SVAR_PRIV]],
 
 // CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}},
 // CHECK-DAG: store i{{[0-9]+}} 1, i{{[0-9]+}}* %.omp{{.+}},
@@ -301,12 +309,14 @@
 // CHECK: ret
 
 // CHECK: define{{.+}} [[OFFLOAD_FUN_1]](
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_1:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_1:@.+]] to void
 // CHECK: ret
 //
-// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_INT_TY]]*{{.*}} [[VAR_IN:%.+]])
+// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_INT_TY]]*{{.*}} [[VAR_IN:%.+]])
 
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
 // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
@@ -318,6 +328,7 @@
 // CHECK: {{.*}} = alloca i{{[0-9]+}},
 // CHECK: {{.*}} = alloca i{{[0-9]+}},
 // CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
 
 // CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
@@ -325,22 +336,21 @@
 // CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
 // CHECK-DAG: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
 
-// CHECK: store i{{[0-9]+}} [[T_VAR_IN]], i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
 // CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]],
 
 // init t_var
-// CHECK-64-DAG: [[T_VAR_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[T_VAR_ADDR]] to i{{[0-9]+}}*
-// CHECK-64-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR_CONV]],
-// CHECK-32-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
-// CHECK-DAG: store i{{[0-9]+}} [[T_VAR_ADDR_CONV_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK-DAG: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK-DAG: [[T_VAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]],
+// CHECK-DAG: store i{{[0-9]+}} [[T_VAR]], i{{[0-9]+}}* [[T_VAR_PRIV]],
 
 // init vec
 // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK-DAG: [[VEC_ADDR_VAL_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i{{[0-9]+}}*
 // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i{{[0-9]+}}*
-// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* [[VEC_PRIV_BCAST]], i{{[0-9]+}}* [[VEC_ADDR_VAL_BCAST]],{{.+}})
+// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], i{{[0-9]+}}* align {{[0-9]+}} [[VEC_ADDR_VAL_BCAST]],{{.+}})
 
 // init s_arr
 // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
@@ -355,7 +365,7 @@
 // CHECK-DAG: [[S_ARR_DST_PAST:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]],{{.+}} ], [ [[S_ARR_DST:%.+]],{{.+}} ]
 // CHECK-DAG: [[S_ARR_SRC_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_SRC_PAST]] to{{.+}}
 // CHECK-DAG: [[S_ARR_DST_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_DST_PAST]] to{{.+}}
-// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[S_ARR_DST_BCAST]], {{.+}}* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[S_ARR_DST_BCAST]], {{.+}}* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
 // CHECK-DAG: [[S_ARR_SRC]] = getelementptr{{.+}}
 // CHECK-DAG: [[S_ARR_DST]] = getelementptr{{.+}}
 // CHECK-DAG: [[S_ARR_CPY_FIN:%.+]] = icmp{{.+}} [[S_ARR_DST]], [[S_ARR_PRIV_NEXT]]
@@ -368,7 +378,7 @@
 // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_PRIV]] to{{.+}}
 // CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to{{.+}}
-// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_BCAST]],{{.+}})
+// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}})
 // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]],
 
 // CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}},
diff --git a/test/OpenMP/distribute_firstprivate_messages.cpp b/test/OpenMP/distribute_firstprivate_messages.cpp
index a59c952..ea0e7b4 100644
--- a/test/OpenMP/distribute_firstprivate_messages.cpp
+++ b/test/OpenMP/distribute_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_lastprivate_codegen.cpp b/test/OpenMP/distribute_lastprivate_codegen.cpp
index abcbfd8..66a860b 100644
--- a/test/OpenMP/distribute_lastprivate_codegen.cpp
+++ b/test/OpenMP/distribute_lastprivate_codegen.cpp
@@ -5,12 +5,28 @@
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
 
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -74,6 +90,7 @@
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
       // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
       // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
@@ -202,6 +219,7 @@
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
@@ -238,7 +256,7 @@
 // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
 // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
 // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
 // CHECK: call void @__kmpc_for_static_fini(
 
 // lastprivates
@@ -251,7 +269,7 @@
 // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
 // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
 // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
 // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
 // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
@@ -262,7 +280,7 @@
 // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
 // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
 // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
 // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
 // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
@@ -271,7 +289,7 @@
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
 // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
 // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
 // CHECK: ret void
@@ -303,6 +321,7 @@
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
@@ -339,7 +358,7 @@
 // CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
 // CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8*
 // CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]],
 // CHECK: call void @__kmpc_for_static_fini(
 
 // lastprivates
@@ -352,7 +371,7 @@
 // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
 // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
 // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
 // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
 // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
@@ -363,7 +382,7 @@
 // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
 // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
 // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
 // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
 // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
@@ -372,6 +391,6 @@
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
 // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: ret void
 #endif
diff --git a/test/OpenMP/distribute_parallel_for_ast_print.cpp b/test/OpenMP/distribute_parallel_for_ast_print.cpp
index 54a3649..58e6eb8 100644
--- a/test/OpenMP/distribute_parallel_for_ast_print.cpp
+++ b/test/OpenMP/distribute_parallel_for_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -std=c++11 -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -std=c++11 -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -37,7 +41,7 @@
   }
 };
 
-// CHECK: #pragma omp distribute parallel for private(this->a) private(this->a) private(T::a)
+// CHECK: #pragma omp distribute parallel for private(this->a) private(this->a) private(T::a){{$}}
 // CHECK: #pragma omp distribute parallel for private(this->a) private(this->a)
 // CHECK: #pragma omp distribute parallel for private(this->a) private(this->a) private(this->S::a)
 
@@ -82,7 +86,7 @@
 // CHECK-NEXT: a = 2;
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) schedule(static, N) if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h) dist_schedule(static,N)
+#pragma omp distribute parallel for private(argc, b), firstprivate(c, d), lastprivate(f) collapse(N) schedule(static, N) if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h) dist_schedule(static,N)
   for (int i = 0; i < 2; ++i)
     for (int j = 0; j < 2; ++j)
       for (int j = 0; j < 2; ++j)
@@ -93,8 +97,8 @@
       for (int j = 0; j < 2; ++j)
         for (int j = 0; j < 2; ++j)
           for (int j = 0; j < 2; ++j)
-	    a++;
-  // CHECK: #pragma omp distribute parallel for private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) schedule(static, N) if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h) dist_schedule(static, N)
+            a++;
+  // CHECK: #pragma omp distribute parallel for private(argc,b) firstprivate(c,d) lastprivate(f) collapse(N) schedule(static, N) if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h) dist_schedule(static, N)
   // CHECK-NEXT: for (int i = 0; i < 2; ++i)
   // CHECK-NEXT: for (int j = 0; j < 2; ++j)
   // CHECK-NEXT: for (int j = 0; j < 2; ++j)
diff --git a/test/OpenMP/distribute_parallel_for_codegen.cpp b/test/OpenMP/distribute_parallel_for_codegen.cpp
index 62c38f1..a7700a3 100644
--- a/test/OpenMP/distribute_parallel_for_codegen.cpp
+++ b/test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -6,12 +6,28 @@
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
 
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1  -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -28,6 +44,7 @@
   #pragma omp teams
   #pragma omp distribute parallel for
   for (int i = 0; i < n; ++i) {
+    #pragma omp cancel for
     a[i] = b[i] + c[i];
   }
 
diff --git a/test/OpenMP/distribute_parallel_for_collapse_messages.cpp b/test/OpenMP/distribute_parallel_for_collapse_messages.cpp
index 41976a6..f01dfee 100644
--- a/test/OpenMP/distribute_parallel_for_collapse_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_copyin_messages.cpp b/test/OpenMP/distribute_parallel_for_copyin_messages.cpp
index 7d70341..5182f24 100644
--- a/test/OpenMP/distribute_parallel_for_copyin_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_copyin_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_default_messages.cpp b/test/OpenMP/distribute_parallel_for_default_messages.cpp
index 3437bd5..cb9cbcf 100644
--- a/test/OpenMP/distribute_parallel_for_default_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 template <class T, int N>
diff --git a/test/OpenMP/distribute_parallel_for_dist_schedule_messages.cpp b/test/OpenMP/distribute_parallel_for_dist_schedule_messages.cpp
index 0f5820e..adcef0b 100644
--- a/test/OpenMP/distribute_parallel_for_dist_schedule_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
index d12c0aa..e4915ac 100644
--- a/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
+++ b/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
@@ -5,12 +5,27 @@
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
 
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -63,17 +78,14 @@
     #pragma omp teams
     #pragma omp distribute parallel for firstprivate(g, g1, svar, sfvar)
     for (int i = 0; i < 2; ++i) {
-      // LAMBDA-64: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
-      // LAMBDA-32: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
+      // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double* {{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
 
       // addr alloca's
-      // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
-      // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*,
-      // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
-      // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
-      // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca float*,
       // LAMBDA: [[G1_REF:%.+]] = alloca double*,
-      // LAMBDA: [[TMP:%.+]] = alloca double*,
 
       // private alloca's
       // LAMBDA: [[G_PRIV:%.+]] = alloca double,
@@ -88,31 +100,27 @@
       // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
       // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
 
+      // LAMBDA-DAG: [[G_CONV:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]]
+      // LAMBDA-DAG: [[G1_CONV:%.+]] = load {{.+}}*, {{.+}}** [[G1_ADDR]]
+      // LAMBDA-DAG: [[SVAR_CONV:%.+]] = load {{.+}}*, {{.+}}** [[SVAR_ADDR]]
+      // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = load {{.+}}*, {{.+}}** [[SFVAR_ADDR]]
+
       // init private alloca's with addr alloca's
       // g
-      // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast {{.+}}* [[G_ADDR]] to
-      // LAMBDA-32-DAG: [[G_CONV:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]]
       // LAMBDA-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_CONV]],
       // LAMBDA-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]],
 
       // g1
-      // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}}* [[G1_ADDR]] to
-      // LAMBDA-DAG: store {{.+}}* [[G1_CONV]], {{.+}}** [[G1_REF]],
-      // LAMBDA-DAG: [[G1_REF_VAL:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]],
-      // LAMBDA-DAG: store {{.+}}* [[G1_REF_VAL]], {{.+}}** [[TMP]],
-      // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[TMP]],
+      // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]],
       // LAMBDA-DAG: [[TMP_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_REF]],
       // LAMBDA-DAG: store {{.+}} [[TMP_VAL]], {{.+}}* [[G1_PRIV]]
       // LAMBDA-DAG: store {{.+}}* [[G1_PRIV]], {{.+}}** [[TMP_PRIV]],
 
       // svar
-      // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to
-      // LAMBDA-64-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]],
-      // LAMBDA-32-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_ADDR]],
+      // LAMBDA-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]],
       // LAMBDA-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_PRIV]],
 
       // sfvar
-      // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_ADDR]] to
       // LAMBDA-DAG: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_CONV]],
       // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_PRIV]],
 
@@ -271,17 +279,19 @@
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
 
 // CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]])
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[OMP_OUTLINED_0:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}}*)* [[OMP_OUTLINED_0:@.+]] to void
 // CHECK: ret
 
-// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]])
+// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]])
 
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}}*,
 // addr alloca's
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
 // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
-// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
 // CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*,
 
 // skip loop alloca's
@@ -310,16 +320,15 @@
 
 // init private alloca's with addr alloca's
 // t-var
-// CHECK-64-DAG: [[T_VAR_CONV:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
-// CHECK-64-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CONV]],
-// CHECK-32-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR]],
+// CHECK-DAG: [[T_VAR_REF:%.+]] = load {{.+}}, {{.+}}** [[T_VAR_ADDR]],
+// CHECK-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_REF]],
 // CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]],
 
 // vec
 // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
 // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
 // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
-// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
+// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]],
 
 // s_arr
 // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
@@ -336,14 +345,13 @@
 // CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]],
 // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
 
 // svar
-// CHECK-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to
-// CHECK-64-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]],
-// CHECK-32-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_ADDR]],
-// CHECK-DAG: store {{.+}} [[SVAR_CONV_VAL]], {{.+}}* [[SVAR_PRIV]],
+// CHECK-DAG: [[SVAR_REF:%.+]] = load {{.+}}*, {{.+}}** [[SVAR_ADDR]],
+// CHECK-DAG: [[SVAR:%.+]] = load {{.+}}, {{.+}}* [[SVAR_REF]],
+// CHECK-DAG: store {{.+}} [[SVAR]], {{.+}}* [[SVAR_PRIV]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
 // pass private alloca's to fork
@@ -413,7 +421,7 @@
 // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
 // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
 // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
-// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
+// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]],
 
 // s_arr
 // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
@@ -430,7 +438,7 @@
 // CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
 // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
@@ -456,13 +464,15 @@
 // CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]])
 
 // CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_0:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_0:@.+]] to void
 // CHECK: ret
 
-// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
+// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
 
 // addr alloca's
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
 // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
@@ -492,16 +502,15 @@
 
 // init private alloca's with addr alloca's
 // t-var
-// CHECK-64-DAG: [[T_VAR_CONV:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
-// CHECK-64-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CONV]],
-// CHECK-32-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR]],
+// CHECK-DAG: [[T_VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[T_VAR_ADDR]],
+// CHECK-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR_REF]],
 // CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]],
 
 // vec
 // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
 // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
 // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
-// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
+// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]],
 
 // s_arr
 // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
@@ -518,7 +527,7 @@
 // CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]],
 // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
@@ -582,7 +591,7 @@
 // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
 // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
 // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
-// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
+// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]],
 
 // s_arr
 // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
@@ -599,7 +608,7 @@
 // CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
 // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
diff --git a/test/OpenMP/distribute_parallel_for_firstprivate_messages.cpp b/test/OpenMP/distribute_parallel_for_firstprivate_messages.cpp
index 3e288c3..63f4dbe 100644
--- a/test/OpenMP/distribute_parallel_for_firstprivate_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -42,7 +44,7 @@
 };
 class S5 {
   int a;
-  S5(const S5 &s5) : a(s5.a) {} // expected-note 4 {{implicitly declared private here}}
+  S5(const S5 &s5) : a(s5.a) {} // expected-note 2 {{implicitly declared private here}}
 
 public:
   S5() : a(0) {}
@@ -50,7 +52,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -150,9 +152,10 @@
 #pragma omp distribute parallel for firstprivate(i)
   for (int k = 0; k < argc; ++k)
     ++k;
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}}
+#pragma omp distribute parallel for lastprivate(g) firstprivate(g)
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp parallel private(i)
@@ -314,14 +317,16 @@
 #pragma omp distribute parallel for firstprivate(j)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}}
+#pragma omp distribute parallel for lastprivate(g) firstprivate(g)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for lastprivate(n) firstprivate(n) // OK
+#pragma omp distribute parallel for lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp parallel
diff --git a/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/test/OpenMP/distribute_parallel_for_if_codegen.cpp
index 9ee909e..7e7f24f 100644
--- a/test/OpenMP/distribute_parallel_for_if_codegen.cpp
+++ b/test/OpenMP/distribute_parallel_for_if_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/distribute_parallel_for_if_messages.cpp b/test/OpenMP/distribute_parallel_for_if_messages.cpp
index 5e9d9ad..85e78d0 100644
--- a/test/OpenMP/distribute_parallel_for_if_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
index 2e8da79..dda8c93 100644
--- a/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
+++ b/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
@@ -5,12 +5,28 @@
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
 
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -74,6 +90,7 @@
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
       // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
       // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
@@ -141,6 +158,7 @@
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
 
       // private alloca's
       // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
@@ -162,7 +180,7 @@
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
 
       // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
-      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1]],
+      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
 
       // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
 
@@ -273,6 +291,7 @@
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
@@ -330,7 +349,7 @@
 // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
 // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
 // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
 // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
 // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
@@ -341,7 +360,7 @@
 // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
 // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
 // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
 // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
 // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
@@ -350,7 +369,7 @@
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
 // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
 // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
 // CHECK: ret void
@@ -371,6 +390,7 @@
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
@@ -421,7 +441,7 @@
 // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
 // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
 // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
 
 // CHECK: call void @__kmpc_for_static_fini(
 
@@ -435,7 +455,7 @@
 // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
 // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
 // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
 // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
 // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
@@ -446,7 +466,7 @@
 // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
 // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
 // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
 // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
 // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
@@ -455,7 +475,7 @@
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
 // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
 // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
 // CHECK: ret void
@@ -486,6 +506,7 @@
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
@@ -528,7 +549,7 @@
 // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
 // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
 // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
 // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]*
 // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
@@ -539,7 +560,7 @@
 // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
 // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
 // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
 // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
 // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
@@ -548,7 +569,7 @@
 // CHECK: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
 // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8*
 // CHECK: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL_BCAST]],{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],{{.+}})
 // CHECK: ret void
 
 // outlined function for 'parallel for'
@@ -566,6 +587,7 @@
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
@@ -613,7 +635,7 @@
 // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
 // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR]] to i8*
 // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8*
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
 
 // CHECK: call void @__kmpc_for_static_fini(
 
@@ -627,7 +649,7 @@
 // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
 // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
 // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
 // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]*
 // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
@@ -638,7 +660,7 @@
 // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
 // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
 // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
 // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
 // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
@@ -647,7 +669,7 @@
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
 // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR_REF]] to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: ret void
 
 #endif
diff --git a/test/OpenMP/distribute_parallel_for_lastprivate_messages.cpp b/test/OpenMP/distribute_parallel_for_lastprivate_messages.cpp
index ebc7a66..c8eaad8 100644
--- a/test/OpenMP/distribute_parallel_for_lastprivate_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -25,7 +27,7 @@
 const S2 ba[5];
 class S3 {
   int a;
-  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+  S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}}
 
 public:
   S3() : a(0) {}
@@ -52,7 +54,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -313,14 +315,16 @@
 #pragma omp distribute parallel for lastprivate(j)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{firstprivate variable cannot be lastprivate}} expected-note@+3 {{defined as firstprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+#pragma omp distribute parallel for firstprivate(m) lastprivate(m)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for lastprivate(n) firstprivate(n) // OK
+#pragma omp distribute parallel for lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i)
     foo();
   static int si;
diff --git a/test/OpenMP/distribute_parallel_for_messages.cpp b/test/OpenMP/distribute_parallel_for_messages.cpp
index 35a61df..5ea73a8 100644
--- a/test/OpenMP/distribute_parallel_for_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++11 -o - %s
+
 void foo() {
 }
 
@@ -38,7 +40,7 @@
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for
+#pragma omp distribute parallel for linear(argc) // expected-error {{unexpected OpenMP clause 'linear' in directive '#pragma omp distribute parallel for'}}
   for (int i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -116,3 +118,14 @@
     ;
 }
 
+void test_cancel() {
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for
+  for (int i = 0; i < 16; ++i)
+    for (int j = 0; j < 16; ++j) {
+#pragma omp cancel for
+    ;
+    }
+}
+
diff --git a/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
index 89e9905..21d0949 100644
--- a/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
+++ b/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/distribute_parallel_for_num_threads_messages.cpp b/test/OpenMP/distribute_parallel_for_num_threads_messages.cpp
index 7939514..110f5d7 100644
--- a/test/OpenMP/distribute_parallel_for_num_threads_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_private_codegen.cpp b/test/OpenMP/distribute_parallel_for_private_codegen.cpp
index d8d8a9a..a731031 100644
--- a/test/OpenMP/distribute_parallel_for_private_codegen.cpp
+++ b/test/OpenMP/distribute_parallel_for_private_codegen.cpp
@@ -5,12 +5,28 @@
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
 
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/distribute_parallel_for_private_messages.cpp b/test/OpenMP/distribute_parallel_for_private_messages.cpp
index 465357a..75fe47a 100644
--- a/test/OpenMP/distribute_parallel_for_private_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp b/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
index 958b4e7..88ecdca 100644
--- a/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
+++ b/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
@@ -3,6 +3,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/distribute_parallel_for_proc_bind_messages.cpp b/test/OpenMP/distribute_parallel_for_proc_bind_messages.cpp
index 9898f9d..febf4b0 100644
--- a/test/OpenMP/distribute_parallel_for_proc_bind_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 template <class T, typename S, int N>
diff --git a/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp b/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp
new file mode 100644
index 0000000..dc2a3ca
--- /dev/null
+++ b/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp
@@ -0,0 +1,84 @@
+// Test host code gen
+
+// RUN: %clang_cc1  -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+
+template <typename T>
+T tmain(T &r) {
+  int n = 1000;  
+  // schedule: dynamic chunk
+  #pragma omp target map(tofrom:r)
+  #pragma omp teams
+  #pragma omp distribute parallel for reduction(+:r)
+  for (int i = 0; i < n; ++i)
+    r += (T)i;  
+
+  return r;
+}
+
+int main() {
+  int n = 1000;
+  int r = 0;
+  #pragma omp target map(tofrom:r)
+  #pragma omp teams
+  #pragma omp distribute parallel for reduction(+:r)
+  for (int i = 0; i < n; ++i)
+    r += i;
+
+  return tmain<int>(r);
+}
+
+// CHECK-LABEL: main
+// CHECK: call{{.+}} @__tgt_target_teams(
+// CHECK: call void [[OFFL:@.+]](
+// CHECK: call{{.+}} [[TMAIN:@.+]](i{{32|64}}
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFL]](
+// CHECK: call{{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}} [[TEOUTL:@.+]] to{{.+}}
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[TEOUTL]](
+// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} [[PAROUTL:@.+]] to{{.+}}
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[PAROUTL]](
+// CHECK: call{{.+}} @__kmpc_reduce_nowait(
+// CHECK: call{{.+}} @__kmpc_end_reduce_nowait(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[TMAIN]](i{{32|64}}
+// CHECK: call{{.+}} @__tgt_target_teams(
+// CHECK: call void [[TOFFL:@.+]](
+// CHECK: ret
+
+// CHECK: define{{.+}} [[TOFFL]](
+// CHECK: call{{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}} [[TEMPLTEOUTL:@.+]] to{{.+}}
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[TEMPLTEOUTL]](
+// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} [[TPAROUTL:@.+]] to{{.+}}
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[TPAROUTL]](
+// CHECK: call{{.+}} @__kmpc_reduce_nowait(
+// CHECK: call{{.+}} @__kmpc_end_reduce_nowait(
+// CHECK: ret void
+
+#endif // HEADER
diff --git a/test/OpenMP/distribute_parallel_for_reduction_messages.cpp b/test/OpenMP/distribute_parallel_for_reduction_messages.cpp
index 0bf1bc0..21a1eb4 100644
--- a/test/OpenMP/distribute_parallel_for_reduction_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_schedule_messages.cpp b/test/OpenMP/distribute_parallel_for_schedule_messages.cpp
index 6363cd7..1f521d4 100644
--- a/test/OpenMP/distribute_parallel_for_schedule_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_shared_messages.cpp b/test/OpenMP/distribute_parallel_for_shared_messages.cpp
index d5725e7..01c582c 100644
--- a/test/OpenMP/distribute_parallel_for_shared_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_shared_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 
 struct S1; // expected-note 2 {{declared here}}
 extern S1 a;
diff --git a/test/OpenMP/distribute_parallel_for_simd_aligned_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_aligned_messages.cpp
index 2d99d7d..0c94f5b 100644
--- a/test/OpenMP/distribute_parallel_for_simd_aligned_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/distribute_parallel_for_simd_ast_print.cpp b/test/OpenMP/distribute_parallel_for_simd_ast_print.cpp
index 6ed3f21..fff6f96 100644
--- a/test/OpenMP/distribute_parallel_for_simd_ast_print.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -28,17 +32,18 @@
       ++this->a.a;
   }
   S7 &operator=(S7 &s) {
+    int k;
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd private(a) private(this->a)
-    for (int k = 0; k < s.a.a; ++k)
+#pragma omp distribute parallel for simd private(a) private(this->a) linear(k)
+    for (k = 0; k < s.a.a; ++k)
       ++s.a.a;
     return *this;
   }
 };
 
-// CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) private(T::a)
-// CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a)
+// CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) private(T::a){{$}}
+// CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) linear(k)
 // CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) private(this->S::a)
 
 class S8 : public S7<S> {
@@ -82,7 +87,7 @@
 // CHECK-NEXT: a = 2;
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) schedule(static, N) if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h) dist_schedule(static,N)
+#pragma omp distribute parallel for simd private(argc, b), firstprivate(c, d), lastprivate(f) collapse(N) schedule(static, N) if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h) dist_schedule(static,N)
   for (int i = 0; i < 2; ++i)
     for (int j = 0; j < 2; ++j)
       for (int j = 0; j < 2; ++j)
@@ -93,8 +98,8 @@
       for (int j = 0; j < 2; ++j)
         for (int j = 0; j < 2; ++j)
           for (int j = 0; j < 2; ++j)
-	    a++;
-  // CHECK: #pragma omp distribute parallel for simd private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) schedule(static, N) if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h) dist_schedule(static, N)
+            a++;
+  // CHECK: #pragma omp distribute parallel for simd private(argc,b) firstprivate(c,d) lastprivate(f) collapse(N) schedule(static, N) if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h) dist_schedule(static, N)
   // CHECK-NEXT: for (int i = 0; i < 2; ++i)
   // CHECK-NEXT: for (int j = 0; j < 2; ++j)
   // CHECK-NEXT: for (int j = 0; j < 2; ++j)
@@ -135,14 +140,15 @@
   // CHECK-NEXT: for (int j = 0; j < 10; ++j)
   // CHECK-NEXT: a++;
 
+  int i;
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd aligned(x:8) linear(h:2) safelen(8) simdlen(8)
-  for (int i = 0; i < 100; i++)
+#pragma omp distribute parallel for simd aligned(x:8) linear(i:2) safelen(8) simdlen(8)
+  for (i = 0; i < 100; i++)
     for (int j = 0; j < 200; j++)
       a += h + x[j];
-  // CHECK: #pragma omp distribute parallel for simd aligned(x: 8) linear(h: 2) safelen(8) simdlen(8)
-  // CHECK-NEXT: for (int i = 0; i < 100; i++)
+  // CHECK: #pragma omp distribute parallel for simd aligned(x: 8) linear(i: 2) safelen(8) simdlen(8)
+  // CHECK-NEXT: for (i = 0; i < 100; i++)
   // CHECK-NEXT: for (int j = 0; j < 200; j++)
   // CHECK-NEXT: a += h + x[j];
 
diff --git a/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
new file mode 100644
index 0000000..129d90b
--- /dev/null
+++ b/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
@@ -0,0 +1,2278 @@
+// Test host code gen
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+
+template <typename T>
+T tmain() {
+  T *a, *b, *c;
+  int n = 10000;
+  int ch = 100;
+
+  // no schedule clauses
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute parallel for simd
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+  }
+
+  // dist_schedule: static no chunk
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute parallel for simd dist_schedule(static)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+  }
+
+  // dist_schedule: static chunk
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute parallel for simd dist_schedule(static, ch)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+  }
+
+  // schedule: static no chunk
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute parallel for simd schedule(static)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+  }
+
+  // schedule: static chunk
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute parallel for simd schedule(static, ch)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+  }
+
+  // schedule: dynamic no chunk
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute parallel for simd schedule(dynamic)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+  }
+
+  // schedule: dynamic chunk
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute parallel for simd schedule(dynamic, ch)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+  }
+
+  return T();
+}
+
+int main() {
+  double *a, *b, *c;
+  int n = 10000;
+  int ch = 100;
+
+#ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]](
+
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]](
+
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]](
+
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]](
+
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]](
+
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]](
+
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]](
+
+    // no schedule clauses
+    #pragma omp target
+    #pragma omp teams
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+    #pragma omp distribute parallel for simd
+    for (int i = 0; i < n; ++i) {
+      a[i] = b[i] + c[i];
+      // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]](
+      // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
+      // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+      // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+      // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+      // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+
+      // check EUB for distribute
+      // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
+      // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
+      // LAMBDA-DAG: [[EUB_TRUE]]:
+      // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
+      // LAMBDA: br label %[[EUB_END:.+]]
+      // LAMBDA-DAG: [[EUB_FALSE]]:
+      // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: br label %[[EUB_END]]
+      // LAMBDA-DAG: [[EUB_END]]:
+      // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
+      // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
+
+      // initialize omp.iv
+      // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
+      // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
+      // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
+
+      // check exit condition
+      // LAMBDA: [[OMP_JUMP_BACK]]:
+      // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
+      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+      // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
+      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
+
+      // check that PrevLB and PrevUB are passed to the 'for'
+      // LAMBDA: [[DIST_BODY]]:
+      // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
+      // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
+      // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
+      // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
+      // check that distlb and distub are properly passed to fork_call
+      // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
+      // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
+      // LAMBDA: br label %[[DIST_INC:.+]]
+
+      // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
+      // LAMBDA: [[DIST_INC]]:
+      // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
+      // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
+      // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
+      // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
+      // LAMBDA: br label %[[OMP_JUMP_BACK]]
+
+      // LAMBDA-DAG: call void @__kmpc_for_static_fini(
+      // LAMBDA: ret
+
+      // implementation of 'parallel for'
+      // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+
+      // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+
+      // initialize lb and ub to PrevLB and PrevUB
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+      // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+      // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+
+      // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
+      // In this case we use EUB
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+      // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
+      // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+      // LAMBDA: [[PF_EUB_TRUE]]:
+      // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
+      // LAMBDA: br label %[[PF_EUB_END:.+]]
+      // LAMBDA-DAG: [[PF_EUB_FALSE]]:
+      // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+      // LAMBDA: br label %[[PF_EUB_END]]
+      // LAMBDA-DAG: [[PF_EUB_END]]:
+      // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
+      // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
+
+      // initialize omp.iv
+      // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+      // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
+
+      // check exit condition
+      // LAMBDA: [[OMP_PF_JUMP_BACK]]:
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
+      // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+      // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
+
+      // check that PrevLB and PrevUB are passed to the 'for'
+      // LAMBDA: [[PF_BODY]]:
+      // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: br label {{.+}}
+
+      // check stride 1 for 'for' in 'distribute parallel for simd'
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
+      // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
+
+      // LAMBDA-DAG: call void @__kmpc_for_static_fini(
+      // LAMBDA: ret
+
+      [&]() {
+	a[i] = b[i] + c[i];
+      }();
+    }
+
+    // dist_schedule: static no chunk (same sa default - no dist_schedule)
+    #pragma omp target
+    #pragma omp teams
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
+
+    #pragma omp distribute parallel for simd dist_schedule(static)
+    for (int i = 0; i < n; ++i) {
+      a[i] = b[i] + c[i];
+      // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]](
+      // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
+      // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+      // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+      // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+      // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+
+      // check EUB for distribute
+      // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
+      // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
+      // LAMBDA-DAG: [[EUB_TRUE]]:
+      // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
+      // LAMBDA: br label %[[EUB_END:.+]]
+      // LAMBDA-DAG: [[EUB_FALSE]]:
+      // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: br label %[[EUB_END]]
+      // LAMBDA-DAG: [[EUB_END]]:
+      // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
+      // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
+
+      // initialize omp.iv
+      // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
+      // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
+      // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
+
+      // check exit condition
+      // LAMBDA: [[OMP_JUMP_BACK]]:
+      // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
+      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+      // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
+      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
+
+      // check that PrevLB and PrevUB are passed to the 'for'
+      // LAMBDA: [[DIST_BODY]]:
+      // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
+      // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
+      // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
+      // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
+      // check that distlb and distub are properly passed to fork_call
+      // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
+      // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
+      // LAMBDA: br label %[[DIST_INC:.+]]
+
+      // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
+      // LAMBDA: [[DIST_INC]]:
+      // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
+      // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
+      // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
+      // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
+      // LAMBDA: br label %[[OMP_JUMP_BACK]]
+
+      // LAMBDA-DAG: call void @__kmpc_for_static_fini(
+      // LAMBDA: ret
+
+      // implementation of 'parallel for'
+      // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+
+      // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+
+      // initialize lb and ub to PrevLB and PrevUB
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+      // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+      // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+
+      // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
+      // In this case we use EUB
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+      // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
+      // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+      // LAMBDA: [[PF_EUB_TRUE]]:
+      // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
+      // LAMBDA: br label %[[PF_EUB_END:.+]]
+      // LAMBDA-DAG: [[PF_EUB_FALSE]]:
+      // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+      // LAMBDA: br label %[[PF_EUB_END]]
+      // LAMBDA-DAG: [[PF_EUB_END]]:
+      // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
+      // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
+
+      // initialize omp.iv
+      // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+      // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
+
+      // check exit condition
+      // LAMBDA: [[OMP_PF_JUMP_BACK]]:
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
+      // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+      // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
+
+      // check that PrevLB and PrevUB are passed to the 'for'
+      // LAMBDA: [[PF_BODY]]:
+      // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: br label {{.+}}
+
+      // check stride 1 for 'for' in 'distribute parallel for simd'
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
+      // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
+
+      // LAMBDA-DAG: call void @__kmpc_for_static_fini(
+      // LAMBDA: ret
+      [&]() {
+	a[i] = b[i] + c[i];
+      }();
+    }
+
+    // dist_schedule: static chunk
+    #pragma omp target
+    #pragma omp teams
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
+
+    #pragma omp distribute parallel for simd dist_schedule(static, ch)
+    for (int i = 0; i < n; ++i) {
+      a[i] = b[i] + c[i];
+      // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
+      // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
+      // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+      // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+      // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+      // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
+      // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
+      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
+
+      // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
+      // check EUB for distribute
+      // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
+      // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
+      // LAMBDA-DAG: [[EUB_TRUE]]:
+      // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
+      // LAMBDA: br label %[[EUB_END:.+]]
+      // LAMBDA-DAG: [[EUB_FALSE]]:
+      // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: br label %[[EUB_END]]
+      // LAMBDA-DAG: [[EUB_END]]:
+      // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
+      // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
+
+      // initialize omp.iv
+      // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
+      // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
+
+      // check exit condition
+      // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
+      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+      // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
+      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
+
+      // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
+      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
+
+      // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
+      // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
+      // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
+      // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
+      // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+
+      // check that PrevLB and PrevUB are passed to the 'for'
+      // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
+      // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
+      // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
+      // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
+      // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
+      // check that distlb and distub are properly passed to fork_call
+      // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
+      // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
+      // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]]
+
+      // check DistInc
+      // LAMBDA: [[DIST_INNER_LOOP_INC]]:
+      // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
+      // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
+      // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
+      // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
+      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
+
+      // LAMBDA: [[DIST_INNER_LOOP_END]]:
+      // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
+
+      // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
+      // check NextLB and NextUB
+      // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+      // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
+      // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
+      // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
+      // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+      // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
+      // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
+      // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
+      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
+
+      // outer loop exit
+      // LAMBDA: [[DIST_OUTER_LOOP_END]]:
+      // LAMBDA-DAG: call void @__kmpc_for_static_fini(
+      // LAMBDA: ret
+
+      // skip implementation of 'parallel for': using default scheduling and was tested above
+      [&]() {
+	a[i] = b[i] + c[i];
+      }();
+    }
+
+    // schedule: static no chunk
+    #pragma omp target
+    #pragma omp teams
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
+
+    #pragma omp distribute parallel for simd schedule(static)
+    for (int i = 0; i < n; ++i) {
+      a[i] = b[i] + c[i];
+      // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]](
+      // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
+      // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+      // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+      // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+      // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+      // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
+      // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+      // LAMBDA: ret
+
+      // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default)
+      // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+
+      // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+
+      // initialize lb and ub to PrevLB and PrevUB
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+      // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+      // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+
+      // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
+      // In this case we use EUB
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+      // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
+      // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+      // LAMBDA: [[PF_EUB_TRUE]]:
+      // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
+      // LAMBDA: br label %[[PF_EUB_END:.+]]
+      // LAMBDA-DAG: [[PF_EUB_FALSE]]:
+      // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+      // LAMBDA: br label %[[PF_EUB_END]]
+      // LAMBDA-DAG: [[PF_EUB_END]]:
+      // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
+      // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
+
+      // initialize omp.iv
+      // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+      // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
+
+      // check exit condition
+      // LAMBDA: [[OMP_PF_JUMP_BACK]]:
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
+      // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+      // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
+
+      // check that PrevLB and PrevUB are passed to the 'for'
+      // LAMBDA: [[PF_BODY]]:
+      // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: br label {{.+}}
+
+      // check stride 1 for 'for' in 'distribute parallel for simd'
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
+      // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
+
+      // LAMBDA-DAG: call void @__kmpc_for_static_fini(
+      // LAMBDA: ret
+
+      [&]() {
+	a[i] = b[i] + c[i];
+      }();
+    }
+
+    // schedule: static chunk
+    #pragma omp target
+    #pragma omp teams
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
+
+    #pragma omp distribute parallel for simd schedule(static, ch)
+    for (int i = 0; i < n; ++i) {
+      a[i] = b[i] + c[i];
+      // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]](
+      // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+      // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
+      // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+      // LAMBDA: ret
+
+      // 'parallel for' implementation using outer and inner loops and PrevEUB
+      // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
+      // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
+
+      // initialize lb and ub to PrevLB and PrevUB
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+      // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+      // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+      // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
+
+      // check PrevEUB (using PrevUB instead of NumIt as upper bound)
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+      // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
+      // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+      // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
+      // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
+      // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+      // LAMBDA: [[PF_EUB_TRUE]]:
+      // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+      // LAMBDA: br label %[[PF_EUB_END:.+]]
+      // LAMBDA-DAG: [[PF_EUB_FALSE]]:
+      // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+      // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
+      // LAMBDA: br label %[[PF_EUB_END]]
+      // LAMBDA-DAG: [[PF_EUB_END]]:
+      // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
+      // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
+      // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
+      // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}}  [[OMP_PF_UB]],
+      // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
+
+      // initialize omp.iv (IV = LB)
+      // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+      // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+
+      // outer loop: while (IV < UB) {
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+      // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
+
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
+      // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
+
+      // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]:
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
+      // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
+
+      // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
+      // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // skip body branch
+      // LAMBDA: br{{.+}}
+      // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
+
+      // IV = IV + 1 and inner loop latch
+      // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
+      // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
+      // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
+      // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]]
+
+      // check NextLB and NextUB
+      // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
+      // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
+
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
+      // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+      // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
+      // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
+      // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
+      // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
+      // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
+      // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
+
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
+      // LAMBDA-DAG: call void @__kmpc_for_static_fini(
+      // LAMBDA: ret
+      [&]() {
+	a[i] = b[i] + c[i];
+      }();
+    }
+
+    // schedule: dynamic no chunk
+    #pragma omp target
+    #pragma omp teams
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
+
+    #pragma omp distribute parallel for simd schedule(dynamic)
+    for (int i = 0; i < n; ++i) {
+      a[i] = b[i] + c[i];
+      // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]](
+      // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+      // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
+      // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+      // LAMBDA: ret
+
+      // 'parallel for' implementation using outer and inner loops and PrevEUB
+      // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+      // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
+
+      // initialize lb and ub to PrevLB and PrevUB
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+      // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+      // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
+      // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
+      // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
+
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
+      // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
+      // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
+      // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
+
+      // initialize omp.iv (IV = LB)
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
+      // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+      // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
+
+      // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
+      // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
+
+      // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
+      // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // skip body branch
+      // LAMBDA: br{{.+}}
+      // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
+
+      // IV = IV + 1 and inner loop latch
+      // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
+      // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
+      // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
+      // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
+
+      // check NextLB and NextUB
+      // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
+      // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
+
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
+      // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
+
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
+      // LAMBDA: ret
+      [&]() {
+	a[i] = b[i] + c[i];
+      }();
+    }
+
+    // schedule: dynamic chunk
+    #pragma omp target
+    #pragma omp teams
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
+
+    #pragma omp distribute parallel for simd schedule(dynamic, ch)
+    for (int i = 0; i < n; ++i) {
+      a[i] = b[i] + c[i];
+      // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]](
+      // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+      // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
+      // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+      // LAMBDA: ret
+
+      // 'parallel for' implementation using outer and inner loops and PrevEUB
+      // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
+      // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+      // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
+
+      // initialize lb and ub to PrevLB and PrevUB
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+      // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+      // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+      // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+      // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+      // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
+      // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
+      // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
+
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
+      // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
+      // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
+      // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
+
+      // initialize omp.iv (IV = LB)
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
+      // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+      // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
+
+      // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+      // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
+      // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
+
+      // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
+      // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+      // skip body branch
+      // LAMBDA: br{{.+}}
+      // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
+
+      // IV = IV + 1 and inner loop latch
+      // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
+      // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
+      // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
+      // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
+      // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
+
+      // check NextLB and NextUB
+      // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
+      // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
+
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
+      // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
+
+      // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
+      // LAMBDA: ret
+      [&]() {
+	a[i] = b[i] + c[i];
+      }();
+    }
+  }();
+  return 0;
+#else
+  // CHECK-LABEL: @main
+
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
+
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
+
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
+
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
+
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
+
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
+
+  // CHECK: call{{.+}} [[TMAIN:@.+]]()
+
+  // no schedule clauses
+  #pragma omp target
+  #pragma omp teams
+  // CHECK: define internal void [[OFFLOADING_FUN_1]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+  #pragma omp distribute parallel for simd
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
+    // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+    // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+    // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+    // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+
+    // check EUB for distribute
+    // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
+    // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
+    // CHECK-DAG: [[EUB_TRUE]]:
+    // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
+    // CHECK: br label %[[EUB_END:.+]]
+    // CHECK-DAG: [[EUB_FALSE]]:
+    // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: br label %[[EUB_END]]
+    // CHECK-DAG: [[EUB_END]]:
+    // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
+    // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
+
+    // initialize omp.iv
+    // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
+    // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
+    // CHECK: br label %[[OMP_JUMP_BACK:.+]]
+
+    // check exit condition
+    // CHECK: [[OMP_JUMP_BACK]]:
+    // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
+    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+    // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
+    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
+
+    // check that PrevLB and PrevUB are passed to the 'for'
+    // CHECK: [[DIST_BODY]]:
+    // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
+    // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
+    // check that distlb and distub are properly passed to fork_call
+    // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
+    // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
+    // CHECK: br label %[[DIST_INC:.+]]
+
+    // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
+    // CHECK: [[DIST_INC]]:
+    // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
+    // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
+    // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
+    // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
+    // CHECK: br label %[[OMP_JUMP_BACK]]
+
+    // CHECK-DAG: call void @__kmpc_for_static_fini(
+    // CHECK: ret
+
+    // implementation of 'parallel for'
+    // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+
+    // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+
+    // initialize lb and ub to PrevLB and PrevUB
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+    // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+    // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+
+    // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
+    // In this case we use EUB
+    // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+    // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
+    // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+    // CHECK: [[PF_EUB_TRUE]]:
+    // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
+    // CHECK: br label %[[PF_EUB_END:.+]]
+    // CHECK-DAG: [[PF_EUB_FALSE]]:
+    // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+    // CHECK: br label %[[PF_EUB_END]]
+    // CHECK-DAG: [[PF_EUB_END]]:
+    // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
+    // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
+
+    // initialize omp.iv
+    // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+    // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+    // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
+
+    // check exit condition
+    // CHECK: [[OMP_PF_JUMP_BACK]]:
+    // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
+    // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
+    // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+    // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
+
+    // check that PrevLB and PrevUB are passed to the 'for'
+    // CHECK: [[PF_BODY]]:
+    // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // CHECK: br label {{.+}}
+
+    // check stride 1 for 'for' in 'distribute parallel for simd'
+    // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
+    // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
+    // CHECK: br label %[[OMP_PF_JUMP_BACK]]
+
+    // CHECK-DAG: call void @__kmpc_for_static_fini(
+    // CHECK: ret
+  }
+
+  // dist_schedule: static no chunk
+  #pragma omp target
+  #pragma omp teams
+  // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
+
+  #pragma omp distribute parallel for simd dist_schedule(static)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
+    // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+    // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+    // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+    // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+
+    // check EUB for distribute
+    // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
+    // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
+    // CHECK-DAG: [[EUB_TRUE]]:
+    // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
+    // CHECK: br label %[[EUB_END:.+]]
+    // CHECK-DAG: [[EUB_FALSE]]:
+    // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: br label %[[EUB_END]]
+    // CHECK-DAG: [[EUB_END]]:
+    // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
+    // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
+
+    // initialize omp.iv
+    // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
+    // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
+    // CHECK: br label %[[OMP_JUMP_BACK:.+]]
+
+    // check exit condition
+    // CHECK: [[OMP_JUMP_BACK]]:
+    // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
+    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+    // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
+    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
+
+    // check that PrevLB and PrevUB are passed to the 'for'
+    // CHECK: [[DIST_BODY]]:
+    // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
+    // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
+    // check that distlb and distub are properly passed to fork_call
+    // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
+    // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
+    // CHECK: br label %[[DIST_INC:.+]]
+
+    // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
+    // CHECK: [[DIST_INC]]:
+    // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
+    // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
+    // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
+    // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
+    // CHECK: br label %[[OMP_JUMP_BACK]]
+
+    // CHECK-DAG: call void @__kmpc_for_static_fini(
+    // CHECK: ret
+
+    // implementation of 'parallel for'
+    // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+
+    // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+
+    // initialize lb and ub to PrevLB and PrevUB
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+    // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+    // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+
+    // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
+    // In this case we use EUB
+    // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+    // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
+    // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+    // CHECK: [[PF_EUB_TRUE]]:
+    // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
+    // CHECK: br label %[[PF_EUB_END:.+]]
+    // CHECK-DAG: [[PF_EUB_FALSE]]:
+    // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+    // CHECK: br label %[[PF_EUB_END]]
+    // CHECK-DAG: [[PF_EUB_END]]:
+    // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
+    // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
+
+    // initialize omp.iv
+    // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+    // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+    // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
+
+    // check exit condition
+    // CHECK: [[OMP_PF_JUMP_BACK]]:
+    // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
+    // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
+    // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+    // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
+
+    // check that PrevLB and PrevUB are passed to the 'for'
+    // CHECK: [[PF_BODY]]:
+    // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // CHECK: br label {{.+}}
+
+    // check stride 1 for 'for' in 'distribute parallel for simd'
+    // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
+    // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
+    // CHECK: br label %[[OMP_PF_JUMP_BACK]]
+
+    // CHECK-DAG: call void @__kmpc_for_static_fini(
+    // CHECK: ret
+  }
+
+  // dist_schedule: static chunk
+  #pragma omp target
+  #pragma omp teams
+  // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
+
+  #pragma omp distribute parallel for simd dist_schedule(static, ch)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
+    // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+    // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+    // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+    // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
+    // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
+    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
+
+    // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
+    // check EUB for distribute
+    // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
+    // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
+    // CHECK-DAG: [[EUB_TRUE]]:
+    // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
+    // CHECK: br label %[[EUB_END:.+]]
+    // CHECK-DAG: [[EUB_FALSE]]:
+    // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: br label %[[EUB_END]]
+    // CHECK-DAG: [[EUB_END]]:
+    // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
+    // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
+
+    // initialize omp.iv
+    // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
+    // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
+
+    // check exit condition
+    // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
+    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+    // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
+    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
+
+    // CHECK: [[DIST_OUTER_LOOP_BODY]]:
+    // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
+
+    // CHECK: [[DIST_INNER_LOOP_HEADER]]:
+    // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
+    // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
+    // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
+    // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+
+    // check that PrevLB and PrevUB are passed to the 'for'
+    // CHECK: [[DIST_INNER_LOOP_BODY]]:
+    // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
+    // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
+    // check that distlb and distub are properly passed to fork_call
+    // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
+    // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
+    // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
+
+    // check DistInc
+    // CHECK: [[DIST_INNER_LOOP_INC]]:
+    // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
+    // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
+    // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
+    // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
+    // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
+
+    // CHECK: [[DIST_INNER_LOOP_END]]:
+    // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
+
+    // CHECK: [[DIST_OUTER_LOOP_INC]]:
+    // check NextLB and NextUB
+    // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
+    // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
+    // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
+    // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
+    // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
+    // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
+    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
+
+    // outer loop exit
+    // CHECK: [[DIST_OUTER_LOOP_END]]:
+    // CHECK-DAG: call void @__kmpc_for_static_fini(
+    // CHECK: ret
+
+    // skip implementation of 'parallel for': using default scheduling and was tested above
+  }
+
+  // schedule: static no chunk
+  #pragma omp target
+  #pragma omp teams
+  // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
+
+  #pragma omp distribute parallel for simd schedule(static)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
+    // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+    // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+    // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+    // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+    // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
+    // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+    // CHECK: ret
+
+    // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default)
+    // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+
+    // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+
+    // initialize lb and ub to PrevLB and PrevUB
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+    // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+    // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+
+    // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
+    // In this case we use EUB
+    // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+    // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
+    // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+    // CHECK: [[PF_EUB_TRUE]]:
+    // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
+    // CHECK: br label %[[PF_EUB_END:.+]]
+    // CHECK-DAG: [[PF_EUB_FALSE]]:
+    // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+    // CHECK: br label %[[PF_EUB_END]]
+    // CHECK-DAG: [[PF_EUB_END]]:
+    // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
+    // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
+
+    // initialize omp.iv
+    // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+    // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+    // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
+
+    // check exit condition
+    // CHECK: [[OMP_PF_JUMP_BACK]]:
+    // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
+    // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
+    // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+    // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
+
+    // check that PrevLB and PrevUB are passed to the 'for'
+    // CHECK: [[PF_BODY]]:
+    // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // CHECK: br label {{.+}}
+
+    // check stride 1 for 'for' in 'distribute parallel for simd'
+    // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
+    // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
+    // CHECK: br label %[[OMP_PF_JUMP_BACK]]
+
+    // CHECK-DAG: call void @__kmpc_for_static_fini(
+    // CHECK: ret
+  }
+
+  // schedule: static chunk
+  #pragma omp target
+  #pragma omp teams
+  // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
+
+  #pragma omp distribute parallel for simd schedule(static, ch)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
+    // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+    // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
+    // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+    // CHECK: ret
+
+    // 'parallel for' implementation using outer and inner loops and PrevEUB
+    // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
+    // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
+
+    // initialize lb and ub to PrevLB and PrevUB
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+    // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+    // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+    // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
+
+    // check PrevEUB (using PrevUB instead of NumIt as upper bound)
+    // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
+    // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+    // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
+    // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+    // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
+    // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
+    // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+    // CHECK: [[PF_EUB_TRUE]]:
+    // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+    // CHECK: br label %[[PF_EUB_END:.+]]
+    // CHECK-DAG: [[PF_EUB_FALSE]]:
+    // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+    // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
+    // CHECK: br label %[[PF_EUB_END]]
+    // CHECK-DAG: [[PF_EUB_END]]:
+    // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
+    // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
+    // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
+    // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}}  [[OMP_PF_UB]],
+    // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
+
+    // initialize omp.iv (IV = LB)
+    // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+    // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+
+    // outer loop: while (IV < UB) {
+    // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+    // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+    // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
+
+    // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
+    // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
+
+    // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
+    // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+    // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
+    // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
+
+    // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
+    // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // skip body branch
+    // CHECK: br{{.+}}
+    // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
+
+    // IV = IV + 1 and inner loop latch
+    // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
+    // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
+    // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
+    // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
+    // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
+
+    // check NextLB and NextUB
+    // CHECK: [[OMP_PF_INNER_LOOP_END]]:
+    // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
+
+    // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
+    // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+    // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
+    // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
+    // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
+    // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
+    // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
+    // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
+
+    // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
+    // CHECK-DAG: call void @__kmpc_for_static_fini(
+    // CHECK: ret
+  }
+
+  // schedule: dynamic no chunk
+  #pragma omp target
+  #pragma omp teams
+  // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
+
+  #pragma omp distribute parallel for simd schedule(dynamic)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
+    // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+    // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
+    // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+    // CHECK: ret
+
+    // 'parallel for' implementation using outer and inner loops and PrevEUB
+    // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+    // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
+
+    // initialize lb and ub to PrevLB and PrevUB
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+    // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+    // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+    // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
+    // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
+    // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
+
+    // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
+    // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
+    // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
+    // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
+
+    // initialize omp.iv (IV = LB)
+    // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
+    // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+    // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+    // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
+
+    // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
+    // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+    // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
+    // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
+
+    // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
+    // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // skip body branch
+    // CHECK: br{{.+}}
+    // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
+
+    // IV = IV + 1 and inner loop latch
+    // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
+    // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
+    // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
+    // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
+    // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
+
+    // check NextLB and NextUB
+    // CHECK: [[OMP_PF_INNER_LOOP_END]]:
+    // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
+
+    // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
+    // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
+
+    // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
+    // CHECK: ret
+  }
+
+  // schedule: dynamic chunk
+  #pragma omp target
+  #pragma omp teams
+  // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
+
+  #pragma omp distribute parallel for simd schedule(dynamic, ch)
+  for (int i = 0; i < n; ++i) {
+    a[i] = b[i] + c[i];
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
+    // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+    // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
+    // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+    // CHECK: ret
+
+    // 'parallel for' implementation using outer and inner loops and PrevEUB
+    // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
+    // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+    // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
+
+    // initialize lb and ub to PrevLB and PrevUB
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+    // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+    // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+    // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+    // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+    // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+    // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+    // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
+    // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
+    // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
+
+    // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
+    // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
+    // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
+    // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
+
+    // initialize omp.iv (IV = LB)
+    // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
+    // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+    // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+    // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
+
+    // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
+    // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+    // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
+    // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
+
+    // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
+    // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+    // skip body branch
+    // CHECK: br{{.+}}
+    // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
+
+    // IV = IV + 1 and inner loop latch
+    // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
+    // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
+    // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
+    // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
+    // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
+
+    // check NextLB and NextUB
+    // CHECK: [[OMP_PF_INNER_LOOP_END]]:
+    // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
+
+    // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
+    // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
+
+    // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
+    // CHECK: ret
+  }
+
+  return tmain<int>();
+#endif
+}
+
+// check code
+// CHECK: define{{.+}} [[TMAIN]]()
+
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
+
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_3:@.+]](
+
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_4:@.+]](
+
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_5:@.+]](
+
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_6:@.+]](
+
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_7:@.+]](
+
+// CHECK: define{{.+}} void [[OFFLOADING_FUN_1]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
+// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+
+// check EUB for distribute
+// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
+// CHECK-DAG: [[EUB_TRUE]]:
+// CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
+// CHECK: br label %[[EUB_END:.+]]
+// CHECK-DAG: [[EUB_FALSE]]:
+// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END]]
+// CHECK-DAG: [[EUB_END]]:
+// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
+// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
+
+// initialize omp.iv
+// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
+// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
+// CHECK: br label %[[OMP_JUMP_BACK:.+]]
+
+// check exit condition
+// CHECK: [[OMP_JUMP_BACK]]:
+// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
+
+// check that PrevLB and PrevUB are passed to the 'for'
+// CHECK: [[DIST_BODY]]:
+// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
+// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
+// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
+// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
+// check that distlb and distub are properly passed to fork_call
+// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
+// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
+// CHECK: br label %[[DIST_INC:.+]]
+
+// increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
+// CHECK: [[DIST_INC]]:
+// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
+// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
+// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
+// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
+// CHECK: br label %[[OMP_JUMP_BACK]]
+
+// CHECK-DAG: call void @__kmpc_for_static_fini(
+// CHECK: ret
+
+// implementation of 'parallel for'
+// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+
+// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+
+// initialize lb and ub to PrevLB and PrevUB
+// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+
+// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
+// In this case we use EUB
+// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
+// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
+// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+// CHECK: [[PF_EUB_TRUE]]:
+// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
+// CHECK: br label %[[PF_EUB_END:.+]]
+// CHECK-DAG: [[PF_EUB_FALSE]]:
+// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+// CHECK: br label %[[PF_EUB_END]]
+// CHECK-DAG: [[PF_EUB_END]]:
+// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
+// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
+
+// initialize omp.iv
+// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
+
+// check exit condition
+// CHECK: [[OMP_PF_JUMP_BACK]]:
+// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
+// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
+// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
+
+// check that PrevLB and PrevUB are passed to the 'for'
+// CHECK: [[PF_BODY]]:
+// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+// CHECK: br label {{.+}}
+
+// check stride 1 for 'for' in 'distribute parallel for simd'
+// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
+// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
+// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
+// CHECK: br label %[[OMP_PF_JUMP_BACK]]
+
+// CHECK-DAG: call void @__kmpc_for_static_fini(
+// CHECK: ret
+
+// CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
+// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+
+// check EUB for distribute
+// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
+// CHECK-DAG: [[EUB_TRUE]]:
+// CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
+// CHECK: br label %[[EUB_END:.+]]
+// CHECK-DAG: [[EUB_FALSE]]:
+// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END]]
+// CHECK-DAG: [[EUB_END]]:
+// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
+// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
+
+// initialize omp.iv
+// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
+// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
+// CHECK: br label %[[OMP_JUMP_BACK:.+]]
+
+// check exit condition
+// CHECK: [[OMP_JUMP_BACK]]:
+// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
+
+// check that PrevLB and PrevUB are passed to the 'for'
+// CHECK: [[DIST_BODY]]:
+// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
+// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
+// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
+// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
+// check that distlb and distub are properly passed to fork_call
+// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
+// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
+// CHECK: br label %[[DIST_INC:.+]]
+
+// increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
+// CHECK: [[DIST_INC]]:
+// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
+// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
+// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
+// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
+// CHECK: br label %[[OMP_JUMP_BACK]]
+
+// CHECK-DAG: call void @__kmpc_for_static_fini(
+// CHECK: ret
+
+// implementation of 'parallel for'
+// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+
+// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+
+// initialize lb and ub to PrevLB and PrevUB
+// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+
+// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
+// In this case we use EUB
+// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
+// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
+// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+// CHECK: [[PF_EUB_TRUE]]:
+// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
+// CHECK: br label %[[PF_EUB_END:.+]]
+// CHECK-DAG: [[PF_EUB_FALSE]]:
+// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+// CHECK: br label %[[PF_EUB_END]]
+// CHECK-DAG: [[PF_EUB_END]]:
+// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
+// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
+
+// initialize omp.iv
+// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
+
+// check exit condition
+// CHECK: [[OMP_PF_JUMP_BACK]]:
+// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
+// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
+// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
+
+// check that PrevLB and PrevUB are passed to the 'for'
+// CHECK: [[PF_BODY]]:
+// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+// CHECK: br label {{.+}}
+
+// check stride 1 for 'for' in 'distribute parallel for simd'
+// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
+// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
+// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
+// CHECK: br label %[[OMP_PF_JUMP_BACK]]
+
+// CHECK-DAG: call void @__kmpc_for_static_fini(
+// CHECK: ret
+
+// CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
+// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+// unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
+// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
+
+// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
+// check EUB for distribute
+// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
+// CHECK-DAG: [[EUB_TRUE]]:
+// CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
+// CHECK: br label %[[EUB_END:.+]]
+// CHECK-DAG: [[EUB_FALSE]]:
+// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END]]
+// CHECK-DAG: [[EUB_END]]:
+// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
+// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
+
+// initialize omp.iv
+// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
+// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
+
+// check exit condition
+// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
+
+// CHECK: [[DIST_OUTER_LOOP_BODY]]:
+// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
+
+// CHECK: [[DIST_INNER_LOOP_HEADER]]:
+// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
+// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
+// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
+// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+
+// check that PrevLB and PrevUB are passed to the 'for'
+// CHECK: [[DIST_INNER_LOOP_BODY]]:
+// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
+// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
+// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
+// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
+// check that distlb and distub are properly passed to fork_call
+// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
+// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
+// CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
+
+// check DistInc
+// CHECK: [[DIST_INNER_LOOP_INC]]:
+// CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
+// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
+// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
+// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
+// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
+// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
+
+// CHECK: [[DIST_OUTER_LOOP_INC]]:
+// check NextLB and NextUB
+// CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
+// CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
+// CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
+// CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
+// CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
+// CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
+// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
+
+// outer loop exit
+// CHECK: [[DIST_OUTER_LOOP_END]]:
+// CHECK-DAG: call void @__kmpc_for_static_fini(
+// CHECK: ret
+
+// skip implementation of 'parallel for': using default scheduling and was tested above
+
+// CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
+// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
+// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
+// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
+// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+// CHECK: ret
+
+// 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default)
+// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+
+// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+
+// initialize lb and ub to PrevLB and PrevUB
+// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+
+// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
+// In this case we use EUB
+// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
+// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
+// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+// CHECK: [[PF_EUB_TRUE]]:
+// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
+// CHECK: br label %[[PF_EUB_END:.+]]
+// CHECK-DAG: [[PF_EUB_FALSE]]:
+// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+// CHECK: br label %[[PF_EUB_END]]
+// CHECK-DAG: [[PF_EUB_END]]:
+// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
+// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
+
+// initialize omp.iv
+// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
+
+// check exit condition
+// CHECK: [[OMP_PF_JUMP_BACK]]:
+// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
+// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
+// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
+
+// check that PrevLB and PrevUB are passed to the 'for'
+// CHECK: [[PF_BODY]]:
+// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+// CHECK: br label {{.+}}
+
+// check stride 1 for 'for' in 'distribute parallel for simd'
+// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
+// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
+// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
+// CHECK: br label %[[OMP_PF_JUMP_BACK]]
+
+// CHECK-DAG: call void @__kmpc_for_static_fini(
+// CHECK: ret
+
+// CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
+// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+// CHECK: ret
+
+// 'parallel for' implementation using outer and inner loops and PrevEUB
+// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
+// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
+
+// initialize lb and ub to PrevLB and PrevUB
+// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
+// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
+
+// check PrevEUB (using PrevUB instead of NumIt as upper bound)
+// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
+// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
+// CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
+// CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+// CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
+// CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
+// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
+// CHECK: [[PF_EUB_TRUE]]:
+// CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+// CHECK: br label %[[PF_EUB_END:.+]]
+// CHECK-DAG: [[PF_EUB_FALSE]]:
+// CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
+// CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
+// CHECK: br label %[[PF_EUB_END]]
+// CHECK-DAG: [[PF_EUB_END]]:
+// CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
+// CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
+// CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
+// CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}}  [[OMP_PF_UB]],
+// CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
+
+// initialize omp.iv (IV = LB)
+// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+
+// outer loop: while (IV < UB) {
+// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+// CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
+// CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
+
+// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
+// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
+
+// CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
+// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
+// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
+
+// CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
+// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+// skip body branch
+// CHECK: br{{.+}}
+// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
+
+// IV = IV + 1 and inner loop latch
+// CHECK: [[OMP_PF_INNER_LOOP_INC]]:
+// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
+// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
+// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
+// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
+
+// check NextLB and NextUB
+// CHECK: [[OMP_PF_INNER_LOOP_END]]:
+// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
+
+// CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
+// CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+// CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
+// CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
+// CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
+// CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
+// CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
+// CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
+// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
+
+// CHECK: [[OMP_PF_OUTER_LOOP_END]]:
+// CHECK-DAG: call void @__kmpc_for_static_fini(
+// CHECK: ret
+
+// CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
+// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+// CHECK: ret
+
+// 'parallel for' implementation using outer and inner loops and PrevEUB
+// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
+// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
+
+// initialize lb and ub to PrevLB and PrevUB
+// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+// CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+// CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
+// CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
+// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
+
+// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
+// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
+// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
+// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
+
+// initialize omp.iv (IV = LB)
+// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
+// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
+
+// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
+// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
+// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
+
+// CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
+// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+// skip body branch
+// CHECK: br{{.+}}
+// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
+
+// IV = IV + 1 and inner loop latch
+// CHECK: [[OMP_PF_INNER_LOOP_INC]]:
+// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
+// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
+// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
+// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
+
+// check NextLB and NextUB
+// CHECK: [[OMP_PF_INNER_LOOP_END]]:
+// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
+
+// CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
+// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
+
+// CHECK: [[OMP_PF_OUTER_LOOP_END]]:
+// CHECK: ret
+
+// CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
+// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
+// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
+// CHECK: ret
+
+// 'parallel for' implementation using outer and inner loops and PrevEUB
+// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
+// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
+// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
+
+// initialize lb and ub to PrevLB and PrevUB
+// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
+// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
+// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
+// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
+// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
+// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
+// CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+// CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
+// CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
+// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
+
+// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
+// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
+// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
+// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
+
+// initialize omp.iv (IV = LB)
+// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
+// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
+// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
+// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
+
+// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
+// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
+// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
+// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
+
+// CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
+// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
+// skip body branch
+// CHECK: br{{.+}}
+// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
+
+// IV = IV + 1 and inner loop latch
+// CHECK: [[OMP_PF_INNER_LOOP_INC]]:
+// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
+// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
+// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
+// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
+
+// check NextLB and NextUB
+// CHECK: [[OMP_PF_INNER_LOOP_END]]:
+// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
+
+// CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
+// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
+
+// CHECK: [[OMP_PF_OUTER_LOOP_END]]:
+// CHECK: ret
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif
diff --git a/test/OpenMP/distribute_parallel_for_simd_collapse_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_collapse_messages.cpp
index 8690b4a..b12dcc1 100644
--- a/test/OpenMP/distribute_parallel_for_simd_collapse_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_simd_copyin_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_copyin_messages.cpp
index 2d1a3dc..2d72925 100644
--- a/test/OpenMP/distribute_parallel_for_simd_copyin_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_copyin_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp
index 5c32306..3014d03 100644
--- a/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 template <class T, int N>
diff --git a/test/OpenMP/distribute_parallel_for_simd_dist_schedule_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_dist_schedule_messages.cpp
index 2e3ee2b..f029b0e 100644
--- a/test/OpenMP/distribute_parallel_for_simd_dist_schedule_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
new file mode 100644
index 0000000..1447b5f
--- /dev/null
+++ b/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -0,0 +1,626 @@
+// RxUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute parallel for simd firstprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target
+    #pragma omp teams
+    #pragma omp distribute parallel for simd firstprivate(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double* {{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
+
+      // addr alloca's
+      // LAMBDA: [[G_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca float*,
+      // LAMBDA: [[G1_REF:%.+]] = alloca double*,
+
+      // private alloca's
+      // LAMBDA: [[G_PRIV:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIV:%.+]] = alloca double,
+      // LAMBDA: [[TMP_PRIV:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIV:%.+]] = alloca float,
+
+      // transfer input parameters into addr alloca's
+      // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
+
+      // init private alloca's with addr alloca's
+      // g
+      // LAMBDA-DAG: [[G_CONV:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]]
+      // LAMBDA-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_CONV]],
+      // LAMBDA-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]],
+
+      // g1
+      // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]],
+      // LAMBDA-DAG: [[TMP_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_REF]],
+      // LAMBDA-DAG: store {{.+}} [[TMP_VAL]], {{.+}}* [[G1_PRIV]]
+      // LAMBDA-DAG: store {{.+}}* [[G1_PRIV]], {{.+}}** [[TMP_PRIV]],
+
+      // svar
+      // LAMBDA-DAG: [[SVAR_REF:%.+]] = load {{.+}}*, {{.+}}** [[SVAR_ADDR]],
+      // LAMBDA-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_REF]],
+      // LAMBDA-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_PRIV]],
+
+      // sfvar
+      // LAMBDA-DAG: [[SFVAR_REF:%.+]] = load {{.+}}*, {{.+}}** [[SFVAR_ADDR]],
+      // LAMBDA-DAG: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_REF]],
+      // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_PRIV]],
+
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // pass firstprivate parameters to parallel outlined function
+      // g
+      // LAMBDA-64-DAG: [[G_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_PRIV]],
+      // LAMBDA-64: [[G_CAST_CONV:%.+]] = bitcast {{.+}}* [[G_CAST:%.+]] to
+      // LAMBDA-64-DAG: store {{.+}} [[G_PRIV_VAL]], {{.+}}* [[G_CAST_CONV]],
+      // LAMBDA-64-DAG: [[G_PAR:%.+]] = load {{.+}}, {{.+}}* [[G_CAST]],
+
+      // g1
+      // LAMBDA-DAG: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_PRIV]],
+      // LAMBDA-DAG: [[G1_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_PRIV_VAL]],
+      // LAMBDA: [[G1_CAST_CONV:%.+]] = bitcast {{.+}}* [[G1_CAST:%.+]] to
+      // LAMBDA-DAG: store {{.+}} [[G1_PRIV_VAL]], {{.+}}* [[G1_CAST_CONV]],
+      // LAMBDA-DAG: [[G1_PAR:%.+]] = load {{.+}}, {{.+}}* [[G1_CAST]],
+
+      // svar
+      // LAMBDA: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_PRIV]],
+      // LAMBDA-64-DAG: [[SVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SVAR_CAST:%.+]] to
+      // LAMBDA-64-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_CAST_CONV]],
+      // LAMBDA-32-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_CAST:%.+]],
+      // LAMBDA-DAG: [[SVAR_PAR:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CAST]],
+
+      // sfvar
+      // LAMBDA: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_PRIV]],
+      // LAMBDA-DAG: [[SFVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_CAST:%.+]] to
+      // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_CAST_CONV]],
+      // LAMBDA-DAG: [[SFVAR_PAR:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_CAST]],
+
+      // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PAR]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PAR]], {{.+}} [[SFVAR_PAR]])
+      // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIV]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PAR]], {{.+}} [[SFVAR_PAR]])
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+      // LAMBDA: ret void
+
+
+      // LAMBDA-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
+      // LAMBDA-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, double* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
+      // skip initial params
+      // LAMBDA: {{.+}} = alloca{{.+}},
+      // LAMBDA: {{.+}} = alloca{{.+}},
+      // LAMBDA: {{.+}} = alloca{{.+}},
+      // LAMBDA: {{.+}} = alloca{{.+}},
+
+      // addr alloca's
+      // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G1_REF:%.+]] = alloca double*,
+
+      // private alloca's (only for 32-bit)
+      // LAMBDA-32: [[G_PRIV:%.+]] = alloca double,
+
+      // transfer input parameters into addr alloca's
+      // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
+
+      // prepare parameters for lambda
+      // g
+      // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast {{.+}}* [[G_ADDR]] to
+      // LAMBDA-32-DAG: [[G_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]]
+      // LAMBDA-32-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_ADDR_REF]],
+      // LAMBDA-32-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]],
+
+      // g1
+      // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}}* [[G1_ADDR]] to
+      // LAMBDA-DAG: store {{.+}}* [[G1_CONV]], {{.+}}* [[G1_REF]],
+
+      // svar
+      // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to
+
+      // sfvar
+      // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_ADDR]] to
+
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      g = 1;
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // LAMBDA-64: store double 1.0{{.+}}, double* [[G_CONV]],
+      // LAMBDA-32: store double 1.0{{.+}}, double* [[G_PRIV]],
+      // LAMBDA: [[G1_REF_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]],
+      // LAMBDA: store {{.+}} 1.0{{.+}}, {{.+}}* [[G1_REF_REF]],
+      // LAMBDA-64: store {{.+}} 3, {{.+}}* [[SVAR_CONV]],
+      // LAMBDA-32: store {{.+}} 3, {{.+}}* [[SVAR_ADDR]],
+      // LAMBDA: store {{.+}} 4.0{{.+}}, {{.+}}* [[SFVAR_CONV]],
+
+      // pass params to inner lambda
+      // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA-64: store double* [[G_CONV]], double** [[G_PRIVATE_ADDR_REF]],
+      // LAMBDA-32: store double* [[G_PRIV]], double** [[G_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[G1_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF_REF:%.+]] = load double*, double** [[G1_REF]],
+      // LAMBDA: store double* [[G1_REF_REF]], double** [[G1_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA-64: store i{{[0-9]+}}* [[SVAR_CONV]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA-32: store i{{[0-9]+}}* [[SVAR_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+      // LAMBDA: store float* [[SFVAR_CONV]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+      // LAMBDA: ret void
+      [&]() {
+	// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+	// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+	g = 2;
+	g1 = 2;
+	svar = 4;
+	sfvar = 8.0;
+	// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+	// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+	// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+	// LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+	// LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+	// LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+	// LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+	// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+	// LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+	// LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+	// LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute parallel for simd firstprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return tmain<int>();
+  #endif
+}
+
+// CHECK-LABEL: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_0:@.+]](
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]])
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}}*)* [[OMP_OUTLINED_0:@.+]] to void
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]])
+
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}}*,
+// addr alloca's
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*,
+
+// skip loop alloca's
+// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_LB:.omp.comb.lb+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_UB:.omp.comb.ub+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
+
+// private alloca's
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+
+// init addr alloca's with input values
+// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
+// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
+// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
+// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
+// CHECK-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
+
+// init private alloca's with addr alloca's
+// t-var
+// CHECK-DAG: [[T_VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[T_VAR_ADDR]],
+// CHECK-DAG: [[T_VAR:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR_REF]],
+// CHECK-DAG: store {{.+}} [[T_VAR]], {{.+}} [[T_VAR_PRIV]],
+
+// vec
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
+// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
+// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
+// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]],
+
+// s_arr
+// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
+// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
+// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
+// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
+// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
+// CHECK-DAG: [[CPY_BODY]]:
+// CHECK-DAG: call void @llvm.memcpy{{.+}}(
+// CHECK-DAG: [[CPY_DONE]]:
+
+// var
+// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
+// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
+// CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]],
+// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
+
+// svar
+// CHECK-DAG: [[SVAR_CONV_REF:%.+]] = load {{.+}}*, {{.+}}** [[SVAR_ADDR]],
+// CHECK-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV_REF]],
+// CHECK-DAG: store {{.+}} [[SVAR_CONV_VAL]], {{.+}}* [[SVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// pass private alloca's to fork
+// CHECK-DAG: [[T_VAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_PRIV]],
+// not dag to distinguish with S_VAR_CAST
+// CHECK-64: [[T_VAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[T_VAR_CAST:%.+]] to
+// CHECK-64-DAG: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST_CONV]],
+// CHECK-32: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST:%.+]],
+// CHECK-DAG: [[T_VAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CAST]],
+// CHECK-DAG: [[TMP_PRIV_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK-DAG: [[SVAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_PRIV]],
+// CHECK-64-DAG: [[SVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SVAR_CAST:%.+]] to
+// CHECK-64-DAG: store {{.+}} [[SVAR_PRIV_VAL]], {{.+}}* [[SVAR_CAST_CONV]],
+// CHECK-32-DAG: store {{.+}} [[SVAR_PRIV_VAL]], {{.+}}* [[SVAR_CAST:%.+]],
+// CHECK-DAG: [[SVAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CAST]],
+// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} [[T_VAR_CAST_VAL]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], [[S_FLOAT_TY]]* [[TMP_PRIV_VAL]], i{{[0-9]+}} [[SVAR_CAST_VAL]])
+// CHECK: call void @__kmpc_for_static_fini(
+
+// call destructors: var..
+// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
+
+// ..and s_arr
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
+
+// CHECK: ret void
+
+// By OpenMP specifications, 'firstprivate' applies to both distribute and parallel for.
+// However, the support for 'firstprivate' of 'parallel' is only used when 'parallel'
+// is found alone. Therefore we only have one 'firstprivate' support for 'parallel for'
+// in combination
+// CHECK: define internal void [[OMP_PARFOR_OUTLINED_0]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]])
+
+// addr alloca's
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+
+// skip loop alloca's
+// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_LB:.omp.lb+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_UB:.omp.ub+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
+
+// private alloca's
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+
+// init addr alloca's with input values
+// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
+// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
+// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
+// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
+// CHECK-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
+
+// init private alloca's with addr alloca's
+// vec
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
+// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
+// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
+// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]],
+
+// s_arr
+// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
+// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
+// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
+// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
+// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
+// CHECK-DAG: [[CPY_BODY]]:
+// CHECK-DAG: call void @llvm.memcpy{{.+}}(
+// CHECK-DAG: [[CPY_DONE]]:
+
+// var
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
+// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
+// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// call destructors: var..
+// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
+
+// ..and s_arr
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
+
+// CHECK: ret void
+
+// template tmain with S_INT_TY
+// CHECK-LABEL: define{{.*}} i{{[0-9]+}} @{{.+}}tmain{{.+}}()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_0:@.+]](
+// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]])
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_0:@.+]] to void
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
+
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}}*,
+// addr alloca's
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip loop alloca's
+// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_LB:.omp.comb.lb+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_UB:.omp.comb.ub+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
+
+// private alloca's
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+
+// init addr alloca's with input values
+// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
+// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
+// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
+// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
+
+// init private alloca's with addr alloca's
+// t-var
+// CHECK-DAG: [[T_VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[T_VAR_ADDR]],
+// CHECK-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR_REF]],
+// CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]],
+
+// vec
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
+// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
+// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
+// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]],
+
+// s_arr
+// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
+// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
+// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
+// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
+// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
+// CHECK-DAG: [[CPY_BODY]]:
+// CHECK-DAG: call void @llvm.memcpy{{.+}}(
+// CHECK-DAG: [[CPY_DONE]]:
+
+// var
+// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
+// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
+// CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]],
+// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// pass private alloca's to fork
+// CHECK-DAG: [[T_VAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_PRIV]],
+// not dag to distinguish with S_VAR_CAST
+// CHECK-64: [[T_VAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[T_VAR_CAST:%.+]] to
+// CHECK-64-DAG: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST_CONV]],
+// CHECK-32: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST:%.+]],
+// CHECK-DAG: [[T_VAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CAST]],
+// CHECK-DAG: [[TMP_PRIV_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
+// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} [[T_VAR_CAST_VAL]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], [[S_INT_TY]]* [[TMP_PRIV_VAL]])
+// CHECK: call void @__kmpc_for_static_fini(
+
+// call destructors: var..
+// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
+
+// ..and s_arr
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]*
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
+
+// CHECK: ret void
+
+// By OpenMP specifications, 'firstprivate' applies to both distribute and parallel for.
+// However, the support for 'firstprivate' of 'parallel' is only used when 'parallel'
+// is found alone. Therefore we only have one 'firstprivate' support for 'parallel for'
+// in combination
+// CHECK: define internal void [[OMP_PARFOR_OUTLINED_0]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
+
+// addr alloca's
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip loop alloca's
+// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_LB:.omp.lb+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_UB:.omp.ub+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
+
+// private alloca's
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+
+// init addr alloca's with input values
+// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
+// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
+// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
+// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
+
+// init private alloca's with addr alloca's
+// vec
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
+// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
+// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
+// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]],
+
+// s_arr
+// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
+// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
+// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
+// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
+// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
+// CHECK-DAG: [[CPY_BODY]]:
+// CHECK-DAG: call void @llvm.memcpy{{.+}}(
+// CHECK-DAG: [[CPY_DONE]]:
+
+// var
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
+// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
+// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// call destructors: var..
+// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
+
+// ..and s_arr
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]*
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
+
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/distribute_parallel_for_simd_firstprivate_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_firstprivate_messages.cpp
index 07d30e4..d6f56d5 100644
--- a/test/OpenMP/distribute_parallel_for_simd_firstprivate_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -42,7 +44,7 @@
 };
 class S5 {
   int a;
-  S5(const S5 &s5) : a(s5.a) {} // expected-note 4 {{implicitly declared private here}}
+  S5(const S5 &s5) : a(s5.a) {} // expected-note 2 {{implicitly declared private here}}
 
 public:
   S5() : a(0) {}
@@ -50,7 +52,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -150,9 +152,10 @@
 #pragma omp distribute parallel for simd firstprivate(i)
   for (int k = 0; k < argc; ++k)
     ++k;
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}}
+#pragma omp distribute parallel for simd lastprivate(g) firstprivate(g)
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp parallel private(i)
@@ -314,14 +317,16 @@
 #pragma omp distribute parallel for simd firstprivate(j)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}}
+#pragma omp distribute parallel for simd lastprivate(g) firstprivate(g)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd lastprivate(n) firstprivate(n) // OK
+#pragma omp distribute parallel for simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp parallel
diff --git a/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
new file mode 100644
index 0000000..2beea79
--- /dev/null
+++ b/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
@@ -0,0 +1,197 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+void fn1();
+void fn2();
+void fn3();
+void fn4();
+void fn5();
+void fn6();
+
+int Arg;
+
+// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
+void gtid_test() {
+#pragma omp target
+#pragma omp teams
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+#pragma omp distribute parallel for simd
+  for(int i = 0 ; i < 100; i++) {}
+  // CHECK: define internal void [[OFFLOADING_FUN_0]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+  // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK:  call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
+  // CHECK: call void @__kmpc_for_static_fini(
+
+  // CHECK: define{{.+}} void [[OMP_OUTLINED_0]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: ret
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd if (parallel: false)
+  for(int i = 0 ; i < 100; i++) {
+  // CHECK: define internal void [[OFFLOADING_FUN_1]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+  // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @__kmpc_serialized_parallel(
+  // CHECK: call void [[OMP_OUTLINED_1:@.+]](
+  // CHECK: call void @__kmpc_end_serialized_parallel(
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @{{.+}}gtid_test
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: ret
+    gtid_test();
+  }
+}
+
+
+template <typename T>
+int tmain(T Arg) {
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd if (true)
+  for(int i = 0 ; i < 100; i++) {
+    fn1();
+  }
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd if (false)
+  for(int i = 0 ; i < 100; i++) {
+    fn2();
+  }
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd if (parallel: Arg)
+  for(int i = 0 ; i < 100; i++) {
+    fn3();
+  }
+  return 0;
+}
+
+// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
+int main() {
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
+// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd if (true)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_0]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void
+    // CHECK: call void @__kmpc_for_static_fini(
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn4
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    fn4();
+  }
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd if (false)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_1]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void @__kmpc_serialized_parallel(
+    // CHECK: call void [[OMP_OUTLINED_3:@.+]](
+    // CHECK: call void @__kmpc_end_serialized_parallel(
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn5
+    // CHECK: call void @__kmpc_for_static_fini(
+    fn5();
+  }
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd if (Arg)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_2]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void
+    // CHECK: call void @__kmpc_serialized_parallel(
+    // CHECK: call void [[OMP_OUTLINED_4:@.+]](
+    // CHECK: call void @__kmpc_end_serialized_parallel(
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn6
+    // CHECK: call void @__kmpc_for_static_fini(
+    fn6();
+  }
+
+  return tmain(Arg);
+}
+
+// CHECK-LABEL: define {{.+}} @{{.+}}tmain
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn1
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// CHECK: call void [[T_OUTLINE_FUN_2:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn2
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// call void [[T_OUTLINE_FUN_3:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn3
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+#endif
diff --git a/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
index c6957b9..0f9e123 100644
--- a/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
new file mode 100644
index 0000000..af74ab7
--- /dev/null
+++ b/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -0,0 +1,687 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target
+  #pragma omp teams
+#pragma omp distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target
+    #pragma omp teams
+#pragma omp distribute parallel for simd lastprivate(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
+      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
+      // LAMBDA: [[TMP_G1:%.+]] = alloca double*,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+
+      // init addr alloca's
+      // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
+
+      // init private variables
+      // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP_G1]],
+      // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP_G1]],
+      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
+
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: [[G1_PAR:%.+]] = load{{.+}}, {{.+}} [[TMP_G1_PRIVATE]],
+      // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIVATE]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PRIVATE]], {{.+}} [[SFVAR_PRIVATE]])
+      // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIVATE]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PRIVATE]], {{.+}} [[SFVAR_PRIVATE]])
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+      // linear counter
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_COUNTER_BLOCK:.+]], label %[[OMP_COUNTER_DONE:.+]]
+      // LAMBDA: [[OMP_COUNTER_BLOCK]]:
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* %
+      // LAMBDA: br label %[[OMP_COUNTER_DONE]]
+      // LAMBDA: [[OMP_COUNTER_DONE]]:
+
+      // lastprivate
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+      // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
+      // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[TMP_G1_VAL]],
+
+      // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
+      // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+
+      g = 1;
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // outlined function for 'parallel for'
+      // LAMBDA-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[G_IN:%.+]], {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]])
+      // LAMBDA-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[G_IN:%.+]], {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]])
+
+      // addr alloca's
+      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
+
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+
+      // private alloca's
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+
+            // init addr alloca's
+      // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
+
+      // init private variables
+      // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
+
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
+
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+
+      // loop body
+      // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]],
+      // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]],
+      // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+      // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+      // lastprivate
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+      // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
+      // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
+      // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
+      // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+
+      [&]() {
+	// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+	// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+	g = 2;
+	g1 = 2;
+	svar = 4;
+	sfvar = 8.0;
+	// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+	// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+	// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+	// LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+	// LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+	// LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+	// LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+	// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+	// LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+	// LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+	// LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target
+  #pragma omp teams
+#pragma omp distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  int i;
+
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]])
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[TMP:%.*]] = alloca [[S_FLOAT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP]],
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+
+// call constructor for s_arr
+// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BGN]],
+// CHECK: br label %[[S_ARR_CST_LOOP:.+]]
+// CHECK: [[S_ARR_CST_LOOP]]:
+// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}}
+// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_CTOR]])
+// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]],
+// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
+// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
+// CHECK: [[S_ARR_CST_END]]:
+// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
+// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]],
+
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}} [[TMP_PRIV]],
+// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]], {{.+}} [[S_VAR_PRIV]])
+// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]], {{.+}} [[S_VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
+// CHECK: ret void
+
+// outlined function for 'parallel for'
+// CHECK-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]], {{.+}} [[SVAR_IN:%.+]])
+// CHECK-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]], {{.+}} [[SVAR_IN:%.+]])
+
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+
+// call constructor for s_arr
+// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BGN]],
+// CHECK: br label %[[S_ARR_CST_LOOP:.+]]
+// CHECK: [[S_ARR_CST_LOOP]]:
+// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}}
+// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_CTOR]])
+// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]],
+// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
+// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
+// CHECK: [[S_ARR_CST_END]]:
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+
+// loop body
+// assignment: vec[i] = t_var;
+// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
+
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
+// CHECK: ret void
+
+// template tmain
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
+// CHECK: ret
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]],  [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN:%.+]])
+// skip alloca of global_tid and bound_tid
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip init of bound and global tid
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// copy from parameters to local address variables
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]],
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_ADDR_REF]], [[S_INT_TY]]** [[TMP]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK-DAG: [[TMP_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}} [[TMP_PRIV]],
+// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]])
+// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]])
+
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8*
+// CHECK: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],{{.+}})
+// CHECK: ret void
+
+// outlined function for 'parallel for'
+// CHECK-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]])
+// CHECK-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]])
+
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+
+// call constructor for s_arr
+// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BGN]],
+// CHECK: br label %[[S_ARR_CST_LOOP:.+]]
+// CHECK: [[S_ARR_CST_LOOP]]:
+// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}}
+// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_CTOR]])
+// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]],
+// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
+// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
+// CHECK: [[S_ARR_CST_END]]:
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]],
+// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]])
+// CHECK: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+
+// assignment: vec[i] = t_var;
+// CHECK: [[IV_VAL:%.+]] =
+// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8*
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
+
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif
+
diff --git a/test/OpenMP/distribute_parallel_for_simd_lastprivate_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_lastprivate_messages.cpp
index e54089d..45e0d44 100644
--- a/test/OpenMP/distribute_parallel_for_simd_lastprivate_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -25,7 +27,7 @@
 const S2 ba[5];
 class S3 {
   int a;
-  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+  S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}}
 
 public:
   S3() : a(0) {}
@@ -52,7 +54,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -313,14 +315,16 @@
 #pragma omp distribute parallel for simd lastprivate(j)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{firstprivate variable cannot be lastprivate}} expected-note@+3 {{defined as firstprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+#pragma omp distribute parallel for simd firstprivate(m) lastprivate(m)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd lastprivate(n) firstprivate(n) // OK
+#pragma omp distribute parallel for simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i)
     foo();
   static int si;
diff --git a/test/OpenMP/distribute_parallel_for_simd_linear_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_linear_messages.cpp
index 632ef06..788bace 100644
--- a/test/OpenMP/distribute_parallel_for_simd_linear_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
   int x;
 };
@@ -18,41 +20,49 @@
 {
   int B = 0;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear(B:bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear(B:B::bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear(X::x : ::z)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear(B,::z, X::x)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear(::z)
@@ -63,6 +73,7 @@
 #pragma omp distribute parallel for simd linear(B::bfoo()) // expected-error {{expected variable name}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear(B::ib,B:C1+C2)
@@ -148,11 +159,13 @@
 #pragma omp distribute parallel for simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -163,6 +176,7 @@
 #pragma omp distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear (argc : 5)
@@ -183,6 +197,7 @@
 #pragma omp distribute parallel for simd linear (argv[1]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear(e, g)
@@ -193,6 +208,7 @@
 #pragma omp distribute parallel for simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear(i)
@@ -202,28 +218,13 @@
   {
     int v = 0;
     int i;
+    int k;
     #pragma omp target
     #pragma omp teams
-    #pragma omp distribute parallel for simd linear(v:i)
-    for (int k = 0; k < argc; ++k) { i = k; v += i; }
+    #pragma omp distribute parallel for simd linear(k:i)
+    for (k = 0; k < argc; ++k) { i = k; v += i; }
   }
 
-#pragma omp target
-#pragma omp teams
-#pragma omp parallel for simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  int v = 0;
-
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for simd linear(v:j)
-  for (int k = 0; k < argc; ++k) { ++k; v += j; }
-
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
   return 0;
 }
 
@@ -262,11 +263,13 @@
 #pragma omp distribute parallel for simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -277,6 +280,7 @@
 #pragma omp distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd linear (argc)
@@ -314,24 +318,14 @@
     #pragma omp target
     #pragma omp teams
     #pragma omp distribute parallel for simd linear(i)
-      for (int k = 0; k < argc; ++k) ++k;
+      for (i = 0; i < argc; ++i) ++i;
 
     #pragma omp target
     #pragma omp teams
     #pragma omp distribute parallel for simd linear(i : 4)
-      for (int k = 0; k < argc; ++k) { ++k; i += 4; }
+      for (i = 0; i < argc; ++i) { ++i; i += 4; }
   }
 
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-
   foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
   return 0;
 }
diff --git a/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp
index 1fedb3c..20ee49a 100644
--- a/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp
@@ -1,8 +1,10 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 class S {
   int a;
-  S() : a(0) {}
+  S() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S(int v) : a(v) {}
@@ -790,9 +792,10 @@
 
 void test_loop_firstprivate_lastprivate() {
   S s(4);
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd lastprivate(s) firstprivate(s)
+#pragma omp distribute parallel for simd lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}}
   for (int i = 0; i < 16; ++i)
     ;
 }
diff --git a/test/OpenMP/distribute_parallel_for_simd_misc_messages.c b/test/OpenMP/distribute_parallel_for_simd_misc_messages.c
index 01c079e..c2bee7f 100644
--- a/test/OpenMP/distribute_parallel_for_simd_misc_messages.c
+++ b/test/OpenMP/distribute_parallel_for_simd_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp distribute parallel for simd'}}
 #pragma omp distribute parallel for simd
 
@@ -68,7 +70,7 @@
 #pragma omp target
 #pragma omp teams
 // expected-warning@+1 {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}}
-#pragma omp distribute parallel for simd linear(x);
+#pragma omp distribute parallel for simd firstprivate(x);
   for (i = 0; i < 16; ++i)
     ;
 
@@ -548,89 +550,6 @@
 #pragma omp distribute parallel for simd linear(x, y, z)
   for (i = 0; i < 16; ++i)
     ;
-
-  int x, y;
-#pragma omp target
-#pragma omp teams
-// expected-error@+1 {{expected expression}}
-#pragma omp distribute parallel for simd linear(x :)
-  for (i = 0; i < 16; ++i)
-    ;
-#pragma omp target
-#pragma omp teams
-// expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
-#pragma omp distribute parallel for simd linear(x :, )
-  for (i = 0; i < 16; ++i)
-    ;
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for simd linear(x : 1)
-  for (i = 0; i < 16; ++i)
-    ;
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for simd linear(x : 2 * 2)
-  for (i = 0; i < 16; ++i)
-    ;
-#pragma omp target
-#pragma omp teams
-// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
-#pragma omp distribute parallel for simd linear(x : 1, y)
-  for (i = 0; i < 16; ++i)
-    ;
-#pragma omp target
-#pragma omp teams
-// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
-#pragma omp distribute parallel for simd linear(x : 1, y, z : 1)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-note@+2 {{defined as linear}}
-// expected-error@+1 {{linear variable cannot be linear}}
-#pragma omp distribute parallel for simd linear(x) linear(x)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-note@+2 {{defined as private}}
-// expected-error@+1 {{private variable cannot be linear}}
-#pragma omp distribute parallel for simd private(x) linear(x)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-note@+2 {{defined as linear}}
-// expected-error@+1 {{linear variable cannot be private}}
-#pragma omp distribute parallel for simd linear(x) private(x)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-warning@+1 {{zero linear step (x and other variables in clause should probably be const)}}
-#pragma omp distribute parallel for simd linear(x, y : 0)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-note@+2 {{defined as linear}}
-// expected-error@+1 {{linear variable cannot be lastprivate}}
-#pragma omp distribute parallel for simd linear(x) lastprivate(x)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-note@+2 {{defined as lastprivate}}
-// expected-error@+1 {{lastprivate variable cannot be linear}}
-#pragma omp distribute parallel for simd lastprivate(x) linear(x)
-  for (i = 0; i < 16; ++i)
-    ;
 }
 
 void test_aligned() {
@@ -934,16 +853,19 @@
     ;
 
   int x, y, z;
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd lastprivate(x) firstprivate(x)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+3 2 {{lastprivate variable cannot be firstprivate}} expected-note@+3 2 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd lastprivate(x, y) firstprivate(x, y)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+3 3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute parallel for simd lastprivate(x, y, z) firstprivate(x, y, z)
diff --git a/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
new file mode 100644
index 0000000..014fb95
--- /dev/null
+++ b/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T, int C>
+int tmain() {
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd num_threads(C)
+  for (int i = 0; i < 100; i++)
+    foo();
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd num_threads(T(23))
+  for (int i = 0; i < 100; i++)
+    foo();
+  return 0;
+}
+
+int main() {
+  S s(0);
+  char a = s;
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: invoke{{.+}} [[TMAIN_5:@.+]]()
+// CHECK: invoke{{.+}} [[TMAIN_1:@.+]]()
+#pragma omp target
+#pragma omp teams
+  // CHECK: define internal void [[OFFLOADING_FUN_0]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+#pragma omp distribute parallel for simd num_threads(2)
+  for (int i = 0; i < 100; i++) {
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+    // CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 2)
+    // CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+    foo();
+  }
+#pragma omp target
+#pragma omp teams
+  // CHECK: define internal void [[OFFLOADING_FUN_1]](
+
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+#pragma omp distribute parallel for simd num_threads(a)
+  for (int i = 0; i < 100; i++) {
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+    // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8*,
+    // CHECK-DAG: [[A_REF:%.+]] = load i8*, i8** [[A_ADDR]],
+    // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]],
+    // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}}
+    // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]])
+    // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
+    foo();
+  }
+  return a + tmain<char, 5>() + tmain<S, 1>();
+}
+
+// tmain 5
+// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]()
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_1:@.+]](
+
+// tmain 1
+// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]()
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_2:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_3:@.+]](
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_0]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_0]](
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 5)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_1]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_1]](
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_2]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]](
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_3]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]](
+// CHECK-DAG:   [[CALL_RES:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* {{.+}})
+// CHECK-DAG:   [[CALL_RES_SEXT:%.+]] = sext i8 [[CALL_RES]] to {{.+}}
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[CALL_RES_SEXT]])
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+#endif
diff --git a/test/OpenMP/distribute_parallel_for_simd_num_threads_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_num_threads_messages.cpp
index 0d6376d..5f5165d 100644
--- a/test/OpenMP/distribute_parallel_for_simd_num_threads_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
new file mode 100644
index 0000000..629e023
--- /dev/null
+++ b/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
@@ -0,0 +1,313 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute parallel for simd private(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]()
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target
+    #pragma omp teams
+    #pragma omp distribute parallel for simd private(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
+      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double,
+      // LAMBDA: [[TMP_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float,
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[TMP_PRIVATE_ADDR]],
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to {{.+}},
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+      // LAMBDA: ret void
+
+      // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED]](
+      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double,
+      // LAMBDA: [[TMP_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float,
+
+      g = 1;
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[TMP_PRIVATE_ADDR]],
+      // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]],
+      // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+      // LAMBDA: store float* [[SFVAR_PRIVATE_ADDR]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+      // LAMBDA: ret void
+      [&]() {
+	// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+	// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+	g = 2;
+	g1 = 2;
+	svar = 4;
+	sfvar = 8.0;
+	// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+	// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+	// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+	// LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+	// LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+	// LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+	// LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+	// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+	// LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+	// LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+	// LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute parallel for simd private(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_0:@.+]](
+
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN_0]]()
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_0:@.+]] to void
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-NOT: alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-NOT: alloca [[S_FLOAT_TY]],
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// this is the ctor loop
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]*
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to {{.+}},
+// CHECK: call void @__kmpc_for_static_fini(
+
+// call destructors: var..
+// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
+
+// ..and s_arr
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
+
+// CHECK: ret void
+
+// By OpenMP specifications, private applies to both distribute and parallel for.
+// However, the support for 'private' of 'parallel' is only used when 'parallel'
+// is found alone. Therefore we only have one 'private' support for 'parallel for'
+// in combination
+// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_0]](
+// CHECK: [[T_VAR_PRIV:%t_var+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%vec+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%s_arr+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-NOT: alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%var+]] = alloca [[S_FLOAT_TY]],
+// CHECK-NOT: alloca [[S_FLOAT_TY]],
+// CHECK: [[S_VAR_PRIV:%svar+]] = alloca i{{[0-9]+}},
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// this is the ctor loop
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]*
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// call destructors: var..
+// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
+
+// ..and s_arr
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
+
+// CHECK: ret void
+
+// template tmain with S_INT_TY
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](
+// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: ret
+
+// CHECK: ret
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]]()
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_1:@.+]] to void
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK-NOT: alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK-NOT: alloca [[S_INT_TY]],
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]*
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]])
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}},
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]](
+// CHECK: [[T_VAR_PRIV:%t_var+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%vec+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%s_arr+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK-NOT: alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%var+]] = alloca [[S_INT_TY]],
+// CHECK-NOT: alloca [[S_INT_TY]],
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// this is the ctor loop
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]*
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]])
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// call destructors: var..
+// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
+
+// ..and s_arr
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]*
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
+
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/distribute_parallel_for_simd_private_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_private_messages.cpp
index 9df3688..d23d917 100644
--- a/test/OpenMP/distribute_parallel_for_simd_private_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
new file mode 100644
index 0000000..9c73a21
--- /dev/null
+++ b/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -0,0 +1,98 @@
+// add -fopenmp-targets
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T>
+T tmain() {
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd proc_bind(master)
+  for(int i = 0; i < 1000; i++) {}
+  return T();
+}
+
+int main() {
+  // CHECK-LABEL: @main
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd proc_bind(spread)
+  for(int i = 0; i < 1000; i++) {}
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd proc_bind(close)
+  for(int i = 0; i < 1000; i++) {}
+  return tmain<int>();
+}
+
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL1:@.+]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL2:@.+]]()
+// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
+// CHECK: ret i32 [[CALL_RET]]
+
+// CHECK: define{{.+}} void [[OFFL1]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[OFFL2]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[TMAIN]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL3:@.+]]()
+
+// CHECK: define{{.+}} [[OFFL3]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+#endif
diff --git a/test/OpenMP/distribute_parallel_for_simd_proc_bind_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_proc_bind_messages.cpp
index 6b64cc3..d81f7d9 100644
--- a/test/OpenMP/distribute_parallel_for_simd_proc_bind_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 template <class T, typename S, int N>
diff --git a/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp
index 94e4541..348e664 100644
--- a/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_simd_safelen_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_safelen_messages.cpp
index a5fd1ae..93766f9 100644
--- a/test/OpenMP/distribute_parallel_for_simd_safelen_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_simd_schedule_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_schedule_messages.cpp
index b3003dd..8a5ffd1 100644
--- a/test/OpenMP/distribute_parallel_for_simd_schedule_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_parallel_for_simd_shared_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_shared_messages.cpp
index 134b852..fd694b7 100644
--- a/test/OpenMP/distribute_parallel_for_simd_shared_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_shared_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 
 struct S1; // expected-note 2 {{declared here}}
 extern S1 a;
diff --git a/test/OpenMP/distribute_parallel_for_simd_simdlen_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_simdlen_messages.cpp
index 2d813ec..8e40e35 100644
--- a/test/OpenMP/distribute_parallel_for_simd_simdlen_messages.cpp
+++ b/test/OpenMP/distribute_parallel_for_simd_simdlen_messages.cpp
@@ -2,6 +2,9 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_private_codegen.cpp b/test/OpenMP/distribute_private_codegen.cpp
index c637fb4..c470e8f 100644
--- a/test/OpenMP/distribute_private_codegen.cpp
+++ b/test/OpenMP/distribute_private_codegen.cpp
@@ -5,12 +5,28 @@
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
 
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/distribute_private_messages.cpp b/test/OpenMP/distribute_private_messages.cpp
index 0aa1e47..3cf2fdc 100644
--- a/test/OpenMP/distribute_private_messages.cpp
+++ b/test/OpenMP/distribute_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_simd_aligned_messages.cpp b/test/OpenMP/distribute_simd_aligned_messages.cpp
index 21b1bc9..51421d6 100644
--- a/test/OpenMP/distribute_simd_aligned_messages.cpp
+++ b/test/OpenMP/distribute_simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/distribute_simd_ast_print.cpp b/test/OpenMP/distribute_simd_ast_print.cpp
index 6d2e916..5133db6 100644
--- a/test/OpenMP/distribute_simd_ast_print.cpp
+++ b/test/OpenMP/distribute_simd_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -37,7 +41,7 @@
   }
 };
 
-// CHECK: #pragma omp distribute simd private(this->a) private(this->a) private(T::a)
+// CHECK: #pragma omp distribute simd private(this->a) private(this->a) private(T::a){{$}}
 // CHECK: #pragma omp distribute simd private(this->a) private(this->a)
 // CHECK: #pragma omp distribute simd private(this->a) private(this->a) private(this->S::a)
 
@@ -84,14 +88,14 @@
 
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) reduction(+ : h) dist_schedule(static,N)
+#pragma omp distribute simd private(argc, b), firstprivate(c, d), lastprivate(f) collapse(N) reduction(+ : h) dist_schedule(static,N)
   for (int i = 0; i < 2; ++i)
     for (int j = 0; j < 2; ++j)
       for (int k = 0; k < 10; ++k)
         for (int m = 0; m < 10; ++m)
           for (int n = 0; n < 10; ++n)
             a++;
-// CHECK: #pragma omp distribute simd private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) reduction(+: h) dist_schedule(static, N)
+// CHECK: #pragma omp distribute simd private(argc,b) firstprivate(c,d) lastprivate(f) collapse(N) reduction(+: h) dist_schedule(static, N)
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: for (int j = 0; j < 2; ++j)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
@@ -129,14 +133,15 @@
 // CHECK-NEXT: for (int j = 0; j < 10; ++j)
 // CHECK-NEXT: a++;
 
+  int i;
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd aligned(x:8) linear(h:2) safelen(8) simdlen(8)
-  for (int i = 0; i < 100; i++)
+#pragma omp distribute simd aligned(x:8) linear(i:2) safelen(8) simdlen(8)
+  for (i = 0; i < 100; i++)
     for (int j = 0; j < 200; j++)
       a += h + x[j];
-// CHECK: #pragma omp distribute simd aligned(x: 8) linear(h: 2) safelen(8) simdlen(8)
-// CHECK-NEXT: for (int i = 0; i < 100; i++)
+// CHECK: #pragma omp distribute simd aligned(x: 8) linear(i: 2) safelen(8) simdlen(8)
+// CHECK-NEXT: for (i = 0; i < 100; i++)
 // CHECK-NEXT: for (int j = 0; j < 200; j++)
 // CHECK-NEXT: a += h + x[j];
 
diff --git a/test/OpenMP/distribute_simd_codegen.cpp b/test/OpenMP/distribute_simd_codegen.cpp
new file mode 100644
index 0000000..e114ed6
--- /dev/null
+++ b/test/OpenMP/distribute_simd_codegen.cpp
@@ -0,0 +1,287 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64  --check-prefix HCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32  --check-prefix HCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first. (no significant differences with host version of target region)
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+// CHECK-DAG: [[DEF_LOC_DISTRIBUTE_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2050, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+// CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
+void without_schedule_clause(float *a, float *b, float *c, float *d) {
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute simd simdlen(8) aligned(a)
+  for (int i = 33; i < 32000000; i += 7) {
+    a[i] = b[i] * c[i] * d[i];
+  }
+}
+
+// CHECK: define {{.*}}void @{{.+}}(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[DPTR:%.+]])
+// CHECK:  [[TID_ADDR:%.+]] = alloca i32*
+// CHECK:  [[IV:%.+iv]] = alloca i32
+// CHECK:  [[LB:%.+lb]] = alloca i32
+// CHECK:  [[UB:%.+ub]] = alloca i32
+// CHECK:  [[ST:%.+stride]] = alloca i32
+// CHECK:  [[LAST:%.+last]] = alloca i32
+// CHECK-DAG:  store i32* [[GBL_TIDP]], i32** [[TID_ADDR]]
+// CHECK-DAG:  call void @llvm.assume(
+// CHECK-DAG:  store i32 0, i32* [[LB]]
+// CHECK-DAG:  store i32 4571423, i32* [[UB]]
+// CHECK-DAG:  store i32 1, i32* [[ST]]
+// CHECK-DAG:  store i32 0, i32* [[LAST]]
+// CHECK-DAG:  [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]]
+// CHECK-DAG:  [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]]
+// CHECK:  call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1)
+// CHECK-DAG:  [[UBV0:%.+]] = load i32, i32* [[UB]]
+// CHECK-DAG:  [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423
+// CHECK:  br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]]
+// CHECK-DAG:  [[BBCT]]:
+// CHECK-DAG:  br label %[[BBCE:.+]]
+// CHECK-DAG:  [[BBCF]]:
+// CHECK-DAG:  [[UBV1:%.+]] = load i32, i32* [[UB]]
+// CHECK-DAG:  br label %[[BBCE]]
+// CHECK:  [[BBCE]]:
+// CHECK:  [[SELUB:%.+]] = phi i32 [ 4571423, %[[BBCT]] ], [ [[UBV1]], %[[BBCF]] ]
+// CHECK:  store i32 [[SELUB]], i32* [[UB]]
+// CHECK:  [[LBV0:%.+]] = load i32, i32* [[LB]]
+// CHECK:  store i32 [[LBV0]], i32* [[IV]]
+// CHECK:  br label %[[BBINNFOR:.+]]
+// CHECK:  [[BBINNFOR]]:
+// CHECK:  [[IVVAL0:%.+]] = load i32, i32* [[IV]]
+// CHECK:  [[UBV2:%.+]] = load i32, i32* [[UB]]
+// CHECK:  [[IVLEUB:%.+]] = icmp sle i32 [[IVVAL0]], [[UBV2]]
+// CHECK:  br i1 [[IVLEUB]], label %[[BBINNBODY:.+]], label %[[BBINNEND:.+]]
+// CHECK:  [[BBINNBODY]]:
+// CHECK:  {{.+}} = load i32, i32* [[IV]]
+// ... loop body ...
+// CHECK:  br label %[[BBBODYCONT:.+]]
+// CHECK:  [[BBBODYCONT]]:
+// CHECK:  br label %[[BBINNINC:.+]]
+// CHECK:  [[BBINNINC]]:
+// CHECK:  [[IVVAL1:%.+]] = load i32, i32* [[IV]]
+// CHECK:  [[IVINC:%.+]] = add nsw i32 [[IVVAL1]], 1
+// CHECK:  store i32 [[IVINC]], i32* [[IV]]
+// CHECK:  br label %[[BBINNFOR]]
+// CHECK:  [[BBINNEND]]:
+// CHECK:  br label %[[LPEXIT:.+]]
+// CHECK:  [[LPEXIT]]:
+// CHECK:  call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]])
+// CHECK:  ret void
+
+
+// CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
+void static_not_chunked(float *a, float *b, float *c, float *d) {
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute simd dist_schedule(static) safelen(32)
+  for (int i = 32000000; i > 33; i += -7) {
+        a[i] = b[i] * c[i] * d[i];
+  }
+}
+
+// CHECK: define {{.*}}void @.omp_outlined.{{.*}}(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[DPTR:%.+]])
+// CHECK:  [[TID_ADDR:%.+]] = alloca i32*
+// CHECK:  [[IV:%.+iv]] = alloca i32
+// CHECK:  [[LB:%.+lb]] = alloca i32
+// CHECK:  [[UB:%.+ub]] = alloca i32
+// CHECK:  [[ST:%.+stride]] = alloca i32
+// CHECK:  [[LAST:%.+last]] = alloca i32
+// CHECK-DAG:  store i32* [[GBL_TIDP]], i32** [[TID_ADDR]]
+// CHECK-DAG:  store i32 0, i32* [[LB]]
+// CHECK-DAG:  store i32 4571423, i32* [[UB]]
+// CHECK-DAG:  store i32 1, i32* [[ST]]
+// CHECK-DAG:  store i32 0, i32* [[LAST]]
+// CHECK-DAG:  [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]]
+// CHECK-DAG:  [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]]
+// CHECK:  call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1)
+// CHECK-DAG:  [[UBV0:%.+]] = load i32, i32* [[UB]]
+// CHECK-DAG:  [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423
+// CHECK:  br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]]
+// CHECK-DAG:  [[BBCT]]:
+// CHECK-DAG:  br label %[[BBCE:.+]]
+// CHECK-DAG:  [[BBCF]]:
+// CHECK-DAG:  [[UBV1:%.+]] = load i32, i32* [[UB]]
+// CHECK-DAG:  br label %[[BBCE]]
+// CHECK:  [[BBCE]]:
+// CHECK:  [[SELUB:%.+]] = phi i32 [ 4571423, %[[BBCT]] ], [ [[UBV1]], %[[BBCF]] ]
+// CHECK:  store i32 [[SELUB]], i32* [[UB]]
+// CHECK:  [[LBV0:%.+]] = load i32, i32* [[LB]]
+// CHECK:  store i32 [[LBV0]], i32* [[IV]]
+// CHECK:  br label %[[BBINNFOR:.+]]
+// CHECK:  [[BBINNFOR]]:
+// CHECK:  [[IVVAL0:%.+]] = load i32, i32* [[IV]]
+// CHECK:  [[UBV2:%.+]] = load i32, i32* [[UB]]
+// CHECK:  [[IVLEUB:%.+]] = icmp sle i32 [[IVVAL0]], [[UBV2]]
+// CHECK:  br i1 [[IVLEUB]], label %[[BBINNBODY:.+]], label %[[BBINNEND:.+]]
+// CHECK:  [[BBINNBODY]]:
+// CHECK:  {{.+}} = load i32, i32* [[IV]]
+// ... loop body ...
+// CHECK:  br label %[[BBBODYCONT:.+]]
+// CHECK:  [[BBBODYCONT]]:
+// CHECK:  br label %[[BBINNINC:.+]]
+// CHECK:  [[BBINNINC]]:
+// CHECK:  [[IVVAL1:%.+]] = load i32, i32* [[IV]]
+// CHECK:  [[IVINC:%.+]] = add nsw i32 [[IVVAL1]], 1
+// CHECK:  store i32 [[IVINC]], i32* [[IV]]
+// CHECK:  br label %[[BBINNFOR]]
+// CHECK:  [[BBINNEND]]:
+// CHECK:  br label %[[LPEXIT:.+]]
+// CHECK:  [[LPEXIT]]:
+// CHECK:  call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]])
+// CHECK:  ret void
+
+
+// CHECK-LABEL: define {{.*void}} @{{.*}}static_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
+void static_chunked(float *a, float *b, float *c, float *d) {
+  #pragma omp target
+  #pragma omp teams
+#pragma omp distribute simd dist_schedule(static, 5)
+  for (unsigned i = 131071; i <= 2147483647; i += 127) {
+    a[i] = b[i] * c[i] * d[i];
+  }
+}
+
+// CHECK: define {{.*}}void @.omp_outlined.{{.*}}(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[DPTR:%.+]])
+// CHECK:  [[TID_ADDR:%.+]] = alloca i32*
+// CHECK:  [[IV:%.+iv]] = alloca i32
+// CHECK:  [[LB:%.+lb]] = alloca i32
+// CHECK:  [[UB:%.+ub]] = alloca i32
+// CHECK:  [[ST:%.+stride]] = alloca i32
+// CHECK:  [[LAST:%.+last]] = alloca i32
+// CHECK-DAG:  store i32* [[GBL_TIDP]], i32** [[TID_ADDR]]
+// CHECK-DAG:  store i32 0, i32* [[LB]]
+// CHECK-DAG:  store i32 16908288, i32* [[UB]]
+// CHECK-DAG:  store i32 1, i32* [[ST]]
+// CHECK-DAG:  store i32 0, i32* [[LAST]]
+// CHECK-DAG:  [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]]
+// CHECK-DAG:  [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]]
+// CHECK:  call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 91, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 5)
+// CHECK-DAG:  [[UBV0:%.+]] = load i32, i32* [[UB]]
+// CHECK-DAG:  [[USWITCH:%.+]] = icmp ugt i32 [[UBV0]], 16908288
+// CHECK:  br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]]
+// CHECK-DAG:  [[BBCT]]:
+// CHECK-DAG:  br label %[[BBCE:.+]]
+// CHECK-DAG:  [[BBCF]]:
+// CHECK-DAG:  [[UBV1:%.+]] = load i32, i32* [[UB]]
+// CHECK-DAG:  br label %[[BBCE]]
+// CHECK:  [[BBCE]]:
+// CHECK:  [[SELUB:%.+]] = phi i32 [ 16908288, %[[BBCT]] ], [ [[UBV1]], %[[BBCF]] ]
+// CHECK:  store i32 [[SELUB]], i32* [[UB]]
+// CHECK:  [[LBV0:%.+]] = load i32, i32* [[LB]]
+// CHECK:  store i32 [[LBV0]], i32* [[IV]]
+// CHECK:  br label %[[BBINNFOR:.+]]
+// CHECK:  [[BBINNFOR]]:
+// CHECK:  [[IVVAL0:%.+]] = load i32, i32* [[IV]]
+// CHECK:  [[UBV2:%.+]] = load i32, i32* [[UB]]
+// CHECK:  [[IVLEUB:%.+]] = icmp ule i32 [[IVVAL0]], [[UBV2]]
+// CHECK:  br i1 [[IVLEUB]], label %[[BBINNBODY:.+]], label %[[BBINNEND:.+]]
+// CHECK:  [[BBINNBODY]]:
+// CHECK:  {{.+}} = load i32, i32* [[IV]]
+// ... loop body ...
+// CHECK:  br label %[[BBBODYCONT:.+]]
+// CHECK:  [[BBBODYCONT]]:
+// CHECK:  br label %[[BBINNINC:.+]]
+// CHECK:  [[BBINNINC]]:
+// CHECK:  [[IVVAL1:%.+]] = load i32, i32* [[IV]]
+// CHECK:  [[IVINC:%.+]] = add i32 [[IVVAL1]], 1
+// CHECK:  store i32 [[IVINC]], i32* [[IV]]
+// CHECK:  br label %[[BBINNFOR]]
+// CHECK:  [[BBINNEND]]:
+// CHECK:  br label %[[LPEXIT:.+]]
+// CHECK:  [[LPEXIT]]:
+// CHECK:  call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]])
+// CHECK:  ret void
+
+// CHECK-LABEL: test_precond
+void test_precond() {
+  char a = 0; char i;
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute simd linear(i)
+  for(i = a; i < 10; ++i);
+}
+
+// a is passed as a parameter to the outlined functions
+// CHECK:  define {{.*}}void @.omp_outlined.{{.*}}(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], i8* dereferenceable({{[0-9]+}}) [[APARM:%.+]])
+// CHECK:  store i8* [[APARM]], i8** [[APTRADDR:%.+]]
+// ..many loads of %0..
+// CHECK:  [[A2:%.+]] = load i8*, i8** [[APTRADDR]]
+// CHECK:  [[AVAL0:%.+]] = load i8, i8* [[A2]]
+// CHECK:  store i8 [[AVAL0]], i8* [[CAP_EXPR:%.+]],
+// CHECK:  [[AVAL1:%.+]] = load i8, i8* [[CAP_EXPR]]
+// CHECK:  load i8, i8* [[CAP_EXPR]]
+// CHECK:  [[AVAL2:%.+]] = load i8, i8* [[CAP_EXPR]]
+// CHECK:  [[ACONV:%.+]] = sext i8 [[AVAL2]] to i32
+// CHECK:  [[ACMP:%.+]] = icmp slt i32 [[ACONV]], 10
+// CHECK:  br i1 [[ACMP]], label %[[PRECOND_THEN:.+]], label %[[PRECOND_END:.+]]
+// CHECK:  [[PRECOND_THEN]]
+// CHECK:  call void @__kmpc_for_static_init_4
+// CHECK:  call void @__kmpc_for_static_fini
+// CHECK:  [[PRECOND_END]]
+
+// no templates for now, as these require special handling in target regions and/or declare target
+
+// HCHECK-LABEL: fint
+// HCHECK: call {{.*}}i32 {{.+}}ftemplate
+// HCHECK: ret i32
+
+// HCHECK: load i16, i16*
+// HCHECK: store i16 %
+// HCHECK: call i32 @__tgt_target_teams(
+// HCHECK: call void @__kmpc_for_static_init_4(
+template <typename T>
+T ftemplate() {
+  short aa = 0;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute simd dist_schedule(static, aa)
+  for (int i = 0; i < 100; i++) {
+  }
+  return T();
+}
+
+int fint(void) { return ftemplate<int>(); }
+
+#endif
+
+// CHECK: !{!"llvm.loop.vectorize.width", i32 8}
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+// CHECK: !{!"llvm.loop.vectorize.width", i32 32}
diff --git a/test/OpenMP/distribute_simd_collapse_messages.cpp b/test/OpenMP/distribute_simd_collapse_messages.cpp
index 182a09a..f4d5b73 100644
--- a/test/OpenMP/distribute_simd_collapse_messages.cpp
+++ b/test/OpenMP/distribute_simd_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_simd_dist_schedule_messages.cpp b/test/OpenMP/distribute_simd_dist_schedule_messages.cpp
index 6a8482d..c659abe 100644
--- a/test/OpenMP/distribute_simd_dist_schedule_messages.cpp
+++ b/test/OpenMP/distribute_simd_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_simd_firstprivate_codegen.cpp b/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
new file mode 100644
index 0000000..919a122
--- /dev/null
+++ b/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
@@ -0,0 +1,396 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target
+  #pragma omp teams
+#pragma omp distribute simd firstprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target
+    #pragma omp teams
+#pragma omp distribute simd firstprivate(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA: define internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.*}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
+      // Private alloca's for conversion
+      // LAMBDA: [[G_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca float*,
+      // LAMBDA: [[G1_REF:%.+]] = alloca double*,
+
+      // Actual private variables to be used in the body (tmp is used for the reference type)
+      // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[TMP_PRIVATE:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+
+      // Store input parameter addresses into private alloca's for conversion
+      // LAMBDA: store double* [[G_IN]], double** [[G_ADDR]],
+      // LAMBDA: store double* [[G1_IN]], double** [[G1_ADDR]],
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+      // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_ADDR]],
+
+      // LAMBDA-DAG: [[G_ADDR_VAL:%.+]] = load double*, double** [[G_ADDR]],
+      // LAMBDA-DAG: [[G1_ADDR_VAL:%.+]] = load double*, double** [[G1_ADDR]],
+      // LAMBDA-DAG: [[SVAR_ADDR_VAL:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+      // LAMBDA-DAG: [[SFVAR_ADDR_VAL:%.+]] = load float*, float** [[SFVAR_ADDR]],
+      // LAMBDA-DAG: store double* [[G1_ADDR_VAL]], double** [[G1_REF]],
+
+      // LAMBDA-DAG: [[G_VAL:%.+]] = load{{.+}} double, double* [[G_ADDR_VAL]],
+      // LAMBDA-DAG: store double [[G_VAL]], double* [[G_PRIVATE]],
+      // LAMBDA-DAG: [[G1_VAL_REF:%.+]] = load double*, double** [[G1_REF]],
+      // LAMBDA-DAG: [[G1_VAL:%.+]] = load{{.+}} double, double* [[G1_VAL_REF]],
+      // LAMBDA-DAG: store double [[G1_VAL]], double* [[G1_PRIVATE]],
+      // LAMBDA-DAG: store double* [[G1_PRIVATE]], double** [[TMP_PRIVATE]],
+      // LAMBDA-DAG: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_ADDR_VAL]],
+      // LAMBDA-DAG: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA-DAG: [[SFVAR_VAL:%.+]] = load float, float* [[SFVAR_ADDR_VAL]],
+      // LAMBDA-DAG: store float [[SFVAR_VAL]], float* [[SFVAR_PRIVATE]],
+
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      g += 1;
+      g1 += 1;
+      svar += 3;
+      sfvar += 4.0;
+      // LAMBDA-DAG: [[G_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+      // LAMBDA-DAG: [[G_NEXT:%.+]] = fadd double [[G_VAL]], 1.{{.+}}
+      // LAMBDA-DAG: store double [[G_NEXT]], double* [[G_PRIVATE]],
+      // LAMBDA-DAG: [[TMP_VAL1:%.+]] = load double*, double** [[TMP_PRIVATE]],
+      // LAMBDA-DAG: [[TMP_VAL_VAL1:%.+]] = load{{.*}} double, double* [[TMP_VAL1]],
+      // LAMBDA-DAG: [[TMP_ADD:%.+]] = fadd double [[TMP_VAL_VAL1]], 1.{{.+}}
+      // LAMBDA-DAG: store{{.*}} double [[TMP_ADD]], double* [[TMP_VAL1]],
+      // LAMBDA-DAG: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA-DAG: [[SVAR_ADD:%.+]] = add{{.*}} i{{[0-9]+}} [[SVAR_VAL]], 3
+      // LAMBDA-DAG: store i{{[0-9]+}} [[SVAR_ADD]], i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA-DAG: [[SFVAR_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+      // LAMBDA-DAG: [[SFVAR_CONV_VAL1:%.+]] = fpext float [[SFVAR_VAL]] to double
+      // LAMBDA-DAG: [[SFVAR_ADD:%.+]] = fadd double [[SFVAR_CONV_VAL1]], 4.{{.+}}
+      // LAMBDA-DAG: [[SFVAR_CONV_VAL2:%.+]] = fptrunc double [[SFVAR_ADD]] to float
+      // LAMBDA-DAG: store float [[SFVAR_CONV_VAL2:%.+]], float* [[SFVAR_PRIVATE]],
+
+      // call inner lambda (use refs to private alloca's)
+      // LAMBDA: [[GEP_0:%.+]] = getelementptr{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: store double* [[G_PRIVATE]], double** [[GEP_0]],
+      // LAMBDA: [[GEP_1:%.+]] = getelementptr{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[TMP_PAR:%.+]] = load double*, double** [[TMP_PRIVATE]],
+      // LAMBDA: store double* [[TMP_PAR]], double** [[GEP_1]],
+      // LAMBDA: [[GEP_2:%.+]] = getelementptr{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[GEP_2]],
+      // LAMBDA: [[GEP_3:%.+]] = getelementptr{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
+      // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[GEP_3]],
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* {{.+}})
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+      [&]() {
+	// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+	// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+	g += 2;
+	g1 += 2;
+	svar += 4;
+	sfvar += 8.0;
+	// LAMBDA-DAG: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+	// LAMBDA-DAG: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+	// LAMBDA-DAG: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]],
+	// LAMBDA-DAG: [[G_REF_VAL:%.+]] = load double, double* [[G_REF]],
+	// LAMBDA-DAG: [[G_REF_ADD:%.+]] = fadd double [[G_REF_VAL]], 2.{{.+}}
+	// LAMBDA-DAG: store double [[G_REF_ADD]], double* [[G_REF]]
+
+	// LAMBDA-DAG: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+	// LAMBDA-DAG: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+	// LAMBDA-DAG: [[G1_REF_VAL:%.+]] = load double, double* [[G1_REF]],
+	// LAMBDA-DAG: [[G1_ADD:%.+]] = fadd double [[G1_REF_VAL]], 2.{{.+}}
+	// LAMBDA-DAG: store double [[G1_ADD]], double* [[G1_REF]],
+
+	// LAMBDA-DAG: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+	// LAMBDA-DAG: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+	// LAMBDA-DAG: [[SVAR_REF_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_REF]]
+	// LAMBDA-DAG: [[SVAR_ADD:%.+]] = add{{.*}} i{{[0-9]+}} [[SVAR_REF_VAL]], 4
+	// LAMBDA-DAG: store i{{[0-9]+}} [[SVAR_ADD]], i{{[0-9]+}}* [[SVAR_REF]]
+
+	// LAMBDA-DAG: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+	// LAMBDA-DAG: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+	// LAMBDA-DAG: [[SFVAR_REF_VAL:%.+]] = load float, float* [[SFVAR_REF]]
+	// LAMBDA-DAG: [[SFVAR_REF_CONV:%.+]] = fpext float [[SFVAR_REF_VAL]] to double
+	// LAMBDA-DAG: [[SFVAR_ADD:%.+]] = fadd double [[SFVAR_REF_CONV]], 8.{{.+}}
+	// LAMBDA-DAG: [[SFVAR_ADD_CONV:%.+]] = fptrunc double [[SFVAR_ADD]] to float
+	// LAMBDA-DAG: store float [[SFVAR_ADD_CONV]], float* [[SFVAR_REF]],
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp distribute simd firstprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}}*)* [[OMP_OUTLINED:@.+]] to void
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.*}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]])
+
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*,
+
+// discard omp loop variables
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+
+// init t_var
+// CHECK-DAG: [[T_VAR_ADDR_CONV_VAL_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR_CONV_VAL_REF]],
+// CHECK-DAG: store i{{[0-9]+}} [[T_VAR_ADDR_CONV_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]],
+
+// init vec
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[VEC_ADDR_VAL_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i{{[0-9]+}}*
+// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i{{[0-9]+}}*
+// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], i{{[0-9]+}}* align {{[0-9]+}} [[VEC_ADDR_VAL_BCAST]],{{.+}})
+
+// init s_arr
+// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_VAL]] to [[S_FLOAT_TY]]*
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]]{{.+}}
+// CHECK-DAG: [[S_ARR_PRIV_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BGN]]{{.+}}
+// CHECK-DAG: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BGN]], [[S_ARR_PRIV_NEXT]]
+// CHECK-DAG: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_CPY_DONE:.+]], label %[[S_ARR_CPY_BODY:.+]]
+
+// CHECK-DAG: [[S_ARR_CPY_BODY]]:
+// CHECK-DAG: [[S_ARR_SRC_PAST:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BCAST]],{{.+}} ], [ [[S_ARR_SRC:%.+]],{{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_PAST:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]],{{.+}} ], [ [[S_ARR_DST:%.+]],{{.+}} ]
+// CHECK-DAG: [[S_ARR_SRC_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_SRC_PAST]] to{{.+}}
+// CHECK-DAG: [[S_ARR_DST_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_DST_PAST]] to{{.+}}
+// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[S_ARR_DST_BCAST]], {{.+}}* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr{{.+}}
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr{{.+}}
+// CHECK-DAG: [[S_ARR_CPY_FIN:%.+]] = icmp{{.+}} [[S_ARR_DST]], [[S_ARR_PRIV_NEXT]]
+// CHECK-DAG: br i1 [[S_ARR_CPY_FIN]], label %[[S_ARR_CPY_DONE]], label %[[S_ARR_CPY_BODY]]
+// CHECK-DAG: [[S_ARR_CPY_DONE]]:
+
+// init var
+// CHECK-DAG: [[VAR_ADDR_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK-DAG: store{{.+}} [[VAR_ADDR_VAL]],{{.+}} [[TMP]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
+// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_PRIV]] to{{.+}}
+// CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to{{.+}}
+// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}})
+// CHECK-DAG: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]],
+
+// init svar
+// CHECK-DAG: [[SVAR_CONV_VAL_REF:%.+]] = load{{.+}},{{.+}} [[SVAR_ADDR]],
+// CHECK-DAG: [[SVAR_CONV_VAL:%.+]] = load{{.+}},{{.+}} [[SVAR_CONV_VAL_REF]],
+// CHECK-DAG: store{{.+}} [[SVAR_CONV_VAL]],{{.+}} [[SVAR_PRIV]],
+
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}},
+// CHECK-DAG: store i{{[0-9]+}} 1, i{{[0-9]+}}* %.omp{{.+}},
+// CHECK-DAG: store i{{[0-9]+}} 1, i{{[0-9]+}}* %.omp{{.+}},
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}},
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// Template
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_1:@.+]] to void
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_INT_TY]]*{{.*}} [[VAR_IN:%.+]])
+
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}}*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// discard omp loop variables
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+// CHECK: {{.*}} = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK-DAG: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]],
+
+// init t_var
+// CHECK-DAG: [[T_VAR_ADDR_CONV_VAL_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR_CONV_VAL_REF]],
+// CHECK-DAG: store i{{[0-9]+}} [[T_VAR_ADDR_CONV_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]],
+
+// init vec
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[VEC_ADDR_VAL_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i{{[0-9]+}}*
+// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i{{[0-9]+}}*
+// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], i{{[0-9]+}}* align {{[0-9]+}} [[VEC_ADDR_VAL_BCAST]],{{.+}})
+
+// init s_arr
+// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_VAL]] to [[S_INT_TY]]*
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]]{{.+}}
+// CHECK-DAG: [[S_ARR_PRIV_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BGN]]{{.+}}
+// CHECK-DAG: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BGN]], [[S_ARR_PRIV_NEXT]]
+// CHECK-DAG: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_CPY_DONE:.+]], label %[[S_ARR_CPY_BODY:.+]]
+
+// CHECK-DAG: [[S_ARR_CPY_BODY]]:
+// CHECK-DAG: [[S_ARR_SRC_PAST:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BCAST]],{{.+}} ], [ [[S_ARR_SRC:%.+]],{{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_PAST:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]],{{.+}} ], [ [[S_ARR_DST:%.+]],{{.+}} ]
+// CHECK-DAG: [[S_ARR_SRC_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_SRC_PAST]] to{{.+}}
+// CHECK-DAG: [[S_ARR_DST_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_DST_PAST]] to{{.+}}
+// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[S_ARR_DST_BCAST]], {{.+}}* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr{{.+}}
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr{{.+}}
+// CHECK-DAG: [[S_ARR_CPY_FIN:%.+]] = icmp{{.+}} [[S_ARR_DST]], [[S_ARR_PRIV_NEXT]]
+// CHECK-DAG: br i1 [[S_ARR_CPY_FIN]], label %[[S_ARR_CPY_DONE]], label %[[S_ARR_CPY_BODY]]
+// CHECK-DAG: [[S_ARR_CPY_DONE]]:
+
+// init var
+// CHECK-DAG: [[VAR_ADDR_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]],
+// CHECK-DAG: store{{.+}} [[VAR_ADDR_VAL]],{{.+}} [[TMP]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]],
+// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_PRIV]] to{{.+}}
+// CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to{{.+}}
+// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}})
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]],
+
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}},
+// CHECK-DAG: store i{{[0-9]+}} 1, i{{[0-9]+}}* %.omp{{.+}},
+// CHECK-DAG: store i{{[0-9]+}} 1, i{{[0-9]+}}* %.omp{{.+}},
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}},
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif
diff --git a/test/OpenMP/distribute_simd_firstprivate_messages.cpp b/test/OpenMP/distribute_simd_firstprivate_messages.cpp
index b9267a3..e0f3dde 100644
--- a/test/OpenMP/distribute_simd_firstprivate_messages.cpp
+++ b/test/OpenMP/distribute_simd_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -42,7 +44,7 @@
 };
 class S5 {
   int a;
-  S5(const S5 &s5) : a(s5.a) {} // expected-note 4 {{implicitly declared private here}}
+  S5(const S5 &s5) : a(s5.a) {} // expected-note 2 {{implicitly declared private here}}
 
 public:
   S5() : a(0) {}
@@ -50,7 +52,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -150,9 +152,10 @@
 #pragma omp distribute simd firstprivate(i)
   for (int k = 0; k < argc; ++k)
     ++k;
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}}
+#pragma omp distribute simd lastprivate(g) firstprivate(g)
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp parallel private(i)
@@ -314,14 +317,16 @@
 #pragma omp distribute simd firstprivate(j)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}}
+#pragma omp distribute simd lastprivate(g) firstprivate(g)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd lastprivate(n) firstprivate(n) // OK
+#pragma omp distribute simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp parallel
diff --git a/test/OpenMP/distribute_simd_lastprivate_codegen.cpp b/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
new file mode 100644
index 0000000..cde7c81
--- /dev/null
+++ b/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
@@ -0,0 +1,409 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target
+  #pragma omp teams
+#pragma omp distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target
+    #pragma omp teams
+#pragma omp distribute simd lastprivate(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
+      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
+      // LAMBDA: [[TMP_G1:%.+]] = alloca double*,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+      // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
+
+      // init private variables
+      // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP_G1]],
+      // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP_G1]],
+      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
+      g = 1;
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]],
+      // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]],
+      // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+      // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+      // linear counter
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_COUNTER_BLOCK:.+]], label %[[OMP_COUNTER_DONE:.+]]
+      // LAMBDA: [[OMP_COUNTER_BLOCK]]:
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* %
+      // LAMBDA: br label %[[OMP_COUNTER_DONE]]
+      // LAMBDA: [[OMP_COUNTER_DONE]]:
+
+
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+      // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
+      // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[TMP_G1_VAL]],
+
+      // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
+      // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+      [&]() {
+	// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+	// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+	g = 2;
+	g1 = 2;
+	svar = 4;
+	sfvar = 8.0;
+	// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+	// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+	// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+	// LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+	// LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+	// LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+	// LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+	// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+	// LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+	// LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+	// LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target
+  #pragma omp teams
+#pragma omp distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  int i;
+
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]])
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[TMP:%.*]] = alloca [[S_FLOAT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP]],
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// assignment: vec[i] = t_var;
+// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
+// CHECK: ret void
+
+// template tmain
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
+// CHECK: ret
+
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]],  [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip init of bound and global tid
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// copy from parameters to local address variables
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]],
+
+// load content of local address variables
+// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_ADDR1_REF]], [[S_INT_TY]]** [[TMP]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK-DAG: [[TMP_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]],
+// CHECK: call void @__kmpc_for_static_init_4(
+// assignment: vec[i] = t_var;
+// CHECK: [[IV_VAL1:%.+]] =
+// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]],
+// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]],
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif
diff --git a/test/OpenMP/distribute_simd_lastprivate_messages.cpp b/test/OpenMP/distribute_simd_lastprivate_messages.cpp
index a891435..e47ad7e 100644
--- a/test/OpenMP/distribute_simd_lastprivate_messages.cpp
+++ b/test/OpenMP/distribute_simd_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -25,7 +27,7 @@
 const S2 ba[5];
 class S3 {
   int a;
-  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+  S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}}
 
 public:
   S3() : a(0) {}
@@ -52,7 +54,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -313,14 +315,16 @@
 #pragma omp distribute simd lastprivate(j)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{firstprivate variable cannot be lastprivate}} expected-note@+3 {{defined as firstprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+#pragma omp distribute simd firstprivate(m) lastprivate(m)
   for (i = 0; i < argc; ++i)
     foo();
+// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd lastprivate(n) firstprivate(n) // OK
+#pragma omp distribute simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i)
     foo();
   static int si;
diff --git a/test/OpenMP/distribute_simd_linear_messages.cpp b/test/OpenMP/distribute_simd_linear_messages.cpp
index c60e0a2..06d85d1 100644
--- a/test/OpenMP/distribute_simd_linear_messages.cpp
+++ b/test/OpenMP/distribute_simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
   int x;
 };
@@ -18,41 +20,49 @@
 {
   int B = 0;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear(B:bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear(B:B::bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear(X::x : ::z)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear(B,::z, X::x)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear(::z)
@@ -63,6 +73,7 @@
 #pragma omp distribute simd linear(B::bfoo()) // expected-error {{expected variable name}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+3 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear(B::ib,B:C1+C2)
@@ -148,11 +159,13 @@
 #pragma omp distribute simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -163,6 +176,7 @@
 #pragma omp distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear (argc : 5)
@@ -183,6 +197,7 @@
 #pragma omp distribute simd linear (argv[1]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear(e, g)
@@ -193,37 +208,12 @@
 #pragma omp distribute simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear(i)
   for (int k = 0; k < argc; ++k) ++k;
 
-  #pragma omp parallel
-  {
-    int v = 0;
-    int i;
-    #pragma omp target
-    #pragma omp teams
-    #pragma omp distribute simd linear(v:i)
-    for (int k = 0; k < argc; ++k) { i = k; v += i; }
-  }
-
-#pragma omp target
-#pragma omp teams
-#pragma omp parallel for simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  int v = 0;
-
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute simd linear(v:j)
-  for (int k = 0; k < argc; ++k) { ++k; v += j; }
-
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
   return 0;
 }
 
@@ -262,11 +252,13 @@
 #pragma omp distribute simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -277,6 +269,7 @@
 #pragma omp distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams
 #pragma omp distribute simd linear (argc)
@@ -310,28 +303,18 @@
 
   #pragma omp parallel
   {
-    int i;
+    int k;
     #pragma omp target
     #pragma omp teams
-    #pragma omp distribute simd linear(i)
-      for (int k = 0; k < argc; ++k) ++k;
+    #pragma omp distribute simd linear(k)
+      for (k = 0; k < argc; ++k) ++k;
 
     #pragma omp target
     #pragma omp teams
-    #pragma omp distribute simd linear(i : 4)
-      for (int k = 0; k < argc; ++k) { ++k; i += 4; }
+    #pragma omp distribute simd linear(k : 4)
+      for (k = 0; k < argc; k+=4) { }
   }
 
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-
   foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
   return 0;
 }
diff --git a/test/OpenMP/distribute_simd_loop_messages.cpp b/test/OpenMP/distribute_simd_loop_messages.cpp
index 59fafda..1977ca7 100644
--- a/test/OpenMP/distribute_simd_loop_messages.cpp
+++ b/test/OpenMP/distribute_simd_loop_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 static int sii;
 // expected-note@+1 {{defined as threadprivate or thread local}}
 #pragma omp threadprivate(sii)
@@ -322,9 +324,7 @@
 
   #pragma omp target
   #pragma omp teams
-  // expected-error@+3 {{unexpected OpenMP clause 'shared' in directive '#pragma omp distribute simd'}}
-  // expected-note@+2  {{defined as shared}}
-  // expected-error@+2 {{loop iteration variable in the associated loop of 'omp distribute simd' directive may not be shared, predetermined as linear}}
+  // expected-error@+1 {{unexpected OpenMP clause 'shared' in directive '#pragma omp distribute simd'}}
   #pragma omp distribute simd shared(ii)
   for (ii = 0; ii < 10; ii++)
     c[ii] = a[ii];
diff --git a/test/OpenMP/distribute_simd_misc_messages.c b/test/OpenMP/distribute_simd_misc_messages.c
index 5fc2cb6..2cb5e97 100644
--- a/test/OpenMP/distribute_simd_misc_messages.c
+++ b/test/OpenMP/distribute_simd_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp distribute simd'}}
 #pragma omp distribute simd
 
@@ -561,89 +563,6 @@
 #pragma omp distribute simd linear(x, y, z)
   for (i = 0; i < 16; ++i)
     ;
-
-  int x, y;
-#pragma omp target
-#pragma omp teams
-// expected-error@+1 {{expected expression}}
-#pragma omp distribute simd linear(x :)
-  for (i = 0; i < 16; ++i)
-    ;
-#pragma omp target
-#pragma omp teams
-// expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
-#pragma omp distribute simd linear(x :, )
-  for (i = 0; i < 16; ++i)
-    ;
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute simd linear(x : 1)
-  for (i = 0; i < 16; ++i)
-    ;
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute simd linear(x : 2 * 2)
-  for (i = 0; i < 16; ++i)
-    ;
-#pragma omp target
-#pragma omp teams
-// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
-#pragma omp distribute simd linear(x : 1, y)
-  for (i = 0; i < 16; ++i)
-    ;
-#pragma omp target
-#pragma omp teams
-// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
-#pragma omp distribute simd linear(x : 1, y, z : 1)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-note@+2 {{defined as linear}}
-// expected-error@+1 {{linear variable cannot be linear}}
-#pragma omp distribute simd linear(x) linear(x)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-note@+2 {{defined as private}}
-// expected-error@+1 {{private variable cannot be linear}}
-#pragma omp distribute simd private(x) linear(x)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-note@+2 {{defined as linear}}
-// expected-error@+1 {{linear variable cannot be private}}
-#pragma omp distribute simd linear(x) private(x)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-warning@+1 {{zero linear step (x and other variables in clause should probably be const)}}
-#pragma omp distribute simd linear(x, y : 0)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-note@+2 {{defined as linear}}
-// expected-error@+1 {{linear variable cannot be lastprivate}}
-#pragma omp distribute simd linear(x) lastprivate(x)
-  for (i = 0; i < 16; ++i)
-    ;
-
-#pragma omp target
-#pragma omp teams
-// expected-note@+2 {{defined as lastprivate}}
-// expected-error@+1 {{lastprivate variable cannot be linear}}
-#pragma omp distribute simd lastprivate(x) linear(x)
-  for (i = 0; i < 16; ++i)
-    ;
 }
 
 void test_aligned() {
@@ -1083,26 +1002,26 @@
 }
 
 void linear_modifiers(int argc) {
-  int f;
+  int k;
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd linear(f)
-  for (int k = 0; k < argc; ++k) ++k;
+#pragma omp distribute simd linear(k)
+  for (k = 0; k < argc; ++k) ++k;
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd linear(val(f))
-  for (int k = 0; k < argc; ++k) ++k;
+#pragma omp distribute simd linear(val(k))
+  for (k = 0; k < argc; ++k) ++k;
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd linear(uval(f)) // expected-error {{expected 'val' modifier}}
-  for (int k = 0; k < argc; ++k) ++k;
+#pragma omp distribute simd linear(uval(k)) // expected-error {{expected 'val' modifier}}
+  for (k = 0; k < argc; ++k) ++k;
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd linear(ref(f)) // expected-error {{expected 'val' modifier}}
-  for (int k = 0; k < argc; ++k) ++k;
+#pragma omp distribute simd linear(ref(k)) // expected-error {{expected 'val' modifier}}
+  for (k = 0; k < argc; ++k) ++k;
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd linear(foo(f)) // expected-error {{expected 'val' modifier}}
-  for (int k = 0; k < argc; ++k) ++k;
+#pragma omp distribute simd linear(foo(k)) // expected-error {{expected 'val' modifier}}
+  for (k = 0; k < argc; ++k) ++k;
 }
 
diff --git a/test/OpenMP/distribute_simd_private_codegen.cpp b/test/OpenMP/distribute_simd_private_codegen.cpp
new file mode 100644
index 0000000..f675158
--- /dev/null
+++ b/test/OpenMP/distribute_simd_private_codegen.cpp
@@ -0,0 +1,224 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target
+  #pragma omp teams
+#pragma omp distribute simd private(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]()
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target
+    #pragma omp teams
+#pragma omp distribute simd private(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
+      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double,
+      // LAMBDA: [[TMP_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float,
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[TMP_PRIVATE_ADDR]],
+      g = 1;
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]],
+      // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+      // LAMBDA: store float* [[SFVAR_PRIVATE_ADDR]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+      [&]() {
+	// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+	// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+	g = 2;
+	g1 = 2;
+	svar = 4;
+	sfvar = 8.0;
+	// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+	// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+	// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+	// LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+	// LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+	// LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+	// LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+	// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+	// LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+	// LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+	// LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target
+  #pragma omp teams
+#pragma omp distribute simd private(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  int i;
+
+  #pragma omp target
+  #pragma omp teams
+#pragma omp distribute simd
+  for (i = 0; i < 2; ++i) {
+    ;
+  }
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]]()
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED:@.+]] to void
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-NOT: alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-NOT: alloca [[S_FLOAT_TY]],
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]*
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](
+// CHECK: ret
+
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]]()
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_1:@.+]] to void
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK-NOT: alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK-NOT: alloca [[S_INT_TY]],
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]*
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]])
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif
diff --git a/test/OpenMP/distribute_simd_private_messages.cpp b/test/OpenMP/distribute_simd_private_messages.cpp
index c777c99..8d82cc8 100644
--- a/test/OpenMP/distribute_simd_private_messages.cpp
+++ b/test/OpenMP/distribute_simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_simd_reduction_codegen.cpp b/test/OpenMP/distribute_simd_reduction_codegen.cpp
new file mode 100644
index 0000000..85b0e80
--- /dev/null
+++ b/test/OpenMP/distribute_simd_reduction_codegen.cpp
@@ -0,0 +1,217 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+T tmain() {
+  T t_var = T();
+  T vec[] = {1, 2};
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute simd reduction(+: t_var)
+  for (int i = 0; i < 2; ++i) {
+    t_var += (T) i;
+  }
+  return T();
+}
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute simd reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+    // LAMBDA: alloca i{{.+}},
+    // LAMBDA: alloca i{{.+}},
+    // LAMBDA: alloca i{{.+}},
+    // LAMBDA: alloca i{{.+}},
+    // LAMBDA: alloca i{{.+}},
+    // LAMBDA: alloca i{{.+}},
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[LAST_ITER:%.+]] = load i32, i32* %
+    // LAMBDA: [[IS_LAST:%.+]] = icmp ne i32 [[LAST_ITER]], 0
+    // LAMBDA: br i1 [[IS_LAST]], label %[[THEN:.+]], label %[[DONE:.+]]
+    // LAMBDA: [[THEN]]
+    // LAMBDA: store i32 2, i32* %
+    // LAMBDA: br label %[[DONE]]
+    // LAMBDA: [[DONE]]
+    // LAMBDA: [[SIVAR_ORIG_VAL:%.+]] = load i32, i32* [[SIVAR_REF]],
+    // LAMBDA: [[SIVAR_PRIV_VAL:%.+]] = load i32, i32* [[SIVAR_PRIV]],
+    // LAMBDA: [[ADD:%.+]] = add nsw i32 [[SIVAR_ORIG_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store i32 [[ADD]], i32* [[SIVAR_REF]],
+    // LAMBDA: ret void
+
+    sivar += i;
+
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+
+      sivar += 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
+      // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4
+      // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute simd reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK-64: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to
+// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]])
+// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_ADDR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: alloca i{{.+}},
+// CHECK: alloca i{{.+}},
+// CHECK: alloca i{{.+}},
+// CHECK: alloca i{{.+}},
+// CHECK: alloca i{{.+}},
+// CHECK: alloca i{{.+}},
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[LAST_ITER:%.+]] = load i32, i32* %
+// CHECK: [[IS_LAST:%.+]] = icmp ne i32 [[LAST_ITER]], 0
+// CHECK: br i1 [[IS_LAST]], label %[[THEN:.+]], label %[[DONE:.+]]
+// CHECK: [[THEN]]
+// CHECK: store i32 2, i32* %
+// CHECK: br label %[[DONE]]
+// CHECK: [[DONE]]
+// CHECK: [[SIVAR_ORIG_VAL:%.+]] = load i32, i32* [[SIVAR_REF]],
+// CHECK: [[SIVAR_PRIV_VAL:%.+]] = load i32, i32* [[SIVAR_PRIV]],
+// CHECK: [[ADD:%.+]] = add nsw i32 [[SIVAR_ORIG_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store i32 [[ADD]], i32* [[SIVAR_REF]],
+// CHECK: ret void
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call void @[[TOFFL1:.+]]({{.+}})
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}} [[TVAR_ARG:%.+]])
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK-64: [[TVAR_CONV:%.+]] = bitcast{{.+}} [[TVAR_ADDR]] to
+// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_CONV]])
+// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_ADDR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: alloca i{{.+}},
+// CHECK: alloca i{{.+}},
+// CHECK: alloca i{{.+}},
+// CHECK: alloca i{{.+}},
+// CHECK: alloca i{{.+}},
+// CHECK: alloca i{{.+}},
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[LAST_ITER:%.+]] = load i32, i32* %
+// CHECK: [[IS_LAST:%.+]] = icmp ne i32 [[LAST_ITER]], 0
+// CHECK: br i1 [[IS_LAST]], label %[[THEN:.+]], label %[[DONE:.+]]
+// CHECK: [[THEN]]
+// CHECK: store i32 2, i32* %
+// CHECK: br label %[[DONE]]
+// CHECK: [[DONE]]
+// CHECK: [[TVAR_ORIG_VAL:%.+]] = load i32, i32* [[TVAR_REF]],
+// CHECK: [[TVAR_PRIV_VAL:%.+]] = load i32, i32* [[TVAR_PRIV]],
+// CHECK: [[ADD:%.+]] = add nsw i32 [[TVAR_ORIG_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store i32 [[ADD]], i32* [[TVAR_REF]],
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif
diff --git a/test/OpenMP/distribute_simd_reduction_messages.cpp b/test/OpenMP/distribute_simd_reduction_messages.cpp
index ddedcb0..7bb5bde 100644
--- a/test/OpenMP/distribute_simd_reduction_messages.cpp
+++ b/test/OpenMP/distribute_simd_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_simd_safelen_messages.cpp b/test/OpenMP/distribute_simd_safelen_messages.cpp
index 4ae35fb..c95d121 100644
--- a/test/OpenMP/distribute_simd_safelen_messages.cpp
+++ b/test/OpenMP/distribute_simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/distribute_simd_simdlen_messages.cpp b/test/OpenMP/distribute_simd_simdlen_messages.cpp
index 4ae35fb..c95d121 100644
--- a/test/OpenMP/distribute_simd_simdlen_messages.cpp
+++ b/test/OpenMP/distribute_simd_simdlen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/driver.c b/test/OpenMP/driver.c
index 74aaea5..baafdf2 100644
--- a/test/OpenMP/driver.c
+++ b/test/OpenMP/driver.c
@@ -1,10 +1,12 @@
 // Test that by default -fnoopenmp-use-tls is passed to frontend.
 //
 // RUN: %clang %s -### -o %t.o 2>&1 -fopenmp=libomp | FileCheck --check-prefix=CHECK-DEFAULT %s
+
 // CHECK-DEFAULT: -cc1
 // CHECK-DEFAULT-NOT: -fnoopenmp-use-tls
 //
 // RUN: %clang %s -### -o %t.o 2>&1 -fopenmp=libomp -fnoopenmp-use-tls | FileCheck --check-prefix=CHECK-NO-TLS %s
+
 // CHECK-NO-TLS: -cc1
 // CHECK-NO-TLS-SAME: -fnoopenmp-use-tls
 //
@@ -13,17 +15,29 @@
 // RUN: %clang %s -c -E -dM -fopenmp=libomp -fopenmp-version=0 | FileCheck --check-prefix=CHECK-DEFAULT-VERSION %s
 // RUN: %clang %s -c -E -dM -fopenmp=libomp -fopenmp-version=100 | FileCheck --check-prefix=CHECK-DEFAULT-VERSION %s
 // RUN: %clang %s -c -E -dM -fopenmp=libomp -fopenmp-version=31 | FileCheck --check-prefix=CHECK-DEFAULT-VERSION %s
+
 // CHECK-DEFAULT-VERSION: #define _OPENMP 201107
 
+// RUN: %clang %s -c -E -dM -fopenmp-simd | FileCheck --check-prefix=CHECK-SIMD-DEFAULT-VERSION %s
+// RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=1 | FileCheck --check-prefix=CHECK-SIMD-DEFAULT-VERSION %s
+// RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=0 | FileCheck --check-prefix=CHECK-SIMD-DEFAULT-VERSION %s
+// RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=100 | FileCheck --check-prefix=CHECK-SIMD-DEFAULT-VERSION %s
+// RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=31 | FileCheck --check-prefix=CHECK-SIMD-DEFAULT-VERSION %s
+// CHECK-SIMD-DEFAULT-VERSION: #define _OPENMP 201511
+
 // RUN: %clang %s -c -E -dM -fopenmp=libomp -fopenmp-version=40 | FileCheck --check-prefix=CHECK-40-VERSION %s
+// RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=40 | FileCheck --check-prefix=CHECK-40-VERSION %s
 // CHECK-40-VERSION: #define _OPENMP 201307
 
 // RUN: %clang %s -c -E -dM -fopenmp=libomp -fopenmp-version=45 | FileCheck --check-prefix=CHECK-45-VERSION %s
+// RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=45 | FileCheck --check-prefix=CHECK-45-VERSION %s
 // CHECK-45-VERSION: #define _OPENMP 201511
 
 // RUN: %clang %s -c -E -dM -fopenmp-version=1 | FileCheck --check-prefix=CHECK-VERSION %s
 // RUN: %clang %s -c -E -dM -fopenmp-version=31 | FileCheck --check-prefix=CHECK-VERSION %s
 // RUN: %clang %s -c -E -dM -fopenmp-version=40 | FileCheck --check-prefix=CHECK-VERSION %s
 // RUN: %clang %s -c -E -dM -fopenmp-version=45 | FileCheck --check-prefix=CHECK-VERSION %s
+// RUN: %clang %s -c -E -dM -fopenmp-version=45 | FileCheck --check-prefix=CHECK-VERSION %s
+// RUN: %clang %s -c -E -dM -fopenmp-simd -fno-openmp-simd -fopenmp-version=45 | FileCheck --check-prefix=CHECK-VERSION %s
 // CHECK-VERSION-NOT: #define _OPENMP
 
diff --git a/test/OpenMP/dump.cpp b/test/OpenMP/dump.cpp
index 5ec202c..162136b 100644
--- a/test/OpenMP/dump.cpp
+++ b/test/OpenMP/dump.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-dump %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-dump %s | FileCheck %s
 // expected-no-diagnostics
 
 int ga, gb;
@@ -12,7 +13,7 @@
 
 #pragma omp declare reduction(fun : float : omp_out += omp_in) initializer(omp_priv = omp_orig + 15)
 
-// CHECK:      |-OMPDeclareReductionDecl {{.+}} <line:11:35> col:35 operator+ 'int' combiner
+// CHECK:      |-OMPDeclareReductionDecl {{.+}} <line:12:35> col:35 operator+ 'int' combiner
 // CHECK-NEXT: | |-CompoundAssignOperator {{.+}} <col:47, col:58> 'int' lvalue '*=' ComputeLHSTy='int' ComputeResultTy='int'
 // CHECK-NEXT: | | |-DeclRefExpr {{.+}} <col:47> 'int' lvalue Var {{.+}} 'omp_out' 'int'
 // CHECK-NEXT: | | `-ImplicitCastExpr {{.+}} <col:58> 'int' <LValueToRValue>
@@ -27,7 +28,7 @@
 // CHECK-NEXT: | |     `-DeclRefExpr {{.+}} <col:58> 'char' lvalue Var {{.+}} 'omp_in' 'char'
 // CHECK-NEXT: | |-VarDecl {{.+}} <col:40> col:40 implicit used omp_in 'char'
 // CHECK-NEXT: | `-VarDecl {{.+}} <col:40> col:40 implicit used omp_out 'char'
-// CHECK-NEXT: |-OMPDeclareReductionDecl {{.+}} <line:13:37> col:37 fun 'float' combiner initializer
+// CHECK-NEXT: |-OMPDeclareReductionDecl {{.+}} <line:14:37> col:37 fun 'float' combiner initializer
 // CHECK-NEXT: | |-CompoundAssignOperator {{.+}} <col:45, col:56> 'float' lvalue '+=' ComputeLHSTy='float' ComputeResultTy='float'
 // CHECK-NEXT: | | |-DeclRefExpr {{.+}} <col:45> 'float' lvalue Var {{.+}} 'omp_out' 'float'
 // CHECK-NEXT: | | `-ImplicitCastExpr {{.+}} <col:56> 'float' <LValueToRValue>
@@ -42,27 +43,27 @@
   }
 };
 
-// CHECK:      |     `-OMPParallelForDirective {{.+}} <line:39:9, col:80>
+// CHECK:      |     `-OMPParallelForDirective {{.+}} {{<line:40:9, col:80>|<col:9, col:80>}}
 // CHECK-NEXT: |       |-OMPDefaultClause {{.+}} <col:26, col:40>
 // CHECK-NEXT: |       |-OMPPrivateClause {{.+}} <col:40, col:51>
 // CHECK-NEXT: |       | `-DeclRefExpr {{.+}} <col:48> 'int' lvalue OMPCapturedExpr {{.+}} 'a' 'int &'
 // CHECK-NEXT: |       |-OMPSharedClause {{.+}} <col:51, col:61>
 // CHECK-NEXT: |       | `-MemberExpr {{.+}} <col:58> 'int' lvalue ->b
-// CHECK-NEXT: |       |   `-CXXThisExpr {{.+}} <col:58> 'struct S *' this
+// CHECK-NEXT: |       |   `-CXXThisExpr {{.+}} <col:58> 'S *' this
 // CHECK-NEXT: |       |-OMPScheduleClause {{.+}} <col:61, col:79>
 // CHECK-NEXT: |       | `-ImplicitCastExpr {{.+}} <col:78> 'int' <LValueToRValue>
 // CHECK-NEXT: |       |   `-DeclRefExpr {{.+}} <col:78> 'int' lvalue OMPCapturedExpr {{.+}} '.capture_expr.' 'int'
-// CHECK-NEXT: |       |-CapturedStmt {{.+}} <line:40:5, line:41:9>
+// CHECK-NEXT: |       |-CapturedStmt {{.+}} <line:41:5, line:42:9>
 // CHECK-NEXT: |       | |-CapturedDecl {{.+}} <<invalid sloc>> <invalid sloc>
-// CHECK-NEXT: |       | | |-ForStmt {{.+}} <line:40:5, line:41:9>
-// CHECK:      |       | | | `-UnaryOperator {{.+}} <line:41:7, col:9> 'int' lvalue prefix '++'
+// CHECK-NEXT: |       | | |-ForStmt {{.+}} <line:41:5, line:42:9>
+// CHECK:      |       | | | `-UnaryOperator {{.+}} <line:42:7, col:9> 'int' lvalue prefix '++'
 // CHECK-NEXT: |       | | |   `-DeclRefExpr {{.+}} <col:9> 'int' lvalue OMPCapturedExpr {{.+}} 'a' 'int &'
 
 #pragma omp declare simd
 #pragma omp declare simd inbranch
 void foo();
 
-// CHECK:      `-FunctionDecl {{.+}} <line:63:1, col:10> col:6 foo 'void (void)'
-// CHECK-NEXT:   |-OMPDeclareSimdDeclAttr {{.+}} <line:62:9, col:34> Implicit BS_Inbranch
-// CHECK:        `-OMPDeclareSimdDeclAttr {{.+}} <line:61:9, col:25> Implicit BS_Undefined
+// CHECK:      `-FunctionDecl {{.+}} <line:64:1, col:10> col:6 foo 'void ()'
+// CHECK-NEXT:   |-OMPDeclareSimdDeclAttr {{.+}} <line:63:9, col:34> Implicit BS_Inbranch
+// CHECK:        `-OMPDeclareSimdDeclAttr {{.+}} <line:62:9, col:25> Implicit BS_Undefined
 
diff --git a/test/OpenMP/flush_ast_print.cpp b/test/OpenMP/flush_ast_print.cpp
index f3af47b..8cde315 100644
--- a/test/OpenMP/flush_ast_print.cpp
+++ b/test/OpenMP/flush_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -16,7 +20,7 @@
   return a + argc;
 }
 // CHECK:      static T a;
-// CHECK-NEXT: #pragma omp flush
+// CHECK-NEXT: #pragma omp flush{{$}}
 // CHECK-NEXT: #pragma omp flush (a)
 // CHECK:      static int a;
 // CHECK-NEXT: #pragma omp flush
diff --git a/test/OpenMP/flush_codegen.cpp b/test/OpenMP/flush_codegen.cpp
index 89636a5..543524c 100644
--- a/test/OpenMP/flush_codegen.cpp
+++ b/test/OpenMP/flush_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -30,4 +35,6 @@
 // CHECK: call {{.*}}void @__kmpc_flush(%{{.+}}* {{(@|%).+}})
 // CHECK: ret
 
+// CHECK-NOT: line: 0,
+
 #endif
diff --git a/test/OpenMP/flush_messages.cpp b/test/OpenMP/flush_messages.cpp
index 1c086a3..da1d0c2 100644
--- a/test/OpenMP/flush_messages.cpp
+++ b/test/OpenMP/flush_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 struct S1 { // expected-note 2 {{declared here}}
   int a;
 };
diff --git a/test/OpenMP/for_ast_print.cpp b/test/OpenMP/for_ast_print.cpp
index f489b21..82318ff 100644
--- a/test/OpenMP/for_ast_print.cpp
+++ b/test/OpenMP/for_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -165,7 +169,7 @@
     for (q = &buf[0]; q <= buf + 7; q++)
       foo();
   // CHECK: #pragma omp parallel
-  // CHECK-NEXT: #pragma omp for
+  // CHECK-NEXT: #pragma omp for{{$}}
   // CHECK-NEXT: for (p = buf; p < &buf[8]; p++)
   // CHECK-NEXT: for (q = &buf[0]; q <= buf + 7; q++)
   // CHECK-NEXT: foo();
diff --git a/test/OpenMP/for_codegen.cpp b/test/OpenMP/for_codegen.cpp
index 0d5972f..7c295a0 100644
--- a/test/OpenMP/for_codegen.cpp
+++ b/test/OpenMP/for_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
 // RUN: %clang_cc1 -main-file-name for_codegen.cpp %s -o - -emit-llvm -fprofile-instrument=clang -fprofile-instrument-path=for_codegen-test.profraw | FileCheck %s --check-prefix=PROF-INSTR-PATH
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -main-file-name for_codegen.cpp %s -o - -emit-llvm -fprofile-instrument=clang -fprofile-instrument-path=for_codegen-test.profraw | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 //
 // expected-no-diagnostics
 #ifndef HEADER
@@ -16,6 +23,16 @@
 // CHECK-DAG: [[J:@.+]] = global i8 2,
 // CHECK-DAG: [[K:@.+]] = global i8 3,
 
+// CHECK-LABEL: loop_with_counter_collapse
+void loop_with_counter_collapse() {
+  // CHECK: call void @__kmpc_for_static_init_8(%ident_t* @
+  // CHECK: call void @__kmpc_for_static_fini(%ident_t* @
+  #pragma omp for collapse(2)
+  for (int i = 0; i < 4; i++) {
+    for (int j = i; j < 4; j++) {
+    }
+  }
+}
 // CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
 void without_schedule_clause(float *a, float *b, float *c, float *d) {
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
@@ -330,8 +347,8 @@
 // CHECK-LABEL: test_precond
 void test_precond() {
   // CHECK: [[A_ADDR:%.+]] = alloca i8,
-  // CHECK: [[CAP:%.+]] = alloca i8,
   // CHECK: [[I_ADDR:%.+]] = alloca i8,
+  // CHECK: [[CAP:%.+]] = alloca i8,
   char a = 0;
   // CHECK: store i8 0,
   // CHECK: store i32
@@ -355,7 +372,7 @@
 void parallel_for(float *a) {
 #pragma omp parallel
 #pragma omp for schedule(static, 5)
-  // TERM_DEBUG:     __kmpc_global_thread_num
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
   // TERM_DEBUG:     call void @__kmpc_for_static_init_4u({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
   // TERM_DEBUG:     invoke i32 {{.*}}foo{{.*}}()
   // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
@@ -376,7 +393,9 @@
 char i = 1, j = 2, k = 3;
 // CHECK-LABEL: for_with_global_lcv
 void for_with_global_lcv() {
+// CHECK: alloca i8,
 // CHECK: [[I_ADDR:%.+]] = alloca i8,
+// CHECK: alloca i8,
 // CHECK: [[J_ADDR:%.+]] = alloca i8,
 
 // CHECK: call void @__kmpc_for_static_init_4(
diff --git a/test/OpenMP/for_collapse_messages.cpp b/test/OpenMP/for_collapse_messages.cpp
index a6fdf00..230880b 100644
--- a/test/OpenMP/for_collapse_messages.cpp
+++ b/test/OpenMP/for_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_firstprivate_codegen.cpp b/test/OpenMP/for_firstprivate_codegen.cpp
index 5907392..fc3caa9 100644
--- a/test/OpenMP/for_firstprivate_codegen.cpp
+++ b/test/OpenMP/for_firstprivate_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -81,6 +88,7 @@
     // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: [[G1_PRIVATE_REF:%.+]] = alloca i{{[0-9]+}}*,
@@ -154,6 +162,7 @@
     // BLOCKS: alloca i{{[0-9]+}},
     // BLOCKS: alloca i{{[0-9]+}},
     // BLOCKS: alloca i{{[0-9]+}},
+    // BLOCKS: alloca i{{[0-9]+}},
     // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
     // BLOCKS: [[G1_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
     // BLOCKS: [[SIVAR2_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
@@ -218,6 +227,7 @@
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
@@ -231,7 +241,7 @@
 
 // firstprivate vec(vec)
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
 
 // firstprivate s_arr(s_arr)
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
@@ -272,42 +282,34 @@
 // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: [[TVAR:%.+]] = alloca i32,
-// CHECK: [[TVAR_CAST:%.+]] = alloca i64,
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: [[TVAR_VAL:%.+]] = load i32, i32* [[TVAR]],
-// CHECK: [[TVAR_CONV:%.+]] = bitcast i64* [[TVAR_CAST]] to i32*
-// CHECK: store i32 [[TVAR_VAL]], i32* [[TVAR_CONV]],
-// CHECK: [[PVT_CASTVAL:%[^,]+]] = load i64, i64* [[TVAR_CAST]],
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void  (i32*, i32*, ...)*), i64 [[PVT_CASTVAL]],
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void  (i32*, i32*, ...)*), i32* [[TVAR]],
 // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
 // CHECK: ret
 //
-// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 {{.*}}%{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.+}}, [[S_INT_TY]]* dereferenceable(4) %{{.+}})
+// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.+}}, [[S_INT_TY]]* dereferenceable(4) %{{.+}})
 // Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: alloca i{{[0-9]+}},
-// CHECK: alloca i{{[0-9]+}},
-// CHECK: alloca i{{[0-9]+}},
-// CHECK: alloca i{{[0-9]+}},
-// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
-// CHECK: %{{.+}} = bitcast i64* [[T_VAR_PRIV]] to i32*
 
-// CHECK-NOT: load i{{[0-9]+}}*, i{{[0-9]+}}** %
+// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %
 // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** %
 // CHECK: [[S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** %
 // CHECK: [[VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 
-// firstprivate t_var(t_var)
-// CHECK-NOT: load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]],
-
 // firstprivate vec(vec)
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
 // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]],
 
 // firstprivate s_arr(s_arr)
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
diff --git a/test/OpenMP/for_firstprivate_messages.cpp b/test/OpenMP/for_firstprivate_messages.cpp
index 60be4f5..c34c918 100644
--- a/test/OpenMP/for_firstprivate_messages.cpp
+++ b/test/OpenMP/for_firstprivate_messages.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
 
 void foo() {
 }
diff --git a/test/OpenMP/for_lastprivate_codegen.cpp b/test/OpenMP/for_lastprivate_codegen.cpp
index 47c24ed..0061de6 100644
--- a/test/OpenMP/for_lastprivate_codegen.cpp
+++ b/test/OpenMP/for_lastprivate_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -213,6 +220,7 @@
     // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
@@ -407,6 +415,7 @@
 // BLOCKS: alloca i{{[0-9]+}},
 // BLOCKS: alloca i{{[0-9]+}},
 // BLOCKS: alloca i{{[0-9]+}},
+// BLOCKS: alloca i{{[0-9]+}},
 // BLOCKS: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
 // BLOCKS: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
 // BLOCKS: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
@@ -477,6 +486,7 @@
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
@@ -513,7 +523,7 @@
 // original vec[]=private_vec[];
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
 // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]],
 
 // original s_arr[]=private_s_arr[];
 // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
@@ -611,6 +621,7 @@
 // CHECK: ret void
 
 // CHECK: define internal void [[MAIN_MICROTASK3]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
+// CHECK: alloca i8,
 // CHECK: [[CNT_PRIV:%.+]] = alloca i8,
 
 // CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
@@ -673,6 +684,7 @@
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
@@ -740,7 +752,7 @@
 // original vec[]=private_vec[];
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
 // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]],
 
 // original s_arr[]=private_s_arr[];
 // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
diff --git a/test/OpenMP/for_lastprivate_messages.cpp b/test/OpenMP/for_lastprivate_messages.cpp
index 13ef35e..7787ab1 100644
--- a/test/OpenMP/for_lastprivate_messages.cpp
+++ b/test/OpenMP/for_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_linear_codegen.cpp b/test/OpenMP/for_linear_codegen.cpp
index 16a67c0..34e8c6a 100644
--- a/test/OpenMP/for_linear_codegen.cpp
+++ b/test/OpenMP/for_linear_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -157,6 +164,7 @@
     // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
@@ -181,6 +189,7 @@
 
     // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
     // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: [[G_START_ADDR:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: alloca i{{[0-9]+}},
@@ -233,6 +242,7 @@
   for (int i = 0; i < 2; ++i) {
     // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
     // BLOCKS: alloca i{{[0-9]+}},
+    // BLOCKS: alloca i{{[0-9]+}},
     // BLOCKS: [[G_START_ADDR:%.+]] = alloca i{{[0-9]+}},
     // BLOCKS: alloca i{{[0-9]+}},
     // BLOCKS: alloca i{{[0-9]+}},
@@ -307,6 +317,7 @@
 // BLOCKS: alloca i{{[0-9]+}},
 // BLOCKS: alloca i{{[0-9]+}},
 // BLOCKS: alloca i{{[0-9]+}},
+// BLOCKS: alloca i{{[0-9]+}},
 // BLOCKS: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
 // BLOCKS: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
 // BLOCKS: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
@@ -351,13 +362,13 @@
 
 // CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float** dereferenceable(8) %{{.+}}, i64* dereferenceable(8) %{{.+}})
 // CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[PVAR_START:%.+]] = alloca float*,
 // CHECK: [[LVAR_START:%.+]] = alloca i64,
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
-// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[PVAR_PRIV:%.+]] = alloca float*,
 // CHECK: [[LVAR_PRIV:%.+]] = alloca i64,
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
@@ -419,6 +430,7 @@
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
@@ -446,13 +458,13 @@
 
 // CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32** dereferenceable(8) %{{.+}}, i32* dereferenceable(4) %{{.+}})
 // CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[PVAR_START:%.+]] = alloca i32*,
 // CHECK: [[LVAR_START:%.+]] = alloca i32,
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
-// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[PVAR_PRIV:%.+]] = alloca i32*,
 // CHECK: [[LVAR_PRIV:%.+]] = alloca i32,
 // CHECK: [[LVAR_PRIV_REF:%.+]] = alloca i32*,
diff --git a/test/OpenMP/for_linear_messages.cpp b/test/OpenMP/for_linear_messages.cpp
index ab89349..622cd4a 100644
--- a/test/OpenMP/for_linear_messages.cpp
+++ b/test/OpenMP/for_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
   int x;
 };
diff --git a/test/OpenMP/for_loop_messages.cpp b/test/OpenMP/for_loop_messages.cpp
index 01cc602..37ff0fc 100644
--- a/test/OpenMP/for_loop_messages.cpp
+++ b/test/OpenMP/for_loop_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 class S {
   int a;
   S() : a(0) {}
diff --git a/test/OpenMP/for_misc_messages.c b/test/OpenMP/for_misc_messages.c
index 0a7cdf8..901d66d 100644
--- a/test/OpenMP/for_misc_messages.c
+++ b/test/OpenMP/for_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -triple x86_64-unknown-unknown -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -triple x86_64-unknown-unknown -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp for'}}
 #pragma omp for
 
@@ -52,6 +54,20 @@
 #pragma omp for foo bar
   for (i = 0; i < 16; ++i)
     ;
+// At one time, this failed an assert.
+// expected-error@+1 {{unexpected OpenMP clause 'num_teams' in directive '#pragma omp for'}}
+#pragma omp for num_teams(3)
+  for (i = 0; i < 16; ++i)
+    ;
+// At one time, this error was reported twice.
+// expected-error@+1 {{unexpected OpenMP clause 'uniform' in directive '#pragma omp for'}}
+#pragma omp for uniform
+  for (i = 0; i < 16; ++i)
+    ;
+// expected-error@+1 {{unexpected OpenMP clause 'if' in directive '#pragma omp for'}}
+#pragma omp for if(0)
+  for (i = 0; i < 16; ++i)
+    ;
 }
 
 void test_non_identifiers() {
diff --git a/test/OpenMP/for_ordered_clause.cpp b/test/OpenMP/for_ordered_clause.cpp
index 3335f40..ec29a98 100644
--- a/test/OpenMP/for_ordered_clause.cpp
+++ b/test/OpenMP/for_ordered_clause.cpp
@@ -2,6 +2,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_private_codegen.cpp b/test/OpenMP/for_private_codegen.cpp
index f1416d7..d914a62 100644
--- a/test/OpenMP/for_private_codegen.cpp
+++ b/test/OpenMP/for_private_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -40,7 +47,7 @@
 int main() {
   static int svar;
 #ifdef LAMBDA
-  // LAMBDA: [[G:@.+]] = global double
+  // LAMBDA: [[G:@.+]] = {{(dso_local )?}}global double
   // LAMBDA-LABEL: @main
   // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
   [&]() {
@@ -102,7 +109,7 @@
   }();
   return 0;
 #elif defined(BLOCKS)
-  // BLOCKS: [[G:@.+]] = global double
+  // BLOCKS: [[G:@.+]] = {{(dso_local )?}}global double
   // BLOCKS-LABEL: @main
   // BLOCKS: call {{.*}}void {{%.+}}(i8
   ^{
diff --git a/test/OpenMP/for_private_messages.cpp b/test/OpenMP/for_private_messages.cpp
index 4045c5b..5b0b562 100644
--- a/test/OpenMP/for_private_messages.cpp
+++ b/test/OpenMP/for_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_reduction_codegen.cpp b/test/OpenMP/for_reduction_codegen.cpp
index d5afae9..323692a 100644
--- a/test/OpenMP/for_reduction_codegen.cpp
+++ b/test/OpenMP/for_reduction_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -27,7 +34,7 @@
 // CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8*
 // CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer
 
-template <typename T>
+template <typename T, int length>
 T tmain() {
   T t;
   S<T> test;
@@ -36,6 +43,7 @@
   S<T> s_arr[] = {1, 2};
   S<T> &var = test;
   S<T> var1;
+  S<T> arr[length];
 #pragma omp parallel
 #pragma omp for reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1) nowait
   for (int i = 0; i < 2; ++i) {
@@ -48,6 +56,12 @@
     vec[i] = t_var;
     s_arr[i] = var;
   }
+#pragma omp parallel
+#pragma omp for reduction(+ : arr[1:length-2])
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
   return T();
 }
 
@@ -180,12 +194,12 @@
   S<float> test;
   float t_var = 0, t_var1;
   int vec[] = {1, 2};
-  S<float> s_arr[] = {1, 2};
+  S<float> s_arr[] = {1, 2, 3, 4};
   S<float> &var = test;
   S<float> var1, arrs[10][4];
   S<float> **var2 = foo();
-  S<float> vvar2[2];
-  S<float> (&var3)[2] = s_arr;
+  S<float> vvar2[5];
+  S<float> (&var3)[4] = s_arr;
 #pragma omp parallel
 #pragma omp for reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1)
   for (int i = 0; i < 2; ++i) {
@@ -200,11 +214,28 @@
 #pragma omp for reduction(+:arr) reduction(&:arrs)
   for (int i = 0; i < 10; ++i)
     ++arr[1][i];
+  // arr is a VLA, but the array section has constant length so we can generate a constant sized array!
+#pragma omp parallel
+#pragma omp for reduction(+:arr[1][0:2])
+  for (int i = 0; i < 10; ++i)
+    ++arr[1][i];
 #pragma omp parallel
 #pragma omp for reduction(& : var2[0 : 5][1 : 6])
   for (int i = 0; i < 10; ++i)
     ;
 #pragma omp parallel
+#pragma omp for reduction(& : var2[1][1 : 6])
+  for (int i = 0; i < 10; ++i)
+    ;
+#pragma omp parallel
+#pragma omp for reduction(& : var2[1 : 1][1 : 6])
+  for (int i = 0; i < 10; ++i)
+    ;
+#pragma omp parallel
+#pragma omp for reduction(& : var2[1 : 1][1])
+  for (int i = 0; i < 10; ++i)
+    ;
+#pragma omp parallel
 #pragma omp for reduction(& : vvar2[0 : 5])
   for (int i = 0; i < 10; ++i)
     ;
@@ -213,28 +244,43 @@
   for (int i = 0; i < 10; ++i)
     ;
 #pragma omp parallel
+#pragma omp for reduction(& : var3[ : 2])
+  for (int i = 0; i < 10; ++i)
+    ;
+  // TODO: The compiler should also be able to generate a constant sized array in this case!
+#pragma omp parallel
+#pragma omp for reduction(& : var3[2 : ])
+  for (int i = 0; i < 10; ++i)
+    ;
+#pragma omp parallel
 #pragma omp for reduction(& : var3)
   for (int i = 0; i < 10; ++i)
     ;
-  return tmain<int>();
+  return tmain<int, 42>();
 #endif
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, float*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*, [2 x i32]*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, float*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*, [2 x i32]*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK:@.+]] to void
 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, i64, i32*, [2 x i32]*, [10 x [4 x [[S_FLOAT_TY]]]]*)* [[MAIN_MICROTASK1:@.+]] to void
 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, i64, i32*, [10 x [4 x [[S_FLOAT_TY]]]]*)* [[MAIN_MICROTASK2:@.+]] to void
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK3:@.+]] to void
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK4:@.+]] to void
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK5:@.+]] to void
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK6:@.+]] to void
-// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 3, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, i64, i32*)* [[MAIN_MICROTASK3:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK4:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK5:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK6:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK7:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [5 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK8:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK9:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK10:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK11:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK12:@.+]] to void
+// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT_42:@.+]]()
 // CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
 // CHECK: ret
 //
-// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}, float* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %vec, [2 x [[S_FLOAT_TY]]]* dereferenceable(8) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}, float* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %vec, [4 x [[S_FLOAT_TY]]]* dereferenceable(16) %{{.+}})
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca float,
 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
@@ -306,7 +352,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // var1 = var1.operator &&(var1_reduction);
 // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_REF]])
@@ -322,7 +368,7 @@
 // CHECK:  call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // t_var1 = min(t_var1, t_var1_reduction);
 // CHECK: [[T_VAR1_VAL:%.+]] = load float, float* [[T_VAR1_REF]],
@@ -360,7 +406,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 
 // var1 = var1.operator &&(var1_reduction);
@@ -378,7 +424,7 @@
 // CHECK:  call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 
 // t_var1 = min(t_var1, t_var1_reduction);
@@ -464,7 +510,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_LHS]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_RHS]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_LHS]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // var1_lhs = var1_lhs.operator &&(var1_rhs);
 // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_LHS]])
@@ -480,7 +526,7 @@
 // CHECK:  call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_LHS]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // t_var1_lhs = min(t_var1_lhs, t_var1_rhs);
 // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load float, float* [[T_VAR1_LHS]],
@@ -491,7 +537,7 @@
 // CHECK: store float [[UP]], float* [[T_VAR1_LHS]],
 // CHECK: ret void
 
-// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(160) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}} %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(160) %{{.+}})
 
 // Reduction list for runtime.
 // CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*],
@@ -583,7 +629,7 @@
 // CHECK: phi [[S_FLOAT_TY]]*
 // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}})
 // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false)
 // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
 // CHECK: br i1 [[DONE]],
 
@@ -613,7 +659,7 @@
 // CHECK: call void @__kmpc_critical(
 // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}})
 // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
 // CHECK: br i1 [[DONE]],
@@ -686,13 +732,13 @@
 // CHECK: phi [[S_FLOAT_TY]]*
 // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}})
 // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false)
 // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
 // CHECK: br i1 [[DONE]],
 
 // CHECK: ret void
 
-// CHECK: define internal void [[MAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(160) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}} %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(160) %{{.+}})
 
 // CHECK: [[ARRS_PRIV:%.+]] = alloca [10 x [4 x [[S_FLOAT_TY]]]],
 
@@ -777,7 +823,7 @@
 // CHECK: phi [[S_FLOAT_TY]]*
 // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}})
 // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false)
 // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
 // CHECK: br i1 [[DONE]],
 
@@ -807,7 +853,7 @@
 // CHECK: call void @__kmpc_critical(
 // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}})
 // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
 // CHECK: br i1 [[DONE]],
@@ -876,13 +922,41 @@
 // CHECK: phi [[S_FLOAT_TY]]*
 // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}})
 // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false)
 // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
 // CHECK: br i1 [[DONE]],
 
 // CHECK: ret void
 
-// CHECK: define internal void [[MAIN_MICROTASK3]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[S_FLOAT_TY]]*** dereferenceable(8) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK3]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32* {{.*}} %{{.+}})
+
+// CHECK: [[VLA1_ORIG_ADDR:%.+]] = alloca i64
+// CHECK: [[VLA2_ORIG_ADDR:%.+]] = alloca i64
+// CHECK: [[ARR_ORIG_ADDR:%.+]] = alloca i32*,
+// CHECK: [[ARR_PRIV:%.+]] = alloca [1 x [2 x i32]],
+
+// Reduction list for runtime.
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*],
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
+// CHECK: [[VLA1:%.+]] = load i64, i64* [[VLA1_ORIG_ADDR]]
+// CHECK: [[VLA2:%.+]] = load i64, i64* [[VLA2_ORIG_ADDR]]
+// CHECK: [[ARR_ORIG:%.+]] = load i32*, i32** [[ARR_ORIG_ADDR]],
+
+// CHECK: [[LOW_OFFSET:%.+]] = mul nsw i64 1, [[VLA2]]
+// CHECK: [[ARRIDX:%.+]] = getelementptr inbounds i32, i32* [[ARR_ORIG]], i64 [[LOW_OFFSET]]
+// CHECK: [[LOW:%.+]] = getelementptr inbounds i32, i32* [[ARRIDX]], i64 0
+
+// CHECK: [[START:%.+]] = ptrtoint i32* [[ARR_ORIG]] to i64
+// CHECK: [[LOW_BOUND:%.+]] = ptrtoint i32* [[LOW]] to i64
+// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
+// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
+// CHECK: [[ARR_PRIV_PTR:%.+]] = bitcast [1 x [2 x i32]]* [[ARR_PRIV]] to i32*
+// CHECK: [[ARR_PRIV:%.+]] = getelementptr i32, i32* [[ARR_PRIV_PTR]], i64 [[OFFSET]]
+
+// CHECK: ret void
+
+// CHECK: define internal void [[MAIN_MICROTASK4]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[S_FLOAT_TY]]*** dereferenceable(8) %{{.+}})
 
 // CHECK: [[VAR2_ORIG_ADDR:%.+]] = alloca [[S_FLOAT_TY]]***,
 
@@ -910,67 +984,116 @@
 // CHECK: store [[S_FLOAT_TY]]* [[PSEUDO_VAR2_PRIV]], [[S_FLOAT_TY]]** [[REF]]
 // CHECK: ret void
 
-// CHECK: define internal void [[MAIN_MICROTASK4]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(8) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK5]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[S_FLOAT_TY]]*** dereferenceable(8) %{{.+}})
 
-// CHECK: [[VVAR2_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR2_ORIG_ADDR:%.+]] = alloca [[S_FLOAT_TY]]***,
+// CHECK: [[VAR2_PRIV:%.+]] = alloca [1 x [6 x [[S_FLOAT_TY]]]],
 
 // Reduction list for runtime.
-// CHECK: [[RED_LIST:%.+]] = alloca [2 x i8*],
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*],
 
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
-// CHECK: [[VVAR2_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VVAR2_ORIG_ADDR]],
+// CHECK: [[VAR2_ORIG:%.+]] = load [[S_FLOAT_TY]]***, [[S_FLOAT_TY]]**** [[VAR2_ORIG_ADDR]],
 
-// CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0
-// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64
-// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
-// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]]
-// CHECK: [[DIF:%.+]] = sdiv exact i64 [[BYTE_DIF]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
-// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1
-// CHECK: call i8* @llvm.stacksave()
-// CHECK: [[VVAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]],
-// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]*
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
+// CHECK: [[ARRIDX:%.+]] = getelementptr inbounds [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], i64 1
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[ARRIDX]],
+// CHECK: [[LOW:%.+]] = getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[LD]], i64 1
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
+
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
+// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]],
 // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
 // CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
-// CHECK: [[PSEUDO_VVAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VVAR2_PRIV]], i64 [[OFFSET]]
-// CHECK: [[VVAR2_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VVAR2_PRIV]] to [2 x [[S_FLOAT_TY]]]*
+// CHECK: [[VAR2_PRIV_PTR:%.+]] = bitcast [1 x [6 x [[S_FLOAT_TY]]]]* [[VAR2_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[VAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR2_PRIV_PTR]], i64 [[OFFSET]]
+// CHECK: store [[S_FLOAT_TY]]** [[REF:.+]], [[S_FLOAT_TY]]*** %
+// CHECK: store [[S_FLOAT_TY]]* [[VAR2_PRIV]], [[S_FLOAT_TY]]** [[REF]]
 // CHECK: ret void
 
-// CHECK: define internal void [[MAIN_MICROTASK5]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(8) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK6]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[S_FLOAT_TY]]*** dereferenceable(8) %{{.+}})
 
-// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR2_ORIG_ADDR:%.+]] = alloca [[S_FLOAT_TY]]***,
+// CHECK: [[VAR2_PRIV:%.+]] = alloca [1 x [6 x [[S_FLOAT_TY]]]],
 
 // Reduction list for runtime.
-// CHECK: [[RED_LIST:%.+]] = alloca [2 x i8*],
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*],
 
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
+// CHECK: [[VAR2_ORIG:%.+]] = load [[S_FLOAT_TY]]***, [[S_FLOAT_TY]]**** [[VAR2_ORIG_ADDR]],
 
-// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
-// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
-// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64
-// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW:%.+]] to i64
-// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]]
-// CHECK: [[DIF:%.+]] = sdiv exact i64 [[BYTE_DIF]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
-// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1
-// CHECK: call i8* @llvm.stacksave()
-// CHECK: [[VAR3_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]],
-// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
-// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
+// CHECK: [[ARRIDX:%.+]] = getelementptr inbounds [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], i64 1
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[ARRIDX]],
+// CHECK: [[LOW:%.+]] = getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[LD]], i64 1
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
+
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
+// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]],
 // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
 // CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
-// CHECK: [[PSEUDO_VAR3_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR3_PRIV]], i64 [[OFFSET]]
-// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [2 x [[S_FLOAT_TY]]]*
-
-// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** %
-
+// CHECK: [[VAR2_PRIV_PTR:%.+]] = bitcast [1 x [6 x [[S_FLOAT_TY]]]]* [[VAR2_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[VAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR2_PRIV_PTR]], i64 [[OFFSET]]
+// CHECK: store [[S_FLOAT_TY]]** [[REF:.+]], [[S_FLOAT_TY]]*** %
+// CHECK: store [[S_FLOAT_TY]]* [[VAR2_PRIV]], [[S_FLOAT_TY]]** [[REF]]
 // CHECK: ret void
 
-// CHECK: define internal void [[MAIN_MICROTASK6]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(8) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK7]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[S_FLOAT_TY]]*** dereferenceable(8) %{{.+}})
 
-// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR2_ORIG_ADDR:%.+]] = alloca [[S_FLOAT_TY]]***,
+// CHECK: [[VAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+
+// Reduction list for runtime.
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*],
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
+// CHECK: [[VAR2_ORIG:%.+]] = load [[S_FLOAT_TY]]***, [[S_FLOAT_TY]]**** [[VAR2_ORIG_ADDR]],
+
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
+// CHECK: [[ARRIDX:%.+]] = getelementptr inbounds [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], i64 1
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[ARRIDX]],
+// CHECK: [[LOW:%.+]] = getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[LD]], i64 1
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
+
+// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
+// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]],
+// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
+// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
+// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
+// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
+// CHECK: [[PSEUDO_VAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR2_PRIV]], i64 [[OFFSET]]
+// CHECK: store [[S_FLOAT_TY]]** [[REF:.+]], [[S_FLOAT_TY]]*** %
+// CHECK: store [[S_FLOAT_TY]]* [[PSEUDO_VAR2_PRIV]], [[S_FLOAT_TY]]** [[REF]]
+// CHECK: ret void
+
+// CHECK: define internal void [[MAIN_MICROTASK8]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [5 x [[S_FLOAT_TY]]]* dereferenceable(20) %{{.+}})
+
+// CHECK: [[VVAR2_ORIG_ADDR:%.+]] = alloca [5 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VVAR2_PRIV:%.+]] = alloca [5 x [[S_FLOAT_TY]]],
+
+// Reduction list for runtime.
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*],
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
+// CHECK: [[VVAR2_ORIG:%.+]] = load [5 x [[S_FLOAT_TY]]]*, [5 x [[S_FLOAT_TY]]]** [[VVAR2_ORIG_ADDR]],
+
+// CHECK: [[LOW:%.+]] = getelementptr inbounds [5 x [[S_FLOAT_TY]]], [5 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0
+// CHECK: [[ORIG_START:%.+]] = bitcast [5 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]*
+// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
+// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
+// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
+// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
+// CHECK: [[VVAR2_PRIV_PTR:%.+]] = bitcast [5 x [[S_FLOAT_TY]]]* [[VVAR2_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[VVAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VVAR2_PRIV_PTR]], i64 [[OFFSET]]
+// CHECK: ret void
+
+// CHECK: define internal void [[MAIN_MICROTASK9]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(16) %{{.+}})
+
+// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*,
 // CHECK: [[VAR3_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
 
 // Reduction list for runtime.
@@ -978,21 +1101,117 @@
 
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
-// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
-// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
-// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
-// CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0
-// CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 2
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
 
-// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** %
-// CHECK: bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
+// CHECK: [[LOW:%.+]] = getelementptr inbounds [4 x [[S_FLOAT_TY]]], [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 1
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: [[ORIG_START:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
+// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
+// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
+// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
+// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
+// CHECK: [[VAR3_PRIV_PTR:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[PSEUDO_VAR3_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR3_PRIV_PTR]], i64 [[OFFSET]]
+// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [4 x [[S_FLOAT_TY]]]*
+
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** %
 
 // CHECK: ret void
 
-// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
+// CHECK: define internal void [[MAIN_MICROTASK10]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(16) %{{.+}})
+
+// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR3_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+
+// Reduction list for runtime.
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*],
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
+
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+
+// CHECK: [[LOW:%.+]] = getelementptr inbounds [4 x [[S_FLOAT_TY]]], [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 0
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: [[ORIG_START:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
+// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
+// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
+// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
+// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
+// CHECK: [[VAR3_PRIV_PTR:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[PSEUDO_VAR3_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR3_PRIV_PTR]], i64 [[OFFSET]]
+// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [4 x [[S_FLOAT_TY]]]*
+
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** %
+
+// CHECK: ret void
+
+// CHECK: define internal void [[MAIN_MICROTASK11]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(16) %{{.+}})
+
+// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*,
+
+// Reduction list for runtime.
+// CHECK: [[RED_LIST:%.+]] = alloca [2 x i8*],
+
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+
+// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64
+// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW:%.+]] to i64
+// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]]
+// CHECK: [[DIF:%.+]] = sdiv exact i64 [[BYTE_DIF]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
+// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1
+// CHECK: call i8* @llvm.stacksave()
+// CHECK: [[VAR3_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]],
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: [[ORIG_START:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
+// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
+// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
+// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
+// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
+// CHECK: [[PSEUDO_VAR3_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR3_PRIV]], i64 [[OFFSET]]
+// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [4 x [[S_FLOAT_TY]]]*
+
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** %
+
+// CHECK: ret void
+
+// CHECK: define internal void [[MAIN_MICROTASK12]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(16) %{{.+}})
+
+// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR3_PRIV:%.+]] = alloca [4 x [[S_FLOAT_TY]]],
+
+// Reduction list for runtime.
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*],
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
+
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: getelementptr inbounds [4 x [[S_FLOAT_TY]]], [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0
+// CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 4
+
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** %
+// CHECK: bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
+
+// CHECK: ret void
+
+// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT_42]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [[S_INT_TY]]*, [[S_INT_TY]]*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*)* [[TMAIN_MICROTASK:@.+]] to void
+// Not interested in this one:
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [42 x [[S_INT_TY]]]*, [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK2:@.+]] to void
 // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
 // CHECK: ret
 //
@@ -1002,6 +1221,7 @@
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
 // CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_INT_TY]],
@@ -1072,7 +1292,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // var1 = var1.operator &&(var1_reduction);
 // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_REF]])
@@ -1088,7 +1308,7 @@
 // CHECK:  call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // t_var1 = min(t_var1, t_var1_reduction);
 // CHECK: [[T_VAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_REF]],
@@ -1114,7 +1334,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 
 // var1 = var1.operator &&(var1_reduction);
@@ -1132,7 +1352,7 @@
 // CHECK:  call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 
 // t_var1 = min(t_var1, t_var1_reduction);
@@ -1199,7 +1419,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_LHS]], [[S_INT_TY]]* dereferenceable(4) [[VAR_RHS]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_LHS]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // var1_lhs = var1_lhs.operator &&(var1_rhs);
 // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_LHS]])
@@ -1215,7 +1435,7 @@
 // CHECK:  call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_LHS]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // t_var1_lhs = min(t_var1_lhs, t_var1_rhs);
 // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_LHS]],
@@ -1226,5 +1446,28 @@
 // CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR1_LHS]],
 // CHECK: ret void
 
+// CHECK: define internal void [[TMAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [42 x [[S_INT_TY]]]* dereferenceable(168) %{{.*}}, [2 x i32]* dereferenceable(8) %{{.*}}, i32* dereferenceable(4) %{{.*}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.*}}, [[S_INT_TY]]* dereferenceable(4) %{{.*}})
+
+// CHECK: [[ARR_ORIG_ADDR:%.+]] = alloca [42 x [[S_INT_TY]]]*,
+// CHECK: [[ARR_PRIV:%.+]] = alloca [40 x [[S_INT_TY]]],
+
+// Reduction list for runtime.
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*],
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
+
+// CHECK: [[ARR_ORIG:%.+]] = load [42 x [[S_INT_TY]]]*, [42 x [[S_INT_TY]]]** [[ARR_ORIG_ADDR]],
+// CHECK: [[LOW:%.+]] = getelementptr inbounds [42 x [[S_INT_TY]]], [42 x [[S_INT_TY]]]* [[ARR_ORIG]], i64 0, i64 1
+// CHECK: [[ORIG_START:%.+]] = bitcast [42 x [[S_INT_TY]]]* [[ARR_ORIG]] to [[S_INT_TY]]*
+// CHECK: [[START:%.+]] = ptrtoint [[S_INT_TY]]* [[ORIG_START]] to i64
+// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_INT_TY]]* [[LOW]] to i64
+// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
+// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
+// CHECK: [[ARR_PRIV_PTR:%.+]] = bitcast [40 x [[S_INT_TY]]]* [[ARR_PRIV]] to [[S_INT_TY]]*
+// CHECK: [[PSEUDO_ARR_PRIV:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[ARR_PRIV_PTR]], i64 [[OFFSET]]
+// CHECK: [[ARR_PRIV:%.+]] = bitcast [[S_INT_TY]]* [[PSEUDO_ARR_PRIV]] to [42 x [[S_INT_TY]]]*
+
+// CHECK: ret void
+
 #endif
 
diff --git a/test/OpenMP/for_reduction_codegen_UDR.cpp b/test/OpenMP/for_reduction_codegen_UDR.cpp
index 95b04f4..94fcfa9 100644
--- a/test/OpenMP/for_reduction_codegen_UDR.cpp
+++ b/test/OpenMP/for_reduction_codegen_UDR.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -40,7 +45,7 @@
 // CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer
 
 #pragma omp declare reduction(operator&& : int : omp_out = 111 & omp_in)
-template <typename T>
+template <typename T, int length>
 T tmain() {
   T t;
   S<T> test;
@@ -49,6 +54,7 @@
   S<T> s_arr[] = {1, 2};
   S<T> &var = test;
   S<T> var1;
+  S<T> arr[length];
 #pragma omp declare reduction(operator& : T : omp_out = 15 + omp_in)
 #pragma omp declare reduction(operator+ : T : omp_out = 1513 + omp_in) initializer(omp_priv = 321)
 #pragma omp declare reduction(min : T : omp_out = 47 - omp_in) initializer(omp_priv = 432 / omp_orig)
@@ -66,6 +72,12 @@
     vec[i] = t_var;
     s_arr[i] = var;
   }
+#pragma omp parallel
+#pragma omp for reduction(+ : arr[1:length-2])
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
   return T();
 }
 
@@ -78,12 +90,12 @@
   S<float> test;
   float t_var = 0, t_var1;
   int vec[] = {1, 2};
-  S<float> s_arr[] = {1, 2};
+  S<float> s_arr[] = {1, 2, 3, 4};
   S<float> &var = test;
   S<float> var1, arrs[10][4];
   S<float> **var2 = foo();
-  S<float> vvar2[2];
-  S<float>(&var3)[2] = s_arr;
+  S<float> vvar2[5];
+  S<float>(&var3)[4] = s_arr;
 #pragma omp declare reduction(operator+ : int : omp_out = 555 * omp_in) initializer(omp_priv = 888)
 #pragma omp parallel
 #pragma omp for reduction(+ : t_var) reduction(& : var) reduction(&& : var1) reduction(min : t_var1)
@@ -115,24 +127,24 @@
 #pragma omp for reduction(& : var3)
   for (int i = 0; i < 10; ++i)
     ;
-  return tmain<int>();
+  return tmain<int, 42>();
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, float*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*, [2 x i32]*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, float*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*, [2 x i32]*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK:@.+]] to void
 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, i64, i32*, [2 x i32]*, [10 x [4 x [[S_FLOAT_TY]]]]*)* [[MAIN_MICROTASK1:@.+]] to void
 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, i64, i32*, [10 x [4 x [[S_FLOAT_TY]]]]*)* [[MAIN_MICROTASK2:@.+]] to void
 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK3:@.+]] to void
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK4:@.+]] to void
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK5:@.+]] to void
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK6:@.+]] to void
-// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [5 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK4:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK5:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK6:@.+]] to void
+// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT_42:@.+]]()
 // CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
 // CHECK: ret
 //
-// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(12) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(12) %{{.+}}, float* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %vec, [2 x [[S_FLOAT_TY]]]* dereferenceable(24) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(12) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(12) %{{.+}}, float* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %vec, [4 x [[S_FLOAT_TY]]]* dereferenceable(48) %{{.+}})
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca float,
 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
@@ -300,7 +312,7 @@
 // CHECK: fadd float 5.550000e+02, %
 // CHECK: ret void
 
-// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(480) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}} %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(480) %{{.+}})
 
 // Reduction list for runtime.
 // CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*],
@@ -496,7 +508,7 @@
 
 // CHECK: ret void
 
-// CHECK: define internal void [[MAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(480) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}} %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(480) %{{.+}})
 
 // CHECK: [[ARRS_PRIV:%.+]] = alloca [10 x [4 x [[S_FLOAT_TY]]]],
 
@@ -711,67 +723,30 @@
 // CHECK: store [[S_FLOAT_TY]]* [[PSEUDO_VAR2_PRIV]], [[S_FLOAT_TY]]** [[REF]]
 // CHECK: ret void
 
-// CHECK: define internal void [[MAIN_MICROTASK4]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(24) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK4]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [5 x [[S_FLOAT_TY]]]* dereferenceable(60) %{{.+}})
 
-// CHECK: [[VVAR2_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VVAR2_ORIG_ADDR:%.+]] = alloca [5 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VVAR2_PRIV:%.+]] = alloca [5 x [[S_FLOAT_TY]]],
 
 // Reduction list for runtime.
-// CHECK: [[RED_LIST:%.+]] = alloca [2 x i8*],
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*],
 
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
-// CHECK: [[VVAR2_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VVAR2_ORIG_ADDR]],
+// CHECK: [[VVAR2_ORIG:%.+]] = load [5 x [[S_FLOAT_TY]]]*, [5 x [[S_FLOAT_TY]]]** [[VVAR2_ORIG_ADDR]],
 
-// CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0
-// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64
-// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
-// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]]
-// CHECK: [[DIF:%.+]] = sdiv exact i64 [[BYTE_DIF]], ptrtoint ([[S_FLOAT_TY]]* getelementptr ([[S_FLOAT_TY]], [[S_FLOAT_TY]]* null, i32 1) to i64)
-// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1
-// CHECK: call i8* @llvm.stacksave()
-// CHECK: [[VVAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]],
-// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]*
+// CHECK: [[LOW:%.+]] = getelementptr inbounds [5 x [[S_FLOAT_TY]]], [5 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0
+// CHECK: [[ORIG_START:%.+]] = bitcast [5 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]*
 // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
 // CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint ([[S_FLOAT_TY]]* getelementptr ([[S_FLOAT_TY]], [[S_FLOAT_TY]]* null, i32 1) to i64)
-// CHECK: [[PSEUDO_VVAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VVAR2_PRIV]], i64 [[OFFSET]]
-// CHECK: [[VVAR2_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VVAR2_PRIV]] to [2 x [[S_FLOAT_TY]]]*
+// CHECK: [[VVAR2_PRIV_PTR:%.+]] = bitcast [5 x [[S_FLOAT_TY]]]* [[VVAR2_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[VVAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VVAR2_PRIV_PTR]], i64 [[OFFSET]]
 // CHECK: ret void
 
-// CHECK: define internal void [[MAIN_MICROTASK5]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(24) %{{.+}})
+// CHECK: define internal void [[MAIN_MICROTASK5]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(48) %{{.+}})
 
-// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
-
-// Reduction list for runtime.
-// CHECK: [[RED_LIST:%.+]] = alloca [2 x i8*],
-
-// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
-
-// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
-// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
-// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64
-// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW:%.+]] to i64
-// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]]
-// CHECK: [[DIF:%.+]] = sdiv exact i64 [[BYTE_DIF]], ptrtoint ([[S_FLOAT_TY]]* getelementptr ([[S_FLOAT_TY]], [[S_FLOAT_TY]]* null, i32 1) to i64)
-// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1
-// CHECK: call i8* @llvm.stacksave()
-// CHECK: [[VAR3_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]],
-// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
-// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
-// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
-// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
-// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
-// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint ([[S_FLOAT_TY]]* getelementptr ([[S_FLOAT_TY]], [[S_FLOAT_TY]]* null, i32 1) to i64)
-// CHECK: [[PSEUDO_VAR3_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR3_PRIV]], i64 [[OFFSET]]
-// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [2 x [[S_FLOAT_TY]]]*
-
-// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** %
-
-// CHECK: ret void
-
-// CHECK: define internal void [[MAIN_MICROTASK6]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(24) %{{.+}})
-
-// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*,
 // CHECK: [[VAR3_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
 
 // Reduction list for runtime.
@@ -779,21 +754,56 @@
 
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
-// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
-// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
-// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
-// CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0
-// CHECK: bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
-// CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 2
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
 
-// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** %
+// CHECK: [[LOW:%.+]] = getelementptr inbounds [4 x [[S_FLOAT_TY]]], [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 1
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: [[ORIG_START:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
+// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
+// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
+// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
+// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint ([[S_FLOAT_TY]]* getelementptr ([[S_FLOAT_TY]], [[S_FLOAT_TY]]* null, i32 1) to i64)
+// CHECK: [[VAR3_PRIV_PTR:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[PSEUDO_VAR3_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR3_PRIV_PTR]], i64 [[OFFSET]]
+// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [4 x [[S_FLOAT_TY]]]*
+
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** %
 
 // CHECK: ret void
 
-// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
+// CHECK: define internal void [[MAIN_MICROTASK6]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(48) %{{.+}})
+
+// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR3_PRIV:%.+]] = alloca [4 x [[S_FLOAT_TY]]],
+
+// Reduction list for runtime.
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*],
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
+
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: getelementptr inbounds [4 x [[S_FLOAT_TY]]], [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0
+// CHECK: bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
+// CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 4
+
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** %
+
+// CHECK: ret void
+
+// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT_42]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [[S_INT_TY]]*, [[S_INT_TY]]*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*)* [[TMAIN_MICROTASK:@.+]] to void
+// Not interested in this one:
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [42 x [[S_INT_TY]]]*, [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK2:@.+]] to void
+// CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
 // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
 // CHECK: ret
 //
@@ -803,6 +813,7 @@
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
 // CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_INT_TY]],
@@ -964,5 +975,28 @@
 // CHECK: sub nsw i32 47, %
 // CHECK: ret void
 
+// CHECK: define internal void [[TMAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [42 x [[S_INT_TY]]]* dereferenceable(504) %{{.*}}, [2 x i32]* dereferenceable(8) %{{.*}}, i32* dereferenceable(4) %{{.*}}, [2 x [[S_INT_TY]]]* dereferenceable(24) %{{.*}}, [[S_INT_TY]]* dereferenceable(12) %{{.*}})
+
+// CHECK: [[ARR_ORIG_ADDR:%.+]] = alloca [42 x [[S_INT_TY]]]*,
+// CHECK: [[ARR_PRIV:%.+]] = alloca [40 x [[S_INT_TY]]],
+
+// Reduction list for runtime.
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*],
+
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
+
+// CHECK: [[ARR_ORIG:%.+]] = load [42 x [[S_INT_TY]]]*, [42 x [[S_INT_TY]]]** [[ARR_ORIG_ADDR]],
+// CHECK: [[LOW:%.+]] = getelementptr inbounds [42 x [[S_INT_TY]]], [42 x [[S_INT_TY]]]* [[ARR_ORIG]], i64 0, i64 1
+// CHECK: [[ORIG_START:%.+]] = bitcast [42 x [[S_INT_TY]]]* [[ARR_ORIG]] to [[S_INT_TY]]*
+// CHECK: [[START:%.+]] = ptrtoint [[S_INT_TY]]* [[ORIG_START]] to i64
+// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_INT_TY]]* [[LOW]] to i64
+// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
+// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint ([[S_INT_TY]]* getelementptr ([[S_INT_TY]], [[S_INT_TY]]* null, i32 1) to i64)
+// CHECK: [[ARR_PRIV_PTR:%.+]] = bitcast [40 x [[S_INT_TY]]]* [[ARR_PRIV]] to [[S_INT_TY]]*
+// CHECK: [[PSEUDO_ARR_PRIV:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[ARR_PRIV_PTR]], i64 [[OFFSET]]
+// CHECK: [[ARR_PRIV:%.+]] = bitcast [[S_INT_TY]]* [[PSEUDO_ARR_PRIV]] to [42 x [[S_INT_TY]]]*
+
+// CHECK: ret void
+
 #endif
 
diff --git a/test/OpenMP/for_reduction_messages.cpp b/test/OpenMP/for_reduction_messages.cpp
index d4f4a79..a4f15aa 100644
--- a/test/OpenMP/for_reduction_messages.cpp
+++ b/test/OpenMP/for_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_schedule_messages.cpp b/test/OpenMP/for_schedule_messages.cpp
index 8946ce3..f4b5b3a 100644
--- a/test/OpenMP/for_schedule_messages.cpp
+++ b/test/OpenMP/for_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_simd_aligned_messages.cpp b/test/OpenMP/for_simd_aligned_messages.cpp
index 3ff2587..fce9aca 100644
--- a/test/OpenMP/for_simd_aligned_messages.cpp
+++ b/test/OpenMP/for_simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/for_simd_ast_print.cpp b/test/OpenMP/for_simd_ast_print.cpp
index befad16..13e82b4 100644
--- a/test/OpenMP/for_simd_ast_print.cpp
+++ b/test/OpenMP/for_simd_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -33,7 +37,7 @@
   }
 };
 
-// CHECK: #pragma omp for simd private(this->a) private(this->a) private(T::a)
+// CHECK: #pragma omp for simd private(this->a) private(this->a) private(T::a){{$}}
 // CHECK: #pragma omp for simd private(this->a) private(this->a)
 // CHECK: #pragma omp for simd private(this->a) private(this->a) private(this->S1::a)
 
diff --git a/test/OpenMP/for_simd_codegen.cpp b/test/OpenMP/for_simd_codegen.cpp
index e33bfe4..3cd06d4 100644
--- a/test/OpenMP/for_simd_codegen.cpp
+++ b/test/OpenMP/for_simd_codegen.cpp
@@ -2,6 +2,12 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t < %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify -emit-llvm -o - < %s | FileCheck %s
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm -o - < %s | FileCheck %s --check-prefix=TERM_DEBUG
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm -fexceptions -fcxx-exceptions -o - < %s | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t < %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify -emit-llvm -o - < %s | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm -o - < %s | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -673,7 +679,7 @@
 void parallel_simd(float *a) {
 #pragma omp parallel
 #pragma omp for simd
-  // TERM_DEBUG:     __kmpc_global_thread_num
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
   // TERM_DEBUG:     invoke i32 {{.*}}bar{{.*}}()
   // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
   // TERM_DEBUG-NOT: __kmpc_global_thread_num
@@ -684,4 +690,5 @@
     a[i] += bar();
 }
 // TERM_DEBUG: !{{[0-9]+}} = !DILocation(line: [[@LINE-11]],
+// TERM_DEBUG-NOT: line: 0,
 #endif // HEADER
diff --git a/test/OpenMP/for_simd_collapse_messages.cpp b/test/OpenMP/for_simd_collapse_messages.cpp
index 2efd494..9e9b8ee 100644
--- a/test/OpenMP/for_simd_collapse_messages.cpp
+++ b/test/OpenMP/for_simd_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_simd_firstprivate_messages.cpp b/test/OpenMP/for_simd_firstprivate_messages.cpp
index 4e96866..ceacdb6 100644
--- a/test/OpenMP/for_simd_firstprivate_messages.cpp
+++ b/test/OpenMP/for_simd_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_simd_lastprivate_messages.cpp b/test/OpenMP/for_simd_lastprivate_messages.cpp
index a176a2d..9ed2232 100644
--- a/test/OpenMP/for_simd_lastprivate_messages.cpp
+++ b/test/OpenMP/for_simd_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_simd_linear_messages.cpp b/test/OpenMP/for_simd_linear_messages.cpp
index 3f93125..ff522e7 100644
--- a/test/OpenMP/for_simd_linear_messages.cpp
+++ b/test/OpenMP/for_simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
   int x;
 };
diff --git a/test/OpenMP/for_simd_loop_messages.cpp b/test/OpenMP/for_simd_loop_messages.cpp
index ea17a83..a3ae84e 100644
--- a/test/OpenMP/for_simd_loop_messages.cpp
+++ b/test/OpenMP/for_simd_loop_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 class S {
   int a;
   S() : a(0) {}
diff --git a/test/OpenMP/for_simd_misc_messages.c b/test/OpenMP/for_simd_misc_messages.c
index ca1e366..4da2e6b 100644
--- a/test/OpenMP/for_simd_misc_messages.c
+++ b/test/OpenMP/for_simd_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp for simd'}}
 #pragma omp for simd
 
diff --git a/test/OpenMP/for_simd_private_messages.cpp b/test/OpenMP/for_simd_private_messages.cpp
index ca4c3a3..8fe6d7b 100644
--- a/test/OpenMP/for_simd_private_messages.cpp
+++ b/test/OpenMP/for_simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_simd_reduction_messages.cpp b/test/OpenMP/for_simd_reduction_messages.cpp
index a77ab75..1f5578e 100644
--- a/test/OpenMP/for_simd_reduction_messages.cpp
+++ b/test/OpenMP/for_simd_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_simd_safelen_messages.cpp b/test/OpenMP/for_simd_safelen_messages.cpp
index fa1b444..31b0f84 100644
--- a/test/OpenMP/for_simd_safelen_messages.cpp
+++ b/test/OpenMP/for_simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_simd_schedule_messages.cpp b/test/OpenMP/for_simd_schedule_messages.cpp
index 76ba3a4..b330f8c 100644
--- a/test/OpenMP/for_simd_schedule_messages.cpp
+++ b/test/OpenMP/for_simd_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/for_simd_simdlen_messages.cpp b/test/OpenMP/for_simd_simdlen_messages.cpp
index 8fe1979..0995428 100644
--- a/test/OpenMP/for_simd_simdlen_messages.cpp
+++ b/test/OpenMP/for_simd_simdlen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/function-attr.cpp b/test/OpenMP/function-attr.cpp
index 87aba40..a370cc3 100644
--- a/test/OpenMP/function-attr.cpp
+++ b/test/OpenMP/function-attr.cpp
@@ -1,5 +1,8 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10  -stack-protector 2 -emit-llvm -o - %s | FileCheck %s
 
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10  -stack-protector 2 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Check that function attributes are added to the OpenMP runtime functions.
 
 template <class T>
diff --git a/test/OpenMP/is_initial_device.c b/test/OpenMP/is_initial_device.c
index 0521f0b..2fe93a4 100644
--- a/test/OpenMP/is_initial_device.c
+++ b/test/OpenMP/is_initial_device.c
@@ -7,6 +7,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c -triple powerpc64le-unknown-unknown -emit-llvm -fopenmp-is-device \
 // RUN:             %s -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefixes DEVICE,OUTLINED
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-unknown-unknown -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x ir -triple powerpc64le-unknown-unknown -emit-llvm %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple powerpc64le-unknown-unknown -emit-llvm -fopenmp-is-device %s -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 int check() {
   int host = omp_is_initial_device();
diff --git a/test/OpenMP/linking.c b/test/OpenMP/linking.c
index 71e978f..802553c 100644
--- a/test/OpenMP/linking.c
+++ b/test/OpenMP/linking.c
@@ -20,9 +20,19 @@
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
 // RUN:     -fopenmp=libgomp -target i386-unknown-linux -rtlib=platform \
 // RUN:   | FileCheck --check-prefix=CHECK-GOMP-LD-32 %s
+
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 -fopenmp-simd -target i386-unknown-linux -rtlib=platform | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: lgomp
+// SIMD-ONLY2-NOT: lomp
+// SIMD-ONLY2-NOT: liomp
 // CHECK-GOMP-LD-32: "{{.*}}ld{{(.exe)?}}"
 // CHECK-GOMP-LD-32: "-lgomp" "-lrt"
 // CHECK-GOMP-LD-32: "-lpthread" "-lc"
+
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 -fopenmp-simd -target i386-unknown-linux -rtlib=platform | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: lgomp
+// SIMD-ONLY2-NOT: lomp
+// SIMD-ONLY2-NOT: liomp
 //
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
 // RUN:     -fopenmp=libgomp -target x86_64-unknown-linux -rtlib=platform \
@@ -79,10 +89,20 @@
 // CHECK-MSVC-LINK-64-SAME: -nodefaultlib:vcompd.lib
 // CHECK-MSVC-LINK-64-SAME: -libpath:{{.+}}/../lib
 // CHECK-MSVC-LINK-64-SAME: -defaultlib:libomp.lib
+
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 -fopenmp-simd -target x86_64-msvc-win32 -rtlib=platform | FileCheck --check-prefix SIMD-ONLY11 %s
+// SIMD-ONLY11-NOT: libiomp
+// SIMD-ONLY11-NOT: libomp
+// SIMD-ONLY11-NOT: libgomp
 //
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
 // RUN:     -fopenmp=libiomp5 -target x86_64-msvc-win32 -rtlib=platform \
 // RUN:   | FileCheck --check-prefix=CHECK-MSVC-ILINK-64 %s
+
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 -fopenmp-simd -target x86_64-msvc-win32 -rtlib=platform | FileCheck --check-prefix SIMD-ONLY11 %s
+// SIMD-ONLY11-NOT: libiomp
+// SIMD-ONLY11-NOT: libomp
+// SIMD-ONLY11-NOT: libgomp
 // CHECK-MSVC-ILINK-64: link.exe
 // CHECK-MSVC-ILINK-64-SAME: -nodefaultlib:vcomp.lib
 // CHECK-MSVC-ILINK-64-SAME: -nodefaultlib:vcompd.lib
diff --git a/test/OpenMP/loops_explicit_clauses_codegen.cpp b/test/OpenMP/loops_explicit_clauses_codegen.cpp
index dc21fd1..43fbd56 100644
--- a/test/OpenMP/loops_explicit_clauses_codegen.cpp
+++ b/test/OpenMP/loops_explicit_clauses_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 
diff --git a/test/OpenMP/master_ast_print.cpp b/test/OpenMP/master_ast_print.cpp
index a36dd02..40b14c5 100644
--- a/test/OpenMP/master_ast_print.cpp
+++ b/test/OpenMP/master_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -21,7 +25,7 @@
 }
 // CHECK-NEXT: #pragma omp parallel
 // CHECK-NEXT: {
-// CHECK-NEXT: #pragma omp master
+// CHECK-NEXT: #pragma omp master{{$}}
 // CHECK-NEXT: {
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: }
diff --git a/test/OpenMP/master_codegen.cpp b/test/OpenMP/master_codegen.cpp
index d61b476..d5b8036 100644
--- a/test/OpenMP/master_codegen.cpp
+++ b/test/OpenMP/master_codegen.cpp
@@ -2,6 +2,12 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -49,7 +55,7 @@
 void parallel_master() {
 #pragma omp parallel
 #pragma omp master
-  // TERM_DEBUG:     __kmpc_global_thread_num
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
   // TERM_DEBUG:     call i32 @__kmpc_master({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
   // TERM_DEBUG:     invoke void {{.*}}foo{{.*}}()
   // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
diff --git a/test/OpenMP/master_messages.cpp b/test/OpenMP/master_messages.cpp
index 397f139..26d6bc8 100644
--- a/test/OpenMP/master_messages.cpp
+++ b/test/OpenMP/master_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 int foo();
 
 int main() {
diff --git a/test/OpenMP/nesting_of_regions.cpp b/test/OpenMP/nesting_of_regions.cpp
index 6282de4..0955ee2 100644
--- a/test/OpenMP/nesting_of_regions.cpp
+++ b/test/OpenMP/nesting_of_regions.cpp
@@ -1,5 +1,8 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 void bar();
 
 template <class T>
diff --git a/test/OpenMP/no_option.c b/test/OpenMP/no_option.c
index 8a65b03..b9bc208 100644
--- a/test/OpenMP/no_option.c
+++ b/test/OpenMP/no_option.c
@@ -1,4 +1,7 @@
 // RUN: %clang_cc1 -verify -o - %s
+
+// RUN: %clang_cc1 -verify -o - %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 int a;
diff --git a/test/OpenMP/no_option_no_warn.c b/test/OpenMP/no_option_no_warn.c
index 10778c0..8755638 100644
--- a/test/OpenMP/no_option_no_warn.c
+++ b/test/OpenMP/no_option_no_warn.c
@@ -1,4 +1,7 @@
 // RUN: %clang_cc1 -verify -Wno-source-uses-openmp -o - %s
+
+// RUN: %clang_cc1 -verify -Wno-source-uses-openmp -o - %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 int a;
diff --git a/test/OpenMP/nvptx_data_sharing.cpp b/test/OpenMP/nvptx_data_sharing.cpp
new file mode 100644
index 0000000..53bac3c
--- /dev/null
+++ b/test/OpenMP/nvptx_data_sharing.cpp
@@ -0,0 +1,57 @@
+// Test device data sharing codegen.
+///==========================================================================///
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1
+
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+void test_ds(){
+  #pragma omp target
+  {
+    int a = 10;
+    #pragma omp parallel
+    {
+       a = 1000;
+    }
+  }
+}
+
+/// ========= In the worker function ========= ///
+
+// CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker() [[ATTR1:#.*]] {
+// CK1: [[SHAREDARGS:%.+]] = alloca i8**
+// CK1: call i1 @__kmpc_kernel_parallel(i8** %work_fn, i8*** [[SHAREDARGS]], i16 1)
+// CK1: [[SHARGSTMP:%.+]] = load i8**, i8*** [[SHAREDARGS]]
+// CK1: call void @__omp_outlined___wrapper{{.*}}({{.*}}, i8** [[SHARGSTMP]])
+
+/// ========= In the kernel function ========= ///
+
+// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() [[ATTR2:#.*]] {
+// CK1: [[SHAREDARGS1:%.+]] = alloca i8**
+// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i8*** [[SHAREDARGS1]], i32 1, i16 1)
+// CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]]
+// CK1: [[SHARGSTMP2:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP1]]
+// CK1: [[SHAREDVAR:%.+]] = bitcast i32* {{.*}} to i8*
+// CK1: store i8* [[SHAREDVAR]], i8** [[SHARGSTMP2]]
+
+/// ========= In the data sharing wrapper function ========= ///
+
+// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**) [[ATTR1]] {
+// CK1: [[SHAREDARGS2:%.+]] = alloca i8**
+// CK1: store i8** %2, i8*** [[SHAREDARGS2]]
+// CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]]
+// CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]]
+// CK1: [[SHARGSTMP5:%.+]] = bitcast i8** [[SHARGSTMP4]] to i32**
+// CK1: [[SHARGSTMP6:%.+]] = load i32*, i32** [[SHARGSTMP5]]
+// CK1: call void @__omp_outlined__({{.*}}, i32* [[SHARGSTMP6]])
+
+/// ========= Attributes ========= ///
+
+// CK1-NOT: attributes [[ATTR1]] = { {{.*}}"has-nvptx-shared-depot"{{.*}} }
+// CK1: attributes [[ATTR2]] = { {{.*}}"has-nvptx-shared-depot"{{.*}} }
+
+#endif
diff --git a/test/OpenMP/nvptx_parallel_codegen.cpp b/test/OpenMP/nvptx_parallel_codegen.cpp
index 224f245..ba889bf 100644
--- a/test/OpenMP/nvptx_parallel_codegen.cpp
+++ b/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -78,7 +78,7 @@
   //
   // CHECK: [[AWAIT_WORK]]
   // CHECK: call void @llvm.nvvm.barrier0()
-  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
+  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]],
   // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
   // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
   // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
@@ -92,20 +92,20 @@
   //
   // CHECK: [[EXEC_PARALLEL]]
   // CHECK: [[WF1:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
-  // CHECK: [[WM1:%.+]] = icmp eq i8* [[WF1]], bitcast (void (i32*, i32*)* [[PARALLEL_FN1:@.+]] to i8*)
+  // CHECK: [[WM1:%.+]] = icmp eq i8* [[WF1]], bitcast (void (i16, i32, i8**)* [[PARALLEL_FN1:@.+]]_wrapper to i8*)
   // CHECK: br i1 [[WM1]], label {{%?}}[[EXEC_PFN1:.+]], label {{%?}}[[CHECK_NEXT1:.+]]
   //
   // CHECK: [[EXEC_PFN1]]
-  // CHECK: call void [[PARALLEL_FN1]](
+  // CHECK: call void [[PARALLEL_FN1]]_wrapper(
   // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]]
   //
   // CHECK: [[CHECK_NEXT1]]
   // CHECK: [[WF2:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
-  // CHECK: [[WM2:%.+]] = icmp eq i8* [[WF2]], bitcast (void (i32*, i32*)* [[PARALLEL_FN2:@.+]] to i8*)
+  // CHECK: [[WM2:%.+]] = icmp eq i8* [[WF2]], bitcast (void (i16, i32, i8**)* [[PARALLEL_FN2:@.+]]_wrapper to i8*)
   // CHECK: br i1 [[WM2]], label {{%?}}[[EXEC_PFN2:.+]], label {{%?}}[[CHECK_NEXT2:.+]]
   //
   // CHECK: [[EXEC_PFN2]]
-  // CHECK: call void [[PARALLEL_FN2]](
+  // CHECK: call void [[PARALLEL_FN2]]_wrapper(
   // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]]
   //
   // CHECK: [[CHECK_NEXT2]]
@@ -152,13 +152,13 @@
   // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
   // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
   // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
-  // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i32*, i32*)* [[PARALLEL_FN1]] to i8*))
+  // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32, i8**)* [[PARALLEL_FN1]]_wrapper to i8*),
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: call void @__kmpc_serialized_parallel(
   // CHECK: {{call|invoke}} void [[PARALLEL_FN3:@.+]](
   // CHECK: call void @__kmpc_end_serialized_parallel(
-  // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i32*, i32*)* [[PARALLEL_FN2]] to i8*))
+  // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32, i8**)* [[PARALLEL_FN2]]_wrapper to i8*),
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK-64-DAG: load i32, i32* [[REF_A]]
@@ -166,7 +166,7 @@
   // CHECK: br label {{%?}}[[TERMINATE:.+]]
   //
   // CHECK: [[TERMINATE]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: br label {{%?}}[[EXIT]]
   //
@@ -203,7 +203,7 @@
   //
   // CHECK: [[AWAIT_WORK]]
   // CHECK: call void @llvm.nvvm.barrier0()
-  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
+  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]],
   // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
   // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
   // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
@@ -217,11 +217,11 @@
   //
   // CHECK: [[EXEC_PARALLEL]]
   // CHECK: [[WF:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
-  // CHECK: [[WM:%.+]] = icmp eq i8* [[WF]], bitcast (void (i32*, i32*)* [[PARALLEL_FN4:@.+]] to i8*)
+  // CHECK: [[WM:%.+]] = icmp eq i8* [[WF]], bitcast (void (i16, i32, i8**)* [[PARALLEL_FN4:@.+]]_wrapper to i8*)
   // CHECK: br i1 [[WM]], label {{%?}}[[EXEC_PFN:.+]], label {{%?}}[[CHECK_NEXT:.+]]
   //
   // CHECK: [[EXEC_PFN]]
-  // CHECK: call void [[PARALLEL_FN4]](
+  // CHECK: call void [[PARALLEL_FN4]]_wrapper(
   // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]]
   //
   // CHECK: [[CHECK_NEXT]]
@@ -283,7 +283,7 @@
   // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
   //
   // CHECK: [[IF_THEN]]
-  // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i32*, i32*)* [[PARALLEL_FN4]] to i8*))
+  // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32, i8**)* [[PARALLEL_FN4]]_wrapper to i8*),
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: br label {{%?}}[[IF_END:.+]]
@@ -303,7 +303,7 @@
   // CHECK: br label {{%?}}[[TERMINATE:.+]]
   //
   // CHECK: [[TERMINATE]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: br label {{%?}}[[EXIT]]
   //
diff --git a/test/OpenMP/nvptx_target_codegen.cpp b/test/OpenMP/nvptx_target_codegen.cpp
index d7ce7dc..23b40e1 100644
--- a/test/OpenMP/nvptx_target_codegen.cpp
+++ b/test/OpenMP/nvptx_target_codegen.cpp
@@ -91,7 +91,7 @@
   // CHECK: br label {{%?}}[[TERMINATE:.+]]
   //
   // CHECK: [[TERMINATE]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: br label {{%?}}[[EXIT]]
   //
@@ -168,7 +168,7 @@
   // CHECK: br label {{%?}}[[TERMINATE:.+]]
   //
   // CHECK: [[TERMINATE]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: br label {{%?}}[[EXIT]]
   //
@@ -278,7 +278,7 @@
   // CHECK: br label {{%?}}[[TERMINATE:.+]]
   //
   // CHECK: [[TERMINATE]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: br label {{%?}}[[EXIT]]
   //
@@ -441,7 +441,7 @@
   // CHECK: br label {{%?}}[[TERMINATE:.+]]
   //
   // CHECK: [[TERMINATE]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: br label {{%?}}[[EXIT]]
   //
@@ -531,7 +531,7 @@
   // CHECK: br label {{%?}}[[TERMINATE:.+]]
   //
   // CHECK: [[TERMINATE]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: br label {{%?}}[[EXIT]]
   //
@@ -616,7 +616,7 @@
   // CHECK: br label {{%?}}[[TERMINATE:.+]]
   //
   // CHECK: [[TERMINATE]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: br label {{%?}}[[EXIT]]
   //
diff --git a/test/OpenMP/nvptx_target_cuda_mode_messages.cpp b/test/OpenMP/nvptx_target_cuda_mode_messages.cpp
new file mode 100644
index 0000000..eecc26c
--- /dev/null
+++ b/test/OpenMP/nvptx_target_cuda_mode_messages.cpp
@@ -0,0 +1,108 @@
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-cuda-mode -fopenmp-host-ir-file-path %t-ppc-host.bc -o -
+
+template <typename tx, typename ty>
+struct TT {
+  tx X;
+  ty Y;
+};
+
+int foo(int n, double *ptr) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  double c[5][10];
+  TT<long long, char> d;
+
+#pragma omp target firstprivate(a) map(tofrom: b) // expected-note 2 {{defined as threadprivate or thread local}}
+  {
+    int c;                               // expected-note {{defined as threadprivate or thread local}}
+#pragma omp parallel shared(a, b, c, aa) // expected-error 3 {{threadprivate or thread local variable cannot be shared}}
+    b[a] = a;
+#pragma omp parallel for
+    for (int i = 0; i < 10; ++i) // expected-note {{defined as threadprivate or thread local}}
+#pragma omp parallel shared(i) // expected-error {{threadprivate or thread local variable cannot be shared}}
+    ++i;
+  }
+
+#pragma omp target map(aa, b, c, d)
+  {
+    int e;                         // expected-note {{defined as threadprivate or thread local}}
+#pragma omp parallel private(b, e) // expected-error {{threadprivate or thread local variable cannot be private}}
+    {
+      aa += 1;
+      b[2] = 1.0;
+      c[1][2] = 1.0;
+      d.X = 1;
+      d.Y = 1;
+    }
+  }
+
+#pragma omp target private(ptr)
+  {
+    ptr[0]++;
+  }
+
+  return a;
+}
+
+template <typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  tx b[10];
+
+#pragma omp target reduction(+ \
+                             : a, b) // expected-note {{defined as threadprivate or thread local}}
+  {
+    int e;                        // expected-note {{defined as threadprivate or thread local}}
+#pragma omp parallel shared(a, e) // expected-error 2 {{threadprivate or thread local variable cannot be shared}}
+    a += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+static int fstatic(int n) {
+  int a = 0;
+  char aaa = 0;
+  int b[10];
+
+#pragma omp target firstprivate(a, aaa, b)
+  {
+    a += 1;
+    aaa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+struct S1 {
+  double a;
+
+  int r1(int n) {
+    int b = n + 1;
+
+#pragma omp target firstprivate(b) // expected-note {{defined as threadprivate or thread local}}
+    {
+      int c;                      // expected-note {{defined as threadprivate or thread local}}
+#pragma omp parallel shared(b, c) // expected-error 2 {{threadprivate or thread local variable cannot be shared}}
+      this->a = (double)b + 1.5;
+    }
+
+    return (int)b;
+  }
+};
+
+int bar(int n, double *ptr) {
+  int a = 0;
+  a += foo(n, ptr);
+  S1 S;
+  a += S.r1(n);
+  a += fstatic(n);
+  a += ftemplate<int>(n); // expected-note {{in instantiation of function template specialization 'ftemplate<int>' requested here}}
+
+  return a;
+}
+
diff --git a/test/OpenMP/nvptx_target_firstprivate_codegen.cpp b/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
index f750be0..2ca2073 100644
--- a/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
+++ b/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
@@ -72,17 +72,17 @@
   //  firstprivate(b): memcpy(b_priv,b_in)
   // TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x float]* [[B_PRIV]] to i8*
   // TCHECK:  [[B_ADDR_REF_BCAST:%.+]] = bitcast [10 x float]* [[B_ADDR_REF]] to i8*
-  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_ADDR_REF_BCAST]], {{.+}})
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_ADDR_REF_BCAST]], {{.+}})
 
   // firstprivate(c)
   // TCHECK:  [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8*
   // TCHECK:  [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8*
-  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}})
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[C_PRIV_BCAST]], i8* align {{[0-9]+}} [[C_IN_BCAST]],{{.+}})
 
   // firstprivate(d)
   // TCHECK:  [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8*
   // TCHECK:  [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8*
-  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[D_PRIV_BCAST]], i8* [[D_IN_BCAST]],{{.+}})
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[D_PRIV_BCAST]], i8* align {{[0-9]+}} [[D_IN_BCAST]],{{.+}})
 
   // TCHECK: load i16, i16* [[A2_ADDR]],
 
@@ -151,7 +151,7 @@
 // firstprivate(b)
 // TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8*
 // TCHECK:  [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8*
-// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}})
+// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}})
 
 // TCHECK:  ret void
 
@@ -214,8 +214,14 @@
 // firstprivate(b)
 // TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8*
 // TCHECK:  [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8*
-// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}})
+// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}})
 
 // TCHECK: ret void
 
 #endif
+
+// TCHECK-DAG: distinct !DISubprogram(linkageName: "__omp_offloading_{{.+}}_worker",
+// TCHECK-DAG: distinct !DISubprogram(linkageName: "__omp_offloading_{{.+}}_worker",
+// TCHECK-DAG: distinct !DISubprogram(linkageName: "__omp_offloading_{{.+}}_worker",
+// TCHECK-DAG: distinct !DISubprogram(linkageName: "__omp_offloading_{{.+}}_worker",
+// TCHECK-DAG: distinct !DISubprogram(linkageName: "__omp_offloading_{{.+}}_worker",
diff --git a/test/OpenMP/nvptx_target_parallel_codegen.cpp b/test/OpenMP/nvptx_target_parallel_codegen.cpp
index 7d16624..64d195c 100644
--- a/test/OpenMP/nvptx_target_parallel_codegen.cpp
+++ b/test/OpenMP/nvptx_target_parallel_codegen.cpp
@@ -1,9 +1,9 @@
 // Test target codegen - host bc file has to be created first.
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -62,7 +62,7 @@
   // CHECK: br label {{%?}}[[EXEC:.+]]
   //
   // CHECK: [[EXEC]]
-  // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null, i16* [[AA]])
+  // CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]])
   // CHECK: br label {{%?}}[[DONE:.+]]
   //
   // CHECK: [[DONE]]
@@ -104,7 +104,7 @@
   // CHECK: br label {{%?}}[[EXEC:.+]]
   //
   // CHECK: [[EXEC]]
-  // CHECK: {{call|invoke}} void [[OP2:@.+]](i32* null, i32* null, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
+  // CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
   // CHECK: br label {{%?}}[[DONE:.+]]
   //
   // CHECK: [[DONE]]
diff --git a/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
index bc423c1..73d3bf8 100644
--- a/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
+++ b/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
@@ -1,9 +1,9 @@
 // Test target codegen - host bc file has to be created first.
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -51,7 +51,7 @@
   //
   // CHECK: [[EXEC]]
   // CHECK-NOT: call void @__kmpc_push_num_threads
-  // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null, i16* [[AA]])
+  // CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]])
   // CHECK: br label {{%?}}[[DONE:.+]]
   //
   // CHECK: [[DONE]]
@@ -94,7 +94,7 @@
   //
   // CHECK: [[EXEC]]
   // CHECK-NOT: call void @__kmpc_push_num_threads
-  // CHECK: {{call|invoke}} void [[OP2:@.+]](i32* null, i32* null, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
+  // CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
   // CHECK: br label {{%?}}[[DONE:.+]]
   //
   // CHECK: [[DONE]]
diff --git a/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
index 91c6de1..eb166b7 100644
--- a/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
+++ b/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
@@ -1,9 +1,9 @@
 // Test target codegen - host bc file has to be created first.
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -52,7 +52,7 @@
   //
   // CHECK: [[EXEC]]
   // CHECK-NOT: call void @__kmpc_push_proc_bind
-  // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
+  // CHECK: {{call|invoke}} void [[OP1:@.+]](
   // CHECK: br label {{%?}}[[DONE:.+]]
   //
   // CHECK: [[DONE]]
@@ -73,7 +73,7 @@
   //
   // CHECK: [[EXEC]]
   // CHECK-NOT: call void @__kmpc_push_proc_bind
-  // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
+  // CHECK: {{call|invoke}} void [[OP1:@.+]](
   // CHECK: br label {{%?}}[[DONE:.+]]
   //
   // CHECK: [[DONE]]
@@ -93,7 +93,7 @@
   //
   // CHECK: [[EXEC]]
   // CHECK-NOT: call void @__kmpc_push_proc_bind
-  // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
+  // CHECK: {{call|invoke}} void [[OP1:@.+]](
   // CHECK: br label {{%?}}[[DONE:.+]]
   //
   // CHECK: [[DONE]]
diff --git a/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
index c4c3e97..b12801e 100644
--- a/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
+++ b/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
@@ -1,9 +1,9 @@
 // Test target codegen - host bc file has to be created first.
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -168,9 +168,9 @@
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
   // CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align
   // CHECK: br label {{%?}}[[COPY_CONT:.+]]
   //
@@ -405,9 +405,9 @@
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
   // CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align
   // CHECK: br label {{%?}}[[COPY_CONT:.+]]
   //
@@ -714,18 +714,18 @@
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
   // CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align
   //
   // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
   // CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align
   // CHECK: br label {{%?}}[[COPY_CONT:.+]]
   //
diff --git a/test/OpenMP/nvptx_target_printf_codegen.c b/test/OpenMP/nvptx_target_printf_codegen.c
index 0de6885..9f57d9e 100644
--- a/test/OpenMP/nvptx_target_printf_codegen.c
+++ b/test/OpenMP/nvptx_target_printf_codegen.c
@@ -3,12 +3,8 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1 -verify -fopenmp -x c -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -x c -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-
-#include <stdarg.h>
-
 // expected-no-diagnostics
 extern int printf(const char *, ...);
-extern int vprintf(const char *, va_list);
 
 // Check a simple call to printf end-to-end.
 // CHECK: [[SIMPLE_PRINTF_TY:%[a-zA-Z0-9_]+]] = type { i32, i64, double }
diff --git a/test/OpenMP/nvptx_target_simd_codegen.cpp b/test/OpenMP/nvptx_target_simd_codegen.cpp
new file mode 100644
index 0000000..9bb7617
--- /dev/null
+++ b/test/OpenMP/nvptx_target_simd_codegen.cpp
@@ -0,0 +1,74 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l24}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l29}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l34}}_exec_mode = weak constant i8 0
+
+#define N 1000
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a[N];
+  short aa[N];
+  tx b[10];
+  
+  #pragma omp target simd
+  for(int i = 0; i < n; i++) {
+    a[i] = 1;
+  }
+
+  #pragma omp target simd
+  for(int i = 0; i < n; i++) {  
+    aa[i] += 1;
+  }
+
+  #pragma omp target simd
+  for(int i = 0; i < 10; i++) {
+    b[i] += 1;
+  }
+
+  return a[0];
+}
+
+int bar(int n){
+  int a = 0;
+
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l24}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK-NOT: call void @__kmpc_for_static_init
+// CHECK-NOT: call void @__kmpc_for_static_fini
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l29}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK-NOT: call void @__kmpc_for_static_init
+// CHECK-NOT: call void @__kmpc_for_static_fini
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l34}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK-NOT: call void @__kmpc_for_static_init
+// CHECK-NOT: call void @__kmpc_for_static_fini
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/nvptx_target_teams_codegen.cpp b/test/OpenMP/nvptx_target_teams_codegen.cpp
index e823eab..ff5967b 100644
--- a/test/OpenMP/nvptx_target_teams_codegen.cpp
+++ b/test/OpenMP/nvptx_target_teams_codegen.cpp
@@ -60,7 +60,7 @@
   //
   // CHECK: [[AWAIT_WORK]]
   // CHECK: call void @llvm.nvvm.barrier0()
-  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
+  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args, i16 1)
   // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
   // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
   // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
@@ -125,7 +125,7 @@
   // CHECK: br label {{%?}}[[TERMINATE:.+]]
   //
   // CHECK: [[TERMINATE]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: br label {{%?}}[[EXIT]]
   //
@@ -146,7 +146,7 @@
   //
   // CHECK: [[AWAIT_WORK]]
   // CHECK: call void @llvm.nvvm.barrier0()
-  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
+  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args, i16 1)
   // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
   // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
   // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
@@ -211,7 +211,7 @@
   // CHECK: br label {{%?}}[[TERMINATE:.+]]
   //
   // CHECK: [[TERMINATE]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
   // CHECK: call void @llvm.nvvm.barrier0()
   // CHECK: br label {{%?}}[[EXIT]]
   //
diff --git a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
new file mode 100644
index 0000000..fc3f253
--- /dev/null
+++ b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -0,0 +1,123 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
+
+#define N 1000
+#define M 10
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a[N];
+  short aa[N];
+  tx b[10];
+  tx c[M][M];  
+  tx f = n;
+  tx l;
+  int k;
+
+#pragma omp target teams distribute parallel for lastprivate(l) dist_schedule(static,128) schedule(static,32)
+  for(int i = 0; i < n; i++) {
+    a[i] = 1;
+    l = i;
+  }
+
+  #pragma omp target teams distribute parallel for map(tofrom: aa) num_teams(M) thread_limit(64)
+  for(int i = 0; i < n; i++) {
+    aa[i] += 1;
+  }
+
+#pragma omp target teams distribute parallel for map(tofrom:a, aa, b) if(target: n>40) proc_bind(spread)
+  for(int i = 0; i < 10; i++) {
+    b[i] += 1;
+  }
+
+#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k) num_threads(M)
+  for(int i = 0; i < M; i++) {
+    for(int j = 0; j < M; j++) {
+      k = M;
+      c[i][j] = i+j*f+k;      
+    }
+  }
+
+  return a[0];
+}
+
+int bar(int n){
+  int a = 0;
+
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
+// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL1]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL2]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL3]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
+// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL4]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
new file mode 100644
index 0000000..c508bc9
--- /dev/null
+++ b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -0,0 +1,123 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
+
+#define N 1000
+#define M 10
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a[N];
+  short aa[N];
+  tx b[10];
+  tx c[M][M];  
+  tx f = n;
+  tx l;
+  int k;
+
+#pragma omp target teams distribute parallel for simd lastprivate(l) dist_schedule(static,128) schedule(static,32)
+  for(int i = 0; i < n; i++) {
+    a[i] = 1;
+    l = i;
+  }
+
+  #pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64)
+  for(int i = 0; i < n; i++) {
+    aa[i] += 1;
+  }
+
+#pragma omp target teams distribute parallel for simd map(tofrom:a, aa, b) if(target: n>40) proc_bind(spread)
+  for(int i = 0; i < 10; i++) {
+    b[i] += 1;
+  }
+
+#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k) num_threads(M)
+  for(int i = 0; i < M; i++) {
+    for(int j = 0; j < M; j++) {
+      k = M;
+      c[i][j] = i+j*f+k;      
+    }
+  }
+
+  return a[0];
+}
+
+int bar(int n){
+  int a = 0;
+
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
+// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL1]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL2]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL3]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
+// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL4]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
new file mode 100644
index 0000000..a78a01a
--- /dev/null
+++ b/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
@@ -0,0 +1,99 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
+
+#define N 1000
+#define M 10
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a[N];
+  short aa[N];
+  tx b[10];
+  tx c[M][M];  
+  tx f = n;
+  tx l;
+  int k;
+
+#pragma omp target teams distribute simd lastprivate(l) dist_schedule(static,128)
+  for(int i = 0; i < n; i++) {
+    a[i] = 1;
+    l = i;
+  }
+
+  #pragma omp target teams distribute simd map(tofrom: aa) num_teams(M) thread_limit(64)
+  for(int i = 0; i < n; i++) {
+    aa[i] += 1;
+  }
+
+#pragma omp target teams distribute simd map(tofrom:a, aa, b) if(target: n>40)
+  for(int i = 0; i < 10; i++) {
+    b[i] += 1;
+  }
+
+#pragma omp target teams distribute simd collapse(2) firstprivate(f) private(k)
+  for(int i = 0; i < M; i++) {
+    for(int j = 0; j < M; j++) {
+      k = M;
+      c[i][j] = i+j*f+k;      
+    }
+  }
+
+  return a[0];
+}
+
+int bar(int n){
+  int a = 0;
+
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
+// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/test/OpenMP/nvptx_teams_reduction_codegen.cpp
index ae129eb..696940b 100644
--- a/test/OpenMP/nvptx_teams_reduction_codegen.cpp
+++ b/test/OpenMP/nvptx_teams_reduction_codegen.cpp
@@ -84,7 +84,7 @@
   // CHECK: br label %[[EXIT]]
   //
   // CHECK: [[EXIT]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
 
   //
   // Reduction function
@@ -168,9 +168,9 @@
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
   // CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align
   // CHECK: br label {{%?}}[[COPY_CONT:.+]]
   //
@@ -249,9 +249,9 @@
   // CHECK: [[P:%.+]] = mul i[[SZ]] 8, [[TEAM]]
   // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
   // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
-  // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to double*
   // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
   // CHECK: [[ELT_VAL:%.+]] = load double, double* [[ELT]], align
+  // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to double*
   // CHECK: store double [[ELT_VAL]], double* [[SCRATCHPAD_ELT_PTR]], align
   //
   // CHECK: ret
@@ -298,25 +298,15 @@
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 0
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
   // CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align
   // CHECK: br label {{%?}}[[REDUCE_CONT]]
   //
   // CHECK: [[REDUCE_CONT]]
   // CHECK: ret
 
-
-
-
-
-
-
-
-
-
-
   // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l33}}_worker()
 
   // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l33]](
@@ -360,7 +350,7 @@
   // CHECK: br label %[[EXIT]]
   //
   // CHECK: [[EXIT]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
 
   //
   // Reduction function
@@ -480,9 +470,9 @@
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
   // CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align
   // CHECK: br label {{%?}}[[COPY_CONT:.+]]
   //
@@ -617,9 +607,9 @@
   // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
   // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
   // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
-  // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to float*
   // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
   // CHECK: [[ELT_VAL:%.+]] = load float, float* [[ELT]], align
+  // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to float*
   // CHECK: store float [[ELT_VAL]], float* [[SCRATCHPAD_ELT_PTR]], align
   //
   // CHECK: ret
@@ -690,24 +680,15 @@
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
   // CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align
   // CHECK: br label {{%?}}[[REDUCE_CONT]]
   //
   // CHECK: [[REDUCE_CONT]]
   // CHECK: ret
 
-
-
-
-
-
-
-
-
-
   // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l40}}_worker()
 
   // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+template.+l40]](
@@ -776,7 +757,7 @@
   // CHECK: br label %[[EXIT]]
   //
   // CHECK: [[EXIT]]
-  // CHECK: call void @__kmpc_kernel_deinit()
+  // CHECK: call void @__kmpc_kernel_deinit(
 
   //
   // Reduction function
@@ -903,18 +884,18 @@
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
   // CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align
   //
   // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
   // CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align
   // CHECK: br label {{%?}}[[COPY_CONT:.+]]
   //
@@ -1035,9 +1016,9 @@
   // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
   // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
   // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
-  // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32*
   // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
   // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align
+  // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32*
   // CHECK: store i32 [[ELT_VAL]], i32* [[SCRATCHPAD_ELT_PTR]], align
   //
   // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 4
@@ -1053,9 +1034,9 @@
   // CHECK: [[P:%.+]] = mul i[[SZ]] 2, [[TEAM]]
   // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
   // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
-  // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i16*
   // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
   // CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align
+  // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i16*
   // CHECK: store i16 [[ELT_VAL]], i16* [[SCRATCHPAD_ELT_PTR]], align
   //
   // CHECK: ret
@@ -1121,18 +1102,18 @@
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 0
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
   // CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align
   //
   // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1
   // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
-  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
   // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16*
   // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align
+  // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
   // CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align
   // CHECK: br label {{%?}}[[REDUCE_CONT]]
   //
diff --git a/test/OpenMP/openmp_check.cpp b/test/OpenMP/openmp_check.cpp
index d46213a..cd4706b 100644
--- a/test/OpenMP/openmp_check.cpp
+++ b/test/OpenMP/openmp_check.cpp
@@ -2,7 +2,16 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++11 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+#define p _Pragma("omp parallel")
+
 int nested(int a) {
+#pragma omp parallel p // expected-error {{unexpected OpenMP directive}}
+  ++a;
 #pragma omp parallel
   ++a;
 
@@ -11,8 +20,6 @@
   // expected-warning@-2 {{'auto' type specifier is a C++11 extension}}
   // expected-error@-3 {{expected expression}}
   // expected-error@-4 {{expected ';' at end of declaration}}
-#else
-  // expected-no-diagnostics
 #endif
 
 #pragma omp parallel
diff --git a/test/OpenMP/openmp_common.c b/test/OpenMP/openmp_common.c
index ed55796..feeefc4 100644
--- a/test/OpenMP/openmp_common.c
+++ b/test/OpenMP/openmp_common.c
@@ -1,5 +1,8 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 #pragma omp // expected-error {{expected an OpenMP directive}}
 #pragma omp unknown_directive // expected-error {{expected an OpenMP directive}}
 
diff --git a/test/OpenMP/openmp_offload_codegen.cpp b/test/OpenMP/openmp_offload_codegen.cpp
index f89bccf..fff5013 100644
--- a/test/OpenMP/openmp_offload_codegen.cpp
+++ b/test/OpenMP/openmp_offload_codegen.cpp
@@ -1,12 +1,18 @@
 // Test device for mapping codegen.
 ///==========================================================================///
 
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -S -emit-llvm %s -o - 2>&1 \
-// RUN:   | FileCheck -check-prefix=CK1 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -S -emit-llvm %s -o - 2>&1 | FileCheck -check-prefix=CK1 %s
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1-DEVICE
 
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 
 #ifdef CK1
@@ -33,4 +39,4 @@
 // CK1: [[GEPOPARG:%.+]] = getelementptr inbounds {{.*}}
 // CK1: call {{.*}}tgt_target({{.*}}i8** [[GEPOBPARG]], i8** [[GEPOPARG]]
 
-#endif
\ No newline at end of file
+#endif
diff --git a/test/OpenMP/openmp_seh.c b/test/OpenMP/openmp_seh.c
index 74dccec..0bdd079 100644
--- a/test/OpenMP/openmp_seh.c
+++ b/test/OpenMP/openmp_seh.c
@@ -1,4 +1,7 @@
 // RUN: %clang_cc1 -verify -triple x86_64-pc-windows-msvc19.0.0 -fopenmp -fms-compatibility -x c++ -emit-llvm %s -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-pc-windows-msvc19.0.0 -fopenmp-simd -fms-compatibility -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 // REQUIRES: x86-registered-target
 extern "C" {
diff --git a/test/OpenMP/openmp_win_codegen.cpp b/test/OpenMP/openmp_win_codegen.cpp
new file mode 100644
index 0000000..249832b
--- /dev/null
+++ b/test/OpenMP/openmp_win_codegen.cpp
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-pc-windows-msvc18.0.0 -std=c++11 -fms-compatibility-version=18 -fms-extensions -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-pc-windows-msvc18.0.0 -std=c++11 -fms-compatibility-version=18 -fms-extensions -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+
+void foo();
+void bar();
+
+struct Test {
+  static void main() {
+    int failed = 0;
+    int j = 2;
+
+#pragma omp parallel
+    {
+      int local_j = 3;
+#pragma omp single copyprivate(local_j)
+      {
+        local_j = 4;
+      }
+
+      // Assure reports a data race, but value written to "j"
+      // should always be the same.
+      j = local_j;
+    }
+
+  }
+};
+
+// CHECK-LABEL: @main
+int main() {
+  // CHECK: call void @{{.+}}main
+  Test::main();
+  // CHECK: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* {{.*}}@0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*))
+#pragma omp parallel
+  {
+    try {
+      foo();
+    } catch (int t) {
+#pragma omp critical
+      {
+        bar();
+      };
+    }
+  };
+  // CHECK: ret i32 0
+  return 0;
+}
+
+// CHECK: define internal void [[OUTLINED]](
+// CHECK: [[GID:%.+]] = {{.*}}call i32 @__kmpc_global_thread_num(%ident_t* {{.*}}@0)
+// CHECK: invoke void @{{.+}}foo
+// CHECK: [[CATCHSWITCH:%.+]] = catchswitch within none
+// CHECK: [[CATCHPAD:%.+]] = catchpad within [[CATCHSWITCH]]
+// CHECK: call void @__kmpc_critical(%ident_t* {{.*}}@0, i32 [[GID]],
+// CHECK: invoke void @{{.+}}bar
+// CHECK: call void @__kmpc_end_critical(%ident_t* {{.*}}@0, i32 [[GID]],
+// CHECK: catchret from [[CATCHPAD]] to
+// CHECK:      cleanuppad within [[CATCHPAD]] []
+// CHECK-NEXT: call void @__kmpc_end_critical(%ident_t* {{.*}}@0, i32 [[GID]],
+// CHECK-NEXT: cleanupret from {{.*}} unwind label %[[CATCHTERM:[^ ]+]]
+// CHECK:      cleanuppad within none []
+// CHECK-NEXT: call void @"\01?terminate@@YAXXZ"() #5 [ "funclet"(token %{{.*}}) ]
+// CHECK-NEXT: unreachable
+// CHECK:      [[CATCHTERM]]
+// CHECK-NEXT: cleanuppad within [[CATCHPAD]] []
+// CHECK-NEXT: call void @"\01?terminate@@YAXXZ"() #5 [ "funclet"(token %{{.*}}) ]
+// CHECK-NEXT: unreachable
diff --git a/test/OpenMP/option_warn.c b/test/OpenMP/option_warn.c
index ddec8e1..d9be164 100644
--- a/test/OpenMP/option_warn.c
+++ b/test/OpenMP/option_warn.c
@@ -1,5 +1,8 @@
 // RUN: %clang_cc1 -verify -Wsource-uses-openmp -o - %s
 
+// RUN: %clang_cc1 -verify -Wsource-uses-openmp -o - %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 int a;
 #pragma omp threadprivate(a,b) // expected-warning {{unexpected '#pragma omp ...' in program}}
 #pragma omp parallel
diff --git a/test/OpenMP/ordered_ast_print.cpp b/test/OpenMP/ordered_ast_print.cpp
index c7baa9c..f36afab 100644
--- a/test/OpenMP/ordered_ast_print.cpp
+++ b/test/OpenMP/ordered_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -54,7 +58,7 @@
 // CHECK: static T a;
 // CHECK-NEXT: #pragma omp for ordered
 // CHECK-NEXT: for (int i = 0; i < argc; ++i)
-// CHECK-NEXT: #pragma omp ordered
+// CHECK-NEXT: #pragma omp ordered{{$}}
 // CHECK-NEXT: {
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: }
diff --git a/test/OpenMP/ordered_codegen.cpp b/test/OpenMP/ordered_codegen.cpp
index af6a872..977aba9 100644
--- a/test/OpenMP/ordered_codegen.cpp
+++ b/test/OpenMP/ordered_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/ordered_doacross_codegen.c b/test/OpenMP/ordered_doacross_codegen.c
new file mode 100644
index 0000000..c12df26
--- /dev/null
+++ b/test/OpenMP/ordered_doacross_codegen.c
@@ -0,0 +1,61 @@
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[KMP_DIM:%.+]] = type { i64, i64, i64 }
+extern int n;
+int a[10], b[10], c[10], d[10];
+void foo();
+
+// CHECK-LABEL: @main()
+int main() {
+  int i;
+// CHECK: [[DIMS:%.+]] = alloca [[KMP_DIM]],
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
+// CHECK: icmp
+// CHECK-NEXT: br i1 %
+// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[CAST]], i8 0, i64 24, i1 false)
+// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 1
+// CHECK: store i64 %{{.+}}, i64* %
+// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 2
+// CHECK: store i64 1, i64* %
+// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8*
+// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]])
+// CHECK: call void @__kmpc_for_static_init_4(
+#pragma omp for ordered(1)
+  for (i = 0; i < n; ++i) {
+    a[i] = b[i] + 1;
+    foo();
+// CHECK: call void [[FOO:.+]](
+// CHECK: load i32, i32* [[CNT:%.+]],
+// CHECK-NEXT: sext i32 %{{.+}} to i64
+// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
+// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
+#pragma omp ordered depend(source)
+    c[i] = c[i] + 1;
+    foo();
+// CHECK: call void [[FOO]]
+// CHECK: load i32, i32* [[CNT]],
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 2
+// CHECK-NEXT: sext i32 %{{.+}} to i64
+// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
+// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
+#pragma omp ordered depend(sink : i - 2)
+    d[i] = a[i - 2];
+  }
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
+  // CHECK: ret i32 0
+  return 0;
+}
+#endif // HEADER
diff --git a/test/OpenMP/ordered_doacross_codegen.cpp b/test/OpenMP/ordered_doacross_codegen.cpp
index 28b02c3..60b0353 100644
--- a/test/OpenMP/ordered_doacross_codegen.cpp
+++ b/test/OpenMP/ordered_doacross_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -19,7 +24,7 @@
 // CHECK: icmp
 // CHECK-NEXT: br i1 %
 // CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8*
-// CHECK: call void @llvm.memset.p0i8.i64(i8* [[CAST]], i8 0, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[CAST]], i8 0, i64 24, i1 false)
 // CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 1
 // CHECK: store i64 %{{.+}}, i64* %
 // CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 2
@@ -76,7 +81,7 @@
 // CHECK: icmp
 // CHECK-NEXT: br i1 %
 // CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8*
-// CHECK: call void @llvm.memset.p0i8.i64(i8* [[CAST]], i8 0, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[CAST]], i8 0, i64 24, i1 false)
 // CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 1
 // CHECK: store i64 %{{.+}}, i64* %
 // CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 2
diff --git a/test/OpenMP/ordered_messages.cpp b/test/OpenMP/ordered_messages.cpp
index 2e3d55c..43ac40e 100644
--- a/test/OpenMP/ordered_messages.cpp
+++ b/test/OpenMP/ordered_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++98 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++98 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++11 -o - %s
+
 int foo();
 
 template <class T>
@@ -134,6 +138,8 @@
 #pragma omp ordered depend(source) depend(sink : i-0, j+sizeof(T)) // expected-error {{'depend(sink:vec)' clauses cannot be mixed with 'depend(source)' clause}}
     }
   }
+#pragma omp ordered depend(source) // expected-error {{'ordered' directive with 'depend' clause cannot be closely nested inside ordered region without specified parameter}}
+#pragma omp ordered depend(sink:k) // expected-error {{'ordered' directive with 'depend' clause cannot be closely nested inside ordered region without specified parameter}}
   return T();
 }
 
diff --git a/test/OpenMP/parallel_ast_print.cpp b/test/OpenMP/parallel_ast_print.cpp
index 1e24c6e..19ebb70 100644
--- a/test/OpenMP/parallel_ast_print.cpp
+++ b/test/OpenMP/parallel_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -135,6 +139,9 @@
 // CHECK-NEXT:   #pragma omp threadprivate(S<long>::TS)
 // CHECK-NEXT: }
 
+int thrp;
+#pragma omp threadprivate(thrp)
+
 template <typename T, int C>
 T tmain(T argc, T *argv) {
   T b = argc, c, d, e, f, g;
@@ -143,7 +150,7 @@
   T arr[C][10], arr1[C];
 #pragma omp parallel
   a=2;
-#pragma omp parallel default(none), private(argc,b) firstprivate(argv) shared (d) if (parallel:argc > 0) num_threads(C) copyin(S<T>::TS) proc_bind(master) reduction(+:c, arr1[argc]) reduction(max:e, arr[:C][0:10])
+#pragma omp parallel default(none), private(argc,b) firstprivate(argv) shared (d) if (parallel:argc > 0) num_threads(C) copyin(S<T>::TS, thrp) proc_bind(master) reduction(+:c, arr1[argc]) reduction(max:e, arr[:C][0:10])
   foo();
 #pragma omp parallel if (C) num_threads(s) proc_bind(close) reduction(^:e, f, arr[0:C][:argc]) reduction(&& : g)
   foo();
@@ -155,9 +162,9 @@
 // CHECK-NEXT: static T a;
 // CHECK-NEXT: S<T> s;
 // CHECK-NEXT: T arr[C][10], arr1[C];
-// CHECK-NEXT: #pragma omp parallel
+// CHECK-NEXT: #pragma omp parallel{{$}}
 // CHECK-NEXT: a = 2;
-// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(C) copyin(S<T>::TS) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10])
+// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(C) copyin(S<T>::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10])
 // CHECK-NEXT: foo()
 // CHECK-NEXT: #pragma omp parallel if(C) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:C][:argc]) reduction(&&: g)
 // CHECK-NEXT: foo()
@@ -168,7 +175,7 @@
 // CHECK-NEXT: int arr[5][10], arr1[5];
 // CHECK-NEXT: #pragma omp parallel
 // CHECK-NEXT: a = 2;
-// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(5) copyin(S<int>::TS) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10])
+// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(5) copyin(S<int>::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10])
 // CHECK-NEXT: foo()
 // CHECK-NEXT: #pragma omp parallel if(5) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(&&: g)
 // CHECK-NEXT: foo()
@@ -179,7 +186,7 @@
 // CHECK-NEXT: long arr[1][10], arr1[1];
 // CHECK-NEXT: #pragma omp parallel
 // CHECK-NEXT: a = 2;
-// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(1) copyin(S<long>::TS) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:1][0:10])
+// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(1) copyin(S<long>::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:1][0:10])
 // CHECK-NEXT: foo()
 // CHECK-NEXT: #pragma omp parallel if(1) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:1][:argc]) reduction(&&: g)
 // CHECK-NEXT: foo()
diff --git a/test/OpenMP/parallel_codegen.cpp b/test/OpenMP/parallel_codegen.cpp
index 68a178e..9d9bb71 100644
--- a/test/OpenMP/parallel_codegen.cpp
+++ b/test/OpenMP/parallel_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-DEBUG %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -42,7 +47,7 @@
 // CHECK-DEBUG:       [[LOC_2_ADDR:%.+]] = alloca %ident_t
 // CHECK-DEBUG:       [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[LOC_2_ADDR]] to i8*
 // CHECK-DEBUG-NEXT:  [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[DEF_LOC_2]] to i8*
-// CHECK-DEBUG-NEXT:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[KMPC_LOC_VOIDPTR]], i8* [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i32 8, i1 false)
+// CHECK-DEBUG-NEXT:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[KMPC_LOC_VOIDPTR]], i8* align 8 [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i1 false)
 // CHECK-DEBUG:       store i32 %argc, i32* [[ARGC_ADDR:%.+]],
 // CHECK-DEBUG:       [[VLA:%.+]] = alloca i32, i64 [[VLA_SIZE:%[^,]+]],
 // CHECK-DEBUG:       [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4
@@ -53,7 +58,7 @@
 // CHECK-DEBUG:       ret i32
 // CHECK-DEBUG-NEXT:  }
 
-// CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i{{[0-9]+}}{{.*}} [[VLA_SIZE:%.+]], i32* [[VLA_ADDR:%[^)]+]])
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i{{[0-9]+}}{{.*}} [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
 // CHECK-SAME:       #[[FN_ATTRS:[0-9]+]]
 // CHECK:       store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]],
 // CHECK:       [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]]
@@ -64,7 +69,7 @@
 // CHECK:       call {{.*}}void @{{.+terminate.*|abort}}(
 // CHECK-NEXT:  unreachable
 // CHECK-NEXT:  }
-// CHECK-DEBUG:       define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* [[VLA_ADDR:%[^)]+]])
+// CHECK-DEBUG:       define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
 // CHECK-DEBUG-SAME:  #[[FN_ATTRS:[0-9]+]]
 // CHECK-DEBUG:       store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]],
 // CHECK-DEBUG:       [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]]
@@ -80,7 +85,7 @@
 // CHECK-DAG: declare {{.*}}void @__kmpc_fork_call(%ident_t*, i32, void (i32*, i32*, ...)*, ...)
 // CHECK-DEBUG-DAG: define linkonce_odr void [[FOO]](i32 %argc)
 // CHECK-DEBUG-DAG: declare void @__kmpc_fork_call(%ident_t*, i32, void (i32*, i32*, ...)*, ...)
-// CHECK-DEBUG-DAG:       define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* [[VLA_ADDR:%[^)]+]])
+// CHECK-DEBUG-DAG:       define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
 // CHECK-DEBUG-DAG:       call void [[OMP_OUTLINED_DEBUG]]
 
 // CHECK:       define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc)
@@ -92,7 +97,7 @@
 // CHECK-DEBUG-DAG:   [[LOC_2_ADDR:%.+]] = alloca %ident_t
 // CHECK-DEBUG:       [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[LOC_2_ADDR]] to i8*
 // CHECK-DEBUG-NEXT:  [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[DEF_LOC_2]] to i8*
-// CHECK-DEBUG-NEXT:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[KMPC_LOC_VOIDPTR]], i8* [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i32 8, i1 false)
+// CHECK-DEBUG-NEXT:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[KMPC_LOC_VOIDPTR]], i8* align 8 [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i1 false)
 // CHECK-DEBUG-NEXT:  store i8** %argc, i8*** [[ARGC_ADDR:%.+]],
 // CHECK-DEBUG:  [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4
 // CHECK-DEBUG-NEXT:  store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC2]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]]
diff --git a/test/OpenMP/parallel_copyin_codegen.cpp b/test/OpenMP/parallel_copyin_codegen.cpp
index 49e7b3f..3d33aa2 100644
--- a/test/OpenMP/parallel_copyin_codegen.cpp
+++ b/test/OpenMP/parallel_copyin_codegen.cpp
@@ -5,12 +5,28 @@
 // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
 // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s -check-prefix=TLS-CHECK
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TLS-CHECK
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=TLS-LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=TLS-BLOCKS %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=TLS-ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef ARRAY
 #ifndef HEADER
@@ -40,18 +56,18 @@
 // CHECK-DAG: [[VEC:@.+]] = internal global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
 // CHECK-DAG: [[S_ARR:@.+]] = internal global [2 x [[S_FLOAT_TY]]] zeroinitializer,
 // CHECK-DAG: [[VAR:@.+]] = internal global [[S_FLOAT_TY]] zeroinitializer,
-// CHECK-DAG: [[TMAIN_T_VAR:@.+]] = linkonce_odr global i{{[0-9]+}} 333,
-// CHECK-DAG: [[TMAIN_VEC:@.+]] = linkonce_odr global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 3, i{{[0-9]+}} 3],
-// CHECK-DAG: [[TMAIN_S_ARR:@.+]] = linkonce_odr global [2 x [[S_INT_TY]]] zeroinitializer,
-// CHECK-DAG: [[TMAIN_VAR:@.+]] = linkonce_odr global [[S_INT_TY]] zeroinitializer,
+// CHECK-DAG: [[TMAIN_T_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global i{{[0-9]+}} 333,
+// CHECK-DAG: [[TMAIN_VEC:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 3, i{{[0-9]+}} 3],
+// CHECK-DAG: [[TMAIN_S_ARR:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x [[S_INT_TY]]] zeroinitializer,
+// CHECK-DAG: [[TMAIN_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global [[S_INT_TY]] zeroinitializer,
 // TLS-CHECK-DAG: [[T_VAR:@.+]] = internal thread_local global i{{[0-9]+}} 1122,
 // TLS-CHECK-DAG: [[VEC:@.+]] = internal thread_local global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
 // TLS-CHECK-DAG: [[S_ARR:@.+]] = internal thread_local global [2 x [[S_FLOAT_TY]]] zeroinitializer,
 // TLS-CHECK-DAG: [[VAR:@.+]] = internal thread_local global [[S_FLOAT_TY]] zeroinitializer,
-// TLS-CHECK-DAG: [[TMAIN_T_VAR:@.+]] = linkonce_odr thread_local global i{{[0-9]+}} 333,
-// TLS-CHECK-DAG: [[TMAIN_VEC:@.+]] = linkonce_odr thread_local global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 3, i{{[0-9]+}} 3],
-// TLS-CHECK-DAG: [[TMAIN_S_ARR:@.+]] = linkonce_odr thread_local global [2 x [[S_INT_TY]]] zeroinitializer,
-// TLS-CHECK-DAG: [[TMAIN_VAR:@.+]] = linkonce_odr thread_local global [[S_INT_TY]] zeroinitializer,
+// TLS-CHECK-DAG: [[TMAIN_T_VAR:@.+]] = linkonce_odr {{(dso_local )?}}thread_local global i{{[0-9]+}} 333,
+// TLS-CHECK-DAG: [[TMAIN_VEC:@.+]] = linkonce_odr {{(dso_local )?}}thread_local global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 3, i{{[0-9]+}} 3],
+// TLS-CHECK-DAG: [[TMAIN_S_ARR:@.+]] = linkonce_odr {{(dso_local )?}}thread_local global [2 x [[S_INT_TY]]] zeroinitializer,
+// TLS-CHECK-DAG: [[TMAIN_VAR:@.+]] = linkonce_odr {{(dso_local )?}}thread_local global [[S_INT_TY]] zeroinitializer,
 
 template <typename T>
 T tmain() {
@@ -74,7 +90,7 @@
 
 int main() {
 #ifdef LAMBDA
-  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA: [[G:@.+]] = {{(dso_local )?}}global i{{[0-9]+}} 1212,
   // LAMBDA-LABEL: @main
   // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
   // TLS-LAMBDA: [[G:@.+]] = {{.*}}thread_local {{.*}}global i{{[0-9]+}} 1212,
@@ -136,7 +152,7 @@
   }();
   return 0;
 #elif defined(BLOCKS)
-  // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // BLOCKS: [[G:@.+]] = {{(dso_local )?}}global i{{[0-9]+}} 1212,
   // BLOCKS-LABEL: @main
   // BLOCKS: call {{.*}}void {{%.+}}(i8
 
@@ -278,10 +294,10 @@
 
 // threadprivate_vec = vec;
 // CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VEC]]
-// CHECK: call void @llvm.memcpy{{.*}}(i8* %{{.+}}, i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
+// CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}}  %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
 
 // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x i32]* [[MASTER_REF2]] to i8*
-// TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), i8* [[MASTER_CAST]]
+// TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), i8* align {{[0-9]+}} [[MASTER_CAST]]
 
 // threadprivate_s_arr = s_arr;
 // CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[S_ARR]]
@@ -399,10 +415,10 @@
 
 // threadprivate_vec = vec;
 // CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VEC]]
-// CHECK: call {{.*}}void @llvm.memcpy{{.*}}(i8* %{{.+}}, i8* bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*),
+// CHECK: call {{.*}}void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*),
 
 // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x i32]* [[MASTER_REF1]] to i8*
-// TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), i8* [[MASTER_CAST]]
+// TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), i8* align {{[0-9]+}} [[MASTER_CAST]]
 
 // threadprivate_s_arr = s_arr;
 // CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_S_ARR]]
@@ -492,13 +508,13 @@
   static St s[2];
 // ARRAY: @__kmpc_fork_call(
 // ARRAY: call i8* @__kmpc_threadprivate_cached(
-// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* bitcast ([2 x i32]* @{{.+}} to i8*), i64 8, i32 4, i1 false)
+// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 bitcast ([2 x i32]* @{{.+}} to i8*), i64 8, i1 false)
 // ARRAY: call dereferenceable(8) %struct.St* @{{.+}}(%struct.St* %{{.+}}, %struct.St* dereferenceable(8) %{{.+}})
 
 // TLS-ARRAY: @__kmpc_fork_call(
 // TLS-ARRAY: [[REFT:%.+]] = load [2 x i32]*, [2 x i32]** [[ADDR:%.+]],
 // TLS-ARRAY: [[REF:%.+]] = bitcast [2 x i32]* [[REFT]] to i8*
-// TLS-ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast ([2 x i32]* @{{.+}} to i8*), i8* [[REF]], i64 8, i32 4, i1 false)
+// TLS-ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast ([2 x i32]* @{{.+}} to i8*), i8* align 4 [[REF]], i64 8, i1 false)
 // TLS-ARRAY: call dereferenceable(8) %struct.St* @{{.+}}(%struct.St* %{{.+}}, %struct.St* dereferenceable(8) %{{.+}})
 
 #pragma omp threadprivate(a, s)
diff --git a/test/OpenMP/parallel_copyin_messages.cpp b/test/OpenMP/parallel_copyin_messages.cpp
index 2b54b43..81a507a 100644
--- a/test/OpenMP/parallel_copyin_messages.cpp
+++ b/test/OpenMP/parallel_copyin_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_default_messages.cpp b/test/OpenMP/parallel_default_messages.cpp
index cbc6a73..4b698bc 100644
--- a/test/OpenMP/parallel_default_messages.cpp
+++ b/test/OpenMP/parallel_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/parallel_firstprivate_codegen.cpp b/test/OpenMP/parallel_firstprivate_codegen.cpp
index d20f3f5..6772f61 100644
--- a/test/OpenMP/parallel_firstprivate_codegen.cpp
+++ b/test/OpenMP/parallel_firstprivate_codegen.cpp
@@ -4,13 +4,30 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA -check-prefix=LAMBDA-32 %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS -check-prefix=BLOCKS-32 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-64
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-64
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA -check-prefix=LAMBDA-64 %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS -check-prefix=BLOCKS-64 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef ARRAY
 #ifndef HEADER
@@ -349,7 +366,7 @@
 // CHECK-64: [[SIVAR7_CONV:%.+]] = bitcast i64* [[SIVAR7_PRIV]] to i32*
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
 // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]],
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]] to [[S_FLOAT_TY]]*
 // CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
@@ -442,7 +459,7 @@
 // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]], align 128
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
 // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], i{{[0-9]+}} {{[0-9]+}}, i{{[0-9]+}} 128,
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align 128 [[VEC_DEST]], i8* align 128 [[VEC_SRC]], i{{[0-9]+}} {{[0-9]+}}, i1
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_REF]] to [[S_INT_TY]]*
 // CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
@@ -490,7 +507,7 @@
 // ARRAY-DAG: store double* %{{.+}}, double** [[PRIV_VLA2]],
 // ARRAY: call i8* @llvm.stacksave()
 // ARRAY: [[SIZE:%.+]] = mul nuw i64 %{{.+}}, 8
-// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* %{{.+}}, i64 [[SIZE]], i32 128, i1 false)
+// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 %{{.+}}, i8* align 128 %{{.+}}, i64 [[SIZE]], i1 false)
 #pragma omp parallel firstprivate(a, s, vla1, vla2)
   s[0].St_func(s, n, vla1);
   ;
@@ -506,7 +523,7 @@
 // ARRAY-DAG: store double* %{{.+}}, double** [[PRIV_VLA2]],
 // ARRAY: call i8* @llvm.stacksave()
 // ARRAY: [[SIZE:%.+]] = mul nuw i64 %{{.+}}, 8
-// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* %{{.+}}, i64 [[SIZE]], i32 128, i1 false)
+// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 %{{.+}}, i8* align 128 %{{.+}}, i64 [[SIZE]], i1 false)
 #endif
 
 
diff --git a/test/OpenMP/parallel_firstprivate_messages.cpp b/test/OpenMP/parallel_firstprivate_messages.cpp
index fc0eb4c..7de268f 100644
--- a/test/OpenMP/parallel_firstprivate_messages.cpp
+++ b/test/OpenMP/parallel_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
@@ -56,7 +58,7 @@
 }
 
 int main(int argc, char **argv) {
-  const int d = 5;
+  const int d = 5; // expected-note {{variable 'd' declared const here}}
   const int da[5] = { 0 };
   S4 e(4);
   S5 g(5);
@@ -72,6 +74,8 @@
   #pragma omp parallel firstprivate (argc)
   #pragma omp parallel firstprivate (S1) // expected-error {{'S1' does not refer to a value}}
   #pragma omp parallel firstprivate (a, b, c, d, f) // expected-error {{firstprivate variable with incomplete type 'S1'}}
+  #pragma omp parallel firstprivate (d)
+    d = 5; // expected-error {{cannot assign to variable 'd' with const-qualified type}}
   #pragma omp parallel firstprivate (argv[1]) // expected-error {{expected variable name}}
   #pragma omp parallel firstprivate(ba)
   #pragma omp parallel firstprivate(ca)
diff --git a/test/OpenMP/parallel_for_ast_print.cpp b/test/OpenMP/parallel_for_ast_print.cpp
index 3c36bd8..1e77bc8 100644
--- a/test/OpenMP/parallel_for_ast_print.cpp
+++ b/test/OpenMP/parallel_for_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -35,7 +39,7 @@
   }
 };
 
-// CHECK: #pragma omp parallel for private(this->a) private(this->a) private(T::a)
+// CHECK: #pragma omp parallel for private(this->a) private(this->a) private(T::a){{$}}
 // CHECK: #pragma omp parallel for private(this->a) private(this->a)
 // CHECK: #pragma omp parallel for private(this->a) private(this->a) private(this->S::a)
 
diff --git a/test/OpenMP/parallel_for_codegen.cpp b/test/OpenMP/parallel_for_codegen.cpp
index bc04532..e6199f0 100644
--- a/test/OpenMP/parallel_for_codegen.cpp
+++ b/test/OpenMP/parallel_for_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -O1 -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=CLEANUP
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -O1 -fopenmp-simd -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -15,10 +22,12 @@
   double a = 5;
 // CHECK: [[CHUNK_SIZE:%.+]] = fptosi double %{{.+}}to i8
 // CHECK: store i8 %{{.+}}, i8* [[CHUNK:%.+]],
-// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, i8* [[CHUNK]])
+// CHECK: [[VAL:%.+]] = load i8, i8* [[CHUNK]],
+// CHECK: store i8 [[VAL]], i8*
+// CHECK: [[CHUNK:%.+]] = load i64, i64* %
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, i64 [[CHUNK]])
 
-// CHECK: [[CHUNK:%.+]] = load i8*, i8** %
-// CHECK: [[CHUNK_VAL:%.+]] = load i8, i8* [[CHUNK]],
+// CHECK: [[CHUNK_VAL:%.+]] = load i8, i8* %
 // CHECK: [[CHUNK_SIZE:%.+]] = sext i8 [[CHUNK_VAL]] to i64
 // CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]])
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]])
@@ -345,10 +354,10 @@
 
 // TERM_DEBUG-LABEL: parallel_for
 // CLEANUP: parallel_for
-void parallel_for(float *a, int n) {
+void parallel_for(float *a, const int n) {
   float arr[n];
-#pragma omp parallel for schedule(static, 5) private(arr)
-  // TERM_DEBUG:     __kmpc_global_thread_num
+#pragma omp parallel for schedule(static, 5) private(arr) default(none) firstprivate(n) shared(a)
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
   // TERM_DEBUG:     call void @__kmpc_for_static_init_4u({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
   // TERM_DEBUG:     invoke i32 {{.*}}foo{{.*}}()
   // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
@@ -361,7 +370,7 @@
   // CLEANUP:     call void @__kmpc_for_static_init_4u({{.+}})
   // CLEANUP:     call void @__kmpc_for_static_fini({{.+}})
   for (unsigned i = 131071; i <= 2147483647; i += 127)
-    a[i] += foo() + arr[i];
+    a[i] += foo() + arr[i] + n;
 }
 // Check source line corresponds to "#pragma omp parallel for schedule(static, 5)" above:
 // TERM_DEBUG-DAG: [[DBG_LOC_START]] = !DILocation(line: [[@LINE-4]],
diff --git a/test/OpenMP/parallel_for_collapse_messages.cpp b/test/OpenMP/parallel_for_collapse_messages.cpp
index 4461df8..9d99c25 100644
--- a/test/OpenMP/parallel_for_collapse_messages.cpp
+++ b/test/OpenMP/parallel_for_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_copyin_messages.cpp b/test/OpenMP/parallel_for_copyin_messages.cpp
index 5a5d163..265a195 100644
--- a/test/OpenMP/parallel_for_copyin_messages.cpp
+++ b/test/OpenMP/parallel_for_copyin_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_default_messages.cpp b/test/OpenMP/parallel_for_default_messages.cpp
index d2129a3..7433836 100644
--- a/test/OpenMP/parallel_for_default_messages.cpp
+++ b/test/OpenMP/parallel_for_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/parallel_for_firstprivate_messages.cpp b/test/OpenMP/parallel_for_firstprivate_messages.cpp
index c9a69da..19b1ced7 100644
--- a/test/OpenMP/parallel_for_firstprivate_messages.cpp
+++ b/test/OpenMP/parallel_for_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_if_messages.cpp b/test/OpenMP/parallel_for_if_messages.cpp
index b5a7fdc..1d83d35 100644
--- a/test/OpenMP/parallel_for_if_messages.cpp
+++ b/test/OpenMP/parallel_for_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_lastprivate_messages.cpp b/test/OpenMP/parallel_for_lastprivate_messages.cpp
index 134100b..8d2f4b5 100644
--- a/test/OpenMP/parallel_for_lastprivate_messages.cpp
+++ b/test/OpenMP/parallel_for_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_linear_codegen.cpp b/test/OpenMP/parallel_for_linear_codegen.cpp
index d0968d7..c565097 100644
--- a/test/OpenMP/parallel_for_linear_codegen.cpp
+++ b/test/OpenMP/parallel_for_linear_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -49,6 +56,7 @@
   for (int i = 0; i < 2; ++i) {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}})
     // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: [[G_START_ADDR:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: alloca i{{[0-9]+}},
     // LAMBDA: alloca i{{[0-9]+}},
@@ -96,6 +104,7 @@
   for (int i = 0; i < 2; ++i) {
     // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}})
     // BLOCKS: alloca i{{[0-9]+}},
+    // BLOCKS: alloca i{{[0-9]+}},
     // BLOCKS: [[G_START_ADDR:%.+]] = alloca i{{[0-9]+}},
     // BLOCKS: alloca i{{[0-9]+}},
     // BLOCKS: alloca i{{[0-9]+}},
@@ -155,13 +164,13 @@
 
 // CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float** dereferenceable(8) %{{.+}}, i64* dereferenceable(8) %{{.+}})
 // CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[PVAR_START:%.+]] = alloca float*,
 // CHECK: [[LVAR_START:%.+]] = alloca i64,
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
-// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[PVAR_PRIV:%.+]] = alloca float*,
 // CHECK: [[LVAR_PRIV:%.+]] = alloca i64,
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
@@ -205,13 +214,13 @@
 //
 // CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32** dereferenceable(8) %{{.+}}, i32* dereferenceable(4) %{{.+}})
 // CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[PVAR_START:%.+]] = alloca i32*,
 // CHECK: [[LVAR_START:%.+]] = alloca i32,
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
-// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[PVAR_PRIV:%.+]] = alloca i32*,
 // CHECK: [[LVAR_PRIV:%.+]] = alloca i32,
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
diff --git a/test/OpenMP/parallel_for_linear_messages.cpp b/test/OpenMP/parallel_for_linear_messages.cpp
index e5f5b61..080add6 100644
--- a/test/OpenMP/parallel_for_linear_messages.cpp
+++ b/test/OpenMP/parallel_for_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
 int x;
 };
diff --git a/test/OpenMP/parallel_for_loop_messages.cpp b/test/OpenMP/parallel_for_loop_messages.cpp
index cf1f619..193c84e 100644
--- a/test/OpenMP/parallel_for_loop_messages.cpp
+++ b/test/OpenMP/parallel_for_loop_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 class S {
   int a;
   S() : a(0) {}
diff --git a/test/OpenMP/parallel_for_messages.cpp b/test/OpenMP/parallel_for_messages.cpp
index 7c4f926..9ab4bcc 100644
--- a/test/OpenMP/parallel_for_messages.cpp
+++ b/test/OpenMP/parallel_for_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++11 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_misc_messages.c b/test/OpenMP/parallel_for_misc_messages.c
index 1a773be..2d12ba4 100644
--- a/test/OpenMP/parallel_for_misc_messages.c
+++ b/test/OpenMP/parallel_for_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp parallel for'}}
 #pragma omp parallel for
 
diff --git a/test/OpenMP/parallel_for_num_threads_messages.cpp b/test/OpenMP/parallel_for_num_threads_messages.cpp
index 10a4e1b..6f57d6e 100644
--- a/test/OpenMP/parallel_for_num_threads_messages.cpp
+++ b/test/OpenMP/parallel_for_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_ordered_messages.cpp b/test/OpenMP/parallel_for_ordered_messages.cpp
index 055fe1b..381bc6a 100644
--- a/test/OpenMP/parallel_for_ordered_messages.cpp
+++ b/test/OpenMP/parallel_for_ordered_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_private_messages.cpp b/test/OpenMP/parallel_for_private_messages.cpp
index cc1b79f..7efc6ee 100644
--- a/test/OpenMP/parallel_for_private_messages.cpp
+++ b/test/OpenMP/parallel_for_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_proc_bind_messages.cpp b/test/OpenMP/parallel_for_proc_bind_messages.cpp
index 0f9c47f..0ffff89 100644
--- a/test/OpenMP/parallel_for_proc_bind_messages.cpp
+++ b/test/OpenMP/parallel_for_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/parallel_for_reduction_messages.cpp b/test/OpenMP/parallel_for_reduction_messages.cpp
index 6499ef5..5cfc2b9 100644
--- a/test/OpenMP/parallel_for_reduction_messages.cpp
+++ b/test/OpenMP/parallel_for_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_schedule_messages.cpp b/test/OpenMP/parallel_for_schedule_messages.cpp
index f446609..5664529 100644
--- a/test/OpenMP/parallel_for_schedule_messages.cpp
+++ b/test/OpenMP/parallel_for_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_aligned_messages.cpp b/test/OpenMP/parallel_for_simd_aligned_messages.cpp
index eed8017..75f4adb 100644
--- a/test/OpenMP/parallel_for_simd_aligned_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/parallel_for_simd_ast_print.cpp b/test/OpenMP/parallel_for_simd_ast_print.cpp
index 2668961..fd41537 100644
--- a/test/OpenMP/parallel_for_simd_ast_print.cpp
+++ b/test/OpenMP/parallel_for_simd_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -35,7 +39,7 @@
   }
 };
 
-// CHECK: #pragma omp parallel for simd private(this->a) private(this->a) private(T::a)
+// CHECK: #pragma omp parallel for simd private(this->a) private(this->a) private(T::a){{$}}
 // CHECK: #pragma omp parallel for simd private(this->a) private(this->a)
 // CHECK: #pragma omp parallel for simd private(this->a) private(this->a) private(this->S1::a)
 
diff --git a/test/OpenMP/parallel_for_simd_codegen.cpp b/test/OpenMP/parallel_for_simd_codegen.cpp
index 9112635..2c9d9a9 100644
--- a/test/OpenMP/parallel_for_simd_codegen.cpp
+++ b/test/OpenMP/parallel_for_simd_codegen.cpp
@@ -2,6 +2,12 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -668,7 +674,7 @@
 // TERM_DEBUG-LABEL: parallel_simd
 void parallel_simd(float *a) {
 #pragma omp parallel for simd
-  // TERM_DEBUG:     __kmpc_global_thread_num
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
   // TERM_DEBUG:     invoke i32 {{.*}}bar{{.*}}()
   // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
   // TERM_DEBUG-NOT: __kmpc_global_thread_num
@@ -679,5 +685,6 @@
     a[i] += bar();
 }
 // TERM_DEBUG: !{{[0-9]+}} = !DILocation(line: [[@LINE-11]],
+// TERM_DEBUG-NOT: line: 0,
 #endif // HEADER
 
diff --git a/test/OpenMP/parallel_for_simd_collapse_messages.cpp b/test/OpenMP/parallel_for_simd_collapse_messages.cpp
index c7effbd..d23086b 100644
--- a/test/OpenMP/parallel_for_simd_collapse_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_copyin_messages.cpp b/test/OpenMP/parallel_for_simd_copyin_messages.cpp
index 1e6fdc9..6b492b1 100644
--- a/test/OpenMP/parallel_for_simd_copyin_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_copyin_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_default_messages.cpp b/test/OpenMP/parallel_for_simd_default_messages.cpp
index 9bd8113..2fccb56 100644
--- a/test/OpenMP/parallel_for_simd_default_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/parallel_for_simd_firstprivate_messages.cpp b/test/OpenMP/parallel_for_simd_firstprivate_messages.cpp
index 7a08572..e1e01aa 100644
--- a/test/OpenMP/parallel_for_simd_firstprivate_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_if_messages.cpp b/test/OpenMP/parallel_for_simd_if_messages.cpp
index 7842524..08f8b46 100644
--- a/test/OpenMP/parallel_for_simd_if_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_lastprivate_messages.cpp b/test/OpenMP/parallel_for_simd_lastprivate_messages.cpp
index 8670e55..84e0896 100644
--- a/test/OpenMP/parallel_for_simd_lastprivate_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_linear_messages.cpp b/test/OpenMP/parallel_for_simd_linear_messages.cpp
index fc1895a..a6bcf64 100644
--- a/test/OpenMP/parallel_for_simd_linear_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
   int x;
 };
diff --git a/test/OpenMP/parallel_for_simd_loop_messages.cpp b/test/OpenMP/parallel_for_simd_loop_messages.cpp
index 7feb4b0..d9d05cc 100644
--- a/test/OpenMP/parallel_for_simd_loop_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_loop_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 class S {
   int a;
   S() : a(0) {}
diff --git a/test/OpenMP/parallel_for_simd_messages.cpp b/test/OpenMP/parallel_for_simd_messages.cpp
index 18f25fa..f1d4c5b 100644
--- a/test/OpenMP/parallel_for_simd_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_misc_messages.c b/test/OpenMP/parallel_for_simd_misc_messages.c
index 378c48f..d3565bf 100644
--- a/test/OpenMP/parallel_for_simd_misc_messages.c
+++ b/test/OpenMP/parallel_for_simd_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp parallel for simd'}}
 #pragma omp parallel for simd
 
diff --git a/test/OpenMP/parallel_for_simd_num_threads_messages.cpp b/test/OpenMP/parallel_for_simd_num_threads_messages.cpp
index 940565c..06460c3 100644
--- a/test/OpenMP/parallel_for_simd_num_threads_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_private_messages.cpp b/test/OpenMP/parallel_for_simd_private_messages.cpp
index a33b35d..5d263d6 100644
--- a/test/OpenMP/parallel_for_simd_private_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_proc_bind_messages.cpp b/test/OpenMP/parallel_for_simd_proc_bind_messages.cpp
index a05b150..26cc729 100644
--- a/test/OpenMP/parallel_for_simd_proc_bind_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/parallel_for_simd_reduction_messages.cpp b/test/OpenMP/parallel_for_simd_reduction_messages.cpp
index 8ff246e..32222ac 100644
--- a/test/OpenMP/parallel_for_simd_reduction_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_safelen_messages.cpp b/test/OpenMP/parallel_for_simd_safelen_messages.cpp
index 3e643c6..49ebf42 100644
--- a/test/OpenMP/parallel_for_simd_safelen_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_schedule_messages.cpp b/test/OpenMP/parallel_for_simd_schedule_messages.cpp
index 4135427..29e46a7 100644
--- a/test/OpenMP/parallel_for_simd_schedule_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_for_simd_simdlen_messages.cpp b/test/OpenMP/parallel_for_simd_simdlen_messages.cpp
index fa9e0d6..167644e 100644
--- a/test/OpenMP/parallel_for_simd_simdlen_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_simdlen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_if_codegen.cpp b/test/OpenMP/parallel_if_codegen.cpp
index 87dcf1d..e59415a 100644
--- a/test/OpenMP/parallel_if_codegen.cpp
+++ b/test/OpenMP/parallel_if_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/parallel_if_messages.cpp b/test/OpenMP/parallel_if_messages.cpp
index 094e83b..d0a4c6d 100644
--- a/test/OpenMP/parallel_if_messages.cpp
+++ b/test/OpenMP/parallel_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_messages.cpp b/test/OpenMP/parallel_messages.cpp
index 4db55a0..8b0c035 100644
--- a/test/OpenMP/parallel_messages.cpp
+++ b/test/OpenMP/parallel_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++11 -o - %s
+
 void foo() {
 }
 
@@ -29,6 +31,8 @@
   foo();
   L1:
     foo();
+  #pragma omp parallel ordered // expected-error {{unexpected OpenMP clause 'ordered' in directive '#pragma omp parallel'}}
+    ;
   #pragma omp parallel
   ;
   #pragma omp parallel
diff --git a/test/OpenMP/parallel_num_threads_codegen.cpp b/test/OpenMP/parallel_num_threads_codegen.cpp
index c5f11bd..9ec712f 100644
--- a/test/OpenMP/parallel_num_threads_codegen.cpp
+++ b/test/OpenMP/parallel_num_threads_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/parallel_num_threads_messages.cpp b/test/OpenMP/parallel_num_threads_messages.cpp
index adcf6ca..310e7cc 100644
--- a/test/OpenMP/parallel_num_threads_messages.cpp
+++ b/test/OpenMP/parallel_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_private_codegen.cpp b/test/OpenMP/parallel_private_codegen.cpp
index fbb9b18..33a5b4c 100644
--- a/test/OpenMP/parallel_private_codegen.cpp
+++ b/test/OpenMP/parallel_private_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -96,7 +103,7 @@
   static int sivar;
   SS ss(sivar);
 #ifdef LAMBDA
-  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA: [[G:@.+]] = {{(dso_local )?}}global i{{[0-9]+}} 1212,
   // LAMBDA-LABEL: @main
   // LAMBDA: alloca [[SS_TY]],
   // LAMBDA: alloca [[CAP_TY:%.+]],
@@ -168,7 +175,7 @@
   }();
   return 0;
 #elif defined(BLOCKS)
-  // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // BLOCKS: [[G:@.+]] = {{(dso_local )?}}global i{{[0-9]+}} 1212,
   // BLOCKS-LABEL: @main
   // BLOCKS: call
   // BLOCKS: call{{.*}} void {{%.+}}(i8
diff --git a/test/OpenMP/parallel_private_messages.cpp b/test/OpenMP/parallel_private_messages.cpp
index ab535b4..4adee55 100644
--- a/test/OpenMP/parallel_private_messages.cpp
+++ b/test/OpenMP/parallel_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_proc_bind_codegen.cpp b/test/OpenMP/parallel_proc_bind_codegen.cpp
index 34a64de..d374a11 100644
--- a/test/OpenMP/parallel_proc_bind_codegen.cpp
+++ b/test/OpenMP/parallel_proc_bind_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/parallel_proc_bind_messages.cpp b/test/OpenMP/parallel_proc_bind_messages.cpp
index 78ba297..91be0f1 100644
--- a/test/OpenMP/parallel_proc_bind_messages.cpp
+++ b/test/OpenMP/parallel_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/parallel_reduction_codegen.cpp b/test/OpenMP/parallel_reduction_codegen.cpp
index c4c0ff4..c064ed0 100644
--- a/test/OpenMP/parallel_reduction_codegen.cpp
+++ b/test/OpenMP/parallel_reduction_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -407,7 +414,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // var1 = var1.operator &&(var1_reduction);
 // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_REF]])
@@ -423,7 +430,7 @@
 // CHECK:  call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // t_var1 = min(t_var1, t_var1_reduction);
 // CHECK: [[T_VAR1_VAL:%.+]] = load float, float* [[T_VAR1_REF]],
@@ -461,7 +468,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 
 // var1 = var1.operator &&(var1_reduction);
@@ -479,7 +486,7 @@
 // CHECK:  call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 
 // t_var1 = min(t_var1, t_var1_reduction);
@@ -561,7 +568,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_LHS]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_RHS]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_LHS]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // var1_lhs = var1_lhs.operator &&(var1_rhs);
 // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_LHS]])
@@ -577,7 +584,7 @@
 // CHECK:  call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_LHS]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // t_var1_lhs = min(t_var1_lhs, t_var1_rhs);
 // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load float, float* [[T_VAR1_LHS]],
@@ -730,7 +737,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // var1 = var1.operator &&(var1_reduction);
 // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_REF]])
@@ -745,7 +752,7 @@
 // CHECK:  call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // t_var1 = min(t_var1, t_var1_reduction);
 // CHECK: [[T_VAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_REF]],
@@ -771,7 +778,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 
 // var1 = var1.operator &&(var1_reduction);
@@ -789,7 +796,7 @@
 // CHECK:  call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 
 // t_var1 = min(t_var1, t_var1_reduction);
@@ -857,7 +864,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_LHS]], [[S_INT_TY]]* dereferenceable(4) [[VAR_RHS]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_LHS]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // var1_lhs = var1_lhs.operator &&(var1_rhs);
 // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_LHS]])
@@ -873,7 +880,7 @@
 // CHECK:  call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_LHS]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // t_var1_lhs = min(t_var1_lhs, t_var1_rhs);
 // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_LHS]],
diff --git a/test/OpenMP/parallel_reduction_messages.cpp b/test/OpenMP/parallel_reduction_messages.cpp
index 96f4b58..b3619bb 100644
--- a/test/OpenMP/parallel_reduction_messages.cpp
+++ b/test/OpenMP/parallel_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_sections_ast_print.cpp b/test/OpenMP/parallel_sections_ast_print.cpp
index da3bd48..8c0bd0b 100644
--- a/test/OpenMP/parallel_sections_ast_print.cpp
+++ b/test/OpenMP/parallel_sections_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -54,7 +58,7 @@
 // CHECK-NEXT: T b = argc, c, d, e, f, g;
 // CHECK-NEXT: static T a;
 // CHECK-NEXT: S<T> s;
-// CHECK-NEXT: #pragma omp parallel sections
+// CHECK-NEXT: #pragma omp parallel sections{{$}}
 // CHECK-NEXT: {
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: }
diff --git a/test/OpenMP/parallel_sections_codegen.cpp b/test/OpenMP/parallel_sections_codegen.cpp
index afbc6e4..eadc493 100644
--- a/test/OpenMP/parallel_sections_codegen.cpp
+++ b/test/OpenMP/parallel_sections_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -74,7 +79,7 @@
 
 // CHECK-LABEL: tmain
 // CHECK:       call void {{.*}} @__kmpc_fork_call(
-// CHECK:       __kmpc_global_thread_num
+// CHECK-NOT:   __kmpc_global_thread_num
 // CHECK:       call void @__kmpc_for_static_init_4(
 // CHECK:       invoke void @{{.*}}foo{{.*}}()
 // CHECK-NEXT:  unwind label %[[TERM_LPAD:.+]]
diff --git a/test/OpenMP/parallel_sections_copyin_messages.cpp b/test/OpenMP/parallel_sections_copyin_messages.cpp
index 62b5d05..da0ef5c 100644
--- a/test/OpenMP/parallel_sections_copyin_messages.cpp
+++ b/test/OpenMP/parallel_sections_copyin_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_sections_default_messages.cpp b/test/OpenMP/parallel_sections_default_messages.cpp
index 9b2bdad..a62b86f 100644
--- a/test/OpenMP/parallel_sections_default_messages.cpp
+++ b/test/OpenMP/parallel_sections_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/parallel_sections_firstprivate_messages.cpp b/test/OpenMP/parallel_sections_firstprivate_messages.cpp
index b733aab..f8370f2 100644
--- a/test/OpenMP/parallel_sections_firstprivate_messages.cpp
+++ b/test/OpenMP/parallel_sections_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_sections_if_messages.cpp b/test/OpenMP/parallel_sections_if_messages.cpp
index ce067a2..b03ed35 100644
--- a/test/OpenMP/parallel_sections_if_messages.cpp
+++ b/test/OpenMP/parallel_sections_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_sections_lastprivate_messages.cpp b/test/OpenMP/parallel_sections_lastprivate_messages.cpp
index eccbfc5..865dda0 100644
--- a/test/OpenMP/parallel_sections_lastprivate_messages.cpp
+++ b/test/OpenMP/parallel_sections_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_sections_messages.cpp b/test/OpenMP/parallel_sections_messages.cpp
index 1e71a30..459953a 100644
--- a/test/OpenMP/parallel_sections_messages.cpp
+++ b/test/OpenMP/parallel_sections_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++11 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_sections_misc_messages.c b/test/OpenMP/parallel_sections_misc_messages.c
index 203d12c..780837a 100644
--- a/test/OpenMP/parallel_sections_misc_messages.c
+++ b/test/OpenMP/parallel_sections_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 void foo();
 
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp parallel sections'}}
diff --git a/test/OpenMP/parallel_sections_num_threads_messages.cpp b/test/OpenMP/parallel_sections_num_threads_messages.cpp
index cda3841..42bdee2 100644
--- a/test/OpenMP/parallel_sections_num_threads_messages.cpp
+++ b/test/OpenMP/parallel_sections_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_sections_private_messages.cpp b/test/OpenMP/parallel_sections_private_messages.cpp
index 40b0138..75e3f17 100644
--- a/test/OpenMP/parallel_sections_private_messages.cpp
+++ b/test/OpenMP/parallel_sections_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_sections_proc_bind_messages.cpp b/test/OpenMP/parallel_sections_proc_bind_messages.cpp
index 79796da..bb173b6 100644
--- a/test/OpenMP/parallel_sections_proc_bind_messages.cpp
+++ b/test/OpenMP/parallel_sections_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/parallel_sections_reduction_messages.cpp b/test/OpenMP/parallel_sections_reduction_messages.cpp
index 26b3a53..bbd3737 100644
--- a/test/OpenMP/parallel_sections_reduction_messages.cpp
+++ b/test/OpenMP/parallel_sections_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_sections_shared_messages.cpp b/test/OpenMP/parallel_sections_shared_messages.cpp
index 2b83597..1ab923c 100644
--- a/test/OpenMP/parallel_sections_shared_messages.cpp
+++ b/test/OpenMP/parallel_sections_shared_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/parallel_shared_messages.cpp b/test/OpenMP/parallel_shared_messages.cpp
index f6f4573..915b6a2 100644
--- a/test/OpenMP/parallel_shared_messages.cpp
+++ b/test/OpenMP/parallel_shared_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/predefined_macro.c b/test/OpenMP/predefined_macro.c
index e18c3d2..af60e95 100644
--- a/test/OpenMP/predefined_macro.c
+++ b/test/OpenMP/predefined_macro.c
@@ -1,5 +1,8 @@
 // RUN: %clang_cc1 -fopenmp -verify -DFOPENMP -o - %s
 // RUN: %clang_cc1 -verify -o - %s
+
+// RUN: %clang_cc1 -fopenmp-simd -verify -DFOPENMP -o - %s
+// RUN: %clang_cc1 -verify -o - %s
 // expected-no-diagnostics
 #ifdef FOPENMP
 // -fopenmp option is specified
diff --git a/test/OpenMP/report_default_DSA.cpp b/test/OpenMP/report_default_DSA.cpp
index d14cd5c..ba620a4 100644
--- a/test/OpenMP/report_default_DSA.cpp
+++ b/test/OpenMP/report_default_DSA.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 %s
+
 void foo(int x, int n) {
   double vec[n];
   for (int iter = 0; iter < x; iter++) {
diff --git a/test/OpenMP/schedule_codegen.cpp b/test/OpenMP/schedule_codegen.cpp
index bd5ec86..394eae4 100644
--- a/test/OpenMP/schedule_codegen.cpp
+++ b/test/OpenMP/schedule_codegen.cpp
@@ -1,5 +1,8 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm -fexceptions -fcxx-exceptions -o - %s | FileCheck %s
 
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm -fexceptions -fcxx-exceptions -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 int main() {
 // CHECK: @__kmpc_for_static_init
 // CHECK-NOT: !llvm.mem.parallel_loop_access
diff --git a/test/OpenMP/sections_ast_print.cpp b/test/OpenMP/sections_ast_print.cpp
index 7fecd5b..15c8c26 100644
--- a/test/OpenMP/sections_ast_print.cpp
+++ b/test/OpenMP/sections_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -19,7 +23,7 @@
     foo();
   }
   // CHECK-NEXT: #pragma omp parallel
-  // CHECK-NEXT: #pragma omp sections private(argc,b) firstprivate(c,d) lastprivate(d,f) reduction(-: g) nowait
+  // CHECK-NEXT: #pragma omp sections private(argc,b) firstprivate(c,d) lastprivate(d,f) reduction(-: g) nowait{{$}}
   // CHECK-NEXT: {
   // CHECK-NEXT: foo();
   // CHECK-NEXT: }
@@ -42,7 +46,7 @@
   // CHECK-NEXT: #pragma omp parallel
   // CHECK-NEXT: #pragma omp sections private(argc,b) firstprivate(argv,c) lastprivate(d,f) reduction(+: g) nowait
   // CHECK-NEXT: {
-  // CHECK-NEXT: #pragma omp section
+  // CHECK-NEXT: #pragma omp section{{$}}
   // CHECK-NEXT: foo();
   // CHECK-NEXT: #pragma omp section
   // CHECK-NEXT: foo();
diff --git a/test/OpenMP/sections_codegen.cpp b/test/OpenMP/sections_codegen.cpp
index 0ed87e4..ba918c3 100644
--- a/test/OpenMP/sections_codegen.cpp
+++ b/test/OpenMP/sections_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -84,7 +89,7 @@
 
 // CHECK-LABEL: tmain
 // CHECK:       call void {{.*}} @__kmpc_fork_call(
-// CHECK:       __kmpc_global_thread_num
+// CHECK-NOT:   __kmpc_global_thread_num
 // CHECK:       call void @__kmpc_for_static_init_4(
 // CHECK:       invoke void @{{.*}}foo{{.*}}()
 // CHECK-NEXT:  unwind label %[[TERM_LPAD:.+]]
diff --git a/test/OpenMP/sections_firstprivate_codegen.cpp b/test/OpenMP/sections_firstprivate_codegen.cpp
index a5426f8..467c10f 100644
--- a/test/OpenMP/sections_firstprivate_codegen.cpp
+++ b/test/OpenMP/sections_firstprivate_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -218,7 +225,7 @@
 
 // firstprivate vec(vec)
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
 
 // firstprivate s_arr(s_arr)
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
@@ -256,11 +263,7 @@
 // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: [[T_VARVAL:%.+]] = load i32, i32* %{{.+}},
-// CHECK: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST:%.+]] to i32*
-// CHECK: store i32  [[T_VARVAL]], i32* [[T_VARCONV]],
-// CHECK: [[T_VARPVT:%.+]] = load i64, i64* [[T_VARCAST]],
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void {{.*}}i64 [[T_VARPVT]],
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void
 // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
 // CHECK: ret
 //
@@ -271,14 +274,13 @@
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
-// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
-// CHECK-NOT: load i{{[0-9]+}}*, i{{[0-9]+}}** %
+// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %
 // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** %
 // CHECK: [[S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** %
 // CHECK: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
@@ -290,7 +292,7 @@
 // firstprivate vec(vec)
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
 // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]],
 
 // firstprivate s_arr(s_arr)
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
diff --git a/test/OpenMP/sections_firstprivate_messages.cpp b/test/OpenMP/sections_firstprivate_messages.cpp
index cd2b4b8..425125c 100644
--- a/test/OpenMP/sections_firstprivate_messages.cpp
+++ b/test/OpenMP/sections_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/sections_lastprivate_codegen.cpp b/test/OpenMP/sections_lastprivate_codegen.cpp
index c9a224d..bb6a54f 100644
--- a/test/OpenMP/sections_lastprivate_codegen.cpp
+++ b/test/OpenMP/sections_lastprivate_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -335,7 +342,7 @@
 // original vec[]=private_vec[];
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
 // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]],
 
 // original s_arr[]=private_s_arr[];
 // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
diff --git a/test/OpenMP/sections_lastprivate_messages.cpp b/test/OpenMP/sections_lastprivate_messages.cpp
index 01bb13d..e24b58f 100644
--- a/test/OpenMP/sections_lastprivate_messages.cpp
+++ b/test/OpenMP/sections_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/sections_misc_messages.c b/test/OpenMP/sections_misc_messages.c
index da20aa1..ba03085 100644
--- a/test/OpenMP/sections_misc_messages.c
+++ b/test/OpenMP/sections_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 void foo();
 
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp sections'}}
diff --git a/test/OpenMP/sections_private_codegen.cpp b/test/OpenMP/sections_private_codegen.cpp
index 337a0bf..6a6d4fe 100644
--- a/test/OpenMP/sections_private_codegen.cpp
+++ b/test/OpenMP/sections_private_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -40,7 +47,7 @@
 int main() {
   static int sivar;
 #ifdef LAMBDA
-  // LAMBDA: [[G:@.+]] = global double
+  // LAMBDA: [[G:@.+]] = {{(dso_local )?}}global double
   // LAMBDA-LABEL: @main
   // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
   [&]() {
@@ -85,7 +92,7 @@
   }();
   return 0;
 #elif defined(BLOCKS)
-  // BLOCKS: [[G:@.+]] = global double
+  // BLOCKS: [[G:@.+]] = {{(dso_local )?}}global double
   // BLOCKS-LABEL: @main
   // BLOCKS: call {{.*}}void {{%.+}}(i8
   ^{
diff --git a/test/OpenMP/sections_private_messages.cpp b/test/OpenMP/sections_private_messages.cpp
index 27bb313..81e3eb4 100644
--- a/test/OpenMP/sections_private_messages.cpp
+++ b/test/OpenMP/sections_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/sections_reduction_codegen.cpp b/test/OpenMP/sections_reduction_codegen.cpp
index 8c474d5..c76e3e1 100644
--- a/test/OpenMP/sections_reduction_codegen.cpp
+++ b/test/OpenMP/sections_reduction_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -296,7 +303,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // var1 = var1.operator &&(var1_reduction);
 // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_REF]])
@@ -312,7 +319,7 @@
 // CHECK:  call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // t_var1 = min(t_var1, t_var1_reduction);
 // CHECK: [[T_VAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_REF]],
@@ -338,7 +345,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 
 // var1 = var1.operator &&(var1_reduction);
@@ -356,7 +363,7 @@
 // CHECK:  call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 // CHECK: call void @__kmpc_end_critical(
 
 // t_var1 = min(t_var1, t_var1_reduction);
@@ -423,7 +430,7 @@
 // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_LHS]], [[S_INT_TY]]* dereferenceable(4) [[VAR_RHS]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_LHS]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // var1_lhs = var1_lhs.operator &&(var1_rhs);
 // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_LHS]])
@@ -439,7 +446,7 @@
 // CHECK:  call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]])
 // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_LHS]] to i8*
 // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
 
 // t_var1_lhs = min(t_var1_lhs, t_var1_rhs);
 // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_LHS]],
diff --git a/test/OpenMP/sections_reduction_messages.cpp b/test/OpenMP/sections_reduction_messages.cpp
index 58b22a5..bf302cf 100644
--- a/test/OpenMP/sections_reduction_messages.cpp
+++ b/test/OpenMP/sections_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/simd_aligned_messages.cpp b/test/OpenMP/simd_aligned_messages.cpp
index d936fed..7621d7d 100644
--- a/test/OpenMP/simd_aligned_messages.cpp
+++ b/test/OpenMP/simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/simd_ast_print.cpp b/test/OpenMP/simd_ast_print.cpp
index 9fcd459..ad16fe2 100644
--- a/test/OpenMP/simd_ast_print.cpp
+++ b/test/OpenMP/simd_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -34,7 +38,7 @@
   }
 };
 
-// CHECK: #pragma omp simd aligned(this->a)
+// CHECK: #pragma omp simd aligned(this->a){{$}}
 // CHECK: #pragma omp simd aligned(this->b: 8)
 // CHECK: #pragma omp simd aligned(this->a)
 
@@ -149,7 +153,7 @@
   static int *a;
 // CHECK: static int *a;
 #pragma omp simd
-// CHECK-NEXT: #pragma omp simd
+// CHECK-NEXT: #pragma omp simd{{$}}
   for (int i=0; i < 2; ++i)*a=2;
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: *a = 2;
diff --git a/test/OpenMP/simd_codegen.cpp b/test/OpenMP/simd_codegen.cpp
index 2cc099f..8832730 100644
--- a/test/OpenMP/simd_codegen.cpp
+++ b/test/OpenMP/simd_codegen.cpp
@@ -1,706 +1,715 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
-// expected-no-diagnostics
- #ifndef HEADER
- #define HEADER
-
-// CHECK: [[SS_TY:%.+]] = type { i32 }
-
-long long get_val() { return 0; }
-double *g_ptr;
-
-// CHECK-LABEL: define {{.*void}} @{{.*}}simple{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
-void simple(float *a, float *b, float *c, float *d) {
-  #pragma omp simd
-// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
-
-// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP:%.+]] = icmp slt i32 [[IV]], 6
-// CHECK-NEXT: br i1 [[CMP]], label %[[SIMPLE_LOOP1_BODY:.+]], label %[[SIMPLE_LOOP1_END:[^,]+]]
-  for (int i = 3; i < 32; i += 5) {
-// CHECK: [[SIMPLE_LOOP1_BODY]]:
-// Start of body: calculate i from IV:
-// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
-// CHECK: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 5
-// CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i32 3, [[CALC_I_1]]
-// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
-// ... loop body ...
-// End of body: store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
-    a[i] = b[i] * c[i] * d[i];
-// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
-// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
-// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
-// br label %{{.+}}, !llvm.loop !{{.+}}
-  }
-// CHECK: [[SIMPLE_LOOP1_END]]:
-
-  long long k = get_val();
-
-  #pragma omp simd linear(k : 3)
-// CHECK: [[K0:%.+]] = call {{.*}}i64 @{{.*}}get_val
-// CHECK-NEXT: store i64 [[K0]], i64* [[K_VAR:%[^,]+]]
-// CHECK: store i32 0, i32* [[OMP_IV2:%[^,]+]]
-// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_VAR]]
-// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
-
-// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV2]], 9
-// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP2_BODY:.+]], label %[[SIMPLE_LOOP2_END:[^,]+]]
-  for (int i = 10; i > 1; i--) {
-// CHECK: [[SIMPLE_LOOP2_BODY]]:
-// Start of body: calculate i from IV:
-// CHECK: [[IV2_0:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
-// FIXME: It is interesting, why the following "mul 1" was not constant folded?
-// CHECK-NEXT: [[IV2_1:%.+]] = mul nsw i32 [[IV2_0]], 1
-// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV2_1]]
-// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
-//
-// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
-// CHECK-NEXT: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
-// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV2_2]], 3
-// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
-// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
-// Update of the privatized version of linear variable!
-// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
-    a[k]++;
-    k = k + 3;
-// CHECK: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
-// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV2_2]], 1
-// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
-// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP2_ID]]
-  }
-// CHECK: [[SIMPLE_LOOP2_END]]:
-//
-// Update linear vars after loop, as the loop was operating on a private version.
-// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
-// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
-// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_VAR]]
-//
-
-  int lin = 12;
-  #pragma omp simd linear(lin : get_val()), linear(g_ptr)
-
-// Init linear private var.
-// CHECK: store i32 12, i32* [[LIN_VAR:%[^,]+]]
-// CHECK: store i64 0, i64* [[OMP_IV3:%[^,]+]]
-
-// CHECK: [[LIN_LOAD:%.+]] = load i32, i32* [[LIN_VAR]]
-// CHECK-NEXT: store i32 [[LIN_LOAD]], i32* [[LIN_START:%[^,]+]]
-// Remember linear step.
-// CHECK: [[CALL_VAL:%.+]] = invoke
-// CHECK: store i64 [[CALL_VAL]], i64* [[LIN_STEP:%[^,]+]]
-
-// CHECK: [[GLIN_LOAD:%.+]] = load double*, double** [[GLIN_VAR:@[^,]+]]
-// CHECK-NEXT: store double* [[GLIN_LOAD]], double** [[GLIN_START:%[^,]+]]
-
-// CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP3:%.+]] = icmp ult i64 [[IV3]], 4
-// CHECK-NEXT: br i1 [[CMP3]], label %[[SIMPLE_LOOP3_BODY:.+]], label %[[SIMPLE_LOOP3_END:[^,]+]]
-  for (unsigned long long it = 2000; it >= 600; it-=400) {
-// CHECK: [[SIMPLE_LOOP3_BODY]]:
-// Start of body: calculate it from IV:
-// CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK-NEXT: [[LC_IT_1:%.+]] = mul i64 [[IV3_0]], 400
-// CHECK-NEXT: [[LC_IT_2:%.+]] = sub i64 2000, [[LC_IT_1]]
-// CHECK-NEXT: store i64 [[LC_IT_2]], i64* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-//
-// Linear start and step are used to calculate current value of the linear variable.
-// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK-NEXT: [[MUL:%.+]] = mul i64 [[IV3_1]], 1
-// CHECK: [[GEP:%.+]] = getelementptr{{.*}}[[GLINSTART]]
-// CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-    *g_ptr++ = 0.0;
-// CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: store double{{.*}}[[GEP_VAL]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-    a[it + lin]++;
-// CHECK: [[FLT_INC:%.+]] = fadd float
-// CHECK-NEXT: store float [[FLT_INC]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK-NEXT: [[ADD3_2:%.+]] = add i64 [[IV3_2]], 1
-// CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-  }
-// CHECK: [[SIMPLE_LOOP3_END]]:
-//
-// Linear start and step are used to calculate final value of the linear variables.
-// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]
-// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]
-// CHECK: store i32 {{.+}}, i32* [[LIN_VAR]],
-// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]
-// CHECK: store double* {{.*}}[[GLIN_VAR]]
-
-  #pragma omp simd
-// CHECK: store i32 0, i32* [[OMP_IV4:%[^,]+]]
-
-// CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP4:%.+]] = icmp slt i32 [[IV4]], 4
-// CHECK-NEXT: br i1 [[CMP4]], label %[[SIMPLE_LOOP4_BODY:.+]], label %[[SIMPLE_LOOP4_END:[^,]+]]
-  for (short it = 6; it <= 20; it-=-4) {
-// CHECK: [[SIMPLE_LOOP4_BODY]]:
-// Start of body: calculate it from IV:
-// CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
-// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV4_0]], 4
-// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 6, [[LC_IT_1]]
-// CHECK-NEXT: [[LC_IT_3:%.+]] = trunc i32 [[LC_IT_2]] to i16
-// CHECK-NEXT: store i16 [[LC_IT_3]], i16* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
-
-// CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
-// CHECK-NEXT: [[ADD4_2:%.+]] = add nsw i32 [[IV4_2]], 1
-// CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
-  }
-// CHECK: [[SIMPLE_LOOP4_END]]:
-
-  #pragma omp simd
-// CHECK: store i32 0, i32* [[OMP_IV5:%[^,]+]]
-
-// CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP5:%.+]] = icmp slt i32 [[IV5]], 26
-// CHECK-NEXT: br i1 [[CMP5]], label %[[SIMPLE_LOOP5_BODY:.+]], label %[[SIMPLE_LOOP5_END:[^,]+]]
-  for (unsigned char it = 'z'; it >= 'a'; it+=-1) {
-// CHECK: [[SIMPLE_LOOP5_BODY]]:
-// Start of body: calculate it from IV:
-// CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
-// CHECK-NEXT: [[IV5_1:%.+]] = mul nsw i32 [[IV5_0]], 1
-// CHECK-NEXT: [[LC_IT_1:%.+]] = sub nsw i32 122, [[IV5_1]]
-// CHECK-NEXT: [[LC_IT_2:%.+]] = trunc i32 [[LC_IT_1]] to i8
-// CHECK-NEXT: store i8 [[LC_IT_2]], i8* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
-
-// CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
-// CHECK-NEXT: [[ADD5_2:%.+]] = add nsw i32 [[IV5_2]], 1
-// CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
-  }
-// CHECK: [[SIMPLE_LOOP5_END]]:
-
-// CHECK-NOT: mul i32 %{{.+}}, 10
-  #pragma omp simd
-  for (unsigned i=100; i<10; i+=10) {
-  }
-
-  int A;
-  // CHECK: store i32 -1, i32* [[A:%.+]],
-  A = -1;
-  #pragma omp simd lastprivate(A)
-// CHECK: store i64 0, i64* [[OMP_IV7:%[^,]+]]
-// CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]]
-// CHECK: [[SIMD_LOOP7_COND]]:
-// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP7:%.+]] = icmp slt i64 [[IV7]], 7
-// CHECK-NEXT: br i1 [[CMP7]], label %[[SIMPLE_LOOP7_BODY:.+]], label %[[SIMPLE_LOOP7_END:[^,]+]]
-  for (long long i = -10; i < 10; i += 3) {
-// CHECK: [[SIMPLE_LOOP7_BODY]]:
-// Start of body: calculate i from IV:
-// CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
-// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV7_0]], 3
-// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
-// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
-// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
-// CHECK-NEXT: [[CONV:%.+]] = trunc i64 [[LC_VAL]] to i32
-// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
-    A = i;
-// CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
-// CHECK-NEXT: [[ADD7_2:%.+]] = add nsw i64 [[IV7_2]], 1
-// CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
-  }
-// CHECK: [[SIMPLE_LOOP7_END]]:
-// CHECK-NEXT: store i64 11, i64*
-// CHECK-NEXT: [[A_PRIV_VAL:%.+]] = load i32, i32* [[A_PRIV]],
-// CHECK-NEXT: store i32 [[A_PRIV_VAL]], i32* [[A]],
-  int R;
-  // CHECK: store i32 -1, i32* [[R:%[^,]+]],
-  R = -1;
-// CHECK: store i64 0, i64* [[OMP_IV8:%[^,]+]],
-// CHECK: store i32 1, i32* [[R_PRIV:%[^,]+]],
-  #pragma omp simd reduction(*:R)
-// CHECK: br label %[[SIMD_LOOP8_COND:[^,]+]]
-// CHECK: [[SIMD_LOOP8_COND]]:
-// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP8:%.+]] = icmp slt i64 [[IV8]], 7
-// CHECK-NEXT: br i1 [[CMP8]], label %[[SIMPLE_LOOP8_BODY:.+]], label %[[SIMPLE_LOOP8_END:[^,]+]]
-  for (long long i = -10; i < 10; i += 3) {
-// CHECK: [[SIMPLE_LOOP8_BODY]]:
-// Start of body: calculate i from IV:
-// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
-// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV8_0]], 3
-// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
-// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
-// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
-// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
-    R *= i;
-// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
-// CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1
-// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
-  }
-// CHECK: [[SIMPLE_LOOP8_END]]:
-// CHECK-DAG: [[R_VAL:%.+]] = load i32, i32* [[R]],
-// CHECK-DAG: [[R_PRIV_VAL:%.+]] = load i32, i32* [[R_PRIV]],
-// CHECK: [[RED:%.+]] = mul nsw i32 [[R_VAL]], [[R_PRIV_VAL]]
-// CHECK-NEXT: store i32 [[RED]], i32* [[R]],
-// CHECK-NEXT: ret void
-}
-
-template <class T, unsigned K> T tfoo(T a) { return a + K; }
-
-template <typename T, unsigned N>
-int templ1(T a, T *z) {
-  #pragma omp simd collapse(N)
-  for (int i = 0; i < N * 2; i++) {
-    for (long long j = 0; j < (N + N + N + N); j += 2) {
-      z[i + j] = a + tfoo<T, N>(i + j);
-    }
-  }
-  return 0;
-}
-
-// Instatiation templ1<float,2>
-// CHECK-LABEL: define {{.*i32}} @{{.*}}templ1{{.*}}(float {{.+}}, float* {{.+}})
-// CHECK: store i64 0, i64* [[T1_OMP_IV:[^,]+]]
-// ...
-// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP1:%.+]] = icmp slt i64 [[IV]], 16
-// CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]]
-// CHECK: [[T1_BODY]]:
-// Loop counters i and j updates:
-// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
-// CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4
-// CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1
-// CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]]
-// CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32
-// CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
-// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
-// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4
-// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2
-// CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]]
-// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
-// simd.for.inc:
-// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
-// CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1
-// CHECK-NEXT: store i64 [[INC]], i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
-// CHECK-NEXT: br label {{%.+}}
-// CHECK: [[T1_END]]:
-// CHECK: ret i32 0
-//
-void inst_templ1() {
-  float a;
-  float z[100];
-  templ1<float,2> (a, z);
-}
-
-
-typedef int MyIdx;
-
-class IterDouble {
-  double *Ptr;
-public:
-  IterDouble operator++ () const {
-    IterDouble n;
-    n.Ptr = Ptr + 1;
-    return n;
-  }
-  bool operator < (const IterDouble &that) const {
-    return Ptr < that.Ptr;
-  }
-  double & operator *() const {
-    return *Ptr;
-  }
-  MyIdx operator - (const IterDouble &that) const {
-    return (MyIdx) (Ptr - that.Ptr);
-  }
-  IterDouble operator + (int Delta) {
-    IterDouble re;
-    re.Ptr = Ptr + Delta;
-    return re;
-  }
-
-  ///~IterDouble() {}
-};
-
-// CHECK-LABEL: define {{.*void}} @{{.*}}iter_simple{{.*}}
-void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) {
-//
-// Calculate number of iterations before the loop body.
-// CHECK: [[DIFF1:%.+]] = invoke {{.*}}i32 @{{.*}}IterDouble{{.*}}
-// CHECK: [[DIFF2:%.+]] = sub nsw i32 [[DIFF1]], 1
-// CHECK-NEXT: [[DIFF3:%.+]] = add nsw i32 [[DIFF2]], 1
-// CHECK-NEXT: [[DIFF4:%.+]] = sdiv i32 [[DIFF3]], 1
-// CHECK-NEXT: [[DIFF5:%.+]] = sub nsw i32 [[DIFF4]], 1
-// CHECK-NEXT: store i32 [[DIFF5]], i32* [[OMP_LAST_IT:%[^,]+]]{{.+}}
-// CHECK: store i32 0, i32* [[IT_OMP_IV:%[^,]+]]
-  #pragma omp simd
-
-// CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}} !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID:[0-9]+]]
-// CHECK-NEXT: [[LAST_IT:%.+]] = load i32, i32* [[OMP_LAST_IT]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
-// CHECK-NEXT: [[NUM_IT:%.+]] = add nsw i32 [[LAST_IT]], 1
-// CHECK-NEXT: [[CMP:%.+]] = icmp slt i32 [[IV]], [[NUM_IT]]
-// CHECK-NEXT: br i1 [[CMP]], label %[[IT_BODY:[^,]+]], label %[[IT_END:[^,]+]]
-  for (IterDouble i = ia; i < ib; ++i) {
-// CHECK: [[IT_BODY]]:
-// Start of body: calculate i from index:
-// CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
-// Call of operator+ (i, IV).
-// CHECK: {{%.+}} = invoke {{.+}} @{{.*}}IterDouble{{.*}}
-// ... loop body ...
-   *i = *ic * 0.5;
-// Float multiply and save result.
-// CHECK: [[MULR:%.+]] = fmul double {{%.+}}, 5.000000e-01
-// CHECK-NEXT: invoke {{.+}} @{{.*}}IterDouble{{.*}}
-// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]], !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
-   ++ic;
-//
-// CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
-// CHECK-NEXT: [[ADD2:%.+]] = add nsw i32 [[IV2]], 1
-// CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
-// br label %{{.*}}, !llvm.loop ![[ITER_LOOP_ID]]
-  }
-// CHECK: [[IT_END]]:
-// CHECK: ret void
-}
-
-
-// CHECK-LABEL: define {{.*void}} @{{.*}}collapsed{{.*}}
-void collapsed(float *a, float *b, float *c, float *d) {
-  int i; // outer loop counter
-  unsigned j; // middle loop couter, leads to unsigned icmp in loop header.
-  // k declared in the loop init below
-  short l; // inner loop counter
-// CHECK: store i32 0, i32* [[OMP_IV:[^,]+]]
-//
-  #pragma omp simd collapse(4)
-
-// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP:%.+]] = icmp ult i32 [[IV]], 120
-// CHECK-NEXT: br i1 [[CMP]], label %[[COLL1_BODY:[^,]+]], label %[[COLL1_END:[^,]+]]
-  for (i = 1; i < 3; i++) // 2 iterations
-    for (j = 2u; j < 5u; j++) //3 iterations
-      for (int k = 3; k <= 6; k++) // 4 iterations
-        for (l = 4; l < 9; ++l) // 5 iterations
-        {
-// CHECK: [[COLL1_BODY]]:
-// Start of body: calculate i from index:
-// CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
-// Calculation of the loop counters values.
-// CHECK: [[CALC_I_1:%.+]] = udiv i32 [[IV1]], 60
-// CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1
-// CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]]
-// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
-// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
-// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20
-// CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3
-// CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1
-// CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]]
-// CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]]
-// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
-// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5
-// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4
-// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1
-// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]]
-// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]]
-// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
-// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5
-// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1
-// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]]
-// CHECK-NEXT: [[CALC_L_3:%.+]] = trunc i32 [[CALC_L_2]] to i16
-// CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]]
-// ... loop body ...
-// End of body: store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
-    float res = b[j] * c[k];
-    a[i] = res * d[l];
-// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
-// CHECK-NEXT: [[ADD2:%.+]] = add i32 [[IV2]], 1
-// CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
-// br label %{{[^,]+}}, !llvm.loop ![[COLL1_LOOP_ID]]
-// CHECK: [[COLL1_END]]:
-  }
-// i,j,l are updated; k is not updated.
-// CHECK: store i32 3, i32*
-// CHECK-NEXT: store i32 5, i32*
-// CHECK-NEXT: store i32 7, i32*
-// CHECK-NEXT: store i16 9, i16*
-// CHECK: ret void
-}
-
-extern char foo();
-extern double globalfloat;
-
-// CHECK-LABEL: define {{.*void}} @{{.*}}widened{{.*}}
-void widened(float *a, float *b, float *c, float *d) {
-  int i; // outer loop counter
-  short j; // inner loop counter
-  globalfloat = 1.0;
-  int localint = 1;
-// CHECK: store double {{.+}}, double* [[GLOBALFLOAT:@.+]]
-// Counter is widened to 64 bits.
-// CHECK: store i64 0, i64* [[OMP_IV:[^,]+]]
-//
-  #pragma omp simd collapse(2) private(globalfloat, localint)
-
-// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID:[0-9]+]]
-// CHECK-NEXT: [[LI:%.+]] = load i64, i64* [[OMP_LI:%[^,]+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
-// CHECK-NEXT: [[NUMIT:%.+]] = add nsw i64 [[LI]], 1
-// CHECK-NEXT: [[CMP:%.+]] = icmp slt i64 [[IV]], [[NUMIT]]
-// CHECK-NEXT: br i1 [[CMP]], label %[[WIDE1_BODY:[^,]+]], label %[[WIDE1_END:[^,]+]]
-  for (i = 1; i < 3; i++) // 2 iterations
-    for (j = 0; j < foo(); j++) // foo() iterations
-  {
-// CHECK: [[WIDE1_BODY]]:
-// Start of body: calculate i from index:
-// CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
-// Calculation of the loop counters values...
-// CHECK: store i32 {{[^,]+}}, i32* [[LC_I:.+]]
-// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
-// CHECK: store i16 {{[^,]+}}, i16* [[LC_J:.+]]
-// ... loop body ...
-//
-// Here we expect store into private double var, not global
-// CHECK-NOT: store double {{.+}}, double* [[GLOBALFLOAT]]
-    globalfloat = (float)j/i;
-    float res = b[j] * c[j];
-// Store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
-    a[i] = res * d[i];
-// Then there's a store into private var localint:
-// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
-    localint = (int)j;
-// CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
-// CHECK-NEXT: [[ADD2:%.+]] = add nsw i64 [[IV2]], 1
-// CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
-//
-// br label %{{[^,]+}}, !llvm.loop ![[WIDE1_LOOP_ID]]
-// CHECK: [[WIDE1_END]]:
-  }
-// i,j are updated.
-// CHECK: store i32 3, i32* [[I:%[^,]+]]
-// CHECK: store i16
-//
-// Here we expect store into original localint, not its privatized version.
-// CHECK-NOT: store i32 {{.+}}, i32* [[LOCALINT]]
-  localint = (int)j;
-// CHECK: ret void
-}
-
-// CHECK-LABEL: define {{.*void}} @{{.*}}linear{{.*}}(float* {{.+}})
-void linear(float *a) {
-  // CHECK: [[VAL_ADDR:%.+]] = alloca i64,
-  // CHECK: [[K_ADDR:%.+]] = alloca i64*,
-  long long val = 0;
-  long long &k = val;
-
-  #pragma omp simd linear(k : 3)
-// CHECK: store i64* [[VAL_ADDR]], i64** [[K_ADDR]],
-// CHECK: [[VAL_REF:%.+]] = load i64*, i64** [[K_ADDR]],
-// CHECK: store i64* [[VAL_REF]], i64** [[K_ADDR_REF:%.+]],
-// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
-// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
-// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
-// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
-
-// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
-// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
-  for (int i = 10; i > 1; i--) {
-// CHECK: [[SIMPLE_LOOP_BODY]]:
-// Start of body: calculate i from IV:
-// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// FIXME: It is interesting, why the following "mul 1" was not constant folded?
-// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
-// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
-// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-//
-// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
-// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
-// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
-// Update of the privatized version of linear variable!
-// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
-    a[k]++;
-    k = k + 3;
-// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
-// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
-  }
-// CHECK: [[SIMPLE_LOOP_END]]:
-//
-// Update linear vars after loop, as the loop was operating on a private version.
-// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
-// CHECK: store i64* [[K_REF]], i64** [[K_PRIV_REF:%.+]],
-// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
-// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
-// CHECK-NEXT: [[K_REF:%.+]] = load i64*, i64** [[K_PRIV_REF]],
-// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
-//
-
-  #pragma omp simd linear(val(k) : 3)
-// CHECK: [[VAL_REF:%.+]] = load i64*, i64** [[K_ADDR]],
-// CHECK: store i64* [[VAL_REF]], i64** [[K_ADDR_REF:%.+]],
-// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
-// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
-// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
-// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
-
-// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
-// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
-  for (int i = 10; i > 1; i--) {
-// CHECK: [[SIMPLE_LOOP_BODY]]:
-// Start of body: calculate i from IV:
-// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// FIXME: It is interesting, why the following "mul 1" was not constant folded?
-// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
-// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
-// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-//
-// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
-// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
-// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
-// Update of the privatized version of linear variable!
-// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
-    a[k]++;
-    k = k + 3;
-// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
-// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
-  }
-// CHECK: [[SIMPLE_LOOP_END]]:
-//
-// Update linear vars after loop, as the loop was operating on a private version.
-// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
-// CHECK: store i64* [[K_REF]], i64** [[K_PRIV_REF:%.+]],
-// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
-// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
-// CHECK-NEXT: [[K_REF:%.+]] = load i64*, i64** [[K_PRIV_REF]],
-// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
-//
-  #pragma omp simd linear(uval(k) : 3)
-// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
-// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[VAL_ADDR]]
-// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
-
-// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
-// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
-  for (int i = 10; i > 1; i--) {
-// CHECK: [[SIMPLE_LOOP_BODY]]:
-// Start of body: calculate i from IV:
-// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// FIXME: It is interesting, why the following "mul 1" was not constant folded?
-// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
-// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
-// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-//
-// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
-// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
-// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
-// Update of the privatized version of linear variable!
-// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
-    a[k]++;
-    k = k + 3;
-// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
-// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
-// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
-  }
-// CHECK: [[SIMPLE_LOOP_END]]:
-//
-// Update linear vars after loop, as the loop was operating on a private version.
-// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
-// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
-// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[VAL_ADDR]]
-//
-}
-
-// TERM_DEBUG-LABEL: bar
-int bar() {return 0;};
-
-// TERM_DEBUG-LABEL: parallel_simd
-void parallel_simd(float *a) {
-#pragma omp parallel
-#pragma omp simd
-  // TERM_DEBUG-NOT: __kmpc_global_thread_num
-  // TERM_DEBUG:     invoke i32 {{.*}}bar{{.*}}()
-  // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
-  // TERM_DEBUG-NOT: __kmpc_global_thread_num
-  // TERM_DEBUG:     [[TERM_LPAD]]
-  // TERM_DEBUG:     call void @__clang_call_terminate
-  // TERM_DEBUG:     unreachable
-  for (unsigned i = 131071; i <= 2147483647; i += 127)
-    a[i] += bar();
-}
-// TERM_DEBUG: !{{[0-9]+}} = !DILocation(line: [[@LINE-11]],
-
-// CHECK-LABEL: S8
-// CHECK: ptrtoint [[SS_TY]]* %{{.+}} to i64
-// CHECK-NEXT: and i64 %{{.+}}, 15
-// CHECK-NEXT: icmp eq i64 %{{.+}}, 0
-// CHECK-NEXT: call void @llvm.assume(i1
-
-// CHECK: ptrtoint [[SS_TY]]* %{{.+}} to i64
-// CHECK-NEXT: and i64 %{{.+}}, 7
-// CHECK-NEXT: icmp eq i64 %{{.+}}, 0
-// CHECK-NEXT: call void @llvm.assume(i1
-
-// CHECK: ptrtoint [[SS_TY]]* %{{.+}} to i64
-// CHECK-NEXT: and i64 %{{.+}}, 15
-// CHECK-NEXT: icmp eq i64 %{{.+}}, 0
-// CHECK-NEXT: call void @llvm.assume(i1
-
-// CHECK: ptrtoint [[SS_TY]]* %{{.+}} to i64
-// CHECK-NEXT: and i64 %{{.+}}, 3
-// CHECK-NEXT: icmp eq i64 %{{.+}}, 0
-// CHECK-NEXT: call void @llvm.assume(i1
-struct SS {
-  SS(): a(0) {}
-  SS(int v) : a(v) {}
-  int a;
-  typedef int type;
-};
-
-template <typename T>
-class S7 : public T {
-protected:
-  T *a;
-  T b[2];
-  S7() : a(0) {}
-
-public:
-  S7(typename T::type &v) : a((T*)&v) {
-#pragma omp simd aligned(a)
-    for (int k = 0; k < a->a; ++k)
-      ++this->a->a;
-#pragma omp simd aligned(this->b : 8)
-    for (int k = 0; k < a->a; ++k)
-      ++a->a;
-  }
-};
-
-class S8 : private IterDouble, public S7<SS> {
-  S8() {}
-
-public:
-  S8(int v) : S7<SS>(v){
-#pragma omp parallel private(a)
-#pragma omp simd aligned(S7<SS>::a)
-    for (int k = 0; k < a->a; ++k)
-      ++this->a->a;
-#pragma omp parallel shared(b)
-#pragma omp simd aligned(this->b: 4)
-    for (int k = 0; k < a->a; ++k)
-      ++a->a;
-  }
-};
-S8 s8(0);
-
-#endif // HEADER
-
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix=TERM_DEBUG %s
+// expected-no-diagnostics
+ #ifndef HEADER
+ #define HEADER
+
+// CHECK: [[SS_TY:%.+]] = type { i32 }
+
+long long get_val() { return 0; }
+double *g_ptr;
+
+// CHECK-LABEL: define {{.*void}} @{{.*}}simple{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
+void simple(float *a, float *b, float *c, float *d) {
+  #pragma omp simd
+// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP:%.+]] = icmp slt i32 [[IV]], 6
+// CHECK-NEXT: br i1 [[CMP]], label %[[SIMPLE_LOOP1_BODY:.+]], label %[[SIMPLE_LOOP1_END:[^,]+]]
+  for (int i = 3; i < 32; i += 5) {
+// CHECK: [[SIMPLE_LOOP1_BODY]]:
+// Start of body: calculate i from IV:
+// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 5
+// CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i32 3, [[CALC_I_1]]
+// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// ... loop body ...
+// End of body: store into a[i]:
+// CHECK: store float [[RESULT:%.+]], float* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+    a[i] = b[i] * c[i] * d[i];
+// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
+// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// br label %{{.+}}, !llvm.loop !{{.+}}
+  }
+// CHECK: [[SIMPLE_LOOP1_END]]:
+
+  long long k = get_val();
+
+  #pragma omp simd linear(k : 3)
+// CHECK: [[K0:%.+]] = call {{.*}}i64 @{{.*}}get_val
+// CHECK-NEXT: store i64 [[K0]], i64* [[K_VAR:%[^,]+]]
+// CHECK: store i32 0, i32* [[OMP_IV2:%[^,]+]]
+// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_VAR]]
+// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
+
+// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV2]], 9
+// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP2_BODY:.+]], label %[[SIMPLE_LOOP2_END:[^,]+]]
+  for (int i = 10; i > 1; i--) {
+// CHECK: [[SIMPLE_LOOP2_BODY]]:
+// Start of body: calculate i from IV:
+// CHECK: [[IV2_0:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
+// FIXME: It is interesting, why the following "mul 1" was not constant folded?
+// CHECK-NEXT: [[IV2_1:%.+]] = mul nsw i32 [[IV2_0]], 1
+// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV2_1]]
+// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
+//
+// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
+// CHECK-NEXT: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
+// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV2_2]], 3
+// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
+// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
+// Update of the privatized version of linear variable!
+// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
+    a[k]++;
+    k = k + 3;
+// CHECK: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
+// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV2_2]], 1
+// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
+// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP2_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP2_END]]:
+//
+// Update linear vars after loop, as the loop was operating on a private version.
+// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
+// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
+// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_VAR]]
+//
+
+  int lin = 12;
+  #pragma omp simd linear(lin : get_val()), linear(g_ptr)
+
+// Init linear private var.
+// CHECK: store i32 12, i32* [[LIN_VAR:%[^,]+]]
+// CHECK: store i64 0, i64* [[OMP_IV3:%[^,]+]]
+
+// CHECK: [[LIN_LOAD:%.+]] = load i32, i32* [[LIN_VAR]]
+// CHECK-NEXT: store i32 [[LIN_LOAD]], i32* [[LIN_START:%[^,]+]]
+// Remember linear step.
+// CHECK: [[CALL_VAL:%.+]] = invoke
+// CHECK: store i64 [[CALL_VAL]], i64* [[LIN_STEP:%[^,]+]]
+
+// CHECK: [[GLIN_LOAD:%.+]] = load double*, double** [[GLIN_VAR:@[^,]+]]
+// CHECK-NEXT: store double* [[GLIN_LOAD]], double** [[GLIN_START:%[^,]+]]
+
+// CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP3:%.+]] = icmp ult i64 [[IV3]], 4
+// CHECK-NEXT: br i1 [[CMP3]], label %[[SIMPLE_LOOP3_BODY:.+]], label %[[SIMPLE_LOOP3_END:[^,]+]]
+  for (unsigned long long it = 2000; it >= 600; it-=400) {
+// CHECK: [[SIMPLE_LOOP3_BODY]]:
+// Start of body: calculate it from IV:
+// CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: [[LC_IT_1:%.+]] = mul i64 [[IV3_0]], 400
+// CHECK-NEXT: [[LC_IT_2:%.+]] = sub i64 2000, [[LC_IT_1]]
+// CHECK-NEXT: store i64 [[LC_IT_2]], i64* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+//
+// Linear start and step are used to calculate current value of the linear variable.
+// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: [[MUL:%.+]] = mul i64 [[IV3_1]], 1
+// CHECK: [[GEP:%.+]] = getelementptr{{.*}}[[GLINSTART]]
+// CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+    *g_ptr++ = 0.0;
+// CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: store double{{.*}}[[GEP_VAL]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+    a[it + lin]++;
+// CHECK: [[FLT_INC:%.+]] = fadd float
+// CHECK-NEXT: store float [[FLT_INC]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: [[ADD3_2:%.+]] = add i64 [[IV3_2]], 1
+// CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP3_END]]:
+//
+// Linear start and step are used to calculate final value of the linear variables.
+// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]
+// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]
+// CHECK: store i32 {{.+}}, i32* [[LIN_VAR]],
+// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]
+// CHECK: store double* {{.*}}[[GLIN_VAR]]
+
+  #pragma omp simd
+// CHECK: store i32 0, i32* [[OMP_IV4:%[^,]+]]
+
+// CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP4:%.+]] = icmp slt i32 [[IV4]], 4
+// CHECK-NEXT: br i1 [[CMP4]], label %[[SIMPLE_LOOP4_BODY:.+]], label %[[SIMPLE_LOOP4_END:[^,]+]]
+  for (short it = 6; it <= 20; it-=-4) {
+// CHECK: [[SIMPLE_LOOP4_BODY]]:
+// Start of body: calculate it from IV:
+// CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV4_0]], 4
+// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 6, [[LC_IT_1]]
+// CHECK-NEXT: [[LC_IT_3:%.+]] = trunc i32 [[LC_IT_2]] to i16
+// CHECK-NEXT: store i16 [[LC_IT_3]], i16* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+
+// CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK-NEXT: [[ADD4_2:%.+]] = add nsw i32 [[IV4_2]], 1
+// CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP4_END]]:
+
+  #pragma omp simd
+// CHECK: store i32 0, i32* [[OMP_IV5:%[^,]+]]
+
+// CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP5:%.+]] = icmp slt i32 [[IV5]], 26
+// CHECK-NEXT: br i1 [[CMP5]], label %[[SIMPLE_LOOP5_BODY:.+]], label %[[SIMPLE_LOOP5_END:[^,]+]]
+  for (unsigned char it = 'z'; it >= 'a'; it+=-1) {
+// CHECK: [[SIMPLE_LOOP5_BODY]]:
+// Start of body: calculate it from IV:
+// CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK-NEXT: [[IV5_1:%.+]] = mul nsw i32 [[IV5_0]], 1
+// CHECK-NEXT: [[LC_IT_1:%.+]] = sub nsw i32 122, [[IV5_1]]
+// CHECK-NEXT: [[LC_IT_2:%.+]] = trunc i32 [[LC_IT_1]] to i8
+// CHECK-NEXT: store i8 [[LC_IT_2]], i8* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+
+// CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK-NEXT: [[ADD5_2:%.+]] = add nsw i32 [[IV5_2]], 1
+// CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP5_END]]:
+
+// CHECK-NOT: mul i32 %{{.+}}, 10
+  #pragma omp simd
+  for (unsigned i=100; i<10; i+=10) {
+  }
+
+  int A;
+  // CHECK: store i32 -1, i32* [[A:%.+]],
+  A = -1;
+  #pragma omp simd lastprivate(A)
+// CHECK: store i64 0, i64* [[OMP_IV7:%[^,]+]]
+// CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]]
+// CHECK: [[SIMD_LOOP7_COND]]:
+// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP7:%.+]] = icmp slt i64 [[IV7]], 7
+// CHECK-NEXT: br i1 [[CMP7]], label %[[SIMPLE_LOOP7_BODY:.+]], label %[[SIMPLE_LOOP7_END:[^,]+]]
+  for (long long i = -10; i < 10; i += 3) {
+// CHECK: [[SIMPLE_LOOP7_BODY]]:
+// Start of body: calculate i from IV:
+// CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV7_0]], 3
+// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
+// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: [[CONV:%.+]] = trunc i64 [[LC_VAL]] to i32
+// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+    A = i;
+// CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: [[ADD7_2:%.+]] = add nsw i64 [[IV7_2]], 1
+// CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP7_END]]:
+// CHECK-NEXT: store i64 11, i64*
+// CHECK-NEXT: [[A_PRIV_VAL:%.+]] = load i32, i32* [[A_PRIV]],
+// CHECK-NEXT: store i32 [[A_PRIV_VAL]], i32* [[A]],
+  int R;
+  // CHECK: store i32 -1, i32* [[R:%[^,]+]],
+  R = -1;
+// CHECK: store i64 0, i64* [[OMP_IV8:%[^,]+]],
+// CHECK: store i32 1, i32* [[R_PRIV:%[^,]+]],
+  #pragma omp simd reduction(*:R)
+// CHECK: br label %[[SIMD_LOOP8_COND:[^,]+]]
+// CHECK: [[SIMD_LOOP8_COND]]:
+// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP8:%.+]] = icmp slt i64 [[IV8]], 7
+// CHECK-NEXT: br i1 [[CMP8]], label %[[SIMPLE_LOOP8_BODY:.+]], label %[[SIMPLE_LOOP8_END:[^,]+]]
+  for (long long i = -10; i < 10; i += 3) {
+// CHECK: [[SIMPLE_LOOP8_BODY]]:
+// Start of body: calculate i from IV:
+// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV8_0]], 3
+// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
+// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+    R *= i;
+// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1
+// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP8_END]]:
+// CHECK-DAG: [[R_VAL:%.+]] = load i32, i32* [[R]],
+// CHECK-DAG: [[R_PRIV_VAL:%.+]] = load i32, i32* [[R_PRIV]],
+// CHECK: [[RED:%.+]] = mul nsw i32 [[R_VAL]], [[R_PRIV_VAL]]
+// CHECK-NEXT: store i32 [[RED]], i32* [[R]],
+// CHECK-NEXT: ret void
+}
+
+template <class T, unsigned K> T tfoo(T a) { return a + K; }
+
+template <typename T, unsigned N>
+int templ1(T a, T *z) {
+  #pragma omp simd collapse(N)
+  for (int i = 0; i < N * 2; i++) {
+    for (long long j = 0; j < (N + N + N + N); j += 2) {
+      z[i + j] = a + tfoo<T, N>(i + j);
+    }
+  }
+  return 0;
+}
+
+// Instatiation templ1<float,2>
+// CHECK-LABEL: define {{.*i32}} @{{.*}}templ1{{.*}}(float {{.+}}, float* {{.+}})
+// CHECK: store i64 0, i64* [[T1_OMP_IV:[^,]+]]
+// ...
+// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP1:%.+]] = icmp slt i64 [[IV]], 16
+// CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]]
+// CHECK: [[T1_BODY]]:
+// Loop counters i and j updates:
+// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4
+// CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1
+// CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]]
+// CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32
+// CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4
+// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2
+// CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]]
+// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// simd.for.inc:
+// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1
+// CHECK-NEXT: store i64 [[INC]], i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK-NEXT: br label {{%.+}}
+// CHECK: [[T1_END]]:
+// CHECK: ret i32 0
+//
+void inst_templ1() {
+  float a;
+  float z[100];
+  templ1<float,2> (a, z);
+}
+
+
+typedef int MyIdx;
+
+class IterDouble {
+  double *Ptr;
+public:
+  IterDouble operator++ () const {
+    IterDouble n;
+    n.Ptr = Ptr + 1;
+    return n;
+  }
+  bool operator < (const IterDouble &that) const {
+    return Ptr < that.Ptr;
+  }
+  double & operator *() const {
+    return *Ptr;
+  }
+  MyIdx operator - (const IterDouble &that) const {
+    return (MyIdx) (Ptr - that.Ptr);
+  }
+  IterDouble operator + (int Delta) {
+    IterDouble re;
+    re.Ptr = Ptr + Delta;
+    return re;
+  }
+
+  ///~IterDouble() {}
+};
+
+// CHECK-LABEL: define {{.*void}} @{{.*}}iter_simple{{.*}}
+void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) {
+//
+// Calculate number of iterations before the loop body.
+// CHECK: [[DIFF1:%.+]] = invoke {{.*}}i32 @{{.*}}IterDouble{{.*}}
+// CHECK: [[DIFF2:%.+]] = sub nsw i32 [[DIFF1]], 1
+// CHECK-NEXT: [[DIFF3:%.+]] = add nsw i32 [[DIFF2]], 1
+// CHECK-NEXT: [[DIFF4:%.+]] = sdiv i32 [[DIFF3]], 1
+// CHECK-NEXT: [[DIFF5:%.+]] = sub nsw i32 [[DIFF4]], 1
+// CHECK-NEXT: store i32 [[DIFF5]], i32* [[OMP_LAST_IT:%[^,]+]]{{.+}}
+// CHECK: store i32 0, i32* [[IT_OMP_IV:%[^,]+]]
+  #pragma omp simd
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}} !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[LAST_IT:%.+]] = load i32, i32* [[OMP_LAST_IT]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK-NEXT: [[NUM_IT:%.+]] = add nsw i32 [[LAST_IT]], 1
+// CHECK-NEXT: [[CMP:%.+]] = icmp slt i32 [[IV]], [[NUM_IT]]
+// CHECK-NEXT: br i1 [[CMP]], label %[[IT_BODY:[^,]+]], label %[[IT_END:[^,]+]]
+  for (IterDouble i = ia; i < ib; ++i) {
+// CHECK: [[IT_BODY]]:
+// Start of body: calculate i from index:
+// CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// Call of operator+ (i, IV).
+// CHECK: {{%.+}} = invoke {{.+}} @{{.*}}IterDouble{{.*}}
+// ... loop body ...
+   *i = *ic * 0.5;
+// Float multiply and save result.
+// CHECK: [[MULR:%.+]] = fmul double {{%.+}}, 5.000000e-01
+// CHECK-NEXT: invoke {{.+}} @{{.*}}IterDouble{{.*}}
+// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]], !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+   ++ic;
+//
+// CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK-NEXT: [[ADD2:%.+]] = add nsw i32 [[IV2]], 1
+// CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// br label %{{.*}}, !llvm.loop ![[ITER_LOOP_ID]]
+  }
+// CHECK: [[IT_END]]:
+// CHECK: ret void
+}
+
+
+// CHECK-LABEL: define {{.*void}} @{{.*}}collapsed{{.*}}
+void collapsed(float *a, float *b, float *c, float *d) {
+  int i; // outer loop counter
+  unsigned j; // middle loop couter, leads to unsigned icmp in loop header.
+  // k declared in the loop init below
+  short l; // inner loop counter
+// CHECK: store i32 0, i32* [[OMP_IV:[^,]+]]
+//
+  #pragma omp simd collapse(4)
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP:%.+]] = icmp ult i32 [[IV]], 120
+// CHECK-NEXT: br i1 [[CMP]], label %[[COLL1_BODY:[^,]+]], label %[[COLL1_END:[^,]+]]
+  for (i = 1; i < 3; i++) // 2 iterations
+    for (j = 2u; j < 5u; j++) //3 iterations
+      for (int k = 3; k <= 6; k++) // 4 iterations
+        for (l = 4; l < 9; ++l) // 5 iterations
+        {
+// CHECK: [[COLL1_BODY]]:
+// Start of body: calculate i from index:
+// CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// Calculation of the loop counters values.
+// CHECK: [[CALC_I_1:%.+]] = udiv i32 [[IV1]], 60
+// CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1
+// CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]]
+// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
+// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20
+// CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3
+// CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1
+// CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]]
+// CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]]
+// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5
+// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4
+// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1
+// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]]
+// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]]
+// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5
+// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1
+// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]]
+// CHECK-NEXT: [[CALC_L_3:%.+]] = trunc i32 [[CALC_L_2]] to i16
+// CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]]
+// ... loop body ...
+// End of body: store into a[i]:
+// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+    float res = b[j] * c[k];
+    a[i] = res * d[l];
+// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK-NEXT: [[ADD2:%.+]] = add i32 [[IV2]], 1
+// CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// br label %{{[^,]+}}, !llvm.loop ![[COLL1_LOOP_ID]]
+// CHECK: [[COLL1_END]]:
+  }
+// i,j,l are updated; k is not updated.
+// CHECK: store i32 3, i32*
+// CHECK-NEXT: store i32 5, i32*
+// CHECK-NEXT: store i32 7, i32*
+// CHECK-NEXT: store i16 9, i16*
+// CHECK: ret void
+}
+
+extern char foo();
+extern double globalfloat;
+
+// CHECK-LABEL: define {{.*void}} @{{.*}}widened{{.*}}
+void widened(float *a, float *b, float *c, float *d) {
+  int i; // outer loop counter
+  short j; // inner loop counter
+  globalfloat = 1.0;
+  int localint = 1;
+// CHECK: store double {{.+}}, double* [[GLOBALFLOAT:@.+]]
+// Counter is widened to 64 bits.
+// CHECK: store i64 0, i64* [[OMP_IV:[^,]+]]
+//
+  #pragma omp simd collapse(2) private(globalfloat, localint)
+
+// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[LI:%.+]] = load i64, i64* [[OMP_LI:%[^,]+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK-NEXT: [[NUMIT:%.+]] = add nsw i64 [[LI]], 1
+// CHECK-NEXT: [[CMP:%.+]] = icmp slt i64 [[IV]], [[NUMIT]]
+// CHECK-NEXT: br i1 [[CMP]], label %[[WIDE1_BODY:[^,]+]], label %[[WIDE1_END:[^,]+]]
+  for (i = 1; i < 3; i++) // 2 iterations
+    for (j = 0; j < foo(); j++) // foo() iterations
+  {
+// CHECK: [[WIDE1_BODY]]:
+// Start of body: calculate i from index:
+// CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// Calculation of the loop counters values...
+// CHECK: store i32 {{[^,]+}}, i32* [[LC_I:.+]]
+// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: store i16 {{[^,]+}}, i16* [[LC_J:.+]]
+// ... loop body ...
+//
+// Here we expect store into private double var, not global
+// CHECK-NOT: store double {{.+}}, double* [[GLOBALFLOAT]]
+    globalfloat = (float)j/i;
+    float res = b[j] * c[j];
+// Store into a[i]:
+// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+    a[i] = res * d[i];
+// Then there's a store into private var localint:
+// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+    localint = (int)j;
+// CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK-NEXT: [[ADD2:%.+]] = add nsw i64 [[IV2]], 1
+// CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+//
+// br label %{{[^,]+}}, !llvm.loop ![[WIDE1_LOOP_ID]]
+// CHECK: [[WIDE1_END]]:
+  }
+// i,j are updated.
+// CHECK: store i32 3, i32* [[I:%[^,]+]]
+// CHECK: store i16
+//
+// Here we expect store into original localint, not its privatized version.
+// CHECK-NOT: store i32 {{.+}}, i32* [[LOCALINT]]
+  localint = (int)j;
+// CHECK: ret void
+}
+
+// CHECK-LABEL: define {{.*void}} @{{.*}}linear{{.*}}(float* {{.+}})
+void linear(float *a) {
+  // CHECK: [[VAL_ADDR:%.+]] = alloca i64,
+  // CHECK: [[K_ADDR:%.+]] = alloca i64*,
+  long long val = 0;
+  long long &k = val;
+
+  #pragma omp simd linear(k : 3)
+// CHECK: store i64* [[VAL_ADDR]], i64** [[K_ADDR]],
+// CHECK: [[VAL_REF:%.+]] = load i64*, i64** [[K_ADDR]],
+// CHECK: store i64* [[VAL_REF]], i64** [[K_ADDR_REF:%.+]],
+// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
+// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
+// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
+// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
+  for (int i = 10; i > 1; i--) {
+// CHECK: [[SIMPLE_LOOP_BODY]]:
+// Start of body: calculate i from IV:
+// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// FIXME: It is interesting, why the following "mul 1" was not constant folded?
+// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
+// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
+// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+//
+// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
+// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
+// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
+// Update of the privatized version of linear variable!
+// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
+    a[k]++;
+    k = k + 3;
+// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
+// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP_END]]:
+//
+// Update linear vars after loop, as the loop was operating on a private version.
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
+// CHECK: store i64* [[K_REF]], i64** [[K_PRIV_REF:%.+]],
+// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
+// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
+// CHECK-NEXT: [[K_REF:%.+]] = load i64*, i64** [[K_PRIV_REF]],
+// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
+//
+
+  #pragma omp simd linear(val(k) : 3)
+// CHECK: [[VAL_REF:%.+]] = load i64*, i64** [[K_ADDR]],
+// CHECK: store i64* [[VAL_REF]], i64** [[K_ADDR_REF:%.+]],
+// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
+// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
+// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
+// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
+  for (int i = 10; i > 1; i--) {
+// CHECK: [[SIMPLE_LOOP_BODY]]:
+// Start of body: calculate i from IV:
+// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// FIXME: It is interesting, why the following "mul 1" was not constant folded?
+// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
+// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
+// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+//
+// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
+// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
+// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
+// Update of the privatized version of linear variable!
+// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
+    a[k]++;
+    k = k + 3;
+// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
+// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP_END]]:
+//
+// Update linear vars after loop, as the loop was operating on a private version.
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
+// CHECK: store i64* [[K_REF]], i64** [[K_PRIV_REF:%.+]],
+// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
+// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
+// CHECK-NEXT: [[K_REF:%.+]] = load i64*, i64** [[K_PRIV_REF]],
+// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
+//
+  #pragma omp simd linear(uval(k) : 3)
+// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
+// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[VAL_ADDR]]
+// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
+// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
+  for (int i = 10; i > 1; i--) {
+// CHECK: [[SIMPLE_LOOP_BODY]]:
+// Start of body: calculate i from IV:
+// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// FIXME: It is interesting, why the following "mul 1" was not constant folded?
+// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
+// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
+// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+//
+// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
+// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
+// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
+// Update of the privatized version of linear variable!
+// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
+    a[k]++;
+    k = k + 3;
+// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
+// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP_END]]:
+//
+// Update linear vars after loop, as the loop was operating on a private version.
+// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
+// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
+// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[VAL_ADDR]]
+//
+}
+
+// TERM_DEBUG-LABEL: bar
+int bar() {return 0;};
+
+// TERM_DEBUG-LABEL: parallel_simd
+void parallel_simd(float *a) {
+#pragma omp parallel
+#pragma omp simd
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
+  // TERM_DEBUG:     invoke i32 {{.*}}bar{{.*}}()
+  // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
+  // TERM_DEBUG:     [[TERM_LPAD]]
+  // TERM_DEBUG:     call void @__clang_call_terminate
+  // TERM_DEBUG:     unreachable
+  for (unsigned i = 131071; i <= 2147483647; i += 127)
+    a[i] += bar();
+}
+// TERM_DEBUG: !{{[0-9]+}} = !DILocation(line: [[@LINE-11]],
+
+// CHECK-LABEL: S8
+// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64
+// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64
+// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64
+// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64
+
+// CHECK-DAG: and i64 %{{.+}}, 15
+// CHECK-DAG: icmp eq i64 %{{.+}}, 0
+// CHECK-DAG: call void @llvm.assume(i1
+
+// CHECK-DAG: and i64 %{{.+}}, 7
+// CHECK-DAG: icmp eq i64 %{{.+}}, 0
+// CHECK-DAG: call void @llvm.assume(i1
+
+// CHECK-DAG: and i64 %{{.+}}, 15
+// CHECK-DAG: icmp eq i64 %{{.+}}, 0
+// CHECK-DAG: call void @llvm.assume(i1
+
+// CHECK-DAG: and i64 %{{.+}}, 3
+// CHECK-DAG: icmp eq i64 %{{.+}}, 0
+// CHECK-DAG: call void @llvm.assume(i1
+struct SS {
+  SS(): a(0) {}
+  SS(int v) : a(v) {}
+  int a;
+  typedef int type;
+};
+
+template <typename T>
+class S7 : public T {
+protected:
+  T *a;
+  T b[2];
+  S7() : a(0) {}
+
+public:
+  S7(typename T::type &v) : a((T*)&v) {
+#pragma omp simd aligned(a)
+    for (int k = 0; k < a->a; ++k)
+      ++this->a->a;
+#pragma omp simd aligned(this->b : 8)
+    for (int k = 0; k < a->a; ++k)
+      ++a->a;
+  }
+};
+
+class S8 : private IterDouble, public S7<SS> {
+  S8() {}
+
+public:
+  S8(int v) : S7<SS>(v){
+#pragma omp parallel private(a)
+#pragma omp simd aligned(S7<SS>::a)
+    for (int k = 0; k < a->a; ++k)
+      ++this->a->a;
+#pragma omp parallel shared(b)
+#pragma omp simd aligned(this->b: 4)
+    for (int k = 0; k < a->a; ++k)
+      ++a->a;
+  }
+};
+S8 s8(0);
+
+// TERM_DEBUG-NOT: line: 0,
+// TERM_DEBUG: distinct !DISubprogram(linkageName: "_GLOBAL__sub_I_simd_codegen.cpp",
+
+#endif // HEADER
+
diff --git a/test/OpenMP/simd_collapse_messages.cpp b/test/OpenMP/simd_collapse_messages.cpp
index 5b88024..b3100c0 100644
--- a/test/OpenMP/simd_collapse_messages.cpp
+++ b/test/OpenMP/simd_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/simd_lastprivate_messages.cpp b/test/OpenMP/simd_lastprivate_messages.cpp
index 349d6a5..0af8128 100644
--- a/test/OpenMP/simd_lastprivate_messages.cpp
+++ b/test/OpenMP/simd_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/simd_linear_messages.cpp b/test/OpenMP/simd_linear_messages.cpp
index 792f78a..aad0d18 100644
--- a/test/OpenMP/simd_linear_messages.cpp
+++ b/test/OpenMP/simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
   int x;
 };
diff --git a/test/OpenMP/simd_loop_messages.cpp b/test/OpenMP/simd_loop_messages.cpp
index a47ccbc..b9d146c 100644
--- a/test/OpenMP/simd_loop_messages.cpp
+++ b/test/OpenMP/simd_loop_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 static int sii;
 // expected-note@+1 {{defined as threadprivate or thread local}}
 #pragma omp threadprivate(sii)
@@ -234,9 +236,7 @@
   for (ii = 0; ii < 10; ii++)
     c[ii] = a[ii];
 
-  // expected-error@+3 {{unexpected OpenMP clause 'shared' in directive '#pragma omp simd'}}
-  // expected-note@+2  {{defined as shared}}
-  // expected-error@+2 {{loop iteration variable in the associated loop of 'omp simd' directive may not be shared, predetermined as linear}}
+  // expected-error@+1 {{unexpected OpenMP clause 'shared' in directive '#pragma omp simd'}}
   #pragma omp simd shared(ii)
   for (ii = 0; ii < 10; ii++)
     c[ii] = a[ii];
diff --git a/test/OpenMP/simd_metadata.c b/test/OpenMP/simd_metadata.c
index 6ed6607..8fdc306 100644
--- a/test/OpenMP/simd_metadata.c
+++ b/test/OpenMP/simd_metadata.c
@@ -7,6 +7,15 @@
 // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC
 // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX
 
+// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
+// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
+// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
+// RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
+// RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
+// RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
+// RUN: %clang_cc1 -fopenmp-simd -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC
+// RUN: %clang_cc1 -fopenmp-simd -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX
+
 void h1(float *c, float *a, double b[], int size)
 {
 // CHECK-LABEL: define void @h1
diff --git a/test/OpenMP/simd_misc_messages.c b/test/OpenMP/simd_misc_messages.c
index 1e15482..75b8ce0 100644
--- a/test/OpenMP/simd_misc_messages.c
+++ b/test/OpenMP/simd_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp simd'}}
 #pragma omp simd
 
diff --git a/test/OpenMP/simd_private_messages.cpp b/test/OpenMP/simd_private_messages.cpp
index 1850101..bfa26d1 100644
--- a/test/OpenMP/simd_private_messages.cpp
+++ b/test/OpenMP/simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/simd_reduction_messages.cpp b/test/OpenMP/simd_reduction_messages.cpp
index bdf429d..135a3ca 100644
--- a/test/OpenMP/simd_reduction_messages.cpp
+++ b/test/OpenMP/simd_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/simd_safelen_messages.cpp b/test/OpenMP/simd_safelen_messages.cpp
index 56cb868..b75a923 100644
--- a/test/OpenMP/simd_safelen_messages.cpp
+++ b/test/OpenMP/simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/simd_simdlen_messages.cpp b/test/OpenMP/simd_simdlen_messages.cpp
index 426d187..f34d628 100644
--- a/test/OpenMP/simd_simdlen_messages.cpp
+++ b/test/OpenMP/simd_simdlen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/single_ast_print.cpp b/test/OpenMP/single_ast_print.cpp
index e819ab9..a8eaeb0 100644
--- a/test/OpenMP/single_ast_print.cpp
+++ b/test/OpenMP/single_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -16,7 +20,7 @@
 #pragma omp parallel firstprivate(a, b, c)
 #pragma omp single copyprivate(a, this->b, (this)->c)
 // CHECK: #pragma omp parallel firstprivate(this->a,this->b,this->c)
-// CHECK-NEXT: #pragma omp single copyprivate(this->a,this->b,this->c)
+// CHECK-NEXT: #pragma omp single copyprivate(this->a,this->b,this->c){{$}}
     ++this->a, --b, (this)->c /= 1;
   }
 };
diff --git a/test/OpenMP/single_codegen.cpp b/test/OpenMP/single_codegen.cpp
index 4feb3bd..108604a 100644
--- a/test/OpenMP/single_codegen.cpp
+++ b/test/OpenMP/single_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -std=c++11 -fopenmp -fnoopenmp-use-tls -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
 // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -std=c++11 -fopenmp-simd -fnoopenmp-use-tls -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef ARRAY
 #ifndef HEADER
@@ -173,7 +180,7 @@
 // CHECK: [[DST_A2_ADDR:%.+]] = load i8*, i8** [[DST_A2_ADDR_REF]],
 // CHECK: [[SRC_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
 // CHECK: [[SRC_A2_ADDR:%.+]] = load i8*, i8** [[SRC_A2_ADDR_REF]],
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[DST_A2_ADDR]], i8* [[SRC_A2_ADDR]], i{{[0-9]+}} 2, i{{[0-9]+}} 1, i1 false)
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align 1 [[DST_A2_ADDR]], i8* align 1 [[SRC_A2_ADDR]], i{{[0-9]+}} 2, i1 false)
 // CHECK: [[DST_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
 // CHECK: [[DST_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC2_ADDR_REF]],
 // CHECK: [[DST_TC2_ADDR:%.+]] = bitcast i8* [[DST_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
@@ -190,7 +197,7 @@
 void parallel_single() {
 #pragma omp parallel
 #pragma omp single
-  // TERM_DEBUG:     __kmpc_global_thread_num
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
   // TERM_DEBUG:     call i32 @__kmpc_single({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
   // TERM_DEBUG:     invoke void {{.*}}foo{{.*}}()
   // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
diff --git a/test/OpenMP/single_copyprivate_messages.cpp b/test/OpenMP/single_copyprivate_messages.cpp
index 4714753..bb18568 100644
--- a/test/OpenMP/single_copyprivate_messages.cpp
+++ b/test/OpenMP/single_copyprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/single_firstprivate_codegen.cpp b/test/OpenMP/single_firstprivate_codegen.cpp
index 9f059e9..23b5435 100644
--- a/test/OpenMP/single_firstprivate_codegen.cpp
+++ b/test/OpenMP/single_firstprivate_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -185,7 +192,7 @@
 
 // firstprivate vec(vec)
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
 
 // firstprivate s_arr(s_arr)
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
@@ -222,24 +229,18 @@
 // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR:%.+]],
-// CHECK: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST:%.+]] to i32*
-// CHECK: store i32 [[T_VARVAL]], i32* [[T_VARCONV]],
-// CHECK: [[T_VARPVT:%.+]] = load i64, i64* [[T_VARCAST]],
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void {{.*}}i64 [[T_VARPVT:%.+]],
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void
 // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
 // CHECK: ret
 //
-// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 {{.*}}%{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.+}}, [[S_INT_TY]]* dereferenceable(4) %{{.+}})
-// CHECK: [[T_VAR_ARG:%.+]] = alloca i{{[0-9]+}},
+// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.+}}, [[S_INT_TY]]* dereferenceable(4) %{{.+}})
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
-// CHECK-NOT: load i{{[0-9]+}}*, i{{[0-9]+}}** %
-// CHECK: [[T_VAR_CONV:%.+]] = bitcast i64* [[T_VAR_ARG]] to i32*
+// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %
 // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** %
 // CHECK: [[S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** %
 // CHECK: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
@@ -255,7 +256,7 @@
 // firstprivate vec(vec)
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
 // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]],
 
 // firstprivate s_arr(s_arr)
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
diff --git a/test/OpenMP/single_firstprivate_messages.cpp b/test/OpenMP/single_firstprivate_messages.cpp
index 32de9fd..4c714e3 100644
--- a/test/OpenMP/single_firstprivate_messages.cpp
+++ b/test/OpenMP/single_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/single_misc_messages.c b/test/OpenMP/single_misc_messages.c
index 2c922dd..b826419 100644
--- a/test/OpenMP/single_misc_messages.c
+++ b/test/OpenMP/single_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 void foo();
 
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp single'}}
diff --git a/test/OpenMP/single_private_codegen.cpp b/test/OpenMP/single_private_codegen.cpp
index 6d47cb5..4b4d103 100644
--- a/test/OpenMP/single_private_codegen.cpp
+++ b/test/OpenMP/single_private_codegen.cpp
@@ -3,6 +3,13 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -39,7 +46,7 @@
 int main() {
   static int sivar;
 #ifdef LAMBDA
-  // LAMBDA: [[G:@.+]] = global double
+  // LAMBDA: [[G:@.+]] = {{(dso_local )?}}global double
   // LAMBDA-LABEL: @main
   // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
   [&]() {
@@ -79,7 +86,7 @@
   }();
   return 0;
 #elif defined(BLOCKS)
-  // BLOCKS: [[G:@.+]] = global double
+  // BLOCKS: [[G:@.+]] = {{(dso_local )?}}global double
   // BLOCKS-LABEL: @main
   // BLOCKS: call {{.*}}void {{%.+}}(i8
   ^{
diff --git a/test/OpenMP/single_private_messages.cpp b/test/OpenMP/single_private_messages.cpp
index 0ed0e6c..0a3ec2e 100644
--- a/test/OpenMP/single_private_messages.cpp
+++ b/test/OpenMP/single_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_ast_print.cpp b/test/OpenMP/target_ast_print.cpp
index d004f73..827d6a4 100644
--- a/test/OpenMP/target_ast_print.cpp
+++ b/test/OpenMP/target_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -42,7 +46,7 @@
 
 // CHECK: template <typename T, int C> T tmain(T argc, T *argv) {
 // CHECK-NEXT: T i, j, a[20]
-// CHECK-NEXT: #pragma omp target
+// CHECK-NEXT: #pragma omp target{{$}}
 // CHECK-NEXT: foo();
 // CHECK-NEXT: #pragma omp target if(target: argc > 0)
 // CHECK-NEXT: foo()
diff --git a/test/OpenMP/target_codegen.cpp b/test/OpenMP/target_codegen.cpp
index d982426..2c0dfd8 100644
--- a/test/OpenMP/target_codegen.cpp
+++ b/test/OpenMP/target_codegen.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -35,17 +53,17 @@
 // sizes.
 
 // CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
-// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i32] [i32 32, i32 288]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
 // CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ]] 2]
-// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2]
-// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 288, i32 288]
-// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 288, i32 547, i32 288, i32 547, i32 547, i32 288, i32 288, i32 547, i32 547]
+// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288]
+// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547]
 // CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40]
-// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 288, i32 288, i32 547]
+// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547]
 // CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40]
-// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 288, i32 288, i32 288, i32 547]
-// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 547, i32 288, i32 288, i32 288, i32 547]
+// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547]
 // CHECK-DAG: @{{.*}} = private constant i8 0
 // CHECK-DAG: @{{.*}} = private constant i8 0
 // CHECK-DAG: @{{.*}} = private constant i8 0
@@ -97,9 +115,10 @@
   static long *plocal;
 
   // CHECK:       [[ADD:%.+]] = add nsw i32
-  // CHECK:       store i32 [[ADD]], i32* [[CAPTURE:%.+]],
-  // CHECK:       [[LD:%.+]] = load i32, i32* [[CAPTURE]],
-  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target(i32 [[LD]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null)
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null)
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
   // CHECK:       [[FAIL]]
@@ -110,10 +129,11 @@
   {
   }
 
-  // CHECK:       [[ADD:%.+]] = add nsw i32
-  // CHECK:       store i32 [[ADD]], i32* [[CAPTURE:%.+]],
-  // CHECK-DAG:   [[LD:%.+]] = load i32, i32* [[CAPTURE]],
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 [[LD]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT]], i32 0, i32 0)
+  // CHECK-DAG:   [[ADD:%.+]] = add nsw i32
+  // CHECK-DAG:   store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK-DAG:   [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK-DAG:   [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -136,7 +156,7 @@
   // CHECK:       call void [[HVT0_:@.+]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
   // CHECK-NEXT:  br label %[[END]]
   // CHECK:       [[END]]
-  #pragma omp target device(global + a)
+  #pragma omp target device(global + a) nowait
   {
     static int local1;
     *plocal = global;
@@ -149,7 +169,7 @@
     global += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0))
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0))
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -173,7 +193,7 @@
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0))
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0))
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -218,14 +238,14 @@
   // CHECK-32:       store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
   // CHECK-32:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
 
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
+  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]]
+  // CHECK:       [[TRY]]
   // CHECK:       [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
   // CHECK:       [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
   // CHECK:       [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
 
-  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
-  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]]
-  // CHECK:       [[TRY]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0))
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0))
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0
@@ -491,17 +511,18 @@
 // CHECK-64:       store i32 %{{.+}}, i32* [[B_ADDR]],
 // CHECK-64:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]],
 
+// CHECK-32:       store i32 %{{.+}}, i32* %__vla_expr
 // CHECK-32:       store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
 // CHECK-32:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
 
-// We capture 2 VLA sizes in this target region
-// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
-// CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
-
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
 // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[TRY]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0))
+// We capture 2 VLA sizes in this target region
+// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
+// CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
+
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0))
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0
@@ -572,7 +593,7 @@
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0))
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0))
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -623,7 +644,7 @@
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0))
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0))
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 
diff --git a/test/OpenMP/target_codegen_global_capture.cpp b/test/OpenMP/target_codegen_global_capture.cpp
index eb34082..b334405 100644
--- a/test/OpenMP/target_codegen_global_capture.cpp
+++ b/test/OpenMP/target_codegen_global_capture.cpp
@@ -4,6 +4,14 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/target_codegen_registration.cpp b/test/OpenMP/target_codegen_registration.cpp
index 064c15b..80604ea 100644
--- a/test/OpenMP/target_codegen_registration.cpp
+++ b/test/OpenMP/target_codegen_registration.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
@@ -16,9 +24,22 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // Check that no target code is emmitted if no omptests flag was provided.
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -63,93 +84,93 @@
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // TCHECK-NOT: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 
 // CHECK-NTARGET-NOT: private constant i8 0
 // CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
 
 // CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
-// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME1]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
-// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME2]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
-// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME3]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
-// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME4]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
-// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME5]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
-// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME6]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
-// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME7]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
-// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME8]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
-// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME9]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
-// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME10]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
-// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME11]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
-// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: @.omp_offloading.entry.[[NAME12]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 
 // TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
-// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME1]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
-// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME2]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
-// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME3]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
-// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME4]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
-// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME5]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
-// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME6]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
-// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME7]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
-// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME8]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
-// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME9]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
-// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME10]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
-// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME11]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 // TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
-// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: @.omp_offloading.entry.[[NAME12]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
 
 // CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
 // CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
@@ -411,31 +432,31 @@
 
 // Check metadata is properly generated:
 // CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 245, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 261, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 267, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 406, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 290, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 290, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 220, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 266, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 282, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 288, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 427, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 311, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 311, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 241, i32 {{[0-9]+}}}
 
 // TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 245, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 261, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 267, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 406, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 290, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 290, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 220, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 266, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 282, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 288, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 427, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 311, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 311, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 241, i32 {{[0-9]+}}}
 
 #endif
diff --git a/test/OpenMP/target_codegen_registration_naming.cpp b/test/OpenMP/target_codegen_registration_naming.cpp
index ce133ee..0d584b8 100644
--- a/test/OpenMP/target_codegen_registration_naming.cpp
+++ b/test/OpenMP/target_codegen_registration_naming.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/target_data_ast_print.cpp b/test/OpenMP/target_data_ast_print.cpp
index bb55ea3..17dace8 100644
--- a/test/OpenMP/target_data_ast_print.cpp
+++ b/test/OpenMP/target_data_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -48,7 +52,7 @@
 
 // CHECK: template <typename T, int C> T tmain(T argc, T *argv) {
 // CHECK-NEXT: T i, j, b, c, d, e, x[20];
-// CHECK-NEXT: #pragma omp target data map(to: c)
+// CHECK-NEXT: #pragma omp target data map(to: c){{$}}
 // CHECK-NEXT: i = argc;
 // CHECK-NEXT: #pragma omp target data map(to: c) if(target data: j > 0)
 // CHECK-NEXT: foo();
diff --git a/test/OpenMP/target_data_codegen.cpp b/test/OpenMP/target_data_codegen.cpp
index 7e5e267..f835bac 100644
--- a/test/OpenMP/target_data_codegen.cpp
+++ b/test/OpenMP/target_data_codegen.cpp
@@ -9,6 +9,14 @@
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
 // RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK1
 
 // CK1: [[ST:%.+]] = type { i32, double* }
@@ -22,15 +30,15 @@
 double gc[100];
 
 // CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 800]
-// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i32] [i32 34]
+// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 34]
 
 // CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4]
-// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i32] [i32 33]
+// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 33]
 
-// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i32] [i32 37]
+// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 37]
 
 // CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24]
-// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i32] [i32 33, i32 17]
+// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17]
 
 // CK1-LABEL: _Z3fooi
 void foo(int arg) {
@@ -38,8 +46,9 @@
   float lb[arg];
 
   // Region 00
-  // CK1-DAG: call void @__tgt_target_data_begin(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
-  // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}},
+  // CK1-DAG: call void @__tgt_target_data_begin(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+  // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -52,8 +61,9 @@
 
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
 
-  // CK1-DAG: call void @__tgt_target_data_end(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
-  // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}},
+  // CK1-DAG: call void @__tgt_target_data_end(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+  // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
   #pragma omp target data if(1+3-5) device(arg) map(from: gc)
@@ -67,7 +77,7 @@
   // Region 02
   // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CK1: [[IFTHEN]]
-  // CK1-DAG: call void @__tgt_target_data_begin(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -86,7 +96,7 @@
   // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 
   // CK1: [[IFTHEN]]
-  // CK1-DAG: call void @__tgt_target_data_end(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
   // CK1: br label %[[IFEND:[^,]+]]
@@ -97,7 +107,7 @@
   {++arg;}
 
   // Region 03
-  // CK1-DAG: call void @__tgt_target_data_begin(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -113,7 +123,7 @@
   // CK1-DAG: [[CSVAL0]] = mul nuw i[[sz]] %{{[^,]+}}, 4
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
 
-  // CK1-DAG: call void @__tgt_target_data_end(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S]]
@@ -124,7 +134,7 @@
   {++arg;}
 
   // Region 04
-  // CK1-DAG: call void @__tgt_target_data_begin(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -147,7 +157,7 @@
 
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
 
-  // CK1-DAG: call void @__tgt_target_data_end(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
   #pragma omp target data map(to: gb.b[:3])
@@ -161,6 +171,14 @@
 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
 // RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 #ifdef CK2
 
 // CK2: [[ST:%.+]] = type { i32, double* }
@@ -178,7 +196,7 @@
 };
 
 // CK2: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 24]
-// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i32] [i32 37, i32 21]
+// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 37, i64 21]
 
 // CK2-LABEL: _Z3bari
 int bar(int arg){
@@ -189,8 +207,9 @@
 // Region 00
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_begin(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
-// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}},
+// CK2-DAG: call void @__tgt_target_data_begin(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+// CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -222,8 +241,9 @@
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_end(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
-// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}},
+// CK2-DAG: call void @__tgt_target_data_end(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+// CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
 // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
 // CK2: br label %[[IFEND:[^,]+]]
@@ -238,6 +258,14 @@
 // RUN: %clang_cc1 -DCK3 -verify -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
 // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
+
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -verify -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
 #ifdef CK3
 
 // CK3-LABEL: no_target_devices
diff --git a/test/OpenMP/target_data_device_messages.cpp b/test/OpenMP/target_data_device_messages.cpp
index 9ed7a54..51c809a 100644
--- a/test/OpenMP/target_data_device_messages.cpp
+++ b/test/OpenMP/target_data_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_data_if_messages.cpp b/test/OpenMP/target_data_if_messages.cpp
index 229d7f5..ae68d25 100644
--- a/test/OpenMP/target_data_if_messages.cpp
+++ b/test/OpenMP/target_data_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_data_messages.c b/test/OpenMP/target_data_messages.c
index fd41e7d..017a339 100644
--- a/test/OpenMP/target_data_messages.c
+++ b/test/OpenMP/target_data_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() { }
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/target_data_use_device_ptr_ast_print.cpp b/test/OpenMP/target_data_use_device_ptr_ast_print.cpp
index ce1ec1d..ba429f5 100644
--- a/test/OpenMP/target_data_use_device_ptr_ast_print.cpp
+++ b/test/OpenMP/target_data_use_device_ptr_ast_print.cpp
@@ -1,6 +1,9 @@
 // RxUN: %clang_cc1 -verify -fopenmp -std=c++11 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -23,7 +26,7 @@
 };
 // CHECK: struct SA
 // CHECK: void func(
-// CHECK: #pragma omp target data map(tofrom: this->i) use_device_ptr(this->k)
+// CHECK: #pragma omp target data map(tofrom: this->i) use_device_ptr(this->k){{$}}
 // CHECK: #pragma omp target data map(tofrom: this->i) use_device_ptr(this->z)
 struct SB {
   unsigned A;
diff --git a/test/OpenMP/target_data_use_device_ptr_codegen.cpp b/test/OpenMP/target_data_use_device_ptr_codegen.cpp
index 5e27556..d6f18cc 100644
--- a/test/OpenMP/target_data_use_device_ptr_codegen.cpp
+++ b/test/OpenMP/target_data_use_device_ptr_codegen.cpp
@@ -9,23 +9,31 @@
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
 // RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK1
 
 double *g;
 
 // CK1: @g = global double*
-// CK1: [[MTYPE00:@.+]] = {{.*}}constant [1 x i32] [i32 99]
-// CK1: [[MTYPE01:@.+]] = {{.*}}constant [1 x i32] [i32 99]
-// CK1: [[MTYPE03:@.+]] = {{.*}}constant [1 x i32] [i32 99]
-// CK1: [[MTYPE04:@.+]] = {{.*}}constant [1 x i32] [i32 99]
-// CK1: [[MTYPE05:@.+]] = {{.*}}constant [1 x i32] [i32 99]
-// CK1: [[MTYPE06:@.+]] = {{.*}}constant [1 x i32] [i32 99]
-// CK1: [[MTYPE07:@.+]] = {{.*}}constant [1 x i32] [i32 99]
-// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i32] [{{i32 35, i32 99|i32 99, i32 35}}]
-// CK1: [[MTYPE09:@.+]] = {{.*}}constant [2 x i32] [i32 99, i32 99]
-// CK1: [[MTYPE10:@.+]] = {{.*}}constant [2 x i32] [i32 99, i32 99]
-// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35]
-// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35]
+// CK1: [[MTYPE00:@.+]] = {{.*}}constant [1 x i64] [i64 99]
+// CK1: [[MTYPE01:@.+]] = {{.*}}constant [1 x i64] [i64 99]
+// CK1: [[MTYPE03:@.+]] = {{.*}}constant [1 x i64] [i64 99]
+// CK1: [[MTYPE04:@.+]] = {{.*}}constant [1 x i64] [i64 99]
+// CK1: [[MTYPE05:@.+]] = {{.*}}constant [1 x i64] [i64 99]
+// CK1: [[MTYPE06:@.+]] = {{.*}}constant [1 x i64] [i64 99]
+// CK1: [[MTYPE07:@.+]] = {{.*}}constant [1 x i64] [i64 99]
+// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i64] [{{i64 35, i64 99|i64 99, i64 35}}]
+// CK1: [[MTYPE09:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 99]
+// CK1: [[MTYPE10:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 99]
+// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i64] [i64 96, i64 35]
+// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i64] [i64 96, i64 35]
 
 // CK1-LABEL: @_Z3foo
 template<typename T>
@@ -327,13 +335,21 @@
 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
 // RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 #ifdef CK2
 
 // CK2: [[ST:%.+]] = type { double*, double** }
-// CK2: [[MTYPE00:@.+]] = {{.*}}constant [2 x i32] [i32 35, i32 83]
-// CK2: [[MTYPE01:@.+]] = {{.*}}constant [3 x i32] [i32 32, i32 19, i32 83]
-// CK2: [[MTYPE02:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35]
-// CK2: [[MTYPE03:@.+]] = {{.*}}constant [4 x i32] [i32 96, i32 32, i32 19, i32 83]
+// CK2: [[MTYPE00:@.+]] = {{.*}}constant [2 x i64] [i64 35, i64 83]
+// CK2: [[MTYPE01:@.+]] = {{.*}}constant [3 x i64] [i64 32, i64 19, i64 83]
+// CK2: [[MTYPE02:@.+]] = {{.*}}constant [2 x i64] [i64 96, i64 35]
+// CK2: [[MTYPE03:@.+]] = {{.*}}constant [4 x i64] [i64 96, i64 32, i64 19, i64 83]
 
 template <typename T>
 struct ST {
diff --git a/test/OpenMP/target_data_use_device_ptr_messages.cpp b/test/OpenMP/target_data_use_device_ptr_messages.cpp
index 1d8002c..87f23e2 100644
--- a/test/OpenMP/target_data_use_device_ptr_messages.cpp
+++ b/test/OpenMP/target_data_use_device_ptr_messages.cpp
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -std=c++11 -verify -fopenmp -ferror-limit 200 %s
+
+// RUN: %clang_cc1 -std=c++11 -verify -fopenmp-simd -ferror-limit 200 %s
 struct ST {
   int *a;
 };
diff --git a/test/OpenMP/target_defaultmap_messages.cpp b/test/OpenMP/target_defaultmap_messages.cpp
index 59348d4..27079c3 100644
--- a/test/OpenMP/target_defaultmap_messages.cpp
+++ b/test/OpenMP/target_defaultmap_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_depend_codegen.cpp b/test/OpenMP/target_depend_codegen.cpp
new file mode 100644
index 0000000..d7eb93d
--- /dev/null
+++ b/test/OpenMP/target_depend_codegen.cpp
@@ -0,0 +1,261 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+extern int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+  static long *plocal;
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]*
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target device(global + a) depend(in: global) depend(out: a, b, cn[4])
+  {
+  }
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+
+  // CHECK:       [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
+  // CHECK:       br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
+  // CHECK:       [[THEN]]:
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+
+  // CHECK:       [[ELSE]]:
+  // CHECK-NOT:   getelementptr inbounds [2 x i8*], [2 x i8*]*
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+  // CHECK:       [[EXIT]]:
+
+  #pragma omp target device(global + a) nowait depend(inout: global, a, bn) if(a)
+  {
+    static int local1;
+    *plocal = global;
+    local1 = global;
+  }
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]*
+  // CHECK:       getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target if(0) firstprivate(global) depend(out:global)
+  {
+    global += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0:@.+]]()
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias)
+// CHECK:       store void (i8*, ...)* null, void (i8*, ...)** %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null)
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT0]]()
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0)
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    load i32, i32* [[AA_ADDR]], align
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT2]](i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+
+#endif
diff --git a/test/OpenMP/target_depend_messages.cpp b/test/OpenMP/target_depend_messages.cpp
index 3eb3af1..5fdf37d 100644
--- a/test/OpenMP/target_depend_messages.cpp
+++ b/test/OpenMP/target_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_device_messages.cpp b/test/OpenMP/target_device_messages.cpp
index 3befcd6..8cf19b2 100644
--- a/test/OpenMP/target_device_messages.cpp
+++ b/test/OpenMP/target_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_enter_data_ast_print.cpp b/test/OpenMP/target_enter_data_ast_print.cpp
index 865cc7f..1df1f8e 100644
--- a/test/OpenMP/target_enter_data_ast_print.cpp
+++ b/test/OpenMP/target_enter_data_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -65,7 +69,7 @@
 // CHECK: template <typename T, int C> T tmain(T argc, T *argv) {
 // CHECK-NEXT: T i, j, b, c, d, e, x[20];
 // CHECK-NEXT: i = argc;
-// CHECK-NEXT: #pragma omp target enter data map(to: i)
+// CHECK-NEXT: #pragma omp target enter data map(to: i){{$}}
 // CHECK-NEXT: #pragma omp target enter data map(to: i) if(target enter data: j > 0)
 // CHECK-NEXT: #pragma omp target enter data map(to: i) if(b)
 // CHECK-NEXT: #pragma omp target enter data map(to: c)
diff --git a/test/OpenMP/target_enter_data_codegen.cpp b/test/OpenMP/target_enter_data_codegen.cpp
index 683cd97..93ffb7a 100644
--- a/test/OpenMP/target_enter_data_codegen.cpp
+++ b/test/OpenMP/target_enter_data_codegen.cpp
@@ -9,6 +9,14 @@
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
 // RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK1
 
 // CK1: [[ST:%.+]] = type { i32, double* }
@@ -22,15 +30,15 @@
 double gc[100];
 
 // CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 800]
-// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i32] [i32 32]
+// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 32]
 
 // CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4]
-// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i32] [i32 33]
+// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 33]
 
-// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i32] [i32 37]
+// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 37]
 
 // CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24]
-// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i32] [i32 33, i32 17]
+// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17]
 
 // CK1-LABEL: _Z3fooi
 void foo(int arg) {
@@ -38,8 +46,9 @@
   float lb[arg];
 
   // Region 00
-  // CK1-DAG: call void @__tgt_target_data_begin(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
-  // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}},
+  // CK1-DAG: call void @__tgt_target_data_begin_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+  // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -52,7 +61,7 @@
 
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
   // CK1-NOT: __tgt_target_data_end
-  #pragma omp target enter data if(1+3-5) device(arg) map(alloc: gc)
+  #pragma omp target enter data if(1+3-5) device(arg) map(alloc: gc) nowait
   {++arg;}
 
   // Region 01
@@ -63,7 +72,7 @@
   // Region 02
   // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CK1: [[IFTHEN]]
-  // CK1-DAG: call void @__tgt_target_data_begin(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -87,7 +96,7 @@
   {++arg;}
 
   // Region 03
-  // CK1-DAG: call void @__tgt_target_data_begin(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -110,7 +119,7 @@
   {++arg;}
 
   // Region 04
-  // CK1-DAG: call void @__tgt_target_data_begin(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -144,6 +153,14 @@
 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
 // RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 #ifdef CK2
 
 // CK2: [[ST:%.+]] = type { i32, double* }
@@ -161,7 +178,7 @@
 };
 
 // CK2: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 24]
-// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i32] [i32 37, i32 21]
+// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 37, i64 21]
 
 // CK2-LABEL: _Z3bari
 int bar(int arg){
@@ -172,8 +189,9 @@
 // Region 00
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_begin(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
-// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}},
+// CK2-DAG: call void @__tgt_target_data_begin(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+// CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -211,6 +229,14 @@
 // RUN: %clang_cc1 -DCK3 -verify -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
 // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
+
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -verify -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
 #ifdef CK3
 
 // CK3-LABEL: no_target_devices
@@ -231,6 +257,14 @@
 // RUN: %clang_cc1 -DCK4 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-32
 
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// SIMD-ONLY3-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCK4 --check-prefix TCK4-64
 // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
@@ -239,6 +273,16 @@
 // RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCK4 --check-prefix TCK4-32
 // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCK4 --check-prefix TCK4-32
+
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// SIMD-ONLY4-NOT: {{__kmpc|__tgt}}
 #ifdef CK4
 
 // CK4-LABEL: device_side_scan
diff --git a/test/OpenMP/target_enter_data_depend_codegen.cpp b/test/OpenMP/target_enter_data_depend_codegen.cpp
new file mode 100644
index 0000000..2e4440c
--- /dev/null
+++ b/test/OpenMP/target_enter_data_depend_codegen.cpp
@@ -0,0 +1,386 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+// CK1: [[ST:%.+]] = type { i32, double* }
+// CK1: %struct.kmp_depend_info = type { i[[sz:64|32]],
+// CK1-SAME: i[[sz]], i8 }
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+struct ST {
+  T a;
+  double *b;
+};
+
+ST<int> gb;
+double gc[100];
+
+// CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 800]
+// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 32]
+
+// CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4]
+// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 33]
+
+// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 32]
+
+// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24]
+// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17]
+
+// CK1-LABEL: _Z3fooi
+void foo(int arg) {
+  int la;
+  float lb[arg];
+
+  // CK1: alloca [1 x %struct.kmp_depend_info],
+  // CK1: alloca [3 x %struct.kmp_depend_info],
+  // CK1: alloca [4 x %struct.kmp_depend_info],
+  // CK1: alloca [5 x %struct.kmp_depend_info],
+
+  // Region 00
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to [100 x double]**
+  // CK1: store [100 x double]* @gc, [100 x double]** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to [100 x double]**
+  // CK1: store [100 x double]* @gc, [100 x double]** [[P0_BC]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[CAP_DEVICE:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[CAPTURES:%.+]], i32 0, i32 0
+  // CK1: [[DEVICE:%.+]] = load i32, i32* %{{.+}}
+  // CK1: store i32 [[DEVICE]], i32* [[CAP_DEVICE]],
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 0
+  // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
+  // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]],
+  // CK1: [[BC1:%.+]] = bitcast %struct.anon* [[CAPTURES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 4 [[SHAREDS_REF]], i8* align 4 [[BC1]], i[[sz]] 4, i1 false)
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: = call i32 @__kmpc_omp_task_with_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i8* [[BC]], i32 0, i8* null)
+
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  #pragma omp target enter data if(1+3-5) device(arg) map(alloc:gc) nowait depend(in: arg)
+  {++arg;}
+
+  // Region 01
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  #pragma omp target enter data map(to:la) if(1+3-4) depend(in: la) depend(out: arg)
+  {++arg;}
+
+  // Region 02
+  // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+  // CK1: [[IFTHEN]]
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to i32**
+  // CK1: store i32* [[ARG:%.+]], i32** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to i32**
+  // CK1: store i32* [[ARG]], i32** [[P0_BC]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[IF_DEVICE:%.+]] = getelementptr inbounds %struct.anon{{.+}}, %struct.anon{{.+}}* [[CAPTURES:%.+]], i32 0, i32 0
+  // CK1: [[IF:%.+]] = load i8, i8* %{{.+}}
+  // CK1: [[IF_BOOL:%.+]] = trunc i8 [[IF]] to i1
+  // CK1: [[IF:%.+]] = zext i1 [[IF_BOOL]] to i8
+  // CK1: store i8 [[IF]], i8* [[IF_DEVICE]],
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
+  // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
+  // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]],
+  // CK1: [[BC1:%.+]] = bitcast %struct.anon{{.+}}* [[CAPTURES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 1 [[SHAREDS_REF]], i8* align 1 [[BC1]], i[[sz]] 1, i1 false)
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 3, i8* [[BC]], i32 0, i8* null)
+  // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  // CK1: = call i32 [[TASK_ENTRY2]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]])
+  // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+
+  // CK1: br label %[[IFEND:[^,]+]]
+
+  // CK1: [[IFELSE]]
+  // CK1: br label %[[IFEND]]
+  // CK1: [[IFEND]]
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  #pragma omp target enter data map(to:arg) if(arg) device(4) depend(inout: arg, la, gc)
+  {++arg;}
+
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  {++arg;}
+
+  // Region 03
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to float**
+  // CK1: store float* [[VLA:%.+]], float** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to float**
+  // CK1: store float* [[VLA]], float** [[P0_BC]],
+  // CK1: [[S0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S:%.+]], i32 0, i32 0
+  // CK1: store i[[sz]] {{.+}}, i[[sz]]* [[S0]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[GEPS0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S]], i32 0, i32 0
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: [[BC_SIZES:%.+]] = bitcast i[[sz]]* [[GEPS0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 4, i8* [[BC]], i32 0, i8* null)
+  // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  // CK1: = call i32 [[TASK_ENTRY3]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]])
+  // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  #pragma omp target enter data map(alloc:lb) depend(out: lb, arg, la, gc)
+  {++arg;}
+
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  {++arg;}
+
+  // Region 04
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to %struct.ST**
+  // CK1: store %struct.ST* @gb, %struct.ST** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to double***
+  // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[P0_BC]],
+  // CK1: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CK1: [[BP1_BC:%.+]] = bitcast i8** [[BP1]] to double***
+  // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[BP1_BC]],
+  // CK1: [[P1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CK1: [[P1_BC:%.+]] = bitcast i8** [[P1]] to double**
+  // CK1: store double* %{{.+}}, double** [[P1_BC]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|44}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 4
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 5, i8* [[BC]], i32 0, i8* null)
+  // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  // CK1: = call i32 [[TASK_ENTRY4]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]])
+  // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  #pragma omp target enter data map(to:gb.b[:3]) depend(in: gb.b[:3], la, lb, gc, arg)
+  {++arg;}
+}
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, %struct.kmp_task_t_with_privates* noalias)
+// CK1-DAG: call void @__tgt_target_data_begin_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+// CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]])
+// CK1: ret i32 0
+// CK1: }
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias)
+// CK1-DAG: call void @__tgt_target_data_begin(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]])
+// CK1: ret i32 0
+// CK1: }
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY3]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias)
+// CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+
+// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]])
+// CK1-NOT: __tgt_target_data_end
+// CK1: ret i32 0
+// CK1: }
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY4]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias)
+// CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+
+// CK1-DAG: [[BP]] = load [2 x i8*]*, [2 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [2 x i8*]*, [2 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [2 x i[[sz]]]*, [2 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i[[sz]]]** [[S_PRIV]])
+// CK1-NOT: __tgt_target_data_end
+// CK1: ret i32 0
+// CK1: }
+
+#endif
diff --git a/test/OpenMP/target_enter_data_depend_messages.cpp b/test/OpenMP/target_enter_data_depend_messages.cpp
index 77e7039..ccc29ea 100644
--- a/test/OpenMP/target_enter_data_depend_messages.cpp
+++ b/test/OpenMP/target_enter_data_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_enter_data_device_messages.cpp b/test/OpenMP/target_enter_data_device_messages.cpp
index d954eca..de8405b 100644
--- a/test/OpenMP/target_enter_data_device_messages.cpp
+++ b/test/OpenMP/target_enter_data_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_enter_data_if_messages.cpp b/test/OpenMP/target_enter_data_if_messages.cpp
index 137ff97..26404a5 100644
--- a/test/OpenMP/target_enter_data_if_messages.cpp
+++ b/test/OpenMP/target_enter_data_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_enter_data_map_messages.c b/test/OpenMP/target_enter_data_map_messages.c
index 8a7de3b..381c94b 100644
--- a/test/OpenMP/target_enter_data_map_messages.c
+++ b/test/OpenMP/target_enter_data_map_messages.c
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - -x c++ %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - -x c++ %s
+
 int main(int argc, char **argv) {
 
   int r;
diff --git a/test/OpenMP/target_enter_data_nowait_messages.cpp b/test/OpenMP/target_enter_data_nowait_messages.cpp
index 508b1b2..6f59c5c 100644
--- a/test/OpenMP/target_enter_data_nowait_messages.cpp
+++ b/test/OpenMP/target_enter_data_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 int main(int argc, char **argv) {
   int i;
 
diff --git a/test/OpenMP/target_exit_data_ast_print.cpp b/test/OpenMP/target_exit_data_ast_print.cpp
index a2f5d64..1408023 100644
--- a/test/OpenMP/target_exit_data_ast_print.cpp
+++ b/test/OpenMP/target_exit_data_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -69,7 +73,7 @@
 // CHECK: template <typename T, int C> T tmain(T argc, T *argv) {
 // CHECK-NEXT: T i, j, b, c, d, e, x[20];
 // CHECK-NEXT: i = argc;
-// CHECK-NEXT: #pragma omp target exit data map(from: i)
+// CHECK-NEXT: #pragma omp target exit data map(from: i){{$}}
 // CHECK-NEXT: #pragma omp target exit data map(from: i) if(target exit data: j > 0)
 // CHECK-NEXT: #pragma omp target exit data map(from: i) if(b)
 // CHECK-NEXT: #pragma omp target exit data map(from: c)
diff --git a/test/OpenMP/target_exit_data_codegen.cpp b/test/OpenMP/target_exit_data_codegen.cpp
index a82c1c6..4523776 100644
--- a/test/OpenMP/target_exit_data_codegen.cpp
+++ b/test/OpenMP/target_exit_data_codegen.cpp
@@ -9,6 +9,14 @@
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
 // RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK1
 
 // CK1: [[ST:%.+]] = type { i32, double* }
@@ -22,15 +30,15 @@
 double gc[100];
 
 // CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 800]
-// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i32] [i32 34]
+// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 34]
 
 // CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4]
-// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i32] [i32 32]
+// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 32]
 
-// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i32] [i32 38]
+// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 38]
 
 // CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24]
-// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i32] [i32 32, i32 16]
+// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 32, i64 16]
 
 // CK1-LABEL: _Z3fooi
 void foo(int arg) {
@@ -39,8 +47,9 @@
 
   // Region 00
   // CK1-NOT: __tgt_target_data_begin
-  // CK1-DAG: call void @__tgt_target_data_end(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
-  // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}},
+  // CK1-DAG: call void @__tgt_target_data_end_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+  // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -52,7 +61,7 @@
   // CK1-DAG: store [100 x double]* @gc, [100 x double]** [[PC0]]
 
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
-  #pragma omp target exit data if(1+3-5) device(arg) map(from: gc)
+  #pragma omp target exit data if(1+3-5) device(arg) map(from: gc) nowait
   {++arg;}
 
   // Region 01
@@ -64,7 +73,7 @@
   // CK1-NOT: __tgt_target_data_begin
   // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CK1: [[IFTHEN]]
-  // CK1-DAG: call void @__tgt_target_data_end(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -88,7 +97,7 @@
 
   // Region 03
   // CK1-NOT: __tgt_target_data_begin
-  // CK1-DAG: call void @__tgt_target_data_end(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -111,7 +120,7 @@
 
   // Region 04
   // CK1-NOT: __tgt_target_data_begin
-  // CK1-DAG: call void @__tgt_target_data_end(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -144,6 +153,14 @@
 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
 // RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 #ifdef CK2
 
 // CK2: [[ST:%.+]] = type { i32, double* }
@@ -161,7 +178,7 @@
 };
 
 // CK2: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 24]
-// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i32] [i32 36, i32 20]
+// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 36, i64 20]
 
 // CK2-LABEL: _Z3bari
 int bar(int arg){
@@ -173,8 +190,9 @@
 // CK2-NOT: __tgt_target_data_begin
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_end(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
-// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}},
+// CK2-DAG: call void @__tgt_target_data_end(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+// CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -211,6 +229,14 @@
 // RUN: %clang_cc1 -DCK3 -verify -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
 // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
+
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -verify -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
 #ifdef CK3
 
 // CK3-LABEL: no_target_devices
diff --git a/test/OpenMP/target_exit_data_depend_codegen.cpp b/test/OpenMP/target_exit_data_depend_codegen.cpp
new file mode 100644
index 0000000..ad8c475
--- /dev/null
+++ b/test/OpenMP/target_exit_data_depend_codegen.cpp
@@ -0,0 +1,386 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+// CK1: [[ST:%.+]] = type { i32, double* }
+// CK1: %struct.kmp_depend_info = type { i[[sz:64|32]],
+// CK1-SAME: i[[sz]], i8 }
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+struct ST {
+  T a;
+  double *b;
+};
+
+ST<int> gb;
+double gc[100];
+
+// CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 800]
+// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 34]
+
+// CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4]
+// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 40]
+
+// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 34]
+
+// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24]
+// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 34, i64 18]
+
+// CK1-LABEL: _Z3fooi
+void foo(int arg) {
+  int la;
+  float lb[arg];
+
+  // CK1: alloca [1 x %struct.kmp_depend_info],
+  // CK1: alloca [3 x %struct.kmp_depend_info],
+  // CK1: alloca [4 x %struct.kmp_depend_info],
+  // CK1: alloca [5 x %struct.kmp_depend_info],
+
+  // Region 00
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to [100 x double]**
+  // CK1: store [100 x double]* @gc, [100 x double]** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to [100 x double]**
+  // CK1: store [100 x double]* @gc, [100 x double]** [[P0_BC]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[CAP_DEVICE:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[CAPTURES:%.+]], i32 0, i32 0
+  // CK1: [[DEVICE:%.+]] = load i32, i32* %{{.+}}
+  // CK1: store i32 [[DEVICE]], i32* [[CAP_DEVICE]],
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 0
+  // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
+  // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]],
+  // CK1: [[BC1:%.+]] = bitcast %struct.anon* [[CAPTURES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 4 [[SHAREDS_REF]], i8* align 4 [[BC1]], i[[sz]] 4, i1 false)
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: = call i32 @__kmpc_omp_task_with_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i8* [[BC]], i32 0, i8* null)
+
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  #pragma omp target exit data if(1+3-5) device(arg) map(from:gc) nowait depend(in: arg)
+  {++arg;}
+
+  // Region 01
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  #pragma omp target exit data map(release: la) if(1+3-4) depend(in: la) depend(out: arg)
+  {++arg;}
+
+  // Region 02
+  // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+  // CK1: [[IFTHEN]]
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to i32**
+  // CK1: store i32* [[ARG:%.+]], i32** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to i32**
+  // CK1: store i32* [[ARG]], i32** [[P0_BC]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[IF_DEVICE:%.+]] = getelementptr inbounds %struct.anon{{.+}}, %struct.anon{{.+}}* [[CAPTURES:%.+]], i32 0, i32 0
+  // CK1: [[IF:%.+]] = load i8, i8* %{{.+}}
+  // CK1: [[IF_BOOL:%.+]] = trunc i8 [[IF]] to i1
+  // CK1: [[IF:%.+]] = zext i1 [[IF_BOOL]] to i8
+  // CK1: store i8 [[IF]], i8* [[IF_DEVICE]],
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
+  // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
+  // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]],
+  // CK1: [[BC1:%.+]] = bitcast %struct.anon{{.+}}* [[CAPTURES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 1 [[SHAREDS_REF]], i8* align 1 [[BC1]], i[[sz]] 1, i1 false)
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 3, i8* [[BC]], i32 0, i8* null)
+  // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  // CK1: = call i32 [[TASK_ENTRY2]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]])
+  // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+
+  // CK1: br label %[[IFEND:[^,]+]]
+
+  // CK1: [[IFELSE]]
+  // CK1: br label %[[IFEND]]
+  // CK1: [[IFEND]]
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  #pragma omp target exit data map(delete: arg) if(arg) device(4) depend(inout: arg, la, gc)
+  {++arg;}
+
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  {++arg;}
+
+  // Region 03
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to float**
+  // CK1: store float* [[VLA:%.+]], float** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to float**
+  // CK1: store float* [[VLA]], float** [[P0_BC]],
+  // CK1: [[S0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S:%.+]], i32 0, i32 0
+  // CK1: store i[[sz]] {{.+}}, i[[sz]]* [[S0]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[GEPS0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S]], i32 0, i32 0
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: [[BC_SIZES:%.+]] = bitcast i[[sz]]* [[GEPS0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 4, i8* [[BC]], i32 0, i8* null)
+  // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  // CK1: = call i32 [[TASK_ENTRY3]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]])
+  // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  #pragma omp target exit data map(from:lb) depend(out: lb, arg, la, gc)
+  {++arg;}
+
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  {++arg;}
+
+  // Region 04
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to %struct.ST**
+  // CK1: store %struct.ST* @gb, %struct.ST** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to double***
+  // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[P0_BC]],
+  // CK1: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CK1: [[BP1_BC:%.+]] = bitcast i8** [[BP1]] to double***
+  // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[BP1_BC]],
+  // CK1: [[P1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CK1: [[P1_BC:%.+]] = bitcast i8** [[P1]] to double**
+  // CK1: store double* %{{.+}}, double** [[P1_BC]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|44}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 4
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 5, i8* [[BC]], i32 0, i8* null)
+  // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  // CK1: = call i32 [[TASK_ENTRY4]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]])
+  // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  #pragma omp target exit data map(from:gb.b[:3]) depend(in: gb.b[:3], la, lb, gc, arg)
+  {++arg;}
+}
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, %struct.kmp_task_t_with_privates* noalias)
+// CK1-DAG: call void @__tgt_target_data_end_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+// CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]])
+// CK1: ret i32 0
+// CK1: }
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias)
+// CK1-DAG: call void @__tgt_target_data_end(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]])
+// CK1: ret i32 0
+// CK1: }
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY3]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias)
+// CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+
+// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]])
+// CK1-NOT: __tgt_target_data_end
+// CK1: ret i32 0
+// CK1: }
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY4]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias)
+// CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+
+// CK1-DAG: [[BP]] = load [2 x i8*]*, [2 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [2 x i8*]*, [2 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [2 x i[[sz]]]*, [2 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i[[sz]]]** [[S_PRIV]])
+// CK1-NOT: __tgt_target_data_end
+// CK1: ret i32 0
+// CK1: }
+
+#endif
diff --git a/test/OpenMP/target_exit_data_depend_messages.cpp b/test/OpenMP/target_exit_data_depend_messages.cpp
index ce8d237..7334cd7 100644
--- a/test/OpenMP/target_exit_data_depend_messages.cpp
+++ b/test/OpenMP/target_exit_data_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_exit_data_device_messages.cpp b/test/OpenMP/target_exit_data_device_messages.cpp
index d9ce0dd..ad27e8a 100644
--- a/test/OpenMP/target_exit_data_device_messages.cpp
+++ b/test/OpenMP/target_exit_data_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_exit_data_if_messages.cpp b/test/OpenMP/target_exit_data_if_messages.cpp
index 6d5ada1..9341fe8 100644
--- a/test/OpenMP/target_exit_data_if_messages.cpp
+++ b/test/OpenMP/target_exit_data_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_exit_data_map_messages.c b/test/OpenMP/target_exit_data_map_messages.c
index 84bd7e7..1c15aac 100644
--- a/test/OpenMP/target_exit_data_map_messages.c
+++ b/test/OpenMP/target_exit_data_map_messages.c
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - -x c++ %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - -x c++ %s
+
 int main(int argc, char **argv) {
 
   int r;
diff --git a/test/OpenMP/target_exit_data_nowait_messages.cpp b/test/OpenMP/target_exit_data_nowait_messages.cpp
index d028603..02eded5 100644
--- a/test/OpenMP/target_exit_data_nowait_messages.cpp
+++ b/test/OpenMP/target_exit_data_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 int main(int argc, char **argv) {
   int i;
 
diff --git a/test/OpenMP/target_firstprivate_codegen.cpp b/test/OpenMP/target_firstprivate_codegen.cpp
index 647b7b6..a159971 100644
--- a/test/OpenMP/target_firstprivate_codegen.cpp
+++ b/test/OpenMP/target_firstprivate_codegen.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -33,15 +51,15 @@
 // TCHECK:  [[S1:%.+]] = type { double }
 
 // CHECK-DAG:  [[SIZET:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 4]
-// CHECK:  [[MAPT:@.+]] = private unnamed_addr constant [1 x i32] [i32 288]
-// CHECK-DAG:  [[MAPT2:@.+]] = private unnamed_addr constant [9 x i32] [i32 288, i32 161, i32 288, i32 161, i32 161, i32 288, i32 288, i32 161, i32 161]
+// CHECK:  [[MAPT:@.+]] = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG:  [[MAPT2:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 161, i64 288, i64 161, i64 161, i64 288, i64 288, i64 161, i64 161]
 // CHECK-DAG:  [[SIZET3:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] zeroinitializer
-// CHECK-DAG:  [[MAPT3:@.+]] = private unnamed_addr constant [1 x i32] [i32 32]
-// CHECK-DAG:  [[MAPT4:@.+]] = private unnamed_addr constant [5 x i32] [i32 547, i32 288, i32 288, i32 288, i32 161]
+// CHECK-DAG:  [[MAPT3:@.+]] = private unnamed_addr constant [1 x i64] [i64 32]
+// CHECK-DAG:  [[MAPT4:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 161]
 // CHECK-DAG:  [[SIZET5:@.+]] = private unnamed_addr constant [3 x i{{32|64}}] [i[[SZ]] 4, i[[SZ]] 1, i[[SZ]] 40]
-// CHECK-DAG:  [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 288, i32 288, i32 161]
+// CHECK-DAG:  [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 161]
 // CHECK-DAG:  [[SIZET6:@.+]] = private unnamed_addr constant [2 x i{{32|64}}] [i[[SZ]] 4, i[[SZ]] 40]
-// CHECK-DAG:  [[MAPT6:@.+]] = private unnamed_addr constant [2 x i32] [i32 288, i32 161]
+// CHECK-DAG:  [[MAPT6:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 161]
 
 
 // CHECK: define {{.*}}[[FOO:@.+]](
@@ -100,7 +118,7 @@
   // CHECK:  store i{{[0-9]+}} [[ACAST_VAL]], i{{[0-9]+}}* [[ACAST_TOPTR2]],
   // CHECK:  [[BASE_PTR_GEP_ARG:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK:  [[PTR_GEP_ARG:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK:  {{.+}} = call i32 @__tgt_target(i32 -1, {{.+}}, i32 1, i8** [[BASE_PTR_GEP_ARG]], i8** [[PTR_GEP_ARG]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT]], i32 0, i32 0))
+  // CHECK:  {{.+}} = call i32 @__tgt_target(i64 -1, {{.+}}, i32 1, i8** [[BASE_PTR_GEP_ARG]], i8** [[PTR_GEP_ARG]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT]], i32 0, i32 0))
   
   // TCHECK:  define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]])
   // TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
@@ -219,11 +237,11 @@
   // CHECK:  [[BASE_PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK:  [[PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK:  [[SIZES_ARG2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[SIZET2]],  i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK: {{.+}} = call i32 @__tgt_target(i32 -1, {{.+}}, i32 9, i8** [[BASE_PTR_GEP_ARG2]], i8** [[PTR_GEP_ARG2]], i[[SZ]]* [[SIZES_ARG2]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT2]], i32 0, i32 0))
+  // CHECK: {{.+}} = call i32 @__tgt_target(i64 -1, {{.+}}, i32 9, i8** [[BASE_PTR_GEP_ARG2]], i8** [[PTR_GEP_ARG2]], i[[SZ]]* [[SIZES_ARG2]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT2]], i32 0, i32 0))
   
   // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the
   // target region
-  // TCHECK:  define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], i{{[0-9]+}} [[BN_SZ:%.+]], float* [[BN_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], i{{[0-9]+}} [[CN_SZ1:%.+]], i{{[0-9]+}} [[CN_SZ2:%.+]], double* [[CN_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]])
+  // TCHECK:  define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], i{{[0-9]+}} [[BN_SZ:%.+]], float* {{.+}} [[BN_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], i{{[0-9]+}} [[CN_SZ1:%.+]], i{{[0-9]+}} [[CN_SZ2:%.+]], double* {{.+}} [[CN_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]])
   // TCHECK:  [[A2_ADDR:%.+]] = alloca i{{[0-9]+}},
   // TCHECK:  [[B_ADDR:%.+]] = alloca [10 x float]*,
   // TCHECK:  [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
@@ -263,7 +281,7 @@
   //  firstprivate(b): memcpy(b_priv,b_in)
   // TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x float]* [[B_PRIV]] to i8*
   // TCHECK:  [[B_ADDR_REF_BCAST:%.+]] = bitcast [10 x float]* [[B_ADDR_REF]] to i8*
-  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_ADDR_REF_BCAST]], {{.+}})
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_ADDR_REF_BCAST]], {{.+}})
 
   // TCHECK:  [[RET_STACK:%.+]] = call i8* @llvm.stacksave()
   // TCHECK:  store i8* [[RET_STACK]], i8** [[SSTACK]],
@@ -273,12 +291,12 @@
   // TCHECK:  [[BN_COPY_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[BN_SZ_VAL]], 4
   // TCHECK:  [[BN_PRIV__BCAST:%.+]] = bitcast float* [[BN_PRIV]] to i8*
   // TCHECK:  [[BN_REF_IN_BCAST:%.+]] = bitcast float* [[BN_ADDR_REF]] to i8*
-  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[BN_PRIV__BCAST]], i8* [[BN_REF_IN_BCAST]], i{{[0-9]+}} [[BN_COPY_SZ]],{{.+}})
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BN_PRIV__BCAST]], i8* align {{[0-9]+}} [[BN_REF_IN_BCAST]], i{{[0-9]+}} [[BN_COPY_SZ]],{{.+}})
 
   // firstprivate(c)
   // TCHECK:  [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8*
   // TCHECK:  [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8*
-  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}})
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[C_PRIV_BCAST]], i8* align {{[0-9]+}} [[C_IN_BCAST]],{{.+}})
   
   // firstprivate(cn)
   // TCHECK:  [[CN_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ1_VAL]], [[CN_SZ2_VAL]]
@@ -287,12 +305,12 @@
   // TCHECK:  [[CN_SZ2_CPY:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ2]], 8
   // TCHECK:  [[CN_PRIV_BCAST:%.+]] = bitcast double* [[CN_PRIV]] to i8*
   // TCHECK:  [[CN_IN_BCAST:%.+]] = bitcast double* [[CN_ADDR_REF]] to i8*
-  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[CN_PRIV_BCAST]], i8* [[CN_IN_BCAST]], i{{[0-9]+}} [[CN_SZ2_CPY]],{{.+}})
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[CN_PRIV_BCAST]], i8* align {{[0-9]+}} [[CN_IN_BCAST]], i{{[0-9]+}} [[CN_SZ2_CPY]],{{.+}})
   
   // firstprivate(d)
   // TCHECK:  [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8*
   // TCHECK:  [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8*
-  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[D_PRIV_BCAST]], i8* [[D_IN_BCAST]],{{.+}})
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[D_PRIV_BCAST]], i8* align {{[0-9]+}} [[D_IN_BCAST]],{{.+}})
 
   
   #pragma omp target firstprivate(ptr)
@@ -310,7 +328,7 @@
 
   // CHECK:  [[BASE_PTR_GEP_ARG3:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BASE_PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK:  [[PTR_GEP_ARG3:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK: {{.+}} = call i32 @__tgt_target(i32 -1, {{.+}}, i32 1, i8** [[BASE_PTR_GEP_ARG3]], i8** [[PTR_GEP_ARG3]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT3]], i32 0, i32 0))
+  // CHECK: {{.+}} = call i32 @__tgt_target(i64 -1, {{.+}}, i32 1, i8** [[BASE_PTR_GEP_ARG3]], i8** [[PTR_GEP_ARG3]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT3]], i32 0, i32 0))
 
   // TCHECK:  define void @__omp_offloading_{{.+}}(double* [[PTR_IN:%.+]])
   // TCHECK:  [[PTR_ADDR:%.+]] = alloca double*,
@@ -373,7 +391,7 @@
 // firstprivate(b)
 // TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8*
 // TCHECK:  [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8*
-// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}})
+// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}})
 
 // TCHECK:  ret void
 
@@ -446,7 +464,7 @@
   // CHECK:  store i{{[0-9]+}} [[B_SIZE:%.+]], i{{[0-9]+}}* [[SIZES_GEP4_4]],
 
   // only check that we use the map types stored in the global variable
-  // CHECK:  call i32 @__tgt_target(i32 -1, {{.+}}, i32 5, i8** {{.+}}, i8** {{.+}}, i{{[0-9]+}}* {{.+}}, i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT4]], i32 0, i32 0))
+  // CHECK:  call i32 @__tgt_target(i64 -1, {{.+}}, i32 5, i8** {{.+}}, i8** {{.+}}, i{{[0-9]+}}* {{.+}}, i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT4]], i32 0, i32 0))
   
   // TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]], i{{[0-9]+}} [[VLA:%.+]], i{{[0-9]+}} [[VLA1:%.+]], i{{[0-9]+}}{{.+}} [[C_IN:%.+]])
   // TCHECK:  [[TH_ADDR:%.+]] = alloca [[S1]]*,
@@ -481,7 +499,7 @@
   // TCHECK:  [[C_SZ_CPY:%.+]] = mul{{.+}} i{{[0-9]+}} [[C_SZ2]],  2
   // TCHECK:  [[C_PRIV_BCAST:%.+]] = bitcast i{{[0-9]+}}* [[C_PRIV]] to i8*
   // TCHECK:  [[C_IN_BCAST:%.+]] = bitcast i{{[0-9]+}}* [[C_ADDR_REF]] to i8*
-  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}})
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[C_PRIV_BCAST]], i8* align {{[0-9]+}} [[C_IN_BCAST]],{{.+}})
 
   // finish
   // TCHECK: [[RELOAD_SSTACK:%.+]] = load i8*, i8** [[SSTACK]],
@@ -519,7 +537,7 @@
   // CHECK:  store [10 x i{{[0-9]+}}]* [[B]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]],
 
   // only check that the right sizes and map types are used
-  // CHECK:  call i32 @__tgt_target(i32 -1, {{.+}}, i32 3, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0))
+  // CHECK:  call i32 @__tgt_target(i64 -1, {{.+}}, i32 3, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0))
 };
 
 
@@ -557,7 +575,7 @@
 // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP6_1]] to [10 x i{{[0-9]+}}]**
 // CHECK:  store [10 x i{{[0-9]+}}]* [[B]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]],
 
-// CHECK:  call i32 @__tgt_target(i32 -1, {{.+}}, i32 2, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT6]], i32 0, i32 0))
+// CHECK:  call i32 @__tgt_target(i64 -1, {{.+}}, i32 2, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT6]], i32 0, i32 0))
 
 
 // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
@@ -576,7 +594,7 @@
 // firstprivate(b)
 // TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8*
 // TCHECK:  [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8*
-// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}})
+// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}})
 
 // TCHECK: ret void
 
diff --git a/test/OpenMP/target_firstprivate_messages.cpp b/test/OpenMP/target_firstprivate_messages.cpp
index 6dbad67..4f82c96 100644
--- a/test/OpenMP/target_firstprivate_messages.cpp
+++ b/test/OpenMP/target_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}}
 extern S1 a;
 class S2 {
@@ -36,7 +38,7 @@
   S5(int v) : a(v) {}
   S5 &operator=(S5 &s) {
 #pragma omp target firstprivate(a) firstprivate(this->a) firstprivate(s.a) // expected-error {{expected variable name or data member of current class}}
-    for (int k = 0; k < s.a; ++k)
+    for (int k = 0; k < s.a; ++k) // expected-warning {{Non-trivial type 'S5' is mapped, only trivial types are guaranteed to be mapped correctly}}
       ++s.a;
     return *this;
   }
diff --git a/test/OpenMP/target_if_messages.cpp b/test/OpenMP/target_if_messages.cpp
index 14b6a5f..982fd4b 100644
--- a/test/OpenMP/target_if_messages.cpp
+++ b/test/OpenMP/target_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_is_device_ptr_ast_print.cpp b/test/OpenMP/target_is_device_ptr_ast_print.cpp
index 8e56aa2..4e8bf81 100644
--- a/test/OpenMP/target_is_device_ptr_ast_print.cpp
+++ b/test/OpenMP/target_is_device_ptr_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -50,7 +54,7 @@
 // CHECK-NEXT: int aa[10];
 // CHECK-NEXT: arr &raa = this->aa;
 // CHECK-NEXT: func(
-// CHECK-NEXT: #pragma omp target is_device_ptr(this->k)
+// CHECK-NEXT: #pragma omp target is_device_ptr(this->k){{$}}
 // CHECK-NEXT: {
 // CHECK-NEXT: }
 // CHECK-NEXT: #pragma omp target is_device_ptr(this->z)
diff --git a/test/OpenMP/target_is_device_ptr_codegen.cpp b/test/OpenMP/target_is_device_ptr_codegen.cpp
index 1a54aa1..fb729b1 100644
--- a/test/OpenMP/target_is_device_ptr_codegen.cpp
+++ b/test/OpenMP/target_is_device_ptr_codegen.cpp
@@ -9,31 +9,39 @@
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
 // RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK1
 
 double *g;
 
 // CK1: @g = global double*
 // CK1: [[SIZES00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}]
-// CK1: [[TYPES00:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK1: [[TYPES00:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK1: [[SIZES01:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}]
-// CK1: [[TYPES01:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK1: [[TYPES01:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK1: [[SIZES02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}]
-// CK1: [[TYPES02:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK1: [[TYPES02:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK1: [[SIZES03:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}]
-// CK1: [[TYPES03:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK1: [[TYPES03:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK1: [[SIZES04:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}]
-// CK1: [[TYPES04:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK1: [[TYPES04:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK1: [[SIZES05:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}]
-// CK1: [[TYPES05:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK1: [[TYPES05:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK1: [[SIZES06:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] {{8|4}}]
-// CK1: [[TYPES06:@.+]] = {{.+}}constant [2 x i32] [i32 288, i32 288]
+// CK1: [[TYPES06:@.+]] = {{.+}}constant [2 x i64] [i64 288, i64 288]
 
 // CK1-LABEL: @_Z3foo
 template<typename T>
@@ -41,7 +49,7 @@
   float *l;
   T *t;
 
-  // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES00]]{{.+}}, {{.+}}[[TYPES00]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES00]]{{.+}}, {{.+}}[[TYPES00]]{{.+}})
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -58,7 +66,7 @@
     ++g;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES01]]{{.+}}, {{.+}}[[TYPES01]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES01]]{{.+}}, {{.+}}[[TYPES01]]{{.+}})
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -75,7 +83,7 @@
     ++l;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES02]]{{.+}}, {{.+}}[[TYPES02]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES02]]{{.+}}, {{.+}}[[TYPES02]]{{.+}})
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -92,7 +100,7 @@
     ++t;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES03]]{{.+}}, {{.+}}[[TYPES03]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES03]]{{.+}}, {{.+}}[[TYPES03]]{{.+}})
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -110,7 +118,7 @@
     ++lr;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES04]]{{.+}}, {{.+}}[[TYPES04]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES04]]{{.+}}, {{.+}}[[TYPES04]]{{.+}})
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -128,7 +136,7 @@
     ++tr;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES05]]{{.+}}, {{.+}}[[TYPES05]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES05]]{{.+}}, {{.+}}[[TYPES05]]{{.+}})
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -146,7 +154,7 @@
     ++tr;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES06]]{{.+}}, {{.+}}[[TYPES06]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES06]]{{.+}}, {{.+}}[[TYPES06]]{{.+}})
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -186,18 +194,26 @@
 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
 // RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 #ifdef CK2
 
 // CK2: [[ST:%.+]] = type { double*, double** }
 
 // CK2: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}]
-// CK2: [[MTYPE00:@.+]] = {{.+}}constant [1 x i32] [i32 33]
+// CK2: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 33]
 
 // CK2: [[SIZE01:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] {{8|4}}]
-// CK2: [[MTYPE01:@.+]] = {{.+}}constant [2 x i32] [i32 32, i32 17]
+// CK2: [[MTYPE01:@.+]] = {{.+}}constant [2 x i64] [i64 32, i64 17]
 
 // CK2: [[SIZE02:@.+]] = {{.+}}constant [3 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] {{8|4}}, i[[sz]] {{8|4}}]
-// CK2: [[MTYPE02:@.+]] = {{.+}}constant [3 x i32] [i32 33, i32 0, i32 17]
+// CK2: [[MTYPE02:@.+]] = {{.+}}constant [3 x i64] [i64 33, i64 0, i64 17]
 
 template <typename T>
 struct ST {
@@ -209,7 +225,7 @@
   void foo(double *&arg) {
     int *la = 0;
 
-    // CK2-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+    // CK2-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
     // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -225,7 +241,7 @@
       a++;
     }
 
-    // CK2-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}})
+    // CK2-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}})
     // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -249,7 +265,7 @@
       b++;
     }
 
-    // CK2-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE02]]{{.+}})
+    // CK2-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE02]]{{.+}})
     // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
diff --git a/test/OpenMP/target_is_device_ptr_messages.cpp b/test/OpenMP/target_is_device_ptr_messages.cpp
index 8cd1426..6f218e2 100644
--- a/test/OpenMP/target_is_device_ptr_messages.cpp
+++ b/test/OpenMP/target_is_device_ptr_messages.cpp
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -std=c++11 -verify -fopenmp -ferror-limit 200 %s
+
+// RUN: %clang_cc1 -std=c++11 -verify -fopenmp-simd -ferror-limit 200 %s
 struct ST {
   int *a;
 };
diff --git a/test/OpenMP/target_map_codegen.cpp b/test/OpenMP/target_map_codegen.cpp
index 72589b5..6b15473 100644
--- a/test/OpenMP/target_map_codegen.cpp
+++ b/test/OpenMP/target_map_codegen.cpp
@@ -13,6 +13,14 @@
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
 // RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK1
 
 class B {
@@ -33,7 +41,7 @@
 
 // CK1-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
 // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288
-// CK1-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK1-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK1-LABEL: implicit_maps_integer
 void implicit_maps_integer (int a){
@@ -41,7 +49,7 @@
   B::modify(a);
   int i = a;
 
-  // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -76,19 +84,27 @@
 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
 // RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 #ifdef CK2
 
 // CK2: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
 // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288
-// CK2: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK2: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 // CK2: [[SIZES2:@.+]] = {{.+}}constant [1 x i[[sz]]] zeroinitializer
 // Map types: OMP_MAP_IS_PTR = 32
-// CK2: [[TYPES2:@.+]] = {{.+}}constant [1 x i32] [i32 32]
+// CK2: [[TYPES2:@.+]] = {{.+}}constant [1 x i64] [i64 32]
 
 // CK2-LABEL: implicit_maps_reference
 void implicit_maps_reference (int a, int *b){
   int &i = a;
-  // CK2-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK2-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -108,7 +124,7 @@
   }
 
   int *&p = b;
-  // CK2-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}})
+  // CK2-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}})
   // CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -155,16 +171,24 @@
 // RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
 // RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
+
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
 #ifdef CK3
 
 // CK3-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
 // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288
-// CK3-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK3-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK3-LABEL: implicit_maps_parameter
 void implicit_maps_parameter (int a){
 
-  // CK3-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK3-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -199,11 +223,19 @@
 // RUN: %clang_cc1 -DCK4 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-32
 // RUN: %clang_cc1 -DCK4 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-32
+
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// SIMD-ONLY3-NOT: {{__kmpc|__tgt}}
 #ifdef CK4
 
 // CK4-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
 // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288
-// CK4-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK4-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK4-LABEL: implicit_maps_nested_integer
 void implicit_maps_nested_integer (int a){
@@ -216,7 +248,7 @@
   // CK4: define internal void [[KERNELP1]](i32* {{[^,]+}}, i32* {{[^,]+}}, i32* {{[^,]+}})
   #pragma omp parallel
   {
-    // CK4-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+    // CK4-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
     // CK4-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
     // CK4-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
     // CK4-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -255,11 +287,19 @@
 // RUN: %clang_cc1 -DCK5 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK5 --check-prefix CK5-32
 // RUN: %clang_cc1 -DCK5 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK5 --check-prefix CK5-32
+
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK5 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK5 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// SIMD-ONLY4-NOT: {{__kmpc|__tgt}}
 #ifdef CK5
 
 // CK5-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
 // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288
-// CK5-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK5-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK5-LABEL: implicit_maps_nested_integer_and_enum
 void implicit_maps_nested_integer_and_enum (int a){
@@ -270,7 +310,7 @@
   // Using an enum should not change the mapping information.
   int  i = a;
 
-  // CK5-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK5-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK5-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK5-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK5-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -306,16 +346,24 @@
 // RUN: %clang_cc1 -DCK6 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK6 --check-prefix CK6-32
 // RUN: %clang_cc1 -DCK6 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK6 --check-prefix CK6-32
+
+// RUN: %clang_cc1 -DCK6 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY5 %s
+// RUN: %clang_cc1 -DCK6 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY5 %s
+// RUN: %clang_cc1 -DCK6 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY5 %s
+// RUN: %clang_cc1 -DCK6 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY5 %s
+// SIMD-ONLY5-NOT: {{__kmpc|__tgt}}
 #ifdef CK6
 // CK6-DAG: [[GBL:@Gi]] = global i32 0
 // CK6-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
 // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288
-// CK6-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK6-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK6-LABEL: implicit_maps_host_global
 int Gi;
 void implicit_maps_host_global (int a){
-  // CK6-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK6-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK6-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK6-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK6-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -352,6 +400,14 @@
 // RUN: %clang_cc1 -DCK7 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK7  --check-prefix CK7-32
 // RUN: %clang_cc1 -DCK7 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK7  --check-prefix CK7-32
+
+// RUN: %clang_cc1 -DCK7 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY6 %s
+// RUN: %clang_cc1 -DCK7 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY6 %s
+// RUN: %clang_cc1 -DCK7 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY6 %s
+// RUN: %clang_cc1 -DCK7 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY6 %s
+// SIMD-ONLY6-NOT: {{__kmpc|__tgt}}
 #ifdef CK7
 
 // For a 32-bit targets, the value doesn't fit the size of the pointer,
@@ -359,15 +415,15 @@
 
 // CK7-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 8]
 // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288
-// CK7-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK7-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 // Map types: OMP_MAP_TO  | OMP_MAP_PRIVATE_PTR | OMP_MAP_FIRST_REF = 161
-// CK7-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 161]
+// CK7-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 161]
 
 // CK7-LABEL: implicit_maps_double
 void implicit_maps_double (int a){
   double d = (double)a;
 
-  // CK7-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK7-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK7-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK7-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK7-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -414,17 +470,25 @@
 // RUN: %clang_cc1 -DCK8 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK8
 // RUN: %clang_cc1 -DCK8 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK8
+
+// RUN: %clang_cc1 -DCK8 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY7 %s
+// RUN: %clang_cc1 -DCK8 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY7 %s
+// RUN: %clang_cc1 -DCK8 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY7 %s
+// RUN: %clang_cc1 -DCK8 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY7 %s
+// SIMD-ONLY7-NOT: {{__kmpc|__tgt}}
 #ifdef CK8
 
 // CK8-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
 // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288
-// CK8-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK8-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 // CK8-LABEL: implicit_maps_float
 void implicit_maps_float (int a){
   float f = (float)a;
 
-  // CK8-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK8-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK8-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK8-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK8-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -458,17 +522,25 @@
 // RUN: %clang_cc1 -DCK9 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK9
 // RUN: %clang_cc1 -DCK9 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK9
+
+// RUN: %clang_cc1 -DCK9 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY8 %s
+// RUN: %clang_cc1 -DCK9 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY8 %s
+// RUN: %clang_cc1 -DCK9 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY8 %s
+// RUN: %clang_cc1 -DCK9 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY8 %s
+// SIMD-ONLY8-NOT: {{__kmpc|__tgt}}
 #ifdef CK9
 
 // CK9-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 16]
 // Map types: OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_MAP_IMPLICIT = 547
-// CK9-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 547]
+// CK9-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 547]
 
 // CK9-LABEL: implicit_maps_array
 void implicit_maps_array (int a){
   double darr[2] = {(double)a, (double)a};
 
-  // CK9-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK9-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK9-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK9-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK9-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -499,17 +571,25 @@
 // RUN: %clang_cc1 -DCK10 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK10
 // RUN: %clang_cc1 -DCK10 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK10
+
+// RUN: %clang_cc1 -DCK10 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY9 %s
+// RUN: %clang_cc1 -DCK10 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY9 %s
+// RUN: %clang_cc1 -DCK10 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY9 %s
+// RUN: %clang_cc1 -DCK10 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY9 %s
+// SIMD-ONLY9-NOT: {{__kmpc|__tgt}}
 #ifdef CK10
 
 // CK10-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] zeroinitializer
 // Map types: OMP_MAP_IS_FIRST = 32
-// CK10-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 32]
+// CK10-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 32]
 
 // CK10-LABEL: implicit_maps_pointer
 void implicit_maps_pointer (){
   double *ddyn;
 
-  // CK10-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK10-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK10-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK10-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK10-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -541,17 +621,25 @@
 // RUN: %clang_cc1 -DCK11 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK11
 // RUN: %clang_cc1 -DCK11 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK11
+
+// RUN: %clang_cc1 -DCK11 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY10 %s
+// RUN: %clang_cc1 -DCK11 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY10 %s
+// RUN: %clang_cc1 -DCK11 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY10 %s
+// RUN: %clang_cc1 -DCK11 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY10 %s
+// SIMD-ONLY10-NOT: {{__kmpc|__tgt}}
 #ifdef CK11
 
 // CK11-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 16]
 // Map types: OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_MAP_IMPLICIT = 547
-// CK11-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 547]
+// CK11-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 547]
 
 // CK11-LABEL: implicit_maps_double_complex
 void implicit_maps_double_complex (int a){
   double _Complex dc = (double)a;
 
-  // CK11-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK11-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK11-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK11-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK11-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -581,6 +669,14 @@
 // RUN: %clang_cc1 -DCK12 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK12 --check-prefix CK12-32
 // RUN: %clang_cc1 -DCK12 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK12 --check-prefix CK12-32
+
+// RUN: %clang_cc1 -DCK12 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY11 %s
+// RUN: %clang_cc1 -DCK12 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY11 %s
+// RUN: %clang_cc1 -DCK12 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY11 %s
+// RUN: %clang_cc1 -DCK12 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY11 %s
+// SIMD-ONLY11-NOT: {{__kmpc|__tgt}}
 #ifdef CK12
 
 // For a 32-bit targets, the value doesn't fit the size of the pointer,
@@ -588,15 +684,15 @@
 
 // CK12-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 8]
 // Map types: OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288
-// CK12-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK12-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 // Map types: OMP_MAP_TO  | OMP_MAP_PRIVATE_PTR | OMP_MAP_FIRST_REF = 161
-// CK12-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 161]
+// CK12-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 161]
 
 // CK12-LABEL: implicit_maps_float_complex
 void implicit_maps_float_complex (int a){
   float _Complex fc = (float)a;
 
-  // CK12-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK12-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK12-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK12-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK12-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -642,6 +738,14 @@
 // RUN: %clang_cc1 -DCK13 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK13
 // RUN: %clang_cc1 -DCK13 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK13
+
+// RUN: %clang_cc1 -DCK13 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY12 %s
+// RUN: %clang_cc1 -DCK13 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY12 %s
+// RUN: %clang_cc1 -DCK13 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY12 %s
+// RUN: %clang_cc1 -DCK13 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY12 %s
+// SIMD-ONLY12-NOT: {{__kmpc|__tgt}}
 #ifdef CK13
 
 // We don't have a constant map size for VLAs.
@@ -649,13 +753,13 @@
 //  - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288 (vla size)
 //  - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288 (vla size)
 //  - OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_IMPICIT_MAP = 547
-// CK13-DAG: [[TYPES:@.+]] = {{.+}}constant [3 x i32] [i32 288, i32 288, i32 547]
+// CK13-DAG: [[TYPES:@.+]] = {{.+}}constant [3 x i64] [i64 288, i64 288, i64 547]
 
 // CK13-LABEL: implicit_maps_variable_length_array
 void implicit_maps_variable_length_array (int a){
   double vla[2][a];
 
-  // CK13-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i[[sz:64|32]]* [[SGEP:%[^,]+]], {{.+}}[[TYPES]]{{.+}})
+  // CK13-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i[[sz:64|32]]* [[SGEP:%[^,]+]], {{.+}}[[TYPES]]{{.+}})
   // CK13-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK13-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK13-DAG: [[SGEP]] = getelementptr inbounds {{.+}}[[SS:%[^,]+]], i32 0, i32 0
@@ -714,6 +818,14 @@
 // RUN: %clang_cc1 -DCK14 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK14 --check-prefix CK14-32
 // RUN: %clang_cc1 -DCK14 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK14 --check-prefix CK14-32
+
+// RUN: %clang_cc1 -DCK14 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY13 %s
+// RUN: %clang_cc1 -DCK14 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY13 %s
+// RUN: %clang_cc1 -DCK14 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY13 %s
+// RUN: %clang_cc1 -DCK14 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY13 %s
+// SIMD-ONLY13-NOT: {{__kmpc|__tgt}}
 #ifdef CK14
 
 // CK14-DAG: [[ST:%.+]] = type { i32, double }
@@ -722,7 +834,7 @@
 // - OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_IMPLICIT_MAP = 547
 // - OMP_MAP_TO + OMP_MAP_FROM + OMP_IMPLICIT_MAP = 515
 // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288
-// CK14-DAG: [[TYPES:@.+]] = {{.+}}constant [3 x i32] [i32 547, i32 515, i32 288]
+// CK14-DAG: [[TYPES:@.+]] = {{.+}}constant [3 x i64] [i64 547, i64 515, i64 288]
 
 class SSS {
 public:
@@ -745,7 +857,7 @@
   SSS sss(a, (double)a);
 
   // CK14: define {{.*}}void @{{.+}}foo{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}})
-  // CK14-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK14-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK14-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK14-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
 
@@ -796,6 +908,14 @@
 // RUN: %clang_cc1 -DCK15 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK15 --check-prefix CK15-32
 // RUN: %clang_cc1 -DCK15 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK15 --check-prefix CK15-32
+
+// RUN: %clang_cc1 -DCK15 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY14 %s
+// RUN: %clang_cc1 -DCK15 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY14 %s
+// RUN: %clang_cc1 -DCK15 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY14 %s
+// RUN: %clang_cc1 -DCK15 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY14 %s
+// SIMD-ONLY14-NOT: {{__kmpc|__tgt}}
 #ifdef CK15
 
 // CK15: [[ST:%.+]] = type { i32, double, i32* }
@@ -803,13 +923,13 @@
 // Map types:
 // - OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_MAP_IMPLICIT = 547
 // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288
-// CK15: [[TYPES:@.+]] = {{.+}}constant [5 x i32] [i32 547, i32 515, i32 512, i32 531, i32 288]
+// CK15: [[TYPES:@.+]] = {{.+}}constant [5 x i64] [i64 547, i64 515, i64 512, i64 531, i64 288]
 
 // CK15: [[SIZES2:@.+]] = {{.+}}constant [5 x i[[sz]]] [i{{64|32}} 4, i{{64|32}} 8, i{{64|32}} {{8|4}}, i{{64|32}} 4, i{{64|32}} 4]
 // Map types:
 // - OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST = 35
 // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288
-// CK15: [[TYPES2:@.+]] = {{.+}}constant [5 x i32] [i32 547, i32 515, i32 512, i32 531, i32 288]
+// CK15: [[TYPES2:@.+]] = {{.+}}constant [5 x i64] [i64 547, i64 515, i64 512, i64 531, i64 288]
 
 template<int x>
 class SSST {
@@ -844,7 +964,7 @@
   SSST<123> ssst(a, (double)a, a);
 
   // CK15: define {{.*}}void @{{.+}}foo{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}})
-  // CK15-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 5, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK15-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 5, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK15-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK15-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
 
@@ -890,7 +1010,7 @@
   ssst.foo(456);
 
   // CK15: define {{.*}}void @{{.+}}bar{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}})
-  // CK15-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 5, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}})
+  // CK15-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 5, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}})
   // CK15-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK15-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
 
@@ -966,12 +1086,20 @@
 // RUN: %clang_cc1 -DCK16 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK16 --check-prefix CK16-32
 // RUN: %clang_cc1 -DCK16 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK16 --check-prefix CK16-32
+
+// RUN: %clang_cc1 -DCK16 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY15 %s
+// RUN: %clang_cc1 -DCK16 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY15 %s
+// RUN: %clang_cc1 -DCK16 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY15 %s
+// RUN: %clang_cc1 -DCK16 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY15 %s
+// SIMD-ONLY15-NOT: {{__kmpc|__tgt}}
 #ifdef CK16
 
 // CK16-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
 // Map types:
 // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288
-// CK16-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK16-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 template<int y>
 int foo(int d) {
@@ -987,7 +1115,7 @@
   int i = a;
 
   // CK16: define {{.*}}i32 @{{.+}}foo{{.+}}(i32 {{[^,]+}})
-  // CK16-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK16-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK16-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK16-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
 
@@ -1019,12 +1147,20 @@
 // RUN: %clang_cc1 -DCK17 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK17
 // RUN: %clang_cc1 -DCK17 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK17
+
+// RUN: %clang_cc1 -DCK17 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY16 %s
+// RUN: %clang_cc1 -DCK17 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY16 %s
+// RUN: %clang_cc1 -DCK17 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY16 %s
+// RUN: %clang_cc1 -DCK17 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY16 %s
+// SIMD-ONLY16-NOT: {{__kmpc|__tgt}}
 #ifdef CK17
 
 // CK17-DAG: [[ST:%.+]] = type { i32, double }
 // CK17-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{16|12}}]
 // Map types: OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_MAP_IMPLICIT = 547
-// CK17-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 547]
+// CK17-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 547]
 
 class SSS {
 public:
@@ -1036,7 +1172,7 @@
 void implicit_maps_struct (int a){
   SSS s = {a, (double)a};
 
-  // CK17-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK17-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK17-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK17-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK17-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1067,12 +1203,20 @@
 // RUN: %clang_cc1 -DCK18 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK18 --check-prefix CK18-32
 // RUN: %clang_cc1 -DCK18 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK18 --check-prefix CK18-32
+
+// RUN: %clang_cc1 -DCK18 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY17 %s
+// RUN: %clang_cc1 -DCK18 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY17 %s
+// RUN: %clang_cc1 -DCK18 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY17 %s
+// RUN: %clang_cc1 -DCK18 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY17 %s
+// SIMD-ONLY17-NOT: {{__kmpc|__tgt}}
 #ifdef CK18
 
 // CK18-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
 // Map types:
 // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288
-// CK18-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288]
+// CK18-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288]
 
 template<typename T>
 int foo(T d) {
@@ -1087,7 +1231,7 @@
   int i = a;
 
   // CK18: define {{.*}}i32 @{{.+}}foo{{.+}}(i32 {{[^,]+}})
-  // CK18-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK18-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
   // CK18-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK18-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
 
@@ -1119,128 +1263,136 @@
 // RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK19 --check-prefix CK19-32
 // RUN: %clang_cc1 -DCK19 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK19 --check-prefix CK19-32
+
+// RUN: %clang_cc1 -DCK19 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY18 %s
+// RUN: %clang_cc1 -DCK19 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY18 %s
+// RUN: %clang_cc1 -DCK19 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY18 %s
+// RUN: %clang_cc1 -DCK19 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY18 %s
+// SIMD-ONLY18-NOT: {{__kmpc|__tgt}}
 #ifdef CK19
 
 // CK19: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4]
-// CK19: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 32]
+// CK19: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 32]
 
 // CK19: [[SIZE00n:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4]
-// CK19: [[MTYPE00n:@.+]] = private {{.*}}constant [1 x i32] [i32 32]
+// CK19: [[MTYPE00n:@.+]] = private {{.*}}constant [1 x i64] [i64 32]
 
 // CK19: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400]
-// CK19: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 33]
+// CK19: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 33]
 
 // CK19: [[SIZE02:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 240]
-// CK19: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i32] [i32 34]
+// CK19: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i64] [i64 34]
 
 // CK19: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 240]
-// CK19: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19: [[SIZE04:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400]
-// CK19: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i32] [i32 32]
+// CK19: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 32]
 
 // CK19: [[SIZE05:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK19: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i32] [i32 33]
+// CK19: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i64] [i64 33]
 
-// CK19: [[MTYPE06:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE06:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
-// CK19: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i32] [i32 32]
+// CK19: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i64] [i64 32]
 
 // CK19: [[SIZE08:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK19: [[MTYPE08:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE08:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19: [[SIZE09:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}]
-// CK19: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i32] [i32 34]
+// CK19: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 34]
 
 // CK19: [[SIZE10:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 240]
-// CK19: [[MTYPE10:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE10:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19: [[SIZE11:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 240]
-// CK19: [[MTYPE11:@.+]] = private {{.*}}constant [1 x i32] [i32 32]
+// CK19: [[MTYPE11:@.+]] = private {{.*}}constant [1 x i64] [i64 32]
 
 // CK19: [[SIZE12:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK19: [[MTYPE12:@.+]] = private {{.*}}constant [1 x i32] [i32 33]
+// CK19: [[MTYPE12:@.+]] = private {{.*}}constant [1 x i64] [i64 33]
 
-// CK19: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i32] [i32 32]
+// CK19: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i64] [i64 32]
 
-// CK19: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i32] [i32 33]
+// CK19: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 33]
 
 // CK19: [[SIZE15:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK19: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i32] [i32 34]
+// CK19: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i64] [i64 34]
 
-// CK19: [[MTYPE16:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 33]
+// CK19: [[MTYPE16:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 33]
 
 // CK19: [[SIZE17:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 240]
-// CK19: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 34]
+// CK19: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 34]
 
 // CK19: [[SIZE18:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 240]
-// CK19: [[MTYPE18:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 35]
+// CK19: [[MTYPE18:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 35]
 
-// CK19: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 32]
+// CK19: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 32]
 
 // CK19: [[SIZE20:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4]
-// CK19: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 33]
+// CK19: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 33]
 
-// CK19: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 35]
+// CK19: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 35]
 
 // CK19: [[SIZE22:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4]
-// CK19: [[MTYPE22:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 35]
+// CK19: [[MTYPE22:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 35]
 
 // CK19: [[SIZE23:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK19: [[MTYPE23:@.+]] = private {{.*}}constant [1 x i32] [i32 39]
+// CK19: [[MTYPE23:@.+]] = private {{.*}}constant [1 x i64] [i64 39]
 
 // CK19: [[SIZE24:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 480]
-// CK19: [[MTYPE24:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE24:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19: [[SIZE25:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16]
-// CK19: [[MTYPE25:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE25:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19: [[SIZE26:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 24]
-// CK19: [[MTYPE26:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE26:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19: [[SIZE27:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK19: [[MTYPE27:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE27:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19: [[SIZE28:@.+]] = private {{.*}}constant [3 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 16]
-// CK19: [[MTYPE28:@.+]] = private {{.*}}constant [3 x i32] [i32 35, i32 19, i32 19]
+// CK19: [[MTYPE28:@.+]] = private {{.*}}constant [3 x i64] [i64 35, i64 19, i64 19]
 
 // CK19: [[SIZE29:@.+]] = private {{.*}}constant [3 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 4]
-// CK19: [[MTYPE29:@.+]] = private {{.*}}constant [3 x i32] [i32 35, i32 19, i32 19]
+// CK19: [[MTYPE29:@.+]] = private {{.*}}constant [3 x i64] [i64 35, i64 19, i64 19]
 
-// CK19: [[MTYPE30:@.+]] = private {{.*}}constant [4 x i32] [i32 288, i32 288, i32 288, i32 35]
+// CK19: [[MTYPE30:@.+]] = private {{.*}}constant [4 x i64] [i64 288, i64 288, i64 288, i64 35]
 
 // CK19: [[SIZE31:@.+]] = private {{.*}}constant [4 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 40]
-// CK19: [[MTYPE31:@.+]] = private {{.*}}constant [4 x i32] [i32 288, i32 288, i32 288, i32 35]
+// CK19: [[MTYPE31:@.+]] = private {{.*}}constant [4 x i64] [i64 288, i64 288, i64 288, i64 35]
 
 // CK19: [[SIZE32:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 13728]
-// CK19: [[MTYPE32:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE32:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19: [[SIZE33:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 13728]
-// CK19: [[MTYPE33:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE33:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19: [[SIZE34:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 13728]
-// CK19: [[MTYPE34:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE34:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
-// CK19: [[MTYPE35:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE35:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19: [[SIZE36:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 208]
-// CK19: [[MTYPE36:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE36:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
-// CK19: [[MTYPE37:@.+]] = private {{.*}}constant [3 x i32] [i32 288, i32 288, i32 35]
+// CK19: [[MTYPE37:@.+]] = private {{.*}}constant [3 x i64] [i64 288, i64 288, i64 35]
 
-// CK19: [[MTYPE38:@.+]] = private {{.*}}constant [3 x i32] [i32 288, i32 288, i32 35]
+// CK19: [[MTYPE38:@.+]] = private {{.*}}constant [3 x i64] [i64 288, i64 288, i64 35]
 
-// CK19: [[MTYPE39:@.+]] = private {{.*}}constant [3 x i32] [i32 288, i32 288, i32 35]
+// CK19: [[MTYPE39:@.+]] = private {{.*}}constant [3 x i64] [i64 288, i64 288, i64 35]
 
-// CK19: [[MTYPE40:@.+]] = private {{.*}}constant [3 x i32] [i32 288, i32 288, i32 35]
+// CK19: [[MTYPE40:@.+]] = private {{.*}}constant [3 x i64] [i64 288, i64 288, i64 35]
 
 // CK19: [[SIZE41:@.+]] = private {{.*}}constant [3 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 208]
-// CK19: [[MTYPE41:@.+]] = private {{.*}}constant [3 x i32] [i32 288, i32 288, i32 35]
+// CK19: [[MTYPE41:@.+]] = private {{.*}}constant [3 x i64] [i64 288, i64 288, i64 35]
 
 // CK19: [[SIZE42:@.+]] = private {{.*}}constant [3 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 104]
-// CK19: [[MTYPE42:@.+]] = private {{.*}}constant [3 x i32] [i32 35, i32 19, i32 19]
+// CK19: [[MTYPE42:@.+]] = private {{.*}}constant [3 x i64] [i64 35, i64 19, i64 19]
 
-// CK19: [[MTYPE43:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK19: [[MTYPE43:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: explicit_maps_single
 void explicit_maps_single (int ii){
@@ -1248,7 +1400,7 @@
   int a = ii;
 
   // Region 00
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1269,7 +1421,7 @@
   int b = a;
 
   // Region 00n
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00n]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00n]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00n]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00n]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1291,7 +1443,7 @@
   int arra[100];
 
   // Region 01
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1309,7 +1461,7 @@
   }
 
   // Region 02
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1328,7 +1480,7 @@
   }
 
   // Region 03
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1347,7 +1499,7 @@
   }
 
   // Region 04
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1366,7 +1518,7 @@
   }
 
   // Region 05
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1385,7 +1537,7 @@
   }
 
   // Region 06
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1408,7 +1560,7 @@
   }
 
   // Region 07
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1431,7 +1583,7 @@
   }
 
   // Region 08
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1453,7 +1605,7 @@
   int *pa;
 
   // Region 09
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1471,7 +1623,7 @@
   }
 
   // Region 10
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1492,7 +1644,7 @@
   }
 
   // Region 11
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1513,7 +1665,7 @@
   }
 
   // Region 12
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1534,7 +1686,7 @@
   }
 
   // Region 13
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1559,7 +1711,7 @@
   }
 
   // Region 14
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1584,7 +1736,7 @@
   }
 
   // Region 15
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE15]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE15]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE15]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE15]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1608,7 +1760,7 @@
   int va[ii];
 
   // Region 16
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE16]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE16]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1639,7 +1791,7 @@
   }
 
   // Region 17
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1665,7 +1817,7 @@
   }
 
   // Region 18
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE18]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE18]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1691,7 +1843,7 @@
   }
 
   // Region 19
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE19]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE19]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1723,7 +1875,7 @@
   }
 
   // Region 20
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1749,7 +1901,7 @@
   }
 
   // Region 21
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE21]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE21]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1781,7 +1933,7 @@
   }
 
   // Region 22
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE22]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE22]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1808,7 +1960,7 @@
 
   // Always.
   // Region 23
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE23]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE23]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE23]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE23]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1830,7 +1982,7 @@
   int ***mptr;
 
   // Region 24
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE24]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE24]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE24]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE24]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1848,7 +2000,7 @@
   }
 
   // Region 25
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE25]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE25]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE25]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE25]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1869,7 +2021,7 @@
   }
 
   // Region 26
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE26]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE26]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE26]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE26]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1890,7 +2042,7 @@
   }
 
   // Region 27
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE27]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE27]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE27]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE27]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1911,7 +2063,7 @@
   }
 
   // Region 28
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE28]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE28]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE28]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE28]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1956,7 +2108,7 @@
   }
 
   // Region 29
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE29]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE29]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE29]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE29]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2004,7 +2156,7 @@
   double mva[23][ii][ii+5];
 
   // Region 30
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE30]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE30]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -2057,7 +2209,7 @@
   }
 
   // Region 31
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE31]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE31]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE31]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE31]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   //
@@ -2106,7 +2258,7 @@
   double ***mptras;
 
   // Region 32
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE32]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE32]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE32]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE32]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2124,7 +2276,7 @@
   }
 
   // Region 33
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE33]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE33]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE33]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE33]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2143,7 +2295,7 @@
   }
 
   // Region 34
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE34]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE34]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE34]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE34]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2162,7 +2314,7 @@
   }
 
   // Region 35
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE35]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE35]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -2187,7 +2339,7 @@
   }
 
   // Region 36
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE36]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE36]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE36]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE36]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2208,7 +2360,7 @@
   }
 
   // Region 37
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE37]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE37]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -2248,7 +2400,7 @@
   }
 
   // Region 38
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE38]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE38]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -2290,7 +2442,7 @@
   }
 
   // Region 39
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE39]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE39]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -2332,7 +2484,7 @@
   }
 
   // Region 40
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE40]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE40]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -2374,7 +2526,7 @@
   }
 
   // Region 41
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE41]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE41]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE41]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE41]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   //
@@ -2409,7 +2561,7 @@
   }
 
   // Region 42
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE42]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE42]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE42]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE42]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2454,7 +2606,7 @@
   }
 
   // Region 43 - the memory is not contiguous for this map - will map the whole last dimension.
-  // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE43]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE43]]{{.+}})
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -2534,19 +2686,27 @@
 // RUN: %clang_cc1 -DCK20 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK20 --check-prefix CK20-32
 // RUN: %clang_cc1 -DCK20 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK20 --check-prefix CK20-32
+
+// RUN: %clang_cc1 -DCK20 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s
+// RUN: %clang_cc1 -DCK20 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s
+// RUN: %clang_cc1 -DCK20 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY19 %s
+// RUN: %clang_cc1 -DCK20 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY19 %s
+// SIMD-ONLY19-NOT: {{__kmpc|__tgt}}
 #ifdef CK20
 
 // CK20: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4]
-// CK20: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 33]
+// CK20: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 33]
 
 // CK20: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 20]
-// CK20: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 33]
+// CK20: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 33]
 
 // CK20: [[SIZE02:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK20: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i32] [i32 34]
+// CK20: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i64] [i64 34]
 
 // CK20: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 12]
-// CK20: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 34]
+// CK20: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 34]
 
 // CK20-LABEL: explicit_maps_references_and_function_args
 void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], float *d){
@@ -2557,7 +2717,7 @@
   float *&dd = d;
 
   // Region 00
-  // CK20-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
   // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2577,7 +2737,7 @@
   }
 
   // Region 01
-  // CK20-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
   // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2598,7 +2758,7 @@
   }
 
   // Region 02
-  // CK20-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
   // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2616,7 +2776,7 @@
   }
 
   // Region 03
-  // CK20-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
   // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2650,26 +2810,34 @@
 // RUN: %clang_cc1 -DCK21 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK21 --check-prefix CK21-32
 // RUN: %clang_cc1 -DCK21 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK21 --check-prefix CK21-32
+
+// RUN: %clang_cc1 -DCK21 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY20 %s
+// RUN: %clang_cc1 -DCK21 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY20 %s
+// RUN: %clang_cc1 -DCK21 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY20 %s
+// RUN: %clang_cc1 -DCK21 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY20 %s
+// SIMD-ONLY20-NOT: {{__kmpc|__tgt}}
 #ifdef CK21
 // CK21: [[ST:%.+]] = type { i32, i32, float* }
 
 // CK21: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4]
-// CK21: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK21: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK21: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 492]
-// CK21: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK21: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK21: [[SIZE02:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 500]
-// CK21: [[MTYPE02:@.+]] = private {{.*}}constant [2 x i32] [i32 34, i32 18]
+// CK21: [[MTYPE02:@.+]] = private {{.*}}constant [2 x i64] [i64 34, i64 18]
 
 // CK21: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 492]
-// CK21: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 34]
+// CK21: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 34]
 
 // CK21: [[SIZE04:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK21: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i32] [i32 34]
+// CK21: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 34]
 
 // CK21: [[SIZE05:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] 4, i[[Z]] 4]
-// CK21: [[MTYPE05:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 3]
+// CK21: [[MTYPE05:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 3]
 
 // CK21-LABEL: explicit_maps_template_args_and_members
 
@@ -2684,7 +2852,7 @@
     T *lb;
 
     // Region 00
-    // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2703,7 +2871,7 @@
     }
     
     // Region 01
-    // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2724,7 +2892,7 @@
     }
     
     // Region 02
-    // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}})
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2753,7 +2921,7 @@
     }
     
     // Region 03
-    // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2771,7 +2939,7 @@
     }
     
     // Region 04
-    // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2790,7 +2958,7 @@
     
     // Make sure the extra flag is passed to the second map.
     // Region 05
-    // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE05]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE05]]{{.+}})
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2839,55 +3007,63 @@
 // RUN: %clang_cc1 -DCK22 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK22 --check-prefix CK22-32
 // RUN: %clang_cc1 -DCK22 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK22 --check-prefix CK22-32
+
+// RUN: %clang_cc1 -DCK22 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY21 %s
+// RUN: %clang_cc1 -DCK22 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY21 %s
+// RUN: %clang_cc1 -DCK22 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY21 %s
+// RUN: %clang_cc1 -DCK22 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY21 %s
+// SIMD-ONLY21-NOT: {{__kmpc|__tgt}}
 #ifdef CK22
 
 // CK22-DAG: [[ST:%.+]] = type { float }
 // CK22-DAG: [[STT:%.+]] = type { i32 }
 
 // CK22: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4]
-// CK22: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400]
-// CK22: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE02:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}]
-// CK22: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16]
-// CK22: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE04:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 20]
-// CK22: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE05:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK22: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE06:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400]
-// CK22: [[MTYPE06:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE06:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE07:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}]
-// CK22: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE08:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16]
-// CK22: [[MTYPE08:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE08:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE09:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 20]
-// CK22: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE10:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK22: [[MTYPE10:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE10:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE11:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400]
-// CK22: [[MTYPE11:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE11:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE12:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}]
-// CK22: [[MTYPE12:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE12:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE13:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16]
-// CK22: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK22: [[SIZE14:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 20]
-// CK22: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK22: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 int a;
 int c[100];
@@ -2913,7 +3089,7 @@
 // CK22-LABEL: explicit_maps_globals
 int explicit_maps_globals(void){
   // Region 00
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2929,7 +3105,7 @@
   { a+=1; }
   
   // Region 01
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2945,7 +3121,7 @@
   { c[3]+=1; }
   
   // Region 02
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2961,7 +3137,7 @@
   { d[3]+=1; }
     
   // Region 03
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2977,7 +3153,7 @@
   { c[3]+=1; }
   
   // Region 04
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2996,7 +3172,7 @@
   { d[3]+=1; }
   
   // Region 05
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3012,7 +3188,7 @@
   { sa.fa+=1; }
   
   // Region 06
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE06]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE06]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3028,7 +3204,7 @@
   { sc[3].fa+=1; }
   
   // Region 07
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE07]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE07]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3044,7 +3220,7 @@
   { sd[3].fa+=1; }
   
   // Region 08
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3060,7 +3236,7 @@
   { sc[3].fa+=1; }
   
   // Region 09
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3079,7 +3255,7 @@
   { sd[3].fa+=1; }
   
   // Region 10
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3095,7 +3271,7 @@
   { sta.fa+=1; }
   
   // Region 11
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3111,7 +3287,7 @@
   { stc[3].fa+=1; }
   
   // Region 12
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3127,7 +3303,7 @@
   { std[3].fa+=1; }
   
   // Region 13
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE13]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE13]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3143,7 +3319,7 @@
   { stc[3].fa+=1; }
   
   // Region 14
-  // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE14]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE14]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}})
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3186,25 +3362,33 @@
 // RUN: %clang_cc1 -std=c++11 -DCK23 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK23 --check-prefix CK23-32
 // RUN: %clang_cc1 -std=c++11 -DCK23 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -std=c++11 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK23 --check-prefix CK23-32
+
+// RUN: %clang_cc1 -std=c++11 -DCK23 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY22 %s
+// RUN: %clang_cc1 -std=c++11 -DCK23 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -std=c++11 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY22 %s
+// RUN: %clang_cc1 -std=c++11 -DCK23 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY22 %s
+// RUN: %clang_cc1 -std=c++11 -DCK23 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -std=c++11 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY22 %s
+// SIMD-ONLY22-NOT: {{__kmpc|__tgt}}
 #ifdef CK23
 
 // CK23: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4]
-// CK23: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK23: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK23: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK23: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK23: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK23: [[SIZE02:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400]
-// CK23: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK23: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK23: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}]
-// CK23: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK23: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK23: [[SIZE04:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16]
-// CK23: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK23: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK23: [[SIZE05:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16]
-// CK23: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK23: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK23-LABEL: explicit_maps_inside_captured
 int explicit_maps_inside_captured(int a){
@@ -3216,7 +3400,7 @@
   // CK23: define {{.*}}explicit_maps_inside_captured{{.*}}
   [&](void){
     // Region 00
-    // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3235,7 +3419,7 @@
     #pragma omp target map(a)
       { a+=1; }
     // Region 01
-    // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3254,7 +3438,7 @@
     #pragma omp target map(b)
       { b+=1; }
     // Region 02
-    // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3274,7 +3458,7 @@
       { c[3]+=1; }
       
     // Region 03
-    // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3293,7 +3477,7 @@
     #pragma omp target map(d)
       { d[3]+=1; }
     // Region 04
-    // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3314,7 +3498,7 @@
       { c[3]+=1; }
       
     // Region 05
-    // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3353,6 +3537,14 @@
 // RUN: %clang_cc1 -DCK24 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK24 --check-prefix CK24-32
 // RUN: %clang_cc1 -DCK24 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK24 --check-prefix CK24-32
+
+// RUN: %clang_cc1 -DCK24 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY23 %s
+// RUN: %clang_cc1 -DCK24 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY23 %s
+// RUN: %clang_cc1 -DCK24 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY23 %s
+// RUN: %clang_cc1 -DCK24 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY23 %s
+// SIMD-ONLY23-NOT: {{__kmpc|__tgt}}
 #ifdef CK24
 
 // CK24-DAG: [[SC:%.+]] = type { i32, [[SB:%.+]], [[SB:%.+]]*, [10 x i32] }
@@ -3379,43 +3571,43 @@
 };
 
 // CK24: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4]
-// CK24: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK24: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK24: [[SIZE13:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK24: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK24: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK24: [[SIZE14:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{56|48}}]
-// CK24: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK24: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK24: [[SIZE15:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK24: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK24: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK24: [[SIZE16:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 20]
-// CK24: [[MTYPE16:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK24: [[MTYPE16:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK24: [[SIZE17:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{3560|2880}}]
-// CK24: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19]
+// CK24: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 19]
 
 // CK24: [[SIZE18:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK24: [[MTYPE18:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK24: [[MTYPE18:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK24: [[SIZE19:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4]
-// CK24: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19]
+// CK24: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 19]
 
 // CK24: [[SIZE20:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4]
-// CK24: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19]
+// CK24: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 19]
 
 // CK24: [[SIZE21:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4]
-// CK24: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19]
+// CK24: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 19]
 
 // CK24: [[SIZE22:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}]
-// CK24: [[MTYPE22:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK24: [[MTYPE22:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK24: [[SIZE23:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}]
-// CK24: [[MTYPE23:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19]
+// CK24: [[MTYPE23:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 19]
 
 // CK24: [[SIZE24:@.+]] = private {{.*}}constant [4 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 4]
-// CK24: [[MTYPE24:@.+]] = private {{.*}}constant [4 x i32] [i32 35, i32 19, i32 19, i32 19]
+// CK24: [[MTYPE24:@.+]] = private {{.*}}constant [4 x i64] [i64 35, i64 19, i64 19, i64 19]
 
 // CK24-LABEL: explicit_maps_struct_fields
 int explicit_maps_struct_fields(int a){
@@ -3423,7 +3615,7 @@
   SC *p;
 
 // Region 01
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3443,7 +3635,7 @@
 // Same thing but starting from a pointer.
 //
 // Region 13
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE13]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE13]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3463,7 +3655,7 @@
   { p->a++; }
   
 // Region 14
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE14]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE14]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3484,7 +3676,7 @@
   { p->a++; }
   
 // Region 15
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE15]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE15]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE15]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE15]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3506,7 +3698,7 @@
   { p->a++; }
 
 // Region 16
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE16]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE16]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE16]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE16]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3527,7 +3719,7 @@
   { p->a++; }
   
 // Region 17
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3558,7 +3750,7 @@
   { p->a++; }
 
 // Region 18
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE18]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE18]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3581,7 +3773,7 @@
   { p->a++; }
   
 // Region 19
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE19]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE19]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE19]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE19]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3616,7 +3808,7 @@
   { p->a++; }
 
 // Region 20
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3647,7 +3839,7 @@
   { p->a++; }
   
 // Region 21
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE21]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE21]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE21]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE21]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3680,7 +3872,7 @@
   { p->a++; }
 
 // Region 22
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE22]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE22]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3703,7 +3895,7 @@
   { p->a++; }
   
 // Region 23
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE23]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE23]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE23]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE23]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3737,7 +3929,7 @@
   { p->a++; }
 
 // Region 24
-// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE24]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE24]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE24]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE24]]{{.+}})
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3819,16 +4011,24 @@
 // RUN: %clang_cc1 -DCK25 -std=c++11 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK25 --check-prefix CK25-32
 // RUN: %clang_cc1 -DCK25 -std=c++11 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK25 --check-prefix CK25-32
+
+// RUN: %clang_cc1 -DCK25 -std=c++11 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY24 %s
+// RUN: %clang_cc1 -DCK25 -std=c++11 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY24 %s
+// RUN: %clang_cc1 -DCK25 -std=c++11 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY24 %s
+// RUN: %clang_cc1 -DCK25 -std=c++11 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY24 %s
+// SIMD-ONLY24-NOT: {{__kmpc|__tgt}}
 #ifdef CK25
 // CK25: [[ST:%.+]] = type { i32, float }
 // CK25: [[CA00:%.+]] = type { [[ST]]* }
 // CK25: [[CA01:%.+]] = type { i32* }
 
 // CK25: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4]
-// CK25: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 33]
+// CK25: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 33]
 
 // CK25: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK25: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 33]
+// CK25: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 33]
 
 // CK25-LABEL: explicit_maps_with_inner_lambda
 
@@ -3839,7 +4039,7 @@
 
   int foo(T arg) {
     // Region 00
-    // CK25-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+    // CK25-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
     // CK25-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK25-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3860,7 +4060,7 @@
     }
 
     // Region 01
-    // CK25-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+    // CK25-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
     // CK25-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK25-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3913,20 +4113,28 @@
 // RUN: %clang_cc1 -DCK26 -std=c++11 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK26 --check-prefix CK26-32
 // RUN: %clang_cc1 -DCK26 -std=c++11 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK26 --check-prefix CK26-32
+
+// RUN: %clang_cc1 -DCK26 -std=c++11 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY25 %s
+// RUN: %clang_cc1 -DCK26 -std=c++11 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY25 %s
+// RUN: %clang_cc1 -DCK26 -std=c++11 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY25 %s
+// RUN: %clang_cc1 -DCK26 -std=c++11 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY25 %s
+// SIMD-ONLY25-NOT: {{__kmpc|__tgt}}
 #ifdef CK26
 // CK26: [[ST:%.+]] = type { i32, float*, i32, float* }
 
 // CK26: [[SIZE00:@.+]] = private {{.*}}constant [2 x i[[Z:64|32]]] [i[[Z:64|32]] {{32|16}}, i[[Z:64|32]] 4]
-// CK26: [[MTYPE00:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 35]
+// CK26: [[MTYPE00:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 35]
 
 // CK26: [[SIZE01:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{32|16}}, i[[Z]] 4]
-// CK26: [[MTYPE01:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 35]
+// CK26: [[MTYPE01:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 35]
 
 // CK26: [[SIZE02:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{32|16}}, i[[Z]] 4]
-// CK26: [[MTYPE02:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 35]
+// CK26: [[MTYPE02:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 35]
 
 // CK26: [[SIZE03:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{32|16}}, i[[Z]] 4]
-// CK26: [[MTYPE03:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 35]
+// CK26: [[MTYPE03:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 35]
 
 // CK26-LABEL: explicit_maps_with_private_class_members
 
@@ -3943,7 +4151,7 @@
     #pragma omp parallel firstprivate(fA,fB) private(pA,pB)
     {
       // Region 00
-      // CK26-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+      // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
       // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
       // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3970,7 +4178,7 @@
       }
 
       // Region 01
-      // CK26-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}})
+      // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}})
       // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
       // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3997,7 +4205,7 @@
       }
 
       // Region 02
-      // CK26-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}})
+      // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}})
       // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
       // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4024,7 +4232,7 @@
       }
 
       // Region 01
-      // CK26-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE03]]{{.+}})
+      // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE03]]{{.+}})
       // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
       // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4097,28 +4305,36 @@
 // RUN: %clang_cc1 -DCK27 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK27 --check-prefix CK27-32
 // RUN: %clang_cc1 -DCK27 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK27 --check-prefix CK27-32
+
+// RUN: %clang_cc1 -DCK27 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY26 %s
+// RUN: %clang_cc1 -DCK27 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY26 %s
+// RUN: %clang_cc1 -DCK27 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY26 %s
+// RUN: %clang_cc1 -DCK27 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY26 %s
+// SIMD-ONLY26-NOT: {{__kmpc|__tgt}}
 #ifdef CK27
 
 // CK27: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] zeroinitializer
-// CK27: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 32]
+// CK27: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 32]
 
 // CK27: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] zeroinitializer
-// CK27: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK27: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK27: [[SIZE02:@.+]] = private {{.*}}constant [1 x i[[Z]]] zeroinitializer
-// CK27: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK27: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK27: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] zeroinitializer
-// CK27: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK27: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK27: [[SIZE05:@.+]] = private {{.*}}constant [1 x i[[Z]]] zeroinitializer
-// CK27: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i32] [i32 32]
+// CK27: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i64] [i64 32]
 
 // CK27: [[SIZE07:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4]
-// CK27: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i32] [i32 288]
+// CK27: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i64] [i64 288]
 
 // CK27: [[SIZE09:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 40]
-// CK27: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i32] [i32 161]
+// CK27: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 161]
 
 // CK27-LABEL: zero_size_section_and_private_maps
 void zero_size_section_and_private_maps (int ii){
@@ -4127,7 +4343,7 @@
   int *pa;
 
   // Region 00
-  // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4145,7 +4361,7 @@
   }
 
   // Region 01
-  // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4166,7 +4382,7 @@
   }
 
   // Region 02
-  // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4187,7 +4403,7 @@
   }
 
   // Region 03
-  // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4212,7 +4428,7 @@
   int pvtArr[10];
 
   // Region 04
-  // CK27: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i32* null)
+  // CK27: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null)
   // CK27: call void [[CALL04:@.+]]()
   #pragma omp target private(pvtPtr)
   {
@@ -4220,7 +4436,7 @@
   }
 
   // Region 05
-  // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4238,7 +4454,7 @@
   }
 
   // Region 06
-  // CK27: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i32* null)
+  // CK27: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null)
   // CK27: call void [[CALL06:@.+]]()
   #pragma omp target private(pvtScl)
   {
@@ -4246,7 +4462,7 @@
   }
 
   // Region 07
-  // CK27-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZE07]]{{.+}}, {{.+}}[[MTYPE07]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZE07]]{{.+}}, {{.+}}[[MTYPE07]]{{.+}})
   // CK27-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK27-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK27-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -4266,7 +4482,7 @@
   }
 
   // Region 08
-  // CK27: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i32* null)
+  // CK27: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null)
   // CK27: call void [[CALL08:@.+]]()
   #pragma omp target private(pvtArr)
   {
@@ -4274,7 +4490,7 @@
   }
 
   // Region 09
-  // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4308,20 +4524,28 @@
 // RUN: %clang_cc1 -DCK28 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK28 --check-prefix CK28-32
 // RUN: %clang_cc1 -DCK28 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK28 --check-prefix CK28-32
+
+// RUN: %clang_cc1 -DCK28 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY27 %s
+// RUN: %clang_cc1 -DCK28 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY27 %s
+// RUN: %clang_cc1 -DCK28 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY27 %s
+// RUN: %clang_cc1 -DCK28 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY27 %s
+// SIMD-ONLY27-NOT: {{__kmpc|__tgt}}
 #ifdef CK28
 
 // CK28: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] {{8|4}}]
-// CK28: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK28: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK28: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400]
-// CK28: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35]
+// CK28: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK28-LABEL: explicit_maps_pointer_references
 void explicit_maps_pointer_references (int *p){
   int *&a = p;
 
   // Region 00
-  // CK28-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK28-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
   // CK28-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK28-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4341,7 +4565,7 @@
   }
 
   // Region 01
-  // CK28-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK28-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
   // CK28-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK28-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4371,19 +4595,27 @@
 // RUN: %clang_cc1 -DCK29 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK29 --check-prefix CK29-32
 // RUN: %clang_cc1 -DCK29 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK29 --check-prefix CK29-32
+
+// RUN: %clang_cc1 -DCK29 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY28 %s
+// RUN: %clang_cc1 -DCK29 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY28 %s
+// RUN: %clang_cc1 -DCK29 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY28 %s
+// RUN: %clang_cc1 -DCK29 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY28 %s
+// SIMD-ONLY28-NOT: {{__kmpc|__tgt}}
 #ifdef CK29
 
 // CK29: [[SSA:%.+]] = type { double*, double** }
 // CK29: [[SSB:%.+]]  = type { [[SSA]]*, [[SSA]]** }
 
 // CK29: [[SIZE00:@.+]] = private {{.*}}constant [4 x i[[Z:64|32]]] [i[[Z:64|32]] {{8|4}}, i[[Z:64|32]] {{8|4}}, i[[Z:64|32]] {{8|4}}, i[[Z:64|32]] 80]
-// CK29: [[MTYPE00:@.+]] = private {{.*}}constant [4 x i32] [i32 35, i32 16, i32 19, i32 19]
+// CK29: [[MTYPE00:@.+]] = private {{.*}}constant [4 x i64] [i64 35, i64 16, i64 19, i64 19]
 
 // CK29: [[SIZE01:@.+]] = private {{.*}}constant [4 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 80]
-// CK29: [[MTYPE01:@.+]] = private {{.*}}constant [4 x i32] [i32 32, i32 19, i32 19, i32 19]
+// CK29: [[MTYPE01:@.+]] = private {{.*}}constant [4 x i64] [i64 32, i64 19, i64 19, i64 19]
 
 // CK29: [[SIZE02:@.+]] = private {{.*}}constant [5 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 80]
-// CK29: [[MTYPE02:@.+]] = private {{.*}}constant [5 x i32] [i32 32, i32 19, i32 16, i32 19, i32 19]
+// CK29: [[MTYPE02:@.+]] = private {{.*}}constant [5 x i64] [i64 32, i64 19, i64 16, i64 19, i64 19]
 
 struct SSA{
   double *p;
@@ -4400,7 +4632,7 @@
   void foo() {
 
     // Region 00
-    // CK29-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE00]]{{.+}})
+    // CK29-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE00]]{{.+}})
 
     // CK29-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK29-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
@@ -4447,7 +4679,7 @@
     }
 
     // Region 01
-    // CK29-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE01]]{{.+}})
+    // CK29-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE01]]{{.+}})
 
     // CK29-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK29-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
@@ -4492,7 +4724,7 @@
     }
 
     // Region 02
-    // CK29-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 5, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[5 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[5 x i{{.+}}]* [[MTYPE02]]{{.+}})
+    // CK29-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 5, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[5 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[5 x i{{.+}}]* [[MTYPE02]]{{.+}})
 
     // CK29-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK29-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
diff --git a/test/OpenMP/target_map_messages.cpp b/test/OpenMP/target_map_messages.cpp
index f607dcf..17f3ba5 100644
--- a/test/OpenMP/target_map_messages.cpp
+++ b/test/OpenMP/target_map_messages.cpp
@@ -1,5 +1,8 @@
-// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 200 %s
-// RUN: %clang_cc1 -DCCODE -verify -fopenmp -ferror-limit 200 -x c %s
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 200 %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCCODE -verify -fopenmp -ferror-limit 200 -x c %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 200 %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCCODE -verify -fopenmp-simd -ferror-limit 200 -x c %s -Wno-openmp-target
 #ifdef CCODE
 void foo(int arg) {
   const int n = 0;
@@ -26,7 +29,14 @@
   T d;
   float e[I];
   T *f;
+  int bf : 20;
   void func(int arg) {
+    #pragma omp target
+    {
+      a = 0.0;
+      func(arg);
+      bf = 20;
+    }
     #pragma omp target map(arg,a,d)
     {}
     #pragma omp target map(arg[2:2],a,d) // expected-error {{subscripted value is not an array or pointer}}
@@ -225,9 +235,17 @@
   {}
   #pragma omp target map(r.ArrS[0].A, t.ArrS[1].A)
   {}
-  #pragma omp target map(r.PtrS[0], r.PtrS->B) // expected-error {{same pointer derreferenced in multiple different ways in map clause expressions}} expected-note {{used here}}
+  #pragma omp target map(r.PtrS[0], r.PtrS->B) // expected-error {{same pointer dereferenced in multiple different ways in map clause expressions}} expected-note {{used here}}
   {}
-  #pragma omp target map(r.RPtrS[0], r.RPtrS->B) // expected-error {{same pointer derreferenced in multiple different ways in map clause expressions}} expected-note {{used here}}
+  #pragma omp target map(r.PtrS, r.PtrS->B) // expected-error {{pointer cannot be mapped along with a section derived from itself}} expected-note {{used here}}
+  {}
+  #pragma omp target map(r.PtrS->A, r.PtrS->B)
+  {}
+  #pragma omp target map(r.RPtrS[0], r.RPtrS->B) // expected-error {{same pointer dereferenced in multiple different ways in map clause expressions}} expected-note {{used here}}
+  {}
+  #pragma omp target map(r.RPtrS, r.RPtrS->B) // expected-error {{pointer cannot be mapped along with a section derived from itself}} expected-note {{used here}}
+  {}
+  #pragma omp target map(r.RPtrS->A, r.RPtrS->B)
   {}
   #pragma omp target map(r.S.Arr[:12])
   {}
@@ -267,8 +285,13 @@
   {}
   #pragma omp target map((p+1)->A)  // expected-error {{expected expression containing only member accesses and/or array sections based on named variables}}
   {}
-  #pragma omp target map(u.B)  // expected-error {{mapped storage cannot be derived from a union}}
+  #pragma omp target map(u.B)  // expected-error {{mapping of union members is not allowed}}
   {}
+  #pragma omp target
+  {
+    u.B = 0;
+    r.S.foo();
+  }
 
   #pragma omp target data map(to: r.C) //expected-note {{used here}}
   {
diff --git a/test/OpenMP/target_messages.cpp b/test/OpenMP/target_messages.cpp
index 77d8084..25db026 100644
--- a/test/OpenMP/target_messages.cpp
+++ b/test/OpenMP/target_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s
 // RUN: not %clang_cc1 -fopenmp -std=c++11 -fopenmp-targets=aaa-bbb-ccc-ddd -o - %s 2>&1 | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -o - %s
 // CHECK: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd'
 // RUN: not %clang_cc1 -fopenmp -std=c++11 -triple nvptx64-nvidia-cuda -o - %s 2>&1 | FileCheck --check-prefix CHECK-UNSUPPORTED-HOST-TARGET %s
 // RUN: not %clang_cc1 -fopenmp -std=c++11 -triple nvptx-nvidia-cuda -o - %s 2>&1 | FileCheck --check-prefix CHECK-UNSUPPORTED-HOST-TARGET %s
diff --git a/test/OpenMP/target_nowait_messages.cpp b/test/OpenMP/target_nowait_messages.cpp
index 7531c81..1f982dd 100644
--- a/test/OpenMP/target_nowait_messages.cpp
+++ b/test/OpenMP/target_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_ast_print.cpp b/test/OpenMP/target_parallel_ast_print.cpp
index 82c9203..1b61f15 100644
--- a/test/OpenMP/target_parallel_ast_print.cpp
+++ b/test/OpenMP/target_parallel_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -70,7 +74,7 @@
 // CHECK-NEXT: S<T> s;
 // CHECK-NEXT: T arr[C][10], arr1[C];
 // CHECK-NEXT: T i, j, a[20]
-// CHECK-NEXT: #pragma omp target parallel
+// CHECK-NEXT: #pragma omp target parallel{{$}}
 // CHECK-NEXT: h = 2;
 // CHECK-NEXT: #pragma omp target parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(C) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10])
 // CHECK-NEXT: foo()
diff --git a/test/OpenMP/target_parallel_codegen.cpp b/test/OpenMP/target_parallel_codegen.cpp
index 17318d5..0cead45 100644
--- a/test/OpenMP/target_parallel_codegen.cpp
+++ b/test/OpenMP/target_parallel_codegen.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -39,15 +57,15 @@
 // sizes.
 
 // CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2]
-// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2]
-// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 288, i32 288]
-// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 288, i32 547, i32 288, i32 547, i32 547, i32 288, i32 288, i32 547, i32 547]
+// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288]
+// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547]
 // CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40]
-// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 288, i32 288, i32 547]
+// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547]
 // CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40]
-// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 288, i32 288, i32 288, i32 547]
-// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 547, i32 288, i32 288, i32 288, i32 547]
+// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547]
 // CHECK-DAG: @{{.*}} = private constant i8 0
 // CHECK-DAG: @{{.*}} = private constant i8 0
 // CHECK-DAG: @{{.*}} = private constant i8 0
@@ -93,14 +111,14 @@
   double cn[5][n];
   TT<long long, char> d;
 
-  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null, i32 1, i32 0)
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0)
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
   // CHECK:       [[FAIL]]
   // CHECK:       call void [[HVT0:@.+]]()
   // CHECK-NEXT:  br label %[[END]]
   // CHECK:       [[END]]
-  #pragma omp target parallel
+  #pragma omp target parallel nowait
   {
   }
 
@@ -110,7 +128,7 @@
     a += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0)
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -134,7 +152,7 @@
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -179,14 +197,14 @@
   // CHECK-32:       store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
   // CHECK-32:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
 
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
+  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+  // CHECK:       [[TRY]]
   // CHECK:       [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
   // CHECK:       [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
   // CHECK:       [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
 
-  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
-  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
-  // CHECK:       [[TRY]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0
@@ -428,7 +446,7 @@
 // CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]])
 //
 //
-// CHECK:       define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* %{{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* %{{.+}}, [[TT]]* {{.+}})
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}})
 // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
 
 template<typename tx>
@@ -510,17 +528,18 @@
 // CHECK-64:       store i32 %{{.+}}, i32* [[B_ADDR]],
 // CHECK-64:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]],
 
+// CHECK-32:       store i32 %{{.+}}, i32* %__vla_expr
 // CHECK-32:       store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
 // CHECK-32:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
 
-// We capture 2 VLA sizes in this target region
-// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
-// CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
-
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
 // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
 // CHECK:       [[TRY]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
+// We capture 2 VLA sizes in this target region
+// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
+// CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
+
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0
@@ -583,7 +602,7 @@
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -633,7 +652,7 @@
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -703,7 +722,7 @@
 // CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]])
 //
 //
-// CHECK:       define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* %{{.+}})
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}})
 // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
 
 
diff --git a/test/OpenMP/target_parallel_codegen_registration.cpp b/test/OpenMP/target_parallel_codegen_registration.cpp
index 2511bc4..5b2af0f 100644
--- a/test/OpenMP/target_parallel_codegen_registration.cpp
+++ b/test/OpenMP/target_parallel_codegen_registration.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target parallel codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
@@ -16,9 +24,22 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // Check that no target code is emmitted if no omptests flag was provided.
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -63,40 +84,40 @@
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // TCHECK-NOT: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 
 // CHECK-NTARGET-NOT: private constant i8 0
 // CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
@@ -411,31 +432,31 @@
 
 // Check metadata is properly generated:
 // CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 245, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 261, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 267, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 406, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 290, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 290, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 220, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 266, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 282, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 288, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 427, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 311, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 311, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 241, i32 {{[0-9]+}}}
 
 // TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 245, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 261, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 267, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 406, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 290, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 290, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 220, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 266, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 282, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 288, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 427, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 311, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 311, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 241, i32 {{[0-9]+}}}
 
 #endif
diff --git a/test/OpenMP/target_parallel_codegen_registration_naming.cpp b/test/OpenMP/target_parallel_codegen_registration_naming.cpp
index 4b79402..d2b88dc 100644
--- a/test/OpenMP/target_parallel_codegen_registration_naming.cpp
+++ b/test/OpenMP/target_parallel_codegen_registration_naming.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target parallel codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/target_parallel_debug_codegen.cpp b/test/OpenMP/target_parallel_debug_codegen.cpp
index cd19a8f..0ecd6a9 100644
--- a/test/OpenMP/target_parallel_debug_codegen.cpp
+++ b/test/OpenMP/target_parallel_debug_codegen.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
 // expected-no-diagnostics
 
 int main() {
@@ -92,18 +92,18 @@
 // CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
 // CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32*
 // CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]*
-// CHECK: call void [[NONDEBUG_WRAPPER:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}})
+// CHECK: call void @[[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}})
 
-// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}})
+// CHECK: define internal void @[[DEBUG_PARALLEL:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}})
 // CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
 // CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32*
 // CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]*
 
-// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i32* dereferenceable{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}})
+// CHECK: define internal void @[[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i32* dereferenceable{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}})
 // CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
 // CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)*
 // CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)*
-// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}})
+// CHECK: call void @[[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}})
 
 // CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i32* dereferenceable{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}})
 // CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
@@ -111,3 +111,16 @@
 // CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)*
 // CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}})
 
+// CHECK: !DILocalVariable(name: ".global_tid.",
+// CHECK-SAME: DIFlagArtificial
+// CHECK: !DILocalVariable(name: ".bound_tid.",
+// CHECK-SAME: DIFlagArtificial
+// CHECK: !DILocalVariable(name: "c",
+// CHECK-SAME: line: 11
+// CHECK: !DILocalVariable(name: "a",
+// CHECK-SAME: line: 9
+// CHECK: !DILocalVariable(name: "b",
+// CHECK-SAME: line: 10
+
+// CHECK-DAG: distinct !DISubprogram(name: "[[NONDEBUG_WRAPPER]]",
+// CHECK-DAG: distinct !DISubprogram(name: "[[DEBUG_PARALLEL]]",
diff --git a/test/OpenMP/target_parallel_default_messages.cpp b/test/OpenMP/target_parallel_default_messages.cpp
index cf49793..143a94c 100644
--- a/test/OpenMP/target_parallel_default_messages.cpp
+++ b/test/OpenMP/target_parallel_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/target_parallel_defaultmap_messages.cpp b/test/OpenMP/target_parallel_defaultmap_messages.cpp
index 49e7c30..33078a2 100644
--- a/test/OpenMP/target_parallel_defaultmap_messages.cpp
+++ b/test/OpenMP/target_parallel_defaultmap_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_depend_codegen.cpp b/test/OpenMP/target_parallel_depend_codegen.cpp
new file mode 100644
index 0000000..108535a
--- /dev/null
+++ b/test/OpenMP/target_parallel_depend_codegen.cpp
@@ -0,0 +1,261 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+extern int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+  static long *plocal;
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]*
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target parallel device(global + a) depend(in: global) depend(out: a, b, cn[4])
+  {
+  }
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+
+  // CHECK:       [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
+  // CHECK:       br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
+  // CHECK:       [[THEN]]:
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+
+  // CHECK:       [[ELSE]]:
+  // CHECK-NOT:   getelementptr inbounds [2 x i8*], [2 x i8*]*
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+  // CHECK:       [[EXIT]]:
+
+  #pragma omp target parallel device(global + a) nowait depend(inout: global, a, bn) if(target:a)
+  {
+    static int local1;
+    *plocal = global;
+    local1 = global;
+  }
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]*
+  // CHECK:       getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target parallel if(0) firstprivate(global) depend(out:global)
+  {
+    global += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0:@.+]]()
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias)
+// CHECK:       store void (i8*, ...)* null, void (i8*, ...)** %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0)
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT0]]()
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 1, i32 0)
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    load i32, i32* [[AA_ADDR]], align
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT2]](i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+
+#endif
diff --git a/test/OpenMP/target_parallel_depend_messages.cpp b/test/OpenMP/target_parallel_depend_messages.cpp
index 24c69e9..746c223 100644
--- a/test/OpenMP/target_parallel_depend_messages.cpp
+++ b/test/OpenMP/target_parallel_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_device_messages.cpp b/test/OpenMP/target_parallel_device_messages.cpp
index 6c8d4c2..cc25253 100644
--- a/test/OpenMP/target_parallel_device_messages.cpp
+++ b/test/OpenMP/target_parallel_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_firstprivate_messages.cpp b/test/OpenMP/target_parallel_firstprivate_messages.cpp
index dd6825a..02186e7 100644
--- a/test/OpenMP/target_parallel_firstprivate_messages.cpp
+++ b/test/OpenMP/target_parallel_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_ast_print.cpp b/test/OpenMP/target_parallel_for_ast_print.cpp
index d5aa055..c6c46c0 100644
--- a/test/OpenMP/target_parallel_for_ast_print.cpp
+++ b/test/OpenMP/target_parallel_for_ast_print.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -35,7 +39,7 @@
   }
 };
 
-// CHECK: #pragma omp target parallel for private(this->a) private(this->a) private(T::a)
+// CHECK: #pragma omp target parallel for private(this->a) private(this->a) private(T::a){{$}}
 // CHECK: #pragma omp target parallel for private(this->a) private(this->a)
 // CHECK: #pragma omp target parallel for private(this->a) private(this->a) private(this->S::a)
 
diff --git a/test/OpenMP/target_parallel_for_codegen.cpp b/test/OpenMP/target_parallel_for_codegen.cpp
index 1b72c0a..4b6254d 100644
--- a/test/OpenMP/target_parallel_for_codegen.cpp
+++ b/test/OpenMP/target_parallel_for_codegen.cpp
@@ -1,232 +1,831 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
 
-long long get_val() { return 0; }
-double *g_ptr;
+// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
 
-// CHECK-LABEL: define {{.*void}} @{{.*}}simple{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
-void simple(float *a, float *b, float *c, float *d) {
-  // CHECK: store i32 3, i32* %
-  // CHECK: icmp slt i32 %{{.+}}, 32
-  // CHECK: fmul float
-  // CHECK: fmul float
-  // CHECK: add nsw i32 %{{.+}}, 5
-  #pragma omp target parallel for device((int)*a)
-  for (int i = 3; i < 32; i += 5) {
-    a[i] = b[i] * c[i] * d[i];
-  }
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[S1:%.+]] = type { double }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
 
-  // CHECK: call i{{.+}} @{{.+}}get_val{{.+}}()
-  // CHECK: store i32 10, i32* %
-  // CHECK: icmp sgt i32 %{{.+}}, 1
-  // CHECK: fadd float %{{.+}}, 1.000000e+00
-  // CHECK: add nsw {{.+}} %{{.+}}, 3
-  // CHECK: add nsw i32 %{{.+}}, -1
-  long long k = get_val();
-  #pragma omp target parallel for linear(k : 3) schedule(dynamic)
-  for (int i = 10; i > 1; i--) {
-    a[k]++;
-    k = k + 3;
-  }
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
 
-  // CHECK: store i32 12, i32* %
-  // CHECK: store i{{.+}} 2000, i{{.+}}* %
-  // CHECK: icmp uge i{{.+}} %{{.+}}, 600
-  // CHECK: store double 0.000000e+00,
-  // CHECK: fadd float %{{.+}}, 1.000000e+00
-  // CHECK: sub i{{.+}} %{{.+}}, 400
-  int lin = 12;
-  #pragma omp target parallel for linear(lin : get_val()), linear(g_ptr)
-  for (unsigned long long it = 2000; it >= 600; it-=400) {
-    *g_ptr++ = 0.0;
-    a[it + lin]++;
-  }
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
 
-  // CHECK: store i{{.+}} 6, i{{.+}}* %
-  // CHECK: icmp sle i{{.+}} %{{.+}}, 20
-  // CHECK: sub nsw i{{.+}} %{{.+}}, -4
-  #pragma omp target parallel for
-  for (short it = 6; it <= 20; it-=-4) {
-  }
+// We have 8 target regions, but only 7 that actually will generate offloading
+// code, only 6 will have mapped arguments, and only 4 have all-constant map
+// sizes.
 
-  // CHECK: store i8 122, i8* %
-  // CHECK: icmp sge i32 %{{.+}}, 97
-  // CHECK: add nsw i32 %{{.+}}, -1
-  #pragma omp target parallel for
-  for (unsigned char it = 'z'; it >= 'a'; it+=-1) {
-  }
+// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288]
+// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2]
+// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288]
+// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [10 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547, i64 288]
+// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40]
+// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547]
+// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40]
+// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
 
-  // CHECK: store i32 100, i32* %
-  // CHECK: icmp ult i32 %{{.+}}, 10
-  // CHECK: add i32 %{{.+}}, 10
-  #pragma omp target parallel for
-  for (unsigned i=100; i<10; i+=10) {
-  }
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
 
-  int A;
-  {
-  A = -1;
-  // CHECK: store i{{.+}} -10, i{{.+}}* %
-  // CHECK: icmp slt i{{.+}} %{{.+}}, 10
-  // CHECK: add nsw i{{.+}} %{{.+}}, 3
-  #pragma omp target parallel for lastprivate(A)
-  for (long long i = -10; i < 10; i += 3) {
-    A = i;
-  }
-  }
-  int R;
-  {
-  R = -1;
-  // CHECK: store i{{.+}} -10, i{{.+}}* %
-  // CHECK: icmp slt i{{.+}} %{{.+}}, 10
-  // CHECK: add nsw i{{.+}} %{{.+}}, 3
-  #pragma omp target parallel for reduction(*:R)
-  for (long long i = -10; i < 10; i += 3) {
-    R *= i;
-  }
-  }
-}
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
 
-template <class T, unsigned K> T tfoo(T a) { return a + K; }
-
-template <typename T, unsigned N>
-int templ1(T a, T *z) {
-  #pragma omp target parallel for collapse(N)
-  for (int i = 0; i < N * 2; i++) {
-    for (long long j = 0; j < (N + N + N + N); j += 2) {
-      z[i + j] = a + tfoo<T, N>(i + j);
-    }
-  }
-  return 0;
-}
-
-// Instatiation templ1<float,2>
-// CHECK-LABEL: define {{.*i32}} @{{.*}}templ1{{.*}}(float {{.+}}, float* {{.+}})
-void inst_templ1() {
-  float a;
-  float z[100];
-  templ1<float,2> (a, z);
-}
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
 
 
-typedef int MyIdx;
-
-class IterDouble {
-  double *Ptr;
-public:
-  IterDouble operator++ () const {
-    IterDouble n;
-    n.Ptr = Ptr + 1;
-    return n;
-  }
-  bool operator < (const IterDouble &that) const {
-    return Ptr < that.Ptr;
-  }
-  double & operator *() const {
-    return *Ptr;
-  }
-  MyIdx operator - (const IterDouble &that) const {
-    return (MyIdx) (Ptr - that.Ptr);
-  }
-  IterDouble operator + (int Delta) {
-    IterDouble re;
-    re.Ptr = Ptr + Delta;
-    return re;
-  }
-
-  ///~IterDouble() {}
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
 };
 
-// CHECK-LABEL: define {{.*void}} @{{.*}}iter_simple{{.*}}
-void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) {
-//
-// Calculate number of iterations before the loop body.
-// CHECK: invoke {{.*}}i32 @{{.*}}IterDouble{{.*}}
+// CHECK-LABEL: get_val
+long long get_val() { return 0; }
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0)
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT0:@.+]]()
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
   #pragma omp target parallel for
-  for (IterDouble i = ia; i < ib; ++i) {
-// Call of operator+ (i, IV).
-// CHECK: {{%.+}} = invoke {{.+}} @{{.*}}IterDouble{{.*}}
-// ... loop body ...
-   *i = *ic * 0.5;
-// Float multiply and save result.
-// CHECK: [[MULR:%.+]] = fmul double {{%.+}}, 5.000000e-01
-// CHECK-NEXT: invoke {{.+}} @{{.*}}IterDouble{{.*}}
-// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]]
-   ++ic;
+  for (int i = 3; i < 32; i += 5) {
+#pragma omp cancel for
+#pragma omp cancellation point for
   }
-// CHECK: ret void
-}
 
-
-// CHECK-LABEL: define {{.*void}} @{{.*}}collapsed{{.*}}
-void collapsed(float *a, float *b, float *c, float *d) {
-  int i; // outer loop counter
-  unsigned j; // middle loop couter, leads to unsigned icmp in loop header.
-  // k declared in the loop init below
-  short l; // inner loop counter
-//
-  #pragma omp target parallel for collapse(4)
-  for (i = 1; i < 3; i++) // 2 iterations
-    for (j = 2u; j < 5u; j++) //3 iterations
-      for (int k = 3; k <= 6; k++) // 4 iterations
-        for (l = 4; l < 9; ++l) // 5 iterations
-        {
-// ... loop body ...
-// End of body: store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]
-    float res = b[j] * c[k];
-    a[i] = res * d[l];
+  // CHECK:       call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}, i{{32|64}}{{[*]*}} {{[^)]+}})
+  long long k = get_val();
+  #pragma omp target parallel for if(target: 0) linear(k : 3) schedule(dynamic)
+  for (int i = 10; i > 1; i--) {
+    a += 1;
   }
-// CHECK: ret void
-}
 
-extern char foo();
-extern double globalfloat;
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]],
 
-// CHECK-LABEL: define {{.*void}} @{{.*}}widened{{.*}}
-void widened(float *a, float *b, float *c, float *d) {
-  int i; // outer loop counter
-  short j; // inner loop counter
-  globalfloat = 1.0;
-  int localint = 1;
-// CHECK: store double {{.+}}, double* [[GLOBALFLOAT:@.+]],
-  #pragma omp target parallel for collapse(2) private(globalfloat, localint)
-  for (i = 1; i < 3; i++) // 2 iterations
-    for (j = 0; j < foo(); j++) // foo() iterations
-  {
-// ... loop body ...
-//
-// Here we expect store into private double var, not global
-// CHECK: store double {{.+}}, double* [[GLOBALFLOAT]]
-    globalfloat = (float)j/i;
-    float res = b[j] * c[j];
-// Store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]
-    a[i] = res * d[i];
-// Then there's a store into private var localint:
-// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]
-    localint = (int)j;
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  int lin = 12;
+  #pragma omp target parallel for if(target: 1) linear(lin, a : get_val()) nowait
+  for (unsigned long long it = 2000; it >= 600; it-=400) {
+    aa += 1;
   }
+
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
+  // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+  // CHECK:       [[IFTHEN]]
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
+
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+  // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  // CHECK-NEXT:  br label %[[IFEND:.+]]
+  // CHECK:       [[IFELSE]]
+  // CHECK:       call void [[HVT3]]({{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[IFEND]]
+  // CHECK:       [[IFEND]]
+
+  #pragma omp target parallel for if(target: n>10)
+  for (short it = 6; it <= 20; it-=-4) {
+    a += 1;
+    aa += 1;
+  }
+
+  // We capture 3 VLA sizes in this target region
+  // CHECK:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK:       store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
+  // CHECK-64:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-64:       [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32*
+  // CHECK-64:       store i32 [[A_VAL]], i32* [[A_ADDR]],
+  // CHECK-64:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
+
+  // CHECK-32:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-32:       store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
+  // CHECK-32:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
+
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
+  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+  // CHECK:       [[TRY]]
+  // CHECK:       [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
+  // CHECK:       [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
+  // CHECK:       [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
+
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[SR]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0
+
+  // CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[SADDR4:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[SADDR5:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[PADDR5:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[SADDR6:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR6:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[PADDR6:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[SADDR7:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR7:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[PADDR7:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[SADDR8:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR8:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX8]]
+  // CHECK-DAG:   [[PADDR8:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX8]]
+
+  // The names below are not necessarily consistent with the names used for the
+  // addresses above as some are repeated.
+  // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]],
+  // CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CBPADDR1:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CPADDR1:%.+]],
+  // CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CBPADDR2:%.+]],
+  // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CPADDR2:%.+]],
+  // CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CBPADDR3:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR3:%.+]],
+  // CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR4:%.+]],
+  // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR4:%.+]],
+  // CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]**
+  // CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]**
+  // CHECK-DAG:   store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store float* %{{.+}}, float** [[CBPADDR5:%.+]],
+  // CHECK-DAG:   store float* %{{.+}}, float** [[CPADDR5:%.+]],
+  // CHECK-DAG:   [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to float**
+  // CHECK-DAG:   [[CPADDR5]] = bitcast i8** {{%[^,]+}} to float**
+  // CHECK-DAG:   store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CBPADDR6:%.+]],
+  // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR6:%.+]],
+  // CHECK-DAG:   [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]**
+  // CHECK-DAG:   [[CPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]**
+  // CHECK-DAG:   store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store double* %{{.+}}, double** [[CBPADDR7:%.+]],
+  // CHECK-DAG:   store double* %{{.+}}, double** [[CPADDR7:%.+]],
+  // CHECK-DAG:   [[CBPADDR7]] = bitcast i8** {{%[^,]+}} to double**
+  // CHECK-DAG:   [[CPADDR7]] = bitcast i8** {{%[^,]+}} to double**
+  // CHECK-DAG:   store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CBPADDR8:%.+]],
+  // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8:%.+]],
+  // CHECK-DAG:   [[CBPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]**
+  // CHECK-DAG:   [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]**
+  // CHECK-DAG:   store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^)]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target parallel for if(target: n>20) schedule(static, a)
+  for (unsigned char it = 'z'; it >= 'a'; it+=-1) {
+    a += 1;
+    b[2] += 1.0;
+    bn[3] += 1.0;
+    c[1][2] += 1.0;
+    cn[1][3] += 1.0;
+    d.X += 1;
+    d.Y += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0]]()
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*))
 //
-// Here we expect store into original localint, not its privatized version.
-// CHECK: store i32 {{.+}}, i32* [[LOCALINT]]
-  localint = (int)j;
-// CHECK: ret void
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid.)
+// CHECK:       call i32 @__kmpc_cancel(%ident_t* @
+// CHECK:       call i32 @__kmpc_cancellationpoint(%ident_t* @
+// CHECK:       ret void
+// CHECK:       }
+
+
+// CHECK:       define internal void [[HVT1]](i[[SZ]] %{{.+}}, i{{32|64}}{{[*]*.*}} %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       alloca i{{32|64}}{{[*]*}}, align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
+// CHECK-64:    [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i32*
+// CHECK-64:    store i32 [[AA]], i32* [[AA_C]], align
+// CHECK-32:    store i32 [[AA]], i32* [[AA_CASTED]], align
+// CHECK:       [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i{{32|64}}{{[*]*}})* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]], i{{32|64}}{{[*]*}} %{{.+}})
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}})
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT2]](i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       alloca i[[SZ]], align
+// CHECK:       alloca i[[SZ]], align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK:       [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK:       [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
+// CHECK:       store i16 [[AA]], i16* [[AA_C]], align
+// CHECK:       [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]])* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]], i[[SZ]] {{.+}}, i[[SZ]] {{.+}})
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK:       [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT3]]
+// Create stack storage and store argument in there.
+// CHECK:       [[A_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[A_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
+// CHECK-DAG:   [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK-64-DAG:[[A:%.+]] = load i32, i32* [[A_CADDR]], align
+// CHECK-32-DAG:[[A:%.+]] = load i32, i32* [[A_ADDR]], align
+// CHECK-64-DAG:[[A_C:%.+]] = bitcast i[[SZ]]* [[A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[A]], i32* [[A_C]], align
+// CHECK-32-DAG:store i32 [[A]], i32* [[A_CASTED]], align
+// CHECK-DAG:   [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK-DAG:   [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[AA]], i16* [[AA_C]], align
+// CHECK-DAG:   [[PARAM1:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CASTED]], align
+// CHECK-DAG:   [[PARAM2:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK-DAG:   call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
+// CHECK:       [[A_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
+// CHECK-DAG:   [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT4]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x float]*
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_BN:%.+]] = alloca float*
+// CHECK:       [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]*
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA3:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_CN:%.+]] = alloca double*
+// CHECK:       [[LOCAL_D:%.+]] = alloca [[TT]]*
+// CHECK:       alloca i[[SZ]]
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
+// CHECK-DAG:   store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]]
+// CHECK-DAG:   store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]]
+// CHECK-DAG:   store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]]
+// CHECK-DAG:   store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]]
+
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]],
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
+// CHECK-DAG:   [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]],
+// CHECK-DAG:   [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
+// CHECK-DAG:   [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]],
+// CHECK-DAG:   [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]],
+// CHECK-DAG:   [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*, i[[SZ]])* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]], i[[SZ]] %{{.+}})
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  short aa = 0;
+  tx b[10];
+
+  #pragma omp target parallel for if(target: n>40)
+  for (long long i = -10; i < 10; i += 3) {
+    a += 1;
+    aa += 1;
+    b[2] += 1;
+  }
+
+  return a;
 }
 
-// TERM_DEBUG-LABEL: bar
-int bar() {return 0;};
+static
+int fstatic(int n) {
+  int a = 0;
+  short aa = 0;
+  char aaa = 0;
+  int b[10];
 
-// TERM_DEBUG-LABEL: parallel_simd
-void parallel_simd(float *a) {
-#pragma omp target parallel for
-  for (unsigned i = 131071; i <= 2147483647; i += 127)
-    a[i] += bar();
+  #pragma omp target parallel for if(target: n>50)
+  for (unsigned i=100; i<10; i+=10) {
+    a += 1;
+    aa += 1;
+    aaa += 1;
+    b[2] += 1;
+  }
+
+  return a;
 }
-#endif // HEADER
 
+struct S1 {
+  double a;
+
+  int r1(int n){
+    int b = n+1;
+    short int c[2][n];
+
+    #pragma omp target parallel for if(target: n>60)
+    for (unsigned long long it = 2000; it >= 600; it -= 400) {
+      this->a = (double)b + 1.5;
+      c[1][1] = ++a;
+    }
+
+    return c[1][1] + (int)b;
+  }
+};
+
+// CHECK: define {{.*}}@{{.*}}bar{{.*}}
+int bar(int n){
+  int a = 0;
+
+  // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}})
+  a += foo(n);
+
+  S1 S;
+  // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
+  a += S.r1(n);
+
+  // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
+  a += fstatic(n);
+
+  // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+//
+// CHECK: define {{.*}}[[FS1]]
+//
+// CHECK:          i8* @llvm.stacksave()
+// CHECK-64:       [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32*
+// CHECK-64:       store i32 %{{.+}}, i32* [[B_ADDR]],
+// CHECK-64:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]],
+
+// CHECK-32:       store i32 %{{.+}}, i32* %__vla_expr
+// CHECK-32:       store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
+// CHECK-32:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
+
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
+// CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+// CHECK:       [[TRY]]
+// We capture 2 VLA sizes in this target region
+// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
+// CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
+
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]]
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]]
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]]
+// CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]]
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]]
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]]
+// CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]]
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]]
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]]
+// CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]]
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]]
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]]
+
+// The names below are not necessarily consistent with the names used for the
+// addresses above as some are repeated.
+// CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]],
+// CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CBPADDR1:%.+]],
+// CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CPADDR1:%.+]],
+// CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]],
+// CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR3:%.+]],
+// CHECK-DAG:   store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR3:%.+]],
+// CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]**
+// CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]**
+// CHECK-DAG:   store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i16* %{{.+}}, i16** [[CBPADDR4:%.+]],
+// CHECK-DAG:   store i16* %{{.+}}, i16** [[CPADDR4:%.+]],
+// CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG:   store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}}
+
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+
+//
+// CHECK: define {{.*}}[[FSTATIC]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
+// CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+// CHECK:       [[IFTHEN]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
+
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 0
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 0
+// CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+// CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 1
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 1
+// CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+// CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 2
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 2
+// CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]],
+// CHECK-DAG:   store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]],
+
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 3
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 3
+// CHECK-DAG:   [[CBPADDR3:%.+]] = bitcast i8** [[BPADDR3]] to [10 x i32]**
+// CHECK-DAG:   [[CPADDR3:%.+]] = bitcast i8** [[PADDR3]] to [10 x i32]**
+// CHECK-DAG:   store [10 x i32]* [[VAL3:%.+]], [10 x i32]** [[CBPADDR3]],
+// CHECK-DAG:   store [10 x i32]* [[VAL3]], [10 x i32]** [[CPADDR3]],
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK-NEXT:  br label %[[IFEND:.+]]
+// CHECK:       [[IFELSE]]
+// CHECK:       call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[IFEND]]
+// CHECK:       [[IFEND]]
+
+//
+// CHECK: define {{.*}}[[FTEMPLATE]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
+// CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+// CHECK:       [[IFTHEN]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
+
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0
+// CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+// CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1
+// CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+// CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2
+// CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to [10 x i32]**
+// CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to [10 x i32]**
+// CHECK-DAG:   store [10 x i32]* [[VAL2:%.+]], [10 x i32]** [[CBPADDR2]],
+// CHECK-DAG:   store [10 x i32]* [[VAL2]], [10 x i32]** [[CPADDR2]],
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK-NEXT:  br label %[[IFEND:.+]]
+// CHECK:       [[IFELSE]]
+// CHECK:       call void [[HVT:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[IFEND]]
+// CHECK:       [[IFEND]]
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions of the callees of bar().
+
+// CHECK:       define internal void [[HVT7]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_THIS:%.+]] = alloca [[S1]]*
+// CHECK:       [[LOCAL_B:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_C:%.+]] = alloca i16*
+// CHECK:       [[LOCAL_B_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
+// CHECK-DAG:   store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]]
+// Store captures in the context.
+// CHECK-DAG:   [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]],
+// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32*
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
+// CHECK-DAG:   [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]],
+
+// CHECK-64-DAG:[[CONV_B:%.+]] = load i32, i32* [[CONV_BP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_B_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_B]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_BV:%.+]] = load i32, i32* [[LOCAL_B]]
+// CHECK-32-DAG:store i32 [[LOCAL_BV]], i32* [[LOCAL_B_CASTED]], align
+// CHECK-DAG:   [[REF_B:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_B_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+
+// CHECK:       define internal void [[HVT6]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AAA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AAA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
+// Store captures in the context.
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
+// CHECK-DAG:   [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK-DAG:   [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[CONV_AA]], i16* [[CONV]], align
+// CHECK-DAG:   [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
+
+// CHECK-DAG:   [[CONV_AAA:%.+]] = load i8, i8* [[CONV_AAAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA_CASTED]] to i8*
+// CHECK-DAG:   store i8 [[CONV_AAA]], i8* [[CONV]], align
+// CHECK-DAG:   [[REF_AAA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AAA_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED6:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], i[[SZ]] [[REF_AAA]], [10 x i32]* [[REF_B]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+// CHECK:       define internal void [[HVT5]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
+// Store captures in the context.
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK-DAG:   [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[CONV_AA]], i16* [[CONV]], align
+// CHECK-DAG:   [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+#endif
diff --git a/test/OpenMP/target_parallel_for_codegen_registration.cpp b/test/OpenMP/target_parallel_for_codegen_registration.cpp
new file mode 100644
index 0000000..6862a61
--- /dev/null
+++ b/test/OpenMP/target_parallel_for_codegen_registration.cpp
@@ -0,0 +1,472 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target parallel for codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// Check that no target code is emmitted if no omptests flag was provided.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK:    [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[A1:@.+]] = internal global [[SA]]
+// CHECK-DAG: [[A2:@.+]] = global [[SA]]
+// CHECK-DAG: [[B1:@.+]] = global [[SB]]
+// CHECK-DAG: [[B2:@.+]] = global [[SB]]
+// CHECK-DAG: [[C1:@.+]] = internal global [[SC]]
+// CHECK-DAG: [[D1:@.+]] = global [[SD]]
+// CHECK-DAG: [[E1:@.+]] = global [[SE]]
+// CHECK-DAG: [[T1:@.+]] = global [[ST1]]
+// CHECK-DAG: [[T2:@.+]] = global [[ST2]]
+
+// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-NTARGET-NOT: type { i8*, i8*, %
+// CHECK-NTARGET-NOT: type { i32, %
+
+// We have 7 target regions
+
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// TCHECK-NOT: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+
+// CHECK-NTARGET-NOT: private constant i8 0
+// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
+
+// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function.
+// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [
+// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+// CHECK-NTARGET: @llvm.global_ctors = appending global [3   x { i32, void ()*, i8* }] [
+
+extern int *R;
+
+struct SA {
+  int arr[4];
+  void foo() {
+    int a = *R;
+    a += 1;
+    *R = a;
+  }
+  SA() {
+    int a = *R;
+    a += 2;
+    *R = a;
+  }
+  ~SA() {
+    int a = *R;
+    a += 3;
+    *R = a;
+  }
+};
+
+struct SB {
+  int arr[8];
+  void foo() {
+    int a = *R;
+    #pragma omp target parallel for
+    for (int i = 0; i < 10; ++i)
+      a += 4;
+    *R = a;
+  }
+  SB() {
+    int a = *R;
+    a += 5;
+    *R = a;
+  }
+  ~SB() {
+    int a = *R;
+    a += 6;
+    *R = a;
+  }
+};
+
+struct SC {
+  int arr[16];
+  void foo() {
+    int a = *R;
+    a += 7;
+    *R = a;
+  }
+  SC() {
+    int a = *R;
+    #pragma omp target parallel for
+    for (int i = 0; i < 10; ++i)
+      a += 8;
+    *R = a;
+  }
+  ~SC() {
+    int a = *R;
+    a += 9;
+    *R = a;
+  }
+};
+
+struct SD {
+  int arr[32];
+  void foo() {
+    int a = *R;
+    a += 10;
+    *R = a;
+  }
+  SD() {
+    int a = *R;
+    a += 11;
+    *R = a;
+  }
+  ~SD() {
+    int a = *R;
+    #pragma omp target parallel for
+    for (int i = 0; i < 10; ++i)
+      a += 12;
+    *R = a;
+  }
+};
+
+struct SE {
+  int arr[64];
+  void foo() {
+    int a = *R;
+    #pragma omp target parallel for if(target: 0)
+    for (int i = 0; i < 10; ++i)
+      a += 13;
+    *R = a;
+  }
+  SE() {
+    int a = *R;
+    #pragma omp target parallel for
+    for (int i = 0; i < 10; ++i)
+      a += 14;
+    *R = a;
+  }
+  ~SE() {
+    int a = *R;
+    #pragma omp target parallel for
+    for (int i = 0; i < 10; ++i)
+      a += 15;
+    *R = a;
+  }
+};
+
+template <int x>
+struct ST {
+  int arr[128 + x];
+  void foo() {
+    int a = *R;
+    #pragma omp target parallel for
+    for (int i = 0; i < 10; ++i)
+      a += 16 + x;
+    *R = a;
+  }
+  ST() {
+    int a = *R;
+    #pragma omp target parallel for
+    for (int i = 0; i < 10; ++i)
+      a += 17 + x;
+    *R = a;
+  }
+  ~ST() {
+    int a = *R;
+    #pragma omp target parallel for
+    for (int i = 0; i < 10; ++i)
+      a += 18 + x;
+    *R = a;
+  }
+};
+
+// We have to make sure we us all the target regions:
+//CHECK-DAG: define internal void @[[NAME1]](
+//CHECK-DAG: call void @[[NAME1]](
+//CHECK-DAG: define internal void @[[NAME2]](
+//CHECK-DAG: call void @[[NAME2]](
+//CHECK-DAG: define internal void @[[NAME3]](
+//CHECK-DAG: call void @[[NAME3]](
+//CHECK-DAG: define internal void @[[NAME4]](
+//CHECK-DAG: call void @[[NAME4]](
+//CHECK-DAG: define internal void @[[NAME5]](
+//CHECK-DAG: call void @[[NAME5]](
+//CHECK-DAG: define internal void @[[NAME6]](
+//CHECK-DAG: call void @[[NAME6]](
+//CHECK-DAG: define internal void @[[NAME7]](
+//CHECK-DAG: call void @[[NAME7]](
+//CHECK-DAG: define internal void @[[NAME8]](
+//CHECK-DAG: call void @[[NAME8]](
+//CHECK-DAG: define internal void @[[NAME9]](
+//CHECK-DAG: call void @[[NAME9]](
+//CHECK-DAG: define internal void @[[NAME10]](
+//CHECK-DAG: call void @[[NAME10]](
+//CHECK-DAG: define internal void @[[NAME11]](
+//CHECK-DAG: call void @[[NAME11]](
+//CHECK-DAG: define internal void @[[NAME12]](
+//CHECK-DAG: call void @[[NAME12]](
+
+//TCHECK-DAG: define void @[[NAME1]](
+//TCHECK-DAG: define void @[[NAME2]](
+//TCHECK-DAG: define void @[[NAME3]](
+//TCHECK-DAG: define void @[[NAME4]](
+//TCHECK-DAG: define void @[[NAME5]](
+//TCHECK-DAG: define void @[[NAME6]](
+//TCHECK-DAG: define void @[[NAME7]](
+//TCHECK-DAG: define void @[[NAME8]](
+//TCHECK-DAG: define void @[[NAME9]](
+//TCHECK-DAG: define void @[[NAME10]](
+//TCHECK-DAG: define void @[[NAME11]](
+//TCHECK-DAG: define void @[[NAME12]](
+
+// CHECK-NTARGET-NOT: __tgt_target
+// CHECK-NTARGET-NOT: __tgt_register_lib
+// CHECK-NTARGET-NOT: __tgt_unregister_lib
+
+// TCHECK-NOT: __tgt_target
+// TCHECK-NOT: __tgt_register_lib
+// TCHECK-NOT: __tgt_unregister_lib
+
+// We have 2 initializers with priority 500
+//CHECK: define internal void [[P500]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 1 initializers with priority 501
+//CHECK: define internal void [[P501]](
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 6 initializers with default priority
+//CHECK: define internal void [[PMAX]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// Check registration and unregistration
+
+//CHECK:     define internal void @[[UNREGFN:.+]](i8*)
+//CHECK-SAME: comdat($[[REGFN]]) {
+//CHECK:     call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_unregister_lib([[DSCTY]]*)
+
+//CHECK:     define linkonce hidden void @[[REGFN]](i8*)
+//CHECK-SAME: comdat {
+//CHECK:     call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]])
+//CHECK:     call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*),
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_register_lib([[DSCTY]]*)
+
+static __attribute__((init_priority(500))) SA a1;
+SA a2;
+SB __attribute__((init_priority(500))) b1;
+SB __attribute__((init_priority(501))) b2;
+static SC c1;
+SD d1;
+SE e1;
+ST<100> t1;
+ST<1000> t2;
+
+
+int bar(int a){
+  int r = a;
+
+  a1.foo();
+  a2.foo();
+  b1.foo();
+  b2.foo();
+  c1.foo();
+  d1.foo();
+  e1.foo();
+  t1.foo();
+  t2.foo();
+
+  #pragma omp target parallel for
+  for (int i = 0; i < 10; ++i)
+    ++r;
+
+  return r + *R;
+}
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+#endif
diff --git a/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp b/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp
new file mode 100644
index 0000000..db7f9ec
--- /dev/null
+++ b/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp
@@ -0,0 +1,86 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target parallel for codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[CA:%.+]] = type { i32* }
+
+// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}})
+int nested(int a){
+  // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]](
+  #pragma omp target parallel for
+  for (int i = 0; i < 10; ++i)
+    ++a;
+
+  // CHECK: call void @"[[LNAME:.+]]"([[CA]]*
+  auto F = [&](){
+    #pragma omp parallel
+    {
+      #pragma omp target parallel for
+      for (int i = 0; i < 10; ++i)
+        ++a;
+    }
+  };
+
+  F();
+
+  return a;
+}
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]](
+
+// CHECK: define {{.*}}void @"[[LNAME]]"(
+// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to
+
+// CHECK: define {{.*}}void [[PNAME]](
+// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]](
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]](
+
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+#endif
diff --git a/test/OpenMP/target_parallel_for_collapse_messages.cpp b/test/OpenMP/target_parallel_for_collapse_messages.cpp
index 6163b52..d8ebdda 100644
--- a/test/OpenMP/target_parallel_for_collapse_messages.cpp
+++ b/test/OpenMP/target_parallel_for_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_debug_codegen.cpp b/test/OpenMP/target_parallel_for_debug_codegen.cpp
new file mode 100644
index 0000000..0102179
--- /dev/null
+++ b/test/OpenMP/target_parallel_for_debug_codegen.cpp
@@ -0,0 +1,115 @@
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
+// expected-no-diagnostics
+
+int main() {
+  /* int(*b)[a]; */
+  /* int *(**c)[a]; */
+  bool bb;
+  int a;
+  int b[10][10];
+  int c[10][10][10];
+#pragma omp target parallel for firstprivate(a, b) map(tofrom          \
+                                                       : c) map(tofrom \
+                                                                : bb) if (a)
+  for (int i = 0; i < 10; ++i) {
+    int &f = c[1][1][1];
+    int &g = a;
+    int &h = b[1][1];
+    int d = 15;
+    a = 5;
+    b[0][a] = 10;
+    c[0][0][a] = 11;
+    b[0][a] = c[0][0][a];
+    bb |= b[0][a];
+  }
+#pragma omp target parallel for firstprivate(a) map(tofrom         \
+                                                    : c, b) map(to \
+                                                                : bb)
+  for (int i = 0; i < 10; ++i) {
+    int &f = c[1][1][1];
+    int &g = a;
+    int &h = b[1][1];
+    int d = 15;
+    a = 5;
+    b[0][a] = 10;
+    c[0][0][a] = 11;
+    b[0][a] = c[0][0][a];
+    d = bb;
+  }
+#pragma omp target parallel for map(tofrom              \
+                                    : a, c, b) map(from \
+                                                   : bb)
+  for (int i = 0; i < 10; ++i) {
+    int &f = c[1][1][1];
+    int &g = a;
+    int &h = b[1][1];
+    int d = 15;
+    a = 5;
+    b[0][a] = 10;
+    c[0][0][a] = 11;
+    b[0][a] = c[0][0][a];
+    bb = b[0][a];
+  }
+  return 0;
+}
+
+// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* noalias{{[^,]+}}, i1 {{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
+// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}})
+
+// CHECK: define internal void [[DEBUG_PARALLEL:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
+
+// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
+// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}})
+
+// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
+// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}})
+
+// CHECK: define internal void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
+// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]*
+// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}})
+
+// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* {{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
+// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]*
+
+// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
+// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)*
+// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}})
+
+// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
+// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)*
+// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}})
+
+// CHECK: define internal void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
+// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32*
+// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]*
+// CHECK: call void @[[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}})
+
+// CHECK: define internal void @[[DEBUG_PARALLEL:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
+// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32*
+// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]*
+
+// CHECK: define internal void @[[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i32* dereferenceable{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
+// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)*
+// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)*
+// CHECK: call void @[[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}})
+
+// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i32* dereferenceable{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}})
+// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)*
+// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)*
+// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)*
+// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}})
+
+// CHECK-DAG: distinct !DISubprogram(name: "[[NONDEBUG_WRAPPER]]",
+// CHECK-DAG: distinct !DISubprogram(name: "[[DEBUG_PARALLEL]]",
diff --git a/test/OpenMP/target_parallel_for_default_messages.cpp b/test/OpenMP/target_parallel_for_default_messages.cpp
index c1f04f4..94049cd 100644
--- a/test/OpenMP/target_parallel_for_default_messages.cpp
+++ b/test/OpenMP/target_parallel_for_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/target_parallel_for_defaultmap_messages.cpp b/test/OpenMP/target_parallel_for_defaultmap_messages.cpp
index 24973ed..9f97b60 100644
--- a/test/OpenMP/target_parallel_for_defaultmap_messages.cpp
+++ b/test/OpenMP/target_parallel_for_defaultmap_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_depend_codegen.cpp b/test/OpenMP/target_parallel_for_depend_codegen.cpp
new file mode 100644
index 0000000..4845ed4
--- /dev/null
+++ b/test/OpenMP/target_parallel_for_depend_codegen.cpp
@@ -0,0 +1,261 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+extern int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+  static long *plocal;
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[IN:@.+]], i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]*
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[IN]], i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target parallel for device(global + a) depend(in: global) depend(out: a, b, cn[4])
+  for (int i = 0; i < 10; ++i) {
+  }
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+
+  // CHECK:       [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
+  // CHECK:       br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
+  // CHECK:       [[THEN]]:
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[IN]], i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+
+  // CHECK:       [[ELSE]]:
+  // CHECK-NOT:   getelementptr inbounds [2 x i8*], [2 x i8*]*
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[IN]], i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+  // CHECK:       [[EXIT]]:
+
+  #pragma omp target parallel for device(global + a) nowait depend(inout: global, a, bn) if(target:a)
+  for (int i = 0; i < *plocal; ++i) {
+    static int local1;
+    *plocal = global;
+    local1 = global;
+  }
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[IN]], i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]*
+  // CHECK:       getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[IN]], i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target parallel for if(0) firstprivate(global) depend(out:global)
+  for (int i = 0; i < global; ++i) {
+    global += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0:@.+]]()
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias)
+// CHECK:       store void (i8*, ...)* null, void (i8*, ...)** %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0)
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT0]]()
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 1, i32 0)
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    load i32, i32* [[AA_ADDR]], align
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT2]](i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+
+#endif
diff --git a/test/OpenMP/target_parallel_for_depend_messages.cpp b/test/OpenMP/target_parallel_for_depend_messages.cpp
index 1987256..d644711 100644
--- a/test/OpenMP/target_parallel_for_depend_messages.cpp
+++ b/test/OpenMP/target_parallel_for_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_device_messages.cpp b/test/OpenMP/target_parallel_for_device_messages.cpp
index 16a21ba..bacf57d 100644
--- a/test/OpenMP/target_parallel_for_device_messages.cpp
+++ b/test/OpenMP/target_parallel_for_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_firstprivate_messages.cpp b/test/OpenMP/target_parallel_for_firstprivate_messages.cpp
index 36bfe25..350bb89 100644
--- a/test/OpenMP/target_parallel_for_firstprivate_messages.cpp
+++ b/test/OpenMP/target_parallel_for_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_if_messages.cpp b/test/OpenMP/target_parallel_for_if_messages.cpp
index 4acf4b8..2bc433c 100644
--- a/test/OpenMP/target_parallel_for_if_messages.cpp
+++ b/test/OpenMP/target_parallel_for_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_is_device_ptr_ast_print.cpp b/test/OpenMP/target_parallel_for_is_device_ptr_ast_print.cpp
index eaa0941..fe60010 100644
--- a/test/OpenMP/target_parallel_for_is_device_ptr_ast_print.cpp
+++ b/test/OpenMP/target_parallel_for_is_device_ptr_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -59,7 +63,7 @@
 // CHECK-NEXT: int aa[10];
 // CHECK-NEXT: arr &raa = this->aa;
 // CHECK-NEXT: func(
-// CHECK-NEXT: #pragma omp target parallel for is_device_ptr(this->k)
+// CHECK-NEXT: #pragma omp target parallel for is_device_ptr(this->k){{$}}
 // CHECK-NEXT: for (int i = 0; i < 100; i++)
 // CHECK-NEXT: foo();
 // CHECK-NEXT: #pragma omp target parallel for is_device_ptr(this->z)
diff --git a/test/OpenMP/target_parallel_for_is_device_ptr_messages.cpp b/test/OpenMP/target_parallel_for_is_device_ptr_messages.cpp
index b3b33bf..3e675ea 100644
--- a/test/OpenMP/target_parallel_for_is_device_ptr_messages.cpp
+++ b/test/OpenMP/target_parallel_for_is_device_ptr_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -std=c++11 -verify -fopenmp-simd %s
+
 struct ST {
   int *a;
 };
diff --git a/test/OpenMP/target_parallel_for_lastprivate_messages.cpp b/test/OpenMP/target_parallel_for_lastprivate_messages.cpp
index ee703a1..0ff2c72 100644
--- a/test/OpenMP/target_parallel_for_lastprivate_messages.cpp
+++ b/test/OpenMP/target_parallel_for_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_linear_messages.cpp b/test/OpenMP/target_parallel_for_linear_messages.cpp
index 36e897d..b7224b5 100644
--- a/test/OpenMP/target_parallel_for_linear_messages.cpp
+++ b/test/OpenMP/target_parallel_for_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
 int x;
 };
diff --git a/test/OpenMP/target_parallel_for_loop_messages.cpp b/test/OpenMP/target_parallel_for_loop_messages.cpp
index d7c1891..416cbfa 100644
--- a/test/OpenMP/target_parallel_for_loop_messages.cpp
+++ b/test/OpenMP/target_parallel_for_loop_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
 
 class S {
   int a;
diff --git a/test/OpenMP/target_parallel_for_map_messages.cpp b/test/OpenMP/target_parallel_for_map_messages.cpp
index f3e1c61..f4f98df 100644
--- a/test/OpenMP/target_parallel_for_map_messages.cpp
+++ b/test/OpenMP/target_parallel_for_map_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/target_parallel_for_messages.cpp b/test/OpenMP/target_parallel_for_messages.cpp
index 173025c..6bf4ac2 100644
--- a/test/OpenMP/target_parallel_for_messages.cpp
+++ b/test/OpenMP/target_parallel_for_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++11 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_misc_messages.c b/test/OpenMP/target_parallel_for_misc_messages.c
index cfe83f1..44629e1 100644
--- a/test/OpenMP/target_parallel_for_misc_messages.c
+++ b/test/OpenMP/target_parallel_for_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp target parallel for'}}
 #pragma omp target parallel for
 
diff --git a/test/OpenMP/target_parallel_for_nowait_messages.cpp b/test/OpenMP/target_parallel_for_nowait_messages.cpp
index 06d2296..2fb875b 100644
--- a/test/OpenMP/target_parallel_for_nowait_messages.cpp
+++ b/test/OpenMP/target_parallel_for_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_num_threads_messages.cpp b/test/OpenMP/target_parallel_for_num_threads_messages.cpp
index 915cfb1..b47c96b 100644
--- a/test/OpenMP/target_parallel_for_num_threads_messages.cpp
+++ b/test/OpenMP/target_parallel_for_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_ordered_messages.cpp b/test/OpenMP/target_parallel_for_ordered_messages.cpp
index 72bb6e3..002e8e9 100644
--- a/test/OpenMP/target_parallel_for_ordered_messages.cpp
+++ b/test/OpenMP/target_parallel_for_ordered_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_private_messages.cpp b/test/OpenMP/target_parallel_for_private_messages.cpp
index 1d6381a..dae4f17 100644
--- a/test/OpenMP/target_parallel_for_private_messages.cpp
+++ b/test/OpenMP/target_parallel_for_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -43,7 +45,7 @@
   S5(int v) : a(v) {}
   S5 &operator=(S5 &s) {
 #pragma omp target parallel for private(a) private(this->a) private(s.a) // expected-error {{expected variable name or data member of current class}}
-    for (int k = 0; k < s.a; ++k)
+    for (int k = 0; k < s.a; ++k) // expected-warning {{Non-trivial type 'S5' is mapped, only trivial types are guaranteed to be mapped correctly}}
       ++s.a;
     return *this;
   }
diff --git a/test/OpenMP/target_parallel_for_proc_bind_messages.cpp b/test/OpenMP/target_parallel_for_proc_bind_messages.cpp
index eeb232a..60d5b74 100644
--- a/test/OpenMP/target_parallel_for_proc_bind_messages.cpp
+++ b/test/OpenMP/target_parallel_for_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/target_parallel_for_reduction_messages.cpp b/test/OpenMP/target_parallel_for_reduction_messages.cpp
index 9f1f68f..5cc40e9 100644
--- a/test/OpenMP/target_parallel_for_reduction_messages.cpp
+++ b/test/OpenMP/target_parallel_for_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_schedule_messages.cpp b/test/OpenMP/target_parallel_for_schedule_messages.cpp
index 075e1df..544ace2 100644
--- a/test/OpenMP/target_parallel_for_schedule_messages.cpp
+++ b/test/OpenMP/target_parallel_for_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_aligned_messages.cpp b/test/OpenMP/target_parallel_for_simd_aligned_messages.cpp
index 655f906..54a6a9e 100644
--- a/test/OpenMP/target_parallel_for_simd_aligned_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/target_parallel_for_simd_ast_print.cpp b/test/OpenMP/target_parallel_for_simd_ast_print.cpp
index 82d72c3..de7fb12 100644
--- a/test/OpenMP/target_parallel_for_simd_ast_print.cpp
+++ b/test/OpenMP/target_parallel_for_simd_ast_print.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -35,7 +39,7 @@
   }
 };
 
-// CHECK: #pragma omp target parallel for simd private(this->a) private(this->a) private(T::a)
+// CHECK: #pragma omp target parallel for simd private(this->a) private(this->a) private(T::a){{$}}
 // CHECK: #pragma omp target parallel for simd private(this->a) private(this->a)
 // CHECK: #pragma omp target parallel for simd private(this->a) private(this->a) private(this->S::a)
 
@@ -77,14 +81,14 @@
     a = 2;
 // CHECK-NEXT: for (T i = 0; i < 2; ++i)
 // CHECK-NEXT: a = 2;
-#pragma omp target parallel for simd private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) schedule(static, N) ordered(N) if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h)
+#pragma omp target parallel for simd private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) schedule(static, N) ordered if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h)
   for (int i = 0; i < 2; ++i)
     for (int j = 0; j < 2; ++j)
       for (int j = 0; j < 2; ++j)
         for (int j = 0; j < 2; ++j)
           for (int j = 0; j < 2; ++j)
             foo();
-  // CHECK-NEXT: #pragma omp target parallel for simd private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) schedule(static, N) ordered(N) if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h)
+  // CHECK-NEXT: #pragma omp target parallel for simd private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) schedule(static, N) ordered if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h)
   // CHECK-NEXT: for (int i = 0; i < 2; ++i)
   // CHECK-NEXT: for (int j = 0; j < 2; ++j)
   // CHECK-NEXT: for (int j = 0; j < 2; ++j)
diff --git a/test/OpenMP/target_parallel_for_simd_codegen.cpp b/test/OpenMP/target_parallel_for_simd_codegen.cpp
index c822ad5..39e10fe 100644
--- a/test/OpenMP/target_parallel_for_simd_codegen.cpp
+++ b/test/OpenMP/target_parallel_for_simd_codegen.cpp
@@ -1,232 +1,831 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
+// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
 
-long long get_val() { return 0; }
-double *g_ptr;
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[S1:%.+]] = type { double }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
 
-// CHECK-LABEL: define {{.*void}} @{{.*}}simple{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
-void simple(float *a, float *b, float *c, float *d) {
-  // CHECK: store i32 3, i32* %
-  // CHECK: icmp slt i32 %{{.+}}, 32
-  // CHECK: fmul float
-  // CHECK: fmul float
-  // CHECK: add nsw i32 %{{.+}}, 5
-  #pragma omp target parallel for simd device((int)*a)
-  for (int i = 3; i < 32; i += 5) {
-    a[i] = b[i] * c[i] * d[i];
-  }
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
 
-  // CHECK: call i{{.+}} @{{.+}}get_val{{.+}}()
-  // CHECK: store i32 10, i32* %
-  // CHECK: icmp sgt i32 %{{.+}}, 1
-  // CHECK: fadd float %{{.+}}, 1.000000e+00
-  // CHECK: add nsw {{.+}} %{{.+}}, 3
-  // CHECK: add nsw i32 %{{.+}}, -1
-  long long k = get_val();
-  #pragma omp target parallel for simd linear(k : 3) schedule(dynamic)
-  for (int i = 10; i > 1; i--) {
-    a[k]++;
-    k = k + 3;
-  }
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
 
-  // CHECK: store i32 12, i32* %
-  // CHECK: store i{{.+}} 2000, i{{.+}}* %
-  // CHECK: icmp uge i{{.+}} %{{.+}}, 600
-  // CHECK: store double 0.000000e+00,
-  // CHECK: fadd float %{{.+}}, 1.000000e+00
-  // CHECK: sub i{{.+}} %{{.+}}, 400
-  int lin = 12;
-  #pragma omp target parallel for simd linear(lin : get_val()), linear(g_ptr)
-  for (unsigned long long it = 2000; it >= 600; it-=400) {
-    *g_ptr++ = 0.0;
-    a[it + lin]++;
-  }
+// We have 8 target regions, but only 7 that actually will generate offloading
+// code, only 6 will have mapped arguments, and only 4 have all-constant map
+// sizes.
 
-  // CHECK: store i{{.+}} 6, i{{.+}}* %
-  // CHECK: icmp sle i{{.+}} %{{.+}}, 20
-  // CHECK: sub nsw i{{.+}} %{{.+}}, -4
-  #pragma omp target parallel for simd
-  for (short it = 6; it <= 20; it-=-4) {
-  }
+// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288]
+// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2]
+// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288]
+// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [10 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547, i64 288]
+// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40]
+// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547]
+// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40]
+// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
 
-  // CHECK: store i8 122, i8* %
-  // CHECK: icmp sge i32 %{{.+}}, 97
-  // CHECK: add nsw i32 %{{.+}}, -1
-  #pragma omp target parallel for simd
-  for (unsigned char it = 'z'; it >= 'a'; it+=-1) {
-  }
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
 
-  // CHECK: store i32 100, i32* %
-  // CHECK: icmp ult i32 %{{.+}}, 10
-  // CHECK: add i32 %{{.+}}, 10
-  #pragma omp target parallel for simd
-  for (unsigned i=100; i<10; i+=10) {
-  }
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
 
-  int A;
-  {
-  A = -1;
-  // CHECK: store i{{.+}} -10, i{{.+}}* %
-  // CHECK: icmp slt i{{.+}} %{{.+}}, 10
-  // CHECK: add nsw i{{.+}} %{{.+}}, 3
-  #pragma omp target parallel for simd lastprivate(A)
-  for (long long i = -10; i < 10; i += 3) {
-    A = i;
-  }
-  }
-  int R;
-  {
-  R = -1;
-  // CHECK: store i{{.+}} -10, i{{.+}}* %
-  // CHECK: icmp slt i{{.+}} %{{.+}}, 10
-  // CHECK: add nsw i{{.+}} %{{.+}}, 3
-  #pragma omp target parallel for simd reduction(*:R)
-  for (long long i = -10; i < 10; i += 3) {
-    R *= i;
-  }
-  }
-}
-
-template <class T, unsigned K> T tfoo(T a) { return a + K; }
-
-template <typename T, unsigned N>
-int templ1(T a, T *z) {
-  #pragma omp target parallel for simd collapse(N)
-  for (int i = 0; i < N * 2; i++) {
-    for (long long j = 0; j < (N + N + N + N); j += 2) {
-      z[i + j] = a + tfoo<T, N>(i + j);
-    }
-  }
-  return 0;
-}
-
-// Instatiation templ1<float,2>
-// CHECK-LABEL: define {{.*i32}} @{{.*}}templ1{{.*}}(float {{.+}}, float* {{.+}})
-void inst_templ1() {
-  float a;
-  float z[100];
-  templ1<float,2> (a, z);
-}
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
 
 
-typedef int MyIdx;
-
-class IterDouble {
-  double *Ptr;
-public:
-  IterDouble operator++ () const {
-    IterDouble n;
-    n.Ptr = Ptr + 1;
-    return n;
-  }
-  bool operator < (const IterDouble &that) const {
-    return Ptr < that.Ptr;
-  }
-  double & operator *() const {
-    return *Ptr;
-  }
-  MyIdx operator - (const IterDouble &that) const {
-    return (MyIdx) (Ptr - that.Ptr);
-  }
-  IterDouble operator + (int Delta) {
-    IterDouble re;
-    re.Ptr = Ptr + Delta;
-    return re;
-  }
-
-  ///~IterDouble() {}
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
 };
 
-// CHECK-LABEL: define {{.*void}} @{{.*}}iter_simple{{.*}}
-void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) {
-//
-// Calculate number of iterations before the loop body.
-// CHECK: invoke {{.*}}i32 @{{.*}}IterDouble{{.*}}
-  #pragma omp target parallel for simd
-  for (IterDouble i = ia; i < ib; ++i) {
-// Call of operator+ (i, IV).
-// CHECK: {{%.+}} = invoke {{.+}} @{{.*}}IterDouble{{.*}}
-// ... loop body ...
-   *i = *ic * 0.5;
-// Float multiply and save result.
-// CHECK: [[MULR:%.+]] = fmul double {{%.+}}, 5.000000e-01
-// CHECK-NEXT: invoke {{.+}} @{{.*}}IterDouble{{.*}}
-// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]]
-   ++ic;
+// CHECK-LABEL: get_val
+long long get_val() { return 0; }
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0)
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT0:@.+]]()
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target parallel for simd nowait
+  for (int i = 3; i < 32; i += 5) {
   }
-// CHECK: ret void
-}
 
-
-// CHECK-LABEL: define {{.*void}} @{{.*}}collapsed{{.*}}
-void collapsed(float *a, float *b, float *c, float *d) {
-  int i; // outer loop counter
-  unsigned j; // middle loop couter, leads to unsigned icmp in loop header.
-  // k declared in the loop init below
-  short l; // inner loop counter
-//
-  #pragma omp target parallel for simd collapse(4)
-  for (i = 1; i < 3; i++) // 2 iterations
-    for (j = 2u; j < 5u; j++) //3 iterations
-      for (int k = 3; k <= 6; k++) // 4 iterations
-        for (l = 4; l < 9; ++l) // 5 iterations
-        {
-// ... loop body ...
-// End of body: store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]
-    float res = b[j] * c[k];
-    a[i] = res * d[l];
+  // CHECK:       call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}, i{{32|64}}{{[*]*}} {{[^)]+}})
+  long long k = get_val();
+  #pragma omp target parallel for simd if(target: 0) linear(k : 3) schedule(dynamic)
+  for (int i = 10; i > 1; i--) {
+    a += 1;
   }
-// CHECK: ret void
-}
 
-extern char foo();
-extern double globalfloat;
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]],
 
-// CHECK-LABEL: define {{.*void}} @{{.*}}widened{{.*}}
-void widened(float *a, float *b, float *c, float *d) {
-  int i; // outer loop counter
-  short j; // inner loop counter
-  globalfloat = 1.0;
-  int localint = 1;
-// CHECK: store double {{.+}}, double* [[GLOBALFLOAT:@.+]],
-  #pragma omp target parallel for simd collapse(2) private(globalfloat, localint)
-  for (i = 1; i < 3; i++) // 2 iterations
-    for (j = 0; j < foo(); j++) // foo() iterations
-  {
-// ... loop body ...
-//
-// Here we expect store into private double var, not global
-// CHECK: store double {{.+}}, double* [[GLOBALFLOAT]]
-    globalfloat = (float)j/i;
-    float res = b[j] * c[j];
-// Store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]
-    a[i] = res * d[i];
-// Then there's a store into private var localint:
-// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]
-    localint = (int)j;
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  int lin = 12;
+  #pragma omp target parallel for simd if(target: 1) linear(lin, a : get_val())
+  for (unsigned long long it = 2000; it >= 600; it-=400) {
+    aa += 1;
   }
+
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
+  // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+  // CHECK:       [[IFTHEN]]
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
+
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+  // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  // CHECK-NEXT:  br label %[[IFEND:.+]]
+  // CHECK:       [[IFELSE]]
+  // CHECK:       call void [[HVT3]]({{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[IFEND]]
+  // CHECK:       [[IFEND]]
+
+  #pragma omp target parallel for simd if(target: n>10)
+  for (short it = 6; it <= 20; it-=-4) {
+    a += 1;
+    aa += 1;
+  }
+
+  // We capture 3 VLA sizes in this target region
+  // CHECK:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK:       store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
+  // CHECK-64:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-64:       [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32*
+  // CHECK-64:       store i32 [[A_VAL]], i32* [[A_ADDR]],
+  // CHECK-64:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
+
+  // CHECK-32:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-32:       store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
+  // CHECK-32:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
+
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
+  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+  // CHECK:       [[TRY]]
+  // CHECK:       [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
+  // CHECK:       [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
+  // CHECK:       [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
+
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[SR]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0
+
+  // CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[SADDR4:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[SADDR5:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[PADDR5:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[SADDR6:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR6:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[PADDR6:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[SADDR7:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR7:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[PADDR7:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[SADDR8:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR8:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX8]]
+  // CHECK-DAG:   [[PADDR8:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX8]]
+
+  // The names below are not necessarily consistent with the names used for the
+  // addresses above as some are repeated.
+  // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]],
+  // CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CBPADDR1:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CPADDR1:%.+]],
+  // CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CBPADDR2:%.+]],
+  // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CPADDR2:%.+]],
+  // CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CBPADDR3:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR3:%.+]],
+  // CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR4:%.+]],
+  // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR4:%.+]],
+  // CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]**
+  // CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]**
+  // CHECK-DAG:   store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store float* %{{.+}}, float** [[CBPADDR5:%.+]],
+  // CHECK-DAG:   store float* %{{.+}}, float** [[CPADDR5:%.+]],
+  // CHECK-DAG:   [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to float**
+  // CHECK-DAG:   [[CPADDR5]] = bitcast i8** {{%[^,]+}} to float**
+  // CHECK-DAG:   store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CBPADDR6:%.+]],
+  // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR6:%.+]],
+  // CHECK-DAG:   [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]**
+  // CHECK-DAG:   [[CPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]**
+  // CHECK-DAG:   store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store double* %{{.+}}, double** [[CBPADDR7:%.+]],
+  // CHECK-DAG:   store double* %{{.+}}, double** [[CPADDR7:%.+]],
+  // CHECK-DAG:   [[CBPADDR7]] = bitcast i8** {{%[^,]+}} to double**
+  // CHECK-DAG:   [[CPADDR7]] = bitcast i8** {{%[^,]+}} to double**
+  // CHECK-DAG:   store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CBPADDR8:%.+]],
+  // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8:%.+]],
+  // CHECK-DAG:   [[CBPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]**
+  // CHECK-DAG:   [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]**
+  // CHECK-DAG:   store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^)]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target parallel for simd if(target: n>20) schedule(static, a)
+  for (unsigned char it = 'z'; it >= 'a'; it+=-1) {
+    a += 1;
+    b[2] += 1.0;
+    bn[3] += 1.0;
+    c[1][2] += 1.0;
+    cn[1][3] += 1.0;
+    d.X += 1;
+    d.Y += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0]]()
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*))
 //
-// Here we expect store into original localint, not its privatized version.
-// CHECK: store i32 {{.+}}, i32* [[LOCALINT]]
-  localint = (int)j;
-// CHECK: ret void
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid.)
+// CHECK:       !llvm.loop
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+
+// CHECK:       define internal void [[HVT1]](i[[SZ]] %{{.+}}, i{{32|64}}{{[*]*.*}} %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       alloca i{{32|64}}{{[*]*}}, align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
+// CHECK-64:    [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i32*
+// CHECK-64:    store i32 [[AA]], i32* [[AA_C]], align
+// CHECK-32:    store i32 [[AA]], i32* [[AA_CASTED]], align
+// CHECK:       [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i{{32|64}}{{[*]*}})* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]], i{{32|64}}{{[*]*}} %{{.+}})
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}})
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
+// CHECK:       !llvm.mem.parallel_loop_access
+// CHECK:       !llvm.loop
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT2]](i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       alloca i[[SZ]], align
+// CHECK:       alloca i[[SZ]], align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK:       [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK:       [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
+// CHECK:       store i16 [[AA]], i16* [[AA_C]], align
+// CHECK:       [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]])* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]], i[[SZ]] {{.+}}, i[[SZ]] {{.+}})
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK:       [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK:       !llvm.loop
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT3]]
+// Create stack storage and store argument in there.
+// CHECK:       [[A_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[A_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
+// CHECK-DAG:   [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK-64-DAG:[[A:%.+]] = load i32, i32* [[A_CADDR]], align
+// CHECK-32-DAG:[[A:%.+]] = load i32, i32* [[A_ADDR]], align
+// CHECK-64-DAG:[[A_C:%.+]] = bitcast i[[SZ]]* [[A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[A]], i32* [[A_C]], align
+// CHECK-32-DAG:store i32 [[A]], i32* [[A_CASTED]], align
+// CHECK-DAG:   [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK-DAG:   [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[AA]], i16* [[AA_C]], align
+// CHECK-DAG:   [[PARAM1:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CASTED]], align
+// CHECK-DAG:   [[PARAM2:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK-DAG:   call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
+// CHECK:       [[A_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
+// CHECK-DAG:   [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       !llvm.loop
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT4]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x float]*
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_BN:%.+]] = alloca float*
+// CHECK:       [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]*
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA3:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_CN:%.+]] = alloca double*
+// CHECK:       [[LOCAL_D:%.+]] = alloca [[TT]]*
+// CHECK:       alloca i[[SZ]]
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
+// CHECK-DAG:   store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]]
+// CHECK-DAG:   store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]]
+// CHECK-DAG:   store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]]
+// CHECK-DAG:   store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]]
+
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]],
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
+// CHECK-DAG:   [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]],
+// CHECK-DAG:   [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
+// CHECK-DAG:   [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]],
+// CHECK-DAG:   [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]],
+// CHECK-DAG:   [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*, i[[SZ]])* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]], i[[SZ]] %{{.+}})
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  short aa = 0;
+  tx b[10];
+
+  #pragma omp target parallel for simd if(target: n>40)
+  for (long long i = -10; i < 10; i += 3) {
+    a += 1;
+    aa += 1;
+    b[2] += 1;
+  }
+
+  return a;
 }
 
-// TERM_DEBUG-LABEL: bar
-int bar() {return 0;};
+static
+int fstatic(int n) {
+  int a = 0;
+  short aa = 0;
+  char aaa = 0;
+  int b[10];
 
-// TERM_DEBUG-LABEL: parallel_simd
-void parallel_simd(float *a) {
-#pragma omp target parallel for simd
-  for (unsigned i = 131071; i <= 2147483647; i += 127)
-    a[i] += bar();
+  #pragma omp target parallel for simd if(target: n>50)
+  for (unsigned i=100; i<10; i+=10) {
+    a += 1;
+    aa += 1;
+    aaa += 1;
+    b[2] += 1;
+  }
+
+  return a;
 }
-#endif // HEADER
 
+struct S1 {
+  double a;
+
+  int r1(int n){
+    int b = n+1;
+    short int c[2][n];
+
+    #pragma omp target parallel for simd if(target: n>60)
+    for (unsigned long long it = 2000; it >= 600; it -= 400) {
+      this->a = (double)b + 1.5;
+      c[1][1] = ++a;
+    }
+
+    return c[1][1] + (int)b;
+  }
+};
+
+// CHECK: define {{.*}}@{{.*}}bar{{.*}}
+int bar(int n){
+  int a = 0;
+
+  // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}})
+  a += foo(n);
+
+  S1 S;
+  // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
+  a += S.r1(n);
+
+  // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
+  a += fstatic(n);
+
+  // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+//
+// CHECK: define {{.*}}[[FS1]]
+//
+// CHECK:          i8* @llvm.stacksave()
+// CHECK-64:       [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32*
+// CHECK-64:       store i32 %{{.+}}, i32* [[B_ADDR]],
+// CHECK-64:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]],
+
+// CHECK-32:       store i32 %{{.+}}, i32* %__vla_expr
+// CHECK-32:       store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
+// CHECK-32:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
+
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
+// CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+// CHECK:       [[TRY]]
+// We capture 2 VLA sizes in this target region
+// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
+// CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
+
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]]
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]]
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]]
+// CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]]
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]]
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]]
+// CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]]
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]]
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]]
+// CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]]
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]]
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]]
+
+// The names below are not necessarily consistent with the names used for the
+// addresses above as some are repeated.
+// CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]],
+// CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CBPADDR1:%.+]],
+// CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CPADDR1:%.+]],
+// CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]],
+// CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR3:%.+]],
+// CHECK-DAG:   store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR3:%.+]],
+// CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]**
+// CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]**
+// CHECK-DAG:   store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i16* %{{.+}}, i16** [[CBPADDR4:%.+]],
+// CHECK-DAG:   store i16* %{{.+}}, i16** [[CPADDR4:%.+]],
+// CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG:   store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}}
+
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+
+//
+// CHECK: define {{.*}}[[FSTATIC]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
+// CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+// CHECK:       [[IFTHEN]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
+
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 0
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 0
+// CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+// CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 1
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 1
+// CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+// CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 2
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 2
+// CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]],
+// CHECK-DAG:   store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]],
+
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 3
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 3
+// CHECK-DAG:   [[CBPADDR3:%.+]] = bitcast i8** [[BPADDR3]] to [10 x i32]**
+// CHECK-DAG:   [[CPADDR3:%.+]] = bitcast i8** [[PADDR3]] to [10 x i32]**
+// CHECK-DAG:   store [10 x i32]* [[VAL3:%.+]], [10 x i32]** [[CBPADDR3]],
+// CHECK-DAG:   store [10 x i32]* [[VAL3]], [10 x i32]** [[CPADDR3]],
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK-NEXT:  br label %[[IFEND:.+]]
+// CHECK:       [[IFELSE]]
+// CHECK:       call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[IFEND]]
+// CHECK:       [[IFEND]]
+
+//
+// CHECK: define {{.*}}[[FTEMPLATE]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
+// CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+// CHECK:       [[IFTHEN]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
+
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0
+// CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+// CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1
+// CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+// CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2
+// CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to [10 x i32]**
+// CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to [10 x i32]**
+// CHECK-DAG:   store [10 x i32]* [[VAL2:%.+]], [10 x i32]** [[CBPADDR2]],
+// CHECK-DAG:   store [10 x i32]* [[VAL2]], [10 x i32]** [[CPADDR2]],
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK-NEXT:  br label %[[IFEND:.+]]
+// CHECK:       [[IFELSE]]
+// CHECK:       call void [[HVT:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[IFEND]]
+// CHECK:       [[IFEND]]
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions of the callees of bar().
+
+// CHECK:       define internal void [[HVT7]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_THIS:%.+]] = alloca [[S1]]*
+// CHECK:       [[LOCAL_B:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_C:%.+]] = alloca i16*
+// CHECK:       [[LOCAL_B_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
+// CHECK-DAG:   store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]]
+// Store captures in the context.
+// CHECK-DAG:   [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]],
+// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32*
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
+// CHECK-DAG:   [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]],
+
+// CHECK-64-DAG:[[CONV_B:%.+]] = load i32, i32* [[CONV_BP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_B_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_B]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_BV:%.+]] = load i32, i32* [[LOCAL_B]]
+// CHECK-32-DAG:store i32 [[LOCAL_BV]], i32* [[LOCAL_B_CASTED]], align
+// CHECK-DAG:   [[REF_B:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_B_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+
+// CHECK:       define internal void [[HVT6]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AAA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AAA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
+// Store captures in the context.
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
+// CHECK-DAG:   [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK-DAG:   [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[CONV_AA]], i16* [[CONV]], align
+// CHECK-DAG:   [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
+
+// CHECK-DAG:   [[CONV_AAA:%.+]] = load i8, i8* [[CONV_AAAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA_CASTED]] to i8*
+// CHECK-DAG:   store i8 [[CONV_AAA]], i8* [[CONV]], align
+// CHECK-DAG:   [[REF_AAA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AAA_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED6:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], i[[SZ]] [[REF_AAA]], [10 x i32]* [[REF_B]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+// CHECK:       define internal void [[HVT5]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
+// Store captures in the context.
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK-DAG:   [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[CONV_AA]], i16* [[CONV]], align
+// CHECK-DAG:   [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+#endif
diff --git a/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp b/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp
new file mode 100644
index 0000000..5a29222
--- /dev/null
+++ b/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp
@@ -0,0 +1,472 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target parallel for simd codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// Check that no target code is emmitted if no omptests flag was provided.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK:    [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[A1:@.+]] = internal global [[SA]]
+// CHECK-DAG: [[A2:@.+]] = global [[SA]]
+// CHECK-DAG: [[B1:@.+]] = global [[SB]]
+// CHECK-DAG: [[B2:@.+]] = global [[SB]]
+// CHECK-DAG: [[C1:@.+]] = internal global [[SC]]
+// CHECK-DAG: [[D1:@.+]] = global [[SD]]
+// CHECK-DAG: [[E1:@.+]] = global [[SE]]
+// CHECK-DAG: [[T1:@.+]] = global [[ST1]]
+// CHECK-DAG: [[T2:@.+]] = global [[ST2]]
+
+// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-NTARGET-NOT: type { i8*, i8*, %
+// CHECK-NTARGET-NOT: type { i32, %
+
+// We have 7 target regions
+
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// TCHECK-NOT: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+
+// CHECK-NTARGET-NOT: private constant i8 0
+// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
+
+// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function.
+// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [
+// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+// CHECK-NTARGET: @llvm.global_ctors = appending global [3   x { i32, void ()*, i8* }] [
+
+extern int *R;
+
+struct SA {
+  int arr[4];
+  void foo() {
+    int a = *R;
+    a += 1;
+    *R = a;
+  }
+  SA() {
+    int a = *R;
+    a += 2;
+    *R = a;
+  }
+  ~SA() {
+    int a = *R;
+    a += 3;
+    *R = a;
+  }
+};
+
+struct SB {
+  int arr[8];
+  void foo() {
+    int a = *R;
+    #pragma omp target parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 4;
+    *R = a;
+  }
+  SB() {
+    int a = *R;
+    a += 5;
+    *R = a;
+  }
+  ~SB() {
+    int a = *R;
+    a += 6;
+    *R = a;
+  }
+};
+
+struct SC {
+  int arr[16];
+  void foo() {
+    int a = *R;
+    a += 7;
+    *R = a;
+  }
+  SC() {
+    int a = *R;
+    #pragma omp target parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 8;
+    *R = a;
+  }
+  ~SC() {
+    int a = *R;
+    a += 9;
+    *R = a;
+  }
+};
+
+struct SD {
+  int arr[32];
+  void foo() {
+    int a = *R;
+    a += 10;
+    *R = a;
+  }
+  SD() {
+    int a = *R;
+    a += 11;
+    *R = a;
+  }
+  ~SD() {
+    int a = *R;
+    #pragma omp target parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 12;
+    *R = a;
+  }
+};
+
+struct SE {
+  int arr[64];
+  void foo() {
+    int a = *R;
+    #pragma omp target parallel for simd if(target: 0)
+    for (int i = 0; i < 10; ++i)
+      a += 13;
+    *R = a;
+  }
+  SE() {
+    int a = *R;
+    #pragma omp target parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 14;
+    *R = a;
+  }
+  ~SE() {
+    int a = *R;
+    #pragma omp target parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 15;
+    *R = a;
+  }
+};
+
+template <int x>
+struct ST {
+  int arr[128 + x];
+  void foo() {
+    int a = *R;
+    #pragma omp target parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 16 + x;
+    *R = a;
+  }
+  ST() {
+    int a = *R;
+    #pragma omp target parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 17 + x;
+    *R = a;
+  }
+  ~ST() {
+    int a = *R;
+    #pragma omp target parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 18 + x;
+    *R = a;
+  }
+};
+
+// We have to make sure we us all the target regions:
+//CHECK-DAG: define internal void @[[NAME1]](
+//CHECK-DAG: call void @[[NAME1]](
+//CHECK-DAG: define internal void @[[NAME2]](
+//CHECK-DAG: call void @[[NAME2]](
+//CHECK-DAG: define internal void @[[NAME3]](
+//CHECK-DAG: call void @[[NAME3]](
+//CHECK-DAG: define internal void @[[NAME4]](
+//CHECK-DAG: call void @[[NAME4]](
+//CHECK-DAG: define internal void @[[NAME5]](
+//CHECK-DAG: call void @[[NAME5]](
+//CHECK-DAG: define internal void @[[NAME6]](
+//CHECK-DAG: call void @[[NAME6]](
+//CHECK-DAG: define internal void @[[NAME7]](
+//CHECK-DAG: call void @[[NAME7]](
+//CHECK-DAG: define internal void @[[NAME8]](
+//CHECK-DAG: call void @[[NAME8]](
+//CHECK-DAG: define internal void @[[NAME9]](
+//CHECK-DAG: call void @[[NAME9]](
+//CHECK-DAG: define internal void @[[NAME10]](
+//CHECK-DAG: call void @[[NAME10]](
+//CHECK-DAG: define internal void @[[NAME11]](
+//CHECK-DAG: call void @[[NAME11]](
+//CHECK-DAG: define internal void @[[NAME12]](
+//CHECK-DAG: call void @[[NAME12]](
+
+//TCHECK-DAG: define void @[[NAME1]](
+//TCHECK-DAG: define void @[[NAME2]](
+//TCHECK-DAG: define void @[[NAME3]](
+//TCHECK-DAG: define void @[[NAME4]](
+//TCHECK-DAG: define void @[[NAME5]](
+//TCHECK-DAG: define void @[[NAME6]](
+//TCHECK-DAG: define void @[[NAME7]](
+//TCHECK-DAG: define void @[[NAME8]](
+//TCHECK-DAG: define void @[[NAME9]](
+//TCHECK-DAG: define void @[[NAME10]](
+//TCHECK-DAG: define void @[[NAME11]](
+//TCHECK-DAG: define void @[[NAME12]](
+
+// CHECK-NTARGET-NOT: __tgt_target
+// CHECK-NTARGET-NOT: __tgt_register_lib
+// CHECK-NTARGET-NOT: __tgt_unregister_lib
+
+// TCHECK-NOT: __tgt_target
+// TCHECK-NOT: __tgt_register_lib
+// TCHECK-NOT: __tgt_unregister_lib
+
+// We have 2 initializers with priority 500
+//CHECK: define internal void [[P500]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 1 initializers with priority 501
+//CHECK: define internal void [[P501]](
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 6 initializers with default priority
+//CHECK: define internal void [[PMAX]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// Check registration and unregistration
+
+//CHECK:     define internal void @[[UNREGFN:.+]](i8*)
+//CHECK-SAME: comdat($[[REGFN]]) {
+//CHECK:     call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_unregister_lib([[DSCTY]]*)
+
+//CHECK:     define linkonce hidden void @[[REGFN]](i8*)
+//CHECK-SAME: comdat {
+//CHECK:     call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]])
+//CHECK:     call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*),
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_register_lib([[DSCTY]]*)
+
+static __attribute__((init_priority(500))) SA a1;
+SA a2;
+SB __attribute__((init_priority(500))) b1;
+SB __attribute__((init_priority(501))) b2;
+static SC c1;
+SD d1;
+SE e1;
+ST<100> t1;
+ST<1000> t2;
+
+
+int bar(int a){
+  int r = a;
+
+  a1.foo();
+  a2.foo();
+  b1.foo();
+  b2.foo();
+  c1.foo();
+  d1.foo();
+  e1.foo();
+  t1.foo();
+  t2.foo();
+
+  #pragma omp target parallel for simd
+  for (int i = 0; i < 10; ++i)
+    ++r;
+
+  return r + *R;
+}
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+#endif
diff --git a/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp b/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp
new file mode 100644
index 0000000..6c8e06a
--- /dev/null
+++ b/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp
@@ -0,0 +1,86 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target parallel for simd codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[CA:%.+]] = type { i32* }
+
+// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}})
+int nested(int a){
+  // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]](
+  #pragma omp target parallel for simd
+  for (int i = 0; i < 10; ++i)
+    ++a;
+
+  // CHECK: call void @"[[LNAME:.+]]"([[CA]]*
+  auto F = [&](){
+    #pragma omp parallel
+    {
+      #pragma omp target parallel for simd
+      for (int i = 0; i < 10; ++i)
+        ++a;
+    }
+  };
+
+  F();
+
+  return a;
+}
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]](
+
+// CHECK: define {{.*}}void @"[[LNAME]]"(
+// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to
+
+// CHECK: define {{.*}}void [[PNAME]](
+// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]](
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]](
+
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+#endif
diff --git a/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp b/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp
index ecf2d6e..f3f2aa9 100644
--- a/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_default_messages.cpp b/test/OpenMP/target_parallel_for_simd_default_messages.cpp
index 5d41cbc..b67bfc5 100644
--- a/test/OpenMP/target_parallel_for_simd_default_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/target_parallel_for_simd_defaultmap_messages.cpp b/test/OpenMP/target_parallel_for_simd_defaultmap_messages.cpp
index e922e0a..c019c37 100644
--- a/test/OpenMP/target_parallel_for_simd_defaultmap_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_defaultmap_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_depend_codegen.cpp b/test/OpenMP/target_parallel_for_simd_depend_codegen.cpp
new file mode 100644
index 0000000..da51699
--- /dev/null
+++ b/test/OpenMP/target_parallel_for_simd_depend_codegen.cpp
@@ -0,0 +1,261 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+extern int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+  static long *plocal;
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[IN:@.+]], i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]*
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[IN]], i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target parallel for simd device(global + a) depend(in: global) depend(out: a, b, cn[4])
+  for (int i = 0; i < 10; ++i) {
+  }
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+
+  // CHECK:       [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
+  // CHECK:       br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
+  // CHECK:       [[THEN]]:
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[IN]], i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+
+  // CHECK:       [[ELSE]]:
+  // CHECK-NOT:   getelementptr inbounds [2 x i8*], [2 x i8*]*
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[IN]], i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+  // CHECK:       [[EXIT]]:
+
+  #pragma omp target parallel for simd device(global + a) nowait depend(inout: global, a, bn) if(target:a)
+  for (int i = 0; i < *plocal; ++i) {
+    static int local1;
+    *plocal = global;
+    local1 = global;
+  }
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[IN]], i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]*
+  // CHECK:       getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[IN]], i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[IN]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target parallel for simd if(0) firstprivate(global) depend(out:global)
+  for (int i = 0; i < global; ++i) {
+    global += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0:@.+]]()
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias)
+// CHECK:       store void (i8*, ...)* null, void (i8*, ...)** %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0)
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT0]]()
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 1, i32 0)
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    load i32, i32* [[AA_ADDR]], align
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT2]](i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+
+#endif
diff --git a/test/OpenMP/target_parallel_for_simd_depend_messages.cpp b/test/OpenMP/target_parallel_for_simd_depend_messages.cpp
index 4375941..5a735fc 100644
--- a/test/OpenMP/target_parallel_for_simd_depend_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_device_messages.cpp b/test/OpenMP/target_parallel_for_simd_device_messages.cpp
index 2c9d43f..8d27195 100644
--- a/test/OpenMP/target_parallel_for_simd_device_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp b/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp
index 9397314..8a32c6f 100644
--- a/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -125,11 +127,11 @@
     foo();
 #pragma omp parallel private(i)
 #pragma omp target parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}}
-  for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}}
+  for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}}
     foo();
 #pragma omp parallel reduction(+ : i)
 #pragma omp target parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}}
-  for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}}
+  for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}}
     foo();
   return 0;
 }
@@ -219,7 +221,7 @@
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}}
-  for (i = 0; i < argc; ++i)    // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}}
+  for (i = 0; i < argc; ++i)    // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}}
     foo();
 #pragma omp parallel shared(xa)
 #pragma omp target parallel for simd firstprivate(xa) // OK: may be firstprivate
@@ -246,11 +248,11 @@
   }
 #pragma omp parallel private(i)
 #pragma omp target parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}}
-  for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}}
+  for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}}
     foo();
 #pragma omp parallel reduction(+ : i)
 #pragma omp target parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}}
-  for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}}
+  for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}}
     foo();
   static int si;
 #pragma omp target parallel for simd firstprivate(si) // OK
diff --git a/test/OpenMP/target_parallel_for_simd_if_messages.cpp b/test/OpenMP/target_parallel_for_simd_if_messages.cpp
index 640e24c..fea1ad9 100644
--- a/test/OpenMP/target_parallel_for_simd_if_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_is_device_ptr_ast_print.cpp b/test/OpenMP/target_parallel_for_simd_is_device_ptr_ast_print.cpp
index af98f99..feeaa1e 100644
--- a/test/OpenMP/target_parallel_for_simd_is_device_ptr_ast_print.cpp
+++ b/test/OpenMP/target_parallel_for_simd_is_device_ptr_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -56,7 +60,7 @@
 // CHECK-NEXT: int aa[10];
 // CHECK-NEXT: arr &raa = this->aa;
 // CHECK-NEXT: func(
-// CHECK-NEXT: #pragma omp target parallel for simd is_device_ptr(this->k)
+// CHECK-NEXT: #pragma omp target parallel for simd is_device_ptr(this->k){{$}}
 // CHECK-NEXT: for (int i = 0; i < 100; i++)
 // CHECK-NEXT: ;
 // CHECK-NEXT: #pragma omp target parallel for simd is_device_ptr(this->z)
diff --git a/test/OpenMP/target_parallel_for_simd_is_device_ptr_messages.cpp b/test/OpenMP/target_parallel_for_simd_is_device_ptr_messages.cpp
index 435b9a9..1079bf3 100644
--- a/test/OpenMP/target_parallel_for_simd_is_device_ptr_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_is_device_ptr_messages.cpp
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -std=c++11 -verify -fopenmp %s
+
+// RUN: %clang_cc1 -std=c++11 -verify -fopenmp-simd %s
 struct ST {
   int *a;
 };
diff --git a/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp b/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp
index b41dc87..7c2c23f 100644
--- a/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -209,6 +211,8 @@
 #pragma omp target parallel for simd private(xa), lastprivate(xa) // expected-error {{private variable cannot be lastprivate}} expected-note {{defined as private}}
   for (i = 0; i < argc; ++i)
     foo();
+// expected-note@+2 {{defined as lastprivate}}
+// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be lastprivate, predetermined as linear}}
 #pragma omp target parallel for simd lastprivate(i)
   for (i = 0; i < argc; ++i)
     foo();
diff --git a/test/OpenMP/target_parallel_for_simd_linear_messages.cpp b/test/OpenMP/target_parallel_for_simd_linear_messages.cpp
index e17f155..40fd052 100644
--- a/test/OpenMP/target_parallel_for_simd_linear_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
 int x;
 };
diff --git a/test/OpenMP/target_parallel_for_simd_loop_messages.cpp b/test/OpenMP/target_parallel_for_simd_loop_messages.cpp
index ed184a5..fabf389 100644
--- a/test/OpenMP/target_parallel_for_simd_loop_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_loop_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
 
 class S {
   int a;
@@ -238,27 +240,29 @@
     c[ii] = a[ii];
 
 // expected-note@+2  {{defined as firstprivate}}
-// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}}
+// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}}
 #pragma omp target parallel for simd firstprivate(ii)
   for (ii = 0; ii < 10; ii++)
     c[ii] = a[ii];
 
-// expected-note@+2  {{defined as linear}}
-// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be linear, predetermined as private}}
 #pragma omp target parallel for simd linear(ii)
   for (ii = 0; ii < 10; ii++)
     c[ii] = a[ii];
 
+// expected-note@+2  {{defined as private}}
+// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be private, predetermined as linear}}
 #pragma omp target parallel for simd private(ii)
   for (ii = 0; ii < 10; ii++)
     c[ii] = a[ii];
 
+// expected-note@+2  {{defined as lastprivate}}
+// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be lastprivate, predetermined as linear}}
 #pragma omp target parallel for simd lastprivate(ii)
   for (ii = 0; ii < 10; ii++)
     c[ii] = a[ii];
 
   {
-// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be threadprivate or thread local, predetermined as private}}
+// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be threadprivate or thread local, predetermined as linear}}
 #pragma omp target parallel for simd
     for (sii = 0; sii < 10; sii += 1)
       c[sii] = a[sii];
@@ -585,15 +589,15 @@
 #pragma omp target parallel for simd
   for (int i = 0; i < 10; i++) {
     c[i] = a[i] + b[i];
-    try {
+    try { // expected-error {{'try' statement cannot be used in OpenMP simd region}}
       for (int j = 0; j < 10; ++j) {
         if (a[i] > b[j])
-          throw a[i];
+          throw a[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}}
       }
-      throw a[i];
+      throw a[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}}
     } catch (float f) {
       if (f > 0.1)
-        throw a[i];
+        throw a[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}}
       return; // expected-error {{cannot return from OpenMP region}}
     }
     switch (i) {
@@ -605,7 +609,7 @@
     }
     for (int j = 0; j < 10; j++) {
       if (c[i] > 10)
-        throw c[i];
+        throw c[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}}
     }
   }
   if (c[9] > 10)
diff --git a/test/OpenMP/target_parallel_for_simd_map_messages.cpp b/test/OpenMP/target_parallel_for_simd_map_messages.cpp
index 7e95e10..1cab598 100644
--- a/test/OpenMP/target_parallel_for_simd_map_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_map_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/target_parallel_for_simd_messages.cpp b/test/OpenMP/target_parallel_for_simd_messages.cpp
index 0e1a0fe..c0e0328 100644
--- a/test/OpenMP/target_parallel_for_simd_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_misc_messages.c b/test/OpenMP/target_parallel_for_simd_misc_messages.c
index 2adc6f8..8886ddd 100644
--- a/test/OpenMP/target_parallel_for_simd_misc_messages.c
+++ b/test/OpenMP/target_parallel_for_simd_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp target parallel for simd'}}
 #pragma omp target parallel for simd
 
@@ -166,7 +168,7 @@
 // expected-note@+1 {{variable with automatic storage duration is predetermined as private; perhaps you forget to enclose 'omp for' directive into a parallel or another task region?}}
     for (int j = 0; j < 16; ++j)
 // expected-error@+2 2 {{reduction variable must be shared}}
-// expected-error@+1 {{region cannot be closely nested inside 'target parallel for simd' region; perhaps you forget to enclose 'omp for' directive into a parallel region?}}
+// expected-error@+1 {{OpenMP constructs may not be nested inside a simd region}}
 #pragma omp for reduction(+ : i, j)
       for (int k = 0; k < 16; ++k)
         i += j;
diff --git a/test/OpenMP/target_parallel_for_simd_nowait_messages.cpp b/test/OpenMP/target_parallel_for_simd_nowait_messages.cpp
index 3c4b512..5e1e131 100644
--- a/test/OpenMP/target_parallel_for_simd_nowait_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_num_threads_messages.cpp b/test/OpenMP/target_parallel_for_simd_num_threads_messages.cpp
index 1076b86..b3323f6 100644
--- a/test/OpenMP/target_parallel_for_simd_num_threads_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp b/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp
index 70a3b4e..3e3f6c2 100644
--- a/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp
@@ -2,11 +2,15 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
 #if __cplusplus >= 201103L
- // expected-note@+2 4 {{declared here}}
+ // expected-note@+2 2 {{declared here}}
 #endif
 bool foobool(int argc) {
   return argc;
@@ -15,7 +19,7 @@
 struct S1; // expected-note {{declared here}}
 
 template <class T, typename S, int N, int ST> // expected-note {{declared here}}
-T tmain(T argc, S **argv) {                   //expected-note 2 {{declared here}}
+T tmain(T argc, S **argv) {
   int j; // expected-note {{declared here}}
 #pragma omp target parallel for simd ordered
   for (int i = ST; i < N; i++)
@@ -26,28 +30,26 @@
 #pragma omp target parallel for simd ordered() // expected-error {{expected expression}}
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
-// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
-// expected-error@+2 2 {{expression is not an integral constant expression}}
-// expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+// expected-error@+2 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}}
+// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
 #pragma omp target parallel for simd ordered(argc
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
-// expected-error@+1 2 {{argument to 'ordered' clause must be a strictly positive integer value}}
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}}
 #pragma omp target parallel for simd ordered(ST // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}}
 #pragma omp target parallel for simd ordered(1)) // expected-warning {{extra tokens at the end of '#pragma omp target parallel for simd' are ignored}}
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
-#pragma omp target parallel for simd ordered((ST > 0) ? 1 + ST : 2) // expected-note 2 {{as specified in 'ordered' clause}}
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}}
+#pragma omp target parallel for simd ordered((ST > 0) ? 1 + ST : 2)
   for (int i = ST; i < N; i++)
-    argv[0][i] = argv[0][i] - argv[0][i - ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp target parallel for simd', but found only 1}}
-#if __cplusplus >= 201103L
-  // expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
-#endif
-// expected-error@+3 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'ordered' clause}}
-// expected-error@+2 2 {{argument to 'ordered' clause must be a strictly positive integer value}}
-// expected-error@+1 2 {{expression is not an integral constant expression}}
+    argv[0][i] = argv[0][i] - argv[0][i - ST];
+// expected-error@+3 2 {{argument to 'ordered' clause must be a strictly positive integer value}}
+// expected-error@+2 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'ordered' clause}}
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}}
 #pragma omp target parallel for simd ordered(foobool(argc)), ordered(true), ordered(-5)
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
@@ -59,14 +61,17 @@
 #pragma omp target parallel for simd ordered(j = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}}
 #pragma omp target parallel for simd ordered(1)
   for (int i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
-#pragma omp target parallel for simd ordered(N) // expected-error {{argument to 'ordered' clause must be a strictly positive integer value}}
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}}
+#pragma omp target parallel for simd ordered(N)
   for (T i = ST; i < N; i++)
     argv[0][i] = argv[0][i] - argv[0][i - ST];
-#pragma omp target parallel for simd ordered(2) // expected-note {{as specified in 'ordered' clause}}
-  foo();                            // expected-error {{expected 2 for loops after '#pragma omp target parallel for simd'}}
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}}
+#pragma omp target parallel for simd ordered(2)
+  foo();
   return argc;
 }
 
@@ -81,12 +86,14 @@
 #pragma omp target parallel for simd ordered() // expected-error {{expected expression}}
   for (int i = 4; i < 12; i++)
     argv[0][i] = argv[0][i] - argv[0][i - 4];
-#pragma omp target parallel for simd ordered(4 // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{as specified in 'ordered' clause}}
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}}
+#pragma omp target parallel for simd ordered(4 // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = 4; i < 12; i++)
-    argv[0][i] = argv[0][i] - argv[0][i - 4]; // expected-error {{expected 4 for loops after '#pragma omp target parallel for simd', but found only 1}}
-#pragma omp target parallel for simd ordered(2 + 2))      // expected-warning {{extra tokens at the end of '#pragma omp target parallel for simd' are ignored}}  expected-note {{as specified in 'ordered' clause}}
+    argv[0][i] = argv[0][i] - argv[0][i - 4];
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}}
+#pragma omp target parallel for simd ordered(2 + 2))      // expected-warning {{extra tokens at the end of '#pragma omp target parallel for simd' are ignored}}
   for (int i = 4; i < 12; i++)
-    argv[0][i] = argv[0][i] - argv[0][i - 4];            // expected-error {{expected 4 for loops after '#pragma omp target parallel for simd', but found only 1}}
+    argv[0][i] = argv[0][i] - argv[0][i - 4];
 #if __cplusplus >= 201103L
   // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
 #endif
@@ -110,13 +117,12 @@
 #pragma omp target parallel for simd ordered(j = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i = 4; i < 12; i++)
     argv[0][i] = argv[0][i] - argv[0][i - 4];
-// expected-error@+3 {{statement after '#pragma omp target parallel for simd' must be a for loop}}
-// expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, -1, -2>' requested here}}
+// expected-error@+2 {{statement after '#pragma omp target parallel for simd' must be a for loop}}
 #pragma omp target parallel for simd ordered(ordered(tmain < int, char, -1, -2 > (argc, argv) // expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}}
   foo();
-#pragma omp target parallel for simd ordered(2) // expected-note {{as specified in 'ordered' clause}}
-  foo();                            // expected-error {{expected 2 for loops after '#pragma omp target parallel for simd'}}
-  // expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, 1, 0>' requested here}}
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}}
+#pragma omp target parallel for simd ordered(2)
+  foo();
   return tmain<int, char, 1, 0>(argc, argv);
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_private_messages.cpp b/test/OpenMP/target_parallel_for_simd_private_messages.cpp
index 57262a5..51f1d9b 100644
--- a/test/OpenMP/target_parallel_for_simd_private_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -43,7 +45,7 @@
   S5(int v) : a(v) {}
   S5 &operator=(S5 &s) {
 #pragma omp target parallel for simd private(a) private(this->a) private(s.a) // expected-error {{expected variable name or data member of current class}}
-    for (int k = 0; k < s.a; ++k)
+    for (int k = 0; k < s.a; ++k) // expected-warning {{Non-trivial type 'S5' is mapped, only trivial types are guaranteed to be mapped correctly}}
       ++s.a;
     return *this;
   }
diff --git a/test/OpenMP/target_parallel_for_simd_proc_bind_messages.cpp b/test/OpenMP/target_parallel_for_simd_proc_bind_messages.cpp
index 5bb6d92..83b4efa 100644
--- a/test/OpenMP/target_parallel_for_simd_proc_bind_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp b/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp
index 72e2130..30aaf83 100644
--- a/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_safelen_messages.cpp b/test/OpenMP/target_parallel_for_simd_safelen_messages.cpp
index f990101..74b1a73 100644
--- a/test/OpenMP/target_parallel_for_simd_safelen_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_schedule_messages.cpp b/test/OpenMP/target_parallel_for_simd_schedule_messages.cpp
index f0d86e9..6f26e7a 100644
--- a/test/OpenMP/target_parallel_for_simd_schedule_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_for_simd_simdlen_messages.cpp b/test/OpenMP/target_parallel_for_simd_simdlen_messages.cpp
index e51e67b..f8b375e 100644
--- a/test/OpenMP/target_parallel_for_simd_simdlen_messages.cpp
+++ b/test/OpenMP/target_parallel_for_simd_simdlen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_if_codegen.cpp b/test/OpenMP/target_parallel_if_codegen.cpp
index 215d8f9..04c052a 100644
--- a/test/OpenMP/target_parallel_if_codegen.cpp
+++ b/test/OpenMP/target_parallel_if_codegen.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -145,7 +163,7 @@
 // CHECK:       store i8 [[FB]], i8* [[CONV]], align
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -171,7 +189,7 @@
 // CHECK:       br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
 //
 // CHECK:       [[IF_THEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -203,7 +221,7 @@
 // CHECK:       br i1 [[TB]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
 //
 // CHECK:       [[IF_THEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -224,7 +242,7 @@
 // CHECK:       br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
 //
 // CHECK:       [[IF_THEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -245,7 +263,7 @@
 //
 // CHECK: define {{.*}}[[FTEMPLATE]]
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -257,7 +275,7 @@
 //
 //
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
diff --git a/test/OpenMP/target_parallel_if_messages.cpp b/test/OpenMP/target_parallel_if_messages.cpp
index 06b7a59..d59b4ab 100644
--- a/test/OpenMP/target_parallel_if_messages.cpp
+++ b/test/OpenMP/target_parallel_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_is_device_ptr_ast_print.cpp b/test/OpenMP/target_parallel_is_device_ptr_ast_print.cpp
index 7fbddd8..16cc188 100644
--- a/test/OpenMP/target_parallel_is_device_ptr_ast_print.cpp
+++ b/test/OpenMP/target_parallel_is_device_ptr_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -50,7 +54,7 @@
 // CHECK-NEXT: int aa[10];
 // CHECK-NEXT: arr &raa = this->aa;
 // CHECK-NEXT: func(
-// CHECK-NEXT: #pragma omp target parallel is_device_ptr(this->k)
+// CHECK-NEXT: #pragma omp target parallel is_device_ptr(this->k){{$}}
 // CHECK-NEXT: {
 // CHECK-NEXT: }
 // CHECK-NEXT: #pragma omp target parallel is_device_ptr(this->z)
diff --git a/test/OpenMP/target_parallel_is_device_ptr_messages.cpp b/test/OpenMP/target_parallel_is_device_ptr_messages.cpp
index d9c3ee9..b46fd71 100644
--- a/test/OpenMP/target_parallel_is_device_ptr_messages.cpp
+++ b/test/OpenMP/target_parallel_is_device_ptr_messages.cpp
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -std=c++11 -verify -fopenmp -ferror-limit 200 %s
+
+// RUN: %clang_cc1 -std=c++11 -verify -fopenmp-simd -ferror-limit 200 %s
 struct ST {
   int *a;
 };
@@ -38,7 +40,7 @@
 #pragma omp target parallel is_device_ptr(aa) // OK
     {}
 #pragma omp target parallel is_device_ptr(raa) // OK
-    {}    
+    {}
 #pragma omp target parallel is_device_ptr(e) // expected-error{{expected pointer, array, reference to pointer, or reference to array in 'is_device_ptr clause'}}
     {}
 #pragma omp target parallel is_device_ptr(g) // OK
diff --git a/test/OpenMP/target_parallel_map_messages.cpp b/test/OpenMP/target_parallel_map_messages.cpp
index d3cc742..4bb14ea 100644
--- a/test/OpenMP/target_parallel_map_messages.cpp
+++ b/test/OpenMP/target_parallel_map_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/target_parallel_messages.cpp b/test/OpenMP/target_parallel_messages.cpp
index b6763d8..a7dfbf9 100644
--- a/test/OpenMP/target_parallel_messages.cpp
+++ b/test/OpenMP/target_parallel_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s
 // RUN: not %clang_cc1 -fopenmp -std=c++11 -fopenmp-targets=aaa-bbb-ccc-ddd -o - %s 2>&1 | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -o - %s
 // CHECK: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd'
 
 void foo() {
diff --git a/test/OpenMP/target_parallel_nowait_messages.cpp b/test/OpenMP/target_parallel_nowait_messages.cpp
index 91e26f2..8da03e4 100644
--- a/test/OpenMP/target_parallel_nowait_messages.cpp
+++ b/test/OpenMP/target_parallel_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_num_threads_codegen.cpp b/test/OpenMP/target_parallel_num_threads_codegen.cpp
index e2bddae..cc3f0d8 100644
--- a/test/OpenMP/target_parallel_num_threads_codegen.cpp
+++ b/test/OpenMP/target_parallel_num_threads_codegen.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -147,7 +165,7 @@
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -158,7 +176,7 @@
 //
 //
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1, i32 1024)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1, i32 1024)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -186,7 +204,7 @@
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -207,7 +225,7 @@
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -225,7 +243,7 @@
 //
 // CHECK: define {{.*}}[[FTEMPLATE]]
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 20)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 20)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -247,7 +265,7 @@
 // CHECK:       [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align
 // CHECK:       [[THREADS:%.+]] = sext i16 [[T]] to i32
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
diff --git a/test/OpenMP/target_parallel_num_threads_messages.cpp b/test/OpenMP/target_parallel_num_threads_messages.cpp
index 95797ca..efc1d13 100644
--- a/test/OpenMP/target_parallel_num_threads_messages.cpp
+++ b/test/OpenMP/target_parallel_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_private_messages.cpp b/test/OpenMP/target_parallel_private_messages.cpp
index fabd37d..34652d6 100644
--- a/test/OpenMP/target_parallel_private_messages.cpp
+++ b/test/OpenMP/target_parallel_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_proc_bind_messages.cpp b/test/OpenMP/target_parallel_proc_bind_messages.cpp
index 56292ad..a01b63d 100644
--- a/test/OpenMP/target_parallel_proc_bind_messages.cpp
+++ b/test/OpenMP/target_parallel_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/target_parallel_reduction_messages.cpp b/test/OpenMP/target_parallel_reduction_messages.cpp
index 038f099..6ea0884 100644
--- a/test/OpenMP/target_parallel_reduction_messages.cpp
+++ b/test/OpenMP/target_parallel_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_parallel_shared_messages.cpp b/test/OpenMP/target_parallel_shared_messages.cpp
index 302a092..2ade722 100644
--- a/test/OpenMP/target_parallel_shared_messages.cpp
+++ b/test/OpenMP/target_parallel_shared_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_private_codegen.cpp b/test/OpenMP/target_private_codegen.cpp
index 5c738ee..87564a3 100644
--- a/test/OpenMP/target_private_codegen.cpp
+++ b/test/OpenMP/target_private_codegen.cpp
@@ -9,6 +9,16 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/target_private_messages.cpp b/test/OpenMP/target_private_messages.cpp
index a093a87..49ee3fe 100644
--- a/test/OpenMP/target_private_messages.cpp
+++ b/test/OpenMP/target_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}}
 extern S1 a;
 class S2 {
@@ -36,7 +38,7 @@
   S5(int v) : a(v) {}
   S5 &operator=(S5 &s) {
 #pragma omp target private(a) private(this->a) private(s.a) // expected-error {{expected variable name or data member of current class}}
-    for (int k = 0; k < s.a; ++k)
+    for (int k = 0; k < s.a; ++k) // expected-warning {{Non-trivial type 'S5' is mapped, only trivial types are guaranteed to be mapped correctly}}
       ++s.a;
     return *this;
   }
diff --git a/test/OpenMP/target_reduction_codegen.cpp b/test/OpenMP/target_reduction_codegen.cpp
new file mode 100644
index 0000000..bc4ecd9
--- /dev/null
+++ b/test/OpenMP/target_reduction_codegen.cpp
@@ -0,0 +1,225 @@
+// Only test codegen on target side, as private clause does not require any action on the host side
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+  TT<tx, ty> operator*(const TT<tx, ty> &) { return *this; }
+};
+
+// TCHECK: [[S1:%.+]] = type { double }
+
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+
+  #pragma omp target reduction(*:a)
+  {
+  }
+
+  // TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}})
+  // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK: store {{.+}}, {{.+}} [[A]],
+  // TCHECK: load i32*, i32** [[A]],
+  // TCHECK: ret void
+
+#pragma omp target reduction(+:a)
+  {
+    a = 1;
+  }
+
+  // TCHECK:  define void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}})
+  // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK: store {{.+}}, {{.+}} [[A]],
+  // TCHECK: [[REF:%.+]] = load i32*, i32** [[A]],
+  // TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[REF]],
+  // TCHECK: ret void
+
+  #pragma omp target reduction(-:a, aa)
+  {
+    a = 1;
+    aa = 1;
+  }
+
+  // TCHECK:  define void @__omp_offloading_{{.+}}(i32*{{.+}} [[A:%.+]], i16*{{.+}} [[AA:%.+]])
+  // TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK:  [[AA:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK: store {{.+}}, {{.+}} [[A]],
+  // TCHECK: store {{.+}}, {{.+}} [[AA]],
+  // TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]],
+  // TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]],
+  // TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]],
+  // TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]],
+  // TCHECK:  ret void
+
+  return a;
+}
+
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  short aa = 0;
+  tx b[10];
+
+#pragma omp target reduction(+:a,aa,b)
+  {
+    a = 1;
+    aa = 1;
+    b[2] = 1;
+  }
+
+  return a;
+}
+
+static
+int fstatic(int n) {
+  int a = 0;
+  short aa = 0;
+  char aaa = 0;
+  int b[10];
+
+#pragma omp target reduction(-:a,aa,aaa,b)
+  {
+    a = 1;
+    aa = 1;
+    aaa = 1;
+    b[2] = 1;
+  }
+
+  return a;
+}
+
+// TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}}, i16*{{.+}}, i8*{{.+}}, [10 x i32]*{{.+}})
+// TCHECK:  [[A:%.+]] = alloca i{{[0-9]+}}*,
+// TCHECK:  [[A2:%.+]] = alloca i{{[0-9]+}}*,
+// TCHECK:  [[A3:%.+]] = alloca i{{[0-9]+}}*,
+// TCHECK:  [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*,
+// TCHECK: store {{.+}}, {{.+}} [[A]],
+// TCHECK: store {{.+}}, {{.+}} [[A2]],
+// TCHECK: store {{.+}}, {{.+}} [[A3]],
+// TCHECK: store {{.+}}, {{.+}} [[B]],
+// TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]],
+// TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]],
+// TCHECK: [[A3_REF:%.+]] = load i8*, i8** [[A3]],
+// TCHECK: [[B_REF:%.+]] = load {{.+}}*, {{.+}}** [[B]],
+// TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]],
+// TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]],
+// TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A3_REF]],
+// TCHECK:  [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// TCHECK:  store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]],
+// TCHECK:  ret void
+
+struct S1 {
+  double a;
+
+  int r1(int n){
+    int b = n+1;
+    short int c[2][n];
+
+#pragma omp target reduction(max:b,c)
+    {
+      this->a = (double)b + 1.5;
+      c[1][1] = ++a;
+    }
+
+    return c[1][1] + (int)b;
+  }
+
+  // TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i32*{{.+}}, i{{[0-9]+}} [[VLA:%.+]], i{{[0-9]+}} [[VLA1:%.+]], i16*{{.+}})
+  // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*,
+  // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK: [[C_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK: store [[S1]]* [[TH]], [[S1]]** [[TH_ADDR]],
+  // TCHECK: store i{{[0-9]+}}* {{.+}}, i{{[0-9]+}}** [[B_ADDR]],
+  // TCHECK: store i{{[0-9]+}} [[VLA]], i{{[0-9]+}}* [[VLA_ADDR]],
+  // TCHECK: store i{{[0-9]+}} [[VLA1]], i{{[0-9]+}}* [[VLA_ADDR2]],
+  // TCHECK: store i{{[0-9]+}}* {{.+}}, i{{[0-9]+}}** [[C_ADDR]],
+  // TCHECK: [[TH_ADDR_REF:%.+]] = load [[S1]]*, [[S1]]** [[TH_ADDR]],
+  // TCHECK: [[B_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[B_ADDR]],
+  // TCHECK: [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR]],
+  // TCHECK: [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR2]],
+  // TCHECK: [[C_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[C_ADDR]],
+
+  // this->a = (double)b + 1.5;
+  // TCHECK: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_REF]],
+  // TCHECK: [[B_CONV:%.+]] = sitofp i{{[0-9]+}} [[B_VAL]] to double
+  // TCHECK: [[NEW_A_VAL:%.+]] = fadd double [[B_CONV]], 1.5{{.+}}+00
+  // TCHECK: [[A_FIELD:%.+]] = getelementptr inbounds [[S1]], [[S1]]* [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // TCHECK: store double [[NEW_A_VAL]], double* [[A_FIELD]],
+
+  // c[1][1] = ++a;
+  // TCHECK: [[A_FIELD4:%.+]] = getelementptr inbounds [[S1]], [[S1]]* [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // TCHECK: [[A_FIELD4_VAL:%.+]] = load double, double* [[A_FIELD4]],
+  // TCHECK: [[A_FIELD_INC:%.+]] = fadd double [[A_FIELD4_VAL]], 1.0{{.+}}+00
+  // TCHECK: store double [[A_FIELD_INC]], double* [[A_FIELD4]],
+  // TCHECK: [[A_FIELD_INC_CONV:%.+]] = fptosi double [[A_FIELD_INC]] to i{{[0-9]+}}
+  // TCHECK: [[C_IND:%.+]] = mul{{.+}} i{{[0-9]+}} 1, [[VLA_ADDR_REF2]]
+  // TCHECK: [[C_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, i{{[0-9]+}}* [[C_REF]], i{{[0-9]+}} [[C_IND]]
+  // TCHECK: [[C_1_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, i{{[0-9]+}}* [[C_1_REF]], i{{[0-9]+}} 1
+  // TCHECK: store i{{[0-9]+}} [[A_FIELD_INC_CONV]], i{{[0-9]+}}* [[C_1_1_REF]],
+
+  // finish
+  // TCHECK: ret void
+};
+
+
+int bar(int n){
+  int a = 0;
+  a += foo(n);
+  S1 S;
+  a += S.r1(n);
+  a += fstatic(n);
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+// template
+// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, [10 x i32]*{{.+}})
+// TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
+// TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}*,
+// TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*,
+// TCHECK: store {{.+}}, {{.+}} [[A]],
+// TCHECK: store {{.+}}, {{.+}} [[A2]],
+// TCHECK: store {{.+}}, {{.+}} [[B]],
+// TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]],
+// TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]],
+// TCHECK: [[B_REF:%.+]] = load {{.+}}*, {{.+}}** [[B]],
+// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]],
+// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]],
+// TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]],
+// TCHECK: ret void
+
+#endif
diff --git a/test/OpenMP/target_reduction_messages.cpp b/test/OpenMP/target_reduction_messages.cpp
new file mode 100644
index 0000000..ff936b8
--- /dev/null
+++ b/test/OpenMP/target_reduction_messages.cpp
@@ -0,0 +1,266 @@
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
+void foo() {
+}
+
+bool foobool(int argc) {
+  return argc;
+}
+
+void foobar(int &ref) {
+#pragma omp target reduction(+:ref)
+  foo();
+}
+
+struct S1; // expected-note {{declared here}} expected-note 4 {{forward declaration of 'S1'}}
+extern S1 a;
+class S2 {
+  mutable int a;
+  S2 &operator+(const S2 &arg) { return (*this); } // expected-note 3 {{implicitly declared private here}}
+
+public:
+  S2() : a(0) {}
+  S2(S2 &s2) : a(s2.a) {}
+  static float S2s; // expected-note 2 {{static data member is predetermined as shared}}
+  static const float S2sc; // expected-note 2 {{'S2sc' declared here}}
+};
+const float S2::S2sc = 0;
+S2 b;                     // expected-note 3 {{'b' defined here}}
+const S2 ba[5];           // expected-note 2 {{'ba' defined here}}
+class S3 {
+  int a;
+
+public:
+  int b;
+  S3() : a(0) {}
+  S3(const S3 &s3) : a(s3.a) {}
+  S3 operator+(const S3 &arg1) { return arg1; }
+};
+int operator+(const S3 &arg1, const S3 &arg2) { return 5; }
+S3 c;               // expected-note 3 {{'c' defined here}}
+const S3 ca[5];     // expected-note 2 {{'ca' defined here}}
+extern const int f; // expected-note 4 {{'f' declared here}}
+class S4 {
+  int a;
+  S4(); // expected-note {{implicitly declared private here}}
+  S4(const S4 &s4);
+  S4 &operator+(const S4 &arg) { return (*this); }
+
+public:
+  S4(int v) : a(v) {}
+};
+S4 &operator&=(S4 &arg1, S4 &arg2) { return arg1; }
+class S5 {
+  int a;
+  S5() : a(0) {} // expected-note {{implicitly declared private here}}
+  S5(const S5 &s5) : a(s5.a) {}
+  S5 &operator+(const S5 &arg);
+
+public:
+  S5(int v) : a(v) {}
+};
+class S6 { // expected-note 3 {{candidate function (the implicit copy assignment operator) not viable: no known conversion from 'int' to 'const S6' for 1st argument}}
+#if __cplusplus >= 201103L // C++11 or later
+// expected-note@-2 3 {{candidate function (the implicit move assignment operator) not viable}}
+#endif
+  int a;
+
+public:
+  S6() : a(6) {}
+  operator int() { return 6; }
+} o;
+
+S3 h, k;
+#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
+
+template <class T>       // expected-note {{declared here}}
+T tmain(T argc) {
+  const T d = T();       // expected-note 4 {{'d' defined here}}
+  const T da[5] = {T()}; // expected-note 2 {{'da' defined here}}
+  T qa[5] = {T()};
+  T i;
+  T &j = i;                    // expected-note 4 {{'j' defined here}}
+  S3 &p = k;                   // expected-note 2 {{'p' defined here}}
+  const T &r = da[(int)i];     // expected-note 2 {{'r' defined here}}
+  T &q = qa[(int)i];           // expected-note 2 {{'q' defined here}}
+  T fl;
+#pragma omp target reduction // expected-error {{expected '(' after 'reduction'}}
+  foo();
+#pragma omp target reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp target' are ignored}}
+  foo();
+#pragma omp target reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  foo();
+#pragma omp target reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  foo();
+#pragma omp target reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  foo();
+#pragma omp target reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}}
+  foo();
+#pragma omp target reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  foo();
+#pragma omp target reduction(& : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  foo();
+#pragma omp target reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  foo();
+#pragma omp target reduction(|| : argc ? i : argc) // expected-error 2 {{expected variable name, array element or array section}}
+  foo();
+#pragma omp target reduction(foo : argc) //expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'float'}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'int'}}
+  foo();
+#pragma omp target reduction(&& : argc)
+  foo();
+#pragma omp target reduction(^ : T) // expected-error {{'T' does not refer to a value}}
+  foo();
+#pragma omp target reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 3 {{const-qualified list item cannot be reduction}} expected-error 2 {{'operator+' is a private member of 'S2'}}
+  foo();
+#pragma omp target reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 4 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 3 {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(max : h.b) // expected-error {{expected variable name, array element or array section}}
+  foo();
+#pragma omp target reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}} expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  foo();
+#pragma omp target reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}}
+  foo();
+#pragma omp target reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(+ : h, k) // expected-error {{threadprivate or thread local variable cannot be reduction}}
+  foo();
+#pragma omp target reduction(+ : o) // expected-error 2 {{no viable overloaded '='}}
+  foo();
+#pragma omp parallel private(i), reduction(+ : j), reduction(+ : q) // expected-error 4 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp parallel private(k)
+#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error 2 {{variable can appear only once in OpenMP 'reduction' clause}} expected-note 2 {{previously referenced here}}
+  foo();
+#pragma omp target reduction(+ : r) // expected-error 2 {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp parallel shared(i)
+#pragma omp target reduction(min : i)
+#pragma omp parallel reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp parallel
+#pragma omp for private(fl)
+  for (int i = 0; i < 10; ++i)
+#pragma omp target reduction(+ : fl)
+    foo();
+#pragma omp parallel
+#pragma omp for reduction(- : fl)
+  for (int i = 0; i < 10; ++i)
+#pragma omp target reduction(+ : fl)
+    foo();
+
+  return T();
+}
+
+namespace A {
+double x;
+#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}}
+}
+namespace B {
+using A::x;
+}
+
+int main(int argc, char **argv) {
+  const int d = 5;       // expected-note 2 {{'d' defined here}}
+  const int da[5] = {0}; // expected-note {{'da' defined here}}
+  int qa[5] = {0};
+  S4 e(4);
+  S5 g(5);
+  int i;
+  int &j = i;                  // expected-note 2 {{'j' defined here}}
+  S3 &p = k;                   // expected-note 2 {{'p' defined here}}
+  const int &r = da[i];        // expected-note {{'r' defined here}}
+  int &q = qa[i];              // expected-note {{'q' defined here}}
+  float fl;
+#pragma omp target reduction // expected-error {{expected '(' after 'reduction'}}
+  foo();
+#pragma omp target reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp target' are ignored}}
+  foo();
+#pragma omp target reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  foo();
+#pragma omp target reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  foo();
+#pragma omp target reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  foo();
+#pragma omp target reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}}
+  foo();
+#pragma omp target reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  foo();
+#pragma omp target reduction(foo : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max'}}
+  foo();
+#pragma omp target reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  foo();
+#pragma omp target reduction(|| : argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name, array element or array section}}
+  foo();
+#pragma omp target reduction(~ : argc) // expected-error {{expected unqualified-id}}
+  foo();
+#pragma omp target reduction(&& : argc)
+  foo();
+#pragma omp target reduction(^ : S1) // expected-error {{'S1' does not refer to a value}}
+  foo();
+#pragma omp target reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{const-qualified list item cannot be reduction}} expected-error {{'operator+' is a private member of 'S2'}}
+  foo();
+#pragma omp target reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 2 {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(max : h.b) // expected-error {{expected variable name, array element or array section}}
+  foo();
+#pragma omp target reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  foo();
+#pragma omp target reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}}
+  foo();
+#pragma omp target reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp target reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+  foo();
+#pragma omp target reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
+  foo();
+#pragma omp target reduction(+ : o) // expected-error {{no viable overloaded '='}}
+  foo();
+#pragma omp parallel private(i), reduction(+ : j), reduction(+ : q) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp parallel private(k)
+#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error {{variable can appear only once in OpenMP 'reduction' clause}} expected-note {{previously referenced here}}
+  foo();
+#pragma omp target reduction(+ : r) // expected-error {{const-qualified list item cannot be reduction}}
+  foo();
+#pragma omp parallel shared(i)
+#pragma omp target reduction(min : i)
+#pragma omp parallel reduction(max : j) // expected-error {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  foo();
+#pragma omp parallel
+#pragma omp for private(fl)
+  for (int i = 0; i < 10; ++i)
+#pragma omp target reduction(+ : fl)
+    foo();
+#pragma omp parallel
+#pragma omp for reduction(- : fl)
+  for (int i = 0; i < 10; ++i)
+#pragma omp target reduction(+ : fl)
+    foo();
+  static int m;
+#pragma omp target reduction(+ : m) // OK
+  m++;
+
+  return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain<int>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<float>' requested here}}
+}
diff --git a/test/OpenMP/target_simd_aligned_messages.cpp b/test/OpenMP/target_simd_aligned_messages.cpp
index 76a37f6..d2628b4 100644
--- a/test/OpenMP/target_simd_aligned_messages.cpp
+++ b/test/OpenMP/target_simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/target_simd_ast_print.cpp b/test/OpenMP/target_simd_ast_print.cpp
index 01ce1d2..7d3daa3 100644
--- a/test/OpenMP/target_simd_ast_print.cpp
+++ b/test/OpenMP/target_simd_ast_print.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -35,7 +39,7 @@
   }
 };
 
-// CHECK: #pragma omp target simd private(this->a) private(this->a) private(T::a)
+// CHECK: #pragma omp target simd private(this->a) private(this->a) private(T::a){{$}}
 // CHECK: #pragma omp target simd private(this->a) private(this->a)
 // CHECK: #pragma omp target simd private(this->a) private(this->a) private(this->S::a)
 
diff --git a/test/OpenMP/target_simd_codegen.cpp b/test/OpenMP/target_simd_codegen.cpp
new file mode 100644
index 0000000..ad94def
--- /dev/null
+++ b/test/OpenMP/target_simd_codegen.cpp
@@ -0,0 +1,694 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[S1:%.+]] = type { double }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// We have 8 target regions, but only 7 that actually will generate offloading
+// code, only 6 will have mapped arguments, and only 4 have all-constant map
+// sizes.
+
+// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288]
+// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2]
+// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288]
+// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547]
+// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40]
+// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547]
+// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40]
+// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+// CHECK-LABEL: get_val
+long long get_val() { return 0; }
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null)
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT0:@.+]]()
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target simd nowait
+  for (int i = 3; i < 32; i += 5) {
+  }
+
+  // CHECK:       call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}, i{{32|64}}{{[*]*}} {{[^)]+}})
+  long long k = get_val();
+  #pragma omp target simd if(target: 0) linear(k : 3)
+  for (int i = 10; i > 1; i--) {
+    a += 1;
+  }
+
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0))
+  // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]],
+
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  int lin = 12;
+  #pragma omp target simd if(target: 1) linear(lin, a : get_val())
+  for (unsigned long long it = 2000; it >= 600; it-=400) {
+    aa += 1;
+  }
+
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
+  // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+  // CHECK:       [[IFTHEN]]
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0))
+  // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
+
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+  // CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+  // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  // CHECK-NEXT:  br label %[[IFEND:.+]]
+  // CHECK:       [[IFELSE]]
+  // CHECK:       call void [[HVT3]]({{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[IFEND]]
+  // CHECK:       [[IFEND]]
+
+  #pragma omp target simd if(target: n>10)
+  for (short it = 6; it <= 20; it-=-4) {
+    a += 1;
+    aa += 1;
+  }
+
+  // We capture 3 VLA sizes in this target region
+  // CHECK-64:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-64:       [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32*
+  // CHECK-64:       store i32 [[A_VAL]], i32* [[A_ADDR]],
+  // CHECK-64:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
+
+  // CHECK-32:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-32:       store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
+  // CHECK-32:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
+
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
+  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+  // CHECK:       [[TRY]]
+  // CHECK:       [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
+  // CHECK:       [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
+  // CHECK:       [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
+
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0))
+  // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0
+
+  // CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[SADDR4:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[SADDR5:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[PADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[SADDR6:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[PADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[SADDR7:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[PADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[SADDR8:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX8]]
+  // CHECK-DAG:   [[PADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX8]]
+
+  // The names below are not necessarily consistent with the names used for the
+  // addresses above as some are repeated.
+  // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]],
+  // CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CBPADDR1:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CPADDR1:%.+]],
+  // CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CBPADDR2:%.+]],
+  // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CPADDR2:%.+]],
+  // CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CBPADDR3:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR3:%.+]],
+  // CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR4:%.+]],
+  // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR4:%.+]],
+  // CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]**
+  // CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]**
+  // CHECK-DAG:   store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store float* %{{.+}}, float** [[CBPADDR5:%.+]],
+  // CHECK-DAG:   store float* %{{.+}}, float** [[CPADDR5:%.+]],
+  // CHECK-DAG:   [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to float**
+  // CHECK-DAG:   [[CPADDR5]] = bitcast i8** {{%[^,]+}} to float**
+  // CHECK-DAG:   store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CBPADDR6:%.+]],
+  // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR6:%.+]],
+  // CHECK-DAG:   [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]**
+  // CHECK-DAG:   [[CPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]**
+  // CHECK-DAG:   store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store double* %{{.+}}, double** [[CBPADDR7:%.+]],
+  // CHECK-DAG:   store double* %{{.+}}, double** [[CPADDR7:%.+]],
+  // CHECK-DAG:   [[CBPADDR7]] = bitcast i8** {{%[^,]+}} to double**
+  // CHECK-DAG:   [[CPADDR7]] = bitcast i8** {{%[^,]+}} to double**
+  // CHECK-DAG:   store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CBPADDR8:%.+]],
+  // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8:%.+]],
+  // CHECK-DAG:   [[CBPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]**
+  // CHECK-DAG:   [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]**
+  // CHECK-DAG:   store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target simd if(target: n>20)
+  for (unsigned char it = 'z'; it >= 'a'; it+=-1) {
+    a += 1;
+    b[2] += 1.0;
+    bn[3] += 1.0;
+    c[1][2] += 1.0;
+    cn[1][3] += 1.0;
+    d.X += 1;
+    d.Y += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0]]()
+// CHECK:       !llvm.loop
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+
+// CHECK:       define internal void [[HVT1]](i[[SZ]] %{{.+}}, i{{32|64}}{{[*]*.*}} %{{.+}})
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
+// CHECK:       !llvm.mem.parallel_loop_access
+// CHECK:       !llvm.loop
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT2]](i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK:       [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK:       !llvm.loop
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT3]]
+// CHECK:       [[A_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
+// CHECK-DAG:   [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       !llvm.loop
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT4]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x float]*
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_BN:%.+]] = alloca float*
+// CHECK:       [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]*
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA3:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_CN:%.+]] = alloca double*
+// CHECK:       [[LOCAL_D:%.+]] = alloca [[TT]]*
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
+// CHECK-DAG:   store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]]
+// CHECK-DAG:   store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]]
+// CHECK-DAG:   store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]]
+// CHECK-DAG:   store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]]
+
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]],
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
+// CHECK-DAG:   [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]],
+// CHECK-DAG:   [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
+// CHECK-DAG:   [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]],
+// CHECK-DAG:   [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]],
+// CHECK-DAG:   [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]],
+
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  short aa = 0;
+  tx b[10];
+
+  #pragma omp target simd if(target: n>40)
+  for (long long i = -10; i < 10; i += 3) {
+    a += 1;
+    aa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+static
+int fstatic(int n) {
+  int a = 0;
+  short aa = 0;
+  char aaa = 0;
+  int b[10];
+
+  #pragma omp target simd if(target: n>50)
+  for (unsigned i=100; i<10; i+=10) {
+    a += 1;
+    aa += 1;
+    aaa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+struct S1 {
+  double a;
+
+  int r1(int n){
+    int b = n+1;
+    short int c[2][n];
+
+    #pragma omp target simd if(target: n>60)
+    for (unsigned long long it = 2000; it >= 600; it -= 400) {
+      this->a = (double)b + 1.5;
+      c[1][1] = ++a;
+    }
+
+    return c[1][1] + (int)b;
+  }
+};
+
+// CHECK: define {{.*}}@{{.*}}bar{{.*}}
+int bar(int n){
+  int a = 0;
+
+  // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}})
+  a += foo(n);
+
+  S1 S;
+  // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
+  a += S.r1(n);
+
+  // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
+  a += fstatic(n);
+
+  // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+//
+// CHECK: define {{.*}}[[FS1]]
+//
+// CHECK:          i8* @llvm.stacksave()
+// CHECK-64:       [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32*
+// CHECK-64:       store i32 %{{.+}}, i32* [[B_ADDR]],
+// CHECK-64:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]],
+
+// CHECK-32:       store i32 %{{.+}}, i32* %__vla_expr
+// CHECK-32:       store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
+// CHECK-32:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
+
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
+// CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+// CHECK:       [[TRY]]
+// We capture 2 VLA sizes in this target region
+// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
+// CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
+
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0))
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]]
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]]
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]]
+// CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]]
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]]
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]]
+// CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]]
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]]
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]]
+// CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]]
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]]
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]]
+
+// The names below are not necessarily consistent with the names used for the
+// addresses above as some are repeated.
+// CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]],
+// CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CBPADDR1:%.+]],
+// CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CPADDR1:%.+]],
+// CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]],
+// CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR3:%.+]],
+// CHECK-DAG:   store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR3:%.+]],
+// CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]**
+// CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]**
+// CHECK-DAG:   store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i16* %{{.+}}, i16** [[CBPADDR4:%.+]],
+// CHECK-DAG:   store i16* %{{.+}}, i16** [[CPADDR4:%.+]],
+// CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG:   store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}}
+
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+
+//
+// CHECK: define {{.*}}[[FSTATIC]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
+// CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+// CHECK:       [[IFTHEN]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0))
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
+
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 0
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 0
+// CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+// CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 1
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 1
+// CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+// CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 2
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 2
+// CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]],
+// CHECK-DAG:   store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]],
+
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 3
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 3
+// CHECK-DAG:   [[CBPADDR3:%.+]] = bitcast i8** [[BPADDR3]] to [10 x i32]**
+// CHECK-DAG:   [[CPADDR3:%.+]] = bitcast i8** [[PADDR3]] to [10 x i32]**
+// CHECK-DAG:   store [10 x i32]* [[VAL3:%.+]], [10 x i32]** [[CBPADDR3]],
+// CHECK-DAG:   store [10 x i32]* [[VAL3]], [10 x i32]** [[CPADDR3]],
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK-NEXT:  br label %[[IFEND:.+]]
+// CHECK:       [[IFELSE]]
+// CHECK:       call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[IFEND]]
+// CHECK:       [[IFEND]]
+
+//
+// CHECK: define {{.*}}[[FTEMPLATE]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
+// CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+// CHECK:       [[IFTHEN]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0))
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
+
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0
+// CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]],
+// CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]],
+
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1
+// CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]],
+// CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]],
+
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2
+// CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to [10 x i32]**
+// CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to [10 x i32]**
+// CHECK-DAG:   store [10 x i32]* [[VAL2:%.+]], [10 x i32]** [[CBPADDR2]],
+// CHECK-DAG:   store [10 x i32]* [[VAL2]], [10 x i32]** [[CPADDR2]],
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK-NEXT:  br label %[[IFEND:.+]]
+// CHECK:       [[IFELSE]]
+// CHECK:       call void [[HVT:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[IFEND]]
+// CHECK:       [[IFEND]]
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions of the callees of bar().
+
+// CHECK:       define internal void [[HVT7]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_THIS:%.+]] = alloca [[S1]]*
+// CHECK:       [[LOCAL_B:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_C:%.+]] = alloca i16*
+// CHECK-DAG:   store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
+// CHECK-DAG:   store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]]
+// Store captures in the context.
+// CHECK-DAG:   [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]],
+// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32*
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
+// CHECK-DAG:   [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]],
+
+
+// CHECK:       define internal void [[HVT6]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AAA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
+// Store captures in the context.
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
+// CHECK-DAG:   [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
+
+// CHECK:       define internal void [[HVT5]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
+// Store captures in the context.
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
+
+#endif
diff --git a/test/OpenMP/target_simd_codegen_registration.cpp b/test/OpenMP/target_simd_codegen_registration.cpp
new file mode 100644
index 0000000..47185ce
--- /dev/null
+++ b/test/OpenMP/target_simd_codegen_registration.cpp
@@ -0,0 +1,472 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target simd codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// Check that no target code is emmitted if no omptests flag was provided.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK:    [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[A1:@.+]] = internal global [[SA]]
+// CHECK-DAG: [[A2:@.+]] = global [[SA]]
+// CHECK-DAG: [[B1:@.+]] = global [[SB]]
+// CHECK-DAG: [[B2:@.+]] = global [[SB]]
+// CHECK-DAG: [[C1:@.+]] = internal global [[SC]]
+// CHECK-DAG: [[D1:@.+]] = global [[SD]]
+// CHECK-DAG: [[E1:@.+]] = global [[SE]]
+// CHECK-DAG: [[T1:@.+]] = global [[ST1]]
+// CHECK-DAG: [[T2:@.+]] = global [[ST2]]
+
+// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-NTARGET-NOT: type { i8*, i8*, %
+// CHECK-NTARGET-NOT: type { i32, %
+
+// We have 7 target regions
+
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// TCHECK-NOT: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+
+// CHECK-NTARGET-NOT: private constant i8 0
+// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
+
+// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function.
+// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [
+// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+// CHECK-NTARGET: @llvm.global_ctors = appending global [3   x { i32, void ()*, i8* }] [
+
+extern int *R;
+
+struct SA {
+  int arr[4];
+  void foo() {
+    int a = *R;
+    a += 1;
+    *R = a;
+  }
+  SA() {
+    int a = *R;
+    a += 2;
+    *R = a;
+  }
+  ~SA() {
+    int a = *R;
+    a += 3;
+    *R = a;
+  }
+};
+
+struct SB {
+  int arr[8];
+  void foo() {
+    int a = *R;
+    #pragma omp target simd
+    for (int i = 0; i < 10; ++i)
+      a += 4;
+    *R = a;
+  }
+  SB() {
+    int a = *R;
+    a += 5;
+    *R = a;
+  }
+  ~SB() {
+    int a = *R;
+    a += 6;
+    *R = a;
+  }
+};
+
+struct SC {
+  int arr[16];
+  void foo() {
+    int a = *R;
+    a += 7;
+    *R = a;
+  }
+  SC() {
+    int a = *R;
+    #pragma omp target simd
+    for (int i = 0; i < 10; ++i)
+      a += 8;
+    *R = a;
+  }
+  ~SC() {
+    int a = *R;
+    a += 9;
+    *R = a;
+  }
+};
+
+struct SD {
+  int arr[32];
+  void foo() {
+    int a = *R;
+    a += 10;
+    *R = a;
+  }
+  SD() {
+    int a = *R;
+    a += 11;
+    *R = a;
+  }
+  ~SD() {
+    int a = *R;
+    #pragma omp target simd
+    for (int i = 0; i < 10; ++i)
+      a += 12;
+    *R = a;
+  }
+};
+
+struct SE {
+  int arr[64];
+  void foo() {
+    int a = *R;
+    #pragma omp target simd if(target: 0)
+    for (int i = 0; i < 10; ++i)
+      a += 13;
+    *R = a;
+  }
+  SE() {
+    int a = *R;
+    #pragma omp target simd
+    for (int i = 0; i < 10; ++i)
+      a += 14;
+    *R = a;
+  }
+  ~SE() {
+    int a = *R;
+    #pragma omp target simd
+    for (int i = 0; i < 10; ++i)
+      a += 15;
+    *R = a;
+  }
+};
+
+template <int x>
+struct ST {
+  int arr[128 + x];
+  void foo() {
+    int a = *R;
+    #pragma omp target simd
+    for (int i = 0; i < 10; ++i)
+      a += 16 + x;
+    *R = a;
+  }
+  ST() {
+    int a = *R;
+    #pragma omp target simd
+    for (int i = 0; i < 10; ++i)
+      a += 17 + x;
+    *R = a;
+  }
+  ~ST() {
+    int a = *R;
+    #pragma omp target simd
+    for (int i = 0; i < 10; ++i)
+      a += 18 + x;
+    *R = a;
+  }
+};
+
+// We have to make sure we us all the target regions:
+//CHECK-DAG: define internal void @[[NAME1]](
+//CHECK-DAG: call void @[[NAME1]](
+//CHECK-DAG: define internal void @[[NAME2]](
+//CHECK-DAG: call void @[[NAME2]](
+//CHECK-DAG: define internal void @[[NAME3]](
+//CHECK-DAG: call void @[[NAME3]](
+//CHECK-DAG: define internal void @[[NAME4]](
+//CHECK-DAG: call void @[[NAME4]](
+//CHECK-DAG: define internal void @[[NAME5]](
+//CHECK-DAG: call void @[[NAME5]](
+//CHECK-DAG: define internal void @[[NAME6]](
+//CHECK-DAG: call void @[[NAME6]](
+//CHECK-DAG: define internal void @[[NAME7]](
+//CHECK-DAG: call void @[[NAME7]](
+//CHECK-DAG: define internal void @[[NAME8]](
+//CHECK-DAG: call void @[[NAME8]](
+//CHECK-DAG: define internal void @[[NAME9]](
+//CHECK-DAG: call void @[[NAME9]](
+//CHECK-DAG: define internal void @[[NAME10]](
+//CHECK-DAG: call void @[[NAME10]](
+//CHECK-DAG: define internal void @[[NAME11]](
+//CHECK-DAG: call void @[[NAME11]](
+//CHECK-DAG: define internal void @[[NAME12]](
+//CHECK-DAG: call void @[[NAME12]](
+
+//TCHECK-DAG: define void @[[NAME1]](
+//TCHECK-DAG: define void @[[NAME2]](
+//TCHECK-DAG: define void @[[NAME3]](
+//TCHECK-DAG: define void @[[NAME4]](
+//TCHECK-DAG: define void @[[NAME5]](
+//TCHECK-DAG: define void @[[NAME6]](
+//TCHECK-DAG: define void @[[NAME7]](
+//TCHECK-DAG: define void @[[NAME8]](
+//TCHECK-DAG: define void @[[NAME9]](
+//TCHECK-DAG: define void @[[NAME10]](
+//TCHECK-DAG: define void @[[NAME11]](
+//TCHECK-DAG: define void @[[NAME12]](
+
+// CHECK-NTARGET-NOT: __tgt_target
+// CHECK-NTARGET-NOT: __tgt_register_lib
+// CHECK-NTARGET-NOT: __tgt_unregister_lib
+
+// TCHECK-NOT: __tgt_target
+// TCHECK-NOT: __tgt_register_lib
+// TCHECK-NOT: __tgt_unregister_lib
+
+// We have 2 initializers with priority 500
+//CHECK: define internal void [[P500]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 1 initializers with priority 501
+//CHECK: define internal void [[P501]](
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 6 initializers with default priority
+//CHECK: define internal void [[PMAX]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// Check registration and unregistration
+
+//CHECK:     define internal void @[[UNREGFN:.+]](i8*)
+//CHECK-SAME: comdat($[[REGFN]]) {
+//CHECK:     call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_unregister_lib([[DSCTY]]*)
+
+//CHECK:     define linkonce hidden void @[[REGFN]](i8*)
+//CHECK-SAME: comdat {
+//CHECK:     call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]])
+//CHECK:     call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*),
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_register_lib([[DSCTY]]*)
+
+static __attribute__((init_priority(500))) SA a1;
+SA a2;
+SB __attribute__((init_priority(500))) b1;
+SB __attribute__((init_priority(501))) b2;
+static SC c1;
+SD d1;
+SE e1;
+ST<100> t1;
+ST<1000> t2;
+
+
+int bar(int a){
+  int r = a;
+
+  a1.foo();
+  a2.foo();
+  b1.foo();
+  b2.foo();
+  c1.foo();
+  d1.foo();
+  e1.foo();
+  t1.foo();
+  t2.foo();
+
+  #pragma omp target simd
+  for (int i = 0; i < 10; ++i)
+    ++r;
+
+  return r + *R;
+}
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+#endif
diff --git a/test/OpenMP/target_simd_codegen_registration_naming.cpp b/test/OpenMP/target_simd_codegen_registration_naming.cpp
new file mode 100644
index 0000000..2de806d
--- /dev/null
+++ b/test/OpenMP/target_simd_codegen_registration_naming.cpp
@@ -0,0 +1,86 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target simd codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[CA:%.+]] = type { i32* }
+
+// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}})
+int nested(int a){
+  // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]](
+  #pragma omp target simd
+  for (int i = 0; i < 10; ++i)
+    ++a;
+
+  // CHECK: call void @"[[LNAME:.+]]"([[CA]]*
+  auto F = [&](){
+    #pragma omp parallel
+    {
+      #pragma omp target simd
+      for (int i = 0; i < 10; ++i)
+        ++a;
+    }
+  };
+
+  F();
+
+  return a;
+}
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]](
+
+// CHECK: define {{.*}}void @"[[LNAME]]"(
+// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to
+
+// CHECK: define {{.*}}void [[PNAME]](
+// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]](
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]](
+
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+#endif
diff --git a/test/OpenMP/target_simd_collapse_messages.cpp b/test/OpenMP/target_simd_collapse_messages.cpp
index ce42273..4008441 100644
--- a/test/OpenMP/target_simd_collapse_messages.cpp
+++ b/test/OpenMP/target_simd_collapse_messages.cpp
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_simd_defaultmap_messages.cpp b/test/OpenMP/target_simd_defaultmap_messages.cpp
index 4f62213..33e12c1 100644
--- a/test/OpenMP/target_simd_defaultmap_messages.cpp
+++ b/test/OpenMP/target_simd_defaultmap_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_simd_depend_codegen.cpp b/test/OpenMP/target_simd_depend_codegen.cpp
new file mode 100644
index 0000000..abbbc76
--- /dev/null
+++ b/test/OpenMP/target_simd_depend_codegen.cpp
@@ -0,0 +1,261 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+extern int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+  static long *plocal;
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]*
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target simd device(global + a) depend(in: global) depend(out: a, b, cn[4])
+  for (int i = 0; i < 10; ++i) {
+  }
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+
+  // CHECK:       [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
+  // CHECK:       br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
+  // CHECK:       [[THEN]]:
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+
+  // CHECK:       [[ELSE]]:
+  // CHECK-NOT:   getelementptr inbounds [2 x i8*], [2 x i8*]*
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+  // CHECK:       [[EXIT]]:
+
+  #pragma omp target simd device(global + a) nowait depend(inout: global, a, bn) if(a)
+  for (int i = 0; i < *plocal; ++i) {
+    static int local1;
+    *plocal = global;
+    local1 = global;
+  }
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]*
+  // CHECK:       getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target simd if(0) firstprivate(global) depend(out:global)
+  for (int i = 0; i < global; ++i) {
+    global += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0:@.+]]()
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias)
+// CHECK:       store void (i8*, ...)* null, void (i8*, ...)** %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null)
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT0]]()
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0)
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    load i32, i32* [[AA_ADDR]], align
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT2]](i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+
+#endif
diff --git a/test/OpenMP/target_simd_depend_messages.cpp b/test/OpenMP/target_simd_depend_messages.cpp
index 89184f2..80f988d 100644
--- a/test/OpenMP/target_simd_depend_messages.cpp
+++ b/test/OpenMP/target_simd_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_simd_device_messages.cpp b/test/OpenMP/target_simd_device_messages.cpp
index 1e9afc3..d705f61 100644
--- a/test/OpenMP/target_simd_device_messages.cpp
+++ b/test/OpenMP/target_simd_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_simd_firstprivate_messages.cpp b/test/OpenMP/target_simd_firstprivate_messages.cpp
index 5bfd951..85b216b 100644
--- a/test/OpenMP/target_simd_firstprivate_messages.cpp
+++ b/test/OpenMP/target_simd_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_simd_if_messages.cpp b/test/OpenMP/target_simd_if_messages.cpp
index ff68f6f..60dee37 100644
--- a/test/OpenMP/target_simd_if_messages.cpp
+++ b/test/OpenMP/target_simd_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_simd_is_device_ptr_messages.cpp b/test/OpenMP/target_simd_is_device_ptr_messages.cpp
index 02e2b32..6d6bc24 100644
--- a/test/OpenMP/target_simd_is_device_ptr_messages.cpp
+++ b/test/OpenMP/target_simd_is_device_ptr_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -std=c++11 -verify -fopenmp-simd %s
+
 struct ST {
   int *a;
 };
diff --git a/test/OpenMP/target_simd_lastprivate_messages.cpp b/test/OpenMP/target_simd_lastprivate_messages.cpp
index 9a6eb24..ae8bd0a 100644
--- a/test/OpenMP/target_simd_lastprivate_messages.cpp
+++ b/test/OpenMP/target_simd_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_simd_linear_messages.cpp b/test/OpenMP/target_simd_linear_messages.cpp
index 6319e31..6a094b8 100644
--- a/test/OpenMP/target_simd_linear_messages.cpp
+++ b/test/OpenMP/target_simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
 int x;
 };
diff --git a/test/OpenMP/target_simd_loop_messages.cpp b/test/OpenMP/target_simd_loop_messages.cpp
index d33f1f4..760bfe0 100644
--- a/test/OpenMP/target_simd_loop_messages.cpp
+++ b/test/OpenMP/target_simd_loop_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
 
 class S {
   int a;
diff --git a/test/OpenMP/target_simd_map_messages.cpp b/test/OpenMP/target_simd_map_messages.cpp
index 2473bbb..3722ecb 100644
--- a/test/OpenMP/target_simd_map_messages.cpp
+++ b/test/OpenMP/target_simd_map_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/target_simd_messages.cpp b/test/OpenMP/target_simd_messages.cpp
index d1b06e3..aa259c0 100644
--- a/test/OpenMP/target_simd_messages.cpp
+++ b/test/OpenMP/target_simd_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_simd_misc_messages.c b/test/OpenMP/target_simd_misc_messages.c
index debe387..3965d28 100644
--- a/test/OpenMP/target_simd_misc_messages.c
+++ b/test/OpenMP/target_simd_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp target simd'}}
 #pragma omp target simd
 
diff --git a/test/OpenMP/target_simd_nowait_messages.cpp b/test/OpenMP/target_simd_nowait_messages.cpp
index 3635ce2..c7ab302 100644
--- a/test/OpenMP/target_simd_nowait_messages.cpp
+++ b/test/OpenMP/target_simd_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_simd_private_messages.cpp b/test/OpenMP/target_simd_private_messages.cpp
index b20ff0a..1b7d864 100644
--- a/test/OpenMP/target_simd_private_messages.cpp
+++ b/test/OpenMP/target_simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -43,7 +45,7 @@
   S5(int v) : a(v) {}
   S5 &operator=(S5 &s) {
 #pragma omp target simd private(a) private(this->a) private(s.a) // expected-error {{expected variable name or data member of current class}}
-    for (int k = 0; k < s.a; ++k)
+    for (int k = 0; k < s.a; ++k) // expected-warning {{Non-trivial type 'S5' is mapped, only trivial types are guaranteed to be mapped correctly}}
       ++s.a;
     return *this;
   }
diff --git a/test/OpenMP/target_simd_reduction_messages.cpp b/test/OpenMP/target_simd_reduction_messages.cpp
index 9e1bb05..299c813 100644
--- a/test/OpenMP/target_simd_reduction_messages.cpp
+++ b/test/OpenMP/target_simd_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_simd_safelen_messages.cpp b/test/OpenMP/target_simd_safelen_messages.cpp
index 79bee76..69ceab1 100644
--- a/test/OpenMP/target_simd_safelen_messages.cpp
+++ b/test/OpenMP/target_simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_simd_simdlen_messages.cpp b/test/OpenMP/target_simd_simdlen_messages.cpp
index 8a9a3dc..c65fe20 100644
--- a/test/OpenMP/target_simd_simdlen_messages.cpp
+++ b/test/OpenMP/target_simd_simdlen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_ast_print.cpp b/test/OpenMP/target_teams_ast_print.cpp
index a80b8d7..a0cb635 100644
--- a/test/OpenMP/target_teams_ast_print.cpp
+++ b/test/OpenMP/target_teams_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -46,7 +50,7 @@
 // CHECK-NEXT: T b = argc, c, d, e, f, g;
 // CHECK-NEXT: static T a;
 // CHECK-NEXT: S<T> s;
-// CHECK-NEXT: #pragma omp target teams
+// CHECK-NEXT: #pragma omp target teams{{$}}
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: #pragma omp target teams default(none) private(argc,b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(C) thread_limit(d * C)
 // CHECK-NEXT: foo()
diff --git a/test/OpenMP/target_teams_codegen.cpp b/test/OpenMP/target_teams_codegen.cpp
index 978879d..f77c671 100644
--- a/test/OpenMP/target_teams_codegen.cpp
+++ b/test/OpenMP/target_teams_codegen.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -38,16 +56,18 @@
 // code, only 6 will have mapped arguments, and only 4 have all-constant map
 // sizes.
 
-// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2]
-// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288]
+// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 2]
+// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2]
-// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 288, i32 288]
-// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 288, i32 547, i32 288, i32 547, i32 547, i32 288, i32 288, i32 547, i32 547]
+// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288]
+// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547]
 // CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40]
-// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 288, i32 288, i32 547]
+// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547]
 // CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40]
-// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 288, i32 288, i32 288, i32 547]
-// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 547, i32 288, i32 288, i32 288, i32 547]
+// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547]
 // CHECK-DAG: @{{.*}} = private constant i8 0
 // CHECK-DAG: @{{.*}} = private constant i8 0
 // CHECK-DAG: @{{.*}} = private constant i8 0
@@ -95,14 +115,34 @@
   double cn[5][n];
   TT<long long, char> d;
 
-  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null, i32 0, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
+  // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR2]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR2]]
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
   // CHECK:       [[FAIL]]
-  // CHECK:       call void [[HVT0:@.+]]()
+  // CHECK:       call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
   // CHECK-NEXT:  br label %[[END]]
   // CHECK:       [[END]]
-  #pragma omp target teams
+  #pragma omp target teams num_teams(a) thread_limit(a) firstprivate(aa) nowait
   {
   }
 
@@ -112,7 +152,7 @@
     a += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0)
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -136,7 +176,7 @@
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -168,7 +208,6 @@
   {
     a += 1;
     aa += 1;
-    global += 1;
   }
 
   // We capture 3 VLA sizes in this target region
@@ -181,14 +220,14 @@
   // CHECK-32:       store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
   // CHECK-32:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
 
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
+  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+  // CHECK:       [[TRY]]
   // CHECK:       [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
   // CHECK:       [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
   // CHECK:       [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
 
-  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
-  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
-  // CHECK:       [[TRY]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0
@@ -301,11 +340,13 @@
 // Check that the offloading functions are emitted and that the arguments are
 // correct and loaded correctly for the target regions in foo().
 
-// CHECK:       define internal void [[HVT0]]()
-// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*))
+// CHECK:       define internal void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] {{[^)]+}})
 //
 //
-// CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid.)
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] {{[^)]+}})
+// CHECK:       alloca i[[SZ]],
+// CHECK:       bitcast i[[SZ]]* {{.+}} to i16*
 // CHECK:       ret void
 // CHECK-NEXT:  }
 
@@ -361,7 +402,6 @@
 // CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
 // CHECK:       [[A_CASTED:%.+]] = alloca i[[SZ]], align
 // CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
-// CHECK:       [[GLOBAL_CASTED:%.+]] = alloca i[[SZ]], align
 // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
 // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
 // CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
@@ -374,28 +414,18 @@
 // CHECK-DAG:   [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
 // CHECK-DAG:   [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
 // CHECK-DAG:   store i16 [[AA]], i16* [[AA_C]], align
-// CHECK-DAG:   [[GLOBAL:%.+]] = load i32, i32* @global, align
-// CHECK-64-DAG:[[GLOBAL_C:%.+]] = bitcast i[[SZ]]* [[GLOBAL_CASTED]] to i32*
-// CHECK-64-DAG:store i32 [[GLOBAL]], i32* [[GLOBAL_C]], align
-// CHECK-32-DAG:store i32 [[GLOBAL]], i32* [[GLOBAL_CASTED]], align
 // CHECK-DAG:   [[PARAM1:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CASTED]], align
 // CHECK-DAG:   [[PARAM2:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
-// CHECK-DAG:   [[PARAM3:%.+]] = load i[[SZ]], i[[SZ]]* [[GLOBAL_CASTED]], align
-// CHECK-DAG:   call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]], i[[SZ]] [[PARAM3]])
+// CHECK-DAG:   call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]])
 //
 //
-// CHECK:       define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
 // CHECK:       [[A_ADDR:%.+]] = alloca i[[SZ]], align
 // CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
-// CHECK:       [[GLOBAL_ADDR:%.+]] = alloca i[[SZ]], align
 // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
 // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
-// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[GLOBAL_ADDR]], align
 // CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
 // CHECK-DAG:   [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
-// CHECK-64-DAG:[[GLOBAL_CADDR:%.+]] = bitcast i[[SZ]]* [[GLOBAL_ADDR]] to i32*
-// CHECK-64:    store i32 {{.+}}, i32* [[GLOBAL_CADDR]],
-// CHECK-32:    store i32 {{.+}}, i32* [[GLOBAL_ADDR]],
 // CHECK:       ret void
 // CHECK-NEXT:  }
 
@@ -441,7 +471,7 @@
 // CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]])
 //
 //
-// CHECK:       define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* %{{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* %{{.+}}, [[TT]]* {{.+}})
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}})
 // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
 
 template<typename tx>
@@ -523,17 +553,18 @@
 // CHECK-64:       store i32 %{{.+}}, i32* [[B_ADDR]],
 // CHECK-64:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]],
 
+// CHECK-32:       store i32 %{{.+}}, i32* %__vla_expr
 // CHECK-32:       store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
 // CHECK-32:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
 
-// We capture 2 VLA sizes in this target region
-// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
-// CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
-
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
 // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
 // CHECK:       [[TRY]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0)
+// We capture 2 VLA sizes in this target region
+// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
+// CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
+
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0
@@ -596,7 +627,7 @@
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -646,7 +677,7 @@
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -718,7 +749,7 @@
 // CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]])
 //
 //
-// CHECK:       define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* %{{.+}})
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}})
 // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
 
 
diff --git a/test/OpenMP/target_teams_codegen_registration.cpp b/test/OpenMP/target_teams_codegen_registration.cpp
index b815453..97ebbc6 100644
--- a/test/OpenMP/target_teams_codegen_registration.cpp
+++ b/test/OpenMP/target_teams_codegen_registration.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target teams codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
@@ -16,9 +24,22 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // Check that no target code is emmitted if no omptests flag was provided.
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -63,40 +84,40 @@
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // TCHECK-NOT: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 // CHECK-DAG: {{@.+}} = private constant i8 0
 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
-// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
 
 // CHECK-NTARGET-NOT: private constant i8 0
 // CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
@@ -411,31 +432,31 @@
 
 // Check metadata is properly generated:
 // CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 245, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 261, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 267, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 406, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 290, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 290, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 220, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 266, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 282, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 288, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 427, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 311, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 311, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 241, i32 {{[0-9]+}}}
 
 // TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 245, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 261, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 267, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 406, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 290, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 284, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 290, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 278, i32 {{[0-9]+}}}
-// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 220, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 266, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 282, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 288, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 427, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 311, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 311, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 299, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 241, i32 {{[0-9]+}}}
 
 #endif
diff --git a/test/OpenMP/target_teams_codegen_registration_naming.cpp b/test/OpenMP/target_teams_codegen_registration_naming.cpp
index fb42328..c09442d 100644
--- a/test/OpenMP/target_teams_codegen_registration_naming.cpp
+++ b/test/OpenMP/target_teams_codegen_registration_naming.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target teams codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/target_teams_default_messages.cpp b/test/OpenMP/target_teams_default_messages.cpp
index 0534637..fa54416 100644
--- a/test/OpenMP/target_teams_default_messages.cpp
+++ b/test/OpenMP/target_teams_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/target_teams_defaultmap_messages.cpp b/test/OpenMP/target_teams_defaultmap_messages.cpp
index f2d30fd..b1b2bdd 100644
--- a/test/OpenMP/target_teams_defaultmap_messages.cpp
+++ b/test/OpenMP/target_teams_defaultmap_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_depend_codegen.cpp b/test/OpenMP/target_teams_depend_codegen.cpp
new file mode 100644
index 0000000..27734bc
--- /dev/null
+++ b/test/OpenMP/target_teams_depend_codegen.cpp
@@ -0,0 +1,261 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+extern int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+  static long *plocal;
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]*
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target teams device(global + a) depend(in: global) depend(out: a, b, cn[4])
+  {
+  }
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+
+  // CHECK:       [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
+  // CHECK:       br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
+  // CHECK:       [[THEN]]:
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+
+  // CHECK:       [[ELSE]]:
+  // CHECK-NOT:   getelementptr inbounds [2 x i8*], [2 x i8*]*
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+  // CHECK:       [[EXIT]]:
+
+  #pragma omp target teams device(global + a) nowait depend(inout: global, a, bn) if(a)
+  {
+    static int local1;
+    *plocal = global;
+    local1 = global;
+  }
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]*
+  // CHECK:       getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* @0, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* @0, i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target teams if(0) firstprivate(global) depend(out:global)
+  {
+    global += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0:@.+]]()
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias)
+// CHECK:       store void (i8*, ...)* null, void (i8*, ...)** %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 0, i32 0)
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT0]]()
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 0)
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    load i32, i32* [[AA_ADDR]], align
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT2]](i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+
+#endif
diff --git a/test/OpenMP/target_teams_depend_messages.cpp b/test/OpenMP/target_teams_depend_messages.cpp
index 57983ee..235ce98 100644
--- a/test/OpenMP/target_teams_depend_messages.cpp
+++ b/test/OpenMP/target_teams_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_device_messages.cpp b/test/OpenMP/target_teams_device_messages.cpp
index bd75da8..da56c47 100644
--- a/test/OpenMP/target_teams_device_messages.cpp
+++ b/test/OpenMP/target_teams_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_ast_print.cpp b/test/OpenMP/target_teams_distribute_ast_print.cpp
index ae50366..4484451 100644
--- a/test/OpenMP/target_teams_distribute_ast_print.cpp
+++ b/test/OpenMP/target_teams_distribute_ast_print.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -81,7 +85,7 @@
 #pragma omp target teams distribute
   for (int i=0; i < 2; ++i)
     a = 2;
-// CHECK: #pragma omp target teams distribute
+// CHECK: #pragma omp target teams distribute{{$}}
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: a = 2;
 #pragma omp target teams distribute private(argc, b), firstprivate(c, d), collapse(2)
diff --git a/test/OpenMP/target_teams_distribute_codegen.cpp b/test/OpenMP/target_teams_distribute_codegen.cpp
new file mode 100644
index 0000000..6605cbd
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_codegen.cpp
@@ -0,0 +1,851 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[S1:%.+]] = type { double }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// We have 8 target regions, but only 7 that actually will generate offloading
+// code, only 6 will have mapped arguments, and only 4 have all-constant map
+// sizes.
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288]
+// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 2]
+// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2]
+// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288]
+// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [10 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547, i64 288]
+// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [5 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40]
+// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [5 x i64] [i64 288, i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: [[SIZET7:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40]
+// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
+  // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR2]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR2]]
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target teams distribute num_teams(a) thread_limit(a) firstprivate(aa) nowait
+  for (int i = 0; i < 10; ++i) {
+  }
+
+  // CHECK:       call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}})
+  #pragma omp target teams distribute if(target: 0)
+  for (int i = 0; i < 10; ++i) {
+    a += 1;
+  }
+
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]]
+
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target teams distribute if(target: 1)
+  for (int i = 0; i < 10; ++i) {
+    aa += 1;
+  }
+
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
+  // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+  // CHECK:       [[IFTHEN]]
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
+
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]]
+  // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  // CHECK-NEXT:  br label %[[IFEND:.+]]
+  // CHECK:       [[IFELSE]]
+  // CHECK:       call void [[HVT3]]({{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[IFEND]]
+  // CHECK:       [[IFEND]]
+  #pragma omp target teams distribute if(target: n>10)
+  for (int i = 0; i < 10; ++i) {
+    a += 1;
+    aa += 1;
+  }
+
+  // We capture 3 VLA sizes in this target region
+  // CHECK:          load i32, i32* %
+  // CHECK-64:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-64:       [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32*
+  // CHECK-64:       store i32 [[A_VAL]], i32* [[A_ADDR]],
+  // CHECK-64:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
+
+  // CHECK-32:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-32:       store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
+  // CHECK-32:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
+
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
+  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+  // CHECK:       [[TRY]]
+  // CHECK:       [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
+  // CHECK:       [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
+  // CHECK:       [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
+
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[SR]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0
+
+  // CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[SADDR4:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[SADDR5:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[PADDR5:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[SADDR6:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR6:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[PADDR6:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[SADDR7:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR7:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[PADDR7:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[SADDR8:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR8:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX8]]
+  // CHECK-DAG:   [[PADDR8:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX8]]
+  // CHECK-DAG:   [[SADDR9:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX9:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR9:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX9]]
+  // CHECK-DAG:   [[PADDR9:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX9]]
+
+  // The names below are not necessarily consistent with the names used for the
+  // addresses above as some are repeated.
+  // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]],
+  // CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CBPADDR1:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CPADDR1:%.+]],
+  // CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CBPADDR2:%.+]],
+  // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CPADDR2:%.+]],
+  // CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CBPADDR3:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR3:%.+]],
+  // CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR4:%.+]],
+  // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR4:%.+]],
+  // CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]**
+  // CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]**
+  // CHECK-DAG:   store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store float* %{{.+}}, float** [[CBPADDR5:%.+]],
+  // CHECK-DAG:   store float* %{{.+}}, float** [[CPADDR5:%.+]],
+  // CHECK-DAG:   [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to  float**
+  // CHECK-DAG:   [[CPADDR5]] = bitcast i8** {{%[^,]+}} to float**
+  // CHECK-DAG:   store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CBPADDR6:%.+]],
+  // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR6:%.+]],
+  // CHECK-DAG:   [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to  [5 x [10 x double]]**
+  // CHECK-DAG:   [[CPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]**
+  // CHECK-DAG:   store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store double* %{{.+}}, double** [[CBPADDR7:%.+]],
+  // CHECK-DAG:   store double* %{{.+}}, double** [[CPADDR7:%.+]],
+  // CHECK-DAG:   [[CBPADDR7]] = bitcast i8** {{%[^,]+}} to  double**
+  // CHECK-DAG:   [[CPADDR7]] = bitcast i8** {{%[^,]+}} to double**
+  // CHECK-DAG:   store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CBPADDR8:%.+]],
+  // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8:%.+]],
+  // CHECK-DAG:   [[CBPADDR8]] = bitcast i8** {{%[^,]+}} to  [[TT]]**
+  // CHECK-DAG:   [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]**
+  // CHECK-DAG:   store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR9:%.+]],
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR9:%.+]],
+  // CHECK-DAG:   [[CBPADDR9]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR9]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target teams distribute if(target: n>20) dist_schedule(static, n)
+  for (int i = 0; i < 10; ++i) {
+    a += 1;
+    b[2] += 1.0;
+    bn[3] += 1.0;
+    c[1][2] += 1.0;
+    cn[1][3] += 1.0;
+    d.X += 1;
+    d.Y += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] {{[^)]+}})
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] {{[^)]+}})
+// CHECK:       alloca i[[SZ]],
+// CHECK:       bitcast i[[SZ]]* {{.+}} to i16*
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+
+// CHECK:       define internal void [[HVT1]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
+// CHECK-64:    [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i32*
+// CHECK-64:    store i32 [[AA]], i32* [[AA_C]], align
+// CHECK-32:    store i32 [[AA]], i32* [[AA_CASTED]], align
+// CHECK:       [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}})
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT2]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK:       [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK:       [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
+// CHECK:       store i16 [[AA]], i16* [[AA_C]], align
+// CHECK:       [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}})
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK:       [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT3]]
+// Create stack storage and store argument in there.
+// CHECK:       [[A_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[A_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
+// CHECK-DAG:   [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK-64-DAG:[[A:%.+]] = load i32, i32* [[A_CADDR]], align
+// CHECK-32-DAG:[[A:%.+]] = load i32, i32* [[A_ADDR]], align
+// CHECK-64-DAG:[[A_C:%.+]] = bitcast i[[SZ]]* [[A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[A]], i32* [[A_C]], align
+// CHECK-32-DAG:store i32 [[A]], i32* [[A_CASTED]], align
+// CHECK-DAG:   [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK-DAG:   [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[AA]], i16* [[AA_C]], align
+// CHECK-DAG:   [[PARAM1:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CASTED]], align
+// CHECK-DAG:   [[PARAM2:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK-DAG:   call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
+// CHECK:       [[A_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
+// CHECK-DAG:   [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT4]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x float]*
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_BN:%.+]] = alloca float*
+// CHECK:       [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]*
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA3:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_CN:%.+]] = alloca double*
+// CHECK:       [[LOCAL_D:%.+]] = alloca [[TT]]*
+// CHECK:       alloca i[[SZ]],
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
+// CHECK-DAG:   store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]]
+// CHECK-DAG:   store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]]
+// CHECK-DAG:   store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]]
+// CHECK-DAG:   store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]]
+
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]],
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
+// CHECK-DAG:   [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]],
+// CHECK-DAG:   [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
+// CHECK-DAG:   [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]],
+// CHECK-DAG:   [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]],
+// CHECK-DAG:   [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*, i[[SZ]])* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]], i[[SZ]] %{{.+}})
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  short aa = 0;
+  tx b[10];
+
+  #pragma omp target teams distribute if(target: n>40)
+  for (int i = 0; i < 10; ++i) {
+    a += 1;
+    aa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+static
+int fstatic(int n) {
+  int a = 0;
+  short aa = 0;
+  char aaa = 0;
+  int b[10];
+
+  #pragma omp target teams distribute if(target: n>50)
+  for (int i = a; i < n; ++i) {
+    a += 1;
+    aa += 1;
+    aaa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+struct S1 {
+  double a;
+
+  int r1(int n){
+    int b = n+1;
+    short int c[2][n];
+
+    #pragma omp target teams distribute if(target: n>60)
+    for (int i = 0; i < 10; ++i) {
+      this->a = (double)b + 1.5;
+      c[1][1] = ++a;
+    }
+
+    return c[1][1] + (int)b;
+  }
+};
+
+// CHECK: define {{.*}}@{{.*}}bar{{.*}}
+int bar(int n){
+  int a = 0;
+
+  // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}})
+  a += foo(n);
+
+  S1 S;
+  // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
+  a += S.r1(n);
+
+  // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
+  a += fstatic(n);
+
+  // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+//
+// CHECK: define {{.*}}[[FS1]]
+//
+// CHECK:          i8* @llvm.stacksave()
+// CHECK-64:       [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32*
+// CHECK-64:       store i32 %{{.+}}, i32* [[B_ADDR]],
+// CHECK-64:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]],
+
+// CHECK-32:       store i32 %{{.+}}, i32* %__vla_expr
+// CHECK-32:       store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
+// CHECK-32:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
+
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
+// CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+// CHECK:       [[TRY]]
+// We capture 2 VLA sizes in this target region
+// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
+// CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
+
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]]
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]]
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]]
+// CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]]
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]]
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]]
+// CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]]
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]]
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]]
+// CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]]
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]]
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]]
+
+// The names below are not necessarily consistent with the names used for the
+// addresses above as some are repeated.
+// CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]],
+// CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CBPADDR1:%.+]],
+// CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CPADDR1:%.+]],
+// CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]],
+// CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR3:%.+]],
+// CHECK-DAG:   store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR3:%.+]],
+// CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]**
+// CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]**
+// CHECK-DAG:   store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i16* %{{.+}}, i16** [[CBPADDR4:%.+]],
+// CHECK-DAG:   store i16* %{{.+}}, i16** [[CPADDR4:%.+]],
+// CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG:   store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}}
+
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+
+//
+// CHECK: define {{.*}}[[FSTATIC]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
+// CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+// CHECK:       [[IFTHEN]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([5 x i[[SZ]]], [5 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
+
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 0
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 0
+// CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0:%.+]],
+// CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 1
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 1
+// CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1:%.+]],
+// CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 2
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 2
+// CHECK-DAG:   store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2:%.+]],
+// CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 3
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 3
+// CHECK-DAG:   store i[[SZ]] [[VAL3:%.+]], i[[SZ]]* [[CBPADDR3:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL3]], i[[SZ]]* [[CPADDR3:%.+]],
+// CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 4
+// CHECK-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 4
+// CHECK-DAG:   store [10 x i32]* %{{.+}}, [10 x i32]** [[CBPADDR4:%.+]],
+// CHECK-DAG:   store [10 x i32]* %{{.+}}, [10 x i32]** [[CPADDR4:%.+]],
+// CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x i32]**
+// CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x i32]**
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK-NEXT:  br label %[[IFEND:.+]]
+// CHECK:       [[IFELSE]]
+// CHECK:       call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[IFEND]]
+// CHECK:       [[IFEND]]
+
+//
+// CHECK: define {{.*}}[[FTEMPLATE]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
+// CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+// CHECK:       [[IFTHEN]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET7]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
+
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0
+// CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0:%.+]],
+// CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1
+// CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1:%.+]],
+// CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2
+// CHECK-DAG:   store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2:%.+]],
+// CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK-NEXT:  br label %[[IFEND:.+]]
+// CHECK:       [[IFELSE]]
+// CHECK:       call void [[HVT5]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[IFEND]]
+// CHECK:       [[IFEND]]
+
+
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions of the callees of bar().
+
+// CHECK:       define internal void [[HVT7]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_THIS:%.+]] = alloca [[S1]]*
+// CHECK:       [[LOCAL_B:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_C:%.+]] = alloca i16*
+// CHECK:       [[LOCAL_B_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
+// CHECK-DAG:   store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]]
+// Store captures in the context.
+// CHECK-DAG:   [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]],
+// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32*
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
+// CHECK-DAG:   [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]],
+
+// CHECK-64-DAG:[[CONV_B:%.+]] = load i32, i32* [[CONV_BP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_B_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_B]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_BV:%.+]] = load i32, i32* [[LOCAL_B]]
+// CHECK-32-DAG:store i32 [[LOCAL_BV]], i32* [[LOCAL_B_CASTED]], align
+// CHECK-DAG:   [[REF_B:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_B_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+
+// CHECK:       define internal void [[HVT6]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       alloca i[[SZ]],
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AAA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       alloca i[[SZ]],
+// CHECK:       [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AAA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
+// Store captures in the context.
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
+// CHECK-DAG:   [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK-DAG:   [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[CONV_AA]], i16* [[CONV]], align
+// CHECK-DAG:   [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
+
+// CHECK-DAG:   [[CONV_AAA:%.+]] = load i8, i8* [[CONV_AAAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA_CASTED]] to i8*
+// CHECK-DAG:   store i8 [[CONV_AAA]], i8* [[CONV]], align
+// CHECK-DAG:   [[REF_AAA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AAA_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED6:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] {{.+}}, i[[SZ]] [[REF_AA]], i[[SZ]] [[REF_AAA]], [10 x i32]* [[REF_B]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+// CHECK:       define internal void [[HVT5]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
+// Store captures in the context.
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK-DAG:   [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[CONV_AA]], i16* [[CONV]], align
+// CHECK-DAG:   [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_codegen_registration.cpp b/test/OpenMP/target_teams_distribute_codegen_registration.cpp
new file mode 100644
index 0000000..2b8208c
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_codegen_registration.cpp
@@ -0,0 +1,472 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target teams distribute codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// Check that no target code is emmitted if no omptests flag was provided.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK:    [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[A1:@.+]] = internal global [[SA]]
+// CHECK-DAG: [[A2:@.+]] = global [[SA]]
+// CHECK-DAG: [[B1:@.+]] = global [[SB]]
+// CHECK-DAG: [[B2:@.+]] = global [[SB]]
+// CHECK-DAG: [[C1:@.+]] = internal global [[SC]]
+// CHECK-DAG: [[D1:@.+]] = global [[SD]]
+// CHECK-DAG: [[E1:@.+]] = global [[SE]]
+// CHECK-DAG: [[T1:@.+]] = global [[ST1]]
+// CHECK-DAG: [[T2:@.+]] = global [[ST2]]
+
+// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-NTARGET-NOT: type { i8*, i8*, %
+// CHECK-NTARGET-NOT: type { i32, %
+
+// We have 7 target regions
+
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// TCHECK-NOT: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+
+// CHECK-NTARGET-NOT: private constant i8 0
+// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
+
+// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function.
+// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [
+// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+// CHECK-NTARGET: @llvm.global_ctors = appending global [3   x { i32, void ()*, i8* }] [
+
+extern int *R;
+
+struct SA {
+  int arr[4];
+  void foo() {
+    int a = *R;
+    a += 1;
+    *R = a;
+  }
+  SA() {
+    int a = *R;
+    a += 2;
+    *R = a;
+  }
+  ~SA() {
+    int a = *R;
+    a += 3;
+    *R = a;
+  }
+};
+
+struct SB {
+  int arr[8];
+  void foo() {
+    int a = *R;
+    #pragma omp target teams distribute
+    for (int i = 0; i < 10; ++i)
+      a += 4;
+    *R = a;
+  }
+  SB() {
+    int a = *R;
+    a += 5;
+    *R = a;
+  }
+  ~SB() {
+    int a = *R;
+    a += 6;
+    *R = a;
+  }
+};
+
+struct SC {
+  int arr[16];
+  void foo() {
+    int a = *R;
+    a += 7;
+    *R = a;
+  }
+  SC() {
+    int a = *R;
+    #pragma omp target teams distribute
+    for (int i = 0; i < 10; ++i)
+      a += 8;
+    *R = a;
+  }
+  ~SC() {
+    int a = *R;
+    a += 9;
+    *R = a;
+  }
+};
+
+struct SD {
+  int arr[32];
+  void foo() {
+    int a = *R;
+    a += 10;
+    *R = a;
+  }
+  SD() {
+    int a = *R;
+    a += 11;
+    *R = a;
+  }
+  ~SD() {
+    int a = *R;
+    #pragma omp target teams distribute
+    for (int i = 0; i < 10; ++i)
+      a += 12;
+    *R = a;
+  }
+};
+
+struct SE {
+  int arr[64];
+  void foo() {
+    int a = *R;
+    #pragma omp target teams distribute if(target: 0)
+    for (int i = 0; i < 10; ++i)
+      a += 13;
+    *R = a;
+  }
+  SE() {
+    int a = *R;
+    #pragma omp target teams distribute
+    for (int i = 0; i < 10; ++i)
+      a += 14;
+    *R = a;
+  }
+  ~SE() {
+    int a = *R;
+    #pragma omp target teams distribute
+    for (int i = 0; i < 10; ++i)
+      a += 15;
+    *R = a;
+  }
+};
+
+template <int x>
+struct ST {
+  int arr[128 + x];
+  void foo() {
+    int a = *R;
+    #pragma omp target teams distribute
+    for (int i = 0; i < 10; ++i)
+      a += 16 + x;
+    *R = a;
+  }
+  ST() {
+    int a = *R;
+    #pragma omp target teams distribute
+    for (int i = 0; i < 10; ++i)
+      a += 17 + x;
+    *R = a;
+  }
+  ~ST() {
+    int a = *R;
+    #pragma omp target teams distribute
+    for (int i = 0; i < 10; ++i)
+      a += 18 + x;
+    *R = a;
+  }
+};
+
+// We have to make sure we us all the target regions:
+//CHECK-DAG: define internal void @[[NAME1]](
+//CHECK-DAG: call void @[[NAME1]](
+//CHECK-DAG: define internal void @[[NAME2]](
+//CHECK-DAG: call void @[[NAME2]](
+//CHECK-DAG: define internal void @[[NAME3]](
+//CHECK-DAG: call void @[[NAME3]](
+//CHECK-DAG: define internal void @[[NAME4]](
+//CHECK-DAG: call void @[[NAME4]](
+//CHECK-DAG: define internal void @[[NAME5]](
+//CHECK-DAG: call void @[[NAME5]](
+//CHECK-DAG: define internal void @[[NAME6]](
+//CHECK-DAG: call void @[[NAME6]](
+//CHECK-DAG: define internal void @[[NAME7]](
+//CHECK-DAG: call void @[[NAME7]](
+//CHECK-DAG: define internal void @[[NAME8]](
+//CHECK-DAG: call void @[[NAME8]](
+//CHECK-DAG: define internal void @[[NAME9]](
+//CHECK-DAG: call void @[[NAME9]](
+//CHECK-DAG: define internal void @[[NAME10]](
+//CHECK-DAG: call void @[[NAME10]](
+//CHECK-DAG: define internal void @[[NAME11]](
+//CHECK-DAG: call void @[[NAME11]](
+//CHECK-DAG: define internal void @[[NAME12]](
+//CHECK-DAG: call void @[[NAME12]](
+
+//TCHECK-DAG: define void @[[NAME1]](
+//TCHECK-DAG: define void @[[NAME2]](
+//TCHECK-DAG: define void @[[NAME3]](
+//TCHECK-DAG: define void @[[NAME4]](
+//TCHECK-DAG: define void @[[NAME5]](
+//TCHECK-DAG: define void @[[NAME6]](
+//TCHECK-DAG: define void @[[NAME7]](
+//TCHECK-DAG: define void @[[NAME8]](
+//TCHECK-DAG: define void @[[NAME9]](
+//TCHECK-DAG: define void @[[NAME10]](
+//TCHECK-DAG: define void @[[NAME11]](
+//TCHECK-DAG: define void @[[NAME12]](
+
+// CHECK-NTARGET-NOT: __tgt_target
+// CHECK-NTARGET-NOT: __tgt_register_lib
+// CHECK-NTARGET-NOT: __tgt_unregister_lib
+
+// TCHECK-NOT: __tgt_target
+// TCHECK-NOT: __tgt_register_lib
+// TCHECK-NOT: __tgt_unregister_lib
+
+// We have 2 initializers with priority 500
+//CHECK: define internal void [[P500]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 1 initializers with priority 501
+//CHECK: define internal void [[P501]](
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 6 initializers with default priority
+//CHECK: define internal void [[PMAX]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// Check registration and unregistration
+
+//CHECK:     define internal void @[[UNREGFN:.+]](i8*)
+//CHECK-SAME: comdat($[[REGFN]]) {
+//CHECK:     call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_unregister_lib([[DSCTY]]*)
+
+//CHECK:     define linkonce hidden void @[[REGFN]](i8*)
+//CHECK-SAME: comdat {
+//CHECK:     call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]])
+//CHECK:     call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*),
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_register_lib([[DSCTY]]*)
+
+static __attribute__((init_priority(500))) SA a1;
+SA a2;
+SB __attribute__((init_priority(500))) b1;
+SB __attribute__((init_priority(501))) b2;
+static SC c1;
+SD d1;
+SE e1;
+ST<100> t1;
+ST<1000> t2;
+
+
+int bar(int a){
+  int r = a;
+
+  a1.foo();
+  a2.foo();
+  b1.foo();
+  b2.foo();
+  c1.foo();
+  d1.foo();
+  e1.foo();
+  t1.foo();
+  t2.foo();
+
+  #pragma omp target teams distribute
+  for (int i = 0; i < 10; ++i)
+    ++r;
+
+  return r + *R;
+}
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp b/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp
new file mode 100644
index 0000000..c2195ac
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp
@@ -0,0 +1,86 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target teams distribute codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[CA:%.+]] = type { i32* }
+
+// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}})
+int nested(int a){
+  // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]](
+  #pragma omp target teams distribute
+  for (int i = 0; i < 10; ++i)
+    ++a;
+
+  // CHECK: call void @"[[LNAME:.+]]"([[CA]]*
+  auto F = [&](){
+    #pragma omp parallel
+    {
+      #pragma omp target teams distribute
+      for (int i = 0; i < 10; ++i)
+        ++a;
+    }
+  };
+
+  F();
+
+  return a;
+}
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]](
+
+// CHECK: define {{.*}}void @"[[LNAME]]"(
+// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to
+
+// CHECK: define {{.*}}void [[PNAME]](
+// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]](
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]](
+
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+#endif
diff --git a/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
new file mode 100644
index 0000000..62fc589
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
@@ -0,0 +1,141 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X][Y];
+
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target teams distribute collapse(2)
+    for(int i = 0; i < X; i++) {
+      for(int j = 0; j < Y; j++) {
+        a[i][j] = (T)0;
+      }
+    }
+    // CK1: define internal void @[[OFFL1]](
+    // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+    // CK1: ret void
+
+    // CK1: define internal void @[[OUTL1]]({{.+}})
+    // discard loop variables not needed here
+    // CK1: = alloca i32,
+    // CK1: = alloca i32,
+    // CK1: = alloca i32,
+    // CK1: = alloca i32,
+    // CK1: [[OMP_UB:%.+]] = alloca i32,
+    // CK1: store i32 56087, i32* [[OMP_UB]],
+    // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+    // CK1: call void @__kmpc_for_static_fini(
+    // CK1: ret void
+
+    return a[0][0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n, int m>
+int tmain(T argc) {
+  T a[n][m];
+  #pragma omp target teams distribute collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = (T)0;
+    }
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int m = 2;
+  int a[n][m];
+  #pragma omp target teams distribute collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = 0;
+    }
+  }
+  return tmain<int, 10, 2>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: [[OMP_UB:%.omp.ub]] = alloca i64,
+// CK2: store i64 {{.+}}, i64* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// discard loop variables not needed here
+// CK2: [[OMP_UB:%.omp.ub]] = alloca i32,
+// CK2: store i32 {{.+}}, i32* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/target_teams_distribute_collapse_messages.cpp b/test/OpenMP/target_teams_distribute_collapse_messages.cpp
index 4bc4a7b..4010c5a 100644
--- a/test/OpenMP/target_teams_distribute_collapse_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_default_messages.cpp b/test/OpenMP/target_teams_distribute_default_messages.cpp
index 1bd1067..2a3229d 100644
--- a/test/OpenMP/target_teams_distribute_default_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/target_teams_distribute_defaultmap_messages.cpp b/test/OpenMP/target_teams_distribute_defaultmap_messages.cpp
index 6608cee..a310152 100644
--- a/test/OpenMP/target_teams_distribute_defaultmap_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_defaultmap_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_depend_codegen.cpp b/test/OpenMP/target_teams_distribute_depend_codegen.cpp
new file mode 100644
index 0000000..ebd1f9e
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_depend_codegen.cpp
@@ -0,0 +1,261 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+extern int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+  static long *plocal;
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID:@.+]], i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]*
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[ID]], i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target teams distribute device(global + a) depend(in: global) depend(out: a, b, cn[4])
+  for (int i = 0; i < 10; ++i) {
+  }
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+
+  // CHECK:       [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
+  // CHECK:       br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
+  // CHECK:       [[THEN]]:
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+
+  // CHECK:       [[ELSE]]:
+  // CHECK-NOT:   getelementptr inbounds [2 x i8*], [2 x i8*]*
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+  // CHECK:       [[EXIT]]:
+
+  #pragma omp target teams distribute device(global + a) nowait depend(inout: global, a, bn) if(a)
+  for (int i = 0; i < *plocal; ++i) {
+    static int local1;
+    *plocal = global;
+    local1 = global;
+  }
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]*
+  // CHECK:       getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[ID]], i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target teams distribute if(0) firstprivate(global) depend(out:global)
+  for (int i = 0; i < global; ++i) {
+    global += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0:@.+]]()
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias)
+// CHECK:       store void (i8*, ...)* null, void (i8*, ...)** %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 0, i32 0)
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT0]]()
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 0)
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    load i32, i32* [[AA_ADDR]], align
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT2]](i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_depend_messages.cpp b/test/OpenMP/target_teams_distribute_depend_messages.cpp
index fbdd495..9daba67 100644
--- a/test/OpenMP/target_teams_distribute_depend_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_device_messages.cpp b/test/OpenMP/target_teams_distribute_device_messages.cpp
index b35d0be..aa62e09 100644
--- a/test/OpenMP/target_teams_distribute_device_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
new file mode 100644
index 0000000..3eb1531
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
@@ -0,0 +1,213 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target teams distribute
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL2:.+]](
+    #pragma omp target teams distribute dist_schedule(static)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL3:.+]](
+    #pragma omp target teams distribute dist_schedule(static, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: define internal void @[[OFFL1]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL3]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+#pragma omp target teams distribute
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute dist_schedule(static, n)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+#pragma omp target teams distribute
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute dist_schedule(static, n)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/target_teams_distribute_dist_schedule_messages.cpp b/test/OpenMP/target_teams_distribute_dist_schedule_messages.cpp
index 8770870..2544b1b 100644
--- a/test/OpenMP/target_teams_distribute_dist_schedule_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
new file mode 100644
index 0000000..886a5de
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
@@ -0,0 +1,346 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target teams distribute firstprivate(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
+    // LAMBDA:  ret
+#pragma omp target teams distribute firstprivate(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]],
+    // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]],
+    // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]],
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+    // skip loop vars
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+    // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+    // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_CONV]],
+    // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]]
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_CONV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute firstprivate(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]]({{.+}})
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]],
+// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]],
+// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]],
+// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]],
+// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[TOFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK:  ret
+
+// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}})
+// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]],
+// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]],
+// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
+// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp b/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp
index 602e039..01c1a28 100644
--- a/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -125,7 +127,8 @@
 #pragma omp target teams distribute firstprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute lastprivate(argc), firstprivate(argc) // OK
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute lastprivate(argc), firstprivate(argc)
   for (i = 0; i < argc; ++i) foo();
 
   return 0;
diff --git a/test/OpenMP/target_teams_distribute_if_messages.cpp b/test/OpenMP/target_teams_distribute_if_messages.cpp
index c9ef03a..94d82ab 100644
--- a/test/OpenMP/target_teams_distribute_if_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
new file mode 100644
index 0000000..26a7c0d
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
@@ -0,0 +1,400 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target teams distribute lastprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target teams distribute lastprivate(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA-64: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 [[G_IN:%.+]], i64 [[G1_IN:%.+]], i64 [[SVAR_IN:%.+]], i64 [[SFVAR_IN:%.+]])
+      // LAMBDA-32: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i32 [[SVAR_IN:%.+]], i32 [[SFVAR_IN:%.+]])
+      // LAMBDA-64: [[G_PRIVATE_ADDR:%.+]] = alloca i64,
+      // LAMBDA-32: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA-64: [[G1_PRIVATE_ADDR:%.+]] = alloca i64,
+      // LAMBDA-32: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA-64: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i64,
+      // LAMBDA-32: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i32,
+      // LAMBDA-64: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca i64,
+      // LAMBDA-32: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca i32,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+      // LAMBDA-64: store i64 [[G_IN]], i64* [[G_PRIVATE_ADDR]],
+      // LAMBDA-32: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
+      // LAMBDA-64: store i64 [[G1_IN]], i64* [[G1_PRIVATE_ADDR]],
+      // LAMBDA-32: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA-64: store i64 [[SVAR_IN]], i64* [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA-32: store i32 [[SVAR_IN]], i32* [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA-64: store i64 [[SFVAR_IN]], i64* [[SFVAR_PRIVATE_ADDR]],
+      // LAMBDA-32: store i32 [[SFVAR_IN]], i32* [[SFVAR_PRIVATE_ADDR]],
+
+      // init private variables
+      // LAMBDA-64: [[G_IN_REF:%.+]] = bitcast i64* [[G_PRIVATE_ADDR]] to double*
+      // LAMBDA-32: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA-64: [[G1_IN_REF:%.+]] = bitcast i64* [[G1_PRIVATE_ADDR]] to double*
+      // LAMBDA-64: store double* [[G1_IN_REF]], double** [[G1_IN_ADDR_REF:%.+]],
+      // LAMBDA-64: [[SVAR_IN_REF:%.+]] = bitcast i64* [[SVAR_PRIVATE_ADDR]] to i32*
+      // LAMBDA-64: [[SFVAR_IN_REF:%.+]] = bitcast i64* [[SFVAR_PRIVATE_ADDR]] to float*
+      // LAMBDA-32: [[SFVAR_IN_REF:%.+]] = bitcast i32* [[SFVAR_PRIVATE_ADDR]] to float*
+      // LAMBDA-64: [[G1_IN_REF:%.+]] = load double*, double** [[G1_IN_ADDR_REF]],
+      // LAMBDA-32: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
+      g = 1;
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]],
+      // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]],
+      // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+      // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+      // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
+      // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
+
+      // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA-64: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
+      // LAMBDA-32: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+      [&]() {
+        // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+        // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+        g = 2;
+        g1 = 2;
+        svar = 4;
+        sfvar = 8.0;
+        // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+        // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+        // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+        // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+        // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+        // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+        // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+        // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+        // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+        // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+        // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+        // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+        // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target teams distribute lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  int i;
+
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]]([2 x i{{[0-9]+}}]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[S_VAR_IN:%.+]])
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_IN]], i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} [[S_VAR_IN]], i{{[0-9]+}}* [[SVAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[T_VAR_ADDR_REF:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-64: [[SVAR_ADDR_REF:%.+]] = bitcast i64* [[SVAR_ADDR]] to i32*
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// assignment: vec[i] = t_var;
+// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK-64: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
+// CHECK-64: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
+// CHECK-32: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR]],
+// CHECK: ret void
+
+// template tmain
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
+// CHECK: ret
+
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip init of bound and global tid
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_IN1]], i{{[0-9]+}}* [[T_VAR_ADDR1]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK-64: [[T_VAR_ADDR_REF1:%.+]] = bitcast i64* [[T_VAR_ADDR1]] to i32*
+// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: call void @__kmpc_for_static_init_4(
+// assignment: vec[i] = t_var;
+// CHECK: [[IV_VAL1:%.+]] =
+// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]],
+// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]],
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK-64: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
+// CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR1]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: ret void
+#endif
diff --git a/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp b/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp
index 5cfe782..0739846 100644
--- a/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -25,7 +27,7 @@
 const S2 ba[5];
 class S3 {
   int a;
-  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+  S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}}
 
 public:
   S3() : a(0) {}
@@ -52,7 +54,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -215,10 +217,12 @@
 #pragma omp target teams distribute lastprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+// expected-error@+1 {{firstprivate variable cannot be lastprivate}} expected-note@+1 {{defined as firstprivate}}
+#pragma omp target teams distribute firstprivate(m) lastprivate(m)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute lastprivate(n) firstprivate(n) // OK
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i) foo();
 
   static int si;
diff --git a/test/OpenMP/target_teams_distribute_loop_messages.cpp b/test/OpenMP/target_teams_distribute_loop_messages.cpp
index 441abcb..b090c28 100644
--- a/test/OpenMP/target_teams_distribute_loop_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_loop_messages.cpp
@@ -1,8 +1,10 @@
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
 
 class S {
   int a;
-  S() : a(0) {}
+  S() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S(int v) : a(v) {}
@@ -607,7 +609,8 @@
 
 void test_loop_firstprivate_lastprivate() {
   S s(4);
-#pragma omp target teams distribute lastprivate(s) firstprivate(s)
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}}
   for (int i = 0; i < 16; ++i)
     ;
 }
diff --git a/test/OpenMP/target_teams_distribute_map_messages.cpp b/test/OpenMP/target_teams_distribute_map_messages.cpp
index eafdccb..826a09d 100644
--- a/test/OpenMP/target_teams_distribute_map_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_map_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/target_teams_distribute_messages.cpp b/test/OpenMP/target_teams_distribute_messages.cpp
index 91f3c76..d3be0f6 100644
--- a/test/OpenMP/target_teams_distribute_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++11 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_misc_messages.c b/test/OpenMP/target_teams_distribute_misc_messages.c
index bf4df08..6d17b51 100644
--- a/test/OpenMP/target_teams_distribute_misc_messages.c
+++ b/test/OpenMP/target_teams_distribute_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp target teams distribute'}}
 #pragma omp target teams distribute
 
@@ -285,12 +287,15 @@
     ;
 
   int x, y, z;
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
 #pragma omp target teams distribute lastprivate(x) firstprivate(x)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+1 2 {{lastprivate variable cannot be firstprivate}} expected-note@+1 2 {{defined as lastprivate}}
 #pragma omp target teams distribute lastprivate(x, y) firstprivate(x, y)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+1 3 {{lastprivate variable cannot be firstprivate}} expected-note@+1 3 {{defined as lastprivate}}
 #pragma omp target teams distribute lastprivate(x, y, z) firstprivate(x, y, z)
   for (i = 0; i < 16; ++i)
     ;
diff --git a/test/OpenMP/target_teams_distribute_nowait_messages.cpp b/test/OpenMP/target_teams_distribute_nowait_messages.cpp
index 17aa1dc..b7a9a25 100644
--- a/test/OpenMP/target_teams_distribute_nowait_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_num_teams_messages.cpp b/test/OpenMP/target_teams_distribute_num_teams_messages.cpp
index 9b36e9c..8b351dc 100644
--- a/test/OpenMP/target_teams_distribute_num_teams_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_num_teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp b/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp
index 8df3c97..a22b561 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -24,8 +28,10 @@
 public:
   S7(typename T::type v) : a(v) {
 #pragma omp target teams distribute parallel for private(a) private(this->a) private(T::a)
-    for (int k = 0; k < a.a; ++k)
+    for (int k = 0; k < a.a; ++k) {
       ++this->a.a;
+#pragma omp cancel for
+    }
   }
   S7 &operator=(S7 &s) {
 #pragma omp target teams distribute parallel for private(a) private(this->a)
@@ -43,6 +49,7 @@
   }
 };
 // CHECK: #pragma omp target teams distribute parallel for private(this->a) private(this->a) private(T::a)
+// CHECK: #pragma omp cancel for
 // CHECK: #pragma omp target teams distribute parallel for private(this->a) private(this->a)
 // CHECK: #pragma omp target teams distribute parallel for default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
 
@@ -86,7 +93,7 @@
 #pragma omp target teams distribute parallel for
   for (int i=0; i < 2; ++i)
     a = 2;
-// CHECK: #pragma omp target teams distribute parallel for
+// CHECK: #pragma omp target teams distribute parallel for{{$}}
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: a = 2;
 #pragma omp target teams distribute parallel for private(argc, b), firstprivate(c, d), collapse(2)
@@ -113,10 +120,10 @@
 // CHECK: #pragma omp target teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
-#pragma omp target teams distribute parallel for linear(d)
+#pragma omp target teams distribute parallel for
   for (int k = 0; k < 10; ++k)
     e += d + argc;
-// CHECK: #pragma omp target teams distribute parallel for linear(d)
+// CHECK: #pragma omp target teams distribute parallel for
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
   return T();
@@ -158,10 +165,10 @@
 // CHECK: #pragma omp target teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
-#pragma omp target teams distribute parallel for linear(d)
+#pragma omp target teams distribute parallel for
   for (int k = 0; k < 10; ++k)
     e += d + argc;
-// CHECK: #pragma omp target teams distribute parallel for linear(d)
+// CHECK: #pragma omp target teams distribute parallel for
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
   return (0);
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
new file mode 100644
index 0000000..38bbace
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
@@ -0,0 +1,118 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix HCK1 --check-prefix HCK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix HCK1 --check-prefix HCK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix HCK1 --check-prefix HCK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix HCK1 --check-prefix HCK1-64
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+
+// Test target codegen - host bc file has to be created first. (no significant differences with host version of target region)
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix TCK1 --check-prefix TCK1-64 
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix TCK1 --check-prefix TCK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc 
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix TCK1 --check-prefix TCK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix TCK1 --check-prefix TCK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY1 
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+#ifdef CK1
+
+// HCK1: define{{.*}} i32 @{{.+}}target_teams_fun{{.*}}(
+int target_teams_fun(int *g){
+  int n = 1000;
+  int a[1000];
+  int te = n / 128;
+  int th = 128;
+  // discard n_addr
+  // HCK1: alloca i32,
+  // HCK1: [[TE:%.+]] alloca i32,
+  // HCK1: [[TH:%.+]] = alloca i32,
+  // discard capture expressions for te and th
+  // HCK1: = alloca i32,
+  // HCK1: = alloca i32,
+  // HCK1: [[N_CAST:%.+]] = alloca i{{32|64}},
+  // HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
+  // HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+  // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
+  // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
+  // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
+  // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, 
+
+  // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]])
+  #pragma omp target teams distribute parallel for num_teams(te), thread_limit(th)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+    #pragma omp cancel for
+  }
+
+  // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0),
+  // HCK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
+  {{{
+  #pragma omp target teams distribute parallel for is_device_ptr(g)
+  for(int i = 0; i < n; i++) {
+    a[i] = g[0];
+  }
+  }}}
+
+  // outlined target regions
+  // HCK1: define internal void @[[OFFL1]](i{{32|64}} [[N_ARG:%.+]], i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]])
+  // TCK1: define void @{{.+}}target_teams_fun{{.*}}(i{{32|64}} [[N_ARG:%.+]], {{.+}}, i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]])
+  // CK1: [[N_ADDR:%.+]] = alloca i{{32|64}},
+  // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}},
+  // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}},
+  // TCK1: store {{.+}} [[N_ARG]], {{.+}} [[N_ADDR]],
+  // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
+  // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
+  // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
+  // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
+  // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
+  // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
+  // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
+  // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
+  // CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.+}} @__kmpc_fork_call(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // HCK1: define internal void @[[OFFL2]](
+  // TCK1: define void @{{.+}}target_teams_fun{{.+}}(
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.+}} @__kmpc_fork_call(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  return a[0];
+}
+
+#endif // CK1
+#endif // HEADER 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
new file mode 100644
index 0000000..80e3892
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
@@ -0,0 +1,157 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X][Y];
+
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target teams distribute parallel for collapse(2)
+    for(int i = 0; i < X; i++) {
+      for(int j = 0; j < Y; j++) {
+        a[i][j] = (T)0;
+      }
+    }
+    // CK1: define internal void @[[OFFL1]](
+    // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+    // CK1: ret void
+
+    // CK1: define internal void @[[OUTL1]]({{.+}})
+    // discard loop variables not needed here
+    // CK1: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
+    // CK1: store i32 56087, i32* [[OMP_UB]],
+    // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+    // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+    // CK1: call void @__kmpc_for_static_fini(
+    // CK1: ret void
+
+    // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+    // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+    // CK1: call void @__kmpc_for_static_fini(
+    // CK1: ret void
+
+    return a[0][0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n, int m>
+int tmain(T argc) {
+  T a[n][m];
+  #pragma omp target teams distribute parallel for collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = (T)0;
+    }
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int m = 2;
+  int a[n][m];
+  #pragma omp target teams distribute parallel for collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = 0;
+    }
+  }
+  return tmain<int, 10, 2>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i64,
+// CK2: store i64 {{.+}}, i64* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]],
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_{{[4|8]}}({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// discard loop variables not needed here
+// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
+// CK2: store i32 {{.+}}, i32* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[TPAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_collapse_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_collapse_messages.cpp
index bd94ba5..61e76cd 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_collapse_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp %s -std=c++98
 // RUN: %clang_cc1 -verify -fopenmp %s -std=c++11
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++98
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++11
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp
index db47e68..d7c3fba 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_defaultmap_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_defaultmap_messages.cpp
index ad0295d..8de99ae 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_defaultmap_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_defaultmap_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_depend_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_depend_codegen.cpp
new file mode 100644
index 0000000..ca4e6a6
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_depend_codegen.cpp
@@ -0,0 +1,261 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+extern int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+  static long *plocal;
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID:@.+]], i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]*
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[ID]], i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target teams distribute parallel for device(global + a) depend(in: global) depend(out: a, b, cn[4])
+  for (int i = 0; i < 10; ++i) {
+  }
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+
+  // CHECK:       [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
+  // CHECK:       br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
+  // CHECK:       [[THEN]]:
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+
+  // CHECK:       [[ELSE]]:
+  // CHECK-NOT:   getelementptr inbounds [2 x i8*], [2 x i8*]*
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+  // CHECK:       [[EXIT]]:
+
+  #pragma omp target teams distribute parallel for device(global + a) nowait depend(inout: global, a, bn) if(target:a)
+  for (int i = 0; i < *plocal; ++i) {
+    static int local1;
+    *plocal = global;
+    local1 = global;
+  }
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]*
+  // CHECK:       getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[ID]], i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target teams distribute parallel for if(0) firstprivate(global) depend(out:global)
+  for (int i = 0; i < global; ++i) {
+    global += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0:@.+]]()
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias)
+// CHECK:       store void (i8*, ...)* null, void (i8*, ...)** %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 0, i32 0)
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT0]]()
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 0)
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    load i32, i32* [[AA_ADDR]], align
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT2]](i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_depend_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_depend_messages.cpp
index c468895..253e139 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_depend_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_device_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_device_messages.cpp
index b1b86cd..64b682a 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_device_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
new file mode 100644
index 0000000..28619f8
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
@@ -0,0 +1,270 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target teams distribute parallel for
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL2:.+]](
+    #pragma omp target teams distribute parallel for dist_schedule(static)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL3:.+]](
+    #pragma omp target teams distribute parallel for dist_schedule(static, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: define internal void @[[OFFL1]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+
+  // CK1: define internal void @[[OFFL3]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+  int m = 10;
+#pragma omp target teams distribute parallel for
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+  int m = 10;
+#pragma omp target teams distribute parallel for
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_messages.cpp
index d9913bc..a8abaff 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
new file mode 100644
index 0000000..ce1cb1e
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
@@ -0,0 +1,523 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HCHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HCHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HCHECK --check-prefix HCHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HCHECK --check-prefix HCHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY  
+
+// Test target codegen - host bc file has to be created first. (no significant differences with host version of target region)
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64 
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc 
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix TLAMBDA --check-prefix TLAMBDA-64 
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc 
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY 
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target teams distribute parallel for firstprivate(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// HCHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// HCHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// HCHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// HCHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// HCHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// HCHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // HLAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // HLAMBDA-LABEL: @main
+  // HLAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // HLAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // HLAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // HLAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
+    // HLAMBDA:  ret
+#pragma omp target teams distribute parallel for firstprivate(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // HLAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+    // TLAMBDA: define void @[[LOFFL1:.+]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]],
+    // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]],
+    // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]],
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+    // skip loop vars
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+    // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+    // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
+    // Skip global and bound tid vars, and prev lb and ub vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+    // skip loop vars
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+    // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+    // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+
+    // use of private vars
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_CONV]],
+    // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]]
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_CONV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute parallel for firstprivate(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// HCHECK: define {{.*}}i{{[0-9]+}} @main()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, 
+// HCHECK: call void @[[OFFL1:.+]](
+// HCHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// HCHECK:  ret
+
+// HCHECK: define{{.*}} void @[[OFFL1]](
+// TCHECK: define{{.*}} void @[[OFFL1:.+]](
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]],
+// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]],
+// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]],
+// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]],
+// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[PAR_OUTL]]({{.+}})
+// Skip global and bound tid vars, and prev lb ub vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// HCHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// HCHECK: call void @[[TOFFL1:.+]](
+// HCHECK:  ret
+
+// HCHECK: define {{.*}}void @[[TOFFL1]]({{.+}})
+// TCHECK: define {{.*}}void @[[TOFFL1:.+]]({{.+}})
+// CHECK-DAG: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK-DAG: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK-DAG: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]],
+// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]],
+// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
+// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[TPAR_OUTL]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp
index d4e7083..a278e9f 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -124,7 +126,8 @@
 #pragma omp target teams distribute parallel for firstprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute parallel for lastprivate(argc), firstprivate(argc) // OK
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute parallel for lastprivate(argc), firstprivate(argc)
   for (i = 0; i < argc; ++i) foo();
 
   return 0;
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
new file mode 100644
index 0000000..8cd90a6
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
@@ -0,0 +1,181 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+void fn1();
+void fn2();
+void fn3();
+void fn4();
+void fn5();
+void fn6();
+
+int Arg;
+
+// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
+void gtid_test() {
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+#pragma omp target teams distribute parallel for
+  for(int i = 0 ; i < 100; i++) {}
+  // CHECK: define internal void [[OFFLOADING_FUN_0]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+  // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK:  call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
+  // CHECK: call void @__kmpc_for_static_fini(
+
+  // CHECK: define{{.+}} void [[OMP_OUTLINED_0]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: ret
+#pragma omp target teams distribute parallel for if (parallel: false)
+  for(int i = 0 ; i < 100; i++) {
+  // CHECK: define internal void [[OFFLOADING_FUN_1]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+  // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @__kmpc_serialized_parallel(
+  // CHECK: call void [[OMP_OUTLINED_1:@.+]](
+  // CHECK: call void @__kmpc_end_serialized_parallel(
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @{{.+}}gtid_test
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: ret
+    gtid_test();
+  }
+}
+
+
+template <typename T>
+int tmain(T Arg) {
+#pragma omp target teams distribute parallel for if (true)
+  for(int i = 0 ; i < 100; i++) {
+    fn1();
+  }
+#pragma omp target teams distribute parallel for if (false)
+  for(int i = 0 ; i < 100; i++) {
+    fn2();
+  }
+#pragma omp target teams distribute parallel for if (parallel: Arg)
+  for(int i = 0 ; i < 100; i++) {
+    fn3();
+  }
+  return 0;
+}
+
+// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
+int main() {
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
+// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
+#pragma omp target teams distribute parallel for if (true)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_0]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void
+    // CHECK: call void @__kmpc_for_static_fini(
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn4
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    fn4();
+  }
+
+#pragma omp target teams distribute parallel for if (false)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_1]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void @__kmpc_serialized_parallel(
+    // CHECK: call void [[OMP_OUTLINED_3:@.+]](
+    // CHECK: call void @__kmpc_end_serialized_parallel(
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn5
+    // CHECK: call void @__kmpc_for_static_fini(
+    fn5();
+  }
+
+#pragma omp target teams distribute parallel for if (Arg)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_2]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void
+    // CHECK: call void @__kmpc_serialized_parallel(
+    // CHECK: call void [[OMP_OUTLINED_4:@.+]](
+    // CHECK: call void @__kmpc_end_serialized_parallel(
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn6
+    // CHECK: call void @__kmpc_for_static_fini(
+    fn6();
+  }
+
+  return tmain(Arg);
+}
+
+// CHECK-LABEL: define {{.+}} @{{.+}}tmain
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn1
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// CHECK: call void [[T_OUTLINE_FUN_2:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn2
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// call void [[T_OUTLINE_FUN_3:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn3
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
index 82d39ac..6f990f7 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_is_device_ptr_ast_print.cpp b/test/OpenMP/target_teams_distribute_parallel_for_is_device_ptr_ast_print.cpp
index c566319..b3944bc 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_is_device_ptr_ast_print.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_is_device_ptr_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -56,7 +60,7 @@
 // CHECK-NEXT: int aa[10];
 // CHECK-NEXT: arr &raa = this->aa;
 // CHECK-NEXT: func(
-// CHECK-NEXT: #pragma omp target teams distribute parallel for is_device_ptr(this->k)
+// CHECK-NEXT: #pragma omp target teams distribute parallel for is_device_ptr(this->k){{$}}
 // CHECK-NEXT: for (int i = 0; i < 100; i++)
 // CHECK-NEXT: ;
 // CHECK-NEXT: #pragma omp target teams distribute parallel for is_device_ptr(this->z)
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_is_device_ptr_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_is_device_ptr_messages.cpp
index eaa3190..e19e46c 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_is_device_ptr_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_is_device_ptr_messages.cpp
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -std=c++11 -verify -fopenmp %s
+
+// RUN: %clang_cc1 -std=c++11 -verify -fopenmp-simd %s
 struct ST {
   int *a;
 };
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
new file mode 100644
index 0000000..acb1c0b
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
@@ -0,0 +1,454 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target teams distribute parallel for lastprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target teams distribute parallel for lastprivate(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* {{.+}}, i32* {{.+}}, {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]], {{.+}} [[G_IN:%.+]])
+      // skip gbl and bound tid
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: [[G1_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[SVAR_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[G_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA-64: [[G1_REF:%.+]] = alloca double*,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+       
+      // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
+    
+      // LAMBDA-64-DAG: [[G_TGT:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+      // LAMBDA-32-DAG: [[G_TGT:%.+]] = load {{.+}}, {{.+}} [[G_ADDR]],
+      // LAMBDA-64-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
+      // LAMBDA-32-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_ADDR]],
+      // LAMBDA-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+      // LAMBDA-DAG: [[SFVAR_TGT:%.+]] = bitcast {{.+}} [[SFVAR_ADDR]] to
+      
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_TGT]],
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_TGT]],
+      // LAMBDA-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+      // LAMBDA-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[SFVAR_TGT]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+
+      // LAMBDA: define{{.*}} internal{{.*}} void @[[LPAR_OUTL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]], {{.+}} [[G_IN:%.+]])
+      // skip tid and prev variables
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: [[G1_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[SVAR_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[G_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA-64: [[G1_REF:%.+]] = alloca double*,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+       
+      // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
+    
+      // LAMBDA-64-DAG: [[G_TGT:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+      // LAMBDA-32-DAG: [[G_TGT:%.+]] = load {{.+}}, {{.+}} [[G_ADDR]],
+      // LAMBDA-64-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
+      // LAMBDA-32-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_ADDR]],
+      // LAMBDA-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+      // LAMBDA-DAG: [[SFVAR_TGT:%.+]] = bitcast {{.+}} [[SFVAR_ADDR]] to
+
+
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+      
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_TGT]],
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_TGT]],
+      // LAMBDA-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+      // LAMBDA-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[SFVAR_TGT]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+
+      [&]() {
+	// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+	// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+	g = 2;
+	g1 = 2;
+	svar = 4;
+	sfvar = 8.0;
+	// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+	// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+	// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+	// LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+	// LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+	// LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+	// LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+	// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+	// LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+	// LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+	// LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target teams distribute parallel for lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  int i;
+
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}{{.*}} [[S_VAR_IN:%.+]])
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
+// CHECK: store {{.+}} [[T_VAR_IN]], {{.+}} [[T_VAR_ADDR]],
+// CHECK: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
+// CHECK: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
+// CHECK: store {{.+}} [[S_VAR_IN]], {{.+}} [[SVAR_ADDR]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[TVAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[TVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR]],
+// CHECK-DAG: [[VEC_TGT_REF:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_TGT_REF]],
+// CHECK-DAG: [[S_ARR_BEGIN:%.+]] = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_TGT_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_TGT_BCAST]],
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+// CHECK: ret void
+
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}{{.*}} [[S_VAR_IN:%.+]])
+
+// gbl and bound tid vars, prev lb and ub vars
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
+// CHECK: store {{.+}} [[T_VAR_IN]], {{.+}} [[T_VAR_ADDR]],
+// CHECK: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
+// CHECK: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
+// CHECK: store {{.+}} [[S_VAR_IN]], {{.+}} [[SVAR_ADDR]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[TVAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// skip body: code generation routine is same as distribute parallel for lastprivate
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[TVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR]],
+// CHECK-DAG: [[VEC_TGT_REF:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_TGT_REF]],
+// CHECK-DAG: [[S_ARR_BEGIN:%.+]] = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_TGT_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_TGT_BCAST]],
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+// CHECK: ret void
+
+// template tmain
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](
+// CHECK: ret
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN1]], {{.+}} [[VEC_ADDR1]],
+// CHECK: store {{.+}} [[T_VAR_IN1]], {{.+}} [[T_VAR_ADDR1]],
+// CHECK: store {{.+}} [[S_ARR_IN1]], {{.+}} [[S_ARR_ADDR1]],
+// CHECK: store {{.+}} [[VAR_IN1]], {{.+}} [[VAR_ADDR1]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[T_VAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR1]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR1]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR1]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR1]], 
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[T_VAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR1]],
+// CHECK-DAG: [[VEC_TGT_BCAST:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_TGT_BCAST]],
+// CHECK-DAG: {{.+}} = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_BCAST]],  
+// CHECK: ret void
+
+// CHECK: define internal void [[TPAR_OUTL:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid, and prev lb and ub vars
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN1]], {{.+}} [[VEC_ADDR1]],
+// CHECK: store {{.+}} [[T_VAR_IN1]], {{.+}} [[T_VAR_ADDR1]],
+// CHECK: store {{.+}} [[S_ARR_IN1]], {{.+}} [[S_ARR_ADDR1]],
+// CHECK: store {{.+}} [[VAR_IN1]], {{.+}} [[VAR_ADDR1]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[T_VAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR1]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR1]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR1]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR1]], 
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// skip body: code generation routine is same as distribute parallel for lastprivate
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[T_VAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR1]],
+// CHECK-DAG: [[VEC_TGT_BCAST:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_TGT_BCAST]],
+// CHECK-DAG: {{.+}} = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_BCAST]],  
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp
index ccecb63..b21b911 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -25,7 +27,7 @@
 const S2 ba[5];
 class S3 {
   int a;
-  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+  S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}}
 
 public:
   S3() : a(0) {}
@@ -52,7 +54,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -216,10 +218,12 @@
 #pragma omp target teams distribute parallel for lastprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute parallel for firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+// expected-error@+1 {{firstprivate variable cannot be lastprivate}} expected-note@+1 {{defined as firstprivate}}
+#pragma omp target teams distribute parallel for firstprivate(m) lastprivate(m)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute parallel for lastprivate(n) firstprivate(n) // OK
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute parallel for lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i) foo();
 
   static int si;
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_linear_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_linear_messages.cpp
deleted file mode 100644
index 82220dc..0000000
--- a/test/OpenMP/target_teams_distribute_parallel_for_linear_messages.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
-
-namespace X {
-  int x;
-};
-
-struct B {
-  static int ib; // expected-note {{'B::ib' declared here}}
-  static int bfoo() { return 8; }
-};
-
-int bfoo() { return 4; }
-
-int z;
-const int C1 = 1;
-const int C2 = 2;
-void test_linear_colons()
-{
-  int B = 0;
-
-#pragma omp target teams distribute parallel for linear(B:bfoo())
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target teams distribute parallel for linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}}
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target teams distribute parallel for linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}}
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target teams distribute parallel for linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}}
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target teams distribute parallel for linear(B:B::bfoo())
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target teams distribute parallel for linear(X::x : ::z)
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target teams distribute parallel for linear(B,::z, X::x)
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target teams distribute parallel for linear(::z)
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target teams distribute parallel for linear(B::bfoo()) // expected-error {{expected variable name}}
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target teams distribute parallel for linear(B::ib,B:C1+C2)
-  for (int i = 0; i < 10; ++i) ;
-}
-
-template<int L, class T, class N> T test_template(T* arr, N num) {
-  N i;
-  T sum = (T)0;
-  T ind2 = - num * L; // expected-note {{'ind2' defined here}}
-
-#pragma omp target teams distribute parallel for linear(ind2:L) // expected-error {{argument of a linear clause should be of integral or pointer type}}
-  for (i = 0; i < num; ++i) {
-    T cur = arr[(int)ind2];
-    ind2 += L;
-    sum += cur;
-  }
-  return T();
-}
-
-template<int LEN> int test_warn() {
-  int ind2 = 0;
-#pragma omp target teams distribute parallel for linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}}
-  for (int i = 0; i < 100; i++) {
-    ind2 += LEN;
-  }
-  return ind2;
-}
-
-struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}}
-extern S1 a;
-class S2 {
-  mutable int a;
-public:
-  S2():a(0) { }
-};
-const S2 b; // expected-note 2 {{'b' defined here}}
-const S2 ba[5];
-class S3 {
-  int a;
-public:
-  S3():a(0) { }
-};
-const S3 ca[5];
-class S4 {
-  int a;
-  S4();
-public:
-  S4(int v):a(v) { }
-};
-class S5 {
-  int a;
-  S5():a(0) {}
-public:
-  S5(int v):a(v) { }
-};
-
-S3 h;
-#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
-
-template<class I, class C> int foomain(I argc, C **argv) {
-  I e(4);
-  I g(5);
-  int i;
-  int &j = i;
-
-#pragma omp target teams distribute parallel for linear // expected-error {{expected '(' after 'linear'}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear () // expected-error {{expected expression}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (argc : 5)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (S1) // expected-error {{'S1' does not refer to a value}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (a, b:B::ib) // expected-error {{linear variable with incomplete type 'S1'}} expected-error {{const-qualified variable cannot be linear}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (argv[1]) // expected-error {{expected variable name}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear(e, g)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  #pragma omp parallel
-  {
-    int v = 0;
-    int i;
-    #pragma omp target teams distribute parallel for linear(v:i)
-    for (int k = 0; k < argc; ++k) { i = k; v += i; }
-  }
-
-#pragma omp target teams distribute parallel for linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  int v = 0;
-
-#pragma omp target teams distribute parallel for linear(v:j)
-  for (int k = 0; k < argc; ++k) { ++k; v += j; }
-
-#pragma omp target teams distribute parallel for linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-  return 0;
-}
-
-namespace A {
-double x;
-#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}}
-}
-namespace C {
-using A::x;
-}
-
-int main(int argc, char **argv) {
-  double darr[100];
-  // expected-note@+1 {{in instantiation of function template specialization 'test_template<-4, double, int>' requested here}}
-  test_template<-4>(darr, 4);
-  // expected-note@+1 {{in instantiation of function template specialization 'test_warn<0>' requested here}}
-  test_warn<0>();
-
-  S4 e(4); // expected-note {{'e' defined here}}
-  S5 g(5); // expected-note {{'g' defined here}}
-  int i;
-  int &j = i;
-
-#pragma omp target teams distribute parallel for linear // expected-error {{expected '(' after 'linear'}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear () // expected-error {{expected expression}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (argc)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (S1) // expected-error {{'S1' does not refer to a value}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-
-#pragma omp target teams distribute parallel for linear (a, b) // expected-error {{linear variable with incomplete type 'S1'}} expected-error {{const-qualified variable cannot be linear}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear (argv[1]) // expected-error {{expected variable name}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear(e, g) // expected-error {{argument of a linear clause should be of integral or pointer type, not 'S4'}} expected-error {{argument of a linear clause should be of integral or pointer type, not 'S5'}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp parallel
-{
-  int i;
-#pragma omp target teams distribute parallel for linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear(i : 4)
-  for (int k = 0; k < argc; ++k) { ++k; i += 4; }
-}
-
-#pragma omp target teams distribute parallel for linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
-  return 0;
-}
-
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp
index d912737..01bf38d 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp
@@ -1,8 +1,10 @@
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
 
 class S {
   int a;
-  S() : a(0) {}
+  S() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S(int v) : a(v) {}
@@ -605,7 +607,8 @@
 
 void test_loop_firstprivate_lastprivate() {
   S s(4);
-#pragma omp target teams distribute parallel for lastprivate(s) firstprivate(s)
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute parallel for lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}}
   for (int i = 0; i < 16; ++i)
     ;
 }
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp
index 1fb466e..5cba1e7 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_messages.cpp
index cb76ef2..551acd2 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
@@ -27,7 +29,7 @@
 #pragma omp target teams distribute parallel for } // expected-warning {{extra tokens at the end of '#pragma omp target teams distribute parallel for' are ignored}}
   for (int i = 0; i < argc; ++i)
     foo();
-#pragma omp target teams distribute parallel for
+#pragma omp target teams distribute parallel for linear(argc) // expected-error {{unexpected OpenMP clause 'linear' in directive '#pragma omp target teams distribute parallel for'}}
   for (int i = 0; i < argc; ++i)
     foo();
 // expected-warning@+1 {{extra tokens at the end of '#pragma omp target teams distribute parallel for' are ignored}}
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_misc_messages.c b/test/OpenMP/target_teams_distribute_parallel_for_misc_messages.c
index 769be6c..820eb11 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_misc_messages.c
+++ b/test/OpenMP/target_teams_distribute_parallel_for_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp target teams distribute parallel for'}}
 #pragma omp target teams distribute parallel for
 
@@ -285,12 +287,15 @@
     ;
 
   int x, y, z;
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
 #pragma omp target teams distribute parallel for lastprivate(x) firstprivate(x)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+1 2 {{lastprivate variable cannot be firstprivate}} expected-note@+1 2 {{defined as lastprivate}}
 #pragma omp target teams distribute parallel for lastprivate(x, y) firstprivate(x, y)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+1 3 {{lastprivate variable cannot be firstprivate}} expected-note@+1 3 {{defined as lastprivate}}
 #pragma omp target teams distribute parallel for lastprivate(x, y, z) firstprivate(x, y, z)
   for (i = 0; i < 16; ++i)
     ;
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_nowait_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_nowait_messages.cpp
index 30a6224..d0e6799 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_nowait_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -fopenmp %s
 
+// RUN: %clang_cc1 -fsyntax-only -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_num_teams_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_num_teams_messages.cpp
index fdc2d9c..ceb4fa5 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_num_teams_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_num_teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_num_threads_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_num_threads_messages.cpp
index afd8c36..f558936 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_num_threads_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
new file mode 100644
index 0000000..2523696
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
@@ -0,0 +1,369 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HHECK --check-prefix HCHECK-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+
+// Test target codegen - host bc file has to be created first. (no significant differences with host version of target region)
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64 
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc 
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix TLAMBDA --check-prefix TLAMBDA-64 
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc 
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+//
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target teams distribute parallel for private(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// HCHECK-DAG: [[TEST:@.+]] ={{.*}} global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// HCHECK-DAG: [[T_VAR:@.+]] ={{.+}} global i{{[0-9]+}} 333,
+int t_var = 333;
+// HCHECK-DAG: [[VEC:@.+]] ={{.+}} global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// HCHECK-DAG: [[S_ARR:@.+]] ={{.+}} global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// HCHECK-DAG: [[VAR:@.+]] ={{.+}} global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// HCHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // HLAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // HLAMBDA-LABEL: @main
+  // HLAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // HLAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // HLAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+    // HLAMBDA: call void @[[LOFFL1:.+]](
+    // HLAMBDA:  ret
+#pragma omp target teams distribute parallel for private(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // HLAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]]()
+    // TLAMBDA: define{{.*}} void @[[LOFFL1:.+]]()
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}})
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+    // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+    // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute parallel for private(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// HCHECK: define {{.*}}i{{[0-9]+}} @main()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, {{.+}} null, {{.+}} null, i32 0, i32 0) 
+// HCHECK: call void @[[OFFL1:.+]]()
+// HCHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// HCHECK:  ret
+
+// HCHECK: define{{.*}} void @[[OFFL1]]()
+// TCHECK: define{{.*}} void @[[OFFL1:.+]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[PAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// HCHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// HCHECK: call void @[[TOFFL1:.+]]()
+// HCHECK: ret
+
+// HCHECK: define {{.*}}void @[[TOFFL1]]()
+// TCHECK: define void @[[TOFFL1:.+]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[TPAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// prev lb and ub
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// iter variables
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_private_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_private_messages.cpp
index d89cb1a..7750238 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_private_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
new file mode 100644
index 0000000..855273c
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
@@ -0,0 +1,93 @@
+// add -fopenmp-targets
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T>
+T tmain() {
+#pragma omp target teams distribute parallel for proc_bind(master)
+  for(int i = 0; i < 1000; i++) {}
+  return T();
+}
+
+int main() {
+  // CHECK-LABEL: @main
+#pragma omp target teams distribute parallel for proc_bind(spread)
+  for(int i = 0; i < 1000; i++) {}
+#pragma omp target teams distribute parallel for proc_bind(close)
+  for(int i = 0; i < 1000; i++) {}
+  return tmain<int>();
+}
+
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL1:@.+]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL2:@.+]]()
+// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
+// CHECK: ret i32 [[CALL_RET]]
+
+// CHECK: define{{.+}} void [[OFFL1]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[OFFL2]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[TMAIN]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL3:@.+]]()
+
+// CHECK: define{{.+}} [[OFFL3]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_messages.cpp
index b2bbfa8..659684e 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 template <class T, typename S, int N>
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
new file mode 100644
index 0000000..6575659
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
@@ -0,0 +1,353 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+T tmain() {
+  T t_var = T();
+  T vec[] = {1, 2};
+#pragma omp target teams distribute parallel for reduction(+: t_var)
+  for (int i = 0; i < 2; ++i) {
+    t_var += (T) i;
+  }
+  return T();
+}
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target teams distribute parallel for reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}}*{{.*}} [[SIVAR_ARG:%.+]])
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_PAR:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_PAR]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+    // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+    // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+    // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+    // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+    // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+    // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+    // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+    // LAMBDA: ]
+    // LAMBDA: [[CASE1]]:
+    // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+    // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    // LAMBDA: [[CASE2]]:
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA: br
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+
+    // Skip global and bound tid vars, and prev lb and ub vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+    // skip loop vars
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load {{.+}} [[SIVAR_ADDR]]
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+     // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+    // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+    // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+    // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+    // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+    // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+    // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+    // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+    // LAMBDA: ]
+    // LAMBDA: [[CASE1]]:
+    // LAMBDA-64-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+    // LAMBDA-32-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+    // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    // LAMBDA: [[CASE2]]:
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA: br
+
+    sivar += i;
+
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+
+      sivar += 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
+      // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4
+      // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute parallel for reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i{{64|32}}* @{{.+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}}*{{.*}} [[SIVAR_ARG:%.+]])
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_VAL]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_REF:%.+]] = load{{.+}} [[SIVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars, and prev lb and ub
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// skip loop vars
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK-64: [[SIVAR_REF:%.+]] = load {{.+}} [[SIVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call void @[[TOFFL1:.+]]({{.+}})
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}}*{{.*}} [[TVAR_ARG:%.+]])
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]]
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_VAL]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load {{.+}} [[TVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars, and prev lb and ub vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// skip loop vars
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load {{.+}} [[TVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: br
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp
index 59b3541..53c41b6 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
new file mode 100644
index 0000000..8eb6c44
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
@@ -0,0 +1,400 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target teams distribute parallel for
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL2:.+]](
+    #pragma omp target teams distribute parallel for schedule(static)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL3:.+]](
+    #pragma omp target teams distribute parallel for schedule(static, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL4:.+]](
+    #pragma omp target teams distribute parallel for schedule(dynamic)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL5:.+]](
+    #pragma omp target teams distribute parallel for schedule(dynamic, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: define internal void @[[OFFL1]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL3]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL4]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL4:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL4]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL4]]({{.+}})
+  // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+  // CK1: call {{.+}} @__kmpc_dispatch_next_4(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL5]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL5:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL5]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL5]]({{.+}})
+  // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+  // CK1: call {{.+}} @__kmpc_dispatch_next_4(
+  // CK1: ret void
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY 
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+  int m = 10;
+#pragma omp target teams distribute parallel for
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for schedule(dynamic)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for schedule(dynamic, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+  int m = 10;
+#pragma omp target teams distribute parallel for
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for schedule(dynamic)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for schedule(dynamic, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL4:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL5:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL4]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL4:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL4]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL4]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+
+// CK2: define {{.*}}void @[[OFFL5]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL5:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL5]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL5]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT4:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT5:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT4]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT4:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT4]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT4:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT4]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT5]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT5:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT5]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT5:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT5]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_schedule_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_schedule_messages.cpp
index 54e3020..1f4f5f1 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_schedule_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_shared_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_shared_messages.cpp
index 4f90000..f90a533 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_shared_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_shared_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_aligned_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_aligned_messages.cpp
index f121512..18685d2 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_aligned_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp
index 29a0c6d..5de2d4b 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -28,8 +32,9 @@
       ++this->a.a;
   }
   S7 &operator=(S7 &s) {
-#pragma omp target teams distribute parallel for simd private(a) private(this->a)
-    for (int k = 0; k < s.a.a; ++k)
+    int k;
+#pragma omp target teams distribute parallel for simd private(a) private(this->a) linear(k)
+    for (k = 0; k < s.a.a; ++k)
       ++s.a.a;
 
     foo();
@@ -53,7 +58,7 @@
   }
 };
 // CHECK: #pragma omp target teams distribute parallel for simd private(this->a) private(this->a) private(T::a)
-// CHECK: #pragma omp target teams distribute parallel for simd private(this->a) private(this->a)
+// CHECK: #pragma omp target teams distribute parallel for simd private(this->a) private(this->a) linear(k)
 // CHECK: #pragma omp target teams distribute parallel for simd default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
 // CHECK: #pragma omp target teams distribute parallel for simd simdlen(slen1) safelen(slen2) aligned(arr: alen)
 
@@ -108,7 +113,7 @@
 #pragma omp target teams distribute parallel for simd
   for (int i=0; i < 2; ++i)
     a = 2;
-// CHECK: #pragma omp target teams distribute parallel for simd
+// CHECK: #pragma omp target teams distribute parallel for simd{{$}}
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: a = 2;
 #pragma omp target teams distribute parallel for simd private(argc, b), firstprivate(c, d), collapse(2)
@@ -135,10 +140,10 @@
 // CHECK: #pragma omp target teams distribute parallel for simd default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
-#pragma omp target teams distribute parallel for simd simdlen(clen-1) linear(d)
+#pragma omp target teams distribute parallel for simd simdlen(clen-1)
   for (int k = 0; k < 10; ++k)
     e += d + argc;
-// CHECK: #pragma omp target teams distribute parallel for simd simdlen(clen - 1) linear(d)
+// CHECK: #pragma omp target teams distribute parallel for simd simdlen(clen - 1)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target teams distribute parallel for simd safelen(clen-1) aligned(arr:alen)
@@ -186,10 +191,10 @@
 // CHECK: #pragma omp target teams distribute parallel for simd default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
-#pragma omp target teams distribute parallel for simd simdlen(clen-1) linear(d)
+#pragma omp target teams distribute parallel for simd simdlen(clen-1)
   for (int k = 0; k < 10; ++k)
     e += d + argc;
-// CHECK: #pragma omp target teams distribute parallel for simd simdlen(clen - 1) linear(d)
+// CHECK: #pragma omp target teams distribute parallel for simd simdlen(clen - 1)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target teams distribute parallel for simd safelen(clen-1) aligned(arr:N+6)
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
new file mode 100644
index 0000000..61cdd3c
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -0,0 +1,131 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix HCK1 --check-prefix HCK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix HCK1 --check-prefix HCK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix HCK1 --check-prefix HCK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix HCK1 --check-prefix HCK1-64
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+
+// Test target codegen - host bc file has to be created first. (no significant differences with host version of target region)
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix TCK1 --check-prefix TCK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix TCK1 --check-prefix TCK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix TCK1 --check-prefix TCK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix TCK1 --check-prefix TCK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+#ifdef CK1
+
+// HCK1: define{{.*}} i32 @{{.+}}target_teams_fun{{.*}}(
+int target_teams_fun(int *g){
+  int n = 1000;
+  int a[1000];
+  int te = n / 128;
+  int th = 128;
+// discard n_addr
+// HCK1: alloca i32,
+// HCK1: [[TE:%.+]] alloca i32,
+// HCK1: [[TH:%.+]] = alloca i32,
+// HCK1: [[I:%.+]] = alloca i32,
+// discard capture expressions for te and th
+// HCK1: = alloca i32,
+// HCK1: = alloca i32,
+// HCK1: [[I_CAST:%.+]] = alloca i{{32|64}},
+// HCK1: [[N_CAST:%.+]] = alloca i{{32|64}},
+// HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
+// HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+// HCK1: [[I_PAR:%.+]] = load{{.+}}, {{.+}} [[I_CAST]],
+// HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
+// HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
+// HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
+// HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}},
+
+// HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[I_PAR]], i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]])
+  int i;
+#pragma omp target teams distribute parallel for simd num_teams(te), thread_limit(th) aligned(a : 8) safelen(16) simdlen(4) linear(i : n)
+  for(i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+
+  // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0),
+  // HCK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
+  {{{
+  #pragma omp target teams distribute parallel for simd is_device_ptr(g) simdlen(8)
+  for(int i = 0; i < n; i++) {
+    a[i] = g[0];
+  }
+  }}}
+
+  // outlined target regions
+  // HCK1: define internal void @[[OFFL1]](i{{32|64}} [[I_ARG:%.+]], i{{32|64}} [[N_ARG:%.+]], {{.+}}, i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]])
+  // TCK1: define void @{{.+}}target_teams_fun{{.*}}(i{{32|64}} [[I_ARG:%.+]], i{{32|64}} [[N_ARG:%.+]], {{.+}}, i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]])
+  // CK1: [[I_ADDR:%.+]] = alloca i{{32|64}},
+  // CK1: [[N_ADDR:%.+]] = alloca i{{32|64}},
+  // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}},
+  // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}},
+  // TCK1: store {{.+}} [[N_ARG]], {{.+}} [[N_ADDR]],
+  // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
+  // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
+  // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
+  // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
+  // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
+  // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
+  // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
+  // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
+  // CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: [[ARRDECAY:%.+]] = getelementptr inbounds [1000 x i32], [1000 x i32]* %{{.+}}, i32 0, i32 0
+  // CK1: [[ARR_CAST:%.+]] = ptrtoint i32* [[ARRDECAY]] to i{{32|64}}
+  // CK1: [[MASKED_PTR:%.+]] = and i{{32|64}} [[ARR_CAST]], 7
+  // CK1: [[COND:%.+]] = icmp eq i{{32|64}} [[MASKED_PTR]], 0
+  // CK1: call void @llvm.assume(i1 [[COND]])
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.+}} @__kmpc_fork_call(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // HCK1: define internal void @[[OFFL2]](
+  // TCK1: define void @{{.+}}target_teams_fun{{.+}}(
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.+}} @__kmpc_fork_call(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  return a[0];
+}
+
+// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 4}
+// CK1-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 8}
+
+#endif // CK1
+#endif // HEADER
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp
new file mode 100644
index 0000000..f03971b
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp
@@ -0,0 +1,472 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target teams distribute parallel for simd codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// Check that no target code is emmitted if no omptests flag was provided.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK:    [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[A1:@.+]] = internal global [[SA]]
+// CHECK-DAG: [[A2:@.+]] = global [[SA]]
+// CHECK-DAG: [[B1:@.+]] = global [[SB]]
+// CHECK-DAG: [[B2:@.+]] = global [[SB]]
+// CHECK-DAG: [[C1:@.+]] = internal global [[SC]]
+// CHECK-DAG: [[D1:@.+]] = global [[SD]]
+// CHECK-DAG: [[E1:@.+]] = global [[SE]]
+// CHECK-DAG: [[T1:@.+]] = global [[ST1]]
+// CHECK-DAG: [[T2:@.+]] = global [[ST2]]
+
+// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-NTARGET-NOT: type { i8*, i8*, %
+// CHECK-NTARGET-NOT: type { i32, %
+
+// We have 7 target regions
+
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// TCHECK-NOT: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+
+// CHECK-NTARGET-NOT: private constant i8 0
+// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
+
+// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function.
+// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [
+// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+// CHECK-NTARGET: @llvm.global_ctors = appending global [3   x { i32, void ()*, i8* }] [
+
+extern int *R;
+
+struct SA {
+  int arr[4];
+  void foo() {
+    int a = *R;
+    a += 1;
+    *R = a;
+  }
+  SA() {
+    int a = *R;
+    a += 2;
+    *R = a;
+  }
+  ~SA() {
+    int a = *R;
+    a += 3;
+    *R = a;
+  }
+};
+
+struct SB {
+  int arr[8];
+  void foo() {
+    int a = *R;
+    #pragma omp target teams distribute parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 4;
+    *R = a;
+  }
+  SB() {
+    int a = *R;
+    a += 5;
+    *R = a;
+  }
+  ~SB() {
+    int a = *R;
+    a += 6;
+    *R = a;
+  }
+};
+
+struct SC {
+  int arr[16];
+  void foo() {
+    int a = *R;
+    a += 7;
+    *R = a;
+  }
+  SC() {
+    int a = *R;
+    #pragma omp target teams distribute parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 8;
+    *R = a;
+  }
+  ~SC() {
+    int a = *R;
+    a += 9;
+    *R = a;
+  }
+};
+
+struct SD {
+  int arr[32];
+  void foo() {
+    int a = *R;
+    a += 10;
+    *R = a;
+  }
+  SD() {
+    int a = *R;
+    a += 11;
+    *R = a;
+  }
+  ~SD() {
+    int a = *R;
+    #pragma omp target teams distribute parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 12;
+    *R = a;
+  }
+};
+
+struct SE {
+  int arr[64];
+  void foo() {
+    int a = *R;
+    #pragma omp target teams distribute parallel for simd if(target: 0)
+    for (int i = 0; i < 10; ++i)
+      a += 13;
+    *R = a;
+  }
+  SE() {
+    int a = *R;
+    #pragma omp target teams distribute parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 14;
+    *R = a;
+  }
+  ~SE() {
+    int a = *R;
+    #pragma omp target teams distribute parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 15;
+    *R = a;
+  }
+};
+
+template <int x>
+struct ST {
+  int arr[128 + x];
+  void foo() {
+    int a = *R;
+    #pragma omp target teams distribute parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 16 + x;
+    *R = a;
+  }
+  ST() {
+    int a = *R;
+    #pragma omp target teams distribute parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 17 + x;
+    *R = a;
+  }
+  ~ST() {
+    int a = *R;
+    #pragma omp target teams distribute parallel for simd
+    for (int i = 0; i < 10; ++i)
+      a += 18 + x;
+    *R = a;
+  }
+};
+
+// We have to make sure we us all the target regions:
+//CHECK-DAG: define internal void @[[NAME1]](
+//CHECK-DAG: call void @[[NAME1]](
+//CHECK-DAG: define internal void @[[NAME2]](
+//CHECK-DAG: call void @[[NAME2]](
+//CHECK-DAG: define internal void @[[NAME3]](
+//CHECK-DAG: call void @[[NAME3]](
+//CHECK-DAG: define internal void @[[NAME4]](
+//CHECK-DAG: call void @[[NAME4]](
+//CHECK-DAG: define internal void @[[NAME5]](
+//CHECK-DAG: call void @[[NAME5]](
+//CHECK-DAG: define internal void @[[NAME6]](
+//CHECK-DAG: call void @[[NAME6]](
+//CHECK-DAG: define internal void @[[NAME7]](
+//CHECK-DAG: call void @[[NAME7]](
+//CHECK-DAG: define internal void @[[NAME8]](
+//CHECK-DAG: call void @[[NAME8]](
+//CHECK-DAG: define internal void @[[NAME9]](
+//CHECK-DAG: call void @[[NAME9]](
+//CHECK-DAG: define internal void @[[NAME10]](
+//CHECK-DAG: call void @[[NAME10]](
+//CHECK-DAG: define internal void @[[NAME11]](
+//CHECK-DAG: call void @[[NAME11]](
+//CHECK-DAG: define internal void @[[NAME12]](
+//CHECK-DAG: call void @[[NAME12]](
+
+//TCHECK-DAG: define void @[[NAME1]](
+//TCHECK-DAG: define void @[[NAME2]](
+//TCHECK-DAG: define void @[[NAME3]](
+//TCHECK-DAG: define void @[[NAME4]](
+//TCHECK-DAG: define void @[[NAME5]](
+//TCHECK-DAG: define void @[[NAME6]](
+//TCHECK-DAG: define void @[[NAME7]](
+//TCHECK-DAG: define void @[[NAME8]](
+//TCHECK-DAG: define void @[[NAME9]](
+//TCHECK-DAG: define void @[[NAME10]](
+//TCHECK-DAG: define void @[[NAME11]](
+//TCHECK-DAG: define void @[[NAME12]](
+
+// CHECK-NTARGET-NOT: __tgt_target
+// CHECK-NTARGET-NOT: __tgt_register_lib
+// CHECK-NTARGET-NOT: __tgt_unregister_lib
+
+// TCHECK-NOT: __tgt_target
+// TCHECK-NOT: __tgt_register_lib
+// TCHECK-NOT: __tgt_unregister_lib
+
+// We have 2 initializers with priority 500
+//CHECK: define internal void [[P500]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 1 initializers with priority 501
+//CHECK: define internal void [[P501]](
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 6 initializers with default priority
+//CHECK: define internal void [[PMAX]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// Check registration and unregistration
+
+//CHECK:     define internal void @[[UNREGFN:.+]](i8*)
+//CHECK-SAME: comdat($[[REGFN]]) {
+//CHECK:     call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_unregister_lib([[DSCTY]]*)
+
+//CHECK:     define linkonce hidden void @[[REGFN]](i8*)
+//CHECK-SAME: comdat {
+//CHECK:     call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]])
+//CHECK:     call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*),
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_register_lib([[DSCTY]]*)
+
+static __attribute__((init_priority(500))) SA a1;
+SA a2;
+SB __attribute__((init_priority(500))) b1;
+SB __attribute__((init_priority(501))) b2;
+static SC c1;
+SD d1;
+SE e1;
+ST<100> t1;
+ST<1000> t2;
+
+
+int bar(int a){
+  int r = a;
+
+  a1.foo();
+  a2.foo();
+  b1.foo();
+  b2.foo();
+  c1.foo();
+  d1.foo();
+  e1.foo();
+  t1.foo();
+  t2.foo();
+
+  #pragma omp target teams distribute parallel for simd
+  for (int i = 0; i < 10; ++i)
+    ++r;
+
+  return r + *R;
+}
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp
new file mode 100644
index 0000000..faeaeb9
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp
@@ -0,0 +1,86 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target teams distribute parallel for simd codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[CA:%.+]] = type { i32* }
+
+// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}})
+int nested(int a){
+  // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]](
+  #pragma omp target teams distribute parallel for simd
+  for (int i = 0; i < 10; ++i)
+    ++a;
+
+  // CHECK: call void @"[[LNAME:.+]]"([[CA]]*
+  auto F = [&](){
+    #pragma omp parallel
+    {
+      #pragma omp target teams distribute parallel for simd
+      for (int i = 0; i < 10; ++i)
+        ++a;
+    }
+  };
+
+  F();
+
+  return a;
+}
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]](
+
+// CHECK: define {{.*}}void @"[[LNAME]]"(
+// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to
+
+// CHECK: define {{.*}}void [[PNAME]](
+// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]](
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]](
+
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
new file mode 100644
index 0000000..3aca291
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
@@ -0,0 +1,157 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X][Y];
+
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target teams distribute parallel for simd collapse(2)
+    for(int i = 0; i < X; i++) {
+      for(int j = 0; j < Y; j++) {
+        a[i][j] = (T)0;
+      }
+    }
+    // CK1: define internal void @[[OFFL1]](
+    // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+    // CK1: ret void
+
+    // CK1: define internal void @[[OUTL1]]({{.+}})
+    // discard loop variables not needed here
+    // CK1: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
+    // CK1: store i32 56087, i32* [[OMP_UB]],
+    // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+    // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+    // CK1: call void @__kmpc_for_static_fini(
+    // CK1: ret void
+
+    // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+    // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+    // CK1: call void @__kmpc_for_static_fini(
+    // CK1: ret void
+
+    return a[0][0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n, int m>
+int tmain(T argc) {
+  T a[n][m];
+  #pragma omp target teams distribute parallel for simd collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = (T)0;
+    }
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int m = 2;
+  int a[n][m];
+  #pragma omp target teams distribute parallel for simd collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = 0;
+    }
+  }
+  return tmain<int, 10, 2>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i64,
+// CK2: store i64 {{.+}}, i64* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]],
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_{{[4|8]}}({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// discard loop variables not needed here
+// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
+// CK2: store i32 {{.+}}, i32* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[TPAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_messages.cpp
index d0c8a58..069ea65 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp %s -std=c++98
 // RUN: %clang_cc1 -verify -fopenmp %s -std=c++11
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++98
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++11
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp
index 7d07280..a81aace 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_defaultmap_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_defaultmap_messages.cpp
index 6f0ea7e..c2c0e7b 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_defaultmap_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_defaultmap_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_codegen.cpp
new file mode 100644
index 0000000..e8b4a93
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_codegen.cpp
@@ -0,0 +1,261 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+extern int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+  static long *plocal;
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID:@.+]], i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]*
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[ID]], i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target teams distribute parallel for simd device(global + a) depend(in: global) depend(out: a, b, cn[4])
+  for (int i = 0; i < 10; ++i) {
+  }
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+
+  // CHECK:       [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
+  // CHECK:       br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
+  // CHECK:       [[THEN]]:
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+
+  // CHECK:       [[ELSE]]:
+  // CHECK-NOT:   getelementptr inbounds [2 x i8*], [2 x i8*]*
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+  // CHECK:       [[EXIT]]:
+
+  #pragma omp target teams distribute parallel for simd device(global + a) nowait depend(inout: global, a, bn) if(target:a)
+  for (int i = 0; i < *plocal; ++i) {
+    static int local1;
+    *plocal = global;
+    local1 = global;
+  }
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]*
+  // CHECK:       getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[ID]], i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target teams distribute parallel for simd if(0) firstprivate(global) depend(out:global)
+  for (int i = 0; i < global; ++i) {
+    global += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0:@.+]]()
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias)
+// CHECK:       store void (i8*, ...)* null, void (i8*, ...)** %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 0, i32 0)
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT0]]()
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 0)
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    load i32, i32* [[AA_ADDR]], align
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT2]](i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_messages.cpp
index 4a9bf01..7cc0d41 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_device_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_device_messages.cpp
index 6a72579..c7b3bc9 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_device_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
new file mode 100644
index 0000000..281a8d7
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
@@ -0,0 +1,270 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target teams distribute parallel for simd
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL2:.+]](
+    #pragma omp target teams distribute parallel for simd dist_schedule(static)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL3:.+]](
+    #pragma omp target teams distribute parallel for simd dist_schedule(static, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: define internal void @[[OFFL1]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+
+  // CK1: define internal void @[[OFFL3]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+  int m = 10;
+#pragma omp target teams distribute parallel for simd
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for simd dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for simd dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+  int m = 10;
+#pragma omp target teams distribute parallel for simd
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for simd dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for simd dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_messages.cpp
index 4fb3753..079aaf6 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
new file mode 100644
index 0000000..05f7f8c
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -0,0 +1,523 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HCHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HCHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HCHECK --check-prefix HCHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HCHECK --check-prefix HCHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// Test target codegen - host bc file has to be created first. (no significant differences with host version of target region)
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix TLAMBDA --check-prefix TLAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target teams distribute parallel for simd firstprivate(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// HCHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// HCHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// HCHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// HCHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// HCHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// HCHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // HLAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // HLAMBDA-LABEL: @main
+  // HLAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // HLAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // HLAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // HLAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
+    // HLAMBDA:  ret
+#pragma omp target teams distribute parallel for simd firstprivate(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // HLAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+    // TLAMBDA: define void @[[LOFFL1:.+]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]],
+    // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]],
+    // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]],
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+    // skip loop vars
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+    // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+    // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
+    // Skip global and bound tid vars, and prev lb and ub vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+    // skip loop vars
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+    // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+    // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+
+    // use of private vars
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_CONV]],
+    // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]]
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_CONV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute parallel for simd firstprivate(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// HCHECK: define {{.*}}i{{[0-9]+}} @main()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, 
+// HCHECK: call void @[[OFFL1:.+]](
+// HCHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// HCHECK:  ret
+
+// HCHECK: define{{.*}} void @[[OFFL1]](
+// TCHECK: define{{.*}} void @[[OFFL1:.+]](
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]],
+// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]],
+// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]],
+// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]],
+// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[PAR_OUTL]]({{.+}})
+// Skip global and bound tid vars, and prev lb ub vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// HCHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// HCHECK: call void @[[TOFFL1:.+]](
+// HCHECK:  ret
+
+// HCHECK: define {{.*}}void @[[TOFFL1]]({{.+}})
+// TCHECK: define {{.*}}void @[[TOFFL1:.+]]({{.+}})
+// CHECK-DAG: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK-DAG: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK-DAG: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]],
+// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]],
+// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
+// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[TPAR_OUTL]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp
index d26044a..69292a7 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -124,7 +126,8 @@
 #pragma omp target teams distribute parallel for simd firstprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute parallel for simd lastprivate(argc), firstprivate(argc) // OK
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute parallel for simd lastprivate(argc), firstprivate(argc)
   for (i = 0; i < argc; ++i) foo();
 
   return 0;
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
new file mode 100644
index 0000000..a3a14fb
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -0,0 +1,181 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+void fn1();
+void fn2();
+void fn3();
+void fn4();
+void fn5();
+void fn6();
+
+int Arg;
+
+// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
+void gtid_test() {
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+#pragma omp target teams distribute parallel for simd
+  for(int i = 0 ; i < 100; i++) {}
+  // CHECK: define internal void [[OFFLOADING_FUN_0]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+  // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK:  call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
+  // CHECK: call void @__kmpc_for_static_fini(
+
+  // CHECK: define{{.+}} void [[OMP_OUTLINED_0]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: ret
+#pragma omp target teams distribute parallel for simd if (parallel: false)
+  for(int i = 0 ; i < 100; i++) {
+  // CHECK: define internal void [[OFFLOADING_FUN_1]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+  // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @__kmpc_serialized_parallel(
+  // CHECK: call void [[OMP_OUTLINED_1:@.+]](
+  // CHECK: call void @__kmpc_end_serialized_parallel(
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @{{.+}}gtid_test
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: ret
+    gtid_test();
+  }
+}
+
+
+template <typename T>
+int tmain(T Arg) {
+#pragma omp target teams distribute parallel for simd if (true)
+  for(int i = 0 ; i < 100; i++) {
+    fn1();
+  }
+#pragma omp target teams distribute parallel for simd if (false)
+  for(int i = 0 ; i < 100; i++) {
+    fn2();
+  }
+#pragma omp target teams distribute parallel for simd if (parallel: Arg)
+  for(int i = 0 ; i < 100; i++) {
+    fn3();
+  }
+  return 0;
+}
+
+// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
+int main() {
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
+// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
+#pragma omp target teams distribute parallel for simd if (true)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_0]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void
+    // CHECK: call void @__kmpc_for_static_fini(
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn4
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    fn4();
+  }
+
+#pragma omp target teams distribute parallel for simd if (false)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_1]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void @__kmpc_serialized_parallel(
+    // CHECK: call void [[OMP_OUTLINED_3:@.+]](
+    // CHECK: call void @__kmpc_end_serialized_parallel(
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn5
+    // CHECK: call void @__kmpc_for_static_fini(
+    fn5();
+  }
+
+#pragma omp target teams distribute parallel for simd if (Arg)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_2]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void
+    // CHECK: call void @__kmpc_serialized_parallel(
+    // CHECK: call void [[OMP_OUTLINED_4:@.+]](
+    // CHECK: call void @__kmpc_end_serialized_parallel(
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn6
+    // CHECK: call void @__kmpc_for_static_fini(
+    fn6();
+  }
+
+  return tmain(Arg);
+}
+
+// CHECK-LABEL: define {{.+}} @{{.+}}tmain
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn1
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// CHECK: call void [[T_OUTLINE_FUN_2:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn2
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// call void [[T_OUTLINE_FUN_3:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn3
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
index 96db02e..7554e5b 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_is_device_ptr_ast_print.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_is_device_ptr_ast_print.cpp
index e73f40e..5107519 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_is_device_ptr_ast_print.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_is_device_ptr_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -56,7 +60,7 @@
 // CHECK-NEXT: int aa[10];
 // CHECK-NEXT: arr &raa = this->aa;
 // CHECK-NEXT: func(
-// CHECK-NEXT: #pragma omp target teams distribute parallel for simd is_device_ptr(this->k)
+// CHECK-NEXT: #pragma omp target teams distribute parallel for simd is_device_ptr(this->k){{$}}
 // CHECK-NEXT: for (int i = 0; i < 100; i++)
 // CHECK-NEXT: ;
 // CHECK-NEXT: #pragma omp target teams distribute parallel for simd is_device_ptr(this->z)
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_is_device_ptr_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_is_device_ptr_messages.cpp
index 520f746..57abff3 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_is_device_ptr_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_is_device_ptr_messages.cpp
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -std=c++11 -verify -fopenmp %s
+
+// RUN: %clang_cc1 -std=c++11 -verify -fopenmp-simd %s
 struct ST {
   int *a;
 };
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
new file mode 100644
index 0000000..c3d3861
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -0,0 +1,456 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target teams distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target teams distribute parallel for simd lastprivate(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* {{.+}}, i32* {{.+}}, {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]], {{.+}} [[G_IN:%.+]])
+      // skip gbl and bound tid
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: [[G1_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[SVAR_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[G_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA-64: [[G1_REF:%.+]] = alloca double*,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+
+      // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
+
+      // LAMBDA-64-DAG: [[G_TGT:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+      // LAMBDA-32-DAG: [[G_TGT:%.+]] = load {{.+}}, {{.+}} [[G_ADDR]],
+      // LAMBDA-64-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
+      // LAMBDA-32-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_ADDR]],
+      // LAMBDA-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+      // LAMBDA-DAG: [[SFVAR_TGT:%.+]] = bitcast {{.+}} [[SFVAR_ADDR]] to
+
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+      // LAMBDA: store i32 2, i32* %
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_TGT]],
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_TGT]],
+      // LAMBDA-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+      // LAMBDA-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[SFVAR_TGT]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+
+      // LAMBDA: define{{.*}} internal{{.*}} void @[[LPAR_OUTL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]], {{.+}} [[G_IN:%.+]])
+      // skip tid and prev variables
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: [[G1_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[SVAR_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA: [[G_ADDR:%.+]] = alloca {{.+}},
+      // LAMBDA-64: [[G1_REF:%.+]] = alloca double*,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+
+      // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
+      // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
+
+      // LAMBDA-64-DAG: [[G_TGT:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+      // LAMBDA-32-DAG: [[G_TGT:%.+]] = load {{.+}}, {{.+}} [[G_ADDR]],
+      // LAMBDA-64-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
+      // LAMBDA-32-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_ADDR]],
+      // LAMBDA-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+      // LAMBDA-DAG: [[SFVAR_TGT:%.+]] = bitcast {{.+}} [[SFVAR_ADDR]] to
+
+
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+      // LAMBDA: store i32 2, i32* %
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_TGT]],
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_TGT]],
+      // LAMBDA-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+      // LAMBDA-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+      // LAMBDA-DAG: store {{.+}}, {{.+}} [[SFVAR_TGT]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+
+      [&]() {
+        // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+        // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+        g = 2;
+        g1 = 2;
+        svar = 4;
+        sfvar = 8.0;
+        // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+        // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+        // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+        // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+        // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+        // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+        // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+        // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+        // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+        // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+        // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+        // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+        // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target teams distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  int i;
+
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}{{.*}} [[S_VAR_IN:%.+]])
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
+// CHECK: store {{.+}} [[T_VAR_IN]], {{.+}} [[T_VAR_ADDR]],
+// CHECK: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
+// CHECK: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
+// CHECK: store {{.+}} [[S_VAR_IN]], {{.+}} [[SVAR_ADDR]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[TVAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[TVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR]],
+// CHECK-DAG: [[VEC_TGT_REF:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_TGT_REF]],
+// CHECK-DAG: [[S_ARR_BEGIN:%.+]] = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_TGT_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_TGT_BCAST]],
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+// CHECK: ret void
+
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}{{.*}} [[S_VAR_IN:%.+]])
+
+// gbl and bound tid vars, prev lb and ub vars
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
+// CHECK: store {{.+}} [[T_VAR_IN]], {{.+}} [[T_VAR_ADDR]],
+// CHECK: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
+// CHECK: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
+// CHECK: store {{.+}} [[S_VAR_IN]], {{.+}} [[SVAR_ADDR]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[TVAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// skip body: code generation routine is same as distribute parallel for lastprivate
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[TVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR]],
+// CHECK-DAG: [[VEC_TGT_REF:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_TGT_REF]],
+// CHECK-DAG: [[S_ARR_BEGIN:%.+]] = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_TGT_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_TGT_BCAST]],
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+// CHECK: ret void
+
+// template tmain
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](
+// CHECK: ret
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN1]], {{.+}} [[VEC_ADDR1]],
+// CHECK: store {{.+}} [[T_VAR_IN1]], {{.+}} [[T_VAR_ADDR1]],
+// CHECK: store {{.+}} [[S_ARR_IN1]], {{.+}} [[S_ARR_ADDR1]],
+// CHECK: store {{.+}} [[VAR_IN1]], {{.+}} [[VAR_ADDR1]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[T_VAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR1]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR1]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR1]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR1]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[T_VAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR1]],
+// CHECK-DAG: [[VEC_TGT_BCAST:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_TGT_BCAST]],
+// CHECK-DAG: {{.+}} = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
+// CHECK: ret void
+
+// CHECK: define internal void [[TPAR_OUTL:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid, and prev lb and ub vars
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN1]], {{.+}} [[VEC_ADDR1]],
+// CHECK: store {{.+}} [[T_VAR_IN1]], {{.+}} [[T_VAR_ADDR1]],
+// CHECK: store {{.+}} [[S_ARR_IN1]], {{.+}} [[S_ARR_ADDR1]],
+// CHECK: store {{.+}} [[VAR_IN1]], {{.+}} [[VAR_ADDR1]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[T_VAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR1]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR1]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR1]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR1]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// skip body: code generation routine is same as distribute parallel for lastprivate
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[T_VAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR1]],
+// CHECK-DAG: [[VEC_TGT_BCAST:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_TGT_BCAST]],
+// CHECK-DAG: {{.+}} = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp
index 7f17d67..b65c22b 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -25,7 +27,7 @@
 const S2 ba[5];
 class S3 {
   int a;
-  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+  S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}}
 
 public:
   S3() : a(0) {}
@@ -52,7 +54,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -216,10 +218,12 @@
 #pragma omp target teams distribute parallel for simd lastprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute parallel for simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+// expected-error@+1 {{firstprivate variable cannot be lastprivate}} expected-note@+1 {{defined as firstprivate}}
+#pragma omp target teams distribute parallel for simd firstprivate(m) lastprivate(m)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute parallel for simd lastprivate(n) firstprivate(n) // OK
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute parallel for simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i) foo();
 
   static int si;
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_linear_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_linear_messages.cpp
index 24c432a..3013480 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_linear_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
   int x;
 };
@@ -18,33 +20,42 @@
 {
   int B = 0;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear(B:bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear(B:B::bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear(X::x : ::z)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear(B,::z, X::x)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear(::z)
   for (int i = 0; i < 10; ++i) ;
 
 #pragma omp target teams distribute parallel for simd linear(B::bfoo()) // expected-error {{expected variable name}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear(B::ib,B:C1+C2)
   for (int i = 0; i < 10; ++i) ;
 }
@@ -65,6 +76,7 @@
 
 template<int LEN> int test_warn() {
   int ind2 = 0;
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
   #pragma omp target teams distribute parallel for simd linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}}
   for (int i = 0; i < 100; i++) {
     ind2 += LEN;
@@ -118,15 +130,18 @@
 #pragma omp target teams distribute parallel for simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
 #pragma omp target teams distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear (argc : 5)
   for (int k = 0; k < argc; ++k) ++k;
 
@@ -139,33 +154,17 @@
 #pragma omp target teams distribute parallel for simd linear (argv[1]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear(e, g)
   for (int k = 0; k < argc; ++k) ++k;
 
 #pragma omp target teams distribute parallel for simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear(i)
   for (int k = 0; k < argc; ++k) ++k;
 
-  #pragma omp parallel
-  {
-    int v = 0;
-    int i;
-    #pragma omp target teams distribute parallel for simd linear(v:i)
-    for (int k = 0; k < argc; ++k) { i = k; v += i; }
-  }
-
-#pragma omp target teams distribute parallel for simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  int v = 0;
-
-#pragma omp target teams distribute parallel for simd linear(v:j)
-  for (int k = 0; k < argc; ++k) { ++k; v += j; }
-
-#pragma omp target teams distribute parallel for simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
   return 0;
 }
 
@@ -198,15 +197,18 @@
 #pragma omp target teams distribute parallel for simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
 #pragma omp target teams distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute parallel for simd linear (argc)
   for (int k = 0; k < argc; ++k) ++k;
 
@@ -226,22 +228,6 @@
 #pragma omp target teams distribute parallel for simd linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}}
   for (int k = 0; k < argc; ++k) ++k;
 
-  #pragma omp parallel
-  {
-    int i;
-    #pragma omp target teams distribute parallel for simd linear(i)
-      for (int k = 0; k < argc; ++k) ++k;
-
-    #pragma omp target teams distribute parallel for simd linear(i : 4)
-      for (int k = 0; k < argc; ++k) { ++k; i += 4; }
-  }
-
-#pragma omp target teams distribute parallel for simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute parallel for simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-
   foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
   return 0;
 }
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp
index 0ed57ba..1378ea4 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp
@@ -1,8 +1,10 @@
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
 
 class S {
   int a;
-  S() : a(0) {}
+  S() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S(int v) : a(v) {}
@@ -608,7 +610,8 @@
 
 void test_loop_firstprivate_lastprivate() {
   S s(4);
-#pragma omp target teams distribute parallel for simd lastprivate(s) firstprivate(s)
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute parallel for simd lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}}
   for (int i = 0; i < 16; ++i)
     ;
 }
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp
index b230d9a..63d85eb 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_messages.cpp
index 7e16c79..58e8862 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_misc_messages.c b/test/OpenMP/target_teams_distribute_parallel_for_simd_misc_messages.c
index d444844..7005c7b 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_misc_messages.c
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp target teams distribute parallel for simd'}}
 #pragma omp target teams distribute parallel for simd
 
@@ -285,15 +287,22 @@
     ;
 
   int x, y, z;
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
 #pragma omp target teams distribute parallel for simd lastprivate(x) firstprivate(x)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+1 2 {{lastprivate variable cannot be firstprivate}} expected-note@+1 2 {{defined as lastprivate}}
 #pragma omp target teams distribute parallel for simd lastprivate(x, y) firstprivate(x, y)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+1 3 {{lastprivate variable cannot be firstprivate}} expected-note@+1 3 {{defined as lastprivate}}
 #pragma omp target teams distribute parallel for simd lastprivate(x, y, z) firstprivate(x, y, z)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+1 {{the value of 'simdlen' parameter must be less than or equal to the value of the 'safelen' parameter}}
+#pragma omp target teams distribute parallel for simd simdlen(64) safelen(8)
+  for (i = 0; i < 16; ++i)
+    ;
 }
 
 void test_loop_messages() {
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_nowait_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_nowait_messages.cpp
index 4c65130..458eaf7 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_nowait_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -fopenmp %s
 
+// RUN: %clang_cc1 -fsyntax-only -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_num_teams_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_num_teams_messages.cpp
index 240d7d7..6564f10 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_num_teams_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_num_teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_num_threads_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_num_threads_messages.cpp
index 9032996..58fab1e 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_num_threads_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
new file mode 100644
index 0000000..3792597
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
@@ -0,0 +1,369 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HHECK --check-prefix HCHECK-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+
+// Test target codegen - host bc file has to be created first. (no significant differences with host version of target region)
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix TLAMBDA --check-prefix TLAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+//
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target teams distribute parallel for simd private(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// HCHECK-DAG: [[TEST:@.+]] ={{.*}} global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// HCHECK-DAG: [[T_VAR:@.+]] ={{.+}} global i{{[0-9]+}} 333,
+int t_var = 333;
+// HCHECK-DAG: [[VEC:@.+]] ={{.+}} global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// HCHECK-DAG: [[S_ARR:@.+]] ={{.+}} global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// HCHECK-DAG: [[VAR:@.+]] ={{.+}} global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// HCHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // HLAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // HLAMBDA-LABEL: @main
+  // HLAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // HLAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // HLAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+    // HLAMBDA: call void @[[LOFFL1:.+]](
+    // HLAMBDA:  ret
+#pragma omp target teams distribute parallel for simd private(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // HLAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]]()
+    // TLAMBDA: define{{.*}} void @[[LOFFL1:.+]]()
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}})
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+    // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+    // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute parallel for simd private(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// HCHECK: define {{.*}}i{{[0-9]+}} @main()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, {{.+}} null, {{.+}} null, i32 0, i32 0)
+// HCHECK: call void @[[OFFL1:.+]]()
+// HCHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// HCHECK:  ret
+
+// HCHECK: define{{.*}} void @[[OFFL1]]()
+// TCHECK: define{{.*}} void @[[OFFL1:.+]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[PAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// HCHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// HCHECK: call void @[[TOFFL1:.+]]()
+// HCHECK: ret
+
+// HCHECK: define {{.*}}void @[[TOFFL1]]()
+// TCHECK: define void @[[TOFFL1:.+]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[TPAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// prev lb and ub
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// iter variables
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_private_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_private_messages.cpp
index 20e1850..f1a8e0b 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_private_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
new file mode 100644
index 0000000..b578634
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -0,0 +1,93 @@
+// add -fopenmp-targets
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T>
+T tmain() {
+#pragma omp target teams distribute parallel for simd proc_bind(master)
+  for(int i = 0; i < 1000; i++) {}
+  return T();
+}
+
+int main() {
+  // CHECK-LABEL: @main
+#pragma omp target teams distribute parallel for simd proc_bind(spread)
+  for(int i = 0; i < 1000; i++) {}
+#pragma omp target teams distribute parallel for simd proc_bind(close)
+  for(int i = 0; i < 1000; i++) {}
+  return tmain<int>();
+}
+
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL1:@.+]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL2:@.+]]()
+// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
+// CHECK: ret i32 [[CALL_RET]]
+
+// CHECK: define{{.+}} void [[OFFL1]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[OFFL2]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[TMAIN]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL3:@.+]]()
+
+// CHECK: define{{.+}} [[OFFL3]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_messages.cpp
index 1471412..3ae5268 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s
+
 void foo();
 
 template <class T, typename S, int N>
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
new file mode 100644
index 0000000..e938a77
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
@@ -0,0 +1,353 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+T tmain() {
+  T t_var = T();
+  T vec[] = {1, 2};
+#pragma omp target teams distribute parallel for simd reduction(+: t_var)
+  for (int i = 0; i < 2; ++i) {
+    t_var += (T) i;
+  }
+  return T();
+}
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target teams distribute parallel for simd reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}}*{{.*}} [[SIVAR_ARG:%.+]])
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_PAR:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_PAR]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+    // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+    // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+    // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+    // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+    // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+    // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+    // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+    // LAMBDA: ]
+    // LAMBDA: [[CASE1]]:
+    // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+    // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    // LAMBDA: [[CASE2]]:
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA: br
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+
+    // Skip global and bound tid vars, and prev lb and ub vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+    // skip loop vars
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load {{.+}} [[SIVAR_ADDR]]
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+     // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+    // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+    // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+    // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+    // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+    // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+    // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+    // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+    // LAMBDA: ]
+    // LAMBDA: [[CASE1]]:
+    // LAMBDA-64-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+    // LAMBDA-32-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+    // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    // LAMBDA: [[CASE2]]:
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA: br
+
+    sivar += i;
+
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+
+      sivar += 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
+      // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4
+      // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute parallel for simd reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i{{64|32}}* @{{.+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}}*{{.*}} [[SIVAR_ARG:%.+]])
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_VAL]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_REF:%.+]] = load{{.+}} [[SIVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars, and prev lb and ub
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// skip loop vars
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK-64: [[SIVAR_REF:%.+]] = load {{.+}} [[SIVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call void @[[TOFFL1:.+]]({{.+}})
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}}*{{.*}} [[TVAR_ARG:%.+]])
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]]
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_VAL]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load {{.+}} [[TVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars, and prev lb and ub vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// skip loop vars
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load {{.+}} [[TVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: br
+#endif
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp
index 63744c7..8cd4acd 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_safelen_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_safelen_messages.cpp
index beb3c1e..f068e6b 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_safelen_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
new file mode 100644
index 0000000..7997697
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
@@ -0,0 +1,400 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target teams distribute parallel for simd
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL2:.+]](
+    #pragma omp target teams distribute parallel for simd schedule(static)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL3:.+]](
+    #pragma omp target teams distribute parallel for simd schedule(static, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL4:.+]](
+    #pragma omp target teams distribute parallel for simd schedule(dynamic)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL5:.+]](
+    #pragma omp target teams distribute parallel for simd schedule(dynamic, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: define internal void @[[OFFL1]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL3]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL4]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL4:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL4]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL4]]({{.+}})
+  // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+  // CK1: call {{.+}} @__kmpc_dispatch_next_4(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL5]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL5:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL5]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL5]]({{.+}})
+  // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+  // CK1: call {{.+}} @__kmpc_dispatch_next_4(
+  // CK1: ret void
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+  int m = 10;
+#pragma omp target teams distribute parallel for simd
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for simd schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for simd schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for simd schedule(dynamic)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute parallel for simd schedule(dynamic, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+  int m = 10;
+#pragma omp target teams distribute parallel for simd
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for simd dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for simd dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for simd schedule(dynamic)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute parallel for simd schedule(dynamic, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL4:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL5:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL4]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL4:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL4]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL4]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+
+// CK2: define {{.*}}void @[[OFFL5]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL5:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL5]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL5]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT4:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT5:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT4]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT4:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT4]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT4:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT4]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT5]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT5:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT5]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT5:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT5]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_messages.cpp
index 773a148..c24a543 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_shared_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_shared_messages.cpp
index cbef07a..f59ecbd 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_shared_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_shared_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_simdlen_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_simdlen_messages.cpp
index c2a82d5..d5f714c 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_simdlen_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_simdlen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_thread_limit_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_thread_limit_messages.cpp
index 159d1b0..ea8b1a7 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_thread_limit_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_thread_limit_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_thread_limit_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_thread_limit_messages.cpp
index 1db0726..bd04e93 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_thread_limit_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_thread_limit_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_private_codegen.cpp b/test/OpenMP/target_teams_distribute_private_codegen.cpp
new file mode 100644
index 0000000..457156e
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_private_codegen.cpp
@@ -0,0 +1,246 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target teams distribute private(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]]()
+    // LAMBDA:  ret
+#pragma omp target teams distribute private(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]]()
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}})
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+    // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute private(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]]()
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call void @[[TOFFL1:.+]]()
+// CHECK:  ret
+
+// CHECK: define {{.*}}void @[[TOFFL1]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_private_messages.cpp b/test/OpenMP/target_teams_distribute_private_messages.cpp
index fcbd4f8..df7b689 100644
--- a/test/OpenMP/target_teams_distribute_private_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_reduction_codegen.cpp b/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
new file mode 100644
index 0000000..704bf00
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
@@ -0,0 +1,224 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+T tmain() {
+  T t_var = T();
+  T vec[] = {1, 2};
+#pragma omp target teams distribute reduction(+: t_var)
+  for (int i = 0; i < 2; ++i) {
+    t_var += (T) i;
+  }
+  return T();
+}
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target teams distribute reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i32*{{.+}} [[SIVAR_ARG:%.+]])
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR:%.+]] = load i32*, i32** [[SIVAR_ADDR]],
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}}*{{.+}} [[SIVAR_ARG:%.+]])
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+    // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+    // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+    // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+    // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+    // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+    // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+    // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+    // LAMBDA: ]
+    // LAMBDA: [[CASE1]]:
+    // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+    // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    // LAMBDA: [[CASE2]]:
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    sivar += i;
+
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+
+      sivar += 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
+      // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4
+      // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i32* {{.+}})
+// CHECK: [[RES:%.+]] = call{{.*}} i32 @[[TMAIN_INT:[^(]+]]()
+// CHECK: ret i32 [[RES]]
+
+// CHECK: define{{.*}} void @[[OFFL1]](i32*{{.+}} [[SIVAR_ARG:%.+]])
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}}** [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_LOAD:%.+]] = load i32*, i32** [[SIVAR_ADDR]],
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_LOAD]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, i32*{{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i32,
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_REF:%.+]] = load i32*, i32** [[SIVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call void @[[TOFFL1:.+]]({{.+}}* {{.+}})
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[TOFFL1]](i32*{{.+}} [[TVAR_ARG:%.+]])
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR:%.+]] = load i32*, i32** [[TVAR_ADDR]],
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}}*{{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load i32*, i32** [[TVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_reduction_messages.cpp b/test/OpenMP/target_teams_distribute_reduction_messages.cpp
index 3cfb086..38878fc 100644
--- a/test/OpenMP/target_teams_distribute_reduction_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_shared_messages.cpp b/test/OpenMP/target_teams_distribute_shared_messages.cpp
index 13f4d47..37e0aa1 100644
--- a/test/OpenMP/target_teams_distribute_shared_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_shared_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_aligned_messages.cpp b/test/OpenMP/target_teams_distribute_simd_aligned_messages.cpp
index d91cfa9..2ec53ee 100644
--- a/test/OpenMP/target_teams_distribute_simd_aligned_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/target_teams_distribute_simd_ast_print.cpp b/test/OpenMP/target_teams_distribute_simd_ast_print.cpp
index 77732ef..a968635 100644
--- a/test/OpenMP/target_teams_distribute_simd_ast_print.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_ast_print.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s -Wno-openmp-target | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-target | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -28,8 +32,9 @@
       ++this->a.a;
   }
   S7 &operator=(S7 &s) {
-#pragma omp target teams distribute simd private(a) private(this->a)
-    for (int k = 0; k < s.a.a; ++k)
+    int k;
+#pragma omp target teams distribute simd private(a) private(this->a) linear(k)
+    for (k = 0; k < s.a.a; ++k)
       ++s.a.a;
     return *this;
   }
@@ -50,7 +55,7 @@
   }
 };
 // CHECK: #pragma omp target teams distribute simd private(this->a) private(this->a) private(T::a)
-// CHECK: #pragma omp target teams distribute simd private(this->a) private(this->a)
+// CHECK: #pragma omp target teams distribute simd private(this->a) private(this->a) linear(k)
 // CHECK: #pragma omp target teams distribute simd private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
 // CHECK: #pragma omp target teams distribute simd simdlen(slen1) safelen(slen2) aligned(arr: alen)
 // CHECK: #pragma omp target teams distribute simd private(this->a) private(this->a) private(this->S::a)
@@ -102,7 +107,7 @@
 #pragma omp target teams distribute simd
   for (int i=0; i < 2; ++i)
     a = 2;
-// CHECK: #pragma omp target teams distribute simd
+// CHECK: #pragma omp target teams distribute simd{{$}}
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: a = 2;
 #pragma omp target teams distribute simd private(argc, b), firstprivate(c, d), collapse(2)
@@ -129,10 +134,10 @@
 // CHECK: #pragma omp target teams distribute simd private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
-#pragma omp target teams distribute simd simdlen(clen-1) linear(d)
+#pragma omp target teams distribute simd simdlen(clen-1)
   for (int k = 0; k < 10; ++k)
     e += d + argc;
-// CHECK: #pragma omp target teams distribute simd simdlen(clen - 1) linear(d)
+// CHECK: #pragma omp target teams distribute simd simdlen(clen - 1)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target teams distribute simd safelen(clen-1) aligned(arr:alen)
@@ -180,10 +185,10 @@
 // CHECK: #pragma omp target teams distribute simd private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
-#pragma omp target teams distribute simd simdlen(clen-1) linear(d)
+#pragma omp target teams distribute simd simdlen(clen-1)
   for (int k = 0; k < 10; ++k)
     e += d + argc;
-// CHECK: #pragma omp target teams distribute simd simdlen(clen - 1) linear(d)
+// CHECK: #pragma omp target teams distribute simd simdlen(clen - 1)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target teams distribute simd safelen(clen-1) aligned(arr:N+6)
diff --git a/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_codegen.cpp
new file mode 100644
index 0000000..fb33cc9
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_simd_codegen.cpp
@@ -0,0 +1,844 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[S1:%.+]] = type { double }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// We have 8 target regions, but only 7 that actually will generate offloading
+// code, only 6 will have mapped arguments, and only 4 have all-constant map
+// sizes.
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288]
+// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 2]
+// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2]
+// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288]
+// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547]
+// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [5 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40]
+// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [5 x i64] [i64 288, i64 288, i64 288, i64 288, i64 547]
+// CHECK-DAG: [[SIZET7:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40]
+// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
+  // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR2]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR2]]
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target teams distribute simd num_teams(a) thread_limit(a) firstprivate(aa) simdlen(16) nowait
+  for (int i = 0; i < 10; ++i) {
+  }
+
+  // CHECK:       call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}})
+  #pragma omp target teams distribute simd if(target: 0) safelen(32) linear(a)
+  for (a = 0; a < 10; ++a) {
+    a += 1;
+  }
+
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]]
+
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target teams distribute simd if(target: 1)
+  for (int i = 0; i < 10; ++i) {
+    aa += 1;
+  }
+
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
+  // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+  // CHECK:       [[IFTHEN]]
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
+
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]]
+  // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  // CHECK-NEXT:  br label %[[IFEND:.+]]
+  // CHECK:       [[IFELSE]]
+  // CHECK:       call void [[HVT3]]({{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[IFEND]]
+  // CHECK:       [[IFEND]]
+  #pragma omp target teams distribute simd if(target: n>10)
+  for (int i = 0; i < 10; ++i) {
+    a += 1;
+    aa += 1;
+  }
+
+  // We capture 3 VLA sizes in this target region
+  // CHECK-64:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-64:       [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32*
+  // CHECK-64:       store i32 [[A_VAL]], i32* [[A_ADDR]],
+  // CHECK-64:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
+
+  // CHECK-32:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-32:       store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
+  // CHECK-32:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
+
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
+  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+  // CHECK:       [[TRY]]
+  // CHECK:       [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
+  // CHECK:       [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
+  // CHECK:       [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
+
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0
+
+  // CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[SADDR4:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[SADDR5:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[PADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[SADDR6:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[PADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[SADDR7:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[PADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[SADDR8:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX8]]
+  // CHECK-DAG:   [[PADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX8]]
+
+  // The names below are not necessarily consistent with the names used for the
+  // addresses above as some are repeated.
+  // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]],
+  // CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CBPADDR1:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CPADDR1:%.+]],
+  // CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CBPADDR2:%.+]],
+  // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CPADDR2:%.+]],
+  // CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CBPADDR3:%.+]],
+  // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR3:%.+]],
+  // CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR4:%.+]],
+  // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR4:%.+]],
+  // CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]**
+  // CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]**
+  // CHECK-DAG:   store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store float* %{{.+}}, float** [[CBPADDR5:%.+]],
+  // CHECK-DAG:   store float* %{{.+}}, float** [[CPADDR5:%.+]],
+  // CHECK-DAG:   [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to  float**
+  // CHECK-DAG:   [[CPADDR5]] = bitcast i8** {{%[^,]+}} to float**
+  // CHECK-DAG:   store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CBPADDR6:%.+]],
+  // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR6:%.+]],
+  // CHECK-DAG:   [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to  [5 x [10 x double]]**
+  // CHECK-DAG:   [[CPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]**
+  // CHECK-DAG:   store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store double* %{{.+}}, double** [[CBPADDR7:%.+]],
+  // CHECK-DAG:   store double* %{{.+}}, double** [[CPADDR7:%.+]],
+  // CHECK-DAG:   [[CBPADDR7]] = bitcast i8** {{%[^,]+}} to  double**
+  // CHECK-DAG:   [[CPADDR7]] = bitcast i8** {{%[^,]+}} to double**
+  // CHECK-DAG:   store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CBPADDR8:%.+]],
+  // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8:%.+]],
+  // CHECK-DAG:   [[CBPADDR8]] = bitcast i8** {{%[^,]+}} to  [[TT]]**
+  // CHECK-DAG:   [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]**
+  // CHECK-DAG:   store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}}
+
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target teams distribute simd if(target: n>20) aligned(b)
+  for (int i = 0; i < 10; ++i) {
+    a += 1;
+    b[2] += 1.0;
+    bn[3] += 1.0;
+    c[1][2] += 1.0;
+    cn[1][3] += 1.0;
+    d.X += 1;
+    d.Y += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] {{[^)]+}})
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] {{[^)]+}})
+// CHECK:       alloca i[[SZ]],
+// CHECK:       bitcast i[[SZ]]* {{.+}} to i16*
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+
+// CHECK:       define internal void [[HVT1]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
+// CHECK-64:    [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i32*
+// CHECK-64:    store i32 [[AA]], i32* [[AA_C]], align
+// CHECK-32:    store i32 [[AA]], i32* [[AA_CASTED]], align
+// CHECK:       [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}})
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    store i32 10, i32* [[AA_CADDR]], align
+// CHECK-32:    store i32 10, i32* [[AA_ADDR]], align
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT2]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK:       [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK:       [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
+// CHECK:       store i16 [[AA]], i16* [[AA_C]], align
+// CHECK:       [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}})
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK:       [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT3]]
+// Create stack storage and store argument in there.
+// CHECK:       [[A_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[A_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_CASTED:%.+]] = alloca i[[SZ]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
+// CHECK-DAG:   [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK-64-DAG:[[A:%.+]] = load i32, i32* [[A_CADDR]], align
+// CHECK-32-DAG:[[A:%.+]] = load i32, i32* [[A_ADDR]], align
+// CHECK-64-DAG:[[A_C:%.+]] = bitcast i[[SZ]]* [[A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[A]], i32* [[A_C]], align
+// CHECK-32-DAG:store i32 [[A]], i32* [[A_CASTED]], align
+// CHECK-DAG:   [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
+// CHECK-DAG:   [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[AA]], i16* [[AA_C]], align
+// CHECK-DAG:   [[PARAM1:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CASTED]], align
+// CHECK-DAG:   [[PARAM2:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
+// CHECK-DAG:   call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
+// CHECK:       [[A_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
+// CHECK-DAG:   [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       ret void
+// CHECK-NEXT:  }
+
+// CHECK:       define internal void [[HVT4]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x float]*
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_BN:%.+]] = alloca float*
+// CHECK:       [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]*
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA3:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_CN:%.+]] = alloca double*
+// CHECK:       [[LOCAL_D:%.+]] = alloca [[TT]]*
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
+// CHECK-DAG:   store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]]
+// CHECK-DAG:   store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]]
+// CHECK-DAG:   store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]]
+// CHECK-DAG:   store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]]
+
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]],
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
+// CHECK-DAG:   [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]],
+// CHECK-DAG:   [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
+// CHECK-DAG:   [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]],
+// CHECK-DAG:   [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]],
+// CHECK-DAG:   [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  short aa = 0;
+  tx b[10];
+
+  #pragma omp target teams distribute simd if(target: n>40)
+  for (int i = 0; i < 10; ++i) {
+    a += 1;
+    aa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+static
+int fstatic(int n) {
+  int a = 0;
+  short aa = 0;
+  char aaa = 0;
+  int b[10];
+
+  #pragma omp target teams distribute simd if(target: n>50)
+  for (int i = a; i < n; ++i) {
+    a += 1;
+    aa += 1;
+    aaa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+struct S1 {
+  double a;
+
+  int r1(int n){
+    int b = n+1;
+    short int c[2][n];
+
+    #pragma omp target teams distribute simd if(target: n>60)
+    for (int i = 0; i < 10; ++i) {
+      this->a = (double)b + 1.5;
+      c[1][1] = ++a;
+    }
+
+    return c[1][1] + (int)b;
+  }
+};
+
+// CHECK: define {{.*}}@{{.*}}bar{{.*}}
+int bar(int n){
+  int a = 0;
+
+  // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}})
+  a += foo(n);
+
+  S1 S;
+  // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
+  a += S.r1(n);
+
+  // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
+  a += fstatic(n);
+
+  // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+//
+// CHECK: define {{.*}}[[FS1]]
+//
+// CHECK:          i8* @llvm.stacksave()
+// CHECK-64:       [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32*
+// CHECK-64:       store i32 %{{.+}}, i32* [[B_ADDR]],
+// CHECK-64:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]],
+
+// CHECK-32:       store i32 %{{.+}}, i32* %__vla_expr
+// CHECK-32:       store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
+// CHECK-32:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
+
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
+// CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+// CHECK:       [[TRY]]
+// We capture 2 VLA sizes in this target region
+// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
+// CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
+
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]]
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]]
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]]
+// CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]]
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]]
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]]
+// CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]]
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]]
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]]
+// CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]]
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]]
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]]
+
+// The names below are not necessarily consistent with the names used for the
+// addresses above as some are repeated.
+// CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]],
+// CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CBPADDR1:%.+]],
+// CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CPADDR1:%.+]],
+// CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]],
+// CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR3:%.+]],
+// CHECK-DAG:   store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR3:%.+]],
+// CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]**
+// CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]**
+// CHECK-DAG:   store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}}
+
+// CHECK-DAG:   store i16* %{{.+}}, i16** [[CBPADDR4:%.+]],
+// CHECK-DAG:   store i16* %{{.+}}, i16** [[CPADDR4:%.+]],
+// CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16**
+// CHECK-DAG:   store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}}
+
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+
+//
+// CHECK: define {{.*}}[[FSTATIC]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
+// CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+// CHECK:       [[IFTHEN]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([5 x i[[SZ]]], [5 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
+
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 0
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 0
+// CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0:%.+]],
+// CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 1
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 1
+// CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1:%.+]],
+// CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 2
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 2
+// CHECK-DAG:   store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2:%.+]],
+// CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 3
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 3
+// CHECK-DAG:   store i[[SZ]] [[VAL3:%.+]], i[[SZ]]* [[CBPADDR3:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL3]], i[[SZ]]* [[CPADDR3:%.+]],
+// CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 4
+// CHECK-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 4
+// CHECK-DAG:   store [10 x i32]* %{{.+}}, [10 x i32]** [[CBPADDR4:%.+]],
+// CHECK-DAG:   store [10 x i32]* %{{.+}}, [10 x i32]** [[CPADDR4:%.+]],
+// CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x i32]**
+// CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x i32]**
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK-NEXT:  br label %[[IFEND:.+]]
+// CHECK:       [[IFELSE]]
+// CHECK:       call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[IFEND]]
+// CHECK:       [[IFEND]]
+
+//
+// CHECK: define {{.*}}[[FTEMPLATE]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
+// CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+// CHECK:       [[IFTHEN]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET7]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
+
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0
+// CHECK-DAG:   store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0:%.+]],
+// CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1
+// CHECK-DAG:   store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1:%.+]],
+// CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2
+// CHECK-DAG:   store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2:%.+]],
+// CHECK-DAG:   store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2:%.+]],
+// CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+// CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK-NEXT:  br label %[[IFEND:.+]]
+// CHECK:       [[IFELSE]]
+// CHECK:       call void [[HVT5]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[IFEND]]
+// CHECK:       [[IFEND]]
+
+
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions of the callees of bar().
+
+// CHECK:       define internal void [[HVT7]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_THIS:%.+]] = alloca [[S1]]*
+// CHECK:       [[LOCAL_B:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_C:%.+]] = alloca i16*
+// CHECK:       [[LOCAL_B_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
+// CHECK-DAG:   store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]]
+// Store captures in the context.
+// CHECK-DAG:   [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]],
+// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32*
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
+// CHECK-DAG:   [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]],
+
+// CHECK-64-DAG:[[CONV_B:%.+]] = load i32, i32* [[CONV_BP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_B_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_B]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_BV:%.+]] = load i32, i32* [[LOCAL_B]]
+// CHECK-32-DAG:store i32 [[LOCAL_BV]], i32* [[LOCAL_B_CASTED]], align
+// CHECK-DAG:   [[REF_B:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_B_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+
+// CHECK:       define internal void [[HVT6]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       alloca i[[SZ]],
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AAA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       alloca i[[SZ]],
+// CHECK:       [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AAA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
+// Store captures in the context.
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
+// CHECK-DAG:   [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK-DAG:   [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[CONV_AA]], i16* [[CONV]], align
+// CHECK-DAG:   [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
+
+// CHECK-DAG:   [[CONV_AAA:%.+]] = load i8, i8* [[CONV_AAAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA_CASTED]] to i8*
+// CHECK-DAG:   store i8 [[CONV_AAA]], i8* [[CONV]], align
+// CHECK-DAG:   [[REF_AAA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AAA_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED6:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] {{.+}}, i[[SZ]] [[REF_AA]], i[[SZ]] [[REF_AAA]], [10 x i32]* [[REF_B]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+// CHECK:       define internal void [[HVT5]]
+// Create local storage for each capture.
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK:       [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
+// Store captures in the context.
+// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
+// CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
+
+// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
+// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
+// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
+// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
+// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
+// CHECK-DAG:   [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
+
+// CHECK-DAG:   [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
+// CHECK-DAG:   [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
+// CHECK-DAG:   store i16 [[CONV_AA]], i16* [[CONV]], align
+// CHECK-DAG:   [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
+
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]])
+//
+//
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
+// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
+
+// CHECK-DAG: !{!"llvm.loop.vectorize.width", i32 16}
+// CHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+// CHECK-DAG: !{!"llvm.loop.vectorize.width", i32 32}
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp b/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp
new file mode 100644
index 0000000..9e455b6
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp
@@ -0,0 +1,475 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target teams distribute simd codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// Check that no target code is emmitted if no omptests flag was provided.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK:    [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[A1:@.+]] = internal global [[SA]]
+// CHECK-DAG: [[A2:@.+]] = global [[SA]]
+// CHECK-DAG: [[B1:@.+]] = global [[SB]]
+// CHECK-DAG: [[B2:@.+]] = global [[SB]]
+// CHECK-DAG: [[C1:@.+]] = internal global [[SC]]
+// CHECK-DAG: [[D1:@.+]] = global [[SD]]
+// CHECK-DAG: [[E1:@.+]] = global [[SE]]
+// CHECK-DAG: [[T1:@.+]] = global [[ST1]]
+// CHECK-DAG: [[T2:@.+]] = global [[ST2]]
+
+// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-NTARGET-NOT: type { i8*, i8*, %
+// CHECK-NTARGET-NOT: type { i32, %
+
+// We have 7 target regions
+
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// TCHECK-NOT: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+
+// CHECK-NTARGET-NOT: private constant i8 0
+// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
+
+// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function.
+// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [
+// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+// CHECK-NTARGET: @llvm.global_ctors = appending global [3   x { i32, void ()*, i8* }] [
+
+extern int *R;
+
+struct SA {
+  int arr[4];
+  void foo() {
+    int a = *R;
+    a += 1;
+    *R = a;
+  }
+  SA() {
+    int a = *R;
+    a += 2;
+    *R = a;
+  }
+  ~SA() {
+    int a = *R;
+    a += 3;
+    *R = a;
+  }
+};
+
+struct SB {
+  int arr[8];
+  void foo() {
+    int a = *R;
+    #pragma omp target teams distribute simd
+    for (int i = 0; i < 10; ++i)
+      a += 4;
+    *R = a;
+  }
+  SB() {
+    int a = *R;
+    a += 5;
+    *R = a;
+  }
+  ~SB() {
+    int a = *R;
+    a += 6;
+    *R = a;
+  }
+};
+
+struct SC {
+  int arr[16];
+  void foo() {
+    int a = *R;
+    a += 7;
+    *R = a;
+  }
+  SC() {
+    int a = *R;
+    #pragma omp target teams distribute simd
+    for (int i = 0; i < 10; ++i)
+      a += 8;
+    *R = a;
+  }
+  ~SC() {
+    int a = *R;
+    a += 9;
+    *R = a;
+  }
+};
+
+struct SD {
+  int arr[32];
+  void foo() {
+    int a = *R;
+    a += 10;
+    *R = a;
+  }
+  SD() {
+    int a = *R;
+    a += 11;
+    *R = a;
+  }
+  ~SD() {
+    int a = *R;
+    #pragma omp target teams distribute simd
+    for (int i = 0; i < 10; ++i)
+      a += 12;
+    *R = a;
+  }
+};
+
+struct SE {
+  int arr[64];
+  void foo() {
+    int a = *R;
+    #pragma omp target teams distribute simd if(target: 0)
+    for (int i = 0; i < 10; ++i)
+      a += 13;
+    *R = a;
+  }
+  SE() {
+    int a = *R;
+    #pragma omp target teams distribute simd
+    for (int i = 0; i < 10; ++i)
+      a += 14;
+    *R = a;
+  }
+  ~SE() {
+    int a = *R;
+    #pragma omp target teams distribute simd
+    for (int i = 0; i < 10; ++i)
+      a += 15;
+    *R = a;
+  }
+};
+
+template <int x>
+struct ST {
+  int arr[128 + x];
+  void foo() {
+    int a = *R;
+    #pragma omp target teams distribute simd
+    for (int i = 0; i < 10; ++i)
+      a += 16 + x;
+    *R = a;
+  }
+  ST() {
+    int a = *R;
+    #pragma omp target teams distribute simd
+    for (int i = 0; i < 10; ++i)
+      a += 17 + x;
+    *R = a;
+  }
+  ~ST() {
+    int a = *R;
+    #pragma omp target teams distribute simd
+    for (int i = 0; i < 10; ++i)
+      a += 18 + x;
+    *R = a;
+  }
+};
+
+// We have to make sure we us all the target regions:
+//CHECK-DAG: define internal void @[[NAME1]](
+//CHECK-DAG: call void @[[NAME1]](
+//CHECK-DAG: define internal void @[[NAME2]](
+//CHECK-DAG: call void @[[NAME2]](
+//CHECK-DAG: define internal void @[[NAME3]](
+//CHECK-DAG: call void @[[NAME3]](
+//CHECK-DAG: define internal void @[[NAME4]](
+//CHECK-DAG: call void @[[NAME4]](
+//CHECK-DAG: define internal void @[[NAME5]](
+//CHECK-DAG: call void @[[NAME5]](
+//CHECK-DAG: define internal void @[[NAME6]](
+//CHECK-DAG: call void @[[NAME6]](
+//CHECK-DAG: define internal void @[[NAME7]](
+//CHECK-DAG: call void @[[NAME7]](
+//CHECK-DAG: define internal void @[[NAME8]](
+//CHECK-DAG: call void @[[NAME8]](
+//CHECK-DAG: define internal void @[[NAME9]](
+//CHECK-DAG: call void @[[NAME9]](
+//CHECK-DAG: define internal void @[[NAME10]](
+//CHECK-DAG: call void @[[NAME10]](
+//CHECK-DAG: define internal void @[[NAME11]](
+//CHECK-DAG: call void @[[NAME11]](
+//CHECK-DAG: define internal void @[[NAME12]](
+//CHECK-DAG: call void @[[NAME12]](
+
+//TCHECK-DAG: define void @[[NAME1]](
+//TCHECK-DAG: define void @[[NAME2]](
+//TCHECK-DAG: define void @[[NAME3]](
+//TCHECK-DAG: define void @[[NAME4]](
+//TCHECK-DAG: define void @[[NAME5]](
+//TCHECK-DAG: define void @[[NAME6]](
+//TCHECK-DAG: define void @[[NAME7]](
+//TCHECK-DAG: define void @[[NAME8]](
+//TCHECK-DAG: define void @[[NAME9]](
+//TCHECK-DAG: define void @[[NAME10]](
+//TCHECK-DAG: define void @[[NAME11]](
+//TCHECK-DAG: define void @[[NAME12]](
+
+// CHECK-NTARGET-NOT: __tgt_target
+// CHECK-NTARGET-NOT: __tgt_register_lib
+// CHECK-NTARGET-NOT: __tgt_unregister_lib
+
+// TCHECK-NOT: __tgt_target
+// TCHECK-NOT: __tgt_register_lib
+// TCHECK-NOT: __tgt_unregister_lib
+
+// We have 2 initializers with priority 500
+//CHECK: define internal void [[P500]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 1 initializers with priority 501
+//CHECK: define internal void [[P501]](
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 6 initializers with default priority
+//CHECK: define internal void [[PMAX]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// Check registration and unregistration
+
+//CHECK:     define internal void @[[UNREGFN:.+]](i8*)
+//CHECK-SAME: comdat($[[REGFN]]) {
+//CHECK:     call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_unregister_lib([[DSCTY]]*)
+
+//CHECK:     define linkonce hidden void @[[REGFN]](i8*)
+//CHECK-SAME: comdat {
+//CHECK:     call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]])
+//CHECK:     call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*),
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_register_lib([[DSCTY]]*)
+
+static __attribute__((init_priority(500))) SA a1;
+SA a2;
+SB __attribute__((init_priority(500))) b1;
+SB __attribute__((init_priority(501))) b2;
+static SC c1;
+SD d1;
+SE e1;
+ST<100> t1;
+ST<1000> t2;
+
+
+int bar(int a){
+  int r = a;
+
+  a1.foo();
+  a2.foo();
+  b1.foo();
+  b2.foo();
+  c1.foo();
+  d1.foo();
+  e1.foo();
+  t1.foo();
+  t2.foo();
+
+  #pragma omp target teams distribute simd
+  for (int i = 0; i < 10; ++i)
+    ++r;
+
+  return r + *R;
+}
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+// TCHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+// CHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp b/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp
new file mode 100644
index 0000000..856664b
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp
@@ -0,0 +1,89 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target teams distribute simd codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[CA:%.+]] = type { i32* }
+
+// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}})
+int nested(int a){
+  // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]](
+  #pragma omp target teams distribute simd
+  for (int i = 0; i < 10; ++i)
+    ++a;
+
+  // CHECK: call void @"[[LNAME:.+]]"([[CA]]*
+  auto F = [&](){
+    #pragma omp parallel
+    {
+      #pragma omp target teams distribute simd
+      for (int i = 0; i < 10; ++i)
+        ++a;
+    }
+  };
+
+  F();
+
+  return a;
+}
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]](
+
+// CHECK: define {{.*}}void @"[[LNAME]]"(
+// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to
+
+// CHECK: define {{.*}}void [[PNAME]](
+// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]](
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]](
+
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+
+// CHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+// TCHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif
diff --git a/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
new file mode 100644
index 0000000..8a80774
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
@@ -0,0 +1,141 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X][Y];
+
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target teams distribute simd collapse(2)
+    for(int i = 0; i < X; i++) {
+      for(int j = 0; j < Y; j++) {
+        a[i][j] = (T)0;
+      }
+    }
+    // CK1: define internal void @[[OFFL1]](
+    // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+    // CK1: ret void
+
+    // CK1: define internal void @[[OUTL1]]({{.+}})
+    // discard loop variables not needed here
+    // CK1: = alloca i32,
+    // CK1: = alloca i32,
+    // CK1: = alloca i32,
+    // CK1: = alloca i32,
+    // CK1: [[OMP_UB:%.+]] = alloca i32,
+    // CK1: store i32 56087, i32* [[OMP_UB]],
+    // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+    // CK1: call void @__kmpc_for_static_fini(
+    // CK1: ret void
+
+    return a[0][0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n, int m>
+int tmain(T argc) {
+  T a[n][m];
+  #pragma omp target teams distribute simd collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = (T)0;
+    }
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int m = 2;
+  int a[n][m];
+  #pragma omp target teams distribute simd collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = 0;
+    }
+  }
+  return tmain<int, 10, 2>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: [[OMP_UB:%.omp.ub]] = alloca i64,
+// CK2: store i64 {{.+}}, i64* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// discard loop variables not needed here
+// CK2: [[OMP_UB:%.omp.ub]] = alloca i32,
+// CK2: store i32 {{.+}}, i32* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/target_teams_distribute_simd_collapse_messages.cpp b/test/OpenMP/target_teams_distribute_simd_collapse_messages.cpp
index 39937f9..a2d3bde 100644
--- a/test/OpenMP/target_teams_distribute_simd_collapse_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_defaultmap_messages.cpp b/test/OpenMP/target_teams_distribute_simd_defaultmap_messages.cpp
index 3bea411..b8ac8ae 100644
--- a/test/OpenMP/target_teams_distribute_simd_defaultmap_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_defaultmap_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp
new file mode 100644
index 0000000..a94da30
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp
@@ -0,0 +1,261 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4]
+// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+int global;
+extern int global;
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+  static long *plocal;
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID:@.+]], i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]*
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 3
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[ID]], i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target teams distribute simd device(global + a) depend(in: global) depend(out: a, b, cn[4])
+  for (int i = 0; i < 10; ++i) {
+  }
+
+  // CHECK:       [[ADD:%.+]] = add nsw i32
+  // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
+
+  // CHECK:       [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0
+  // CHECK:       br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]]
+  // CHECK:       [[THEN]]:
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]**
+  // CHECK-DAG:   store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]]
+  // CHECK-DAG:   store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]]
+
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
+  // CHECK-DAG:   store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]]
+  // CHECK-DAG:   store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]]
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CHECK-DAG:   getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{104|52}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+
+  // CHECK:       [[ELSE]]:
+  // CHECK-NOT:   getelementptr inbounds [2 x i8*], [2 x i8*]*
+  // CHECK:       [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+  // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+  // CHECK:       store i32 [[DEV]], i32* [[GEP]],
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]*
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 1
+  // CHECK:       getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 2
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call i32 @__kmpc_omp_task_with_deps(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       br label %[[EXIT:.+]]
+  // CHECK:       [[EXIT]]:
+
+  #pragma omp target teams distribute simd device(global + a) nowait depend(inout: global, a, bn) if(a)
+  for (int i = 0; i < *plocal; ++i) {
+    static int local1;
+    *plocal = global;
+    local1 = global;
+  }
+
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]*
+  // CHECK:       getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+  // CHECK:       [[DEP_START:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* %{{.+}}, i32 0, i32 0
+  // CHECK:       [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8*
+  // CHECK:       call void @__kmpc_omp_wait_deps(%ident_t* [[ID]], i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null)
+  // CHECK:       call void @__kmpc_omp_task_begin_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  // CHECK:       call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]])
+  // CHECK:       call void @__kmpc_omp_task_complete_if0(%ident_t* [[ID]], i32 [[GTID]], i8* [[TASK]])
+  #pragma omp target teams distribute simd if(0) firstprivate(global) depend(out:global)
+  for (int i = 0; i < global; ++i) {
+    global += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0:@.+]]()
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, [[TASK_TY0]]* noalias)
+// CHECK:       store void (i8*, ...)* null, void (i8*, ...)** %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 0, i32 0)
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT0]]()
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}})
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[SZT:%.+]] = getelementptr inbounds [2 x i[[SZ]]], [2 x i[[SZ]]]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
+// CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 0)
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK:       ret i32 0
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY1__]](i32{{.*}}, [[TASK_TY1__]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
+// CHECK:       [[BP0:%.+]] = load i[[SZ]]*, i[[SZ]]** %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT1]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+// CHECK:       define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}})
+// Create stack storage and store argument in there.
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    load i32, i32* [[AA_ADDR]], align
+
+// CHECK:       define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, [[TASK_TY2]]* noalias)
+// CHECK:       call void (i8*, ...) %
+// CHECK:       [[BP1_I32:%.+]] = load i32, i32* %
+// CHECK-64:    [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
+// CHECK-64:    store i32 [[BP1_I32]], i32* [[BP1_CAST]],
+// CHECK-32:    store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
+// CHECK:       [[BP1:%.+]] = load i[[SZ]], i[[SZ]]* [[BP1_PTR]],
+// CHECK:       call void [[HVT2]](i[[SZ]] [[BP1]])
+// CHECK:       ret i32 0
+
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_simd_depend_messages.cpp b/test/OpenMP/target_teams_distribute_simd_depend_messages.cpp
index c24c743..9c70d05 100644
--- a/test/OpenMP/target_teams_distribute_simd_depend_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_device_messages.cpp b/test/OpenMP/target_teams_distribute_simd_device_messages.cpp
index e9246cb..1f47d1c 100644
--- a/test/OpenMP/target_teams_distribute_simd_device_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
new file mode 100644
index 0000000..874d4fc
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
@@ -0,0 +1,213 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target teams distribute simd
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL2:.+]](
+    #pragma omp target teams distribute simd dist_schedule(static)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL3:.+]](
+    #pragma omp target teams distribute simd dist_schedule(static, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: define internal void @[[OFFL1]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL3]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+#pragma omp target teams distribute simd
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute simd dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target teams distribute simd dist_schedule(static, n)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+#pragma omp target teams distribute simd
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute simd dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target teams distribute simd dist_schedule(static, n)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/target_teams_distribute_simd_dist_schedule_messages.cpp b/test/OpenMP/target_teams_distribute_simd_dist_schedule_messages.cpp
index 9b9239d..b9fdc18 100644
--- a/test/OpenMP/target_teams_distribute_simd_dist_schedule_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
new file mode 100644
index 0000000..34cf1f2
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
@@ -0,0 +1,346 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target teams distribute simd firstprivate(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
+    // LAMBDA:  ret
+#pragma omp target teams distribute simd firstprivate(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]],
+    // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]],
+    // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]],
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+    // skip loop vars
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+    // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+    // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_CONV]],
+    // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]]
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_CONV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute simd firstprivate(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]]({{.+}})
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]],
+// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]],
+// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]],
+// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]],
+// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[TOFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK:  ret
+
+// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}})
+// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]],
+// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]],
+// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
+// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp b/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp
index b9b039e..a4aaba7 100644
--- a/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -124,7 +126,8 @@
 #pragma omp target teams distribute simd firstprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute simd lastprivate(argc), firstprivate(argc) // OK
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute simd lastprivate(argc), firstprivate(argc)
   for (i = 0; i < argc; ++i) foo();
 
 #pragma omp target teams distribute simd firstprivate(argc) map(argc) // expected-error {{firstprivate variable cannot be in a map clause in '#pragma omp target teams distribute simd' directive}} expected-note {{defined as firstprivate}}
diff --git a/test/OpenMP/target_teams_distribute_simd_if_messages.cpp b/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
index 6cb2ad4..43d8957 100644
--- a/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_is_device_ptr_ast_print.cpp b/test/OpenMP/target_teams_distribute_simd_is_device_ptr_ast_print.cpp
index 61aede4..eb5347d 100644
--- a/test/OpenMP/target_teams_distribute_simd_is_device_ptr_ast_print.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_is_device_ptr_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -56,7 +60,7 @@
 // CHECK-NEXT: int aa[10];
 // CHECK-NEXT: arr &raa = this->aa;
 // CHECK-NEXT: func(
-// CHECK-NEXT: #pragma omp target teams distribute simd is_device_ptr(this->k)
+// CHECK-NEXT: #pragma omp target teams distribute simd is_device_ptr(this->k){{$}}
 // CHECK-NEXT: for (int i = 0; i < 100; i++)
 // CHECK-NEXT: ;
 // CHECK-NEXT: #pragma omp target teams distribute simd is_device_ptr(this->z)
diff --git a/test/OpenMP/target_teams_distribute_simd_is_device_ptr_messages.cpp b/test/OpenMP/target_teams_distribute_simd_is_device_ptr_messages.cpp
index 013b703..3408305 100644
--- a/test/OpenMP/target_teams_distribute_simd_is_device_ptr_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_is_device_ptr_messages.cpp
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -std=c++11 -verify -fopenmp %s
+
+// RUN: %clang_cc1 -std=c++11 -verify -fopenmp-simd %s
 struct ST {
   int *a;
 };
diff --git a/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
new file mode 100644
index 0000000..fdf6d2f
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
@@ -0,0 +1,401 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target teams distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target teams distribute simd lastprivate(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA-64: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 [[G_IN:%.+]], i64 [[G1_IN:%.+]], i64 [[SVAR_IN:%.+]], i64 [[SFVAR_IN:%.+]])
+      // LAMBDA-32: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i32 [[SVAR_IN:%.+]], i32 [[SFVAR_IN:%.+]])
+      // LAMBDA-64: [[G_PRIVATE_ADDR:%.+]] = alloca i64,
+      // LAMBDA-32: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA-64: [[G1_PRIVATE_ADDR:%.+]] = alloca i64,
+      // LAMBDA-32: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA-64: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i64,
+      // LAMBDA-32: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i32,
+      // LAMBDA-64: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca i64,
+      // LAMBDA-32: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca i32,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+      // LAMBDA-64: store i64 [[G_IN]], i64* [[G_PRIVATE_ADDR]],
+      // LAMBDA-32: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
+      // LAMBDA-64: store i64 [[G1_IN]], i64* [[G1_PRIVATE_ADDR]],
+      // LAMBDA-32: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA-64: store i64 [[SVAR_IN]], i64* [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA-32: store i32 [[SVAR_IN]], i32* [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA-64: store i64 [[SFVAR_IN]], i64* [[SFVAR_PRIVATE_ADDR]],
+      // LAMBDA-32: store i32 [[SFVAR_IN]], i32* [[SFVAR_PRIVATE_ADDR]],
+
+      // init private variables
+      // LAMBDA-64: [[G_IN_REF:%.+]] = bitcast i64* [[G_PRIVATE_ADDR]] to double*
+      // LAMBDA-32: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA-64: [[G1_IN_REF:%.+]] = bitcast i64* [[G1_PRIVATE_ADDR]] to double*
+      // LAMBDA-64: store double* [[G1_IN_REF]], double** [[G1_IN_ADDR_REF:%.+]],
+      // LAMBDA-64: [[SVAR_IN_REF:%.+]] = bitcast i64* [[SVAR_PRIVATE_ADDR]] to i32*
+      // LAMBDA-64: [[SFVAR_IN_REF:%.+]] = bitcast i64* [[SFVAR_PRIVATE_ADDR]] to float*
+      // LAMBDA-32: [[SFVAR_IN_REF:%.+]] = bitcast i32* [[SFVAR_PRIVATE_ADDR]] to float*
+      // LAMBDA-64: [[G1_IN_REF:%.+]] = load double*, double** [[G1_IN_ADDR_REF]],
+      // LAMBDA-32: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
+      g = 1;
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]],
+      // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]],
+      // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+      // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+      // LAMBDA: store i32 2, i32* %
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+      // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
+      // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
+
+      // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA-64: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
+      // LAMBDA-32: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+      [&]() {
+        // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+        // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+        g = 2;
+        g1 = 2;
+        svar = 4;
+        sfvar = 8.0;
+        // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+        // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+        // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+        // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+        // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+        // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+        // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+        // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+        // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+        // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+        // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+        // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+        // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target teams distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  int i;
+
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]]([2 x i{{[0-9]+}}]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[S_VAR_IN:%.+]])
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_IN]], i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} [[S_VAR_IN]], i{{[0-9]+}}* [[SVAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[T_VAR_ADDR_REF:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-64: [[SVAR_ADDR_REF:%.+]] = bitcast i64* [[SVAR_ADDR]] to i32*
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// assignment: vec[i] = t_var;
+// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK-64: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
+// CHECK-64: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
+// CHECK-32: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR]],
+// CHECK: ret void
+
+// template tmain
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
+// CHECK: ret
+
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip init of bound and global tid
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_IN1]], i{{[0-9]+}}* [[T_VAR_ADDR1]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK-64: [[T_VAR_ADDR_REF1:%.+]] = bitcast i64* [[T_VAR_ADDR1]] to i32*
+// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: call void @__kmpc_for_static_init_4(
+// assignment: vec[i] = t_var;
+// CHECK: [[IV_VAL1:%.+]] =
+// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]],
+// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]],
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK-64: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
+// CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR1]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: ret void
+#endif
diff --git a/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp b/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp
index b8b1916..3a9abfb 100644
--- a/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
@@ -25,7 +27,7 @@
 const S2 ba[5];
 class S3 {
   int a;
-  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+  S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}}
 
 public:
   S3() : a(0) {}
@@ -52,7 +54,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -216,10 +218,12 @@
 #pragma omp target teams distribute simd lastprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+// expected-error@+1 {{firstprivate variable cannot be lastprivate}} expected-note@+1 {{defined as firstprivate}}
+#pragma omp target teams distribute simd firstprivate(m) lastprivate(m)
   for (i = 0; i < argc; ++i) foo();
 
-#pragma omp target teams distribute simd lastprivate(n) firstprivate(n) // OK
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i) foo();
 
   static int si;
diff --git a/test/OpenMP/target_teams_distribute_simd_linear_messages.cpp b/test/OpenMP/target_teams_distribute_simd_linear_messages.cpp
index 8db57a0..5bcd77d 100644
--- a/test/OpenMP/target_teams_distribute_simd_linear_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
   int x;
 };
@@ -18,33 +20,42 @@
 {
   int B = 0;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear(B:bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear(B:B::bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear(X::x : ::z)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear(B,::z, X::x)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear(::z)
   for (int i = 0; i < 10; ++i) ;
 
 #pragma omp target teams distribute simd linear(B::bfoo()) // expected-error {{expected variable name}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+1 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear(B::ib,B:C1+C2)
   for (int i = 0; i < 10; ++i) ;
 }
@@ -65,6 +76,7 @@
 
 template<int LEN> int test_warn() {
   int ind2 = 0;
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
   #pragma omp target teams distribute simd linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}}
   for (int i = 0; i < 100; i++) {
     ind2 += LEN;
@@ -118,15 +130,18 @@
 #pragma omp target teams distribute simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
 #pragma omp target teams distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear (argc : 5)
   for (int k = 0; k < argc; ++k) ++k;
 
@@ -139,33 +154,17 @@
 #pragma omp target teams distribute simd linear (argv[1]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear(e, g)
   for (int k = 0; k < argc; ++k) ++k;
 
 #pragma omp target teams distribute simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear(i)
   for (int k = 0; k < argc; ++k) ++k;
 
-  #pragma omp parallel
-  {
-    int v = 0;
-    int i;
-    #pragma omp target teams distribute simd linear(v:i)
-    for (int k = 0; k < argc; ++k) { i = k; v += i; }
-  }
-
-#pragma omp target teams distribute simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  int v = 0;
-
-#pragma omp target teams distribute simd linear(v:j)
-  for (int k = 0; k < argc; ++k) { ++k; v += j; }
-
-#pragma omp target teams distribute simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
   return 0;
 }
 
@@ -198,15 +197,18 @@
 #pragma omp target teams distribute simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
 #pragma omp target teams distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target teams distribute simd linear (argc)
   for (int k = 0; k < argc; ++k) ++k;
 
@@ -226,22 +228,6 @@
 #pragma omp target teams distribute simd linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}}
   for (int k = 0; k < argc; ++k) ++k;
 
-  #pragma omp parallel
-  {
-    int i;
-    #pragma omp target teams distribute simd linear(i)
-      for (int k = 0; k < argc; ++k) ++k;
-
-    #pragma omp target teams distribute simd linear(i : 4)
-      for (int k = 0; k < argc; ++k) { ++k; i += 4; }
-  }
-
-#pragma omp target teams distribute simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target teams distribute simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-
   foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
   return 0;
 }
diff --git a/test/OpenMP/target_teams_distribute_simd_loop_messages.cpp b/test/OpenMP/target_teams_distribute_simd_loop_messages.cpp
index aae00a4..463c88d 100644
--- a/test/OpenMP/target_teams_distribute_simd_loop_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_loop_messages.cpp
@@ -1,8 +1,10 @@
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s -Wno-openmp-target
 
 class S {
   int a;
-  S() : a(0) {}
+  S() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S(int v) : a(v) {}
@@ -607,7 +609,8 @@
 
 void test_loop_firstprivate_lastprivate() {
   S s(4);
-#pragma omp target teams distribute simd lastprivate(s) firstprivate(s)
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
+#pragma omp target teams distribute simd lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}}
   for (int i = 0; i < 16; ++i)
     ;
 }
diff --git a/test/OpenMP/target_teams_distribute_simd_map_messages.cpp b/test/OpenMP/target_teams_distribute_simd_map_messages.cpp
index 29d48d9..73671d7 100644
--- a/test/OpenMP/target_teams_distribute_simd_map_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_map_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/target_teams_distribute_simd_messages.cpp b/test/OpenMP/target_teams_distribute_simd_messages.cpp
index aa82c94..6ee8073 100644
--- a/test/OpenMP/target_teams_distribute_simd_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
@@ -62,7 +64,7 @@
 // expected-error@+1 {{unexpected OpenMP clause 'default' in directive '#pragma omp target teams distribute simd'}}
 #pragma omp target teams distribute simd default(none)
   for (int i = 0; i < 10; ++i)
-    ++argc; // expected-error {{ariable 'argc' must have explicitly specified data sharing attributes}}
+    ++argc;
 
   goto L2; // expected-error {{use of undeclared label 'L2'}}
 #pragma omp target teams distribute simd
diff --git a/test/OpenMP/target_teams_distribute_simd_misc_messages.c b/test/OpenMP/target_teams_distribute_simd_misc_messages.c
index c76fdea..397795f 100644
--- a/test/OpenMP/target_teams_distribute_simd_misc_messages.c
+++ b/test/OpenMP/target_teams_distribute_simd_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp target teams distribute simd'}}
 #pragma omp target teams distribute simd
 
@@ -285,15 +287,22 @@
     ;
 
   int x, y, z;
+// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}}
 #pragma omp target teams distribute simd lastprivate(x) firstprivate(x)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+1 2 {{lastprivate variable cannot be firstprivate}} expected-note@+1 2 {{defined as lastprivate}}
 #pragma omp target teams distribute simd lastprivate(x, y) firstprivate(x, y)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+1 3 {{lastprivate variable cannot be firstprivate}} expected-note@+1 3 {{defined as lastprivate}}
 #pragma omp target teams distribute simd lastprivate(x, y, z) firstprivate(x, y, z)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+1 {{the value of 'simdlen' parameter must be less than or equal to the value of the 'safelen' parameter}}
+#pragma omp target teams distribute simd simdlen(64) safelen(8)
+  for (i = 0; i < 16; ++i)
+    ;
 }
 
 void test_loop_messages() {
diff --git a/test/OpenMP/target_teams_distribute_simd_nowait_messages.cpp b/test/OpenMP/target_teams_distribute_simd_nowait_messages.cpp
index 6c2c2a5..25daa81 100644
--- a/test/OpenMP/target_teams_distribute_simd_nowait_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -fopenmp %s
 
+// RUN: %clang_cc1 -fsyntax-only -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_num_teams_messages.cpp b/test/OpenMP/target_teams_distribute_simd_num_teams_messages.cpp
index 63926bc..cdc5244 100644
--- a/test/OpenMP/target_teams_distribute_simd_num_teams_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_num_teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
new file mode 100644
index 0000000..f17550f
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
@@ -0,0 +1,246 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target teams distribute simd private(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]]()
+    // LAMBDA:  ret
+#pragma omp target teams distribute simd private(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]]()
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}})
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+    // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute simd private(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]]()
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call void @[[TOFFL1:.+]]()
+// CHECK:  ret
+
+// CHECK: define {{.*}}void @[[TOFFL1]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_simd_private_messages.cpp b/test/OpenMP/target_teams_distribute_simd_private_messages.cpp
index 3d351bd..743ac16 100644
--- a/test/OpenMP/target_teams_distribute_simd_private_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
new file mode 100644
index 0000000..c533bb3
--- /dev/null
+++ b/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
@@ -0,0 +1,224 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+T tmain() {
+  T t_var = T();
+  T vec[] = {1, 2};
+#pragma omp target teams distribute simd reduction(+: t_var)
+  for (int i = 0; i < 2; ++i) {
+    t_var += (T) i;
+  }
+  return T();
+}
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target teams distribute simd reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i32*{{.+}} [[SIVAR_ARG:%.+]])
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR:%.+]] = load i32*, i32** [[SIVAR_ADDR]],
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}}*{{.+}} [[SIVAR_ARG:%.+]])
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+    // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+    // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+    // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+    // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+    // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+    // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+    // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+    // LAMBDA: ]
+    // LAMBDA: [[CASE1]]:
+    // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+    // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    // LAMBDA: [[CASE2]]:
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    sivar += i;
+
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+
+      sivar += 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
+      // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4
+      // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target teams distribute simd reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i32* {{.+}})
+// CHECK: [[RES:%.+]] = call{{.*}} i32 @[[TMAIN_INT:[^(]+]]()
+// CHECK: ret i32 [[RES]]
+
+// CHECK: define{{.*}} void @[[OFFL1]](i32*{{.+}} [[SIVAR_ARG:%.+]])
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}}** [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_LOAD:%.+]] = load i32*, i32** [[SIVAR_ADDR]],
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_LOAD]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, i32*{{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i32,
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_REF:%.+]] = load i32*, i32** [[SIVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call void @[[TOFFL1:.+]]({{.+}}* {{.+}})
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[TOFFL1]](i32*{{.+}} [[TVAR_ARG:%.+]])
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR:%.+]] = load i32*, i32** [[TVAR_ADDR]],
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}}*{{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load i32*, i32** [[TVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+
+#endif
diff --git a/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp b/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp
index 35d4e12..b77082b 100644
--- a/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_safelen_messages.cpp b/test/OpenMP/target_teams_distribute_simd_safelen_messages.cpp
index 1f41c53..67b2221 100644
--- a/test/OpenMP/target_teams_distribute_simd_safelen_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_shared_messages.cpp b/test/OpenMP/target_teams_distribute_simd_shared_messages.cpp
index 9f98ab9..3147bae 100644
--- a/test/OpenMP/target_teams_distribute_simd_shared_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_shared_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_simdlen_messages.cpp b/test/OpenMP/target_teams_distribute_simd_simdlen_messages.cpp
index 1b7ad66..6fea25a 100644
--- a/test/OpenMP/target_teams_distribute_simd_simdlen_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_simdlen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_distribute_simd_thread_limit_messages.cpp b/test/OpenMP/target_teams_distribute_simd_thread_limit_messages.cpp
index 357f5dd..593650b 100644
--- a/test/OpenMP/target_teams_distribute_simd_thread_limit_messages.cpp
+++ b/test/OpenMP/target_teams_distribute_simd_thread_limit_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_firstprivate_messages.cpp b/test/OpenMP/target_teams_firstprivate_messages.cpp
index 84fc4ab..4927969 100644
--- a/test/OpenMP/target_teams_firstprivate_messages.cpp
+++ b/test/OpenMP/target_teams_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_if_messages.cpp b/test/OpenMP/target_teams_if_messages.cpp
index 0ab8fc3..84c900a 100644
--- a/test/OpenMP/target_teams_if_messages.cpp
+++ b/test/OpenMP/target_teams_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_is_device_ptr_ast_print.cpp b/test/OpenMP/target_teams_is_device_ptr_ast_print.cpp
index 3a9cee5..19fe955 100644
--- a/test/OpenMP/target_teams_is_device_ptr_ast_print.cpp
+++ b/test/OpenMP/target_teams_is_device_ptr_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -49,7 +53,7 @@
 // CHECK-NEXT: int aa[10];
 // CHECK-NEXT: arr &raa = this->aa;
 // CHECK-NEXT: func(
-// CHECK-NEXT: #pragma omp target teams is_device_ptr(this->k)
+// CHECK-NEXT: #pragma omp target teams is_device_ptr(this->k){{$}}
 // CHECK-NEXT: {
 // CHECK-NEXT: }
 // CHECK-NEXT: #pragma omp target teams is_device_ptr(this->z)
diff --git a/test/OpenMP/target_teams_is_device_ptr_messages.cpp b/test/OpenMP/target_teams_is_device_ptr_messages.cpp
index 0f5be43..8ae6d7f 100644
--- a/test/OpenMP/target_teams_is_device_ptr_messages.cpp
+++ b/test/OpenMP/target_teams_is_device_ptr_messages.cpp
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -std=c++11 -verify -fopenmp -ferror-limit 200 %s
+
+// RUN: %clang_cc1 -std=c++11 -verify -fopenmp-simd -ferror-limit 200 %s
 struct ST {
   int *a;
 };
diff --git a/test/OpenMP/target_teams_map_messages.cpp b/test/OpenMP/target_teams_map_messages.cpp
index ae5f7bc..57d9396 100644
--- a/test/OpenMP/target_teams_map_messages.cpp
+++ b/test/OpenMP/target_teams_map_messages.cpp
@@ -1,5 +1,7 @@
-// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 200 %s
-// rUN: %clang_cc1 -DCCODE -verify -fopenmp -ferror-limit 200 -x c %s
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 200 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 200 %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCCODE -verify -fopenmp -ferror-limit 200 -x c %s -Wno-openmp-target
 #ifdef CCODE
 void foo(int arg) {
   const int n = 0;
@@ -223,9 +225,17 @@
   {}
   #pragma omp target teams map(r.ArrS[0].A, t.ArrS[1].A)
   {}
-  #pragma omp target teams map(r.PtrS[0], r.PtrS->B) // expected-error {{same pointer derreferenced in multiple different ways in map clause expressions}} expected-note {{used here}}
+  #pragma omp target teams map(r.PtrS[0], r.PtrS->B) // expected-error {{same pointer dereferenced in multiple different ways in map clause expressions}} expected-note {{used here}}
   {}
-  #pragma omp target teams map(r.RPtrS[0], r.RPtrS->B) // expected-error {{same pointer derreferenced in multiple different ways in map clause expressions}} expected-note {{used here}}
+  #pragma omp target teams map(r.PtrS, r.PtrS->B) // expected-error {{pointer cannot be mapped along with a section derived from itself}} expected-note {{used here}}
+  {}
+  #pragma omp target teams map(r.PtrS->A, r.PtrS->B)
+  {}
+  #pragma omp target teams map(r.RPtrS[0], r.RPtrS->B) // expected-error {{same pointer dereferenced in multiple different ways in map clause expressions}} expected-note {{used here}}
+  {}
+  #pragma omp target teams map(r.RPtrS, r.RPtrS->B) // expected-error {{pointer cannot be mapped along with a section derived from itself}} expected-note {{used here}}
+  {}
+  #pragma omp target teams map(r.RPtrS->A, r.RPtrS->B)
   {}
   #pragma omp target teams map(r.S.Arr[:12])
   {}
@@ -265,7 +275,7 @@
   {}
   #pragma omp target teams map((p+1)->A)  // expected-error {{expected expression containing only member accesses and/or array sections based on named variables}}
   {}
-  #pragma omp target teams map(u.B)  // expected-error {{mapped storage cannot be derived from a union}}
+  #pragma omp target teams map(u.B)  // expected-error {{mapping of union members is not allowed}}
   {}
 
   #pragma omp target data map(to: r.C) //expected-note {{used here}}
diff --git a/test/OpenMP/target_teams_messages.cpp b/test/OpenMP/target_teams_messages.cpp
index 7fc7d1f..3a367bf 100644
--- a/test/OpenMP/target_teams_messages.cpp
+++ b/test/OpenMP/target_teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_nowait_messages.cpp b/test/OpenMP/target_teams_nowait_messages.cpp
index 9cf8cc3..aeb1829 100644
--- a/test/OpenMP/target_teams_nowait_messages.cpp
+++ b/test/OpenMP/target_teams_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_num_teams_codegen.cpp b/test/OpenMP/target_teams_num_teams_codegen.cpp
index 068f76e..d5b83ac 100644
--- a/test/OpenMP/target_teams_num_teams_codegen.cpp
+++ b/test/OpenMP/target_teams_num_teams_codegen.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -147,7 +165,7 @@
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -158,7 +176,7 @@
 //
 //
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1024, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1024, i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -186,7 +204,7 @@
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 [[TEAMS]], i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 [[TEAMS]], i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -207,7 +225,7 @@
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 [[TEAMS]], i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 [[TEAMS]], i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -225,7 +243,7 @@
 //
 // CHECK: define {{.*}}[[FTEMPLATE]]
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 20, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 20, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -247,7 +265,7 @@
 // CHECK:       [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align
 // CHECK:       [[TEAMS:%.+]] = sext i16 [[T]] to i32
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
diff --git a/test/OpenMP/target_teams_num_teams_messages.cpp b/test/OpenMP/target_teams_num_teams_messages.cpp
index e220fb6..9134af6 100644
--- a/test/OpenMP/target_teams_num_teams_messages.cpp
+++ b/test/OpenMP/target_teams_num_teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_private_messages.cpp b/test/OpenMP/target_teams_private_messages.cpp
index 69551ff..7ee509c 100644
--- a/test/OpenMP/target_teams_private_messages.cpp
+++ b/test/OpenMP/target_teams_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_reduction_messages.cpp b/test/OpenMP/target_teams_reduction_messages.cpp
index b5876b1..c62f2c5 100644
--- a/test/OpenMP/target_teams_reduction_messages.cpp
+++ b/test/OpenMP/target_teams_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_shared_messages.cpp b/test/OpenMP/target_teams_shared_messages.cpp
index 84e412e..3fb8496 100644
--- a/test/OpenMP/target_teams_shared_messages.cpp
+++ b/test/OpenMP/target_teams_shared_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_teams_thread_limit_codegen.cpp b/test/OpenMP/target_teams_thread_limit_codegen.cpp
index c56d142..01efd12 100644
--- a/test/OpenMP/target_teams_thread_limit_codegen.cpp
+++ b/test/OpenMP/target_teams_thread_limit_codegen.cpp
@@ -6,6 +6,14 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
@@ -16,6 +24,16 @@
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -147,7 +165,7 @@
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[TL:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 0, i32 [[TL]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 0, i32 [[TL]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -158,7 +176,7 @@
 //
 //
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 0, i32 1024)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 0, i32 1024)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -195,7 +213,7 @@
 // CHECK:       [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR1]], align
 // CHECK:       [[TL:%.+]] = load i32, i32* [[CAPE_ADDR2]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 [[TEAMS]], i32 [[TL]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 [[TEAMS]], i32 [[TL]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -216,7 +234,7 @@
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[TL:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 0, i32 [[TL]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 0, i32 [[TL]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -234,7 +252,7 @@
 //
 // CHECK: define {{.*}}[[FTEMPLATE]]
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 0, i32 20)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 0, i32 20)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -256,7 +274,7 @@
 // CHECK:       [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align
 // CHECK:       [[TEAMS:%.+]] = sext i16 [[T]] to i32
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 1024)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 1024)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
diff --git a/test/OpenMP/target_teams_thread_limit_messages.cpp b/test/OpenMP/target_teams_thread_limit_messages.cpp
index 2685f24..4b0a4b0 100644
--- a/test/OpenMP/target_teams_thread_limit_messages.cpp
+++ b/test/OpenMP/target_teams_thread_limit_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_update_ast_print.cpp b/test/OpenMP/target_update_ast_print.cpp
index cbff907..e60e081 100644
--- a/test/OpenMP/target_update_ast_print.cpp
+++ b/test/OpenMP/target_update_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -21,7 +25,7 @@
 // CHECK:      static T a;
 // CHECK-NEXT: U b;
 // CHECK-NEXT: int l;
-// CHECK-NEXT: #pragma omp target update to(a) if(l > 5) device(l) nowait depend(inout : l)
+// CHECK-NEXT: #pragma omp target update to(a) if(l > 5) device(l) nowait depend(inout : l){{$}}
 // CHECK-NEXT: #pragma omp target update from(b) if(l < 5) device(l - 1) nowait depend(inout : l)
 // CHECK:      static int a;
 // CHECK-NEXT: float b;
diff --git a/test/OpenMP/target_update_codegen.cpp b/test/OpenMP/target_update_codegen.cpp
index f1e61a5..ee76525 100644
--- a/test/OpenMP/target_update_codegen.cpp
+++ b/test/OpenMP/target_update_codegen.cpp
@@ -9,6 +9,14 @@
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
 // RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK1
 
 // CK1: [[ST:%.+]] = type { i32, double* }
@@ -22,15 +30,15 @@
 double gc[100];
 
 // CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 800]
-// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i32] [i32 34]
+// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 34]
 
 // CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4]
-// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i32] [i32 33]
+// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 33]
 
-// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i32] [i32 34]
+// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 34]
 
 // CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24]
-// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i32] [i32 33, i32 17]
+// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17]
 
 // CK1-LABEL: _Z3fooi
 void foo(int arg) {
@@ -38,8 +46,9 @@
   float lb[arg];
 
   // Region 00
-  // CK1-DAG: call void @__tgt_target_data_update(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
-  // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}},
+  // CK1-DAG: call void @__tgt_target_data_update_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+  // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -51,7 +60,7 @@
   // CK1-DAG: store [100 x double]* @gc, [100 x double]** [[PC0]]
 
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
-  #pragma omp target update if(1+3-5) device(arg) from(gc)
+  #pragma omp target update if(1+3-5) device(arg) from(gc) nowait
   {++arg;}
 
   // Region 01
@@ -62,7 +71,7 @@
   // Region 02
   // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CK1: [[IFTHEN]]
-  // CK1-DAG: call void @__tgt_target_data_update(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_update(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -85,7 +94,7 @@
   {++arg;}
 
   // Region 03
-  // CK1-DAG: call void @__tgt_target_data_update(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -108,7 +117,7 @@
   {++arg;}
 
   // Region 04
-  // CK1-DAG: call void @__tgt_target_data_update(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -142,6 +151,14 @@
 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
 // RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 #ifdef CK2
 
 // CK2: [[ST:%.+]] = type { i32, double* }
@@ -159,7 +176,7 @@
 };
 
 // CK2: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 24]
-// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i32] [i32 34, i32 18]
+// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 34, i64 18]
 
 // CK2-LABEL: _Z3bari
 int bar(int arg){
@@ -170,8 +187,9 @@
 // Region 00
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_update(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
-// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}},
+// CK2-DAG: call void @__tgt_target_data_update(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+// CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -208,6 +226,14 @@
 // RUN: %clang_cc1 -DCK3 -verify -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
 // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
+
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -verify -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
 #ifdef CK3
 
 // CK3-LABEL: no_target_devices
@@ -227,6 +253,14 @@
 // RUN: %clang_cc1 -DCK4 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-32
 
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// SIMD-ONLY3-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCK4 --check-prefix TCK4-64
 // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
@@ -235,6 +269,16 @@
 // RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCK4 --check-prefix TCK4-32
 // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCK4 --check-prefix TCK4-32
+
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// SIMD-ONLY4-NOT: {{__kmpc|__tgt}}
 #ifdef CK4
 
 // CK4-LABEL: device_side_scan
diff --git a/test/OpenMP/target_update_depend_codegen.cpp b/test/OpenMP/target_update_depend_codegen.cpp
new file mode 100644
index 0000000..550d31f
--- /dev/null
+++ b/test/OpenMP/target_update_depend_codegen.cpp
@@ -0,0 +1,386 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+// CK1: [[ST:%.+]] = type { i32, double* }
+// CK1: %struct.kmp_depend_info = type { i[[sz:64|32]],
+// CK1-SAME: i[[sz]], i8 }
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+struct ST {
+  T a;
+  double *b;
+};
+
+ST<int> gb;
+double gc[100];
+
+// CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 800]
+// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 34]
+
+// CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4]
+// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 33]
+
+// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 34]
+
+// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24]
+// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17]
+
+// CK1-LABEL: _Z3fooi
+void foo(int arg) {
+  int la;
+  float lb[arg];
+
+  // CK1: alloca [1 x %struct.kmp_depend_info],
+  // CK1: alloca [3 x %struct.kmp_depend_info],
+  // CK1: alloca [4 x %struct.kmp_depend_info],
+  // CK1: alloca [5 x %struct.kmp_depend_info],
+
+  // Region 00
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to [100 x double]**
+  // CK1: store [100 x double]* @gc, [100 x double]** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to [100 x double]**
+  // CK1: store [100 x double]* @gc, [100 x double]** [[P0_BC]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[CAP_DEVICE:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[CAPTURES:%.+]], i32 0, i32 0
+  // CK1: [[DEVICE:%.+]] = load i32, i32* %{{.+}}
+  // CK1: store i32 [[DEVICE]], i32* [[CAP_DEVICE]],
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 0
+  // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
+  // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]],
+  // CK1: [[BC1:%.+]] = bitcast %struct.anon* [[CAPTURES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 4 [[SHAREDS_REF]], i8* align 4 [[BC1]], i[[sz]] 4, i1 false)
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: = call i32 @__kmpc_omp_task_with_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i8* [[BC]], i32 0, i8* null)
+
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  #pragma omp target update if(1+3-5) device(arg) from(gc) nowait depend(in: arg)
+  {++arg;}
+
+  // Region 01
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  #pragma omp target update to(la) if(1+3-4) depend(in: la) depend(out: arg)
+  {++arg;}
+
+  // Region 02
+  // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
+  // CK1: [[IFTHEN]]
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to i32**
+  // CK1: store i32* [[ARG:%.+]], i32** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to i32**
+  // CK1: store i32* [[ARG]], i32** [[P0_BC]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[IF_DEVICE:%.+]] = getelementptr inbounds %struct.anon{{.+}}, %struct.anon{{.+}}* [[CAPTURES:%.+]], i32 0, i32 0
+  // CK1: [[IF:%.+]] = load i8, i8* %{{.+}}
+  // CK1: [[IF_BOOL:%.+]] = trunc i8 [[IF]] to i1
+  // CK1: [[IF:%.+]] = zext i1 [[IF_BOOL]] to i8
+  // CK1: store i8 [[IF]], i8* [[IF_DEVICE]],
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
+  // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
+  // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]],
+  // CK1: [[BC1:%.+]] = bitcast %struct.anon{{.+}}* [[CAPTURES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 1 [[SHAREDS_REF]], i8* align 1 [[BC1]], i[[sz]] 1, i1 false)
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 3, i8* [[BC]], i32 0, i8* null)
+  // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  // CK1: = call i32 [[TASK_ENTRY2]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]])
+  // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+
+  // CK1: br label %[[IFEND:[^,]+]]
+
+  // CK1: [[IFELSE]]
+  // CK1: br label %[[IFEND]]
+  // CK1: [[IFEND]]
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  #pragma omp target update to(arg) if(arg) device(4) depend(inout: arg, la, gc)
+  {++arg;}
+
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  {++arg;}
+
+  // Region 03
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to float**
+  // CK1: store float* [[VLA:%.+]], float** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to float**
+  // CK1: store float* [[VLA]], float** [[P0_BC]],
+  // CK1: [[S0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S:%.+]], i32 0, i32 0
+  // CK1: store i[[sz]] {{.+}}, i[[sz]]* [[S0]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[GEPS0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S]], i32 0, i32 0
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: [[BC_SIZES:%.+]] = bitcast i[[sz]]* [[GEPS0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{8|4}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 3, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 4, i8* [[BC]], i32 0, i8* null)
+  // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  // CK1: = call i32 [[TASK_ENTRY3]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]])
+  // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  #pragma omp target update from(lb) depend(out: lb, arg, la, gc)
+  {++arg;}
+
+  // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
+  {++arg;}
+
+  // Region 04
+  // CK1: [[BP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0
+  // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to %struct.ST**
+  // CK1: store %struct.ST* @gb, %struct.ST** [[BP0_BC]],
+  // CK1: [[P0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
+  // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to double***
+  // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[P0_BC]],
+  // CK1: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
+  // CK1: [[BP1_BC:%.+]] = bitcast i8** [[BP1]] to double***
+  // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[BP1_BC]],
+  // CK1: [[P1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
+  // CK1: [[P1_BC:%.+]] = bitcast i8** [[P1]] to double**
+  // CK1: store double* %{{.+}}, double** [[P1_BC]],
+  // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
+  // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|44}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
+  // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
+  // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
+  // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
+  // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8*
+  // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i1 false)
+  // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
+  // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8*
+  // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false)
+  // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
+  // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
+  // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false)
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 4
+  // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0
+  // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]]
+  // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]],
+  // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1
+  // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]],
+  // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2
+  // CK1: store i8 1, i8* [[DEP_ATTRS]]
+  // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0
+  // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8*
+  // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 5, i8* [[BC]], i32 0, i8* null)
+  // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  // CK1: = call i32 [[TASK_ENTRY4]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]])
+  // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  #pragma omp target update to(gb.b[:3]) depend(in: gb.b[:3], la, lb, gc, arg)
+  {++arg;}
+}
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, %struct.kmp_task_t_with_privates* noalias)
+// CK1-DAG: call void @__tgt_target_data_update_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
+// CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]])
+// CK1: ret i32 0
+// CK1: }
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias)
+// CK1-DAG: call void @__tgt_target_data_update(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]])
+// CK1: ret i32 0
+// CK1: }
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY3]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias)
+// CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+
+// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]])
+// CK1-NOT: __tgt_target_data_end
+// CK1: ret i32 0
+// CK1: }
+
+// CK1: define internal{{.*}} i32 [[TASK_ENTRY4]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias)
+// CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
+// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+
+// CK1-DAG: [[BP]] = load [2 x i8*]*, [2 x i8*]** [[BP_PRIV:%.+]],
+// CK1-DAG: [[P]] = load [2 x i8*]*, [2 x i8*]** [[P_PRIV:%.+]],
+// CK1-DAG: [[S]] = load [2 x i[[sz]]]*, [2 x i[[sz]]]** [[S_PRIV:%.+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i[[sz]]]** [[S_PRIV]])
+// CK1-NOT: __tgt_target_data_end
+// CK1: ret i32 0
+// CK1: }
+
+#endif
diff --git a/test/OpenMP/target_update_depend_messages.cpp b/test/OpenMP/target_update_depend_messages.cpp
index 59e159e..d52f566 100644
--- a/test/OpenMP/target_update_depend_messages.cpp
+++ b/test/OpenMP/target_update_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_update_device_messages.cpp b/test/OpenMP/target_update_device_messages.cpp
index 3711275..228b4a8 100644
--- a/test/OpenMP/target_update_device_messages.cpp
+++ b/test/OpenMP/target_update_device_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_update_from_messages.cpp b/test/OpenMP/target_update_from_messages.cpp
index c42bd12..e5a1f04 100644
--- a/test/OpenMP/target_update_from_messages.cpp
+++ b/test/OpenMP/target_update_from_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/target_update_if_messages.cpp b/test/OpenMP/target_update_if_messages.cpp
index 3d076a6..4f229a7 100644
--- a/test/OpenMP/target_update_if_messages.cpp
+++ b/test/OpenMP/target_update_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_update_messages.cpp b/test/OpenMP/target_update_messages.cpp
index 73f1eec..da71ff6 100644
--- a/test/OpenMP/target_update_messages.cpp
+++ b/test/OpenMP/target_update_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/target_update_nowait_messages.cpp b/test/OpenMP/target_update_nowait_messages.cpp
index 19bc58e..fb3d035 100644
--- a/test/OpenMP/target_update_nowait_messages.cpp
+++ b/test/OpenMP/target_update_nowait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 int main(int argc, char **argv) {
   int i;
 
diff --git a/test/OpenMP/target_update_to_messages.cpp b/test/OpenMP/target_update_to_messages.cpp
index 9bde51e..58f6aee 100644
--- a/test/OpenMP/target_update_to_messages.cpp
+++ b/test/OpenMP/target_update_to_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/target_vla_messages.cpp b/test/OpenMP/target_vla_messages.cpp
new file mode 100644
index 0000000..b744081
--- /dev/null
+++ b/test/OpenMP/target_vla_messages.cpp
@@ -0,0 +1,206 @@
+// PowerPC supports VLAs.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-unknown-unknown -emit-llvm-bc %s -o %t-ppc-host-ppc.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host-ppc.bc -o %t-ppc-device.ll
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-unknown-unknown -emit-llvm-bc %s -o %t-ppc-host-ppc.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host-ppc.bc -o %t-ppc-device.ll
+
+// Nvidia GPUs don't support VLAs.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host-nvptx.bc
+// RUN: %clang_cc1 -verify -DNO_VLA -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host-nvptx.bc -o %t-nvptx-device.ll
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host-nvptx.bc
+
+#ifndef NO_VLA
+// expected-no-diagnostics
+#endif
+
+#pragma omp declare target
+void declare(int arg) {
+  int a[2];
+#ifdef NO_VLA
+  // expected-error@+2 {{variable length arrays are not supported for the current target}}
+#endif
+  int vla[arg];
+}
+
+void declare_parallel_reduction(int arg) {
+  int a[2];
+
+#pragma omp parallel reduction(+: a)
+  { }
+
+#pragma omp parallel reduction(+: a[0:2])
+  { }
+
+#ifdef NO_VLA
+  // expected-error@+3 {{cannot generate code for reduction on array section, which requires a variable length array}}
+  // expected-note@+2 {{variable length arrays are not supported for the current target}}
+#endif
+#pragma omp parallel reduction(+: a[0:arg])
+  { }
+}
+#pragma omp end declare target
+
+template <typename T>
+void target_template(int arg) {
+#pragma omp target
+  {
+#ifdef NO_VLA
+    // expected-error@+2 {{variable length arrays are not supported for the current target}}
+#endif
+    T vla[arg];
+  }
+}
+
+void target(int arg) {
+#pragma omp target
+  {
+#ifdef NO_VLA
+    // expected-error@+2 {{variable length arrays are not supported for the current target}}
+#endif
+    int vla[arg];
+  }
+
+#pragma omp target
+  {
+#pragma omp parallel
+    {
+#ifdef NO_VLA
+    // expected-error@+2 {{variable length arrays are not supported for the current target}}
+#endif
+      int vla[arg];
+    }
+  }
+
+  target_template<long>(arg);
+}
+
+void teams_reduction(int arg) {
+  int a[2];
+  int vla[arg];
+
+#pragma omp target map(a)
+#pragma omp teams reduction(+: a)
+  { }
+
+#ifdef NO_VLA
+  // expected-error@+4 {{cannot generate code for reduction on variable length array}}
+  // expected-note@+3 {{variable length arrays are not supported for the current target}}
+#endif
+#pragma omp target map(vla)
+#pragma omp teams reduction(+: vla)
+  { }
+
+#pragma omp target map(a[0:2])
+#pragma omp teams reduction(+: a[0:2])
+  { }
+
+#pragma omp target map(vla[0:2])
+#pragma omp teams reduction(+: vla[0:2])
+  { }
+
+#ifdef NO_VLA
+  // expected-error@+4 {{cannot generate code for reduction on array section, which requires a variable length array}}
+  // expected-note@+3 {{variable length arrays are not supported for the current target}}
+#endif
+#pragma omp target map(a[0:arg])
+#pragma omp teams reduction(+: a[0:arg])
+  { }
+
+#ifdef NO_VLA
+  // expected-error@+4 {{cannot generate code for reduction on array section, which requires a variable length array}}
+  // expected-note@+3 {{variable length arrays are not supported for the current target}}
+#endif
+#pragma omp target map(vla[0:arg])
+#pragma omp teams reduction(+: vla[0:arg])
+  { }
+}
+
+void parallel_reduction(int arg) {
+  int a[2];
+  int vla[arg];
+
+#pragma omp target map(a)
+#pragma omp parallel reduction(+: a)
+  { }
+
+#ifdef NO_VLA
+  // expected-error@+4 {{cannot generate code for reduction on variable length array}}
+  // expected-note@+3 {{variable length arrays are not supported for the current target}}
+#endif
+#pragma omp target map(vla)
+#pragma omp parallel reduction(+: vla)
+  { }
+
+#pragma omp target map(a[0:2])
+#pragma omp parallel reduction(+: a[0:2])
+  { }
+
+#pragma omp target map(vla[0:2])
+#pragma omp parallel reduction(+: vla[0:2])
+  { }
+
+#ifdef NO_VLA
+  // expected-error@+4 {{cannot generate code for reduction on array section, which requires a variable length array}}
+  // expected-note@+3 {{variable length arrays are not supported for the current target}}
+#endif
+#pragma omp target map(a[0:arg])
+#pragma omp parallel reduction(+: a[0:arg])
+  { }
+
+#ifdef NO_VLA
+  // expected-error@+4 {{cannot generate code for reduction on array section, which requires a variable length array}}
+  // expected-note@+3 {{variable length arrays are not supported for the current target}}
+#endif
+#pragma omp target map(vla[0:arg])
+#pragma omp parallel reduction(+: vla[0:arg])
+  { }
+}
+
+void for_reduction(int arg) {
+  int a[2];
+  int vla[arg];
+
+#pragma omp target map(a)
+#pragma omp parallel
+#pragma omp for reduction(+: a)
+  for (int i = 0; i < arg; i++) ;
+
+#ifdef NO_VLA
+  // expected-error@+5 {{cannot generate code for reduction on variable length array}}
+  // expected-note@+4 {{variable length arrays are not supported for the current target}}
+#endif
+#pragma omp target map(vla)
+#pragma omp parallel
+#pragma omp for reduction(+: vla)
+  for (int i = 0; i < arg; i++) ;
+
+#pragma omp target map(a[0:2])
+#pragma omp parallel
+#pragma omp for reduction(+: a[0:2])
+  for (int i = 0; i < arg; i++) ;
+
+#pragma omp target map(vla[0:2])
+#pragma omp parallel
+#pragma omp for reduction(+: vla[0:2])
+  for (int i = 0; i < arg; i++) ;
+
+#ifdef NO_VLA
+  // expected-error@+5 {{cannot generate code for reduction on array section, which requires a variable length array}}
+  // expected-note@+4 {{variable length arrays are not supported for the current target}}
+#endif
+#pragma omp target map(a[0:arg])
+#pragma omp parallel
+#pragma omp for reduction(+: a[0:arg])
+  for (int i = 0; i < arg; i++) ;
+
+#ifdef NO_VLA
+  // expected-error@+5 {{cannot generate code for reduction on array section, which requires a variable length array}}
+  // expected-note@+4 {{variable length arrays are not supported for the current target}}
+#endif
+#pragma omp target map(vla[0:arg])
+#pragma omp parallel
+#pragma omp for reduction(+: vla[0:arg])
+  for (int i = 0; i < arg; i++) ;
+}
diff --git a/test/OpenMP/task_ast_print.cpp b/test/OpenMP/task_ast_print.cpp
index 54c05d8..01ffda9 100644
--- a/test/OpenMP/task_ast_print.cpp
+++ b/test/OpenMP/task_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -38,7 +42,7 @@
 };
 
 // CHECK: #pragma omp taskgroup task_reduction(+: this->b)
-// CHECK: #pragma omp task private(this->a) private(this->a) private(T::a) in_reduction(+: this->b)
+// CHECK: #pragma omp task private(this->a) private(this->a) private(T::a) in_reduction(+: this->b){{$}}
 // CHECK: #pragma omp task private(this->a) private(this->a)
 // CHECK: #pragma omp task private(this->a) private(this->a) private(this->S1::a)
 
diff --git a/test/OpenMP/task_codegen.c b/test/OpenMP/task_codegen.c
index 579a6f1..6669c7e 100644
--- a/test/OpenMP/task_codegen.c
+++ b/test/OpenMP/task_codegen.c
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/task_codegen.cpp b/test/OpenMP/task_codegen.cpp
index 4743ca3..fefe8b4 100644
--- a/test/OpenMP/task_codegen.cpp
+++ b/test/OpenMP/task_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -33,7 +38,7 @@
 // CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]]
 // CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 16, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[BITCAST]], i64 16, i1 false)
 // CHECK: [[PRIORITY_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 4
 // CHECK: [[PRIORITY:%.+]] = bitcast %union{{.+}}* [[PRIORITY_REF_PTR]] to i32*
 // CHECK: store i32 {{.+}}, i32* [[PRIORITY]]
@@ -50,7 +55,7 @@
 // CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]]
 // CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS1]]* [[CAPTURES]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 8, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[BITCAST]], i64 8, i1 false)
 // CHECK: [[DEP:%.*]] = getelementptr inbounds [4 x [[KMP_DEPEND_INFO]]], [4 x [[KMP_DEPEND_INFO]]]* [[DEPENDENCIES:%.*]], i64 0, i64 0
 // CHECK: [[T0:%.*]] = getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* [[DEP]], i32 0, i32 0
 // CHECK: store i64 ptrtoint (i32* @{{.+}} to i64), i64* [[T0]]
diff --git a/test/OpenMP/task_default_messages.cpp b/test/OpenMP/task_default_messages.cpp
index b3df295..45bd8df 100644
--- a/test/OpenMP/task_default_messages.cpp
+++ b/test/OpenMP/task_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/task_depend_messages.cpp b/test/OpenMP/task_depend_messages.cpp
index 1e51b47..469785e 100644
--- a/test/OpenMP/task_depend_messages.cpp
+++ b/test/OpenMP/task_depend_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/task_final_messages.cpp b/test/OpenMP/task_final_messages.cpp
index a217ea2..49b9bfe 100644
--- a/test/OpenMP/task_final_messages.cpp
+++ b/test/OpenMP/task_final_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/task_firstprivate_codegen.cpp b/test/OpenMP/task_firstprivate_codegen.cpp
index 7c15a18..09b1dea 100644
--- a/test/OpenMP/task_firstprivate_codegen.cpp
+++ b/test/OpenMP/task_firstprivate_codegen.cpp
@@ -4,6 +4,14 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 // It doesn't pass on win32.
 // REQUIRES: shell
@@ -199,7 +207,7 @@
 // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
 // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false)
 
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // Also copy address of private copy to the corresponding shareds reference.
@@ -353,7 +361,7 @@
 // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
 // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false)
 
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
diff --git a/test/OpenMP/task_firstprivate_messages.cpp b/test/OpenMP/task_firstprivate_messages.cpp
index 11d8c57..1a64f2a 100644
--- a/test/OpenMP/task_firstprivate_messages.cpp
+++ b/test/OpenMP/task_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/task_if_codegen.cpp b/test/OpenMP/task_if_codegen.cpp
index 5952085..6f02942 100644
--- a/test/OpenMP/task_if_codegen.cpp
+++ b/test/OpenMP/task_if_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/task_if_messages.cpp b/test/OpenMP/task_if_messages.cpp
index 89b10f7..fca57bb 100644
--- a/test/OpenMP/task_if_messages.cpp
+++ b/test/OpenMP/task_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/task_in_reduction_codegen.cpp b/test/OpenMP/task_in_reduction_codegen.cpp
index 2ad013a..30ba9a1 100644
--- a/test/OpenMP/task_in_reduction_codegen.cpp
+++ b/test/OpenMP/task_in_reduction_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/task_in_reduction_message.cpp b/test/OpenMP/task_in_reduction_message.cpp
index e176dc9..d99d2e4 100644
--- a/test/OpenMP/task_in_reduction_message.cpp
+++ b/test/OpenMP/task_in_reduction_message.cpp
@@ -2,6 +2,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 void foo() {
 }
 
diff --git a/test/OpenMP/task_messages.cpp b/test/OpenMP/task_messages.cpp
index f42a37a..832d8fa 100644
--- a/test/OpenMP/task_messages.cpp
+++ b/test/OpenMP/task_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -std=c++11 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/task_priority_messages.cpp b/test/OpenMP/task_priority_messages.cpp
index 8a5553e..2e184ce 100644
--- a/test/OpenMP/task_priority_messages.cpp
+++ b/test/OpenMP/task_priority_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/task_private_codegen.cpp b/test/OpenMP/task_private_codegen.cpp
index 0217499..1484c6f 100644
--- a/test/OpenMP/task_private_codegen.cpp
+++ b/test/OpenMP/task_private_codegen.cpp
@@ -4,6 +4,14 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 // It doesn't pass on win32. Investigating.
 // REQUIRES: shell
diff --git a/test/OpenMP/task_private_messages.cpp b/test/OpenMP/task_private_messages.cpp
index 6bce135..5663a43 100644
--- a/test/OpenMP/task_private_messages.cpp
+++ b/test/OpenMP/task_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/task_shared_messages.cpp b/test/OpenMP/task_shared_messages.cpp
index bf3b8ba..bdd46ab 100644
--- a/test/OpenMP/task_shared_messages.cpp
+++ b/test/OpenMP/task_shared_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskgroup_ast_print.cpp b/test/OpenMP/taskgroup_ast_print.cpp
index 4292eed..a15600c 100644
--- a/test/OpenMP/taskgroup_ast_print.cpp
+++ b/test/OpenMP/taskgroup_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -70,7 +74,7 @@
 // CHECK: static int a;
 #pragma omp taskgroup
   a=2;
-// CHECK-NEXT: #pragma omp taskgroup
+// CHECK-NEXT: #pragma omp taskgroup{{$}}
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: ++a;
   ++a;
diff --git a/test/OpenMP/taskgroup_codegen.cpp b/test/OpenMP/taskgroup_codegen.cpp
index 3dd41a1..f672ab1 100644
--- a/test/OpenMP/taskgroup_codegen.cpp
+++ b/test/OpenMP/taskgroup_codegen.cpp
@@ -2,6 +2,12 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -40,7 +46,7 @@
 void parallel_taskgroup() {
 #pragma omp parallel
 #pragma omp taskgroup
-  // TERM_DEBUG:     __kmpc_global_thread_num
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
   // TERM_DEBUG:     call void @__kmpc_taskgroup({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
   // TERM_DEBUG:     invoke void {{.*}}foo{{.*}}()
   // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
diff --git a/test/OpenMP/taskgroup_messages.cpp b/test/OpenMP/taskgroup_messages.cpp
index 2382073..dfb4725 100644
--- a/test/OpenMP/taskgroup_messages.cpp
+++ b/test/OpenMP/taskgroup_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 int foo();
 
 int main() {
diff --git a/test/OpenMP/taskgroup_task_reduction_codegen.cpp b/test/OpenMP/taskgroup_task_reduction_codegen.cpp
index 884d002..bf0dd9b 100644
--- a/test/OpenMP/taskgroup_task_reduction_codegen.cpp
+++ b/test/OpenMP/taskgroup_task_reduction_codegen.cpp
@@ -1,10 +1,20 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple x86_64-apple-darwin10 | FileCheck %s --check-prefix=CHECK --check-prefix=DEBUG
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple x86_64-apple-darwin10 | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
 
+// CHECK-DAG: @reduction_size.[[ID:.+]]_[[CID:[0-9]+]].artificial.
+// CHECK-DAG: @reduction_size.[[ID]]_[[CID]].artificial..cache.
+
 struct S {
   int a;
   S() : a(0) {}
@@ -14,7 +24,6 @@
   friend S operator+(const S&a, const S&b) {return a;}
 };
 
-
 int main(int argc, char **argv) {
   int a;
   float b;
@@ -34,7 +43,7 @@
 // CHECK:       [[A:%.+]] = alloca i32,
 // CHECK:       [[B:%.+]] = alloca float,
 // CHECK:       [[C:%.+]] = alloca [5 x %struct.S],
-// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* @0)
+// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t*
 // CHECK:       [[RD_IN1:%.+]] = alloca [3 x [[T1:%[^,]+]]],
 // CHECK:       [[TD1:%.+]] = alloca i8*,
 // CHECK:       [[RD_IN2:%.+]] = alloca [2 x [[T2:%[^,]+]]],
@@ -42,7 +51,7 @@
 
 // CHECK:       [[VLA:%.+]] = alloca i16, i64 [[VLA_SIZE:%[^,]+]],
 
-// CHECK:       call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]])
+// CHECK:       call void @__kmpc_taskgroup(%ident_t* {{[^,]+}}, i32 [[GTID]])
 // CHECK-DAG:   [[BC_A:%.+]] = bitcast i32* [[A]] to i8*
 // CHECK-DAG:   store i8* [[BC_A]], i8** [[A_REF:[^,]+]],
 // CHECK-DAG:   [[A_REF]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA:%[^,]+]], i32 0, i32 0
@@ -50,14 +59,14 @@
 // CHECK-DAG:   [[TMP6:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 1
 // CHECK-DAG:   store i64 4, i64* [[TMP6]],
 // CHECK-DAG:   [[TMP7:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 2
-// CHECK-DAG:   store i8* bitcast (void (i8*)* [[AINIT:@.+]] to i8*), i8** [[TMP7]],
+// CHECK-DAG:   store i8* bitcast (void (i8*)* @[[AINIT:.+]] to i8*), i8** [[TMP7]],
 // CHECK-DAG:   [[TMP8:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 3
 // CHECK-DAG:   store i8* null, i8** [[TMP8]],
 // CHECK-DAG:   [[TMP9:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 4
-// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* [[ACOMB:@.+]] to i8*), i8** [[TMP9]],
+// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[ACOMB:.+]] to i8*), i8** [[TMP9]],
 // CHECK-DAG:   [[TMP10:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 5
 // CHECK-DAG:   [[TMP11:%.+]] = bitcast i32* [[TMP10]] to i8*
-// CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* [[TMP11]], i8 0, i64 4, i32 8, i1 false)
+// CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP11]], i8 0, i64 4, i1 false)
 // CHECK-DAG:   [[TMP13:%.+]] = bitcast float* [[B]] to i8*
 // CHECK-DAG:   store i8* [[TMP13]], i8** [[TMP12:%[^,]+]],
 // CHECK-DAG:   [[TMP12]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB:%[^,]+]], i32 0, i32 0
@@ -65,14 +74,14 @@
 // CHECK-DAG:   [[TMP14:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 1
 // CHECK-DAG:   store i64 4, i64* [[TMP14]],
 // CHECK-DAG:   [[TMP15:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 2
-// CHECK-DAG:   store i8* bitcast (void (i8*)* [[BINIT:@.+]] to i8*), i8** [[TMP15]],
+// CHECK-DAG:   store i8* bitcast (void (i8*)* @[[BINIT:.+]] to i8*), i8** [[TMP15]],
 // CHECK-DAG:   [[TMP16:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 3
 // CHECK-DAG:   store i8* null, i8** [[TMP16]],
 // CHECK-DAG:   [[TMP17:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 4
-// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* [[BCOMB:@.+]] to i8*), i8** [[TMP17]],
+// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[BCOMB:.+]] to i8*), i8** [[TMP17]],
 // CHECK-DAG:   [[TMP18:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 5
 // CHECK-DAG:   [[TMP19:%.+]] = bitcast i32* [[TMP18]] to i8*
-// CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* [[TMP19]], i8 0, i64 4, i32 8, i1 false)
+// CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP19]], i8 0, i64 4, i1 false)
 // CHECK-DAG:   [[TMP21:%.+]] = bitcast i32* [[ARGC_ADDR]] to i8*
 // CHECK-DAG:   store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
 // CHECK-DAG:   [[TMP20]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC:%[^,]+]], i32 0, i32 0
@@ -80,18 +89,19 @@
 // CHECK-DAG:   [[TMP22:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 1
 // CHECK-DAG:   store i64 4, i64* [[TMP22]],
 // CHECK-DAG:   [[TMP23:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 2
-// CHECK-DAG:   store i8* bitcast (void (i8*)* [[ARGCINIT:@.+]] to i8*), i8** [[TMP23]],
+// CHECK-DAG:   store i8* bitcast (void (i8*)* @[[ARGCINIT:.+]] to i8*), i8** [[TMP23]],
 // CHECK-DAG:   [[TMP24:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 3
 // CHECK-DAG:   store i8* null, i8** [[TMP24]],
 // CHECK-DAG:   [[TMP25:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 4
-// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* [[ARGCCOMB:@.+]] to i8*), i8** [[TMP25]],
+// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[ARGCCOMB:.+]] to i8*), i8** [[TMP25]],
 // CHECK-DAG:   [[TMP26:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 5
 // CHECK-DAG:   [[TMP27:%.+]] = bitcast i32* [[TMP26]] to i8*
-// CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* [[TMP27]], i8 0, i64 4, i32 8, i1 false)
+// CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
 // CHECK-DAG:   [[TMP28:%.+]] = bitcast [3 x [[T1]]]* [[RD_IN1]] to i8*
 // CHECK-DAG:   [[TMP29:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* [[TMP28]])
+// DEBUG-DAG:   call void @llvm.dbg.declare(metadata i8** [[TD1]],
 // CHECK-DAG:   store i8* [[TMP29]], i8** [[TD1]],
-// CHECK-DAG:   call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]])
+// CHECK-DAG:   call void @__kmpc_taskgroup(%ident_t* {{[^,]+}}, i32 [[GTID]])
 // CHECK-DAG:   [[TMP31:%.+]] = bitcast [5 x %struct.S]* [[C]] to i8*
 // CHECK-DAG:   store i8* [[TMP31]], i8** [[TMP30:%[^,]+]],
 // CHECK-DAG:   [[TMP30]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC:%[^,]+]], i32 0, i32 0
@@ -99,14 +109,14 @@
 // CHECK-DAG:   [[TMP32:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 1
 // CHECK-DAG:   store i64 20, i64* [[TMP32]],
 // CHECK-DAG:   [[TMP33:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 2
-// CHECK-DAG:   store i8* bitcast (void (i8*)* [[CINIT:@.+]] to i8*), i8** [[TMP33]],
+// CHECK-DAG:   store i8* bitcast (void (i8*)* @[[CINIT:.+]] to i8*), i8** [[TMP33]],
 // CHECK-DAG:   [[TMP34:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 3
-// CHECK-DAG:   store i8* bitcast (void (i8*)* [[CFINI:@.+]] to i8*), i8** [[TMP34]],
+// CHECK-DAG:   store i8* bitcast (void (i8*)* @[[CFINI:.+]] to i8*), i8** [[TMP34]],
 // CHECK-DAG:   [[TMP35:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 4
-// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* [[CCOMB:@.+]] to i8*), i8** [[TMP35]],
+// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[CCOMB:.+]] to i8*), i8** [[TMP35]],
 // CHECK-DAG:   [[TMP36:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 5
 // CHECK-DAG:   [[TMP37:%.+]] = bitcast i32* [[TMP36]] to i8*
-// CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* [[TMP37]], i8 0, i64 4, i32 8, i1 false)
+// CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP37]], i8 0, i64 4, i1 false)
 // CHECK-DAG:   [[TMP39:%.+]] = bitcast i16* [[VLA]] to i8*
 // CHECK-DAG:   store i8* [[TMP39]], i8** [[TMP38:%[^,]+]],
 // CHECK-DAG:   [[TMP38]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA:%[^,]+]], i32 0, i32 0
@@ -116,67 +126,67 @@
 // CHECK-DAG:   [[TMP42:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 1
 // CHECK-DAG:   store i64 [[TMP40]], i64* [[TMP42]],
 // CHECK-DAG:   [[TMP43:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 2
-// CHECK-DAG:   store i8* bitcast (void (i8*)* [[VLAINIT:@.+]] to i8*), i8** [[TMP43]],
+// CHECK-DAG:   store i8* bitcast (void (i8*)* @[[VLAINIT:.+]] to i8*), i8** [[TMP43]],
 // CHECK-DAG:   [[TMP44:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 3
 // CHECK-DAG:   store i8* null, i8** [[TMP44]],
 // CHECK-DAG:   [[TMP45:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 4
-// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* [[VLACOMB:@.+]] to i8*), i8** [[TMP45]],
+// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[VLACOMB:.+]] to i8*), i8** [[TMP45]],
 // CHECK-DAG:   [[TMP46:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 5
 // CHECK-DAG:   store i32 1, i32* [[TMP46]],
 // CHECK:       [[TMP47:%.+]] = bitcast [2 x [[T2]]]* [[RD_IN2]] to i8*
 // CHECK:       [[TMP48:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* [[TMP47]])
 // CHECK:       store i8* [[TMP48]], i8** [[TD2]],
-// CHECK:       call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
-// CHECK:       call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
+// CHECK:       call void @__kmpc_end_taskgroup(%ident_t* {{[^,]+}}, i32 [[GTID]])
+// CHECK:       call void @__kmpc_end_taskgroup(%ident_t* {{[^,]+}}, i32 [[GTID]])
 
-// CHECK-DAG: define internal void [[AINIT]](i8*)
+// CHECK-DAG: define internal void @[[AINIT]](i8*)
 // CHECK-DAG: store i32 0, i32* %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void [[ACOMB]](i8*, i8*)
+// CHECK-DAG: define internal void @[[ACOMB]](i8*, i8*)
 // CHECK-DAG: add nsw i32 %
 // CHECK-DAG: store i32 %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void [[BINIT]](i8*)
+// CHECK-DAG: define internal void @[[BINIT]](i8*)
 // CHECK-DAG: store float 0.000000e+00, float* %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void [[BCOMB]](i8*, i8*)
+// CHECK-DAG: define internal void @[[BCOMB]](i8*, i8*)
 // CHECK-DAG: fadd float %
 // CHECK-DAG: store float %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void [[ARGCINIT]](i8*)
+// CHECK-DAG: define internal void @[[ARGCINIT]](i8*)
 // CHECK-DAG: store i32 0, i32* %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void [[ARGCCOMB]](i8*, i8*)
+// CHECK-DAG: define internal void @[[ARGCCOMB]](i8*, i8*)
 // CHECK-DAG: add nsw i32 %
 // CHECK-DAG: store i32 %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void [[CINIT]](i8*)
+// CHECK-DAG: define internal void @[[CINIT]](i8*)
 // CHECK-DAG: phi %struct.S* [
 // CHECK-DAG: call {{.+}}(%struct.S* {{.+}})
 // CHECK-DAG: br i1 %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void [[CFINI]](i8*)
+// CHECK-DAG: define internal void @[[CFINI]](i8*)
 // CHECK-DAG: phi %struct.S* [
 // CHECK-DAG: call {{.+}}(%struct.S* {{.+}})
 // CHECK-DAG: br i1 %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void [[CCOMB]](i8*, i8*)
+// CHECK-DAG: define internal void @[[CCOMB]](i8*, i8*)
 // CHECK-DAG: phi %struct.S* [
 // CHECK-DAG: phi %struct.S* [
 // CHECK-DAG: call {{.+}}(%struct.S* {{.+}}, %struct.S* {{.+}}, %struct.S* {{.+}})
@@ -186,18 +196,18 @@
 // CHECK-DAG: ret void
 // CHECK_DAG: }
 
-// CHECK-DAG: define internal void [[VLAINIT]](i8*)
-// CHECK-DAG: call i32 @__kmpc_global_thread_num(%ident_t* @0)
-// CHECK-DAG: call i8* @__kmpc_threadprivate_cached(%ident_t* @0, i32 %
+// CHECK-DAG: define internal void @[[VLAINIT]](i8*)
+// CHECK-DAG: call i32 @__kmpc_global_thread_num(%ident_t* {{[^,]+}})
+// CHECK-DAG: call i8* @__kmpc_threadprivate_cached(%ident_t*
 // CHECK-DAG: phi i16* [
 // CHECK-DAG: store i16 0, i16* %
 // CHECK-DAG: br i1 %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void [[VLACOMB]](i8*, i8*)
-// CHECK-DAG: call i32 @__kmpc_global_thread_num(%ident_t* @0)
-// CHECK-DAG: call i8* @__kmpc_threadprivate_cached(%ident_t* @0, i32 %
+// CHECK-DAG: define internal void @[[VLACOMB]](i8*, i8*)
+// CHECK-DAG: call i32 @__kmpc_global_thread_num(%ident_t* {{[^,]+}})
+// CHECK-DAG: call i8* @__kmpc_threadprivate_cached(%ident_t*
 // CHECK-DAG: phi i16* [
 // CHECK-DAG: phi i16* [
 // CHECK-DAG: sext i16 %{{.+}} to i32
@@ -208,3 +218,16 @@
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 #endif
+
+// DEBUG-LABEL: distinct !DICompileUnit
+// DEBUG-DAG: distinct !DISubprogram(linkageName: "[[AINIT]]",
+// DEBUG-DAG: distinct !DISubprogram(linkageName: "[[ACOMB]]",
+// DEBUG-DAG: distinct !DISubprogram(linkageName: "[[BINIT]]",
+// DEBUG-DAG: distinct !DISubprogram(linkageName: "[[BCOMB]]",
+// DEBUG-DAG: distinct !DISubprogram(linkageName: "[[ARGCINIT]]",
+// DEBUG-DAG: distinct !DISubprogram(linkageName: "[[ARGCCOMB]]",
+// DEBUG-DAG: distinct !DISubprogram(linkageName: "[[CINIT]]",
+// DEBUG-DAG: distinct !DISubprogram(linkageName: "[[CFINI]]",
+// DEBUG-DAG: distinct !DISubprogram(linkageName: "[[CCOMB]]",
+// DEBUG-DAG: distinct !DISubprogram(linkageName: "[[VLAINIT]]",
+// DEBUG-DAG: distinct !DISubprogram(linkageName: "[[VLACOMB]]",
diff --git a/test/OpenMP/taskgroup_task_reduction_messages.cpp b/test/OpenMP/taskgroup_task_reduction_messages.cpp
index 819dc5b..59ff219 100644
--- a/test/OpenMP/taskgroup_task_reduction_messages.cpp
+++ b/test/OpenMP/taskgroup_task_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_ast_print.cpp b/test/OpenMP/taskloop_ast_print.cpp
index 891b31d..97f797b 100644
--- a/test/OpenMP/taskloop_ast_print.cpp
+++ b/test/OpenMP/taskloop_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -16,7 +20,7 @@
 #pragma omp taskgroup task_reduction(+: d)
 #pragma omp taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+:g) in_reduction(+: d)
   // CHECK-NEXT: #pragma omp taskgroup task_reduction(+: d)
-  // CHECK-NEXT: #pragma omp taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+: g) in_reduction(+: d)
+  // CHECK-NEXT: #pragma omp taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+: g) in_reduction(+: d){{$}}
   for (int i = 0; i < 2; ++i)
     a = 2;
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
diff --git a/test/OpenMP/taskloop_codegen.cpp b/test/OpenMP/taskloop_codegen.cpp
index 94cf536..ace83ea 100644
--- a/test/OpenMP/taskloop_codegen.cpp
+++ b/test/OpenMP/taskloop_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/taskloop_collapse_messages.cpp b/test/OpenMP/taskloop_collapse_messages.cpp
index 1a5620e..4de0f38 100644
--- a/test/OpenMP/taskloop_collapse_messages.cpp
+++ b/test/OpenMP/taskloop_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_final_messages.cpp b/test/OpenMP/taskloop_final_messages.cpp
index a2d7cda..d53b123 100644
--- a/test/OpenMP/taskloop_final_messages.cpp
+++ b/test/OpenMP/taskloop_final_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_firstprivate_codegen.cpp b/test/OpenMP/taskloop_firstprivate_codegen.cpp
index 6e84530..d617835 100644
--- a/test/OpenMP/taskloop_firstprivate_codegen.cpp
+++ b/test/OpenMP/taskloop_firstprivate_codegen.cpp
@@ -4,6 +4,14 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 // It doesn't pass on win32.
 // REQUIRES: shell
@@ -198,7 +206,7 @@
 // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
 // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false)
 
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // Also copy address of private copy to the corresponding shareds reference.
@@ -368,7 +376,7 @@
 // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
 // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false)
 
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
diff --git a/test/OpenMP/taskloop_firstprivate_messages.cpp b/test/OpenMP/taskloop_firstprivate_messages.cpp
index e63d6a6..7e1b818 100644
--- a/test/OpenMP/taskloop_firstprivate_messages.cpp
+++ b/test/OpenMP/taskloop_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_grainsize_messages.cpp b/test/OpenMP/taskloop_grainsize_messages.cpp
index 047f925..a1cfea4 100644
--- a/test/OpenMP/taskloop_grainsize_messages.cpp
+++ b/test/OpenMP/taskloop_grainsize_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_in_reduction_codegen.cpp b/test/OpenMP/taskloop_in_reduction_codegen.cpp
index e9ee9a3..2417391 100644
--- a/test/OpenMP/taskloop_in_reduction_codegen.cpp
+++ b/test/OpenMP/taskloop_in_reduction_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/taskloop_in_reduction_messages.cpp b/test/OpenMP/taskloop_in_reduction_messages.cpp
index 4099755..dc19bac 100644
--- a/test/OpenMP/taskloop_in_reduction_messages.cpp
+++ b/test/OpenMP/taskloop_in_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_lastprivate_codegen.cpp b/test/OpenMP/taskloop_lastprivate_codegen.cpp
index 837977e..a6f7abc 100644
--- a/test/OpenMP/taskloop_lastprivate_codegen.cpp
+++ b/test/OpenMP/taskloop_lastprivate_codegen.cpp
@@ -4,6 +4,14 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 // It doesn't pass on win32.
 // REQUIRES: shell
@@ -190,7 +198,7 @@
 // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
 // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false)
 
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // Also copy address of private copy to the corresponding shareds reference.
@@ -279,18 +287,18 @@
 // CHECK-NEXT: br i1
 // CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8*
 // CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: load i32, i32* %
 // CHECK: store i32 %{{.+}}, i32* %
 // CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %
 // CHECK: phi [[S_DOUBLE_TY]]*
 // CHECK: phi [[S_DOUBLE_TY]]*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: icmp eq [[S_DOUBLE_TY]]* %
 // CHECK-NEXT: br i1
 // CHECK: bitcast [2 x i32]* %{{.+}} to i8*
 // CHECK: bitcast [2 x i32]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: load i32, i32* %
 // CHECK: store i32 %{{.+}}, i32* %
 // CHECK: br label
@@ -364,7 +372,7 @@
 // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
 // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false)
 
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
@@ -445,16 +453,16 @@
 // CHECK: store i32 %{{.+}}, i32* %
 // CHECK: bitcast [2 x i32]* %{{.+}} to i8*
 // CHECK: bitcast [2 x i32]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %
 // CHECK: phi [[S_INT_TY]]*
 // CHECK: phi [[S_INT_TY]]*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: icmp eq [[S_INT_TY]]* %
 // CHECK-NEXT: br i1
 // CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8*
 // CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: br label
 // CHECK: ret
 
diff --git a/test/OpenMP/taskloop_lastprivate_messages.cpp b/test/OpenMP/taskloop_lastprivate_messages.cpp
index 1d238ea0..85aaa00 100644
--- a/test/OpenMP/taskloop_lastprivate_messages.cpp
+++ b/test/OpenMP/taskloop_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_loop_messages.cpp b/test/OpenMP/taskloop_loop_messages.cpp
index edfc2a8..4ce83b4 100644
--- a/test/OpenMP/taskloop_loop_messages.cpp
+++ b/test/OpenMP/taskloop_loop_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 class S {
   int a;
   S() : a(0) {}
@@ -292,10 +294,8 @@
     c[ii] = a[ii];
 
 #pragma omp parallel
-// expected-error@+2 {{unexpected OpenMP clause 'linear' in directive '#pragma omp taskloop'}}
-// expected-note@+1 {{defined as linear}}
+// expected-error@+1 {{unexpected OpenMP clause 'linear' in directive '#pragma omp taskloop'}}
 #pragma omp taskloop linear(ii)
-// expected-error@+1 {{loop iteration variable in the associated loop of 'omp taskloop' directive may not be linear, predetermined as private}}
   for (ii = 0; ii < 10; ii++)
     c[ii] = a[ii];
 
diff --git a/test/OpenMP/taskloop_misc_messages.c b/test/OpenMP/taskloop_misc_messages.c
index f9e084e..4e2ac0a 100644
--- a/test/OpenMP/taskloop_misc_messages.c
+++ b/test/OpenMP/taskloop_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -triple x86_64-unknown-unknown -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -triple x86_64-unknown-unknown -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp taskloop'}}
 #pragma omp taskloop
 
diff --git a/test/OpenMP/taskloop_num_tasks_messages.cpp b/test/OpenMP/taskloop_num_tasks_messages.cpp
index 94d74eb..cf506ff 100644
--- a/test/OpenMP/taskloop_num_tasks_messages.cpp
+++ b/test/OpenMP/taskloop_num_tasks_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_priority_messages.cpp b/test/OpenMP/taskloop_priority_messages.cpp
index 6c15845..cbfd257 100644
--- a/test/OpenMP/taskloop_priority_messages.cpp
+++ b/test/OpenMP/taskloop_priority_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_private_codegen.cpp b/test/OpenMP/taskloop_private_codegen.cpp
index 6ca7c40..21ffcba 100644
--- a/test/OpenMP/taskloop_private_codegen.cpp
+++ b/test/OpenMP/taskloop_private_codegen.cpp
@@ -4,6 +4,14 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 // It doesn't pass on win32. Investigating.
 // REQUIRES: shell
diff --git a/test/OpenMP/taskloop_private_messages.cpp b/test/OpenMP/taskloop_private_messages.cpp
index 367d59d..ffe731d 100644
--- a/test/OpenMP/taskloop_private_messages.cpp
+++ b/test/OpenMP/taskloop_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_reduction_codegen.cpp b/test/OpenMP/taskloop_reduction_codegen.cpp
index 10cbf97..8abd017 100644
--- a/test/OpenMP/taskloop_reduction_codegen.cpp
+++ b/test/OpenMP/taskloop_reduction_codegen.cpp
@@ -1,4 +1,7 @@
-// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s
+
+// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 struct S {
@@ -51,6 +54,7 @@
 // CHECK:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
 // CHECK:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%ident_t*
 // CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t],
+// CHECK:    alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
 // CHECK:    store i32 0, i32* [[RETVAL]],
@@ -68,14 +72,14 @@
 // CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
 // CHECK-DAG:    store i64 4, i64* [[TMP22]],
 // CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* [[RED_INIT1:@.+]] to i8*), i8** [[TMP23]],
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
 // CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
 // CHECK-DAG:    store i8* null, i8** [[TMP24]],
 // CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
-// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* [[RED_COMB1:@.+]] to i8*), i8** [[TMP25]],
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB1:.+]] to i8*), i8** [[TMP25]],
 // CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
 // CHECK-DAG:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8*
-// CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* [[TMP27]], i8 0, i64 4, i32 8, i1 false)
+// CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0
 // CHECK-DAG:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, %
 // CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]]
@@ -91,11 +95,11 @@
 // CHECK-DAG:    store i64 [[TMP37]], i64* [[TMP38:%[^,]+]],
 // CHECK-DAG:    [[TMP38]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
 // CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* [[RED_INIT2:@.+]] to i8*), i8** [[TMP39]],
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
 // CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
-// CHECK-DAG:    store i8* bitcast (void (i8*)* [[RED_FINI2:@.+]] to i8*), i8** [[TMP40]],
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_FINI2:.+]] to i8*), i8** [[TMP40]],
 // CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
-// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* [[RED_COMB2:@.+]] to i8*), i8** [[TMP41]],
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB2:.+]] to i8*), i8** [[TMP41]],
 // CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
 // CHECK-DAG:    store i32 1, i32* [[TMP42]],
 // CHECK-DAG:    [[TMP44:%.*]] = load float*, float** [[D]],
@@ -105,14 +109,14 @@
 // CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
 // CHECK-DAG:    store i64 4, i64* [[TMP46]],
 // CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* [[RED_INIT3:@.+]] to i8*), i8** [[TMP47]],
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
 // CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
 // CHECK-DAG:    store i8* null, i8** [[TMP48]],
 // CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
-// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* [[RED_COMB3:@.+]] to i8*), i8** [[TMP49]],
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB3:.+]] to i8*), i8** [[TMP49]],
 // CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
 // CHECK-DAG:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8*
-// CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* [[TMP51]], i8 0, i64 4, i32 8, i1 false)
+// CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
 // CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
 // CHECK-DAG:    [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
@@ -121,11 +125,11 @@
 // CHECK-DAG:    store i64 [[TMP54]], i64* [[TMP56:%[^,]+]],
 // CHECK-DAG:    [[TMP56]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
 // CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* [[RED_INIT4:@.+]] to i8*), i8** [[TMP57]],
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
 // CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
 // CHECK-DAG:    store i8* null, i8** [[TMP58]],
 // CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
-// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* [[RED_COMB4:@.+]] to i8*), i8** [[TMP59]],
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB4:.+]] to i8*), i8** [[TMP59]],
 // CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
 // CHECK-DAG:    store i32 1, i32* [[TMP60]],
 // CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
@@ -149,50 +153,59 @@
 
 // CHECK:    ret i32
 
-// CHECK: define internal void [[RED_INIT1]](i8*)
+// CHECK: define internal void @[[RED_INIT1]](i8*)
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_COMB1]](i8*, i8*)
+// CHECK: define internal void @[[RED_COMB1]](i8*, i8*)
 // CHECK: fadd float %
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_INIT2]](i8*)
+// CHECK: define internal void @[[RED_INIT2]](i8*)
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_FINI2]](i8*)
+// CHECK: define internal void @[[RED_FINI2]](i8*)
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call void @
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_COMB2]](i8*, i8*)
+// CHECK: define internal void @[[RED_COMB2]](i8*, i8*)
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: fadd float %
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_INIT3]](i8*)
+// CHECK: define internal void @[[RED_INIT3]](i8*)
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_COMB3]](i8*, i8*)
+// CHECK: define internal void @[[RED_COMB3]](i8*, i8*)
 // CHECK: fadd float %
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_INIT4]](i8*)
+// CHECK: define internal void @[[RED_INIT4]](i8*)
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_COMB4]](i8*, i8*)
+// CHECK: define internal void @[[RED_COMB4]](i8*, i8*)
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: fadd float %
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: distinct !DISubprogram(linkageName: "[[TASK]]", scope: !
+// CHECK-DAG: distinct !DISubprogram(linkageName: "[[TASK]]", scope: !
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT1]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB1]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT2]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_FINI2]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB2]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT3]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB3]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT4]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB4]]"
diff --git a/test/OpenMP/taskloop_reduction_messages.cpp b/test/OpenMP/taskloop_reduction_messages.cpp
index 23f70b5..c83a8b3 100644
--- a/test/OpenMP/taskloop_reduction_messages.cpp
+++ b/test/OpenMP/taskloop_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_aligned_messages.cpp b/test/OpenMP/taskloop_simd_aligned_messages.cpp
index c4c41eb..24cdd00 100644
--- a/test/OpenMP/taskloop_simd_aligned_messages.cpp
+++ b/test/OpenMP/taskloop_simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/taskloop_simd_ast_print.cpp b/test/OpenMP/taskloop_simd_ast_print.cpp
index 6f13c7e..0a847f8 100644
--- a/test/OpenMP/taskloop_simd_ast_print.cpp
+++ b/test/OpenMP/taskloop_simd_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -17,7 +21,7 @@
 #pragma omp taskgroup task_reduction(+: d)
 #pragma omp taskloop simd if(taskloop: argc > N) default(shared) untied priority(N) safelen(N) linear(c) aligned(ptr) grainsize(N) reduction(+:g) in_reduction(+: d)
   // CHECK-NEXT: #pragma omp taskgroup task_reduction(+: d)
-  // CHECK-NEXT: #pragma omp taskloop simd if(taskloop: argc > N) default(shared) untied priority(N) safelen(N) linear(c) aligned(ptr) grainsize(N) reduction(+: g) in_reduction(+: d)
+  // CHECK-NEXT: #pragma omp taskloop simd if(taskloop: argc > N) default(shared) untied priority(N) safelen(N) linear(c) aligned(ptr) grainsize(N) reduction(+: g) in_reduction(+: d){{$}}
   for (int i = 0; i < 2; ++i)
     a = 2;
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
diff --git a/test/OpenMP/taskloop_simd_codegen.cpp b/test/OpenMP/taskloop_simd_codegen.cpp
index f787689..02491b2 100644
--- a/test/OpenMP/taskloop_simd_codegen.cpp
+++ b/test/OpenMP/taskloop_simd_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -158,7 +163,7 @@
 // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
 // CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
 // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 [[NUM_TASKS]], i8* null)
-#pragma omp taskloop simd shared(c) num_tasks(a) simdlen(64) safelen(8)
+#pragma omp taskloop simd shared(c) num_tasks(a) simdlen(8) safelen(64)
     for (a = 0; a < c; ++a)
       ;
   }
@@ -201,6 +206,6 @@
 // CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
 // CHECK: !{!"llvm.loop.vectorize.width", i32 4}
 // CHECK: !{!"llvm.loop.vectorize.width", i32 32}
-// CHECK: !{!"llvm.loop.vectorize.width", i32 64}
+// CHECK: !{!"llvm.loop.vectorize.width", i32 8}
 
 #endif
diff --git a/test/OpenMP/taskloop_simd_collapse_messages.cpp b/test/OpenMP/taskloop_simd_collapse_messages.cpp
index e4ce0c1..3f7e66a 100644
--- a/test/OpenMP/taskloop_simd_collapse_messages.cpp
+++ b/test/OpenMP/taskloop_simd_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_final_messages.cpp b/test/OpenMP/taskloop_simd_final_messages.cpp
index c6580fb..b739366 100644
--- a/test/OpenMP/taskloop_simd_final_messages.cpp
+++ b/test/OpenMP/taskloop_simd_final_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp b/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp
index 9e268d8..4f406b2 100644
--- a/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp
+++ b/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp
@@ -4,6 +4,14 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 // It doesn't pass on win32.
 // REQUIRES: shell
@@ -198,7 +206,7 @@
 // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
 // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false)
 
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // Also copy address of private copy to the corresponding shareds reference.
@@ -368,7 +376,7 @@
 // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
 // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false)
 
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
diff --git a/test/OpenMP/taskloop_simd_firstprivate_messages.cpp b/test/OpenMP/taskloop_simd_firstprivate_messages.cpp
index 176dcd7..66c9e7a 100644
--- a/test/OpenMP/taskloop_simd_firstprivate_messages.cpp
+++ b/test/OpenMP/taskloop_simd_firstprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_grainsize_messages.cpp b/test/OpenMP/taskloop_simd_grainsize_messages.cpp
index 45ccb31..3441e59 100644
--- a/test/OpenMP/taskloop_simd_grainsize_messages.cpp
+++ b/test/OpenMP/taskloop_simd_grainsize_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
index d894943..9313e09 100644
--- a/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
+++ b/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/taskloop_simd_in_reduction_messages.cpp b/test/OpenMP/taskloop_simd_in_reduction_messages.cpp
index f3b1a85..ee3d093 100644
--- a/test/OpenMP/taskloop_simd_in_reduction_messages.cpp
+++ b/test/OpenMP/taskloop_simd_in_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp b/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp
index 8d5ebb2..be48446 100644
--- a/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp
+++ b/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp
@@ -4,6 +4,14 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 // It doesn't pass on win32.
 // REQUIRES: shell
@@ -190,7 +198,7 @@
 // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
 // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false)
 
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // Also copy address of private copy to the corresponding shareds reference.
@@ -279,18 +287,18 @@
 // CHECK-NEXT: br i1
 // CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8*
 // CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: load i32, i32* %
 // CHECK: store i32 %{{.+}}, i32* %
 // CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %
 // CHECK: phi [[S_DOUBLE_TY]]*
 // CHECK: phi [[S_DOUBLE_TY]]*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: icmp eq [[S_DOUBLE_TY]]* %
 // CHECK-NEXT: br i1
 // CHECK: bitcast [2 x i32]* %{{.+}} to i8*
 // CHECK: bitcast [2 x i32]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: load i32, i32* %
 // CHECK: store i32 %{{.+}}, i32* %
 // CHECK: br label
@@ -364,7 +372,7 @@
 // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
 // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false)
 
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
@@ -445,16 +453,16 @@
 // CHECK: store i32 %{{.+}}, i32* %
 // CHECK: bitcast [2 x i32]* %{{.+}} to i8*
 // CHECK: bitcast [2 x i32]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %
 // CHECK: phi [[S_INT_TY]]*
 // CHECK: phi [[S_INT_TY]]*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: icmp eq [[S_INT_TY]]* %
 // CHECK-NEXT: br i1
 // CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8*
 // CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
 // CHECK: br label
 // CHECK: ret
 
diff --git a/test/OpenMP/taskloop_simd_lastprivate_messages.cpp b/test/OpenMP/taskloop_simd_lastprivate_messages.cpp
index e5cba6c..8f418ff 100644
--- a/test/OpenMP/taskloop_simd_lastprivate_messages.cpp
+++ b/test/OpenMP/taskloop_simd_lastprivate_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_linear_messages.cpp b/test/OpenMP/taskloop_simd_linear_messages.cpp
index c9a82b9..2aea6a0 100644
--- a/test/OpenMP/taskloop_simd_linear_messages.cpp
+++ b/test/OpenMP/taskloop_simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
   int x;
 };
diff --git a/test/OpenMP/taskloop_simd_loop_messages.cpp b/test/OpenMP/taskloop_simd_loop_messages.cpp
index 5022246..71aa9df 100644
--- a/test/OpenMP/taskloop_simd_loop_messages.cpp
+++ b/test/OpenMP/taskloop_simd_loop_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 class S {
   int a;
   S() : a(0) {}
diff --git a/test/OpenMP/taskloop_simd_misc_messages.c b/test/OpenMP/taskloop_simd_misc_messages.c
index 61c7b10..bb4b2df 100644
--- a/test/OpenMP/taskloop_simd_misc_messages.c
+++ b/test/OpenMP/taskloop_simd_misc_messages.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -triple x86_64-unknown-unknown -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -triple x86_64-unknown-unknown -verify %s
+
 // expected-error@+1 {{unexpected OpenMP directive '#pragma omp taskloop simd'}}
 #pragma omp taskloop simd
 
@@ -346,6 +348,10 @@
 #pragma omp taskloop simd lastprivate(x, y, z) firstprivate(x, y, z)
   for (i = 0; i < 16; ++i)
     ;
+// expected-error@+1 {{the value of 'simdlen' parameter must be less than or equal to the value of the 'safelen' parameter}}
+#pragma omp taskloop simd simdlen(64) safelen(8)
+  for (i = 0; i < 16; ++i)
+    ;
 }
 
 void test_loop_messages() {
diff --git a/test/OpenMP/taskloop_simd_num_tasks_messages.cpp b/test/OpenMP/taskloop_simd_num_tasks_messages.cpp
index f26ee79..3267d29 100644
--- a/test/OpenMP/taskloop_simd_num_tasks_messages.cpp
+++ b/test/OpenMP/taskloop_simd_num_tasks_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_priority_messages.cpp b/test/OpenMP/taskloop_simd_priority_messages.cpp
index a94798f..44e0b57 100644
--- a/test/OpenMP/taskloop_simd_priority_messages.cpp
+++ b/test/OpenMP/taskloop_simd_priority_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_private_codegen.cpp b/test/OpenMP/taskloop_simd_private_codegen.cpp
index 3833b5e..fd0820a 100644
--- a/test/OpenMP/taskloop_simd_private_codegen.cpp
+++ b/test/OpenMP/taskloop_simd_private_codegen.cpp
@@ -4,6 +4,14 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 // It doesn't pass on win32. Investigating.
 // REQUIRES: shell
diff --git a/test/OpenMP/taskloop_simd_private_messages.cpp b/test/OpenMP/taskloop_simd_private_messages.cpp
index ba9e8da..68e5365 100644
--- a/test/OpenMP/taskloop_simd_private_messages.cpp
+++ b/test/OpenMP/taskloop_simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_reduction_codegen.cpp b/test/OpenMP/taskloop_simd_reduction_codegen.cpp
index b96c8d5..4bf460a 100644
--- a/test/OpenMP/taskloop_simd_reduction_codegen.cpp
+++ b/test/OpenMP/taskloop_simd_reduction_codegen.cpp
@@ -1,4 +1,7 @@
-// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s
+
+// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 struct S {
@@ -51,6 +54,7 @@
 // CHECK:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
 // CHECK:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%ident_t*
 // CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t],
+// CHECK:    alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
 // CHECK:    store i32 0, i32* [[RETVAL]],
@@ -68,14 +72,14 @@
 // CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
 // CHECK-DAG:    store i64 4, i64* [[TMP22]],
 // CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* [[RED_INIT1:@.+]] to i8*), i8** [[TMP23]],
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
 // CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
 // CHECK-DAG:    store i8* null, i8** [[TMP24]],
 // CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
-// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* [[RED_COMB1:@.+]] to i8*), i8** [[TMP25]],
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB1:.+]] to i8*), i8** [[TMP25]],
 // CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
 // CHECK-DAG:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8*
-// CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* [[TMP27]], i8 0, i64 4, i32 8, i1 false)
+// CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0
 // CHECK-DAG:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, %
 // CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]]
@@ -91,11 +95,11 @@
 // CHECK-DAG:    store i64 [[TMP37]], i64* [[TMP38:%[^,]+]],
 // CHECK-DAG:    [[TMP38]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
 // CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* [[RED_INIT2:@.+]] to i8*), i8** [[TMP39]],
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
 // CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
-// CHECK-DAG:    store i8* bitcast (void (i8*)* [[RED_FINI2:@.+]] to i8*), i8** [[TMP40]],
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_FINI2:.+]] to i8*), i8** [[TMP40]],
 // CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
-// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* [[RED_COMB2:@.+]] to i8*), i8** [[TMP41]],
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB2:.+]] to i8*), i8** [[TMP41]],
 // CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
 // CHECK-DAG:    store i32 1, i32* [[TMP42]],
 // CHECK-DAG:    [[TMP44:%.*]] = load float*, float** [[D]],
@@ -105,14 +109,14 @@
 // CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
 // CHECK-DAG:    store i64 4, i64* [[TMP46]],
 // CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* [[RED_INIT3:@.+]] to i8*), i8** [[TMP47]],
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
 // CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
 // CHECK-DAG:    store i8* null, i8** [[TMP48]],
 // CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
-// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* [[RED_COMB3:@.+]] to i8*), i8** [[TMP49]],
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB3:.+]] to i8*), i8** [[TMP49]],
 // CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
 // CHECK-DAG:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8*
-// CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* [[TMP51]], i8 0, i64 4, i32 8, i1 false)
+// CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
 // CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
 // CHECK-DAG:    [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
@@ -121,11 +125,11 @@
 // CHECK-DAG:    store i64 [[TMP54]], i64* [[TMP56:%[^,]+]],
 // CHECK-DAG:    [[TMP56]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
 // CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* [[RED_INIT4:@.+]] to i8*), i8** [[TMP57]],
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
 // CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
 // CHECK-DAG:    store i8* null, i8** [[TMP58]],
 // CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
-// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* [[RED_COMB4:@.+]] to i8*), i8** [[TMP59]],
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB4:.+]] to i8*), i8** [[TMP59]],
 // CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
 // CHECK-DAG:    store i32 1, i32* [[TMP60]],
 // CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
@@ -149,49 +153,58 @@
 
 // CHECK:    ret i32
 
-// CHECK: define internal void [[RED_INIT1]](i8*)
+// CHECK: define internal void @[[RED_INIT1]](i8*)
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_COMB1]](i8*, i8*)
+// CHECK: define internal void @[[RED_COMB1]](i8*, i8*)
 // CHECK: fadd float %
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_INIT2]](i8*)
+// CHECK: define internal void @[[RED_INIT2]](i8*)
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_FINI2]](i8*)
+// CHECK: define internal void @[[RED_FINI2]](i8*)
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call void @
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_COMB2]](i8*, i8*)
+// CHECK: define internal void @[[RED_COMB2]](i8*, i8*)
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: fadd float %
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_INIT3]](i8*)
+// CHECK: define internal void @[[RED_INIT3]](i8*)
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_COMB3]](i8*, i8*)
+// CHECK: define internal void @[[RED_COMB3]](i8*, i8*)
 // CHECK: fadd float %
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_INIT4]](i8*)
+// CHECK: define internal void @[[RED_INIT4]](i8*)
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
-// CHECK: define internal void [[RED_COMB4]](i8*, i8*)
+// CHECK: define internal void @[[RED_COMB4]](i8*, i8*)
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: fadd float %
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT1]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB1]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT2]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_FINI2]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB2]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT3]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB3]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT4]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB4]]"
diff --git a/test/OpenMP/taskloop_simd_reduction_messages.cpp b/test/OpenMP/taskloop_simd_reduction_messages.cpp
index 886b685..10ee5ce 100644
--- a/test/OpenMP/taskloop_simd_reduction_messages.cpp
+++ b/test/OpenMP/taskloop_simd_reduction_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_safelen_messages.cpp b/test/OpenMP/taskloop_simd_safelen_messages.cpp
index 729f314..87c4fe0 100644
--- a/test/OpenMP/taskloop_simd_safelen_messages.cpp
+++ b/test/OpenMP/taskloop_simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskloop_simd_simdlen_messages.cpp b/test/OpenMP/taskloop_simd_simdlen_messages.cpp
index 79655ba..4170d01 100644
--- a/test/OpenMP/taskloop_simd_simdlen_messages.cpp
+++ b/test/OpenMP/taskloop_simd_simdlen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/taskwait_ast_print.cpp b/test/OpenMP/taskwait_ast_print.cpp
index 1a69b58..9cbc46e 100644
--- a/test/OpenMP/taskwait_ast_print.cpp
+++ b/test/OpenMP/taskwait_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -15,7 +19,7 @@
   return a + argc;
 }
 // CHECK:      static T a;
-// CHECK-NEXT: #pragma omp taskwait
+// CHECK-NEXT: #pragma omp taskwait{{$}}
 // CHECK:      static int a;
 // CHECK-NEXT: #pragma omp taskwait
 // CHECK:      static char a;
diff --git a/test/OpenMP/taskwait_codegen.cpp b/test/OpenMP/taskwait_codegen.cpp
index 492786e..22f381b 100644
--- a/test/OpenMP/taskwait_codegen.cpp
+++ b/test/OpenMP/taskwait_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/taskwait_messages.cpp b/test/OpenMP/taskwait_messages.cpp
index 06e8e6b..46efff5 100644
--- a/test/OpenMP/taskwait_messages.cpp
+++ b/test/OpenMP/taskwait_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 template <class T>
 T tmain(T argc) {
 #pragma omp taskwait
diff --git a/test/OpenMP/taskyield_ast_print.cpp b/test/OpenMP/taskyield_ast_print.cpp
index 1fd9b9c..7acf53d 100644
--- a/test/OpenMP/taskyield_ast_print.cpp
+++ b/test/OpenMP/taskyield_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -15,7 +19,7 @@
   return a + argc;
 }
 // CHECK:      static T a;
-// CHECK-NEXT: #pragma omp taskyield
+// CHECK-NEXT: #pragma omp taskyield{{$}}
 // CHECK:      static int a;
 // CHECK-NEXT: #pragma omp taskyield
 // CHECK:      static char a;
diff --git a/test/OpenMP/taskyield_codegen.cpp b/test/OpenMP/taskyield_codegen.cpp
index 80b65bf..d57071f 100644
--- a/test/OpenMP/taskyield_codegen.cpp
+++ b/test/OpenMP/taskyield_codegen.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/taskyield_messages.cpp b/test/OpenMP/taskyield_messages.cpp
index cfeaa63..fd98a87 100644
--- a/test/OpenMP/taskyield_messages.cpp
+++ b/test/OpenMP/taskyield_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 template <class T>
 T tmain(T argc) {
 #pragma omp taskyield
diff --git a/test/OpenMP/teams_ast_print.cpp b/test/OpenMP/teams_ast_print.cpp
index 861b25d..80300bf 100644
--- a/test/OpenMP/teams_ast_print.cpp
+++ b/test/OpenMP/teams_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -50,7 +54,7 @@
 // CHECK-NEXT: static T a;
 // CHECK-NEXT: S<T> s;
 // CHECK-NEXT: #pragma omp target
-// CHECK-NEXT: #pragma omp teams
+// CHECK-NEXT: #pragma omp teams{{$}}
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: #pragma omp target
 // CHECK-NEXT: #pragma omp teams default(none) private(argc,b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(C) thread_limit(d * C)
diff --git a/test/OpenMP/teams_codegen.cpp b/test/OpenMP/teams_codegen.cpp
index 8aaa206..1a22ab5 100644
--- a/test/OpenMP/teams_codegen.cpp
+++ b/test/OpenMP/teams_codegen.cpp
@@ -8,6 +8,14 @@
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK1
 
 int Gbla;
@@ -21,7 +29,7 @@
   int la = 23;
   float lc = 25.0;
 
-  // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
   // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
   #pragma omp target
   #pragma omp teams
@@ -29,7 +37,7 @@
     ++comp;
   }
 
-  // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
   // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
   #pragma omp target
   {{{
@@ -39,7 +47,7 @@
     }
   }}}
 
-  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+  // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
   // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
 
   // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
@@ -49,7 +57,7 @@
     ++comp;
   }
 
-  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+  // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
   // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
 
   // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
@@ -59,7 +67,7 @@
     ++comp;
   }
 
-  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+  // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
 
   // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
   // CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
@@ -78,7 +86,7 @@
     ++comp;
   }
 
-  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 {{.+}}, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+  // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 {{.+}}, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
 
   // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], 1
   // CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
@@ -105,6 +113,14 @@
 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
 // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 #ifdef CK2
 
 // CK2-DAG: [[SSI:%.+]] = type { i32, float }
@@ -125,7 +141,7 @@
   SS<int> la;
   SS<long long> lb;
 
-  // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+  // CK2-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
 
   // CK2-DAG: [[NT]] = load i32, i32* getelementptr inbounds ([[SSI]], [[SSI]]* @Gbla, i32 0, i32 0)
 
@@ -141,7 +157,7 @@
     ++comp;
   }
 
-  // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+  // CK2-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
 
   // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32
   // CK2-DAG: [[TLD]] = load i64, i64* getelementptr inbounds ([[SSL]], [[SSL]]* @Gblb, i32 0, i32 0),
@@ -168,6 +184,14 @@
 // RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
 // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
 #ifdef CK3
 
 // CK3: [[SSI:%.+]] = type { i32, float }
@@ -181,7 +205,7 @@
   int foo(void) {
     int comp = 1;
 
-    // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123)
+    // CK3-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123)
 
     // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
     // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
@@ -194,7 +218,7 @@
       ++comp;
     }
 
-    // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]])
+    // CK3-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]])
 
     // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123
     // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32
@@ -228,6 +252,16 @@
 // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
 
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// SIMD-ONLY3-NOT: {{__kmpc|__tgt}}
+
 #ifdef CK4
 
 // CK4-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
@@ -278,6 +312,16 @@
 // RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
 // RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-32
 
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK5 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK5 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// RUN: %clang_cc1 -DCK5 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK5 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY4 %s
+// SIMD-ONLY4-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifdef CK5
 
diff --git a/test/OpenMP/teams_default_messages.cpp b/test/OpenMP/teams_default_messages.cpp
index 83909cc..daf48ae 100644
--- a/test/OpenMP/teams_default_messages.cpp
+++ b/test/OpenMP/teams_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/teams_distribute_ast_print.cpp b/test/OpenMP/teams_distribute_ast_print.cpp
index f465c3f..ef07d51 100644
--- a/test/OpenMP/teams_distribute_ast_print.cpp
+++ b/test/OpenMP/teams_distribute_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -96,7 +100,7 @@
   for (int i=0; i < 2; ++i)
     a = 2;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute
+// CHECK-NEXT: #pragma omp teams distribute{{$}}
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: a = 2;
 #pragma omp target
diff --git a/test/OpenMP/teams_distribute_codegen.cpp b/test/OpenMP/teams_distribute_codegen.cpp
index a1b98a3..0f6b5f2 100644
--- a/test/OpenMP/teams_distribute_codegen.cpp
+++ b/test/OpenMP/teams_distribute_codegen.cpp
@@ -8,6 +8,14 @@
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK1
 
 int a[100];
@@ -25,7 +33,7 @@
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
-  // CK1: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0))
+  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
   #pragma omp target
@@ -34,7 +42,7 @@
     a[i] = 0;
   }
 
-  // CK1: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0))
+  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
   // CK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
   #pragma omp target
   {{{
@@ -86,6 +94,14 @@
 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
 // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 #ifdef CK2
 
 // CK2: define {{.*}}i32 @{{.+}}teams_local_argv(
@@ -93,7 +109,7 @@
   int n = 100;
   int a[n];
 
-  // CK2: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i32* {{.+}}@{{[^,]+}}, i32 0, i32 0))
+  // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
   // CK2: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
   #pragma omp target
   #pragma omp teams distribute
@@ -122,6 +138,14 @@
 // RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
 // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
 #ifdef CK3
 
 // CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float }
@@ -133,7 +157,7 @@
   // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK3: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0)) 
+  // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
   // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}})
     #pragma omp target
     #pragma omp teams distribute
@@ -170,6 +194,14 @@
 // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
 
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// SIMD-ONLY3-NOT: {{__kmpc|__tgt}}
+
 #ifdef CK4
 
 template <typename T, int n>
@@ -197,7 +229,7 @@
 }
 
 // CK4:  define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK4:   call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i32* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
 // CK4: call void @[[OFFL1:.+]]({{.+}})
 // CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK4:  ret
@@ -212,7 +244,7 @@
 // CK4: ret void
 
 // CK4:  define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK4:   call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0)) 
+// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
 // CK4: call void @[[OFFLT:.+]]({{.+}})
 // CK4:  ret
 // CK4-NEXT: }
diff --git a/test/OpenMP/teams_distribute_collapse_codegen.cpp b/test/OpenMP/teams_distribute_collapse_codegen.cpp
index e916077..716e766 100644
--- a/test/OpenMP/teams_distribute_collapse_codegen.cpp
+++ b/test/OpenMP/teams_distribute_collapse_codegen.cpp
@@ -9,6 +9,14 @@
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK1
 
 template <typename T, int X, long long Y>
@@ -18,7 +26,7 @@
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
     
-    // CK1: call i32 @__tgt_target(
+    // CK1: call i32 @__tgt_target_teams(
     // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute collapse(2)
@@ -35,6 +43,8 @@
     // discard loop variables not needed here
     // CK1: = alloca i32,
     // CK1: = alloca i32,
+    // CK1: = alloca i32,
+    // CK1: = alloca i32,
     // CK1: [[OMP_UB:%.+]] = alloca i32,
     // CK1: store i32 56087, i32* [[OMP_UB]],
     // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
@@ -59,6 +69,14 @@
 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
 // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 #ifdef CK2
 
 template <typename T, int n, int m>
@@ -89,7 +107,7 @@
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target(
+// CK2: call i32 @__tgt_target_teams(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -105,7 +123,7 @@
 // CK2: call void @__kmpc_for_static_fini(
 // CK2: ret void
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target(
+// CK2: call i32 @__tgt_target_teams(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/test/OpenMP/teams_distribute_collapse_messages.cpp b/test/OpenMP/teams_distribute_collapse_messages.cpp
index 37c10e5..bcbd774 100644
--- a/test/OpenMP/teams_distribute_collapse_messages.cpp
+++ b/test/OpenMP/teams_distribute_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp %s -std=c++98
 // RUN: %clang_cc1 -verify -fopenmp %s -std=c++11
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++98
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++11
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_default_messages.cpp b/test/OpenMP/teams_distribute_default_messages.cpp
index bf62930..5e8702a 100644
--- a/test/OpenMP/teams_distribute_default_messages.cpp
+++ b/test/OpenMP/teams_distribute_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
index 685180c..97ae387 100644
--- a/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
+++ b/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
@@ -9,6 +9,14 @@
 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK1
 
 template <typename T, int X, long long Y>
@@ -18,21 +26,21 @@
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target(
+  // CK1: call i32 @__tgt_target_teams(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target(
+  // CK1: call i32 @__tgt_target_teams(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target
     #pragma omp teams distribute dist_schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target(
+  // CK1: call i32 @__tgt_target_teams(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target
     #pragma omp teams distribute dist_schedule(static, X/2)
@@ -84,6 +92,14 @@
 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
 // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 #ifdef CK2
 
 template <typename T, int n>
@@ -129,11 +145,11 @@
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target(
+// CK2: call i32 @__tgt_target_teams(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target(
+// CK2: call i32 @__tgt_target_teams(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target(
+// CK2: call i32 @__tgt_target_teams(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -166,11 +182,11 @@
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target(
+// CK2: call i32 @__tgt_target_teams(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target(
+// CK2: call i32 @__tgt_target_teams(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target(
+// CK2: call i32 @__tgt_target_teams(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/test/OpenMP/teams_distribute_dist_schedule_messages.cpp b/test/OpenMP/teams_distribute_dist_schedule_messages.cpp
index d86722d..a86a87a 100644
--- a/test/OpenMP/teams_distribute_dist_schedule_messages.cpp
+++ b/test/OpenMP/teams_distribute_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
index aef288d..b9839a8 100644
--- a/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
+++ b/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
@@ -1,13 +1,26 @@
-// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 
 // expected-no-diagnostics
 #ifndef HEADER
@@ -73,7 +86,7 @@
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0))
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
     // LAMBDA:  ret
 #pragma omp target
@@ -154,7 +167,7 @@
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0))
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -217,7 +230,7 @@
 // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
 // CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
 
 // firstprivate(s_arr)
 // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
@@ -248,7 +261,7 @@
 // CHECK: ret void
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0))
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
 // CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK:  ret
 
@@ -303,7 +316,7 @@
 // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
 // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
 
 // firstprivate(s_arr)
 // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
diff --git a/test/OpenMP/teams_distribute_firstprivate_messages.cpp b/test/OpenMP/teams_distribute_firstprivate_messages.cpp
index a710807..e373a3a 100644
--- a/test/OpenMP/teams_distribute_firstprivate_messages.cpp
+++ b/test/OpenMP/teams_distribute_firstprivate_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
@@ -144,8 +146,9 @@
 #pragma omp teams distribute firstprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
-#pragma omp teams distribute lastprivate(argc), firstprivate(argc) // OK
+#pragma omp teams distribute lastprivate(argc), firstprivate(argc)
   for (i = 0; i < argc; ++i) foo();
 
   return 0;
diff --git a/test/OpenMP/teams_distribute_lastprivate_codegen.cpp b/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
index cc48ee1..8832341 100644
--- a/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
+++ b/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
@@ -1,16 +1,32 @@
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
 
-// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -53,7 +69,7 @@
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -71,6 +87,7 @@
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
       // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
       // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
@@ -174,7 +191,7 @@
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
 // CHECK: ret
 
@@ -195,6 +212,7 @@
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
@@ -229,7 +247,7 @@
 // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
 // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
 // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
 // CHECK: call void @__kmpc_for_static_fini(
 
 // lastprivates
@@ -242,7 +260,7 @@
 // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
 // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
 // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
 // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
 // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
@@ -253,7 +271,7 @@
 // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
 // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
 // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
 // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
 // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
@@ -262,7 +280,7 @@
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
 // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
 // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
 // CHECK: ret void
@@ -271,7 +289,7 @@
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
 // CHECK: ret
 
@@ -293,6 +311,7 @@
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
@@ -328,7 +347,7 @@
 // CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
 // CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8*
 // CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]],
 // CHECK: call void @__kmpc_for_static_fini(
 
 // lastprivates
@@ -341,7 +360,7 @@
 // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
 // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
 // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
 // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
 // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
@@ -352,7 +371,7 @@
 // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
 // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
 // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
 // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
 // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
@@ -361,6 +380,6 @@
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
 // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: ret void
 #endif
diff --git a/test/OpenMP/teams_distribute_lastprivate_messages.cpp b/test/OpenMP/teams_distribute_lastprivate_messages.cpp
index 6941197..d5e1a1c 100644
--- a/test/OpenMP/teams_distribute_lastprivate_messages.cpp
+++ b/test/OpenMP/teams_distribute_lastprivate_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
@@ -25,7 +27,7 @@
 const S2 ba[5];
 class S3 {
   int a;
-  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+  S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}}
 
 public:
   S3() : a(0) {}
@@ -52,7 +54,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -255,12 +257,14 @@
 #pragma omp teams distribute lastprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{firstprivate variable cannot be lastprivate}} expected-note@+2 {{defined as firstprivate}}
 #pragma omp target
-#pragma omp teams distribute firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+#pragma omp teams distribute firstprivate(m) lastprivate(m)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
-#pragma omp teams distribute lastprivate(n) firstprivate(n) // OK
+#pragma omp teams distribute lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i) foo();
 
   static int si;
diff --git a/test/OpenMP/teams_distribute_loop_messages.cpp b/test/OpenMP/teams_distribute_loop_messages.cpp
index ea2604e..13f39d1 100644
--- a/test/OpenMP/teams_distribute_loop_messages.cpp
+++ b/test/OpenMP/teams_distribute_loop_messages.cpp
@@ -1,8 +1,10 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 class S {
   int a;
-  S() : a(0) {}
+  S() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S(int v) : a(v) {}
@@ -689,12 +691,16 @@
       void g() { throw 0; }
     };
   }
+  #pragma omp target
+  #pragma omp teams distribute
+  f; // expected-error {{use of undeclared identifier 'f'}}
 }
 
 void test_loop_firstprivate_lastprivate() {
   S s(4);
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
-#pragma omp teams distribute lastprivate(s) firstprivate(s)
+#pragma omp teams distribute lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} expected-warning {{Non-trivial type 'S' is mapped, only trivial types are guaranteed to be mapped correctly}}
   for (int i = 0; i < 16; ++i)
     ;
 }
diff --git a/test/OpenMP/teams_distribute_num_teams_messages.cpp b/test/OpenMP/teams_distribute_num_teams_messages.cpp
index 6086abd..103a6d2 100644
--- a/test/OpenMP/teams_distribute_num_teams_messages.cpp
+++ b/test/OpenMP/teams_distribute_num_teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp b/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp
index ad14794..246382d 100644
--- a/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -8,6 +12,9 @@
 
 void foo() {}
 
+int x;
+#pragma omp threadprivate(x)
+
 struct S {
   S(): a(0) {}
   S(int v) : a(v) {}
@@ -40,7 +47,7 @@
   void foo() {
     int b, argv, d, c, e, f;
 #pragma omp target
-#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d)
+#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) copyin(x)
     for (int k = 0; k < a.a; ++k)
       ++a.a;
   }
@@ -50,7 +57,7 @@
 // CHECK: #pragma omp target
 // CHECK-NEXT: #pragma omp teams distribute parallel for private(this->a) private(this->a)
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
+// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) copyin(x)
 
 class S8 : public S7<S> {
   S8() {}
@@ -74,7 +81,7 @@
   void bar() {
     int b, argv, d, c, e, f8;
 #pragma omp target
-#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f8) thread_limit(d)
+#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f8) thread_limit(d) copyin(x)
     for (int k = 0; k < a.a; ++k)
       ++a.a;
   }
@@ -86,7 +93,7 @@
 // CHECK: #pragma omp target
 // CHECK-NEXT: #pragma omp teams distribute parallel for private(this->a) private(this->a)
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f8) thread_limit(d)
+// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f8) thread_limit(d) copyin(x)
 
 template <class T, int N>
 T tmain(T argc) {
@@ -101,7 +108,7 @@
   for (int i=0; i < 2; ++i)
     a = 2;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute parallel for
+// CHECK-NEXT: #pragma omp teams distribute parallel for{{$}}
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: a = 2;
 #pragma omp target
@@ -127,19 +134,19 @@
 // CHECK-NEXT: for (int i = 0; i < 10; ++i)
 // CHECK-NEXT: foo();  
 #pragma omp target
-#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argc) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d)
+#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argc) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) copyin(x)
     for (int k = 0; k < 10; ++k)
       e += d + argc;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
+// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) copyin(x)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target
-#pragma omp teams distribute parallel for linear(d)
+#pragma omp teams distribute parallel for
   for (int k = 0; k < 10; ++k)
     e += d + argc;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute parallel for linear(d)
+// CHECK-NEXT: #pragma omp teams distribute parallel for
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
   return T();
@@ -182,19 +189,19 @@
 // CHECK-NEXT: for (int i = 0; i < 10; ++i)
 // CHECK-NEXT: foo();
 #pragma omp target
-#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argc) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d)
+#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argc) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) copyin(x)
   for (int k = 0; k < 10; ++k)
     e += d + argc;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
+// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) copyin(x)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target
-#pragma omp teams distribute parallel for linear(d)
+#pragma omp teams distribute parallel for
   for (int k = 0; k < 10; ++k)
     e += d + argc;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute parallel for linear(d)
+// CHECK-NEXT: #pragma omp teams distribute parallel for
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
   return (0);
diff --git a/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
new file mode 100644
index 0000000..865ea12
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
@@ -0,0 +1,279 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+int a[100];
+
+// CK1: define {{.*}}i32 @{{.+}}teams_argument_globali(
+int teams_argument_global(int n){
+  int te = n / 128;
+  int th = 128;
+  // discard n_addr
+  // CK1: alloca i32,
+  // CK1: [[TE:%.+]] = alloca i32,
+  // CK1: [[TH:%.+]] = alloca i32,
+  // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
+  // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+  // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
+  // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
+  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
+
+  // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
+  #pragma omp target
+  #pragma omp teams distribute parallel for num_teams(te), thread_limit(th)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+    #pragma omp cancel for
+  }
+
+  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
+  #pragma omp target
+  {{{
+  #pragma omp teams distribute parallel for
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  }}}
+
+  // outlined target regions
+  // CK1: define internal void @[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], i{{32|64}} {{.+}}, {{.+}})
+  // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}},
+  // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}},
+  // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
+  // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
+  // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
+  // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
+  // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
+  // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
+  // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
+  // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
+  // CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.+}} @__kmpc_fork_call(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]]({{.+}}, {{.+}})
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.+}} @__kmpc_fork_call(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  return a[0];
+}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+// CK2: define {{.*}}i32 @{{.+}}teams_local_argv(
+int teams_local_arg(void) {
+  int n = 100;
+  int a[n];
+
+  // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK2: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams distribute parallel for
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+
+  // outlined target region
+  // CK2: define internal void @[[OFFL1]]({{.+}}, {{.+}})
+  // CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK2: ret void
+
+  // CK2: define internal void @[[OUTL1]]({{.+}})
+  // CK2: call void @__kmpc_for_static_init_4(
+  // CK2: call void {{.+}} @__kmpc_fork_call(
+  // CK2: call void @__kmpc_for_static_fini(
+  // CK2: ret void
+
+  return a[0];
+}
+#endif // CK2
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+#ifdef CK3
+
+// CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float }
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}})
+    #pragma omp target
+    #pragma omp teams distribute parallel for
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+      // outlined target region
+  // CK3: define internal void @[[OFFL1]]([[SSI]]* {{.+}})
+  // CK3: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK3: ret void
+
+  // CK3: define internal void @[[OUTL1]]({{.+}})
+  // CK3: call void @__kmpc_for_static_init_4(
+  // CK3: call void {{.+}} @__kmpc_fork_call(
+  // CK3: call void @__kmpc_for_static_fini(
+  // CK3: ret void
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK3
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
+
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// SIMD-ONLY3-NOT: {{__kmpc|__tgt}}
+
+#ifdef CK4
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+  int te = n/128;
+  int th = 128;
+#pragma omp target
+#pragma omp teams distribute parallel for num_teams(te) thread_limit(th)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+#pragma omp target
+#pragma omp teams distribute parallel for
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK4:  define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CK4: call void @[[OFFL1:.+]]({{.+}})
+// CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK4:  ret
+
+// CK4:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+// CK4: ret void
+
+// CK4: define internal void @[[OUTL1]]({{.+}})
+// CK4: call void @__kmpc_for_static_init_4(
+// CK4: call void {{.+}} @__kmpc_fork_call(
+// CK4: call void @__kmpc_for_static_fini(
+// CK4: ret void
+
+// CK4:  define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
+// CK4: call void @[[OFFLT:.+]]({{.+}})
+// CK4:  ret
+// CK4-NEXT: }
+
+// CK4: define {{.*}}void @[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}})
+// CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}},
+// CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}},
+// CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
+// CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
+// CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
+// CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
+// CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
+// CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
+// CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
+// CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
+// CK4: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
+// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}})
+// CK4: ret void
+
+// CK4: define internal void @[[OUTLT]]({{.+}})
+// CK4: call void @__kmpc_for_static_init_4(
+// CK4: call void {{.+}} @__kmpc_fork_call(
+// CK4: call void @__kmpc_for_static_fini(
+// CK4: ret void
+
+#endif // CK4
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
new file mode 100644
index 0000000..11a9786
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
@@ -0,0 +1,160 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X][Y];
+
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for collapse(2)
+    for(int i = 0; i < X; i++) {
+      for(int j = 0; j < Y; j++) {
+        a[i][j] = (T)0;
+      }
+    }
+    // CK1: define internal void @[[OFFL1]](
+    // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+    // CK1: ret void
+
+    // CK1: define internal void @[[OUTL1]]({{.+}})
+    // discard loop variables not needed here
+    // CK1: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
+    // CK1: store i32 56087, i32* [[OMP_UB]],
+    // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+    // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+    // CK1: call void @__kmpc_for_static_fini(
+    // CK1: ret void
+
+    // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+    // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+    // CK1: call void @__kmpc_for_static_fini(
+    // CK1: ret void
+
+    return a[0][0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n, int m>
+int tmain(T argc) {
+  T a[n][m];
+  #pragma omp target
+  #pragma omp teams distribute parallel for collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = (T)0;
+    }
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int m = 2;
+  int a[n][m];
+  #pragma omp target
+  #pragma omp teams distribute parallel for collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = 0;
+    }
+  }
+  return tmain<int, 10, 2>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i64,
+// CK2: store i64 {{.+}}, i64* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]],
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_{{[4|8]}}({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// discard loop variables not needed here
+// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
+// CK2: store i32 {{.+}}, i32* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[TPAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/teams_distribute_parallel_for_collapse_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_collapse_messages.cpp
index ea4455d..2e83a37 100644
--- a/test/OpenMP/teams_distribute_parallel_for_collapse_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp %s -std=c++98
 // RUN: %clang_cc1 -verify -fopenmp %s -std=c++11
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++98
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++11
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
new file mode 100644
index 0000000..399025d
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
@@ -0,0 +1,212 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+int x;
+#pragma omp threadprivate(x)
+
+template <typename T>
+T tmain() {
+  int a[2];
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(x)
+  for (int i = 0; i < 2; ++i) {
+    a[i] = x;
+  }
+  return T();
+}
+
+int main() {
+  int a[2];
+#ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(x)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[LOUTL1:.+]] to {{.+}})
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]](
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    a[i] = x;
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca [2 x i{{[0-9]+}}]*,
+    // LAMBDA: [[X_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+
+    // LAMBDA:  [[X_REF:%.+]] = load {{.+}}, {{.+}} [[X_ADDR]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: [[X_VAL:%.+]] = load {{.+}}, {{.+}} [[X_REF]],
+    // LAMBDA: store {{.+}} [[X_VAL]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      a[i] = x;
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(x)
+  for (int i = 0; i < 2; ++i) {
+    a[i] = x;
+  }
+  return tmain<int>();
+  //return 0;
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(
+// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]]({{.+}})
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[PAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{%.+}} = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[X_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+
+// CHECK:  [[X_REF:%.+]] = load {{.+}}, {{.+}} [[X_ADDR]],
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK:  [[X_VAL:%.+]] = load {{.+}}, {{.+}} [[X_REF]],
+// CHECK: store {{.+}} [[X_VAL]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(
+// CHECK: call void @[[TOFFL1:.+]](
+// CHECK:  ret
+
+// CHECK: define {{.*}}void @[[TOFFL1]](
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[TOUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[TPAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// prev lb and ub
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+
+// CHECK: {{%.+}} = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[X_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// iter variables
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+
+// CHECK:  [[X_REF:%.+]] = load {{.+}}, {{.+}} [[X_ADDR]],
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK:  [[X_VAL:%.+]] = load {{.+}}, {{.+}} [[X_REF]],
+// CHECK: store {{.+}} [[X_VAL]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_copyin_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_copyin_messages.cpp
new file mode 100644
index 0000000..2664b9d
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_copyin_messages.cpp
@@ -0,0 +1,115 @@
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
+void foo() {
+}
+
+bool foobool(int argc) {
+  return argc;
+}
+
+struct S1; // expected-note {{declared here}}
+class S2 {
+  mutable int a;
+
+public:
+  S2() : a(0) {}
+  S2 &operator=(S2 &s2) { return *this; }
+};
+class S3 {
+  int a;
+
+public:
+  S3() : a(0) {}
+  S3 &operator=(S3 &s3) { return *this; }
+};
+class S4 {
+  int a;
+  S4();
+  S4 &operator=(const S4 &s4); // expected-note {{implicitly declared private here}}
+
+public:
+  S4(int v) : a(v) {}
+};
+class S5 {
+  int a;
+  S5() : a(0) {}
+  S5 &operator=(const S5 &s5) { return *this; } // expected-note {{implicitly declared private here}}
+
+public:
+  S5(int v) : a(v) {}
+};
+template <class T>
+class ST {
+public:
+  static T s;
+};
+
+S2 k;
+S3 h;
+S4 l(3);
+S5 m(4);
+#pragma omp threadprivate(h, k, l, m)
+
+namespace A {
+double x;
+#pragma omp threadprivate(x)
+}
+namespace B {
+using A::x;
+}
+
+int main(int argc, char **argv) {
+  int i;
+#pragma omp target
+#pragma omp teams distribute parallel for copyin // expected-error {{expected '(' after 'copyin'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for copyin( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for copyin() // expected-error {{expected expression}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(k // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(h, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(l) // expected-error {{'operator=' is a private member of 'S4'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(S1) // expected-error {{'S1' does not refer to a value}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(argv[1]) // expected-error {{expected variable name}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(i) // expected-error {{copyin variable must be threadprivate}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(m) // expected-error {{'operator=' is a private member of 'S5'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for copyin(ST<int>::s, B::x) // expected-error {{copyin variable must be threadprivate}}
+  for (i = 0; i < argc; ++i)
+    foo();
+
+  return 0;
+}
diff --git a/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp
index 4cd7e65..e5b3331 100644
--- a/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
new file mode 100644
index 0000000..03276e4
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
@@ -0,0 +1,279 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL2:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for dist_schedule(static)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL3:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for dist_schedule(static, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: define internal void @[[OFFL1]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+
+  // CK1: define internal void @[[OFFL3]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+  int m = 10;
+#pragma omp target
+#pragma omp teams distribute parallel for
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+  int m = 10;
+#pragma omp target
+#pragma omp teams distribute parallel for
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/teams_distribute_parallel_for_dist_schedule_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_dist_schedule_messages.cpp
index 098abb7..e0abe11 100644
--- a/test/OpenMP/teams_distribute_parallel_for_dist_schedule_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
new file mode 100644
index 0000000..f52422f
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
@@ -0,0 +1,499 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target
+#pragma omp teams distribute parallel for firstprivate(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
+    // LAMBDA:  ret
+#pragma omp target
+#pragma omp teams distribute parallel for firstprivate(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]],
+    // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]],
+    // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]],
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+    // skip loop vars
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+    // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+    // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
+    // Skip global and bound tid vars, and prev lb and ub vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+    // skip loop vars
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+    // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+    // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+
+    // use of private vars
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_CONV]],
+    // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]]
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_CONV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target
+#pragma omp teams distribute parallel for firstprivate(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]]({{.+}})
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]],
+// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]],
+// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]],
+// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]],
+// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[PAR_OUTL]]({{.+}})
+// Skip global and bound tid vars, and prev lb ub vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK:  ret
+
+// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}})
+// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]],
+// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]],
+// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
+// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[TPAR_OUTL]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_firstprivate_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_firstprivate_messages.cpp
index e6314c2..f495588 100644
--- a/test/OpenMP/teams_distribute_parallel_for_firstprivate_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_firstprivate_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
@@ -144,8 +146,9 @@
 #pragma omp teams distribute parallel for firstprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
-#pragma omp teams distribute parallel for lastprivate(argc), firstprivate(argc) // OK
+#pragma omp teams distribute parallel for lastprivate(argc), firstprivate(argc)
   for (i = 0; i < argc; ++i) foo();
 
   return 0;
diff --git a/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
new file mode 100644
index 0000000..f4119fc
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
@@ -0,0 +1,189 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+void fn1();
+void fn2();
+void fn3();
+void fn4();
+void fn5();
+void fn6();
+
+int Arg;
+
+// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
+void gtid_test() {
+#pragma omp target
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+#pragma omp teams distribute parallel for
+  for(int i = 0 ; i < 100; i++) {}
+  // CHECK: define internal void [[OFFLOADING_FUN_0]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+  // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK:  call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
+  // CHECK: call void @__kmpc_for_static_fini(
+
+  // CHECK: define{{.+}} void [[OMP_OUTLINED_0]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: ret
+#pragma omp target
+#pragma omp teams distribute parallel for if (parallel: false)
+  for(int i = 0 ; i < 100; i++) {
+  // CHECK: define internal void [[OFFLOADING_FUN_1]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+  // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @__kmpc_serialized_parallel(
+  // CHECK: call void [[OMP_OUTLINED_1:@.+]](
+  // CHECK: call void @__kmpc_end_serialized_parallel(
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @{{.+}}gtid_test
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: ret
+    gtid_test();
+  }
+}
+
+
+template <typename T>
+int tmain(T Arg) {
+#pragma omp target
+#pragma omp teams distribute parallel for if (true)
+  for(int i = 0 ; i < 100; i++) {
+    fn1();
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for if (false)
+  for(int i = 0 ; i < 100; i++) {
+    fn2();
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for if (parallel: Arg)
+  for(int i = 0 ; i < 100; i++) {
+    fn3();
+  }
+  return 0;
+}
+
+// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
+int main() {
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
+// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
+#pragma omp target
+#pragma omp teams distribute parallel for if (true)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_0]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void
+    // CHECK: call void @__kmpc_for_static_fini(
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn4
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    fn4();
+  }
+
+#pragma omp target
+#pragma omp teams distribute parallel for if (false)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_1]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void @__kmpc_serialized_parallel(
+    // CHECK: call void [[OMP_OUTLINED_3:@.+]](
+    // CHECK: call void @__kmpc_end_serialized_parallel(
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn5
+    // CHECK: call void @__kmpc_for_static_fini(
+    fn5();
+  }
+
+#pragma omp target
+#pragma omp teams distribute parallel for if (Arg)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_2]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void
+    // CHECK: call void @__kmpc_serialized_parallel(
+    // CHECK: call void [[OMP_OUTLINED_4:@.+]](
+    // CHECK: call void @__kmpc_end_serialized_parallel(
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn6
+    // CHECK: call void @__kmpc_for_static_fini(
+    fn6();
+  }
+
+  return tmain(Arg);
+}
+
+// CHECK-LABEL: define {{.+}} @{{.+}}tmain
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn1
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// CHECK: call void [[T_OUTLINE_FUN_2:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn2
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// call void [[T_OUTLINE_FUN_3:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn3
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
index 14253b8..e92dc65 100644
--- a/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
new file mode 100644
index 0000000..451dc05
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
@@ -0,0 +1,600 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target
+  #pragma omp teams distribute parallel for lastprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target
+    #pragma omp teams distribute parallel for lastprivate(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
+      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+      // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
+
+      // init private variables
+      // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
+      g = 1;
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+      // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
+      // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
+
+      // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
+      // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+
+      // LAMBDA: define{{.*}} internal{{.*}} void @[[LPAR_OUTL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
+      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+      // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
+
+      // init private variables
+      // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
+
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]],
+      // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]],
+      // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+      // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+      // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
+      // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
+
+      // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
+      // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+
+      [&]() {
+	// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+	// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+	g = 2;
+	g1 = 2;
+	svar = 4;
+	sfvar = 8.0;
+	// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+	// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+	// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+	// LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+	// LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+	// LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+	// LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+	// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+	// LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+	// LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+	// LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target
+  #pragma omp teams distribute parallel for lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  int i;
+
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]])
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
+// CHECK: ret void
+
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]])
+// gbl and bound tid vars, prev lb and ub vars
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+
+// assignment: vec[i] = t_var;
+// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
+
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
+// CHECK: ret void
+
+// template tmain
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
+// CHECK: ret
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip init of bound and global tid
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void [[TPAR_OUTL:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid, and prev lb and ub vars
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip init of bound and global tid
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+
+// assignment: vec[i] = t_var;
+// CHECK: [[IV_VAL1:%.+]] =
+// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]],
+// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]],
+
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: ret void
+
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_lastprivate_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_lastprivate_messages.cpp
index c5f457a..97e9e4f 100644
--- a/test/OpenMP/teams_distribute_parallel_for_lastprivate_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_lastprivate_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
@@ -25,7 +27,7 @@
 const S2 ba[5];
 class S3 {
   int a;
-  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+  S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}}
 
 public:
   S3() : a(0) {}
@@ -52,7 +54,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -255,12 +257,14 @@
 #pragma omp teams distribute parallel for lastprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{firstprivate variable cannot be lastprivate}} expected-note@+2 {{defined as firstprivate}}
 #pragma omp target
-#pragma omp teams distribute parallel for firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+#pragma omp teams distribute parallel for firstprivate(m) lastprivate(m)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
-#pragma omp teams distribute parallel for lastprivate(n) firstprivate(n) // OK
+#pragma omp teams distribute parallel for lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i) foo();
 
   static int si;
diff --git a/test/OpenMP/teams_distribute_parallel_for_linear_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_linear_messages.cpp
deleted file mode 100644
index b14fb23..0000000
--- a/test/OpenMP/teams_distribute_parallel_for_linear_messages.cpp
+++ /dev/null
@@ -1,293 +0,0 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
-
-namespace X {
-  int x;
-};
-
-struct B {
-  static int ib; // expected-note {{'B::ib' declared here}}
-  static int bfoo() { return 8; }
-};
-
-int bfoo() { return 4; }
-
-int z;
-const int C1 = 1;
-const int C2 = 2;
-void test_linear_colons()
-{
-  int B = 0;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(B:bfoo())
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}}
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}}
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}}
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(B:B::bfoo())
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(X::x : ::z)
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(B,::z, X::x)
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(::z)
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(B::bfoo()) // expected-error {{expected variable name}}
-  for (int i = 0; i < 10; ++i) ;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(B::ib,B:C1+C2)
-  for (int i = 0; i < 10; ++i) ;
-}
-
-template<int L, class T, class N> T test_template(T* arr, N num) {
-  N i;
-  T sum = (T)0;
-  T ind2 = - num * L; // expected-note {{'ind2' defined here}}
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(ind2:L) // expected-error {{argument of a linear clause should be of integral or pointer type}}
-  for (i = 0; i < num; ++i) {
-    T cur = arr[(int)ind2];
-    ind2 += L;
-    sum += cur;
-  }
-  return T();
-}
-
-template<int LEN> int test_warn() {
-  int ind2 = 0;
-  #pragma omp target
-  #pragma omp teams distribute parallel for linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}}
-  for (int i = 0; i < 100; i++) {
-    ind2 += LEN;
-  }
-  return ind2;
-}
-
-struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}}
-extern S1 a;
-class S2 {
-  mutable int a;
-public:
-  S2():a(0) { }
-};
-const S2 b; // expected-note 2 {{'b' defined here}}
-const S2 ba[5];
-class S3 {
-  int a;
-public:
-  S3():a(0) { }
-};
-const S3 ca[5];
-class S4 {
-  int a;
-  S4();
-public:
-  S4(int v):a(v) { }
-};
-class S5 {
-  int a;
-  S5():a(0) {}
-public:
-  S5(int v):a(v) { }
-};
-
-S3 h;
-#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
-
-template<class I, class C> int foomain(I argc, C **argv) {
-  I e(4);
-  I g(5);
-  int i;
-  int &j = i;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear // expected-error {{expected '(' after 'linear'}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear () // expected-error {{expected expression}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (argc : 5)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (S1) // expected-error {{'S1' does not refer to a value}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (a, b:B::ib) // expected-error {{linear variable with incomplete type 'S1'}} expected-error {{const-qualified variable cannot be linear}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (argv[1]) // expected-error {{expected variable name}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(e, g)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  #pragma omp parallel
-  {
-    int v = 0;
-    int i;
-    #pragma omp target
-    #pragma omp teams distribute parallel for linear(v:i)
-    for (int k = 0; k < argc; ++k) { i = k; v += i; }
-  }
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  int v = 0;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(v:j)
-  for (int k = 0; k < argc; ++k) { ++k; v += j; }
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-  return 0;
-}
-
-namespace A {
-double x;
-#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}}
-}
-namespace C {
-using A::x;
-}
-
-int main(int argc, char **argv) {
-  double darr[100];
-  // expected-note@+1 {{in instantiation of function template specialization 'test_template<-4, double, int>' requested here}}
-  test_template<-4>(darr, 4);
-  // expected-note@+1 {{in instantiation of function template specialization 'test_warn<0>' requested here}}
-  test_warn<0>();
-
-  S4 e(4); // expected-note {{'e' defined here}}
-  S5 g(5); // expected-note {{'g' defined here}}
-  int i;
-  int &j = i;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear // expected-error {{expected '(' after 'linear'}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear () // expected-error {{expected expression}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (argc)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (S1) // expected-error {{'S1' does not refer to a value}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (a, b) // expected-error {{linear variable with incomplete type 'S1'}} expected-error {{const-qualified variable cannot be linear}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear (argv[1]) // expected-error {{expected variable name}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(e, g) // expected-error {{argument of a linear clause should be of integral or pointer type, not 'S4'}} expected-error {{argument of a linear clause should be of integral or pointer type, not 'S5'}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}}
-  for (int k = 0; k < argc; ++k) ++k;
-
-  #pragma omp parallel
-  {
-    int i;
-    #pragma omp target
-    #pragma omp teams distribute parallel for linear(i)
-      for (int k = 0; k < argc; ++k) ++k;
-
-    #pragma omp target
-    #pragma omp teams distribute parallel for linear(i : 4)
-      for (int k = 0; k < argc; ++k) { ++k; i += 4; }
-  }
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
-  return 0;
-}
-
diff --git a/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp
index e0c315f..0fd3fa2 100644
--- a/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp
@@ -1,8 +1,10 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 class S {
   int a;
-  S() : a(0) {}
+  S() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S(int v) : a(v) {}
@@ -691,8 +693,9 @@
 
 void test_loop_firstprivate_lastprivate() {
   S s(4);
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
-#pragma omp teams distribute parallel for lastprivate(s) firstprivate(s)
+#pragma omp teams distribute parallel for lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} expected-warning {{Non-trivial type 'S' is mapped, only trivial types are guaranteed to be mapped correctly}}
   for (int i = 0; i < 16; ++i)
     ;
 }
diff --git a/test/OpenMP/teams_distribute_parallel_for_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_messages.cpp
index c3536cd..57ad666 100644
--- a/test/OpenMP/teams_distribute_parallel_for_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
@@ -9,6 +11,9 @@
 #pragma omp teams distribute parallel for // expected-error {{unexpected OpenMP directive '#pragma omp teams distribute parallel for'}}
 
 int main(int argc, char **argv) {
+  #pragma omp target
+  #pragma omp teams distribute parallel for
+  f; // expected-error {{use of undeclared identifier 'f'}}
 #pragma omp target
 #pragma omp teams distribute parallel for { // expected-warning {{extra tokens at the end of '#pragma omp teams distribute parallel for' are ignored}}
   for (int i = 0; i < argc; ++i)
@@ -34,7 +39,7 @@
   for (int i = 0; i < argc; ++i)
     foo();
 #pragma omp target
-#pragma omp teams distribute parallel for
+#pragma omp teams distribute parallel for linear(argc) // expected-error {{unexpected OpenMP clause 'linear' in directive '#pragma omp teams distribute parallel for'}}
   for (int i = 0; i < argc; ++i)
     foo();
 // expected-warning@+2 {{extra tokens at the end of '#pragma omp teams distribute parallel for' are ignored}}
@@ -94,7 +99,7 @@
   }
 
 #pragma omp target
-#pragma omp teams distribute parallel for copyin(pvt) // expected-error {{unexpected OpenMP clause 'copyin' in directive '#pragma omp teams distribute parallel for'}}
+#pragma omp teams distribute parallel for copyin(pvt)
   for (int n = 0; n < 100; ++n) {}
 
   return 0;
@@ -107,3 +112,12 @@
     ;
 }
 
+void test_cancel() {
+#pragma omp target
+#pragma omp teams distribute parallel for
+  for (int i = 0; i < 16; ++i) {
+#pragma omp cancel for
+    ;
+  }
+}
+
diff --git a/test/OpenMP/teams_distribute_parallel_for_num_teams_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_num_teams_messages.cpp
index 63bc91e..cc6f1c8 100644
--- a/test/OpenMP/teams_distribute_parallel_for_num_teams_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_num_teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
new file mode 100644
index 0000000..ea4afa1
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
@@ -0,0 +1,125 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T, int C>
+int tmain() {
+#pragma omp target
+#pragma omp teams distribute parallel for num_threads(C)
+  for (int i = 0; i < 100; i++)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for num_threads(T(23))
+  for (int i = 0; i < 100; i++)
+    foo();
+  return 0;
+}
+
+int main() {
+  S s(0);
+  char a = s;
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: invoke{{.+}} [[TMAIN_5:@.+]]()
+// CHECK: invoke{{.+}} [[TMAIN_1:@.+]]()
+#pragma omp target
+  // CHECK: define internal void [[OFFLOADING_FUN_0]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+#pragma omp teams distribute parallel for num_threads(2)
+  for (int i = 0; i < 100; i++) {
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+    // CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 2)
+    // CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+    foo();
+  }
+#pragma omp target
+  // CHECK: define internal void [[OFFLOADING_FUN_1]](
+
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+#pragma omp teams distribute parallel for num_threads(a)
+  for (int i = 0; i < 100; i++) {
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+    // CHECK-DAG: [[A_ADDR:%.+]] = alloca i64,
+    // CHECK-DAG: [[A_REF:%.+]] = bitcast i64* [[A_ADDR]] to i8*
+    // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]],
+    // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}}
+    // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]])
+    // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
+    foo();
+  }
+  return a + tmain<char, 5>() + tmain<S, 1>();
+}
+
+// tmain 5
+// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]()
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_1:@.+]](
+
+// tmain 1
+// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]()
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_2:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_3:@.+]](
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_0]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_0]](
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 5)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_1]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_1]](
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_2]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]](
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_3]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]])
+// CHECK: [[NUM_TH_CPT:%.+]] = alloca i64,
+// CHECK: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]],
+// CHECK: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8*
+// CHECK-DAG:   [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]],
+// CHECK-DAG:   [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}}
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]])
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
new file mode 100644
index 0000000..1c55a14
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
@@ -0,0 +1,345 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target
+#pragma omp teams distribute parallel for private(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target
+#pragma omp teams distribute parallel for private(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}})
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}})
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+    // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+    // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target
+#pragma omp teams distribute parallel for private(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]]()
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[PAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call void @[[TOFFL1:.+]]()
+// CHECK:  ret
+
+// CHECK: define {{.*}}void @[[TOFFL1]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[TPAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// prev lb and ub
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// iter variables
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_private_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_private_messages.cpp
index 6e72905..5fda1a9 100644
--- a/test/OpenMP/teams_distribute_parallel_for_private_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
new file mode 100644
index 0000000..b5a7ca8
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
@@ -0,0 +1,95 @@
+// add -fopenmp-targets
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T>
+T tmain() {
+#pragma omp target
+#pragma omp teams distribute parallel for proc_bind(master)
+  for(int i = 0; i < 1000; i++) {}
+  return T();
+}
+
+int main() {
+  // CHECK-LABEL: @main
+#pragma omp target
+#pragma omp teams distribute parallel for proc_bind(spread)
+  for(int i = 0; i < 1000; i++) {}
+#pragma omp target
+#pragma omp teams distribute parallel for proc_bind(close)
+  for(int i = 0; i < 1000; i++) {}
+  return tmain<int>();
+}
+
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL1:@.+]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL2:@.+]]()
+// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
+// CHECK: ret i32 [[CALL_RET]]
+
+// CHECK: define{{.+}} void [[OFFL1]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[OFFL2]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[TMAIN]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL3:@.+]]()
+
+// CHECK: define{{.+}} [[OFFL3]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_proc_bind_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_proc_bind_messages.cpp
index 2a7074a..33e9c39 100644
--- a/test/OpenMP/teams_distribute_parallel_for_proc_bind_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo();
 
 template <class T, typename S, int N>
diff --git a/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
new file mode 100644
index 0000000..34cc287
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
@@ -0,0 +1,358 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+T tmain() {
+  T t_var = T();
+  T vec[] = {1, 2};
+#pragma omp target
+#pragma omp teams distribute parallel for reduction(+: t_var)
+  for (int i = 0; i < 2; ++i) {
+    t_var += (T) i;
+  }
+  return T();
+}
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target
+#pragma omp teams distribute parallel for reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+    // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+    // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+    // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+    // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+    // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+    // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+    // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+    // LAMBDA: ]
+    // LAMBDA: [[CASE1]]:
+    // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+    // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    // LAMBDA: [[CASE2]]:
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA: br
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+
+    // Skip global and bound tid vars, and prev lb and ub vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+    // skip loop vars
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+     // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+    // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+    // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+    // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+    // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+    // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+    // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+    // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+    // LAMBDA: ]
+    // LAMBDA: [[CASE1]]:
+    // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+    // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    // LAMBDA: [[CASE2]]:
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA: br
+
+    sivar += i;
+
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+
+      sivar += 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
+      // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4
+      // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target
+#pragma omp teams distribute parallel for reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK-64: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to
+// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]])
+// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_ADDR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars, and prev lb and ub
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// skip loop vars
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call void @[[TOFFL1:.+]]({{.+}})
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}} [[TVAR_ARG:%.+]])
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK-64: [[TVAR_CONV:%.+]] = bitcast{{.+}} [[TVAR_ADDR]] to
+// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_CONV]])
+// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_ADDR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars, and prev lb and ub vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// skip loop vars
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: br
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp
index 889efd7..31a91a8 100644
--- a/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
-// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
-// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
new file mode 100644
index 0000000..f7b11a7
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
@@ -0,0 +1,414 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL2:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for schedule(static)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL3:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for schedule(static, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL4:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for schedule(dynamic)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL5:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for schedule(dynamic, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: define internal void @[[OFFL1]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL3]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL4]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL4:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL4]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL4]]({{.+}})
+  // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+  // CK1: call {{.+}} @__kmpc_dispatch_next_4(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL5]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL5:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL5]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL5]]({{.+}})
+  // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+  // CK1: call {{.+}} @__kmpc_dispatch_next_4(
+  // CK1: ret void
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+  int m = 10;
+#pragma omp target
+#pragma omp teams distribute parallel for
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for schedule(dynamic)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for schedule(dynamic, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+  int m = 10;
+#pragma omp target
+#pragma omp teams distribute parallel for
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for schedule(dynamic)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for schedule(dynamic, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL4:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL5:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL4]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL4:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL4]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL4]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+
+// CK2: define {{.*}}void @[[OFFL5]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL5:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL5]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL5]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT4:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT5:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT4]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT4:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT4]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT4:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT4]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT5]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT5:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT5]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT5:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT5]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/teams_distribute_parallel_for_schedule_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_schedule_messages.cpp
index 477d1a0..e8cda67 100644
--- a/test/OpenMP/teams_distribute_parallel_for_schedule_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_shared_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_shared_messages.cpp
index 1a1af09..90edfb1 100644
--- a/test/OpenMP/teams_distribute_parallel_for_shared_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_shared_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_aligned_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_aligned_messages.cpp
index 97b200b..8fc21a0 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_aligned_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_ast_print.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_ast_print.cpp
index 26cc158..c7a0a35 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_ast_print.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -29,9 +33,10 @@
       ++this->a.a;
   }
   S7 &operator=(S7 &s) {
+    int k;
 #pragma omp target
-#pragma omp teams distribute parallel for simd private(a) private(this->a)
-    for (int k = 0; k < s.a.a; ++k)
+#pragma omp teams distribute parallel for simd private(a) private(this->a) linear(k)
+    for (k = 0; k < s.a.a; ++k)
       ++s.a.a;
 
     foo();
@@ -59,7 +64,7 @@
 // CHECK: #pragma omp target
 // CHECK-NEXT: #pragma omp teams distribute parallel for simd private(this->a) private(this->a) private(T::a)
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute parallel for simd private(this->a) private(this->a)
+// CHECK-NEXT: #pragma omp teams distribute parallel for simd private(this->a) private(this->a) linear(k)
 // CHECK: #pragma omp target
 // CHECK-NEXT: #pragma omp teams distribute parallel for simd default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
 // CHECK: #pragma omp target
@@ -127,7 +132,7 @@
   for (int i=0; i < 2; ++i)
     a = 2;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute parallel for simd
+// CHECK-NEXT: #pragma omp teams distribute parallel for simd{{$}}
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: a = 2;
 #pragma omp target
@@ -161,11 +166,11 @@
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target
-#pragma omp teams distribute parallel for simd simdlen(clen-1) linear(d)
+#pragma omp teams distribute parallel for simd simdlen(clen-1)
   for (int k = 0; k < 10; ++k)
     e += d + argc;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute parallel for simd simdlen(clen - 1) linear(d)
+// CHECK-NEXT: #pragma omp teams distribute parallel for simd simdlen(clen - 1)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target
@@ -224,11 +229,11 @@
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target
-#pragma omp teams distribute parallel for simd simdlen(clen-1) linear(d)
+#pragma omp teams distribute parallel for simd simdlen(clen-1)
   for (int k = 0; k < 10; ++k)
     e += d + argc;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute parallel for simd simdlen(clen - 1) linear(d)
+// CHECK-NEXT: #pragma omp teams distribute parallel for simd simdlen(clen - 1)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
new file mode 100644
index 0000000..9c363d7
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
@@ -0,0 +1,295 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+int a[100];
+
+// CK1: define {{.*}}i32 @{{.+}}teams_argument_globali(
+int teams_argument_global(int n){
+  int te = n / 128;
+  int th = 128;
+  // discard n_addr
+  // CK1: alloca i32,
+  // CK1: [[TE:%.+]] = alloca i32,
+  // CK1: [[TH:%.+]] = alloca i32,
+  // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
+  // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+  // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
+  // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
+
+  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+
+  // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
+  #pragma omp target
+  #pragma omp teams distribute parallel for simd num_teams(te), thread_limit(th) simdlen(64)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+
+  int i;
+  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+  // CK1: call void @[[OFFL2:.+]](
+  #pragma omp target
+  {{{
+  #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i) 
+  for(i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  }}}
+  // outlined target regions
+  // CK1: define internal void @[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], i{{32|64}} {{.+}}, {{.+}})
+  // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}},
+  // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}},
+  // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
+  // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
+  // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
+  // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
+  // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
+  // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
+  // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
+  // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
+  // CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.+}} @__kmpc_fork_call(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]]({{.+}}, {{.+}})
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.+}} @__kmpc_fork_call(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  return a[0];
+}
+
+// CK1-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 4}
+// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 64}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+// CK2: define {{.*}}i32 @{{.+}}teams_local_argv(
+int teams_local_arg(void) {
+  int n = 100;
+  int a[n], i;
+
+  // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+  // CK2: call void @[[OFFL1:.+]](
+  #pragma omp target
+  #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i)
+  for(i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+
+  // outlined target region
+  // CK2: define internal void @[[OFFL1]]({{.+}}, {{.+}})
+  // CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK2: ret void
+
+  // CK2: define internal void @[[OUTL1]]({{.+}})
+  // CK2: call void @__kmpc_for_static_init_4(
+  // CK2: call void {{.+}} @__kmpc_fork_call(
+  // CK2: call void @__kmpc_for_static_fini(
+  // CK2: ret void
+
+  return a[0];
+}
+
+// CK2-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+// CK2-DAG: !{!"llvm.loop.vectorize.width", i32 4}
+
+#endif // CK2
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+#ifdef CK3
+
+// CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float }
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+    int i;
+  // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+  // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}})
+    #pragma omp target
+    #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i)
+    for(i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+      // outlined target region
+  // CK3: define internal void @[[OFFL1]]([[SSI]]* {{.+}})
+  // CK3: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK3: ret void
+
+  // CK3: define internal void @[[OUTL1]]({{.+}})
+  // CK3: call void @__kmpc_for_static_init_4(
+  // CK3: call void {{.+}} @__kmpc_fork_call(
+  // CK3: call void @__kmpc_for_static_fini(
+  // CK3: ret void
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+
+// CK3-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+// CK3-DAG: !{!"llvm.loop.vectorize.width", i32 4}
+#endif // CK3
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
+
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// SIMD-ONLY3-NOT: {{__kmpc|__tgt}}
+
+#ifdef CK4
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+  int te = n/128;
+  int th = 128;
+#pragma omp target
+#pragma omp teams distribute parallel for simd num_teams(te) thread_limit(th) simdlen(64)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n], i;
+#pragma omp target
+#pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i)
+  for(i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK4:  define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+// CK4: call void @[[OFFL1:.+]]({{.+}})
+// CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK4:  ret
+
+// CK4:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+// CK4: ret void
+
+// CK4: define internal void @[[OUTL1]]({{.+}})
+// CK4: call void @__kmpc_for_static_init_4(
+// CK4: call void {{.+}} @__kmpc_fork_call(
+// CK4: call void @__kmpc_for_static_fini(
+// CK4: ret void
+
+// CK4:  define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+// CK4: call void @[[OFFLT:.+]]({{.+}})
+// CK4:  ret
+// CK4-NEXT: }
+
+// CK4: define {{.*}}void @[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}})
+// CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}},
+// CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}},
+// CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
+// CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
+// CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
+// CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
+// CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
+// CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
+// CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
+// CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
+// CK4: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
+// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}})
+// CK4: ret void
+
+// CK4: define internal void @[[OUTLT]]({{.+}})
+// CK4: call void @__kmpc_for_static_init_4(
+// CK4: call void {{.+}} @__kmpc_fork_call(
+// CK4: call void @__kmpc_for_static_fini(
+// CK4: ret void
+
+// CK4-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+// CK4-DAG: !{!"llvm.loop.vectorize.width", i32 4}
+// CK4-DAG: !{!"llvm.loop.vectorize.width", i32 64}
+
+#endif // CK4
+#endif
+
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
new file mode 100644
index 0000000..24bd31e
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
@@ -0,0 +1,165 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X][Y];
+
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for simd collapse(2)
+    for(int i = 0; i < X; i++) {
+      for(int j = 0; j < Y; j++) {
+	a[i][j] = (T)0;
+      }
+    }
+    // CK1: define internal void @[[OFFL1]](
+    // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+    // CK1: ret void
+
+    // CK1: define internal void @[[OUTL1]]({{.+}})
+    // discard loop variables not needed here
+    // CK1: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
+    // CK1: store i32 56087, i32* [[OMP_UB]],
+    // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+    // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+    // CK1: call void @__kmpc_for_static_fini(
+    // CK1: ret void
+
+    // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+    // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+    // CK1: call void @__kmpc_for_static_fini(
+    // CK1: ret void
+
+    return a[0][0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+
+// CK4: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n, int m>
+int tmain(T argc) {
+  T a[n][m];
+  #pragma omp target
+  #pragma omp teams distribute parallel for simd collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = (T)0;
+    }
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int m = 2;
+  int a[n][m];
+  #pragma omp target
+  #pragma omp teams distribute parallel for simd collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = 0;
+    }
+  }
+  return tmain<int, 10, 2>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i64,
+// CK2: store i64 {{.+}}, i64* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]],
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_{{[4|8]}}({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// discard loop variables not needed here
+// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
+// CK2: store i32 {{.+}}, i32* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[TPAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK4: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_collapse_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_collapse_messages.cpp
index 197c5f1..a287da7 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_collapse_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp %s -std=c++98
 // RUN: %clang_cc1 -verify -fopenmp %s -std=c++11
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++98
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -std=c++11
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp
index dc69acb..8db9d66 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
new file mode 100644
index 0000000..4f11dc6
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
@@ -0,0 +1,284 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for simd
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL2:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for simd dist_schedule(static)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL3:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for simd dist_schedule(static, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: define internal void @[[OFFL1]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+
+  // CK1: define internal void @[[OFFL3]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+
+// CK1: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+  int m = 10;
+#pragma omp target
+#pragma omp teams distribute parallel for simd
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+  int m = 10;
+#pragma omp target
+#pragma omp teams distribute parallel for simd
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_messages.cpp
index b879de7..d53fa57 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
new file mode 100644
index 0000000..145ca1a
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -0,0 +1,504 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target
+#pragma omp teams distribute parallel for simd firstprivate(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
+    // LAMBDA:  ret
+#pragma omp target
+#pragma omp teams distribute parallel for simd firstprivate(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]],
+    // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]],
+    // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]],
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+    // skip loop vars
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+    // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+    // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
+    // Skip global and bound tid vars, and prev lb and ub vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+    // skip loop vars
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+    // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+    // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+
+    // use of private vars
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_CONV]],
+    // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]]
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_CONV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+
+// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#else
+#pragma omp target
+#pragma omp teams distribute parallel for simd firstprivate(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]]({{.+}})
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]],
+// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]],
+// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]],
+// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]],
+// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[PAR_OUTL]]({{.+}})
+// Skip global and bound tid vars, and prev lb ub vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+// CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK:  ret
+
+// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}})
+// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]],
+// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]],
+// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
+// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[TPAR_OUTL]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_messages.cpp
index cbbb313..036f201 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
@@ -144,8 +146,9 @@
 #pragma omp teams distribute parallel for simd firstprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
-#pragma omp teams distribute parallel for simd lastprivate(argc), firstprivate(argc) // OK
+#pragma omp teams distribute parallel for simd lastprivate(argc), firstprivate(argc)
   for (i = 0; i < argc; ++i) foo();
 
   return 0;
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
new file mode 100644
index 0000000..6679413
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -0,0 +1,192 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+void fn1();
+void fn2();
+void fn3();
+void fn4();
+void fn5();
+void fn6();
+
+int Arg;
+
+// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
+void gtid_test() {
+#pragma omp target
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+#pragma omp teams distribute parallel for simd
+  for(int i = 0 ; i < 100; i++) {}
+  // CHECK: define internal void [[OFFLOADING_FUN_0]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+  // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK:  call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
+  // CHECK: call void @__kmpc_for_static_fini(
+
+  // CHECK: define{{.+}} void [[OMP_OUTLINED_0]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: ret
+#pragma omp target
+#pragma omp teams distribute parallel for simd if (parallel: false)
+  for(int i = 0 ; i < 100; i++) {
+  // CHECK: define internal void [[OFFLOADING_FUN_1]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+  // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @__kmpc_serialized_parallel(
+  // CHECK: call void [[OMP_OUTLINED_1:@.+]](
+  // CHECK: call void @__kmpc_end_serialized_parallel(
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
+  // CHECK: call void @__kmpc_for_static_init_4(
+  // CHECK: call void @{{.+}}gtid_test
+  // CHECK: call void @__kmpc_for_static_fini(
+  // CHECK: ret
+    gtid_test();
+  }
+}
+
+
+template <typename T>
+int tmain(T Arg) {
+#pragma omp target
+#pragma omp teams distribute parallel for simd if (true)
+  for(int i = 0 ; i < 100; i++) {
+    fn1();
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd if (false)
+  for(int i = 0 ; i < 100; i++) {
+    fn2();
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd if (parallel: Arg)
+  for(int i = 0 ; i < 100; i++) {
+    fn3();
+  }
+  return 0;
+}
+
+// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
+int main() {
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
+// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
+#pragma omp target
+#pragma omp teams distribute parallel for simd if (true)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_0]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void
+    // CHECK: call void @__kmpc_for_static_fini(
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn4
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    fn4();
+  }
+
+#pragma omp target
+#pragma omp teams distribute parallel for simd if (false)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_1]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void @__kmpc_serialized_parallel(
+    // CHECK: call void [[OMP_OUTLINED_3:@.+]](
+    // CHECK: call void @__kmpc_end_serialized_parallel(
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn5
+    // CHECK: call void @__kmpc_for_static_fini(
+    fn5();
+  }
+
+#pragma omp target
+#pragma omp teams distribute parallel for simd if (Arg)
+  for(int i = 0 ; i < 100; i++) {
+    // CHECK: define internal void [[OFFLOADING_FUN_2]](
+    // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]](
+
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void
+    // CHECK: call void @__kmpc_serialized_parallel(
+    // CHECK: call void [[OMP_OUTLINED_4:@.+]](
+    // CHECK: call void @__kmpc_end_serialized_parallel(
+    // CHECK: call void @__kmpc_for_static_fini(
+
+    // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
+    // CHECK: call void @__kmpc_for_static_init_4(
+    // CHECK: call {{.*}}void @{{.+}}fn6
+    // CHECK: call void @__kmpc_for_static_fini(
+    fn6();
+  }
+
+  return tmain(Arg);
+}
+
+// CHECK-LABEL: define {{.+}} @{{.+}}tmain
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn1
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// CHECK: call void [[T_OUTLINE_FUN_2:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn2
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// call void [[T_OUTLINE_FUN_3:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn3
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
index 01978d7..d8a6423 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
new file mode 100644
index 0000000..c9141b0
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -0,0 +1,615 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target
+  #pragma omp teams distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target
+    #pragma omp teams distribute parallel for simd lastprivate(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
+      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+      // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
+
+      // init private variables
+      // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
+      g = 1;
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+      // skip final branch
+      // LAMBDA: br
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+      // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
+      // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
+
+      // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
+      // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+
+      // LAMBDA: define{{.*}} internal{{.*}} void @[[LPAR_OUTL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
+      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+      // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
+
+      // init private variables
+      // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
+
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]],
+      // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]],
+      // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+      // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+      // skip 'final' simd branch and block (checked as part of simd)
+      // LAMBDA: br
+
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+      // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
+      // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
+
+      // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
+      // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+
+      [&]() {
+	// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+	// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+	g = 2;
+	g1 = 2;
+	svar = 4;
+	sfvar = 8.0;
+	// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+	// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+	// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+	// LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+	// LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+	// LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+	// LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+	// LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+	// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+	// LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+	// LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+	// LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+
+// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true}
+
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target
+  #pragma omp teams distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  int i;
+
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]])
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
+// CHECK: ret void
+
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]])
+// gbl and bound tid vars, prev lb and ub vars
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+
+// assignment: vec[i] = t_var;
+// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
+
+// CHECK: call void @__kmpc_for_static_fini(
+
+// skip final branch and block
+// CHECK: br
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
+// CHECK: ret void
+
+// template tmain
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
+// CHECK: ret
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip init of bound and global tid
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void [[TPAR_OUTL:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid, and prev lb and ub vars
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip init of bound and global tid
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+
+// assignment: vec[i] = t_var;
+// CHECK: [[IV_VAL1:%.+]] =
+// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]],
+// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]],
+
+// CHECK: call void @__kmpc_for_static_fini(
+
+// skip final branch and block
+// CHECK: br
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_messages.cpp
index 62f8559..416d409 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
@@ -25,7 +27,7 @@
 const S2 ba[5];
 class S3 {
   int a;
-  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+  S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}}
 
 public:
   S3() : a(0) {}
@@ -52,7 +54,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -255,12 +257,14 @@
 #pragma omp teams distribute parallel for simd lastprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{firstprivate variable cannot be lastprivate}} expected-note@+2 {{defined as firstprivate}}
 #pragma omp target
-#pragma omp teams distribute parallel for simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+#pragma omp teams distribute parallel for simd firstprivate(m) lastprivate(m)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
-#pragma omp teams distribute parallel for simd lastprivate(n) firstprivate(n) // OK
+#pragma omp teams distribute parallel for simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i) foo();
 
   static int si;
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_linear_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_linear_messages.cpp
index 5b91a3c..e60f2c9 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_linear_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
   int x;
 };
@@ -18,34 +20,42 @@
 {
   int B = 0;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear(B:bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear(B:B::bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear(X::x : ::z)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear(B,::z, X::x)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear(::z)
   for (int i = 0; i < 10; ++i) ;
@@ -54,6 +64,7 @@
 #pragma omp teams distribute parallel for simd linear(B::bfoo()) // expected-error {{expected variable name}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear(B::ib,B:C1+C2)
   for (int i = 0; i < 10; ++i) ;
@@ -76,6 +87,7 @@
 
 template<int LEN> int test_warn() {
   int ind2 = 0;
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
   #pragma omp target
   #pragma omp teams distribute parallel for simd linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}}
   for (int i = 0; i < 100; i++) {
@@ -133,10 +145,12 @@
 #pragma omp teams distribute parallel for simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
@@ -145,6 +159,7 @@
 #pragma omp teams distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear (argc : 5)
   for (int k = 0; k < argc; ++k) ++k;
@@ -161,6 +176,7 @@
 #pragma omp teams distribute parallel for simd linear (argv[1]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear(e, g)
   for (int k = 0; k < argc; ++k) ++k;
@@ -169,32 +185,11 @@
 #pragma omp teams distribute parallel for simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear(i)
   for (int k = 0; k < argc; ++k) ++k;
 
-  #pragma omp parallel
-  {
-    int v = 0;
-    int i;
-    #pragma omp target
-    #pragma omp teams distribute parallel for simd linear(v:i)
-    for (int k = 0; k < argc; ++k) { i = k; v += i; }
-  }
-
-#pragma omp target
-#pragma omp teams distribute parallel for simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  int v = 0;
-
-#pragma omp target
-#pragma omp teams distribute parallel for simd linear(v:j)
-  for (int k = 0; k < argc; ++k) { ++k; v += j; }
-
-#pragma omp target
-#pragma omp teams distribute parallel for simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
   return 0;
 }
 
@@ -230,10 +225,12 @@
 #pragma omp teams distribute parallel for simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
@@ -242,6 +239,7 @@
 #pragma omp teams distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd linear (argc)
   for (int k = 0; k < argc; ++k) ++k;
@@ -267,26 +265,6 @@
 #pragma omp teams distribute parallel for simd linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}}
   for (int k = 0; k < argc; ++k) ++k;
 
-  #pragma omp parallel
-  {
-    int i;
-    #pragma omp target
-    #pragma omp teams distribute parallel for simd linear(i)
-      for (int k = 0; k < argc; ++k) ++k;
-
-    #pragma omp target
-    #pragma omp teams distribute parallel for simd linear(i : 4)
-      for (int k = 0; k < argc; ++k) { ++k; i += 4; }
-  }
-
-#pragma omp target
-#pragma omp teams distribute parallel for simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute parallel for simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-
   foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
   return 0;
 }
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp
index 0332562..9980a1d 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp
@@ -1,8 +1,10 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 class S {
   int a;
-  S() : a(0) {}
+  S() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S(int v) : a(v) {}
@@ -693,8 +695,9 @@
 
 void test_loop_firstprivate_lastprivate() {
   S s(4);
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
-#pragma omp teams distribute parallel for simd lastprivate(s) firstprivate(s)
+#pragma omp teams distribute parallel for simd lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} expected-warning {{Non-trivial type 'S' is mapped, only trivial types are guaranteed to be mapped correctly}}
   for (int i = 0; i < 16; ++i)
     ;
 }
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_messages.cpp
index 7876c38..d27e156 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
@@ -9,6 +11,9 @@
 #pragma omp teams distribute parallel for simd // expected-error {{unexpected OpenMP directive '#pragma omp teams distribute parallel for simd'}}
 
 int main(int argc, char **argv) {
+  #pragma omp target
+  #pragma omp teams distribute parallel for simd
+  f; // expected-error {{use of undeclared identifier 'f'}}
 #pragma omp target
 #pragma omp teams distribute parallel for simd { // expected-warning {{extra tokens at the end of '#pragma omp teams distribute parallel for simd' are ignored}}
   for (int i = 0; i < argc; ++i)
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_num_teams_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_num_teams_messages.cpp
index 7affd60..5e4ce0c 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_num_teams_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_num_teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
new file mode 100644
index 0000000..8c0c820
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T, int C>
+int tmain() {
+#pragma omp target
+#pragma omp teams distribute parallel for simd num_threads(C)
+  for (int i = 0; i < 100; i++)
+    foo();
+#pragma omp target
+#pragma omp teams distribute parallel for simd num_threads(T(23))
+  for (int i = 0; i < 100; i++)
+    foo();
+  return 0;
+}
+
+int main() {
+  S s(0);
+  char a = s;
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: invoke{{.+}} [[TMAIN_5:@.+]]()
+// CHECK: invoke{{.+}} [[TMAIN_1:@.+]]()
+#pragma omp target
+  // CHECK: define internal void [[OFFLOADING_FUN_0]](
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+#pragma omp teams distribute parallel for simd num_threads(2)
+  for (int i = 0; i < 100; i++) {
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+    // CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 2)
+    // CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+    foo();
+  }
+#pragma omp target
+  // CHECK: define internal void [[OFFLOADING_FUN_1]](
+
+  // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+#pragma omp teams distribute parallel for simd num_threads(a)
+  for (int i = 0; i < 100; i++) {
+    // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+    // CHECK-DAG: [[A_ADDR:%.+]] = alloca i64,
+    // CHECK-DAG: [[A_REF:%.+]] = bitcast i64* [[A_ADDR]] to i8*
+    // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]],
+    // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}}
+    // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]])
+    // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
+    foo();
+  }
+  return a + tmain<char, 5>() + tmain<S, 1>();
+}
+
+// tmain 5
+// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]()
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_1:@.+]](
+
+// tmain 1
+// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]()
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_2:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[T_OFFLOADING_FUN_3:@.+]](
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_0]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_0]](
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 5)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_1]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_1]](
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_2]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]](
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+
+// CHECK: define internal void [[T_OFFLOADING_FUN_3]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]])
+// CHECK: [[NUM_TH_CPT:%.+]] = alloca i64,
+// CHECK: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]],
+// CHECK: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8*
+// CHECK-DAG:   [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]],
+// CHECK-DAG:   [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}}
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]])
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_messages.cpp
index fc71569..1f9b61e 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
new file mode 100644
index 0000000..4fb506e
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
@@ -0,0 +1,349 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target
+#pragma omp teams distribute parallel for simd private(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target
+#pragma omp teams distribute parallel for simd private(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}})
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}})
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+    // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+    // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: ret void
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+
+// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#else
+#pragma omp target
+#pragma omp teams distribute parallel for simd private(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]]()
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[PAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call void @[[TOFFL1:.+]]()
+// CHECK:  ret
+
+// CHECK: define {{.*}}void @[[TOFFL1]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[TPAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// prev lb and ub
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// iter variables
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_private_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_private_messages.cpp
index dc1ef57..50bf034 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_private_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
new file mode 100644
index 0000000..93e1ae5
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -0,0 +1,98 @@
+// add -fopenmp-targets
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T>
+T tmain() {
+#pragma omp target
+#pragma omp teams distribute parallel for simd proc_bind(master)
+  for(int i = 0; i < 1000; i++) {}
+  return T();
+}
+
+int main() {
+  // CHECK-LABEL: @main
+#pragma omp target
+#pragma omp teams distribute parallel for simd proc_bind(spread)
+  for(int i = 0; i < 1000; i++) {}
+#pragma omp target
+#pragma omp teams distribute parallel for simd proc_bind(close)
+  for(int i = 0; i < 1000; i++) {}
+  return tmain<int>();
+}
+
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL1:@.+]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL2:@.+]]()
+// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
+// CHECK: ret i32 [[CALL_RET]]
+
+// CHECK: define{{.+}} void [[OFFL1]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[OFFL2]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[TMAIN]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL3:@.+]]()
+
+// CHECK: define{{.+}} [[OFFL3]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_messages.cpp
index fd90429..459f40f 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s
+
 void foo();
 
 template <class T, typename S, int N>
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
new file mode 100644
index 0000000..f97f0a0
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
@@ -0,0 +1,364 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+T tmain() {
+  T t_var = T();
+  T vec[] = {1, 2};
+#pragma omp target
+#pragma omp teams distribute parallel for simd reduction(+: t_var)
+  for (int i = 0; i < 2; ++i) {
+    t_var += (T) i;
+  }
+  return T();
+}
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target
+#pragma omp teams distribute parallel for simd reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+    // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+    // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+    // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+    // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+    // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+    // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+    // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+    // LAMBDA: ]
+    // LAMBDA: [[CASE1]]:
+    // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+    // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    // LAMBDA: [[CASE2]]:
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA: br
+
+    // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+
+    // Skip global and bound tid vars, and prev lb and ub vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+    // skip loop vars
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+    // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+    // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+    // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+    // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+    // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+    // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+    // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+    // LAMBDA: ]
+    // LAMBDA: [[CASE1]]:
+    // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+    // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    // LAMBDA: [[CASE2]]:
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA: br
+
+    sivar += i;
+
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+
+      sivar += 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
+      // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4
+      // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+
+// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#else
+#pragma omp target
+#pragma omp teams distribute parallel for simd reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK-64: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to
+// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]])
+// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_ADDR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars, and prev lb and ub
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// skip loop vars
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call void @[[TOFFL1:.+]]({{.+}})
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}} [[TVAR_ARG:%.+]])
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK-64: [[TVAR_CONV:%.+]] = bitcast{{.+}} [[TVAR_ADDR]] to
+// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_CONV]])
+// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_ADDR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars, and prev lb and ub vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// skip loop vars
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp
index b0522e8..a17ffe3 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
-// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
-// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_safelen_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_safelen_messages.cpp
index 82fd4b0..e673bb3 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_safelen_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
new file mode 100644
index 0000000..ea99cc7
--- /dev/null
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
@@ -0,0 +1,418 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for simd
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL2:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for simd schedule(static)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL3:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for simd schedule(static, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL4:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for simd schedule(dynamic)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL5:.+]](
+    #pragma omp target
+    #pragma omp teams distribute parallel for simd schedule(dynamic, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+  // CK1: define internal void @[[OFFL1]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL3]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL4]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL4:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL4]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL4]]({{.+}})
+  // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+  // CK1: call {{.+}} @__kmpc_dispatch_next_4(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL5]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL5:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL5]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[PAR_OUTL5]]({{.+}})
+  // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+  // CK1: call {{.+}} @__kmpc_dispatch_next_4(
+  // CK1: ret void
+
+  // CK1: !{!"llvm.loop.vectorize.enable", i1 true}
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+  int m = 10;
+#pragma omp target
+#pragma omp teams distribute parallel for simd
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd schedule(dynamic)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd schedule(dynamic, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+  int m = 10;
+#pragma omp target
+#pragma omp teams distribute parallel for simd
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd dist_schedule(static, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd schedule(dynamic)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute parallel for simd schedule(dynamic, m)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL4:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL5:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL4]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL4:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL4]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL4]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+
+// CK2: define {{.*}}void @[[OFFL5]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL5:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL5]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL5]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT4:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT5:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT4]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT4:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT4]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT4:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT4]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT5]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT5:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT5]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT5:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT5]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+// CK2: !{!"llvm.loop.vectorize.enable", i1 true}
+
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_shared_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_shared_messages.cpp
index 1089555..90f3cf4 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_shared_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_shared_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_simdlen_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_simdlen_messages.cpp
index 82fd4b0..e673bb3 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_simdlen_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_simdlen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_thread_limit_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_thread_limit_messages.cpp
index 81ed947..b5a9576 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_thread_limit_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_thread_limit_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_parallel_for_thread_limit_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_thread_limit_messages.cpp
index 4018f3f..ad37b53 100644
--- a/test/OpenMP/teams_distribute_parallel_for_thread_limit_messages.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_thread_limit_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_private_codegen.cpp b/test/OpenMP/teams_distribute_private_codegen.cpp
index 06ec35a..111c009 100644
--- a/test/OpenMP/teams_distribute_private_codegen.cpp
+++ b/test/OpenMP/teams_distribute_private_codegen.cpp
@@ -5,10 +5,23 @@
 // RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
 
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -72,13 +85,13 @@
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0))
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
 #pragma omp teams distribute private(g, g1, sivar)
   for (int i = 0; i < 2; ++i) {
-    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}})
     // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}})
     // LAMBDA: ret void
 
@@ -91,12 +104,12 @@
     // LAMBDA: alloca i32,
     // LAMBDA: alloca i32,
     // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
     // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
     // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
     // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
-    
     g = 1;
     g1 = 1;
     sivar = 2;
@@ -141,12 +154,12 @@
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0))
-// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]]()
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
 
-// CHECK: define{{.*}} void @[[OFFL1]]({{.+}})
+// CHECK: define{{.*}} void @[[OFFL1]]()
 // CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}})
 // CHECK: ret void
 
@@ -159,12 +172,12 @@
 // CHECK: {{.+}} = alloca i32,
 // CHECK: {{.+}} = alloca i32,
 // CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
 // CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
 // CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
 // CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: alloca i32,
 
 // private(s_arr)
 // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
@@ -186,7 +199,7 @@
 
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
 // CHECK: call void @[[TOFFL1:.+]]()
 // CHECK:  ret
 
@@ -203,12 +216,12 @@
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
 // CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
-// CHECK: alloca i32,
 
 // private(s_arr)
 // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
diff --git a/test/OpenMP/teams_distribute_private_messages.cpp b/test/OpenMP/teams_distribute_private_messages.cpp
index aff1f53..5fa002a 100644
--- a/test/OpenMP/teams_distribute_private_messages.cpp
+++ b/test/OpenMP/teams_distribute_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_reduction_codegen.cpp b/test/OpenMP/teams_distribute_reduction_codegen.cpp
index 3688b10..019306c 100644
--- a/test/OpenMP/teams_distribute_reduction_codegen.cpp
+++ b/test/OpenMP/teams_distribute_reduction_codegen.cpp
@@ -5,10 +5,23 @@
 // RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
 
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -34,7 +47,7 @@
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0))
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
@@ -115,7 +128,7 @@
 // CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0))
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -166,7 +179,7 @@
 
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
 // CHECK: call void @[[TOFFL1:.+]]({{.+}})
 // CHECK:  ret
 
diff --git a/test/OpenMP/teams_distribute_reduction_messages.cpp b/test/OpenMP/teams_distribute_reduction_messages.cpp
index f11236f..4b9d6e5 100644
--- a/test/OpenMP/teams_distribute_reduction_messages.cpp
+++ b/test/OpenMP/teams_distribute_reduction_messages.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
-// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
-// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/teams_distribute_shared_messages.cpp b/test/OpenMP/teams_distribute_shared_messages.cpp
index ecf6f2e..b1828eb 100644
--- a/test/OpenMP/teams_distribute_shared_messages.cpp
+++ b/test/OpenMP/teams_distribute_shared_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/teams_distribute_simd_aligned_messages.cpp b/test/OpenMP/teams_distribute_simd_aligned_messages.cpp
index 885432b..34227c6 100644
--- a/test/OpenMP/teams_distribute_simd_aligned_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_aligned_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s
+
 struct B {
   static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
   static constexpr int bfoo() { return 8; }
diff --git a/test/OpenMP/teams_distribute_simd_ast_print.cpp b/test/OpenMP/teams_distribute_simd_ast_print.cpp
index aaf6745..b3d8ce4 100644
--- a/test/OpenMP/teams_distribute_simd_ast_print.cpp
+++ b/test/OpenMP/teams_distribute_simd_ast_print.cpp
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -29,9 +33,10 @@
       ++this->a.a;
   }
   S7 &operator=(S7 &s) {
+    int k;
 #pragma omp target
-#pragma omp teams distribute simd private(a) private(this->a)
-    for (int k = 0; k < s.a.a; ++k)
+#pragma omp teams distribute simd private(a) private(this->a) linear(k)
+    for (k = 0; k < s.a.a; ++k)
       ++s.a.a;
     return *this;
   }
@@ -56,7 +61,7 @@
 // CHECK: #pragma omp target
 // CHECK-NEXT: #pragma omp teams distribute simd private(this->a) private(this->a) private(T::a)
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute simd private(this->a) private(this->a)
+// CHECK-NEXT: #pragma omp teams distribute simd private(this->a) private(this->a) linear(k)
 // CHECK: #pragma omp target
 // CHECK-NEXT: #pragma omp teams distribute simd default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d)
 // CHECK: #pragma omp target
@@ -121,7 +126,7 @@
   for (int i=0; i < 2; ++i)
     a = 2;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute simd
+// CHECK-NEXT: #pragma omp teams distribute simd{{$}}
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: a = 2;
 #pragma omp target
@@ -155,11 +160,11 @@
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target
-#pragma omp teams distribute simd simdlen(clen-1) linear(d)
+#pragma omp teams distribute simd simdlen(clen-1)
   for (int k = 0; k < 10; ++k)
     e += d + argc;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute simd simdlen(clen - 1) linear(d)
+// CHECK-NEXT: #pragma omp teams distribute simd simdlen(clen - 1)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target
@@ -218,11 +223,11 @@
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target
-#pragma omp teams distribute simd simdlen(clen-1) linear(d)
+#pragma omp teams distribute simd simdlen(clen-1)
   for (int k = 0; k < 10; ++k)
     e += d + argc;
 // CHECK: #pragma omp target
-// CHECK-NEXT: #pragma omp teams distribute simd simdlen(clen - 1) linear(d)
+// CHECK-NEXT: #pragma omp teams distribute simd simdlen(clen - 1)
 // CHECK-NEXT: for (int k = 0; k < 10; ++k)
 // CHECK-NEXT: e += d + argc;
 #pragma omp target
diff --git a/test/OpenMP/teams_distribute_simd_codegen.cpp b/test/OpenMP/teams_distribute_simd_codegen.cpp
new file mode 100644
index 0000000..813d391
--- /dev/null
+++ b/test/OpenMP/teams_distribute_simd_codegen.cpp
@@ -0,0 +1,283 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+int a[100];
+
+// CK1: define {{.*}}i32 @{{.+}}teams_argument_globali(
+int teams_argument_global(int n) {
+  int i;
+  int te = n / 128;
+  int th = 128;
+  // discard n_addr and i
+  // CK1: alloca i32,
+  // CK1: alloca i32,
+  // CK1: [[TE:%.+]] = alloca i32,
+  // CK1: [[TH:%.+]] = alloca i32,
+  // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
+  // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+  // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
+  // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
+
+  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
+
+  // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
+  #pragma omp target
+  #pragma omp teams distribute simd num_teams(te), thread_limit(th) aligned(a) simdlen(16) linear(i)
+  for(i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+
+  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
+  #pragma omp target
+  {{{
+  #pragma omp teams distribute simd safelen(32)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  }}}
+
+  // outlined target regions
+  // CK1: define internal void @[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], [100 x i{{32|64}}]* {{.+}}, i{{32|64}} {{.+}}, {{.+}})
+  // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}},
+  // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}},
+  // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
+  // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
+  // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
+  // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
+  // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
+  // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
+  // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
+  // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
+  // CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  // CK1: define internal void @[[OFFL2]]({{.+}}, {{.+}})
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4(
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void
+
+  return a[0];
+}
+
+// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 16}
+// CK1-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 32}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+// CK2: define {{.*}}i32 @{{.+}}teams_local_argv(
+int teams_local_arg(void) {
+  int n = 100;
+  int a[n];
+
+  // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK2: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams distribute simd
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+
+  // outlined target region
+  // CK2: define internal void @[[OFFL1]]({{.+}}, {{.+}})
+  // CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK2: ret void
+
+  // CK2: define internal void @[[OUTL1]]({{.+}})
+  // CK2: call void @__kmpc_for_static_init_4(
+  // CK2: call void @__kmpc_for_static_fini(
+  // CK2: ret void  
+
+  return a[0];
+}
+// CK2: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif // CK2
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+#ifdef CK3
+
+// CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float }
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}})
+    #pragma omp target
+    #pragma omp teams distribute simd
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+
+      // outlined target region
+  // CK3: define internal void @[[OFFL1]]([[SSI]]* {{.+}})
+  // CK3: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+  // CK3: ret void
+
+  // CK3: define internal void @[[OUTL1]]({{.+}})
+  // CK3: call void @__kmpc_for_static_init_4(
+  // CK3: call void @__kmpc_for_static_fini(
+  // CK3: ret void  
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+// CK3: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif // CK3
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
+
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s
+// SIMD-ONLY3-NOT: {{__kmpc|__tgt}}
+
+#ifdef CK4
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+  int te = n/128;
+  int th = 128;
+#pragma omp target
+#pragma omp teams distribute simd num_teams(te) thread_limit(th)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+#pragma omp target
+#pragma omp teams distribute simd
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK4:  define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CK4: call void @[[OFFL1:.+]]({{.+}})
+// CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK4:  ret
+
+// CK4:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+// CK4: ret void
+
+// CK4: define internal void @[[OUTL1]]({{.+}})
+// CK4: call void @__kmpc_for_static_init_4(
+// CK4: call void @__kmpc_for_static_fini(
+// CK4: ret void
+
+// CK4:  define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
+// CK4: call void @[[OFFLT:.+]]({{.+}})
+// CK4:  ret
+// CK4-NEXT: }
+
+// CK4: define {{.*}}void @[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}})
+// CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}},
+// CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}},
+// CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
+// CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
+// CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
+// CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
+// CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
+// CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
+// CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
+// CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
+// CK4: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
+// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}})
+// CK4: ret void
+
+// CK4: define internal void @[[OUTLT]]({{.+}})
+// CK4: call void @__kmpc_for_static_init_4(
+// CK4: call void @__kmpc_for_static_fini(
+// CK4: ret void
+
+// CK4: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif // CK4
+#endif
+
diff --git a/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp b/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
new file mode 100644
index 0000000..41c494a
--- /dev/null
+++ b/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
@@ -0,0 +1,147 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X][Y];
+
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+    
+    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target
+    #pragma omp teams distribute simd collapse(2)
+    for(int i = 0; i < X; i++) {
+      for(int j = 0; j < Y; j++) {
+	a[i][j] = (T)0;
+      }
+    }
+    // CK1: define internal void @[[OFFL1]](
+    // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+    // CK1: ret void
+    
+    // CK1: define internal void @[[OUTL1]]({{.+}})
+    // discard loop variables not needed here
+    // CK1: = alloca i32,
+    // CK1: = alloca i32,
+    // CK1: = alloca i32,
+    // CK1: = alloca i32,
+    // CK1: [[OMP_UB:%.+]] = alloca i32,
+    // CK1: store i32 56087, i32* [[OMP_UB]],
+    // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+    // CK1: call void @__kmpc_for_static_fini(
+    // CK1: ret void  
+
+    return a[0][0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+
+// CK1: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n, int m>
+int tmain(T argc) {
+  T a[n][m];
+  #pragma omp target
+  #pragma omp teams distribute simd collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = (T)0;
+    }
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int m = 2;
+  int a[n][m];
+  #pragma omp target
+  #pragma omp teams distribute simd collapse(2)
+  for(int i = 0; i < n; i++) {
+    for(int j = 0; j < m; j++) {
+      a[i][j] = 0;
+    }
+  }
+  return tmain<int, 10, 2>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: [[OMP_UB:%.omp.ub]] = alloca i64,
+// CK2: store i64 {{.+}}, i64* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// discard loop variables not needed here
+// CK2: [[OMP_UB:%.omp.ub]] = alloca i32,
+// CK2: store i32 {{.+}}, i32* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/teams_distribute_simd_collapse_messages.cpp b/test/OpenMP/teams_distribute_simd_collapse_messages.cpp
index 0f62be2..d0b2f81 100644
--- a/test/OpenMP/teams_distribute_simd_collapse_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_collapse_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_simd_default_messages.cpp b/test/OpenMP/teams_distribute_simd_default_messages.cpp
index a6685b9..af176f4 100644
--- a/test/OpenMP/teams_distribute_simd_default_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_default_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo();
 
 int main(int argc, char **argv) {
diff --git a/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp b/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
new file mode 100644
index 0000000..8937a44
--- /dev/null
+++ b/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
@@ -0,0 +1,224 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a[X];
+  float b;
+  // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+  int foo(void) {
+
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL1:.+]](
+    #pragma omp target
+    #pragma omp teams distribute simd
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL2:.+]](
+    #pragma omp target
+    #pragma omp teams distribute simd dist_schedule(static)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call void @[[OFFL3:.+]](
+    #pragma omp target
+    #pragma omp teams distribute simd dist_schedule(static, X/2)
+    for(int i = 0; i < X; i++) {
+      a[i] = (T)0;
+    }
+  // CK1: define internal void @[[OFFL1]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL1]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void  
+
+  // CK1: define internal void @[[OFFL2]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL2]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void  
+
+  // CK1: define internal void @[[OFFL3]](
+  // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+  // CK1: ret void
+
+  // CK1: define internal void @[[OUTL3]]({{.+}})
+  // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+  // CK1: call void @__kmpc_for_static_fini(
+  // CK1: ret void  
+
+    return a[0];
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+// CK1: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+  T a[n];
+#pragma omp target
+#pragma omp teams distribute simd
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute simd dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+#pragma omp target
+#pragma omp teams distribute simd dist_schedule(static, n)
+  for(int i = 0; i < n; i++) {
+    a[i] = (T)0;
+  }
+  return 0;
+}
+
+int main (int argc, char **argv) {
+  int n = 100;
+  int a[n];
+#pragma omp target
+#pragma omp teams distribute simd
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute simd dist_schedule(static)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+#pragma omp target
+#pragma omp teams distribute simd dist_schedule(static, n)
+  for(int i = 0; i < n; i++) {
+    a[i] = 0;
+  }
+  return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2:  define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2:  ret
+// CK2-NEXT: }
+
+// CK2:  define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2:  define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif // CK2
+#endif // #ifndef HEADER
diff --git a/test/OpenMP/teams_distribute_simd_dist_schedule_messages.cpp b/test/OpenMP/teams_distribute_simd_dist_schedule_messages.cpp
index 5274204..3eb0db9 100644
--- a/test/OpenMP/teams_distribute_simd_dist_schedule_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_dist_schedule_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
new file mode 100644
index 0000000..4c65af8
--- /dev/null
+++ b/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
@@ -0,0 +1,350 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target
+#pragma omp teams distribute simd firstprivate(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
+    // LAMBDA:  ret
+#pragma omp target
+#pragma omp teams distribute simd firstprivate(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+    // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]],
+    // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]],
+    // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]],
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+    // skip loop vars
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+    // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+    // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+    // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_CONV]],
+    // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]]
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_CONV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target
+#pragma omp teams distribute simd firstprivate(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]]({{.+}})
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]],
+// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]],
+// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]],
+// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]],
+// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] =  bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK:  ret
+
+// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}})
+// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]],
+// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]],
+// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
+// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif
diff --git a/test/OpenMP/teams_distribute_simd_firstprivate_messages.cpp b/test/OpenMP/teams_distribute_simd_firstprivate_messages.cpp
index 324672c..5b074c4 100644
--- a/test/OpenMP/teams_distribute_simd_firstprivate_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_firstprivate_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
@@ -144,6 +146,7 @@
 #pragma omp teams distribute simd firstprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
 #pragma omp teams distribute simd lastprivate(argc), firstprivate(argc) // OK
   for (i = 0; i < argc; ++i) foo();
diff --git a/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp b/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
new file mode 100644
index 0000000..86cfd00
--- /dev/null
+++ b/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
@@ -0,0 +1,387 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target| FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+  #pragma omp target
+  #pragma omp teams distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int svar;
+  volatile double g;
+  volatile double &g1 = g;
+
+  #ifdef LAMBDA
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    static float sfvar;
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+    // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+    // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+    #pragma omp target
+    #pragma omp teams distribute simd lastprivate(g, g1, svar, sfvar)
+    for (int i = 0; i < 2; ++i) {
+      // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
+      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
+      // loop variables
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+      // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+      // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
+      // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+      // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+      // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
+
+      // init private variables
+      // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
+      g = 1;
+      g1 = 1;
+      svar = 3;
+      sfvar = 4.0;
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+      // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]],
+      // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]],
+      // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
+      // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+      // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+      // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+      // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+      // LAMBDA: store i32 2, i32* %
+      // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+      // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+      // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+      // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+      // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+      // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
+      // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+      // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
+
+      // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+      // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
+      // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+      // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
+      // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+      // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+      // LAMBDA: ret
+      [&]() {
+        // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+        // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+        g = 2;
+        g1 = 2;
+        svar = 4;
+        sfvar = 8.0;
+        // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+        // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+        // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+        // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+        // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+        // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+        // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+        // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+        // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+        // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+        // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+        // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+        // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+      }();
+    }
+  }();
+  return 0;
+  #else
+  S<float> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<float> s_arr[] = {1, 2};
+  S<float> &var = test;
+
+  #pragma omp target
+  #pragma omp teams distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  int i;
+
+  return tmain<int>();
+  #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]])
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// assignment: vec[i] = t_var;
+// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
+// CHECK: ret void
+
+// template tmain
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
+// CHECK: ret
+
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip init of bound and global tid
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// copy from parameters to local address variables
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]],
+
+// load content of local address variables
+// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: call void @__kmpc_for_static_init_4(
+// assignment: vec[i] = t_var;
+// CHECK: [[IV_VAL1:%.+]] =
+// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK:  store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]],
+// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]],
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: ret void
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif
diff --git a/test/OpenMP/teams_distribute_simd_lastprivate_messages.cpp b/test/OpenMP/teams_distribute_simd_lastprivate_messages.cpp
index 8bdb380..0860591 100644
--- a/test/OpenMP/teams_distribute_simd_lastprivate_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_lastprivate_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
@@ -25,7 +27,7 @@
 const S2 ba[5];
 class S3 {
   int a;
-  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+  S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}}
 
 public:
   S3() : a(0) {}
@@ -52,7 +54,7 @@
 };
 class S6 {
   int a;
-  S6() : a(0) {}
+  S6() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S6(const S6 &s6) : a(s6.a) {}
@@ -255,12 +257,14 @@
 #pragma omp teams distribute simd lastprivate(j)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{firstprivate variable cannot be lastprivate}} expected-note@+2 {{defined as firstprivate}}
 #pragma omp target
-#pragma omp teams distribute simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+#pragma omp teams distribute simd firstprivate(m) lastprivate(m)
   for (i = 0; i < argc; ++i) foo();
 
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
-#pragma omp teams distribute simd lastprivate(n) firstprivate(n) // OK
+#pragma omp teams distribute simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}}
   for (i = 0; i < argc; ++i) foo();
 
   static int si;
diff --git a/test/OpenMP/teams_distribute_simd_linear_messages.cpp b/test/OpenMP/teams_distribute_simd_linear_messages.cpp
index 6bfdb16..a3984b9 100644
--- a/test/OpenMP/teams_distribute_simd_linear_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_linear_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 namespace X {
   int x;
 };
@@ -18,34 +20,42 @@
 {
   int B = 0;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear(B:bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear(B:B::bfoo())
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear(X::x : ::z)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear(B,::z, X::x)
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear(::z)
   for (int i = 0; i < 10; ++i) ;
@@ -54,6 +64,7 @@
 #pragma omp teams distribute simd linear(B::bfoo()) // expected-error {{expected variable name}}
   for (int i = 0; i < 10; ++i) ;
 
+// expected-error@+2 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear(B::ib,B:C1+C2)
   for (int i = 0; i < 10; ++i) ;
@@ -76,6 +87,7 @@
 
 template<int LEN> int test_warn() {
   int ind2 = 0;
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
   #pragma omp target
   #pragma omp teams distribute simd linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}}
   for (int i = 0; i < 100; i++) {
@@ -133,10 +145,12 @@
 #pragma omp teams distribute simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
@@ -145,6 +159,7 @@
 #pragma omp teams distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear (argc : 5)
   for (int k = 0; k < argc; ++k) ++k;
@@ -161,6 +176,7 @@
 #pragma omp teams distribute simd linear (argv[1]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear(e, g)
   for (int k = 0; k < argc; ++k) ++k;
@@ -169,32 +185,11 @@
 #pragma omp teams distribute simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear(i)
   for (int k = 0; k < argc; ++k) ++k;
 
-  #pragma omp parallel
-  {
-    int v = 0;
-    int i;
-    #pragma omp target
-    #pragma omp teams distribute simd linear(v:i)
-    for (int k = 0; k < argc; ++k) { i = k; v += i; }
-  }
-
-#pragma omp target
-#pragma omp teams distribute simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-  int v = 0;
-
-#pragma omp target
-#pragma omp teams distribute simd linear(v:j)
-  for (int k = 0; k < argc; ++k) { ++k; v += j; }
-
-#pragma omp target
-#pragma omp teams distribute simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
   return 0;
 }
 
@@ -230,10 +225,12 @@
 #pragma omp teams distribute simd linear () // expected-error {{expected expression}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int k = 0; k < argc; ++k) ++k;
@@ -242,6 +239,7 @@
 #pragma omp teams distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
   for (int k = 0; k < argc; ++k) ++k;
 
+// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}}
 #pragma omp target
 #pragma omp teams distribute simd linear (argc)
   for (int k = 0; k < argc; ++k) ++k;
@@ -267,26 +265,6 @@
 #pragma omp teams distribute simd linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}}
   for (int k = 0; k < argc; ++k) ++k;
 
-  #pragma omp parallel
-  {
-    int i;
-    #pragma omp target
-    #pragma omp teams distribute simd linear(i)
-      for (int k = 0; k < argc; ++k) ++k;
-
-    #pragma omp target
-    #pragma omp teams distribute simd linear(i : 4)
-      for (int k = 0; k < argc; ++k) { ++k; i += 4; }
-  }
-
-#pragma omp target
-#pragma omp teams distribute simd linear(j)
-  for (int k = 0; k < argc; ++k) ++k;
-
-#pragma omp target
-#pragma omp teams distribute simd linear(i)
-  for (int k = 0; k < argc; ++k) ++k;
-
   foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
   return 0;
 }
diff --git a/test/OpenMP/teams_distribute_simd_loop_messages.cpp b/test/OpenMP/teams_distribute_simd_loop_messages.cpp
index 81fbfe8..8721dc5 100644
--- a/test/OpenMP/teams_distribute_simd_loop_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_loop_messages.cpp
@@ -1,8 +1,10 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify %s
+
 class S {
   int a;
-  S() : a(0) {}
+  S() : a(0) {} // expected-note {{implicitly declared private here}}
 
 public:
   S(int v) : a(v) {}
@@ -693,8 +695,9 @@
 
 void test_loop_firstprivate_lastprivate() {
   S s(4);
+// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}}
 #pragma omp target
-#pragma omp teams distribute simd lastprivate(s) firstprivate(s)
+#pragma omp teams distribute simd lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} expected-warning {{Non-trivial type 'S' is mapped, only trivial types are guaranteed to be mapped correctly}}
   for (int i = 0; i < 16; ++i)
     ;
 }
diff --git a/test/OpenMP/teams_distribute_simd_messages.cpp b/test/OpenMP/teams_distribute_simd_messages.cpp
index 9095caf..7912870 100644
--- a/test/OpenMP/teams_distribute_simd_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
@@ -9,6 +11,9 @@
 #pragma omp teams distribute simd // expected-error {{unexpected OpenMP directive '#pragma omp teams distribute simd'}}
 
 int main(int argc, char **argv) {
+  #pragma omp target
+  #pragma omp teams distribute simd
+  f; // expected-error {{use of undeclared identifier 'f'}}
 #pragma omp target
 #pragma omp teams distribute simd { // expected-warning {{extra tokens at the end of '#pragma omp teams distribute simd' are ignored}}
   for (int i = 0; i < argc; ++i)
diff --git a/test/OpenMP/teams_distribute_simd_num_teams_messages.cpp b/test/OpenMP/teams_distribute_simd_num_teams_messages.cpp
index e65025a..8616464 100644
--- a/test/OpenMP/teams_distribute_simd_num_teams_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_num_teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_simd_private_codegen.cpp b/test/OpenMP/teams_distribute_simd_private_codegen.cpp
new file mode 100644
index 0000000..6ed092c
--- /dev/null
+++ b/test/OpenMP/teams_distribute_simd_private_codegen.cpp
@@ -0,0 +1,251 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &st) : a(st.a + st.b), b(0) {}
+  ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a + g) {}
+  S() : f(g) {}
+  S(const S &s, St t = St()) : f(s.f + t.a) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+  S<T> test;
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> &var = test;
+#pragma omp target
+#pragma omp teams distribute simd private(t_var, vec, s_arr, var)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+  }
+  return T();
+}
+
+// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target
+#pragma omp teams distribute simd private(g, g1, sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}})
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}})
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+    // Skip global, bound tid and loop vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: alloca i32,
+    // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+    // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+    // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+    
+    g = 1;
+    g1 = 1;
+    sivar = 2;
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]],
+    // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]],
+    // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+      g = 2;
+      g1 = 2;
+      sivar = 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+      // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+      // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target
+#pragma omp teams distribute simd private(t_var, vec, s_arr, var, sivar)
+  for (int i = 0; i < 2; ++i) {
+    vec[i] = t_var;
+    s_arr[i] = var;
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]]()
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call void @[[TOFFL1:.+]]()
+// CHECK:  ret
+
+// CHECK: define {{.*}}void @[[TOFFL1]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif
diff --git a/test/OpenMP/teams_distribute_simd_private_messages.cpp b/test/OpenMP/teams_distribute_simd_private_messages.cpp
index 9044213..94fb909 100644
--- a/test/OpenMP/teams_distribute_simd_private_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp b/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
new file mode 100644
index 0000000..10901de
--- /dev/null
+++ b/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
@@ -0,0 +1,231 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++  -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+T tmain() {
+  T t_var = T();
+  T vec[] = {1, 2};
+#pragma omp target
+#pragma omp teams distribute simd reduction(+: t_var)
+  for (int i = 0; i < 2; ++i) {
+    t_var += (T) i;
+  }
+  return T();
+}
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+    // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call void @[[LOFFL1:.+]](
+    // LAMBDA:  ret
+#pragma omp target
+#pragma omp teams distribute simd reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to
+    // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]])
+    // LAMBDA: ret void
+
+    // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+    // Skip global and bound tid vars
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: {{.+}} = alloca i32*,
+    // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+    // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+    // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+    // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+    // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+    
+    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+    // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+    // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+    // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+    // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+    // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+    // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+    // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+    // LAMBDA: ]
+    // LAMBDA: [[CASE1]]:
+    // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+    // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+    // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    // LAMBDA: [[CASE2]]:
+    // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+    // LAMBDA: br
+    
+    sivar += i;
+
+    [&]() {
+      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+
+      sivar += 4;
+      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+      // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+      // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+      // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
+      // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4
+      // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]]
+    }();
+  }
+  }();
+  return 0;
+#else
+#pragma omp target
+#pragma omp teams distribute simd reduction(+: sivar)
+  for (int i = 0; i < 2; ++i) {
+    sivar += i;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK-64: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to
+// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]])
+// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_ADDR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call void @[[TOFFL1:.+]]({{.+}})
+// CHECK:  ret
+
+// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}} [[TVAR_ARG:%.+]])
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK-64: [[TVAR_CONV:%.+]] = bitcast{{.+}} [[TVAR_ADDR]] to
+// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_CONV]])
+// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_ADDR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]]
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+
+// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
+#endif
diff --git a/test/OpenMP/teams_distribute_simd_reduction_messages.cpp b/test/OpenMP/teams_distribute_simd_reduction_messages.cpp
index 09d22bd..49b45b7 100644
--- a/test/OpenMP/teams_distribute_simd_reduction_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_reduction_messages.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
-// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
-// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/teams_distribute_simd_safelen_messages.cpp b/test/OpenMP/teams_distribute_simd_safelen_messages.cpp
index a323c08..7be581a 100644
--- a/test/OpenMP/teams_distribute_simd_safelen_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_safelen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_simd_shared_messages.cpp b/test/OpenMP/teams_distribute_simd_shared_messages.cpp
index 6b06f5b..b29e2bb 100644
--- a/test/OpenMP/teams_distribute_simd_shared_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_shared_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/teams_distribute_simd_simdlen_messages.cpp b/test/OpenMP/teams_distribute_simd_simdlen_messages.cpp
index a323c08..7be581a 100644
--- a/test/OpenMP/teams_distribute_simd_simdlen_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_simdlen_messages.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_simd_thread_limit_messages.cpp b/test/OpenMP/teams_distribute_simd_thread_limit_messages.cpp
index b9c8ac3..732bb34 100644
--- a/test/OpenMP/teams_distribute_simd_thread_limit_messages.cpp
+++ b/test/OpenMP/teams_distribute_simd_thread_limit_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_distribute_thread_limit_messages.cpp b/test/OpenMP/teams_distribute_thread_limit_messages.cpp
index ec4ca7a..d22cc25 100644
--- a/test/OpenMP/teams_distribute_thread_limit_messages.cpp
+++ b/test/OpenMP/teams_distribute_thread_limit_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_firstprivate_codegen.cpp b/test/OpenMP/teams_firstprivate_codegen.cpp
index 3248bfe..a2b4144 100644
--- a/test/OpenMP/teams_firstprivate_codegen.cpp
+++ b/test/OpenMP/teams_firstprivate_codegen.cpp
@@ -1,24 +1,48 @@
 // Test host codegen.
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
 
-// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 
-// RUN: %clang_cc1 -DARRAY  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix ARRAY --check-prefix ARRAY-64
-// RUN: %clang_cc1 -DARRAY  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DARRAY  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix ARRAY --check-prefix ARRAY-64
-// RUN: %clang_cc1 -DARRAY  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix ARRAY --check-prefix ARRAY-32
-// RUN: %clang_cc1 -DARRAY  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DARRAY  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix ARRAY --check-prefix ARRAY-32
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -DARRAY  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix ARRAY --check-prefix ARRAY-64
+// RUN: %clang_cc1 -DARRAY  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DARRAY  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix ARRAY --check-prefix ARRAY-64
+// RUN: %clang_cc1 -DARRAY  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck %s --check-prefix ARRAY --check-prefix ARRAY-32
+// RUN: %clang_cc1 -DARRAY  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DARRAY  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck %s --check-prefix ARRAY --check-prefix ARRAY-32
+
+// RUN: %clang_cc1 -DARRAY  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DARRAY  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DARRAY  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DARRAY  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY2 %s
+// RUN: %clang_cc1 -DARRAY  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -Wno-openmp-target
+// RUN: %clang_cc1 -DARRAY  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - -Wno-openmp-target | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -148,7 +172,7 @@
 // CHECK-64: [[SIVAR7_CONV:%.+]] = bitcast i64* [[SIVAR7_PRIV]] to i32*
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
 // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]],
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]],
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]] to [[S_FLOAT_TY]]*
 // CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
@@ -200,7 +224,7 @@
 // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]], align 128
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
 // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], i{{[0-9]+}} {{[0-9]+}}, i{{[0-9]+}} 128,
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* align 128 [[VEC_DEST]], i8* align 128 [[VEC_SRC]], i{{[0-9]+}} {{[0-9]+}},
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_REF]] to [[S_INT_TY]]*
 // CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
@@ -260,7 +284,7 @@
 // ARRAY-DAG: store double* %{{.+}}, double** [[PRIV_VLA2]],
 // ARRAY: call i8* @llvm.stacksave()
 // ARRAY: [[SIZE:%.+]] = mul nuw i{{[0-9]+}} %{{.+}}, 8
-// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i{{[0-9]+}}(i8* %{{.+}}, i8* %{{.+}}, i{{[0-9]+}} [[SIZE]], i32 128, i1 false)
+// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i{{[0-9]+}}(i8* align 128 %{{.+}}, i8* align 128 %{{.+}}, i{{[0-9]+}} [[SIZE]], i1 false)
   #pragma omp target
   #pragma omp teams firstprivate(a, s, vla1, vla2)
   s[0].St_func(s, n, vla1);
@@ -278,6 +302,6 @@
 // ARRAY-DAG: store double* %{{.+}}, double** [[PRIV_VLA2]],
 // ARRAY: call i8* @llvm.stacksave()
 // ARRAY: [[SIZE:%.+]] = mul nuw i{{[0-9]+}} %{{.+}}, 8
-// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i{{[0-9]+}}(i8* %{{.+}}, i8* %{{.+}}, i{{[0-9]+}} [[SIZE]], i32 128, i1 false)
+// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i{{[0-9]+}}(i8* align 128 %{{.+}}, i8* align 128 %{{.+}}, i{{[0-9]+}} [[SIZE]], i1 false)
 #endif
 #endif
diff --git a/test/OpenMP/teams_firstprivate_messages.cpp b/test/OpenMP/teams_firstprivate_messages.cpp
index ba12fde..d29a2e3 100644
--- a/test/OpenMP/teams_firstprivate_messages.cpp
+++ b/test/OpenMP/teams_firstprivate_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/teams_messages.cpp b/test/OpenMP/teams_messages.cpp
index 3a42a0e..6ed3be9 100644
--- a/test/OpenMP/teams_messages.cpp
+++ b/test/OpenMP/teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -o - %s
+
 void foo() {
 }
 
@@ -7,6 +9,9 @@
 
 int main(int argc, char **argv) {
   #pragma omp target
+  #pragma omp teams
+  f; // expected-error {{use of undeclared identifier 'f'}}
+  #pragma omp target
   #pragma omp teams { // expected-warning {{extra tokens at the end of '#pragma omp teams' are ignored}}
   foo();
   #pragma omp target
diff --git a/test/OpenMP/teams_num_teams_messages.cpp b/test/OpenMP/teams_num_teams_messages.cpp
index 0187112..47b2b4e 100644
--- a/test/OpenMP/teams_num_teams_messages.cpp
+++ b/test/OpenMP/teams_num_teams_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_private_codegen.cpp b/test/OpenMP/teams_private_codegen.cpp
index 1ba010f..0df111c 100644
--- a/test/OpenMP/teams_private_codegen.cpp
+++ b/test/OpenMP/teams_private_codegen.cpp
@@ -5,12 +5,28 @@
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
 
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1  -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -98,7 +114,7 @@
 
   // lambda and target region in main
   // LAMBDA: define {{.+}} [[OUTER_LAMBDA]]([[CAP_TY]]* {{.+}})
-  // LAMBDA: call void @[[OMP_OFFLOADING:.+]](i{{[0-9]+}}* {{.+}}, i{{[0-9]+}} {{.+}}
+  // LAMBDA: call void @[[OMP_OFFLOADING:.+]]()
 
   // target region in struct constructor
   // LAMBDA: define{{.*}} void [[ST_CONSTR:@.+]]([[SS_TY]]* %this,
@@ -154,7 +170,7 @@
 #pragma omp target
 #pragma omp teams private(g, sivar)
   {
-    // LAMBDA: define{{.+}} @[[OMP_OFFLOADING]](i{{[0-9]+}}* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[SIVAR_IN:%.+]]
+    // LAMBDA: define{{.+}} @[[OMP_OFFLOADING]]()
     // LAMBDA: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_1:@.+]] to void
     
     // LAMBDA: define {{.+}} [[OMP_OUTLINED_1]](i{{[0-9]+}}* {{.+}}, i{{[0-9]+}}* {{.+}}
@@ -196,14 +212,13 @@
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: [[OFF_IN:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* {{%.+}},
-// CHECK: call void @[[OMP_OFFLOADING:.+]](i{{[0-9]+}} [[OFF_IN]]
+// CHECK: call void @[[OMP_OFFLOADING:.+]]()
 // CHECK: = call{{.*}} i{{.+}} [[TMAIN_INT:@.+]]()
 // CHECK: call void [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
 // CHECK: ret
 
 // target region in main function
-// CHECK: define{{.+}} @[[OMP_OFFLOADING]](i{{[0-9]+}}
+// CHECK: define{{.+}} @[[OMP_OFFLOADING]]()
 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED:@.+]] to void
 
 // CHECK: define internal void [[OMP_OUTLINED]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
diff --git a/test/OpenMP/teams_private_messages.cpp b/test/OpenMP/teams_private_messages.cpp
index 344ef8d..30c7a0a 100644
--- a/test/OpenMP/teams_private_messages.cpp
+++ b/test/OpenMP/teams_private_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/teams_reduction_messages.cpp b/test/OpenMP/teams_reduction_messages.cpp
index 7aef977..94a4743 100644
--- a/test/OpenMP/teams_reduction_messages.cpp
+++ b/test/OpenMP/teams_reduction_messages.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -o - %s
-// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -o - %s
-// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s
+// RUN: %clang_cc1 -verify -fopenmp -o - %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -o - %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -o - %s -Wno-openmp-target
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -o - %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/teams_shared_messages.cpp b/test/OpenMP/teams_shared_messages.cpp
index 6b013d0..a83b82f 100644
--- a/test/OpenMP/teams_shared_messages.cpp
+++ b/test/OpenMP/teams_shared_messages.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-target
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-target
 
 void foo() {
 }
diff --git a/test/OpenMP/teams_thread_limit_messages.cpp b/test/OpenMP/teams_thread_limit_messages.cpp
index 1cb147c..0c5a22c 100644
--- a/test/OpenMP/teams_thread_limit_messages.cpp
+++ b/test/OpenMP/teams_thread_limit_messages.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 100 -o - %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 100 -o - %s
+
 void foo() {
 }
 
diff --git a/test/OpenMP/threadprivate_ast_print.cpp b/test/OpenMP/threadprivate_ast_print.cpp
index 5502c62..a148203 100644
--- a/test/OpenMP/threadprivate_ast_print.cpp
+++ b/test/OpenMP/threadprivate_ast_print.cpp
@@ -4,6 +4,13 @@
 // RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux-gnu -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux-gnu -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -18,7 +25,7 @@
  static int b;
 // CHECK: static int b;
 #pragma omp threadprivate(b)
-// CHECK-NEXT: #pragma omp threadprivate(St1::b)
+// CHECK-NEXT: #pragma omp threadprivate(St1::b){{$}}
 } d;
 
 int a, b;
diff --git a/test/OpenMP/threadprivate_codegen.cpp b/test/OpenMP/threadprivate_codegen.cpp
index 3785b70..1f40311 100644
--- a/test/OpenMP/threadprivate_codegen.cpp
+++ b/test/OpenMP/threadprivate_codegen.cpp
@@ -2,10 +2,20 @@
 // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-DEBUG %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
 // RUN: %clang_cc1 -verify -fopenmp -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK-TLS
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-TLS %s
 
+// RUN: %clang_cc1 -verify -fopenmp-simd -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
 // expected-no-diagnostics
 //
 #ifndef HEADER
@@ -144,26 +154,26 @@
 // CHECK-DEBUG-DAG: [[ST_INT_ST:@.+]] = linkonce_odr global i32 23
 // CHECK-DEBUG-DAG: [[ST_FLOAT_ST:@.+]] = linkonce_odr global float 2.300000e+01
 // CHECK-DEBUG-DAG: [[ST_S4_ST:@.+]] = linkonce_odr global %struct.S4 zeroinitializer
-// CHECK-DEBUG-DAG: [[LOC1:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;192;9;;\00"
-// CHECK-DEBUG-DAG: [[LOC2:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;247;9;;\00"
-// CHECK-DEBUG-DAG: [[LOC3:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;334;19;;\00"
-// CHECK-DEBUG-DAG: [[LOC4:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;371;9;;\00"
-// CHECK-DEBUG-DAG: [[LOC5:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;388;9;;\00"
-// CHECK-DEBUG-DAG: [[LOC6:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;410;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC7:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;433;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC8:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;469;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC9:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;498;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC10:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;518;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC11:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;541;27;;\00"
-// CHECK-DEBUG-DAG: [[LOC12:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;564;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC13:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;684;9;;\00"
-// CHECK-DEBUG-DAG: [[LOC14:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;707;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC15:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;743;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC16:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;772;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC17:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;792;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC18:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;815;27;;\00"
-// CHECK-DEBUG-DAG: [[LOC19:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;838;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC20:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;306;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC1:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;201;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC2:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;256;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC3:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;343;19;;\00"
+// CHECK-DEBUG-DAG: [[LOC4:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;380;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC5:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;397;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC6:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;419;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC7:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;442;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC8:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;478;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC9:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;507;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC10:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;527;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC11:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;550;27;;\00"
+// CHECK-DEBUG-DAG: [[LOC12:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;573;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC13:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;693;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC14:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;716;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC15:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;752;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC16:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;781;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC17:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;801;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC18:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;824;27;;\00"
+// CHECK-DEBUG-DAG: [[LOC19:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;847;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC20:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;315;9;;\00"
 // CHECK-TLS-DAG:  [[GS1:@.+]] = internal thread_local global [[S1]] zeroinitializer
 // CHECK-TLS-DAG:  [[GS2:@.+]] = internal global [[S2]] zeroinitializer
 // CHECK-TLS-DAG:  [[ARR_X:@.+]] = thread_local global [2 x [3 x [[S1]]]] zeroinitializer
@@ -180,7 +190,6 @@
 // CHECK-TLS-DAG:  [[GS1_TLS_INIT:@_ZTHL3gs1]] = internal alias void (), void ()* @__tls_init
 // CHECK-TLS-DAG:  [[ARR_X_TLS_INIT:@_ZTH5arr_x]] = alias void (), void ()* @__tls_init
 
-
 // CHECK-TLS-DAG:  [[ST_S4_ST_TLS_INIT:@_ZTHN2STI2S4E2stE]] = linkonce_odr alias void (), void ()* [[ST_S4_ST_CXX_INIT:@[^, ]*]]
 
 struct Static {
diff --git a/test/OpenMP/threadprivate_messages.cpp b/test/OpenMP/threadprivate_messages.cpp
index 9775bfa..62ddfd1 100644
--- a/test/OpenMP/threadprivate_messages.cpp
+++ b/test/OpenMP/threadprivate_messages.cpp
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -fnoopenmp-use-tls -ferror-limit 100 -emit-llvm -o - %s
 // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -emit-llvm -o - %s
 
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -fnoopenmp-use-tls -ferror-limit 100 -emit-llvm -o - %s
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp-simd -ferror-limit 100 -emit-llvm -o - %s
+
 #pragma omp threadprivate // expected-error {{expected '(' after 'threadprivate'}}
 #pragma omp threadprivate( // expected-error {{expected identifier}} expected-error {{expected ')'}} expected-note {{to match this '('}}
 #pragma omp threadprivate() // expected-error {{expected identifier}}
diff --git a/test/OpenMP/varargs.cpp b/test/OpenMP/varargs.cpp
index 7738f83..4730c10 100644
--- a/test/OpenMP/varargs.cpp
+++ b/test/OpenMP/varargs.cpp
@@ -1,4 +1,7 @@
 // RUN: %clang_cc1 -verify -fopenmp %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 void f(int a, ...) {
 #pragma omp parallel for
   for (int i = 0; i < 100; ++i) {
diff --git a/test/OpenMP/vla_crash.c b/test/OpenMP/vla_crash.c
index 50dcf06..00cada4 100644
--- a/test/OpenMP/vla_crash.c
+++ b/test/OpenMP/vla_crash.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -verify -triple powerpc64le-unknown-linux-gnu -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
-// expected-no-diagnostics
+
+// RUN: %clang_cc1 -verify -triple powerpc64le-unknown-linux-gnu -fopenmp-simd -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 
 int a;
 
@@ -20,3 +22,24 @@
 
 // CHECK: define internal void [[OUTLINED]](i32* {{[^,]+}}, i32* {{[^,]+}}, i64 {{[^,]+}}, i32** {{[^,]+}}, i64 {{[^,]+}}, i32**** {{[^,]+}})
 
+// CHECK-LABEL: bar
+void bar(int n, int *a) {
+  // CHECK: [[N:%.+]] = alloca i32,
+  // CHECK: [[A:%.+]] = alloca i32*,
+  // CHECK: [[P:%.+]] = alloca i32*,
+  // CHECK: @__kmpc_global_thread_num
+  // CHECK: [[BC:%.+]] = bitcast i32** [[A]] to i32*
+  // CHECK: store i32* [[BC]], i32** [[P]],
+  // CHECK: call void @__kmpc_serialized_parallel
+  // CHECK: call void [[OUTLINED:@[^(]+]](i32* %{{[^,]+}}, i32* %{{[^,]+}}, i64 %{{[^,]+}}, i32** [[P]], i32** [[A]])
+  // CHECK: call void @__kmpc_end_serialized_parallel
+  // CHECK: ret void
+  // expected-warning@+1 {{incompatible pointer types initializing 'int (*)[n]' with an expression of type 'int **'}}
+  int(*p)[n] = &a;
+#pragma omp parallel if(0)
+  // expected-warning@+1 {{comparison of distinct pointer types ('int (*)[n]' and 'int **')}}
+  if (p == &a) {
+  }
+}
+
+// CHECK: define internal void [[OUTLINED]](i32* {{[^,]+}}, i32* {{[^,]+}}, i64 {{[^,]+}}, i32** {{[^,]+}}, i32** {{[^,]+}})
diff --git a/test/PCH/cxx-required-decls.cpp b/test/PCH/cxx-required-decls.cpp
index c2f8e2f..132af6c 100644
--- a/test/PCH/cxx-required-decls.cpp
+++ b/test/PCH/cxx-required-decls.cpp
@@ -7,4 +7,4 @@
 
 // CHECK: @_ZL5globS = internal global %struct.S zeroinitializer
 // CHECK: @_ZL3bar = internal global i32 0, align 4
-// CHECK: @glob_var = global i32 0
+// CHECK: @glob_var = {{(dso_local )?}}global i32 0
diff --git a/test/PCH/dllexport-default-arg-closure.cpp b/test/PCH/dllexport-default-arg-closure.cpp
index 892f1eb..a8da0c0 100644
--- a/test/PCH/dllexport-default-arg-closure.cpp
+++ b/test/PCH/dllexport-default-arg-closure.cpp
@@ -17,7 +17,7 @@
 
 // Demangles as:
 // void Foo::`default constructor closure'(void)
-// CHECK: define weak_odr dllexport void @"\01??_FFoo@@QEAAXXZ"(%struct.Foo*{{.*}})
+// CHECK: define weak_odr dso_local dllexport void @"\01??_FFoo@@QEAAXXZ"(%struct.Foo*{{.*}})
 // CHECK:   call %struct.Foo* @"\01??0Foo@@QEAA@W4E@0@@Z"(%struct.Foo* {{.*}}, i32 0)
 
 #else
diff --git a/test/PCH/line-directive-nofilename.c b/test/PCH/line-directive-nofilename.c
new file mode 100644
index 0000000..0d2bcb7
--- /dev/null
+++ b/test/PCH/line-directive-nofilename.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -emit-pch -o %t %S/line-directive-nofilename.h
+// RUN: not %clang_cc1 -include-pch %t -fsyntax-only %s 2>&1 | FileCheck %s
+
+// This causes an "error: redefinition" diagnostic. The notes will have the
+// locations of the declarations from the PCH file.
+double foo, bar;
+
+// CHECK: line-directive-nofilename.h:42:5: note: previous definition is here
+// CHECK: foobar.h:100:5: note: previous definition is here
diff --git a/test/PCH/line-directive-nofilename.h b/test/PCH/line-directive-nofilename.h
new file mode 100644
index 0000000..2e2d189
--- /dev/null
+++ b/test/PCH/line-directive-nofilename.h
@@ -0,0 +1,5 @@
+#line 42
+int foo; // This should appear as at line-directive-nofilename.h:42
+
+#line 100 "foobar.h"
+int bar; // This should appear as at foobar.h:100
diff --git a/test/PCH/modified-header-crash.c b/test/PCH/modified-header-crash.c
index 4c21a8c..39b2530 100644
--- a/test/PCH/modified-header-crash.c
+++ b/test/PCH/modified-header-crash.c
@@ -1,6 +1,6 @@
 // Don't crash.
 
-// RUN: cp %S/modified-header-crash.h %t.h
+// RUN: cat %S/modified-header-crash.h > %t.h
 // RUN: %clang_cc1 -DCAKE -x c-header %t.h -emit-pch -o %t
 // RUN: echo 'int foobar;' >> %t.h
 // RUN: not %clang_cc1 %s -include-pch %t -fsyntax-only
diff --git a/test/PCH/pragma-loop.cpp b/test/PCH/pragma-loop.cpp
index 5975816..7f443dd 100644
--- a/test/PCH/pragma-loop.cpp
+++ b/test/PCH/pragma-loop.cpp
@@ -4,7 +4,7 @@
 // FIXME: A bug in ParsedAttributes causes the order of the attributes to be
 // reversed. The checks are consequently in the reverse order below.
 
-// CHECK: #pragma clang loop unroll_count(16)
+// CHECK: #pragma clang loop unroll_count(16){{$}}
 // CHECK: #pragma clang loop interleave_count(8)
 // CHECK: #pragma clang loop vectorize_width(4)
 // CHECK: #pragma clang loop distribute(enable)
@@ -15,9 +15,10 @@
 // CHECK: #pragma clang loop unroll(full)
 // CHECK: #pragma clang loop interleave(enable)
 // CHECK: #pragma clang loop vectorize(disable)
-// CHECK: #pragma unroll
-// CHECK: #pragma unroll (32)
-// CHECK: #pragma nounroll
+// FIXME: "#pragma unroll (enable)" is invalid and is not the input source.
+// CHECK: #pragma unroll (enable){{$}}
+// CHECK: #pragma unroll (32){{$}}
+// CHECK: #pragma nounroll{{$}}
 // CHECK: #pragma clang loop interleave_count(I)
 // CHECK: #pragma clang loop vectorize_width(V)
 
diff --git a/test/PCH/pragma-weak.c b/test/PCH/pragma-weak.c
index 1a8724c..4a48197 100644
--- a/test/PCH/pragma-weak.c
+++ b/test/PCH/pragma-weak.c
@@ -5,6 +5,6 @@
 // RUN: %clang_cc1 -x c-header -emit-pch -o %t %S/pragma-weak.h
 // RUN: not %clang_cc1 -include-pch %t %s -verify -emit-llvm -o - | FileCheck %s
 
-// CHECK: @weakvar = weak global i32 0
+// CHECK: @weakvar = weak {{(dso_local )?}}global i32 0
 int weakvar;
 // expected-warning {{weak identifier 'undeclaredvar' never declared}}
diff --git a/test/PCH/uses-seh.cpp b/test/PCH/uses-seh.cpp
index 6fbfc97..50e2053 100644
--- a/test/PCH/uses-seh.cpp
+++ b/test/PCH/uses-seh.cpp
@@ -19,7 +19,7 @@
 }
 int x = f();
 
-// CHECK: define linkonce_odr i32 @"\01?f@@YAHXZ"()
+// CHECK: define linkonce_odr dso_local i32 @"\01?f@@YAHXZ"()
 // CHECK: define internal i32 @"\01?filt$0@0@f@@"({{.*}})
 
 #else
diff --git a/test/PCH/verify_pch.m b/test/PCH/verify_pch.m
index e905f25..d50e234 100644
--- a/test/PCH/verify_pch.m
+++ b/test/PCH/verify_pch.m
@@ -2,7 +2,7 @@
 // RUN: rm -rf %t
 // RUN: mkdir -p %t/usr/include
 // RUN: echo '// empty' > %t/usr/include/sys_header.h
-// RUN: cp %s %t.h
+// RUN: cat %s > %t.h
 //
 // Precompile
 // RUN: %clang_cc1 -isystem %t/usr/include -x objective-c-header -emit-pch -o %t.pch %t.h
diff --git a/test/Parser/MicrosoftExtensions.cpp b/test/Parser/MicrosoftExtensions.cpp
index c796ede..21635f0 100644
--- a/test/Parser/MicrosoftExtensions.cpp
+++ b/test/Parser/MicrosoftExtensions.cpp
@@ -51,7 +51,7 @@
 struct __declspec(uuid("000000000000-0000-1234-000000000047")) uuid_attr_bad5 { };// expected-error {{uuid attribute contains a malformed GUID}}
 [uuid("000000000000-0000-1234-000000000047")] struct uuid_attr_bad6 { };// expected-error {{uuid attribute contains a malformed GUID}}
 
-__declspec(uuid("000000A0-0000-0000-C000-000000000046")) int i; // expected-warning {{'uuid' attribute only applies to classes}}
+__declspec(uuid("000000A0-0000-0000-C000-000000000046")) int i; // expected-warning {{'uuid' attribute only applies to structs, unions, classes, and enums}}
 
 struct __declspec(uuid("000000A0-0000-0000-C000-000000000046"))
 struct_with_uuid { };
@@ -69,7 +69,7 @@
 enum_with_uuid { };
 enum enum_without_uuid { };
 
-int __declspec(uuid("000000A0-0000-0000-C000-000000000046")) inappropriate_uuid; // expected-warning {{'uuid' attribute only applies to classes and enumerations}}
+int __declspec(uuid("000000A0-0000-0000-C000-000000000046")) inappropriate_uuid; // expected-warning {{'uuid' attribute only applies to}}
 
 int uuid_sema_test()
 {
diff --git a/test/Parser/c2x-attributes.c b/test/Parser/c2x-attributes.c
index 1be69f1..f261dee 100644
--- a/test/Parser/c2x-attributes.c
+++ b/test/Parser/c2x-attributes.c
@@ -7,7 +7,7 @@
 };
 
 enum [[]] { Four };
-[[]] enum E2 { Five }; // expected-error {{an attribute list cannot appear here}}
+[[]] enum E2 { Five }; // expected-error {{misplaced attributes}}
 
 // FIXME: this diagnostic can be improved.
 enum { [[]] Six }; // expected-error {{expected identifier}}
@@ -24,7 +24,7 @@
   int o [[]] : 12;
 };
 
-[[]] struct S2 { int a; }; // expected-error {{an attribute list cannot appear here}}
+[[]] struct S2 { int a; }; // expected-error {{misplaced attributes}}
 struct S3 [[]] { int a; }; // expected-error {{an attribute list cannot appear here}}
 
 union [[]] U {
@@ -32,7 +32,7 @@
   [[]] int i;
 };
 
-[[]] union U2 { double d; }; // expected-error {{an attribute list cannot appear here}}
+[[]] union U2 { double d; }; // expected-error {{misplaced attributes}}
 union U3 [[]] { double d; }; // expected-error {{an attribute list cannot appear here}}
 
 struct [[]] IncompleteStruct;
diff --git a/test/Parser/cxx-ambig-decl-expr.cpp b/test/Parser/cxx-ambig-decl-expr.cpp
index feb185f..7bf08b0 100644
--- a/test/Parser/cxx-ambig-decl-expr.cpp
+++ b/test/Parser/cxx-ambig-decl-expr.cpp
@@ -11,3 +11,7 @@
   unknown *p = 0; // expected-error {{unknown type name 'unknown'}}
   unknown * p + 0; // expected-error {{undeclared identifier 'unknown'}}
 }
+
+auto (*p)() -> int(nullptr);
+auto (*q)() -> int(*)(unknown); // expected-error {{unknown type name 'unknown'}}
+auto (*r)() -> int(*)(unknown + 1); // expected-error {{undeclared identifier 'unknown'}}
diff --git a/test/Parser/cxx-concept-declaration.cpp b/test/Parser/cxx-concept-declaration.cpp
index fad5975..2e9d1ac 100644
--- a/test/Parser/cxx-concept-declaration.cpp
+++ b/test/Parser/cxx-concept-declaration.cpp
@@ -1,28 +1,7 @@
 
-// Support parsing of function concepts and variable concepts
+// Support parsing of concepts
+// Disabled for now.
+// expected-no-diagnostics
 
 // RUN:  %clang_cc1 -std=c++14 -fconcepts-ts -x c++ -verify %s
-
-template<typename T> concept bool C1 = true;
-
-template<typename T> concept bool C2() { return true; }
-
-template<typename T>
-struct A { typedef bool Boolean; };
-
-template<int N>
-A<void>::Boolean concept C3(!0);
-
-template<typename T, int = 0>
-concept auto C4(void) -> bool { return true; }
-
-constexpr int One = 1;
-
-template <typename>
-static concept decltype(!0) C5 { bool(One) };
-
-template<typename T> concept concept bool C6 = true; // expected-warning {{duplicate 'concept' declaration specifier}}
-
-template<typename T> concept concept bool C7() { return true; } // expected-warning {{duplicate 'concept' declaration specifier}}
-
-template<concept T> concept bool D1 = true; // expected-error {{unknown type name 'T'}}
+// template<typename T> concept C1 = true;
diff --git a/test/Parser/cxx-decl.cpp b/test/Parser/cxx-decl.cpp
index 8a7a388..1a24520 100644
--- a/test/Parser/cxx-decl.cpp
+++ b/test/Parser/cxx-decl.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -verify -fsyntax-only -triple i386-linux -pedantic-errors -fcxx-exceptions -fexceptions %s
-// RUN: %clang_cc1 -verify -fsyntax-only -triple i386-linux -pedantic-errors -fcxx-exceptions -fexceptions -std=c++98 %s
-// RUN: %clang_cc1 -verify -fsyntax-only -triple i386-linux -pedantic-errors -fcxx-exceptions -fexceptions -std=c++11 %s
+// RUN: %clang_cc1 -verify -fsyntax-only -triple i386-linux -Wredundant-parens -pedantic-errors -fcxx-exceptions -fexceptions %s
+// RUN: %clang_cc1 -verify -fsyntax-only -triple i386-linux -Wredundant-parens -pedantic-errors -fcxx-exceptions -fexceptions -std=c++98 %s
+// RUN: %clang_cc1 -verify -fsyntax-only -triple i386-linux -Wredundant-parens -pedantic-errors -fcxx-exceptions -fexceptions -std=c++11 %s
 
 const char const *x10; // expected-error {{duplicate 'const' declaration specifier}}
 
@@ -83,7 +83,7 @@
 
   int global1,
   __attribute__(()) global2,
-  (global5),
+  (global5), // expected-warning {{redundant parentheses surrounding declarator}}
   *global6,
   &global7 = global1,
   &&global8 = static_cast<int&&>(global1),
@@ -199,7 +199,7 @@
 // expected-error@-2 {{expected expression}}
 // expected-error@-3 {{expected unqualified-id}}
 #else
-// expected-error@-5 {{an attribute list cannot appear here}}
+// expected-error@-5 {{misplaced attributes}}
 #endif
 
 namespace test7 {
@@ -263,6 +263,41 @@
   };
 }
 
+namespace NNS {
+  struct A {};
+  namespace B { extern A C1, C2, *C3, C4[], C5; }
+  // Do not produce a redundant parentheses warning here; removing these parens
+  // changes the meaning of the program.
+  A (::NNS::B::C1);
+  A (NNS::B::C2); // expected-warning {{redundant parentheses surrounding declarator}}
+  A (*::NNS::B::C3); // expected-warning {{redundant parentheses surrounding declarator}}
+  A (::NNS::B::C4[2]);
+  // Removing one of these sets of parentheses would be reasonable.
+  A ((::NNS::B::C5)); // expected-warning {{redundant parentheses surrounding declarator}}
+
+  void f() {
+    // FIXME: A vexing-parse warning here would be useful.
+    A(::NNS::B::C1); // expected-error {{definition or redeclaration}}
+    A(NNS::B::C1); // expected-warning {{redundant paren}} expected-error {{definition or redeclaration}}
+  }
+}
+
+inline namespace ParensAroundFriend { // expected-error 0-1{{C++11}}
+  struct A {};
+  struct B {
+    static A C();
+  };
+  namespace X {
+    struct B {};
+    struct D {
+      // No warning here: while this could be written as
+      //   friend (::B::C)();
+      // we do need parentheses *somewhere* here.
+      friend A (::B::C());
+    };
+  }
+}
+
 // PR8380
 extern ""      // expected-error {{unknown linkage language}}
 test6a { ;// expected-error {{C++ requires a type specifier for all declarations}}
diff --git a/test/Parser/cxx0x-attributes.cpp b/test/Parser/cxx0x-attributes.cpp
index a803085..e01491d 100644
--- a/test/Parser/cxx0x-attributes.cpp
+++ b/test/Parser/cxx0x-attributes.cpp
@@ -64,6 +64,13 @@
 struct [[]] struct_attr;
 class [[]] class_attr {};
 union [[]] union_attr;
+enum [[]] E { };
+namespace test_misplacement {
+[[]] struct struct_attr2;  //expected-error{{misplaced attributes}}
+[[]] class class_attr2; //expected-error{{misplaced attributes}}
+[[]] union union_attr2; //expected-error{{misplaced attributes}}
+[[]] enum  E2 { }; //expected-error{{misplaced attributes}}
+}
 
 // Checks attributes placed at wrong syntactic locations of class specifiers.
 class [[]] [[]]
@@ -91,7 +98,7 @@
 class D final alignas ([l) {}]{}); // expected-error {{expected ',' or ']' in lambda capture list}} expected-error {{an attribute list cannot appear here}}
 
 [[]] struct with_init_declarators {} init_declarator;
-[[]] struct no_init_declarators; // expected-error {{an attribute list cannot appear here}}
+[[]] struct no_init_declarators; // expected-error {{misplaced attributes}}
 template<typename> [[]] struct no_init_declarators_template; // expected-error {{an attribute list cannot appear here}}
 void fn_with_structs() {
   [[]] struct with_init_declarators {} init_declarator;
@@ -307,7 +314,7 @@
 
 [[attribute_declaration]]; // expected-warning {{unknown attribute 'attribute_declaration' ignored}}
 [[noreturn]]; // expected-error {{'noreturn' attribute only applies to functions}}
-[[carries_dependency]]; // expected-error {{'carries_dependency' attribute only applies to functions, methods, and parameters}}
+[[carries_dependency]]; // expected-error {{'carries_dependency' attribute only applies to parameters, Objective-C methods, and functions}}
 
 class A {
   A([[gnu::unused]] int a);
diff --git a/test/Parser/cxx0x-decl.cpp b/test/Parser/cxx0x-decl.cpp
index 2b253c0..1eaf84a 100644
--- a/test/Parser/cxx0x-decl.cpp
+++ b/test/Parser/cxx0x-decl.cpp
@@ -79,7 +79,7 @@
 namespace PR5066 {
   using T = int (*f)(); // expected-error {{type-id cannot have a name}}
   template<typename T> using U = int (*f)(); // expected-error {{type-id cannot have a name}}
-  auto f() -> int (*f)(); // expected-error {{type-id cannot have a name}}
+  auto f() -> int (*f)(); // expected-error {{only variables can be initialized}} expected-error {{expected ';'}}
   auto g = []() -> int (*f)() {}; // expected-error {{type-id cannot have a name}}
 }
 
diff --git a/test/Parser/cxx1z-class-template-argument-deduction.cpp b/test/Parser/cxx1z-class-template-argument-deduction.cpp
index b8f5a82..bf1d004 100644
--- a/test/Parser/cxx1z-class-template-argument-deduction.cpp
+++ b/test/Parser/cxx1z-class-template-argument-deduction.cpp
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -std=c++1z -fcxx-exceptions -verify %s
 
-template <typename T> struct A { // expected-note 35{{declared here}}
+template <typename T> struct A { // expected-note 38{{declared here}}
   constexpr A() {}
   constexpr A(int) {}
   constexpr operator int() { return 0; }
@@ -17,6 +17,12 @@
   Y<A> ya; // expected-error {{requires template arguments}}
   X<::A> xcca;
   Y<::A> ycca; // expected-error {{requires template arguments}}
+  X<A*> xap; // expected-error {{requires template arguments}}
+  X<const A> xca; // expected-error {{requires template arguments}}
+  X<A const> xac; // expected-error {{requires template arguments}}
+  // FIXME: This should not parse as a template template argument due to the
+  // trailing attributes.
+  X<A [[]]> xa_attr;
 
   template<template<typename> typename = A> struct XD {};
   template<typename = A> struct YD {}; // expected-error {{requires template arguments}}
@@ -28,6 +34,23 @@
   template<typename T = A> struct G { }; // expected-error {{requires template arguments}}
 }
 
+namespace template_template_arg_pack {
+  template<template<typename> typename...> struct XP {};
+  template<typename...> struct YP {};
+
+  struct Z { template<typename T> struct Q {}; }; // expected-note 2{{here}}
+  
+  template<typename T> using ZId = Z;
+
+  template<typename ...Ts> struct A {
+    XP<ZId<Ts>::Q...> xe;
+    YP<ZId<Ts>::Q...> ye; // expected-error {{requires template arguments}}
+
+    XP<ZId<Ts>::Q> xp; // expected-error {{unexpanded parameter pack}}
+    YP<ZId<Ts>::Q> yp; // expected-error {{requires template arguments}}
+  };
+}
+
 namespace injected_class_name {
   template<typename T> struct A {
     A(T);
@@ -193,3 +216,18 @@
   template<typename T> void g(typename T::A = 0); // expected-note {{refers to class template member}}
   void h() { g<X>(); } // expected-error {{no matching function}}
 }
+
+namespace parenthesized {
+  template<typename T> struct X { X(T); };                    
+  auto n = (X([]{}));
+}
+
+namespace within_template_arg_list {
+  template<typename T> struct X { constexpr X(T v) : v(v) {} T v; };
+  template<int N = X(1).v> struct Y {};
+  using T = Y<>;
+  using T = Y<X(1).v>;
+  using T = Y<within_template_arg_list::X(1).v>;
+
+  template<int ...N> struct Z { Y<X(N)...> y; };
+}
diff --git a/test/Parser/cxx1z-decomposition.cpp b/test/Parser/cxx1z-decomposition.cpp
index 9cbe70e..cf4ba77 100644
--- a/test/Parser/cxx1z-decomposition.cpp
+++ b/test/Parser/cxx1z-decomposition.cpp
@@ -32,13 +32,14 @@
   void f(auto [a, b, c]); // expected-error {{'auto' not allowed in function prototype}} expected-error {{'a'}}
 
   void g() {
-    // A condition is not a simple-declaration.
-    for (; auto [a, b, c] = S(); ) {} // expected-error {{not permitted in this context}}
-    if (auto [a, b, c] = S()) {} // expected-error {{not permitted in this context}}
-    if (int n; auto [a, b, c] = S()) {} // expected-error {{not permitted in this context}}
-    switch (auto [a, b, c] = S()) {} // expected-error {{not permitted in this context}}
-    switch (int n; auto [a, b, c] = S()) {} // expected-error {{not permitted in this context}}
-    while (auto [a, b, c] = S()) {} // expected-error {{not permitted in this context}}
+    // A condition is allowed as a Clang extension.
+    // See commentary in test/Parser/decomposed-condition.cpp
+    for (; auto [a, b, c] = S(); ) {} // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}} expected-error {{value of type 'S' is not contextually convertible to 'bool'}}
+    if (auto [a, b, c] = S()) {} // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}} expected-error {{value of type 'S' is not contextually convertible to 'bool'}}
+    if (int n; auto [a, b, c] = S()) {} // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}} expected-error {{value of type 'S' is not contextually convertible to 'bool'}}
+    switch (auto [a, b, c] = S()) {} // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}} expected-error {{statement requires expression of integer type ('S' invalid)}}
+    switch (int n; auto [a, b, c] = S()) {} // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}} expected-error {{statement requires expression of integer type ('S' invalid)}}
+    while (auto [a, b, c] = S()) {} // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}} expected-error {{value of type 'S' is not contextually convertible to 'bool'}}
 
     // An exception-declaration is not a simple-declaration.
     try {}
diff --git a/test/Parser/cxx1z-fold-expressions.cpp b/test/Parser/cxx1z-fold-expressions.cpp
index b1f7318..342f115 100644
--- a/test/Parser/cxx1z-fold-expressions.cpp
+++ b/test/Parser/cxx1z-fold-expressions.cpp
@@ -43,3 +43,20 @@
     (int)(undeclared_junk + ...) + // expected-error {{undeclared}}
     (int)(a + ...); // expected-error {{does not contain any unexpanded}}
 }
+
+// fold-operator can be '>' or '>>'.
+template <int... N> constexpr bool greaterThan() { return (N > ...); }
+template <int... N> constexpr int rightShift() { return (N >> ...); }
+
+static_assert(greaterThan<2, 1>());
+static_assert(rightShift<10, 1>() == 5);
+
+template <auto V> constexpr auto Identity = V;
+
+// Support fold operators within templates.
+template <int... N> constexpr int nestedFoldOperator() {
+  return Identity<(Identity<0> >> ... >> N)> +
+    Identity<(N >> ... >> Identity<0>)>;
+}
+
+static_assert(nestedFoldOperator<3, 1>() == 1);
diff --git a/test/Parser/cxx1z-nested-namespace-definition.cpp b/test/Parser/cxx1z-nested-namespace-definition.cpp
index e5e809a..e0c5244 100644
--- a/test/Parser/cxx1z-nested-namespace-definition.cpp
+++ b/test/Parser/cxx1z-nested-namespace-definition.cpp
@@ -1,5 +1,5 @@
 // RUN: cp %s %t
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++98
 // RUN: not %clang_cc1 -x c++ -fixit %t -Werror -DFIXIT
 // RUN: %clang_cc1 -x c++ %t -DFIXIT
 // RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++17 -Wc++14-compat
diff --git a/test/Parser/cxx2a-spaceship.cpp b/test/Parser/cxx2a-spaceship.cpp
new file mode 100644
index 0000000..24cece3
--- /dev/null
+++ b/test/Parser/cxx2a-spaceship.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -std=c++2a -verify %s
+
+template<int> struct X {};
+
+X<1> operator<<(X<0>, X<0>);
+X<2> operator<=>(X<0>, X<1>);
+X<2> operator<=>(X<1>, X<0>);
+X<3> operator<(X<0>, X<2>);
+X<3> operator<(X<2>, X<0>);
+
+void f(X<0> x0, X<1> x1) {
+  X<2> a = x0 <=> x0 << x0;
+  X<2> b = x0 << x0 <=> x0; // expected-warning {{overloaded operator << has higher precedence than comparison operator}} expected-note 2{{}}
+  X<3> c = x0 < x0 <=> x1;
+  X<3> d = x1 <=> x0 < x0;
+  X<3> e = x0 < x0 <=> x0 << x0;
+  X<3> f = x0 << x0 <=> x0 < x0; // expected-warning {{overloaded operator << has higher precedence than comparison operator}} expected-note 2{{}}
+}
diff --git a/test/Parser/decomposed-condition.cpp b/test/Parser/decomposed-condition.cpp
new file mode 100644
index 0000000..c41c822
--- /dev/null
+++ b/test/Parser/decomposed-condition.cpp
@@ -0,0 +1,61 @@
+// RUN: %clang_cc1 -std=c++1z %s -verify
+
+struct Na {
+  bool flag;
+  float data;
+};
+
+struct Rst {
+  bool flag;
+  float data;
+  explicit operator bool() const {
+    return flag;
+  }
+};
+
+Rst f();
+Na g();
+
+namespace CondInIf {
+void h() {
+  if (auto [ok, d] = f()) // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}}
+    ;
+  if (auto [ok, d] = g()) // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}} expected-error {{value of type 'Na' is not contextually convertible to 'bool'}}
+    ;
+}
+} // namespace CondInIf
+
+namespace CondInWhile {
+void h() {
+  while (auto [ok, d] = f()) // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}}
+    ;
+  while (auto [ok, d] = g()) // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}} expected-error {{value of type 'Na' is not contextually convertible to 'bool'}}
+    ;
+}
+} // namespace CondInWhile
+
+namespace CondInFor {
+void h() {
+  for (; auto [ok, d] = f();) // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}}
+    ;
+  for (; auto [ok, d] = g();) // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}} expected-error {{value of type 'Na' is not contextually convertible to 'bool'}}
+    ;
+}
+} // namespace CondInFor
+
+struct IntegerLike {
+  bool flag;
+  float data;
+  operator int() const {
+    return int(data);
+  }
+};
+
+namespace CondInSwitch {
+void h(IntegerLike x) {
+  switch (auto [ok, d] = x) // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}}
+    ;
+  switch (auto [ok, d] = g()) // expected-warning {{ISO C++17 does not permit structured binding declaration in a condition}} expected-error {{statement requires expression of integer type ('Na' invalid)}}
+    ;
+}
+} // namespace CondInSwitch
diff --git a/test/Parser/ms-square-bracket-attributes.mm b/test/Parser/ms-square-bracket-attributes.mm
index a158cf7..c1fc14c 100644
--- a/test/Parser/ms-square-bracket-attributes.mm
+++ b/test/Parser/ms-square-bracket-attributes.mm
@@ -133,7 +133,7 @@
   (void)__uuidof(OuterClass::sic);
 }
 
-// expected-warning@+1 {{'uuid' attribute only applies to classes}}
+// expected-warning@+1 {{'uuid' attribute only applies to structs, unions, classes, and enums}}
 [uuid("000000A0-0000-0000-C000-000000000049")] void f();
 }
 
diff --git a/test/Parser/objc-attr.m b/test/Parser/objc-attr.m
new file mode 100644
index 0000000..7e65eff
--- /dev/null
+++ b/test/Parser/objc-attr.m
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -fsyntax-only -fdouble-square-bracket-attributes -triple x86_64-apple-macosx10.10.0 -verify %s
+// expected-no-diagnostics
+
+@interface NSObject
+@end
+
+[[clang::objc_exception]]
+@interface Foo {
+  [[clang::iboutlet]] NSObject *h;
+}
+@property (readonly) [[clang::objc_returns_inner_pointer]] void *i, *j;
+@property (readonly) [[clang::iboutlet]] NSObject *k;
+@end
+
+[[clang::objc_runtime_name("name")]] @protocol Bar;
+
+[[clang::objc_protocol_requires_explicit_implementation]] 
+@protocol Baz
+@end
+
+@interface Quux
+-(void)g1 [[clang::ns_consumes_self]];
+-(void)g2 __attribute__((ns_consumes_self));
+-(void)h1: (int)x [[clang::ns_consumes_self]];
+-(void)h2: (int)x __attribute__((ns_consumes_self));
+-(void) [[clang::ns_consumes_self]] i1;
+-(void) __attribute__((ns_consumes_self)) i2;
+@end
diff --git a/test/Parser/objc-default-ctor-init.mm b/test/Parser/objc-default-ctor-init.mm
index ea4c064..a14a243 100644
--- a/test/Parser/objc-default-ctor-init.mm
+++ b/test/Parser/objc-default-ctor-init.mm
@@ -1,21 +1,21 @@
-// RUN: %clang_cc1 -triple x86_64-apple-macosx10.10 -std=c++11 -ast-dump %s | FileCheck %s
-// CHECK: CXXCtorInitializer Field {{.*}} 'ptr' 'void *'
-// CHECK: CXXCtorInitializer Field {{.*}} 'q' 'struct Q'
-
-@interface NSObject
-@end
-
-@interface I : NSObject
-@end
-
-struct Q { Q(); };
-
-struct S {
-  S();
-  void *ptr = nullptr;
-  Q q;
-};
-
-@implementation I
-S::S() {}
-@end
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.10 -std=c++11 -ast-dump %s | FileCheck %s
+// CHECK: CXXCtorInitializer Field {{.*}} 'ptr' 'void *'
+// CHECK: CXXCtorInitializer Field {{.*}} 'q' 'Q'
+
+@interface NSObject
+@end
+
+@interface I : NSObject
+@end
+
+struct Q { Q(); };
+
+struct S {
+  S();
+  void *ptr = nullptr;
+  Q q;
+};
+
+@implementation I
+S::S() {}
+@end
diff --git a/test/Parser/objcxx11-invalid-lambda.cpp b/test/Parser/objcxx11-invalid-lambda.cpp
new file mode 100644
index 0000000..74c5636
--- /dev/null
+++ b/test/Parser/objcxx11-invalid-lambda.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -x objective-c++ -std=c++11 %s
+
+void foo() {  // expected-note {{to match this '{'}}
+  int bar;
+  auto baz = [
+      bar(  // expected-note {{to match this '('}} expected-note {{to match this '('}}
+        foo_undeclared() // expected-error{{use of undeclared identifier 'foo_undeclared'}} expected-error{{use of undeclared identifier 'foo_undeclared'}}
+      /* ) */
+    ] () { };   // expected-error{{expected ')'}}
+}               // expected-error{{expected ')'}} expected-error{{expected ';' at end of declaration}} expected-error{{expected '}'}}
diff --git a/test/Parser/pointer_promotion.c b/test/Parser/pointer_promotion.c
index 30589d0..8b718ad 100644
--- a/test/Parser/pointer_promotion.c
+++ b/test/Parser/pointer_promotion.c
@@ -12,6 +12,6 @@
   if (cp < fp) {} // expected-warning {{comparison of distinct pointer types ('char *' and 'struct foo *')}}
   if (fp < bp) {} // expected-warning {{comparison of distinct pointer types ('struct foo *' and 'struct bar *')}}
   if (ip < 7) {} // expected-warning {{comparison between pointer and integer ('int *' and 'int')}}
-  if (sint < ip) {} // expected-warning {{comparison between pointer and integer ('int' and 'int *')}}
+  if (sint < ip) {} // expected-warning {{comparison between pointer and integer ('short' and 'int *')}}
   if (ip == cp) {} // expected-warning {{comparison of distinct pointer types ('int *' and 'char *')}}
 }
diff --git a/test/Preprocessor/arm-acle-6.4.c b/test/Preprocessor/arm-acle-6.4.c
index 3102bd4..9e4253a 100644
--- a/test/Preprocessor/arm-acle-6.4.c
+++ b/test/Preprocessor/arm-acle-6.4.c
@@ -91,7 +91,7 @@
 
 // RUN: %clang -target arm-none-linux-eabi -march=armv6k -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-V6K
  
-// CHECK-V6K: __ARM_FEATURE_LDREX 0xF
+// CHECK-V6K: __ARM_FEATURE_LDREX 0xf
 
 // RUN: %clang -target arm-none-linux-eabi -march=armv7-a -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-V7A
 
@@ -101,7 +101,7 @@
 // CHECK-V7A: __ARM_ARCH_PROFILE 'A'
 // CHECK-V7A: __ARM_FEATURE_CLZ 1
 // CHECK-V7A: __ARM_FEATURE_DSP 1
-// CHECK-V7A: __ARM_FEATURE_LDREX 0xF
+// CHECK-V7A: __ARM_FEATURE_LDREX 0xf
 // CHECK-V7A: __ARM_FEATURE_QBIT 1
 // CHECK-V7A: __ARM_FEATURE_SAT 1
 // CHECK-V7A: __ARM_FEATURE_SIMD32 1
@@ -129,7 +129,7 @@
 // CHECK-V7VE: __ARM_FEATURE_CLZ 1
 // CHECK-V7VE: __ARM_FEATURE_DSP 1
 // CHECK-V7VE: __ARM_FEATURE_IDIV 1
-// CHECK-V7VE: __ARM_FEATURE_LDREX 0xF
+// CHECK-V7VE: __ARM_FEATURE_LDREX 0xf
 // CHECK-V7VE: __ARM_FEATURE_QBIT 1
 // CHECK-V7VE: __ARM_FEATURE_SAT 1
 // CHECK-V7VE: __ARM_FEATURE_SIMD32 1
@@ -143,7 +143,7 @@
 // CHECK-V7R: __ARM_ARCH_PROFILE 'R'
 // CHECK-V7R: __ARM_FEATURE_CLZ 1
 // CHECK-V7R: __ARM_FEATURE_DSP 1
-// CHECK-V7R: __ARM_FEATURE_LDREX 0xF
+// CHECK-V7R: __ARM_FEATURE_LDREX 0xf
 // CHECK-V7R: __ARM_FEATURE_QBIT 1
 // CHECK-V7R: __ARM_FEATURE_SAT 1
 // CHECK-V7R: __ARM_FEATURE_SIMD32 1
@@ -188,7 +188,7 @@
 // CHECK-V8A: __ARM_FEATURE_CLZ 1
 // CHECK-V8A: __ARM_FEATURE_DSP 1
 // CHECK-V8A: __ARM_FEATURE_IDIV 1
-// CHECK-V8A: __ARM_FEATURE_LDREX 0xF
+// CHECK-V8A: __ARM_FEATURE_LDREX 0xf
 // CHECK-V8A: __ARM_FEATURE_QBIT 1
 // CHECK-V8A: __ARM_FEATURE_SAT 1
 // CHECK-V8A: __ARM_FEATURE_SIMD32 1
diff --git a/test/Preprocessor/arm-acle-6.5.c b/test/Preprocessor/arm-acle-6.5.c
index 7ad91bd..143ed75 100644
--- a/test/Preprocessor/arm-acle-6.5.c
+++ b/test/Preprocessor/arm-acle-6.5.c
@@ -26,7 +26,7 @@
 // RUN: %clang -target armv7a-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP
 // RUN: %clang -target armv7ve-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP
 
-// CHECK-SP-DP: __ARM_FP 0xC
+// CHECK-SP-DP: __ARM_FP 0xc
 
 // RUN: %clang -target arm-eabi -mfpu=vfpv3-fp16 -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP-HP
 // RUN: %clang -target arm-eabi -mfpu=vfpv3-d16-fp16 -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP-HP
@@ -40,7 +40,7 @@
 // RUN: %clang -target arm-eabi -mfpu=crypto-neon-fp-armv8 -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP-HP
 // RUN: %clang -target armv8-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP-HP
 
-// CHECK-SP-DP-HP: __ARM_FP 0xE
+// CHECK-SP-DP-HP: __ARM_FP 0xe
 
 // RUN: %clang -target armv4-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-NO-FMA
 // RUN: %clang -target armv5-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-NO-FMA
diff --git a/test/Preprocessor/arm-target-features.c b/test/Preprocessor/arm-target-features.c
index b206e1c..82ab964 100644
--- a/test/Preprocessor/arm-target-features.c
+++ b/test/Preprocessor/arm-target-features.c
@@ -5,9 +5,19 @@
 // CHECK-V8A: #define __ARM_FEATURE_CRC32 1
 // CHECK-V8A: #define __ARM_FEATURE_DIRECTED_ROUNDING 1
 // CHECK-V8A: #define __ARM_FEATURE_NUMERIC_MAXMIN 1
-// CHECK-V8A: #define __ARM_FP 0xE
-// CHECK-V8A: #define __ARM_FP16_ARGS 1
-// CHECK-V8A: #define __ARM_FP16_FORMAT_IEEE 1
+// CHECK-V8A-NOT: #define __ARM_FP 0x
+
+// RUN: %clang -target armv8a-none-linux-gnueabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8A-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8a-none-linux-gnueabihf -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8A-ALLOW-FP-INSTR %s
+// CHECK-V8A-ALLOW-FP-INSTR: #define __ARMEL__ 1
+// CHECK-V8A-ALLOW-FP-INSTR: #define __ARM_ARCH 8
+// CHECK-V8A-ALLOW-FP-INSTR: #define __ARM_ARCH_8A__ 1
+// CHECK-V8A-ALLOW-FP-INSTR: #define __ARM_FEATURE_CRC32 1
+// CHECK-V8A-ALLOW-FP-INSTR: #define __ARM_FEATURE_DIRECTED_ROUNDING 1
+// CHECK-V8A-ALLOW-FP-INSTR: #define __ARM_FEATURE_NUMERIC_MAXMIN 1
+// CHECK-V8A-ALLOW-FP-INSTR: #define __ARM_FP 0xe
+// CHECK-V8A-ALLOW-FP-INSTR: #define __ARM_FP16_ARGS 1
+// CHECK-V8A-ALLOW-FP-INSTR: #define __ARM_FP16_FORMAT_IEEE 1
 
 // RUN: %clang -target armv8r-none-linux-gnu -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8R %s
 // CHECK-V8R: #define __ARMEL__ 1
@@ -16,7 +26,17 @@
 // CHECK-V8R: #define __ARM_FEATURE_CRC32 1
 // CHECK-V8R: #define __ARM_FEATURE_DIRECTED_ROUNDING 1
 // CHECK-V8R: #define __ARM_FEATURE_NUMERIC_MAXMIN 1
-// CHECK-V8R: #define __ARM_FP 0xE
+// CHECK-V8R-NOT: #define __ARM_FP 0x
+
+// RUN: %clang -target armv8r-none-linux-gnueabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8R-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8r-none-linux-gnueabihf -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8R-ALLOW-FP-INSTR %s
+// CHECK-V8R-ALLOW-FP-INSTR: #define __ARMEL__ 1
+// CHECK-V8R-ALLOW-FP-INSTR: #define __ARM_ARCH 8
+// CHECK-V8R-ALLOW-FP-INSTR: #define __ARM_ARCH_8R__ 1
+// CHECK-V8R-ALLOW-FP-INSTR: #define __ARM_FEATURE_CRC32 1
+// CHECK-V8R-ALLOW-FP-INSTR: #define __ARM_FEATURE_DIRECTED_ROUNDING 1
+// CHECK-V8R-ALLOW-FP-INSTR: #define __ARM_FEATURE_NUMERIC_MAXMIN 1
+// CHECK-V8R-ALLOW-FP-INSTR: #define __ARM_FP 0xe
 
 // RUN: %clang -target armv7a-none-linux-gnu -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7 %s
 // CHECK-V7: #define __ARMEL__ 1
@@ -25,14 +45,32 @@
 // CHECK-V7-NOT: __ARM_FEATURE_CRC32
 // CHECK-V7-NOT: __ARM_FEATURE_NUMERIC_MAXMIN
 // CHECK-V7-NOT: __ARM_FEATURE_DIRECTED_ROUNDING
-// CHECK-V7: #define __ARM_FP 0xC
+// CHECK-V7-NOT: #define __ARM_FP 0x
+
+// RUN: %clang -target armv7a-none-linux-gnueabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7a-none-linux-gnueabihf -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7-ALLOW-FP-INSTR %s
+// CHECK-V7-ALLOW-FP-INSTR: #define __ARMEL__ 1
+// CHECK-V7-ALLOW-FP-INSTR: #define __ARM_ARCH 7
+// CHECK-V7-ALLOW-FP-INSTR: #define __ARM_ARCH_7A__ 1
+// CHECK-V7-ALLOW-FP-INSTR-NOT: __ARM_FEATURE_CRC32
+// CHECK-V7-ALLOW-FP-INSTR-NOT: __ARM_FEATURE_NUMERIC_MAXMIN
+// CHECK-V7-ALLOW-FP-INSTR-NOT: __ARM_FEATURE_DIRECTED_ROUNDING
+// CHECK-V7-ALLOW-FP-INSTR: #define __ARM_FP 0xc
 
 // RUN: %clang -target armv7ve-none-linux-gnu -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7VE %s
 // CHECK-V7VE: #define __ARMEL__ 1
 // CHECK-V7VE: #define __ARM_ARCH 7
 // CHECK-V7VE: #define __ARM_ARCH_7VE__ 1
 // CHECK-V7VE: #define __ARM_ARCH_EXT_IDIV__ 1
-// CHECK-V7VE: #define __ARM_FP 0xC
+// CHECK-V7VE-NOT: #define __ARM_FP 0x
+
+// RUN: %clang -target armv7ve-none-linux-gnueabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7VE-DEFAULT-ABI-SOFT %s
+// RUN: %clang -target armv7ve-none-linux-gnueabihf -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7VE-DEFAULT-ABI-SOFT %s
+// CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARMEL__ 1
+// CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_ARCH 7
+// CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_ARCH_7VE__ 1
+// CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_ARCH_EXT_IDIV__ 1
+// CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_FP 0xc
 
 // RUN: %clang -target x86_64-apple-macosx10.10 -arch armv7s -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7S %s
 // CHECK-V7S: #define __ARMEL__ 1
@@ -41,7 +79,7 @@
 // CHECK-V7S-NOT: __ARM_FEATURE_CRC32
 // CHECK-V7S-NOT: __ARM_FEATURE_NUMERIC_MAXMIN
 // CHECK-V7S-NOT: __ARM_FEATURE_DIRECTED_ROUNDING
-// CHECK-V7S: #define __ARM_FP 0xE
+// CHECK-V7S: #define __ARM_FP 0xe
 
 // RUN: %clang -target armv8a -mfloat-abi=hard -x c -E -dM %s | FileCheck -match-full-lines --check-prefix=CHECK-V8-BAREHF %s
 // CHECK-V8-BAREHF: #define __ARMEL__ 1
@@ -50,19 +88,19 @@
 // CHECK-V8-BAREHF: #define __ARM_FEATURE_CRC32 1
 // CHECK-V8-BAREHF: #define __ARM_FEATURE_DIRECTED_ROUNDING 1
 // CHECK-V8-BAREHF: #define __ARM_FEATURE_NUMERIC_MAXMIN 1
-// CHECK-V8-BAREHP: #define __ARM_FP 0xE
+// CHECK-V8-BAREHP: #define __ARM_FP 0xe
 // CHECK-V8-BAREHF: #define __ARM_NEON__ 1
 // CHECK-V8-BAREHF: #define __ARM_PCS_VFP 1
 // CHECK-V8-BAREHF: #define __VFP_FP__ 1
 
 // RUN: %clang -target armv8a -mfloat-abi=hard -mfpu=fp-armv8 -x c -E -dM %s | FileCheck -match-full-lines --check-prefix=CHECK-V8-BAREHF-FP %s
 // CHECK-V8-BAREHF-FP-NOT: __ARM_NEON__ 1
-// CHECK-V8-BAREHP-FP: #define __ARM_FP 0xE
+// CHECK-V8-BAREHP-FP: #define __ARM_FP 0xe
 // CHECK-V8-BAREHF-FP: #define __VFP_FP__ 1
 
 // RUN: %clang -target armv8a -mfloat-abi=hard -mfpu=neon-fp-armv8 -x c -E -dM %s | FileCheck -match-full-lines --check-prefix=CHECK-V8-BAREHF-NEON-FP %s
 // RUN: %clang -target armv8a -mfloat-abi=hard -mfpu=crypto-neon-fp-armv8 -x c -E -dM %s | FileCheck -match-full-lines --check-prefix=CHECK-V8-BAREHF-NEON-FP %s
-// CHECK-V8-BAREHP-NEON-FP: #define __ARM_FP 0xE
+// CHECK-V8-BAREHP-NEON-FP: #define __ARM_FP 0xe
 // CHECK-V8-BAREHF-NEON-FP: #define __ARM_NEON__ 1
 // CHECK-V8-BAREHF-NEON-FP: #define __VFP_FP__ 1
 
@@ -85,10 +123,15 @@
 
 // RUN: %clang -target armv8a -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8A %s
 // RUN: %clang -target armv8a -mthumb -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8A %s
-// RUN: %clang -target armv8a-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8A %s
-// RUN: %clang -target armv8a-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8A %s
 // V8A:#define __ARM_ARCH_EXT_IDIV__ 1
-// V8A:#define __ARM_FP 0xE
+// V8A-NOT:#define __ARM_FP 0x
+
+// RUN: %clang -target armv8a-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8A-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8a-eabi -mthumb -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8A-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8a-eabihf -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8A-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8a-eabihf -mthumb -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8A-ALLOW-FP-INSTR %s
+// V8A-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// V8A-ALLOW-FP-INSTR:#define __ARM_FP 0xe
 
 // RUN: %clang -target armv8m.base-none-linux-gnu -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8M_BASELINE %s
 // V8M_BASELINE: #define __ARM_ARCH 8
@@ -111,9 +154,22 @@
 // V8M_MAINLINE: #define __ARM_ARCH_PROFILE 'M'
 // V8M_MAINLINE-NOT: __ARM_FEATURE_CRC32
 // V8M_MAINLINE-NOT: __ARM_FEATURE_DSP
-// V8M_MAINLINE: #define __ARM_FP 0xE
+// V8M_MAINLINE-NOT: #define __ARM_FP 0x
 // V8M_MAINLINE: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
 
+// RUN: %clang -target armv8m.main-none-linux-gnueabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8M-MAINLINE-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8m.main-none-linux-gnueabihf -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8M-MAINLINE-ALLOW-FP-INSTR %s
+// V8M-MAINLINE-ALLOW-FP-INSTR: #define __ARM_ARCH 8
+// V8M-MAINLINE-ALLOW-FP-INSTR: #define __ARM_ARCH_8M_MAIN__ 1
+// V8M-MAINLINE-ALLOW-FP-INSTR: #define __ARM_ARCH_EXT_IDIV__ 1
+// V8M-MAINLINE-ALLOW-FP-INSTR-NOT: __ARM_ARCH_ISA_ARM
+// V8M-MAINLINE-ALLOW-FP-INSTR: #define __ARM_ARCH_ISA_THUMB 2
+// V8M-MAINLINE-ALLOW-FP-INSTR: #define __ARM_ARCH_PROFILE 'M'
+// V8M-MAINLINE-ALLOW-FP-INSTR-NOT: __ARM_FEATURE_CRC32
+// V8M-MAINLINE-ALLOW-FP-INSTR-NOT: __ARM_FEATURE_DSP
+// V8M-MAINLINE-ALLOW-FP-INSTR: #define __ARM_FP 0xe
+// V8M-MAINLINE-ALLOW-FP-INSTR: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+
 // RUN: %clang -target arm-none-linux-gnu -march=armv8-m.main+dsp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8M_MAINLINE_DSP %s
 // V8M_MAINLINE_DSP: #define __ARM_ARCH 8
 // V8M_MAINLINE_DSP: #define __ARM_ARCH_8M_MAIN__ 1
@@ -123,9 +179,21 @@
 // V8M_MAINLINE_DSP: #define __ARM_ARCH_PROFILE 'M'
 // V8M_MAINLINE_DSP-NOT: __ARM_FEATURE_CRC32
 // V8M_MAINLINE_DSP: #define __ARM_FEATURE_DSP 1
-// V8M_MAINLINE_DSP: #define __ARM_FP 0xE
+// V8M_MAINLINE_DSP-NOT: #define __ARM_FP 0x
 // V8M_MAINLINE_DSP: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
 
+// RUN: %clang -target arm-none-linux-gnueabi -march=armv8-m.main+dsp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8M-MAINLINE-DSP-ALLOW-FP-INSTR %s
+// V8M-MAINLINE-DSP-ALLOW-FP-INSTR: #define __ARM_ARCH 8
+// V8M-MAINLINE-DSP-ALLOW-FP-INSTR: #define __ARM_ARCH_8M_MAIN__ 1
+// V8M-MAINLINE-DSP-ALLOW-FP-INSTR: #define __ARM_ARCH_EXT_IDIV__ 1
+// V8M-MAINLINE-DSP-ALLOW-FP-INSTR-NOT: __ARM_ARCH_ISA_ARM
+// V8M-MAINLINE-DSP-ALLOW-FP-INSTR: #define __ARM_ARCH_ISA_THUMB 2
+// V8M-MAINLINE-DSP-ALLOW-FP-INSTR: #define __ARM_ARCH_PROFILE 'M'
+// V8M-MAINLINE-DSP-ALLOW-FP-INSTR-NOT: __ARM_FEATURE_CRC32
+// V8M-MAINLINE-DSP-ALLOW-FP-INSTR: #define __ARM_FEATURE_DSP 1
+// V8M-MAINLINE-DSP-ALLOW-FP-INSTR: #define __ARM_FP 0xe
+// V8M-MAINLINE-DSP-ALLOW-FP-INSTR: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+
 // RUN: %clang -target arm-none-linux-gnu -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-DEFS %s
 // CHECK-DEFS:#define __ARM_PCS 1
 // CHECK-DEFS:#define __ARM_SIZEOF_MINIMAL_ENUM 4
@@ -161,7 +229,9 @@
 // Check that -mfpu works properly for Cortex-A7 (enabled by default).
 // RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A7 %s
 // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A7 %s
-// DEFAULTFPU-A7:#define __ARM_FP 0xE
+// RUN: %clang -target armv7-none-linux-gnueabihf -mcpu=cortex-a7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A7 %s
+// RUN: %clang -target armv7-none-linux-gnueabihf -mthumb -mcpu=cortex-a7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A7 %s
+// DEFAULTFPU-A7:#define __ARM_FP 0xe
 // DEFAULTFPU-A7:#define __ARM_NEON__ 1
 // DEFAULTFPU-A7:#define __ARM_VFPV4__ 1
 
@@ -173,14 +243,14 @@
 
 // RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a7 -mfpu=vfp4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=NONEON-A7 %s
 // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a7 -mfpu=vfp4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=NONEON-A7 %s
-// NONEON-A7:#define __ARM_FP 0xE
+// NONEON-A7:#define __ARM_FP 0xe
 // NONEON-A7-NOT:#define __ARM_NEON__ 1
 // NONEON-A7:#define __ARM_VFPV4__ 1
 
 // Check that -mfpu works properly for Cortex-A5 (enabled by default).
 // RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A5 %s
 // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A5 %s
-// DEFAULTFPU-A5:#define __ARM_FP 0xE
+// DEFAULTFPU-A5:#define __ARM_FP 0xe
 // DEFAULTFPU-A5:#define __ARM_NEON__ 1
 // DEFAULTFPU-A5:#define __ARM_VFPV4__ 1
 
@@ -192,7 +262,7 @@
 
 // RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a5 -mfpu=vfp4-d16 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=NONEON-A5 %s
 // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a5 -mfpu=vfp4-d16 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=NONEON-A5 %s
-// NONEON-A5:#define __ARM_FP 0xE
+// NONEON-A5:#define __ARM_FP 0xe
 // NONEON-A5-NOT:#define __ARM_NEON__ 1
 // NONEON-A5:#define __ARM_VFPV4__ 1
 
@@ -207,26 +277,49 @@
 // A5T-NOT:#define __ARM_FEATURE_DSP
 // A5T-NOT:#define __ARM_FP 0x{{.*}}
 
-// Test whether predefines are as expected when targeting cortex-a5.
+// Test whether predefines are as expected when targeting cortex-a5i (soft FP ABI as default).
 // RUN: %clang -target armv7 -mcpu=cortex-a5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A5 %s
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-a5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A5 %s
 // A5:#define __ARM_ARCH 7
 // A5:#define __ARM_ARCH_7A__ 1
 // A5-NOT:#define __ARM_ARCH_EXT_IDIV__
 // A5:#define __ARM_ARCH_PROFILE 'A'
+// A5-NOT:#define __ARM_DWARF_EH__ 1
 // A5-NOT: #define __ARM_FEATURE_DIRECTED_ROUNDING
 // A5:#define __ARM_FEATURE_DSP 1
 // A5-NOT: #define __ARM_FEATURE_NUMERIC_MAXMIN
-// A5:#define __ARM_FP 0xE
+// A5-NOT:#define __ARM_FP 0x
 
-// Test whether predefines are as expected when targeting cortex-a7.
+// Test whether predefines are as expected when targeting cortex-a5 (softfp FP ABI as default).
+// RUN: %clang -target armv7-eabi -mcpu=cortex-a5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A5-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-a5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A5-ALLOW-FP-INSTR %s
+// A5-ALLOW-FP-INSTR:#define __ARM_ARCH 7
+// A5-ALLOW-FP-INSTR:#define __ARM_ARCH_7A__ 1
+// A5-ALLOW-FP-INSTR-NOT:#define __ARM_ARCH_EXT_IDIV__
+// A5-ALLOW-FP-INSTR:#define __ARM_ARCH_PROFILE 'A'
+// A5-ALLOW-FP-INSTR-NOT: #define __ARM_FEATURE_DIRECTED_ROUNDING
+// A5-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// A5-ALLOW-FP-INSTR-NOT: #define __ARM_FEATURE_NUMERIC_MAXMIN
+// A5-ALLOW-FP-INSTR:#define __ARM_FP 0xe
+
+// Test whether predefines are as expected when targeting cortex-a7 (soft FP ABI as default).
 // RUN: %clang -target armv7k -mcpu=cortex-a7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A7 %s
 // RUN: %clang -target armv7k -mthumb -mcpu=cortex-a7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A7 %s
 // A7:#define __ARM_ARCH 7
 // A7:#define __ARM_ARCH_EXT_IDIV__ 1
 // A7:#define __ARM_ARCH_PROFILE 'A'
+// A7-NOT:#define __ARM_DWARF_EH__ 1
 // A7:#define __ARM_FEATURE_DSP 1
-// A7:#define __ARM_FP 0xE
+// A7-NOT:#define __ARM_FP 0x
+
+// Test whether predefines are as expected when targeting cortex-a7 (softfp FP ABI as default).
+// RUN: %clang -target armv7k-eabi -mcpu=cortex-a7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A7-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7k-eabi -mthumb -mcpu=cortex-a7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A7-ALLOW-FP-INSTR %s
+// A7-ALLOW-FP-INSTR:#define __ARM_ARCH 7
+// A7-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// A7-ALLOW-FP-INSTR:#define __ARM_ARCH_PROFILE 'A'
+// A7-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// A7-ALLOW-FP-INSTR:#define __ARM_FP 0xe
 
 // Test whether predefines are as expected when targeting cortex-a7.
 // RUN: %clang -target x86_64-apple-darwin -arch armv7k -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV7K %s
@@ -235,29 +328,43 @@
 // ARMV7K:#define __ARM_ARCH_PROFILE 'A'
 // ARMV7K:#define __ARM_DWARF_EH__ 1
 // ARMV7K:#define __ARM_FEATURE_DSP 1
-// ARMV7K:#define __ARM_FP 0xE
+// ARMV7K:#define __ARM_FP 0xe
 // ARMV7K:#define __ARM_PCS_VFP 1
 
 
-// Test whether predefines are as expected when targeting cortex-a8.
+// Test whether predefines are as expected when targeting cortex-a8 (soft FP ABI as default).
 // RUN: %clang -target armv7 -mcpu=cortex-a8 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A8 %s
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-a8 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A8 %s
 // A8-NOT:#define __ARM_ARCH_EXT_IDIV__
 // A8:#define __ARM_FEATURE_DSP 1
-// A8:#define __ARM_FP 0xC
+// A8-NOT:#define __ARM_FP 0x
 
-// Test whether predefines are as expected when targeting cortex-a9.
+// Test whether predefines are as expected when targeting cortex-a8 (softfp FP ABI as default).
+// RUN: %clang -target armv7-eabi -mcpu=cortex-a8 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-a8 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A8-ALLOW-FP-INSTR %s
+// A8-ALLOW-FP-INSTR-NOT:#define __ARM_ARCH_EXT_IDIV__
+// A8-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// A8-ALLOW-FP-INSTR:#define __ARM_FP 0xc
+
+// Test whether predefines are as expected when targeting cortex-a9 (soft FP as default).
 // RUN: %clang -target armv7 -mcpu=cortex-a9 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A9 %s
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-a9 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A9 %s
 // A9-NOT:#define __ARM_ARCH_EXT_IDIV__
 // A9:#define __ARM_FEATURE_DSP 1
-// A9:#define __ARM_FP 0xE
+// A9-NOT:#define __ARM_FP 0x
+
+// Test whether predefines are as expected when targeting cortex-a9 (softfp FP as default).
+// RUN: %clang -target armv7-eabi -mcpu=cortex-a9 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A9-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-a9 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A9-ALLOW-FP-INSTR %s
+// A9-ALLOW-FP-INSTR-NOT:#define __ARM_ARCH_EXT_IDIV__
+// A9-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// A9-ALLOW-FP-INSTR:#define __ARM_FP 0xe
 
 
 // Check that -mfpu works properly for Cortex-A12 (enabled by default).
 // RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a12 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A12 %s
 // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a12 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A12 %s
-// DEFAULTFPU-A12:#define __ARM_FP 0xE
+// DEFAULTFPU-A12:#define __ARM_FP 0xe
 // DEFAULTFPU-A12:#define __ARM_NEON__ 1
 // DEFAULTFPU-A12:#define __ARM_VFPV4__ 1
 
@@ -267,7 +374,7 @@
 // FPUNONE-A12-NOT:#define __ARM_NEON__ 1
 // FPUNONE-A12-NOT:#define __ARM_VFPV4__ 1
 
-// Test whether predefines are as expected when targeting cortex-a12.
+// Test whether predefines are as expected when targeting cortex-a12 (soft FP ABI as default).
 // RUN: %clang -target armv7 -mcpu=cortex-a12 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A12 %s
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-a12 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A12 %s
 // A12:#define __ARM_ARCH 7
@@ -275,19 +382,36 @@
 // A12:#define __ARM_ARCH_EXT_IDIV__ 1
 // A12:#define __ARM_ARCH_PROFILE 'A'
 // A12:#define __ARM_FEATURE_DSP 1
-// A12:#define __ARM_FP 0xE
+// A12-NOT:#define __ARM_FP 0x
 
-// Test whether predefines are as expected when targeting cortex-a15.
+// Test whether predefines are as expected when targeting cortex-a12 (soft FP ABI as default).
+// RUN: %clang -target armv7-eabi -mcpu=cortex-a12 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A12-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-a12 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A12-ALLOW-FP-INSTR %s
+// A12-ALLOW-FP-INSTR:#define __ARM_ARCH 7
+// A12-ALLOW-FP-INSTR:#define __ARM_ARCH_7A__ 1
+// A12-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// A12-ALLOW-FP-INSTR:#define __ARM_ARCH_PROFILE 'A'
+// A12-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// A12-ALLOW-FP-INSTR:#define __ARM_FP 0xe
+
+// Test whether predefines are as expected when targeting cortex-a15 (soft FP ABI as default).
 // RUN: %clang -target armv7 -mcpu=cortex-a15 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A15 %s
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-a15 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A15 %s
 // A15:#define __ARM_ARCH_EXT_IDIV__ 1
 // A15:#define __ARM_FEATURE_DSP 1
-// A15:#define __ARM_FP 0xE
+// A15-NOT:#define __ARM_FP 0x
+
+// Test whether predefines are as expected when targeting cortex-a15 (softfp ABI as default).
+// RUN: %clang -target armv7-eabi -mcpu=cortex-a15 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A15-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-a15 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A15-ALLOW-FP-INSTR %s
+// A15-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// A15-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// A15-ALLOW-FP-INSTR:#define __ARM_FP 0xe
 
 // Check that -mfpu works properly for Cortex-A17 (enabled by default).
 // RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a17 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A17 %s
 // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a17 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A17 %s
-// DEFAULTFPU-A17:#define __ARM_FP 0xE
+// DEFAULTFPU-A17:#define __ARM_FP 0xe
 // DEFAULTFPU-A17:#define __ARM_NEON__ 1
 // DEFAULTFPU-A17:#define __ARM_VFPV4__ 1
 
@@ -297,7 +421,7 @@
 // FPUNONE-A17-NOT:#define __ARM_NEON__ 1
 // FPUNONE-A17-NOT:#define __ARM_VFPV4__ 1
 
-// Test whether predefines are as expected when targeting cortex-a17.
+// Test whether predefines are as expected when targeting cortex-a17 (soft FP ABI as default).
 // RUN: %clang -target armv7 -mcpu=cortex-a17 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A17 %s
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-a17 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A17 %s
 // A17:#define __ARM_ARCH 7
@@ -305,16 +429,33 @@
 // A17:#define __ARM_ARCH_EXT_IDIV__ 1
 // A17:#define __ARM_ARCH_PROFILE 'A'
 // A17:#define __ARM_FEATURE_DSP 1
-// A17:#define __ARM_FP 0xE
+// A17-NOT:#define __ARM_FP 0x
 
-// Test whether predefines are as expected when targeting swift.
+// Test whether predefines are as expected when targeting cortex-a17 (softfp FP ABI as default).
+// RUN: %clang -target armv7-eabi -mcpu=cortex-a17 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A17-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-a17 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A17-ALLOW-FP-INSTR %s
+// A17-ALLOW-FP-INSTR:#define __ARM_ARCH 7
+// A17-ALLOW-FP-INSTR:#define __ARM_ARCH_7A__ 1
+// A17-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// A17-ALLOW-FP-INSTR:#define __ARM_ARCH_PROFILE 'A'
+// A17-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// A17-ALLOW-FP-INSTR:#define __ARM_FP 0xe
+
+// Test whether predefines are as expected when targeting swift (soft FP ABI as default).
 // RUN: %clang -target armv7s -mcpu=swift -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=SWIFT %s
 // RUN: %clang -target armv7s -mthumb -mcpu=swift -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=SWIFT %s
 // SWIFT:#define __ARM_ARCH_EXT_IDIV__ 1
 // SWIFT:#define __ARM_FEATURE_DSP 1
-// SWIFT:#define __ARM_FP 0xE
+// SWIFT-NOT:#define __ARM_FP 0xxE
 
-// Test whether predefines are as expected when targeting ARMv8-A Cortex implementations
+// Test whether predefines are as expected when targeting swift (softfp FP ABI as default).
+// RUN: %clang -target armv7s-eabi -mcpu=swift -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=SWIFT-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7s-eabi -mthumb -mcpu=swift -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=SWIFT-ALLOW-FP-INSTR %s
+// SWIFT-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// SWIFT-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// SWIFT-ALLOW-FP-INSTR:#define __ARM_FP 0xe
+
+// Test whether predefines are as expected when targeting ARMv8-A Cortex implementations (soft FP ABI as default)
 // RUN: %clang -target armv8 -mcpu=cortex-a32 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8 %s
 // RUN: %clang -target armv8 -mthumb -mcpu=cortex-a32 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8 %s
 // RUN: %clang -target armv8 -mcpu=cortex-a35 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8 %s
@@ -329,7 +470,24 @@
 // RUN: %clang -target armv8 -mthumb -mcpu=cortex-a73 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8 %s
 // ARMV8:#define __ARM_ARCH_EXT_IDIV__ 1
 // ARMV8:#define __ARM_FEATURE_DSP 1
-// ARMV8:#define __ARM_FP 0xE
+// ARMV8-NOT:#define __ARM_FP 0x
+
+// Test whether predefines are as expected when targeting ARMv8-A Cortex implementations (softfp FP ABI as default)
+// RUN: %clang -target armv8-eabi -mcpu=cortex-a32 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mthumb -mcpu=cortex-a32 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mcpu=cortex-a35 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mthumb -mcpu=cortex-a35 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mcpu=cortex-a53 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mthumb -mcpu=cortex-a53 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mcpu=cortex-a57 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mthumb -mcpu=cortex-a57 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mcpu=cortex-a72 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mthumb -mcpu=cortex-a72 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mcpu=cortex-a73 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mthumb -mcpu=cortex-a73 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// ARMV8-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// ARMV8-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// ARMV8-ALLOW-FP-INSTR:#define __ARM_FP 0xe
 
 // Test whether predefines are as expected when targeting cortex-r4.
 // RUN: %clang -target armv7 -mcpu=cortex-r4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R4-ARM %s
@@ -342,32 +500,59 @@
 // R4-THUMB:#define __ARM_FEATURE_DSP 1
 // R4-THUMB-NOT:#define __ARM_FP 0x{{.*}}
 
-// Test whether predefines are as expected when targeting cortex-r4f.
+// Test whether predefines are as expected when targeting cortex-r4f (soft FP ABI as default).
 // RUN: %clang -target armv7 -mcpu=cortex-r4f -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R4F-ARM %s
 // R4F-ARM-NOT:#define __ARM_ARCH_EXT_IDIV__
 // R4F-ARM:#define __ARM_FEATURE_DSP 1
-// R4F-ARM:#define __ARM_FP 0xC
+// R4F-ARM-NOT:#define __ARM_FP 0x
+
+// Test whether predefines are as expected when targeting cortex-r4f (softfp FP ABI as default).
+// RUN: %clang -target armv7-eabi -mcpu=cortex-r4f -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R4F-ARM-ALLOW-FP-INSTR %s
+// R4F-ARM-ALLOW-FP-INSTR-NOT:#define __ARM_ARCH_EXT_IDIV__
+// R4F-ARM-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// R4F-ARM-ALLOW-FP-INSTR:#define __ARM_FP 0xc
 
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-r4f -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R4F-THUMB %s
 // R4F-THUMB:#define __ARM_ARCH_EXT_IDIV__ 1
 // R4F-THUMB:#define __ARM_FEATURE_DSP 1
-// R4F-THUMB:#define __ARM_FP 0xC
+// R4F-THUMB-NOT:#define __ARM_FP 0x
 
-// Test whether predefines are as expected when targeting cortex-r5.
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-r4f -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R4F-THUMB-ALLOW-FP-INSTR %s
+// R4F-THUMB-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// R4F-THUMB-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// R4F-THUMB-ALLOW-FP-INSTR:#define __ARM_FP 0xc
+
+// Test whether predefines are as expected when targeting cortex-r5 (soft FP ABI as default).
 // RUN: %clang -target armv7 -mcpu=cortex-r5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R5 %s
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-r5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R5 %s
 // R5:#define __ARM_ARCH_EXT_IDIV__ 1
 // R5:#define __ARM_FEATURE_DSP 1
-// R5:#define __ARM_FP 0xC
+// R5-NOT:#define __ARM_FP 0x
 
-// Test whether predefines are as expected when targeting cortex-r7 and cortex-r8.
+// Test whether predefines are as expected when targeting cortex-r5 (softfp FP ABI as default).
+// RUN: %clang -target armv7-eabi -mcpu=cortex-r5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R5-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-r5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R5-ALLOW-FP-INSTR %s
+// R5-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// R5-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// R5-ALLOW-FP-INSTR:#define __ARM_FP 0xc
+
+// Test whether predefines are as expected when targeting cortex-r7 and cortex-r8 (soft FP ABI as default).
 // RUN: %clang -target armv7 -mcpu=cortex-r7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R7-R8 %s
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-r7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R7-R8 %s
 // RUN: %clang -target armv7 -mcpu=cortex-r8 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R7-R8 %s
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-r8 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R7-R8 %s
 // R7-R8:#define __ARM_ARCH_EXT_IDIV__ 1
 // R7-R8:#define __ARM_FEATURE_DSP 1
-// R7-R8:#define __ARM_FP 0xE
+// R7-R8-NOT:#define __ARM_FP 0x
+
+// Test whether predefines are as expected when targeting cortex-r7 and cortex-r8 (softfp FP ABI as default).
+// RUN: %clang -target armv7-eabi -mcpu=cortex-r7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R7-R8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-r7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R7-R8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7-eabi -mcpu=cortex-r8 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R7-R8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-r8 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R7-R8-ALLOW-FP-INSTR %s
+// R7-R8-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// R7-R8-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// R7-R8-ALLOW-FP-INSTR:#define __ARM_FP 0xe
 
 // Test whether predefines are as expected when targeting cortex-m0.
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-m0 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M0-THUMB %s
@@ -385,18 +570,31 @@
 // M3-THUMB-NOT:#define __ARM_FEATURE_DSP
 // M3-THUMB-NOT:#define __ARM_FP 0x{{.*}}
 
-// Test whether predefines are as expected when targeting cortex-m4.
+// Test whether predefines are as expected when targeting cortex-m4 (soft FP ABI as default).
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-m4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M4-THUMB %s
 // M4-THUMB:#define __ARM_ARCH_EXT_IDIV__ 1
 // M4-THUMB:#define __ARM_FEATURE_DSP 1
-// M4-THUMB:#define __ARM_FP 0x6
+// M4-THUMB-NOT:#define __ARM_FP 0x
 
-// Test whether predefines are as expected when targeting cortex-m7.
+// Test whether predefines are as expected when targeting cortex-m4 (softfp ABI as default).
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-m4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M4-THUMB-ALLOW-FP-INSTR %s
+// M4-THUMB-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// M4-THUMB-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// M4-THUMB-ALLOW-FP-INSTR:#define __ARM_FP 0x6
+
+// Test whether predefines are as expected when targeting cortex-m7 (soft FP ABI as default).
 // RUN: %clang -target armv7 -mthumb -mcpu=cortex-m7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M7-THUMB %s
 // M7-THUMB:#define __ARM_ARCH_EXT_IDIV__ 1
 // M7-THUMB:#define __ARM_FEATURE_DSP 1
-// M7-THUMB:#define __ARM_FP 0xE
-// M7-THUMB:#define __ARM_FPV5__ 1
+// M7-THUMB-NOT:#define __ARM_FP 0x
+// M7-THUMB-NOT:#define __ARM_FPV5__
+
+// Test whether predefines are as expected when targeting cortex-m7 (softfp FP ABI as default).
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=cortex-m7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M7-THUMB-ALLOW-FP-INSTR %s
+// M7-THUMB-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// M7-THUMB-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// M7-THUMB-ALLOW-FP-INSTR:#define __ARM_FP 0xe
+// M7-THUMB-ALLOW-FP-INSTR:#define __ARM_FPV5__ 1
 
 // Test whether predefines are as expected when targeting v8m cores
 // RUN: %clang -target arm -mcpu=cortex-m23 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M23 %s
@@ -411,6 +609,7 @@
 // M23-NOT: __ARM_FP 0x{{.*}}
 // M23-NOT: __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
 
+// Test whether predefines are as expected when targeting m33 (soft FP ABI as default).
 // RUN: %clang -target arm -mcpu=cortex-m33 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M33 %s
 // M33: #define __ARM_ARCH 8
 // M33: #define __ARM_ARCH_8M_MAIN__ 1
@@ -420,26 +619,46 @@
 // M33: #define __ARM_ARCH_PROFILE 'M'
 // M33-NOT: __ARM_FEATURE_CRC32
 // M33: #define __ARM_FEATURE_DSP 1
-// M33: #define __ARM_FP 0x6
+// M33-NOT: #define __ARM_FP 0x
 // M33: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
 
-// Test whether predefines are as expected when targeting krait.
+// Test whether predefines are as expected when targeting m33 (softfp FP ABI as default).
+// RUN: %clang -target arm-eabi -mcpu=cortex-m33 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M33-ALLOW-FP-INSTR %s
+// M33-ALLOW-FP-INSTR: #define __ARM_ARCH 8
+// M33-ALLOW-FP-INSTR: #define __ARM_ARCH_8M_MAIN__ 1
+// M33-ALLOW-FP-INSTR: #define __ARM_ARCH_EXT_IDIV__ 1
+// M33-ALLOW-FP-INSTR-NOT: __ARM_ARCH_ISA_ARM
+// M33-ALLOW-FP-INSTR: #define __ARM_ARCH_ISA_THUMB 2
+// M33-ALLOW-FP-INSTR: #define __ARM_ARCH_PROFILE 'M'
+// M33-ALLOW-FP-INSTR-NOT: __ARM_FEATURE_CRC32
+// M33-ALLOW-FP-INSTR: #define __ARM_FEATURE_DSP 1
+// M33-ALLOW-FP-INSTR: #define __ARM_FP 0x6
+// M33-ALLOW-FP-INSTR: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+
+// Test whether predefines are as expected when targeting krait (soft FP as default).
 // RUN: %clang -target armv7 -mcpu=krait -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=KRAIT %s
 // RUN: %clang -target armv7 -mthumb -mcpu=krait -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=KRAIT %s
 // KRAIT:#define __ARM_ARCH_EXT_IDIV__ 1
 // KRAIT:#define __ARM_FEATURE_DSP 1
-// KRAIT:#define  __ARM_VFPV4__ 1
+// KRAIT-NOT:#define  __ARM_VFPV4__
+
+// Test whether predefines are as expected when targeting krait (softfp FP as default).
+// RUN: %clang -target armv7-eabi -mcpu=krait -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=KRAIT-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv7-eabi -mthumb -mcpu=krait -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=KRAIT-ALLOW-FP-INSTR %s
+// KRAIT-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
+// KRAIT-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
+// KRAIT-ALLOW-FP-INSTR:#define  __ARM_VFPV4__ 1
 
 // RUN: %clang -target armv8.1a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81A %s
 // CHECK-V81A: #define __ARM_ARCH 8
 // CHECK-V81A: #define __ARM_ARCH_8_1A__ 1
 // CHECK-V81A: #define __ARM_ARCH_PROFILE 'A'
 // CHECK-V81A: #define __ARM_FEATURE_QRDMX 1
-// CHECK-V81A: #define __ARM_FP 0xE
+// CHECK-V81A: #define __ARM_FP 0xe
 
 // RUN: %clang -target armv8.2a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V82A %s
 // CHECK-V82A: #define __ARM_ARCH 8
 // CHECK-V82A: #define __ARM_ARCH_8_2A__ 1
 // CHECK-V82A: #define __ARM_ARCH_PROFILE 'A'
 // CHECK-V82A: #define __ARM_FEATURE_QRDMX 1
-// CHECK-V82A: #define __ARM_FP 0xE
+// CHECK-V82A: #define __ARM_FP 0xe
diff --git a/test/Preprocessor/c17.c b/test/Preprocessor/c17.c
new file mode 100644
index 0000000..c610e84
--- /dev/null
+++ b/test/Preprocessor/c17.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c17 %s
+// expected-no-diagnostics
+
+_Static_assert(__STDC_VERSION__ == 201710L, "Incorrect __STDC_VERSION__");
diff --git a/test/Preprocessor/has_c_attribute.c b/test/Preprocessor/has_c_attribute.c
new file mode 100644
index 0000000..dc22da7
--- /dev/null
+++ b/test/Preprocessor/has_c_attribute.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -fdouble-square-bracket-attributes -std=c11 -E %s -o - | FileCheck %s
+
+// CHECK: has_fallthrough
+#if __has_c_attribute(fallthrough)
+  int has_fallthrough();
+#endif
+
+// CHECK: does_not_have_selectany
+#if !__has_c_attribute(selectany)
+  int does_not_have_selectany();
+#endif
+
diff --git a/test/Preprocessor/hexagon-predefines.c b/test/Preprocessor/hexagon-predefines.c
index 857ef8f..fe87262 100644
--- a/test/Preprocessor/hexagon-predefines.c
+++ b/test/Preprocessor/hexagon-predefines.c
@@ -19,6 +19,20 @@
 // CHECK-V60-NOT: #define __HVX__ 1
 // CHECK-V60: #define __hexagon__ 1
 
+// RUN: %clang_cc1 -E -dM -triple hexagon-unknown-elf -target-cpu hexagonv62 %s | FileCheck %s -check-prefix CHECK-V62
+// CHECK-V62: #define __HEXAGON_ARCH__ 62
+// CHECK-V62: #define __HEXAGON_V62__ 1
+// CHECK-V62-NOT: #define __HVX_LENGTH__
+// CHECK-V62-NOT: #define __HVX__ 1
+// CHECK-V62: #define __hexagon__ 1
+
+// RUN: %clang_cc1 -E -dM -triple hexagon-unknown-elf -target-cpu hexagonv65 %s | FileCheck %s -check-prefix CHECK-V65
+// CHECK-V65: #define __HEXAGON_ARCH__ 65
+// CHECK-V65: #define __HEXAGON_V65__ 1
+// CHECK-V65-NOT: #define __HVX_LENGTH__
+// CHECK-V65-NOT: #define __HVX__ 1
+// CHECK-V65: #define __hexagon__ 1
+
 // The HVX flags are explicitly defined by the driver.
 // RUN: %clang_cc1 -E -dM -triple hexagon-unknown-elf -target-cpu hexagonv60 \
 // RUN: -target-feature +hvxv60 -target-feature +hvx-length64b %s | FileCheck \
diff --git a/test/Preprocessor/init.c b/test/Preprocessor/init.c
index bb5c001..2ed74dc 100644
--- a/test/Preprocessor/init.c
+++ b/test/Preprocessor/init.c
@@ -1243,6 +1243,7 @@
 // AARCH64-FREEBSD:#define __WCHAR_TYPE__ unsigned int
 // AARCH64-FREEBSD:#define __WCHAR_UNSIGNED__ 1
 // AARCH64-FREEBSD:#define __WCHAR_WIDTH__ 32
+// AARCH64-FREEBSD:#define __WINT_MAX__ 2147483647
 // AARCH64-FREEBSD:#define __WINT_TYPE__ int
 // AARCH64-FREEBSD:#define __WINT_WIDTH__ 32
 // AARCH64-FREEBSD:#define __aarch64__ 1
@@ -1438,6 +1439,12 @@
 // AARCH64-DARWIN: #define __WINT_WIDTH__ 32
 // AARCH64-DARWIN: #define __aarch64__ 1
 
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=thumbv7-windows-msvc < /dev/null | FileCheck -match-full-lines -check-prefix ARM-MSVC %s
+//
+// ARM-MSVC: #define _M_ARM_NT 1
+// ARM-MSVC: #define _WIN32 1
+// ARM-MSVC-NOT:#define __ARM_DWARF_EH__ 1
+
 // RUN: %clang_cc1 -E -dM -ffreestanding -triple=aarch64-windows-msvc < /dev/null | FileCheck -match-full-lines -check-prefix AARCH64-MSVC %s
 //
 // AARCH64-MSVC: #define _INTEGRAL_MAX_BITS 64
@@ -1791,6 +1798,11 @@
 // ARM:#define __arm 1
 // ARM:#define __arm__ 1
 
+// RUN: %clang_cc1 -dM -ffreestanding -triple arm-none-none -target-abi apcs-gnu -E /dev/null -o - | FileCheck -match-full-lines -check-prefix ARM-APCS-GNU %s
+// ARM-APCS-GNU: #define __INTPTR_TYPE__ int
+// ARM-APCS-GNU: #define __PTRDIFF_TYPE__ int
+// ARM-APCS-GNU: #define __SIZE_TYPE__ unsigned int
+
 // RUN: %clang_cc1 -E -dM -ffreestanding -triple=armeb-none-none < /dev/null | FileCheck -match-full-lines -check-prefix ARM-BE %s
 //
 // ARM-BE-NOT:#define _LP64
@@ -2446,10 +2458,10 @@
 // ARM-NETBSD:#define __INTMAX_MAX__ 9223372036854775807LL
 // ARM-NETBSD:#define __INTMAX_TYPE__ long long int
 // ARM-NETBSD:#define __INTMAX_WIDTH__ 64
-// ARM-NETBSD:#define __INTPTR_FMTd__ "d"
-// ARM-NETBSD:#define __INTPTR_FMTi__ "i"
-// ARM-NETBSD:#define __INTPTR_MAX__ 2147483647
-// ARM-NETBSD:#define __INTPTR_TYPE__ int
+// ARM-NETBSD:#define __INTPTR_FMTd__ "ld"
+// ARM-NETBSD:#define __INTPTR_FMTi__ "li"
+// ARM-NETBSD:#define __INTPTR_MAX__ 2147483647L
+// ARM-NETBSD:#define __INTPTR_TYPE__ long int
 // ARM-NETBSD:#define __INTPTR_WIDTH__ 32
 // ARM-NETBSD:#define __INT_FAST16_FMTd__ "hd"
 // ARM-NETBSD:#define __INT_FAST16_FMTi__ "hi"
@@ -2541,8 +2553,8 @@
 // ARM-NETBSD:#define __UINTMAX_MAX__ 18446744073709551615ULL
 // ARM-NETBSD:#define __UINTMAX_TYPE__ long long unsigned int
 // ARM-NETBSD:#define __UINTMAX_WIDTH__ 64
-// ARM-NETBSD:#define __UINTPTR_MAX__ 4294967295U
-// ARM-NETBSD:#define __UINTPTR_TYPE__ unsigned int
+// ARM-NETBSD:#define __UINTPTR_MAX__ 4294967295UL
+// ARM-NETBSD:#define __UINTPTR_TYPE__ long unsigned int
 // ARM-NETBSD:#define __UINTPTR_WIDTH__ 32
 // ARM-NETBSD:#define __UINT_FAST16_MAX__ 65535
 // ARM-NETBSD:#define __UINT_FAST16_TYPE__ unsigned short
@@ -2640,6 +2652,10 @@
 // Thumbebv7: #define __THUMB_INTERWORK__ 1
 // Thumbebv7: #define __thumb2__ 1
 
+// RUN: %clang -E -dM -ffreestanding -target thumbv7-pc-mingw32 %s -o - | FileCheck -match-full-lines -check-prefix THUMB-MINGW %s
+
+// THUMB-MINGW:#define __ARM_DWARF_EH__ 1
+
 //
 // RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-none-none < /dev/null | FileCheck -match-full-lines -check-prefix I386 %s
 //
@@ -8425,6 +8441,7 @@
 // X86_64-CLOUDABI:#define __WCHAR_MAX__ 2147483647
 // X86_64-CLOUDABI:#define __WCHAR_TYPE__ int
 // X86_64-CLOUDABI:#define __WCHAR_WIDTH__ 32
+// X86_64-CLOUDABI:#define __WINT_MAX__ 2147483647
 // X86_64-CLOUDABI:#define __WINT_TYPE__ int
 // X86_64-CLOUDABI:#define __WINT_WIDTH__ 32
 // X86_64-CLOUDABI:#define __amd64 1
@@ -9349,6 +9366,7 @@
 // WEBASSEMBLY32-NEXT:#define __WCHAR_TYPE__ int
 // WEBASSEMBLY32-NOT:#define __WCHAR_UNSIGNED__
 // WEBASSEMBLY32-NEXT:#define __WCHAR_WIDTH__ 32
+// WEBASSEMBLY32-NEXT:#define __WINT_MAX__ 2147483647
 // WEBASSEMBLY32-NEXT:#define __WINT_TYPE__ int
 // WEBASSEMBLY32-NOT:#define __WINT_UNSIGNED__
 // WEBASSEMBLY32-NEXT:#define __WINT_WIDTH__ 32
@@ -9680,6 +9698,7 @@
 // WEBASSEMBLY64-NEXT:#define __WCHAR_TYPE__ int
 // WEBASSEMBLY64-NOT:#define __WCHAR_UNSIGNED__
 // WEBASSEMBLY64-NEXT:#define __WCHAR_WIDTH__ 32
+// WEBASSEMBLY64-NEXT:#define __WINT_MAX__ 2147483647
 // WEBASSEMBLY64-NEXT:#define __WINT_TYPE__ int
 // WEBASSEMBLY64-NOT:#define __WINT_UNSIGNED__
 // WEBASSEMBLY64-NEXT:#define __WINT_WIDTH__ 32
@@ -9881,11 +9900,11 @@
 
 
 // RUN: %clang_cc1 -E -dM -ffreestanding \
-// RUN:    -triple i686-windows-msvc -fms-compatibility < /dev/null \
+// RUN:    -triple i686-windows-msvc -fms-compatibility -x c++ < /dev/null \
 // RUN:  | FileCheck -match-full-lines -check-prefix MSVC-X32 %s
 
 // RUN: %clang_cc1 -E -dM -ffreestanding \
-// RUN:    -triple x86_64-windows-msvc -fms-compatibility < /dev/null \
+// RUN:    -triple x86_64-windows-msvc -fms-compatibility -x c++ < /dev/null \
 // RUN:  | FileCheck -match-full-lines -check-prefix MSVC-X64 %s
 
 // MSVC-X32:#define __CLANG_ATOMIC_BOOL_LOCK_FREE 2
@@ -9899,6 +9918,7 @@
 // MSVC-X32-NEXT:#define __CLANG_ATOMIC_SHORT_LOCK_FREE 2
 // MSVC-X32-NEXT:#define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2
 // MSVC-X32-NOT:#define __GCC_ATOMIC{{.*}}
+// MSVC-X32:#define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 8U
 
 // MSVC-X64:#define __CLANG_ATOMIC_BOOL_LOCK_FREE 2
 // MSVC-X64-NEXT:#define __CLANG_ATOMIC_CHAR16_T_LOCK_FREE 2
@@ -9910,7 +9930,8 @@
 // MSVC-X64-NEXT:#define __CLANG_ATOMIC_POINTER_LOCK_FREE 2
 // MSVC-X64-NEXT:#define __CLANG_ATOMIC_SHORT_LOCK_FREE 2
 // MSVC-X64-NEXT:#define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2
-// MSVC-X86-NOT:#define __GCC_ATOMIC{{.*}}
+// MSVC-X64-NOT:#define __GCC_ATOMIC{{.*}}
+// MSVC-X64:#define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16ULL
 
 // RUN: %clang_cc1 -E -dM -ffreestanding                \
 // RUN:   -triple=aarch64-apple-ios9 < /dev/null        \
@@ -9920,3 +9941,482 @@
 // RUN: | FileCheck -check-prefix=DARWIN %s
 
 // DARWIN:#define __STDC_NO_THREADS__ 1
+
+// RUN: %clang_cc1 -triple i386-apple-macosx -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix MACOS-32 %s
+// RUN: %clang_cc1 -triple x86_64-apple-macosx -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix MACOS-64 %s
+
+// MACOS-32: #define __INTPTR_TYPE__ long int
+// MACOS-32: #define __PTRDIFF_TYPE__ int
+// MACOS-32: #define __SIZE_TYPE__ long unsigned int
+
+// MACOS-64: #define __INTPTR_TYPE__ long int
+// MACOS-64: #define __PTRDIFF_TYPE__ long int
+// MACOS-64: #define __SIZE_TYPE__ long unsigned int
+
+// RUN: %clang_cc1 -triple i386-apple-ios-simulator -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix IOS-32 %s
+// RUN: %clang_cc1 -triple armv7-apple-ios -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix IOS-32 %s
+// RUN: %clang_cc1 -triple x86_64-apple-ios-simulator -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix IOS-64 %s
+// RUN: %clang_cc1 -triple arm64-apple-ios -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix IOS-64 %s
+
+// IOS-32: #define __INTPTR_TYPE__ long int
+// IOS-32: #define __PTRDIFF_TYPE__ int
+// IOS-32: #define __SIZE_TYPE__ long unsigned int
+
+// IOS-64: #define __INTPTR_TYPE__ long int
+// IOS-64: #define __PTRDIFF_TYPE__ long int
+// IOS-64: #define __SIZE_TYPE__ long unsigned int
+
+// RUN: %clang_cc1 -triple i386-apple-tvos-simulator -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix TVOS-32 %s
+// RUN: %clang_cc1 -triple armv7-apple-tvos -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix TVOS-32 %s
+// RUN: %clang_cc1 -triple x86_64-apple-tvos-simulator -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix TVOS-64 %s
+// RUN: %clang_cc1 -triple arm64-apple-tvos -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix TVOS-64 %s
+
+// TVOS-32: #define __INTPTR_TYPE__ long int
+// TVOS-32: #define __PTRDIFF_TYPE__ int
+// TVOS-32: #define __SIZE_TYPE__ long unsigned int
+
+// TVOS-64: #define __INTPTR_TYPE__ long int
+// TVOS-64: #define __PTRDIFF_TYPE__ long int
+// TVOS-64: #define __SIZE_TYPE__ long unsigned int
+
+// RUN: %clang_cc1 -triple i386-apple-watchos-simulator -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix WATCHOS-32 %s
+// RUN: %clang_cc1 -triple armv7k-apple-watchos -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix WATCHOS-64 %s
+// RUN: %clang_cc1 -triple x86_64-apple-watchos-simulator -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix WATCHOS-64 %s
+// RUN: %clang_cc1 -triple arm64-apple-watchos -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix WATCHOS-64 %s
+
+// WATCHOS-32: #define __INTPTR_TYPE__ long int
+// WATCHOS-32: #define __PTRDIFF_TYPE__ int
+// WATCHOS-32: #define __SIZE_TYPE__ long unsigned int
+
+// WATCHOS-64: #define __INTPTR_TYPE__ long int
+// WATCHOS-64: #define __PTRDIFF_TYPE__ long int
+// WATCHOS-64: #define __SIZE_TYPE__ long unsigned int
+
+// RUN: %clang_cc1 -triple armv7-apple-none-macho -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix ARM-DARWIN-BAREMETAL-32 %s
+// RUN: %clang_cc1 -triple arm64-apple-none-macho -ffreestanding -dM -E /dev/null -o - | FileCheck -match-full-lines -check-prefix ARM-DARWIN-BAREMETAL-64 %s
+
+// ARM-DARWIN-BAREMETAL-32: #define __INTPTR_TYPE__ long int
+// ARM-DARWIN-BAREMETAL-32: #define __PTRDIFF_TYPE__ int
+// ARM-DARWIN-BAREMETAL-32: #define __SIZE_TYPE__ long unsigned int
+
+// ARM-DARWIN-BAREMETAL-64: #define __INTPTR_TYPE__ long int
+// ARM-DARWIN-BAREMETAL-64: #define __PTRDIFF_TYPE__ long int
+// ARM-DARWIN-BAREMETAL-64: #define __SIZE_TYPE__ long unsigned int
+
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=riscv32 < /dev/null \
+// RUN:   | FileCheck -match-full-lines -check-prefix=RISCV32 %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=riscv32-unknown-linux < /dev/null \
+// RUN:   | FileCheck -match-full-lines -check-prefixes=RISCV32,RISCV32-LINUX %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=riscv32 \
+// RUN: -fforce-enable-int128 < /dev/null | FileCheck -match-full-lines \
+// RUN: -check-prefixes=RISCV32,RISCV32-INT128 %s
+// RISCV32: #define _ILP32 1
+// RISCV32: #define __ATOMIC_ACQUIRE 2
+// RISCV32: #define __ATOMIC_ACQ_REL 4
+// RISCV32: #define __ATOMIC_CONSUME 1
+// RISCV32: #define __ATOMIC_RELAXED 0
+// RISCV32: #define __ATOMIC_RELEASE 3
+// RISCV32: #define __ATOMIC_SEQ_CST 5
+// RISCV32: #define __BIGGEST_ALIGNMENT__ 16
+// RISCV32: #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
+// RISCV32: #define __CHAR16_TYPE__ unsigned short
+// RISCV32: #define __CHAR32_TYPE__ unsigned int
+// RISCV32: #define __CHAR_BIT__ 8
+// RISCV32: #define __DBL_DECIMAL_DIG__ 17
+// RISCV32: #define __DBL_DENORM_MIN__ 4.9406564584124654e-324
+// RISCV32: #define __DBL_DIG__ 15
+// RISCV32: #define __DBL_EPSILON__ 2.2204460492503131e-16
+// RISCV32: #define __DBL_HAS_DENORM__ 1
+// RISCV32: #define __DBL_HAS_INFINITY__ 1
+// RISCV32: #define __DBL_HAS_QUIET_NAN__ 1
+// RISCV32: #define __DBL_MANT_DIG__ 53
+// RISCV32: #define __DBL_MAX_10_EXP__ 308
+// RISCV32: #define __DBL_MAX_EXP__ 1024
+// RISCV32: #define __DBL_MAX__ 1.7976931348623157e+308
+// RISCV32: #define __DBL_MIN_10_EXP__ (-307)
+// RISCV32: #define __DBL_MIN_EXP__ (-1021)
+// RISCV32: #define __DBL_MIN__ 2.2250738585072014e-308
+// RISCV32: #define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__
+// RISCV32: #define __ELF__ 1
+// RISCV32: #define __FINITE_MATH_ONLY__ 0
+// RISCV32: #define __FLT_DECIMAL_DIG__ 9
+// RISCV32: #define __FLT_DENORM_MIN__ 1.40129846e-45F
+// RISCV32: #define __FLT_DIG__ 6
+// RISCV32: #define __FLT_EPSILON__ 1.19209290e-7F
+// RISCV32: #define __FLT_EVAL_METHOD__ 0
+// RISCV32: #define __FLT_HAS_DENORM__ 1
+// RISCV32: #define __FLT_HAS_INFINITY__ 1
+// RISCV32: #define __FLT_HAS_QUIET_NAN__ 1
+// RISCV32: #define __FLT_MANT_DIG__ 24
+// RISCV32: #define __FLT_MAX_10_EXP__ 38
+// RISCV32: #define __FLT_MAX_EXP__ 128
+// RISCV32: #define __FLT_MAX__ 3.40282347e+38F
+// RISCV32: #define __FLT_MIN_10_EXP__ (-37)
+// RISCV32: #define __FLT_MIN_EXP__ (-125)
+// RISCV32: #define __FLT_MIN__ 1.17549435e-38F
+// RISCV32: #define __FLT_RADIX__ 2
+// RISCV32: #define __GCC_ATOMIC_BOOL_LOCK_FREE 1
+// RISCV32: #define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 1
+// RISCV32: #define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 1
+// RISCV32: #define __GCC_ATOMIC_CHAR_LOCK_FREE 1
+// RISCV32: #define __GCC_ATOMIC_INT_LOCK_FREE 1
+// RISCV32: #define __GCC_ATOMIC_LLONG_LOCK_FREE 1
+// RISCV32: #define __GCC_ATOMIC_LONG_LOCK_FREE 1
+// RISCV32: #define __GCC_ATOMIC_POINTER_LOCK_FREE 1
+// RISCV32: #define __GCC_ATOMIC_SHORT_LOCK_FREE 1
+// RISCV32: #define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
+// RISCV32: #define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 1
+// RISCV32: #define __GNUC_MINOR__ {{.*}}
+// RISCV32: #define __GNUC_PATCHLEVEL__ {{.*}}
+// RISCV32: #define __GNUC_STDC_INLINE__ 1
+// RISCV32: #define __GNUC__ {{.*}}
+// RISCV32: #define __GXX_ABI_VERSION {{.*}}
+// RISCV32: #define __ILP32__ 1
+// RISCV32: #define __INT16_C_SUFFIX__
+// RISCV32: #define __INT16_MAX__ 32767
+// RISCV32: #define __INT16_TYPE__ short
+// RISCV32: #define __INT32_C_SUFFIX__
+// RISCV32: #define __INT32_MAX__ 2147483647
+// RISCV32: #define __INT32_TYPE__ int
+// RISCV32: #define __INT64_C_SUFFIX__ LL
+// RISCV32: #define __INT64_MAX__ 9223372036854775807LL
+// RISCV32: #define __INT64_TYPE__ long long int
+// RISCV32: #define __INT8_C_SUFFIX__
+// RISCV32: #define __INT8_MAX__ 127
+// RISCV32: #define __INT8_TYPE__ signed char
+// RISCV32: #define __INTMAX_C_SUFFIX__ LL
+// RISCV32: #define __INTMAX_MAX__ 9223372036854775807LL
+// RISCV32: #define __INTMAX_TYPE__ long long int
+// RISCV32: #define __INTMAX_WIDTH__ 64
+// RISCV32: #define __INTPTR_MAX__ 2147483647
+// RISCV32: #define __INTPTR_TYPE__ int
+// RISCV32: #define __INTPTR_WIDTH__ 32
+// TODO: RISC-V GCC defines INT_FAST16 as int
+// RISCV32: #define __INT_FAST16_MAX__ 32767
+// RISCV32: #define __INT_FAST16_TYPE__ short
+// RISCV32: #define __INT_FAST32_MAX__ 2147483647
+// RISCV32: #define __INT_FAST32_TYPE__ int
+// RISCV32: #define __INT_FAST64_MAX__ 9223372036854775807LL
+// RISCV32: #define __INT_FAST64_TYPE__ long long int
+// TODO: RISC-V GCC defines INT_FAST8 as int
+// RISCV32: #define __INT_FAST8_MAX__ 127
+// RISCV32: #define __INT_FAST8_TYPE__ signed char
+// RISCV32: #define __INT_LEAST16_MAX__ 32767
+// RISCV32: #define __INT_LEAST16_TYPE__ short
+// RISCV32: #define __INT_LEAST32_MAX__ 2147483647
+// RISCV32: #define __INT_LEAST32_TYPE__ int
+// RISCV32: #define __INT_LEAST64_MAX__ 9223372036854775807LL
+// RISCV32: #define __INT_LEAST64_TYPE__ long long int
+// RISCV32: #define __INT_LEAST8_MAX__ 127
+// RISCV32: #define __INT_LEAST8_TYPE__ signed char
+// RISCV32: #define __INT_MAX__ 2147483647
+// RISCV32: #define __LDBL_DECIMAL_DIG__ 36
+// RISCV32: #define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L
+// RISCV32: #define __LDBL_DIG__ 33
+// RISCV32: #define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L
+// RISCV32: #define __LDBL_HAS_DENORM__ 1
+// RISCV32: #define __LDBL_HAS_INFINITY__ 1
+// RISCV32: #define __LDBL_HAS_QUIET_NAN__ 1
+// RISCV32: #define __LDBL_MANT_DIG__ 113
+// RISCV32: #define __LDBL_MAX_10_EXP__ 4932
+// RISCV32: #define __LDBL_MAX_EXP__ 16384
+// RISCV32: #define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L
+// RISCV32: #define __LDBL_MIN_10_EXP__ (-4931)
+// RISCV32: #define __LDBL_MIN_EXP__ (-16381)
+// RISCV32: #define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L
+// RISCV32: #define __LITTLE_ENDIAN__ 1
+// RISCV32: #define __LONG_LONG_MAX__ 9223372036854775807LL
+// RISCV32: #define __LONG_MAX__ 2147483647L
+// RISCV32: #define __NO_INLINE__ 1
+// RISCV32: #define __POINTER_WIDTH__ 32
+// RISCV32: #define __PRAGMA_REDEFINE_EXTNAME 1
+// RISCV32: #define __PTRDIFF_MAX__ 2147483647
+// RISCV32: #define __PTRDIFF_TYPE__ int
+// RISCV32: #define __PTRDIFF_WIDTH__ 32
+// RISCV32: #define __SCHAR_MAX__ 127
+// RISCV32: #define __SHRT_MAX__ 32767
+// RISCV32: #define __SIG_ATOMIC_MAX__ 2147483647
+// RISCV32: #define __SIG_ATOMIC_WIDTH__ 32
+// RISCV32: #define __SIZEOF_DOUBLE__ 8
+// RISCV32: #define __SIZEOF_FLOAT__ 4
+// RISCV32-INT128: #define __SIZEOF_INT128__ 16
+// RISCV32: #define __SIZEOF_INT__ 4
+// RISCV32: #define __SIZEOF_LONG_DOUBLE__ 16
+// RISCV32: #define __SIZEOF_LONG_LONG__ 8
+// RISCV32: #define __SIZEOF_LONG__ 4
+// RISCV32: #define __SIZEOF_POINTER__ 4
+// RISCV32: #define __SIZEOF_PTRDIFF_T__ 4
+// RISCV32: #define __SIZEOF_SHORT__ 2
+// RISCV32: #define __SIZEOF_SIZE_T__ 4
+// RISCV32: #define __SIZEOF_WCHAR_T__ 4
+// RISCV32: #define __SIZEOF_WINT_T__ 4
+// RISCV32: #define __SIZE_MAX__ 4294967295U
+// RISCV32: #define __SIZE_TYPE__ unsigned int
+// RISCV32: #define __SIZE_WIDTH__ 32
+// RISCV32: #define __STDC_HOSTED__ 0
+// RISCV32: #define __STDC_UTF_16__ 1
+// RISCV32: #define __STDC_UTF_32__ 1
+// RISCV32: #define __STDC_VERSION__ 201112L
+// RISCV32: #define __STDC__ 1
+// RISCV32: #define __UINT16_C_SUFFIX__
+// RISCV32: #define __UINT16_MAX__ 65535
+// RISCV32: #define __UINT16_TYPE__ unsigned short
+// RISCV32: #define __UINT32_C_SUFFIX__ U
+// RISCV32: #define __UINT32_MAX__ 4294967295U
+// RISCV32: #define __UINT32_TYPE__ unsigned int
+// RISCV32: #define __UINT64_C_SUFFIX__ ULL
+// RISCV32: #define __UINT64_MAX__ 18446744073709551615ULL
+// RISCV32: #define __UINT64_TYPE__ long long unsigned int
+// RISCV32: #define __UINT8_C_SUFFIX__
+// RISCV32: #define __UINT8_MAX__ 255
+// RISCV32: #define __UINT8_TYPE__ unsigned char
+// RISCV32: #define __UINTMAX_C_SUFFIX__ ULL
+// RISCV32: #define __UINTMAX_MAX__ 18446744073709551615ULL
+// RISCV32: #define __UINTMAX_TYPE__ long long unsigned int
+// RISCV32: #define __UINTMAX_WIDTH__ 64
+// RISCV32: #define __UINTPTR_MAX__ 4294967295U
+// RISCV32: #define __UINTPTR_TYPE__ unsigned int
+// RISCV32: #define __UINTPTR_WIDTH__ 32
+// TODO: RISC-V GCC defines UINT_FAST16 to be unsigned int
+// RISCV32: #define __UINT_FAST16_MAX__ 65535
+// RISCV32: #define __UINT_FAST16_TYPE__ unsigned short
+// RISCV32: #define __UINT_FAST32_MAX__ 4294967295U
+// RISCV32: #define __UINT_FAST32_TYPE__ unsigned int
+// RISCV32: #define __UINT_FAST64_MAX__ 18446744073709551615ULL
+// RISCV32: #define __UINT_FAST64_TYPE__ long long unsigned int
+// TODO: RISC-V GCC defines UINT_FAST8 to be unsigned int
+// RISCV32: #define __UINT_FAST8_MAX__ 255
+// RISCV32: #define __UINT_FAST8_TYPE__ unsigned char
+// RISCV32: #define __UINT_LEAST16_MAX__ 65535
+// RISCV32: #define __UINT_LEAST16_TYPE__ unsigned short
+// RISCV32: #define __UINT_LEAST32_MAX__ 4294967295U
+// RISCV32: #define __UINT_LEAST32_TYPE__ unsigned int
+// RISCV32: #define __UINT_LEAST64_MAX__ 18446744073709551615ULL
+// RISCV32: #define __UINT_LEAST64_TYPE__ long long unsigned int
+// RISCV32: #define __UINT_LEAST8_MAX__ 255
+// RISCV32: #define __UINT_LEAST8_TYPE__ unsigned char
+// RISCV32: #define __USER_LABEL_PREFIX__
+// RISCV32: #define __WCHAR_MAX__ 2147483647
+// RISCV32: #define __WCHAR_TYPE__ int
+// RISCV32: #define __WCHAR_WIDTH__ 32
+// RISCV32: #define __WINT_TYPE__ unsigned int
+// RISCV32: #define __WINT_UNSIGNED__ 1
+// RISCV32: #define __WINT_WIDTH__ 32
+// RISCV32-LINUX: #define __gnu_linux__ 1
+// RISCV32-LINUX: #define __linux 1
+// RISCV32-LINUX: #define __linux__ 1
+// RISCV32: #define __riscv 1
+// RISCV32: #define __riscv_cmodel_medlow 1
+// RISCV32: #define __riscv_float_abi_soft 1
+// RISCV32: #define __riscv_xlen 32
+// RISCV32-LINUX: #define __unix 1
+// RISCV32-LINUX: #define __unix__ 1
+// RISCV32-LINUX: #define linux 1
+// RISCV32-LINUX: #define unix 1
+
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=riscv64 < /dev/null \
+// RUN:   | FileCheck -match-full-lines -check-prefix=RISCV64 %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=riscv64-unknown-linux < /dev/null \
+// RUN:   | FileCheck -match-full-lines -check-prefixes=RISCV64,RISCV64-LINUX %s
+// RISCV64: #define _LP64 1
+// RISCV64: #define __ATOMIC_ACQUIRE 2
+// RISCV64: #define __ATOMIC_ACQ_REL 4
+// RISCV64: #define __ATOMIC_CONSUME 1
+// RISCV64: #define __ATOMIC_RELAXED 0
+// RISCV64: #define __ATOMIC_RELEASE 3
+// RISCV64: #define __ATOMIC_SEQ_CST 5
+// RISCV64: #define __BIGGEST_ALIGNMENT__ 16
+// RISCV64: #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
+// RISCV64: #define __CHAR16_TYPE__ unsigned short
+// RISCV64: #define __CHAR32_TYPE__ unsigned int
+// RISCV64: #define __CHAR_BIT__ 8
+// RISCV64: #define __DBL_DECIMAL_DIG__ 17
+// RISCV64: #define __DBL_DENORM_MIN__ 4.9406564584124654e-324
+// RISCV64: #define __DBL_DIG__ 15
+// RISCV64: #define __DBL_EPSILON__ 2.2204460492503131e-16
+// RISCV64: #define __DBL_HAS_DENORM__ 1
+// RISCV64: #define __DBL_HAS_INFINITY__ 1
+// RISCV64: #define __DBL_HAS_QUIET_NAN__ 1
+// RISCV64: #define __DBL_MANT_DIG__ 53
+// RISCV64: #define __DBL_MAX_10_EXP__ 308
+// RISCV64: #define __DBL_MAX_EXP__ 1024
+// RISCV64: #define __DBL_MAX__ 1.7976931348623157e+308
+// RISCV64: #define __DBL_MIN_10_EXP__ (-307)
+// RISCV64: #define __DBL_MIN_EXP__ (-1021)
+// RISCV64: #define __DBL_MIN__ 2.2250738585072014e-308
+// RISCV64: #define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__
+// RISCV64: #define __ELF__ 1
+// RISCV64: #define __FINITE_MATH_ONLY__ 0
+// RISCV64: #define __FLT_DECIMAL_DIG__ 9
+// RISCV64: #define __FLT_DENORM_MIN__ 1.40129846e-45F
+// RISCV64: #define __FLT_DIG__ 6
+// RISCV64: #define __FLT_EPSILON__ 1.19209290e-7F
+// RISCV64: #define __FLT_EVAL_METHOD__ 0
+// RISCV64: #define __FLT_HAS_DENORM__ 1
+// RISCV64: #define __FLT_HAS_INFINITY__ 1
+// RISCV64: #define __FLT_HAS_QUIET_NAN__ 1
+// RISCV64: #define __FLT_MANT_DIG__ 24
+// RISCV64: #define __FLT_MAX_10_EXP__ 38
+// RISCV64: #define __FLT_MAX_EXP__ 128
+// RISCV64: #define __FLT_MAX__ 3.40282347e+38F
+// RISCV64: #define __FLT_MIN_10_EXP__ (-37)
+// RISCV64: #define __FLT_MIN_EXP__ (-125)
+// RISCV64: #define __FLT_MIN__ 1.17549435e-38F
+// RISCV64: #define __FLT_RADIX__ 2
+// RISCV64: #define __GCC_ATOMIC_BOOL_LOCK_FREE 1
+// RISCV64: #define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 1
+// RISCV64: #define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 1
+// RISCV64: #define __GCC_ATOMIC_CHAR_LOCK_FREE 1
+// RISCV64: #define __GCC_ATOMIC_INT_LOCK_FREE 1
+// RISCV64: #define __GCC_ATOMIC_LLONG_LOCK_FREE 1
+// RISCV64: #define __GCC_ATOMIC_LONG_LOCK_FREE 1
+// RISCV64: #define __GCC_ATOMIC_POINTER_LOCK_FREE 1
+// RISCV64: #define __GCC_ATOMIC_SHORT_LOCK_FREE 1
+// RISCV64: #define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
+// RISCV64: #define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 1
+// RISCV64: #define __GNUC_MINOR__ {{.*}}
+// RISCV64: #define __GNUC_PATCHLEVEL__ {{.*}}
+// RISCV64: #define __GNUC_STDC_INLINE__ 1
+// RISCV64: #define __GNUC__ {{.*}}
+// RISCV64: #define __GXX_ABI_VERSION {{.*}}
+// RISCV64: #define __INT16_C_SUFFIX__
+// RISCV64: #define __INT16_MAX__ 32767
+// RISCV64: #define __INT16_TYPE__ short
+// RISCV64: #define __INT32_C_SUFFIX__
+// RISCV64: #define __INT32_MAX__ 2147483647
+// RISCV64: #define __INT32_TYPE__ int
+// RISCV64: #define __INT64_C_SUFFIX__ L
+// RISCV64: #define __INT64_MAX__ 9223372036854775807L
+// RISCV64: #define __INT64_TYPE__ long int
+// RISCV64: #define __INT8_C_SUFFIX__
+// RISCV64: #define __INT8_MAX__ 127
+// RISCV64: #define __INT8_TYPE__ signed char
+// RISCV64: #define __INTMAX_C_SUFFIX__ L
+// RISCV64: #define __INTMAX_MAX__ 9223372036854775807L
+// RISCV64: #define __INTMAX_TYPE__ long int
+// RISCV64: #define __INTMAX_WIDTH__ 64
+// RISCV64: #define __INTPTR_MAX__ 9223372036854775807L
+// RISCV64: #define __INTPTR_TYPE__ long int
+// RISCV64: #define __INTPTR_WIDTH__ 64
+// TODO: RISC-V GCC defines INT_FAST16 as int
+// RISCV64: #define __INT_FAST16_MAX__ 32767
+// RISCV64: #define __INT_FAST16_TYPE__ short
+// RISCV64: #define __INT_FAST32_MAX__ 2147483647
+// RISCV64: #define __INT_FAST32_TYPE__ int
+// RISCV64: #define __INT_FAST64_MAX__ 9223372036854775807L
+// RISCV64: #define __INT_FAST64_TYPE__ long int
+// TODO: RISC-V GCC defines INT_FAST8 as int
+// RISCV64: #define __INT_FAST8_MAX__ 127
+// RISCV64: #define __INT_FAST8_TYPE__ signed char
+// RISCV64: #define __INT_LEAST16_MAX__ 32767
+// RISCV64: #define __INT_LEAST16_TYPE__ short
+// RISCV64: #define __INT_LEAST32_MAX__ 2147483647
+// RISCV64: #define __INT_LEAST32_TYPE__ int
+// RISCV64: #define __INT_LEAST64_MAX__ 9223372036854775807L
+// RISCV64: #define __INT_LEAST64_TYPE__ long int
+// RISCV64: #define __INT_LEAST8_MAX__ 127
+// RISCV64: #define __INT_LEAST8_TYPE__ signed char
+// RISCV64: #define __INT_MAX__ 2147483647
+// RISCV64: #define __LDBL_DECIMAL_DIG__ 36
+// RISCV64: #define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L
+// RISCV64: #define __LDBL_DIG__ 33
+// RISCV64: #define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L
+// RISCV64: #define __LDBL_HAS_DENORM__ 1
+// RISCV64: #define __LDBL_HAS_INFINITY__ 1
+// RISCV64: #define __LDBL_HAS_QUIET_NAN__ 1
+// RISCV64: #define __LDBL_MANT_DIG__ 113
+// RISCV64: #define __LDBL_MAX_10_EXP__ 4932
+// RISCV64: #define __LDBL_MAX_EXP__ 16384
+// RISCV64: #define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L
+// RISCV64: #define __LDBL_MIN_10_EXP__ (-4931)
+// RISCV64: #define __LDBL_MIN_EXP__ (-16381)
+// RISCV64: #define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L
+// RISCV64: #define __LITTLE_ENDIAN__ 1
+// RISCV64: #define __LONG_LONG_MAX__ 9223372036854775807LL
+// RISCV64: #define __LONG_MAX__ 9223372036854775807L
+// RISCV64: #define __LP64__ 1
+// RISCV64: #define __NO_INLINE__ 1
+// RISCV64: #define __POINTER_WIDTH__ 64
+// RISCV64: #define __PRAGMA_REDEFINE_EXTNAME 1
+// RISCV64: #define __PTRDIFF_MAX__ 9223372036854775807L
+// RISCV64: #define __PTRDIFF_TYPE__ long int
+// RISCV64: #define __PTRDIFF_WIDTH__ 64
+// RISCV64: #define __SCHAR_MAX__ 127
+// RISCV64: #define __SHRT_MAX__ 32767
+// RISCV64: #define __SIG_ATOMIC_MAX__ 2147483647
+// RISCV64: #define __SIG_ATOMIC_WIDTH__ 32
+// RISCV64: #define __SIZEOF_DOUBLE__ 8
+// RISCV64: #define __SIZEOF_FLOAT__ 4
+// RISCV64: #define __SIZEOF_INT__ 4
+// RISCV64: #define __SIZEOF_LONG_DOUBLE__ 16
+// RISCV64: #define __SIZEOF_LONG_LONG__ 8
+// RISCV64: #define __SIZEOF_LONG__ 8
+// RISCV64: #define __SIZEOF_POINTER__ 8
+// RISCV64: #define __SIZEOF_PTRDIFF_T__ 8
+// RISCV64: #define __SIZEOF_SHORT__ 2
+// RISCV64: #define __SIZEOF_SIZE_T__ 8
+// RISCV64: #define __SIZEOF_WCHAR_T__ 4
+// RISCV64: #define __SIZEOF_WINT_T__ 4
+// RISCV64: #define __SIZE_MAX__ 18446744073709551615UL
+// RISCV64: #define __SIZE_TYPE__ long unsigned int
+// RISCV64: #define __SIZE_WIDTH__ 64
+// RISCV64: #define __STDC_HOSTED__ 0
+// RISCV64: #define __STDC_UTF_16__ 1
+// RISCV64: #define __STDC_UTF_32__ 1
+// RISCV64: #define __STDC_VERSION__ 201112L
+// RISCV64: #define __STDC__ 1
+// RISCV64: #define __UINT16_C_SUFFIX__
+// RISCV64: #define __UINT16_MAX__ 65535
+// RISCV64: #define __UINT16_TYPE__ unsigned short
+// RISCV64: #define __UINT32_C_SUFFIX__ U
+// RISCV64: #define __UINT32_MAX__ 4294967295U
+// RISCV64: #define __UINT32_TYPE__ unsigned int
+// RISCV64: #define __UINT64_C_SUFFIX__ UL
+// RISCV64: #define __UINT64_MAX__ 18446744073709551615UL
+// RISCV64: #define __UINT64_TYPE__ long unsigned int
+// RISCV64: #define __UINT8_C_SUFFIX__
+// RISCV64: #define __UINT8_MAX__ 255
+// RISCV64: #define __UINT8_TYPE__ unsigned char
+// RISCV64: #define __UINTMAX_C_SUFFIX__ UL
+// RISCV64: #define __UINTMAX_MAX__ 18446744073709551615UL
+// RISCV64: #define __UINTMAX_TYPE__ long unsigned int
+// RISCV64: #define __UINTMAX_WIDTH__ 64
+// RISCV64: #define __UINTPTR_MAX__ 18446744073709551615UL
+// RISCV64: #define __UINTPTR_TYPE__ long unsigned int
+// RISCV64: #define __UINTPTR_WIDTH__ 64
+// TODO: RISC-V GCC defines UINT_FAST16 to be unsigned int
+// RISCV64: #define __UINT_FAST16_MAX__ 65535
+// RISCV64: #define __UINT_FAST16_TYPE__ unsigned short
+// RISCV64: #define __UINT_FAST32_MAX__ 4294967295U
+// RISCV64: #define __UINT_FAST32_TYPE__ unsigned int
+// RISCV64: #define __UINT_FAST64_MAX__ 18446744073709551615UL
+// RISCV64: #define __UINT_FAST64_TYPE__ long unsigned int
+// TODO: RISC-V GCC defines UINT_FAST8 to be unsigned int
+// RISCV64: #define __UINT_FAST8_MAX__ 255
+// RISCV64: #define __UINT_FAST8_TYPE__ unsigned char
+// RISCV64: #define __UINT_LEAST16_MAX__ 65535
+// RISCV64: #define __UINT_LEAST16_TYPE__ unsigned short
+// RISCV64: #define __UINT_LEAST32_MAX__ 4294967295U
+// RISCV64: #define __UINT_LEAST32_TYPE__ unsigned int
+// RISCV64: #define __UINT_LEAST64_MAX__ 18446744073709551615UL
+// RISCV64: #define __UINT_LEAST64_TYPE__ long unsigned int
+// RISCV64: #define __UINT_LEAST8_MAX__ 255
+// RISCV64: #define __UINT_LEAST8_TYPE__ unsigned char
+// RISCV64: #define __USER_LABEL_PREFIX__
+// RISCV64: #define __WCHAR_MAX__ 2147483647
+// RISCV64: #define __WCHAR_TYPE__ int
+// RISCV64: #define __WCHAR_WIDTH__ 32
+// RISCV64: #define __WINT_TYPE__ unsigned int
+// RISCV64: #define __WINT_UNSIGNED__ 1
+// RISCV64: #define __WINT_WIDTH__ 32
+// RISCV64-LINUX: #define __gnu_linux__ 1
+// RISCV64-LINUX: #define __linux 1
+// RISCV64-LINUX: #define __linux__ 1
+// RISCV64: #define __riscv 1
+// RISCV64: #define __riscv_cmodel_medlow 1
+// RISCV64: #define __riscv_float_abi_soft 1
+// RISCV64: #define __riscv_xlen 64
+// RISCV64-LINUX: #define __unix 1
+// RISCV64-LINUX: #define __unix__ 1
+// RISCV64-LINUX: #define linux 1
+// RISCV64-LINUX: #define unix 1
diff --git a/test/Preprocessor/is_target.c b/test/Preprocessor/is_target.c
new file mode 100644
index 0000000..44bdb11
--- /dev/null
+++ b/test/Preprocessor/is_target.c
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-darwin-simulator -verify %s
+
+#if !__is_target_arch(x86_64) || !__is_target_arch(X86_64)
+  #error "mismatching arch"
+#endif
+
+#if __is_target_arch(arm64)
+  #error "mismatching arch"
+#endif
+
+// Silently ignore invalid archs. This will ensure that older compilers will
+// accept headers that support new arches/vendors/os variants.
+#if __is_target_arch(foo)
+  #error "invalid arch"
+#endif
+
+#if !__is_target_vendor(apple) || !__is_target_vendor(APPLE)
+  #error "mismatching vendor"
+#endif
+
+#if __is_target_vendor(unknown)
+  #error "mismatching vendor"
+#endif
+
+#if __is_target_vendor(foo)
+  #error "invalid vendor"
+#endif
+
+#if !__is_target_os(darwin) || !__is_target_os(DARWIN)
+  #error "mismatching os"
+#endif
+
+#if __is_target_os(ios)
+  #error "mismatching os"
+#endif
+
+#if __is_target_os(foo)
+  #error "invalid os"
+#endif
+
+#if !__is_target_environment(simulator) || !__is_target_environment(SIMULATOR)
+  #error "mismatching environment"
+#endif
+
+#if __is_target_environment(unknown)
+  #error "mismatching environment"
+#endif
+
+#if __is_target_environment(foo)
+  #error "invalid environment"
+#endif
+
+#if !__has_builtin(__is_target_arch) || !__has_builtin(__is_target_os) || !__has_builtin(__is_target_vendor) || !__has_builtin(__is_target_environment)
+  #error "has builtin doesn't work"
+#endif
+
+#if __is_target_arch(11) // expected-error {{builtin feature check macro requires a parenthesized identifier}}
+  #error "invalid arch"
+#endif
+
+#if __is_target_arch x86 // expected-error{{missing '(' after '__is_target_arch'}}
+  #error "invalid arch"
+#endif
+
+#if __is_target_arch ( x86  // expected-error {{unterminated function-like macro invocation}}
+  #error "invalid arch"
+#endif // expected-error@-2 {{expected value in expression}}
diff --git a/test/Preprocessor/is_target_arm.c b/test/Preprocessor/is_target_arm.c
new file mode 100644
index 0000000..9e1afe6
--- /dev/null
+++ b/test/Preprocessor/is_target_arm.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -fsyntax-only -triple thumbv7--windows-msvc19.11.0 -verify %s
+// RUN: %clang_cc1 -fsyntax-only -triple armv7--windows-msvc19.11.0 -DARM -verify %s
+// expected-no-diagnostics
+
+// ARM does match arm and thumb.
+#if !__is_target_arch(arm)
+  #error "mismatching arch"
+#endif
+
+#if __is_target_arch(armeb) || __is_target_arch(armebv7) || __is_target_arch(thumbeb) || __is_target_arch(thumbebv7)
+  #error "mismatching arch"
+#endif
+
+// ARMV7 does match armv7 and thumbv7.
+#if !__is_target_arch(armv7)
+  #error "mismatching arch"
+#endif
+
+// ARMV6 does not match armv7 or thumbv7.
+#if __is_target_arch(armv6)
+  #error "mismatching arch"
+#endif
+
+#if __is_target_arch(arm64)
+  #error "mismatching arch"
+#endif
+
+#ifndef ARM
+
+// Allow checking for precise arch + subarch.
+#if !__is_target_arch(thumbv7)
+  #error "mismatching arch"
+#endif
+
+// But also allow checking for the arch without subarch.
+#if !__is_target_arch(thumb)
+  #error "mismatching arch"
+#endif
+
+// Same arch with a different subarch doesn't match.
+#if __is_target_arch(thumbv6)
+  #error "mismatching arch"
+#endif
+
+#else
+
+#if __is_target_arch(thumbv7) || __is_target_arch(thumb)
+  #error "mismatching arch"
+#endif
+
+#endif
diff --git a/test/Preprocessor/is_target_arm64.c b/test/Preprocessor/is_target_arm64.c
new file mode 100644
index 0000000..87ebe94
--- /dev/null
+++ b/test/Preprocessor/is_target_arm64.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -fsyntax-only -triple arm64-apple-ios11 -verify %s
+// expected-no-diagnostics
+
+#if !__is_target_arch(arm64) || !__is_target_arch(aarch64)
+  #error "mismatching arch"
+#endif
+
+#if __is_target_arch(aarch64_be)
+  #error "mismatching arch"
+#endif
diff --git a/test/Preprocessor/is_target_environment_version.c b/test/Preprocessor/is_target_environment_version.c
new file mode 100644
index 0000000..e933860
--- /dev/null
+++ b/test/Preprocessor/is_target_environment_version.c
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-pc-windows-msvc18.0.0 -verify %s
+// expected-no-diagnostics
+
+#if !__is_target_environment(msvc)
+  #error "mismatching environment"
+#endif
diff --git a/test/Preprocessor/is_target_os_darwin.c b/test/Preprocessor/is_target_os_darwin.c
new file mode 100644
index 0000000..4c807ce
--- /dev/null
+++ b/test/Preprocessor/is_target_os_darwin.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-macos -DMAC -verify %s
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-ios -verify %s
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-tvos -verify %s
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-watchos -verify %s
+// expected-no-diagnostics
+
+#if !__is_target_os(darwin)
+  #error "mismatching os"
+#endif
+
+// macOS matches both macOS and macOSX.
+#ifdef MAC
+
+#if !__is_target_os(macos)
+  #error "mismatching os"
+#endif
+
+#if !__is_target_os(macosx)
+  #error "mismatching os"
+#endif
+
+#if __is_target_os(ios)
+  #error "mismatching os"
+#endif
+
+#endif
diff --git a/test/Preprocessor/is_target_unknown.c b/test/Preprocessor/is_target_unknown.c
new file mode 100644
index 0000000..d81afcb
--- /dev/null
+++ b/test/Preprocessor/is_target_unknown.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -fsyntax-only -triple i686-unknown-unknown -verify %s
+// RUN: %clang_cc1 -fsyntax-only -triple i686-- -verify %s
+// expected-no-diagnostics
+
+#if __is_target_arch(unknown)
+  #error "mismatching arch"
+#endif
+
+// Unknown vendor is allowed.
+#if !__is_target_vendor(unknown)
+  #error "mismatching vendor"
+#endif
+
+// Unknown OS is allowed.
+#if !__is_target_os(unknown)
+  #error "mismatching OS"
+#endif
+
+// Unknown environment is allowed.
+#if !__is_target_environment(unknown)
+  #error "mismatching environment"
+#endif
diff --git a/test/Preprocessor/macro-multiline.c b/test/Preprocessor/macro-multiline.c
index 72a5d20..664c37c 100644
--- a/test/Preprocessor/macro-multiline.c
+++ b/test/Preprocessor/macro-multiline.c
@@ -1,4 +1,4 @@
-// RUN: printf -- "-DX=A\nTHIS_SHOULD_NOT_EXIST_IN_THE_OUTPUT" | xargs -0 %clang -E %s | FileCheck -strict-whitespace %s
+// RUN: printf -- "-DX=A\\\\\nTHIS_SHOULD_NOT_EXIST_IN_THE_OUTPUT\n" | xargs %clang -E %s | FileCheck -strict-whitespace %s
 
 // Per GCC -D semantics, \n and anything that follows is ignored.
 
diff --git a/test/Preprocessor/macro_raw_string.cpp b/test/Preprocessor/macro_raw_string.cpp
new file mode 100644
index 0000000..d568948
--- /dev/null
+++ b/test/Preprocessor/macro_raw_string.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -E -std=c++11 %s -o %t
+// RUN: %clang_cc1 %t
+
+#define FOO(str) foo(#str)
+
+extern void foo(const char *str);
+
+void bar() {
+  FOO(R"(foo
+    bar)");
+}
diff --git a/test/Preprocessor/pragma-comment-linux.c b/test/Preprocessor/pragma-comment-linux.c
new file mode 100644
index 0000000..fcac049
--- /dev/null
+++ b/test/Preprocessor/pragma-comment-linux.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple i686-unknown-linux-gnu -fsyntax-only -verify %s -Wunknown-pragmas
+
+#pragma comment(linker, "")
+// expected-warning@-1 {{'#pragma comment linker' ignored}}
+
diff --git a/test/Preprocessor/pragma_microsoft.c b/test/Preprocessor/pragma_microsoft.c
index b256b2b..5d08943 100644
--- a/test/Preprocessor/pragma_microsoft.c
+++ b/test/Preprocessor/pragma_microsoft.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 %s -fsyntax-only -verify -fms-extensions -Wunknown-pragmas
-// RUN: not %clang_cc1 %s -fms-extensions -E | FileCheck %s
+// RUN: %clang_cc1 -triple i686-unknown-windows-msvc %s -fsyntax-only -verify -fms-extensions -Wunknown-pragmas
+// RUN: not %clang_cc1 -triple i686-unknown-windows-msvc %s -fms-extensions -E | FileCheck %s
 // REQUIRES: non-ps4-sdk
 
 // rdar://6495941
diff --git a/test/Preprocessor/pragma_unknown.c b/test/Preprocessor/pragma_unknown.c
index 5578ce5..81fe88c 100644
--- a/test/Preprocessor/pragma_unknown.c
+++ b/test/Preprocessor/pragma_unknown.c
@@ -1,29 +1,45 @@
 // RUN: %clang_cc1 -fsyntax-only -Wunknown-pragmas -verify %s
-// RUN: %clang_cc1 -E %s | FileCheck --strict-whitespace %s
+// RUN: %clang_cc1 -E %s 2>&1 | FileCheck --strict-whitespace %s
 
 // GCC doesn't expand macro args for unrecognized pragmas.
 #define bar xX
 #pragma foo bar   // expected-warning {{unknown pragma ignored}}
+// CHECK-NOT: unknown pragma in STDC namespace
 // CHECK: {{^}}#pragma foo bar{{$}}
 
 #pragma STDC FP_CONTRACT ON
 #pragma STDC FP_CONTRACT OFF
 #pragma STDC FP_CONTRACT DEFAULT
 #pragma STDC FP_CONTRACT IN_BETWEEN  // expected-warning {{expected 'ON' or 'OFF' or 'DEFAULT' in pragma}}
+// CHECK: {{^}}#pragma STDC FP_CONTRACT ON{{$}}
+// CHECK: {{^}}#pragma STDC FP_CONTRACT OFF{{$}}
+// CHECK: {{^}}#pragma STDC FP_CONTRACT DEFAULT{{$}}
+// CHECK: {{^}}#pragma STDC FP_CONTRACT IN_BETWEEN{{$}}
 
 #pragma STDC FENV_ACCESS ON          // expected-warning {{pragma STDC FENV_ACCESS ON is not supported, ignoring pragma}}
 #pragma STDC FENV_ACCESS OFF
 #pragma STDC FENV_ACCESS DEFAULT
 #pragma STDC FENV_ACCESS IN_BETWEEN   // expected-warning {{expected 'ON' or 'OFF' or 'DEFAULT' in pragma}}
+// CHECK: {{^}}#pragma STDC FENV_ACCESS ON{{$}}
+// CHECK: {{^}}#pragma STDC FENV_ACCESS OFF{{$}}
+// CHECK: {{^}}#pragma STDC FENV_ACCESS DEFAULT{{$}}
+// CHECK: {{^}}#pragma STDC FENV_ACCESS IN_BETWEEN{{$}}
 
 #pragma STDC CX_LIMITED_RANGE ON
 #pragma STDC CX_LIMITED_RANGE OFF
 #pragma STDC CX_LIMITED_RANGE DEFAULT 
 #pragma STDC CX_LIMITED_RANGE IN_BETWEEN   // expected-warning {{expected 'ON' or 'OFF' or 'DEFAULT' in pragma}}
+// CHECK: {{^}}#pragma STDC CX_LIMITED_RANGE ON{{$}}
+// CHECK: {{^}}#pragma STDC CX_LIMITED_RANGE OFF{{$}}
+// CHECK: {{^}}#pragma STDC CX_LIMITED_RANGE DEFAULT{{$}}
+// CHECK: {{^}}#pragma STDC CX_LIMITED_RANGE IN_BETWEEN{{$}}
 
 #pragma STDC CX_LIMITED_RANGE    // expected-warning {{expected 'ON' or 'OFF' or 'DEFAULT' in pragma}}
 #pragma STDC CX_LIMITED_RANGE ON FULL POWER  // expected-warning {{expected end of directive in pragma}}
+// CHECK: {{^}}#pragma STDC CX_LIMITED_RANGE{{$}}
+// CHECK: {{^}}#pragma STDC CX_LIMITED_RANGE ON FULL POWER{{$}}
 
 #pragma STDC SO_GREAT  // expected-warning {{unknown pragma in STDC namespace}}
 #pragma STDC   // expected-warning {{unknown pragma in STDC namespace}}
-
+// CHECK: {{^}}#pragma STDC SO_GREAT{{$}}
+// CHECK: {{^}}#pragma STDC{{$}}
diff --git a/test/Preprocessor/predefined-arch-macros.c b/test/Preprocessor/predefined-arch-macros.c
index 06faf0e..ba9a5e9 100644
--- a/test/Preprocessor/predefined-arch-macros.c
+++ b/test/Preprocessor/predefined-arch-macros.c
@@ -156,6 +156,8 @@
 // CHECK_I686_M32: #define __i686__ 1
 // CHECK_I686_M32: #define __pentiumpro 1
 // CHECK_I686_M32: #define __pentiumpro__ 1
+// CHECK_I686_M32: #define __tune_i686__ 1
+// CHECK_I686_M32: #define __tune_pentiumpro__ 1
 // CHECK_I686_M32: #define i386 1
 // RUN: not %clang -march=i686 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
@@ -587,6 +589,7 @@
 // CHECK_BROADWELL_M32: #define __MMX__ 1
 // CHECK_BROADWELL_M32: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M32: #define __POPCNT__ 1
+// CHECK_BROADWELL_M32: #define __PRFCHW__ 1
 // CHECK_BROADWELL_M32: #define __RDRND__ 1
 // CHECK_BROADWELL_M32: #define __RDSEED__ 1
 // CHECK_BROADWELL_M32: #define __SSE2__ 1
@@ -618,6 +621,7 @@
 // CHECK_BROADWELL_M64: #define __MMX__ 1
 // CHECK_BROADWELL_M64: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M64: #define __POPCNT__ 1
+// CHECK_BROADWELL_M64: #define __PRFCHW__ 1
 // CHECK_BROADWELL_M64: #define __RDRND__ 1
 // CHECK_BROADWELL_M64: #define __RDSEED__ 1
 // CHECK_BROADWELL_M64: #define __SSE2_MATH__ 1
@@ -655,6 +659,7 @@
 // CHECK_SKL_M32: #define __MPX__ 1
 // CHECK_SKL_M32: #define __PCLMUL__ 1
 // CHECK_SKL_M32: #define __POPCNT__ 1
+// CHECK_SKL_M32: #define __PRFCHW__ 1
 // CHECK_SKL_M32: #define __RDRND__ 1
 // CHECK_SKL_M32: #define __RDSEED__ 1
 // CHECK_SKL_M32: #define __RTM__ 1
@@ -688,6 +693,7 @@
 // CHECK_SKL_M64: #define __MPX__ 1
 // CHECK_SKL_M64: #define __PCLMUL__ 1
 // CHECK_SKL_M64: #define __POPCNT__ 1
+// CHECK_SKL_M64: #define __PRFCHW__ 1
 // CHECK_SKL_M64: #define __RDRND__ 1
 // CHECK_SKL_M64: #define __RDSEED__ 1
 // CHECK_SKL_M64: #define __RTM__ 1
@@ -728,6 +734,7 @@
 // CHECK_KNL_M32: #define __PCLMUL__ 1
 // CHECK_KNL_M32: #define __POPCNT__ 1
 // CHECK_KNL_M32: #define __PREFETCHWT1__ 1
+// CHECK_KNL_M32: #define __PRFCHW__ 1
 // CHECK_KNL_M32: #define __RDRND__ 1
 // CHECK_KNL_M32: #define __RTM__ 1
 // CHECK_KNL_M32: #define __SSE2__ 1
@@ -764,6 +771,7 @@
 // CHECK_KNL_M64: #define __PCLMUL__ 1
 // CHECK_KNL_M64: #define __POPCNT__ 1
 // CHECK_KNL_M64: #define __PREFETCHWT1__ 1
+// CHECK_KNL_M64: #define __PRFCHW__ 1
 // CHECK_KNL_M64: #define __RDRND__ 1
 // CHECK_KNL_M64: #define __RTM__ 1
 // CHECK_KNL_M64: #define __SSE2_MATH__ 1
@@ -793,6 +801,7 @@
 // CHECK_KNM_M32: #define __AVX512ER__ 1
 // CHECK_KNM_M32: #define __AVX512F__ 1
 // CHECK_KNM_M32: #define __AVX512PF__ 1
+// CHECK_KNM_M32: #define __AVX512VPOPCNTDQ__ 1
 // CHECK_KNM_M32: #define __AVX__ 1
 // CHECK_KNM_M32: #define __BMI2__ 1
 // CHECK_KNM_M32: #define __BMI__ 1
@@ -803,6 +812,7 @@
 // CHECK_KNM_M32: #define __PCLMUL__ 1
 // CHECK_KNM_M32: #define __POPCNT__ 1
 // CHECK_KNM_M32: #define __PREFETCHWT1__ 1
+// CHECK_KNM_M32: #define __PRFCHW__ 1
 // CHECK_KNM_M32: #define __RDRND__ 1
 // CHECK_KNM_M32: #define __RTM__ 1
 // CHECK_KNM_M32: #define __SSE2__ 1
@@ -826,6 +836,7 @@
 // CHECK_KNM_M64: #define __AVX512ER__ 1
 // CHECK_KNM_M64: #define __AVX512F__ 1
 // CHECK_KNM_M64: #define __AVX512PF__ 1
+// CHECK_KNM_M64: #define __AVX512VPOPCNTDQ__ 1
 // CHECK_KNM_M64: #define __AVX__ 1
 // CHECK_KNM_M64: #define __BMI2__ 1
 // CHECK_KNM_M64: #define __BMI__ 1
@@ -836,6 +847,7 @@
 // CHECK_KNM_M64: #define __PCLMUL__ 1
 // CHECK_KNM_M64: #define __POPCNT__ 1
 // CHECK_KNM_M64: #define __PREFETCHWT1__ 1
+// CHECK_KNM_M64: #define __PRFCHW__ 1
 // CHECK_KNM_M64: #define __RDRND__ 1
 // CHECK_KNM_M64: #define __RTM__ 1
 // CHECK_KNM_M64: #define __SSE2_MATH__ 1
@@ -874,8 +886,11 @@
 // CHECK_SKX_M32: #define __MMX__ 1
 // CHECK_SKX_M32: #define __MPX__ 1
 // CHECK_SKX_M32: #define __PCLMUL__ 1
+// CHECK_SKX_M32: #define __PKU__ 1
 // CHECK_SKX_M32: #define __POPCNT__ 1
+// CHECK_SKX_M32: #define __PRFCHW__ 1
 // CHECK_SKX_M32: #define __RDRND__ 1
+// CHECK_SKX_M32: #define __RDSEED__ 1
 // CHECK_SKX_M32: #define __RTM__ 1
 // CHECK_SKX_M32: #define __SGX__ 1
 // CHECK_SKX_M32: #define __SSE2__ 1
@@ -888,11 +903,11 @@
 // CHECK_SKX_M32: #define __XSAVEOPT__ 1
 // CHECK_SKX_M32: #define __XSAVES__ 1
 // CHECK_SKX_M32: #define __XSAVE__ 1
+// CHECK_SKX_M32: #define __corei7 1
+// CHECK_SKX_M32: #define __corei7__ 1
 // CHECK_SKX_M32: #define __i386 1
 // CHECK_SKX_M32: #define __i386__ 1
-// CHECK_SKX_M32: #define __skx 1
-// CHECK_SKX_M32: #define __skx__ 1
-// CHECK_SKX_M32: #define __tune_skx__ 1
+// CHECK_SKX_M32: #define __tune_corei7__ 1
 // CHECK_SKX_M32: #define i386 1
 
 // RUN: %clang -march=skylake-avx512 -m64 -E -dM %s -o - 2>&1 \
@@ -916,8 +931,11 @@
 // CHECK_SKX_M64: #define __MMX__ 1
 // CHECK_SKX_M64: #define __MPX__ 1
 // CHECK_SKX_M64: #define __PCLMUL__ 1
+// CHECK_SKX_M64: #define __PKU__ 1
 // CHECK_SKX_M64: #define __POPCNT__ 1
+// CHECK_SKX_M64: #define __PRFCHW__ 1
 // CHECK_SKX_M64: #define __RDRND__ 1
+// CHECK_SKX_M64: #define __RDSEED__ 1
 // CHECK_SKX_M64: #define __RTM__ 1
 // CHECK_SKX_M64: #define __SGX__ 1
 // CHECK_SKX_M64: #define __SSE2_MATH__ 1
@@ -934,9 +952,9 @@
 // CHECK_SKX_M64: #define __XSAVE__ 1
 // CHECK_SKX_M64: #define __amd64 1
 // CHECK_SKX_M64: #define __amd64__ 1
-// CHECK_SKX_M64: #define __skx 1
-// CHECK_SKX_M64: #define __skx__ 1
-// CHECK_SKX_M64: #define __tune_skx__ 1
+// CHECK_SKX_M64: #define __corei7 1
+// CHECK_SKX_M64: #define __corei7__ 1
+// CHECK_SKX_M64: #define __tune_corei7__ 1
 // CHECK_SKX_M64: #define __x86_64 1
 // CHECK_SKX_M64: #define __x86_64__ 1
 //
@@ -956,14 +974,18 @@
 // CHECK_CNL_M32: #define __BMI2__ 1
 // CHECK_CNL_M32: #define __BMI__ 1
 // CHECK_CNL_M32: #define __CLFLUSHOPT__ 1
+// CHECK_CNL_M32-NOT: #define __CLWB__ 1
 // CHECK_CNL_M32: #define __F16C__ 1
 // CHECK_CNL_M32: #define __FMA__ 1
 // CHECK_CNL_M32: #define __LZCNT__ 1
 // CHECK_CNL_M32: #define __MMX__ 1
 // CHECK_CNL_M32: #define __MPX__ 1
 // CHECK_CNL_M32: #define __PCLMUL__ 1
+// CHECK_CNL_M32: #define __PKU__ 1
 // CHECK_CNL_M32: #define __POPCNT__ 1
+// CHECK_CNL_M32: #define __PRFCHW__ 1
 // CHECK_CNL_M32: #define __RDRND__ 1
+// CHECK_CNL_M32: #define __RDSEED__ 1
 // CHECK_CNL_M32: #define __RTM__ 1
 // CHECK_CNL_M32: #define __SGX__ 1
 // CHECK_CNL_M32: #define __SHA__ 1
@@ -977,8 +999,11 @@
 // CHECK_CNL_M32: #define __XSAVEOPT__ 1
 // CHECK_CNL_M32: #define __XSAVES__ 1
 // CHECK_CNL_M32: #define __XSAVE__ 1
+// CHECK_CNL_M32: #define __corei7 1
+// CHECK_CNL_M32: #define __corei7__ 1
 // CHECK_CNL_M32: #define __i386 1
 // CHECK_CNL_M32: #define __i386__ 1
+// CHECK_CNL_M32: #define __tune_corei7__ 1
 // CHECK_CNL_M32: #define i386 1
 //
 // RUN: %clang -march=cannonlake -m64 -E -dM %s -o - 2>&1 \
@@ -997,14 +1022,18 @@
 // CHECK_CNL_M64: #define __BMI2__ 1
 // CHECK_CNL_M64: #define __BMI__ 1
 // CHECK_CNL_M64: #define __CLFLUSHOPT__ 1
+// CHECK_CNL_M64-NOT: #define __CLWB__ 1
 // CHECK_CNL_M64: #define __F16C__ 1
 // CHECK_CNL_M64: #define __FMA__ 1
 // CHECK_CNL_M64: #define __LZCNT__ 1
 // CHECK_CNL_M64: #define __MMX__ 1
 // CHECK_CNL_M64: #define __MPX__ 1
 // CHECK_CNL_M64: #define __PCLMUL__ 1
+// CHECK_CNL_M64: #define __PKU__ 1
 // CHECK_CNL_M64: #define __POPCNT__ 1
+// CHECK_CNL_M64: #define __PRFCHW__ 1
 // CHECK_CNL_M64: #define __RDRND__ 1
+// CHECK_CNL_M64: #define __RDSEED__ 1
 // CHECK_CNL_M64: #define __RTM__ 1
 // CHECK_CNL_M64: #define __SGX__ 1
 // CHECK_CNL_M64: #define __SHA__ 1
@@ -1020,9 +1049,125 @@
 // CHECK_CNL_M64: #define __XSAVE__ 1
 // CHECK_CNL_M64: #define __amd64 1
 // CHECK_CNL_M64: #define __amd64__ 1
+// CHECK_CNL_M64: #define __corei7 1
+// CHECK_CNL_M64: #define __corei7__ 1
+// CHECK_CNL_M64: #define __tune_corei7__ 1
 // CHECK_CNL_M64: #define __x86_64 1
 // CHECK_CNL_M64: #define __x86_64__ 1
 
+// RUN: %clang -march=icelake -m32 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M32
+// CHECK_ICL_M32: #define __AES__ 1
+// CHECK_ICL_M32: #define __AVX2__ 1
+// CHECK_ICL_M32: #define __AVX512BITALG__ 1
+// CHECK_ICL_M32: #define __AVX512BW__ 1
+// CHECK_ICL_M32: #define __AVX512CD__ 1
+// CHECK_ICL_M32: #define __AVX512DQ__ 1
+// CHECK_ICL_M32: #define __AVX512F__ 1
+// CHECK_ICL_M32: #define __AVX512IFMA__ 1
+// CHECK_ICL_M32: #define __AVX512VBMI2__ 1
+// CHECK_ICL_M32: #define __AVX512VBMI__ 1
+// CHECK_ICL_M32: #define __AVX512VL__ 1
+// CHECK_ICL_M32: #define __AVX512VNNI__ 1
+// CHECK_ICL_M32: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_ICL_M32: #define __AVX__ 1
+// CHECK_ICL_M32: #define __BMI2__ 1
+// CHECK_ICL_M32: #define __BMI__ 1
+// CHECK_ICL_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ICL_M32: #define __CLWB__ 1
+// CHECK_ICL_M32: #define __F16C__ 1
+// CHECK_ICL_M32: #define __FMA__ 1
+// CHECK_ICL_M32: #define __GFNI__ 1
+// CHECK_ICL_M32: #define __LZCNT__ 1
+// CHECK_ICL_M32: #define __MMX__ 1
+// CHECK_ICL_M32: #define __MPX__ 1
+// CHECK_ICL_M32: #define __PCLMUL__ 1
+// CHECK_ICL_M32: #define __PKU__ 1
+// CHECK_ICL_M32: #define __POPCNT__ 1
+// CHECK_ICL_M32: #define __PRFCHW__ 1
+// CHECK_ICL_M32: #define __RDPID__ 1
+// CHECK_ICL_M32: #define __RDRND__ 1
+// CHECK_ICL_M32: #define __RDSEED__ 1
+// CHECK_ICL_M32: #define __RTM__ 1
+// CHECK_ICL_M32: #define __SGX__ 1
+// CHECK_ICL_M32: #define __SHA__ 1
+// CHECK_ICL_M32: #define __SSE2__ 1
+// CHECK_ICL_M32: #define __SSE3__ 1
+// CHECK_ICL_M32: #define __SSE4_1__ 1
+// CHECK_ICL_M32: #define __SSE4_2__ 1
+// CHECK_ICL_M32: #define __SSE__ 1
+// CHECK_ICL_M32: #define __SSSE3__ 1
+// CHECK_ICL_M32: #define __VAES__ 1
+// CHECK_ICL_M32: #define __VPCLMULQDQ__ 1
+// CHECK_ICL_M32: #define __XSAVEC__ 1
+// CHECK_ICL_M32: #define __XSAVEOPT__ 1
+// CHECK_ICL_M32: #define __XSAVES__ 1
+// CHECK_ICL_M32: #define __XSAVE__ 1
+// CHECK_ICL_M32: #define __corei7 1
+// CHECK_ICL_M32: #define __corei7__ 1
+// CHECK_ICL_M32: #define __i386 1
+// CHECK_ICL_M32: #define __i386__ 1
+// CHECK_ICL_M32: #define __tune_corei7__ 1
+// CHECK_ICL_M32: #define i386 1
+//
+// RUN: %clang -march=icelake -m64 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M64
+// CHECK_ICL_M64: #define __AES__ 1
+// CHECK_ICL_M64: #define __AVX2__ 1
+// CHECK_ICL_M64: #define __AVX512BITALG__ 1
+// CHECK_ICL_M64: #define __AVX512BW__ 1
+// CHECK_ICL_M64: #define __AVX512CD__ 1
+// CHECK_ICL_M64: #define __AVX512DQ__ 1
+// CHECK_ICL_M64: #define __AVX512F__ 1
+// CHECK_ICL_M64: #define __AVX512IFMA__ 1
+// CHECK_ICL_M64: #define __AVX512VBMI2__ 1
+// CHECK_ICL_M64: #define __AVX512VBMI__ 1
+// CHECK_ICL_M64: #define __AVX512VL__ 1
+// CHECK_ICL_M64: #define __AVX512VNNI__ 1
+// CHECK_ICL_M64: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_ICL_M64: #define __AVX__ 1
+// CHECK_ICL_M64: #define __BMI2__ 1
+// CHECK_ICL_M64: #define __BMI__ 1
+// CHECK_ICL_M64: #define __CLFLUSHOPT__ 1
+// CHECK_ICL_M64: #define __CLWB__ 1
+// CHECK_ICL_M64: #define __F16C__ 1
+// CHECK_ICL_M64: #define __FMA__ 1
+// CHECK_ICL_M64: #define __GFNI__ 1
+// CHECK_ICL_M64: #define __LZCNT__ 1
+// CHECK_ICL_M64: #define __MMX__ 1
+// CHECK_ICL_M64: #define __MPX__ 1
+// CHECK_ICL_M64: #define __PCLMUL__ 1
+// CHECK_ICL_M64: #define __PKU__ 1
+// CHECK_ICL_M64: #define __POPCNT__ 1
+// CHECK_ICL_M64: #define __PRFCHW__ 1
+// CHECK_ICL_M64: #define __RDPID__ 1
+// CHECK_ICL_M64: #define __RDRND__ 1
+// CHECK_ICL_M64: #define __RDSEED__ 1
+// CHECK_ICL_M64: #define __RTM__ 1
+// CHECK_ICL_M64: #define __SGX__ 1
+// CHECK_ICL_M64: #define __SHA__ 1
+// CHECK_ICL_M64: #define __SSE2__ 1
+// CHECK_ICL_M64: #define __SSE3__ 1
+// CHECK_ICL_M64: #define __SSE4_1__ 1
+// CHECK_ICL_M64: #define __SSE4_2__ 1
+// CHECK_ICL_M64: #define __SSE__ 1
+// CHECK_ICL_M64: #define __SSSE3__ 1
+// CHECK_ICL_M64: #define __VAES__ 1
+// CHECK_ICL_M64: #define __VPCLMULQDQ__ 1
+// CHECK_ICL_M64: #define __XSAVEC__ 1
+// CHECK_ICL_M64: #define __XSAVEOPT__ 1
+// CHECK_ICL_M64: #define __XSAVES__ 1
+// CHECK_ICL_M64: #define __XSAVE__ 1
+// CHECK_ICL_M64: #define __amd64 1
+// CHECK_ICL_M64: #define __amd64__ 1
+// CHECK_ICL_M64: #define __corei7 1
+// CHECK_ICL_M64: #define __corei7__ 1
+// CHECK_ICL_M64: #define __tune_corei7__ 1
+// CHECK_ICL_M64: #define __x86_64 1
+// CHECK_ICL_M64: #define __x86_64__ 1
+
 // RUN: %clang -march=atom -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATOM_M32
@@ -1066,6 +1211,7 @@
 // CHECK_GLM_M32: #define __MPX__ 1
 // CHECK_GLM_M32: #define __PCLMUL__ 1
 // CHECK_GLM_M32: #define __POPCNT__ 1
+// CHECK_GLM_M32: #define __PRFCHW__ 1
 // CHECK_GLM_M32: #define __RDRND__ 1
 // CHECK_GLM_M32: #define __RDSEED__ 1
 // CHECK_GLM_M32: #define __SHA__ 1
@@ -1098,6 +1244,7 @@
 // CHECK_GLM_M64: #define __MPX__ 1
 // CHECK_GLM_M64: #define __PCLMUL__ 1
 // CHECK_GLM_M64: #define __POPCNT__ 1
+// CHECK_GLM_M64: #define __PRFCHW__ 1
 // CHECK_GLM_M64: #define __RDRND__ 1
 // CHECK_GLM_M64: #define __RDSEED__ 1
 // CHECK_GLM_M64: #define __SSE2__ 1
@@ -1119,7 +1266,13 @@
 // RUN: %clang -march=slm -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M32
+// CHECK_SLM_M32: #define __AES__ 1
+// CHECK_SLM_M32: #define __FXSR__ 1
 // CHECK_SLM_M32: #define __MMX__ 1
+// CHECK_SLM_M32: #define __PCLMUL__ 1
+// CHECK_SLM_M32: #define __POPCNT__ 1
+// CHECK_SLM_M32: #define __PRFCHW__ 1
+// CHECK_SLM_M32: #define __RDRND__ 1
 // CHECK_SLM_M32: #define __SSE2__ 1
 // CHECK_SLM_M32: #define __SSE3__ 1
 // CHECK_SLM_M32: #define __SSE4_1__ 1
@@ -1135,7 +1288,13 @@
 // RUN: %clang -march=slm -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M64
+// CHECK_SLM_M64: #define __AES__ 1
+// CHECK_SLM_M64: #define __FXSR__ 1
 // CHECK_SLM_M64: #define __MMX__ 1
+// CHECK_SLM_M64: #define __PCLMUL__ 1
+// CHECK_SLM_M64: #define __POPCNT__ 1
+// CHECK_SLM_M64: #define __PRFCHW__ 1
+// CHECK_SLM_M64: #define __RDRND__ 1
 // CHECK_SLM_M64: #define __SSE2_MATH__ 1
 // CHECK_SLM_M64: #define __SSE2__ 1
 // CHECK_SLM_M64: #define __SSE3__ 1
@@ -1154,15 +1313,19 @@
 //
 // RUN: %clang -march=lakemont -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
-// RUN:   | FileCheck %s -check-prefix=CHECK_LMT_M32
-// CHECK_LMT_M32: #define __i386 1
-// CHECK_LMT_M32: #define __i386__ 1
-// CHECK_LMT_M32: #define __tune_lakemont__ 1
-// CHECK_LMT_M32: #define i386 1
+// RUN:   | FileCheck %s -check-prefix=CHECK_LAKEMONT_M32
+// CHECK_LAKEMONT_M32: #define __i386 1
+// CHECK_LAKEMONT_M32: #define __i386__ 1
+// CHECK_LAKEMONT_M32: #define __i586 1
+// CHECK_LAKEMONT_M32: #define __i586__ 1
+// CHECK_LAKEMONT_M32: #define __pentium 1
+// CHECK_LAKEMONT_M32: #define __pentium__ 1
+// CHECK_LAKEMONT_M32: #define __tune_lakemont__ 1
+// CHECK_LAKEMONT_M32: #define i386 1
 // RUN: not %clang -march=lakemont -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
-// RUN:   | FileCheck %s -check-prefix=CHECK_LMT_M64
-// CHECK_LMT_M64: error:
+// RUN:   | FileCheck %s -check-prefix=CHECK_LAKEMONT_M64
+// CHECK_LAKEMONT_M64: error:
 //
 // RUN: %clang -march=geode -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
@@ -2238,8 +2401,12 @@
 // CHECK_MYRIAD2: #define __leon__ 1
 // CHECK_MYRIAD2-1: #define __myriad2 1
 // CHECK_MYRIAD2-1: #define __myriad2__ 1
+// CHECK_MYRIAD2-2: #define __ma2x5x 1
+// CHECK_MYRIAD2-2: #define __ma2x5x__ 1
 // CHECK_MYRIAD2-2: #define __myriad2 2
 // CHECK_MYRIAD2-2: #define __myriad2__ 2
+// CHECK_MYRIAD2-3: #define __ma2x8x 1
+// CHECK_MYRIAD2-3: #define __ma2x8x__ 1
 // CHECK_MYRIAD2-3: #define __myriad2 3
 // CHECK_MYRIAD2-3: #define __myriad2__ 3
 // CHECK_SPARCEL: #define __sparc 1
diff --git a/test/Preprocessor/predefined-macros.c b/test/Preprocessor/predefined-macros.c
index 94b0b30..def1ef9 100644
--- a/test/Preprocessor/predefined-macros.c
+++ b/test/Preprocessor/predefined-macros.c
@@ -193,19 +193,81 @@
 // MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
 // MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
 
+// RUN: %clang_cc1 -triple i386-windows %s -E -dM -o - \
+// RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-X86-WIN
+
+// CHECK-X86-WIN-NOT: #define WIN32 1
+// CHECK-X86-WIN-NOT: #define WIN64 1
+// CHECK-X86-WIN-NOT: #define WINNT 1
+// CHECK-X86-WIN: #define _WIN32 1
+// CHECK-X86-WIN-NOT: #define _WIN64 1
+
+// RUN: %clang_cc1 -triple thumbv7-windows %s -E -dM -o - \
+// RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-ARM-WIN
+
+// CHECK-ARM-WIN-NOT: #define WIN32 1
+// CHECK-ARM-WIN-NOT: #define WIN64 1
+// CHECK-ARM-WIN-NOT: #define WINNT 1
+// CHECK-ARM-WIN: #define _WIN32 1
+// CHECK-ARM-WIN-NOT: #define _WIN64 1
+
+// RUN: %clang_cc1 -triple x86_64-windows %s -E -dM -o - \
+// RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-AMD64-WIN
+
+// CHECK-AMD64-WIN-NOT: #define WIN32 1
+// CHECK-AMD64-WIN-NOT: #define WIN64 1
+// CHECK-AMD64-WIN-NOT: #define WINNT 1
+// CHECK-AMD64-WIN: #define _WIN32 1
+// CHECK-AMD64-WIN: #define _WIN64 1
+
 // RUN: %clang_cc1 -triple aarch64-windows %s -E -dM -o - \
 // RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-ARM64-WIN
 
+// CHECK-ARM64-WIN-NOT: #define WIN32 1
+// CHECK-ARM64-WIN-NOT: #define WIN64 1
+// CHECK-ARM64-WIN-NOT: #define WINNT 1
 // CHECK-ARM64-WIN: #define _M_ARM64 1
 // CHECK-ARM64-WIN: #define _WIN32 1
 // CHECK-ARM64-WIN: #define _WIN64 1
 
+// RUN: %clang_cc1 -triple i686-windows-gnu %s -E -dM -o - \
+// RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-X86-MINGW
+
+// CHECK-X86-MINGW: #define WIN32 1
+// CHECK-X86-MINGW-NOT: #define WIN64 1
+// CHECK-X86-MINGW: #define WINNT 1
+// CHECK-X86-MINGW: #define _WIN32 1
+// CHECK-X86-MINGW-NOT: #define _WIN64 1
+
+// RUN: %clang_cc1 -triple thumbv7-windows-gnu %s -E -dM -o - \
+// RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-ARM-MINGW
+
+// CHECK-ARM-MINGW: #define WIN32 1
+// CHECK-ARM-MINGW-NOT: #define WIN64 1
+// CHECK-ARM-MINGW: #define WINNT 1
+// CHECK-ARM-MINGW: #define _WIN32 1
+// CHECK-ARM-MINGW-NOT: #define _WIN64 1
+
+// RUN: %clang_cc1 -triple x86_64-windows-gnu %s -E -dM -o - \
+// RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-AMD64-MINGW
+
+// CHECK-AMD64-MINGW: #define WIN32 1
+// CHECK-AMD64-MINGW: #define WIN64 1
+// CHECK-AMD64-MINGW: #define WINNT 1
+// CHECK-AMD64-MINGW: #define _WIN32 1
+// CHECK-AMD64-MINGW: #define _WIN64 1
+
 // RUN: %clang_cc1 -triple aarch64-windows-gnu %s -E -dM -o - \
 // RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-ARM64-MINGW
 
 // CHECK-ARM64-MINGW-NOT: #define _M_ARM64 1
 // CHECK-ARM64-MINGW: #define WIN32 1
 // CHECK-ARM64-MINGW: #define WIN64 1
+// CHECK-ARM64-MINGW: #define WINNT 1
 // CHECK-ARM64-MINGW: #define _WIN32 1
 // CHECK-ARM64-MINGW: #define _WIN64 1
 // CHECK-ARM64-MINGW: #define __aarch64__ 1
+
+// RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spir-unknown-unknown \
+// RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIR
+// CHECK-SPIR: #define __IMAGE_SUPPORT__ 1
diff --git a/test/Preprocessor/x86_target_features.c b/test/Preprocessor/x86_target_features.c
index ce3835f..37a4ffd 100644
--- a/test/Preprocessor/x86_target_features.c
+++ b/test/Preprocessor/x86_target_features.c
@@ -209,11 +209,54 @@
 // AVX512VBMI: #define __SSE__ 1
 // AVX512VBMI: #define __SSSE3__ 1
 
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALG %s
+
+// AVX512BITALG: #define __AVX2__ 1
+// AVX512BITALG: #define __AVX512BITALG__ 1
+// AVX512BITALG: #define __AVX512BW__ 1
+// AVX512BITALG: #define __AVX512F__ 1
+// AVX512BITALG: #define __AVX__ 1
+// AVX512BITALG: #define __SSE2_MATH__ 1
+// AVX512BITALG: #define __SSE2__ 1
+// AVX512BITALG: #define __SSE3__ 1
+// AVX512BITALG: #define __SSE4_1__ 1
+// AVX512BITALG: #define __SSE4_2__ 1
+// AVX512BITALG: #define __SSE_MATH__ 1
+// AVX512BITALG: #define __SSE__ 1
+// AVX512BITALG: #define __SSSE3__ 1
+
+
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512vbmi -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512VBMINOAVX512BW %s
 
 // AVX512VBMINOAVX512BW-NOT: #define __AVX512BW__ 1
 // AVX512VBMINOAVX512BW-NOT: #define __AVX512VBMI__ 1
 
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512vbmi2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512VBMI2 %s
+
+// AVX512VBMI2: #define __AVX2__ 1
+// AVX512VBMI2: #define __AVX512BW__ 1
+// AVX512VBMI2: #define __AVX512F__ 1
+// AVX512VBMI2: #define __AVX512VBMI2__ 1
+// AVX512VBMI2: #define __AVX__ 1
+// AVX512VBMI2: #define __SSE2_MATH__ 1
+// AVX512VBMI2: #define __SSE2__ 1
+// AVX512VBMI2: #define __SSE3__ 1
+// AVX512VBMI2: #define __SSE4_1__ 1
+// AVX512VBMI2: #define __SSE4_2__ 1
+// AVX512VBMI2: #define __SSE_MATH__ 1
+// AVX512VBMI2: #define __SSE__ 1
+// AVX512VBMI2: #define __SSSE3__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512vbmi2 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512VBMI2NOAVX512BW %s
+
+// AVX512VBMI2NOAVX512BW-NOT: #define __AVX512BW__ 1
+// AVX512VBMI2NOAVX512BW-NOT: #define __AVX512VBMI2__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALGNOAVX512BW %s
+
+// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BITALG__ 1
+// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BW__ 1
+
 // RUN: %clang -target i386-unknown-unknown -march=atom -msse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=SSE42POPCNT %s
 
 // SSE42POPCNT: #define __POPCNT__ 1
@@ -333,6 +376,14 @@
 
 // ADX: #define __ADX__ 1
 
+// RUN: %clang -target i386-unknown-unknown -mshstk -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=SHSTK %s
+
+// SHSTK: #define __SHSTK__ 1
+
+// RUN: %clang -target i386-unknown-unknown -mibt -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=IBT %s
+
+// IBT: #define __IBT__ 1
+
 // RUN: %clang -target i386-unknown-unknown -march=atom -mrdseed -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=RDSEED %s
 
 // RDSEED: #define __RDSEED__ 1
@@ -364,3 +415,31 @@
 // RUN: %clang -target i386-unknown-unknown -march=atom -mclflushopt -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=CLFLUSHOPT %s
 
 // CLFLUSHOPT: #define __CLFLUSHOPT__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mvaes -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VAES %s
+
+// VAES: #define __AES__ 1
+// VAES: #define __VAES__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mvaes -mno-aes -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VAESNOAES %s
+
+// VAESNOAES-NOT: #define __AES__ 1
+// VAESNOAES-NOT: #define __VAES__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mgfni -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=GFNI %s
+
+// GFNI: #define __GFNI__ 1
+// GFNI: #define __SSE2__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mvpclmulqdq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VPCLMULQDQ %s
+
+// VPCLMULQDQ: #define __PCLMUL__ 1
+// VPCLMULQDQ: #define __VPCLMULQDQ__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mvpclmulqdq -mno-pclmul -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VPCLMULQDQNOPCLMUL %s
+// VPCLMULQDQNOPCLMUL-NOT: #define __PCLMUL__ 1
+// VPCLMULQDQNOPCLMUL-NOT: #define __VPCLMULQDQ__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mrdpid -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=RDPID %s
+
+// RDPID: #define __RDPID__ 1
diff --git a/test/Profile/Inputs/c-captured.proftext b/test/Profile/Inputs/c-captured.proftext
index a35e67b..e82d4f5 100644
--- a/test/Profile/Inputs/c-captured.proftext
+++ b/test/Profile/Inputs/c-captured.proftext
@@ -1,23 +1,23 @@
 c-captured.c:__captured_stmt
-10
+42129
 2
 1
 1
 
 c-captured.c:__captured_stmt.1
-266
+4752450705
 3
 1
 10
 1
 
 main
-0
+24
 1
 1
 
 debug_captured
-650
+11043906705
 3
 1
 1
diff --git a/test/Profile/Inputs/c-counter-overflows.proftext b/test/Profile/Inputs/c-counter-overflows.proftext
index 5a3633e..b2e5dd1 100644
--- a/test/Profile/Inputs/c-counter-overflows.proftext
+++ b/test/Profile/Inputs/c-counter-overflows.proftext
@@ -1,5 +1,5 @@
 main
-285734896137
+10111551811706059223
 8
 1
 68719476720
diff --git a/test/Profile/Inputs/c-general.proftext b/test/Profile/Inputs/c-general.proftext
index 19e5bd3..c0d03b4 100644
--- a/test/Profile/Inputs/c-general.proftext
+++ b/test/Profile/Inputs/c-general.proftext
@@ -1,5 +1,5 @@
 simple_loops
-16515
+1245818015463121
 4
 1
 100
@@ -7,7 +7,7 @@
 75
 
 conditionals
-74917022372782735
+4190663230902537370
 11
 1
 100
@@ -22,7 +22,7 @@
 100
 
 early_exits
-44128811889290
+8265526549255474475
 9
 1
 0
@@ -35,7 +35,7 @@
 0
 
 jumps
-2016037664281362839
+15872630527555456493
 22
 1
 1
@@ -61,7 +61,7 @@
 9
 
 switches
-2745195701975551402
+11892326508727782373
 19
 1
 1
@@ -84,7 +84,7 @@
 0
 
 big_switch
-10218718452081869619
+16933280399284440835
 17
 1
 32
@@ -105,7 +105,7 @@
 2
 
 boolean_operators
-291222909838
+1245693242827665
 8
 1
 100
@@ -117,7 +117,7 @@
 50
 
 boolop_loops
-9760565944591
+11270260636676715317
 9
 1
 50
@@ -130,14 +130,14 @@
 26
 
 conditional_operator
-848
+54992
 3
 1
 0
 1
 
 do_fallthrough
-16586
+6898770640283947069
 4
 1
 10
@@ -145,12 +145,12 @@
 8
 
 main
-0
+24
 1
 1
 
 c-general.c:static_func
-4
+18129
 2
 1
 10
diff --git a/test/Profile/Inputs/c-unprofiled-blocks.proftext b/test/Profile/Inputs/c-unprofiled-blocks.proftext
index 87b48e1..ef7f653 100644
--- a/test/Profile/Inputs/c-unprofiled-blocks.proftext
+++ b/test/Profile/Inputs/c-unprofiled-blocks.proftext
@@ -1,5 +1,5 @@
 never_called
-44257542701577
+5644096560937528444
 9
 0
 0
@@ -12,12 +12,12 @@
 0
 
 main
-1
+24
 1
 1
 
 dead_code
-2859007309808137
+9636018207904213947
 10
 1
 0
diff --git a/test/Profile/Inputs/cxx-class.proftext b/test/Profile/Inputs/cxx-class.proftext
index 77645fb..1122006 100644
--- a/test/Profile/Inputs/cxx-class.proftext
+++ b/test/Profile/Inputs/cxx-class.proftext
@@ -1,52 +1,52 @@
 _Z14simple_wrapperv
-4
+18129
 2
 1
 100
 
 main
-0
+24
 1
 1
 
 _ZN6SimpleD1Ev
-10
+42129
 2
 0
 0
 
 _ZN6SimpleD2Ev
-10
+42129
 2
 100
 99
 
 _ZN6Simple6methodEv
-10
+42129
 2
 100
 99
 
 _ZN6SimpleC1Ei
-10
+42129
 2
 0
 0
 
 _ZN6SimpleC2Ei
-10
+42129
 2
 100
 99
 
 _ZN7DerivedC1Ev
-10
+42129
 2
 100
 99
 
 _ZN7DerivedD2Ev
-10
+42129
 2
 100
 99
diff --git a/test/Profile/Inputs/cxx-hash-v2.profdata.v5 b/test/Profile/Inputs/cxx-hash-v2.profdata.v5
new file mode 100644
index 0000000..5d7fb1f
--- /dev/null
+++ b/test/Profile/Inputs/cxx-hash-v2.profdata.v5
Binary files differ
diff --git a/test/Profile/Inputs/cxx-hash-v2.proftext b/test/Profile/Inputs/cxx-hash-v2.proftext
new file mode 100644
index 0000000..65c5645
--- /dev/null
+++ b/test/Profile/Inputs/cxx-hash-v2.proftext
@@ -0,0 +1,239 @@
+_Z11loop_returnv
+# Func Hash:
+1160721
+# Num Counters:
+2
+# Counter Values:
+0
+0
+
+_Z10loop_breakv
+# Func Hash:
+1160593
+# Num Counters:
+2
+# Counter Values:
+0
+0
+
+_Z8no_gotosv
+# Func Hash:
+1
+# Num Counters:
+2
+# Counter Values:
+0
+0
+
+_Z13loop_continuev
+# Func Hash:
+1160657
+# Num Counters:
+2
+# Counter Values:
+0
+0
+
+_Z18loop_after_if_elsev
+# Func Hash:
+11044458641
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+
+_Z21consecutive_try_catchv
+# Func Hash:
+49221687100497
+# Num Counters:
+5
+# Counter Values:
+0
+0
+0
+0
+0
+
+_Z16nested_try_catchv
+# Func Hash:
+49147600487505
+# Num Counters:
+5
+# Counter Values:
+0
+0
+0
+0
+0
+
+_Z13indirect_gotov
+# Func Hash:
+1345
+# Num Counters:
+2
+# Counter Values:
+0
+0
+
+_Z17nested_for_rangesv
+# Func Hash:
+1332305
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+
+_Z18loop_in_then_blockv
+# Func Hash:
+11040003281
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+
+_Z22consecutive_for_rangesv
+# Func Hash:
+1380689
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+
+_Z15consecutive_dosv
+# Func Hash:
+856273
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+
+_Z11direct_gotov
+# Func Hash:
+1281
+# Num Counters:
+2
+# Counter Values:
+0
+0
+
+main
+# Func Hash:
+24
+# Num Counters:
+1
+# Counter Values:
+1
+
+_Z11double_lnotv
+# Func Hash:
+174695569
+# Num Counters:
+2
+# Counter Values:
+0
+0
+
+_Z18if_inside_of_whilev
+# Func Hash:
+36250705
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+
+_Z11single_lnotv
+# Func Hash:
+2729105
+# Num Counters:
+2
+# Counter Values:
+0
+0
+
+_Z19if_outside_of_whilev
+# Func Hash:
+38053009
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+
+_Z8no_throwv
+# Func Hash:
+0
+# Num Counters:
+1
+# Counter Values:
+0
+
+_Z9has_throwv
+# Func Hash:
+25
+# Num Counters:
+1
+# Counter Values:
+0
+
+_Z10nested_dosv
+# Func Hash:
+799825
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+
+_Z16if_inside_of_forv
+# Func Hash:
+4750648401
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+
+_Z18loop_in_else_blockv
+# Func Hash:
+11044398161
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+
+_Z17if_outside_of_forv
+# Func Hash:
+4752450705
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+
+_Z10loop_emptyv
+# Func Hash:
+18129
+# Num Counters:
+2
+# Counter Values:
+0
+0
+
diff --git a/test/Profile/Inputs/cxx-lambda.proftext b/test/Profile/Inputs/cxx-lambda.proftext
index 36646b5..e49cd8d 100644
--- a/test/Profile/Inputs/cxx-lambda.proftext
+++ b/test/Profile/Inputs/cxx-lambda.proftext
@@ -1,17 +1,17 @@
 cxx-lambda.cpp:_ZZ7lambdasvENK3$_0clEi
-654
+11211970062
 3
 10
 9
 9
 
 main
-0
+24
 1
 1
 
 _Z7lambdasv
-41226
+2895087587861649
 4
 1
 1
diff --git a/test/Profile/Inputs/cxx-rangefor.proftext b/test/Profile/Inputs/cxx-rangefor.proftext
index 7d2d1ef..b597292 100644
--- a/test/Profile/Inputs/cxx-rangefor.proftext
+++ b/test/Profile/Inputs/cxx-rangefor.proftext
@@ -1,5 +1,5 @@
 _Z9range_forv
-0x000000000014a28a
+6169071350249721981
 5
 1
 4
@@ -8,6 +8,6 @@
 1
 
 main
-0
+24
 1
 1
diff --git a/test/Profile/Inputs/cxx-templates.proftext b/test/Profile/Inputs/cxx-templates.proftext
index 5ea8400..5b9d8e4 100644
--- a/test/Profile/Inputs/cxx-templates.proftext
+++ b/test/Profile/Inputs/cxx-templates.proftext
@@ -1,16 +1,16 @@
 main
-0
+24
 1
 1
 
 _Z4loopILj0EEvv
-4
+18129
 2
 1
 0
 
 _Z4loopILj100EEvv
-4
+18129
 2
 1
 100
diff --git a/test/Profile/Inputs/cxx-throws.proftext b/test/Profile/Inputs/cxx-throws.proftext
index 1d197b9..32fcf5d 100644
--- a/test/Profile/Inputs/cxx-throws.proftext
+++ b/test/Profile/Inputs/cxx-throws.proftext
@@ -1,5 +1,5 @@
 _Z6throwsv
-18359008150154
+340120998528097520
 9
 1
 100
@@ -12,14 +12,14 @@
 100
 
 _Z11unreachablei
-0x28a
+706946049169
 3
 1
 1
 0
 
 main
-0x2cc
+187765848
 3
 1
 1
diff --git a/test/Profile/Inputs/func-entry.proftext b/test/Profile/Inputs/func-entry.proftext
index f7c2052..1548c2d 100644
--- a/test/Profile/Inputs/func-entry.proftext
+++ b/test/Profile/Inputs/func-entry.proftext
@@ -1,10 +1,10 @@
 foo
-0
+24
 1
 1000
 
 main
-4
+1160280
 2
 1
 10000
diff --git a/test/Profile/Inputs/gcc-flag-compatibility.proftext b/test/Profile/Inputs/gcc-flag-compatibility.proftext
index 99d41bb..38eb9e5 100644
--- a/test/Profile/Inputs/gcc-flag-compatibility.proftext
+++ b/test/Profile/Inputs/gcc-flag-compatibility.proftext
@@ -1,5 +1,5 @@
 main
-4
+1160280
 2
 1
 100
diff --git a/test/Profile/Inputs/objc-general.proftext b/test/Profile/Inputs/objc-general.proftext
index 8d6771f..f0034ae 100644
--- a/test/Profile/Inputs/objc-general.proftext
+++ b/test/Profile/Inputs/objc-general.proftext
@@ -1,17 +1,30 @@
 objc-general.m:__13+[A foreach:]_block_invoke
-10
+42129
 2
 2
 1
 
 objc-general.m:+[A foreach:]
-6
+401
 2
 1
 2
 
 main
-0
+24
 1
 1
 
+consecutive_objc_for_ranges
+1642897
+3
+0
+0
+0
+
+nested_objc_for_ranges
+1598545
+3
+0
+0
+0
diff --git a/test/Profile/c-outdated-data.c b/test/Profile/c-outdated-data.c
index b686f94..2b97734 100644
--- a/test/Profile/c-outdated-data.c
+++ b/test/Profile/c-outdated-data.c
@@ -7,10 +7,10 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-outdated-data.c %s -o /dev/null -emit-llvm -fprofile-instrument-use-path=%t.profdata 2>&1 | FileCheck %s -check-prefix=NO_MISSING
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-outdated-data.c %s -o /dev/null -emit-llvm -Wprofile-instr-missing -fprofile-instrument-use-path=%t.profdata 2>&1 | FileCheck %s -check-prefix=WITH_MISSING
 
-// NO_MISSING: warning: profile data may be out of date: of 3 functions, 1 has mismatched data that will be ignored
+// NO_MISSING: warning: profile data may be out of date: of 3 functions, 2 have mismatched data that will be ignored
 // NO_MISSING-NOT: 1 has no data
 
-// WITH_MISSING: warning: profile data may be out of date: of 3 functions, 1 has mismatched data that will be ignored
+// WITH_MISSING: warning: profile data may be out of date: of 3 functions, 2 have mismatched data that will be ignored
 // WITH_MISSING: warning: profile data may be incomplete: of 3 functions, 1 has no data
 
 void no_usable_data() {
diff --git a/test/Profile/cxx-hash-v2.cpp b/test/Profile/cxx-hash-v2.cpp
new file mode 100644
index 0000000..995fe00
--- /dev/null
+++ b/test/Profile/cxx-hash-v2.cpp
@@ -0,0 +1,177 @@
+// REQUIRES: shell
+
+// Check that all of the hashes in this file are unique (i.e, that none of the
+// profiles for these functions are mutually interchangeable).
+//
+// RUN: llvm-profdata show -all-functions %S/Inputs/cxx-hash-v2.profdata.v5 | grep "Hash: 0x" | sort > %t.hashes
+// RUN: uniq %t.hashes > %t.hashes.unique
+// RUN: diff %t.hashes %t.hashes.unique
+
+// RUN: llvm-profdata merge %S/Inputs/cxx-hash-v2.proftext -o %t.profdata
+// RUN: %clang_cc1 -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-apple-macosx10.9 -main-file-name cxx-hash-v2.mm %s -o /dev/null -emit-llvm -fprofile-instrument-use-path=%t.profdata 2>&1 | FileCheck %s -allow-empty
+// RUN: %clang_cc1 -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-apple-macosx10.9 -main-file-name cxx-hash-v2.mm %s -o /dev/null -emit-llvm -fprofile-instrument-use-path=%S/Inputs/cxx-hash-v2.profdata.v5 2>&1 | FileCheck %s -allow-empty
+
+// CHECK-NOT: warning: profile data may be out of date
+
+int x;
+int arr[1] = {0};
+
+void loop_after_if_else() {
+  if (1)
+    x = 1;
+  else
+    x = 2;
+  while (0)
+    ++x;
+}
+
+void loop_in_then_block() {
+  if (1) {
+    while (0)
+      ++x;
+  } else {
+    x = 2;
+  }
+}
+
+void loop_in_else_block() {
+  if (1) {
+    x = 1;
+  } else {
+    while (0)
+      ++x;
+  }
+}
+
+void if_inside_of_for() {
+  for (x = 0; x < 0; ++x) {
+    x = 1;
+    if (1)
+      x = 2;
+  }
+}
+
+void if_outside_of_for() {
+  for (x = 0; x < 0; ++x)
+    x = 1;
+  if (1)
+    x = 2;
+}
+
+void if_inside_of_while() {
+  while (0) {
+    x = 1;
+    if (1)
+      x = 2;
+  }
+}
+
+void if_outside_of_while() {
+  while (0)
+    x = 1;
+  if (1)
+    x = 2;
+}
+
+void nested_dos() {
+  do {
+    do {
+      ++x;
+    } while (0);
+  } while (0);
+}
+
+void consecutive_dos() {
+  do {
+  } while (0);
+  do {
+    ++x;
+  } while (0);
+}
+
+void loop_empty() {
+  for (x = 0; x < 5; ++x) {}
+}
+
+void loop_return() {
+  for (x = 0; x < 5; ++x)
+    return;
+}
+
+void loop_continue() {
+  for (x = 0; x < 5; ++x)
+    continue;
+}
+
+void loop_break() {
+  for (x = 0; x < 5; ++x)
+    break;
+}
+
+void no_gotos() {
+  static void *dispatch[] = {&&done};
+  x = 0;
+done:
+  ++x;
+}
+
+void direct_goto() {
+  static void *dispatch[] = {&&done};
+  x = 0;
+  goto done;
+done:
+  ++x;
+}
+
+void indirect_goto() {
+  static void *dispatch[] = {&&done};
+  x = 0;
+  goto *dispatch[x];
+done:
+  ++x;
+}
+
+void nested_for_ranges() {
+  for (int a : arr)
+    for (int b : arr)
+      ++x;
+}
+
+void consecutive_for_ranges() {
+  for (int a : arr) {}
+  for (int b : arr)
+    ++x;
+}
+
+void nested_try_catch() {
+  try {
+    try {
+      ++x;
+    } catch (...) {}
+  } catch (...) {}
+}
+
+void consecutive_try_catch() {
+  try {} catch (...) {}
+  try {
+    ++x;
+  } catch (...) {}
+}
+
+void no_throw() {}
+
+void has_throw() {
+  throw 0;
+}
+
+void single_lnot() {
+  if (!x) {}
+}
+
+void double_lnot() {
+  if (!!x) {}
+}
+
+int main() {
+  return 0;
+}
diff --git a/test/Profile/objc-general.m b/test/Profile/objc-general.m
index b679627..7a4cac9 100644
--- a/test/Profile/objc-general.m
+++ b/test/Profile/objc-general.m
@@ -3,7 +3,9 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name objc-general.m %s -o - -emit-llvm -fblocks -fprofile-instrument=clang | FileCheck -check-prefix=PGOGEN %s
 
 // RUN: llvm-profdata merge %S/Inputs/objc-general.proftext -o %t.profdata
-// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name objc-general.m %s -o - -emit-llvm -fblocks -fprofile-instrument-use-path=%t.profdata | FileCheck -check-prefix=PGOUSE %s
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name objc-general.m %s -o - -emit-llvm -fblocks -fprofile-instrument-use-path=%t.profdata 2>&1 | FileCheck -check-prefix=PGOUSE %s
+
+// PGOUSE-NOT: warning: profile data may be out of date
 
 #ifdef HAVE_FOUNDATION
 
@@ -63,6 +65,20 @@
 }
 @end
 
+void nested_objc_for_ranges(NSArray *arr) {
+  int x = 0;
+  for (id a in arr)
+    for (id b in arr)
+      ++x;
+}
+
+void consecutive_objc_for_ranges(NSArray *arr) {
+  int x = 0;
+  for (id a in arr) {}
+  for (id b in arr)
+    ++x;
+}
+
 // PGOUSE-DAG: ![[FR1]] = !{!"branch_weights", i32 2, i32 3}
 // PGOUSE-DAG: ![[FR2]] = !{!"branch_weights", i32 3, i32 2}
 // PGOUSE-DAG: ![[BL1]] = !{!"branch_weights", i32 2, i32 2}
diff --git a/test/Refactor/Extract/ExtractExprIntoFunction.cpp b/test/Refactor/Extract/ExtractExprIntoFunction.cpp
new file mode 100644
index 0000000..0fccc92
--- /dev/null
+++ b/test/Refactor/Extract/ExtractExprIntoFunction.cpp
@@ -0,0 +1,70 @@
+// RUN: clang-refactor extract -selection=test:%s %s -- -std=c++14 2>&1 | grep -v CHECK | FileCheck %s
+
+
+void simpleExtractNoCaptures() {
+  int i = /*range=->+0:33*/1 + 2;
+}
+
+// CHECK: 1 '' results:
+// CHECK:      static int extracted() {
+// CHECK-NEXT: return 1 + 2;{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void simpleExtractNoCaptures() {
+// CHECK-NEXT:   int i = /*range=->+0:33*/extracted();{{$}}
+// CHECK-NEXT: }
+
+void simpleExtractStmtNoCaptures() {
+  /*range astatement=->+1:13*/int a = 1;
+  int b = 2;
+}
+// CHECK: 1 'astatement' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: int a = 1;
+// CHECK-NEXT: int b = 2;{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void simpleExtractStmtNoCaptures() {
+// CHECK-NEXT:   /*range astatement=->+1:13*/extracted();{{$}}
+// CHECK-NEXT: }
+
+
+void blankRangeNoExtraction() {
+  int i = /*range blank=*/1 + 2;
+}
+
+// CHECK: 1 'blank' results:
+// CHECK-NEXT: the provided selection does not overlap with the AST nodes of interest
+
+int outOfBodyCodeNoExtraction = /*range out_of_body_expr=->+0:72*/1 + 2;
+
+struct OutOfBodyStuff {
+  int FieldInit = /*range out_of_body_expr=->+0:58*/1 + 2;
+
+  void foo(int x =/*range out_of_body_expr=->+0:58*/1 + 2);
+};
+
+auto inFunctionOutOfBody() -> decltype(/*range out_of_body_expr=->+0:79*/1 + 2) {
+  struct OutOfBodyStuff {
+    int FieldInit = /*range out_of_body_expr=->+0:60*/1 + 2;
+
+    void foo(int x =/*range out_of_body_expr=->+0:60*/1 + 2);
+  };
+  enum E {
+    X = /*range out_of_body_expr=->+0:48*/1 + 2
+  };
+  int x = 0;
+  using T = decltype(/*range out_of_body_expr=->+0:61*/x + 3);
+  return x;
+}
+
+// CHECK: 8 'out_of_body_expr' results:
+// CHECK: the selected code is not a part of a function's / method's body
+
+void simpleExpressionNoExtraction() {
+  int i = /*range simple_expr=->+0:41*/1 + /*range simple_expr=->+0:76*/(2);
+  (void) /*range simple_expr=->+0:40*/i;
+  (void)/*range simple_expr=->+0:47*/"literal";
+  (void)/*range simple_expr=->+0:41*/'c';
+}
+
+// CHECK: 5 'simple_expr' results:
+// CHECK-NEXT: the selected expression is too simple to extract
diff --git a/test/Refactor/Extract/ExtractionSemicolonPolicy.cpp b/test/Refactor/Extract/ExtractionSemicolonPolicy.cpp
new file mode 100644
index 0000000..5caf9d4
--- /dev/null
+++ b/test/Refactor/Extract/ExtractionSemicolonPolicy.cpp
@@ -0,0 +1,192 @@
+// RUN: clang-refactor extract -selection=test:%s %s -- -std=c++11 -fcxx-exceptions | grep -v CHECK | FileCheck %s
+
+struct Rectangle { int width, height; };
+
+void extractStatement(const Rectangle &r) {
+  /*range adeclstmt=->+0:59*/int area = r.width * r.height;
+}
+// CHECK: 1 'adeclstmt' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: int area = r.width * r.height;{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void extractStatement(const Rectangle &r) {
+// CHECK-NEXT:   /*range adeclstmt=->+0:59*/extracted();{{$}}
+// CHECK-NEXT: }
+
+void extractStatementNoSemiIf(const Rectangle &r) {
+  /*range bextractif=->+2:4*/if (r.width) {
+    int x = r.height;
+  }
+}
+// CHECK: 1 'bextractif' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: if (r.width) {
+// CHECK-NEXT: int x = r.height;
+// CHECK-NEXT: }{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void extractStatementNoSemiIf(const Rectangle &r) {
+// CHECK-NEXT:   /*range bextractif=->+2:4*/extracted();{{$}}
+// CHECK-NEXT: }
+
+void extractStatementDontExtraneousSemi(const Rectangle &r) {
+  /*range cextractif=->+2:4*/if (r.width) {
+    int x = r.height;
+  } ;
+} //^ This semicolon shouldn't be extracted.
+// CHECK: 1 'cextractif' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: if (r.width) {
+// CHECK-NEXT: int x = r.height;
+// CHECK-NEXT: }{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void extractStatementDontExtraneousSemi(const Rectangle &r) {
+// CHECK-NEXT: extracted(); ;{{$}}
+// CHECK-NEXT: }
+
+void extractStatementNotSemiSwitch() {
+  /*range dextract=->+5:4*/switch (2) {
+  case 1:
+    break;
+  case 2:
+    break;
+  }
+}
+// CHECK: 1 'dextract' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: switch (2) {
+// CHECK-NEXT: case 1:
+// CHECK-NEXT:   break;
+// CHECK-NEXT: case 2:
+// CHECK-NEXT:   break;
+// CHECK-NEXT: }{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void extractStatementNotSemiSwitch() {
+// CHECK-NEXT: extracted();{{$}}
+// CHECK-NEXT: }
+
+void extractStatementNotSemiWhile() {
+  /*range eextract=->+2:4*/while (true) {
+    int x = 0;
+  }
+}
+// CHECK: 1 'eextract' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: while (true) {
+// CHECK-NEXT: int x = 0;
+// CHECK-NEXT: }{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void extractStatementNotSemiWhile() {
+// CHECK-NEXT: extracted();{{$}}
+// CHECK-NEXT: }
+
+void extractStatementNotSemiFor() {
+  /*range fextract=->+1:4*/for (int i = 0; i < 10; ++i) {
+  }
+}
+// CHECK: 1 'fextract' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: for (int i = 0; i < 10; ++i) {
+// CHECK-NEXT: }{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void extractStatementNotSemiFor() {
+// CHECK-NEXT: extracted();{{$}}
+// CHECK-NEXT: }
+
+struct XS {
+  int *begin() { return 0; }
+  int *end() { return 0; }
+};
+
+void extractStatementNotSemiRangedFor(XS xs) {
+  /*range gextract=->+1:4*/for (int i : xs) {
+  }
+}
+// CHECK: 1 'gextract' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: for (int i : xs) {
+// CHECK-NEXT: }{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void extractStatementNotSemiRangedFor(XS xs) {
+// CHECK-NEXT: extracted();{{$}}
+// CHECK-NEXT: }
+
+void extractStatementNotSemiRangedTryCatch() {
+  /*range hextract=->+3:4*/try { int x = 0; }
+  catch (const int &i) {
+    int y = i;
+  }
+}
+// CHECK: 1 'hextract' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: try { int x = 0; }
+// CHECK-NEXT: catch (const int &i) {
+// CHECK-NEXT:   int y = i;
+// CHECK-NEXT: }{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void extractStatementNotSemiRangedTryCatch() {
+// CHECK-NEXT: extracted();{{$}}
+// CHECK-NEXT: }
+
+void extractCantFindSemicolon() {
+  /*range iextract=->+1:17*/do {
+  } while (true)
+  // Add a semicolon in both the extracted and original function as we don't
+  // want to extract the semicolon below.
+  ;
+}
+// CHECK: 1 'iextract' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: do {
+// CHECK-NEXT: } while (true);{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void extractCantFindSemicolon() {
+// CHECK-NEXT: extracted();{{$}}
+// CHECK-NEXT: //
+// CHECK-NEXT: //
+// CHECK-NEXT: ;
+// CHECK-NEXT: }
+
+void extractFindSemicolon() {
+  /*range jextract=->+1:17*/do {
+  } while (true) /*grab*/ ;
+}
+// CHECK: 1 'jextract' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: do {
+// CHECK-NEXT: } while (true) /*grab*/ ;{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void extractFindSemicolon() {
+// CHECK-NEXT: extracted();{{$}}
+// CHECK-NEXT: }
+
+void call();
+
+void careForNonCompoundSemicolons1() {
+  /*range kextract=->+1:11*/if (true)
+    call();
+}
+// CHECK: 1 'kextract' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: if (true)
+// CHECK-NEXT: call();{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void careForNonCompoundSemicolons1() {
+// CHECK-NEXT: extracted();{{$}}
+// CHECK-NEXT: }
+
+void careForNonCompoundSemicolons2() {
+  /*range lextract=->+3:1*/for (int i = 0; i < 10; ++i)
+    while (i != 0)
+      ;
+  // end right here111!
+}
+// CHECK: 1 'lextract' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: for (int i = 0; i < 10; ++i)
+// CHECK-NEXT: while (i != 0)
+// CHECK-NEXT:   ;{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: void careForNonCompoundSemicolons2() {
+// CHECK-NEXT: extracted();{{$}}
+// CHECK-NEXT: //
+// CHECK-NEXT: }
diff --git a/test/Refactor/Extract/ExtractionSemicolonPolicy.m b/test/Refactor/Extract/ExtractionSemicolonPolicy.m
new file mode 100644
index 0000000..10e6a16
--- /dev/null
+++ b/test/Refactor/Extract/ExtractionSemicolonPolicy.m
@@ -0,0 +1,56 @@
+// RUN: clang-refactor extract -selection=test:%s %s -- 2>&1 | grep -v CHECK | FileCheck %s
+
+@interface NSArray
++ (id)arrayWithObjects:(const id [])objects count:(unsigned long)cnt;
+@end
+
+void extractStatementNoSemiObjCFor(NSArray *array) {
+  /*range astmt=->+2:4*/for (id i in array) {
+    int x = 0;
+  }
+}
+// CHECK: 1 'astmt' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: for (id i in array) {
+// CHECK-NEXT: int x = 0;
+// CHECK-NEXT: }{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+
+void extractStatementNoSemiSync() {
+  id lock;
+  /*range bstmt=->+2:4*/@synchronized(lock) {
+    int x = 0;
+  }
+}
+// CHECK: 1 'bstmt' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: @synchronized(lock) {
+// CHECK-NEXT: int x = 0;
+// CHECK-NEXT: }{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+
+void extractStatementNoSemiAutorel() {
+  /*range cstmt=->+2:4*/@autoreleasepool {
+    int x = 0;
+  }
+}
+// CHECK: 1 'cstmt' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: @autoreleasepool {
+// CHECK-NEXT: int x = 0;
+// CHECK-NEXT: }{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+
+void extractStatementNoSemiTryFinalllllly() {
+  /*range dstmt=->+3:4*/@try {
+    int x = 0;
+  } @finally {
+  }
+}
+// CHECK: 1 'dstmt' results:
+// CHECK:      static void extracted() {
+// CHECK-NEXT: @try {
+// CHECK-NEXT: int x = 0;
+// CHECK-NEXT: } @finally {
+// CHECK-NEXT: }{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
diff --git a/test/Refactor/Extract/FromMethodToFunction.cpp b/test/Refactor/Extract/FromMethodToFunction.cpp
new file mode 100644
index 0000000..86bec16
--- /dev/null
+++ b/test/Refactor/Extract/FromMethodToFunction.cpp
@@ -0,0 +1,42 @@
+// RUN: clang-refactor extract -selection=test:%s %s -- -std=c++11 2>&1 | grep -v CHECK | FileCheck --check-prefixes=CHECK,CHECK-INNER %s
+// RUN: clang-refactor extract -selection=test:%s %s -- -std=c++11 -DMULTIPLE 2>&1 | grep -v CHECK | FileCheck --check-prefixes=CHECK,CHECK-OUTER %s
+
+#ifdef MULTIPLE
+class OuterClass {
+#define PREFIX OuterClass ::
+#else
+#define PREFIX
+#endif
+
+class AClass {
+
+  int method(int x) {
+    return /*range inner=->+0:38*/1 + 2 * 2;
+  }
+// CHECK-INNER: 1 'inner' results:
+// CHECK-INNER:      static int extracted() {
+// CHECK-INNER-NEXT: return 1 + 2 * 2;{{$}}
+// CHECK-INNER-NEXT: }{{[[:space:]].*}}
+// CHECK-INNER-NEXT: class AClass {
+
+// CHECK-OUTER: 1 'inner' results:
+// CHECK-OUTER:      static int extracted() {
+// CHECK-OUTER-NEXT: return 1 + 2 * 2;{{$}}
+// CHECK-OUTER-NEXT: }{{[[:space:]].*}}
+// CHECK-OUTER-NEXT: class OuterClass {
+
+  int otherMethod(int x);
+};
+
+#ifdef MULTIPLE
+};
+#endif
+
+int PREFIX AClass::otherMethod(int x) {
+  return /*range outofline=->+0:46*/2 * 2 - 1;
+}
+// CHECK: 1 'outofline' results:
+// CHECK:      static int extracted() {
+// CHECK-NEXT: return 2 * 2 - 1;{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: int PREFIX AClass::otherMethod
diff --git a/test/Refactor/Extract/ObjCProperty.m b/test/Refactor/Extract/ObjCProperty.m
new file mode 100644
index 0000000..152ccb3
--- /dev/null
+++ b/test/Refactor/Extract/ObjCProperty.m
@@ -0,0 +1,41 @@
+// RUN: clang-refactor extract -selection=test:%s %s -- 2>&1 | grep -v CHECK | FileCheck %s
+
+@interface HasProperty
+
+@property (strong) HasProperty *item;
+
+- (HasProperty *)implicitProp;
+
+- (void)setImplicitSetter:(HasProperty *)value;
+
+@end
+
+@implementation HasProperty
+
+- (void)allowGetterExtraction {
+  /*range allow_getter=->+0:42*/self.item;
+  /*range allow_imp_getter=->+0:54*/self.implicitProp;
+}
+// CHECK: 1 'allow_getter' results:
+// CHECK:      extracted() {
+// CHECK-NEXT: return self.item;{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: - (void)allowGetterExtraction {
+// CHECK-NEXT: extracted();
+
+// CHECK: 1 'allow_imp_getter' results:
+// CHECK:      extracted() {
+// CHECK-NEXT: return self.implicitProp;{{$}}
+// CHECK-NEXT: }{{[[:space:]].*}}
+// CHECK-NEXT: - (void)allowGetterExtraction {
+// CHECK-NEXT: self.item;
+// CHECK-NEXT: extracted();
+
+- (void)prohibitSetterExtraction {
+  /*range prohibit_setter=->+0:45*/self.item = 0;
+  /*range prohibit_setter=->+0:55*/self.implicitSetter = 0;
+}
+// CHECK: 2 'prohibit_setter' results:
+// CHECK: the selected expression can't be extracted
+
+@end
diff --git a/test/Refactor/LocalRename/BuiltinOffsetof.cpp b/test/Refactor/LocalRename/BuiltinOffsetof.cpp
new file mode 100644
index 0000000..3119eeb
--- /dev/null
+++ b/test/Refactor/LocalRename/BuiltinOffsetof.cpp
@@ -0,0 +1,32 @@
+// RUN: clang-refactor local-rename -selection=test:%s -new-name=bar %s -- | grep -v CHECK | FileCheck %s
+
+struct Struct {
+  int /*range f=*/field;
+};
+
+struct Struct2 {
+  Struct /*range array=*/array[4][2];
+};
+
+void foo() {
+  (void)__builtin_offsetof(Struct, /*range f=*/field);
+  (void)__builtin_offsetof(Struct2, /*range array=*/array[1][0]./*range f=*/field);
+}
+
+#define OFFSET_OF_(X, Y) __builtin_offsetof(X, Y)
+
+class SubclassOffsetof : public Struct {
+  void foo() {
+    (void)OFFSET_OF_(SubclassOffsetof, field);
+  }
+};
+
+// CHECK: 2 'array' results:
+// CHECK: Struct /*range array=*/bar[4][2];
+// CHECK: __builtin_offsetof(Struct2, /*range array=*/bar[1][0]./*range f=*/field);
+
+// CHECK: 3 'f' results:
+// CHECK: int /*range f=*/bar;
+// CHECK: __builtin_offsetof(Struct, /*range f=*/bar);
+// CHECK-NEXT: __builtin_offsetof(Struct2, /*range array=*/array[1][0]./*range f=*/bar);
+// CHECK: OFFSET_OF_(SubclassOffsetof, bar);
diff --git a/test/Refactor/LocalRename/Field.cpp b/test/Refactor/LocalRename/Field.cpp
index 83ca2e5..e674401 100644
--- a/test/Refactor/LocalRename/Field.cpp
+++ b/test/Refactor/LocalRename/Field.cpp
@@ -1,9 +1,11 @@
-// RUN: clang-refactor local-rename -selection=test:%s -new-name=Bar %s -- | FileCheck %s
+// RUN: clang-refactor local-rename -selection=test:%s -new-name=Bar %s -- | grep -v CHECK | FileCheck %s
 
 class Baz {
-  int /*range=*/Foo; // CHECK: int /*range=*/Bar;
+  int /*range=*/Foo;
+  // CHECK: int /*range=*/Bar;
 public:
   Baz();
 };
 
-Baz::Baz() : /*range=*/Foo(0) {}  // CHECK: Baz::Baz() : /*range=*/Bar(0) {};
+Baz::Baz() : /*range=*/Foo(0) {}
+// CHECK: Baz::Baz() : /*range=*/Bar(0) {}
diff --git a/test/Refactor/LocalRename/NoSymbolSelectedError.cpp b/test/Refactor/LocalRename/NoSymbolSelectedError.cpp
index b6e96e1..98de61a 100644
--- a/test/Refactor/LocalRename/NoSymbolSelectedError.cpp
+++ b/test/Refactor/LocalRename/NoSymbolSelectedError.cpp
@@ -1,5 +1,5 @@
-// RUN: not clang-refactor local-rename -selection=%s:4:1 -new-name=Bar %s -- 2>&1 | FileCheck %s
-// RUN: clang-refactor local-rename -selection=test:%s -new-name=Bar %s -- 2>&1 | FileCheck --check-prefix=TESTCHECK %s
+// RUN: not clang-refactor local-rename -selection=%s:4:1 -new-name=Bar %s -- 2>&1 | grep -v CHECK | FileCheck %s
+// RUN: clang-refactor local-rename -selection=test:%s -new-name=Bar %s -- 2>&1 | grep -v CHECK | FileCheck --check-prefix=TESTCHECK %s
 
 class Baz { // CHECK: [[@LINE]]:1: error: there is no symbol at the given location
 };
diff --git a/test/Refactor/LocalRename/QualifiedRename.cpp b/test/Refactor/LocalRename/QualifiedRename.cpp
new file mode 100644
index 0000000..d9eb138
--- /dev/null
+++ b/test/Refactor/LocalRename/QualifiedRename.cpp
@@ -0,0 +1,24 @@
+// RUN: clang-refactor local-rename -old-qualified-name="foo::A" -new-qualified-name="bar::B" %s -- -std=c++11 2>&1 | grep -v CHECK | FileCheck %s
+
+namespace foo {
+class A {};
+}
+// CHECK: namespace foo {
+// CHECK-NEXT: class B {};
+// CHECK-NEXT: }
+
+namespace bar {
+void f(foo::A* a) {
+  foo::A b;
+}
+// CHECK: void f(B* a) {
+// CHECK-NEXT:   B b;
+// CHECK-NEXT: }
+}
+
+void f(foo::A* a) {
+  foo::A b;
+}
+// CHECK: void f(bar::B* a) {
+// CHECK-NEXT:   bar::B b;
+// CHECK-NEXT: }
diff --git a/test/Refactor/tool-apply-replacements.cpp b/test/Refactor/tool-apply-replacements.cpp
index 9f45950..59b0991 100644
--- a/test/Refactor/tool-apply-replacements.cpp
+++ b/test/Refactor/tool-apply-replacements.cpp
@@ -1,10 +1,8 @@
-// RUN: rm -f %t.cp.cpp
-// RUN: cp %s %t.cp.cpp
-// RUN: clang-refactor local-rename -selection=%t.cp.cpp:9:7 -new-name=test %t.cp.cpp --
-// RUN: grep -v CHECK %t.cp.cpp | FileCheck %t.cp.cpp
-// RUN: cp %s %t.cp.cpp
-// RUN: clang-refactor local-rename -selection=%t.cp.cpp:9:7-9:15 -new-name=test %t.cp.cpp --
-// RUN: grep -v CHECK %t.cp.cpp | FileCheck %t.cp.cpp
+// RUN: sed -e 's#//.*$##' %s > %t.cpp
+// RUN: clang-refactor local-rename -selection=%t.cpp:7:7 -new-name=test %t.cpp -- | FileCheck %s
+// RUN: clang-refactor local-rename -selection=%t.cpp:7:7-7:15 -new-name=test %t.cpp -- | FileCheck %s
+// RUN: clang-refactor local-rename -i -selection=%t.cpp:7:7 -new-name=test %t.cpp --
+// RUN: FileCheck -input-file=%t.cpp %s
 
 class RenameMe {
 // CHECK: class test {
diff --git a/test/Refactor/tool-test-support.c b/test/Refactor/tool-test-support.c
index a208255..0e073f6 100644
--- a/test/Refactor/tool-test-support.c
+++ b/test/Refactor/tool-test-support.c
@@ -10,10 +10,13 @@
 
 /*range named =+0*/int test5;
 
+/*range =->+0:22*/int test6;
+
 // CHECK: Test selection group '':
 // CHECK-NEXT:   105-105
 // CHECK-NEXT:   158-158
 // CHECK-NEXT:   197-197
+// CHECK-NEXT:   248-251
 // CHECK-NEXT: Test selection group 'named':
 // CHECK-NEXT:   132-132
 // CHECK-NEXT:   218-218
@@ -29,6 +32,8 @@
 // CHECK: invoking action 'local-rename':
 // CHECK-NEXT: -selection={{.*}}tool-test-support.c:9:29
 
+// CHECK: invoking action 'local-rename':
+// CHECK-NEXT: -selection={{.*}}tool-test-support.c:13:19 -> {{.*}}tool-test-support.c:13:22
 
 // The following invocations are in the 'named' group, and they follow
 // the default invocation even if some of their ranges occur prior to the
diff --git a/test/Sema/Inputs/pragma-align-no-header-change-warning.h b/test/Sema/Inputs/pragma-align-no-header-change-warning.h
new file mode 100644
index 0000000..6cd4e7d
--- /dev/null
+++ b/test/Sema/Inputs/pragma-align-no-header-change-warning.h
@@ -0,0 +1,5 @@
+#pragma options align=mac68k
+
+struct S { int x; };
+
+#pragma options align=reset
diff --git a/test/Sema/alloc-size.c b/test/Sema/alloc-size.c
index 7004a5a..5618151 100644
--- a/test/Sema/alloc-size.c
+++ b/test/Sema/alloc-size.c
@@ -3,14 +3,14 @@
 void *fail1(int a) __attribute__((alloc_size)); //expected-error{{'alloc_size' attribute takes at least 1 argument}}
 void *fail2(int a) __attribute__((alloc_size())); //expected-error{{'alloc_size' attribute takes at least 1 argument}}
 
-void *fail3(int a) __attribute__((alloc_size(0))); //expected-error{{'alloc_size' attribute parameter 0 is out of bounds}}
-void *fail4(int a) __attribute__((alloc_size(2))); //expected-error{{'alloc_size' attribute parameter 2 is out of bounds}}
+void *fail3(int a) __attribute__((alloc_size(0))); //expected-error{{'alloc_size' attribute parameter 1 is out of bounds}}
+void *fail4(int a) __attribute__((alloc_size(2))); //expected-error{{'alloc_size' attribute parameter 1 is out of bounds}}
 
-void *fail5(int a, int b) __attribute__((alloc_size(0, 1))); //expected-error{{'alloc_size' attribute parameter 0 is out of bounds}}
-void *fail6(int a, int b) __attribute__((alloc_size(3, 1))); //expected-error{{'alloc_size' attribute parameter 3 is out of bounds}}
+void *fail5(int a, int b) __attribute__((alloc_size(0, 1))); //expected-error{{'alloc_size' attribute parameter 1 is out of bounds}}
+void *fail6(int a, int b) __attribute__((alloc_size(3, 1))); //expected-error{{'alloc_size' attribute parameter 1 is out of bounds}}
 
-void *fail7(int a, int b) __attribute__((alloc_size(1, 0))); //expected-error{{'alloc_size' attribute parameter 0 is out of bounds}}
-void *fail8(int a, int b) __attribute__((alloc_size(1, 3))); //expected-error{{'alloc_size' attribute parameter 3 is out of bounds}}
+void *fail7(int a, int b) __attribute__((alloc_size(1, 0))); //expected-error{{'alloc_size' attribute parameter 2 is out of bounds}}
+void *fail8(int a, int b) __attribute__((alloc_size(1, 3))); //expected-error{{'alloc_size' attribute parameter 2 is out of bounds}}
 
 int fail9(int a) __attribute__((alloc_size(1))); //expected-warning{{'alloc_size' attribute only applies to return values that are pointers}}
 
diff --git a/test/Sema/annotate.c b/test/Sema/annotate.c
index 4a786d0..0d1a1c2 100644
--- a/test/Sema/annotate.c
+++ b/test/Sema/annotate.c
@@ -1,9 +1,13 @@
-// RUN: %clang_cc1 %s -fsyntax-only -verify
+// RUN: %clang_cc1 %s -fsyntax-only -fdouble-square-bracket-attributes -verify
 
 void __attribute__((annotate("foo"))) foo(float *a) {
   __attribute__((annotate("bar"))) int x;
+  [[clang::annotate("bar")]] int x2;
   __attribute__((annotate(1))) int y; // expected-error {{'annotate' attribute requires a string}}
+  [[clang::annotate(1)]] int y2; // expected-error {{'annotate' attribute requires a string}}
   __attribute__((annotate("bar", 1))) int z; // expected-error {{'annotate' attribute takes one argument}}
+  [[clang::annotate("bar", 1)]] int z2; // expected-error {{'annotate' attribute takes one argument}}
+
   int u = __builtin_annotation(z, (char*) 0); // expected-error {{second argument to __builtin_annotation must be a non-wide string constant}}
   int v = __builtin_annotation(z, (char*) L"bar"); // expected-error {{second argument to __builtin_annotation must be a non-wide string constant}}
   int w = __builtin_annotation(z, "foo");
diff --git a/test/Sema/artificial.c b/test/Sema/artificial.c
new file mode 100644
index 0000000..076a61c
--- /dev/null
+++ b/test/Sema/artificial.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+inline void __attribute__((artificial)) foo() {}
+void __attribute__((artificial)) bar() {} // expected-warning {{'artificial' attribute only applies to inline functions}}
diff --git a/test/Sema/assign.c b/test/Sema/assign.c
index 5bdfa9a..28d7b9f 100644
--- a/test/Sema/assign.c
+++ b/test/Sema/assign.c
@@ -11,10 +11,10 @@
 
 typedef int arr[10];
 void test3() {
-  const arr b;
-  const int b2[10]; 
-  b[4] = 1; // expected-error {{read-only variable is not assignable}}
-  b2[4] = 1; // expected-error {{read-only variable is not assignable}}
+  const arr b;      // expected-note {{variable 'b' declared const here}}
+  const int b2[10]; // expected-note {{variable 'b2' declared const here}}
+  b[4] = 1;         // expected-error {{cannot assign to variable 'b' with const-qualified type 'const arr' (aka 'int const[10]')}}
+  b2[4] = 1;        // expected-error {{cannot assign to variable 'b2' with const-qualified type 'const int [10]'}}
 }
 
 typedef struct I {
diff --git a/test/Sema/ast-print.c b/test/Sema/ast-print.c
index f701b12..83a08bf 100644
--- a/test/Sema/ast-print.c
+++ b/test/Sema/ast-print.c
@@ -15,6 +15,10 @@
   };
 };
 
+// This used to crash clang.
+struct {
+}(s1);
+
 int foo(const struct blah *b) {
   // CHECK: return b->b;
   return b->b;
diff --git a/test/Sema/attr-alias.c b/test/Sema/attr-alias.c
index 151052f..9313670 100644
--- a/test/Sema/attr-alias.c
+++ b/test/Sema/attr-alias.c
@@ -2,7 +2,4 @@
 
 void g() {}
 
-// It is important that the following string be in the error message. The gcc
-// testsuite looks for it to decide if a target supports aliases.
-
-void f() __attribute__((alias("g"))); //expected-error {{only weak aliases are supported}}
+void f() __attribute__((alias("g"))); //expected-error {{aliases are not supported on darwin}}
diff --git a/test/Sema/attr-availability-square-brackets.c b/test/Sema/attr-availability-square-brackets.c
new file mode 100644
index 0000000..13dbf0a
--- /dev/null
+++ b/test/Sema/attr-availability-square-brackets.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin9 -fsyntax-only -fdouble-square-bracket-attributes -verify %s
+
+void f0() [[clang::availability(macosx,introduced=10.4,deprecated=10.2)]]; // expected-warning{{feature cannot be deprecated in macOS version 10.2 before it was introduced in version 10.4; attribute ignored}}
+void f1() [[clang::availability(ios,obsoleted=2.1,deprecated=3.0)]];  // expected-warning{{feature cannot be obsoleted in iOS version 2.1 before it was deprecated in version 3.0; attribute ignored}}
+void f2() [[clang::availability(ios,introduced=2.1,deprecated=2.1)]];
+
+extern void
+ATSFontGetName(const char *oName) [[clang::availability(macosx,introduced=8.0,deprecated=9.0, message="use CTFontCopyFullName")]]; // expected-note {{'ATSFontGetName' has been explicitly marked deprecated here}}
+
+void test_10095131() {
+  ATSFontGetName("Hello"); // expected-warning {{'ATSFontGetName' is deprecated: first deprecated in macOS 9.0 - use CTFontCopyFullName}}
+}
+
+enum
+[[clang::availability(macos, unavailable)]]
+{
+    NSDataWritingFileProtectionWriteOnly = 0x30000000,
+    NSDataWritingFileProtectionCompleteUntilUserAuthentication = 0x40000000,
+};
+
+extern int x [[clang::availability(macosx,introduced=10.5)]];
+extern int x;
+
+int i [[clang::availability(this, should = 1.0)]]; // expected-error {{'should' is not an availability stage; use 'introduced', 'deprecated', or 'obsoleted'}} \
+                                                   // expected-warning {{unknown platform 'this' in availability macro}}
diff --git a/test/Sema/attr-capabilities.c b/test/Sema/attr-capabilities.c
index 88fdf30..21cbae9 100644
--- a/test/Sema/attr-capabilities.c
+++ b/test/Sema/attr-capabilities.c
@@ -11,8 +11,8 @@
 // Test an invalid capability name
 struct __attribute__((capability("wrong"))) IncorrectName {}; // expected-warning {{invalid capability name 'wrong'; capability name must be 'mutex' or 'role'}}
 
-int Test1 __attribute__((capability("test1")));  // expected-error {{'capability' attribute only applies to structs, unions, and typedefs}}
-int Test2 __attribute__((shared_capability("test2"))); // expected-error {{'shared_capability' attribute only applies to structs, unions, and typedefs}}
+int Test1 __attribute__((capability("test1")));  // expected-error {{'capability' attribute only applies to structs, unions, classes, and typedefs}}
+int Test2 __attribute__((shared_capability("test2"))); // expected-error {{'shared_capability' attribute only applies to structs, unions, classes, and typedefs}}
 int Test3 __attribute__((acquire_capability("test3")));  // expected-warning {{'acquire_capability' attribute only applies to functions}}
 int Test4 __attribute__((try_acquire_capability("test4"))); // expected-error {{'try_acquire_capability' attribute only applies to functions}}
 int Test5 __attribute__((release_capability("test5"))); // expected-warning {{'release_capability' attribute only applies to functions}}
diff --git a/test/Sema/attr-capabilities.cpp b/test/Sema/attr-capabilities.cpp
new file mode 100644
index 0000000..5bae94e
--- /dev/null
+++ b/test/Sema/attr-capabilities.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -fsyntax-only -Wthread-safety -verify %s
+
+class __attribute__((shared_capability("mutex"))) Mutex {
+ public:
+  void func1() __attribute__((assert_capability(this)));
+  void func2() __attribute__((assert_capability(!this)));
+
+  const Mutex& operator!() const { return *this; }
+};
+
+class NotACapability {
+ public:
+  void func1() __attribute__((assert_capability(this)));  // expected-warning {{'assert_capability' attribute requires arguments whose type is annotated with 'capability' attribute; type here is 'NotACapability *'}}
+  void func2() __attribute__((assert_capability(!this)));  // expected-warning {{'assert_capability' attribute requires arguments whose type is annotated with 'capability' attribute; type here is 'bool'}}
+
+  const NotACapability& operator!() const { return *this; }
+};
diff --git a/test/Sema/attr-cleanup.c b/test/Sema/attr-cleanup.c
index 26f283a..3669289 100644
--- a/test/Sema/attr-cleanup.c
+++ b/test/Sema/attr-cleanup.c
@@ -2,16 +2,16 @@
 
 void c1(int *a);
 
-extern int g1 __attribute((cleanup(c1))); // expected-warning {{'cleanup' attribute ignored}}
-int g2 __attribute((cleanup(c1))); // expected-warning {{'cleanup' attribute ignored}}
-static int g3 __attribute((cleanup(c1))); // expected-warning {{'cleanup' attribute ignored}}
+extern int g1 __attribute((cleanup(c1))); // expected-warning {{'cleanup' attribute only applies to local variables}}
+int g2 __attribute((cleanup(c1))); // expected-warning {{'cleanup' attribute only applies to local variables}}
+static int g3 __attribute((cleanup(c1))); // expected-warning {{'cleanup' attribute only applies to local variables}}
 
 void t1()
 {
     int v1 __attribute((cleanup)); // expected-error {{'cleanup' attribute takes one argument}}
     int v2 __attribute((cleanup(1, 2))); // expected-error {{'cleanup' attribute takes one argument}}
 
-    static int v3 __attribute((cleanup(c1))); // expected-warning {{'cleanup' attribute ignored}}
+    static int v3 __attribute((cleanup(c1))); // expected-warning {{'cleanup' attribute only applies to local variables}}
 
     int v4 __attribute((cleanup(h))); // expected-error {{use of undeclared identifier 'h'}}
 
@@ -46,3 +46,5 @@
 void t6(void) {
   int i __attribute__((cleanup((void *)0)));  // expected-error {{'cleanup' argument is not a function}}
 }
+
+void t7(__attribute__((cleanup(c4))) int a) {} // expected-warning {{'cleanup' attribute only applies to local variables}}
diff --git a/test/Sema/attr-cx2.c b/test/Sema/attr-cx2.c
new file mode 100644
index 0000000..5b53762
--- /dev/null
+++ b/test/Sema/attr-cx2.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -fsyntax-only -verify -fdouble-square-bracket-attributes %s
+
+struct S {};
+struct S * [[clang::address_space(1)]] Foo;
+
+enum [[clang::enum_extensibility(open)]] EnumOpen {
+  C0 = 1, C1 = 10
+};
+
+enum [[clang::flag_enum]] EnumFlag {
+  D0 = 1, D1 = 8
+};
+
+void foo(void *c) [[clang::overloadable]];
+void foo(char *c) [[clang::overloadable]];
+
+void context_okay(void *context [[clang::swift_context]]) [[clang::swiftcall]];
+void context_okay2(void *context [[clang::swift_context]], void *selfType, char **selfWitnessTable) [[clang::swiftcall]];
+
+void *f1(void) [[clang::ownership_returns(foo)]];
+void *f2() [[clang::ownership_returns(foo)]]; // expected-warning {{'ownership_returns' attribute only applies to non-K&R-style functions}}
+
+void foo2(void) [[clang::unavailable("not available - replaced")]]; // expected-note {{'foo2' has been explicitly marked unavailable here}}
+void bar(void) {
+  foo2(); // expected-error {{'foo2' is unavailable: not available - replaced}}
+}
diff --git a/test/Sema/attr-disable-tail-calls.c b/test/Sema/attr-disable-tail-calls.c
index 4574d5e..e8f5bcc 100644
--- a/test/Sema/attr-disable-tail-calls.c
+++ b/test/Sema/attr-disable-tail-calls.c
@@ -8,6 +8,6 @@
   __asm__("");
 }
 
-int g0 __attribute__((disable_tail_calls)); // expected-warning {{'disable_tail_calls' attribute only applies to functions and methods}}
+int g0 __attribute__((disable_tail_calls)); // expected-warning {{'disable_tail_calls' attribute only applies to functions and Objective-C methods}}
 
 int foo3(int a) __attribute__((disable_tail_calls("abc"))); // expected-error {{'disable_tail_calls' attribute takes no arguments}}
diff --git a/test/Sema/attr-external-source-symbol.c b/test/Sema/attr-external-source-symbol.c
index af6e6bc..dfed609 100644
--- a/test/Sema/attr-external-source-symbol.c
+++ b/test/Sema/attr-external-source-symbol.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -fblocks -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fblocks -verify -fdouble-square-bracket-attributes %s
 
 void threeClauses() __attribute__((external_source_symbol(language="Swift", defined_in="module", generated_declaration)));
 
@@ -17,3 +17,15 @@
       return 1;
   };
 }
+
+void threeClauses2() [[clang::external_source_symbol(language="Swift", defined_in="module", generated_declaration)]];
+
+void twoClauses2() [[clang::external_source_symbol(language="Swift", defined_in="module")]];
+
+void fourClauses2()
+[[clang::external_source_symbol(language="Swift", defined_in="module", generated_declaration, generated_declaration)]]; // expected-error {{duplicate 'generated_declaration' clause in an 'external_source_symbol' attribute}}
+
+void oneClause2() [[clang::external_source_symbol(generated_declaration)]];
+
+void noArguments2()
+[[clang::external_source_symbol]]; // expected-error {{'external_source_symbol' attribute takes at least 1 argument}}
diff --git a/test/Sema/attr-ifunc.c b/test/Sema/attr-ifunc.c
index 8f9c22f..16fd0df 100644
--- a/test/Sema/attr-ifunc.c
+++ b/test/Sema/attr-ifunc.c
@@ -5,7 +5,7 @@
 #if defined(_WIN32)
 void foo() {}
 void bar() __attribute__((ifunc("foo")));
-//expected-warning@-1 {{'ifunc' attribute ignored}}
+//expected-warning@-1 {{unknown attribute 'ifunc' ignored}}
 
 #else
 #if defined(CHECK_ALIASES)
diff --git a/test/Sema/attr-minsize.c b/test/Sema/attr-minsize.c
index 7b1c6ae..d2374b6 100644
--- a/test/Sema/attr-minsize.c
+++ b/test/Sema/attr-minsize.c
@@ -2,4 +2,4 @@
 
 int foo() __attribute__((__minsize__));
 
-int var1 __attribute__((__minsize__)); // expected-error{{'__minsize__' attribute only applies to functions and methods}}
+int var1 __attribute__((__minsize__)); // expected-error{{'__minsize__' attribute only applies to functions and Objective-C methods}}
diff --git a/test/Sema/attr-mode.c b/test/Sema/attr-mode.c
index e160d8d..c0e0426 100644
--- a/test/Sema/attr-mode.c
+++ b/test/Sema/attr-mode.c
@@ -26,8 +26,8 @@
 
 int **__attribute((mode(QI)))* i32;  // expected-error{{mode attribute}}
 
-__attribute__((mode(QI))) int invalid_func() { return 1; } // expected-error{{'mode' attribute only applies to variables, enums, fields and typedefs}}
-enum invalid_enum { A1 __attribute__((mode(QI))) }; // expected-error{{'mode' attribute only applies to variables, enums, fields and typedefs}}
+__attribute__((mode(QI))) int invalid_func() { return 1; } // expected-error{{'mode' attribute only applies to variables, enums, typedefs, and non-static data members}}
+enum invalid_enum { A1 __attribute__((mode(QI))) }; // expected-error{{'mode' attribute only applies to}}
 
 typedef _Complex double c32 __attribute((mode(SC)));
 int c32_test[sizeof(c32) == 8 ? 1 : -1];
diff --git a/test/Sema/attr-nodebug.c b/test/Sema/attr-nodebug.c
index e7ca58d..3557784 100644
--- a/test/Sema/attr-nodebug.c
+++ b/test/Sema/attr-nodebug.c
@@ -2,7 +2,7 @@
 
 int a __attribute__((nodebug));
 
-void b(int p __attribute__((nodebug))) { // expected-warning {{'nodebug' attribute only applies to variables and functions}}
+void b(int p __attribute__((nodebug))) { // expected-warning {{'nodebug' attribute only applies to functions, function pointers, Objective-C methods, and variables}}
   int b __attribute__((nodebug));
 }
 
diff --git a/test/Sema/attr-objc-bridge-related.m b/test/Sema/attr-objc-bridge-related.m
new file mode 100644
index 0000000..bf059ba
--- /dev/null
+++ b/test/Sema/attr-objc-bridge-related.m
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -verify -fsyntax-only -fdouble-square-bracket-attributes %s
+
+struct [[clang::objc_bridge_related(NSParagraphStyle,,)]] TestBridgedRef;
+
+struct [[clang::objc_bridge_related(NSColor,colorWithCGColor:,CGColor)]] CGColorRefOk;
+struct [[clang::objc_bridge_related(,colorWithCGColor:,CGColor)]] CGColorRef1NotOk; // expected-error {{expected a related ObjectiveC class name, e.g., 'NSColor'}}
+struct [[clang::objc_bridge_related(NSColor,colorWithCGColor::,CGColor)]] CGColorRef3NotOk; // expected-error {{expected a class method selector with single argument, e.g., 'colorWithCGColor:'}}
diff --git a/test/Sema/attr-ownership.c b/test/Sema/attr-ownership.c
index fa21b03..ff6c719 100644
--- a/test/Sema/attr-ownership.c
+++ b/test/Sema/attr-ownership.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -verify
+// RUN: %clang_cc1 %s -verify -fsyntax-only
 
 void f1(void) __attribute__((ownership_takes("foo"))); // expected-error {{'ownership_takes' attribute requires parameter 1 to be an identifier}}
 void *f2(void) __attribute__((ownership_returns(foo, 1, 2)));  // expected-error {{'ownership_returns' attribute takes no more than 1 argument}}
diff --git a/test/Sema/attr-ownership.cpp b/test/Sema/attr-ownership.cpp
new file mode 100644
index 0000000..cde195f
--- /dev/null
+++ b/test/Sema/attr-ownership.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+class C {
+  void f(int, int)
+      __attribute__((ownership_returns(foo, 2)))  // expected-note {{declared with index 2 here}}
+      __attribute__((ownership_returns(foo, 3))); // expected-error {{'ownership_returns' attribute index does not match; here it is 3}}
+};
diff --git a/test/Sema/attr-print.c b/test/Sema/attr-print.c
index 7ffbbb8..16b440d 100644
--- a/test/Sema/attr-print.c
+++ b/test/Sema/attr-print.c
@@ -7,6 +7,9 @@
 // CHECK: int y __declspec(align(4));
 __declspec(align(4)) int y;
 
+// CHECK: short arr[3] __attribute__((aligned));
+short arr[3] __attribute__((aligned));
+
 // CHECK: void foo() __attribute__((const));
 void foo() __attribute__((const));
 
diff --git a/test/Sema/attr-print.cpp b/test/Sema/attr-print.cpp
new file mode 100644
index 0000000..1828c3b
--- /dev/null
+++ b/test/Sema/attr-print.cpp
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 %s -ast-print | FileCheck %s
+
+// CHECK: void xla(int a) __attribute__((xray_log_args(1)));
+void xla(int a) __attribute__((xray_log_args(1)));
+
+// CHECK: void *as2(int, int) __attribute__((alloc_size(1, 2)));
+void *as2(int, int) __attribute__((alloc_size(1, 2)));
+// CHECK: void *as1(void *, int) __attribute__((alloc_size(2)));
+void *as1(void *, int) __attribute__((alloc_size(2)));
+
+// CHECK: void fmt(int, const char *, ...) __attribute__((format(printf, 2, 3)));
+void fmt(int, const char *, ...) __attribute__((format(printf, 2, 3)));
+
+// CHECK: char *fmta(int, const char *) __attribute__((format_arg(2)));
+char *fmta(int, const char *) __attribute__((format_arg(2)));
+
+// CHECK: void nn(int *, int *) __attribute__((nonnull(1, 2)));
+void nn(int *, int *) __attribute__((nonnull(1, 2)));
+
+// CHECK: int *aa(int i) __attribute__((alloc_align(1)));
+int *aa(int i) __attribute__((alloc_align(1)));
+
+// CHECK: void ownt(int *, int *) __attribute__((ownership_takes(foo, 1, 2)));
+void ownt(int *, int *) __attribute__((ownership_takes(foo, 1, 2)));
+// CHECK: void ownh(int *, int *) __attribute__((ownership_holds(foo, 1, 2)));
+void ownh(int *, int *) __attribute__((ownership_holds(foo, 1, 2)));
+// CHECK: void ownr(int) __attribute__((ownership_returns(foo, 1)));
+void ownr(int) __attribute__((ownership_returns(foo, 1)));
+
+// CHECK: void awtt(int, int, ...) __attribute__((argument_with_type_tag(foo, 3, 2)));
+void awtt(int, int, ...) __attribute__((argument_with_type_tag(foo, 3, 2)));
+// CHECK: void pwtt(void *, int) __attribute__((pointer_with_type_tag(foo, 1, 2)));
+void pwtt(void *, int) __attribute__((pointer_with_type_tag(foo, 1, 2)));
+
+class C {
+  // CHECK: void xla(int a) __attribute__((xray_log_args(2)));
+  void xla(int a) __attribute__((xray_log_args(2)));
+
+  // CHECK: void *as2(int, int) __attribute__((alloc_size(2, 3)));
+  void *as2(int, int) __attribute__((alloc_size(2, 3)));
+  // CHECK: void *as1(void *, int) __attribute__((alloc_size(3)));
+  void *as1(void *, int) __attribute__((alloc_size(3)));
+
+  // CHECK: void fmt(int, const char *, ...) __attribute__((format(printf, 3, 4)));
+  void fmt(int, const char *, ...) __attribute__((format(printf, 3, 4)));
+
+  // CHECK: char *fmta(int, const char *) __attribute__((format_arg(3)));
+  char *fmta(int, const char *) __attribute__((format_arg(3)));
+
+  // CHECK: void nn(int *, int *) __attribute__((nonnull(2, 3)));
+  void nn(int *, int *) __attribute__((nonnull(2, 3)));
+
+  // CHECK: int *aa(int i) __attribute__((alloc_align(2)));
+  int *aa(int i) __attribute__((alloc_align(2)));
+
+  // CHECK: void ownt(int *, int *) __attribute__((ownership_takes(foo, 2, 3)));
+  void ownt(int *, int *) __attribute__((ownership_takes(foo, 2, 3)));
+  // CHECK: void ownh(int *, int *) __attribute__((ownership_holds(foo, 2, 3)));
+  void ownh(int *, int *) __attribute__((ownership_holds(foo, 2, 3)));
+  // CHECK: void ownr(int) __attribute__((ownership_returns(foo, 2)));
+  void ownr(int) __attribute__((ownership_returns(foo, 2)));
+
+  // CHECK: void awtt(int, int, ...) __attribute__((argument_with_type_tag(foo, 4, 3)));
+  void awtt(int, int, ...) __attribute__((argument_with_type_tag(foo, 4, 3)));
+  // CHECK: void pwtt(void *, int) __attribute__((pointer_with_type_tag(foo, 2, 3)));
+  void pwtt(void *, int) __attribute__((pointer_with_type_tag(foo, 2, 3)));
+};
diff --git a/test/Sema/attr-section.c b/test/Sema/attr-section.c
index b361738..bc42474 100644
--- a/test/Sema/attr-section.c
+++ b/test/Sema/attr-section.c
@@ -10,7 +10,7 @@
 
 // PR6007
 void test() {
-  __attribute__((section("NEAR,x"))) int n1; // expected-error {{'section' attribute only applies to functions, methods, properties, and global variables}}
+  __attribute__((section("NEAR,x"))) int n1; // expected-error {{'section' attribute only applies to functions, global variables, Objective-C methods, and Objective-C properties}}
   __attribute__((section("NEAR,x"))) static int n2; // ok.
 }
 
@@ -18,7 +18,7 @@
 void __attribute__((section("foo,zed"))) test2(void); // expected-note {{previous attribute is here}}
 void __attribute__((section("bar,zed"))) test2(void) {} // expected-warning {{section does not match previous declaration}}
 
-enum __attribute__((section("NEAR,x"))) e { one }; // expected-error {{'section' attribute only applies to functions, methods, properties, and global variables}}
+enum __attribute__((section("NEAR,x"))) e { one }; // expected-error {{'section' attribute only applies to}}
 
 extern int a; // expected-note {{previous declaration is here}}
 int *b = &a;
diff --git a/test/Sema/attr-target-ast.c b/test/Sema/attr-target-ast.c
new file mode 100644
index 0000000..6e8497e
--- /dev/null
+++ b/test/Sema/attr-target-ast.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -ast-dump %s | FileCheck %s
+
+int __attribute__((target("arch=hiss,arch=woof"))) pine_tree() { return 4; }
+// CHECK-NOT: arch=hiss
+// CHECK-NOT: arch=woof
diff --git a/test/Sema/attr-target-mv-bad-target.c b/test/Sema/attr-target-mv-bad-target.c
new file mode 100644
index 0000000..9cf3c5e
--- /dev/null
+++ b/test/Sema/attr-target-mv-bad-target.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-windows-pc  -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple arm-none-eabi  -fsyntax-only -verify %s
+
+int __attribute__((target("sse4.2"))) redecl1(void) { return 1; }
+//expected-error@+2 {{function multiversioning is not supported on the current target}}
+//expected-note@-2 {{previous declaration is here}}
+int __attribute__((target("avx")))  redecl1(void) { return 2; }
+
+//expected-error@+1 {{function multiversioning is not supported on the current target}}
+int __attribute__((target("default"))) with_def(void) { return 1;}
diff --git a/test/Sema/attr-target-mv.c b/test/Sema/attr-target-mv.c
new file mode 100644
index 0000000..671adff
--- /dev/null
+++ b/test/Sema/attr-target-mv.c
@@ -0,0 +1,103 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu  -fsyntax-only -verify %s
+
+void __attribute__((target("sse4.2"))) no_default(void);
+void __attribute__((target("arch=sandybridge")))  no_default(void);
+
+void use1(void){
+  // expected-error@+1 {{no matching function for call to 'no_default'}}
+  no_default();
+}
+
+void __attribute__((target("sse4.2"))) has_def(void);
+void __attribute__((target("default")))  has_def(void);
+
+void use2(void){
+  // expected-error@+2 {{reference to overloaded function could not be resolved; did you mean to call it?}}
+  // expected-note@-4 {{possible target for call}}
+  +has_def;
+}
+
+int __attribute__((target("sse4.2"))) no_proto();
+// expected-error@-1 {{multiversioned function must have a prototype}}
+// expected-note@+1 {{function multiversioning caused by this declaration}}
+int __attribute__((target("arch=sandybridge"))) no_proto();
+
+// The following should all be legal, since they are just redeclarations.
+int __attribute__((target("sse4.2"))) redecl1(void);
+int __attribute__((target("sse4.2"))) redecl1(void) { return 1; }
+int __attribute__((target("arch=sandybridge")))  redecl1(void) { return 2; }
+
+int __attribute__((target("sse4.2"))) redecl2(void) { return 1; }
+int __attribute__((target("sse4.2"))) redecl2(void);
+int __attribute__((target("arch=sandybridge")))  redecl2(void) { return 2; }
+
+int __attribute__((target("sse4.2"))) redecl3(void) { return 0; }
+int __attribute__((target("arch=ivybridge"))) redecl3(void) { return 1; }
+int __attribute__((target("arch=sandybridge")))  redecl3(void);
+int __attribute__((target("arch=sandybridge")))  redecl3(void) { return 2; }
+
+int __attribute__((target("sse4.2"))) redecl4(void) { return 1; }
+int __attribute__((target("arch=sandybridge")))  redecl4(void) { return 2; }
+int __attribute__((target("arch=sandybridge")))  redecl4(void);
+
+int __attribute__((target("sse4.2"))) redef(void) { return 1; }
+int __attribute__((target("arch=ivybridge"))) redef(void) { return 1; }
+int __attribute__((target("arch=sandybridge")))  redef(void) { return 2; }
+// expected-error@+2 {{redefinition of 'redef'}}
+// expected-note@-2 {{previous definition is here}}
+int __attribute__((target("arch=sandybridge")))  redef(void) { return 2; }
+
+int __attribute__((target("default"))) redef2(void) { return 1;}
+// expected-error@+2 {{redefinition of 'redef2'}}
+// expected-note@-2 {{previous definition is here}}
+int __attribute__((target("default"))) redef2(void) { return 1;}
+
+int __attribute__((target("sse4.2"))) mv_after_use(void) { return 1; }
+int use3(void) {
+  return mv_after_use();
+}
+
+// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}}
+int __attribute__((target("arch=sandybridge")))  mv_after_use(void) { return 2; }
+
+int __attribute__((target("sse4.2,arch=sandybridge"))) mangle(void) { return 1; }
+//expected-error@+2 {{multiversioned function redeclarations require identical target attributes}}
+//expected-note@-2 {{previous declaration is here}}
+int __attribute__((target("arch=sandybridge,sse4.2")))  mangle(void) { return 2; }
+
+int prev_no_target(void);
+int __attribute__((target("arch=sandybridge")))  prev_no_target(void) { return 2; }
+// expected-error@-2 {{function declaration is missing 'target' attribute in a multiversioned function}}
+// expected-note@+1 {{function multiversioning caused by this declaration}}
+int __attribute__((target("arch=ivybridge")))  prev_no_target(void) { return 2; }
+
+int __attribute__((target("arch=sandybridge")))  prev_no_target2(void);
+int prev_no_target2(void);
+// expected-error@-1 {{function declaration is missing 'target' attribute in a multiversioned function}}
+// expected-note@+1 {{function multiversioning caused by this declaration}}
+int __attribute__((target("arch=ivybridge")))  prev_no_target2(void);
+
+void __attribute__((target("sse4.2"))) addtl_attrs(void);
+//expected-error@+1 {{attribute 'target' multiversioning cannot be combined}}
+void __attribute__((used,target("arch=sandybridge")))  addtl_attrs(void);
+
+//expected-error@+1 {{attribute 'target' multiversioning cannot be combined}}
+void __attribute__((target("default"), used)) addtl_attrs2(void);
+
+//expected-error@+2 {{attribute 'target' multiversioning cannot be combined}}
+//expected-note@+2 {{function multiversioning caused by this declaration}}
+void __attribute__((used,target("sse4.2"))) addtl_attrs3(void);
+void __attribute__((target("arch=sandybridge")))  addtl_attrs3(void);
+
+void __attribute__((target("sse4.2"))) addtl_attrs4(void);
+void __attribute__((target("arch=sandybridge")))  addtl_attrs4(void);
+//expected-error@+1 {{attribute 'target' multiversioning cannot be combined}}
+void __attribute__((used,target("arch=ivybridge")))  addtl_attrs4(void);
+
+int __attribute__((target("sse4.2"))) diff_cc(void);
+// expected-error@+1 {{multiversioned function declaration has a different calling convention}}
+__vectorcall int __attribute__((target("arch=sandybridge")))  diff_cc(void);
+
+int __attribute__((target("sse4.2"))) diff_ret(void);
+// expected-error@+1 {{multiversioned function declaration has a different return type}}
+short __attribute__((target("arch=sandybridge")))  diff_ret(void);
diff --git a/test/Sema/attr-target-unsupported.c b/test/Sema/attr-target-unsupported.c
new file mode 100644
index 0000000..8e23fcc
--- /dev/null
+++ b/test/Sema/attr-target-unsupported.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple mips-linux-gnu  -fsyntax-only -verify %s
+
+void __attribute__((target("arch=mips1")))
+foo(void) {}
+// expected-error@+3 {{function multiversioning is not supported on the current target}}
+// expected-note@-2 {{previous declaration is here}}
+void __attribute__((target("arch=mips2")))
+foo(void) {}
+
+// expected-error@+2 {{function multiversioning is not supported on the current target}}
+void __attribute__((target("default")))
+bar(void){}
diff --git a/test/Sema/attr-target.c b/test/Sema/attr-target.c
index 058bfc4..644d7cb 100644
--- a/test/Sema/attr-target.c
+++ b/test/Sema/attr-target.c
@@ -1,14 +1,21 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu  -fsyntax-only -verify %s
 
 int __attribute__((target("avx,sse4.2,arch=ivybridge"))) foo() { return 4; }
-int __attribute__((target())) bar() { return 4; } //expected-error {{'target' attribute takes one argument}}
-int __attribute__((target("tune=sandybridge"))) baz() { return 4; } //expected-warning {{ignoring unsupported 'tune=' in the target attribute string}}
-int __attribute__((target("fpmath=387"))) walrus() { return 4; } //expected-warning {{ignoring unsupported 'fpmath=' in the target attribute string}}
-int __attribute__((target("avx,sse4.2,arch=hiss"))) meow() {  return 4; }//expected-warning {{ignoring unsupported architecture 'hiss' in the target attribute string}}
-int __attribute__((target("woof"))) bark() {  return 4; }//expected-warning {{ignoring unsupported 'woof' in the target attribute string}}
-int __attribute__((target("arch="))) turtle() { return 4; } // no warning, same as saying 'nothing'.
-int __attribute__((target("arch=hiss,arch=woof"))) pine_tree() { return 4; } //expected-warning {{ignoring unsupported architecture 'hiss' in the target attribute string}}
-int __attribute__((target("arch=ivybridge,arch=haswell"))) oak_tree() { return 4; } //expected-warning {{ignoring duplicate 'arch=' in the target attribute string}}
-
+//expected-error@+1 {{'target' attribute takes one argument}}
+int __attribute__((target())) bar() { return 4; }
+//expected-warning@+1 {{unsupported 'tune=' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("tune=sandybridge"))) baz() { return 4; }
+//expected-warning@+1 {{unsupported 'fpmath=' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("fpmath=387"))) walrus() { return 4; }
+//expected-warning@+1 {{unsupported architecture 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("avx,sse4.2,arch=hiss"))) meow() {  return 4; }
+//expected-warning@+1 {{unsupported 'woof' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("woof"))) bark() {  return 4; }
+// no warning, same as saying 'nothing'.
+int __attribute__((target("arch="))) turtle() { return 4; }
+//expected-warning@+1 {{unsupported architecture 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("arch=hiss,arch=woof"))) pine_tree() { return 4; }
+//expected-warning@+1 {{duplicate 'arch=' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("arch=ivybridge,arch=haswell"))) oak_tree() { return 4; }
 
 
diff --git a/test/Sema/attr-type-safety.c b/test/Sema/attr-type-safety.c
new file mode 100644
index 0000000..d7dab5d
--- /dev/null
+++ b/test/Sema/attr-type-safety.c
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -fsyntax-only -fdouble-square-bracket-attributes -verify %s
+
+struct A {};
+
+typedef struct A *MPI_Datatype;
+
+extern struct A datatype_wrong1 [[clang::type_tag_for_datatype]]; // expected-error {{'type_tag_for_datatype' attribute requires parameter 1 to be an identifier}}
+
+extern struct A datatype_wrong2 [[clang::type_tag_for_datatype(mpi,1,2)]]; // expected-error {{expected a type}}
+
+extern struct A datatype_wrong3 [[clang::type_tag_for_datatype(mpi,not_a_type)]]; // expected-error {{unknown type name 'not_a_type'}}
+
+extern struct A datatype_wrong4 [[clang::type_tag_for_datatype(mpi,int,int)]]; // expected-error {{expected identifier}}
+
+extern struct A datatype_wrong5 [[clang::type_tag_for_datatype(mpi,int,not_a_flag)]]; // expected-error {{invalid comparison flag 'not_a_flag'}}
+
+extern struct A datatype_wrong6 [[clang::type_tag_for_datatype(mpi,int,layout_compatible,not_a_flag)]]; // expected-error {{invalid comparison flag 'not_a_flag'}}
+
+extern struct A A_tag [[clang::type_tag_for_datatype(a,int)]];
+extern struct A B_tag [[clang::type_tag_for_datatype(b,int)]];
+
+static const int C_tag [[clang::type_tag_for_datatype(c,int)]] = 10;
+static const int D_tag [[clang::type_tag_for_datatype(d,int)]] = 20;
+
+[[clang::pointer_with_type_tag]] // expected-error {{'pointer_with_type_tag' attribute requires exactly 3 arguments}}
+int wrong1(void *buf, MPI_Datatype datatype);
+
+[[clang::pointer_with_type_tag(mpi,0,7)]]  // expected-error {{attribute parameter 2 is out of bounds}}
+int wrong2(void *buf, MPI_Datatype datatype);
+
+[[clang::pointer_with_type_tag(mpi,3,7)]] // expected-error {{attribute parameter 2 is out of bounds}}
+int wrong3(void *buf, MPI_Datatype datatype);
+
+[[clang::pointer_with_type_tag(mpi,1,0)]] // expected-error {{attribute parameter 3 is out of bounds}}
+int wrong4(void *buf, MPI_Datatype datatype);
+
+[[clang::pointer_with_type_tag(mpi,1,3)]] // expected-error {{attribute parameter 3 is out of bounds}}
+int wrong5(void *buf, MPI_Datatype datatype);
+
+[[clang::pointer_with_type_tag(mpi,0x8000000000000001ULL,1)]] // expected-error {{attribute parameter 2 is out of bounds}}
+int wrong6(void *buf, MPI_Datatype datatype);
+
+[[clang::pointer_with_type_tag(a,1,2)]] void A_func(void *ptr, void *tag);
+[[clang::pointer_with_type_tag(c,1,2)]] void C_func(void *ptr, int tag);
+
diff --git a/test/Sema/attr-used.c b/test/Sema/attr-used.c
index 4e3bda7..c13d57c 100644
--- a/test/Sema/attr-used.c
+++ b/test/Sema/attr-used.c
@@ -3,7 +3,7 @@
 extern int l0 __attribute__((used)); // expected-warning {{'used' attribute ignored}}
 __private_extern__ int l1 __attribute__((used)); // expected-warning {{'used' attribute ignored}}
 
-struct __attribute__((used)) s { // expected-warning {{'used' attribute only applies to variables and functions}}
+struct __attribute__((used)) s { // expected-warning {{'used' attribute only applies to variables with non-local storage, functions, and Objective-C methods}}
   int x;
 };
 
@@ -14,7 +14,7 @@
 
 void f1() {
   static int a __attribute__((used));
-  int b __attribute__((used)); // expected-warning {{'used' attribute ignored}}
+  int b __attribute__((used)); // expected-warning {{'used' attribute only applies to variables with non-local storage, functions, and Objective-C methods}}
 }
 
 static void __attribute__((used)) f0(void);
diff --git a/test/Sema/attr-weak.c b/test/Sema/attr-weak.c
index df74554..e3610ca 100644
--- a/test/Sema/attr-weak.c
+++ b/test/Sema/attr-weak.c
@@ -5,10 +5,10 @@
 int g2 __attribute__((weak));
 int g3 __attribute__((weak_import)); // expected-warning {{'weak_import' attribute cannot be specified on a definition}}
 int __attribute__((weak_import)) g4(void);
-void __attribute__((weak_import)) g5(void) { 
+void __attribute__((weak_import)) g5(void) {
 }
 
-struct __attribute__((weak)) s0 {}; // expected-warning {{'weak' attribute only applies to variables and functions}}
+struct __attribute__((weak)) s0 {}; // expected-warning {{'weak' attribute only applies to variables, functions, and classes}}
 struct __attribute__((weak_import)) s1 {}; // expected-warning {{'weak_import' attribute only applies to variables and functions}}
 
 static int x __attribute__((weak)); // expected-error {{weak declaration cannot have internal linkage}}
diff --git a/test/Sema/bitfield.c b/test/Sema/bitfield.c
index d625366..13e9480 100644
--- a/test/Sema/bitfield.c
+++ b/test/Sema/bitfield.c
@@ -82,3 +82,7 @@
 struct Test6 {
   : 0.0; // expected-error{{type name requires a specifier or qualifier}}
 };
+
+struct PR36157 {
+  int n : 1 ? 1 : implicitly_declare_function(); // expected-warning {{invalid in C99}}
+};
diff --git a/test/Sema/builtin-assume-aligned.c b/test/Sema/builtin-assume-aligned.c
index 33c1b74..057a500 100644
--- a/test/Sema/builtin-assume-aligned.c
+++ b/test/Sema/builtin-assume-aligned.c
@@ -47,7 +47,7 @@
 int test_int_assume_aligned(void) __attribute__((assume_aligned(16))); // expected-warning {{'assume_aligned' attribute only applies to return values that are pointers}}
 void *test_ptr_assume_aligned(void) __attribute__((assume_aligned(64))); // no-warning
 
-int j __attribute__((assume_aligned(8))); // expected-warning {{'assume_aligned' attribute only applies to functions and methods}}
+int j __attribute__((assume_aligned(8))); // expected-warning {{'assume_aligned' attribute only applies to Objective-C methods and functions}}
 void *test_no_fn_proto() __attribute__((assume_aligned(32))); // no-warning
 void *test_with_fn_proto(void) __attribute__((assume_aligned(128))); // no-warning
 
diff --git a/test/Sema/builtin-object-size.c b/test/Sema/builtin-object-size.c
index 300c739..096882a 100644
--- a/test/Sema/builtin-object-size.c
+++ b/test/Sema/builtin-object-size.c
@@ -91,3 +91,22 @@
 
   return n;
 }
+
+typedef struct {
+  char string[512];
+} NestedArrayStruct;
+
+typedef struct {
+  int x;
+  NestedArrayStruct session[];
+} IncompleteArrayStruct;
+
+void rd36094951_IAS_builtin_object_size_assertion(IncompleteArrayStruct *p) {
+#define rd36094951_CHECK(mode)                                                 \
+  __builtin___strlcpy_chk(p->session[0].string, "ab", 2,                       \
+                          __builtin_object_size(p->session[0].string, mode))
+  rd36094951_CHECK(0);
+  rd36094951_CHECK(1);
+  rd36094951_CHECK(2);
+  rd36094951_CHECK(3);
+}
diff --git a/test/Sema/builtins-arm.c b/test/Sema/builtins-arm.c
index 668b828..22572e0 100644
--- a/test/Sema/builtins-arm.c
+++ b/test/Sema/builtins-arm.c
@@ -2,6 +2,8 @@
 // RUN: %clang_cc1 -triple armv7 -target-abi apcs-gnu \
 // RUN:   -fsyntax-only -verify %s
 
+#include <arm_acle.h>
+
 void f(void *a, void *b) {
   __clear_cache(); // expected-error {{too few arguments to function call, expected 2, have 0}} // expected-note {{'__clear_cache' is a builtin with type 'void (void *, void *)}}
   __clear_cache(a); // expected-error {{too few arguments to function call, expected 2, have 1}}
@@ -136,3 +138,200 @@
   __builtin_arm_mrrc2(15, a, 0); // expected-error {{argument to '__builtin_arm_mrrc2' must be a constant integer}}
   __builtin_arm_mrrc2(15, 0, a); // expected-error {{argument to '__builtin_arm_mrrc2' must be a constant integer}}
 }
+
+void test_9_3_multiplications(int a, int b) {
+  int r;
+  r = __builtin_arm_smulbb(a, b);
+  r = __builtin_arm_smulbb(1, -9);
+
+  r = __builtin_arm_smulbt(a, b);
+  r = __builtin_arm_smulbt(0, b);
+
+  r = __builtin_arm_smultb(a, b);
+  r = __builtin_arm_smultb(5, b);
+
+  r = __builtin_arm_smultt(a, b);
+  r = __builtin_arm_smultt(a, -1);
+
+  r = __builtin_arm_smulwb(a, b);
+  r = __builtin_arm_smulwb(1, 2);
+
+  r = __builtin_arm_smulwt(a, b);
+  r = __builtin_arm_smulwt(-1, -2);
+  r = __builtin_arm_smulwt(-1.0f, -2);
+}
+
+void test_9_4_1_width_specified_saturation(int a, int b) {
+  unsigned u;
+  int s;
+
+  s = __builtin_arm_ssat(8, 2);
+  s = __builtin_arm_ssat(a, 1);
+  s = __builtin_arm_ssat(a, 32);
+  s = __builtin_arm_ssat(a, 0);   // expected-error {{argument should be a value from 1 to 32}}
+  s = __builtin_arm_ssat(a, 33);  // expected-error {{argument should be a value from 1 to 32}}
+  s = __builtin_arm_ssat(a, b);   // expected-error {{argument to '__builtin_arm_ssat' must be a constant integer}}
+
+  u = __builtin_arm_usat(8, 2);
+  u = __builtin_arm_usat(a, 0);
+  u = __builtin_arm_usat(a, 31);
+  u = __builtin_arm_usat(a, 32);  // expected-error {{argument should be a value from 0 to 31}}
+  u = __builtin_arm_usat(a, b);   // expected-error {{argument to '__builtin_arm_usat' must be a constant integer}}
+}
+
+void test_9_4_2_saturating_addition_subtraction(int a, int b) {
+  int s;
+  s = __builtin_arm_qadd(a, b);
+  s = __builtin_arm_qadd(-1, 0);
+
+  s = __builtin_arm_qsub(a, b);
+  s = __builtin_arm_qsub(0, -1);
+
+  s = __builtin_arm_qdbl(a);
+}
+
+void test_9_4_3_accumulating_multiplications(int a, int b, int c) {
+  int s;
+
+  s = __builtin_arm_smlabb(a, b, c);
+  s = __builtin_arm_smlabb(1, b, c);
+  s = __builtin_arm_smlabb(a, 2, c);
+  s = __builtin_arm_smlabb(a, b, -3);
+
+  s = __builtin_arm_smlabt(a, b, c);
+  s = __builtin_arm_smlabt(1, b, c);
+  s = __builtin_arm_smlabt(a, 2, c);
+  s = __builtin_arm_smlabt(a, b, -3);
+
+  s = __builtin_arm_smlatb(a, b, c);
+  s = __builtin_arm_smlatt(1, b, c);
+  s = __builtin_arm_smlawb(a, 2, c);
+  s = __builtin_arm_smlawt(a, b, -3);
+}
+
+void test_9_5_4_parallel_16bit_saturation(int16x2_t a) {
+  unsigned u;
+  int s;
+
+  s = __builtin_arm_ssat16(a, 1);
+  s = __builtin_arm_ssat16(a, 16);
+  s = __builtin_arm_ssat16(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  s = __builtin_arm_ssat16(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+
+  u = __builtin_arm_usat16(a, 0);
+  u = __builtin_arm_usat16(a, 15);
+  u = __builtin_arm_usat16(a, 16); // expected-error {{argument should be a value from 0 to 15}}
+}
+
+void test_9_5_5_packing_and_unpacking(int16x2_t a, int8x4_t b, uint16x2_t c, uint8x4_t d) {
+  int16x2_t x;
+  uint16x2_t y;
+
+  x = __builtin_arm_sxtab16(a, b);
+  x = __builtin_arm_sxtab16(1, -1);
+  x = __builtin_arm_sxtb16(b);
+  x = __builtin_arm_sxtb16(-b);
+
+  y = __builtin_arm_uxtab16(c, d);
+  y = __builtin_arm_uxtab16(-1, -2);
+  y = __builtin_arm_uxtb16(d);
+  y = __builtin_arm_uxtb16(-1);
+}
+
+uint8x4_t
+test_9_5_6_parallel_selection(uint8x4_t a, uint8x4_t b) {
+  return __builtin_arm_sel(a, b);
+}
+
+void test_9_5_7_parallel_8bit_addition_substraction(int8x4_t a, int8x4_t b,
+                                                    uint8x4_t c, uint8x4_t d) {
+  int8x4_t s;
+  uint8x4_t u;
+
+  s = __builtin_arm_qadd8(a, b);
+  s = __builtin_arm_qsub8(a, b);
+  s = __builtin_arm_sadd8(a, b);
+  s = __builtin_arm_shadd8(a, b);
+  s = __builtin_arm_shsub8(a, b);
+  s = __builtin_arm_ssub8(a, b);
+
+  u = __builtin_arm_uadd8(c, d);
+  u = __builtin_arm_uhadd8(c, d);
+  u = __builtin_arm_uhsub8(c, d);
+  u = __builtin_arm_uqadd8(c, d);
+  u = __builtin_arm_uqsub8(c, d);
+  u = __builtin_arm_usub8(c, d);
+}
+
+void test_9_5_8_absolute_differences(uint8x4_t a, uint8x4_t b, uint32_t c) {
+  uint32_t r;
+
+  r = __builtin_arm_usad8(a, b);
+  r = __builtin_arm_usada8(a, b, c);
+}
+
+void test_9_5_9_parallel_addition_and_subtraction(int16x2_t a, int16x2_t b,
+                                                  uint16x2_t c, uint16x2_t d) {
+  int16x2_t x;
+  uint16x2_t y;
+
+  x = __builtin_arm_qadd16(a, b);
+  x = __builtin_arm_qasx(a, b);
+  x = __builtin_arm_qsax(a, b);
+  x = __builtin_arm_qsub16(a, b);
+  x = __builtin_arm_sadd16(a, b);
+  x = __builtin_arm_sasx(a, b);
+  x = __builtin_arm_shadd16(a, b);
+  x = __builtin_arm_shasx(a, b);
+  x = __builtin_arm_shsax(a, b);
+  x = __builtin_arm_shsub16(a, b);
+  x = __builtin_arm_ssax(a, b);
+  x = __builtin_arm_ssub16(a, b);
+
+  y = __builtin_arm_uadd16(c, d);
+  y = __builtin_arm_uasx(c, d);
+  y = __builtin_arm_uhadd16(c, d);
+  y = __builtin_arm_uhasx(c, d);
+  y = __builtin_arm_uhsax(c, d);
+  y = __builtin_arm_uhsub16(c, d);
+  y = __builtin_arm_uqadd16(c, d);
+  y = __builtin_arm_uqasx(c, d);
+  y = __builtin_arm_uqsax(c, d);
+  y = __builtin_arm_uqsub16(c, d);
+  y = __builtin_arm_usax(c, d);
+  y = __builtin_arm_usub16(c, d);
+}
+
+void test_9_5_10_parallel_16bit_multiplication(int16x2_t a, int16x2_t b,
+                                               int32_t c, int64_t d) {
+  int32_t x;
+  int64_t y;
+
+  x = __builtin_arm_smlad(a, b, c);
+  x = __builtin_arm_smladx(a, b, c);
+  y = __builtin_arm_smlald(a, b, d);
+  y = __builtin_arm_smlaldx(a, b, d);
+  x = __builtin_arm_smlsd(a, b, c);
+  x = __builtin_arm_smlsdx(a, b, c);
+  y = __builtin_arm_smlsld(a, b, d);
+  y = __builtin_arm_smlsldx(a, b, d);
+  x = __builtin_arm_smuad(a, b);
+  x = __builtin_arm_smuadx(a, b);
+  x = __builtin_arm_smusd(a, b);
+  x = __builtin_arm_smusdx(a, b);
+}
+
+void test_VFP(float f, double d) {
+  float fr;
+  double dr;
+
+  fr = __builtin_arm_vcvtr_f(f, 0);
+  fr = __builtin_arm_vcvtr_f(f, 1);
+  fr = __builtin_arm_vcvtr_f(f, -1); // expected-error {{argument should be a value from 0 to 1}}
+  fr = __builtin_arm_vcvtr_f(f, 2);  // expected-error {{argument should be a value from 0 to 1}}
+
+  dr = __builtin_arm_vcvtr_f(d, 0);
+  dr = __builtin_arm_vcvtr_f(d, 1);
+  dr = __builtin_arm_vcvtr_f(d, -1); // expected-error {{argument should be a value from 0 to 1}}
+  dr = __builtin_arm_vcvtr_f(d, 2);  // expected-error {{argument should be a value from 0 to 1}}
+}
diff --git a/test/Sema/builtins-x86.c b/test/Sema/builtins-x86.c
index 074efe1..472deeb 100644
--- a/test/Sema/builtins-x86.c
+++ b/test/Sema/builtins-x86.c
@@ -4,12 +4,17 @@
 typedef float __m128 __attribute__((__vector_size__(16)));
 typedef double __m128d __attribute__((__vector_size__(16)));
 
+typedef long long __m256i __attribute__((__vector_size__(32)));
+typedef float __m256 __attribute__((__vector_size__(32)));
+typedef double __m256d __attribute__((__vector_size__(32)));
+
 typedef long long __m512i __attribute__((__vector_size__(64)));
 typedef float __m512 __attribute__((__vector_size__(64)));
 typedef double __m512d __attribute__((__vector_size__(64)));
 
 typedef unsigned char __mmask8;
 typedef unsigned short __mmask16;
+typedef unsigned int __mmask32;
 
 __m128 test__builtin_ia32_cmpps(__m128 __a, __m128 __b) {
   __builtin_ia32_cmpps(__a, __b, 32); // expected-error {{argument should be a value from 0 to 31}}
@@ -83,3 +88,74 @@
   return __builtin_ia32_gatherpfdps(mask, index, addr, 1, 1); // expected-error {{argument should be a value from 2 to 3}}
 }
 
+__m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) {
+  return __builtin_ia32_vpshldq512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
+  return __builtin_ia32_vpshldd512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) {
+  return __builtin_ia32_vpshldw512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) {
+  return __builtin_ia32_vpshrdq512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
+  return __builtin_ia32_vpshrdd512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) {
+  return __builtin_ia32_vpshrdw512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  return __builtin_ia32_vpshldq256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m128i test_mm128_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  return __builtin_ia32_vpshldq128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  return __builtin_ia32_vpshldd256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m128i test_mm128_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  return __builtin_ia32_vpshldd128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) {
+  return __builtin_ia32_vpshldw256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m128i test_mm128_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  return __builtin_ia32_vpshldw128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  return __builtin_ia32_vpshrdq256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m128i test_mm128_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  return __builtin_ia32_vpshrdq128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
+  return __builtin_ia32_vpshrdd256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m128i test_mm128_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  return __builtin_ia32_vpshrdd128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) {
+  return __builtin_ia32_vpshrdw256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m128i test_mm128_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
+  return __builtin_ia32_vpshrdw128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}}
+}
diff --git a/test/Sema/c2x-nodiscard.c b/test/Sema/c2x-nodiscard.c
index f128d3b..fc5b123 100644
--- a/test/Sema/c2x-nodiscard.c
+++ b/test/Sema/c2x-nodiscard.c
@@ -13,7 +13,7 @@
 [[nodiscard]] int f1(void);
 enum [[nodiscard]] E1 { One };
 
-[[nodiscard]] int i; // expected-warning {{'nodiscard' attribute only applies to functions, methods, enums, and classes}}
+[[nodiscard]] int i; // expected-warning {{'nodiscard' attribute only applies to Objective-C methods, enums, structs, unions, classes, functions, and function pointers}}
 
 struct [[nodiscard]] S4 {
   int i;
diff --git a/test/Sema/compare.c b/test/Sema/compare.c
index 97586a7..b2b486f 100644
--- a/test/Sema/compare.c
+++ b/test/Sema/compare.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -fsyntax-only -pedantic -verify -Wsign-compare %s -Wno-unreachable-code
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -fsyntax-only -pedantic -verify -Wsign-compare -Wtautological-constant-in-range-compare %s -Wno-unreachable-code
 
 int test(char *C) { // nothing here should warn.
   return C != ((void*)0);
@@ -391,3 +391,16 @@
 void test12(unsigned a) {
   if (0 && -1 > a) { }
 }
+
+// PR36008
+
+enum PR36008EnumTest {
+  kPR36008Value = 0,
+};
+
+void pr36008(enum PR36008EnumTest lhs) {
+  __typeof__(lhs) x = lhs;
+  __typeof__(kPR36008Value) y = (kPR36008Value);
+  if (x == y) x = y; // no warning
+  if (y == x) y = x; // no warning
+}
diff --git a/test/Sema/const-eval.c b/test/Sema/const-eval.c
index 1b0a325..d60296f 100644
--- a/test/Sema/const-eval.c
+++ b/test/Sema/const-eval.c
@@ -144,3 +144,11 @@
 void *PR28739b = &PR28739b + (__int128)(unsigned long)-1;
 __int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c;
 void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1];
+
+struct PR35214_X {
+  int k;
+  int arr[];
+};
+int PR35214_x;
+int PR35214_y = ((struct PR35214_X *)&PR35214_x)->arr[1]; // expected-error {{not a compile-time constant}}
+int *PR35214_z = &((struct PR35214_X *)&PR35214_x)->arr[1]; // ok, &PR35214_x + 2
diff --git a/test/Sema/constant-builtins-2.c b/test/Sema/constant-builtins-2.c
index a4baecb..40cfce1 100644
--- a/test/Sema/constant-builtins-2.c
+++ b/test/Sema/constant-builtins-2.c
@@ -5,24 +5,35 @@
 double       g0  = __builtin_huge_val();
 float        g1  = __builtin_huge_valf();
 long double  g2  = __builtin_huge_vall();
+#if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
+__float128   g2_2 = __builtin_huge_valf128();
+#endif
 
 double       g3  = __builtin_inf();
 float        g4  = __builtin_inff();
 long double  g5  = __builtin_infl();
+#if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
+__float128   g5_2 = __builtin_inff128();
+#endif
 
 double       g6  = __builtin_nan("");
 float        g7  = __builtin_nanf("");
 long double  g8  = __builtin_nanl("");
+#if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
+__float128   g8_2 = __builtin_nanf128("");
+#endif
 
 // GCC constant folds these too (via native strtol):
 //double       g6_1  = __builtin_nan("1");
 //float        g7_1  = __builtin_nanf("1");
 //long double  g8_1  = __builtin_nanl("1");
 
-// APFloat doesn't have signalling NaN functions.
-//double       g9  = __builtin_nans("");
-//float        g10 = __builtin_nansf("");
-//long double  g11 = __builtin_nansl("");
+double       g9  = __builtin_nans("");
+float        g10 = __builtin_nansf("");
+long double  g11 = __builtin_nansl("");
+#if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
+__float128   g11_2 = __builtin_nansf128("");
+#endif
 
 //int          g12 = __builtin_abs(-12);
 
@@ -32,10 +43,16 @@
 float        g14 = __builtin_fabsf(-12.f);
 // GCC doesn't eat this one.
 //long double  g15 = __builtin_fabsfl(-12.0L);
+#if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
+__float128   g15_2 = __builtin_fabsf128(-12.q);
+#endif
 
 float        g16 = __builtin_copysign(1.0, -1.0);
 double       g17 = __builtin_copysignf(1.0f, -1.0f);
 long double  g18 = __builtin_copysignl(1.0L, -1.0L);
+#if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
+__float128   g18_2 = __builtin_copysignf128(1.0q, -1.0q);
+#endif
 
 char classify_nan     [__builtin_fpclassify(+1, -1, -1, -1, -1, __builtin_nan(""))];
 char classify_snan    [__builtin_fpclassify(+1, -1, -1, -1, -1, __builtin_nans(""))];
diff --git a/test/Sema/conversion.c b/test/Sema/conversion.c
index 89c34c6..e55e834 100644
--- a/test/Sema/conversion.c
+++ b/test/Sema/conversion.c
@@ -429,3 +429,17 @@
     ushort16 crCbScale = pairedConstants.s4; // expected-warning {{implicit conversion loses integer precision: 'uint32_t' (aka 'unsigned int') to 'ushort16'}}
     ushort16 brBias = pairedConstants.s6; // expected-warning {{implicit conversion loses integer precision: 'uint32_t' (aka 'unsigned int') to 'ushort16'}}
 }
+
+
+float double2float_test1(double a) {
+    return a; // expected-warning {{implicit conversion loses floating-point precision: 'double' to 'float'}}
+}
+
+void double2float_test2(double a, float *b) {
+    *b += a;
+}
+
+float sinf (float x);
+double double2float_test3(double a) {
+    return sinf(a); // expected-warning {{implicit conversion loses floating-point precision: 'double' to 'float'}}
+}
diff --git a/test/Sema/cxx-as-c.c b/test/Sema/cxx-as-c.c
new file mode 100644
index 0000000..41d7350
--- /dev/null
+++ b/test/Sema/cxx-as-c.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s -verify
+
+// PR36157
+struct Foo {
+  Foo(int n) : n_(n) {} // expected-error 1+{{}} expected-warning 1+{{}}
+private:
+  int n;
+};
+int main() { Foo f; } // expected-error 1+{{}}
diff --git a/test/Sema/darwin-tls.c b/test/Sema/darwin-tls.c
index 0fcf096..6279e0b 100644
--- a/test/Sema/darwin-tls.c
+++ b/test/Sema/darwin-tls.c
@@ -1,12 +1,18 @@
 // RUN: not %clang_cc1 -fsyntax-only -triple x86_64-apple-macosx10.6 %s 2>&1 | FileCheck %s --check-prefix NO-TLS
 // RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-macosx10.7 %s 2>&1 | FileCheck %s --check-prefix TLS
+
 // RUN: not %clang_cc1 -fsyntax-only -triple arm64-apple-ios7.1 %s 2>&1 | FileCheck %s --check-prefix NO-TLS
 // RUN: %clang_cc1 -fsyntax-only -triple arm64-apple-ios8.0 %s 2>&1 | FileCheck %s --check-prefix TLS
 // RUN: not %clang_cc1 -fsyntax-only -triple thumbv7s-apple-ios8.3 %s 2>&1 | FileCheck %s --check-prefix NO-TLS
 // RUN: %clang_cc1 -fsyntax-only -triple thumbv7s-apple-ios9.0 %s 2>&1 | FileCheck %s --check-prefix TLS
 // RUN: %clang_cc1 -fsyntax-only -triple armv7-apple-ios9.0 %s 2>&1 | FileCheck %s --check-prefix TLS
+// RUN: not %clang_cc1 -fsyntax-only -triple i386-apple-ios9.0-simulator %s 2>&1 | FileCheck %s --check-prefix NO-TLS
+// RUN: %clang_cc1 -fsyntax-only -triple i386-apple-ios10.0-simulator %s 2>&1 | FileCheck %s --check-prefix TLS
+
 // RUN: not %clang_cc1 -fsyntax-only -triple thumbv7k-apple-watchos1.0 %s 2>&1 | FileCheck %s --check-prefix NO-TLS
 // RUN: %clang_cc1 -fsyntax-only -triple thumbv7k-apple-watchos2.0 %s 2>&1 | FileCheck %s --check-prefix TLS
+// RUN: not %clang_cc1 -fsyntax-only -triple i386-apple-watchos2.0-simulator %s 2>&1 | FileCheck %s --check-prefix NO-TLS
+// RUN: %clang_cc1 -fsyntax-only -triple i386-apple-watchos3.0-simulator %s 2>&1 | FileCheck %s --check-prefix TLS
 
 
 __thread int a;
diff --git a/test/Sema/dllexport.c b/test/Sema/dllexport.c
index 7991a45..2e0fe0c 100644
--- a/test/Sema/dllexport.c
+++ b/test/Sema/dllexport.c
@@ -5,17 +5,17 @@
 
 // Invalid usage.
 __declspec(dllexport) typedef int typedef1;
-// expected-warning@-1{{'dllexport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllexport' attribute only applies to functions, variables, classes, and Objective-C interfaces}}
 typedef __declspec(dllexport) int typedef2;
-// expected-warning@-1{{'dllexport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 typedef int __declspec(dllexport) typedef3;
-// expected-warning@-1{{'dllexport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 typedef __declspec(dllexport) void (*FunTy)();
-// expected-warning@-1{{'dllexport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 enum __declspec(dllexport) Enum { EnumVal };
-// expected-warning@-1{{'dllexport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 struct __declspec(dllexport) Record {};
-// expected-warning@-1{{'dllexport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 
 
 
diff --git a/test/Sema/dllimport.c b/test/Sema/dllimport.c
index 53bf372..988a8e3 100644
--- a/test/Sema/dllimport.c
+++ b/test/Sema/dllimport.c
@@ -6,17 +6,17 @@
 
 // Invalid usage.
 __declspec(dllimport) typedef int typedef1;
-// expected-warning@-1{{'dllimport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllimport' attribute only applies to functions, variables, classes, and Objective-C interfaces}}
 typedef __declspec(dllimport) int typedef2;
-// expected-warning@-1{{'dllimport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 typedef int __declspec(dllimport) typedef3;
-// expected-warning@-1{{'dllimport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 typedef __declspec(dllimport) void (*FunTy)();
-// expected-warning@-1{{'dllimport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 enum __declspec(dllimport) Enum { EnumVal };
-// expected-warning@-1{{'dllimport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 struct __declspec(dllimport) Record {};
-// expected-warning@-1{{'dllimport' attribute only applies to variables and functions}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 
 
 
diff --git a/test/Sema/enum-sign-conversion.c b/test/Sema/enum-sign-conversion.c
new file mode 100644
index 0000000..518fc67
--- /dev/null
+++ b/test/Sema/enum-sign-conversion.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -fsyntax-only -verify -DUNSIGNED -Wsign-conversion %s
+// RUN: %clang_cc1 -triple=x86_64-pc-win32 -fsyntax-only -verify -Wsign-conversion %s
+
+// PR35200
+enum X { A,B,C};
+int f(enum X x) {
+#ifdef UNSIGNED
+  return x; // expected-warning {{implicit conversion changes signedness: 'enum X' to 'int'}}
+#else
+  // expected-no-diagnostics
+  return x;
+#endif
+}
diff --git a/test/Sema/error-type-safety.cpp b/test/Sema/error-type-safety.cpp
new file mode 100644
index 0000000..c5aa48f
--- /dev/null
+++ b/test/Sema/error-type-safety.cpp
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#define INT_TAG 42
+
+static const int test_in
+    __attribute__((type_tag_for_datatype(test, int))) = INT_TAG;
+
+// Argument index: 1, Type tag index: 2
+void test_bounds_index(...)
+    __attribute__((argument_with_type_tag(test, 1, 2)));
+
+// Argument index: 1, Type tag index: 2
+void test_bounds_index_ptr(void *, ...)
+    __attribute__((pointer_with_type_tag(test, 1, 2)));
+
+// Argument index: 3, Type tag index: 1
+void test_bounds_arg_index(...)
+    __attribute__((argument_with_type_tag(test, 3, 1)));
+
+class C {
+public:
+  // Argument index: 2, Type tag index: 3
+  void test_bounds_index(...)
+      __attribute__((argument_with_type_tag(test, 2, 3)));
+
+  // Argument index: 2, Type tag index: 3
+  void test_bounds_index_ptr(void *, ...)
+      __attribute__((pointer_with_type_tag(test, 2, 3)));
+
+  // Argument index: 4, Type tag index: 2
+  void test_bounds_arg_index(...)
+      __attribute__((argument_with_type_tag(test, 4, 2)));
+};
+
+void test_bounds()
+{
+  C c;
+
+  // Test the boundary edges (ensure no off-by-one) with argument indexing.
+  test_bounds_index(1, INT_TAG);
+  c.test_bounds_index(1, INT_TAG);
+  test_bounds_index_ptr(0, INT_TAG);
+  c.test_bounds_index_ptr(0, INT_TAG);
+
+  test_bounds_index(1);       // expected-error {{type tag index 2 is greater than the number of arguments specified}}
+  c.test_bounds_index(1);     // expected-error {{type tag index 3 is greater than the number of arguments specified}}
+  test_bounds_index_ptr(0);   // expected-error {{type tag index 2 is greater than the number of arguments specified}}
+  c.test_bounds_index_ptr(0); // expected-error {{type tag index 3 is greater than the number of arguments specified}}
+
+  test_bounds_arg_index(INT_TAG, 1);   // expected-error {{argument index 3 is greater than the number of arguments specified}}
+  c.test_bounds_arg_index(INT_TAG, 1); // expected-error {{argument index 4 is greater than the number of arguments specified}}
+}
diff --git a/test/Sema/ext_vector_comparisons.c b/test/Sema/ext_vector_comparisons.c
index 605ba6c..4c632a4 100644
--- a/test/Sema/ext_vector_comparisons.c
+++ b/test/Sema/ext_vector_comparisons.c
@@ -6,12 +6,12 @@
   int4 vec, rv;
 
   // comparisons to self...
-  return vec == vec; // expected-warning{{self-comparison always evaluates to a constant}}
-  return vec != vec; // expected-warning{{self-comparison always evaluates to a constant}}
-  return vec < vec; // expected-warning{{self-comparison always evaluates to a constant}}
-  return vec <= vec; // expected-warning{{self-comparison always evaluates to a constant}}
-  return vec > vec; // expected-warning{{self-comparison always evaluates to a constant}}
-  return vec >= vec; // expected-warning{{self-comparison always evaluates to a constant}}
+  return vec == vec; // expected-warning{{self-comparison always evaluates to true}}
+  return vec != vec; // expected-warning{{self-comparison always evaluates to false}}
+  return vec < vec; // expected-warning{{self-comparison always evaluates to false}}
+  return vec <= vec; // expected-warning{{self-comparison always evaluates to true}}
+  return vec > vec; // expected-warning{{self-comparison always evaluates to false}}
+  return vec >= vec; // expected-warning{{self-comparison always evaluates to true}}
 }
 
 
diff --git a/test/Sema/float128-ld-incompatibility.cpp b/test/Sema/float128-ld-incompatibility.cpp
index d993ed7..13f5b6d 100644
--- a/test/Sema/float128-ld-incompatibility.cpp
+++ b/test/Sema/float128-ld-incompatibility.cpp
@@ -1,10 +1,12 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 \
 // RUN: -triple powerpc64le-unknown-linux-gnu -target-cpu pwr8 \
 // RUN: -target-feature +float128 %s
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -triple x86_64-unknown-linux-gnu -Wno-unused-value -Wno-parentheses %s
 
 __float128 qf();
 long double ldf();
 
+#ifdef __PPC__
 // FIXME: once operations between long double and __float128 are implemented for
 //        targets where the types are different, these next two will change
 long double ld{qf()}; // expected-error {{cannot initialize a variable of type 'long double' with an rvalue of type '__float128'}}
@@ -17,6 +19,7 @@
 auto test2(long double a, __float128 b) -> decltype(a + b) { // expected-error {{invalid operands to binary expression ('long double' and '__float128')}}
   return a + b;      // expected-error {{invalid operands to binary expression ('long double' and '__float128')}}
 }
+#endif
 
 void test3(bool b) {
   long double ld;
diff --git a/test/Sema/internal_linkage.c b/test/Sema/internal_linkage.c
index f4deccc..57315d8 100644
--- a/test/Sema/internal_linkage.c
+++ b/test/Sema/internal_linkage.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify -fdouble-square-bracket-attributes %s
 
 int var __attribute__((internal_linkage));
 int var2 __attribute__((internal_linkage,common)); // expected-error{{'internal_linkage' and 'common' attributes are not compatible}} \
@@ -15,7 +15,13 @@
 int var5 __attribute__((common)); // expected-note{{conflicting attribute is here}}
 
 __attribute__((internal_linkage)) int f() {}
-struct __attribute__((internal_linkage)) S { // expected-warning{{'internal_linkage' attribute only applies to variables and functions}}
+struct __attribute__((internal_linkage)) S { // expected-warning{{'internal_linkage' attribute only applies to variables, functions, and classes}}
 };
 
 __attribute__((internal_linkage("foo"))) int g() {} // expected-error{{'internal_linkage' attribute takes no arguments}}
+
+int var6 [[clang::internal_linkage]];
+int var7 [[clang::internal_linkage]] __attribute__((common)); // expected-error{{'internal_linkage' and 'common' attributes are not compatible}} \
+                                                   // expected-note{{conflicting attribute is here}}
+__attribute__((common)) int var8 [[clang::internal_linkage]]; // expected-error{{'internal_linkage' and 'common' attributes are not compatible}} \
+                                                   // expected-note{{conflicting attribute is here}}
diff --git a/test/Sema/mms-bitfields.c b/test/Sema/mms-bitfields.c
index d238a7a..cee5b06 100644
--- a/test/Sema/mms-bitfields.c
+++ b/test/Sema/mms-bitfields.c
@@ -11,3 +11,18 @@
 
 // MS pads out bitfields between different types.
 static int arr[(sizeof(t) == 8) ? 1 : -1];
+
+#pragma pack (push,1)
+
+typedef unsigned int UINT32;
+
+struct Inner {
+  UINT32    A    :  1;
+  UINT32    B    :  1;
+  UINT32    C    :  1;
+  UINT32    D    : 30;
+} Inner;
+
+#pragma pack (pop)
+
+static int arr2[(sizeof(Inner) == 8) ? 1 : -1];
diff --git a/test/Sema/nonnull.c b/test/Sema/nonnull.c
index 217bbb1..b589bb3 100644
--- a/test/Sema/nonnull.c
+++ b/test/Sema/nonnull.c
@@ -37,7 +37,7 @@
 void *test_ptr_returns_nonnull(void) __attribute__((returns_nonnull)); // no-warning
 
 int i __attribute__((nonnull)); // expected-warning {{'nonnull' attribute only applies to functions, methods, and parameters}}
-int j __attribute__((returns_nonnull)); // expected-warning {{'returns_nonnull' attribute only applies to functions and methods}}
+int j __attribute__((returns_nonnull)); // expected-warning {{'returns_nonnull' attribute only applies to Objective-C methods and functions}}
 void *test_no_fn_proto() __attribute__((returns_nonnull)); // no-warning
 void *test_with_fn_proto(void) __attribute__((returns_nonnull)); // no-warning
 
diff --git a/test/Sema/outof-range-enum-constant-compare.c b/test/Sema/outof-range-enum-constant-compare.c
new file mode 100644
index 0000000..b9ce08f
--- /dev/null
+++ b/test/Sema/outof-range-enum-constant-compare.c
@@ -0,0 +1,379 @@
+// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -fsyntax-only -DUNSIGNED -verify %s
+// RUN: %clang_cc1 -triple=x86_64-pc-win32 -fsyntax-only -DSIGNED -verify %s
+// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -fsyntax-only -DUNSIGNED -DSILENCE -Wno-tautological-constant-out-of-range-compare -verify %s
+// RUN: %clang_cc1 -triple=x86_64-pc-win32 -fsyntax-only -DSIGNED -DSILENCE -Wno-tautological-constant-out-of-range-compare -verify %s
+
+int main() {
+  enum A { A_a = 2 };
+  enum A a;
+
+#ifdef SILENCE
+  // expected-no-diagnostics
+#endif
+
+#ifdef UNSIGNED
+#ifndef SILENCE
+  if (a < 4294967296) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (4294967296 >= a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a > 4294967296) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+  if (4294967296 <= a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+  if (a <= 4294967296) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (4294967296 > a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a >= 4294967296) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+  if (4294967296 < a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+  if (a == 4294967296) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+  if (4294967296 != a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a != 4294967296) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (4294967296 == a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+
+  if (a < 4294967296U) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (4294967296U >= a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a > 4294967296U) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+  if (4294967296U <= a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+  if (a <= 4294967296U) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (4294967296U > a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a >= 4294967296U) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+  if (4294967296U < a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+  if (a == 4294967296U) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+  if (4294967296U != a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a != 4294967296U) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always true}}
+    return 0;
+  if (4294967296U == a) // expected-warning {{comparison of constant 4294967296 with expression of type 'enum A' is always false}}
+    return 0;
+#else // SILENCE
+  if (a < 4294967296)
+    return 0;
+  if (4294967296 >= a)
+    return 0;
+  if (a > 4294967296)
+    return 0;
+  if (4294967296 <= a)
+    return 0;
+  if (a <= 4294967296)
+    return 0;
+  if (4294967296 > a)
+    return 0;
+  if (a >= 4294967296)
+    return 0;
+  if (4294967296 < a)
+    return 0;
+  if (a == 4294967296)
+    return 0;
+  if (4294967296 != a)
+    return 0;
+  if (a != 4294967296)
+    return 0;
+  if (4294967296 == a)
+    return 0;
+
+  if (a < 4294967296U)
+    return 0;
+  if (4294967296U >= a)
+    return 0;
+  if (a > 4294967296U)
+    return 0;
+  if (4294967296U <= a)
+    return 0;
+  if (a <= 4294967296U)
+    return 0;
+  if (4294967296U > a)
+    return 0;
+  if (a >= 4294967296U)
+    return 0;
+  if (4294967296U < a)
+    return 0;
+  if (a == 4294967296U)
+    return 0;
+  if (4294967296U != a)
+    return 0;
+  if (a != 4294967296U)
+    return 0;
+  if (4294967296U == a)
+    return 0;
+#endif
+#elif defined(SIGNED)
+#ifndef SILENCE
+  if (a < -2147483649) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always false}}
+    return 0;
+  if (-2147483649 >= a) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always false}}
+    return 0;
+  if (a > -2147483649) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always true}}
+    return 0;
+  if (-2147483649 <= a) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a <= -2147483649) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always false}}
+    return 0;
+  if (-2147483649 > a) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always false}}
+    return 0;
+  if (a >= -2147483649) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always true}}
+    return 0;
+  if (-2147483649 < a) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a == -2147483649) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always false}}
+    return 0;
+  if (-2147483649 != a) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a != -2147483649) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always true}}
+    return 0;
+  if (-2147483649 == a) // expected-warning {{comparison of constant -2147483649 with expression of type 'enum A' is always false}}
+    return 0;
+
+  if (a < 2147483648) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always true}}
+    return 0;
+  if (2147483648 >= a) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a > 2147483648) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always false}}
+    return 0;
+  if (2147483648 <= a) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always false}}
+    return 0;
+  if (a <= 2147483648) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always true}}
+    return 0;
+  if (2147483648 > a) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a >= 2147483648) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always false}}
+    return 0;
+  if (2147483648 < a) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always false}}
+    return 0;
+  if (a == 2147483648) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always false}}
+    return 0;
+  if (2147483648 != a) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a != 2147483648) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always true}}
+    return 0;
+  if (2147483648 == a) // expected-warning {{comparison of constant 2147483648 with expression of type 'enum A' is always false}}
+    return 0;
+#else // SILENCE
+  if (a < -2147483649)
+    return 0;
+  if (-2147483649 >= a)
+    return 0;
+  if (a > -2147483649)
+    return 0;
+  if (-2147483649 <= a)
+    return 0;
+  if (a <= -2147483649)
+    return 0;
+  if (-2147483649 > a)
+    return 0;
+  if (a >= -2147483649)
+    return 0;
+  if (-2147483649 < a)
+    return 0;
+  if (a == -2147483649)
+    return 0;
+  if (-2147483649 != a)
+    return 0;
+  if (a != -2147483649)
+    return 0;
+  if (-2147483649 == a)
+    return 0;
+
+  if (a < 2147483648)
+    return 0;
+  if (2147483648 >= a)
+    return 0;
+  if (a > 2147483648)
+    return 0;
+  if (2147483648 <= a)
+    return 0;
+  if (a <= 2147483648)
+    return 0;
+  if (2147483648 > a)
+    return 0;
+  if (a >= 2147483648)
+    return 0;
+  if (2147483648 < a)
+    return 0;
+  if (a == 2147483648)
+    return 0;
+  if (2147483648 != a)
+    return 0;
+  if (a != 2147483648)
+    return 0;
+  if (2147483648 == a)
+    return 0;
+#endif
+#endif
+}
+
+// https://bugs.llvm.org/show_bug.cgi?id=35009
+int PR35009() {
+  enum A { A_a = 2 };
+  enum A a;
+
+  // in C, this should not warn.
+
+  if (a < 1)
+    return 0;
+  if (1 >= a)
+    return 0;
+  if (a > 1)
+    return 0;
+  if (1 <= a)
+    return 0;
+  if (a <= 1)
+    return 0;
+  if (1 > a)
+    return 0;
+  if (a >= 1)
+    return 0;
+  if (1 < a)
+    return 0;
+  if (a == 1)
+    return 0;
+  if (1 != a)
+    return 0;
+  if (a != 1)
+    return 0;
+  if (1 == a)
+    return 0;
+
+  if (a < 1U)
+    return 0;
+  if (1U >= a)
+    return 0;
+  if (a > 1U)
+    return 0;
+  if (1U <= a)
+    return 0;
+  if (a <= 1U)
+    return 0;
+  if (1U > a)
+    return 0;
+  if (a >= 1U)
+    return 0;
+  if (1U < a)
+    return 0;
+  if (a == 1U)
+    return 0;
+  if (1U != a)
+    return 0;
+  if (a != 1U)
+    return 0;
+  if (1U == a)
+    return 0;
+
+  if (a < 2)
+    return 0;
+  if (2 >= a)
+    return 0;
+  if (a > 2)
+    return 0;
+  if (2 <= a)
+    return 0;
+  if (a <= 2)
+    return 0;
+  if (2 > a)
+    return 0;
+  if (a >= 2)
+    return 0;
+  if (2 < a)
+    return 0;
+  if (a == 2)
+    return 0;
+  if (2 != a)
+    return 0;
+  if (a != 2)
+    return 0;
+  if (2 == a)
+    return 0;
+
+  if (a < 2U)
+    return 0;
+  if (2U >= a)
+    return 0;
+  if (a > 2U)
+    return 0;
+  if (2U <= a)
+    return 0;
+  if (a <= 2U)
+    return 0;
+  if (2U > a)
+    return 0;
+  if (a >= 2U)
+    return 0;
+  if (2U < a)
+    return 0;
+  if (a == 2U)
+    return 0;
+  if (2U != a)
+    return 0;
+  if (a != 2U)
+    return 0;
+  if (2U == a)
+    return 0;
+
+  if (a < 3)
+    return 0;
+  if (3 >= a)
+    return 0;
+  if (a > 3)
+    return 0;
+  if (3 <= a)
+    return 0;
+  if (a <= 3)
+    return 0;
+  if (3 > a)
+    return 0;
+  if (a >= 3)
+    return 0;
+  if (3 < a)
+    return 0;
+  if (a == 3)
+    return 0;
+  if (3 != a)
+    return 0;
+  if (a != 3)
+    return 0;
+  if (3 == a)
+    return 0;
+
+  if (a < 3U)
+    return 0;
+  if (3U >= a)
+    return 0;
+  if (a > 3U)
+    return 0;
+  if (3U <= a)
+    return 0;
+  if (a <= 3U)
+    return 0;
+  if (3U > a)
+    return 0;
+  if (a >= 3U)
+    return 0;
+  if (3U < a)
+    return 0;
+  if (a == 3U)
+    return 0;
+  if (3U != a)
+    return 0;
+  if (a != 3U)
+    return 0;
+  if (3U == a)
+    return 0;
+
+  return 1;
+}
diff --git a/test/Sema/pragma-align-no-header-change-warning.c b/test/Sema/pragma-align-no-header-change-warning.c
new file mode 100644
index 0000000..dcd61d0
--- /dev/null
+++ b/test/Sema/pragma-align-no-header-change-warning.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple i686-apple-darwin9 -fsyntax-only -Wpragma-pack -I %S/Inputs -verify %s
+// expected-no-diagnostics
+
+#include "pragma-align-no-header-change-warning.h"
+
diff --git a/test/Sema/pragma-ms_struct.c b/test/Sema/pragma-ms_struct.c
index a2591b6..e10d49e 100644
--- a/test/Sema/pragma-ms_struct.c
+++ b/test/Sema/pragma-ms_struct.c
@@ -25,7 +25,7 @@
 } __attribute__((__ms_struct__)) t1;
 
 struct S {
-		   double __attribute__((ms_struct)) d;	// expected-warning {{'ms_struct' attribute only applies to struct or union}}
+		   double __attribute__((ms_struct)) d;	// expected-warning {{'ms_struct' attribute only applies to structs, unions, and classes}}
                    unsigned long bf_1 : 12;
                    unsigned long : 0;
                    unsigned long bf_2 : 12;
@@ -36,7 +36,7 @@
   A = 0,
   B,
   C
-} __attribute__((ms_struct)) e1; // expected-warning {{'ms_struct' attribute only applies to struct or union}}
+} __attribute__((ms_struct)) e1; // expected-warning {{'ms_struct' attribute only applies to}}
 
 // rdar://10513599
 #pragma ms_struct on
diff --git a/test/Sema/return.c b/test/Sema/return.c
index 2a58a6e..debf5ab 100644
--- a/test/Sema/return.c
+++ b/test/Sema/return.c
@@ -283,6 +283,18 @@
     goto lbl;
 }
 
+int test36a(int b) {
+  if (b)
+    return 43;
+  __builtin_unreachable();
+}
+
+int test36b(int b) {
+  if (b)
+    return 43;
+  __builtin_assume(0);
+}
+
 // PR19074.
 void abort(void) __attribute__((noreturn));
 #define av_assert0(cond) do {\
diff --git a/test/Sema/self-comparison.c b/test/Sema/self-comparison.c
index edb3a6a..bd7adcd 100644
--- a/test/Sema/self-comparison.c
+++ b/test/Sema/self-comparison.c
@@ -86,3 +86,8 @@
   return 1;
 }
 
+__attribute__((weak)) int weak_1[3];
+__attribute__((weak)) int weak_2[3];
+_Bool compare_weak() {
+  return weak_1 == weak_2;
+}
diff --git a/test/Sema/sign-compare-enum.c b/test/Sema/sign-compare-enum.c
new file mode 100644
index 0000000..fb67e32
--- /dev/null
+++ b/test/Sema/sign-compare-enum.c
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -fsyntax-only -verify -Wsign-compare %s
+// RUN: %clang_cc1 -triple=x86_64-pc-win32 -fsyntax-only -verify -Wsign-compare %s
+// RUN: %clang_cc1 -x c++ -triple=x86_64-pc-linux-gnu -fsyntax-only -verify -Wsign-compare %s
+// RUN: %clang_cc1 -x c++ -triple=x86_64-pc-win32 -fsyntax-only -verify -Wsign-compare %s
+
+// Check that -Wsign-compare is off by default.
+// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -fsyntax-only -verify -DSILENCE %s
+
+#ifdef SILENCE
+// expected-no-diagnostics
+#endif
+
+enum PosEnum {
+  A_a = 0,
+  A_b = 1,
+  A_c = 10
+};
+
+static const int message[] = {0, 1};
+
+int test_pos(enum PosEnum a) {
+  if (a < 2)
+    return 0;
+
+  // No warning, except in Windows C mode, where PosEnum is 'int' and it can
+  // take on any value according to the C standard.
+#if !defined(SILENCE) && defined(_WIN32) && !defined(__cplusplus)
+  // expected-warning@+2 {{comparison of integers of different signs}}
+#endif
+  if (a < 2U)
+    return 0;
+
+  unsigned uv = 2;
+#if !defined(SILENCE) && defined(_WIN32) && !defined(__cplusplus)
+  // expected-warning@+2 {{comparison of integers of different signs}}
+#endif
+  if (a < uv)
+    return 1;
+
+#if !defined(SILENCE) && defined(_WIN32) && !defined(__cplusplus)
+  // expected-warning@+2 {{comparison of integers of different signs}}
+#endif
+  if (a < sizeof(message)/sizeof(message[0]))
+    return 0;
+  return 4;
+}
+
+enum NegEnum {
+  NE_a = -1,
+  NE_b = 0,
+  NE_c = 10
+};
+
+int test_neg(enum NegEnum a) {
+  if (a < 2)
+    return 0;
+
+#ifndef SILENCE
+  // expected-warning@+2 {{comparison of integers of different signs}}
+#endif
+  if (a < 2U)
+    return 0;
+
+  unsigned uv = 2;
+#ifndef SILENCE
+  // expected-warning@+2 {{comparison of integers of different signs}}
+#endif
+  if (a < uv)
+    return 1;
+
+#ifndef SILENCE
+  // expected-warning@+2 {{comparison of integers of different signs}}
+#endif
+  if (a < sizeof(message)/sizeof(message[0]))
+    return 0;
+  return 4;
+}
diff --git a/test/Sema/tautological-constant-compare.c b/test/Sema/tautological-constant-compare.c
index b9ade2a..b242f35 100644
--- a/test/Sema/tautological-constant-compare.c
+++ b/test/Sema/tautological-constant-compare.c
@@ -1,7 +1,13 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -DTEST -verify %s
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wno-tautological-constant-compare -verify %s
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -DTEST -verify -x c++ %s
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wno-tautological-constant-compare -verify -x c++ %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wtautological-constant-in-range-compare -DTEST -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wtautological-constant-in-range-compare -DTEST -verify -x c++ %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wtautological-type-limit-compare -DTEST -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wtautological-type-limit-compare -DTEST -verify -x c++ %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wextra -Wno-sign-compare -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wextra -Wno-sign-compare -verify -x c++ %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wall -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wall -verify -x c++ %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify -x c++ %s
 
 int value(void);
 
@@ -94,15 +100,17 @@
   if (-32768 >= s)
       return 0;
 
+  // Note: both sides are promoted to unsigned long prior to the comparison, so
+  // it is perfectly possible for a short to compare greater than 32767UL.
   if (s == 32767UL)
       return 0;
   if (s != 32767UL)
       return 0;
   if (s < 32767UL)
       return 0;
-  if (s <= 32767UL) // expected-warning {{comparison 'short' <= 32767 is always true}}
+  if (s <= 32767UL)
       return 0;
-  if (s > 32767UL) // expected-warning {{comparison 'short' > 32767 is always false}}
+  if (s > 32767UL)
       return 0;
   if (s >= 32767UL)
       return 0;
@@ -111,13 +119,40 @@
       return 0;
   if (32767UL != s)
       return 0;
-  if (32767UL < s) // expected-warning {{comparison 32767 < 'short' is always false}}
+  if (32767UL < s)
       return 0;
   if (32767UL <= s)
       return 0;
   if (32767UL > s)
       return 0;
-  if (32767UL >= s) // expected-warning {{comparison 32767 >= 'short' is always true}}
+  if (32767UL >= s)
+      return 0;
+
+  enum { ULONG_MAX = (2UL * (unsigned long)__LONG_MAX__ + 1UL) };
+  if (s == 2UL * (unsigned long)__LONG_MAX__ + 1UL)
+      return 0;
+  if (s != 2UL * (unsigned long)__LONG_MAX__ + 1UL)
+      return 0;
+  if (s < 2UL * (unsigned long)__LONG_MAX__ + 1UL)
+      return 0;
+  if (s <= 2UL * (unsigned long)__LONG_MAX__ + 1UL) // expected-warning-re {{comparison 'short' <= {{.*}} is always true}}
+      return 0;
+  if (s > 2UL * (unsigned long)__LONG_MAX__ + 1UL) // expected-warning-re {{comparison 'short' > {{.*}} is always false}}
+      return 0;
+  if (s >= 2UL * (unsigned long)__LONG_MAX__ + 1UL)
+      return 0;
+
+  if (2UL * (unsigned long)__LONG_MAX__ + 1UL == s)
+      return 0;
+  if (2UL * (unsigned long)__LONG_MAX__ + 1UL != s)
+      return 0;
+  if (2UL * (unsigned long)__LONG_MAX__ + 1UL < s) // expected-warning-re {{comparison {{.*}} < 'short' is always false}}
+      return 0;
+  if (2UL * (unsigned long)__LONG_MAX__ + 1UL <= s)
+      return 0;
+  if (2UL * (unsigned long)__LONG_MAX__ + 1UL > s)
+      return 0;
+  if (2UL * (unsigned long)__LONG_MAX__ + 1UL >= s) // expected-warning-re {{comparison {{.*}} >= 'short' is always true}}
       return 0;
 
   // FIXME: assumes two's complement
@@ -281,8 +316,6 @@
   if (0 >= s)
     return 0;
 
-  // However the comparison with 0U would warn
-
   unsigned short us = value();
 
 #ifdef TEST
@@ -445,7 +478,7 @@
     return 0;
 
 #if __SIZEOF_INT128__
-  __int128 i128;
+  __int128 i128 = value();
   if (i128 == -1) // used to crash
       return 0;
 #endif
@@ -456,7 +489,7 @@
   no,
   maybe
   };
-  enum E e;
+  enum E e = (enum E)value();
 
   if (e == yes)
       return 0;
@@ -510,5 +543,23 @@
   if (maybe >= e)
       return 0;
 
+  // For the time being, use the declared type of bit-fields rather than their
+  // length when determining whether a value is in-range.
+  // FIXME: Reconsider this.
+  struct A {
+    int a : 3;
+    unsigned b : 3;
+    long c : 3;
+    unsigned long d : 3;
+  } a;
+  if (a.a < 3) {}
+  if (a.a < 4) {}
+  if (a.b < 7) {}
+  if (a.b < 8) {}
+  if (a.c < 3) {}
+  if (a.c < 4) {}
+  if (a.d < 7) {}
+  if (a.d < 8) {}
+
   return 1;
 }
diff --git a/test/Sema/tautological-constant-enum-compare.c b/test/Sema/tautological-constant-enum-compare.c
new file mode 100644
index 0000000..99481c7
--- /dev/null
+++ b/test/Sema/tautological-constant-enum-compare.c
@@ -0,0 +1,406 @@
+// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -fsyntax-only -DUNSIGNED -Wtautological-constant-in-range-compare -verify %s
+// RUN: %clang_cc1 -triple=x86_64-pc-win32 -fsyntax-only -DSIGNED -Wtautological-constant-in-range-compare -verify %s
+// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -fsyntax-only -DUNSIGNED -DSILENCE -Wno-tautological-constant-compare -verify %s
+// RUN: %clang_cc1 -triple=x86_64-pc-win32 -fsyntax-only -DSIGNED -DSILENCE -Wno-tautological-constant-compare -verify %s
+
+int main() {
+  enum A { A_a = 2 };
+  enum A a;
+
+#ifdef SILENCE
+  // expected-no-diagnostics
+#else
+  // If we promote to unsigned, it doesn't matter whether the enum's underlying
+  // type was signed.
+  if (a < 0U) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+    return 0;
+  if (0U >= a)
+    return 0;
+  if (a > 0U)
+    return 0;
+  if (0U <= a) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+    return 0;
+  if (a <= 0U)
+    return 0;
+  if (0U > a) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
+    return 0;
+  if (a >= 0U) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+    return 0;
+  if (0U < a)
+    return 0;
+
+  if (a < 4294967295U)
+    return 0;
+  if (4294967295U >= a) // expected-warning {{comparison 4294967295 >= 'enum A' is always true}}
+    return 0;
+  if (a > 4294967295U) // expected-warning {{comparison 'enum A' > 4294967295 is always false}}
+    return 0;
+  if (4294967295U <= a)
+    return 0;
+  if (a <= 4294967295U) // expected-warning {{comparison 'enum A' <= 4294967295 is always true}}
+    return 0;
+  if (4294967295U > a)
+    return 0;
+  if (a >= 4294967295U)
+    return 0;
+  if (4294967295U < a) // expected-warning {{comparison 4294967295 < 'enum A' is always false}}
+    return 0;
+
+  if (a < 2147483647U)
+    return 0;
+  if (2147483647U >= a)
+    return 0;
+  if (a > 2147483647U)
+    return 0;
+  if (2147483647U <= a)
+    return 0;
+  if (a <= 2147483647U)
+    return 0;
+  if (2147483647U > a)
+    return 0;
+  if (a >= 2147483647U)
+    return 0;
+  if (2147483647U < a)
+    return 0;
+#endif
+
+#if defined(UNSIGNED) && !defined(SILENCE)
+  if (a < 0) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+    return 0;
+  if (0 >= a)
+    return 0;
+  if (a > 0)
+    return 0;
+  if (0 <= a) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+    return 0;
+  if (a <= 0)
+    return 0;
+  if (0 > a) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
+    return 0;
+  if (a >= 0) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+    return 0;
+  if (0 < a)
+    return 0;
+
+  if (a < 4294967295)
+    return 0;
+  if (4294967295 >= a) // expected-warning {{comparison 4294967295 >= 'enum A' is always true}}
+    return 0;
+  if (a > 4294967295) // expected-warning {{comparison 'enum A' > 4294967295 is always false}}
+    return 0;
+  if (4294967295 <= a)
+    return 0;
+  if (a <= 4294967295) // expected-warning {{comparison 'enum A' <= 4294967295 is always true}}
+    return 0;
+  if (4294967295 > a)
+    return 0;
+  if (a >= 4294967295)
+    return 0;
+  if (4294967295 < a) // expected-warning {{comparison 4294967295 < 'enum A' is always false}}
+    return 0;
+#else // SIGNED || SILENCE
+  if (a < 0)
+    return 0;
+  if (0 >= a)
+    return 0;
+  if (a > 0)
+    return 0;
+  if (0 <= a)
+    return 0;
+  if (a <= 0)
+    return 0;
+  if (0 > a)
+    return 0;
+  if (a >= 0)
+    return 0;
+  if (0 < a)
+    return 0;
+
+#ifndef SILENCE
+  if (a < 4294967295) // expected-warning {{comparison of constant 4294967295 with expression of type 'enum A' is always true}}
+    return 0;
+  if (4294967295 >= a) // expected-warning {{comparison of constant 4294967295 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a > 4294967295) // expected-warning {{comparison of constant 4294967295 with expression of type 'enum A' is always false}}
+    return 0;
+  if (4294967295 <= a) // expected-warning {{comparison of constant 4294967295 with expression of type 'enum A' is always false}}
+    return 0;
+  if (a <= 4294967295) // expected-warning {{comparison of constant 4294967295 with expression of type 'enum A' is always true}}
+    return 0;
+  if (4294967295 > a) // expected-warning {{comparison of constant 4294967295 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a >= 4294967295) // expected-warning {{comparison of constant 4294967295 with expression of type 'enum A' is always false}}
+    return 0;
+  if (4294967295 < a) // expected-warning {{comparison of constant 4294967295 with expression of type 'enum A' is always false}}
+    return 0;
+#else
+  if (a < 4294967295)
+    return 0;
+  if (4294967295 >= a)
+    return 0;
+  if (a > 4294967295)
+    return 0;
+  if (4294967295 <= a)
+    return 0;
+  if (a <= 4294967295)
+    return 0;
+  if (4294967295 > a)
+    return 0;
+  if (a >= 4294967295)
+    return 0;
+  if (4294967295 < a)
+    return 0;
+#endif
+#endif
+
+#if defined(SIGNED) && !defined(SILENCE)
+  if (a < -2147483648) // expected-warning {{comparison 'enum A' < -2147483648 is always false}}
+    return 0;
+  if (-2147483648 >= a)
+    return 0;
+  if (a > -2147483648)
+    return 0;
+  if (-2147483648 <= a) // expected-warning {{comparison -2147483648 <= 'enum A' is always true}}
+    return 0;
+  if (a <= -2147483648)
+    return 0;
+  if (-2147483648 > a) // expected-warning {{comparison -2147483648 > 'enum A' is always false}}
+    return 0;
+  if (a >= -2147483648) // expected-warning {{comparison 'enum A' >= -2147483648 is always true}}
+    return 0;
+  if (-2147483648 < a)
+    return 0;
+
+  if (a < 2147483647)
+    return 0;
+  if (2147483647 >= a) // expected-warning {{comparison 2147483647 >= 'enum A' is always true}}
+    return 0;
+  if (a > 2147483647) // expected-warning {{comparison 'enum A' > 2147483647 is always false}}
+    return 0;
+  if (2147483647 <= a)
+    return 0;
+  if (a <= 2147483647) // expected-warning {{comparison 'enum A' <= 2147483647 is always true}}
+    return 0;
+  if (2147483647 > a)
+    return 0;
+  if (a >= 2147483647)
+    return 0;
+  if (2147483647 < a) // expected-warning {{comparison 2147483647 < 'enum A' is always false}}
+    return 0;
+#elif defined(UNSIGNED) && !defined(SILENCE)
+#ifndef SILENCE
+  if (a < -2147483648) // expected-warning {{comparison of constant -2147483648 with expression of type 'enum A' is always false}}
+    return 0;
+  if (-2147483648 >= a) // expected-warning {{comparison of constant -2147483648 with expression of type 'enum A' is always false}}
+    return 0;
+  if (a > -2147483648) // expected-warning {{comparison of constant -2147483648 with expression of type 'enum A' is always true}}
+    return 0;
+  if (-2147483648 <= a) // expected-warning {{comparison of constant -2147483648 with expression of type 'enum A' is always true}}
+    return 0;
+  if (a <= -2147483648) // expected-warning {{comparison of constant -2147483648 with expression of type 'enum A' is always false}}
+    return 0;
+  if (-2147483648 > a) // expected-warning {{comparison of constant -2147483648 with expression of type 'enum A' is always false}}
+    return 0;
+  if (a >= -2147483648) // expected-warning {{comparison of constant -2147483648 with expression of type 'enum A' is always true}}
+    return 0;
+  if (-2147483648 < a) // expected-warning {{comparison of constant -2147483648 with expression of type 'enum A' is always true}}
+    return 0;
+#else
+  if (a < -2147483648)
+    return 0;
+  if (-2147483648 >= a)
+    return 0;
+  if (a > -2147483648)
+    return 0;
+  if (-2147483648 <= a)
+    return 0;
+  if (a <= -2147483648)
+    return 0;
+  if (-2147483648 > a)
+    return 0;
+  if (a >= -2147483648)
+    return 0;
+  if (-2147483648 < a)
+    return 0;
+#endif
+
+  if (a < 2147483647)
+    return 0;
+  if (2147483647 >= a)
+    return 0;
+  if (a > 2147483647)
+    return 0;
+  if (2147483647 <= a)
+    return 0;
+  if (a <= 2147483647)
+    return 0;
+  if (2147483647 > a)
+    return 0;
+  if (a >= 2147483647)
+    return 0;
+  if (2147483647 < a)
+    return 0;
+#endif
+
+  return 1;
+}
+
+// https://bugs.llvm.org/show_bug.cgi?id=35009
+int PR35009() {
+  enum A { A_a = 2 };
+  enum A a;
+
+  // in C, this should not warn.
+
+  if (a < 1)
+    return 0;
+  if (1 >= a)
+    return 0;
+  if (a > 1)
+    return 0;
+  if (1 <= a)
+    return 0;
+  if (a <= 1)
+    return 0;
+  if (1 > a)
+    return 0;
+  if (a >= 1)
+    return 0;
+  if (1 < a)
+    return 0;
+  if (a == 1)
+    return 0;
+  if (1 != a)
+    return 0;
+  if (a != 1)
+    return 0;
+  if (1 == a)
+    return 0;
+
+  if (a < 1U)
+    return 0;
+  if (1U >= a)
+    return 0;
+  if (a > 1U)
+    return 0;
+  if (1U <= a)
+    return 0;
+  if (a <= 1U)
+    return 0;
+  if (1U > a)
+    return 0;
+  if (a >= 1U)
+    return 0;
+  if (1U < a)
+    return 0;
+  if (a == 1U)
+    return 0;
+  if (1U != a)
+    return 0;
+  if (a != 1U)
+    return 0;
+  if (1U == a)
+    return 0;
+
+  if (a < 2)
+    return 0;
+  if (2 >= a)
+    return 0;
+  if (a > 2)
+    return 0;
+  if (2 <= a)
+    return 0;
+  if (a <= 2)
+    return 0;
+  if (2 > a)
+    return 0;
+  if (a >= 2)
+    return 0;
+  if (2 < a)
+    return 0;
+  if (a == 2)
+    return 0;
+  if (2 != a)
+    return 0;
+  if (a != 2)
+    return 0;
+  if (2 == a)
+    return 0;
+
+  if (a < 2U)
+    return 0;
+  if (2U >= a)
+    return 0;
+  if (a > 2U)
+    return 0;
+  if (2U <= a)
+    return 0;
+  if (a <= 2U)
+    return 0;
+  if (2U > a)
+    return 0;
+  if (a >= 2U)
+    return 0;
+  if (2U < a)
+    return 0;
+  if (a == 2U)
+    return 0;
+  if (2U != a)
+    return 0;
+  if (a != 2U)
+    return 0;
+  if (2U == a)
+    return 0;
+
+  if (a < 3)
+    return 0;
+  if (3 >= a)
+    return 0;
+  if (a > 3)
+    return 0;
+  if (3 <= a)
+    return 0;
+  if (a <= 3)
+    return 0;
+  if (3 > a)
+    return 0;
+  if (a >= 3)
+    return 0;
+  if (3 < a)
+    return 0;
+  if (a == 3)
+    return 0;
+  if (3 != a)
+    return 0;
+  if (a != 3)
+    return 0;
+  if (3 == a)
+    return 0;
+
+  if (a < 3U)
+    return 0;
+  if (3U >= a)
+    return 0;
+  if (a > 3U)
+    return 0;
+  if (3U <= a)
+    return 0;
+  if (a <= 3U)
+    return 0;
+  if (3U > a)
+    return 0;
+  if (a >= 3U)
+    return 0;
+  if (3U < a)
+    return 0;
+  if (a == 3U)
+    return 0;
+  if (3U != a)
+    return 0;
+  if (a != 3U)
+    return 0;
+  if (3U == a)
+    return 0;
+
+  return 1;
+}
diff --git a/test/Sema/tautological-unsigned-enum-zero-compare.c b/test/Sema/tautological-unsigned-enum-zero-compare.c
index 49982a9..87a56aa 100644
--- a/test/Sema/tautological-unsigned-enum-zero-compare.c
+++ b/test/Sema/tautological-unsigned-enum-zero-compare.c
@@ -1,68 +1,127 @@
-// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -fsyntax-only -DALL_WARN -verify %s
-// RUN: %clang_cc1 -triple=x86_64-pc-win32 -fsyntax-only -DSIGN_WARN -verify %s
-// RUN: %clang_cc1 -triple=x86_64-pc-win32 -fsyntax-only -Wno-tautological-unsigned-enum-zero-compare -verify %s
+// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -fsyntax-only \
+// RUN:            -Wtautological-unsigned-enum-zero-compare \
+// RUN:            -verify=unsigned,unsigned-signed %s
+// RUN: %clang_cc1 -triple=x86_64-pc-win32 -fsyntax-only \
+// RUN:            -Wtautological-unsigned-enum-zero-compare \
+// RUN:            -verify=unsigned-signed %s
+// RUN: %clang_cc1 -triple=x86_64-pc-win32 -fsyntax-only \
+// RUN:            -verify=silence %s
 
 // Okay, this is where it gets complicated.
 // Then default enum sigdness is target-specific.
 // On windows, it is signed by default. We do not want to warn in that case.
 
 int main() {
-  enum A { A_foo, A_bar };
+  enum A { A_a = 0 };
   enum A a;
+  enum B { B_a = -1 };
+  enum B b;
 
-#ifdef ALL_WARN
-  if (a < 0) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+  // silence-no-diagnostics
+
+  if (a < 0) // unsigned-warning {{comparison of unsigned enum expression < 0 is always false}}
     return 0;
-  if (a >= 0) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+  if (0 >= a)
     return 0;
-  if (0 <= a) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+  if (a > 0)
     return 0;
-  if (0 > a) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
+  if (0 <= a) // unsigned-warning {{comparison of 0 <= unsigned enum expression is always true}}
     return 0;
-  if (a < 0U) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+  if (a <= 0)
     return 0;
-  if (a >= 0U) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+  if (0 > a) // unsigned-warning {{comparison of 0 > unsigned enum expression is always false}}
     return 0;
-  if (0U <= a) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+  if (a >= 0) // unsigned-warning {{comparison of unsigned enum expression >= 0 is always true}}
     return 0;
-  if (0U > a) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
+  if (0 < a)
     return 0;
-#elif defined(SIGN_WARN)
-  if (a < 0) // ok
+
+  if (a < 0U) // unsigned-signed-warning {{comparison of unsigned enum expression < 0 is always false}}
     return 0;
-  if (a >= 0) // ok
+  if (0U >= a)
     return 0;
-  if (0 <= a) // ok
+  if (a > 0U)
     return 0;
-  if (0 > a) // ok
+  if (0U <= a) // unsigned-signed-warning {{comparison of 0 <= unsigned enum expression is always true}}
     return 0;
-  if (a < 0U) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+  if (a <= 0U)
     return 0;
-  if (a >= 0U) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+  if (0U > a) // unsigned-signed-warning {{comparison of 0 > unsigned enum expression is always false}}
     return 0;
-  if (0U <= a) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+  if (a >= 0U) // unsigned-signed-warning {{comparison of unsigned enum expression >= 0 is always true}}
     return 0;
-  if (0U > a) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
+  if (0U < a)
     return 0;
-#else
-  // expected-no-diagnostics
-  if (a < 0)
+
+  if (b < 0)
     return 0;
-  if (a >= 0)
+  if (0 >= b)
     return 0;
-  if (0 <= a)
+  if (b > 0)
     return 0;
-  if (0 > a)
+  if (0 <= b)
     return 0;
-  if (a < 0U)
+  if (b <= 0)
     return 0;
-  if (a >= 0U)
+  if (0 > b)
     return 0;
-  if (0U <= a)
+  if (b >= 0)
     return 0;
-  if (0U > a)
+  if (0 < b)
     return 0;
-#endif
+
+  if (b < 0U) // unsigned-signed-warning {{comparison of unsigned enum expression < 0 is always false}}
+    return 0;
+  if (0U >= b)
+    return 0;
+  if (b > 0U)
+    return 0;
+  if (0U <= b) // unsigned-signed-warning {{comparison of 0 <= unsigned enum expression is always true}}
+    return 0;
+  if (b <= 0U)
+    return 0;
+  if (0U > b) // unsigned-signed-warning {{comparison of 0 > unsigned enum expression is always false}}
+    return 0;
+  if (b >= 0U) // unsigned-signed-warning {{comparison of unsigned enum expression >= 0 is always true}}
+    return 0;
+  if (0U < b)
+    return 0;
+
+  if (a == 0)
+    return 0;
+  if (0 != a)
+    return 0;
+  if (a != 0)
+    return 0;
+  if (0 == a)
+    return 0;
+
+  if (a == 0U)
+    return 0;
+  if (0U != a)
+    return 0;
+  if (a != 0U)
+    return 0;
+  if (0U == a)
+    return 0;
+
+  if (b == 0)
+    return 0;
+  if (0 != b)
+    return 0;
+  if (b != 0)
+    return 0;
+  if (0 == b)
+    return 0;
+
+  if (b == 0U)
+    return 0;
+  if (0U != b)
+    return 0;
+  if (b != 0U)
+    return 0;
+  if (0U == b)
+    return 0;
 
   return 1;
 }
diff --git a/test/Sema/tautological-unsigned-enum-zero-compare.cpp b/test/Sema/tautological-unsigned-enum-zero-compare.cpp
index 3e78626..0b66c68 100644
--- a/test/Sema/tautological-unsigned-enum-zero-compare.cpp
+++ b/test/Sema/tautological-unsigned-enum-zero-compare.cpp
@@ -1,176 +1,148 @@
-// RUN: %clang_cc1 -std=c++11 -triple=x86_64-pc-linux-gnu -fsyntax-only -DALL_WARN -verify %s
-// RUN: %clang_cc1 -std=c++11 -triple=x86_64-pc-win32 -fsyntax-only -DSIGN_WARN -verify %s
-// RUN: %clang_cc1 -std=c++11 -triple=x86_64-pc-win32 -fsyntax-only -Wno-tautological-unsigned-enum-zero-compare -verify %s
+// RUN: %clang_cc1 -std=c++11 -triple=x86_64-pc-linux-gnu -fsyntax-only \
+// RUN:            -Wtautological-unsigned-enum-zero-compare \
+// RUN:            -verify=unsigned,unsigned-signed %s
+// RUN: %clang_cc1 -std=c++11 -triple=x86_64-pc-win32 -fsyntax-only \
+// RUN:            -Wtautological-unsigned-enum-zero-compare \
+// RUN:            -verify=unsigned-signed %s
+// RUN: %clang_cc1 -std=c++11 -triple=x86_64-pc-win32 -fsyntax-only \
+// RUN:            -verify=silence %s
 
-// Okay, this is where it gets complicated.
-// Then default enum sigdness is target-specific.
-// On windows, it is signed by default. We do not want to warn in that case.
+// silence-no-diagnostics
 
 int main() {
-  enum A { A_foo, A_bar };
+  // On Windows, all enumerations have a fixed underlying type, which is 'int'
+  // if not otherwise specified, so A is identical to C on Windows. Otherwise,
+  // we follow the C++ rules, which say that the only valid values of A are 0
+  // and 1.
+  enum A { A_foo = 0, A_bar, };
   enum A a;
 
-  enum B : unsigned { B_foo, B_bar };
+  enum B : unsigned { B_foo = 0, B_bar, };
   enum B b;
 
-  enum C : signed { c_foo, c_bar };
+  enum C : signed { C_foo = 0, C_bar, };
   enum C c;
 
-#ifdef ALL_WARN
-  if (a < 0) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+  if (a < 0) // unsigned-warning {{comparison of unsigned enum expression < 0 is always false}}
     return 0;
-  if (a >= 0) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+  if (0 >= a)
     return 0;
-  if (0 <= a) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+  if (a > 0)
     return 0;
-  if (0 > a) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
+  if (0 <= a) // unsigned-warning {{comparison of 0 <= unsigned enum expression is always true}}
     return 0;
-  if (a < 0U) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+  if (a <= 0)
     return 0;
-  if (a >= 0U) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+  if (0 > a) // unsigned-warning {{comparison of 0 > unsigned enum expression is always false}}
     return 0;
-  if (0U <= a) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+  if (a >= 0) // unsigned-warning {{comparison of unsigned enum expression >= 0 is always true}}
     return 0;
-  if (0U > a) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
+  if (0 < a)
     return 0;
 
-  if (b < 0) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+  // FIXME: As below, the issue here is that the enumeration is promoted to
+  // unsigned.
+  if (a < 0U) // unsigned-signed-warning {{comparison of unsigned enum expression < 0 is always false}}
     return 0;
-  if (b >= 0) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+  if (0U >= a)
     return 0;
-  if (0 <= b) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+  if (a > 0U)
     return 0;
-  if (0 > b) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
+  if (0U <= a) // unsigned-signed-warning {{comparison of 0 <= unsigned enum expression is always true}}
     return 0;
-  if (b < 0U) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+  if (a <= 0U)
     return 0;
-  if (b >= 0U) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+  if (0U > a) // unsigned-signed-warning {{comparison of 0 > unsigned enum expression is always false}}
     return 0;
-  if (0U <= b) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+  if (a >= 0U) // unsigned-signed-warning {{comparison of unsigned enum expression >= 0 is always true}}
     return 0;
-  if (0U > b) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
+  if (0U < a)
     return 0;
 
-  if (c < 0) // ok
+  if (b < 0) // unsigned-signed-warning {{comparison of unsigned enum expression < 0 is always false}}
     return 0;
-  if (c >= 0) // ok
+  if (0 >= b)
     return 0;
-  if (0 <= c) // ok
+  if (b > 0)
     return 0;
-  if (0 > c) // ok
+  if (0 <= b) // unsigned-signed-warning {{comparison of 0 <= unsigned enum expression is always true}}
     return 0;
-  if (c < 0U) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+  if (b <= 0)
     return 0;
-  if (c >= 0U) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+  if (0 > b) // unsigned-signed-warning {{comparison of 0 > unsigned enum expression is always false}}
     return 0;
-  if (0U <= c) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+  if (b >= 0) // unsigned-signed-warning {{comparison of unsigned enum expression >= 0 is always true}}
     return 0;
-  if (0U > c) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
-    return 0;
-#elif defined(SIGN_WARN)
-  if (a < 0) // ok
-    return 0;
-  if (a >= 0) // ok
-    return 0;
-  if (0 <= a) // ok
-    return 0;
-  if (0 > a) // ok
-    return 0;
-  if (a < 0U) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
-    return 0;
-  if (a >= 0U) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
-    return 0;
-  if (0U <= a) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
-    return 0;
-  if (0U > a) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
+  if (0 < b)
     return 0;
 
-  if (b < 0) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+  if (b < 0U) // unsigned-signed-warning {{comparison of unsigned enum expression < 0 is always false}}
     return 0;
-  if (b >= 0) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+  if (0U >= b)
     return 0;
-  if (0 <= b) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+  if (b > 0U)
     return 0;
-  if (0 > b) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
+  if (0U <= b) // unsigned-signed-warning {{comparison of 0 <= unsigned enum expression is always true}}
     return 0;
-  if (b < 0U) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
+  if (b <= 0U)
     return 0;
-  if (b >= 0U) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
+  if (0U > b) // unsigned-signed-warning {{comparison of 0 > unsigned enum expression is always false}}
     return 0;
-  if (0U <= b) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
+  if (b >= 0U) // unsigned-signed-warning {{comparison of unsigned enum expression >= 0 is always true}}
     return 0;
-  if (0U > b) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
-    return 0;
-
-  if (c < 0) // ok
-    return 0;
-  if (c >= 0) // ok
-    return 0;
-  if (0 <= c) // ok
-    return 0;
-  if (0 > c) // ok
-    return 0;
-  if (c < 0U) // expected-warning {{comparison of unsigned enum expression < 0 is always false}}
-    return 0;
-  if (c >= 0U) // expected-warning {{comparison of unsigned enum expression >= 0 is always true}}
-    return 0;
-  if (0U <= c) // expected-warning {{comparison of 0 <= unsigned enum expression is always true}}
-    return 0;
-  if (0U > c) // expected-warning {{comparison of 0 > unsigned enum expression is always false}}
-    return 0;
-#else
-  // expected-no-diagnostics
-  if (a < 0)
-    return 0;
-  if (a >= 0)
-    return 0;
-  if (0 <= a)
-    return 0;
-  if (0 > a)
-    return 0;
-  if (a < 0U)
-    return 0;
-  if (a >= 0U)
-    return 0;
-  if (0U <= a)
-    return 0;
-  if (0U > a)
-    return 0;
-
-  if (b < 0)
-    return 0;
-  if (b >= 0)
-    return 0;
-  if (0 <= b)
-    return 0;
-  if (0 > b)
-    return 0;
-  if (b < 0U)
-    return 0;
-  if (b >= 0U)
-    return 0;
-  if (0U <= b)
-    return 0;
-  if (0U > b)
+  if (0U < b)
     return 0;
 
   if (c < 0)
     return 0;
-  if (c >= 0)
+  if (0 >= c)
+    return 0;
+  if (c > 0)
     return 0;
   if (0 <= c)
     return 0;
+  if (c <= 0)
+    return 0;
   if (0 > c)
     return 0;
-  if (c < 0U)
+  if (c >= 0)
     return 0;
-  if (c >= 0U)
+  if (0 < c)
     return 0;
-  if (0U <= c)
+
+  // FIXME: These diagnostics are terrible. The issue here is that the signed
+  // enumeration value was promoted to an unsigned type.
+  if (c < 0U) // unsigned-signed-warning {{comparison of unsigned enum expression < 0 is always false}}
     return 0;
-  if (0U > c)
+  if (0U >= c)
     return 0;
-#endif
+  if (c > 0U)
+    return 0;
+  if (0U <= c) // unsigned-signed-warning {{comparison of 0 <= unsigned enum expression is always true}}
+    return 0;
+  if (c <= 0U)
+    return 0;
+  if (0U > c) // unsigned-signed-warning {{comparison of 0 > unsigned enum expression is always false}}
+    return 0;
+  if (c >= 0U) // unsigned-signed-warning {{comparison of unsigned enum expression >= 0 is always true}}
+    return 0;
+  if (0U < c)
+    return 0;
 
   return 1;
 }
+
+namespace crash_enum_zero_width {
+int test() {
+  enum A : unsigned {
+    A_foo = 0
+  };
+  enum A a;
+
+  // used to crash in llvm::APSInt::getMaxValue()
+  if (a < 0) // unsigned-signed-warning {{comparison of unsigned enum expression < 0 is always false}}
+    return 0;
+
+  return 1;
+}
+} // namespace crash_enum_zero_width
diff --git a/test/Sema/tautological-unsigned-zero-compare.c b/test/Sema/tautological-unsigned-zero-compare.c
index e0611cb..4c9394e 100644
--- a/test/Sema/tautological-unsigned-zero-compare.c
+++ b/test/Sema/tautological-unsigned-zero-compare.c
@@ -1,7 +1,13 @@
-// RUN: %clang_cc1 -fsyntax-only -DTEST -verify %s
-// RUN: %clang_cc1 -fsyntax-only -Wno-tautological-unsigned-zero-compare -verify %s
-// RUN: %clang_cc1 -fsyntax-only -DTEST -verify -x c++ %s
-// RUN: %clang_cc1 -fsyntax-only -Wno-tautological-unsigned-zero-compare -verify -x c++ %s
+// RUN: %clang_cc1 -fsyntax-only \
+// RUN:            -Wtautological-unsigned-zero-compare \
+// RUN:            -verify %s
+// RUN: %clang_cc1 -fsyntax-only \
+// RUN:            -verify=silence %s
+// RUN: %clang_cc1 -fsyntax-only \
+// RUN:            -Wtautological-unsigned-zero-compare \
+// RUN:            -verify -x c++ %s
+// RUN: %clang_cc1 -fsyntax-only \
+// RUN:            -verify=silence -x c++ %s
 
 unsigned uvalue(void);
 signed int svalue(void);
@@ -13,13 +19,8 @@
 void TFunc() {
   // Make sure that we do warn for normal variables in template functions !
   unsigned char c = svalue();
-#ifdef TEST
   if (c < 0) // expected-warning {{comparison of unsigned expression < 0 is always false}}
       return;
-#else
-  if (c < 0)
-      return;
-#endif
 
   if (c < macro(0))
       return;
@@ -37,9 +38,39 @@
   TFunc<unsigned short>();
 #endif
 
+  short s = svalue();
+
   unsigned un = uvalue();
 
-#ifdef TEST
+  // silence-no-diagnostics
+
+  // Note: both sides are promoted to unsigned long prior to the comparison.
+  if (s == 0UL)
+      return 0;
+  if (s != 0UL)
+      return 0;
+  if (s < 0UL) // expected-warning {{comparison of unsigned expression < 0 is always false}}
+      return 0;
+  if (s <= 0UL)
+      return 0;
+  if (s > 0UL)
+      return 0;
+  if (s >= 0UL) // expected-warning {{comparison of unsigned expression >= 0 is always true}}
+      return 0;
+
+  if (0UL == s)
+      return 0;
+  if (0UL != s)
+      return 0;
+  if (0UL < s)
+      return 0;
+  if (0UL <= s) // expected-warning {{comparison of 0 <= unsigned expression is always true}}
+      return 0;
+  if (0UL > s) // expected-warning {{comparison of 0 > unsigned expression is always false}}
+      return 0;
+  if (0UL >= s)
+      return 0;
+
   if (un == 0)
       return 0;
   if (un != 0)
@@ -91,65 +122,10 @@
       return 0;
   if (0UL >= un)
       return 0;
-#else
-// expected-no-diagnostics
-  if (un == 0)
-      return 0;
-  if (un != 0)
-      return 0;
-  if (un < 0)
-      return 0;
-  if (un <= 0)
-      return 0;
-  if (un > 0)
-      return 0;
-  if (un >= 0)
-      return 0;
-
-  if (0 == un)
-      return 0;
-  if (0 != un)
-      return 0;
-  if (0 < un)
-      return 0;
-  if (0 <= un)
-      return 0;
-  if (0 > un)
-      return 0;
-  if (0 >= un)
-      return 0;
-
-  if (un == 0UL)
-      return 0;
-  if (un != 0UL)
-      return 0;
-  if (un < 0UL)
-      return 0;
-  if (un <= 0UL)
-      return 0;
-  if (un > 0UL)
-      return 0;
-  if (un >= 0UL)
-      return 0;
-
-  if (0UL == un)
-      return 0;
-  if (0UL != un)
-      return 0;
-  if (0UL < un)
-      return 0;
-  if (0UL <= un)
-      return 0;
-  if (0UL > un)
-      return 0;
-  if (0UL >= un)
-      return 0;
-#endif
 
 
   signed int a = svalue();
 
-#ifdef TEST
   if (a == 0)
       return 0;
   if (a != 0)
@@ -201,60 +177,6 @@
       return 0;
   if (0UL >= a)
       return 0;
-#else
-// expected-no-diagnostics
-  if (a == 0)
-      return 0;
-  if (a != 0)
-      return 0;
-  if (a < 0)
-      return 0;
-  if (a <= 0)
-      return 0;
-  if (a > 0)
-      return 0;
-  if (a >= 0)
-      return 0;
-
-  if (0 == a)
-      return 0;
-  if (0 != a)
-      return 0;
-  if (0 < a)
-      return 0;
-  if (0 <= a)
-      return 0;
-  if (0 > a)
-      return 0;
-  if (0 >= a)
-      return 0;
-
-  if (a == 0UL)
-      return 0;
-  if (a != 0UL)
-      return 0;
-  if (a < 0UL)
-      return 0;
-  if (a <= 0UL)
-      return 0;
-  if (a > 0UL)
-      return 0;
-  if (a >= 0UL)
-      return 0;
-
-  if (0UL == a)
-      return 0;
-  if (0UL != a)
-      return 0;
-  if (0UL < a)
-      return 0;
-  if (0UL <= a)
-      return 0;
-  if (0UL > a)
-      return 0;
-  if (0UL >= a)
-      return 0;
-#endif
 
 
   float fl = 0;
diff --git a/test/Sema/transparent-union.c b/test/Sema/transparent-union.c
index dfbadcf..7967535 100644
--- a/test/Sema/transparent-union.c
+++ b/test/Sema/transparent-union.c
@@ -43,6 +43,35 @@
 void fvpp(TU); // expected-note{{previous declaration is here}}
 void fvpp(void **v) {} // expected-error{{conflicting types}}
 
+/* Test redeclaring a function taking a transparent_union arg more than twice.
+   Merging different declarations depends on their order, vary order too. */
+
+void f_triple0(TU tu) {}
+void f_triple0(int *); // expected-note{{previous declaration is here}}
+void f_triple0(float *f); // expected-error{{conflicting types}}
+
+void f_triple1(int *);
+void f_triple1(TU tu) {} // expected-note{{previous definition is here}}
+void f_triple1(float *f); // expected-error{{conflicting types}}
+
+void f_triple2(int *); // expected-note{{previous declaration is here}}
+void f_triple2(float *f); // expected-error{{conflicting types}}
+void f_triple2(TU tu) {}
+
+/* Test calling redeclared function taking a transparent_union arg. */
+
+void f_callee(TU);
+void f_callee(int *i) {} // expected-note{{passing argument to parameter 'i' here}}
+
+void caller(void) {
+  TU tu;
+  f_callee(tu); // expected-error{{passing 'TU' to parameter of incompatible type 'int *'}}
+
+  int *i;
+  f_callee(i);
+}
+
+
 /* FIXME: we'd like to just use an "int" here and align it differently
    from the normal "int", but if we do so we lose the alignment
    information from the typedef within the compiler. */
@@ -102,7 +131,7 @@
 
 union pr30520v { void b; } __attribute__((transparent_union)); // expected-error {{field has incomplete type 'void'}}
 
-union pr30520a { int b[]; } __attribute__((transparent_union)); // expected-error {{field has incomplete type 'int []'}}
+union pr30520a { int b[]; } __attribute__((transparent_union)); // expected-error {{flexible array member 'b' in a union is not allowed}}
 
 // expected-note@+1 2 {{forward declaration of 'struct stb'}}
 union pr30520s { struct stb b; } __attribute__((transparent_union)); // expected-error {{field has incomplete type 'struct stb'}}
diff --git a/test/Sema/typedef-retain.c b/test/Sema/typedef-retain.c
index d216466..3d784ce 100644
--- a/test/Sema/typedef-retain.c
+++ b/test/Sema/typedef-retain.c
@@ -16,8 +16,8 @@
 typedef int a[5];
 void test3() {
   typedef const a b;
-  b r;
-  r[0]=10;  // expected-error {{read-only variable is not assignable}}
+  b r;       // expected-note {{variable 'r' declared const here}}
+  r[0] = 10; // expected-error {{cannot assign to variable 'r' with const-qualified type 'b' (aka 'int const[5]')}}
 }
 
 int test4(const a y) {
diff --git a/test/Sema/types.c b/test/Sema/types.c
index 9981be5..f44057d 100644
--- a/test/Sema/types.c
+++ b/test/Sema/types.c
@@ -75,7 +75,7 @@
 // no support for vector enum type
 enum { e_2 } x3 __attribute__((vector_size(64))); // expected-error {{invalid vector element type}}
 
-int x4 __attribute__((ext_vector_type(64)));  // expected-error {{'ext_vector_type' attribute only applies to types}}
+int x4 __attribute__((ext_vector_type(64)));  // expected-error {{'ext_vector_type' attribute only applies to typedefs}}
 
 // rdar://16492792
 typedef __attribute__ ((ext_vector_type(32),__aligned__(32))) unsigned char uchar32;
diff --git a/test/Sema/unused-expr.c b/test/Sema/unused-expr.c
index 58ad827..c574f5e 100644
--- a/test/Sema/unused-expr.c
+++ b/test/Sema/unused-expr.c
@@ -96,7 +96,7 @@
   return 0;
 }
 
-int t7 __attribute__ ((warn_unused_result)); // expected-warning {{'warn_unused_result' attribute only applies to functions}}
+int t7 __attribute__ ((warn_unused_result)); // expected-warning {{'warn_unused_result' attribute only applies to Objective-C methods, enums, structs, unions, classes, functions, and function pointers}}
 
 // PR4010
 int (*fn4)(void) __attribute__ ((warn_unused_result));
diff --git a/test/Sema/varargs.c b/test/Sema/varargs.c
index 0ade0cf..3fa4291 100644
--- a/test/Sema/varargs.c
+++ b/test/Sema/varargs.c
@@ -112,3 +112,12 @@
 #endif
   __builtin_va_end(va);
 }
+
+void f14(int e, ...) {
+  // expected-warning@+3 {{implicitly declaring library function 'va_start'}}
+  // expected-note@+2 {{include the header <stdarg.h>}}
+  // expected-error@+1 {{too few arguments to function call}}
+  va_start();
+  __builtin_va_list va;
+  va_start(va, e);
+}
diff --git a/test/Sema/vla.c b/test/Sema/vla.c
index b9576bf..c962749 100644
--- a/test/Sema/vla.c
+++ b/test/Sema/vla.c
@@ -68,3 +68,11 @@
 int TransformBug(int a) {
  return sizeof(*(int(*)[({ goto v; v: a;})]) 0); // expected-warning {{use of GNU statement expression extension}}
 }
+
+// PR36157
+struct {
+  int a[ // expected-error {{variable length array in struct}}
+    implicitly_declared() // expected-warning {{implicit declaration}}
+  ];
+};
+int (*use_implicitly_declared)() = implicitly_declared; // ok, was implicitly declared at file scope
diff --git a/test/Sema/warn-strict-prototypes.c b/test/Sema/warn-strict-prototypes.c
index a28f57d..0c23b3b 100644
--- a/test/Sema/warn-strict-prototypes.c
+++ b/test/Sema/warn-strict-prototypes.c
@@ -65,3 +65,9 @@
 void __attribute__((cdecl)) foo12(d) // expected-warning {{this old-style function definition is not preceded by a prototype}}
   short d;
 {}
+
+// No warnings for variadic functions. Overloadable attribute is required
+// to avoid err_ellipsis_first_param error.
+// rdar://problem/33251668
+void foo13(...) __attribute__((overloadable));
+void foo13(...) __attribute__((overloadable)) {}
diff --git a/test/Sema/warn-thread-safety-analysis.c b/test/Sema/warn-thread-safety-analysis.c
index 425ce4c..0a375b8 100644
--- a/test/Sema/warn-thread-safety-analysis.c
+++ b/test/Sema/warn-thread-safety-analysis.c
@@ -130,4 +130,4 @@
 
 // We had a problem where we'd skip all attributes that follow a late-parsed
 // attribute in a single __attribute__.
-void run() __attribute__((guarded_by(mu1), guarded_by(mu1))); // expected-warning 2{{only applies to fields and global variables}}
+void run() __attribute__((guarded_by(mu1), guarded_by(mu1))); // expected-warning 2{{only applies to non-static data members and global variables}}
diff --git a/test/Sema/warn-type-safety.c b/test/Sema/warn-type-safety.c
index 0431386..da91473 100644
--- a/test/Sema/warn-type-safety.c
+++ b/test/Sema/warn-type-safety.c
@@ -7,7 +7,7 @@
 typedef struct A *MPI_Datatype;
 
 int wrong1(void *buf, MPI_Datatype datatype)
-    __attribute__(( pointer_with_type_tag )); // expected-error {{'pointer_with_type_tag' attribute requires parameter 1 to be an identifier}}
+    __attribute__(( pointer_with_type_tag )); // expected-error {{'pointer_with_type_tag' attribute requires exactly 3 arguments}}
 
 int wrong2(void *buf, MPI_Datatype datatype)
     __attribute__(( pointer_with_type_tag(mpi,0,7) )); // expected-error {{attribute parameter 2 is out of bounds}}
@@ -32,11 +32,15 @@
 int wrong8(void *buf, MPI_Datatype datatype)
     __attribute__(( pointer_with_type_tag(mpi,1,x) )); // expected-error {{attribute requires parameter 3 to be an integer constant}}
 
-int wrong9 __attribute__(( pointer_with_type_tag(mpi,1,2) )); // expected-error {{attribute only applies to functions and methods}}
+int wrong9 __attribute__(( pointer_with_type_tag(mpi,1,2) )); // expected-error {{'pointer_with_type_tag' attribute only applies to non-K&R-style functions}}
 
 int wrong10(double buf, MPI_Datatype type)
     __attribute__(( pointer_with_type_tag(mpi,1,2) )); // expected-error {{'pointer_with_type_tag' attribute only applies to pointer arguments}}
 
+int ok11(void *, ...)
+    __attribute__(( pointer_with_type_tag(mpi,1,2) ));
+int wrong11(void *, ...)
+    __attribute__(( pointer_with_type_tag(mpi,2,3) )); // expected-error {{'pointer_with_type_tag' attribute only applies to pointer arguments}}
 
 extern struct A datatype_wrong1
     __attribute__(( type_tag_for_datatype )); // expected-error {{'type_tag_for_datatype' attribute requires parameter 1 to be an identifier}}
diff --git a/test/Sema/warn-unreachable.c b/test/Sema/warn-unreachable.c
index 440aa0a..aec3b07 100644
--- a/test/Sema/warn-unreachable.c
+++ b/test/Sema/warn-unreachable.c
@@ -468,6 +468,7 @@
   else
     return x;
   __builtin_unreachable(); // expected no warning
+  __builtin_assume(0); // expected no warning
 }
 
 int pr13910_bar(int x) {
@@ -485,16 +486,19 @@
     return x;
   pr13910_foo(x);          // expected-warning {{code will never be executed}}
   __builtin_unreachable(); // expected no warning
+  __builtin_assume(0);     // expected no warning
   pr13910_foo(x);          // expected-warning {{code will never be executed}}
 }
 
 void pr13910_noreturn() {
   raze();
   __builtin_unreachable(); // expected no warning
+  __builtin_assume(0); // expected no warning
 }
 
 void pr13910_assert() {
   myassert(0 && "unreachable");
   return;
   __builtin_unreachable(); // expected no warning
+  __builtin_assume(0); // expected no warning
 }
diff --git a/test/Sema/xray-always-instrument-attr.c b/test/Sema/xray-always-instrument-attr.c
index 3c063e2..b523690 100644
--- a/test/Sema/xray-always-instrument-attr.c
+++ b/test/Sema/xray-always-instrument-attr.c
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 %s -verify -fsyntax-only -std=c11
 void foo() __attribute__((xray_always_instrument));
 
-struct __attribute__((xray_always_instrument)) a { int x; }; // expected-warning {{'xray_always_instrument' attribute only applies to functions and methods}}
+struct __attribute__((xray_always_instrument)) a { int x; }; // expected-warning {{'xray_always_instrument' attribute only applies to functions and Objective-C methods}}
 
 void bar() __attribute__((xray_always_instrument("not-supported"))); // expected-error {{'xray_always_instrument' attribute takes no arguments}}
diff --git a/test/Sema/xray-always-instrument-attr.cpp b/test/Sema/xray-always-instrument-attr.cpp
index 8d42837..d6a3395 100644
--- a/test/Sema/xray-always-instrument-attr.cpp
+++ b/test/Sema/xray-always-instrument-attr.cpp
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 %s -verify -fsyntax-only -std=c++11 -x c++
 void foo [[clang::xray_always_instrument]] ();
 
-struct [[clang::xray_always_instrument]] a { int x; }; // expected-warning {{'xray_always_instrument' attribute only applies to functions and methods}}
+struct [[clang::xray_always_instrument]] a { int x; }; // expected-warning {{'xray_always_instrument' attribute only applies to functions and Objective-C methods}}
 
 class b {
  void c [[clang::xray_always_instrument]] ();
diff --git a/test/Sema/xray-log-args-oob.c b/test/Sema/xray-log-args-oob.c
index a6be0f8..585f5fd 100644
--- a/test/Sema/xray-log-args-oob.c
+++ b/test/Sema/xray-log-args-oob.c
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 %s -verify -fsyntax-only -std=c11
 void foo(int) __attribute__((xray_log_args(1)));
-struct __attribute__((xray_log_args(1))) a { int x; }; // expected-warning {{'xray_log_args' attribute only applies to functions and methods}}
+struct __attribute__((xray_log_args(1))) a { int x; }; // expected-warning {{'xray_log_args' attribute only applies to functions and Objective-C methods}}
 
 void fop() __attribute__((xray_log_args(1))); // expected-error {{'xray_log_args' attribute parameter 1 is out of bounds}}
 
diff --git a/test/Sema/xray-log-args-oob.cpp b/test/Sema/xray-log-args-oob.cpp
index 414bce0..82f3be3 100644
--- a/test/Sema/xray-log-args-oob.cpp
+++ b/test/Sema/xray-log-args-oob.cpp
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 %s -verify -fsyntax-only -std=c++11 -x c++
 void foo [[clang::xray_log_args(1)]] (int);
-struct [[clang::xray_log_args(1)]] a { int x; }; // expected-warning {{'xray_log_args' attribute only applies to functions and methods}}
+struct [[clang::xray_log_args(1)]] a { int x; }; // expected-warning {{'xray_log_args' attribute only applies to functions and Objective-C methods}}
 
 void fop [[clang::xray_log_args(1)]] (); // expected-error {{'xray_log_args' attribute parameter 1 is out of bounds}}
 
diff --git a/test/SemaCUDA/call-stack-for-deferred-err.cu b/test/SemaCUDA/call-stack-for-deferred-err.cu
index ddcaabf..35f71dd 100644
--- a/test/SemaCUDA/call-stack-for-deferred-err.cu
+++ b/test/SemaCUDA/call-stack-for-deferred-err.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fcuda-is-device -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -fsyntax-only -verify %s
 
 #include "Inputs/cuda.h"
 
diff --git a/test/SemaCUDA/extern-shared.cu b/test/SemaCUDA/extern-shared.cu
index 9fd7bbb..2c0c6e0 100644
--- a/test/SemaCUDA/extern-shared.cu
+++ b/test/SemaCUDA/extern-shared.cu
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s
 // RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -fcuda-rdc -verify=rdc %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -fcuda-rdc -verify=rdc %s
+// These declarations are fine in separate compilation mode:
+// rdc-no-diagnostics
+
 #include "Inputs/cuda.h"
 
 __device__ void foo() {
diff --git a/test/SemaCUDA/launch_bounds.cu b/test/SemaCUDA/launch_bounds.cu
index 468954a..0ca0c01 100644
--- a/test/SemaCUDA/launch_bounds.cu
+++ b/test/SemaCUDA/launch_bounds.cu
@@ -15,7 +15,7 @@
 __launch_bounds__(1, 2, 3) void Test3Args(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}}
 __launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}
 
-int TestNoFunction __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to functions and methods}}
+int TestNoFunction __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to Objective-C methods, functions, and function pointers}}
 
 __launch_bounds__(true) void TestBool(void);
 __launch_bounds__(128.0) void TestFP(void); // expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}}
diff --git a/test/SemaCUDA/no-call-stack-for-immediate-errs.cu b/test/SemaCUDA/no-call-stack-for-immediate-errs.cu
index 6dc9869..f7e9eb7 100644
--- a/test/SemaCUDA/no-call-stack-for-immediate-errs.cu
+++ b/test/SemaCUDA/no-call-stack-for-immediate-errs.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fcuda-is-device -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -fsyntax-only -verify %s
 
 #include "Inputs/cuda.h"
 
diff --git a/test/SemaCUDA/vla.cu b/test/SemaCUDA/vla.cu
index f0d1ba5..b65ae5e 100644
--- a/test/SemaCUDA/vla.cu
+++ b/test/SemaCUDA/vla.cu
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -fcuda-is-device -fsyntax-only -verify %s
-// RUN: %clang_cc1 -fsyntax-only -verify -DHOST %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -verify -DHOST %s
 
 #include "Inputs/cuda.h"
 
diff --git a/test/SemaCXX/Inputs/warn-zero-nullptr.h b/test/SemaCXX/Inputs/warn-zero-nullptr.h
new file mode 100644
index 0000000..9b86c4b
--- /dev/null
+++ b/test/SemaCXX/Inputs/warn-zero-nullptr.h
@@ -0,0 +1,3 @@
+#define NULL (0)
+#define SYSTEM_MACRO (0)
+#define OTHER_SYSTEM_MACRO (NULL)
diff --git a/test/SemaCXX/accessible-base.cpp b/test/SemaCXX/accessible-base.cpp
index 6bf06ce..2796985 100644
--- a/test/SemaCXX/accessible-base.cpp
+++ b/test/SemaCXX/accessible-base.cpp
@@ -1,10 +1,11 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DMS -fms-compatibility -Wmicrosoft-inaccessible-base -fsyntax-only -verify %s
 
 struct A {
   int a;
 };
 
-struct X1 : virtual A 
+struct X1 : virtual A
 {};
 
 struct Y1 : X1, virtual A
@@ -13,7 +14,7 @@
 struct Y2 : X1, A // expected-warning{{direct base 'A' is inaccessible due to ambiguity:\n    struct Y2 -> struct X1 -> struct A\n    struct Y2 -> struct A}}
 {};
 
-struct X2 : A 
+struct X2 : A
 {};
 
 struct Z1 : X2, virtual A // expected-warning{{direct base 'A' is inaccessible due to ambiguity:\n    struct Z1 -> struct X2 -> struct A\n    struct Z1 -> struct A}}
@@ -21,3 +22,30 @@
 
 struct Z2 : X2, A // expected-warning{{direct base 'A' is inaccessible due to ambiguity:\n    struct Z2 -> struct X2 -> struct A\n    struct Z2 -> struct A}}
 {};
+
+A *y2_to_a(Y2 *p) {
+#ifdef MS
+  // expected-warning@+4 {{accessing inaccessible direct base 'A' of 'Y2' is a Microsoft extension}}
+#else
+  // expected-error@+2 {{ambiguous conversion}}
+#endif
+  return p;
+}
+
+A *z1_to_a(Z1 *p) {
+#ifdef MS
+  // expected-warning@+4 {{accessing inaccessible direct base 'A' of 'Z1' is a Microsoft extension}}
+#else
+  // expected-error@+2 {{ambiguous conversion}}
+#endif
+  return p;
+}
+
+A *z1_to_a(Z2 *p) {
+#ifdef MS
+  // expected-warning@+4 {{accessing inaccessible direct base 'A' of 'Z2' is a Microsoft extension}}
+#else
+  // expected-error@+2 {{ambiguous conversion}}
+#endif
+  return p;
+}
diff --git a/test/SemaCXX/address-packed.cpp b/test/SemaCXX/address-packed.cpp
index 308c485..f0d1496 100644
--- a/test/SemaCXX/address-packed.cpp
+++ b/test/SemaCXX/address-packed.cpp
@@ -112,3 +112,12 @@
   S<float> s3;
   s3.get(); // expected-note {{in instantiation of member function 'S<float>::get'}}
 }
+
+// PR35509
+typedef long L1;
+struct Incomplete;
+struct S2 {
+  L1 d;
+  Incomplete *e() const;
+} __attribute__((packed));
+Incomplete *S2::e() const { return (Incomplete *)&d; } // no-warning
diff --git a/test/SemaCXX/ast-print-crash.cpp b/test/SemaCXX/ast-print-crash.cpp
new file mode 100644
index 0000000..c108f66
--- /dev/null
+++ b/test/SemaCXX/ast-print-crash.cpp
@@ -0,0 +1,12 @@
+// RUN: not %clang_cc1 -triple %ms_abi_triple -ast-print %s -std=gnu++11 \
+// RUN:     | FileCheck %s
+
+// The test compiles a file with a syntax error which used to cause a crash with
+// -ast-print. Compilation fails due to the syntax error, but compiler should
+// not crash and print out whatever it manager to parse.
+
+// CHECK:      struct {
+// CHECK-NEXT: } dont_crash_on_syntax_error;
+// CHECK-NEXT: decltype(nullptr) p;
+struct {
+} dont_crash_on_syntax_error /* missing ; */ decltype(nullptr) p;
diff --git a/test/SemaCXX/attr-cxx-disabled.cpp b/test/SemaCXX/attr-cxx-disabled.cpp
new file mode 100644
index 0000000..a1a7533
--- /dev/null
+++ b/test/SemaCXX/attr-cxx-disabled.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -fsyntax-only -fno-double-square-bracket-attributes -verify -pedantic -std=c++11 -DERRORS %s
+// RUN: %clang_cc1 -fsyntax-only -fdouble-square-bracket-attributes -verify -pedantic -std=c++11 %s
+
+struct [[]] S {};
+
+#ifdef ERRORS
+// expected-error@-3 {{declaration of anonymous struct must be a definition}}
+// expected-warning@-4 {{declaration does not declare anything}}
+#else
+// expected-no-diagnostics
+#endif
+
diff --git a/test/SemaCXX/attr-lto-visibility-public.cpp b/test/SemaCXX/attr-lto-visibility-public.cpp
index 2f9ed87..42e0f79 100644
--- a/test/SemaCXX/attr-lto-visibility-public.cpp
+++ b/test/SemaCXX/attr-lto-visibility-public.cpp
@@ -1,13 +1,13 @@
 // RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
 
-int i [[clang::lto_visibility_public]]; // expected-warning {{'lto_visibility_public' attribute only applies to struct, union or class}}
-typedef int t [[clang::lto_visibility_public]]; // expected-warning {{'lto_visibility_public' attribute only applies to struct, union or class}}
-[[clang::lto_visibility_public]] void f(); // expected-warning {{'lto_visibility_public' attribute only applies to struct, union or class}}
+int i [[clang::lto_visibility_public]]; // expected-warning {{'lto_visibility_public' attribute only applies to structs, unions, and classes}}
+typedef int t [[clang::lto_visibility_public]]; // expected-warning {{'lto_visibility_public' attribute only applies to}}
+[[clang::lto_visibility_public]] void f(); // expected-warning {{'lto_visibility_public' attribute only applies to}}
 void f() [[clang::lto_visibility_public]]; // expected-error {{'lto_visibility_public' attribute cannot be applied to types}}
 
 struct [[clang::lto_visibility_public]] s1 {
-  int i [[clang::lto_visibility_public]]; // expected-warning {{'lto_visibility_public' attribute only applies to struct, union or class}}
-  [[clang::lto_visibility_public]] void f(); // expected-warning {{'lto_visibility_public' attribute only applies to struct, union or class}}
+  int i [[clang::lto_visibility_public]]; // expected-warning {{'lto_visibility_public' attribute only applies to}}
+  [[clang::lto_visibility_public]] void f(); // expected-warning {{'lto_visibility_public' attribute only applies to}}
 };
 
 struct [[clang::lto_visibility_public(1)]] s2 { // expected-error {{'lto_visibility_public' attribute takes no arguments}}
diff --git a/test/SemaCXX/attr-mode-tmpl.cpp b/test/SemaCXX/attr-mode-tmpl.cpp
index d83bb39..f665b1b 100644
--- a/test/SemaCXX/attr-mode-tmpl.cpp
+++ b/test/SemaCXX/attr-mode-tmpl.cpp
@@ -72,7 +72,7 @@
                                               // expected-warning@-1{{deprecated}}
 
   // Check attribute on methods - it is invalid.
-  __attribute__((mode(QI))) T g1() { return 0; } // expected-error{{'mode' attribute only applies to variables, enums, fields and typedefs}}
+  __attribute__((mode(QI))) T g1() { return 0; } // expected-error{{'mode' attribute only applies to variables, enums, typedefs, and non-static data members}}
 };
 
 
diff --git a/test/SemaCXX/attr-no-sanitize.cpp b/test/SemaCXX/attr-no-sanitize.cpp
index 965def6..02bc9a9 100644
--- a/test/SemaCXX/attr-no-sanitize.cpp
+++ b/test/SemaCXX/attr-no-sanitize.cpp
@@ -16,10 +16,15 @@
 // PRINT: int f4() {{\[\[}}clang::no_sanitize("thread")]]
 [[clang::no_sanitize("thread")]] int f4();
 
+// DUMP-LABEL: FunctionDecl {{.*}} f4
+// DUMP: NoSanitizeAttr {{.*}} hwaddress
+// PRINT: int f4() {{\[\[}}clang::no_sanitize("hwaddress")]]
+[[clang::no_sanitize("hwaddress")]] int f4();
+
 // DUMP-LABEL: FunctionDecl {{.*}} f5
-// DUMP: NoSanitizeAttr {{.*}} address thread
-// PRINT: int f5() __attribute__((no_sanitize("address", "thread")))
-int f5() __attribute__((no_sanitize("address", "thread")));
+// DUMP: NoSanitizeAttr {{.*}} address thread hwaddress
+// PRINT: int f5() __attribute__((no_sanitize("address", "thread", "hwaddress")))
+int f5() __attribute__((no_sanitize("address", "thread", "hwaddress")));
 
 // DUMP-LABEL: FunctionDecl {{.*}} f6
 // DUMP: NoSanitizeAttr {{.*}} unknown
diff --git a/test/SemaCXX/attr-require-constant-initialization.cpp b/test/SemaCXX/attr-require-constant-initialization.cpp
index 0df9f2e..2dd72ea 100644
--- a/test/SemaCXX/attr-require-constant-initialization.cpp
+++ b/test/SemaCXX/attr-require-constant-initialization.cpp
@@ -56,12 +56,12 @@
 #if defined(TEST_ONE) // Test semantics of attribute
 
 // Test diagnostics when attribute is applied to non-static declarations.
-void test_func_local(ATTR int param) { // expected-error {{only applies to variables with static or thread}}
-  ATTR int x = 42;                     // expected-error {{only applies to variables with static or thread}}
+void test_func_local(ATTR int param) { // expected-error {{only applies to global variables}}
+  ATTR int x = 42;                     // expected-error {{only applies to}}
   ATTR extern int y;
 }
-struct ATTR class_mem { // expected-error {{only applies to variables with static or thread}}
-  ATTR int x;           // expected-error {{only applies to variables with static or thread}}
+struct ATTR class_mem { // expected-error {{only applies to}}
+  ATTR int x;           // expected-error {{only applies to}}
 };
 
 // [basic.start.static]p2.1
diff --git a/test/SemaCXX/attr-target-mv.cpp b/test/SemaCXX/attr-target-mv.cpp
new file mode 100644
index 0000000..11f3a27
--- /dev/null
+++ b/test/SemaCXX/attr-target-mv.cpp
@@ -0,0 +1,185 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu  -fsyntax-only -verify -fexceptions -fcxx-exceptions %s -std=c++14
+void __attribute__((target("default"))) invalid_features(void);
+//expected-error@+2 {{function declaration is missing 'target' attribute in a multiversioned function}}
+//expected-warning@+1 {{unsupported 'hello_world' in the 'target' attribute string; 'target' attribute ignored}}
+void __attribute__((target("hello_world"))) invalid_features(void);
+//expected-error@+1 {{function multiversioning doesn't support feature 'no-sse4.2'}}
+void __attribute__((target("no-sse4.2"))) invalid_features(void);
+
+void __attribute__((target("sse4.2"))) no_default(void);
+void __attribute__((target("arch=sandybridge")))  no_default(void);
+
+void use1(void){
+  // expected-error@+1 {{no matching function for call to 'no_default'}}
+  no_default();
+}
+constexpr int __attribute__((target("sse4.2"))) foo(void) { return 0; }
+constexpr int __attribute__((target("arch=sandybridge"))) foo(void);
+//expected-error@+1 {{multiversioned function declaration has a different constexpr specification}}
+int __attribute__((target("arch=ivybridge"))) foo(void) {return 1;}
+constexpr int __attribute__((target("default"))) foo(void) { return 2; }
+
+int __attribute__((target("sse4.2"))) foo2(void) { return 0; }
+//expected-error@+1 {{multiversioned function declaration has a different constexpr specification}}
+constexpr int __attribute__((target("arch=sandybridge"))) foo2(void);
+int __attribute__((target("arch=ivybridge"))) foo2(void) {return 1;}
+int __attribute__((target("default"))) foo2(void) { return 2; }
+
+static int __attribute__((target("sse4.2"))) bar(void) { return 0; }
+static int __attribute__((target("arch=sandybridge"))) bar(void);
+//expected-error@+1 {{multiversioned function declaration has a different storage class}}
+int __attribute__((target("arch=ivybridge"))) bar(void) {return 1;}
+static int __attribute__((target("default"))) bar(void) { return 2; }
+
+int __attribute__((target("sse4.2"))) bar2(void) { return 0; }
+//expected-error@+1 {{multiversioned function declaration has a different storage class}}
+static int __attribute__((target("arch=sandybridge"))) bar2(void);
+int __attribute__((target("arch=ivybridge"))) bar2(void) {return 1;}
+int __attribute__((target("default"))) bar2(void) { return 2; }
+
+
+inline int __attribute__((target("sse4.2"))) baz(void) { return 0; }
+inline int __attribute__((target("arch=sandybridge"))) baz(void);
+//expected-error@+1 {{multiversioned function declaration has a different inline specification}}
+int __attribute__((target("arch=ivybridge"))) baz(void) {return 1;}
+inline int __attribute__((target("default"))) baz(void) { return 2; }
+
+int __attribute__((target("sse4.2"))) baz2(void) { return 0; }
+//expected-error@+1 {{multiversioned function declaration has a different inline specification}}
+inline int __attribute__((target("arch=sandybridge"))) baz2(void);
+int __attribute__((target("arch=ivybridge"))) baz2(void) {return 1;}
+int __attribute__((target("default"))) baz2(void) { return 2; }
+
+float __attribute__((target("sse4.2"))) bock(void) { return 0; }
+//expected-error@+1 {{multiversioned function declaration has a different return type}}
+int __attribute__((target("arch=sandybridge"))) bock(void);
+//expected-error@+1 {{multiversioned function declaration has a different return type}}
+int __attribute__((target("arch=ivybridge"))) bock(void) {return 1;}
+//expected-error@+1 {{multiversioned function declaration has a different return type}}
+int __attribute__((target("default"))) bock(void) { return 2; }
+
+int __attribute__((target("sse4.2"))) bock2(void) { return 0; }
+//expected-error@+1 {{multiversioned function declaration has a different return type}}
+float __attribute__((target("arch=sandybridge"))) bock2(void);
+int __attribute__((target("arch=ivybridge"))) bock2(void) {return 1;}
+int __attribute__((target("default"))) bock2(void) { return 2; }
+
+auto __attribute__((target("sse4.2"))) bock3(void) -> int { return 0; }
+//expected-error@+1 {{multiversioned function declaration has a different return type}}
+auto __attribute__((target("arch=sandybridge"))) bock3(void) -> short { return (short)0;}
+
+int __attribute__((target("sse4.2"))) bock4(void) noexcept(false) { return 0; }
+//expected-error@+2 {{exception specification in declaration does not match previous declaration}}
+//expected-note@-2 {{previous declaration is here}}
+int __attribute__((target("arch=sandybridge"))) bock4(void) noexcept(true) { return 1;}
+
+// FIXME: Add support for templates and virtual functions!
+template<typename T>
+int __attribute__((target("sse4.2"))) foo(T) { return 0; }
+// expected-error@+2 {{multiversioned functions do not yet support function templates}}
+template<typename T>
+int __attribute__((target("arch=sandybridge"))) foo(T);
+
+// expected-error@+2 {{multiversioned functions do not yet support function templates}}
+template<typename T>
+int __attribute__((target("default"))) foo(T) { return 2; }
+
+struct S {
+  template<typename T>
+  int __attribute__((target("sse4.2"))) foo(T) { return 0; }
+  // expected-error@+2 {{multiversioned functions do not yet support function templates}}
+  template<typename T>
+  int __attribute__((target("arch=sandybridge"))) foo(T);
+
+  // expected-error@+2 {{multiversioned functions do not yet support function templates}}
+  template<typename T>
+  int __attribute__((target("default"))) foo(T) { return 2; }
+
+  // expected-error@+1 {{multiversioned functions do not yet support virtual functions}}
+  virtual void __attribute__((target("default"))) virt();
+};
+
+extern "C" {
+int __attribute__((target("sse4.2"))) diff_mangle(void) { return 0; }
+}
+//expected-error@+1 {{multiversioned function declaration has a different linkage}}
+int __attribute__((target("arch=sandybridge"))) diff_mangle(void) { return 0; }
+
+// expected-error@+1 {{multiversioned functions do not yet support deduced return types}}
+auto __attribute__((target("default"))) deduced_return(void) { return 0; }
+// expected-error@-1 {{cannot initialize return object of type 'auto' with an rvalue of type 'int'}}
+
+auto __attribute__((target("default"))) trailing_return(void)-> int { return 0; }
+
+__attribute__((target("default"))) void DiffDecl();
+namespace N {
+using ::DiffDecl;
+// expected-error@+3 {{declaration conflicts with target of using declaration already in scope}}
+// expected-note@-4 {{target of using declaration}}
+// expected-note@-3 {{using declaration}}
+__attribute__((target("arch=sandybridge"))) void DiffDecl();
+} // namespace N
+
+struct SpecialFuncs {
+  // expected-error@+1 {{multiversioned functions do not yet support constructors}}
+  __attribute__((target("default"))) SpecialFuncs();
+  // expected-error@+1 {{multiversioned functions do not yet support destructors}}
+  __attribute__((target("default"))) ~SpecialFuncs();
+
+  // expected-error@+1 {{multiversioned functions do not yet support defaulted functions}}
+  SpecialFuncs& __attribute__((target("default"))) operator=(const SpecialFuncs&) = default;
+  // expected-error@+1 {{multiversioned functions do not yet support deleted functions}}
+  SpecialFuncs& __attribute__((target("default"))) operator=(SpecialFuncs&&) = delete;
+};
+
+class Secret {
+  int i = 0;
+  __attribute__((target("default")))
+  friend int SecretAccessor(Secret &s);
+  __attribute__((target("arch=sandybridge")))
+  friend int SecretAccessor(Secret &s);
+};
+
+__attribute__((target("default")))
+int SecretAccessor(Secret &s) {
+  return s.i;
+}
+
+__attribute__((target("arch=sandybridge")))
+int SecretAccessor(Secret &s) {
+  return s.i + 2;
+}
+
+__attribute__((target("arch=ivybridge")))
+int SecretAccessor(Secret &s) {
+  //expected-error@+2{{'i' is a private member of 'Secret'}}
+  //expected-note@-20{{implicitly declared private here}}
+  return s.i + 3;
+}
+
+constexpr int __attribute__((target("sse4.2"))) constexpr_foo(void) {
+  return 0;
+}
+constexpr int __attribute__((target("arch=sandybridge"))) constexpr_foo(void);
+constexpr int __attribute__((target("arch=ivybridge"))) constexpr_foo(void) {
+  return 1;
+}
+constexpr int __attribute__((target("default"))) constexpr_foo(void) {
+  return 2;
+}
+
+void constexpr_test() {
+  static_assert(foo() == 2, "Should call 'default' in a constexpr context");
+}
+
+struct BadOutOfLine {
+  int __attribute__((target("sse4.2"))) foo(int);
+  int __attribute__((target("default"))) foo(int);
+};
+
+int __attribute__((target("sse4.2"))) BadOutOfLine::foo(int) { return 0; }
+int __attribute__((target("default"))) BadOutOfLine::foo(int) { return 1; }
+// expected-error@+3 {{out-of-line definition of 'foo' does not match any declaration in 'BadOutOfLine'}}
+// expected-note@-3 {{member declaration nearly matches}}
+// expected-note@-3 {{member declaration nearly matches}}
+int __attribute__((target("arch=atom"))) BadOutOfLine::foo(int) { return 1; }
diff --git a/test/SemaCXX/attr-weak.cpp b/test/SemaCXX/attr-weak.cpp
index 8ba3a95..51deb66 100644
--- a/test/SemaCXX/attr-weak.cpp
+++ b/test/SemaCXX/attr-weak.cpp
@@ -3,7 +3,7 @@
 static int test0 __attribute__((weak)); // expected-error {{weak declaration cannot have internal linkage}}
 static void test1() __attribute__((weak)); // expected-error {{weak declaration cannot have internal linkage}}
 
-namespace test2 __attribute__((weak)) { // expected-warning {{'weak' attribute only applies to variables, functions and classes}}
+namespace test2 __attribute__((weak)) { // expected-warning {{'weak' attribute only applies to variables, functions, and classes}}
 }
 
 namespace {
diff --git a/test/SemaCXX/base-class-ambiguity-check.cpp b/test/SemaCXX/base-class-ambiguity-check.cpp
new file mode 100644
index 0000000..fc1c4c2
--- /dev/null
+++ b/test/SemaCXX/base-class-ambiguity-check.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+template <typename T> class Foo {
+  struct Base : T {};
+
+  // Test that this code no longer causes a crash in Sema. rdar://23291875
+  struct Derived : Base, T {};
+};
diff --git a/test/SemaCXX/builtin-assume-aligned-tmpl.cpp b/test/SemaCXX/builtin-assume-aligned-tmpl.cpp
index 0909070..61b8555 100644
--- a/test/SemaCXX/builtin-assume-aligned-tmpl.cpp
+++ b/test/SemaCXX/builtin-assume-aligned-tmpl.cpp
@@ -57,7 +57,7 @@
   atest4<int, 5>();
 }
 
-// expected-warning@+1 {{'assume_aligned' attribute only applies to functions and methods}}
+// expected-warning@+1 {{'assume_aligned' attribute only applies to Objective-C methods and functions}}
 class __attribute__((assume_aligned(32))) x {
   int y;
 };
diff --git a/test/SemaCXX/compare-cxx2a.cpp b/test/SemaCXX/compare-cxx2a.cpp
new file mode 100644
index 0000000..d88e3ba
--- /dev/null
+++ b/test/SemaCXX/compare-cxx2a.cpp
@@ -0,0 +1,174 @@
+// Force x86-64 because some of our heuristics are actually based
+// on integer sizes.
+
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -fsyntax-only -pedantic -verify -Wsign-compare -std=c++2a %s
+
+void self_compare() {
+  int a;
+  int b[3], c[3];
+  (void)(a <=> a); // expected-warning {{self-comparison always evaluates to 'std::strong_ordering::equal'}}
+  (void)(b <=> b); // expected-warning {{self-comparison always evaluates to 'std::strong_ordering::equal'}}
+  (void)(b <=> c); // expected-warning {{array comparison always evaluates to a constant}}
+}
+
+void test0(long a, unsigned long b) {
+  enum EnumA {A};
+  enum EnumB {B};
+  enum EnumC {C = 0x10000};
+         // (a,b)
+
+  // FIXME: <=> should never produce -Wsign-compare warnings. All the possible error
+  // cases involve narrowing conversions and so are ill-formed.
+  (void)(a <=> (unsigned long) b); // expected-warning {{comparison of integers of different signs}}
+  (void)(a <=> (unsigned int) b);
+  (void)(a <=> (unsigned short) b);
+  (void)(a <=> (unsigned char) b);
+  (void)((long) a <=> b);  // expected-warning {{comparison of integers of different signs}}
+  (void)((int) a <=> b);  // expected-warning {{comparison of integers of different signs}}
+  (void)((short) a <=> b);  // expected-warning {{comparison of integers of different signs}}
+  (void)((signed char) a <=> b);  // expected-warning {{comparison of integers of different signs}}
+  (void)((long) a <=> (unsigned long) b);  // expected-warning {{comparison of integers of different signs}}
+  (void)((int) a <=> (unsigned int) b);  // expected-warning {{comparison of integers of different signs}}
+  (void)((short) a <=> (unsigned short) b);
+  (void)((signed char) a <=> (unsigned char) b);
+
+#if 0
+  // (A,b)
+  (void)(A <=> (unsigned long) b);
+  (void)(A <=> (unsigned int) b);
+  (void)(A <=> (unsigned short) b);
+  (void)(A <=> (unsigned char) b);
+  (void)((long) A <=> b);
+  (void)((int) A <=> b);
+  (void)((short) A <=> b);
+  (void)((signed char) A <=> b);
+  (void)((long) A <=> (unsigned long) b);
+  (void)((int) A <=> (unsigned int) b);
+  (void)((short) A <=> (unsigned short) b);
+  (void)((signed char) A <=> (unsigned char) b);
+
+  // (a,B)
+  (void)(a <=> (unsigned long) B);
+  (void)(a <=> (unsigned int) B);
+  (void)(a <=> (unsigned short) B);
+  (void)(a <=> (unsigned char) B);
+  (void)((long) a <=> B);
+  (void)((int) a <=> B);
+  (void)((short) a <=> B);
+  (void)((signed char) a <=> B);
+  (void)((long) a <=> (unsigned long) B);
+  (void)((int) a <=> (unsigned int) B);
+  (void)((short) a <=> (unsigned short) B);
+  (void)((signed char) a <=> (unsigned char) B);
+
+  // (C,b)
+  (void)(C <=> (unsigned long) b);
+  (void)(C <=> (unsigned int) b);
+  (void)(C <=> (unsigned short) b); // expected-warning {{comparison of constant 'C' (65536) with expression of type 'unsigned short' is always 'std::strong_ordering::greater'}}
+  (void)(C <=> (unsigned char) b);  // expected-warning {{comparison of constant 'C' (65536) with expression of type 'unsigned char' is always 'std::strong_ordering::greater'}}
+  (void)((long) C <=> b);
+  (void)((int) C <=> b);
+  (void)((short) C <=> b);
+  (void)((signed char) C <=> b);
+  (void)((long) C <=> (unsigned long) b);
+  (void)((int) C <=> (unsigned int) b);
+  (void)((short) C <=> (unsigned short) b);
+  (void)((signed char) C <=> (unsigned char) b);
+
+  // (a,C)
+  (void)(a <=> (unsigned long) C);
+  (void)(a <=> (unsigned int) C);
+  (void)(a <=> (unsigned short) C);
+  (void)(a <=> (unsigned char) C);
+  (void)((long) a <=> C);
+  (void)((int) a <=> C);
+  (void)((short) a <=> C); // expected-warning {{comparison of constant 'C' (65536) with expression of type 'short' is always 'std::strong_ordering::less'}}
+  (void)((signed char) a <=> C); // expected-warning {{comparison of constant 'C' (65536) with expression of type 'signed char' is always 'std::strong_ordering::less'}}
+  (void)((long) a <=> (unsigned long) C);
+  (void)((int) a <=> (unsigned int) C);
+  (void)((short) a <=> (unsigned short) C);
+  (void)((signed char) a <=> (unsigned char) C);
+#endif
+
+  // (0x80000,b)
+  (void)(0x80000 <=> (unsigned long) b);
+  (void)(0x80000 <=> (unsigned int) b);
+  (void)(0x80000 <=> (unsigned short) b); // expected-warning {{result of comparison of constant 524288 with expression of type 'unsigned short' is always 'std::strong_ordering::greater'}}
+  (void)(0x80000 <=> (unsigned char) b); // expected-warning {{result of comparison of constant 524288 with expression of type 'unsigned char' is always 'std::strong_ordering::greater'}}
+  (void)((long) 0x80000 <=> b);
+  (void)((int) 0x80000 <=> b);
+  (void)((short) 0x80000 <=> b);
+  (void)((signed char) 0x80000 <=> b);
+  (void)((long) 0x80000 <=> (unsigned long) b);
+  (void)((int) 0x80000 <=> (unsigned int) b);
+  (void)((short) 0x80000 <=> (unsigned short) b);
+  (void)((signed char) 0x80000 <=> (unsigned char) b);
+
+  // (a,0x80000)
+  (void)(a <=> (unsigned long) 0x80000); // expected-warning {{comparison of integers of different signs}}
+  (void)(a <=> (unsigned int) 0x80000);
+  (void)(a <=> (unsigned short) 0x80000);
+  (void)(a <=> (unsigned char) 0x80000);
+  (void)((long) a <=> 0x80000);
+  (void)((int) a <=> 0x80000);
+  (void)((short) a <=> 0x80000); // expected-warning {{comparison of constant 524288 with expression of type 'short' is always 'std::strong_ordering::less'}}
+  (void)((signed char) a <=> 0x80000); // expected-warning {{comparison of constant 524288 with expression of type 'signed char' is always 'std::strong_ordering::less'}}
+  (void)((long) a <=> (unsigned long) 0x80000); // expected-warning {{comparison of integers of different signs}}
+  (void)((int) a <=> (unsigned int) 0x80000); // expected-warning {{comparison of integers of different signs}}
+  (void)((short) a <=> (unsigned short) 0x80000);
+  (void)((signed char) a <=> (unsigned char) 0x80000);
+}
+
+void test5(bool b) {
+  (void) (b <=> -10); // expected-warning{{comparison of constant -10 with expression of type 'bool' is always 'std::strong_ordering::greater'}}
+  (void) (b <=> -1); // expected-warning{{comparison of constant -1 with expression of type 'bool' is always 'std::strong_ordering::greater'}}
+  (void) (b <=> 0);
+  (void) (b <=> 1);
+  (void) (b <=> 2); // expected-warning{{comparison of constant 2 with expression of type 'bool' is always 'std::strong_ordering::less'}}
+  (void) (b <=> 10); // expected-warning{{comparison of constant 10 with expression of type 'bool' is always 'std::strong_ordering::less'}}
+}
+
+void test6(signed char sc) {
+  (void)(sc <=> 200); // expected-warning{{comparison of constant 200 with expression of type 'signed char' is always 'std::strong_ordering::less'}}
+  (void)(200 <=> sc); // expected-warning{{comparison of constant 200 with expression of type 'signed char' is always 'std::strong_ordering::greater'}}
+}
+
+// Test many signedness combinations.
+void test7(unsigned long other) {
+  // Common unsigned, other unsigned, constant unsigned
+  (void)((unsigned)other <=> (unsigned long)(0x1'ffff'ffff)); // expected-warning{{less}}
+  (void)((unsigned)other <=> (unsigned long)(0xffff'ffff));
+  (void)((unsigned long)other <=> (unsigned)(0x1'ffff'ffff));
+  (void)((unsigned long)other <=> (unsigned)(0xffff'ffff));
+
+  // Common unsigned, other signed, constant unsigned
+  (void)((int)other <=> (unsigned long)(0xffff'ffff'ffff'ffff)); // expected-warning{{different signs}}
+  (void)((int)other <=> (unsigned long)(0x0000'0000'ffff'ffff)); // expected-warning{{different signs}}
+  (void)((int)other <=> (unsigned long)(0x0000'0000'0fff'ffff)); // expected-warning{{different signs}}
+  (void)((int)other <=> (unsigned)(0x8000'0000));                // expected-warning{{different signs}}
+
+  // Common unsigned, other unsigned, constant signed
+  (void)((unsigned long)other <=> (int)(0xffff'ffff));  // expected-warning{{different signs}}
+
+  // Common unsigned, other signed, constant signed
+  // Should not be possible as the common type should also be signed.
+
+  // Common signed, other signed, constant signed
+  (void)((int)other <=> (long)(0xffff'ffff));           // expected-warning{{less}}
+  (void)((int)other <=> (long)(0xffff'ffff'0000'0000)); // expected-warning{{greater}}
+  (void)((int)other <=> (long)(0x0fff'ffff));
+  (void)((int)other <=> (long)(0xffff'ffff'f000'0000));
+
+  // Common signed, other signed, constant unsigned
+  (void)((int)other <=> (unsigned char)(0xffff));
+  (void)((int)other <=> (unsigned char)(0xff));
+
+  // Common signed, other unsigned, constant signed
+  (void)((unsigned char)other <=> (int)(0xff));
+  (void)((unsigned char)other <=> (int)(0xffff));  // expected-warning{{less}}
+
+  // Common signed, other unsigned, constant unsigned
+  (void)((unsigned char)other <=> (unsigned short)(0xff));
+  (void)((unsigned char)other <=> (unsigned short)(0x100)); // expected-warning{{less}}
+  (void)((unsigned short)other <=> (unsigned char)(0xff));
+}
diff --git a/test/SemaCXX/compare.cpp b/test/SemaCXX/compare.cpp
index 1d940be..ee0fe01 100644
--- a/test/SemaCXX/compare.cpp
+++ b/test/SemaCXX/compare.cpp
@@ -1,7 +1,7 @@
 // Force x86-64 because some of our heuristics are actually based
 // on integer sizes.
 
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -fsyntax-only -pedantic -verify -Wsign-compare -std=c++11 %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -fsyntax-only -pedantic -verify -Wsign-compare -Wtautological-constant-in-range-compare -std=c++11 %s
 
 int test0(long a, unsigned long b) {
   enum EnumA {A};
@@ -245,8 +245,8 @@
   // unsigned.
   const unsigned B = -1;
   void (s < B); // expected-warning{{comparison of integers of different signs: 'short' and 'const unsigned int'}}
-  void (s > B); // expected-warning{{comparison of integers of different signs: 'short' and 'const unsigned int'}}
-  void (s <= B); // expected-warning{{comparison of integers of different signs: 'short' and 'const unsigned int'}}
+  void (s > B); // expected-warning{{comparison 'short' > 4294967295 is always false}}
+  void (s <= B); // expected-warning{{comparison 'short' <= 4294967295 is always true}}
   void (s >= B); // expected-warning{{comparison of integers of different signs: 'short' and 'const unsigned int'}}
   void (s == B); // expected-warning{{comparison of integers of different signs: 'short' and 'const unsigned int'}}
   void (s != B); // expected-warning{{comparison of integers of different signs: 'short' and 'const unsigned int'}}
@@ -423,3 +423,19 @@
     testx<B>();
   }
 }
+
+namespace tautological_enum {
+  enum E { a, b, c } e;
+
+  // FIXME: We should warn about constructing this out-of-range numeration value.
+  const E invalid = (E)-1;
+  // ... but we should not warn about comparing against it.
+  bool x = e == invalid;
+
+  // We should not warn about relational comparisons for enumerators, even if
+  // they're tautological.
+  bool y = e >= a && e <= b;
+  const E first_in_range = a;
+  const E last_in_range = b;
+  bool z = e >= first_in_range && e <= last_in_range;
+}
diff --git a/test/SemaCXX/compound-literal.cpp b/test/SemaCXX/compound-literal.cpp
index 5480b1f..be9ebee 100644
--- a/test/SemaCXX/compound-literal.cpp
+++ b/test/SemaCXX/compound-literal.cpp
@@ -1,98 +1,98 @@
-// RUN: %clang_cc1 -fsyntax-only -std=c++03 -verify -ast-dump %s > %t-03
-// RUN: FileCheck --input-file=%t-03 %s
-// RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify -ast-dump %s > %t-11
-// RUN: FileCheck --input-file=%t-11 %s
-// RUN: FileCheck --input-file=%t-11 %s --check-prefix=CHECK-CXX11
-
-// http://llvm.org/PR7905
-namespace PR7905 {
-struct S; // expected-note {{forward declaration}}
-void foo1() {
-  (void)(S[]) {{3}}; // expected-error {{array has incomplete element type}}
-}
-
-template <typename T> struct M { T m; };
-void foo2() {
-  (void)(M<short> []) {{3}};
-}
-}
-
-// Check compound literals mixed with C++11 list-initialization.
-namespace brace_initializers {
-  struct POD {
-    int x, y;
-  };
-  struct HasCtor {
-    HasCtor(int x, int y);
-  };
-  struct HasDtor {
-    int x, y;
-    ~HasDtor();
-  };
-  struct HasCtorDtor {
-    HasCtorDtor(int x, int y);
-    ~HasCtorDtor();
-  };
-
-  void test() {
-    (void)(POD){1, 2};
-    // CHECK-NOT: CXXBindTemporaryExpr {{.*}} 'struct brace_initializers::POD'
-    // CHECK: CompoundLiteralExpr {{.*}} 'struct brace_initializers::POD'
-    // CHECK-NEXT: InitListExpr {{.*}} 'struct brace_initializers::POD'
-    // CHECK-NEXT: IntegerLiteral {{.*}} 1{{$}}
-    // CHECK-NEXT: IntegerLiteral {{.*}} 2{{$}}
-
-    (void)(HasDtor){1, 2};
-    // CHECK: CXXBindTemporaryExpr {{.*}} 'struct brace_initializers::HasDtor'
-    // CHECK-NEXT: CompoundLiteralExpr {{.*}} 'struct brace_initializers::HasDtor'
-    // CHECK-NEXT: InitListExpr {{.*}} 'struct brace_initializers::HasDtor'
-    // CHECK-NEXT: IntegerLiteral {{.*}} 1{{$}}
-    // CHECK-NEXT: IntegerLiteral {{.*}} 2{{$}}
-
-#if __cplusplus >= 201103L
-    (void)(HasCtor){1, 2};
-    // CHECK-CXX11-NOT: CXXBindTemporaryExpr {{.*}} 'struct brace_initializers::HasCtor'
-    // CHECK-CXX11: CompoundLiteralExpr {{.*}} 'struct brace_initializers::HasCtor'
-    // CHECK-CXX11-NEXT: CXXTemporaryObjectExpr {{.*}} 'struct brace_initializers::HasCtor'
-    // CHECK-CXX11-NEXT: IntegerLiteral {{.*}} 1{{$}}
-    // CHECK-CXX11-NEXT: IntegerLiteral {{.*}} 2{{$}}
-
-    (void)(HasCtorDtor){1, 2};
-    // CHECK-CXX11: CXXBindTemporaryExpr {{.*}} 'struct brace_initializers::HasCtorDtor'
-    // CHECK-CXX11-NEXT: CompoundLiteralExpr {{.*}} 'struct brace_initializers::HasCtorDtor'
-    // CHECK-CXX11-NEXT: CXXTemporaryObjectExpr {{.*}} 'struct brace_initializers::HasCtorDtor'
-    // CHECK-CXX11-NEXT: IntegerLiteral {{.*}} 1{{$}}
-    // CHECK-CXX11-NEXT: IntegerLiteral {{.*}} 2{{$}}
-#endif
-  }
-
-  struct PrivateDtor {
-    int x, y;
-  private:
-    ~PrivateDtor(); // expected-note {{declared private here}}
-  };
-
-  void testPrivateDtor() {
-    (void)(PrivateDtor){1, 2}; // expected-error {{temporary of type 'brace_initializers::PrivateDtor' has private destructor}}
-  }
-}
-
-// This doesn't necessarily need to be an error, but CodeGen can't handle it
-// at the moment.
-int PR17415 = (int){PR17415}; // expected-error {{initializer element is not a compile-time constant}}
-
-// Make sure we accept this.  (Not sure if we actually should... but we do
-// at the moment.)
-template<unsigned> struct Value { };
-template<typename T>
-int &check_narrowed(Value<sizeof((T){1.1})>);
-
-#if __cplusplus >= 201103L
-// Compound literals in global lambdas have automatic storage duration
-// and are not subject to the constant-initialization rules.
-int computed_with_lambda = [] {
-  int x = 5;
-  int result = ((int[]) { x, x + 2, x + 4, x + 6 })[0];
-  return result;
-}();
-#endif
+// RUN: %clang_cc1 -fsyntax-only -std=c++03 -verify -ast-dump %s > %t-03
+// RUN: FileCheck --input-file=%t-03 %s
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify -ast-dump %s > %t-11
+// RUN: FileCheck --input-file=%t-11 %s
+// RUN: FileCheck --input-file=%t-11 %s --check-prefix=CHECK-CXX11
+
+// http://llvm.org/PR7905
+namespace PR7905 {
+struct S; // expected-note {{forward declaration}}
+void foo1() {
+  (void)(S[]) {{3}}; // expected-error {{array has incomplete element type}}
+}
+
+template <typename T> struct M { T m; };
+void foo2() {
+  (void)(M<short> []) {{3}};
+}
+}
+
+// Check compound literals mixed with C++11 list-initialization.
+namespace brace_initializers {
+  struct POD {
+    int x, y;
+  };
+  struct HasCtor {
+    HasCtor(int x, int y);
+  };
+  struct HasDtor {
+    int x, y;
+    ~HasDtor();
+  };
+  struct HasCtorDtor {
+    HasCtorDtor(int x, int y);
+    ~HasCtorDtor();
+  };
+
+  void test() {
+    (void)(POD){1, 2};
+    // CHECK-NOT: CXXBindTemporaryExpr {{.*}} 'brace_initializers::POD'
+    // CHECK: CompoundLiteralExpr {{.*}} 'brace_initializers::POD'
+    // CHECK-NEXT: InitListExpr {{.*}} 'brace_initializers::POD'
+    // CHECK-NEXT: IntegerLiteral {{.*}} 1{{$}}
+    // CHECK-NEXT: IntegerLiteral {{.*}} 2{{$}}
+
+    (void)(HasDtor){1, 2};
+    // CHECK: CXXBindTemporaryExpr {{.*}} 'brace_initializers::HasDtor'
+    // CHECK-NEXT: CompoundLiteralExpr {{.*}} 'brace_initializers::HasDtor'
+    // CHECK-NEXT: InitListExpr {{.*}} 'brace_initializers::HasDtor'
+    // CHECK-NEXT: IntegerLiteral {{.*}} 1{{$}}
+    // CHECK-NEXT: IntegerLiteral {{.*}} 2{{$}}
+
+#if __cplusplus >= 201103L
+    (void)(HasCtor){1, 2};
+    // CHECK-CXX11-NOT: CXXBindTemporaryExpr {{.*}} 'brace_initializers::HasCtor'
+    // CHECK-CXX11: CompoundLiteralExpr {{.*}} 'brace_initializers::HasCtor'
+    // CHECK-CXX11-NEXT: CXXTemporaryObjectExpr {{.*}} 'brace_initializers::HasCtor'
+    // CHECK-CXX11-NEXT: IntegerLiteral {{.*}} 1{{$}}
+    // CHECK-CXX11-NEXT: IntegerLiteral {{.*}} 2{{$}}
+
+    (void)(HasCtorDtor){1, 2};
+    // CHECK-CXX11: CXXBindTemporaryExpr {{.*}} 'brace_initializers::HasCtorDtor'
+    // CHECK-CXX11-NEXT: CompoundLiteralExpr {{.*}} 'brace_initializers::HasCtorDtor'
+    // CHECK-CXX11-NEXT: CXXTemporaryObjectExpr {{.*}} 'brace_initializers::HasCtorDtor'
+    // CHECK-CXX11-NEXT: IntegerLiteral {{.*}} 1{{$}}
+    // CHECK-CXX11-NEXT: IntegerLiteral {{.*}} 2{{$}}
+#endif
+  }
+
+  struct PrivateDtor {
+    int x, y;
+  private:
+    ~PrivateDtor(); // expected-note {{declared private here}}
+  };
+
+  void testPrivateDtor() {
+    (void)(PrivateDtor){1, 2}; // expected-error {{temporary of type 'brace_initializers::PrivateDtor' has private destructor}}
+  }
+}
+
+// This doesn't necessarily need to be an error, but CodeGen can't handle it
+// at the moment.
+int PR17415 = (int){PR17415}; // expected-error {{initializer element is not a compile-time constant}}
+
+// Make sure we accept this.  (Not sure if we actually should... but we do
+// at the moment.)
+template<unsigned> struct Value { };
+template<typename T>
+int &check_narrowed(Value<sizeof((T){1.1})>);
+
+#if __cplusplus >= 201103L
+// Compound literals in global lambdas have automatic storage duration
+// and are not subject to the constant-initialization rules.
+int computed_with_lambda = [] {
+  int x = 5;
+  int result = ((int[]) { x, x + 2, x + 4, x + 6 })[0];
+  return result;
+}();
+#endif
diff --git a/test/SemaCXX/constant-expression-cxx11.cpp b/test/SemaCXX/constant-expression-cxx11.cpp
index e64f2d5..51dd619 100644
--- a/test/SemaCXX/constant-expression-cxx11.cpp
+++ b/test/SemaCXX/constant-expression-cxx11.cpp
@@ -604,20 +604,31 @@
 
 }
 
-// Tests for indexes into arrays of unknown bounds.
+// Per current CWG direction, we reject any cases where pointer arithmetic is
+// not statically known to be valid.
 namespace ArrayOfUnknownBound {
-  // This is a corner case of the language where it's not clear whether this
-  // should be an error: When we see the initializer for Z::a, the bounds of
-  // Z::b aren't known yet, but they will be known by the end of the translation
-  // unit, so the compiler can in theory check the indexing into Z::b.
-  // For the time being, as long as this is unclear, we want to make sure that
-  // this compiles.
-  struct Z {
-    static const void *const a[];
-    static const void *const b[];
+  extern int arr[];
+  constexpr int *a = arr;
+  constexpr int *b = &arr[0];
+  static_assert(a == b, "");
+  constexpr int *c = &arr[1]; // expected-error {{constant}} expected-note {{indexing of array without known bound}}
+  constexpr int *d = &a[1]; // expected-error {{constant}} expected-note {{indexing of array without known bound}}
+  constexpr int *e = a + 1; // expected-error {{constant}} expected-note {{indexing of array without known bound}}
+
+  struct X {
+    int a;
+    int b[]; // expected-warning {{C99}}
   };
-  constexpr const void *Z::a[] = {&b[0], &b[1]};
-  constexpr const void *Z::b[] = {&a[0], &a[1]};
+  extern X x;
+  constexpr int *xb = x.b; // expected-error {{constant}} expected-note {{not supported}}
+
+  struct Y { int a; };
+  extern Y yarr[];
+  constexpr Y *p = yarr;
+  constexpr int *q = &p->a;
+
+  extern const int carr[]; // expected-note {{here}}
+  constexpr int n = carr[0]; // expected-error {{constant}} expected-note {{non-constexpr variable}}
 }
 
 namespace DependentValues {
@@ -1920,6 +1931,22 @@
     };
     static_assert(X::f(3) == -1, "3 should truncate to -1");
   }
+
+  struct HasUnnamedBitfield {
+    unsigned a;
+    unsigned : 20;
+    unsigned b;
+
+    constexpr HasUnnamedBitfield() : a(), b() {}
+    constexpr HasUnnamedBitfield(unsigned a, unsigned b) : a(a), b(b) {}
+  };
+
+  void testUnnamedBitfield() {
+    const HasUnnamedBitfield zero{};
+    int a = 1 / zero.b; // expected-warning {{division by zero is undefined}}
+    const HasUnnamedBitfield oneZero{1, 0};
+    int b = 1 / oneZero.b; // expected-warning {{division by zero is undefined}}
+  }
 }
 
 namespace ZeroSizeTypes {
diff --git a/test/SemaCXX/constant-expression-cxx1y.cpp b/test/SemaCXX/constant-expression-cxx1y.cpp
index 12fec08..c5c868b 100644
--- a/test/SemaCXX/constant-expression-cxx1y.cpp
+++ b/test/SemaCXX/constant-expression-cxx1y.cpp
@@ -1021,3 +1021,81 @@
 }
 static_assert(evalNested(), "");
 } // namespace PR19741
+
+namespace Mutable {
+  struct A { mutable int n; }; // expected-note 2{{here}}
+  constexpr int k = A{123}.n; // ok
+  static_assert(k == 123, "");
+
+  struct Q { A &&a; int b = a.n; };
+  constexpr Q q = { A{456} }; // expected-note {{temporary}}
+  static_assert(q.b == 456, "");
+  static_assert(q.a.n == 456, ""); // expected-error {{constant expression}} expected-note {{outside the expression that created the temporary}}
+
+  constexpr A a = {123};
+  constexpr int m = a.n; // expected-error {{constant expression}} expected-note {{mutable}}
+
+  constexpr Q r = { static_cast<A&&>(const_cast<A&>(a)) }; // expected-error {{constant expression}} expected-note@-8 {{mutable}}
+
+  struct B {
+    mutable int n; // expected-note {{here}}
+    int m;
+    constexpr B() : n(1), m(n) {} // ok
+  };
+  constexpr B b;
+  constexpr int p = b.n; // expected-error {{constant expression}} expected-note {{mutable}}
+}
+
+namespace IndirectFields {
+
+// Reference indirect field.
+struct A {
+  struct {
+    union {
+      int x = x = 3; // expected-note {{outside its lifetime}}
+    };
+  };
+  constexpr A() {}
+};
+static_assert(A().x == 3, ""); // expected-error{{not an integral constant expression}} expected-note{{in call to 'A()'}}
+
+// Reference another indirect field, with different 'this'.
+struct B {
+  struct {
+    union {
+      int x = 3;
+    };
+    int y = x;
+  };
+  constexpr B() {}
+};
+static_assert(B().y == 3, "");
+
+// Nested evaluation of indirect field initializers.
+struct C {
+  union {
+    int x = 1;
+  };
+};
+struct D {
+  struct {
+    C c;
+    int y = c.x + 1;
+  };
+};
+static_assert(D().y == 2, "");
+
+// Explicit 'this'.
+struct E {
+  int n = 0;
+  struct {
+    void *x = this;
+  };
+  void *y = this;
+};
+constexpr E e1 = E();
+static_assert(e1.x != e1.y, "");
+constexpr E e2 = E{0};
+static_assert(e2.x != e2.y, "");
+
+} // namespace IndirectFields
diff --git a/test/SemaCXX/constant-expression-cxx2a.cpp b/test/SemaCXX/constant-expression-cxx2a.cpp
new file mode 100644
index 0000000..935cbef
--- /dev/null
+++ b/test/SemaCXX/constant-expression-cxx2a.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=c++2a -verify %s -fcxx-exceptions -triple=x86_64-linux-gnu
+
+namespace ThreeWayComparison {
+  struct A {
+    int n;
+    constexpr friend int operator<=>(const A &a, const A &b) {
+      return a.n < b.n ? -1 : a.n > b.n ? 1 : 0;
+    }
+  };
+  static_assert(A{1} <=> A{2} < 0);
+  static_assert(A{2} <=> A{1} > 0);
+  static_assert(A{2} <=> A{2} == 0);
+
+  // Note: not yet supported.
+  static_assert(1 <=> 2 < 0); // expected-error {{invalid operands}}
+  static_assert(2 <=> 1 > 0); // expected-error {{invalid operands}}
+  static_assert(1 <=> 1 == 0); // expected-error {{invalid operands}}
+  constexpr int k = (1 <=> 1, 0);
+  // expected-error@-1 {{constexpr variable 'k' must be initialized by a constant expression}}
+  // expected-warning@-2 {{three-way comparison result unused}}
+
+  constexpr void f() { // expected-error {{constant expression}}
+    void(1 <=> 1); // expected-note {{constant expression}}
+  }
+
+  // TODO: defaulted operator <=>
+}
diff --git a/test/SemaCXX/constexpr-array-unknown-bound.cpp b/test/SemaCXX/constexpr-array-unknown-bound.cpp
new file mode 100644
index 0000000..395c4cb
--- /dev/null
+++ b/test/SemaCXX/constexpr-array-unknown-bound.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 %s -Wno-uninitialized -std=c++1z -fsyntax-only -verify
+
+const extern int arr[];
+constexpr auto p = arr; // ok
+constexpr int f(int i) {return p[i];} // expected-note {{read of dereferenced one-past-the-end pointer}}
+
+constexpr int arr[] {1, 2, 3};
+constexpr auto p2 = arr + 2; // ok
+constexpr int x = f(2); // ok
+constexpr int y = f(3); // expected-error {{constant expression}}
+// expected-note-re@-1 {{in call to 'f({{.*}})'}}
+
+// FIXME: consider permitting this case
+struct A {int m[];} a;
+constexpr auto p3 = a.m; // expected-error {{constant expression}} expected-note {{without known bound}}
+constexpr auto p4 = a.m + 1; // expected-error {{constant expression}} expected-note {{without known bound}}
+
+void g(int i) {
+  int arr[i];
+  constexpr auto *p = arr + 2; // expected-error {{constant expression}} expected-note {{without known bound}}
+
+  // FIXME: Give a better diagnostic here. The issue is that computing
+  // sizeof(*arr2) within the array indexing fails due to the VLA.
+  int arr2[2][i];
+  constexpr int m = ((void)arr2[2], 0); // expected-error {{constant expression}}
+}
diff --git a/test/SemaCXX/copy-initialization.cpp b/test/SemaCXX/copy-initialization.cpp
index 4f6c65c..cd7e5f0 100644
--- a/test/SemaCXX/copy-initialization.cpp
+++ b/test/SemaCXX/copy-initialization.cpp
@@ -26,7 +26,7 @@
 };
 
 // PR3600
-void test(const foo *P) { P->bar(); } // expected-error{{'bar' not viable: 'this' argument has type 'const foo', but function is not marked const}}
+void test(const foo *P) { P->bar(); } // expected-error{{'this' argument to member function 'bar' has type 'const foo', but function is not marked const}}
 
 namespace PR6757 {
   struct Foo {
diff --git a/test/SemaCXX/coroutines.cpp b/test/SemaCXX/coroutines.cpp
index 65e17ab..6ceeb86 100644
--- a/test/SemaCXX/coroutines.cpp
+++ b/test/SemaCXX/coroutines.cpp
@@ -781,6 +781,102 @@
 }
 template coro<good_promise_13> dependent_uses_nothrow_new(good_promise_13);
 
+struct good_promise_custom_new_operator {
+  coro<good_promise_custom_new_operator> get_return_object();
+  suspend_always initial_suspend();
+  suspend_always final_suspend();
+  void return_void();
+  void unhandled_exception();
+  void *operator new(SizeT, double, float, int);
+};
+
+coro<good_promise_custom_new_operator>
+good_coroutine_calls_custom_new_operator(double, float, int) {
+  co_return;
+}
+
+struct coroutine_nonstatic_member_struct;
+
+struct good_promise_nonstatic_member_custom_new_operator {
+  coro<good_promise_nonstatic_member_custom_new_operator> get_return_object();
+  suspend_always initial_suspend();
+  suspend_always final_suspend();
+  void return_void();
+  void unhandled_exception();
+  void *operator new(SizeT, coroutine_nonstatic_member_struct &, double);
+};
+
+struct bad_promise_nonstatic_member_mismatched_custom_new_operator {
+  coro<bad_promise_nonstatic_member_mismatched_custom_new_operator> get_return_object();
+  suspend_always initial_suspend();
+  suspend_always final_suspend();
+  void return_void();
+  void unhandled_exception();
+  // expected-note@+1 {{candidate function not viable: requires 2 arguments, but 1 was provided}}
+  void *operator new(SizeT, double);
+};
+
+struct coroutine_nonstatic_member_struct {
+  coro<good_promise_nonstatic_member_custom_new_operator>
+  good_coroutine_calls_nonstatic_member_custom_new_operator(double) {
+    co_return;
+  }
+
+  coro<bad_promise_nonstatic_member_mismatched_custom_new_operator>
+  bad_coroutine_calls_nonstatic_member_mistmatched_custom_new_operator(double) {
+    // expected-error@-1 {{no matching function for call to 'operator new'}}
+    co_return;
+  }
+};
+
+struct bad_promise_mismatched_custom_new_operator {
+  coro<bad_promise_mismatched_custom_new_operator> get_return_object();
+  suspend_always initial_suspend();
+  suspend_always final_suspend();
+  void return_void();
+  void unhandled_exception();
+  // expected-note@+1 {{candidate function not viable: requires 4 arguments, but 1 was provided}}
+  void *operator new(SizeT, double, float, int);
+};
+
+coro<bad_promise_mismatched_custom_new_operator>
+bad_coroutine_calls_mismatched_custom_new_operator(double) {
+  // expected-error@-1 {{no matching function for call to 'operator new'}}
+  co_return;
+}
+
+struct bad_promise_throwing_custom_new_operator {
+  static coro<bad_promise_throwing_custom_new_operator> get_return_object_on_allocation_failure();
+  coro<bad_promise_throwing_custom_new_operator> get_return_object();
+  suspend_always initial_suspend();
+  suspend_always final_suspend();
+  void return_void();
+  void unhandled_exception();
+  // expected-error@+1 {{'operator new' is required to have a non-throwing noexcept specification when the promise type declares 'get_return_object_on_allocation_failure()'}}
+  void *operator new(SizeT, double, float, int);
+};
+
+coro<bad_promise_throwing_custom_new_operator>
+bad_coroutine_calls_throwing_custom_new_operator(double, float, int) {
+  // expected-note@-1 {{call to 'operator new' implicitly required by coroutine function here}}
+  co_return;
+}
+
+struct good_promise_noexcept_custom_new_operator {
+  static coro<good_promise_noexcept_custom_new_operator> get_return_object_on_allocation_failure();
+  coro<good_promise_noexcept_custom_new_operator> get_return_object();
+  suspend_always initial_suspend();
+  suspend_always final_suspend();
+  void return_void();
+  void unhandled_exception();
+  void *operator new(SizeT, double, float, int) noexcept;
+};
+
+coro<good_promise_noexcept_custom_new_operator>
+good_coroutine_calls_noexcept_custom_new_operator(double, float, int) {
+  co_return;
+}
+
 struct mismatch_gro_type_tag1 {};
 template<>
 struct std::experimental::coroutine_traits<int, mismatch_gro_type_tag1> {
@@ -1171,4 +1267,73 @@
 
 template CoroMemberTag DepTestType<int>::test_static_template<void>(const char *volatile &, unsigned);
 
+struct bad_promise_deleted_constructor {
+  // expected-note@+1 {{'bad_promise_deleted_constructor' has been explicitly marked deleted here}}
+  bad_promise_deleted_constructor() = delete;
+  coro<bad_promise_deleted_constructor> get_return_object();
+  suspend_always initial_suspend();
+  suspend_always final_suspend();
+  void return_void();
+  void unhandled_exception();
+};
+
+coro<bad_promise_deleted_constructor>
+bad_coroutine_calls_deleted_promise_constructor() {
+  // expected-error@-1 {{call to deleted constructor of 'std::experimental::coroutine_traits<coro<CoroHandleMemberFunctionTest::bad_promise_deleted_constructor>>::promise_type' (aka 'CoroHandleMemberFunctionTest::bad_promise_deleted_constructor')}}
+  co_return;
+}
+
+// Test that, when the promise type has a constructor whose signature matches
+// that of the coroutine function, that constructor is used. If no matching
+// constructor exists, the default constructor is used as a fallback. If no
+// matching constructors exist at all, an error is emitted. This is an
+// experimental feature that will be proposed for the Coroutines TS.
+
+struct good_promise_default_constructor {
+  good_promise_default_constructor(double, float, int);
+  good_promise_default_constructor() = default;
+  coro<good_promise_default_constructor> get_return_object();
+  suspend_always initial_suspend();
+  suspend_always final_suspend();
+  void return_void();
+  void unhandled_exception();
+};
+
+coro<good_promise_default_constructor>
+good_coroutine_calls_default_constructor() {
+  co_return;
+}
+
+struct good_promise_custom_constructor {
+  good_promise_custom_constructor(double, float, int);
+  good_promise_custom_constructor() = delete;
+  coro<good_promise_custom_constructor> get_return_object();
+  suspend_always initial_suspend();
+  suspend_always final_suspend();
+  void return_void();
+  void unhandled_exception();
+};
+
+coro<good_promise_custom_constructor>
+good_coroutine_calls_custom_constructor(double, float, int) {
+  co_return;
+}
+
+struct bad_promise_no_matching_constructor {
+  bad_promise_no_matching_constructor(int, int, int);
+  // expected-note@+1 {{'bad_promise_no_matching_constructor' has been explicitly marked deleted here}}
+  bad_promise_no_matching_constructor() = delete;
+  coro<bad_promise_no_matching_constructor> get_return_object();
+  suspend_always initial_suspend();
+  suspend_always final_suspend();
+  void return_void();
+  void unhandled_exception();
+};
+
+coro<bad_promise_no_matching_constructor>
+bad_coroutine_calls_with_no_matching_constructor(int, int) {
+  // expected-error@-1 {{call to deleted constructor of 'std::experimental::coroutine_traits<coro<CoroHandleMemberFunctionTest::bad_promise_no_matching_constructor>, int, int>::promise_type' (aka 'CoroHandleMemberFunctionTest::bad_promise_no_matching_constructor')}}
+  co_return;
+}
+
 } // namespace CoroHandleMemberFunctionTest
diff --git a/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp b/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp
index 78ebc04..860a4aa 100644
--- a/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp
+++ b/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp
@@ -148,7 +148,7 @@
 }
 
 void dangle() {
-  new auto{1, 2, 3}; // expected-error {{cannot use list-initialization}}
+  new auto{1, 2, 3}; // expected-error {{new expression for type 'auto' contains multiple constructor arguments}}
   new std::initializer_list<int>{1, 2, 3}; // expected-warning {{at the end of the full-expression}}
 }
 
diff --git a/test/SemaCXX/cxx11-ast-print.cpp b/test/SemaCXX/cxx11-ast-print.cpp
index 9c617af..17dcbcb 100644
--- a/test/SemaCXX/cxx11-ast-print.cpp
+++ b/test/SemaCXX/cxx11-ast-print.cpp
@@ -55,4 +55,8 @@
 ;
 // CHECK-NOT: ;
 
+void g(int n) {
+  int buffer[n];     // CHECK: int buffer[n];
+  [&buffer]() {}();  // CHECK: [&buffer]
+}
 
diff --git a/test/SemaCXX/cxx1y-contextual-conversion-tweaks.cpp b/test/SemaCXX/cxx1y-contextual-conversion-tweaks.cpp
index c7e6074..7cb83c6 100644
--- a/test/SemaCXX/cxx1y-contextual-conversion-tweaks.cpp
+++ b/test/SemaCXX/cxx1y-contextual-conversion-tweaks.cpp
@@ -96,8 +96,8 @@
 //expected-error@81 {{statement requires expression of integer type ('extended_examples::A1' invalid)}}
 //expected-error@85 {{statement requires expression of integer type ('extended_examples::B2' invalid)}}
 #else
-//expected-error@81 {{cannot initialize object parameter of type 'extended_examples::A1' with an expression of type 'extended_examples::A1'}}
-//expected-error@85 {{cannot initialize object parameter of type 'extended_examples::B2' with an expression of type 'extended_examples::B2'}}
+//expected-error@81 {{'this' argument to member function 'operator int' is an lvalue, but function has rvalue ref-qualifier}} expected-note@54 {{'operator int' declared here}}
+//expected-error@85 {{'this' argument to member function 'operator int' is an lvalue, but function has rvalue ref-qualifier}} expected-note@75 {{'operator int' declared here}}
 #endif
 
 namespace extended_examples_cxx1y {
@@ -149,9 +149,9 @@
 #ifdef CXX1Y
 //expected-error@139 {{statement requires expression of integer type ('extended_examples_cxx1y::A2' invalid)}}
 #else
-//expected-error@138 {{cannot initialize object parameter of type 'extended_examples_cxx1y::A1' with an expression of type 'extended_examples_cxx1y::A1'}}
-//expected-error@139 {{cannot initialize object parameter of type 'extended_examples_cxx1y::A2' with an expression of type 'extended_examples_cxx1y::A2'}}
-//expected-error@143 {{cannot initialize object parameter of type 'extended_examples_cxx1y::D' with an expression of type 'extended_examples_cxx1y::D'}}
+//expected-error@138 {{'this' argument to member function 'operator int' is an lvalue, but function has rvalue ref-qualifier}} expected-note@106 {{'operator int' declared here}}
+//expected-error@139 {{'this' argument to member function 'operator int' is an lvalue, but function has rvalue ref-qualifier}} expected-note@111 {{'operator int' declared here}}
+//expected-error@143 {{'this' argument to member function 'operator int' is an lvalue, but function has rvalue ref-qualifier}} expected-note@131 {{'operator int' declared here}}
 #endif
 
 namespace extended_examples_array_bounds {
diff --git a/test/SemaCXX/cxx1y-generic-lambdas.cpp b/test/SemaCXX/cxx1y-generic-lambdas.cpp
index 9f3c775..49386e7 100644
--- a/test/SemaCXX/cxx1y-generic-lambdas.cpp
+++ b/test/SemaCXX/cxx1y-generic-lambdas.cpp
@@ -181,7 +181,7 @@
     int (*fp2)(int) = [](auto b) -> int {  return b; };
     int (*fp3)(char) = [](auto c) -> int { return c; };
     char (*fp4)(int) = [](auto d) { return d; }; //expected-error{{no viable conversion}}\
-                                                 //expected-note{{candidate template ignored}}
+                                                 //expected-note{{candidate function[with $0 = int]}}
     char (*fp5)(char) = [](auto e) -> int { return e; }; //expected-error{{no viable conversion}}\
                                                  //expected-note{{candidate template ignored}}
 
@@ -207,12 +207,22 @@
 } // end ns
 
 namespace conversion_operator {
-void test() {
-    auto L = [](auto a) -> int { return a; };
+  void test() {
+    auto L = [](auto a) -> int { return a; }; // expected-error {{cannot initialize}}
     int (*fp)(int) = L; 
     int (&fp2)(int) = [](auto a) { return a; };  // expected-error{{non-const lvalue}}
     int (&&fp3)(int) = [](auto a) { return a; };  // expected-error{{no viable conversion}}\
                                                   //expected-note{{candidate}}
+
+    using F = int(int);
+    using G = int(void*);
+    L.operator F*();
+    L.operator G*(); // expected-note-re {{instantiation of function template specialization '{{.*}}::operator()<void *>'}}
+
+    // Here, the conversion function is named 'operator auto (*)(int)', and
+    // there is no way to write that name in valid C++.
+    auto M = [](auto a) -> auto { return a; };
+    M.operator F*(); // expected-error {{no member named 'operator int (*)(int)'}}
   }
 }
 }
@@ -992,4 +1002,4 @@
  void test() {
     [](auto x) noexcept(noexcept(x)) { } (0);
  }
-}
\ No newline at end of file
+}
diff --git a/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp b/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp
index 9232a8b..d21fbf2 100644
--- a/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp
+++ b/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -std=c++1z -verify %s -DERRORS
-// RUN: %clang_cc1 -std=c++1z -verify %s -UERRORS
+// RUN: %clang_cc1 -std=c++1z -verify %s -DERRORS -Wundefined-func-template
+// RUN: %clang_cc1 -std=c++1z -verify %s -UERRORS -Wundefined-func-template
 
 // This test is split into two because we only produce "undefined internal"
 // warnings if we didn't produce any errors.
@@ -309,6 +309,17 @@
   template int New(int);
 }
 
+namespace injected_class_name {
+  template<typename T = void> struct A {
+    A();
+    template<typename U> A(A<U>);
+  };
+  A<int> a;
+  A b = a;
+  using T = decltype(a);
+  using T = decltype(b);
+}
+
 #else
 
 // expected-no-diagnostics
diff --git a/test/SemaCXX/cxx1z-copy-omission.cpp b/test/SemaCXX/cxx1z-copy-omission.cpp
index cba6bc3..a7133d7 100644
--- a/test/SemaCXX/cxx1z-copy-omission.cpp
+++ b/test/SemaCXX/cxx1z-copy-omission.cpp
@@ -160,3 +160,12 @@
   // classes?
   AsDelegating(int n) : AsDelegating(make(n)) {} // expected-error {{deleted}}
 };
+
+namespace CtorTemplateBeatsNonTemplateConversionFn {
+  struct Foo { template <typename Derived> Foo(const Derived &); };
+  template <typename Derived> struct Base { operator Foo() const = delete; }; // expected-note {{deleted}}
+  struct Derived : Base<Derived> {};
+
+  Foo f(Derived d) { return d; } // expected-error {{invokes a deleted function}}
+  Foo g(Derived d) { return Foo(d); } // ok, calls constructor
+}
diff --git a/test/SemaCXX/cxx1z-noexcept-function-type.cpp b/test/SemaCXX/cxx1z-noexcept-function-type.cpp
index 6578eb8..814b04c 100644
--- a/test/SemaCXX/cxx1z-noexcept-function-type.cpp
+++ b/test/SemaCXX/cxx1z-noexcept-function-type.cpp
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -std=c++14 -verify -fexceptions -fcxx-exceptions %s
-// RUN: %clang_cc1 -std=c++1z -verify -fexceptions -fcxx-exceptions %s -Wno-dynamic-exception-spec
+// RUN: %clang_cc1 -std=c++17 -verify -fexceptions -fcxx-exceptions %s -Wno-dynamic-exception-spec
 // RUN: %clang_cc1 -std=c++14 -verify -fexceptions -fcxx-exceptions -Wno-c++1z-compat-mangling -DNO_COMPAT_MANGLING %s
 // RUN: %clang_cc1 -std=c++14 -verify -fexceptions -fcxx-exceptions -Wno-noexcept-type -DNO_COMPAT_MANGLING %s
 
diff --git a/test/SemaCXX/cxx2a-destroying-delete.cpp b/test/SemaCXX/cxx2a-destroying-delete.cpp
index a085c11..6115774 100644
--- a/test/SemaCXX/cxx2a-destroying-delete.cpp
+++ b/test/SemaCXX/cxx2a-destroying-delete.cpp
@@ -101,3 +101,22 @@
   };
   void delete_I(I *i) { delete i; } // expected-error {{deleted}}
 }
+
+namespace first_param_conversion {
+  struct A {
+    void operator delete(A *, std::destroying_delete_t);
+  };
+  void f(const volatile A *a) {
+    delete a; // ok
+  }
+
+  struct B {
+    void operator delete(B *, std::destroying_delete_t);
+  };
+  struct C : B {};
+  struct D : B {};
+  struct E : C, D {};
+  void g(E *e) {
+    delete e; // expected-error {{ambiguous conversion from derived class 'first_param_conversion::E' to base class 'first_param_conversion::B':}}
+  }
+}
diff --git a/test/SemaCXX/cxx2a-pointer-to-const-ref-member.cpp b/test/SemaCXX/cxx2a-pointer-to-const-ref-member.cpp
index e9cbb1a..0bc40c8 100644
--- a/test/SemaCXX/cxx2a-pointer-to-const-ref-member.cpp
+++ b/test/SemaCXX/cxx2a-pointer-to-const-ref-member.cpp
@@ -1,12 +1,12 @@
 // RUN: %clang_cc1 -std=c++2a %s -verify
 
 struct X {
-  void ref() & {}
+  void ref() & {} // expected-note{{'ref' declared here}}
   void cref() const& {}
 };
 
 void test() {
-  X{}.ref(); // expected-error{{cannot initialize object parameter of type 'X' with an expression of type 'X'}}
+  X{}.ref(); // expected-error{{'this' argument to member function 'ref' is an rvalue, but function has non-const lvalue ref-qualifier}}
   X{}.cref(); // expected-no-error
 
   (X{}.*&X::ref)(); // expected-error-re{{pointer-to-member function type 'void (X::*)() {{.*}}&' can only be called on an lvalue}}
diff --git a/test/SemaCXX/cxx2a-three-way-comparison.cpp b/test/SemaCXX/cxx2a-three-way-comparison.cpp
new file mode 100644
index 0000000..eb1480c
--- /dev/null
+++ b/test/SemaCXX/cxx2a-three-way-comparison.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c++2a -verify %s
+
+struct A {};
+constexpr int operator<=>(A a, A b) { return 42; }
+static_assert(operator<=>(A(), A()) == 42);
+
+int operator<=>(); // expected-error {{overloaded 'operator<=>' must have at least one parameter of class or enumeration type}}
+int operator<=>(A); // expected-error {{overloaded 'operator<=>' must be a binary operator}}
+int operator<=>(int, int); // expected-error {{overloaded 'operator<=>' must have at least one parameter of class or enumeration type}}
+int operator<=>(A, A, A); // expected-error {{overloaded 'operator<=>' must be a binary operator}}
+int operator<=>(A, A, ...); // expected-error {{overloaded 'operator<=>' cannot be variadic}}
+int operator<=>(int, A = {}); // expected-error {{parameter of overloaded 'operator<=>' cannot have a default argument}}
+
+struct B {
+  int &operator<=>(int);
+  friend int operator<=>(A, B);
+
+  friend int operator<=>(int, int); // expected-error {{overloaded 'operator<=>' must have at least one parameter of class or enumeration type}}
+  void operator<=>(); // expected-error {{overloaded 'operator<=>' must be a binary operator}};
+  void operator<=>(A, ...); // expected-error {{overloaded 'operator<=>' cannot be variadic}}
+  void operator<=>(A, A); // expected-error {{overloaded 'operator<=>' must be a binary operator}};
+};
+
+int &r = B().operator<=>(0);
diff --git a/test/SemaCXX/decl-expr-ambiguity.cpp b/test/SemaCXX/decl-expr-ambiguity.cpp
index 1e31d70..b77e226 100644
--- a/test/SemaCXX/decl-expr-ambiguity.cpp
+++ b/test/SemaCXX/decl-expr-ambiguity.cpp
@@ -125,3 +125,20 @@
   baz b3; // expected-error {{must use 'class' tag to refer to type 'baz' in this scope}}
 }
 }
+
+namespace TemporaryFromFunctionCall {
+  struct A {
+    A(int);
+  };
+  int f();
+  int g(int);
+  namespace N {
+    void x() {
+      // FIXME: For the first and second of these (but not the third), we
+      // should produce a vexing-parse warning.
+      A(f());
+      A(g(int()));
+      A(g(int));
+    }
+  }
+}
diff --git a/test/SemaCXX/decomposed-condition.cpp b/test/SemaCXX/decomposed-condition.cpp
new file mode 100644
index 0000000..ab011f6
--- /dev/null
+++ b/test/SemaCXX/decomposed-condition.cpp
@@ -0,0 +1,99 @@
+// RUN: %clang_cc1 -std=c++1z -Wno-binding-in-condition -verify %s
+
+struct X {
+  bool flag;
+  int data;
+  constexpr explicit operator bool() const {
+    return flag;
+  }
+  constexpr operator int() const {
+    return data;
+  }
+};
+
+namespace CondInIf {
+constexpr int f(X x) {
+  if (auto [ok, d] = x)
+    return d + int(ok);
+  else
+    return d * int(ok);
+  ok = {}; // expected-error {{use of undeclared identifier 'ok'}}
+  d = {};  // expected-error {{use of undeclared identifier 'd'}}
+}
+
+static_assert(f({true, 2}) == 3);
+static_assert(f({false, 2}) == 0);
+
+constexpr char g(char const (&x)[2]) {
+  if (auto &[a, b] = x)
+    return a;
+  else
+    return b;
+
+  if (auto [a, b] = x) // expected-error {{an array type is not allowed here}}
+    ;
+}
+
+static_assert(g("x") == 'x');
+} // namespace CondInIf
+
+namespace CondInSwitch {
+constexpr int f(int n) {
+  switch (X s = {true, n}; auto [ok, d] = s) {
+    s = {};
+  case 0:
+    return int(ok);
+  case 1:
+    return d * 10;
+  case 2:
+    return d * 40;
+  default:
+    return 0;
+  }
+  ok = {}; // expected-error {{use of undeclared identifier 'ok'}}
+  d = {};  // expected-error {{use of undeclared identifier 'd'}}
+  s = {};  // expected-error {{use of undeclared identifier 's'}}
+}
+
+static_assert(f(0) == 1);
+static_assert(f(1) == 10);
+static_assert(f(2) == 80);
+} // namespace CondInSwitch
+
+namespace CondInWhile {
+constexpr int f(int n) {
+  int m = 1;
+  while (auto [ok, d] = X{n > 1, n}) {
+    m *= d;
+    --n;
+  }
+  return m;
+  return ok; // expected-error {{use of undeclared identifier 'ok'}}
+}
+
+static_assert(f(0) == 1);
+static_assert(f(1) == 1);
+static_assert(f(4) == 24);
+} // namespace CondInWhile
+
+namespace CondInFor {
+constexpr int f(int n) {
+  int a = 1, b = 1;
+  for (X x = {true, n}; auto &[ok, d] = x; --d) {
+    if (d < 2)
+      ok = false;
+    else {
+      int x = b;
+      b += a;
+      a = x;
+    }
+  }
+  return b;
+  return d; // expected-error {{use of undeclared identifier 'd'}}
+}
+
+static_assert(f(0) == 1);
+static_assert(f(1) == 1);
+static_assert(f(2) == 2);
+static_assert(f(5) == 8);
+} // namespace CondInFor
diff --git a/test/SemaCXX/deleted-operator.cpp b/test/SemaCXX/deleted-operator.cpp
index f71e83a..64b2b22 100644
--- a/test/SemaCXX/deleted-operator.cpp
+++ b/test/SemaCXX/deleted-operator.cpp
@@ -8,8 +8,8 @@
 int PR10757f() {
   PR10757 a1;
   // FIXME: We get a ridiculous number of "built-in candidate" notes here...
-  if(~a1) {} // expected-error {{overload resolution selected deleted operator}} expected-note 8 {{built-in candidate}}
-  if(a1==a1) {} // expected-error {{overload resolution selected deleted operator}} expected-note 144 {{built-in candidate}}
+  if(~a1) {} // expected-error {{overload resolution selected deleted operator}} expected-note 6-8 {{built-in candidate}}
+  if(a1==a1) {} // expected-error {{overload resolution selected deleted operator}} expected-note 1-144 {{built-in candidate}}
 }
 
 struct DelOpDel {
diff --git a/test/SemaCXX/deprecated.cpp b/test/SemaCXX/deprecated.cpp
index 26f30c9..773085b 100644
--- a/test/SemaCXX/deprecated.cpp
+++ b/test/SemaCXX/deprecated.cpp
@@ -20,7 +20,12 @@
 // expected-warning@-8 {{dynamic exception specifications are deprecated}} expected-note@-8 {{use 'noexcept(false)' instead}}
 #endif
 
-void stuff() {
+void stuff(register int q) {
+#if __cplusplus > 201402L
+  // expected-error@-2 {{ISO C++17 does not allow 'register' storage class specifier}}
+#elif __cplusplus >= 201103L && !defined(NO_DEPRECATED_FLAGS)
+  // expected-warning@-4 {{'register' storage class specifier is deprecated}}
+#endif
   register int n;
 #if __cplusplus > 201402L
   // expected-error@-2 {{ISO C++17 does not allow 'register' storage class specifier}}
@@ -93,3 +98,6 @@
   void g() { c1 = c2; } // expected-note {{implicit copy assignment operator for 'DeprecatedCopy::Dtor' first required here}}
 }
 #endif
+
+# 1 "/usr/include/system-header.h" 1 3
+void system_header_function(void) throw();
diff --git a/test/SemaCXX/dllexport.cpp b/test/SemaCXX/dllexport.cpp
index a3fed70..c09a531 100644
--- a/test/SemaCXX/dllexport.cpp
+++ b/test/SemaCXX/dllexport.cpp
@@ -17,18 +17,18 @@
 
 // Invalid usage.
 __declspec(dllexport) typedef int typedef1;
-// expected-warning@-1{{'dllexport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllexport' attribute only applies to functions, variables, classes, and Objective-C interfaces}}
 typedef __declspec(dllexport) int typedef2;
-// expected-warning@-1{{'dllexport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 typedef int __declspec(dllexport) typedef3;
-// expected-warning@-1{{'dllexport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 typedef __declspec(dllexport) void (*FunTy)();
-// expected-warning@-1{{'dllexport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 enum __declspec(dllexport) Enum {};
-// expected-warning@-1{{'dllexport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 #if __has_feature(cxx_strong_enums)
 enum class __declspec(dllexport) EnumClass {};
-// expected-warning@-1{{'dllexport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 #endif
 
 
@@ -565,7 +565,7 @@
   __declspec(dllexport)                void privateDef();
 public:
 
-  __declspec(dllexport)                int  Field; // expected-warning{{'dllexport' attribute only applies to variables, functions and classes}}
+  __declspec(dllexport)                int  Field; // expected-warning{{'dllexport' attribute only applies to}}
   __declspec(dllexport) static         int  StaticField;
   __declspec(dllexport) static         int  StaticFieldDef;
   __declspec(dllexport) static  const  int  StaticConstField;
@@ -977,7 +977,7 @@
   __declspec(dllexport)                void privateDef();
 public:
 
-  __declspec(dllexport)                int  Field; // expected-warning{{'dllexport' attribute only applies to variables, functions and classes}}
+  __declspec(dllexport)                int  Field; // expected-warning{{'dllexport' attribute only applies to}}
   __declspec(dllexport) static         int  StaticField;
   __declspec(dllexport) static         int  StaticFieldDef;
   __declspec(dllexport) static  const  int  StaticConstField;
diff --git a/test/SemaCXX/dllimport.cpp b/test/SemaCXX/dllimport.cpp
index 1c59cca..04c33e9 100644
--- a/test/SemaCXX/dllimport.cpp
+++ b/test/SemaCXX/dllimport.cpp
@@ -16,18 +16,18 @@
 
 // Invalid usage.
 __declspec(dllimport) typedef int typedef1;
-// expected-warning@-1{{'dllimport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllimport' attribute only applies to functions, variables, classes, and Objective-C interfaces}}
 typedef __declspec(dllimport) int typedef2;
-// expected-warning@-1{{'dllimport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 typedef int __declspec(dllimport) typedef3;
-// expected-warning@-1{{'dllimport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 typedef __declspec(dllimport) void (*FunTy)();
-// expected-warning@-1{{'dllimport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 enum __declspec(dllimport) Enum {};
-// expected-warning@-1{{'dllimport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 #if __has_feature(cxx_strong_enums)
 enum class __declspec(dllimport) EnumClass {};
-// expected-warning@-1{{'dllimport' attribute only applies to variables, functions and classes}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 #endif
 
 
@@ -574,7 +574,7 @@
   __declspec(dllimport)                void privateDecl();
 public:
 
-  __declspec(dllimport)                int  Field; // expected-warning{{'dllimport' attribute only applies to variables, functions and classes}}
+  __declspec(dllimport)                int  Field; // expected-warning{{'dllimport' attribute only applies to}}
   __declspec(dllimport) static         int  StaticField;
   __declspec(dllimport) static         int  StaticFieldDef; // expected-note{{attribute is here}}
   __declspec(dllimport) static  const  int  StaticConstField;
@@ -1147,7 +1147,7 @@
   __declspec(dllimport)                void privateDecl();
 public:
 
-  __declspec(dllimport)                int  Field; // expected-warning{{'dllimport' attribute only applies to variables, functions and classes}}
+  __declspec(dllimport)                int  Field; // expected-warning{{'dllimport' attribute only applies to}}
   __declspec(dllimport) static         int  StaticField;
   __declspec(dllimport) static         int  StaticFieldDef; // expected-note{{attribute is here}}
   __declspec(dllimport) static  const  int  StaticConstField;
diff --git a/test/SemaCXX/enum-scoped.cpp b/test/SemaCXX/enum-scoped.cpp
index 3114bca..1fcafb1 100644
--- a/test/SemaCXX/enum-scoped.cpp
+++ b/test/SemaCXX/enum-scoped.cpp
@@ -309,3 +309,8 @@
 
   bool f() { return !f1(); } // expected-error {{invalid argument type 'test11::E2' (aka 'test11::E') to unary expression}}
 }
+
+namespace PR35586 {
+  enum C { R, G, B };
+  enum B { F = (enum C) -1, T}; // this should compile cleanly, it used to assert.
+};
diff --git a/test/SemaCXX/err_typecheck_assign_const.cpp b/test/SemaCXX/err_typecheck_assign_const.cpp
index 7e28125..3725e58 100644
--- a/test/SemaCXX/err_typecheck_assign_const.cpp
+++ b/test/SemaCXX/err_typecheck_assign_const.cpp
@@ -129,3 +129,23 @@
   Func &bar();
   bar()() = 0; // expected-error {{read-only variable is not assignable}}
 }
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+struct OhNo {
+  float4 v;
+  void AssignMe() const { v.x = 1; } // expected-error {{cannot assign to non-static data member within const member function 'AssignMe'}} \
+                                        expected-note {{member function 'OhNo::AssignMe' is declared const here}}
+};
+
+typedef float float4_2 __attribute__((__vector_size__(16)));
+struct OhNo2 {
+  float4_2 v;
+  void AssignMe() const { v[0] = 1; } // expected-error {{cannot assign to non-static data member within const member function 'AssignMe'}} \
+                                        expected-note {{member function 'OhNo2::AssignMe' is declared const here}}
+};
+
+struct OhNo3 {
+  float v[4];
+  void AssignMe() const { v[0] = 1; } // expected-error {{cannot assign to non-static data member within const member function 'AssignMe'}} \
+                                        expected-note {{member function 'OhNo3::AssignMe' is declared const here}}
+};
diff --git a/test/SemaCXX/extern-c.cpp b/test/SemaCXX/extern-c.cpp
index fa6c2b1..1a32bee 100644
--- a/test/SemaCXX/extern-c.cpp
+++ b/test/SemaCXX/extern-c.cpp
@@ -242,3 +242,20 @@
   ExternCStruct3 *q3 = p3;
   ExternCStruct4 *q4 = p4; // expected-error {{ambiguous}}
 }
+
+namespace PR35697 {
+  typedef struct {} *ReturnStruct;
+  extern "C" ReturnStruct PR35697_f();
+  extern "C" ReturnStruct PR35697_v;
+  ReturnStruct PR35697_f();
+  ReturnStruct PR35697_v;
+
+  namespace {
+    extern "C" ReturnStruct PR35697_f();
+    extern "C" ReturnStruct PR35697_v;
+    void q() {
+      extern ReturnStruct PR35697_f();
+      extern ReturnStruct PR35697_v;
+    }
+  }
+}
diff --git a/test/SemaCXX/flexible-array-test.cpp b/test/SemaCXX/flexible-array-test.cpp
index 2d74fa5..f2105ca 100644
--- a/test/SemaCXX/flexible-array-test.cpp
+++ b/test/SemaCXX/flexible-array-test.cpp
@@ -66,6 +66,11 @@
   char c[];
 };
 
+class C {
+  char c[]; // expected-error {{flexible array member 'c' with type 'char []' is not at the end of class}}
+  int s; // expected-note {{next field declaration is here}}
+};
+
 namespace rdar9065507 {
 
 struct StorageBase {
diff --git a/test/SemaCXX/friend2.cpp b/test/SemaCXX/friend2.cpp
index d1d4b62..8eacdeb 100644
--- a/test/SemaCXX/friend2.cpp
+++ b/test/SemaCXX/friend2.cpp
@@ -101,6 +101,34 @@
   friend void func_12(int x = 0);  // expected-error{{friend declaration specifying a default argument must be the only declaration}}
 };
 
+// Friend function with uninstantiated body is still a definition.
+
+template<typename T> struct C20 {
+  friend void func_20() {} // expected-note{{previous definition is here}}
+};
+C20<int> c20i;
+void func_20() {} // expected-error{{redefinition of 'func_20'}}
+
+template<typename T> struct C21a {
+  friend void func_21() {} // expected-note{{previous definition is here}}
+};
+template<typename T> struct C21b {
+  friend void func_21() {} // expected-error{{redefinition of 'func_21'}}
+};
+C21a<int> c21ai;
+C21b<int> c21bi; // expected-note{{in instantiation of template class 'C21b<int>' requested here}}
+
+template<typename T> struct C22a {
+  friend void func_22() {} // expected-note{{previous definition is here}}
+};
+template<typename T> struct C22b {
+  friend void func_22();
+};
+C22a<int> c22ai;
+C22b<int> c22bi;
+void func_22() {} // expected-error{{redefinition of 'func_22'}}
+
+
 
 namespace pr22307 {
 
diff --git a/test/SemaCXX/has_unique_object_reps_member_ptr.cpp b/test/SemaCXX/has_unique_object_reps_member_ptr.cpp
new file mode 100644
index 0000000..b8e27f8
--- /dev/null
+++ b/test/SemaCXX/has_unique_object_reps_member_ptr.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-linux-pc -DIS64 -fsyntax-only -verify -std=c++17 %s 
+// RUN: %clang_cc1 -triple x86_64-windows-pc -DIS64 -fsyntax-only -verify -std=c++17 %s
+// RUN: %clang_cc1 -triple i386-linux-pc -fsyntax-only -verify -std=c++17 %s
+// RUN: %clang_cc1 -triple i386-windows-pc -DW32 -fsyntax-only -verify -std=c++17 %s
+// expected-no-diagnostics
+
+struct Base {};
+struct A : virtual Base {
+  virtual void n() {}
+};
+
+auto p = &A::n;
+static_assert(__has_unique_object_representations(decltype(p)));
+
+struct B {
+  decltype(p) x;
+  int b;
+#ifdef IS64
+  // required on 64 bit to fill out the tail padding.
+  int c;
+#endif
+};
+static_assert(__has_unique_object_representations(B));
+
+struct C { // has padding on Win32, but nothing else.
+  decltype(p) x;
+};
+#ifdef W32
+static_assert(!__has_unique_object_representations(C));
+#else
+static_assert(__has_unique_object_representations(C));
+#endif
diff --git a/test/SemaCXX/init-expr-crash.cpp b/test/SemaCXX/init-expr-crash.cpp
index 407da78..201ab03 100644
--- a/test/SemaCXX/init-expr-crash.cpp
+++ b/test/SemaCXX/init-expr-crash.cpp
@@ -29,3 +29,11 @@
     return 0;
   }
 };
+
+// This test checks for a crash that resulted from us miscomputing the
+// dependence of a nested initializer list.
+template<int> struct X {
+  static constexpr int n = 4;
+  static constexpr int a[1][1] = {n};
+};
+
diff --git a/test/SemaCXX/internal_linkage.cpp b/test/SemaCXX/internal_linkage.cpp
index d5cc676..921a90a 100644
--- a/test/SemaCXX/internal_linkage.cpp
+++ b/test/SemaCXX/internal_linkage.cpp
@@ -5,7 +5,7 @@
 class A;
 class __attribute__((internal_linkage)) A {
 public:
-  int x __attribute__((internal_linkage)); // expected-warning{{'internal_linkage' attribute only applies to variables, functions and classes}}
+  int x __attribute__((internal_linkage)); // expected-warning{{'internal_linkage' attribute only applies to variables, functions, and classes}}
   static int y __attribute__((internal_linkage));
   void f1() __attribute__((internal_linkage));
   void f2() __attribute__((internal_linkage)) {}
@@ -16,7 +16,7 @@
   ~A() __attribute__((internal_linkage)) {}
   A& operator=(const A&) __attribute__((internal_linkage)) { return *this; }
   struct {
-    int z  __attribute__((internal_linkage)); // expected-warning{{'internal_linkage' attribute only applies to variables, functions and classes}}
+    int z  __attribute__((internal_linkage)); // expected-warning{{'internal_linkage' attribute only applies to}}
   };
 };
 
@@ -24,14 +24,14 @@
 
 __attribute__((internal_linkage)) int A::zz; // expected-error{{'internal_linkage' attribute does not appear on the first declaration of 'zz'}}
 
-namespace Z __attribute__((internal_linkage)) { // expected-warning{{'internal_linkage' attribute only applies to variables, functions and classes}}
+namespace Z __attribute__((internal_linkage)) { // expected-warning{{'internal_linkage' attribute only applies to}}
 }
 
 __attribute__((internal_linkage("foo"))) int g() {} // expected-error{{'internal_linkage' attribute takes no arguments}}
 
 [[clang::internal_linkage]] int h() {}
 
-enum struct __attribute__((internal_linkage)) E { // expected-warning{{'internal_linkage' attribute only applies to variables, functions and classes}}
+enum struct __attribute__((internal_linkage)) E { // expected-warning{{'internal_linkage' attribute only applies to}}
   a = 1,
   b = 2
 };
diff --git a/test/SemaCXX/lambda-expressions.cpp b/test/SemaCXX/lambda-expressions.cpp
index efbb476..4565345 100644
--- a/test/SemaCXX/lambda-expressions.cpp
+++ b/test/SemaCXX/lambda-expressions.cpp
@@ -583,3 +583,43 @@
     (void) [] { X<N> x; };
   }
 }
+
+namespace ConversionOperatorDoesNotHaveDeducedReturnType {
+  auto x = [](int){};
+  auto y = [](auto) -> void {};
+  using T = decltype(x);
+  using U = decltype(y);
+  using ExpectedTypeT = void (*)(int);
+  template<typename T>
+    using ExpectedTypeU = void (*)(T);
+
+  struct X {
+    friend T::operator ExpectedTypeT() const;
+
+    // Formally, this is invalid, because the return type of the conversion
+    // function for a generic lambda expression is an unspecified decltype
+    // type, which this should not match. However, this declaration is
+    // functionally equivalent to that one, so we're permitted to choose to
+    // accept this.
+    template<typename T>
+      friend U::operator ExpectedTypeU<T>() const;
+  };
+
+  // This used to crash in return type deduction for the conversion opreator.
+  struct A { int n; void f() { +[](decltype(n)) {}; } };
+}
+
+namespace TypoCorrection {
+template <typename T> struct X {};
+// expected-note@-1 {{template parameter is declared here}}
+
+template <typename T>
+void Run(const int& points) {
+// expected-note@-1 {{'points' declared here}}
+  auto outer_lambda = []() {
+    auto inner_lambda = [](const X<Points>&) {};
+    // expected-error@-1 {{use of undeclared identifier 'Points'; did you mean 'points'?}}
+    // expected-error@-2 {{template argument for template type parameter must be a type}}
+  };
+}
+}
diff --git a/test/SemaCXX/large-array-init.cpp b/test/SemaCXX/large-array-init.cpp
new file mode 100644
index 0000000..ba73428
--- /dev/null
+++ b/test/SemaCXX/large-array-init.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -S -o %t.ll -mllvm -debug-only=exprconstant %s 2>&1 | \
+// RUN:     FileCheck %s
+// REQUIRES: asserts
+
+struct S { int i; };
+
+static struct S arr[100000000] = {{ 0 }};
+// CHECK: The number of elements to initialize: 1.
+
+struct S *foo() { return arr; }
diff --git a/test/SemaCXX/microsoft-vs-float128.cpp b/test/SemaCXX/microsoft-vs-float128.cpp
new file mode 100644
index 0000000..d271e47
--- /dev/null
+++ b/test/SemaCXX/microsoft-vs-float128.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fms-compatibility -fms-extensions -fsyntax-only -verify -std=c++11 %s
+// RUN: %clang_cc1 -triple i686-pc-win32 -fms-compatibility -fms-extensions -fsyntax-only -verify -std=c++11 -DMS %s
+
+template <bool> struct enable_if {};
+template<> struct enable_if<true> { typedef void type; };
+
+template <typename, typename> struct is_same { static constexpr bool value = false; };
+template <typename T> struct is_same<T, T> { static constexpr bool value = true; };
+
+
+
+
+struct S {
+  // The only numeric types S can be converted to is __int128 and __float128.
+  template <typename T, typename = typename enable_if<
+                            !((__is_integral(T) && sizeof(T) != 16) ||
+                              is_same<T, float>::value ||
+                              is_same<T, double>::value ||
+                              is_same<T, long double>::value)>::type>
+  operator T() { return T(); }
+};
+
+void f() {
+#ifdef MS
+  // When targeting Win32, __float128 and __int128 do not exist, so the S
+  // object cannot be converted to anything usable in the expression.
+  // expected-error@+2{{invalid operands to binary expression ('S' and 'double')}}
+#endif
+  double d = S() + 1.0;
+#ifndef MS
+  // expected-error@-2{{use of overloaded operator '+' is ambiguous}}
+  // expected-note@-3 36{{built-in candidate operator+}}
+#endif
+}
diff --git a/test/SemaCXX/missing-members.cpp b/test/SemaCXX/missing-members.cpp
index 96bed07..61dddcb 100644
--- a/test/SemaCXX/missing-members.cpp
+++ b/test/SemaCXX/missing-members.cpp
@@ -37,3 +37,17 @@
   using A::B::C::f; // expected-error {{no member named 'f' in 'A::B::C'}}
   
 };
+
+struct S1 {};
+
+struct S2 : S1 {};
+
+struct S3 : S2 {
+  void run();
+};
+
+struct S4: S3 {};
+
+void test(S4 *ptr) {
+  ptr->S1::run();  // expected-error {{no member named 'run' in 'S1'}}
+}
diff --git a/test/SemaCXX/modules-ts.cppm b/test/SemaCXX/modules-ts.cppm
index f64a4c0..c07ee82 100644
--- a/test/SemaCXX/modules-ts.cppm
+++ b/test/SemaCXX/modules-ts.cppm
@@ -52,10 +52,6 @@
 export { ; }
 export { static_assert(true); }
 
-// FIXME: These diagnostics are not very good.
-export import foo; // expected-error {{expected unqualified-id}}
-export { import foo; } // expected-error {{expected unqualified-id}}
-
 int use_b = b;
 int use_n = n; // FIXME: this should not be visible, because it is not exported
 
diff --git a/test/SemaCXX/ms-uuid.cpp b/test/SemaCXX/ms-uuid.cpp
index e29dda8..c26ea68 100644
--- a/test/SemaCXX/ms-uuid.cpp
+++ b/test/SemaCXX/ms-uuid.cpp
@@ -23,7 +23,7 @@
 // clang-cl implements the following simpler (but largely compatible) behavior
 // instead:
 // * [] and __declspec uuids have the same behavior.
-// * If there are several uuids on a a class (no matter if on the same decl or
+// * If there are several uuids on a class (no matter if on the same decl or
 //   on several decls), it is an error if they don't match.
 // * Having several uuids that match is ok.
 
diff --git a/test/SemaCXX/new-array-size-conv.cpp b/test/SemaCXX/new-array-size-conv.cpp
index dbdd4bd..36b2f23 100644
--- a/test/SemaCXX/new-array-size-conv.cpp
+++ b/test/SemaCXX/new-array-size-conv.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -pedantic -verify %s
+// RUN: %clang_cc1 -fsyntax-only -pedantic -verify -std=gnu++98 %s
 // RUN: %clang_cc1 -fsyntax-only -pedantic -verify -std=c++98 %s
 // RUN: %clang_cc1 -fsyntax-only -pedantic -verify -std=c++11 %s
 
diff --git a/test/SemaCXX/new-delete.cpp b/test/SemaCXX/new-delete.cpp
index cb0d030..870a592 100644
--- a/test/SemaCXX/new-delete.cpp
+++ b/test/SemaCXX/new-delete.cpp
@@ -80,12 +80,21 @@
   (void)new int[1.1];
 #if __cplusplus <= 199711L
   // expected-error@-2 {{array size expression must have integral or enumeration type, not 'double'}}
-#else
+#elif __cplusplus <= 201103L
   // expected-error@-4 {{array size expression must have integral or unscoped enumeration type, not 'double'}}
+#else
+  // expected-warning@-6 {{implicit conversion from 'double' to 'unsigned int' changes value from 1.1 to 1}}
 #endif
 
-  (void)new int[1][i]; // expected-error {{only the first dimension}} expected-note {{read of non-const variable 'i' is not allowed in a constant expression}}
-  (void)new (int[1][i]); // expected-error {{only the first dimension}} expected-note {{read of non-const variable 'i' is not allowed in a constant expression}}
+  (void)new int[1][i];  // expected-note {{read of non-const variable 'i' is not allowed in a constant expression}}
+  (void)new (int[1][i]); // expected-note {{read of non-const variable 'i' is not allowed in a constant expression}}
+#if __cplusplus <= 201103L
+  // expected-error@-3 {{only the first dimension}}
+  // expected-error@-3 {{only the first dimension}}
+#else
+  // expected-error@-6 {{array size is not a constant expression}}
+  // expected-error@-6 {{array size is not a constant expression}}
+#endif
   (void)new (int[i]); // expected-warning {{when type is in parentheses}}
   (void)new int(*(S*)0); // expected-error {{no viable conversion from 'S' to 'int'}}
   (void)new int(1, 2); // expected-error {{excess elements in scalar initializer}}
@@ -94,13 +103,20 @@
   (void)new const int; // expected-error {{default initialization of an object of const type 'const int'}}
   (void)new float*(ip); // expected-error {{cannot initialize a new value of type 'float *' with an lvalue of type 'int *'}}
   // Undefined, but clang should reject it directly.
-  (void)new int[-1]; // expected-error {{array size is negative}}
+  (void)new int[-1];
+#if __cplusplus <= 201103L
+  // expected-error@-2 {{array size is negative}}
+#else
+  // expected-error@-4 {{array is too large}}
+#endif
   (void)new int[2000000000]; // expected-error {{array is too large}}
   (void)new int[*(S*)0];
 #if __cplusplus <= 199711L
   // expected-error@-2 {{array size expression must have integral or enumeration type, not 'S'}}
-#else
+#elif __cplusplus <= 201103L
   // expected-error@-4 {{array size expression must have integral or unscoped enumeration type, not 'S'}}
+#else
+  // expected-error@-6 {{converting 'S' to incompatible type}}
 #endif
 
   (void)::S::new int; // expected-error {{expected unqualified-id}}
diff --git a/test/SemaCXX/no-warn-user-defined-literals-in-system-headers.cpp b/test/SemaCXX/no-warn-user-defined-literals-in-system-headers.cpp
new file mode 100644
index 0000000..ee5b0c4
--- /dev/null
+++ b/test/SemaCXX/no-warn-user-defined-literals-in-system-headers.cpp
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 -Wsystem-headers -isystem %S %s
+
+#include <no-warn-user-defined-literals-in-system-headers.h>
+
+void operator "" bar(long double); // expected-warning{{user-defined literal suffixes not starting with '_' are reserved}}
diff --git a/test/SemaCXX/no-warn-user-defined-literals-in-system-headers.h b/test/SemaCXX/no-warn-user-defined-literals-in-system-headers.h
new file mode 100644
index 0000000..d1e2583
--- /dev/null
+++ b/test/SemaCXX/no-warn-user-defined-literals-in-system-headers.h
@@ -0,0 +1,2 @@
+// Header for no-warn-user-defined-literals-in-system-headers.cpp
+void operator "" foo (const char *);
diff --git a/test/SemaCXX/overload-call.cpp b/test/SemaCXX/overload-call.cpp
index 0e3a9ee..0c4bba5 100644
--- a/test/SemaCXX/overload-call.cpp
+++ b/test/SemaCXX/overload-call.cpp
@@ -658,3 +658,11 @@
   // expected-note@-5 {{candidate function}}
 #endif
 }
+
+namespace ProduceNotesAfterSFINAEFailure {
+  struct A {
+    template<typename T, typename U = typename T::x> A(T); // expected-warning 0-1{{extension}}
+  };
+  void f(void*, A); // expected-note {{candidate function not viable}}
+  void g() { f(1, 2); } // expected-error {{no matching function}}
+}
diff --git a/test/SemaCXX/return-noreturn.cpp b/test/SemaCXX/return-noreturn.cpp
index cdd96e6..cfeddff 100644
--- a/test/SemaCXX/return-noreturn.cpp
+++ b/test/SemaCXX/return-noreturn.cpp
@@ -143,7 +143,7 @@
 } // expected-warning {{control reaches end of non-void function}}
 
 void PR9412_f() {
-    PR9412_t<PR9412_Exact>(); // expected-note {{in instantiation of function template specialization 'PR9412_t<PR9412_MatchType::PR9412_Exact>' requested here}}
+    PR9412_t<PR9412_Exact>(); // expected-note {{in instantiation of function template specialization 'PR9412_t<PR9412_Exact>' requested here}}
 }
 
 struct NoReturn {
diff --git a/test/SemaCXX/self-comparison.cpp b/test/SemaCXX/self-comparison.cpp
index fb15ec8..ac129b6 100644
--- a/test/SemaCXX/self-comparison.cpp
+++ b/test/SemaCXX/self-comparison.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++2a
 
 int foo(int x) {
   return x == x; // expected-warning {{self-comparison always evaluates to true}}
@@ -21,3 +21,22 @@
     return a == c; // expected-warning {{array comparison always evaluates to false}}
   }
 };
+
+namespace NA { extern "C" int x[3]; }
+namespace NB { extern "C" int x[3]; }
+bool k = NA::x == NB::x; // expected-warning {{self-comparison always evaluates to true}}
+
+template<typename T> struct Y { static inline int n; };
+bool f() {
+  return
+    Y<int>::n == Y<int>::n || // expected-warning {{self-comparison always evaluates to true}}
+    Y<void>::n == Y<int>::n;
+}
+template<typename T, typename U>
+bool g() {
+  // FIXME: Ideally we'd produce a self-comparison warning on the first of these.
+  return
+    Y<T>::n == Y<T>::n ||
+    Y<T>::n == Y<U>::n;
+}
+template bool g<int, int>(); // should not produce any warnings
diff --git a/test/SemaCXX/sourceranges.cpp b/test/SemaCXX/sourceranges.cpp
index 2f8eb2f..58772a0 100644
--- a/test/SemaCXX/sourceranges.cpp
+++ b/test/SemaCXX/sourceranges.cpp
@@ -1,52 +1,139 @@
-// RUN: %clang_cc1 -triple i686-mingw32 -ast-dump %s | FileCheck %s
-
-template<class T>
-class P {
- public:
-  P(T* t) {}
-};
-
-namespace foo {
-class A { public: A(int = 0) {} };
-enum B {};
-typedef int C;
-}
-
-// CHECK: VarDecl {{0x[0-9a-fA-F]+}} <line:16:1, col:36> col:15 ImplicitConstrArray 'foo::A [2]'
-static foo::A ImplicitConstrArray[2];
-
-int main() {
-  // CHECK: CXXNewExpr {{0x[0-9a-fA-F]+}} <col:19, col:28> 'foo::A *'
-  P<foo::A> p14 = new foo::A;
-  // CHECK: CXXNewExpr {{0x[0-9a-fA-F]+}} <col:19, col:28> 'foo::B *'
-  P<foo::B> p24 = new foo::B;
-  // CHECK: CXXNewExpr {{0x[0-9a-fA-F]+}} <col:19, col:28> 'foo::C *'
-  P<foo::C> pr4 = new foo::C;
-}
-
-foo::A getName() {
-  // CHECK: CXXConstructExpr {{0x[0-9a-fA-F]+}} <col:10, col:17> 'foo::A'
-  return foo::A();
-}
-
-void destruct(foo::A *a1, foo::A *a2, P<int> *p1) {
-  // CHECK: MemberExpr {{0x[0-9a-fA-F]+}} <col:3, col:8> '<bound member function type>' ->~A
-  a1->~A();
-  // CHECK: MemberExpr {{0x[0-9a-fA-F]+}} <col:3, col:16> '<bound member function type>' ->~A
-  a2->foo::A::~A();
-  // CHECK: MemberExpr {{0x[0-9a-fA-F]+}} <col:3, col:13> '<bound member function type>' ->~P
-  p1->~P<int>();
-}
-
-struct D {
-  D(int);
-  ~D();
-};
-
-void construct() {
-  using namespace foo;
-  A a = A(12);
-  // CHECK: CXXConstructExpr {{0x[0-9a-fA-F]+}} <col:9, col:13> 'class foo::A' 'void (int){{( __attribute__\(\(thiscall\)\))?}}'
-  D d = D(12);
-  // CHECK: CXXConstructExpr {{0x[0-9a-fA-F]+}} <col:9, col:13> 'struct D' 'void (int){{( __attribute__\(\(thiscall\)\))?}}'
-}
+// RUN: %clang_cc1 -triple i686-mingw32 -ast-dump %s | FileCheck %s
+// RUN: %clang_cc1 -triple i686-mingw32 -std=c++1z -ast-dump %s | FileCheck %s -check-prefix=CHECK-1Z
+
+template<class T>
+class P {
+ public:
+  P(T* t) {}
+};
+
+namespace foo {
+class A { public: A(int = 0) {} };
+enum B {};
+typedef int C;
+}
+
+// CHECK: VarDecl {{0x[0-9a-fA-F]+}} <line:[[@LINE+1]]:1, col:36> col:15 ImplicitConstrArray 'foo::A [2]'
+static foo::A ImplicitConstrArray[2];
+
+int main() {
+  // CHECK: CXXNewExpr {{0x[0-9a-fA-F]+}} <col:19, col:28> 'foo::A *'
+  P<foo::A> p14 = new foo::A;
+  // CHECK: CXXNewExpr {{0x[0-9a-fA-F]+}} <col:19, col:28> 'foo::B *'
+  P<foo::B> p24 = new foo::B;
+  // CHECK: CXXNewExpr {{0x[0-9a-fA-F]+}} <col:19, col:28> 'foo::C *'
+  P<foo::C> pr4 = new foo::C;
+}
+
+foo::A getName() {
+  // CHECK: CXXConstructExpr {{0x[0-9a-fA-F]+}} <col:10, col:17> 'foo::A'
+  return foo::A();
+}
+
+void destruct(foo::A *a1, foo::A *a2, P<int> *p1) {
+  // CHECK: MemberExpr {{0x[0-9a-fA-F]+}} <col:3, col:8> '<bound member function type>' ->~A
+  a1->~A();
+  // CHECK: MemberExpr {{0x[0-9a-fA-F]+}} <col:3, col:16> '<bound member function type>' ->~A
+  a2->foo::A::~A();
+  // CHECK: MemberExpr {{0x[0-9a-fA-F]+}} <col:3, col:13> '<bound member function type>' ->~P
+  p1->~P<int>();
+}
+
+struct D {
+  D(int);
+  ~D();
+};
+
+void construct() {
+  using namespace foo;
+  A a = A(12);
+  // CHECK: CXXConstructExpr {{0x[0-9a-fA-F]+}} <col:9, col:13> 'foo::A' 'void (int){{( __attribute__\(\(thiscall\)\))?}}'
+  D d = D(12);
+  // CHECK: CXXConstructExpr {{0x[0-9a-fA-F]+}} <col:9, col:13> 'D' 'void (int){{( __attribute__\(\(thiscall\)\))?}}'
+}
+
+void abort() __attribute__((noreturn));
+
+namespace std {
+typedef decltype(sizeof(int)) size_t;
+
+template <typename E> struct initializer_list {
+  const E *p;
+  size_t n;
+  initializer_list(const E *p, size_t n) : p(p), n(n) {}
+};
+
+template <typename F, typename S> struct pair {
+  F f;
+  S s;
+  pair(const F &f, const S &s) : f(f), s(s) {}
+};
+
+struct string {
+  const char *str;
+  string() { abort(); }
+  string(const char *S) : str(S) {}
+  ~string() { abort(); }
+};
+
+template<typename K, typename V>
+struct map {
+  using T = pair<K, V>;
+  map(initializer_list<T> i, const string &s = string()) {}
+  ~map() { abort(); }
+};
+
+}; // namespace std
+
+#if __cplusplus >= 201703L
+// CHECK-1Z: FunctionDecl {{.*}} construct_with_init_list
+std::map<int, int> construct_with_init_list() {
+  // CHECK-1Z-NEXT: CompoundStmt
+  // CHECK-1Z-NEXT: ReturnStmt {{.*}} <line:[[@LINE+5]]:3, col:35
+  // CHECK-1Z-NEXT: ExprWithCleanups {{.*}} <col:10, col:35
+  // CHECK-1Z-NEXT: CXXBindTemporaryExpr {{.*}} <col:10, col:35
+  // CHECK-1Z-NEXT: CXXTemporaryObjectExpr {{.*}} <col:10, col:35
+  // CHECK-1Z-NEXT: CXXStdInitializerListExpr {{.*}} <col:28, col:35
+  return std::map<int, int>{{0, 0}};
+}
+
+// CHECK-1Z: NamespaceDecl {{.*}} in_class_init
+namespace in_class_init {
+  struct A {};
+
+  // CHECK-1Z: CXXRecordDecl {{.*}} struct B definition
+  struct B {
+    // CHECK-1Z: FieldDecl {{.*}} a 'in_class_init::A'
+    // CHECK-1Z-NEXT: InitListExpr {{.*}} <col:11, col:12
+    A a = {};
+  };
+}
+
+// CHECK-1Z: NamespaceDecl {{.*}} delegating_constructor_init
+namespace delegating_constructor_init {
+  struct A {};
+
+  struct B : A {
+    A a;
+    B(A a) : a(a) {}
+  };
+
+  // CHECK-1Z: CXXRecordDecl {{.*}} struct C definition
+  struct C : B {
+    // CHECK-1Z: CXXConstructorDecl {{.*}} C
+    // CHECK-1Z-NEXT: CXXCtorInitializer 'delegating_constructor_init::B'
+    // CHECK-1Z-NEXT: CXXConstructExpr {{.*}} <col:11, col:15
+    // CHECK-1Z-NEXT: InitListExpr {{.*}} <col:13, col:14
+    C() : B({}) {};
+  };
+}
+
+// CHECK-1Z: NamespaceDecl {{.*}} new_init
+namespace new_init {
+  void A() {
+    // CHECK-1Z: CXXNewExpr {{.*}} <line:[[@LINE+2]]:5, col:14
+    // CHECK-1Z-NEXT: InitListExpr {{.*}} <col:12, col:14
+    new int{0};
+  }
+}
+#endif
diff --git a/test/SemaCXX/template-default-param-through-using.cpp b/test/SemaCXX/template-default-param-through-using.cpp
new file mode 100644
index 0000000..4bf26d5
--- /dev/null
+++ b/test/SemaCXX/template-default-param-through-using.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// expected-no-diagnostics
+namespace llvm {
+  template<typename T > struct StringSet;
+  template<int I > struct Int;
+  template <typename Inner, template <typename> class Outer>
+    struct TemplTempl;
+}
+
+namespace lld {
+  using llvm::StringSet;
+  using llvm::Int;
+  using llvm::TemplTempl;
+};
+
+namespace llvm {
+  template<typename T > struct StringSet;
+}
+
+template<typename T> struct Temp{};
+
+namespace llvm {
+  template<typename T = int> struct StringSet{};
+  template<int I = 5> struct Int{};
+  template <typename Inner, template <typename> class Outer = Temp>
+    struct TemplTempl{};
+};
+
+namespace lld {
+  StringSet<> s;
+  Int<> i;
+  TemplTempl<int> tt;
+}
diff --git a/test/SemaCXX/type-traits.cpp b/test/SemaCXX/type-traits.cpp
index 5879a77..ca96503 100644
--- a/test/SemaCXX/type-traits.cpp
+++ b/test/SemaCXX/type-traits.cpp
@@ -2225,6 +2225,7 @@
 
   // PR25513
   { int arr[F(__is_constructible(int(int)))]; }
+  { int arr[T(__is_constructible(int const &, long))]; }
 
   { int arr[T(__is_constructible(ACompleteType))]; }
   { int arr[T(__is_nothrow_constructible(ACompleteType))]; }
@@ -2275,6 +2276,47 @@
   { int arr[F(__is_trivially_constructible(const volatile void))]; }
 }
 
+template <class T, class RefType = T &>
+struct ConvertsToRef {
+  operator RefType() const { return static_cast<RefType>(obj); }
+  mutable T obj = 42;
+};
+
+void reference_binds_to_temporary_checks() {
+  { int arr[F((__reference_binds_to_temporary(int &, int &)))]; }
+  { int arr[F((__reference_binds_to_temporary(int &, int &&)))]; }
+
+  { int arr[F((__reference_binds_to_temporary(int const &, int &)))]; }
+  { int arr[F((__reference_binds_to_temporary(int const &, int const &)))]; }
+  { int arr[F((__reference_binds_to_temporary(int const &, int &&)))]; }
+
+  { int arr[F((__reference_binds_to_temporary(int &, long &)))]; } // doesn't construct
+  { int arr[T((__reference_binds_to_temporary(int const &, long &)))]; }
+  { int arr[T((__reference_binds_to_temporary(int const &, long &&)))]; }
+  { int arr[T((__reference_binds_to_temporary(int &&, long &)))]; }
+
+  using LRef = ConvertsToRef<int, int &>;
+  using RRef = ConvertsToRef<int, int &&>;
+  using CLRef = ConvertsToRef<int, const int &>;
+  using LongRef = ConvertsToRef<long, long &>;
+  { int arr[T((__is_constructible(int &, LRef)))]; }
+  { int arr[F((__reference_binds_to_temporary(int &, LRef)))]; }
+
+  { int arr[T((__is_constructible(int &&, RRef)))]; }
+  { int arr[F((__reference_binds_to_temporary(int &&, RRef)))]; }
+
+  { int arr[T((__is_constructible(int const &, CLRef)))]; }
+  { int arr[F((__reference_binds_to_temporary(int &&, CLRef)))]; }
+
+  { int arr[T((__is_constructible(int const &, LongRef)))]; }
+  { int arr[T((__reference_binds_to_temporary(int const &, LongRef)))]; }
+
+  // Test that it doesn't accept non-reference types as input.
+  { int arr[F((__reference_binds_to_temporary(int, long)))]; }
+
+  { int arr[T((__reference_binds_to_temporary(const int &, long)))]; }
+}
+
 void array_rank() {
   int t01[T(__array_rank(IntAr) == 1)];
   int t02[T(__array_rank(ConstIntArAr) == 2)];
@@ -2352,3 +2394,321 @@
   { int arr[F(__is_trivially_destructible(void))]; }
   { int arr[F(__is_trivially_destructible(const volatile void))]; }
 }
+
+// Instantiation of __has_unique_object_representations
+template <typename T>
+struct has_unique_object_representations {
+  static const bool value = __has_unique_object_representations(T);
+};
+
+static_assert(!has_unique_object_representations<void>::value, "void is never unique");
+static_assert(!has_unique_object_representations<const void>::value, "void is never unique");
+static_assert(!has_unique_object_representations<volatile void>::value, "void is never unique");
+static_assert(!has_unique_object_representations<const volatile void>::value, "void is never unique");
+
+static_assert(has_unique_object_representations<int>::value, "integrals are");
+static_assert(has_unique_object_representations<const int>::value, "integrals are");
+static_assert(has_unique_object_representations<volatile int>::value, "integrals are");
+static_assert(has_unique_object_representations<const volatile int>::value, "integrals are");
+
+static_assert(has_unique_object_representations<void *>::value, "as are pointers");
+static_assert(has_unique_object_representations<const void *>::value, "as are pointers");
+static_assert(has_unique_object_representations<volatile void *>::value, "are pointers");
+static_assert(has_unique_object_representations<const volatile void *>::value, "as are pointers");
+
+static_assert(has_unique_object_representations<int *>::value, "as are pointers");
+static_assert(has_unique_object_representations<const int *>::value, "as are pointers");
+static_assert(has_unique_object_representations<volatile int *>::value, "as are pointers");
+static_assert(has_unique_object_representations<const volatile int *>::value, "as are pointers");
+
+class C {};
+using FP = int (*)(int);
+using PMF = int (C::*)(int);
+using PMD = int C::*;
+
+static_assert(has_unique_object_representations<FP>::value, "even function pointers");
+static_assert(has_unique_object_representations<const FP>::value, "even function pointers");
+static_assert(has_unique_object_representations<volatile FP>::value, "even function pointers");
+static_assert(has_unique_object_representations<const volatile FP>::value, "even function pointers");
+
+static_assert(has_unique_object_representations<PMF>::value, "and pointer to members");
+static_assert(has_unique_object_representations<const PMF>::value, "and pointer to members");
+static_assert(has_unique_object_representations<volatile PMF>::value, "and pointer to members");
+static_assert(has_unique_object_representations<const volatile PMF>::value, "and pointer to members");
+
+static_assert(has_unique_object_representations<PMD>::value, "and pointer to members");
+static_assert(has_unique_object_representations<const PMD>::value, "and pointer to members");
+static_assert(has_unique_object_representations<volatile PMD>::value, "and pointer to members");
+static_assert(has_unique_object_representations<const volatile PMD>::value, "and pointer to members");
+
+static_assert(has_unique_object_representations<bool>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<char>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<signed char>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<unsigned char>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<short>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<unsigned short>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<int>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<unsigned int>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<long>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<unsigned long>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<long long>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<unsigned long long>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<wchar_t>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<char16_t>::value, "yes, all integral types");
+static_assert(has_unique_object_representations<char32_t>::value, "yes, all integral types");
+
+static_assert(!has_unique_object_representations<void>::value, "but not void!");
+static_assert(!has_unique_object_representations<decltype(nullptr)>::value, "or nullptr_t");
+static_assert(!has_unique_object_representations<float>::value, "definitely not Floating Point");
+static_assert(!has_unique_object_representations<double>::value, "definitely not Floating Point");
+static_assert(!has_unique_object_representations<long double>::value, "definitely not Floating Point");
+
+struct NoPadding {
+  int a;
+  int b;
+};
+
+static_assert(has_unique_object_representations<NoPadding>::value, "types without padding are");
+
+struct InheritsFromNoPadding : NoPadding {
+  int c;
+  int d;
+};
+
+static_assert(has_unique_object_representations<InheritsFromNoPadding>::value, "types without padding are");
+
+struct VirtuallyInheritsFromNoPadding : virtual NoPadding {
+  int c;
+  int d;
+};
+
+static_assert(!has_unique_object_representations<VirtuallyInheritsFromNoPadding>::value, "No virtual inheritence");
+
+struct Padding {
+  char a;
+  int b;
+};
+
+//static_assert(!has_unique_object_representations<Padding>::value, "but not with padding");
+
+struct InheritsFromPadding : Padding {
+  int c;
+  int d;
+};
+
+static_assert(!has_unique_object_representations<InheritsFromPadding>::value, "or its subclasses");
+
+struct TailPadding {
+  int a;
+  char b;
+};
+
+static_assert(!has_unique_object_representations<TailPadding>::value, "even at the end");
+
+struct TinyStruct {
+  char a;
+};
+
+static_assert(has_unique_object_representations<TinyStruct>::value, "Should be no padding");
+
+struct InheritsFromTinyStruct : TinyStruct {
+  int b;
+};
+
+static_assert(!has_unique_object_representations<InheritsFromTinyStruct>::value, "Inherit causes padding");
+
+union NoPaddingUnion {
+  int a;
+  unsigned int b;
+};
+
+static_assert(has_unique_object_representations<NoPaddingUnion>::value, "unions follow the same rules as structs");
+
+union PaddingUnion {
+  int a;
+  long long b;
+};
+
+static_assert(!has_unique_object_representations<PaddingUnion>::value, "unions follow the same rules as structs");
+
+struct NotTriviallyCopyable {
+  int x;
+  NotTriviallyCopyable(const NotTriviallyCopyable &) {}
+};
+
+static_assert(!has_unique_object_representations<NotTriviallyCopyable>::value, "must be trivially copyable");
+
+struct HasNonUniqueMember {
+  float x;
+};
+
+static_assert(!has_unique_object_representations<HasNonUniqueMember>::value, "all members must be unique");
+
+enum ExampleEnum { xExample,
+                   yExample };
+enum LLEnum : long long { xLongExample,
+                          yLongExample };
+
+static_assert(has_unique_object_representations<ExampleEnum>::value, "Enums are integrals, so unique!");
+static_assert(has_unique_object_representations<LLEnum>::value, "Enums are integrals, so unique!");
+
+enum class ExampleEnumClass { xExample,
+                              yExample };
+enum class LLEnumClass : long long { xLongExample,
+                                     yLongExample };
+
+static_assert(has_unique_object_representations<ExampleEnumClass>::value, "Enums are integrals, so unique!");
+static_assert(has_unique_object_representations<LLEnumClass>::value, "Enums are integrals, so unique!");
+
+// because references aren't trivially copyable.
+static_assert(!has_unique_object_representations<int &>::value, "No references!");
+static_assert(!has_unique_object_representations<const int &>::value, "No references!");
+static_assert(!has_unique_object_representations<volatile int &>::value, "No references!");
+static_assert(!has_unique_object_representations<const volatile int &>::value, "No references!");
+static_assert(!has_unique_object_representations<Empty>::value, "No empty types!");
+static_assert(!has_unique_object_representations<EmptyUnion>::value, "No empty types!");
+
+class Compressed : Empty {
+  int x;
+};
+
+static_assert(has_unique_object_representations<Compressed>::value, "But inheriting from one is ok");
+
+class EmptyInheritor : Compressed {};
+
+static_assert(has_unique_object_representations<EmptyInheritor>::value, "As long as the base has items, empty is ok");
+
+class Dynamic {
+  virtual void A();
+  int i;
+};
+
+static_assert(!has_unique_object_representations<Dynamic>::value, "Dynamic types are not valid");
+
+class InheritsDynamic : Dynamic {
+  int j;
+};
+
+static_assert(!has_unique_object_representations<InheritsDynamic>::value, "Dynamic types are not valid");
+
+static_assert(has_unique_object_representations<int[42]>::value, "Arrays are fine, as long as their value type is");
+static_assert(has_unique_object_representations<int[]>::value, "Arrays are fine, as long as their value type is");
+static_assert(has_unique_object_representations<int[][42]>::value, "Arrays are fine, as long as their value type is");
+static_assert(!has_unique_object_representations<double[42]>::value, "So no array of doubles!");
+static_assert(!has_unique_object_representations<double[]>::value, "So no array of doubles!");
+static_assert(!has_unique_object_representations<double[][42]>::value, "So no array of doubles!");
+
+struct __attribute__((aligned(16))) WeirdAlignment {
+  int i;
+};
+union __attribute__((aligned(16))) WeirdAlignmentUnion {
+  int i;
+};
+static_assert(!has_unique_object_representations<WeirdAlignment>::value, "Alignment causes padding");
+static_assert(!has_unique_object_representations<WeirdAlignmentUnion>::value, "Alignment causes padding");
+static_assert(!has_unique_object_representations<WeirdAlignment[42]>::value, "Also no arrays that have padding");
+
+static_assert(!has_unique_object_representations<int(int)>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int) const>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int) volatile>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int) const volatile>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int) &>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int) const &>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int) volatile &>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int) const volatile &>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int) &&>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int) const &&>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int) volatile &&>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int) const volatile &&>::value, "Functions are not unique");
+
+static_assert(!has_unique_object_representations<int(int, ...)>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int, ...) const>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int, ...) volatile>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int, ...) const volatile>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int, ...) &>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int, ...) const &>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int, ...) volatile &>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int, ...) const volatile &>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int, ...) &&>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int, ...) const &&>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int, ...) volatile &&>::value, "Functions are not unique");
+static_assert(!has_unique_object_representations<int(int, ...) const volatile &&>::value, "Functions are not unique");
+
+void foo(){
+  static auto lambda = []() {};
+  static_assert(!has_unique_object_representations<decltype(lambda)>::value, "Lambdas follow struct rules");
+  int i;
+  static auto lambda2 = [i]() {};
+  static_assert(has_unique_object_representations<decltype(lambda2)>::value, "Lambdas follow struct rules");
+}
+
+struct PaddedBitfield {
+  char c : 6;
+  char d : 1;
+};
+
+struct UnPaddedBitfield {
+  char c : 6;
+  char d : 2;
+};
+
+struct AlignedPaddedBitfield {
+  char c : 6;
+  __attribute__((aligned(1)))
+  char d : 2;
+};
+
+static_assert(!has_unique_object_representations<PaddedBitfield>::value, "Bitfield padding");
+static_assert(has_unique_object_representations<UnPaddedBitfield>::value, "Bitfield padding");
+static_assert(!has_unique_object_representations<AlignedPaddedBitfield>::value, "Bitfield padding");
+
+struct BoolBitfield {
+  bool b : 8;
+};
+
+static_assert(has_unique_object_representations<BoolBitfield>::value, "Bitfield bool");
+
+struct BoolBitfield2 {
+  bool b : 16;
+};
+
+static_assert(!has_unique_object_representations<BoolBitfield2>::value, "Bitfield bool");
+
+struct GreaterSizeBitfield {
+  //expected-warning@+1 {{width of bit-field 'n'}}
+  int n : 1024;
+};
+
+static_assert(sizeof(GreaterSizeBitfield) == 128, "Bitfield Size");
+static_assert(!has_unique_object_representations<GreaterSizeBitfield>::value, "Bitfield padding");
+
+struct StructWithRef {
+  int &I;
+};
+
+static_assert(has_unique_object_representations<StructWithRef>::value, "References are still unique");
+
+struct NotUniqueBecauseTailPadding {
+  int &r;
+  char a;
+};
+struct CanBeUniqueIfNoPadding : NotUniqueBecauseTailPadding {
+  char b[7];
+};
+
+static_assert(!has_unique_object_representations<NotUniqueBecauseTailPadding>::value, 
+              "non trivial");
+// Can be unique on Itanium, since the is child class' data is 'folded' into the
+// parent's tail padding.
+static_assert(sizeof(CanBeUniqueIfNoPadding) != 16 ||
+              has_unique_object_representations<CanBeUniqueIfNoPadding>::value,
+              "inherit from std layout");
+
+namespace ErrorType {
+  struct S; //expected-note{{forward declaration of 'ErrorType::S'}}
+
+  struct T {
+        S t; //expected-error{{field has incomplete type 'ErrorType::S'}}
+  };
+  bool b = __has_unique_object_representations(T);
+};
diff --git a/test/SemaCXX/typeid-ref.cpp b/test/SemaCXX/typeid-ref.cpp
index 7e5dbdd..48c9fc2 100644
--- a/test/SemaCXX/typeid-ref.cpp
+++ b/test/SemaCXX/typeid-ref.cpp
@@ -6,7 +6,7 @@
 struct X { };
 
 void f() {
-  // CHECK: @_ZTS1X = linkonce_odr constant
-  // CHECK: @_ZTI1X = linkonce_odr constant 
+  // CHECK: @_ZTS1X = linkonce_odr {{(dso_local )?}}constant
+  // CHECK: @_ZTI1X = linkonce_odr {{(dso_local )?}}constant 
   (void)typeid(X&);
 }
diff --git a/test/SemaCXX/underlying_type.cpp b/test/SemaCXX/underlying_type.cpp
index dd019ae..87f3b92 100644
--- a/test/SemaCXX/underlying_type.cpp
+++ b/test/SemaCXX/underlying_type.cpp
@@ -62,3 +62,17 @@
 void PR26014() { f<E>(0); } // should not yield an ambiguity error.
 
 template<typename ...T> void f(__underlying_type(T) v); // expected-error {{declaration type contains unexpanded parameter pack 'T'}}
+
+namespace PR23421 {
+template <class T>
+using underlying_type_t = __underlying_type(T);
+// Should not crash.
+template <class T>
+struct make_unsigned_impl { using type = underlying_type_t<T>; };
+using AnotherType = make_unsigned_impl<E>::type;
+
+// also should not crash.
+template <typename T>
+__underlying_type(T) ft();
+auto x = &ft<E>;
+}
diff --git a/test/SemaCXX/unknown-type-name.cpp b/test/SemaCXX/unknown-type-name.cpp
index 2a9748e..9086c9a 100644
--- a/test/SemaCXX/unknown-type-name.cpp
+++ b/test/SemaCXX/unknown-type-name.cpp
@@ -95,7 +95,10 @@
 template<typename T> int h(T::type, int); // expected-error{{missing 'typename'}}
 template<typename T> int h(T::type x, char); // expected-error{{missing 'typename'}}
 
-template<typename T> int junk1(T::junk); // expected-warning{{variable templates are a C++14 extension}}
+template<typename T> int junk1(T::junk);
+#if __cplusplus <= 201103L
+// expected-warning@-2 {{variable templates are a C++14 extension}}
+#endif
 template<typename T> int junk2(T::junk) throw(); // expected-error{{missing 'typename'}}
 template<typename T> int junk3(T::junk) = delete; // expected-error{{missing 'typename'}}
 #if __cplusplus <= 199711L
@@ -106,7 +109,11 @@
 
 // FIXME: We can tell this was intended to be a function because it does not
 //        have a dependent nested name specifier.
-template<typename T> int i(T::type, int()); // expected-warning{{variable templates are a C++14 extension}}
+template<typename T> int i(T::type, int());
+#if __cplusplus <= 201103L
+// expected-warning@-2 {{variable templates are a C++14 extension}}
+#endif
+
 
 // FIXME: We know which type specifier should have been specified here. Provide
 //        a fix-it to add 'typename A<T>::type'
diff --git a/test/SemaCXX/unused.cpp b/test/SemaCXX/unused.cpp
index ba9ab23..abaf611 100644
--- a/test/SemaCXX/unused.cpp
+++ b/test/SemaCXX/unused.cpp
@@ -6,7 +6,7 @@
 // PR4103 : Make sure we don't get a bogus unused expression warning
 namespace PR4103 {
   class APInt {
-    char foo;
+    char foo; // expected-warning {{private field 'foo' is not used}}
   };
   class APSInt : public APInt {
     char bar; // expected-warning {{private field 'bar' is not used}}
diff --git a/test/SemaCXX/varargs.cpp b/test/SemaCXX/varargs.cpp
index f9027c2..f2f53dc 100644
--- a/test/SemaCXX/varargs.cpp
+++ b/test/SemaCXX/varargs.cpp
@@ -8,7 +8,7 @@
   __builtin_va_start(ap, s); // expected-warning {{passing an object of reference type to 'va_start' has undefined behavior}}
 }
 
-void g(register int i, ...) {
+void g(register int i, ...) { // expected-warning 0-1{{deprecated}}
   __builtin_va_start(ap, i); // UB in C, OK in C++
 }
 
diff --git a/test/SemaCXX/warn-consumed-parsing.cpp b/test/SemaCXX/warn-consumed-parsing.cpp
index 1796041..722a60b 100644
--- a/test/SemaCXX/warn-consumed-parsing.cpp
+++ b/test/SemaCXX/warn-consumed-parsing.cpp
@@ -22,15 +22,15 @@
   void callableWhen()   __attribute__ ((callable_when())); // expected-error {{'callable_when' attribute takes at least 1 argument}}
 };
 
-int var0 SET_TYPESTATE(consumed); // expected-warning {{'set_typestate' attribute only applies to methods}}
-int var1 TEST_TYPESTATE(consumed); // expected-warning {{'test_typestate' attribute only applies to methods}}
-int var2 CALLABLE_WHEN("consumed"); // expected-warning {{'callable_when' attribute only applies to methods}}
+int var0 SET_TYPESTATE(consumed); // expected-warning {{'set_typestate' attribute only applies to functions}}
+int var1 TEST_TYPESTATE(consumed); // expected-warning {{'test_typestate' attribute only applies to}}
+int var2 CALLABLE_WHEN("consumed"); // expected-warning {{'callable_when' attribute only applies to}}
 int var3 CONSUMABLE(consumed); // expected-warning {{'consumable' attribute only applies to classes}}
 int var4 RETURN_TYPESTATE(consumed); // expected-warning {{'return_typestate' attribute only applies to functions}}
 
-void function0() SET_TYPESTATE(consumed); // expected-warning {{'set_typestate' attribute only applies to methods}}
-void function1() TEST_TYPESTATE(consumed); // expected-warning {{'test_typestate' attribute only applies to methods}}
-void function2() CALLABLE_WHEN("consumed"); // expected-warning {{'callable_when' attribute only applies to methods}}
+void function0() SET_TYPESTATE(consumed); // expected-warning {{'set_typestate' attribute only applies to}}
+void function1() TEST_TYPESTATE(consumed); // expected-warning {{'test_typestate' attribute only applies to}}
+void function2() CALLABLE_WHEN("consumed"); // expected-warning {{'callable_when' attribute only applies to}}
 void function3() CONSUMABLE(consumed); // expected-warning {{'consumable' attribute only applies to classes}}
 
 class CONSUMABLE(unknown) AttrTester1 {
diff --git a/test/SemaCXX/warn-enum-compare.cpp b/test/SemaCXX/warn-enum-compare.cpp
index 95107a0..21dea5d 100644
--- a/test/SemaCXX/warn-enum-compare.cpp
+++ b/test/SemaCXX/warn-enum-compare.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -fsyntax-only -verify
+// RUN: %clang_cc1 %s -fsyntax-only -verify -triple %itanium_abi_triple
+// RUN: %clang_cc1 %s -fsyntax-only -verify -triple %ms_abi_triple -DMSABI
 
 enum Foo { FooA, FooB, FooC };
 enum Bar { BarD, BarE, BarF };
@@ -42,8 +43,10 @@
   while (b == c);
   while (B1 == name1::B2);
   while (B2 == name2::B1);
+#ifndef MSABI
   while (x == AnonAA); // expected-warning {{comparison of constant 'AnonAA' (42) with expression of type 'Foo' is always false}}
   while (AnonBB == y); // expected-warning {{comparison of constant 'AnonBB' (45) with expression of type 'Bar' is always false}}
+#endif
   while (AnonAA == AnonAB);
   while (AnonAB == AnonBA);
   while (AnonBB == AnonAA);
@@ -69,7 +72,9 @@
   while (B2 == ((((((name2::B1)))))));
 
   while (td == Anon1);
+#ifndef MSABI
   while (td == AnonAA);  // expected-warning {{comparison of constant 'AnonAA' (42) with expression of type 'TD' is always false}}
+#endif
 
   while (B1 == B2); // expected-warning  {{comparison of two values with different enumeration types ('name1::Baz' and 'name2::Baz')}}
   while (name1::B2 == name2::B3); // expected-warning  {{comparison of two values with different enumeration types ('name1::Baz' and 'name2::Baz')}}
diff --git a/test/SemaCXX/warn-global-constructors.cpp b/test/SemaCXX/warn-global-constructors.cpp
index 8568264..430f239 100644
--- a/test/SemaCXX/warn-global-constructors.cpp
+++ b/test/SemaCXX/warn-global-constructors.cpp
@@ -126,3 +126,22 @@
 void *array_storage[1];
 const int &global_reference = *(int *)array_storage;
 }
+
+namespace bitfields {
+  struct HasUnnamedBitfield {
+    unsigned a;
+    unsigned : 20;
+    unsigned b;
+
+    constexpr HasUnnamedBitfield() : a(), b() {}
+    constexpr HasUnnamedBitfield(unsigned a, unsigned b) : a(a), b(b) {}
+    explicit HasUnnamedBitfield(unsigned a) {}
+  };
+
+  const HasUnnamedBitfield zeroConst{};
+  HasUnnamedBitfield zeroMutable{};
+  const HasUnnamedBitfield explicitConst{1, 2};
+  HasUnnamedBitfield explicitMutable{1, 2};
+  const HasUnnamedBitfield nonConstexprConst{1}; // expected-warning {{global constructor}}
+  HasUnnamedBitfield nonConstexprMutable{1}; // expected-warning {{global constructor}}
+}
diff --git a/test/SemaCXX/warn-missing-variable-declarations.cpp b/test/SemaCXX/warn-missing-variable-declarations.cpp
index 5b88284..5b71f38 100644
--- a/test/SemaCXX/warn-missing-variable-declarations.cpp
+++ b/test/SemaCXX/warn-missing-variable-declarations.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang -Wmissing-variable-declarations -fsyntax-only -Xclang -verify %s
+// RUN: %clang -Wmissing-variable-declarations -fsyntax-only -Xclang -verify -std=c++17 %s
 
 // Variable declarations that should trigger a warning.
 int vbad1; // expected-warning{{no previous extern declaration for non-static variable 'vbad1'}}
@@ -52,3 +52,24 @@
 namespace {
   int vgood4;
 }
+
+inline int inline_var = 0;
+const int const_var = 0;
+constexpr int constexpr_var = 0;
+inline constexpr int inline_constexpr_var = 0;
+extern const int extern_const_var = 0; // expected-warning {{no previous extern declaration}}
+extern constexpr int extern_constexpr_var = 0; // expected-warning {{no previous extern declaration}}
+
+template<typename> int var_template = 0;
+template<typename> constexpr int const_var_template = 0;
+template<typename> static int static_var_template = 0;
+
+template int var_template<int[1]>;
+int use_var_template() { return var_template<int[2]>; }
+template int var_template<int[3]>;
+extern template int var_template<int[4]>;
+template<> int var_template<int[5]>; // expected-warning {{no previous extern declaration}}
+
+// FIXME: We give this specialization internal linkage rather than inheriting
+// the linkage from the template! We should not warn here.
+template<> int static_var_template<int[5]>; // expected-warning {{no previous extern declaration}}
diff --git a/test/SemaCXX/warn-redundant-move.cpp b/test/SemaCXX/warn-redundant-move.cpp
index abfb001..2bfc8c9 100644
--- a/test/SemaCXX/warn-redundant-move.cpp
+++ b/test/SemaCXX/warn-redundant-move.cpp
@@ -1,116 +1,116 @@
-// RUN: %clang_cc1 -fsyntax-only -Wredundant-move -std=c++11 -verify %s
-// RUN: %clang_cc1 -fsyntax-only -Wredundant-move -std=c++11 -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
-// RUN: %clang_cc1 -fsyntax-only -std=c++11 %s -ast-dump | FileCheck %s --check-prefix=CHECK-AST
-
-// definitions for std::move
-namespace std {
-inline namespace foo {
-template <class T> struct remove_reference { typedef T type; };
-template <class T> struct remove_reference<T&> { typedef T type; };
-template <class T> struct remove_reference<T&&> { typedef T type; };
-
-template <class T> typename remove_reference<T>::type &&move(T &&t);
-}
-}
-
-// test1 and test2 should not warn until after implementation of DR1579.
-struct A {};
-struct B : public A {};
-
-A test1(B b1) {
-  B b2;
-  return b1;
-  return b2;
-  return std::move(b1);
-  return std::move(b2);
-}
-
-struct C {
-  C() {}
-  C(A) {}
-};
-
-C test2(A a1, B b1) {
-  A a2;
-  B b2;
-
-  return a1;
-  return a2;
-  return b1;
-  return b2;
-
-  return std::move(a1);
-  return std::move(a2);
-  return std::move(b1);
-  return std::move(b2);
-}
-
-// Copy of tests above with types changed to reference types.
-A test3(B& b1) {
-  B& b2 = b1;
-  return b1;
-  return b2;
-  return std::move(b1);
-  return std::move(b2);
-}
-
-C test4(A& a1, B& b1) {
-  A& a2 = a1;
-  B& b2 = b1;
-
-  return a1;
-  return a2;
-  return b1;
-  return b2;
-
-  return std::move(a1);
-  return std::move(a2);
-  return std::move(b1);
-  return std::move(b2);
-}
-
-// PR23819, case 2
-struct D {};
-D test5(D d) {
-  return d;
-  // Verify the implicit move from the AST dump
-  // CHECK-AST: ReturnStmt{{.*}}line:[[@LINE-2]]
-  // CHECK-AST-NEXT: CXXConstructExpr{{.*}}struct D{{.*}}void (struct D &&)
-  // CHECK-AST-NEXT: ImplicitCastExpr
-  // CHECK-AST-NEXT: DeclRefExpr{{.*}}ParmVar{{.*}}'d'
-
-  return std::move(d);
-  // expected-warning@-1{{redundant move in return statement}}
-  // expected-note@-2{{remove std::move call here}}
-  // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:10-[[@LINE-3]]:20}:""
-  // CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:21-[[@LINE-4]]:22}:""
-}
-
-namespace templates {
-  struct A {};
-  struct B { B(A); };
-
-  // Warn once here since the type is not dependent.
-  template <typename T>
-  A test1(A a) {
-    return std::move(a);
-    // expected-warning@-1{{redundant move in return statement}}
-    // expected-note@-2{{remove std::move call here}}
-    // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:22}:""
-    // CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:23-[[@LINE-4]]:24}:""
-  }
-  void run_test1() {
-    test1<A>(A());
-    test1<B>(A());
-  }
-
-  // T1 and T2 may not be the same, the warning may not always apply.
-  template <typename T1, typename T2>
-  T1 test2(T2 t) {
-    return std::move(t);
-  }
-  void run_test2() {
-    test2<A, A>(A());
-    test2<B, A>(A());
-  }
-}
+// RUN: %clang_cc1 -fsyntax-only -Wredundant-move -std=c++11 -verify %s
+// RUN: %clang_cc1 -fsyntax-only -Wredundant-move -std=c++11 -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 %s -ast-dump | FileCheck %s --check-prefix=CHECK-AST
+
+// definitions for std::move
+namespace std {
+inline namespace foo {
+template <class T> struct remove_reference { typedef T type; };
+template <class T> struct remove_reference<T&> { typedef T type; };
+template <class T> struct remove_reference<T&&> { typedef T type; };
+
+template <class T> typename remove_reference<T>::type &&move(T &&t);
+}
+}
+
+// test1 and test2 should not warn until after implementation of DR1579.
+struct A {};
+struct B : public A {};
+
+A test1(B b1) {
+  B b2;
+  return b1;
+  return b2;
+  return std::move(b1);
+  return std::move(b2);
+}
+
+struct C {
+  C() {}
+  C(A) {}
+};
+
+C test2(A a1, B b1) {
+  A a2;
+  B b2;
+
+  return a1;
+  return a2;
+  return b1;
+  return b2;
+
+  return std::move(a1);
+  return std::move(a2);
+  return std::move(b1);
+  return std::move(b2);
+}
+
+// Copy of tests above with types changed to reference types.
+A test3(B& b1) {
+  B& b2 = b1;
+  return b1;
+  return b2;
+  return std::move(b1);
+  return std::move(b2);
+}
+
+C test4(A& a1, B& b1) {
+  A& a2 = a1;
+  B& b2 = b1;
+
+  return a1;
+  return a2;
+  return b1;
+  return b2;
+
+  return std::move(a1);
+  return std::move(a2);
+  return std::move(b1);
+  return std::move(b2);
+}
+
+// PR23819, case 2
+struct D {};
+D test5(D d) {
+  return d;
+  // Verify the implicit move from the AST dump
+  // CHECK-AST: ReturnStmt{{.*}}line:[[@LINE-2]]
+  // CHECK-AST-NEXT: CXXConstructExpr{{.*}}D{{.*}}void (D &&)
+  // CHECK-AST-NEXT: ImplicitCastExpr
+  // CHECK-AST-NEXT: DeclRefExpr{{.*}}ParmVar{{.*}}'d'
+
+  return std::move(d);
+  // expected-warning@-1{{redundant move in return statement}}
+  // expected-note@-2{{remove std::move call here}}
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:10-[[@LINE-3]]:20}:""
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:21-[[@LINE-4]]:22}:""
+}
+
+namespace templates {
+  struct A {};
+  struct B { B(A); };
+
+  // Warn once here since the type is not dependent.
+  template <typename T>
+  A test1(A a) {
+    return std::move(a);
+    // expected-warning@-1{{redundant move in return statement}}
+    // expected-note@-2{{remove std::move call here}}
+    // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:22}:""
+    // CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:23-[[@LINE-4]]:24}:""
+  }
+  void run_test1() {
+    test1<A>(A());
+    test1<B>(A());
+  }
+
+  // T1 and T2 may not be the same, the warning may not always apply.
+  template <typename T1, typename T2>
+  T1 test2(T2 t) {
+    return std::move(t);
+  }
+  void run_test2() {
+    test2<A, A>(A());
+    test2<B, A>(A());
+  }
+}
diff --git a/test/SemaCXX/warn-thread-safety-analysis.cpp b/test/SemaCXX/warn-thread-safety-analysis.cpp
index e6e9a0a..829e2e4 100644
--- a/test/SemaCXX/warn-thread-safety-analysis.cpp
+++ b/test/SemaCXX/warn-thread-safety-analysis.cpp
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 -Wthread-safety -Wthread-safety-beta -Wno-thread-safety-negative -fcxx-exceptions -DUSE_ASSERT_CAPABILITY=0 %s
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 -Wthread-safety -Wthread-safety-beta -Wno-thread-safety-negative -fcxx-exceptions -DUSE_ASSERT_CAPABILITY=1 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++17 -Wthread-safety -Wthread-safety-beta -Wno-thread-safety-negative -fcxx-exceptions -DUSE_ASSERT_CAPABILITY=0 %s
 
 // FIXME: should also run  %clang_cc1 -fsyntax-only -verify -Wthread-safety -std=c++11 -Wc++98-compat %s
 // FIXME: should also run  %clang_cc1 -fsyntax-only -verify -Wthread-safety %s
@@ -4427,11 +4428,11 @@
   int j PT_GUARDED_VAR;  // expected-warning {{'pt_guarded_var' only applies to pointer types; type here is 'int'}}
 
   void test() {
-    int i PT_GUARDED_BY(sls_mu);  // expected-warning {{'pt_guarded_by' attribute only applies to fields and global variables}}
-    int j PT_GUARDED_VAR;  // expected-warning {{'pt_guarded_var' attribute only applies to fields and global variables}}
+    int i PT_GUARDED_BY(sls_mu);  // expected-warning {{'pt_guarded_by' attribute only applies to non-static data members and global variables}}
+    int j PT_GUARDED_VAR;  // expected-warning {{'pt_guarded_var' attribute only applies to non-static data members and global variables}}
 
-    typedef int PT_GUARDED_BY(sls_mu) bad1;  // expected-warning {{'pt_guarded_by' attribute only applies to fields and global variables}}
-    typedef int PT_GUARDED_VAR bad2;  // expected-warning {{'pt_guarded_var' attribute only applies to fields and global variables}}
+    typedef int PT_GUARDED_BY(sls_mu) bad1;  // expected-warning {{'pt_guarded_by' attribute only applies to}}
+    typedef int PT_GUARDED_VAR bad2;  // expected-warning {{'pt_guarded_var' attribute only applies to}}
   }
 }  // end namespace pt_guard_attribute_type
 
@@ -5248,3 +5249,28 @@
 C c;
 void f() { c[A()]->g(); }
 } // namespace PR34800
+
+namespace ReturnScopedLockable {
+  template<typename Object> class SCOPED_LOCKABLE ReadLockedPtr {
+  public:
+    ReadLockedPtr(Object *ptr) SHARED_LOCK_FUNCTION((*this)->mutex);
+    ReadLockedPtr(ReadLockedPtr &&) SHARED_LOCK_FUNCTION((*this)->mutex);
+    ~ReadLockedPtr() UNLOCK_FUNCTION();
+
+    Object *operator->() const { return object; }
+
+  private:
+    Object *object;
+  };
+
+  struct Object {
+    int f() SHARED_LOCKS_REQUIRED(mutex);
+    Mutex mutex;
+  };
+
+  ReadLockedPtr<Object> get();
+  int use() {
+    auto ptr = get();
+    return ptr->f();
+  }
+}
diff --git a/test/SemaCXX/warn-thread-safety-parsing.cpp b/test/SemaCXX/warn-thread-safety-parsing.cpp
index ae32bfe..ac357e8 100644
--- a/test/SemaCXX/warn-thread-safety-parsing.cpp
+++ b/test/SemaCXX/warn-thread-safety-parsing.cpp
@@ -154,18 +154,18 @@
 };
 
 class GUARDED_VAR GV { // \
-  // expected-warning {{'guarded_var' attribute only applies to fields and global variables}}
+  // expected-warning {{'guarded_var' attribute only applies to non-static data members and global variables}}
 };
 
 void gv_function() GUARDED_VAR; // \
-  // expected-warning {{'guarded_var' attribute only applies to fields and global variables}}
+  // expected-warning {{'guarded_var' attribute only applies to}}
 
 void gv_function_params(int gv_lvar GUARDED_VAR); // \
-  // expected-warning {{'guarded_var' attribute only applies to fields and global variables}}
+  // expected-warning {{'guarded_var' attribute only applies to}}
 
 int gv_testfn(int y){
   int x GUARDED_VAR = y; // \
-    // expected-warning {{'guarded_var' attribute only applies to fields and global variables}}
+    // expected-warning {{'guarded_var' attribute only applies to}}
   return x;
 }
 
@@ -194,7 +194,7 @@
 };
 
 class PT_GUARDED_VAR PGV { // \
-  // expected-warning {{'pt_guarded_var' attribute only applies to fields and global variables}}
+  // expected-warning {{'pt_guarded_var' attribute only applies to non-static data members and global variables}}
 };
 
 int *pgv_var_args __attribute__((pt_guarded_var(1))); // \
@@ -202,14 +202,14 @@
 
 
 void pgv_function() PT_GUARDED_VAR; // \
-  // expected-warning {{'pt_guarded_var' attribute only applies to fields and global variables}}
+  // expected-warning {{'pt_guarded_var' attribute only applies to}}
 
 void pgv_function_params(int *gv_lvar PT_GUARDED_VAR); // \
-  // expected-warning {{'pt_guarded_var' attribute only applies to fields and global variables}}
+  // expected-warning {{'pt_guarded_var' attribute only applies to}}
 
 void pgv_testfn(int y){
   int *x PT_GUARDED_VAR = new int(0); // \
-    // expected-warning {{'pt_guarded_var' attribute only applies to fields and global variables}}
+    // expected-warning {{'pt_guarded_var' attribute only applies to}}
   delete x;
 }
 
@@ -231,28 +231,28 @@
 };
 
 void l_test_function() LOCKABLE;  // \
-  // expected-warning {{'lockable' attribute only applies to struct, union or class}}
+  // expected-warning {{'lockable' attribute only applies to structs, unions, and classes}}
 
 int l_testfn(int y) {
   int x LOCKABLE = y; // \
-    // expected-warning {{'lockable' attribute only applies to struct, union or class}}
+    // expected-warning {{'lockable' attribute only applies to}}
   return x;
 }
 
 int l_test_var LOCKABLE; // \
-  // expected-warning {{'lockable' attribute only applies to struct, union or class}}
+  // expected-warning {{'lockable' attribute only applies to}}
 
 class LFoo {
  private:
   int test_field LOCKABLE; // \
-    // expected-warning {{'lockable' attribute only applies to struct, union or class}}
+    // expected-warning {{'lockable' attribute only applies to}}
   void test_method() LOCKABLE; // \
-    // expected-warning {{'lockable' attribute only applies to struct, union or class}}
+    // expected-warning {{'lockable' attribute only applies to}}
 };
 
 
 void l_function_params(int lvar LOCKABLE); // \
-  // expected-warning {{'lockable' attribute only applies to struct, union or class}}
+  // expected-warning {{'lockable' attribute only applies to}}
 
 
 //-----------------------------------------//
@@ -271,28 +271,28 @@
 };
 
 void sl_test_function() SCOPED_LOCKABLE;  // \
-  // expected-warning {{'scoped_lockable' attribute only applies to struct, union or class}}
+  // expected-warning {{'scoped_lockable' attribute only applies to structs, unions, and classes}}
 
 int sl_testfn(int y) {
   int x SCOPED_LOCKABLE = y; // \
-    // expected-warning {{'scoped_lockable' attribute only applies to struct, union or class}}
+    // expected-warning {{'scoped_lockable' attribute only applies to}}
   return x;
 }
 
 int sl_test_var SCOPED_LOCKABLE; // \
-  // expected-warning {{'scoped_lockable' attribute only applies to struct, union or class}}
+  // expected-warning {{'scoped_lockable' attribute only applies to}}
 
 class SLFoo {
  private:
   int test_field SCOPED_LOCKABLE; // \
-    // expected-warning {{'scoped_lockable' attribute only applies to struct, union or class}}
+    // expected-warning {{'scoped_lockable' attribute only applies to}}
   void test_method() SCOPED_LOCKABLE; // \
-    // expected-warning {{'scoped_lockable' attribute only applies to struct, union or class}}
+    // expected-warning {{'scoped_lockable' attribute only applies to}}
 };
 
 
 void sl_function_params(int lvar SCOPED_LOCKABLE); // \
-  // expected-warning {{'scoped_lockable' attribute only applies to struct, union or class}}
+  // expected-warning {{'scoped_lockable' attribute only applies to}}
 
 
 //-----------------------------------------//
@@ -325,18 +325,18 @@
 };
 
 class GUARDED_BY(mu1) GB { // \
-  // expected-warning {{'guarded_by' attribute only applies to fields and global variables}}
+  // expected-warning {{'guarded_by' attribute only applies to non-static data members and global variables}}
 };
 
 void gb_function() GUARDED_BY(mu1); // \
-  // expected-warning {{'guarded_by' attribute only applies to fields and global variables}}
+  // expected-warning {{'guarded_by' attribute only applies to}}
 
 void gb_function_params(int gv_lvar GUARDED_BY(mu1)); // \
-  // expected-warning {{'guarded_by' attribute only applies to fields and global variables}}
+  // expected-warning {{'guarded_by' attribute only applies to}}
 
 int gb_testfn(int y){
   int x GUARDED_BY(mu1) = y; // \
-    // expected-warning {{'guarded_by' attribute only applies to fields and global variables}}
+    // expected-warning {{'guarded_by' attribute only applies to}}
   return x;
 }
 
@@ -351,7 +351,8 @@
 int gb_var_arg_6 GUARDED_BY(muRef);
 int gb_var_arg_7 GUARDED_BY(muDoubleWrapper.getWrapper()->getMu());
 int gb_var_arg_8 GUARDED_BY(muPointer);
-
+int gb_var_arg_9 GUARDED_BY(!&mu1);
+int gb_var_arg_10 GUARDED_BY(!&*&mu1);
 
 // illegal attribute arguments
 int gb_var_arg_bad_1 GUARDED_BY(1); // \
@@ -396,18 +397,18 @@
 };
 
 class PT_GUARDED_BY(mu1) PGB { // \
-  // expected-warning {{'pt_guarded_by' attribute only applies to fields and global variables}}
+  // expected-warning {{'pt_guarded_by' attribute only applies to non-static data members and global variables}}
 };
 
 void pgb_function() PT_GUARDED_BY(mu1); // \
-  // expected-warning {{'pt_guarded_by' attribute only applies to fields and global variables}}
+  // expected-warning {{'pt_guarded_by' attribute only applies to}}
 
 void pgb_function_params(int gv_lvar PT_GUARDED_BY(mu1)); // \
-  // expected-warning {{'pt_guarded_by' attribute only applies to fields and global variables}}
+  // expected-warning {{'pt_guarded_by' attribute only applies to}}
 
 void pgb_testfn(int y){
   int *x PT_GUARDED_BY(mu1) = new int(0); // \
-    // expected-warning {{'pt_guarded_by' attribute only applies to fields and global variables}}
+    // expected-warning {{'pt_guarded_by' attribute only applies to}}
   delete x;
 }
 
@@ -458,18 +459,18 @@
 };
 
 class ACQUIRED_AFTER(mu1) AA { // \
-  // expected-warning {{'acquired_after' attribute only applies to fields and global variables}}
+  // expected-warning {{'acquired_after' attribute only applies to non-static data members and global variables}}
 };
 
 void aa_function() ACQUIRED_AFTER(mu1); // \
-  // expected-warning {{'acquired_after' attribute only applies to fields and global variables}}
+  // expected-warning {{'acquired_after' attribute only applies to}}
 
 void aa_function_params(int gv_lvar ACQUIRED_AFTER(mu1)); // \
-  // expected-warning {{'acquired_after' attribute only applies to fields and global variables}}
+  // expected-warning {{'acquired_after' attribute only applies to}}
 
 void aa_testfn(int y){
   Mutex x ACQUIRED_AFTER(mu1) = Mutex(); // \
-    // expected-warning {{'acquired_after' attribute only applies to fields and global variables}}
+    // expected-warning {{'acquired_after' attribute only applies to}}
 }
 
 //Check argument parsing.
@@ -518,18 +519,18 @@
 };
 
 class ACQUIRED_BEFORE(mu1) AB { // \
-  // expected-warning {{'acquired_before' attribute only applies to fields and global variables}}
+  // expected-warning {{'acquired_before' attribute only applies to non-static data members and global variables}}
 };
 
 void ab_function() ACQUIRED_BEFORE(mu1); // \
-  // expected-warning {{'acquired_before' attribute only applies to fields and global variables}}
+  // expected-warning {{'acquired_before' attribute only applies to}}
 
 void ab_function_params(int gv_lvar ACQUIRED_BEFORE(mu1)); // \
-  // expected-warning {{'acquired_before' attribute only applies to fields and global variables}}
+  // expected-warning {{'acquired_before' attribute only applies to}}
 
 void ab_testfn(int y){
   Mutex x ACQUIRED_BEFORE(mu1) = Mutex(); // \
-    // expected-warning {{'acquired_before' attribute only applies to fields and global variables}}
+    // expected-warning {{'acquired_before' attribute only applies to}}
 }
 
 // Note: illegal int ab_int ACQUIRED_BEFORE(mu1) will
diff --git a/test/SemaCXX/warn-throw-out-noexcept-func.cpp b/test/SemaCXX/warn-throw-out-noexcept-func.cpp
index a6c23dd..646cea4 100644
--- a/test/SemaCXX/warn-throw-out-noexcept-func.cpp
+++ b/test/SemaCXX/warn-throw-out-noexcept-func.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s  -fdelayed-template-parsing -fcxx-exceptions -fsyntax-only -Wexceptions -verify -fdeclspec -std=c++11
+// RUN: %clang_cc1 %s  -fdelayed-template-parsing -fcxx-exceptions -fsyntax-only -Wexceptions -verify -fdeclspec -std=c++17
 struct A_ShouldDiag {
   ~A_ShouldDiag(); // implicitly noexcept(true)
 };
@@ -248,9 +248,11 @@
   }
 }
 
-void p_ShouldDiag() noexcept { //expected-note {{function declared non-throwing here}}
+void p_ShouldNotDiag() noexcept {
+  // Don't warn here: it's possible that the user arranges to only call this
+  // when the active exception is of type 'int'.
   try {
-    throw; //expected-warning {{has a non-throwing exception specification but}}
+    throw;
   } catch (int){
   }
 }
@@ -325,3 +327,138 @@
   ShouldDiagnose obj;
   ShouldNotDiagnose obj1;
 }
+
+namespace ExceptionInNamespace {
+  namespace N {
+    struct E {};
+  }
+  void run() throw() {
+    try {
+      throw N::E();
+    } catch (const N::E &e) {
+    }
+  }
+}
+
+namespace HandlerSpecialCases {
+  struct A {};
+  using CA = const A;
+
+  struct B : A {};
+  using CB = const B;
+
+  struct AmbigBase {};
+  struct AmbigMiddle : AmbigBase {};
+  struct AmbigDerived : AmbigBase, AmbigMiddle {}; // expected-warning {{inaccessible}}
+
+  struct PrivateBase {};
+  struct PrivateDerived : private PrivateBase { friend void bad3() throw(); };
+
+  void good() throw() {
+    try { throw CA(); } catch (volatile A&) {}
+    try { throw B(); } catch (A&) {}
+    try { throw B(); } catch (const volatile A&) {}
+    try { throw CB(); } catch (A&) {}
+    try { throw (int*)0; } catch (void* const volatile) {}
+    try { throw (int*)0; } catch (void* const &) {}
+    try { throw (B*)0; } catch (A*) {}
+    try { throw (B*)0; } catch (A* const &) {}
+    try { throw (void(*)() noexcept)0; } catch (void (*)()) {}
+    try { throw (void(*)() noexcept)0; } catch (void (*const &)()) {}
+    try { throw (int**)0; } catch (const int * const*) {}
+    try { throw (int**)0; } catch (const int * const* const&) {}
+    try { throw nullptr; } catch (int*) {}
+    try { throw nullptr; } catch (int* const&) {}
+  }
+
+  void bad1() throw() { // expected-note {{here}}
+    try { throw A(); } catch (const B&) {} // expected-warning {{still throw}}
+  }
+  void bad2() throw() { // expected-note {{here}}
+    try { throw AmbigDerived(); } catch (const AmbigBase&) {} // expected-warning {{still throw}}
+  }
+  void bad3() throw() { // expected-note {{here}}
+    try { throw PrivateDerived(); } catch (const PrivateBase&) {} // expected-warning {{still throw}}
+  }
+  void bad4() throw() { // expected-note {{here}}
+    try { throw (int*)0; } catch (void* &) {} // expected-warning {{still throw}}
+  }
+  void bad5() throw() { // expected-note {{here}}
+    try { throw (int*)0; } catch (void* const volatile &) {} // expected-warning {{still throw}}
+  }
+  void bad6() throw() { // expected-note {{here}}
+    try { throw (int* volatile)0; } catch (void* const volatile &) {} // expected-warning {{still throw}}
+  }
+  void bad7() throw() { // expected-note {{here}}
+    try { throw (AmbigDerived*)0; } catch (AmbigBase*) {} // expected-warning {{still throw}}
+  }
+  void bad8() throw() { // expected-note {{here}}
+    try { throw (PrivateDerived*)0; } catch (PrivateBase*) {} // expected-warning {{still throw}}
+  }
+  void bad9() throw() { // expected-note {{here}}
+    try { throw (B*)0; } catch (A* &) {} // expected-warning {{still throw}}
+  }
+  void bad10() throw() { // expected-note {{here}}
+    try { throw (void(*)())0; } catch (void (*)() noexcept) {} // expected-warning {{still throw}}
+  }
+  void bad11() throw() { // expected-note {{here}}
+    try { throw (int**)0; } catch (const int **) {} // expected-warning {{still throw}}
+  }
+  void bad12() throw() { // expected-note {{here}}
+    try { throw nullptr; } catch (int) {} // expected-warning {{still throw}}
+  }
+}
+
+namespace NestedTry {
+  void f() noexcept {
+    try {
+      try {
+        throw 0;
+      } catch (float) {}
+    } catch (int) {}
+  }
+
+  struct A { [[noreturn]] ~A(); };
+
+  void g() noexcept { // expected-note {{here}}
+    try {
+      try {
+        throw 0; // expected-warning {{still throw}}
+      } catch (float) {}
+    } catch (const char*) {}
+  }
+
+  void h() noexcept { // expected-note {{here}}
+    try {
+      try {
+        throw 0;
+      } catch (float) {}
+    } catch (int) {
+      throw; // expected-warning {{still throw}}
+    }
+  }
+
+  // FIXME: Ideally, this should still warn; we can track which types are
+  // potentially thrown by the rethrow.
+  void i() noexcept {
+    try {
+      try {
+        throw 0;
+      } catch (int) {
+        throw;
+      }
+    } catch (float) {}
+  }
+
+  // FIXME: Ideally, this should not warn: the second catch block is
+  // unreachable.
+  void j() noexcept { // expected-note {{here}}
+    try {
+      try {
+        throw 0;
+      } catch (int) {}
+    } catch (float) {
+      throw; // expected-warning {{still throw}}
+    }
+  }
+}
diff --git a/test/SemaCXX/warn-unused-attribute.cpp b/test/SemaCXX/warn-unused-attribute.cpp
index f52de3b..dffda31 100644
--- a/test/SemaCXX/warn-unused-attribute.cpp
+++ b/test/SemaCXX/warn-unused-attribute.cpp
@@ -15,6 +15,6 @@
   TestNormal normal;
   used.use();
 
-  int i __attribute__((warn_unused)) = 12; // expected-warning {{'warn_unused' attribute only applies to struct, union or class}}
+  int i __attribute__((warn_unused)) = 12; // expected-warning {{'warn_unused' attribute only applies to structs, unions, and classes}}
   return i;
 }
diff --git a/test/SemaCXX/warn-unused-lambda-capture.cpp b/test/SemaCXX/warn-unused-lambda-capture.cpp
index 6ad8e26..52ec390 100644
--- a/test/SemaCXX/warn-unused-lambda-capture.cpp
+++ b/test/SemaCXX/warn-unused-lambda-capture.cpp
@@ -191,3 +191,12 @@
 void test_use_template() {
   test_templated<int>(); // expected-note{{in instantiation of function template specialization 'test_templated<int>' requested here}}
 }
+
+namespace pr35555 {
+int a;
+void b() {
+  int c[a];
+  auto vla_used = [&c] { return c[0]; };
+  auto vla_unused = [&c] {}; // expected-warning{{lambda capture 'c' is not used}}
+}
+} // namespace pr35555
diff --git a/test/SemaCXX/warn-unused-private-field.cpp b/test/SemaCXX/warn-unused-private-field.cpp
index fb34fa9..fe44122 100644
--- a/test/SemaCXX/warn-unused-private-field.cpp
+++ b/test/SemaCXX/warn-unused-private-field.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -Wunused-private-field -Wused-but-marked-unused -Wno-uninitialized -verify -std=c++11 %s
+// RUN: %clang_cc1 -fsyntax-only -Wunused-private-field -Wused-but-marked-unused -Wno-uninitialized -verify -std=c++17 %s
 
 class NotFullyDefined {
  public:
@@ -246,3 +247,19 @@
     X x[4]; // no-warning
   };
 }
+
+class implicit_special_member {
+public:
+  static implicit_special_member make() { return implicit_special_member(); }
+
+private:
+  int n; // expected-warning{{private field 'n' is not used}}
+};
+
+class defaulted_special_member {
+public:
+  defaulted_special_member(const defaulted_special_member&) = default;
+
+private:
+  int n; // expected-warning{{private field 'n' is not used}}
+};
diff --git a/test/SemaCXX/warn-zero-nullptr.cpp b/test/SemaCXX/warn-zero-nullptr.cpp
index edd2a75..45f05fa 100644
--- a/test/SemaCXX/warn-zero-nullptr.cpp
+++ b/test/SemaCXX/warn-zero-nullptr.cpp
@@ -1,4 +1,10 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s -Wzero-as-null-pointer-constant -std=c++11
+// RUN: %clang_cc1 -fsyntax-only -verify %s -isystem %S/Inputs -Wzero-as-null-pointer-constant -std=c++11
+// RUN: %clang_cc1 -fsyntax-only -verify %s -isystem %S/Inputs -DSYSTEM_WARNINGS -Wzero-as-null-pointer-constant -Wsystem-headers -std=c++11
+
+#include <warn-zero-nullptr.h>
+
+#define MACRO (0)
+#define MCRO(x) (x)
 
 struct S {};
 
@@ -15,8 +21,12 @@
 void (*fp2)() = __null; // expected-warning{{zero as null pointer constant}}
 int (S::*mp2) = __null; // expected-warning{{zero as null pointer constant}}
 
-void f0(void* v = 0); // expected-warning{{zero as null pointer constant}}
-void f1(void* v);
+void f0(void* v = MACRO); // expected-warning{{zero as null pointer constant}}
+void f1(void* v = NULL); // expected-warning{{zero as null pointer constant}}
+void f2(void* v = MCRO(0)); // expected-warning{{zero as null pointer constant}}
+void f3(void* v = MCRO(NULL)); // expected-warning{{zero as null pointer constant}}
+void f4(void* v = 0); // expected-warning{{zero as null pointer constant}}
+void f5(void* v);
 
 void g() {
   f1(0); // expected-warning{{zero as null pointer constant}}
@@ -25,3 +35,58 @@
 // Warn on these too. Matches gcc and arguably makes sense.
 void* pp = (decltype(nullptr))0; // expected-warning{{zero as null pointer constant}}
 void* pp2 = static_cast<decltype(nullptr)>(0); // expected-warning{{zero as null pointer constant}}
+
+// Shouldn't warn.
+namespace pr34362 {
+struct A { operator int*() { return nullptr; } };
+void func() { if (nullptr == A()) {} }
+void func2() { if ((nullptr) == A()) {} }
+}
+
+template <typename T> void TmplFunc0(T var) {}
+void Func0Test() {
+  TmplFunc0<int>(0);
+  TmplFunc0<int*>(0); // expected-warning {{zero as null pointer constant}}
+  TmplFunc0<void*>(0); // expected-warning {{zero as null pointer constant}}
+}
+
+// FIXME: this one probably should not warn.
+template <typename T> void TmplFunc1(int a, T default_value = 0) {} // expected-warning{{zero as null pointer constant}} expected-warning{{zero as null pointer constant}}
+void FuncTest() {
+  TmplFunc1<int>(0);
+  TmplFunc1<int*>(0); // expected-note {{in instantiation of default function argument expression for 'TmplFunc1<int *>' required here}}
+  TmplFunc1<void*>(0);  // expected-note {{in instantiation of default function argument expression for 'TmplFunc1<void *>' required here}}
+}
+
+template<typename T>
+class TemplateClass0 {
+ public:
+  explicit TemplateClass0(T var) {}
+};
+void TemplateClass0Test() {
+  TemplateClass0<int> a(0);
+  TemplateClass0<int*> b(0); // expected-warning {{zero as null pointer constant}}
+  TemplateClass0<void*> c(0); // expected-warning {{zero as null pointer constant}}
+}
+
+template<typename T>
+class TemplateClass1 {
+ public:
+// FIXME: this one should *NOT* warn.
+  explicit TemplateClass1(int a, T default_value = 0) {} // expected-warning{{zero as null pointer constant}} expected-warning{{zero as null pointer constant}}
+};
+void IgnoreSubstTemplateType1() {
+  TemplateClass1<int> a(1);
+  TemplateClass1<int*> b(1); // expected-note {{in instantiation of default function argument expression for 'TemplateClass1<int *>' required here}}
+  TemplateClass1<void*> c(1); // expected-note {{in instantiation of default function argument expression for 'TemplateClass1<void *>' required here}}
+}
+
+#ifndef SYSTEM_WARNINGS
+// Do not warn on *any* other macros from system headers, even if they
+// expand to/their expansion contains NULL.
+void* sys_init = SYSTEM_MACRO;
+void* sys_init2 = OTHER_SYSTEM_MACRO;
+#else
+void* sys_init = SYSTEM_MACRO; // expected-warning {{zero as null pointer constant}}
+void* sys_init2 = OTHER_SYSTEM_MACRO; // expected-warning {{zero as null pointer constant}}
+#endif
diff --git a/test/SemaObjC/Inputs/module.map b/test/SemaObjC/Inputs/module.map
new file mode 100644
index 0000000..492d4c3
--- /dev/null
+++ b/test/SemaObjC/Inputs/module.map
@@ -0,0 +1,3 @@
+module empty {
+  header "empty.h"
+}
diff --git a/test/SemaObjC/arc-decls.m b/test/SemaObjC/arc-decls.m
index c1c319d..c728f00 100644
--- a/test/SemaObjC/arc-decls.m
+++ b/test/SemaObjC/arc-decls.m
@@ -3,7 +3,7 @@
 // rdar://8843524
 
 struct A {
-    id x; // expected-error {{ARC forbids Objective-C objects in struct}}
+    id x;
 };
 
 union u {
@@ -13,7 +13,7 @@
 @interface I {
    struct A a; 
    struct B {
-    id y[10][20]; // expected-error {{ARC forbids Objective-C objects in struct}}
+    id y[10][20];
     id z;
    } b;
 
@@ -23,7 +23,7 @@
 
 // rdar://10260525
 struct r10260525 {
-  id (^block) (); // expected-error {{ARC forbids blocks in struct}}
+  id (^block) ();
 };
 
 struct S { 
@@ -154,3 +154,25 @@
 
 @property (readwrite, weak) ControllerClass *weak_controller;
 @end
+
+@interface I3
+@end
+
+@interface D3 : I3
+@end
+
+@interface D3 (Cat1)
+- (id)method;
+@end
+
+@interface I3 (Cat2)
+// FIXME: clang should diagnose mismatch between methods in D3(Cat1) and
+//        I3(Cat2).
+- (id)method __attribute__((ns_returns_retained));
+@end
+
+@implementation D3
+- (id)method {
+  return (id)0;
+}
+@end
diff --git a/test/SemaObjC/arc-property-lifetime.m b/test/SemaObjC/arc-property-lifetime.m
index cfa32d1..b4b3403 100644
--- a/test/SemaObjC/arc-property-lifetime.m
+++ b/test/SemaObjC/arc-property-lifetime.m
@@ -172,7 +172,7 @@
 // rdar://11253688
 @interface Boom 
 {
-  const void * innerPointerIvar __attribute__((objc_returns_inner_pointer)); // expected-error {{'objc_returns_inner_pointer' attribute only applies to methods and properties}}
+  const void * innerPointerIvar __attribute__((objc_returns_inner_pointer)); // expected-error {{'objc_returns_inner_pointer' attribute only applies to Objective-C methods and Objective-C properties}}
 }
 @property (readonly) Boom * NotInnerPointer __attribute__((objc_returns_inner_pointer)); // expected-warning {{'objc_returns_inner_pointer' attribute only applies to properties that return a non-retainable pointer}}
 - (Boom *) NotInnerPointerMethod __attribute__((objc_returns_inner_pointer)); // expected-warning {{'objc_returns_inner_pointer' attribute only applies to methods that return a non-retainable pointer}}
diff --git a/test/SemaObjC/arc-system-header.m b/test/SemaObjC/arc-system-header.m
index 68230e7..f3b7236 100644
--- a/test/SemaObjC/arc-system-header.m
+++ b/test/SemaObjC/arc-system-header.m
@@ -23,8 +23,7 @@
 }
 
 void test5(struct Test5 *p) {
-  p->field = 0; // expected-error {{'field' is unavailable in ARC}}
-                // expected-note@arc-system-header.h:25 {{field has non-trivial ownership qualification}}
+  p->field = 0;
 }
 
 id test6() {
@@ -49,8 +48,7 @@
 
 extern void doSomething(Test9 arg);
 void test9() {
-    Test9 foo2 = {0, 0}; // expected-error {{'field' is unavailable in ARC}}
-                         // expected-note@arc-system-header.h:56 {{field has non-trivial ownership qualification}}
+    Test9 foo2 = {0, 0};
     doSomething(foo2);
 }
 #endif
diff --git a/test/SemaObjC/block-literal-with-attribute.m b/test/SemaObjC/block-literal-with-attribute.m
new file mode 100644
index 0000000..2a80579
--- /dev/null
+++ b/test/SemaObjC/block-literal-with-attribute.m
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -fsyntax-only %s -verify -fblocks -fobjc-arc
+// RUN: %clang_cc1 -fsyntax-only %s -verify -fblocks
+
+__auto_type block = ^ id __attribute__((ns_returns_retained)) (id filter) {
+  return filter; // ok
+};
+__auto_type block2 = ^  __attribute__((ns_returns_retained)) id (id filter) {
+  return filter; // ok
+};
+__auto_type block3 = ^ id (id filter)  __attribute__((ns_returns_retained))  {
+  return filter; // ok
+};
+
+// expected-no-diagnostics
diff --git a/test/SemaObjC/dllexport.m b/test/SemaObjC/dllexport.m
index e90b982..11cd1fb 100644
--- a/test/SemaObjC/dllexport.m
+++ b/test/SemaObjC/dllexport.m
@@ -1,17 +1,17 @@
 // RUN: %clang_cc1 -triple i686-windows -fdeclspec -fsyntax-only -verify %s
 
 __declspec(dllexport) typedef int typedef1;
-// expected-warning@-1{{'dllexport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllexport' attribute only applies to functions, variables, classes, and Objective-C interfaces}}
 typedef __declspec(dllexport) int typedef2;
-// expected-warning@-1{{'dllexport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 typedef int __declspec(dllexport) typedef3;
-// expected-warning@-1{{'dllexport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 typedef __declspec(dllexport) void (*FunTy)();
-// expected-warning@-1{{'dllexport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 enum __declspec(dllexport) E { Val };
-// expected-warning@-1{{'dllexport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 struct __declspec(dllexport) Record {};
-// expected-warning@-1{{'dllexport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllexport' attribute only applies to}}
 
 __declspec(dllexport)
 __attribute__((__objc_root_class__))
diff --git a/test/SemaObjC/dllimport.m b/test/SemaObjC/dllimport.m
index b836077..ea87aea 100644
--- a/test/SemaObjC/dllimport.m
+++ b/test/SemaObjC/dllimport.m
@@ -1,17 +1,17 @@
 // RUN: %clang_cc1 -triple i686-windows -fdeclspec -fsyntax-only -verify %s
 
 __declspec(dllimport) typedef int typedef1;
-// expected-warning@-1{{'dllimport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllimport' attribute only applies to functions, variables, classes, and Objective-C interfaces}}
 typedef __declspec(dllimport) int typedef2;
-// expected-warning@-1{{'dllimport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 typedef int __declspec(dllimport) typedef3;
-// expected-warning@-1{{'dllimport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 typedef __declspec(dllimport) void (*FunTy)();
-// expected-warning@-1{{'dllimport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 enum __declspec(dllimport) E { Val };
-// expected-warning@-1{{'dllimport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 struct __declspec(dllimport) Record {};
-// expected-warning@-1{{'dllimport' attribute only applies to functions, variables, and Objective-C interfaces}}
+// expected-warning@-1{{'dllimport' attribute only applies to}}
 
 __declspec(dllimport)
 __attribute__((__objc_root_class__))
diff --git a/test/SemaObjC/flexible-array-arc.m b/test/SemaObjC/flexible-array-arc.m
new file mode 100644
index 0000000..1490329
--- /dev/null
+++ b/test/SemaObjC/flexible-array-arc.m
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -fsyntax-only -fobjc-arc -verify -Wno-objc-root-class %s
+// RUN: %clang_cc1 -fsyntax-only -verify -Wno-objc-root-class -DNOARC %s
+#ifdef NOARC
+// expected-no-diagnostics
+#endif
+
+@interface RetainableArray {
+  id flexible[];
+#ifndef NOARC
+  // expected-error@-2 {{ARC forbids flexible array members with retainable object type}}
+#endif
+}
+@end
+@implementation RetainableArray
+@end
+
+// Emit diagnostic only if have @implementation.
+@interface RetainableArrayWithoutImpl {
+  id flexible[];
+}
+@end
+
+// With ARC flexible array member objects can be only __unsafe_unretained
+@interface UnsafeUnretainedArray {
+  __unsafe_unretained id flexible[];
+}
+@end
+@implementation UnsafeUnretainedArray
+@end
+
+@interface NotObjCLifetimeTypeArray {
+  char flexible[];
+}
+@end
+@implementation NotObjCLifetimeTypeArray
+@end
diff --git a/test/SemaObjC/flexible-array.m b/test/SemaObjC/flexible-array.m
new file mode 100644
index 0000000..68e32d8
--- /dev/null
+++ b/test/SemaObjC/flexible-array.m
@@ -0,0 +1,288 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -Wno-objc-root-class %s
+
+// # Flexible array member.
+// ## Instance variables only in interface.
+@interface LastIvar {
+  char flexible[];
+}
+@end
+
+@interface NotLastIvar {
+  char flexible[]; // expected-error {{flexible array member 'flexible' with type 'char []' is not at the end of class}}
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+
+// ## Instance variables in implementation.
+@interface LastIvarInImpl
+@end
+@implementation LastIvarInImpl {
+  char flexible[]; // expected-warning {{field 'flexible' with variable sized type 'char []' is not visible to subclasses and can conflict with their instance variables}}
+}
+@end
+
+@interface NotLastIvarInImpl
+@end
+@implementation NotLastIvarInImpl {
+  char flexible[]; // expected-error {{flexible array member 'flexible' with type 'char []' is not at the end of class}}
+  // expected-warning@-1 {{field 'flexible' with variable sized type 'char []' is not visible to subclasses and can conflict with their instance variables}}
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+
+@implementation NotLastIvarInImplWithoutInterface { // expected-warning {{cannot find interface declaration for 'NotLastIvarInImplWithoutInterface'}}
+  char flexible[]; // expected-error {{flexible array member 'flexible' with type 'char []' is not at the end of class}}
+  // expected-warning@-1 {{field 'flexible' with variable sized type 'char []' is not visible to subclasses and can conflict with their instance variables}}
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+
+@interface LastIvarInClass_OtherIvarInImpl {
+  char flexible[]; // expected-error {{flexible array member 'flexible' with type 'char []' is not at the end of class}}
+}
+@end
+@implementation LastIvarInClass_OtherIvarInImpl {
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+
+// ## Non-instance variables in implementation.
+@interface LastIvarInClass_UnrelatedVarInImpl {
+  char flexible[];
+}
+@end
+@implementation LastIvarInClass_UnrelatedVarInImpl
+int nonIvar;
+@end
+
+// ## Instance variables in class extension.
+@interface LastIvarInExtension
+@end
+@interface LastIvarInExtension() {
+  char flexible[]; // expected-warning {{field 'flexible' with variable sized type 'char []' is not visible to subclasses and can conflict with their instance variables}}
+}
+@end
+
+@interface NotLastIvarInExtension
+@end
+@interface NotLastIvarInExtension() {
+  char flexible[]; // expected-error {{flexible array member 'flexible' with type 'char []' is not at the end of class}}
+  // expected-warning@-1 {{field 'flexible' with variable sized type 'char []' is not visible to subclasses and can conflict with their instance variables}}
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+
+@interface LastIvarInClass_OtherIvarInExtension {
+  char flexible[]; // expected-error {{flexible array member 'flexible' with type 'char []' is not at the end of class}}
+}
+@end
+@interface LastIvarInClass_OtherIvarInExtension() {
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+
+@interface LastIvarInExtension_OtherIvarInExtension
+@end
+@interface LastIvarInExtension_OtherIvarInExtension() {
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+@interface LastIvarInExtension_OtherIvarInExtension()
+// Extension without ivars to test we see through such extensions.
+@end
+@interface LastIvarInExtension_OtherIvarInExtension() {
+  char flexible[]; // expected-error {{flexible array member 'flexible' with type 'char []' is not at the end of class}}
+  // expected-warning@-1 {{field 'flexible' with variable sized type 'char []' is not visible to subclasses and can conflict with their instance variables}}
+}
+@end
+
+@interface LastIvarInExtension_OtherIvarInImpl
+@end
+@interface LastIvarInExtension_OtherIvarInImpl() {
+  char flexible[]; // expected-error {{flexible array member 'flexible' with type 'char []' is not at the end of class}}
+  // expected-warning@-1 {{field 'flexible' with variable sized type 'char []' is not visible to subclasses and can conflict with their instance variables}}
+}
+@end
+@implementation LastIvarInExtension_OtherIvarInImpl {
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+
+// ## Instance variables in named categories.
+@interface IvarInNamedCategory
+@end
+@interface IvarInNamedCategory(Category) {
+  char flexible[]; // expected-error {{instance variables may not be placed in categories}}
+}
+@end
+
+// ## Synthesized instance variable.
+@interface LastIvarAndProperty {
+  char _flexible[];
+}
+@property char flexible[]; // expected-error {{property cannot have array or function type 'char []'}}
+@end
+
+// ## Synthesize other instance variables.
+@interface LastIvar_ExplicitlyNamedPropertyBackingIvarPreceding {
+  int _elementsCount;
+  char flexible[];
+}
+@property int count;
+@end
+@implementation LastIvar_ExplicitlyNamedPropertyBackingIvarPreceding
+@synthesize count = _elementsCount;
+@end
+
+@interface LastIvar_ImplicitlyNamedPropertyBackingIvarPreceding {
+  int count;
+  char flexible[];
+}
+@property int count;
+@end
+@implementation LastIvar_ImplicitlyNamedPropertyBackingIvarPreceding
+@synthesize count;
+@end
+
+@interface NotLastIvar_ExplicitlyNamedPropertyBackingIvarLast {
+  char flexible[]; // expected-error {{flexible array member 'flexible' with type 'char []' is not at the end of class}}
+}
+@property int count;
+@end
+@implementation NotLastIvar_ExplicitlyNamedPropertyBackingIvarLast
+@synthesize count = _elementsCount; // expected-note {{next synthesized instance variable is here}}
+@end
+
+@interface NotLastIvar_ImplicitlyNamedPropertyBackingIvarLast {
+  char flexible[]; // expected-error {{flexible array member 'flexible' with type 'char []' is not at the end of class}}
+}
+@property int count; // expected-note {{next synthesized instance variable is here}}
+@end
+@implementation NotLastIvar_ImplicitlyNamedPropertyBackingIvarLast
+// Test auto-synthesize.
+//@synthesize count;
+@end
+
+
+// # Variable sized types.
+struct Packet {
+  unsigned int size;
+  char data[];
+};
+
+// ## Instance variables only in interface.
+@interface LastStructIvar {
+  struct Packet flexible;
+}
+@end
+
+@interface NotLastStructIvar {
+  struct Packet flexible; // expected-error {{field 'flexible' with variable sized type 'struct Packet' is not at the end of class}}
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+
+// ## Instance variables in implementation.
+@interface LastStructIvarInImpl
+@end
+@implementation LastStructIvarInImpl {
+  struct Packet flexible; // expected-warning {{field 'flexible' with variable sized type 'struct Packet' is not visible to subclasses and can conflict with their instance variables}}
+}
+@end
+
+@interface NotLastStructIvarInImpl
+@end
+@implementation NotLastStructIvarInImpl {
+  struct Packet flexible; // expected-error {{field 'flexible' with variable sized type 'struct Packet' is not at the end of class}}
+  // expected-warning@-1 {{field 'flexible' with variable sized type 'struct Packet' is not visible to subclasses and can conflict with their instance variables}}
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+
+@interface LastStructIvarInClass_OtherIvarInImpl {
+  struct Packet flexible; // expected-error {{field 'flexible' with variable sized type 'struct Packet' is not at the end of class}}
+}
+@end
+@implementation LastStructIvarInClass_OtherIvarInImpl {
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+
+// ## Synthesized instance variable.
+@interface LastSynthesizeStructIvar
+@property int first;
+@property struct Packet flexible; // expected-error {{synthesized property with variable size type 'struct Packet' requires an existing instance variable}}
+@end
+@implementation LastSynthesizeStructIvar
+@end
+
+@interface NotLastSynthesizeStructIvar
+@property struct Packet flexible; // expected-error {{synthesized property with variable size type 'struct Packet' requires an existing instance variable}}
+@property int last;
+@end
+@implementation NotLastSynthesizeStructIvar
+@end
+
+@interface LastStructIvarWithExistingIvarAndSynthesizedProperty {
+  struct Packet _flexible;
+}
+@property struct Packet flexible;
+@end
+@implementation LastStructIvarWithExistingIvarAndSynthesizedProperty
+@end
+
+
+// # Subclasses.
+@interface FlexibleArrayMemberBase {
+  char flexible[]; // expected-note6 {{'flexible' declared here}}
+}
+@end
+
+@interface NoIvarAdditions : FlexibleArrayMemberBase
+@end
+@implementation NoIvarAdditions
+@end
+
+@interface AddedIvarInInterface : FlexibleArrayMemberBase {
+  int last; // expected-warning {{field 'last' can overwrite instance variable 'flexible' with variable sized type 'char []' in superclass 'FlexibleArrayMemberBase'}}
+}
+@end
+
+@interface AddedIvarInImplementation : FlexibleArrayMemberBase
+@end
+@implementation AddedIvarInImplementation {
+  int last; // expected-warning {{field 'last' can overwrite instance variable 'flexible' with variable sized type 'char []' in superclass 'FlexibleArrayMemberBase'}}
+}
+@end
+
+@interface AddedIvarInExtension : FlexibleArrayMemberBase
+@end
+@interface AddedIvarInExtension() {
+  int last; // expected-warning {{field 'last' can overwrite instance variable 'flexible' with variable sized type 'char []' in superclass 'FlexibleArrayMemberBase'}}
+}
+@end
+
+@interface SynthesizedIvar : FlexibleArrayMemberBase
+@property int count;
+@end
+@implementation SynthesizedIvar
+@synthesize count; // expected-warning {{field 'count' can overwrite instance variable 'flexible' with variable sized type 'char []' in superclass 'FlexibleArrayMemberBase'}}
+@end
+
+@interface WarnInSubclassOnlyOnce : FlexibleArrayMemberBase {
+  int last; // expected-warning {{field 'last' can overwrite instance variable 'flexible' with variable sized type 'char []' in superclass 'FlexibleArrayMemberBase'}}
+}
+@end
+@interface WarnInSubclassOnlyOnce() {
+  int laster;
+}
+@end
+@implementation WarnInSubclassOnlyOnce {
+  int lastest;
+}
+@end
+
+@interface AddedIvarInSubSubClass : NoIvarAdditions {
+  int last; // expected-warning {{field 'last' can overwrite instance variable 'flexible' with variable sized type 'char []' in superclass 'FlexibleArrayMemberBase'}}
+}
+@end
diff --git a/test/SemaObjC/format-arg-attribute.m b/test/SemaObjC/format-arg-attribute.m
index 8ad34e3..67c9c2e 100644
--- a/test/SemaObjC/format-arg-attribute.m
+++ b/test/SemaObjC/format-arg-attribute.m
@@ -9,9 +9,9 @@
 extern void fc2 (const NSString *) __attribute__((format_arg())); // expected-error {{'format_arg' attribute takes one argument}}
 extern void fc3 (const NSString *) __attribute__((format_arg(1, 2))); // expected-error {{'format_arg' attribute takes one argument}}
 
-struct s1 { int i; } __attribute__((format_arg(1)));  // expected-warning {{'format_arg' attribute only applies to non-K&R-style functions}}
-union u1 { int i; } __attribute__((format_arg(1)));  // expected-warning {{'format_arg' attribute only applies to non-K&R-style functions}}
-enum e1 { E1V0 } __attribute__((format_arg(1))); // expected-warning {{'format_arg' attribute only applies to non-K&R-style functions}}
+struct s1 { int i; } __attribute__((format_arg(1)));  // expected-warning {{'format_arg' attribute only applies to Objective-C methods and non-K&R-style functions}}
+union u1 { int i; } __attribute__((format_arg(1)));  // expected-warning {{'format_arg' attribute only applies to}}
+enum e1 { E1V0 } __attribute__((format_arg(1))); // expected-warning {{'format_arg' attribute only applies to}}
 
 extern NSString *ff3 (const NSString *) __attribute__((format_arg(3-2)));
 extern NSString *ff4 (const NSString *) __attribute__((format_arg(foo))); // expected-error {{use of undeclared identifier 'foo'}}
diff --git a/test/SemaObjC/ivar-sem-check-1.m b/test/SemaObjC/ivar-sem-check-1.m
index de038f5..48e0a54 100644
--- a/test/SemaObjC/ivar-sem-check-1.m
+++ b/test/SemaObjC/ivar-sem-check-1.m
@@ -6,14 +6,15 @@
 @interface INTF
 {
 	struct F {} JJ;
-	int arr[];  // expected-error {{field has incomplete type}}
+	int arr[];  // expected-error {{flexible array member 'arr' with type 'int []' is not at the end of class}}
 	struct S IC;  // expected-error {{field has incomplete type}}
+	              // expected-note@-1 {{next instance variable declaration is here}}
 	struct T { // expected-note {{previous definition is here}}
 	  struct T {} X;  // expected-error {{nested redefinition of 'T'}}
 	}YYY; 
 	FOO    BADFUNC;  // expected-error {{field 'BADFUNC' declared as a function}}
 	int kaka;	// expected-note {{previous declaration is here}}
 	int kaka;	// expected-error {{duplicate member 'kaka'}}
-	char ch[];	// expected-error {{field has incomplete type}}
+	char ch[];
 }
 @end
diff --git a/test/SemaObjC/ns-consumed-error-not-warning.m b/test/SemaObjC/ns-consumed-error-not-warning.m
new file mode 100644
index 0000000..f44cc8f
--- /dev/null
+++ b/test/SemaObjC/ns-consumed-error-not-warning.m
@@ -0,0 +1,13 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -I %S/Inputs -fobjc-arc -verify -fblocks -triple x86_64-apple-darwin10.0.0 -DOBJCARC %s
+// rdar://36265651
+
+@interface A
+-(void) m:(id)p; // expected-note {{parameter declared here}}
+@end
+
+@interface B : A
+-(void) m:(__attribute__((ns_consumed)) id)p; // expected-error {{overriding method has mismatched ns_consumed attribute on its parameter}}
+@end
+
+@import empty;
diff --git a/test/SemaObjC/objc-asm-attribute-neg-test.m b/test/SemaObjC/objc-asm-attribute-neg-test.m
index 2fb6643..98f39fb 100644
--- a/test/SemaObjC/objc-asm-attribute-neg-test.m
+++ b/test/SemaObjC/objc-asm-attribute-neg-test.m
@@ -15,15 +15,15 @@
 
 __attribute__((objc_runtime_name("MySecretNamespace.Message")))
 @interface Message <Protocol> { 
-__attribute__((objc_runtime_name("MySecretNamespace.Message"))) // expected-error {{'objc_runtime_name' attribute only applies to interface or protocol declarations}}
+__attribute__((objc_runtime_name("MySecretNamespace.Message"))) // expected-error {{'objc_runtime_name' attribute only applies to Objective-C interfaces and Objective-C protocols}}
   id MyIVAR;
 }
 __attribute__((objc_runtime_name("MySecretNamespace.Message")))
 @property int MyProperty; // expected-error {{prefix attribute must be followed by an interface or protocol}}}}
 
-- (int) getMyProperty __attribute__((objc_runtime_name("MySecretNamespace.Message"))); // expected-error {{'objc_runtime_name' attribute only applies to interface or protocol declarations}}
+- (int) getMyProperty __attribute__((objc_runtime_name("MySecretNamespace.Message"))); // expected-error {{'objc_runtime_name' attribute only applies to}}
 
-- (void) setMyProperty : (int) arg __attribute__((objc_runtime_name("MySecretNamespace.Message"))); // expected-error {{'objc_runtime_name' attribute only applies to interface or protocol declarations}}
+- (void) setMyProperty : (int) arg __attribute__((objc_runtime_name("MySecretNamespace.Message"))); // expected-error {{'objc_runtime_name' attribute only applies to}}
 
 @end
 
diff --git a/test/SemaObjC/objcbridge-attribute-arc.m b/test/SemaObjC/objcbridge-attribute-arc.m
index 3bcfdf4..f7473cc 100644
--- a/test/SemaObjC/objcbridge-attribute-arc.m
+++ b/test/SemaObjC/objcbridge-attribute-arc.m
@@ -32,7 +32,7 @@
 
 @interface I
 {
-   __attribute__((objc_bridge(NSError))) void * color; // expected-error {{'objc_bridge' attribute only applies to structs, unions, and typedefs}};
+   __attribute__((objc_bridge(NSError))) void * color; // expected-error {{'objc_bridge' attribute only applies to structs, unions, classes, and typedefs}};
 }
 @end
 
diff --git a/test/SemaObjC/objcbridge-attribute.m b/test/SemaObjC/objcbridge-attribute.m
index 9cab64e..c93543c 100644
--- a/test/SemaObjC/objcbridge-attribute.m
+++ b/test/SemaObjC/objcbridge-attribute.m
@@ -38,7 +38,7 @@
 
 @interface I
 {
-   __attribute__((objc_bridge(NSError))) void * color; // expected-error {{'objc_bridge' attribute only applies to structs, unions, and typedefs}};
+   __attribute__((objc_bridge(NSError))) void * color; // expected-error {{'objc_bridge' attribute only applies to structs, unions, classes, and typedefs}};
 }
 @end
 
diff --git a/test/SemaObjC/strong-in-c-struct.m b/test/SemaObjC/strong-in-c-struct.m
new file mode 100644
index 0000000..5dd5f94
--- /dev/null
+++ b/test/SemaObjC/strong-in-c-struct.m
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks  -fobjc-runtime=ios-11.0 -fsyntax-only -verify %s
+
+typedef struct {
+  id a;
+} Strong;
+
+void callee_variadic(const char *, ...);
+
+void test_variadic(void) {
+  Strong t;
+  callee_variadic("s", t); // expected-error {{cannot pass non-trivial C object of type 'Strong' by value to variadic function}}
+}
+
+void test_jump0(int cond) {
+  switch (cond) {
+  case 0:
+    ;
+    Strong x; // expected-note {{jump bypasses initialization of variable of non-trivial C struct type}}
+    break;
+  case 1: // expected-error {{cannot jump from switch statement to this case label}}
+    x.a = 0;
+    break;
+  }
+}
+
+void test_jump1(void) {
+  static void *ips[] = { &&L0 };
+L0:  // expected-note {{possible target of indirect goto}}
+  ;
+  Strong x; // expected-note {{jump exits scope of variable with non-trivial destructor}}
+  goto *ips; // expected-error {{cannot jump}}
+}
+
+typedef void (^BlockTy)(void);
+void func(BlockTy);
+void func2(Strong);
+
+void test_block_scope0(int cond) {
+  Strong x; // expected-note {{jump enters lifetime of block which captures a C struct that is non-trivial to destroy}}
+  switch (cond) {
+  case 0:
+    func(^{ func2(x); });
+    break;
+  default: // expected-error {{cannot jump from switch statement to this case label}}
+    break;
+  }
+}
+
+void test_block_scope1(void) {
+  static void *ips[] = { &&L0 };
+L0:  // expected-note {{possible target of indirect goto}}
+  ;
+  Strong x; // expected-note {{jump exits scope of variable with non-trivial destructor}} expected-note {{jump exits lifetime of block which captures a C struct that is non-trivial to destroy}}
+  func(^{ func2(x); });
+  goto *ips; // expected-error {{cannot jump}}
+}
diff --git a/test/SemaObjC/transfer-boxed-string-nullability.m b/test/SemaObjC/transfer-boxed-string-nullability.m
new file mode 100644
index 0000000..42604ef
--- /dev/null
+++ b/test/SemaObjC/transfer-boxed-string-nullability.m
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -fblocks -fobjc-arc -Wnullable-to-nonnull-conversion -fsyntax-only -verify -Wno-objc-root-class %s
+// RUN: %clang_cc1 -fblocks -fobjc-arc -Wnullable-to-nonnull-conversion -fsyntax-only -verify -Wno-objc-root-class -DNOWARN %s
+
+@interface NSString
+
++ (NSString*
+#ifndef NOWARN
+  _Nullable
+#else
+  _Nonnull
+#endif
+) stringWithUTF8String:(const char*)x;
+
+@end
+
+void takesNonNull(NSString * _Nonnull ptr);
+
+void testBoxedString() {
+  const char *str = "hey";
+  takesNonNull([NSString stringWithUTF8String:str]);
+  takesNonNull(@(str));
+#ifndef NOWARN
+  // expected-warning@-3 {{implicit conversion from nullable pointer 'NSString * _Nullable' to non-nullable pointer type 'NSString * _Nonnull'}}
+  // expected-warning@-3 {{implicit conversion from nullable pointer 'NSString * _Nullable' to non-nullable pointer type 'NSString * _Nonnull'}}
+#else
+  // expected-no-diagnostics
+#endif
+}
diff --git a/test/SemaObjC/unguarded-availability-category-protocol-use.m b/test/SemaObjC/unguarded-availability-category-protocol-use.m
new file mode 100644
index 0000000..d2eb9f4
--- /dev/null
+++ b/test/SemaObjC/unguarded-availability-category-protocol-use.m
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios10 -Wunguarded-availability -fblocks -fsyntax-only -verify %s
+
+__attribute__((availability(ios,unavailable)))
+@protocol Prot // expected-note {{here}}
+
+@end
+
+@interface A
+@end
+
+__attribute__((availability(ios,unavailable)))
+@interface A (Cat) <Prot> // No error.
+@end
+
+__attribute__((availability(tvos,unavailable)))
+@interface B @end
+@interface B (Cat) <Prot> // expected-error {{'Prot' is unavailable: not available on iOS}}
+@end
diff --git a/test/SemaObjC/warn-retain-cycle.m b/test/SemaObjC/warn-retain-cycle.m
index 4398d29..f27f1f8 100644
--- a/test/SemaObjC/warn-retain-cycle.m
+++ b/test/SemaObjC/warn-retain-cycle.m
@@ -198,3 +198,15 @@
    };
 
 }
+
+typedef void (^a_block_t)(void);
+
+@interface HonorNoEscape
+- (void)addStuffUsingBlock:(__attribute__((noescape)) a_block_t)block;
+@end
+
+void testNoEscape(HonorNoEscape *obj) {
+  [obj addStuffUsingBlock:^{
+    (void)obj; // ok.
+  }];
+}
diff --git a/test/SemaObjCXX/Inputs/nullability-completeness-cferror.h b/test/SemaObjCXX/Inputs/nullability-completeness-cferror.h
new file mode 100644
index 0000000..4988a74
--- /dev/null
+++ b/test/SemaObjCXX/Inputs/nullability-completeness-cferror.h
@@ -0,0 +1,13 @@
+@class NSError;
+
+#pragma clang assume_nonnull begin
+
+#ifdef USE_MUTABLE
+typedef struct __attribute__((objc_bridge_mutable(NSError))) __CFError * CFErrorRef;
+#else
+typedef struct __attribute__((objc_bridge(NSError))) __CFError * CFErrorRef;
+#endif
+
+void func1(CFErrorRef *error);
+
+#pragma clang assume_nonnull end
diff --git a/test/SemaObjCXX/attr-trivial-abi.mm b/test/SemaObjCXX/attr-trivial-abi.mm
new file mode 100644
index 0000000..c83a94d
--- /dev/null
+++ b/test/SemaObjCXX/attr-trivial-abi.mm
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 -std=c++11 -fobjc-runtime-has-weak -fobjc-weak -fobjc-arc -fsyntax-only -verify %s
+
+void __attribute__((trivial_abi)) foo(); // expected-warning {{'trivial_abi' attribute only applies to classes}}
+
+struct [[clang::trivial_abi]] S0 {
+  int a;
+};
+
+struct __attribute__((trivial_abi)) S1 {
+  int a;
+};
+
+struct __attribute__((trivial_abi)) S2 { // expected-warning {{'trivial_abi' cannot be applied to 'S2'}}
+  __weak id a;
+};
+
+struct __attribute__((trivial_abi)) S3 { // expected-warning {{'trivial_abi' cannot be applied to 'S3'}}
+  virtual void m();
+};
+
+struct S4 {
+  int a;
+};
+
+struct __attribute__((trivial_abi)) S5 : public virtual S4 { // expected-warning {{'trivial_abi' cannot be applied to 'S5'}}
+};
+
+struct __attribute__((trivial_abi)) S9 : public S4 {
+};
+
+struct S6 {
+  __weak id a;
+};
+
+struct __attribute__((trivial_abi)) S12 { // expected-warning {{'trivial_abi' cannot be applied to 'S12'}}
+  __weak id a;
+};
+
+struct __attribute__((trivial_abi)) S13 { // expected-warning {{'trivial_abi' cannot be applied to 'S13'}}
+  __weak id a[2];
+};
+
+struct __attribute__((trivial_abi)) S7 { // expected-warning {{'trivial_abi' cannot be applied to 'S7'}}
+  S6 a;
+};
+
+struct __attribute__((trivial_abi)) S11 { // expected-warning {{'trivial_abi' cannot be applied to 'S11'}}
+  S6 a[2];
+};
+
+struct __attribute__((trivial_abi(1))) S8 { // expected-error {{'trivial_abi' attribute takes no arguments}}
+  int a;
+};
+
+// Do not warn when 'trivial_abi' is used to annotate a template class.
+template<class T>
+struct __attribute__((trivial_abi)) S10 {
+  T p;
+};
+
+S10<int *> p1;
+S10<__weak id> p2;
+
+template<>
+struct __attribute__((trivial_abi)) S10<id> { // expected-warning {{'trivial_abi' cannot be applied to 'S10<id>'}}
+  __weak id a;
+};
+
+template<class T>
+struct S14 {
+  T a;
+  __weak id b;
+};
+
+template<class T>
+struct __attribute__((trivial_abi)) S15 : S14<T> {
+};
+
+S15<int> s15;
+
+template<class T>
+struct __attribute__((trivial_abi)) S16 {
+  S14<T> a;
+};
+
+S16<int> s16;
+
+template<class T>
+struct __attribute__((trivial_abi)) S17 { // expected-warning {{'trivial_abi' cannot be applied to 'S17'}}
+  __weak id a;
+};
+
+S17<int> s17;
diff --git a/test/SemaObjCXX/block-cleanup.mm b/test/SemaObjCXX/block-cleanup.mm
index 0c6a6d8..53b2c22 100644
--- a/test/SemaObjCXX/block-cleanup.mm
+++ b/test/SemaObjCXX/block-cleanup.mm
@@ -1,16 +1,16 @@
-// RUN: %clang_cc1 -triple x86_64-apple-macosx10.11.0 -std=gnu++11 -o /dev/null -x objective-c++ -fblocks -ast-dump %s 2>&1 | FileCheck %s
-
-// CHECK:      -FunctionDecl {{.*}} test 'id (void)'
-// CHECK-NEXT:   -CompoundStmt
-// CHECK-NEXT:     -ReturnStmt
-// CHECK-NEXT:       -ExprWithCleanups
-// CHECK-NEXT:         -cleanup Block
-// CHECK-NEXT:         -cleanup Block
-
-@interface NSDictionary
-+ (id)dictionaryWithObjects:(const id [])objects forKeys:(const id [])keys count:(unsigned long)cnt;
-@end
-
-id test() {
-  return @{@"a": [](){}, @"b": [](){}};
-}
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.11.0 -std=gnu++11 -o /dev/null -x objective-c++ -fblocks -ast-dump %s 2>&1 | FileCheck %s
+
+// CHECK:      -FunctionDecl {{.*}} test 'id ()'
+// CHECK-NEXT:   -CompoundStmt
+// CHECK-NEXT:     -ReturnStmt
+// CHECK-NEXT:       -ExprWithCleanups
+// CHECK-NEXT:         -cleanup Block
+// CHECK-NEXT:         -cleanup Block
+
+@interface NSDictionary
++ (id)dictionaryWithObjects:(const id [])objects forKeys:(const id [])keys count:(unsigned long)cnt;
+@end
+
+id test() {
+  return @{@"a": [](){}, @"b": [](){}};
+}
diff --git a/test/SemaObjCXX/block-variable-move.mm b/test/SemaObjCXX/block-variable-move.mm
new file mode 100644
index 0000000..e26dffc
--- /dev/null
+++ b/test/SemaObjCXX/block-variable-move.mm
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fobjc-arc -verify -fblocks -Wpessimizing-move -Wredundant-move %s
+
+// definitions for std::move
+namespace std {
+inline namespace foo {
+template <class T> struct remove_reference { typedef T type; };
+template <class T> struct remove_reference<T&> { typedef T type; };
+template <class T> struct remove_reference<T&&> { typedef T type; };
+
+template <class T> typename remove_reference<T>::type &&move(T &&t);
+}
+}
+
+class MoveOnly {
+public:
+  MoveOnly() { }
+  MoveOnly(MoveOnly &&) = default; // expected-note 2 {{copy constructor is implicitly deleted}}
+  MoveOnly &operator=(MoveOnly &&) = default;
+  ~MoveOnly();
+};
+
+void copyInit() {
+  __block MoveOnly temp;
+  MoveOnly temp2 = temp; // expected-error {{call to implicitly-deleted copy constructor of 'MoveOnly'}}
+  MoveOnly temp3 = std::move(temp); // ok
+}
+
+MoveOnly errorOnCopy() {
+  __block MoveOnly temp;
+  return temp; // expected-error {{call to implicitly-deleted copy constructor of 'MoveOnly'}}
+}
+
+MoveOnly dontWarnOnMove() {
+  __block MoveOnly temp;
+  return std::move(temp); // ok
+}
+
+class MoveOnlySub : public MoveOnly {};
+
+MoveOnly dontWarnOnMoveSubclass() {
+  __block MoveOnlySub temp;
+  return std::move(temp); // ok
+}
diff --git a/test/SemaObjCXX/flexible-array.mm b/test/SemaObjCXX/flexible-array.mm
new file mode 100644
index 0000000..5537876
--- /dev/null
+++ b/test/SemaObjCXX/flexible-array.mm
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -Wno-objc-root-class %s
+
+// Test only flexible array member functionality specific to C++.
+
+union VariableSizeUnion {
+  int s;
+  char c[];
+};
+
+@interface LastUnionIvar {
+  VariableSizeUnion flexible;
+}
+@end
+
+@interface NotLastUnionIvar {
+  VariableSizeUnion flexible; // expected-error {{field 'flexible' with variable sized type 'VariableSizeUnion' is not at the end of class}}
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
+
+
+class VariableSizeClass {
+public:
+  int s;
+  char c[];
+};
+
+@interface LastClassIvar {
+  VariableSizeClass flexible;
+}
+@end
+
+@interface NotLastClassIvar {
+  VariableSizeClass flexible; // expected-error {{field 'flexible' with variable sized type 'VariableSizeClass' is not at the end of class}}
+  int last; // expected-note {{next instance variable declaration is here}}
+}
+@end
diff --git a/test/SemaObjCXX/nullability-completeness-cferror.mm b/test/SemaObjCXX/nullability-completeness-cferror.mm
new file mode 100644
index 0000000..4cea9fb
--- /dev/null
+++ b/test/SemaObjCXX/nullability-completeness-cferror.mm
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -fsyntax-only -I %S/Inputs -x objective-c -Wnullability-completeness -Werror -verify %s
+// RUN: %clang_cc1 -fsyntax-only -I %S/Inputs -x objective-c -Wnullability-completeness -Werror -verify -DUSE_MUTABLE %s
+// expected-no-diagnostics
+
+#include "nullability-completeness-cferror.h"
diff --git a/test/SemaObjCXX/typo-correction.mm b/test/SemaObjCXX/typo-correction.mm
index 8dcda79..3f8a082 100644
--- a/test/SemaObjCXX/typo-correction.mm
+++ b/test/SemaObjCXX/typo-correction.mm
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -verify -fsyntax-only
+// RUN: %clang_cc1 %s -verify -fsyntax-only -Wno-objc-root-class
 
 class ClassA {};
 
@@ -55,3 +55,35 @@
   typoCandidate.x = 0; // expected-error {{use of undeclared identifier 'typoCandidate'; did you mean '_typoCandidate'?}}
 }
 @end
+
+// rdar://35172419
+// The scope of 'do-while' ends before typo-correction takes place.
+
+struct Mat2 { int rows; };
+
+@implementation ImplNoInt // expected-warning {{cannot find interface declaration for 'ImplNoInt'}}
+
+- (void)typoCorrentInDoWhile {
+  Mat2 tlMat1; // expected-note {{'tlMat1' declared here}}
+  // Create many scopes to exhaust the cache.
+  do {
+    for (int index = 0; index < 2; index++) {
+      if (true) {
+        for (int specialTileType = 1; specialTileType < 5; specialTileType++) {
+          for (int i = 0; i < 10; i++) {
+            for (double scale = 0.95; scale <= 1.055; scale += 0.05) {
+              for (int j = 0; j < 10; j++) {
+                if (1 > 0.9) {
+                    for (int sptile = 1; sptile < 5; sptile++) {
+                    }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  } while (tlMat.rows); // expected-error {{use of undeclared identifier 'tlMat'; did you mean 'tlMat1'}}
+}
+
+@end
diff --git a/test/SemaOpenCL/extension-version.cl b/test/SemaOpenCL/extension-version.cl
index 6100346..714e4c2 100644
--- a/test/SemaOpenCL/extension-version.cl
+++ b/test/SemaOpenCL/extension-version.cl
@@ -131,6 +131,15 @@
 #endif
 #pragma OPENCL EXTENSION cl_khr_d3d10_sharing: enable
 
+#if (__OPENCL_C_VERSION__ >= 110)
+#ifndef cles_khr_int64
+#error "Missing cles_khr_int64 define"
+#endif
+#else
+// expected-warning@+2{{unsupported OpenCL extension 'cles_khr_int64' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cles_khr_int64: enable
+
 #if (__OPENCL_C_VERSION__ >= 120)
 #ifndef cl_khr_context_abort
 #error "Missing cl_context_abort define"
@@ -273,3 +282,21 @@
 #endif
 #pragma OPENCL EXTENSION cl_amd_media_ops2: enable
 
+#if (__OPENCL_C_VERSION__ >= 120)
+#ifndef cl_intel_subgroups
+#error "Missing cl_intel_subgroups define"
+#endif
+#else
+// expected-warning@+2{{unsupported OpenCL extension 'cl_intel_subgroups' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_intel_subgroups : enable
+
+#if (__OPENCL_C_VERSION__ >= 120)
+#ifndef cl_intel_subgroups_short
+#error "Missing cl_intel_subgroups_short define"
+#endif
+#else
+// expected-warning@+2{{unsupported OpenCL extension 'cl_intel_subgroups_short' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_intel_subgroups_short : enable
+
diff --git a/test/SemaOpenCL/sampler_t.cl b/test/SemaOpenCL/sampler_t.cl
index 4d68dd2..5a1850b 100644
--- a/test/SemaOpenCL/sampler_t.cl
+++ b/test/SemaOpenCL/sampler_t.cl
@@ -46,36 +46,11 @@
 
 void kernel ker(sampler_t argsmp) {
   local sampler_t smp; // expected-error{{sampler type cannot be used with the __local and __global address space qualifiers}}
-  const sampler_t const_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
-  const sampler_t const_smp2;
-  const sampler_t const_smp3 = const_smp;
-  const sampler_t const_smp4 = f();
   const sampler_t const_smp5 = 1.0f; // expected-error{{initializing 'const sampler_t' with an expression of incompatible type 'float'}}
   const sampler_t const_smp6 = 0x100000000LL; // expected-error{{sampler_t initialization requires 32-bit integer, not 'long long'}}
 
-  foo(glb_smp);
-  foo(glb_smp2);
-  foo(glb_smp3);
-  foo(glb_smp4);
-  foo(glb_smp5);
-  foo(glb_smp6);
-  foo(glb_smp7);
-  foo(glb_smp8);
-  foo(glb_smp9);
-  foo(smp);
-  foo(sampler_str.smp);
-  foo(const_smp);
-  foo(const_smp2);
-  foo(const_smp3);
-  foo(const_smp4);
-  foo(const_smp5);
-  foo(const_smp6);
-  foo(argsmp);
-  foo(5);
   foo(5.0f); // expected-error {{passing 'float' to parameter of incompatible type 'sampler_t'}}
-  sampler_t sa[] = {argsmp, const_smp}; // expected-error {{array of 'sampler_t' type is invalid in OpenCL}}
-  foo(sa[0]);
-  foo(bad());
+  sampler_t sa[] = {argsmp, glb_smp}; // expected-error {{array of 'sampler_t' type is invalid in OpenCL}}
 }
 
 void bad(sampler_t*); // expected-error{{pointer to type 'sampler_t' is invalid in OpenCL}}
diff --git a/test/SemaTemplate/alignas.cpp b/test/SemaTemplate/alignas.cpp
index 8a1f96e..680f07b 100644
--- a/test/SemaTemplate/alignas.cpp
+++ b/test/SemaTemplate/alignas.cpp
@@ -21,3 +21,14 @@
 
 static_assert(sizeof(my_union<A, B, C>) == 16, "");
 static_assert(alignof(my_union<A, B, C>) == 8, "");
+
+namespace PR35028 {
+  template<class X, int Alignment> struct alignas(X) alignas(long long) alignas(long double) alignas(Alignment) Aligned {
+    union {
+      long long align1;
+      long double align2;
+      char data[sizeof(X)];
+    };
+  };
+  Aligned<int, 1> a;
+}
diff --git a/test/SemaTemplate/attributes.cpp b/test/SemaTemplate/attributes.cpp
index 1d46058..7634b93 100644
--- a/test/SemaTemplate/attributes.cpp
+++ b/test/SemaTemplate/attributes.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -std=gnu++11 -fsyntax-only -verify %s
+// RUN: not %clang_cc1 -std=gnu++11 -ast-dump %s | FileCheck %s
 
 namespace attribute_aligned {
   template<int N>
@@ -52,3 +53,13 @@
   template<typename T>
   inline void WBCFRelease(__attribute__((cf_consumed)) T aValue) { if(aValue) CFRelease(aValue); }
 }
+
+// CHECK: FunctionTemplateDecl {{.*}} HasAnnotations
+// CHECK:   AnnotateAttr {{.*}} "ANNOTATE_BAR"
+// CHECK:   AnnotateAttr {{.*}} "ANNOTATE_FOO"
+// CHECK: FunctionDecl {{.*}} HasAnnotations
+// CHECK:   TemplateArgument type 'int'
+// CHECK:   AnnotateAttr {{.*}} "ANNOTATE_BAR"
+// CHECK:   AnnotateAttr {{.*}} "ANNOTATE_FOO"
+template<typename T> [[clang::annotate("ANNOTATE_FOO"), clang::annotate("ANNOTATE_BAR")]] void HasAnnotations();
+void UseAnnotations() { HasAnnotations<int>(); }
diff --git a/test/SemaTemplate/class-template-decl.cpp b/test/SemaTemplate/class-template-decl.cpp
index 2e36cae..c49154c 100644
--- a/test/SemaTemplate/class-template-decl.cpp
+++ b/test/SemaTemplate/class-template-decl.cpp
@@ -57,8 +57,7 @@
   template<typename T> class X; // expected-error{{expression}}
 }
 
-template<typename T> class X1 var; // expected-warning{{variable templates are a C++14 extension}} \
-                                   // expected-error {{variable has incomplete type 'class X1'}} \
+template<typename T> class X1 var; // expected-error {{variable has incomplete type 'class X1'}} \
                                    // expected-note {{forward declaration of 'X1'}}
 
 namespace M {
diff --git a/test/SemaTemplate/cxx17-inline-variables.cpp b/test/SemaTemplate/cxx17-inline-variables.cpp
index c018050..7fc0aa8 100644
--- a/test/SemaTemplate/cxx17-inline-variables.cpp
+++ b/test/SemaTemplate/cxx17-inline-variables.cpp
@@ -5,3 +5,25 @@
 };
 extern template class DominatorTreeBase<false>;
 constexpr bool k = DominatorTreeBase<false>::IsPostDominator;
+
+namespace CompleteType {
+  template<unsigned N> constexpr int f(const bool (&)[N]) { return 0; }
+
+  template<bool ...V> struct X {
+    static constexpr bool arr[] = {V...};
+    static constexpr int value = f(arr);
+  };
+
+  constexpr int n = X<true>::value;
+}
+
+template <typename T> struct A {
+  static const int n;
+  static const int m;
+  constexpr int f() { return n; }
+  constexpr int g() { return n; }
+};
+template <typename T> constexpr int A<T>::n = sizeof(A) + sizeof(T);
+template <typename T> inline constexpr int A<T>::m = sizeof(A) + sizeof(T);
+static_assert(A<int>().f() == 5);
+static_assert(A<int>().g() == 5);
diff --git a/test/SemaTemplate/cxx1z-fold-expressions.cpp b/test/SemaTemplate/cxx1z-fold-expressions.cpp
index aefee92..383f51d 100644
--- a/test/SemaTemplate/cxx1z-fold-expressions.cpp
+++ b/test/SemaTemplate/cxx1z-fold-expressions.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -std=c++1z -verify %s
+// RUN: %clang_cc1 -std=c++2a -verify %s
 
 template<typename ...T> constexpr auto sum(T ...t) { return (... + t); }
 template<typename ...T> constexpr auto product(T ...t) { return (t * ...); }
@@ -76,3 +77,18 @@
   return (t.*....*ts);
 }
 static_assert(&apply(a, &A::b, &A::B::c, &A::B::C::d, &A::B::C::D::e) == &a.b.c.d.e);
+
+#if __cplusplus > 201703L
+// The <=> operator is unique among binary operators in not being a
+// fold-operator.
+// FIXME: This diagnostic is not great.
+template<typename ...T> constexpr auto spaceship1(T ...t) { return (t <=> ...); } // expected-error {{expected expression}}
+template<typename ...T> constexpr auto spaceship2(T ...t) { return (... <=> t); } // expected-error {{expected expression}}
+template<typename ...T> constexpr auto spaceship3(T ...t) { return (t <=> ... <=> 0); } // expected-error {{expected expression}}
+#endif
+
+// The GNU binary conditional operator ?: is not recognized as a fold-operator.
+// FIXME: Why not? This seems like it would be useful.
+template<typename ...T> constexpr auto binary_conditional1(T ...t) { return (t ?: ...); } // expected-error {{expected expression}}
+template<typename ...T> constexpr auto binary_conditional2(T ...t) { return (... ?: t); } // expected-error {{expected expression}}
+template<typename ...T> constexpr auto binary_conditional3(T ...t) { return (t ?: ... ?: 0); } // expected-error {{expected expression}}
diff --git a/test/SemaTemplate/deduction-crash.cpp b/test/SemaTemplate/deduction-crash.cpp
index 74a2586..2c58fef 100644
--- a/test/SemaTemplate/deduction-crash.cpp
+++ b/test/SemaTemplate/deduction-crash.cpp
@@ -144,3 +144,20 @@
   template<typename T, typename U = void> int n<T *>; // expected-error +{{}} expected-note {{}}
   int k = n<void *>;
 }
+
+namespace deduceFunctionSpecializationForInvalidOutOfLineFunction {
+
+template <typename InputT, typename OutputT>
+struct SourceSelectionRequirement {
+  template<typename T>
+  OutputT evaluateSelectionRequirement(InputT &&Value) {
+  }
+};
+
+template <typename InputT, typename OutputT>
+OutputT SourceSelectionRequirement<InputT, OutputT>::
+evaluateSelectionRequirement<void>(InputT &&Value) { // expected-error {{cannot specialize a member of an unspecialized template}}
+  return Value;
+}
+
+}
diff --git a/test/SemaTemplate/default-expr-arguments-2.cpp b/test/SemaTemplate/default-expr-arguments-2.cpp
index 0379494..bfe87a9 100644
--- a/test/SemaTemplate/default-expr-arguments-2.cpp
+++ b/test/SemaTemplate/default-expr-arguments-2.cpp
@@ -9,8 +9,8 @@
   public: enum { kSomeConst = 128 };
     bar(int x = kSomeConst) {}
   };
-  
-  // CHECK: FunctionDecl{{.*}}f 'void (void)'
+
+  // CHECK: FunctionDecl{{.*}}f 'void ()'
   void f() {
     // CHECK: VarDecl{{.*}}tmp 'bar<int>'
     // CHECK: CXXDefaultArgExpr{{.*}}'int'
diff --git a/test/SemaTemplate/default-expr-arguments-3.cpp b/test/SemaTemplate/default-expr-arguments-3.cpp
index 4449eb7..4d04209 100644
--- a/test/SemaTemplate/default-expr-arguments-3.cpp
+++ b/test/SemaTemplate/default-expr-arguments-3.cpp
@@ -1,55 +1,55 @@
-// RUN: %clang_cc1 -std=c++14 -verify -ast-dump %s | FileCheck %s
-// expected-no-diagnostics
-
-// CHECK: FunctionDecl {{.*}} used func 'void (void)'
-// CHECK-NEXT: TemplateArgument type 'int'
-// CHECK: LambdaExpr {{.*}} 'class (lambda at
-// CHECK: ParmVarDecl {{.*}} used f 'enum foo' cinit
-// CHECK-NEXT: DeclRefExpr {{.*}} 'enum foo' EnumConstant {{.*}} 'a' 'enum foo'
-
-namespace PR28795 {
-  template<typename T>
-  void func() {
-    enum class foo { a, b };
-    auto bar = [](foo f = foo::a) { return f; };
-    bar();
-  }
-
-  void foo() {
-    func<int>();
-  }
-}
-
-// CHECK: ClassTemplateSpecializationDecl {{.*}} struct class2 definition
-// CHECK: TemplateArgument type 'int'
-// CHECK: LambdaExpr {{.*}} 'class (lambda at
-// CHECK: ParmVarDecl {{.*}} used f 'enum foo' cinit
-// CHECK-NEXT: DeclRefExpr {{.*}} 'enum foo' EnumConstant {{.*}} 'a' 'enum foo'
-
-// Template struct case:
-template <class T> struct class2 {
-  void bar() {
-    enum class foo { a, b };
-    [](foo f = foo::a) { return f; }();
-  }
-};
-
-template struct class2<int>;
-
-// CHECK: FunctionTemplateDecl {{.*}} f1
-// CHECK-NEXT: TemplateTypeParmDecl {{.*}} typename depth 0 index 0 T
-// CHECK-NEXT: FunctionDecl {{.*}} f1 'void (void)'
-// CHECK: FunctionDecl {{.*}} f1 'void (void)'
-// CHECK-NEXT: TemplateArgument type 'int'
-// CHECK: ParmVarDecl {{.*}} n 'enum foo' cinit
-// CHECK-NEXT: DeclRefExpr {{.*}} 'enum foo' EnumConstant {{.*}} 'a' 'enum foo'
-
-template<typename T>
-void f1() {
-  enum class foo { a, b };
-  struct S {
-    int g1(foo n = foo::a);
-  };
-}
-
-template void f1<int>();
+// RUN: %clang_cc1 -std=c++14 -verify -ast-dump %s | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK: FunctionDecl {{.*}} used func 'void ()'
+// CHECK-NEXT: TemplateArgument type 'int'
+// CHECK: LambdaExpr {{.*}} '(lambda at
+// CHECK: ParmVarDecl {{.*}} used f 'foo' cinit
+// CHECK-NEXT: DeclRefExpr {{.*}} 'foo' EnumConstant {{.*}} 'a' 'foo'
+
+namespace PR28795 {
+  template<typename T>
+  void func() {
+    enum class foo { a, b };
+    auto bar = [](foo f = foo::a) { return f; };
+    bar();
+  }
+
+  void foo() {
+    func<int>();
+  }
+}
+
+// CHECK: ClassTemplateSpecializationDecl {{.*}} struct class2 definition
+// CHECK: TemplateArgument type 'int'
+// CHECK: LambdaExpr {{.*}} '(lambda at
+// CHECK: ParmVarDecl {{.*}} used f 'foo' cinit
+// CHECK-NEXT: DeclRefExpr {{.*}} 'foo' EnumConstant {{.*}} 'a' 'foo'
+
+// Template struct case:
+template <class T> struct class2 {
+  void bar() {
+    enum class foo { a, b };
+    [](foo f = foo::a) { return f; }();
+  }
+};
+
+template struct class2<int>;
+
+// CHECK: FunctionTemplateDecl {{.*}} f1
+// CHECK-NEXT: TemplateTypeParmDecl {{.*}} typename depth 0 index 0 T
+// CHECK-NEXT: FunctionDecl {{.*}} f1 'void ()'
+// CHECK: FunctionDecl {{.*}} f1 'void ()'
+// CHECK-NEXT: TemplateArgument type 'int'
+// CHECK: ParmVarDecl {{.*}} n 'foo' cinit
+// CHECK-NEXT: DeclRefExpr {{.*}} 'foo' EnumConstant {{.*}} 'a' 'foo'
+
+template<typename T>
+void f1() {
+  enum class foo { a, b };
+  struct S {
+    int g1(foo n = foo::a);
+  };
+}
+
+template void f1<int>();
diff --git a/test/SemaTemplate/explicit-instantiation.cpp b/test/SemaTemplate/explicit-instantiation.cpp
index 42d9f4d..d88c50b 100644
--- a/test/SemaTemplate/explicit-instantiation.cpp
+++ b/test/SemaTemplate/explicit-instantiation.cpp
@@ -124,10 +124,10 @@
 namespace undefined_static_data_member {
   template<typename T> struct A {
     static int a; // expected-note {{here}}
-    template<typename U> static int b; // expected-note {{here}} expected-warning {{extension}}
+    template<typename U> static int b; // expected-note {{here}} expected-warning 0+ {{extension}}
   };
   struct B {
-    template<typename U> static int c; // expected-note {{here}} expected-warning {{extension}}
+    template<typename U> static int c; // expected-note {{here}} expected-warning 0+ {{extension}}
   };
 
   template int A<int>::a; // expected-error {{explicit instantiation of undefined static data member 'a' of class template 'undefined_static_data_member::A<int>'}}
@@ -137,14 +137,14 @@
 
   template<typename T> struct C {
     static int a;
-    template<typename U> static int b; // expected-warning {{extension}}
+    template<typename U> static int b; // expected-warning 0+ {{extension}}
   };
   struct D {
-    template<typename U> static int c; // expected-warning {{extension}}
+    template<typename U> static int c; // expected-warning 0+ {{extension}}
   };
   template<typename T> int C<T>::a;
-  template<typename T> template<typename U> int C<T>::b; // expected-warning {{extension}}
-  template<typename U> int D::c; // expected-warning {{extension}}
+  template<typename T> template<typename U> int C<T>::b; // expected-warning 0+ {{extension}}
+  template<typename U> int D::c; // expected-warning 0+ {{extension}}
 
   template int C<int>::a;
   template int C<int>::b<int>;
diff --git a/test/SemaTemplate/explicit-specialization-member.cpp b/test/SemaTemplate/explicit-specialization-member.cpp
index c0c3680..e8165ac 100644
--- a/test/SemaTemplate/explicit-specialization-member.cpp
+++ b/test/SemaTemplate/explicit-specialization-member.cpp
@@ -38,24 +38,20 @@
 
   template<typename T>
   template<int N>
-  void Baz<T>::bar() { // expected-note {{couldn't infer template argument 'N'}}
+  void Baz<T>::bar() {
   }
 
-  // FIXME: We shouldn't try to match this against a prior declaration if
-  // template parameter matching failed.
   template<typename T>
-  void Baz<T>::bar<0>() { // expected-error {{cannot specialize a member of an unspecialized template}} \
-                          // expected-error {{no function template matches}}
+  void Baz<T>::bar<0>() { // expected-error {{cannot specialize a member of an unspecialized template}}
   }
 }
 
 namespace PR19340 {
 template<typename T> struct Helper {
-  template<int N> static void func(const T *m) {} // expected-note {{failed template argument deduction}}
+  template<int N> static void func(const T *m) {}
 };
 
-template<typename T> void Helper<T>::func<2>() {} // expected-error {{cannot specialize a member}} \
-                                                  // expected-error {{no function template matches}}
+template<typename T> void Helper<T>::func<2>() {} // expected-error {{cannot specialize a member}}
 }
 
 namespace SpecLoc {
diff --git a/test/SemaTemplate/instantiate-init.cpp b/test/SemaTemplate/instantiate-init.cpp
index 244e94f..51fa695 100644
--- a/test/SemaTemplate/instantiate-init.cpp
+++ b/test/SemaTemplate/instantiate-init.cpp
@@ -142,3 +142,17 @@
   template<typename T> X f() { return {}; }
   auto &&x = f<void>();
 }
+
+namespace InitListUpdate {
+  struct A { int n; };
+  using AA = A[1];
+
+  // Check that an init list update doesn't "lose" the pack-ness of an expression.
+  template <int... N> void f() {
+    g(AA{0, [0].n = N} ...); // expected-warning 3{{overrides prior init}} expected-note 3{{previous init}}
+    g(AA{N, [0].n = 0} ...); // expected-warning 3{{overrides prior init}} expected-note 3{{previous init}}
+  };
+
+  void g(AA, AA);
+  void h() { f<1, 2>(); } // expected-note {{instantiation of}}
+}
diff --git a/test/SemaTemplate/nested-deduction-guides.cpp b/test/SemaTemplate/nested-deduction-guides.cpp
new file mode 100644
index 0000000..2c5dda4
--- /dev/null
+++ b/test/SemaTemplate/nested-deduction-guides.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -std=c++17 -verify %s
+// expected-no-diagnostics
+
+template<typename T> struct A {
+  template<typename U> struct B {
+    B(...);
+  };
+  template<typename U> B(U) -> B<U>;
+};
+A<void>::B b = 123;
+
+using T = decltype(b);
+using T = A<void>::B<int>;
diff --git a/test/SemaTemplate/nested-template.cpp b/test/SemaTemplate/nested-template.cpp
index 44cb82e..efbde20 100644
--- a/test/SemaTemplate/nested-template.cpp
+++ b/test/SemaTemplate/nested-template.cpp
@@ -161,3 +161,10 @@
     template <typename T> struct X;
     template <typename T> int X<T>::func() {} //  expected-error{{out-of-line definition of 'func' from class 'X<T>' without definition}}
 };
+
+namespace RefPack {
+  template<const int &...N> struct A { template<typename ...T> void f(T (&...t)[N]); };
+  constexpr int k = 10;
+  int arr[10];
+  void g() { A<k>().f(arr); }
+}
diff --git a/test/SemaTemplate/sizeof-pack.cpp b/test/SemaTemplate/sizeof-pack.cpp
new file mode 100644
index 0000000..4b0c883
--- /dev/null
+++ b/test/SemaTemplate/sizeof-pack.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s
+// expected-no-diagnostics
+
+template<int &...Ns> int f() {
+  return sizeof...(Ns);
+}
+template int f<>();
diff --git a/test/SemaTemplate/stmt-expr.cpp b/test/SemaTemplate/stmt-expr.cpp
new file mode 100644
index 0000000..2516a52
--- /dev/null
+++ b/test/SemaTemplate/stmt-expr.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -verify %s
+
+// FIXME: We could in principle support cases like this (particularly, cases
+// where the statement-expression contains no labels).
+template <typename... T> void f1() {
+  int arr[] = {
+    ({
+      T(); // expected-error {{unexpanded parameter pack}}
+    }) ... // expected-error {{does not contain any unexpanded parameter packs}}
+  };
+}
+
+// FIXME: The error for this isn't ideal; it'd be preferable to say that pack
+// expansion of a statement expression is not permitted.
+template <typename... T> void f2() {
+  [] {
+    int arr[] = {
+      T() + ({
+      foo:
+        T t; // expected-error {{unexpanded parameter pack}}
+        goto foo;
+        0;
+      }) ...
+    };
+  };
+}
+
+template <typename... T> void f3() {
+  ({
+    int arr[] = {
+      [] {
+      foo:
+        T t; // OK, expanded within compound statement
+        goto foo;
+        return 0;
+      } ...
+    };
+  });
+}
diff --git a/test/SemaTemplate/temp_arg_enum_printing.cpp b/test/SemaTemplate/temp_arg_enum_printing.cpp
index bdf277d..dbb4db8 100644
--- a/test/SemaTemplate/temp_arg_enum_printing.cpp
+++ b/test/SemaTemplate/temp_arg_enum_printing.cpp
@@ -13,9 +13,9 @@
 void foo();
   
 void test() {
-  // CHECK: template<> void foo<NamedEnumNS::NamedEnum::Val0>()
+  // CHECK: template<> void foo<NamedEnumNS::Val0>()
   NamedEnumNS::foo<Val0>();
-  // CHECK: template<> void foo<NamedEnumNS::NamedEnum::Val1>()
+  // CHECK: template<> void foo<NamedEnumNS::Val1>()
   NamedEnumNS::foo<(NamedEnum)1>();
   // CHECK: template<> void foo<2>()
   NamedEnumNS::foo<(NamedEnum)2>();
diff --git a/test/SemaTemplate/temp_arg_enum_printing_more.cpp b/test/SemaTemplate/temp_arg_enum_printing_more.cpp
new file mode 100644
index 0000000..a3a7158
--- /dev/null
+++ b/test/SemaTemplate/temp_arg_enum_printing_more.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -fsyntax-only -ast-print %s -std=c++11 | FileCheck %s
+
+// Make sure that for template value arguments that are unscoped enumerators,
+// no qualified enum information is included in their name, as their visibility
+// is global. In the case of scoped enumerators, they must include information
+// about their enum enclosing scope.
+
+enum E1 { e1 };
+template<E1 v> struct tmpl_1 {};
+// CHECK: template<> struct tmpl_1<e1>
+tmpl_1<E1::e1> TMPL_1;                      // Name must be 'e1'.
+
+namespace nsp_1 { enum E2 { e2 }; }
+template<nsp_1::E2 v> struct tmpl_2 {};
+// CHECK: template<> struct tmpl_2<nsp_1::e2>
+tmpl_2<nsp_1::E2::e2> TMPL_2;               // Name must be 'nsp_1::e2'.
+
+enum class E3 { e3 };
+template<E3 v> struct tmpl_3 {};
+// CHECK: template<> struct tmpl_3<E3::e3>
+tmpl_3<E3::e3> TMPL_3;                      // Name must be 'E3::e3'.
+
+namespace nsp_2 { enum class E4 { e4 }; }
+template<nsp_2::E4 v> struct tmpl_4 {};
+// CHECK: template<> struct tmpl_4<nsp_2::E4::e4>
+tmpl_4<nsp_2::E4::e4> TMPL_4;               // Name must be 'nsp_2::E4::e4'.
diff --git a/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp b/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp
index c1fcedd..1a84d54 100644
--- a/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp
+++ b/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp
@@ -23,6 +23,9 @@
   A<const char*, &(&x)[1]> h; // expected-error {{refers to subobject '&x + 1'}}
   A<const char*, 0> i; // expected-error {{not allowed in a converted constant}}
   A<const char*, nullptr> j;
+
+  extern char aub[];
+  A<char[], aub> k;
 }
 
 namespace Function {
@@ -235,6 +238,10 @@
     constexpr char s[] = "test";
     template<const auto* p> struct S { };
     S<s> p;
+
+    template<typename R, typename P, R F(P)> struct A {};
+    template<typename R, typename P, R F(P)> void x(A<R, P, F> a);
+    void g(int) { x(A<void, int, &g>()); }
   }
 
   namespace DecltypeAuto {
diff --git a/test/SemaTemplate/temp_arg_pack.cpp b/test/SemaTemplate/temp_arg_pack.cpp
new file mode 100644
index 0000000..b79dca7
--- /dev/null
+++ b/test/SemaTemplate/temp_arg_pack.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -verify %s
+
+namespace deduce_pack_non_pack {
+  template <typename...> class A;
+  template <typename> struct C {};
+  template <typename T> void g(C<A<T>>); // expected-note {{candidate template ignored: deduced type 'C<A<[...], (no argument)>>' of 1st parameter does not match adjusted type 'C<A<[...], int>>' of argument [with T = bool]}}
+  void h(C<A<bool, int>> &x) { g(x); } // expected-error {{no matching function}}
+}
diff --git a/test/SemaTemplate/undefined-template.cpp b/test/SemaTemplate/undefined-template.cpp
index 7dfe2fd..52530e2 100644
--- a/test/SemaTemplate/undefined-template.cpp
+++ b/test/SemaTemplate/undefined-template.cpp
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++14 -Wundefined-func-template %s
 
+#if !defined(INCLUDE)
 template <class T> struct C1 {
   static char s_var_1;       // expected-note{{forward declaration of template entity is here}}
   static char s_var_2;       // expected-note{{forward declaration of template entity is here}}
@@ -142,6 +143,16 @@
   void h(X<int> x) { g(x); } // no warning for use of 'g' despite the declaration having been instantiated from a template
 }
 
+#define INCLUDE
+#include "undefined-template.cpp"
+void func_25(SystemHeader<char> *x) {
+  x->meth();
+}
+
 int main() {
   return 0;
 }
+#else
+#pragma clang system_header
+template <typename T> struct SystemHeader { T meth(); };
+#endif
diff --git a/test/SemaTemplate/warn-thread-safety-analysis.cpp b/test/SemaTemplate/warn-thread-safety-analysis.cpp
new file mode 100644
index 0000000..03bae7a
--- /dev/null
+++ b/test/SemaTemplate/warn-thread-safety-analysis.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=c++11 %s -verify -Wthread-safety-analysis
+
+class Mutex {
+public:
+  void Lock() __attribute__((exclusive_lock_function()));
+  void Unlock() __attribute__((unlock_function()));
+};
+
+class A {
+public:
+  Mutex mu1, mu2;
+
+  void foo() __attribute__((exclusive_locks_required(mu1))) __attribute__((exclusive_locks_required(mu2))) {}
+
+  template <class T> void bar() __attribute__((exclusive_locks_required(mu1))) __attribute__((exclusive_locks_required(mu2))) {
+    foo();
+  }
+};
+
+void f() {
+  A a;
+  a.mu1.Lock();
+  a.mu2.Lock();
+  a.bar<int>();
+  a.mu2.Unlock();
+  a.bar<int>(); // expected-warning {{calling function 'bar' requires holding mutex 'a.mu2' exclusively}}
+  a.mu1.Unlock();
+  a.bar<int>(); // expected-warning {{calling function 'bar' requires holding mutex 'a.mu1' exclusively}} \
+                   expected-warning {{calling function 'bar' requires holding mutex 'a.mu2' exclusively}}
+}
diff --git a/test/TableGen/anonymous-groups.td b/test/TableGen/anonymous-groups.td
index acc0a21..9e8dc39 100644
--- a/test/TableGen/anonymous-groups.td
+++ b/test/TableGen/anonymous-groups.td
@@ -7,21 +7,17 @@
 
 
 def InNamedGroup : Warning<"">, InGroup<DiagGroup<"name">>;
-//      CHECK: anonymous-groups.td:[[@LINE-1]]:41: error: group 'name' is referred to anonymously
+//      CHECK: anonymous-groups.td:[[@LINE-1]]:1: error: group 'name' is referred to anonymously
 // CHECK-NEXT: {{^def InNamedGroup : Warning<"">, InGroup<DiagGroup<"name">>;}}
-// CHECK-NEXT: {{^                                ~~~~~~~~\^~~~~~~~~~~~~~~~~~}}
-// CHECK-NEXT: {{^                                InGroup<NamedGroup>}}
-// CHECK-NEXT: anonymous-groups.td:6:1: note: group defined here
+//      CHECK: anonymous-groups.td:6:1: note: group defined here
 // CHECK-NEXT: def NamedGroup : DiagGroup<"name">;
 // CHECK-NEXT: ^
 
 
 def AlsoInNamedGroup : Warning<"">, InGroup  < DiagGroup<"name"> >;
-//      CHECK: anonymous-groups.td:[[@LINE-1]]:48: error: group 'name' is referred to anonymously
+//      CHECK: anonymous-groups.td:[[@LINE-1]]:1: error: group 'name' is referred to anonymously
 // CHECK-NEXT: {{^def AlsoInNamedGroup : Warning<"">, InGroup  < DiagGroup<"name"> >;}}
-// CHECK-NEXT: {{^                                    ~~~~~~~~~~~\^~~~~~~~~~~~~~~~~~~}}
-// CHECK-NEXT: {{^                                    InGroup<NamedGroup>}}
-// CHECK-NEXT: anonymous-groups.td:6:1: note: group defined here
+//      CHECK: anonymous-groups.td:6:1: note: group defined here
 // CHECK-NEXT: def NamedGroup : DiagGroup<"name">;
 // CHECK-NEXT: ^
 
@@ -31,12 +27,8 @@
 def AnonymousGroupAgain : Warning<"">,
   InGroup<DiagGroup<"anonymous">>;
 
-//      CHECK: anonymous-groups.td:[[@LINE-5]]:43: error: group 'anonymous' is referred to anonymously
+//      CHECK: anonymous-groups.td:[[@LINE-5]]:1: error: group 'anonymous' is referred to anonymously
 // CHECK-NEXT: {{^def AnonymousGroup : Warning<"">, InGroup<DiagGroup<"anonymous">>;}}
-// CHECK-NEXT: {{^                                  ~~~~~~~~\^~~~~~~~~~~~~~~~~~~~~~~}}
-// CHECK-NEXT: anonymous-groups.td:[[@LINE-7]]:47: note: also referenced here
+//      CHECK: anonymous-groups.td:[[@LINE-6]]:1: note: also referenced here
 // CHECK-NEXT: {{^def AlsoAnonymousGroup : Warning<"">, InGroup<DiagGroup<"anonymous">>;}}
-// CHECK-NEXT: {{^                                      ~~~~~~~~\^~~~~~~~~~~~~~~~~~~~~~~}}
-// CHECK-NEXT: anonymous-groups.td:[[@LINE-8]]:11: note: also referenced here
-// CHECK-NEXT: {{^  InGroup<DiagGroup<"anonymous">>;}}
-// CHECK-NEXT: {{^  ~~~~~~~~\^~~~~~~~~~~~~~~~~~~~~~~}}
+//      CHECK: anonymous-groups.td:[[@LINE-7]]:1: note: also referenced here
diff --git a/test/TableGen/tg-fixits.td b/test/TableGen/tg-fixits.td
index d04a6a6..f0f62ef 100644
--- a/test/TableGen/tg-fixits.td
+++ b/test/TableGen/tg-fixits.td
@@ -4,38 +4,22 @@
 def NamedGroup : DiagGroup<"name">;
 
 def InNamedGroup : Warning<"">, InGroup<DiagGroup<"name">>;
-//      CHECK: tg-fixits.td:[[@LINE-1]]:41: error: group 'name' is referred to anonymously
+//      CHECK: tg-fixits.td:[[@LINE-1]]:1: error: group 'name' is referred to anonymously
 // CHECK-NEXT: {{^def InNamedGroup : Warning<"">, InGroup<DiagGroup<"name">>;}}
-// CHECK-NEXT: {{^                                ~~~~~~~~\^~~~~~~~~~~~~~~~~~}}
-// CHECK-NEXT: {{^                                InGroup<NamedGroup>}}
 
 def Wrapped : Warning<"">, InGroup<DiagGroup<
   "name">>;
-//      CHECK: tg-fixits.td:[[@LINE-2]]:36: error: group 'name' is referred to anonymously
+//      CHECK: tg-fixits.td:[[@LINE-2]]:1: error: group 'name' is referred to anonymously
 // CHECK-NEXT: {{^def Wrapped : Warning<"">, InGroup<DiagGroup<}}
-// CHECK-NEXT: {{^                           ~~~~~~~~\^~~~~~~~~~}}
-// CHECK-NEXT: {{^                           InGroup<NamedGroup>}}
 
 def AlsoWrapped : Warning<"">, InGroup<
   DiagGroup<"name">>;
-//      CHECK: tg-fixits.td:[[@LINE-1]]:3: error: group 'name' is referred to anonymously
-// CHECK-NEXT: {{^  DiagGroup<"name">>;}}
-// CHECK-NEXT: {{^~~\^~~~~~~~~~~~~~~~~~}}
-// CHECK-NEXT: {{^InGroup<NamedGroup>}}
-
-// The following lines contain hard tabs (\t); do not change this!
-def HardTabs : Warning<"">,
-	InGroup<	DiagGroup<"name">	>;
-//      CHECK: tg-fixits.td:[[@LINE-1]]:11: error: group 'name' is referred to anonymously
-// CHECK-NEXT: {{^        InGroup<        DiagGroup<"name">       >;}}
-// CHECK-NEXT: {{^        ~~~~~~~~~~~~~~~~\^~~~~~~~~~~~~~~~~~~~~~~~~}}
-// CHECK-NEXT: {{^        InGroup<NamedGrop>}}
+//      CHECK: tg-fixits.td:[[@LINE-2]]:1: error: group 'name' is referred to anonymously
 
 // The following line has Unicode characters in it; do not change them!
 // FIXME: For now, we just give up on printing carets/ranges/fixits for
 // lines with Unicode in them, because SMDiagnostic don't keep a byte<->column
 // map around to line things up like Clang does.
 def Unicode : Warning<"ユニコード">, InGroup<DiagGroup<"name">>;
-//      CHECK: tg-fixits.td:[[@LINE-1]]:51: error: group 'name' is referred to anonymously
+//      CHECK: tg-fixits.td:[[@LINE-1]]:1: error: group 'name' is referred to anonymously
 // CHECK-NEXT: def Unicode : Warning<"{{[^"]+}}">, InGroup<DiagGroup<"name">>;
-// CHECK-NEXT: note:
diff --git a/test/Templight/templight-deduced-func.cpp b/test/Templight/templight-deduced-func.cpp
new file mode 100644
index 0000000..779fb5f
--- /dev/null
+++ b/test/Templight/templight-deduced-func.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -templight-dump %s 2>&1 | FileCheck %s
+
+template <class T>
+int foo(T){return 0;}
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+foo$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-deduced-func.cpp:4:5'}}
+// CHECK: {{^poi:[ ]+'.*templight-deduced-func.cpp:44:12'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+foo$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-deduced-func.cpp:4:5'}}
+// CHECK: {{^poi:[ ]+'.*templight-deduced-func.cpp:44:12'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-deduced-func.cpp:4:5'}}
+// CHECK: {{^poi:[ ]+'.*templight-deduced-func.cpp:44:12'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-deduced-func.cpp:4:5'}}
+// CHECK: {{^poi:[ ]+'.*templight-deduced-func.cpp:44:12'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-deduced-func.cpp:4:5'}}
+// CHECK: {{^poi:[ ]+'.*templight-deduced-func.cpp:44:12'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-deduced-func.cpp:4:5'}}
+// CHECK: {{^poi:[ ]+'.*templight-deduced-func.cpp:44:12'$}}
+int gvar = foo(0);
diff --git a/test/Templight/templight-default-arg-inst.cpp b/test/Templight/templight-default-arg-inst.cpp
new file mode 100644
index 0000000..fe2bab3
--- /dev/null
+++ b/test/Templight/templight-default-arg-inst.cpp
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -templight-dump %s 2>&1 | FileCheck %s
+template<class T, class U = T>
+class A {};
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A::U'$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:2:25'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:1'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A::U'$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:2:25'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:1'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A::U'$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentChecking$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:2:25'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:6'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A::U'$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentChecking$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:2:25'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:6'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int, int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:8'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int, int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:8'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int, int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:8'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int, int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:8'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int, int>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:8'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int, int>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:8'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int, int>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:8'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int, int>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-arg-inst.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-arg-inst.cpp:82:8'$}}
+A<int> a;
diff --git a/test/Templight/templight-default-func-arg.cpp b/test/Templight/templight-default-func-arg.cpp
new file mode 100644
index 0000000..19e6039
--- /dev/null
+++ b/test/Templight/templight-default-func-arg.cpp
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -std=c++14 -templight-dump %s 2>&1 | FileCheck %s
+template <class T>
+void foo(T b = 0) {};
+
+int main()
+{
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+foo$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-func-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-func-arg.cpp:72:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+foo$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-func-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-func-arg.cpp:72:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+foo$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-func-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-func-arg.cpp:72:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+foo$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-func-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-func-arg.cpp:72:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-func-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-func-arg.cpp:72:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-func-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-func-arg.cpp:72:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+b$}}
+// CHECK: {{^kind:[ ]+DefaultFunctionArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-func-arg.cpp:3:12'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-func-arg.cpp:72:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+b$}}
+// CHECK: {{^kind:[ ]+DefaultFunctionArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-func-arg.cpp:3:12'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-func-arg.cpp:72:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-func-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-func-arg.cpp:72:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-func-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-func-arg.cpp:72:3'$}}
+  foo<int>();
+}
diff --git a/test/Templight/templight-default-template-arg.cpp b/test/Templight/templight-default-template-arg.cpp
new file mode 100644
index 0000000..5d2ccb4
--- /dev/null
+++ b/test/Templight/templight-default-template-arg.cpp
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -templight-dump %s 2>&1 | FileCheck %s
+template <class T = int>
+class A {};
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A::T'$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentChecking$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-template-arg.cpp:2:17'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-template-arg.cpp:69:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A::T'$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentChecking$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-template-arg.cpp:2:17'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-template-arg.cpp:69:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-template-arg.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-template-arg.cpp:69:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-template-arg.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-template-arg.cpp:69:5'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-template-arg.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-template-arg.cpp:69:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-template-arg.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-template-arg.cpp:69:5'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-template-arg.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-template-arg.cpp:69:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-template-arg.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-template-arg.cpp:69:5'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-template-arg.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-template-arg.cpp:69:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'A<int>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-default-template-arg.cpp:3:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-default-template-arg.cpp:69:5'$}}
+A<> a;
diff --git a/test/Templight/templight-exception-spec-func.cpp b/test/Templight/templight-exception-spec-func.cpp
new file mode 100644
index 0000000..39a66d0
--- /dev/null
+++ b/test/Templight/templight-exception-spec-func.cpp
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -templight-dump -std=c++14 %s 2>&1 | FileCheck %s
+template <bool B>
+void f() noexcept(B) {}
+
+int main()
+{
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+f$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-exception-spec-func.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-exception-spec-func.cpp:72:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+f$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-exception-spec-func.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-exception-spec-func.cpp:72:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+f$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-exception-spec-func.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-exception-spec-func.cpp:72:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+f$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-exception-spec-func.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-exception-spec-func.cpp:72:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'f<false>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-exception-spec-func.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-exception-spec-func.cpp:72:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'f<false>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-exception-spec-func.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-exception-spec-func.cpp:72:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'f<false>'$}}
+// CHECK: {{^kind:[ ]+ExceptionSpecInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-exception-spec-func.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-exception-spec-func.cpp:72:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'f<false>'$}}
+// CHECK: {{^kind:[ ]+ExceptionSpecInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-exception-spec-func.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-exception-spec-func.cpp:72:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'f<false>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-exception-spec-func.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-exception-spec-func.cpp:72:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'f<false>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-exception-spec-func.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-exception-spec-func.cpp:72:3'$}}
+  f<false>();
+}
diff --git a/test/Templight/templight-explicit-template-arg.cpp b/test/Templight/templight-explicit-template-arg.cpp
new file mode 100644
index 0000000..678cba2
--- /dev/null
+++ b/test/Templight/templight-explicit-template-arg.cpp
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -templight-dump %s 2>&1 | FileCheck %s
+template <class T>
+void f(){}
+
+int main()
+{
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+f$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-explicit-template-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-explicit-template-arg.cpp:58:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+f$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-explicit-template-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-explicit-template-arg.cpp:58:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+f$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-explicit-template-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-explicit-template-arg.cpp:58:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+f$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-explicit-template-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-explicit-template-arg.cpp:58:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'f<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-explicit-template-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-explicit-template-arg.cpp:58:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'f<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-explicit-template-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-explicit-template-arg.cpp:58:3'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'f<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-explicit-template-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-explicit-template-arg.cpp:58:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'f<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-explicit-template-arg.cpp:3:6'}}
+// CHECK: {{^poi:[ ]+'.*templight-explicit-template-arg.cpp:58:3'$}}
+  f<int>();
+}
diff --git a/test/Templight/templight-memoization.cpp b/test/Templight/templight-memoization.cpp
new file mode 100644
index 0000000..ddf2447
--- /dev/null
+++ b/test/Templight/templight-memoization.cpp
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -templight-dump %s 2>&1 | FileCheck %s
+
+template <class T>
+struct foo {};
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-memoization.cpp:18:10'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-memoization.cpp:18:10'$}}
+foo<int> x;
+
+// CHECK-LABEL: {{^---$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK-LABEL: {{^---$}}
+
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-memoization.cpp:41:10'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-memoization.cpp:41:10'$}}
+foo<int> y;
+
diff --git a/test/Templight/templight-nested-memoization.cpp b/test/Templight/templight-nested-memoization.cpp
new file mode 100644
index 0000000..37e8483
--- /dev/null
+++ b/test/Templight/templight-nested-memoization.cpp
@@ -0,0 +1,174 @@
+// RUN: %clang_cc1 -templight-dump %s 2>&1 | FileCheck %s
+
+template <int N>
+struct fib
+{
+  static const int value = fib<N-1>::value + fib<N-2>::value;
+};
+
+template <>
+struct fib<0>
+{
+  static const int value = 1;
+};
+
+template <>
+struct fib<1>
+{
+  static const int value = 1;
+};
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<4>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:173:8'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<4>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:173:8'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<4>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:173:8'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<3>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<3>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<3>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<2>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<2>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<2>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<1>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:16:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<1>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:16:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<0>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:10:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:46'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<0>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:10:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:46'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<2>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<2>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<2>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<1>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:16:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:46'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<1>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:16:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:46'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<3>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<3>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<3>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:28'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<2>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:46'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'fib<2>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:6:46'$}}
+// CHECK-LABEL: {{^---$}}
+//
+// CHECK: {{^name:[ ]+'fib<4>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-memoization.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-memoization.cpp:173:8'$}}
+fib<4> x;
+
diff --git a/test/Templight/templight-nested-template-instantiation.cpp b/test/Templight/templight-nested-template-instantiation.cpp
new file mode 100644
index 0000000..a064c16
--- /dev/null
+++ b/test/Templight/templight-nested-template-instantiation.cpp
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -templight-dump %s 2>&1 | FileCheck %s
+
+template <int N>
+struct foo : foo<N - 1> {};
+
+template <>
+struct foo<0> {};
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<2>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:84:8'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<2>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:84:8'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<2>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:84:8'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:4:14'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:4:14'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:4:14'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<0>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:7:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:4:14'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<0>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:7:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:4:14'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:4:14'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<1>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:4:14'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<1>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:4:14'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<2>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-nested-template-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-nested-template-instantiation.cpp:84:8'$}}
+foo<2> x;
diff --git a/test/Templight/templight-one-instantiation.cpp b/test/Templight/templight-one-instantiation.cpp
new file mode 100644
index 0000000..e865ef3
--- /dev/null
+++ b/test/Templight/templight-one-instantiation.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -templight-dump %s 2>&1 | FileCheck %s
+
+template <class T>
+struct foo {};
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-one-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-one-instantiation.cpp:18:10'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'foo<int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-one-instantiation.cpp:4:8'}}
+// CHECK: {{^poi:[ ]+'.*templight-one-instantiation.cpp:18:10'$}}
+foo<int> x;
diff --git a/test/Templight/templight-prior-template-arg.cpp b/test/Templight/templight-prior-template-arg.cpp
new file mode 100644
index 0000000..e9b1dd47
--- /dev/null
+++ b/test/Templight/templight-prior-template-arg.cpp
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 -templight-dump %s 2>&1 | FileCheck %s
+template<class T>
+class A {};
+
+template <template <class Inner> class Outer>
+class B {};
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'B::Outer'$}}
+// CHECK: {{^kind:[ ]+PriorTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-prior-template-arg.cpp:5:40'}}
+// CHECK: {{^poi:[ ]+'.*templight-prior-template-arg.cpp:72:1'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'B::Outer'$}}
+// CHECK: {{^kind:[ ]+PriorTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-prior-template-arg.cpp:5:40'}}
+// CHECK: {{^poi:[ ]+'.*templight-prior-template-arg.cpp:72:1'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'B<A>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-prior-template-arg.cpp:6:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-prior-template-arg.cpp:72:6'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'B<A>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-prior-template-arg.cpp:6:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-prior-template-arg.cpp:72:6'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'B<A>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-prior-template-arg.cpp:6:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-prior-template-arg.cpp:72:6'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'B<A>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-prior-template-arg.cpp:6:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-prior-template-arg.cpp:72:6'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'B<A>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-prior-template-arg.cpp:6:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-prior-template-arg.cpp:72:6'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'B<A>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-prior-template-arg.cpp:6:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-prior-template-arg.cpp:72:6'$}}
+//
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'B<A>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-prior-template-arg.cpp:6:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-prior-template-arg.cpp:72:6'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'B<A>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-prior-template-arg.cpp:6:7'}}
+// CHECK: {{^poi:[ ]+'.*templight-prior-template-arg.cpp:72:6'$}}
+B<A> b;
diff --git a/test/Tooling/Inputs/fixed-header.h b/test/Tooling/Inputs/fixed-header.h
new file mode 100644
index 0000000..4ce318f
--- /dev/null
+++ b/test/Tooling/Inputs/fixed-header.h
@@ -0,0 +1 @@
+#define SECRET_SYMBOL 1
diff --git a/test/Tooling/clang-diff-json.cpp b/test/Tooling/clang-diff-json.cpp
index 9aac6fa..20c808c 100644
--- a/test/Tooling/clang-diff-json.cpp
+++ b/test/Tooling/clang-diff-json.cpp
@@ -1,10 +1,10 @@
 // RUN: clang-diff -ast-dump-json %s -- \
-// RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
+// RUN: | '%python' -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
 // RUN: | FileCheck %s
 
-// CHECK: "begin": 299,
+// CHECK: "begin": 301,
 // CHECK: "type": "FieldDecl",
-// CHECK: "end": 319,
+// CHECK: "end": 321,
 // CHECK: "type": "CXXRecordDecl",
 class A {
   int x;
diff --git a/test/Tooling/fixed-database.cpp b/test/Tooling/fixed-database.cpp
new file mode 100644
index 0000000..73d0779
--- /dev/null
+++ b/test/Tooling/fixed-database.cpp
@@ -0,0 +1,19 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t/Src
+// RUN: cp "%s" "%t/Src/test.cpp"
+// RUN: mkdir -p %t/Include
+// RUN: cp "%S/Inputs/fixed-header.h" "%t/Include/"
+// -I flag is relative to %t (where compile_flags is), not Src/.
+// RUN: echo '-IInclude/' >> %t/compile_flags.txt
+// RUN: echo "-Dklazz=class" >> %t/compile_flags.txt
+// RUN: echo '-std=c++11' >> %t/compile_flags.txt
+// RUN: clang-check "%t/Src/test.cpp" 2>&1
+// RUN: echo > %t/compile_flags.txt
+// RUN: not clang-check "%t/Src/test.cpp" 2>&1 | FileCheck "%s" -check-prefix=NODB
+
+// NODB: unknown type name 'klazz'
+klazz F{};
+
+// NODB: 'fixed-header.h' file not found
+#include "fixed-header.h"
+static_assert(SECRET_SYMBOL == 1, "");
diff --git a/test/Unit/lit.cfg.py b/test/Unit/lit.cfg.py
index c4794cc..342b692 100644
--- a/test/Unit/lit.cfg.py
+++ b/test/Unit/lit.cfg.py
@@ -35,17 +35,23 @@
     if symbolizer in os.environ:
         config.environment[symbolizer] = os.environ[symbolizer]
 
-shlibpath_var = ''
-if platform.system() == 'Linux':
-    shlibpath_var = 'LD_LIBRARY_PATH'
-elif platform.system() == 'Darwin':
-    shlibpath_var = 'DYLD_LIBRARY_PATH'
-elif platform.system() == 'Windows':
-    shlibpath_var = 'PATH'
+def find_shlibpath_var():
+    if platform.system() in ['Linux', 'FreeBSD', 'NetBSD', 'SunOS']:
+        yield 'LD_LIBRARY_PATH'
+    elif platform.system() == 'Darwin':
+        yield 'DYLD_LIBRARY_PATH'
+    elif platform.system() == 'Windows':
+        yield 'PATH'
 
-# in stand-alone builds, shlibdir is clang's build tree
-# while llvm_libs_dir is installed LLVM (and possibly older clang)
-shlibpath = os.path.pathsep.join((config.shlibdir, config.llvm_libs_dir,
-                                 config.environment.get(shlibpath_var,'')))
-
-config.environment[shlibpath_var] = shlibpath
+for shlibpath_var in find_shlibpath_var():
+    # in stand-alone builds, shlibdir is clang's build tree
+    # while llvm_libs_dir is installed LLVM (and possibly older clang)
+    shlibpath = os.path.pathsep.join(
+        (config.shlibdir,
+         config.llvm_libs_dir,
+         config.environment.get(shlibpath_var, '')))
+    config.environment[shlibpath_var] = shlibpath
+    break
+else:
+    lit_config.warning("unable to inject shared library path on '{}'"
+                       .format(platform.system()))
diff --git a/test/VFS/real-path-found-first.m b/test/VFS/real-path-found-first.m
index 5838aa3..cc5a002 100644
--- a/test/VFS/real-path-found-first.m
+++ b/test/VFS/real-path-found-first.m
@@ -7,7 +7,7 @@
 // REQUIRES: shell
 // RUN: rm -rf %t %t-cache %t.pch
 // RUN: mkdir -p %t/SomeFramework.framework/Modules
-// RUN: cp %S/Inputs/some_frame_module.map %t/SomeFramework.framework/Modules/module.modulemap
+// RUN: cat %S/Inputs/some_frame_module.map > %t/SomeFramework.framework/Modules/module.modulemap
 // RUN: sed -e "s:INPUT_DIR:%S/Inputs:g" -e "s:OUT_DIR:%t:g" %S/Inputs/vfsoverlay.yaml > %t.yaml
 
 // Build
diff --git a/test/VFS/umbrella-framework-import-skipnonexist.m b/test/VFS/umbrella-framework-import-skipnonexist.m
index 5c7cd6d..129f475 100644
--- a/test/VFS/umbrella-framework-import-skipnonexist.m
+++ b/test/VFS/umbrella-framework-import-skipnonexist.m
@@ -5,7 +5,7 @@
 
 // RUN: rm -rf %t
 // RUN: mkdir -p %t/vdir %t/outdir %t/cache
-// RUN: cp -a %S/Inputs/Bar.framework %t/outdir/
+// RUN: cp -R %S/Inputs/Bar.framework %t/outdir/
 //
 // RUN: sed -e "s:VDIR:%t/vdir:g" -e "s:OUT_DIR:%t/outdir:g" %S/Inputs/bar-headers.yaml > %t/vdir/bar-headers.yaml
 // RUN: rm -f %t/outdir/Bar.framework/Headers/B.h
diff --git a/test/clang-rename/TemplatedClassFunction.cpp b/test/clang-rename/TemplatedClassFunction.cpp
index 1f5b0b5..d7f21e0 100644
--- a/test/clang-rename/TemplatedClassFunction.cpp
+++ b/test/clang-rename/TemplatedClassFunction.cpp
@@ -6,17 +6,22 @@
 
 int main(int argc, char **argv) {
   A<int> a;
-  a.foo();   /* Test 2 */     // CHECK: a.bar()   /* Test 2 */
+  A<double> b;
+  A<float> c;
+  a.foo();   /* Test 2 */     // CHECK: a.bar();   /* Test 2 */
+  b.foo();   /* Test 3 */     // CHECK: b.bar();   /* Test 3 */
+  c.foo();   /* Test 4 */     // CHECK: c.bar();   /* Test 4 */
   return 0;
 }
 
 // Test 1.
-// RUN: clang-refactor rename -offset=48 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
+// RUN: clang-rename -offset=48 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
 // Test 2.
-// RUN: clang-refactor rename -offset=162 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-//
-// Currently unsupported test.
-// XFAIL: *
+// RUN: clang-rename -offset=191 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
+// Test 3.
+// RUN: clang-rename -offset=255 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
+// Test 4.
+// RUN: clang-rename -offset=319 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
 
 // To find offsets after modifying the file, use:
 //   grep -Ubo 'foo.*' <file>
diff --git a/test/lit.cfg.py b/test/lit.cfg.py
index 39bdf36..5323cfe 100644
--- a/test/lit.cfg.py
+++ b/test/lit.cfg.py
@@ -31,7 +31,7 @@
 # excludes: A list of directories to exclude from the testsuite. The 'Inputs'
 # subdirectories contain auxiliary inputs for various tests in their parent
 # directories.
-config.excludes = ['Inputs', 'CMakeLists.txt', 'README.txt', 'LICENSE.txt']
+config.excludes = ['Inputs', 'CMakeLists.txt', 'README.txt', 'LICENSE.txt', 'debuginfo-tests']
 
 # test_source_root: The root path where tests are located.
 config.test_source_root = os.path.dirname(__file__)
@@ -58,8 +58,6 @@
 
 tools = [
     'c-index-test', 'clang-check', 'clang-diff', 'clang-format', 'opt',
-    ToolSubst('%test_debuginfo', command=os.path.join(
-        config.llvm_src_root, 'utils', 'test_debuginfo.pl')),
     ToolSubst('%clang_func_map', command=FindTool(
         'clang-func-mapping'), unresolved='ignore'),
 ]
diff --git a/tools/arcmt-test/CMakeLists.txt b/tools/arcmt-test/CMakeLists.txt
index 0cb2c0f..2b456be 100644
--- a/tools/arcmt-test/CMakeLists.txt
+++ b/tools/arcmt-test/CMakeLists.txt
@@ -7,6 +7,7 @@
   )
 
 target_link_libraries(arcmt-test
+  PRIVATE
   clangARCMigrate
   clangBasic
   clangFrontend
diff --git a/tools/c-arcmt-test/CMakeLists.txt b/tools/c-arcmt-test/CMakeLists.txt
index 8914607..08ac93c 100644
--- a/tools/c-arcmt-test/CMakeLists.txt
+++ b/tools/c-arcmt-test/CMakeLists.txt
@@ -4,10 +4,12 @@
 
 if (LLVM_BUILD_STATIC)
   target_link_libraries(c-arcmt-test
+    PRIVATE
     libclang_static
     )
 else()
   target_link_libraries(c-arcmt-test
+    PRIVATE
     libclang
     )
 endif()
diff --git a/tools/c-index-test/CMakeLists.txt b/tools/c-index-test/CMakeLists.txt
index ad990e0..d38c7bb 100644
--- a/tools/c-index-test/CMakeLists.txt
+++ b/tools/c-index-test/CMakeLists.txt
@@ -16,12 +16,14 @@
 
 if (LLVM_BUILD_STATIC)
   target_link_libraries(c-index-test
+    PRIVATE
     libclang_static
     clangCodeGen
     clangIndex
   )
 else()
   target_link_libraries(c-index-test
+    PRIVATE
     libclang
     clangAST
     clangBasic
@@ -39,7 +41,7 @@
 # If libxml2 is available, make it available for c-index-test.
 if (CLANG_HAVE_LIBXML)
   include_directories(SYSTEM ${LIBXML2_INCLUDE_DIR})
-  target_link_libraries(c-index-test ${LIBXML2_LIBRARIES})
+  target_link_libraries(c-index-test PRIVATE ${LIBXML2_LIBRARIES})
 endif()
 
 if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
@@ -56,10 +58,8 @@
     COMPONENT c-index-test)
 
   if (NOT CMAKE_CONFIGURATION_TYPES) # don't add this for IDE's.
-    add_custom_target(install-c-index-test
-      DEPENDS c-index-test
-      COMMAND "${CMAKE_COMMAND}"
-              -DCMAKE_INSTALL_COMPONENT=c-index-test
-              -P "${CMAKE_BINARY_DIR}/cmake_install.cmake")
+    add_llvm_install_targets(install-c-index-test
+                             DEPENDS c-index-test
+                             COMPONENT c-index-test)
   endif()
 endif()
diff --git a/tools/c-index-test/c-index-test.c b/tools/c-index-test/c-index-test.c
index a11e03c..31bdf8a 100644
--- a/tools/c-index-test/c-index-test.c
+++ b/tools/c-index-test/c-index-test.c
@@ -86,6 +86,69 @@
   return options;
 }
 
+static void ModifyPrintingPolicyAccordingToEnv(CXPrintingPolicy Policy) {
+  struct Mapping {
+    const char *name;
+    enum CXPrintingPolicyProperty property;
+  };
+  struct Mapping mappings[] = {
+      {"CINDEXTEST_PRINTINGPOLICY_INDENTATION", CXPrintingPolicy_Indentation},
+      {"CINDEXTEST_PRINTINGPOLICY_SUPPRESSSPECIFIERS",
+       CXPrintingPolicy_SuppressSpecifiers},
+      {"CINDEXTEST_PRINTINGPOLICY_SUPPRESSTAGKEYWORD",
+       CXPrintingPolicy_SuppressTagKeyword},
+      {"CINDEXTEST_PRINTINGPOLICY_INCLUDETAGDEFINITION",
+       CXPrintingPolicy_IncludeTagDefinition},
+      {"CINDEXTEST_PRINTINGPOLICY_SUPPRESSSCOPE",
+       CXPrintingPolicy_SuppressScope},
+      {"CINDEXTEST_PRINTINGPOLICY_SUPPRESSUNWRITTENSCOPE",
+       CXPrintingPolicy_SuppressUnwrittenScope},
+      {"CINDEXTEST_PRINTINGPOLICY_SUPPRESSINITIALIZERS",
+       CXPrintingPolicy_SuppressInitializers},
+      {"CINDEXTEST_PRINTINGPOLICY_CONSTANTARRAYSIZEASWRITTEN",
+       CXPrintingPolicy_ConstantArraySizeAsWritten},
+      {"CINDEXTEST_PRINTINGPOLICY_ANONYMOUSTAGLOCATIONS",
+       CXPrintingPolicy_AnonymousTagLocations},
+      {"CINDEXTEST_PRINTINGPOLICY_SUPPRESSSTRONGLIFETIME",
+       CXPrintingPolicy_SuppressStrongLifetime},
+      {"CINDEXTEST_PRINTINGPOLICY_SUPPRESSLIFETIMEQUALIFIERS",
+       CXPrintingPolicy_SuppressLifetimeQualifiers},
+      {"CINDEXTEST_PRINTINGPOLICY_SUPPRESSTEMPLATEARGSINCXXCONSTRUCTORS",
+       CXPrintingPolicy_SuppressTemplateArgsInCXXConstructors},
+      {"CINDEXTEST_PRINTINGPOLICY_BOOL", CXPrintingPolicy_Bool},
+      {"CINDEXTEST_PRINTINGPOLICY_RESTRICT", CXPrintingPolicy_Restrict},
+      {"CINDEXTEST_PRINTINGPOLICY_ALIGNOF", CXPrintingPolicy_Alignof},
+      {"CINDEXTEST_PRINTINGPOLICY_UNDERSCOREALIGNOF",
+       CXPrintingPolicy_UnderscoreAlignof},
+      {"CINDEXTEST_PRINTINGPOLICY_USEVOIDFORZEROPARAMS",
+       CXPrintingPolicy_UseVoidForZeroParams},
+      {"CINDEXTEST_PRINTINGPOLICY_TERSEOUTPUT", CXPrintingPolicy_TerseOutput},
+      {"CINDEXTEST_PRINTINGPOLICY_POLISHFORDECLARATION",
+       CXPrintingPolicy_PolishForDeclaration},
+      {"CINDEXTEST_PRINTINGPOLICY_HALF", CXPrintingPolicy_Half},
+      {"CINDEXTEST_PRINTINGPOLICY_MSWCHAR", CXPrintingPolicy_MSWChar},
+      {"CINDEXTEST_PRINTINGPOLICY_INCLUDENEWLINES",
+       CXPrintingPolicy_IncludeNewlines},
+      {"CINDEXTEST_PRINTINGPOLICY_MSVCFORMATTING",
+       CXPrintingPolicy_MSVCFormatting},
+      {"CINDEXTEST_PRINTINGPOLICY_CONSTANTSASWRITTEN",
+       CXPrintingPolicy_ConstantsAsWritten},
+      {"CINDEXTEST_PRINTINGPOLICY_SUPPRESSIMPLICITBASE",
+       CXPrintingPolicy_SuppressImplicitBase},
+      {"CINDEXTEST_PRINTINGPOLICY_FULLYQUALIFIEDNAME",
+       CXPrintingPolicy_FullyQualifiedName},
+  };
+
+  unsigned i;
+  for (i = 0; i < sizeof(mappings) / sizeof(struct Mapping); i++) {
+    char *value = getenv(mappings[i].name);
+    if (value) {
+      clang_PrintingPolicy_setProperty(Policy, mappings[i].property,
+                                       (unsigned)strtoul(value, 0L, 10));
+    }
+  }
+}
+
 /** \brief Returns 0 in case of success, non-zero in case of a failure. */
 static int checkForErrors(CXTranslationUnit TU);
 
@@ -169,6 +232,7 @@
   *unsaved_files
     = (struct CXUnsavedFile *)malloc(sizeof(struct CXUnsavedFile) *
                                      *num_unsaved_files);
+  assert(*unsaved_files);
   for (i = 0; i != *num_unsaved_files; ++i) {
     struct CXUnsavedFile *unsaved = *unsaved_files + i;
     const char *arg_string = argv[arg_indices[i]] + prefix_len;
@@ -204,6 +268,7 @@
 
     /* Read the contents of the file we're remapping to. */
     contents = (char *)malloc(unsaved->Length + 1);
+    assert(contents);
     if (fread(contents, 1, unsaved->Length, to_file) != unsaved->Length) {
       fprintf(stderr, "error: unexpected %s reading 'to' file %s\n",
               (feof(to_file) ? "EOF" : "error"), sep + 1);
@@ -223,6 +288,7 @@
     /* Copy the file name that we're remapping from. */
     filename_len = sep - arg_string;
     filename = (char *)malloc(filename_len + 1);
+    assert(filename);
     memcpy(filename, arg_string, filename_len);
     filename[filename_len] = 0;
     unsaved->Filename = filename;
@@ -277,6 +343,7 @@
     = (struct CXUnsavedFile *)realloc(unsaved_files_no_try_idx,
                                       sizeof(struct CXUnsavedFile) *
                                         *num_unsaved_files);
+  assert(*unsaved_files);
   memcpy(*unsaved_files + num_unsaved_files_no_try_idx,
          unsaved_files_try_idx, sizeof(struct CXUnsavedFile) *
             num_unsaved_files_try_idx);
@@ -356,7 +423,11 @@
   PrintExtent(stdout, begin_line, begin_column, end_line, end_column);
 }
 
-int want_display_name = 0;
+static enum DisplayType {
+    DisplayType_Spelling,
+    DisplayType_DisplayName,
+    DisplayType_Pretty
+} wanted_display_type = DisplayType_Spelling;
 
 static void printVersion(const char *Prefix, CXVersion Version) {
   if (Version.Major < 0)
@@ -656,6 +727,28 @@
   return (int)lhs->col - (int)rhs->col;
 }
 
+static CXString CursorToText(CXCursor Cursor) {
+  CXString text;
+  switch (wanted_display_type) {
+  case DisplayType_Spelling:
+    return clang_getCursorSpelling(Cursor);
+  case DisplayType_DisplayName:
+    return clang_getCursorDisplayName(Cursor);
+  case DisplayType_Pretty: {
+    CXPrintingPolicy Policy = clang_getCursorPrintingPolicy(Cursor);
+    ModifyPrintingPolicyAccordingToEnv(Policy);
+    text = clang_getCursorPrettyPrinted(Cursor, Policy);
+    clang_PrintingPolicy_dispose(Policy);
+    return text;
+  }
+  }
+  assert(0 && "unknown display type"); /* no llvm_unreachable in C. */
+  /* Set to NULL to prevent uninitialized variable warnings. */
+  text.data = NULL;
+  text.private_flags = 0;
+  return text;
+}
+
 static void PrintCursor(CXCursor Cursor, const char *CommentSchemaFile) {
   CXTranslationUnit TU = clang_Cursor_getTranslationUnit(Cursor);
   if (clang_isInvalid(Cursor.kind)) {
@@ -682,8 +775,7 @@
     int I;
 
     ks = clang_getCursorKindSpelling(Cursor.kind);
-    string = want_display_name? clang_getCursorDisplayName(Cursor) 
-                              : clang_getCursorSpelling(Cursor);
+    string = CursorToText(Cursor);
     printf("%s=%s", clang_getCString(ks),
                     clang_getCString(string));
     clang_disposeString(ks);
@@ -804,12 +896,16 @@
       printf(" (const)");
     if (clang_CXXMethod_isPureVirtual(Cursor))
       printf(" (pure)");
+    if (clang_CXXRecord_isAbstract(Cursor))
+      printf(" (abstract)");
     if (clang_EnumDecl_isScoped(Cursor))
       printf(" (scoped)");
     if (clang_Cursor_isVariadic(Cursor))
       printf(" (variadic)");
     if (clang_Cursor_isObjCOptional(Cursor))
       printf(" (@optional)");
+    if (clang_isInvalidDeclaration(Cursor))
+      printf(" (invalid)");
 
     switch (clang_getCursorExceptionSpecificationType(Cursor))
     {
@@ -1036,6 +1132,13 @@
   }
 }
 
+static CXString createCXString(const char *CS) {
+  CXString Str;
+  Str.data = (const void *) CS;
+  Str.private_flags = 0;
+  return Str;
+}
+
 /******************************************************************************/
 /* Callbacks.                                                                 */
 /******************************************************************************/
@@ -1682,7 +1785,12 @@
     else if (!strcmp(filter, "all-display") || 
              !strcmp(filter, "local-display")) {
       ck = NULL;
-      want_display_name = 1;
+      wanted_display_type = DisplayType_DisplayName;
+    }
+    else if (!strcmp(filter, "all-pretty") ||
+             !strcmp(filter, "local-pretty")) {
+      ck = NULL;
+      wanted_display_type = DisplayType_Pretty;
     }
     else if (!strcmp(filter, "none")) K = (enum CXCursorKind) ~0;
     else if (!strcmp(filter, "category")) K = CXCursor_ObjCCategoryDecl;
@@ -1747,11 +1855,18 @@
   int result;
   unsigned Repeats = 0;
   unsigned I;
+  const char *InvocationPath;
 
   Idx = clang_createIndex(/* excludeDeclsFromPCH */
-                          (!strcmp(filter, "local") || 
-                           !strcmp(filter, "local-display"))? 1 : 0,
+                          (!strcmp(filter, "local") ||
+                           !strcmp(filter, "local-display") ||
+                           !strcmp(filter, "local-pretty"))
+                              ? 1
+                              : 0,
                           /* displayDiagnostics=*/1);
+  InvocationPath = getenv("CINDEXTEST_INVOCATION_EMISSION_PATH");
+  if (InvocationPath)
+    clang_CXIndex_setInvocationEmissionPathOption(Idx, InvocationPath);
 
   if ((CommentSchemaFile = parse_comments_schema(argc, argv))) {
     argc--;
@@ -2058,6 +2173,7 @@
 
   /* Copy the file name. */
   *filename = (char*)malloc(last_colon - input + 1);
+  assert(*filename);
   memcpy(*filename, input, last_colon - input);
   (*filename)[last_colon - input] = 0;
   return 0;
@@ -2312,11 +2428,14 @@
   CXTranslationUnit TU;
   unsigned I, Repeats = 1;
   unsigned completionOptions = clang_defaultCodeCompleteOptions();
-  
+  const char *InvocationPath;
+
   if (getenv("CINDEXTEST_CODE_COMPLETE_PATTERNS"))
     completionOptions |= CXCodeComplete_IncludeCodePatterns;
   if (getenv("CINDEXTEST_COMPLETION_BRIEF_COMMENTS"))
     completionOptions |= CXCodeComplete_IncludeBriefComments;
+  if (getenv("CINDEXTEST_COMPLETION_SKIP_PREAMBLE"))
+    completionOptions |= CXCodeComplete_SkipPreamble;
   
   if (timing_only)
     input += strlen("-code-completion-timing=");
@@ -2331,7 +2450,10 @@
     return -1;
 
   CIdx = clang_createIndex(0, 0);
-  
+  InvocationPath = getenv("CINDEXTEST_INVOCATION_EMISSION_PATH");
+  if (InvocationPath)
+    clang_CXIndex_setInvocationEmissionPathOption(CIdx, InvocationPath);
+
   if (getenv("CINDEXTEST_EDITING"))
     Repeats = 5;
 
@@ -2461,6 +2583,7 @@
   assert(NumLocations > 0 && "Unable to count locations?");
   Locations = (CursorSourceLocation *)malloc(
                                   NumLocations * sizeof(CursorSourceLocation));
+  assert(Locations);
   for (Loc = 0; Loc < NumLocations; ++Loc) {
     const char *input = argv[Loc + 1] + strlen(locations_flag);
     if ((errorCode = parse_file_line_column(input, &Locations[Loc].filename,
@@ -2754,6 +2877,7 @@
   assert(NumLocations > 0 && "Unable to count locations?");
   Locations = (CursorSourceLocation *)malloc(
                                   NumLocations * sizeof(CursorSourceLocation));
+  assert(Locations);
   for (Loc = 0; Loc < NumLocations; ++Loc) {
     const char *input = argv[Loc + 1] + strlen("-file-refs-at=");
     if ((errorCode = parse_file_line_column(input, &Locations[Loc].filename,
@@ -2861,6 +2985,7 @@
   /* Parse the locations. */
   assert(NumFilenames > 0 && "Unable to count filenames?");
   Filenames = (const char **)malloc(NumFilenames * sizeof(const char *));
+  assert(Filenames);
   for (I = 0; I < NumFilenames; ++I) {
     const char *input = argv[I + 1] + strlen("-file-includes-in=");
     /* Copy the file name. */
@@ -2944,7 +3069,9 @@
 static ImportedASTFilesData *importedASTs_create() {
   ImportedASTFilesData *p;
   p = malloc(sizeof(ImportedASTFilesData));
+  assert(p);
   p->filenames = malloc(MAX_IMPORTED_ASTFILES * sizeof(const char *));
+  assert(p->filenames);
   p->num_files = 0;
   return p;
 }
@@ -2980,7 +3107,7 @@
   int first_check_printed;
   int fail_for_error;
   int abort;
-  const char *main_filename;
+  CXString main_filename;
   ImportedASTFilesData *importedASTs;
   IndexDataStringList *strings;
   CXTranslationUnit TU;
@@ -3019,6 +3146,7 @@
   const char *cname;
   CXIdxClientFile file;
   unsigned line, column;
+  const char *main_filename;
   int isMainFile;
   
   index_data = (IndexData *)client_data;
@@ -3033,7 +3161,8 @@
   }
   filename = clang_getFileName((CXFile)file);
   cname = clang_getCString(filename);
-  if (strcmp(cname, index_data->main_filename) == 0)
+  main_filename = clang_getCString(index_data->main_filename);
+  if (strcmp(cname, main_filename) == 0)
     isMainFile = 1;
   else
     isMainFile = 0;
@@ -3075,6 +3204,7 @@
   node =
       (IndexDataStringList *)malloc(sizeof(IndexDataStringList) + strlen(name) +
                                     digitCount(line) + digitCount(column) + 2);
+  assert(node);
   newStr = node->data;
   sprintf(newStr, "%s:%d:%d", name, line, column);
 
@@ -3207,6 +3337,27 @@
   }
 }
 
+static void printSymbolRole(CXSymbolRole role) {
+  if (role & CXSymbolRole_Declaration)
+    printf(" decl");
+  if (role & CXSymbolRole_Definition)
+    printf(" def");
+  if (role & CXSymbolRole_Reference)
+    printf(" ref");
+  if (role & CXSymbolRole_Read)
+    printf(" read");
+  if (role & CXSymbolRole_Write)
+    printf(" write");
+  if (role & CXSymbolRole_Call)
+    printf(" call");
+  if (role & CXSymbolRole_Dynamic)
+    printf(" dyn");
+  if (role & CXSymbolRole_AddressOf)
+    printf(" addr");
+  if (role & CXSymbolRole_Implicit)
+    printf(" implicit");
+}
+
 static void index_diagnostic(CXClientData client_data,
                              CXDiagnosticSet diagSet, void *reserved) {
   CXString str;
@@ -3235,14 +3386,11 @@
 static CXIdxClientFile index_enteredMainFile(CXClientData client_data,
                                        CXFile file, void *reserved) {
   IndexData *index_data;
-  CXString filename;
 
   index_data = (IndexData *)client_data;
   printCheck(index_data);
 
-  filename = clang_getFileName(file);
-  index_data->main_filename = clang_getCString(filename);
-  clang_disposeString(filename);
+  index_data->main_filename = clang_getFileName(file);
 
   printf("[enteredMainFile]: ");
   printCXIndexFile((CXIdxClientFile)file);
@@ -3428,9 +3576,11 @@
   printCXIndexContainer(info->container);
   printf(" | refkind: ");
   switch (info->kind) {
-  case CXIdxEntityRef_Direct: printf("direct"); break;
-  case CXIdxEntityRef_Implicit: printf("implicit"); break;
+    case CXIdxEntityRef_Direct: printf("direct"); break;
+    case CXIdxEntityRef_Implicit: printf("implicit"); break;
   }
+  printf(" | role:");
+  printSymbolRole(info->role);
   printf("\n");
 }
 
@@ -3481,7 +3631,7 @@
   index_data.first_check_printed = 0;
   index_data.fail_for_error = 0;
   index_data.abort = 0;
-  index_data.main_filename = "";
+  index_data.main_filename = createCXString("");
   index_data.importedASTs = importedASTs;
   index_data.strings = NULL;
   index_data.TU = NULL;
@@ -3497,6 +3647,7 @@
   if (index_data.fail_for_error)
     result = -1;
 
+  clang_disposeString(index_data.main_filename);
   free_client_data(&index_data);
   return result;
 }
@@ -3518,7 +3669,7 @@
   index_data.first_check_printed = 0;
   index_data.fail_for_error = 0;
   index_data.abort = 0;
-  index_data.main_filename = "";
+  index_data.main_filename = createCXString("");
   index_data.importedASTs = importedASTs;
   index_data.strings = NULL;
   index_data.TU = TU;
@@ -3531,6 +3682,7 @@
     result = -1;
 
   clang_disposeTranslationUnit(TU);
+  clang_disposeString(index_data.main_filename);
   free_client_data(&index_data);
   return result;
 }
@@ -3654,6 +3806,7 @@
 
     len = strlen(database);
     tmp = (char *) malloc(len+1);
+    assert(tmp);
     memcpy(tmp, database, len+1);
     buildDir = dirname(tmp);
 
@@ -3835,6 +3988,7 @@
   }
 
   cursors = (CXCursor *)malloc(num_tokens * sizeof(CXCursor));
+  assert(cursors);
   clang_annotateTokens(TU, tokens, num_tokens, cursors);
 
   if (checkForErrors(TU) != 0) {
@@ -3909,6 +4063,7 @@
 
   len = strlen(database);
   tmp = (char *) malloc(len+1);
+  assert(tmp);
   memcpy(tmp, database, len+1);
   buildDir = dirname(tmp);
 
@@ -4023,9 +4178,7 @@
           if (!isUSR(I[2]))
             return not_usr("<class USR>", I[2]);
           else {
-            CXString x;
-            x.data = (void*) I[2];
-            x.private_flags = 0;
+            CXString x = createCXString(I[2]);
             print_usr(clang_constructUSR_ObjCIvar(I[1], x));
           }
 
@@ -4050,9 +4203,7 @@
           if (!isUSR(I[3]))
             return not_usr("<class USR>", I[3]);
           else {
-            CXString x;
-            x.data = (void*) I[3];
-            x.private_flags = 0;
+            CXString x = createCXString(I[3]);
             print_usr(clang_constructUSR_ObjCMethod(I[1], atoi(I[2]), x));
           }
           I += 4;
@@ -4080,9 +4231,7 @@
           if (!isUSR(I[2]))
             return not_usr("<class USR>", I[2]);
           else {
-            CXString x;
-            x.data = (void*) I[2];
-            x.private_flags = 0;
+            CXString x = createCXString(I[2]);
             print_usr(clang_constructUSR_ObjCProperty(I[1], x));
           }
           I += 3;
diff --git a/tools/clang-check/CMakeLists.txt b/tools/clang-check/CMakeLists.txt
index 04151a8..c5ace26 100644
--- a/tools/clang-check/CMakeLists.txt
+++ b/tools/clang-check/CMakeLists.txt
@@ -9,6 +9,7 @@
   )
 
 target_link_libraries(clang-check
+  PRIVATE
   clangAST
   clangBasic
   clangDriver
diff --git a/tools/clang-diff/CMakeLists.txt b/tools/clang-diff/CMakeLists.txt
index a1fc627..09bebf2 100644
--- a/tools/clang-diff/CMakeLists.txt
+++ b/tools/clang-diff/CMakeLists.txt
@@ -7,6 +7,7 @@
   )
 
 target_link_libraries(clang-diff
+  PRIVATE
   clangBasic
   clangFrontend
   clangTooling
diff --git a/tools/clang-diff/ClangDiff.cpp b/tools/clang-diff/ClangDiff.cpp
index 55cef91..4e2150a 100644
--- a/tools/clang-diff/ClangDiff.cpp
+++ b/tools/clang-diff/ClangDiff.cpp
@@ -33,9 +33,9 @@
     cl::desc("Print the internal representation of the AST as JSON."),
     cl::init(false), cl::cat(ClangDiffCategory));
 
-static cl::opt<bool>
-    PrintMatches("dump-matches", cl::desc("Print the matched nodes."),
-                 cl::init(false), cl::cat(ClangDiffCategory));
+static cl::opt<bool> PrintMatches("dump-matches",
+                                  cl::desc("Print the matched nodes."),
+                                  cl::init(false), cl::cat(ClangDiffCategory));
 
 static cl::opt<bool> HtmlDiff("html",
                               cl::desc("Output a side-by-side diff in HTML."),
diff --git a/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs b/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs
index efb2147..1071b68 100644
--- a/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs
+++ b/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs
@@ -38,7 +38,7 @@
         private string style = "file";
         private bool formatOnSave = false;
         private string formatOnSaveFileExtensions =
-            ".c;.cpp;.cxx;.cc;.tli;.tlh;.h;.hh;.hpp;.hxx;.hh;.inl" +
+            ".c;.cpp;.cxx;.cc;.tli;.tlh;.h;.hh;.hpp;.hxx;.hh;.inl;" +
             ".java;.js;.ts;.m;.mm;.proto;.protodevel;.td";
 
         public OptionPageGrid Clone()
diff --git a/tools/clang-format/CMakeLists.txt b/tools/clang-format/CMakeLists.txt
index c695ba3..a295e8c 100644
--- a/tools/clang-format/CMakeLists.txt
+++ b/tools/clang-format/CMakeLists.txt
@@ -12,6 +12,7 @@
   )
 
 target_link_libraries(clang-format
+  PRIVATE
   ${CLANG_FORMAT_LIB_DEPS}
   )
 
diff --git a/tools/clang-format/ClangFormat.cpp b/tools/clang-format/ClangFormat.cpp
index e169b9e..2718a72 100644
--- a/tools/clang-format/ClangFormat.cpp
+++ b/tools/clang-format/ClangFormat.cpp
@@ -289,7 +289,7 @@
               "xml:space='preserve' incomplete_format='"
            << (Status.FormatComplete ? "false" : "true") << "'";
     if (!Status.FormatComplete)
-      outs() << " line=" << Status.Line;
+      outs() << " line='" << Status.Line << "'";
     outs() << ">\n";
     if (Cursor.getNumOccurrences() != 0)
       outs() << "<cursor>"
@@ -357,10 +357,27 @@
   }
 
   if (DumpConfig) {
+    StringRef FileName;
+    std::unique_ptr<llvm::MemoryBuffer> Code;
+    if (FileNames.empty()) {
+      // We can't read the code to detect the language if there's no
+      // file name, so leave Code empty here.
+      FileName = AssumeFileName;
+    } else {
+      // Read in the code in case the filename alone isn't enough to
+      // detect the language.
+      ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
+          MemoryBuffer::getFileOrSTDIN(FileNames[0]);
+      if (std::error_code EC = CodeOrErr.getError()) {
+        llvm::errs() << EC.message() << "\n";
+        return 1;
+      }
+      FileName = (FileNames[0] == "-") ? AssumeFileName : FileNames[0];
+      Code = std::move(CodeOrErr.get());
+    }
     llvm::Expected<clang::format::FormatStyle> FormatStyle =
-        clang::format::getStyle(
-            Style, FileNames.empty() ? AssumeFileName : FileNames[0],
-            FallbackStyle);
+        clang::format::getStyle(Style, FileName, FallbackStyle,
+                                Code ? Code->getBuffer() : "");
     if (!FormatStyle) {
       llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n";
       return 1;
diff --git a/tools/clang-format/clang-format.el b/tools/clang-format/clang-format.el
index aa9c3ff..6c626e0 100644
--- a/tools/clang-format/clang-format.el
+++ b/tools/clang-format/clang-format.el
@@ -119,10 +119,12 @@
       (byte-to-position (1+ byte)))))
 
 ;;;###autoload
-(defun clang-format-region (start end &optional style)
+(defun clang-format-region (start end &optional style assume-file-name)
   "Use clang-format to format the code between START and END according to STYLE.
-If called interactively uses the region or the current statement if there
-is no active region.  If no style is given uses `clang-format-style'."
+If called interactively uses the region or the current statement if there is no
+no active region. If no STYLE is given uses `clang-format-style'. Use
+ASSUME-FILE-NAME to locate a style config file, if no ASSUME-FILE-NAME is given
+uses the function `buffer-file-name'."
   (interactive
    (if (use-region-p)
        (list (region-beginning) (region-end))
@@ -131,6 +133,9 @@
   (unless style
     (setq style clang-format-style))
 
+  (unless assume-file-name
+    (setq assume-file-name buffer-file-name))
+
   (let ((file-start (clang-format--bufferpos-to-filepos start 'approximate
                                                         'utf-8-unix))
         (file-end (clang-format--bufferpos-to-filepos end 'approximate
@@ -144,16 +149,21 @@
         ;; always use ‘utf-8-unix’ and ignore the buffer coding system.
         (default-process-coding-system '(utf-8-unix . utf-8-unix)))
     (unwind-protect
-        (let ((status (call-process-region
-                       nil nil clang-format-executable
-                       nil `(,temp-buffer ,temp-file) nil
-
-                       "-output-replacements-xml"
-                       "-assume-filename" (or (buffer-file-name) "")
-                       "-style" style
-                       "-offset" (number-to-string file-start)
-                       "-length" (number-to-string (- file-end file-start))
-                       "-cursor" (number-to-string cursor)))
+        (let ((status (apply #'call-process-region
+                             nil nil clang-format-executable
+                             nil `(,temp-buffer ,temp-file) nil
+                             `("-output-replacements-xml"
+                               ;; Gaurd against a nil assume-file-name.
+                               ;; If the clang-format option -assume-filename
+                               ;; is given a blank string it will crash as per
+                               ;; the following bug report
+                               ;; https://bugs.llvm.org/show_bug.cgi?id=34667
+                               ,@(and assume-file-name
+                                      (list "-assume-filename" assume-file-name))
+                               "-style" ,style
+                               "-offset" ,(number-to-string file-start)
+                               "-length" ,(number-to-string (- file-end file-start))
+                               "-cursor" ,(number-to-string cursor))))
               (stderr (with-temp-buffer
                         (unless (zerop (cadr (insert-file-contents temp-file)))
                           (insert ": "))
@@ -181,10 +191,13 @@
       (when (buffer-name temp-buffer) (kill-buffer temp-buffer)))))
 
 ;;;###autoload
-(defun clang-format-buffer (&optional style)
-  "Use clang-format to format the current buffer according to STYLE."
+(defun clang-format-buffer (&optional style assume-file-name)
+  "Use clang-format to format the current buffer according to STYLE.
+If no STYLE is given uses `clang-format-style'. Use ASSUME-FILE-NAME
+to locate a style config file. If no ASSUME-FILE-NAME is given uses
+the function `buffer-file-name'."
   (interactive)
-  (clang-format-region (point-min) (point-max) style))
+  (clang-format-region (point-min) (point-max) style assume-file-name))
 
 ;;;###autoload
 (defalias 'clang-format 'clang-format-region)
diff --git a/tools/clang-format/fuzzer/CMakeLists.txt b/tools/clang-format/fuzzer/CMakeLists.txt
index 8f77732..87ae05b 100644
--- a/tools/clang-format/fuzzer/CMakeLists.txt
+++ b/tools/clang-format/fuzzer/CMakeLists.txt
@@ -10,6 +10,7 @@
   )
 
 target_link_libraries(clang-format-fuzzer
+  PRIVATE
   ${CLANG_FORMAT_LIB_DEPS}
   ${LLVM_LIB_FUZZING_ENGINE}
   )
diff --git a/tools/clang-format/git-clang-format b/tools/clang-format/git-clang-format
index 60cd4fb..0b21039 100755
--- a/tools/clang-format/git-clang-format
+++ b/tools/clang-format/git-clang-format
@@ -79,6 +79,7 @@
       'm',  # ObjC
       'mm',  # ObjC++
       'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
+      'cu',  # CUDA
       # Other languages that clang-format supports
       'proto', 'protodevel',  # Protocol Buffers
       'java',  # Java
diff --git a/tools/clang-func-mapping/CMakeLists.txt b/tools/clang-func-mapping/CMakeLists.txt
index 8c10fcd..ae28e28 100644
--- a/tools/clang-func-mapping/CMakeLists.txt
+++ b/tools/clang-func-mapping/CMakeLists.txt
@@ -10,6 +10,7 @@
   )
 
 target_link_libraries(clang-func-mapping
+  PRIVATE
   clangAST
   clangBasic
   clangCrossTU
diff --git a/tools/clang-fuzzer/CMakeLists.txt b/tools/clang-fuzzer/CMakeLists.txt
index abc5015..b351ec5 100644
--- a/tools/clang-fuzzer/CMakeLists.txt
+++ b/tools/clang-fuzzer/CMakeLists.txt
@@ -48,6 +48,7 @@
     )
 
   target_link_libraries(clang-proto-fuzzer
+    PRIVATE
     ${ProtobufMutator_LIBRARIES}
     ${PROTOBUF_LIBRARIES}
     ${LLVM_LIB_FUZZING_ENGINE}
@@ -66,6 +67,7 @@
   )
 
 target_link_libraries(clang-fuzzer
+  PRIVATE
   ${LLVM_LIB_FUZZING_ENGINE}
   clangHandleCXX
   )
diff --git a/tools/clang-fuzzer/proto-to-cxx/CMakeLists.txt b/tools/clang-fuzzer/proto-to-cxx/CMakeLists.txt
index 2b0ade4..910b793 100644
--- a/tools/clang-fuzzer/proto-to-cxx/CMakeLists.txt
+++ b/tools/clang-fuzzer/proto-to-cxx/CMakeLists.txt
@@ -11,4 +11,4 @@
                   )
 
 add_clang_executable(clang-proto-to-cxx proto_to_cxx_main.cpp)
-target_link_libraries(clang-proto-to-cxx clangProtoToCXX)
+target_link_libraries(clang-proto-to-cxx PRIVATE clangProtoToCXX)
diff --git a/tools/clang-fuzzer/proto-to-cxx/proto_to_cxx.cpp b/tools/clang-fuzzer/proto-to-cxx/proto_to_cxx.cpp
index cd75e0c..4a86515 100644
--- a/tools/clang-fuzzer/proto-to-cxx/proto_to_cxx.cpp
+++ b/tools/clang-fuzzer/proto-to-cxx/proto_to_cxx.cpp
@@ -94,7 +94,7 @@
 }
 std::string ProtoToCxx(const uint8_t *data, size_t size) {
   Function message;
-  if (!message.ParseFromArray(data, size))
+  if (!message.ParsePartialFromArray(data, size))
     return "#error invalid proto\n";
   return FunctionToString(message);
 }
diff --git a/tools/clang-import-test/CMakeLists.txt b/tools/clang-import-test/CMakeLists.txt
index 85e833d..836efac 100644
--- a/tools/clang-import-test/CMakeLists.txt
+++ b/tools/clang-import-test/CMakeLists.txt
@@ -24,5 +24,6 @@
   )
 
 target_link_libraries(clang-import-test
+  PRIVATE
   ${CLANG_IMPORT_TEST_LIB_DEPS}
   )
diff --git a/tools/clang-import-test/clang-import-test.cpp b/tools/clang-import-test/clang-import-test.cpp
index 1ea7ee3..e2cf7d8 100644
--- a/tools/clang-import-test/clang-import-test.cpp
+++ b/tools/clang-import-test/clang-import-test.cpp
@@ -50,9 +50,10 @@
     Direct("direct", llvm::cl::Optional,
            llvm::cl::desc("Use the parsed declarations without indirection"));
 
-static llvm::cl::opt<bool>
-    UseOrigins("use-origins", llvm::cl::Optional,
-           llvm::cl::desc("Use DeclContext origin information for more accurate lookups"));  
+static llvm::cl::opt<bool> UseOrigins(
+    "use-origins", llvm::cl::Optional,
+    llvm::cl::desc(
+        "Use DeclContext origin information for more accurate lookups"));
 
 static llvm::cl::list<std::string>
     ClangArgs("Xcc", llvm::cl::ZeroOrMore,
@@ -225,7 +226,7 @@
       CI.getDiagnostics(), ModuleName, CI.getHeaderSearchOpts(),
       CI.getPreprocessorOpts(), CI.getCodeGenOpts(), LLVMCtx));
 }
-} // end namespace
+} // namespace init_convenience
 
 namespace {
 
@@ -261,8 +262,8 @@
       {CI.getASTContext(), CI.getFileManager()});
   llvm::SmallVector<ExternalASTMerger::ImporterSource, 3> Sources;
   for (CIAndOrigins &Import : Imports)
-    Sources.push_back(
-        {Import.getASTContext(), Import.getFileManager(), Import.getOriginMap()});
+    Sources.push_back({Import.getASTContext(), Import.getFileManager(),
+                       Import.getOriginMap()});
   auto ES = llvm::make_unique<ExternalASTMerger>(Target, Sources);
   CI.getASTContext().setExternalSource(ES.release());
   CI.getASTContext().getTranslationUnitDecl()->setHasExternalVisibleStorage();
@@ -327,15 +328,15 @@
     CG.GetModule()->print(llvm::outs(), nullptr);
   if (CI.getDiagnosticClient().getNumErrors())
     return llvm::make_error<llvm::StringError>(
-        "Errors occured while parsing the expression.", std::error_code());
+        "Errors occurred while parsing the expression.", std::error_code());
   return std::move(CI);
 }
 
 void Forget(CIAndOrigins &CI, llvm::MutableArrayRef<CIAndOrigins> Imports) {
   llvm::SmallVector<ExternalASTMerger::ImporterSource, 3> Sources;
   for (CIAndOrigins &Import : Imports)
-    Sources.push_back(
-        {Import.getASTContext(), Import.getFileManager(), Import.getOriginMap()});
+    Sources.push_back({Import.getASTContext(), Import.getFileManager(),
+                       Import.getOriginMap()});
   ExternalASTSource *Source = CI.CI->getASTContext().getExternalSource();
   auto *Merger = static_cast<ExternalASTMerger *>(Source);
   Merger->RemoveSources(Sources);
diff --git a/tools/clang-offload-bundler/CMakeLists.txt b/tools/clang-offload-bundler/CMakeLists.txt
index 6161d08..8718015 100644
--- a/tools/clang-offload-bundler/CMakeLists.txt
+++ b/tools/clang-offload-bundler/CMakeLists.txt
@@ -18,6 +18,7 @@
 add_dependencies(clang clang-offload-bundler)
 
 target_link_libraries(clang-offload-bundler
+  PRIVATE
   ${CLANG_OFFLOAD_BUNDLER_LIB_DEPS}
   )
 
diff --git a/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/tools/clang-offload-bundler/ClangOffloadBundler.cpp
index 6ff4bec..f320caf 100644
--- a/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+++ b/tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -412,7 +412,7 @@
   /// read from the buffers.
   unsigned NumberOfProcessedInputs = 0;
 
-  /// LLVM context used to to create the auxiliary modules.
+  /// LLVM context used to create the auxiliary modules.
   LLVMContext VMContext;
 
   /// LLVM module used to create an object with all the bundle
@@ -563,7 +563,7 @@
           errs() << "error: unable to open temporary file.\n";
           return true;
         }
-        WriteBitcodeToFile(AuxModule.get(), BitcodeFile);
+        WriteBitcodeToFile(*AuxModule, BitcodeFile);
       }
 
       bool Failed = sys::ExecuteAndWait(ClangBinary.get(), ClangArgs);
diff --git a/tools/clang-refactor/CMakeLists.txt b/tools/clang-refactor/CMakeLists.txt
index a472c40..b435744 100644
--- a/tools/clang-refactor/CMakeLists.txt
+++ b/tools/clang-refactor/CMakeLists.txt
@@ -3,12 +3,13 @@
   Support
   )
 
-add_clang_executable(clang-refactor
+add_clang_tool(clang-refactor
   ClangRefactor.cpp
   TestSupport.cpp
   )
 
 target_link_libraries(clang-refactor
+  PRIVATE
   clangAST
   clangBasic
   clangFormat
@@ -19,5 +20,3 @@
   clangToolingCore
   clangToolingRefactor
   )
-
-install(TARGETS clang-refactor RUNTIME DESTINATION bin)
diff --git a/tools/clang-refactor/ClangRefactor.cpp b/tools/clang-refactor/ClangRefactor.cpp
index ed9bf5e..950b800 100644
--- a/tools/clang-refactor/ClangRefactor.cpp
+++ b/tools/clang-refactor/ClangRefactor.cpp
@@ -25,6 +25,7 @@
 #include "clang/Tooling/Tooling.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Signals.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
 
@@ -40,6 +41,11 @@
 static cl::opt<bool> Verbose("v", cl::desc("Use verbose output"),
                              cl::cat(cl::GeneralCategory),
                              cl::sub(*cl::AllSubCommands));
+
+static cl::opt<bool> Inplace("i", cl::desc("Inplace edit <file>s"),
+                             cl::cat(cl::GeneralCategory),
+                             cl::sub(*cl::AllSubCommands));
+
 } // end namespace opts
 
 namespace {
@@ -253,18 +259,17 @@
       : SubCommand(Action->getCommand(), Action->getDescription()),
         Action(std::move(Action)), ActionRules(std::move(ActionRules)) {
     // Check if the selection option is supported.
-    bool HasSelection = false;
     for (const auto &Rule : this->ActionRules) {
-      if ((HasSelection = Rule->hasSelectionRequirement()))
+      if (Rule->hasSelectionRequirement()) {
+        Selection = llvm::make_unique<cl::opt<std::string>>(
+            "selection",
+            cl::desc(
+                "The selected source range in which the refactoring should "
+                "be initiated (<file>:<line>:<column>-<line>:<column> or "
+                "<file>:<line>:<column>)"),
+            cl::cat(Category), cl::sub(*this));
         break;
-    }
-    if (HasSelection) {
-      Selection = llvm::make_unique<cl::opt<std::string>>(
-          "selection",
-          cl::desc("The selected source range in which the refactoring should "
-                   "be initiated (<file>:<line>:<column>-<line>:<column> or "
-                   "<file>:<line>:<column>)"),
-          cl::cat(Category), cl::sub(*this));
+      }
     }
     // Create the refactoring options.
     for (const auto &Rule : this->ActionRules) {
@@ -278,10 +283,10 @@
 
   const RefactoringActionRules &getActionRules() const { return ActionRules; }
 
-  /// Parses the command-line arguments that are specific to this rule.
+  /// Parses the "-selection" command-line argument.
   ///
   /// \returns true on error, false otherwise.
-  bool parseArguments() {
+  bool parseSelectionArgument() {
     if (Selection) {
       ParsedSelection = SourceSelectionArgument::fromString(*Selection);
       if (!ParsedSelection)
@@ -309,7 +314,7 @@
 
 class ClangRefactorConsumer final : public ClangRefactorToolConsumerInterface {
 public:
-  ClangRefactorConsumer() {}
+  ClangRefactorConsumer(AtomicChanges &Changes) : SourceChanges(&Changes) {}
 
   void handleError(llvm::Error Err) override {
     Optional<PartialDiagnosticAt> Diag = DiagnosticError::take(Err);
@@ -324,24 +329,23 @@
   }
 
   void handle(AtomicChanges Changes) override {
-    SourceChanges.insert(SourceChanges.begin(), Changes.begin(), Changes.end());
+    SourceChanges->insert(SourceChanges->begin(), Changes.begin(),
+                          Changes.end());
   }
 
   void handle(SymbolOccurrences Occurrences) override {
     llvm_unreachable("symbol occurrence results are not handled yet");
   }
 
-  const AtomicChanges &getSourceChanges() const { return SourceChanges; }
-
 private:
-  AtomicChanges SourceChanges;
+  AtomicChanges *SourceChanges;
 };
 
 class ClangRefactorTool {
 public:
-  std::vector<std::unique_ptr<RefactoringActionSubcommand>> SubCommands;
-
-  ClangRefactorTool() {
+  ClangRefactorTool()
+      : SelectedSubcommand(nullptr), MatchingRule(nullptr),
+        Consumer(new ClangRefactorConsumer(Changes)), HasFailed(false) {
     std::vector<std::unique_ptr<RefactoringAction>> Actions =
         createRefactoringActions();
 
@@ -364,59 +368,115 @@
     }
   }
 
-  using TUCallbackType = llvm::function_ref<void(ASTContext &)>;
+  // Initializes the selected subcommand and refactoring rule based on the
+  // command line options.
+  llvm::Error Init() {
+    auto Subcommand = getSelectedSubcommand();
+    if (!Subcommand)
+      return Subcommand.takeError();
+    auto Rule = getMatchingRule(**Subcommand);
+    if (!Rule)
+      return Rule.takeError();
 
-  /// Parses the translation units that were given to the subcommand using
-  /// the 'sources' option and invokes the callback for each parsed
-  /// translation unit.
-  bool foreachTranslationUnit(const CompilationDatabase &DB,
-                              ArrayRef<std::string> Sources,
-                              TUCallbackType Callback) {
+    SelectedSubcommand = *Subcommand;
+    MatchingRule = *Rule;
+
+    return llvm::Error::success();
+  }
+
+  bool hasFailed() const { return HasFailed; }
+
+  using TUCallbackType = std::function<void(ASTContext &)>;
+
+  // Callback of an AST action. This invokes the matching rule on the given AST.
+  void callback(ASTContext &AST) {
+    assert(SelectedSubcommand && MatchingRule && Consumer);
+    RefactoringRuleContext Context(AST.getSourceManager());
+    Context.setASTContext(AST);
+
+    // If the selection option is test specific, we use a test-specific
+    // consumer.
+    std::unique_ptr<ClangRefactorToolConsumerInterface> TestConsumer;
+    bool HasSelection = MatchingRule->hasSelectionRequirement();
+    if (HasSelection)
+      TestConsumer = SelectedSubcommand->getSelection()->createCustomConsumer();
+    ClangRefactorToolConsumerInterface *ActiveConsumer =
+        TestConsumer ? TestConsumer.get() : Consumer.get();
+    ActiveConsumer->beginTU(AST);
+
+    auto InvokeRule = [&](RefactoringResultConsumer &Consumer) {
+      if (opts::Verbose)
+        logInvocation(*SelectedSubcommand, Context);
+      MatchingRule->invoke(*ActiveConsumer, Context);
+    };
+    if (HasSelection) {
+      assert(SelectedSubcommand->getSelection() &&
+             "Missing selection argument?");
+      if (opts::Verbose)
+        SelectedSubcommand->getSelection()->print(llvm::outs());
+      if (SelectedSubcommand->getSelection()->forAllRanges(
+              Context.getSources(), [&](SourceRange R) {
+                Context.setSelectionRange(R);
+                InvokeRule(*ActiveConsumer);
+              }))
+        HasFailed = true;
+      ActiveConsumer->endTU();
+      return;
+    }
+    InvokeRule(*ActiveConsumer);
+    ActiveConsumer->endTU();
+  }
+
+  llvm::Expected<std::unique_ptr<FrontendActionFactory>>
+  getFrontendActionFactory() {
     class ToolASTConsumer : public ASTConsumer {
     public:
       TUCallbackType Callback;
-      ToolASTConsumer(TUCallbackType Callback) : Callback(Callback) {}
+      ToolASTConsumer(TUCallbackType Callback)
+          : Callback(std::move(Callback)) {}
 
       void HandleTranslationUnit(ASTContext &Context) override {
         Callback(Context);
       }
     };
-    class ActionWrapper {
+    class ToolASTAction : public ASTFrontendAction {
     public:
-      TUCallbackType Callback;
-      ActionWrapper(TUCallbackType Callback) : Callback(Callback) {}
+      explicit ToolASTAction(TUCallbackType Callback)
+          : Callback(std::move(Callback)) {}
 
-      std::unique_ptr<ASTConsumer> newASTConsumer() {
-        return llvm::make_unique<ToolASTConsumer>(Callback);
+    protected:
+      std::unique_ptr<clang::ASTConsumer>
+      CreateASTConsumer(clang::CompilerInstance &compiler,
+                        StringRef /* dummy */) override {
+        std::unique_ptr<clang::ASTConsumer> Consumer{
+            new ToolASTConsumer(Callback)};
+        return Consumer;
       }
+
+    private:
+      TUCallbackType Callback;
     };
 
-    ClangTool Tool(DB, Sources);
-    ActionWrapper ToolAction(Callback);
-    std::unique_ptr<tooling::FrontendActionFactory> Factory =
-        tooling::newFrontendActionFactory(&ToolAction);
-    return Tool.run(Factory.get());
+    class ToolActionFactory : public FrontendActionFactory {
+    public:
+      ToolActionFactory(TUCallbackType Callback)
+          : Callback(std::move(Callback)) {}
+
+      FrontendAction *create() override { return new ToolASTAction(Callback); }
+
+    private:
+      TUCallbackType Callback;
+    };
+
+    return llvm::make_unique<ToolActionFactory>(
+        [this](ASTContext &AST) { return callback(AST); });
   }
 
-  /// Logs an individual refactoring action invocation to STDOUT.
-  void logInvocation(RefactoringActionSubcommand &Subcommand,
-                     const RefactoringRuleContext &Context) {
-    if (!opts::Verbose)
-      return;
-    llvm::outs() << "invoking action '" << Subcommand.getName() << "':\n";
-    if (Context.getSelectionRange().isValid()) {
-      SourceRange R = Context.getSelectionRange();
-      llvm::outs() << "  -selection=";
-      R.getBegin().print(llvm::outs(), Context.getSources());
-      llvm::outs() << " -> ";
-      R.getEnd().print(llvm::outs(), Context.getSources());
-      llvm::outs() << "\n";
-    }
-  }
-
-  bool applySourceChanges(const AtomicChanges &Replacements) {
+  // FIXME(ioeric): this seems to only works for changes in a single file at
+  // this point.
+  bool applySourceChanges() {
     std::set<std::string> Files;
-    for (const auto &Change : Replacements)
+    for (const auto &Change : Changes)
       Files.insert(Change.getFilePath());
     // FIXME: Add automatic formatting support as well.
     tooling::ApplyChangesSpec Spec;
@@ -430,138 +490,149 @@
         return true;
       }
       auto Result = tooling::applyAtomicChanges(File, (*BufferErr)->getBuffer(),
-                                                Replacements, Spec);
+                                                Changes, Spec);
       if (!Result) {
         llvm::errs() << toString(Result.takeError());
         return true;
       }
 
-      std::error_code EC;
-      llvm::raw_fd_ostream OS(File, EC, llvm::sys::fs::F_Text);
-      if (EC) {
-        llvm::errs() << EC.message() << "\n";
-        return true;
+      if (opts::Inplace) {
+        std::error_code EC;
+        llvm::raw_fd_ostream OS(File, EC, llvm::sys::fs::F_Text);
+        if (EC) {
+          llvm::errs() << EC.message() << "\n";
+          return true;
+        }
+        OS << *Result;
+        continue;
       }
-      OS << *Result;
+
+      llvm::outs() << *Result;
     }
     return false;
   }
 
-  bool invokeAction(RefactoringActionSubcommand &Subcommand,
-                    const CompilationDatabase &DB,
-                    ArrayRef<std::string> Sources) {
-    // Find a set of matching rules.
+private:
+  /// Logs an individual refactoring action invocation to STDOUT.
+  void logInvocation(RefactoringActionSubcommand &Subcommand,
+                     const RefactoringRuleContext &Context) {
+    llvm::outs() << "invoking action '" << Subcommand.getName() << "':\n";
+    if (Context.getSelectionRange().isValid()) {
+      SourceRange R = Context.getSelectionRange();
+      llvm::outs() << "  -selection=";
+      R.getBegin().print(llvm::outs(), Context.getSources());
+      llvm::outs() << " -> ";
+      R.getEnd().print(llvm::outs(), Context.getSources());
+      llvm::outs() << "\n";
+    }
+  }
+
+  llvm::Expected<RefactoringActionRule *>
+  getMatchingRule(RefactoringActionSubcommand &Subcommand) {
     SmallVector<RefactoringActionRule *, 4> MatchingRules;
     llvm::StringSet<> MissingOptions;
 
-    bool HasSelection = false;
     for (const auto &Rule : Subcommand.getActionRules()) {
-      bool SelectionMatches = true;
-      if (Rule->hasSelectionRequirement()) {
-        HasSelection = true;
-        if (!Subcommand.getSelection()) {
-          MissingOptions.insert("selection");
-          SelectionMatches = false;
-        }
-      }
       CommandLineRefactoringOptionVisitor Visitor(Subcommand.getOptions());
       Rule->visitRefactoringOptions(Visitor);
-      if (SelectionMatches && Visitor.getMissingRequiredOptions().empty()) {
-        MatchingRules.push_back(Rule.get());
-        continue;
+      if (Visitor.getMissingRequiredOptions().empty()) {
+        if (!Rule->hasSelectionRequirement()) {
+          MatchingRules.push_back(Rule.get());
+        } else {
+          Subcommand.parseSelectionArgument();
+          if (Subcommand.getSelection()) {
+            MatchingRules.push_back(Rule.get());
+          } else {
+            MissingOptions.insert("selection");
+          }
+        }
       }
       for (const RefactoringOption *Opt : Visitor.getMissingRequiredOptions())
         MissingOptions.insert(Opt->getName());
     }
     if (MatchingRules.empty()) {
-      llvm::errs() << "error: '" << Subcommand.getName()
-                   << "' can't be invoked with the given arguments:\n";
+      std::string Error;
+      llvm::raw_string_ostream OS(Error);
+      OS << "ERROR: '" << Subcommand.getName()
+         << "' can't be invoked with the given arguments:\n";
       for (const auto &Opt : MissingOptions)
-        llvm::errs() << "  missing '-" << Opt.getKey() << "' option\n";
-      return true;
+        OS << "  missing '-" << Opt.getKey() << "' option\n";
+      OS.flush();
+      return llvm::make_error<llvm::StringError>(
+          Error, llvm::inconvertibleErrorCode());
     }
-
-    ClangRefactorConsumer Consumer;
-    bool HasFailed = false;
-    if (foreachTranslationUnit(DB, Sources, [&](ASTContext &AST) {
-          RefactoringRuleContext Context(AST.getSourceManager());
-          Context.setASTContext(AST);
-
-          auto InvokeRule = [&](RefactoringResultConsumer &Consumer) {
-            logInvocation(Subcommand, Context);
-            for (RefactoringActionRule *Rule : MatchingRules) {
-              if (!Rule->hasSelectionRequirement())
-                continue;
-              Rule->invoke(Consumer, Context);
-              return;
-            }
-            // FIXME (Alex L): If more than one initiation succeeded, then the
-            // rules are ambiguous.
-            llvm_unreachable(
-                "The action must have at least one selection rule");
-          };
-
-          std::unique_ptr<ClangRefactorToolConsumerInterface> CustomConsumer;
-          if (HasSelection)
-            CustomConsumer = Subcommand.getSelection()->createCustomConsumer();
-          ClangRefactorToolConsumerInterface &ActiveConsumer =
-              CustomConsumer ? *CustomConsumer : Consumer;
-          ActiveConsumer.beginTU(AST);
-          if (HasSelection) {
-            assert(Subcommand.getSelection() && "Missing selection argument?");
-            if (opts::Verbose)
-              Subcommand.getSelection()->print(llvm::outs());
-            if (Subcommand.getSelection()->forAllRanges(
-                    Context.getSources(), [&](SourceRange R) {
-                      Context.setSelectionRange(R);
-                      InvokeRule(ActiveConsumer);
-                    }))
-              HasFailed = true;
-            ActiveConsumer.endTU();
-            return;
-          }
-          // FIXME (Alex L): Implement non-selection based invocation path.
-          ActiveConsumer.endTU();
-        }))
-      return true;
-    return HasFailed || applySourceChanges(Consumer.getSourceChanges());
+    if (MatchingRules.size() != 1) {
+      return llvm::make_error<llvm::StringError>(
+          llvm::Twine("ERROR: more than one matching rule of action") +
+              Subcommand.getName() + "was found with given options.",
+          llvm::inconvertibleErrorCode());
+    }
+    return MatchingRules.front();
   }
+  // Figure out which action is specified by the user. The user must specify the
+  // action using a command-line subcommand, e.g. the invocation `clang-refactor
+  // local-rename` corresponds to the `LocalRename` refactoring action. All
+  // subcommands must have a unique names. This allows us to figure out which
+  // refactoring action should be invoked by looking at the first subcommand
+  // that's enabled by LLVM's command-line parser.
+  llvm::Expected<RefactoringActionSubcommand *> getSelectedSubcommand() {
+    auto It = llvm::find_if(
+        SubCommands,
+        [](const std::unique_ptr<RefactoringActionSubcommand> &SubCommand) {
+          return !!(*SubCommand);
+        });
+    if (It == SubCommands.end()) {
+      std::string Error;
+      llvm::raw_string_ostream OS(Error);
+      OS << "error: no refactoring action given\n";
+      OS << "note: the following actions are supported:\n";
+      for (const auto &Subcommand : SubCommands)
+        OS.indent(2) << Subcommand->getName() << "\n";
+      OS.flush();
+      return llvm::make_error<llvm::StringError>(
+          Error, llvm::inconvertibleErrorCode());
+    }
+    RefactoringActionSubcommand *Subcommand = &(**It);
+    return Subcommand;
+  }
+
+  std::vector<std::unique_ptr<RefactoringActionSubcommand>> SubCommands;
+  RefactoringActionSubcommand *SelectedSubcommand;
+  RefactoringActionRule *MatchingRule;
+  std::unique_ptr<ClangRefactorToolConsumerInterface> Consumer;
+  AtomicChanges Changes;
+  bool HasFailed;
 };
 
 } // end anonymous namespace
 
 int main(int argc, const char **argv) {
-  ClangRefactorTool Tool;
+  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
+
+  ClangRefactorTool RefactorTool;
 
   CommonOptionsParser Options(
       argc, argv, cl::GeneralCategory, cl::ZeroOrMore,
       "Clang-based refactoring tool for C, C++ and Objective-C");
 
-  // Figure out which action is specified by the user. The user must specify
-  // the action using a command-line subcommand, e.g. the invocation
-  // `clang-refactor local-rename` corresponds to the `LocalRename` refactoring
-  // action. All subcommands must have a unique names. This allows us to figure
-  // out which refactoring action should be invoked by looking at the first
-  // subcommand that's enabled by LLVM's command-line parser.
-  auto It = llvm::find_if(
-      Tool.SubCommands,
-      [](const std::unique_ptr<RefactoringActionSubcommand> &SubCommand) {
-        return !!(*SubCommand);
-      });
-  if (It == Tool.SubCommands.end()) {
-    llvm::errs() << "error: no refactoring action given\n";
-    llvm::errs() << "note: the following actions are supported:\n";
-    for (const auto &Subcommand : Tool.SubCommands)
-      llvm::errs().indent(2) << Subcommand->getName() << "\n";
+  if (auto Err = RefactorTool.Init()) {
+    llvm::errs() << llvm::toString(std::move(Err)) << "\n";
     return 1;
   }
-  RefactoringActionSubcommand &ActionCommand = **It;
 
-  if (ActionCommand.parseArguments())
+  auto ActionFactory = RefactorTool.getFrontendActionFactory();
+  if (!ActionFactory) {
+    llvm::errs() << llvm::toString(ActionFactory.takeError()) << "\n";
     return 1;
-  if (Tool.invokeAction(ActionCommand, Options.getCompilations(),
-                        Options.getSourcePathList()))
-    return 1;
-
-  return 0;
+  }
+  ClangTool Tool(Options.getCompilations(), Options.getSourcePathList());
+  bool Failed = false;
+  if (Tool.run(ActionFactory->get()) != 0) {
+    llvm::errs() << "Failed to run refactoring action on files\n";
+    // It is possible that TUs are broken while changes are generated correctly,
+    // so we still try applying changes.
+    Failed = true;
+  }
+  return RefactorTool.applySourceChanges() || Failed ||
+         RefactorTool.hasFailed();
 }
diff --git a/tools/clang-refactor/TestSupport.cpp b/tools/clang-refactor/TestSupport.cpp
index 2f127bd..9331dfd 100644
--- a/tools/clang-refactor/TestSupport.cpp
+++ b/tools/clang-refactor/TestSupport.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/raw_ostream.h"
@@ -241,9 +242,9 @@
     // Dump the results:
     const auto &TestGroup = TestRanges.GroupedRanges[Group.index()];
     if (!CanonicalResult) {
-      llvm::errs() << TestGroup.Ranges.size() << " '" << TestGroup.Name
+      llvm::outs() << TestGroup.Ranges.size() << " '" << TestGroup.Name
                    << "' results:\n";
-      llvm::errs() << *CanonicalErrorMessage << "\n";
+      llvm::outs() << *CanonicalErrorMessage << "\n";
     } else {
       llvm::outs() << TestGroup.Ranges.size() << " '" << TestGroup.Name
                    << "' results:\n";
@@ -271,6 +272,25 @@
          (NewlinePos == StringRef::npos ? ColumnOffset : (unsigned)NewlinePos);
 }
 
+static unsigned addEndLineOffsetAndEndColumn(StringRef Source, unsigned Offset,
+                                             unsigned LineNumberOffset,
+                                             unsigned Column) {
+  StringRef Line = Source.drop_front(Offset);
+  unsigned LineOffset = 0;
+  for (; LineNumberOffset != 0; --LineNumberOffset) {
+    size_t NewlinePos = Line.find_first_of("\r\n");
+    // Line offset goes out of bounds.
+    if (NewlinePos == StringRef::npos)
+      break;
+    LineOffset += NewlinePos + 1;
+    Line = Line.drop_front(NewlinePos + 1);
+  }
+  // Source now points to the line at +lineOffset;
+  size_t LineStart = Source.find_last_of("\r\n", /*From=*/Offset + LineOffset);
+  return addColumnOffset(
+      Source, LineStart == StringRef::npos ? 0 : LineStart + 1, Column - 1);
+}
+
 Optional<TestSelectionRangesInFile>
 findTestSelectionRanges(StringRef Filename) {
   ErrorOr<std::unique_ptr<MemoryBuffer>> ErrOrFile =
@@ -282,11 +302,11 @@
   }
   StringRef Source = ErrOrFile.get()->getBuffer();
 
-  // FIXME (Alex L): 3rd capture groups for +line:column.
   // See the doc comment for this function for the explanation of this
   // syntax.
   static Regex RangeRegex("range[[:blank:]]*([[:alpha:]_]*)?[[:blank:]]*=[[:"
-                          "blank:]]*(\\+[[:digit:]]+)?");
+                          "blank:]]*(\\+[[:digit:]]+)?[[:blank:]]*(->[[:blank:]"
+                          "]*[\\+\\:[:digit:]]+)?");
 
   std::map<std::string, SmallVector<TestSelectionRange, 8>> GroupedRanges;
 
@@ -304,18 +324,22 @@
     StringRef Comment =
         Source.substr(Tok.getLocation().getRawEncoding(), Tok.getLength());
     SmallVector<StringRef, 4> Matches;
-    // Allow CHECK: comments to contain range= commands.
-    if (!RangeRegex.match(Comment, &Matches) || Comment.contains("CHECK")) {
-      // Try to detect mistyped 'range:' comments to ensure tests don't miss
-      // anything.
+    // Try to detect mistyped 'range:' comments to ensure tests don't miss
+    // anything.
+    auto DetectMistypedCommand = [&]() -> bool {
       if (Comment.contains_lower("range") && Comment.contains("=") &&
           !Comment.contains_lower("run") && !Comment.contains("CHECK")) {
         llvm::errs() << "error: suspicious comment '" << Comment
                      << "' that "
                         "resembles the range command found\n";
         llvm::errs() << "note: please reword if this isn't a range command\n";
-        return None;
       }
+      return false;
+    };
+    // Allow CHECK: comments to contain range= commands.
+    if (!RangeRegex.match(Comment, &Matches) || Comment.contains("CHECK")) {
+      if (DetectMistypedCommand())
+        return None;
       continue;
     }
     unsigned Offset = Tok.getEndLoc().getRawEncoding();
@@ -325,9 +349,28 @@
       if (Matches[2].drop_front().getAsInteger(10, ColumnOffset))
         assert(false && "regex should have produced a number");
     }
-    // FIXME (Alex L): Support true ranges.
     Offset = addColumnOffset(Source, Offset, ColumnOffset);
-    TestSelectionRange Range = {Offset, Offset};
+    unsigned EndOffset;
+
+    if (!Matches[3].empty()) {
+      static Regex EndLocRegex(
+          "->[[:blank:]]*(\\+[[:digit:]]+):([[:digit:]]+)");
+      SmallVector<StringRef, 4> EndLocMatches;
+      if (!EndLocRegex.match(Matches[3], &EndLocMatches)) {
+        if (DetectMistypedCommand())
+          return None;
+        continue;
+      }
+      unsigned EndLineOffset = 0, EndColumn = 0;
+      if (EndLocMatches[1].drop_front().getAsInteger(10, EndLineOffset) ||
+          EndLocMatches[2].getAsInteger(10, EndColumn))
+        assert(false && "regex should have produced a number");
+      EndOffset = addEndLineOffsetAndEndColumn(Source, Offset, EndLineOffset,
+                                               EndColumn);
+    } else {
+      EndOffset = Offset;
+    }
+    TestSelectionRange Range = {Offset, EndOffset};
     auto It = GroupedRanges.insert(std::make_pair(
         Matches[1].str(), SmallVector<TestSelectionRange, 8>{Range}));
     if (!It.second)
diff --git a/tools/clang-rename/CMakeLists.txt b/tools/clang-rename/CMakeLists.txt
index 771e3bd..9689e1c 100644
--- a/tools/clang-rename/CMakeLists.txt
+++ b/tools/clang-rename/CMakeLists.txt
@@ -3,9 +3,10 @@
   Support
   )
 
-add_clang_executable(clang-rename ClangRename.cpp)
+add_clang_tool(clang-rename ClangRename.cpp)
 
 target_link_libraries(clang-rename
+  PRIVATE
   clangBasic
   clangFrontend
   clangRewrite
@@ -14,8 +15,6 @@
   clangToolingRefactor
   )
 
-install(TARGETS clang-rename RUNTIME DESTINATION bin)
-
 install(PROGRAMS clang-rename.py
   DESTINATION share/clang
   COMPONENT clang-rename)
diff --git a/tools/clang-rename/ClangRename.cpp b/tools/clang-rename/ClangRename.cpp
index fca12ca..2c14b69 100644
--- a/tools/clang-rename/ClangRename.cpp
+++ b/tools/clang-rename/ClangRename.cpp
@@ -138,7 +138,7 @@
   // Check if NewNames is a valid identifier in C++17.
   LangOptions Options;
   Options.CPlusPlus = true;
-  Options.CPlusPlus1z = true;
+  Options.CPlusPlus17 = true;
   IdentifierTable Table(Options);
   for (const auto &NewName : NewNames) {
     auto NewNameTokKind = Table.get(NewName).getTokenID();
diff --git a/tools/diagtool/CMakeLists.txt b/tools/diagtool/CMakeLists.txt
index 3f7d803..beb6c35 100644
--- a/tools/diagtool/CMakeLists.txt
+++ b/tools/diagtool/CMakeLists.txt
@@ -13,6 +13,7 @@
 )
 
 target_link_libraries(diagtool
+  PRIVATE
   clangBasic
   clangFrontend
   )
diff --git a/tools/driver/CMakeLists.txt b/tools/driver/CMakeLists.txt
index 901b6d6..22a4984 100644
--- a/tools/driver/CMakeLists.txt
+++ b/tools/driver/CMakeLists.txt
@@ -38,6 +38,7 @@
   )
 
 target_link_libraries(clang
+  PRIVATE
   clangBasic
   clangCodeGen
   clangDriver
@@ -85,6 +86,7 @@
 
   set(TOOL_INFO_PLIST_OUT "${CMAKE_CURRENT_BINARY_DIR}/${TOOL_INFO_PLIST}")
   target_link_libraries(clang
+    PRIVATE
     "-Wl,-sectcreate,__TEXT,__info_plist,${TOOL_INFO_PLIST_OUT}")
   configure_file("${TOOL_INFO_PLIST}.in" "${TOOL_INFO_PLIST_OUT}" @ONLY)
 
@@ -121,11 +123,11 @@
   if("${ORDER_FILE}" STREQUAL "\n")
     set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${CLANG_ORDER_FILE})
   elseif(LINKER_ORDER_FILE_WORKS)
-    target_link_libraries(clang ${LINKER_ORDER_FILE_OPTION})
+    target_link_libraries(clang PRIVATE ${LINKER_ORDER_FILE_OPTION})
     set_target_properties(clang PROPERTIES LINK_DEPENDS ${CLANG_ORDER_FILE})
   endif()
 endif()
 
 if(WITH_POLLY AND LINK_POLLY_INTO_TOOLS)
-  target_link_libraries(clang Polly)
+  target_link_libraries(clang PRIVATE Polly)
 endif(WITH_POLLY AND LINK_POLLY_INTO_TOOLS)
diff --git a/tools/driver/cc1as_main.cpp b/tools/driver/cc1as_main.cpp
index 9b90562..630a0bc 100644
--- a/tools/driver/cc1as_main.cpp
+++ b/tools/driver/cc1as_main.cpp
@@ -181,7 +181,13 @@
 
   // Issue errors on unknown arguments.
   for (const Arg *A : Args.filtered(OPT_UNKNOWN)) {
-    Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args);
+    auto ArgString = A->getAsString(Args);
+    std::string Nearest;
+    if (OptTbl->findNearest(ArgString, Nearest, IncludedFlagsBitmask) > 1)
+      Diags.Report(diag::err_drv_unknown_argument) << ArgString;
+    else
+      Diags.Report(diag::err_drv_unknown_argument_with_suggestion)
+          << ArgString << Nearest;
     Success = false;
   }
 
@@ -397,7 +403,7 @@
     if (Opts.ShowEncoding) {
       CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
       MCTargetOptions Options;
-      MAB = TheTarget->createMCAsmBackend(*MRI, Opts.Triple, Opts.CPU, Options);
+      MAB = TheTarget->createMCAsmBackend(*STI, *MRI, Options);
     }
     auto FOut = llvm::make_unique<formatted_raw_ostream>(*Out);
     Str.reset(TheTarget->createAsmStreamer(
@@ -415,8 +421,7 @@
 
     MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
     MCTargetOptions Options;
-    MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*MRI, Opts.Triple,
-                                                      Opts.CPU, Options);
+    MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, Options);
     Triple T(Opts.Triple);
     Str.reset(TheTarget->createMCObjectStreamer(
                 T, Ctx, std::unique_ptr<MCAsmBackend>(MAB), *Out, std::unique_ptr<MCCodeEmitter>(CE), *STI,
diff --git a/tools/driver/driver.cpp b/tools/driver/driver.cpp
index fa757da..611bff2 100644
--- a/tools/driver/driver.cpp
+++ b/tools/driver/driver.cpp
@@ -311,7 +311,8 @@
     return cc1as_main(argv.slice(2), argv[0], GetExecutablePathVP);
 
   // Reject unknown tools.
-  llvm::errs() << "error: unknown integrated tool '" << Tool << "'\n";
+  llvm::errs() << "error: unknown integrated tool '" << Tool << "'. "
+               << "Valid tools include '-cc1' and '-cc1as'.\n";
   return 1;
 }
 
diff --git a/tools/libclang/BuildSystem.cpp b/tools/libclang/BuildSystem.cpp
index 99aa5b6..79fa69c 100644
--- a/tools/libclang/BuildSystem.cpp
+++ b/tools/libclang/BuildSystem.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Chrono.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -78,7 +79,7 @@
   unwrap(VFO)->write(OS);
 
   StringRef Data = OS.str();
-  *out_buffer_ptr = (char*)malloc(Data.size());
+  *out_buffer_ptr = static_cast<char*>(llvm::safe_malloc(Data.size()));
   *out_buffer_size = Data.size();
   memcpy(*out_buffer_ptr, Data.data(), Data.size());
   return CXError_Success;
@@ -140,7 +141,7 @@
   OS << "}\n";
 
   StringRef Data = OS.str();
-  *out_buffer_ptr = (char*)malloc(Data.size());
+  *out_buffer_ptr = static_cast<char*>(llvm::safe_malloc(Data.size()));
   *out_buffer_size = Data.size();
   memcpy(*out_buffer_ptr, Data.data(), Data.size());
   return CXError_Success;
diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp
index fa3b970..791b475 100644
--- a/tools/libclang/CIndex.cpp
+++ b/tools/libclang/CIndex.cpp
@@ -81,6 +81,8 @@
   D->Diagnostics = nullptr;
   D->OverridenCursorsPool = createOverridenCXCursorsPool();
   D->CommentToXML = nullptr;
+  D->ParsingOptions = 0;
+  D->Arguments = {};
   return D;
 }
 
@@ -783,6 +785,16 @@
   return false;
 }
 
+static bool HasTrailingReturnType(FunctionDecl *ND) {
+  const QualType Ty = ND->getType();
+  if (const FunctionType *AFT = Ty->getAs<FunctionType>()) {
+    if (const FunctionProtoType *FT = dyn_cast<FunctionProtoType>(AFT))
+      return FT->hasTrailingReturn();
+  }
+
+  return false;
+}
+
 /// \brief Compare two base or member initializers based on their source order.
 static int CompareCXXCtorInitializers(CXXCtorInitializer *const *X,
                                       CXXCtorInitializer *const *Y) {
@@ -802,14 +814,16 @@
     // written. This requires a bit of work.
     TypeLoc TL = TSInfo->getTypeLoc().IgnoreParens();
     FunctionTypeLoc FTL = TL.getAs<FunctionTypeLoc>();
+    const bool HasTrailingRT = HasTrailingReturnType(ND);
     
     // If we have a function declared directly (without the use of a typedef),
     // visit just the return type. Otherwise, just visit the function's type
     // now.
-    if ((FTL && !isa<CXXConversionDecl>(ND) && Visit(FTL.getReturnLoc())) ||
+    if ((FTL && !isa<CXXConversionDecl>(ND) && !HasTrailingRT &&
+         Visit(FTL.getReturnLoc())) ||
         (!FTL && Visit(TL)))
       return true;
-    
+
     // Visit the nested-name-specifier, if present.
     if (NestedNameSpecifierLoc QualifierLoc = ND->getQualifierLoc())
       if (VisitNestedNameSpecifierLoc(QualifierLoc))
@@ -825,7 +839,11 @@
     // Visit the function parameters, if we have a function type.
     if (FTL && VisitFunctionTypeLoc(FTL, true))
       return true;
-    
+
+    // Visit the function's trailing return type.
+    if (FTL && HasTrailingRT && Visit(FTL.getReturnLoc()))
+      return true;
+
     // FIXME: Attributes?
   }
   
@@ -877,6 +895,9 @@
   if (Expr *BitWidth = D->getBitWidth())
     return Visit(MakeCXCursor(BitWidth, StmtParent, TU, RegionOfInterest));
 
+  if (Expr *Init = D->getInClassInitializer())
+    return Visit(MakeCXCursor(Init, StmtParent, TU, RegionOfInterest));
+
   return false;
 }
 
@@ -1022,8 +1043,9 @@
             [&SM](Decl *A, Decl *B) {
     SourceLocation L_A = A->getLocStart();
     SourceLocation L_B = B->getLocStart();
-    assert(L_A.isValid() && L_B.isValid());
-    return SM.isBeforeInTranslationUnit(L_A, L_B);
+    return L_A != L_B ?
+           SM.isBeforeInTranslationUnit(L_A, L_B) :
+           SM.isBeforeInTranslationUnit(A->getLocEnd(), B->getLocEnd());
   });
 
   // Now visit the decls.
@@ -3251,6 +3273,12 @@
   return 0;
 }
 
+void clang_CXIndex_setInvocationEmissionPathOption(CXIndex CIdx,
+                                                   const char *Path) {
+  if (CIdx)
+    static_cast<CIndexer *>(CIdx)->setInvocationEmissionPath(Path ? Path : "");
+}
+
 void clang_toggleCrashRecovery(unsigned isEnabled) {
   if (isEnabled)
     llvm::CrashRecoveryContext::Enable();
@@ -3427,6 +3455,11 @@
   // faster, trading for a slower (first) reparse.
   unsigned PrecompilePreambleAfterNParses =
       !PrecompilePreamble ? 0 : 2 - CreatePreambleOnFirstParse;
+
+  LibclangInvocationReporter InvocationReporter(
+      *CXXIdx, LibclangInvocationReporter::OperationKind::ParseOperation,
+      options, llvm::makeArrayRef(*Args), /*InvocationArgs=*/None,
+      unsaved_files);
   std::unique_ptr<ASTUnit> Unit(ASTUnit::LoadFromCommandLine(
       Args->data(), Args->data() + Args->size(),
       CXXIdx->getPCHContainerOperations(), Diags,
@@ -3453,7 +3486,14 @@
     return CXError_ASTReadError;
 
   *out_TU = MakeCXTranslationUnit(CXXIdx, std::move(Unit));
-  return *out_TU ? CXError_Success : CXError_Failure;
+  if (CXTranslationUnitImpl *TU = *out_TU) {
+    TU->ParsingOptions = options;
+    TU->Arguments.reserve(Args->size());
+    for (const char *Arg : *Args)
+      TU->Arguments.push_back(Arg);
+    return CXError_Success;
+  }
+  return CXError_Failure;
 }
 
 CXTranslationUnit
@@ -3508,11 +3548,6 @@
         llvm::makeArrayRef(unsaved_files, num_unsaved_files), options, out_TU);
   };
 
-  if (getenv("LIBCLANG_NOTHREADS")) {
-    ParseTranslationUnitImpl();
-    return result;
-  }
-
   llvm::CrashRecoveryContext CRC;
 
   if (!RunSafely(CRC, ParseTranslationUnitImpl)) {
@@ -3921,8 +3956,7 @@
     result = clang_saveTranslationUnit_Impl(TU, FileName, options);
   };
 
-  if (!CXXUnit->getDiagnostics().hasUnrecoverableErrorOccurred() ||
-      getenv("LIBCLANG_NOTHREADS")) {
+  if (!CXXUnit->getDiagnostics().hasUnrecoverableErrorOccurred()) {
     SaveTranslationUnitImpl();
 
     if (getenv("LIBCLANG_RESOURCE_USAGE"))
@@ -4045,11 +4079,6 @@
         TU, llvm::makeArrayRef(unsaved_files, num_unsaved_files), options);
   };
 
-  if (getenv("LIBCLANG_NOTHREADS")) {
-    ReparseTranslationUnitImpl();
-    return result;
-  }
-
   llvm::CrashRecoveryContext CRC;
 
   if (!RunSafely(CRC, ReparseTranslationUnitImpl)) {
@@ -4159,6 +4188,27 @@
   return const_cast<FileEntry *>(FMgr.getFile(file_name));
 }
 
+const char *clang_getFileContents(CXTranslationUnit TU, CXFile file,
+                                  size_t *size) {
+  if (isNotUsableTU(TU)) {
+    LOG_BAD_TU(TU);
+    return nullptr;
+  }
+
+  const SourceManager &SM = cxtu::getASTUnit(TU)->getSourceManager();
+  FileID fid = SM.translateFile(static_cast<FileEntry *>(file));
+  bool Invalid = true;
+  llvm::MemoryBuffer *buf = SM.getBuffer(fid, &Invalid);
+  if (Invalid) {
+    if (size)
+      *size = 0;
+    return nullptr;
+  }
+  if (size)
+    *size = buf->getBufferSize();
+  return buf->getBufferStart();
+}
+
 unsigned clang_isFileMultipleIncludeGuarded(CXTranslationUnit TU,
                                             CXFile file) {
   if (isNotUsableTU(TU)) {
@@ -4656,6 +4706,195 @@
   return cxstring::createSet(Manglings);
 }
 
+CXPrintingPolicy clang_getCursorPrintingPolicy(CXCursor C) {
+  if (clang_Cursor_isNull(C))
+    return 0;
+  return new PrintingPolicy(getCursorContext(C).getPrintingPolicy());
+}
+
+void clang_PrintingPolicy_dispose(CXPrintingPolicy Policy) {
+  if (Policy)
+    delete static_cast<PrintingPolicy *>(Policy);
+}
+
+unsigned
+clang_PrintingPolicy_getProperty(CXPrintingPolicy Policy,
+                                 enum CXPrintingPolicyProperty Property) {
+  if (!Policy)
+    return 0;
+
+  PrintingPolicy *P = static_cast<PrintingPolicy *>(Policy);
+  switch (Property) {
+  case CXPrintingPolicy_Indentation:
+    return P->Indentation;
+  case CXPrintingPolicy_SuppressSpecifiers:
+    return P->SuppressSpecifiers;
+  case CXPrintingPolicy_SuppressTagKeyword:
+    return P->SuppressTagKeyword;
+  case CXPrintingPolicy_IncludeTagDefinition:
+    return P->IncludeTagDefinition;
+  case CXPrintingPolicy_SuppressScope:
+    return P->SuppressScope;
+  case CXPrintingPolicy_SuppressUnwrittenScope:
+    return P->SuppressUnwrittenScope;
+  case CXPrintingPolicy_SuppressInitializers:
+    return P->SuppressInitializers;
+  case CXPrintingPolicy_ConstantArraySizeAsWritten:
+    return P->ConstantArraySizeAsWritten;
+  case CXPrintingPolicy_AnonymousTagLocations:
+    return P->AnonymousTagLocations;
+  case CXPrintingPolicy_SuppressStrongLifetime:
+    return P->SuppressStrongLifetime;
+  case CXPrintingPolicy_SuppressLifetimeQualifiers:
+    return P->SuppressLifetimeQualifiers;
+  case CXPrintingPolicy_SuppressTemplateArgsInCXXConstructors:
+    return P->SuppressTemplateArgsInCXXConstructors;
+  case CXPrintingPolicy_Bool:
+    return P->Bool;
+  case CXPrintingPolicy_Restrict:
+    return P->Restrict;
+  case CXPrintingPolicy_Alignof:
+    return P->Alignof;
+  case CXPrintingPolicy_UnderscoreAlignof:
+    return P->UnderscoreAlignof;
+  case CXPrintingPolicy_UseVoidForZeroParams:
+    return P->UseVoidForZeroParams;
+  case CXPrintingPolicy_TerseOutput:
+    return P->TerseOutput;
+  case CXPrintingPolicy_PolishForDeclaration:
+    return P->PolishForDeclaration;
+  case CXPrintingPolicy_Half:
+    return P->Half;
+  case CXPrintingPolicy_MSWChar:
+    return P->MSWChar;
+  case CXPrintingPolicy_IncludeNewlines:
+    return P->IncludeNewlines;
+  case CXPrintingPolicy_MSVCFormatting:
+    return P->MSVCFormatting;
+  case CXPrintingPolicy_ConstantsAsWritten:
+    return P->ConstantsAsWritten;
+  case CXPrintingPolicy_SuppressImplicitBase:
+    return P->SuppressImplicitBase;
+  case CXPrintingPolicy_FullyQualifiedName:
+    return P->FullyQualifiedName;
+  }
+
+  assert(false && "Invalid CXPrintingPolicyProperty");
+  return 0;
+}
+
+void clang_PrintingPolicy_setProperty(CXPrintingPolicy Policy,
+                                      enum CXPrintingPolicyProperty Property,
+                                      unsigned Value) {
+  if (!Policy)
+    return;
+
+  PrintingPolicy *P = static_cast<PrintingPolicy *>(Policy);
+  switch (Property) {
+  case CXPrintingPolicy_Indentation:
+    P->Indentation = Value;
+    return;
+  case CXPrintingPolicy_SuppressSpecifiers:
+    P->SuppressSpecifiers = Value;
+    return;
+  case CXPrintingPolicy_SuppressTagKeyword:
+    P->SuppressTagKeyword = Value;
+    return;
+  case CXPrintingPolicy_IncludeTagDefinition:
+    P->IncludeTagDefinition = Value;
+    return;
+  case CXPrintingPolicy_SuppressScope:
+    P->SuppressScope = Value;
+    return;
+  case CXPrintingPolicy_SuppressUnwrittenScope:
+    P->SuppressUnwrittenScope = Value;
+    return;
+  case CXPrintingPolicy_SuppressInitializers:
+    P->SuppressInitializers = Value;
+    return;
+  case CXPrintingPolicy_ConstantArraySizeAsWritten:
+    P->ConstantArraySizeAsWritten = Value;
+    return;
+  case CXPrintingPolicy_AnonymousTagLocations:
+    P->AnonymousTagLocations = Value;
+    return;
+  case CXPrintingPolicy_SuppressStrongLifetime:
+    P->SuppressStrongLifetime = Value;
+    return;
+  case CXPrintingPolicy_SuppressLifetimeQualifiers:
+    P->SuppressLifetimeQualifiers = Value;
+    return;
+  case CXPrintingPolicy_SuppressTemplateArgsInCXXConstructors:
+    P->SuppressTemplateArgsInCXXConstructors = Value;
+    return;
+  case CXPrintingPolicy_Bool:
+    P->Bool = Value;
+    return;
+  case CXPrintingPolicy_Restrict:
+    P->Restrict = Value;
+    return;
+  case CXPrintingPolicy_Alignof:
+    P->Alignof = Value;
+    return;
+  case CXPrintingPolicy_UnderscoreAlignof:
+    P->UnderscoreAlignof = Value;
+    return;
+  case CXPrintingPolicy_UseVoidForZeroParams:
+    P->UseVoidForZeroParams = Value;
+    return;
+  case CXPrintingPolicy_TerseOutput:
+    P->TerseOutput = Value;
+    return;
+  case CXPrintingPolicy_PolishForDeclaration:
+    P->PolishForDeclaration = Value;
+    return;
+  case CXPrintingPolicy_Half:
+    P->Half = Value;
+    return;
+  case CXPrintingPolicy_MSWChar:
+    P->MSWChar = Value;
+    return;
+  case CXPrintingPolicy_IncludeNewlines:
+    P->IncludeNewlines = Value;
+    return;
+  case CXPrintingPolicy_MSVCFormatting:
+    P->MSVCFormatting = Value;
+    return;
+  case CXPrintingPolicy_ConstantsAsWritten:
+    P->ConstantsAsWritten = Value;
+    return;
+  case CXPrintingPolicy_SuppressImplicitBase:
+    P->SuppressImplicitBase = Value;
+    return;
+  case CXPrintingPolicy_FullyQualifiedName:
+    P->FullyQualifiedName = Value;
+    return;
+  }
+
+  assert(false && "Invalid CXPrintingPolicyProperty");
+}
+
+CXString clang_getCursorPrettyPrinted(CXCursor C, CXPrintingPolicy cxPolicy) {
+  if (clang_Cursor_isNull(C))
+    return cxstring::createEmpty();
+
+  if (clang_isDeclaration(C.kind)) {
+    const Decl *D = getCursorDecl(C);
+    if (!D)
+      return cxstring::createEmpty();
+
+    SmallString<128> Str;
+    llvm::raw_svector_ostream OS(Str);
+    PrintingPolicy *UserPolicy = static_cast<PrintingPolicy *>(cxPolicy);
+    D->print(OS, UserPolicy ? *UserPolicy
+                            : getCursorContext(C).getPrintingPolicy());
+
+    return cxstring::createDup(OS.str());
+  }
+
+  return cxstring::createEmpty();
+}
+
 CXString clang_getCursorDisplayName(CXCursor C) {
   if (!clang_isDeclaration(C.kind))
     return clang_getCursorSpelling(C);
@@ -4726,12 +4965,12 @@
     // If the type was explicitly written, use that.
     if (TypeSourceInfo *TSInfo = ClassSpec->getTypeAsWritten())
       return cxstring::createDup(TSInfo->getType().getAsString(Policy));
-    
+
     SmallString<128> Str;
     llvm::raw_svector_ostream OS(Str);
     OS << *ClassSpec;
-    TemplateSpecializationType::PrintTemplateArgumentList(
-        OS, ClassSpec->getTemplateArgs().asArray(), Policy);
+    printTemplateArgumentList(OS, ClassSpec->getTemplateArgs().asArray(),
+                              Policy);
     return cxstring::createDup(OS.str());
   }
   
@@ -5387,6 +5626,15 @@
          (K >= CXCursor_FirstExtraDecl && K <= CXCursor_LastExtraDecl);
 }
 
+unsigned clang_isInvalidDeclaration(CXCursor C) {
+  if (clang_isDeclaration(C.kind)) {
+    if (const Decl *D = getCursorDecl(C))
+      return D->isInvalidDecl();
+  }
+
+  return 0;
+}
+
 unsigned clang_isReference(enum CXCursorKind K) {
   return K >= CXCursor_FirstRef && K <= CXCursor_LastRef;
 }
@@ -6418,7 +6666,8 @@
   if (CXTokens.empty())
     return;
 
-  *Tokens = (CXToken *)malloc(sizeof(CXToken) * CXTokens.size());
+  *Tokens = static_cast<CXToken *>(
+      llvm::safe_malloc(sizeof(CXToken) * CXTokens.size()));
   memmove(*Tokens, CXTokens.data(), sizeof(CXToken) * CXTokens.size());
   *NumTokens = CXTokens.size();
 }
@@ -7885,6 +8134,17 @@
   return (Method && Method->isVirtual()) ? 1 : 0;
 }
 
+unsigned clang_CXXRecord_isAbstract(CXCursor C) {
+  if (!clang_isDeclaration(C.kind))
+    return 0;
+
+  const auto *D = cxcursor::getCursorDecl(C);
+  const auto *RD = dyn_cast_or_null<CXXRecordDecl>(D);
+  if (RD)
+    RD = RD->getDefinition();
+  return (RD && RD->isAbstract()) ? 1 : 0;
+}
+
 unsigned clang_EnumDecl_isScoped(CXCursor C) {
   if (!clang_isDeclaration(C.kind))
     return 0;
@@ -8090,6 +8350,7 @@
   SourceManager &sm = Ctx.getSourceManager();
   FileEntry *fileEntry = static_cast<FileEntry *>(file);
   FileID wantedFileID = sm.translateFile(fileEntry);
+  bool isMainFile = wantedFileID == sm.getMainFileID();
 
   const std::vector<SourceRange> &SkippedRanges = ppRec->getSkippedRanges();
   std::vector<SourceRange> wantedRanges;
@@ -8097,6 +8358,8 @@
        i != ei; ++i) {
     if (sm.getFileID(i->getBegin()) == wantedFileID || sm.getFileID(i->getEnd()) == wantedFileID)
       wantedRanges.push_back(*i);
+    else if (isMainFile && (astUnit->isInPreambleFileID(i->getBegin()) || astUnit->isInPreambleFileID(i->getEnd())))
+      wantedRanges.push_back(*i);
   }
 
   skipped->count = wantedRanges.size();
@@ -8164,7 +8427,7 @@
                unsigned Size) {
   if (!Size)
     Size = GetSafetyThreadStackSize();
-  if (Size)
+  if (Size && !getenv("LIBCLANG_NOTHREADS"))
     return CRC.RunSafelyOnThread(Fn, Size);
   return CRC.RunSafely(Fn);
 }
diff --git a/tools/libclang/CIndexCodeCompletion.cpp b/tools/libclang/CIndexCodeCompletion.cpp
index c2b4c0b..ba2c818 100644
--- a/tools/libclang/CIndexCodeCompletion.cpp
+++ b/tools/libclang/CIndexCodeCompletion.cpp
@@ -32,6 +32,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Timer.h"
@@ -642,6 +643,7 @@
                           ArrayRef<CXUnsavedFile> unsaved_files,
                           unsigned options) {
   bool IncludeBriefComments = options & CXCodeComplete_IncludeBriefComments;
+  bool SkipPreamble = options & CXCodeComplete_SkipPreamble;
 
 #ifdef UDP_CODE_COMPLETION_LOGGER
 #ifdef UDP_CODE_COMPLETION_LOGGER_PORT
@@ -688,9 +690,20 @@
   // Create a code-completion consumer to capture the results.
   CodeCompleteOptions Opts;
   Opts.IncludeBriefComments = IncludeBriefComments;
+  Opts.LoadExternal = !SkipPreamble;
   CaptureCompletionResults Capture(Opts, *Results, &TU);
 
   // Perform completion.
+  std::vector<const char *> CArgs;
+  for (const auto &Arg : TU->Arguments)
+    CArgs.push_back(Arg.c_str());
+  std::string CompletionInvocation =
+      llvm::formatv("-code-completion-at={0}:{1}:{2}", complete_filename,
+                    complete_line, complete_column)
+          .str();
+  LibclangInvocationReporter InvocationReporter(
+      *CXXIdx, LibclangInvocationReporter::OperationKind::CompletionOperation,
+      TU->ParsingOptions, CArgs, CompletionInvocation, unsaved_files);
   AST->CodeComplete(complete_filename, complete_line, complete_column,
                     RemappedFiles, (options & CXCodeComplete_IncludeMacros),
                     (options & CXCodeComplete_IncludeCodePatterns),
@@ -806,11 +819,6 @@
         llvm::makeArrayRef(unsaved_files, num_unsaved_files), options);
   };
 
-  if (getenv("LIBCLANG_NOTHREADS")) {
-    CodeCompleteAtImpl();
-    return result;
-  }
-
   llvm::CrashRecoveryContext CRC;
 
   if (!RunSafely(CRC, CodeCompleteAtImpl)) {
diff --git a/tools/libclang/CIndexer.cpp b/tools/libclang/CIndexer.cpp
index 694ed60..62ea881 100644
--- a/tools/libclang/CIndexer.cpp
+++ b/tools/libclang/CIndexer.cpp
@@ -12,10 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "CIndexer.h"
+#include "CXString.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/Version.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/MutexGuard.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
 #include <cstdio>
@@ -76,3 +80,83 @@
   ResourcesPath = LibClangPath.str();
   return ResourcesPath;
 }
+
+StringRef CIndexer::getClangToolchainPath() {
+  if (!ToolchainPath.empty())
+    return ToolchainPath;
+  StringRef ResourcePath = getClangResourcesPath();
+  ToolchainPath = llvm::sys::path::parent_path(
+      llvm::sys::path::parent_path(llvm::sys::path::parent_path(ResourcePath)));
+  return ToolchainPath;
+}
+
+LibclangInvocationReporter::LibclangInvocationReporter(
+    CIndexer &Idx, OperationKind Op, unsigned ParseOptions,
+    llvm::ArrayRef<const char *> Args,
+    llvm::ArrayRef<std::string> InvocationArgs,
+    llvm::ArrayRef<CXUnsavedFile> UnsavedFiles) {
+  StringRef Path = Idx.getInvocationEmissionPath();
+  if (Path.empty())
+    return;
+
+  // Create a temporary file for the invocation log.
+  SmallString<256> TempPath;
+  TempPath = Path;
+  llvm::sys::path::append(TempPath, "libclang-%%%%%%%%%%%%");
+  int FD;
+  if (llvm::sys::fs::createUniqueFile(TempPath, FD, TempPath))
+    return;
+  File = std::string(TempPath.begin(), TempPath.end());
+  llvm::raw_fd_ostream OS(FD, /*ShouldClose=*/true);
+
+  // Write out the information about the invocation to it.
+  auto WriteStringKey = [&OS](StringRef Key, StringRef Value) {
+    OS << R"(")" << Key << R"(":")";
+    OS << Value << '"';
+  };
+  OS << '{';
+  WriteStringKey("toolchain", Idx.getClangToolchainPath());
+  OS << ',';
+  WriteStringKey("libclang.operation",
+                 Op == OperationKind::ParseOperation ? "parse" : "complete");
+  OS << ',';
+  OS << R"("libclang.opts":)" << ParseOptions;
+  OS << ',';
+  OS << R"("args":[)";
+  for (const auto &I : llvm::enumerate(Args)) {
+    if (I.index())
+      OS << ',';
+    OS << '"' << I.value() << '"';
+  }
+  if (!InvocationArgs.empty()) {
+    OS << R"(],"invocation-args":[)";
+    for (const auto &I : llvm::enumerate(InvocationArgs)) {
+      if (I.index())
+        OS << ',';
+      OS << '"' << I.value() << '"';
+    }
+  }
+  if (!UnsavedFiles.empty()) {
+    OS << R"(],"unsaved_file_hashes":[)";
+    for (const auto &UF : llvm::enumerate(UnsavedFiles)) {
+      if (UF.index())
+        OS << ',';
+      OS << '{';
+      WriteStringKey("name", UF.value().Filename);
+      OS << ',';
+      llvm::MD5 Hash;
+      Hash.update(getContents(UF.value()));
+      llvm::MD5::MD5Result Result;
+      Hash.final(Result);
+      SmallString<32> Digest = Result.digest();
+      WriteStringKey("md5", Digest);
+      OS << '}';
+    }
+  }
+  OS << "]}";
+}
+
+LibclangInvocationReporter::~LibclangInvocationReporter() {
+  if (!File.empty())
+    llvm::sys::fs::remove(File);
+}
diff --git a/tools/libclang/CIndexer.h b/tools/libclang/CIndexer.h
index b227f94..6c46eed 100644
--- a/tools/libclang/CIndexer.h
+++ b/tools/libclang/CIndexer.h
@@ -18,6 +18,7 @@
 #include "clang-c/Index.h"
 #include "clang/Frontend/PCHContainerOperations.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Mutex.h"
 #include <utility>
 
 namespace llvm {
@@ -40,6 +41,10 @@
   std::string ResourcesPath;
   std::shared_ptr<PCHContainerOperations> PCHContainerOps;
 
+  std::string ToolchainPath;
+
+  std::string InvocationEmissionPath;
+
 public:
   CIndexer(std::shared_ptr<PCHContainerOperations> PCHContainerOps =
                std::make_shared<PCHContainerOperations>())
@@ -71,6 +76,31 @@
 
   /// \brief Get the path of the clang resource files.
   const std::string &getClangResourcesPath();
+
+  StringRef getClangToolchainPath();
+
+  void setInvocationEmissionPath(StringRef Str) {
+    InvocationEmissionPath = Str;
+  }
+
+  StringRef getInvocationEmissionPath() const { return InvocationEmissionPath; }
+};
+
+/// Logs information about a particular libclang operation like parsing to
+/// a new file in the invocation emission path.
+class LibclangInvocationReporter {
+public:
+  enum class OperationKind { ParseOperation, CompletionOperation };
+
+  LibclangInvocationReporter(CIndexer &Idx, OperationKind Op,
+                             unsigned ParseOptions,
+                             llvm::ArrayRef<const char *> Args,
+                             llvm::ArrayRef<std::string> InvocationArgs,
+                             llvm::ArrayRef<CXUnsavedFile> UnsavedFiles);
+  ~LibclangInvocationReporter();
+
+private:
+  std::string File;
 };
 
   /// \brief Return the current size to request for "safety".
diff --git a/tools/libclang/CMakeLists.txt b/tools/libclang/CMakeLists.txt
index a28499c..4440637 100644
--- a/tools/libclang/CMakeLists.txt
+++ b/tools/libclang/CMakeLists.txt
@@ -141,10 +141,13 @@
   PATTERN ".svn" EXCLUDE
   )
 
+# LLVM_DISTRIBUTION_COMPONENTS requires that each component have both a
+# component and an install-component target, so add a dummy libclang-headers
+# target to allow using it in LLVM_DISTRIBUTION_COMPONENTS.
+add_custom_target(libclang-headers)
+set_target_properties(libclang-headers PROPERTIES FOLDER "Misc")
+
 if (NOT CMAKE_CONFIGURATION_TYPES) # don't add this for IDE's.
-  add_custom_target(install-libclang-headers
-    DEPENDS
-    COMMAND "${CMAKE_COMMAND}"
-            -DCMAKE_INSTALL_COMPONENT=libclang-headers
-            -P "${CMAKE_BINARY_DIR}/cmake_install.cmake")
+  add_llvm_install_targets(install-libclang-headers
+                           COMPONENT libclang-headers)
 endif()
diff --git a/tools/libclang/CXIndexDataConsumer.cpp b/tools/libclang/CXIndexDataConsumer.cpp
index ffe5c48..ba1e92f 100644
--- a/tools/libclang/CXIndexDataConsumer.cpp
+++ b/tools/libclang/CXIndexDataConsumer.cpp
@@ -148,6 +148,11 @@
     return true;
   }
 };
+
+CXSymbolRole getSymbolRole(SymbolRoleSet Role) {
+  // CXSymbolRole mirrors low 9 bits of clang::index::SymbolRole.
+  return CXSymbolRole(static_cast<uint32_t>(Role) & ((1 << 9) - 1));
+}
 }
 
 bool CXIndexDataConsumer::handleDeclOccurence(const Decl *D,
@@ -184,6 +189,7 @@
     if (Roles & (unsigned)SymbolRole::Implicit) {
       Kind = CXIdxEntityRef_Implicit;
     }
+    CXSymbolRole CXRole = getSymbolRole(Roles);
 
     CXCursor Cursor;
     if (ASTNode.OrigE) {
@@ -202,7 +208,7 @@
     }
     handleReference(ND, Loc, Cursor,
                     dyn_cast_or_null<NamedDecl>(ASTNode.Parent),
-                    ASTNode.ContainerDC, ASTNode.OrigE, Kind);
+                    ASTNode.ContainerDC, ASTNode.OrigE, Kind, CXRole);
 
   } else {
     const DeclContext *LexicalDC = ASTNode.ContainerDC;
@@ -889,13 +895,14 @@
                                       const NamedDecl *Parent,
                                       const DeclContext *DC,
                                       const Expr *E,
-                                      CXIdxEntityRefKind Kind) {
-  if (!D)
+                                      CXIdxEntityRefKind Kind,
+                                      CXSymbolRole Role) {
+  if (!D || !DC)
     return false;
 
   CXCursor Cursor = E ? MakeCXCursor(E, cast<Decl>(DC), CXTU)
                       : getRefCursor(D, Loc);
-  return handleReference(D, Loc, Cursor, Parent, DC, E, Kind);
+  return handleReference(D, Loc, Cursor, Parent, DC, E, Kind, Role);
 }
 
 bool CXIndexDataConsumer::handleReference(const NamedDecl *D, SourceLocation Loc,
@@ -903,11 +910,12 @@
                                       const NamedDecl *Parent,
                                       const DeclContext *DC,
                                       const Expr *E,
-                                      CXIdxEntityRefKind Kind) {
+                                      CXIdxEntityRefKind Kind,
+                                      CXSymbolRole Role) {
   if (!CB.indexEntityReference)
     return false;
 
-  if (!D)
+  if (!D || !DC)
     return false;
   if (Loc.isInvalid())
     return false;
@@ -939,7 +947,8 @@
                               getIndexLoc(Loc),
                               &RefEntity,
                               Parent ? &ParentEntity : nullptr,
-                              &Container };
+                              &Container,
+                              Role };
   CB.indexEntityReference(ClientData, &Info);
   return true;
 }
@@ -1304,11 +1313,11 @@
 
 static CXIdxEntityCXXTemplateKind
 getEntityKindFromSymbolProperties(SymbolPropertySet K) {
-  if (K & (unsigned)SymbolProperty::TemplatePartialSpecialization)
+  if (K & (SymbolPropertySet)SymbolProperty::TemplatePartialSpecialization)
     return CXIdxEntity_TemplatePartialSpecialization;
-  if (K & (unsigned)SymbolProperty::TemplateSpecialization)
+  if (K & (SymbolPropertySet)SymbolProperty::TemplateSpecialization)
     return CXIdxEntity_TemplateSpecialization;
-  if (K & (unsigned)SymbolProperty::Generic)
+  if (K & (SymbolPropertySet)SymbolProperty::Generic)
     return CXIdxEntity_Template;
   return CXIdxEntity_NonTemplate;
 }
diff --git a/tools/libclang/CXIndexDataConsumer.h b/tools/libclang/CXIndexDataConsumer.h
index 16162cb..e3c726e 100644
--- a/tools/libclang/CXIndexDataConsumer.h
+++ b/tools/libclang/CXIndexDataConsumer.h
@@ -342,7 +342,7 @@
   CXTranslationUnit getCXTU() const { return CXTU; }
 
   void setASTContext(ASTContext &ctx);
-  void setPreprocessor(std::shared_ptr<Preprocessor> PP);
+  void setPreprocessor(std::shared_ptr<Preprocessor> PP) override;
 
   bool shouldSuppressRefs() const {
     return IndexOptions & CXIndexOpt_SuppressRedundantRefs;
@@ -436,13 +436,15 @@
                        const NamedDecl *Parent,
                        const DeclContext *DC,
                        const Expr *E = nullptr,
-                       CXIdxEntityRefKind Kind = CXIdxEntityRef_Direct);
+                       CXIdxEntityRefKind Kind = CXIdxEntityRef_Direct,
+                       CXSymbolRole Role = CXSymbolRole_None);
 
   bool handleReference(const NamedDecl *D, SourceLocation Loc,
                        const NamedDecl *Parent,
                        const DeclContext *DC,
                        const Expr *E = nullptr,
-                       CXIdxEntityRefKind Kind = CXIdxEntityRef_Direct);
+                       CXIdxEntityRefKind Kind = CXIdxEntityRef_Direct,
+                       CXSymbolRole Role = CXSymbolRole_None);
 
   bool isNotFromSourceFile(SourceLocation Loc) const;
 
diff --git a/tools/libclang/CXString.cpp b/tools/libclang/CXString.cpp
index 4486031..cef4e53 100644
--- a/tools/libclang/CXString.cpp
+++ b/tools/libclang/CXString.cpp
@@ -96,7 +96,7 @@
 
 CXString createDup(StringRef String) {
   CXString Result;
-  char *Spelling = static_cast<char *>(malloc(String.size() + 1));
+  char *Spelling = static_cast<char *>(llvm::safe_malloc(String.size() + 1));
   memmove(Spelling, String.data(), String.size());
   Spelling[String.size()] = 0;
   Result.data = Spelling;
diff --git a/tools/libclang/CXTranslationUnit.h b/tools/libclang/CXTranslationUnit.h
index ce8469b..590142f 100644
--- a/tools/libclang/CXTranslationUnit.h
+++ b/tools/libclang/CXTranslationUnit.h
@@ -33,6 +33,8 @@
   void *Diagnostics;
   void *OverridenCursorsPool;
   clang::index::CommentToXMLConverter *CommentToXML;
+  unsigned ParsingOptions;
+  std::vector<std::string> Arguments;
 };
 
 struct CXTargetInfoImpl {
diff --git a/tools/libclang/Indexing.cpp b/tools/libclang/Indexing.cpp
index 2a13624..021ebcf 100644
--- a/tools/libclang/Indexing.cpp
+++ b/tools/libclang/Indexing.cpp
@@ -880,11 +880,6 @@
         TU_options);
   };
 
-  if (getenv("LIBCLANG_NOTHREADS")) {
-    IndexSourceFileImpl();
-    return result;
-  }
-
   llvm::CrashRecoveryContext CRC;
 
   if (!RunSafely(CRC, IndexSourceFileImpl)) {
@@ -934,11 +929,6 @@
         index_options, TU);
   };
 
-  if (getenv("LIBCLANG_NOTHREADS")) {
-    IndexTranslationUnitImpl();
-    return result;
-  }
-
   llvm::CrashRecoveryContext CRC;
 
   if (!RunSafely(CRC, IndexTranslationUnitImpl)) {
diff --git a/tools/libclang/libclang.exports b/tools/libclang/libclang.exports
index fb4547a..67e4b43 100644
--- a/tools/libclang/libclang.exports
+++ b/tools/libclang/libclang.exports
@@ -2,6 +2,7 @@
 clang_CXCursorSet_insert
 clang_CXIndex_getGlobalOptions
 clang_CXIndex_setGlobalOptions
+clang_CXIndex_setInvocationEmissionPathOption
 clang_CXXConstructor_isConvertingConstructor
 clang_CXXConstructor_isCopyConstructor
 clang_CXXConstructor_isDefaultConstructor
@@ -12,6 +13,7 @@
 clang_CXXMethod_isPureVirtual
 clang_CXXMethod_isStatic
 clang_CXXMethod_isVirtual
+clang_CXXRecord_isAbstract
 clang_EnumDecl_isScoped
 clang_Cursor_getArgument
 clang_Cursor_getNumTemplateArguments
@@ -176,6 +178,8 @@
 clang_getCursorCompletionString
 clang_getCursorDefinition
 clang_getCursorDisplayName
+clang_getCursorPrintingPolicy
+clang_getCursorPrettyPrinted
 clang_getCursorExtent
 clang_getCursorExceptionSpecificationType
 clang_getCursorKind
@@ -218,6 +222,7 @@
 clang_getFieldDeclBitWidth
 clang_getExpansionLocation
 clang_getFile
+clang_getFileContents
 clang_getFileLocation
 clang_getFileName
 clang_getFileTime
@@ -288,6 +293,7 @@
 clang_isConstQualifiedType
 clang_isCursorDefinition
 clang_isDeclaration
+clang_isInvalidDeclaration
 clang_isExpression
 clang_isFileMultipleIncludeGuarded
 clang_isFunctionTypeVariadic
@@ -355,3 +361,6 @@
 clang_EvalResult_getAsDouble
 clang_EvalResult_getAsStr
 clang_EvalResult_dispose
+clang_PrintingPolicy_getProperty
+clang_PrintingPolicy_setProperty
+clang_PrintingPolicy_dispose
diff --git a/tools/scan-build-py/README.md b/tools/scan-build-py/README.md
index 54bfc37..1b6fc48 100644
--- a/tools/scan-build-py/README.md
+++ b/tools/scan-build-py/README.md
@@ -41,6 +41,32 @@
 Use `--help` to know more about the commands.
 
 
+How to use the experimental Cross Translation Unit analysis
+-----------------------------------------------------------
+
+To run the CTU analysis, a compilation database file has to be created:
+
+    $ intercept-build <your build command>
+
+To run the Clang Static Analyzer against a compilation database
+with CTU analysis enabled, execute:
+    
+    $ analyze-build --ctu
+
+For CTU analysis an additional (function-definition) collection-phase is required. 
+For debugging purposes, it is possible to separately execute the collection 
+and the analysis phase. By doing this, the intermediate files used for 
+the analysis are kept on the disk in `./ctu-dir`.
+    
+    # Collect and store the data required by the CTU analysis
+    $ analyze-build --ctu-collect-only
+    
+    # Analyze using the previously collected data
+    $ analyze-build --ctu-analyze-only
+
+Use `--help` to get more information about the commands.
+
+
 Limitations
 -----------
 
diff --git a/tools/scan-build-py/libscanbuild/__init__.py b/tools/scan-build-py/libscanbuild/__init__.py
index 800926e..e7b7487 100644
--- a/tools/scan-build-py/libscanbuild/__init__.py
+++ b/tools/scan-build-py/libscanbuild/__init__.py
@@ -19,6 +19,9 @@
 
 Execution = collections.namedtuple('Execution', ['pid', 'cwd', 'cmd'])
 
+CtuConfig = collections.namedtuple('CtuConfig', ['collect', 'analyze', 'dir',
+                                                 'func_map_cmd'])
+
 
 def duplicate_check(method):
     """ Predicate to detect duplicated entries.
diff --git a/tools/scan-build-py/libscanbuild/analyze.py b/tools/scan-build-py/libscanbuild/analyze.py
index b5614b5..3c93b11 100644
--- a/tools/scan-build-py/libscanbuild/analyze.py
+++ b/tools/scan-build-py/libscanbuild/analyze.py
@@ -22,16 +22,19 @@
 import subprocess
 import contextlib
 import datetime
+import shutil
+import glob
+from collections import defaultdict
 
 from libscanbuild import command_entry_point, compiler_wrapper, \
-    wrapper_environment, run_build, run_command
+    wrapper_environment, run_build, run_command, CtuConfig
 from libscanbuild.arguments import parse_args_for_scan_build, \
     parse_args_for_analyze_build
 from libscanbuild.intercept import capture
 from libscanbuild.report import document
 from libscanbuild.compilation import split_command, classify_source, \
     compiler_language
-from libscanbuild.clang import get_version, get_arguments
+from libscanbuild.clang import get_version, get_arguments, get_triple_arch
 from libscanbuild.shell import decode
 
 __all__ = ['scan_build', 'analyze_build', 'analyze_compiler_wrapper']
@@ -39,6 +42,9 @@
 COMPILER_WRAPPER_CC = 'analyze-cc'
 COMPILER_WRAPPER_CXX = 'analyze-c++'
 
+CTU_FUNCTION_MAP_FILENAME = 'externalFnMap.txt'
+CTU_TEMP_FNMAP_FOLDER = 'tmpExternalFnMaps'
+
 
 @command_entry_point
 def scan_build():
@@ -56,7 +62,7 @@
             exit_code = capture(args)
             # Run the analyzer against the captured commands.
             if need_analyzer(args.build):
-                run_analyzer_parallel(args)
+                govern_analyzer_runs(args)
         else:
             # Run build command and analyzer with compiler wrappers.
             environment = setup_environment(args)
@@ -75,7 +81,7 @@
     # will re-assign the report directory as new output
     with report_directory(args.output, args.keep_empty) as args.output:
         # Run the analyzer against a compilation db.
-        run_analyzer_parallel(args)
+        govern_analyzer_runs(args)
         # Cover report generation and bug counting.
         number_of_bugs = document(args)
         # Set exit status as it was requested.
@@ -95,6 +101,108 @@
     return len(args) and not re.search('configure|autogen', args[0])
 
 
+def prefix_with(constant, pieces):
+    """ From a sequence create another sequence where every second element
+    is from the original sequence and the odd elements are the prefix.
+
+    eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3] """
+
+    return [elem for piece in pieces for elem in [constant, piece]]
+
+
+def get_ctu_config_from_args(args):
+    """ CTU configuration is created from the chosen phases and dir. """
+
+    return (
+        CtuConfig(collect=args.ctu_phases.collect,
+                  analyze=args.ctu_phases.analyze,
+                  dir=args.ctu_dir,
+                  func_map_cmd=args.func_map_cmd)
+        if hasattr(args, 'ctu_phases') and hasattr(args.ctu_phases, 'dir')
+        else CtuConfig(collect=False, analyze=False, dir='', func_map_cmd=''))
+
+
+def get_ctu_config_from_json(ctu_conf_json):
+    """ CTU configuration is created from the chosen phases and dir. """
+
+    ctu_config = json.loads(ctu_conf_json)
+    # Recover namedtuple from json when coming from analyze-cc or analyze-c++
+    return CtuConfig(collect=ctu_config[0],
+                     analyze=ctu_config[1],
+                     dir=ctu_config[2],
+                     func_map_cmd=ctu_config[3])
+
+
+def create_global_ctu_function_map(func_map_lines):
+    """ Takes iterator of individual function maps and creates a global map
+    keeping only unique names. We leave conflicting names out of CTU.
+
+    :param func_map_lines: Contains the id of a function (mangled name) and
+    the originating source (the corresponding AST file) name.
+    :type func_map_lines: Iterator of str.
+    :returns: Mangled name - AST file pairs.
+    :rtype: List of (str, str) tuples.
+    """
+
+    mangled_to_asts = defaultdict(set)
+
+    for line in func_map_lines:
+        mangled_name, ast_file = line.strip().split(' ', 1)
+        mangled_to_asts[mangled_name].add(ast_file)
+
+    mangled_ast_pairs = []
+
+    for mangled_name, ast_files in mangled_to_asts.items():
+        if len(ast_files) == 1:
+            mangled_ast_pairs.append((mangled_name, next(iter(ast_files))))
+
+    return mangled_ast_pairs
+
+
+def merge_ctu_func_maps(ctudir):
+    """ Merge individual function maps into a global one.
+
+    As the collect phase runs parallel on multiple threads, all compilation
+    units are separately mapped into a temporary file in CTU_TEMP_FNMAP_FOLDER.
+    These function maps contain the mangled names of functions and the source
+    (AST generated from the source) which had them.
+    These files should be merged at the end into a global map file:
+    CTU_FUNCTION_MAP_FILENAME."""
+
+    def generate_func_map_lines(fnmap_dir):
+        """ Iterate over all lines of input files in a determined order. """
+
+        files = glob.glob(os.path.join(fnmap_dir, '*'))
+        files.sort()
+        for filename in files:
+            with open(filename, 'r') as in_file:
+                for line in in_file:
+                    yield line
+
+    def write_global_map(arch, mangled_ast_pairs):
+        """ Write (mangled function name, ast file) pairs into final file. """
+
+        extern_fns_map_file = os.path.join(ctudir, arch,
+                                           CTU_FUNCTION_MAP_FILENAME)
+        with open(extern_fns_map_file, 'w') as out_file:
+            for mangled_name, ast_file in mangled_ast_pairs:
+                out_file.write('%s %s\n' % (mangled_name, ast_file))
+
+    triple_arches = glob.glob(os.path.join(ctudir, '*'))
+    for triple_path in triple_arches:
+        if os.path.isdir(triple_path):
+            triple_arch = os.path.basename(triple_path)
+            fnmap_dir = os.path.join(ctudir, triple_arch,
+                                     CTU_TEMP_FNMAP_FOLDER)
+
+            func_map_lines = generate_func_map_lines(fnmap_dir)
+            mangled_ast_pairs = create_global_ctu_function_map(func_map_lines)
+            write_global_map(triple_arch, mangled_ast_pairs)
+
+            # Remove all temporary files
+            shutil.rmtree(fnmap_dir, ignore_errors=True)
+
+
 def run_analyzer_parallel(args):
     """ Runs the analyzer against the given compilation database. """
 
@@ -109,7 +217,8 @@
         'output_format': args.output_format,
         'output_failures': args.output_failures,
         'direct_args': analyzer_params(args),
-        'force_debug': args.force_debug
+        'force_debug': args.force_debug,
+        'ctu': get_ctu_config_from_args(args)
     }
 
     logging.debug('run analyzer against compilation database')
@@ -127,6 +236,38 @@
         pool.join()
 
 
+def govern_analyzer_runs(args):
+    """ Governs multiple runs in CTU mode or runs once in normal mode. """
+
+    ctu_config = get_ctu_config_from_args(args)
+    # If we do a CTU collect (1st phase) we remove all previous collection
+    # data first.
+    if ctu_config.collect:
+        shutil.rmtree(ctu_config.dir, ignore_errors=True)
+
+    # If the user asked for a collect (1st) and analyze (2nd) phase, we do an
+    # all-in-one run where we deliberately remove collection data before and
+    # also after the run. If the user asks only for a single phase data is
+    # left so multiple analyze runs can use the same data gathered by a single
+    # collection run.
+    if ctu_config.collect and ctu_config.analyze:
+        # CTU strings are coming from args.ctu_dir and func_map_cmd,
+        # so we can leave it empty
+        args.ctu_phases = CtuConfig(collect=True, analyze=False,
+                                    dir='', func_map_cmd='')
+        run_analyzer_parallel(args)
+        merge_ctu_func_maps(ctu_config.dir)
+        args.ctu_phases = CtuConfig(collect=False, analyze=True,
+                                    dir='', func_map_cmd='')
+        run_analyzer_parallel(args)
+        shutil.rmtree(ctu_config.dir, ignore_errors=True)
+    else:
+        # Single runs (collect or analyze) are launched from here.
+        run_analyzer_parallel(args)
+        if ctu_config.collect:
+            merge_ctu_func_maps(ctu_config.dir)
+
+
 def setup_environment(args):
     """ Set up environment for build command to interpose compiler wrapper. """
 
@@ -140,7 +281,8 @@
         'ANALYZE_BUILD_REPORT_FORMAT': args.output_format,
         'ANALYZE_BUILD_REPORT_FAILURES': 'yes' if args.output_failures else '',
         'ANALYZE_BUILD_PARAMETERS': ' '.join(analyzer_params(args)),
-        'ANALYZE_BUILD_FORCE_DEBUG': 'yes' if args.force_debug else ''
+        'ANALYZE_BUILD_FORCE_DEBUG': 'yes' if args.force_debug else '',
+        'ANALYZE_BUILD_CTU': json.dumps(get_ctu_config_from_args(args))
     })
     return environment
 
@@ -173,7 +315,8 @@
                                  '').split(' '),
         'force_debug': os.getenv('ANALYZE_BUILD_FORCE_DEBUG'),
         'directory': execution.cwd,
-        'command': [execution.cmd[0], '-c'] + compilation.flags
+        'command': [execution.cmd[0], '-c'] + compilation.flags,
+        'ctu': get_ctu_config_from_json(os.getenv('ANALYZE_BUILD_CTU'))
     }
     # call static analyzer against the compilation
     for source in compilation.files:
@@ -223,14 +366,6 @@
     """ A group of command line arguments can mapped to command
     line arguments of the analyzer. This method generates those. """
 
-    def prefix_with(constant, pieces):
-        """ From a sequence create another sequence where every second element
-        is from the original sequence and the odd elements are the prefix.
-
-        eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3] """
-
-        return [elem for piece in pieces for elem in [constant, piece]]
-
     result = []
 
     if args.store_model:
@@ -294,8 +429,9 @@
           'direct_args',  # arguments from command line
           'force_debug',  # kill non debug macros
           'output_dir',  # where generated report files shall go
-          'output_format',  # it's 'plist' or 'html' or both
-          'output_failures'])  # generate crash reports or not
+          'output_format',  # it's 'plist', 'html', both or plist-multi-file
+          'output_failures',  # generate crash reports or not
+          'ctu'])  # ctu control options
 def run(opts):
     """ Entry point to run (or not) static analyzer against a single entry
     of the compilation database.
@@ -317,7 +453,7 @@
 
         return arch_check(opts)
     except Exception:
-        logging.error("Problem occured during analyzis.", exc_info=1)
+        logging.error("Problem occurred during analyzis.", exc_info=1)
         return None
 
 
@@ -383,7 +519,10 @@
 
     def target():
         """ Creates output file name for reports. """
-        if opts['output_format'] in {'plist', 'plist-html'}:
+        if opts['output_format'] in {
+                'plist',
+                'plist-html',
+                'plist-multi-file'}:
             (handle, name) = tempfile.mkstemp(prefix='report-',
                                               suffix='.plist',
                                               dir=opts['output_dir'])
@@ -407,8 +546,109 @@
         return result
 
 
+def func_map_list_src_to_ast(func_src_list):
+    """ Turns textual function map list with source files into a
+    function map list with ast files. """
+
+    func_ast_list = []
+    for fn_src_txt in func_src_list:
+        mangled_name, path = fn_src_txt.split(" ", 1)
+        # Normalize path on windows as well
+        path = os.path.splitdrive(path)[1]
+        # Make relative path out of absolute
+        path = path[1:] if path[0] == os.sep else path
+        ast_path = os.path.join("ast", path + ".ast")
+        func_ast_list.append(mangled_name + " " + ast_path)
+    return func_ast_list
+
+
+@require(['clang', 'directory', 'flags', 'direct_args', 'file', 'ctu'])
+def ctu_collect_phase(opts):
+    """ Preprocess source by generating all data needed by CTU analysis. """
+
+    def generate_ast(triple_arch):
+        """ Generates ASTs for the current compilation command. """
+
+        args = opts['direct_args'] + opts['flags']
+        ast_joined_path = os.path.join(opts['ctu'].dir, triple_arch, 'ast',
+                                       os.path.realpath(opts['file'])[1:] +
+                                       '.ast')
+        ast_path = os.path.abspath(ast_joined_path)
+        ast_dir = os.path.dirname(ast_path)
+        if not os.path.isdir(ast_dir):
+            try:
+                os.makedirs(ast_dir)
+            except OSError:
+                # In case an other process already created it.
+                pass
+        ast_command = [opts['clang'], '-emit-ast']
+        ast_command.extend(args)
+        ast_command.append('-w')
+        ast_command.append(opts['file'])
+        ast_command.append('-o')
+        ast_command.append(ast_path)
+        logging.debug("Generating AST using '%s'", ast_command)
+        run_command(ast_command, cwd=opts['directory'])
+
+    def map_functions(triple_arch):
+        """ Generate function map file for the current source. """
+
+        args = opts['direct_args'] + opts['flags']
+        funcmap_command = [opts['ctu'].func_map_cmd]
+        funcmap_command.append(opts['file'])
+        funcmap_command.append('--')
+        funcmap_command.extend(args)
+        logging.debug("Generating function map using '%s'", funcmap_command)
+        func_src_list = run_command(funcmap_command, cwd=opts['directory'])
+        func_ast_list = func_map_list_src_to_ast(func_src_list)
+        extern_fns_map_folder = os.path.join(opts['ctu'].dir, triple_arch,
+                                             CTU_TEMP_FNMAP_FOLDER)
+        if not os.path.isdir(extern_fns_map_folder):
+            try:
+                os.makedirs(extern_fns_map_folder)
+            except OSError:
+                # In case an other process already created it.
+                pass
+        if func_ast_list:
+            with tempfile.NamedTemporaryFile(mode='w',
+                                             dir=extern_fns_map_folder,
+                                             delete=False) as out_file:
+                out_file.write("\n".join(func_ast_list) + "\n")
+
+    cwd = opts['directory']
+    cmd = [opts['clang'], '--analyze'] + opts['direct_args'] + opts['flags'] \
+        + [opts['file']]
+    triple_arch = get_triple_arch(cmd, cwd)
+    generate_ast(triple_arch)
+    map_functions(triple_arch)
+
+
+@require(['ctu'])
+def dispatch_ctu(opts, continuation=run_analyzer):
+    """ Execute only one phase of 2 phases of CTU if needed. """
+
+    ctu_config = opts['ctu']
+
+    if ctu_config.collect or ctu_config.analyze:
+        assert ctu_config.collect != ctu_config.analyze
+        if ctu_config.collect:
+            return ctu_collect_phase(opts)
+        if ctu_config.analyze:
+            cwd = opts['directory']
+            cmd = [opts['clang'], '--analyze'] + opts['direct_args'] \
+                + opts['flags'] + [opts['file']]
+            triarch = get_triple_arch(cmd, cwd)
+            ctu_options = ['ctu-dir=' + os.path.join(ctu_config.dir, triarch),
+                           'experimental-enable-naive-ctu-analysis=true']
+            analyzer_options = prefix_with('-analyzer-config', ctu_options)
+            direct_options = prefix_with('-Xanalyzer', analyzer_options)
+            opts['direct_args'].extend(direct_options)
+
+    return continuation(opts)
+
+
 @require(['flags', 'force_debug'])
-def filter_debug_flags(opts, continuation=run_analyzer):
+def filter_debug_flags(opts, continuation=dispatch_ctu):
     """ Filter out nondebug macros when requested. """
 
     if opts.pop('force_debug'):
@@ -475,6 +715,7 @@
         logging.debug('analysis, on default arch')
         return continuation(opts)
 
+
 # To have good results from static analyzer certain compiler options shall be
 # omitted. The compiler flag filtering only affects the static analyzer run.
 #
diff --git a/tools/scan-build-py/libscanbuild/arguments.py b/tools/scan-build-py/libscanbuild/arguments.py
index 2735123..00679a4 100644
--- a/tools/scan-build-py/libscanbuild/arguments.py
+++ b/tools/scan-build-py/libscanbuild/arguments.py
@@ -18,8 +18,8 @@
 import argparse
 import logging
 import tempfile
-from libscanbuild import reconfigure_logging
-from libscanbuild.clang import get_checkers
+from libscanbuild import reconfigure_logging, CtuConfig
+from libscanbuild.clang import get_checkers, is_ctu_capable
 
 __all__ = ['parse_args_for_intercept_build', 'parse_args_for_analyze_build',
            'parse_args_for_scan_build']
@@ -98,6 +98,11 @@
         # add cdb parameter invisibly to make report module working.
         args.cdb = 'compile_commands.json'
 
+    # Make ctu_dir an abspath as it is needed inside clang
+    if not from_build_command and hasattr(args, 'ctu_phases') \
+            and hasattr(args.ctu_phases, 'dir'):
+        args.ctu_dir = os.path.abspath(args.ctu_dir)
+
 
 def validate_args_for_analyze(parser, args, from_build_command):
     """ Command line parsing is done by the argparse module, but semantic
@@ -122,6 +127,18 @@
     elif not from_build_command and not os.path.exists(args.cdb):
         parser.error(message='compilation database is missing')
 
+    # If the user wants CTU mode
+    if not from_build_command and hasattr(args, 'ctu_phases') \
+            and hasattr(args.ctu_phases, 'dir'):
+        # If CTU analyze_only, the input directory should exist
+        if args.ctu_phases.analyze and not args.ctu_phases.collect \
+                and not os.path.exists(args.ctu_dir):
+            parser.error(message='missing CTU directory')
+        # Check CTU capability via checking clang-func-mapping
+        if not is_ctu_capable(args.func_map_cmd):
+            parser.error(message="""This version of clang does not support CTU
+            functionality or clang-func-mapping command not found.""")
+
 
 def create_intercept_parser():
     """ Creates a parser for command-line arguments to 'intercept'. """
@@ -218,7 +235,15 @@
         default='html',
         action='store_const',
         help="""Cause the results as a set of .html and .plist files.""")
-    # TODO: implement '-view '
+    format_group.add_argument(
+        '--plist-multi-file',
+        '-plist-multi-file',
+        dest='output_format',
+        const='plist-multi-file',
+        default='html',
+        action='store_const',
+        help="""Cause the results as a set of .plist files with extra
+        information on related files.""")
 
     advanced = parser.add_argument_group('advanced options')
     advanced.add_argument(
@@ -333,6 +358,51 @@
     if from_build_command:
         parser.add_argument(
             dest='build', nargs=argparse.REMAINDER, help="""Command to run.""")
+    else:
+        ctu = parser.add_argument_group('cross translation unit analysis')
+        ctu_mutex_group = ctu.add_mutually_exclusive_group()
+        ctu_mutex_group.add_argument(
+            '--ctu',
+            action='store_const',
+            const=CtuConfig(collect=True, analyze=True,
+                            dir='', func_map_cmd=''),
+            dest='ctu_phases',
+            help="""Perform cross translation unit (ctu) analysis (both collect
+            and analyze phases) using default <ctu-dir> for temporary output.
+            At the end of the analysis, the temporary directory is removed.""")
+        ctu.add_argument(
+            '--ctu-dir',
+            metavar='<ctu-dir>',
+            dest='ctu_dir',
+            default='ctu-dir',
+            help="""Defines the temporary directory used between ctu
+            phases.""")
+        ctu_mutex_group.add_argument(
+            '--ctu-collect-only',
+            action='store_const',
+            const=CtuConfig(collect=True, analyze=False,
+                            dir='', func_map_cmd=''),
+            dest='ctu_phases',
+            help="""Perform only the collect phase of ctu.
+            Keep <ctu-dir> for further use.""")
+        ctu_mutex_group.add_argument(
+            '--ctu-analyze-only',
+            action='store_const',
+            const=CtuConfig(collect=False, analyze=True,
+                            dir='', func_map_cmd=''),
+            dest='ctu_phases',
+            help="""Perform only the analyze phase of ctu. <ctu-dir> should be
+            present and will not be removed after analysis.""")
+        ctu.add_argument(
+            '--use-func-map-cmd',
+            metavar='<path>',
+            dest='func_map_cmd',
+            default='clang-func-mapping',
+            help="""'%(prog)s' uses the 'clang-func-mapping' executable
+            relative to itself for generating function maps for static
+            analysis. One can override this behavior with this option by using
+            the 'clang-func-mapping' packaged with Xcode (on OS X) or from the
+            PATH.""")
     return parser
 
 
diff --git a/tools/scan-build-py/libscanbuild/clang.py b/tools/scan-build-py/libscanbuild/clang.py
index 192e708..ab42206 100644
--- a/tools/scan-build-py/libscanbuild/clang.py
+++ b/tools/scan-build-py/libscanbuild/clang.py
@@ -8,11 +8,13 @@
 Since Clang command line interface is so rich, but this project is using only
 a subset of that, it makes sense to create a function specific wrapper. """
 
+import subprocess
 import re
 from libscanbuild import run_command
 from libscanbuild.shell import decode
 
-__all__ = ['get_version', 'get_arguments', 'get_checkers']
+__all__ = ['get_version', 'get_arguments', 'get_checkers', 'is_ctu_capable',
+           'get_triple_arch']
 
 # regex for activated checker
 ACTIVE_CHECKER_PATTERN = re.compile(r'^-analyzer-checker=(.*)$')
@@ -152,3 +154,26 @@
         raise Exception('Could not query Clang for available checkers.')
 
     return checkers
+
+
+def is_ctu_capable(func_map_cmd):
+    """ Detects if the current (or given) clang and function mapping
+    executables are CTU compatible. """
+
+    try:
+        run_command([func_map_cmd, '-version'])
+    except (OSError, subprocess.CalledProcessError):
+        return False
+    return True
+
+
+def get_triple_arch(command, cwd):
+    """Returns the architecture part of the target triple for the given
+    compilation command. """
+
+    cmd = get_arguments(command, cwd)
+    try:
+        separator = cmd.index("-triple")
+        return cmd[separator + 1]
+    except (IndexError, ValueError):
+        return ""
diff --git a/tools/scan-build-py/libscanbuild/report.py b/tools/scan-build-py/libscanbuild/report.py
index 54b9695..b3753c1 100644
--- a/tools/scan-build-py/libscanbuild/report.py
+++ b/tools/scan-build-py/libscanbuild/report.py
@@ -13,7 +13,6 @@
 import os.path
 import sys
 import shutil
-import itertools
 import plistlib
 import glob
 import json
@@ -255,24 +254,29 @@
 
 
 def read_bugs(output_dir, html):
+    # type: (str, bool) -> Generator[Dict[str, Any], None, None]
     """ Generate a unique sequence of bugs from given output directory.
 
     Duplicates can be in a project if the same module was compiled multiple
     times with different compiler options. These would be better to show in
     the final report (cover) only once. """
 
-    parser = parse_bug_html if html else parse_bug_plist
-    pattern = '*.html' if html else '*.plist'
+    def empty(file_name):
+        return os.stat(file_name).st_size == 0
 
     duplicate = duplicate_check(
         lambda bug: '{bug_line}.{bug_path_length}:{bug_file}'.format(**bug))
 
-    bugs = itertools.chain.from_iterable(
-        # parser creates a bug generator not the bug itself
-        parser(filename)
-        for filename in glob.iglob(os.path.join(output_dir, pattern)))
+    # get the right parser for the job.
+    parser = parse_bug_html if html else parse_bug_plist
+    # get the input files, which are not empty.
+    pattern = os.path.join(output_dir, '*.html' if html else '*.plist')
+    bug_files = (file for file in glob.iglob(pattern) if not empty(file))
 
-    return (bug for bug in bugs if not duplicate(bug))
+    for bug_file in bug_files:
+        for bug in parser(bug_file):
+            if not duplicate(bug):
+                yield bug
 
 
 def parse_bug_plist(filename):
diff --git a/tools/scan-build-py/tests/unit/test_analyze.py b/tools/scan-build-py/tests/unit/test_analyze.py
index a250ff2..9964a29 100644
--- a/tools/scan-build-py/tests/unit/test_analyze.py
+++ b/tools/scan-build-py/tests/unit/test_analyze.py
@@ -4,12 +4,12 @@
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
 
-import libear
-import libscanbuild.analyze as sut
 import unittest
 import re
 import os
 import os.path
+import libear
+import libscanbuild.analyze as sut
 
 
 class ReportDirectoryTest(unittest.TestCase):
@@ -333,3 +333,83 @@
 
     def test_method_exception_not_caught(self):
         self.assertRaises(Exception, method_exception_from_inside, dict())
+
+
+class PrefixWithTest(unittest.TestCase):
+
+    def test_gives_empty_on_empty(self):
+        res = sut.prefix_with(0, [])
+        self.assertFalse(res)
+
+    def test_interleaves_prefix(self):
+        res = sut.prefix_with(0, [1, 2, 3])
+        self.assertListEqual([0, 1, 0, 2, 0, 3], res)
+
+
+class MergeCtuMapTest(unittest.TestCase):
+
+    def test_no_map_gives_empty(self):
+        pairs = sut.create_global_ctu_function_map([])
+        self.assertFalse(pairs)
+
+    def test_multiple_maps_merged(self):
+        concat_map = ['c:@F@fun1#I# ast/fun1.c.ast',
+                      'c:@F@fun2#I# ast/fun2.c.ast',
+                      'c:@F@fun3#I# ast/fun3.c.ast']
+        pairs = sut.create_global_ctu_function_map(concat_map)
+        self.assertTrue(('c:@F@fun1#I#', 'ast/fun1.c.ast') in pairs)
+        self.assertTrue(('c:@F@fun2#I#', 'ast/fun2.c.ast') in pairs)
+        self.assertTrue(('c:@F@fun3#I#', 'ast/fun3.c.ast') in pairs)
+        self.assertEqual(3, len(pairs))
+
+    def test_not_unique_func_left_out(self):
+        concat_map = ['c:@F@fun1#I# ast/fun1.c.ast',
+                      'c:@F@fun2#I# ast/fun2.c.ast',
+                      'c:@F@fun1#I# ast/fun7.c.ast']
+        pairs = sut.create_global_ctu_function_map(concat_map)
+        self.assertFalse(('c:@F@fun1#I#', 'ast/fun1.c.ast') in pairs)
+        self.assertFalse(('c:@F@fun1#I#', 'ast/fun7.c.ast') in pairs)
+        self.assertTrue(('c:@F@fun2#I#', 'ast/fun2.c.ast') in pairs)
+        self.assertEqual(1, len(pairs))
+
+    def test_duplicates_are_kept(self):
+        concat_map = ['c:@F@fun1#I# ast/fun1.c.ast',
+                      'c:@F@fun2#I# ast/fun2.c.ast',
+                      'c:@F@fun1#I# ast/fun1.c.ast']
+        pairs = sut.create_global_ctu_function_map(concat_map)
+        self.assertTrue(('c:@F@fun1#I#', 'ast/fun1.c.ast') in pairs)
+        self.assertTrue(('c:@F@fun2#I#', 'ast/fun2.c.ast') in pairs)
+        self.assertEqual(2, len(pairs))
+
+    def test_space_handled_in_source(self):
+        concat_map = ['c:@F@fun1#I# ast/f un.c.ast']
+        pairs = sut.create_global_ctu_function_map(concat_map)
+        self.assertTrue(('c:@F@fun1#I#', 'ast/f un.c.ast') in pairs)
+        self.assertEqual(1, len(pairs))
+
+
+class FuncMapSrcToAstTest(unittest.TestCase):
+
+    def test_empty_gives_empty(self):
+        fun_ast_lst = sut.func_map_list_src_to_ast([])
+        self.assertFalse(fun_ast_lst)
+
+    def test_sources_to_asts(self):
+        fun_src_lst = ['c:@F@f1#I# ' + os.path.join(os.sep + 'path', 'f1.c'),
+                       'c:@F@f2#I# ' + os.path.join(os.sep + 'path', 'f2.c')]
+        fun_ast_lst = sut.func_map_list_src_to_ast(fun_src_lst)
+        self.assertTrue('c:@F@f1#I# ' +
+                        os.path.join('ast', 'path', 'f1.c.ast')
+                        in fun_ast_lst)
+        self.assertTrue('c:@F@f2#I# ' +
+                        os.path.join('ast', 'path', 'f2.c.ast')
+                        in fun_ast_lst)
+        self.assertEqual(2, len(fun_ast_lst))
+
+    def test_spaces_handled(self):
+        fun_src_lst = ['c:@F@f1#I# ' + os.path.join(os.sep + 'path', 'f 1.c')]
+        fun_ast_lst = sut.func_map_list_src_to_ast(fun_src_lst)
+        self.assertTrue('c:@F@f1#I# ' +
+                        os.path.join('ast', 'path', 'f 1.c.ast')
+                        in fun_ast_lst)
+        self.assertEqual(1, len(fun_ast_lst))
diff --git a/tools/scan-build-py/tests/unit/test_clang.py b/tools/scan-build-py/tests/unit/test_clang.py
index eef8c26..07ac4d9 100644
--- a/tools/scan-build-py/tests/unit/test_clang.py
+++ b/tools/scan-build-py/tests/unit/test_clang.py
@@ -92,3 +92,15 @@
         self.assertEqual('Checker One description', result.get('checker.one'))
         self.assertTrue('checker.two' in result)
         self.assertEqual('Checker Two description', result.get('checker.two'))
+
+
+class ClangIsCtuCapableTest(unittest.TestCase):
+    def test_ctu_not_found(self):
+        is_ctu = sut.is_ctu_capable('not-found-clang-func-mapping')
+        self.assertFalse(is_ctu)
+
+
+class ClangGetTripleArchTest(unittest.TestCase):
+    def test_arch_is_not_empty(self):
+        arch = sut.get_triple_arch(['clang', '-E', '-'], '.')
+        self.assertTrue(len(arch) > 0)
diff --git a/tools/scan-build/bin/scan-build b/tools/scan-build/bin/scan-build
index cbf3bf3..49018eb 100755
--- a/tools/scan-build/bin/scan-build
+++ b/tools/scan-build/bin/scan-build
@@ -51,6 +51,7 @@
   OutputDir => undef,        # Parent directory to store HTML files.
   HtmlTitle => basename($CurrentDir)." - scan-build results",
   IgnoreErrors => 0,         # Ignore build errors.
+  KeepCC => 0,               # Do not override CC and CXX make variables
   ViewResults => 0,          # View results when the build terminates.
   ExitStatusFoundBugs => 0,  # Exit status reflects whether bugs were found
   ShowDescription => 0,      # Display the description of the defect in the list
@@ -1062,6 +1063,7 @@
 sub RunBuildCommand {
   my $Args = shift;
   my $IgnoreErrors = shift;
+  my $KeepCC = shift;
   my $Cmd = $Args->[0];
   my $CCAnalyzer = shift;
   my $CXXAnalyzer = shift;
@@ -1099,8 +1101,10 @@
     unshift @$Args, $CXXAnalyzer;
   }
   elsif ($Cmd eq "make" or $Cmd eq "gmake" or $Cmd eq "mingw32-make") {
-    AddIfNotPresent($Args, "CC=$CCAnalyzer");
-    AddIfNotPresent($Args, "CXX=$CXXAnalyzer");
+    if (!$KeepCC) {
+      AddIfNotPresent($Args, "CC=$CCAnalyzer");
+      AddIfNotPresent($Args, "CXX=$CXXAnalyzer");
+    }
     if ($IgnoreErrors) {
       AddIfNotPresent($Args,"-k");
       AddIfNotPresent($Args,"-i");
@@ -1158,6 +1162,12 @@
    currently supports make and xcodebuild. This is a convenience option; one
    can specify this behavior directly using build options.
 
+ --keep-cc
+
+   Do not override CC and CXX make variables. Useful when running make in
+   autoconf-based (and similar) projects where configure can add extra flags
+   to those variables.
+
  --html-title [title]
  --html-title=[title]
 
@@ -1532,6 +1542,12 @@
       next;
     }
 
+    if ($arg eq "--keep-cc") {
+      shift @$Args;
+      $Options{KeepCC} = 1;
+      next;
+    }
+
     if ($arg =~ /^--use-cc(=(.+))?$/) {
       shift @$Args;
       my $cc;
@@ -1838,8 +1854,8 @@
 );
 
 # Run the build.
-my $ExitStatus = RunBuildCommand(\@ARGV, $Options{IgnoreErrors}, $Cmd, $CmdCXX,
-                                \%EnvVars);
+my $ExitStatus = RunBuildCommand(\@ARGV, $Options{IgnoreErrors}, $Options{KeepCC},
+	                        $Cmd, $CmdCXX, \%EnvVars);
 
 if (defined $Options{OutputFormat}) {
   if ($Options{OutputFormat} =~ /plist/) {
diff --git a/tools/scan-build/libexec/ccc-analyzer b/tools/scan-build/libexec/ccc-analyzer
index b0ec7e7..73cd2ff 100755
--- a/tools/scan-build/libexec/ccc-analyzer
+++ b/tools/scan-build/libexec/ccc-analyzer
@@ -419,7 +419,7 @@
   'cc'  => 'c++',
   'C'   => 'c++',
   'ii'  => 'c++-cpp-output',
-  'i'   => $IsCXX ? 'c++-cpp-output' : 'c-cpp-output',
+  'i'   => $IsCXX ? 'c++-cpp-output' : 'cpp-output',
   'm'   => 'objective-c',
   'mi'  => 'objective-c-cpp-output',
   'mm'  => 'objective-c++',
@@ -439,7 +439,7 @@
   "c" => 1,
   "c++" => 1,
   "objective-c++" => 1,
-  "c-cpp-output" => 1,
+  "cpp-output" => 1,
   "objective-c-cpp-output" => 1,
   "c++-cpp-output" => 1
 );
diff --git a/unittests/AST/ASTImporterTest.cpp b/unittests/AST/ASTImporterTest.cpp
index aea5bfa..feadd94 100644
--- a/unittests/AST/ASTImporterTest.cpp
+++ b/unittests/AST/ASTImporterTest.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MatchVerifier.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTImporter.h"
-#include "MatchVerifier.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Tooling/Tooling.h"
@@ -22,38 +22,50 @@
 namespace clang {
 namespace ast_matchers {
 
-typedef std::vector<std::string> StringVector;
+typedef std::vector<std::string> ArgVector;
+typedef std::vector<ArgVector> RunOptions;
 
-void getLangArgs(Language Lang, StringVector &Args) {
+static bool isCXX(Language Lang) {
+  return Lang == Lang_CXX || Lang == Lang_CXX11;
+}
+
+static RunOptions getRunOptionsForLanguage(Language Lang) {
+  ArgVector BasicArgs;
+  // Test with basic arguments.
   switch (Lang) {
   case Lang_C:
-    Args.insert(Args.end(), { "-x", "c", "-std=c99" });
+    BasicArgs = {"-x", "c", "-std=c99"};
     break;
   case Lang_C89:
-    Args.insert(Args.end(), { "-x", "c", "-std=c89" });
+    BasicArgs = {"-x", "c", "-std=c89"};
     break;
   case Lang_CXX:
-    Args.push_back("-std=c++98");
+    BasicArgs = {"-std=c++98", "-frtti"};
     break;
   case Lang_CXX11:
-    Args.push_back("-std=c++11");
+    BasicArgs = {"-std=c++11", "-frtti"};
     break;
   case Lang_OpenCL:
   case Lang_OBJCXX:
-    break;
+    llvm_unreachable("Not implemented yet!");
   }
+
+  // For C++, test with "-fdelayed-template-parsing" enabled to handle MSVC
+  // default behaviour.
+  if (isCXX(Lang)) {
+    ArgVector ArgsForDelayedTemplateParse = BasicArgs;
+    ArgsForDelayedTemplateParse.emplace_back("-fdelayed-template-parsing");
+    return {BasicArgs, ArgsForDelayedTemplateParse};
+  }
+
+  return {BasicArgs};
 }
 
 template<typename NodeType, typename MatcherType>
 testing::AssertionResult
-testImport(const std::string &FromCode, Language FromLang,
-           const std::string &ToCode, Language ToLang,
-           MatchVerifier<NodeType> &Verifier,
-           const MatcherType &AMatcher) {
-  StringVector FromArgs, ToArgs;
-  getLangArgs(FromLang, FromArgs);
-  getLangArgs(ToLang, ToArgs);
-
+testImport(const std::string &FromCode, const ArgVector &FromArgs,
+           const std::string &ToCode, const ArgVector &ToArgs,
+           MatchVerifier<NodeType> &Verifier, const MatcherType &AMatcher) {
   const char *const InputFileName = "input.cc";
   const char *const OutputFileName = "output.cc";
 
@@ -87,12 +99,16 @@
   if (FoundDecls.size() != 1)
     return testing::AssertionFailure() << "Multiple declarations were found!";
 
-  auto Imported = Importer.Import(*FoundDecls.begin());
+  // Sanity check: the node being imported should match in the same way as
+  // the result node.
+  EXPECT_TRUE(Verifier.match(FoundDecls.front(), AMatcher));
+
+  auto Imported = Importer.Import(FoundDecls.front());
   if (!Imported)
     return testing::AssertionFailure() << "Import failed, nullptr returned!";
 
   // This should dump source locations and assert if some source locations
-  // were not imported
+  // were not imported.
   SmallString<1024> ImportChecker;
   llvm::raw_svector_ostream ToNothing(ImportChecker);
   ToCtx.getTranslationUnitDecl()->print(ToNothing);
@@ -104,148 +120,154 @@
   return Verifier.match(Imported, AMatcher);
 }
 
+template<typename NodeType, typename MatcherType>
+void testImport(const std::string &FromCode, Language FromLang,
+                const std::string &ToCode, Language ToLang,
+                MatchVerifier<NodeType> &Verifier,
+                const MatcherType &AMatcher) {
+  auto RunOptsFrom = getRunOptionsForLanguage(FromLang);
+  auto RunOptsTo = getRunOptionsForLanguage(ToLang);
+  for (const auto &FromArgs : RunOptsFrom)
+    for (const auto &ToArgs : RunOptsTo)
+      EXPECT_TRUE(testImport(FromCode, FromArgs, ToCode, ToArgs,
+                             Verifier, AMatcher));
+}
+
+
 TEST(ImportExpr, ImportStringLiteral) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(testImport("void declToImport() { \"foo\"; }",
-                         Lang_CXX, "", Lang_CXX, Verifier,
-                         functionDecl(
-                           hasBody(
-                             compoundStmt(
-                               has(
-                                 stringLiteral(
-                                   hasType(
-                                     asString("const char [4]")))))))));
-  EXPECT_TRUE(testImport("void declToImport() { L\"foo\"; }",
-                         Lang_CXX, "", Lang_CXX, Verifier,
-                         functionDecl(
-                           hasBody(
-                             compoundStmt(
-                               has(
-                                 stringLiteral(
-                                   hasType(
-                                     asString("const wchar_t [4]")))))))));
-  EXPECT_TRUE(testImport("void declToImport() { \"foo\" \"bar\"; }",
-                         Lang_CXX, "", Lang_CXX, Verifier,
-                         functionDecl(
-                           hasBody(
-                             compoundStmt(
-                               has(
-                                 stringLiteral(
-                                   hasType(
-                                     asString("const char [7]")))))))));
+  testImport("void declToImport() { \"foo\"; }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     stringLiteral(
+                       hasType(
+                         asString("const char [4]"))))))));
+  testImport("void declToImport() { L\"foo\"; }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     stringLiteral(
+                       hasType(
+                        asString("const wchar_t [4]"))))))));
+  testImport("void declToImport() { \"foo\" \"bar\"; }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     stringLiteral(
+                       hasType(
+                         asString("const char [7]"))))))));
 }
 
 TEST(ImportExpr, ImportGNUNullExpr) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(testImport("void declToImport() { __null; }",
-                         Lang_CXX, "", Lang_CXX, Verifier,
-                         functionDecl(
-                           hasBody(
-                             compoundStmt(
-                               has(
-                                 gnuNullExpr(
-                                   hasType(isInteger()))))))));
+  testImport("void declToImport() { __null; }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     gnuNullExpr(
+                       hasType(isInteger())))))));
 }
 
 TEST(ImportExpr, ImportCXXNullPtrLiteralExpr) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(testImport("void declToImport() { nullptr; }",
-                         Lang_CXX11, "", Lang_CXX11, Verifier,
-                         functionDecl(
-                           hasBody(
-                             compoundStmt(
-                               has(
-                                 cxxNullPtrLiteralExpr()))))));
+  testImport("void declToImport() { nullptr; }",
+             Lang_CXX11, "", Lang_CXX11, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     cxxNullPtrLiteralExpr())))));
 }
 
 
 TEST(ImportExpr, ImportFloatinglLiteralExpr) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(testImport("void declToImport() { 1.0; }",
-                         Lang_CXX, "", Lang_CXX, Verifier,
-                         functionDecl(
-                           hasBody(
-                             compoundStmt(
-                               has(
-                                 floatLiteral(
-                                   equals(1.0),
-                                   hasType(asString("double")))))))));
-  EXPECT_TRUE(testImport("void declToImport() { 1.0e-5f; }",
-                         Lang_CXX, "", Lang_CXX, Verifier,
-                         functionDecl(
-                           hasBody(
-                             compoundStmt(
-                               has(
-                                 floatLiteral(
-                                   equals(1.0e-5f),
-                                   hasType(asString("float")))))))));
+  testImport("void declToImport() { 1.0; }",
+             Lang_C, "", Lang_C, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     floatLiteral(
+                       equals(1.0),
+                       hasType(asString("double"))))))));
+  testImport("void declToImport() { 1.0e-5f; }",
+              Lang_C, "", Lang_C, Verifier,
+              functionDecl(
+                hasBody(
+                  compoundStmt(
+                    has(
+                      floatLiteral(
+                        equals(1.0e-5f),
+                        hasType(asString("float"))))))));
 }
 
 TEST(ImportExpr, ImportCompoundLiteralExpr) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(
-        testImport(
-          "void declToImport() {"
-          "  struct s { int x; long y; unsigned z; }; "
-          "  (struct s){ 42, 0L, 1U }; }",
-          Lang_CXX, "", Lang_CXX, Verifier,
-          functionDecl(
-            hasBody(
-              compoundStmt(
-                has(
-                  compoundLiteralExpr(
-                    hasType(asString("struct s")),
-                    has(initListExpr(
-                      hasType(asString("struct s")),
-                      has(integerLiteral(
-                            equals(42), hasType(asString("int")))),
-                      has(integerLiteral(
-                            equals(0), hasType(asString("long")))),
-                      has(integerLiteral(
-                            equals(1),
-                            hasType(asString("unsigned int"))))
-                      )))))))));
+  testImport("void declToImport() {"
+             "  struct s { int x; long y; unsigned z; }; "
+             "  (struct s){ 42, 0L, 1U }; }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     compoundLiteralExpr(
+                       hasType(asString("struct s")),
+                       has(initListExpr(
+                         hasType(asString("struct s")),
+                         has(integerLiteral(
+                               equals(42), hasType(asString("int")))),
+                         has(integerLiteral(
+                               equals(0), hasType(asString("long")))),
+                         has(integerLiteral(
+                               equals(1),
+                               hasType(asString("unsigned int"))))
+                         ))))))));
 }
 
 TEST(ImportExpr, ImportCXXThisExpr) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(
-        testImport("class declToImport { void f() { this; } };",
-                   Lang_CXX, "", Lang_CXX, Verifier,
-                   cxxRecordDecl(
-                     hasMethod(
-                       hasBody(
-                         compoundStmt(
-                           has(
-                             cxxThisExpr(
-                               hasType(
-                                 asString("class declToImport *"))))))))));
+  testImport("class declToImport { void f() { this; } };",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             cxxRecordDecl(
+               hasMethod(
+                 hasBody(
+                   compoundStmt(
+                     has(
+                       cxxThisExpr(
+                         hasType(
+                           asString("class declToImport *")))))))));
 }
 
 TEST(ImportExpr, ImportAtomicExpr) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(testImport(
-      "void declToImport() { int *ptr; __atomic_load_n(ptr, 1); }", Lang_CXX,
-      "", Lang_CXX, Verifier,
-      functionDecl(hasBody(compoundStmt(has(atomicExpr(
-          has(ignoringParenImpCasts(
-              declRefExpr(hasDeclaration(varDecl(hasName("ptr"))),
-                          hasType(asString("int *"))))),
-          has(integerLiteral(equals(1), hasType(asString("int")))))))))));
+  testImport("void declToImport() { int *ptr; __atomic_load_n(ptr, 1); }",
+             Lang_C, "", Lang_C, Verifier,
+             functionDecl(hasBody(compoundStmt(has(atomicExpr(
+                 has(ignoringParenImpCasts(
+                     declRefExpr(hasDeclaration(varDecl(hasName("ptr"))),
+                                 hasType(asString("int *"))))),
+                 has(integerLiteral(equals(1), hasType(asString("int"))))))))));
 }
 
 TEST(ImportExpr, ImportLabelDeclAndAddrLabelExpr) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(
-        testImport(
-          "void declToImport() { loop: goto loop; &&loop; }",
-          Lang_CXX, "", Lang_CXX, Verifier,
-          functionDecl(
-            hasBody(
-              compoundStmt(
-                has(labelStmt(hasDeclaration(labelDecl(hasName("loop"))))),
-                has(addrLabelExpr(hasDeclaration(labelDecl(hasName("loop")))))
-                )))));
+  testImport(
+      "void declToImport() { loop: goto loop; &&loop; }", Lang_C, "", Lang_C,
+      Verifier,
+      functionDecl(hasBody(compoundStmt(
+          has(labelStmt(hasDeclaration(labelDecl(hasName("loop"))))),
+          has(addrLabelExpr(hasDeclaration(labelDecl(hasName("loop")))))))));
 }
 
 AST_MATCHER_P(TemplateDecl, hasTemplateDecl,
@@ -256,7 +278,7 @@
 
 TEST(ImportExpr, ImportParenListExpr) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(testImport(
+  testImport(
       "template<typename T> class dummy { void f() { dummy X(*this); } };"
       "typedef dummy<int> declToImport;"
       "template class dummy<int>;",
@@ -268,187 +290,181 @@
                   hasBody(compoundStmt(has(declStmt(hasSingleDecl(
                       varDecl(hasInitializer(parenListExpr(has(unaryOperator(
                           hasOperatorName("*"),
-                          hasUnaryOperand(cxxThisExpr()))))))))))))))))))))))));
+                          hasUnaryOperand(cxxThisExpr())))))))))))))))))))))));
 }
 
 TEST(ImportExpr, ImportSwitch) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(
-      testImport("void declToImport() { int b; switch (b) { case 1: break; } }",
-                 Lang_CXX, "", Lang_CXX, Verifier,
-                 functionDecl(hasBody(compoundStmt(
-                     has(switchStmt(has(compoundStmt(has(caseStmt()))))))))));
+  testImport("void declToImport() { int b; switch (b) { case 1: break; } }",
+             Lang_C, "", Lang_C, Verifier,
+             functionDecl(hasBody(compoundStmt(
+                 has(switchStmt(has(compoundStmt(has(caseStmt())))))))));
 }
 
 TEST(ImportExpr, ImportStmtExpr) {
   MatchVerifier<Decl> Verifier;
   // NOTE: has() ignores implicit casts, using hasDescendant() to match it
-  EXPECT_TRUE(
-        testImport(
-          "void declToImport() { int b; int a = b ?: 1; int C = ({int X=4; X;}); }",
-          Lang_CXX, "", Lang_CXX, Verifier,
-          functionDecl(
-            hasBody(
-              compoundStmt(
-                has(
-                  declStmt(
-                    hasSingleDecl(
-                      varDecl(
-                        hasName("C"),
-                        hasType(asString("int")),
-                        hasInitializer(
-                          stmtExpr(
-                            hasAnySubstatement(
-                              declStmt(
-                                hasSingleDecl(
-                                  varDecl(
-                                    hasName("X"),
-                                    hasType(asString("int")),
-                                    hasInitializer(
-                                      integerLiteral(equals(4))))))),
-                            hasDescendant(
-                              implicitCastExpr()
-                              ))))))))))));
+  testImport(
+    "void declToImport() { int b; int a = b ?: 1; int C = ({int X=4; X;}); }",
+    Lang_C, "", Lang_C, Verifier,
+    functionDecl(
+      hasBody(
+        compoundStmt(
+          has(
+            declStmt(
+              hasSingleDecl(
+                varDecl(
+                  hasName("C"),
+                  hasType(asString("int")),
+                  hasInitializer(
+                    stmtExpr(
+                      hasAnySubstatement(
+                        declStmt(
+                          hasSingleDecl(
+                            varDecl(
+                              hasName("X"),
+                              hasType(asString("int")),
+                              hasInitializer(
+                                integerLiteral(equals(4))))))),
+                      hasDescendant(
+                        implicitCastExpr()
+                        )))))))))));
 }
 
 TEST(ImportExpr, ImportConditionalOperator) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(
-        testImport(
-          "void declToImport() { true ? 1 : -5; }",
-          Lang_CXX, "", Lang_CXX, Verifier,
-          functionDecl(
-            hasBody(
-              compoundStmt(
-                has(
-                  conditionalOperator(
-                    hasCondition(cxxBoolLiteral(equals(true))),
-                    hasTrueExpression(integerLiteral(equals(1))),
-                    hasFalseExpression(
-                      unaryOperator(hasUnaryOperand(integerLiteral(equals(5))))
-                      ))))))));
+  testImport(
+    "void declToImport() { true ? 1 : -5; }",
+    Lang_CXX, "", Lang_CXX, Verifier,
+    functionDecl(
+      hasBody(
+        compoundStmt(
+          has(
+            conditionalOperator(
+              hasCondition(cxxBoolLiteral(equals(true))),
+              hasTrueExpression(integerLiteral(equals(1))),
+              hasFalseExpression(
+                unaryOperator(hasUnaryOperand(integerLiteral(equals(5))))
+                )))))));
 }
 
 TEST(ImportExpr, ImportBinaryConditionalOperator) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(
-        testImport(
-          "void declToImport() { 1 ?: -5; }",
-          Lang_CXX, "", Lang_CXX, Verifier,
-          functionDecl(
-            hasBody(
-              compoundStmt(
-                has(
-                  binaryConditionalOperator(
-                    hasCondition(
-                      implicitCastExpr(
-                        hasSourceExpression(
-                          opaqueValueExpr(
-                            hasSourceExpression(integerLiteral(equals(1))))),
-                        hasType(booleanType()))),
-                    hasTrueExpression(
-                      opaqueValueExpr(hasSourceExpression(
-                                        integerLiteral(equals(1))))),
-                    hasFalseExpression(
-                      unaryOperator(hasOperatorName("-"),
-                                    hasUnaryOperand(integerLiteral(equals(5)))))
-                      )))))));
+  testImport(
+    "void declToImport() { 1 ?: -5; }", Lang_CXX, "", Lang_CXX, Verifier,
+    functionDecl(
+      hasBody(
+        compoundStmt(
+          has(
+            binaryConditionalOperator(
+              hasCondition(
+                implicitCastExpr(
+                  hasSourceExpression(
+                    opaqueValueExpr(
+                      hasSourceExpression(integerLiteral(equals(1))))),
+                  hasType(booleanType()))),
+              hasTrueExpression(
+                opaqueValueExpr(hasSourceExpression(
+                                  integerLiteral(equals(1))))),
+              hasFalseExpression(
+                unaryOperator(hasOperatorName("-"),
+                              hasUnaryOperand(integerLiteral(equals(5)))))
+                ))))));
 }
 
 TEST(ImportExpr, ImportDesignatedInitExpr) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(testImport("void declToImport() {"
-                         "  struct point { double x; double y; };"
-                         "  struct point ptarray[10] = "
-                                "{ [2].y = 1.0, [2].x = 2.0, [0].x = 1.0 }; }",
-                         Lang_C, "", Lang_C, Verifier,
-                         functionDecl(
-                           hasBody(
-                             compoundStmt(
-                               has(
-                                 declStmt(
-                                   hasSingleDecl(
-                                     varDecl(
-                                       hasInitializer(
-                                         initListExpr(
-                                           hasSyntacticForm(
-                                             initListExpr(
-                                               has(
-                                                 designatedInitExpr(
-                                                   designatorCountIs(2),
-                                                   has(floatLiteral(
-                                                         equals(1.0))),
-                                                   has(integerLiteral(
-                                                         equals(2))))),
-                                               has(
-                                                 designatedInitExpr(
-                                                   designatorCountIs(2),
-                                                   has(floatLiteral(
-                                                         equals(2.0))),
-                                                   has(integerLiteral(
-                                                         equals(2))))),
-                                               has(
-                                                 designatedInitExpr(
-                                                   designatorCountIs(2),
-                                                   has(floatLiteral(
-                                                         equals(1.0))),
-                                                   has(integerLiteral(
-                                                         equals(0)))))
-                                               )))))))))))));
+  testImport("void declToImport() {"
+             "  struct point { double x; double y; };"
+             "  struct point ptarray[10] = "
+                    "{ [2].y = 1.0, [2].x = 2.0, [0].x = 1.0 }; }",
+             Lang_C, "", Lang_C, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     declStmt(
+                       hasSingleDecl(
+                         varDecl(
+                           hasInitializer(
+                             initListExpr(
+                               hasSyntacticForm(
+                                 initListExpr(
+                                   has(
+                                     designatedInitExpr(
+                                       designatorCountIs(2),
+                                       has(floatLiteral(
+                                             equals(1.0))),
+                                       has(integerLiteral(
+                                             equals(2))))),
+                                   has(
+                                     designatedInitExpr(
+                                       designatorCountIs(2),
+                                       has(floatLiteral(
+                                             equals(2.0))),
+                                       has(integerLiteral(
+                                             equals(2))))),
+                                   has(
+                                     designatedInitExpr(
+                                       designatorCountIs(2),
+                                       has(floatLiteral(
+                                             equals(1.0))),
+                                       has(integerLiteral(
+                                             equals(0)))))
+                                   ))))))))))));
 }
 
 
 TEST(ImportExpr, ImportPredefinedExpr) {
   MatchVerifier<Decl> Verifier;
   // __func__ expands as StringLiteral("declToImport")
-  EXPECT_TRUE(testImport("void declToImport() { __func__; }",
-                         Lang_CXX, "", Lang_CXX, Verifier,
-                         functionDecl(
-                           hasBody(
-                             compoundStmt(
-                               has(
-                                 predefinedExpr(
-                                   hasType(
-                                     asString("const char [13]")),
-                                   has(
-                                     stringLiteral(
-                                       hasType(
-                                         asString("const char [13]")))))))))));
+  testImport("void declToImport() { __func__; }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     predefinedExpr(
+                       hasType(
+                         asString("const char [13]")),
+                       has(
+                         stringLiteral(
+                           hasType(
+                             asString("const char [13]"))))))))));
 }
 
 TEST(ImportExpr, ImportInitListExpr) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(
-        testImport(
-          "void declToImport() {"
-          "  struct point { double x; double y; };"
-          "  point ptarray[10] = { [2].y = 1.0, [2].x = 2.0,"
-          "                        [0].x = 1.0 }; }",
-          Lang_CXX, "", Lang_CXX, Verifier,
-          functionDecl(
-            hasBody(
-              compoundStmt(
-                has(
-                  declStmt(
-                    hasSingleDecl(
-                      varDecl(
-                        hasInitializer(
-                          initListExpr(
-                            has(
-                              cxxConstructExpr(
-                                requiresZeroInitialization())),
-                            has(
-                              initListExpr(
-                                hasType(asString("struct point")),
-                                has(floatLiteral(equals(1.0))),
-                                has(implicitValueInitExpr(
-                                      hasType(asString("double")))))),
-                            has(
-                              initListExpr(
-                                hasType(asString("struct point")),
-                                has(floatLiteral(equals(2.0))),
-                                has(floatLiteral(equals(1.0)))))
-                              )))))))))));
+  testImport(
+    "void declToImport() {"
+    "  struct point { double x; double y; };"
+    "  point ptarray[10] = { [2].y = 1.0, [2].x = 2.0,"
+    "                        [0].x = 1.0 }; }",
+    Lang_CXX, "", Lang_CXX, Verifier,
+    functionDecl(
+      hasBody(
+        compoundStmt(
+          has(
+            declStmt(
+              hasSingleDecl(
+                varDecl(
+                  hasInitializer(
+                    initListExpr(
+                      has(
+                        cxxConstructExpr(
+                          requiresZeroInitialization())),
+                      has(
+                        initListExpr(
+                          hasType(asString("struct point")),
+                          has(floatLiteral(equals(1.0))),
+                          has(implicitValueInitExpr(
+                                hasType(asString("double")))))),
+                      has(
+                        initListExpr(
+                          hasType(asString("struct point")),
+                          has(floatLiteral(equals(2.0))),
+                          has(floatLiteral(equals(1.0)))))
+                        ))))))))));
 }
 
 
@@ -456,35 +472,349 @@
 
 TEST(ImportExpr, ImportVAArgExpr) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(
-        testImport(
-          "void declToImport(__builtin_va_list list, ...) {"
-          "  (void)__builtin_va_arg(list, int); }",
-          Lang_CXX, "", Lang_CXX, Verifier,
-          functionDecl(
-            hasBody(
-              compoundStmt(
-                has(
-                  cStyleCastExpr(
-                    hasSourceExpression(
-                      vaArgExpr()))))))));
+  testImport("void declToImport(__builtin_va_list list, ...) {"
+             "  (void)__builtin_va_arg(list, int); }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     cStyleCastExpr(
+                       hasSourceExpression(
+                         vaArgExpr())))))));
 }
 
+TEST(ImportExpr, CXXTemporaryObjectExpr) {
+  MatchVerifier<Decl> Verifier;
+  testImport("struct C {};"
+             "void declToImport() { C c = C(); }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionDecl(hasBody(compoundStmt(has(
+                 declStmt(has(varDecl(has(exprWithCleanups(has(cxxConstructExpr(
+                     has(materializeTemporaryExpr(has(implicitCastExpr(
+                         has(cxxTemporaryObjectExpr())))))))))))))))));
+}
 
 TEST(ImportType, ImportAtomicType) {
   MatchVerifier<Decl> Verifier;
-  EXPECT_TRUE(testImport("void declToImport() { typedef _Atomic(int) a_int; }",
-                         Lang_CXX11, "", Lang_CXX11, Verifier,
-                         functionDecl(
-                           hasBody(
-                             compoundStmt(
-                               has(
-                                 declStmt(
-                                   has(
-                                     typedefDecl(
-                                       has(atomicType()))))))))));
+  testImport("void declToImport() { typedef _Atomic(int) a_int; }",
+             Lang_CXX11, "", Lang_CXX11, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     declStmt(
+                       has(
+                         typedefDecl(
+                           has(atomicType())))))))));
 }
 
+TEST(ImportDecl, ImportFunctionTemplateDecl) {
+  MatchVerifier<Decl> Verifier;
+  testImport("template <typename T> void declToImport() { };", Lang_CXX, "",
+             Lang_CXX, Verifier, functionTemplateDecl());
+}
+
+const internal::VariadicDynCastAllOfMatcher<Expr, CXXDependentScopeMemberExpr>
+    cxxDependentScopeMemberExpr;
+
+TEST(ImportExpr, ImportCXXDependentScopeMemberExpr) {
+  MatchVerifier<Decl> Verifier;
+  testImport("template <typename T> struct C { T t; };"
+             "template <typename T> void declToImport() {"
+             "  C<T> d;"
+             "  d.t;"
+             "}"
+             "void instantiate() { declToImport<int>(); }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionTemplateDecl(has(functionDecl(
+                 has(compoundStmt(has(cxxDependentScopeMemberExpr())))))));
+  testImport("template <typename T> struct C { T t; };"
+             "template <typename T> void declToImport() {"
+             "  C<T> d;"
+             "  (&d)->t;"
+             "}"
+             "void instantiate() { declToImport<int>(); }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionTemplateDecl(has(functionDecl(
+                 has(compoundStmt(has(cxxDependentScopeMemberExpr())))))));
+}
+
+TEST(ImportType, ImportTypeAliasTemplate) {
+  MatchVerifier<Decl> Verifier;
+  testImport(
+      "template <int K>"
+      "struct dummy { static const int i = K; };"
+      "template <int K> using dummy2 = dummy<K>;"
+      "int declToImport() { return dummy2<3>::i; }",
+      Lang_CXX11, "", Lang_CXX11, Verifier,
+      functionDecl(
+          hasBody(compoundStmt(
+              has(returnStmt(has(implicitCastExpr(has(declRefExpr()))))))),
+          unless(hasAncestor(translationUnitDecl(has(typeAliasDecl()))))));
+}
+
+const internal::VariadicDynCastAllOfMatcher<Decl, VarTemplateSpecializationDecl>
+    varTemplateSpecializationDecl;
+
+TEST(ImportDecl, ImportVarTemplate) {
+  MatchVerifier<Decl> Verifier;
+  testImport(
+      "template <typename T>"
+      "T pi = T(3.1415926535897932385L);"
+      "void declToImport() { pi<int>; }",
+      Lang_CXX11, "", Lang_CXX11, Verifier,
+      functionDecl(
+          hasBody(has(declRefExpr(to(varTemplateSpecializationDecl())))),
+          unless(hasAncestor(translationUnitDecl(has(varDecl(
+              hasName("pi"), unless(varTemplateSpecializationDecl()))))))));
+}
+
+TEST(ImportType, ImportPackExpansion) {
+  MatchVerifier<Decl> Verifier;
+  testImport("template <typename... Args>"
+             "struct dummy {"
+             "  dummy(Args... args) {}"
+             "  static const int i = 4;"
+             "};"
+             "int declToImport() { return dummy<int>::i; }",
+             Lang_CXX11, "", Lang_CXX11, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     returnStmt(
+                       has(
+                         implicitCastExpr(
+                           has(
+                             declRefExpr())))))))));
+}
+
+const internal::VariadicDynCastAllOfMatcher<Type,
+                                            DependentTemplateSpecializationType>
+    dependentTemplateSpecializationType;
+
+TEST(ImportType, ImportDependentTemplateSpecialization) {
+  MatchVerifier<Decl> Verifier;
+  testImport("template<typename T>"
+             "struct A;"
+             "template<typename T>"
+             "struct declToImport {"
+             "  typename A<T>::template B<T> a;"
+             "};",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             classTemplateDecl(has(cxxRecordDecl(has(
+                 fieldDecl(hasType(dependentTemplateSpecializationType())))))));
+}
+
+const internal::VariadicDynCastAllOfMatcher<Stmt, SizeOfPackExpr>
+    sizeOfPackExpr;
+
+TEST(ImportExpr, ImportSizeOfPackExpr) {
+  MatchVerifier<Decl> Verifier;
+  testImport("template <typename... Ts>"
+             "void declToImport() {"
+             "  const int i = sizeof...(Ts);"
+             "};"
+             "void g() { declToImport<int>(); }",
+             Lang_CXX11, "", Lang_CXX11, Verifier,
+             functionTemplateDecl(has(functionDecl(
+                 hasBody(compoundStmt(has(declStmt(has(varDecl(hasInitializer(
+                     implicitCastExpr(has(sizeOfPackExpr())))))))))))));
+  testImport(
+      "template <typename... Ts>"
+      "using X = int[sizeof...(Ts)];"
+      "template <typename... Us>"
+      "struct Y {"
+      "  X<Us..., int, double, int, Us...> f;"
+      "};"
+      "Y<float, int> declToImport;",
+      Lang_CXX11, "", Lang_CXX11, Verifier,
+      varDecl(hasType(classTemplateSpecializationDecl(has(fieldDecl(hasType(
+          hasUnqualifiedDesugaredType(constantArrayType(hasSize(7))))))))));
+}
+
+/// \brief Matches __builtin_types_compatible_p:
+/// GNU extension to check equivalent types
+/// Given
+/// \code
+///   __builtin_types_compatible_p(int, int)
+/// \endcode
+//  will generate TypeTraitExpr <...> 'int'
+const internal::VariadicDynCastAllOfMatcher<Stmt, TypeTraitExpr> typeTraitExpr;
+
+TEST(ImportExpr, ImportTypeTraitExpr) {
+  MatchVerifier<Decl> Verifier;
+  testImport("void declToImport() { "
+             "  __builtin_types_compatible_p(int, int);"
+             "}",
+             Lang_C, "", Lang_C, Verifier,
+             functionDecl(
+               hasBody(
+                 compoundStmt(
+                   has(
+                     typeTraitExpr(hasType(asString("int"))))))));
+}
+
+const internal::VariadicDynCastAllOfMatcher<Stmt, CXXTypeidExpr> cxxTypeidExpr;
+
+TEST(ImportExpr, ImportCXXTypeidExpr) {
+  MatchVerifier<Decl> Verifier;
+  testImport(
+      "namespace std { class type_info {}; }"
+      "void declToImport() {"
+      "  int x;"
+      "  auto a = typeid(int); auto b = typeid(x);"
+      "}",
+      Lang_CXX11, "", Lang_CXX11, Verifier,
+      functionDecl(
+          hasDescendant(varDecl(
+              hasName("a"), hasInitializer(hasDescendant(cxxTypeidExpr())))),
+          hasDescendant(varDecl(
+              hasName("b"), hasInitializer(hasDescendant(cxxTypeidExpr()))))));
+}
+
+TEST(ImportExpr, ImportTypeTraitExprValDep) {
+  MatchVerifier<Decl> Verifier;
+  testImport("template<typename T> struct declToImport {"
+             "  void m() { __is_pod(T); }"
+             "};"
+             "void f() { declToImport<int>().m(); }",
+             Lang_CXX11, "", Lang_CXX11, Verifier,
+             classTemplateDecl(
+               has(
+                 cxxRecordDecl(
+                   has(
+                     functionDecl(
+                       hasBody(
+                         compoundStmt(
+                           has(
+                             typeTraitExpr(
+                               hasType(booleanType())
+                               ))))))))));
+}
+
+const internal::VariadicDynCastAllOfMatcher<Expr, CXXPseudoDestructorExpr>
+    cxxPseudoDestructorExpr;
+
+TEST(ImportExpr, ImportCXXPseudoDestructorExpr) {
+  MatchVerifier<Decl> Verifier;
+  testImport("typedef int T;"
+             "void declToImport(int *p) {"
+             "  T t;"
+             "  p->T::~T();"
+             "}",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionDecl(has(compoundStmt(has(
+                 callExpr(has(cxxPseudoDestructorExpr())))))));
+}
+
+TEST(ImportDecl, ImportUsingDecl) {
+  MatchVerifier<Decl> Verifier;
+  testImport("namespace foo { int bar; }"
+             "void declToImport() { using foo::bar; }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionDecl(
+               has(
+                 compoundStmt(
+                   has(
+                     declStmt(
+                       has(
+                         usingDecl())))))));
+}
+
+/// \brief Matches shadow declarations introduced into a scope by a
+///        (resolved) using declaration.
+///
+/// Given
+/// \code
+///   namespace n { int f; }
+///   namespace declToImport { using n::f; }
+/// \endcode
+/// usingShadowDecl()
+///   matches \code f \endcode
+const internal::VariadicDynCastAllOfMatcher<Decl,
+                                            UsingShadowDecl> usingShadowDecl;
+
+TEST(ImportDecl, ImportUsingShadowDecl) {
+  MatchVerifier<Decl> Verifier;
+  testImport("namespace foo { int bar; }"
+             "namespace declToImport { using foo::bar; }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             namespaceDecl(has(usingShadowDecl())));
+}
+
+TEST(ImportExpr, ImportUnresolvedLookupExpr) {
+  MatchVerifier<Decl> Verifier;
+  testImport("template<typename T> int foo();"
+             "template <typename T> void declToImport() {"
+             "  ::foo<T>;"
+             "  ::template foo<T>;"
+             "}"
+             "void instantiate() { declToImport<int>(); }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionTemplateDecl(has(functionDecl(
+                 has(compoundStmt(has(unresolvedLookupExpr())))))));
+}
+
+TEST(ImportExpr, ImportCXXUnresolvedConstructExpr) {
+  MatchVerifier<Decl> Verifier;
+  testImport("template <typename T> struct C { T t; };"
+             "template <typename T> void declToImport() {"
+             "  C<T> d;"
+             "  d.t = T();"
+             "}"
+             "void instantiate() { declToImport<int>(); }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionTemplateDecl(has(functionDecl(has(compoundStmt(has(
+                 binaryOperator(has(cxxUnresolvedConstructExpr())))))))));
+  testImport("template <typename T> struct C { T t; };"
+             "template <typename T> void declToImport() {"
+             "  C<T> d;"
+             "  (&d)->t = T();"
+             "}"
+             "void instantiate() { declToImport<int>(); }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionTemplateDecl(has(functionDecl(has(compoundStmt(has(
+                 binaryOperator(has(cxxUnresolvedConstructExpr())))))))));
+}
+
+/// Check that function "declToImport()" (which is the templated function
+/// for corresponding FunctionTemplateDecl) is not added into DeclContext.
+/// Same for class template declarations.
+TEST(ImportDecl, ImportTemplatedDeclForTemplate) {
+  MatchVerifier<Decl> Verifier;
+  testImport("template <typename T> void declToImport() { T a = 1; }"
+             "void instantiate() { declToImport<int>(); }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             functionTemplateDecl(hasAncestor(translationUnitDecl(
+                 unless(has(functionDecl(hasName("declToImport"))))))));
+  testImport("template <typename T> struct declToImport { T t; };"
+             "void instantiate() { declToImport<int>(); }",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             classTemplateDecl(hasAncestor(translationUnitDecl(
+                 unless(has(cxxRecordDecl(hasName("declToImport"))))))));
+}
+
+TEST(ImportExpr, CXXOperatorCallExpr) {
+  MatchVerifier<Decl> Verifier;
+  testImport("class declToImport {"
+             "  void f() { *this = declToImport(); }"
+             "};",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             cxxRecordDecl(has(cxxMethodDecl(hasBody(compoundStmt(
+                 has(exprWithCleanups(has(cxxOperatorCallExpr())))))))));
+}
+
+TEST(ImportExpr, DependentSizedArrayType) {
+  MatchVerifier<Decl> Verifier;
+  testImport("template<typename T, int Size> class declToImport {"
+             "  T data[Size];"
+             "};",
+             Lang_CXX, "", Lang_CXX, Verifier,
+             classTemplateDecl(has(cxxRecordDecl(
+                 has(fieldDecl(hasType(dependentSizedArrayType())))))));
+}
 
 } // end namespace ast_matchers
 } // end namespace clang
diff --git a/unittests/AST/CMakeLists.txt b/unittests/AST/CMakeLists.txt
index 45dfa7a..89590c1 100644
--- a/unittests/AST/CMakeLists.txt
+++ b/unittests/AST/CMakeLists.txt
@@ -15,12 +15,12 @@
   EvaluateAsRValueTest.cpp
   ExternalASTSourceTest.cpp
   NamedDeclPrinterTest.cpp
-  PostOrderASTVisitor.cpp
   SourceLocationTest.cpp
   StmtPrinterTest.cpp
   )
 
 target_link_libraries(ASTTests
+  PRIVATE
   clangAST
   clangASTMatchers
   clangBasic
diff --git a/unittests/AST/DeclPrinterTest.cpp b/unittests/AST/DeclPrinterTest.cpp
index ae6d0f0..4cf8bce 100644
--- a/unittests/AST/DeclPrinterTest.cpp
+++ b/unittests/AST/DeclPrinterTest.cpp
@@ -31,18 +31,25 @@
 
 namespace {
 
-void PrintDecl(raw_ostream &Out, const ASTContext *Context, const Decl *D) {
+using PrintingPolicyModifier = void (*)(PrintingPolicy &policy);
+
+void PrintDecl(raw_ostream &Out, const ASTContext *Context, const Decl *D,
+               PrintingPolicyModifier PolicyModifier) {
   PrintingPolicy Policy = Context->getPrintingPolicy();
   Policy.TerseOutput = true;
+  if (PolicyModifier)
+    PolicyModifier(Policy);
   D->print(Out, Policy, /*Indentation*/ 0, /*PrintInstantiation*/ false);
 }
 
 class PrintMatch : public MatchFinder::MatchCallback {
   SmallString<1024> Printed;
   unsigned NumFoundDecls;
+  PrintingPolicyModifier PolicyModifier;
 
 public:
-  PrintMatch() : NumFoundDecls(0) {}
+  PrintMatch(PrintingPolicyModifier PolicyModifier)
+      : NumFoundDecls(0), PolicyModifier(PolicyModifier) {}
 
   void run(const MatchFinder::MatchResult &Result) override {
     const Decl *D = Result.Nodes.getNodeAs<Decl>("id");
@@ -53,7 +60,7 @@
       return;
 
     llvm::raw_svector_ostream Out(Printed);
-    PrintDecl(Out, Result.Context, D);
+    PrintDecl(Out, Result.Context, D, PolicyModifier);
   }
 
   StringRef getPrinted() const {
@@ -65,13 +72,12 @@
   }
 };
 
-::testing::AssertionResult PrintedDeclMatches(
-                                  StringRef Code,
-                                  const std::vector<std::string> &Args,
-                                  const DeclarationMatcher &NodeMatch,
-                                  StringRef ExpectedPrinted,
-                                  StringRef FileName) {
-  PrintMatch Printer;
+::testing::AssertionResult
+PrintedDeclMatches(StringRef Code, const std::vector<std::string> &Args,
+                   const DeclarationMatcher &NodeMatch,
+                   StringRef ExpectedPrinted, StringRef FileName,
+                   PrintingPolicyModifier PolicyModifier = nullptr) {
+  PrintMatch Printer(PolicyModifier);
   MatchFinder Finder;
   Finder.addMatcher(NodeMatch, &Printer);
   std::unique_ptr<FrontendActionFactory> Factory(
@@ -98,27 +104,30 @@
   return ::testing::AssertionSuccess();
 }
 
-::testing::AssertionResult PrintedDeclCXX98Matches(StringRef Code,
-                                                   StringRef DeclName,
-                                                   StringRef ExpectedPrinted) {
+::testing::AssertionResult
+PrintedDeclCXX98Matches(StringRef Code, StringRef DeclName,
+                        StringRef ExpectedPrinted,
+                        PrintingPolicyModifier PolicyModifier = nullptr) {
   std::vector<std::string> Args(1, "-std=c++98");
   return PrintedDeclMatches(Code,
                             Args,
                             namedDecl(hasName(DeclName)).bind("id"),
                             ExpectedPrinted,
-                            "input.cc");
+                            "input.cc",
+                            PolicyModifier);
 }
 
-::testing::AssertionResult PrintedDeclCXX98Matches(
-                                  StringRef Code,
-                                  const DeclarationMatcher &NodeMatch,
-                                  StringRef ExpectedPrinted) {
+::testing::AssertionResult
+PrintedDeclCXX98Matches(StringRef Code, const DeclarationMatcher &NodeMatch,
+                        StringRef ExpectedPrinted,
+                        PrintingPolicyModifier PolicyModifier = nullptr) {
   std::vector<std::string> Args(1, "-std=c++98");
   return PrintedDeclMatches(Code,
                             Args,
                             NodeMatch,
                             ExpectedPrinted,
-                            "input.cc");
+                            "input.cc",
+                            PolicyModifier);
 }
 
 ::testing::AssertionResult PrintedDeclCXX11Matches(StringRef Code,
@@ -343,6 +352,47 @@
     "void A()"));
 }
 
+TEST(DeclPrinter, TestFreeFunctionDecl_FullyQualifiedName) {
+    ASSERT_TRUE(PrintedDeclCXX98Matches(
+      "void A();",
+      "A",
+      "void A()",
+      [](PrintingPolicy &Policy){ Policy.FullyQualifiedName = true; }));
+}
+
+TEST(DeclPrinter, TestFreeFunctionDeclInNamespace_FullyQualifiedName) {
+    ASSERT_TRUE(PrintedDeclCXX98Matches(
+      "namespace X { void A(); };",
+      "A",
+      "void X::A()",
+      [](PrintingPolicy &Policy){ Policy.FullyQualifiedName = true; }));
+}
+
+TEST(DeclPrinter, TestMemberFunction_FullyQualifiedName) {
+    ASSERT_TRUE(PrintedDeclCXX98Matches(
+      "struct X { void A(); };",
+      "A",
+      "void X::A()",
+      [](PrintingPolicy &Policy){ Policy.FullyQualifiedName = true; }));
+}
+
+TEST(DeclPrinter, TestMemberFunctionInNamespace_FullyQualifiedName) {
+    ASSERT_TRUE(PrintedDeclCXX98Matches(
+      "namespace Z { struct X { void A(); }; }",
+      "A",
+      "void Z::X::A()",
+      [](PrintingPolicy &Policy){ Policy.FullyQualifiedName = true; }));
+}
+
+TEST(DeclPrinter, TestMemberFunctionOutside_FullyQualifiedName) {
+    ASSERT_TRUE(PrintedDeclCXX98Matches(
+      "struct X { void A(); };"
+       "void X::A() {}",
+      functionDecl(hasName("A"), isDefinition()).bind("id"),
+      "void X::A()",
+      [](PrintingPolicy &Policy){ Policy.FullyQualifiedName = true; }));
+}
+
 TEST(DeclPrinter, TestFunctionDecl2) {
   ASSERT_TRUE(PrintedDeclCXX98Matches(
     "void A() {}",
@@ -478,6 +528,27 @@
     "A(const A &a, int = 0)"));
 }
 
+TEST(DeclPrinter, TestCXXConstructorDeclWithMemberInitializer) {
+  ASSERT_TRUE(PrintedDeclCXX98Matches(
+    "struct A {"
+    "  int m;"
+    "  A() : m(2) {}"
+    "};",
+    cxxConstructorDecl(ofClass(hasName("A"))).bind("id"),
+    "A()"));
+}
+
+TEST(DeclPrinter, TestCXXConstructorDeclWithMemberInitializer_NoTerseOutput) {
+  ASSERT_TRUE(PrintedDeclCXX98Matches(
+    "struct A {"
+    "  int m;"
+    "  A() : m(2) {}"
+    "};",
+    cxxConstructorDecl(ofClass(hasName("A"))).bind("id"),
+    "A() : m(2) {\n}\n",
+    [](PrintingPolicy &Policy){ Policy.TerseOutput = false; }));
+}
+
 TEST(DeclPrinter, TestCXXConstructorDecl5) {
   ASSERT_TRUE(PrintedDeclCXX11Matches(
     "struct A {"
@@ -540,7 +611,7 @@
     "  A(T&&... ts) : T(ts)... {}"
     "};",
     cxxConstructorDecl(ofClass(hasName("A"))).bind("id"),
-    "A<T...>(T &&...ts) : T(ts)... {}"));
+    "A<T...>(T &&...ts)"));
 }
 
 TEST(DeclPrinter, TestCXXDestructorDecl1) {
diff --git a/unittests/AST/NamedDeclPrinterTest.cpp b/unittests/AST/NamedDeclPrinterTest.cpp
index 92df457..5715a34 100644
--- a/unittests/AST/NamedDeclPrinterTest.cpp
+++ b/unittests/AST/NamedDeclPrinterTest.cpp
@@ -143,7 +143,7 @@
   ASSERT_TRUE(PrintedWrittenNamedDeclCXX11Matches(
     "enum X { A };",
     "A",
-    "X::A"));
+    "A"));
 }
 
 TEST(NamedDeclPrinter, TestScopedNamedEnum) {
@@ -164,7 +164,7 @@
   ASSERT_TRUE(PrintedWrittenNamedDeclCXX11Matches(
     "class X { enum Y { A }; };",
     "A",
-    "X::Y::A"));
+    "X::A"));
 }
 
 TEST(NamedDeclPrinter, TestClassWithScopedNamedEnum) {
@@ -173,3 +173,10 @@
     "A",
     "X::Y::A"));
 }
+
+TEST(NamedDeclPrinter, TestLinkageInNamespace) {
+  ASSERT_TRUE(PrintedWrittenNamedDeclCXX11Matches(
+    "namespace X { extern \"C\" { int A; } }",
+    "A",
+    "X::A"));
+}
diff --git a/unittests/AST/PostOrderASTVisitor.cpp b/unittests/AST/PostOrderASTVisitor.cpp
deleted file mode 100644
index 2921f3e..0000000
--- a/unittests/AST/PostOrderASTVisitor.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-//===- unittests/AST/PostOrderASTVisitor.cpp - Declaration printer tests --===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains tests for the post-order traversing functionality
-// of RecursiveASTVisitor.
-//
-//===----------------------------------------------------------------------===//
-
-#include "clang/AST/RecursiveASTVisitor.h"
-#include "clang/Tooling/Tooling.h"
-#include "gtest/gtest.h"
-
-using namespace clang;
-
-namespace {
-
-  class RecordingVisitor
-    : public RecursiveASTVisitor<RecordingVisitor> {
-
-    bool VisitPostOrder;
-  public:
-    explicit RecordingVisitor(bool VisitPostOrder)
-      : VisitPostOrder(VisitPostOrder) {
-    }
-
-    // List of visited nodes during traversal.
-    std::vector<std::string> VisitedNodes;
-
-    bool shouldTraversePostOrder() const { return VisitPostOrder; }
-
-    bool VisitUnaryOperator(UnaryOperator *Op) {
-      VisitedNodes.push_back(Op->getOpcodeStr(Op->getOpcode()));
-      return true;
-    }
-
-    bool VisitBinaryOperator(BinaryOperator *Op) {
-      VisitedNodes.push_back(Op->getOpcodeStr());
-      return true;
-    }
-
-    bool VisitIntegerLiteral(IntegerLiteral *Lit) {
-      VisitedNodes.push_back(Lit->getValue().toString(10, false));
-      return true;
-    }
-
-    bool VisitVarDecl(VarDecl* D) {
-      VisitedNodes.push_back(D->getNameAsString());
-      return true;
-    }
-
-    bool VisitCXXMethodDecl(CXXMethodDecl *D) {
-      VisitedNodes.push_back(D->getQualifiedNameAsString());
-      return true;
-    }
-
-    bool VisitReturnStmt(ReturnStmt *S) {
-      VisitedNodes.push_back("return");
-      return true;
-    }
-
-    bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
-      VisitedNodes.push_back(Declaration->getQualifiedNameAsString());
-      return true;
-    }
-
-    bool VisitTemplateTypeParmType(TemplateTypeParmType *T) {
-      VisitedNodes.push_back(T->getDecl()->getQualifiedNameAsString());
-      return true;
-    }
-  };
-
-}
-
-TEST(RecursiveASTVisitor, PostOrderTraversal) {
-  auto ASTUnit = tooling::buildASTFromCode(
-    "class A {"
-    "  class B {"
-    "    int foo() { while(4) { int i = 9; int j = -5; } return (1 + 3) + 2; }"
-    "  };"
-    "};"
-  );
-  auto TU = ASTUnit->getASTContext().getTranslationUnitDecl();
-  // We traverse the translation unit and store all
-  // visited nodes.
-  RecordingVisitor Visitor(true);
-  Visitor.TraverseTranslationUnitDecl(TU);
-
-  std::vector<std::string> expected = {"4", "9",      "i",         "5",    "-",
-                                       "j", "1",      "3",         "+",    "2",
-                                       "+", "return", "A::B::foo", "A::B", "A"};
-  // Compare the list of actually visited nodes
-  // with the expected list of visited nodes.
-  ASSERT_EQ(expected.size(), Visitor.VisitedNodes.size());
-  for (std::size_t I = 0; I < expected.size(); I++) {
-    ASSERT_EQ(expected[I], Visitor.VisitedNodes[I]);
-  }
-}
-
-TEST(RecursiveASTVisitor, NoPostOrderTraversal) {
-  auto ASTUnit = tooling::buildASTFromCode(
-    "class A {"
-    "  class B {"
-    "    int foo() { return 1 + 2; }"
-    "  };"
-    "};"
-  );
-  auto TU = ASTUnit->getASTContext().getTranslationUnitDecl();
-  // We traverse the translation unit and store all
-  // visited nodes.
-  RecordingVisitor Visitor(false);
-  Visitor.TraverseTranslationUnitDecl(TU);
-
-  std::vector<std::string> expected = {
-    "A", "A::B", "A::B::foo", "return", "+", "1", "2"
-  };
-  // Compare the list of actually visited nodes
-  // with the expected list of visited nodes.
-  ASSERT_EQ(expected.size(), Visitor.VisitedNodes.size());
-  for (std::size_t I = 0; I < expected.size(); I++) {
-    ASSERT_EQ(expected[I], Visitor.VisitedNodes[I]);
-  }
-}
diff --git a/unittests/AST/StmtPrinterTest.cpp b/unittests/AST/StmtPrinterTest.cpp
index 12b2032..a064440 100644
--- a/unittests/AST/StmtPrinterTest.cpp
+++ b/unittests/AST/StmtPrinterTest.cpp
@@ -31,18 +31,26 @@
 
 namespace {
 
-void PrintStmt(raw_ostream &Out, const ASTContext *Context, const Stmt *S) {
+using PolicyAdjusterType =
+    Optional<llvm::function_ref<void(PrintingPolicy &Policy)>>;
+
+void PrintStmt(raw_ostream &Out, const ASTContext *Context, const Stmt *S,
+               PolicyAdjusterType PolicyAdjuster) {
   assert(S != nullptr && "Expected non-null Stmt");
   PrintingPolicy Policy = Context->getPrintingPolicy();
+  if (PolicyAdjuster)
+    (*PolicyAdjuster)(Policy);
   S->printPretty(Out, /*Helper*/ nullptr, Policy);
 }
 
 class PrintMatch : public MatchFinder::MatchCallback {
   SmallString<1024> Printed;
   unsigned NumFoundStmts;
+  PolicyAdjusterType PolicyAdjuster;
 
 public:
-  PrintMatch() : NumFoundStmts(0) {}
+  PrintMatch(PolicyAdjusterType PolicyAdjuster)
+      : NumFoundStmts(0), PolicyAdjuster(PolicyAdjuster) {}
 
   void run(const MatchFinder::MatchResult &Result) override {
     const Stmt *S = Result.Nodes.getNodeAs<Stmt>("id");
@@ -53,7 +61,7 @@
       return;
 
     llvm::raw_svector_ostream Out(Printed);
-    PrintStmt(Out, Result.Context, S);
+    PrintStmt(Out, Result.Context, S, PolicyAdjuster);
   }
 
   StringRef getPrinted() const {
@@ -68,9 +76,10 @@
 template <typename T>
 ::testing::AssertionResult
 PrintedStmtMatches(StringRef Code, const std::vector<std::string> &Args,
-                   const T &NodeMatch, StringRef ExpectedPrinted) {
+                   const T &NodeMatch, StringRef ExpectedPrinted,
+                   PolicyAdjusterType PolicyAdjuster = None) {
 
-  PrintMatch Printer;
+  PrintMatch Printer(PolicyAdjuster);
   MatchFinder Finder;
   Finder.addMatcher(NodeMatch, &Printer);
   std::unique_ptr<FrontendActionFactory> Factory(
@@ -122,11 +131,13 @@
 
 ::testing::AssertionResult
 PrintedStmtCXX11Matches(StringRef Code, const StatementMatcher &NodeMatch,
-                        StringRef ExpectedPrinted) {
+                        StringRef ExpectedPrinted,
+                        PolicyAdjusterType PolicyAdjuster = None) {
   std::vector<std::string> Args;
   Args.push_back("-std=c++11");
   Args.push_back("-Wno-unused-value");
-  return PrintedStmtMatches(Code, Args, NodeMatch, ExpectedPrinted);
+  return PrintedStmtMatches(Code, Args, NodeMatch, ExpectedPrinted,
+                            PolicyAdjuster);
 }
 
 ::testing::AssertionResult PrintedStmtMSMatches(
@@ -146,6 +157,17 @@
                             ExpectedPrinted);
 }
 
+::testing::AssertionResult
+PrintedStmtObjCMatches(StringRef Code, const StatementMatcher &NodeMatch,
+                       StringRef ExpectedPrinted,
+                       PolicyAdjusterType PolicyAdjuster = None) {
+  std::vector<std::string> Args;
+  Args.push_back("-ObjC");
+  Args.push_back("-fobjc-runtime=macosx-10.12.0");
+  return PrintedStmtMatches(Code, Args, NodeMatch, ExpectedPrinted,
+                            PolicyAdjuster);
+}
+
 } // unnamed namespace
 
 TEST(StmtPrinter, TestIntegerLiteral) {
@@ -214,3 +236,41 @@
     "(a & b)"));
     // WRONG; Should be: (a & b).operator void *()
 }
+
+TEST(StmtPrinter, TestNoImplicitBases) {
+  const char *CPPSource = R"(
+class A {
+  int field;
+  int member() { return field; }
+};
+)";
+  // No implicit 'this'.
+  ASSERT_TRUE(PrintedStmtCXX11Matches(
+      CPPSource, memberExpr(anything()).bind("id"), "field",
+      PolicyAdjusterType(
+          [](PrintingPolicy &PP) { PP.SuppressImplicitBase = true; })));
+  // Print implicit 'this'.
+  ASSERT_TRUE(PrintedStmtCXX11Matches(
+      CPPSource, memberExpr(anything()).bind("id"), "this->field"));
+
+  const char *ObjCSource = R"(
+@interface I {
+   int ivar;
+}
+@end
+@implementation I
+- (int) method {
+  return ivar;
+}
+@end
+      )";
+  // No implicit 'self'.
+  ASSERT_TRUE(PrintedStmtObjCMatches(ObjCSource, returnStmt().bind("id"),
+                                     "return ivar;\n",
+                                     PolicyAdjusterType([](PrintingPolicy &PP) {
+                                       PP.SuppressImplicitBase = true;
+                                     })));
+  // Print implicit 'self'.
+  ASSERT_TRUE(PrintedStmtObjCMatches(ObjCSource, returnStmt().bind("id"),
+                                     "return self->ivar;\n"));
+}
diff --git a/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
index 7bc8421..3e27948 100644
--- a/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
+++ b/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
@@ -1315,6 +1315,14 @@
     cxxMethodDecl(hasName("a"), isDefinition());
   EXPECT_TRUE(matches("class A { void a() {} };", DefinitionOfMethodA));
   EXPECT_TRUE(notMatches("class A { void a(); };", DefinitionOfMethodA));
+
+  DeclarationMatcher DefinitionOfObjCMethodA =
+    objcMethodDecl(hasName("a"), isDefinition());
+  EXPECT_TRUE(matchesObjC("@interface A @end "
+                          "@implementation A; -(void)a {} @end",
+                          DefinitionOfObjCMethodA));
+  EXPECT_TRUE(notMatchesObjC("@interface A; - (void)a; @end",
+                             DefinitionOfObjCMethodA));
 }
 
 TEST(Matcher, HandlesNullQualTypes) {
@@ -1615,6 +1623,14 @@
       "template class X<A>;",
     cxxRecordDecl(isTemplateInstantiation(), hasDescendant(
       fieldDecl(hasType(recordDecl(hasName("A"))))))));
+
+  // Make sure that we match the instantiation instead of the template
+  // definition by checking whether the member function is present.
+  EXPECT_TRUE(
+      matches("template <typename T> class X { void f() { T t; } };"
+              "extern template class X<int>;",
+              cxxRecordDecl(isTemplateInstantiation(),
+                            unless(hasDescendant(varDecl(hasName("t")))))));
 }
 
 TEST(IsTemplateInstantiation,
@@ -1762,6 +1778,84 @@
     functionDecl(isExplicitTemplateSpecialization())));
 }
 
+TEST(TypeMatching, MatchesNoReturn) {
+  EXPECT_TRUE(notMatches("void func();", functionDecl(isNoReturn())));
+  EXPECT_TRUE(notMatches("void func() {}", functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(notMatchesC("void func();", functionDecl(isNoReturn())));
+  EXPECT_TRUE(notMatchesC("void func() {}", functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(
+      notMatches("struct S { void func(); };", functionDecl(isNoReturn())));
+  EXPECT_TRUE(
+      notMatches("struct S { void func() {} };", functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(notMatches("struct S { static void func(); };",
+                         functionDecl(isNoReturn())));
+  EXPECT_TRUE(notMatches("struct S { static void func() {} };",
+                         functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(notMatches("struct S { S(); };", functionDecl(isNoReturn())));
+  EXPECT_TRUE(notMatches("struct S { S() {} };", functionDecl(isNoReturn())));
+
+  // ---
+
+  EXPECT_TRUE(matches("[[noreturn]] void func();", functionDecl(isNoReturn())));
+  EXPECT_TRUE(
+      matches("[[noreturn]] void func() {}", functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(matches("struct S { [[noreturn]] void func(); };",
+                      functionDecl(isNoReturn())));
+  EXPECT_TRUE(matches("struct S { [[noreturn]] void func() {} };",
+                      functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(matches("struct S { [[noreturn]] static void func(); };",
+                      functionDecl(isNoReturn())));
+  EXPECT_TRUE(matches("struct S { [[noreturn]] static void func() {} };",
+                      functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(
+      matches("struct S { [[noreturn]] S(); };", functionDecl(isNoReturn())));
+  EXPECT_TRUE(matches("struct S { [[noreturn]] S() {} };",
+                      functionDecl(isNoReturn())));
+
+  // ---
+
+  EXPECT_TRUE(matches("__attribute__((noreturn)) void func();",
+                      functionDecl(isNoReturn())));
+  EXPECT_TRUE(matches("__attribute__((noreturn)) void func() {}",
+                      functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(matches("struct S { __attribute__((noreturn)) void func(); };",
+                      functionDecl(isNoReturn())));
+  EXPECT_TRUE(matches("struct S { __attribute__((noreturn)) void func() {} };",
+                      functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(
+      matches("struct S { __attribute__((noreturn)) static void func(); };",
+              functionDecl(isNoReturn())));
+  EXPECT_TRUE(
+      matches("struct S { __attribute__((noreturn)) static void func() {} };",
+              functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(matches("struct S { __attribute__((noreturn)) S(); };",
+                      functionDecl(isNoReturn())));
+  EXPECT_TRUE(matches("struct S { __attribute__((noreturn)) S() {} };",
+                      functionDecl(isNoReturn())));
+
+  // ---
+
+  EXPECT_TRUE(matchesC("__attribute__((noreturn)) void func();",
+                      functionDecl(isNoReturn())));
+  EXPECT_TRUE(matchesC("__attribute__((noreturn)) void func() {}",
+                      functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(matchesC("_Noreturn void func();",
+                      functionDecl(isNoReturn())));
+  EXPECT_TRUE(matchesC("_Noreturn void func() {}",
+                      functionDecl(isNoReturn())));
+}
+
 TEST(TypeMatching, MatchesBool) {
   EXPECT_TRUE(matches("struct S { bool func(); };",
                       cxxMethodDecl(returns(booleanType()))));
@@ -1983,5 +2077,67 @@
                       namedDecl(hasExternalFormalLinkage())));
 }
 
+TEST(HasDefaultArgument, Basic) {
+  EXPECT_TRUE(matches("void x(int val = 0) {}", 
+                      parmVarDecl(hasDefaultArgument())));
+  EXPECT_TRUE(notMatches("void x(int val) {}",
+                      parmVarDecl(hasDefaultArgument())));
+}
+
+TEST(IsArray, Basic) {
+  EXPECT_TRUE(matches("struct MyClass {}; MyClass *p1 = new MyClass[10];",
+                      cxxNewExpr(isArray())));
+}
+
+TEST(HasArraySize, Basic) {
+  EXPECT_TRUE(matches("struct MyClass {}; MyClass *p1 = new MyClass[10];",
+                      cxxNewExpr(hasArraySize(integerLiteral(equals(10))))));
+}
+
+TEST(HasDefinition, MatchesStructDefinition) {
+  EXPECT_TRUE(matches("struct x {};",
+                      cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(notMatches("struct x;",
+                      cxxRecordDecl(hasDefinition())));
+}
+
+TEST(HasDefinition, MatchesClassDefinition) {
+  EXPECT_TRUE(matches("class x {};",
+                      cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(notMatches("class x;",
+                      cxxRecordDecl(hasDefinition())));
+}
+
+TEST(HasDefinition, MatchesUnionDefinition) {
+  EXPECT_TRUE(matches("union x {};",
+                      cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(notMatches("union x;",
+                      cxxRecordDecl(hasDefinition())));
+}
+
+TEST(IsScopedEnum, MatchesScopedEnum) {
+  EXPECT_TRUE(matches("enum class X {};", enumDecl(isScoped())));
+  EXPECT_TRUE(notMatches("enum X {};", enumDecl(isScoped())));
+}
+
+TEST(HasTrailingReturn, MatchesTrailingReturn) {
+  EXPECT_TRUE(matches("auto Y() -> int { return 0; }",
+                      functionDecl(hasTrailingReturn())));
+  EXPECT_TRUE(matches("auto X() -> int;", functionDecl(hasTrailingReturn())));
+  EXPECT_TRUE(notMatches("int X() { return 0; }", 
+                      functionDecl(hasTrailingReturn())));
+  EXPECT_TRUE(notMatches("int X();", functionDecl(hasTrailingReturn())));
+  EXPECT_TRUE(notMatchesC("void X();", functionDecl(hasTrailingReturn())));
+}
+
+TEST(HasTrailingReturn, MatchesLambdaTrailingReturn) {
+  EXPECT_TRUE(matches(
+          "auto lambda2 = [](double x, double y) -> double {return x + y;};",
+          functionDecl(hasTrailingReturn())));
+  EXPECT_TRUE(notMatches(
+          "auto lambda2 = [](double x, double y) {return x + y;};",
+          functionDecl(hasTrailingReturn())));
+}
+
 } // namespace ast_matchers
 } // namespace clang
diff --git a/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
index beb6ed8..59fadad 100644
--- a/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
+++ b/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
@@ -1590,7 +1590,7 @@
     )));
 }
 
-TEST(ObjCDeclMacher, CoreDecls) {
+TEST(ObjCDeclMatcher, CoreDecls) {
   std::string ObjCString =
     "@protocol Proto "
     "- (void)protoDidThing; "
@@ -1605,6 +1605,9 @@
     "{ id _ivar; } "
     "- (void)anything {} "
     "@end "
+    "@implementation Thing (ABC) "
+    "- (void)abc_doThing {} "
+    "@end "
     ;
 
   EXPECT_TRUE(matchesObjC(
@@ -1618,6 +1621,9 @@
     objcCategoryDecl(hasName("ABC"))));
   EXPECT_TRUE(matchesObjC(
     ObjCString,
+    objcCategoryImplDecl(hasName("ABC"))));
+  EXPECT_TRUE(matchesObjC(
+    ObjCString,
     objcMethodDecl(hasName("protoDidThing"))));
   EXPECT_TRUE(matchesObjC(
     ObjCString,
@@ -1633,5 +1639,28 @@
     objcPropertyDecl(hasName("enabled"))));
 }
 
+TEST(ObjCStmtMatcher, ExceptionStmts) {
+  std::string ObjCString =
+    "void f(id obj) {"
+    "  @try {"
+    "    @throw obj;"
+    "  } @catch (...) {"
+    "  } @finally {}"
+    "}";
+
+  EXPECT_TRUE(matchesObjC(
+    ObjCString,
+    objcTryStmt()));
+  EXPECT_TRUE(matchesObjC(
+    ObjCString,
+    objcThrowStmt()));
+  EXPECT_TRUE(matchesObjC(
+    ObjCString,
+    objcCatchStmt()));
+  EXPECT_TRUE(matchesObjC(
+    ObjCString,
+    objcFinallyStmt()));
+}
+
 } // namespace ast_matchers
 } // namespace clang
diff --git a/unittests/ASTMatchers/CMakeLists.txt b/unittests/ASTMatchers/CMakeLists.txt
index 5633031..a876fc2 100644
--- a/unittests/ASTMatchers/CMakeLists.txt
+++ b/unittests/ASTMatchers/CMakeLists.txt
@@ -18,6 +18,7 @@
   ASTMatchersTraversalTest.cpp)
 
 target_link_libraries(ASTMatchersTests
+  PRIVATE
   clangAST
   clangASTMatchers
   clangBasic
diff --git a/unittests/ASTMatchers/Dynamic/CMakeLists.txt b/unittests/ASTMatchers/Dynamic/CMakeLists.txt
index 506a655..848a820 100644
--- a/unittests/ASTMatchers/Dynamic/CMakeLists.txt
+++ b/unittests/ASTMatchers/Dynamic/CMakeLists.txt
@@ -8,6 +8,7 @@
   RegistryTest.cpp)
 
 target_link_libraries(DynamicASTMatchersTests
+  PRIVATE
   clangAST
   clangASTMatchers
   clangBasic
diff --git a/unittests/Analysis/CMakeLists.txt b/unittests/Analysis/CMakeLists.txt
index 62db8f6..0056f82 100644
--- a/unittests/Analysis/CMakeLists.txt
+++ b/unittests/Analysis/CMakeLists.txt
@@ -8,6 +8,7 @@
   )
 
 target_link_libraries(ClangAnalysisTests
+  PRIVATE
   clangAnalysis
   clangAST
   clangASTMatchers
diff --git a/unittests/Basic/CMakeLists.txt b/unittests/Basic/CMakeLists.txt
index 3a9f34f..b46c067 100644
--- a/unittests/Basic/CMakeLists.txt
+++ b/unittests/Basic/CMakeLists.txt
@@ -12,6 +12,7 @@
   )
 
 target_link_libraries(BasicTests
+  PRIVATE
   clangBasic
   clangLex
   )
diff --git a/unittests/Basic/VirtualFileSystemTest.cpp b/unittests/Basic/VirtualFileSystemTest.cpp
index 40add21..f9efbea 100644
--- a/unittests/Basic/VirtualFileSystemTest.cpp
+++ b/unittests/Basic/VirtualFileSystemTest.cpp
@@ -760,6 +760,88 @@
                       NormalizedFS.getCurrentWorkingDirectory().get()));
 }
 
+TEST_F(InMemoryFileSystemTest, AddFileWithUser) {
+  FS.addFile("/a/b/c", 0, MemoryBuffer::getMemBuffer("abc"), 0xFEEDFACE);
+  auto Stat = FS.status("/a");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isDirectory());
+  ASSERT_EQ(0xFEEDFACE, Stat->getUser());
+  Stat = FS.status("/a/b");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isDirectory());
+  ASSERT_EQ(0xFEEDFACE, Stat->getUser());
+  Stat = FS.status("/a/b/c");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isRegularFile());
+  ASSERT_EQ(sys::fs::perms::all_all, Stat->getPermissions());
+  ASSERT_EQ(0xFEEDFACE, Stat->getUser());
+}
+
+TEST_F(InMemoryFileSystemTest, AddFileWithGroup) {
+  FS.addFile("/a/b/c", 0, MemoryBuffer::getMemBuffer("abc"), None, 0xDABBAD00);
+  auto Stat = FS.status("/a");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isDirectory());
+  ASSERT_EQ(0xDABBAD00, Stat->getGroup());
+  Stat = FS.status("/a/b");
+  ASSERT_TRUE(Stat->isDirectory());
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_EQ(0xDABBAD00, Stat->getGroup());
+  Stat = FS.status("/a/b/c");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isRegularFile());
+  ASSERT_EQ(sys::fs::perms::all_all, Stat->getPermissions());
+  ASSERT_EQ(0xDABBAD00, Stat->getGroup());
+}
+
+TEST_F(InMemoryFileSystemTest, AddFileWithFileType) {
+  FS.addFile("/a/b/c", 0, MemoryBuffer::getMemBuffer("abc"), None, None,
+             sys::fs::file_type::socket_file);
+  auto Stat = FS.status("/a");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isDirectory());
+  Stat = FS.status("/a/b");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isDirectory());
+  Stat = FS.status("/a/b/c");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_EQ(sys::fs::file_type::socket_file, Stat->getType());
+  ASSERT_EQ(sys::fs::perms::all_all, Stat->getPermissions());
+}
+
+TEST_F(InMemoryFileSystemTest, AddFileWithPerms) {
+  FS.addFile("/a/b/c", 0, MemoryBuffer::getMemBuffer("abc"), None, None,
+             None, sys::fs::perms::owner_read | sys::fs::perms::owner_write);
+  auto Stat = FS.status("/a");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isDirectory());
+  ASSERT_EQ(sys::fs::perms::owner_read | sys::fs::perms::owner_write |
+            sys::fs::perms::owner_exe, Stat->getPermissions());
+  Stat = FS.status("/a/b");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isDirectory());
+  ASSERT_EQ(sys::fs::perms::owner_read | sys::fs::perms::owner_write |
+            sys::fs::perms::owner_exe, Stat->getPermissions());
+  Stat = FS.status("/a/b/c");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isRegularFile());
+  ASSERT_EQ(sys::fs::perms::owner_read | sys::fs::perms::owner_write,
+            Stat->getPermissions());
+}
+
+TEST_F(InMemoryFileSystemTest, AddDirectoryThenAddChild) {
+  FS.addFile("/a", 0, MemoryBuffer::getMemBuffer(""), /*User=*/None,
+             /*Group=*/None, sys::fs::file_type::directory_file);
+  FS.addFile("/a/b", 0, MemoryBuffer::getMemBuffer("abc"), /*User=*/None,
+             /*Group=*/None, sys::fs::file_type::regular_file);
+  auto Stat = FS.status("/a");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isDirectory());
+  Stat = FS.status("/a/b");
+  ASSERT_FALSE(Stat.getError()) << Stat.getError() << "\n" << FS.toString();
+  ASSERT_TRUE(Stat->isRegularFile());
+}
+
 // NOTE: in the tests below, we use '//root/' as our root directory, since it is
 // a legal *absolute* path on Windows as well as *nix.
 class VFSFromYAMLTest : public ::testing::Test {
diff --git a/unittests/CodeGen/CMakeLists.txt b/unittests/CodeGen/CMakeLists.txt
index c49776b..856dbce 100644
--- a/unittests/CodeGen/CMakeLists.txt
+++ b/unittests/CodeGen/CMakeLists.txt
@@ -7,9 +7,11 @@
   BufferSourceTest.cpp
   CodeGenExternalTest.cpp
   IncrementalProcessingTest.cpp
+  TBAAMetadataTest.cpp
   )
 
 target_link_libraries(ClangCodeGenTests
+  PRIVATE
   clangAST
   clangBasic
   clangCodeGen
diff --git a/unittests/CodeGen/IRMatchers.h b/unittests/CodeGen/IRMatchers.h
new file mode 100644
index 0000000..5150ca4
--- /dev/null
+++ b/unittests/CodeGen/IRMatchers.h
@@ -0,0 +1,453 @@
+//=== unittests/CodeGen/IRMatchers.h - Match on the LLVM IR -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides a simple mechanism for performing search operations over
+/// IR including metadata and types. It allows writing complex search patterns
+/// using understandable syntax. For instance, the code:
+///
+/// \code
+///       const BasicBlock *BB = ...
+///       const Instruction *I = match(BB,
+///           MInstruction(Instruction::Store,
+///               MConstInt(4, 8),
+///               MMTuple(
+///                   MMTuple(
+///                       MMString("omnipotent char"),
+///                       MMTuple(
+///                           MMString("Simple C/C++ TBAA")),
+///                       MConstInt(0, 64)),
+///                   MSameAs(0),
+///                   MConstInt(0))));
+/// \endcode
+///
+/// searches the basic block BB for the 'store' instruction, first argument of
+/// which is 'i8 4', and the attached metadata has an item described by the
+/// given tree.
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_UNITTESTS_CODEGEN_IRMATCHERS_H
+#define CLANG_UNITTESTS_CODEGEN_IRMATCHERS_H
+
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Value.h"
+
+namespace llvm {
+
+/// Keeps information about pending match queries.
+///
+/// This class stores state of all unfinished match actions. It allows to
+/// use queries like "this operand is the same as n-th operand", which are
+/// hard to implement otherwise.
+///
+class MatcherContext {
+public:
+
+  /// Describes pending match query.
+  ///
+  /// The query is represented by the current entity being investigated (type,
+  /// value or metadata). If the entity is a member of a list (like arguments),
+  /// the query also keeps the entity number in that list.
+  ///
+  class Query {
+    PointerUnion3<const Value *, const Metadata *, const Type *> Entity;
+    unsigned OperandNo;
+
+  public:
+    Query(const Value *V, unsigned N) : Entity(V), OperandNo(N) {}
+    Query(const Metadata *M, unsigned N) : Entity(M), OperandNo(N) {}
+    Query(const Type *T, unsigned N) : Entity(T), OperandNo(N) {}
+
+    template<typename T>
+    const T *get() const {
+      return Entity.dyn_cast<const T *>();
+    }
+
+    unsigned getOperandNo() const { return OperandNo; }
+  };
+
+  template<typename T>
+  void push(const T *V, unsigned N = ~0) {
+    MatchStack.push_back(Query(V, N));
+  }
+
+  void pop() { MatchStack.pop_back(); }
+
+  template<typename T>
+  const T *top() const { return MatchStack.back().get<T>(); }
+
+  size_t size() const { return MatchStack.size(); }
+
+  unsigned getOperandNo() const { return MatchStack.back().getOperandNo(); }
+
+  /// Returns match query at the given offset from the top of queries.
+  ///
+  /// Offset 0 corresponds to the topmost query.
+  ///
+  const Query &getQuery(unsigned Offset) const {
+    assert(MatchStack.size() > Offset);
+    return MatchStack[MatchStack.size() - 1 - Offset];
+  }
+
+private:
+  SmallVector<Query, 8> MatchStack;
+};
+
+
+/// Base of all matcher classes.
+///
+class Matcher {
+public:
+  virtual ~Matcher() {}
+
+  /// Returns true if the entity on the top of the specified context satisfies
+  /// the matcher condition.
+  ///
+  virtual bool match(MatcherContext &MC) = 0;
+};
+
+
+/// Base class of matchers that test particular entity.
+///
+template<typename T>
+class EntityMatcher : public Matcher {
+public:
+  bool match(MatcherContext &MC) override {
+    if (auto V = MC.top<T>())
+      return matchEntity(*V, MC);
+    return false;
+  }
+  virtual bool matchEntity(const T &M, MatcherContext &C) = 0;
+};
+
+
+/// Matcher that matches any entity of the specified kind.
+///
+template<typename T>
+class AnyMatcher : public EntityMatcher<T> {
+public:
+  bool matchEntity(const T &M, MatcherContext &C) override { return true; }
+};
+
+
+/// Matcher that tests if the current entity satisfies the specified
+/// condition.
+///
+template<typename T>
+class CondMatcher : public EntityMatcher<T> {
+  std::function<bool(const T &)> Condition;
+public:
+  CondMatcher(std::function<bool(const T &)> C) : Condition(C) {}
+  bool matchEntity(const T &V, MatcherContext &C) override {
+    return Condition(V);
+  }
+};
+
+
+/// Matcher that save pointer to the entity that satisfies condition of the
+// specified matcher.
+///
+template<typename T>
+class SavingMatcher : public EntityMatcher<T> {
+  const T *&Var;
+  std::shared_ptr<Matcher> Next;
+public:
+  SavingMatcher(const T *&V, std::shared_ptr<Matcher> N) : Var(V), Next(N) {}
+  bool matchEntity(const T &V, MatcherContext &C) override {
+    bool Result = Next->match(C);
+    if (Result)
+      Var = &V;
+    return Result;
+  }
+};
+
+
+/// Matcher that checks that the entity is identical to another entity in the
+/// same container.
+///
+class SameAsMatcher : public Matcher {
+  unsigned OpNo;
+public:
+  SameAsMatcher(unsigned N) : OpNo(N) {}
+  bool match(MatcherContext &C) override {
+    if (C.getOperandNo() != ~0U) {
+      // Handle all known containers here.
+      const MatcherContext::Query &StackRec = C.getQuery(1);
+      if (const Metadata *MR = StackRec.get<Metadata>()) {
+        if (const auto *MT = dyn_cast<MDTuple>(MR)) {
+          if (OpNo < MT->getNumOperands())
+            return C.top<Metadata>() == MT->getOperand(OpNo).get();
+          return false;
+        }
+        llvm_unreachable("Unknown metadata container");
+      }
+      if (const Value *VR = StackRec.get<Value>()) {
+        if (const auto *Insn = dyn_cast<Instruction>(VR)) {
+          if (OpNo < Insn->getNumOperands())
+            return C.top<Value>() == Insn->getOperand(OpNo);
+          return false;
+        }
+        llvm_unreachable("Unknown value container");
+      }
+      llvm_unreachable("Unknown type container");
+    }
+    return false;
+  }
+};
+
+
+/// Matcher that tests if the entity is a constant integer.
+///
+class ConstantIntMatcher : public Matcher {
+  uint64_t IntValue;
+  unsigned Width;
+public:
+  ConstantIntMatcher(uint64_t V, unsigned W = 0) : IntValue(V), Width(W) {}
+  bool match(MatcherContext &Ctx) override {
+    if (const Value *V = Ctx.top<Value>()) {
+      if (const auto *CI = dyn_cast<ConstantInt>(V))
+        return (Width == 0 || CI->getBitWidth() == Width) &&
+               CI->getLimitedValue() == IntValue;
+    }
+    if (const Metadata *M = Ctx.top<Metadata>()) {
+      if (const auto *MT = dyn_cast<ValueAsMetadata>(M))
+        if (const auto *C = dyn_cast<ConstantInt>(MT->getValue()))
+          return (Width == 0 || C->getBitWidth() == Width) &&
+                 C->getLimitedValue() == IntValue;
+    }
+    return false;
+  }
+};
+
+
+/// Value matcher tuned to test instructions.
+///
+class InstructionMatcher : public EntityMatcher<Value> {
+  SmallVector<std::shared_ptr<Matcher>, 8> OperandMatchers;
+  std::shared_ptr<EntityMatcher<Metadata>> MetaMatcher = nullptr;
+  unsigned Code;
+public:
+  InstructionMatcher(unsigned C) : Code(C) {}
+
+  void push(std::shared_ptr<EntityMatcher<Metadata>> M) {
+    assert(!MetaMatcher && "Only one metadata matcher may be specified");
+    MetaMatcher = M;
+  }
+  void push(std::shared_ptr<Matcher> V) { OperandMatchers.push_back(V); }
+  template<typename... Args>
+  void push(std::shared_ptr<Matcher> V, Args... A) {
+    push(V);
+    push(A...);
+  }
+
+  virtual bool matchInstruction(const Instruction &I) {
+    return I.getOpcode() == Code;
+  }
+
+  bool matchEntity(const Value &V, MatcherContext &C) override {
+    if (const auto *I = dyn_cast<Instruction>(&V)) {
+      if (!matchInstruction(*I))
+        return false;
+      if (OperandMatchers.size() > I->getNumOperands())
+        return false;
+      for (unsigned N = 0, E = OperandMatchers.size(); N != E; ++N) {
+        C.push(I->getOperand(N), N);
+        if (!OperandMatchers[N]->match(C)) {
+          C.pop();
+          return false;
+        }
+        C.pop();
+      }
+      if (MetaMatcher) {
+        SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
+        I->getAllMetadata(MDs);
+        bool Found = false;
+        for (auto Item : MDs) {
+          C.push(Item.second);
+          if (MetaMatcher->match(C)) {
+            Found = true;
+            C.pop();
+            break;
+          }
+          C.pop();
+        }
+        return Found;
+      }
+      return true;
+    }
+    return false;
+  }
+};
+
+
+/// Matcher that tests type of the current value using the specified
+/// type matcher.
+///
+class ValueTypeMatcher : public EntityMatcher<Value> {
+  std::shared_ptr<EntityMatcher<Type>> TyM;
+public:
+  ValueTypeMatcher(std::shared_ptr<EntityMatcher<Type>> T) : TyM(T) {}
+  ValueTypeMatcher(const Type *T)
+    : TyM(new CondMatcher<Type>([T](const Type &Ty) -> bool {
+                                  return &Ty == T;
+                                })) {}
+  bool matchEntity(const Value &V, MatcherContext &Ctx) override {
+    Type *Ty = V.getType();
+    Ctx.push(Ty);
+    bool Res = TyM->match(Ctx);
+    Ctx.pop();
+    return Res;
+  }
+};
+
+
+/// Matcher that matches string metadata.
+///
+class NameMetaMatcher : public EntityMatcher<Metadata> {
+  StringRef Name;
+public:
+  NameMetaMatcher(StringRef N) : Name(N) {}
+  bool matchEntity(const Metadata &M, MatcherContext &C) override {
+    if (auto *MDS = dyn_cast<MDString>(&M))
+      return MDS->getString().equals(Name);
+    return false;
+  }
+};
+
+
+/// Matcher that matches metadata tuples.
+///
+class MTupleMatcher : public EntityMatcher<Metadata> {
+  SmallVector<std::shared_ptr<Matcher>, 4> Operands;
+public:
+  void push(std::shared_ptr<Matcher> M) { Operands.push_back(M); }
+  template<typename... Args>
+  void push(std::shared_ptr<Matcher> M, Args... A) {
+    push(M);
+    push(A...);
+  }
+  bool matchEntity(const Metadata &M, MatcherContext &C) override {
+    if (const auto *MT = dyn_cast<MDTuple>(&M)) {
+      if (MT->getNumOperands() != Operands.size())
+        return false;
+      for (unsigned I = 0, E = MT->getNumOperands(); I != E; ++I) {
+        const MDOperand &Op = MT->getOperand(I);
+        C.push(Op.get(), I);
+        if (!Operands[I]->match(C)) {
+          C.pop();
+          return false;
+        }
+        C.pop();
+      }
+      return true;
+    }
+    return false;
+  }
+};
+
+
+// Helper function used to construct matchers.
+
+std::shared_ptr<Matcher> MSameAs(unsigned N) {
+  return std::shared_ptr<Matcher>(new SameAsMatcher(N));
+}
+
+template<typename... T>
+std::shared_ptr<InstructionMatcher> MInstruction(unsigned C, T... Args) {
+  auto Result = new InstructionMatcher(C);
+  Result->push(Args...);
+  return std::shared_ptr<InstructionMatcher>(Result);
+}
+
+std::shared_ptr<Matcher> MConstInt(uint64_t V, unsigned W = 0) {
+  return std::shared_ptr<Matcher>(new ConstantIntMatcher(V, W));
+}
+
+std::shared_ptr<EntityMatcher<Value>>
+ MValType(std::shared_ptr<EntityMatcher<Type>> T) {
+  return std::shared_ptr<EntityMatcher<Value>>(new ValueTypeMatcher(T));
+}
+
+std::shared_ptr<EntityMatcher<Value>> MValType(const Type *T) {
+  return std::shared_ptr<EntityMatcher<Value>>(new ValueTypeMatcher(T));
+}
+
+std::shared_ptr<EntityMatcher<Type>>
+MType(std::function<bool(const Type &)> C) {
+  return std::shared_ptr<EntityMatcher<Type>>(new CondMatcher<Type>(C));
+}
+
+std::shared_ptr<EntityMatcher<Metadata>> MMAny() {
+  return std::shared_ptr<EntityMatcher<Metadata>>(new AnyMatcher<Metadata>);
+}
+
+std::shared_ptr<EntityMatcher<Metadata>>
+MMSave(const Metadata *&V, std::shared_ptr<EntityMatcher<Metadata>> M) {
+  return std::shared_ptr<EntityMatcher<Metadata>>(
+      new SavingMatcher<Metadata>(V, M));
+}
+
+std::shared_ptr<EntityMatcher<Metadata>>
+MMString(const char *Name) {
+  return std::shared_ptr<EntityMatcher<Metadata>>(new NameMetaMatcher(Name));
+}
+
+template<typename... T>
+std::shared_ptr<EntityMatcher<Metadata>> MMTuple(T... Args) {
+  auto Res = new MTupleMatcher();
+  Res->push(Args...);
+  return std::shared_ptr<EntityMatcher<Metadata>>(Res);
+}
+
+
+/// Looks for the instruction that satisfies condition of the specified
+/// matcher inside the given basic block.
+/// \returns Pointer to the found instruction or nullptr if such instruction
+///          was not found.
+///
+const Instruction *match(const BasicBlock *BB, std::shared_ptr<Matcher> M) {
+  MatcherContext MC;
+  for (const auto &I : *BB) {
+    MC.push(&I);
+    if (M->match(MC))
+      return &I;
+    MC.pop();
+  }
+  assert(MC.size() == 0);
+  return nullptr;
+}
+
+
+/// Looks for the instruction that satisfies condition of the specified
+/// matcher starting from the specified instruction inside the same basic block.
+///
+/// The given instruction is not checked.
+///
+const Instruction *matchNext(const Instruction *I, std::shared_ptr<Matcher> M) {
+  if (!I)
+    return nullptr;
+  MatcherContext MC;
+  const BasicBlock *BB = I->getParent();
+  if (!BB)
+    return nullptr;
+  for (auto P = ++BasicBlock::const_iterator(I), E = BB->end(); P != E; ++P) {
+    MC.push(&*P);
+    if (M->match(MC))
+      return &*P;
+    MC.pop();
+  }
+  assert(MC.size() == 0);
+  return nullptr;
+}
+
+}
+#endif
diff --git a/unittests/CodeGen/TBAAMetadataTest.cpp b/unittests/CodeGen/TBAAMetadataTest.cpp
new file mode 100644
index 0000000..7514160
--- /dev/null
+++ b/unittests/CodeGen/TBAAMetadataTest.cpp
@@ -0,0 +1,1299 @@
+//=== unittests/CodeGen/TBAAMetadataTest.cpp - Checks metadata generation -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IRMatchers.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/CodeGen/ModuleBuilder.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Parse/ParseAST.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "gtest/gtest.h"
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+
+struct TestCompiler {
+  LLVMContext Context;
+  clang::CompilerInstance compiler;
+  clang::CodeGenerator *CG = nullptr;
+  llvm::Module *M = nullptr;
+  unsigned PtrSize = 0;
+
+  void init(const char *TestProgram) {
+    compiler.createDiagnostics();
+    compiler.getCodeGenOpts().StructPathTBAA = 1;
+    compiler.getCodeGenOpts().OptimizationLevel = 1;
+
+    std::string TrStr = llvm::Triple::normalize(llvm::sys::getProcessTriple());
+    llvm::Triple Tr(TrStr);
+    Tr.setOS(Triple::Linux);
+    Tr.setVendor(Triple::VendorType::UnknownVendor);
+    Tr.setEnvironment(Triple::EnvironmentType::UnknownEnvironment);
+    compiler.getTargetOpts().Triple = Tr.getTriple();
+    compiler.setTarget(clang::TargetInfo::CreateTargetInfo(
+        compiler.getDiagnostics(),
+        std::make_shared<clang::TargetOptions>(compiler.getTargetOpts())));
+
+    const clang::TargetInfo &TInfo = compiler.getTarget();
+    PtrSize = TInfo.getPointerWidth(0) / 8;
+
+    compiler.createFileManager();
+    compiler.createSourceManager(compiler.getFileManager());
+    compiler.createPreprocessor(clang::TU_Prefix);
+
+    compiler.createASTContext();
+
+    CG = CreateLLVMCodeGen(
+        compiler.getDiagnostics(),
+        "main-module",
+        compiler.getHeaderSearchOpts(),
+        compiler.getPreprocessorOpts(),
+        compiler.getCodeGenOpts(),
+        Context);
+    compiler.setASTConsumer(std::unique_ptr<clang::ASTConsumer>(CG));
+
+    compiler.createSema(clang::TU_Prefix, nullptr);
+
+    clang::SourceManager &sm = compiler.getSourceManager();
+    sm.setMainFileID(sm.createFileID(
+        llvm::MemoryBuffer::getMemBuffer(TestProgram), clang::SrcMgr::C_User));
+  }
+
+  const BasicBlock *compile() {
+    clang::ParseAST(compiler.getSema(), false, false);
+    M = CG->GetModule();
+
+    // Do not expect more than one function definition.
+    auto FuncPtr = M->begin();
+    for (; FuncPtr != M->end(); ++FuncPtr)
+      if (!FuncPtr->isDeclaration())
+        break;
+    assert(FuncPtr != M->end());
+    const llvm::Function &Func = *FuncPtr;
+    ++FuncPtr;
+    for (; FuncPtr != M->end(); ++FuncPtr)
+      if (!FuncPtr->isDeclaration())
+        break;
+    assert(FuncPtr == M->end());
+
+    // The function must consist of single basic block.
+    auto BBPtr = Func.begin();
+    assert(Func.begin() != Func.end());
+    const BasicBlock &BB = *BBPtr;
+    ++BBPtr;
+    assert(BBPtr == Func.end());
+
+    return &BB;
+  }
+};
+
+
+auto OmnipotentCharC = MMTuple(
+  MMString("omnipotent char"),
+  MMTuple(
+    MMString("Simple C/C++ TBAA")),
+  MConstInt(0, 64)
+);
+
+
+auto OmnipotentCharCXX = MMTuple(
+  MMString("omnipotent char"),
+  MMTuple(
+    MMString("Simple C++ TBAA")),
+  MConstInt(0, 64)
+);
+
+
+TEST(TBAAMetadataTest, BasicTypes) {
+  const char TestProgram[] = R"**(
+    void func(char *CP, short *SP, int *IP, long long *LP, void **VPP,
+              int **IPP) {
+      *CP = 4;
+      *SP = 11;
+      *IP = 601;
+      *LP = 604;
+      *VPP = CP;
+      *IPP = IP;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().C11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(4, 8),
+        MMTuple(
+          OmnipotentCharC,
+          MSameAs(0),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(11, 16),
+        MMTuple(
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MSameAs(0),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(601, 32),
+        MMTuple(
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MSameAs(0),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(604, 64),
+        MMTuple(
+          MMTuple(
+            MMString("long long"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MSameAs(0),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MValType(Type::getInt8PtrTy(Compiler.Context)),
+        MMTuple(
+          MMTuple(
+            MMString("any pointer"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MSameAs(0),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MValType(Type::getInt32PtrTy(Compiler.Context)),
+        MMTuple(
+          MMTuple(
+            MMString("any pointer"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MSameAs(0),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+}
+
+TEST(TBAAMetadataTest, CFields) {
+  const char TestProgram[] = R"**(
+    struct ABC {
+       short f16;
+       int f32;
+       long long f64;
+       unsigned short f16_2;
+       unsigned f32_2;
+       unsigned long long f64_2;
+    };
+
+    void func(struct ABC *A) {
+      A->f32 = 4;
+      A->f16 = 11;
+      A->f64 = 601;
+      A->f16_2 = 22;
+      A->f32_2 = 77;
+      A->f64_2 = 604;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().C11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto StructABC = MMTuple(
+    MMString("ABC"),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharC,
+      MConstInt(0)),
+    MConstInt(0),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharC,
+      MConstInt(0)),
+    MConstInt(4),
+    MMTuple(
+      MMString("long long"),
+      OmnipotentCharC,
+      MConstInt(0)),
+    MConstInt(8),
+    MSameAs(1),
+    MConstInt(16),
+    MSameAs(3),
+    MConstInt(20),
+    MSameAs(5),
+    MConstInt(24));
+
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(4, 32),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(11, 16),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(601, 64),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("long long"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(8))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(22, 16),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(16))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(77, 32),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(20))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(604, 64),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("long long"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(24))));
+  ASSERT_TRUE(I);
+}
+
+TEST(TBAAMetadataTest, CTypedefFields) {
+  const char TestProgram[] = R"**(
+    typedef struct {
+       short f16;
+       int f32;
+    } ABC;
+    typedef struct {
+       short value_f16;
+       int value_f32;
+    } CDE;
+
+    void func(ABC *A, CDE *B) {
+      A->f32 = 4;
+      A->f16 = 11;
+      B->value_f32 = 44;
+      B->value_f16 = 111;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().C11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto NamelessStruct = MMTuple(
+    MMString(""),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharC,
+      MConstInt(0)),
+    MConstInt(0),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharC,
+      MConstInt(0)),
+    MConstInt(4));
+
+  const Metadata *MetaABC = nullptr;
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(4, 32),
+        MMTuple(
+          MMSave(MetaABC, NamelessStruct),
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(11, 16),
+        MMTuple(
+          NamelessStruct,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  const Metadata *MetaCDE = nullptr;
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(44, 32),
+        MMTuple(
+          MMSave(MetaCDE, NamelessStruct),
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(111, 16),
+        MMTuple(
+          NamelessStruct,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  // FIXME: Nameless structures used in definitions of 'ABC' and 'CDE' are
+  // different structures and must be described by different descriptors.
+  //ASSERT_TRUE(MetaABC != MetaCDE);
+}
+
+TEST(TBAAMetadataTest, CTypedefFields2) {
+  const char TestProgram[] = R"**(
+    typedef struct {
+       short f16;
+       int f32;
+    } ABC;
+    typedef struct {
+       short f16;
+       int f32;
+    } CDE;
+
+    void func(ABC *A, CDE *B) {
+      A->f32 = 4;
+      A->f16 = 11;
+      B->f32 = 44;
+      B->f16 = 111;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().C11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto NamelessStruct = MMTuple(
+    MMString(""),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharC,
+      MConstInt(0)),
+    MConstInt(0),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharC,
+      MConstInt(0)),
+    MConstInt(4));
+
+  const Metadata *MetaABC = nullptr;
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(4, 32),
+        MMTuple(
+          MMSave(MetaABC, NamelessStruct),
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(11, 16),
+        MMTuple(
+          NamelessStruct,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  const Metadata *MetaCDE = nullptr;
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(44, 32),
+        MMTuple(
+          MMSave(MetaCDE, NamelessStruct),
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(111, 16),
+        MMTuple(
+          NamelessStruct,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  // FIXME: Nameless structures used in definitions of 'ABC' and 'CDE' are
+  // different structures, although they have the same field sequence. They must
+  // be described by different descriptors.
+  //ASSERT_TRUE(MetaABC != MetaCDE);
+}
+
+TEST(TBAAMetadataTest, CTypedefFields3) {
+  const char TestProgram[] = R"**(
+    typedef struct {
+       short f16;
+       int f32;
+    } ABC;
+    typedef struct {
+       int f32;
+       short f16;
+    } CDE;
+
+    void func(ABC *A, CDE *B) {
+      A->f32 = 4;
+      A->f16 = 11;
+      B->f32 = 44;
+      B->f16 = 111;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().C11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto NamelessStruct1 = MMTuple(
+    MMString(""),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharC,
+      MConstInt(0)),
+    MConstInt(0),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharC,
+      MConstInt(0)),
+    MConstInt(4));
+
+  auto NamelessStruct2 = MMTuple(
+    MMString(""),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharC,
+      MConstInt(0)),
+    MConstInt(0),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharC,
+      MConstInt(0)),
+    MConstInt(4));
+
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(4, 32),
+        MMTuple(
+          NamelessStruct1,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(11, 16),
+        MMTuple(
+          NamelessStruct1,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(44, 32),
+        MMTuple(
+          NamelessStruct2,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(111, 16),
+        MMTuple(
+          NamelessStruct2,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharC,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+}
+
+TEST(TBAAMetadataTest, CXXFields) {
+  const char TestProgram[] = R"**(
+    struct ABC {
+       short f16;
+       int f32;
+       long long f64;
+       unsigned short f16_2;
+       unsigned f32_2;
+       unsigned long long f64_2;
+    };
+
+    void func(struct ABC *A) {
+      A->f32 = 4;
+      A->f16 = 11;
+      A->f64 = 601;
+      A->f16_2 = 22;
+      A->f32_2 = 77;
+      A->f64_2 = 604;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().CPlusPlus = 1;
+  Compiler.compiler.getLangOpts().CPlusPlus11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto StructABC = MMTuple(
+    MMString("_ZTS3ABC"),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(0),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(4),
+    MMTuple(
+      MMString("long long"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(8),
+    MSameAs(1),
+    MConstInt(16),
+    MSameAs(3),
+    MConstInt(20),
+    MSameAs(5),
+    MConstInt(24));
+
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(4, 32),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(11, 16),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(601, 64),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("long long"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(8))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(22, 16),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(16))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(77, 32),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(20))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(604, 64),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("long long"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(24))));
+  ASSERT_TRUE(I);
+}
+
+TEST(TBAAMetadataTest, CXXTypedefFields) {
+  const char TestProgram[] = R"**(
+    typedef struct {
+       short f16;
+       int f32;
+    } ABC;
+    typedef struct {
+       short value_f16;
+       int value_f32;
+    } CDE;
+
+    void func(ABC *A, CDE *B) {
+      A->f32 = 4;
+      A->f16 = 11;
+      B->value_f32 = 44;
+      B->value_f16 = 111;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().CPlusPlus = 1;
+  Compiler.compiler.getLangOpts().CPlusPlus11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto StructABC = MMTuple(
+    MMString("_ZTS3ABC"),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(0),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(4));
+
+  auto StructCDE = MMTuple(
+    MMString("_ZTS3CDE"),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(0),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(4));
+
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(4, 32),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(11, 16),
+        MMTuple(
+          StructABC,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(44, 32),
+        MMTuple(
+          StructCDE,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(111, 16),
+        MMTuple(
+          StructCDE,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+}
+
+TEST(TBAAMetadataTest, StructureFields) {
+  const char TestProgram[] = R"**(
+    struct Inner {
+      int f32;
+    };
+
+    struct Outer {
+      short f16;
+      Inner b1;
+      Inner b2;
+    };
+
+    void func(Outer *S) {
+      S->f16 = 14;
+      S->b1.f32 = 35;
+      S->b2.f32 = 77;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().CPlusPlus = 1;
+  Compiler.compiler.getLangOpts().CPlusPlus11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto StructInner = MMTuple(
+    MMString("_ZTS5Inner"),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(0));
+
+  auto StructOuter = MMTuple(
+    MMString("_ZTS5Outer"),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(0),
+    StructInner,
+    MConstInt(4),
+    MSameAs(3),
+    MConstInt(8));
+
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(14, 16),
+        MMTuple(
+          StructOuter,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(35, 32),
+        MMTuple(
+          StructOuter,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(77, 32),
+        MMTuple(
+          StructOuter,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(8))));
+  ASSERT_TRUE(I);
+}
+
+TEST(TBAAMetadataTest, ArrayFields) {
+  const char TestProgram[] = R"**(
+    struct Inner {
+      int f32;
+    };
+
+    struct Outer {
+      short f16;
+      Inner b1[2];
+    };
+
+    void func(Outer *S) {
+      S->f16 = 14;
+      S->b1[0].f32 = 35;
+      S->b1[1].f32 = 77;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().CPlusPlus = 1;
+  Compiler.compiler.getLangOpts().CPlusPlus11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto StructInner = MMTuple(
+    MMString("_ZTS5Inner"),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(0));
+
+  auto StructOuter = MMTuple(
+    MMString("_ZTS5Outer"),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(0),
+    OmnipotentCharCXX,    // FIXME: Info about array field is lost.
+    MConstInt(4));
+
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(14, 16),
+        MMTuple(
+          StructOuter,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(35, 32),
+        MMTuple(
+          StructInner,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(77, 32),
+        MMTuple(
+          StructInner,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+}
+
+TEST(TBAAMetadataTest, BaseClass) {
+  const char TestProgram[] = R"**(
+    struct Base {
+      int f32;
+    };
+
+    struct Derived : public Base {
+      short f16;
+    };
+
+    void func(Base *B, Derived *D) {
+      B->f32 = 14;
+      D->f16 = 35;
+      D->f32 = 77;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().CPlusPlus = 1;
+  Compiler.compiler.getLangOpts().CPlusPlus11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto ClassBase = MMTuple(
+    MMString("_ZTS4Base"),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(0));
+
+  auto ClassDerived = MMTuple(
+    MMString("_ZTS7Derived"),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(4));
+
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(14, 32),
+        MMTuple(
+          ClassBase,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(35, 16),
+        MMTuple(
+          ClassDerived,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(77, 32),
+        MMTuple(
+          ClassBase,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+}
+
+TEST(TBAAMetadataTest, PolymorphicClass) {
+  const char TestProgram[] = R"**(
+    struct Base {
+      virtual void m1(int *) = 0;
+      int f32;
+    };
+
+    struct Derived : public Base {
+      virtual void m1(int *) override;
+      short f16;
+    };
+
+    void func(Base *B, Derived *D) {
+      B->f32 = 14;
+      D->f16 = 35;
+      D->f32 = 77;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().CPlusPlus = 1;
+  Compiler.compiler.getLangOpts().CPlusPlus11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto ClassBase = MMTuple(
+    MMString("_ZTS4Base"),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(Compiler.PtrSize));
+
+  auto ClassDerived = MMTuple(
+    MMString("_ZTS7Derived"),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(Compiler.PtrSize + 4));
+
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(14, 32),
+        MMTuple(
+          ClassBase,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(Compiler.PtrSize))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(35, 16),
+        MMTuple(
+          ClassDerived,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(Compiler.PtrSize + 4))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(77, 32),
+        MMTuple(
+          ClassBase,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(Compiler.PtrSize))));
+  ASSERT_TRUE(I);
+}
+
+TEST(TBAAMetadataTest, VirtualBase) {
+  const char TestProgram[] = R"**(
+    struct Base {
+      int f32;
+    };
+
+    struct Derived : public virtual Base {
+      short f16;
+    };
+
+    void func(Base *B, Derived *D) {
+      B->f32 = 14;
+      D->f16 = 35;
+      D->f32 = 77;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().CPlusPlus = 1;
+  Compiler.compiler.getLangOpts().CPlusPlus11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto ClassBase = MMTuple(
+    MMString("_ZTS4Base"),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(0));
+
+  auto ClassDerived = MMTuple(
+    MMString("_ZTS7Derived"),
+    MMTuple(
+      MMString("short"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(Compiler.PtrSize));
+
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MConstInt(14, 32),
+        MMTuple(
+          ClassBase,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(35, 16),
+        MMTuple(
+          ClassDerived,
+          MMTuple(
+            MMString("short"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(Compiler.PtrSize))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Load,
+        MMTuple(
+          MMTuple(
+            MMString("vtable pointer"),
+            MMTuple(
+              MMString("Simple C++ TBAA")),
+            MConstInt(0)),
+          MSameAs(0),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(77, 32),
+        MMTuple(
+          ClassBase,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+}
+
+TEST(TBAAMetadataTest, TemplSpec) {
+  const char TestProgram[] = R"**(
+    template<typename T1, typename T2>
+    struct ABC {
+      T1 f1;
+      T2 f2;
+    };
+
+    void func(ABC<double, int> *p) {
+      p->f1 = 12.1;
+      p->f2 = 44;
+    }
+  )**";
+
+  TestCompiler Compiler;
+  Compiler.compiler.getLangOpts().CPlusPlus = 1;
+  Compiler.compiler.getLangOpts().CPlusPlus11 = 1;
+  Compiler.init(TestProgram);
+  const BasicBlock *BB = Compiler.compile();
+
+  auto SpecABC = MMTuple(
+    MMString("_ZTS3ABCIdiE"),
+    MMTuple(
+      MMString("double"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(0),
+    MMTuple(
+      MMString("int"),
+      OmnipotentCharCXX,
+      MConstInt(0)),
+    MConstInt(8));
+
+  const Instruction *I = match(BB,
+      MInstruction(Instruction::Store,
+        MValType(MType([](const Type &T)->bool { return T.isDoubleTy(); })),
+        MMTuple(
+          SpecABC,
+          MMTuple(
+            MMString("double"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(0))));
+  ASSERT_TRUE(I);
+
+  I = matchNext(I,
+      MInstruction(Instruction::Store,
+        MConstInt(44, 32),
+        MMTuple(
+          SpecABC,
+          MMTuple(
+            MMString("int"),
+            OmnipotentCharCXX,
+            MConstInt(0)),
+          MConstInt(8))));
+  ASSERT_TRUE(I);
+}
+}
diff --git a/unittests/CrossTU/CMakeLists.txt b/unittests/CrossTU/CMakeLists.txt
index 3c479c4..652d916 100644
--- a/unittests/CrossTU/CMakeLists.txt
+++ b/unittests/CrossTU/CMakeLists.txt
@@ -8,6 +8,7 @@
   )
 
 target_link_libraries(CrossTUTests
+  PRIVATE
   clangAST
   clangBasic
   clangCrossTU
diff --git a/unittests/CrossTU/CrossTranslationUnitTest.cpp b/unittests/CrossTU/CrossTranslationUnitTest.cpp
index 795a435..5fbf56e 100644
--- a/unittests/CrossTU/CrossTranslationUnitTest.cpp
+++ b/unittests/CrossTU/CrossTranslationUnitTest.cpp
@@ -109,9 +109,9 @@
 
 TEST(CrossTranslationUnit, IndexFormatCanBeParsed) {
   llvm::StringMap<std::string> Index;
-  Index["a"] = "b";
-  Index["c"] = "d";
-  Index["e"] = "f";
+  Index["a"] = "/b/f1";
+  Index["c"] = "/d/f2";
+  Index["e"] = "/f/f3";
   std::string IndexText = createCrossTUIndexString(Index);
 
   int IndexFD;
@@ -134,5 +134,25 @@
     EXPECT_TRUE(Index.count(E.getKey()));
 }
 
+TEST(CrossTranslationUnit, CTUDirIsHandledCorrectly) {
+  llvm::StringMap<std::string> Index;
+  Index["a"] = "/b/c/d";
+  std::string IndexText = createCrossTUIndexString(Index);
+
+  int IndexFD;
+  llvm::SmallString<256> IndexFileName;
+  ASSERT_FALSE(llvm::sys::fs::createTemporaryFile("index", "txt", IndexFD,
+                                                  IndexFileName));
+  llvm::ToolOutputFile IndexFile(IndexFileName, IndexFD);
+  IndexFile.os() << IndexText;
+  IndexFile.os().flush();
+  EXPECT_TRUE(llvm::sys::fs::exists(IndexFileName));
+  llvm::Expected<llvm::StringMap<std::string>> IndexOrErr =
+      parseCrossTUIndex(IndexFileName, "/ctudir");
+  EXPECT_TRUE((bool)IndexOrErr);
+  llvm::StringMap<std::string> ParsedIndex = IndexOrErr.get();
+  EXPECT_EQ(ParsedIndex["a"], "/ctudir/b/c/d");
+}
+
 } // end namespace cross_tu
 } // end namespace clang
diff --git a/unittests/Driver/CMakeLists.txt b/unittests/Driver/CMakeLists.txt
index 2a3f41d..82dc0ff 100644
--- a/unittests/Driver/CMakeLists.txt
+++ b/unittests/Driver/CMakeLists.txt
@@ -7,10 +7,12 @@
 add_clang_unittest(ClangDriverTests
   DistroTest.cpp
   ToolChainTest.cpp
+  ModuleCacheTest.cpp
   MultilibTest.cpp
   )
 
 target_link_libraries(ClangDriverTests
+  PRIVATE
   clangDriver
   clangBasic
   )
diff --git a/unittests/Driver/ModuleCacheTest.cpp b/unittests/Driver/ModuleCacheTest.cpp
new file mode 100644
index 0000000..7340889
--- /dev/null
+++ b/unittests/Driver/ModuleCacheTest.cpp
@@ -0,0 +1,28 @@
+//===- unittests/Driver/ModuleCacheTest.cpp -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Unit tests for the LLDB module cache API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Driver/Driver.h"
+#include "gtest/gtest.h"
+using namespace clang;
+using namespace clang::driver;
+
+namespace {
+
+TEST(ModuleCacheTest, GetTargetAndMode) {
+  SmallString<128> Buf;
+  Driver::getDefaultModuleCachePath(Buf);
+  StringRef Path = Buf;
+  EXPECT_TRUE(Path.find("org.llvm.clang") != Path.npos);
+  EXPECT_TRUE(Path.endswith("ModuleCache"));  
+}
+} // end anonymous namespace.
diff --git a/unittests/Format/CMakeLists.txt b/unittests/Format/CMakeLists.txt
index fa7e32c..18e4432 100644
--- a/unittests/Format/CMakeLists.txt
+++ b/unittests/Format/CMakeLists.txt
@@ -10,6 +10,7 @@
   FormatTestJava.cpp
   FormatTestObjC.cpp
   FormatTestProto.cpp
+  FormatTestRawStrings.cpp
   FormatTestSelective.cpp
   FormatTestTextProto.cpp
   NamespaceEndCommentsFixerTest.cpp
@@ -19,6 +20,7 @@
   )
 
 target_link_libraries(FormatTests
+  PRIVATE
   clangBasic
   clangFormat
   clangFrontend
diff --git a/unittests/Format/CleanupTest.cpp b/unittests/Format/CleanupTest.cpp
index 708fdf8..032cc44 100644
--- a/unittests/Format/CleanupTest.cpp
+++ b/unittests/Format/CleanupTest.cpp
@@ -496,7 +496,7 @@
                      "#include \"y/a.h\"\n"
                      "#include \"z/b.h\"\n";
   // FIXME: inserting after the empty line following the main header might be
-  // prefered.
+  // preferred.
   std::string Expected = "#include \"x/fix.h\"\n"
                          "#include <vector>\n"
                          "\n"
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index dadc025..75667d5 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -70,18 +70,23 @@
     return getStyleWithColumns(getGoogleStyle(), ColumnLimit);
   }
 
-  void verifyFormat(llvm::StringRef Code,
+  void verifyFormat(llvm::StringRef Expected, llvm::StringRef Code,
                     const FormatStyle &Style = getLLVMStyle()) {
-    EXPECT_EQ(Code.str(), format(test::messUp(Code), Style));
+    EXPECT_EQ(Expected.str(), format(Code, Style));
     if (Style.Language == FormatStyle::LK_Cpp) {
       // Objective-C++ is a superset of C++, so everything checked for C++
       // needs to be checked for Objective-C++ as well.
       FormatStyle ObjCStyle = Style;
       ObjCStyle.Language = FormatStyle::LK_ObjC;
-      EXPECT_EQ(Code.str(), format(test::messUp(Code), ObjCStyle));
+      EXPECT_EQ(Expected.str(), format(test::messUp(Code), ObjCStyle));
     }
   }
 
+  void verifyFormat(llvm::StringRef Code,
+                    const FormatStyle &Style = getLLVMStyle()) {
+    verifyFormat(Code, test::messUp(Code), Style);
+  }
+
   void verifyIncompleteFormat(llvm::StringRef Code,
                               const FormatStyle &Style = getLLVMStyle()) {
     EXPECT_EQ(Code.str(),
@@ -583,6 +588,23 @@
                AllowSimpleBracedStatements);
 }
 
+TEST_F(FormatTest, ShortBlocksInMacrosDontMergeWithCodeAfterMacro) {
+  FormatStyle Style = getLLVMStyleWithColumns(60);
+  Style.AllowShortBlocksOnASingleLine = true;
+  Style.AllowShortIfStatementsOnASingleLine = true;
+  Style.BreakBeforeBraces = FormatStyle::BS_Allman;
+  EXPECT_EQ("#define A                                                  \\\n"
+            "  if (HANDLEwernufrnuLwrmviferuvnierv)                     \\\n"
+            "  { RET_ERR1_ANUIREUINERUIFNIOAerwfwrvnuier; }\n"
+            "X;",
+            format("#define A \\\n"
+                   "   if (HANDLEwernufrnuLwrmviferuvnierv) { \\\n"
+                   "      RET_ERR1_ANUIREUINERUIFNIOAerwfwrvnuier; \\\n"
+                   "   }\n"
+                   "X;",
+                   Style));
+}
+
 TEST_F(FormatTest, ParseIfElse) {
   verifyFormat("if (true)\n"
                "  if (true)\n"
@@ -720,6 +742,12 @@
                "     aaaaaaaaaaaaaaaaaaaaaaaaaaa != bbbbbbbbbbbbbbbbbbbbbbb;\n"
                "     ++aaaaaaaaaaaaaaaaaaaaaaaaaaa) {");
 
+  // These should not be formatted as Objective-C for-in loops.
+  verifyFormat("for (Foo *x = 0; x != in; x++) {\n}");
+  verifyFormat("Foo *x;\nfor (x = 0; x != in; x++) {\n}");
+  verifyFormat("Foo *x;\nfor (x in y) {\n}");
+  verifyFormat("for (const Foo<Bar> &baz = in.value(); !baz.at_end(); ++baz) {\n}");
+
   FormatStyle NoBinPacking = getLLVMStyle();
   NoBinPacking.BinPackParameters = false;
   verifyFormat("for (int aaaaaaaaaaa = 1;\n"
@@ -2479,8 +2507,6 @@
                "#endif",
                Style);
   // Test with include guards.
-  // EXPECT_EQ is used because verifyFormat() calls messUp() which incorrectly
-  // merges lines.
   verifyFormat("#ifndef HEADER_H\n"
                "#define HEADER_H\n"
                "code();\n"
@@ -2527,6 +2553,20 @@
                "#elif FOO\n"
                "#endif",
                Style);
+  // Non-identifier #define after potential include guard.
+  verifyFormat("#ifndef FOO\n"
+               "#  define 1\n"
+               "#endif\n",
+               Style);
+  // #if closes past last non-preprocessor line.
+  verifyFormat("#ifndef FOO\n"
+               "#define FOO\n"
+               "#if 1\n"
+               "int i;\n"
+               "#  define A 0\n"
+               "#endif\n"
+               "#endif\n",
+               Style);
   // FIXME: This doesn't handle the case where there's code between the
   // #ifndef and #define but all other conditions hold. This is because when
   // the #define line is parsed, UnwrappedLineParser::Lines doesn't hold the
@@ -2575,21 +2615,85 @@
                    "code();\n"
                    "#endif",
                    Style));
-  // FIXME: The comment indent corrector in TokenAnnotator gets thrown off by
-  // preprocessor indentation.
-  EXPECT_EQ("#if 1\n"
-            "  // comment\n"
-            "#  define A 0\n"
-            "// comment\n"
-            "#  define B 0\n"
-            "#endif",
-            format("#if 1\n"
-                   "// comment\n"
-                   "#  define A 0\n"
-                   "   // comment\n"
-                   "#  define B 0\n"
-                   "#endif",
-                   Style));
+  // Keep comments aligned with #, otherwise indent comments normally. These
+  // tests cannot use verifyFormat because messUp manipulates leading
+  // whitespace.
+  {
+    const char *Expected = ""
+                           "void f() {\n"
+                           "#if 1\n"
+                           "// Preprocessor aligned.\n"
+                           "#  define A 0\n"
+                           "  // Code. Separated by blank line.\n"
+                           "\n"
+                           "#  define B 0\n"
+                           "  // Code. Not aligned with #\n"
+                           "#  define C 0\n"
+                           "#endif";
+    const char *ToFormat = ""
+                           "void f() {\n"
+                           "#if 1\n"
+                           "// Preprocessor aligned.\n"
+                           "#  define A 0\n"
+                           "// Code. Separated by blank line.\n"
+                           "\n"
+                           "#  define B 0\n"
+                           "   // Code. Not aligned with #\n"
+                           "#  define C 0\n"
+                           "#endif";
+    EXPECT_EQ(Expected, format(ToFormat, Style));
+    EXPECT_EQ(Expected, format(Expected, Style));
+  }
+  // Keep block quotes aligned.
+  {
+    const char *Expected = ""
+                           "void f() {\n"
+                           "#if 1\n"
+                           "/* Preprocessor aligned. */\n"
+                           "#  define A 0\n"
+                           "  /* Code. Separated by blank line. */\n"
+                           "\n"
+                           "#  define B 0\n"
+                           "  /* Code. Not aligned with # */\n"
+                           "#  define C 0\n"
+                           "#endif";
+    const char *ToFormat = ""
+                           "void f() {\n"
+                           "#if 1\n"
+                           "/* Preprocessor aligned. */\n"
+                           "#  define A 0\n"
+                           "/* Code. Separated by blank line. */\n"
+                           "\n"
+                           "#  define B 0\n"
+                           "   /* Code. Not aligned with # */\n"
+                           "#  define C 0\n"
+                           "#endif";
+    EXPECT_EQ(Expected, format(ToFormat, Style));
+    EXPECT_EQ(Expected, format(Expected, Style));
+  }
+  // Keep comments aligned with un-indented directives.
+  {
+    const char *Expected = ""
+                           "void f() {\n"
+                           "// Preprocessor aligned.\n"
+                           "#define A 0\n"
+                           "  // Code. Separated by blank line.\n"
+                           "\n"
+                           "#define B 0\n"
+                           "  // Code. Not aligned with #\n"
+                           "#define C 0\n";
+    const char *ToFormat = ""
+                           "void f() {\n"
+                           "// Preprocessor aligned.\n"
+                           "#define A 0\n"
+                           "// Code. Separated by blank line.\n"
+                           "\n"
+                           "#define B 0\n"
+                           "   // Code. Not aligned with #\n"
+                           "#define C 0\n";
+    EXPECT_EQ(Expected, format(ToFormat, Style));
+    EXPECT_EQ(Expected, format(Expected, Style));
+  }
   // Test with tabs.
   Style.UseTab = FormatStyle::UT_Always;
   Style.IndentWidth = 8;
@@ -2629,14 +2733,39 @@
 }
 
 TEST_F(FormatTest, EscapedNewlines) {
-  EXPECT_EQ(
-      "#define A \\\n  int i;  \\\n  int j;",
-      format("#define A \\\nint i;\\\n  int j;", getLLVMStyleWithColumns(11)));
+  FormatStyle Narrow = getLLVMStyleWithColumns(11);
+  EXPECT_EQ("#define A \\\n  int i;  \\\n  int j;",
+            format("#define A \\\nint i;\\\n  int j;", Narrow));
   EXPECT_EQ("#define A\n\nint i;", format("#define A \\\n\n int i;"));
   EXPECT_EQ("template <class T> f();", format("\\\ntemplate <class T> f();"));
   EXPECT_EQ("/* \\  \\  \\\n */", format("\\\n/* \\  \\  \\\n */"));
   EXPECT_EQ("<a\n\\\\\n>", format("<a\n\\\\\n>"));
 
+  FormatStyle AlignLeft = getLLVMStyle();
+  AlignLeft.AlignEscapedNewlines = FormatStyle::ENAS_Left;
+  EXPECT_EQ("#define MACRO(x) \\\n"
+            "private:         \\\n"
+            "  int x(int a);\n",
+            format("#define MACRO(x) \\\n"
+                   "private:         \\\n"
+                   "  int x(int a);\n",
+                   AlignLeft));
+
+  // CRLF line endings
+  EXPECT_EQ("#define A \\\r\n  int i;  \\\r\n  int j;",
+            format("#define A \\\r\nint i;\\\r\n  int j;", Narrow));
+  EXPECT_EQ("#define A\r\n\r\nint i;", format("#define A \\\r\n\r\n int i;"));
+  EXPECT_EQ("template <class T> f();", format("\\\ntemplate <class T> f();"));
+  EXPECT_EQ("/* \\  \\  \\\r\n */", format("\\\r\n/* \\  \\  \\\r\n */"));
+  EXPECT_EQ("<a\r\n\\\\\r\n>", format("<a\r\n\\\\\r\n>"));
+  EXPECT_EQ("#define MACRO(x) \\\r\n"
+            "private:         \\\r\n"
+            "  int x(int a);\r\n",
+            format("#define MACRO(x) \\\r\n"
+                   "private:         \\\r\n"
+                   "  int x(int a);\r\n",
+                   AlignLeft));
+
   FormatStyle DontAlign = getLLVMStyle();
   DontAlign.AlignEscapedNewlines = FormatStyle::ENAS_DontAlign;
   DontAlign.MaxEmptyLinesToKeep = 3;
@@ -2659,7 +2788,8 @@
                    "\\\n"
                    "  public: \\\n"
                    "    void baz(); \\\n"
-                   "  };", DontAlign));
+                   "  };",
+                   DontAlign));
 }
 
 TEST_F(FormatTest, CalculateSpaceOnConsecutiveLinesInMacro) {
@@ -3041,6 +3171,10 @@
       "if (aaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaa(\n"
       "        aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) == 5) {\n"
       "}");
+  verifyFormat(
+      "if (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n"
+      "        aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) <=> 5) {\n"
+      "}");
   // Even explicit parentheses stress the precedence enough to make the
   // additional break unnecessary.
   verifyFormat("if ((aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +\n"
@@ -3060,6 +3194,10 @@
                "        aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ==\n"
                "    5) {\n"
                "}");
+  verifyFormat("if (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +\n"
+               "        aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa <=>\n"
+               "    5) {\n"
+               "}");
 
   FormatStyle OnePerLine = getLLVMStyle();
   OnePerLine.BinPackParameters = false;
@@ -5523,6 +5661,8 @@
   verifyFormat("(a->f())++;");
   verifyFormat("a[42]++;");
   verifyFormat("if (!(a->f())) {\n}");
+  verifyFormat("if (!+i) {\n}");
+  verifyFormat("~&a;");
 
   verifyFormat("a-- > b;");
   verifyFormat("b ? -a : c;");
@@ -5565,6 +5705,7 @@
   verifyFormat("bool operator!=();");
   verifyFormat("int operator+();");
   verifyFormat("int operator++();");
+  verifyFormat("int operator++(int) volatile noexcept;");
   verifyFormat("bool operator,();");
   verifyFormat("bool operator();");
   verifyFormat("bool operator()();");
@@ -6292,7 +6433,8 @@
                "#include_next <test.h>"
                "#include \"abc.h\" // this is included for ABC\n"
                "#include \"some long include\" // with a comment\n"
-               "#include \"some very long include paaaaaaaaaaaaaaaaaaaaaaath\"",
+               "#include \"some very long include path\"\n"
+               "#include <some/very/long/include/path>\n",
                getLLVMStyleWithColumns(35));
   EXPECT_EQ("#include \"a.h\"", format("#include  \"a.h\""));
   EXPECT_EQ("#include <a>", format("#include<a>"));
@@ -7699,6 +7841,12 @@
             format("#define A \"some text other\";", AlignLeft));
 }
 
+TEST_F(FormatTest, BreaksStringLiteralsAtColumnLimit) {
+  EXPECT_EQ("C a = \"some more \"\n"
+            "      \"text\";",
+            format("C a = \"some more text\";", getLLVMStyleWithColumns(18)));
+}
+
 TEST_F(FormatTest, FullyRemoveEmptyLines) {
   FormatStyle NoEmptyLines = getLLVMStyleWithColumns(80);
   NoEmptyLines.MaxEmptyLinesToKeep = 0;
@@ -7974,9 +8122,9 @@
             "    \"f\");",
             format("someFunction1234567890(\"aaabbbcccdddeeefff\");",
                    getLLVMStyleWithColumns(24)));
-  EXPECT_EQ("someFunction(\"aaabbbcc \"\n"
-            "             \"ddde \"\n"
-            "             \"efff\");",
+  EXPECT_EQ("someFunction(\n"
+            "    \"aaabbbcc ddde \"\n"
+            "    \"efff\");",
             format("someFunction(\"aaabbbcc ddde efff\");",
                    getLLVMStyleWithColumns(25)));
   EXPECT_EQ("someFunction(\"aaabbbccc \"\n"
@@ -7995,10 +8143,9 @@
             "  int i;",
             format("#define A string s = \"1234567890\"; int i;",
                    getLLVMStyleWithColumns(20)));
-  // FIXME: Put additional penalties on breaking at non-whitespace locations.
-  EXPECT_EQ("someFunction(\"aaabbbcc \"\n"
-            "             \"dddeeeff\"\n"
-            "             \"f\");",
+  EXPECT_EQ("someFunction(\n"
+            "    \"aaabbbcc \"\n"
+            "    \"dddeeefff\");",
             format("someFunction(\"aaabbbcc dddeeefff\");",
                    getLLVMStyleWithColumns(25)));
 }
@@ -8794,6 +8941,114 @@
   verifyFormat("a or_eq 8;", Spaces);
 }
 
+TEST_F(FormatTest, ConfigurableSpaceBeforeColon) {
+  verifyFormat("class Foo : public Bar {};");
+  verifyFormat("Foo::Foo() : foo(1) {}");
+  verifyFormat("for (auto a : b) {\n}");
+  verifyFormat("int x = a ? b : c;");
+  verifyFormat("{\n"
+               "label0:\n"
+               "  int x = 0;\n"
+               "}");
+  verifyFormat("switch (x) {\n"
+               "case 1:\n"
+               "default:\n"
+               "}");
+
+  FormatStyle CtorInitializerStyle = getLLVMStyleWithColumns(30);
+  CtorInitializerStyle.SpaceBeforeCtorInitializerColon = false;
+  verifyFormat("class Foo : public Bar {};", CtorInitializerStyle);
+  verifyFormat("Foo::Foo(): foo(1) {}", CtorInitializerStyle);
+  verifyFormat("for (auto a : b) {\n}", CtorInitializerStyle);
+  verifyFormat("int x = a ? b : c;", CtorInitializerStyle);
+  verifyFormat("{\n"
+               "label1:\n"
+               "  int x = 0;\n"
+               "}",
+               CtorInitializerStyle);
+  verifyFormat("switch (x) {\n"
+               "case 1:\n"
+               "default:\n"
+               "}",
+               CtorInitializerStyle);
+  CtorInitializerStyle.BreakConstructorInitializers =
+      FormatStyle::BCIS_AfterColon;
+  verifyFormat("Fooooooooooo::Fooooooooooo():\n"
+               "    aaaaaaaaaaaaaaaa(1),\n"
+               "    bbbbbbbbbbbbbbbb(2) {}",
+               CtorInitializerStyle);
+  CtorInitializerStyle.BreakConstructorInitializers =
+      FormatStyle::BCIS_BeforeComma;
+  verifyFormat("Fooooooooooo::Fooooooooooo()\n"
+               "    : aaaaaaaaaaaaaaaa(1)\n"
+               "    , bbbbbbbbbbbbbbbb(2) {}",
+               CtorInitializerStyle);
+  CtorInitializerStyle.BreakConstructorInitializers =
+      FormatStyle::BCIS_BeforeColon;
+  verifyFormat("Fooooooooooo::Fooooooooooo()\n"
+               "    : aaaaaaaaaaaaaaaa(1),\n"
+               "      bbbbbbbbbbbbbbbb(2) {}",
+               CtorInitializerStyle);
+  CtorInitializerStyle.ConstructorInitializerIndentWidth = 0;
+  verifyFormat("Fooooooooooo::Fooooooooooo()\n"
+               ": aaaaaaaaaaaaaaaa(1),\n"
+               "  bbbbbbbbbbbbbbbb(2) {}",
+               CtorInitializerStyle);
+
+  FormatStyle InheritanceStyle = getLLVMStyle();
+  InheritanceStyle.SpaceBeforeInheritanceColon = false;
+  verifyFormat("class Foo: public Bar {};", InheritanceStyle);
+  verifyFormat("Foo::Foo() : foo(1) {}", InheritanceStyle);
+  verifyFormat("for (auto a : b) {\n}", InheritanceStyle);
+  verifyFormat("int x = a ? b : c;", InheritanceStyle);
+  verifyFormat("{\n"
+               "label2:\n"
+               "  int x = 0;\n"
+               "}",
+               InheritanceStyle);
+  verifyFormat("switch (x) {\n"
+               "case 1:\n"
+               "default:\n"
+               "}",
+               InheritanceStyle);
+
+  FormatStyle ForLoopStyle = getLLVMStyle();
+  ForLoopStyle.SpaceBeforeRangeBasedForLoopColon = false;
+  verifyFormat("class Foo : public Bar {};", ForLoopStyle);
+  verifyFormat("Foo::Foo() : foo(1) {}", ForLoopStyle);
+  verifyFormat("for (auto a: b) {\n}", ForLoopStyle);
+  verifyFormat("int x = a ? b : c;", ForLoopStyle);
+  verifyFormat("{\n"
+               "label2:\n"
+               "  int x = 0;\n"
+               "}",
+               ForLoopStyle);
+  verifyFormat("switch (x) {\n"
+               "case 1:\n"
+               "default:\n"
+               "}",
+               ForLoopStyle);
+
+  FormatStyle NoSpaceStyle = getLLVMStyle();
+  NoSpaceStyle.SpaceBeforeCtorInitializerColon = false;
+  NoSpaceStyle.SpaceBeforeInheritanceColon = false;
+  NoSpaceStyle.SpaceBeforeRangeBasedForLoopColon = false;
+  verifyFormat("class Foo: public Bar {};", NoSpaceStyle);
+  verifyFormat("Foo::Foo(): foo(1) {}", NoSpaceStyle);
+  verifyFormat("for (auto a: b) {\n}", NoSpaceStyle);
+  verifyFormat("int x = a ? b : c;", NoSpaceStyle);
+  verifyFormat("{\n"
+               "label3:\n"
+               "  int x = 0;\n"
+               "}",
+               NoSpaceStyle);
+  verifyFormat("switch (x) {\n"
+               "case 1:\n"
+               "default:\n"
+               "}",
+               NoSpaceStyle);
+}
+
 TEST_F(FormatTest, AlignConsecutiveAssignments) {
   FormatStyle Alignment = getLLVMStyle();
   Alignment.AlignConsecutiveAssignments = false;
@@ -9867,6 +10122,113 @@
   EXPECT_EQ("#pragma option -C -A", format("#pragma    option   -C   -A"));
 }
 
+TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) {
+  FormatStyle Style = getLLVMStyle();
+  Style.ColumnLimit = 20;
+
+  verifyFormat("int a; // the\n"
+               "       // comment", Style);
+  EXPECT_EQ("int a; /* first line\n"
+            "        * second\n"
+            "        * line third\n"
+            "        * line\n"
+            "        */",
+            format("int a; /* first line\n"
+                   "        * second\n"
+                   "        * line third\n"
+                   "        * line\n"
+                   "        */",
+                   Style));
+  EXPECT_EQ("int a; // first line\n"
+            "       // second\n"
+            "       // line third\n"
+            "       // line",
+            format("int a; // first line\n"
+                   "       // second line\n"
+                   "       // third line",
+                   Style));
+
+  Style.PenaltyExcessCharacter = 90;
+  verifyFormat("int a; // the comment", Style);
+  EXPECT_EQ("int a; // the comment\n"
+            "       // aaa",
+            format("int a; // the comment aaa", Style));
+  EXPECT_EQ("int a; /* first line\n"
+            "        * second line\n"
+            "        * third line\n"
+            "        */",
+            format("int a; /* first line\n"
+                   "        * second line\n"
+                   "        * third line\n"
+                   "        */",
+                   Style));
+  EXPECT_EQ("int a; // first line\n"
+            "       // second line\n"
+            "       // third line",
+            format("int a; // first line\n"
+                   "       // second line\n"
+                   "       // third line",
+                   Style));
+  // FIXME: Investigate why this is not getting the same layout as the test
+  // above.
+  EXPECT_EQ("int a; /* first line\n"
+            "        * second line\n"
+            "        * third line\n"
+            "        */",
+            format("int a; /* first line second line third line"
+                   "\n*/",
+                   Style));
+
+  EXPECT_EQ("// foo bar baz bazfoo\n"
+            "// foo bar foo bar\n",
+            format("// foo bar baz bazfoo\n"
+                   "// foo bar foo           bar\n",
+                   Style));
+  EXPECT_EQ("// foo bar baz bazfoo\n"
+            "// foo bar foo bar\n",
+            format("// foo bar baz      bazfoo\n"
+                   "// foo            bar foo bar\n",
+                   Style));
+
+  // FIXME: Optimally, we'd keep bazfoo on the first line and reflow bar to the
+  // next one.
+  EXPECT_EQ("// foo bar baz bazfoo\n"
+            "// bar foo bar\n",
+            format("// foo bar baz      bazfoo bar\n"
+                   "// foo            bar\n",
+                   Style));
+
+  EXPECT_EQ("// foo bar baz bazfoo\n"
+            "// foo bar baz bazfoo\n"
+            "// bar foo bar\n",
+            format("// foo bar baz      bazfoo\n"
+                   "// foo bar baz      bazfoo bar\n"
+                   "// foo bar\n",
+                   Style));
+
+  EXPECT_EQ("// foo bar baz bazfoo\n"
+            "// foo bar baz bazfoo\n"
+            "// bar foo bar\n",
+            format("// foo bar baz      bazfoo\n"
+                   "// foo bar baz      bazfoo bar\n"
+                   "// foo           bar\n",
+                   Style));
+
+  // Make sure we do not keep protruding characters if strict mode reflow is
+  // cheaper than keeping protruding characters.
+  Style.ColumnLimit = 21;
+  EXPECT_EQ("// foo foo foo foo\n"
+            "// foo foo foo foo\n"
+            "// foo foo foo foo\n",
+            format("// foo foo foo foo foo foo foo foo foo foo foo foo\n",
+                           Style));
+
+  EXPECT_EQ("int a = /* long block\n"
+            "           comment */\n"
+            "    42;",
+            format("int a = /* long block comment */ 42;", Style));
+}
+
 #define EXPECT_ALL_STYLES_EQUAL(Styles)                                        \
   for (size_t i = 1; i < Styles.size(); ++i)                                   \
   EXPECT_EQ(Styles[0], Styles[i]) << "Style #" << i << " of " << Styles.size() \
@@ -10028,6 +10390,9 @@
   CHECK_PARSE_BOOL(SpaceAfterCStyleCast);
   CHECK_PARSE_BOOL(SpaceAfterTemplateKeyword);
   CHECK_PARSE_BOOL(SpaceBeforeAssignmentOperators);
+  CHECK_PARSE_BOOL(SpaceBeforeCtorInitializerColon);
+  CHECK_PARSE_BOOL(SpaceBeforeInheritanceColon);
+  CHECK_PARSE_BOOL(SpaceBeforeRangeBasedForLoopColon);
 
   CHECK_PARSE_NESTED_BOOL(BraceWrapping, AfterClass);
   CHECK_PARSE_NESTED_BOOL(BraceWrapping, AfterControlStatement);
@@ -10254,6 +10619,42 @@
               "    Priority: 1",
               IncludeCategories, ExpectedCategories);
   CHECK_PARSE("IncludeIsMainRegex: 'abc$'", IncludeIsMainRegex, "abc$");
+
+  Style.RawStringFormats.clear();
+  std::vector<FormatStyle::RawStringFormat> ExpectedRawStringFormats = {
+      {
+          FormatStyle::LK_TextProto,
+          {"pb", "proto"},
+          {"PARSE_TEXT_PROTO"},
+          /*CanonicalDelimiter=*/"",
+          "llvm",
+      },
+      {
+          FormatStyle::LK_Cpp,
+          {"cc", "cpp"},
+          {"C_CODEBLOCK", "CPPEVAL"},
+          /*CanonicalDelimiter=*/"cc",
+          /*BasedOnStyle=*/"",
+      },
+  };
+
+  CHECK_PARSE("RawStringFormats:\n"
+              "  - Language: TextProto\n"
+              "    Delimiters:\n"
+              "      - 'pb'\n"
+              "      - 'proto'\n"
+              "    EnclosingFunctions:\n"
+              "      - 'PARSE_TEXT_PROTO'\n"
+              "    BasedOnStyle: llvm\n"
+              "  - Language: Cpp\n"
+              "    Delimiters:\n"
+              "      - 'cc'\n"
+              "      - 'cpp'\n"
+              "    EnclosingFunctions:\n"
+              "      - 'C_CODEBLOCK'\n"
+              "      - 'CPPEVAL'\n"
+              "    CanonicalDelimiter: 'cc'",
+              RawStringFormats, ExpectedRawStringFormats);
 }
 
 TEST_F(FormatTest, ParsesConfigurationWithLanguages) {
@@ -10447,10 +10848,12 @@
       "\"七 八 九 \"\n"
       "\"十\"",
       format("\"一 二 三 四 五六 七 八 九 十\"", getLLVMStyleWithColumns(11)));
-  EXPECT_EQ("\"一\t二 \"\n"
-            "\"\t三 \"\n"
-            "\"四 五\t六 \"\n"
-            "\"\t七 \"\n"
+  EXPECT_EQ("\"一\t\"\n"
+            "\"二 \t\"\n"
+            "\"三 四 \"\n"
+            "\"五\t\"\n"
+            "\"六 \t\"\n"
+            "\"七 \"\n"
             "\"八九十\tqq\"",
             format("\"一\t二 \t三 四 五\t六 \t七 八九十\tqq\"",
                    getLLVMStyleWithColumns(11)));
@@ -10946,6 +11349,17 @@
       "    });");
 }
 
+TEST_F(FormatTest, EmptyLinesInLambdas) {
+  verifyFormat("auto lambda = []() {\n"
+               "  x(); //\n"
+               "};",
+               "auto lambda = []() {\n"
+               "\n"
+               "  x(); //\n"
+               "\n"
+               "};");
+}
+
 TEST_F(FormatTest, FormatsBlocks) {
   FormatStyle ShortBlocks = getLLVMStyle();
   ShortBlocks.AllowShortBlocksOnASingleLine = true;
@@ -11428,6 +11842,13 @@
     verifyFormat("__super::FooBar();");
 }
 
+TEST(FormatStyle, GetStyleWithEmptyFileName) {
+  vfs::InMemoryFileSystem FS;
+  auto Style1 = getStyle("file", "", "Google", "", &FS);
+  ASSERT_TRUE((bool)Style1);
+  ASSERT_EQ(*Style1, getGoogleStyle());
+}
+
 TEST(FormatStyle, GetStyleOfFile) {
   vfs::InMemoryFileSystem FS;
   // Test 1: format file in the same directory.
@@ -11657,6 +12078,41 @@
   verifyFormat("auto const &[ a, b ] = f();", Spaces);
 }
 
+TEST_F(FormatTest, FileAndCode) {
+  EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.cc", ""));
+  EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.m", ""));
+  EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.mm", ""));
+  EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", ""));
+  EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.h", "@interface Foo\n@end\n"));
+  EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo", ""));
+  EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo", "@interface Foo\n@end\n"));
+}
+
+TEST_F(FormatTest, GuessLanguageWithForIn) {
+  EXPECT_EQ(FormatStyle::LK_Cpp,
+            guessLanguage("foo.h", "for (Foo *x = 0; x != in; x++) {}"));
+  EXPECT_EQ(FormatStyle::LK_ObjC,
+            guessLanguage("foo.h", "for (Foo *x in bar) {}"));
+  EXPECT_EQ(FormatStyle::LK_ObjC,
+            guessLanguage("foo.h", "for (Foo *x in [bar baz]) {}"));
+  EXPECT_EQ(FormatStyle::LK_ObjC,
+            guessLanguage("foo.h", "for (Foo *x in [bar baz:blech]) {}"));
+  EXPECT_EQ(
+      FormatStyle::LK_ObjC,
+      guessLanguage("foo.h", "for (Foo *x in [bar baz:blech, 1, 2, 3, 0]) {}"));
+  EXPECT_EQ(FormatStyle::LK_ObjC,
+            guessLanguage("foo.h", "for (Foo *x in [bar baz:^{[uh oh];}]) {}"));
+  EXPECT_EQ(FormatStyle::LK_Cpp,
+            guessLanguage("foo.h", "Foo *x; for (x = 0; x != in; x++) {}"));
+  EXPECT_EQ(FormatStyle::LK_ObjC,
+            guessLanguage("foo.h", "Foo *x; for (x in y) {}"));
+  EXPECT_EQ(
+      FormatStyle::LK_Cpp,
+      guessLanguage(
+          "foo.h",
+          "for (const Foo<Bar>& baz = in.value(); !baz.at_end(); ++baz) {}"));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
diff --git a/unittests/Format/FormatTestComments.cpp b/unittests/Format/FormatTestComments.cpp
index ceccc83..ed11fbd 100644
--- a/unittests/Format/FormatTestComments.cpp
+++ b/unittests/Format/FormatTestComments.cpp
@@ -62,6 +62,12 @@
     return Style;
   }
 
+  FormatStyle getTextProtoStyleWithColumns(unsigned ColumnLimit) {
+    FormatStyle Style = getGoogleStyle(FormatStyle::FormatStyle::LK_TextProto);
+    Style.ColumnLimit = ColumnLimit;
+    return Style;
+  }
+
   void verifyFormat(llvm::StringRef Code,
                     const FormatStyle &Style = getLLVMStyle()) {
     EXPECT_EQ(Code.str(), format(test::messUp(Code), Style));
@@ -680,8 +686,7 @@
   EXPECT_EQ("{\n"
             "  //\n"
             "  //\\\n"
-            "  // long 1 2 3 4\n"
-            "  // 5\n"
+            "  // long 1 2 3 4 5\n"
             "}",
             format("{\n"
                    "  //\n"
@@ -689,6 +694,18 @@
                    "  // long 1 2 3 4 5\n"
                    "}",
                    getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  //\n"
+            "  //\\\n"
+            "  // long 1 2 3 4 5\n"
+            "  // 6\n"
+            "}",
+            format("{\n"
+                   "  //\n"
+                   "  //\\\n"
+                   "  // long 1 2 3 4 5 6\n"
+                   "}",
+                   getLLVMStyleWithColumns(20)));
 }
 
 TEST_F(FormatTestComments, PreservesHangingIndentInCxxComments) {
@@ -1079,11 +1096,12 @@
 }
 
 TEST_F(FormatTestComments, SplitsLongLinesInComments) {
+  // FIXME: Do we need to fix up the "  */" at the end?
+  // It doesn't look like any of our current logic triggers this.
   EXPECT_EQ("/* This is a long\n"
             " * comment that\n"
-            " * doesn't\n"
-            " * fit on one line.\n"
-            " */",
+            " * doesn't fit on\n"
+            " * one line.  */",
             format("/* "
                    "This is a long                                         "
                    "comment that "
@@ -2061,6 +2079,22 @@
                    "       // longsec\n",
                    getLLVMStyleWithColumns(20)));
 
+  // Simple case that correctly handles reflow in parameter lists.
+  EXPECT_EQ("a = f(/* looooooooong\n"
+            "       * long long\n"
+            "       */\n"
+            "      a);",
+            format("a = f(/* looooooooong long\n* long\n*/ a);",
+                   getLLVMStyleWithColumns(22)));
+  // Tricky case that has fewer lines if we reflow the comment, ending up with
+  // fewer lines.
+  EXPECT_EQ("a = f(/* loooooong\n"
+            "       * long long\n"
+            "       */\n"
+            "      a);",
+            format("a = f(/* loooooong long\n* long\n*/ a);",
+                   getLLVMStyleWithColumns(22)));
+
   // Keep empty comment lines.
   EXPECT_EQ("/**/", format(" /**/", getLLVMStyleWithColumns(20)));
   EXPECT_EQ("/* */", format(" /* */", getLLVMStyleWithColumns(20)));
@@ -2069,6 +2103,85 @@
   EXPECT_EQ("///", format(" ///  ", getLLVMStyleWithColumns(20)));
 }
 
+TEST_F(FormatTestComments, ReflowsCommentsPrecise) {
+  // FIXME: This assumes we do not continue compressing whitespace once we are
+  // in reflow mode. Consider compressing whitespace.
+
+  // Test that we stop reflowing precisely at the column limit.
+  // After reflowing, "// reflows into   foo" does not fit the column limit,
+  // so we compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+            "// reflows into foo\n",
+            format("// some text that reflows\n"
+                   "// into   foo\n",
+                   getLLVMStyleWithColumns(20)));
+  // Given one more column, "// reflows into   foo" does fit the limit, so we
+  // do not compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+            "// reflows into   foo\n",
+            format("// some text that reflows\n"
+                   "// into   foo\n",
+                   getLLVMStyleWithColumns(21)));
+
+  // Make sure that we correctly account for the space added in the reflow case
+  // when making the reflowing decision.
+  // First, when the next line ends precisely one column over the limit, do not
+  // reflow.
+  EXPECT_EQ("// some text that\n"
+            "// reflows\n"
+            "// into1234567\n",
+            format("// some text that reflows\n"
+                   "// into1234567\n",
+                   getLLVMStyleWithColumns(21)));
+  // Secondly, when the next line ends later, but the first word in that line
+  // is precisely one column over the limit, do not reflow.
+  EXPECT_EQ("// some text that\n"
+            "// reflows\n"
+            "// into1234567 f\n",
+            format("// some text that reflows\n"
+                   "// into1234567 f\n",
+                   getLLVMStyleWithColumns(21)));
+}
+
+TEST_F(FormatTestComments, ReflowsCommentsWithExtraWhitespace) {
+  // Baseline.
+  EXPECT_EQ("// some text\n"
+            "// that re flows\n",
+            format("// some text that\n"
+                   "// re flows\n",
+                   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("// some text\n"
+            "// that re flows\n",
+            format("// some text that\n"
+                   "// re    flows\n",
+                   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("/* some text\n"
+            " * that re flows\n"
+            " */\n",
+            format("/* some text that\n"
+                   "*      re       flows\n"
+                   "*/\n",
+                   getLLVMStyleWithColumns(16)));
+  // FIXME: We do not reflow if the indent of two subsequent lines differs;
+  // given that this is different behavior from block comments, do we want
+  // to keep this?
+  EXPECT_EQ("// some text\n"
+            "// that\n"
+            "//     re flows\n",
+            format("// some text that\n"
+                   "//     re       flows\n",
+                   getLLVMStyleWithColumns(16)));
+  // Space within parts of a line that fit.
+  // FIXME: Use the earliest possible split while reflowing to compress the
+  // whitespace within the line.
+  EXPECT_EQ("// some text that\n"
+            "// does re   flow\n"
+            "// more  here\n",
+            format("// some text that does\n"
+                   "// re   flow  more  here\n",
+                   getLLVMStyleWithColumns(21)));
+}
+
 TEST_F(FormatTestComments, IgnoresIf0Contents) {
   EXPECT_EQ("#if 0\n"
             "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n"
@@ -2409,9 +2522,13 @@
 
 TEST_F(FormatTestComments, BreaksAfterMultilineBlockCommentsInParamLists) {
   EXPECT_EQ("a = f(/* long\n"
-            "         long\n"
-            "       */\n"
+            "         long */\n"
             "      a);",
+            format("a = f(/* long long */ a);", getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("a = f(\n"
+            "    /* long\n"
+            "       long */\n"
+            "    a);",
             format("a = f(/* long long */ a);", getLLVMStyleWithColumns(15)));
 
   EXPECT_EQ("a = f(/* long\n"
@@ -2421,7 +2538,7 @@
             format("a = f(/* long\n"
                    "         long\n"
                    "       */a);",
-                   getLLVMStyleWithColumns(15)));
+                   getLLVMStyleWithColumns(16)));
 
   EXPECT_EQ("a = f(/* long\n"
             "         long\n"
@@ -2430,7 +2547,7 @@
             format("a = f(/* long\n"
                    "         long\n"
                    "       */ a);",
-                   getLLVMStyleWithColumns(15)));
+                   getLLVMStyleWithColumns(16)));
 
   EXPECT_EQ("a = f(/* long\n"
             "         long\n"
@@ -2439,23 +2556,36 @@
             format("a = f(/* long\n"
                    "         long\n"
                    "       */ (1 + 1));",
-                   getLLVMStyleWithColumns(15)));
+                   getLLVMStyleWithColumns(16)));
 
   EXPECT_EQ(
       "a = f(a,\n"
       "      /* long\n"
-      "         long\n"
-      "       */\n"
+      "         long */\n"
       "      b);",
+      format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16)));
+
+  EXPECT_EQ(
+      "a = f(\n"
+      "    a,\n"
+      "    /* long\n"
+      "       long */\n"
+      "    b);",
       format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(15)));
 
-  EXPECT_EQ(
-      "a = f(a,\n"
-      "      /* long\n"
-      "         long\n"
-      "       */\n"
-      "      (1 + 1));",
-      format("a = f(a, /* long long */ (1 + 1));", getLLVMStyleWithColumns(15)));
+  EXPECT_EQ("a = f(a,\n"
+            "      /* long\n"
+            "         long */\n"
+            "      (1 + 1));",
+            format("a = f(a, /* long long */ (1 + 1));",
+                   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("a = f(\n"
+            "    a,\n"
+            "    /* long\n"
+            "       long */\n"
+            "    (1 + 1));",
+            format("a = f(a, /* long long */ (1 + 1));",
+                   getLLVMStyleWithColumns(15)));
 }
 
 TEST_F(FormatTestComments, IndentLineCommentsInStartOfBlockAtEndOfFile) {
@@ -2839,7 +2969,7 @@
                    getLLVMStyleWithColumns(20)));
 }
 
-TEST_F(FormatTestComments, NoCrush_Bug34236) {
+TEST_F(FormatTestComments, NoCrash_Bug34236) {
   // This is a test case from a crasher reported in:
   // https://bugs.llvm.org/show_bug.cgi?id=34236
   // Temporarily disable formatting for readability.
@@ -2847,8 +2977,7 @@
   EXPECT_EQ(
 "/*                                                                */ /*\n"
 "                                                                      *       a\n"
-"                                                                      * b c\n"
-"                                                                      * d*/",
+"                                                                      * b c d*/",
       format(
 "/*                                                                */ /*\n"
 " *       a b\n"
@@ -2872,6 +3001,94 @@
                    "    A = B;",
                    getLLVMStyleWithColumns(40)));
 }
+
+TEST_F(FormatTestComments, PythonStyleComments) {
+  // Keeps a space after '#'.
+  EXPECT_EQ("# comment\n"
+            "key: value",
+            format("#comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("# comment\n"
+            "key: value",
+            format("# comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Breaks long comment.
+  EXPECT_EQ("# comment comment\n"
+            "# comment\n"
+            "key: value",
+            format("# comment comment comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Indents comments.
+  EXPECT_EQ("data {\n"
+            "  # comment comment\n"
+            "  # comment\n"
+            "  key: value\n"
+            "}",
+            format("data {\n"
+                   "# comment comment comment\n"
+                   "key: value}",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("data {\n"
+            "  # comment comment\n"
+            "  # comment\n"
+            "  key: value\n"
+            "}",
+            format("data {# comment comment comment\n"
+                   "key: value}",
+                   getTextProtoStyleWithColumns(20)));
+  // Reflows long comments.
+  EXPECT_EQ("# comment comment\n"
+            "# comment comment\n"
+            "key: value",
+            format("# comment comment comment\n"
+                   "# comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Breaks trailing comments.
+  EXPECT_EQ("k: val  # comment\n"
+            "        # comment\n"
+            "a: 1",
+            format("k:val#comment comment\n"
+                   "a:1",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("id {\n"
+            "  k: val  # comment\n"
+            "          # comment\n"
+            "  # line line\n"
+            "  a: 1\n"
+            "}",
+            format("id {k:val#comment comment\n"
+                   "# line line\n"
+                   "a:1}",
+                   getTextProtoStyleWithColumns(20)));
+  // Aligns trailing comments.
+  EXPECT_EQ("k: val  # commen1\n"
+            "        # commen2\n"
+            "        # commen3\n"
+            "# commen4\n"
+            "a: 1  # commen5\n"
+            "      # commen6\n"
+            "      # commen7",
+            format("k:val#commen1 commen2\n"
+                   " # commen3\n"
+                   "# commen4\n"
+                   "a:1#commen5 commen6\n"
+                   " #commen7",
+                   getTextProtoStyleWithColumns(20)));
+}
+
+TEST_F(FormatTestComments, BreaksBeforeTrailingUnbreakableSequence) {
+  // The end of /* trail */ is exactly at 80 columns, but the unbreakable
+  // trailing sequence ); after it exceeds the column limit. Make sure we
+  // correctly break the line in that case.
+  verifyFormat("int a =\n"
+               "    foo(/* trail */);",
+               getLLVMStyleWithColumns(23));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
diff --git a/unittests/Format/FormatTestJS.cpp b/unittests/Format/FormatTestJS.cpp
index 3df0166..5fb47a7 100644
--- a/unittests/Format/FormatTestJS.cpp
+++ b/unittests/Format/FormatTestJS.cpp
@@ -294,6 +294,7 @@
   verifyFormat("x.for = 1;");
   verifyFormat("x.of();");
   verifyFormat("of(null);");
+  verifyFormat("return of(null);");
   verifyFormat("import {of} from 'x';");
   verifyFormat("x.in();");
   verifyFormat("x.let();");
@@ -323,6 +324,11 @@
                "  case: string;\n"
                "  default: string;\n"
                "}\n");
+  verifyFormat("const Axis = {\n"
+               "  for: 'for',\n"
+               "  x: 'x'\n"
+               "};",
+               "const Axis = {for: 'for', x:   'x'};");
 }
 
 TEST_F(FormatTestJS, ReservedWordsMethods) {
@@ -1110,6 +1116,10 @@
                "}");
   verifyFormat("for (let {a, b} of x) {\n"
                "}");
+  verifyFormat("for (let {a, b} of [x]) {\n"
+               "}");
+  verifyFormat("for (let [a, b] of [x]) {\n"
+               "}");
   verifyFormat("for (let {a, b} in x) {\n"
                "}");
 }
@@ -1119,6 +1129,7 @@
   // would change due to automatic semicolon insertion.
   // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.9.1.
   verifyFormat("return aaaaa;", getGoogleJSStyleWithColumns(10));
+  verifyFormat("yield aaaaa;", getGoogleJSStyleWithColumns(10));
   verifyFormat("return /* hello! */ aaaaa;", getGoogleJSStyleWithColumns(10));
   verifyFormat("continue aaaaa;", getGoogleJSStyleWithColumns(10));
   verifyFormat("continue /* hello! */ aaaaa;", getGoogleJSStyleWithColumns(10));
@@ -1142,6 +1153,14 @@
                "const y = 3\n",
                "const x = (   5 +    9)\n"
                "const y = 3\n");
+  // Ideally the foo() bit should be indented relative to the async function().
+  verifyFormat("async function\n"
+               "foo() {}",
+               getGoogleJSStyleWithColumns(10));
+  verifyFormat("await theReckoning;", getGoogleJSStyleWithColumns(10));
+  verifyFormat("some['a']['b']", getGoogleJSStyleWithColumns(10));
+  verifyFormat("x = (a['a']\n"
+               "      ['b']);", getGoogleJSStyleWithColumns(10));
 }
 
 TEST_F(FormatTestJS, AutomaticSemicolonInsertionHeuristic) {
@@ -1192,6 +1211,8 @@
                                       "String");
   verifyFormat("function f(@Foo bar) {}", "function f(@Foo\n"
                                           "  bar) {}");
+  verifyFormat("function f(@Foo(Param) bar) {}", "function f(@Foo(Param)\n"
+                                                 "  bar) {}");
   verifyFormat("a = true\n"
                "return 1",
                "a = true\n"
@@ -1402,6 +1423,7 @@
   verifyFormat("function x(y: {a?: number;} = {}): number {\n"
                "  return 12;\n"
                "}");
+  verifyFormat("const x: Array<{a: number; b: string;}> = [];");
   verifyFormat("((a: string, b: number): string => a + b);");
   verifyFormat("var x: (y: number) => string;");
   verifyFormat("var x: P<string, (a: number) => string>;");
@@ -1421,6 +1443,8 @@
   verifyFormat(
       "var someValue = (v as aaaaaaaaaaaaaaaaaaaa<T>[])\n"
       "                    .someFunction(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa);");
+  verifyFormat("const xIsALongIdent:\n""    YJustBarelyFitsLinex[];",
+      getGoogleJSStyleWithColumns(20));
 }
 
 TEST_F(FormatTestJS, UnionIntersectionTypes) {
@@ -1557,7 +1581,7 @@
                "}");
 }
 
-TEST_F(FormatTestJS, MetadataAnnotations) {
+TEST_F(FormatTestJS, Decorators) {
   verifyFormat("@A\nclass C {\n}");
   verifyFormat("@A({arg: 'value'})\nclass C {\n}");
   verifyFormat("@A\n@B\nclass C {\n}");
@@ -1606,6 +1630,17 @@
       Style);
 }
 
+TEST_F(FormatTestJS, RemoveEmptyLinesInArrowFunctions) {
+  verifyFormat("x = () => {\n"
+               "  foo();\n"
+               "};\n",
+               "x = () => {\n"
+               "\n"
+               "  foo();\n"
+               "\n"
+               "};\n");
+}
+
 TEST_F(FormatTestJS, Modules) {
   verifyFormat("import SomeThing from 'some/module.js';");
   verifyFormat("import {X, Y} from 'some/module.js';");
@@ -1648,9 +1683,15 @@
                "  x: number;\n"
                "  y: string;\n"
                "}");
-  verifyFormat("export class X { y: number; }");
-  verifyFormat("export abstract class X { y: number; }");
-  verifyFormat("export default class X { y: number }");
+  verifyFormat("export class X {\n"
+               "  y: number;\n"
+               "}");
+  verifyFormat("export abstract class X {\n"
+               "  y: number;\n"
+               "}");
+  verifyFormat("export default class X {\n"
+               "  y: number\n"
+               "}");
   verifyFormat("export default function() {\n  return 1;\n}");
   verifyFormat("export var x = 12;");
   verifyFormat("class C {}\n"
@@ -1672,7 +1713,9 @@
                "];");
   verifyFormat("export default [];");
   verifyFormat("export default () => {};");
-  verifyFormat("export interface Foo { foo: number; }\n"
+  verifyFormat("export interface Foo {\n"
+               "  foo: number;\n"
+               "}\n"
                "export class Bar {\n"
                "  blah(): string {\n"
                "    return this.blah;\n"
@@ -1893,6 +1936,7 @@
   verifyFormat("x = x as {a: string};");
   verifyFormat("x = x as (string);");
   verifyFormat("x = x! as (string);");
+  verifyFormat("x = y! in z;");
   verifyFormat("var x = something.someFunction() as\n"
                "    something;",
                getGoogleJSStyleWithColumns(40));
@@ -2089,6 +2133,7 @@
   verifyFormat("let x = foo!.bar();\n");
   verifyFormat("let x = foo ? bar! : baz;\n");
   verifyFormat("let x = !foo;\n");
+  verifyFormat("if (!+a) {\n}");
   verifyFormat("let x = foo[0]!;\n");
   verifyFormat("let x = (foo)!;\n");
   verifyFormat("let x = x(foo!);\n");
diff --git a/unittests/Format/FormatTestJava.cpp b/unittests/Format/FormatTestJava.cpp
index 2f376f7..1d471b7 100644
--- a/unittests/Format/FormatTestJava.cpp
+++ b/unittests/Format/FormatTestJava.cpp
@@ -144,6 +144,7 @@
   verifyFormat("public interface SomeInterface {\n"
                "  void doStuff(int theStuff);\n"
                "  void doMoreStuff(int moreStuff);\n"
+               "  default void doStuffWithDefault() {}\n"
                "}");
   verifyFormat("@interface SomeInterface {\n"
                "  void doStuff(int theStuff);\n"
diff --git a/unittests/Format/FormatTestObjC.cpp b/unittests/Format/FormatTestObjC.cpp
index 1f9fc45..8773088 100644
--- a/unittests/Format/FormatTestObjC.cpp
+++ b/unittests/Format/FormatTestObjC.cpp
@@ -79,10 +79,90 @@
   ASSERT_TRUE((bool)Style);
   EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
 
+  Style = getStyle("LLVM", "a.h", "none", "@interface\n"
+                                          "@end\n"
+                                          "//comment");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+  Style = getStyle("LLVM", "a.h", "none", "@interface\n"
+                                          "@end //comment");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
   // No recognizable ObjC.
   Style = getStyle("LLVM", "a.h", "none", "void f() {}");
   ASSERT_TRUE((bool)Style);
   EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
+
+  Style = getStyle("{}", "a.h", "none", "@interface Foo\n@end\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+  Style = getStyle("{}", "a.h", "none",
+                   "const int interface = 1;\nconst int end = 2;\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
+
+  Style = getStyle("{}", "a.h", "none", "@protocol Foo\n@end\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+  Style = getStyle("{}", "a.h", "none",
+                   "const int protocol = 1;\nconst int end = 2;\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
+
+  Style =
+      getStyle("{}", "a.h", "none", "typedef NS_ENUM(NSInteger, Foo) {};\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+  Style = getStyle("{}", "a.h", "none", "enum Foo {};");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
+
+  Style =
+      getStyle("{}", "a.h", "none", "inline void Foo() { Log(@\"Foo\"); }\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+  Style =
+      getStyle("{}", "a.h", "none", "inline void Foo() { Log(\"Foo\"); }\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
+
+  Style =
+      getStyle("{}", "a.h", "none", "inline void Foo() { id = @[1, 2, 3]; }\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+  Style = getStyle("{}", "a.h", "none",
+                   "inline void Foo() { id foo = @{1: 2, 3: 4, 5: 6}; }\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+  Style = getStyle("{}", "a.h", "none",
+                   "inline void Foo() { int foo[] = {1, 2, 3}; }\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language);
+
+  // ObjC characteristic types.
+  Style = getStyle("{}", "a.h", "none", "extern NSString *kFoo;\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+  Style = getStyle("{}", "a.h", "none", "extern NSInteger Foo();\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+  Style = getStyle("{}", "a.h", "none", "NSObject *Foo();\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
+
+  Style = getStyle("{}", "a.h", "none", "NSSet *Foo();\n");
+  ASSERT_TRUE((bool)Style);
+  EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language);
 }
 
 TEST_F(FormatTestObjC, FormatObjCTryCatch) {
@@ -107,7 +187,8 @@
                "@autoreleasepool {\n"
                "  f();\n"
                "}\n");
-  Style.BreakBeforeBraces = FormatStyle::BS_Allman;
+  Style.BreakBeforeBraces = FormatStyle::BS_Custom;
+  Style.BraceWrapping.AfterControlStatement = true;
   verifyFormat("@autoreleasepool\n"
                "{\n"
                "  f();\n"
@@ -118,6 +199,36 @@
                "}\n");
 }
 
+TEST_F(FormatTestObjC, FormatObjCGenerics) {
+  Style.ColumnLimit = 40;
+  verifyFormat("int aaaaaaaaaaaaaaaa(\n"
+               "    NSArray<aaaaaaaaaaaaaaaaaa *>\n"
+               "        aaaaaaaaaaaaaaaaa);\n");
+  verifyFormat("int aaaaaaaaaaaaaaaa(\n"
+               "    NSArray<aaaaaaaaaaaaaaaaaaa<\n"
+               "        aaaaaaaaaaaaaaaa *> *>\n"
+               "        aaaaaaaaaaaaaaaaa);\n");
+}
+
+TEST_F(FormatTestObjC, FormatObjCSynchronized) {
+  verifyFormat("@synchronized(self) {\n"
+               "  f();\n"
+               "}\n"
+               "@synchronized(self) {\n"
+               "  f();\n"
+               "}\n");
+  Style.BreakBeforeBraces = FormatStyle::BS_Custom;
+  Style.BraceWrapping.AfterControlStatement = true;
+  verifyFormat("@synchronized(self)\n"
+               "{\n"
+               "  f();\n"
+               "}\n"
+               "@synchronized(self)\n"
+               "{\n"
+               "  f();\n"
+               "}\n");
+}
+
 TEST_F(FormatTestObjC, FormatObjCInterface) {
   verifyFormat("@interface Foo : NSObject <NSSomeDelegate> {\n"
                "@public\n"
@@ -144,6 +255,12 @@
                "@end");
 
   verifyFormat("@interface Foo : Bar\n"
+               "@property(assign, readwrite) NSInteger bar;\n"
+               "+ (id)init;\n"
+               "@end");
+
+  verifyFormat("FOUNDATION_EXPORT NS_AVAILABLE_IOS(10.0) @interface Foo : Bar\n"
+               "@property(assign, readwrite) NSInteger bar;\n"
                "+ (id)init;\n"
                "@end");
 
@@ -199,8 +316,35 @@
                "+ (id)init;\n"
                "@end");
 
+  Style.ColumnLimit = 40;
+  verifyFormat("@interface ccccccccccccc () <\n"
+               "    ccccccccccccc, ccccccccccccc,\n"
+               "    ccccccccccccc, ccccccccccccc> {\n"
+               "}");
+  Style.ObjCBinPackProtocolList = FormatStyle::BPS_Never;
+  verifyFormat("@interface ddddddddddddd () <\n"
+               "    ddddddddddddd,\n"
+               "    ddddddddddddd,\n"
+               "    ddddddddddddd,\n"
+               "    ddddddddddddd> {\n"
+               "}");
+
+  Style.BinPackParameters = false;
+  Style.ObjCBinPackProtocolList = FormatStyle::BPS_Auto;
+  verifyFormat("@interface eeeeeeeeeeeee () <\n"
+               "    eeeeeeeeeeeee,\n"
+               "    eeeeeeeeeeeee,\n"
+               "    eeeeeeeeeeeee,\n"
+               "    eeeeeeeeeeeee> {\n"
+               "}");
+  Style.ObjCBinPackProtocolList = FormatStyle::BPS_Always;
+  verifyFormat("@interface fffffffffffff () <\n"
+               "    fffffffffffff, fffffffffffff,\n"
+               "    fffffffffffff, fffffffffffff> {\n"
+               "}");
+
   Style = getGoogleStyle(FormatStyle::LK_ObjC);
-  verifyFormat("@interface Foo : NSObject<NSSomeDelegate> {\n"
+  verifyFormat("@interface Foo : NSObject <NSSomeDelegate> {\n"
                " @public\n"
                "  int field1;\n"
                " @protected\n"
@@ -212,19 +356,22 @@
                "}\n"
                "+ (id)init;\n"
                "@end");
-  verifyFormat("@interface Foo : Bar<Baz, Quux>\n"
+  verifyFormat("@interface Foo : Bar <Baz, Quux>\n"
                "+ (id)init;\n"
                "@end");
-  verifyFormat("@interface Foo (HackStuff)<MyProtocol>\n"
+  verifyFormat("@interface Foo (HackStuff) <MyProtocol>\n"
                "+ (id)init;\n"
                "@end");
-  Style.BinPackParameters = false;
-  Style.ColumnLimit = 80;
-  verifyFormat("@interface aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ()<\n"
-               "    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n"
-               "    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n"
-               "    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n"
-               "    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa> {\n"
+  Style.ColumnLimit = 40;
+  // BinPackParameters should be true by default.
+  verifyFormat("void eeeeeeee(int eeeee, int eeeee,\n"
+               "              int eeeee, int eeeee);\n");
+  // ObjCBinPackProtocolList should be BPS_Never by default.
+  verifyFormat("@interface fffffffffffff () <\n"
+               "    fffffffffffff,\n"
+               "    fffffffffffff,\n"
+               "    fffffffffffff,\n"
+               "    fffffffffffff> {\n"
                "}");
 }
 
@@ -323,6 +470,10 @@
                "@protocol Bar\n"
                "@end");
 
+  verifyFormat("FOUNDATION_EXPORT NS_AVAILABLE_IOS(10.0) @protocol Foo\n"
+               "@property(assign, readwrite) NSInteger bar;\n"
+               "@end");
+
   verifyFormat("@protocol myProtocol\n"
                "- (void)mandatoryWithInt:(int)i;\n"
                "@optional\n"
@@ -343,7 +494,7 @@
                "@end");
 
   Style = getGoogleStyle(FormatStyle::LK_ObjC);
-  verifyFormat("@protocol MyProtocol<NSObject>\n"
+  verifyFormat("@protocol MyProtocol <NSObject>\n"
                "- (NSUInteger)numberOfThings;\n"
                "@end");
 }
@@ -571,8 +722,41 @@
   // Formats pair-parameters.
   verifyFormat("[I drawRectOn:surface ofSize:aa:bbb atOrigin:cc:dd];");
   verifyFormat("[I drawRectOn:surface //\n"
-               "        ofSize:aa:bbb\n"
-               "      atOrigin:cc:dd];");
+               "       ofSize:aa:bbb\n"
+               "     atOrigin:cc:dd];");
+
+  // Inline block as a first argument.
+  verifyFormat("[object justBlock:^{\n"
+               "  a = 42;\n"
+               "}];");
+  verifyFormat("[object\n"
+               "    justBlock:^{\n"
+               "      a = 42;\n"
+               "    }\n"
+               "     notBlock:42\n"
+               "            a:42];");
+  verifyFormat("[object\n"
+               "    firstBlock:^{\n"
+               "      a = 42;\n"
+               "    }\n"
+               "    blockWithLongerName:^{\n"
+               "      a = 42;\n"
+               "    }];");
+  verifyFormat("[object\n"
+               "    blockWithLongerName:^{\n"
+               "      a = 42;\n"
+               "    }\n"
+               "    secondBlock:^{\n"
+               "      a = 42;\n"
+               "    }];");
+  verifyFormat("[object\n"
+               "    firstBlock:^{\n"
+               "      a = 42;\n"
+               "    }\n"
+               "    notBlock:42\n"
+               "    secondBlock:^{\n"
+               "      a = 42;\n"
+               "    }];");
 
   Style.ColumnLimit = 70;
   verifyFormat(
@@ -605,6 +789,26 @@
       "                  backing:NSBackingStoreBuffered\n"
       "                    defer:NO]);\n"
       "}");
+
+  // Respect continuation indent and colon alignment (e.g. when object name is
+  // short, and first selector is the longest one)
+  Style = getLLVMStyle();
+  Style.Language = FormatStyle::LK_ObjC;
+  Style.ContinuationIndentWidth = 8;
+  verifyFormat("[self performSelectorOnMainThread:@selector(loadAccessories)\n"
+               "                       withObject:nil\n"
+               "                    waitUntilDone:false];");
+  verifyFormat("[self performSelector:@selector(loadAccessories)\n"
+               "        withObjectOnMainThread:nil\n"
+               "                 waitUntilDone:false];");
+  verifyFormat("[aaaaaaaaaaaaaaaaaaaaaaaaa\n"
+               "        performSelectorOnMainThread:@selector(loadAccessories)\n"
+               "                         withObject:nil\n"
+               "                      waitUntilDone:false];");
+  verifyFormat("[self // force wrapping\n"
+               "        performSelectorOnMainThread:@selector(loadAccessories)\n"
+               "                         withObject:nil\n"
+               "                      waitUntilDone:false];");
 }
 
 TEST_F(FormatTestObjC, ObjCAt) {
@@ -689,6 +893,13 @@
                "    foo(n);\n"
                "  }\n"
                "}");
+  verifyFormat("for (Foo *x in bar) {\n}");
+  verifyFormat("for (Foo *x in [bar baz]) {\n}");
+  verifyFormat("for (Foo *x in [bar baz:blech]) {\n}");
+  verifyFormat("for (Foo *x in [bar baz:blech, 1, 2, 3, 0]) {\n}");
+  verifyFormat("for (Foo *x in [bar baz:^{\n"
+               "       [uh oh];\n"
+               "     }]) {\n}");
 }
 
 TEST_F(FormatTestObjC, ObjCLiterals) {
@@ -820,6 +1031,12 @@
   verifyFormat("[someFunction someLooooooooooooongParameter:@[\n"
                "  NSBundle.mainBundle.infoDictionary[@\"a\"]\n"
                "]];");
+  Style.ColumnLimit = 20;
+  // We can't break string literals inside NSArray literals
+  // (that raises -Wobjc-string-concatenation).
+  verifyFormat("NSArray *foo = @[\n"
+               "  @\"aaaaaaaaaaaaaaaaaaaaaaaaaa\"\n"
+               "];\n");
 }
 } // end namespace
 } // end namespace format
diff --git a/unittests/Format/FormatTestProto.cpp b/unittests/Format/FormatTestProto.cpp
index df94d81..6db44c7 100644
--- a/unittests/Format/FormatTestProto.cpp
+++ b/unittests/Format/FormatTestProto.cpp
@@ -97,7 +97,7 @@
                "  TYPE_B = 2;\n"
                "};");
   verifyFormat("enum Type {\n"
-               "  UNKNOWN = 0 [(some_options) = {a: aa, b: bb}];\n"
+               "  UNKNOWN = 0 [(some_options) = { a: aa, b: bb }];\n"
                "};");
   verifyFormat("enum Type {\n"
                "  UNKNOWN = 0 [(some_options) = {\n"
@@ -121,7 +121,7 @@
   verifyFormat("optional LongMessageType long_proto_field = 1\n"
                "    [default = REALLY_REALLY_LONG_CONSTANT_VALUE];");
   verifyFormat("repeated double value = 1\n"
-               "    [(aaaaaaa.aaaaaaaaa) = {aaaaaaaaaaaaaaaaa: AAAAAAAA}];");
+               "    [(aaaaaaa.aaaaaaaaa) = { aaaaaaaaaaaaaaaaa: AAAAAAAA }];");
   verifyFormat("repeated double value = 1 [(aaaaaaa.aaaaaaaaa) = {\n"
                "  aaaaaaaaaaaaaaaa: AAAAAAAAAA,\n"
                "  bbbbbbbbbbbbbbbb: BBBBBBBBBB\n"
@@ -168,6 +168,16 @@
                "    aaaaaaaaaaaaaaaa: true\n"
                "  }\n"
                "];");
+  verifyFormat("extensions 20 [(proto2.type) = 'Aaaa.bbbb'];");
+  verifyFormat("extensions 20\n"
+               "    [(proto3.type) = 'Aaaa.bbbb', (aaa.Aaa) = 'aaa.bbb'];");
+  verifyFormat("extensions 123 [\n"
+               "  (aaa) = aaaa,\n"
+               "  (bbbbbbbbbbbbbbbbbbbbbbbbbb) = {\n"
+               "    aaaaaaaaaaaaaaaaa: true,\n"
+               "    aaaaaaaaaaaaaaaa: true\n"
+               "  }\n"
+               "];");
 }
 
 TEST_F(FormatTestProto, DoesntWrapFileOptions) {
@@ -183,27 +193,27 @@
                "  field_a: OK\n"
                "  field_b: \"OK\"\n"
                "  field_c: \"OK\"\n"
-               "  msg_field: {field_d: 123}\n"
+               "  msg_field: { field_d: 123 }\n"
                "};");
   verifyFormat("option (MyProto.options) = {\n"
                "  field_a: OK\n"
                "  field_b: \"OK\"\n"
                "  field_c: \"OK\"\n"
-               "  msg_field: {field_d: 123 field_e: OK}\n"
+               "  msg_field: { field_d: 123 field_e: OK }\n"
                "};");
   verifyFormat("option (MyProto.options) = {\n"
                "  field_a: OK  // Comment\n"
                "  field_b: \"OK\"\n"
                "  field_c: \"OK\"\n"
-               "  msg_field: {field_d: 123}\n"
+               "  msg_field: { field_d: 123 }\n"
                "};");
   verifyFormat("option (MyProto.options) = {\n"
                "  field_c: \"OK\"\n"
-               "  msg_field {field_d: 123}\n"
+               "  msg_field { field_d: 123 }\n"
                "};");
   verifyFormat("option (MyProto.options) = {\n"
                "  field_a: OK\n"
-               "  field_b {field_c: OK}\n"
+               "  field_b { field_c: OK }\n"
                "  field_d: OKOKOK\n"
                "  field_e: OK\n"
                "}");
@@ -211,12 +221,14 @@
   // Support syntax with <> instead of {}.
   verifyFormat("option (MyProto.options) = {\n"
                "  field_c: \"OK\",\n"
-               "  msg_field: <field_d: 123>\n"
+               "  msg_field: < field_d: 123 >\n"
+               "  empty: <>\n"
+               "  empty <>\n"
                "};");
 
   verifyFormat("option (MyProto.options) = {\n"
                "  field_a: OK\n"
-               "  field_b <field_c: OK>\n"
+               "  field_b < field_c: OK >\n"
                "  field_d: OKOKOK\n"
                "  field_e: OK\n"
                "}");
@@ -224,9 +236,9 @@
   verifyFormat("option (MyProto.options) = {\n"
                "  msg_field: <>\n"
                "  field_c: \"OK\",\n"
-               "  msg_field: <field_d: 123>\n"
+               "  msg_field: < field_d: 123 >\n"
                "  field_e: OK\n"
-               "  msg_field: <field_d: 12>\n"
+               "  msg_field: < field_d: 12 >\n"
                "};");
 
   verifyFormat("option (MyProto.options) = <\n"
@@ -247,7 +259,7 @@
 
   verifyFormat("option (MyProto.options) = <\n"
                "  field_a: \"OK\"\n"
-               "  msg_field: {field_b: OK}\n"
+               "  msg_field: { field_b: OK }\n"
                "  field_g: OK\n"
                "  field_g: OK\n"
                "  field_g: OK\n"
@@ -350,7 +362,7 @@
                "      field_D: 4\n"
                "      field_E: 5\n"
                "    >\n"
-               "    msg_field <field_A: 1 field_B: 2 field_C: 3 field_D: 4>\n"
+               "    msg_field < field_A: 1 field_B: 2 field_C: 3 f_D: 4 >\n"
                "    field_e: OK\n"
                "    field_f: OK\n"
                "  }\n"
@@ -358,21 +370,21 @@
                ">;");
 
   verifyFormat("option (MyProto.options) = <\n"
-               "  data1 <key1: value1>\n"
-               "  data2 {key2: value2}\n"
+               "  data1 < key1: value1 >\n"
+               "  data2 { key2: value2 }\n"
                ">;");
 
   verifyFormat("option (MyProto.options) = <\n"
                "  app_id: 'com.javax.swing.salsa.latino'\n"
                "  head_id: 1\n"
-               "  data <key: value>\n"
+               "  data < key: value >\n"
                ">;");
 
   verifyFormat("option (MyProto.options) = {\n"
                "  app_id: 'com.javax.swing.salsa.latino'\n"
                "  head_id: 1\n"
                "  headheadheadheadheadhead_id: 1\n"
-               "  product_data {product {1}}\n"
+               "  product_data { product { 1 } }\n"
                "};");
 }
 
@@ -410,5 +422,73 @@
                "}");
 }
 
+TEST_F(FormatTestProto, KeepsLongStringLiteralsOnSameLine) {
+  verifyFormat(
+      "option (MyProto.options) = {\n"
+      "  foo: {\n"
+      "    text: \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaasaaaaaaaa\"\n"
+      "  }\n"
+      "}");
+}
+
+TEST_F(FormatTestProto, FormatsOptionsExtensions) {
+  verifyFormat("option (MyProto.options) = {\n"
+               "  msg_field: { field_d: 123 }\n"
+               "  [ext.t/u] { key: value }\n"
+               "  key: value\n"
+               "  [t.u/v] <\n"
+               "    [ext] { key: value }\n"
+               "  >\n"
+               "};");
+}
+
+TEST_F(FormatTestProto, NoSpaceAfterPercent) {
+  verifyFormat("option (MyProto.options) = {\n"
+               "  key: %lld\n"
+               "};");
+}
+
+TEST_F(FormatTestProto, FormatsRepeatedListInitializersInOptions) {
+  verifyFormat("option (MyProto.options) = {\n"
+               "  key: item\n"
+               "  keys: [\n"
+               "    'ala',\n"
+               "    'bala',\n"
+               "    'porto',\n"
+               "    'kala',\n"
+               "    'too',\n"
+               "    'long',\n"
+               "    'long',\n"
+               "    'long'\n"
+               "  ]\n"
+               "  key: [ item ]\n"
+               "  msg {\n"
+               "    key: item\n"
+               "    keys: [\n"
+               "      'ala',\n"
+               "      'bala',\n"
+               "      'porto',\n"
+               "      'kala',\n"
+               "      'too',\n"
+               "      'long',\n"
+               "      'long'\n"
+               "    ]\n"
+               "  }\n"
+               "  key: value\n"
+               "};");
+}
+
+TEST_F(FormatTestProto, AcceptsOperatorAsKeyInOptions) {
+  verifyFormat("option (MyProto.options) = {\n"
+               "  bbbbbbbbb: <\n"
+               "    ccccccccccccccccccccccc: <\n"
+               "      operator: 1\n"
+               "      operator: 2\n"
+               "      operator { key: value }\n"
+               "    >\n"
+               "  >\n"
+               "};");
+}
+
 } // end namespace tooling
 } // end namespace clang
diff --git a/unittests/Format/FormatTestRawStrings.cpp b/unittests/Format/FormatTestRawStrings.cpp
new file mode 100644
index 0000000..21bbc3f
--- /dev/null
+++ b/unittests/Format/FormatTestRawStrings.cpp
@@ -0,0 +1,786 @@
+//===- unittest/Format/FormatTestRawStrings.cpp - Formatting unit tests ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Format/Format.h"
+
+#include "../Tooling/ReplacementTest.h"
+#include "FormatTestUtils.h"
+
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "gtest/gtest.h"
+
+#define DEBUG_TYPE "format-test"
+
+using clang::tooling::ReplacementTest;
+using clang::tooling::toReplacements;
+
+namespace clang {
+namespace format {
+namespace {
+
+class FormatTestRawStrings : public ::testing::Test {
+protected:
+  enum StatusCheck { SC_ExpectComplete, SC_ExpectIncomplete, SC_DoNotCheck };
+
+  std::string format(llvm::StringRef Code,
+                     const FormatStyle &Style = getLLVMStyle(),
+                     StatusCheck CheckComplete = SC_ExpectComplete) {
+    DEBUG(llvm::errs() << "---\n");
+    DEBUG(llvm::errs() << Code << "\n\n");
+    std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
+    FormattingAttemptStatus Status;
+    tooling::Replacements Replaces =
+        reformat(Style, Code, Ranges, "<stdin>", &Status);
+    if (CheckComplete != SC_DoNotCheck) {
+      bool ExpectedCompleteFormat = CheckComplete == SC_ExpectComplete;
+      EXPECT_EQ(ExpectedCompleteFormat, Status.FormatComplete)
+          << Code << "\n\n";
+    }
+    ReplacementCount = Replaces.size();
+    auto Result = applyAllReplacements(Code, Replaces);
+    EXPECT_TRUE(static_cast<bool>(Result));
+    DEBUG(llvm::errs() << "\n" << *Result << "\n\n");
+    return *Result;
+  }
+
+  FormatStyle getStyleWithColumns(FormatStyle Style, unsigned ColumnLimit) {
+    Style.ColumnLimit = ColumnLimit;
+    return Style;
+  }
+
+  FormatStyle getLLVMStyleWithColumns(unsigned ColumnLimit) {
+    return getStyleWithColumns(getLLVMStyle(), ColumnLimit);
+  }
+
+  int ReplacementCount;
+
+  FormatStyle getRawStringPbStyleWithColumns(unsigned ColumnLimit) {
+    FormatStyle Style = getLLVMStyle();
+    Style.ColumnLimit = ColumnLimit;
+    Style.RawStringFormats = {
+        {
+            /*Language=*/FormatStyle::LK_TextProto,
+            /*Delimiters=*/{"pb"},
+            /*EnclosingFunctions=*/{},
+            /*CanonicalDelimiter=*/"",
+            /*BasedOnStyle=*/"google",
+        },
+    };
+    return Style;
+  }
+
+  FormatStyle getRawStringLLVMCppStyleBasedOn(std::string BasedOnStyle) {
+    FormatStyle Style = getLLVMStyle();
+    Style.RawStringFormats = {
+        {
+            /*Language=*/FormatStyle::LK_Cpp,
+            /*Delimiters=*/{"cpp"},
+            /*EnclosingFunctions=*/{},
+            /*CanonicalDelimiter=*/"",
+            BasedOnStyle,
+        },
+    };
+    return Style;
+  }
+
+  FormatStyle getRawStringGoogleCppStyleBasedOn(std::string BasedOnStyle) {
+    FormatStyle Style = getGoogleStyle(FormatStyle::LK_Cpp);
+    Style.RawStringFormats = {
+        {
+            /*Language=*/FormatStyle::LK_Cpp,
+            /*Delimiters=*/{"cpp"},
+            /*EnclosingFunctions=*/{},
+            /*CanonicalDelimiter=*/"",
+            BasedOnStyle,
+        },
+    };
+    return Style;
+  }
+
+  // Gcc 4.8 doesn't support raw string literals in macros, which breaks some
+  // build bots. We use this function instead.
+  void expect_eq(const std::string Expected, const std::string Actual) {
+    EXPECT_EQ(Expected, Actual);
+  }
+};
+
+TEST_F(FormatTestRawStrings, ReformatsAccordingToBaseStyle) {
+  // llvm style puts '*' on the right.
+  // google style puts '*' on the left.
+
+  // Use the llvm style if the raw string style has no BasedOnStyle.
+  expect_eq(R"test(int *i = R"cpp(int *p = nullptr;)cpp")test",
+            format(R"test(int * i = R"cpp(int * p = nullptr;)cpp")test",
+                   getRawStringLLVMCppStyleBasedOn("")));
+
+  // Use the google style if the raw string style has BasedOnStyle=google.
+  expect_eq(R"test(int *i = R"cpp(int* p = nullptr;)cpp")test",
+            format(R"test(int * i = R"cpp(int * p = nullptr;)cpp")test",
+                   getRawStringLLVMCppStyleBasedOn("google")));
+
+  // Use the llvm style if the raw string style has no BasedOnStyle=llvm.
+  expect_eq(R"test(int* i = R"cpp(int *p = nullptr;)cpp")test",
+            format(R"test(int * i = R"cpp(int * p = nullptr;)cpp")test",
+                   getRawStringGoogleCppStyleBasedOn("llvm")));
+}
+
+TEST_F(FormatTestRawStrings, UsesConfigurationOverBaseStyle) {
+  // llvm style puts '*' on the right.
+  // google style puts '*' on the left.
+
+  // Uses the configured google style inside raw strings even if BasedOnStyle in
+  // the raw string format is llvm.
+  FormatStyle Style = getGoogleStyle(FormatStyle::LK_Cpp);
+  EXPECT_EQ(0, parseConfiguration("---\n"
+                                  "Language: Cpp\n"
+                                  "BasedOnStyle: Google", &Style).value());
+  Style.RawStringFormats = {{
+      FormatStyle::LK_Cpp,
+      {"cpp"},
+      {},
+      /*CanonicalDelimiter=*/"",
+      /*BasedOnStyle=*/"llvm",
+  }};
+  expect_eq(R"test(int* i = R"cpp(int* j = 0;)cpp";)test",
+            format(R"test(int * i = R"cpp(int * j = 0;)cpp";)test", Style));
+}
+
+TEST_F(FormatTestRawStrings, MatchesDelimitersCaseSensitively) {
+  // Don't touch the 'PB' raw string, format the 'pb' raw string.
+  expect_eq(R"test(
+s = R"PB(item:1)PB";
+t = R"pb(item: 1)pb";)test",
+            format(R"test(
+s = R"PB(item:1)PB";
+t = R"pb(item:1)pb";)test",
+                   getRawStringPbStyleWithColumns(40)));
+}
+
+TEST_F(FormatTestRawStrings, ReformatsShortRawStringsOnSingleLine) {
+  expect_eq(
+      R"test(P p = TP(R"pb()pb");)test",
+      format(
+          R"test(P p = TP(R"pb( )pb");)test",
+          getRawStringPbStyleWithColumns(40)));
+  expect_eq(
+      R"test(P p = TP(R"pb(item_1: 1)pb");)test",
+      format(
+          R"test(P p = TP(R"pb(item_1:1)pb");)test",
+          getRawStringPbStyleWithColumns(40)));
+  expect_eq(
+      R"test(P p = TP(R"pb(item_1: 1)pb");)test",
+      format(
+          R"test(P p = TP(R"pb(  item_1 :  1   )pb");)test",
+          getRawStringPbStyleWithColumns(40)));
+  expect_eq(
+      R"test(P p = TP(R"pb(item_1: 1 item_2: 2)pb");)test",
+      format(
+          R"test(P p = TP(R"pb(item_1:1 item_2:2)pb");)test",
+          getRawStringPbStyleWithColumns(40)));
+  expect_eq(
+      R"test(P p = TP(R"pb(item_1 < 1 > item_2: { 2 })pb");)test",
+      format(
+          R"test(P p = TP(R"pb(item_1<1> item_2:{2})pb");)test",
+          getRawStringPbStyleWithColumns(40)));
+
+  // Merge two short lines into one.
+  expect_eq(R"test(
+std::string s = R"pb(
+  item_1: 1 item_2: 2
+)pb";
+)test",
+            format(R"test(
+std::string s = R"pb(
+  item_1:1
+  item_2:2
+)pb";
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+}
+
+TEST_F(FormatTestRawStrings, BreaksRawStringsExceedingColumnLimit) {
+  expect_eq(R"test(
+P p = TPPPPPPPPPPPPPPP(
+    R"pb(item_1: 1, item_2: 2)pb");)test",
+            format(R"test(
+P p = TPPPPPPPPPPPPPPP(R"pb(item_1: 1, item_2: 2)pb");)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+P p =
+    TPPPPPPPPPPPPPPP(
+        R"pb(item_1: 1,
+             item_2: 2,
+             item_3: 3)pb");)test",
+            format(R"test(
+P p = TPPPPPPPPPPPPPPP(R"pb(item_1: 1, item_2: 2, item_3: 3)pb");)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+P p = TP(R"pb(item_1 < 1 >
+              item_2: < 2 >
+              item_3 {})pb");)test",
+      format(R"test(
+P p = TP(R"pb(item_1<1> item_2:<2> item_3{ })pb");)test",
+          getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(
+      R"test(
+P p = TP(R"pb(item_1: 1,
+              item_2: 2,
+              item_3: 3,
+              item_4: 4)pb");)test",
+      format(
+          R"test(
+P p = TP(R"pb(item_1: 1, item_2: 2, item_3: 3, item_4: 4)pb");)test",
+          getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+P p = TPPPPPPPPPPPPPPP(
+    R"pb(item_1 < 1 >,
+         item_2: { 2 },
+         item_3: < 3 >,
+         item_4: { 4 })pb");)test",
+            format(R"test(
+P p = TPPPPPPPPPPPPPPP(R"pb(item_1<1>, item_2: {2}, item_3: <3>, item_4:{4})pb");)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  // Breaks before a short raw string exceeding the column limit.
+  expect_eq(R"test(
+FFFFFFFFFFFFFFFFFFFFFFFFFFF(
+    R"pb(key: 1)pb");
+P p = TPPPPPPPPPPPPPPPPPPPP(
+    R"pb(key: 2)pb");
+auto TPPPPPPPPPPPPPPPPPPPP =
+    R"pb(key: 3)pb";
+P p = TPPPPPPPPPPPPPPPPPPPP(
+    R"pb(i: 1, j: 2)pb");
+
+int f(string s) {
+  FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF(
+      R"pb(key: 1)pb");
+  P p = TPPPPPPPPPPPPPPPPPPPP(
+      R"pb(key: 2)pb");
+  auto TPPPPPPPPPPPPPPPPPPPP =
+      R"pb(key: 3)pb";
+  if (s.empty())
+    P p = TPPPPPPPPPPPPPPPPPPPP(
+        R"pb(i: 1, j: 2)pb");
+}
+)test",
+            format(R"test(
+FFFFFFFFFFFFFFFFFFFFFFFFFFF(R"pb(key:1)pb");
+P p = TPPPPPPPPPPPPPPPPPPPP(R"pb(key:2)pb");
+auto TPPPPPPPPPPPPPPPPPPPP = R"pb(key:3)pb";
+P p = TPPPPPPPPPPPPPPPPPPPP(R"pb(i: 1, j:2)pb");
+
+int f(string s) {
+  FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF(R"pb(key:1)pb");
+  P p = TPPPPPPPPPPPPPPPPPPPP(R"pb(key:2)pb");
+  auto TPPPPPPPPPPPPPPPPPPPP = R"pb(key:3)pb";
+  if (s.empty())
+    P p = TPPPPPPPPPPPPPPPPPPPP(R"pb(i: 1, j:2)pb");
+}
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+}
+
+TEST_F(FormatTestRawStrings, FormatsRawStringArguments) {
+  expect_eq(R"test(
+P p = TP(R"pb(key { 1 })pb", param_2);)test",
+            format(R"test(
+P p = TP(R"pb(key{1})pb",param_2);)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+PPPPPPPPPPPPP(R"pb(keykeyk)pb",
+              param_2);)test",
+            format(R"test(
+PPPPPPPPPPPPP(R"pb(keykeyk)pb", param_2);)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+P p = TP(
+    R"pb(item: { i: 1, s: 's' }
+         item: { i: 2, s: 't' })pb");)test",
+            format(R"test(
+P p = TP(R"pb(item: {i: 1, s: 's'} item: {i: 2, s: 't'})pb");)test",
+                   getRawStringPbStyleWithColumns(40)));
+  expect_eq(R"test(
+FFFFFFFFFFFFFFFFFFF(
+    R"pb(key: "value")pb",
+    R"pb(key2: "value")pb");)test",
+            format(R"test(
+FFFFFFFFFFFFFFFFFFF(R"pb(key: "value")pb", R"pb(key2: "value")pb");)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  // Formats the first out of two arguments.
+  expect_eq(R"test(
+FFFFFFFF(R"pb(key: 1)pb", argument2);
+struct S {
+  const s =
+      f(R"pb(key: 1)pb", argument2);
+  void f() {
+    if (gol)
+      return g(R"pb(key: 1)pb",
+               132789237);
+    return g(R"pb(key: 1)pb", "172893");
+  }
+};)test",
+            format(R"test(
+FFFFFFFF(R"pb(key:1)pb", argument2);
+struct S {
+const s = f(R"pb(key:1)pb", argument2);
+void f() {
+  if (gol)
+    return g(R"pb(key:1)pb", 132789237);
+  return g(R"pb(key:1)pb", "172893");
+}
+};)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  // Formats the second out of two arguments.
+  expect_eq(R"test(
+FFFFFFFF(argument1, R"pb(key: 2)pb");
+struct S {
+  const s =
+      f(argument1, R"pb(key: 2)pb");
+  void f() {
+    if (gol)
+      return g(12784137,
+               R"pb(key: 2)pb");
+    return g(17283122, R"pb(key: 2)pb");
+  }
+};)test",
+            format(R"test(
+FFFFFFFF(argument1, R"pb(key:2)pb");
+struct S {
+const s = f(argument1, R"pb(key:2)pb");
+void f() {
+  if (gol)
+    return g(12784137, R"pb(key:2)pb");
+  return g(17283122, R"pb(key:2)pb");
+}
+};)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  // Formats two short raw string arguments.
+  expect_eq(R"test(
+FFFFF(R"pb(key: 1)pb", R"pb(key: 2)pb");)test",
+            format(R"test(
+FFFFF(R"pb(key:1)pb", R"pb(key:2)pb");)test",
+                   getRawStringPbStyleWithColumns(40)));
+  // TODO(krasimir): The original source code fits on one line, so the
+  // non-optimizing formatter is chosen. But after the formatting in protos is
+  // made, the code doesn't fit on one line anymore and further formatting
+  // splits it.
+  //
+  // Should we disable raw string formatting for the non-optimizing formatter?
+  expect_eq(R"test(
+FFFFFFF(R"pb(key: 1)pb", R"pb(key: 2)pb");)test",
+            format(R"test(
+FFFFFFF(R"pb(key:1)pb", R"pb(key:2)pb");)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  // Formats two short raw string arguments, puts second on newline.
+  expect_eq(R"test(
+FFFFFFFF(R"pb(key: 1)pb",
+         R"pb(key: 2)pb");)test",
+            format(R"test(
+FFFFFFFF(R"pb(key:1)pb", R"pb(key:2)pb");)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  // Formats both arguments.
+  expect_eq(R"test(
+FFFFFFFF(R"pb(key: 1)pb",
+         R"pb(key: 2)pb");
+struct S {
+  const s = f(R"pb(key: 1)pb",
+              R"pb(key: 2)pb");
+  void f() {
+    if (gol)
+      return g(R"pb(key: 1)pb",
+               R"pb(key: 2)pb");
+    return g(R"pb(k1)pb", R"pb(k2)pb");
+  }
+};)test",
+            format(R"test(
+FFFFFFFF(R"pb(key:1)pb", R"pb(key:2)pb");
+struct S {
+const s = f(R"pb(key:1)pb", R"pb(key:2)pb");
+void f() {
+  if (gol)
+    return g(R"pb(key:1)pb", R"pb(key:2)pb");
+  return g(R"pb( k1 )pb", R"pb( k2 )pb");
+}
+};)test",
+                   getRawStringPbStyleWithColumns(40)));
+}
+
+TEST_F(FormatTestRawStrings, RawStringStartingWithNewlines) {
+  expect_eq(R"test(
+std::string s = R"pb(
+  item_1: 1
+)pb";
+)test",
+            format(R"test(
+std::string s = R"pb(
+    item_1:1
+)pb";
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+std::string s = R"pb(
+
+  item_1: 1
+)pb";
+)test",
+            format(R"test(
+std::string s = R"pb(
+
+    item_1:1
+)pb";
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+std::string s = R"pb(
+  item_1: 1
+)pb";
+)test",
+            format(R"test(
+std::string s = R"pb(
+    item_1:1
+
+)pb";
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+std::string s = R"pb(
+  item_1: 1,
+  item_2: 2
+)pb";
+)test",
+            format(R"test(
+std::string s = R"pb(
+  item_1:1, item_2:2
+)pb";
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+std::string s = R"pb(
+  book {
+    title: "Alice's Adventures"
+    author: "Lewis Caroll"
+  }
+  book {
+    title: "Peter Pan"
+    author: "J. M. Barrie"
+  }
+)pb";
+)test",
+            format(R"test(
+std::string s = R"pb(
+    book { title: "Alice's Adventures" author: "Lewis Caroll" }
+    book { title: "Peter Pan" author: "J. M. Barrie" }
+)pb";
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+}
+
+TEST_F(FormatTestRawStrings, BreaksBeforeRawStrings) {
+  expect_eq(R"test(
+ASSERT_TRUE(
+    ParseFromString(R"pb(item_1: 1)pb"),
+    ptr);)test",
+            format(R"test(
+ASSERT_TRUE(ParseFromString(R"pb(item_1: 1)pb"), ptr);)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+ASSERT_TRUE(toolong::ParseFromString(
+                R"pb(item_1: 1)pb"),
+            ptr);)test",
+            format(R"test(
+ASSERT_TRUE(toolong::ParseFromString(R"pb(item_1: 1)pb"), ptr);)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+ASSERT_TRUE(ParseFromString(
+                R"pb(item_1: 1,
+                     item_2: 2)pb"),
+            ptr);)test",
+            format(R"test(
+ASSERT_TRUE(ParseFromString(R"pb(item_1: 1, item_2: 2)pb"), ptr);)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+ASSERT_TRUE(
+    ParseFromString(
+        R"pb(item_1: 1 item_2: 2)pb"),
+    ptr);)test",
+            format(R"test(
+ASSERT_TRUE(ParseFromString(R"pb(item_1: 1 item_2: 2)pb"), ptr);)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+}
+
+TEST_F(FormatTestRawStrings, RawStringsInOperands) {
+  // Formats the raw string first operand of a binary operator expression.
+  expect_eq(R"test(auto S = R"pb(item_1: 1)pb" + rest;)test",
+            format(R"test(auto S = R"pb(item_1:1)pb" + rest;)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S = R"pb(item_1: 1, item_2: 2)pb" +
+         rest;)test",
+            format(R"test(
+auto S = R"pb(item_1:1,item_2:2)pb"+rest;)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S =
+    R"pb(item_1: 1 item_2: 2)pb" + rest;)test",
+            format(R"test(
+auto S = R"pb(item_1:1 item_2:2)pb"+rest;)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S = R"pb(item_1: 1,
+              item_2: 2,
+              item_3: 3)pb" + rest;)test",
+            format(R"test(
+auto S = R"pb(item_1:1,item_2:2,item_3:3)pb"+rest;)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S = R"pb(item_1: 1,
+              item_2: 2,
+              item_3: 3)pb" +
+         longlongrest;)test",
+            format(R"test(
+auto S = R"pb(item_1:1,item_2:2,item_3:3)pb"+longlongrest;)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  // Formats the raw string second operand of a binary operator expression.
+  expect_eq(R"test(auto S = first + R"pb(item_1: 1)pb";)test",
+            format(R"test(auto S = first + R"pb(item_1:1)pb";)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S = first + R"pb(item_1: 1,
+                      item_2: 2)pb";)test",
+            format(R"test(
+auto S = first+R"pb(item_1:1,item_2:2)pb";)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S = first + R"pb(item_1: 1
+                      item_2: 2)pb";)test",
+            format(R"test(
+auto S = first+R"pb(item_1:1 item_2:2)pb";)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S = R"pb(item_1: 1,
+              item_2: 2,
+              item_3: 3)pb" + rest;)test",
+            format(R"test(
+auto S = R"pb(item_1:1,item_2:2,item_3:3)pb"+rest;)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S = R"pb(item_1: 1,
+              item_2: 2,
+              item_3: 3)pb" +
+         longlongrest;)test",
+            format(R"test(
+auto S = R"pb(item_1:1,item_2:2,item_3:3)pb"+longlongrest;)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  // Formats the raw string operands in expressions.
+  expect_eq(R"test(
+auto S = R"pb(item_1: 1)pb" +
+         R"pb(item_2: 2)pb";
+)test",
+            format(R"test(
+auto S=R"pb(item_1:1)pb"+R"pb(item_2:2)pb";
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S = R"pb(item_1: 1)pb" +
+         R"pb(item_2: 2)pb" +
+         R"pb(item_3: 3)pb";
+)test",
+            format(R"test(
+auto S=R"pb(item_1:1)pb"+R"pb(item_2:2)pb"+R"pb(item_3:3)pb";
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S = (count < 3)
+             ? R"pb(item_1: 1)pb"
+             : R"pb(item_2: 2)pb";
+)test",
+            format(R"test(
+auto S=(count<3)?R"pb(item_1:1)pb":R"pb(item_2:2)pb";
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S =
+    (count < 3)
+        ? R"pb(item_1: 1, item_2: 2)pb"
+        : R"pb(item_3: 3)pb";
+)test",
+            format(R"test(
+auto S=(count<3)?R"pb(item_1:1,item_2:2)pb":R"pb(item_3:3)pb";
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+  expect_eq(R"test(
+auto S =
+    (count < 3)
+        ? R"pb(item_1: 1)pb"
+        : R"pb(item_2: 2, item_3: 3)pb";
+)test",
+            format(R"test(
+auto S=(count<3)?R"pb(item_1:1)pb":R"pb(item_2:2,item_3:3)pb";
+)test",
+                   getRawStringPbStyleWithColumns(40)));
+
+}
+
+TEST_F(FormatTestRawStrings, PrefixAndSuffixAlignment) {
+  // Keep the suffix at the end of line if not on newline.
+  expect_eq(R"test(
+int s() {
+  auto S = PTP(
+      R"pb(
+        item_1: 1,
+        item_2: 2)pb");
+})test",
+            format(R"test(
+int s() {
+  auto S = PTP(
+      R"pb(
+      item_1: 1,
+      item_2: 2)pb");
+})test",
+                   getRawStringPbStyleWithColumns(20)));
+
+  // Align the suffix with the surrounding FirstIndent if the prefix is not on
+  // a line of its own.
+  expect_eq(R"test(
+int s() {
+  auto S = PTP(
+      R"pb(
+        item_1: 1,
+        item_2: 2
+      )pb");
+})test",
+            format(R"test(
+int s() {
+  auto S = PTP(R"pb(
+      item_1: 1,
+      item_2: 2
+      )pb");
+})test",
+                   getRawStringPbStyleWithColumns(20)));
+
+  // Align the prefix with the suffix if both the prefix and suffix are on a
+  // line of their own.
+  expect_eq(R"test(
+int s() {
+  auto S = PTP(
+      R"pb(
+        item_1: 1,
+        item_2: 2,
+      )pb");
+})test",
+            format(R"test(
+int s() {
+  auto S = PTP(
+      R"pb(
+      item_1: 1,
+      item_2: 2,
+      )pb");
+})test",
+                   getRawStringPbStyleWithColumns(20)));
+}
+
+TEST_F(FormatTestRawStrings, EstimatesPenalty) {
+  // The penalty for characters exceeding the column limit in the raw string
+  // forces 'hh' to be put on a newline.
+  expect_eq(R"test(
+ff(gggggg,
+   hh(R"pb(key {
+             i1: k1
+             i2: k2
+           })pb"));
+)test",
+            format(R"test(
+ff(gggggg, hh(R"pb(key {
+    i1: k1
+    i2: k2
+    })pb"));
+)test",
+                   getRawStringPbStyleWithColumns(20)));
+}
+
+TEST_F(FormatTestRawStrings, DontFormatNonRawStrings) {
+  expect_eq(R"test(a = R"pb(key:value)";)test",
+            format(R"test(a = R"pb(key:value)";)test",
+                   getRawStringPbStyleWithColumns(20)));
+}
+
+TEST_F(FormatTestRawStrings, FormatsRawStringsWithEnclosingFunctionName) {
+  FormatStyle Style = getRawStringPbStyleWithColumns(40);
+  Style.RawStringFormats[0].EnclosingFunctions.push_back(
+      "PARSE_TEXT_PROTO");
+  Style.RawStringFormats[0].EnclosingFunctions.push_back("ParseTextProto");
+  expect_eq(R"test(a = PARSE_TEXT_PROTO(R"(key: value)");)test",
+            format(R"test(a = PARSE_TEXT_PROTO(R"(key:value)");)test", Style));
+
+  expect_eq(R"test(
+a = PARSE_TEXT_PROTO /**/ (
+    /**/ R"(key: value)");)test",
+            format(R"test(
+a = PARSE_TEXT_PROTO/**/(/**/R"(key:value)");)test",
+                   Style));
+
+  expect_eq(R"test(
+a = ParseTextProto<ProtoType>(
+    R"(key: value)");)test",
+            format(R"test(
+a = ParseTextProto<ProtoType>(R"(key:value)");)test",
+                   Style));
+}
+
+TEST_F(FormatTestRawStrings, UpdatesToCanonicalDelimiters) {
+  FormatStyle Style = getRawStringPbStyleWithColumns(25);
+  Style.RawStringFormats[0].CanonicalDelimiter = "proto";
+  expect_eq(R"test(a = R"proto(key: value)proto";)test",
+            format(R"test(a = R"pb(key:value)pb";)test", Style));
+
+  // Don't update to canonical delimiter if it occurs as a raw string suffix in
+  // the raw string content.
+  expect_eq(R"test(a = R"pb(key: ")proto")pb";)test",
+            format(R"test(a = R"pb(key:")proto")pb";)test", Style));
+}
+
+} // end namespace
+} // end namespace format
+} // end namespace clang
diff --git a/unittests/Format/FormatTestTextProto.cpp b/unittests/Format/FormatTestTextProto.cpp
index 0a7bcdd..1102055 100644
--- a/unittests/Format/FormatTestTextProto.cpp
+++ b/unittests/Format/FormatTestTextProto.cpp
@@ -31,14 +31,18 @@
     return *Result;
   }
 
-  static std::string format(llvm::StringRef Code) {
-    FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
-    Style.ColumnLimit = 60; // To make writing tests easier.
+  static std::string format(llvm::StringRef Code, const FormatStyle &Style) {
     return format(Code, 0, Code.size(), Style);
   }
 
+  static void verifyFormat(llvm::StringRef Code, const FormatStyle &Style) {
+    EXPECT_EQ(Code.str(), format(test::messUp(Code), Style));
+  }
+
   static void verifyFormat(llvm::StringRef Code) {
-    EXPECT_EQ(Code.str(), format(test::messUp(Code)));
+    FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
+    Style.ColumnLimit = 60; // To make writing tests easier.
+    verifyFormat(Code, Style);
   }
 };
 
@@ -49,9 +53,9 @@
 TEST_F(FormatTestTextProto, SupportsMessageFields) {
   verifyFormat("msg_field: {}");
 
-  verifyFormat("msg_field: {field_a: A}");
+  verifyFormat("msg_field: { field_a: A }");
 
-  verifyFormat("msg_field: {field_a: \"OK\" field_b: 123}");
+  verifyFormat("msg_field: { field_a: \"OK\" field_b: 123 }");
 
   verifyFormat("msg_field: {\n"
                "  field_a: 1\n"
@@ -63,9 +67,9 @@
 
   verifyFormat("msg_field {}");
 
-  verifyFormat("msg_field {field_a: A}");
+  verifyFormat("msg_field { field_a: A }");
 
-  verifyFormat("msg_field {field_a: \"OK\" field_b: 123}");
+  verifyFormat("msg_field { field_a: \"OK\" field_b: 123 }");
 
   verifyFormat("msg_field {\n"
                "  field_a: 1\n"
@@ -78,11 +82,11 @@
                "  field_h: 1234.567e-89\n"
                "}");
 
-  verifyFormat("msg_field: {msg_field {field_a: 1}}");
+  verifyFormat("msg_field: { msg_field { field_a: 1 } }");
 
   verifyFormat("id: \"ala.bala\"\n"
-               "item {type: ITEM_A rank: 1 score: 90.0}\n"
-               "item {type: ITEM_B rank: 2 score: 70.5}\n"
+               "item { type: ITEM_A rank: 1 score: 90.0 }\n"
+               "item { type: ITEM_B rank: 2 score: 70.5 }\n"
                "item {\n"
                "  type: ITEM_A\n"
                "  rank: 3\n"
@@ -102,24 +106,24 @@
   verifyFormat("field_a: OK\n"
                "field_b: \"OK\"\n"
                "field_c: \"OK\"\n"
-               "msg_field: {field_d: 123}\n"
+               "msg_field: { field_d: 123 }\n"
                "field_e: OK\n"
                "field_f: OK");
 
   verifyFormat("field_a: OK\n"
                "field_b: \"OK\"\n"
                "field_c: \"OK\"\n"
-               "msg_field: {field_d: 123 field_e: OK}");
+               "msg_field: { field_d: 123 field_e: OK }");
 
   verifyFormat("a: {\n"
                "  field_a: OK\n"
-               "  field_b {field_c: OK}\n"
+               "  field_b { field_c: OK }\n"
                "  field_d: OKOKOK\n"
                "  field_e: OK\n"
                "}");
 
   verifyFormat("field_a: OK,\n"
-               "field_b {field_c: OK},\n"
+               "field_b { field_c: OK },\n"
                "field_d: OKOKOK,\n"
                "field_e: OK");
 }
@@ -143,21 +147,21 @@
   // Single-line tests
   verifyFormat("msg_field <>");
   verifyFormat("msg_field: <>");
-  verifyFormat("msg_field <field_a: OK>");
-  verifyFormat("msg_field: <field_a: 123>");
-  verifyFormat("msg_field <field_a <>>");
-  verifyFormat("msg_field <field_a <field_b <>>>");
-  verifyFormat("msg_field: <field_a <field_b: <>>>");
-  verifyFormat("msg_field <field_a: OK, field_b: \"OK\">");
-  verifyFormat("msg_field <field_a: OK field_b: <>, field_c: OK>");
-  verifyFormat("msg_field <field_a {field_b: 1}, field_c: <field_d: 2>>");
-  verifyFormat("msg_field: <field_a: OK, field_b: \"OK\">");
-  verifyFormat("msg_field: <field_a: OK field_b: <>, field_c: OK>");
-  verifyFormat("msg_field: <field_a {field_b: 1}, field_c: <field_d: 2>>");
-  verifyFormat("field_a: \"OK\", msg_field: <field_b: 123>, field_c: {}");
-  verifyFormat("field_a <field_b: 1>, msg_field: <field_b: 123>, field_c <>");
-  verifyFormat("field_a <field_b: 1> msg_field: <field_b: 123> field_c <>");
-  verifyFormat("field <field <field: <>>, field <>> field: <field: 1>");
+  verifyFormat("msg_field < field_a: OK >");
+  verifyFormat("msg_field: < field_a: 123 >");
+  verifyFormat("msg_field < field_a <> >");
+  verifyFormat("msg_field < field_a < field_b <> > >");
+  verifyFormat("msg_field: < field_a < field_b: <> > >");
+  verifyFormat("msg_field < field_a: OK, field_b: \"OK\" >");
+  verifyFormat("msg_field < field_a: OK field_b: <>, field_c: OK >");
+  verifyFormat("msg_field < field_a { field_b: 1 }, field_c: < f_d: 2 > >");
+  verifyFormat("msg_field: < field_a: OK, field_b: \"OK\" >");
+  verifyFormat("msg_field: < field_a: OK field_b: <>, field_c: OK >");
+  verifyFormat("msg_field: < field_a { field_b: 1 }, field_c: < fd_d: 2 > >");
+  verifyFormat("field_a: \"OK\", msg_field: < field_b: 123 >, field_c: {}");
+  verifyFormat("field_a < field_b: 1 >, msg_fid: < fiel_b: 123 >, field_c <>");
+  verifyFormat("field_a < field_b: 1 > msg_fied: < field_b: 123 > field_c <>");
+  verifyFormat("field < field < field: <> >, field <> > field: < field: 1 >");
 
   // Multiple lines tests
   verifyFormat("msg_field <\n"
@@ -170,37 +174,37 @@
 
   verifyFormat("msg_field: <>\n"
                "field_c: \"OK\",\n"
-               "msg_field: <field_d: 123>\n"
+               "msg_field: < field_d: 123 >\n"
                "field_e: OK\n"
-               "msg_field: <field_d: 12>");
+               "msg_field: < field_d: 12 >");
 
   verifyFormat("field_a: OK,\n"
-               "field_b <field_c: OK>,\n"
-               "field_d: <12.5>,\n"
+               "field_b < field_c: OK >,\n"
+               "field_d: < 12.5 >,\n"
                "field_e: OK");
 
   verifyFormat("field_a: OK\n"
-               "field_b <field_c: OK>\n"
-               "field_d: <12.5>\n"
+               "field_b < field_c: OK >\n"
+               "field_d: < 12.5 >\n"
                "field_e: OKOKOK");
 
   verifyFormat("msg_field <\n"
                "  field_a: OK,\n"
-               "  field_b <field_c: OK>,\n"
-               "  field_d: <12.5>,\n"
+               "  field_b < field_c: OK >,\n"
+               "  field_d: < 12.5 >,\n"
                "  field_e: OK\n"
                ">");
 
   verifyFormat("msg_field <\n"
-               "  field_a: <field: OK>,\n"
-               "  field_b <field_c: OK>,\n"
-               "  field_d: <12.5>,\n"
+               "  field_a: < field: OK >,\n"
+               "  field_b < field_c: OK >,\n"
+               "  field_d: < 12.5 >,\n"
                "  field_e: OK,\n"
                ">");
 
   verifyFormat("msg_field: <\n"
                "  field_a: \"OK\"\n"
-               "  msg_field: {field_b: OK}\n"
+               "  msg_field: { field_b: OK }\n"
                "  field_g: OK\n"
                "  field_g: OK\n"
                "  field_g: OK\n"
@@ -208,86 +212,244 @@
 
   verifyFormat("field_a {\n"
                "  field_d: ok\n"
-               "  field_b: <field_c: 1>\n"
+               "  field_b: < field_c: 1 >\n"
                "  field_d: ok\n"
                "  field_d: ok\n"
                "}");
 
   verifyFormat("field_a: {\n"
                "  field_d: ok\n"
-               "  field_b: <field_c: 1>\n"
+               "  field_b: < field_c: 1 >\n"
                "  field_d: ok\n"
                "  field_d: ok\n"
                "}");
 
-  verifyFormat("field_a: <f1: 1, f2: <>>\n"
+  verifyFormat("field_a: < f1: 1, f2: <> >\n"
                "field_b <\n"
                "  field_b1: <>\n"
                "  field_b2: ok,\n"
                "  field_b3: <\n"
                "    field_x {}  // Comment\n"
-               "    field_y: {field_z: 1}\n"
+               "    field_y: { field_z: 1 }\n"
                "    field_w: ok\n"
                "  >\n"
                "  field {\n"
                "    field_x <>  // Comment\n"
-               "    field_y: <field_z: 1>\n"
+               "    field_y: < field_z: 1 >\n"
                "    field_w: ok\n"
                "    msg_field: <\n"
                "      field: <>\n"
-               "      field: <field: 1>\n"
-               "      field: <field: 2>\n"
-               "      field: <field: 3>\n"
-               "      field: <field: 4>\n"
+               "      field: < field: 1 >\n"
+               "      field: < field: 2 >\n"
+               "      field: < field: 3 >\n"
+               "      field: < field: 4 >\n"
                "      field: ok\n"
                "    >\n"
                "  }\n"
                ">\n"
                "field: OK,\n"
-               "field_c <field <field <>>>");
+               "field_c < field < field <> > >");
 
   verifyFormat("app_id: 'com.javax.swing.salsa.latino'\n"
                "head_id: 1\n"
-               "data <key: value>");
+               "data < key: value >");
 
   verifyFormat("app_id: 'com.javax.swing.salsa.latino'\n"
                "head_id: 1\n"
-               "data <key: value>\n"
+               "data < key: value >\n"
                "tail_id: 2");
 
   verifyFormat("app_id: 'com.javax.swing.salsa.latino'\n"
                "head_id: 1\n"
-               "data <key: value>\n"
-               "data {key: value}");
+               "data < key: value >\n"
+               "data { key: value }");
 
   verifyFormat("app {\n"
                "  app_id: 'com.javax.swing.salsa.latino'\n"
                "  head_id: 1\n"
-               "  data <key: value>\n"
+               "  data < key: value >\n"
                "}");
 
   verifyFormat("app: {\n"
                "  app_id: 'com.javax.swing.salsa.latino'\n"
                "  head_id: 1\n"
-               "  data <key: value>\n"
+               "  data < key: value >\n"
                "}");
 
   verifyFormat("app_id: 'com.javax.swing.salsa.latino'\n"
                "headheadheadheadheadhead_id: 1\n"
-               "product_data {product {1}}");
+               "product_data { product { 1 } }");
 
   verifyFormat("app_id: 'com.javax.swing.salsa.latino'\n"
                "headheadheadheadheadhead_id: 1\n"
-               "product_data <product {1}>");
+               "product_data < product { 1 } >");
 
   verifyFormat("app_id: 'com.javax.swing.salsa.latino'\n"
                "headheadheadheadheadhead_id: 1\n"
-               "product_data <product <1>>");
+               "product_data < product < 1 > >");
 
   verifyFormat("app <\n"
                "  app_id: 'com.javax.swing.salsa.latino'\n"
                "  headheadheadheadheadhead_id: 1\n"
-               "  product_data <product {1}>\n"
+               "  product_data < product { 1 } >\n"
+               ">");
+
+  verifyFormat("dcccwrnfioeruvginerurneitinfo {\n"
+               "  exte3nsionrnfvui { key: value }\n"
+               "}");
+}
+
+TEST_F(FormatTestTextProto, DiscardsUnbreakableTailIfCanBreakAfter) {
+  // The two closing braces count towards the string UnbreakableTailLength, but
+  // since we have broken after the corresponding opening braces, we don't
+  // consider that length for string breaking.
+  verifyFormat(
+      "foo: {\n"
+      "  bar: {\n"
+      "    text: \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"\n"
+      "  }\n"
+      "}");
+}
+
+TEST_F(FormatTestTextProto, KeepsLongStringLiteralsOnSameLine) {
+  verifyFormat(
+      "foo: {\n"
+      "  text: \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaasaaaaaaaaaa\"\n"
+      "}");
+}
+
+TEST_F(FormatTestTextProto, KeepsCommentsIndentedInList) {
+  verifyFormat("aaaaaaaaaa: 100\n"
+               "bbbbbbbbbbbbbbbbbbbbbbbbbbb: 200\n"
+               "# Single line comment for stuff here.\n"
+               "cccccccccccccccccccccccc: 3849\n"
+               "# Multiline comment for stuff here.\n"
+               "# Multiline comment for stuff here.\n"
+               "# Multiline comment for stuff here.\n"
+               "cccccccccccccccccccccccc: 3849");
+}
+
+TEST_F(FormatTestTextProto, FormatsExtensions) {
+  verifyFormat("[type] { key: value }");
+  verifyFormat("[type] {\n"
+               "  keyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy: value\n"
+               "}");
+  verifyFormat("[type.type] { key: value }");
+  verifyFormat("[type.type] < key: value >");
+  verifyFormat("[type.type/type.type] { key: value }");
+  verifyFormat("msg {\n"
+               "  [type.type] { key: value }\n"
+               "}");
+  verifyFormat("msg {\n"
+               "  [type.type] {\n"
+               "    keyyyyyyyyyyyyyy: valuuuuuuuuuuuuuuuuuuuuuuuuue\n"
+               "  }\n"
+               "}");
+  verifyFormat("key: value\n"
+               "[a.b] { key: value }");
+  verifyFormat("msg: <\n"
+               "  key: value\n"
+               "  [a.b.c/d.e]: < key: value >\n"
+               "  [f.g]: <\n"
+               "    key: valueeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee\n"
+               "    key: {}\n"
+               "  >\n"
+               "  key {}\n"
+               "  [h.i.j] < key: value >\n"
+               "  [a]: {\n"
+               "    [b.c]: {}\n"
+               "    [d] <>\n"
+               "    [e/f]: 1\n"
+               "  }\n"
+               ">");
+  verifyFormat("[longg.long.long.long.long.long.long.long.long.long.long\n"
+               "     .longg.longlong] { key: value }");
+  verifyFormat("[longg.long.long.long.long.long.long.long.long.long.long\n"
+               "     .longg.longlong] {\n"
+               "  key: value\n"
+               "  key: value\n"
+               "  key: value\n"
+               "  key: value\n"
+               "}");
+  verifyFormat("[longg.long.long.long.long.long.long.long.long.long\n"
+               "     .long/longg.longlong] { key: value }");
+  verifyFormat("[aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/\n"
+               " bbbbbbbbbbbbbb] { key: value }");
+  verifyFormat("[aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n"
+               "] { key: value }");
+  verifyFormat("[aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n"
+               "] {\n"
+               "  [type.type] {\n"
+               "    keyyyyyyyyyyyyyy: valuuuuuuuuuuuuuuuuuuuuuuuuue\n"
+               "  }\n"
+               "}");
+  verifyFormat("[aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/\n"
+               " bbbbbbb] {\n"
+               "  [type.type] {\n"
+               "    keyyyyyyyyyyyyyy: valuuuuuuuuuuuuuuuuuuuuuuuuue\n"
+               "  }\n"
+               "}");
+  verifyFormat(
+      "aaaaaaaaaaaaaaa {\n"
+      "  bbbbbb {\n"
+      "    [a.b/cy] {\n"
+      "      eeeeeeeeeeeee: \"The lazy coo cat jumps over the lazy hot dog\"\n"
+      "    }\n"
+      "  }\n"
+      "}");
+}
+
+TEST_F(FormatTestTextProto, NoSpaceAfterPercent) {
+  verifyFormat("key: %d");
+}
+
+TEST_F(FormatTestTextProto, FormatsRepeatedListInitializers) {
+  verifyFormat("keys: []");
+  verifyFormat("keys: [ 1 ]");
+  verifyFormat("keys: [ 'ala', 'bala' ]");
+  verifyFormat("keys:\n"
+               "    [ 'ala', 'bala', 'porto', 'kala', 'too', 'long', 'ng' ]");
+  verifyFormat("key: item\n"
+               "keys: [\n"
+               "  'ala',\n"
+               "  'bala',\n"
+               "  'porto',\n"
+               "  'kala',\n"
+               "  'too',\n"
+               "  'long',\n"
+               "  'long',\n"
+               "  'long'\n"
+               "]\n"
+               "key: item\n"
+               "msg {\n"
+               "  key: item\n"
+               "  keys: [\n"
+               "    'ala',\n"
+               "    'bala',\n"
+               "    'porto',\n"
+               "    'kala',\n"
+               "    'too',\n"
+               "    'long',\n"
+               "    'long'\n"
+               "  ]\n"
+               "}\n"
+               "key: value"
+               );
+  FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
+  Style.ColumnLimit = 60; // To make writing tests easier.
+  Style.Cpp11BracedListStyle = true;
+  verifyFormat("keys: [1]", Style);
+}
+
+TEST_F(FormatTestTextProto, AcceptsOperatorAsKey) {
+  verifyFormat("aaaaaaaaaaa: <\n"
+               "  bbbbbbbbb: <\n"
+               "    ccccccccccccccccccccccc: <\n"
+               "      operator: 1\n"
+               "      operator: 2\n"
+               "      operator { key: value }\n"
+               "    >\n"
+               "  >\n"
                ">");
 }
 } // end namespace tooling
diff --git a/unittests/Format/SortIncludesTest.cpp b/unittests/Format/SortIncludesTest.cpp
index ff39887..09fc070 100644
--- a/unittests/Format/SortIncludesTest.cpp
+++ b/unittests/Format/SortIncludesTest.cpp
@@ -77,6 +77,28 @@
   EXPECT_TRUE(sortIncludes(Style, Code, GetCodeRange(Code), "a.cc").empty());
 }
 
+TEST_F(SortIncludesTest, SortedIncludesInMultipleBlocksAreMerged) {
+  Style.IncludeBlocks = FormatStyle::IBS_Merge;
+  EXPECT_EQ("#include \"a.h\"\n"
+            "#include \"b.h\"\n"
+            "#include \"c.h\"\n",
+            sort("#include \"a.h\"\n"
+                 "#include \"c.h\"\n"
+                 "\n"
+                 "\n"
+                 "#include \"b.h\"\n"));
+
+  Style.IncludeBlocks = FormatStyle::IBS_Regroup;
+  EXPECT_EQ("#include \"a.h\"\n"
+            "#include \"b.h\"\n"
+            "#include \"c.h\"\n",
+            sort("#include \"a.h\"\n"
+                 "#include \"c.h\"\n"
+                 "\n"
+                 "\n"
+                 "#include \"b.h\"\n"));
+}
+
 TEST_F(SortIncludesTest, SupportClangFormatOff) {
   EXPECT_EQ("#include <a>\n"
             "#include <b>\n"
@@ -159,6 +181,48 @@
                  "#include \"b.h\"\n"));
 }
 
+TEST_F(SortIncludesTest, SortsAllBlocksWhenMerging) {
+  Style.IncludeBlocks = FormatStyle::IBS_Merge;
+  EXPECT_EQ("#include \"a.h\"\n"
+            "#include \"b.h\"\n"
+            "#include \"c.h\"\n",
+            sort("#include \"a.h\"\n"
+                 "#include \"c.h\"\n"
+                 "\n"
+                 "#include \"b.h\"\n"));
+}
+
+TEST_F(SortIncludesTest, CommentsAlwaysSeparateGroups) {
+  EXPECT_EQ("#include \"a.h\"\n"
+            "#include \"c.h\"\n"
+            "// comment\n"
+            "#include \"b.h\"\n",
+            sort("#include \"c.h\"\n"
+                 "#include \"a.h\"\n"
+                 "// comment\n"
+                 "#include \"b.h\"\n"));
+
+  Style.IncludeBlocks = FormatStyle::IBS_Merge;
+  EXPECT_EQ("#include \"a.h\"\n"
+            "#include \"c.h\"\n"
+            "// comment\n"
+            "#include \"b.h\"\n",
+            sort("#include \"c.h\"\n"
+                 "#include \"a.h\"\n"
+                 "// comment\n"
+                 "#include \"b.h\"\n"));
+
+  Style.IncludeBlocks = FormatStyle::IBS_Regroup;
+  EXPECT_EQ("#include \"a.h\"\n"
+            "#include \"c.h\"\n"
+            "// comment\n"
+            "#include \"b.h\"\n",
+            sort("#include \"c.h\"\n"
+                 "#include \"a.h\"\n"
+                 "// comment\n"
+                 "#include \"b.h\"\n"));
+}
+
 TEST_F(SortIncludesTest, HandlesAngledIncludesAsSeparateBlocks) {
   EXPECT_EQ("#include \"a.h\"\n"
             "#include \"c.h\"\n"
@@ -180,6 +244,19 @@
                  "#include \"a.h\"\n"));
 }
 
+TEST_F(SortIncludesTest, RegroupsAngledIncludesInSeparateBlocks) {
+  Style.IncludeBlocks = FormatStyle::IBS_Regroup;
+  EXPECT_EQ("#include \"a.h\"\n"
+            "#include \"c.h\"\n"
+            "\n"
+            "#include <b.h>\n"
+            "#include <d.h>\n",
+            sort("#include <d.h>\n"
+                 "#include <b.h>\n"
+                 "#include \"c.h\"\n"
+                 "#include \"a.h\"\n"));
+}
+
 TEST_F(SortIncludesTest, HandlesMultilineIncludes) {
   EXPECT_EQ("#include \"a.h\"\n"
             "#include \"b.h\"\n"
@@ -266,6 +343,35 @@
                  "a.cc"));
 }
 
+TEST_F(SortIncludesTest, RecognizeMainHeaderInAllGroups) {
+  Style.IncludeIsMainRegex = "([-_](test|unittest))?$";
+  Style.IncludeBlocks = FormatStyle::IBS_Merge;
+
+  EXPECT_EQ("#include \"c.h\"\n"
+            "#include \"a.h\"\n"
+            "#include \"b.h\"\n",
+            sort("#include \"b.h\"\n"
+                 "\n"
+                 "#include \"a.h\"\n"
+                 "#include \"c.h\"\n",
+                 "c.cc"));
+}
+
+TEST_F(SortIncludesTest, MainHeaderIsSeparatedWhenRegroupping) {
+  Style.IncludeIsMainRegex = "([-_](test|unittest))?$";
+  Style.IncludeBlocks = FormatStyle::IBS_Regroup;
+
+  EXPECT_EQ("#include \"a.h\"\n"
+            "\n"
+            "#include \"b.h\"\n"
+            "#include \"c.h\"\n",
+            sort("#include \"b.h\"\n"
+                 "\n"
+                 "#include \"a.h\"\n"
+                 "#include \"c.h\"\n",
+                 "a.cc"));
+}
+
 TEST_F(SortIncludesTest, SupportCaseInsensitiveMatching) {
   // Setup an regex for main includes so we can cover those as well.
   Style.IncludeIsMainRegex = "([-_](test|unittest))?$";
@@ -309,6 +415,34 @@
                  "c_main.cc"));
 }
 
+TEST_F(SortIncludesTest, PriorityGroupsAreSeparatedWhenRegroupping) {
+  Style.IncludeCategories = {{".*important_os_header.*", -1}, {".*", 1}};
+  Style.IncludeBlocks = FormatStyle::IBS_Regroup;
+
+  EXPECT_EQ("#include \"important_os_header.h\"\n"
+            "\n"
+            "#include \"c_main.h\"\n"
+            "\n"
+            "#include \"a_other.h\"\n",
+            sort("#include \"c_main.h\"\n"
+                 "#include \"a_other.h\"\n"
+                 "#include \"important_os_header.h\"\n",
+                 "c_main.cc"));
+
+  // check stable when re-run
+  EXPECT_EQ("#include \"important_os_header.h\"\n"
+            "\n"
+            "#include \"c_main.h\"\n"
+            "\n"
+            "#include \"a_other.h\"\n",
+            sort("#include \"important_os_header.h\"\n"
+                 "\n"
+                 "#include \"c_main.h\"\n"
+                 "\n"
+                 "#include \"a_other.h\"\n",
+                 "c_main.cc"));
+}
+
 TEST_F(SortIncludesTest, CalculatesCorrectCursorPosition) {
   std::string Code = "#include <ccc>\n"    // Start of line: 0
                      "#include <bbbbbb>\n" // Start of line: 15
@@ -332,6 +466,30 @@
                  "#include <b>\n"
                  "#include <b>\n"
                  "#include <c>\n"));
+
+  Style.IncludeBlocks = FormatStyle::IBS_Merge;
+  EXPECT_EQ("#include <a>\n"
+            "#include <b>\n"
+            "#include <c>\n",
+            sort("#include <a>\n"
+                 "#include <b>\n"
+                 "\n"
+                 "#include <b>\n"
+                 "\n"
+                 "#include <b>\n"
+                 "#include <c>\n"));
+
+  Style.IncludeBlocks = FormatStyle::IBS_Regroup;
+  EXPECT_EQ("#include <a>\n"
+            "#include <b>\n"
+            "#include <c>\n",
+            sort("#include <a>\n"
+                 "#include <b>\n"
+                 "\n"
+                 "#include <b>\n"
+                 "\n"
+                 "#include <b>\n"
+                 "#include <c>\n"));
 }
 
 TEST_F(SortIncludesTest, SortAndDeduplicateIncludes) {
@@ -344,6 +502,30 @@
                  "#include <b>\n"
                  "#include <c>\n"
                  "#include <b>\n"));
+
+  Style.IncludeBlocks = FormatStyle::IBS_Merge;
+  EXPECT_EQ("#include <a>\n"
+            "#include <b>\n"
+            "#include <c>\n",
+            sort("#include <b>\n"
+                 "#include <a>\n"
+                 "\n"
+                 "#include <b>\n"
+                 "\n"
+                 "#include <c>\n"
+                 "#include <b>\n"));
+
+  Style.IncludeBlocks = FormatStyle::IBS_Regroup;
+  EXPECT_EQ("#include <a>\n"
+            "#include <b>\n"
+            "#include <c>\n",
+            sort("#include <b>\n"
+                 "#include <a>\n"
+                 "\n"
+                 "#include <b>\n"
+                 "\n"
+                 "#include <c>\n"
+                 "#include <b>\n"));
 }
 
 TEST_F(SortIncludesTest, CalculatesCorrectCursorPositionAfterDeduplicate) {
diff --git a/unittests/Format/UsingDeclarationsSorterTest.cpp b/unittests/Format/UsingDeclarationsSorterTest.cpp
index caa2f43..c4d9713 100644
--- a/unittests/Format/UsingDeclarationsSorterTest.cpp
+++ b/unittests/Format/UsingDeclarationsSorterTest.cpp
@@ -50,8 +50,8 @@
             "using aa;",
             sortUsingDeclarations("using aa;\n"
                                   "using a;"));
-  EXPECT_EQ("using ::a;\n"
-            "using a;",
+  EXPECT_EQ("using a;\n"
+            "using ::a;",
             sortUsingDeclarations("using a;\n"
                                   "using ::a;"));
 
@@ -86,7 +86,7 @@
                                   "using a, b;"));
 }
 
-TEST_F(UsingDeclarationsSorterTest, SortsCaseInsensitively) {
+TEST_F(UsingDeclarationsSorterTest, UsingDeclarationOrder) {
   EXPECT_EQ("using A;\n"
             "using a;",
             sortUsingDeclarations("using A;\n"
@@ -99,6 +99,23 @@
             "using B;",
             sortUsingDeclarations("using B;\n"
                                   "using a;"));
+
+  // Ignores leading '::'.
+  EXPECT_EQ("using ::a;\n"
+            "using A;",
+            sortUsingDeclarations("using ::a;\n"
+                                  "using A;"));
+
+  EXPECT_EQ("using ::A;\n"
+            "using a;",
+            sortUsingDeclarations("using ::A;\n"
+                                  "using a;"));
+
+  // Sorts '_' before 'a' and 'A'.
+  EXPECT_EQ("using _;\n"
+            "using A;",
+            sortUsingDeclarations("using A;\n"
+                                  "using _;"));
   EXPECT_EQ("using _;\n"
             "using a;",
             sortUsingDeclarations("using a;\n"
@@ -108,13 +125,16 @@
             sortUsingDeclarations("using a::a;\n"
                                   "using a::_;"));
 
+  // Sorts non-namespace names before namespace names at the same level.
   EXPECT_EQ("using ::testing::_;\n"
             "using ::testing::Aardvark;\n"
-            "using ::testing::apple::Honeycrisp;\n"
+            "using ::testing::kMax;\n"
             "using ::testing::Xylophone;\n"
+            "using ::testing::apple::Honeycrisp;\n"
             "using ::testing::zebra::Stripes;",
             sortUsingDeclarations("using ::testing::Aardvark;\n"
                                   "using ::testing::Xylophone;\n"
+                                  "using ::testing::kMax;\n"
                                   "using ::testing::_;\n"
                                   "using ::testing::apple::Honeycrisp;\n"
                                   "using ::testing::zebra::Stripes;"));
@@ -122,7 +142,6 @@
 
 TEST_F(UsingDeclarationsSorterTest, SortsStably) {
   EXPECT_EQ("using a;\n"
-            "using a;\n"
             "using A;\n"
             "using a;\n"
             "using A;\n"
@@ -131,11 +150,8 @@
             "using a;\n"
             "using B;\n"
             "using b;\n"
-            "using b;\n"
             "using B;\n"
             "using b;\n"
-            "using b;\n"
-            "using b;\n"
             "using B;\n"
             "using b;",
             sortUsingDeclarations("using a;\n"
@@ -170,14 +186,14 @@
                                   "using c;"));
 
   EXPECT_EQ("#include <iostream>\n"
-            "using ::std::endl;\n"
             "using std::cin;\n"
             "using std::cout;\n"
+            "using ::std::endl;\n"
             "int main();",
             sortUsingDeclarations("#include <iostream>\n"
                                   "using std::cout;\n"
-                                  "using std::cin;\n"
                                   "using ::std::endl;\n"
+                                  "using std::cin;\n"
                                   "int main();"));
 }
 
@@ -328,6 +344,32 @@
                                   {tooling::Range(19, 1)}));
 }
 
+TEST_F(UsingDeclarationsSorterTest, SortsUsingDeclarationsWithLeadingkComments) {
+  EXPECT_EQ("/* comment */ using a;\n"
+            "/* comment */ using b;",
+            sortUsingDeclarations("/* comment */ using b;\n"
+                                  "/* comment */ using a;"));
+}
+
+TEST_F(UsingDeclarationsSorterTest, DeduplicatesUsingDeclarations) {
+  EXPECT_EQ("using a;\n"
+            "using b;\n"
+            "using c;\n"
+            "\n"
+            "using a;\n"
+            "using e;",
+            sortUsingDeclarations("using c;\n"
+                                  "using a;\n"
+                                  "using b;\n"
+                                  "using a;\n"
+                                  "using b;\n"
+                                  "\n"
+                                  "using e;\n"
+                                  "using a;\n"
+                                  "using e;"));
+
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
diff --git a/unittests/Frontend/CMakeLists.txt b/unittests/Frontend/CMakeLists.txt
index c1f4f18..c764fb9 100644
--- a/unittests/Frontend/CMakeLists.txt
+++ b/unittests/Frontend/CMakeLists.txt
@@ -9,12 +9,15 @@
   CodeGenActionTest.cpp
   ParsedSourceLocationTest.cpp
   PCHPreambleTest.cpp
+  OutputStreamTest.cpp
   )
 target_link_libraries(FrontendTests
+  PRIVATE
   clangAST
   clangBasic
   clangFrontend
   clangLex
   clangSema
   clangCodeGen
+  clangFrontendTool
   )
diff --git a/unittests/Frontend/OutputStreamTest.cpp b/unittests/Frontend/OutputStreamTest.cpp
new file mode 100644
index 0000000..ff03650
--- /dev/null
+++ b/unittests/Frontend/OutputStreamTest.cpp
@@ -0,0 +1,46 @@
+//===- unittests/Frontend/OutputStreamTest.cpp --- FrontendAction tests --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/CodeGen/BackendUtil.h"
+#include "clang/CodeGen/CodeGenAction.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/FrontendTool/Utils.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::frontend;
+
+namespace {
+
+TEST(FrontendOutputTests, TestOutputStream) {
+  auto Invocation = std::make_shared<CompilerInvocation>();
+  Invocation->getPreprocessorOpts().addRemappedFile(
+      "test.cc", MemoryBuffer::getMemBuffer("").release());
+  Invocation->getFrontendOpts().Inputs.push_back(
+      FrontendInputFile("test.cc", InputKind::CXX));
+  Invocation->getFrontendOpts().ProgramAction = EmitBC;
+  Invocation->getTargetOpts().Triple = "i386-unknown-linux-gnu";
+  CompilerInstance Compiler;
+
+  SmallVector<char, 256> IRBuffer;
+  std::unique_ptr<raw_pwrite_stream> IRStream(
+      new raw_svector_ostream(IRBuffer));
+
+  Compiler.setOutputStream(std::move(IRStream));
+  Compiler.setInvocation(std::move(Invocation));
+  Compiler.createDiagnostics();
+
+  bool Success = ExecuteCompilerInvocation(&Compiler);
+  EXPECT_TRUE(Success);
+  EXPECT_TRUE(!IRBuffer.empty());
+  EXPECT_TRUE(StringRef(IRBuffer.data()).startswith("BC"));
+}
+}
diff --git a/unittests/Lex/CMakeLists.txt b/unittests/Lex/CMakeLists.txt
index ef0f06c..bb0f66d 100644
--- a/unittests/Lex/CMakeLists.txt
+++ b/unittests/Lex/CMakeLists.txt
@@ -4,12 +4,14 @@
 
 add_clang_unittest(LexTests
   HeaderMapTest.cpp
+  HeaderSearchTest.cpp
   LexerTest.cpp
   PPCallbacksTest.cpp
   PPConditionalDirectiveRecordTest.cpp
   )
 
 target_link_libraries(LexTests
+  PRIVATE
   clangAST
   clangBasic
   clangLex
diff --git a/unittests/Lex/HeaderSearchTest.cpp b/unittests/Lex/HeaderSearchTest.cpp
new file mode 100644
index 0000000..c2794f5
--- /dev/null
+++ b/unittests/Lex/HeaderSearchTest.cpp
@@ -0,0 +1,96 @@
+//===- unittests/Lex/HeaderSearchTest.cpp ------ HeaderSearch tests -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/MemoryBufferCache.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace {
+
+// The test fixture.
+class HeaderSearchTest : public ::testing::Test {
+protected:
+  HeaderSearchTest()
+      : VFS(new vfs::InMemoryFileSystem), FileMgr(FileMgrOpts, VFS),
+        DiagID(new DiagnosticIDs()),
+        Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
+        SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions),
+        Search(std::make_shared<HeaderSearchOptions>(), SourceMgr, Diags,
+               LangOpts, Target.get()) {
+    TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
+    Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
+  }
+
+  void addSearchDir(llvm::StringRef Dir) {
+    VFS->addFile(Dir, 0, llvm::MemoryBuffer::getMemBuffer(""), /*User=*/None,
+                 /*Group=*/None, llvm::sys::fs::file_type::directory_file);
+    const DirectoryEntry *DE = FileMgr.getDirectory(Dir);
+    assert(DE);
+    auto DL = DirectoryLookup(DE, SrcMgr::C_User, /*isFramework=*/false);
+    Search.AddSearchPath(DL, /*isAngled=*/false);
+  }
+
+  IntrusiveRefCntPtr<vfs::InMemoryFileSystem> VFS;
+  FileSystemOptions FileMgrOpts;
+  FileManager FileMgr;
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
+  DiagnosticsEngine Diags;
+  SourceManager SourceMgr;
+  LangOptions LangOpts;
+  std::shared_ptr<TargetOptions> TargetOpts;
+  IntrusiveRefCntPtr<TargetInfo> Target;
+  HeaderSearch Search;
+};
+
+TEST_F(HeaderSearchTest, NoSearchDir) {
+  EXPECT_EQ(Search.search_dir_size(), 0u);
+  EXPECT_EQ(Search.suggestPathToFileForDiagnostics("/x/y/z", /*WorkingDir=*/""),
+            "/x/y/z");
+}
+
+TEST_F(HeaderSearchTest, SimpleShorten) {
+  addSearchDir("/x");
+  addSearchDir("/x/y");
+  EXPECT_EQ(Search.suggestPathToFileForDiagnostics("/x/y/z", /*WorkingDir=*/""),
+            "z");
+  addSearchDir("/a/b/");
+  EXPECT_EQ(Search.suggestPathToFileForDiagnostics("/a/b/c", /*WorkingDir=*/""),
+            "c");
+}
+
+TEST_F(HeaderSearchTest, ShortenWithWorkingDir) {
+  addSearchDir("x/y");
+  EXPECT_EQ(Search.suggestPathToFileForDiagnostics("/a/b/c/x/y/z",
+                                                   /*WorkingDir=*/"/a/b/c"),
+            "z");
+}
+
+TEST_F(HeaderSearchTest, Dots) {
+  addSearchDir("/x/./y/");
+  EXPECT_EQ(Search.suggestPathToFileForDiagnostics("/x/y/./z",
+                                                   /*WorkingDir=*/""),
+            "z");
+  addSearchDir("a/.././c/");
+  EXPECT_EQ(Search.suggestPathToFileForDiagnostics("/m/n/./c/z",
+                                                   /*WorkingDir=*/"/m/n/"),
+            "z");
+}
+
+} // namespace
+} // namespace clang
diff --git a/unittests/Lex/LexerTest.cpp b/unittests/Lex/LexerTest.cpp
index 894f8c7..216672a 100644
--- a/unittests/Lex/LexerTest.cpp
+++ b/unittests/Lex/LexerTest.cpp
@@ -37,7 +37,7 @@
       DiagID(new DiagnosticIDs()),
       Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
       SourceMgr(Diags, FileMgr),
-      TargetOpts(new TargetOptions) 
+      TargetOpts(new TargetOptions)
   {
     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
     Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
@@ -474,8 +474,47 @@
 }
 
 TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
-  std::vector<Token> LexedTokens = Lex("  //  \\\n");
-  EXPECT_TRUE(LexedTokens.empty());
+  EXPECT_TRUE(Lex("  //  \\\n").empty());
+  EXPECT_TRUE(Lex("#include <\\\\").empty());
+  EXPECT_TRUE(Lex("#include <\\\\\n").empty());
+}
+
+TEST_F(LexerTest, StringizingRasString) {
+  // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
+  std::string String1 = R"(foo
+    {"bar":[]}
+    baz)";
+  // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
+  SmallString<128> String2;
+  String2 += String1.c_str();
+
+  // Corner cases.
+  std::string String3 = R"(\
+    \n
+    \\n
+    \\)";
+  SmallString<128> String4;
+  String4 += String3.c_str();
+  std::string String5 = R"(a\
+
+
+    \\b)";
+  SmallString<128> String6;
+  String6 += String5.c_str();
+
+  String1 = Lexer::Stringify(StringRef(String1));
+  Lexer::Stringify(String2);
+  String3 = Lexer::Stringify(StringRef(String3));
+  Lexer::Stringify(String4);
+  String5 = Lexer::Stringify(StringRef(String5));
+  Lexer::Stringify(String6);
+
+  EXPECT_EQ(String1, R"(foo\n    {\"bar\":[]}\n    baz)");
+  EXPECT_EQ(String2, R"(foo\n    {\"bar\":[]}\n    baz)");
+  EXPECT_EQ(String3, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
+  EXPECT_EQ(String4, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
+  EXPECT_EQ(String5, R"(a\\\n\n\n    \\\\b)");
+  EXPECT_EQ(String6, R"(a\\\n\n\n    \\\\b)");
 }
 
 } // anonymous namespace
diff --git a/unittests/Rename/CMakeLists.txt b/unittests/Rename/CMakeLists.txt
index 1c50fde..b625a7a 100644
--- a/unittests/Rename/CMakeLists.txt
+++ b/unittests/Rename/CMakeLists.txt
@@ -9,10 +9,12 @@
   RenameClassTest.cpp
   RenameEnumTest.cpp
   RenameAliasTest.cpp
+  RenameMemberTest.cpp
   RenameFunctionTest.cpp
   )
 
 target_link_libraries(ClangRenameTests
+  PRIVATE
   clangAST
   clangASTMatchers
   clangBasic
diff --git a/unittests/Rename/RenameFunctionTest.cpp b/unittests/Rename/RenameFunctionTest.cpp
index ef84b4b..b27bbe2 100644
--- a/unittests/Rename/RenameFunctionTest.cpp
+++ b/unittests/Rename/RenameFunctionTest.cpp
@@ -220,6 +220,25 @@
   CompareSnippets(Expected, After);
 }
 
+TEST_F(RenameFunctionTest, RenameTemplateFunctions) {
+  std::string Before = R"(
+      namespace na {
+      template<typename T> T X();
+      }
+      namespace na { void f() { X<int>(); } }
+      namespace nb { void g() { na::X          <int>(); } }
+      )";
+  std::string Expected = R"(
+      namespace na {
+      template<typename T> T Y();
+      }
+      namespace na { void f() { nb::Y<int>(); } }
+      namespace nb { void g() { Y<int>(); } }
+      )";
+  std::string After = runClangRenameOnCode(Before, "na::X", "nb::Y");
+  CompareSnippets(Expected, After);
+}
+
 TEST_F(RenameFunctionTest, RenameOutOfLineFunctionDecls) {
   std::string Before = R"(
       namespace na {
diff --git a/unittests/Rename/RenameMemberTest.cpp b/unittests/Rename/RenameMemberTest.cpp
new file mode 100644
index 0000000..fb8d558
--- /dev/null
+++ b/unittests/Rename/RenameMemberTest.cpp
@@ -0,0 +1,229 @@
+//===-- ClangMemberTests.cpp - unit tests for renaming class members ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangRenameTest.h"
+
+namespace clang {
+namespace clang_rename {
+namespace test {
+namespace {
+
+class RenameMemberTest : public ClangRenameTest {
+public:
+  RenameMemberTest() {
+    AppendToHeader(R"(
+        struct NA {
+          void Foo();
+          void NotFoo();
+          static void SFoo();
+          static void SNotFoo();
+          int Moo;
+        };
+        struct A {
+          virtual void Foo();
+          void NotFoo();
+          static void SFoo();
+          static void SNotFoo();
+          int Moo;
+          int NotMoo;
+          static int SMoo;
+        };
+        struct B : public A {
+          void Foo() override;
+        };
+        template <typename T> struct TA {
+          T* Foo();
+          T* NotFoo();
+          static T* SFoo();
+          static T* NotSFoo();
+        };
+        template <typename T> struct TB : public TA<T> {};
+        namespace ns {
+          template <typename T> struct TA {
+            T* Foo();
+            T* NotFoo();
+            static T* SFoo();
+            static T* NotSFoo();
+            static int SMoo;
+          };
+          template <typename T> struct TB : public TA<T> {};
+          struct A {
+            void Foo();
+            void NotFoo();
+            static void SFoo();
+            static void SNotFoo();
+          };
+          struct B : public A {};
+          struct C {
+            template <class T>
+            void SFoo(const T& t) {}
+            template <class T>
+            void Foo() {}
+          };
+        })");
+  }
+};
+
+INSTANTIATE_TEST_CASE_P(
+    DISABLED_RenameTemplatedClassStaticVariableTest, RenameMemberTest,
+    testing::ValuesIn(std::vector<Case>({
+        // FIXME: support renaming static variables for template classes.
+        {"void f() { ns::TA<int>::SMoo; }",
+         "void f() { ns::TA<int>::SMeh; }", "ns::TA::SMoo", "ns::TA::SMeh"},
+    })), );
+
+INSTANTIATE_TEST_CASE_P(
+    RenameMemberTest, RenameMemberTest,
+    testing::ValuesIn(std::vector<Case>({
+        // Normal methods and fields.
+        {"void f() { A a; a.Foo(); }", "void f() { A a; a.Bar(); }", "A::Foo",
+         "A::Bar"},
+        {"void f() { ns::A a; a.Foo(); }", "void f() { ns::A a; a.Bar(); }",
+         "ns::A::Foo", "ns::A::Bar"},
+        {"void f() { A a; int x = a.Moo; }", "void f() { A a; int x = a.Meh; }",
+         "A::Moo", "A::Meh"},
+        {"void f() { B b; b.Foo(); }", "void f() { B b; b.Bar(); }", "B::Foo",
+         "B::Bar"},
+        {"void f() { ns::B b; b.Foo(); }", "void f() { ns::B b; b.Bar(); }",
+         "ns::A::Foo", "ns::A::Bar"},
+        {"void f() { B b; int x = b.Moo; }", "void f() { B b; int x = b.Meh; }",
+         "A::Moo", "A::Meh"},
+
+        // Static methods.
+        {"void f() { A::SFoo(); }", "void f() { A::SBar(); }", "A::SFoo",
+         "A::SBar"},
+        {"void f() { ns::A::SFoo(); }", "void f() { ns::A::SBar(); }",
+         "ns::A::SFoo", "ns::A::SBar"},
+        {"void f() { TA<int>::SFoo(); }", "void f() { TA<int>::SBar(); }",
+         "TA::SFoo", "TA::SBar"},
+        {"void f() { ns::TA<int>::SFoo(); }",
+         "void f() { ns::TA<int>::SBar(); }", "ns::TA::SFoo", "ns::TA::SBar"},
+
+        // Static variables.
+        {"void f() { A::SMoo; }",
+         "void f() { A::SMeh; }", "A::SMoo", "A::SMeh"},
+
+        // Templated methods.
+        {"void f() { TA<int> a; a.Foo(); }", "void f() { TA<int> a; a.Bar(); }",
+         "TA::Foo", "TA::Bar"},
+        {"void f() { ns::TA<int> a; a.Foo(); }",
+         "void f() { ns::TA<int> a; a.Bar(); }", "ns::TA::Foo", "ns::TA::Bar"},
+        {"void f() { TB<int> b; b.Foo(); }", "void f() { TB<int> b; b.Bar(); }",
+         "TA::Foo", "TA::Bar"},
+        {"void f() { ns::TB<int> b; b.Foo(); }",
+         "void f() { ns::TB<int> b; b.Bar(); }", "ns::TA::Foo", "ns::TA::Bar"},
+        {"void f() { ns::C c; int x; c.SFoo(x); }",
+         "void f() { ns::C c; int x; c.SBar(x); }", "ns::C::SFoo",
+         "ns::C::SBar"},
+        {"void f() { ns::C c; c.Foo<int>(); }",
+         "void f() { ns::C c; c.Bar<int>(); }", "ns::C::Foo", "ns::C::Bar"},
+
+        // Pointers to methods.
+        {"void f() { auto p = &A::Foo; }", "void f() { auto p = &A::Bar; }",
+         "A::Foo", "A::Bar"},
+        {"void f() { auto p = &A::SFoo; }", "void f() { auto p = &A::SBar; }",
+         "A::SFoo", "A::SBar"},
+        {"void f() { auto p = &B::Foo; }", "void f() { auto p = &B::Bar; }",
+         "B::Foo", "B::Bar"},
+        {"void f() { auto p = &ns::A::Foo; }",
+         "void f() { auto p = &ns::A::Bar; }", "ns::A::Foo", "ns::A::Bar"},
+        {"void f() { auto p = &ns::A::SFoo; }",
+         "void f() { auto p = &ns::A::SBar; }", "ns::A::SFoo", "ns::A::SBar"},
+        {"void f() { auto p = &ns::C::SFoo<int>; }",
+         "void f() { auto p = &ns::C::SBar<int>; }", "ns::C::SFoo",
+         "ns::C::SBar"},
+
+        // These methods are not declared or overridden in the subclass B, we
+        // have to use the qualified name with parent class A to identify them.
+        {"void f() { auto p = &ns::B::Foo; }",
+         "void f() { auto p = &ns::B::Bar; }", "ns::A::Foo", "ns::B::Bar"},
+        {"void f() { B::SFoo(); }", "void f() { B::SBar(); }", "A::SFoo",
+         "B::SBar"},
+        {"void f() { ns::B::SFoo(); }", "void f() { ns::B::SBar(); }",
+         "ns::A::SFoo", "ns::B::SBar"},
+        {"void f() { auto p = &B::SFoo; }", "void f() { auto p = &B::SBar; }",
+         "A::SFoo", "B::SBar"},
+        {"void f() { auto p = &ns::B::SFoo; }",
+         "void f() { auto p = &ns::B::SBar; }", "ns::A::SFoo", "ns::B::SBar"},
+        {"void f() { TB<int>::SFoo(); }", "void f() { TB<int>::SBar(); }",
+         "TA::SFoo", "TB::SBar"},
+        {"void f() { ns::TB<int>::SFoo(); }",
+         "void f() { ns::TB<int>::SBar(); }", "ns::TA::SFoo", "ns::TB::SBar"},
+    })), );
+
+TEST_P(RenameMemberTest, RenameMembers) {
+  auto Param = GetParam();
+  assert(!Param.OldName.empty());
+  assert(!Param.NewName.empty());
+  std::string Actual =
+      runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName);
+  CompareSnippets(Param.After, Actual);
+}
+
+TEST_F(RenameMemberTest, RenameMemberInsideClassMethods) {
+  std::string Before = R"(
+      struct X {
+        int Moo;
+        void Baz() { Moo = 1; }
+      };)";
+  std::string Expected = R"(
+      struct X {
+        int Meh;
+        void Baz() { Meh = 1; }
+      };)";
+  std::string After = runClangRenameOnCode(Before, "X::Moo", "Y::Meh");
+  CompareSnippets(Expected, After);
+}
+
+TEST_F(RenameMemberTest, RenameMethodInsideClassMethods) {
+  std::string Before = R"(
+      struct X {
+        void Foo() {}
+        void Baz() { Foo(); }
+      };)";
+  std::string Expected = R"(
+      struct X {
+        void Bar() {}
+        void Baz() { Bar(); }
+      };)";
+  std::string After = runClangRenameOnCode(Before, "X::Foo", "X::Bar");
+  CompareSnippets(Expected, After);
+}
+
+TEST_F(RenameMemberTest, RenameCtorInitializer) {
+  std::string Before = R"(
+      class X {
+      public:
+       X();
+       A a;
+       A a2;
+       B b;
+      };
+
+      X::X():a(), b() {}
+      )";
+  std::string Expected = R"(
+      class X {
+      public:
+       X();
+       A bar;
+       A a2;
+       B b;
+      };
+
+      X::X():bar(), b() {}
+      )";
+  std::string After = runClangRenameOnCode(Before, "X::a", "X::bar");
+  CompareSnippets(Expected, After);
+}
+
+} // anonymous namespace
+} // namespace test
+} // namespace clang_rename
+} // namesdpace clang
diff --git a/unittests/Rewrite/CMakeLists.txt b/unittests/Rewrite/CMakeLists.txt
index bee7ff6..8edd9ba 100644
--- a/unittests/Rewrite/CMakeLists.txt
+++ b/unittests/Rewrite/CMakeLists.txt
@@ -6,5 +6,6 @@
   RewriteBufferTest.cpp
   )
 target_link_libraries(RewriteTests
+  PRIVATE
   clangRewrite
   )
diff --git a/unittests/Sema/CMakeLists.txt b/unittests/Sema/CMakeLists.txt
index c25db81..45460f1 100644
--- a/unittests/Sema/CMakeLists.txt
+++ b/unittests/Sema/CMakeLists.txt
@@ -4,9 +4,11 @@
 
 add_clang_unittest(SemaTests
   ExternalSemaSourceTest.cpp
+  CodeCompleteTest.cpp
   )
 
 target_link_libraries(SemaTests
+  PRIVATE
   clangAST
   clangBasic
   clangFrontend
diff --git a/unittests/Sema/CodeCompleteTest.cpp b/unittests/Sema/CodeCompleteTest.cpp
new file mode 100644
index 0000000..8e888cb
--- /dev/null
+++ b/unittests/Sema/CodeCompleteTest.cpp
@@ -0,0 +1,134 @@
+//=== unittests/Sema/CodeCompleteTest.cpp - Code Complete tests ==============//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Parse/ParseAST.h"
+#include "clang/Sema/Sema.h"
+#include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Tooling/Tooling.h"
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+namespace {
+
+using namespace clang;
+using namespace clang::tooling;
+using ::testing::UnorderedElementsAre;
+
+const char TestCCName[] = "test.cc";
+using VisitedContextResults = std::vector<std::string>;
+
+class VisitedContextFinder: public CodeCompleteConsumer {
+public:
+  VisitedContextFinder(VisitedContextResults &Results)
+      : CodeCompleteConsumer(/*CodeCompleteOpts=*/{},
+                             /*CodeCompleteConsumer*/ false),
+        VCResults(Results),
+        CCTUInfo(std::make_shared<GlobalCodeCompletionAllocator>()) {}
+
+  void ProcessCodeCompleteResults(Sema &S, CodeCompletionContext Context,
+                                  CodeCompletionResult *Results,
+                                  unsigned NumResults) override {
+    VisitedContexts = Context.getVisitedContexts();
+    VCResults = getVisitedNamespace();
+  }
+
+  CodeCompletionAllocator &getAllocator() override {
+    return CCTUInfo.getAllocator();
+  }
+
+  CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
+
+  std::vector<std::string> getVisitedNamespace() const {
+    std::vector<std::string> NSNames;
+    for (const auto *Context : VisitedContexts)
+      if (const auto *NS = llvm::dyn_cast<NamespaceDecl>(Context))
+        NSNames.push_back(NS->getQualifiedNameAsString());
+    return NSNames;
+  }
+
+private:
+  VisitedContextResults& VCResults;
+  CodeCompletionTUInfo CCTUInfo;
+  CodeCompletionContext::VisitedContextSet VisitedContexts;
+};
+
+class CodeCompleteAction : public SyntaxOnlyAction {
+public:
+  CodeCompleteAction(ParsedSourceLocation P, VisitedContextResults &Results)
+      : CompletePosition(std::move(P)), VCResults(Results) {}
+
+  bool BeginInvocation(CompilerInstance &CI) override {
+    CI.getFrontendOpts().CodeCompletionAt = CompletePosition;
+    CI.setCodeCompletionConsumer(new VisitedContextFinder(VCResults));
+    return true;
+  }
+
+private:
+  // 1-based code complete position <Line, Col>;
+  ParsedSourceLocation CompletePosition;
+  VisitedContextResults& VCResults;
+};
+
+ParsedSourceLocation offsetToPosition(llvm::StringRef Code, size_t Offset) {
+  Offset = std::min(Code.size(), Offset);
+  StringRef Before = Code.substr(0, Offset);
+  int Lines = Before.count('\n');
+  size_t PrevNL = Before.rfind('\n');
+  size_t StartOfLine = (PrevNL == StringRef::npos) ? 0 : (PrevNL + 1);
+  return {TestCCName, static_cast<unsigned>(Lines + 1),
+          static_cast<unsigned>(Offset - StartOfLine + 1)};
+}
+
+VisitedContextResults runCodeCompleteOnCode(StringRef Code) {
+  VisitedContextResults Results;
+  auto TokenOffset = Code.find('^');
+  assert(TokenOffset != StringRef::npos &&
+         "Completion token ^ wasn't found in Code.");
+  std::string WithoutToken = Code.take_front(TokenOffset);
+  WithoutToken += Code.drop_front(WithoutToken.size() + 1);
+  assert(StringRef(WithoutToken).find('^') == StringRef::npos &&
+         "expected exactly one completion token ^ inside the code");
+
+  auto Action = llvm::make_unique<CodeCompleteAction>(
+      offsetToPosition(WithoutToken, TokenOffset), Results);
+  clang::tooling::runToolOnCodeWithArgs(Action.release(), Code, {"-std=c++11"},
+                                        TestCCName);
+  return Results;
+}
+
+TEST(SemaCodeCompleteTest, VisitedNSForValidQualifiedId) {
+  auto VisitedNS = runCodeCompleteOnCode(R"cpp(
+     namespace ns1 {}
+     namespace ns2 {}
+     namespace ns3 {}
+     namespace ns3 { namespace nns3 {} }
+
+     namespace foo {
+     using namespace ns1;
+     namespace ns4 {} // not visited
+     namespace { using namespace ns2; }
+     inline namespace bar { using namespace ns3::nns3; }
+     } // foo
+     namespace ns { foo::^ }
+  )cpp");
+  EXPECT_THAT(VisitedNS, UnorderedElementsAre("foo", "ns1", "ns2", "ns3::nns3",
+                                              "foo::(anonymous)"));
+}
+
+TEST(SemaCodeCompleteTest, VisitedNSForInvalideQualifiedId) {
+  auto VisitedNS = runCodeCompleteOnCode(R"cpp(
+     namespace ns { foo::^ }
+  )cpp");
+  EXPECT_TRUE(VisitedNS.empty());
+}
+
+} // namespace
diff --git a/unittests/StaticAnalyzer/AnalyzerOptionsTest.cpp b/unittests/StaticAnalyzer/AnalyzerOptionsTest.cpp
index 891c88d..cd2289d 100644
--- a/unittests/StaticAnalyzer/AnalyzerOptionsTest.cpp
+++ b/unittests/StaticAnalyzer/AnalyzerOptionsTest.cpp
@@ -53,9 +53,9 @@
   // search mode.
   CheckerOneMock CheckerOne;
   EXPECT_TRUE(Opts.getBooleanOption("Option", false, &CheckerOne));
-  // The package option is overriden with a checker option.
+  // The package option is overridden with a checker option.
   EXPECT_TRUE(Opts.getBooleanOption("Option", false, &CheckerOne, true));
-  // The Outer package option is overriden by the Inner package option. No
+  // The Outer package option is overridden by the Inner package option. No
   // package option is specified.
   EXPECT_TRUE(Opts.getBooleanOption("Option2", false, &CheckerOne, true));
   // No package option is specified and search in packages is turned off. The
diff --git a/unittests/StaticAnalyzer/CMakeLists.txt b/unittests/StaticAnalyzer/CMakeLists.txt
index 4aa5efb..4ca0be5 100644
--- a/unittests/StaticAnalyzer/CMakeLists.txt
+++ b/unittests/StaticAnalyzer/CMakeLists.txt
@@ -7,6 +7,7 @@
   )
 
 target_link_libraries(StaticAnalysisTests
+  PRIVATE
   clangBasic
   clangAnalysis
   clangStaticAnalyzerCore 
diff --git a/unittests/Tooling/ASTSelectionTest.cpp b/unittests/Tooling/ASTSelectionTest.cpp
index 79e89f9..2f5df8f 100644
--- a/unittests/Tooling/ASTSelectionTest.cpp
+++ b/unittests/Tooling/ASTSelectionTest.cpp
@@ -840,4 +840,246 @@
       });
 }
 
+TEST(ASTSelectionFinder, SelectEntireDeclStmtRange) {
+  StringRef Source = R"(
+void f(int x, int y) {
+   int a = x * y;
+}
+)";
+  // 'int a = x * y'
+  findSelectedASTNodesWithRange(
+      Source, {3, 4}, FileRange{{3, 4}, {3, 17}},
+      [](SourceRange SelectionRange, Optional<SelectedASTNode> Node) {
+        EXPECT_TRUE(Node);
+        Optional<CodeRangeASTSelection> SelectedCode =
+            CodeRangeASTSelection::create(SelectionRange, std::move(*Node));
+        EXPECT_TRUE(SelectedCode);
+        EXPECT_EQ(SelectedCode->size(), 1u);
+        EXPECT_TRUE(isa<DeclStmt>((*SelectedCode)[0]));
+        ArrayRef<SelectedASTNode::ReferenceType> Parents =
+            SelectedCode->getParents();
+        EXPECT_EQ(Parents.size(), 3u);
+        EXPECT_TRUE(
+            isa<TranslationUnitDecl>(Parents[0].get().Node.get<Decl>()));
+        // Function 'f' definition.
+        EXPECT_TRUE(isa<FunctionDecl>(Parents[1].get().Node.get<Decl>()));
+        // Function body of function 'F'.
+        EXPECT_TRUE(isa<CompoundStmt>(Parents[2].get().Node.get<Stmt>()));
+      });
+}
+
+TEST(ASTSelectionFinder, SelectEntireDeclStmtRangeWithMultipleDecls) {
+  StringRef Source = R"(
+void f(int x, int y) {
+   int a = x * y, b = x - y;
+}
+)";
+  // 'b = x - y'
+  findSelectedASTNodesWithRange(
+      Source, {3, 19}, FileRange{{3, 19}, {3, 28}},
+      [](SourceRange SelectionRange, Optional<SelectedASTNode> Node) {
+        EXPECT_TRUE(Node);
+        Optional<CodeRangeASTSelection> SelectedCode =
+            CodeRangeASTSelection::create(SelectionRange, std::move(*Node));
+        EXPECT_TRUE(SelectedCode);
+        EXPECT_EQ(SelectedCode->size(), 1u);
+        EXPECT_TRUE(isa<DeclStmt>((*SelectedCode)[0]));
+        ArrayRef<SelectedASTNode::ReferenceType> Parents =
+            SelectedCode->getParents();
+        EXPECT_EQ(Parents.size(), 3u);
+        EXPECT_TRUE(
+            isa<TranslationUnitDecl>(Parents[0].get().Node.get<Decl>()));
+        // Function 'f' definition.
+        EXPECT_TRUE(isa<FunctionDecl>(Parents[1].get().Node.get<Decl>()));
+        // Function body of function 'F'.
+        EXPECT_TRUE(isa<CompoundStmt>(Parents[2].get().Node.get<Stmt>()));
+      });
+}
+
+TEST(ASTSelectionFinder, SimpleCodeRangeASTSelectionInObjCMethod) {
+  StringRef Source = R"(@interface I @end
+@implementation I
+- (void) f:(int)x with:(int) y {
+  int z = x;
+  [self f: 2 with: 3];
+  if (x == 0) {
+    return;
+  }
+  x = 1;
+  return;
+}
+- (void)f2 {
+  int m = 0;
+}
+@end
+)";
+  // Range that spans multiple methods is an invalid code range.
+  findSelectedASTNodesWithRange(
+      Source, {9, 2}, FileRange{{9, 2}, {13, 1}},
+      [](SourceRange SelectionRange, Optional<SelectedASTNode> Node) {
+        EXPECT_TRUE(Node);
+        Optional<CodeRangeASTSelection> SelectedCode =
+            CodeRangeASTSelection::create(SelectionRange, std::move(*Node));
+        EXPECT_FALSE(SelectedCode);
+      },
+      SelectionFinderVisitor::Lang_OBJC);
+  // Just 'z = x;':
+  findSelectedASTNodesWithRange(
+      Source, {4, 2}, FileRange{{4, 2}, {4, 13}},
+      [](SourceRange SelectionRange, Optional<SelectedASTNode> Node) {
+        EXPECT_TRUE(Node);
+        Optional<CodeRangeASTSelection> SelectedCode =
+            CodeRangeASTSelection::create(SelectionRange, std::move(*Node));
+        EXPECT_TRUE(SelectedCode);
+        EXPECT_EQ(SelectedCode->size(), 1u);
+        EXPECT_TRUE(isa<DeclStmt>((*SelectedCode)[0]));
+        ArrayRef<SelectedASTNode::ReferenceType> Parents =
+            SelectedCode->getParents();
+        EXPECT_EQ(Parents.size(), 4u);
+        EXPECT_TRUE(
+            isa<TranslationUnitDecl>(Parents[0].get().Node.get<Decl>()));
+        // 'I' @implementation.
+        EXPECT_TRUE(isa<ObjCImplDecl>(Parents[1].get().Node.get<Decl>()));
+        // Function 'f' definition.
+        EXPECT_TRUE(isa<ObjCMethodDecl>(Parents[2].get().Node.get<Decl>()));
+        // Function body of function 'F'.
+        EXPECT_TRUE(isa<CompoundStmt>(Parents[3].get().Node.get<Stmt>()));
+      },
+      SelectionFinderVisitor::Lang_OBJC);
+  // From '[self f: 2 with: 3]' until just before 'x = 1;':
+  findSelectedASTNodesWithRange(
+      Source, {5, 2}, FileRange{{5, 2}, {9, 1}},
+      [](SourceRange SelectionRange, Optional<SelectedASTNode> Node) {
+        EXPECT_TRUE(Node);
+        Optional<CodeRangeASTSelection> SelectedCode =
+            CodeRangeASTSelection::create(SelectionRange, std::move(*Node));
+        EXPECT_TRUE(SelectedCode);
+        EXPECT_EQ(SelectedCode->size(), 2u);
+        EXPECT_TRUE(isa<ObjCMessageExpr>((*SelectedCode)[0]));
+        EXPECT_TRUE(isa<IfStmt>((*SelectedCode)[1]));
+        ArrayRef<SelectedASTNode::ReferenceType> Parents =
+            SelectedCode->getParents();
+        EXPECT_EQ(Parents.size(), 4u);
+        EXPECT_TRUE(
+            isa<TranslationUnitDecl>(Parents[0].get().Node.get<Decl>()));
+        // 'I' @implementation.
+        EXPECT_TRUE(isa<ObjCImplDecl>(Parents[1].get().Node.get<Decl>()));
+        // Function 'f' definition.
+        EXPECT_TRUE(isa<ObjCMethodDecl>(Parents[2].get().Node.get<Decl>()));
+        // Function body of function 'F'.
+        EXPECT_TRUE(isa<CompoundStmt>(Parents[3].get().Node.get<Stmt>()));
+      },
+      SelectionFinderVisitor::Lang_OBJC);
+}
+
+TEST(ASTSelectionFinder, CanonicalizeObjCStringLiteral) {
+  StringRef Source = R"(
+void foo() {
+  (void)@"test";
+}
+      )";
+  // Just '"test"':
+  findSelectedASTNodesWithRange(
+      Source, {3, 10}, FileRange{{3, 10}, {3, 16}},
+      [](SourceRange SelectionRange, Optional<SelectedASTNode> Node) {
+        EXPECT_TRUE(Node);
+        Optional<CodeRangeASTSelection> SelectedCode =
+            CodeRangeASTSelection::create(SelectionRange, std::move(*Node));
+        EXPECT_TRUE(SelectedCode);
+        EXPECT_EQ(SelectedCode->size(), 1u);
+        EXPECT_TRUE(isa<ObjCStringLiteral>((*SelectedCode)[0]));
+      },
+      SelectionFinderVisitor::Lang_OBJC);
+  // Just 'test':
+  findSelectedASTNodesWithRange(
+      Source, {3, 11}, FileRange{{3, 11}, {3, 15}},
+      [](SourceRange SelectionRange, Optional<SelectedASTNode> Node) {
+        EXPECT_TRUE(Node);
+        Optional<CodeRangeASTSelection> SelectedCode =
+            CodeRangeASTSelection::create(SelectionRange, std::move(*Node));
+        EXPECT_TRUE(SelectedCode);
+        EXPECT_EQ(SelectedCode->size(), 1u);
+        EXPECT_TRUE(isa<ObjCStringLiteral>((*SelectedCode)[0]));
+      },
+      SelectionFinderVisitor::Lang_OBJC);
+}
+
+TEST(ASTSelectionFinder, CanonicalizeMemberCalleeToCall) {
+  StringRef Source = R"(
+class AClass { public:
+  void method();
+  int afield;
+  void selectWholeCallWhenJustMethodSelected(int &i) {
+    method();
+  }
+};
+void selectWholeCallWhenJustMethodSelected() {
+  AClass a;
+  a.method();
+}
+void dontSelectArgument(AClass &a) {
+  a.selectWholeCallWhenJustMethodSelected(a.afield);
+}
+     )";
+  // Just 'method' with implicit 'this':
+  findSelectedASTNodesWithRange(
+      Source, {6, 5}, FileRange{{6, 5}, {6, 11}},
+      [](SourceRange SelectionRange, Optional<SelectedASTNode> Node) {
+        EXPECT_TRUE(Node);
+        Optional<CodeRangeASTSelection> SelectedCode =
+            CodeRangeASTSelection::create(SelectionRange, std::move(*Node));
+        EXPECT_TRUE(SelectedCode);
+        EXPECT_EQ(SelectedCode->size(), 1u);
+        EXPECT_TRUE(isa<CXXMemberCallExpr>((*SelectedCode)[0]));
+      });
+  // Just 'method':
+  findSelectedASTNodesWithRange(
+      Source, {11, 5}, FileRange{{11, 5}, {11, 11}},
+      [](SourceRange SelectionRange, Optional<SelectedASTNode> Node) {
+        EXPECT_TRUE(Node);
+        Optional<CodeRangeASTSelection> SelectedCode =
+            CodeRangeASTSelection::create(SelectionRange, std::move(*Node));
+        EXPECT_TRUE(SelectedCode);
+        EXPECT_EQ(SelectedCode->size(), 1u);
+        EXPECT_TRUE(isa<CXXMemberCallExpr>((*SelectedCode)[0]));
+      });
+  // Just 'afield', which should not select the call.
+  findSelectedASTNodesWithRange(
+      Source, {14, 5}, FileRange{{14, 45}, {14, 51}},
+      [](SourceRange SelectionRange, Optional<SelectedASTNode> Node) {
+        EXPECT_TRUE(Node);
+        Optional<CodeRangeASTSelection> SelectedCode =
+            CodeRangeASTSelection::create(SelectionRange, std::move(*Node));
+        EXPECT_TRUE(SelectedCode);
+        EXPECT_EQ(SelectedCode->size(), 1u);
+        EXPECT_FALSE(isa<CXXMemberCallExpr>((*SelectedCode)[0]));
+      });
+}
+
+TEST(ASTSelectionFinder, CanonicalizeFuncCalleeToCall) {
+  StringRef Source = R"(
+void function();
+
+void test() {
+  function();
+}
+     )";
+  // Just 'function':
+  findSelectedASTNodesWithRange(
+      Source, {5, 3}, FileRange{{5, 3}, {5, 11}},
+      [](SourceRange SelectionRange, Optional<SelectedASTNode> Node) {
+        EXPECT_TRUE(Node);
+        Node->dump();
+        Optional<CodeRangeASTSelection> SelectedCode =
+            CodeRangeASTSelection::create(SelectionRange, std::move(*Node));
+        EXPECT_TRUE(SelectedCode);
+        EXPECT_EQ(SelectedCode->size(), 1u);
+        EXPECT_TRUE(isa<CallExpr>((*SelectedCode)[0]));
+        EXPECT_TRUE(isa<CompoundStmt>(
+            SelectedCode->getParents()[SelectedCode->getParents().size() - 1]
+                .get()
+                .Node.get<Stmt>()));
+      });
+}
+
 } // end anonymous namespace
diff --git a/unittests/Tooling/CMakeLists.txt b/unittests/Tooling/CMakeLists.txt
index abd82c7..59ab01b 100644
--- a/unittests/Tooling/CMakeLists.txt
+++ b/unittests/Tooling/CMakeLists.txt
@@ -16,6 +16,7 @@
   CommentHandlerTest.cpp
   CompilationDatabaseTest.cpp
   DiagnosticsYamlTest.cpp
+  ExecutionTest.cpp
   FixItTest.cpp
   LexicallyOrderedRecursiveASTVisitorTest.cpp
   LookupTest.cpp
@@ -24,6 +25,7 @@
   RecursiveASTVisitorTestCallVisitor.cpp
   RecursiveASTVisitorTestDeclVisitor.cpp
   RecursiveASTVisitorTestExprVisitor.cpp
+  RecursiveASTVisitorTestPostOrderVisitor.cpp
   RecursiveASTVisitorTestTypeLocVisitor.cpp
   RefactoringActionRulesTest.cpp
   RefactoringCallbacksTest.cpp
@@ -34,6 +36,7 @@
   )
 
 target_link_libraries(ToolingTests
+  PRIVATE
   clangAST
   clangASTMatchers
   clangBasic
diff --git a/unittests/Tooling/ExecutionTest.cpp b/unittests/Tooling/ExecutionTest.cpp
new file mode 100644
index 0000000..26db8c6
--- /dev/null
+++ b/unittests/Tooling/ExecutionTest.cpp
@@ -0,0 +1,293 @@
+//===- unittest/Tooling/ExecutionTest.cpp - Tool execution tests. --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Execution.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Frontend/ASTUnit.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/AllTUsExecution.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "clang/Tooling/StandaloneExecution.h"
+#include "clang/Tooling/ToolExecutorPluginRegistry.h"
+#include "clang/Tooling/Tooling.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <algorithm>
+#include <string>
+
+namespace clang {
+namespace tooling {
+
+namespace {
+
+// This traverses the AST and outputs function name as key and "1" as value for
+// each function declaration.
+class ASTConsumerWithResult
+    : public ASTConsumer,
+      public RecursiveASTVisitor<ASTConsumerWithResult> {
+public:
+  using ASTVisitor = RecursiveASTVisitor<ASTConsumerWithResult>;
+
+  explicit ASTConsumerWithResult(ExecutionContext *Context) : Context(Context) {
+    assert(Context != nullptr);
+  }
+
+  void HandleTranslationUnit(clang::ASTContext &Context) override {
+    TraverseDecl(Context.getTranslationUnitDecl());
+  }
+
+  bool TraverseFunctionDecl(clang::FunctionDecl *Decl) {
+    Context->reportResult(Decl->getNameAsString(),
+                          Context->getRevision() + ":" + Context->getCorpus() +
+                              ":" + Context->getCurrentCompilationUnit() +
+                              "/1");
+    return ASTVisitor::TraverseFunctionDecl(Decl);
+  }
+
+private:
+  ExecutionContext *const Context;
+};
+
+class ReportResultAction : public ASTFrontendAction {
+public:
+  explicit ReportResultAction(ExecutionContext *Context) : Context(Context) {
+    assert(Context != nullptr);
+  }
+
+protected:
+  std::unique_ptr<clang::ASTConsumer>
+  CreateASTConsumer(clang::CompilerInstance &compiler,
+                    StringRef /* dummy */) override {
+    std::unique_ptr<clang::ASTConsumer> ast_consumer{
+        new ASTConsumerWithResult(Context)};
+    return ast_consumer;
+  }
+
+private:
+  ExecutionContext *const Context;
+};
+
+class ReportResultActionFactory : public FrontendActionFactory {
+public:
+  ReportResultActionFactory(ExecutionContext *Context) : Context(Context) {}
+  FrontendAction *create() override { return new ReportResultAction(Context); }
+
+private:
+  ExecutionContext *const Context;
+};
+
+} // namespace
+
+class TestToolExecutor : public ToolExecutor {
+public:
+  static const char *ExecutorName;
+
+  TestToolExecutor(CommonOptionsParser Options)
+      : OptionsParser(std::move(Options)) {}
+
+  StringRef getExecutorName() const override { return ExecutorName; }
+
+  llvm::Error
+  execute(llvm::ArrayRef<std::pair<std::unique_ptr<FrontendActionFactory>,
+                                   ArgumentsAdjuster>>) override {
+    return llvm::Error::success();
+  }
+
+  ExecutionContext *getExecutionContext() override { return nullptr; };
+
+  ToolResults *getToolResults() override { return nullptr; }
+
+  llvm::ArrayRef<std::string> getSourcePaths() const {
+    return OptionsParser.getSourcePathList();
+  }
+
+  void mapVirtualFile(StringRef FilePath, StringRef Content) override {
+    VFS[FilePath] = Content;
+  }
+
+private:
+  CommonOptionsParser OptionsParser;
+  std::string SourcePaths;
+  std::map<std::string, std::string> VFS;
+};
+
+const char *TestToolExecutor::ExecutorName = "test-executor";
+
+class TestToolExecutorPlugin : public ToolExecutorPlugin {
+public:
+  llvm::Expected<std::unique_ptr<ToolExecutor>>
+  create(CommonOptionsParser &OptionsParser) override {
+    return llvm::make_unique<TestToolExecutor>(std::move(OptionsParser));
+  }
+};
+
+static ToolExecutorPluginRegistry::Add<TestToolExecutorPlugin>
+    X("test-executor", "Plugin for TestToolExecutor.");
+
+llvm::cl::OptionCategory TestCategory("execution-test options");
+
+TEST(CreateToolExecutorTest, FailedCreateExecutorUndefinedFlag) {
+  std::vector<const char *> argv = {"prog", "--fake_flag_no_no_no", "f"};
+  int argc = argv.size();
+  auto Executor = internal::createExecutorFromCommandLineArgsImpl(
+      argc, &argv[0], TestCategory);
+  ASSERT_FALSE((bool)Executor);
+  llvm::consumeError(Executor.takeError());
+}
+
+TEST(CreateToolExecutorTest, RegisterFlagsBeforeReset) {
+  llvm::cl::opt<std::string> BeforeReset(
+      "before_reset", llvm::cl::desc("Defined before reset."),
+      llvm::cl::init(""));
+
+  llvm::cl::ResetAllOptionOccurrences();
+
+  std::vector<const char *> argv = {"prog", "--before_reset=set", "f"};
+  int argc = argv.size();
+  auto Executor = internal::createExecutorFromCommandLineArgsImpl(
+      argc, &argv[0], TestCategory);
+  ASSERT_TRUE((bool)Executor);
+  EXPECT_EQ(BeforeReset, "set");
+  BeforeReset.removeArgument();
+}
+
+TEST(CreateToolExecutorTest, CreateStandaloneToolExecutor) {
+  std::vector<const char *> argv = {"prog", "standalone.cpp"};
+  int argc = argv.size();
+  auto Executor = internal::createExecutorFromCommandLineArgsImpl(
+      argc, &argv[0], TestCategory);
+  ASSERT_TRUE((bool)Executor);
+  EXPECT_EQ(Executor->get()->getExecutorName(),
+            StandaloneToolExecutor::ExecutorName);
+}
+
+TEST(CreateToolExecutorTest, CreateTestToolExecutor) {
+  std::vector<const char *> argv = {"prog", "test.cpp",
+                                    "--executor=test-executor"};
+  int argc = argv.size();
+  auto Executor = internal::createExecutorFromCommandLineArgsImpl(
+      argc, &argv[0], TestCategory);
+  ASSERT_TRUE((bool)Executor);
+  EXPECT_EQ(Executor->get()->getExecutorName(), TestToolExecutor::ExecutorName);
+}
+
+TEST(StandaloneToolTest, SynctaxOnlyActionOnSimpleCode) {
+  FixedCompilationDatabase Compilations(".", std::vector<std::string>());
+  StandaloneToolExecutor Executor(Compilations,
+                                  std::vector<std::string>(1, "a.cc"));
+  Executor.mapVirtualFile("a.cc", "int x = 0;");
+
+  auto Err = Executor.execute(newFrontendActionFactory<SyntaxOnlyAction>(),
+                              getClangSyntaxOnlyAdjuster());
+  ASSERT_TRUE(!Err);
+}
+
+TEST(StandaloneToolTest, SimpleAction) {
+  FixedCompilationDatabase Compilations(".", std::vector<std::string>());
+  StandaloneToolExecutor Executor(Compilations,
+                                  std::vector<std::string>(1, "a.cc"));
+  Executor.mapVirtualFile("a.cc", "int x = 0;");
+
+  auto Err = Executor.execute(std::unique_ptr<FrontendActionFactory>(
+      new ReportResultActionFactory(Executor.getExecutionContext())));
+  ASSERT_TRUE(!Err);
+  auto KVs = Executor.getToolResults()->AllKVResults();
+  ASSERT_EQ(KVs.size(), 0u);
+}
+
+TEST(StandaloneToolTest, SimpleActionWithResult) {
+  FixedCompilationDatabase Compilations(".", std::vector<std::string>());
+  StandaloneToolExecutor Executor(Compilations,
+                                  std::vector<std::string>(1, "a.cc"));
+  Executor.mapVirtualFile("a.cc", "int x = 0; void f() {}");
+
+  auto Err = Executor.execute(std::unique_ptr<FrontendActionFactory>(
+      new ReportResultActionFactory(Executor.getExecutionContext())));
+  ASSERT_TRUE(!Err);
+  auto KVs = Executor.getToolResults()->AllKVResults();
+  ASSERT_EQ(KVs.size(), 1u);
+  EXPECT_EQ("f", KVs[0].first);
+  // Currently the standlone executor returns empty corpus, revision, and
+  // compilation unit.
+  EXPECT_EQ("::/1", KVs[0].second);
+
+  Executor.getToolResults()->forEachResult(
+      [](StringRef, StringRef Value) { EXPECT_EQ("::/1", Value); });
+}
+
+class FixedCompilationDatabaseWithFiles : public CompilationDatabase {
+public:
+  FixedCompilationDatabaseWithFiles(Twine Directory,
+                                    ArrayRef<std::string> Files,
+                                    ArrayRef<std::string> CommandLine)
+      : FixedCompilations(Directory, CommandLine), Files(Files) {}
+
+  std::vector<CompileCommand>
+  getCompileCommands(StringRef FilePath) const override {
+    return FixedCompilations.getCompileCommands(FilePath);
+  }
+
+  std::vector<std::string> getAllFiles() const override { return Files; }
+
+private:
+  FixedCompilationDatabase FixedCompilations;
+  std::vector<std::string> Files;
+};
+
+MATCHER_P(Named, Name, "") { return arg.first == Name; }
+
+TEST(AllTUsToolTest, AFewFiles) {
+  FixedCompilationDatabaseWithFiles Compilations(".", {"a.cc", "b.cc", "c.cc"},
+                                                 std::vector<std::string>());
+  AllTUsToolExecutor Executor(Compilations, /*ThreadCount=*/0);
+  Executor.mapVirtualFile("a.cc", "void x() {}");
+  Executor.mapVirtualFile("b.cc", "void y() {}");
+  Executor.mapVirtualFile("c.cc", "void z() {}");
+
+  auto Err = Executor.execute(std::unique_ptr<FrontendActionFactory>(
+      new ReportResultActionFactory(Executor.getExecutionContext())));
+  ASSERT_TRUE(!Err);
+  EXPECT_THAT(
+      Executor.getToolResults()->AllKVResults(),
+      ::testing::UnorderedElementsAre(Named("x"), Named("y"), Named("z")));
+}
+
+TEST(AllTUsToolTest, ManyFiles) {
+  unsigned NumFiles = 100;
+  std::vector<std::string> Files;
+  std::map<std::string, std::string> FileToContent;
+  std::vector<std::string> ExpectedSymbols;
+  for (unsigned i = 1; i <= NumFiles; ++i) {
+    std::string File = "f" + std::to_string(i) + ".cc";
+    std::string Symbol = "looong_function_name_" + std::to_string(i);
+    Files.push_back(File);
+    FileToContent[File] = "void " + Symbol + "() {}";
+    ExpectedSymbols.push_back(Symbol);
+  }
+  FixedCompilationDatabaseWithFiles Compilations(".", Files,
+                                                 std::vector<std::string>());
+  AllTUsToolExecutor Executor(Compilations, /*ThreadCount=*/0);
+  for (const auto &FileAndContent : FileToContent) {
+    Executor.mapVirtualFile(FileAndContent.first, FileAndContent.second);
+  }
+
+  auto Err = Executor.execute(std::unique_ptr<FrontendActionFactory>(
+      new ReportResultActionFactory(Executor.getExecutionContext())));
+  ASSERT_TRUE(!Err);
+  std::vector<std::string> Results;
+  Executor.getToolResults()->forEachResult(
+      [&](StringRef Name, StringRef) { Results.push_back(Name); });
+  EXPECT_THAT(ExpectedSymbols, ::testing::UnorderedElementsAreArray(Results));
+}
+
+} // end namespace tooling
+} // end namespace clang
diff --git a/unittests/Tooling/QualTypeNamesTest.cpp b/unittests/Tooling/QualTypeNamesTest.cpp
index edd5060..dd48f3b 100644
--- a/unittests/Tooling/QualTypeNamesTest.cpp
+++ b/unittests/Tooling/QualTypeNamesTest.cpp
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/Tooling/Core/QualTypeNames.h"
+#include "clang/AST/QualTypeNames.h"
 #include "TestVisitor.h"
 using namespace clang;
 
diff --git a/unittests/Tooling/RecursiveASTVisitorTestPostOrderVisitor.cpp b/unittests/Tooling/RecursiveASTVisitorTestPostOrderVisitor.cpp
new file mode 100644
index 0000000..2e7b398
--- /dev/null
+++ b/unittests/Tooling/RecursiveASTVisitorTestPostOrderVisitor.cpp
@@ -0,0 +1,116 @@
+//===- unittests/Tooling/RecursiveASTVisitorPostOrderASTVisitor.cpp -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains tests for the post-order traversing functionality
+// of RecursiveASTVisitor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestVisitor.h"
+
+using namespace clang;
+
+namespace {
+
+class RecordingVisitor : public TestVisitor<RecordingVisitor> {
+
+  bool VisitPostOrder;
+
+public:
+  explicit RecordingVisitor(bool VisitPostOrder)
+      : VisitPostOrder(VisitPostOrder) {}
+
+  // List of visited nodes during traversal.
+  std::vector<std::string> VisitedNodes;
+
+  bool shouldTraversePostOrder() const { return VisitPostOrder; }
+
+  bool VisitUnaryOperator(UnaryOperator *Op) {
+    VisitedNodes.push_back(Op->getOpcodeStr(Op->getOpcode()));
+    return true;
+  }
+
+  bool VisitBinaryOperator(BinaryOperator *Op) {
+    VisitedNodes.push_back(Op->getOpcodeStr());
+    return true;
+  }
+
+  bool VisitIntegerLiteral(IntegerLiteral *Lit) {
+    VisitedNodes.push_back(Lit->getValue().toString(10, false));
+    return true;
+  }
+
+  bool VisitVarDecl(VarDecl *D) {
+    VisitedNodes.push_back(D->getNameAsString());
+    return true;
+  }
+
+  bool VisitCXXMethodDecl(CXXMethodDecl *D) {
+    VisitedNodes.push_back(D->getQualifiedNameAsString());
+    return true;
+  }
+
+  bool VisitReturnStmt(ReturnStmt *S) {
+    VisitedNodes.push_back("return");
+    return true;
+  }
+
+  bool VisitCXXRecordDecl(CXXRecordDecl *D) {
+    if (!D->isImplicit())
+      VisitedNodes.push_back(D->getQualifiedNameAsString());
+    return true;
+  }
+
+  bool VisitTemplateTypeParmType(TemplateTypeParmType *T) {
+    VisitedNodes.push_back(T->getDecl()->getQualifiedNameAsString());
+    return true;
+  }
+};
+} // namespace
+
+TEST(RecursiveASTVisitor, PostOrderTraversal) {
+  // We traverse the translation unit and store all visited nodes.
+  RecordingVisitor Visitor(true);
+  Visitor.runOver("class A {\n"
+                  "  class B {\n"
+                  "    int foo() {\n"
+                  "      while(4) { int i = 9; int j = -5; }\n"
+                  "      return (1 + 3) + 2; }\n"
+                  "  };\n"
+                  "};\n");
+
+  std::vector<std::string> expected = {"4", "9",      "i",         "5",    "-",
+                                       "j", "1",      "3",         "+",    "2",
+                                       "+", "return", "A::B::foo", "A::B", "A"};
+  // Compare the list of actually visited nodes with the expected list of
+  // visited nodes.
+  ASSERT_EQ(expected.size(), Visitor.VisitedNodes.size());
+  for (std::size_t I = 0; I < expected.size(); I++) {
+    ASSERT_EQ(expected[I], Visitor.VisitedNodes[I]);
+  }
+}
+
+TEST(RecursiveASTVisitor, NoPostOrderTraversal) {
+  // We traverse the translation unit and store all visited nodes.
+  RecordingVisitor Visitor(false);
+  Visitor.runOver("class A {\n"
+                  "  class B {\n"
+                  "    int foo() { return 1 + 2; }\n"
+                  "  };\n"
+                  "};\n");
+
+  std::vector<std::string> expected = {"A", "A::B", "A::B::foo", "return",
+                                       "+", "1",    "2"};
+  // Compare the list of actually visited nodes with the expected list of
+  // visited nodes.
+  ASSERT_EQ(expected.size(), Visitor.VisitedNodes.size());
+  for (std::size_t I = 0; I < expected.size(); I++) {
+    ASSERT_EQ(expected[I], Visitor.VisitedNodes[I]);
+  }
+}
diff --git a/unittests/Tooling/RefactoringActionRulesTest.cpp b/unittests/Tooling/RefactoringActionRulesTest.cpp
index 132a3a4..e9a12de 100644
--- a/unittests/Tooling/RefactoringActionRulesTest.cpp
+++ b/unittests/Tooling/RefactoringActionRulesTest.cpp
@@ -10,7 +10,8 @@
 #include "ReplacementTest.h"
 #include "RewriterTestContext.h"
 #include "clang/Tooling/Refactoring.h"
-#include "clang/Tooling/Refactoring/RefactoringActionRules.h"
+#include "clang/Tooling/Refactoring/Extract/Extract.h"
+#include "clang/Tooling/Refactoring/RefactoringAction.h"
 #include "clang/Tooling/Refactoring/RefactoringDiagnostic.h"
 #include "clang/Tooling/Refactoring/Rename/SymbolName.h"
 #include "clang/Tooling/Tooling.h"
@@ -63,6 +64,12 @@
     ReplaceAWithB(std::pair<SourceRange, int> Selection)
         : Selection(Selection) {}
 
+    static Expected<ReplaceAWithB>
+    initiate(RefactoringRuleContext &Cotnext,
+             std::pair<SourceRange, int> Selection) {
+      return ReplaceAWithB(Selection);
+    }
+
     Expected<AtomicChanges>
     createSourceReplacements(RefactoringRuleContext &Context) {
       const SourceManager &SM = Context.getSources();
@@ -90,7 +97,7 @@
   auto Rule =
       createRefactoringActionRule<ReplaceAWithB>(SelectionRequirement());
 
-  // When the requirements are satisifed, the rule's function must be invoked.
+  // When the requirements are satisfied, the rule's function must be invoked.
   {
     RefactoringRuleContext RefContext(Context.Sources);
     SourceLocation Cursor =
@@ -141,6 +148,11 @@
 TEST_F(RefactoringActionRulesTest, ReturnError) {
   class ErrorRule : public SourceChangeRefactoringRule {
   public:
+    static Expected<ErrorRule> initiate(RefactoringRuleContext &,
+                                        SourceRange R) {
+      return ErrorRule(R);
+    }
+
     ErrorRule(SourceRange R) {}
     Expected<AtomicChanges> createSourceReplacements(RefactoringRuleContext &) {
       return llvm::make_error<llvm::StringError>(
@@ -191,6 +203,11 @@
   public:
     FindOccurrences(SourceRange Selection) : Selection(Selection) {}
 
+    static Expected<FindOccurrences> initiate(RefactoringRuleContext &,
+                                              SourceRange Selection) {
+      return FindOccurrences(Selection);
+    }
+
     Expected<SymbolOccurrences>
     findSymbolOccurrences(RefactoringRuleContext &) override {
       SymbolOccurrences Occurrences;
@@ -219,4 +236,13 @@
             SourceRange(Cursor, Cursor.getLocWithOffset(strlen("test"))));
 }
 
+TEST_F(RefactoringActionRulesTest, EditorCommandBinding) {
+  const RefactoringDescriptor &Descriptor = ExtractFunction::describe();
+  EXPECT_EQ(Descriptor.Name, "extract-function");
+  EXPECT_EQ(
+      Descriptor.Description,
+      "(WIP action; use with caution!) Extracts code into a new function");
+  EXPECT_EQ(Descriptor.Title, "Extract Function");
+}
+
 } // end anonymous namespace
diff --git a/unittests/Tooling/ToolingTest.cpp b/unittests/Tooling/ToolingTest.cpp
index b179f03..6dd0e53 100644
--- a/unittests/Tooling/ToolingTest.cpp
+++ b/unittests/Tooling/ToolingTest.cpp
@@ -402,6 +402,55 @@
   EXPECT_FALSE(Found);
 }
 
+TEST(ClangToolTest, BaseVirtualFileSystemUsage) {
+  FixedCompilationDatabase Compilations("/", std::vector<std::string>());
+  llvm::IntrusiveRefCntPtr<vfs::OverlayFileSystem> OverlayFileSystem(
+      new vfs::OverlayFileSystem(vfs::getRealFileSystem()));
+  llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
+      new vfs::InMemoryFileSystem);
+  OverlayFileSystem->pushOverlay(InMemoryFileSystem);
+
+  InMemoryFileSystem->addFile(
+      "a.cpp", 0, llvm::MemoryBuffer::getMemBuffer("int main() {}"));
+
+  ClangTool Tool(Compilations, std::vector<std::string>(1, "a.cpp"),
+                 std::make_shared<PCHContainerOperations>(), OverlayFileSystem);
+  std::unique_ptr<FrontendActionFactory> Action(
+      newFrontendActionFactory<SyntaxOnlyAction>());
+  EXPECT_EQ(0, Tool.run(Action.get()));
+}
+
+// Check getClangStripDependencyFileAdjuster doesn't strip args after -MD/-MMD.
+TEST(ClangToolTest, StripDependencyFileAdjuster) {
+  FixedCompilationDatabase Compilations("/", {"-MD", "-c", "-MMD", "-w"});
+
+  ClangTool Tool(Compilations, std::vector<std::string>(1, "/a.cc"));
+  Tool.mapVirtualFile("/a.cc", "void a() {}");
+
+  std::unique_ptr<FrontendActionFactory> Action(
+      newFrontendActionFactory<SyntaxOnlyAction>());
+
+  CommandLineArguments FinalArgs;
+  ArgumentsAdjuster CheckFlagsAdjuster =
+    [&FinalArgs](const CommandLineArguments &Args, StringRef /*unused*/) {
+      FinalArgs = Args;
+      return Args;
+    };
+  Tool.clearArgumentsAdjusters();
+  Tool.appendArgumentsAdjuster(getClangStripDependencyFileAdjuster());
+  Tool.appendArgumentsAdjuster(CheckFlagsAdjuster);
+  Tool.run(Action.get());
+
+  auto HasFlag = [&FinalArgs](const std::string &Flag) {
+    return std::find(FinalArgs.begin(), FinalArgs.end(), Flag) !=
+           FinalArgs.end();
+  };
+  EXPECT_FALSE(HasFlag("-MD"));
+  EXPECT_FALSE(HasFlag("-MMD"));
+  EXPECT_TRUE(HasFlag("-c"));
+  EXPECT_TRUE(HasFlag("-w"));
+}
+
 namespace {
 /// Find a target name such that looking for it in TargetRegistry by that name
 /// returns the same target. We expect that there is at least one target
@@ -533,5 +582,31 @@
 }
 #endif
 
+TEST(runToolOnCode, TestResetDiagnostics) {
+  // This is a tool that resets the diagnostic during the compilation.
+  struct ResetDiagnosticAction : public clang::ASTFrontendAction {
+    std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &Compiler,
+                                                   StringRef) override {
+      struct Consumer : public clang::ASTConsumer {
+        bool HandleTopLevelDecl(clang::DeclGroupRef D) override {
+          auto &Diags = (*D.begin())->getASTContext().getDiagnostics();
+          // Ignore any error
+          Diags.Reset();
+          // Disable warnings because computing the CFG might crash.
+          Diags.setIgnoreAllWarnings(true);
+          return true;
+        }
+      };
+      return llvm::make_unique<Consumer>();
+    }
+  };
+
+  // Should not crash
+  EXPECT_FALSE(
+      runToolOnCode(new ResetDiagnosticAction,
+                    "struct Foo { Foo(int); ~Foo(); struct Fwd _fwd; };"
+                    "void func() { long x; Foo f(x); }"));
+}
+
 } // end namespace tooling
 } // end namespace clang
diff --git a/unittests/libclang/CMakeLists.txt b/unittests/libclang/CMakeLists.txt
index 1cdc45e..36f6089 100644
--- a/unittests/libclang/CMakeLists.txt
+++ b/unittests/libclang/CMakeLists.txt
@@ -3,5 +3,6 @@
   )
 
 target_link_libraries(libclangTests
+  PRIVATE
   libclang
   )
diff --git a/unittests/libclang/LibclangTest.cpp b/unittests/libclang/LibclangTest.cpp
index f2a96d6..c44095d 100644
--- a/unittests/libclang/LibclangTest.cpp
+++ b/unittests/libclang/LibclangTest.cpp
@@ -572,3 +572,114 @@
   EXPECT_EQ(0U, clang_getNumDiagnostics(ClangTU));
   DisplayDiagnostics();
 }
+
+class LibclangPrintingPolicyTest : public LibclangParseTest {
+public:
+  CXPrintingPolicy Policy = nullptr;
+
+  void SetUp() override {
+    LibclangParseTest::SetUp();
+    std::string File = "file.cpp";
+    WriteFile(File, "int i;\n");
+    ClangTU = clang_parseTranslationUnit(Index, File.c_str(), nullptr, 0,
+                                         nullptr, 0, TUFlags);
+    CXCursor TUCursor = clang_getTranslationUnitCursor(ClangTU);
+    Policy = clang_getCursorPrintingPolicy(TUCursor);
+  }
+  void TearDown() override {
+    clang_PrintingPolicy_dispose(Policy);
+    LibclangParseTest::TearDown();
+  }
+};
+
+TEST_F(LibclangPrintingPolicyTest, SetAndGetProperties) {
+  for (unsigned Value = 0; Value < 2; ++Value) {
+    for (int I = 0; I < CXPrintingPolicy_LastProperty; ++I) {
+      auto Property = static_cast<enum CXPrintingPolicyProperty>(I);
+
+      clang_PrintingPolicy_setProperty(Policy, Property, Value);
+      EXPECT_EQ(Value, clang_PrintingPolicy_getProperty(Policy, Property));
+    }
+  }
+}
+
+TEST_F(LibclangReparseTest, PreprocessorSkippedRanges) {
+  std::string Header = "header.h", Main = "main.cpp";
+  WriteFile(Header,
+    "#ifdef MANGOS\n"
+    "printf(\"mmm\");\n"
+    "#endif");
+  WriteFile(Main,
+    "#include \"header.h\"\n"
+    "#ifdef GUAVA\n"
+    "#endif\n"
+    "#ifdef KIWIS\n"
+    "printf(\"mmm!!\");\n"
+    "#endif");
+
+  for (int i = 0; i != 3; ++i) {
+    unsigned flags = TUFlags | CXTranslationUnit_PrecompiledPreamble;
+    if (i == 2)
+      flags |= CXTranslationUnit_CreatePreambleOnFirstParse;
+
+    if (i != 0)
+       clang_disposeTranslationUnit(ClangTU);  // dispose from previous iter
+
+    // parse once
+    ClangTU = clang_parseTranslationUnit(Index, Main.c_str(), nullptr, 0,
+                                         nullptr, 0, flags);
+    if (i != 0) {
+      // reparse
+      ASSERT_TRUE(ReparseTU(0, nullptr /* No unsaved files. */));
+    }
+
+    // Check all ranges are there
+    CXSourceRangeList *Ranges = clang_getAllSkippedRanges(ClangTU);
+    EXPECT_EQ(3U, Ranges->count);
+
+    CXSourceLocation cxl;
+    unsigned line;
+    cxl = clang_getRangeStart(Ranges->ranges[0]);
+    clang_getSpellingLocation(cxl, nullptr, &line, nullptr, nullptr);
+    EXPECT_EQ(1U, line);
+    cxl = clang_getRangeEnd(Ranges->ranges[0]);
+    clang_getSpellingLocation(cxl, nullptr, &line, nullptr, nullptr);
+    EXPECT_EQ(3U, line);
+
+    cxl = clang_getRangeStart(Ranges->ranges[1]);
+    clang_getSpellingLocation(cxl, nullptr, &line, nullptr, nullptr);
+    EXPECT_EQ(2U, line);
+    cxl = clang_getRangeEnd(Ranges->ranges[1]);
+    clang_getSpellingLocation(cxl, nullptr, &line, nullptr, nullptr);
+    EXPECT_EQ(3U, line);
+
+    cxl = clang_getRangeStart(Ranges->ranges[2]);
+    clang_getSpellingLocation(cxl, nullptr, &line, nullptr, nullptr);
+    EXPECT_EQ(4U, line);
+    cxl = clang_getRangeEnd(Ranges->ranges[2]);
+    clang_getSpellingLocation(cxl, nullptr, &line, nullptr, nullptr);
+    EXPECT_EQ(6U, line);
+
+    clang_disposeSourceRangeList(Ranges);
+
+    // Check obtaining ranges by each file works
+    CXFile cxf = clang_getFile(ClangTU, Header.c_str());
+    Ranges = clang_getSkippedRanges(ClangTU, cxf);
+    EXPECT_EQ(1U, Ranges->count);
+    cxl = clang_getRangeStart(Ranges->ranges[0]);
+    clang_getSpellingLocation(cxl, nullptr, &line, nullptr, nullptr);
+    EXPECT_EQ(1U, line);
+    clang_disposeSourceRangeList(Ranges);
+
+    cxf = clang_getFile(ClangTU, Main.c_str());
+    Ranges = clang_getSkippedRanges(ClangTU, cxf);
+    EXPECT_EQ(2U, Ranges->count);
+    cxl = clang_getRangeStart(Ranges->ranges[0]);
+    clang_getSpellingLocation(cxl, nullptr, &line, nullptr, nullptr);
+    EXPECT_EQ(2U, line);
+    cxl = clang_getRangeStart(Ranges->ranges[1]);
+    clang_getSpellingLocation(cxl, nullptr, &line, nullptr, nullptr);
+    EXPECT_EQ(4U, line);
+    clang_disposeSourceRangeList(Ranges);
+  }
+}
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index 435529c..dba0c94 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -13,3 +13,4 @@
   NeonEmitter.cpp
   TableGen.cpp
   )
+set_target_properties(clang-tblgen PROPERTIES FOLDER "Clang tablegenning")
diff --git a/utils/TableGen/ClangAttrEmitter.cpp b/utils/TableGen/ClangAttrEmitter.cpp
index effabcc..263dcae 100644
--- a/utils/TableGen/ClangAttrEmitter.cpp
+++ b/utils/TableGen/ClangAttrEmitter.cpp
@@ -56,8 +56,8 @@
     V(Spelling.getValueAsString("Variety")),
     N(Spelling.getValueAsString("Name")) {
 
-    assert(V != "GCC" && "Given a GCC spelling, which means this hasn't been"
-           "flattened!");
+    assert(V != "GCC" && V != "Clang" &&
+           "Given a GCC spelling, which means this hasn't been flattened!");
     if (V == "CXX11" || V == "C2x" || V == "Pragma")
       NS = Spelling.getValueAsString("Namespace");
     bool Unset;
@@ -78,11 +78,17 @@
   std::vector<FlattenedSpelling> Ret;
 
   for (const auto &Spelling : Spellings) {
-    if (Spelling->getValueAsString("Variety") == "GCC") {
+    StringRef Variety = Spelling->getValueAsString("Variety");
+    StringRef Name = Spelling->getValueAsString("Name");
+    if (Variety == "GCC") {
       // Gin up two new spelling objects to add into the list.
-      Ret.emplace_back("GNU", Spelling->getValueAsString("Name"), "", true);
-      Ret.emplace_back("CXX11", Spelling->getValueAsString("Name"), "gnu",
-                       true);
+      Ret.emplace_back("GNU", Name, "", true);
+      Ret.emplace_back("CXX11", Name, "gnu", true);
+    } else if (Variety == "Clang") {
+      Ret.emplace_back("GNU", Name, "", false);
+      Ret.emplace_back("CXX11", Name, "clang", false);
+      if (Spelling->getValueAsBit("AllowInC"))
+        Ret.emplace_back("C2x", Name, "clang", false);
     } else
       Ret.push_back(FlattenedSpelling(*Spelling));
   }
@@ -225,6 +231,7 @@
     virtual void writePCHReadArgs(raw_ostream &OS) const = 0;
     virtual void writePCHReadDecls(raw_ostream &OS) const = 0;
     virtual void writePCHWrite(raw_ostream &OS) const = 0;
+    virtual std::string getIsOmitted() const { return "false"; }
     virtual void writeValue(raw_ostream &OS) const = 0;
     virtual void writeDump(raw_ostream &OS) const = 0;
     virtual void writeDumpChildren(raw_ostream &OS) const {}
@@ -292,23 +299,25 @@
                                            std::string(getUpperName()) + "()");
     }
 
+    std::string getIsOmitted() const override {
+      if (type == "IdentifierInfo *")
+        return "!get" + getUpperName().str() + "()";
+      return "false";
+    }
+
     void writeValue(raw_ostream &OS) const override {
-      if (type == "FunctionDecl *") {
+      if (type == "FunctionDecl *")
         OS << "\" << get" << getUpperName()
            << "()->getNameInfo().getAsString() << \"";
-      } else if (type == "IdentifierInfo *") {
-        OS << "\";\n";
-        if (isOptional())
-          OS << "    if (get" << getUpperName() << "()) ";
-        else
-          OS << "    ";
-        OS << "OS << get" << getUpperName() << "()->getName();\n";
-        OS << "    OS << \"";
-      } else if (type == "TypeSourceInfo *") {
+      else if (type == "IdentifierInfo *")
+        // Some non-optional (comma required) identifier arguments can be the
+        // empty string but are then recorded as a nullptr.
+        OS << "\" << (get" << getUpperName() << "() ? get" << getUpperName()
+           << "()->getName() : \"\") << \"";
+      else if (type == "TypeSourceInfo *")
         OS << "\" << get" << getUpperName() << "().getAsString() << \"";
-      } else {
+      else
         OS << "\" << get" << getUpperName() << "() << \"";
-      }
     }
 
     void writeDump(raw_ostream &OS) const override {
@@ -316,9 +325,10 @@
         OS << "    OS << \" \";\n";
         OS << "    dumpBareDeclRef(SA->get" << getUpperName() << "());\n"; 
       } else if (type == "IdentifierInfo *") {
-        if (isOptional())
-          OS << "    if (SA->get" << getUpperName() << "())\n  ";
-        OS << "    OS << \" \" << SA->get" << getUpperName()
+        // Some non-optional (comma required) identifier arguments can be the
+        // empty string but are then recorded as a nullptr.
+        OS << "    if (SA->get" << getUpperName() << "())\n"
+           << "      OS << \" \" << SA->get" << getUpperName()
            << "()->getName();\n";
       } else if (type == "TypeSourceInfo *") {
         OS << "    OS << \" \" << SA->get" << getUpperName()
@@ -570,12 +580,15 @@
          << "Type());\n";
     }
 
+    std::string getIsOmitted() const override {
+      return "!is" + getLowerName().str() + "Expr || !" + getLowerName().str()
+             + "Expr";
+    }
+
     void writeValue(raw_ostream &OS) const override {
       OS << "\";\n";
-      // The aligned attribute argument expression is optional.
-      OS << "    if (is" << getLowerName() << "Expr && "
-         << getLowerName() << "Expr)\n";
-      OS << "      " << getLowerName() << "Expr->printPretty(OS, nullptr, Policy);\n";
+      OS << "    " << getLowerName()
+         << "Expr->printPretty(OS, nullptr, Policy);\n";
       OS << "    OS << \"";
     }
 
@@ -732,6 +745,138 @@
     }
   };
 
+  class VariadicParamIdxArgument : public VariadicArgument {
+  public:
+    VariadicParamIdxArgument(const Record &Arg, StringRef Attr)
+        : VariadicArgument(Arg, Attr, "ParamIdx") {}
+
+  public:
+    void writeCtorBody(raw_ostream &OS) const override {
+      VariadicArgument::writeCtorBody(OS);
+      OS << "    #ifndef NDEBUG\n"
+         << "    if (" << getLowerName() << "_size()) {\n"
+         << "      bool HasThis = " << getLowerName()
+         << "_begin()->hasThis();\n"
+         << "      for (const auto Idx : " << getLowerName() << "()) {\n"
+         << "        assert(Idx.isValid() && \"ParamIdx must be valid\");\n"
+         << "        assert(HasThis == Idx.hasThis() && "
+         << "\"HasThis must be consistent\");\n"
+         << "      }\n"
+         << "    }\n"
+         << "    #endif\n";
+    }
+
+    void writePCHReadDecls(raw_ostream &OS) const override {
+      OS << "    unsigned " << getUpperName() << "Size = Record.readInt();\n";
+      OS << "    bool " << getUpperName() << "HasThis = " << getUpperName()
+         << "Size ? Record.readInt() : false;\n";
+      OS << "    SmallVector<ParamIdx, 4> " << getUpperName() << ";\n"
+         << "    " << getUpperName() << ".reserve(" << getUpperName()
+         << "Size);\n"
+         << "    for (unsigned i = 0; i != " << getUpperName()
+         << "Size; ++i) {\n"
+         << "      " << getUpperName()
+         << ".push_back(ParamIdx(Record.readInt(), " << getUpperName()
+         << "HasThis));\n"
+         << "    }\n";
+    }
+
+    void writePCHReadArgs(raw_ostream &OS) const override {
+      OS << getUpperName() << ".data(), " << getUpperName() << "Size";
+    }
+
+    void writePCHWrite(raw_ostream &OS) const override {
+      OS << "    Record.push_back(SA->" << getLowerName() << "_size());\n";
+      OS << "    if (SA->" << getLowerName() << "_size())\n"
+         << "      Record.push_back(SA->" << getLowerName()
+         << "_begin()->hasThis());\n";
+      OS << "    for (auto Idx : SA->" << getLowerName() << "())\n"
+         << "      Record.push_back(Idx.getSourceIndex());\n";
+    }
+
+    void writeValueImpl(raw_ostream &OS) const override {
+      OS << "    OS << Val.getSourceIndex();\n";
+    }
+
+    void writeDump(raw_ostream &OS) const override {
+      OS << "    for (auto Idx : SA->" << getLowerName() << "())\n";
+      OS << "      OS << \" \" << Idx.getSourceIndex();\n";
+    }
+  };
+
+  class ParamIdxArgument : public Argument {
+    std::string IdxName;
+
+  public:
+    ParamIdxArgument(const Record &Arg, StringRef Attr)
+        : Argument(Arg, Attr), IdxName(getUpperName()) {}
+
+    void writeDeclarations(raw_ostream &OS) const override {
+      OS << "ParamIdx " << IdxName << ";\n";
+    }
+
+    void writeAccessors(raw_ostream &OS) const override {
+      OS << "\n"
+         << "  ParamIdx " << getLowerName() << "() const {"
+         << " return " << IdxName << "; }\n";
+    }
+
+    void writeCtorParameters(raw_ostream &OS) const override {
+      OS << "ParamIdx " << IdxName;
+    }
+
+    void writeCloneArgs(raw_ostream &OS) const override { OS << IdxName; }
+
+    void writeTemplateInstantiationArgs(raw_ostream &OS) const override {
+      OS << "A->" << getLowerName() << "()";
+    }
+
+    void writeImplicitCtorArgs(raw_ostream &OS) const override {
+      OS << IdxName;
+    }
+
+    void writeCtorInitializers(raw_ostream &OS) const override {
+      OS << IdxName << "(" << IdxName << ")";
+    }
+
+    void writeCtorDefaultInitializers(raw_ostream &OS) const override {
+      OS << IdxName << "()";
+    }
+
+    void writePCHReadDecls(raw_ostream &OS) const override {
+      OS << "    unsigned " << IdxName << "Src = Record.readInt();\n";
+      OS << "    bool " << IdxName << "HasThis = Record.readInt();\n";
+    }
+
+    void writePCHReadArgs(raw_ostream &OS) const override {
+      OS << "ParamIdx(" << IdxName << "Src, " << IdxName << "HasThis)";
+    }
+
+    void writePCHWrite(raw_ostream &OS) const override {
+      OS << "    Record.push_back(SA->" << getLowerName()
+         << "().isValid() ? SA->" << getLowerName()
+         << "().getSourceIndex() : 0);\n";
+      OS << "    Record.push_back(SA->" << getLowerName()
+         << "().isValid() ? SA->" << getLowerName()
+         << "().hasThis() : false);\n";
+    }
+
+    std::string getIsOmitted() const override {
+      return "!" + IdxName + ".isValid()";
+    }
+
+    void writeValue(raw_ostream &OS) const override {
+      OS << "\" << " << IdxName << ".getSourceIndex() << \"";
+    }
+
+    void writeDump(raw_ostream &OS) const override {
+      if (isOptional())
+        OS << "    if (SA->" << getLowerName() << "().isValid())\n  ";
+      OS << "    OS << \" \" << SA->" << getLowerName()
+         << "().getSourceIndex();\n";
+    }
+  };
+
   // Unique the enums, but maintain the original declaration ordering.
   std::vector<StringRef>
   uniqueEnumsInOrder(const std::vector<StringRef> &enums) {
@@ -1231,6 +1376,10 @@
     Ptr = llvm::make_unique<VariadicEnumArgument>(Arg, Attr);
   else if (ArgName == "VariadicExprArgument")
     Ptr = llvm::make_unique<VariadicExprArgument>(Arg, Attr);
+  else if (ArgName == "VariadicParamIdxArgument")
+    Ptr = llvm::make_unique<VariadicParamIdxArgument>(Arg, Attr);
+  else if (ArgName == "ParamIdxArgument")
+    Ptr = llvm::make_unique<ParamIdxArgument>(Arg, Attr);
   else if (ArgName == "VersionArgument")
     Ptr = llvm::make_unique<VersionArgument>(Arg, Attr);
 
@@ -1362,7 +1511,7 @@
       "    OS << \"" << Prefix << Spelling;
 
     if (Variety == "Pragma") {
-      OS << " \";\n";
+      OS << "\";\n";
       OS << "    printPrettyPragma(OS, Policy);\n";
       OS << "    OS << \"\\n\";";
       OS << "    break;\n";
@@ -1370,33 +1519,83 @@
       continue;
     }
 
-    // Fake arguments aren't part of the parsed form and should not be
-    // pretty-printed.
-    bool hasNonFakeArgs = llvm::any_of(
-        Args, [](const std::unique_ptr<Argument> &A) { return !A->isFake(); });
-
-    // FIXME: always printing the parenthesis isn't the correct behavior for
-    // attributes which have optional arguments that were not provided. For
-    // instance: __attribute__((aligned)) will be pretty printed as
-    // __attribute__((aligned())). The logic should check whether there is only
-    // a single argument, and if it is optional, whether it has been provided.
-    if (hasNonFakeArgs)
-      OS << "(";
     if (Spelling == "availability") {
+      OS << "(";
       writeAvailabilityValue(OS);
+      OS << ")";
     } else if (Spelling == "deprecated" || Spelling == "gnu::deprecated") {
-        writeDeprecatedAttrValue(OS, Variety);
+      OS << "(";
+      writeDeprecatedAttrValue(OS, Variety);
+      OS << ")";
     } else {
-      unsigned index = 0;
-      for (const auto &arg : Args) {
-        if (arg->isFake()) continue;
-        if (index++) OS << ", ";
-        arg->writeValue(OS);
+      // To avoid printing parentheses around an empty argument list or
+      // printing spurious commas at the end of an argument list, we need to
+      // determine where the last provided non-fake argument is.
+      unsigned NonFakeArgs = 0;
+      unsigned TrailingOptArgs = 0;
+      bool FoundNonOptArg = false;
+      for (const auto &arg : llvm::reverse(Args)) {
+        if (arg->isFake())
+          continue;
+        ++NonFakeArgs;
+        if (FoundNonOptArg)
+          continue;
+        // FIXME: arg->getIsOmitted() == "false" means we haven't implemented
+        // any way to detect whether the argument was omitted.
+        if (!arg->isOptional() || arg->getIsOmitted() == "false") {
+          FoundNonOptArg = true;
+          continue;
+        }
+        if (!TrailingOptArgs++)
+          OS << "\";\n"
+             << "    unsigned TrailingOmittedArgs = 0;\n";
+        OS << "    if (" << arg->getIsOmitted() << ")\n"
+           << "      ++TrailingOmittedArgs;\n";
       }
+      if (TrailingOptArgs)
+        OS << "    OS << \"";
+      if (TrailingOptArgs < NonFakeArgs)
+        OS << "(";
+      else if (TrailingOptArgs)
+        OS << "\";\n"
+           << "    if (TrailingOmittedArgs < " << NonFakeArgs << ")\n"
+           << "       OS << \"(\";\n"
+           << "    OS << \"";
+      unsigned ArgIndex = 0;
+      for (const auto &arg : Args) {
+        if (arg->isFake())
+          continue;
+        if (ArgIndex) {
+          if (ArgIndex >= NonFakeArgs - TrailingOptArgs)
+            OS << "\";\n"
+               << "    if (" << ArgIndex << " < " << NonFakeArgs
+               << " - TrailingOmittedArgs)\n"
+               << "      OS << \", \";\n"
+               << "    OS << \"";
+          else
+            OS << ", ";
+        }
+        std::string IsOmitted = arg->getIsOmitted();
+        if (arg->isOptional() && IsOmitted != "false")
+          OS << "\";\n"
+             << "    if (!(" << IsOmitted << ")) {\n"
+             << "      OS << \"";
+        arg->writeValue(OS);
+        if (arg->isOptional() && IsOmitted != "false")
+          OS << "\";\n"
+             << "    }\n"
+             << "    OS << \"";
+        ++ArgIndex;
+      }
+      if (TrailingOptArgs < NonFakeArgs)
+        OS << ")";
+      else if (TrailingOptArgs)
+        OS << "\";\n"
+           << "    if (TrailingOmittedArgs < " << NonFakeArgs << ")\n"
+           << "       OS << \")\";\n"
+           << "    OS << \"";
     }
 
-    if (hasNonFakeArgs)
-      OS << ")";
     OS << Suffix + "\";\n";
 
     OS <<
@@ -2059,10 +2258,13 @@
     ArrayRef<std::pair<Record *, SMRange>> Supers = R.getSuperClasses();
     assert(!Supers.empty() && "Forgot to specify a superclass for the attr");
     std::string SuperName;
+    bool Inheritable = false;
     for (const auto &Super : llvm::reverse(Supers)) {
       const Record *R = Super.first;
       if (R->getName() != "TargetSpecificAttr" && SuperName.empty())
         SuperName = R->getName();
+      if (R->getName() == "InheritableAttr")
+        Inheritable = true;
     }
 
     OS << "class " << R.getName() << "Attr : public " << SuperName << " {\n";
@@ -2156,8 +2358,13 @@
 
       OS << "             )\n";
       OS << "    : " << SuperName << "(attr::" << R.getName() << ", R, SI, "
-         << ( R.getValueAsBit("LateParsed") ? "true" : "false" ) << ", "
-         << ( R.getValueAsBit("DuplicatesAllowedWhileMerging") ? "true" : "false" ) << ")\n";
+         << ( R.getValueAsBit("LateParsed") ? "true" : "false" );
+      if (Inheritable) {
+        OS << ", "
+           << (R.getValueAsBit("InheritEvenIfAlreadyPresent") ? "true"
+                                                              : "false");
+      }
+      OS << ")\n";
 
       for (auto const &ai : Args) {
         OS << "              , ";
@@ -2618,6 +2825,31 @@
   OS << "  }\n";
 }
 
+// Helper function for GenerateTargetSpecificAttrChecks that alters the 'Test'
+// parameter with only a single check type, if applicable.
+static void GenerateTargetSpecificAttrCheck(const Record *R, std::string &Test,
+                                            std::string *FnName,
+                                            StringRef ListName,
+                                            StringRef CheckAgainst,
+                                            StringRef Scope) {
+  if (!R->isValueUnset(ListName)) {
+    Test += " && (";
+    std::vector<StringRef> Items = R->getValueAsListOfStrings(ListName);
+    for (auto I = Items.begin(), E = Items.end(); I != E; ++I) {
+      StringRef Part = *I;
+      Test += CheckAgainst;
+      Test += " == ";
+      Test += Scope;
+      Test += Part;
+      if (I + 1 != E)
+        Test += " || ";
+      if (FnName)
+        *FnName += Part;
+    }
+    Test += ")";
+  }
+}
+
 // Generate a conditional expression to check if the current target satisfies
 // the conditions for a TargetSpecificAttr record, and append the code for
 // those checks to the Test string. If the FnName string pointer is non-null,
@@ -2631,53 +2863,35 @@
   // named "T" and a TargetInfo object named "Target" within
   // scope that can be used to determine whether the attribute exists in
   // a given target.
-  Test += "(";
-
-  for (auto I = Arches.begin(), E = Arches.end(); I != E; ++I) {
-    StringRef Part = *I;
-    Test += "T.getArch() == llvm::Triple::";
-    Test += Part;
-    if (I + 1 != E)
-      Test += " || ";
-    if (FnName)
-      *FnName += Part;
+  Test += "true";
+  // If one or more architectures is specified, check those.  Arches are handled
+  // differently because GenerateTargetRequirements needs to combine the list
+  // with ParseKind.
+  if (!Arches.empty()) {
+    Test += " && (";
+    for (auto I = Arches.begin(), E = Arches.end(); I != E; ++I) {
+      StringRef Part = *I;
+      Test += "T.getArch() == llvm::Triple::";
+      Test += Part;
+      if (I + 1 != E)
+        Test += " || ";
+      if (FnName)
+        *FnName += Part;
+    }
+    Test += ")";
   }
-  Test += ")";
 
   // If the attribute is specific to particular OSes, check those.
-  if (!R->isValueUnset("OSes")) {
-    // We know that there was at least one arch test, so we need to and in the
-    // OS tests.
-    Test += " && (";
-    std::vector<StringRef> OSes = R->getValueAsListOfStrings("OSes");
-    for (auto I = OSes.begin(), E = OSes.end(); I != E; ++I) {
-      StringRef Part = *I;
-
-      Test += "T.getOS() == llvm::Triple::";
-      Test += Part;
-      if (I + 1 != E)
-        Test += " || ";
-      if (FnName)
-        *FnName += Part;
-    }
-    Test += ")";
-  }
+  GenerateTargetSpecificAttrCheck(R, Test, FnName, "OSes", "T.getOS()",
+                                  "llvm::Triple::");
 
   // If one or more CXX ABIs are specified, check those as well.
-  if (!R->isValueUnset("CXXABIs")) {
-    Test += " && (";
-    std::vector<StringRef> CXXABIs = R->getValueAsListOfStrings("CXXABIs");
-    for (auto I = CXXABIs.begin(), E = CXXABIs.end(); I != E; ++I) {
-      StringRef Part = *I;
-      Test += "Target.getCXXABI().getKind() == TargetCXXABI::";
-      Test += Part;
-      if (I + 1 != E)
-        Test += " || ";
-      if (FnName)
-        *FnName += Part;
-    }
-    Test += ")";
-  }
+  GenerateTargetSpecificAttrCheck(R, Test, FnName, "CXXABIs",
+                                  "Target.getCXXABI().getKind()",
+                                  "TargetCXXABI::");
+  // If one or more object formats is specified, check those.
+  GenerateTargetSpecificAttrCheck(R, Test, FnName, "ObjectFormats",
+                                  "T.getObjectFormat()", "llvm::Triple::");
 }
 
 static void GenerateHasAttrSpellingStringSwitch(
@@ -3025,136 +3239,72 @@
   OS << "}\n\n";
 }
 
+static std::string GetDiagnosticSpelling(const Record &R) {
+  std::string Ret = R.getValueAsString("DiagSpelling");
+  if (!Ret.empty())
+    return Ret;
+
+  // If we couldn't find the DiagSpelling in this object, we can check to see
+  // if the object is one that has a base, and if it is, loop up to the Base
+  // member recursively.
+  std::string Super = R.getSuperClasses().back().first->getName();
+  if (Super == "DDecl" || Super == "DStmt")
+    return GetDiagnosticSpelling(*R.getValueAsDef("Base"));
+
+  return "";
+}
+
 static std::string CalculateDiagnostic(const Record &S) {
   // If the SubjectList object has a custom diagnostic associated with it,
   // return that directly.
   const StringRef CustomDiag = S.getValueAsString("CustomDiag");
   if (!CustomDiag.empty())
-    return CustomDiag;
+    return ("\"" + Twine(CustomDiag) + "\"").str();
 
-  // Given the list of subjects, determine what diagnostic best fits.
-  enum {
-    Func = 1U << 0,
-    Var = 1U << 1,
-    ObjCMethod = 1U << 2,
-    Param = 1U << 3,
-    Class = 1U << 4,
-    GenericRecord = 1U << 5,
-    Type = 1U << 6,
-    ObjCIVar = 1U << 7,
-    ObjCProp = 1U << 8,
-    ObjCInterface = 1U << 9,
-    Block = 1U << 10,
-    Namespace = 1U << 11,
-    Field = 1U << 12,
-    CXXMethod = 1U << 13,
-    ObjCProtocol = 1U << 14,
-    Enum = 1U << 15,
-    Named = 1U << 16,
-  };
-  uint32_t SubMask = 0;
-
+  std::vector<std::string> DiagList;
   std::vector<Record *> Subjects = S.getValueAsListOfDefs("Subjects");
   for (const auto *Subject : Subjects) {
     const Record &R = *Subject;
-    std::string Name;
-
-    if (R.isSubClassOf("SubsetSubject")) {
-      PrintError(R.getLoc(), "SubsetSubjects should use a custom diagnostic");
-      // As a fallback, look through the SubsetSubject to see what its base
-      // type is, and use that. This needs to be updated if SubsetSubjects
-      // are allowed within other SubsetSubjects.
-      Name = R.getValueAsDef("Base")->getName();
-    } else
-      Name = R.getName();
-
-    uint32_t V = StringSwitch<uint32_t>(Name)
-                   .Case("Function", Func)
-                   .Case("Var", Var)
-                   .Case("ObjCMethod", ObjCMethod)
-                   .Case("ParmVar", Param)
-                   .Case("TypedefName", Type)
-                   .Case("ObjCIvar", ObjCIVar)
-                   .Case("ObjCProperty", ObjCProp)
-                   .Case("Record", GenericRecord)
-                   .Case("ObjCInterface", ObjCInterface)
-                   .Case("ObjCProtocol", ObjCProtocol)
-                   .Case("Block", Block)
-                   .Case("CXXRecord", Class)
-                   .Case("Namespace", Namespace)
-                   .Case("Field", Field)
-                   .Case("CXXMethod", CXXMethod)
-                   .Case("Enum", Enum)
-                   .Case("Named", Named)
-                   .Default(0);
-    if (!V) {
-      // Something wasn't in our mapping, so be helpful and let the developer
-      // know about it.
-      PrintFatalError(R.getLoc(), "Unknown subject type: " + R.getName());
-      return "";
+    // Get the diagnostic text from the Decl or Stmt node given.
+    std::string V = GetDiagnosticSpelling(R);
+    if (V.empty()) {
+      PrintError(R.getLoc(),
+                 "Could not determine diagnostic spelling for the node: " +
+                     R.getName() + "; please add one to DeclNodes.td");
+    } else {
+      // The node may contain a list of elements itself, so split the elements
+      // by a comma, and trim any whitespace.
+      SmallVector<StringRef, 2> Frags;
+      llvm::SplitString(V, Frags, ",");
+      for (auto Str : Frags) {
+        DiagList.push_back(Str.trim());
+      }
     }
-
-    SubMask |= V;
   }
 
-  switch (SubMask) {
-    // For the simple cases where there's only a single entry in the mask, we
-    // don't have to resort to bit fiddling.
-    case Func:  return "ExpectedFunction";
-    case Var:   return "ExpectedVariable";
-    case Param: return "ExpectedParameter";
-    case Class: return "ExpectedClass";
-    case Enum:  return "ExpectedEnum";
-    case CXXMethod:
-      // FIXME: Currently, this maps to ExpectedMethod based on existing code,
-      // but should map to something a bit more accurate at some point.
-    case ObjCMethod:  return "ExpectedMethod";
-    case Type:  return "ExpectedType";
-    case ObjCInterface: return "ExpectedObjectiveCInterface";
-    case ObjCProtocol: return "ExpectedObjectiveCProtocol";
-    
-    // "GenericRecord" means struct, union or class; check the language options
-    // and if not compiling for C++, strip off the class part. Note that this
-    // relies on the fact that the context for this declares "Sema &S".
-    case GenericRecord:
-      return "(S.getLangOpts().CPlusPlus ? ExpectedStructOrUnionOrClass : "
-                                           "ExpectedStructOrUnion)";
-    case Func | ObjCMethod | Block: return "ExpectedFunctionMethodOrBlock";
-    case Func | ObjCMethod | Class: return "ExpectedFunctionMethodOrClass";
-    case Func | Param:
-    case Func | ObjCMethod | Param: return "ExpectedFunctionMethodOrParameter";
-    case Func | ObjCMethod: return "ExpectedFunctionOrMethod";
-    case Func | Var: return "ExpectedVariableOrFunction";
-
-    // If not compiling for C++, the class portion does not apply.
-    case Func | Var | Class:
-      return "(S.getLangOpts().CPlusPlus ? ExpectedFunctionVariableOrClass : "
-                                           "ExpectedVariableOrFunction)";
-
-    case Func | Var | Class | ObjCInterface:
-      return "(S.getLangOpts().CPlusPlus"
-             "     ? ((S.getLangOpts().ObjC1 || S.getLangOpts().ObjC2)"
-             "            ? ExpectedFunctionVariableClassOrObjCInterface"
-             "            : ExpectedFunctionVariableOrClass)"
-             "     : ((S.getLangOpts().ObjC1 || S.getLangOpts().ObjC2)"
-             "            ? ExpectedFunctionVariableOrObjCInterface"
-             "            : ExpectedVariableOrFunction))";
-
-    case ObjCMethod | ObjCProp: return "ExpectedMethodOrProperty";
-    case Func | ObjCMethod | ObjCProp:
-      return "ExpectedFunctionOrMethodOrProperty";
-    case ObjCProtocol | ObjCInterface:
-      return "ExpectedObjectiveCInterfaceOrProtocol";
-    case Field | Var: return "ExpectedFieldOrGlobalVar";
-
-    case Named:
-      return "ExpectedNamedDecl";
+  if (DiagList.empty()) {
+    PrintFatalError(S.getLoc(),
+                    "Could not deduce diagnostic argument for Attr subjects");
+    return "";
   }
 
-  PrintFatalError(S.getLoc(),
-                  "Could not deduce diagnostic argument for Attr subjects");
+  // FIXME: this is not particularly good for localization purposes and ideally
+  // should be part of the diagnostics engine itself with some sort of list
+  // specifier.
 
-  return "";
+  // A single member of the list can be returned directly.
+  if (DiagList.size() == 1)
+    return '"' + DiagList.front() + '"';
+
+  if (DiagList.size() == 2)
+    return '"' + DiagList[0] + " and " + DiagList[1] + '"';
+
+  // If there are more than two in the list, we serialize the first N - 1
+  // elements with a comma. This leaves the string in the state: foo, bar,
+  // baz (but misses quux). We can then add ", and " for the last element
+  // manually.
+  std::string Diag = llvm::join(DiagList.begin(), DiagList.end() - 1, ", ");
+  return '"' + Diag + ", and " + *(DiagList.end() - 1) + '"';
 }
 
 static std::string GetSubjectWithSuffix(const Record *R) {
@@ -3241,8 +3391,8 @@
   }
   SS << ") {\n";
   SS << "    S.Diag(Attr.getLoc(), diag::";
-  SS << (Warn ? "warn_attribute_wrong_decl_type" :
-               "err_attribute_wrong_decl_type");
+  SS << (Warn ? "warn_attribute_wrong_decl_type_str" :
+               "err_attribute_wrong_decl_type_str");
   SS << ")\n";
   SS << "      << Attr.getName() << ";
   SS << CalculateDiagnostic(*SubjectObj) << ";\n";
@@ -3361,11 +3511,6 @@
   // Get the list of architectures to be tested for.
   const Record *R = Attr.getValueAsDef("Target");
   std::vector<StringRef> Arches = R->getValueAsListOfStrings("Arches");
-  if (Arches.empty()) {
-    PrintError(Attr.getLoc(), "Empty list of target architectures for a "
-                              "target-specific attr");
-    return "defaultTargetRequirements";
-  }
 
   // If there are other attributes which share the same parsed attribute kind,
   // such as target-specific attributes with a shared spelling, collapse the
@@ -3811,8 +3956,8 @@
     const Record &Deprecated = *Doc.Documentation->getValueAsDef("Deprecated");
     const StringRef Replacement = Deprecated.getValueAsString("Replacement");
     if (!Replacement.empty())
-      OS << "  This attribute has been superseded by ``"
-         << Replacement << "``.";
+      OS << "  This attribute has been superseded by ``" << Replacement
+         << "``.";
     OS << "\n\n";
   }
 
diff --git a/utils/TableGen/ClangDiagnosticsEmitter.cpp b/utils/TableGen/ClangDiagnosticsEmitter.cpp
index d9d99e0..4a2516f 100644
--- a/utils/TableGen/ClangDiagnosticsEmitter.cpp
+++ b/utils/TableGen/ClangDiagnosticsEmitter.cpp
@@ -154,15 +154,6 @@
                            RHS->DiagsInGroup.front());
 }
 
-static SMRange findSuperClassRange(const Record *R, StringRef SuperName) {
-  ArrayRef<std::pair<Record *, SMRange>> Supers = R->getSuperClasses();
-  auto I = std::find_if(Supers.begin(), Supers.end(),
-                        [&](const std::pair<Record *, SMRange> &SuperPair) {
-                          return SuperPair.first->getName() == SuperName;
-                        });
-  return (I != Supers.end()) ? I->second : SMRange();
-}
-
 /// \brief Invert the 1-[0/1] mapping of diags to group into a one to many
 /// mapping of groups to diags in the group.
 static void groupDiagnostics(const std::vector<Record*> &Diags,
@@ -236,22 +227,10 @@
         if (NextDiagGroup == (*I)->ExplicitDef)
           continue;
 
-        SMRange InGroupRange = findSuperClassRange(*DI, "InGroup");
-        SmallString<64> Replacement;
-        if (InGroupRange.isValid()) {
-          Replacement += "InGroup<";
-          Replacement += (*I)->ExplicitDef->getName();
-          Replacement += ">";
-        }
-        SMFixIt FixIt(InGroupRange, Replacement);
-
-        SrcMgr.PrintMessage(NextDiagGroup->getLoc().front(),
+        SrcMgr.PrintMessage((*DI)->getLoc().front(),
                             SourceMgr::DK_Error,
                             Twine("group '") + Name +
-                              "' is referred to anonymously",
-                            None,
-                            InGroupRange.isValid() ? FixIt
-                                                   : ArrayRef<SMFixIt>());
+                              "' is referred to anonymously");
         SrcMgr.PrintMessage((*I)->ExplicitDef->getLoc().front(),
                             SourceMgr::DK_Note, "group defined here");
       }
@@ -266,19 +245,14 @@
       const Record *NextDiagGroup = GroupInit->getDef();
       std::string Name = NextDiagGroup->getValueAsString("GroupName");
 
-      SMRange InGroupRange = findSuperClassRange(*DI, "InGroup");
-      SrcMgr.PrintMessage(NextDiagGroup->getLoc().front(),
+      SrcMgr.PrintMessage((*DI)->getLoc().front(),
                           SourceMgr::DK_Error,
                           Twine("group '") + Name +
-                            "' is referred to anonymously",
-                          InGroupRange);
+                            "' is referred to anonymously");
 
       for (++DI; DI != DE; ++DI) {
-        GroupInit = cast<DefInit>((*DI)->getValueInit("Group"));
-        InGroupRange = findSuperClassRange(*DI, "InGroup");
-        SrcMgr.PrintMessage(GroupInit->getDef()->getLoc().front(),
-                            SourceMgr::DK_Note, "also referenced here",
-                            InGroupRange);
+        SrcMgr.PrintMessage((*DI)->getLoc().front(),
+                            SourceMgr::DK_Note, "also referenced here");
       }
     }
   }
diff --git a/utils/TableGen/ClangOptionDocEmitter.cpp b/utils/TableGen/ClangOptionDocEmitter.cpp
index 5931451..734ff0b 100644
--- a/utils/TableGen/ClangOptionDocEmitter.cpp
+++ b/utils/TableGen/ClangOptionDocEmitter.cpp
@@ -245,19 +245,27 @@
 void emitOptionName(StringRef Prefix, const Record *Option, raw_ostream &OS) {
   // Find the arguments to list after the option.
   unsigned NumArgs = getNumArgsForKind(Option->getValueAsDef("Kind"), Option);
+  bool HasMetaVarName = !Option->isValueUnset("MetaVarName");
 
   std::vector<std::string> Args;
-  if (!Option->isValueUnset("MetaVarName"))
+  if (HasMetaVarName)
     Args.push_back(Option->getValueAsString("MetaVarName"));
   else if (NumArgs == 1)
     Args.push_back("<arg>");
 
-  while (Args.size() < NumArgs) {
-    Args.push_back(("<arg" + Twine(Args.size() + 1) + ">").str());
-    // Use '--args <arg1> <arg2>...' if any number of args are allowed.
-    if (Args.size() == 2 && NumArgs == UnlimitedArgs) {
-      Args.back() += "...";
-      break;
+  // Fill up arguments if this option didn't provide a meta var name or it
+  // supports an unlimited number of arguments. We can't see how many arguments
+  // already are in a meta var name, so assume it has right number. This is
+  // needed for JoinedAndSeparate options so that there arent't too many
+  // arguments.
+  if (!HasMetaVarName || NumArgs == UnlimitedArgs) {
+    while (Args.size() < NumArgs) {
+      Args.push_back(("<arg" + Twine(Args.size() + 1) + ">").str());
+      // Use '--args <arg1> <arg2>...' if any number of args are allowed.
+      if (Args.size() == 2 && NumArgs == UnlimitedArgs) {
+        Args.back() += "...";
+        break;
+      }
     }
   }
 
diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp
index 49c1edc..1a95ca4 100644
--- a/utils/TableGen/NeonEmitter.cpp
+++ b/utils/TableGen/NeonEmitter.cpp
@@ -552,7 +552,11 @@
   // run - Emit arm_neon.h.inc
   void run(raw_ostream &o);
 
+  // runFP16 - Emit arm_fp16.h.inc
+  void runFP16(raw_ostream &o);
+
   // runHeader - Emit all the __builtin prototypes used in arm_neon.h
+	// and arm_fp16.h
   void runHeader(raw_ostream &o);
 
   // runTests - Emit tests for all the Neon intrinsics.
@@ -773,19 +777,19 @@
       break;
     case 'h':
       Float = true;
-    // Fall through
+      LLVM_FALLTHROUGH;
     case 's':
       ElementBitwidth = 16;
       break;
     case 'f':
       Float = true;
-    // Fall through
+      LLVM_FALLTHROUGH;
     case 'i':
       ElementBitwidth = 32;
       break;
     case 'd':
       Float = true;
-    // Fall through
+      LLVM_FALLTHROUGH;
     case 'l':
       ElementBitwidth = 64;
       break;
@@ -852,6 +856,35 @@
     NumVectors = 0;
     Float = true;
     break;
+  case 'Y':
+    Bitwidth = ElementBitwidth = 16;
+    NumVectors = 0;
+    Float = true;
+    break;
+  case 'I':
+    Bitwidth = ElementBitwidth = 32;
+    NumVectors = 0;
+    Float = false;
+    Signed = true;
+    break;
+  case 'L':
+    Bitwidth = ElementBitwidth = 64;
+    NumVectors = 0;
+    Float = false;
+    Signed = true;
+    break;
+  case 'U':
+    Bitwidth = ElementBitwidth = 32;
+    NumVectors = 0;
+    Float = false;
+    Signed = false;
+    break;
+  case 'O':
+    Bitwidth = ElementBitwidth = 64;
+    NumVectors = 0;
+    Float = false;
+    Signed = false;
+    break;
   case 'f':
     Float = true;
     ElementBitwidth = 32;
@@ -860,6 +893,10 @@
     Float = true;
     ElementBitwidth = 64;
     break;
+  case 'H':
+    Float = true;
+    ElementBitwidth = 16;
+    break;
   case 'g':
     if (AppliedQuad)
       Bitwidth /= 2;
@@ -911,7 +948,7 @@
     break;
   case 'c':
     Constant = true;
-  // Fall through
+    LLVM_FALLTHROUGH;
   case 'p':
     Pointer = true;
     Bitwidth = ElementBitwidth;
@@ -1006,7 +1043,7 @@
 }
 
 static bool isFloatingPointProtoModifier(char Mod) {
-  return Mod == 'F' || Mod == 'f';
+  return Mod == 'F' || Mod == 'f' || Mod == 'H' || Mod == 'Y' || Mod == 'I';
 }
 
 std::string Intrinsic::getBuiltinTypeStr() {
@@ -2102,7 +2139,7 @@
     OverloadInfo &OI = I.second;
 
     OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
-    OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL";
+    OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
     if (OI.PtrArgNum >= 0)
       OS << "; PtrArgNum = " << OI.PtrArgNum;
     if (OI.HasConstPtr)
@@ -2316,7 +2353,7 @@
 
     Type T2 = T;
     T2.makeScalar();
-    OS << utostr(T.getNumElements()) << "))) ";
+    OS << T.getNumElements() << "))) ";
     OS << T2.str();
     OS << " " << T.str() << ";\n";
   }
@@ -2346,7 +2383,7 @@
       Type VT(TS, M);
       OS << "typedef struct " << VT.str() << " {\n";
       OS << "  " << T.str() << " val";
-      OS << "[" << utostr(NumMembers) << "]";
+      OS << "[" << NumMembers << "]";
       OS << ";\n} ";
       OS << VT.str() << ";\n";
       OS << "\n";
@@ -2416,12 +2453,125 @@
   OS << "#endif /* __ARM_NEON_H */\n";
 }
 
+/// run - Read the records in arm_fp16.td and output arm_fp16.h.  arm_fp16.h
+/// is comprised of type definitions and function declarations.
+void NeonEmitter::runFP16(raw_ostream &OS) {
+  OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
+        "------------------------------"
+        "---===\n"
+        " *\n"
+        " * Permission is hereby granted, free of charge, to any person "
+        "obtaining a copy\n"
+        " * of this software and associated documentation files (the "
+				"\"Software\"), to deal\n"
+        " * in the Software without restriction, including without limitation "
+				"the rights\n"
+        " * to use, copy, modify, merge, publish, distribute, sublicense, "
+				"and/or sell\n"
+        " * copies of the Software, and to permit persons to whom the Software "
+				"is\n"
+        " * furnished to do so, subject to the following conditions:\n"
+        " *\n"
+        " * The above copyright notice and this permission notice shall be "
+        "included in\n"
+        " * all copies or substantial portions of the Software.\n"
+        " *\n"
+        " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
+        "EXPRESS OR\n"
+        " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
+        "MERCHANTABILITY,\n"
+        " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
+        "SHALL THE\n"
+        " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
+        "OTHER\n"
+        " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
+        "ARISING FROM,\n"
+        " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
+        "DEALINGS IN\n"
+        " * THE SOFTWARE.\n"
+        " *\n"
+        " *===-----------------------------------------------------------------"
+        "---"
+        "---===\n"
+        " */\n\n";
+
+  OS << "#ifndef __ARM_FP16_H\n";
+  OS << "#define __ARM_FP16_H\n\n";
+
+  OS << "#include <stdint.h>\n\n";
+
+  OS << "typedef __fp16 float16_t;\n";
+
+  OS << "#define __ai static inline __attribute__((__always_inline__, "
+        "__nodebug__))\n\n";
+
+  SmallVector<Intrinsic *, 128> Defs;
+  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+  for (auto *R : RV)
+    createIntrinsic(R, Defs);
+
+  for (auto *I : Defs)
+    I->indexBody();
+
+  std::stable_sort(
+      Defs.begin(), Defs.end(),
+      [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; });
+
+  // Only emit a def when its requirements have been met.
+  // FIXME: This loop could be made faster, but it's fast enough for now.
+  bool MadeProgress = true;
+  std::string InGuard;
+  while (!Defs.empty() && MadeProgress) {
+    MadeProgress = false;
+
+    for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
+         I != Defs.end(); /*No step*/) {
+      bool DependenciesSatisfied = true;
+      for (auto *II : (*I)->getDependencies()) {
+        if (std::find(Defs.begin(), Defs.end(), II) != Defs.end())
+          DependenciesSatisfied = false;
+      }
+      if (!DependenciesSatisfied) {
+        // Try the next one.
+        ++I;
+        continue;
+      }
+
+      // Emit #endif/#if pair if needed.
+      if ((*I)->getGuard() != InGuard) {
+        if (!InGuard.empty())
+          OS << "#endif\n";
+        InGuard = (*I)->getGuard();
+        if (!InGuard.empty())
+          OS << "#if " << InGuard << "\n";
+      }
+
+      // Actually generate the intrinsic code.
+      OS << (*I)->generate();
+
+      MadeProgress = true;
+      I = Defs.erase(I);
+    }
+  }
+  assert(Defs.empty() && "Some requirements were not satisfied!");
+  if (!InGuard.empty())
+    OS << "#endif\n";
+
+  OS << "\n";
+  OS << "#undef __ai\n\n";
+  OS << "#endif /* __ARM_FP16_H */\n";
+}
+
 namespace clang {
 
 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
   NeonEmitter(Records).run(OS);
 }
 
+void EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
+  NeonEmitter(Records).runFP16(OS);
+}
+
 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
   NeonEmitter(Records).runHeader(OS);
 }
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 840b330..a2ba131 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -52,6 +52,7 @@
   GenClangCommentCommandInfo,
   GenClangCommentCommandList,
   GenArmNeon,
+  GenArmFP16,
   GenArmNeonSema,
   GenArmNeonTest,
   GenAttrDocs,
@@ -139,6 +140,7 @@
                    "Generate list of commands that are used in "
                    "documentation comments"),
         clEnumValN(GenArmNeon, "gen-arm-neon", "Generate arm_neon.h for clang"),
+        clEnumValN(GenArmFP16, "gen-arm-fp16", "Generate arm_fp16.h for clang"),
         clEnumValN(GenArmNeonSema, "gen-arm-neon-sema",
                    "Generate ARM NEON sema support for clang"),
         clEnumValN(GenArmNeonTest, "gen-arm-neon-test",
@@ -250,6 +252,9 @@
   case GenArmNeon:
     EmitNeon(Records, OS);
     break;
+  case GenArmFP16:
+    EmitFP16(Records, OS);
+    break;
   case GenArmNeonSema:
     EmitNeonSema(Records, OS);
     break;
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index 342c889..706e812 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -65,6 +65,7 @@
 void EmitClangCommentCommandList(RecordKeeper &Records, raw_ostream &OS);
 
 void EmitNeon(RecordKeeper &Records, raw_ostream &OS);
+void EmitFP16(RecordKeeper &Records, raw_ostream &OS);
 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS);
 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS);
 void EmitNeon2(RecordKeeper &Records, raw_ostream &OS);
diff --git a/utils/analyzer/CmpRuns.py b/utils/analyzer/CmpRuns.py
index 2c0ed6a..918e9a9 100755
--- a/utils/analyzer/CmpRuns.py
+++ b/utils/analyzer/CmpRuns.py
@@ -26,8 +26,11 @@
 
 """
 
+import sys
 import os
 import plistlib
+from math import log
+from optparse import OptionParser
 
 
 # Information about analysis run:
@@ -47,6 +50,7 @@
         self._loc = self._data['location']
         self._report = report
         self._htmlReport = htmlReport
+        self._reportSize = len(self._data['path'])
 
     def getFileName(self):
         root = self._report.run.root
@@ -61,6 +65,9 @@
     def getColumn(self):
         return self._loc['col']
 
+    def getPathLength(self):
+        return self._reportSize
+
     def getCategory(self):
         return self._data['category']
 
@@ -81,9 +88,15 @@
         return os.path.join(self._report.run.path, self._htmlReport)
 
     def getReadableName(self):
-        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
-                                     self.getColumn(), self.getCategory(),
-                                     self.getDescription())
+        if 'issue_context' in self._data:
+            funcnamePostfix = "#" + self._data['issue_context']
+        else:
+            funcnamePostfix = ""
+        return '%s%s:%d:%d, %s: %s' % (self.getFileName(),
+                                       funcnamePostfix,
+                                       self.getLine(),
+                                       self.getColumn(), self.getCategory(),
+                                       self.getDescription())
 
     # Note, the data format is not an API and may change from one analyzer
     # version to another.
@@ -91,13 +104,6 @@
         return self._data
 
 
-class CmpOptions:
-    def __init__(self, verboseLog=None, rootA="", rootB=""):
-        self.rootA = rootA
-        self.rootB = rootB
-        self.verboseLog = verboseLog
-
-
 class AnalysisReport:
     def __init__(self, run, files):
         self.run = run
@@ -193,19 +199,20 @@
     return d.getIssueIdentifier()
 
 
-def compareResults(A, B):
+def compareResults(A, B, opts):
     """
     compareResults - Generate a relation from diagnostics in run A to
     diagnostics in run B.
 
-    The result is the relation as a list of triples (a, b, confidence) where
-    each element {a,b} is None or an element from the respective run, and
-    confidence is a measure of the match quality (where 0 indicates equality,
-    and None is used if either element is None).
+    The result is the relation as a list of triples (a, b) where
+    each element {a,b} is None or a matching element from the respective run
     """
 
     res = []
 
+    # Map size_before -> size_after
+    path_difference_data = []
+
     # Quickly eliminate equal elements.
     neqA = []
     neqB = []
@@ -217,7 +224,18 @@
         a = eltsA.pop()
         b = eltsB.pop()
         if (a.getIssueIdentifier() == b.getIssueIdentifier()):
-            res.append((a, b, 0))
+            if a.getPathLength() != b.getPathLength():
+                if opts.relative_path_histogram:
+                    path_difference_data.append(
+                        float(a.getPathLength()) / b.getPathLength())
+                elif opts.relative_log_path_histogram:
+                    path_difference_data.append(
+                        log(float(a.getPathLength()) / b.getPathLength()))
+                elif opts.absolute_path_histogram:
+                    path_difference_data.append(
+                        a.getPathLength() - b.getPathLength())
+
+            res.append((a, b))
         elif a.getIssueIdentifier() > b.getIssueIdentifier():
             eltsB.append(b)
             neqA.append(a)
@@ -234,14 +252,21 @@
     # in any way on the diagnostic format.
 
     for a in neqA:
-        res.append((a, None, None))
+        res.append((a, None))
     for b in neqB:
-        res.append((None, b, None))
+        res.append((None, b))
+
+    if opts.relative_log_path_histogram or opts.relative_path_histogram or \
+            opts.absolute_path_histogram:
+        from matplotlib import pyplot
+        pyplot.hist(path_difference_data, bins=100)
+        pyplot.show()
 
     return res
 
 
-def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
+def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True,
+                             Stdout=sys.stdout):
     # Load the run results.
     resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
     resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)
@@ -252,47 +277,41 @@
     else:
         auxLog = None
 
-    diff = compareResults(resultsA, resultsB)
+    diff = compareResults(resultsA, resultsB, opts)
     foundDiffs = 0
+    totalAdded = 0
+    totalRemoved = 0
     for res in diff:
-        a, b, confidence = res
+        a, b = res
         if a is None:
-            print "ADDED: %r" % b.getReadableName()
+            Stdout.write("ADDED: %r\n" % b.getReadableName())
             foundDiffs += 1
+            totalAdded += 1
             if auxLog:
-                print >>auxLog, ("('ADDED', %r, %r)" % (b.getReadableName(),
-                                                        b.getReport()))
+                auxLog.write("('ADDED', %r, %r)\n" % (b.getReadableName(),
+                                                      b.getReport()))
         elif b is None:
-            print "REMOVED: %r" % a.getReadableName()
+            Stdout.write("REMOVED: %r\n" % a.getReadableName())
             foundDiffs += 1
+            totalRemoved += 1
             if auxLog:
-                print >>auxLog, ("('REMOVED', %r, %r)" % (a.getReadableName(),
-                                                          a.getReport()))
-        elif confidence:
-            print "CHANGED: %r to %r" % (a.getReadableName(),
-                                         b.getReadableName())
-            foundDiffs += 1
-            if auxLog:
-                print >>auxLog, ("('CHANGED', %r, %r, %r, %r)"
-                                 % (a.getReadableName(),
-                                    b.getReadableName(),
-                                    a.getReport(),
-                                    b.getReport()))
+                auxLog.write("('REMOVED', %r, %r)\n" % (a.getReadableName(),
+                                                        a.getReport()))
         else:
             pass
 
     TotalReports = len(resultsB.diagnostics)
-    print "TOTAL REPORTS: %r" % TotalReports
-    print "TOTAL DIFFERENCES: %r" % foundDiffs
+    Stdout.write("TOTAL REPORTS: %r\n" % TotalReports)
+    Stdout.write("TOTAL ADDED: %r\n" % totalAdded)
+    Stdout.write("TOTAL REMOVED: %r\n" % totalRemoved)
     if auxLog:
-        print >>auxLog, "('TOTAL NEW REPORTS', %r)" % TotalReports
-        print >>auxLog, "('TOTAL DIFFERENCES', %r)" % foundDiffs
+        auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
+        auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
+        auxLog.close()
 
     return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)
 
-
-def main():
-    from optparse import OptionParser
+def generate_option_parser():
     parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
     parser.add_option("", "--rootA", dest="rootA",
                       help="Prefix to ignore on source files for directory A",
@@ -302,9 +321,29 @@
                       action="store", type=str, default="")
     parser.add_option("", "--verbose-log", dest="verboseLog",
                       help="Write additional information to LOG \
-                      [default=None]",
+                           [default=None]",
                       action="store", type=str, default=None,
                       metavar="LOG")
+    parser.add_option("--relative-path-differences-histogram",
+                      action="store_true", dest="relative_path_histogram",
+                      default=False,
+                      help="Show histogram of relative paths differences. \
+                            Requires matplotlib")
+    parser.add_option("--relative-log-path-differences-histogram",
+                      action="store_true", dest="relative_log_path_histogram",
+                      default=False,
+                      help="Show histogram of log relative paths differences. \
+                            Requires matplotlib")
+    parser.add_option("--absolute-path-differences-histogram",
+                      action="store_true", dest="absolute_path_histogram",
+                      default=False,
+                      help="Show histogram of absolute paths differences. \
+                            Requires matplotlib")
+    return parser
+
+
+def main():
+    parser = generate_option_parser()
     (opts, args) = parser.parse_args()
 
     if len(args) != 2:
diff --git a/utils/analyzer/SATestAdd.py b/utils/analyzer/SATestAdd.py
index 4c3e35c..041b244 100755
--- a/utils/analyzer/SATestAdd.py
+++ b/utils/analyzer/SATestAdd.py
@@ -32,11 +32,11 @@
                                      (e.g., to adapt to newer version of clang)
                                      that should be applied to CachedSource
                                      before analysis. To construct this patch,
-                                     run the the download script to download
+                                     run the download script to download
                                      the project to CachedSource, copy the
                                      CachedSource to another directory (for
                                      example, PatchedSource) and make any
-                                     needed modifications to the the copied
+                                     needed modifications to the copied
                                      source.
                                      Then run:
                                           diff -ur CachedSource PatchedSource \
diff --git a/utils/analyzer/SATestBuild.py b/utils/analyzer/SATestBuild.py
index c131095..51e5f4e 100755
--- a/utils/analyzer/SATestBuild.py
+++ b/utils/analyzer/SATestBuild.py
@@ -43,78 +43,58 @@
 variable. It should contain a comma separated list.
 """
 import CmpRuns
+import SATestUtils
 
-import os
-import csv
-import sys
-import glob
-import math
-import shutil
-import time
-import plistlib
+from subprocess import CalledProcessError, check_call
 import argparse
-from subprocess import check_call, check_output, CalledProcessError
+import csv
+import glob
+import logging
+import math
 import multiprocessing
+import os
+import plistlib
+import shutil
+import sys
+import threading
+import time
+import Queue
 
 #------------------------------------------------------------------------------
 # Helper functions.
 #------------------------------------------------------------------------------
 
+Local = threading.local()
+Local.stdout = sys.stdout
+Local.stderr = sys.stderr
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
 
-def which(command, paths=None):
-    """which(command, [paths]) - Look up the given command in the paths string
-    (or the PATH environment variable, if unspecified)."""
+class StreamToLogger(object):
+    def __init__(self, logger, log_level=logging.INFO):
+        self.logger = logger
+        self.log_level = log_level
 
-    if paths is None:
-        paths = os.environ.get('PATH', '')
+    def write(self, buf):
+        # Rstrip in order not to write an extra newline.
+        self.logger.log(self.log_level, buf.rstrip())
 
-    # Check for absolute match first.
-    if os.path.exists(command):
-        return command
+    def flush(self):
+        pass
 
-    # Would be nice if Python had a lib function for this.
-    if not paths:
-        paths = os.defpath
-
-    # Get suffixes to search.
-    # On Cygwin, 'PATHEXT' may exist but it should not be used.
-    if os.pathsep == ';':
-        pathext = os.environ.get('PATHEXT', '').split(';')
-    else:
-        pathext = ['']
-
-    # Search the paths...
-    for path in paths.split(os.pathsep):
-        for ext in pathext:
-            p = os.path.join(path, command + ext)
-            if os.path.exists(p):
-                return p
-
-    return None
-
-
-class flushfile(object):
-    """
-    Wrapper to flush the output after every print statement.
-    """
-    def __init__(self, f):
-        self.f = f
-
-    def write(self, x):
-        self.f.write(x)
-        self.f.flush()
-
-
-sys.stdout = flushfile(sys.stdout)
+    def fileno(self):
+        return 0
 
 
 def getProjectMapPath():
     ProjectMapPath = os.path.join(os.path.abspath(os.curdir),
                                   ProjectMapFile)
     if not os.path.exists(ProjectMapPath):
-        print "Error: Cannot find the Project Map file " + ProjectMapPath +\
-              "\nRunning script for the wrong directory?"
-        sys.exit(-1)
+        Local.stdout.write("Error: Cannot find the Project Map file "
+                           + ProjectMapPath
+                           + "\nRunning script for the wrong directory?\n")
+        sys.exit(1)
     return ProjectMapPath
 
 
@@ -137,13 +117,13 @@
 if 'CC' in os.environ:
     Clang = os.environ['CC']
 else:
-    Clang = which("clang", os.environ['PATH'])
+    Clang = SATestUtils.which("clang", os.environ['PATH'])
 if not Clang:
     print "Error: cannot find 'clang' in PATH"
-    sys.exit(-1)
+    sys.exit(1)
 
 # Number of jobs.
-Jobs = int(math.ceil(multiprocessing.cpu_count() * 0.75))
+MaxJobs = int(math.ceil(multiprocessing.cpu_count() * 0.75))
 
 # Project map stores info about all the "registered" projects.
 ProjectMapFile = "projectMap.csv"
@@ -163,8 +143,6 @@
 # displayed when buildbot detects a build failure.
 NumOfFailuresInSummary = 10
 FailuresSummaryFileName = "failures.txt"
-# Summary of the result diffs.
-DiffsSummaryFileName = "diffs.txt"
 
 # The scan-build result directory.
 SBOutputDirName = "ScanBuildResults"
@@ -215,7 +193,8 @@
     """
     Cwd = os.path.join(Dir, PatchedSourceDirName)
     ScriptPath = os.path.join(Dir, CleanupScript)
-    runScript(ScriptPath, PBuildLogFile, Cwd)
+    SATestUtils.runScript(ScriptPath, PBuildLogFile, Cwd,
+                          Stdout=Local.stdout, Stderr=Local.stderr)
 
 
 def runDownloadScript(Dir, PBuildLogFile):
@@ -223,29 +202,8 @@
     Run the script to download the project, if it exists.
     """
     ScriptPath = os.path.join(Dir, DownloadScript)
-    runScript(ScriptPath, PBuildLogFile, Dir)
-
-
-def runScript(ScriptPath, PBuildLogFile, Cwd):
-    """
-    Run the provided script if it exists.
-    """
-    if os.path.exists(ScriptPath):
-        try:
-            if Verbose == 1:
-                print "  Executing: %s" % (ScriptPath,)
-            check_call("chmod +x '%s'" % ScriptPath, cwd=Cwd,
-                       stderr=PBuildLogFile,
-                       stdout=PBuildLogFile,
-                       shell=True)
-            check_call("'%s'" % ScriptPath, cwd=Cwd,
-                       stderr=PBuildLogFile,
-                       stdout=PBuildLogFile,
-                       shell=True)
-        except:
-            print "Error: Running %s failed. See %s for details." % (
-                  ScriptPath, PBuildLogFile.name)
-            sys.exit(-1)
+    SATestUtils.runScript(ScriptPath, PBuildLogFile, Dir,
+                          Stdout=Local.stdout, Stderr=Local.stderr)
 
 
 def downloadAndPatch(Dir, PBuildLogFile):
@@ -259,9 +217,9 @@
     if not os.path.exists(CachedSourceDirPath):
         runDownloadScript(Dir, PBuildLogFile)
         if not os.path.exists(CachedSourceDirPath):
-            print "Error: '%s' not found after download." % (
-                  CachedSourceDirPath)
-            exit(-1)
+            Local.stderr.write("Error: '%s' not found after download.\n" % (
+                               CachedSourceDirPath))
+            exit(1)
 
     PatchedSourceDirPath = os.path.join(Dir, PatchedSourceDirName)
 
@@ -278,10 +236,10 @@
     PatchfilePath = os.path.join(Dir, PatchfileName)
     PatchedSourceDirPath = os.path.join(Dir, PatchedSourceDirName)
     if not os.path.exists(PatchfilePath):
-        print "  No local patches."
+        Local.stdout.write("  No local patches.\n")
         return
 
-    print "  Applying patch."
+    Local.stdout.write("  Applying patch.\n")
     try:
         check_call("patch -p1 < '%s'" % (PatchfilePath),
                    cwd=PatchedSourceDirPath,
@@ -289,8 +247,9 @@
                    stdout=PBuildLogFile,
                    shell=True)
     except:
-        print "Error: Patch failed. See %s for details." % (PBuildLogFile.name)
-        sys.exit(-1)
+        Local.stderr.write("Error: Patch failed. See %s for details.\n" % (
+            PBuildLogFile.name))
+        sys.exit(1)
 
 
 def runScanBuild(Dir, SBOutputDir, PBuildLogFile):
@@ -300,8 +259,9 @@
     """
     BuildScriptPath = os.path.join(Dir, BuildScript)
     if not os.path.exists(BuildScriptPath):
-        print "Error: build script is not defined: %s" % BuildScriptPath
-        sys.exit(-1)
+        Local.stderr.write(
+            "Error: build script is not defined: %s\n" % BuildScriptPath)
+        sys.exit(1)
 
     AllCheckers = Checkers
     if 'SA_ADDITIONAL_CHECKERS' in os.environ:
@@ -314,6 +274,7 @@
     SBOptions += "-plist-html -o '%s' " % SBOutputDir
     SBOptions += "-enable-checker " + AllCheckers + " "
     SBOptions += "--keep-empty "
+    SBOptions += "-analyzer-config 'stable-report-filename=true' "
     # Always use ccc-analyze to ensure that we can locate the failures
     # directory.
     SBOptions += "--override-compiler "
@@ -329,40 +290,20 @@
             # automatically use the maximum number of cores.
             if (Command.startswith("make ") or Command == "make") and \
                     "-j" not in Command:
-                Command += " -j%d" % Jobs
+                Command += " -j%d" % MaxJobs
             SBCommand = SBPrefix + Command
+
             if Verbose == 1:
-                print "  Executing: %s" % (SBCommand,)
+                Local.stdout.write("  Executing: %s\n" % (SBCommand,))
             check_call(SBCommand, cwd=SBCwd,
                        stderr=PBuildLogFile,
                        stdout=PBuildLogFile,
                        shell=True)
-    except:
-        print "Error: scan-build failed. See ", PBuildLogFile.name,\
-              " for details."
-        raise
-
-
-def hasNoExtension(FileName):
-    (Root, Ext) = os.path.splitext(FileName)
-    return (Ext == "")
-
-
-def isValidSingleInputFile(FileName):
-    (Root, Ext) = os.path.splitext(FileName)
-    return Ext in (".i", ".ii", ".c", ".cpp", ".m", "")
-
-
-def getSDKPath(SDKName):
-    """
-    Get the path to the SDK for the given SDK name. Returns None if
-    the path cannot be determined.
-    """
-    if which("xcrun") is None:
-        return None
-
-    Cmd = "xcrun --sdk " + SDKName + " --show-sdk-path"
-    return check_output(Cmd, shell=True).rstrip()
+    except CalledProcessError:
+        Local.stderr.write("Error: scan-build failed. Its output was: \n")
+        PBuildLogFile.seek(0)
+        shutil.copyfileobj(PBuildLogFile, Local.stderr)
+        sys.exit(1)
 
 
 def runAnalyzePreprocessed(Dir, SBOutputDir, Mode):
@@ -370,15 +311,16 @@
     Run analysis on a set of preprocessed files.
     """
     if os.path.exists(os.path.join(Dir, BuildScript)):
-        print "Error: The preprocessed files project should not contain %s" % \
-              BuildScript
+        Local.stderr.write(
+            "Error: The preprocessed files project should not contain %s\n" % (
+                BuildScript))
         raise Exception()
 
     CmdPrefix = Clang + " -cc1 "
 
     # For now, we assume the preprocessed files should be analyzed
     # with the OS X SDK.
-    SDKPath = getSDKPath("macosx")
+    SDKPath = SATestUtils.getSDKPath("macosx")
     if SDKPath is not None:
         CmdPrefix += "-isysroot " + SDKPath + " "
 
@@ -398,10 +340,11 @@
         Failed = False
 
         # Only run the analyzes on supported files.
-        if (hasNoExtension(FileName)):
+        if SATestUtils.hasNoExtension(FileName):
             continue
-        if (not isValidSingleInputFile(FileName)):
-            print "Error: Invalid single input file %s." % (FullFileName,)
+        if not SATestUtils.isValidSingleInputFile(FileName):
+            Local.stderr.write(
+                "Error: Invalid single input file %s.\n" % (FullFileName,))
             raise Exception()
 
         # Build and call the analyzer command.
@@ -410,14 +353,15 @@
         LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b")
         try:
             if Verbose == 1:
-                print "  Executing: %s" % (Command,)
+                Local.stdout.write("  Executing: %s\n" % (Command,))
             check_call(Command, cwd=Dir, stderr=LogFile,
                        stdout=LogFile,
                        shell=True)
         except CalledProcessError, e:
-            print "Error: Analyzes of %s failed. See %s for details." \
-                  "Error code %d." % (
-                      FullFileName, LogFile.name, e.returncode)
+            Local.stderr.write("Error: Analyzes of %s failed. "
+                               "See %s for details."
+                               "Error code %d.\n" % (
+                                   FullFileName, LogFile.name, e.returncode))
             Failed = True
         finally:
             LogFile.close()
@@ -437,7 +381,7 @@
     if (os.path.exists(BuildLogPath)):
         RmCommand = "rm '%s'" % BuildLogPath
         if Verbose == 1:
-            print "  Executing: %s" % (RmCommand,)
+            Local.stdout.write("  Executing: %s\n" % (RmCommand,))
         check_call(RmCommand, shell=True)
 
 
@@ -445,8 +389,8 @@
     TBegin = time.time()
 
     BuildLogPath = getBuildLogPath(SBOutputDir)
-    print "Log file: %s" % (BuildLogPath,)
-    print "Output directory: %s" % (SBOutputDir, )
+    Local.stdout.write("Log file: %s\n" % (BuildLogPath,))
+    Local.stdout.write("Output directory: %s\n" % (SBOutputDir, ))
 
     removeLogFile(SBOutputDir)
 
@@ -454,8 +398,9 @@
     if (os.path.exists(SBOutputDir)):
         RmCommand = "rm -r '%s'" % SBOutputDir
         if Verbose == 1:
-            print "  Executing: %s" % (RmCommand,)
-            check_call(RmCommand, shell=True)
+            Local.stdout.write("  Executing: %s\n" % (RmCommand,))
+            check_call(RmCommand, shell=True, stdout=Local.stdout,
+                       stderr=Local.stderr)
     assert(not os.path.exists(SBOutputDir))
     os.makedirs(os.path.join(SBOutputDir, LogFolderName))
 
@@ -472,8 +417,9 @@
             runCleanupScript(Dir, PBuildLogFile)
             normalizeReferenceResults(Dir, SBOutputDir, ProjectBuildMode)
 
-    print "Build complete (time: %.2f). See the log for more details: %s" % \
-          ((time.time() - TBegin), BuildLogPath)
+    Local.stdout.write("Build complete (time: %.2f). "
+                       "See the log for more details: %s\n" % (
+                           (time.time() - TBegin), BuildLogPath))
 
 
 def normalizeReferenceResults(Dir, SBOutputDir, ProjectBuildMode):
@@ -493,6 +439,14 @@
                      if SourceFile.startswith(PathPrefix)
                      else SourceFile for SourceFile in Data['files']]
             Data['files'] = Paths
+
+            # Remove transient fields which change from run to run.
+            for Diag in Data['diagnostics']:
+                if 'HTMLDiagnostics_files' in Diag:
+                    Diag.pop('HTMLDiagnostics_files')
+            if 'clang_version' in Data:
+                Data.pop('clang_version')
+
             plistlib.writePlist(Data, Plist)
 
 
@@ -512,6 +466,16 @@
             continue
 
 
+def CleanUpEmptyFolders(SBOutputDir):
+    """
+    Remove empty folders from results, as git would not store them.
+    """
+    Subfolders = glob.glob(SBOutputDir + "/*")
+    for Folder in Subfolders:
+        if not os.listdir(Folder):
+            os.removedirs(Folder)
+
+
 def checkBuild(SBOutputDir):
     """
     Given the scan-build output directory, checks if the build failed
@@ -524,43 +488,30 @@
     TotalFailed = len(Failures)
     if TotalFailed == 0:
         CleanUpEmptyPlists(SBOutputDir)
+        CleanUpEmptyFolders(SBOutputDir)
         Plists = glob.glob(SBOutputDir + "/*/*.plist")
-        print "Number of bug reports (non-empty plist files) produced: %d" %\
-            len(Plists)
+        Local.stdout.write(
+            "Number of bug reports (non-empty plist files) produced: %d\n" %
+            len(Plists))
         return
 
-    # Create summary file to display when the build fails.
-    SummaryPath = os.path.join(
-        SBOutputDir, LogFolderName, FailuresSummaryFileName)
-    if (Verbose > 0):
-        print "  Creating the failures summary file %s" % (SummaryPath,)
-
-    with open(SummaryPath, "w+") as SummaryLog:
-        SummaryLog.write("Total of %d failures discovered.\n" % (TotalFailed,))
-        if TotalFailed > NumOfFailuresInSummary:
-            SummaryLog.write("See the first %d below.\n" % (
-                NumOfFailuresInSummary,))
+    Local.stderr.write("Error: analysis failed.\n")
+    Local.stderr.write("Total of %d failures discovered.\n" % TotalFailed)
+    if TotalFailed > NumOfFailuresInSummary:
+        Local.stderr.write(
+            "See the first %d below.\n" % NumOfFailuresInSummary)
         # TODO: Add a line "See the results folder for more."
 
-        Idx = 0
-        for FailLogPathI in Failures:
-            if Idx >= NumOfFailuresInSummary:
-                break
-            Idx += 1
-            SummaryLog.write("\n-- Error #%d -----------\n" % (Idx,))
-            with open(FailLogPathI, "r") as FailLogI:
-                shutil.copyfileobj(FailLogI, SummaryLog)
+    Idx = 0
+    for FailLogPathI in Failures:
+        if Idx >= NumOfFailuresInSummary:
+            break
+        Idx += 1
+        Local.stderr.write("\n-- Error #%d -----------\n" % Idx)
+        with open(FailLogPathI, "r") as FailLogI:
+            shutil.copyfileobj(FailLogI, Local.stdout)
 
-    print "Error: analysis failed. See ", SummaryPath
-    sys.exit(-1)
-
-
-class Discarder(object):
-    """
-    Auxiliary object to discard stdout.
-    """
-    def write(self, text):
-        pass  # do nothing
+    sys.exit(1)
 
 
 def runCmpResults(Dir, Strictness=0):
@@ -590,7 +541,10 @@
         RefList.remove(RefLogDir)
     NewList.remove(os.path.join(NewDir, LogFolderName))
 
-    assert(len(RefList) == len(NewList))
+    if len(RefList) != len(NewList):
+        print "Mismatch in number of results folders: %s vs %s" % (
+            RefList, NewList)
+        sys.exit(1)
 
     # There might be more then one folder underneath - one per each scan-build
     # command (Ex: one for configure and one for make).
@@ -608,32 +562,30 @@
 
         assert(RefDir != NewDir)
         if Verbose == 1:
-            print "  Comparing Results: %s %s" % (RefDir, NewDir)
+            Local.stdout.write("  Comparing Results: %s %s\n" % (
+                               RefDir, NewDir))
 
-        DiffsPath = os.path.join(NewDir, DiffsSummaryFileName)
         PatchedSourceDirPath = os.path.join(Dir, PatchedSourceDirName)
-        Opts = CmpRuns.CmpOptions(DiffsPath, "", PatchedSourceDirPath)
-        # Discard everything coming out of stdout
-        # (CmpRun produces a lot of them).
-        OLD_STDOUT = sys.stdout
-        sys.stdout = Discarder()
+        Opts, Args = CmpRuns.generate_option_parser().parse_args(
+            ["--rootA", "", "--rootB", PatchedSourceDirPath])
         # Scan the results, delete empty plist files.
         NumDiffs, ReportsInRef, ReportsInNew = \
-            CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts, False)
-        sys.stdout = OLD_STDOUT
+            CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts,
+                                             deleteEmpty=False,
+                                             Stdout=Local.stdout)
         if (NumDiffs > 0):
-            print "Warning: %r differences in diagnostics. See %s" % \
-                  (NumDiffs, DiffsPath,)
+            Local.stdout.write("Warning: %s differences in diagnostics.\n"
+                               % NumDiffs)
         if Strictness >= 2 and NumDiffs > 0:
-            print "Error: Diffs found in strict mode (2)."
+            Local.stdout.write("Error: Diffs found in strict mode (2).\n")
             TestsPassed = False
         elif Strictness >= 1 and ReportsInRef != ReportsInNew:
-            print "Error: The number of results are different in "\
-                  "strict mode (1)."
+            Local.stdout.write("Error: The number of results are different " +
+                               " strict mode (1).\n")
             TestsPassed = False
 
-    print "Diagnostic comparison complete (time: %.2f)." % (
-          time.time() - TBegin)
+    Local.stdout.write("Diagnostic comparison complete (time: %.2f).\n" % (
+                       time.time() - TBegin))
     return TestsPassed
 
 
@@ -653,19 +605,49 @@
     removeLogFile(SBOutputDir)
 
 
+class TestProjectThread(threading.Thread):
+    def __init__(self, TasksQueue, ResultsDiffer, FailureFlag):
+        """
+        :param ResultsDiffer: Used to signify that results differ from
+        the canonical ones.
+        :param FailureFlag: Used to signify a failure during the run.
+        """
+        self.TasksQueue = TasksQueue
+        self.ResultsDiffer = ResultsDiffer
+        self.FailureFlag = FailureFlag
+        super(TestProjectThread, self).__init__()
+
+        # Needed to gracefully handle interrupts with Ctrl-C
+        self.daemon = True
+
+    def run(self):
+        while not self.TasksQueue.empty():
+            try:
+                ProjArgs = self.TasksQueue.get()
+                Logger = logging.getLogger(ProjArgs[0])
+                Local.stdout = StreamToLogger(Logger, logging.INFO)
+                Local.stderr = StreamToLogger(Logger, logging.ERROR)
+                if not testProject(*ProjArgs):
+                    self.ResultsDiffer.set()
+                self.TasksQueue.task_done()
+            except:
+                self.FailureFlag.set()
+                raise
+
+
 def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Strictness=0):
     """
     Test a given project.
     :return TestsPassed: Whether tests have passed according
     to the :param Strictness: criteria.
     """
-    print " \n\n--- Building project %s" % (ID,)
+    Local.stdout.write(" \n\n--- Building project %s\n" % (ID,))
 
     TBegin = time.time()
 
     Dir = getProjectDir(ID)
     if Verbose == 1:
-        print "  Build directory: %s." % (Dir,)
+        Local.stdout.write("  Build directory: %s.\n" % (Dir,))
 
     # Set the build results directory.
     RelOutputDir = getSBOutputDirName(IsReferenceBuild)
@@ -681,18 +663,11 @@
     else:
         TestsPassed = runCmpResults(Dir, Strictness)
 
-    print "Completed tests for project %s (time: %.2f)." % \
-          (ID, (time.time() - TBegin))
+    Local.stdout.write("Completed tests for project %s (time: %.2f).\n" % (
+                       ID, (time.time() - TBegin)))
     return TestsPassed
 
 
-def isCommentCSVLine(Entries):
-    """
-    Treat CSV lines starting with a '#' as a comment.
-    """
-    return len(Entries) > 0 and Entries[0].startswith("#")
-
-
 def projectFileHandler():
     return open(getProjectMapPath(), "rb")
 
@@ -704,7 +679,7 @@
     """
     PMapFile.seek(0)
     for I in csv.reader(PMapFile):
-        if (isCommentCSVLine(I)):
+        if (SATestUtils.isCommentCSVLine(I)):
             continue
         yield I
 
@@ -714,24 +689,70 @@
     Validate project file.
     """
     for I in iterateOverProjects(PMapFile):
-        if (len(I) != 2):
+        if len(I) != 2:
             print "Error: Rows in the ProjectMapFile should have 2 entries."
             raise Exception()
-        if (not ((I[1] == "0") | (I[1] == "1") | (I[1] == "2"))):
+        if I[1] not in ('0', '1', '2'):
             print "Error: Second entry in the ProjectMapFile should be 0" \
                   " (single file), 1 (project), or 2(single file c++11)."
             raise Exception()
 
+def singleThreadedTestAll(ProjectsToTest):
+    """
+    Run all projects.
+    :return: whether tests have passed.
+    """
+    Success = True
+    for ProjArgs in ProjectsToTest:
+        Success &= testProject(*ProjArgs)
+    return Success
 
-def testAll(IsReferenceBuild=False, Strictness=0):
-    TestsPassed = True
+def multiThreadedTestAll(ProjectsToTest, Jobs):
+    """
+    Run each project in a separate thread.
+
+    This is OK despite GIL, as testing is blocked
+    on launching external processes.
+
+    :return: whether tests have passed.
+    """
+    TasksQueue = Queue.Queue()
+
+    for ProjArgs in ProjectsToTest:
+        TasksQueue.put(ProjArgs)
+
+    ResultsDiffer = threading.Event()
+    FailureFlag = threading.Event()
+
+    for i in range(Jobs):
+        T = TestProjectThread(TasksQueue, ResultsDiffer, FailureFlag)
+        T.start()
+
+    # Required to handle Ctrl-C gracefully.
+    while TasksQueue.unfinished_tasks:
+        time.sleep(0.1)  # Seconds.
+        if FailureFlag.is_set():
+            Local.stderr.write("Test runner crashed\n")
+            sys.exit(1)
+    return not ResultsDiffer.is_set()
+
+
+def testAll(Args):
+    ProjectsToTest = []
+
     with projectFileHandler() as PMapFile:
         validateProjectFile(PMapFile)
 
         # Test the projects.
         for (ProjName, ProjBuildMode) in iterateOverProjects(PMapFile):
-            TestsPassed &= testProject(
-                ProjName, int(ProjBuildMode), IsReferenceBuild, Strictness)
+            ProjectsToTest.append((ProjName,
+                                  int(ProjBuildMode),
+                                  Args.regenerate,
+                                  Args.strictness))
+    if Args.jobs <= 1:
+        return singleThreadedTestAll(ProjectsToTest)
+    else:
+        return multiThreadedTestAll(ProjectsToTest, Args.jobs)
 
 
 if __name__ == '__main__':
@@ -745,13 +766,12 @@
                              reference. Default is 0.')
     Parser.add_argument('-r', dest='regenerate', action='store_true',
                         default=False, help='Regenerate reference output.')
+    Parser.add_argument('-j', '--jobs', dest='jobs', type=int,
+                        default=0,
+                        help='Number of projects to test concurrently')
     Args = Parser.parse_args()
 
-    IsReference = False
-    Strictness = Args.strictness
-    if Args.regenerate:
-        IsReference = True
-
-    TestsPassed = testAll(IsReference, Strictness)
+    TestsPassed = testAll(Args)
     if not TestsPassed:
-        sys.exit(-1)
+        print "ERROR: Tests failed."
+        sys.exit(42)
diff --git a/utils/analyzer/SATestUpdateDiffs.py b/utils/analyzer/SATestUpdateDiffs.py
index a60c625..92bbd83 100755
--- a/utils/analyzer/SATestUpdateDiffs.py
+++ b/utils/analyzer/SATestUpdateDiffs.py
@@ -13,10 +13,10 @@
 Verbose = 1
 
 
-def runCmd(Command):
+def runCmd(Command, **kwargs):
     if Verbose:
         print "Executing %s" % Command
-    check_call(Command, shell=True)
+    check_call(Command, shell=True, **kwargs)
 
 
 def updateReferenceResults(ProjName, ProjBuildMode):
@@ -32,42 +32,32 @@
     if not os.path.exists(CreatedResultsPath):
         print >> sys.stderr, "New results not found, was SATestBuild.py "\
                              "previously run?"
-        sys.exit(-1)
+        sys.exit(1)
 
-    # Remove reference results: in git, and then again for a good measure
-    # with rm, as git might not remove things fully if there are empty
-    # directories involved.
-    runCmd('git rm -r -q "%s"' % (RefResultsPath,))
-    runCmd('rm -rf "%s"' % (RefResultsPath,))
-
-    # Replace reference results with a freshly computed once.
-    runCmd('cp -r "%s" "%s"' % (CreatedResultsPath, RefResultsPath,))
-
-    # Run cleanup script.
     BuildLogPath = SATestBuild.getBuildLogPath(RefResultsPath)
+    Dirname = os.path.dirname(os.path.abspath(BuildLogPath))
+    runCmd("mkdir -p '%s'" % Dirname)
     with open(BuildLogPath, "wb+") as PBuildLogFile:
+        # Remove reference results: in git, and then again for a good measure
+        # with rm, as git might not remove things fully if there are empty
+        # directories involved.
+        runCmd('git rm -r -q "%s"' % (RefResultsPath,), stdout=PBuildLogFile)
+        runCmd('rm -rf "%s"' % (RefResultsPath,), stdout=PBuildLogFile)
+
+        # Replace reference results with a freshly computed once.
+        runCmd('cp -r "%s" "%s"' % (CreatedResultsPath, RefResultsPath,),
+               stdout=PBuildLogFile)
+
+        # Run cleanup script.
         SATestBuild.runCleanupScript(ProjDir, PBuildLogFile)
 
-    SATestBuild.normalizeReferenceResults(
-        ProjDir, RefResultsPath, ProjBuildMode)
+        SATestBuild.normalizeReferenceResults(
+            ProjDir, RefResultsPath, ProjBuildMode)
 
-    # Clean up the generated difference results.
-    SATestBuild.cleanupReferenceResults(RefResultsPath)
+        # Clean up the generated difference results.
+        SATestBuild.cleanupReferenceResults(RefResultsPath)
 
-    # Remove the created .diffs file before adding.
-    removeDiffsSummaryFiles(RefResultsPath)
-
-    runCmd('git add "%s"' % (RefResultsPath,))
-
-
-def removeDiffsSummaryFiles(RefResultsPath):
-    """
-    Remove all auto-generated .diffs files in reference data.
-    """
-    for (Dirpath, Dirnames, Filenames) in os.walk(RefResultsPath):
-        if SATestBuild.DiffsSummaryFileName in Filenames:
-            runCmd("rm '%s'" % os.path.join(
-                Dirpath, SATestBuild.DiffsSummaryFileName))
+        runCmd('git add "%s"' % (RefResultsPath,), stdout=PBuildLogFile)
 
 
 def main(argv):
@@ -75,7 +65,7 @@
         print >> sys.stderr, "Update static analyzer reference results based "\
                              "\non the previous run of SATestBuild.py.\n"\
                              "\nN.B.: Assumes that SATestBuild.py was just run"
-        sys.exit(-1)
+        sys.exit(1)
 
     with SATestBuild.projectFileHandler() as f:
         for (ProjName, ProjBuildMode) in SATestBuild.iterateOverProjects(f):
diff --git a/utils/analyzer/SATestUtils.py b/utils/analyzer/SATestUtils.py
new file mode 100644
index 0000000..2320652
--- /dev/null
+++ b/utils/analyzer/SATestUtils.py
@@ -0,0 +1,89 @@
+import os
+from subprocess import check_output, check_call
+import sys
+
+
+Verbose = 1
+
+def which(command, paths=None):
+    """which(command, [paths]) - Look up the given command in the paths string
+    (or the PATH environment variable, if unspecified)."""
+
+    if paths is None:
+        paths = os.environ.get('PATH', '')
+
+    # Check for absolute match first.
+    if os.path.exists(command):
+        return command
+
+    # Would be nice if Python had a lib function for this.
+    if not paths:
+        paths = os.defpath
+
+    # Get suffixes to search.
+    # On Cygwin, 'PATHEXT' may exist but it should not be used.
+    if os.pathsep == ';':
+        pathext = os.environ.get('PATHEXT', '').split(';')
+    else:
+        pathext = ['']
+
+    # Search the paths...
+    for path in paths.split(os.pathsep):
+        for ext in pathext:
+            p = os.path.join(path, command + ext)
+            if os.path.exists(p):
+                return p
+
+    return None
+
+
+def hasNoExtension(FileName):
+    (Root, Ext) = os.path.splitext(FileName)
+    return (Ext == "")
+
+
+def isValidSingleInputFile(FileName):
+    (Root, Ext) = os.path.splitext(FileName)
+    return Ext in (".i", ".ii", ".c", ".cpp", ".m", "")
+
+
+def getSDKPath(SDKName):
+    """
+    Get the path to the SDK for the given SDK name. Returns None if
+    the path cannot be determined.
+    """
+    if which("xcrun") is None:
+        return None
+
+    Cmd = "xcrun --sdk " + SDKName + " --show-sdk-path"
+    return check_output(Cmd, shell=True).rstrip()
+
+
+def runScript(ScriptPath, PBuildLogFile, Cwd, Stdout=sys.stdout,
+              Stderr=sys.stderr):
+    """
+    Run the provided script if it exists.
+    """
+    if os.path.exists(ScriptPath):
+        try:
+            if Verbose == 1:
+                Stdout.write("  Executing: %s\n" % (ScriptPath,))
+            check_call("chmod +x '%s'" % ScriptPath, cwd=Cwd,
+                       stderr=PBuildLogFile,
+                       stdout=PBuildLogFile,
+                       shell=True)
+            check_call("'%s'" % ScriptPath, cwd=Cwd,
+                       stderr=PBuildLogFile,
+                       stdout=PBuildLogFile,
+                       shell=True)
+        except:
+            Stderr.write("Error: Running %s failed. See %s for details.\n" % (
+                         ScriptPath, PBuildLogFile.name))
+            sys.exit(-1)
+
+
+def isCommentCSVLine(Entries):
+    """
+    Treat CSV lines starting with a '#' as a comment.
+    """
+    return len(Entries) > 0 and Entries[0].startswith("#")
diff --git a/utils/bash-autocomplete.sh b/utils/bash-autocomplete.sh
index ed815d2..3872dd2 100644
--- a/utils/bash-autocomplete.sh
+++ b/utils/bash-autocomplete.sh
@@ -25,35 +25,16 @@
     w2="${COMP_WORDS[$cword - 2]}"
   fi
 
-  # Clang want to know if -cc1 or -Xclang option is specified or not, because we don't want to show
-  # cc1 options otherwise.
-  if [[ "${COMP_WORDS[1]}" == "-cc1" || "$w1" == "-Xclang" ]]; then
-    arg="#"
-  fi
-
-  # bash always separates '=' as a token even if there's no space before/after '='.
-  # On the other hand, '=' is just a regular character for clang options that
-  # contain '='. For example, "-stdlib=" is defined as is, instead of "-stdlib" and "=".
-  # So, we need to partially undo bash tokenization here for integrity.
-  if [[ "$cur" == -* ]]; then
-    # -foo<tab>
-    arg="$arg$cur"
-  elif [[ "$w1" == -*  && "$cur" == '=' ]]; then
-    # -foo=<tab>
-    arg="$arg$w1=,"
-  elif [[ "$cur" == -*= ]]; then
-    # -foo=<tab>
-    arg="$arg$cur,"
-  elif [[ "$w1" == -* ]]; then
-    # -foo <tab> or -foo bar<tab>
-    arg="$arg$w1,$cur"
-  elif [[ "$w2" == -* && "$w1" == '=' ]]; then
-    # -foo=bar<tab>
-    arg="$arg$w2=,$cur"
-  elif [[ ${cur: -1} != '=' && ${cur/=} != $cur ]]; then
-    # -foo=bar<tab>
-    arg="$arg${cur%=*}=,${cur#*=}"
-  fi
+  # Pass all the current command-line flags to clang, so that clang can handle
+  # these internally.
+  # '=' is separated differently by bash, so we have to concat them without ','
+  for i in `seq 1 $cword`; do
+    if [[ $i == $cword || "${COMP_WORDS[$(($i+1))]}" == '=' ]]; then
+      arg="$arg${COMP_WORDS[$i]}"
+    else
+      arg="$arg${COMP_WORDS[$i]},"
+    fi
+  done
 
   # expand ~ to $HOME
   eval local path=${COMP_WORDS[0]}
@@ -67,7 +48,7 @@
 
   # When clang does not emit any possible autocompletion, or user pushed tab after " ",
   # just autocomplete files.
-  if [[ "$flags" == "$(echo -e '\n')" || "$arg" == "" ]]; then
+  if [[ "$flags" == "$(echo -e '\n')" ]]; then
     # If -foo=<tab> and there was no possible values, autocomplete files.
     [[ "$cur" == '=' || "$cur" == -*= ]] && cur=""
     _clang_filedir
diff --git a/utils/clangdiag.py b/utils/clangdiag.py
new file mode 100755
index 0000000..f434bfe
--- /dev/null
+++ b/utils/clangdiag.py
@@ -0,0 +1,192 @@
+#!/usr/bin/python
+
+#----------------------------------------------------------------------
+# Be sure to add the python path that points to the LLDB shared library.
+#
+# # To use this in the embedded python interpreter using "lldb" just
+# import it with the full path using the "command script import"
+# command
+#   (lldb) command script import /path/to/clandiag.py
+#----------------------------------------------------------------------
+
+import lldb
+import argparse
+import commands
+import shlex
+import os
+import re
+import subprocess
+
+class MyParser(argparse.ArgumentParser):
+    def format_help(self):
+        return '''     Commands for managing clang diagnostic breakpoints
+
+Syntax: clangdiag enable [<warning>|<diag-name>]
+        clangdiag disable
+        clangdiag diagtool [<path>|reset]
+
+The following subcommands are supported:
+
+      enable   -- Enable clang diagnostic breakpoints.
+      disable  -- Disable all clang diagnostic breakpoints.
+      diagtool -- Return, set, or reset diagtool path.
+
+This command sets breakpoints in clang, and clang based tools, that
+emit diagnostics.  When a diagnostic is emitted, and clangdiag is
+enabled, it will use the appropriate diagtool application to determine
+the name of the DiagID, and set breakpoints in all locations that
+'diag::name' appears in the source.  Since the new breakpoints are set
+after they are encountered, users will need to launch the executable a
+second time in order to hit the new breakpoints.
+
+For in-tree builds, the diagtool application, used to map DiagID's to
+names, is found automatically in the same directory as the target
+executable.  However, out-or-tree builds must use the 'diagtool'
+subcommand to set the appropriate path for diagtool in the clang debug
+bin directory.  Since this mapping is created at build-time, it's
+important for users to use the same version that was generated when
+clang was compiled, or else the id's won't match.
+
+Notes:
+- Substrings can be passed for both <warning> and <diag-name>.
+- If <warning> is passed, only enable the DiagID(s) for that warning.
+- If <diag-name> is passed, only enable that DiagID.
+- Rerunning enable clears existing breakpoints.
+- diagtool is used in breakpoint callbacks, so it can be changed
+  without the need to rerun enable.
+- Adding this to your ~.lldbinit file makes clangdiag available at startup:
+  "command script import /path/to/clangdiag.py"
+
+'''
+
+def create_diag_options():
+    parser = MyParser(prog='clangdiag')
+    subparsers = parser.add_subparsers(
+        title='subcommands',
+        dest='subcommands',
+        metavar='')
+    disable_parser = subparsers.add_parser('disable')
+    enable_parser = subparsers.add_parser('enable')
+    enable_parser.add_argument('id', nargs='?')
+    diagtool_parser = subparsers.add_parser('diagtool')
+    diagtool_parser.add_argument('path', nargs='?')
+    return parser
+
+def getDiagtool(target, diagtool = None):
+    id = target.GetProcess().GetProcessID()
+    if 'diagtool' not in getDiagtool.__dict__:
+        getDiagtool.diagtool = {}
+    if diagtool:
+        if diagtool == 'reset':
+            getDiagtool.diagtool[id] = None
+        elif os.path.exists(diagtool):
+            getDiagtool.diagtool[id] = diagtool
+        else:
+            print('clangdiag: %s not found.' % diagtool)
+    if not id in getDiagtool.diagtool or not getDiagtool.diagtool[id]:
+        getDiagtool.diagtool[id] = None
+        exe = target.GetExecutable()
+        if not exe.Exists():
+            print('clangdiag: Target (%s) not set.' % exe.GetFilename())
+        else:
+            diagtool = os.path.join(exe.GetDirectory(), 'diagtool')
+            if os.path.exists(diagtool):
+                getDiagtool.diagtool[id] = diagtool
+            else:
+                print('clangdiag: diagtool not found along side %s' % exe)
+
+    return getDiagtool.diagtool[id]
+
+def setDiagBreakpoint(frame, bp_loc, dict):
+    id = frame.FindVariable("DiagID").GetValue()
+    if id is None:
+        print('clangdiag: id is None')
+        return False
+
+    # Don't need to test this time, since we did that in enable.
+    target = frame.GetThread().GetProcess().GetTarget()
+    diagtool = getDiagtool(target)
+    name = subprocess.check_output([diagtool, "find-diagnostic-id", id]).rstrip();
+    # Make sure we only consider errors, warnings, and extentions.
+    # FIXME: Make this configurable?
+    prefixes = ['err_', 'warn_', 'exp_']
+    if len([prefix for prefix in prefixes+[''] if name.startswith(prefix)][0]):
+        bp = target.BreakpointCreateBySourceRegex(name, lldb.SBFileSpec())
+        bp.AddName("clang::Diagnostic")
+
+    return False
+
+def enable(exe_ctx, args):
+    # Always disable existing breakpoints
+    disable(exe_ctx)
+
+    target = exe_ctx.GetTarget()
+    numOfBreakpoints = target.GetNumBreakpoints()
+
+    if args.id:
+        # Make sure we only consider errors, warnings, and extentions.
+        # FIXME: Make this configurable?
+        prefixes = ['err_', 'warn_', 'exp_']
+        if len([prefix for prefix in prefixes+[''] if args.id.startswith(prefix)][0]):
+            bp = target.BreakpointCreateBySourceRegex(args.id, lldb.SBFileSpec())
+            bp.AddName("clang::Diagnostic")
+        else:
+            diagtool = getDiagtool(target)
+            list = subprocess.check_output([diagtool, "list-warnings"]).rstrip();
+            for line in list.splitlines(True):
+                m = re.search(r' *(.*) .*\[\-W' + re.escape(args.id) + r'.*].*', line)
+                # Make sure we only consider warnings.
+                if m and m.group(1).startswith('warn_'):
+                    bp = target.BreakpointCreateBySourceRegex(m.group(1), lldb.SBFileSpec())
+                    bp.AddName("clang::Diagnostic")
+    else:
+        print('Adding callbacks.')
+        bp = target.BreakpointCreateByName('DiagnosticsEngine::Report')
+        bp.SetScriptCallbackFunction('clangdiag.setDiagBreakpoint')
+        bp.AddName("clang::Diagnostic")
+
+    count = target.GetNumBreakpoints() - numOfBreakpoints
+    print('%i breakpoint%s added.' % (count, "s"[count==1:]))
+
+    return
+
+def disable(exe_ctx):
+    target = exe_ctx.GetTarget()
+    # Remove all diag breakpoints.
+    bkpts = lldb.SBBreakpointList(target)
+    target.FindBreakpointsByName("clang::Diagnostic", bkpts)
+    for i in range(bkpts.GetSize()):
+        target.BreakpointDelete(bkpts.GetBreakpointAtIndex(i).GetID())
+
+    return
+
+def the_diag_command(debugger, command, exe_ctx, result, dict):
+    # Use the Shell Lexer to properly parse up command options just like a
+    # shell would
+    command_args = shlex.split(command)
+    parser = create_diag_options()
+    try:
+        args = parser.parse_args(command_args)
+    except:
+        return
+
+    if args.subcommands == 'enable':
+        enable(exe_ctx, args)
+    elif args.subcommands == 'disable':
+        disable(exe_ctx)
+    else:
+        diagtool = getDiagtool(exe_ctx.GetTarget(), args.path)
+        print('diagtool = %s' % diagtool)
+
+    return
+
+def __lldb_init_module(debugger, dict):
+    # This initializer is being run from LLDB in the embedded command interpreter
+    # Make the options so we can generate the help text for the new LLDB
+    # command line command prior to registering it with LLDB below
+    parser = create_diag_options()
+    the_diag_command.__doc__ = parser.format_help()
+    # Add any commands contained in this module to LLDB
+    debugger.HandleCommand(
+        'command script add -f clangdiag.the_diag_command clangdiag')
+    print 'The "clangdiag" command has been installed, type "help clangdiag" or "clangdiag --help" for detailed help.'
diff --git a/utils/find-unused-diagnostics.sh b/utils/find-unused-diagnostics.sh
index cd48e69..783f58b 100755
--- a/utils/find-unused-diagnostics.sh
+++ b/utils/find-unused-diagnostics.sh
@@ -8,7 +8,7 @@
 ALL_DIAGS=$(grep -E --only-matching --no-filename '(err_|warn_|ext_|note_)[a-z_]+' ./include/clang/Basic/Diagnostic*.td)
 
 # Now look for all potential identifiers in the source files.
-ALL_SOURCES=$(find lib include tools -name \*.cpp -or -name \*.h)
+ALL_SOURCES=$(find lib include tools utils -name \*.cpp -or -name \*.h)
 DIAGS_IN_SOURCES=$(grep -E --only-matching --no-filename '(err_|warn_|ext_|note_)[a-z_]+' $ALL_SOURCES)
 
 # Print all diags that occur in the .td files but not in the source.
diff --git a/www/analyzer/alpha_checks.html b/www/analyzer/alpha_checks.html
index 7d84d23..eccdcaa 100644
--- a/www/analyzer/alpha_checks.html
+++ b/www/analyzer/alpha_checks.html
@@ -800,7 +800,7 @@
 fopen<br>
 fclose</div>(demo checker, the subject of the demo
 (<a href="http://llvm.org/devmtg/2012-11/Zaks-Rose-Checker24Hours.pdf">Slides</a>
-,<a href="http://llvm.org/devmtg/2012-11/videos/Zaks-Rose-Checker24Hours.mp4">Video</a>) 
+,<a href="https://youtu.be/kdxlsP5QVPw">Video</a>)
 by Anna Zaks and Jordan Rose presented at the <a href="http://llvm.org/devmtg/2012-11/">
 2012 LLVM Developers' Meeting).</a></div></div></td>
 <td><div class="exampleContainer expandable">
diff --git a/www/analyzer/checker_dev_manual.html b/www/analyzer/checker_dev_manual.html
index 4883556..42919fa 100644
--- a/www/analyzer/checker_dev_manual.html
+++ b/www/analyzer/checker_dev_manual.html
@@ -23,7 +23,7 @@
 constructing specific bug reports. Anyone who is interested in implementing their own 
 checker, should check out the Building a Checker in 24 Hours talk 
 (<a href="http://llvm.org/devmtg/2012-11/Zaks-Rose-Checker24Hours.pdf">slides</a>
- <a href="http://llvm.org/devmtg/2012-11/videos/Zaks-Rose-Checker24Hours.mp4">video</a>) 
+ <a href="https://youtu.be/kdxlsP5QVPw">video</a>)
 and refer to this page for additional information on writing a checker. The static analyzer is a 
 part of the Clang project, so consult <a href="http://clang.llvm.org/hacking.html">Hacking on Clang</a> 
 and <a href="http://llvm.org/docs/ProgrammersManual.html">LLVM Programmer's Manual</a> 
@@ -696,7 +696,7 @@
 meeting</a>. Describes the construction of SimpleStreamChecker. <a
 href="http://llvm.org/devmtg/2012-11/Zaks-Rose-Checker24Hours.pdf">Slides</a>
 and <a
-href="http://llvm.org/devmtg/2012-11/videos/Zaks-Rose-Checker24Hours.mp4">video</a>
+href="https://youtu.be/kdxlsP5QVPw">video</a>
 are available.
 </ul>
 
diff --git a/www/analyzer/open_projects.html b/www/analyzer/open_projects.html
index 744f4ec..89601a8 100644
--- a/www/analyzer/open_projects.html
+++ b/www/analyzer/open_projects.html
@@ -147,7 +147,7 @@
     <p>A SimpleStreamChecker has been presented in the Building a Checker in 24 
     Hours talk 
     (<a href="http://llvm.org/devmtg/2012-11/Zaks-Rose-Checker24Hours.pdf">slides</a>
-    <a href="http://llvm.org/devmtg/2012-11/videos/Zaks-Rose-Checker24Hours.mp4">video</a>). 
+    <a href="https://youtu.be/kdxlsP5QVPw">video</a>).
     We need to implement a production version of the checker with richer set of 
     APIs and evaluate it by running on real codebases. 
     <i>(Difficulty: Easy)</i></p>
diff --git a/www/analyzer/scan-build.html b/www/analyzer/scan-build.html
index b16f6bb..83efea9 100644
--- a/www/analyzer/scan-build.html
+++ b/www/analyzer/scan-build.html
@@ -248,7 +248,7 @@
 
 <pre class="code_example">
 $ scan-build ./configure
-$ scan-build make
+$ scan-build --keep-cc make
 </pre>
 
 <p>The reason <tt>configure</tt> also needs to be run through
diff --git a/www/comparison.html b/www/comparison.html
index 26f421d..58dcf13 100644
--- a/www/comparison.html
+++ b/www/comparison.html
@@ -4,7 +4,7 @@
 <html>
 <head>
   <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-  <title>Comparing clang to other open source compilers</title>
+  <title>Comparing Clang to other open source compilers</title>
   <link type="text/css" rel="stylesheet" href="menu.css">
   <link type="text/css" rel="stylesheet" href="content.css">
 </head>
@@ -14,7 +14,7 @@
     <h1>Clang vs Other Open Source Compilers</h1>
     
     <p>Building an entirely new compiler front-end is a big task, and it isn't
-       always clear to people why we decided to do this.  Here we compare clang
+       always clear to people why we decided to do this.  Here we compare Clang
        and its goals to other open source compiler front-ends that are
        available.  We restrict the discussion to very specific objective points
        to avoid controversy where possible.  Also, software is infinitely
@@ -24,7 +24,7 @@
        
     <p>The goal of this list is to describe how differences in goals lead to
        different strengths and weaknesses, not to make some compiler look bad.
-       This will hopefully help you to evaluate whether using clang is a good
+       This will hopefully help you to evaluate whether using Clang is a good
        idea for your personal goals.  Because we don't know specifically what
        <em>you</em> want to do, we describe the features of these compilers in
        terms of <em>our</em> goals: if you are only interested in static
@@ -45,10 +45,10 @@
     <h2><a name="gcc">Clang vs GCC (GNU Compiler Collection)</a></h2>
     <!--=====================================================================-->
     
-    <p>Pro's of GCC vs clang:</p>
+    <p>Pro's of GCC vs Clang:</p>
     
     <ul>
-    <li>GCC supports languages that clang does not aim to, such as Java, Ada,
+    <li>GCC supports languages that Clang does not aim to, such as Java, Ada,
         FORTRAN, Go, etc.</li>
     <li>GCC supports more targets than LLVM.</li>
     <li>GCC supports many language extensions, some of which are not implemented
@@ -59,7 +59,7 @@
     allowing VLAs in structs</a>.
     </ul>
 
-    <p>Pro's of clang vs GCC:</p>
+    <p>Pro's of Clang vs GCC:</p>
     
     <ul>
     <li>The Clang ASTs and design are intended to be <a 
@@ -103,7 +103,7 @@
         diagnostics and showing macro expansions, but GCC is still catching
         up.</li>
     <li>GCC is licensed under the GPL license. <a href="features.html#license">
-        clang uses a BSD license,</a> which allows it to be embedded in
+        Clang uses a BSD license,</a> which allows it to be embedded in
         software that is not GPL-licensed.</li>
     <li>Clang inherits a number of features from its use of LLVM as a backend,
         including support for a bytecode representation for intermediate code,
@@ -122,7 +122,7 @@
     <h2><a name="elsa">Clang vs Elsa (Elkhound-based C++ Parser)</a></h2>
     <!--=====================================================================-->
     
-    <p>Pro's of Elsa vs clang:</p>
+    <p>Pro's of Elsa vs Clang:</p>
     
     <ul>
     <li>Elsa's parser and AST is designed to be easily extensible by adding
@@ -130,7 +130,7 @@
         but requires you to write C++ code to do it.</li>
     </ul>
     
-    <p>Pro's of clang vs Elsa:</p>
+    <p>Pro's of Clang vs Elsa:</p>
     
     <ul>
     <li>Clang's C and C++ support is far more mature and practically useful than
@@ -143,7 +143,7 @@
         file bugs against Clang and they will often be fixed for you.  If you
         use Elsa, you are (mostly) on your own for bug fixes and feature
         enhancements.</li>
-    <li>Elsa is not built as a stack of reusable libraries like clang is.  It is
+    <li>Elsa is not built as a stack of reusable libraries like Clang is.  It is
         very difficult to use part of Elsa without the whole front-end.  For
         example, you cannot use Elsa to parse C/ObjC code without building an
         AST.  You can do this in Clang and it is much faster than building an
@@ -153,7 +153,7 @@
         its original position before preprocessing.  Like GCC, it does not keep
         track of macro expansions.</li>
     <li>Elsa is even slower and uses more memory than GCC, which itself requires 
-        far more space and time than clang.</li>
+        far more space and time than Clang.</li>
     <li>Elsa only does partial semantic analysis.  It is intended to work on
         code that is already validated by GCC, so it does not do many semantic
         checks required by the languages it implements.</li>
@@ -166,18 +166,18 @@
     <h2><a name="pcc">Clang vs PCC (Portable C Compiler)</a></h2>
     <!--=====================================================================-->
     
-    <p>Pro's of PCC vs clang:</p>
+    <p>Pro's of PCC vs Clang:</p>
     
     <ul>
     <li>The PCC source base is very small and builds quickly with just a C
         compiler.</li>
     </ul>
     
-    <p>Pro's of clang vs PCC:</p>
+    <p>Pro's of Clang vs PCC:</p>
     
     <ul>
     <li>PCC dates from the 1970's and has been dormant for most of that time.
-        The clang + llvm communities are very active.</li>
+        The Clang and LLVM communities are very active.</li>
     <li>PCC doesn't support Objective-C or C++ and doesn't aim to support
         C++.</li>
     <li>PCC's code generation is very limited compared to LLVM.  It produces very
diff --git a/www/cxx_dr_status.html b/www/cxx_dr_status.html
index 4402c33..f9a8f59 100644
--- a/www/cxx_dr_status.html
+++ b/www/cxx_dr_status.html
@@ -795,7 +795,7 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#126">126</a></td>
     <td>TC1</td>
     <td>Exception specifications and <TT>const</TT></td>
-    <td class="none" align="center">No</td>
+    <td class="partial" align="center">Partial</td>
   </tr>
   <tr id="127">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#127">127</a></td>
@@ -3889,55 +3889,55 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#641">641</a></td>
     <td>CD2</td>
     <td>Overload resolution and conversion-to-same-type operators</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="642">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#642">642</a></td>
     <td>CD2</td>
     <td>Definition and use of &#8220;block scope&#8221; and &#8220;local scope&#8221;</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="643">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_closed.html#643">643</a></td>
     <td>NAD</td>
     <td>Use of <TT>decltype</TT> in a class <I>member-specification</I></td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="644">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#644">644</a></td>
     <td>CD1</td>
     <td>Should a trivial class type be a literal type?</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="partial" align="center">Partial</td>
   </tr>
   <tr id="645">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#645">645</a></td>
     <td>CD2</td>
     <td>Are bit-field and non-bit-field members layout compatible?</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="na" align="center">N/A</td>
   </tr>
   <tr id="646">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_closed.html#646">646</a></td>
     <td>NAD</td>
     <td>Can a class with a constexpr copy constructor be a literal type?</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="none" align="center">Superseded by <a href="#981">981</a></td>
   </tr>
   <tr id="647">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#647">647</a></td>
     <td>CD1</td>
     <td>Non-constexpr instances of constexpr constructor templates</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="648">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#648">648</a></td>
     <td>CD1</td>
     <td>Constant expressions in constexpr initializers</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="649">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#649">649</a></td>
     <td>CD1</td>
     <td>Optionally ill-formed extended alignment requests</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="650">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#650">650</a></td>
@@ -3949,13 +3949,13 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#651">651</a></td>
     <td>CD1</td>
     <td>Problems in <TT>decltype</TT> specification and examples</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="652">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#652">652</a></td>
     <td>CD2</td>
     <td>Compile-time evaluation of floating-point expressions</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="653">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#653">653</a></td>
@@ -3967,25 +3967,25 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#654">654</a></td>
     <td>CD1</td>
     <td>Conversions to and from <TT>nullptr_t</TT></td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="655">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#655">655</a></td>
     <td>C++11</td>
     <td>Initialization not specified for forwarding constructors</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="656">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#656">656</a></td>
     <td>CD2</td>
     <td>Direct binding to the result of a conversion operator</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="657">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#657">657</a></td>
     <td>CD2</td>
     <td>Abstract class parameter in synthesized declaration</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="partial" align="center">Partial</td>
   </tr>
   <tr id="658">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#658">658</a></td>
@@ -3997,13 +3997,13 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#659">659</a></td>
     <td>CD1</td>
     <td>Alignment of function types</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="660">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#660">660</a></td>
     <td>CD1</td>
     <td>Unnamed scoped enumerations</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="661">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#661">661</a></td>
@@ -4015,37 +4015,37 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_closed.html#662">662</a></td>
     <td>NAD</td>
     <td>Forming a pointer to a reference type</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="663">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#663">663</a></td>
     <td>CD1</td>
     <td>Valid Cyrillic identifier characters</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes (C++11 onwards)</td>
   </tr>
   <tr id="664">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#664">664</a></td>
     <td>CD2</td>
     <td>Direct binding of references to non-class rvalue references</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="665">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#665">665</a></td>
     <td>CD2</td>
     <td>Problems in the specification of <TT>dynamic_cast</TT></td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="666">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#666">666</a></td>
     <td>CD1</td>
     <td>Dependent <I>qualified-id</I>s without the <TT>typename</TT> keyword</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="667">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#667">667</a></td>
     <td>CD2</td>
     <td>Trivial special member functions that cannot be implicitly defined</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="668">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#668">668</a></td>
@@ -4057,7 +4057,7 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_closed.html#669">669</a></td>
     <td>NAD</td>
     <td>Confusing specification of the meaning of <TT>decltype</TT></td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr class="open" id="670">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#670">670</a></td>
@@ -4069,7 +4069,7 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#671">671</a></td>
     <td>CD1</td>
     <td>Explicit conversion from a scoped enumeration type to integral type</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="672">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#672">672</a></td>
@@ -4081,31 +4081,31 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_closed.html#673">673</a></td>
     <td>NAD</td>
     <td>Injection of names from <I>elaborated-type-specifier</I>s in <TT>friend</TT> declarations</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="674">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#674">674</a></td>
     <td>C++11</td>
     <td>&#8220;matching specialization&#8221; for a friend declaration</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="none" align="center">No</td>
   </tr>
   <tr id="675">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#675">675</a></td>
     <td>CD3</td>
     <td>Signedness of bit-field with typedef or template parameter type</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="none" align="center">Duplicate of <a href="#739">739</a></td>
   </tr>
   <tr id="676">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#676">676</a></td>
     <td>C++11</td>
     <td><I>static_assert-declaration</I>s and general requirements for declarations</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="na" align="center">N/A</td>
   </tr>
   <tr id="677">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#677">677</a></td>
     <td>CD1</td>
     <td>Deleted <TT>operator delete</TT> and virtual destructors</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="none" align="center">No</td>
   </tr>
   <tr id="678">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#678">678</a></td>
@@ -4117,19 +4117,19 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#679">679</a></td>
     <td>CD1</td>
     <td>Equivalence of <I>template-id</I>s and operator function templates</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="680">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#680">680</a></td>
     <td>CD2</td>
     <td>What is a move constructor?</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="na" align="center">N/A</td>
   </tr>
   <tr id="681">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#681">681</a></td>
     <td>CD1</td>
     <td>Restrictions on declarators with late-specified return types</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="partial" align="center">Partial</td>
   </tr>
   <tr class="open" id="682">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#682">682</a></td>
@@ -4141,25 +4141,25 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#683">683</a></td>
     <td>CD1</td>
     <td>Requirements for trivial subobject special functions</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="684">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#684">684</a></td>
     <td>CD1</td>
     <td>Constant expressions involving the address of an automatic variable</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="none" align="center">Superseded by <a href="#1454">1454</a></td>
   </tr>
   <tr id="685">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#685">685</a></td>
     <td>CD2</td>
     <td>Integral promotion of enumeration ignores fixed underlying type</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr id="686">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#686">686</a></td>
     <td>CD1</td>
     <td>Type declarations/definitions in <I>type-specifier-seq</I>s and <I>type-id</I>s</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Yes</td>
   </tr>
   <tr class="open" id="687">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_closed.html#687">687</a></td>
@@ -6493,7 +6493,7 @@
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1113">1113</a></td>
     <td>C++11</td>
     <td>Linkage of namespace member of unnamed namespace</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="partial" align="center">Partial</td>
   </tr>
   <tr id="1114">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1114">1114</a></td>
diff --git a/www/cxx_status.html b/www/cxx_status.html
index 85c0abc..10a4cda 100644
--- a/www/cxx_status.html
+++ b/www/cxx_status.html
@@ -766,9 +766,10 @@
 functions using expression syntax are no longer guaranteed to be destroyed in
 reverse construction order in that ABI.
 </span><br>
-<span id="p0522">(12): Despite being the the resolution to a Defect Report, this
+<span id="p0522">(12): Despite being the resolution to a Defect Report, this
 feature is disabled by default in all language versions, and can be enabled
-explicitly with the flag <tt>-frelaxed-template-template-args</tt> in Clang 4.
+explicitly with the flag <tt>-frelaxed-template-template-args</tt> in Clang 4
+onwards.
 The change to the standard lacks a corresponding change for template partial
 ordering, resulting in ambiguity errors for reasonable and previously-valid
 code. This issue is expected to be rectified soon.
@@ -798,22 +799,22 @@
     <tr>
       <td>Default member initializers for bit-fields</td>
       <td><a href="http://wg21.link/p0683r1">P0683R1</a></td>
-      <td class="svn" align="center">SVN</td>
+      <td class="svn" align="center">Clang 6</td>
     </tr>
     <tr>
       <td><tt>const&amp;</tt>-qualified pointers to members</td>
       <td><a href="http://wg21.link/p0704r1">P0704R1</a></td>
-      <td class="svn" align="center">SVN</td>
+      <td class="svn" align="center">Clang 6</td>
     </tr>
     <tr>
       <td>Allow <i>lambda-capture</i> <tt>[=, this]</tt></td>
       <td><a href="http://wg21.link/p0409r2">P0409R2</a></td>
-      <td class="svn" align="center">SVN</td>
+      <td class="svn" align="center">Clang 6</td>
     </tr>
     <tr>
       <td><tt>__VA_OPT__</tt> for preprocessor comma elision</td>
       <td><a href="http://wg21.link/p0306r4">P0306R4</a></td>
-      <td class="none" align="center">No</td>
+      <td class="svn" align="center">Clang 6</td>
     </tr>
     <tr>
       <td>Designated initializers</td>
@@ -828,11 +829,60 @@
     <tr>
       <td>Initializer list constructors in class template argument deduction</td>
       <td><a href="http://wg21.link/p0702r1">P0702R1</a></td>
-      <td class="svn" align="center">SVN <a href="#p0702">(13)</a></td>
+      <td class="svn" align="center">Clang 6 <a href="#p0702">(13)</a></td>
     </tr>
-    <tr id="p0374">
-      <td>Concepts</td>
+    <tr id="p0734">
+      <td rowspan="2">Concepts</td>
       <td><a href="http://wg21.link/p0734r0">P0734R0</a></td>
+      <td rowspan="2" class="none" align="center">No</td>
+    </tr>
+      <tr> <!-- from Albuquerque -->
+        <td><a href="http://wg21.link/p0857r0">P0857R0</a></td>
+      </tr>
+    <!-- Albuquerque papers -->
+    <tr>
+      <td>Range-based for statements with initializer</td>
+      <td><a href="http://wg21.link/p0614r1">P0614R1</a></td>
+      <td class="none" align="center">No</td>
+    </tr>
+    <tr>
+      <td>Simplifying implicit lambda capture</td>
+      <td><a href="http://wg21.link/p0588r1">P0588R1</a></td>
+      <td class="none" align="center">No <a href="#p0588">(14)</a></td>
+    </tr>
+    <tr>
+      <td>ADL and function templates that are not visible</td>
+      <td><a href="http://wg21.link/p0846r0">P0846R0</a></td>
+      <td class="none" align="center">No</td>
+    </tr>
+    <tr>
+      <td><tt>const</tt> mismatch with defaulted copy constructor</td>
+      <td><a href="http://wg21.link/p0641r2">P0641R2</a></td>
+      <td class="none" align="center">No</td>
+    </tr>
+    <tr>
+      <td>Less eager instantiation of <tt>constexpr</tt> functions</td>
+      <td><a href="http://wg21.link/p0859r0">P0859R0</a></td>
+      <td class="none" align="center">No <a href="#p0859">(15)</a></td>
+    </tr>
+    <tr>
+      <td>Consistent comparison (<tt>operator&lt;=&gt;</tt>)</td>
+      <td><a href="http://wg21.link/p0515r3">P0515R3</a></td>
+      <td class="none" align="center">No</td>
+    </tr>
+    <tr>
+      <td>Access checking on specializations</td>
+      <td><a href="http://wg21.link/p0692r1">P0692R1</a></td>
+      <td class="partial" align="center">Partial</td>
+    </tr>
+    <tr>
+      <td>Default constructible and assignable stateless lambdas</td>
+      <td><a href="http://wg21.link/p0624r2">P0624R2</a></td>
+      <td class="none" align="center">No</td>
+    </tr>
+    <tr>
+      <td>Lambdas in unevaluated contexts</td>
+      <td><a href="http://wg21.link/p0315r4">P0315R4</a></td>
       <td class="none" align="center">No</td>
     </tr>
 </table>
@@ -841,6 +891,12 @@
 <p>
 <span id="p0702">(13): This is the resolution to a Defect Report, so is applied
 to all language versions supporting class template argument deduction.
+</span><br>
+<span id="p0588">(14): This is the resolution to a Defect Report, so will be applied
+to all language versions supporting lamba expressions.
+</span><br>
+<span id="p0859">(15): This is the resolution to a Defect Report, so will be applied
+to all language versions supporting <tt>constexpr</tt>.
 </span>
 </p>
 
@@ -900,7 +956,7 @@
       <td>[TS] Concepts</td>
       <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/p0121r0.pdf">P0121R0</a></td>
       <td></td>
-      <td class="na" align="center">Superseded by <a href="#p0374">P0734R0</a></td>
+      <td class="na" align="center">Superseded by <a href="#p0734">P0734R0</a></td>
     </tr>
     <tr>
       <td>[DRAFT TS] Coroutines</td>
diff --git a/www/index.html b/www/index.html
index 63ecc38..3bc7020 100644
--- a/www/index.html
+++ b/www/index.html
@@ -4,7 +4,7 @@
 <html>
 <head>
   <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-  <title>"clang" C Language Family Frontend for LLVM</title>
+  <title>Clang C Language Family Frontend for LLVM</title>
   <link type="text/css" rel="stylesheet" href="menu.css">
   <link type="text/css" rel="stylesheet" href="content.css">
 </head>
@@ -12,13 +12,15 @@
 <!--#include virtual="menu.html.incl"-->
 <div id="content">
   <!--*********************************************************************-->
-  <h1>clang: a C language family frontend for LLVM</h1>
+  <h1>Clang: a C language family frontend for LLVM</h1>
   <!--*********************************************************************-->
 
-  <p>The goal of the Clang project is to create a new C based language
-  front-end: C, C++, Objective C/C++, OpenCL C and others for the
-  <a href="http://www.llvm.org/">LLVM</a> compiler.  You can
-  <a href="get_started.html">get and build</a> the source  today.</p>
+  <p>The Clang project provides a language front-end and tooling infrastructure
+  for languages in the C language family (C, C++, Objective C/C++, OpenCL,
+  CUDA, and RenderScript) for the <a href="http://www.llvm.org/">LLVM</a>
+  project. Both a GCC-compatible compiler driver (<tt>clang</tt>) and an
+  MSVC-compatible compiler driver (<tt>clang-cl.exe</tt>) are provided. You
+  can <a href="get_started.html">get and build</a> the source today.</p>
 
   <!--=====================================================================-->
   <h2 id="goals">Features and Goals</h2>
@@ -83,7 +85,7 @@
   </ul>
 
   <p>For a more detailed comparison between Clang and other compilers, please
-     see the <a href="comparison.html">clang comparison page</a>.</p>
+     see the <a href="comparison.html">Clang comparison page</a>.</p>
 
   <!--=====================================================================-->
   <h2>Current Status</h2>
@@ -108,7 +110,7 @@
      it will "resonate" with you. :)</p>
 
   <p>Once you've done that, please consider <a href="get_involved.html">getting
-     involved in the clang community</a>.  The Clang developers include numerous
+     involved in the Clang community</a>.  The Clang developers include numerous
      volunteer contributors with a variety of backgrounds.  If you're
      interested in
      following the development of Clang, signing up for a mailing list is a good
diff --git a/www/make_cxx_dr_status b/www/make_cxx_dr_status
index 5fe7b9f..164ae99 100755
--- a/www/make_cxx_dr_status
+++ b/www/make_cxx_dr_status
@@ -108,9 +108,12 @@
   if status == 'unknown':
     avail = 'Unknown'
     avail_style = ' class="none"'
-  elif status == '6':
+  elif status == '7':
     avail = 'SVN'
     avail_style = ' class="svn"'
+  elif status == '6':
+    avail = 'Clang 6'
+    avail_style = ' class="svn"'
   elif re.match('^[0-9]+\.?[0-9]*', status):
     avail = 'Clang %s' % status
     avail_style = ' class="full"'
diff --git a/www/related.html b/www/related.html
index 0772797..ee4ccd5 100644
--- a/www/related.html
+++ b/www/related.html
@@ -12,7 +12,7 @@
   <!--#include virtual="menu.html.incl"-->
   <div id="content">
     <h1>Clang Related Projects</h1>
-    
+
     <p>As Clang matures, more and more projects are being built atop the Clang
       libraries and other open source projects are starting their own Clang
       related subprojects, like building their source code with Clang or writing
@@ -39,8 +39,8 @@
       <dd>
         <p>
           <b>Site:</b>
-          <a href="http://code.google.com/p/chromium/wiki/Clang">
-            http://code.google.com/p/chromium/wiki/Clang</a>
+          <a href="https://chromium.googlesource.com/chromium/src/+/master/docs/clang.md">
+            https://chromium.googlesource.com/chromium/src/+/master/docs/clang.md</a>
         </p>
         <p>
           Notes on using Clang to build the Chromium web browser.
